From 145449b1e420787bb99721a429341fa6be3adfb6 Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Sun, 3 Jul 2022 16:10:23 +0200
Subject: Vendor import of llvm-project main llvmorg-15-init-15358-g53dc0f107877.

---
 llvm/include/llvm-c/Core.h | 46 +-
 llvm/include/llvm-c/DisassemblerTypes.h | 16 +-
 llvm/include/llvm-c/Object.h | 32 +-
 llvm/include/llvm-c/Orc.h | 110 +-
 llvm/include/llvm-c/TargetMachine.h | 4 +-
 llvm/include/llvm-c/Transforms/Coroutines.h | 56 -
 llvm/include/llvm-c/Transforms/IPO.h | 3 -
 .../include/llvm-c/Transforms/PassManagerBuilder.h | 6 -
 llvm/include/llvm-c/Transforms/Scalar.h | 3 -
 llvm/include/llvm-c/blake3.h | 79 +
 llvm/include/llvm/ADT/APFloat.h | 3 +-
 llvm/include/llvm/ADT/APInt.h | 58 +-
 llvm/include/llvm/ADT/AddressRanges.h | 79 +
 llvm/include/llvm/ADT/ArrayRef.h | 41 +-
 llvm/include/llvm/ADT/BitmaskEnum.h | 12 +-
 llvm/include/llvm/ADT/BreadthFirstIterator.h | 2 +-
 llvm/include/llvm/ADT/DenseMap.h | 1 +
 llvm/include/llvm/ADT/EpochTracker.h | 4 +-
 llvm/include/llvm/ADT/EquivalenceClasses.h | 3 +-
 llvm/include/llvm/ADT/FloatingPointMode.h | 28 +-
 llvm/include/llvm/ADT/FoldingSet.h | 55 +-
 llvm/include/llvm/ADT/GenericCycleImpl.h | 54 +-
 llvm/include/llvm/ADT/GenericCycleInfo.h | 18 +
 llvm/include/llvm/ADT/IntervalMap.h | 34 +-
 llvm/include/llvm/ADT/IntrusiveRefCntPtr.h | 4 +-
 llvm/include/llvm/ADT/Optional.h | 195 +-
 llvm/include/llvm/ADT/PointerIntPair.h | 10 +-
 llvm/include/llvm/ADT/PointerSumType.h | 5 +-
 llvm/include/llvm/ADT/PointerUnion.h | 74 +-
 llvm/include/llvm/ADT/SCCIterator.h | 11 +-
 llvm/include/llvm/ADT/STLExtras.h | 56 +-
 llvm/include/llvm/ADT/SmallVector.h | 5 +-
 llvm/include/llvm/ADT/Statistic.h | 38 +-
 llvm/include/llvm/ADT/StringRef.h | 4 +
 llvm/include/llvm/ADT/Triple.h | 91 +-
 llvm/include/llvm/ADT/edit_distance.h | 38 +-
 llvm/include/llvm/Analysis/AliasAnalysis.h | 12 +-
 .../include/llvm/Analysis/AliasAnalysisEvaluator.h | 4 +-
 llvm/include/llvm/Analysis/AliasSetTracker.h | 7 -
 llvm/include/llvm/Analysis/AssumeBundleQueries.h | 6 +-
 llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 2 -
 .../include/llvm/Analysis/BlockFrequencyInfoImpl.h | 24 +-
 llvm/include/llvm/Analysis/BranchProbabilityInfo.h | 2 -
 llvm/include/llvm/Analysis/CFGPrinter.h | 4 +-
 llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h | 2 +
 .../include/llvm/Analysis/CFLAndersAliasAnalysis.h | 2 +-
 .../include/llvm/Analysis/CFLSteensAliasAnalysis.h | 2 -
 llvm/include/llvm/Analysis/CGSCCPassManager.h | 24 +-
 llvm/include/llvm/Analysis/CallGraph.h | 5 +-
 llvm/include/llvm/Analysis/CallPrinter.h | 14 +
 llvm/include/llvm/Analysis/CaptureTracking.h | 35 +-
 llvm/include/llvm/Analysis/CmpInstAnalysis.h | 37 +-
 llvm/include/llvm/Analysis/CodeMetrics.h | 7 +-
 llvm/include/llvm/Analysis/ConstantFolding.h | 36 +-
 llvm/include/llvm/Analysis/ConstraintSystem.h | 19 +-
 llvm/include/llvm/Analysis/DDG.h | 4 +-
 llvm/include/llvm/Analysis/DDGPrinter.h | 3 +-
 llvm/include/llvm/Analysis/DOTGraphTraitsPass.h | 195 +-
 llvm/include/llvm/Analysis/Delinearization.h | 15 +-
 llvm/include/llvm/Analysis/DependenceAnalysis.h | 6 +-
 llvm/include/llvm/Analysis/DivergenceAnalysis.h | 8 +-
 llvm/include/llvm/Analysis/DomPrinter.h | 118 +-
 llvm/include/llvm/Analysis/DomTreeUpdater.h | 43 -
 llvm/include/llvm/Analysis/DominanceFrontierImpl.h | 1 -
 llvm/include/llvm/Analysis/EHPersonalities.h | 1 -
 .../llvm/Analysis/FunctionPropertiesAnalysis.h | 49 +-
 llvm/include/llvm/Analysis/GlobalsModRef.h | 11 +-
 .../include/llvm/Analysis/IRSimilarityIdentifier.h | 41 +-
 llvm/include/llvm/Analysis/IVDescriptors.h | 52 +-
 llvm/include/llvm/Analysis/IVUsers.h | 2 -
 llvm/include/llvm/Analysis/InlineAdvisor.h | 51 +-
 llvm/include/llvm/Analysis/InlineCost.h | 14 +-
 .../include/llvm/Analysis/InlineModelFeatureMaps.h | 4 +-
 llvm/include/llvm/Analysis/InlineOrder.h | 99 +-
 llvm/include/llvm/Analysis/InstSimplifyFolder.h | 141 +-
 llvm/include/llvm/Analysis/InstructionSimplify.h | 94 +-
 llvm/include/llvm/Analysis/IntervalIterator.h | 3 +-
 llvm/include/llvm/Analysis/LazyCallGraph.h | 11 +-
 llvm/include/llvm/Analysis/LazyValueInfo.h | 3 +
 llvm/include/llvm/Analysis/Loads.h | 4 +-
 llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 91 +-
 llvm/include/llvm/Analysis/LoopAnalysisManager.h | 1 -
 llvm/include/llvm/Analysis/LoopCacheAnalysis.h | 24 +-
 llvm/include/llvm/Analysis/LoopInfo.h | 20 +-
 llvm/include/llvm/Analysis/LoopInfoImpl.h | 15 +-
 llvm/include/llvm/Analysis/LoopPass.h | 3 +-
 llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h | 7 +-
 llvm/include/llvm/Analysis/MLInlineAdvisor.h | 36 +-
 llvm/include/llvm/Analysis/MLModelRunner.h | 21 +-
 llvm/include/llvm/Analysis/MemoryBuiltins.h | 33 +-
 llvm/include/llvm/Analysis/MemoryLocation.h | 1 +
 llvm/include/llvm/Analysis/MemorySSA.h | 48 +-
 llvm/include/llvm/Analysis/MemorySSAUpdater.h | 3 +-
 .../llvm/Analysis/ModelUnderTrainingRunner.h | 7 +-
 .../include/llvm/Analysis/ModuleDebugInfoPrinter.h | 2 +-
 llvm/include/llvm/Analysis/MustExecute.h | 2 +-
 .../include/llvm/Analysis/NoInferenceModelRunner.h | 12 +-
 llvm/include/llvm/Analysis/ObjCARCUtil.h | 4 +-
 llvm/include/llvm/Analysis/OverflowInstAnalysis.h | 4 +-
 llvm/include/llvm/Analysis/PhiValues.h | 1 -
 llvm/include/llvm/Analysis/PostDominators.h | 5 +-
 llvm/include/llvm/Analysis/ProfileSummaryInfo.h | 4 +-
 llvm/include/llvm/Analysis/PtrUseVisitor.h | 11 +-
 llvm/include/llvm/Analysis/RegionInfo.h | 6 +-
 llvm/include/llvm/Analysis/RegionInfoImpl.h | 4 +-
 llvm/include/llvm/Analysis/RegionIterator.h | 2 +-
 llvm/include/llvm/Analysis/RegionPass.h | 3 +-
 llvm/include/llvm/Analysis/RegionPrinter.h | 10 +
 .../include/llvm/Analysis/ReleaseModeModelRunner.h | 44 +-
 llvm/include/llvm/Analysis/ReplayInlineAdvisor.h | 14 +-
 llvm/include/llvm/Analysis/ScalarEvolution.h | 137 +-
 .../llvm/Analysis/ScalarEvolutionAliasAnalysis.h | 7 +-
 .../llvm/Analysis/ScalarEvolutionExpressions.h | 9 +-
 .../llvm/Analysis/ScalarEvolutionNormalization.h | 2 +-
 llvm/include/llvm/Analysis/ScalarFuncs.def | 117 +
 llvm/include/llvm/Analysis/SparsePropagation.h | 1 +
 llvm/include/llvm/Analysis/StackLifetime.h | 3 +-
 .../include/llvm/Analysis/SyncDependenceAnalysis.h | 6 +-
 llvm/include/llvm/Analysis/SyntheticCountsUtils.h | 2 +-
 llvm/include/llvm/Analysis/TargetFolder.h | 162 +-
 llvm/include/llvm/Analysis/TargetLibraryInfo.h | 14 +-
 llvm/include/llvm/Analysis/TargetTransformInfo.h | 139 +-
 .../llvm/Analysis/TargetTransformInfoImpl.h | 78 +-
 llvm/include/llvm/Analysis/TensorSpec.h | 132 +
 llvm/include/llvm/Analysis/TypeMetadataUtils.h | 2 +-
 llvm/include/llvm/Analysis/Utils/TFUtils.h | 102 +-
 llvm/include/llvm/Analysis/ValueLattice.h | 6 +-
 llvm/include/llvm/Analysis/ValueTracking.h | 40 +-
 llvm/include/llvm/Analysis/VectorUtils.h | 30 +-
 llvm/include/llvm/AsmParser/LLLexer.h | 2 +-
 llvm/include/llvm/AsmParser/LLParser.h | 23 +-
 llvm/include/llvm/AsmParser/LLToken.h | 98 +-
 llvm/include/llvm/AsmParser/Parser.h | 4 +-
 llvm/include/llvm/BinaryFormat/COFF.h | 5 +-
 llvm/include/llvm/BinaryFormat/DXContainer.h | 131 +
 llvm/include/llvm/BinaryFormat/Dwarf.h | 4 +
 llvm/include/llvm/BinaryFormat/DynamicTags.def | 1 +
 llvm/include/llvm/BinaryFormat/ELF.h | 90 +-
 .../llvm/BinaryFormat/ELFRelocs/LoongArch.def | 62 +
 llvm/include/llvm/BinaryFormat/GOFF.h | 33 +
 llvm/include/llvm/BinaryFormat/MachO.h | 45 +-
 llvm/include/llvm/BinaryFormat/Magic.h | 3 +
 llvm/include/llvm/BinaryFormat/Swift.def | 7 +
 llvm/include/llvm/BinaryFormat/Wasm.h | 22 +-
 llvm/include/llvm/BinaryFormat/XCOFF.h | 30 +
 llvm/include/llvm/Bitcode/BitcodeAnalyzer.h | 3 +-
 llvm/include/llvm/Bitcode/BitcodeReader.h | 7 +-
 llvm/include/llvm/Bitcode/BitcodeWriter.h | 2 +-
 llvm/include/llvm/Bitcode/BitcodeWriterPass.h | 1 -
 llvm/include/llvm/Bitcode/LLVMBitCodes.h | 27 +-
 llvm/include/llvm/Bitstream/BitCodeEnums.h | 90 +
 llvm/include/llvm/Bitstream/BitCodes.h | 71 +-
 llvm/include/llvm/Bitstream/BitstreamReader.h | 50 +-
 llvm/include/llvm/Bitstream/BitstreamWriter.h | 21 +-
 llvm/include/llvm/CodeGen/AccelTable.h | 8 +-
 llvm/include/llvm/CodeGen/Analysis.h | 5 +-
 llvm/include/llvm/CodeGen/AsmPrinter.h | 36 +-
 .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 109 +
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 248 +-
 llvm/include/llvm/CodeGen/CFIFixup.h | 38 +
 llvm/include/llvm/CodeGen/CalcSpillWeights.h | 12 -
 llvm/include/llvm/CodeGen/CallingConvLower.h | 2 -
 llvm/include/llvm/CodeGen/CodeGenCommonISel.h | 8 +-
 llvm/include/llvm/CodeGen/CodeGenPassBuilder.h | 11 +-
 llvm/include/llvm/CodeGen/CommandFlags.h | 14 +-
 llvm/include/llvm/CodeGen/DFAPacketizer.h | 3 +-
 .../llvm/CodeGen/DbgEntityHistoryCalculator.h | 2 +-
 llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h | 94 +-
 llvm/include/llvm/CodeGen/FastISel.h | 11 +-
 llvm/include/llvm/CodeGen/FaultMaps.h | 1 -
 llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 4 +
 .../llvm/CodeGen/GlobalISel/CSEMIRBuilder.h | 2 +-
 .../include/llvm/CodeGen/GlobalISel/CallLowering.h | 5 +-
 llvm/include/llvm/CodeGen/GlobalISel/Combiner.h | 1 -
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 46 +-
 .../llvm/CodeGen/GlobalISel/GISelWorkList.h | 12 +-
 .../llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 32 +
 .../include/llvm/CodeGen/GlobalISel/IRTranslator.h | 9 +-
 .../llvm/CodeGen/GlobalISel/InstructionSelect.h | 4 +-
 .../llvm/CodeGen/GlobalISel/InstructionSelector.h | 9 +-
 .../CodeGen/GlobalISel/InstructionSelectorImpl.h | 5 +-
 .../GlobalISel/LegalizationArtifactCombiner.h | 8 +-
 llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h | 8 +-
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 20 +-
 .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 38 +-
 .../include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h | 13 +-
 llvm/include/llvm/CodeGen/GlobalISel/Localizer.h | 5 +-
 .../llvm/CodeGen/GlobalISel/MIPatternMatch.h | 42 +
 .../llvm/CodeGen/GlobalISel/MachineIRBuilder.h | 40 +-
 .../llvm/CodeGen/GlobalISel/RegBankSelect.h | 2 +-
 .../include/llvm/CodeGen/GlobalISel/RegisterBank.h | 98 -
 .../llvm/CodeGen/GlobalISel/RegisterBankInfo.h | 775 -
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 61 +-
 llvm/include/llvm/CodeGen/ISDOpcodes.h | 77 +-
 llvm/include/llvm/CodeGen/IntrinsicLowering.h | 2 -
 .../llvm/CodeGen/LazyMachineBlockFrequencyInfo.h | 2 +-
 llvm/include/llvm/CodeGen/LiveInterval.h | 14 +-
 llvm/include/llvm/CodeGen/LiveIntervalUnion.h | 14 +-
 llvm/include/llvm/CodeGen/LiveIntervals.h | 2 +-
 llvm/include/llvm/CodeGen/LivePhysRegs.h | 2 +
 llvm/include/llvm/CodeGen/LiveRangeCalc.h | 1 -
 llvm/include/llvm/CodeGen/LiveRangeEdit.h | 10 +-
 llvm/include/llvm/CodeGen/LiveRegMatrix.h | 12 +-
 llvm/include/llvm/CodeGen/LiveStacks.h | 6 +-
 llvm/include/llvm/CodeGen/LiveVariables.h | 1 +
 llvm/include/llvm/CodeGen/MIRFSDiscriminator.h | 21 +-
 llvm/include/llvm/CodeGen/MIRParser/MIRParser.h | 11 +-
 llvm/include/llvm/CodeGen/MIRSampleProfile.h | 28 +-
 llvm/include/llvm/CodeGen/MIRYamlMapping.h | 19 +-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 35 +-
 .../llvm/CodeGen/MachineBranchProbabilityInfo.h | 2 -
 llvm/include/llvm/CodeGen/MachineCombinerPattern.h | 4 +
 llvm/include/llvm/CodeGen/MachineCycleAnalysis.h | 26 +-
 llvm/include/llvm/CodeGen/MachineDominators.h | 5 +
 llvm/include/llvm/CodeGen/MachineFrameInfo.h | 34 +-
 llvm/include/llvm/CodeGen/MachineFunction.h | 67 +-
 llvm/include/llvm/CodeGen/MachineInstr.h | 45 +-
 llvm/include/llvm/CodeGen/MachineLoopInfo.h | 1 -
 llvm/include/llvm/CodeGen/MachineMemOperand.h | 3 +-
 llvm/include/llvm/CodeGen/MachineModuleInfo.h | 71 +-
 llvm/include/llvm/CodeGen/MachineOperand.h | 15 +-
 .../CodeGen/MachineOptimizationRemarkEmitter.h | 3 +-
 llvm/include/llvm/CodeGen/MachineOutliner.h | 138 +-
 llvm/include/llvm/CodeGen/MachinePassManager.h | 6 +-
 llvm/include/llvm/CodeGen/MachinePassRegistry.def | 3 +-
 llvm/include/llvm/CodeGen/MachinePipeliner.h | 20 +-
 llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 33 +-
 llvm/include/llvm/CodeGen/MachineSSAContext.h | 10 +-
 llvm/include/llvm/CodeGen/MachineScheduler.h | 4 +-
 llvm/include/llvm/CodeGen/MachineStableHash.h | 4 +
 llvm/include/llvm/CodeGen/ModuloSchedule.h | 7 +-
 llvm/include/llvm/CodeGen/PBQP/ReductionRules.h | 2 +-
 llvm/include/llvm/CodeGen/Passes.h | 17 +-
 llvm/include/llvm/CodeGen/PseudoSourceValue.h | 19 +-
 llvm/include/llvm/CodeGen/RDFGraph.h | 1 -
 llvm/include/llvm/CodeGen/RegAllocPBQP.h | 17 +-
 llvm/include/llvm/CodeGen/Register.h | 2 +-
 llvm/include/llvm/CodeGen/RegisterBank.h | 98 +
 llvm/include/llvm/CodeGen/RegisterBankInfo.h | 775 +
 llvm/include/llvm/CodeGen/RegisterClassInfo.h | 7 +-
 llvm/include/llvm/CodeGen/RegisterPressure.h | 1 -
 llvm/include/llvm/CodeGen/RegisterScavenging.h | 20 +
 llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 2 +-
 llvm/include/llvm/CodeGen/ReplaceWithVeclib.h | 4 +-
 llvm/include/llvm/CodeGen/ScheduleDAG.h | 2 +-
 llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 2 +-
 llvm/include/llvm/CodeGen/SelectionDAG.h | 222 +-
 .../llvm/CodeGen/SelectionDAGAddressAnalysis.h | 2 +-
 llvm/include/llvm/CodeGen/SelectionDAGISel.h | 4 +-
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 178 +-
 llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h | 4 +-
 llvm/include/llvm/CodeGen/SlotIndexes.h | 1 -
 llvm/include/llvm/CodeGen/StackMaps.h | 2 +-
 llvm/include/llvm/CodeGen/StackProtector.h | 1 -
 .../include/llvm/CodeGen/SwiftErrorValueTracking.h | 2 -
 llvm/include/llvm/CodeGen/TailDuplicator.h | 5 +-
 llvm/include/llvm/CodeGen/TargetCallingConv.h | 3 +-
 llvm/include/llvm/CodeGen/TargetFrameLowering.h | 12 +
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 26 +-
 llvm/include/llvm/CodeGen/TargetLowering.h | 284 +-
 .../llvm/CodeGen/TargetLoweringObjectFileImpl.h | 11 +
 llvm/include/llvm/CodeGen/TargetPassConfig.h | 3 +
 llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 47 +-
 llvm/include/llvm/CodeGen/TargetSubtargetInfo.h | 10 +-
 llvm/include/llvm/CodeGen/TileShapeInfo.h | 4 +-
 llvm/include/llvm/CodeGen/ValueTypes.h | 7 +-
 llvm/include/llvm/CodeGen/ValueTypes.td | 403 +-
 llvm/include/llvm/DWARFLinker/DWARFLinker.h | 61 +-
 .../llvm/DWARFLinker/DWARFLinkerCompileUnit.h | 8 +-
 .../llvm/DWARFLinker/DWARFLinkerDeclContext.h | 10 +-
 llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 3 +-
 llvm/include/llvm/DWP/DWPStringPool.h | 2 +-
 .../DebugInfo/CodeView/AppendingTypeTableBuilder.h | 2 +-
 .../llvm/DebugInfo/CodeView/CVSymbolVisitor.h | 10 +-
 .../llvm/DebugInfo/CodeView/CVTypeVisitor.h | 5 +-
 .../llvm/DebugInfo/CodeView/CodeViewRecordIO.h | 10 +-
 .../DebugInfo/CodeView/ContinuationRecordBuilder.h | 10 +-
 .../DebugInfo/CodeView/DebugChecksumsSubsection.h | 4 +-
 .../DebugInfo/CodeView/DebugCrossExSubsection.h | 3 +-
 .../DebugInfo/CodeView/DebugCrossImpSubsection.h | 3 +-
 .../DebugInfo/CodeView/DebugFrameDataSubsection.h | 6 +-
 .../CodeView/DebugInlineeLinesSubsection.h | 1 -
 .../llvm/DebugInfo/CodeView/DebugLinesSubsection.h | 3 +-
 .../llvm/DebugInfo/CodeView/DebugSubsection.h | 6 +-
 .../DebugInfo/CodeView/DebugSubsectionVisitor.h | 1 -
 llvm/include/llvm/DebugInfo/CodeView/EnumTables.h | 2 +-
 llvm/include/llvm/DebugInfo/CodeView/Formatters.h | 2 +
 .../DebugInfo/CodeView/GlobalTypeTableBuilder.h | 4 +-
 llvm/include/llvm/DebugInfo/CodeView/Line.h | 1 -
 .../DebugInfo/CodeView/MergingTypeTableBuilder.h | 6 +-
 llvm/include/llvm/DebugInfo/CodeView/RecordName.h | 7 +-
 .../llvm/DebugInfo/CodeView/RecordSerialization.h | 3 +-
 .../llvm/DebugInfo/CodeView/StringsAndChecksums.h | 6 +-
 .../include/llvm/DebugInfo/CodeView/SymbolDumper.h | 8 +-
 .../include/llvm/DebugInfo/CodeView/SymbolRecord.h | 4 +-
 .../llvm/DebugInfo/CodeView/SymbolSerializer.h | 4 +-
 .../llvm/DebugInfo/CodeView/TypeCollection.h | 2 +-
 .../llvm/DebugInfo/CodeView/TypeDumpVisitor.h | 10 +-
 llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h | 6 +-
 llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h | 2 +-
 .../llvm/DebugInfo/CodeView/TypeIndexDiscovery.h | 6 +-
 .../llvm/DebugInfo/CodeView/TypeRecordMapping.h | 5 +-
 .../llvm/DebugInfo/CodeView/TypeStreamMerger.h | 3 +-
 llvm/include/llvm/DebugInfo/DIContext.h | 10 +
 .../DebugInfo/DWARF/DWARFAbbreviationDeclaration.h | 4 +-
 .../llvm/DebugInfo/DWARF/DWARFAddressRange.h | 3 +
 .../llvm/DebugInfo/DWARF/DWARFCompileUnit.h | 7 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 36 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h | 5 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h | 3 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugAranges.h | 7 +-
 .../include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 8 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h | 2 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 8 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h | 9 +-
 .../include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h | 1 -
 .../llvm/DebugInfo/DWARF/DWARFDebugPubTable.h | 5 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugRangeList.h | 6 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugRnglists.h | 11 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 9 +-
 .../include/llvm/DebugInfo/DWARF/DWARFExpression.h | 5 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 14 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h | 2 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h | 1 -
 llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h | 1 +
 .../llvm/DebugInfo/DWARF/DWARFTypePrinter.h | 67 +
 llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 3 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 30 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 21 +-
 llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 28 +-
 .../include/llvm/DebugInfo/GSYM/DwarfTransformer.h | 2 +-
 llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h | 81 +
 llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h | 17 +-
 llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h | 2 +-
 llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h | 3 +-
 llvm/include/llvm/DebugInfo/GSYM/LineEntry.h | 2 +-
 llvm/include/llvm/DebugInfo/GSYM/LookupResult.h | 2 +-
 llvm/include/llvm/DebugInfo/GSYM/Range.h | 130 -
 llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 2 +-
 llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h | 4 +-
 llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h | 1 +
 .../DebugInfo/PDB/Native/DbiModuleDescriptor.h | 6 +-
 .../PDB/Native/DbiModuleDescriptorBuilder.h | 10 +-
 llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h | 14 +-
 .../llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h | 22 +-
 .../include/llvm/DebugInfo/PDB/Native/EnumTables.h | 2 +-
 .../include/llvm/DebugInfo/PDB/Native/FormatUtil.h | 133 +
 .../llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h | 14 +-
 .../llvm/DebugInfo/PDB/Native/GlobalsStream.h | 14 +-
 llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h | 3 -
 .../include/llvm/DebugInfo/PDB/Native/InfoStream.h | 10 +-
 .../llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h | 8 +-
 .../DebugInfo/PDB/Native/InjectedSourceStream.h | 9 +-
 llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h | 231 +
 .../llvm/DebugInfo/PDB/Native/LinePrinter.h | 185 +
 .../llvm/DebugInfo/PDB/Native/ModuleDebugStream.h | 13 +-
 .../llvm/DebugInfo/PDB/Native/NamedStreamMap.h | 1 -
 .../llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h | 2 +-
 .../DebugInfo/PDB/Native/NativeEnumLineNumbers.h | 5 +-
 .../llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h | 2 +-
 .../llvm/DebugInfo/PDB/Native/NativeEnumTypes.h | 7 +-
 .../llvm/DebugInfo/PDB/Native/NativeExeSymbol.h | 5 +-
 .../DebugInfo/PDB/Native/NativeFunctionSymbol.h | 7 +-
 .../DebugInfo/PDB/Native/NativeInlineSiteSymbol.h | 6 +-
 .../llvm/DebugInfo/PDB/Native/NativeLineNumber.h | 4 +-
 .../llvm/DebugInfo/PDB/Native/NativePublicSymbol.h | 5 +-
 .../llvm/DebugInfo/PDB/Native/NativeSession.h | 12 +-
 .../llvm/DebugInfo/PDB/Native/NativeSourceFile.h | 5 +-
 .../DebugInfo/PDB/Native/NativeSymbolEnumerator.h | 8 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeEnum.h | 8 +-
 .../DebugInfo/PDB/Native/NativeTypeFunctionSig.h | 8 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypePointer.h | 5 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h | 9 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeUDT.h | 8 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h | 6 +-
 llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h | 2 -
 .../llvm/DebugInfo/PDB/Native/PDBFileBuilder.h | 14 +-
 .../llvm/DebugInfo/PDB/Native/PDBStringTable.h | 6 -
 .../llvm/DebugInfo/PDB/Native/PublicsStream.h | 13 +-
 .../llvm/DebugInfo/PDB/Native/SymbolCache.h | 16 +-
 .../llvm/DebugInfo/PDB/Native/SymbolStream.h | 5 +-
 llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h | 7 +-
 .../llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h | 15 +-
 llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 2 +
 llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h | 5 +-
 .../llvm/DebugInfo/PDB/PDBSymbolAnnotation.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h | 2 -
 .../llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h | 2 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h | 6 +-
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h | 11 +-
 .../llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h | 2 -
 .../llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeArray.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h | 6 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h | 1 -
 .../include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h | 8 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypePointer.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h | 9 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h | 1 -
 .../llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h | 1 -
 llvm/include/llvm/DebugInfo/PDB/PDBTypes.h | 3 +-
 llvm/include/llvm/DebugInfo/PDB/UDTLayout.h | 1 -
 llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h | 51 +
 llvm/include/llvm/DebugInfo/Symbolize/Markup.h | 120 +
 .../llvm/DebugInfo/Symbolize/MarkupFilter.h | 76 +
 .../DebugInfo/Symbolize/SymbolizableObjectFile.h | 103 +
 llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h | 103 +-
 llvm/include/llvm/Debuginfod/DIFetcher.h | 34 +
 llvm/include/llvm/Debuginfod/HTTPClient.h | 44 +-
 llvm/include/llvm/Demangle/Demangle.h | 4 +-
 llvm/include/llvm/Demangle/ItaniumDemangle.h | 2289 +-
 llvm/include/llvm/Demangle/ItaniumNodes.def | 95 +
 llvm/include/llvm/Demangle/Utility.h | 114 +-
 .../JITLink/DWARFRecordSectionSplitter.h | 35 +
 .../include/llvm/ExecutionEngine/JITLink/JITLink.h | 21 +-
 .../llvm/ExecutionEngine/JITLink/MachO_arm64.h | 27 -
 .../llvm/ExecutionEngine/JITLink/MemoryFlags.h | 10 +-
 .../include/llvm/ExecutionEngine/JITLink/aarch64.h | 339 +-
 llvm/include/llvm/ExecutionEngine/JITLink/riscv.h | 17 +-
 llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h | 9 +-
 llvm/include/llvm/ExecutionEngine/Orc/Core.h | 21 +-
 llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h | 3 +
 .../llvm/ExecutionEngine/Orc/ELFNixPlatform.h | 3 +-
 .../ExecutionEngine/Orc/EPCDebugObjectRegistrar.h | 2 -
 .../llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h | 2 +-
 .../ExecutionEngine/Orc/JITTargetMachineBuilder.h | 2 +-
 llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 20 +-
 .../llvm/ExecutionEngine/Orc/MachOPlatform.h | 146 +-
 .../llvm/ExecutionEngine/Orc/MemoryMapper.h | 115 +
 .../llvm/ExecutionEngine/Orc/OrcABISupport.h | 39 +
 .../ExecutionEngine/Orc/Shared/ExecutorAddress.h | 13 +-
 .../Orc/Shared/SimplePackedSerialization.h | 2 +-
 .../include/llvm/ExecutionEngine/Orc/Speculation.h | 9 +-
 .../llvm/ExecutionEngine/Orc/SymbolStringPool.h | 7 +
 llvm/include/llvm/FileCheck/FileCheck.h | 8 +-
 llvm/include/llvm/Frontend/OpenMP/OMP.td | 279 +-
 llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 129 +-
 llvm/include/llvm/Frontend/OpenMP/OMPContext.h | 8 +-
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 261 +-
 llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 12 +-
 llvm/include/llvm/FuzzMutate/FuzzerCLI.h | 27 +-
 llvm/include/llvm/FuzzMutate/IRMutator.h | 26 +
 llvm/include/llvm/FuzzMutate/OpDescriptor.h | 6 +-
 llvm/include/llvm/FuzzMutate/RandomIRBuilder.h | 13 +-
 llvm/include/llvm/IR/AbstractCallSite.h | 6 +-
 llvm/include/llvm/IR/Argument.h | 1 -
 llvm/include/llvm/IR/Assumptions.h | 4 +
 llvm/include/llvm/IR/Attributes.h | 51 +-
 llvm/include/llvm/IR/Attributes.td | 23 +-
 llvm/include/llvm/IR/AttributesAMDGPU.td | 14 -
 llvm/include/llvm/IR/AutoUpgrade.h | 16 +-
 llvm/include/llvm/IR/BasicBlock.h | 9 +-
 llvm/include/llvm/IR/CFG.h | 1 -
 llvm/include/llvm/IR/ConstantFold.h | 60 +
 llvm/include/llvm/IR/ConstantFolder.h | 178 +-
 llvm/include/llvm/IR/ConstantRange.h | 3 +
 llvm/include/llvm/IR/Constants.h | 15 +-
 llvm/include/llvm/IR/DIBuilder.h | 23 +-
 llvm/include/llvm/IR/DataLayout.h | 2 +-
 llvm/include/llvm/IR/DebugInfoMetadata.h | 183 +-
 llvm/include/llvm/IR/DerivedTypes.h | 9 +-
 llvm/include/llvm/IR/DiagnosticInfo.h | 20 +
 llvm/include/llvm/IR/Dominators.h | 4 +
 llvm/include/llvm/IR/FMF.h | 121 +
 llvm/include/llvm/IR/FPEnv.h | 19 +
 llvm/include/llvm/IR/FixedMetadataKinds.def | 2 +
 llvm/include/llvm/IR/Function.h | 23 +-
 llvm/include/llvm/IR/GCStrategy.h | 17 +-
 llvm/include/llvm/IR/GlobalIFunc.h | 5 +
 llvm/include/llvm/IR/GlobalObject.h | 5 +-
 llvm/include/llvm/IR/GlobalValue.h | 52 +-
 llvm/include/llvm/IR/IRBuilder.h | 303 +-
 llvm/include/llvm/IR/IRBuilderFolder.h | 71 +-
 llvm/include/llvm/IR/InlineAsm.h | 25 +-
 llvm/include/llvm/IR/InstVisitor.h | 3 +-
 llvm/include/llvm/IR/InstrTypes.h | 50 +-
 llvm/include/llvm/IR/Instruction.h | 1 -
 llvm/include/llvm/IR/Instructions.h | 57 +-
 llvm/include/llvm/IR/IntrinsicInst.h | 71 +-
 llvm/include/llvm/IR/Intrinsics.h | 14 +-
 llvm/include/llvm/IR/Intrinsics.td | 420 +-
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 166 +-
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 528 +-
 llvm/include/llvm/IR/IntrinsicsARM.td | 194 +-
 llvm/include/llvm/IR/IntrinsicsBPF.td | 20 +-
 llvm/include/llvm/IR/IntrinsicsDirectX.td | 20 +
 llvm/include/llvm/IR/IntrinsicsHexagon.td | 13 +-
 llvm/include/llvm/IR/IntrinsicsMips.td | 1342 +-
 llvm/include/llvm/IR/IntrinsicsNVVM.td | 1449 +-
 llvm/include/llvm/IR/IntrinsicsPowerPC.td | 746 +-
 llvm/include/llvm/IR/IntrinsicsRISCV.td | 589 +-
 llvm/include/llvm/IR/IntrinsicsSPIRV.td | 31 +
 llvm/include/llvm/IR/IntrinsicsSystemZ.td | 56 +-
 llvm/include/llvm/IR/IntrinsicsVE.td | 15 +-
 llvm/include/llvm/IR/IntrinsicsVEVL.gen.td | 2470 +-
 llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 22 +-
 llvm/include/llvm/IR/IntrinsicsX86.td | 2332 ++-
 llvm/include/llvm/IR/IntrinsicsXCore.td | 8 +-
 llvm/include/llvm/IR/LLVMContext.h | 22 +-
 llvm/include/llvm/IR/LegacyPassManagers.h | 8 +-
 llvm/include/llvm/IR/MDBuilder.h | 4 +
 llvm/include/llvm/IR/MatrixBuilder.h | 20 +-
 llvm/include/llvm/IR/Metadata.h | 158 +-
 llvm/include/llvm/IR/Module.h | 22 +-
 llvm/include/llvm/IR/NoFolder.h | 164 +-
 llvm/include/llvm/IR/Operator.h | 100 +-
 llvm/include/llvm/IR/PatternMatch.h | 116 +-
 llvm/include/llvm/IR/RuntimeLibcalls.def | 16 +
 llvm/include/llvm/IR/Statepoint.h | 5 +-
 llvm/include/llvm/IR/Type.h | 18 +-
 llvm/include/llvm/IR/User.h | 4 +-
 llvm/include/llvm/IR/VPIntrinsics.def | 158 +-
 llvm/include/llvm/IR/ValueMap.h | 6 +-
 llvm/include/llvm/IR/VectorBuilder.h | 99 +
 llvm/include/llvm/IRReader/IRReader.h | 4 +-
 llvm/include/llvm/InitializePasses.h | 38 +-
 llvm/include/llvm/InterfaceStub/ELFObjHandler.h | 11 +-
 llvm/include/llvm/InterfaceStub/IFSHandler.h | 6 +-
 llvm/include/llvm/InterfaceStub/IFSStub.h | 5 +-
 llvm/include/llvm/LTO/Config.h | 10 +-
 llvm/include/llvm/LTO/LTO.h | 13 +-
 llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h | 2 +-
 .../include/llvm/LTO/legacy/ThinLTOCodeGenerator.h | 7 -
 llvm/include/llvm/LinkAllPasses.h | 28 +-
 llvm/include/llvm/Linker/IRMover.h | 7 +-
 llvm/include/llvm/MC/ConstantPools.h | 3 +-
 llvm/include/llvm/MC/MCAsmBackend.h | 8 +-
 llvm/include/llvm/MC/MCAsmInfo.h | 22 +
 llvm/include/llvm/MC/MCAssembler.h | 19 +-
 llvm/include/llvm/MC/MCCodeView.h | 13 +-
 llvm/include/llvm/MC/MCContext.h | 1432 +-
 llvm/include/llvm/MC/MCDXContainerStreamer.h | 49 +
 llvm/include/llvm/MC/MCDXContainerWriter.h | 45 +
 llvm/include/llvm/MC/MCDecoderOps.h | 33 +
 llvm/include/llvm/MC/MCDirectives.h | 1 +
 .../llvm/MC/MCDisassembler/MCDisassembler.h | 28 +-
 .../llvm/MC/MCDisassembler/MCExternalSymbolizer.h | 5 +-
 llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h | 5 +-
 llvm/include/llvm/MC/MCDwarf.h | 11 +-
 llvm/include/llvm/MC/MCELFStreamer.h | 9 +-
 llvm/include/llvm/MC/MCFixedLenDisassembler.h | 33 -
 llvm/include/llvm/MC/MCFragment.h | 7 +-
 llvm/include/llvm/MC/MCInstrAnalysis.h | 3 +
 llvm/include/llvm/MC/MCInstrDesc.h | 10 +-
 llvm/include/llvm/MC/MCInstrInfo.h | 1 +
 llvm/include/llvm/MC/MCLinkerOptimizationHint.h | 2 +-
 llvm/include/llvm/MC/MCMachObjectWriter.h | 2 +
 llvm/include/llvm/MC/MCObjectFileInfo.h | 16 +-
 llvm/include/llvm/MC/MCObjectStreamer.h | 8 +-
 llvm/include/llvm/MC/MCObjectWriter.h | 12 +-
 llvm/include/llvm/MC/MCParser/MCAsmLexer.h | 2 -
 llvm/include/llvm/MC/MCParser/MCAsmParser.h | 6 +-
 .../llvm/MC/MCParser/MCAsmParserExtension.h | 3 +-
 llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h | 12 +-
 llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 15 +-
 llvm/include/llvm/MC/MCPseudoProbe.h | 16 +-
 llvm/include/llvm/MC/MCRegisterInfo.h | 8 +
 llvm/include/llvm/MC/MCSPIRVObjectWriter.h | 40 +
 llvm/include/llvm/MC/MCSPIRVStreamer.h | 50 +
 llvm/include/llvm/MC/MCSection.h | 8 +-
 llvm/include/llvm/MC/MCSectionCOFF.h | 6 +-
 llvm/include/llvm/MC/MCSectionDXContainer.h | 38 +
 llvm/include/llvm/MC/MCSectionELF.h | 8 +-
 llvm/include/llvm/MC/MCSectionGOFF.h | 15 +-
 llvm/include/llvm/MC/MCSectionMachO.h | 4 +-
 llvm/include/llvm/MC/MCSectionSPIRV.h | 41 +
 llvm/include/llvm/MC/MCSectionWasm.h | 4 +-
 llvm/include/llvm/MC/MCSectionXCOFF.h | 29 +-
 llvm/include/llvm/MC/MCStreamer.h | 124 +-
 llvm/include/llvm/MC/MCSubtargetInfo.h | 3 +-
 llvm/include/llvm/MC/MCSymbol.h | 2 +-
 llvm/include/llvm/MC/MCSymbolWasm.h | 14 +-
 llvm/include/llvm/MC/MCSymbolXCOFF.h | 3 +-
 llvm/include/llvm/MC/MCTargetOptions.h | 21 +-
 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h | 3 +
 llvm/include/llvm/MC/MCValue.h | 1 -
 llvm/include/llvm/MC/MCWin64EH.h | 8 +-
 llvm/include/llvm/MC/MCWinCOFFStreamer.h | 20 +-
 llvm/include/llvm/MC/MCWinEH.h | 10 +-
 llvm/include/llvm/MC/MCXCOFFStreamer.h | 4 +
 llvm/include/llvm/MC/SectionKind.h | 7 +
 llvm/include/llvm/MC/StringTableBuilder.h | 1 -
 llvm/include/llvm/MC/SubtargetFeature.h | 3 +-
 llvm/include/llvm/MC/TargetRegistry.h | 56 +-
 llvm/include/llvm/MCA/CustomBehaviour.h | 5 +
 llvm/include/llvm/MCA/IncrementalSourceMgr.h | 92 +
 llvm/include/llvm/MCA/InstrBuilder.h | 30 +
 llvm/include/llvm/MCA/Instruction.h | 45 +-
 llvm/include/llvm/MCA/Pipeline.h | 12 +-
 llvm/include/llvm/MCA/SourceMgr.h | 57 +-
 llvm/include/llvm/MCA/Stages/EntryStage.h | 3 +-
 llvm/include/llvm/MCA/Stages/Stage.h | 13 +
 llvm/include/llvm/ObjCopy/COFF/COFFConfig.h | 27 +
 llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h | 36 +
 llvm/include/llvm/ObjCopy/CommonConfig.h | 271 +
 llvm/include/llvm/ObjCopy/ConfigManager.h | 50 +
 llvm/include/llvm/ObjCopy/ELF/ELFConfig.h | 38 +
 llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h | 53 +
 llvm/include/llvm/ObjCopy/MachO/MachOConfig.h | 46 +
 llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h | 45 +
 llvm/include/llvm/ObjCopy/MultiFormatConfig.h | 39 +
 llvm/include/llvm/ObjCopy/ObjCopy.h | 42 +
 llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h | 21 +
 llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h | 35 +
 llvm/include/llvm/ObjCopy/wasm/WasmConfig.h | 21 +
 llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h | 35 +
 llvm/include/llvm/Object/Archive.h | 12 +-
 llvm/include/llvm/Object/ArchiveWriter.h | 5 +
 llvm/include/llvm/Object/Binary.h | 6 +-
 llvm/include/llvm/Object/COFF.h | 12 +-
 llvm/include/llvm/Object/COFFImportFile.h | 3 +-
 llvm/include/llvm/Object/COFFModuleDefinition.h | 2 +-
 llvm/include/llvm/Object/DXContainer.h | 124 +
 llvm/include/llvm/Object/Decompressor.h | 6 +-
 llvm/include/llvm/Object/ELF.h | 2 +-
 llvm/include/llvm/Object/ELFObjectFile.h | 32 +-
 llvm/include/llvm/Object/ELFTypes.h | 12 +
 llvm/include/llvm/Object/Error.h | 1 +
 llvm/include/llvm/Object/IRObjectFile.h | 1 -
 llvm/include/llvm/Object/MachO.h | 130 +
 llvm/include/llvm/Object/MachOUniversal.h | 2 +-
 llvm/include/llvm/Object/MachOUniversalWriter.h | 13 +-
 llvm/include/llvm/Object/ObjectFile.h | 13 +-
 llvm/include/llvm/Object/OffloadBinary.h | 169 +
 llvm/include/llvm/Object/RelocationResolver.h | 15 +-
 llvm/include/llvm/Object/SymbolicFile.h | 8 +-
 llvm/include/llvm/Object/TapiFile.h | 15 +-
 llvm/include/llvm/Object/TapiUniversal.h | 6 +-
 llvm/include/llvm/Object/Wasm.h | 1 -
 llvm/include/llvm/Object/WindowsResource.h | 2 +-
 llvm/include/llvm/Object/XCOFFObjectFile.h | 15 +-
 llvm/include/llvm/ObjectYAML/DXContainerYAML.h | 101 +
 llvm/include/llvm/ObjectYAML/ELFYAML.h | 45 +-
 llvm/include/llvm/ObjectYAML/MachOYAML.h | 1 +
 llvm/include/llvm/ObjectYAML/ObjectYAML.h | 4 +
 llvm/include/llvm/ObjectYAML/OffloadYAML.h | 79 +
 llvm/include/llvm/ObjectYAML/WasmYAML.h | 22 +-
 llvm/include/llvm/ObjectYAML/yaml2obj.h | 11 +
 llvm/include/llvm/Option/ArgList.h | 14 +-
 llvm/include/llvm/Pass.h | 10 +
 llvm/include/llvm/Passes/PassBuilder.h | 43 +-
 .../include/llvm/Passes/StandardInstrumentations.h | 31 +-
 .../llvm/ProfileData/Coverage/CoverageMapping.h | 4 +-
 llvm/include/llvm/ProfileData/GCOV.h | 4 -
 llvm/include/llvm/ProfileData/InstrProf.h | 41 +-
 .../include/llvm/ProfileData/InstrProfCorrelator.h | 9 +-
 llvm/include/llvm/ProfileData/InstrProfData.inc | 4 +-
 llvm/include/llvm/ProfileData/InstrProfReader.h | 49 +-
 llvm/include/llvm/ProfileData/InstrProfWriter.h | 34 +-
 llvm/include/llvm/ProfileData/MIBEntryDef.inc | 47 +
 llvm/include/llvm/ProfileData/MemProf.h | 613 +
 llvm/include/llvm/ProfileData/MemProfData.inc | 143 +-
 llvm/include/llvm/ProfileData/RawMemProfReader.h | 127 +-
 llvm/include/llvm/ProfileData/SampleProf.h | 97 +-
 llvm/include/llvm/ProfileData/SampleProfReader.h | 17 +-
 llvm/include/llvm/ProfileData/SampleProfWriter.h | 4 -
 llvm/include/llvm/Remarks/RemarkSerializer.h | 1 -
 llvm/include/llvm/Support/AArch64TargetParser.def | 55 +-
 llvm/include/llvm/Support/AMDHSAKernelDescriptor.h | 14 +-
 llvm/include/llvm/Support/ARMBuildAttributes.h | 39 +-
 llvm/include/llvm/Support/ARMTargetParser.def | 8 +-
 llvm/include/llvm/Support/ARMWinEH.h | 5 +-
 llvm/include/llvm/Support/Alignment.h | 73 +-
 llvm/include/llvm/Support/Allocator.h | 7 +-
 llvm/include/llvm/Support/BLAKE3.h | 124 +
 llvm/include/llvm/Support/Base64.h | 1 +
 llvm/include/llvm/Support/BinaryStreamArray.h | 2 +
 llvm/include/llvm/Support/BinaryStreamRef.h | 6 +-
 llvm/include/llvm/Support/BranchProbability.h | 1 +
 llvm/include/llvm/Support/CSKYAttributeParser.h | 43 +
 llvm/include/llvm/Support/CSKYAttributes.h | 95 +
 llvm/include/llvm/Support/CSKYTargetParser.def | 524 +
 llvm/include/llvm/Support/CSKYTargetParser.h | 203 +
 llvm/include/llvm/Support/Casting.h | 769 +-
 llvm/include/llvm/Support/CodeGen.h | 36 +-
 llvm/include/llvm/Support/CommandLine.h | 290 +-
 llvm/include/llvm/Support/Compiler.h | 64 +-
 llvm/include/llvm/Support/Compression.h | 4 +-
 llvm/include/llvm/Support/ConvertUTF.h | 21 +
 llvm/include/llvm/Support/CrashRecoveryContext.h | 3 +
 llvm/include/llvm/Support/Debug.h | 4 +-
 llvm/include/llvm/Support/Errno.h | 1 -
 llvm/include/llvm/Support/Error.h | 4 +-
 llvm/include/llvm/Support/ErrorHandling.h | 25 +-
 llvm/include/llvm/Support/FileUtilities.h | 21 +
 llvm/include/llvm/Support/FormatProviders.h | 2 +-
 llvm/include/llvm/Support/FormatVariadic.h | 2 +-
 llvm/include/llvm/Support/HashBuilder.h | 7 +-
 llvm/include/llvm/Support/Host.h | 1 +
 llvm/include/llvm/Support/KnownBits.h | 8 +-
 llvm/include/llvm/Support/LowLevelTypeImpl.h | 12 +
 llvm/include/llvm/Support/MD5.h | 29 +-
 llvm/include/llvm/Support/MachineValueType.h | 439 +-
 llvm/include/llvm/Support/MathExtras.h | 52 +-
 llvm/include/llvm/Support/Parallel.h | 4 +-
 llvm/include/llvm/Support/Path.h | 1 -
 llvm/include/llvm/Support/PluginLoader.h | 6 +-
 llvm/include/llvm/Support/Printable.h | 8 +-
 llvm/include/llvm/Support/Process.h | 1 -
 llvm/include/llvm/Support/Program.h | 2 +-
 llvm/include/llvm/Support/RISCVISAInfo.h | 2 +
 llvm/include/llvm/Support/RWMutex.h | 4 +-
 llvm/include/llvm/Support/SHA1.h | 13 +-
 llvm/include/llvm/Support/SHA256.h | 13 +-
 llvm/include/llvm/Support/ScopedPrinter.h | 7 +-
 llvm/include/llvm/Support/Signals.h | 1 +
 llvm/include/llvm/Support/Signposts.h | 2 +-
 llvm/include/llvm/Support/SourceMgr.h | 30 +
 llvm/include/llvm/Support/TargetOpcodes.def | 6 +
 llvm/include/llvm/Support/TargetParser.h | 14 +-
 llvm/include/llvm/Support/ThreadPool.h | 97 +-
 llvm/include/llvm/Support/Threading.h | 22 +-
 llvm/include/llvm/Support/TrigramIndex.h | 2 +-
 llvm/include/llvm/Support/TypeSize.h | 19 +
 llvm/include/llvm/Support/Unicode.h | 42 +-
 llvm/include/llvm/Support/VersionTuple.h | 14 +-
 llvm/include/llvm/Support/VirtualFileSystem.h | 140 +-
 llvm/include/llvm/Support/Win64EH.h | 36 +-
 llvm/include/llvm/Support/WithColor.h | 18 +-
 .../llvm/Support/X86DisassemblerDecoderCommon.h | 2 -
 llvm/include/llvm/Support/X86TargetParser.def | 72 +-
 llvm/include/llvm/Support/YAMLParser.h | 1 -
 llvm/include/llvm/Support/YAMLTraits.h | 67 +-
 llvm/include/llvm/Support/circular_raw_ostream.h | 11 +-
 llvm/include/llvm/Support/raw_sha1_ostream.h | 2 +-
 llvm/include/llvm/TableGen/Parser.h | 34 +
 llvm/include/llvm/TableGen/Record.h | 305 +-
 llvm/include/llvm/Target/CGPassBuilderOption.h | 1 +
 llvm/include/llvm/Target/GenericOpcodes.td | 13 +
 llvm/include/llvm/Target/GlobalISel/Combine.td | 73 +-
 llvm/include/llvm/Target/Target.td | 54 +
 .../include/llvm/Target/TargetLoweringObjectFile.h | 3 +-
 llvm/include/llvm/Target/TargetMachine.h | 22 +-
 llvm/include/llvm/Target/TargetOptions.h | 33 +-
 llvm/include/llvm/Target/TargetSelectionDAG.td | 138 +
 llvm/include/llvm/Testing/Support/SupportHelpers.h | 8 +-
 llvm/include/llvm/TextAPI/Symbol.h | 1 -
 .../AggressiveInstCombine/AggressiveInstCombine.h | 4 +-
 llvm/include/llvm/Transforms/Coroutines.h | 37 -
 .../llvm/Transforms/Coroutines/CoroCleanup.h | 4 +-
 .../Transforms/Coroutines/CoroConditionalWrapper.h | 30 +
 .../include/llvm/Transforms/Coroutines/CoroEarly.h | 4 +-
 llvm/include/llvm/Transforms/IPO.h | 7 -
 llvm/include/llvm/Transforms/IPO/AlwaysInliner.h | 4 +-
 .../llvm/Transforms/IPO/ArgumentPromotion.h | 6 +-
 llvm/include/llvm/Transforms/IPO/Attributor.h | 514 +-
 .../llvm/Transforms/IPO/DeadArgumentElimination.h | 41 +-
 .../llvm/Transforms/IPO/ForceFunctionAttrs.h | 3 +-
 llvm/include/llvm/Transforms/IPO/FunctionAttrs.h | 17 +-
 llvm/include/llvm/Transforms/IPO/GlobalDCE.h | 9 +-
 llvm/include/llvm/Transforms/IPO/IROutliner.h | 41 +-
 .../llvm/Transforms/IPO/InferFunctionAttrs.h | 4 +-
 llvm/include/llvm/Transforms/IPO/Inliner.h | 8 +-
 llvm/include/llvm/Transforms/IPO/Internalize.h | 1 -
 llvm/include/llvm/Transforms/IPO/ModuleInliner.h | 9 +-
 .../llvm/Transforms/IPO/PassManagerBuilder.h | 4 -
 .../llvm/Transforms/IPO/ProfiledCallGraph.h | 13 +-
 .../llvm/Transforms/IPO/SampleContextTracker.h | 114 +-
 llvm/include/llvm/Transforms/IPO/SampleProfile.h | 2 +-
 .../llvm/Transforms/IPO/SampleProfileProbe.h | 12 +-
 .../llvm/Transforms/IPO/StripDeadPrototypes.h | 3 +-
 .../llvm/Transforms/IPO/ThinLTOBitcodeWriter.h | 3 +-
 .../llvm/Transforms/IPO/WholeProgramDevirt.h | 5 +-
 .../llvm/Transforms/InstCombine/InstCombine.h | 1 +
 llvm/include/llvm/Transforms/Instrumentation.h | 39 +-
 .../Transforms/Instrumentation/AddressSanitizer.h | 110 +-
 .../Instrumentation/AddressSanitizerCommon.h | 45 -
 .../Instrumentation/AddressSanitizerOptions.h | 5 +-
 .../Transforms/Instrumentation/BoundsChecking.h | 3 +-
 .../llvm/Transforms/Instrumentation/CGProfile.h | 2 +-
 .../Instrumentation/ControlHeightReduction.h | 1 -
 .../Transforms/Instrumentation/DataFlowSanitizer.h | 2 +-
 .../Instrumentation/HWAddressSanitizer.h | 12 +-
 .../Transforms/Instrumentation/InstrProfiling.h | 4 +-
 .../llvm/Transforms/Instrumentation/MemProfiler.h | 7 +-
 .../Transforms/Instrumentation/MemorySanitizer.h | 11 +-
 .../Transforms/Instrumentation/SanitizerCoverage.h | 3 +-
 .../Transforms/Instrumentation/ThreadSanitizer.h | 6 +-
 llvm/include/llvm/Transforms/Scalar.h | 22 +-
 llvm/include/llvm/Transforms/Scalar/BDCE.h | 3 +-
 .../llvm/Transforms/Scalar/CallSiteSplitting.h | 3 +-
 .../llvm/Transforms/Scalar/ConstantHoisting.h | 1 -
 llvm/include/llvm/Transforms/Scalar/DCE.h | 3 +-
 .../llvm/Transforms/Scalar/DFAJumpThreading.h | 3 +-
 llvm/include/llvm/Transforms/Scalar/Float2Int.h | 11 +-
 llvm/include/llvm/Transforms/Scalar/GVN.h | 5 +-
 .../include/llvm/Transforms/Scalar/GuardWidening.h | 5 +-
 .../llvm/Transforms/Scalar/IVUsersPrinter.h | 8 +-
 .../include/llvm/Transforms/Scalar/JumpThreading.h | 6 +-
 llvm/include/llvm/Transforms/Scalar/LICM.h | 60 +-
 .../Transforms/Scalar/LoopAccessAnalysisPrinter.h | 8 +-
 .../llvm/Transforms/Scalar/LoopBoundSplit.h | 4 +-
 .../llvm/Transforms/Scalar/LoopDataPrefetch.h | 3 +-
 llvm/include/llvm/Transforms/Scalar/LoopDeletion.h | 6 +-
 llvm/include/llvm/Transforms/Scalar/LoopFlatten.h | 4 +-
 .../llvm/Transforms/Scalar/LoopInterchange.h | 5 +-
 .../llvm/Transforms/Scalar/LoopPassManager.h | 3 +-
 .../llvm/Transforms/Scalar/LoopPredication.h | 5 +-
 llvm/include/llvm/Transforms/Scalar/LoopRotation.h | 5 +-
 .../llvm/Transforms/Scalar/LoopSimplifyCFG.h | 6 +-
 llvm/include/llvm/Transforms/Scalar/LoopSink.h | 4 +-
 .../llvm/Transforms/Scalar/LoopUnrollAndJamPass.h | 4 +-
 .../llvm/Transforms/Scalar/LoopVersioningLICM.h | 4 +-
 llvm/include/llvm/Transforms/Scalar/LowerAtomic.h | 35 -
 .../llvm/Transforms/Scalar/LowerAtomicPass.h | 30 +
 .../Transforms/Scalar/LowerConstantIntrinsics.h | 3 +-
 .../llvm/Transforms/Scalar/LowerExpectIntrinsic.h | 3 +-
 .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 4 +-
 .../llvm/Transforms/Scalar/MergedLoadStoreMotion.h | 3 +-
 .../Transforms/Scalar/PartiallyInlineLibCalls.h | 2 +-
 llvm/include/llvm/Transforms/Scalar/SCCP.h | 18 +-
 .../Transforms/Scalar/ScalarizeMaskedMemIntrin.h | 2 +-
 llvm/include/llvm/Transforms/Scalar/Scalarizer.h | 21 +-
 .../llvm/Transforms/Scalar/SimpleLoopUnswitch.h | 9 +-
 llvm/include/llvm/Transforms/Scalar/Sink.h | 3 +-
 .../llvm/Transforms/Scalar/SpeculativeExecution.h | 2 +-
 .../llvm/Transforms/Scalar/TLSVariableHoist.h | 131 +
 .../Transforms/Scalar/TailRecursionElimination.h | 3 +-
 .../llvm/Transforms/Scalar/WarnMissedTransforms.h | 3 +-
 llvm/include/llvm/Transforms/Utils.h | 6 +
 .../llvm/Transforms/Utils/AssumeBundleBuilder.h | 7 +-
 .../llvm/Transforms/Utils/BasicBlockUtils.h | 17 +-
 .../llvm/Transforms/Utils/BreakCriticalEdges.h | 3 +-
 llvm/include/llvm/Transforms/Utils/BuildLibCalls.h | 69 +-
 .../llvm/Transforms/Utils/CallGraphUpdater.h | 5 +-
 .../llvm/Transforms/Utils/CallPromotionUtils.h | 10 +
 .../llvm/Transforms/Utils/CanonicalizeAliases.h | 3 +-
 .../Transforms/Utils/CanonicalizeFreezeInLoops.h | 2 +-
 llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 15 +-
 llvm/include/llvm/Transforms/Utils/CtorUtils.h | 8 +-
 llvm/include/llvm/Transforms/Utils/Debugify.h | 38 +-
 .../llvm/Transforms/Utils/EscapeEnumerator.h | 5 +-
 llvm/include/llvm/Transforms/Utils/Evaluator.h | 5 +-
 .../llvm/Transforms/Utils/FunctionComparator.h | 2 +-
 llvm/include/llvm/Transforms/Utils/GlobalStatus.h | 3 +
 .../llvm/Transforms/Utils/InjectTLIMappings.h | 1 +
 llvm/include/llvm/Transforms/Utils/Local.h | 20 +-
 llvm/include/llvm/Transforms/Utils/LoopUtils.h | 45 +-
 .../include/llvm/Transforms/Utils/LoopVersioning.h | 7 +-
 llvm/include/llvm/Transforms/Utils/LowerAtomic.h | 37 +
 .../llvm/Transforms/Utils/LowerGlobalDtors.h | 28 +
 .../llvm/Transforms/Utils/LowerMemIntrinsics.h | 24 +-
 .../llvm/Transforms/Utils/MemoryTaggingSupport.h | 82 +
 llvm/include/llvm/Transforms/Utils/MisExpect.h | 77 +
 llvm/include/llvm/Transforms/Utils/ModuleUtils.h | 11 +-
 .../llvm/Transforms/Utils/NameAnonGlobals.h | 1 -
 llvm/include/llvm/Transforms/Utils/PredicateInfo.h | 2 +-
 .../Transforms/Utils/RelLookupTableConverter.h | 3 +-
 llvm/include/llvm/Transforms/Utils/SCCPSolver.h | 42 +-
 .../include/llvm/Transforms/Utils/SSAUpdaterImpl.h | 26 +
 .../llvm/Transforms/Utils/SampleProfileInference.h | 1 -
 .../Transforms/Utils/SampleProfileLoaderBaseImpl.h | 5 +-
 .../Transforms/Utils/SampleProfileLoaderBaseUtil.h | 8 +-
 .../Transforms/Utils/ScalarEvolutionExpander.h | 24 +-
 .../llvm/Transforms/Utils/SimplifyCFGOptions.h | 5 +
 .../include/llvm/Transforms/Utils/SimplifyIndVar.h | 7 +-
 .../llvm/Transforms/Utils/SimplifyLibCalls.h | 12 +-
 llvm/include/llvm/Transforms/Utils/SizeOpts.h | 1 -
 llvm/include/llvm/Transforms/Utils/SplitModule.h | 2 +-
 llvm/include/llvm/Transforms/Utils/UnrollLoop.h | 9 +-
 .../Transforms/Vectorize/LoadStoreVectorizer.h | 3 +-
 .../Vectorize/LoopVectorizationLegality.h | 31 +-
 .../llvm/Transforms/Vectorize/SLPVectorizer.h | 4 +-
 llvm/include/llvm/WindowsDriver/MSVCPaths.h | 107 +
 llvm/include/llvm/WindowsDriver/MSVCSetupApi.h | 523 +
 llvm/include/llvm/module.modulemap | 7 +-
 llvm/lib/Analysis/AliasAnalysis.cpp | 25 +-
 llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 110 +-
 llvm/lib/Analysis/AliasSetTracker.cpp | 33 +-
 llvm/lib/Analysis/Analysis.cpp | 16 +-
 llvm/lib/Analysis/AssumeBundleQueries.cpp | 4 +-
 llvm/lib/Analysis/AssumptionCache.cpp | 4 +-
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 75 +-
 llvm/lib/Analysis/BlockFrequencyInfo.cpp | 1 -
 llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 5 +-
 llvm/lib/Analysis/BranchProbabilityInfo.cpp | 17 +-
 llvm/lib/Analysis/CFG.cpp | 6 +-
 llvm/lib/Analysis/CFGPrinter.cpp | 2 +-
 llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 4 +-
 llvm/lib/Analysis/CFLGraph.h | 4 +-
 llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 14 +-
 llvm/lib/Analysis/CGSCCPassManager.cpp | 39 +-
 llvm/lib/Analysis/CallGraph.cpp | 4 +-
 llvm/lib/Analysis/CallGraphSCCPass.cpp | 7 +-
 llvm/lib/Analysis/CallPrinter.cpp | 106 +-
 llvm/lib/Analysis/CaptureTracking.cpp | 331 +-
 llvm/lib/Analysis/CmpInstAnalysis.cpp | 16 +-
 llvm/lib/Analysis/CodeMetrics.cpp | 15 +-
 llvm/lib/Analysis/ConstantFolding.cpp | 266 +-
 llvm/lib/Analysis/ConstraintSystem.cpp | 1 -
 llvm/lib/Analysis/CostModel.cpp | 4 +-
 llvm/lib/Analysis/CycleAnalysis.cpp | 6 +-
 llvm/lib/Analysis/DDG.cpp | 9 +-
 llvm/lib/Analysis/DDGPrinter.cpp | 4 +-
 llvm/lib/Analysis/Delinearization.cpp | 40 +-
 llvm/lib/Analysis/DemandedBits.cpp | 6 -
 llvm/lib/Analysis/DependenceAnalysis.cpp | 102 +-
 llvm/lib/Analysis/DependenceGraphBuilder.cpp | 1 +
 llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp | 8 +-
 llvm/lib/Analysis/DivergenceAnalysis.cpp | 3 +-
 llvm/lib/Analysis/DomPrinter.cpp | 305 +-
 llvm/lib/Analysis/DomTreeUpdater.cpp | 93 +-
 llvm/lib/Analysis/DominanceFrontier.cpp | 1 -
 llvm/lib/Analysis/EHPersonalities.cpp | 6 +-
 llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp | 233 +-
 llvm/lib/Analysis/GlobalsModRef.cpp | 51 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 96 +-
 llvm/lib/Analysis/IVDescriptors.cpp | 266 +-
 llvm/lib/Analysis/IVUsers.cpp | 6 +-
 .../lib/Analysis/IndirectCallPromotionAnalysis.cpp | 13 +-
 llvm/lib/Analysis/InlineAdvisor.cpp | 102 +-
 llvm/lib/Analysis/InlineCost.cpp | 178 +-
 llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp | 25 +-
 llvm/lib/Analysis/InstructionSimplify.cpp | 1310 +-
 llvm/lib/Analysis/Interval.cpp | 1 -
 llvm/lib/Analysis/LazyCallGraph.cpp | 8 +-
 llvm/lib/Analysis/LazyValueInfo.cpp | 32 +-
 llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp | 1 +
 llvm/lib/Analysis/Lint.cpp | 220 +-
 llvm/lib/Analysis/Loads.cpp | 9 +-
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 486 +-
 llvm/lib/Analysis/LoopAnalysisManager.cpp | 3 -
 llvm/lib/Analysis/LoopCacheAnalysis.cpp | 129 +-
 llvm/lib/Analysis/LoopInfo.cpp | 11 +-
 llvm/lib/Analysis/LoopNestAnalysis.cpp | 3 +-
 llvm/lib/Analysis/LoopPass.cpp | 8 +-
 llvm/lib/Analysis/LoopUnrollAnalyzer.cpp | 11 +-
 llvm/lib/Analysis/MLInlineAdvisor.cpp | 141 +-
 llvm/lib/Analysis/MemDepPrinter.cpp | 1 -
 llvm/lib/Analysis/MemDerefPrinter.cpp | 3 -
 llvm/lib/Analysis/MemoryBuiltins.cpp | 524 +-
 llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 64 +-
 llvm/lib/Analysis/MemoryLocation.cpp | 2 -
 llvm/lib/Analysis/MemorySSA.cpp | 36 +-
 llvm/lib/Analysis/MemorySSAUpdater.cpp | 23 +-
 llvm/lib/Analysis/ModelUnderTrainingRunner.cpp | 29 +-
 llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 2 +-
 llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 15 +-
 llvm/lib/Analysis/MustExecute.cpp | 7 +-
 llvm/lib/Analysis/NoInferenceModelRunner.cpp | 16 +-
 llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 2 -
 llvm/lib/Analysis/OptimizationRemarkEmitter.cpp | 4 +-
 llvm/lib/Analysis/OverflowInstAnalysis.cpp | 1 -
 llvm/lib/Analysis/PHITransAddr.cpp | 9 +-
 llvm/lib/Analysis/ProfileSummaryInfo.cpp | 13 +-
 llvm/lib/Analysis/PtrUseVisitor.cpp | 1 -
 llvm/lib/Analysis/RegionInfo.cpp | 1 +
 llvm/lib/Analysis/RegionPass.cpp | 8 +-
 llvm/lib/Analysis/RegionPrinter.cpp | 69 +-
 llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 22 +-
 llvm/lib/Analysis/ScalarEvolution.cpp | 1323 +-
 llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 1 +
 llvm/lib/Analysis/ScalarEvolutionDivision.cpp | 2 -
 llvm/lib/Analysis/ScalarEvolutionNormalization.cpp | 1 +
 llvm/lib/Analysis/ScopedNoAliasAA.cpp | 1 -
 llvm/lib/Analysis/StackLifetime.cpp | 7 +-
 llvm/lib/Analysis/StackSafetyAnalysis.cpp | 5 +-
 llvm/lib/Analysis/StratifiedSets.h | 6 +-
 llvm/lib/Analysis/SyncDependenceAnalysis.cpp | 8 +-
 llvm/lib/Analysis/SyntheticCountsUtils.cpp | 7 +-
 llvm/lib/Analysis/TFUtils.cpp | 163 +-
 llvm/lib/Analysis/TargetLibraryInfo.cpp | 12 +-
 llvm/lib/Analysis/TargetTransformInfo.cpp | 70 +-
 llvm/lib/Analysis/TensorSpec.cpp | 144 +
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 1 -
 llvm/lib/Analysis/TypeMetadataUtils.cpp | 1 -
 llvm/lib/Analysis/VFABIDemangling.cpp | 2 -
 llvm/lib/Analysis/ValueLatticeUtils.cpp | 9 +-
 llvm/lib/Analysis/ValueTracking.cpp | 509 +-
 llvm/lib/Analysis/VectorUtils.cpp | 132 +-
 llvm/lib/AsmParser/LLLexer.cpp | 94 +-
 llvm/lib/AsmParser/LLParser.cpp | 225 +-
 llvm/lib/AsmParser/Parser.cpp | 2 -
 llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 4 +-
 llvm/lib/BinaryFormat/COFF.cpp | 57 +
 llvm/lib/BinaryFormat/Magic.cpp | 14 +
 llvm/lib/BinaryFormat/Wasm.cpp | 29 +-
 llvm/lib/Bitcode/Reader/BitReader.cpp | 1 -
 llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 16 +-
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 1774 +-
 llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 90 +-
 llvm/lib/Bitcode/Reader/MetadataLoader.h | 6 +-
 llvm/lib/Bitcode/Reader/ValueList.cpp | 195 +-
 llvm/lib/Bitcode/Reader/ValueList.h | 61 +-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 90 +-
 llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp | 1 -
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 136 +-
 llvm/lib/Bitstream/Reader/BitstreamReader.cpp | 57 +-
 llvm/lib/CodeGen/Analysis.cpp | 3 -
 llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 19 +-
 llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 15 +-
 llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 3 +-
 llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 4 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 460 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 12 +-
 .../lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 11 +-
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 123 +-
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 70 +-
 llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 11 +-
 llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 1 -
 .../AsmPrinter/DbgEntityHistoryCalculator.cpp | 3 +-
 llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 1 -
 llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 12 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 23 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 1 -
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 60 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 19 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 11 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 4 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 10 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 28 +-
 llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 7 +-
 llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 6 +-
 llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 12 +-
 llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 2 +-
 llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 2 +
 llvm/lib/CodeGen/AsmPrinter/WasmException.h | 5 +-
 llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 19 +-
 llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 58 +-
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 332 +-
 llvm/lib/CodeGen/BasicBlockSections.cpp | 181 +-
 .../CodeGen/BasicBlockSectionsProfileReader.cpp | 144 +
 llvm/lib/CodeGen/BranchFolding.cpp | 8 +-
 llvm/lib/CodeGen/BranchFolding.h | 1 -
 llvm/lib/CodeGen/BranchRelaxation.cpp | 1 -
 llvm/lib/CodeGen/BreakFalseDeps.cpp | 4 +-
 llvm/lib/CodeGen/CFIFixup.cpp | 225 +
 llvm/lib/CodeGen/CFIInstrInserter.cpp | 4 +-
 llvm/lib/CodeGen/CalcSpillWeights.cpp | 5 -
 llvm/lib/CodeGen/CallingConvLower.cpp | 16 +-
 llvm/lib/CodeGen/CodeGen.cpp | 4 +
 llvm/lib/CodeGen/CodeGenCommonISel.cpp | 34 +-
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 192 +-
 llvm/lib/CodeGen/CommandFlags.cpp | 33 +-
 llvm/lib/CodeGen/DFAPacketizer.cpp | 2 +-
 llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 1 -
 llvm/lib/CodeGen/DetectDeadLanes.cpp | 20 +-
 llvm/lib/CodeGen/EHContGuardCatchret.cpp | 2 -
 llvm/lib/CodeGen/EarlyIfConversion.cpp | 7 +-
 llvm/lib/CodeGen/ExpandMemCmp.cpp | 14 +-
 llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 10 +-
 llvm/lib/CodeGen/ExpandReductions.cpp | 2 -
 llvm/lib/CodeGen/ExpandVectorPredication.cpp | 27 +-
 llvm/lib/CodeGen/FEntryInserter.cpp | 3 -
 llvm/lib/CodeGen/FaultMaps.cpp | 2 +-
 llvm/lib/CodeGen/FinalizeISel.cpp | 2 -
 llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 12 +-
 llvm/lib/CodeGen/GCMetadata.cpp | 3 -
 llvm/lib/CodeGen/GCRootLowering.cpp | 5 +-
 llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 5 +-
 llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 45 +-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 +-
 llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 8 +-
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 313 +-
 llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 41 +
 llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 1 -
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 58 +-
 llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 10 +-
 llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 5 +-
 .../lib/CodeGen/GlobalISel/InstructionSelector.cpp | 11 +-
 llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 7 +
 llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 21 +
 llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 6 +-
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 150 +-
 llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 10 +-
 llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 7 +
 llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 1 +
 llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 56 +-
 llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 8 +-
 llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp | 110 -
 llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 805 -
 llvm/lib/CodeGen/GlobalISel/Utils.cpp | 154 +-
 llvm/lib/CodeGen/GlobalMerge.cpp | 14 +
 llvm/lib/CodeGen/HardwareLoops.cpp | 3 -
 llvm/lib/CodeGen/IfConversion.cpp | 4 +-
 llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 6 +-
 llvm/lib/CodeGen/InlineSpiller.cpp | 14 +-
 llvm/lib/CodeGen/InterferenceCache.h | 2 +-
 llvm/lib/CodeGen/InterleavedAccessPass.cpp | 2 +-
 llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 22 +-
 llvm/lib/CodeGen/JMCInstrumenter.cpp | 233 +
 llvm/lib/CodeGen/LLVMTargetMachine.cpp | 38 +-
 llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 3 +-
 .../CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 625 +-
 .../CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 142 +-
 .../CodeGen/LiveDebugValues/LiveDebugValues.cpp | 15 +-
 llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h | 11 +-
 .../CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 22 +-
 llvm/lib/CodeGen/LiveDebugVariables.cpp | 29 +-
 llvm/lib/CodeGen/LiveInterval.cpp | 19 +-
 llvm/lib/CodeGen/LiveIntervalCalc.cpp | 11 +-
 llvm/lib/CodeGen/LiveIntervalUnion.cpp | 15 +-
 llvm/lib/CodeGen/LiveIntervals.cpp | 14 +-
 llvm/lib/CodeGen/LiveRangeCalc.cpp | 2 -
 llvm/lib/CodeGen/LiveRangeEdit.cpp | 2 +-
 llvm/lib/CodeGen/LiveRangeShrink.cpp | 1 -
 llvm/lib/CodeGen/LiveRegMatrix.cpp | 17 +-
 llvm/lib/CodeGen/LiveStacks.cpp | 5 +-
 llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 19 +-
 llvm/lib/CodeGen/LowLevelType.cpp | 1 -
 llvm/lib/CodeGen/LowerEmuTLS.cpp | 1 -
 llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 10 +-
 llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 7 +-
 llvm/lib/CodeGen/MIRNamerPass.cpp | 4 -
 llvm/lib/CodeGen/MIRParser/MILexer.cpp | 3 +-
 llvm/lib/CodeGen/MIRParser/MIParser.cpp | 74 +-
 llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 30 +-
 llvm/lib/CodeGen/MIRPrinter.cpp | 32 +-
 llvm/lib/CodeGen/MIRSampleProfile.cpp | 8 +
 llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 1 -
 llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 119 +-
 llvm/lib/CodeGen/MachineBasicBlock.cpp | 27 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp | 15 +-
 llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2 -
 llvm/lib/CodeGen/MachineCSE.cpp | 6 +-
 llvm/lib/CodeGen/MachineCheckDebugify.cpp | 18 +-
 llvm/lib/CodeGen/MachineCombiner.cpp | 3 +-
 llvm/lib/CodeGen/MachineCopyPropagation.cpp | 426 +-
 llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 110 +-
 llvm/lib/CodeGen/MachineDebugify.cpp | 3 -
 llvm/lib/CodeGen/MachineDominanceFrontier.cpp | 3 +-
 llvm/lib/CodeGen/MachineDominators.cpp | 2 +
 llvm/lib/CodeGen/MachineFunction.cpp | 114 +-
 llvm/lib/CodeGen/MachineFunctionPass.cpp | 1 +
 llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 9 +-
 llvm/lib/CodeGen/MachineInstr.cpp | 49 +-
 llvm/lib/CodeGen/MachineInstrBundle.cpp | 5 +-
 llvm/lib/CodeGen/MachineLICM.cpp | 20 +-
 llvm/lib/CodeGen/MachineLoopInfo.cpp | 5 +-
 llvm/lib/CodeGen/MachineLoopUtils.cpp | 20 +-
 llvm/lib/CodeGen/MachineModuleInfo.cpp | 218 +-
 llvm/lib/CodeGen/MachineOperand.cpp | 2 -
 .../CodeGen/MachineOptimizationRemarkEmitter.cpp | 4 +-
 llvm/lib/CodeGen/MachineOutliner.cpp | 53 +-
 llvm/lib/CodeGen/MachinePipeliner.cpp | 133 +-
 llvm/lib/CodeGen/MachineRegisterInfo.cpp | 16 +-
 llvm/lib/CodeGen/MachineSSAContext.cpp | 2 +
 llvm/lib/CodeGen/MachineScheduler.cpp | 15 +-
 llvm/lib/CodeGen/MachineSink.cpp | 290 +-
 llvm/lib/CodeGen/MachineStableHash.cpp | 56 +-
 llvm/lib/CodeGen/MachineStripDebug.cpp | 4 +-
 llvm/lib/CodeGen/MachineVerifier.cpp | 86 +-
 llvm/lib/CodeGen/MacroFusion.cpp | 3 +-
 llvm/lib/CodeGen/ModuloSchedule.cpp | 88 +-
 llvm/lib/CodeGen/NonRelocatableStringpool.cpp | 4 +-
 llvm/lib/CodeGen/OptimizePHIs.cpp | 1 -
 llvm/lib/CodeGen/PHIElimination.cpp | 2 -
 llvm/lib/CodeGen/ParallelCG.cpp | 3 +-
 llvm/lib/CodeGen/PatchableFunction.cpp | 4 +-
 llvm/lib/CodeGen/PeepholeOptimizer.cpp | 16 +-
 llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 12 +-
 llvm/lib/CodeGen/PostRASchedulerList.cpp | 8 +-
 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 -
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 11 +-
 llvm/lib/CodeGen/PrologEpilogInserter.cpp | 184 +-
 llvm/lib/CodeGen/PseudoProbeInserter.cpp | 4 +-
 llvm/lib/CodeGen/PseudoSourceValue.cpp | 46 +-
 llvm/lib/CodeGen/RDFGraph.cpp | 16 +-
 llvm/lib/CodeGen/RDFLiveness.cpp | 6 +-
 llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 14 +-
 llvm/lib/CodeGen/RegAllocBase.cpp | 9 +-
 llvm/lib/CodeGen/RegAllocBase.h | 10 +-
 llvm/lib/CodeGen/RegAllocBasic.cpp | 33 +-
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 38 +-
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 48 +-
 llvm/lib/CodeGen/RegAllocFast.cpp | 36 +-
 llvm/lib/CodeGen/RegAllocGreedy.cpp | 564 +-
 llvm/lib/CodeGen/RegAllocGreedy.h | 187 +-
 llvm/lib/CodeGen/RegAllocPBQP.cpp | 1 +
 llvm/lib/CodeGen/RegAllocScore.cpp | 22 +-
 llvm/lib/CodeGen/RegAllocScore.h | 19 +-
 llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 5 +-
 llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 3 +-
 llvm/lib/CodeGen/RegisterBank.cpp | 110 +
 llvm/lib/CodeGen/RegisterBankInfo.cpp | 802 +
 llvm/lib/CodeGen/RegisterClassInfo.cpp | 19 +-
 llvm/lib/CodeGen/RegisterCoalescer.cpp | 2 +-
 llvm/lib/CodeGen/RegisterScavenging.cpp | 2 -
 llvm/lib/CodeGen/RegisterUsageInfo.cpp | 2 -
 llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp | 3 +-
 llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 2 +-
 llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 5 +-
 llvm/lib/CodeGen/SafeStack.cpp | 57 +-
 llvm/lib/CodeGen/SafeStackLayout.cpp | 1 -
 llvm/lib/CodeGen/SafeStackLayout.h | 2 +-
 llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 10 +-
 llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 5 -
 llvm/lib/CodeGen/SelectOptimize.cpp | 989 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1973 +-
llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 40 +-
.../CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 28 +-
llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 22 +-
llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 3 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 262 +-
.../CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 24 +-
.../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 388 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 92 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 13 +-
.../lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 46 +-
.../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 936 +-
.../CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 14 +-
llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 1 +
llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 42 +-
.../lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 19 +-
.../CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 5 +-
llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 4 -
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 860 +-
.../SelectionDAG/SelectionDAGAddressAnalysis.cpp | 2 +-
.../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 491 +-
.../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 26 +-
.../CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 12 +-
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 36 +-
.../CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 28 +-
.../CodeGen/SelectionDAG/StatepointLowering.cpp | 59 +-
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 888 +-
llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 3 +-
llvm/lib/CodeGen/SjLjEHPrepare.cpp | 2 +-
llvm/lib/CodeGen/SplitKit.cpp | 89 +-
llvm/lib/CodeGen/SplitKit.h | 23 +-
llvm/lib/CodeGen/StackColoring.cpp | 10 +-
llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 2 +-
llvm/lib/CodeGen/StackMaps.cpp | 4 +-
llvm/lib/CodeGen/StackProtector.cpp | 4 +-
llvm/lib/CodeGen/StackSlotColoring.cpp | 1 -
llvm/lib/CodeGen/TailDuplication.cpp | 4 +-
llvm/lib/CodeGen/TailDuplicator.cpp | 23 +-
llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 9 +-
llvm/lib/CodeGen/TargetInstrInfo.cpp | 14 +-
llvm/lib/CodeGen/TargetLoweringBase.cpp | 209 +-
llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 91 +-
llvm/lib/CodeGen/TargetOptionsImpl.cpp | 1 -
llvm/lib/CodeGen/TargetPassConfig.cpp | 80 +-
llvm/lib/CodeGen/TargetRegisterInfo.cpp | 3 +-
llvm/lib/CodeGen/TargetSchedule.cpp | 1 -
llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 4 -
llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 72 +-
llvm/lib/CodeGen/TypePromotion.cpp | 137 +-
llvm/lib/CodeGen/UnreachableBlockElim.cpp | 14 +-
llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 10 +-
llvm/lib/CodeGen/ValueTypes.cpp | 15 +-
llvm/lib/CodeGen/WasmEHPrepare.cpp | 12 +-
llvm/lib/CodeGen/WinEHPrepare.cpp | 6 +-
llvm/lib/DWARFLinker/DWARFLinker.cpp | 150 +-
llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp | 6 +-
llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp | 1 +
llvm/lib/DWARFLinker/DWARFStreamer.cpp | 49 +-
llvm/lib/DWP/DWP.cpp | 17 +-
.../CodeView/AppendingTypeTableBuilder.cpp | 9 +-
llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp | 75 +-
llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 5 +-
llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp | 33 +-
.../CodeView/ContinuationRecordBuilder.cpp | 8 +-
.../DebugInfo/CodeView/DebugCrossExSubsection.cpp | 1 +
.../CodeView/DebugFrameDataSubsection.cpp | 2 +
.../CodeView/DebugInlineeLinesSubsection.cpp | 1 +
llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp | 4 +-
.../DebugInfo/CodeView/DebugSubsectionRecord.cpp | 1 -
.../DebugInfo/CodeView/DebugSubsectionVisitor.cpp | 3 +-
.../DebugInfo/CodeView/DebugSymbolsSubsection.cpp | 1 +
llvm/lib/DebugInfo/CodeView/Formatters.cpp | 4 +-
.../DebugInfo/CodeView/GlobalTypeTableBuilder.cpp | 10 +-
.../CodeView/LazyRandomTypeCollection.cpp | 5 +-
.../DebugInfo/CodeView/MergingTypeTableBuilder.cpp | 11 +-
llvm/lib/DebugInfo/CodeView/RecordName.cpp | 6 +-
.../lib/DebugInfo/CodeView/RecordSerialization.cpp | 2 +-
.../DebugInfo/CodeView/SimpleTypeSerializer.cpp | 5 +-
.../lib/DebugInfo/CodeView/StringsAndChecksums.cpp | 1 -
llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp | 4 +-
.../lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp | 2 +-
llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp | 6 +-
llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp | 5 +-
llvm/lib/DebugInfo/CodeView/TypeHashing.cpp | 3 +-
llvm/lib/DebugInfo/CodeView/TypeIndex.cpp | 1 +
llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp | 38 +-
llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 6 +-
.../lib/DebugInfo/CodeView/TypeTableCollection.cpp | 5 +-
.../DWARF/DWARFAbbreviationDeclaration.cpp | 2 +-
llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 1 -
llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp | 2 +-
llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp | 3 +-
llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 127 +-
llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp | 4 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp | 1 +
llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp | 15 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 13 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 3 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 14 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 12 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 7 +-
llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 648 +-
llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp | 5 +-
llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp | 2 +-
llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp | 608 +
llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp | 2 -
llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 119 +-
llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 1 +
llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 70 +-
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 29 +-
llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp | 79 +
llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 14 +-
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 6 +-
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 2 +-
llvm/lib/DebugInfo/GSYM/InlineInfo.cpp | 16 +-
llvm/lib/DebugInfo/GSYM/LookupResult.cpp | 3 +-
llvm/lib/DebugInfo/GSYM/Range.cpp | 123 -
llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp | 1 -
.../PDB/Native/DbiModuleDescriptorBuilder.cpp | 12 +-
llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp | 1 +
llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp | 2 -
llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 13 +-
llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp | 1 +
llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp | 207 +
llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp | 9 +-
llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp | 3 +-
llvm/lib/DebugInfo/PDB/Native/HashTable.cpp | 3 -
llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp | 4 +-
.../lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp | 4 +-
.../DebugInfo/PDB/Native/InjectedSourceStream.cpp | 2 +-
llvm/lib/DebugInfo/PDB/Native/InputFile.cpp | 587 +
llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp | 340 +
.../lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp | 7 +-
llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp | 4 +-
.../DebugInfo/PDB/Native/NativeCompilandSymbol.cpp | 2 -
.../lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp | 4 +-
.../PDB/Native/NativeEnumInjectedSources.cpp | 4 +-
.../DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp | 8 +-
.../lib/DebugInfo/PDB/Native/NativeEnumModules.cpp | 5 +-
.../lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp | 4 +-
llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp | 9 +-
llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp | 6 +-
.../DebugInfo/PDB/Native/NativeFunctionSymbol.cpp | 10 +-
.../PDB/Native/NativeInlineSiteSymbol.cpp | 68 +-
llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp | 1 +
.../DebugInfo/PDB/Native/NativePublicSymbol.cpp | 5 +-
llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 1 -
llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp | 26 +-
llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp | 2 +
.../PDB/Native/NativeSymbolEnumerator.cpp | 4 +-
llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp | 9 +-
.../lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp | 3 +-
llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp | 20 +-
.../DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp | 5 +-
.../lib/DebugInfo/PDB/Native/NativeTypePointer.cpp | 5 +-
.../lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp | 4 +-
llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp | 13 +-
.../lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp | 5 +-
llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp | 1 -
llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 21 +-
llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp | 1 -
.../DebugInfo/PDB/Native/PDBStringTableBuilder.cpp | 7 +-
llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp | 4 +-
llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp | 15 +-
llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp | 5 +-
llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 6 +-
llvm/lib/DebugInfo/PDB/PDB.cpp | 1 -
llvm/lib/DebugInfo/PDB/PDBContext.cpp | 9 +
llvm/lib/DebugInfo/PDB/PDBExtras.cpp | 2 +-
llvm/lib/DebugInfo/PDB/PDBSymbol.cpp | 2 +-
llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp | 3 +-
.../DebugInfo/PDB/PDBSymbolCompilandDetails.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp | 4 +-
llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp | 3 +-
llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp | 4 +-
llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 2 +
llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp | 4 +-
llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp | 4 +-
llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp | 4 +-
llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp | 3 +-
llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp | 3 -
.../lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp | 2 -
llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp | 10 +-
llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp | 2 -
.../lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp | 3 -
llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp | 3 -
llvm/lib/DebugInfo/PDB/UDTLayout.cpp | 3 +
llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp | 57 +
llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 6 +-
llvm/lib/DebugInfo/Symbolize/Markup.cpp | 202 +
llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp | 143 +
.../DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 10 +-
.../DebugInfo/Symbolize/SymbolizableObjectFile.h | 103 -
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 316 +-
llvm/lib/Debuginfod/DIFetcher.cpp | 28 +
llvm/lib/Debuginfod/Debuginfod.cpp | 63 +-
llvm/lib/Debuginfod/HTTPClient.cpp | 88 +-
llvm/lib/Demangle/Demangle.cpp | 2 +-
llvm/lib/Demangle/ItaniumDemangle.cpp | 58 +-
llvm/lib/Demangle/MicrosoftDemangle.cpp | 37 +-
llvm/lib/Demangle/MicrosoftDemangleNodes.cpp | 4 +-
llvm/lib/Demangle/RustDemangle.cpp | 58 +-
.../ExecutionEngine/GDBRegistrationListener.cpp | 5 +-
llvm/lib/ExecutionEngine/Interpreter/Interpreter.h | 2 +-
.../JITLink/DWARFRecordSectionSplitter.cpp | 117 +
.../lib/ExecutionEngine/JITLink/EHFrameSupport.cpp | 564 +-
.../ExecutionEngine/JITLink/EHFrameSupportImpl.h | 53 +-
.../JITLink/ELFLinkGraphBuilder.cpp | 2 +-
llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp | 317 +-
llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 72 +-
llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp | 26 +-
llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 11 +-
.../lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp | 2 +-
.../JITLink/JITLinkMemoryManager.cpp | 2 +-
.../JITLink/MachOLinkGraphBuilder.cpp | 45 +-
llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 493 +-
llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 7 +-
llvm/lib/ExecutionEngine/JITLink/aarch64.cpp | 52 +-
llvm/lib/ExecutionEngine/JITLink/riscv.cpp | 4 +
llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp | 1 +
llvm/lib/ExecutionEngine/MCJIT/MCJIT.h | 3 +-
llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp | 1 +
llvm/lib/ExecutionEngine/Orc/Core.cpp | 76 +-
.../Orc/DebugObjectManagerPlugin.cpp | 2 +-
llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp | 7 +
.../ExecutionEngine/Orc/DebuggerSupportPlugin.cpp | 11 +-
llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp | 88 +-
.../Orc/EPCDebugObjectRegistrar.cpp | 3 +-
.../ExecutionEngine/Orc/EPCIndirectionUtils.cpp | 11 +-
llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp | 8 +-
.../ExecutionEngine/Orc/ExecutorProcessControl.cpp | 4 +-
llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp | 2 +-
llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 16 +-
.../Orc/JITTargetMachineBuilder.cpp | 1 +
llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 56 +-
llvm/lib/ExecutionEngine/Orc/Layer.cpp | 4 +-
llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp | 4 +
.../ExecutionEngine/Orc/LookupAndRecordAddrs.cpp | 4 +-
llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 433 +-
llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp | 152 +
.../ExecutionEngine/Orc/ObjectFileInterface.cpp | 11 +-
.../lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 14 +-
llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp | 171 +
llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp | 252 +-
.../Orc/Shared/SimpleRemoteEPCUtils.cpp | 4 +-
llvm/lib/ExecutionEngine/Orc/Speculation.cpp | 4 +-
.../Orc/TargetProcess/SimpleRemoteEPCServer.cpp | 4 +-
llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp | 2 +-
.../RuntimeDyld/RTDyldMemoryManager.cpp | 8 +-
.../ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 4 +-
.../RuntimeDyld/RuntimeDyldChecker.cpp | 3 +-
.../ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 9 +-
llvm/lib/ExecutionEngine/SectionMemoryManager.cpp | 2 +-
llvm/lib/FileCheck/FileCheck.cpp | 28 +-
llvm/lib/Frontend/OpenMP/OMPContext.cpp | 5 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 1143 +-
llvm/lib/FuzzMutate/FuzzerCLI.cpp | 48 -
llvm/lib/FuzzMutate/IRMutator.cpp | 56 +-
llvm/lib/FuzzMutate/Operations.cpp | 17 +-
llvm/lib/FuzzMutate/RandomIRBuilder.cpp | 16 +-
llvm/lib/IR/AbstractCallSite.cpp | 1 -
llvm/lib/IR/AsmWriter.cpp | 32 +-
llvm/lib/IR/Assumptions.cpp | 1 +
llvm/lib/IR/AttributeImpl.h | 2 +
llvm/lib/IR/Attributes.cpp | 181 +-
llvm/lib/IR/AutoUpgrade.cpp | 188 +-
llvm/lib/IR/BasicBlock.cpp | 6 -
llvm/lib/IR/BuiltinGCs.cpp | 2 +-
llvm/lib/IR/ConstantFold.cpp | 36 +-
llvm/lib/IR/ConstantFold.h | 57 -
llvm/lib/IR/ConstantRange.cpp | 77 +-
llvm/lib/IR/Constants.cpp | 132 +-
llvm/lib/IR/ConstantsContext.h | 37 -
llvm/lib/IR/Core.cpp | 40 +-
llvm/lib/IR/DIBuilder.cpp | 22 +-
llvm/lib/IR/DebugInfoMetadata.cpp | 188 +-
llvm/lib/IR/DiagnosticHandler.cpp | 9 +-
llvm/lib/IR/DiagnosticInfo.cpp | 11 +
llvm/lib/IR/Dominators.cpp | 1 -
llvm/lib/IR/FPEnv.cpp | 45 +
llvm/lib/IR/Function.cpp | 123 +-
llvm/lib/IR/GVMaterializer.cpp | 2 +-
llvm/lib/IR/Globals.cpp | 25 +-
llvm/lib/IR/IRBuilder.cpp | 178 +-
llvm/lib/IR/Instruction.cpp | 5 +-
llvm/lib/IR/Instructions.cpp | 60 +-
llvm/lib/IR/IntrinsicInst.cpp | 107 +-
llvm/lib/IR/LLVMContext.cpp | 37 +-
llvm/lib/IR/LLVMContextImpl.cpp | 20 +-
llvm/lib/IR/LLVMContextImpl.h | 34 +-
llvm/lib/IR/LegacyPassManager.cpp | 14 +-
llvm/lib/IR/MDBuilder.cpp | 8 +
llvm/lib/IR/Mangler.cpp | 2 +-
llvm/lib/IR/Metadata.cpp | 174 +-
llvm/lib/IR/Module.cpp | 33 +-
llvm/lib/IR/Pass.cpp | 10 +
llvm/lib/IR/ReplaceConstant.cpp | 1 +
llvm/lib/IR/SafepointIRVerifier.cpp | 11 +
llvm/lib/IR/Use.cpp | 4 -
llvm/lib/IR/User.cpp | 12 +-
llvm/lib/IR/Value.cpp | 17 +-
llvm/lib/IR/VectorBuilder.cpp | 103 +
llvm/lib/IR/Verifier.cpp | 3888 ++--
llvm/lib/InterfaceStub/ELFObjHandler.cpp | 139 +-
llvm/lib/InterfaceStub/IFSHandler.cpp | 48 +-
llvm/lib/InterfaceStub/IFSStub.cpp | 2 +-
llvm/lib/LTO/LTO.cpp | 106 +-
llvm/lib/LTO/LTOBackend.cpp | 46 +-
llvm/lib/LTO/LTOCodeGenerator.cpp | 6 +-
llvm/lib/LTO/LTOModule.cpp | 2 +-
llvm/lib/LTO/SummaryBasedOptimizations.cpp | 2 +-
llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 51 +-
llvm/lib/LineEditor/LineEditor.cpp | 4 +-
llvm/lib/Linker/IRMover.cpp | 69 +-
llvm/lib/Linker/LinkModules.cpp | 13 +-
llvm/lib/MC/ConstantPools.cpp | 25 +-
llvm/lib/MC/ELFObjectWriter.cpp | 50 +-
llvm/lib/MC/MCAsmBackend.cpp | 10 +-
llvm/lib/MC/MCAsmInfo.cpp | 5 +-
llvm/lib/MC/MCAsmStreamer.cpp | 204 +-
llvm/lib/MC/MCAssembler.cpp | 15 +-
llvm/lib/MC/MCCodeView.cpp | 7 +-
llvm/lib/MC/MCContext.cpp | 90 +-
llvm/lib/MC/MCDXContainerStreamer.cpp | 31 +
llvm/lib/MC/MCDXContainerWriter.cpp | 143 +
llvm/lib/MC/MCDisassembler/Disassembler.cpp | 1 -
llvm/lib/MC/MCDisassembler/Disassembler.h | 2 +-
llvm/lib/MC/MCDisassembler/MCDisassembler.cpp | 17 +-
.../lib/MC/MCDisassembler/MCExternalSymbolizer.cpp | 16 +-
llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp | 2 +-
llvm/lib/MC/MCDwarf.cpp | 55 +-
llvm/lib/MC/MCELFStreamer.cpp | 25 +-
llvm/lib/MC/MCExpr.cpp | 6 +-
llvm/lib/MC/MCFragment.cpp | 2 +-
llvm/lib/MC/MCInstPrinter.cpp | 1 +
llvm/lib/MC/MCInstrAnalysis.cpp | 7 +-
llvm/lib/MC/MCInstrDesc.cpp | 1 -
llvm/lib/MC/MCMachOStreamer.cpp | 33 +-
llvm/lib/MC/MCNullStreamer.cpp | 18 +-
llvm/lib/MC/MCObjectFileInfo.cpp | 52 +-
llvm/lib/MC/MCObjectStreamer.cpp | 10 +-
llvm/lib/MC/MCObjectWriter.cpp | 4 +-
llvm/lib/MC/MCParser/AsmLexer.cpp | 8 +-
llvm/lib/MC/MCParser/AsmParser.cpp | 83 +-
llvm/lib/MC/MCParser/COFFAsmParser.cpp | 45 +-
llvm/lib/MC/MCParser/COFFMasmParser.cpp | 27 +-
llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 19 +-
llvm/lib/MC/MCParser/ELFAsmParser.cpp | 33 +-
llvm/lib/MC/MCParser/GOFFAsmParser.cpp | 11 +-
llvm/lib/MC/MCParser/MCAsmLexer.cpp | 1 -
llvm/lib/MC/MCParser/MCAsmParser.cpp | 2 +-
llvm/lib/MC/MCParser/MCAsmParserExtension.cpp | 2 +
llvm/lib/MC/MCParser/MasmParser.cpp | 139 +-
llvm/lib/MC/MCParser/WasmAsmParser.cpp | 15 +-
llvm/lib/MC/MCParser/XCOFFAsmParser.cpp | 9 +-
llvm/lib/MC/MCPseudoProbe.cpp | 176 +-
llvm/lib/MC/MCRegisterInfo.cpp | 11 +
llvm/lib/MC/MCSPIRVStreamer.cpp | 45 +
llvm/lib/MC/MCSchedule.cpp | 4 +-
llvm/lib/MC/MCSection.cpp | 2 +-
llvm/lib/MC/MCSectionCOFF.cpp | 12 +-
llvm/lib/MC/MCSectionDXContainer.cpp | 15 +
llvm/lib/MC/MCSectionELF.cpp | 15 +-
llvm/lib/MC/MCSectionMachO.cpp | 17 +-
llvm/lib/MC/MCSectionWasm.cpp | 5 +-
llvm/lib/MC/MCSectionXCOFF.cpp | 10 +-
llvm/lib/MC/MCStreamer.cpp | 117 +-
llvm/lib/MC/MCSymbol.cpp | 1 -
llvm/lib/MC/MCSymbolELF.cpp | 1 -
llvm/lib/MC/MCTargetOptions.cpp | 9 +-
llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | 19 +-
llvm/lib/MC/MCWasmStreamer.cpp | 18 +-
llvm/lib/MC/MCWin64EH.cpp | 1320 +-
llvm/lib/MC/MCWinCOFFStreamer.cpp | 43 +-
llvm/lib/MC/MCWinEH.cpp | 9 +-
llvm/lib/MC/MCXCOFFStreamer.cpp | 5 +
llvm/lib/MC/MachObjectWriter.cpp | 25 +-
llvm/lib/MC/SPIRVObjectWriter.cpp | 76 +
llvm/lib/MC/SubtargetFeature.cpp | 4 -
llvm/lib/MC/TargetRegistry.cpp | 4 +-
llvm/lib/MC/WasmObjectWriter.cpp | 135 +-
llvm/lib/MC/WinCOFFObjectWriter.cpp | 51 +-
llvm/lib/MC/XCOFFObjectWriter.cpp | 480 +-
llvm/lib/MCA/CustomBehaviour.cpp | 2 +-
llvm/lib/MCA/HardwareUnits/LSUnit.cpp | 32 +-
llvm/lib/MCA/IncrementalSourceMgr.cpp | 51 +
llvm/lib/MCA/InstrBuilder.cpp | 96 +-
llvm/lib/MCA/Instruction.cpp | 12 +
llvm/lib/MCA/Pipeline.cpp | 15 +-
llvm/lib/MCA/Stages/DispatchStage.cpp | 6 +-
llvm/lib/MCA/Stages/EntryStage.cpp | 23 +-
llvm/lib/MCA/Stages/ExecuteStage.cpp | 4 +-
llvm/lib/MCA/Stages/InOrderIssueStage.cpp | 11 +-
llvm/lib/MCA/Stages/Stage.cpp | 1 +
llvm/lib/ObjCopy/Archive.cpp | 110 +
llvm/lib/ObjCopy/Archive.h | 31 +
llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp | 311 +
llvm/lib/ObjCopy/COFF/COFFObject.cpp | 132 +
llvm/lib/ObjCopy/COFF/COFFObject.h | 212 +
llvm/lib/ObjCopy/COFF/COFFReader.cpp | 226 +
llvm/lib/ObjCopy/COFF/COFFReader.h | 41 +
llvm/lib/ObjCopy/COFF/COFFWriter.cpp | 466 +
llvm/lib/ObjCopy/COFF/COFFWriter.h | 63 +
llvm/lib/ObjCopy/CommonConfig.cpp | 50 +
llvm/lib/ObjCopy/ConfigManager.cpp | 97 +
llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 821 +
llvm/lib/ObjCopy/ELF/ELFObject.cpp | 2795 +++
llvm/lib/ObjCopy/ELF/ELFObject.h | 1108 +
llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp | 441 +
llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h | 97 +
llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp | 550 +
llvm/lib/ObjCopy/MachO/MachOObject.cpp | 214 +
llvm/lib/ObjCopy/MachO/MachOObject.h | 374 +
llvm/lib/ObjCopy/MachO/MachOReader.cpp | 374 +
llvm/lib/ObjCopy/MachO/MachOReader.h | 62 +
llvm/lib/ObjCopy/MachO/MachOWriter.cpp | 662 +
llvm/lib/ObjCopy/MachO/MachOWriter.h | 76 +
llvm/lib/ObjCopy/ObjCopy.cpp | 90 +
llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp | 45 +
llvm/lib/ObjCopy/XCOFF/XCOFFObject.h | 48 +
llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp | 101 +
llvm/lib/ObjCopy/XCOFF/XCOFFReader.h | 35 +
llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp | 125 +
llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h | 48 +
llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp | 160 +
llvm/lib/ObjCopy/wasm/WasmObject.cpp | 34 +
llvm/lib/ObjCopy/wasm/WasmObject.h | 47 +
llvm/lib/ObjCopy/wasm/WasmReader.cpp | 39 +
llvm/lib/ObjCopy/wasm/WasmReader.h | 31 +
llvm/lib/ObjCopy/wasm/WasmWriter.cpp | 79 +
llvm/lib/ObjCopy/wasm/WasmWriter.h | 49 +
llvm/lib/Object/Archive.cpp | 18 +-
llvm/lib/Object/ArchiveWriter.cpp | 234 +-
llvm/lib/Object/Binary.cpp | 7 +-
llvm/lib/Object/COFFImportFile.cpp | 4 +
llvm/lib/Object/COFFModuleDefinition.cpp | 2 -
llvm/lib/Object/COFFObjectFile.cpp | 161 +-
llvm/lib/Object/DXContainer.cpp | 111 +
llvm/lib/Object/Decompressor.cpp | 2 +-
llvm/lib/Object/ELF.cpp | 29 +-
llvm/lib/Object/ELFObjectFile.cpp | 87 +-
llvm/lib/Object/Error.cpp | 2 +
llvm/lib/Object/IRObjectFile.cpp | 16 +-
llvm/lib/Object/IRSymtab.cpp | 1 -
llvm/lib/Object/MachOObjectFile.cpp | 229 +-
llvm/lib/Object/MachOUniversal.cpp | 6 +-
llvm/lib/Object/MachOUniversalWriter.cpp | 12 +-
llvm/lib/Object/ModuleSymbolTable.cpp | 3 -
llvm/lib/Object/Object.cpp | 2 +
llvm/lib/Object/ObjectFile.cpp | 15 +-
llvm/lib/Object/OffloadBinary.cpp | 164 +
llvm/lib/Object/RecordStreamer.h | 8 +-
llvm/lib/Object/RelocationResolver.cpp | 45 +
llvm/lib/Object/SymbolicFile.cpp | 9 +-
llvm/lib/Object/TapiFile.cpp | 6 +-
llvm/lib/Object/TapiUniversal.cpp | 5 +-
llvm/lib/Object/WasmObjectFile.cpp | 123 +-
llvm/lib/Object/WindowsResource.cpp | 2 -
llvm/lib/Object/XCOFFObjectFile.cpp | 27 +-
llvm/lib/ObjectYAML/COFFEmitter.cpp | 7 +-
llvm/lib/ObjectYAML/COFFYAML.cpp | 3 +
llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 1 +
llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp | 5 +-
llvm/lib/ObjectYAML/DWARFEmitter.cpp | 6 +-
llvm/lib/ObjectYAML/DWARFYAML.cpp | 2 +-
llvm/lib/ObjectYAML/DXContainerEmitter.cpp | 190 +
llvm/lib/ObjectYAML/DXContainerYAML.cpp | 61 +
llvm/lib/ObjectYAML/ELFEmitter.cpp | 43 +-
llvm/lib/ObjectYAML/ELFYAML.cpp | 48 +-
llvm/lib/ObjectYAML/MachOEmitter.cpp | 22 +-
llvm/lib/ObjectYAML/MachOYAML.cpp | 9 +-
llvm/lib/ObjectYAML/MinidumpEmitter.cpp | 2 +-
llvm/lib/ObjectYAML/ObjectYAML.cpp | 7 +
llvm/lib/ObjectYAML/OffloadEmitter.cpp | 68 +
llvm/lib/ObjectYAML/OffloadYAML.cpp | 78 +
llvm/lib/ObjectYAML/WasmEmitter.cpp | 62 +-
llvm/lib/ObjectYAML/WasmYAML.cpp | 69 +-
llvm/lib/ObjectYAML/XCOFFEmitter.cpp | 162 +-
llvm/lib/ObjectYAML/yaml2obj.cpp | 4 +
llvm/lib/Option/ArgList.cpp | 7 +
llvm/lib/Passes/PassBuilder.cpp | 41 +-
llvm/lib/Passes/PassBuilderPipelines.cpp | 229 +-
llvm/lib/Passes/PassRegistry.def | 54 +-
llvm/lib/Passes/StandardInstrumentations.cpp | 84 +-
llvm/lib/ProfileData/Coverage/CoverageMapping.cpp | 12 +-
.../ProfileData/Coverage/CoverageMappingReader.cpp | 4 +-
.../ProfileData/Coverage/CoverageMappingWriter.cpp | 8 +-
llvm/lib/ProfileData/GCOV.cpp | 8 +-
llvm/lib/ProfileData/InstrProf.cpp | 81 +-
llvm/lib/ProfileData/InstrProfCorrelator.cpp | 11 +-
llvm/lib/ProfileData/InstrProfReader.cpp | 146 +-
llvm/lib/ProfileData/InstrProfWriter.cpp | 143 +-
llvm/lib/ProfileData/MemProf.cpp | 110 +
llvm/lib/ProfileData/ProfileSummaryBuilder.cpp | 32 +-
llvm/lib/ProfileData/RawMemProfReader.cpp | 543 +-
llvm/lib/ProfileData/SampleProf.cpp | 32 +-
llvm/lib/ProfileData/SampleProfReader.cpp | 31 +-
llvm/lib/ProfileData/SampleProfWriter.cpp | 30 +-
llvm/lib/Remarks/BitstreamRemarkSerializer.cpp | 1 +
llvm/lib/Remarks/RemarkLinker.cpp | 7 +-
llvm/lib/Remarks/RemarkParser.cpp | 2 +-
llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 10 +-
llvm/lib/Support/AArch64TargetParser.cpp | 64 +-
llvm/lib/Support/APFixedPoint.cpp | 20 +-
llvm/lib/Support/APFloat.cpp | 9 +-
llvm/lib/Support/APInt.cpp | 126 +-
llvm/lib/Support/ARMAttributeParser.cpp | 2 +-
llvm/lib/Support/ARMWinEH.cpp | 21 +-
llvm/lib/Support/AddressRanges.cpp | 59 +
llvm/lib/Support/BLAKE3/LICENSE | 330 +
llvm/lib/Support/BLAKE3/README.md | 296 +
llvm/lib/Support/BLAKE3/blake3.c | 627 +
llvm/lib/Support/BLAKE3/blake3_avx2.c | 326 +
llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S | 1826 ++
.../BLAKE3/blake3_avx2_x86-64_windows_gnu.S | 1817 ++
.../BLAKE3/blake3_avx2_x86-64_windows_msvc.asm | 1828 ++
llvm/lib/Support/BLAKE3/blake3_avx512.c | 1207 ++
.../lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S | 2601 +++
.../BLAKE3/blake3_avx512_x86-64_windows_gnu.S | 2615 +++
.../BLAKE3/blake3_avx512_x86-64_windows_msvc.asm | 2634 +++
llvm/lib/Support/BLAKE3/blake3_dispatch.c | 277 +
llvm/lib/Support/BLAKE3/blake3_impl.h | 312 +
llvm/lib/Support/BLAKE3/blake3_neon.c | 356 +
llvm/lib/Support/BLAKE3/blake3_portable.c | 160 +
llvm/lib/Support/BLAKE3/blake3_sse2.c | 566 +
llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S | 2307 ++
.../BLAKE3/blake3_sse2_x86-64_windows_gnu.S | 2332 +++
.../BLAKE3/blake3_sse2_x86-64_windows_msvc.asm | 2350 +++
llvm/lib/Support/BLAKE3/blake3_sse41.c | 560 +
llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S | 2044 ++
.../BLAKE3/blake3_sse41_x86-64_windows_gnu.S | 2069 ++
.../BLAKE3/blake3_sse41_x86-64_windows_msvc.asm | 2089 ++
llvm/lib/Support/BinaryStreamWriter.cpp | 10 +-
llvm/lib/Support/CSKYAttributeParser.cpp | 155 +
llvm/lib/Support/CSKYAttributes.cpp | 33 +
llvm/lib/Support/CSKYTargetParser.cpp | 181 +
llvm/lib/Support/CodeGenCoverage.cpp | 2 +-
llvm/lib/Support/CommandLine.cpp | 94 +-
llvm/lib/Support/Compression.cpp | 12 +-
llvm/lib/Support/ConvertUTFWrapper.cpp | 102 +-
llvm/lib/Support/CrashRecoveryContext.cpp | 20 +-
llvm/lib/Support/Debug.cpp | 2 +-
llvm/lib/Support/DebugCounter.cpp | 3 +-
llvm/lib/Support/DeltaAlgorithm.cpp | 3 +-
llvm/lib/Support/DynamicLibrary.cpp | 7 +-
llvm/lib/Support/Errno.cpp | 3 +-
llvm/lib/Support/ErrorHandling.cpp | 5 +-
llvm/lib/Support/FileUtilities.cpp | 66 +
llvm/lib/Support/FoldingSet.cpp | 48 -
llvm/lib/Support/FormatVariadic.cpp | 2 +-
llvm/lib/Support/Host.cpp | 115 +-
llvm/lib/Support/ItaniumManglingCanonicalizer.cpp | 14 -
llvm/lib/Support/JSON.cpp | 20 +-
llvm/lib/Support/KnownBits.cpp | 12 +-
llvm/lib/Support/LineIterator.cpp | 2 +-
llvm/lib/Support/MD5.cpp | 14 +-
llvm/lib/Support/MathExtras.cpp | 2 +-
llvm/lib/Support/Memory.cpp | 1 -
llvm/lib/Support/MemoryBuffer.cpp | 13 +-
llvm/lib/Support/NativeFormatting.cpp | 10 +-
llvm/lib/Support/Parallel.cpp | 10 +-
llvm/lib/Support/Path.cpp | 16 +-
llvm/lib/Support/Process.cpp | 2 +-
llvm/lib/Support/Program.cpp | 1 -
llvm/lib/Support/RISCVISAInfo.cpp | 106 +-
llvm/lib/Support/SHA1.cpp | 21 +-
llvm/lib/Support/SHA256.cpp | 21 +-
llvm/lib/Support/ScopedPrinter.cpp | 9 +-
llvm/lib/Support/Signals.cpp | 18 +-
llvm/lib/Support/Signposts.cpp | 5 +-
llvm/lib/Support/SourceMgr.cpp | 16 +-
llvm/lib/Support/SpecialCaseList.cpp | 2 +-
llvm/lib/Support/Statistic.cpp | 6 +-
llvm/lib/Support/StringMap.cpp | 76 +-
llvm/lib/Support/StringRef.cpp | 7 +
llvm/lib/Support/TargetParser.cpp | 27 +-
llvm/lib/Support/ThreadPool.cpp | 171 +-
llvm/lib/Support/TrigramIndex.cpp | 1 +
llvm/lib/Support/Triple.cpp | 194 +-
llvm/lib/Support/TypeSize.cpp | 5 +-
llvm/lib/Support/Unicode.cpp | 452 +-
llvm/lib/Support/UnicodeNameToCodepoint.cpp | 551 +
.../Support/UnicodeNameToCodepointGenerated.cpp | 20911 +++++++++++++++++++
llvm/lib/Support/Unix/COM.inc | 2 +-
llvm/lib/Support/Unix/Memory.inc | 1 +
llvm/lib/Support/Unix/Path.inc | 24 +-
llvm/lib/Support/Unix/Process.inc | 39 +-
llvm/lib/Support/Unix/Signals.inc | 12 +-
llvm/lib/Support/Unix/ThreadLocal.inc | 12 -
llvm/lib/Support/Unix/Threading.inc | 37 +-
llvm/lib/Support/VirtualFileSystem.cpp | 592 +-
llvm/lib/Support/Windows/Path.inc | 2 +-
llvm/lib/Support/Windows/Process.inc | 10 +-
llvm/lib/Support/Windows/Program.inc | 3 +-
llvm/lib/Support/Windows/Signals.inc | 54 +-
llvm/lib/Support/Windows/Threading.inc | 8 +-
llvm/lib/Support/WithColor.cpp | 20 +-
llvm/lib/Support/YAMLParser.cpp | 71 +-
llvm/lib/Support/Z3Solver.cpp | 8 +-
llvm/lib/Support/raw_ostream.cpp | 4 +-
llvm/lib/Support/regcomp.c | 26 +-
llvm/lib/Support/regengine.inc | 39 +-
llvm/lib/Support/xxhash.cpp | 1 -
llvm/lib/TableGen/Error.cpp | 4 +-
llvm/lib/TableGen/Parser.cpp | 39 +
llvm/lib/TableGen/Record.cpp | 493 +-
llvm/lib/TableGen/TGLexer.cpp | 7 +-
llvm/lib/TableGen/TGLexer.h | 5 +-
llvm/lib/TableGen/TGParser.cpp | 247 +-
llvm/lib/TableGen/TGParser.h | 2 +-
llvm/lib/Target/AArch64/AArch64.h | 3 +
llvm/lib/Target/AArch64/AArch64.td | 113 +-
llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp | 1 +
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 49 +-
.../lib/Target/AArch64/AArch64CallingConvention.td | 6 +-
llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 6 +-
llvm/lib/Target/AArch64/AArch64Combine.td | 4 +-
.../Target/AArch64/AArch64ConditionalCompares.cpp | 4 +-
.../Target/AArch64/AArch64ExpandPseudoInsts.cpp | 34 +
llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 2 +-
llvm/lib/Target/AArch64/AArch64FastISel.cpp | 11 +-
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 1098 +-
llvm/lib/Target/AArch64/AArch64FrameLowering.h | 19 +-
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 294 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4083 ++--
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 83 +-
llvm/lib/Target/AArch64/AArch64InstrAtomics.td | 37 +-
llvm/lib/Target/AArch64/AArch64InstrFormats.td | 257 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 608 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 56 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 876 +-
.../Target/AArch64/AArch64LoadStoreOptimizer.cpp | 201 +-
llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp | 236 +-
.../Target/AArch64/AArch64MachineFunctionInfo.cpp | 49 +-
.../Target/AArch64/AArch64MachineFunctionInfo.h | 30 +-
.../lib/Target/AArch64/AArch64MachineScheduler.cpp | 82 +
llvm/lib/Target/AArch64/AArch64MachineScheduler.h | 33 +
llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 15 +-
llvm/lib/Target/AArch64/AArch64PerfectShuffle.h | 13169 ++++++------
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 114 +-
llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 5 +-
llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 51 +-
llvm/lib/Target/AArch64/AArch64SLSHardening.cpp | 4 +-
llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 73 +-
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 583 +-
llvm/lib/Target/AArch64/AArch64SchedA55.td | 127 +-
llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 12 +-
llvm/lib/Target/AArch64/AArch64SchedAmpere1.td | 1136 +
llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td | 25 +
llvm/lib/Target/AArch64/AArch64SchedPredExynos.td | 5 +-
llvm/lib/Target/AArch64/AArch64SchedPredicates.td | 149 +-
llvm/lib/Target/AArch64/AArch64SchedTSV110.td | 3 +-
.../lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 34 +-
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h | 2 +-
llvm/lib/Target/AArch64/AArch64StackTagging.cpp | 203 +-
.../Target/AArch64/AArch64StackTaggingPreRA.cpp | 1 -
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 37 +-
llvm/lib/Target/AArch64/AArch64Subtarget.h | 365 +-
llvm/lib/Target/AArch64/AArch64SystemOperands.td | 12 +-
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 36 +-
llvm/lib/Target/AArch64/AArch64TargetMachine.h | 2 +-
.../Target/AArch64/AArch64TargetTransformInfo.cpp | 383 +-
.../Target/AArch64/AArch64TargetTransformInfo.h | 24 +-
.../Target/AArch64/AsmParser/AArch64AsmParser.cpp | 134 +-
.../AArch64/Disassembler/AArch64Disassembler.cpp | 580 +-
.../AArch64/Disassembler/AArch64Disassembler.h | 8 +-
.../Disassembler/AArch64ExternalSymbolizer.cpp | 6 +-
.../Disassembler/AArch64ExternalSymbolizer.h | 3 +-
.../Target/AArch64/GISel/AArch64CallLowering.cpp | 38 +-
.../AArch64/GISel/AArch64InstructionSelector.cpp | 590 +-
.../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 12 +-
.../AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 6 +-
.../AArch64/GISel/AArch64PostLegalizerLowering.cpp | 2 +-
.../AArch64/GISel/AArch64PostSelectOptimize.cpp | 2 +-
.../AArch64/GISel/AArch64PreLegalizerCombiner.cpp | 8 +-
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 9 +-
.../Target/AArch64/GISel/AArch64RegisterBankInfo.h | 2 +-
.../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 26 +-
.../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 1 +
.../AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 7 +-
.../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 2 +-
.../Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 1 +
.../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 38 +-
.../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 11 +-
.../AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 4 +-
.../MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 1 +
.../MCTargetDesc/AArch64WinCOFFStreamer.cpp | 31 +-
llvm/lib/Target/AArch64/SMEInstrFormats.td | 538 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 378 +-
llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp | 10 +-
llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 3 +-
llvm/lib/Target/AMDGPU/AMDGPU.h | 13 +-
llvm/lib/Target/AMDGPU/AMDGPU.td | 280 +-
.../Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 144 +-
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 95 +-
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 3 +
llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 6 +
llvm/lib/Target/AMDGPU/AMDGPUAttributes.def | 31 +
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 266 +-
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 50 +-
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 66 +-
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 3 +-
llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp | 2 +-
.../Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp | 64 -
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 29 +-
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 11 +
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 4 +-
.../Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 91 +-
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 439 +
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h | 22 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 253 +-
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 17 +-
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 401 +-
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 11 +-
llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp | 457 +
.../Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 78 +-
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 6 +-
.../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 770 +-
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 31 +-
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 158 +-
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 824 +-
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 17 +
llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 11 +-
llvm/lib/Target/AMDGPU/AMDGPULibFunc.h | 4 +-
llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 4 -
.../Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 7 +-
.../Target/AMDGPU/AMDGPULowerKernelAttributes.cpp | 38 +-
.../lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 27 +-
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 15 +-
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 9 +-
llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h | 2 +-
.../Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 50 +-
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 20 +-
llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp | 1 +
.../AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp | 3 +-
llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 18 +-
.../Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 2 +-
.../Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 12 +-
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 215 +-
.../Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp | 64 +-
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 663 +-
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 7 +-
llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp | 140 +
.../AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp | 4 +-
.../Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 26 +-
.../Target/AMDGPU/AMDGPUResourceUsageAnalysis.h | 12 +-
.../Target/AMDGPU/AMDGPURewriteOutArguments.cpp | 152 +-
llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td | 168 +-
llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp | 166 +
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 158 +-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 42 +-
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 88 +-
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 6 +-
.../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 54 +-
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 23 +-
llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 1638 --
llvm/lib/Target/AMDGPU/AMDKernelCodeT.h | 2 +-
.../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1146 +-
llvm/lib/Target/AMDGPU/BUFInstructions.td | 891 +-
llvm/lib/Target/AMDGPU/DSInstructions.td | 546 +-
.../AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 470 +-
.../AMDGPU/Disassembler/AMDGPUDisassembler.h | 87 +-
llvm/lib/Target/AMDGPU/EXPInstructions.td | 79 +-
llvm/lib/Target/AMDGPU/FLATInstructions.td | 1038 +-
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 18 +-
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 901 +-
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 25 +
llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 29 +-
llvm/lib/Target/AMDGPU/GCNProcessors.td | 28 +
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 2 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 356 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 36 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 349 +-
llvm/lib/Target/AMDGPU/LDSDIRInstructions.td | 116 +
.../Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp | 6 +-
llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h | 4 +-
.../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 29 +-
.../AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp | 5 +-
.../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 257 +-
.../Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 17 +-
.../AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h | 56 +-
.../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 2 +
.../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 2 -
.../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 38 +-
.../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 4 +-
.../Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h | 1 -
.../Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 165 +-
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 618 +-
llvm/lib/Target/AMDGPU/R600.h | 2 +-
llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp | 4 +-
llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp | 3 +-
.../lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 1 +
llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp | 5 +-
llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 2 +
llvm/lib/Target/AMDGPU/R600FrameLowering.cpp | 1 +
llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 183 +-
llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 19 +-
llvm/lib/Target/AMDGPU/R600InstrInfo.h | 3 -
.../Target/AMDGPU/R600MachineCFGStructurizer.cpp | 1640 ++
llvm/lib/Target/AMDGPU/R600Packetizer.cpp | 2 +-
llvm/lib/Target/AMDGPU/R600Subtarget.cpp | 2 -
llvm/lib/Target/AMDGPU/R600Subtarget.h | 16 +-
llvm/lib/Target/AMDGPU/R600TargetMachine.cpp | 4 +-
llvm/lib/Target/AMDGPU/R600TargetMachine.h | 4 +-
llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 58 +-
llvm/lib/Target/AMDGPU/SIDefines.h | 196 +-
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 189 +-
llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 230 +-
llvm/lib/Target/AMDGPU/SIFrameLowering.h | 3 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 1927 +-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 24 +-
llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp | 77 +-
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 354 +-
llvm/lib/Target/AMDGPU/SIInstrFormats.td | 83 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 667 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 68 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 625 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 244 +-
llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp | 12 +-
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 842 +-
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 42 +-
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 33 +-
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 16 +-
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 126 +-
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 179 +-
llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp | 58 +-
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 484 +-
llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 17 +-
llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 251 +-
.../Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 110 +-
llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | 125 +-
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 12 +-
llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 21 +-
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 28 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 603 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 28 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 127 +-
llvm/lib/Target/AMDGPU/SISchedule.td | 65 +
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 435 +-
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 42 +-
llvm/lib/Target/AMDGPU/SMInstructions.td | 410 +-
llvm/lib/Target/AMDGPU/SOPInstructions.td | 425 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 314 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h | 56 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 686 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 133 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp | 144 -
llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h | 38 -
llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 220 +
llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h | 51 +
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 5 +
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 4 +
llvm/lib/Target/AMDGPU/VIInstrFormats.td | 2 +-
llvm/lib/Target/AMDGPU/VINTERPInstructions.td | 180 +
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 380 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 626 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 453 +-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 671 +-
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 873 +-
llvm/lib/Target/AMDGPU/VOPDInstructions.td | 159 +
llvm/lib/Target/AMDGPU/VOPInstructions.td | 658 +-
llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp | 7 +
llvm/lib/Target/ARC/ARCMachineFunctionInfo.h | 6 +-
llvm/lib/Target/ARC/ARCOptAddrMode.cpp | 8 +-
llvm/lib/Target/ARC/ARCTargetMachine.cpp | 4 +-
llvm/lib/Target/ARC/ARCTargetMachine.h | 2 +-
.../Target/ARC/Disassembler/ARCDisassembler.cpp | 78 +-
llvm/lib/Target/ARM/A15SDOptimizer.cpp | 3 +-
llvm/lib/Target/ARM/ARM.h | 2 +
llvm/lib/Target/ARM/ARM.td | 163 +-
llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 143 +-
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 227 +-
llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 29 +-
llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 26 +-
llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 39 +-
llvm/lib/Target/ARM/ARMBlockPlacement.cpp | 3 +-
llvm/lib/Target/ARM/ARMCallingConv.td | 21 +-
llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 2 +-
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 17 +-
llvm/lib/Target/ARM/ARMFastISel.cpp | 45 +-
.../Target/ARM/ARMFixCortexA57AES1742098Pass.cpp | 432 +
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 846 +-
llvm/lib/Target/ARM/ARMFrameLowering.h | 1 +
llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 2 +
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 35 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 413 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 12 +-
llvm/lib/Target/ARM/ARMInstrFormats.td | 26 +-
llvm/lib/Target/ARM/ARMInstrInfo.td | 27 +-
llvm/lib/Target/ARM/ARMInstrMVE.td | 89 +-
llvm/lib/Target/ARM/ARMInstrNEON.td | 3 +
llvm/lib/Target/ARM/ARMInstrThumb2.td | 7 +-
llvm/lib/Target/ARM/ARMInstrVFP.td | 96 +-
llvm/lib/Target/ARM/ARMInstructionSelector.cpp | 16 +-
llvm/lib/Target/ARM/ARMLegalizerInfo.cpp | 1 +
llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 12 +-
llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 4 +-
llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp | 7 +
llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 8 +
llvm/lib/Target/ARM/ARMParallelDSP.cpp | 5 +
llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 7 +-
llvm/lib/Target/ARM/ARMRegisterBankInfo.h | 2 +-
llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 2 +-
llvm/lib/Target/ARM/ARMSLSHardening.cpp | 4 +-
llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 9 +-
llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 1 +
llvm/lib/Target/ARM/ARMSubtarget.cpp | 43 +-
llvm/lib/Target/ARM/ARMSubtarget.h | 476 +-
llvm/lib/Target/ARM/ARMTargetMachine.cpp | 28 +-
llvm/lib/Target/ARM/ARMTargetMachine.h | 2 +-
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 50 +-
llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3 +-
llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 317 +-
.../Target/ARM/Disassembler/ARMDisassembler.cpp | 1287 +-
.../lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 132 +-
llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 4 +-
.../Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2 -
.../Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 12 -
llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4 +-
.../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 6 +-
.../Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 29 +-
.../Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 227 +
llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 57 +-
llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp | 7 +-
.../Target/ARM/MVETPAndVPTOptimisationsPass.cpp | 14 +-
llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 3 +-
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 758 +-
llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 6 +-
llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 6 +-
llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 32 +-
llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 50 +-
llvm/lib/Target/AVR/AVR.h | 4 +-
llvm/lib/Target/AVR/AVRAsmPrinter.cpp | 43 +
llvm/lib/Target/AVR/AVRCallingConv.td | 4 +
llvm/lib/Target/AVR/AVRDevices.td | 165 +-
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 446 +-
llvm/lib/Target/AVR/AVRFrameLowering.cpp | 123 +-
llvm/lib/Target/AVR/AVRISelLowering.cpp | 160 +-
llvm/lib/Target/AVR/AVRISelLowering.h | 3 +
llvm/lib/Target/AVR/AVRInstrFormats.td | 4 +-
llvm/lib/Target/AVR/AVRInstrInfo.cpp | 23 +-
llvm/lib/Target/AVR/AVRInstrInfo.td | 97 +-
llvm/lib/Target/AVR/AVRMachineFunctionInfo.h | 7 +
llvm/lib/Target/AVR/AVRRegisterInfo.cpp | 34 +-
llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp | 144 -
llvm/lib/Target/AVR/AVRSubtarget.h | 12 +-
llvm/lib/Target/AVR/AVRTargetMachine.cpp | 4 +-
llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 17 +
.../Target/AVR/Disassembler/AVRDisassembler.cpp | 200 +-
.../Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp | 2 +-
.../lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp | 1 +
llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h | 3 +
.../Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp | 1 -
llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h | 2 +-
llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h | 1 -
llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 1 +
llvm/lib/Target/BPF/BPF.h | 2 +
llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp | 50 +-
llvm/lib/Target/BPF/BPFAdjustOpt.cpp | 10 +-
llvm/lib/Target/BPF/BPFCORE.h | 2 +
llvm/lib/Target/BPF/BPFISelLowering.cpp | 3 +-
llvm/lib/Target/BPF/BPFInstrFormats.td | 1 +
llvm/lib/Target/BPF/BPFInstrInfo.cpp | 3 +-
llvm/lib/Target/BPF/BPFInstrInfo.td | 2 +
llvm/lib/Target/BPF/BPFMIChecking.cpp | 1 +
llvm/lib/Target/BPF/BPFMIPeephole.cpp | 7 +-
llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp | 24 +-
llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 1 +
llvm/lib/Target/BPF/BPFTargetMachine.cpp | 4 +-
llvm/lib/Target/BPF/BPFTargetMachine.h | 2 +-
llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 9 +
llvm/lib/Target/BPF/BTF.def | 1 +
llvm/lib/Target/BPF/BTF.h | 10 +
llvm/lib/Target/BPF/BTFDebug.cpp | 197 +-
llvm/lib/Target/BPF/BTFDebug.h | 26 +-
.../Target/BPF/Disassembler/BPFDisassembler.cpp | 16 +-
llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 5 +
.../lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp | 1 +
llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h | 2 -
.../Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp | 6 +-
llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 3 +-
llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp | 271 +-
llvm/lib/Target/CSKY/CSKY.h | 2 +
llvm/lib/Target/CSKY/CSKY.td | 523 +
llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp | 105 +-
llvm/lib/Target/CSKY/CSKYAsmPrinter.h | 14 +-
llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp | 6 +-
llvm/lib/Target/CSKY/CSKYFrameLowering.cpp | 23 +-
llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp | 219 +
llvm/lib/Target/CSKY/CSKYISelLowering.cpp | 180 +-
llvm/lib/Target/CSKY/CSKYISelLowering.h | 6 +
llvm/lib/Target/CSKY/CSKYInstrAlias.td | 38 +
llvm/lib/Target/CSKY/CSKYInstrFormats.td | 2 +-
llvm/lib/Target/CSKY/CSKYInstrInfo.cpp | 9 +-
llvm/lib/Target/CSKY/CSKYInstrInfo.h | 2 +-
llvm/lib/Target/CSKY/CSKYInstrInfo.td | 32 +-
llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td | 131 +
llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h | 11 +-
llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp | 20 +-
llvm/lib/Target/CSKY/CSKYRegisterInfo.td | 14 +-
llvm/lib/Target/CSKY/CSKYSubtarget.cpp | 33 +-
llvm/lib/Target/CSKY/CSKYSubtarget.h | 102 +-
llvm/lib/Target/CSKY/CSKYTargetMachine.cpp | 12 +-
llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp | 25 +
llvm/lib/Target/CSKY/CSKYTargetObjectFile.h | 24 +
.../Target/CSKY/Disassembler/CSKYDisassembler.cpp | 553 +
.../Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp | 184 +-
llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h | 13 +
.../CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp | 110 +-
.../Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp | 335 +
.../lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h | 148 +
.../Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp | 68 +-
.../Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp | 161 +-
.../Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h | 12 +
llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp | 2 +
.../Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp | 88 +
.../Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h | 4 +-
.../CSKY/MCTargetDesc/CSKYTargetStreamer.cpp | 143 +
.../Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h | 110 +
llvm/lib/Target/DirectX/DXIL.td | 144 +
llvm/lib/Target/DirectX/DXILConstants.h | 25 +
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 265 +
llvm/lib/Target/DirectX/DXILPointerType.cpp | 66 +
llvm/lib/Target/DirectX/DXILPointerType.h | 52 +
llvm/lib/Target/DirectX/DXILPrepare.cpp | 184 +
llvm/lib/Target/DirectX/DXILStubs.td | 18 +
llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp | 121 +
.../DirectX/DXILWriter/DXILBitcodeWriter.cpp | 2963 +++
.../Target/DirectX/DXILWriter/DXILBitcodeWriter.h | 82 +
.../DirectX/DXILWriter/DXILValueEnumerator.cpp | 1147 +
.../DirectX/DXILWriter/DXILValueEnumerator.h | 308 +
.../Target/DirectX/DXILWriter/DXILWriterPass.cpp | 100 +
.../lib/Target/DirectX/DXILWriter/DXILWriterPass.h | 37 +
llvm/lib/Target/DirectX/DirectX.h | 43 +
llvm/lib/Target/DirectX/DirectX.td | 54 +
llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp | 57 +
llvm/lib/Target/DirectX/DirectXFrameLowering.h | 35 +
llvm/lib/Target/DirectX/DirectXInstrInfo.cpp | 20 +
llvm/lib/Target/DirectX/DirectXInstrInfo.h | 30 +
llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp | 24 +
llvm/lib/Target/DirectX/DirectXRegisterInfo.h | 28 +
llvm/lib/Target/DirectX/DirectXSubtarget.cpp | 29 +
llvm/lib/Target/DirectX/DirectXSubtarget.h | 56 +
llvm/lib/Target/DirectX/DirectXTargetLowering.h | 31 +
llvm/lib/Target/DirectX/DirectXTargetMachine.cpp | 144 +
llvm/lib/Target/DirectX/DirectXTargetMachine.h | 51 +
.../Target/DirectX/DirectXTargetTransformInfo.h | 39 +
.../MCTargetDesc/DirectXContainerObjectWriter.cpp | 28 +
.../MCTargetDesc/DirectXContainerObjectWriter.h | 24 +
.../DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp | 152 +
.../DirectX/MCTargetDesc/DirectXMCTargetDesc.h | 29 +
llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp | 119 +
llvm/lib/Target/DirectX/PointerTypeAnalysis.h | 43 +
.../DirectX/TargetInfo/DirectXTargetInfo.cpp | 30 +
.../Target/DirectX/TargetInfo/DirectXTargetInfo.h | 18 +
.../Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 8 +-
llvm/lib/Target/Hexagon/BitTracker.cpp | 3 +-
.../Hexagon/Disassembler/HexagonDisassembler.cpp | 160 +-
llvm/lib/Target/Hexagon/HexagonArch.h | 31 -
llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 8 +-
llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 84 +-
.../lib/Target/Hexagon/HexagonBranchRelaxation.cpp | 5 +-
llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 7 +-
llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp | 12 +-
.../lib/Target/Hexagon/HexagonConstPropagation.cpp | 22 +-
llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp | 18 +-
llvm/lib/Target/Hexagon/HexagonDepArch.h | 88 +-
llvm/lib/Target/Hexagon/HexagonDepDecoders.inc | 44 +-
llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 4 +-
llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp | 2 +-
llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp | 61 +-
llvm/lib/Target/Hexagon/HexagonGenInsert.cpp | 51 +-
llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp | 10 +-
.../lib/Target/Hexagon/HexagonHazardRecognizer.cpp | 2 +-
llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 11 +-
llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 2 +-
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 101 +-
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 +-
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 145 +-
llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 65 +-
.../Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 13 +-
.../Target/Hexagon/HexagonMachineFunctionInfo.cpp | 6 +
.../Target/Hexagon/HexagonMachineFunctionInfo.h | 4 +
llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp | 3 +-
llvm/lib/Target/Hexagon/HexagonPatterns.td | 6 +
llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 19 +-
llvm/lib/Target/Hexagon/HexagonPeephole.cpp | 28 +-
llvm/lib/Target/Hexagon/HexagonPseudo.td | 22 +
llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp | 2 +-
llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp | 2 +-
llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 58 +-
llvm/lib/Target/Hexagon/HexagonSubtarget.h | 2 +-
llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 75 +-
llvm/lib/Target/Hexagon/HexagonTargetMachine.h | 2 +-
.../lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 7 +-
.../Target/Hexagon/HexagonTargetTransformInfo.cpp | 3 +-
.../Target/Hexagon/HexagonTargetTransformInfo.h | 10 +-
llvm/lib/Target/Hexagon/HexagonVExtract.cpp | 12 +-
llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 25 +-
llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 4 +-
.../Hexagon/HexagonVectorLoopCarriedReuse.cpp | 6 +-
.../Target/Hexagon/HexagonVectorLoopCarriedReuse.h | 2 +-
llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp | 6 +-
.../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 1 +
.../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1 -
.../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 5 +-
.../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 1 -
.../Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 4 +-
.../Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp | 1 +
.../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 25 +-
.../Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 1 -
.../Hexagon/MCTargetDesc/HexagonShuffler.cpp | 4 +-
llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 4 +-
.../Lanai/Disassembler/LanaiDisassembler.cpp | 45 +-
llvm/lib/Target/Lanai/LanaiISelLowering.cpp | 6 +-
llvm/lib/Target/Lanai/LanaiInstrInfo.cpp | 4 +-
llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp | 7 +
llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h | 4 +
llvm/lib/Target/Lanai/LanaiTargetMachine.cpp | 4 +-
llvm/lib/Target/Lanai/LanaiTargetMachine.h | 2 +-
.../Target/Lanai/MCTargetDesc/LanaiInstPrinter.h | 3 +-
.../Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp | 1 -
.../Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h | 1 -
.../LoongArch/AsmParser/LoongArchAsmParser.cpp | 556 +
.../Disassembler/LoongArchDisassembler.cpp | 145 +
llvm/lib/Target/LoongArch/LoongArch.h | 38 +
llvm/lib/Target/LoongArch/LoongArch.td | 139 +
llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp | 48 +
llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h | 46 +
llvm/lib/Target/LoongArch/LoongArchCallingConv.td | 23 +
.../Target/LoongArch/LoongArchFloat32InstrInfo.td | 177 +
.../Target/LoongArch/LoongArchFloat64InstrInfo.td | 188 +
.../Target/LoongArch/LoongArchFloatInstrFormats.td | 241 +
.../Target/LoongArch/LoongArchFrameLowering.cpp | 55 +
llvm/lib/Target/LoongArch/LoongArchFrameLowering.h | 38 +
.../lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp | 132 +
llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h | 55 +
.../lib/Target/LoongArch/LoongArchISelLowering.cpp | 531 +
llvm/lib/Target/LoongArch/LoongArchISelLowering.h | 95 +
llvm/lib/Target/LoongArch/LoongArchInstrFormats.td | 404 +
llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 49 +
llvm/lib/Target/LoongArch/LoongArchInstrInfo.h | 36 +
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 730 +
llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp | 66 +
.../LoongArch/LoongArchMachineFunctionInfo.h | 57 +
.../lib/Target/LoongArch/LoongArchRegisterInfo.cpp | 115 +
llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h | 50 +
llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td | 161 +
llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp | 54 +
llvm/lib/Target/LoongArch/LoongArchSubtarget.h | 89 +
.../Target/LoongArch/LoongArchTargetMachine.cpp | 118 +
llvm/lib/Target/LoongArch/LoongArchTargetMachine.h | 46 +
.../LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 68 +
.../LoongArch/MCTargetDesc/LoongArchAsmBackend.h | 63 +
.../LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp | 40 +
.../LoongArch/MCTargetDesc/LoongArchBaseInfo.h | 44 +
.../MCTargetDesc/LoongArchELFObjectWriter.cpp | 64 +
.../MCTargetDesc/LoongArchInstPrinter.cpp | 63 +
.../LoongArch/MCTargetDesc/LoongArchInstPrinter.h | 49 +
.../LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp | 34 +
.../LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h | 30 +
.../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 127 +
.../MCTargetDesc/LoongArchMCTargetDesc.cpp | 114 +
.../LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h | 54 +
.../LoongArch/MCTargetDesc/LoongArchMatInt.cpp | 51 +
.../LoongArch/MCTargetDesc/LoongArchMatInt.h | 30 +
.../LoongArch/TargetInfo/LoongArchTargetInfo.cpp | 30 +
.../LoongArch/TargetInfo/LoongArchTargetInfo.h | 21 +
llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp | 1 +
.../Target/M68k/Disassembler/M68kDisassembler.cpp | 618 +-
llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp | 98 +-
llvm/lib/Target/M68k/GISel/M68kCallLowering.h | 12 +
.../lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp | 4 +-
llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h | 2 +-
llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp | 2 +-
llvm/lib/Target/M68k/M68kExpandPseudo.cpp | 2 +-
llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp | 30 +-
llvm/lib/Target/M68k/M68kISelLowering.cpp | 107 +-
llvm/lib/Target/M68k/M68kISelLowering.h | 2 +
llvm/lib/Target/M68k/M68kInstrArithmetic.td | 717 +-
llvm/lib/Target/M68k/M68kInstrBits.td | 75 +-
llvm/lib/Target/M68k/M68kInstrControl.td | 166 +-
llvm/lib/Target/M68k/M68kInstrData.td | 653 +-
llvm/lib/Target/M68k/M68kInstrFormats.td | 136 +
llvm/lib/Target/M68k/M68kInstrInfo.cpp | 53 +-
llvm/lib/Target/M68k/M68kInstrInfo.td | 106 +-
llvm/lib/Target/M68k/M68kInstrShiftRotate.td | 54 +-
llvm/lib/Target/M68k/M68kMachineFunction.cpp | 7 +
llvm/lib/Target/M68k/M68kMachineFunction.h | 9 +-
llvm/lib/Target/M68k/M68kRegisterInfo.cpp | 1 +
llvm/lib/Target/M68k/M68kRegisterInfo.h | 8 +
llvm/lib/Target/M68k/M68kSubtarget.h | 2 +-
.../Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp | 452 +-
.../Target/M68k/MCTargetDesc/M68kMCTargetDesc.h | 1 -
.../Target/MSP430/AsmParser/MSP430AsmParser.cpp | 1 +
.../MSP430/Disassembler/MSP430Disassembler.cpp | 14 +- .../MSP430/MCTargetDesc/MSP430AsmBackend.cpp | 2 +- .../MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp | 2 +- .../MSP430/MCTargetDesc/MSP430ELFStreamer.cpp | 3 +- .../MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp | 3 +- .../MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 1 - llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp | 4 +- llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 3 +- llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 34 +- llvm/lib/Target/MSP430/MSP430InstrInfo.cpp | 3 +- .../Target/MSP430/MSP430MachineFunctionInfo.cpp | 7 + llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h | 5 + llvm/lib/Target/MSP430/MSP430TargetMachine.cpp | 6 +- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 17 +- .../Target/Mips/Disassembler/MipsDisassembler.cpp | 866 +- .../Target/Mips/MCTargetDesc/MipsABIFlagsSection.h | 2 +- llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp | 2 + .../Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 13 + .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 2 + .../Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 4 +- .../lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 2 +- .../Target/Mips/MCTargetDesc/MipsInstPrinter.cpp | 162 +- .../lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h | 50 +- .../Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 2 - .../Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 2 - .../Target/Mips/MCTargetDesc/MipsOptionRecord.cpp | 8 +- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 36 +- llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td | 4 + llvm/lib/Target/Mips/MicroMipsInstrFPU.td | 28 +- llvm/lib/Target/Mips/MicroMipsInstrInfo.td | 5 + llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp | 2 +- llvm/lib/Target/Mips/Mips.h | 2 + llvm/lib/Target/Mips/Mips.td | 6 + llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 2 +- llvm/lib/Target/Mips/Mips16RegisterInfo.cpp | 2 +- llvm/lib/Target/Mips/Mips32r6InstrInfo.td | 2 + llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 28 +- llvm/lib/Target/Mips/MipsBranchExpansion.cpp | 44 +- llvm/lib/Target/Mips/MipsCallLowering.cpp | 4 +- llvm/lib/Target/Mips/MipsCombine.td | 15 + llvm/lib/Target/Mips/MipsConstantIslandPass.cpp | 6 +- llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp | 2 +- llvm/lib/Target/Mips/MipsExpandPseudo.cpp | 2 +- llvm/lib/Target/Mips/MipsFastISel.cpp | 18 +- llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp | 2 +- llvm/lib/Target/Mips/MipsISelLowering.cpp | 98 +- llvm/lib/Target/Mips/MipsISelLowering.h | 10 +- llvm/lib/Target/Mips/MipsInstrInfo.cpp | 44 +- llvm/lib/Target/Mips/MipsInstrInfo.h | 13 + llvm/lib/Target/Mips/MipsInstrInfo.td | 4 + llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 4 +- llvm/lib/Target/Mips/MipsMachineFunction.cpp | 9 +- llvm/lib/Target/Mips/MipsMachineFunction.h | 5 + llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 3 +- llvm/lib/Target/Mips/MipsOptimizePICCall.cpp | 4 +- llvm/lib/Target/Mips/MipsOs16.cpp | 1 + llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp | 148 + llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp | 4 +- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 8 +- llvm/lib/Target/Mips/MipsRegisterBankInfo.h | 2 +- llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 2 +- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 36 +- llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 24 +- llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 2 +- llvm/lib/Target/Mips/MipsScheduleGeneric.td | 8 +- llvm/lib/Target/Mips/MipsSubtarget.cpp | 15 +- llvm/lib/Target/Mips/MipsSubtarget.h | 11 +- llvm/lib/Target/Mips/MipsTargetMachine.cpp | 15 +- llvm/lib/Target/Mips/MipsTargetMachine.h | 2 +- 
llvm/lib/Target/Mips/MipsTargetStreamer.h | 2 +- llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp | 17 + llvm/lib/Target/Mips/MipsTargetTransformInfo.h | 40 + .../Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 11 + .../NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 133 +- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 5 +- llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 17 +- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 45 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 520 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 21 + llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 53 +- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 352 +- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 3 +- llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 119 +- llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 7 + llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 8 +- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 +- llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 2 +- llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h | 2 +- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 65 +- llvm/lib/Target/NVPTX/NVVMReflect.cpp | 12 +- llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 63 +- .../PowerPC/Disassembler/PPCDisassembler.cpp | 98 +- llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp | 1 + .../Target/PowerPC/GISel/PPCRegisterBankInfo.cpp | 3 +- .../lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h | 4 +- .../Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2 + .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 2 + .../Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 6 +- .../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 4 + .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 11 +- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 13 +- .../Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 1 - .../PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp | 23 +- .../PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp | 1 + llvm/lib/Target/PowerPC/P10InstrResources.td | 8 +- llvm/lib/Target/PowerPC/P9InstrResources.td | 10 +- llvm/lib/Target/PowerPC/PPC.h | 8 +- llvm/lib/Target/PowerPC/PPC.td | 12 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 88 +- llvm/lib/Target/PowerPC/PPCBack2BackFusion.def | 2 + llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 421 +- llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp | 185 + llvm/lib/Target/PowerPC/PPCCallingConv.td | 22 + llvm/lib/Target/PowerPC/PPCFastISel.cpp | 2 +- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 42 +- .../lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp | 149 + llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 14 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 485 +- llvm/lib/Target/PowerPC/PPCISelLowering.h | 43 +- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 19 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 91 +- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 99 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 543 +- llvm/lib/Target/PowerPC/PPCInstrMMA.td | 628 + llvm/lib/Target/PowerPC/PPCInstrP10.td | 2315 ++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 2889 --- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 76 +- llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 1 - llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 2 +- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 3 +- llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 7 + llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 5 + llvm/lib/Target/PowerPC/PPCMacroFusion.cpp | 5 +- 
llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 37 + llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 171 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 2 + llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 655 +- llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td | 106 + llvm/lib/Target/PowerPC/PPCScheduleP10.td | 2 +- llvm/lib/Target/PowerPC/PPCScheduleP9.td | 3 +- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 2 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 4 +- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 34 +- llvm/lib/Target/PowerPC/PPCTargetMachine.h | 2 +- llvm/lib/Target/PowerPC/PPCTargetStreamer.h | 1 + llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 20 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 7 +- llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 2 + llvm/lib/Target/PowerPC/README_P9.txt | 9 +- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 172 +- .../RISCV/Disassembler/RISCVDisassembler.cpp | 118 +- .../Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 10 +- .../Target/RISCV/MCTargetDesc/RISCVAsmBackend.h | 6 +- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 25 +- llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 57 +- .../RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 2 +- .../Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 101 +- .../Target/RISCV/MCTargetDesc/RISCVELFStreamer.h | 5 + .../Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 10 +- .../Target/RISCV/MCTargetDesc/RISCVInstPrinter.h | 4 +- .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 8 +- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp | 1 + .../RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp | 1 + .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 6 +- .../Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h | 1 - llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 179 +- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h | 10 + .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 16 +- .../RISCV/MCTargetDesc/RISCVTargetStreamer.h | 6 + llvm/lib/Target/RISCV/RISCV.h | 11 +- llvm/lib/Target/RISCV/RISCV.td | 108 +- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 21 +- llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 11 +- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 468 +- llvm/lib/Target/RISCV/RISCVFrameLowering.h | 5 +- .../Target/RISCV/RISCVGatherScatterLowering.cpp | 26 +- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 970 +- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 20 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4026 ++-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 115 +- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 1772 +- llvm/lib/Target/RISCV/RISCVInstrFormats.td | 5 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 206 +- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 26 +- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 325 +- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 30 +- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 239 +- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 327 +- llvm/lib/Target/RISCV/RISCVInstrInfoM.td | 8 +- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 57 +- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 987 +- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 575 +- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 1227 +- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 264 +- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 245 +- llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td | 71 + llvm/lib/Target/RISCV/RISCVMCInstLower.cpp | 13 +- llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp | 37 + llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h | 35 + llvm/lib/Target/RISCV/RISCVMacroFusion.cpp | 67 + 
llvm/lib/Target/RISCV/RISCVMacroFusion.h | 28 + llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 382 + llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 280 +- .../Target/RISCV/RISCVRedundantCopyElimination.cpp | 179 + llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp | 7 +- llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h | 2 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 15 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 36 + llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 275 +- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 5 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 5 + llvm/lib/Target/RISCV/RISCVScheduleB.td | 206 + llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 58 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 100 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 84 +- llvm/lib/Target/RISCV/RISCVTargetMachine.h | 10 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 231 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 106 +- .../Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp | 63 + .../Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp | 1072 + llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h | 739 + .../Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp | 556 + .../Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h | 94 + .../Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp | 34 + .../lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h | 29 + .../SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp | 132 + .../SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp | 102 + .../Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h | 52 + .../SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp | 25 + .../SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp | 18 + .../SPIRV/MCTargetDesc/SPIRVTargetStreamer.h | 28 + llvm/lib/Target/SPIRV/SPIRV.h | 34 + llvm/lib/Target/SPIRV/SPIRV.td | 43 + llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 348 + llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 223 + llvm/lib/Target/SPIRV/SPIRVCallLowering.h | 50 + llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 433 + llvm/lib/Target/SPIRV/SPIRVEnums.td | 51 + llvm/lib/Target/SPIRV/SPIRVFrameLowering.h | 39 + llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 459 + llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 174 + llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp | 45 + llvm/lib/Target/SPIRV/SPIRVISelLowering.h | 47 + llvm/lib/Target/SPIRV/SPIRVInstrFormats.td | 31 + llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp | 195 + llvm/lib/Target/SPIRV/SPIRVInstrInfo.h | 54 + llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 732 + llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 1268 ++ llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 301 + llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h | 36 + llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp | 58 + llvm/lib/Target/SPIRV/SPIRVMCInstLower.h | 29 + llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 250 + llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 137 + llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 440 + llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp | 47 + llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h | 38 + llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td | 15 + llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp | 32 + llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h | 36 + llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td | 39 + llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp | 68 + llvm/lib/Target/SPIRV/SPIRVSubtarget.h | 93 + llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 186 + llvm/lib/Target/SPIRV/SPIRVTargetMachine.h | 47 + llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h | 45 + llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h | 44 + llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 207 + 
llvm/lib/Target/SPIRV/SPIRVUtils.h | 83 + .../Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp | 28 + llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h | 21 + llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 124 +- llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 11 +- .../Sparc/Disassembler/SparcDisassembler.cpp | 185 +- .../Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 23 +- .../Sparc/MCTargetDesc/SparcELFObjectWriter.cpp | 7 +- .../Target/Sparc/MCTargetDesc/SparcFixupKinds.h | 12 + .../Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 15 +- llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 16 + llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 7 +- .../Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 1 - llvm/lib/Target/Sparc/SparcCallingConv.td | 2 +- llvm/lib/Target/Sparc/SparcFrameLowering.cpp | 31 +- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 172 +- llvm/lib/Target/Sparc/SparcISelLowering.h | 10 +- llvm/lib/Target/Sparc/SparcInstr64Bit.td | 23 +- llvm/lib/Target/Sparc/SparcInstrInfo.td | 123 +- llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp | 7 + llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h | 5 + llvm/lib/Target/Sparc/SparcTargetMachine.cpp | 4 +- llvm/lib/Target/Sparc/SparcTargetObjectFile.h | 2 +- .../Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 12 +- .../SystemZ/Disassembler/SystemZDisassembler.cpp | 139 +- .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 10 +- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 3 +- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 1 - llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 325 +- llvm/lib/Target/SystemZ/SystemZAsmPrinter.h | 21 +- llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp | 2 +- llvm/lib/Target/SystemZ/SystemZElimCompare.cpp | 16 +- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 90 +- llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 3 + llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 9 +- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 443 +- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 42 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 49 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 9 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 32 +- llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp | 2 +- .../Target/SystemZ/SystemZMachineFunctionInfo.cpp | 6 + .../Target/SystemZ/SystemZMachineFunctionInfo.h | 5 + llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp | 3 +- llvm/lib/Target/SystemZ/SystemZProcessors.td | 3 +- llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp | 5 +- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 6 +- llvm/lib/Target/SystemZ/SystemZSchedule.td | 4 +- llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 6 +- llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 6 +- llvm/lib/Target/SystemZ/SystemZScheduleZ15.td | 6 +- llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 1728 ++ llvm/lib/Target/SystemZ/SystemZScheduleZ196.td | 6 +- llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td | 6 +- .../lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 2 +- llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 2 +- llvm/lib/Target/SystemZ/SystemZShortenInst.cpp | 14 +- llvm/lib/Target/SystemZ/SystemZSubtarget.cpp | 20 +- llvm/lib/Target/SystemZ/SystemZSubtarget.h | 4 +- llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp | 12 +- llvm/lib/Target/SystemZ/SystemZTargetMachine.h | 2 +- llvm/lib/Target/SystemZ/SystemZTargetStreamer.h | 1 + .../Target/SystemZ/SystemZTargetTransformInfo.cpp | 49 +- .../Target/SystemZ/SystemZTargetTransformInfo.h | 8 +- llvm/lib/Target/TargetIntrinsicInfo.cpp | 8 +- llvm/lib/Target/TargetLoweringObjectFile.cpp 
| 2 - llvm/lib/Target/TargetMachine.cpp | 17 +- llvm/lib/Target/TargetMachineC.cpp | 8 +- llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp | 1 + llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp | 126 +- .../Target/VE/MCTargetDesc/VEELFObjectWriter.cpp | 2 +- llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h | 16 +- .../lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp | 1 - llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp | 1 + llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h | 3 +- llvm/lib/Target/VE/VE.h | 4 +- llvm/lib/Target/VE/VECustomDAG.cpp | 514 +- llvm/lib/Target/VE/VECustomDAG.h | 144 + llvm/lib/Target/VE/VEISelDAGToDAG.cpp | 37 + llvm/lib/Target/VE/VEISelLowering.cpp | 281 +- llvm/lib/Target/VE/VEISelLowering.h | 29 +- llvm/lib/Target/VE/VEInstrInfo.cpp | 7 +- llvm/lib/Target/VE/VEInstrInfo.td | 50 +- llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td | 54 + llvm/lib/Target/VE/VEInstrIntrinsicVL.td | 3 - llvm/lib/Target/VE/VEInstrPatternsVec.td | 43 + llvm/lib/Target/VE/VEMachineFunctionInfo.cpp | 7 + llvm/lib/Target/VE/VEMachineFunctionInfo.h | 5 + llvm/lib/Target/VE/VERegisterInfo.td | 4 +- llvm/lib/Target/VE/VETargetMachine.cpp | 7 +- llvm/lib/Target/VE/VETargetMachine.h | 2 +- llvm/lib/Target/VE/VETargetTransformInfo.h | 66 + llvm/lib/Target/VE/VVPISelLowering.cpp | 443 + llvm/lib/Target/VE/VVPInstrInfo.td | 111 +- llvm/lib/Target/VE/VVPInstrPatternsVec.td | 358 + llvm/lib/Target/VE/VVPNodes.def | 89 +- .../WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 18 +- .../AsmParser/WebAssemblyAsmTypeCheck.cpp | 95 +- .../AsmParser/WebAssemblyAsmTypeCheck.h | 9 +- .../Disassembler/WebAssemblyDisassembler.cpp | 2 +- .../MCTargetDesc/WebAssemblyMCAsmInfo.cpp | 2 - .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 1 - .../MCTargetDesc/WebAssemblyTargetStreamer.cpp | 6 - .../MCTargetDesc/WebAssemblyTargetStreamer.h | 5 - .../WebAssembly/Utils/WebAssemblyTypeUtilities.h | 4 + llvm/lib/Target/WebAssembly/WebAssembly.h | 4 - llvm/lib/Target/WebAssembly/WebAssembly.td | 4 + .../Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 207 +- .../lib/Target/WebAssembly/WebAssemblyAsmPrinter.h | 4 +- .../Target/WebAssembly/WebAssemblyCFGStackify.cpp | 2 +- .../WebAssembly/WebAssemblyExceptionInfo.cpp | 1 + .../WebAssembly/WebAssemblyFixBrTableDefaults.cpp | 2 +- .../WebAssemblyFixIrreducibleControlFlow.cpp | 54 +- .../Target/WebAssembly/WebAssemblyISelLowering.cpp | 46 +- .../Target/WebAssembly/WebAssemblyISelLowering.h | 4 + .../Target/WebAssembly/WebAssemblyInstrAtomics.td | 22 +- .../Target/WebAssembly/WebAssemblyInstrFormats.td | 16 +- .../lib/Target/WebAssembly/WebAssemblyInstrInfo.td | 16 +- .../Target/WebAssembly/WebAssemblyInstrMemory.td | 8 +- llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td | 6 + .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 131 +- .../WebAssembly/WebAssemblyLateEHPrepare.cpp | 4 +- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 74 +- .../WebAssembly/WebAssemblyLowerGlobalDtors.cpp | 210 - .../WebAssembly/WebAssemblyMCLowerPrePass.cpp | 3 + .../WebAssembly/WebAssemblyMachineFunctionInfo.cpp | 14 +- .../WebAssembly/WebAssemblyMachineFunctionInfo.h | 13 +- .../WebAssemblyNullifyDebugValueLists.cpp | 1 + .../WebAssemblyOptimizeLiveIntervals.cpp | 7 +- .../WebAssemblyPrepareForLiveIntervals.cpp | 126 - .../WebAssembly/WebAssemblyReplacePhysRegs.cpp | 3 - .../WebAssembly/WebAssemblySelectionDAGInfo.cpp | 2 +- .../WebAssembly/WebAssemblySelectionDAGInfo.h | 1 + llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h | 1 + .../WebAssembly/WebAssemblyTargetMachine.cpp | 42 +- 
.../Target/WebAssembly/WebAssemblyTargetMachine.h | 2 +- .../WebAssembly/WebAssemblyTargetTransformInfo.cpp | 4 + .../WebAssembly/WebAssemblyTargetTransformInfo.h | 2 + llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 513 +- llvm/lib/Target/X86/AsmParser/X86Operand.h | 36 +- .../Target/X86/Disassembler/X86Disassembler.cpp | 77 +- llvm/lib/Target/X86/MCA/X86CustomBehaviour.h | 2 +- .../Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp | 8 +- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 105 +- .../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 22 +- .../Target/X86/MCTargetDesc/X86InstPrinterCommon.h | 3 +- .../X86/MCTargetDesc/X86InstrRelaxTables.cpp | 165 + .../Target/X86/MCTargetDesc/X86InstrRelaxTables.h | 54 + .../X86/MCTargetDesc/X86IntelInstPrinter.cpp | 4 +- .../Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 134 +- llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h | 1 + .../Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 91 + llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 26 +- .../Target/X86/MCTargetDesc/X86MnemonicTables.cpp | 16 + .../Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 21 +- .../X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp | 1 + llvm/lib/Target/X86/X86.h | 4 + llvm/lib/Target/X86/X86.td | 279 +- llvm/lib/Target/X86/X86AsmPrinter.cpp | 96 +- llvm/lib/Target/X86/X86AsmPrinter.h | 5 +- llvm/lib/Target/X86/X86AvoidTrailingCall.cpp | 7 +- llvm/lib/Target/X86/X86CallingConv.cpp | 2 +- llvm/lib/Target/X86/X86CmovConversion.cpp | 27 +- llvm/lib/Target/X86/X86DiscriminateMemOps.cpp | 3 +- llvm/lib/Target/X86/X86DomainReassignment.cpp | 14 +- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 11 +- llvm/lib/Target/X86/X86FastISel.cpp | 133 +- llvm/lib/Target/X86/X86FastPreTileConfig.cpp | 709 + llvm/lib/Target/X86/X86FastTileConfig.cpp | 293 +- llvm/lib/Target/X86/X86FixupLEAs.cpp | 3 +- llvm/lib/Target/X86/X86FloatingPoint.cpp | 26 +- llvm/lib/Target/X86/X86FrameLowering.cpp | 136 +- llvm/lib/Target/X86/X86FrameLowering.h | 7 +- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 282 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 3225 ++- llvm/lib/Target/X86/X86ISelLowering.h | 58 +- llvm/lib/Target/X86/X86IndirectThunks.cpp | 1 + llvm/lib/Target/X86/X86InsertPrefetch.cpp | 1 + llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp | 49 +- llvm/lib/Target/X86/X86InstrAMX.td | 18 +- llvm/lib/Target/X86/X86InstrAVX512.td | 131 +- llvm/lib/Target/X86/X86InstrArithmetic.td | 8 +- llvm/lib/Target/X86/X86InstrCMovSetCC.td | 8 +- llvm/lib/Target/X86/X86InstrCompiler.td | 85 +- llvm/lib/Target/X86/X86InstrControl.td | 4 +- llvm/lib/Target/X86/X86InstrFPStack.td | 22 +- llvm/lib/Target/X86/X86InstrFoldTables.cpp | 4 +- llvm/lib/Target/X86/X86InstrFormats.td | 6 +- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 1 - llvm/lib/Target/X86/X86InstrInfo.cpp | 851 +- llvm/lib/Target/X86/X86InstrInfo.h | 18 +- llvm/lib/Target/X86/X86InstrInfo.td | 111 +- llvm/lib/Target/X86/X86InstrMMX.td | 4 +- llvm/lib/Target/X86/X86InstrSSE.td | 68 +- llvm/lib/Target/X86/X86InstrSystem.td | 16 +- llvm/lib/Target/X86/X86InstrTSX.td | 2 + llvm/lib/Target/X86/X86InstrVecCompiler.td | 6 +- llvm/lib/Target/X86/X86InstrXOP.td | 4 +- llvm/lib/Target/X86/X86InstructionSelector.cpp | 16 +- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 12 +- .../X86/X86LoadValueInjectionLoadHardening.cpp | 3 +- llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp | 1 + llvm/lib/Target/X86/X86LowerAMXType.cpp | 181 +- llvm/lib/Target/X86/X86MCInstLower.cpp | 41 +- llvm/lib/Target/X86/X86MachineFunctionInfo.cpp | 7 + llvm/lib/Target/X86/X86MachineFunctionInfo.h | 10 +- 
llvm/lib/Target/X86/X86MacroFusion.cpp | 1 +
llvm/lib/Target/X86/X86PadShortFunction.cpp | 11 +-
llvm/lib/Target/X86/X86PartialReduction.cpp | 35 +-
llvm/lib/Target/X86/X86PreAMXConfig.cpp | 56 +-
llvm/lib/Target/X86/X86PreTileConfig.cpp | 53 +-
llvm/lib/Target/X86/X86RegisterBankInfo.cpp | 7 +-
llvm/lib/Target/X86/X86RegisterBankInfo.h | 2 +-
llvm/lib/Target/X86/X86RegisterInfo.cpp | 62 +
llvm/lib/Target/X86/X86RegisterInfo.h | 12 +
llvm/lib/Target/X86/X86RegisterInfo.td | 15 +-
llvm/lib/Target/X86/X86SchedBroadwell.td | 20 +-
llvm/lib/Target/X86/X86SchedHaswell.td | 20 +-
llvm/lib/Target/X86/X86SchedIceLake.td | 20 +-
llvm/lib/Target/X86/X86SchedSandyBridge.td | 40 +-
llvm/lib/Target/X86/X86SchedSkylakeClient.td | 26 +-
llvm/lib/Target/X86/X86SchedSkylakeServer.td | 32 +-
llvm/lib/Target/X86/X86ScheduleBtVer2.td | 4 +-
llvm/lib/Target/X86/X86ScheduleSLM.td | 6 +-
llvm/lib/Target/X86/X86ScheduleZnver1.td | 106 +-
llvm/lib/Target/X86/X86ScheduleZnver2.td | 86 +-
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 39 +-
llvm/lib/Target/X86/X86SelectionDAGInfo.h | 2 +-
.../lib/Target/X86/X86SpeculativeLoadHardening.cpp | 31 +-
llvm/lib/Target/X86/X86Subtarget.cpp | 12 +-
llvm/lib/Target/X86/X86Subtarget.h | 629 +-
llvm/lib/Target/X86/X86TargetMachine.cpp | 51 +-
llvm/lib/Target/X86/X86TargetMachine.h | 2 +-
llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 290 +-
llvm/lib/Target/X86/X86TargetTransformInfo.h | 21 +-
llvm/lib/Target/X86/X86TileConfig.cpp | 15 +-
.../XCore/Disassembler/XCoreDisassembler.cpp | 286 +-
.../Target/XCore/MCTargetDesc/XCoreInstPrinter.h | 3 +-
llvm/lib/Target/XCore/XCore.h | 1 +
llvm/lib/Target/XCore/XCoreAsmPrinter.cpp | 2 +-
llvm/lib/Target/XCore/XCoreISelLowering.cpp | 36 +-
llvm/lib/Target/XCore/XCoreInstrInfo.td | 2 +-
llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp | 7 +
llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h | 5 +
llvm/lib/Target/XCore/XCoreTargetMachine.cpp | 4 +-
llvm/lib/Target/XCore/XCoreTargetMachine.h | 4 +-
llvm/lib/Testing/Support/Annotations.cpp | 4 +-
llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp | 28 +-
llvm/lib/ToolDrivers/llvm-lib/Options.td | 16 +-
.../AggressiveInstCombine.cpp | 92 +-
.../AggressiveInstCombineInternal.h | 46 +-
.../AggressiveInstCombine/TruncInstCombine.cpp | 86 +-
llvm/lib/Transforms/Coroutines/CoroCleanup.cpp | 81 +-
.../Coroutines/CoroConditionalWrapper.cpp | 24 +
llvm/lib/Transforms/Coroutines/CoroEarly.cpp | 79 +-
llvm/lib/Transforms/Coroutines/CoroElide.cpp | 125 +-
llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 177 +-
llvm/lib/Transforms/Coroutines/CoroInternal.h | 47 +-
llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 377 +-
llvm/lib/Transforms/Coroutines/Coroutines.cpp | 193 +-
llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 47 +-
llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 1139 +-
llvm/lib/Transforms/IPO/Attributor.cpp | 462 +-
llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 2060 +-
llvm/lib/Transforms/IPO/BlockExtractor.cpp | 11 +-
llvm/lib/Transforms/IPO/CalledValuePropagation.cpp | 6 +-
llvm/lib/Transforms/IPO/ConstantMerge.cpp | 2 +-
llvm/lib/Transforms/IPO/CrossDSOCFI.cpp | 5 -
.../lib/Transforms/IPO/DeadArgumentElimination.cpp | 578 +-
llvm/lib/Transforms/IPO/ExtractGV.cpp | 1 -
llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp | 2 +-
llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 175 +-
llvm/lib/Transforms/IPO/FunctionImport.cpp | 19 +-
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 501 +-
llvm/lib/Transforms/IPO/GlobalDCE.cpp | 34 +-
llvm/lib/Transforms/IPO/GlobalOpt.cpp | 177 +-
llvm/lib/Transforms/IPO/GlobalSplit.cpp | 4 +-
llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 26 +-
llvm/lib/Transforms/IPO/IPO.cpp | 5 -
llvm/lib/Transforms/IPO/IROutliner.cpp | 326 +-
llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp | 5 +-
llvm/lib/Transforms/IPO/InlineSimple.cpp | 8 +-
llvm/lib/Transforms/IPO/Inliner.cpp | 111 +-
llvm/lib/Transforms/IPO/Internalize.cpp | 3 -
llvm/lib/Transforms/IPO/LoopExtractor.cpp | 5 -
llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 21 +-
llvm/lib/Transforms/IPO/MergeFunctions.cpp | 48 +-
llvm/lib/Transforms/IPO/ModuleInliner.cpp | 25 +-
llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 255 +-
llvm/lib/Transforms/IPO/PartialInlining.cpp | 16 +-
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 295 +-
llvm/lib/Transforms/IPO/PruneEH.cpp | 5 +-
llvm/lib/Transforms/IPO/SCCP.cpp | 1 +
llvm/lib/Transforms/IPO/SampleContextTracker.cpp | 123 +-
llvm/lib/Transforms/IPO/SampleProfile.cpp | 293 +-
llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 10 +-
.../Transforms/IPO/SyntheticCountsPropagation.cpp | 10 +-
llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 8 +-
llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 134 +-
.../Transforms/InstCombine/InstCombineAddSub.cpp | 115 +-
.../Transforms/InstCombine/InstCombineAndOrXor.cpp | 1037 +-
.../InstCombine/InstCombineAtomicRMW.cpp | 1 -
.../Transforms/InstCombine/InstCombineCalls.cpp | 383 +-
.../Transforms/InstCombine/InstCombineCasts.cpp | 185 +-
.../Transforms/InstCombine/InstCombineCompares.cpp | 874 +-
.../Transforms/InstCombine/InstCombineInternal.h | 41 +-
.../InstCombine/InstCombineLoadStoreAlloca.cpp | 13 +-
.../InstCombine/InstCombineMulDivRem.cpp | 344 +-
.../Transforms/InstCombine/InstCombineNegator.cpp | 14 +
llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 127 +-
.../Transforms/InstCombine/InstCombineSelect.cpp | 913 +-
.../Transforms/InstCombine/InstCombineShifts.cpp | 164 +-
.../InstCombine/InstCombineSimplifyDemanded.cpp | 202 +-
.../InstCombine/InstCombineVectorOps.cpp | 157 +-
.../InstCombine/InstructionCombining.cpp | 533 +-
.../Instrumentation/AddressSanitizer.cpp | 400 +-
.../Transforms/Instrumentation/BoundsChecking.cpp | 5 +-
llvm/lib/Transforms/Instrumentation/CGProfile.cpp | 3 -
.../Instrumentation/ControlHeightReduction.cpp | 78 +-
.../Instrumentation/DataFlowSanitizer.cpp | 237 +-
.../Transforms/Instrumentation/GCOVProfiling.cpp | 59 +-
.../Instrumentation/HWAddressSanitizer.cpp | 521 +-
.../Instrumentation/IndirectCallPromotion.cpp | 73 +-
.../Transforms/Instrumentation/InstrOrderFile.cpp | 9 +-
.../Transforms/Instrumentation/InstrProfiling.cpp | 59 +-
.../Transforms/Instrumentation/Instrumentation.cpp | 10 -
.../Instrumentation/MaximumSpanningTree.h | 109 -
.../lib/Transforms/Instrumentation/MemProfiler.cpp | 54 +-
.../Transforms/Instrumentation/MemorySanitizer.cpp | 155 +-
.../Instrumentation/PGOInstrumentation.cpp | 176 +-
.../Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 65 +-
.../Transforms/Instrumentation/PoisonChecking.cpp | 6 -
.../Instrumentation/SanitizerCoverage.cpp | 23 +-
.../Transforms/Instrumentation/ThreadSanitizer.cpp | 84 +-
.../Instrumentation/ValueProfileCollector.cpp | 7 +-
.../Instrumentation/ValueProfileCollector.h | 2 +-
.../Instrumentation/ValueProfilePlugins.inc | 1 +
llvm/lib/Transforms/ObjCARC/ObjCARC.cpp | 1 -
llvm/lib/Transforms/ObjCARC/ObjCARC.h | 1 -
llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 5 +-
llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 9 +-
llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 2 +-
llvm/lib/Transforms/Scalar/ADCE.cpp | 1 -
.../Transforms/Scalar/AlignmentFromAssumptions.cpp | 7 +-
llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp | 3 -
llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp | 13 +-
llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 1 +
.../Transforms/Scalar/ConstraintElimination.cpp | 754 +-
.../Scalar/CorrelatedValuePropagation.cpp | 110 +-
llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 157 +-
.../lib/Transforms/Scalar/DeadStoreElimination.cpp | 138 +-
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 25 +-
llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp | 2 -
llvm/lib/Transforms/Scalar/Float2Int.cpp | 207 +-
llvm/lib/Transforms/Scalar/GVN.cpp | 231 +-
llvm/lib/Transforms/Scalar/GVNHoist.cpp | 16 +-
llvm/lib/Transforms/Scalar/GVNSink.cpp | 30 +-
llvm/lib/Transforms/Scalar/GuardWidening.cpp | 3 +-
llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp | 1 -
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 15 +-
.../Scalar/InductiveRangeCheckElimination.cpp | 34 +-
llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 72 +-
llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp | 6 +-
llvm/lib/Transforms/Scalar/JumpThreading.cpp | 142 +-
llvm/lib/Transforms/Scalar/LICM.cpp | 482 +-
.../Scalar/LoopAccessAnalysisPrinter.cpp | 1 +
llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp | 26 +-
llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 18 +-
llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 12 +-
llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 15 +-
llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 14 +-
llvm/lib/Transforms/Scalar/LoopFuse.cpp | 15 +-
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 64 +-
llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 13 +-
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 200 +-
llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 14 +-
llvm/lib/Transforms/Scalar/LoopPassManager.cpp | 10 +-
llvm/lib/Transforms/Scalar/LoopPredication.cpp | 5 +-
llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 11 +-
llvm/lib/Transforms/Scalar/LoopRotation.cpp | 13 +-
llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 26 +-
llvm/lib/Transforms/Scalar/LoopSink.cpp | 91 +-
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 664 +-
.../lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 30 +-
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 57 +-
llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 1774 --
llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp | 2 -
llvm/lib/Transforms/Scalar/LowerAtomic.cpp | 177 -
llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp | 99 +
.../Transforms/Scalar/LowerConstantIntrinsics.cpp | 18 +-
.../lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 12 +-
llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp | 11 +-
.../Transforms/Scalar/LowerMatrixIntrinsics.cpp | 57 +-
.../Transforms/Scalar/LowerWidenableCondition.cpp | 13 +-
llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp | 4 +-
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 187 +-
llvm/lib/Transforms/Scalar/MergeICmps.cpp | 59 +-
.../Transforms/Scalar/MergedLoadStoreMotion.cpp | 6 +-
llvm/lib/Transforms/Scalar/NewGVN.cpp | 46 +-
.../Transforms/Scalar/PartiallyInlineLibCalls.cpp | 5 +-
llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp | 3 +-
llvm/lib/Transforms/Scalar/Reassociate.cpp | 7 +-
llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 2 -
.../Transforms/Scalar/RewriteStatepointsForGC.cpp | 489 +-
llvm/lib/Transforms/Scalar/SCCP.cpp | 105 +-
llvm/lib/Transforms/Scalar/SROA.cpp | 75 +-
llvm/lib/Transforms/Scalar/Scalar.cpp | 9 +-
.../Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp | 7 +-
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 103 +-
.../Scalar/SeparateConstOffsetFromGEP.cpp | 1 -
llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 121 +-
llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 14 +-
llvm/lib/Transforms/Scalar/Sink.cpp | 7 +-
.../lib/Transforms/Scalar/SpeculativeExecution.cpp | 6 +-
.../Scalar/StraightLineStrengthReduce.cpp | 19 +-
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 67 +-
llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp | 306 +
.../Transforms/Scalar/TailRecursionElimination.cpp | 15 +-
.../lib/Transforms/Scalar/WarnMissedTransforms.cpp | 2 +-
llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 3 -
llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp | 1 -
llvm/lib/Transforms/Utils/AddDiscriminators.cpp | 4 +-
llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp | 1 +
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 8 +-
llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 15 +-
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 406 +-
llvm/lib/Transforms/Utils/CallGraphUpdater.cpp | 3 +
llvm/lib/Transforms/Utils/CallPromotionUtils.cpp | 4 +-
llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp | 3 +-
.../Transforms/Utils/CanonicalizeFreezeInLoops.cpp | 1 -
llvm/lib/Transforms/Utils/CloneFunction.cpp | 106 +-
llvm/lib/Transforms/Utils/CloneModule.cpp | 5 +-
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 24 +-
llvm/lib/Transforms/Utils/CodeLayout.cpp | 28 +-
llvm/lib/Transforms/Utils/CtorUtils.cpp | 65 +-
llvm/lib/Transforms/Utils/Debugify.cpp | 184 +-
llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 3 +-
llvm/lib/Transforms/Utils/Evaluator.cpp | 109 +-
llvm/lib/Transforms/Utils/FixIrreducible.cpp | 9 +
llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 2 -
llvm/lib/Transforms/Utils/GlobalStatus.cpp | 32 +-
llvm/lib/Transforms/Utils/InjectTLIMappings.cpp | 1 -
llvm/lib/Transforms/Utils/InlineFunction.cpp | 77 +-
llvm/lib/Transforms/Utils/IntegerDivision.cpp | 1 -
llvm/lib/Transforms/Utils/LCSSA.cpp | 3 +-
llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 2 -
llvm/lib/Transforms/Utils/Local.cpp | 77 +-
llvm/lib/Transforms/Utils/LoopPeel.cpp | 122 +-
llvm/lib/Transforms/Utils/LoopRotationUtils.cpp | 19 +-
llvm/lib/Transforms/Utils/LoopSimplify.cpp | 9 +-
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 4 +-
llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp | 4 +-
llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 47 +-
llvm/lib/Transforms/Utils/LoopUtils.cpp | 85 +-
llvm/lib/Transforms/Utils/LoopVersioning.cpp | 7 +-
llvm/lib/Transforms/Utils/LowerAtomic.cpp | 93 +
llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp | 221 +
llvm/lib/Transforms/Utils/LowerInvoke.cpp | 2 -
llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp | 217 +-
llvm/lib/Transforms/Utils/LowerSwitch.cpp | 43 +-
llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp | 195 +
llvm/lib/Transforms/Utils/MisExpect.cpp | 249 +
llvm/lib/Transforms/Utils/ModuleUtils.cpp | 12 +-
llvm/lib/Transforms/Utils/PredicateInfo.cpp | 8 -
.../Transforms/Utils/PromoteMemoryToRegister.cpp | 8 +-
.../Transforms/Utils/RelLookupTableConverter.cpp | 27 +-
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 204 +-
llvm/lib/Transforms/Utils/SSAUpdater.cpp | 3 +-
.../Transforms/Utils/SampleProfileInference.cpp | 394 +-
.../Utils/SampleProfileLoaderBaseUtil.cpp | 10 +-
llvm/lib/Transforms/Utils/SanitizerStats.cpp | 1 -
.../Transforms/Utils/ScalarEvolutionExpander.cpp | 258 +-
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 818 +-
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 18 +-
llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 665 +-
llvm/lib/Transforms/Utils/SizeOpts.cpp | 4 +-
llvm/lib/Transforms/Utils/StripGCRelocates.cpp | 4 +-
llvm/lib/Transforms/Utils/SymbolRewriter.cpp | 1 -
llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 48 +-
llvm/lib/Transforms/Utils/Utils.cpp | 1 +
llvm/lib/Transforms/Utils/VNCoercion.cpp | 124 +-
.../Transforms/Vectorize/LoadStoreVectorizer.cpp | 19 +-
.../Vectorize/LoopVectorizationLegality.cpp | 133 +-
.../Vectorize/LoopVectorizationPlanner.h | 22 +-
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2149 +-
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4378 ++--
llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 12 +-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 1161 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 592 +-
llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 135 +-
llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h | 10 +-
llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h | 44 -
llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 248 -
llvm/lib/Transforms/Vectorize/VPlanPredicator.h | 74 -
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 840 +
llvm/lib/Transforms/Vectorize/VPlanSLP.cpp | 15 +-
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 114 +-
llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 16 +-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 24 +-
llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 55 +-
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 379 +-
llvm/lib/Transforms/Vectorize/Vectorize.cpp | 1 -
llvm/lib/WindowsDriver/MSVCPaths.cpp | 719 +
llvm/lib/WindowsManifest/WindowsManifestMerger.cpp | 4 +-
llvm/lib/XRay/FDRTraceWriter.cpp | 2 +-
llvm/tools/bugpoint/CrashDebugger.cpp | 4 +-
llvm/tools/bugpoint/ExecutionDriver.cpp | 8 +-
llvm/tools/bugpoint/OptimizerDriver.cpp | 2 +-
llvm/tools/bugpoint/bugpoint.cpp | 12 +-
llvm/tools/llc/llc.cpp | 42 +-
llvm/tools/lli/lli.cpp | 158 +-
llvm/tools/llvm-ar/llvm-ar.cpp | 169 +-
llvm/tools/llvm-cov/CodeCoverage.cpp | 68 +-
llvm/tools/llvm-cov/CoverageViewOptions.h | 2 +
llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp | 42 +-
llvm/tools/llvm-cov/TestingSupport.cpp | 1 +
llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp | 2 +-
llvm/tools/llvm-cxxfilt/Opts.td | 2 +-
llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp | 2 +-
llvm/tools/llvm-dis/llvm-dis.cpp | 12 +-
llvm/tools/llvm-dwarfdump/Statistics.cpp | 18 +-
llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 93 +-
llvm/tools/llvm-dwp/llvm-dwp.cpp | 17 +-
llvm/tools/llvm-extract/llvm-extract.cpp | 21 +-
llvm/tools/llvm-link/llvm-link.cpp | 24 +-
llvm/tools/llvm-lto/llvm-lto.cpp | 16 +-
llvm/tools/llvm-lto2/llvm-lto2.cpp | 57 +-
llvm/tools/llvm-mc/llvm-mc.cpp | 4 +-
llvm/tools/llvm-mca/CodeRegionGenerator.cpp | 9 +-
llvm/tools/llvm-mca/Views/InstructionInfoView.cpp | 4 +-
llvm/tools/llvm-mca/Views/InstructionView.h | 3 +-
llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp | 8 +-
llvm/tools/llvm-mca/llvm-mca.cpp | 31 +-
llvm/tools/llvm-modextract/llvm-modextract.cpp | 1 +
llvm/tools/llvm-nm/Opts.td | 11 +-
llvm/tools/llvm-nm/llvm-nm.cpp | 956 +-
llvm/tools/llvm-objcopy/BitcodeStripOpts.td | 8 +-
llvm/tools/llvm-objcopy/COFF/COFFConfig.h | 27 -
llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp | 297 -
llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h | 33 -
llvm/tools/llvm-objcopy/COFF/Object.cpp | 132 -
llvm/tools/llvm-objcopy/COFF/Object.h | 211 -
llvm/tools/llvm-objcopy/COFF/Reader.cpp | 226 -
llvm/tools/llvm-objcopy/COFF/Reader.h | 41 -
llvm/tools/llvm-objcopy/COFF/Writer.cpp | 457 -
llvm/tools/llvm-objcopy/COFF/Writer.h | 63 -
llvm/tools/llvm-objcopy/CommonConfig.h | 260 -
llvm/tools/llvm-objcopy/ConfigManager.cpp | 1432 --
llvm/tools/llvm-objcopy/ConfigManager.h | 80 -
llvm/tools/llvm-objcopy/ELF/ELFConfig.h | 38 -
llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 833 -
llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h | 40 -
llvm/tools/llvm-objcopy/ELF/Object.cpp | 2826 ---
llvm/tools/llvm-objcopy/ELF/Object.h | 1113 -
llvm/tools/llvm-objcopy/MachO/MachOConfig.h | 43 -
.../llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 441 -
llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h | 97 -
llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 549 -
llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h | 39 -
llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 374 -
llvm/tools/llvm-objcopy/MachO/MachOReader.h | 57 -
llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 748 -
llvm/tools/llvm-objcopy/MachO/MachOWriter.h | 71 -
llvm/tools/llvm-objcopy/MachO/Object.cpp | 214 -
llvm/tools/llvm-objcopy/MachO/Object.h | 374 -
llvm/tools/llvm-objcopy/MultiFormatConfig.h | 37 -
llvm/tools/llvm-objcopy/ObjcopyOptions.cpp | 1364 ++
llvm/tools/llvm-objcopy/ObjcopyOptions.h | 58 +
llvm/tools/llvm-objcopy/ObjcopyOpts.td | 6 +-
llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 227 +-
llvm/tools/llvm-objcopy/llvm-objcopy.h | 34 -
llvm/tools/llvm-objcopy/wasm/Object.cpp | 34 -
llvm/tools/llvm-objcopy/wasm/Object.h | 47 -
llvm/tools/llvm-objcopy/wasm/Reader.cpp | 33 -
llvm/tools/llvm-objcopy/wasm/Reader.h | 31 -
llvm/tools/llvm-objcopy/wasm/WasmConfig.h | 21 -
llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp | 162 -
llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h | 32 -
llvm/tools/llvm-objcopy/wasm/Writer.cpp | 79 -
llvm/tools/llvm-objcopy/wasm/Writer.h | 49 -
llvm/tools/llvm-objdump/COFFDump.cpp | 32 +-
llvm/tools/llvm-objdump/ELFDump.cpp | 8 +-
llvm/tools/llvm-objdump/MachODump.cpp | 69 +-
llvm/tools/llvm-objdump/MachODump.h | 1 +
llvm/tools/llvm-objdump/ObjdumpOpts.td | 9 +
llvm/tools/llvm-objdump/OffloadDump.cpp | 102 +
llvm/tools/llvm-objdump/OffloadDump.h | 22 +
llvm/tools/llvm-objdump/OtoolOpts.td | 1 -
llvm/tools/llvm-objdump/SourcePrinter.cpp | 2 +
llvm/tools/llvm-objdump/SourcePrinter.h | 1 +
llvm/tools/llvm-objdump/XCOFFDump.cpp | 2 +-
llvm/tools/llvm-objdump/llvm-objdump.cpp | 125 +-
llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp | 12 +-
llvm/tools/llvm-pdbutil/BytesOutputStyle.h | 2 +-
llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp | 398 +-
llvm/tools/llvm-pdbutil/DumpOutputStyle.h | 2 +-
llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp | 9 +-
llvm/tools/llvm-pdbutil/ExplainOutputStyle.h | 3 +-
llvm/tools/llvm-pdbutil/FormatUtil.cpp | 258 -
llvm/tools/llvm-pdbutil/FormatUtil.h | 141 -
llvm/tools/llvm-pdbutil/InputFile.cpp | 510 -
llvm/tools/llvm-pdbutil/InputFile.h | 154 -
llvm/tools/llvm-pdbutil/LinePrinter.cpp | 335 -
llvm/tools/llvm-pdbutil/LinePrinter.h | 167 -
llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp | 10 +-
llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp | 7 +-
llvm/tools/llvm-pdbutil/OutputStyle.h | 5 +-
llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp | 4 +-
.../llvm-pdbutil/PrettyClassDefinitionDumper.cpp | 3 +-
.../PrettyClassLayoutGraphicalDumper.cpp | 3 +-
llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp | 1 -
llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp | 3 +-
.../llvm-pdbutil/PrettyExternalSymbolDumper.cpp | 3 +-
llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp | 5 +-
llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp | 4 +-
llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp | 4 +-
llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp | 6 +-
llvm/tools/llvm-pdbutil/StreamUtil.cpp | 4 +-
llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp | 6 +-
llvm/tools/llvm-pdbutil/TypeReferenceTracker.h | 3 +-
llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp | 145 +-
llvm/tools/llvm-pdbutil/llvm-pdbutil.h | 3 +
llvm/tools/llvm-profdata/llvm-profdata.cpp | 107 +-
llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 176 +-
llvm/tools/llvm-readobj/ARMWinEHPrinter.h | 3 +-
llvm/tools/llvm-readobj/ELFDumper.cpp | 196 +-
llvm/tools/llvm-readobj/MachODumper.cpp | 53 +-
llvm/tools/llvm-readobj/ObjDumper.h | 58 +-
llvm/tools/llvm-readobj/Opts.td | 3 +-
llvm/tools/llvm-readobj/WasmDumper.cpp | 14 +-
llvm/tools/llvm-readobj/XCOFFDumper.cpp | 253 +-
llvm/tools/llvm-readobj/llvm-readobj.cpp | 53 +-
llvm/tools/llvm-readobj/llvm-readobj.h | 5 +-
llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp | 19 +-
llvm/tools/llvm-sim/llvm-sim.cpp | 5 +-
llvm/tools/llvm-stress/llvm-stress.cpp | 101 +-
llvm/tools/llvm-strings/llvm-strings.cpp | 3 +-
llvm/tools/llvm-symbolizer/Opts.td | 6 +
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 210 +-
llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp | 1 +
llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp | 1 +
llvm/tools/llvm-xray/func-id-helper.cpp | 1 +
llvm/tools/llvm-xray/func-id-helper.h | 1 +
llvm/tools/llvm-xray/xray-graph-diff.cpp | 1 +
llvm/tools/opt/NewPMDriver.cpp | 70 +-
llvm/tools/opt/NewPMDriver.h | 5 +-
llvm/tools/opt/PassPrinters.cpp | 212 -
llvm/tools/opt/PassPrinters.h | 40 -
llvm/tools/opt/opt.cpp | 122 +-
llvm/utils/TableGen/AsmMatcherEmitter.cpp | 3 +-
llvm/utils/TableGen/AsmWriterEmitter.cpp | 23 +-
llvm/utils/TableGen/AsmWriterInst.cpp | 1 +
llvm/utils/TableGen/Attributes.cpp | 3 -
llvm/utils/TableGen/CallingConvEmitter.cpp | 139 +-
llvm/utils/TableGen/CodeBeadsGen.cpp | 137 -
llvm/utils/TableGen/CodeEmitterGen.cpp | 250 +-
llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 14 +-
llvm/utils/TableGen/CodeGenDAGPatterns.h | 1 -
llvm/utils/TableGen/CodeGenInstruction.cpp | 6 +-
llvm/utils/TableGen/CodeGenInstruction.h | 3 +-
llvm/utils/TableGen/CodeGenIntrinsics.h | 5 +-
llvm/utils/TableGen/CodeGenMapTable.cpp | 2 +-
llvm/utils/TableGen/CodeGenRegisters.cpp | 38 +-
llvm/utils/TableGen/CodeGenRegisters.h | 36 +-
llvm/utils/TableGen/CodeGenSchedule.cpp | 1 -
llvm/utils/TableGen/CodeGenSchedule.h | 3 -
llvm/utils/TableGen/CodeGenTarget.cpp | 24 +-
llvm/utils/TableGen/CodeGenTarget.h | 7 +-
llvm/utils/TableGen/DAGISelEmitter.cpp | 1 +
llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 2 -
llvm/utils/TableGen/DAGISelMatcherGen.cpp | 3 +-
llvm/utils/TableGen/DFAEmitter.cpp | 4 +-
llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 2 -
llvm/utils/TableGen/DXILEmitter.cpp | 374 +
llvm/utils/TableGen/DecoderEmitter.cpp | 2705 +++
llvm/utils/TableGen/DirectiveEmitter.cpp | 4 +-
llvm/utils/TableGen/DisassemblerEmitter.cpp | 26 +-
llvm/utils/TableGen/ExegesisEmitter.cpp | 4 -
llvm/utils/TableGen/FastISelEmitter.cpp | 2 +-
llvm/utils/TableGen/FixedLenDecoderEmitter.cpp | 2560 ---
llvm/utils/TableGen/GICombinerEmitter.cpp | 17 +-
llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp | 4 +-
llvm/utils/TableGen/GlobalISel/GIMatchTree.h | 4 +-
llvm/utils/TableGen/GlobalISelEmitter.cpp | 23 +-
llvm/utils/TableGen/InstrInfoEmitter.cpp | 51 +-
llvm/utils/TableGen/IntrinsicEmitter.cpp | 81 +-
llvm/utils/TableGen/OptParserEmitter.cpp | 2 +-
llvm/utils/TableGen/OptRSTEmitter.cpp | 29 +-
llvm/utils/TableGen/PseudoLoweringEmitter.cpp | 3 +-
llvm/utils/TableGen/RegisterBankEmitter.cpp | 8 +-
llvm/utils/TableGen/RegisterInfoEmitter.cpp | 120 +-
llvm/utils/TableGen/SearchableTableEmitter.cpp | 4 +-
llvm/utils/TableGen/SequenceToOffsetTable.h | 16 +-
llvm/utils/TableGen/SubtargetEmitter.cpp | 53 +-
llvm/utils/TableGen/SubtargetFeatureInfo.cpp | 66 +-
llvm/utils/TableGen/TableGen.cpp | 20 +-
llvm/utils/TableGen/TableGenBackends.h | 3 +-
llvm/utils/TableGen/VarLenCodeEmitterGen.cpp | 487 +
llvm/utils/TableGen/VarLenCodeEmitterGen.h | 66 +
.../TableGen/WebAssemblyDisassemblerEmitter.cpp | 18 +-
llvm/utils/TableGen/X86DisassemblerTables.cpp | 34 +-
llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp | 75 +-
llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 266 +-
llvm/utils/TableGen/X86MnemonicTables.cpp | 94 +
llvm/utils/TableGen/X86RecognizableInstr.cpp | 192 +-
llvm/utils/TableGen/X86RecognizableInstr.h | 77 +-
3394 files changed, 262975 insertions(+), 110125 deletions(-)
delete mode 100644 llvm/include/llvm-c/Transforms/Coroutines.h
create mode 100644 llvm/include/llvm-c/blake3.h
create mode 100644 llvm/include/llvm/ADT/AddressRanges.h
create mode 100644 llvm/include/llvm/Analysis/ScalarFuncs.def
create mode 100644 llvm/include/llvm/Analysis/TensorSpec.h
create mode 100644 llvm/include/llvm/BinaryFormat/DXContainer.h
create mode 100644 llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
create mode 100644 llvm/include/llvm/BinaryFormat/GOFF.h
create mode 100644 llvm/include/llvm/Bitstream/BitCodeEnums.h
create mode 100644 llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
create mode 100644 llvm/include/llvm/CodeGen/CFIFixup.h
delete mode 100644 llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h
delete mode 100644 llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
create mode 100644 llvm/include/llvm/CodeGen/RegisterBank.h
create mode 100644 llvm/include/llvm/CodeGen/RegisterBankInfo.h
create mode 100644 llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h
delete mode 100644 llvm/include/llvm/DebugInfo/GSYM/Range.h
create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h
create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h
create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h
create mode 100644 llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h
create mode 100644 llvm/include/llvm/DebugInfo/Symbolize/Markup.h
create mode 100644 llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
create mode 100644 llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
create mode 100644 llvm/include/llvm/Debuginfod/DIFetcher.h
create mode 100644 llvm/include/llvm/Demangle/ItaniumNodes.def
create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h
create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h
delete mode 100644 llvm/include/llvm/IR/AttributesAMDGPU.td
create mode 100644 llvm/include/llvm/IR/ConstantFold.h
create mode 100644 llvm/include/llvm/IR/FMF.h
create mode 100644 llvm/include/llvm/IR/IntrinsicsDirectX.td
create mode 100644 llvm/include/llvm/IR/IntrinsicsSPIRV.td
create mode 100644 llvm/include/llvm/IR/VectorBuilder.h
create mode 100644 llvm/include/llvm/MC/MCDXContainerStreamer.h
create mode 100644 llvm/include/llvm/MC/MCDXContainerWriter.h
create mode 100644 llvm/include/llvm/MC/MCDecoderOps.h
delete mode 100644 llvm/include/llvm/MC/MCFixedLenDisassembler.h
create mode 100644 llvm/include/llvm/MC/MCSPIRVObjectWriter.h
create mode 100644 llvm/include/llvm/MC/MCSPIRVStreamer.h
create mode 100644 llvm/include/llvm/MC/MCSectionDXContainer.h
create mode 100644 llvm/include/llvm/MC/MCSectionSPIRV.h
create mode 100644 llvm/include/llvm/MCA/IncrementalSourceMgr.h
create mode 100644 llvm/include/llvm/ObjCopy/COFF/COFFConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h
create mode 100644 llvm/include/llvm/ObjCopy/CommonConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/ConfigManager.h
create mode 100644 llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h
create mode 100644 llvm/include/llvm/ObjCopy/MachO/MachOConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h
create mode 100644 llvm/include/llvm/ObjCopy/MultiFormatConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/ObjCopy.h
create mode 100644 llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h
create mode 100644 llvm/include/llvm/ObjCopy/wasm/WasmConfig.h
create mode 100644 llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h
create mode 100644 llvm/include/llvm/Object/DXContainer.h
create mode 100644 llvm/include/llvm/Object/OffloadBinary.h
create mode 100644 llvm/include/llvm/ObjectYAML/DXContainerYAML.h
create mode 100644 llvm/include/llvm/ObjectYAML/OffloadYAML.h
create mode 100644 llvm/include/llvm/ProfileData/MIBEntryDef.inc
create mode 100644 llvm/include/llvm/ProfileData/MemProf.h
create mode 100644 llvm/include/llvm/Support/BLAKE3.h
create mode 100644 llvm/include/llvm/Support/CSKYAttributeParser.h
create mode 100644 llvm/include/llvm/Support/CSKYAttributes.h
create mode 100644 llvm/include/llvm/Support/CSKYTargetParser.def
create mode 100644 llvm/include/llvm/Support/CSKYTargetParser.h
create mode 100644 llvm/include/llvm/TableGen/Parser.h
delete mode 100644 llvm/include/llvm/Transforms/Coroutines.h
create mode 100644 llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h
delete mode 100644 llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
create mode 100644 llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h
create mode 100644 llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h
create mode 100644 llvm/include/llvm/Transforms/Utils/LowerAtomic.h
create mode 100644 llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h
create mode 100644 llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
create mode 100644 llvm/include/llvm/Transforms/Utils/MisExpect.h
create mode 100644 llvm/include/llvm/WindowsDriver/MSVCPaths.h
create mode 100644 llvm/include/llvm/WindowsDriver/MSVCSetupApi.h
create mode 100644 llvm/lib/Analysis/TensorSpec.cpp
create mode 100644 llvm/lib/BinaryFormat/COFF.cpp
create mode 100644 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
create mode 100644 llvm/lib/CodeGen/CFIFixup.cpp
delete mode 100644 llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
delete mode 100644 llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
create mode 100644 llvm/lib/CodeGen/JMCInstrumenter.cpp
create mode 100644 llvm/lib/CodeGen/RegisterBank.cpp
create mode 100644 llvm/lib/CodeGen/RegisterBankInfo.cpp
create mode 100644 llvm/lib/CodeGen/SelectOptimize.cpp
create mode 100644 llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
create mode 100644 llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp
delete mode 100644 llvm/lib/DebugInfo/GSYM/Range.cpp
create mode 100644 llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp
create mode 100644 llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
create mode 100644 llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp
create mode 100644 llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp
create mode 100644 llvm/lib/DebugInfo/Symbolize/Markup.cpp
create mode 100644 llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
delete mode 100644 llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
create mode 100644 llvm/lib/Debuginfod/DIFetcher.cpp
create mode 100644 llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
create mode 100644 llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
delete mode 100644 llvm/lib/IR/ConstantFold.h
create mode 100644 llvm/lib/IR/VectorBuilder.cpp
create mode 100644 llvm/lib/MC/MCDXContainerStreamer.cpp
create mode 100644 llvm/lib/MC/MCDXContainerWriter.cpp
create mode 100644 llvm/lib/MC/MCSPIRVStreamer.cpp
create mode 100644 llvm/lib/MC/MCSectionDXContainer.cpp
create mode 100644 llvm/lib/MC/SPIRVObjectWriter.cpp
create mode 100644 llvm/lib/MCA/IncrementalSourceMgr.cpp
create mode 100644 llvm/lib/ObjCopy/Archive.cpp
create mode 100644 llvm/lib/ObjCopy/Archive.h
create mode 100644 llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
create mode 100644 llvm/lib/ObjCopy/COFF/COFFObject.cpp
create mode 100644 llvm/lib/ObjCopy/COFF/COFFObject.h
create mode 100644 llvm/lib/ObjCopy/COFF/COFFReader.cpp
create mode 100644 llvm/lib/ObjCopy/COFF/COFFReader.h
create mode 100644 llvm/lib/ObjCopy/COFF/COFFWriter.cpp
create mode 100644 llvm/lib/ObjCopy/COFF/COFFWriter.h
create mode 100644 llvm/lib/ObjCopy/CommonConfig.cpp
create mode 100644 llvm/lib/ObjCopy/ConfigManager.cpp
create mode 100644 llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
create mode 100644 llvm/lib/ObjCopy/ELF/ELFObject.cpp
create mode 100644 llvm/lib/ObjCopy/ELF/ELFObject.h
create mode 100644 llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp
create mode 100644 llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h
create mode 100644 llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
create mode 100644 llvm/lib/ObjCopy/MachO/MachOObject.cpp
create mode 100644 llvm/lib/ObjCopy/MachO/MachOObject.h
create mode 100644 llvm/lib/ObjCopy/MachO/MachOReader.cpp
create mode 100644 llvm/lib/ObjCopy/MachO/MachOReader.h
create mode 100644 llvm/lib/ObjCopy/MachO/MachOWriter.cpp
create mode 100644 llvm/lib/ObjCopy/MachO/MachOWriter.h
create mode 100644 llvm/lib/ObjCopy/ObjCopy.cpp
create mode 100644 llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp
create mode 100644 llvm/lib/ObjCopy/XCOFF/XCOFFObject.h
create mode 100644 llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp
create mode 100644 llvm/lib/ObjCopy/XCOFF/XCOFFReader.h
create mode 100644 llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp
create mode 100644 llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
create mode 100644 llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp
create mode 100644 llvm/lib/ObjCopy/wasm/WasmObject.cpp
create mode 100644 llvm/lib/ObjCopy/wasm/WasmObject.h
create mode 100644 llvm/lib/ObjCopy/wasm/WasmReader.cpp
create mode 100644 llvm/lib/ObjCopy/wasm/WasmReader.h
create mode 100644 llvm/lib/ObjCopy/wasm/WasmWriter.cpp
create mode 100644 llvm/lib/ObjCopy/wasm/WasmWriter.h
create mode 100644 llvm/lib/Object/DXContainer.cpp
create mode 100644 llvm/lib/Object/OffloadBinary.cpp
create mode 100644 llvm/lib/ObjectYAML/DXContainerEmitter.cpp
create mode 100644 llvm/lib/ObjectYAML/DXContainerYAML.cpp
create mode 100644 llvm/lib/ObjectYAML/OffloadEmitter.cpp
create mode 100644 llvm/lib/ObjectYAML/OffloadYAML.cpp
create mode 100644 llvm/lib/ProfileData/MemProf.cpp
create mode 100644 llvm/lib/Support/AddressRanges.cpp
create mode 100644 llvm/lib/Support/BLAKE3/LICENSE
create mode 100644 llvm/lib/Support/BLAKE3/README.md
create mode 100644 llvm/lib/Support/BLAKE3/blake3.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx2.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx512.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
create mode 100644 llvm/lib/Support/BLAKE3/blake3_dispatch.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_impl.h
create mode 100644 llvm/lib/Support/BLAKE3/blake3_neon.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_portable.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse2.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse41.c
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
create mode 100644 llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
create mode 100644 llvm/lib/Support/CSKYAttributeParser.cpp
create mode 100644 llvm/lib/Support/CSKYAttributes.cpp
create mode 100644 llvm/lib/Support/CSKYTargetParser.cpp
create mode 100644 llvm/lib/Support/UnicodeNameToCodepoint.cpp
create mode 100644 llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp
create mode 100644 llvm/lib/TableGen/Parser.cpp
create mode 100644 llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
create mode 100644 llvm/lib/Target/AArch64/AArch64MachineScheduler.h
create mode 100644 llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
create mode 100644 llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
delete mode 100644 llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
create mode 100644 llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
create mode 100644 llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp
delete mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
delete mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
create mode 100644 llvm/lib/Target/AMDGPU/VINTERPInstructions.td
create mode 100644 llvm/lib/Target/AMDGPU/VOPDInstructions.td
create mode 100644 llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
delete mode 100644 llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
create mode 100644 llvm/lib/Target/CSKY/CSKYInstrAlias.td
create mode 100644 llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp
create mode 100644 llvm/lib/Target/CSKY/CSKYTargetObjectFile.h
create mode 100644 llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h
create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
create mode 100644 llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h
create mode 100644
llvm/lib/Target/DirectX/DXIL.td create mode 100644 llvm/lib/Target/DirectX/DXILConstants.h create mode 100644 llvm/lib/Target/DirectX/DXILOpLowering.cpp create mode 100644 llvm/lib/Target/DirectX/DXILPointerType.cpp create mode 100644 llvm/lib/Target/DirectX/DXILPointerType.h create mode 100644 llvm/lib/Target/DirectX/DXILPrepare.cpp create mode 100644 llvm/lib/Target/DirectX/DXILStubs.td create mode 100644 llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp create mode 100644 llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp create mode 100644 llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h create mode 100644 llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp create mode 100644 llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h create mode 100644 llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp create mode 100644 llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h create mode 100644 llvm/lib/Target/DirectX/DirectX.h create mode 100644 llvm/lib/Target/DirectX/DirectX.td create mode 100644 llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp create mode 100644 llvm/lib/Target/DirectX/DirectXFrameLowering.h create mode 100644 llvm/lib/Target/DirectX/DirectXInstrInfo.cpp create mode 100644 llvm/lib/Target/DirectX/DirectXInstrInfo.h create mode 100644 llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp create mode 100644 llvm/lib/Target/DirectX/DirectXRegisterInfo.h create mode 100644 llvm/lib/Target/DirectX/DirectXSubtarget.cpp create mode 100644 llvm/lib/Target/DirectX/DirectXSubtarget.h create mode 100644 llvm/lib/Target/DirectX/DirectXTargetLowering.h create mode 100644 llvm/lib/Target/DirectX/DirectXTargetMachine.cpp create mode 100644 llvm/lib/Target/DirectX/DirectXTargetMachine.h create mode 100644 llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h create mode 100644 llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp create mode 100644 llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h create mode 100644 llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp create mode 100644 llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h create mode 100644 llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp create mode 100644 llvm/lib/Target/DirectX/PointerTypeAnalysis.h create mode 100644 llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp create mode 100644 llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h delete mode 100644 llvm/lib/Target/Hexagon/HexagonArch.h create mode 100644 llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp create mode 100644 llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArch.h create mode 100644 llvm/lib/Target/LoongArch/LoongArch.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchCallingConv.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchFrameLowering.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp create mode 100644 
llvm/lib/Target/LoongArch/LoongArchISelLowering.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchInstrFormats.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchInstrInfo.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchInstrInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchSubtarget.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetMachine.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h create mode 100644 llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h create mode 100644 llvm/lib/Target/Mips/MipsCombine.td create mode 100644 llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp create mode 100644 llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp create mode 100644 llvm/lib/Target/Mips/MipsTargetTransformInfo.h create mode 100644 llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp create mode 100644 llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp create mode 100644 llvm/lib/Target/PowerPC/PPCInstrMMA.td create mode 100644 llvm/lib/Target/PowerPC/PPCInstrP10.td delete mode 100644 llvm/lib/Target/PowerPC/PPCInstrPrefix.td create mode 100644 llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td create mode 100644 llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp create mode 100644 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp create mode 100644 llvm/lib/Target/RISCV/RISCVMacroFusion.h create mode 100644 llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp create mode 100644 llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h create mode 100644 
llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp create mode 100644 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h create mode 100644 llvm/lib/Target/SPIRV/SPIRV.h create mode 100644 llvm/lib/Target/SPIRV/SPIRV.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVCallLowering.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVEnums.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVFrameLowering.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVISelLowering.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVInstrFormats.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVInstrInfo.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVInstrInfo.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVMCInstLower.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVSubtarget.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVTargetMachine.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h create mode 100644 llvm/lib/Target/SPIRV/SPIRVUtils.cpp create mode 100644 llvm/lib/Target/SPIRV/SPIRVUtils.h create mode 100644 llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp create mode 100644 llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h create mode 100644 llvm/lib/Target/SystemZ/SystemZScheduleZ16.td create mode 100644 llvm/lib/Target/VE/VVPISelLowering.cpp delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp create mode 100644 
llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp create mode 100644 llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h create mode 100644 llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp create mode 100644 llvm/lib/Target/X86/X86FastPreTileConfig.cpp create mode 100644 llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp delete mode 100644 llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h delete mode 100644 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp delete mode 100644 llvm/lib/Transforms/Scalar/LowerAtomic.cpp create mode 100644 llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp create mode 100644 llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp create mode 100644 llvm/lib/Transforms/Utils/LowerAtomic.cpp create mode 100644 llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp create mode 100644 llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp create mode 100644 llvm/lib/Transforms/Utils/MisExpect.cpp delete mode 100644 llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h delete mode 100644 llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp delete mode 100644 llvm/lib/Transforms/Vectorize/VPlanPredicator.h create mode 100644 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp create mode 100644 llvm/lib/WindowsDriver/MSVCPaths.cpp delete mode 100644 llvm/tools/llvm-objcopy/COFF/COFFConfig.h delete mode 100644 llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp delete mode 100644 llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h delete mode 100644 llvm/tools/llvm-objcopy/COFF/Object.cpp delete mode 100644 llvm/tools/llvm-objcopy/COFF/Object.h delete mode 100644 llvm/tools/llvm-objcopy/COFF/Reader.cpp delete mode 100644 llvm/tools/llvm-objcopy/COFF/Reader.h delete mode 100644 llvm/tools/llvm-objcopy/COFF/Writer.cpp delete mode 100644 llvm/tools/llvm-objcopy/COFF/Writer.h delete mode 100644 llvm/tools/llvm-objcopy/CommonConfig.h delete mode 100644 llvm/tools/llvm-objcopy/ConfigManager.cpp delete mode 100644 llvm/tools/llvm-objcopy/ConfigManager.h delete mode 100644 llvm/tools/llvm-objcopy/ELF/ELFConfig.h delete mode 100644 llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp delete mode 100644 llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h delete mode 100644 llvm/tools/llvm-objcopy/ELF/Object.cpp delete mode 100644 llvm/tools/llvm-objcopy/ELF/Object.h delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOConfig.h delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOReader.cpp delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOReader.h delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp delete mode 100644 llvm/tools/llvm-objcopy/MachO/MachOWriter.h delete mode 100644 llvm/tools/llvm-objcopy/MachO/Object.cpp delete mode 100644 llvm/tools/llvm-objcopy/MachO/Object.h delete mode 100644 llvm/tools/llvm-objcopy/MultiFormatConfig.h create mode 100644 llvm/tools/llvm-objcopy/ObjcopyOptions.cpp create mode 100644 llvm/tools/llvm-objcopy/ObjcopyOptions.h delete mode 100644 llvm/tools/llvm-objcopy/llvm-objcopy.h delete mode 100644 llvm/tools/llvm-objcopy/wasm/Object.cpp delete mode 100644 llvm/tools/llvm-objcopy/wasm/Object.h delete mode 100644 llvm/tools/llvm-objcopy/wasm/Reader.cpp delete mode 100644 llvm/tools/llvm-objcopy/wasm/Reader.h delete mode 100644 llvm/tools/llvm-objcopy/wasm/WasmConfig.h delete mode 100644 
llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp delete mode 100644 llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h delete mode 100644 llvm/tools/llvm-objcopy/wasm/Writer.cpp delete mode 100644 llvm/tools/llvm-objcopy/wasm/Writer.h create mode 100644 llvm/tools/llvm-objdump/OffloadDump.cpp create mode 100644 llvm/tools/llvm-objdump/OffloadDump.h delete mode 100644 llvm/tools/llvm-pdbutil/FormatUtil.cpp delete mode 100644 llvm/tools/llvm-pdbutil/FormatUtil.h delete mode 100644 llvm/tools/llvm-pdbutil/InputFile.cpp delete mode 100644 llvm/tools/llvm-pdbutil/InputFile.h delete mode 100644 llvm/tools/llvm-pdbutil/LinePrinter.cpp delete mode 100644 llvm/tools/llvm-pdbutil/LinePrinter.h delete mode 100644 llvm/tools/opt/PassPrinters.cpp delete mode 100644 llvm/tools/opt/PassPrinters.h delete mode 100644 llvm/utils/TableGen/CodeBeadsGen.cpp create mode 100644 llvm/utils/TableGen/DXILEmitter.cpp create mode 100644 llvm/utils/TableGen/DecoderEmitter.cpp delete mode 100644 llvm/utils/TableGen/FixedLenDecoderEmitter.cpp create mode 100644 llvm/utils/TableGen/VarLenCodeEmitterGen.cpp create mode 100644 llvm/utils/TableGen/VarLenCodeEmitterGen.h create mode 100644 llvm/utils/TableGen/X86MnemonicTables.cpp (limited to 'llvm') diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 09d80841fa5d..2abc29851cd9 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -548,6 +548,13 @@ LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C); */ void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard); +/** + * Set whether the given context is in opaque pointer mode. + * + * @see LLVMContext::setOpaquePointers() + */ +void LLVMContextSetOpaquePointers(LLVMContextRef C, LLVMBool OpaquePointers); + /** * Destroy a context instance. * @@ -1391,9 +1398,9 @@ LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy); */ /** - * Obtain the type of elements within a sequential type. + * Obtain the element type of an array or vector type. * - * This works on array, vector, and pointer types. + * This currently also works for pointer types, but this usage is deprecated. * * @see llvm::SequentialType::getElementType() */ @@ -1442,6 +1449,22 @@ unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy); */ LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace); +/** + * Determine whether a pointer is opaque. + * + * True if this is an instance of an opaque PointerType. + * + * @see llvm::Type::isOpaquePointerTy() + */ +LLVMBool LLVMPointerTypeIsOpaque(LLVMTypeRef Ty); + +/** + * Create an opaque pointer type in a context. + * + * @see llvm::PointerType::get() + */ +LLVMTypeRef LLVMPointerTypeInContext(LLVMContextRef C, unsigned AddressSpace); + /** * Obtain the address space of a pointer type. * @@ -2088,12 +2111,24 @@ LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy, LLVMValueRef *ConstantVals, unsigned Count); +/** + * Get element of a constant aggregate (struct, array or vector) at the + * specified index. Returns null if the index is out of range, or it's not + * possible to determine the element (e.g., because the constant is a + * constant expression.) + * + * @see llvm::Constant::getAggregateElement() + */ +LLVMValueRef LLVMGetAggregateElement(LLVMValueRef C, unsigned Idx); + /** * Get an element at specified index as a constant. 
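For orientation while reviewing this import: the new Core.h entry points in the hunk above can be exercised as in the following sketch. It is illustrative only, not part of the imported patch, and assumes nothing beyond the vendored llvm-c headers.

#include "llvm-c/Core.h"

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMContextSetOpaquePointers(Ctx, 1); /* opt in to opaque pointer mode */

  /* An opaque pointer type carries no element type, only an address space. */
  LLVMTypeRef Ptr = LLVMPointerTypeInContext(Ctx, /*AddressSpace=*/0);
  /* LLVMPointerTypeIsOpaque(Ptr) now returns a nonzero value. */

  /* LLVMGetAggregateElement subsumes the deprecated LLVMGetElementAsConstant
     and also handles struct and array constants; an out-of-range index
     yields NULL instead of asserting. */
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef Elts[2] = {LLVMConstInt(I32, 1, 0), LLVMConstInt(I32, 2, 0)};
  LLVMValueRef Arr = LLVMConstArray(I32, Elts, 2);
  LLVMValueRef First = LLVMGetAggregateElement(Arr, 0); /* i32 1 */
  LLVMValueRef None = LLVMGetAggregateElement(Arr, 5);  /* NULL */
  (void)Ptr; (void)First; (void)None;

  LLVMContextDispose(Ctx);
  return 0;
}

Callers of the deprecated LLVMGetElementAsConstant can switch to LLVMGetAggregateElement without a behavior change for ConstantDataSequential values.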
* * @see ConstantDataSequential::getElementAsConstant() */ -LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx); +LLVM_ATTRIBUTE_C_DEPRECATED( + LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx), + "Use LLVMGetAggregateElement instead"); /** * Create a ConstantVector from values. @@ -2203,8 +2238,6 @@ LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant, LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, LLVMValueRef VectorBConstant, LLVMValueRef MaskConstant); -LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, - unsigned NumIdx); LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef ElementValueConstant, unsigned *IdxList, unsigned NumIdx); @@ -3978,6 +4011,9 @@ LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/ LLVMTypeRef DestTy, const char *Name); +LLVMOpcode LLVMGetCastOpcode(LLVMValueRef Src, LLVMBool SrcIsSigned, + LLVMTypeRef DestTy, LLVMBool DestIsSigned); + /* Comparisons */ LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op, LLVMValueRef LHS, LLVMValueRef RHS, diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h index 53baaef11033..6999a350ec91 100644 --- a/llvm/include/llvm-c/DisassemblerTypes.h +++ b/llvm/include/llvm-c/DisassemblerTypes.h @@ -38,15 +38,15 @@ typedef void *LLVMDisasmContextRef; * one operand with symbolic information. To determine the symbolic operand * information for each operand, the bytes for the specific operand in the * instruction are specified by the Offset parameter and its byte widith is the - * size parameter. For instructions sets with fixed widths and one symbolic - * operand per instruction, the Offset parameter will be zero and Size parameter - * will be the instruction width. The information is returned in TagBuf and is - * Triple specific with its specific information defined by the value of - * TagType for that Triple. If symbolic information is returned the function - * returns 1, otherwise it returns 0. + * OpSize parameter. For instructions sets with fixed widths and one symbolic + * operand per instruction, the Offset parameter will be zero and InstSize + * parameter will be the instruction width. The information is returned in + * TagBuf and is Triple specific with its specific information defined by the + * value of TagType for that Triple. If symbolic information is returned the + * function * returns 1, otherwise it returns 0. */ -typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, - uint64_t Offset, uint64_t Size, +typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, + uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf); /** diff --git a/llvm/include/llvm-c/Object.h b/llvm/include/llvm-c/Object.h index 9a9596aaa08c..f422c1ad224d 100644 --- a/llvm/include/llvm-c/Object.h +++ b/llvm/include/llvm-c/Object.h @@ -38,21 +38,23 @@ typedef struct LLVMOpaqueSymbolIterator *LLVMSymbolIteratorRef; typedef struct LLVMOpaqueRelocationIterator *LLVMRelocationIteratorRef; typedef enum { - LLVMBinaryTypeArchive, /**< Archive file. */ - LLVMBinaryTypeMachOUniversalBinary, /**< Mach-O Universal Binary file. */ - LLVMBinaryTypeCOFFImportFile, /**< COFF Import file. */ - LLVMBinaryTypeIR, /**< LLVM IR. */ - LLVMBinaryTypeWinRes, /**< Windows resource (.res) file. */ - LLVMBinaryTypeCOFF, /**< COFF Object file. 
*/ - LLVMBinaryTypeELF32L, /**< ELF 32-bit, little endian. */ - LLVMBinaryTypeELF32B, /**< ELF 32-bit, big endian. */ - LLVMBinaryTypeELF64L, /**< ELF 64-bit, little endian. */ - LLVMBinaryTypeELF64B, /**< ELF 64-bit, big endian. */ - LLVMBinaryTypeMachO32L, /**< MachO 32-bit, little endian. */ - LLVMBinaryTypeMachO32B, /**< MachO 32-bit, big endian. */ - LLVMBinaryTypeMachO64L, /**< MachO 64-bit, little endian. */ - LLVMBinaryTypeMachO64B, /**< MachO 64-bit, big endian. */ - LLVMBinaryTypeWasm, /**< Web Assembly. */ + LLVMBinaryTypeArchive, /**< Archive file. */ + LLVMBinaryTypeMachOUniversalBinary, /**< Mach-O Universal Binary file. */ + LLVMBinaryTypeCOFFImportFile, /**< COFF Import file. */ + LLVMBinaryTypeIR, /**< LLVM IR. */ + LLVMBinaryTypeWinRes, /**< Windows resource (.res) file. */ + LLVMBinaryTypeCOFF, /**< COFF Object file. */ + LLVMBinaryTypeELF32L, /**< ELF 32-bit, little endian. */ + LLVMBinaryTypeELF32B, /**< ELF 32-bit, big endian. */ + LLVMBinaryTypeELF64L, /**< ELF 64-bit, little endian. */ + LLVMBinaryTypeELF64B, /**< ELF 64-bit, big endian. */ + LLVMBinaryTypeMachO32L, /**< MachO 32-bit, little endian. */ + LLVMBinaryTypeMachO32B, /**< MachO 32-bit, big endian. */ + LLVMBinaryTypeMachO64L, /**< MachO 64-bit, little endian. */ + LLVMBinaryTypeMachO64B, /**< MachO 64-bit, big endian. */ + LLVMBinaryTypeWasm, /**< Web Assembly. */ + LLVMBinaryTypeOffload, /**< Offloading fatbinary. */ + } LLVMBinaryType; /** diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index e2f30b7cdf45..0dcfb06865aa 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -54,6 +54,7 @@ typedef uint64_t LLVMOrcExecutorAddress; * Represents generic linkage flags for a symbol definition. */ typedef enum { + LLVMJITSymbolGenericFlagsNone = 0, LLVMJITSymbolGenericFlagsExported = 1U << 0, LLVMJITSymbolGenericFlagsWeak = 1U << 1, LLVMJITSymbolGenericFlagsCallable = 1U << 2, @@ -122,13 +123,13 @@ typedef LLVMOrcCSymbolFlagsMapPair *LLVMOrcCSymbolFlagsMapPairs; typedef struct { LLVMOrcSymbolStringPoolEntryRef Name; LLVMJITEvaluatedSymbol Sym; -} LLVMJITCSymbolMapPair; +} LLVMOrcCSymbolMapPair; /** * Represents a list of (SymbolStringPtr, JITEvaluatedSymbol) pairs that can be * used to construct a SymbolMap. */ -typedef LLVMJITCSymbolMapPair *LLVMOrcCSymbolMapPairs; +typedef LLVMOrcCSymbolMapPair *LLVMOrcCSymbolMapPairs; /** * Represents a SymbolAliasMapEntry @@ -202,6 +203,22 @@ typedef enum { LLVMOrcJITDylibLookupFlagsMatchAllSymbols } LLVMOrcJITDylibLookupFlags; +/** + * An element type for a JITDylib search order. + */ +typedef struct { + LLVMOrcJITDylibRef JD; + LLVMOrcJITDylibLookupFlags JDLookupFlags; +} LLVMOrcCJITDylibSearchOrderElement; + +/** + * A JITDylib search order. + * + * The list is terminated with an element containing a null pointer for the JD + * field. + */ +typedef LLVMOrcCJITDylibSearchOrderElement *LLVMOrcCJITDylibSearchOrder; + /** * Symbol lookup flags for lookup sets. This should be kept in sync with * llvm::orc::SymbolLookupFlags. @@ -340,6 +357,14 @@ typedef LLVMErrorRef (*LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction)( LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags, LLVMOrcCLookupSet LookupSet, size_t LookupSetSize); +/** + * Disposer for a custom generator. + * + * Will be called by ORC when the JITDylib that the generator is attached to + * is destroyed. + */ +typedef void (*LLVMOrcDisposeCAPIDefinitionGeneratorFunction)(void *Ctx); + /** * Predicate function for SymbolStringPoolEntries. 
*/ @@ -494,6 +519,58 @@ void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP); LLVMOrcSymbolStringPoolEntryRef LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name); +/** + * Callback type for ExecutionSession lookups. + * + * If Err is LLVMErrorSuccess then Result will contain a pointer to a + * list of ( SymbolStringPtr, JITEvaluatedSymbol ) pairs of length NumPairs. + * + * If Err is a failure value then Result and Ctx are undefined and should + * not be accessed. The Callback is responsible for handling the error + * value (e.g. by calling LLVMGetErrorMessage + LLVMDisposeErrorMessage). + * + * The caller retains ownership of the Result array and will release all + * contained symbol names. Clients are responsible for retaining any symbol + * names that they wish to hold after the function returns. + */ +typedef void (*LLVMOrcExecutionSessionLookupHandleResultFunction)( + LLVMErrorRef Err, LLVMOrcCSymbolMapPairs Result, size_t NumPairs, + void *Ctx); + +/** + * Look up symbols in an execution session. + * + * This is a wrapper around the general ExecutionSession::lookup function. + * + * The SearchOrder argument contains a list of (JITDylibs, JITDylibSearchFlags) + * pairs that describe the search order. The JITDylibs will be searched in the + * given order to try to find the symbols in the Symbols argument. + * + * The Symbols argument should contain a null-terminated array of + * (SymbolStringPtr, SymbolLookupFlags) pairs describing the symbols to be + * searched for. This function takes ownership of the elements of the Symbols + * array. The Name fields of the Symbols elements are taken to have been + * retained by the client for this function. The client should *not* release the + * Name fields, but are still responsible for destroying the array itself. + * + * The HandleResult function will be called once all searched for symbols have + * been found, or an error occurs. The HandleResult function will be passed an + * LLVMErrorRef indicating success or failure, and (on success) a + * null-terminated LLVMOrcCSymbolMapPairs array containing the function result, + * and the Ctx value passed to the lookup function. + * + * The client is fully responsible for managing the lifetime of the Ctx object. + * A common idiom is to allocate the context prior to the lookup and deallocate + * it in the handler. + * + * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE! + */ +void LLVMOrcExecutionSessionLookup( + LLVMOrcExecutionSessionRef ES, LLVMOrcLookupKind K, + LLVMOrcCJITDylibSearchOrder SearchOrder, size_t SearchOrderSize, + LLVMOrcCLookupSet Symbols, size_t SymbolsSize, + LLVMOrcExecutionSessionLookupHandleResultFunction HandleResult, void *Ctx); + /** * Increments the ref-count for a SymbolStringPool entry. */ @@ -504,6 +581,11 @@ void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S); */ void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S); +/** + * Return the c-string for the given symbol. This string will remain valid until + * the entry is freed (once all LLVMOrcSymbolStringPoolEntryRefs have been + * released). + */ const char *LLVMOrcSymbolStringPoolEntryStr(LLVMOrcSymbolStringPoolEntryRef S); /** @@ -547,7 +629,7 @@ void LLVMOrcDisposeMaterializationUnit(LLVMOrcMaterializationUnitRef MU); * unit. This function takes ownership of the elements of the Syms array. The * Name fields of the array elements are taken to have been retained for this * function. 
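A sketch of how a client might drive this asynchronous lookup. The handler and the ES/JD values are illustrative, and LLVMOrcCLookupSetElement is the pre-existing Orc.h type (assumed unchanged by this import):

#include "llvm-c/Error.h"
#include "llvm-c/Orc.h"
#include <stdio.h>

/* Called once the lookup completes or fails. The lookup machinery keeps
 * ownership of the Result array and its names; we only read from it. */
static void handleResult(LLVMErrorRef Err, LLVMOrcCSymbolMapPairs Result,
                         size_t NumPairs, void *Ctx) {
  if (Err) {
    char *Msg = LLVMGetErrorMessage(Err); /* consumes Err */
    fprintf(stderr, "lookup failed: %s\n", Msg);
    LLVMDisposeErrorMessage(Msg);
    return;
  }
  for (size_t I = 0; I != NumPairs; ++I)
    printf("%s -> 0x%llx\n", LLVMOrcSymbolStringPoolEntryStr(Result[I].Name),
           (unsigned long long)Result[I].Sym.Address);
}

/* Assumes ES and JD were created elsewhere, e.g. via an LLJIT instance. */
static void lookupMain(LLVMOrcExecutionSessionRef ES, LLVMOrcJITDylibRef JD) {
  LLVMOrcCJITDylibSearchOrderElement Order[] = {
      {JD, LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly}};
  LLVMOrcCLookupSetElement Syms[] = {
      {LLVMOrcExecutionSessionIntern(ES, "main"),
       LLVMOrcSymbolLookupFlagsRequiredSymbol}};
  /* Takes ownership of the Name refs in Syms; we keep the arrays themselves. */
  LLVMOrcExecutionSessionLookup(ES, LLVMOrcLookupKindStatic, Order, 1, Syms, 1,
                                handleResult, /*Ctx=*/NULL);
}

A common idiom, per the doc comment above, is to heap-allocate the Ctx object before the call and free it at the end of the handler.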
The client should *not* release the elements of the array, but is - * still responsible for destroyingthe array itself. + * still responsible for destroying the array itself. * * The InitSym argument indicates whether or not this MaterializationUnit * contains static initializers. If three are no static initializers (the common @@ -701,7 +783,7 @@ LLVMOrcMaterializationResponsibilityGetRequestedSymbols( */ void LLVMOrcDisposeSymbols(LLVMOrcSymbolStringPoolEntryRef *Symbols); -/* +/** * Notifies the target JITDylib that the given symbols have been resolved. * This will update the given symbols' addresses in the JITDylib, and notify * any pending queries on the given symbols of their resolution. The given @@ -901,9 +983,27 @@ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD, /** * Create a custom generator. + * + * The F argument will be used to implement the DefinitionGenerator's + * tryToGenerate method (see + * LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction). + * + * Ctx is a context object that will be passed to F. This argument is + * permitted to be null. + * + * Dispose is the disposal function for Ctx. This argument is permitted to be + * null (in which case the client is responsible for the lifetime of Ctx). */ LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator( - LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx); + LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx, + LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose); + +/** + * Continue a lookup that was suspended in a generator (see + * LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction). + */ +void LLVMOrcLookupStateContinueLookup(LLVMOrcLookupStateRef S, + LLVMErrorRef Err); /** * Get a DynamicLibrarySearchGenerator that will reflect process symbols into diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h index 23c8c63ff0b4..bfbe1421a356 100644 --- a/llvm/include/llvm-c/TargetMachine.h +++ b/llvm/include/llvm-c/TargetMachine.h @@ -136,7 +136,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T, wraps several c++ only classes (among them a file stream). Returns any error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. */ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, - char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage); + const char *Filename, + LLVMCodeGenFileType codegen, + char **ErrorMessage); /** Compile the LLVM IR stored in \p M and store the result in \p OutMemBuf. */ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M, diff --git a/llvm/include/llvm-c/Transforms/Coroutines.h b/llvm/include/llvm-c/Transforms/Coroutines.h deleted file mode 100644 index 03b6822033c9..000000000000 --- a/llvm/include/llvm-c/Transforms/Coroutines.h +++ /dev/null @@ -1,56 +0,0 @@ -/*===-- Coroutines.h - Coroutines Library C Interface -----------*- C++ -*-===*\ -|* *| -|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| -|* Exceptions. *| -|* See https://llvm.org/LICENSE.txt for license information. *| -|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| -|* *| -|*===----------------------------------------------------------------------===*| -|* *| -|* This header declares the C interface to libLLVMCoroutines.a, which *| -|* implements various scalar transformations of the LLVM IR. *| -|* *| -|* Many exotic languages can interoperate with C code but have a harder time *| -|* with C++ due to name mangling. 
So in addition to C, this interface enables *| -|* tools written in such languages. *| -|* *| -\*===----------------------------------------------------------------------===*/ - -#ifndef LLVM_C_TRANSFORMS_COROUTINES_H -#define LLVM_C_TRANSFORMS_COROUTINES_H - -#include "llvm-c/ExternC.h" -#include "llvm-c/Types.h" -#include "llvm-c/Transforms/PassManagerBuilder.h" - -LLVM_C_EXTERN_C_BEGIN - -/** - * @defgroup LLVMCTransformsCoroutines Coroutine transformations - * @ingroup LLVMCTransforms - * - * @{ - */ - -/** See llvm::createCoroEarlyLegacyPass function. */ -void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM); - -/** See llvm::createCoroSplitLegacyPass function. */ -void LLVMAddCoroSplitPass(LLVMPassManagerRef PM); - -/** See llvm::createCoroElideLegacyPass function. */ -void LLVMAddCoroElidePass(LLVMPassManagerRef PM); - -/** See llvm::createCoroCleanupLegacyPass function. */ -void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM); - -/** See llvm::addCoroutinePassesToExtensionPoints. */ -void LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(LLVMPassManagerBuilderRef PMB); - -/** - * @} - */ - -LLVM_C_EXTERN_C_END - -#endif diff --git a/llvm/include/llvm-c/Transforms/IPO.h b/llvm/include/llvm-c/Transforms/IPO.h index 3f2cadf32366..c806156281bd 100644 --- a/llvm/include/llvm-c/Transforms/IPO.h +++ b/llvm/include/llvm-c/Transforms/IPO.h @@ -27,9 +27,6 @@ LLVM_C_EXTERN_C_BEGIN * @{ */ -/** See llvm::createArgumentPromotionPass function. */ -void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM); - /** See llvm::createConstantMergePass function. */ void LLVMAddConstantMergePass(LLVMPassManagerRef PM); diff --git a/llvm/include/llvm-c/Transforms/PassManagerBuilder.h b/llvm/include/llvm-c/Transforms/PassManagerBuilder.h index 6e13e18e063b..3ba75440129a 100644 --- a/llvm/include/llvm-c/Transforms/PassManagerBuilder.h +++ b/llvm/include/llvm-c/Transforms/PassManagerBuilder.h @@ -72,12 +72,6 @@ void LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM); -/** See llvm::PassManagerBuilder::populateLTOPassManager. */ -void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, - LLVMPassManagerRef PM, - LLVMBool Internalize, - LLVMBool RunInliner); - /** * @} */ diff --git a/llvm/include/llvm-c/Transforms/Scalar.h b/llvm/include/llvm-c/Transforms/Scalar.h index ba142508bbe4..1d0944799710 100644 --- a/llvm/include/llvm-c/Transforms/Scalar.h +++ b/llvm/include/llvm-c/Transforms/Scalar.h @@ -94,9 +94,6 @@ void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM); /** See llvm::createLoopUnrollAndJamPass function. */ void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM); -/** See llvm::createLoopUnswitchPass function. */ -void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM); - /** See llvm::createLowerAtomicPass function. */ void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM); diff --git a/llvm/include/llvm-c/blake3.h b/llvm/include/llvm-c/blake3.h new file mode 100644 index 000000000000..679477c3aa7f --- /dev/null +++ b/llvm/include/llvm-c/blake3.h @@ -0,0 +1,79 @@ +/*===-- llvm-c/blake3.h - BLAKE3 C Interface ----------------------*- C -*-===*\ +|* *| +|* Released into the public domain with CC0 1.0 *| +|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info. *| +|* SPDX-License-Identifier: CC0-1.0 *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to LLVM's BLAKE3 implementation. 
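Typical use of the C interface declared by this new header (its full body follows below) would look like this sketch; it is illustrative only and assumes a build that links the vendored BLAKE3 support library:

#include "llvm-c/blake3.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  llvm_blake3_hasher H;
  llvm_blake3_hasher_init(&H);

  /* Hashing is incremental: update() may be called any number of times. */
  const char *Msg = "hello world";
  llvm_blake3_hasher_update(&H, Msg, strlen(Msg));

  uint8_t Digest[LLVM_BLAKE3_OUT_LEN]; /* 32 bytes by default */
  llvm_blake3_hasher_finalize(&H, Digest, sizeof(Digest));

  for (size_t I = 0; I < sizeof(Digest); ++I)
    printf("%02x", Digest[I]);
  printf("\n");
  return 0;
}

The keyed and derive-key initializers declared below follow the same update/finalize pattern; finalize_seek additionally allows reading the extended output stream at an arbitrary offset.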
*| +|* Original BLAKE3 C API: https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c *| +|* *| +|* Symbols are prefixed with 'llvm' to avoid a potential conflict with *| +|* another BLAKE3 version within the same program. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_BLAKE3_H +#define LLVM_C_BLAKE3_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define LLVM_BLAKE3_VERSION_STRING "1.3.1" +#define LLVM_BLAKE3_KEY_LEN 32 +#define LLVM_BLAKE3_OUT_LEN 32 +#define LLVM_BLAKE3_BLOCK_LEN 64 +#define LLVM_BLAKE3_CHUNK_LEN 1024 +#define LLVM_BLAKE3_MAX_DEPTH 54 + +// This struct is a private implementation detail. It has to be here because +// it's part of llvm_blake3_hasher below. +typedef struct { + uint32_t cv[8]; + uint64_t chunk_counter; + uint8_t buf[LLVM_BLAKE3_BLOCK_LEN]; + uint8_t buf_len; + uint8_t blocks_compressed; + uint8_t flags; +} llvm_blake3_chunk_state; + +typedef struct { + uint32_t key[8]; + llvm_blake3_chunk_state chunk; + uint8_t cv_stack_len; + // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example, + // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk + // requires a 4th entry, rather than merging everything down to 1, because we + // don't know whether more input is coming. This is different from how the + // reference implementation does things. + uint8_t cv_stack[(LLVM_BLAKE3_MAX_DEPTH + 1) * LLVM_BLAKE3_OUT_LEN]; +} llvm_blake3_hasher; + +const char *llvm_blake3_version(void); +void llvm_blake3_hasher_init(llvm_blake3_hasher *self); +void llvm_blake3_hasher_init_keyed(llvm_blake3_hasher *self, + const uint8_t key[LLVM_BLAKE3_KEY_LEN]); +void llvm_blake3_hasher_init_derive_key(llvm_blake3_hasher *self, + const char *context); +void llvm_blake3_hasher_init_derive_key_raw(llvm_blake3_hasher *self, + const void *context, + size_t context_len); +void llvm_blake3_hasher_update(llvm_blake3_hasher *self, const void *input, + size_t input_len); +void llvm_blake3_hasher_finalize(const llvm_blake3_hasher *self, uint8_t *out, + size_t out_len); +void llvm_blake3_hasher_finalize_seek(const llvm_blake3_hasher *self, + uint64_t seek, uint8_t *out, + size_t out_len); +void llvm_blake3_hasher_reset(llvm_blake3_hasher *self); + +#ifdef __cplusplus +} +#endif + +#endif /* LLVM_C_BLAKE3_H */ diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 17b57de7b0aa..cdedb6ece992 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -155,7 +155,8 @@ struct APFloatBase { S_IEEEdouble, S_x87DoubleExtended, S_IEEEquad, - S_PPCDoubleDouble + S_PPCDoubleDouble, + S_MaxSemantics = S_PPCDoubleDouble }; static const llvm::fltSemantics &EnumToSemantics(Semantics S); diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index b1fc85d3c09d..4155cb260a2a 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -486,7 +486,7 @@ public: return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); } - /// Return true if this APInt value contains a sequence of ones with + /// Return true if this APInt value contains a non-empty sequence of ones with /// the remainder zero. bool isShiftedMask() const { if (isSingleWord()) @@ -496,6 +496,23 @@ public: return (Ones + LeadZ + countTrailingZeros()) == BitWidth; } + /// Return true if this APInt value contains a non-empty sequence of ones with + /// the remainder zero. 
If true, \p MaskIdx will specify the index of the + /// lowest set bit and \p MaskLen is updated to specify the length of the + /// mask, else neither are updated. + bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const { + if (isSingleWord()) + return isShiftedMask_64(U.VAL, MaskIdx, MaskLen); + unsigned Ones = countPopulationSlowCase(); + unsigned LeadZ = countLeadingZerosSlowCase(); + unsigned TrailZ = countTrailingZerosSlowCase(); + if ((Ones + LeadZ + TrailZ) != BitWidth) + return false; + MaskLen = Ones; + MaskIdx = TrailZ; + return true; + } + /// Compute an APInt containing numBits highbits from this APInt. /// /// Get an APInt with the same BitWidth as this APInt, just zero mask the low @@ -1201,7 +1218,7 @@ public: /// Truncate to new width. /// /// Truncate the APInt to a specified width. It is an error to specify a width - /// that is greater than or equal to the current width. + /// that is greater than the current width. APInt trunc(unsigned width) const; /// Truncate to new width with unsigned saturation. @@ -1221,7 +1238,7 @@ public: /// /// This operation sign extends the APInt to a new width. If the high order /// bit is set, the fill on the left will be done with 1 bits, otherwise zero. - /// It is an error to specify a width that is less than or equal to the + /// It is an error to specify a width that is less than the /// current width. APInt sext(unsigned width) const; @@ -1229,7 +1246,7 @@ public: /// /// This operation zero extends the APInt to a new width. The high order bits /// are filled with 0 bits. It is an error to specify a width that is less - /// than or equal to the current width. + /// than the current width. APInt zext(unsigned width) const; /// Sign extend or truncate to width @@ -1244,24 +1261,6 @@ public: /// extended, truncated, or left alone to make it that width. APInt zextOrTrunc(unsigned width) const; - /// Truncate to width - /// - /// Make this APInt have the bit width given by \p width. The value is - /// truncated or left alone to make it that width. - APInt truncOrSelf(unsigned width) const; - - /// Sign extend or truncate to width - /// - /// Make this APInt have the bit width given by \p width. The value is sign - /// extended, or left alone to make it that width. - APInt sextOrSelf(unsigned width) const; - - /// Zero extend or truncate to width - /// - /// Make this APInt have the bit width given by \p width. The value is zero - /// extended, or left alone to make it that width. - APInt zextOrSelf(unsigned width) const; - /// @} /// \name Bit Manipulation Operators /// @{ @@ -1489,6 +1488,11 @@ public: /// equivalent of the string given by \p str. static unsigned getBitsNeeded(StringRef str, uint8_t radix); + /// Get the bits that are sufficient to represent the string value. This may + /// over estimate the amount of bits required, but it does not require + /// parsing the value in the string. + static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix); + /// The APInt version of the countLeadingZeros functions in /// MathExtras.h. /// @@ -2235,12 +2239,16 @@ Optional GetMostSignificantDifferentBit(const APInt &A, /// Splat/Merge neighboring bits to widen/narrow the bitmask represented /// by \param A to \param NewBitWidth bits. /// +/// MatchAnyBits: (Default) /// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 /// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111 -/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other. /// -/// TODO: Do we need a mode where all bits must be set when merging down? 
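The strengthened isShiftedMask and the new ScaleBitMask mode above behave as in this sketch (values chosen to match the doc comments; not part of the patch):

#include "llvm/ADT/APInt.h"
using namespace llvm;

void shiftedMaskSketch() {
  APInt V(32, 0x0FF0); // a run of 8 ones starting at bit 4
  unsigned MaskIdx = 0, MaskLen = 0;
  if (V.isShiftedMask(MaskIdx, MaskLen)) {
    // MaskIdx == 4, MaskLen == 8; both are left untouched on failure.
  }

  // MatchAllBits merging: a wide bit is set only if every narrow bit was.
  APInt A(8, 0b00011011);
  APInt AnyBits = APIntOps::ScaleBitMask(A, 4);                        // 0b0111
  APInt AllBits = APIntOps::ScaleBitMask(A, 4, /*MatchAllBits=*/true); // 0b0001
  (void)AnyBits; (void)AllBits;
}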
-APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
+/// MatchAllBits:
+/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
+/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001
+/// A.getBitWidth() or NewBitWidth must be a whole multiple of the other.
+APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth,
+                   bool MatchAllBits = false);
 } // namespace APIntOps
 
 // See friend declaration above. This additional declaration is required in
diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h
new file mode 100644
index 000000000000..1953680d5222
--- /dev/null
+++ b/llvm/include/llvm/ADT/AddressRanges.h
@@ -0,0 +1,79 @@
+//===- AddressRanges.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ADDRESSRANGES_H
+#define LLVM_ADT_ADDRESSRANGES_H
+
+#include "llvm/ADT/Optional.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+
+/// A class that represents an address range. The range is specified using
+/// a start and an end address: [Start, End).
+class AddressRange {
+public:
+  AddressRange() {}
+  AddressRange(uint64_t S, uint64_t E) : Start(S), End(E) {
+    assert(Start <= End);
+  }
+  uint64_t start() const { return Start; }
+  uint64_t end() const { return End; }
+  uint64_t size() const { return End - Start; }
+  bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
+  bool intersects(const AddressRange &R) const {
+    return Start < R.End && R.Start < End;
+  }
+  bool operator==(const AddressRange &R) const {
+    return Start == R.Start && End == R.End;
+  }
+  bool operator!=(const AddressRange &R) const { return !(*this == R); }
+  bool operator<(const AddressRange &R) const {
+    return std::make_pair(Start, End) < std::make_pair(R.Start, R.End);
+  }
+
+private:
+  uint64_t Start = 0;
+  uint64_t End = 0;
+};
+
+/// The AddressRanges class helps normalize address range collections.
+/// This class keeps a sorted vector of AddressRange objects and can perform
+/// insertions and searches efficiently. The address ranges are always sorted
+/// and never contain any invalid or empty address ranges. Intersecting
+/// address ranges are combined during insertion.
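Taken together with the AddressRanges collection defined just below, the class above composes as in this sketch (illustrative only, not part of the patch):

#include "llvm/ADT/AddressRanges.h"
using namespace llvm;

void addressRangesSketch() {
  AddressRanges Ranges;
  Ranges.insert(AddressRange(0x1000, 0x2000));
  Ranges.insert(AddressRange(0x1800, 0x3000)); // intersects, so it is merged
  // Ranges.size() == 1 and the single entry is [0x1000, 0x3000).

  bool In = Ranges.contains(0x2abc); // true
  if (Optional<AddressRange> R = Ranges.getRangeThatContains(0x2abc)) {
    // R->start() == 0x1000, R->end() == 0x3000, R->size() == 0x2000
  }
  (void)In;
}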
+class AddressRanges {
+protected:
+  using Collection = std::vector<AddressRange>;
+  Collection Ranges;
+
+public:
+  void clear() { Ranges.clear(); }
+  bool empty() const { return Ranges.empty(); }
+  bool contains(uint64_t Addr) const;
+  bool contains(AddressRange Range) const;
+  Optional<AddressRange> getRangeThatContains(uint64_t Addr) const;
+  void insert(AddressRange Range);
+  void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
+  size_t size() const { return Ranges.size(); }
+  bool operator==(const AddressRanges &RHS) const {
+    return Ranges == RHS.Ranges;
+  }
+  const AddressRange &operator[](size_t i) const {
+    assert(i < Ranges.size());
+    return Ranges[i];
+  }
+  Collection::const_iterator begin() const { return Ranges.begin(); }
+  Collection::const_iterator end() const { return Ranges.end(); }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_ADDRESSRANGES_H
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index b6896395dae8..ee35a5686fc4 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -25,6 +25,7 @@
 #include <vector>
 
 namespace llvm {
+  template <typename T> class LLVM_NODISCARD MutableArrayRef;
 
   /// ArrayRef - Represent a constant reference to an array (0 or more elements
   /// consecutively in memory), i.e. a start pointer and a length. It allows
@@ -175,10 +176,10 @@
     }
 
     // copy - Allocate copy in Allocator and return ArrayRef<T> to it.
-    template <typename Allocator> ArrayRef<T> copy(Allocator &A) {
+    template <typename Allocator> MutableArrayRef<T> copy(Allocator &A) {
       T *Buff = A.template Allocate<T>(Length);
       std::uninitialized_copy(begin(), end(), Buff);
-      return ArrayRef<T>(Buff, Length);
+      return MutableArrayRef<T>(Buff, Length);
     }
 
     /// equals - Check for element-wise equality.
@@ -539,6 +540,42 @@
     return MutableArrayRef<T>(data, length);
   }
 
+  /// Construct a MutableArrayRef from a SmallVector.
+  template <typename T>
+  MutableArrayRef<T> makeMutableArrayRef(SmallVectorImpl<T> &Vec) {
+    return Vec;
+  }
+
+  /// Construct a MutableArrayRef from a SmallVector.
+  template <typename T, unsigned N>
+  MutableArrayRef<T> makeMutableArrayRef(SmallVector<T, N> &Vec) {
+    return Vec;
+  }
+
+  /// Construct a MutableArrayRef from a std::vector.
+  template <typename T>
+  MutableArrayRef<T> makeMutableArrayRef(std::vector<T> &Vec) {
+    return Vec;
+  }
+
+  /// Construct a MutableArrayRef from a std::array.
+  template <typename T, std::size_t N>
+  MutableArrayRef<T> makeMutableArrayRef(std::array<T, N> &Arr) {
+    return Arr;
+  }
+
+  /// Construct a MutableArrayRef from a MutableArrayRef (no-op) (const)
+  template <typename T>
+  MutableArrayRef<T> makeMutableArrayRef(const MutableArrayRef<T> &Vec) {
+    return Vec;
+  }
+
+  /// Construct a MutableArrayRef from a C array.
+  template <typename T, size_t N>
+  MutableArrayRef<T> makeMutableArrayRef(T (&Arr)[N]) {
+    return MutableArrayRef<T>(Arr);
+  }
+
   /// @}
   /// @name ArrayRef Comparison Operators
   /// @{
diff --git a/llvm/include/llvm/ADT/BitmaskEnum.h b/llvm/include/llvm/ADT/BitmaskEnum.h
index 89e5508e08e1..205da1240d44 100644
--- a/llvm/include/llvm/ADT/BitmaskEnum.h
+++ b/llvm/include/llvm/ADT/BitmaskEnum.h
@@ -77,7 +77,7 @@
 /// Get a bitmask with 1s in all places up to the high-order bit of E's largest
 /// value.
-template <typename E> std::underlying_type_t<E> Mask() {
+template <typename E> constexpr std::underlying_type_t<E> Mask() {
   // On overflow, NextPowerOf2 returns zero with the type uint64_t, so
   // subtracting 1 gives us the mask with all bits set, like we want.
   return NextPowerOf2(static_cast<std::underlying_type_t<E>>(
@@ -87,7 +87,7 @@
 /// Check that Val is in range for E, and return Val cast to E's underlying
 /// type.
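With Mask/Underlying above and the operators in the following hunk now constexpr, bitmask enums become usable in constant expressions. A sketch using the documented BitmaskEnum.h macros (names here are illustrative):

#include "llvm/ADT/BitmaskEnum.h"

namespace sketch {
enum Flags : unsigned {
  None = 0,
  Read = 1 << 0,
  Write = 1 << 1,
  Exec = 1 << 2,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Exec)
};
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Before this change the operators were runtime-only; now the whole
// expression folds at compile time and can feed a static_assert.
constexpr Flags ReadWrite = Read | Write;
static_assert((ReadWrite & Exec) == None, "no exec bit expected");
} // namespace sketch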
-template std::underlying_type_t Underlying(E Val) { +template constexpr std::underlying_type_t Underlying(E Val) { auto U = static_cast>(Val); assert(U >= 0 && "Negative enum values are not allowed."); assert(U <= Mask() && "Enum value too large (or largest val too small?)"); @@ -99,22 +99,22 @@ constexpr unsigned bitWidth(uint64_t Value) { } template ::value>> -E operator~(E Val) { +constexpr E operator~(E Val) { return static_cast(~Underlying(Val) & Mask()); } template ::value>> -E operator|(E LHS, E RHS) { +constexpr E operator|(E LHS, E RHS) { return static_cast(Underlying(LHS) | Underlying(RHS)); } template ::value>> -E operator&(E LHS, E RHS) { +constexpr E operator&(E LHS, E RHS) { return static_cast(Underlying(LHS) & Underlying(RHS)); } template ::value>> -E operator^(E LHS, E RHS) { +constexpr E operator^(E LHS, E RHS) { return static_cast(Underlying(LHS) ^ Underlying(RHS)); } diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h index 1312b5f91e83..807b0a92c48c 100644 --- a/llvm/include/llvm/ADT/BreadthFirstIterator.h +++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h @@ -80,7 +80,7 @@ private: inline void toNext() { Optional Head = VisitQueue.front(); - QueueElement H = Head.getValue(); + QueueElement H = *Head; NodeRef Node = H.first; Optional &ChildIt = H.second; diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 7673b66ca42a..c14414c46419 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -137,6 +137,7 @@ public: } } assert(NumEntries == 0 && "Node count imbalance!"); + (void)NumEntries; } setNumEntries(0); setNumTombstones(0); diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h index b06888494466..b46989bc5111 100644 --- a/llvm/include/llvm/ADT/EpochTracker.h +++ b/llvm/include/llvm/ADT/EpochTracker.h @@ -34,10 +34,10 @@ namespace llvm { /// is still valid. /// class DebugEpochBase { - uint64_t Epoch; + uint64_t Epoch = 0; public: - DebugEpochBase() : Epoch(0) {} + DebugEpochBase() = default; /// Calling incrementEpoch invalidates all handles pointing into the /// calling instance. diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h index f12b683ead2d..4f98b84cf97d 100644 --- a/llvm/include/llvm/ADT/EquivalenceClasses.h +++ b/llvm/include/llvm/ADT/EquivalenceClasses.h @@ -161,7 +161,8 @@ public: // /// iterator* - Provides a way to iterate over all values in the set. - using iterator = typename std::set::const_iterator; + using iterator = + typename std::set::const_iterator; iterator begin() const { return TheMapping.begin(); } iterator end() const { return TheMapping.end(); } diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h index 9cc69b8a8344..59ccea1f9d44 100644 --- a/llvm/include/llvm/ADT/FloatingPointMode.h +++ b/llvm/include/llvm/ADT/FloatingPointMode.h @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// Utilities for dealing with flags related to floating point mode controls. +/// Utilities for dealing with flags related to floating point properties and +/// mode controls. /// //===----------------------------------------------------------------------===/ @@ -193,4 +194,29 @@ void DenormalMode::print(raw_ostream &OS) const { } +/// Floating-point class tests, supported by 'is_fpclass' intrinsic. 
Actual +/// test may be an OR combination of basic tests. +enum FPClassTest { + fcSNan = 0x0001, + fcQNan = 0x0002, + fcNegInf = 0x0004, + fcNegNormal = 0x0008, + fcNegSubnormal = 0x0010, + fcNegZero = 0x0020, + fcPosZero = 0x0040, + fcPosSubnormal = 0x0080, + fcPosNormal = 0x0100, + fcPosInf = 0x0200, + + fcNan = fcSNan | fcQNan, + fcInf = fcPosInf | fcNegInf, + fcNormal = fcPosNormal | fcNegNormal, + fcSubnormal = fcPosSubnormal | fcNegSubnormal, + fcZero = fcPosZero | fcNegZero, + fcPosFinite = fcPosNormal | fcPosSubnormal | fcPosZero, + fcNegFinite = fcNegNormal | fcNegSubnormal | fcNegZero, + fcFinite = fcPosFinite | fcNegFinite, + fcAllFlags = fcNan | fcInf | fcFinite +}; + #endif // LLVM_ADT_FLOATINGPOINTMODE_H diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h index a8707f0ee81e..ec276d41da80 100644 --- a/llvm/include/llvm/ADT/FoldingSet.h +++ b/llvm/include/llvm/ADT/FoldingSet.h @@ -16,12 +16,14 @@ #ifndef LLVM_ADT_FOLDINGSET_H #define LLVM_ADT_FOLDINGSET_H +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include #include #include +#include #include namespace llvm { @@ -255,8 +257,8 @@ template struct DefaultFoldingSetTrait { /// through template specialization the behavior can be tailored for specific /// types. Combined with the FoldingSetNodeWrapper class, one can add objects /// to FoldingSets that were not originally designed to have that behavior. -template struct FoldingSetTrait - : public DefaultFoldingSetTrait {}; +template +struct FoldingSetTrait : public DefaultFoldingSetTrait {}; /// DefaultContextualFoldingSetTrait - Like DefaultFoldingSetTrait, but /// for ContextualFoldingSets. @@ -293,7 +295,9 @@ public: /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, /// used to lookup the node in the FoldingSetBase. - unsigned ComputeHash() const; + unsigned ComputeHash() const { + return static_cast(hash_combine_range(Data, Data + Size)); + } bool operator==(FoldingSetNodeIDRef) const; @@ -323,13 +327,33 @@ public: : Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {} /// Add* - Add various data types to Bit data. - void AddPointer(const void *Ptr); - void AddInteger(signed I); - void AddInteger(unsigned I); - void AddInteger(long I); - void AddInteger(unsigned long I); - void AddInteger(long long I); - void AddInteger(unsigned long long I); + void AddPointer(const void *Ptr) { + // Note: this adds pointers to the hash using sizes and endianness that + // depend on the host. It doesn't matter, however, because hashing on + // pointer values is inherently unstable. Nothing should depend on the + // ordering of nodes in the folding set. + static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long), + "unexpected pointer size"); + AddInteger(reinterpret_cast(Ptr)); + } + void AddInteger(signed I) { Bits.push_back(I); } + void AddInteger(unsigned I) { Bits.push_back(I); } + void AddInteger(long I) { AddInteger((unsigned long)I); } + void AddInteger(unsigned long I) { + if (sizeof(long) == sizeof(int)) + AddInteger(unsigned(I)); + else if (sizeof(long) == sizeof(long long)) { + AddInteger((unsigned long long)I); + } else { + llvm_unreachable("unexpected sizeof(long)"); + } + } + void AddInteger(long long I) { AddInteger((unsigned long long)I); } + void AddInteger(unsigned long long I) { + AddInteger(unsigned(I)); + AddInteger(unsigned(I >> 32)); + } + void AddBoolean(bool B) { AddInteger(B ? 
1U : 0U); } void AddString(StringRef String); void AddNodeID(const FoldingSetNodeID &ID); @@ -343,7 +367,9 @@ public: /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used /// to lookup the node in the FoldingSetBase. - unsigned ComputeHash() const; + unsigned ComputeHash() const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); + } /// operator== - Used to compare two nodes to each other. bool operator==(const FoldingSetNodeID &RHS) const; @@ -803,6 +829,13 @@ struct FoldingSetTrait> { } }; +template +struct FoldingSetTrait::value>> { + static void Profile(const T &X, FoldingSetNodeID &ID) { + ID.AddInteger(static_cast>(X)); + } +}; + } // end namespace llvm #endif // LLVM_ADT_FOLDINGSET_H diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index d443f9e21a47..ea2847f8c8ee 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -66,6 +66,44 @@ void GenericCycle::getExitBlocks( } } +template +auto GenericCycle::getCyclePreheader() const -> BlockT * { + BlockT *Predecessor = getCyclePredecessor(); + if (!Predecessor) + return nullptr; + + assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!"); + + if (succ_size(Predecessor) != 1) + return nullptr; + + // Make sure we are allowed to hoist instructions into the predecessor. + if (!Predecessor->isLegalToHoistInto()) + return nullptr; + + return Predecessor; +} + +template +auto GenericCycle::getCyclePredecessor() const -> BlockT * { + if (!isReducible()) + return nullptr; + + BlockT *Out = nullptr; + + // Loop over the predecessors of the header node... + BlockT *Header = getHeader(); + for (const auto Pred : predecessors(Header)) { + if (!contains(Pred)) { + if (Out && Out != Pred) + return nullptr; + Out = Pred; + } + } + + return Out; +} + /// \brief Helper class for computing cycle information. template class GenericCycleInfoCompute { using BlockT = typename ContextT::BlockT; @@ -267,8 +305,8 @@ void GenericCycleInfoCompute::dfs(BlockT *EntryBlock) { DFSTreeStack.emplace_back(TraverseStack.size()); llvm::append_range(TraverseStack, successors(Block)); - LLVM_ATTRIBUTE_UNUSED bool Added = BlockDFSInfo.try_emplace(Block, ++Counter).second; + (void)Added; assert(Added); BlockPreorder.push_back(Block); LLVM_DEBUG(errs() << " preorder number: " << Counter << "\n"); @@ -326,6 +364,19 @@ auto GenericCycleInfo::getCycle(const BlockT *Block) const return nullptr; } +/// \brief get the depth for the cycle which containing a given block. +/// +/// \returns the depth for the innermost cycle containing \p Block or 0 if it is +/// not contained in any cycle. +template +unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const { + CycleT *Cycle = getCycle(Block); + if (!Cycle) + return 0; + return Cycle->getDepth(); +} + +#ifndef NDEBUG /// \brief Validate the internal consistency of the cycle tree. /// /// Note that this does \em not check that cycles are really cycles in the CFG, @@ -391,6 +442,7 @@ bool GenericCycleInfo::validateTree() const { return true; } +#endif /// \brief Print the cycle info. 
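Before moving on, a minimal sketch of how the getCycleDepth query defined above might be consumed; the helper name and its parameters are invented for illustration only:

    #include "llvm/ADT/GenericCycleInfo.h"

    // Hypothetical helper: getCycleDepth returns 0 for blocks outside every
    // cycle and grows with nesting, so a depth above 1 means "nested cycle".
    template <typename ContextT>
    bool isInNestedCycle(const llvm::GenericCycleInfo<ContextT> &CI,
                         const typename ContextT::BlockT *BB) {
      return CI.getCycleDepth(BB) > 1;
    }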
template diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index d5f9cd9142ac..970664b85715 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -100,6 +100,10 @@ public: BlockT *getHeader() const { return Entries[0]; } + const SmallVectorImpl &getEntries() const { + return Entries; + } + /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); } @@ -124,6 +128,16 @@ public: /// branched to. void getExitBlocks(SmallVectorImpl &TmpStorage) const; + /// Return the preheader block for this cycle. The pre-header is well-defined + /// for a reducible cycle, as described in docs/LoopTerminology.rst: it is the + /// unique entering block, and its only edge is to the entry block. Return + /// null for irreducible cycles. + BlockT *getCyclePreheader() const; + + /// If the cycle has exactly one entry with exactly one predecessor, return + /// it, otherwise return nullptr. + BlockT *getCyclePredecessor() const; + /// Iteration over child cycles. //@{ using const_child_iterator_base = @@ -178,6 +192,7 @@ public: iterator_range entries() const { return llvm::make_range(Entries.begin(), Entries.end()); } + //@} Printable printEntries(const ContextT &Ctx) const { return Printable([this, &Ctx](raw_ostream &Out) { @@ -238,6 +253,7 @@ public: const ContextT &getSSAContext() const { return Context; } CycleT *getCycle(const BlockT *Block) const; + unsigned getCycleDepth(const BlockT *Block) const; CycleT *getTopLevelParentCycle(const BlockT *Block) const; /// Move \p Child to \p NewParent by manipulating Children vectors. @@ -248,7 +264,9 @@ public: /// Methods for debug and self-test. //@{ +#ifndef NDEBUG bool validateTree() const; +#endif void print(raw_ostream &Out) const; void dump() const { print(dbgs()); } //@} diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index 368ed46f98d2..57f02df252c0 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -106,13 +106,10 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/bit.h" -#include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/RecyclingAllocator.h" #include #include -#include #include #include #include @@ -969,7 +966,10 @@ public: private: // The root data is either a RootLeaf or a RootBranchData instance. - AlignedCharArrayUnion data; + union { + RootLeaf leaf; + RootBranchData branchData; + }; // Tree height. // 0: Leaves in root. @@ -983,25 +983,22 @@ private: // Allocator used for creating external nodes. Allocator &allocator; - /// Represent data as a node type without breaking aliasing rules.
- template T &dataAs() const { return *bit_cast(&data); } - const RootLeaf &rootLeaf() const { assert(!branched() && "Cannot acces leaf data in branched root"); - return dataAs(); + return leaf; } RootLeaf &rootLeaf() { assert(!branched() && "Cannot acces leaf data in branched root"); - return dataAs(); + return leaf; } - RootBranchData &rootBranchData() const { + const RootBranchData &rootBranchData() const { assert(branched() && "Cannot access branch data in non-branched root"); - return dataAs(); + return branchData; } RootBranchData &rootBranchData() { assert(branched() && "Cannot access branch data in non-branched root"); - return dataAs(); + return branchData; } const RootBranch &rootBranch() const { return rootBranchData().node; } @@ -1042,11 +1039,20 @@ private: public: explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) { - assert((uintptr_t(&data) & (alignof(RootLeaf) - 1)) == 0 && - "Insufficient alignment"); new(&rootLeaf()) RootLeaf(); } + // The default copy/move constructors and assignment operators would perform + // a shallow copy, leading to an incorrect internal state. To prevent + // accidental use, explicitly delete these operators. + // If necessary, implement them to perform a deep copy. + IntervalMap(const IntervalMap &Other) = delete; + IntervalMap(IntervalMap &&Other) = delete; + // Note: these are already implicitly deleted, because RootLeaf (union + // member) has a non-trivial assignment operator (because of std::pair). + IntervalMap &operator=(const IntervalMap &Other) = delete; + IntervalMap &operator=(IntervalMap &&Other) = delete; + ~IntervalMap() { clear(); rootLeaf().~RootLeaf(); diff --git a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h index 975535bb5676..e41eb0639ce3 100644 --- a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -84,7 +84,7 @@ protected: #ifndef NDEBUG ~RefCountedBase() { assert(RefCount == 0 && - "Destruction occured when there are still references to this."); + "Destruction occurred when there are still references to this."); } #else // Default the destructor in release builds, A trivial destructor may enable @@ -115,7 +115,7 @@ protected: #ifndef NDEBUG ~ThreadSafeRefCountedBase() { assert(RefCount == 0 && - "Destruction occured when there are still references to this."); + "Destruction occurred when there are still references to this."); } #else // Default the destructor in release builds, A trivial destructor may enable diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h index e047b0fc6514..d1615d903e98 100644 --- a/llvm/include/llvm/ADT/Optional.h +++ b/llvm/include/llvm/ADT/Optional.h @@ -60,85 +60,96 @@ template - constexpr explicit OptionalStorage(in_place_t, Args &&... 
args) - : value(std::forward(args)...), hasVal(true) {} + constexpr explicit OptionalStorage(in_place_t, Args &&...args) + : val(std::forward(args)...), hasVal(true) {} void reset() noexcept { if (hasVal) { - value.~T(); + val.~T(); hasVal = false; } } + constexpr bool has_value() const noexcept { return hasVal; } constexpr bool hasValue() const noexcept { return hasVal; } - T &getValue() LLVM_LVALUE_FUNCTION noexcept { + T &value() &noexcept { + assert(hasVal); + return val; + } + T &getValue() &noexcept { + assert(hasVal); + return val; + } + constexpr T const &value() const &noexcept { assert(hasVal); - return value; + return val; } - constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept { + constexpr T const &getValue() const &noexcept { assert(hasVal); - return value; + return val; } -#if LLVM_HAS_RVALUE_REFERENCE_THIS - T &&getValue() && noexcept { + T &&value() &&noexcept { assert(hasVal); - return std::move(value); + return std::move(val); + } + T &&getValue() &&noexcept { + assert(hasVal); + return std::move(val); } -#endif - template void emplace(Args &&... args) { + template void emplace(Args &&...args) { reset(); - ::new ((void *)std::addressof(value)) T(std::forward(args)...); + ::new ((void *)std::addressof(val)) T(std::forward(args)...); hasVal = true; } OptionalStorage &operator=(T const &y) { - if (hasValue()) { - value = y; + if (has_value()) { + val = y; } else { - ::new ((void *)std::addressof(value)) T(y); + ::new ((void *)std::addressof(val)) T(y); hasVal = true; } return *this; } OptionalStorage &operator=(T &&y) { - if (hasValue()) { - value = std::move(y); + if (has_value()) { + val = std::move(y); } else { - ::new ((void *)std::addressof(value)) T(std::move(y)); + ::new ((void *)std::addressof(val)) T(std::move(y)); hasVal = true; } return *this; } OptionalStorage &operator=(OptionalStorage const &other) { - if (other.hasValue()) { - if (hasValue()) { - value = other.value; + if (other.has_value()) { + if (has_value()) { + val = other.val; } else { - ::new ((void *)std::addressof(value)) T(other.value); + ::new ((void *)std::addressof(val)) T(other.val); hasVal = true; } } else { @@ -148,11 +159,11 @@ public: } OptionalStorage &operator=(OptionalStorage &&other) { - if (other.hasValue()) { - if (hasValue()) { - value = std::move(other.value); + if (other.has_value()) { + if (has_value()) { + val = std::move(other.val); } else { - ::new ((void *)std::addressof(value)) T(std::move(other.value)); + ::new ((void *)std::addressof(val)) T(std::move(other.val)); hasVal = true; } } else { @@ -165,7 +176,7 @@ public: template class OptionalStorage { union { char empty; - T value; + T val; }; bool hasVal = false; @@ -181,53 +192,64 @@ public: OptionalStorage &operator=(OptionalStorage &&other) = default; template - constexpr explicit OptionalStorage(in_place_t, Args &&... 
args) - : value(std::forward(args)...), hasVal(true) {} + constexpr explicit OptionalStorage(in_place_t, Args &&...args) + : val(std::forward(args)...), hasVal(true) {} void reset() noexcept { if (hasVal) { - value.~T(); + val.~T(); hasVal = false; } } + constexpr bool has_value() const noexcept { return hasVal; } constexpr bool hasValue() const noexcept { return hasVal; } - T &getValue() LLVM_LVALUE_FUNCTION noexcept { + T &value() &noexcept { + assert(hasVal); + return val; + } + T &getValue() &noexcept { assert(hasVal); - return value; + return val; } - constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept { + constexpr T const &value() const &noexcept { assert(hasVal); - return value; + return val; } -#if LLVM_HAS_RVALUE_REFERENCE_THIS - T &&getValue() && noexcept { + constexpr T const &getValue() const &noexcept { assert(hasVal); - return std::move(value); + return val; + } + T &&value() &&noexcept { + assert(hasVal); + return std::move(val); + } + T &&getValue() &&noexcept { + assert(hasVal); + return std::move(val); } -#endif - template void emplace(Args &&... args) { + template void emplace(Args &&...args) { reset(); - ::new ((void *)std::addressof(value)) T(std::forward(args)...); + ::new ((void *)std::addressof(val)) T(std::forward(args)...); hasVal = true; } OptionalStorage &operator=(T const &y) { - if (hasValue()) { - value = y; + if (has_value()) { + val = y; } else { - ::new ((void *)std::addressof(value)) T(y); + ::new ((void *)std::addressof(val)) T(y); hasVal = true; } return *this; } OptionalStorage &operator=(T &&y) { - if (hasValue()) { - value = std::move(y); + if (has_value()) { + val = std::move(y); } else { - ::new ((void *)std::addressof(value)) T(std::move(y)); + ::new ((void *)std::addressof(val)) T(std::move(y)); hasVal = true; } return *this; @@ -278,52 +300,55 @@ public: void reset() { Storage.reset(); } - constexpr const T *getPointer() const { return &Storage.getValue(); } - T *getPointer() { return &Storage.getValue(); } - constexpr const T &getValue() const LLVM_LVALUE_FUNCTION { - return Storage.getValue(); - } - T &getValue() LLVM_LVALUE_FUNCTION { return Storage.getValue(); } + constexpr const T *getPointer() const { return &Storage.value(); } + T *getPointer() { return &Storage.value(); } + constexpr const T &value() const & { return Storage.value(); } + constexpr const T &getValue() const & { return Storage.value(); } + T &value() & { return Storage.value(); } + T &getValue() & { return Storage.value(); } - constexpr explicit operator bool() const { return hasValue(); } - constexpr bool hasValue() const { return Storage.hasValue(); } + constexpr explicit operator bool() const { return has_value(); } + constexpr bool has_value() const { return Storage.has_value(); } + constexpr bool hasValue() const { return Storage.has_value(); } constexpr const T *operator->() const { return getPointer(); } T *operator->() { return getPointer(); } - constexpr const T &operator*() const LLVM_LVALUE_FUNCTION { - return getValue(); - } - T &operator*() LLVM_LVALUE_FUNCTION { return getValue(); } + constexpr const T &operator*() const & { return value(); } + T &operator*() & { return value(); } - template - constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION { - return hasValue() ? getValue() : std::forward(value); + template constexpr T value_or(U &&alt) const & { + return has_value() ? value() : std::forward(alt); + } + template constexpr T getValueOr(U &&alt) const & { + return has_value() ? 
value() : std::forward(alt); } /// Apply a function to the value if present; otherwise return None. template - auto map(const Function &F) const LLVM_LVALUE_FUNCTION - -> Optional { - if (*this) return F(getValue()); + auto map(const Function &F) const & -> Optional { + if (*this) + return F(value()); return None; } -#if LLVM_HAS_RVALUE_REFERENCE_THIS - T &&getValue() && { return std::move(Storage.getValue()); } - T &&operator*() && { return std::move(Storage.getValue()); } + T &&value() && { return std::move(Storage.value()); } + T &&getValue() && { return std::move(Storage.value()); } + T &&operator*() && { return std::move(Storage.value()); } - template - T getValueOr(U &&value) && { - return hasValue() ? std::move(getValue()) : std::forward(value); + template T value_or(U &&alt) && { + return has_value() ? std::move(value()) : std::forward(alt); + } + template T getValueOr(U &&alt) && { + return has_value() ? std::move(value()) : std::forward(alt); } /// Apply a function to the value if present; otherwise return None. template - auto map(const Function &F) && - -> Optional { - if (*this) return F(std::move(*this).getValue()); + auto map(const Function &F) + && -> Optional { + if (*this) + return F(std::move(*this).value()); return None; } -#endif }; template llvm::hash_code hash_value(const Optional &O) { @@ -334,7 +359,7 @@ template constexpr bool operator==(const Optional &X, const Optional &Y) { if (X && Y) return *X == *Y; - return X.hasValue() == Y.hasValue(); + return X.has_value() == Y.has_value(); } template @@ -346,7 +371,7 @@ template constexpr bool operator<(const Optional &X, const Optional &Y) { if (X && Y) return *X < *Y; - return X.hasValue() < Y.hasValue(); + return X.has_value() < Y.has_value(); } template @@ -389,7 +414,7 @@ template constexpr bool operator<(const Optional &, NoneType) { } template constexpr bool operator<(NoneType, const Optional &X) { - return X.hasValue(); + return X.has_value(); } template diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index b7ddf8855605..7d10b2a6dd14 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -61,19 +61,19 @@ public: IntType getInt() const { return (IntType)Info::getInt(Value); } - void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { + void setPointer(PointerTy PtrVal) & { Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION { + void setInt(IntType IntVal) & { Value = Info::updateInt(Value, static_cast(IntVal)); } - void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { + void initWithPointer(PointerTy PtrVal) & { Value = Info::updatePointer(0, PtrVal); } - void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION { + void setPointerAndInt(PointerTy PtrVal, IntType IntVal) & { Value = Info::updateInt(Info::updatePointer(0, PtrVal), static_cast(IntVal)); } @@ -91,7 +91,7 @@ public: void *getOpaqueValue() const { return reinterpret_cast(Value); } - void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION { + void setFromOpaqueValue(void *Val) & { Value = reinterpret_cast(Val); } diff --git a/llvm/include/llvm/ADT/PointerSumType.h b/llvm/include/llvm/ADT/PointerSumType.h index a7ef774e205e..57f045035a78 100644 --- a/llvm/include/llvm/ADT/PointerSumType.h +++ b/llvm/include/llvm/ADT/PointerSumType.h @@ -272,11 +272,12 @@ struct DenseMapInfo> { using SomePointerInfo = DenseMapInfo; static inline SumType getEmptyKey() { - return 
SumType::create(SomePointerInfo::getEmptyKey()); + return SumType::template create(SomePointerInfo::getEmptyKey()); } static inline SumType getTombstoneKey() { - return SumType::create(SomePointerInfo::getTombstoneKey()); + return SumType::template create( + SomePointerInfo::getTombstoneKey()); } static unsigned getHashValue(const SumType &Arg) { diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index 04d566bbc75e..f01db09dd765 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include #include @@ -87,6 +88,9 @@ namespace pointer_union_detail { }; } +// This is a forward declaration of CastInfoPointerUnionImpl +// Refer to its definition below for further details +template struct CastInfoPointerUnionImpl; /// A discriminated union of two or more pointer types, with the discriminator /// in the low bit of the pointer. /// @@ -122,6 +126,11 @@ class PointerUnion using First = TypeAtIndex<0, PTs...>; using Base = typename PointerUnion::PointerUnionMembers; + /// This is needed to give the CastInfo implementation below access + /// to protected members. + /// Refer to its definition for further details. + friend struct CastInfoPointerUnionImpl; + public: PointerUnion() = default; @@ -134,25 +143,24 @@ public: explicit operator bool() const { return !isNull(); } + // FIXME: Replace the uses of is(), get() and dyn_cast() with + // isa, cast and the llvm::dyn_cast + /// Test if the Union currently holds the type matching T. - template bool is() const { - return this->Val.getInt() == FirstIndexOfType::value; - } + template inline bool is() const { return isa(*this); } /// Returns the value of the specified pointer type. /// /// If the specified pointer type is incorrect, assert. - template T get() const { - assert(is() && "Invalid accessor called"); - return PointerLikeTypeTraits::getFromVoidPointer(this->Val.getPointer()); + template inline T get() const { + assert(isa(*this) && "Invalid accessor called"); + return cast(*this); } /// Returns the current pointer if it is of the specified pointer type, /// otherwise returns null. - template T dyn_cast() const { - if (is()) - return get(); - return T(); + template inline T dyn_cast() const { + return llvm::dyn_cast(*this); } /// If the union is set to the first pointer type get an address pointing to @@ -205,6 +213,52 @@ bool operator<(PointerUnion lhs, PointerUnion rhs) { return lhs.getOpaqueValue() < rhs.getOpaqueValue(); } +/// We can't (at least, at this moment with C++14) declare CastInfo +/// as a friend of PointerUnion like this: +/// ``` +/// template +/// friend struct CastInfo>; +/// ``` +/// The compiler complains 'Partial specialization cannot be declared as a +/// friend'. +/// So we define this struct to be a bridge between CastInfo and +/// PointerUnion. 
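A minimal usage sketch of the cast machinery this bridge enables; the member types int* and float* are assumed purely for illustration:

    #include "llvm/ADT/PointerUnion.h"
    using namespace llvm;

    void inspect(PointerUnion<int *, float *> PU) {
      if (isa<int *>(PU)) {          // what PU.is<int *>() now forwards to
        int *IP = cast<int *>(PU);   // what PU.get<int *>() now forwards to
        (void)IP;
      }
      // dyn_cast returns a default-constructed (null) pointer on mismatch.
      if (float *FP = dyn_cast<float *>(PU))
        (void)FP;
    }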
+template struct CastInfoPointerUnionImpl { + using From = PointerUnion; + + template static inline bool isPossible(From &F) { + return F.Val.getInt() == FirstIndexOfType::value; + } + + template static To doCast(From &F) { + assert(isPossible(F) && "cast to an incompatible type!"); + return PointerLikeTypeTraits::getFromVoidPointer(F.Val.getPointer()); + } +}; + +// Specialization of CastInfo for PointerUnion +template +struct CastInfo> + : public DefaultDoCastIfPossible, + CastInfo>> { + using From = PointerUnion; + using Impl = CastInfoPointerUnionImpl; + + static inline bool isPossible(From &f) { + return Impl::template isPossible(f); + } + + static To doCast(From &f) { return Impl::template doCast(f); } + + static inline To castFailed() { return To(); } +}; + +template +struct CastInfo> + : public ConstStrippingForwardingCast, + CastInfo>> { +}; + // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has // # low bits available = min(PT1bits,PT2bits)-1. template diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h index ad35e09f0f74..e4035a02b5f5 100644 --- a/llvm/include/llvm/ADT/SCCIterator.h +++ b/llvm/include/llvm/ADT/SCCIterator.h @@ -348,9 +348,14 @@ scc_member_iterator::scc_member_iterator( NodeInfoMap[Edge->Target].Visited = false; std::queue Queue; - for (auto &Node : NodeInfoMap) - if (Node.second.Visited) - Queue.push(Node.first); + // Initialize the queue with MST roots. Note that walking through SortedEdges + // instead of NodeInfoMap ensures an ordered deterministic push. + for (auto *Edge : SortedEdges) { + if (NodeInfoMap[Edge->Source].Visited) { + Queue.push(Edge->Source); + NodeInfoMap[Edge->Source].Visited = false; + } + } while (!Queue.empty()) { auto *Node = Queue.front(); diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index e2972f4f902a..0efa96e69a8c 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -129,7 +129,7 @@ struct function_traits { /// Overload for class function types. template struct function_traits - : function_traits {}; + : public function_traits {}; /// Overload for non-class function types. template struct function_traits { @@ -143,6 +143,9 @@ struct function_traits { template using arg_t = typename std::tuple_element>::type; }; +template +struct function_traits + : public function_traits {}; /// Overload for non-class function type references. template struct function_traits @@ -203,6 +206,17 @@ struct FirstIndexOfType : std::integral_constant {}; template using TypeAtIndex = std::tuple_element_t>; +/// Helper which adds two underlying types of enumeration type. +/// Implicit conversion to a common type is accepted. +template ::value, + std::underlying_type_t>, + typename UT2 = std::enable_if_t::value, + std::underlying_type_t>> +constexpr auto addEnumValues(EnumTy1 LHS, EnumTy2 RHS) { + return static_cast(LHS) + static_cast(RHS); +} + //===----------------------------------------------------------------------===// // Extra additions to //===----------------------------------------------------------------------===// @@ -268,6 +282,13 @@ template auto drop_begin(T &&RangeOrContainer, size_t N = 1) { adl_end(RangeOrContainer)); } +/// Return a range covering \p RangeOrContainer with the last N elements +/// excluded.
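For illustration, a small sketch of the drop_end helper documented above and defined immediately below; the container contents are assumed:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    void keepAllButLast(const llvm::SmallVectorImpl<int> &V) {
      // Precondition: V has at least N (here 1) elements, as with drop_begin.
      for (int X : llvm::drop_end(V)) {
        // Visits every element except the last one.
        (void)X;
      }
    }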
+template auto drop_end(T &&RangeOrContainer, size_t N = 1) { + return make_range(adl_begin(RangeOrContainer), + std::prev(adl_end(RangeOrContainer), N)); +} + // mapped_iterator - This is a simple iterator adapter that causes a function to // be applied whenever operator* is invoked on the iterator. @@ -423,6 +444,16 @@ public: findNextValid(); return *this; } + + decltype(auto) operator*() const { + assert(BaseT::wrapped() != End && "Cannot dereference end iterator!"); + return BaseT::operator*(); + } + + decltype(auto) operator->() const { + assert(BaseT::wrapped() != End && "Cannot dereference end iterator!"); + return BaseT::operator->(); + } }; /// Specialization of filter_iterator_base for forward iteration only. @@ -1160,13 +1191,15 @@ public: } /// Compare this range with another. - template bool operator==(const OtherT &other) const { - return size() == - static_cast(std::distance(other.begin(), other.end())) && - std::equal(begin(), end(), other.begin()); + template + friend bool operator==(const indexed_accessor_range_base &lhs, + const OtherT &rhs) { + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); } - template bool operator!=(const OtherT &other) const { - return !(*this == other); + template + friend bool operator!=(const indexed_accessor_range_base &lhs, + const OtherT &rhs) { + return !(lhs == rhs); } /// Return the size of this range. @@ -1650,6 +1683,15 @@ bool is_contained(R &&Range, const E &Element) { return std::find(adl_begin(Range), adl_end(Range), Element) != adl_end(Range); } +template +constexpr bool is_contained(std::initializer_list Set, T Value) { + // TODO: Use std::find when we switch to C++20. + for (T V : Set) + if (V == Value) + return true; + return false; +} + /// Wrapper function around std::is_sorted to check if elements in a range \p R /// are sorted with respect to a comparator \p C. template bool is_sorted(R &&Range, Compare C) { diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index a4a790323a6b..e34702bdbb3c 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// /// -/// /file +/// \file /// This file defines the SmallVector class. /// //===----------------------------------------------------------------------===// @@ -949,6 +949,9 @@ public: return std::lexicographical_compare(this->begin(), this->end(), RHS.begin(), RHS.end()); } + bool operator>(const SmallVectorImpl &RHS) const { return RHS < *this; } + bool operator<=(const SmallVectorImpl &RHS) const { return !(*this > RHS); } + bool operator>=(const SmallVectorImpl &RHS) const { return !(*this < RHS); } }; template diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h index c39e161bcbcd..6c195cc44990 100644 --- a/llvm/include/llvm/ADT/Statistic.h +++ b/llvm/include/llvm/ADT/Statistic.h @@ -53,7 +53,7 @@ public: const char *const Name; const char *const Desc; - std::atomic Value; + std::atomic Value; std::atomic Initialized; constexpr TrackingStatistic(const char *DebugType, const char *Name, @@ -65,12 +65,12 @@ public: const char *getName() const { return Name; } const char *getDesc() const { return Desc; } - unsigned getValue() const { return Value.load(std::memory_order_relaxed); } + uint64_t getValue() const { return Value.load(std::memory_order_relaxed); } // Allow use of this class as the value itself. 
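Now that the counters are 64-bit, "use of this class as the value itself" lets a statistic stand in for a uint64_t without truncation; a small sketch (the statistic name and debug type are assumed):

    #include "llvm/ADT/Statistic.h"

    #define DEBUG_TYPE "example"
    STATISTIC(NumBytes, "Number of bytes processed");

    void tally(uint64_t N) {
      NumBytes += N;                 // relaxed atomic fetch_add underneath
      uint64_t Current = NumBytes;   // implicit conversion via operator uint64_t
      (void)Current;
    }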
- operator unsigned() const { return getValue(); } + operator uint64_t() const { return getValue(); } - const TrackingStatistic &operator=(unsigned Val) { + const TrackingStatistic &operator=(uint64_t Val) { Value.store(Val, std::memory_order_relaxed); return init(); } @@ -80,7 +80,7 @@ public: return init(); } - unsigned operator++(int) { + uint64_t operator++(int) { init(); return Value.fetch_add(1, std::memory_order_relaxed); } @@ -90,27 +90,27 @@ public: return init(); } - unsigned operator--(int) { + uint64_t operator--(int) { init(); return Value.fetch_sub(1, std::memory_order_relaxed); } - const TrackingStatistic &operator+=(unsigned V) { + const TrackingStatistic &operator+=(uint64_t V) { if (V == 0) return *this; Value.fetch_add(V, std::memory_order_relaxed); return init(); } - const TrackingStatistic &operator-=(unsigned V) { + const TrackingStatistic &operator-=(uint64_t V) { if (V == 0) return *this; Value.fetch_sub(V, std::memory_order_relaxed); return init(); } - void updateMax(unsigned V) { - unsigned PrevMax = Value.load(std::memory_order_relaxed); + void updateMax(uint64_t V) { + uint64_t PrevMax = Value.load(std::memory_order_relaxed); // Keep trying to update max until we succeed or another thread produces // a bigger max than us. while (V > PrevMax && !Value.compare_exchange_weak( @@ -134,26 +134,26 @@ public: NoopStatistic(const char * /*DebugType*/, const char * /*Name*/, const char * /*Desc*/) {} - unsigned getValue() const { return 0; } + uint64_t getValue() const { return 0; } // Allow use of this class as the value itself. - operator unsigned() const { return 0; } + operator uint64_t() const { return 0; } - const NoopStatistic &operator=(unsigned Val) { return *this; } + const NoopStatistic &operator=(uint64_t Val) { return *this; } const NoopStatistic &operator++() { return *this; } - unsigned operator++(int) { return 0; } + uint64_t operator++(int) { return 0; } const NoopStatistic &operator--() { return *this; } - unsigned operator--(int) { return 0; } + uint64_t operator--(int) { return 0; } - const NoopStatistic &operator+=(const unsigned &V) { return *this; } + const NoopStatistic &operator+=(const uint64_t &V) { return *this; } - const NoopStatistic &operator-=(const unsigned &V) { return *this; } + const NoopStatistic &operator-=(const uint64_t &V) { return *this; } - void updateMax(unsigned V) {} + void updateMax(uint64_t V) {} }; #if LLVM_ENABLE_STATS @@ -200,7 +200,7 @@ void PrintStatisticsJSON(raw_ostream &OS); /// during it's execution. It will return the value at the point that it is /// read. However, it will prevent new statistics from registering until it /// completes. -const std::vector> GetStatistics(); +const std::vector> GetStatistics(); /// Reset the statistics. This can be used to zero and de-register the /// statistics in order to measure a compilation. diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 118def2f43e1..80ba47dd619c 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -240,6 +240,10 @@ namespace llvm { unsigned edit_distance(StringRef Other, bool AllowReplacements = true, unsigned MaxEditDistance = 0) const; + LLVM_NODISCARD unsigned + edit_distance_insensitive(StringRef Other, bool AllowReplacements = true, + unsigned MaxEditDistance = 0) const; + /// str - Get the contents as an std::string. 
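A brief sketch of the case-insensitive variant declared above; the strings and the distance threshold are assumed:

    #include "llvm/ADT/StringRef.h"

    bool isCloseSpelling(llvm::StringRef Candidate, llvm::StringRef Input) {
      // "BitCast" vs. "bitcast" yields 0 here, since only the case differs.
      return Candidate.edit_distance_insensitive(
                 Input, /*AllowReplacements=*/true,
                 /*MaxEditDistance=*/2) <= 2;
    }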
LLVM_NODISCARD std::string str() const { diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 42277c013035..9d85a28fbf04 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -56,7 +56,10 @@ public: bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) csky, // CSKY: csky + dxil, // DXIL 32-bit DirectX bytecode hexagon, // Hexagon: hexagon + loongarch32, // LoongArch (32-bit): loongarch32 + loongarch64, // LoongArch (64-bit): loongarch64 m68k, // M68k: Motorola 680x0 family mips, // MIPS: mips, mipsallegrex, mipsr6 mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el @@ -146,7 +149,15 @@ public: MipsSubArch_r6, - PPCSubArch_spe + PPCSubArch_spe, + + // SPIR-V sub-arch corresponds to its version. + SPIRVSubArch_v10, + SPIRVSubArch_v11, + SPIRVSubArch_v12, + SPIRVSubArch_v13, + SPIRVSubArch_v14, + SPIRVSubArch_v15, }; enum VendorType { UnknownVendor, @@ -195,9 +206,11 @@ public: NVCL, // NVIDIA OpenCL AMDHSA, // AMD HSA Runtime PS4, + PS5, ELFIAMCU, TvOS, // Apple tvOS WatchOS, // Apple watchOS + DriverKit, // Apple DriverKit Mesa3D, Contiki, AMDPAL, // AMD PAL Runtime @@ -205,7 +218,8 @@ public: Hurd, // GNU/Hurd WASI, // Experimental WebAssembly OS Emscripten, - LastOSType = Emscripten + ShaderModel, // DirectX ShaderModel + LastOSType = ShaderModel }; enum EnvironmentType { UnknownEnvironment, @@ -232,15 +246,35 @@ public: CoreCLR, Simulator, // Simulator variants of other systems, e.g., Apple's iOS MacABI, // Mac Catalyst variant of Apple's iOS deployment target. - LastEnvironmentType = MacABI + + // Shader Stages + Pixel, + Vertex, + Geometry, + Hull, + Domain, + Compute, + Library, + RayGeneration, + Intersection, + AnyHit, + ClosestHit, + Miss, + Callable, + Mesh, + Amplification, + + LastEnvironmentType = Amplification }; enum ObjectFormatType { UnknownObjectFormat, COFF, + DXContainer, ELF, GOFF, MachO, + SPIRV, Wasm, XCOFF, }; @@ -360,6 +394,9 @@ public: /// with WatchOS or generic triples. VersionTuple getWatchOSVersion() const; + /// Parse the version number as with getOSVersion. + VersionTuple getDriverKitVersion() const; + /// @} /// @name Direct Component Access /// @{ @@ -462,11 +499,14 @@ public: return getSubArch() == Triple::ARMSubArch_v7k; } + /// Is this an Apple DriverKit triple. + bool isDriverKit() const { return getOS() == Triple::DriverKit; } + bool isOSzOS() const { return getOS() == Triple::ZOS; } - /// Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS). + /// Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit). bool isOSDarwin() const { - return isMacOSX() || isiOS() || isWatchOS(); + return isMacOSX() || isiOS() || isWatchOS() || isDriverKit(); } bool isSimulatorEnvironment() const { @@ -640,19 +680,23 @@ public: return getObjectFormat() == Triple::XCOFF; } - /// Tests whether the target is the PS4 CPU - bool isPS4CPU() const { + /// Tests whether the target is the PS4 platform. + bool isPS4() const { return getArch() == Triple::x86_64 && getVendor() == Triple::SCEI && getOS() == Triple::PS4; } - /// Tests whether the target is the PS4 platform - bool isPS4() const { - return getVendor() == Triple::SCEI && - getOS() == Triple::PS4; + /// Tests whether the target is the PS5 platform. + bool isPS5() const { + return getArch() == Triple::x86_64 && + getVendor() == Triple::SCEI && + getOS() == Triple::PS5; } + /// Tests whether the target is the PS4 or PS5 platform. 
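A quick sketch of the reworked PlayStation predicates; the triple spelling is assumed, and isPS() is declared just below:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    void checkPS() {
      llvm::Triple T("x86_64-scei-ps5");
      assert(T.isPS5() && T.isPS() && !T.isPS4());
      // Per the hunk below, PS targets keep dllimport/export support.
      assert(T.hasDLLImportExport());
    }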
+ bool isPS() const { return isPS4() || isPS5(); } + /// Tests whether the target is Android bool isAndroid() const { return getEnvironment() == Triple::Android; } @@ -676,6 +720,11 @@ public: getEnvironment() == Triple::MuslX32; } + /// Tests whether the target is DXIL. + bool isDXIL() const { + return getArch() == Triple::dxil; + } + /// Tests whether the target is SPIR (32- or 64-bit). bool isSPIR() const { return getArch() == Triple::spir || getArch() == Triple::spir64; @@ -774,6 +823,11 @@ public: : PointerWidth == 64; } + /// Tests whether the target is LoongArch (32- and 64-bit). + bool isLoongArch() const { + return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64; + } + /// Tests whether the target is MIPS 32-bit (little and big endian). bool isMIPS32() const { return getArch() == Triple::mips || getArch() == Triple::mipsel; @@ -810,6 +864,17 @@ public: return getArch() == Triple::riscv32 || getArch() == Triple::riscv64; } + /// Tests whether the target is 32-bit SPARC (little and big endian). + bool isSPARC32() const { + return getArch() == Triple::sparc || getArch() == Triple::sparcel; + } + + /// Tests whether the target is 64-bit SPARC (big endian). + bool isSPARC64() const { return getArch() == Triple::sparcv9; } + + /// Tests whether the target is SPARC. + bool isSPARC() const { return isSPARC32() || isSPARC64(); } + /// Tests whether the target is SystemZ. bool isSystemZ() const { return getArch() == Triple::systemz; @@ -863,7 +928,7 @@ public: } /// Tests if the environment supports dllimport/export annotations. - bool hasDLLImportExport() const { return isOSWindows() || isPS4CPU(); } + bool hasDLLImportExport() const { return isOSWindows() || isPS(); } /// @} /// @name Mutators @@ -971,7 +1036,7 @@ public: /// Get the "prefix" canonical name for the \p Kind architecture. This is the /// prefix used by the architecture specific builtins, and is suitable for - /// passing to \see Intrinsic::getIntrinsicForGCCBuiltin(). + /// passing to \see Intrinsic::getIntrinsicForClangBuiltin(). /// /// \return - The architecture prefix, or 0 if none is defined. static StringRef getArchTypePrefix(ArchType Kind); diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h index c480c1e7cd78..6df3db6125d4 100644 --- a/llvm/include/llvm/ADT/edit_distance.h +++ b/llvm/include/llvm/ADT/edit_distance.h @@ -28,6 +28,9 @@ namespace llvm { /// /// \param ToArray the second sequence to compare. /// +/// \param Map A Functor to apply to each item of the sequences before +/// comparison. +/// /// \param AllowReplacements whether to allow element replacements (change one /// element into another) as a single operation, rather than as two operations /// (an insertion and a removal). @@ -39,10 +42,10 @@ namespace llvm { /// \returns the minimum number of element insertions, removals, or (if /// \p AllowReplacements is \c true) replacements needed to transform one of /// the given sequences into the other. If zero, the sequences are identical. 
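The hunk below generalizes this routine with a mapping functor; a sketch of a case-folding use (the helper name is assumed), which is essentially what StringRef::edit_distance_insensitive builds on:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringExtras.h" // llvm::toLower
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/edit_distance.h"

    unsigned caseInsensitiveDistance(llvm::StringRef A, llvm::StringRef B) {
      // The mapper is applied to each element before comparison.
      return llvm::ComputeMappedEditDistance(
          llvm::makeArrayRef(A.data(), A.size()),
          llvm::makeArrayRef(B.data(), B.size()),
          [](char C) { return llvm::toLower(C); });
    }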
-template -unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, - bool AllowReplacements = true, - unsigned MaxEditDistance = 0) { +template +unsigned ComputeMappedEditDistance(ArrayRef FromArray, ArrayRef ToArray, + Functor Map, bool AllowReplacements = true, + unsigned MaxEditDistance = 0) { // The algorithm implemented below is the "classic" // dynamic-programming algorithm for computing the Levenshtein // distance, which is described here: @@ -58,6 +61,15 @@ unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, typename ArrayRef::size_type m = FromArray.size(); typename ArrayRef::size_type n = ToArray.size(); + if (MaxEditDistance) { + // If the difference in size between the 2 arrays is larger than the max + // distance allowed, we can bail out as we will always need at least + // MaxEditDistance insertions or removals. + typename ArrayRef::size_type AbsDiff = m > n ? m - n : n - m; + if (AbsDiff > MaxEditDistance) + return MaxEditDistance + 1; + } + const unsigned SmallBufferSize = 64; unsigned SmallBuffer[SmallBufferSize]; std::unique_ptr Allocated; @@ -75,15 +87,16 @@ unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, unsigned BestThisRow = Row[0]; unsigned Previous = y - 1; + const auto &CurItem = Map(FromArray[y - 1]); for (typename ArrayRef::size_type x = 1; x <= n; ++x) { int OldRow = Row[x]; if (AllowReplacements) { - Row[x] = std::min( - Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u), - std::min(Row[x-1], Row[x])+1); + Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u), + std::min(Row[x - 1], Row[x]) + 1); } else { - if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous; + if (CurItem == Map(ToArray[x - 1])) + Row[x] = Previous; else Row[x] = std::min(Row[x-1], Row[x]) + 1; } Previous = OldRow; @@ -98,6 +111,15 @@ unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, return Result; } +template +unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, + bool AllowReplacements = true, + unsigned MaxEditDistance = 0) { + return ComputeMappedEditDistance( + FromArray, ToArray, [](const T &X) -> const T & { return X; }, + AllowReplacements, MaxEditDistance); +} + } // End llvm namespace #endif diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index d4febe6c1db9..c065553db8e9 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -38,7 +38,6 @@ #define LLVM_ANALYSIS_ALIASANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/MemoryLocation.h" @@ -64,6 +63,7 @@ class LoopInfo; class PreservedAnalyses; class TargetLibraryInfo; class Value; +template class SmallPtrSetImpl; /// The possible results of an alias query. /// @@ -413,8 +413,12 @@ class EarliestEscapeInfo final : public CaptureInfo { /// This is used for cache invalidation purposes. DenseMap> Inst2Obj; + const SmallPtrSetImpl &EphValues; + public: - EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI) : DT(DT), LI(LI) {} + EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI, + const SmallPtrSetImpl &EphValues) + : DT(DT), LI(LI), EphValues(EphValues) {} bool isNotCapturedBeforeOrAt(const Value *Object, const Instruction *I) override; @@ -1267,6 +1271,10 @@ bool isIdentifiedObject(const Value *V); /// IdentifiedObjects. 
bool isIdentifiedFunctionLocal(const Value *V); +/// Returns true if the pointer is one which would have been considered an +/// escape by isNonEscapingLocalObject. +bool isEscapeSource(const Value *V); + /// Return true if Object memory is not visible after an unwind, in the sense /// that program semantics cannot depend on Object containing any particular /// value on unwind. If the RequiresNoCaptureBeforeUnwind out parameter is set diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h index 2dd2e7ca916d..48181cc52626 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h +++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h @@ -24,12 +24,12 @@ #ifndef LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H #define LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { class AAResults; +class Function; +class FunctionPass; class AAEvaluator : public PassInfoMixin { int64_t FunctionCount = 0; diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index b66ff395454d..78f5545ab215 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -22,13 +22,10 @@ #include "llvm/ADT/ilist_node.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Casting.h" #include #include -#include #include #include @@ -224,10 +221,6 @@ public: // track of the list's exact size. unsigned size() { return SetSize; } - /// If this alias set is known to contain a single instruction and *only* a - /// single unique instruction, return it. Otherwise, return nullptr. - Instruction* getUniqueInstruction(); - void print(raw_ostream &OS) const; void dump() const; diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h index 77da19110246..785980130386 100644 --- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h +++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h @@ -14,14 +14,14 @@ #ifndef LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H #define LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/IntrinsicInst.h" namespace llvm { class AssumptionCache; class DominatorTree; +class Instruction; +class Value; /// Index of elements in the operand bundle. 
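Stepping back to the AliasAnalysis.h change above: a construction sketch for the extended EarliestEscapeInfo; the surrounding analyses and the use of CodeMetrics::collectEphemeralValues are assumed:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/CodeMetrics.h"

    llvm::EarliestEscapeInfo
    makeEscapeInfo(llvm::Function &F, llvm::AssumptionCache &AC,
                   llvm::DominatorTree &DT, const llvm::LoopInfo &LI,
                   llvm::SmallPtrSetImpl<const llvm::Value *> &EphValues) {
      // Values that only feed llvm.assume cannot cause a pointer to escape.
      llvm::CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
      // EphValues is held by reference and must outlive the returned object.
      return llvm::EarliestEscapeInfo(DT, LI, EphValues);
    }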
/// If the element exist it is guaranteed to be what is specified in this enum diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 97dda58109e9..46f14a21a9ff 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -18,8 +18,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -#include -#include #include #include diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 858dd369dd0b..d8e524d7cb80 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -20,6 +20,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Twine.h" @@ -31,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +45,6 @@ #include #include #include -#include #include #include @@ -1300,7 +1299,7 @@ bool BlockFrequencyInfoImpl::computeMassInLoop(LoopData &Loop) { auto &HeaderNode = Loop.Nodes[H]; assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() && "Shouldn't have a weight metadata"); - uint64_t MinWeight = MinHeaderWeight.getValue(); + uint64_t MinWeight = *MinHeaderWeight; LLVM_DEBUG(dbgs() << "Giving weight " << MinWeight << " to " << getBlockName(HeaderNode) << "\n"); if (MinWeight) @@ -1516,7 +1515,7 @@ void BlockFrequencyInfoImpl::findReachableBlocks( // Find all blocks to apply inference on, that is, reachable from the entry // along edges with non-zero probablities std::queue Queue; - std::unordered_set Reachable; + SmallPtrSet Reachable; const BlockT *Entry = &F->front(); Queue.push(Entry); Reachable.insert(Entry); @@ -1527,16 +1526,14 @@ void BlockFrequencyInfoImpl::findReachableBlocks( auto EP = BPI->getEdgeProbability(SrcBB, DstBB); if (EP.isZero()) continue; - if (Reachable.find(DstBB) == Reachable.end()) { + if (Reachable.insert(DstBB).second) Queue.push(DstBB); - Reachable.insert(DstBB); - } } } // Find all blocks to apply inference on, that is, backward reachable from // the entry along (backward) edges with non-zero probablities - std::unordered_set InverseReachable; + SmallPtrSet InverseReachable; for (const BlockT &BB : *F) { // An exit block is a block without any successors bool HasSucc = GraphTraits::child_begin(&BB) != @@ -1553,10 +1550,8 @@ void BlockFrequencyInfoImpl::findReachableBlocks( auto EP = BPI->getEdgeProbability(DstBB, SrcBB); if (EP.isZero()) continue; - if (InverseReachable.find(DstBB) == InverseReachable.end()) { + if (InverseReachable.insert(DstBB).second) Queue.push(DstBB); - InverseReachable.insert(DstBB); - } } } @@ -1581,15 +1576,14 @@ void BlockFrequencyInfoImpl::initTransitionProbabilities( // Find unique successors and corresponding probabilities for every block for (size_t Src = 0; Src < NumBlocks; Src++) { const BlockT *BB = Blocks[Src]; - std::unordered_set UniqueSuccs; + SmallPtrSet UniqueSuccs; for (const auto SI : children(BB)) { // Ignore cold blocks if (BlockIndex.find(SI) == BlockIndex.end()) continue; // Ignore parallel edges between BB and SI blocks - 
if (UniqueSuccs.find(SI) != UniqueSuccs.end()) + if (!UniqueSuccs.insert(SI).second) continue; - UniqueSuccs.insert(SI); // Ignore jumps with zero probability auto EP = BPI->getEdgeProbability(BB, SI); if (EP.isZero()) @@ -1875,7 +1869,7 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { case GVDT_Count: { auto Count = Graph->getBlockProfileCount(Node); if (Count) - OS << Count.getValue(); + OS << *Count; else OS << "Unknown"; break; diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index e2099eba0f65..28418198acea 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -16,14 +16,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" -#include "llvm/Support/Casting.h" #include #include #include diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index c0cabceb4a54..768cda59c57d 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -18,7 +18,6 @@ #ifndef LLVM_ANALYSIS_CFGPRINTER_H #define LLVM_ANALYSIS_CFGPRINTER_H -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/HeatUtils.h" @@ -27,10 +26,11 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/GraphWriter.h" namespace llvm { +template struct GraphTraits; class CFGViewerPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h index 2eae2824bec3..6543c53c9b28 100644 --- a/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h @@ -14,10 +14,12 @@ #ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H #define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H +#include "llvm/IR/Argument.h" #include "llvm/IR/Function.h" #include "llvm/IR/ValueHandle.h" namespace llvm { + namespace cflaa { template struct FunctionHandle final : public CallbackVH { diff --git a/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h index 5f5e52af3d88..dfb363173187 100644 --- a/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -15,7 +15,6 @@ #define LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFLAliasAnalysisUtils.h" #include "llvm/IR/PassManager.h" @@ -25,6 +24,7 @@ namespace llvm { +template class Optional; class Function; class MemoryLocation; class TargetLibraryInfo; diff --git a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h index ec05b3706ca3..865f4a54c094 100644 --- a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -15,13 +15,11 @@ #define 
LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFLAliasAnalysisUtils.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" #include #include diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index 7cf172dc1dd1..9d1b331346b6 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -88,27 +88,21 @@ #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PriorityWorklist.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #include #include namespace llvm { +class Function; +class Value; +template class SmallPriorityWorklist; struct CGSCCUpdateResult; + class Module; // Allow debug logging in this inline function. @@ -278,16 +272,6 @@ struct CGSCCUpdateResult { /// the list and removing entries from it. SmallPtrSetImpl &InvalidatedSCCs; - /// If non-null, the updated current \c RefSCC being processed. - /// - /// This is set when a graph refinement takes place and the "current" point - /// in the graph moves "down" or earlier in the post-order walk. This will - /// often cause the "current" RefSCC to be a newly created RefSCC object and - /// the old one to be added to the above worklist. When that happens, this - /// pointer is non-null and can be used to continue processing the "top" of - /// the post-order walk. - LazyCallGraph::RefSCC *UpdatedRC; - /// If non-null, the updated current \c SCC being processed. 
/// /// This is set when a graph refinement takes place and the "current" point diff --git a/llvm/include/llvm/Analysis/CallGraph.h b/llvm/include/llvm/Analysis/CallGraph.h index 4da448c9900b..88d56785de67 100644 --- a/llvm/include/llvm/Analysis/CallGraph.h +++ b/llvm/include/llvm/Analysis/CallGraph.h @@ -45,9 +45,6 @@ #ifndef LLVM_ANALYSIS_CALLGRAPH_H #define LLVM_ANALYSIS_CALLGRAPH_H -#include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" @@ -61,7 +58,9 @@ namespace llvm { +template struct GraphTraits; class CallGraphNode; +class Function; class Module; class raw_ostream; diff --git a/llvm/include/llvm/Analysis/CallPrinter.h b/llvm/include/llvm/Analysis/CallPrinter.h index 8d4159f3ddc0..d325d0010371 100644 --- a/llvm/include/llvm/Analysis/CallPrinter.h +++ b/llvm/include/llvm/Analysis/CallPrinter.h @@ -14,10 +14,24 @@ #ifndef LLVM_ANALYSIS_CALLPRINTER_H #define LLVM_ANALYSIS_CALLPRINTER_H +#include "llvm/IR/PassManager.h" + namespace llvm { class ModulePass; +/// Pass for printing the call graph to a dot file +class CallGraphDOTPrinterPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +/// Pass for viewing the call graph +class CallGraphViewerPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + ModulePass *createCallGraphViewerPass(); ModulePass *createCallGraphDOTPrinterPass(); diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h index 50d12db7a1c3..a2d9277745e4 100644 --- a/llvm/include/llvm/Analysis/CaptureTracking.h +++ b/llvm/include/llvm/Analysis/CaptureTracking.h @@ -14,6 +14,7 @@ #define LLVM_ANALYSIS_CAPTURETRACKING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLFunctionalExtras.h" namespace llvm { @@ -24,6 +25,7 @@ namespace llvm { class DominatorTree; class LoopInfo; class Function; + template class SmallPtrSetImpl; /// getDefaultMaxUsesToExploreForCaptureTracking - Return default value of /// the maximal number of uses to explore before giving up. It is used by @@ -40,8 +42,14 @@ namespace llvm { /// MaxUsesToExplore specifies how many uses the analysis should explore for /// one value before giving up due too "too many uses". If MaxUsesToExplore /// is zero, a default value is assumed. + bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, + bool StoreCaptures, unsigned MaxUsesToExplore = 0); + + /// Variant of the above function which accepts a set of Values that are + /// ephemeral and cannot cause pointers to escape. bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, + const SmallPtrSetImpl &EphValues, unsigned MaxUsesToExplore = 0); /// PointerMayBeCapturedBefore - Return true if this pointer value may be @@ -72,10 +80,11 @@ namespace llvm { // nullptr is returned. Note that the caller of the function has to ensure // that the instruction the result value is compared against is not in a // cycle. 
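A short sketch of the ephemeral-value-aware PointerMayBeCaptured overload declared earlier in this hunk (the call site setup is assumed):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Analysis/CaptureTracking.h"

    bool mayBeCapturedIgnoringAssumes(
        const llvm::Value *Ptr,
        const llvm::SmallPtrSetImpl<const llvm::Value *> &EphValues) {
      // Uses of Ptr that appear in EphValues (e.g. llvm.assume operand
      // chains) are skipped while searching for captures.
      return llvm::PointerMayBeCaptured(Ptr, /*ReturnCaptures=*/true,
                                        /*StoreCaptures=*/true, EphValues);
    }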
- Instruction *FindEarliestCapture(const Value *V, Function &F, - bool ReturnCaptures, bool StoreCaptures, - const DominatorTree &DT, - unsigned MaxUsesToExplore = 0); + Instruction * + FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures, + bool StoreCaptures, const DominatorTree &DT, + const SmallPtrSetImpl &EphValues, + unsigned MaxUsesToExplore = 0); /// This callback is used in conjunction with PointerMayBeCaptured. In /// addition to the interface here, you'll need to provide your own getters @@ -105,6 +114,24 @@ namespace llvm { virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL); }; + /// Types of use capture kinds, see \p DetermineUseCaptureKind. + enum class UseCaptureKind { + NO_CAPTURE, + MAY_CAPTURE, + PASSTHROUGH, + }; + + /// Determine what kind of capture behaviour \p U may exhibit. + /// + /// A use can be no-capture, a use can potentially capture, or a use can be + /// passthrough such that the uses of the user or \p U should be inspected. + /// The \p IsDereferenceableOrNull callback is used to rule out capturing for + /// certain comparisons. + UseCaptureKind + DetermineUseCaptureKind(const Use &U, + llvm::function_ref + IsDereferenceableOrNull); + /// PointerMayBeCaptured - Visit the value and the values derived from it and /// find values which appear to be capturing the pointer value. This feeds /// results into and is controlled by the CaptureTracker object. diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h index 3d34cd12aea4..332eb9b66e9c 100644 --- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h +++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h @@ -17,7 +17,7 @@ #include "llvm/IR/InstrTypes.h" namespace llvm { - class ICmpInst; + class Type; class Value; /// Encode a icmp predicate into a three bit mask. These bits are carefully @@ -43,7 +43,7 @@ namespace llvm { /// 110 6 A <= B /// 111 7 Always true /// - unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false); + unsigned getICmpCode(CmpInst::Predicate Pred); /// This is the complement of getICmpCode. It turns a predicate code into /// either a constant true or false or the predicate for a new ICmp. @@ -58,6 +58,39 @@ namespace llvm { /// equality comparison (which is signless). bool predicatesFoldable(CmpInst::Predicate P1, CmpInst::Predicate P2); + /// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate + /// into a four bit mask. + inline unsigned getFCmpCode(CmpInst::Predicate CC) { + assert(CmpInst::FCMP_FALSE <= CC && CC <= CmpInst::FCMP_TRUE && + "Unexpected FCmp predicate!"); + // Take advantage of the bit pattern of CmpInst::Predicate here. 
+ // U L G E + static_assert(CmpInst::FCMP_FALSE == 0, ""); // 0 0 0 0 + static_assert(CmpInst::FCMP_OEQ == 1, ""); // 0 0 0 1 + static_assert(CmpInst::FCMP_OGT == 2, ""); // 0 0 1 0 + static_assert(CmpInst::FCMP_OGE == 3, ""); // 0 0 1 1 + static_assert(CmpInst::FCMP_OLT == 4, ""); // 0 1 0 0 + static_assert(CmpInst::FCMP_OLE == 5, ""); // 0 1 0 1 + static_assert(CmpInst::FCMP_ONE == 6, ""); // 0 1 1 0 + static_assert(CmpInst::FCMP_ORD == 7, ""); // 0 1 1 1 + static_assert(CmpInst::FCMP_UNO == 8, ""); // 1 0 0 0 + static_assert(CmpInst::FCMP_UEQ == 9, ""); // 1 0 0 1 + static_assert(CmpInst::FCMP_UGT == 10, ""); // 1 0 1 0 + static_assert(CmpInst::FCMP_UGE == 11, ""); // 1 0 1 1 + static_assert(CmpInst::FCMP_ULT == 12, ""); // 1 1 0 0 + static_assert(CmpInst::FCMP_ULE == 13, ""); // 1 1 0 1 + static_assert(CmpInst::FCMP_UNE == 14, ""); // 1 1 1 0 + static_assert(CmpInst::FCMP_TRUE == 15, ""); // 1 1 1 1 + return CC; + } + + /// This is the complement of getFCmpCode. It turns a predicate code into + /// either a constant true or false or the predicate for a new FCmp. + /// Non-NULL return value will be a true or false constant. + /// NULL return means a new FCmp is needed. The predicate is output in Pred. + Constant *getPredForFCmpCode(unsigned Code, Type *OpTy, + CmpInst::Predicate &Pred); + /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The /// returned predicate is either == or !=. Returns false if decomposition /// fails. diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h index 615591aa83ad..a9431bca1125 100644 --- a/llvm/include/llvm/Analysis/CodeMetrics.h +++ b/llvm/include/llvm/Analysis/CodeMetrics.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_CODEMETRICS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/InstructionCost.h" namespace llvm { class AssumptionCache; @@ -47,14 +48,14 @@ struct CodeMetrics { /// True if this function calls alloca (in the C sense). bool usesDynamicAlloca = false; - /// Number of instructions in the analyzed blocks. - unsigned NumInsts = false; + /// Code size cost of the analyzed blocks. + InstructionCost NumInsts = 0; /// Number of analyzed blocks. unsigned NumBlocks = false; /// Keeps track of basic block code size estimates. - DenseMap<const BasicBlock *, unsigned> NumBBInsts; + DenseMap<const BasicBlock *, InstructionCost> NumBBInsts; /// Keep track of the number of calls to 'big' functions. unsigned NumCalls = false; diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index 37258c80e3a3..23ec7d6b70ec 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -19,16 +19,18 @@ #ifndef LLVM_ANALYSIS_CONSTANTFOLDING_H #define LLVM_ANALYSIS_CONSTANTFOLDING_H +#include <stdint.h> + namespace llvm { class APInt; template <typename T> class ArrayRef; class CallBase; class Constant; -class ConstantExpr; class DSOLocalEquivalent; class DataLayout; class Function; class GlobalValue; +class GlobalVariable; class Instruction; class TargetLibraryInfo; class Type; @@ -65,14 +67,13 @@ Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr); -/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare -/// instruction (icmp/fcmp) with the specified operands. If it fails, it -/// returns a constant expression of the specified operands.
-/// -Constant * -ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, - Constant *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); +/// Attempt to constant fold a compare instruction (icmp/fcmp) with the +/// specified operands. If it fails, it returns a constant expression of the +/// specified operands. +/// Denormal inputs may be flushed based on the denormal handling mode. +Constant *ConstantFoldCompareInstOperands( + unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr, const Instruction *I = nullptr); /// Attempt to constant fold a unary operation with the specified /// operand. If it fails, it returns a constant expression of the specified @@ -86,6 +87,21 @@ Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL); +/// Attempt to constant fold a floating point binary operation with the +/// specified operands, applying the denormal handling mode to the operands. If +/// it fails, it returns a constant expression of the specified operands. +Constant *ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, + Constant *RHS, const DataLayout &DL, + const Instruction *I); + +/// Attempt to flush a floating point constant according to the denormal mode +/// set in the instruction's parent function attributes. If it is flushed, +/// return a zero with the correct sign; otherwise return the original +/// constant. Inputs and outputs to floating point instructions can have their +/// mode set separately, so the direction is also needed. +Constant *FlushFPConstant(Constant *Operand, const Instruction *I, + bool IsOutput); + /// Attempt to constant fold a select instruction with the specified /// operands. The constant result is returned if successful; if not, null is /// returned. @@ -173,6 +189,8 @@ Constant *ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, /// Check whether the given call has no side-effects. /// Specifically checks for math routines which sometimes set errno. bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI); + +Constant *ReadByteArrayFromGlobal(const GlobalVariable *GV, uint64_t Offset); } #endif diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h index d7800f578325..2c83658b81dc 100644 --- a/llvm/include/llvm/Analysis/ConstraintSystem.h +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -11,7 +11,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include <string> @@ -37,7 +36,7 @@ class ConstraintSystem { bool mayHaveSolutionImpl(); public: - bool addVariableRow(const SmallVector<int64_t, 8> &R) { + bool addVariableRow(ArrayRef<int64_t> R) { assert(Constraints.empty() || R.size() == Constraints.back().size()); // If all variable coefficients are 0, the constraint does not provide any // usable information. @@ -49,11 +48,16 @@ public: GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD}) .getZExtValue(); } - Constraints.push_back(R); + Constraints.emplace_back(R.begin(), R.end()); return true; } - bool addVariableRowFill(const SmallVector<int64_t, 8> &R) { + bool addVariableRowFill(ArrayRef<int64_t> R) { + // If all variable coefficients are 0, the constraint does not provide any + // usable information.
+ if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; })) + return false; + for (auto &CR : Constraints) { while (CR.size() != R.size()) CR.push_back(0); @@ -75,7 +79,14 @@ public: bool isConditionImplied(SmallVector<int64_t, 8> R) const; + ArrayRef<int64_t> getLastConstraint() { return Constraints[0]; } void popLastConstraint() { Constraints.pop_back(); } + void popLastNVariables(unsigned N) { + for (auto &C : Constraints) { + for (unsigned i = 0; i < N; i++) + C.pop_back(); + } + } /// Returns the number of rows in the constraint system. unsigned size() const { return Constraints.size(); } diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index c5107da2a017..7649e630b23d 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -18,9 +18,11 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DependenceGraphBuilder.h" #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/IR/Instructions.h" namespace llvm { +class Function; +class Loop; +class LoopInfo; class DDGNode; class DDGEdge; using DDGNodeBase = DGNode<DDGNode, DDGEdge>; diff --git a/llvm/include/llvm/Analysis/DDGPrinter.h b/llvm/include/llvm/Analysis/DDGPrinter.h index 4477b387fe50..d93c28280bac 100644 --- a/llvm/include/llvm/Analysis/DDGPrinter.h +++ b/llvm/include/llvm/Analysis/DDGPrinter.h @@ -16,10 +16,11 @@ #define LLVM_ANALYSIS_DDGPRINTER_H #include "llvm/Analysis/DDG.h" -#include "llvm/Pass.h" #include "llvm/Support/DOTGraphTraits.h" namespace llvm { +class LPMUpdater; +class Loop; //===--------------------------------------------------------------------===// // Implementation of DDG DOT Printer for a loop. diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index d8021907b5b2..c35e189de6fc 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -14,23 +14,156 @@ #define LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H #include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/GraphWriter.h" namespace llvm { +/// Default traits class for extracting a graph from an analysis pass. +/// +/// This assumes that 'GraphT' is 'AnalysisT::Result *', and passes it through. +template <typename Result, typename GraphT> +struct DefaultAnalysisGraphTraits { + static GraphT getGraph(Result R) { return &R; } +}; + +template <typename GraphT> +void viewGraphForFunction(Function &F, GraphT Graph, StringRef Name, + bool IsSimple) { + std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(&Graph); + + ViewGraph(Graph, Name, IsSimple, + GraphName + " for '" + F.getName() + "' function"); +} + +template <typename AnalysisT, bool IsSimple, + typename GraphT = typename AnalysisT::Result *, + typename AnalysisGraphTraitsT = + DefaultAnalysisGraphTraits<typename AnalysisT::Result &, GraphT>> +struct DOTGraphTraitsViewer + : PassInfoMixin<DOTGraphTraitsViewer<AnalysisT, IsSimple>> { + DOTGraphTraitsViewer(StringRef GraphName) : Name(GraphName) {} + + /// Return true if this function should be processed. + /// + /// An implementation of this class may override this function to indicate that + /// only certain functions should be viewed. + /// + /// @param Result The current analysis result for this function.
+ virtual bool processFunction(Function &F, + const typename AnalysisT::Result &Result) { + return true; + } + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) { + auto &Result = FAM.getResult<AnalysisT>(F); + if (!processFunction(F, Result)) + return PreservedAnalyses::all(); + + GraphT Graph = AnalysisGraphTraitsT::getGraph(Result); + viewGraphForFunction(F, Graph, Name, IsSimple); + + return PreservedAnalyses::all(); + }; + +protected: + /// Avoid compiler warning "has virtual functions but non-virtual destructor + /// [-Wnon-virtual-dtor]" in derived classes. + /// + /// DOTGraphTraitsViewer is also used as a mixin for avoiding repeated + /// implementation of viewer passes, i.e., there should be no + /// runtime-polymorphism/downcasting involving this class and hence no + /// virtual destructor needed. Making this dtor protected stops accidental + /// invocation when the derived class destructor should have been called. + /// Those derived classes should be marked final to avoid the warning. + ~DOTGraphTraitsViewer() {} + +private: + StringRef Name; +}; + +template <typename GraphT> +void printGraphForFunction(Function &F, GraphT Graph, StringRef Name, + bool IsSimple) { + std::string Filename = Name.str() + "." + F.getName().str() + ".dot"; + std::error_code EC; + + errs() << "Writing '" << Filename << "'..."; + + raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); + std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph); + + if (!EC) + WriteGraph(File, Graph, IsSimple, + GraphName + " for '" + F.getName() + "' function"); + else + errs() << " error opening file for writing!"; + errs() << "\n"; +} + +template <typename AnalysisT, bool IsSimple, + typename GraphT = typename AnalysisT::Result *, + typename AnalysisGraphTraitsT = + DefaultAnalysisGraphTraits<typename AnalysisT::Result &, GraphT>> +struct DOTGraphTraitsPrinter + : PassInfoMixin<DOTGraphTraitsPrinter<AnalysisT, IsSimple>> { + DOTGraphTraitsPrinter(StringRef GraphName) : Name(GraphName) {} + + /// Return true if this function should be processed. + /// + /// An implementation of this class may override this function to indicate that + /// only certain functions should be viewed. + /// + /// @param Analysis The current analysis result for this function. + virtual bool processFunction(Function &F, + const typename AnalysisT::Result &Result) { + return true; + } + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) { + auto &Result = FAM.getResult<AnalysisT>(F); + if (!processFunction(F, Result)) + return PreservedAnalyses::all(); + + GraphT Graph = AnalysisGraphTraitsT::getGraph(Result); + + printGraphForFunction(F, Graph, Name, IsSimple); + + return PreservedAnalyses::all(); + }; + +protected: + /// Avoid compiler warning "has virtual functions but non-virtual destructor + /// [-Wnon-virtual-dtor]" in derived classes. + /// + /// DOTGraphTraitsPrinter is also used as a mixin for avoiding repeated + /// implementation of printer passes, i.e., there should be no + /// runtime-polymorphism/downcasting involving this class and hence no + /// virtual destructor needed. Making this dtor protected stops accidental + /// invocation when the derived class destructor should have been called. + /// Those derived classes should be marked final to avoid the warning. + ~DOTGraphTraitsPrinter() {} + +private: + StringRef Name; +}; + /// Default traits class for extracting a graph from an analysis pass. /// /// This assumes that 'GraphT' is 'AnalysisT *' and so just passes it through.
template -struct DefaultAnalysisGraphTraits { +struct LegacyDefaultAnalysisGraphTraits { static GraphT getGraph(AnalysisT *A) { return A; } }; -template < - typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > -class DOTGraphTraitsViewer : public FunctionPass { +template > +class DOTGraphTraitsViewerWrapperPass : public FunctionPass { public: - DOTGraphTraitsViewer(StringRef GraphName, char &ID) + DOTGraphTraitsViewerWrapperPass(StringRef GraphName, char &ID) : FunctionPass(ID), Name(GraphName) {} /// Return true if this function should be processed. @@ -50,10 +183,7 @@ public: return false; GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis); - std::string GraphName = DOTGraphTraits::getGraphName(Graph); - std::string Title = GraphName + " for '" + F.getName().str() + "' function"; - - ViewGraph(Graph, Name, IsSimple, Title); + viewGraphForFunction(F, Graph, Name, IsSimple); return false; } @@ -67,12 +197,12 @@ private: std::string Name; }; -template < - typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > -class DOTGraphTraitsPrinter : public FunctionPass { +template > +class DOTGraphTraitsPrinterWrapperPass : public FunctionPass { public: - DOTGraphTraitsPrinter(StringRef GraphName, char &ID) + DOTGraphTraitsPrinterWrapperPass(StringRef GraphName, char &ID) : FunctionPass(ID), Name(GraphName) {} /// Return true if this function should be processed. @@ -92,20 +222,7 @@ public: return false; GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis); - std::string Filename = Name + "." + F.getName().str() + ".dot"; - std::error_code EC; - - errs() << "Writing '" << Filename << "'..."; - - raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); - std::string GraphName = DOTGraphTraits::getGraphName(Graph); - std::string Title = GraphName + " for '" + F.getName().str() + "' function"; - - if (!EC) - WriteGraph(File, Graph, IsSimple, Title); - else - errs() << " error opening file for writing!"; - errs() << "\n"; + printGraphForFunction(F, Graph, Name, IsSimple); return false; } @@ -119,12 +236,12 @@ private: std::string Name; }; -template < - typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > -class DOTGraphTraitsModuleViewer : public ModulePass { +template > +class DOTGraphTraitsModuleViewerWrapperPass : public ModulePass { public: - DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID) + DOTGraphTraitsModuleViewerWrapperPass(StringRef GraphName, char &ID) : ModulePass(ID), Name(GraphName) {} bool runOnModule(Module &M) override { @@ -145,12 +262,12 @@ private: std::string Name; }; -template < - typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > -class DOTGraphTraitsModulePrinter : public ModulePass { +template > +class DOTGraphTraitsModulePrinterWrapperPass : public ModulePass { public: - DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID) + DOTGraphTraitsModulePrinterWrapperPass(StringRef GraphName, char &ID) : ModulePass(ID), Name(GraphName) {} bool runOnModule(Module &M) override { diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h index 6e942530f253..95a36b8b79a4 100644 --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -16,11 +16,11 @@ #ifndef 
LLVM_ANALYSIS_DELINEARIZATION_H #define LLVM_ANALYSIS_DELINEARIZATION_H -#include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { +class raw_ostream; +template <typename T> class SmallVectorImpl; class GetElementPtrInst; class ScalarEvolution; class SCEV; @@ -125,6 +125,17 @@ bool getIndexExpressionsFromGEP(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<int> &Sizes); +/// Implementation of fixed-size array delinearization. Try to delinearize +/// the access function for a fixed-size multi-dimensional array, by deriving +/// subscripts from GEP instructions. Returns true upon success and false +/// otherwise. \p Inst is the load/store instruction whose pointer operand is +/// the one we want to delinearize. \p AccessFn is its corresponding SCEV +/// expression w.r.t. the surrounding loop. +bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, + const SCEV *AccessFn, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<int> &Sizes); + struct DelinearizationPrinterPass : public PassInfoMixin<DelinearizationPrinterPass> { explicit DelinearizationPrinterPass(raw_ostream &OS); diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 638f4869d677..a34afe9fb38d 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -927,9 +927,9 @@ namespace llvm { bool tryDelinearize(Instruction *Src, Instruction *Dst, SmallVectorImpl &Pair); - /// Tries to delinearize access function for a fixed size multi-dimensional - /// array, by deriving subscripts from GEP instructions. Returns true upon - /// success and false otherwise. + /// Tries to delinearize \p Src and \p Dst access functions for a fixed size + /// multi-dimensional array. Calls tryDelinearizeFixedSizeImpl() to + /// delinearize \p Src and \p Dst separately. bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, const SCEV *DstAccessFn, diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index c52b42ae8dc2..4c2a5399ea54 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -17,16 +17,16 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/SyncDependenceAnalysis.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" +#include "llvm/IR/PassManager.h" #include <vector> namespace llvm { -class Value; +class Function; class Instruction; class Loop; class raw_ostream; class TargetTransformInfo; +class Value; /// \brief Generic divergence analysis for reducible CFGs. /// @@ -41,7 +41,7 @@ public: /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop. /// Otherwise the whole function is analyzed. /// \param IsLCSSAForm whether the analysis may assume that the IR in the - /// region in in LCSSA form. + /// region is in LCSSA form.
DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, const DominatorTree &DT, const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm); diff --git a/llvm/include/llvm/Analysis/DomPrinter.h b/llvm/include/llvm/Analysis/DomPrinter.h index e6df12d88072..83fe721346ab 100644 --- a/llvm/include/llvm/Analysis/DomPrinter.h +++ b/llvm/include/llvm/Analysis/DomPrinter.h @@ -14,30 +14,120 @@ #ifndef LLVM_ANALYSIS_DOMPRINTER_H #define LLVM_ANALYSIS_DOMPRINTER_H +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" namespace llvm { -class DomTreePrinterPass : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +template <> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) { + + BasicBlock *BB = Node->getBlock(); + + if (!BB) + return "Post dominance root node"; + + if (isSimple()) + return DOTGraphTraits::getSimpleNodeLabel(BB, nullptr); + + return DOTGraphTraits::getCompleteNodeLabel(BB, nullptr); + } +}; + +template <> +struct DOTGraphTraits + : public DOTGraphTraits { + + DOTGraphTraits(bool isSimple = false) + : DOTGraphTraits(isSimple) {} + + static std::string getGraphName(DominatorTree *DT) { + return "Dominator tree"; + } + + std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) { + return DOTGraphTraits::getNodeLabel(Node, + G->getRootNode()); + } +}; + +template<> +struct DOTGraphTraits + : public DOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits(isSimple) {} + + static std::string getGraphName(PostDominatorTree *DT) { + return "Post dominator tree"; + } + + std::string getNodeLabel(DomTreeNode *Node, + PostDominatorTree *G) { + return DOTGraphTraits::getNodeLabel(Node, G->getRootNode()); + } +}; + +struct DomViewer final : DOTGraphTraitsViewer { + DomViewer() : DOTGraphTraitsViewer("dom") {} +}; + +struct DomOnlyViewer final : DOTGraphTraitsViewer { + DomOnlyViewer() + : DOTGraphTraitsViewer("domonly") {} +}; + +struct PostDomViewer final + : DOTGraphTraitsViewer { + PostDomViewer() + : DOTGraphTraitsViewer("postdom") {} +}; + +struct PostDomOnlyViewer final + : DOTGraphTraitsViewer { + PostDomOnlyViewer() + : DOTGraphTraitsViewer("postdomonly") {} +}; + +struct DomPrinter final : DOTGraphTraitsPrinter { + DomPrinter() : DOTGraphTraitsPrinter("dom") {} +}; + +struct DomOnlyPrinter final + : DOTGraphTraitsPrinter { + DomOnlyPrinter() + : DOTGraphTraitsPrinter("domonly") {} +}; + +struct PostDomPrinter final + : DOTGraphTraitsPrinter { + PostDomPrinter() + : DOTGraphTraitsPrinter("postdom") {} }; -class DomTreeOnlyPrinterPass : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +struct PostDomOnlyPrinter final + : DOTGraphTraitsPrinter { + PostDomOnlyPrinter() + : DOTGraphTraitsPrinter("postdomonly") {} }; } // namespace llvm namespace llvm { class FunctionPass; - FunctionPass *createDomPrinterPass(); - FunctionPass *createDomOnlyPrinterPass(); - FunctionPass *createDomViewerPass(); - FunctionPass *createDomOnlyViewerPass(); - FunctionPass *createPostDomPrinterPass(); - FunctionPass *createPostDomOnlyPrinterPass(); - FunctionPass *createPostDomViewerPass(); - FunctionPass *createPostDomOnlyViewerPass(); + FunctionPass *createDomPrinterWrapperPassPass(); + FunctionPass 
*createDomOnlyPrinterWrapperPassPass(); + FunctionPass *createDomViewerWrapperPassPass(); + FunctionPass *createDomOnlyViewerWrapperPassPass(); + FunctionPass *createPostDomPrinterWrapperPassPass(); + FunctionPass *createPostDomOnlyPrinterWrapperPassPass(); + FunctionPass *createPostDomViewerWrapperPassPass(); + FunctionPass *createPostDomOnlyViewerWrapperPassPass(); } // End llvm namespace #endif diff --git a/llvm/include/llvm/Analysis/DomTreeUpdater.h b/llvm/include/llvm/Analysis/DomTreeUpdater.h index d09154d506ed..ddb958455ccd 100644 --- a/llvm/include/llvm/Analysis/DomTreeUpdater.h +++ b/llvm/include/llvm/Analysis/DomTreeUpdater.h @@ -150,49 +150,6 @@ public: /// awaiting deletion immediately. void recalculate(Function &F); - /// \deprecated { Submit an edge insertion to all available trees. The Eager - /// Strategy flushes this update immediately while the Lazy Strategy queues - /// the update. An internal function checks if the edge exists in the CFG in - /// DEBUG mode. CAUTION! This function has to be called *after* making the - /// update on the actual CFG. It is illegal to submit any update that has - /// already been applied. } - LLVM_ATTRIBUTE_DEPRECATED(void insertEdge(BasicBlock *From, BasicBlock *To), - "Use applyUpdates() instead."); - - /// \deprecated {Submit an edge insertion to all available trees. - /// Under either Strategy, an invalid update will be discard silently. - /// Invalid update means inserting an edge that does not exist in the CFG. - /// The Eager Strategy flushes this update immediately while the Lazy Strategy - /// queues the update. It is only recommended to use this method when you - /// want to discard an invalid update. - /// CAUTION! It is illegal to submit any update that has already been - /// submitted. } - LLVM_ATTRIBUTE_DEPRECATED(void insertEdgeRelaxed(BasicBlock *From, - BasicBlock *To), - "Use applyUpdatesPermissive() instead."); - - /// \deprecated { Submit an edge deletion to all available trees. The Eager - /// Strategy flushes this update immediately while the Lazy Strategy queues - /// the update. An internal function checks if the edge doesn't exist in the - /// CFG in DEBUG mode. - /// CAUTION! This function has to be called *after* making the update on the - /// actual CFG. It is illegal to submit any update that has already been - /// submitted. } - LLVM_ATTRIBUTE_DEPRECATED(void deleteEdge(BasicBlock *From, BasicBlock *To), - "Use applyUpdates() instead."); - - /// \deprecated { Submit an edge deletion to all available trees. - /// Under either Strategy, an invalid update will be discard silently. - /// Invalid update means deleting an edge that exists in the CFG. - /// The Eager Strategy flushes this update immediately while the Lazy Strategy - /// queues the update. It is only recommended to use this method when you - /// want to discard an invalid update. - /// CAUTION! It is illegal to submit any update that has already been - /// submitted. } - LLVM_ATTRIBUTE_DEPRECATED(void deleteEdgeRelaxed(BasicBlock *From, - BasicBlock *To), - "Use applyUpdatesPermissive() instead."); - /// Delete DelBB. DelBB will be removed from its Parent and /// erased from available trees if it exists and finally get deleted. /// Under Eager UpdateStrategy, DelBB will be processed immediately. 
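For out-of-tree code that still calls the removed DomTreeUpdater methods, the replacement is the batch applyUpdates() API that the deprecation notices above point to. A minimal sketch, assuming the caller has already rewired the IR (the function and block names here are illustrative, not part of this patch):

    #include "llvm/Analysis/DomTreeUpdater.h"
    using namespace llvm;

    // Submit both CFG changes in one batch, *after* mutating the IR. Under
    // the Lazy strategy the updates are queued and flushed on the next query
    // of the tree; under Eager they are applied immediately.
    void updateDomTreeAfterRewire(DominatorTree &DT, BasicBlock *From,
                                  BasicBlock *OldSucc, BasicBlock *NewSucc) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
      DTU.applyUpdates({{DominatorTree::Delete, From, OldSucc},
                        {DominatorTree::Insert, From, NewSucc}});
    }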
diff --git a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h index aa764be93b91..7a5f8f31bae3 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h +++ b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h @@ -17,7 +17,6 @@ #ifndef LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H #define LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Config/llvm-config.h" diff --git a/llvm/include/llvm/Analysis/EHPersonalities.h b/llvm/include/llvm/Analysis/EHPersonalities.h index eaada6627494..660d431bb063 100644 --- a/llvm/include/llvm/Analysis/EHPersonalities.h +++ b/llvm/include/llvm/Analysis/EHPersonalities.h @@ -11,7 +11,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Support/ErrorHandling.h" namespace llvm { class BasicBlock; diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index cf07c873b17c..a0f5331fdba5 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -14,16 +14,33 @@ #ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H #define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" namespace llvm { +class DominatorTree; class Function; +class LoopInfo; class FunctionPropertiesInfo { + friend class FunctionPropertiesUpdater; + void updateForBB(const BasicBlock &BB, int64_t Direction); + void updateAggregateStats(const Function &F, const LoopInfo &LI); + void reIncludeBB(const BasicBlock &BB); + public: - static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F, - const LoopInfo &LI); + static FunctionPropertiesInfo + getFunctionPropertiesInfo(const Function &F, FunctionAnalysisManager &FAM); + + bool operator==(const FunctionPropertiesInfo &FPI) const { + return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0; + } + + bool operator!=(const FunctionPropertiesInfo &FPI) const { + return !(*this == FPI); + } void print(raw_ostream &OS) const; @@ -57,6 +74,9 @@ public: // Number of Top Level Loops in the Function int64_t TopLevelLoopCount = 0; + + // All non-debug instructions + int64_t TotalInstructionCount = 0; }; // Analysis pass @@ -66,9 +86,9 @@ class FunctionPropertiesAnalysis public: static AnalysisKey Key; - using Result = FunctionPropertiesInfo; + using Result = const FunctionPropertiesInfo; - Result run(Function &F, FunctionAnalysisManager &FAM); + FunctionPropertiesInfo run(Function &F, FunctionAnalysisManager &FAM); }; /// Printer pass for the FunctionPropertiesAnalysis results. @@ -82,5 +102,24 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +/// Correctly update FunctionPropertiesInfo post-inlining. A +/// FunctionPropertiesUpdater keeps the state necessary for tracking the changes +/// llvm::InlineFunction makes. The idea is that inlining will at most modify +/// a few BBs of the Caller (maybe the entry BB and definitely the callsite BB) +/// and potentially affect exception handling BBs in the case of invoke +/// inlining. 
+class FunctionPropertiesUpdater { +public: + FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, const CallBase &CB); + + void finish(FunctionAnalysisManager &FAM) const; + +private: + FunctionPropertiesInfo &FPI; + const BasicBlock &CallSiteBB; + const Function &Caller; + + DenseSet<const BasicBlock *> Successors; +}; } // namespace llvm #endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H diff --git a/llvm/include/llvm/Analysis/GlobalsModRef.h b/llvm/include/llvm/Analysis/GlobalsModRef.h index 7daaa7f484de..4d8ed10bb18e 100644 --- a/llvm/include/llvm/Analysis/GlobalsModRef.h +++ b/llvm/include/llvm/Analysis/GlobalsModRef.h @@ -14,15 +14,14 @@ #define LLVM_ANALYSIS_GLOBALSMODREF_H #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include <list> namespace llvm { class CallGraph; +class Function; /// An alias analysis result set for globals. /// @@ -79,6 +78,8 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> { const DataLayout &DL, std::function<const TargetLibraryInfo &(Function &F)> GetTLI); + friend struct RecomputeGlobalsAAPass; + public: GlobalsAAResult(GlobalsAAResult &&Arg); ~GlobalsAAResult(); @@ -139,6 +140,10 @@ public: GlobalsAAResult run(Module &M, ModuleAnalysisManager &AM); }; +struct RecomputeGlobalsAAPass : PassInfoMixin<RecomputeGlobalsAAPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + /// Legacy wrapper pass to provide the GlobalsAAResult object. class GlobalsAAWrapperPass : public ModulePass { std::unique_ptr<GlobalsAAResult> Result; diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 90ab2833e428..a3f1c1335cac 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -51,12 +51,13 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" namespace llvm { +class Module; + namespace IRSimilarity { struct IRInstructionDataList; @@ -546,7 +547,7 @@ struct IRInstructionMapper { // an outlined function. Also, assume-like intrinsics could be removed // from the region, removing arguments, causing discrepancies in the // number of inputs between different regions. - if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic()) + if (II.isAssumeLikeIntrinsic()) return Illegal; return EnableIntrinsics ? Legal : Illegal; } @@ -559,6 +560,18 @@ struct IRInstructionMapper { return Illegal; if (!F && !IsIndirectCall) return Illegal; + // Functions marked with the swifttailcc and tailcc calling conventions + // require special handling when outlining musttail functions. The + // calling convention must be passed down to the outlined function as + // well. Further, there is special handling for musttail calls as well, + // requiring a return call directly after. For now, the outliner does not + // support this, so we do not handle matching this case either. + if ((CI.getCallingConv() == CallingConv::SwiftTail || + CI.getCallingConv() == CallingConv::Tail) && + !EnableMustTailCalls) + return Illegal; + if (CI.isMustTailCall() && !EnableMustTailCalls) + return Illegal; return Legal; } // TODO: We do not currently handle similarity that changes the control flow. @@ -580,6 +593,10 @@ struct IRInstructionMapper { // Flag that lets the classifier know whether we should allow intrinsics to // be checked for similarity.
bool EnableIntrinsics = false; + + // Flag that lets the classifier know whether we should allow tail calls to + // be checked for similarity. + bool EnableMustTailCalls = false; }; /// Maps an Instruction to a member of InstrType. @@ -814,8 +831,6 @@ public: void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const { for (IRInstructionData &ID : *this) { BasicBlock *BB = ID.Inst->getParent(); - if (BBSet.contains(BB)) - continue; BBSet.insert(BB); } } @@ -826,10 +841,8 @@ public: SmallVector<BasicBlock *> &BBList) const { for (IRInstructionData &ID : *this) { BasicBlock *BB = ID.Inst->getParent(); - if (BBSet.contains(BB)) - continue; - BBSet.insert(BB); - BBList.push_back(BB); + if (BBSet.insert(BB).second) + BBList.push_back(BB); } } @@ -967,11 +980,13 @@ public: IRSimilarityIdentifier(bool MatchBranches = true, bool MatchIndirectCalls = true, bool MatchCallsWithName = false, - bool MatchIntrinsics = true) + bool MatchIntrinsics = true, + bool MatchMustTailCalls = true) : Mapper(&InstDataAllocator, &InstDataListAllocator), EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls), EnableMatchingCallsByName(MatchCallsWithName), - EnableIntrinsics(MatchIntrinsics) {} + EnableIntrinsics(MatchIntrinsics), + EnableMustTailCalls(MatchMustTailCalls) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -1024,7 +1039,7 @@ public: // If we've already analyzed a Module or set of Modules, we must clear // the SimilarityCandidates to make sure we do not have old values // hanging around. - if (SimilarityCandidates.hasValue()) + if (SimilarityCandidates) SimilarityCandidates->clear(); else SimilarityCandidates = SimilarityGroupList(); @@ -1064,6 +1079,10 @@ private: /// similarity. bool EnableIntrinsics = true; + + // The flag variable that marks whether we should allow tail calls + // to be checked for similarity. + bool EnableMustTailCalls = false; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional<SimilarityGroupList> SimilarityCandidates; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index dec488a6f26d..231d3bbf534b 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -13,27 +13,23 @@ #ifndef LLVM_ANALYSIS_IVDESCRIPTORS_H #define LLVM_ANALYSIS_IVDESCRIPTORS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Casting.h" namespace llvm { -class DemandedBits; class AssumptionCache; +class DemandedBits; +class DominatorTree; +class Instruction; class Loop; class PredicatedScalarEvolution; class ScalarEvolution; class SCEV; -class DominatorTree; +class StoreInst; /// These are the kinds of recurrences that we support.
enum class RecurKind { @@ -74,14 +70,14 @@ class RecurrenceDescriptor { public: RecurrenceDescriptor() = default; - RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, - FastMathFlags FMF, Instruction *ExactFP, Type *RT, - bool Signed, bool Ordered, + RecurrenceDescriptor(Value *Start, Instruction *Exit, StoreInst *Store, + RecurKind K, FastMathFlags FMF, Instruction *ExactFP, + Type *RT, bool Signed, bool Ordered, SmallPtrSetImpl<Instruction *> &CI, unsigned MinWidthCastToRecurTy) - : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), - ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), - IsOrdered(Ordered), + : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit), + Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), + IsSigned(Signed), IsOrdered(Ordered), MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { CastInsts.insert(CI.begin(), CI.end()); } @@ -168,22 +164,21 @@ public: /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are /// non-null, the minimal bit width needed to compute the reduction will be /// computed. - static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, - FastMathFlags FuncFMF, - RecurrenceDescriptor &RedDes, - DemandedBits *DB = nullptr, - AssumptionCache *AC = nullptr, - DominatorTree *DT = nullptr); + static bool + AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, + FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr); /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are /// non-null, the minimal bit width needed to compute the reduction will be - /// computed. - static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes, - DemandedBits *DB = nullptr, - AssumptionCache *AC = nullptr, - DominatorTree *DT = nullptr); + /// computed. If \p SE is non-null, store instructions to loop-invariant + /// addresses are processed. + static bool + isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr); /// Returns true if Phi is a first-order recurrence. A first-order recurrence /// is a non-reduction recurrence relation in which the value of the @@ -275,6 +270,11 @@ public: cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd; } + /// Reductions may store a temporary or final result to an invariant address. + /// If there is such a store in the loop then, after a successful run of + /// the AddReductionVar method, this field will be assigned the last store + /// encountered. + StoreInst *IntermediateStore = nullptr; + private: // The starting value of the recurrence. // It does not have to be zero!
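The new ScalarEvolution parameter and the public IntermediateStore field above work together; a short sketch of how a caller might use them (Phi, TheLoop, AC, DT, and SE are assumed to already be in scope, e.g. inside a loop transform):

    // Passing SE allows reductions whose intermediate values are stored to a
    // loop-invariant address to be recognized; when such a store exists, the
    // descriptor records the last one encountered.
    RecurrenceDescriptor RedDes;
    if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes,
                                             /*DB=*/nullptr, AC, DT, SE)) {
      if (StoreInst *SI = RedDes.IntermediateStore) {
        // The reduction also writes its result to memory through SI.
      }
    }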
diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h index 390d09848dde..e5a496037691 100644 --- a/llvm/include/llvm/Analysis/IVUsers.h +++ b/llvm/include/llvm/Analysis/IVUsers.h @@ -23,8 +23,6 @@ namespace llvm { class AssumptionCache; class DominatorTree; -class Instruction; -class Value; class ScalarEvolution; class SCEV; class IVUsers; diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 0103ee7f8386..31524126027b 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -9,19 +9,20 @@ #ifndef LLVM_ANALYSIS_INLINEADVISOR_H #define LLVM_ANALYSIS_INLINEADVISOR_H +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/PassManager.h" #include <memory> -#include <unordered_set> namespace llvm { class BasicBlock; class CallBase; class Function; class Module; +class OptimizationRemark; +class ImportedFunctionsInliningStatistics; class OptimizationRemarkEmitter; struct ReplayInlinerSettings; @@ -40,6 +41,28 @@ struct ReplayInlinerSettings; /// training. enum class InliningAdvisorMode : int { Default, Release, Development }; +// Each entry represents an inline driver. +enum class InlinePass : int { + AlwaysInliner, + CGSCCInliner, + EarlyInliner, + ModuleInliner, + MLInliner, + ReplayCGSCCInliner, + ReplaySampleProfileInliner, + SampleProfileInliner, +}; + +/// Provides context on when an inline advisor is constructed in the pipeline +/// (e.g., link phase, inline driver). +struct InlineContext { + ThinOrFullLTOPhase LTOPhase; + + InlinePass Pass; +}; + +std::string AnnotateInlinePassName(InlineContext IC); + class InlineAdvisor; /// Capture state between an inlining decision having been made, and /// its impact being observable. When collecting model training data, this @@ -122,7 +145,7 @@ public: DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB, Optional<InlineCost> OIC, OptimizationRemarkEmitter &ORE, bool EmitRemarks = true) - : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB), + : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB), OIC(OIC), EmitRemarks(EmitRemarks) {} private: @@ -158,7 +181,7 @@ public: /// This must be called when the Inliner pass is entered, to allow the /// InlineAdvisor to update internal state, as a result of function passes run /// between Inliner pass runs (for the same module). - virtual void onPassEntry() {} + virtual void onPassEntry(LazyCallGraph::SCC *SCC = nullptr) {} /// This must be called when the Inliner pass is exited, as function passes /// may be run subsequently. This allows an implementation of InlineAdvisor @@ -170,14 +193,22 @@ public: OS << "Unimplemented InlineAdvisor print\n"; } + /// NOTE: The pass name is annotated only when the inline advisor constructor provides an InlineContext.
+ const char *getAnnotatedInlinePassName() const { + return AnnotatedInlinePassName.c_str(); + } + protected: - InlineAdvisor(Module &M, FunctionAnalysisManager &FAM); + InlineAdvisor(Module &M, FunctionAnalysisManager &FAM, + Optional IC = NoneType::None); virtual std::unique_ptr getAdviceImpl(CallBase &CB) = 0; virtual std::unique_ptr getMandatoryAdvice(CallBase &CB, bool Advice); Module &M; FunctionAnalysisManager &FAM; + const Optional IC; + const std::string AnnotatedInlinePassName; std::unique_ptr ImportedFunctionsStats; enum class MandatoryInliningKind { NotMandatory, Always, Never }; @@ -198,8 +229,8 @@ private: class DefaultInlineAdvisor : public InlineAdvisor { public: DefaultInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, - InlineParams Params) - : InlineAdvisor(M, FAM), Params(Params) {} + InlineParams Params, InlineContext IC) + : InlineAdvisor(M, FAM, IC), Params(Params) {} private: std::unique_ptr getAdviceImpl(CallBase &CB) override; @@ -223,7 +254,8 @@ public: return !PAC.preservedWhenStateless(); } bool tryCreate(InlineParams Params, InliningAdvisorMode Mode, - const ReplayInlinerSettings &ReplaySettings); + const ReplayInlinerSettings &ReplaySettings, + InlineContext IC); InlineAdvisor *getAdvisor() const { return Advisor.get(); } private: @@ -244,6 +276,9 @@ public: explicit InlineAdvisorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + + PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR); }; std::unique_ptr diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index f86ee5a14874..756f1fb61f95 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -13,14 +13,17 @@ #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Analysis/InlineModelFeatureMaps.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/PassManager.h" #include #include namespace llvm { +class AssumptionCache; +class OptimizationRemarkEmitter; class BlockFrequencyInfo; class CallBase; class DataLayout; @@ -52,6 +55,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024; /// Do not inline dynamic allocas that have been constant propagated to be /// static allocas above this amount in bytes. const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536; + +const char FunctionInlineCostMultiplierAttributeName[] = + "function-inline-cost-multiplier"; } // namespace InlineConstants // The cost-benefit pair computed by cost-benefit analysis. @@ -217,6 +223,8 @@ struct InlineParams { Optional AllowRecursiveCall = false; }; +Optional getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind); + /// Generate the parameters to tune the inline cost analysis based only on the /// commandline options. 
InlineParams getInlineParams(); diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h index 1afa8a825f15..fb8236c28b25 100644 --- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h +++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h @@ -10,6 +10,8 @@ #ifndef LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H #define LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H +#include "llvm/Analysis/TensorSpec.h" + #include #include #include @@ -127,7 +129,7 @@ inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) { constexpr size_t NumberOfFeatures = static_cast(FeatureIndex::NumberOfFeatures); -extern const std::array FeatureNameMap; +extern const std::array FeatureMap; extern const char *const DecisionName; extern const char *const DefaultDecisionName; diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h index 84252bcf1b06..aabd86c98780 100644 --- a/llvm/include/llvm/Analysis/InlineOrder.h +++ b/llvm/include/llvm/Analysis/InlineOrder.h @@ -10,10 +10,9 @@ #define LLVM_ANALYSIS_INLINEORDER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstrTypes.h" #include #include @@ -71,34 +70,52 @@ private: size_t FirstIndex = 0; }; -class InlineSizePriority { +class InlinePriority { public: - InlineSizePriority(int Size) : Size(Size) {} + virtual ~InlinePriority() = default; + virtual bool hasLowerPriority(const CallBase *L, const CallBase *R) const = 0; + virtual void update(const CallBase *CB) = 0; + virtual bool updateAndCheckDecreased(const CallBase *CB) = 0; +}; - static bool isMoreDesirable(const InlineSizePriority &S1, - const InlineSizePriority &S2) { - return S1.Size < S2.Size; - } +class SizePriority : public InlinePriority { + using PriorityT = unsigned; + DenseMap Priorities; - static InlineSizePriority evaluate(CallBase *CB) { + static PriorityT evaluate(const CallBase *CB) { Function *Callee = CB->getCalledFunction(); - return InlineSizePriority(Callee->getInstructionCount()); + return Callee->getInstructionCount(); + } + + static bool isMoreDesirable(const PriorityT &P1, const PriorityT &P2) { + return P1 < P2; } - int Size; + bool hasLowerPriority(const CallBase *L, const CallBase *R) const override { + const auto I1 = Priorities.find(L); + const auto I2 = Priorities.find(R); + assert(I1 != Priorities.end() && I2 != Priorities.end()); + return isMoreDesirable(I2->second, I1->second); + } + +public: + // Update the priority associated with CB. + void update(const CallBase *CB) override { Priorities[CB] = evaluate(CB); }; + + bool updateAndCheckDecreased(const CallBase *CB) override { + auto It = Priorities.find(CB); + const auto OldPriority = It->second; + It->second = evaluate(CB); + const auto NewPriority = It->second; + return isMoreDesirable(OldPriority, NewPriority); + } }; -template class PriorityInlineOrder : public InlineOrder> { using T = std::pair; - using HeapT = std::pair; using reference = T &; using const_reference = const T &; - static bool cmp(const HeapT &P1, const HeapT &P2) { - return PriorityT::isMoreDesirable(P2.second, P1.second); - } - // A call site could become less desirable for inlining because of the size // growth from prior inlining into the callee. This method is used to lazily // update the desirability of a call site if it's decreasing. 
It is only @@ -107,31 +124,29 @@ class PriorityInlineOrder : public InlineOrder> { // pushed right back into the heap. For simplicity, those cases where // the desirability of a call site increases are ignored here. void adjust() { - bool Changed = false; - do { - CallBase *CB = Heap.front().first; - const PriorityT PreviousGoodness = Heap.front().second; - const PriorityT CurrentGoodness = PriorityT::evaluate(CB); - Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness); - if (Changed) { - std::pop_heap(Heap.begin(), Heap.end(), cmp); - Heap.pop_back(); - Heap.push_back({CB, CurrentGoodness}); - std::push_heap(Heap.begin(), Heap.end(), cmp); - } - } while (Changed); + while (PriorityPtr->updateAndCheckDecreased(Heap.front())) { + std::pop_heap(Heap.begin(), Heap.end(), isLess); + std::push_heap(Heap.begin(), Heap.end(), isLess); + } } public: + PriorityInlineOrder(std::unique_ptr PriorityPtr) + : PriorityPtr(std::move(PriorityPtr)) { + isLess = [this](const CallBase *L, const CallBase *R) { + return this->PriorityPtr->hasLowerPriority(L, R); + }; + } + size_t size() override { return Heap.size(); } void push(const T &Elt) override { CallBase *CB = Elt.first; const int InlineHistoryID = Elt.second; - const PriorityT Goodness = PriorityT::evaluate(CB); - Heap.push_back({CB, Goodness}); - std::push_heap(Heap.begin(), Heap.end(), cmp); + Heap.push_back(CB); + PriorityPtr->update(CB); + std::push_heap(Heap.begin(), Heap.end(), isLess); InlineHistoryMap[CB] = InlineHistoryID; } @@ -139,10 +154,10 @@ public: assert(size() > 0); adjust(); - CallBase *CB = Heap.front().first; + CallBase *CB = Heap.front(); T Result = std::make_pair(CB, InlineHistoryMap[CB]); InlineHistoryMap.erase(CB); - std::pop_heap(Heap.begin(), Heap.end(), cmp); + std::pop_heap(Heap.begin(), Heap.end(), isLess); Heap.pop_back(); return Result; } @@ -151,21 +166,23 @@ public: assert(size() > 0); adjust(); - CallBase *CB = Heap.front().first; + CallBase *CB = Heap.front(); return *InlineHistoryMap.find(CB); } void erase_if(function_ref Pred) override { - auto PredWrapper = [=](HeapT P) -> bool { - return Pred(std::make_pair(P.first, 0)); + auto PredWrapper = [=](CallBase *CB) -> bool { + return Pred(std::make_pair(CB, 0)); }; llvm::erase_if(Heap, PredWrapper); - std::make_heap(Heap.begin(), Heap.end(), cmp); + std::make_heap(Heap.begin(), Heap.end(), isLess); } private: - SmallVector Heap; + SmallVector Heap; + std::function isLess; DenseMap InlineHistoryMap; + std::unique_ptr PriorityPtr; }; } // namespace llvm #endif // LLVM_ANALYSIS_INLINEORDER_H diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h index 54ef1ddf6085..d4ea7d73ec92 100644 --- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h +++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h @@ -22,12 +22,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetFolder.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilderFolder.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" namespace llvm { +class Constant; /// InstSimplifyFolder - Use InstructionSimplify to fold operations to existing /// values. Also applies target-specific constant folding when not using @@ -47,108 +46,74 @@ public: // Return an existing value or a constant if the operation can be simplified. // Otherwise return nullptr. 
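The reworked folder above is meant to be installed into IRBuilder, which consults it before materializing new instructions. A minimal usage sketch (Ctx, DL, BB, and X are assumed to be in scope; this snippet is illustrative and not part of the patch):

    // With InstSimplifyFolder installed, "creating" a redundant instruction
    // returns an existing value instead of emitting new IR.
    IRBuilder<InstSimplifyFolder> Builder(Ctx, InstSimplifyFolder(DL));
    Builder.SetInsertPoint(BB);
    Value *S = Builder.CreateAdd(X, Constant::getNullValue(X->getType()));
    // S is simply X when simplification succeeds; otherwise an add is emitted.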
//===--------------------------------------------------------------------===// - Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false, - bool HasNSW = false) const override { - return SimplifyAddInst(LHS, RHS, HasNUW, HasNSW, SQ); + + Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const override { + return simplifyBinOp(Opc, LHS, RHS, SQ); + } + + Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool IsExact) const override { + return simplifyBinOp(Opc, LHS, RHS, SQ); } - Value *FoldAnd(Value *LHS, Value *RHS) const override { - return SimplifyAndInst(LHS, RHS, SQ); + Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool HasNUW, bool HasNSW) const override { + return simplifyBinOp(Opc, LHS, RHS, SQ); } - Value *FoldOr(Value *LHS, Value *RHS) const override { - return SimplifyOrInst(LHS, RHS, SQ); + Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + FastMathFlags FMF) const override { + return simplifyBinOp(Opc, LHS, RHS, FMF, SQ); } Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { - return SimplifyICmpInst(P, LHS, RHS, SQ); + return simplifyICmpInst(P, LHS, RHS, SQ); } Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef IdxList, bool IsInBounds = false) const override { - return SimplifyGEPInst(Ty, Ptr, IdxList, IsInBounds, SQ); + return simplifyGEPInst(Ty, Ptr, IdxList, IsInBounds, SQ); } Value *FoldSelect(Value *C, Value *True, Value *False) const override { - return SimplifySelectInst(C, True, False, SQ); + return simplifySelectInst(C, True, False, SQ); } - //===--------------------------------------------------------------------===// - // Binary Operators - //===--------------------------------------------------------------------===// + Value *FoldExtractValue(Value *Agg, + ArrayRef IdxList) const override { + return simplifyExtractValueInst(Agg, IdxList, SQ); + }; - Value *CreateFAdd(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateFAdd(LHS, RHS); - } - Value *CreateSub(Constant *LHS, Constant *RHS, bool HasNUW = false, - bool HasNSW = false) const override { - return ConstFolder.CreateSub(LHS, RHS, HasNUW, HasNSW); - } - Value *CreateFSub(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateFSub(LHS, RHS); - } - Value *CreateMul(Constant *LHS, Constant *RHS, bool HasNUW = false, - bool HasNSW = false) const override { - return ConstFolder.CreateMul(LHS, RHS, HasNUW, HasNSW); - } - Value *CreateFMul(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateFMul(LHS, RHS); - } - Value *CreateUDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstFolder.CreateUDiv(LHS, RHS, isExact); - } - Value *CreateSDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstFolder.CreateSDiv(LHS, RHS, isExact); - } - Value *CreateFDiv(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateFDiv(LHS, RHS); - } - Value *CreateURem(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateURem(LHS, RHS); - } - Value *CreateSRem(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateSRem(LHS, RHS); - } - Value *CreateFRem(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateFRem(LHS, RHS); - } - Value *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false, - bool HasNSW = false) const override { - return ConstFolder.CreateShl(LHS, RHS, HasNUW, HasNSW); - } - Value 
*CreateLShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstFolder.CreateLShr(LHS, RHS, isExact); + Value *FoldInsertValue(Value *Agg, Value *Val, + ArrayRef IdxList) const override { + return simplifyInsertValueInst(Agg, Val, IdxList, SQ); } - Value *CreateAShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstFolder.CreateAShr(LHS, RHS, isExact); + + Value *FoldExtractElement(Value *Vec, Value *Idx) const override { + return simplifyExtractElementInst(Vec, Idx, SQ); } - Value *CreateXor(Constant *LHS, Constant *RHS) const override { - return ConstFolder.CreateXor(LHS, RHS); + + Value *FoldInsertElement(Value *Vec, Value *NewElt, + Value *Idx) const override { + return simplifyInsertElementInst(Vec, NewElt, Idx, SQ); } - Value *CreateBinOp(Instruction::BinaryOps Opc, Constant *LHS, - Constant *RHS) const override { - return ConstFolder.CreateBinOp(Opc, LHS, RHS); + Value *FoldShuffleVector(Value *V1, Value *V2, + ArrayRef Mask) const override { + Type *RetTy = VectorType::get( + cast(V1->getType())->getElementType(), Mask.size(), + isa(V1->getType())); + return simplifyShuffleVectorInst(V1, V2, Mask, RetTy, SQ); } //===--------------------------------------------------------------------===// // Unary Operators //===--------------------------------------------------------------------===// - Value *CreateNeg(Constant *C, bool HasNUW = false, - bool HasNSW = false) const override { - return ConstFolder.CreateNeg(C, HasNUW, HasNSW); - } Value *CreateFNeg(Constant *C) const override { return ConstFolder.CreateFNeg(C); } - Value *CreateNot(Constant *C) const override { - return ConstFolder.CreateNot(C); - } Value *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { return ConstFolder.CreateUnOp(Opc, C); @@ -220,34 +185,6 @@ public: Constant *RHS) const override { return ConstFolder.CreateFCmp(P, LHS, RHS); } - - //===--------------------------------------------------------------------===// - // Other Instructions - //===--------------------------------------------------------------------===// - - Value *CreateExtractElement(Constant *Vec, Constant *Idx) const override { - return ConstFolder.CreateExtractElement(Vec, Idx); - } - - Value *CreateInsertElement(Constant *Vec, Constant *NewElt, - Constant *Idx) const override { - return ConstFolder.CreateInsertElement(Vec, NewElt, Idx); - } - - Value *CreateShuffleVector(Constant *V1, Constant *V2, - ArrayRef Mask) const override { - return ConstFolder.CreateShuffleVector(V1, V2, Mask); - } - - Value *CreateExtractValue(Constant *Agg, - ArrayRef IdxList) const override { - return ConstFolder.CreateExtractValue(Agg, IdxList); - } - - Value *CreateInsertValue(Constant *Agg, Constant *Val, - ArrayRef IdxList) const override { - return ConstFolder.CreateInsertValue(Agg, Val, IdxList); - } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 8b49c115f101..52d43bf5c2a6 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -35,8 +35,6 @@ #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" namespace llvm { @@ -49,6 +47,7 @@ class CallBase; class DataLayout; class DominatorTree; class Function; +class Instruction; struct LoopStandardAnalysisResults; class MDNode; class 
OptimizationRemarkEmitter; @@ -145,176 +144,185 @@ struct SimplifyQuery { // Please use the SimplifyQuery versions in new code. /// Given operand for an FNeg, fold the result or return null. -Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q); +Value *simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q); /// Given operands for an Add, fold the result or return null. -Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, +Value *simplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q); /// Given operands for a Sub, fold the result or return null. -Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, +Value *simplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q); /// Given operands for an FAdd, fold the result or return null. Value * -SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, +simplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an FSub, fold the result or return null. Value * -SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, +simplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an FMul, fold the result or return null. Value * -SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, +simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for the multiplication of a FMA, fold the result or return -/// null. In contrast to SimplifyFMulInst, this function will not perform +/// null. In contrast to simplifyFMulInst, this function will not perform /// simplifications whose unrounded results differ when rounded to the argument /// type. -Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, +Value *simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for a Mul, fold the result or return null. -Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an SDiv, fold the result or return null. -Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for a UDiv, fold the result or return null. -Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FDiv, fold the result or return null. Value * -SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, +simplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an SRem, fold the result or return null. 
-Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for a URem, fold the result or return null. -Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FRem, fold the result or return null. Value * -SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, +simplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for a Shl, fold the result or return null. -Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +Value *simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q); /// Given operands for a LShr, fold the result or return null. -Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, +Value *simplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q); /// Given operands for an AShr, fold the result or return null. -Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, +Value *simplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q); /// Given operands for an And, fold the result or return null. -Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an Or, fold the result or return null. -Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an Xor, fold the result or return null. -Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +Value *simplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an ICmpInst, fold the result or return null. -Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FCmpInst, fold the result or return null. -Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); /// Given operands for a SelectInst, fold the result or return null. -Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, +Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q); /// Given operands for a GetElementPtrInst, fold the result or return null. -Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices, +Value *simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices, bool InBounds, const SimplifyQuery &Q); /// Given operands for an InsertValueInst, fold the result or return null. -Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, +Value *simplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const SimplifyQuery &Q); /// Given operands for an InsertElement, fold the result or return null. -Value *SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, +Value *simplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q); /// Given operands for an ExtractValueInst, fold the result or return null.
-Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, +Value *simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const SimplifyQuery &Q); /// Given operands for an ExtractElementInst, fold the result or return null. -Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, +Value *simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q); /// Given operands for a CastInst, fold the result or return null. -Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, +Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q); /// Given operands for a ShuffleVectorInst, fold the result or return null. /// See class ShuffleVectorInst for a description of the mask representation. -Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef<int> Mask, +Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef<int> Mask, Type *RetTy, const SimplifyQuery &Q); //=== Helper functions for higher up the class hierarchy. /// Given operands for a CmpInst, fold the result or return null. -Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operand for a UnaryOperator, fold the result or return null. -Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q); +Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q); /// Given operand for a UnaryOperator, fold the result or return null. /// Try to use FastMathFlags when folding the result. -Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, +Value *simplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, const SimplifyQuery &Q); /// Given operands for a BinaryOperator, fold the result or return null. -Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for a BinaryOperator, fold the result or return null. /// Try to use FastMathFlags when folding the result. -Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, +Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); /// Given a callsite, fold the result or return null. -Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); +Value *simplifyCall(CallBase *Call, const SimplifyQuery &Q); + +/// Given a constrained FP intrinsic call, tries to compute its simplified +/// version. Returns a simplified result or null. +/// +/// This function provides an additional contract: it guarantees that if +/// simplification succeeds, the intrinsic is side-effect free. As a result, +/// successful simplification can be used to delete the intrinsic, not just +/// replace its result. +Value *simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q); /// Given an operand for a Freeze, see if we can fold the result. /// If not, this returns null. -Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q); +Value *simplifyFreezeInst(Value *Op, const SimplifyQuery &Q); /// See if we can compute a simplified version of this instruction. If not, /// return null.
-Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, +Value *simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); -/// Like \p SimplifyInstruction but the operands of \p I are replaced with +/// Like \p simplifyInstruction but the operands of \p I are replaced with /// \p NewOps. Returns a simplified value, or null if none was found. Value * -SimplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps, +simplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); diff --git a/llvm/include/llvm/Analysis/IntervalIterator.h b/llvm/include/llvm/Analysis/IntervalIterator.h index 8e2273618a66..cbb7cac1c508 100644 --- a/llvm/include/llvm/Analysis/IntervalIterator.h +++ b/llvm/include/llvm/Analysis/IntervalIterator.h @@ -36,8 +36,6 @@ #include "llvm/Analysis/Interval.h" #include "llvm/Analysis/IntervalPartition.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -48,6 +46,7 @@ namespace llvm { class BasicBlock; +class Function; // getNodeHeader - Given a source graph node and the source graph, return the // BasicBlock that is the header node. This is the opposite of diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index c0404d37d04d..4cacf8951d6a 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -38,20 +38,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -60,8 +54,11 @@ namespace llvm { +class Constant; +class Function; template <class GraphType> struct GraphTraits; class Module; +class TargetLibraryInfo; class Value; /// A lazily constructed view of the call graph of a module. @@ -331,7 +328,7 @@ public: bool operator!=(const Node &N) const { return !operator==(N); } /// Tests whether the node has been populated with edges. - bool isPopulated() const { return Edges.hasValue(); } + bool isPopulated() const { return Edges.has_value(); } /// Tests whether this is actually a dead node and no longer valid. /// diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 754391e10630..24c2bfcc74b9 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -114,6 +114,9 @@ public: /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); + /// Completely flush all previously computed values. + void clear(const Module *M); + /// Print the \p LazyValueInfo Analysis. /// We pass in the DTree that is required for identifying which basic blocks /// we can solve/print for, in the LVIPrinter.
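Since the bulk of the InstructionSimplify.h churn above is the Simplify*-to-simplify* rename, here is a minimal client-side sketch of the renamed entry points (illustrative only, not part of the imported sources; assumes a SimplifyQuery built from the usual analyses, and elides error handling):

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Fold away trivially simplifiable instructions in F. simplifyInstruction
// (lowercase after this rename) returns an existing Value that can stand in
// for I, or null if no simplification was found.
static void foldTrivialInstructions(Function &F, const SimplifyQuery &SQ) {
  for (BasicBlock &BB : F)
    for (Instruction &I : make_early_inc_range(BB))
      if (Value *V = simplifyInstruction(&I, SQ.getWithInstruction(&I))) {
        I.replaceAllUsesWith(V);
        I.eraseFromParent();
      }
}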
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 09bf98d324ed..29e3efb38e19 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -75,9 +75,9 @@ bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, /// within the specified loop) would access only dereferenceable memory, and /// be properly aligned on every iteration of the specified loop regardless of /// its placement within the loop. (i.e. does not require predication beyond -/// that required by the the header itself and could be hoisted into the header +/// that required by the header itself and could be hoisted into the header /// if desired.) This is more powerful than the variants above when the -/// address loaded from is analyzeable by SCEV. +/// address loaded from is analyzable by SCEV. bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT); diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index c83a04991b04..8f71ce9e96c0 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -244,6 +244,15 @@ public: SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, bool isWrite) const; + /// Return the program order indices for the access location (Ptr, IsWrite). + /// Returns an empty ArrayRef if there are no accesses for the location. + ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const { + auto I = Accesses.find({Ptr, IsWrite}); + if (I != Accesses.end()) + return I->second; + return {}; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -327,12 +336,6 @@ struct RuntimeCheckingPtrGroup { /// pointer, with index \p Index in RtCheck. RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck); - RuntimeCheckingPtrGroup(unsigned Index, const SCEV *Start, const SCEV *End, - unsigned AS) - : High(End), Low(Start), AddressSpace(AS) { - Members.push_back(Index); - } - /// Tries to add the pointer recorded in RtCheck at index /// \p Index to this pointer checking group. We can only add a pointer /// to a checking group if we will still be able to get /// of success, false otherwise. bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck); bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End, - unsigned AS, ScalarEvolution &SE); + unsigned AS, bool NeedsFreeze, ScalarEvolution &SE); /// The SCEV expression which represents the upper bound of all the /// pointers in this group. SmallVector<unsigned, 2> Members; /// Address space of the involved pointers. unsigned AddressSpace; + /// Whether the pointer needs to be frozen after expansion, e.g. because it + /// may be poison outside the loop. + bool NeedsFreeze = false; }; /// A memcheck which is made up of a pair of grouped pointers.
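The NeedsFreeze/PointerDiffInfo machinery introduced here ultimately lowers to a very small runtime test in the loop preheader. As a rough illustration of its shape (this is not LLVM API, just the conceptual check; LowA/HighA and LowB/HighB stand for the expanded Low/High SCEV bounds of two checking groups):

// Conceptual form of the range-overlap test a RuntimePointerCheck pair
// expands to; the vectorizer branches to the scalar loop when it is true.
static bool groupsMayConflict(const char *LowA, const char *HighA,
                              const char *LowB, const char *HighB) {
  // The two groups are disjoint iff one range ends at or before the other
  // begins; any other arrangement means a possible overlap.
  return !(HighA <= LowB || HighB <= LowA);
}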
@@ -359,6 +365,18 @@ typedef std::pair<const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup *> RuntimePointerCheck; +struct PointerDiffInfo { + const SCEV *SrcStart; + const SCEV *SinkStart; + unsigned AccessSize; + bool NeedsFreeze; + + PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart, + unsigned AccessSize, bool NeedsFreeze) + : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize), + NeedsFreeze(NeedsFreeze) {} +}; + /// Holds information about the memory runtime legality checks to verify /// that a group of pointers do not overlap. class RuntimePointerChecking { @@ -383,16 +401,19 @@ public: unsigned AliasSetId; /// SCEV for the access. const SCEV *Expr; + /// True if the pointer expression needs to be frozen after expansion. + bool NeedsFreeze; PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End, bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId, - const SCEV *Expr) + const SCEV *Expr, bool NeedsFreeze) : PointerValue(PointerValue), Start(Start), End(End), IsWritePtr(IsWritePtr), DependencySetId(DependencySetId), - AliasSetId(AliasSetId), Expr(Expr) {} + AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {} }; - RuntimePointerChecking(ScalarEvolution *SE) : SE(SE) {} + RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE) + : DC(DC), SE(SE) {} /// Reset the state of the pointer runtime information. void reset() { @@ -406,9 +427,9 @@ public: /// according to the assumptions that we've made during the analysis. /// The method might also version the pointer stride according to \p Strides, /// and add new predicates to \p PSE. - void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, - unsigned ASId, const ValueToValueMap &Strides, - PredicatedScalarEvolution &PSE); + void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, + bool WritePtr, unsigned DepSetId, unsigned ASId, + PredicatedScalarEvolution &PSE, bool NeedsFreeze); /// No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } @@ -418,11 +439,23 @@ public: void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies); - /// Returns the checks that generateChecks created. + /// Returns the checks that generateChecks created. They can be used to ensure + /// no read/write accesses overlap across all loop iterations. const SmallVectorImpl<RuntimePointerCheck> &getChecks() const { return Checks; } + // Returns an optional list of (pointer-difference expressions, access size) + // pairs that can be used to prove that there are no vectorization-preventing + // dependencies at runtime. There is a vectorization-preventing dependency + // if any pointer-difference is smaller than the access size. + Optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const { + if (!CanUseDiffCheck) + return None; + return {DiffChecks}; + } + /// Decide if we need to add a check between two groups of pointers, /// according to needsChecking. bool needsChecking(const RuntimeCheckingPtrGroup &M, @@ -477,7 +510,15 @@ private: bool UseDependencies); /// Generate the checks and return them. - SmallVector generateChecks() const; + SmallVector generateChecks(); + + /// Try to add a new (pointer-difference, access size) pair to + /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference + /// checks cannot be used for the groups, set CanUseDiffCheck to false. + void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI, + const RuntimeCheckingPtrGroup &CGJ); + + MemoryDepChecker &DC; /// Holds a pointer to the ScalarEvolution analysis.
ScalarEvolution *SE; @@ -485,6 +526,13 @@ private: /// Set of run-time checks required to establish independence of /// otherwise may-aliasing pointers in the loop. SmallVector Checks; + + /// Flag indicating if pointer-difference checks can be used. + bool CanUseDiffCheck = true; + + /// A list of (pointer-difference, access size) pairs that can be used to + /// prove that there are no vectorization-preventing dependencies. + SmallVector<PointerDiffInfo> DiffChecks; }; /// Drive the analysis of memory accesses in the loop @@ -575,6 +623,11 @@ public: return HasDependenceInvolvingLoopInvariantAddress; } + /// Return the list of stores to invariant addresses. + const ArrayRef<StoreInst *> getStoresToInvariantAddresses() const { + return StoresToInvariantAddresses; + } + /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts /// them to a more usable form. All SCEV expressions during the analysis /// should be re-written (and therefore simplified) according to PSE. @@ -605,6 +658,11 @@ private: /// invariant. void collectStridedAccess(Value *LoadOrStoreInst); + // Emits the first unsafe memory dependence in a loop. + // Emits nothing if there are no unsafe dependences + // or if the dependences were not recorded. + void emitUnsafeDependenceRemark(); + std::unique_ptr<PredicatedScalarEvolution> PSE; /// We need to check that all of the pointers in this list are disjoint @@ -629,6 +687,9 @@ private: /// Indicator that there are non-vectorizable stores to a uniform address. bool HasDependenceInvolvingLoopInvariantAddress = false; + /// List of stores to invariant addresses. + SmallVector<StoreInst *> StoresToInvariantAddresses; + /// The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. std::unique_ptr<OptimizationRemarkAnalysis> Report; diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index d07e6977fed1..d22675a308aa 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -29,7 +29,6 @@ #ifndef LLVM_ANALYSIS_LOOPANALYSISMANAGER_H #define LLVM_ANALYSIS_LOOPANALYSISMANAGER_H -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index 21882ebd0087..4c5083f3c980 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -15,15 +15,17 @@ #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { class AAResults; class DependenceInfo; +class Instruction; class LPMUpdater; +class raw_ostream; +class LoopInfo; +class Loop; class ScalarEvolution; class SCEV; class TargetTransformInfo; @@ -96,6 +98,10 @@ private: /// Attempt to delinearize the indexed reference. bool delinearize(const LoopInfo &LI); + /// Attempt to delinearize \p AccessFn for fixed-size arrays. + bool tryDelinearizeFixedSize(const SCEV *AccessFn, + SmallVectorImpl<const SCEV *> &Subscripts); + /// Return true if the index reference is invariant with respect to loop \p L. bool isLoopInvariant(const Loop &L) const; /// smaller than the cache line size \p CLS. bool isConsecutive(const Loop &L, unsigned CLS) const; + /// Retrieve the index of the subscript corresponding to the given loop \p + /// L.
Return a zero-based positive index if the subscript index is + /// successfully located and a negative value otherwise. For example, given the + /// indexed reference 'A[i][2j+1][3k+2]', the call + /// 'getSubscriptIndex(loop-k)' would return value 2. + int getSubscriptIndex(const Loop &L) const; + /// Return the coefficient used in the rightmost dimension. const SCEV *getLastCoefficient() const; @@ -237,9 +250,10 @@ private: /// Sort the LoopCosts vector by decreasing cache cost. void sortLoopCosts() { - sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { - return A.second > B.second; - }); + stable_sort(LoopCosts, + [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { + return A.second > B.second; + }); } private: diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index a0ffdb07a7ec..9351b83ad747 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -44,7 +44,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -55,9 +54,10 @@ namespace llvm { class DominatorTree; +class InductionDescriptor; +class Instruction; class LoopInfo; class Loop; -class InductionDescriptor; class MDNode; class MemorySSAUpdater; class ScalarEvolution; @@ -112,6 +112,22 @@ public: /// parent is the innermost loop in which it is enclosed. LoopT *getParentLoop() const { return ParentLoop; } + /// Get the outermost loop in which this loop is contained. + /// This may be the loop itself, if it already is the outermost loop. + const LoopT *getOutermostLoop() const { + const LoopT *L = static_cast<const LoopT *>(this); + while (L->ParentLoop) + L = L->ParentLoop; + return L; + } + + LoopT *getOutermostLoop() { + LoopT *L = static_cast<LoopT *>(this); + while (L->ParentLoop) + L = L->ParentLoop; + return L; + } + /// This is a raw interface for bypassing addChildLoop. void setParentLoop(LoopT *L) { assert(!isInvalid() && "Loop not in a valid state!"); diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index b8b8330d0fe1..a96a698f3afb 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -14,7 +14,6 @@ #ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H #define LLVM_ANALYSIS_LOOPINFOIMPL_H -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" @@ -315,12 +314,11 @@ void LoopBase::verifyLoop() const { "Loop block has no in-loop predecessors!"); SmallVector OutsideLoopPreds; - std::for_each(GraphTraits<Inverse<BlockT *>>::child_begin(BB), - GraphTraits<Inverse<BlockT *>>::child_end(BB), - [&](BlockT *B) { - if (!contains(B)) - OutsideLoopPreds.push_back(B); - }); + for (BlockT *B : + llvm::make_range(GraphTraits<Inverse<BlockT *>>::child_begin(BB), + GraphTraits<Inverse<BlockT *>>::child_end(BB))) + if (!contains(B)) + OutsideLoopPreds.push_back(B); if (BB == getHeader()) { assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); @@ -455,8 +453,7 @@ static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT *> Backedges, InvBlockTraits::child_end(PredBB)); } else { // This is a discovered block. Find its outermost discovered loop. - while (LoopT *Parent = Subloop->getParentLoop()) - Subloop = Parent; + Subloop = Subloop->getOutermostLoop(); // If it is already discovered to be a subloop of this loop, continue.
if (Subloop == L) diff --git a/llvm/include/llvm/Analysis/LoopPass.h b/llvm/include/llvm/Analysis/LoopPass.h index 0fd2a39eefc0..c5f08d0ae8af 100644 --- a/llvm/include/llvm/Analysis/LoopPass.h +++ b/llvm/include/llvm/Analysis/LoopPass.h @@ -14,13 +14,14 @@ #ifndef LLVM_ANALYSIS_LOOPPASS_H #define LLVM_ANALYSIS_LOOPPASS_H -#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/LegacyPassManagers.h" #include "llvm/Pass.h" #include namespace llvm { +class Loop; +class LoopInfo; class LPPassManager; class Function; diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h index 7cf8a081f9a2..eada6a647763 100644 --- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h +++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -15,8 +15,9 @@ #ifndef LLVM_ANALYSIS_LOOPUNROLLANALYZER_H #define LLVM_ANALYSIS_LOOPUNROLLANALYZER_H -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/InstVisitor.h" // This class is used to get an estimate of the optimization effects that we @@ -36,6 +37,8 @@ // And finally: // v = b[1] namespace llvm { +class Instruction; + class UnrolledInstAnalyzer : private InstVisitor { typedef InstVisitor Base; friend class InstVisitor; diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index b1a81d5e7030..00e8d7d7dd4d 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -9,6 +9,7 @@ #ifndef LLVM_ANALYSIS_MLINLINEADVISOR_H #define LLVM_ANALYSIS_MLINLINEADVISOR_H +#include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MLModelRunner.h" @@ -19,6 +20,7 @@ #include namespace llvm { +class DiagnosticInfoOptimizationBase; class Module; class MLInlineAdvice; @@ -29,16 +31,19 @@ public: virtual ~MLInlineAdvisor() = default; - void onPassEntry() override; + void onPassEntry(LazyCallGraph::SCC *SCC) override; void onPassExit(LazyCallGraph::SCC *SCC) override; - int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); } + int64_t getIRSize(Function &F) const { + return getCachedFPI(F).TotalInstructionCount; + } void onSuccessfulInlining(const MLInlineAdvice &Advice, bool CalleeWasDeleted); bool isForcedToStop() const { return ForceStop; } int64_t getLocalCalls(Function &F); const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); } + FunctionPropertiesInfo &getCachedFPI(Function &) const; protected: std::unique_ptr getAdviceImpl(CallBase &CB) override; @@ -60,11 +65,11 @@ protected: private: int64_t getModuleIRSize() const; + std::unique_ptr + getSkipAdviceIfUnreachableCallsite(CallBase &CB); + void print(raw_ostream &OS) const override; - void print(raw_ostream &OS) const override { - OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount - << "\n"; - } + mutable DenseMap FPICache; LazyCallGraph &CG; @@ -75,7 +80,7 @@ private: std::map FunctionLevels; const int32_t InitialIRSize = 0; int32_t CurrentIRSize = 0; - std::deque NodesInLastSCC; + llvm::SmallPtrSet NodesInLastSCC; DenseSet AllNodes; bool ForceStop = false; }; @@ -85,16 +90,7 @@ private: class MLInlineAdvice : public InlineAdvice { public: MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB, - OptimizationRemarkEmitter &ORE, bool Recommendation) 
- : InlineAdvice(Advisor, CB, ORE, Recommendation), - CallerIRSize(Advisor->isForcedToStop() ? 0 - : Advisor->getIRSize(*Caller)), - CalleeIRSize(Advisor->isForcedToStop() ? 0 - : Advisor->getIRSize(*Callee)), - CallerAndCalleeEdges(Advisor->isForcedToStop() - ? 0 - : (Advisor->getLocalCalls(*Caller) + - Advisor->getLocalCalls(*Callee))) {} + OptimizationRemarkEmitter &ORE, bool Recommendation); virtual ~MLInlineAdvice() = default; void recordInliningImpl() override; @@ -108,13 +104,17 @@ public: const int64_t CallerIRSize; const int64_t CalleeIRSize; const int64_t CallerAndCalleeEdges; + void updateCachedCallerFPI(FunctionAnalysisManager &FAM) const; private: void reportContextForRemark(DiagnosticInfoOptimizationBase &OR); - MLInlineAdvisor *getAdvisor() const { return static_cast<MLInlineAdvisor *>(Advisor); }; + // Make a copy of the FPI of the caller right before inlining. If inlining + // fails, we can just update the cache with that value. + const FunctionPropertiesInfo PreInlineCallerFPI; + Optional<FunctionPropertiesUpdater> FPU; }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h index 669c02af0b3b..872c0e37f00e 100644 --- a/llvm/include/llvm/Analysis/MLModelRunner.h +++ b/llvm/include/llvm/Analysis/MLModelRunner.h @@ -10,10 +10,11 @@ #ifndef LLVM_ANALYSIS_MLMODELRUNNER_H #define LLVM_ANALYSIS_MLMODELRUNNER_H -#include "llvm/IR/LLVMContext.h" +#include "llvm/Analysis/TensorSpec.h" #include "llvm/IR/PassManager.h" namespace llvm { +class LLVMContext; /// MLModelRunner interface: abstraction of a mechanism for evaluating a /// tensorflow "saved model". @@ -41,7 +42,7 @@ public: getTensorUntyped(static_cast<size_t>(FeatureID))); } - virtual void *getTensorUntyped(size_t Index) = 0; + void *getTensorUntyped(size_t Index) { return InputBuffers[Index]; } const void *getTensorUntyped(size_t Index) const { return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index); } @@ -50,13 +51,27 @@ public: Kind getKind() const { return Type; } protected: - MLModelRunner(LLVMContext &Ctx, Kind Type) : Ctx(Ctx), Type(Type) { + MLModelRunner(LLVMContext &Ctx, Kind Type, size_t NrInputs) + : Ctx(Ctx), Type(Type), InputBuffers(NrInputs) { assert(Type != Kind::Unknown); } virtual void *evaluateUntyped() = 0; + void setUpBufferForTensor(size_t Index, const TensorSpec &Spec, + void *Buffer) { + if (!Buffer) { + OwnedBuffers.emplace_back(Spec.getTotalTensorBufferSize()); + Buffer = OwnedBuffers.back().data(); + } + InputBuffers[Index] = Buffer; + } + LLVMContext &Ctx; const Kind Type; + +private: + std::vector<void *> InputBuffers; + std::vector<std::vector<char>> OwnedBuffers; }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index d5b60ee540e0..7ad83612880f 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -28,6 +28,7 @@ namespace llvm { class AllocaInst; +class AAResults; class Argument; class CallInst; class ConstantPointerNull; @@ -100,7 +101,10 @@ inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { /// insertion or speculative execution of allocation routines. bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI); -/// Gets the alignment argument for an aligned_alloc-like function +/// Gets the alignment argument for an aligned_alloc-like function, using either +/// built-in knowledge based on function names/signatures or allocalign +/// attributes.
Note: the Value returned may not indicate a valid alignment, per +/// the definition of the allocalign attribute. Value *getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI); /// Return the size of the requested allocation. With a trivial mapper, this is @@ -111,12 +115,19 @@ Optional getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, std::function Mapper); -/// If this allocation function initializes memory to a fixed value, return -/// said value in the requested type. Otherwise, return nullptr. -Constant *getInitialValueOfAllocation(const CallBase *Alloc, +/// If this is a call to an allocation function that initializes memory to a +/// fixed value, return said value in the requested type. Otherwise, return +/// nullptr. +Constant *getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty); +/// If a function is part of an allocation family (e.g. +/// malloc/realloc/calloc/free), return the identifier for its family +/// of functions. +Optional getAllocationFamily(const Value *I, + const TargetLibraryInfo *TLI); + //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. // @@ -143,6 +154,8 @@ struct ObjectSizeOpts { /// though they can't be evaluated. Otherwise, null is always considered to /// point to a 0 byte region of memory. bool NullIsUnknownSize = false; + /// If set, used for more accurate evaluation + AAResults *AA = nullptr; }; /// Compute the size of the object pointed by Ptr. Returns true and the @@ -162,8 +175,9 @@ bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, /// argument of the call to objectsize. Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed); - - +Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, + const TargetLibraryInfo *TLI, AAResults *AA, + bool MustSucceed); using SizeOffsetType = std::pair; @@ -210,7 +224,6 @@ public: SizeOffsetType visitConstantPointerNull(ConstantPointerNull&); SizeOffsetType visitExtractElementInst(ExtractElementInst &I); SizeOffsetType visitExtractValueInst(ExtractValueInst &I); - SizeOffsetType visitGEPOperator(GEPOperator &GEP); SizeOffsetType visitGlobalAlias(GlobalAlias &GA); SizeOffsetType visitGlobalVariable(GlobalVariable &GV); SizeOffsetType visitIntToPtrInst(IntToPtrInst&); @@ -221,6 +234,12 @@ public: SizeOffsetType visitInstruction(Instruction &I); private: + SizeOffsetType findLoadSizeOffset( + LoadInst &LoadFrom, BasicBlock &BB, BasicBlock::iterator From, + SmallDenseMap &VisitedBlocks, + unsigned &ScannedInstCount); + SizeOffsetType combineSizeOffset(SizeOffsetType LHS, SizeOffsetType RHS); + SizeOffsetType computeImpl(Value *V); bool CheckedZextOrTrunc(APInt &I); }; diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index 23e50f601e04..dfac49445d75 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -36,6 +36,7 @@ class AnyMemTransferInst; class AnyMemIntrinsic; class TargetLibraryInfo; class VAArgInst; +class Value; // Represents the size of a MemoryLocation. 
Logically, it's an // Optional that also carries a bit to represent whether the integer diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index b41f5771bacd..8cadb6a4c912 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -66,6 +66,19 @@ /// MemoryDefs are not disambiguated because it would require multiple reaching /// definitions, which would require multiple phis, and multiple memoryaccesses /// per instruction. +/// +/// In addition to the def/use graph described above, MemoryDefs also contain +/// an "optimized" definition use. The "optimized" use points to some def +/// reachable through the memory def chain. The optimized def *may* (but is +/// not required to) alias the original MemoryDef, but no def *closer* to the +/// source def may alias it. As the name implies, the purpose of the optimized +/// use is to allow caching of clobber searches for memory defs. The optimized +/// def may be nullptr, in which case clients must walk the defining access +/// chain. +/// +/// When iterating the uses of a MemoryDef, both defining uses and optimized +/// uses will be encountered. If only one type is needed, the client must +/// filter the use walk. // //===----------------------------------------------------------------------===// @@ -73,30 +86,18 @@ #define LLVM_ANALYSIS_MEMORYSSA_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/simple_ilist.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedUser.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" #include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include #include #include @@ -106,11 +107,16 @@ namespace llvm { +template struct GraphTraits; +class BasicBlock; class Function; class Instruction; +class LLVMContext; class MemoryAccess; class MemorySSAWalker; -class LLVMContext; +class Module; +class Use; +class Value; class raw_ostream; namespace MSSAHelpers { @@ -259,10 +265,11 @@ public: return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal; } - // Sadly, these have to be public because they are needed in some of the - // iterators. + /// Do we have an optimized use? inline bool isOptimized() const; + /// Return the MemoryAccess associated with the optimized use, or nullptr. inline MemoryAccess *getOptimized() const; + /// Sets the optimized use for a MemoryDef. inline void setOptimized(MemoryAccess *); // Retrieve AliasResult type of the optimized access. Ideally this would be @@ -339,6 +346,9 @@ public: setOperand(0, DMA); } + /// Whether the MemoryUse is optimized. If ensureOptimizedUses() was called, + /// uses will usually be optimized, but this is not guaranteed (e.g. due to + /// invalidation and optimization limits.) bool isOptimized() const { return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); } @@ -791,6 +801,13 @@ public: /// about the beginning or end of a block. 
enum InsertionPlace { Beginning, End, BeforeTerminator }; + /// By default, uses are *not* optimized during MemorySSA construction. + /// Calling this method will attempt to optimize all MemoryUses, if this has + /// not happened yet for this MemorySSA instance. This should be done if you + /// plan to query the clobbering access for most uses, or if you walk the + /// def-use chain of uses. + void ensureOptimizedUses(); + protected: // Used by Memory SSA dumpers and wrapper pass friend class MemorySSAPrinterLegacyPass; @@ -893,6 +910,7 @@ private: std::unique_ptr> Walker; std::unique_ptr> SkipWalker; unsigned NextID = 0; + bool IsOptimized = false; }; /// Enables verification of MemorySSA. diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index 3e5ebe9cb427..2bcd1a462871 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -31,7 +31,6 @@ #ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H #define LLVM_ANALYSIS_MEMORYSSAUPDATER_H -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -39,7 +38,6 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Support/CFGDiff.h" -#include namespace llvm { @@ -47,6 +45,7 @@ class BasicBlock; class DominatorTree; class Instruction; class LoopBlocksRPO; +template class SmallSetVector; using ValueToValueMapTy = ValueMap; using PhiToDefMap = SmallDenseMap; diff --git a/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h b/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h index 071ccf96fe5b..72bd185b6c32 100644 --- a/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h +++ b/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h @@ -10,6 +10,7 @@ #ifndef LLVM_ANALYSIS_MODELUNDERTRAININGRUNNER_H #define LLVM_ANALYSIS_MODELUNDERTRAININGRUNNER_H +#include "llvm/Analysis/TensorSpec.h" #include "llvm/Config/llvm-config.h" #ifdef LLVM_HAVE_TF_API @@ -48,6 +49,11 @@ public: StringRef DecisionName, const std::vector &InputSpecs, StringRef OutputSpecsPathOverride = ""); + static std::unique_ptr + createAndEnsureValid(LLVMContext &Ctx, const std::string &ModelPath, + StringRef DecisionName, + const std::vector &InputSpecs, + const std::vector &OutputSpecs); private: ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath, @@ -58,7 +64,6 @@ private: const std::vector OutputSpecs; Optional LastEvaluationResult; void *evaluateUntyped() override; - void *getTensorUntyped(size_t Index) override; bool isValid() const { return !!Evaluator; } }; diff --git a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h index 99aa315319b8..fa91e4f653d0 100644 --- a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h +++ b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h @@ -11,9 +11,9 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { +class raw_ostream; class ModuleDebugInfoPrinterPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index 18a0bfee5730..1e4994207555 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -28,7 +28,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/raw_ostream.h" 
namespace llvm { @@ -42,6 +41,7 @@ class Instruction; class Loop; class LoopInfo; class PostDominatorTree; +class raw_ostream; /// Captures loop safety information. /// It keep information for loop blocks may throw exception or otherwise diff --git a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h index 5bcedf98865c..980b40500d7c 100644 --- a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h +++ b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h @@ -10,13 +10,9 @@ #ifndef LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H #define LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H -#include "llvm/Config/llvm-config.h" - -/// While not strictly necessary to conditionally compile this, it really -/// has no usecase outside the 'development' mode. -#ifdef LLVM_HAVE_TF_API #include "llvm/Analysis/MLModelRunner.h" -#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/Analysis/TensorSpec.h" +#include "llvm/Config/llvm-config.h" namespace llvm { /// A pseudo model runner. We use it to store feature values when collecting /// logs for the default policy, in 'development' mode, but never ask it to @@ -34,10 +30,6 @@ private: void *evaluateUntyped() override { llvm_unreachable("We shouldn't call run on this model runner."); } - void *getTensorUntyped(size_t Index) override; - - std::vector> ValuesBuffer; }; } // namespace llvm -#endif // defined(LLVM_HAVE_TF_API) #endif // LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h index 385fa5422926..56faa20c4c6e 100644 --- a/llvm/include/llvm/Analysis/ObjCARCUtil.h +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -35,7 +35,7 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) { // functions. return !CB->getFunctionType()->getReturnType()->isVoidTy() && CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall) - .hasValue(); + .has_value(); } /// This function returns operand bundle clang_arc_attachedcall's argument, @@ -59,7 +59,7 @@ inline bool isRetainOrClaimRV(ARCInstKind Kind) { /// or UnsafeClaimRV. 
inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) { Optional Fn = getAttachedARCFunction(CB); - if (!Fn.hasValue()) + if (!Fn) return ARCInstKind::None; auto FnClass = GetFunctionClass(*Fn); assert(isRetainOrClaimRV(FnClass) && "unexpected ARC runtime function"); diff --git a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h index 7523fb9392cd..761d20f17a8b 100644 --- a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h +++ b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h @@ -14,11 +14,9 @@ #ifndef LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H #define LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H -#include "llvm/IR/InstrTypes.h" - namespace llvm { -class Value; class Use; +class Value; /// Match one of the patterns up to the select/logic op: /// %Op0 = icmp ne i4 %X, 0 diff --git a/llvm/include/llvm/Analysis/PhiValues.h b/llvm/include/llvm/Analysis/PhiValues.h index c0e91c8b0bdf..ecbb8874b378 100644 --- a/llvm/include/llvm/Analysis/PhiValues.h +++ b/llvm/include/llvm/Analysis/PhiValues.h @@ -22,7 +22,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" diff --git a/llvm/include/llvm/Analysis/PostDominators.h b/llvm/include/llvm/Analysis/PostDominators.h index 296110d8d03b..4383113c8db1 100644 --- a/llvm/include/llvm/Analysis/PostDominators.h +++ b/llvm/include/llvm/Analysis/PostDominators.h @@ -102,10 +102,7 @@ template <> struct GraphTraits } static nodes_iterator nodes_begin(PostDominatorTree *N) { - if (getEntryNode(N)) - return df_begin(getEntryNode(N)); - else - return df_end(getEntryNode(N)); + return df_begin(getEntryNode(N)); } static nodes_iterator nodes_end(PostDominatorTree *N) { diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 886800d8a0f5..773784ac418c 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -170,11 +170,11 @@ public: uint64_t getOrCompColdCountThreshold() const; /// Returns HotCountThreshold if set. uint64_t getHotCountThreshold() const { - return HotCountThreshold.getValueOr(0); + return HotCountThreshold.value_or(0); } /// Returns ColdCountThreshold if set. 
uint64_t getColdCountThreshold() const { - return ColdCountThreshold.getValueOr(0); + return ColdCountThreshold.value_or(0); } private: diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h index 78e9251da627..86206b2d5e9f 100644 --- a/llvm/include/llvm/Analysis/PtrUseVisitor.h +++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h @@ -26,22 +26,15 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstVisitor.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/User.h" -#include "llvm/Support/Casting.h" -#include #include #include namespace llvm { +class DataLayout; +class Use; namespace detail { diff --git a/llvm/include/llvm/Analysis/RegionInfo.h b/llvm/include/llvm/Analysis/RegionInfo.h index f93081d6f51d..612b977f1ffa 100644 --- a/llvm/include/llvm/Analysis/RegionInfo.h +++ b/llvm/include/llvm/Analysis/RegionInfo.h @@ -42,11 +42,9 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" #include #include #include @@ -58,6 +56,7 @@ namespace llvm { +class BasicBlock; class DominanceFrontier; class Loop; class LoopInfo; @@ -67,6 +66,7 @@ template class RegionBase; class RegionInfo; template class RegionInfoBase; class RegionNode; +class raw_ostream; // Class to be specialized for different users of RegionInfo // (i.e. BasicBlocks or MachineBasicBlocks). 
This is only to avoid needing to @@ -242,7 +242,7 @@ public: /// /// You can obtain more examples by either calling /// -/// "opt -regions -analyze anyprogram.ll" +/// "opt -passes='print<regions>' anyprogram.ll" /// or /// "opt -view-regions-only anyprogram.ll" /// diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h index b694effb2229..561702db3790 100644 --- a/llvm/include/llvm/Analysis/RegionInfoImpl.h +++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h @@ -15,8 +15,6 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/RegionInfo.h" @@ -24,7 +22,6 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include #include #include @@ -37,6 +34,7 @@ #define DEBUG_TYPE "region" namespace llvm { +class raw_ostream; //===----------------------------------------------------------------------===// /// RegionBase Implementation diff --git a/llvm/include/llvm/Analysis/RegionIterator.h b/llvm/include/llvm/Analysis/RegionIterator.h index fecb28725dcc..ba28b1b902ea 100644 --- a/llvm/include/llvm/Analysis/RegionIterator.h +++ b/llvm/include/llvm/Analysis/RegionIterator.h @@ -15,7 +15,6 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Analysis/RegionInfo.h" -#include "llvm/IR/CFG.h" #include #include #include @@ -23,6 +22,7 @@ namespace llvm { class BasicBlock; +class RegionInfo; //===----------------------------------------------------------------------===// /// Hierarchical RegionNode successor iterator. diff --git a/llvm/include/llvm/Analysis/RegionPass.h b/llvm/include/llvm/Analysis/RegionPass.h index 5c7fa5f56693..dd5e6a1a3b24 100644 --- a/llvm/include/llvm/Analysis/RegionPass.h +++ b/llvm/include/llvm/Analysis/RegionPass.h @@ -15,7 +15,6 @@ #ifndef LLVM_ANALYSIS_REGIONPASS_H #define LLVM_ANALYSIS_REGIONPASS_H -#include "llvm/Analysis/RegionInfo.h" #include "llvm/IR/LegacyPassManagers.h" #include "llvm/Pass.h" #include @@ -23,6 +22,8 @@ namespace llvm { class Function; class RGPassManager; +class Region; +class RegionInfo; //===----------------------------------------------------------------------===// /// A pass that runs on each Region in a function. diff --git a/llvm/include/llvm/Analysis/RegionPrinter.h b/llvm/include/llvm/Analysis/RegionPrinter.h index 154ac35c486a..501a5406236e 100644 --- a/llvm/include/llvm/Analysis/RegionPrinter.h +++ b/llvm/include/llvm/Analysis/RegionPrinter.h @@ -14,6 +14,9 @@ #ifndef LLVM_ANALYSIS_REGIONPRINTER_H #define LLVM_ANALYSIS_REGIONPRINTER_H +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/RegionInfo.h" + namespace llvm { class FunctionPass; class Function; @@ -24,6 +27,13 @@ namespace llvm { FunctionPass *createRegionPrinterPass(); FunctionPass *createRegionOnlyPrinterPass(); + template <> + struct DOTGraphTraits<RegionNode *> : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(RegionNode *Node, RegionNode *Graph); + }; + #ifndef NDEBUG /// Open a viewer to display the GraphViz visualization of the analysis /// result.
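The RegionPrinter change above is an instance of the generic DOTGraphTraits mechanism. For orientation, here is a sketch of the pattern with a hypothetical graph type (MyGraph and MyNode are placeholders, not LLVM classes):

#include "llvm/Support/DOTGraphTraits.h"
#include <string>

struct MyNode { std::string Name; };
struct MyGraph { /* GraphTraits<MyGraph *> would be specialized elsewhere */ };

namespace llvm {
// Specializing DOTGraphTraits teaches the DOT writer (and, by extension,
// the DOTGraphTraits printer/viewer passes) how to label nodes of the graph.
template <> struct DOTGraphTraits<MyGraph *> : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}

  static std::string getNodeLabel(const MyNode *Node, MyGraph *) {
    return Node->Name; // text shown inside each DOT node
  }
};
} // namespace llvm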
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h index 1bf2e853980c..bf1aaca2adbb 100644 --- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h +++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h @@ -15,11 +15,12 @@ #define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H #include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/TensorSpec.h" +#include "llvm/Support/ErrorHandling.h" #include #include -using namespace llvm; namespace llvm { /// ReleaseModeModelRunner - production mode implementation of the @@ -30,21 +31,20 @@ public: /// FeatureNames' type should be an indexed collection of std::string, like /// std::array or std::vector, that has a size() method. template <typename FType> - ReleaseModeModelRunner(LLVMContext &Ctx, const FType &FeatureNames, + ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec, StringRef DecisionName, StringRef FeedPrefix = "feed_", StringRef FetchPrefix = "fetch_") - : MLModelRunner(Ctx, MLModelRunner::Kind::Release), + : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size()), CompiledModel(std::make_unique<TGen>()) { assert(CompiledModel && "The CompiledModel should be valid"); - const size_t FeatureCount = FeatureNames.size(); - FeatureIndices.resize(FeatureCount); - - for (size_t I = 0; I < FeatureCount; ++I) { + for (size_t I = 0; I < InputSpec.size(); ++I) { const int Index = - CompiledModel->LookupArgIndex(FeedPrefix.str() + FeatureNames[I]); - assert(Index >= 0 && "Cannot find Feature in inlining model"); - FeatureIndices[I] = Index; + CompiledModel->LookupArgIndex(FeedPrefix.str() + InputSpec[I].name()); + void *Buffer = nullptr; + if (Index >= 0) + Buffer = CompiledModel->arg_data(Index); + setUpBufferForTensor(I, InputSpec[I], Buffer); } ResultIndex = CompiledModel->LookupResultIndex(FetchPrefix.str() + @@ -64,15 +64,27 @@ private: return CompiledModel->result_data(ResultIndex); } - void *getTensorUntyped(size_t Index) override { - return reinterpret_cast( - CompiledModel->arg_data(FeatureIndices[Index])); - } - - std::vector<int32_t> FeatureIndices; int32_t ResultIndex = -1; std::unique_ptr<TGen> CompiledModel; }; + +/// A mock class satisfying the interface expected by ReleaseModeModelRunner for +/// its `TGen` parameter. Useful to avoid conditional compilation complexity, as +/// a compile-time replacement for a real AOT-ed model. +class NoopSavedModelImpl final { +#define NOOP_MODEL_ERRMSG \ + "The mock AOT-ed saved model is a compile-time stub and should not be " \ + "called."
+ +public: + NoopSavedModelImpl() = default; + int LookupArgIndex(const std::string &) { llvm_unreachable(NOOP_MODEL_ERRMSG); } + int LookupResultIndex(const std::string &) { llvm_unreachable(NOOP_MODEL_ERRMSG); } + void Run() { llvm_unreachable(NOOP_MODEL_ERRMSG); } + void *result_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); } + void *arg_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); } +#undef NOOP_MODEL_ERRMSG +}; } // namespace llvm #endif // LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h index dc2efeafb568..0c5b566f60a4 100644 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -11,11 +11,11 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Analysis/InlineAdvisor.h" -#include "llvm/IR/LLVMContext.h" namespace llvm { class CallBase; class Function; +class LLVMContext; class Module; struct CallSiteFormat { @@ -53,10 +53,12 @@ struct ReplayInlinerSettings { /// Get call site location as a string with the given format std::string formatCallSiteLocation(DebugLoc DLoc, const CallSiteFormat &Format); -std::unique_ptr getReplayInlineAdvisor( - Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, - std::unique_ptr OriginalAdvisor, - const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks); +std::unique_ptr +getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, + LLVMContext &Context, + std::unique_ptr OriginalAdvisor, + const ReplayInlinerSettings &ReplaySettings, + bool EmitRemarks, InlineContext IC); /// Replay inline advisor that uses optimization remarks from inlining of /// previous build to guide current inlining. This is useful for inliner tuning. @@ -66,7 +68,7 @@ public: LLVMContext &Context, std::unique_ptr OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, - bool EmitRemarks); + bool EmitRemarks, InlineContext IC); std::unique_ptr getAdviceImpl(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index b16aa7017719..de1cc299f062 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -31,18 +31,12 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/ConstantRange.h" -#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" -#include #include #include #include @@ -50,12 +44,14 @@ namespace llvm { +class OverflowingBinaryOperator; class AssumptionCache; class BasicBlock; class Constant; class ConstantInt; class DataLayout; class DominatorTree; +class Function; class GEPOperator; class Instruction; class LLVMContext; @@ -71,6 +67,8 @@ class Type; class Value; enum SCEVTypes : unsigned short; +extern bool VerifySCEV; + /// This class represents an analyzed expression in the program. These are /// opaque objects that the client is not allowed to do much with directly. 
/// @@ -222,7 +220,7 @@ class SCEVPredicate : public FoldingSetNode { FoldingSetNodeIDRef FastID; public: - enum SCEVPredicateKind { P_Union, P_Equal, P_Wrap }; + enum SCEVPredicateKind { P_Union, P_Compare, P_Wrap }; protected: SCEVPredicateKind Kind; @@ -249,10 +247,6 @@ public: /// Prints a textual representation of this predicate with an indentation of /// \p Depth. virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0; - - /// Returns the SCEV to which this predicate applies, or nullptr if this is - /// a SCEVUnionPredicate. - virtual const SCEV *getExpr() const = 0; }; inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) { @@ -279,32 +273,35 @@ struct FoldingSetTrait : DefaultFoldingSetTrait { } }; -/// This class represents an assumption that two SCEV expressions are equal, -/// and this can be checked at run-time. -class SCEVEqualPredicate final : public SCEVPredicate { - /// We assume that LHS == RHS. +/// This class represents an assumption that the expression LHS Pred RHS +/// evaluates to true, and this can be checked at run-time. +class SCEVComparePredicate final : public SCEVPredicate { + /// We assume that LHS Pred RHS is true. + const ICmpInst::Predicate Pred; const SCEV *LHS; const SCEV *RHS; public: - SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS, - const SCEV *RHS); + SCEVComparePredicate(const FoldingSetNodeIDRef ID, + const ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); /// Implementation of the SCEVPredicate interface bool implies(const SCEVPredicate *N) const override; void print(raw_ostream &OS, unsigned Depth = 0) const override; bool isAlwaysTrue() const override; - const SCEV *getExpr() const override; - /// Returns the left hand side of the equality. + ICmpInst::Predicate getPredicate() const { return Pred; } + + /// Returns the left hand side of the predicate. const SCEV *getLHS() const { return LHS; } - /// Returns the right hand side of the equality. + /// Returns the right hand side of the predicate. const SCEV *getRHS() const { return RHS; } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEVPredicate *P) { - return P->getKind() == P_Equal; + return P->getKind() == P_Compare; } }; @@ -396,7 +393,7 @@ public: IncrementWrapFlags getFlags() const { return Flags; } /// Implementation of the SCEVPredicate interface - const SCEV *getExpr() const override; + const SCEVAddRecExpr *getExpr() const; bool implies(const SCEVPredicate *N) const override; void print(raw_ostream &OS, unsigned Depth = 0) const override; bool isAlwaysTrue() const override; @@ -421,28 +418,20 @@ private: /// Vector with references to all predicates in this union. SmallVector Preds; - /// Maps SCEVs to predicates for quick look-ups. - PredicateMap SCEVToPreds; + /// Adds a predicate to this union. + void add(const SCEVPredicate *N); public: - SCEVUnionPredicate(); + SCEVUnionPredicate(ArrayRef Preds); const SmallVectorImpl &getPredicates() const { return Preds; } - /// Adds a predicate to this union. - void add(const SCEVPredicate *N); - - /// Returns a reference to a vector containing all predicates which apply to - /// \p Expr. 
-  ArrayRef getPredicatesForExpr(const SCEV *Expr);
-
   /// Implementation of the SCEVPredicate interface
   bool isAlwaysTrue() const override;
   bool implies(const SCEVPredicate *N) const override;
   void print(raw_ostream &OS, unsigned Depth) const override;
-  const SCEV *getExpr() const override;

   /// We estimate the complexity of a union predicate as the number of
   /// predicates in the union.
@@ -556,6 +545,10 @@ public:
   /// Return true if the SCEV expression contains an undef value.
   bool containsUndefs(const SCEV *S) const;

+  /// Return true if the SCEV expression contains a Value that has been
+  /// optimised out and is now a nullptr.
+  bool containsErasedValue(const SCEV *S) const;
+
   /// Return a SCEV expression for the full generality of the specified
   /// expression.
   const SCEV *getSCEV(Value *V);
@@ -885,7 +878,7 @@ public:
   /// the answer to be correct. Predicates can be checked with run-time
   /// checks and can be used to perform loop versioning.
   const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
-                                              SCEVUnionPredicate &Predicates);
+                                              SmallVector &Predicates);

   /// When successful, this returns a SCEVConstant that is greater than or equal
   /// to (i.e. a "conservative over-approximation") of the value returned by
@@ -1166,6 +1159,8 @@ public:
   }

   const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS);
+  const SCEVPredicate *getComparePredicate(ICmpInst::Predicate Pred,
+                                           const SCEV *LHS, const SCEV *RHS);

   const SCEVPredicate *
   getWrapPredicate(const SCEVAddRecExpr *AR,
@@ -1173,7 +1168,7 @@ public:

   /// Re-writes the SCEV according to the Predicates in \p A.
   const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L,
-                                    SCEVUnionPredicate &A);
+                                    const SCEVPredicate &A);

   /// Tries to convert the \p S expression to an AddRec expression,
   /// adding additional predicates to \p Preds as required.
   const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates(
@@ -1256,30 +1251,11 @@ private:
   HasRecMapType HasRecMap;

   /// The type for ExprValueMap.
-  using ValueOffsetPair = std::pair;
-  using ValueOffsetPairSetVector = SmallSetVector;
-  using ExprValueMapType = DenseMap;
+  using ValueSetVector = SmallSetVector;
+  using ExprValueMapType = DenseMap;

   /// ExprValueMap -- This map records the original values from which
   /// the SCEV expr is generated.
-  ///
-  /// We want to represent the mapping as SCEV -> ValueOffsetPair instead
-  /// of SCEV -> Value:
-  /// Suppose we know S1 expands to V1, and
-  ///  S1 = S2 + C_a
-  ///  S3 = S2 + C_b
-  /// where C_a and C_b are different SCEVConstants. Then we'd like to
-  /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally.
-  /// It is helpful when S2 is a complex SCEV expr.
-  ///
-  /// In order to do that, we represent ExprValueMap as a mapping from
-  /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and
-  /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3
-  /// is expanded, it will first expand S2 to V1 - C_a because of
-  /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b.
-  ///
-  /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded
-  /// to V - Offset.
   ExprValueMapType ExprValueMap;

   /// The type for ValueExprMap.
@@ -1310,7 +1286,7 @@ private:
   DenseMap MinTrailingZerosCache;

   /// Return the Value set from which the SCEV expr is generated.
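Taken together, these hunks replace the mutable union predicate with a list-of-predicates flow. A rough sketch of the intended call pattern; the element type of the stripped SmallVector parameter is assumed here to be const SCEVPredicate *, and the loop itself is a placeholder:

    #include "llvm/Analysis/ScalarEvolution.h"

    using namespace llvm;

    // Collect the predicates guarding the predicated backedge-taken count,
    // then bundle them into one SCEVUnionPredicate (now built up-front from
    // a list instead of grown with add()) and rewrite under it.
    const SCEV *predicatedBTC(ScalarEvolution &SE, const Loop *L) {
      SmallVector<const SCEVPredicate *, 4> Preds;
      const SCEV *BTC = SE.getPredicatedBackedgeTakenCount(L, Preds);
      SCEVUnionPredicate Union(Preds);
      return SE.rewriteUsingPredicate(BTC, L, Union);
    }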
- ValueOffsetPairSetVector *getSCEVValues(const SCEV *S); + ArrayRef getSCEVValues(const SCEV *S); /// Private helper method for the GetMinTrailingZeros method uint32_t GetMinTrailingZerosImpl(const SCEV *S); @@ -1369,17 +1345,17 @@ private: PoisoningVH ExitingBlock; const SCEV *ExactNotTaken; const SCEV *MaxNotTaken; - std::unique_ptr Predicate; + SmallPtrSet Predicates; explicit ExitNotTakenInfo(PoisoningVH ExitingBlock, const SCEV *ExactNotTaken, const SCEV *MaxNotTaken, - std::unique_ptr Predicate) + const SmallPtrSet &Predicates) : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken), - MaxNotTaken(ExactNotTaken), Predicate(std::move(Predicate)) {} + MaxNotTaken(ExactNotTaken), Predicates(Predicates) {} bool hasAlwaysTruePredicate() const { - return !Predicate || Predicate->isAlwaysTrue(); + return Predicates.empty(); } }; @@ -1452,7 +1428,7 @@ private: /// vector, this information can contain them and therefore a /// SCEVPredicate argument should be added to getExact. const SCEV *getExact(const Loop *L, ScalarEvolution *SE, - SCEVUnionPredicate *Predicates = nullptr) const; + SmallVector *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via @@ -1599,9 +1575,17 @@ private: ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U); /// We know that there is no SCEV for the specified value. Analyze the - /// expression. + /// expression recursively. const SCEV *createSCEV(Value *V); + /// We know that there is no SCEV for the specified value. Create a new SCEV + /// for \p V iteratively. + const SCEV *createSCEVIter(Value *V); + /// Collect operands of \p V for which SCEV expressions should be constructed + /// first. Returns a SCEV directly if it can be constructed trivially for \p + /// V. + const SCEV *getOperandsToCreate(Value *V, SmallVectorImpl &Ops); + /// Provide the special handling we need to analyze PHI SCEVs. const SCEV *createNodeForPHI(PHINode *PN); @@ -1619,8 +1603,22 @@ private: /// is either a select instruction or a phi node). \p I is the instruction /// being processed, and it is assumed equivalent to "Cond ? TrueVal : /// FalseVal". - const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond, - Value *TrueVal, Value *FalseVal); + const SCEV *createNodeForSelectOrPHIInstWithICmpInstCond(Instruction *I, + ICmpInst *Cond, + Value *TrueVal, + Value *FalseVal); + + /// See if we can model this select-like instruction via umin_seq expression. + const SCEV *createNodeForSelectOrPHIViaUMinSeq(Value *I, Value *Cond, + Value *TrueVal, + Value *FalseVal); + + /// Given a value \p V, which is a select-like instruction (currently this is + /// either a select instruction or a phi node), which is assumed equivalent to + /// Cond ? TrueVal : FalseVal + /// see if we can model it as a SCEV expression. + const SCEV *createNodeForSelectOrPHI(Value *V, Value *Cond, Value *TrueVal, + Value *FalseVal); /// Provide the special handling we need to analyze GEP SCEVs. const SCEV *createNodeForGEP(GEPOperator *GEP); @@ -2097,6 +2095,11 @@ private: /// `UniqueSCEVs`. Return if found, else nullptr. SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef Ops); + /// Get reachable blocks in this function, making limited use of SCEV + /// reasoning about conditions. 
+ void getReachableBlocks(SmallPtrSetImpl &Reachable, + Function &F); + FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; @@ -2182,7 +2185,7 @@ class PredicatedScalarEvolution { public: PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L); - const SCEVUnionPredicate &getUnionPredicate() const; + const SCEVPredicate &getPredicate() const; /// Returns the SCEV expression of V, in the context of the current SCEV /// predicate. The order of transformations applied on the expression of V @@ -2251,7 +2254,7 @@ private: /// The SCEVPredicate that forms our context. We will rewrite all /// expressions assuming that this predicate true. - SCEVUnionPredicate Preds; + std::unique_ptr Preds; /// Marks the version of the SCEV predicate used. When rewriting a SCEV /// expression we mark it with the version of the predicate. We use this to diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h index ebd427354cee..15e27283021c 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -14,13 +14,14 @@ #define LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/Pass.h" namespace llvm { +class Function; +class ScalarEvolution; +class SCEV; + /// A simple alias analysis implementation that uses ScalarEvolution to answer /// queries. class SCEVAAResult : public AAResultBase { diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h index cd8e5fab6766..b29854cddc66 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -14,13 +14,11 @@ #define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -31,9 +29,11 @@ namespace llvm { class APInt; class Constant; +class ConstantInt; class ConstantRange; class Loop; class Type; +class Value; enum SCEVTypes : unsigned short { // These should be ordered in terms of increasing complexity to make the @@ -699,8 +699,11 @@ public: case scUMinExpr: case scSequentialUMinExpr: case scAddRecExpr: - for (const auto *Op : cast(S)->operands()) + for (const auto *Op : cast(S)->operands()) { push(Op); + if (Visitor.isDone()) + break; + } continue; case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(S); diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h index 6ab92a3a977f..da420ff1e6d2 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -35,7 +35,7 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H #define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallPtrSet.h" namespace llvm { diff --git a/llvm/include/llvm/Analysis/ScalarFuncs.def 
b/llvm/include/llvm/Analysis/ScalarFuncs.def
new file mode 100644
index 000000000000..2ed9be538091
--- /dev/null
+++ b/llvm/include/llvm/Analysis/ScalarFuncs.def
@@ -0,0 +1,117 @@
+//===-- ScalarFuncs.def - Library information ----------*- C++ -*----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file creates a mapping from standard IEEE math functions to
+// their corresponding entries in the IBM MASS (scalar) library.
+// LLVM intrinsic math functions will be handled in PPCISelLowering to
+// allow existing optimizations like pow(x,0.5) --> sqrt(x).
+
+#if defined(TLI_DEFINE_SCALAR_MASS_FUNCS)
+#define TLI_DEFINE_SCALAR_MASS_FUNC(SCAL, MASSENTRY) {SCAL, MASSENTRY},
+#endif
+
+TLI_DEFINE_SCALAR_MASS_FUNC("acosf", "__xl_acosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acosf_finite", "__xl_acosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("acos", "__xl_acos")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acos_finite", "__xl_acos")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("acoshf", "__xl_acoshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acoshf_finite", "__xl_acoshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("acosh", "__xl_acosh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acosh_finite", "__xl_acosh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("asinf", "__xl_asinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__asinf_finite", "__xl_asinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("asin", "__xl_asin")
+TLI_DEFINE_SCALAR_MASS_FUNC("__asin_finite", "__xl_asin")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("asinhf", "__xl_asinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("asinh", "__xl_asinh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atanf", "__xl_atanf")
+TLI_DEFINE_SCALAR_MASS_FUNC("atan", "__xl_atan")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atan2f", "__xl_atan2f")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atan2f_finite", "__xl_atan2f")
+TLI_DEFINE_SCALAR_MASS_FUNC("atan2", "__xl_atan2")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atan2_finite", "__xl_atan2")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atanhf", "__xl_atanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atanhf_finite", "__xl_atanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("atanh", "__xl_atanh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atanh_finite", "__xl_atanh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("cbrtf", "__xl_cbrtf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cbrt", "__xl_cbrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("cosf", "__xl_cosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cos", "__xl_cos")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("coshf", "__xl_coshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__coshf_finite", "__xl_coshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cosh", "__xl_cosh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__cosh_finite", "__xl_cosh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("erff", "__xl_erff")
+TLI_DEFINE_SCALAR_MASS_FUNC("erf", "__xl_erf")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("erfcf", "__xl_erfcf")
+TLI_DEFINE_SCALAR_MASS_FUNC("erfc", "__xl_erfc")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("expf", "__xl_expf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__expf_finite", "__xl_expf")
+TLI_DEFINE_SCALAR_MASS_FUNC("exp", "__xl_exp")
+TLI_DEFINE_SCALAR_MASS_FUNC("__exp_finite", "__xl_exp")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("expm1f", "__xl_expm1f")
+TLI_DEFINE_SCALAR_MASS_FUNC("expm1", "__xl_expm1")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("hypotf", "__xl_hypotf")
+TLI_DEFINE_SCALAR_MASS_FUNC("hypot", "__xl_hypot")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("lgammaf", "__xl_lgammaf")
+TLI_DEFINE_SCALAR_MASS_FUNC("lgamma", "__xl_lgamma")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("logf", "__xl_logf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__logf_finite", "__xl_logf") +TLI_DEFINE_SCALAR_MASS_FUNC("log", "__xl_log") +TLI_DEFINE_SCALAR_MASS_FUNC("__log_finite", "__xl_log") + +TLI_DEFINE_SCALAR_MASS_FUNC("log10f", "__xl_log10f") +TLI_DEFINE_SCALAR_MASS_FUNC("__log10f_finite", "__xl_log10f") +TLI_DEFINE_SCALAR_MASS_FUNC("log10", "__xl_log10") +TLI_DEFINE_SCALAR_MASS_FUNC("__log10_finite", "__xl_log10") + +TLI_DEFINE_SCALAR_MASS_FUNC("log1pf", "__xl_log1pf") +TLI_DEFINE_SCALAR_MASS_FUNC("log1p", "__xl_log1p") + +TLI_DEFINE_SCALAR_MASS_FUNC("powf", "__xl_powf") +TLI_DEFINE_SCALAR_MASS_FUNC("__powf_finite", "__xl_powf") +TLI_DEFINE_SCALAR_MASS_FUNC("pow", "__xl_pow") +TLI_DEFINE_SCALAR_MASS_FUNC("__pow_finite", "__xl_pow") + +TLI_DEFINE_SCALAR_MASS_FUNC("rsqrt", "__xl_rsqrt") + +TLI_DEFINE_SCALAR_MASS_FUNC("sinf", "__xl_sinf") +TLI_DEFINE_SCALAR_MASS_FUNC("sin", "__xl_sin") + +TLI_DEFINE_SCALAR_MASS_FUNC("sinhf", "__xl_sinhf") +TLI_DEFINE_SCALAR_MASS_FUNC("__sinhf_finite", "__xl_sinhf") +TLI_DEFINE_SCALAR_MASS_FUNC("sinh", "__xl_sinh") +TLI_DEFINE_SCALAR_MASS_FUNC("__sinh_finite", "__xl_sinh") + +TLI_DEFINE_SCALAR_MASS_FUNC("sqrt", "__xl_sqrt") + +TLI_DEFINE_SCALAR_MASS_FUNC("tanf", "__xl_tanf") +TLI_DEFINE_SCALAR_MASS_FUNC("tan", "__xl_tan") + +TLI_DEFINE_SCALAR_MASS_FUNC("tanhf", "__xl_tanhf") +TLI_DEFINE_SCALAR_MASS_FUNC("tanh", "__xl_tanh") + +#undef TLI_DEFINE_SCALAR_MASS_FUNCS +#undef TLI_DEFINE_SCALAR_MASS_FUNC diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index 6eb6d5518a41..428238c5fa0b 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_SPARSEPROPAGATION_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h index 239aec4e258b..7fd88362276a 100644 --- a/llvm/include/llvm/Analysis/StackLifetime.h +++ b/llvm/include/llvm/Analysis/StackLifetime.h @@ -14,10 +14,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/raw_ostream.h" -#include #include namespace llvm { @@ -26,6 +24,7 @@ class AllocaInst; class BasicBlock; class Function; class Instruction; +class IntrinsicInst; /// Compute live ranges of allocas. 
/// Live ranges are represented as sets of "interesting" instructions, which are diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h index cfc1e20255d1..e6e3efbe0fcb 100644 --- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h @@ -16,18 +16,18 @@ #ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H #define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/LoopInfo.h" #include #include #include +#include namespace llvm { class BasicBlock; class DominatorTree; +class Instruction; +class LoopInfo; class PostDominatorTree; using ConstBlockSet = SmallPtrSet; diff --git a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h index f9bac739cee6..458b599f2937 100644 --- a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h +++ b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h @@ -13,7 +13,7 @@ #ifndef LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H #define LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Support/ScaledNumber.h" diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h index 1df0530e40e6..3a7218b10b97 100644 --- a/llvm/include/llvm/Analysis/TargetFolder.h +++ b/llvm/include/llvm/Analysis/TargetFolder.h @@ -21,12 +21,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/IRBuilderFolder.h" +#include "llvm/IR/Operator.h" namespace llvm { +class Constant; class DataLayout; +class Type; /// TargetFolder - Create constants with target dependent folding. class TargetFolder final : public IRBuilderFolder { @@ -48,31 +50,45 @@ public: // Return an existing value or a constant if the operation can be simplified. // Otherwise return nullptr. //===--------------------------------------------------------------------===// - Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false, - bool HasNSW = false) const override { + + Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); if (LC && RC) - return Fold(ConstantExpr::getAdd(LC, RC, HasNUW, HasNSW)); + return Fold(ConstantExpr::get(Opc, LC, RC)); return nullptr; } - Value *FoldAnd(Value *LHS, Value *RHS) const override { + Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool IsExact) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); if (LC && RC) - return Fold(ConstantExpr::getAnd(LC, RC)); + return Fold(ConstantExpr::get( + Opc, LC, RC, IsExact ? 
PossiblyExactOperator::IsExact : 0)); return nullptr; } - Value *FoldOr(Value *LHS, Value *RHS) const override { + Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool HasNUW, bool HasNSW) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); - if (LC && RC) - return Fold(ConstantExpr::getOr(LC, RC)); + if (LC && RC) { + unsigned Flags = 0; + if (HasNUW) + Flags |= OverflowingBinaryOperator::NoUnsignedWrap; + if (HasNSW) + Flags |= OverflowingBinaryOperator::NoSignedWrap; + return Fold(ConstantExpr::get(Opc, LC, RC, Flags)); + } return nullptr; } + Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + FastMathFlags FMF) const override { + return FoldBinOp(Opc, LHS, RHS); + } Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); @@ -105,82 +121,56 @@ public: return nullptr; } - //===--------------------------------------------------------------------===// - // Binary Operators - //===--------------------------------------------------------------------===// - - Constant *CreateFAdd(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getFAdd(LHS, RHS)); - } - Constant *CreateSub(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const override { - return Fold(ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW)); - } - Constant *CreateFSub(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getFSub(LHS, RHS)); - } - Constant *CreateMul(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const override { - return Fold(ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW)); - } - Constant *CreateFMul(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getFMul(LHS, RHS)); - } - Constant *CreateUDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return Fold(ConstantExpr::getUDiv(LHS, RHS, isExact)); - } - Constant *CreateSDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return Fold(ConstantExpr::getSDiv(LHS, RHS, isExact)); - } - Constant *CreateFDiv(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getFDiv(LHS, RHS)); - } - Constant *CreateURem(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getURem(LHS, RHS)); - } - Constant *CreateSRem(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getSRem(LHS, RHS)); - } - Constant *CreateFRem(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getFRem(LHS, RHS)); - } - Constant *CreateShl(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const override { - return Fold(ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW)); - } - Constant *CreateLShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return Fold(ConstantExpr::getLShr(LHS, RHS, isExact)); + Value *FoldExtractValue(Value *Agg, + ArrayRef IdxList) const override { + if (auto *CAgg = dyn_cast(Agg)) + return ConstantFoldExtractValueInstruction(CAgg, IdxList); + return nullptr; + }; + + Value *FoldInsertValue(Value *Agg, Value *Val, + ArrayRef IdxList) const override { + auto *CAgg = dyn_cast(Agg); + auto *CVal = dyn_cast(Val); + if (CAgg && CVal) + return ConstantFoldInsertValueInstruction(CAgg, CVal, IdxList); + return nullptr; } - Constant *CreateAShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return Fold(ConstantExpr::getAShr(LHS, RHS, 
isExact)); + + Value *FoldExtractElement(Value *Vec, Value *Idx) const override { + auto *CVec = dyn_cast(Vec); + auto *CIdx = dyn_cast(Idx); + if (CVec && CIdx) + return Fold(ConstantExpr::getExtractElement(CVec, CIdx)); + return nullptr; } - Constant *CreateXor(Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::getXor(LHS, RHS)); + + Value *FoldInsertElement(Value *Vec, Value *NewElt, + Value *Idx) const override { + auto *CVec = dyn_cast(Vec); + auto *CNewElt = dyn_cast(NewElt); + auto *CIdx = dyn_cast(Idx); + if (CVec && CNewElt && CIdx) + return Fold(ConstantExpr::getInsertElement(CVec, CNewElt, CIdx)); + return nullptr; } - Constant *CreateBinOp(Instruction::BinaryOps Opc, - Constant *LHS, Constant *RHS) const override { - return Fold(ConstantExpr::get(Opc, LHS, RHS)); + Value *FoldShuffleVector(Value *V1, Value *V2, + ArrayRef Mask) const override { + auto *C1 = dyn_cast(V1); + auto *C2 = dyn_cast(V2); + if (C1 && C2) + return Fold(ConstantExpr::getShuffleVector(C1, C2, Mask)); + return nullptr; } //===--------------------------------------------------------------------===// // Unary Operators //===--------------------------------------------------------------------===// - Constant *CreateNeg(Constant *C, - bool HasNUW = false, bool HasNSW = false) const override { - return Fold(ConstantExpr::getNeg(C, HasNUW, HasNSW)); - } Constant *CreateFNeg(Constant *C) const override { return Fold(ConstantExpr::getFNeg(C)); } - Constant *CreateNot(Constant *C) const override { - return Fold(ConstantExpr::getNot(C)); - } Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { return Fold(ConstantExpr::get(Opc, C)); @@ -252,34 +242,6 @@ public: Constant *RHS) const override { return Fold(ConstantExpr::getCompare(P, LHS, RHS)); } - - //===--------------------------------------------------------------------===// - // Other Instructions - //===--------------------------------------------------------------------===// - - Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const override { - return Fold(ConstantExpr::getExtractElement(Vec, Idx)); - } - - Constant *CreateInsertElement(Constant *Vec, Constant *NewElt, - Constant *Idx) const override { - return Fold(ConstantExpr::getInsertElement(Vec, NewElt, Idx)); - } - - Constant *CreateShuffleVector(Constant *V1, Constant *V2, - ArrayRef Mask) const override { - return Fold(ConstantExpr::getShuffleVector(V1, V2, Mask)); - } - - Constant *CreateExtractValue(Constant *Agg, - ArrayRef IdxList) const override { - return Fold(ConstantExpr::getExtractValue(Agg, IdxList)); - } - - Constant *CreateInsertValue(Constant *Agg, Constant *Val, - ArrayRef IdxList) const override { - return Fold(ConstantExpr::getInsertValue(Agg, Val, IdxList)); - } }; } diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 17d1e3f770c1..7bfda0124de7 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -12,14 +12,15 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { + template class ArrayRef; +class Function; +class Module; class Triple; /// Describes a possible vectorization of a function. 
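The TargetFolder rework above drops the long list of per-opcode Create* overloads in favor of generic Fold* hooks with a uniform contract: return a folded Value when the operands are all constants, or nullptr to tell IRBuilder to emit a real instruction. A minimal sketch of that contract, assuming a DataLayout and two operand Values are in scope:

    #include "llvm/Analysis/TargetFolder.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Returns a folded constant for "add nsw L, R", or nullptr when either
    // operand is non-constant and an actual instruction is required.
    Value *tryFoldAddNSW(const DataLayout &DL, Value *L, Value *R) {
      TargetFolder Folder(DL);
      return Folder.FoldNoWrapBinOp(Instruction::Add, L, R,
                                    /*HasNUW=*/false, /*HasNSW=*/true);
    }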
@@ -49,7 +50,7 @@ class TargetLibraryInfoImpl {
   friend class TargetLibraryInfo;

   unsigned char AvailableArray[(NumLibFuncs+3)/4];
-  llvm::DenseMap CustomNames;
+  DenseMap CustomNames;
   static StringLiteral const StandardNames[NumLibFuncs];
   bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
   unsigned SizeOfInt;
@@ -279,6 +280,13 @@ public:
     return B == OverrideAsUnavailable;
   }

+  /// Return true if the function type FTy is valid for the library function
+  /// F, regardless of whether the function is available.
+  bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
+                              const Module &M) const {
+    return Impl->isValidProtoForLibFunc(FTy, F, M);
+  }
+
   /// Searches for a particular function name.
   ///
   /// If it is one of the known library functions, return true and set F to the
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7412e050322e..372f17cfc7ff 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,13 +21,13 @@
 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/IR/FMF.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/InstructionCost.h"
 #include
 #include
@@ -617,8 +617,8 @@ public:
                              Instruction *I = nullptr) const;

   /// Return true if LSR cost of C1 is lower than C2.
-  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
-                     TargetTransformInfo::LSRCost &C2) const;
+  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+                     const TargetTransformInfo::LSRCost &C2) const;

   /// Return true if LSR major cost is number of registers. Targets which
   /// implement their own isLSRCostLess and unset number of registers as major
@@ -659,6 +659,10 @@ public:
   /// Return true if the target supports nontemporal load.
   bool isLegalNTLoad(Type *DataType, Align Alignment) const;

+  /// \Returns true if the target supports broadcasting a load to a vector of
+  /// type <NumElements x ElementTy>.
+  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
+
   /// Return true if the target supports masked scatter.
   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
   /// Return true if the target supports masked gather.
@@ -675,6 +679,16 @@ public:
   /// Return true if the target supports masked expand load.
   bool isLegalMaskedExpandLoad(Type *DataType) const;

+  /// Return true if this is an alternating opcode pattern that can be lowered
+  /// to a single instruction on the target. In X86 this is for the addsub
+  /// instruction which corresponds to a Shuffle + FAdd + FSub pattern in IR.
+  /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
+  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
+  /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
+  /// \p VecTy is the vector type of the instruction to be generated.
+  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+                       const SmallBitVector &OpcodeMask) const;
+
   /// Return true if we should be enabling ordered reductions for the target.
   bool enableOrderedReductions() const;

@@ -727,7 +741,7 @@ public:
   bool isTypeLegal(Type *Ty) const;

   /// Returns the estimated number of registers required to represent \p Ty.
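The OpcodeMask convention documented above (a clear bit selects Opcode0, a set bit selects Opcode1, one bit per lane) is easiest to see with the addsub pattern it was added for. A sketch, assuming a 4-element vector type; on x86, addsub subtracts in the even lanes and adds in the odd lanes:

    #include "llvm/ADT/SmallBitVector.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Lane pattern <fsub, fadd, fsub, fadd>: clear bits pick Opcode0 (FSub),
    // set bits pick Opcode1 (FAdd).
    bool isAddSubLegal(const TargetTransformInfo &TTI, VectorType *VecTy) {
      SmallBitVector OpcodeMask(4, false);
      OpcodeMask.set(1);
      OpcodeMask.set(3);
      return TTI.isLegalAltInstr(VecTy, Instruction::FSub, Instruction::FAdd,
                                 OpcodeMask);
    }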
- InstructionCost getRegUsageForType(Type *Ty) const; + unsigned getRegUsageForType(Type *Ty) const; /// Return true if switches should be turned into lookup tables for the /// target. @@ -762,6 +776,9 @@ public: /// the scalarization cost of a load/store. bool supportsEfficientVectorElementLoadStore() const; + /// If the target supports tail calls. + bool supportsTailCalls() const; + /// Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; @@ -934,7 +951,8 @@ public: /// creating vectors that span multiple vector registers. /// If false, the vectorization factor will be chosen based on the /// size of the widest element type. - bool shouldMaximizeVectorBandwidth() const; + /// \p K Register Kind for vectorization. + bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; /// \return The minimum vectorization factor for types of given element /// bit width, or 0 if there is no minimum VF. The returned value only @@ -947,6 +965,17 @@ public: /// Currently only used by the SLP vectorizer. unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; + /// \return The minimum vectorization factor for the store instruction. Given + /// the initial estimation of the minimum vector factor and store value type, + /// it tries to find possible lowest VF, which still might be profitable for + /// the vectorization. + /// \param VF Initial estimation of the minimum vector factor. + /// \param ScalarMemTy Scalar memory type of the store operation. + /// \param ScalarValTy Scalar type of the stored value. + /// Currently only used by the SLP vectorizer. + unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const; + /// \return True if it should be considered for address type promotion. /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is /// profitable without finding other extensions fed by the same input. @@ -1045,11 +1074,14 @@ public: /// The exact mask may be passed as Mask, or else the array will be empty. /// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds to show the insert/extract point and the type of - /// the subvector being inserted/extracted. + /// the subvector being inserted/extracted. The operands of the shuffle can be + /// passed through \p Args, which helps improve the cost estimation in some + /// cases, like in broadcast loads. /// NOTE: For subvector extractions Tp represents the source type. InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask = None, int Index = 0, - VectorType *SubTp = nullptr) const; + VectorType *SubTp = nullptr, + ArrayRef Args = None) const; /// Represents a hint about the context in which a cast is used. /// @@ -1283,9 +1315,11 @@ public: Type *ExpectedType) const; /// \returns The type to use in a loop expansion of a memcpy call. - Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const; + Type * + getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize = None) const; /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. /// \param RemainingBytes The number of bytes to copy. 
@@ -1296,7 +1330,8 @@ public: void getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const; + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize = None) const; /// \returns True if the two functions have compatible attributes for inlining /// purposes. @@ -1536,8 +1571,8 @@ public: int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I) = 0; - virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) = 0; + virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) = 0; virtual bool isNumRegsMajorCostOfLSR() = 0; virtual bool isProfitableLSRChainElement(Instruction *I) = 0; virtual bool canMacroFuseCmp() = 0; @@ -1550,6 +1585,8 @@ public: virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; + virtual bool isLegalBroadcastLoad(Type *ElementTy, + ElementCount NumElements) const = 0; virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; virtual bool forceScalarizeMaskedGather(VectorType *DataType, @@ -1558,6 +1595,9 @@ public: Align Alignment) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; + virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask) const = 0; virtual bool enableOrderedReductions() = 0; virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; @@ -1571,7 +1611,7 @@ public: virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual InstructionCost getRegUsageForType(Type *Ty) = 0; + virtual unsigned getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool shouldBuildRelLookupTables() = 0; @@ -1584,6 +1624,7 @@ public: getOperandsScalarizationOverhead(ArrayRef Args, ArrayRef Tys) = 0; virtual bool supportsEfficientVectorElementLoadStore() = 0; + virtual bool supportsTailCalls() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; @@ -1618,10 +1659,13 @@ public: virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional getMaxVScale() const = 0; virtual Optional getVScaleForTuning() const = 0; - virtual bool shouldMaximizeVectorBandwidth() const = 0; + virtual bool + shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0; virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const = 0; virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; + virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() const = 0; @@ -1660,7 +1704,8 @@ public: ArrayRef Args, const Instruction *CxtI = nullptr) = 0; virtual 
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp) = 0; + VectorType *SubTp, + ArrayRef Args) = 0; virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, @@ -1734,15 +1779,17 @@ public: virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; - virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, - unsigned DestAddrSpace, - unsigned SrcAlign, - unsigned DestAlign) const = 0; + virtual Type * + getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize) const = 0; + virtual void getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const = 0; + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize) const = 0; virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const = 0; virtual bool areTypesABICompatible(const Function *Caller, @@ -1920,8 +1967,8 @@ public: return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, I); } - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) override { + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) override { return Impl.isLSRCostLess(C1, C2); } bool isNumRegsMajorCostOfLSR() override { @@ -1953,6 +2000,10 @@ public: bool isLegalNTLoad(Type *DataType, Align Alignment) override { return Impl.isLegalNTLoad(DataType, Alignment); } + bool isLegalBroadcastLoad(Type *ElementTy, + ElementCount NumElements) const override { + return Impl.isLegalBroadcastLoad(ElementTy, NumElements); + } bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedScatter(DataType, Alignment); } @@ -1973,6 +2024,10 @@ public: bool isLegalMaskedExpandLoad(Type *DataType) override { return Impl.isLegalMaskedExpandLoad(DataType); } + bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask) const override { + return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask); + } bool enableOrderedReductions() override { return Impl.enableOrderedReductions(); } @@ -2001,7 +2056,7 @@ public: } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - InstructionCost getRegUsageForType(Type *Ty) override { + unsigned getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); } bool shouldBuildLookupTables() override { @@ -2032,6 +2087,8 @@ public: return Impl.supportsEfficientVectorElementLoadStore(); } + bool supportsTailCalls() override { return Impl.supportsTailCalls(); } + bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } @@ -2108,8 +2165,9 @@ public: Optional getVScaleForTuning() const override { return Impl.getVScaleForTuning(); } - bool shouldMaximizeVectorBandwidth() const override { - return Impl.shouldMaximizeVectorBandwidth(); + bool shouldMaximizeVectorBandwidth( + TargetTransformInfo::RegisterKind K) const override { + return 
Impl.shouldMaximizeVectorBandwidth(K); } ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const override { @@ -2118,6 +2176,10 @@ public: unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { return Impl.getMaximumVF(ElemWidth, Opcode); } + unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const override { + return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); + } bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { return Impl.shouldConsiderAddressTypePromotion( @@ -2180,8 +2242,9 @@ public: } InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp) override { - return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp); + VectorType *SubTp, + ArrayRef Args) override { + return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args); } InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, @@ -2298,20 +2361,22 @@ public: Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } - Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, - unsigned DestAlign) const override { + Type *getMemcpyLoopLoweringType( + LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize) const override { return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, - DestAddrSpace, SrcAlign, DestAlign); + DestAddrSpace, SrcAlign, DestAlign, + AtomicElementSize); } void getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const override { + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize) const override { Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, - SrcAlign, DestAlign); + SrcAlign, DestAlign, AtomicCpySize); } bool areInlineCompatible(const Function *Caller, const Function *Callee) const override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index a32744f8d58b..a70c418974f5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -18,18 +18,16 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Type.h" #include -using namespace llvm::PatternMatch; - namespace llvm { +class Function; + /// Base class for use as a mix-in that aids implementing /// a TargetTransformInfo-compatible class. 
class TargetTransformInfoImplBase { @@ -212,7 +210,7 @@ public: return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } - bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const { + bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const { return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) < std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, @@ -258,6 +256,10 @@ public: return Alignment >= DataSize && isPowerOf2_32(DataSize); } + bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const { + return false; + } + bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { return false; } @@ -277,6 +279,11 @@ public: bool isLegalMaskedCompressStore(Type *DataType) const { return false; } + bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask) const { + return false; + } + bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } bool enableOrderedReductions() const { return false; } @@ -310,7 +317,7 @@ public: bool isTypeLegal(Type *Ty) const { return false; } - InstructionCost getRegUsageForType(Type *Ty) const { return 1; } + unsigned getRegUsageForType(Type *Ty) const { return 1; } bool shouldBuildLookupTables() const { return true; } @@ -333,6 +340,8 @@ public: bool supportsEfficientVectorElementLoadStore() const { return false; } + bool supportsTailCalls() const { return true; } + bool enableAggressiveInterleaving(bool LoopHasReductions) const { return false; } @@ -415,13 +424,17 @@ public: Optional getMaxVScale() const { return None; } Optional getVScaleForTuning() const { return None; } - bool shouldMaximizeVectorBandwidth() const { return false; } + bool + shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { + return false; + } ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const { return ElementCount::get(0, IsScalable); } unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } + unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; } bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { @@ -490,7 +503,8 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, int Index, - VectorType *SubTp) const { + VectorType *SubTp, + ArrayRef Args = None) const { return 1; } @@ -697,16 +711,21 @@ public: Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const { - return Type::getInt8Ty(Context); + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize) const { + return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8) + : Type::getInt8Ty(Context); } void getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const { - for (unsigned i = 0; i != RemainingBytes; ++i) - OpsOut.push_back(Type::getInt8Ty(Context)); + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize) const { + unsigned OpSizeInBytes = AtomicCpySize ? 
*AtomicCpySize : 1; + Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8); + for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes) + OpsOut.push_back(OpType); } bool areInlineCompatible(const Function *Caller, @@ -960,6 +979,8 @@ public: InstructionCost getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind) { + using namespace llvm::PatternMatch; + auto *TargetTTI = static_cast(this); // Handle non-intrinsic calls, invokes, and callbr. // FIXME: Unlikely to be true for anything but CodeSize. @@ -976,8 +997,6 @@ public: } Type *Ty = U->getType(); - Type *OpTy = - U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr; unsigned Opcode = Operator::getOpcode(U); auto *I = dyn_cast(U); switch (Opcode) { @@ -1049,9 +1068,11 @@ public: case Instruction::FPExt: case Instruction::SExt: case Instruction::ZExt: - case Instruction::AddrSpaceCast: + case Instruction::AddrSpaceCast: { + Type *OpTy = U->getOperand(0)->getType(); return TargetTTI->getCastInstrCost( Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I); + } case Instruction::Store: { auto *SI = cast(U); Type *ValTy = U->getOperand(0)->getType(); @@ -1137,13 +1158,14 @@ public: if (Shuffle->isExtractSubvectorMask(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, Shuffle->getShuffleMask(), SubIndex, - VecTy); + VecTy, Operands); if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) return TargetTTI->getShuffleCost( TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, - FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + FixedVectorType::get(VecTy->getScalarType(), NumSubElts), + Operands); int ReplicationFactor, VF; if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { @@ -1166,31 +1188,37 @@ public: if (Shuffle->isReverse()) return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isSelect()) return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isTranspose()) return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isZeroEltSplat()) return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isSingleSource()) return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) return TargetTTI->getShuffleCost( TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, - FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands); return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); } case Instruction::ExtractElement: { auto *EEI = dyn_cast(U); diff --git a/llvm/include/llvm/Analysis/TensorSpec.h b/llvm/include/llvm/Analysis/TensorSpec.h new file mode 100644 index 000000000000..382ab3f10445 --- /dev/null +++ b/llvm/include/llvm/Analysis/TensorSpec.h @@ -0,0 +1,132 @@ +//===- TensorSpec.h - type descriptor for a tensor --------------*- C++ -*-===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ANALYSIS_TENSORSPEC_H +#define LLVM_ANALYSIS_TENSORSPEC_H + +#include "llvm/Config/llvm-config.h" + +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/JSON.h" + +#include +#include + +namespace llvm { +/// TensorSpec encapsulates the specification of a tensor: its dimensions, or +/// "shape" (row-major), its type (see TensorSpec::getDataType specializations +/// for supported types), its name and port (see "TensorFlow: Large-Scale +/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2: +/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf) +/// +/// Known tensor types. The left part is the C type, the right is a name we +/// can use to identify the type (to implement TensorSpec equality checks), and +/// to use, if needed, when mapping to an underlying evaluator's type system. +/// The main requirement is that the C type we use has the same size and +/// encoding (e.g. endian-ness) as the one used by the evaluator. +#define SUPPORTED_TENSOR_TYPES(M) \ + M(float, Float) \ + M(double, Double) \ + M(int8_t, Int8) \ + M(uint8_t, UInt8) \ + M(int16_t, Int16) \ + M(uint16_t, UInt16) \ + M(int32_t, Int32) \ + M(uint32_t, UInt32) \ + M(int64_t, Int64) \ + M(uint64_t, UInt64) + +enum class TensorType { + Invalid, +#define _TENSOR_TYPE_ENUM_MEMBERS(_, Name) Name, + SUPPORTED_TENSOR_TYPES(_TENSOR_TYPE_ENUM_MEMBERS) +#undef _TENSOR_TYPE_ENUM_MEMBERS +}; + +class TensorSpec final { +public: + template + static TensorSpec createSpec(const std::string &Name, + const std::vector &Shape, + int Port = 0) { + return TensorSpec(Name, Port, getDataType(), sizeof(T), Shape); + } + + const std::string &name() const { return Name; } + int port() const { return Port; } + TensorType type() const { return Type; } + const std::vector &shape() const { return Shape; } + + bool operator==(const TensorSpec &Other) const { + return Name == Other.Name && Port == Other.Port && Type == Other.Type && + Shape == Other.Shape; + } + + bool operator!=(const TensorSpec &Other) const { return !(*this == Other); } + + /// Get the number of elements in a tensor with this shape. + size_t getElementCount() const { return ElementCount; } + /// Get the size, in bytes, of one element. + size_t getElementByteSize() const { return ElementSize; } + /// Get the total size of a memory buffer needed to store the whole tensor. + size_t getTotalTensorBufferSize() const { return ElementCount * ElementSize; } + + template bool isElementType() const { + return getDataType() == Type; + } + +private: + TensorSpec(const std::string &Name, int Port, TensorType Type, + size_t ElementSize, const std::vector &Shape); + + template static TensorType getDataType(); + + std::string Name; + int Port = 0; + TensorType Type = TensorType::Invalid; + std::vector Shape; + size_t ElementCount = 0; + size_t ElementSize = 0; +}; + +/// Construct a TensorSpec from a JSON dictionary of the form: +/// { "name": , +/// "port": , +/// "type": , +/// "shape": } +/// For the "type" field, see the C++ primitive types used in +/// TFUTILS_SUPPORTED_TYPES. 
+Optional getTensorSpecFromJSON(LLVMContext &Ctx, + const json::Value &Value); + +struct LoggedFeatureSpec { + TensorSpec Spec; + Optional LoggingName; + const std::string &getLoggingName() const { + return LoggingName ? *LoggingName : Spec.name(); + } +}; + +/// Load the output specs. If SpecFileOverride is not empty, that path is used. +/// Otherwise, the file is assumed to be called 'output_spec.json' and be found +/// under ModelPath (the model directory). +/// The first output tensor name must match ExpectedDecisionName. +/// In case of error, the return is None and the error is logged. +Optional> +loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, + StringRef ModelPath, StringRef SpecFileOverride = StringRef()); + +#define TFUTILS_GETDATATYPE_DEF(T, Name) \ + template <> TensorType TensorSpec::getDataType(); +SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_DEF) + +#undef TFUTILS_GETDATATYPE_DEF +} // namespace llvm + +#endif // LLVM_ANALYSIS_TENSORSPEC_H diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h index 074c40942b06..dab67aad1ab0 100644 --- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -14,11 +14,11 @@ #ifndef LLVM_ANALYSIS_TYPEMETADATAUTILS_H #define LLVM_ANALYSIS_TYPEMETADATAUTILS_H -#include "llvm/ADT/SmallVector.h" #include namespace llvm { +template class SmallVectorImpl; class CallBase; class CallInst; class Constant; diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index 785b9fe949a5..372c35863f3f 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -13,6 +13,7 @@ #ifdef LLVM_HAVE_TF_API #include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/TensorSpec.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/JSON.h" @@ -38,86 +39,6 @@ namespace llvm { class TFModelEvaluatorImpl; class EvaluationResultImpl; -/// TensorSpec encapsulates the specification of a tensor: its dimensions, or -/// "shape" (row-major), its type (see TensorSpec::getDataType specializations -/// for supported types), its name and port (see "TensorFlow: Large-Scale -/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2: -/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf) -/// -/// TensorSpec is used to set up a TFModelEvaluator by describing the expected -/// inputs and outputs. -class TensorSpec final { -public: - template - static TensorSpec createSpec(const std::string &Name, - const std::vector &Shape, - int Port = 0) { - return TensorSpec(Name, Port, getDataType(), Shape); - } - - const std::string &name() const { return Name; } - int port() const { return Port; } - int typeIndex() const { return TypeIndex; } - const std::vector &shape() const { return Shape; } - - bool operator==(const TensorSpec &Other) const { - return Name == Other.Name && Port == Other.Port && - TypeIndex == Other.TypeIndex && Shape == Other.Shape; - } - - bool operator!=(const TensorSpec &Other) const { return !(*this == Other); } - - /// Get the number of elements in a tensor with this shape. - size_t getElementCount() const { return ElementCount; } - /// Get the size, in bytes, of one element. 
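With TensorSpec now hoisted into its own header above, the API reads naturally in isolation. A small worked example with a made-up tensor name and shape:

    #include "llvm/Analysis/TensorSpec.h"
    #include <cassert>

    using namespace llvm;

    void tensorSpecExample() {
      // A 2x3 float tensor on the default port 0.
      TensorSpec Spec = TensorSpec::createSpec<float>("input_features", {2, 3});
      assert(Spec.getElementCount() == 6);
      assert(Spec.getElementByteSize() == sizeof(float));
      // Total buffer: 6 elements * 4 bytes = 24 bytes.
      assert(Spec.getTotalTensorBufferSize() == 24);
      assert(Spec.isElementType<float>());
    }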
- size_t getElementByteSize() const; - - template bool isElementType() const { - return getDataType() == TypeIndex; - } - -private: - TensorSpec(const std::string &Name, int Port, int TypeIndex, - const std::vector &Shape); - - template static int getDataType() { - llvm_unreachable("Undefined tensor type"); - } - - std::string Name; - int Port = 0; - int TypeIndex = 0; - std::vector Shape; - size_t ElementCount = 0; -}; - -/// Construct a TensorSpec from a JSON dictionary of the form: -/// { "name": , -/// "port": , -/// "type": , -/// "shape": } -/// For the "type" field, see the C++ primitive types used in -/// TFUTILS_SUPPORTED_TYPES. -Optional getTensorSpecFromJSON(LLVMContext &Ctx, - const json::Value &Value); - -struct LoggedFeatureSpec { - TensorSpec Spec; - Optional LoggingName; - const std::string &getLoggingName() const { - return LoggingName ? *LoggingName : Spec.name(); - } -}; - -/// Load the output specs. If SpecFileOverride is not empty, that path is used. -/// Otherwise, the file is assumed to be called 'output_spec.json' and be found -/// under ModelPath (the model directory). -/// The first output tensor name must match ExpectedDecisionName. -/// In case of error, the return is None and the error is logged. -Optional> -loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, - StringRef ModelPath, StringRef SpecFileOverride = StringRef()); - /// Logging utility - given an ordered specification of features, and assuming /// a scalar reward, allow logging feature values and rewards, and then print /// as tf.train.SequenceExample text protobuf. @@ -262,27 +183,6 @@ private: std::unique_ptr Impl; }; -/// List of supported types, as a pair: -/// - C++ type -/// - enum name (implementation-specific) -#define TFUTILS_SUPPORTED_TYPES(M) \ - M(float, TF_FLOAT) \ - M(double, TF_DOUBLE) \ - M(int8_t, TF_INT8) \ - M(uint8_t, TF_UINT8) \ - M(int16_t, TF_INT16) \ - M(uint16_t, TF_UINT16) \ - M(int32_t, TF_INT32) \ - M(uint32_t, TF_UINT32) \ - M(int64_t, TF_INT64) \ - M(uint64_t, TF_UINT64) - -#define TFUTILS_GETDATATYPE_DEF(T, E) \ - template <> int TensorSpec::getDataType(); - -TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_DEF) - -#undef TFUTILS_GETDATATYPE_DEF } // namespace llvm #endif // LLVM_HAVE_TF_API diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 1b32fca50697..bc6b279e9ed5 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -9,16 +9,18 @@ #ifndef LLVM_ANALYSIS_VALUELATTICE_H #define LLVM_ANALYSIS_VALUELATTICE_H -#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Instructions.h" -// + //===----------------------------------------------------------------------===// // ValueLatticeElement //===----------------------------------------------------------------------===// namespace llvm { +class Constant; + /// This class represents lattice values for constants. 
 ///
 /// FIXME: This is basically just for bringup, this can be made a lot more rich
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 5b39b0244339..3b29bf1d53b4 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -21,12 +21,12 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Operator.h"
 #include <cassert>
 #include <cstdint>
 
 namespace llvm {
 
+class Operator;
 class AddOperator;
 class AllocaInst;
 class APInt;
@@ -463,15 +463,37 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
                                      const DominatorTree *DT = nullptr,
                                      const TargetLibraryInfo *TLI = nullptr);
 
+  /// This returns the same result as isSafeToSpeculativelyExecute if Opcode is
+  /// the actual opcode of Inst. If the provided and actual opcode differ, the
+  /// function (virtually) overrides the opcode of Inst with the provided
+  /// Opcode. There are some constraints in this case:
+  /// * If Opcode has a fixed number of operands (e.g., as binary operators
+  ///   do), then Inst has to have at least as many leading operands. The
+  ///   function will ignore all trailing operands beyond that number.
+  /// * If Opcode allows for an arbitrary number of operands (e.g., as
+  ///   CallInsts do), then all operands are considered.
+  /// * The virtual instruction has to satisfy all typing rules of the provided
+  ///   Opcode.
+  /// * This function is pessimistic in the following sense: If one actually
+  ///   materialized the virtual instruction, then isSafeToSpeculativelyExecute
+  ///   may say that the materialized instruction is speculatable whereas this
+  ///   function may have said that the instruction wouldn't be speculatable.
+  ///   This behavior is a shortcoming in the current implementation and not
+  ///   intentional.
+  bool isSafeToSpeculativelyExecuteWithOpcode(
+      unsigned Opcode, const Operator *Inst, const Instruction *CtxI = nullptr,
+      const DominatorTree *DT = nullptr,
+      const TargetLibraryInfo *TLI = nullptr);
+
   /// Returns true if the result or effects of the given instructions \p I
-  /// depend on or influence global memory.
-  /// Memory dependence arises for example if the instruction reads from
-  /// memory or may produce effects or undefined behaviour. Memory dependent
-  /// instructions generally cannot be reorderd with respect to other memory
-  /// dependent instructions or moved into non-dominated basic blocks.
-  /// Instructions which just compute a value based on the values of their
-  /// operands are not memory dependent.
-  bool mayBeMemoryDependent(const Instruction &I);
+  /// depend on values not reachable through the def-use graph.
+  /// * Memory dependence arises for example if the instruction reads from
+  ///   memory or may produce effects or undefined behaviour. Memory-dependent
+  ///   instructions generally cannot be reordered with respect to other
+  ///   memory-dependent instructions.
+  /// * Control dependence arises for example if the instruction may fault
+  ///   if lifted above a throwing call or infinite loop.
+  bool mayHaveNonDefUseDependency(const Instruction &I);
 
   /// Return true if it is an intrinsic that cannot be speculated but also
   /// cannot trap.
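As an editorial illustration of the two queries above (a sketch under the documented semantics, not code from the patch): a transform could use mayHaveNonDefUseDependency as a cheap gate before hoisting, and the WithOpcode variant to ask about an instruction it has not materialized yet. The helper names are invented.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Hypothetical hoisting guard: an instruction that only depends on its
// operands (no memory or control dependence) can be moved freely; whether
// it may then be *speculated* is a separate question.
static bool canHoist(Instruction &I) {
  if (mayHaveNonDefUseDependency(I))
    return false;
  return isSafeToSpeculativelyExecute(&I);
}

// Asking "would this add still be speculatable as a udiv?" without
// materializing the udiv. Division is unsafe when the divisor may be zero,
// so this typically returns false; both operands of the add are reused as
// the virtual instruction's leading operands, per the constraints above.
static bool wouldUDivBeSafe(BinaryOperator &Add) {
  return isSafeToSpeculativelyExecuteWithOpcode(Instruction::UDiv,
                                                cast<Operator>(&Add));
}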
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 751c88a4ecbb..0005874ba040 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -236,7 +236,7 @@ class VFDatabase { // ensuring that the variant described in the attribute has a // corresponding definition or declaration of the vector // function in the Module M. - if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) { + if (Shape && (Shape.getValue().ScalarName == ScalarName)) { assert(CI.getModule()->getFunction(Shape.getValue().VectorName) && "Vector function is missing."); Mappings.push_back(Shape.getValue()); @@ -309,16 +309,16 @@ inline Type *ToVectorTy(Type *Scalar, unsigned VF) { /// Identify if the intrinsic is trivially vectorizable. /// This method returns true if the intrinsic's argument types are all scalars /// for the scalar form of the intrinsic and all vectors (or scalars handled by -/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic. +/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic. bool isTriviallyVectorizable(Intrinsic::ID ID); /// Identifies if the vector form of the intrinsic has a scalar operand. -bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx); +bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx); -/// Identifies if the vector form of the intrinsic has a scalar operand that has +/// Identifies if the vector form of the intrinsic has a operand that has /// an overloaded type. -bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, - unsigned ScalarOpdIdx); +bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx); /// Returns intrinsic ID for call. /// For the input call instruction it finds mapping intrinsic and returns @@ -398,6 +398,24 @@ void narrowShuffleMaskElts(int Scale, ArrayRef Mask, bool widenShuffleMaskElts(int Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask); +/// Splits and processes shuffle mask depending on the number of input and +/// output registers. The function does 2 main things: 1) splits the +/// source/destination vectors into real registers; 2) do the mask analysis to +/// identify which real registers are permuted. Then the function processes +/// resulting registers mask using provided action items. If no input register +/// is defined, \p NoInputAction action is used. If only 1 input register is +/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to +/// process > 2 input registers and masks. +/// \param Mask Original shuffle mask. +/// \param NumOfSrcRegs Number of source registers. +/// \param NumOfDestRegs Number of destination registers. +/// \param NumOfUsedRegs Number of actually used destination registers. +void processShuffleMasks( + ArrayRef Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, + unsigned NumOfUsedRegs, function_ref NoInputAction, + function_ref, unsigned, unsigned)> SingleInputAction, + function_ref, unsigned, unsigned)> ManyInputsAction); + /// Compute a map of integer instructions to their minimum legal type /// size. 
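An editorial sketch of driving processShuffleMasks, assuming the callback types used upstream, function_ref<void()> for NoInputAction and function_ref<void(ArrayRef<int>, unsigned, unsigned)> for the other two (an assumption, since the declaration above is abbreviated). The register counts and counters are illustrative only.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// Hypothetical: classify how a 16-lane shuffle mask maps 2 source registers
// (8 lanes each) onto 2 destination registers.
static void classifyMask(ArrayRef<int> Mask) {
  unsigned Untouched = 0, SingleSrc = 0, Blended = 0;
  processShuffleMasks(
      Mask, /*NumOfSrcRegs=*/2, /*NumOfDestRegs=*/2, /*NumOfUsedRegs=*/2,
      /*NoInputAction=*/[&]() { ++Untouched; },
      /*SingleInputAction=*/
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        ++SingleSrc; // This destination is a permute of one source register.
      },
      /*ManyInputsAction=*/
      [&](ArrayRef<int> RegMask, unsigned Reg1, unsigned Reg2) {
        ++Blended; // This destination mixes lanes from several sources.
      });
}

The point of the split is that a backend can cost or emit one in-register permute per SingleInputAction call and a multi-register blend per ManyInputsAction call, instead of reasoning about the flat mask.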
/// diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index c30165e4a97b..7bcb33f18768 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -37,7 +37,7 @@ namespace llvm { lltok::Kind CurKind; std::string StrVal; unsigned UIntVal; - Type *TyVal; + Type *TyVal = nullptr; APFloat APFloatVal; APSInt APSIntVal; diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 62af3afbc142..3389475b2c9a 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -14,18 +14,25 @@ #define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/FMF.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" #include namespace llvm { class Module; + class ConstantRange; + class FunctionType; + class GlobalObject; + class SMDiagnostic; + class SMLoc; + class SourceMgr; + class Type; + struct MaybeAlign; + template class Optional; class Function; class Value; class BasicBlock; @@ -88,6 +95,8 @@ namespace llvm { typedef LLLexer::LocTy LocTy; private: LLVMContext &Context; + // Lexer to determine whether to use opaque pointers or not. + LLLexer OPLex; LLLexer Lex; // Module being parsed, null if we are only parsing summary index. Module *M; @@ -150,8 +159,9 @@ namespace llvm { LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M, ModuleSummaryIndex *Index, LLVMContext &Context, SlotMapping *Slots = nullptr) - : Context(Context), Lex(F, SM, Err, Context), M(M), Index(Index), - Slots(Slots), BlockAddressPFS(nullptr) {} + : Context(Context), OPLex(F, SM, Err, Context), + Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots), + BlockAddressPFS(nullptr) {} bool Run( bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); @@ -263,6 +273,8 @@ namespace llvm { bool parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens = false); bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); + bool parseOptionalUWTableKind(UWTableKind &Kind); + bool parseAllocKind(AllocFnKind &Kind); bool parseScopeAndOrdering(bool IsAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); bool parseScope(SyncScope::ID &SSID); @@ -503,6 +515,7 @@ namespace llvm { bool parseGlobalValueVector(SmallVectorImpl &Elts, Optional *InRangeOp = nullptr); bool parseOptionalComdat(StringRef GlobalName, Comdat *&C); + bool parseSanitizer(GlobalVariable *GV); bool parseMetadataAsValue(Value *&V, PerFunctionState &PFS); bool parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg, PerFunctionState *PFS); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 78ebb35e0ea4..230a1662cc04 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -88,7 +88,6 @@ enum Kind { kw_triple, kw_source_filename, kw_unwind, - kw_deplibs, // FIXME: Remove in 4.0 kw_datalayout, kw_volatile, kw_atomic, @@ -112,7 +111,6 @@ enum Kind { kw_exact, kw_inbounds, kw_inrange, - kw_align, kw_addrspace, kw_section, kw_partition, @@ -121,7 +119,6 @@ enum Kind { kw_module, kw_asm, kw_sideeffect, - kw_alignstack, kw_inteldialect, kw_gc, kw_prefix, @@ -177,81 +174,12 @@ enum Kind { // Attributes: kw_attributes, - kw_allocsize, - kw_alwaysinline, - 
kw_argmemonly, - kw_sanitize_address, - kw_sanitize_hwaddress, - kw_sanitize_memtag, - kw_builtin, - kw_byval, - kw_inalloca, - kw_cold, - kw_convergent, - kw_dereferenceable, - kw_dereferenceable_or_null, - kw_disable_sanitizer_instrumentation, - kw_elementtype, - kw_inaccessiblememonly, - kw_inaccessiblemem_or_argmemonly, - kw_inlinehint, - kw_inreg, - kw_jumptable, - kw_minsize, - kw_naked, - kw_nest, - kw_noalias, - kw_noundef, - kw_nobuiltin, - kw_nocallback, - kw_nocapture, - kw_noduplicate, - kw_nofree, - kw_noimplicitfloat, - kw_noinline, - kw_norecurse, - kw_nonlazybind, - kw_nomerge, - kw_nonnull, - kw_noprofile, - kw_noredzone, - kw_noreturn, - kw_nosync, - kw_nocf_check, - kw_nounwind, - kw_nosanitize_coverage, - kw_null_pointer_is_valid, - kw_optforfuzzing, - kw_optnone, - kw_optsize, - kw_preallocated, - kw_readnone, - kw_readonly, - kw_returned, - kw_returns_twice, - kw_signext, - kw_speculatable, - kw_ssp, - kw_sspreq, - kw_sspstrong, - kw_safestack, - kw_shadowcallstack, - kw_sret, - kw_sanitize_thread, - kw_sanitize_memory, - kw_speculative_load_hardening, - kw_strictfp, - kw_swifterror, - kw_swiftself, - kw_swiftasync, - kw_uwtable, - kw_vscale_range, - kw_willreturn, - kw_writeonly, - kw_zeroext, - kw_immarg, - kw_byref, - kw_mustprogress, + kw_sync, + kw_async, +#define GET_ATTR_NAMES +#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \ + kw_##DISPLAY_NAME, +#include "llvm/IR/Attributes.inc" kw_type, kw_opaque, @@ -415,7 +343,6 @@ enum Kind { kw_param, kw_hotness, kw_unknown, - kw_hot, kw_critical, kw_relbf, kw_variable, @@ -464,6 +391,19 @@ enum Kind { kw_bit, kw_varFlags, + // GV's with __attribute__((no_sanitize("address"))), or things in + // -fsanitize-ignorelist when built with ASan. + kw_no_sanitize_address, + // GV's with __attribute__((no_sanitize("hwaddress"))), or things in + // -fsanitize-ignorelist when built with HWASan. + kw_no_sanitize_hwaddress, + // GV's with __attribute__((no_sanitize("memtag"))), or things in + // -fsanitize-ignorelist when built with memory tagging. + kw_no_sanitize_memtag, + // GV's where the clang++ frontend (when ASan is used) notes that this is + // dynamically initialized, and thus needs ODR detection. + kw_sanitize_address_dyninit, + // Unsigned Valued tokens (UIntVal). LabelID, // 42: GlobalID, // @42 diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h index e1c7f746a335..6710ae6e358d 100644 --- a/llvm/include/llvm/AsmParser/Parser.h +++ b/llvm/include/llvm/AsmParser/Parser.h @@ -13,7 +13,9 @@ #ifndef LLVM_ASMPARSER_PARSER_H #define LLVM_ASMPARSER_PARSER_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLForwardCompat.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h index e7dde986784f..fb563ff198ef 100644 --- a/llvm/include/llvm/BinaryFormat/COFF.h +++ b/llvm/include/llvm/BinaryFormat/COFF.h @@ -24,7 +24,6 @@ #include "llvm/Support/DataTypes.h" #include -#include namespace llvm { namespace COFF { @@ -731,6 +730,10 @@ inline bool isReservedSectionNumber(int32_t SectionNumber) { return SectionNumber <= 0; } +/// Encode section name based on string table offset. +/// The size of Out must be at least COFF::NameSize. +bool encodeSectionName(char *Out, uint64_t Offset); + } // End namespace COFF. } // End namespace llvm. 
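For encodeSectionName just above: COFF section names longer than eight bytes live in the string table, and the 8-byte header field holds a reference to their offset. A hedged sketch of a call site follows; the "/decimal" versus "//base64" encodings mentioned in the comment are the usual COFF convention for small and large offsets, inferred rather than restated by this header.

#include "llvm/BinaryFormat/COFF.h"
#include <cstring>

using namespace llvm;

// Hypothetical call site: Offset is where the long name was placed in the
// COFF string table. On success, Name holds e.g. "/1234567" (or a "//"
// base64 form for offsets too large for seven decimal digits).
static void setHeaderName(char (&Name)[COFF::NameSize], uint64_t Offset) {
  if (!COFF::encodeSectionName(Name, Offset))
    std::memset(Name, 0, COFF::NameSize); // Offset not representable.
}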
diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h new file mode 100644 index 000000000000..9e912c7bd4ba --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -0,0 +1,131 @@ +//===-- llvm/BinaryFormat/DXContainer.h - The DXBC file format --*- C++/-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines manifest constants for the DXContainer object file format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_DXCONTAINER_H +#define LLVM_BINARYFORMAT_DXCONTAINER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SwapByteOrder.h" + +#include + +namespace llvm { + +// The DXContainer file format is arranged as a header and "parts". Semantically +// parts are similar to sections in other object file formats. The File format +// structure is roughly: + +// ┌────────────────────────────────┐ +// │ Header │ +// ├────────────────────────────────┤ +// │ Part │ +// ├────────────────────────────────┤ +// │ Part │ +// ├────────────────────────────────┤ +// │ ... │ +// └────────────────────────────────┘ + +namespace dxbc { + +struct Hash { + uint8_t Digest[16]; +}; + +enum class HashFlags : uint32_t { + None = 0, // No flags defined. + IncludesSource = 1, // This flag indicates that the shader hash was computed + // taking into account source information (-Zss) +}; + +struct ShaderHash { + uint32_t Flags; // DxilShaderHashFlags + uint8_t Digest[16]; + + void swapBytes() { sys::swapByteOrder(Flags); } +}; + +struct ContainerVersion { + uint16_t Major; + uint16_t Minor; + + void swapBytes() { + sys::swapByteOrder(Major); + sys::swapByteOrder(Minor); + } +}; + +struct Header { + uint8_t Magic[4]; // "DXBC" + Hash FileHash; + ContainerVersion Version; + uint32_t FileSize; + uint32_t PartCount; + + void swapBytes() { + Version.swapBytes(); + sys::swapByteOrder(FileSize); + sys::swapByteOrder(PartCount); + } + // Structure is followed by part offsets: uint32_t PartOffset[PartCount]; + // The offset is to a PartHeader, which is followed by the Part Data. +}; + +/// Use this type to describe the size and type of a DXIL container part. +struct PartHeader { + uint8_t Name[4]; + uint32_t Size; + + void swapBytes() { sys::swapByteOrder(Size); } + StringRef getName() const { + return StringRef(reinterpret_cast(&Name[0]), 4); + } + // Structure is followed directly by part data: uint8_t PartData[PartSize]. +}; + +struct BitcodeHeader { + uint8_t Magic[4]; // ACSII "DXIL". + uint8_t MajorVersion; // DXIL version. + uint8_t MinorVersion; // DXIL version. + uint16_t Unused; + uint32_t Offset; // Offset to LLVM bitcode (from start of header). + uint32_t Size; // Size of LLVM bitcode (in bytes). + // Followed by uint8_t[BitcodeHeader.Size] at &BitcodeHeader + Header.Offset + + void swapBytes() { + sys::swapByteOrder(MinorVersion); + sys::swapByteOrder(MajorVersion); + sys::swapByteOrder(Offset); + sys::swapByteOrder(Size); + } +}; + +struct ProgramHeader { + uint8_t MinorVersion : 4; + uint8_t MajorVersion : 4; + uint8_t Unused; + uint16_t ShaderKind; + uint32_t Size; // Size in uint32_t words including this header. 
+ BitcodeHeader Bitcode; + + void swapBytes() { + sys::swapByteOrder(ShaderKind); + sys::swapByteOrder(Size); + Bitcode.swapBytes(); + } +}; + +static_assert(sizeof(ProgramHeader) == 24, "ProgramHeader Size incorrect!"); + +} // namespace dxbc +} // namespace llvm + +#endif // LLVM_BINARYFORMAT_DXCONTAINER_H diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 4473f506d371..e288c5191bdb 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -320,6 +320,10 @@ inline bool isFortran(SourceLanguage S) { return result; } +inline TypeKind getArrayIndexTypeEncoding(SourceLanguage S) { + return isFortran(S) ? DW_ATE_signed : DW_ATE_unsigned; +} + enum CaseSensitivity { // Identifier case codes DW_ID_case_sensitive = 0x00, diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def index 814d8b113ec4..ae25ec53813c 100644 --- a/llvm/include/llvm/BinaryFormat/DynamicTags.def +++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def @@ -209,6 +209,7 @@ MIPS_DYNAMIC_TAG(MIPS_RWPLT, 0x70000034) // Points to the base // of a writable PLT. MIPS_DYNAMIC_TAG(MIPS_RLD_MAP_REL, 0x70000035) // Relative offset of run time loader // map, used for debugging. +MIPS_DYNAMIC_TAG(MIPS_XHASH, 0x70000036) // GNU-style hash table with xlat. // PPC specific dynamic table entries. PPC_DYNAMIC_TAG(PPC_GOT, 0x70000000) // Uses Secure PLT ABI. diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 5d3b1270b538..1e0ef613788d 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -319,6 +319,7 @@ enum { EM_BPF = 247, // Linux kernel bpf virtual machine EM_VE = 251, // NEC SX-Aurora VE EM_CSKY = 252, // C-SKY 32-bit processor + EM_LOONGARCH = 258, // LoongArch }; // Object file classes. @@ -563,6 +564,15 @@ enum : unsigned { EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant }; +// MIPS-specific section indexes +enum { + SHN_MIPS_ACOMMON = 0xff00, // Common symbols which are defined and allocated + SHN_MIPS_TEXT = 0xff01, // Not ABI compliant + SHN_MIPS_DATA = 0xff02, // Not ABI compliant + SHN_MIPS_SCOMMON = 0xff03, // Common symbols for global data area + SHN_MIPS_SUNDEFINED = 0xff04 // Undefined symbols for global data area +}; + // ELF Relocation types for Mips enum { #include "ELFRelocs/Mips.def" @@ -753,16 +763,18 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d, EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e, EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f, - EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040, - EF_AMDGPU_MACH_AMDGCN_RESERVED_0X41 = 0x041, + EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040, + EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041, EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X43 = 0x043, - EF_AMDGPU_MACH_AMDGCN_RESERVED_0X44 = 0x044, - EF_AMDGPU_MACH_AMDGCN_RESERVED_0X45 = 0x045, + EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044, + EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045, + EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046, + EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047, // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_RESERVED_0X45, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1102, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. 
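Tying together the DXContainer structures introduced a few hunks back: the header is followed by PartCount little-endian uint32_t offsets, each locating a PartHeader that is in turn followed by its part data. A sketch of walking the parts over an already-validated buffer; this is an editorial illustration with bounds checks omitted, not the patch's reader.

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/DXContainer.h"
#include <cstdint>
#include <cstring>

using namespace llvm;

// Hypothetical reader over a trusted little-endian DXContainer buffer.
static void walkParts(const uint8_t *Buf) {
  dxbc::Header H;
  std::memcpy(&H, Buf, sizeof(H));
  if (sys::IsBigEndianHost)
    H.swapBytes(); // The on-disk format is little-endian.
  // The part offset table sits immediately after the header.
  const uint8_t *Offsets = Buf + sizeof(dxbc::Header);
  for (uint32_t I = 0; I < H.PartCount; ++I) {
    uint32_t Offset;
    std::memcpy(&Offset, Offsets + I * sizeof(uint32_t), sizeof(uint32_t));
    if (sys::IsBigEndianHost)
      sys::swapByteOrder(Offset);
    dxbc::PartHeader PH;
    std::memcpy(&PH, Buf + Offset, sizeof(PH));
    if (sys::IsBigEndianHost)
      PH.swapBytes();
    // PH.getName() is the 4-byte part tag; PH.Size bytes of data follow.
    StringRef Name = PH.getName();
    (void)Name;
  }
}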
@@ -865,12 +877,34 @@ enum { #include "ELFRelocs/VE.def" }; +// CSKY Specific e_flags +enum : unsigned { + EF_CSKY_801 = 0xa, + EF_CSKY_802 = 0x10, + EF_CSKY_803 = 0x9, + EF_CSKY_805 = 0x11, + EF_CSKY_807 = 0x6, + EF_CSKY_810 = 0x8, + EF_CSKY_860 = 0xb, + EF_CSKY_800 = 0x1f, + EF_CSKY_FLOAT = 0x2000, + EF_CSKY_DSP = 0x4000, + EF_CSKY_ABIV2 = 0x20000000, + EF_CSKY_EFV1 = 0x1000000, + EF_CSKY_EFV2 = 0x2000000, + EF_CSKY_EFV3 = 0x3000000 +}; // ELF Relocation types for CSKY enum { #include "ELFRelocs/CSKY.def" }; +// ELF Relocation types for LoongArch +enum { +#include "ELFRelocs/LoongArch.def" +}; + #undef ELF_RELOC // Section header. @@ -947,12 +981,15 @@ enum : unsigned { SHT_LLVM_ADDRSIG = 0x6fff4c03, // List of address-significant symbols // for safe ICF. SHT_LLVM_DEPENDENT_LIBRARIES = - 0x6fff4c04, // LLVM Dependent Library Specifiers. - SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification. - SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition. - SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition. - SHT_LLVM_BB_ADDR_MAP = 0x6fff4c08, // LLVM Basic Block Address Map. + 0x6fff4c04, // LLVM Dependent Library Specifiers. + SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification. + SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition. + SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition. + SHT_LLVM_BB_ADDR_MAP_V0 = + 0x6fff4c08, // LLVM Basic Block Address Map (old version kept for + // backward-compatibility). SHT_LLVM_CALL_GRAPH_PROFILE = 0x6fff4c09, // LLVM Call Graph Profile. + SHT_LLVM_BB_ADDR_MAP = 0x6fff4c0a, // LLVM Basic Block Address Map. // Android's experimental support for SHT_RELR sections. // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512 SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets. @@ -985,6 +1022,8 @@ enum : unsigned { SHT_RISCV_ATTRIBUTES = 0x70000003U, + SHT_CSKY_ATTRIBUTES = 0x70000001U, + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. SHT_HIUSER = 0xffffffff // Highest type reserved for applications. @@ -1036,6 +1075,9 @@ enum : unsigned { SHF_MASKOS = 0x0ff00000, + // Solaris equivalent of SHF_GNU_RETAIN. + SHF_SUNW_NODISCARD = 0x00100000, + // Bits indicating processor-specific flags. SHF_MASKPROC = 0xf0000000, @@ -1329,6 +1371,9 @@ enum { PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table. PT_MIPS_OPTIONS = 0x70000002, // Options segment. PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment. + + // RISCV program header types. + PT_RISCV_ATTRIBUTES = 0x70000003, }; // Segment flag bits. @@ -1531,6 +1576,31 @@ enum { NT_GNU_PROPERTY_TYPE_0 = 5, }; +// Android note types. +enum { + NT_ANDROID_TYPE_IDENT = 1, + NT_ANDROID_TYPE_KUSER = 3, + NT_ANDROID_TYPE_MEMTAG = 4, +}; + +// Memory tagging values used in NT_ANDROID_TYPE_MEMTAG notes. +enum { + // Enumeration to determine the tagging mode. In Android-land, 'SYNC' means + // running all threads in MTE Synchronous mode, and 'ASYNC' means to use the + // kernels auto-upgrade feature to allow for either MTE Asynchronous, + // Asymmetric, or Synchronous mode. This allows silicon vendors to specify, on + // a per-cpu basis what 'ASYNC' should mean. Generally, the expectation is + // "pick the most precise mode that's very fast". 
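The NT_MEMTAG_* constants that follow this comment pack the Android MTE policy into a single note word. A hedged decoding sketch (the note-parsing scaffolding is assumed; only the bit layout comes from the enum below):

#include "llvm/BinaryFormat/ELF.h"
#include <cstdint>

using namespace llvm;

// Hypothetical: Desc is the first word of an NT_ANDROID_TYPE_MEMTAG note.
struct MemtagPolicy {
  bool Sync, Async, Heap, Stack;
};

static MemtagPolicy decodeMemtagNote(uint32_t Desc) {
  // The low two bits select the tagging level; two more bits request MTE
  // on the heap and stack respectively.
  uint32_t Level = Desc & ELF::NT_MEMTAG_LEVEL_MASK;
  return {Level == ELF::NT_MEMTAG_LEVEL_SYNC,
          Level == ELF::NT_MEMTAG_LEVEL_ASYNC,
          (Desc & ELF::NT_MEMTAG_HEAP) != 0,
          (Desc & ELF::NT_MEMTAG_STACK) != 0};
}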
+ NT_MEMTAG_LEVEL_NONE = 0, + NT_MEMTAG_LEVEL_ASYNC = 1, + NT_MEMTAG_LEVEL_SYNC = 2, + NT_MEMTAG_LEVEL_MASK = 3, + // Bits indicating whether the loader should prepare for MTE to be enabled on + // the heap and/or stack. + NT_MEMTAG_HEAP = 4, + NT_MEMTAG_STACK = 8, +}; + // Property types used in GNU_PROPERTY_TYPE_0 notes. enum : unsigned { GNU_PROPERTY_STACK_SIZE = 1, diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def new file mode 100644 index 000000000000..8cbfe2fe4235 --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def @@ -0,0 +1,62 @@ +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// These types and values are from the LoongArch ELF psABI which can be found at +// https://github.com/loongson/LoongArch-Documentation +// and these definitions has been adopted by binutils (include/elf/loongarch.h). +// The commit hash (main branch) we reference is: +// 9b3bd9f4a497115913c22f1a2a47863798fbc02a + +ELF_RELOC(R_LARCH_NONE, 0) +ELF_RELOC(R_LARCH_32, 1) +ELF_RELOC(R_LARCH_64, 2) +ELF_RELOC(R_LARCH_RELATIVE, 3) +ELF_RELOC(R_LARCH_COPY, 4) +ELF_RELOC(R_LARCH_JUMP_SLOT, 5) +ELF_RELOC(R_LARCH_TLS_DTPMOD32, 6) +ELF_RELOC(R_LARCH_TLS_DTPMOD64, 7) +ELF_RELOC(R_LARCH_TLS_DTPREL32, 8) +ELF_RELOC(R_LARCH_TLS_DTPREL64, 9) +ELF_RELOC(R_LARCH_TLS_TPREL32, 10) +ELF_RELOC(R_LARCH_TLS_TPREL64, 11) +ELF_RELOC(R_LARCH_IRELATIVE, 12) +ELF_RELOC(R_LARCH_MARK_LA, 20) +ELF_RELOC(R_LARCH_MARK_PCREL, 21) +ELF_RELOC(R_LARCH_SOP_PUSH_PCREL, 22) +ELF_RELOC(R_LARCH_SOP_PUSH_ABSOLUTE, 23) +ELF_RELOC(R_LARCH_SOP_PUSH_DUP, 24) +ELF_RELOC(R_LARCH_SOP_PUSH_GPREL, 25) +ELF_RELOC(R_LARCH_SOP_PUSH_TLS_TPREL, 26) +ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GOT, 27) +ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GD, 28) +ELF_RELOC(R_LARCH_SOP_PUSH_PLT_PCREL, 29) +ELF_RELOC(R_LARCH_SOP_ASSERT, 30) +ELF_RELOC(R_LARCH_SOP_NOT, 31) +ELF_RELOC(R_LARCH_SOP_SUB, 32) +ELF_RELOC(R_LARCH_SOP_SL, 33) +ELF_RELOC(R_LARCH_SOP_SR, 34) +ELF_RELOC(R_LARCH_SOP_ADD, 35) +ELF_RELOC(R_LARCH_SOP_AND, 36) +ELF_RELOC(R_LARCH_SOP_IF_ELSE, 37) +ELF_RELOC(R_LARCH_SOP_POP_32_S_10_5, 38) +ELF_RELOC(R_LARCH_SOP_POP_32_U_10_12, 39) +ELF_RELOC(R_LARCH_SOP_POP_32_S_10_12, 40) +ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16, 41) +ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16_S2, 42) +ELF_RELOC(R_LARCH_SOP_POP_32_S_5_20, 43) +ELF_RELOC(R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44) +ELF_RELOC(R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45) +ELF_RELOC(R_LARCH_SOP_POP_32_U, 46) +ELF_RELOC(R_LARCH_ADD8, 47) +ELF_RELOC(R_LARCH_ADD16, 48) +ELF_RELOC(R_LARCH_ADD24, 49) +ELF_RELOC(R_LARCH_ADD32, 50) +ELF_RELOC(R_LARCH_ADD64, 51) +ELF_RELOC(R_LARCH_SUB8, 52) +ELF_RELOC(R_LARCH_SUB16, 53) +ELF_RELOC(R_LARCH_SUB24, 54) +ELF_RELOC(R_LARCH_SUB32, 55) +ELF_RELOC(R_LARCH_SUB64, 56) +ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57) +ELF_RELOC(R_LARCH_GNU_VTENTRY, 58) diff --git a/llvm/include/llvm/BinaryFormat/GOFF.h b/llvm/include/llvm/BinaryFormat/GOFF.h new file mode 100644 index 000000000000..96992414c6cc --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/GOFF.h @@ -0,0 +1,33 @@ +//===-- llvm/BinaryFormat/GOFF.h - GOFF definitions --------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the GOFF file format.
+//
+// GOFF specifics can be found in MVS Program Management: Advanced Facilities.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_GOFF_H
+#define LLVM_BINARYFORMAT_GOFF_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+namespace GOFF {
+
+/// \brief Subsections of the primary C_CODE section in the object file.
+enum SubsectionKind : uint8_t {
+  SK_PPA1 = 2,
+};
+
+} // end namespace GOFF
+
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_GOFF_H
diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index ce3a5c46e0d1..c05e79333d38 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -255,7 +255,8 @@ enum BindType {
 enum BindSpecialDylib {
   BIND_SPECIAL_DYLIB_SELF = 0,
   BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
-  BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2
+  BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2,
+  BIND_SPECIAL_DYLIB_WEAK_LOOKUP = -3
 };
 
 enum {
@@ -1001,6 +1002,27 @@ struct nlist_64 {
   uint64_t n_value;
 };
 
+/// Structs for dyld chained fixups.
+/// dyld_chained_fixups_header is the data pointed to by LC_DYLD_CHAINED_FIXUPS
+/// load command.
+struct dyld_chained_fixups_header {
+  uint32_t fixups_version; ///< 0
+  uint32_t starts_offset;  ///< Offset of dyld_chained_starts_in_image.
+  uint32_t imports_offset; ///< Offset of imports table in chain_data.
+  uint32_t symbols_offset; ///< Offset of symbol strings in chain_data.
+  uint32_t imports_count;  ///< Number of imported symbol names.
+  uint32_t imports_format; ///< DYLD_CHAINED_IMPORT*
+  uint32_t symbols_format; ///< 0 => uncompressed, 1 => zlib compressed
+};
+
+/// dyld_chained_starts_in_image is embedded in LC_DYLD_CHAINED_FIXUPS payload.
+/// Each seg_info_offset entry is the offset into this struct for that
+/// segment, followed by a pool of dyld_chained_starts_in_segment data.
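A small editorial sketch of consuming dyld_chained_fixups_header as defined above: all of its offsets are relative to the start of the LC_DYLD_CHAINED_FIXUPS payload. The helper name and the trusted, host-byte-order buffer are assumptions made for illustration.

#include "llvm/BinaryFormat/MachO.h"
#include <cstdint>

using namespace llvm;

// Hypothetical: Payload points at mapped LC_DYLD_CHAINED_FIXUPS data that
// has already been validated and byte-swapped if necessary.
static const char *importSymbolName(const uint8_t *Payload, uint32_t NameOff) {
  const auto *H =
      reinterpret_cast<const MachO::dyld_chained_fixups_header *>(Payload);
  // symbols_offset locates the string pool inside the payload; with
  // symbols_format == 0 the pool is uncompressed, NUL-terminated names.
  const char *SymbolPool =
      reinterpret_cast<const char *>(Payload + H->symbols_offset);
  return SymbolPool + NameOff;
}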
+struct dyld_chained_starts_in_image { + uint32_t seg_count; + uint32_t seg_info_offset[1]; +}; + // Byte order swapping functions for MachO structs inline void swapStruct(fat_header &mh) { @@ -2008,6 +2030,16 @@ union alignas(4) macho_load_command { }; LLVM_PACKED_END +inline void swapStruct(dyld_chained_fixups_header &C) { + sys::swapByteOrder(C.fixups_version); + sys::swapByteOrder(C.starts_offset); + sys::swapByteOrder(C.imports_offset); + sys::swapByteOrder(C.symbols_offset); + sys::swapByteOrder(C.imports_count); + sys::swapByteOrder(C.imports_format); + sys::swapByteOrder(C.symbols_format); +} + /* code signing attributes of a process */ enum CodeSignAttrs { @@ -2205,6 +2237,17 @@ enum SecCSDigestAlgorithm { kSecCodeSignatureHashSHA512 = 5, /* SHA-512 */ }; +enum LinkerOptimizationHintKind { + LOH_ARM64_ADRP_ADRP = 1, + LOH_ARM64_ADRP_LDR = 2, + LOH_ARM64_ADRP_ADD_LDR = 3, + LOH_ARM64_ADRP_LDR_GOT_LDR = 4, + LOH_ARM64_ADRP_ADD_STR = 5, + LOH_ARM64_ADRP_LDR_GOT_STR = 6, + LOH_ARM64_ADRP_ADD = 7, + LOH_ARM64_ADRP_LDR_GOT = 8, +}; + } // end namespace MachO } // end namespace llvm diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h index 6988b2dde656..c8e0dad42b0b 100644 --- a/llvm/include/llvm/BinaryFormat/Magic.h +++ b/llvm/include/llvm/BinaryFormat/Magic.h @@ -51,6 +51,9 @@ struct file_magic { wasm_object, ///< WebAssembly Object file pdb, ///< Windows PDB debug info file tapi_file, ///< Text-based Dynamic Library Stub file + cuda_fatbinary, ///< CUDA Fatbinary object file + offload_binary, ///< LLVM offload object file + dxcontainer_object, ///< DirectX container file }; bool is_object() const { return V != unknown; } diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def index 6160e2551432..05b60e40632c 100644 --- a/llvm/include/llvm/BinaryFormat/Swift.def +++ b/llvm/include/llvm/BinaryFormat/Swift.def @@ -24,3 +24,10 @@ HANDLE_SWIFT_SECTION(builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn") HANDLE_SWIFT_SECTION(capture, "__swift5_capture", "swift5_capture", ".sw5cptr") HANDLE_SWIFT_SECTION(typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf") HANDLE_SWIFT_SECTION(reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst") +HANDLE_SWIFT_SECTION(conform, "__swift5_proto", "swift5_protocol_conformances", + ".sw5prtc$B") +HANDLE_SWIFT_SECTION(protocs, "__swift5_protos", "swift5_protocols", + ".sw5prt$B") +HANDLE_SWIFT_SECTION(acfuncs, "__swift5_acfuncs", "swift5_accessible_functions", + ".sw5acfn$B") +HANDLE_SWIFT_SECTION(mpenum, "__swift5_mpenum", "swift5_mpenum", ".sw5mpen$B") diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 0bc8c4e167d8..62a6881ef36a 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -91,7 +91,7 @@ struct WasmTable { StringRef SymbolName; // from the "linking" section }; -struct WasmInitExpr { +struct WasmInitExprMVP { uint8_t Opcode; union { int32_t Int32; @@ -102,6 +102,13 @@ struct WasmInitExpr { } Value; }; +struct WasmInitExpr { + uint8_t Extended; // Set to non-zero if extended const is used (i.e. 
more than + // one instruction) + WasmInitExprMVP Inst; + ArrayRef Body; +}; + struct WasmGlobalType { uint8_t Type; bool Mutable; @@ -245,7 +252,8 @@ enum : unsigned { WASM_SEC_CODE = 10, // Function bodies (code) WASM_SEC_DATA = 11, // Data segments WASM_SEC_DATACOUNT = 12, // Data segment count - WASM_SEC_TAG = 13 // Tag declarations + WASM_SEC_TAG = 13, // Tag declarations + WASM_SEC_LAST_KNOWN = WASM_SEC_TAG, }; // Type immediate encodings used in various contexts. @@ -276,6 +284,7 @@ enum : unsigned { WASM_OPCODE_CALL = 0x10, WASM_OPCODE_LOCAL_GET = 0x20, WASM_OPCODE_LOCAL_SET = 0x21, + WASM_OPCODE_LOCAL_TEE = 0x22, WASM_OPCODE_GLOBAL_GET = 0x23, WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, @@ -285,7 +294,11 @@ enum : unsigned { WASM_OPCODE_F32_CONST = 0x43, WASM_OPCODE_F64_CONST = 0x44, WASM_OPCODE_I32_ADD = 0x6a, + WASM_OPCODE_I32_SUB = 0x6b, + WASM_OPCODE_I32_MUL = 0x6c, WASM_OPCODE_I64_ADD = 0x7c, + WASM_OPCODE_I64_SUB = 0x7d, + WASM_OPCODE_I64_MUL = 0x7e, WASM_OPCODE_REF_NULL = 0xd0, }; @@ -458,8 +471,9 @@ inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) { return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits; } -std::string toString(WasmSymbolType type); -std::string relocTypetoString(uint32_t type); +llvm::StringRef toString(WasmSymbolType type); +llvm::StringRef relocTypetoString(uint32_t type); +llvm::StringRef sectionTypeToString(uint32_t type); bool relocTypeHasAddend(uint32_t type); } // end namespace wasm diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index cffd8618f1e3..5d23ec5cd911 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -54,6 +54,34 @@ enum AuxHeaderFlags64 : uint16_t { ///< future use and should be set to 0. }; +enum XCOFFInterpret : uint16_t { + OLD_XCOFF_INTERPRET = 1, + NEW_XCOFF_INTERPRET = 2 +}; + +enum FileFlag : uint16_t { + F_RELFLG = 0x0001, ///< relocation info stripped from file + F_EXEC = 0x0002, ///< file is executable (i.e., it + ///< has a loader section) + F_LNNO = 0x0004, ///< line numbers stripped from file + F_LSYMS = 0x0008, ///< local symbols stripped from file + F_FDPR_PROF = 0x0010, ///< file was profiled with FDPR + F_FDPR_OPTI = 0x0020, ///< file was reordered with FDPR + F_DSA = 0x0040, ///< file uses Dynamic Segment Allocation (32-bit + ///< only) + F_DEP_1 = 0x0080, ///< Data Execution Protection bit 1 + F_VARPG = 0x0100, ///< executable requests using variable size pages + F_LPTEXT = 0x0400, ///< executable requires large pages for text + F_LPDATA = 0x0800, ///< executable requires large pages for data + F_DYNLOAD = 0x1000, ///< file is dynamically loadable and + ///< executable (equivalent to F_EXEC on AIX) + F_SHROBJ = 0x2000, ///< file is a shared object + F_LOADONLY = + 0x4000, ///< file can be loaded by the system loader, but it is + ///< ignored by the linker if it is a member of an archive. + F_DEP_2 = 0x8000 ///< Data Execution Protection bit 2 +}; + // x_smclas field of x_csect from system header: /usr/include/syms.h /// Storage Mapping Class definitions. enum StorageMappingClass : uint8_t { @@ -212,6 +240,8 @@ enum VisibilityType : uint16_t { SYM_V_EXPORTED = 0x4000 }; +constexpr uint16_t VISIBILITY_MASK = 0x7000; + // Relocation types, defined in `/usr/include/reloc.h`. enum RelocationType : uint8_t { R_POS = 0x00, ///< Positive relocation. 
Provides the address of the referenced diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h index f6fc284da33f..102e2257abcc 100644 --- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -18,12 +18,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Support/Error.h" -#include "llvm/Support/raw_ostream.h" #include #include namespace llvm { +class raw_ostream; + /// CurStreamTypeType - A type for CurStreamType enum CurStreamTypeType { UnknownBitstream, diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h index a82791c8720b..39ea48c33fc3 100644 --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -15,12 +15,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Bitstream/BitCodes.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/Bitstream/BitCodeEnums.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include #include #include @@ -30,6 +29,8 @@ namespace llvm { class LLVMContext; class Module; +class MemoryBuffer; +class ModuleSummaryIndex; typedef llvm::function_ref(StringRef)> DataLayoutCallbackTy; diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h index 96f25fce8ddb..248d33f4502e 100644 --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -17,7 +17,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include #include #include diff --git a/llvm/include/llvm/Bitcode/BitcodeWriterPass.h b/llvm/include/llvm/Bitcode/BitcodeWriterPass.h index dda5b20973c1..3c2471237532 100644 --- a/llvm/include/llvm/Bitcode/BitcodeWriterPass.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriterPass.h @@ -14,7 +14,6 @@ #ifndef LLVM_BITCODE_BITCODEWRITERPASS_H #define LLVM_BITCODE_BITCODEWRITERPASS_H -#include "llvm/ADT/StringRef.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 6d0f51ce9c6d..5d96204ba42a 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -17,7 +17,10 @@ #ifndef LLVM_BITCODE_LLVMBITCODES_H #define LLVM_BITCODE_LLVMBITCODES_H -#include "llvm/Bitstream/BitCodes.h" +// This is the only file included, and it, in turn, is a leaf header. +// This allows external tools to dump the AST of this file and analyze it for +// changes without needing to fully or partially build LLVM itself. +#include "llvm/Bitstream/BitCodeEnums.h" namespace llvm { namespace bitc { @@ -582,14 +585,15 @@ enum FunctionCodes { 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] // 53 is unused. // 54 is unused. - FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] - FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval] - FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs, - // fnty, fnid, args...] 
- FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval] - FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val, - // operation, align, vol, - // ordering, synchscope] + FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] + FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval] + FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs, + // fnty, fnid, args...] + FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval] + FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val, + // operation, align, vol, + // ordering, synchscope] + FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...] }; enum UseListCodes { @@ -677,6 +681,11 @@ enum AttributeKindCodes { ATTR_KIND_NO_SANITIZE_COVERAGE = 76, ATTR_KIND_ELEMENTTYPE = 77, ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78, + ATTR_KIND_NO_SANITIZE_BOUNDS = 79, + ATTR_KIND_ALLOC_ALIGN = 80, + ATTR_KIND_ALLOCATED_POINTER = 81, + ATTR_KIND_ALLOC_KIND = 82, + ATTR_KIND_PRESPLIT_COROUTINE = 83, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/Bitstream/BitCodeEnums.h b/llvm/include/llvm/Bitstream/BitCodeEnums.h new file mode 100644 index 000000000000..4288bd3987ae --- /dev/null +++ b/llvm/include/llvm/Bitstream/BitCodeEnums.h @@ -0,0 +1,90 @@ +//===- BitCodeEnums.h - Core enums for the bitstream format -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines "core" bitstream enum values. +// It has been separated from the other header that defines bitstream enum +// values, BitCodes.h, to allow tools to track changes to the various +// bitstream and bitcode enums without needing to fully or partially build +// LLVM itself. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITSTREAM_BITCODEENUMS_H +#define LLVM_BITSTREAM_BITCODEENUMS_H + +namespace llvm { +/// Offsets of the 32-bit fields of bitstream wrapper header. +enum BitstreamWrapperHeader : unsigned { + BWH_MagicField = 0 * 4, + BWH_VersionField = 1 * 4, + BWH_OffsetField = 2 * 4, + BWH_SizeField = 3 * 4, + BWH_CPUTypeField = 4 * 4, + BWH_HeaderSize = 5 * 4 +}; + +namespace bitc { +enum StandardWidths { + BlockIDWidth = 8, // We use VBR-8 for block IDs. + CodeLenWidth = 4, // Codelen are VBR-4. + BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block. +}; + +// The standard abbrev namespace always has a way to exit a block, enter a +// nested block, define abbrevs, and define an unabbreviated record. +enum FixedAbbrevIDs { + END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode. + ENTER_SUBBLOCK = 1, + + /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists + /// of a vbr5 for # operand infos. Each operand info is emitted with a + /// single bit to indicate if it is a literal encoding. If so, the value is + /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed + /// by the info value as a vbr5 if needed. 
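A worked illustration of the DEFINE_ABBREV encoding described in the comment above, using the BitCodeAbbrev/BitCodeAbbrevOp API that appears later in this patch (BitCodes.h). The record shape chosen here is hypothetical.

#include "llvm/Bitstream/BitCodes.h"
#include <memory>

using namespace llvm;

// Hypothetical abbreviation for records of the form [literal code 4, array
// of char6 elements]. When the writer emits the DEFINE_ABBREV for this, it
// writes: a vbr5 operand count (3), then for each operand one literal bit
// followed by either a vbr8 literal value or a 3-bit encoding (plus a vbr5
// of extra data when the encoding needs it).
static std::shared_ptr<BitCodeAbbrev> makeStringAbbrev() {
  auto Abbv = std::make_shared<BitCodeAbbrev>();
  Abbv->Add(BitCodeAbbrevOp(4));                      // [1, vbr8: 4]
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // [0, enc: Array]
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); // [0, enc: Char6]
  return Abbv;
}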
+ DEFINE_ABBREV = 2, + + // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by + // a vbr6 for the # operands, followed by vbr6's for each operand. + UNABBREV_RECORD = 3, + + // This is not a code, this is a marker for the first abbrev assignment. + FIRST_APPLICATION_ABBREV = 4 +}; + +/// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO +/// block, which contains metadata about other blocks in the file. +enum StandardBlockIDs { + /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example, + /// standard abbrevs that should be available to all blocks of a specified + /// ID. + BLOCKINFO_BLOCK_ID = 0, + + // Block IDs 1-7 are reserved for future expansion. + FIRST_APPLICATION_BLOCKID = 8 +}; + +/// BlockInfoCodes - The blockinfo block contains metadata about user-defined +/// blocks. +enum BlockInfoCodes { + // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd + // block, instead of the BlockInfo block. + + BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] + BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name] + BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: + // [id, name] +}; + +} // namespace bitc +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Bitstream/BitCodes.h b/llvm/include/llvm/Bitstream/BitCodes.h index 9cd4e535a470..93888f7d3b33 100644 --- a/llvm/include/llvm/Bitstream/BitCodes.h +++ b/llvm/include/llvm/Bitstream/BitCodes.h @@ -19,75 +19,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Bitstream/BitCodeEnums.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include namespace llvm { -/// Offsets of the 32-bit fields of bitstream wrapper header. -enum BitstreamWrapperHeader : unsigned { - BWH_MagicField = 0 * 4, - BWH_VersionField = 1 * 4, - BWH_OffsetField = 2 * 4, - BWH_SizeField = 3 * 4, - BWH_CPUTypeField = 4 * 4, - BWH_HeaderSize = 5 * 4 -}; - -namespace bitc { - enum StandardWidths { - BlockIDWidth = 8, // We use VBR-8 for block IDs. - CodeLenWidth = 4, // Codelen are VBR-4. - BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block. - }; - - // The standard abbrev namespace always has a way to exit a block, enter a - // nested block, define abbrevs, and define an unabbreviated record. - enum FixedAbbrevIDs { - END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode. - ENTER_SUBBLOCK = 1, - - /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists - /// of a vbr5 for # operand infos. Each operand info is emitted with a - /// single bit to indicate if it is a literal encoding. If so, the value is - /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed - /// by the info value as a vbr5 if needed. - DEFINE_ABBREV = 2, - - // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by - // a vbr6 for the # operands, followed by vbr6's for each operand. - UNABBREV_RECORD = 3, - - // This is not a code, this is a marker for the first abbrev assignment. - FIRST_APPLICATION_ABBREV = 4 - }; - - /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO - /// block, which contains metadata about other blocks in the file. - enum StandardBlockIDs { - /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example, - /// standard abbrevs that should be available to all blocks of a specified - /// ID. - BLOCKINFO_BLOCK_ID = 0, - - // Block IDs 1-7 are reserved for future expansion. 
- FIRST_APPLICATION_BLOCKID = 8 - }; - - /// BlockInfoCodes - The blockinfo block contains metadata about user-defined - /// blocks. - enum BlockInfoCodes { - // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd - // block, instead of the BlockInfo block. - - BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] - BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name] - BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: - // [id, name] - }; - -} // End bitc namespace - /// BitCodeAbbrevOp - This describes one or more operands in an abbreviation. /// This is actually a union of two different things: /// 1. It could be a literal integer value ("the operand is always 17"). @@ -106,6 +43,10 @@ public: Blob = 5 // 32-bit aligned array of 8-bit characters. }; + static bool isValidEncoding(uint64_t E) { + return E >= 1 && E <= 5; + } + explicit BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0) : Val(Data), IsLiteral(false), Enc(E) {} @@ -179,6 +120,6 @@ public: OperandList.push_back(OpInfo); } }; -} // End llvm namespace +} // namespace llvm #endif diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h index 37b7c4d73cff..10a0a4e0039e 100644 --- a/llvm/include/llvm/Bitstream/BitstreamReader.h +++ b/llvm/include/llvm/Bitstream/BitstreamReader.h @@ -19,7 +19,6 @@ #include "llvm/Bitstream/BitCodes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBufferRef.h" #include #include @@ -97,8 +96,6 @@ private: unsigned BitsInCurWord = 0; public: - static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8; - SimpleBitstreamCursor() = default; explicit SimpleBitstreamCursor(ArrayRef BitcodeBytes) : BitcodeBytes(BitcodeBytes) {} @@ -187,7 +184,7 @@ public: } Expected Read(unsigned NumBits) { - static const unsigned BitsInWord = MaxChunkSize; + static const unsigned BitsInWord = sizeof(word_t) * 8; assert(NumBits && NumBits <= BitsInWord && "Cannot return zero or more than BitsInWord bits!"); @@ -229,24 +226,32 @@ public: return R; } - Expected ReadVBR(unsigned NumBits) { + Expected ReadVBR(const unsigned NumBits) { Expected MaybeRead = Read(NumBits); if (!MaybeRead) return MaybeRead; uint32_t Piece = MaybeRead.get(); - if ((Piece & (1U << (NumBits-1))) == 0) + assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value"); + const uint32_t MaskBitOrder = (NumBits - 1); + const uint32_t Mask = 1UL << MaskBitOrder; + + if ((Piece & Mask) == 0) return Piece; uint32_t Result = 0; unsigned NextBit = 0; while (true) { - Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + Result |= (Piece & (Mask - 1)) << NextBit; - if ((Piece & (1U << (NumBits-1))) == 0) + if ((Piece & Mask) == 0) return Result; NextBit += NumBits-1; + if (NextBit >= 32) + return createStringError(std::errc::illegal_byte_sequence, + "Unterminated VBR"); + MaybeRead = Read(NumBits); if (!MaybeRead) return MaybeRead; @@ -256,24 +261,31 @@ public: // Read a VBR that may have a value up to 64-bits in size. The chunk size of // the VBR must still be <= 32 bits though. 
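A standalone worked example of the VBR scheme that ReadVBR and ReadVBR64 decode: with NumBits = 6 there are 5 payload bits per chunk, and the sixth (high) bit is the continuation flag; the new "Unterminated VBR" guards simply bound how many chunks a well-formed value may use. The encoder below is an editorial sketch mirroring the decode loop, not library code.

#include <cstdint>
#include <vector>

// Sketch of VBR-6 encoding: 5 payload bits per chunk, high bit set on every
// chunk except the last. Encoding 1000 (0b1111101000):
//   low 5 bits  01000 -> chunk 0b101000 (continuation bit set)
//   next 5 bits 11111 -> chunk 0b011111 (stop)
// Decoding recombines them as 8 + (31 << 5) = 1000.
static std::vector<uint8_t> encodeVBR6(uint64_t Val) {
  std::vector<uint8_t> Chunks;
  do {
    uint8_t Chunk = Val & 0x1F; // 5 payload bits
    Val >>= 5;
    if (Val != 0)
      Chunk |= 0x20; // continuation bit, i.e. 1 << (NumBits - 1)
    Chunks.push_back(Chunk);
  } while (Val != 0);
  return Chunks;
}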
- Expected ReadVBR64(unsigned NumBits) { + Expected ReadVBR64(const unsigned NumBits) { Expected MaybeRead = Read(NumBits); if (!MaybeRead) return MaybeRead; uint32_t Piece = MaybeRead.get(); + assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value"); + const uint32_t MaskBitOrder = (NumBits - 1); + const uint32_t Mask = 1UL << MaskBitOrder; - if ((Piece & (1U << (NumBits-1))) == 0) + if ((Piece & Mask) == 0) return uint64_t(Piece); uint64_t Result = 0; unsigned NextBit = 0; while (true) { - Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; + Result |= uint64_t(Piece & (Mask - 1)) << NextBit; - if ((Piece & (1U << (NumBits-1))) == 0) + if ((Piece & Mask) == 0) return Result; NextBit += NumBits-1; + if (NextBit >= 64) + return createStringError(std::errc::illegal_byte_sequence, + "Unterminated VBR"); + MaybeRead = Read(NumBits); if (!MaybeRead) return MaybeRead; @@ -299,6 +311,13 @@ public: /// Skip to the end of the file. void skipToEnd() { NextChar = BitcodeBytes.size(); } + + /// Check whether a reservation of Size elements is plausible. + bool isSizePlausible(size_t Size) const { + // Don't allow reserving more elements than the number of bits, assuming + // at least one bit is needed to encode an element. + return Size < BitcodeBytes.size() * 8; + } }; /// When advancing through a bitstream cursor, each advance can discover a few @@ -357,7 +376,7 @@ class BitstreamCursor : SimpleBitstreamCursor { BitstreamBlockInfo *BlockInfo = nullptr; public: - static const size_t MaxChunkSize = sizeof(word_t) * 8; + static const size_t MaxChunkSize = 32; BitstreamCursor() = default; explicit BitstreamCursor(ArrayRef BitcodeBytes) @@ -521,10 +540,11 @@ private: public: /// Return the abbreviation for the specified AbbrevId. - const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { + Expected getAbbrev(unsigned AbbrevID) { unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; if (AbbrevNo >= CurAbbrevs.size()) - report_fatal_error("Invalid abbrev number"); + return createStringError( + std::errc::illegal_byte_sequence, "Invalid abbrev number"); return CurAbbrevs[AbbrevNo].get(); } diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index 21b260b7b9f3..be6bab5532bd 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -74,16 +74,10 @@ class BitstreamWriter { }; std::vector BlockInfoRecords; - void WriteByte(unsigned char Value) { - Out.push_back(Value); - FlushToFile(); - } - void WriteWord(unsigned Value) { Value = support::endian::byte_swap(Value); Out.append(reinterpret_cast(&Value), reinterpret_cast(&Value + 1)); - FlushToFile(); } uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; } @@ -114,7 +108,7 @@ public: /// null, \p O does not flush incrementially, but writes to disk at the end. /// /// \p FlushThreshold is the threshold (unit M) to flush \p O if \p FS is - /// valid. + /// valid. Flushing only occurs at (sub)block boundaries. BitstreamWriter(SmallVectorImpl &O, raw_fd_stream *FS = nullptr, uint32_t FlushThreshold = 512) : Out(O), FS(FS), FlushThreshold(FlushThreshold << 20), CurBit(0), @@ -249,8 +243,8 @@ public: // Emit the bits with VBR encoding, NumBits-1 bits at a time. 
while (Val >= Threshold) { - Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | - (1 << (NumBits-1)), NumBits); + Emit(((uint32_t)Val & ((1 << (NumBits - 1)) - 1)) | (1 << (NumBits - 1)), + NumBits); Val >>= NumBits-1; } @@ -327,6 +321,7 @@ public: CurCodeSize = B.PrevCodeSize; CurAbbrevs = std::move(B.PrevAbbrevs); BlockScope.pop_back(); + FlushToFile(); } //===--------------------------------------------------------------------===// @@ -472,14 +467,12 @@ public: FlushToWord(); // Emit literal bytes. - for (const auto &B : Bytes) { - assert(isUInt<8>(B) && "Value too large to emit as byte"); - WriteByte((unsigned char)B); - } + assert(llvm::all_of(Bytes, [](UIntTy B) { return isUInt<8>(B); })); + Out.append(Bytes.begin(), Bytes.end()); // Align end to 32-bits. while (GetBufferOffset() & 3) - WriteByte(0); + Out.push_back(0); } void emitBlob(StringRef Bytes, bool ShouldEmitSize = true) { emitBlob(makeArrayRef((const uint8_t *)Bytes.data(), Bytes.size()), diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h index 1190d6061e45..c0e976317aef 100644 --- a/llvm/include/llvm/CodeGen/AccelTable.h +++ b/llvm/include/llvm/CodeGen/AccelTable.h @@ -14,19 +14,15 @@ #define LLVM_CODEGEN_ACCELTABLE_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DJB.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include #include #include @@ -108,6 +104,8 @@ namespace llvm { class AsmPrinter; class DwarfCompileUnit; class DwarfDebug; +class MCSymbol; +class raw_ostream; /// Interface which the different types of accelerator table data have to /// conform. It serves as a base class for different values of the template diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h index 60442326d6c7..1a09820f80ef 100644 --- a/llvm/include/llvm/CodeGen/Analysis.h +++ b/llvm/include/llvm/CodeGen/Analysis.h @@ -15,14 +15,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/CodeGen.h" namespace llvm { +template class SmallVectorImpl; class GlobalValue; class LLT; class MachineBasicBlock; diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index d911bfd435ae..fb4627c029b0 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -22,9 +22,7 @@ #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/SourceMgr.h" #include #include #include @@ -32,6 +30,7 @@ namespace llvm { +class AddrLabelMap; class BasicBlock; class BlockAddress; class Constant; @@ -176,6 +175,10 @@ private: // function. This is used to calculate the size of the BB section. MCSymbol *CurrentSectionBeginSym = nullptr; + /// This map keeps track of which symbol is being used for the specified basic + /// block's address of label. 
+  std::unique_ptr<AddrLabelMap> AddrLabelSymbols;
+
   // The garbage collection metadata printer table.
   void *GCMetadataPrinters = nullptr; // Really a DenseMap.
 
@@ -212,6 +215,16 @@ private:
   /// CFISection type the module needs i.e. either .eh_frame or .debug_frame.
   CFISection ModuleCFISection = CFISection::None;
 
+  /// True if the module contains split-stack functions. This is used to
+  /// emit the .note.GNU-split-stack section, as the linker requires special
+  /// handling when a split-stack function calls a no-split-stack function.
+  bool HasSplitStack = false;
+
+  /// True if the module contains no-split-stack functions. This is used to
+  /// emit the .note.GNU-no-split-stack section when the module also contains
+  /// functions without a split-stack prologue.
+  bool HasNoSplitStack = false;
+
 protected:
   explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
 
@@ -254,6 +267,25 @@ public:
   // given basic block.
   MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB);
 
+  /// Return the symbol to be used for the specified basic block when its
+  /// address is taken. This cannot be its normal LBB label because the block
+  /// may be accessed outside its containing function.
+  MCSymbol *getAddrLabelSymbol(const BasicBlock *BB) {
+    return getAddrLabelSymbolToEmit(BB).front();
+  }
+
+  /// Return the symbol to be used for the specified basic block when its
+  /// address is taken. If other blocks were RAUW'd to this one, we may have
+  /// to emit them as well; return the whole set.
+  ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
+
+  /// If the specified function has had any references to address-taken blocks
+  /// generated, but the block got deleted, return the symbol now so we can
+  /// emit it. This prevents emitting a reference to a symbol that has no
+  /// definition.
+  void takeDeletedSymbolsForFunction(const Function *F,
+                                     std::vector<MCSymbol *> &Result);
+
   /// Return information about object file lowering.
   const TargetLoweringObjectFile &getObjFileLowering() const;
 
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
new file mode 100644
index 000000000000..7ae1304cced9
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -0,0 +1,109 @@
+//===-- BasicBlockSectionsProfileReader.h - BB sections profile reader pass ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass creates the basic block cluster info by reading the basic block
+// sections profile. The cluster info will be used by the basic-block-sections
+// pass to arrange basic blocks in their sections.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H
+#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+// The cluster information for a machine basic block.
+struct BBClusterInfo {
+  // MachineBasicBlock ID.
+  unsigned MBBNumber;
+  // Cluster ID this basic block belongs to.
+  unsigned ClusterID;
+  // Position of basic block within the cluster.
+  unsigned PositionInCluster;
+};
+
+using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo>>;
+
+class BasicBlockSectionsProfileReader : public ImmutablePass {
+public:
+  static char ID;
+
+  BasicBlockSectionsProfileReader(const MemoryBuffer *Buf)
+      : ImmutablePass(ID), MBuf(Buf) {
+    initializeBasicBlockSectionsProfileReaderPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  BasicBlockSectionsProfileReader() : ImmutablePass(ID) {
+    initializeBasicBlockSectionsProfileReaderPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Basic Block Sections Profile Reader";
+  }
+
+  // Returns true if a basic block sections profile exists for function \p
+  // FuncName.
+  bool isFunctionHot(StringRef FuncName) const;
+
+  // Returns a pair whose first element says whether a basic block sections
+  // profile exists for the function \p FuncName, and whose second element is
+  // the basic block sections profile (cluster info) for this function. If the
+  // first element is true and the second element is empty, it means unique
+  // basic block sections are desired for all basic blocks of the function.
+  std::pair<bool, SmallVector<BBClusterInfo>>
+  getBBClusterInfoForFunction(StringRef FuncName) const;
+
+  /// Read profiles of basic blocks if available here.
+  void initializePass() override;
+
+private:
+  StringRef getAliasName(StringRef FuncName) const {
+    auto R = FuncAliasMap.find(FuncName);
+    return R == FuncAliasMap.end() ? FuncName : R->second;
+  }
+
+  // This contains the basic-block-sections profile.
+  const MemoryBuffer *MBuf = nullptr;
+
+  // This encapsulates the BB cluster information for the whole program.
+  //
+  // For every function name, it contains the cluster information for (all or
+  // some of) its basic blocks. The cluster information for every basic block
+  // includes its cluster ID along with the position of the basic block in that
+  // cluster.
+  ProgramBBClusterInfoMapTy ProgramBBClusterInfo;
+
+  // Some functions have alias names. We use this map to find the main alias
+  // name for which we have a mapping in ProgramBBClusterInfo.
+  StringMap<StringRef> FuncAliasMap;
+};
+
+// Creates a BasicBlockSectionsProfileReader pass to parse the basic block
+// sections profile. \p Buf is a memory buffer that contains the list of
+// functions and basic block ids to selectively enable basic block sections.
+ImmutablePass *
+createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf);
+
+} // namespace llvm
+#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0b2737628923..46be8e030406 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -195,6 +195,10 @@ private:
                                          bool VariableMask,
                                          bool IsGatherScatter,
                                          TTI::TargetCostKind CostKind) {
+    // We cannot scalarize scalable vectors, so return Invalid.
+    if (isa<ScalableVectorType>(DataTy))
+      return InstructionCost::getInvalid();
+
     auto *VT = cast<FixedVectorType>(DataTy);
     // Assume the target does not have support for gather/scatter operations
     // and provide a rough estimate.
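The getInvalid() bail-out above only helps if cost-model clients test for the
invalid state rather than treating every InstructionCost as a number. A minimal
caller-side sketch (illustrative only, not part of the patch; TTI, DataTy, Ptr
and CostKind are assumed to exist in the caller, and the call shown is just one
of the entry points that can now return Invalid):

  // Guard against costs that cannot be computed for scalable vectors.
  InstructionCost Cost = TTI.getGatherScatterOpCost(
      Instruction::Load, DataTy, Ptr, /*VariableMask=*/false, Align(4),
      CostKind);
  if (!Cost.isValid())
    return false; // No scalarization estimate exists; bail out.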
@@ -312,6 +316,26 @@ public: return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); } + unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const { + auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) { + auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2); + EVT VT = getTLI()->getValueType(DL, SrcTy); + if (getTLI()->isOperationLegal(ISD::STORE, VT) || + getTLI()->isOperationCustom(ISD::STORE, VT)) + return true; + + EVT ValVT = + getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2)); + EVT LegalizedVT = + getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT); + return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT); + }; + while (VF > 2 && IsSupportedByTarget(VF)) + VF /= 2; + return VF; + } + bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const { EVT VT = getTLI()->getValueType(DL, Ty); @@ -362,10 +386,9 @@ public: return getTLI()->isTypeLegal(VT); } - InstructionCost getRegUsageForType(Type *Ty) { - InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first; - assert(Val >= 0 && "Negative cost!"); - return Val; + unsigned getRegUsageForType(Type *Ty) { + EVT ETy = getTLI()->getValueType(DL, Ty); + return getTLI()->getNumRegisters(Ty->getContext(), ETy); } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, @@ -680,6 +703,8 @@ public: bool Insert, bool Extract) { /// FIXME: a bitfield is not a reasonable abstraction for talking about /// which elements are needed from a scalable vector + if (isa(InTy)) + return InstructionCost::getInvalid(); auto *Ty = cast(InTy); assert(DemandedElts.getBitWidth() == Ty->getNumElements() && @@ -702,6 +727,8 @@ public: /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract) { + if (isa(InTy)) + return InstructionCost::getInvalid(); auto *Ty = cast(InTy); APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements()); @@ -871,7 +898,8 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp) { + VectorType *SubTp, + ArrayRef Args = None) { switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: @@ -1100,6 +1128,9 @@ public: // TODO: If one of the types get legalized by splitting, handle this // similarly to what getCastInstrCost() does. if (auto *ValVTy = dyn_cast(ValTy)) { + if (isa(ValTy)) + return InstructionCost::getInvalid(); + unsigned Num = cast(ValVTy)->getNumElements(); if (CondTy) CondTy = CondTy->getScalarType(); @@ -1172,11 +1203,12 @@ public: if (CostKind != TTI::TCK_RecipThroughput) return Cost; + const DataLayout &DL = this->getDataLayout(); if (Src->isVectorTy() && // In practice it's not currently possible to have a change in lane // length for extending loads or truncating stores so both types should // have the same scalable property. - TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(), + TypeSize::isKnownLT(DL.getTypeStoreSizeInBits(Src), LT.second.getSizeInBits())) { // This is a vector load that legalizes to a larger type than the vector // itself. Unless the corresponding extending load or truncating store is @@ -1220,6 +1252,11 @@ public: unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) { + + // We cannot scalarize scalable vectors, so return Invalid. 
+ if (isa(VecTy)) + return InstructionCost::getInvalid(); + auto *VT = cast(VecTy); unsigned NumElts = VT->getNumElements(); @@ -1274,8 +1311,7 @@ public: // Scale the cost of the load by the fraction of legal instructions that // will be used. - Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(), - NumLegalInsts); + Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts); } // Then plus the cost of interleave operation. @@ -1382,6 +1418,26 @@ public: default: break; + case Intrinsic::powi: + if (auto *RHSC = dyn_cast(Args[1])) { + bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize(); + if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(), + ShouldOptForSize)) { + // The cost is modeled on the expansion performed by ExpandPowI in + // SelectionDAGBuilder. + APInt Exponent = RHSC->getValue().abs(); + unsigned ActiveBits = Exponent.getActiveBits(); + unsigned PopCount = Exponent.countPopulation(); + InstructionCost Cost = (ActiveBits + PopCount - 2) * + thisT()->getArithmeticInstrCost( + Instruction::FMul, RetTy, CostKind); + if (RHSC->getSExtValue() < 0) + Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy, + CostKind); + return Cost; + } + } + break; case Intrinsic::cttz: // FIXME: If necessary, this should go in target-specific overrides. if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz()) @@ -1418,7 +1474,7 @@ public: // The cost of materialising a constant integer vector. return TargetTransformInfo::TCC_Basic; } - case Intrinsic::experimental_vector_extract: { + case Intrinsic::vector_extract: { // FIXME: Handle case where a scalable vector is extracted from a scalable // vector if (isa(RetTy)) @@ -1428,7 +1484,7 @@ public: cast(Args[0]->getType()), None, Index, cast(RetTy)); } - case Intrinsic::experimental_vector_insert: { + case Intrinsic::vector_insert: { // FIXME: Handle case where a scalable vector is inserted into a scalable // vector if (isa(Args[1]->getType())) @@ -1471,8 +1527,6 @@ public: } case Intrinsic::fshl: case Intrinsic::fshr: { - if (isa(RetTy)) - return BaseT::getIntrinsicInstrCost(ICA, CostKind); const Value *X = Args[0]; const Value *Y = Args[1]; const Value *Z = Args[2]; @@ -1512,6 +1566,29 @@ public: } return Cost; } + case Intrinsic::get_active_lane_mask: { + EVT ResVT = getTLI()->getValueType(DL, RetTy, true); + EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true); + + // If we're not expanding the intrinsic then we assume this is cheap + // to implement. + if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) { + std::pair LT = + getTLI()->getTypeLegalizationCost(DL, RetTy); + return LT.first; + } + + // Create the expanded types that will be used to calculate the uadd_sat + // operation. + Type *ExpRetTy = VectorType::get( + ICA.getArgTypes()[0], cast(RetTy)->getElementCount()); + IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF); + InstructionCost Cost = + thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy, + CmpInst::ICMP_ULT, CostKind); + return Cost; + } } // Assume that we need to scalarize this intrinsic. @@ -1560,7 +1637,7 @@ public: // Library call cost - other than size, make it expensive. unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10; - SmallVector ISDs; + unsigned ISD = 0; switch (IID) { default: { // Scalable vectors cannot be scalarized, so return Invalid. 
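As a concrete check of the powi cost model above (an illustration, not text
from the patch): for powi(x, 13) the exponent is 0b1101, so ActiveBits = 4 and
PopCount = 3, giving (4 + 3 - 2) = 5 FMul operations. That matches the
repeated-squaring expansion in SelectionDAGBuilder: three squarings (x^2, x^4,
x^8) plus two multiplies to combine the set bits, and a negative exponent adds
one FDiv for the final reciprocal.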
@@ -1605,82 +1682,82 @@ public: // Look for intrinsics that can be lowered directly or turned into a scalar // intrinsic call. case Intrinsic::sqrt: - ISDs.push_back(ISD::FSQRT); + ISD = ISD::FSQRT; break; case Intrinsic::sin: - ISDs.push_back(ISD::FSIN); + ISD = ISD::FSIN; break; case Intrinsic::cos: - ISDs.push_back(ISD::FCOS); + ISD = ISD::FCOS; break; case Intrinsic::exp: - ISDs.push_back(ISD::FEXP); + ISD = ISD::FEXP; break; case Intrinsic::exp2: - ISDs.push_back(ISD::FEXP2); + ISD = ISD::FEXP2; break; case Intrinsic::log: - ISDs.push_back(ISD::FLOG); + ISD = ISD::FLOG; break; case Intrinsic::log10: - ISDs.push_back(ISD::FLOG10); + ISD = ISD::FLOG10; break; case Intrinsic::log2: - ISDs.push_back(ISD::FLOG2); + ISD = ISD::FLOG2; break; case Intrinsic::fabs: - ISDs.push_back(ISD::FABS); + ISD = ISD::FABS; break; case Intrinsic::canonicalize: - ISDs.push_back(ISD::FCANONICALIZE); + ISD = ISD::FCANONICALIZE; break; case Intrinsic::minnum: - ISDs.push_back(ISD::FMINNUM); + ISD = ISD::FMINNUM; break; case Intrinsic::maxnum: - ISDs.push_back(ISD::FMAXNUM); + ISD = ISD::FMAXNUM; break; case Intrinsic::minimum: - ISDs.push_back(ISD::FMINIMUM); + ISD = ISD::FMINIMUM; break; case Intrinsic::maximum: - ISDs.push_back(ISD::FMAXIMUM); + ISD = ISD::FMAXIMUM; break; case Intrinsic::copysign: - ISDs.push_back(ISD::FCOPYSIGN); + ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: - ISDs.push_back(ISD::FFLOOR); + ISD = ISD::FFLOOR; break; case Intrinsic::ceil: - ISDs.push_back(ISD::FCEIL); + ISD = ISD::FCEIL; break; case Intrinsic::trunc: - ISDs.push_back(ISD::FTRUNC); + ISD = ISD::FTRUNC; break; case Intrinsic::nearbyint: - ISDs.push_back(ISD::FNEARBYINT); + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: - ISDs.push_back(ISD::FRINT); + ISD = ISD::FRINT; break; case Intrinsic::round: - ISDs.push_back(ISD::FROUND); + ISD = ISD::FROUND; break; case Intrinsic::roundeven: - ISDs.push_back(ISD::FROUNDEVEN); + ISD = ISD::FROUNDEVEN; break; case Intrinsic::pow: - ISDs.push_back(ISD::FPOW); + ISD = ISD::FPOW; break; case Intrinsic::fma: - ISDs.push_back(ISD::FMA); + ISD = ISD::FMA; break; case Intrinsic::fmuladd: - ISDs.push_back(ISD::FMA); + ISD = ISD::FMA; break; case Intrinsic::experimental_constrained_fmuladd: - ISDs.push_back(ISD::STRICT_FMA); + ISD = ISD::STRICT_FMA; break; // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: @@ -1897,23 +1974,49 @@ public: BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); return Cost; } + case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: { + if (Tys.empty()) + break; + Type *FromTy = Tys[0]; + bool IsSigned = IID == Intrinsic::fptosi_sat; + + InstructionCost Cost = 0; + IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); + Cost += thisT()->getCastInstrCost( + IsSigned ? 
Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, + TTI::CastContextHint::None, CostKind); + if (IsSigned) { + Type *CondTy = RetTy->getWithNewBitWidth(1); + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); + } + return Cost; + } case Intrinsic::ctpop: - ISDs.push_back(ISD::CTPOP); + ISD = ISD::CTPOP; // In case of legalization use TCC_Expensive. This is cheaper than a // library call but still not a cheap instruction. SingleCallCost = TargetTransformInfo::TCC_Expensive; break; case Intrinsic::ctlz: - ISDs.push_back(ISD::CTLZ); + ISD = ISD::CTLZ; break; case Intrinsic::cttz: - ISDs.push_back(ISD::CTTZ); + ISD = ISD::CTTZ; break; case Intrinsic::bswap: - ISDs.push_back(ISD::BSWAP); + ISD = ISD::BSWAP; break; case Intrinsic::bitreverse: - ISDs.push_back(ISD::BITREVERSE); + ISD = ISD::BITREVERSE; break; } @@ -1921,38 +2024,25 @@ public: std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); - SmallVector LegalCost; - SmallVector CustomCost; - for (unsigned ISD : ISDs) { - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && - TLI->isFAbsFree(LT.second)) { - return 0; - } - - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - LegalCost.push_back(LT.first * 2); - else - LegalCost.push_back(LT.first * 1); - } else if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // that the code is twice as expensive. - CustomCost.push_back(LT.first * 2); + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && + TLI->isFAbsFree(LT.second)) { + return 0; } - } - auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); - if (MinLegalCostI != LegalCost.end()) - return *MinLegalCostI; - - auto MinCustomCostI = - std::min_element(CustomCost.begin(), CustomCost.end()); - if (MinCustomCostI != CustomCost.end()) - return *MinCustomCostI; + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that there is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return (LT.first * 2); + else + return (LT.first * 1); + } else if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // that the code is twice as expensive. + return (LT.first * 2); + } // If we can't lower fmuladd into an FMA estimate the cost as a floating // point mul followed by an add. @@ -2061,6 +2151,11 @@ public: /// vector is reduced on each iteration. InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind) { + // Targets must implement a default value for the scalable case, since + // we don't know how many lanes the vector has. 
+ if (isa(Ty)) + return InstructionCost::getInvalid(); + Type *ScalarTy = Ty->getElementType(); unsigned NumVecElts = cast(Ty)->getNumElements(); if ((Opcode == Instruction::Or || Opcode == Instruction::And) && @@ -2159,6 +2254,11 @@ public: InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind) { + // Targets must implement a default value for the scalable case, since + // we don't know how many lanes the vector has. + if (isa(Ty)) + return InstructionCost::getInvalid(); + Type *ScalarTy = Ty->getElementType(); Type *ScalarCondTy = CondTy->getElementType(); unsigned NumVecElts = cast(Ty)->getNumElements(); diff --git a/llvm/include/llvm/CodeGen/CFIFixup.h b/llvm/include/llvm/CodeGen/CFIFixup.h new file mode 100644 index 000000000000..40e535106751 --- /dev/null +++ b/llvm/include/llvm/CodeGen/CFIFixup.h @@ -0,0 +1,38 @@ +//===-- CFIFixup.h - Insert CFI remember/restore instructions ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains definition of the base CFIFixup pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CFIFIXUP_H +#define LLVM_CODEGEN_CFIFIXUP_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +namespace llvm { +class CFIFixup : public MachineFunctionPass { +public: + static char ID; + + CFIFixup() : MachineFunctionPass(ID) { + initializeCFIFixupPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // namespace llvm + +#endif // LLVM_CODEGEN_CFIFIXUP_H diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index bfd5bab3d1c0..41b7f10cfc38 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -9,7 +9,6 @@ #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SlotIndexes.h" namespace llvm { @@ -65,17 +64,6 @@ class VirtRegMap; /// (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &LI); - /// Compute future expected spill weight of a split artifact of LI - /// that will span between start and end slot indexes. - /// \param LI The live interval to be split. - /// \param Start The expected beginning of the split artifact. Instructions - /// before start will not affect the weight. - /// \param End The expected end of the split artifact. Instructions - /// after end will not affect the weight. - /// \return The expected spill weight of the split artifact. Returns - /// negative weight for unspillable LI. - float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End); - /// Compute spill weights and allocation hints for all virtual register /// live intervals. 
void calculateSpillWeightsAndHints(); diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 8dbcd6b8ab7d..90afbfc32a4e 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -15,11 +15,9 @@ #define LLVM_CODEGEN_CALLINGCONVLOWER_H #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Alignment.h" namespace llvm { diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h index 270f935b6738..ce278468dffc 100644 --- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h +++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h @@ -19,7 +19,6 @@ namespace llvm { class BasicBlock; -class MachineBasicBlock; /// Encapsulates all of the information needed to generate a stack protector /// check, and signals to isel when initialized that one needs to be generated. /// @@ -213,6 +212,13 @@ private: MachineBasicBlock::iterator findSplitPointForStackProtector(MachineBasicBlock *BB, const TargetInstrInfo &TII); +/// Evaluates if the specified FP class test is an inversion of a simpler test. +/// An example is the test "inf|normal|subnormal|zero", which is an inversion +/// of "nan". +/// \param Test The test as specified in 'is_fpclass' intrinsic invocation. +/// \returns The inverted test, or zero, if inversion does not produce simpler +/// test. +unsigned getInvertedFPClassTest(unsigned Test); } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index f6563971f981..f4b1980b9ede 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H #define LLVM_CODEGEN_CODEGENPASSBUILDER_H -#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -26,7 +25,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/ExpandReductions.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" @@ -35,7 +33,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" @@ -43,7 +40,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" @@ -51,7 +47,6 @@ #include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/LowerInvoke.h" #include @@ -668,6 +663,10 @@ void CodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { // Expand reduction intrinsics 
into shuffle sequences if the target wants to. addPass(ExpandReductionsPass()); + + // Convert conditional moves to conditional jumps when profitable. + if (getOptLevel() != CodeGenOpt::None && !Opt.DisableSelectOptimize) + addPass(SelectOptimizePass()); } /// Turn exception handling constructs into something the code generators can @@ -751,7 +750,7 @@ template Error CodeGenPassBuilder::addCoreISelPasses( AddMachinePass &addPass) const { // Enable FastISel with -fast-isel, but allow that to be overridden. - TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true)); + TM.setO0WantsFastISel(Opt.EnableFastISelOption.value_or(true)); // Determine an instruction selector. enum class SelectorType { SelectionDAG, FastISel, GlobalISel }; diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 73d39fecc268..9281ed723854 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -16,11 +16,6 @@ #define LLVM_CODEGEN_COMMANDFLAGS_H #include "llvm/ADT/FloatingPointMode.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" #include @@ -29,6 +24,9 @@ namespace llvm { class Module; +class AttrBuilder; +class Function; +class Triple; namespace codegen { @@ -62,6 +60,8 @@ bool getEnableNoNaNsFPMath(); bool getEnableNoSignedZerosFPMath(); +bool getEnableApproxFuncFPMath(); + bool getEnableNoTrappingFPMath(); DenormalMode::DenormalModeKind getDenormalFPMath(); @@ -93,6 +93,8 @@ std::string getTrapFuncName(); bool getUseCtors(); +bool getLowerGlobalDtorsViaCxaAtExit(); + bool getRelaxELFRelocations(); bool getDataSections(); @@ -140,6 +142,8 @@ bool getDebugStrictDwarf(); unsigned getAlignLoops(); +bool getJMCInstrument(); + /// Create this object with static storage to register codegen-related command /// line options. 
struct RegisterCodeGenFlags { diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h index 9cdaedc9e861..aba6503a6a1f 100644 --- a/llvm/include/llvm/CodeGen/DFAPacketizer.h +++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h @@ -25,9 +25,7 @@ #ifndef LLVM_CODEGEN_DFAPACKETIZER_H #define LLVM_CODEGEN_DFAPACKETIZER_H -#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/Support/Automaton.h" #include #include @@ -38,6 +36,7 @@ namespace llvm { class DefaultVLIWScheduler; +class ScheduleDAGMutation; class InstrItineraryData; class MachineFunction; class MachineInstr; diff --git a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h index 2ac9d938d281..465829159e42 100644 --- a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h +++ b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h @@ -12,12 +12,12 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LexicalScopes.h" #include namespace llvm { class DILocation; +class LexicalScopes; class DINode; class MachineFunction; class MachineInstr; diff --git a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h index abeba62707c1..f19d321793e9 100644 --- a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h +++ b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -9,7 +9,7 @@ #ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H #define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H -#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/StringMap.h" namespace llvm { @@ -20,49 +20,91 @@ class MCSymbol; struct DwarfStringPoolEntry { static constexpr unsigned NotIndexed = -1; - MCSymbol *Symbol; - uint64_t Offset; - unsigned Index; + MCSymbol *Symbol = nullptr; + uint64_t Offset = 0; + unsigned Index = 0; bool isIndexed() const { return Index != NotIndexed; } }; -/// String pool entry reference. +/// DwarfStringPoolEntryRef: Dwarf string pool entry reference. +/// +/// Dwarf string pool entry keeps string value and its data. +/// There are two variants how data are represented: +/// +/// 1. By value - StringMapEntry. +/// 2. By pointer - StringMapEntry. +/// +/// The "By pointer" variant allows for reducing memory usage for the case +/// when string pool entry does not have data: it keeps the null pointer +/// and so no need to waste space for the full DwarfStringPoolEntry. +/// It is recommended to use "By pointer" variant if not all entries +/// of dwarf string pool have corresponding DwarfStringPoolEntry. + class DwarfStringPoolEntryRef { - PointerIntPair *, 1, bool> - MapEntryAndIndexed; + /// Pointer type for "By value" string entry. + using ByValStringEntryPtr = const StringMapEntry *; - const StringMapEntry *getMapEntry() const { - return MapEntryAndIndexed.getPointer(); - } + /// Pointer type for "By pointer" string entry. + using ByPtrStringEntryPtr = const StringMapEntry *; + + /// Pointer to the dwarf string pool Entry. + PointerUnion MapEntry = nullptr; public: DwarfStringPoolEntryRef() = default; - DwarfStringPoolEntryRef(const StringMapEntry &Entry, - bool Indexed) - : MapEntryAndIndexed(&Entry, Indexed) {} - explicit operator bool() const { return getMapEntry(); } + /// ASSUMPTION: DwarfStringPoolEntryRef keeps pointer to \p Entry, + /// thus specified entry mustn`t be reallocated. 
+ DwarfStringPoolEntryRef(const StringMapEntry &Entry) + : MapEntry(&Entry) {} + + /// ASSUMPTION: DwarfStringPoolEntryRef keeps pointer to \p Entry, + /// thus specified entry mustn`t be reallocated. + DwarfStringPoolEntryRef(const StringMapEntry &Entry) + : MapEntry(&Entry) { + assert(MapEntry.get()->second != nullptr); + } + + explicit operator bool() const { return !MapEntry.isNull(); } + + /// \returns symbol for the dwarf string. MCSymbol *getSymbol() const { - assert(getMapEntry()->second.Symbol && "No symbol available!"); - return getMapEntry()->second.Symbol; + assert(getEntry().Symbol && "No symbol available!"); + return getEntry().Symbol; } - uint64_t getOffset() const { return getMapEntry()->second.Offset; } - bool isIndexed() const { return MapEntryAndIndexed.getInt(); } + + /// \returns offset for the dwarf string. + uint64_t getOffset() const { return getEntry().Offset; } + + /// \returns index for the dwarf string. unsigned getIndex() const { - assert(isIndexed()); - assert(getMapEntry()->getValue().isIndexed()); - return getMapEntry()->second.Index; + assert(getEntry().isIndexed() && "Index is not set!"); + return getEntry().Index; + } + + /// \returns string. + StringRef getString() const { + if (MapEntry.is()) + return MapEntry.get()->first(); + + return MapEntry.get()->first(); + } + + /// \returns the entire string pool entry for convenience. + const DwarfStringPoolEntry &getEntry() const { + if (MapEntry.is()) + return MapEntry.get()->second; + + return *MapEntry.get()->second; } - StringRef getString() const { return getMapEntry()->first(); } - /// Return the entire string pool entry for convenience. - DwarfStringPoolEntry getEntry() const { return getMapEntry()->getValue(); } bool operator==(const DwarfStringPoolEntryRef &X) const { - return getMapEntry() == X.getMapEntry(); + return MapEntry.getOpaqueValue() == X.MapEntry.getOpaqueValue(); } + bool operator!=(const DwarfStringPoolEntryRef &X) const { - return getMapEntry() != X.getMapEntry(); + return MapEntry.getOpaqueValue() != X.MapEntry.getOpaqueValue(); } }; diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 775698a66ada..8be97d2c2095 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -24,15 +24,15 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/MachineValueType.h" -#include #include #include namespace llvm { class AllocaInst; +class Instruction; +class IntrinsicInst; class BasicBlock; class CallInst; class Constant; @@ -212,6 +212,7 @@ protected: const TargetRegisterInfo &TRI; const TargetLibraryInfo *LibInfo; bool SkipTargetIndependentISel; + bool UseInstrRefDebugInfo = false; /// The position of the last instruction for materializing constants /// for use in the current block. It resets to EmitStartPt when it makes sense @@ -318,6 +319,12 @@ public: /// Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); + /// Signal whether instruction referencing variable locations are desired for + /// this function's debug-info. 
+ void useInstrRefDebugInfo(bool Flag) { + UseInstrRefDebugInfo = Flag; + } + protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, diff --git a/llvm/include/llvm/CodeGen/FaultMaps.h b/llvm/include/llvm/CodeGen/FaultMaps.h index 8a8b1d2e6008..c228bb895edd 100644 --- a/llvm/include/llvm/CodeGen/FaultMaps.h +++ b/llvm/include/llvm/CodeGen/FaultMaps.h @@ -10,7 +10,6 @@ #define LLVM_CODEGEN_FAULTMAPS_H #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Endian.h" #include #include diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index 524730d53694..f8156ce73196 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -101,6 +101,10 @@ public: // Value was lowered to tied def and gc.relocate should be replaced with // copy from vreg. VReg, + // Value was lowered to tied def and gc.relocate should be replaced with + // SDValue kept in StatepointLoweringInfo structure. This valid for local + // relocates only. + SDValueNode, } type = NoRelocate; // Payload contains either frame index of the stack slot in which the value // was spilled, or virtual register which contains the re-definition. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h index 4f95335db74b..4d9694347f17 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h @@ -13,10 +13,10 @@ #define LLVM_CODEGEN_GLOBALISEL_CSEMIRBUILDER_H #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" namespace llvm { +class GISelInstProfileBuilder; /// Defines a builder that does CSE of MachineInstructions using GISelCSEInfo. /// Eg usage. 
/// diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index f9663fadb868..9bf1c134618c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -17,25 +17,26 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetCallingConv.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MachineValueType.h" #include #include namespace llvm { +class AttributeList; class CallBase; class DataLayout; class Function; class FunctionLoweringInfo; class MachineIRBuilder; +class MachineFunction; struct MachinePointerInfo; class MachineRegisterInfo; class TargetLowering; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h index 795686980842..8c295428afe8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h @@ -15,7 +15,6 @@ #define LLVM_CODEGEN_GLOBALISEL_COMBINER_H #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { class MachineRegisterInfo; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 45c27c25aea0..73edc3c37970 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -17,16 +17,20 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINERHELPER_H #define LLVM_CODEGEN_GLOBALISEL_COMBINERHELPER_H -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" -#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Register.h" -#include "llvm/Support/Alignment.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include namespace llvm { class GISelChangeObserver; +class APFloat; +class APInt; +class GPtrAdd; +class GStore; +class GZExtLoad; class MachineIRBuilder; class MachineInstrBuilder; class MachineRegisterInfo; @@ -124,10 +128,20 @@ public: const TargetLowering &getTargetLowering() const; + /// \returns true if the combiner is running pre-legalization. + bool isPreLegalize() const; + + /// \returns true if \p Query is legal on the target. + bool isLegal(const LegalityQuery &Query) const; + /// \return true if the combine is running prior to legalization, or if \p /// Query is legal on the target. bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const; + /// \return true if the combine is running prior to legalization, or if \p Ty + /// is a legal integer constant type on the target. + bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const; + /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const; @@ -529,6 +543,13 @@ public: /// Combine G_UREM x, (known power of 2) to an add and bitmasking. void applySimplifyURemByPow2(MachineInstr &MI); + /// Push a binary operator through a select on constants. 
+ /// + /// binop (select cond, K0, K1), K2 -> + /// select cond, (binop K0, K2), (binop K1, K2) + bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo); + bool applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo); + bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl &MatchInfo); @@ -645,6 +666,14 @@ public: /// (G_SMULO x, 2) -> (G_SADDO x, x) bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Match: + /// (G_*MULO x, 0) -> 0 + no carry out + bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Match: + /// (G_*ADDO x, 0) -> x + no carry out + bool matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Transform (fadd x, fneg(y)) -> (fsub x, y) /// (fadd fneg(x), y) -> (fsub y, x) /// (fsub x, fneg(y)) -> (fadd x, y) @@ -702,6 +731,15 @@ public: bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Fold boolean selects to logical operations. + bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo); + + bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info); + + /// Transform G_ADD(x, G_SUB(y, x)) to y. + /// Transform G_ADD(G_SUB(y, x), x) to y. + bool matchAddSubSameReg(MachineInstr &MI, Register &Src); + private: /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h index 7d198fada411..3ec6a1da201e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -28,7 +28,7 @@ class GISelWorkList { SmallVector Worklist; DenseMap WorklistMap; -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS bool Finalized = true; #endif @@ -49,7 +49,7 @@ public: // of most passes. void deferred_insert(MachineInstr *I) { Worklist.push_back(I); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS Finalized = false; #endif } @@ -65,21 +65,25 @@ public: for (unsigned i = 0; i < Worklist.size(); ++i) if (!WorklistMap.try_emplace(Worklist[i], i).second) llvm_unreachable("Duplicate elements in the list"); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS Finalized = true; #endif } /// Add the specified instruction to the worklist if it isn't already in it. void insert(MachineInstr *I) { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(Finalized && "GISelWorkList used without finalizing"); +#endif if (WorklistMap.try_emplace(I, Worklist.size()).second) Worklist.push_back(I); } /// Remove I from the worklist if it exists. void remove(const MachineInstr *I) { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert((Finalized || WorklistMap.empty()) && "Neither finalized nor empty"); +#endif auto It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. 
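The Finalized flag guarded above enforces a two-phase protocol: bulk
deferred_insert() calls, a single finalize(), and only then the map-backed
insert()/remove()/pop_back_val(). A minimal usage sketch (hypothetical driver
code, not part of the patch; MF and the per-instruction visit() helper are
assumed):

  GISelWorkList<512> WorkList;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      WorkList.deferred_insert(&MI); // cheap appends; dedup map not built yet
  WorkList.finalize();               // builds WorklistMap, enables insert()
  while (!WorkList.empty())
    visit(*WorkList.pop_back_val());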
@@ -96,7 +100,9 @@ public: } MachineInstr *pop_back_val() { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(Finalized && "GISelWorkList used without finalizing"); +#endif MachineInstr *I; do { I = Worklist.pop_back_val(); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 7103656365b1..58fe48200e73 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H #define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H +#include "llvm/IR/Instructions.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -226,6 +227,37 @@ public: } }; +/// Represent a G_ICMP or G_FCMP. +class GAnyCmp : public GenericMachineInstr { +public: + CmpInst::Predicate getCond() const { + return static_cast(getOperand(1).getPredicate()); + } + Register getLHSReg() const { return getReg(2); } + Register getRHSReg() const { return getReg(3); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_ICMP || + MI->getOpcode() == TargetOpcode::G_FCMP; + } +}; + +/// Represent a G_ICMP. +class GICmp : public GAnyCmp { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_ICMP; + } +}; + +/// Represent a G_FCMP. +class GFCmp : public GAnyCmp { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_FCMP; + } +}; + } // namespace llvm #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index ebe16cd4f58c..5e7428a5edc5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -22,11 +22,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CodeGen.h" #include @@ -248,12 +247,6 @@ private: bool translateInlineAsm(const CallBase &CB, MachineIRBuilder &MIRBuilder); - /// Returns true if the value should be split into multiple LLTs. - /// If \p Offsets is given then the split type's offsets will be stored in it. - /// If \p Offsets is not empty it will be cleared first. - bool valueIsSplit(const Value &V, - SmallVectorImpl *Offsets = nullptr); - /// Common code for translating normal calls or invokes. 
bool translateCallBase(const CallBase &CB, MachineIRBuilder &MIRBuilder); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h index 4a72621ec61e..60c7694725a5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h @@ -13,8 +13,10 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECT_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECT_H -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/CodeGen.h" namespace llvm { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 03f4f3bf0b19..8ea45e576e4d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -18,12 +18,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Support/CodeGenCoverage.h" +#include "llvm/IR/Function.h" #include "llvm/Support/LowLevelTypeImpl.h" #include #include @@ -34,6 +31,10 @@ namespace llvm { +class BlockFrequencyInfo; +class CodeGenCoverage; +class MachineBasicBlock; +class ProfileSummaryInfo; class APInt; class APFloat; class GISelKnownBits; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index bc9f952146c2..c06b33d11170 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -17,16 +17,17 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/Support/CodeGenCoverage.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -673,7 +674,7 @@ bool InstructionSelector::executeMatchTable( ComplexRendererFns Renderer = (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])( State.MIs[InsnID]->getOperand(OpIdx)); - if (Renderer.hasValue()) + if (Renderer) State.Renderers[RendererID] = Renderer.getValue(); else if (handleReject() == RejectAndGiveUp) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 38d2fe28063a..6802591b6350 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -24,10 +24,10 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Register.h" 
+#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "legalizer" -using namespace llvm::MIPatternMatch; namespace llvm { class LegalizationArtifactCombiner { @@ -56,6 +56,7 @@ public: SmallVectorImpl &DeadInsts, SmallVectorImpl &UpdatedDefs, GISelObserverWrapper &Observer) { + using namespace llvm::MIPatternMatch; assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstrAndDebugLoc(MI); @@ -109,6 +110,7 @@ public: SmallVectorImpl &DeadInsts, SmallVectorImpl &UpdatedDefs, GISelObserverWrapper &Observer) { + using namespace llvm::MIPatternMatch; assert(MI.getOpcode() == TargetOpcode::G_ZEXT); Builder.setInstrAndDebugLoc(MI); @@ -170,6 +172,7 @@ public: bool tryCombineSExt(MachineInstr &MI, SmallVectorImpl &DeadInsts, SmallVectorImpl &UpdatedDefs) { + using namespace llvm::MIPatternMatch; assert(MI.getOpcode() == TargetOpcode::G_SEXT); Builder.setInstrAndDebugLoc(MI); @@ -227,6 +230,7 @@ public: SmallVectorImpl &DeadInsts, SmallVectorImpl &UpdatedDefs, GISelObserverWrapper &Observer) { + using namespace llvm::MIPatternMatch; assert(MI.getOpcode() == TargetOpcode::G_TRUNC); Builder.setInstr(MI); @@ -1281,6 +1285,8 @@ private: /// Looks through copy instructions and returns the actual /// source register. Register lookThroughCopyInstrs(Register Reg) { + using namespace llvm::MIPatternMatch; + Register TmpReg; while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { if (MRI.getType(TmpReg).isValid()) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h index c19f1d5330ba..7884b3f2ea6e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -20,11 +20,17 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZER_H #define LLVM_CODEGEN_GLOBALISEL_LEGALIZER_H -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { +class LegalizerInfo; +class MachineIRBuilder; +class MachineInstr; +class GISelChangeObserver; class LostDebugLocObserver; class Legalizer : public MachineFunctionPass { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 3b2f937375eb..c6c57ac07f0e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -21,14 +21,22 @@ #define LLVM_CODEGEN_GLOBALISEL_LEGALIZERHELPER_H #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/LowLevelType.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/TargetOpcodes.h" namespace llvm { // Forward declarations. +class APInt; +class GAnyLoad; +class GLoadStore; +class GStore; +class GenericMachineInstr; +class MachineFunction; +class MachineIRBuilder; +class MachineInstr; +class MachineInstrBuilder; +struct MachinePointerInfo; +template class SmallVectorImpl; class LegalizerInfo; class MachineRegisterInfo; class GISelChangeObserver; @@ -159,10 +167,6 @@ public: /// def by inserting a G_BITCAST from \p CastTy void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx); - /// Widen \p OrigReg to \p WideTy by merging to a wider type, padding with - /// G_IMPLICIT_DEF, and producing dead results. 
- Register widenWithUnmerge(LLT WideTy, Register OrigReg); - private: LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 17cb53dd2d5b..c0cad8ff675d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -14,26 +14,26 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H #define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/raw_ostream.h" #include #include #include -#include #include namespace llvm { extern cl::opt DisableGISelLegalityCheck; +class MachineFunction; +class raw_ostream; class LegalizerHelper; class MachineInstr; class MachineRegisterInfo; @@ -327,8 +327,14 @@ LegalityPredicate largerThan(unsigned TypeIdx0, unsigned TypeIdx1); /// index. LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1); -/// True iff the specified MMO index has a size that is not a power of 2 +/// True iff the specified MMO index has a size (rounded to bytes) that is not a +/// power of 2. LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx); + +/// True iff the specified MMO index has a size that is not an even byte size, +/// or that even byte size is not a power of 2. +LegalityPredicate memSizeNotByteSizePow2(unsigned MMOIdx); + /// True iff the specified type index is a vector whose element count is not a /// power of 2. LegalityPredicate numElementsNotPow2(unsigned TypeIdx); @@ -351,6 +357,14 @@ LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx); /// Keep the same scalar or element type as the given type. LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty); +/// Keep the same scalar or element type as \p TypeIdx, but take the number of +/// elements from \p FromTypeIdx. +LegalizeMutation changeElementCountTo(unsigned TypeIdx, unsigned FromTypeIdx); + +/// Keep the same scalar or element type as \p TypeIdx, but take the number of +/// elements from \p Ty. +LegalizeMutation changeElementCountTo(unsigned TypeIdx, LLT Ty); + /// Change the scalar size or element size to have the same scalar size as type /// index \p FromIndex. Unlike changeElementTo, this discards pointer types and /// only changes the size. @@ -800,11 +814,23 @@ public: return actionIf(LegalizeAction::Unsupported, LegalityPredicates::memSizeInBytesNotPow2(0)); } + + /// Lower a memory operation if the memory size, rounded to bytes, is not a + /// power of 2. For example, this will not trigger for s1 or s7, but will for + /// s24. LegalizeRuleSet &lowerIfMemSizeNotPow2() { return actionIf(LegalizeAction::Lower, LegalityPredicates::memSizeInBytesNotPow2(0)); } + /// Lower a memory operation if the memory access size is not a round power of + /// 2 byte size. This is stricter than lowerIfMemSizeNotPow2, and more likely + /// what you want (e.g. this will lower s1, s7 and s24). 
+ LegalizeRuleSet &lowerIfMemSizeNotByteSizePow2() { + return actionIf(LegalizeAction::Lower, + LegalityPredicates::memSizeNotByteSizePow2(0)); + } + LegalizeRuleSet &customIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a custom action with a // free-form user provided Predicate properly handles all type indices: diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h index 0845c001abdb..6efe7c7c9bbd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h @@ -17,18 +17,19 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" namespace llvm { // Forward declarations. +class AnalysisUsage; +class GStore; +class LegalizerInfo; +class MachineBasicBlock; +class MachineInstr; +class TargetLowering; +struct LegalityQuery; class MachineRegisterInfo; namespace GISelAddressing { /// Helper struct to store a base, index and offset that forms an address diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h index 1d1afff7f934..9ea0d095eeb1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -22,11 +22,14 @@ #define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H #include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { // Forward declarations. 
+class AnalysisUsage; +class MachineBasicBlock; +class MachineInstr; +class MachineOperand; class MachineRegisterInfo; class TargetTransformInfo; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index daf1ff052983..1cacf96620f0 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -94,6 +94,48 @@ inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } +template +inline Optional matchConstantSplat(Register, + const MachineRegisterInfo &); + +template <> +inline Optional matchConstantSplat(Register Reg, + const MachineRegisterInfo &MRI) { + return getIConstantSplatVal(Reg, MRI); +} + +template <> +inline Optional matchConstantSplat(Register Reg, + const MachineRegisterInfo &MRI) { + return getIConstantSplatSExtVal(Reg, MRI); +} + +template struct ICstOrSplatMatch { + ConstT &CR; + ICstOrSplatMatch(ConstT &C) : CR(C) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + if (auto MaybeCst = matchConstant(Reg, MRI)) { + CR = *MaybeCst; + return true; + } + + if (auto MaybeCstSplat = matchConstantSplat(Reg, MRI)) { + CR = *MaybeCstSplat; + return true; + } + + return false; + }; +}; + +inline ICstOrSplatMatch m_ICstOrSplat(APInt &Cst) { + return ICstOrSplatMatch(Cst); +} + +inline ICstOrSplatMatch m_ICstOrSplat(int64_t &Cst) { + return ICstOrSplatMatch(Cst); +} + struct GCstAndRegMatch { Optional &ValReg; GCstAndRegMatch(Optional &ValReg) : ValReg(ValReg) {} diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index c4c2fc076dd8..16ba568c1be9 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -13,19 +13,26 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H #define LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H -#include "llvm/CodeGen/GlobalISel/CSEInfo.h" -#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Module.h" namespace llvm { // Forward declarations. +class APInt; +class BlockAddress; +class Constant; +class ConstantFP; +class ConstantInt; +class DataLayout; +class GISelCSEInfo; +class GlobalValue; +class TargetRegisterClass; class MachineFunction; class MachineInstr; class TargetInstrInfo; @@ -942,22 +949,6 @@ public: /// Build and insert \p Res = IMPLICIT_DEF. MachineInstrBuilder buildUndef(const DstOp &Res); - /// Build and insert instructions to put \p Ops together at the specified \p - /// Indices to form a larger register. - /// - /// If the types of the input registers are uniform and cover the entirety of - /// \p Res then a G_MERGE_VALUES will be produced. Otherwise, an IMPLICIT_DEF - /// followed by a sequence of G_INSERT instructions is produced. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre The final element of the sequence must not extend past the end of the - /// destination register. - /// \pre The bits defined by each Op (derived from index and scalar size) must - /// not overlap. - /// \pre \p Indices must be in ascending order of bit position.
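A minimal usage sketch for the new m_ICstOrSplat matcher added above (hypothetical helper; assumes the usual GlobalISel setup):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// True if Reg is a G_CONSTANT, or a constant-splat build vector, whose
// value is a power of 2.
static bool isPow2CstOrSplat(llvm::Register Reg,
                             const llvm::MachineRegisterInfo &MRI) {
  using namespace llvm::MIPatternMatch;
  llvm::APInt Cst;
  return mi_match(Reg, MRI, m_ICstOrSplat(Cst)) && Cst.isPowerOf2();
}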
- void buildSequence(Register Res, ArrayRef Ops, - ArrayRef Indices); - /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... /// G_MERGE_VALUES combines the input elements contiguously into a larger /// register. @@ -1001,6 +992,11 @@ public: MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef Ops); + /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ... where each OpN is + /// built with G_CONSTANT. + MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res, + ArrayRef Ops); + /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements MachineInstrBuilder buildSplatVector(const DstOp &Res, @@ -1442,8 +1438,8 @@ public: /// Build and insert \p Res = G_SUB \p Op0, \p Op1 /// - /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. + /// G_SUB sets \p Res to the difference of integer parameters \p Op0 and + /// \p Op1, truncated to their width. /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers @@ -1459,7 +1455,7 @@ public: /// Build and insert \p Res = G_MUL \p Op0, \p Op1 /// - /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1, + /// G_MUL sets \p Res to the product of integer parameters \p Op0 and \p Op1, /// truncated to their width. /// /// \pre setBasicBlock or setMI must have been called. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index 45006eecfce6..d0918485249d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -66,10 +66,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include #include #include diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h deleted file mode 100644 index 5440d97728b4..000000000000 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h +++ /dev/null @@ -1,98 +0,0 @@ -//==-- llvm/CodeGen/GlobalISel/RegisterBank.h - Register Bank ----*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file declares the API of register banks. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_GLOBALISEL_REGISTERBANK_H -#define LLVM_CODEGEN_GLOBALISEL_REGISTERBANK_H - -#include "llvm/ADT/BitVector.h" - -namespace llvm { -// Forward declarations. -class RegisterBankInfo; -class raw_ostream; -class TargetRegisterClass; -class TargetRegisterInfo; - -/// This class implements the register bank concept. -/// Two instances of RegisterBank must have different IDs. -/// This property is enforced by the RegisterBankInfo class. -class RegisterBank { -private: - unsigned ID; - const char *Name; - unsigned Size; - BitVector ContainedRegClasses; - - /// Sentinel value used to recognize a register bank not properly - /// initialized yet.
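A short sketch of the new buildBuildVectorConstant introduced above (hypothetical snippet; assumes an initialized MachineIRBuilder):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

// Materialize <2 x s32> <7, 7>: each element is emitted as a G_CONSTANT
// feeding a single G_BUILD_VECTOR.
static llvm::Register buildSplatOf7(llvm::MachineIRBuilder &B) {
  llvm::LLT V2S32 = llvm::LLT::fixed_vector(2, 32);
  llvm::APInt Seven(32, 7);
  return B.buildBuildVectorConstant(V2S32, {Seven, Seven}).getReg(0);
}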
- static const unsigned InvalidID; - - /// Only the RegisterBankInfo can initialize RegisterBank properly. - friend RegisterBankInfo; - -public: - RegisterBank(unsigned ID, const char *Name, unsigned Size, - const uint32_t *CoveredClasses, unsigned NumRegClasses); - - /// Get the identifier of this register bank. - unsigned getID() const { return ID; } - - /// Get a user-friendly name of this register bank. - /// Should be used only for debugging purposes. - const char *getName() const { return Name; } - - /// Get the maximal size in bits that fits in this register bank. - unsigned getSize() const { return Size; } - - /// Check whether this instance is ready to be used. - bool isValid() const; - - /// Check if this register bank is valid. In other words, - /// if it has been properly constructed. - /// - /// \note This method does not check anything when assertions are disabled. - /// - /// \return True if the check was successful. - bool verify(const TargetRegisterInfo &TRI) const; - - /// Check whether this register bank covers \p RC. - /// In other words, check if this register bank fully covers - /// the registers that \p RC contains. - /// \pre isValid() - bool covers(const TargetRegisterClass &RC) const; - - /// Check whether \p OtherRB is the same as this. - bool operator==(const RegisterBank &OtherRB) const; - bool operator!=(const RegisterBank &OtherRB) const { - return !this->operator==(OtherRB); - } - - /// Dump the register mask on dbgs() stream. - /// The dump is verbose. - void dump(const TargetRegisterInfo *TRI = nullptr) const; - - /// Print the register mask on OS. - /// If IsForDebug is false, then only the name of the register bank - /// is printed. Otherwise, all the fields are printed. - /// TRI is then used to print the name of the register classes that - /// this register bank covers. - void print(raw_ostream &OS, bool IsForDebug = false, - const TargetRegisterInfo *TRI = nullptr) const; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const RegisterBank &RegBank) { - RegBank.print(OS); - return OS; -} -} // End namespace llvm. - -#endif diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h deleted file mode 100644 index da785406bc31..000000000000 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ /dev/null @@ -1,775 +0,0 @@ -//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file declares the API for the register bank info. -/// This API is responsible for handling the register banks.
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H -#define LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/Register.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LowLevelTypeImpl.h" -#include -#include -#include - -namespace llvm { - -class MachineInstr; -class MachineRegisterInfo; -class raw_ostream; -class RegisterBank; -class TargetInstrInfo; -class TargetRegisterClass; -class TargetRegisterInfo; - -/// Holds all the information related to register banks. -class RegisterBankInfo { -public: - /// Helper struct that represents how a value is partially mapped - /// into a register. - /// The StartIdx and Length represent what region of the original - /// value this partial mapping covers. - /// This can be represented as a Mask of contiguous bits starting - /// at StartIdx bit and spanning Length bits. - /// StartIdx is the number of bits from the least significant bits. - struct PartialMapping { - /// Number of bits at which this partial mapping starts in the - /// original value. The bits are counted from least significant - /// bits to most significant bits. - unsigned StartIdx; - - /// Length of this mapping in bits. This is how many bits this - /// partial mapping covers in the original value: - /// from StartIdx to StartIdx + Length -1. - unsigned Length; - - /// Register bank where the partial value lives. - const RegisterBank *RegBank; - - PartialMapping() = default; - - /// Provide a shortcut for quickly building PartialMapping. - PartialMapping(unsigned StartIdx, unsigned Length, - const RegisterBank &RegBank) - : StartIdx(StartIdx), Length(Length), RegBank(&RegBank) {} - - /// \return the index, in the original value, of the most - /// significant bit that this partial mapping covers. - unsigned getHighBitIdx() const { return StartIdx + Length - 1; } - - /// Print this partial mapping on dbgs() stream. - void dump() const; - - /// Print this partial mapping on \p OS; - void print(raw_ostream &OS) const; - - /// Check that the Mask is compatible with the RegBank. - /// Indeed, if the RegBank cannot accommodate the "active bits" of the mask, - /// there is no way this mapping is valid. - /// - /// \note This method does not check anything when assertions are disabled. - /// - /// \return True if the check was successful. - bool verify() const; - }; - - /// Helper struct that represents how a value is mapped through - /// different register banks. - /// - /// \note: So far we do not have any users of the complex mappings - /// (mappings with more than one partial mapping), but when we do, - /// we would need to duplicate partial mappings. - /// The alternative could be to use an array of pointers of partial - /// mapping (i.e., PartialMapping **BreakDown) and duplicate the - /// pointers instead. - /// - /// E.g., - /// Let's say we have a 32-bit add and a <2 x 32-bit> vadd. We - /// can expand the - /// <2 x 32-bit> add into 2 x 32-bit add. - /// - /// Currently the TableGen-like file would look like: - /// \code - /// PartialMapping[] = { - /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first - /// // vec elt. - /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR}, - /// /*<2x32-bit> vadd*/ {0, 64, VPR} - /// }; // PartialMapping duplicated.
- /// - /// ValueMapping[] { - /// /*plain 32-bit add*/ {&PartialMapping[0], 1}, - /// /*expanded vadd on 2xadd*/ {&PartialMapping[1], 2}, - /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1} - /// }; - /// \endcode - /// - /// With the array of pointers, we would have: - /// \code - /// PartialMapping[] = { - /// /*32-bit add lower */ { 0, 32, GPR}, - /// /*32-bit add upper */ {32, 32, GPR}, - /// /*<2x32-bit> vadd */ { 0, 64, VPR} - /// }; // No more duplication. - /// - /// BreakDowns[] = { - /// /*AddBreakDown*/ &PartialMapping[0], - /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1], - /// /*VAddBreakDown*/ &PartialMapping[2] - /// }; // Addresses of PartialMapping duplicated (smaller). - /// - /// ValueMapping[] { - /// /*plain 32-bit add*/ {&BreakDowns[0], 1}, - /// /*expanded vadd on 2xadd*/ {&BreakDowns[1], 2}, - /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1} - /// }; - /// \endcode - /// - /// Given that a PartialMapping is actually small, the code size - /// impact is actually a degradation. Moreover the compile time will - /// be hit by the additional indirection. - /// If PartialMapping gets bigger we may reconsider. - struct ValueMapping { - /// How the value is broken down between the different register banks. - const PartialMapping *BreakDown; - - /// Number of partial mappings used to break down this value. - unsigned NumBreakDowns; - - /// The default constructor creates an invalid (isValid() == false) - /// instance. - ValueMapping() : ValueMapping(nullptr, 0) {} - - /// Initialize a ValueMapping with the given parameters. - /// \p BreakDown needs to have a lifetime at least as long - /// as this instance. - ValueMapping(const PartialMapping *BreakDown, unsigned NumBreakDowns) - : BreakDown(BreakDown), NumBreakDowns(NumBreakDowns) {} - - /// Iterators through the PartialMappings. - const PartialMapping *begin() const { return BreakDown; } - const PartialMapping *end() const { return BreakDown + NumBreakDowns; } - - /// \return true if all partial mappings are the same size and register - /// bank. - bool partsAllUniform() const; - - /// Check if this ValueMapping is valid. - bool isValid() const { return BreakDown && NumBreakDowns; } - - /// Verify that this mapping makes sense for a value of - /// \p MeaningfulBitWidth. - /// \note This method does not check anything when assertions are disabled. - /// - /// \return True if the check was successful. - bool verify(unsigned MeaningfulBitWidth) const; - - /// Print this on dbgs() stream. - void dump() const; - - /// Print this on \p OS; - void print(raw_ostream &OS) const; - }; - - /// Helper class that represents how the value of an instruction may be - /// mapped and what is the related cost of such mapping. - class InstructionMapping { - /// Identifier of the mapping. - /// This is used to communicate between the target and the optimizers - /// which mapping should be realized. - unsigned ID = InvalidMappingID; - - /// Cost of this mapping. - unsigned Cost = 0; - - /// Mapping of all the operands. - const ValueMapping *OperandsMapping = nullptr; - - /// Number of operands. - unsigned NumOperands = 0; - - const ValueMapping &getOperandMapping(unsigned i) { - assert(i < getNumOperands() && "Out of bound operand"); - return OperandsMapping[i]; - } - - public: - /// Constructor for the mapping of an instruction. - /// \p NumOperands must be equal to the number of all the operands of - /// the related instruction.
- /// The rationale is that it is more efficient for the optimizers - /// to be able to assume that the mapping of the ith operand is - /// at the index i. - InstructionMapping(unsigned ID, unsigned Cost, - const ValueMapping *OperandsMapping, - unsigned NumOperands) - : ID(ID), Cost(Cost), OperandsMapping(OperandsMapping), - NumOperands(NumOperands) { - } - - /// Default constructor. - /// Use this constructor to express that the mapping is invalid. - InstructionMapping() = default; - - /// Get the cost. - unsigned getCost() const { return Cost; } - - /// Get the ID. - unsigned getID() const { return ID; } - - /// Get the number of operands. - unsigned getNumOperands() const { return NumOperands; } - - /// Get the value mapping of the ith operand. - /// \pre The mapping for the ith operand has been set. - /// \pre The ith operand is a register. - const ValueMapping &getOperandMapping(unsigned i) const { - const ValueMapping &ValMapping = - const_cast(this)->getOperandMapping(i); - return ValMapping; - } - - /// Set the mapping for all the operands. - /// In other words, OpdsMapping should hold at least getNumOperands - /// ValueMapping. - void setOperandsMapping(const ValueMapping *OpdsMapping) { - OperandsMapping = OpdsMapping; - } - - /// Check whether this object is valid. - /// This is a lightweight check for an obviously wrong instance. - bool isValid() const { - return getID() != InvalidMappingID && OperandsMapping; - } - - /// Verify that this mapping makes sense for \p MI. - /// \pre \p MI must be connected to a MachineFunction. - /// - /// \note This method does not check anything when assertions are disabled. - /// - /// \return True if the check was successful. - bool verify(const MachineInstr &MI) const; - - /// Print this on dbgs() stream. - void dump() const; - - /// Print this on \p OS; - void print(raw_ostream &OS) const; - }; - - /// Convenient type to represent the alternatives for mapping an - /// instruction. - /// \todo When we move to TableGen this should be an array ref. - using InstructionMappings = SmallVector; - - /// Helper class used to get/create the virtual registers that will be used - /// to replace the MachineOperand when applying a mapping. - class OperandsMapper { - /// The OpIdx-th cell contains the index in NewVRegs where the VRegs of the - /// OpIdx-th operand starts. -1 means we do not have such a mapping yet. - /// Note: We use a SmallVector to avoid heap allocation for most cases. - SmallVector OpToNewVRegIdx; - - /// Hold the registers that will be used to map MI with InstrMapping. - SmallVector NewVRegs; - - /// Current MachineRegisterInfo, used to create new virtual registers. - MachineRegisterInfo &MRI; - - /// Instruction being remapped. - MachineInstr &MI; - - /// New mapping of the instruction. - const InstructionMapping &InstrMapping; - - /// Constant value identifying that the index in OpToNewVRegIdx - /// for an operand has not been set yet. - static const int DontKnowIdx; - - /// Get the range in NewVRegs to store all the partial - /// values for the \p OpIdx-th operand. - /// - /// \return The iterator range for the space created. - // - /// \pre getMI().getOperand(OpIdx).isReg() - iterator_range::iterator> - getVRegsMem(unsigned OpIdx); - - /// Get the end iterator for a range starting at \p StartIdx and - /// spanning \p NumVal in NewVRegs.
- /// \pre StartIdx + NumVal <= NewVRegs.size() - SmallVectorImpl::const_iterator - getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) const; - SmallVectorImpl::iterator getNewVRegsEnd(unsigned StartIdx, - unsigned NumVal); - - public: - /// Create an OperandsMapper that will hold the information to apply \p - /// InstrMapping to \p MI. - /// \pre InstrMapping.verify(MI) - OperandsMapper(MachineInstr &MI, const InstructionMapping &InstrMapping, - MachineRegisterInfo &MRI); - - /// \name Getters. - /// @{ - /// The MachineInstr being remapped. - MachineInstr &getMI() const { return MI; } - - /// The final mapping of the instruction. - const InstructionMapping &getInstrMapping() const { return InstrMapping; } - - /// The MachineRegisterInfo we used to realize the mapping. - MachineRegisterInfo &getMRI() const { return MRI; } - /// @} - - /// Create as many new virtual registers as needed for the mapping of the \p - /// OpIdx-th operand. - /// The number of registers is determined by the number of breakdowns for the - /// related operand in the instruction mapping. - /// The type of the new registers is a plain scalar of the right size. - /// The proper type is expected to be set when the mapping is applied to - /// the instruction(s) that realizes the mapping. - /// - /// \pre getMI().getOperand(OpIdx).isReg() - /// - /// \post All the partial mappings of the \p OpIdx-th operand have been - /// assigned a new virtual register. - void createVRegs(unsigned OpIdx); - - /// Set the virtual register of the \p PartialMapIdx-th partial mapping of - /// the OpIdx-th operand to \p NewVReg. - /// - /// \pre getMI().getOperand(OpIdx).isReg() - /// \pre getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() > - /// PartialMapIdx - /// \pre NewReg != 0 - /// - /// \post the \p PartialMapIdx-th register of the value mapping of the \p - /// OpIdx-th operand has been set. - void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, Register NewVReg); - - /// Get all the virtual registers required to map the \p OpIdx-th operand of - /// the instruction. - /// - /// This returns an empty range when createVRegs or setVRegs has not been - /// called. - /// The iterator may be invalidated by a call to setVRegs or createVRegs. - /// - /// When \p ForDebug is true, we will not check that the list of new virtual - /// registers does not contain uninitialized values. - /// - /// \pre getMI().getOperand(OpIdx).isReg() - /// \pre ForDebug || All partial mappings have been assigned a register - iterator_range::const_iterator> - getVRegs(unsigned OpIdx, bool ForDebug = false) const; - - /// Print this operands mapper on dbgs() stream. - void dump() const; - - /// Print this operands mapper on \p OS stream. - void print(raw_ostream &OS, bool ForDebug = false) const; - }; - -protected: - /// Hold the set of supported register banks. - RegisterBank **RegBanks; - - /// Total number of register banks. - unsigned NumRegBanks; - - /// Keep dynamically allocated PartialMapping in a separate map. - /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> - MapOfPartialMappings; - - /// Keep dynamically allocated ValueMapping in a separate map. - /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> - MapOfValueMappings; - - /// Keep dynamically allocated array of ValueMapping in a separate map. - /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> - MapOfOperandsMappings; - - /// Keep dynamically allocated InstructionMapping in a separate map.
- /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> - MapOfInstructionMappings; - - /// Getting the minimal register class of a physreg is expensive. - /// Cache this information as we get it. - mutable DenseMap PhysRegMinimalRCs; - - /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks - /// RegisterBank instances. - RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); - - /// This constructor is meaningless. - /// It just provides a default constructor that can be used at link time - /// when GlobalISel is not built. - /// That way, targets can still inherit from this class without doing - /// crazy gymnastics to avoid link time failures. - /// \note That works because the constructor is inlined. - RegisterBankInfo() { - llvm_unreachable("This constructor should not be executed"); - } - - /// Get the register bank identified by \p ID. - RegisterBank &getRegBank(unsigned ID) { - assert(ID < getNumRegBanks() && "Accessing an unknown register bank"); - return *RegBanks[ID]; - } - - /// Get the MinimalPhysRegClass for Reg. - /// \pre Reg is a physical register. - const TargetRegisterClass & - getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const; - - /// Try to get the mapping of \p MI. - /// See getInstrMapping for more details on what a mapping represents. - /// - /// Unlike getInstrMapping the returned InstructionMapping may be invalid - /// (isValid() == false). - /// This means that the target independent code is not smart enough - /// to get the mapping of \p MI and thus, the target has to provide the - /// information for \p MI. - /// - /// This implementation is able to get the mapping of: - /// - Target specific instructions by looking at the encoding constraints. - /// - Any instruction if all the register operands have already been assigned - /// a register, a register class, or a register bank. - /// - Copies and phis if at least one of the operands has been assigned a - /// register, a register class, or a register bank. - /// In other words, this method will likely fail to find a mapping for - /// any generic opcode that has not been lowered by target specific code. - const InstructionMapping &getInstrMappingImpl(const MachineInstr &MI) const; - - /// Get the uniquely generated PartialMapping for the - /// given arguments. - const PartialMapping &getPartialMapping(unsigned StartIdx, unsigned Length, - const RegisterBank &RegBank) const; - - /// \name Methods to get a uniquely generated ValueMapping. - /// @{ - - /// The most common ValueMapping consists of a single PartialMapping. - /// Feature a method for that. - const ValueMapping &getValueMapping(unsigned StartIdx, unsigned Length, - const RegisterBank &RegBank) const; - - /// Get the ValueMapping for the given arguments. - const ValueMapping &getValueMapping(const PartialMapping *BreakDown, - unsigned NumBreakDowns) const; - /// @} - - /// \name Methods to get a uniquely generated array of ValueMapping. - /// @{ - - /// Get the uniquely generated array of ValueMapping for the - /// elements between \p Begin and \p End. - /// - /// Elements that are nullptr will be replaced by - /// invalid ValueMapping (ValueMapping::isValid == false). - /// - /// \pre The pointers on ValueMapping between \p Begin and \p End - /// must uniquely identify a ValueMapping. Otherwise, there is no - /// guarantee that the returned instance will be unique, i.e., another - /// OperandsMapping could have the same content.
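A minimal sketch (hypothetical target, illustration only) of how the uniquing getters above typically combine inside getInstrMapping, declared next: one ValueMapping per operand, then one uniqued InstructionMapping, here for a 32-bit G_ADD whose three operands all live in a placeholder GPR bank:

const RegisterBankInfo::InstructionMapping &
MyRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  // MyGPRBankID is a placeholder for a target-defined bank ID.
  const ValueMapping *GPRValueMapping =
      &getValueMapping(/*StartIdx=*/0, /*Length=*/32, getRegBank(MyGPRBankID));
  return getInstructionMapping(
      DefaultMappingID, /*Cost=*/1,
      getOperandsMapping({GPRValueMapping, GPRValueMapping, GPRValueMapping}),
      /*NumOperands=*/3);
}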
- template - const ValueMapping *getOperandsMapping(Iterator Begin, Iterator End) const; - - /// Get the uniquely generated array of ValueMapping for the - /// elements of \p OpdsMapping. - /// - /// Elements of \p OpdsMapping that are nullptr will be replaced by - /// invalid ValueMapping (ValueMapping::isValid == false). - const ValueMapping *getOperandsMapping( - const SmallVectorImpl &OpdsMapping) const; - - /// Get the uniquely generated array of ValueMapping for the - /// given arguments. - /// - /// Arguments that are nullptr will be replaced by invalid - /// ValueMapping (ValueMapping::isValid == false). - const ValueMapping *getOperandsMapping( - std::initializer_list OpdsMapping) const; - /// @} - - /// \name Methods to get a uniquely generated InstructionMapping. - /// @{ - -private: - /// Method to get a uniquely generated InstructionMapping. - const InstructionMapping & - getInstructionMappingImpl(bool IsInvalid, unsigned ID = InvalidMappingID, - unsigned Cost = 0, - const ValueMapping *OperandsMapping = nullptr, - unsigned NumOperands = 0) const; - -public: - /// Method to get a uniquely generated InstructionMapping. - const InstructionMapping & - getInstructionMapping(unsigned ID, unsigned Cost, - const ValueMapping *OperandsMapping, - unsigned NumOperands) const { - return getInstructionMappingImpl(/*IsInvalid*/ false, ID, Cost, - OperandsMapping, NumOperands); - } - - /// Method to get a uniquely generated invalid InstructionMapping. - const InstructionMapping &getInvalidInstructionMapping() const { - return getInstructionMappingImpl(/*IsInvalid*/ true); - } - /// @} - - /// Get the register bank for the \p OpIdx-th operand of \p MI from - /// the encoding constraints, if any. - /// - /// \return A register bank that covers the register class of the - /// related encoding constraints or nullptr if \p MI did not provide - /// enough information to deduce it. - const RegisterBank * - getRegBankFromConstraints(const MachineInstr &MI, unsigned OpIdx, - const TargetInstrInfo &TII, - const MachineRegisterInfo &MRI) const; - - /// Helper method to apply something that is like the default mapping. - /// Basically, that means that \p OpdMapper.getMI() is left untouched - /// aside from the reassignment of the register operands that have been - /// remapped. - /// - /// The types of all the new registers that have been created by the - /// mapper are properly remapped to the types of the original registers - /// they replace. In other words, the semantics of the instruction do - /// not change, only the register banks. - /// - /// If the mapping of one of the operands spans several registers, this - /// method will abort as this is not like a default mapping anymore. - /// - /// \pre For OpIdx in {0..\p OpdMapper.getMI().getNumOperands()) - /// the range OpdMapper.getVRegs(OpIdx) is empty or of size 1. - static void applyDefaultMapping(const OperandsMapper &OpdMapper); - - /// See ::applyMapping. - virtual void applyMappingImpl(const OperandsMapper &OpdMapper) const { - llvm_unreachable("The target has to implement that part"); - } - -public: - virtual ~RegisterBankInfo() = default; - - /// Get the register bank identified by \p ID. - const RegisterBank &getRegBank(unsigned ID) const { - return const_cast(this)->getRegBank(ID); - } - - /// Get the register bank of \p Reg. - /// If Reg has not been assigned a register, a register class, - /// or a register bank, then this returns nullptr.
- /// - /// \pre Reg != 0 (NoRegister) - const RegisterBank *getRegBank(Register Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const; - - /// Get the total number of register banks. - unsigned getNumRegBanks() const { return NumRegBanks; } - - /// Get a register bank that covers \p RC. - /// - /// \pre \p RC is a user-defined register class (as opposed to one - /// generated by TableGen). - /// - /// \note The mapping RC -> RegBank could be built while adding the - /// coverage for the register banks. However, we do not do it, because, - /// at least for now, we only need this information for register classes - /// that are used in the description of instructions. In other words, - /// there are just a handful of them and we do not want to waste space. - /// - /// \todo This should be TableGen'ed. - virtual const RegisterBank & - getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const { - llvm_unreachable("The target must override this method"); - } - - /// Get the cost of a copy from \p B to \p A, or put differently, - /// get the cost of A = COPY B. Since register banks may cover - /// different sizes, \p Size specifies the size in bits - /// that will be copied around. - /// - /// \note Since this is a copy, both registers have the same size. - virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, - unsigned Size) const { - // Optimistically assume that copies are coalesced. I.e., when - // they are on the same bank, they are free. - // Otherwise assume a non-zero cost of 1. The targets are supposed - // to override that properly anyway if they care. - return &A != &B; - } - - /// \returns true if emitting a copy from \p Src to \p Dst is impossible. - bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, - unsigned Size) const { - return copyCost(Dst, Src, Size) == std::numeric_limits::max(); - } - - /// Get the cost of using \p ValMapping to decompose a register. This is - /// similar to ::copyCost, except for cases where multiple copy-like - /// operations need to be inserted. If the register is used as a source - /// operand and already has a bank assigned, \p CurBank is non-null. - virtual unsigned getBreakDownCost(const ValueMapping &ValMapping, - const RegisterBank *CurBank = nullptr) const { - return std::numeric_limits::max(); - } - - /// Constrain the (possibly generic) virtual register \p Reg to \p RC. - /// - /// \pre \p Reg is a virtual register that either has a bank or a class. - /// \returns The constrained register class, or nullptr if there is none. - /// \note This is a generic variant of MachineRegisterInfo::constrainRegClass - /// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel - /// purpose, including non-select passes of GlobalISel - static const TargetRegisterClass * - constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, - MachineRegisterInfo &MRI); - - /// Identifier used when the related instruction mapping instance - /// is generated by target independent code. - /// Make sure not to use that identifier to avoid possible collisions. - static const unsigned DefaultMappingID; - - /// Identifier used when the related instruction mapping instance - /// is generated by the default constructor. - /// Make sure not to use that identifier. - static const unsigned InvalidMappingID; - - /// Get the mapping of the different operands of \p MI - /// on the register bank. - /// This mapping should be the direct translation of \p MI.
- /// In other words, when \p MI is mapped with the returned mapping, - /// only the register banks of the operands of \p MI need to be updated. - /// In particular, neither the opcode nor the type of \p MI needs to be - /// updated for this direct mapping. - /// - /// The target independent implementation gives a mapping based on - /// the register classes for the target specific opcode. - /// It uses the ID RegisterBankInfo::DefaultMappingID for that mapping. - /// Make sure you do not use that ID for the alternative mapping - /// for MI. See getInstrAlternativeMappings for the alternative - /// mappings. - /// - /// For instance, if \p MI is a vector add, the mapping should - /// not be a scalarization of the add. - /// - /// \post returnedVal.verify(MI). - /// - /// \note If returnedVal does not verify MI, this would probably mean - /// that the target does not support that instruction. - virtual const InstructionMapping & - getInstrMapping(const MachineInstr &MI) const; - - /// Get the alternative mappings for \p MI. - /// Alternative in the sense different from getInstrMapping. - virtual InstructionMappings - getInstrAlternativeMappings(const MachineInstr &MI) const; - - /// Get the possible mappings for \p MI. - /// A mapping defines where the different operands may live and at what cost. - /// For instance, let us consider: - /// v0(16) = G_ADD <2 x i8> v1, v2 - /// The possible mappings could be: - /// - /// {/*ID*/VectorAdd, /*Cost*/1, /*v0*/{(0xFFFF, VPR)}, /*v1*/{(0xFFFF, VPR)}, - /// /*v2*/{(0xFFFF, VPR)}} - /// {/*ID*/ScalarAddx2, /*Cost*/2, /*v0*/{(0x00FF, GPR),(0xFF00, GPR)}, - /// /*v1*/{(0x00FF, GPR),(0xFF00, GPR)}, - /// /*v2*/{(0x00FF, GPR),(0xFF00, GPR)}} - /// - /// \note The first alternative of the returned mapping should be the - /// direct translation of \p MI's current form. - /// - /// \post !returnedVal.empty(). - InstructionMappings getInstrPossibleMappings(const MachineInstr &MI) const; - - /// Apply \p OpdMapper.getInstrMapping() to \p OpdMapper.getMI(). - /// After this call \p OpdMapper.getMI() may not be valid anymore. - /// \p OpdMapper.getInstrMapping().getID() carries the information of - /// what has been chosen to map \p OpdMapper.getMI(). This ID is set - /// by the various getInstrXXXMapping methods. - /// - /// Therefore, getting the mapping and applying it should be kept in - /// sync. - void applyMapping(const OperandsMapper &OpdMapper) const { - // The only mapping we know how to handle is the default mapping. - if (OpdMapper.getInstrMapping().getID() == DefaultMappingID) - return applyDefaultMapping(OpdMapper); - // For other mappings, the target needs to do the right thing. - // If that means calling applyDefaultMapping, fine, but this - // must be explicitly stated. - applyMappingImpl(OpdMapper); - } - - /// Get the size in bits of \p Reg. - /// Utility method to get the size of any register. Unlike - /// MachineRegisterInfo::getSize, the register does not need to be a - /// virtual register. - /// - /// \pre \p Reg != 0 (NoRegister). - unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const; - - /// Check that the information held by this instance makes sense for the - /// given \p TRI. - /// - /// \note This method does not check anything when assertions are disabled. - /// - /// \return True if the check was successful.
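A minimal sketch (hypothetical target) of overriding the copyCost hook shown earlier, keeping same-bank copies free and charging cross-bank copies a flat, made-up cost:

unsigned MyRegisterBankInfo::copyCost(const RegisterBank &A,
                                      const RegisterBank &B,
                                      unsigned Size) const {
  if (A == B)
    return 0; // Same bank: assume the copy gets coalesced away.
  return 2;   // Hypothetical cross-bank copy cost for this target.
}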
- bool verify(const TargetRegisterInfo &TRI) const; -}; - -inline raw_ostream & -operator<<(raw_ostream &OS, - const RegisterBankInfo::PartialMapping &PartMapping) { - PartMapping.print(OS); - return OS; -} - -inline raw_ostream & -operator<<(raw_ostream &OS, const RegisterBankInfo::ValueMapping &ValMapping) { - ValMapping.print(OS); - return OS; -} - -inline raw_ostream & -operator<<(raw_ostream &OS, - const RegisterBankInfo::InstructionMapping &InstrMapping) { - InstrMapping.print(OS); - return OS; -} - -inline raw_ostream & -operator<<(raw_ostream &OS, const RegisterBankInfo::OperandsMapper &OpdMapper) { - OpdMapper.print(OS, /*ForDebug*/ false); - return OS; -} - -/// Hashing function for PartialMapping. -/// It is required for the hashing of ValueMapping. -hash_code hash_value(const RegisterBankInfo::PartialMapping &PartMapping); - -} // end namespace llvm - -#endif // LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index aed915d2cc4b..78f1b49da822 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -15,18 +15,20 @@ #define LLVM_CODEGEN_GLOBALISEL_UTILS_H #include "GISelWorkList.h" -#include "LostDebugLocObserver.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Register.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/Alignment.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/LowLevelTypeImpl.h" #include namespace llvm { class AnalysisUsage; +class LostDebugLocObserver; +class MachineBasicBlock; class BlockFrequencyInfo; class GISelKnownBits; class MachineFunction; @@ -267,13 +269,10 @@ Optional ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const MachineRegisterInfo &MRI); /// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. -/// If successful, returns the G_BUILD_VECTOR representing the folded vector -/// constant. \p MIB should have an insertion point already set to create new -/// G_CONSTANT instructions as needed. -Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB); +/// Returns an empty vector on failure. +SmallVector ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI); Optional ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); @@ -374,9 +373,23 @@ public: /// If \p MI is not a splat, returns None. Optional getSplatIndex(MachineInstr &MI); -/// Returns a scalar constant of a G_BUILD_VECTOR splat if it exists. -Optional getBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI); +/// \returns the scalar integral splat value of \p Reg if possible. +Optional getIConstantSplatVal(const Register Reg, + const MachineRegisterInfo &MRI); + +/// \returns the scalar integral splat value defined by \p MI if possible. +Optional getIConstantSplatVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// \returns the scalar sign extended integral splat value of \p Reg if +/// possible. +Optional getIConstantSplatSExtVal(const Register Reg, + const MachineRegisterInfo &MRI); + +/// \returns the scalar sign extended integral splat value defined by \p MI if +/// possible. 
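A minimal sketch (assumed usage) of the splat helper declared here: checking whether a G_SHL's shift amount is a constant-splat vector whose value exceeds the shifted width:

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

static bool isOversizedShift(const llvm::MachineInstr &Shl,
                             const llvm::MachineRegisterInfo &MRI) {
  llvm::Register Amt = Shl.getOperand(2).getReg();
  unsigned Width =
      MRI.getType(Shl.getOperand(0).getReg()).getScalarSizeInBits();
  // Returns the splat value of a constant G_BUILD_VECTOR, None otherwise.
  if (llvm::Optional<llvm::APInt> Cst = llvm::getIConstantSplatVal(Amt, MRI))
    return Cst->uge(Width);
  return false;
}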
+Optional getIConstantSplatSExtVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI); /// Returns a floating point scalar constant of a build vector splat if it /// exists. When \p AllowUndef == true some elements can be undef but not all. @@ -408,6 +421,30 @@ bool isBuildVectorAllOnes(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef = false); +/// Return true if the specified instruction is known to be a constant, or a +/// vector of constants. +/// +/// If \p AllowFP is true, this will consider G_FCONSTANT in addition to +/// G_CONSTANT. If \p AllowOpaqueConstants is true, constant-like instructions +/// such as G_GLOBAL_VALUE will also be considered. +bool isConstantOrConstantVector(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowFP = true, + bool AllowOpaqueConstants = true); + +/// Return true if the value is a constant 0 integer or a splatted vector of a +/// constant 0 integer (with no undefs if \p AllowUndefs is false). This will +/// handle G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC as truncation is not an issue +/// for null values. +bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, + bool AllowUndefs = false); + +/// Return true if the value is a constant -1 integer or a splatted vector of a +/// constant -1 integer (with no undefs if \p AllowUndefs is false). +bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowUndefs = false); + /// \returns a value when \p MI is a vector splat. The splat can be either a /// Register or a constant. /// diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index b07c7cd3db3a..120f89952a95 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -281,12 +281,25 @@ enum NodeType { /// Carry-using nodes for multiple precision addition and subtraction. /// These nodes take three operands: The first two are the normal lhs and - /// rhs to the add or sub, and the third is a boolean indicating if there - /// is an incoming carry. These nodes produce two results: the normal - /// result of the add or sub, and the output carry so they can be chained - /// together. The use of this opcode is preferable to adde/sube if the - /// target supports it, as the carry is a regular value rather than a - /// glue, which allows further optimisation. + /// rhs to the add or sub, and the third is a boolean value that is 1 if and + /// only if there is an incoming carry/borrow. These nodes produce two + /// results: the normal result of the add or sub, and a boolean value that is + /// 1 if and only if there is an outgoing carry/borrow. + /// + /// Care must be taken if these opcodes are lowered to hardware instructions + /// that use the inverse logic -- 0 if and only if there is an + /// incoming/outgoing carry/borrow. In such cases, you must preserve the + /// semantics of these opcodes by inverting the incoming carry/borrow, feeding + /// it to the add/sub hardware instruction, and then inverting the outgoing + /// carry/borrow. + /// + /// The use of these opcodes is preferable to adde/sube if the target supports + /// it, as the carry is a regular value rather than a glue, which allows + /// further optimisation. + /// + /// These opcodes are different from [US]{ADD,SUB}O in that ADDCARRY/SUBCARRY + /// consume and produce a carry/borrow, whereas [US]{ADD,SUB}O produce an + /// overflow. 
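A small worked model of the carry chain described above (plain C++ sketch, not LLVM API): a 64-bit add split into two 32-bit pieces, where the low half produces the carry that the high half consumes.

#include <cstdint>

// Models a 32-bit ADDCARRY: Sum = A + B + CarryIn, and CarryOut is 1 if and
// only if the addition produces an outgoing carry.
static uint32_t addCarry32(uint32_t A, uint32_t B, uint32_t CarryIn,
                           uint32_t &CarryOut) {
  uint64_t Wide = uint64_t(A) + uint64_t(B) + (CarryIn & 1u);
  CarryOut = uint32_t(Wide >> 32); // outgoing carry bit
  return uint32_t(Wide);           // result truncated to the operand width
}

// 64-bit add from 32-bit halves: UADDO would produce the first carry,
// ADDCARRY consumes it for the high half.
static uint64_t add64(uint64_t X, uint64_t Y) {
  uint32_t CarryLo, CarryHi;
  uint32_t Lo = addCarry32(uint32_t(X), uint32_t(Y), /*CarryIn=*/0, CarryLo);
  uint32_t Hi =
      addCarry32(uint32_t(X >> 32), uint32_t(Y >> 32), CarryLo, CarryHi);
  (void)CarryHi; // would feed the next limb in a wider expansion
  return (uint64_t(Hi) << 32) | Lo;
}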
ADDCARRY, SUBCARRY, @@ -294,7 +307,7 @@ enum NodeType { /// subtraction. These nodes take three operands: The first two are normal lhs /// and rhs to the add or sub, and the third is a boolean indicating if there /// is an incoming carry. They produce two results: the normal result of the - /// add or sub, and a boolean that indicates if an overflow occured (*not* + /// add or sub, and a boolean that indicates if an overflow occurred (*not* /// flag, because it may be a store to memory, etc.). If the type of the /// boolean is not i1 then the high bits conform to getBooleanContents. SADDO_CARRY, @@ -462,6 +475,9 @@ enum NodeType { STRICT_FSETCC, STRICT_FSETCCS, + // FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic. + FPTRUNC_ROUND, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, @@ -482,6 +498,13 @@ enum NodeType { /// Returns platform specific canonical encoding of a floating point number. FCANONICALIZE, + /// Performs a check of floating point class property, defined by IEEE-754. + /// The first operand is the floating point value to check. The second operand + /// specifies the checked property and is a TargetConstant which specifies the + /// test in the same way as the intrinsic 'is_fpclass'. + /// Returns a boolean value. + IS_FPCLASS, + /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector /// with the specified, possibly variable, elements. The types of the /// operands must match the vector element type, except that integer types @@ -614,6 +637,17 @@ enum NodeType { MULHU, MULHS, + /// AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of + /// type i[N+1], halving the result by shifting it one bit right. + /// shr(add(ext(X), ext(Y)), 1) + AVGFLOORS, + AVGFLOORU, + /// AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an + /// integer of type i[N+2], add 1 and halve the result by shifting it one bit + /// right. shr(add(ext(X), ext(Y), 1), 1) + AVGCEILS, + AVGCEILU, + // ABDS/ABDU - Absolute difference - Return the absolute difference between // two numbers interpreted as signed/unsigned. // i.e. trunc(abs(sext(Op0) - sext(Op1))) becomes abds(Op0, Op1) @@ -864,6 +898,13 @@ enum NodeType { STRICT_FP16_TO_FP, STRICT_FP_TO_FP16, + /// BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions + /// and truncation for bfloat16. These nodes form a semi-softened interface + /// for dealing with bf16 (as an i16), which is often a storage-only type but + /// has native conversions. + BF16_TO_FP, + FP_TO_BF16, + /// Perform various unary floating-point operations inspired by libm. For /// FPOWI, the result is undefined if the integer operand doesn't fit into /// sizeof(int). @@ -1324,18 +1365,18 @@ static const int LAST_INDEXED_MODE = POST_DEC + 1; /// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's /// index parameter when calculating addresses. /// -/// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element)) -/// SIGNED_UNSCALED Addr = Base + (signed)Index -/// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element)) -/// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index -enum MemIndexType { - SIGNED_SCALED = 0, - SIGNED_UNSCALED, - UNSIGNED_SCALED, - UNSIGNED_UNSCALED -}; +/// SIGNED_SCALED Addr = Base + ((signed)Index * Scale) +/// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * Scale) +/// +/// NOTE: The value of Scale is typically only known to the node owning the +/// IndexType, with a value of 1 the equivalent of being unscaled.
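A small model of the two addressing modes above (plain C++ sketch, not LLVM API), computing one gather/scatter lane address from a 32-bit index:

#include <cstdint>

// SIGNED_SCALED:   Addr = Base + (signed)Index * Scale
// UNSIGNED_SCALED: Addr = Base + (unsigned)Index * Scale
// With Scale == 1 both degenerate to the old unscaled forms.
static uint64_t laneAddress(uint64_t Base, uint32_t IndexBits, uint64_t Scale,
                            bool IsSigned) {
  int64_t SIdx = int32_t(IndexBits);   // sign-extend the index
  uint64_t UIdx = uint32_t(IndexBits); // zero-extend the index
  return IsSigned ? Base + uint64_t(SIdx) * Scale : Base + UIdx * Scale;
}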
+enum MemIndexType { SIGNED_SCALED = 0, UNSIGNED_SCALED }; -static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1; +static const int LAST_MEM_INDEX_TYPE = UNSIGNED_SCALED + 1; + +inline bool isIndexTypeSigned(MemIndexType IndexType) { + return IndexType == SIGNED_SCALED; +} //===--------------------------------------------------------------------===// /// LoadExtType enum - This enum defines the three variants of LOADEXT diff --git a/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/llvm/include/llvm/CodeGen/IntrinsicLowering.h index 06512f2dc560..0b327a34ca09 100644 --- a/llvm/include/llvm/CodeGen/IntrinsicLowering.h +++ b/llvm/include/llvm/CodeGen/IntrinsicLowering.h @@ -15,8 +15,6 @@ #ifndef LLVM_CODEGEN_INTRINSICLOWERING_H #define LLVM_CODEGEN_INTRINSICLOWERING_H -#include "llvm/IR/Intrinsics.h" - namespace llvm { class CallInst; class DataLayout; diff --git a/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h index c692dbc2199e..e5794966ce63 100644 --- a/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h +++ b/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h @@ -17,8 +17,8 @@ #define LLVM_CODEGEN_LAZYMACHINEBLOCKFREQUENCYINFO_H #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" namespace llvm { diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index 51ffe2807434..92e35c9a4ab9 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -227,6 +227,14 @@ namespace llvm { const_vni_iterator vni_begin() const { return valnos.begin(); } const_vni_iterator vni_end() const { return valnos.end(); } + iterator_range vnis() { + return make_range(vni_begin(), vni_end()); + } + + iterator_range vnis() const { + return make_range(vni_begin(), vni_end()); + } + /// Constructs a new LiveRange object. LiveRange(bool UseSegmentSet = false) : segmentSet(UseSegmentSet ? std::make_unique() @@ -625,10 +633,8 @@ namespace llvm { // if the Seg is lower find first segment that is above Idx using binary // search if (Seg->end <= *Idx) { - Seg = std::upper_bound( - ++Seg, EndSeg, *Idx, - [=](std::remove_reference_t V, - const std::remove_reference_t &S) { + Seg = + std::upper_bound(++Seg, EndSeg, *Idx, [=](auto V, const auto &S) { return V < S.end; }); if (Seg == EndSeg) diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h index 3b6a4a379d72..81003455da42 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h @@ -43,7 +43,7 @@ class LiveIntervalUnion { // A set of live virtual register segments that supports fast insertion, // intersection, and removal. // Mapping SlotIndex intervals to virtual register numbers. - using LiveSegments = IntervalMap; + using LiveSegments = IntervalMap; public: // SegmentIter can advance to the next segment ordered by starting position @@ -88,10 +88,10 @@ public: bool changedSince(unsigned tag) const { return tag != Tag; } // Add a live virtual register to this union and merge its segments. - void unify(LiveInterval &VirtReg, const LiveRange &Range); + void unify(const LiveInterval &VirtReg, const LiveRange &Range); // Remove a live virtual register's segments from this union. 
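A minimal sketch (assumed usage) of the vnis() ranges added above, which replace the explicit vni_begin()/vni_end() iterator pairs:

#include "llvm/CodeGen/LiveInterval.h"

// Count the PHI-defined value numbers of a live range.
static unsigned countPHIDefs(const llvm::LiveRange &LR) {
  unsigned N = 0;
  for (const llvm::VNInfo *VNI : LR.vnis())
    if (!VNI->isUnused() && VNI->isPHIDef())
      ++N;
  return N;
}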
- void extract(LiveInterval &VirtReg, const LiveRange &Range); + void extract(const LiveInterval &VirtReg, const LiveRange &Range); // Remove all inserted virtual registers. void clear() { Segments.clear(); ++Tag; } @@ -105,7 +105,7 @@ public: #endif // Get any virtual register that is assigned to this physical unit - LiveInterval *getOneVReg() const; + const LiveInterval *getOneVReg() const; /// Query interferences between a single live virtual register and a live /// interval union. @@ -114,7 +114,7 @@ public: const LiveRange *LR = nullptr; LiveRange::const_iterator LRI; ///< current position in LR ConstSegmentIter LiveUnionI; ///< current position in LiveUnion - SmallVector InterferingVRegs; + SmallVector InterferingVRegs; bool CheckedFirstInterference = false; bool SeenAllInterferences = false; unsigned Tag = 0; @@ -125,7 +125,7 @@ public: unsigned collectInterferingVRegs(unsigned MaxInterferingRegs); // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VirtReg) const; + bool isSeenInterference(const LiveInterval *VirtReg) const; public: Query() = default; @@ -159,7 +159,7 @@ public: bool checkInterference() { return collectInterferingVRegs(1); } // Vector generated by collectInterferingVRegs. - const SmallVectorImpl &interferingVRegs( + const SmallVectorImpl &interferingVRegs( unsigned MaxInterferingRegs = std::numeric_limits::max()) { if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size()) collectInterferingVRegs(MaxInterferingRegs); diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index fa08166791b0..b832eaa37305 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -374,7 +374,7 @@ class VirtRegMap; /// /// Returns false if \p LI doesn't cross any register mask instructions. In /// that case, the bit vector is not filled in. - bool checkRegMaskInterference(LiveInterval &LI, + bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs); // Register unit functions.
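A minimal sketch (assumed usage) of the now const-qualified checkRegMaskInterference: querying which physical registers remain usable across every regmask the interval crosses:

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/MC/MCRegister.h"

static bool isUsableAcrossRegMasks(llvm::LiveIntervals &LIS,
                                   const llvm::LiveInterval &LI,
                                   llvm::MCRegister PhysReg) {
  llvm::BitVector UsableRegs;
  if (!LIS.checkRegMaskInterference(LI, UsableRegs))
    return true; // LI crosses no register mask instruction.
  return UsableRegs.test(PhysReg);
}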
diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h index 99ba1a28c934..27285d63aa83 100644 --- a/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -32,6 +32,7 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include #include @@ -39,6 +40,7 @@ namespace llvm { class MachineInstr; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class raw_ostream; diff --git a/llvm/include/llvm/CodeGen/LiveRangeCalc.h b/llvm/include/llvm/CodeGen/LiveRangeCalc.h index 31efd6e37e01..895ecff18f89 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeCalc.h +++ b/llvm/include/llvm/CodeGen/LiveRangeCalc.h @@ -31,7 +31,6 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/MC/LaneBitmask.h" #include namespace llvm { diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h index d80522f5bdac..c6efa7b30d71 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -66,7 +66,7 @@ public: }; private: - LiveInterval *Parent; + const LiveInterval *const Parent; SmallVectorImpl &NewRegs; MachineRegisterInfo &MRI; LiveIntervals &LIS; @@ -129,7 +129,7 @@ public: /// be done. This could be the case if called before Regalloc. /// @param deadRemats The collection of all the instructions defining an /// original reg and are dead after remat. - LiveRangeEdit(LiveInterval *parent, SmallVectorImpl &newRegs, + LiveRangeEdit(const LiveInterval *parent, SmallVectorImpl &newRegs, MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm, Delegate *delegate = nullptr, SmallPtrSet *deadRemats = nullptr) @@ -141,7 +141,7 @@ public: ~LiveRangeEdit() override { MRI.resetDelegate(this); } - LiveInterval &getParent() const { + const LiveInterval &getParent() const { assert(Parent && "No parent LiveInterval"); return *Parent; } @@ -193,11 +193,11 @@ public: /// Remat - Information needed to rematerialize at a specific location. struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. + const VNInfo *const ParentVNI; // parent_'s value at the remat location. MachineInstr *OrigMI = nullptr; // Instruction defining OrigVNI. It contains // the real expr for remat. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {} + explicit Remat(const VNInfo *ParentVNI) : ParentVNI(ParentVNI) {} }; /// allUsesAvailableAt - Return true if all registers used by OrigMI at diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index fc67bce329ab..9e28e4d243c2 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -104,7 +104,8 @@ public: /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). /// When there is more than one kind of interference, the InterferenceKind /// with the highest enum value is returned. - InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg); + InterferenceKind checkInterference(const LiveInterval &VirtReg, + MCRegister PhysReg); /// Check for interference in the segment [Start, End) that may prevent /// assignment to PhysReg. If this function returns true, there is @@ -116,12 +117,12 @@ public: /// Assign VirtReg to PhysReg. 
   /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
   /// update VirtRegMap. The live range is expected to be available in PhysReg.
-  void assign(LiveInterval &VirtReg, MCRegister PhysReg);
+  void assign(const LiveInterval &VirtReg, MCRegister PhysReg);

   /// Unassign VirtReg from its PhysReg.
   /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
   /// the assignment and updates VirtRegMap accordingly.
-  void unassign(LiveInterval &VirtReg);
+  void unassign(const LiveInterval &VirtReg);

   /// Returns true if the given \p PhysReg has any live intervals assigned.
   bool isPhysRegUsed(MCRegister PhysReg) const;
@@ -136,13 +137,14 @@ public:
   /// Check for regmask interference only.
   /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
   /// If PhysReg is null, check if VirtReg crosses any regmask operands.
-  bool checkRegMaskInterference(LiveInterval &VirtReg,
+  bool checkRegMaskInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg = MCRegister::NoRegister);

   /// Check for regunit interference only.
   /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
   /// register units.
-  bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg);
+  bool checkRegUnitInterference(const LiveInterval &VirtReg,
+                                MCRegister PhysReg);

   /// Query a line of the assigned virtual register matrix directly.
   /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
diff --git a/llvm/include/llvm/CodeGen/LiveStacks.h b/llvm/include/llvm/CodeGen/LiveStacks.h
index 1cbdb8bd86bd..26f30fb4d088 100644
--- a/llvm/include/llvm/CodeGen/LiveStacks.h
+++ b/llvm/include/llvm/CodeGen/LiveStacks.h
@@ -18,13 +18,17 @@
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
 #include <cassert>
 #include <map>
 #include <unordered_map>

 namespace llvm {

+class AnalysisUsage;
+class MachineFunction;
+class Module;
+class raw_ostream;
 class TargetRegisterClass;
 class TargetRegisterInfo;
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h
index dee316677b25..aa198527415d 100644
--- a/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -37,6 +37,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"

 namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
index deb6b37a9bcf..3bbcfd63e3aa 100644
--- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
+++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
@@ -17,29 +17,16 @@
 #ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
 #define LLVM_CODEGEN_MIRFSDISCRIMINATOR_H

-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
"llvm/InitializePasses.h" -#include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/Discriminator.h" #include +#include namespace llvm { +class MachineFunction; using namespace sampleprof; class MIRAddFSDiscriminators : public MachineFunctionPass { diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h index a7c69e2d43ef..aa9891a80a32 100644 --- a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -17,13 +17,20 @@ #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H #define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLForwardCompat.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/StringRef.h" +#include #include namespace llvm { class Function; +class LLVMContext; +class MemoryBuffer; +class Module; class MIRParserImpl; class MachineModuleInfo; class SMDiagnostic; diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h index 2503524ccfdf..f54c4b5891be 100644 --- a/llvm/include/llvm/CodeGen/MIRSampleProfile.h +++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h @@ -14,29 +14,17 @@ #ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H #define LLVM_CODEGEN_MIRSAMPLEPROFILE_H -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" -#include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" - -#include +#include "llvm/Support/Discriminator.h" +#include +#include namespace llvm { +class AnalysisUsage; +class MachineBlockFrequencyInfo; +class MachineFunction; +class Module; using namespace sampleprof; diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index 02eb5d24271d..25247437b641 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -605,7 +605,7 @@ struct MachineFrameInfo { bool AdjustsStack = false; bool HasCalls = false; StringValue StackProtector; - // TODO: Serialize FunctionContextIdx + StringValue FunctionContext; unsigned MaxCallFrameSize = ~0u; ///< ~0u means: not computed yet. 
   unsigned CVBytesOfCalleeSavedRegisters = 0;
   bool HasOpaqueSPAdjustment = false;
@@ -626,6 +626,7 @@ struct MachineFrameInfo {
            MaxAlignment == Other.MaxAlignment &&
            AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls &&
            StackProtector == Other.StackProtector &&
+           FunctionContext == Other.FunctionContext &&
            MaxCallFrameSize == Other.MaxCallFrameSize &&
            CVBytesOfCalleeSavedRegisters ==
                Other.CVBytesOfCalleeSavedRegisters &&
@@ -651,6 +652,8 @@ template <> struct MappingTraits<MachineFrameInfo> {
     YamlIO.mapOptional("hasCalls", MFI.HasCalls, false);
     YamlIO.mapOptional("stackProtector", MFI.StackProtector,
                        StringValue()); // Don't print it out when it's empty.
+    YamlIO.mapOptional("functionContext", MFI.FunctionContext,
+                       StringValue()); // Don't print it out when it's empty.
     YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0);
     YamlIO.mapOptional("cvBytesOfCalleeSavedRegisters",
                        MFI.CVBytesOfCalleeSavedRegisters, 0U);
@@ -694,6 +697,13 @@ struct MachineFunction {
   // Register information
   bool TracksRegLiveness = false;
   bool HasWinCFI = false;
+
+  bool CallsEHReturn = false;
+  bool CallsUnwindInit = false;
+  bool HasEHCatchret = false;
+  bool HasEHScopes = false;
+  bool HasEHFunclets = false;
+
   bool FailsVerification = false;
   bool TracksDebugUserValues = false;
   std::vector<VirtualRegisterDefinition> VirtualRegisters;
@@ -724,6 +734,13 @@ template <> struct MappingTraits<MachineFunction> {
     YamlIO.mapOptional("failedISel", MF.FailedISel, false);
     YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
     YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false);
+
+    YamlIO.mapOptional("callsEHReturn", MF.CallsEHReturn, false);
+    YamlIO.mapOptional("callsUnwindInit", MF.CallsUnwindInit, false);
+    YamlIO.mapOptional("hasEHCatchret", MF.HasEHCatchret, false);
+    YamlIO.mapOptional("hasEHScopes", MF.HasEHScopes, false);
+    YamlIO.mapOptional("hasEHFunclets", MF.HasEHFunclets, false);
+
     YamlIO.mapOptional("failsVerification", MF.FailsVerification, false);
     YamlIO.mapOptional("tracksDebugUserValues", MF.TracksDebugUserValues,
                        false);
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 638b6732a543..ddfbd4018590 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -14,9 +14,9 @@
 #define LLVM_CODEGEN_MACHINEBASICBLOCK_H

 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBundleIterator.h"
 #include "llvm/IR/DebugLoc.h"
@@ -24,7 +24,6 @@
 #include "llvm/Support/BranchProbability.h"
 #include <cassert>
 #include <cstdint>
-#include <functional>
 #include <iterator>
 #include <string>
 #include <vector>
@@ -110,10 +109,10 @@ public:
 private:
   using Instructions = ilist<MachineInstr, ilist_sentinel_tracking<true>>;

-  Instructions Insts;
   const BasicBlock *BB;
   int Number;
   MachineFunction *xParent;
+  Instructions Insts;

   /// Keep track of the predecessor / successor basic blocks.
   std::vector<MachineBasicBlock *> Predecessors;
@@ -205,6 +204,12 @@ public:
   /// to an LLVM basic block.
   const BasicBlock *getBasicBlock() const { return BB; }

+  /// Remove the reference to the underlying IR BasicBlock. This is for
+  /// reduction tools and should generally not be used.
+  void clearBasicBlock() {
+    BB = nullptr;
+  }
+
   /// Return the name of the corresponding LLVM basic block, or an empty string.
   StringRef getName() const;
@@ -241,6 +246,7 @@ public:
       MachineInstrBundleIterator<const MachineInstr, true>;

   unsigned size() const { return (unsigned)Insts.size(); }
+  bool sizeWithoutDebugLargerThan(unsigned Limit) const;
   bool empty() const { return Insts.empty(); }

   MachineInstr &instr_front() { return Insts.front(); }
@@ -400,7 +406,7 @@ public:
   // Iteration support for live in sets.  These sets are kept in sorted
   // order by their register number.
   using livein_iterator = LiveInVector::const_iterator;
-#ifndef NDEBUG
+
   /// Unlike livein_begin, this method does not check that the liveness
   /// information is accurate. Still for debug purposes it may be useful
   /// to have iterators that won't assert if the liveness information
@@ -409,7 +415,7 @@ public:
   iterator_range<livein_iterator> liveins_dbg() const {
     return make_range(livein_begin_dbg(), livein_end());
   }
-#endif
+
   livein_iterator livein_begin() const;
   livein_iterator livein_end() const { return LiveIns.end(); }
   bool livein_empty() const { return LiveIns.empty(); }
@@ -731,6 +737,15 @@ public:
   /// other block.
   bool isLayoutSuccessor(const MachineBasicBlock *MBB) const;

+  /// Return the successor of this block if it has a single successor.
+  /// Otherwise return a null pointer.
+  ///
+  const MachineBasicBlock *getSingleSuccessor() const;
+  MachineBasicBlock *getSingleSuccessor() {
+    return const_cast<MachineBasicBlock *>(
+        static_cast<const MachineBasicBlock *>(this)->getSingleSuccessor());
+  }
+
   /// Return the fallthrough block if the block can implicitly
   /// transfer control to the block after it by falling off the end of
   /// it.  This should return null if it can reach the block after
@@ -1087,6 +1102,11 @@ public:
     IrrLoopHeaderWeight = Weight;
   }

+  /// Return probability of the edge from this block to MBB. This method should
+  /// NOT be called directly, but by using getEdgeProbability method from
+  /// MachineBranchProbabilityInfo class.
+  BranchProbability getSuccProbability(const_succ_iterator Succ) const;
+
 private:
   /// Return probability iterator corresponding to the I successor iterator.
   probability_iterator getProbabilityIterator(succ_iterator I);
@@ -1096,11 +1116,6 @@ private:
   friend class MachineBranchProbabilityInfo;
   friend class MIPrinter;

-  /// Return probability of the edge from this block to MBB. This method should
-  /// NOT be called directly, but by using getEdgeProbability method from
-  /// MachineBranchProbabilityInfo class.
-  BranchProbability getSuccProbability(const_succ_iterator Succ) const;
-
   // Methods used to maintain doubly linked list of blocks...
   friend struct ilist_callback_traits<MachineBasicBlock>;

diff --git a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 7e7e0a9c477a..bd544421bc0f 100644
--- a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -16,8 +16,6 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
-#include <climits>
-#include <numeric>

 namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 67544779f34c..68c95679d466 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -34,6 +34,10 @@ enum class MachineCombinerPattern {
   REASSOC_XY_BCA,
   REASSOC_XY_BAC,

+  // These are patterns used to reduce the length of dependence chain.
+  SUBADD_OP1,
+  SUBADD_OP2,
+
   // These are multiply-add patterns matched by the AArch64 machine combiner.
   MULADDW_OP1,
   MULADDW_OP2,
diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
index d3816bbc0780..3f89f2076d50 100644
--- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
@@ -15,8 +15,9 @@
 #define LLVM_CODEGEN_MACHINECYCLEANALYSIS_H

 #include "llvm/ADT/GenericCycleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"

 namespace llvm {

@@ -26,6 +27,29 @@ extern template class GenericCycle<MachineSSAContext>;
 using MachineCycleInfo = GenericCycleInfo<MachineSSAContext>;
 using MachineCycle = MachineCycleInfo::CycleT;

+/// Legacy analysis pass which computes a \ref MachineCycleInfo.
+class MachineCycleInfoWrapperPass : public MachineFunctionPass {
+  MachineFunction *F = nullptr;
+  MachineCycleInfo CI;
+
+public:
+  static char ID;
+
+  MachineCycleInfoWrapperPass();
+
+  MachineCycleInfo &getCycleInfo() { return CI; }
+  const MachineCycleInfo &getCycleInfo() const { return CI; }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+};
+
+// TODO: add this function to GenericCycle template after implementing IR
+//       version.
+bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);
+
 } // end namespace llvm

 #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H
diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h
index f749e9ff7e0a..30c18ef410fa 100644
--- a/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -19,12 +19,17 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
 #include "llvm/Support/GenericDomTree.h"
 #include "llvm/Support/GenericDomTreeConstruction.h"
 #include <cassert>
 #include <memory>

 namespace llvm {
+class AnalysisUsage;
+class MachineFunction;
+class Module;
+class raw_ostream;

 template <>
 inline void DominatorTreeBase<MachineBasicBlock, false>::addRoot(
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 864ca73180af..7ea731b46655 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -16,7 +16,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/Support/Alignment.h"
-#include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <vector>

@@ -335,10 +334,13 @@ private:
   /// Not null, if shrink-wrapping found a better place for the epilogue.
   MachineBasicBlock *Restore = nullptr;

+  /// Size of the UnsafeStack Frame
+  uint64_t UnsafeStackSize = 0;
+
 public:
-  explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable,
+  explicit MachineFrameInfo(Align StackAlignment, bool StackRealignable,
                             bool ForcedRealign)
-      : StackAlignment(assumeAligned(StackAlignment)),
+      : StackAlignment(StackAlignment),
         StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {}

   MachineFrameInfo(const MachineFrameInfo &) = delete;
@@ -360,6 +362,7 @@ public:
   /// This object is used for SjLj exceptions.
   int getFunctionContextIndex() const { return FunctionContextIdx; }
   void setFunctionContextIndex(int I) { FunctionContextIdx = I; }
+  bool hasFunctionContextIndex() const { return FunctionContextIdx != -1; }

   /// This method may be called any time after instruction
   /// selection is complete to determine if there is a call to
@@ -385,6 +388,20 @@ public:
   bool hasPatchPoint() const { return HasPatchPoint; }
   void setHasPatchPoint(bool s = true) { HasPatchPoint = s; }

+  /// Return true if this function requires a split stack prolog, even if it
+  /// uses no stack space. This is only meaningful for functions where
+  /// MachineFunction::shouldSplitStack() returns true.
+  //
+  // For non-leaf functions we have to allow for the possibility that the call
+  // is to a non-split function, as in PR37807. This function could also take
+  // the address of a non-split function. When the linker tries to adjust its
+  // non-existent prologue, it would fail with an error. Mark the object file so
+  // that such failures are not errors. See this Go language bug-report
+  // https://go-review.googlesource.com/c/go/+/148819/
+  bool needsSplitStackProlog() const {
+    return getStackSize() != 0 || hasTailCall();
+  }
+
   /// Return the minimum frame object index.
   int getObjectIndexBegin() const { return -NumFixedObjects; }

@@ -488,6 +505,14 @@ public:
     return Objects[ObjectIdx+NumFixedObjects].Alloca;
   }

+  /// Remove the underlying Alloca of the specified stack object if it
+  /// exists. This generally should not be used and is for reduction tooling.
+  void clearObjectAllocation(int ObjectIdx) {
+    assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    Objects[ObjectIdx + NumFixedObjects].Alloca = nullptr;
+  }
+
   /// Return the assigned stack offset of the specified object
   /// from the incoming stack pointer.
   int64_t getObjectOffset(int ObjectIdx) const {
@@ -773,6 +798,9 @@ public:
   MachineBasicBlock *getRestorePoint() const { return Restore; }
   void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; }

+  uint64_t getUnsafeStackSize() const { return UnsafeStackSize; }
+  void setUnsafeStackSize(uint64_t Size) { UnsafeStackSize = Size; }
+
   /// Return a set of physical registers that are pristine.
   ///
   /// Pristine registers hold a value that is useless to the current function,
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index c4767a51b094..fc1188186ac4 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -103,6 +103,22 @@ struct MachineFunctionInfo {
   static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) {
     return new (Allocator.Allocate<Ty>()) Ty(MF);
   }
+
+  template <typename Ty>
+  static Ty *create(BumpPtrAllocator &Allocator, const Ty &MFI) {
+    return new (Allocator.Allocate<Ty>()) Ty(MFI);
+  }
+
+  /// Make a functionally equivalent copy of this MachineFunctionInfo in \p MF.
+  /// This requires remapping MachineBasicBlock references from the original
+  /// parent to values in the new function. Targets may assume that virtual
+  /// register and frame index values are preserved in the new function.
+  virtual MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const {
+    return nullptr;
+  }
 };

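A hedged sketch of what a target-side override of the new clone() hook might look like. MyTargetFunctionInfo and its field are hypothetical; the body leans on the cloneInfo() helper that MachineFunction gains in the hunks just below, matching the contract stated in the comment above (frame indices and virtual registers are preserved, so only MachineBasicBlock references would need remapping through Src2DstMBB).

    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    // Hypothetical target function info carrying one piece of per-function
    // state. A memberwise copy suffices here because frame indices survive
    // cloning; a target holding MachineBasicBlock pointers would remap them
    // through Src2DstMBB instead.
    struct MyTargetFunctionInfo : public MachineFunctionInfo {
      int VarArgsFrameIndex = 0;

      MachineFunctionInfo *
      clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
            const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
          const override {
        return DestMF.cloneInfo<MyTargetFunctionInfo>(*this);
      }
    };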
 /// Properties which a MachineFunction may have at a given point in time.
@@ -277,12 +293,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
   // numbered and this vector keeps track of the mapping from ID's to MBB's.
   std::vector<MachineBasicBlock*> MBBNumbering;

-  // Unary encoding of basic block symbols is used to reduce size of ".strtab".
-  // Basic block number 'i' gets a prefix of length 'i'. The ith character also
-  // denotes the type of basic block number 'i'. Return blocks are marked with
-  // 'r', landing pads with 'l' and regular blocks with 'a'.
-  std::vector<char> BBSectionsSymbolPrefix;
-
   // Pool-allocate MachineFunction-lifetime and IR objects.
   BumpPtrAllocator Allocator;

@@ -537,8 +547,13 @@ public:
   /// the copied value; or for parameters, creates a DBG_PHI on entry.
   /// May insert instructions into the entry block!
   /// \p MI The copy-like instruction to salvage.
+  /// \p DbgPHICache A container to cache already-solved COPYs.
   /// \returns An instruction/operand pair identifying the defining value.
-  DebugInstrOperandPair salvageCopySSA(MachineInstr &MI);
+  DebugInstrOperandPair
+  salvageCopySSA(MachineInstr &MI,
+                 DenseMap<Register, DebugInstrOperandPair> &DbgPHICache);
+
+  DebugInstrOperandPair salvageCopySSAImpl(MachineInstr &MI);

   /// Finalise any partially emitted debug instructions. These are DBG_INSTR_REF
   /// instructions where we only knew the vreg of the value they use, not the
@@ -747,6 +762,21 @@ public:
     return const_cast<MachineFunction *>(this)->getInfo<Ty>();
   }

+  template <typename Ty> Ty *cloneInfo(const Ty &Old) {
+    assert(!MFInfo);
+    MFInfo = Ty::template create<Ty>(Allocator, Old);
+    return static_cast<Ty *>(MFInfo);
+  }
+
+  MachineFunctionInfo *cloneInfoFrom(
+      const MachineFunction &OrigMF,
+      const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) {
+    assert(!MFInfo && "new function already has MachineFunctionInfo");
+    if (!OrigMF.MFInfo)
+      return nullptr;
+    return OrigMF.MFInfo->clone(Allocator, *this, Src2DstMBB);
+  }
+
   /// Returns the denormal handling type for the default rounding mode of the
   /// function.
   DenormalMode getDenormalMode(const fltSemantics &FPType) const;
@@ -1101,12 +1131,6 @@ public:
   /// Add a cleanup action for a landing pad.
   void addCleanup(MachineBasicBlock *LandingPad);

-  void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter,
-                          const BlockAddress *RecoverBA);
-
-  void addSEHCleanupHandler(MachineBasicBlock *LandingPad,
-                            const Function *Cleanup);
-
   /// Return the type id for the specified typeinfo.  This is function wide.
   unsigned getTypeIDFor(const GlobalValue *TI);

@@ -1116,6 +1140,11 @@ public:
   /// Map the landing pad's EH symbol to the call site indexes.
   void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);

+  /// Return if there is any wasm exception handling.
+  bool hasAnyWasmLandingPadIndex() const {
+    return !WasmLPadToIndexMap.empty();
+  }
+
   /// Map the landing pad to its index. Used for Wasm exception handling.
   void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) {
     WasmLPadToIndexMap[LPad] = Index;
@@ -1132,6 +1161,10 @@ public:
     return WasmLPadToIndexMap.lookup(LPad);
   }

+  bool hasAnyCallSiteLandingPad() const {
+    return !LPadToCallSiteMap.empty();
+  }
+
   /// Get the call site indexes for a landing pad EH symbol.
   SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
     assert(hasCallSiteLandingPad(Sym) &&
@@ -1144,6 +1177,10 @@ public:
     return !LPadToCallSiteMap[Sym].empty();
   }

+  bool hasAnyCallSiteLabel() const {
+    return !CallSiteMap.empty();
+  }
+
   /// Map the begin label for a call site.
   void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
     CallSiteMap[BeginLabel] = Site;
@@ -1220,10 +1257,6 @@ public:

   void copyCallSiteInfo(const MachineInstr *Old, const MachineInstr *New);

-  const std::vector<char> &getBBSectionsSymbolPrefix() const {
-    return BBSectionsSymbolPrefix;
-  }
-
   /// Move the call site info from \p Old to \p New call site info. This
   /// function is used when we are replacing one call instruction with another
   /// one to the same callee.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 2893e138a95c..acc4c9a24c01 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -26,7 +26,6 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/PseudoProbe.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ArrayRecycler.h"
@@ -38,6 +37,9 @@

 namespace llvm {

+class DILabel;
+class Instruction;
+class MDNode;
 class AAResults;
 template <typename T> class ArrayRef;
 class DIExpression;
@@ -96,7 +98,7 @@ public:
     FmContract = 1 << 8,  // Instruction supports Fast math
                           // contraction operations like fma.
     FmAfn = 1 << 9,       // Instruction may map to Fast math
-                          // instrinsic approximation.
+                          // intrinsic approximation.
     FmReassoc = 1 << 10,  // Instruction supports Fast math
                           // reassociation of operand order.
     NoUWrap = 1 << 11,    // Instruction supports binary operator
@@ -586,8 +588,7 @@ public:

   /// Return true if operand \p OpIdx is a subregister index.
   bool isOperandSubregIdx(unsigned OpIdx) const {
-    assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
-           "Expected MO_Immediate operand type.");
+    assert(getOperand(OpIdx).isImm() && "Expected MO_Immediate operand type.");
     if (isExtractSubreg() && OpIdx == 2)
       return true;
     if (isInsertSubreg() && OpIdx == 3)
@@ -810,6 +811,12 @@ public:
     return hasProperty(MCID::Pseudo, Type);
   }

+  /// Return true if this instruction doesn't produce any output in the form of
+  /// executable instructions.
+  bool isMetaInstruction(QueryType Type = IgnoreBundle) const {
+    return hasProperty(MCID::Meta, Type);
+  }
+
   bool isReturn(QueryType Type = AnyInBundle) const {
     return hasProperty(MCID::Return, Type);
   }
@@ -1306,30 +1313,6 @@ public:
            getOperand(0).getSubReg() == getOperand(1).getSubReg();
   }

-  /// Return true if this instruction doesn't produce any output in the form of
-  /// executable instructions.
-  bool isMetaInstruction() const {
-    switch (getOpcode()) {
-    default:
-      return false;
-    case TargetOpcode::IMPLICIT_DEF:
-    case TargetOpcode::KILL:
-    case TargetOpcode::CFI_INSTRUCTION:
-    case TargetOpcode::EH_LABEL:
-    case TargetOpcode::GC_LABEL:
-    case TargetOpcode::DBG_VALUE:
-    case TargetOpcode::DBG_VALUE_LIST:
-    case TargetOpcode::DBG_INSTR_REF:
-    case TargetOpcode::DBG_PHI:
-    case TargetOpcode::DBG_LABEL:
-    case TargetOpcode::LIFETIME_START:
-    case TargetOpcode::LIFETIME_END:
-    case TargetOpcode::PSEUDO_PROBE:
-    case TargetOpcode::ARITH_FENCE:
-      return true;
-    }
-  }
-
   /// Return true if this is a transient instruction that is either very likely
   /// to be eliminated during register allocation (such as copy-like
   /// instructions), or if this instruction doesn't have an execution-time cost.
@@ -1744,7 +1727,7 @@ public:

   /// Erase an operand from an instruction, leaving it with one
   /// fewer operand than it started with.
-  void RemoveOperand(unsigned OpNo);
+  void removeOperand(unsigned OpNo);

   /// Clear this MachineInstr's memory reference descriptor list.  This resets
   /// the memrefs to their most conservative state.  This should be used only
@@ -1863,12 +1846,12 @@ private:
   /// Unlink all of the register operands in this instruction from their
   /// respective use lists.  This requires that the operands already be on their
   /// use lists.
-  void RemoveRegOperandsFromUseLists(MachineRegisterInfo&);
+  void removeRegOperandsFromUseLists(MachineRegisterInfo&);

   /// Add all of the register operands in this instruction from their
   /// respective use lists.  This requires that the operands not be on their
   /// use lists yet.
-  void AddRegOperandsToUseLists(MachineRegisterInfo&);
+  void addRegOperandsToUseLists(MachineRegisterInfo&);

   /// Slow path for hasProperty when we're dealing with a bundle.
   bool hasPropertyInBundle(uint64_t Mask, QueryType Type) const;
diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index c90f07096d02..daf0f18a7518 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -33,7 +33,6 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/Pass.h"

 namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 00080b171974..41574d8d556a 100644
--- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -31,14 +31,13 @@ class MDNode;
 class raw_ostream;
 class MachineFunction;
 class ModuleSlotTracker;
+class TargetInstrInfo;

 /// This class contains a discriminated union of information about pointers in
 /// memory operands, relating them back to LLVM IR or to virtual locations (such
 /// as frame indices) that are exposed during codegen.
 struct MachinePointerInfo {
   /// This is the IR pointer value for the access, or it is null if unknown.
-  /// If this is null, then the access is to a pointer in the default address
-  /// space.
   PointerUnion<const Value *, const PseudoSourceValue *> V;

   /// Offset - This is an offset from the base Value*.
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index c07606e89374..cdd0073749d3 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -30,12 +30,10 @@
 #ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H
 #define LLVM_CODEGEN_MACHINEMODULEINFO_H

-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Pass.h"
 #include <memory>
 #include <utility>
@@ -46,9 +44,9 @@ namespace llvm {

 class BasicBlock;
 class Function;
 class LLVMTargetMachine;
-class MMIAddrLabelMap;
 class MachineFunction;
 class Module;
+class MCSymbol;

 //===----------------------------------------------------------------------===//
 /// This class can be derived from and used by targets to hold private
@@ -106,10 +104,6 @@ class MachineModuleInfo {

   /// \}

-  /// This map keeps track of which symbol is being used for the specified
-  /// basic block's address of label.
-  MMIAddrLabelMap *AddrLabelSymbols;
-
   // TODO: Ideally, what we'd like is to have a switch that allows emitting
   // synchronous (precise at call-sites only) CFA into .eh_frame. However,
   // even under this switch, we'd like .debug_frame to be precise when using
@@ -123,22 +117,6 @@ class MachineModuleInfo {
   /// point. This is used to emit an undefined reference to _fltused.
   bool UsesMSVCFloatingPoint;

-  /// True if the module calls the __morestack function indirectly, as is
-  /// required under the large code model on x86. This is used to emit
-  /// a definition of a symbol, __morestack_addr, containing the address. See
-  /// comments in lib/Target/X86/X86FrameLowering.cpp for more details.
-  bool UsesMorestackAddr;
-
-  /// True if the module contains split-stack functions. This is used to
-  /// emit .note.GNU-split-stack section as required by the linker for
-  /// special handling split-stack function calling no-split-stack function.
-  bool HasSplitStack;
-
-  /// True if the module contains no-split-stack functions. This is used to
-  /// emit .note.GNU-no-split-stack section when it also contains split-stack
-  /// functions.
-  bool HasNosplitStack;
-
   /// Maps IR Functions to their corresponding MachineFunctions.
   DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
   /// Next unique number available for a MachineFunction.
@@ -184,6 +162,9 @@ public:
   /// Machine Function map.
   void deleteMachineFunctionFor(Function &F);

+  /// Add an externally created MachineFunction \p MF for \p F.
+  void insertFunction(const Function &F, std::unique_ptr<MachineFunction> &&MF);
+
   /// Keep track of various per-module pieces of information for backends
   /// that would like to do so.
   template <typename Ty>
@@ -200,55 +181,11 @@ public:
   /// Returns true if valid debug info is present.
   bool hasDebugInfo() const { return DbgInfoAvailable; }
-  void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }

   bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }

   void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }

-  bool usesMorestackAddr() const {
-    return UsesMorestackAddr;
-  }
-
-  void setUsesMorestackAddr(bool b) {
-    UsesMorestackAddr = b;
-  }
-
-  bool hasSplitStack() const {
-    return HasSplitStack;
-  }
-
-  void setHasSplitStack(bool b) {
-    HasSplitStack = b;
-  }
-
-  bool hasNosplitStack() const {
-    return HasNosplitStack;
-  }
-
-  void setHasNosplitStack(bool b) {
-    HasNosplitStack = b;
-  }
-
-  /// Return the symbol to be used for the specified basic block when its
-  /// address is taken. This cannot be its normal LBB label because the block
-  /// may be accessed outside its containing function.
-  MCSymbol *getAddrLabelSymbol(const BasicBlock *BB) {
-    return getAddrLabelSymbolToEmit(BB).front();
-  }
-
-  /// Return the symbol to be used for the specified basic block when its
-  /// address is taken. If other blocks were RAUW'd to this one, we may have
-  /// to emit them as well, return the whole set.
-  ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
-
-  /// If the specified function has had any references to address-taken blocks
-  /// generated, but the block got deleted, return the symbol now so we can
-  /// emit it. This prevents emitting a reference to a symbol that has no
-  /// definition.
-  void takeDeletedSymbolsForFunction(const Function *F,
-                                     std::vector<MCSymbol *> &Result);
-
   /// \name Exception Handling
   /// \{
diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h
index eded28183ea2..c88e72cdc1d9 100644
--- a/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -13,15 +13,14 @@
 #ifndef LLVM_CODEGEN_MACHINEOPERAND_H
 #define LLVM_CODEGEN_MACHINEOPERAND_H

-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
 #include <cassert>

 namespace llvm {

+class LLT;
 class BlockAddress;
 class Constant;
 class ConstantFP;
@@ -460,6 +459,16 @@ public:
     return !isUndef() && !isInternalRead() && (isUse() || getSubReg());
   }

+  /// Return true if this operand can validly be appended to an arbitrary
+  /// operand list. i.e. this behaves like an implicit operand.
+  bool isValidExcessOperand() const {
+    if ((isReg() && isImplicit()) || isRegMask())
+      return true;
+
+    // Debug operands
+    return isMetadata() || isMCSymbol();
+  }
+
   //===--------------------------------------------------------------------===//
   // Mutators for Register Operands
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index 285b858c96cb..cb0998984dfb 100644
--- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -15,8 +15,9 @@
 #ifndef LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H
 #define LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H

-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"

 namespace llvm {
 class MachineBasicBlock;
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 08b76295dbf2..f968089e0de0 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -15,11 +15,10 @@
 #ifndef LLVM_CODEGEN_MACHINEOUTLINER_H
 #define LLVM_CODEGEN_MACHINEOUTLINER_H

-#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include <initializer_list>

 namespace llvm {
 namespace outliner {
@@ -56,6 +55,55 @@ private:
   /// target.
   unsigned CallOverhead = 0;

+  /// Liveness information for this Candidate. Tracks from the end of the
+  /// block containing this Candidate to the beginning of its sequence.
+  ///
+  /// Optional. Can be used to fine-tune the cost model, or fine-tune legality
+  /// decisions.
+  LiveRegUnits FromEndOfBlockToStartOfSeq;
+
+  /// Liveness information restricted to this Candidate's instruction sequence.
+  ///
+  /// Optional. Can be used to fine-tune the cost model, or fine-tune legality
+  /// decisions.
+  LiveRegUnits InSeq;
+
+  /// True if FromEndOfBlockToStartOfSeq has been initialized.
+  bool FromEndOfBlockToStartOfSeqWasSet = false;
+
+  /// True if InSeq has been initialized.
+  bool InSeqWasSet = false;
+
+  /// Populate FromEndOfBlockToStartOfSeq with liveness information.
+  void initFromEndOfBlockToStartOfSeq(const TargetRegisterInfo &TRI) {
+    assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
+           "Candidate's Machine Function must track liveness");
+    // Only initialize once.
+    if (FromEndOfBlockToStartOfSeqWasSet)
+      return;
+    FromEndOfBlockToStartOfSeqWasSet = true;
+    FromEndOfBlockToStartOfSeq.init(TRI);
+    FromEndOfBlockToStartOfSeq.addLiveOuts(*MBB);
+    // Compute liveness from the end of the block up to the beginning of the
+    // outlining candidate.
+    for (auto &MI : make_range(MBB->rbegin(),
+                               (MachineBasicBlock::reverse_iterator)front()))
+      FromEndOfBlockToStartOfSeq.stepBackward(MI);
+  }
+
+  /// Populate InSeq with liveness information.
+  void initInSeq(const TargetRegisterInfo &TRI) {
+    assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
+           "Candidate's Machine Function must track liveness");
+    // Only initialize once.
+    if (InSeqWasSet)
+      return;
+    InSeqWasSet = true;
+    InSeq.init(TRI);
+    for (auto &MI : make_range(front(), std::next(back())))
+      InSeq.accumulate(MI);
+  }
+
 public:
   /// The index of this \p Candidate's \p OutlinedFunction in the list of
   /// \p OutlinedFunctions.
@@ -65,26 +113,9 @@ public:
   /// from this point. Defined by the target.
   unsigned CallConstructionID = 0;

-  /// Contains physical register liveness information for the MBB containing
-  /// this \p Candidate.
-  ///
-  /// This is optionally used by the target to calculate more fine-grained
-  /// cost model information.
-  LiveRegUnits LRU;
-
-  /// Contains the accumulated register liveness information for the
-  /// instructions in this \p Candidate.
-  ///
-  /// This is optionally used by the target to determine which registers have
-  /// been used across the sequence.
-  LiveRegUnits UsedInSequence;
-
   /// Target-specific flags for this Candidate's MBB.
   unsigned Flags = 0x0;

-  /// True if initLRU has been called on this Candidate.
-  bool LRUWasSet = false;
-
   /// Return the number of instructions in this Candidate.
   unsigned getLength() const { return Len; }

@@ -109,6 +140,50 @@ public:
   MachineFunction *getMF() const { return MBB->getParent(); }
   MachineBasicBlock *getMBB() const { return MBB; }

+  /// \returns True if \p Reg is available from the end of the block to the
+  /// beginning of the sequence.
+  ///
+  /// This query considers the following range:
+  ///
+  /// in_seq_1
+  /// in_seq_2
+  /// ...
+  /// in_seq_n
+  /// not_in_seq_1
+  /// ...
+  ///
+  bool isAvailableAcrossAndOutOfSeq(Register Reg,
+                                    const TargetRegisterInfo &TRI) {
+    if (!FromEndOfBlockToStartOfSeqWasSet)
+      initFromEndOfBlockToStartOfSeq(TRI);
+    return FromEndOfBlockToStartOfSeq.available(Reg);
+  }
+
+  /// \returns True if `isAvailableAcrossAndOutOfSeq` fails for any register
+  /// in \p Regs.
+  bool isAnyUnavailableAcrossOrOutOfSeq(std::initializer_list<Register> Regs,
+                                        const TargetRegisterInfo &TRI) {
+    if (!FromEndOfBlockToStartOfSeqWasSet)
+      initFromEndOfBlockToStartOfSeq(TRI);
+    return any_of(Regs, [&](Register Reg) {
+      return !FromEndOfBlockToStartOfSeq.available(Reg);
+    });
+  }
+
+  /// \returns True if \p Reg is available within the sequence itself.
+  ///
+  /// This query considers the following range:
+  ///
+  /// in_seq_1
+  /// in_seq_2
+  /// ...
+  /// in_seq_n
+  bool isAvailableInsideSeq(Register Reg, const TargetRegisterInfo &TRI) {
+    if (!InSeqWasSet)
+      initInSeq(TRI);
+    return InSeq.available(Reg);
+  }
+
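A sketch of how a target outliner's cost model might consult the two availability queries above; the helper, the LinkReg parameter, and the returned variant IDs are invented for illustration, and only the methods declared in this hunk are used.

    #include "llvm/CodeGen/MachineOutliner.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    using namespace llvm;

    // Pick a call-construction variant based on whether a link-style register
    // (e.g. a return-address register) is free around and inside the sequence.
    static unsigned chooseCallVariant(outliner::Candidate &C, Register LinkReg,
                                      const TargetRegisterInfo &TRI) {
      // Free from the end of the block back through the candidate: no save
      // of LinkReg is needed at all.
      if (C.isAvailableAcrossAndOutOfSeq(LinkReg, TRI))
        return 0; // Hypothetical "no save" CallConstructionID.
      // Free only inside the sequence: LinkReg may be clobbered by the call
      // as long as it is preserved around it.
      if (C.isAvailableInsideSeq(LinkReg, TRI))
        return 1; // Hypothetical "save/restore around call" ID.
      return 2;   // Hypothetical most conservative ID.
    }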
   /// The number of instructions that would be saved by outlining every
   /// candidate of this type.
   ///
@@ -132,31 +207,6 @@ public:
     return getStartIdx() > RHS.getStartIdx();
   }

-  /// Compute the registers that are live across this Candidate.
-  /// Used by targets that need this information for cost model calculation.
-  /// If a target does not need this information, then this should not be
-  /// called.
-  void initLRU(const TargetRegisterInfo &TRI) {
-    assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
-           "Candidate's Machine Function must track liveness");
-    // Only initialize once.
-    if (LRUWasSet)
-      return;
-    LRUWasSet = true;
-    LRU.init(TRI);
-    LRU.addLiveOuts(*MBB);
-
-    // Compute liveness from the end of the block up to the beginning of the
-    // outlining candidate.
-    std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(),
-                  [this](MachineInstr &MI) { LRU.stepBackward(MI); });
-
-    // Walk over the sequence itself and figure out which registers were used
-    // in the sequence.
-    UsedInSequence.init(TRI);
-    std::for_each(front(), std::next(back()),
-                  [this](MachineInstr &MI) { UsedInSequence.accumulate(MI); });
-  }
 };

 /// The information necessary to create an outlined function for some
diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h
index 75b8a89c812e..6089339c7f5a 100644
--- a/llvm/include/llvm/CodeGen/MachinePassManager.h
+++ b/llvm/include/llvm/CodeGen/MachinePassManager.h
@@ -25,13 +25,15 @@
 #include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/type_traits.h"
+
+#include <map>

 namespace llvm {
 class Module;
+class Function;
+class MachineFunction;

 extern template class AnalysisManager<MachineFunction>;

diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index e6763899a083..7748055f5d35 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -47,6 +47,7 @@ FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
 FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
 FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
 FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ())
+FUNCTION_PASS("tlshoist", TLSVariableHoistPass, ())
 FUNCTION_PASS("verify", VerifierPass, ())
 #undef FUNCTION_PASS

@@ -119,6 +120,7 @@ DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ())
 DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ())
 DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ())
 DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
+DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ())
 #undef DUMMY_FUNCTION_PASS

 #ifndef DUMMY_MODULE_PASS
@@ -197,6 +199,5 @@ DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ())
 DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ())
 DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ())
 DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ())
-DUMMY_MACHINE_FUNCTION_PASS("machine-cycles", MachineCycleInfoWrapperPass, ())
 DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, ())
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 7e7fa57d80da..4559f7a9bde7 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -40,13 +40,17 @@
 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
 #define LLVM_CODEGEN_MACHINEPIPELINER_H

+#include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"

+#include <deque>
+
 namespace llvm {

 class AAResults;
@@ -80,6 +84,8 @@ public:
     SmallVector<MachineOperand, 4> BrCond;
     MachineInstr *LoopInductionVar = nullptr;
     MachineInstr *LoopCompare = nullptr;
+    std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
+        nullptr;
   };
   LoopInfo LI;
@@ -115,6 +121,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
   LiveIntervals &LIS;
   const RegisterClassInfo &RegClassInfo;
   unsigned II_setByPragma = 0;
+  TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;

   /// A topological ordering of the SUnits, which is needed for changing
   /// dependences and iterating over the SUnits.
@@ -192,9 +199,11 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {

 public:
   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
-                    const RegisterClassInfo &rci, unsigned II)
+                    const RegisterClassInfo &rci, unsigned II,
+                    TargetInstrInfo::PipelinerLoopInfo *PLI)
       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
-        RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
+        RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
+        Topo(SUnits, &ExitSU) {
     P.MF->getSubtarget().getSMSMutations(Mutations);
     if (SwpEnableCopyToPhi)
       Mutations.push_back(std::make_unique<CopyToPhiMutation>());
@@ -585,6 +594,13 @@ public:
     return ScheduledInstrs[cycle];
   }

+  SmallSet<SUnit *, 8>
+  computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
+                             TargetInstrInfo::PipelinerLoopInfo *PLI);
+
+  bool
+  normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
+                                    TargetInstrInfo::PipelinerLoopInfo *PLI);
   bool isValidSchedule(SwingSchedulerDAG *SSD);
   void finalizeSchedule(SwingSchedulerDAG *SSD);
   void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 94ae6fe02e9c..b2c5f12106af 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -15,18 +15,16 @@

 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RegisterBank.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/LaneBitmask.h"
@@ -229,6 +227,16 @@ public:
   /// Returns true if the updated CSR list was initialized and false otherwise.
   bool isUpdatedCSRsInitialized() const { return IsUpdatedCSRsInitialized; }

+  /// Returns true if a register can be used as an argument to a function.
+  bool isArgumentRegister(const MachineFunction &MF, MCRegister Reg) const;
+
+  /// Returns true if a register is a fixed register.
+  bool isFixedRegister(const MachineFunction &MF, MCRegister Reg) const;
+
+  /// Returns true if a register is a general purpose register.
+  bool isGeneralPurposeRegister(const MachineFunction &MF,
+                                MCRegister Reg) const;
+
   /// Disables the register from the list of CSRs.
   /// I.e. the register will not appear as part of the CSR mask.
   /// \see UpdatedCalleeSavedRegs.
@@ -825,23 +833,12 @@ public:
   /// to refer to the designated register.
   void updateDbgUsersToReg(MCRegister OldReg, MCRegister NewReg,
                            ArrayRef<MachineInstr *> Users) const {
-    SmallSet<unsigned, 4> OldRegUnits;
-    for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); RUI.isValid();
-         ++RUI)
-      OldRegUnits.insert(*RUI);
-
     // If this operand is a register, check whether it overlaps with OldReg.
     // If it does, replace with NewReg.
-    auto UpdateOp = [this, &NewReg, &OldReg, &OldRegUnits](MachineOperand &Op) {
-      if (Op.isReg()) {
-        for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo());
-             RUI.isValid(); ++RUI) {
-          if (OldRegUnits.contains(*RUI)) {
-            Op.setReg(NewReg);
-            break;
-          }
-        }
-      }
+    auto UpdateOp = [this, &NewReg, &OldReg](MachineOperand &Op) {
+      if (Op.isReg() &&
+          getTargetRegisterInfo()->regsOverlap(Op.getReg(), OldReg))
+        Op.setReg(NewReg);
     };

     // Iterate through (possibly several) operands to DBG_VALUEs and update
diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h
index 6dbf321bdeaa..f59d7cf8a522 100644
--- a/llvm/include/llvm/CodeGen/MachineSSAContext.h
+++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h
@@ -15,21 +15,21 @@
 #ifndef LLVM_CODEGEN_MACHINESSACONTEXT_H
 #define LLVM_CODEGEN_MACHINESSACONTEXT_H

-#include "llvm/ADT/GenericSSAContext.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Support/Printable.h"

-#include <memory>
-
 namespace llvm {
+class MachineRegisterInfo;
 class MachineInstr;
-class MachineBasicBlock;
 class MachineFunction;
 class Register;
+template <typename _FunctionT> class GenericSSAContext;
 template <typename, bool> class DominatorTreeBase;

 inline auto successors(MachineBasicBlock *BB) { return BB->successors(); }
 inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); }
+inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); }
+inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); }

 template <> class GenericSSAContext<MachineFunction> {
   const MachineRegisterInfo *RegInfo = nullptr;
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 267c4b595eec..0554eb1ab77e 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -287,7 +287,7 @@ protected:
   const SUnit *NextClusterPred = nullptr;
   const SUnit *NextClusterSucc = nullptr;

-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
   /// The number of instructions scheduled so far. Used to cut off the
   /// scheduler at the point determined by misched-cutoff.
   unsigned NumInstrsScheduled = 0;
@@ -679,7 +679,7 @@ private:
   // For each PIdx, stores the resource group IDs of its subunits
   SmallVector ResourceGroupSubUnitMasks;

-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
   // Remember the greatest possible stall as an upper bound on the number of
   // times we should retry the pending queue because of a hazard.
   unsigned MaxObservedStall;
diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h
index 8423b2da1c78..43571b7b8afd 100644
--- a/llvm/include/llvm/CodeGen/MachineStableHash.h
+++ b/llvm/include/llvm/CodeGen/MachineStableHash.h
@@ -17,6 +17,8 @@
 #include "llvm/CodeGen/StableHashing.h"

 namespace llvm {
+class MachineBasicBlock;
+class MachineFunction;
 class MachineInstr;
 class MachineOperand;

@@ -24,6 +26,8 @@ stable_hash stableHashValue(const MachineOperand &MO);
 stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false,
                             bool HashConstantPoolIndices = false,
                             bool HashMemOperands = false);
+stable_hash stableHashValue(const MachineBasicBlock &MBB);
+stable_hash stableHashValue(const MachineFunction &MF);

 } // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index e8dbf49994bb..c515101e80fd 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -61,7 +61,6 @@
 #define LLVM_CODEGEN_MODULOSCHEDULE_H

 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineLoopUtils.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -70,6 +69,8 @@
 namespace llvm {

 class MachineBasicBlock;
+class MachineLoop;
+class MachineRegisterInfo;
 class MachineInstr;
 class LiveIntervals;

@@ -190,8 +191,8 @@ private:
   void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
                       ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
   void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
-                      ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
-                      MBBVectorTy &PrologBBs);
+                      MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
+                      MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
   void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
                             MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
                             ValueMapTy *VRMap, InstrMapTy &InstrMap,
diff --git a/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h b/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h
index 51822d082bad..043b6b120632 100644
--- a/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -190,7 +190,7 @@ namespace PBQP {

       RawVector v = G.getNodeCosts(NId);

-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
       // Although a conservatively allocatable node can be allocated to a register,
       // spilling it may provide a lower cost solution. Assert here that spilling
       // is done by choice, not because there were no register available.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 616ab1034133..6e37d42f0d29 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -51,10 +51,8 @@ namespace llvm {
   FunctionPass *createUnreachableBlockEliminationPass();

   /// createBasicBlockSections Pass - This pass assigns sections to machine
-  /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory
-  /// buffer that contains the list of functions and basic block ids to
-  /// selectively enable basic block sections.
-  MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
+  /// basic blocks and is enabled with -fbasic-block-sections.
+  MachineFunctionPass *createBasicBlockSectionsPass();

   /// createMachineFunctionSplitterPass - This pass splits machine functions
   /// using profile information.
@@ -331,6 +329,8 @@ namespace llvm {
   /// machine instructions.
   extern char &MachineCopyPropagationID;

+  MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr);
+
   /// PeepholeOptimizer - This pass performs peephole optimizations -
   /// like extension and comparison eliminations.
   extern char &PeepholeOptimizerID;
@@ -494,6 +494,9 @@ namespace llvm {
   // This pass expands indirectbr instructions.
   FunctionPass *createIndirectBrExpandPass();

+  /// Creates CFI Fixup pass. \see CFIFixup.cpp
+  FunctionPass *createCFIFixup();
+
   /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
   FunctionPass *createCFIInstrInserter();

@@ -554,6 +557,12 @@ namespace llvm {
   /// When learning an eviction policy, extract score(reward) information,
   /// otherwise this does nothing
   FunctionPass *createRegAllocScoringPass();
+
+  /// JMC instrument pass.
+  ModulePass *createJMCInstrumenterPass();
+
+  /// This pass converts conditional moves to conditional jumps when profitable.
+  FunctionPass *createSelectOptimizePass();
 } // End llvm namespace

 #endif
diff --git a/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index f1487017f205..07b7ba321566 100644
--- a/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -25,7 +25,7 @@ class MachineMemOperand;
 class MIRFormatter;
 class PseudoSourceValue;
 class raw_ostream;
-class TargetInstrInfo;
+class TargetMachine;

 raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);

@@ -59,7 +59,7 @@ private:
   virtual void printCustom(raw_ostream &O) const;

 public:
-  explicit PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
+  explicit PseudoSourceValue(unsigned Kind, const TargetMachine &TM);

   virtual ~PseudoSourceValue();

@@ -95,8 +95,8 @@ class FixedStackPseudoSourceValue : public PseudoSourceValue {
   const int FI;

 public:
-  explicit FixedStackPseudoSourceValue(int FI, const TargetInstrInfo &TII)
-      : PseudoSourceValue(FixedStack, TII), FI(FI) {}
+  explicit FixedStackPseudoSourceValue(int FI, const TargetMachine &TM)
+      : PseudoSourceValue(FixedStack, TM), FI(FI) {}

   static bool classof(const PseudoSourceValue *V) {
     return V->kind() == FixedStack;
@@ -115,7 +115,7 @@ public:

 class CallEntryPseudoSourceValue : public PseudoSourceValue {
 protected:
-  CallEntryPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
+  CallEntryPseudoSourceValue(unsigned Kind, const TargetMachine &TM);

 public:
   bool isConstant(const MachineFrameInfo *) const override;
@@ -128,8 +128,7 @@ class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
   const GlobalValue *GV;

 public:
-  GlobalValuePseudoSourceValue(const GlobalValue *GV,
-                               const TargetInstrInfo &TII);
+  GlobalValuePseudoSourceValue(const GlobalValue *GV, const TargetMachine &TM);

   static bool classof(const PseudoSourceValue *V) {
     return V->kind() == GlobalValueCallEntry;
@@ -143,7 +142,7 @@ class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue {
   const char *ES;

 public:
-  ExternalSymbolPseudoSourceValue(const char *ES, const TargetInstrInfo &TII);
+  ExternalSymbolPseudoSourceValue(const char *ES, const TargetMachine &TM);

   static bool classof(const PseudoSourceValue *V) {
     return V->kind() == ExternalSymbolCallEntry;
@@ -154,7 +153,7 @@ public:

 /// Manages creation of pseudo source values.
class PseudoSourceValueManager { - const TargetInstrInfo &TII; + const TargetMachine &TM; const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV; std::map> FSValues; StringMap> @@ -164,7 +163,7 @@ class PseudoSourceValueManager { GlobalCallEntries; public: - PseudoSourceValueManager(const TargetInstrInfo &TII); + PseudoSourceValueManager(const TargetMachine &TM); /// Return a pseudo source value referencing the area below the stack frame of /// a function, e.g., the argument space. diff --git a/llvm/include/llvm/CodeGen/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h index e0205d7c92c8..a323ee9dc396 100644 --- a/llvm/include/llvm/CodeGen/RDFGraph.h +++ b/llvm/include/llvm/CodeGen/RDFGraph.h @@ -749,7 +749,6 @@ namespace rdf { RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const; RegisterRef makeRegRef(const MachineOperand &Op) const; - RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const; NodeAddr getNextRelated(NodeAddr IA, NodeAddr RA) const; diff --git a/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/llvm/include/llvm/CodeGen/RegAllocPBQP.h index 1ed55082e32c..1ea8840947bc 100644 --- a/llvm/include/llvm/CodeGen/RegAllocPBQP.h +++ b/llvm/include/llvm/CodeGen/RegAllocPBQP.h @@ -183,11 +183,12 @@ public: NodeMetadata() = default; NodeMetadata(const NodeMetadata &Other) - : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts), - OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg), - AllowedRegs(Other.AllowedRegs) -#ifndef NDEBUG - , everConservativelyAllocatable(Other.everConservativelyAllocatable) + : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts), + OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg), + AllowedRegs(Other.AllowedRegs) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + , + everConservativelyAllocatable(Other.everConservativelyAllocatable) #endif { if (NumOpts > 0) { @@ -217,7 +218,7 @@ public: assert(RS >= this->RS && "A node's reduction state can not be downgraded"); this->RS = RS; -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Remember this state to assert later that a non-infinite register // option was available. if (RS == ConservativelyAllocatable) @@ -247,7 +248,7 @@ public: &OptUnsafeEdges[NumOpts]); } -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS bool wasConservativelyAllocatable() const { return everConservativelyAllocatable; } @@ -261,7 +262,7 @@ private: Register VReg; GraphMetadata::AllowedRegVecRef AllowedRegs; -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS bool everConservativelyAllocatable = false; #endif }; diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index a683223b5a4a..9dc3e98fe837 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -69,7 +69,7 @@ public: /// Return true if the specified register number is in /// the virtual register namespace. static bool isVirtualRegister(unsigned Reg) { - return Reg & MCRegister::VirtualRegFlag && !isStackSlot(Reg); + return Reg & MCRegister::VirtualRegFlag; } /// Convert a virtual register number to a 0-based index. diff --git a/llvm/include/llvm/CodeGen/RegisterBank.h b/llvm/include/llvm/CodeGen/RegisterBank.h new file mode 100644 index 000000000000..66885f113e8e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegisterBank.h @@ -0,0 +1,98 @@ +//==-- llvm/CodeGen/RegisterBank.h - Register Bank ---------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file declares the API of register banks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGISTERBANK_H +#define LLVM_CODEGEN_REGISTERBANK_H + +#include "llvm/ADT/BitVector.h" + +namespace llvm { +// Forward declarations. +class RegisterBankInfo; +class raw_ostream; +class TargetRegisterClass; +class TargetRegisterInfo; + +/// This class implements the register bank concept. +/// Two instances of RegisterBank must have different IDs. +/// This property is enforced by the RegisterBankInfo class. +class RegisterBank { +private: + unsigned ID; + const char *Name; + unsigned Size; + BitVector ContainedRegClasses; + + /// Sentinel value used to recognize a register bank that has not been + /// properly initialized yet. + static const unsigned InvalidID; + + /// Only the RegisterBankInfo can initialize RegisterBank properly. + friend RegisterBankInfo; + +public: + RegisterBank(unsigned ID, const char *Name, unsigned Size, + const uint32_t *CoveredClasses, unsigned NumRegClasses); + + /// Get the identifier of this register bank. + unsigned getID() const { return ID; } + + /// Get a user-friendly name of this register bank. + /// Should be used only for debugging purposes. + const char *getName() const { return Name; } + + /// Get the maximal size in bits that fits in this register bank. + unsigned getSize() const { return Size; } + + /// Check whether this instance is ready to be used. + bool isValid() const; + + /// Check if this register bank is valid. In other words, + /// check if it has been properly constructed. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True if the check was successful. + bool verify(const TargetRegisterInfo &TRI) const; + + /// Check whether this register bank covers \p RC. + /// In other words, check if this register bank fully covers + /// the registers that \p RC contains. + /// \pre isValid() + bool covers(const TargetRegisterClass &RC) const; + + /// Check whether \p OtherRB is the same as this. + bool operator==(const RegisterBank &OtherRB) const; + bool operator!=(const RegisterBank &OtherRB) const { + return !this->operator==(OtherRB); + } + + /// Dump the register mask on dbgs() stream. + /// The dump is verbose. + void dump(const TargetRegisterInfo *TRI = nullptr) const; + + /// Print the register mask on OS. + /// If IsForDebug is false, then only the name of the register bank + /// is printed. Otherwise, all the fields are printed. + /// TRI is then used to print the names of the register classes that + /// this register bank covers. + void print(raw_ostream &OS, bool IsForDebug = false, + const TargetRegisterInfo *TRI = nullptr) const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const RegisterBank &RegBank) { + RegBank.print(OS); + return OS; +} +} // End namespace llvm. + +#endif
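// [Editorial sketch, not part of the patch] Typical queries against the
// RegisterBank API declared above; the bank and register class arguments
// stand in for values a target's RegisterBankInfo would hand out.
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/Support/Debug.h"
static void describeBank(const llvm::RegisterBank &RB,
                         const llvm::TargetRegisterClass &RC) {
  if (RB.covers(RC)) // requires RB.isValid()
    llvm::dbgs() << RB.getName() << " (ID " << RB.getID() << ", "
                 << RB.getSize() << " bits) covers the class\n";
}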
diff --git a/llvm/include/llvm/CodeGen/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/RegisterBankInfo.h new file mode 100644 index 000000000000..bba4f1f025a0 --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegisterBankInfo.h @@ -0,0 +1,775 @@ +//===- llvm/CodeGen/RegisterBankInfo.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file declares the API for the register bank info. +/// This API is responsible for handling the register banks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGISTERBANKINFO_H +#define LLVM_CODEGEN_REGISTERBANKINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include +#include +#include + +namespace llvm { + +class MachineInstr; +class MachineRegisterInfo; +class raw_ostream; +class RegisterBank; +class TargetInstrInfo; +class TargetRegisterClass; +class TargetRegisterInfo; + +/// Holds all the information related to register banks. +class RegisterBankInfo { +public: + /// Helper struct that represents how a value is partially mapped + /// into a register. + /// The StartIdx and Length represent what region of the original + /// value this partial mapping covers. + /// This can be represented as a mask of contiguous bits starting + /// at bit StartIdx and spanning Length bits. + /// StartIdx is counted from the least significant bit. + struct PartialMapping { + /// Number of bits at which this partial mapping starts in the + /// original value. The bits are counted from the least significant + /// bit to the most significant bit. + unsigned StartIdx; + + /// Length of this mapping in bits. This is how many bits this + /// partial mapping covers in the original value: + /// from StartIdx to StartIdx + Length - 1. + unsigned Length; + + /// Register bank where the partial value lives. + const RegisterBank *RegBank; + + PartialMapping() = default; + + /// Provide a shortcut for quickly building PartialMapping. + PartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) + : StartIdx(StartIdx), Length(Length), RegBank(&RegBank) {} + + /// \return the index, in the original value, of the most + /// significant bit that this partial mapping covers. + unsigned getHighBitIdx() const { return StartIdx + Length - 1; } + + /// Print this partial mapping on dbgs() stream. + void dump() const; + + /// Print this partial mapping on \p OS. + void print(raw_ostream &OS) const; + + /// Check that the Mask is compatible with the RegBank. + /// Indeed, if the RegBank cannot accommodate the "active bits" of the mask, + /// there is no way this mapping is valid. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True if the check was successful. + bool verify() const; + };
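// [Editorial sketch, not part of the patch] A concrete instance of the mask
// view described above, assuming a 32-bit register bank GPRBank: one 64-bit
// value represented as two contiguous 32-bit partial mappings.
#include "llvm/CodeGen/RegisterBankInfo.h"
#include <cassert>
static void splitInHalves(const llvm::RegisterBank &GPRBank) {
  using PM = llvm::RegisterBankInfo::PartialMapping;
  PM Lo(/*StartIdx=*/0, /*Length=*/32, GPRBank);  // covers bits [0, 31]
  PM Hi(/*StartIdx=*/32, /*Length=*/32, GPRBank); // covers bits [32, 63]
  assert(Lo.getHighBitIdx() == 31 && Hi.getHighBitIdx() == 63);
}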
+ /// Helper struct that represents how a value is mapped through + /// different register banks. + /// + /// \note: So far we do not have any users of the complex mappings + /// (mappings with more than one partial mapping), but when we do, + /// we will need to duplicate the partial mappings. + /// The alternative could be to use an array of pointers to partial + /// mappings (i.e., PartialMapping **BreakDown) and duplicate the + /// pointers instead. + /// + /// E.g., + /// Let's say we have a 32-bit add and a <2 x 32-bit> vadd. We + /// can expand the + /// <2 x 32-bit> vadd into 2 x 32-bit adds. + /// + /// Currently the TableGen-like file would look like: + /// \code + /// PartialMapping[] = { + /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first + /// // vec elt. + /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR}, + /// /*<2x32-bit> vadd*/ {0, 64, VPR} + /// }; // PartialMapping duplicated. + /// + /// ValueMapping[] { + /// /*plain 32-bit add*/ {&PartialMapping[0], 1}, + /// /*expanded vadd on 2xadd*/ {&PartialMapping[1], 2}, + /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1} + /// }; + /// \endcode + /// + /// With the array of pointers, we would have: + /// \code + /// PartialMapping[] = { + /// /*32-bit add lower */ { 0, 32, GPR}, + /// /*32-bit add upper */ {32, 32, GPR}, + /// /*<2x32-bit> vadd */ { 0, 64, VPR} + /// }; // No more duplication. + /// + /// BreakDowns[] = { + /// /*AddBreakDown*/ &PartialMapping[0], + /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1], + /// /*VAddBreakDown*/ &PartialMapping[2] + /// }; // Addresses of PartialMapping duplicated (smaller). + /// + /// ValueMapping[] { + /// /*plain 32-bit add*/ {&BreakDowns[0], 1}, + /// /*expanded vadd on 2xadd*/ {&BreakDowns[1], 2}, + /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1} + /// }; + /// \endcode + /// + /// Given that a PartialMapping is small, the code size + /// impact is actually a degradation. Moreover, the compile time will + /// be hit by the additional indirection. + /// If PartialMapping gets bigger we may reconsider. + struct ValueMapping { + /// How the value is broken down between the different register banks. + const PartialMapping *BreakDown; + + /// Number of partial mappings used to break down this value. + unsigned NumBreakDowns; + + /// The default constructor creates an invalid (isValid() == false) + /// instance. + ValueMapping() : ValueMapping(nullptr, 0) {} + + /// Initialize a ValueMapping with the given parameters. + /// \p BreakDown needs to have a lifetime at least as long + /// as this instance. + ValueMapping(const PartialMapping *BreakDown, unsigned NumBreakDowns) + : BreakDown(BreakDown), NumBreakDowns(NumBreakDowns) {} + + /// Iterators through the PartialMappings. + const PartialMapping *begin() const { return BreakDown; } + const PartialMapping *end() const { return BreakDown + NumBreakDowns; } + + /// \return true if all partial mappings are the same size and register + /// bank. + bool partsAllUniform() const; + + /// Check if this ValueMapping is valid. + bool isValid() const { return BreakDown && NumBreakDowns; } + + /// Verify that this mapping makes sense for a value of + /// \p MeaningfulBitWidth. + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True if the check was successful. + bool verify(unsigned MeaningfulBitWidth) const; + + /// Print this on dbgs() stream. + void dump() const; + + /// Print this on \p OS. + void print(raw_ostream &OS) const; + }; + + /// Helper class that represents how the value of an instruction may be + /// mapped and what the related cost of such a mapping is. + class InstructionMapping { + /// Identifier of the mapping. + /// This is used to communicate between the target and the optimizers + /// which mapping should be realized. + unsigned ID = InvalidMappingID; + + /// Cost of this mapping. + unsigned Cost = 0; + + /// Mapping of all the operands. + const ValueMapping *OperandsMapping = nullptr;
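// [Editorial sketch, not part of the patch] Walking a ValueMapping's
// breakdown through the begin()/end() iterators declared above; VM stands
// for any valid mapping.
#include "llvm/CodeGen/RegisterBankInfo.h"
static unsigned coveredBits(const llvm::RegisterBankInfo::ValueMapping &VM) {
  unsigned Bits = 0;
  for (const llvm::RegisterBankInfo::PartialMapping &PM : VM)
    Bits += PM.Length; // the partial mappings tile the original value
  return Bits;
}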
+ /// Number of operands. + unsigned NumOperands = 0; + + const ValueMapping &getOperandMapping(unsigned i) { + assert(i < getNumOperands() && "Out of bound operand"); + return OperandsMapping[i]; + } + + public: + /// Constructor for the mapping of an instruction. + /// \p NumOperands must be equal to the number of operands of + /// the related instruction. + /// The rationale is that it is more efficient for the optimizers + /// to be able to assume that the mapping of the ith operand is + /// at the index i. + InstructionMapping(unsigned ID, unsigned Cost, + const ValueMapping *OperandsMapping, + unsigned NumOperands) + : ID(ID), Cost(Cost), OperandsMapping(OperandsMapping), + NumOperands(NumOperands) {} + + /// Default constructor. + /// Use this constructor to express that the mapping is invalid. + InstructionMapping() = default; + + /// Get the cost. + unsigned getCost() const { return Cost; } + + /// Get the ID. + unsigned getID() const { return ID; } + + /// Get the number of operands. + unsigned getNumOperands() const { return NumOperands; } + + /// Get the value mapping of the ith operand. + /// \pre The mapping for the ith operand has been set. + /// \pre The ith operand is a register. + const ValueMapping &getOperandMapping(unsigned i) const { + const ValueMapping &ValMapping = + const_cast(this)->getOperandMapping(i); + return ValMapping; + } + + /// Set the mapping for all the operands. + /// In other words, OpdsMapping should hold at least getNumOperands + /// ValueMappings. + void setOperandsMapping(const ValueMapping *OpdsMapping) { + OperandsMapping = OpdsMapping; + } + + /// Check whether this object is valid. + /// This is a lightweight check that catches obviously wrong instances. + bool isValid() const { + return getID() != InvalidMappingID && OperandsMapping; + } + + /// Verify that this mapping makes sense for \p MI. + /// \pre \p MI must be connected to a MachineFunction. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True if the check was successful. + bool verify(const MachineInstr &MI) const; + + /// Print this on dbgs() stream. + void dump() const; + + /// Print this on \p OS. + void print(raw_ostream &OS) const; + }; + + /// Convenient type to represent the alternatives for mapping an + /// instruction. + /// \todo When we move to TableGen this should be an array ref. + using InstructionMappings = SmallVector; + + /// Helper class used to get/create the virtual registers that will be used + /// to replace the MachineOperand when applying a mapping. + class OperandsMapper { + /// The OpIdx-th cell contains the index in NewVRegs where the VRegs of the + /// OpIdx-th operand start. -1 means we do not have such a mapping yet. + /// Note: We use a SmallVector to avoid heap allocation in most cases. + SmallVector OpToNewVRegIdx; + + /// Hold the registers that will be used to map MI with InstrMapping. + SmallVector NewVRegs; + + /// Current MachineRegisterInfo, used to create new virtual registers. + MachineRegisterInfo &MRI; + + /// Instruction being remapped. + MachineInstr &MI; + + /// New mapping of the instruction. + const InstructionMapping &InstrMapping; + + /// Constant value identifying that the index in OpToNewVRegIdx + /// for an operand has not been set yet. + static const int DontKnowIdx; + + /// Get the range in NewVRegs to store all the partial + /// values for the \p OpIdx-th operand. + /// + /// \return The iterator range for the space created.
+ /// + /// \pre getMI().getOperand(OpIdx).isReg() + iterator_range::iterator> + getVRegsMem(unsigned OpIdx); + + /// Get the end iterator for a range starting at \p StartIdx and + /// spanning \p NumVal in NewVRegs. + /// \pre StartIdx + NumVal <= NewVRegs.size() + SmallVectorImpl::const_iterator + getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) const; + SmallVectorImpl::iterator getNewVRegsEnd(unsigned StartIdx, + unsigned NumVal); + + public: + /// Create an OperandsMapper that will hold the information to apply \p + /// InstrMapping to \p MI. + /// \pre InstrMapping.verify(MI) + OperandsMapper(MachineInstr &MI, const InstructionMapping &InstrMapping, + MachineRegisterInfo &MRI); + + /// \name Getters. + /// @{ + /// The MachineInstr being remapped. + MachineInstr &getMI() const { return MI; } + + /// The final mapping of the instruction. + const InstructionMapping &getInstrMapping() const { return InstrMapping; } + + /// The MachineRegisterInfo we used to realize the mapping. + MachineRegisterInfo &getMRI() const { return MRI; } + /// @} + + /// Create as many new virtual registers as needed for the mapping of the \p + /// OpIdx-th operand. + /// The number of registers is determined by the number of breakdowns for the + /// related operand in the instruction mapping. + /// The type of the new registers is a plain scalar of the right size. + /// The proper type is expected to be set when the mapping is applied to + /// the instruction(s) that realize the mapping. + /// + /// \pre getMI().getOperand(OpIdx).isReg() + /// + /// \post All the partial mappings of the \p OpIdx-th operand have been + /// assigned a new virtual register. + void createVRegs(unsigned OpIdx); + + /// Set the virtual register of the \p PartialMapIdx-th partial mapping of + /// the OpIdx-th operand to \p NewVReg. + /// + /// \pre getMI().getOperand(OpIdx).isReg() + /// \pre getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() > + /// PartialMapIdx + /// \pre NewVReg != 0 + /// + /// \post the \p PartialMapIdx-th register of the value mapping of the \p + /// OpIdx-th operand has been set. + void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, Register NewVReg); + + /// Get all the virtual registers required to map the \p OpIdx-th operand of + /// the instruction. + /// + /// This returns an empty range when createVRegs or setVRegs has not been + /// called. + /// The iterator may be invalidated by a call to setVRegs or createVRegs. + /// + /// When \p ForDebug is true, we will not check that the list of new virtual + /// registers does not contain uninitialized values. + /// + /// \pre getMI().getOperand(OpIdx).isReg() + /// \pre ForDebug || All partial mappings have been assigned a register + iterator_range::const_iterator> + getVRegs(unsigned OpIdx, bool ForDebug = false) const; + + /// Print this operands mapper on dbgs() stream. + void dump() const; + + /// Print this operands mapper on \p OS stream. + void print(raw_ostream &OS, bool ForDebug = false) const; + }; + +protected: + /// Hold the set of supported register banks. + RegisterBank **RegBanks; + + /// Total number of register banks. + unsigned NumRegBanks; + + /// Keep dynamically allocated PartialMapping in a separate map. + /// This shouldn't be needed when everything gets TableGen'ed. + mutable DenseMap> + MapOfPartialMappings;
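// [Editorial sketch, not part of the patch] The OperandsMapper workflow
// declared above: create one virtual register per partial mapping of an
// operand, then read them back. Operand index 0 is illustrative.
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
static void materializeOperand(
    llvm::MachineInstr &MI,
    const llvm::RegisterBankInfo::InstructionMapping &IM,
    llvm::MachineRegisterInfo &MRI) {
  llvm::RegisterBankInfo::OperandsMapper OpdMapper(MI, IM, MRI);
  OpdMapper.createVRegs(/*OpIdx=*/0); // plain scalars of the mapped sizes
  for (llvm::Register R : OpdMapper.getVRegs(/*OpIdx=*/0))
    (void)R; // one register per PartialMapping of operand 0
}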
+ /// Keep dynamically allocated ValueMapping in a separate map. + /// This shouldn't be needed when everything gets TableGen'ed. + mutable DenseMap> + MapOfValueMappings; + + /// Keep dynamically allocated array of ValueMapping in a separate map. + /// This shouldn't be needed when everything gets TableGen'ed. + mutable DenseMap> + MapOfOperandsMappings; + + /// Keep dynamically allocated InstructionMapping in a separate map. + /// This shouldn't be needed when everything gets TableGen'ed. + mutable DenseMap> + MapOfInstructionMappings; + + /// Getting the minimal register class of a physreg is expensive. + /// Cache this information as we get it. + mutable DenseMap PhysRegMinimalRCs; + + /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks + /// RegisterBank instances. + RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); + + /// This constructor is meaningless. + /// It just provides a default constructor that can be used at link time + /// when GlobalISel is not built. + /// That way, targets can still inherit from this class without doing + /// crazy gymnastics to avoid link-time failures. + /// \note That works because the constructor is inlined. + RegisterBankInfo() { + llvm_unreachable("This constructor should not be executed"); + } + + /// Get the register bank identified by \p ID. + RegisterBank &getRegBank(unsigned ID) { + assert(ID < getNumRegBanks() && "Accessing an unknown register bank"); + return *RegBanks[ID]; + } + + /// Get the MinimalPhysRegClass for Reg. + /// \pre Reg is a physical register. + const TargetRegisterClass & + getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const; + + /// Try to get the mapping of \p MI. + /// See getInstrMapping for more details on what a mapping represents. + /// + /// Unlike getInstrMapping the returned InstructionMapping may be invalid + /// (isValid() == false). + /// This means that the target independent code is not smart enough + /// to get the mapping of \p MI and thus, the target has to provide the + /// information for \p MI. + /// + /// This implementation is able to get the mapping of: + /// - Target specific instructions by looking at the encoding constraints. + /// - Any instruction if all the register operands have already been assigned + /// a register, a register class, or a register bank. + /// - Copies and phis if at least one of the operands has been assigned a + /// register, a register class, or a register bank. + /// In other words, this method will likely fail to find a mapping for + /// any generic opcode that has not been lowered by target specific code. + const InstructionMapping &getInstrMappingImpl(const MachineInstr &MI) const; + + /// Get the uniquely generated PartialMapping for the + /// given arguments. + const PartialMapping &getPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const; + + /// \name Methods to get a uniquely generated ValueMapping. + /// @{ + + /// The most common ValueMapping consists of a single PartialMapping. + /// Feature a method for that. + const ValueMapping &getValueMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const; + + /// Get the ValueMapping for the given arguments. + const ValueMapping &getValueMapping(const PartialMapping *BreakDown, + unsigned NumBreakDowns) const; + /// @}
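// [Editorial sketch, not part of the patch] Combining the uniquing helpers
// of this section from a hypothetical target subclass: all three operands
// of a 32-bit instruction placed in one bank. getOperandsMapping and
// getInstructionMapping are declared just below; the ID and cost values
// are illustrative.
#include "llvm/CodeGen/RegisterBankInfo.h"
class ExampleTargetRBI : public llvm::RegisterBankInfo {
  const InstructionMapping &mapAllTo(const llvm::RegisterBank &Bank) const {
    const ValueMapping &VM =
        getValueMapping(/*StartIdx=*/0, /*Length=*/32, Bank);
    return getInstructionMapping(/*ID=*/1, /*Cost=*/1,
                                 getOperandsMapping({&VM, &VM, &VM}),
                                 /*NumOperands=*/3);
  }
};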
+ /// \name Methods to get a uniquely generated array of ValueMapping. + /// @{ + + /// Get the uniquely generated array of ValueMapping for the + /// elements between \p Begin and \p End. + /// + /// Elements that are nullptr will be replaced by + /// invalid ValueMapping (ValueMapping::isValid == false). + /// + /// \pre The pointers to ValueMapping between \p Begin and \p End + /// must uniquely identify a ValueMapping. Otherwise, there is no + /// guarantee that the returned instance will be unique, i.e., another + /// OperandsMapping could have the same content. + template + const ValueMapping *getOperandsMapping(Iterator Begin, Iterator End) const; + + /// Get the uniquely generated array of ValueMapping for the + /// elements of \p OpdsMapping. + /// + /// Elements of \p OpdsMapping that are nullptr will be replaced by + /// invalid ValueMapping (ValueMapping::isValid == false). + const ValueMapping *getOperandsMapping( + const SmallVectorImpl &OpdsMapping) const; + + /// Get the uniquely generated array of ValueMapping for the + /// given arguments. + /// + /// Arguments that are nullptr will be replaced by invalid + /// ValueMapping (ValueMapping::isValid == false). + const ValueMapping *getOperandsMapping( + std::initializer_list OpdsMapping) const; + /// @} + + /// \name Methods to get a uniquely generated InstructionMapping. + /// @{ + +private: + /// Method to get a uniquely generated InstructionMapping. + const InstructionMapping & + getInstructionMappingImpl(bool IsInvalid, unsigned ID = InvalidMappingID, + unsigned Cost = 0, + const ValueMapping *OperandsMapping = nullptr, + unsigned NumOperands = 0) const; + +public: + /// Method to get a uniquely generated InstructionMapping. + const InstructionMapping & + getInstructionMapping(unsigned ID, unsigned Cost, + const ValueMapping *OperandsMapping, + unsigned NumOperands) const { + return getInstructionMappingImpl(/*IsInvalid*/ false, ID, Cost, + OperandsMapping, NumOperands); + } + + /// Method to get a uniquely generated invalid InstructionMapping. + const InstructionMapping &getInvalidInstructionMapping() const { + return getInstructionMappingImpl(/*IsInvalid*/ true); + } + /// @} + + /// Get the register bank for the \p OpIdx-th operand of \p MI from + /// the encoding constraints, if any. + /// + /// \return A register bank that covers the register class of the + /// related encoding constraints or nullptr if \p MI did not provide + /// enough information to deduce it. + const RegisterBank * + getRegBankFromConstraints(const MachineInstr &MI, unsigned OpIdx, + const TargetInstrInfo &TII, + const MachineRegisterInfo &MRI) const; + + /// Helper method to apply something that is like the default mapping. + /// Basically, that means that \p OpdMapper.getMI() is left untouched + /// aside from the reassignment of the register operands that have been + /// remapped. + /// + /// The types of all the new registers that have been created by the + /// mapper are properly remapped to the types of the original registers + /// they replace. In other words, the semantics of the instruction do + /// not change, only the register banks. + /// + /// If the mapping of one of the operands spans several registers, this + /// method will abort as this is not like a default mapping anymore. + /// + /// \pre For OpIdx in {0..\p OpdMapper.getMI().getNumOperands()) + /// the range OpdMapper.getVRegs(OpIdx) is empty or of size 1. + static void applyDefaultMapping(const OperandsMapper &OpdMapper); + + /// See ::applyMapping. + virtual void applyMappingImpl(const OperandsMapper &OpdMapper) const { + llvm_unreachable("The target has to implement that part"); + } + +public: + virtual ~RegisterBankInfo() = default;
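// [Editorial sketch, not part of the patch] The select-then-apply sequence
// these hooks feed into; getInstrMapping and applyMapping are declared
// further down in this class.
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
static void assignBanks(const llvm::RegisterBankInfo &RBI,
                        llvm::MachineInstr &MI,
                        llvm::MachineRegisterInfo &MRI) {
  const llvm::RegisterBankInfo::InstructionMapping &IM =
      RBI.getInstrMapping(MI);
  if (!IM.isValid())
    return; // the target could not map this instruction
  llvm::RegisterBankInfo::OperandsMapper OpdMapper(MI, IM, MRI);
  RBI.applyMapping(OpdMapper); // DefaultMappingID -> applyDefaultMapping
}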
+ + /// Get the register bank identified by \p ID. + const RegisterBank &getRegBank(unsigned ID) const { + return const_cast(this)->getRegBank(ID); + } + + /// Get the register bank of \p Reg. + /// If Reg has not been assigned a register, a register class, + /// or a register bank, then this returns nullptr. + /// + /// \pre Reg != 0 (NoRegister) + const RegisterBank *getRegBank(Register Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// Get the total number of register banks. + unsigned getNumRegBanks() const { return NumRegBanks; } + + /// Get a register bank that covers \p RC. + /// + /// \pre \p RC is a user-defined register class (as opposed to one + /// generated by TableGen). + /// + /// \note The mapping RC -> RegBank could be built while adding the + /// coverage for the register banks. However, we do not do it, because, + /// at least for now, we only need this information for register classes + /// that are used in the description of instructions. In other words, + /// there are just a handful of them and we do not want to waste space. + /// + /// \todo This should be TableGen'ed. + virtual const RegisterBank & + getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const { + llvm_unreachable("The target must override this method"); + } + + /// Get the cost of a copy from \p B to \p A, or put differently, + /// get the cost of A = COPY B. Since register banks may cover + /// different sizes, \p Size specifies the size in bits + /// that will be copied around. + /// + /// \note Since this is a copy, both registers have the same size. + virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, + unsigned Size) const { + // Optimistically assume that copies are coalesced. I.e., when + // they are on the same bank, they are free. + // Otherwise assume a non-zero cost of 1. The targets are supposed + // to override that properly anyway if they care. + return &A != &B; + } + + /// \returns true if emitting a copy from \p Src to \p Dst is impossible. + bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, + unsigned Size) const { + return copyCost(Dst, Src, Size) == std::numeric_limits::max(); + } + + /// Get the cost of using \p ValMapping to decompose a register. This is + /// similar to ::copyCost, except for cases where multiple copy-like + /// operations need to be inserted. If the register is used as a source + /// operand and already has a bank assigned, \p CurBank is non-null. + virtual unsigned + getBreakDownCost(const ValueMapping &ValMapping, + const RegisterBank *CurBank = nullptr) const { + return std::numeric_limits::max(); + } + + /// Constrain the (possibly generic) virtual register \p Reg to \p RC. + /// + /// \pre \p Reg is a virtual register that either has a bank or a class. + /// \returns The constrained register class, or nullptr if there is none. + /// \note This is a generic variant of MachineRegisterInfo::constrainRegClass + /// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel + /// purpose, including non-select passes of GlobalISel + static const TargetRegisterClass * + constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, + MachineRegisterInfo &MRI); + + /// Identifier used when the related instruction mapping instance + /// is generated by target independent code. + /// Make sure not to use that identifier to avoid possible collision.
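// [Editorial sketch, not part of the patch] Overriding the copy-cost hook
// above: the default returns &A != &B, and a target can refine it. The
// cost values are illustrative.
#include "llvm/CodeGen/RegisterBankInfo.h"
class ExampleTargetRBIWithCosts : public llvm::RegisterBankInfo {
  unsigned copyCost(const llvm::RegisterBank &A, const llvm::RegisterBank &B,
                    unsigned Size) const override {
    if (&A == &B)
      return 0; // same bank: expect the copy to be coalesced away
    return Size <= 32 ? 2 : 4; // cross-bank copies get costlier with width
  }
};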
+ static const unsigned DefaultMappingID; + + /// Identifier used when the related instruction mapping instance + /// is generated by the default constructor. + /// Make sure not to use that identifier. + static const unsigned InvalidMappingID; + + /// Get the mapping of the different operands of \p MI + /// on the register bank. + /// This mapping should be the direct translation of \p MI. + /// In other words, when \p MI is mapped with the returned mapping, + /// only the register banks of the operands of \p MI need to be updated. + /// In particular, neither the opcode nor the type of \p MI needs to be + /// updated for this direct mapping. + /// + /// The target independent implementation gives a mapping based on + /// the register classes for the target specific opcode. + /// It uses the ID RegisterBankInfo::DefaultMappingID for that mapping. + /// Make sure you do not use that ID for the alternative mapping + /// for MI. See getInstrAlternativeMappings for the alternative + /// mappings. + /// + /// For instance, if \p MI is a vector add, the mapping should + /// not be a scalarization of the add. + /// + /// \post returnedVal.verify(MI). + /// + /// \note If returnedVal does not verify MI, this would probably mean + /// that the target does not support that instruction. + virtual const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const; + + /// Get the alternative mappings for \p MI. + /// Alternative in the sense different from getInstrMapping. + virtual InstructionMappings + getInstrAlternativeMappings(const MachineInstr &MI) const; + + /// Get the possible mapping for \p MI. + /// A mapping defines where the different operands may live and at what cost. + /// For instance, let us consider: + /// v0(16) = G_ADD <2 x i8> v1, v2 + /// The possible mapping could be: + /// + /// {/*ID*/VectorAdd, /*Cost*/1, /*v0*/{(0xFFFF, VPR)}, /*v1*/{(0xFFFF, VPR)}, + /// /*v2*/{(0xFFFF, VPR)}} + /// {/*ID*/ScalarAddx2, /*Cost*/2, /*v0*/{(0x00FF, GPR),(0xFF00, GPR)}, + /// /*v1*/{(0x00FF, GPR),(0xFF00, GPR)}, + /// /*v2*/{(0x00FF, GPR),(0xFF00, GPR)}} + /// + /// \note The first alternative of the returned mapping should be the + /// direct translation of \p MI current form. + /// + /// \post !returnedVal.empty(). + InstructionMappings getInstrPossibleMappings(const MachineInstr &MI) const; + + /// Apply \p OpdMapper.getInstrMapping() to \p OpdMapper.getMI(). + /// After this call \p OpdMapper.getMI() may not be valid anymore. + /// \p OpdMapper.getInstrMapping().getID() carries the information of + /// what has been chosen to map \p OpdMapper.getMI(). This ID is set + /// by the various getInstrXXXMapping method. + /// + /// Therefore, getting the mapping and applying it should be kept in + /// sync. + void applyMapping(const OperandsMapper &OpdMapper) const { + // The only mapping we know how to handle is the default mapping. + if (OpdMapper.getInstrMapping().getID() == DefaultMappingID) + return applyDefaultMapping(OpdMapper); + // For other mapping, the target needs to do the right thing. + // If that means calling applyDefaultMapping, fine, but this + // must be explicitly stated. + applyMappingImpl(OpdMapper); + } + + /// Get the size in bits of \p Reg. + /// Utility method to get the size of any registers. Unlike + /// MachineRegisterInfo::getSize, the register does not need to be a + /// virtual register. + /// + /// \pre \p Reg != 0 (NoRegister). 
+ unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// Check that the information held by this instance makes sense for the + /// given \p TRI. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True if the check was successful. + bool verify(const TargetRegisterInfo &TRI) const; +}; + +inline raw_ostream & +operator<<(raw_ostream &OS, + const RegisterBankInfo::PartialMapping &PartMapping) { + PartMapping.print(OS); + return OS; +} + +inline raw_ostream & +operator<<(raw_ostream &OS, const RegisterBankInfo::ValueMapping &ValMapping) { + ValMapping.print(OS); + return OS; +} + +inline raw_ostream & +operator<<(raw_ostream &OS, + const RegisterBankInfo::InstructionMapping &InstrMapping) { + InstrMapping.print(OS); + return OS; +} + +inline raw_ostream & +operator<<(raw_ostream &OS, const RegisterBankInfo::OperandsMapper &OpdMapper) { + OpdMapper.print(OS, /*ForDebug*/ false); + return OS; +} + +/// Hashing function for PartialMapping. +/// It is required for the hashing of ValueMapping. +hash_code hash_value(const RegisterBankInfo::PartialMapping &PartMapping); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_REGISTERBANKINFO_H diff --git a/llvm/include/llvm/CodeGen/RegisterClassInfo.h b/llvm/include/llvm/CodeGen/RegisterClassInfo.h index d82f1db60d8b..39c72a42c433 100644 --- a/llvm/include/llvm/CodeGen/RegisterClassInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterClassInfo.h @@ -20,8 +20,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include +#include "llvm/MC/MCRegister.h" #include #include @@ -61,6 +60,10 @@ class RegisterClassInfo { // Map register alias to the callee saved Register. SmallVector CalleeSavedAliases; + // Indicate whether a specified callee-saved register should be in the + // allocation order exactly as written in the tablegen descriptions, or be + // listed later. + BitVector IgnoreCSRForAllocOrder; + // Reserved registers in the current MF. BitVector Reserved; diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h index 1deeb4d41511..c40c0eec80ec 100644 --- a/llvm/include/llvm/CodeGen/RegisterPressure.h +++ b/llvm/include/llvm/CodeGen/RegisterPressure.h @@ -22,7 +22,6 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include -#include #include #include #include diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h index 218e05f6eb6b..1f0cd273bf61 100644 --- a/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -70,6 +70,26 @@ class RegScavenger { public: RegScavenger() = default; + /// Record that \p Reg is in use at scavenging index \p FI. This is for + /// targets which need to directly manage the spilling process, and need to + /// update the scavenger's internal state. It is expected that this will be + /// called a second time with \p Restore set to a non-null value, so that the + /// externally inserted restore instruction resets the scavenged slot + /// liveness when encountered.
+ void assignRegToScavengingIndex(int FI, Register Reg, + MachineInstr *Restore = nullptr) { + for (ScavengedInfo &Slot : Scavenged) { + if (Slot.FrameIndex == FI) { + assert(!Slot.Reg || Slot.Reg == Reg); + Slot.Reg = Reg; + Slot.Restore = Restore; + return; + } + } + + llvm_unreachable("did not find scavenging index"); + } + /// Start tracking liveness from the begin of basic block \p MBB. void enterBasicBlock(MachineBasicBlock &MBB); diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index bf347c0753e5..8b406a275025 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -20,9 +20,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include #include diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h index 7c0ebe7191e4..c71aca0c992b 100644 --- a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h +++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h @@ -1,4 +1,4 @@ -//===- ReplaceWithVeclib.h - Replace vector instrinsics with veclib calls -===// +//===- ReplaceWithVeclib.h - Replace vector intrinsics with veclib calls --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -17,8 +17,10 @@ #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" namespace llvm { +class Function; struct ReplaceWithVeclib : public PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index af8c0cd8756e..f1c377f76d02 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -16,7 +16,6 @@ #define LLVM_CODEGEN_SCHEDULEDAG_H #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" @@ -31,6 +30,7 @@ namespace llvm { +template struct GraphTraits; template class GraphWriter; class LLVMTargetMachine; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index 50b186de2b05..fb3900b4a9c1 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -16,10 +16,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseMultiSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/identity.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index e31719bcff0b..bcbd7ebcc0c9 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -20,7 +20,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/ilist.h" @@ -33,17 +32,13 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include 
"llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ArrayRecycler.h" -#include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/RecyclingAllocator.h" -#include #include #include #include @@ -55,6 +50,15 @@ namespace llvm { +class DIExpression; +class DILabel; +class DIVariable; +class Function; +class Pass; +class Type; +template struct GraphTraits; +template class SmallSetVector; +template struct FoldingSetTrait; class AAResults; class BlockAddress; class BlockFrequencyInfo; @@ -276,8 +280,16 @@ class SelectionDAG { DenseMap SDCallSiteDbgInfo; + /// PersistentId counter to be used when inserting the next + /// SDNode to this SelectionDAG. We do not place that under + /// `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` intentionally because + /// it adds unneeded complexity without noticeable + /// benefits (see discussion with @thakis in D120714). uint16_t NextPersistentId = 0; + /// Are instruction referencing variable locations desired for this function? + bool UseInstrRefDebugInfo = false; + public: /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. This allows the clients @@ -440,6 +452,9 @@ public: const DataLayout &getDataLayout() const { return MF->getDataLayout(); } const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } + template const STC &getSubtarget() const { + return MF->getSubtarget(); + } const TargetLowering &getTargetLoweringInfo() const { return *TLI; } const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } @@ -467,7 +482,7 @@ public: void viewGraph(const std::string &Title); void viewGraph(); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS std::map NodeGraphAttrs; #endif @@ -893,6 +908,11 @@ public: /// Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); + /// Create a vector-predicated logical NOT operation as (VP_XOR Val, + /// BooleanOne, Mask, EVL). + SDValue getVPLogicalNOT(const SDLoc &DL, SDValue Val, SDValue Mask, + SDValue EVL, EVT VT); + /// Returns sum of the base pointer and offset. /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default. 
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, @@ -1032,25 +1052,26 @@ public: const AAMDNodes &AAInfo = AAMDNodes()); SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVol, bool isTailCall, + SDValue Size, Align Alignment, bool isVol, + bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo = AAMDNodes()); SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - unsigned DstAlign, SDValue Src, unsigned SrcAlign, - SDValue Size, Type *SizeTy, unsigned ElemSz, - bool isTailCall, MachinePointerInfo DstPtrInfo, + SDValue Src, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); SDValue getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, - unsigned DstAlign, SDValue Src, unsigned SrcAlign, - SDValue Size, Type *SizeTy, unsigned ElemSz, - bool isTailCall, MachinePointerInfo DstPtrInfo, + SDValue Src, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); SDValue getAtomicMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, - unsigned DstAlign, SDValue Value, SDValue Size, - Type *SizeTy, unsigned ElemSz, bool isTailCall, + SDValue Value, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, MachinePointerInfo DstPtrInfo); /// Helper function to make it easier to build SetCC's if you just have an @@ -1070,14 +1091,24 @@ public: return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); } + /// Helper function to make it easier to build VP_SETCCs if you just have an + /// ISD::CondCode instead of an SDValue. + SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, + ISD::CondCode Cond, SDValue Mask, SDValue EVL) { + assert(LHS.getValueType().isVector() && RHS.getValueType().isVector() && + "Cannot compare scalars"); + assert(Cond != ISD::SETCC_INVALID && + "Cannot create a setCC of an invalid node."); + return getNode(ISD::VP_SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Mask, + EVL); + } + /// Helper function to make it easier to build Select's if you just have /// operands and don't want to check for vector. SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS) { - assert(LHS.getValueType() == RHS.getValueType() && + assert(LHS.getValueType() == VT && RHS.getValueType() == VT && "Cannot use select on differing types"); - assert(VT.isVector() == LHS.getValueType().isVector() && - "Cannot mix vectors and scalars"); auto Opcode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; return getNode(Opcode, DL, VT, Cond, LHS, RHS); } @@ -1149,7 +1180,7 @@ public: uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) { // Ensure that codegen never sees alignment 0 return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo, - Alignment.getValueOr(getEVTAlign(MemVT)), Flags, + Alignment.value_or(getEVTAlign(MemVT)), Flags, Size, AAInfo); } @@ -1230,7 +1261,7 @@ public: const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { // Ensures that codegen never sees a None Alignment. return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT, - Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo, + Alignment.value_or(getEVTAlign(MemVT)), MMOFlags, AAInfo, Ranges); } /// FIXME: Remove once transition to Align is over. 
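// [Editorial sketch, not part of the patch] The getValueOr -> value_or
// renames in the hunks above track the std::optional spelling; the idiom
// itself is unchanged:
#include "llvm/Support/Alignment.h"
static llvm::Align alignOrDefault(llvm::MaybeAlign A, llvm::Align Default) {
  return A.value_or(Default); // previously A.getValueOr(Default)
}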
@@ -1264,7 +1295,7 @@ public: MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getStore(Chain, dl, Val, Ptr, PtrInfo, - Alignment.getValueOr(getEVTAlign(Val.getValueType())), + Alignment.value_or(getEVTAlign(Val.getValueType())), MMOFlags, AAInfo); } /// FIXME: Remove once transition to Align is over. @@ -1290,7 +1321,7 @@ public: MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT, - Alignment.getValueOr(getEVTAlign(SVT)), MMOFlags, + Alignment.value_or(getEVTAlign(SVT)), MMOFlags, AAInfo); } /// FIXME: Remove once transition to Align is over. @@ -1323,7 +1354,7 @@ public: const MDNode *Ranges = nullptr, bool IsExpanding = false) { // Ensures that codegen never sees a None Alignment. return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, - PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)), + PtrInfo, MemVT, Alignment.value_or(getEVTAlign(MemVT)), MMOFlags, AAInfo, Ranges, IsExpanding); } SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, @@ -1364,6 +1395,77 @@ public: SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); + SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, + SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, + Align Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, + const MDNode *Ranges = nullptr, + bool IsExpanding = false); + inline SDValue getStridedLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment = MaybeAlign(), + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, + bool IsExpanding = false) { + // Ensures that codegen never sees a None Alignment. 
+ return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, + Mask, EVL, PtrInfo, MemVT, + Alignment.value_or(getEVTAlign(MemVT)), MMOFlags, + AAInfo, Ranges, IsExpanding); + } + SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, + SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, EVT MemVT, MachineMemOperand *MMO, + bool IsExpanding = false); + SDValue getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, + SDValue Stride, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, + const MDNode *Ranges = nullptr, + bool IsExpanding = false); + SDValue getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, + SDValue Stride, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, bool IsExpanding = false); + SDValue + getExtStridedLoadVP(ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, + SDValue Chain, SDValue Ptr, SDValue Stride, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsExpanding = false); + SDValue getExtStridedLoadVP(ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, + SDValue Chain, SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding = false); + SDValue getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM); + SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, + SDValue Ptr, SDValue Offset, SDValue Stride, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexedMode AM, + bool IsTruncating = false, + bool IsCompressing = false); + SDValue getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, + SDValue Ptr, SDValue Stride, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, + EVT SVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, + bool IsCompressing = false); + SDValue getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, + SDValue Ptr, SDValue Stride, SDValue Mask, + SDValue EVL, EVT SVT, MachineMemOperand *MMO, + bool IsCompressing = false); + SDValue getIndexedStridedStoreVP(SDValue OrigStore, const SDLoc &DL, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM); + SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType); @@ -1412,6 +1514,11 @@ public: /// Return an AssertAlignSDNode. SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A); + /// Swap N1 and N2 if Opcode is a commutative binary opcode + /// and the canonical form expects the opposite order. + void canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, + SDValue &N2) const; + /// Return the specified value casted to /// the target's desired shift amount type. SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); @@ -1702,6 +1809,16 @@ public: /// function mirrors \c llvm::salvageDebugInfo. void salvageDebugInfo(SDNode &N); + /// Signal whether instruction referencing variable locations are desired for + /// this function's debug-info. 
+ void useInstrRefDebugInfo(bool Flag) { + UseInstrRefDebugInfo = Flag; + } + + bool getUseInstrRefDebugInfo() const { + return UseInstrRefDebugInfo; + } + void dump() const; /// In most cases this function returns the ABI alignment for a given type, @@ -1745,16 +1862,6 @@ public: /// simplify nodes with multiple uses more aggressively.) SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits); - /// See if the specified operand can be simplified with the knowledge that - /// only the bits specified by DemandedBits are used in the elements specified - /// by DemandedElts. If so, return the simpler operand, otherwise return a - /// null SDValue. - /// - /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can - /// simplify nodes with multiple uses more aggressively.) - SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits, - const APInt &DemandedElts); - /// Return true if the sign bit of Op is known to be zero. /// We use this predicate to simplify operations downstream. bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const; @@ -1771,6 +1878,11 @@ public: bool MaskedValueIsZero(SDValue Op, const APInt &Mask, const APInt &DemandedElts, unsigned Depth = 0) const; + /// Return true if 'Op' is known to be zero in DemandedElts. We + /// use this predicate to simplify operations downstream. + bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, + unsigned Depth = 0) const; + /// Return true if '(Op & Mask) == Mask'. /// Op and Mask are known to be the same type. bool MaskedValueIsAllOnes(SDValue Op, const APInt &Mask, @@ -2020,11 +2132,6 @@ public: /// Compute the default alignment value for the given type. Align getEVTAlign(EVT MemoryVT) const; - /// Compute the default alignment value for the given type. - /// FIXME: Remove once transition to Align is over. - inline unsigned getEVTAlignment(EVT MemoryVT) const { - return getEVTAlign(MemoryVT).value(); - } /// Test whether the given value is a constant int or similar node. SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const; @@ -2039,39 +2146,34 @@ public: isConstantFPBuildVectorOrConstantFP(N); } - void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) { - SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo); + /// Set CallSiteInfo to be associated with Node. + void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo) { + SDCallSiteDbgInfo[Node].CSInfo = std::move(CallInfo); } - - CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) { - auto I = SDCallSiteDbgInfo.find(CallNode); - if (I != SDCallSiteDbgInfo.end()) - return std::move(I->second).CSInfo; - return CallSiteInfo(); + /// Return CallSiteInfo associated with Node, or a default if none exists. + CallSiteInfo getCallSiteInfo(const SDNode *Node) { + auto I = SDCallSiteDbgInfo.find(Node); + return I != SDCallSiteDbgInfo.end() ? std::move(I->second).CSInfo + : CallSiteInfo(); } - + /// Set HeapAllocSite to be associated with Node. void addHeapAllocSite(const SDNode *Node, MDNode *MD) { SDCallSiteDbgInfo[Node].HeapAllocSite = MD; } - - /// Return the HeapAllocSite type associated with the SDNode, if it exists. - MDNode *getHeapAllocSite(const SDNode *Node) { - auto It = SDCallSiteDbgInfo.find(Node); - if (It == SDCallSiteDbgInfo.end()) - return nullptr; - return It->second.HeapAllocSite; + /// Return HeapAllocSite associated with Node, or nullptr if none exists. + MDNode *getHeapAllocSite(const SDNode *Node) const { + auto I = SDCallSiteDbgInfo.find(Node); + return I != SDCallSiteDbgInfo.end() ? 
I->second.HeapAllocSite : nullptr; } - + /// Set NoMergeSiteInfo to be associated with Node if NoMerge is true. void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) { if (NoMerge) SDCallSiteDbgInfo[Node].NoMerge = NoMerge; } - - bool getNoMergeSiteInfo(const SDNode *Node) { + /// Return NoMerge info associated with Node. + bool getNoMergeSiteInfo(const SDNode *Node) const { auto I = SDCallSiteDbgInfo.find(Node); - if (I == SDCallSiteDbgInfo.end()) - return false; - return I->second.NoMerge; + return I != SDCallSiteDbgInfo.end() ? I->second.NoMerge : false; } /// Return the current function's default denormal handling kind for the given diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 0f3af915da64..e23eebec81db 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -49,7 +49,7 @@ public: SDValue getBase() const { return Base; } SDValue getIndex() { return Index; } SDValue getIndex() const { return Index; } - bool hasValidOffset() const { return Offset.hasValue(); } + bool hasValidOffset() const { return Offset.has_value(); } int64_t getOffset() const { return *Offset; } // Returns true if `Other` and `*this` are both some offset from the same base diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 9cea197724cc..35fb0bc80593 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -16,12 +16,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include namespace llvm { class AAResults; +class TargetInstrInfo; +class TargetMachine; class SelectionDAGBuilder; class SDValue; class MachineRegisterInfo; @@ -53,6 +54,7 @@ public: const TargetLowering *TLI; bool FastISelFailed; SmallPtrSet ElidedArgCopyInstrs; + bool UseInstrRefDebugInfo = false; /// Current optimization remark emitter. /// Used to report things like combines and FastISel failures. diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 04c6b50197d4..5974f13a296b 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -508,7 +508,7 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; - friend class VPLoadStoreSDNode; + friend class VPBaseLoadStoreSDNode; friend class MaskedLoadStoreSDNode; friend class MaskedGatherScatterSDNode; friend class VPGatherScatterSDNode; @@ -529,6 +529,7 @@ BEGIN_TWO_BYTE_PACK() class LoadSDNodeBitfields { friend class LoadSDNode; friend class VPLoadSDNode; + friend class VPStridedLoadSDNode; friend class MaskedLoadSDNode; friend class MaskedGatherSDNode; friend class VPGatherSDNode; @@ -542,6 +543,7 @@ BEGIN_TWO_BYTE_PACK() class StoreSDNodeBitfields { friend class StoreSDNode; friend class VPStoreSDNode; + friend class VPStridedStoreSDNode; friend class MaskedStoreSDNode; friend class MaskedScatterSDNode; friend class VPScatterSDNode; @@ -613,8 +615,10 @@ private: SDNodeFlags Flags; public: - /// Unique and persistent id per SDNode in the DAG. - /// Used for debug printing. + /// Unique and persistent id per SDNode in the DAG. Used for debug printing. 
+ /// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` + /// intentionally because it adds unneeded complexity without noticeable + /// benefits (see discussion with @thakis in D120714). uint16_t PersistentId; //===--------------------------------------------------------------------===// @@ -1191,12 +1195,13 @@ inline void SDValue::dumpr(const SelectionDAG *G) const { inline void SDUse::set(const SDValue &V) { if (Val.getNode()) removeFromList(); Val = V; - if (V.getNode()) V.getNode()->addUse(*this); + if (V.getNode()) + V->addUse(*this); } inline void SDUse::setInitial(const SDValue &V) { Val = V; - V.getNode()->addUse(*this); + V->addUse(*this); } inline void SDUse::setNode(SDNode *N) { @@ -1364,6 +1369,7 @@ public: case ISD::VP_STORE: case ISD::MSTORE: case ISD::VP_SCATTER: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: return getOperand(2); case ISD::MGATHER: case ISD::MSCATTER: @@ -1407,6 +1413,8 @@ public: case ISD::VP_STORE: case ISD::VP_GATHER: case ISD::VP_SCATTER: + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: return true; default: return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); @@ -1661,6 +1669,9 @@ bool isAllOnesConstant(SDValue V); /// Returns true if \p V is a constant integer one. bool isOneConstant(SDValue V); +/// Returns true if \p V is a constant min signed integer value. +bool isMinSignedConstant(SDValue V); + /// Return the non-bitcasted source operand of \p V if it exists. /// If \p V is not a bitcasted value, it is returned as-is. SDValue peekThroughBitcasts(SDValue V); @@ -1677,6 +1688,11 @@ SDValue peekThroughExtractSubvectors(SDValue V); /// constant is canonicalized to be operand 1. bool isBitwiseNot(SDValue V, bool AllowUndefs = false); +/// If \p V is a bitwise not, returns the inverted operand. Otherwise returns +/// an empty SDValue. Only bits set in \p Mask are required to be inverted, +/// other bits may be arbitrary. +SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs); + /// Returns the SDNode if it is a constant splat BuildVector or constant int. ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false, bool AllowTruncation = false); @@ -2353,34 +2369,64 @@ public: } }; -/// This base class is used to represent VP_LOAD and VP_STORE nodes -class VPLoadStoreSDNode : public MemSDNode { +/// This base class is used to represent VP_LOAD, VP_STORE, +/// EXPERIMENTAL_VP_STRIDED_LOAD and EXPERIMENTAL_VP_STRIDED_STORE nodes +class VPBaseLoadStoreSDNode : public MemSDNode { public: friend class SelectionDAG; - VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, - SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, - MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + VPBaseLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &DL, SDVTList VTs, + ISD::MemIndexedMode AM, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, DL, VTs, MemVT, MMO) { LSBaseSDNodeBits.AddressingMode = AM; assert(getAddressingMode() == AM && "Value truncated"); } - // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL) - // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL) + // VPStridedStoreSDNode (Chain, Data, Ptr, Offset, Stride, Mask, EVL) + // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL) + // VPStridedLoadSDNode (Chain, Ptr, Offset, Stride, Mask, EVL) + // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL) // Mask is a vector of i1 elements; // the type of EVL is TLI.getVPExplicitVectorLengthTy(). 
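For orientation, the operand layouts in the comment above are what drive the index arithmetic in the accessors that follow. A minimal illustrative helper, not part of the patch (the function name is invented), that recovers the stride operand of the two new strided opcodes:

// Sketch only: the strided opcodes carry their stride immediately after the
// offset operand; plain VP_LOAD/VP_STORE have no stride operand at all.
static SDValue getVPStride(const SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    return N->getOperand(3); // (Chain, Ptr, Offset, Stride, Mask, EVL)
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    return N->getOperand(4); // (Chain, Data, Ptr, Offset, Stride, Mask, EVL)
  default:
    return SDValue();        // not a strided access
  }
}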
const SDValue &getOffset() const { - return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3); + return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD || + getOpcode() == ISD::VP_LOAD) + ? 2 + : 3); } const SDValue &getBasePtr() const { - return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2); + return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD || + getOpcode() == ISD::VP_LOAD) + ? 1 + : 2); } const SDValue &getMask() const { - return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4); + switch (getOpcode()) { + default: + llvm_unreachable("Invalid opcode"); + case ISD::VP_LOAD: + return getOperand(3); + case ISD::VP_STORE: + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + return getOperand(4); + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + return getOperand(5); + } } const SDValue &getVectorLength() const { - return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5); + switch (getOpcode()) { + default: + llvm_unreachable("Invalid opcode"); + case ISD::VP_LOAD: + return getOperand(4); + case ISD::VP_STORE: + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + return getOperand(5); + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + return getOperand(6); + } } /// Return the addressing mode for this load or store: @@ -2396,19 +2442,21 @@ public: bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE; + return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD || + N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE || + N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE; } }; /// This class is used to represent a VP_LOAD node -class VPLoadSDNode : public VPLoadStoreSDNode { +class VPLoadSDNode : public VPBaseLoadStoreSDNode { public: friend class SelectionDAG; VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding, EVT MemVT, MachineMemOperand *MMO) - : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) { + : VPBaseLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) { LoadSDNodeBits.ExtTy = ETy; LoadSDNodeBits.IsExpanding = isExpanding; } @@ -2428,15 +2476,45 @@ public: bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } }; +/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node. 
+class VPStridedLoadSDNode : public VPBaseLoadStoreSDNode { +public: + friend class SelectionDAG; + + VPStridedLoadSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs, + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, + bool IsExpanding, EVT MemVT, MachineMemOperand *MMO) + : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, Order, DL, VTs, + AM, MemVT, MMO) { + LoadSDNodeBits.ExtTy = ETy; + LoadSDNodeBits.IsExpanding = IsExpanding; + } + + ISD::LoadExtType getExtensionType() const { + return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); + } + + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getOffset() const { return getOperand(2); } + const SDValue &getStride() const { return getOperand(3); } + const SDValue &getMask() const { return getOperand(4); } + const SDValue &getVectorLength() const { return getOperand(5); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD; + } + bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } +}; + /// This class is used to represent a VP_STORE node -class VPStoreSDNode : public VPLoadStoreSDNode { +class VPStoreSDNode : public VPBaseLoadStoreSDNode { public: friend class SelectionDAG; VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, EVT MemVT, MachineMemOperand *MMO) - : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) { + : VPBaseLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) { StoreSDNodeBits.IsTruncating = isTrunc; StoreSDNodeBits.IsCompressing = isCompressing; } @@ -2463,6 +2541,43 @@ public: } }; +/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node. +class VPStridedStoreSDNode : public VPBaseLoadStoreSDNode { +public: + friend class SelectionDAG; + + VPStridedStoreSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs, + ISD::MemIndexedMode AM, bool IsTrunc, bool IsCompressing, + EVT MemVT, MachineMemOperand *MMO) + : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_STORE, Order, DL, + VTs, AM, MemVT, MMO) { + StoreSDNodeBits.IsTruncating = IsTrunc; + StoreSDNodeBits.IsCompressing = IsCompressing; + } + + /// Return true if this is a truncating store. + /// For integers this is the same as doing a TRUNCATE and storing the result. + /// For floats, it is the same as doing an FP_ROUND and storing the result. + bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } + + /// Returns true if the op does a compression to the vector before storing. + /// The node contiguously stores the active elements (integers or floats) + /// in src (those with their respective bit set in writemask k) to unaligned + /// memory at base_addr.
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } + + const SDValue &getValue() const { return getOperand(1); } + const SDValue &getBasePtr() const { return getOperand(2); } + const SDValue &getOffset() const { return getOperand(3); } + const SDValue &getStride() const { return getOperand(4); } + const SDValue &getMask() const { return getOperand(5); } + const SDValue &getVectorLength() const { return getOperand(6); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE; + } +}; + /// This base class is used to represent MLOAD and MSTORE nodes class MaskedLoadStoreSDNode : public MemSDNode { public: @@ -2588,13 +2703,9 @@ public: return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); } bool isIndexScaled() const { - return (getIndexType() == ISD::SIGNED_SCALED) || - (getIndexType() == ISD::UNSIGNED_SCALED); - } - bool isIndexSigned() const { - return (getIndexType() == ISD::SIGNED_SCALED) || - (getIndexType() == ISD::SIGNED_UNSCALED); + return !cast<ConstantSDNode>(getScale())->isOne(); } + bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); } // In the both nodes address is Op1, mask is Op2: // VPGatherSDNode (Chain, base, index, scale, mask, vlen) @@ -2675,17 +2786,10 @@ public: ISD::MemIndexType getIndexType() const { return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); } - void setIndexType(ISD::MemIndexType IndexType) { - LSBaseSDNodeBits.AddressingMode = IndexType; - } bool isIndexScaled() const { - return (getIndexType() == ISD::SIGNED_SCALED) || - (getIndexType() == ISD::UNSIGNED_SCALED); - } - bool isIndexSigned() const { - return (getIndexType() == ISD::SIGNED_SCALED) || - (getIndexType() == ISD::SIGNED_UNSCALED); + return !cast<ConstantSDNode>(getScale())->isOne(); } + bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); } // In the both nodes address is Op1, mask is Op2: // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h index 722c3275fd06..e7d608969124 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -76,11 +76,13 @@ public: /// that don't fit the target's parameters for simple stores and can be more /// efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different - /// lowering strategy should be used. + /// lowering strategy should be used. Note that if AlwaysInline is true the + /// function has to return a valid SDValue.
virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { return SDValue(); } diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h index e8d618a24f9b..942a47c6cc7d 100644 --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include #include diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h index 928d7cc6cc04..01cc9bc37931 100644 --- a/llvm/include/llvm/CodeGen/StackMaps.h +++ b/llvm/include/llvm/CodeGen/StackMaps.h @@ -13,7 +13,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/CallingConv.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include #include @@ -23,6 +22,7 @@ namespace llvm { class AsmPrinter; +class MCSymbol; class MCExpr; class MCStreamer; class raw_ostream; diff --git a/llvm/include/llvm/CodeGen/StackProtector.h b/llvm/include/llvm/CodeGen/StackProtector.h index 57456b3f6c16..b96c0c74fabc 100644 --- a/llvm/include/llvm/CodeGen/StackProtector.h +++ b/llvm/include/llvm/CodeGen/StackProtector.h @@ -20,7 +20,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h b/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h index 08ab2abbdd5b..a374736347f6 100644 --- a/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h +++ b/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h @@ -20,8 +20,6 @@ #include "llvm/CodeGen/Register.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" -#include -#include #include diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h index daaa27f72d52..94e8092319d7 100644 --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -16,15 +16,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include #include namespace llvm { +template class SmallSetVector; +template class function_ref; +class MBFIWrapper; class MachineBasicBlock; class MachineBranchProbabilityInfo; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index 62365330379d..1333f2d98973 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -46,7 +46,8 @@ namespace ISD { unsigned IsHvaStart : 1; ///< HVA structure start unsigned IsSecArgPass : 1; ///< Second argument unsigned MemAlign : 4; ///< Log 2 of alignment when arg is passed in memory - ///< (including byval/byref) + ///< (including byval/byref). The max alignment is + ///< verified in IR verification. 
unsigned OrigAlign : 5; ///< Log 2 of original alignment unsigned IsInConsecutiveRegsLast : 1; unsigned IsInConsecutiveRegs : 1; diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index f2ca1590fc39..fbce5d7a9102 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -213,12 +213,24 @@ public: virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; + /// emitZeroCallUsedRegs - Zeros out call used registers. + virtual void emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const {} + /// With basic block sections, emit callee saved frame moves for basic blocks /// that are in a different section. virtual void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {} + /// Returns true if we may need to fix the unwind information for the + /// function. + virtual bool enableCFIFixup(MachineFunction &MF) const; + + /// Emit CFI instructions that recreate the state of the unwind information + /// upon function entry. + virtual void resetCFIToInitialState(MachineBasicBlock &MBB) const {} + /// Replace a StackProbe stub (if any) with the actual probe code inline virtual void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {} diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 411811d08c18..f9183e0a9c66 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -382,6 +382,17 @@ public: /// to which instructions should be sunk. virtual bool shouldSink(const MachineInstr &MI) const { return true; } + /// Return false if the instruction should not be hoisted by MachineLICM. + /// + /// MachineLICM determines on its own whether the instruction is safe to + /// hoist; this gives the target a hook to extend this assessment and prevent + /// an instruction being hoisted from a given loop for target specific + /// reasons. + virtual bool shouldHoist(const MachineInstr &MI, + const MachineLoop *FromLoop) const { + return true; + } + /// Re-issue the specified 'original' instruction at the /// specific location targeting a new destination register. /// The register in Orig->getOperand(0).getReg() will be substituted by @@ -723,12 +734,16 @@ public: virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; /// Create a condition to determine if the trip count of the loop is greater - /// than TC. + /// than TC, where TC is always one more than for the previous prologue or + /// 0 if this is being called for the outermost prologue. /// /// If the trip count is statically known to be greater than TC, return /// true. If the trip count is statically known to be not greater than TC, /// return false. Otherwise return nullopt and fill out Cond with the test /// condition. + /// + /// Note: This hook is guaranteed to be called from the innermost to the + /// outermost prologue of the loop being software pipelined. virtual Optional<bool> createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond) = 0; @@ -1268,13 +1283,6 @@ protected: } public: - /// getAddressSpaceForPseudoSourceKind - Given the kind of memory - /// (e.g. stack) the target returns the corresponding address space.
- virtual unsigned - getAddressSpaceForPseudoSourceKind(unsigned Kind) const { - return 0; - } - /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instruction. If this is /// possible, returns true as well as the new instructions by reference. @@ -1942,7 +1950,7 @@ public: virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::Candidate &C) const { + outliner::Candidate &C) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 3861648a5feb..98b9a416ea59 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -25,7 +25,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DAGCombine.h" @@ -248,12 +248,21 @@ public: /// w.r.t. what they should expand to. enum class AtomicExpansionKind { None, // Don't expand the instruction. + CastToInteger, // Cast the atomic instruction to another type, e.g. from + // floating-point to integer type. LLSC, // Expand the instruction into loadlinked/storeconditional; used // by ARM/AArch64. LLOnly, // Expand the (load) instruction into just a load-linked, which has // greater atomic guarantees than a normal load. CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. - MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. + MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. + BitTestIntrinsic, // Use a target-specific intrinsic for special bit + // operations; used by X86. + Expand, // Generic expansion in terms of other atomic operations. + + // Rewrite to a non-atomic form for use in a known non-preemptible + // environment. + NotAtomic }; /// Enum that specifies when a multiplication should be expanded. @@ -1071,6 +1080,11 @@ public: return false; } + /// How to legalize this custom operation? + virtual LegalizeAction getCustomOperationAction(SDNode &Op) const { + return Legal; + } + /// Return how this operation should be treated: either it is legal, needs to /// be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. @@ -1210,6 +1224,10 @@ public: uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const; + /// Returns preferred type for switch condition. 
+ virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const; + /// Return true if lowering to a bit test is suitable for a set of case /// clusters which contains \p NumDests unique destinations, \p Low and /// \p High as its lowest and highest case values, and expects \p NumCmps /// case value comparisons. @@ -1372,7 +1390,9 @@ public: // Returns true if VT is a legal index type for masked gathers/scatters // on this target - virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; } + virtual bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const { + return false; + } /// Return how the condition code should be treated: either it is legal, needs /// to be expanded to some other code sequence, or the target has a custom @@ -1871,7 +1891,7 @@ public: /// minimum size the object must be to be aligned and PrefAlign is set to the /// preferred alignment. virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, - unsigned & /*PrefAlign*/) const { + Align & /*PrefAlign*/) const { return false; } @@ -1946,6 +1966,14 @@ public: llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); } + /// Perform a bit test atomicrmw using a target-specific intrinsic. This + /// represents the combined bit test intrinsic which will be lowered at a late + /// stage by the backend. + virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { + llvm_unreachable( + "Bit test atomicrmw expansion unimplemented on this target"); + } + /// Perform a masked cmpxchg using a target-specific intrinsic. This + /// represents the core LL/SC loop which will be lowered at a late stage by /// the backend. @@ -2005,12 +2033,6 @@ public: // be unnecessarily held, except if clrex, inserted by this hook, is executed. virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} - /// Returns true if the given (atomic) store should be expanded by the - /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. - virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { - return false; - } - /// Returns true if arguments should be sign-extended in lib calls. virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { return IsSigned; } @@ -2027,6 +2049,30 @@ public: return AtomicExpansionKind::None; } + /// Returns how the given (atomic) load should be cast by the IR-level + /// AtomicExpand pass. + virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { + if (LI->getType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; + } + + /// Returns how the given (atomic) store should be expanded by the IR-level + /// AtomicExpand pass. For instance AtomicExpansionKind::Expand will try + /// to use an atomicrmw xchg. + virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const { + return AtomicExpansionKind::None; + } + + /// Returns how the given (atomic) store should be cast by the IR-level + /// AtomicExpand pass. For instance AtomicExpansionKind::CastToInteger + /// will try to cast the operands to integer values. + virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { + if (SI->getValueOperand()->getType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; + } + /// Returns how the given atomic cmpxchg should be expanded by the IR-level /// AtomicExpand pass.
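The shouldCast*/shouldExpand* hooks above let a backend steer the AtomicExpand pass per instruction. A hypothetical override (the FictionalTargetLowering class is invented, not part of this patch) that keeps the default float-to-integer casting but rewrites every atomic store:

// Sketch only: ask AtomicExpand to lower every atomic store through an
// "atomicrmw xchg" by returning the new Expand kind, as the comment above
// describes; atomic loads keep the behaviour inherited from TargetLowering.
TargetLowering::AtomicExpansionKind
FictionalTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  return AtomicExpansionKind::Expand;
}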
virtual AtomicExpansionKind @@ -2041,6 +2087,18 @@ public: AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } + /// Returns how the given atomicrmw should be cast by the IR-level + /// AtomicExpand pass. + virtual AtomicExpansionKind + shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { + if (RMWI->getOperation() == AtomicRMWInst::Xchg && + (RMWI->getValOperand()->getType()->isFloatingPointTy() || + RMWI->getValOperand()->getType()->isPointerTy())) + return AtomicExpansionKind::CastToInteger; + + return AtomicExpansionKind::None; + } + /// On some platforms, an AtomicRMW that never actually modifies the value /// (such as fetch_add of 0) can be turned into a fence followed by an /// atomic load. This may sound useless, but it makes it possible for the @@ -2123,8 +2181,8 @@ public: /// about some cases, a default true can be returned to let the DAGCombiner /// decide. /// AddNode is (add x, c1), and ConstNode is c2. - virtual bool isMulAddWithConstProfitable(const SDValue &AddNode, - const SDValue &ConstNode) const { + virtual bool isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const { return true; } @@ -2138,6 +2196,18 @@ public: return false; } + /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. + /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always + /// considered beneficial. + /// If optimizing for size, expansion is only considered beneficial for up to + /// 5 multiplies and a divide (if the exponent is negative). + bool isBeneficialToExpandPowI(int Exponent, bool OptForSize) const { + if (Exponent < 0) + Exponent = -Exponent; + return !OptForSize || + (countPopulation((unsigned int)Exponent) + Log2_32(Exponent) < 7); + } + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. @@ -2232,6 +2302,16 @@ protected: assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); OpActions[(unsigned)VT.SimpleTy][Op] = Action; } + void setOperationAction(ArrayRef<unsigned> Ops, MVT VT, + LegalizeAction Action) { + for (auto Op : Ops) + setOperationAction(Op, VT, Action); + } + void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs, + LegalizeAction Action) { + for (auto VT : VTs) + setOperationAction(Ops, VT, Action); + } /// Indicate that the specified load with extension does not work with the /// specified type and indicate what to do about it. @@ -2244,6 +2324,16 @@ protected: LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; } + void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, + LegalizeAction Action) { + for (auto ExtType : ExtTypes) + setLoadExtAction(ExtType, ValVT, MemVT, Action); + } + void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, + ArrayRef<MVT> MemVTs, LegalizeAction Action) { + for (auto MemVT : MemVTs) + setLoadExtAction(ExtTypes, ValVT, MemVT, Action); + } /// Indicate that the specified truncating store does not work with the /// specified type and indicate what to do about it.
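To make the isBeneficialToExpandPowI cost test above concrete: square-and-multiply needs Log2_32(Exponent) squarings plus countPopulation(Exponent) - 1 extra multiplies, so requiring countPopulation + Log2_32 < 7 caps the size-optimized expansion at five multiplies (plus the divide for a negative exponent). For example, an exponent of 8 costs 1 + 3 = 4 and is expanded even at -Os, while 15 costs 4 + 3 = 7 and stays a libcall.

The ArrayRef overloads of setOperationAction and setLoadExtAction added above exist to batch legalization boilerplate. A hypothetical target constructor (the opcode and type choices are placeholders, not taken from any in-tree target) might now write:

// Sketch only: one call per group instead of one call per opcode/type pair.
setOperationAction({ISD::ADD, ISD::SUB, ISD::MUL}, MVT::v4i32, Legal);
setOperationAction({ISD::FADD, ISD::FSUB}, {MVT::f32, MVT::f64}, Expand);
setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, MVT::i16, Legal);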
@@ -2257,8 +2347,16 @@ protected: /// /// NOTE: All indexed mode loads are initialized to Expand in /// TargetLowering.cpp - void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { - setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); + void setIndexedLoadAction(ArrayRef IdxModes, MVT VT, + LegalizeAction Action) { + for (auto IdxMode : IdxModes) + setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); + } + + void setIndexedLoadAction(ArrayRef IdxModes, ArrayRef VTs, + LegalizeAction Action) { + for (auto VT : VTs) + setIndexedLoadAction(IdxModes, VT, Action); } /// Indicate that the specified indexed store does or does not work with the @@ -2266,8 +2364,16 @@ protected: /// /// NOTE: All indexed mode stores are initialized to Expand in /// TargetLowering.cpp - void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { - setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); + void setIndexedStoreAction(ArrayRef IdxModes, MVT VT, + LegalizeAction Action) { + for (auto IdxMode : IdxModes) + setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); + } + + void setIndexedStoreAction(ArrayRef IdxModes, ArrayRef VTs, + LegalizeAction Action) { + for (auto VT : VTs) + setIndexedStoreAction(IdxModes, VT, Action); } /// Indicate that the specified indexed masked load does or does not work with @@ -2292,17 +2398,24 @@ protected: /// Indicate that the specified condition code is or isn't supported on the /// target and indicate what to do about it. - void setCondCodeAction(ISD::CondCode CC, MVT VT, + void setCondCodeAction(ArrayRef CCs, MVT VT, LegalizeAction Action) { - assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && - "Table isn't big enough!"); - assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); - /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit - /// value and the upper 29 bits index into the second dimension of the array - /// to select what 32-bit value to use. - uint32_t Shift = 4 * (VT.SimpleTy & 0x7); - CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); - CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; + for (auto CC : CCs) { + assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && + "Table isn't big enough!"); + assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); + /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the + /// 32-bit value and the upper 29 bits index into the second dimension of + /// the array to select what 32-bit value to use. + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); + CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; + } + } + void setCondCodeAction(ArrayRef CCs, ArrayRef VTs, + LegalizeAction Action) { + for (auto VT : VTs) + setCondCodeAction(CCs, VT, Action); } /// If Opc/OrigVT is specified as being promoted, the promotion code defaults @@ -2323,9 +2436,11 @@ protected: /// Targets should invoke this method for each target independent node that /// they want to provide a custom DAG combiner for by implementing the /// PerformDAGCombine virtual method. 
- void setTargetDAGCombine(ISD::NodeType NT) { - assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); - TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); + void setTargetDAGCombine(ArrayRef NTs) { + for (auto NT : NTs) { + assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7); + } } /// Set the target's minimum function alignment. @@ -2510,6 +2625,10 @@ public: case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: return true; default: return false; } @@ -2653,6 +2772,10 @@ public: return false; } + /// Return true if this constant should be sign extended when promoting to + /// a larger type. + virtual bool signExtendConstant(const ConstantInt *C) const { return false; } + /// Return true if sinking I's operands to the same basic block as I is /// profitable, e.g. because the operands can be folded into a target /// instruction during instruction selection. After calling the function @@ -2851,6 +2974,14 @@ public: return false; } + /// Return true if pulling a binary operation into a select with an identity + /// constant is profitable. This is the inverse of an IR transform. + /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X + virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, + EVT VT) const { + return false; + } + /// Return true if it is beneficial to convert a load of a constant to /// just the constant itself. /// On some targets it might be more efficient to use a combination of @@ -2940,6 +3071,10 @@ public: void setLibcallName(RTLIB::Libcall Call, const char *Name) { LibcallRoutineNames[Call] = Name; } + void setLibcallName(ArrayRef Calls, const char *Name) { + for (auto Call : Calls) + setLibcallName(Call, Name); + } /// Get the libcall routine name for the specified libcall. const char *getLibcallName(RTLIB::Libcall Call) const { @@ -3421,11 +3556,13 @@ public: /// Determines the optimal series of memory ops to replace the memset / memcpy. /// Return true if the number of memory ops is below the threshold (Limit). + /// Note that this is always the case when Limit is ~0. /// It returns the types of the sequence of memory ops to perform /// memset / memcpy by reference. - bool findOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, - const MemOp &Op, unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes) const; + virtual bool + findOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, + const MemOp &Op, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes) const; /// Check to see if the specified operand of the specified instruction is a /// constant integer. If so, check to see if there are any bits set in the @@ -3534,9 +3671,16 @@ public: /// Helper wrapper around SimplifyDemandedVectorElts. /// Adds Op back to the worklist upon success. bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, - APInt &KnownUndef, APInt &KnownZero, DAGCombinerInfo &DCI) const; + /// Return true if the target supports simplifying demanded vector elements by + /// converting them to undefs. + virtual bool + shouldSimplifyDemandedVectorElts(SDValue Op, + const TargetLoweringOpt &TLO) const { + return true; + } + /// Determine which of the bits specified in Mask are known to be either zero /// or one and return them in the KnownZero/KnownOne bitsets. 
The DemandedElts /// argument allows us to only collect the known bits that are shared by the @@ -3653,6 +3797,12 @@ public: APInt &UndefElts, unsigned Depth = 0) const; + /// Returns true if the given Opc is considered a canonical constant for the + /// target, which should not be transformed back into a BUILD_VECTOR. + virtual bool isTargetCanonicalConstantNode(SDValue Op) const { + return Op.getOpcode() == ISD::SPLAT_VECTOR; + } + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. CombineLevel Level; @@ -3805,7 +3955,7 @@ public: if (Neg && Cost == NegatibleCost::Cheaper) return Neg; // Remove the new created node to avoid the side effect to the DAG. - if (Neg && Neg.getNode()->use_empty()) + if (Neg && Neg->use_empty()) DAG.RemoveDeadNode(Neg.getNode()); return SDValue(); } @@ -4270,6 +4420,7 @@ public: C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. C_Memory, // Memory constraint. + C_Address, // Address constraint. C_Immediate, // Requires an immediate. C_Other, // Something else. C_Unknown // Unsupported constraint. @@ -4374,6 +4525,8 @@ public: return InlineAsm::Constraint_o; if (ConstraintCode == "X") return InlineAsm::Constraint_X; + if (ConstraintCode == "p") + return InlineAsm::Constraint_p; return InlineAsm::Constraint_Unknown; } @@ -4410,6 +4563,14 @@ public: SelectionDAG &DAG, SmallVectorImpl &Created) const; + /// Targets may override this function to provide custom SREM lowering for + /// power-of-2 denominators. If the target returns an empty SDValue, LLVM + /// assumes SREM is expensive and replaces it with a series of other integer + /// operations. + virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl &Created) const; + /// Indicate whether this target prefers to combine FDIVs with the same /// divisor. If the transform should never be done, return zero. If the /// transform should be done, return the minimum number of divisor uses @@ -4442,6 +4603,13 @@ public: return SDValue(); } + /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is + /// required for correctness since InstCombine might have canonicalized a + /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall + /// through to the default expansion/soften to libcall, we might introduce a + /// link-time dependency on libm into a file that originally did not have one. + SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const; + /// Return a reciprocal estimate value for the input operand. /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or /// 'Enabled' as set by a potential default override attribute. @@ -4554,6 +4722,16 @@ public: /// \returns The expansion result SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; + /// Expand check for floating point class. + /// \param ResultVT The type of intrinsic call result. + /// \param Op The tested value. + /// \param Test The test to perform. + /// \param Flags The optimization flags. + /// \returns The expansion result or SDValue() if it fails. + SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, unsigned Test, + SDNodeFlags Flags, const SDLoc &DL, + SelectionDAG &DAG) const; + /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand @@ -4693,28 +4871,32 @@ public: /// method accepts vectors as its arguments. 
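Among the additions above, the new C_Address constraint class gives the "p" inline-asm constraint its own bucket instead of folding it into the memory constraints, as the getConstraintType mapping shows. A hedged example of the kind of source that exercises it (the x86 mnemonic and the %a operand-print modifier are illustrative, compiler- and target-dependent):

// Sketch only: "p" constrains the operand to be a valid memory address.
void touch(const void *Ptr) {
  asm volatile("prefetcht0 %a0" : : "p"(Ptr));
}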
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const; - /// Legalize a SETCC with given LHS and RHS and condition code CC on the - /// current target. + /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC + /// on the current target. A VP_SETCC will additionally be given a Mask + /// and/or EVL not equal to SDValue(). /// /// If the SETCC has been legalized using AND / OR, then the legalized node /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert - /// will be set to false. + /// will be set to false. This will also hold if the VP_SETCC has been + /// legalized using VP_AND / VP_OR. /// - /// If the SETCC has been legalized by using getSetCCSwappedOperands(), - /// then the values of LHS and RHS will be swapped, CC will be set to the - /// new condition, and NeedInvert will be set to false. + /// If the SETCC / VP_SETCC has been legalized by using + /// getSetCCSwappedOperands(), then the values of LHS and RHS will be + /// swapped, CC will be set to the new condition, and NeedInvert will be set + /// to false. /// - /// If the SETCC has been legalized using the inverse condcode, then LHS and - /// RHS will be unchanged, CC will set to the inverted condcode, and - /// NeedInvert will be set to true. The caller must invert the result of the - /// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap - /// the effect of a true/false result. + /// If the SETCC / VP_SETCC has been legalized using the inverse condcode, + /// then LHS and RHS will be unchanged, CC will be set to the inverted condcode, + /// and NeedInvert will be set to true. The caller must invert the result of + /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to + /// swap the effect of a true/false result. /// - /// \returns true if the SetCC has been legalized, false if it hasn't. + /// \returns true if the SETCC / VP_SETCC has been legalized, false if it + /// hasn't. bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, - SDValue &RHS, SDValue &CC, bool &NeedInvert, - const SDLoc &dl, SDValue &Chain, - bool IsSignaling = false) const; + SDValue &RHS, SDValue &CC, SDValue Mask, + SDValue EVL, bool &NeedInvert, const SDLoc &dl, + SDValue &Chain, bool IsSignaling = false) const; //===--------------------------------------------------------------------===// // Instruction Emitting Hooks @@ -4766,10 +4948,6 @@ public: // combiner can fold the new nodes. SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; - /// Give targets the chance to reduce the number of distinct addresing modes.
- ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType, - EVT MemVT, SDValue Offsets) const; - private: SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const; diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 2c8b17807f7c..08267d70906a 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/BinaryFormat/XCOFF.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { @@ -118,6 +119,9 @@ public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + /// Emit the module flags that specify the garbage collection information. void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; @@ -282,6 +286,13 @@ public: MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const override; + + /// For functions, this will return the LSDA section. If option + /// -ffunction-sections is on, this will return a unique csect with the + /// function name appended to .gcc_except_table as a suffix of the LSDA + /// section name. + MCSection *getSectionForLSDA(const Function &F, const MCSymbol &FnSym, + const TargetMachine &TM) const override; }; class TargetLoweringObjectFileGOFF : public TargetLoweringObjectFile { diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h index 9b13b61fc9de..8d7086d02c8a 100644 --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -345,6 +345,9 @@ protected: // Helper to verify the analysis is really immutable. void setOpt(bool &Opt, bool Val); + /// Return true if register allocator is specified by -regalloc=override. + bool isCustomizedRegAlloc(); + /// Methods with trivial inline returns are convenient points in the common /// codegen pass pipeline where targets may insert passes. Methods with /// out-of-line standard implementations are major CodeGen stages called by diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index c3b842052ef5..04369a5bfe0d 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -29,7 +29,6 @@ #include "llvm/Support/Printable.h" #include #include -#include namespace llvm { @@ -56,6 +55,8 @@ public: const LaneBitmask LaneMask; /// Classes with a higher priority value are assigned first by register /// allocators using a greedy heuristic. The value is in the range [0,63]. + /// Values >= 32 should be used with care since they may overlap with other + /// fields in the allocator's priority heuristics. const uint8_t AllocationPriority; /// Configurable target specific flags. const uint8_t TSFlags; @@ -415,19 +416,11 @@ public: /// Returns true if the two registers are equal or alias each other. /// The registers may be virtual registers. - bool regsOverlap(Register regA, Register regB) const { - if (regA == regB) return true; - if (!regA.isPhysical() || !regB.isPhysical()) - return false; - - // Regunits are numerically ordered. Find a common unit. 
- MCRegUnitIterator RUA(regA.asMCReg(), this); - MCRegUnitIterator RUB(regB.asMCReg(), this); - do { - if (*RUA == *RUB) return true; - if (*RUA < *RUB) ++RUA; - else ++RUB; - } while (RUA.isValid() && RUB.isValid()); + bool regsOverlap(Register RegA, Register RegB) const { + if (RegA == RegB) + return true; + if (RegA.isPhysical() && RegB.isPhysical()) + return MCRegisterInfo::regsOverlap(RegA.asMCReg(), RegB.asMCReg()); return false; } @@ -567,6 +560,24 @@ public: virtual bool isCalleeSavedPhysReg(MCRegister PhysReg, const MachineFunction &MF) const; + /// Returns true if PhysReg can be used as an argument to a function. + virtual bool isArgumentRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + return false; + } + + /// Returns true if PhysReg is a fixed register. + virtual bool isFixedRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + return false; + } + + /// Returns true if PhysReg is a general purpose register. + virtual bool isGeneralPurposeRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + return false; + } + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). @@ -1067,6 +1078,14 @@ public: return false; } + /// When prioritizing live ranges in register allocation, if this hook returns + /// true then the AllocationPriority of the register class will be treated as + /// more important than whether the range is local to a basic block or global. + virtual bool + regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const { + return false; + } + //===--------------------------------------------------------------------===// /// Debug information queries. diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 3fac2f688dd8..dbd678b75d05 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -13,12 +13,10 @@ #ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H #define LLVM_CODEGEN_TARGETSUBTARGETINFO_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" @@ -27,6 +25,9 @@ namespace llvm { +class APInt; +class MachineFunction; +class ScheduleDAGMutation; class CallLowering; class InlineAsmLowering; class InstrItineraryData; @@ -272,11 +273,6 @@ public: /// a finer grain to tune the register allocator. virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; - /// True if the subtarget should consider the cost of local intervals - /// created by a split candidate when choosing the best split candidate. This - /// heuristic may be compile time intensive. - virtual bool enableAdvancedRASplitCost() const; - /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). 
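The isArgumentRegister/isFixedRegister/isGeneralPurposeRegister hooks added above all default to false; they exist so register-level features such as zeroing call-used registers can query the target. A hypothetical override (the target and its register names are invented, not part of this patch):

// Sketch only: classify the fictional target's first eight GPRs as argument
// registers; a real target must derive this from its calling convention.
bool FictionalRegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                               MCRegister PhysReg) const {
  return PhysReg >= Fictional::R0 && PhysReg <= Fictional::R7;
}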
virtual bool useAA() const; diff --git a/llvm/include/llvm/CodeGen/TileShapeInfo.h b/llvm/include/llvm/CodeGen/TileShapeInfo.h index 4e574bd96cca..1b5f902139fb 100644 --- a/llvm/include/llvm/CodeGen/TileShapeInfo.h +++ b/llvm/include/llvm/CodeGen/TileShapeInfo.h @@ -38,7 +38,7 @@ public: ShapeT() : Row(nullptr), Col(nullptr), RowImm(InvalidImmShape), ColImm(InvalidImmShape) {} - bool operator==(const ShapeT &Shape) { + bool operator==(const ShapeT &Shape) const { MachineOperand *R = Shape.Row; MachineOperand *C = Shape.Col; if (!R || !C) @@ -52,7 +52,7 @@ public: return false; } - bool operator!=(const ShapeT &Shape) { return !(*this == Shape); } + bool operator!=(const ShapeT &Shape) const { return !(*this == Shape); } MachineOperand *getRow() const { return Row; } diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index 7b17b98d5c55..48d265476ca8 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -19,7 +19,6 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TypeSize.h" -#include "llvm/Support/WithColor.h" #include #include #include @@ -365,6 +364,12 @@ namespace llvm { return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()}; } + // Return the number of bytes overwritten by a store of this value type or + // this value type's element type in the case of a vector. + uint64_t getScalarStoreSize() const { + return getScalarType().getStoreSize().getFixedSize(); + } + /// Return the number of bits overwritten by a store of the specified value /// type. /// diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 7f989e08e9bf..2194800b7ba9 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -20,204 +20,211 @@ class ValueType { def OtherVT : ValueType<0, 1>; // "Other" value def i1 : ValueType<1, 2>; // One bit boolean value -def i8 : ValueType<8, 3>; // 8-bit integer value -def i16 : ValueType<16, 4>; // 16-bit integer value -def i32 : ValueType<32, 5>; // 32-bit integer value -def i64 : ValueType<64, 6>; // 64-bit integer value -def i128 : ValueType<128, 7>; // 128-bit integer value - -def bf16 : ValueType<16, 8>; // 16-bit brain floating point value -def f16 : ValueType<16, 9>; // 16-bit floating point value -def f32 : ValueType<32, 10>; // 32-bit floating point value -def f64 : ValueType<64, 11>; // 64-bit floating point value -def f80 : ValueType<80, 12>; // 80-bit floating point value -def f128 : ValueType<128, 13>; // 128-bit floating point value -def ppcf128 : ValueType<128, 14>; // PPC 128-bit floating point value - -def v1i1 : ValueType<1, 15>; // 1 x i1 vector value -def v2i1 : ValueType<2, 16>; // 2 x i1 vector value -def v4i1 : ValueType<4, 17>; // 4 x i1 vector value -def v8i1 : ValueType<8, 18>; // 8 x i1 vector value -def v16i1 : ValueType<16, 19>; // 16 x i1 vector value -def v32i1 : ValueType<32, 20>; // 32 x i1 vector value -def v64i1 : ValueType<64, 21>; // 64 x i1 vector value -def v128i1 : ValueType<128, 22>; // 128 x i1 vector value -def v256i1 : ValueType<256, 23>; // 256 x i1 vector value -def v512i1 : ValueType<512, 24>; // 512 x i1 vector value -def v1024i1 : ValueType<1024, 25>; // 1024 x i1 vector value - -def v1i8 : ValueType<8, 26>; // 1 x i8 vector value -def v2i8 : ValueType<16, 27>; // 2 x i8 vector value -def v4i8 : ValueType<32, 28>; // 4 x i8 vector value -def v8i8 : ValueType<64, 29>; // 8 x i8 vector value 
-def v16i8 : ValueType<128, 30>; // 16 x i8 vector value -def v32i8 : ValueType<256, 31>; // 32 x i8 vector value -def v64i8 : ValueType<512, 32>; // 64 x i8 vector value -def v128i8 : ValueType<1024, 33>; // 128 x i8 vector value -def v256i8 : ValueType<2048, 34>; // 256 x i8 vector value -def v512i8 : ValueType<4096, 35>; // 512 x i8 vector value -def v1024i8 : ValueType<8192, 36>; // 1024 x i8 vector value - -def v1i16 : ValueType<16, 37>; // 1 x i16 vector value -def v2i16 : ValueType<32, 38>; // 2 x i16 vector value -def v3i16 : ValueType<48, 39>; // 3 x i16 vector value -def v4i16 : ValueType<64, 40>; // 4 x i16 vector value -def v8i16 : ValueType<128, 41>; // 8 x i16 vector value -def v16i16 : ValueType<256, 42>; // 16 x i16 vector value -def v32i16 : ValueType<512, 43>; // 32 x i16 vector value -def v64i16 : ValueType<1024, 44>; // 64 x i16 vector value -def v128i16 : ValueType<2048, 45>; // 128 x i16 vector value -def v256i16 : ValueType<4096, 46>; // 256 x i16 vector value -def v512i16 : ValueType<8192, 47>; // 512 x i16 vector value - -def v1i32 : ValueType<32, 48>; // 1 x i32 vector value -def v2i32 : ValueType<64, 49>; // 2 x i32 vector value -def v3i32 : ValueType<96, 50>; // 3 x i32 vector value -def v4i32 : ValueType<128, 51>; // 4 x i32 vector value -def v5i32 : ValueType<160, 52>; // 5 x i32 vector value -def v6i32 : ValueType<192, 53>; // 6 x f32 vector value -def v7i32 : ValueType<224, 54>; // 7 x f32 vector value -def v8i32 : ValueType<256, 55>; // 8 x i32 vector value -def v16i32 : ValueType<512, 56>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 57>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 58>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 59>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 60>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 61>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 62>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 63>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 64>; // 1 x i64 vector value -def v2i64 : ValueType<128, 65>; // 2 x i64 vector value -def v3i64 : ValueType<192, 66>; // 3 x i64 vector value -def v4i64 : ValueType<256, 67>; // 4 x i64 vector value -def v8i64 : ValueType<512, 68>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 69>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 70>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 71>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 72>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 73>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 74>; // 1 x i128 vector value - -def v1f16 : ValueType<16, 75>; // 1 x f16 vector value -def v2f16 : ValueType<32, 76>; // 2 x f16 vector value -def v3f16 : ValueType<48, 77>; // 3 x f16 vector value -def v4f16 : ValueType<64, 78>; // 4 x f16 vector value -def v8f16 : ValueType<128, 79>; // 8 x f16 vector value -def v16f16 : ValueType<256, 80>; // 16 x f16 vector value -def v32f16 : ValueType<512, 81>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 82>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 83>; // 128 x f16 vector value -def v256f16 : ValueType<4096, 84>; // 256 x f16 vector value -def v512f16 : ValueType<8192, 85>; // 512 x f16 vector value - -def v2bf16 : ValueType<32, 86>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 87>; // 3 x bf16 vector value -def v4bf16 : ValueType<64, 88>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 89>; // 8 x bf16 vector value -def 
v16bf16 : ValueType<256, 90>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 91>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 92>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 93>; // 128 x bf16 vector value - -def v1f32 : ValueType<32, 94>; // 1 x f32 vector value -def v2f32 : ValueType<64, 95>; // 2 x f32 vector value -def v3f32 : ValueType<96, 96>; // 3 x f32 vector value -def v4f32 : ValueType<128, 97>; // 4 x f32 vector value -def v5f32 : ValueType<160, 98>; // 5 x f32 vector value -def v6f32 : ValueType<192, 99>; // 6 x f32 vector value -def v7f32 : ValueType<224, 100>; // 7 x f32 vector value -def v8f32 : ValueType<256, 101>; // 8 x f32 vector value -def v16f32 : ValueType<512, 102>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 103>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 104>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 105>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 106>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 107>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 108>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 109>; // 2048 x f32 vector value - -def v1f64 : ValueType<64, 110>; // 1 x f64 vector value -def v2f64 : ValueType<128, 111>; // 2 x f64 vector value -def v3f64 : ValueType<192, 112>; // 3 x f64 vector value -def v4f64 : ValueType<256, 113>; // 4 x f64 vector value -def v8f64 : ValueType<512, 114>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 115>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 116>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 117>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 118>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 119>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 120>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 121>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 122>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 123>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 124>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 125>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 126>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 127>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 128>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 129>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 130>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 131>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 132>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 133>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 134>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 135>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 136>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 137>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 138>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 139>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 140>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 141>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 142>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 143>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 144>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 145>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 146>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 147>; // n 
x 2 x i64 vector value -def nxv4i64 : ValueType<256, 148>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 149>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 150>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 151>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 152>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 153>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 154>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 155>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 156>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 157>; // n x 32 x f16 vector value - -def nxv1bf16 : ValueType<16, 158>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 159>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 160>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 161>; // n x 8 x bf16 vector value - -def nxv1f32 : ValueType<32, 162>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 163>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 164>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 165>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 166>; // n x 16 x f32 vector value - -def nxv1f64 : ValueType<64, 167>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 168>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 169>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 170>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 171>; // X86 MMX value -def FlagVT : ValueType<0, 172>; // Pre-RA sched glue -def isVoid : ValueType<0, 173>; // Produces no value -def untyped : ValueType<8, 174>; // Produces an untyped value -def funcref : ValueType<0, 175>; // WebAssembly's funcref type -def externref : ValueType<0, 176>; // WebAssembly's externref type -def x86amx : ValueType<8192, 177>; // X86 AMX value -def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64) - +def i2 : ValueType<2, 3>; // 2-bit integer value +def i4 : ValueType<4, 4>; // 4-bit integer value +def i8 : ValueType<8, 5>; // 8-bit integer value +def i16 : ValueType<16, 6>; // 16-bit integer value +def i32 : ValueType<32, 7>; // 32-bit integer value +def i64 : ValueType<64, 8>; // 64-bit integer value +def i128 : ValueType<128, 9>; // 128-bit integer value + +def bf16 : ValueType<16, 10>; // 16-bit brain floating point value +def f16 : ValueType<16, 11>; // 16-bit floating point value +def f32 : ValueType<32, 12>; // 32-bit floating point value +def f64 : ValueType<64, 13>; // 64-bit floating point value +def f80 : ValueType<80, 14>; // 80-bit floating point value +def f128 : ValueType<128, 15>; // 128-bit floating point value +def ppcf128 : ValueType<128, 16>; // PPC 128-bit floating point value + +def v1i1 : ValueType<1, 17>; // 1 x i1 vector value +def v2i1 : ValueType<2, 18>; // 2 x i1 vector value +def v4i1 : ValueType<4, 19>; // 4 x i1 vector value +def v8i1 : ValueType<8, 20>; // 8 x i1 vector value +def v16i1 : ValueType<16, 21>; // 16 x i1 vector value +def v32i1 : ValueType<32, 22>; // 32 x i1 vector value +def v64i1 : ValueType<64, 23>; // 64 x i1 vector value +def v128i1 : ValueType<128, 24>; // 128 x i1 vector value +def v256i1 : ValueType<256, 25>; // 256 x i1 vector value +def v512i1 : ValueType<512, 26>; // 512 x i1 vector value +def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value + +def v128i2 : ValueType<256, 28>; // 128 x i2 vector value + +def v64i4 : ValueType<256, 29>; // 64 x i4 vector value + +def 
v1i8 : ValueType<8, 30>; // 1 x i8 vector value +def v2i8 : ValueType<16, 31>; // 2 x i8 vector value +def v4i8 : ValueType<32, 32>; // 4 x i8 vector value +def v8i8 : ValueType<64, 33>; // 8 x i8 vector value +def v16i8 : ValueType<128, 34>; // 16 x i8 vector value +def v32i8 : ValueType<256, 35>; // 32 x i8 vector value +def v64i8 : ValueType<512, 36>; // 64 x i8 vector value +def v128i8 : ValueType<1024, 37>; // 128 x i8 vector value +def v256i8 : ValueType<2048, 38>; // 256 x i8 vector value +def v512i8 : ValueType<4096, 39>; // 512 x i8 vector value +def v1024i8 : ValueType<8192, 40>; // 1024 x i8 vector value + +def v1i16 : ValueType<16, 41>; // 1 x i16 vector value +def v2i16 : ValueType<32, 42>; // 2 x i16 vector value +def v3i16 : ValueType<48, 43>; // 3 x i16 vector value +def v4i16 : ValueType<64, 44>; // 4 x i16 vector value +def v8i16 : ValueType<128, 45>; // 8 x i16 vector value +def v16i16 : ValueType<256, 46>; // 16 x i16 vector value +def v32i16 : ValueType<512, 47>; // 32 x i16 vector value +def v64i16 : ValueType<1024, 48>; // 64 x i16 vector value +def v128i16 : ValueType<2048, 49>; // 128 x i16 vector value +def v256i16 : ValueType<4096, 50>; // 256 x i16 vector value +def v512i16 : ValueType<8192, 51>; // 512 x i16 vector value + +def v1i32 : ValueType<32, 52>; // 1 x i32 vector value +def v2i32 : ValueType<64, 53>; // 2 x i32 vector value +def v3i32 : ValueType<96, 54>; // 3 x i32 vector value +def v4i32 : ValueType<128, 55>; // 4 x i32 vector value +def v5i32 : ValueType<160, 56>; // 5 x i32 vector value +def v6i32 : ValueType<192, 57>; // 6 x i32 vector value +def v7i32 : ValueType<224, 58>; // 7 x i32 vector value +def v8i32 : ValueType<256, 59>; // 8 x i32 vector value +def v16i32 : ValueType<512, 60>; // 16 x i32 vector value +def v32i32 : ValueType<1024, 61>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 62>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 63>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 64>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 65>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 66>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 67>; // 2048 x i32 vector value + +def v1i64 : ValueType<64, 68>; // 1 x i64 vector value +def v2i64 : ValueType<128, 69>; // 2 x i64 vector value +def v3i64 : ValueType<192, 70>; // 3 x i64 vector value +def v4i64 : ValueType<256, 71>; // 4 x i64 vector value +def v8i64 : ValueType<512, 72>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 73>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 74>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 75>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 76>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 77>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 78>; // 1 x i128 vector value + +def v1f16 : ValueType<16, 79>; // 1 x f16 vector value +def v2f16 : ValueType<32, 80>; // 2 x f16 vector value +def v3f16 : ValueType<48, 81>; // 3 x f16 vector value +def v4f16 : ValueType<64, 82>; // 4 x f16 vector value +def v8f16 : ValueType<128, 83>; // 8 x f16 vector value +def v16f16 : ValueType<256, 84>; // 16 x f16 vector value +def v32f16 : ValueType<512, 85>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 86>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 87>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 88>; // 256 x f16 vector value +def v512f16 : ValueType<8192, 89>; // 512 x f16 vector value + +def v2bf16 : ValueType<32, 90>;
// 2 x bf16 vector value +def v3bf16 : ValueType<48, 91>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 92>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 93>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 94>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 95>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 96>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 97>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 98>; // 1 x f32 vector value +def v2f32 : ValueType<64, 99>; // 2 x f32 vector value +def v3f32 : ValueType<96, 100>; // 3 x f32 vector value +def v4f32 : ValueType<128, 101>; // 4 x f32 vector value +def v5f32 : ValueType<160, 102>; // 5 x f32 vector value +def v6f32 : ValueType<192, 103>; // 6 x f32 vector value +def v7f32 : ValueType<224, 104>; // 7 x f32 vector value +def v8f32 : ValueType<256, 105>; // 8 x f32 vector value +def v16f32 : ValueType<512, 106>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 107>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 108>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 109>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 110>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 111>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 112>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 113>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 114>; // 1 x f64 vector value +def v2f64 : ValueType<128, 115>; // 2 x f64 vector value +def v3f64 : ValueType<192, 116>; // 3 x f64 vector value +def v4f64 : ValueType<256, 117>; // 4 x f64 vector value +def v8f64 : ValueType<512, 118>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 119>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 120>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 121>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 122>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 123>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 124>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 125>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 126>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 127>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 128>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 129>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 130>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 131>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 132>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 133>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 134>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 135>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 136>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 137>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 138>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 139>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 140>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 141>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 142>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 143>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 144>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 145>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 146>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 147>; // n x 8 x i32 vector value +def nxv16i32 : 
ValueType<512, 148>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 149>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 150>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 151>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 152>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 153>; // n x 8 x i64 vector value +def nxv16i64 : ValueType<1024, 154>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 155>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<16, 156>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 157>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 158>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 159>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 160>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 161>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 162>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 163>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 164>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 165>; // n x 8 x bf16 vector value +def nxv16bf16 : ValueType<256, 166>; // n x 16 x bf16 vector value +def nxv32bf16 : ValueType<512, 167>; // n x 32 x bf16 vector value + +def nxv1f32 : ValueType<32, 168>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 169>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 170>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 171>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 172>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 173>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 174>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 175>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 176>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 177>; // X86 MMX value +def FlagVT : ValueType<0, 178>; // Pre-RA sched glue +def isVoid : ValueType<0, 179>; // Produces no value +def untyped : ValueType<8, 180>; // Produces an untyped value +def funcref : ValueType<0, 181>; // WebAssembly's funcref type +def externref : ValueType<0, 182>; // WebAssembly's externref type +def x86amx : ValueType<8192, 183>; // X86 AMX value +def i64x8 : ValueType<512, 184>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy def MetadataVT : ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index 4f1c666df35f..0b2e033bd97a 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -11,18 +11,26 @@ #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" -#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" -#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/MC/MCDwarf.h" +#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include namespace llvm { +class DWARFContext; +class DWARFExpression; +class DWARFUnit; +class DataExtractor; +class DeclContextTree; +struct MCDwarfLineTableParams; +template class SmallVectorImpl; enum class DwarfLinkerClient { Dsymutil, LLD, General }; /// The kind of accelerator tables we should emit. 
-enum class AccelTableKind { +enum class DwarfLinkerAccelTableKind : uint8_t { + None, Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. Dwarf, ///< DWARF v5 .debug_names. Default, ///< Dwarf for DWARF5 or later, Apple otherwise. @@ -56,28 +64,21 @@ class AddressesMap { public: virtual ~AddressesMap(); - /// Returns true if represented addresses are from linked file. - /// Returns false if represented addresses are from not-linked - /// object file. - virtual bool areRelocationsResolved() const = 0; - /// Checks that there are valid relocations against a .debug_info /// section. virtual bool hasValidRelocs() = 0; - /// Checks that the specified DIE has a DW_AT_Location attribute - /// that references into a live code section. - /// + /// Checks that the specified variable \p DIE references a live code section. + /// Allowed kinds of input DIE: DW_TAG_variable, DW_TAG_constant. /// \returns true and sets Info.InDebugMap if it is the case. - virtual bool hasLiveMemoryLocation(const DWARFDie &DIE, - CompileUnit::DIEInfo &Info) = 0; + virtual bool isLiveVariable(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) = 0; - /// Checks that the specified DIE has a DW_AT_Low_pc attribute - /// that references into a live code section. - /// + /// Checks that the specified subprogram \p DIE references a live code section. + /// Allowed kinds of input DIE: DW_TAG_subprogram, DW_TAG_label. /// \returns true and sets Info.InDebugMap if it is the case. - virtual bool hasLiveAddressRange(const DWARFDie &DIE, - CompileUnit::DIEInfo &Info) = 0; + virtual bool isLiveSubprogram(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) = 0; /// Apply the valid relocations to the buffer \p Data, taking into /// account that Data is at \p BaseOffset in the .debug_info section. @@ -272,6 +273,9 @@ public: /// Print statistics to standard output. void setStatistics(bool Statistics) { Options.Statistics = Statistics; } + /// Verify the input DWARF. + void setVerifyInputDWARF(bool Verify) { Options.VerifyInputDWARF = Verify; } + /// Do not emit linked dwarf info. void setNoOutput(bool NoOut) { Options.NoOutput = NoOut; } @@ -290,7 +294,7 @@ public: void setNumThreads(unsigned NumThreads) { Options.Threads = NumThreads; } /// Set kind of accelerator tables to be generated. - void setAccelTableKind(AccelTableKind Kind) { + void setAccelTableKind(DwarfLinkerAccelTableKind Kind) { Options.TheAccelTableKind = Kind; } @@ -361,6 +365,8 @@ private: /// Given a DIE, update its incompleteness based on whether the DIEs it /// references are incomplete. UpdateRefIncompleteness, + /// Given a DIE, mark it as ODR canonical, if applicable. + MarkODRCanonicalDie, }; /// This class represents an item in the work list. The type defines what kind @@ -389,6 +395,9 @@ private: AncestorIdx(AncestorIdx) {} }; + /// Verify the given DWARF file. + bool verify(const DWARFFile &File); + /// Returns true if we need to translate strings. bool needToTranslateStrings() { return StringsTranslator != nullptr; } @@ -457,6 +466,10 @@ private: const DWARFFile &File, SmallVectorImpl &Worklist); + /// Mark the context corresponding to the specified \p Die as having a + /// canonical DIE, if applicable. + void markODRCanonicalDie(const DWARFDie &Die, CompileUnit &CU); + /// \defgroup FindRootDIEs Find DIEs corresponding to Address map entries. /// /// @{ @@ -778,6 +791,9 @@ private: /// Print statistics. bool Statistics = false; + /// Verify the input DWARF.
+ bool VerifyInputDWARF = false; + /// Skip emitting output bool NoOutput = false; @@ -795,7 +811,8 @@ private: unsigned Threads = 1; /// The accelerator table kind - AccelTableKind TheAccelTableKind = AccelTableKind::Default; + DwarfLinkerAccelTableKind TheAccelTableKind = + DwarfLinkerAccelTableKind::Default; /// Prepend path for the clang modules. std::string PrependPath; diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h index afba19ac7d42..788275782235 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h @@ -9,10 +9,10 @@ #ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H #define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/DIE.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/DataExtractor.h" namespace llvm { @@ -74,6 +74,12 @@ public: /// Does DIE transitively refer an incomplete decl? bool Incomplete : 1; + + /// Is DIE in the clang module scope? + bool InModuleScope : 1; + + /// Is ODR marking done? + bool ODRMarkingDone : 1; }; CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR, diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h index d2274488e85f..fb02b0fc1b4d 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h @@ -14,14 +14,15 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" -#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include namespace llvm { +class CompileUnit; struct DeclMapInfo; /// Small helper that resolves and caches file paths. 
This helps reduce the @@ -91,6 +92,10 @@ public: bool setLastSeenDIE(CompileUnit &U, const DWARFDie &Die); + void setHasCanonicalDIE() { HasCanonicalDIE = true; } + + bool hasCanonicalDIE() const { return HasCanonicalDIE; } + uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; } void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; } @@ -112,7 +117,8 @@ private: const DeclContext &Parent; DWARFDie LastSeenDIE; uint32_t LastSeenCompileUnitID = 0; - uint32_t CanonicalDIEOffset = 0; + std::atomic<uint32_t> CanonicalDIEOffset = {0}; + bool HasCanonicalDIE = false; }; /// This class gives a tree-like API to the DenseMap that stores the diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index fc8c59904cfb..003fe548252a 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -10,7 +10,6 @@ #define LLVM_DWARFLINKER_DWARFSTREAMER_H #include "llvm/BinaryFormat/Swift.h" -#include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/DWARFLinker/DWARFLinker.h" #include "llvm/MC/MCAsmInfo.h" @@ -18,9 +17,11 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetMachine.h" namespace llvm { +template <typename DataT> class AccelTable; enum class OutputFileType { Object, diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h index 9f69851f0055..1354b46f156b 100644 --- a/llvm/include/llvm/DWP/DWPStringPool.h +++ b/llvm/include/llvm/DWP/DWPStringPool.h @@ -43,7 +43,7 @@ public: auto Pair = Pool.insert(std::make_pair(Str, Offset)); if (Pair.second) { - Out.SwitchSection(Sec); + Out.switchSection(Sec); Out.emitBytes(StringRef(Str, Length)); Offset += Length; } diff --git a/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h index 5a91682e9bd4..d474173973b5 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h @@ -11,7 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h index 82ef8c173bee..ef44b622d955 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h @@ -10,7 +10,7 @@ #define LLVM_DEBUGINFO_CODEVIEW_CVSYMBOLVISITOR_H #include "llvm/DebugInfo/CodeView/CVRecord.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Error.h" namespace llvm { namespace codeview { @@ -18,12 +18,20 @@ class SymbolVisitorCallbacks; class CVSymbolVisitor { public: + struct FilterOptions { + llvm::Optional<uint32_t> SymbolOffset; + llvm::Optional<uint32_t> ParentRecursiveDepth; + llvm::Optional<uint32_t> ChildRecursiveDepth; + }; + CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks); Error visitSymbolRecord(CVSymbol &Record); Error visitSymbolRecord(CVSymbol &Record, uint32_t Offset); Error visitSymbolStream(const CVSymbolArray &Symbols); Error visitSymbolStream(const CVSymbolArray &Symbols, uint32_t InitialOffset); +
Error visitSymbolStreamFiltered(const CVSymbolArray &Symbols, + const FilterOptions &Filter); private: SymbolVisitorCallbacks &Callbacks; diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index 7538cb2c2548..7780e233cab3 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -9,14 +9,17 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/Support/Error.h" namespace llvm { namespace codeview { +class TypeIndex; class TypeCollection; class TypeVisitorCallbacks; +struct CVMemberRecord; enum VisitorDataSource { VDS_BytesPresent, // The record bytes are passed into the visitation diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index d851dea0a27f..4fbe7e835a8a 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -9,14 +9,11 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H #define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H -#include "llvm/ADT/APSInt.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/GUID.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Error.h" @@ -26,7 +23,12 @@ namespace llvm { +template class ArrayRef; +class APSInt; + namespace codeview { +class TypeIndex; +struct GUID; class CodeViewRecordStreamer { public: @@ -246,7 +248,7 @@ private: Optional MaxLength; Optional bytesRemaining(uint32_t CurrentOffset) const { - if (!MaxLength.hasValue()) + if (!MaxLength) return None; assert(CurrentOffset >= BeginOffset); diff --git a/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h index 0e2f5d90e243..0f83ae370a1e 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h @@ -12,22 +12,16 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/RecordSerialization.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Error.h" -#include #include -#include #include namespace llvm { namespace codeview { +class TypeIndex; enum class ContinuationRecordKind { FieldList, MethodOverloadList }; class ContinuationRecordBuilder { diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h index 01f83676afdf..615fd216e655 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h +++ 
b/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h @@ -16,7 +16,6 @@ #include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include @@ -24,6 +23,9 @@ namespace llvm { +class BinaryStreamReader; +class BinaryStreamWriter; + namespace codeview { class DebugStringTableSubsection; diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h index 64a78a7cef21..e21873a3af8f 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h @@ -12,13 +12,14 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include #include namespace llvm { +class BinaryStreamReader; +class BinaryStreamWriter; namespace codeview { class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef { diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h index e7683cb2a9c4..198ce4a8b4e4 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -22,6 +21,8 @@ #include namespace llvm { +class BinaryStreamReader; +class BinaryStreamWriter; namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h index d5cd640231f9..f2c5bf9d7c95 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h @@ -11,11 +11,15 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugSubsection.h" -#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" namespace llvm { +class BinaryStreamReader; +class BinaryStreamWriter; + namespace codeview { class DebugFrameDataSubsectionRef final : public DebugSubsectionRef { public: diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h index 9fd88a64873a..f9d1507af5f3 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h @@ -12,7 +12,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugSubsection.h" -#include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamReader.h" diff --git 
a/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h index 1f8e56c5311f..68eb9e1af3bd 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -22,6 +21,8 @@ #include namespace llvm { +class BinaryStreamReader; +class BinaryStreamWriter; namespace codeview { class DebugChecksumsSubsection; diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h index 2e1cd15a3956..39413bb73b58 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h @@ -10,10 +10,12 @@ #define LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTION_H #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" + +#include namespace llvm { +class BinaryStreamWriter; namespace codeview { class DebugSubsectionRef { diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h index 151930d6d43d..fdca2ad063a1 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONVISITOR_H -#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h b/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h index 270cd4b8330c..ec874b7ca114 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h +++ b/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h @@ -12,10 +12,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/ScopedPrinter.h" #include namespace llvm { +template struct EnumEntry; namespace codeview { ArrayRef> getSymbolTypeNames(); diff --git a/llvm/include/llvm/DebugInfo/CodeView/Formatters.h b/llvm/include/llvm/DebugInfo/CodeView/Formatters.h index 7d04a6a89bef..10683c289224 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/Formatters.h +++ b/llvm/include/llvm/DebugInfo/CodeView/Formatters.h @@ -22,6 +22,8 @@ namespace llvm { namespace codeview { +struct GUID; + namespace detail { class GuidAdapter final : public FormatAdapter> { diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h index 465c26ec2ce6..d592bde18bae 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -10,9 +10,9 @@ #define LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPETABLEBUILDER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include 
"llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeHashing.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/Line.h b/llvm/include/llvm/DebugInfo/CodeView/Line.h index eb2aa154df1b..6918645b94d2 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/Line.h +++ b/llvm/include/llvm/DebugInfo/CodeView/Line.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_LINE_H #define LLVM_DEBUGINFO_CODEVIEW_LINE_H -#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Endian.h" #include diff --git a/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h index 0f9d5e476075..1965aab9b5cc 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h @@ -10,18 +10,18 @@ #define LLVM_DEBUGINFO_CODEVIEW_MERGINGTYPETABLEBUILDER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" -#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Allocator.h" #include namespace llvm { namespace codeview { +struct LocallyHashedType; class ContinuationRecordBuilder; diff --git a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h index 8e06be9e41e8..9078ed38d2f1 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h +++ b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h @@ -9,11 +9,14 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H #define LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H -#include "llvm/DebugInfo/CodeView/TypeCollection.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include namespace llvm { namespace codeview { +class TypeCollection; +class TypeIndex; std::string computeTypeName(TypeCollection &Types, TypeIndex Index); StringRef getSymbolName(CVSymbol Sym); } // namespace codeview diff --git a/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h index 36c0f2fbd8fa..10248dbf646b 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h +++ b/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDSERIALIZATION_H #define LLVM_DEBUGINFO_CODEVIEW_RECORDSERIALIZATION_H -#include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" @@ -18,9 +17,9 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include -#include namespace llvm { +class APSInt; namespace codeview { using llvm::support::little32_t; using llvm::support::ulittle16_t; diff --git a/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h index 22a283e785e1..50e745e5c2ab 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h +++ b/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h @@ -10,13 +10,15 @@ #define LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H #include "llvm/DebugInfo/CodeView/CodeView.h" -#include 
"llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" -#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include namespace llvm { namespace codeview { +class DebugChecksumsSubsection; +class DebugChecksumsSubsectionRef; +class DebugStringTableSubsection; +class DebugStringTableSubsectionRef; class StringsAndChecksumsRef { public: diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h index aaeffb2446ad..c674700fac59 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -9,11 +9,13 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLDUMPER_H #define LLVM_DEBUGINFO_CODEVIEW_SYMBOLDUMPER_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/Error.h" + +#include +#include namespace llvm { class ScopedPrinter; diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index c37f6b4d5fa7..9513e19a330a 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -196,7 +196,7 @@ struct BinaryAnnotationIterator const DecodedAnnotation &operator*() { ParseCurrentAnnotation(); - return Current.getValue(); + return *Current; } private: @@ -249,7 +249,7 @@ private: } bool ParseCurrentAnnotation() { - if (Current.hasValue()) + if (Current) return true; Next = Data; diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h index fb806c692cfd..53986f9a6db6 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h @@ -10,15 +10,17 @@ #define LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" +#include #include namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h index bde5a8b3ab2f..f643bc4d7451 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -34,7 +34,7 @@ public: template void ForEachRecord(TFunc Func) { Optional Next = getFirst(); - while (Next.hasValue()) { + while (Next) { TypeIndex N = *Next; Func(N, getType(N)); Next = getNext(N); diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h index 41a219ae5a7b..1fad50343e3a 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h @@ -9,16 +9,18 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEDUMPVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEDUMPVISITOR_H 
-#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" namespace llvm { class ScopedPrinter; namespace codeview { +class TypeIndex; +struct CVMemberRecord; +struct MemberAttributes; class TypeCollection; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h index 9f34d026b1ba..f49bc9b8e790 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -9,10 +9,11 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H -#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" @@ -21,6 +22,7 @@ #include namespace llvm { +class raw_ostream; namespace codeview { /// A locally hashed type represents a straightforward hash code of a serialized diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index 226a436c0930..653eafa04e0a 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -13,7 +13,6 @@ #include "llvm/Support/Endian.h" #include #include -#include namespace llvm { @@ -36,6 +35,7 @@ enum class SimpleTypeKind : uint32_t { WideCharacter = 0x0071, // wide char Character16 = 0x007a, // char16_t Character32 = 0x007b, // char32_t + Character8 = 0x007c, // char8_t SByte = 0x0068, // 8 bit signed int Byte = 0x0069, // 8 bit unsigned int diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h index f4f5835d8b57..7ef8521604fb 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -9,13 +9,13 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/Support/Error.h" namespace llvm { +template class SmallVectorImpl; namespace codeview { +class TypeIndex; enum class TiRefKind { TypeRef, IndexRef }; struct TiReference { TiRefKind Kind; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h index c6044d5138a8..ed4fc7a75624 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h @@ -10,7 +10,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H #include "llvm/ADT/Optional.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" @@ -20,6 +21,8 @@ class BinaryStreamReader; class BinaryStreamWriter; namespace 
codeview { +class TypeIndex; +struct CVMemberRecord; class TypeRecordMapping : public TypeVisitorCallbacks { public: explicit TypeRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {} diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index 04d7c7b0420a..04a1e44dd809 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -10,11 +10,12 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPESTREAMMERGER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/Support/Error.h" namespace llvm { +template class Optional; +template class SmallVectorImpl; namespace codeview { class TypeIndex; diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h index d029556c9d89..9b278b696073 100644 --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -90,6 +90,8 @@ class DIInliningInfo { public: DIInliningInfo() = default; + /// Returns the frame at `Index`. Frames are stored in bottom-up + /// (leaf-to-root) order with increasing index. const DILineInfo &getFrame(unsigned Index) const { assert(Index < Frames.size()); return Frames[Index]; @@ -112,6 +114,8 @@ struct DIGlobal { std::string Name; uint64_t Start = 0; uint64_t Size = 0; + std::string DeclFile; + uint64_t DeclLine = 0; DIGlobal() : Name(DILineInfo::BadString) {} }; @@ -151,6 +155,10 @@ struct DILineInfoSpecifier { DILineInfoSpecifier(FileLineInfoKind FLIKind = FileLineInfoKind::RawValue, FunctionNameKind FNKind = FunctionNameKind::None) : FLIKind(FLIKind), FNKind(FNKind) {} + + inline bool operator==(const DILineInfoSpecifier &RHS) const { + return FLIKind == RHS.FLIKind && FNKind == RHS.FNKind; + } }; /// This is just a helper to programmatically construct DIDumpType. 
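The `operator==` added to DILineInfoSpecifier above makes lookup specifiers cheap to compare, which is what lets a caller reuse a previous symbolization result only when it was produced under the same rendering rules. A minimal sketch of that caching pattern, using hypothetical stand-in types rather than LLVM's actual classes:

```cpp
#include <cstdint>
#include <optional>
#include <string>
#include <tuple>

// Stand-ins for DILineInfoSpecifier / DILineInfo; names and fields here are
// illustrative only, not LLVM's real definitions.
enum class FileLineInfoKind { RawValue, AbsoluteFilePath };
enum class FunctionNameKind { None, LinkageName };

struct LineInfoSpecifier {
  FileLineInfoKind FLIKind = FileLineInfoKind::RawValue;
  FunctionNameKind FNKind = FunctionNameKind::None;

  // Mirrors the equality the patch adds: two specifiers request the same
  // rendering exactly when both kinds match.
  bool operator==(const LineInfoSpecifier &RHS) const {
    return FLIKind == RHS.FLIKind && FNKind == RHS.FNKind;
  }
};

struct LineInfo {
  std::string FileName;
  uint32_t Line = 0;
};

// Toy consumer: redo the (expensive) query only when the address or the
// requested rendering differs from the previous call.
class CachingSymbolizer {
  std::optional<std::tuple<uint64_t, LineInfoSpecifier, LineInfo>> Last;

public:
  LineInfo lookup(uint64_t Addr, const LineInfoSpecifier &Spec) {
    if (Last && std::get<0>(*Last) == Addr && std::get<1>(*Last) == Spec)
      return std::get<2>(*Last); // cache hit: same address, same rules

    LineInfo Result{"demo.c", 42}; // placeholder for a real DWARF query
    Last = std::make_tuple(Addr, Spec, Result);
    return Result;
  }
};
```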
@@ -233,6 +241,8 @@ public: virtual DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; + virtual DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) = 0; virtual DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index cdf3f60f88be..3887656ceef6 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -13,13 +13,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/Support/DataExtractor.h" #include #include #include namespace llvm { +class DataExtractor; class DWARFFormValue; class DWARFUnit; class raw_ostream; @@ -34,7 +34,7 @@ public: AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional ByteSize) : Attr(A), Form(F) { assert(!isImplicitConst()); - this->ByteSize.HasByteSize = ByteSize.hasValue(); + this->ByteSize.HasByteSize = ByteSize.has_value(); if (this->ByteSize.HasByteSize) this->ByteSize.ByteSize = *ByteSize; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h index 537a03ec11fc..f4d6c451cbe1 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h @@ -10,6 +10,9 @@ #define LLVM_DEBUGINFO_DWARF_DWARFADDRESSRANGE_H #include "llvm/DebugInfo/DIContext.h" +#include "llvm/Object/ObjectFile.h" +#include +#include #include #include #include diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index ec5a3cd85266..d449b7bed796 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -10,10 +10,15 @@ #define LLVM_DEBUGINFO_DWARF_DWARFCOMPILEUNIT_H #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" namespace llvm { +class DWARFContext; +class DWARFDebugAbbrev; +class raw_ostream; +struct DIDumpOptions; +struct DWARFSection; + class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index e82faf6eeb24..bf591ed554c6 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -9,43 +9,37 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H #define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include 
"llvm/DebugInfo/DWARF/DWARFDie.h" -#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFObject.h" -#include "llvm/DebugInfo/DWARF/DWARFSection.h" -#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" #include "llvm/Support/Host.h" #include -#include -#include #include namespace llvm { class MCRegisterInfo; class MemoryBuffer; -class raw_ostream; +class AppleAcceleratorTable; +class DWARFCompileUnit; +class DWARFDebugAbbrev; +class DWARFDebugAranges; +class DWARFDebugFrame; +class DWARFDebugLoc; +class DWARFDebugMacro; +class DWARFDebugNames; +class DWARFGdbIndex; +class DWARFTypeUnit; +class DWARFUnitIndex; /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug @@ -124,7 +118,7 @@ public: WithColor::defaultErrorHandler, std::function WarningHandler = WithColor::defaultWarningHandler); - ~DWARFContext(); + ~DWARFContext() override; DWARFContext(DWARFContext &) = delete; DWARFContext &operator=(DWARFContext &) = delete; @@ -339,6 +333,10 @@ public: getLineTableForUnit(DWARFUnit *U, function_ref RecoverableErrorHandler); + // Clear the line table object corresponding to a compile unit for memory + // management purpose. When it's referred to again, it'll be re-populated. + void clearLineTableForUnit(DWARFUnit *U); + DataExtractor getStringExtractor() const { return DataExtractor(DObj->getStrSection(), false, 0); } @@ -366,6 +364,8 @@ public: DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) override; DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h index e1407ddd89eb..67d9ce1476dd 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h @@ -11,17 +11,14 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include -#include #include namespace llvm { -class Error; class raw_ostream; +class DWARFDataExtractor; /// A class representing an address table as specified in DWARF v5. 
/// The table consists of a header followed by an array of address values from diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index 65334b4a4976..760d8826771c 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -10,7 +10,7 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGARANGESET_H #include "llvm/ADT/iterator_range.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Error.h" #include #include @@ -18,6 +18,7 @@ namespace llvm { class raw_ostream; +class DWARFDataExtractor; class DWARFDebugArangeSet { public: diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index 216dd1e4defc..068674cfae5c 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -10,11 +10,13 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGARANGES_H #include "llvm/ADT/DenseSet.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include #include namespace llvm { +class DWARFDataExtractor; +class Error; class DWARFContext; @@ -26,7 +28,8 @@ public: private: void clear(); void extract(DWARFDataExtractor DebugArangesData, - function_ref RecoverableErrorHandler); + function_ref RecoverableErrorHandler, + function_ref WarningHandler); /// Call appendRange multiple times and then call construct. void appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index 8167aaaeffb5..48df091412bf 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -13,7 +13,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/Support/Error.h" #include @@ -23,6 +22,9 @@ namespace llvm { class raw_ostream; +class DWARFDataExtractor; +class MCRegisterInfo; +struct DIDumpOptions; namespace dwarf { @@ -130,7 +132,7 @@ public: uint32_t getRegister() const { return RegNum; } int32_t getOffset() const { return Offset; } uint32_t getAddressSpace() const { - assert(Kind == RegPlusOffset && AddrSpace.hasValue()); + assert(Kind == RegPlusOffset && AddrSpace); return *AddrSpace; } int32_t getConstant() const { return Offset; } @@ -259,7 +261,7 @@ public: UnwindRow() : CFAValue(UnwindLocation::createUnspecified()) {} /// Returns true if the address is valid in this object. - bool hasAddress() const { return Address.hasValue(); } + bool hasAddress() const { return Address.has_value(); } /// Get the address for this row. 
/// diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index 6bdd23900182..9befcc0c4182 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -11,12 +11,12 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include namespace llvm { class DWARFUnit; +class DWARFDataExtractor; /// DWARFDebugInfoEntry - A DIE with only the minimum required data. class DWARFDebugInfoEntry { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index ee15b6d4112d..86f90135f8d4 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -11,12 +11,10 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include @@ -26,7 +24,6 @@ namespace llvm { -class DWARFUnit; class raw_ostream; class DWARFDebugLine { @@ -307,6 +304,7 @@ public: getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U, function_ref RecoverableErrorHandler); + void clearLineTable(uint64_t Offset); /// Helper to allow for parsing of an entire .debug_line section in sequence. class SectionParser { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index 1794f6649827..90e009e514d4 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -11,10 +11,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Errc.h" #include @@ -22,6 +19,12 @@ namespace llvm { class DWARFUnit; class MCRegisterInfo; class raw_ostream; +class DWARFObject; +struct DIDumpOptions; +struct DWARFLocationExpression; +namespace object { +struct SectionedAddress; +} /// A single location within a location list. Entries are stored in the DWARF5 /// form even if they originally come from a DWARF<=4 location list. 
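Several hunks in this section mechanically migrate callers from llvm::Optional's old accessors (hasValue, getValue, getValueOr) to the std::optional-style spellings. A small sketch of the before/after idioms; std::optional is used here as a stand-in for llvm::Optional, which was converging on the same interface:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

int main() {
  std::optional<uint32_t> AddrSpace = 7;

  // hasValue() becomes has_value(), or just contextual conversion to bool:
  assert(AddrSpace.has_value());
  assert(AddrSpace); // the shorter form the patch prefers inside asserts

  // getValue() becomes operator* (or value()):
  uint32_t AS = *AddrSpace;

  // getValueOr(Default) becomes value_or(Default):
  std::optional<uint64_t> Form; // empty
  uint64_t V = Form.value_or(0);

  return (AS == 7 && V == 0) ? 0 : 1;
}
```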
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h index f1768a1ddab5..d98cf9a6045a 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -12,7 +12,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h index cb347615868b..6c82bbfe74f7 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h @@ -10,16 +10,17 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGPUBTABLE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include "llvm/DebugInfo/DWARF/DWARFObject.h" #include #include namespace llvm { class raw_ostream; +class DWARFDataExtractor; +class Error; /// Represents structure for holding and parsing .debug_pub* tables. class DWARFDebugPubTable { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 0d9f37c5610b..f4aeac1bb9db 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -10,14 +10,16 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include #include #include namespace llvm { class raw_ostream; +class DWARFDataExtractor; +namespace object { +struct SectionedAddress; +} class DWARFDebugRangeList { public: diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index 2baa6493f709..13f018f53fa1 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -10,11 +10,9 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRNGLISTS_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFListTable.h" #include @@ -23,6 +21,11 @@ namespace llvm { class Error; class raw_ostream; class DWARFUnit; +class DWARFDataExtractor; +struct DIDumpOptions; +namespace object { +struct SectionedAddress; +} /// A class representing a single range list entry. 
struct RangeListEntry : public DWARFListEntryBase { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index f731d440a35b..149c5ef4e493 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -18,7 +18,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFAttribute.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include #include #include @@ -280,6 +280,13 @@ public: /// \returns an iterator range for the attributes of the current DIE. iterator_range<attribute_iterator> attributes() const; + /// Gets the type size (in bytes) for this DIE. + /// + /// \param PointerSize the pointer size of the containing CU. + /// \returns the size of the type if this is a type DIE, or if this DIE + /// contains a DW_AT_type; None otherwise. + Optional<uint64_t> getTypeSize(uint64_t PointerSize); + class iterator; iterator begin() const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h index b694eeacfd9d..c4d81047a4dc 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -9,16 +9,15 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFEXPRESSION_H #define LLVM_DEBUGINFO_DWARF_DWARFEXPRESSION_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DIContext.h" #include "llvm/Support/DataExtractor.h" namespace llvm { class DWARFUnit; +struct DIDumpOptions; class MCRegisterInfo; class raw_ostream; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 130cdb8800a9..c2c1df5b590b 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -14,12 +14,14 @@ #include "llvm/ADT/Optional.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/Support/DataExtractor.h" #include namespace llvm { class DWARFContext; +class DWARFObject; +class DWARFDataExtractor; class DWARFUnit; class raw_ostream; @@ -234,7 +236,7 @@ inline Optional<uint64_t> toUnsigned(const Optional<DWARFFormValue> &V) { /// value or the form value's encoding wasn't an unsigned constant form. inline uint64_t toUnsigned(const Optional<DWARFFormValue> &V, uint64_t Default) { - return toUnsigned(V).getValueOr(Default); + return toUnsigned(V).value_or(Default); } /// Take an optional DWARFFormValue and try to extract a reference. @@ -256,7 +258,7 @@ inline Optional<uint64_t> toReference(const Optional<DWARFFormValue> &V) { /// value or the form value's encoding wasn't a reference form. inline uint64_t toReference(const Optional<DWARFFormValue> &V, uint64_t Default) { - return toReference(V).getValueOr(Default); + return toReference(V).value_or(Default); } /// Take an optional DWARFFormValue and try to extract a signed constant. @@ -277,7 +279,7 @@ inline Optional<int64_t> toSigned(const Optional<DWARFFormValue> &V) { /// \returns the extracted signed integer value or Default if the V doesn't /// have a value or the form value's encoding wasn't a signed integer form.
inline int64_t toSigned(const Optional<DWARFFormValue> &V, int64_t Default) { - return toSigned(V).getValueOr(Default); + return toSigned(V).value_or(Default); } /// Take an optional DWARFFormValue and try to extract an address. @@ -305,7 +307,7 @@ toSectionedAddress(const Optional<DWARFFormValue> &V) { /// \returns the extracted address value or Default if the V doesn't have a /// value or the form value's encoding wasn't an address form. inline uint64_t toAddress(const Optional<DWARFFormValue> &V, uint64_t Default) { - return toAddress(V).getValueOr(Default); + return toAddress(V).value_or(Default); } /// Take an optional DWARFFormValue and try to extract a section offset. @@ -327,7 +329,7 @@ inline Optional<uint64_t> toSectionOffset(const Optional<DWARFFormValue> &V) { /// have a value or the form value's encoding wasn't a section offset form. inline uint64_t toSectionOffset(const Optional<DWARFFormValue> &V, uint64_t Default) { - return toSectionOffset(V).getValueOr(Default); + return toSectionOffset(V).value_or(Default); } /// Take an optional DWARFFormValue and try to extract block data. diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h index 38cd42ddb883..6b23c4e57d95 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h @@ -11,13 +11,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataExtractor.h" #include #include namespace llvm { class raw_ostream; +class DataExtractor; class DWARFGdbIndex { uint32_t Version; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h index 515623cedc94..84c8d71b04fc 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h index 3add711943d0..fef59c5e95f8 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h @@ -10,6 +10,7 @@ #define LLVM_DEBUGINFO_DWARF_DWARFRELOCMAP_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Object/RelocationResolver.h" #include diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h new file mode 100644 index 000000000000..e05271740e61 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h @@ -0,0 +1,67 @@ +//===- DWARFTypePrinter.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H +#define LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" + +#include + +namespace llvm { + +class raw_ostream; + +// FIXME: We should have pretty printers per language. Currently we print +// everything as if it were C++ and fall back to the TAG type name.
+struct DWARFTypePrinter { + raw_ostream &OS; + bool Word = true; + bool EndedWithTemplate = false; + + DWARFTypePrinter(raw_ostream &OS) : OS(OS) {} + + /// Dump the name encoded in the type tag. + void appendTypeTagName(dwarf::Tag T); + + void appendArrayType(const DWARFDie &D); + + DWARFDie skipQualifiers(DWARFDie D); + + bool needsParens(DWARFDie D); + + void appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, StringRef Ptr); + + DWARFDie appendUnqualifiedNameBefore(DWARFDie D, + std::string *OriginalFullName = nullptr); + + void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner, + bool SkipFirstParamIfArtificial = false); + void appendQualifiedName(DWARFDie D); + DWARFDie appendQualifiedNameBefore(DWARFDie D); + bool appendTemplateParameters(DWARFDie D, bool *FirstParameter = nullptr); + void decomposeConstVolatile(DWARFDie &N, DWARFDie &T, DWARFDie &C, + DWARFDie &V); + void appendConstVolatileQualifierAfter(DWARFDie N); + void appendConstVolatileQualifierBefore(DWARFDie N); + + /// Recursively append the DIE type name when applicable. + void appendUnqualifiedName(DWARFDie D, + std::string *OriginalFullName = nullptr); + + void appendSubroutineNameAfter(DWARFDie D, DWARFDie Inner, + bool SkipFirstParamIfArtificial, bool Const, + bool Volatile); + void appendScopes(DWARFDie D); +}; + +} // namespace llvm + +#endif // LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index c95bdcbd8a43..85ec6fd86ade 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -11,12 +11,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" -#include "llvm/Support/DataExtractor.h" #include namespace llvm { +struct DIDumpOptions; class DWARFContext; class DWARFDebugAbbrev; struct DWARFSection; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index b96a4c19758f..9188865b4d77 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -9,28 +9,26 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" -#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/Support/DataExtractor.h" -#include #include #include #include #include #include +#include #include #include @@ -40,6 +38,12 @@ class DWARFAbbreviationDeclarationSet; class DWARFContext; class DWARFDebugAbbrev; class DWARFUnit; +class DWARFDebugRangeList; +class DWARFLocationTable; +class DWARFObject; +class raw_ostream; +struct DIDumpOptions; +struct DWARFSection; /// Base 
class describing the header of any kind of "unit." Some information /// is specific to certain unit types. We separate this class out so we can @@ -238,6 +242,11 @@ class DWARFUnit { /// std::map::upper_bound for address range lookup. std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; + /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable, + /// to the end address and the corresponding DIE. + std::map<uint64_t, std::pair<uint64_t, DWARFDie>> VariableDieMap; + DenseSet<uint64_t> RootsParsedForVariables; + using die_iterator_range = iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; @@ -320,6 +329,9 @@ public: /// Recursively update address to Die map. void updateAddressDieMap(DWARFDie Die); + /// Recursively update address to variable Die map. + void updateVariableDieMap(DWARFDie Die); + void setRangesSection(const DWARFSection *RS, uint64_t Base) { RangeSection = RS; RangeSectionBase = Base; @@ -434,6 +446,10 @@ public: /// cleared. DWARFDie getSubroutineForAddress(uint64_t Address); + /// Returns variable DIE for the address provided. The pointer is alive as + /// long as parsed compile unit DIEs are not cleared. + DWARFDie getVariableForAddress(uint64_t Address); + /// getInlinedChainForAddress - fetches inlined chain for a given address. /// Returns empty chain if there is no subprogram containing address. The /// chain is valid as long as parsed compile unit DIEs are not cleared. diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h index edea59e474cf..b5e191ba7def 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h @@ -11,13 +11,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataExtractor.h" #include #include namespace llvm { class raw_ostream; +class DataExtractor; /// The enum of section identifiers to be used in internal interfaces. /// @@ -64,6 +64,25 @@ enum DWARFSectionKind { DW_SECT_EXT_MACINFO = 10, }; +inline const char *toString(DWARFSectionKind Kind) { + switch (Kind) { + case DW_SECT_EXT_unknown: + return "Unknown DW_SECT value 0"; +#define STRINGIZE(X) #X +#define HANDLE_DW_SECT(ID, NAME) \ + case DW_SECT_##NAME: \ + return "DW_SECT_" STRINGIZE(NAME); +#include "llvm/BinaryFormat/Dwarf.def" + case DW_SECT_EXT_TYPES: + return "DW_SECT_TYPES"; + case DW_SECT_EXT_LOC: + return "DW_SECT_LOC"; + case DW_SECT_EXT_MACINFO: + return "DW_SECT_MACINFO"; + } + llvm_unreachable("unknown DWARFSectionKind"); +} + /// Convert the internal value for a section kind to an on-disk value. /// /// The conversion depends on the version of the index section.
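The new toString(DWARFSectionKind) above gets its cases from the HANDLE_DW_SECT X-macro in Dwarf.def. A self-contained sketch of the same pattern; the FOR_EACH_SECT list and section names here are stand-ins, not the real Dwarf.def contents:

#include <iostream>

// Stand-in for llvm/BinaryFormat/Dwarf.def: the X-macro expands once per
// section kind it lists.
#define FOR_EACH_SECT(X) X(INFO) X(ABBREV) X(LINE)

enum SectionKind {
#define HANDLE_SECT(NAME) SECT_##NAME,
  FOR_EACH_SECT(HANDLE_SECT)
#undef HANDLE_SECT
};

// The same stringizing trick the new toString(DWARFSectionKind) uses.
#define STRINGIZE(X) #X
const char *toString(SectionKind K) {
  switch (K) {
#define HANDLE_SECT(NAME) case SECT_##NAME: return "SECT_" STRINGIZE(NAME);
  FOR_EACH_SECT(HANDLE_SECT)
#undef HANDLE_SECT
  }
  return "unknown";
}

int main() { std::cout << toString(SECT_LINE) << '\n'; } // prints SECT_LINE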
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 505686bfbf59..1f1ebe943238 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -12,9 +12,9 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" +#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" -#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include #include #include @@ -22,13 +22,14 @@ namespace llvm { class raw_ostream; struct DWARFAddressRange; +class DWARFUnit; +class DWARFUnitVector; struct DWARFAttribute; class DWARFContext; class DWARFDataExtractor; class DWARFDebugAbbrev; class DataExtractor; struct DWARFSection; -class DWARFUnit; /// A class that verifies DWARF debug information given a DWARF Context. class DWARFVerifier { @@ -151,12 +152,15 @@ private: /// section. /// /// \param S The DWARF Section to verify. - /// \param SectionKind The object-file section kind that S comes from. /// /// \returns The number of errors that occurred during verification. unsigned verifyUnitSection(const DWARFSection &S); unsigned verifyUnits(const DWARFUnitVector &Units); + unsigned verifyIndexes(const DWARFObject &DObj); + unsigned verifyIndex(StringRef Name, DWARFSectionKind SectionKind, + StringRef Index); + /// Verifies that a call site entry is nested within a subprogram with a /// DW_AT_call attribute. /// @@ -301,6 +305,24 @@ public: /// \returns true if all sections verify successfully, false otherwise. bool handleDebugInfo(); + /// Verify the information in the .debug_cu_index section. + /// + /// Any errors are reported to the stream that this object was + /// constructed with. + /// + /// \returns true if the .debug_cu_index verifies successfully, false + /// otherwise. + bool handleDebugCUIndex(); + + /// Verify the information in the .debug_tu_index section. + /// + /// Any errors are reported to the stream that this object was + /// constructed with. + /// + /// \returns true if the .debug_tu_index verifies successfully, false + /// otherwise. + bool handleDebugTUIndex(); + /// Verify the information in the .debug_line section. /// /// Any errors are reported to the stream that this object was diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h index 32fc54b14796..b8d7199f2d87 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -10,7 +10,7 @@ #define LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h b/llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h new file mode 100644 index 000000000000..9a6568719875 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h @@ -0,0 +1,81 @@ +//===- ExtractRanges.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_EXTRACTRANGES_H +#define LLVM_DEBUGINFO_GSYM_EXTRACTRANGES_H + +#include "llvm/ADT/AddressRanges.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +#define HEX8(v) llvm::format_hex(v, 4) +#define HEX16(v) llvm::format_hex(v, 6) +#define HEX32(v) llvm::format_hex(v, 10) +#define HEX64(v) llvm::format_hex(v, 18) + +namespace llvm { +class DataExtractor; +class raw_ostream; + +namespace gsym { + +class FileWriter; + +/// AddressRange objects are encoded and decoded to be relative to a base +/// address. This will be the FunctionInfo's start address if the AddressRange +/// is directly contained in a FunctionInfo, or a base address of the +/// containing parent AddressRange or AddressRanges. This allows address +/// ranges to be efficiently encoded using ULEB128 encodings as we encode the +/// offset and size of each range instead of full addresses. This also makes +/// encoded addresses easy to relocate as we just need to relocate one base +/// address. +/// @{ +AddressRange decodeRange(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset); +void encodeRange(const AddressRange &Range, FileWriter &O, uint64_t BaseAddr); +/// @} + +/// Skip an address range object in the specified data at the specified +/// offset. +/// +/// \param Data The binary stream to read the data from. +/// +/// \param Offset The byte offset within \a Data. +void skipRange(DataExtractor &Data, uint64_t &Offset); + +/// Address ranges are decoded and encoded to be relative to a base address. +/// See the AddressRange comment for the encode and decode methods for full +/// details. +/// @{ +void decodeRanges(AddressRanges &Ranges, DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset); +void encodeRanges(const AddressRanges &Ranges, FileWriter &O, + uint64_t BaseAddr); +/// @} + +/// Skip a set of address ranges in the specified data at the specified +/// offset. +/// +/// \param Data The binary stream to read the data from. +/// +/// \param Offset The byte offset within \a Data. +/// +/// \returns The number of address ranges that were skipped. +uint64_t skipRanges(DataExtractor &Data, uint64_t &Offset); + +} // namespace gsym + +raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R); + +raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR); + +} // namespace llvm + +#endif // LLVM_DEBUGINFO_GSYM_EXTRACTRANGES_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h index 552337f54390..fb48f7f9a93c 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -10,10 +10,10 @@ #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/LineTable.h" #include "llvm/DebugInfo/GSYM/LookupResult.h" -#include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" #include #include @@ -102,9 +102,7 @@ struct FunctionInfo { /// debug info, we might end up with multiple FunctionInfo objects for the /// same range and we need to be able to tell which one is the better object /// to use.
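// A minimal sketch (not the gsym::FileWriter API) of why the base-relative
// ULEB128 scheme described in ExtractRanges.h above is compact: we emit
// (start - base) and (end - start), which are small integers, instead of two
// full 64-bit addresses. encodeRangeRelative is a hypothetical stand-in for
// encodeRange(Range, O, BaseAddr).
#include <cstdint>
#include <vector>

static void emitULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // high bit set: more bytes follow
    Out.push_back(Byte);
  } while (V);
}

static void encodeRangeRelative(uint64_t Start, uint64_t End, uint64_t BaseAddr,
                                std::vector<uint8_t> &Out) {
  emitULEB128(Start - BaseAddr, Out); // small offset, usually 1-2 bytes
  emitULEB128(End - Start, Out);      // size, also small
}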
- bool hasRichInfo() const { - return OptLineTable.hasValue() || Inline.hasValue(); - } + bool hasRichInfo() const { return OptLineTable || Inline; } /// Query if a FunctionInfo object is valid. /// @@ -170,12 +168,9 @@ struct FunctionInfo { uint64_t FuncAddr, uint64_t Addr); - uint64_t startAddress() const { return Range.Start; } - uint64_t endAddress() const { return Range.End; } + uint64_t startAddress() const { return Range.start(); } + uint64_t endAddress() const { return Range.end(); } uint64_t size() const { return Range.size(); } - void setStartAddress(uint64_t Addr) { Range.Start = Addr; } - void setEndAddress(uint64_t Addr) { Range.End = Addr; } - void setSize(uint64_t Size) { Range.End = Range.Start + Size; } void clear() { Range = {0, 0}; @@ -203,8 +198,8 @@ inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) { return LHS.Range < RHS.Range; // Then sort by inline - if (LHS.Inline.hasValue() != RHS.Inline.hasValue()) - return RHS.Inline.hasValue(); + if (LHS.Inline.has_value() != RHS.Inline.has_value()) + return RHS.Inline.has_value(); return LHS.OptLineTable < RHS.OptLineTable; } diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h index 872ccd4a0b6a..29ad1c18e295 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -14,11 +14,11 @@ #include #include +#include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" -#include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h index 9bcfa5935180..80385116598a 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -10,14 +10,13 @@ #define LLVM_DEBUGINFO_GSYM_INLINEINFO_H #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" #include "llvm/DebugInfo/GSYM/LineEntry.h" #include "llvm/DebugInfo/GSYM/LookupResult.h" -#include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/Support/Error.h" #include #include - namespace llvm { class raw_ostream; diff --git a/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h b/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h index b4e7587fc5ee..e68624b21929 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h @@ -9,7 +9,7 @@ #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H #define LLVM_DEBUGINFO_GSYM_LINEENTRY_H -#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" namespace llvm { namespace gsym { diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h index 3dabbce32bb2..44e58f522002 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h @@ -9,8 +9,8 @@ #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H #define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H +#include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/GSYM/Range.h" #include #include diff --git a/llvm/include/llvm/DebugInfo/GSYM/Range.h b/llvm/include/llvm/DebugInfo/GSYM/Range.h deleted file mode 100644 index 36ad95602d14..000000000000 --- a/llvm/include/llvm/DebugInfo/GSYM/Range.h +++ /dev/null @@ -1,130 +0,0 @@ 
-//===- Range.h --------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H -#define LLVM_DEBUGINFO_GSYM_RANGE_H - -#include "llvm/ADT/Optional.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include -#include - -#define HEX8(v) llvm::format_hex(v, 4) -#define HEX16(v) llvm::format_hex(v, 6) -#define HEX32(v) llvm::format_hex(v, 10) -#define HEX64(v) llvm::format_hex(v, 18) - -namespace llvm { -class DataExtractor; -class raw_ostream; - -namespace gsym { - -class FileWriter; - -/// A class that represents an address range. The range is specified using -/// a start and an end address. -struct AddressRange { - uint64_t Start; - uint64_t End; - AddressRange() : Start(0), End(0) {} - AddressRange(uint64_t S, uint64_t E) : Start(S), End(E) {} - uint64_t size() const { return End - Start; } - bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; } - bool intersects(const AddressRange &R) const { - return Start < R.End && R.Start < End; - } - - bool operator==(const AddressRange &R) const { - return Start == R.Start && End == R.End; - } - bool operator!=(const AddressRange &R) const { - return !(*this == R); - } - bool operator<(const AddressRange &R) const { - return std::make_pair(Start, End) < std::make_pair(R.Start, R.End); - } - /// AddressRange objects are encoded and decoded to be relative to a base - /// address. This will be the FunctionInfo's start address if the AddressRange - /// is directly contained in a FunctionInfo, or a base address of the - /// containing parent AddressRange or AddressRanges. This allows address - /// ranges to be efficiently encoded using ULEB128 encodings as we encode the - /// offset and size of each range instead of full addresses. This also makes - /// encoded addresses easy to relocate as we just need to relocate one base - /// address. - /// @{ - void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); - void encode(FileWriter &O, uint64_t BaseAddr) const; - /// @} - - /// Skip an address range object in the specified data a the specified - /// offset. - /// - /// \param Data The binary stream to read the data from. - /// - /// \param Offset The byte offset within \a Data. - static void skip(DataExtractor &Data, uint64_t &Offset); -}; - -raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R); - -/// The AddressRanges class helps normalize address range collections. -/// This class keeps a sorted vector of AddressRange objects and can perform -/// insertions and searches efficiently. The address ranges are always sorted -/// and never contain any invalid or empty address ranges. This allows us to -/// emit address ranges into the GSYM file efficiently. Intersecting address -/// ranges are combined during insertion so that we can emit the most compact -/// representation for address ranges when writing to disk. 
-class AddressRanges { -protected: - using Collection = std::vector; - Collection Ranges; -public: - void clear() { Ranges.clear(); } - bool empty() const { return Ranges.empty(); } - bool contains(uint64_t Addr) const; - bool contains(AddressRange Range) const; - Optional getRangeThatContains(uint64_t Addr) const; - void insert(AddressRange Range); - size_t size() const { return Ranges.size(); } - bool operator==(const AddressRanges &RHS) const { - return Ranges == RHS.Ranges; - } - const AddressRange &operator[](size_t i) const { - assert(i < Ranges.size()); - return Ranges[i]; - } - Collection::const_iterator begin() const { return Ranges.begin(); } - Collection::const_iterator end() const { return Ranges.end(); } - - /// Address ranges are decoded and encoded to be relative to a base address. - /// See the AddressRange comment for the encode and decode methods for full - /// details. - /// @{ - void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); - void encode(FileWriter &O, uint64_t BaseAddr) const; - /// @} - - /// Skip an address range object in the specified data a the specified - /// offset. - /// - /// \param Data The binary stream to read the data from. - /// - /// \param Offset The byte offset within \a Data. - /// - /// \returns The number of address ranges that were skipped. - static uint64_t skip(DataExtractor &Data, uint64_t &Offset); -}; - -raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR); - -} // namespace gsym -} // namespace llvm - -#endif // LLVM_DEBUGINFO_GSYM_RANGE_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h index d920335d373e..d9c9ede91be5 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h @@ -10,7 +10,7 @@ #define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" #include namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h b/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h index 1a03d42ded92..2ac18a8efaba 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h +++ b/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h @@ -11,7 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include @@ -22,6 +22,8 @@ namespace llvm { class FileBufferByteStream; namespace msf { +struct MSFLayout; + class MSFBuilder { public: /// Create a new `MSFBuilder`. 
diff --git a/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h index bfa67d39bc76..6cd5c8d1d668 100644 --- a/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h +++ b/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h @@ -9,6 +9,7 @@ #ifndef LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H #define LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include #include #include diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h index 70ef4d058082..1ecae5c32509 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h @@ -10,16 +10,16 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULEDESCRIPTOR_H #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include namespace llvm { +template struct VarStreamArrayExtractor; namespace pdb { - +struct ModuleInfoHeader; +struct SectionContrib; class DbiModuleDescriptor { friend class DbiStreamBuilder; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index 8a49f46320b0..287f319e01b0 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -9,13 +9,12 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULEDESCRIPTORBUILDER_H #define LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULEDESCRIPTORBUILDER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" -#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" -#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include #include @@ -23,9 +22,8 @@ namespace llvm { class BinaryStreamWriter; - namespace codeview { -class DebugSubsectionRecordBuilder; +class DebugSubsection; } namespace msf { diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h index 0bdb27a0a991..3f60130f5752 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -9,14 +9,10 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_DBISTREAM_H #define LLVM_DEBUGINFO_PDB_NATIVE_DBISTREAM_H -#include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" @@ -24,13 +20,19 @@ #include "llvm/Support/Error.h" namespace llvm { +class BinaryStream; namespace object { struct FpoData; struct coff_section; } - +namespace msf 
{ +class MappedBlockStream; +} namespace pdb { -class DbiStreamBuilder; +struct DbiStreamHeader; +struct SecMapEntry; +struct SectionContrib2; +struct SectionContrib; class PDBFile; class ISectionContribVisitor; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h index ef441d433040..2f99aa942a05 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h @@ -10,35 +10,33 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_DBISTREAMBUILDER_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" +#include "llvm/Support/BinaryStreamRef.h" namespace llvm { + +class BinaryStreamWriter; namespace codeview { struct FrameData; } namespace msf { class MSFBuilder; -} -namespace object { -struct coff_section; -struct FpoData; +struct MSFLayout; } namespace pdb { -class DbiStream; -struct DbiStreamHeader; class DbiModuleDescriptorBuilder; -class PDBFile; class DbiStreamBuilder { public: @@ -134,7 +132,7 @@ private: std::vector SectionMap; std::array, (int)DbgHeaderType::Max> DbgStreams; }; -} +} // namespace pdb } #endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h b/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h index 60cd494639c1..dcc67f1e4a8c 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h @@ -10,9 +10,9 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_ENUMTABLES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/ScopedPrinter.h" namespace llvm { +template struct EnumEntry; namespace pdb { ArrayRef> getOMFSegMapDescFlagNames(); } diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h new file mode 100644 index 000000000000..ed745eaf9727 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h @@ -0,0 +1,133 @@ +//===- FormatUtil.h ------------------------------------------- *- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_FORMATUTIL_H +#define LLVM_DEBUGINFO_PDB_NATIVE_FORMATUTIL_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" + +#include +#include + +namespace llvm { +namespace pdb { + +#define PUSH_MASKED_FLAG(Enum, Mask, TheOpt, Value, Text) \ + if (Enum::TheOpt == (Value & Mask)) \ + Opts.push_back(Text); + +#define PUSH_FLAG(Enum, TheOpt, Value, Text) \ + PUSH_MASKED_FLAG(Enum, Enum::TheOpt, TheOpt, Value, Text) + +#define RETURN_CASE(Enum, X, Ret) \ + case Enum::X: \ + return Ret; + +template std::string formatUnknownEnum(T Value) { + return formatv("unknown ({0})", static_cast>(Value)) + .str(); +} + +std::string formatSegmentOffset(uint16_t Segment, uint32_t Offset); + +enum class CharacteristicStyle { + HeaderDefinition, // format as windows header definition + Descriptive, // format as human readable words +}; +std::string formatSectionCharacteristics( + uint32_t IndentLevel, uint32_t C, uint32_t FlagsPerLine, + StringRef Separator, + CharacteristicStyle Style = CharacteristicStyle::HeaderDefinition); + +std::string typesetItemList(ArrayRef Opts, uint32_t IndentLevel, + uint32_t GroupSize, StringRef Sep); + +std::string typesetStringList(uint32_t IndentLevel, + ArrayRef Strings); + +std::string formatChunkKind(codeview::DebugSubsectionKind Kind, + bool Friendly = true); +std::string formatSymbolKind(codeview::SymbolKind K); +std::string formatTypeLeafKind(codeview::TypeLeafKind K); + +/// Returns the number of digits in the given integer. 
+inline int NumDigits(uint64_t N) { + if (N < 10ULL) + return 1; + if (N < 100ULL) + return 2; + if (N < 1000ULL) + return 3; + if (N < 10000ULL) + return 4; + if (N < 100000ULL) + return 5; + if (N < 1000000ULL) + return 6; + if (N < 10000000ULL) + return 7; + if (N < 100000000ULL) + return 8; + if (N < 1000000000ULL) + return 9; + if (N < 10000000000ULL) + return 10; + if (N < 100000000000ULL) + return 11; + if (N < 1000000000000ULL) + return 12; + if (N < 10000000000000ULL) + return 13; + if (N < 100000000000000ULL) + return 14; + if (N < 1000000000000000ULL) + return 15; + if (N < 10000000000000000ULL) + return 16; + if (N < 100000000000000000ULL) + return 17; + if (N < 1000000000000000000ULL) + return 18; + if (N < 10000000000000000000ULL) + return 19; + return 20; +} + +namespace detail { +template +struct EndianAdapter final + : public FormatAdapter> { + using EndianType = + support::detail::packed_endian_specific_integral; + + explicit EndianAdapter(EndianType &&Item) + : FormatAdapter(std::move(Item)) {} + + void format(llvm::raw_ostream &Stream, StringRef Style) override { + format_provider::format(static_cast(this->Item), Stream, Style); + } +}; +} // namespace detail + +template +detail::EndianAdapter +fmtle(support::detail::packed_endian_specific_integral + Value) { + return detail::EndianAdapter(std::move(Value)); +} +} // namespace pdb +} // namespace llvm +#endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h index 9530a15849d5..28a72c887f25 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h @@ -10,18 +10,20 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_GSISTREAMBUILDER_H #include "llvm/ADT/DenseSet.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamRef.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" namespace llvm { +namespace codeview { +class ConstantSym; +class DataSym; +class ProcRefSym; +} // namespace codeview +template struct BinaryItemTraits; template <> struct BinaryItemTraits { static size_t length(const codeview::CVSymbol &Item) { diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h index 2b74babd6ab9..2988bef4a75b 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h @@ -10,18 +10,18 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_GLOBALSSTREAM_H #include "llvm/ADT/iterator.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" namespace llvm { +class BinaryStreamReader; +namespace msf { +class MappedBlockStream; +} namespace pdb { -class DbiStream; -class PDBFile; class SymbolStream; /// 
Iterator over hash records producing symbol record offsets. Abstracts away @@ -81,7 +81,7 @@ private: GSIHashTable GlobalsTable; std::unique_ptr Stream; }; -} +} // namespace pdb } #endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h index 474bd796b2b3..7924cffd640f 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -23,9 +23,6 @@ namespace llvm { -class BinaryStreamReader; -class BinaryStreamWriter; - namespace pdb { Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h index 67db92b64913..625bab6a4378 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h @@ -9,22 +9,18 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_INFOSTREAM_H #define LLVM_DEBUGINFO_PDB_NATIVE_INFOSTREAM_H -#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo/CodeView/GUID.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/PDBTypes.h" +#include "llvm/Support/BinaryStream.h" +#include "llvm/Support/BinaryStreamRef.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" namespace llvm { namespace pdb { -class InfoStreamBuilder; -class PDBFile; - +struct InfoStreamHeader; class InfoStream { friend class InfoStreamBuilder; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h index 4952173c5873..2d5088a3bd42 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h @@ -12,19 +12,17 @@ #include "llvm/ADT/Optional.h" #include "llvm/Support/Error.h" -#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/PDBTypes.h" namespace llvm { class WritableBinaryStreamRef; namespace msf { class MSFBuilder; +struct MSFLayout; } namespace pdb { -class PDBFile; class NamedStreamMap; class InfoStreamBuilder { @@ -70,7 +68,7 @@ private: NamedStreamMap &NamedStreams; }; -} +} // namespace pdb } #endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h index b2ba81a88254..259c924d9d7c 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h @@ -9,15 +9,14 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_INJECTEDSOURCESTREAM_H #define LLVM_DEBUGINFO_PDB_NATIVE_INJECTEDSOURCESTREAM_H +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/HashTable.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/Error.h" namespace llvm { -namespace msf { -class MappedBlockStream; -} namespace pdb { +struct SrcHeaderBlockEntry; +struct SrcHeaderBlockHeader; class PDBStringTable; class InjectedSourceStream { @@ -38,6 +37,6 @@ private: HashTable InjectedSourceTable; }; } -} +} // namespace llvm #endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h 
b/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h new file mode 100644 index 000000000000..c0d722960540 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h @@ -0,0 +1,231 @@ +//===- InputFile.h -------------------------------------------- *- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_INPUTFILE_H +#define LLVM_DEBUGINFO_PDB_NATIVE_INPUTFILE_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/iterator.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +class LazyRandomTypeCollection; +} +namespace object { +class COFFObjectFile; +} // namespace object + +namespace pdb { +class InputFile; +class LinePrinter; +class PDBFile; +class NativeSession; +class SymbolGroupIterator; +class SymbolGroup; + +class InputFile { + InputFile(); + + std::unique_ptr PdbSession; + object::OwningBinary CoffObject; + std::unique_ptr UnknownFile; + PointerUnion PdbOrObj; + + using TypeCollectionPtr = std::unique_ptr; + + TypeCollectionPtr Types; + TypeCollectionPtr Ids; + + enum TypeCollectionKind { kTypes, kIds }; + codeview::LazyRandomTypeCollection & + getOrCreateTypeCollection(TypeCollectionKind Kind); + +public: + InputFile(PDBFile *Pdb) { PdbOrObj = Pdb; } + InputFile(object::COFFObjectFile *Obj) { PdbOrObj = Obj; } + InputFile(MemoryBuffer *Buffer) { PdbOrObj = Buffer; } + ~InputFile(); + InputFile(InputFile &&Other) = default; + + static Expected open(StringRef Path, + bool AllowUnknownFile = false); + + PDBFile &pdb(); + const PDBFile &pdb() const; + object::COFFObjectFile &obj(); + const object::COFFObjectFile &obj() const; + MemoryBuffer &unknown(); + const MemoryBuffer &unknown() const; + + StringRef getFilePath() const; + + bool hasTypes() const; + bool hasIds() const; + + codeview::LazyRandomTypeCollection &types(); + codeview::LazyRandomTypeCollection &ids(); + + iterator_range symbol_groups(); + SymbolGroupIterator symbol_groups_begin(); + SymbolGroupIterator symbol_groups_end(); + + bool isPdb() const; + bool isObj() const; + bool isUnknown() const; +}; + +class SymbolGroup { + friend class SymbolGroupIterator; + +public: + explicit SymbolGroup(InputFile *File, uint32_t GroupIndex = 0); + + Expected getNameFromStringTable(uint32_t Offset) const; + Expected getNameFromChecksums(uint32_t Offset) const; + + void formatFromFileName(LinePrinter &Printer, StringRef File, + bool Append = false) const; + + void formatFromChecksumsOffset(LinePrinter &Printer, uint32_t Offset, + bool Append = false) const; + + StringRef name() const; + + codeview::DebugSubsectionArray getDebugSubsections() const { + return Subsections; + } + const ModuleDebugStreamRef &getPdbModuleStream() const; + + const InputFile &getFile() const { return *File; } + InputFile &getFile() { return *File; } + + bool hasDebugStream() const { return DebugStream != nullptr; } + +private: + void initializeForPdb(uint32_t Modi); + void 
updatePdbModi(uint32_t Modi); + void updateDebugS(const codeview::DebugSubsectionArray &SS); + + void rebuildChecksumMap(); + InputFile *File = nullptr; + StringRef Name; + codeview::DebugSubsectionArray Subsections; + std::shared_ptr DebugStream; + codeview::StringsAndChecksumsRef SC; + StringMap ChecksumsByFile; +}; + +class SymbolGroupIterator + : public iterator_facade_base { +public: + SymbolGroupIterator(); + explicit SymbolGroupIterator(InputFile &File); + SymbolGroupIterator(const SymbolGroupIterator &Other) = default; + SymbolGroupIterator &operator=(const SymbolGroupIterator &R) = default; + + const SymbolGroup &operator*() const; + SymbolGroup &operator*(); + + bool operator==(const SymbolGroupIterator &R) const; + SymbolGroupIterator &operator++(); + +private: + void scanToNextDebugS(); + bool isEnd() const; + + uint32_t Index = 0; + Optional SectionIter; + SymbolGroup Value; +}; + +Expected +getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index); +Expected getModuleDebugStream(PDBFile &File, + uint32_t Index); + +bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, + const FilterOptions &Filters); + +// TODO: Change these callbacks to be function_refs (de-templatify them). +template +Error iterateOneModule(InputFile &File, const PrintScope &HeaderScope, + const SymbolGroup &SG, uint32_t Modi, + CallbackT Callback) { + HeaderScope.P.formatLine( + "Mod {0:4} | `{1}`: ", + fmt_align(Modi, AlignStyle::Right, HeaderScope.LabelWidth), SG.name()); + + AutoIndent Indent(HeaderScope); + return Callback(Modi, SG); +} + +template +Error iterateSymbolGroups(InputFile &Input, const PrintScope &HeaderScope, + CallbackT Callback) { + AutoIndent Indent(HeaderScope); + + FilterOptions Filters = HeaderScope.P.getFilters(); + if (Filters.DumpModi) { + uint32_t Modi = *Filters.DumpModi; + SymbolGroup SG(&Input, Modi); + return iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(Modi)), + SG, Modi, Callback); + } + + uint32_t I = 0; + + for (const auto &SG : Input.symbol_groups()) { + if (shouldDumpSymbolGroup(I, SG, Filters)) + if (auto Err = + iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(I)), + SG, I, Callback)) + return Err; + + ++I; + } + return Error::success(); +} + +template +Error iterateModuleSubsections( + InputFile &File, const PrintScope &HeaderScope, + llvm::function_ref + Callback) { + + return iterateSymbolGroups( + File, HeaderScope, [&](uint32_t Modi, const SymbolGroup &SG) -> Error { + for (const auto &SS : SG.getDebugSubsections()) { + SubsectionT Subsection; + + if (SS.kind() != Subsection.kind()) + continue; + + BinaryStreamReader Reader(SS.getRecordData()); + if (auto Err = Subsection.initialize(Reader)) + continue; + if (auto Err = Callback(Modi, SG, Subsection)) + return Err; + } + return Error::success(); + }); +} + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h b/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h new file mode 100644 index 000000000000..0db21309f593 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h @@ -0,0 +1,185 @@ +//===- LinePrinter.h ------------------------------------------ *- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_LINEPRINTER_H +#define LLVM_DEBUGINFO_PDB_NATIVE_LINEPRINTER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" + +#include + +// Container for filter options to control which elements will be printed. +struct FilterOptions { + std::list ExcludeTypes; + std::list ExcludeSymbols; + std::list ExcludeCompilands; + std::list IncludeTypes; + std::list IncludeSymbols; + std::list IncludeCompilands; + uint32_t PaddingThreshold; + uint32_t SizeThreshold; + llvm::Optional DumpModi; + llvm::Optional ParentRecurseDepth; + llvm::Optional ChildrenRecurseDepth; + llvm::Optional SymbolOffset; + bool JustMyCode; +}; + +namespace llvm { +namespace msf { +class MSFStreamLayout; +} // namespace msf +namespace pdb { + +class ClassLayout; +class PDBFile; +class SymbolGroup; + +class LinePrinter { + friend class WithColor; + +public: + LinePrinter(int Indent, bool UseColor, raw_ostream &Stream, + const FilterOptions &Filters); + + void Indent(uint32_t Amount = 0); + void Unindent(uint32_t Amount = 0); + void NewLine(); + + void printLine(const Twine &T); + void print(const Twine &T); + template void formatLine(const char *Fmt, Ts &&...Items) { + printLine(formatv(Fmt, std::forward(Items)...)); + } + template void format(const char *Fmt, Ts &&...Items) { + print(formatv(Fmt, std::forward(Items)...)); + } + + void formatBinary(StringRef Label, ArrayRef Data, + uint64_t StartOffset); + void formatBinary(StringRef Label, ArrayRef Data, uint64_t BaseAddr, + uint64_t StartOffset); + + void formatMsfStreamData(StringRef Label, PDBFile &File, uint32_t StreamIdx, + StringRef StreamPurpose, uint64_t Offset, + uint64_t Size); + void formatMsfStreamData(StringRef Label, PDBFile &File, + const msf::MSFStreamLayout &Stream, + BinarySubstreamRef Substream); + void formatMsfStreamBlocks(PDBFile &File, const msf::MSFStreamLayout &Stream); + + bool hasColor() const { return UseColor; } + raw_ostream &getStream() { return OS; } + int getIndentLevel() const { return CurrentIndent; } + + bool IsClassExcluded(const ClassLayout &Class); + bool IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size); + bool IsSymbolExcluded(llvm::StringRef SymbolName); + bool IsCompilandExcluded(llvm::StringRef CompilandName); + + const FilterOptions &getFilters() const { return Filters; } + +private: + template + void SetFilters(std::list &List, Iter Begin, Iter End) { + List.clear(); + for (; Begin != End; ++Begin) + List.emplace_back(StringRef(*Begin)); + } + + raw_ostream &OS; + int IndentSpaces; + int CurrentIndent; + bool UseColor; + const FilterOptions &Filters; + + std::list ExcludeCompilandFilters; + std::list ExcludeTypeFilters; + std::list ExcludeSymbolFilters; + + std::list IncludeCompilandFilters; + std::list IncludeTypeFilters; + std::list IncludeSymbolFilters; +}; + +struct PrintScope { + explicit PrintScope(LinePrinter &P, uint32_t IndentLevel) + : P(P), IndentLevel(IndentLevel) {} + explicit PrintScope(const PrintScope &Other, uint32_t LabelWidth) + : P(Other.P), IndentLevel(Other.IndentLevel), LabelWidth(LabelWidth) {} + + LinePrinter &P; + uint32_t IndentLevel; + uint32_t LabelWidth = 0; +}; + +inline 
+inline PrintScope withLabelWidth(const PrintScope &Scope, uint32_t W) {
+  return PrintScope{Scope, W};
+}
+
+struct AutoIndent {
+  explicit AutoIndent(LinePrinter &L, uint32_t Amount = 0)
+      : L(&L), Amount(Amount) {
+    L.Indent(Amount);
+  }
+  explicit AutoIndent(const PrintScope &Scope) {
+    L = &Scope.P;
+    Amount = Scope.IndentLevel;
+  }
+  ~AutoIndent() {
+    if (L)
+      L->Unindent(Amount);
+  }
+
+  LinePrinter *L = nullptr;
+  uint32_t Amount = 0;
+};
+
+template <typename T>
+inline raw_ostream &operator<<(LinePrinter &Printer, const T &Item) {
+  return Printer.getStream() << Item;
+}
+
+enum class PDB_ColorItem {
+  None,
+  Address,
+  Type,
+  Comment,
+  Padding,
+  Keyword,
+  Offset,
+  Identifier,
+  Path,
+  SectionHeader,
+  LiteralValue,
+  Register,
+};
+
+class WithColor {
+public:
+  WithColor(LinePrinter &P, PDB_ColorItem C);
+  ~WithColor();
+
+  raw_ostream &get() { return OS; }
+
+private:
+  void applyColor(PDB_ColorItem C);
+  raw_ostream &OS;
+  bool UseColor;
+};
+} // namespace pdb
+} // namespace llvm
+
+#endif
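As a quick orientation, the printer, filters, and RAII helpers in this new header compose roughly as follows. This is an illustrative sketch, not code from the patch; the stream and filter values are supplied by the caller, and the output text is made up.

// Sketch: indentation and color are scoped, so formatting code stays flat.
#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"

void printSketch(llvm::raw_ostream &OS, const FilterOptions &Filters) {
  llvm::pdb::LinePrinter P(/*Indent=*/2, /*UseColor=*/true, OS, Filters);
  P.formatLine("Mod {0} | `{1}`", 1, "example.obj");
  {
    llvm::pdb::AutoIndent Indent(P);     // indented until end of this scope
    llvm::pdb::WithColor Color(P, llvm::pdb::PDB_ColorItem::Keyword);
    Color.get() << "struct";             // colored via the printer's stream
    P.NewLine();
  }
  P.printLine("done");                   // back at the outer indent level
}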
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index cb1ffc729512..0caf9fffbad6 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -10,10 +10,8 @@
 #define LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H
 
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Error.h"
@@ -21,10 +19,15 @@
 #include <memory>
 
 namespace llvm {
+class BinaryStreamReader;
+namespace codeview {
+class DebugChecksumsSubsectionRef;
+}
+namespace msf {
+class MappedBlockStream;
+}
 namespace pdb {
 
-class DbiModuleDescriptor;
-
 class ModuleDebugStreamRef {
   using DebugSubsectionIterator = codeview::DebugSubsectionArray::Iterator;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index f110e90b3f90..18fbab0dd38c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -11,7 +11,6 @@
 
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/PDB/Native/HashTable.h"
 #include "llvm/Support/Error.h"
 #include <cstdint>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
index 073878afd129..c10e652efa8d 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
 
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h
index 32a4515d557e..a936b769d688 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h
@@ -9,16 +9,13 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMLINENUMBERS_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMLINENUMBERS_H
 
-#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
+#include <vector>
 
 namespace llvm {
 namespace pdb {
-class IPDBLineNumber;
 
 class NativeEnumLineNumbers : public IPDBEnumChildren<IPDBLineNumber> {
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
index 480b3fb11419..5fc91675f209 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
@@ -9,9 +9,9 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H
 
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 #include <vector>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
index 25c56567384f..2ca000c1c0fe 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
@@ -9,14 +9,17 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMTYPES_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMTYPES_H
 
-#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <vector>
 
 namespace llvm {
+namespace codeview {
+class LazyRandomTypeCollection;
+}
 namespace pdb {
 
 class NativeSession;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
index 280358d02305..82fdff130c4f 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
@@ -9,12 +9,15 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEEXESYMBOL_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEEXESYMBOL_H
 
+#include "llvm/DebugInfo/CodeView/GUID.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 namespace llvm {
 namespace pdb {
 
+class NativeSession;
+
 class DbiStream;
 
 class NativeExeSymbol : public NativeRawSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
index b219055d2153..c15e22f61077 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
@@ -9,14 +9,17 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H
 
-#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h" namespace llvm { +class raw_ostream; namespace pdb { +class NativeSession; + class NativeFunctionSymbol : public NativeRawSymbol { public: NativeFunctionSymbol(NativeSession &Session, SymIndexId Id, diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h index 2f6aba038ae8..3467ac912162 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h @@ -9,14 +9,16 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H -#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" namespace llvm { namespace pdb { +class NativeSession; + class NativeInlineSiteSymbol : public NativeRawSymbol { public: NativeInlineSiteSymbol(NativeSession &Session, SymIndexId Id, diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h index be0ddf0a063a..53f2985833fd 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h @@ -11,10 +11,12 @@ #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/PDB/IPDBLineNumber.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" namespace llvm { namespace pdb { + +class NativeSession; + class NativeLineNumber : public IPDBLineNumber { public: explicit NativeLineNumber(const NativeSession &Session, diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h index 9f410e27f4cb..43de80507d02 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h @@ -9,13 +9,14 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H -#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" namespace llvm { + +class raw_ostream; namespace pdb { +class NativeSession; class NativePublicSymbol : public NativeRawSymbol { public: diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h index 5f8fc587e546..95be7d09aae9 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -9,13 +9,11 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" -#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -24,6 +22,12 @@ class MemoryBuffer; namespace pdb { class PDBFile; class NativeExeSymbol; +class 
+class IPDBSourceFile;
+class ModuleDebugStreamRef;
+class PDBSymbol;
+class PDBSymbolCompiland;
+class PDBSymbolExe;
+template <typename ChildType> class IPDBEnumChildren;
 
 class NativeSession : public IPDBSession {
   struct PdbSearchOptions {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h
index eb6336f268e8..c6653368bc0c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h
@@ -11,11 +11,12 @@
 
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
 #include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 namespace llvm {
 namespace pdb {
+class PDBSymbolCompiland;
+template <typename ChildType> class IPDBEnumChildren;
 class NativeSession;
 
 class NativeSourceFile : public IPDBSourceFile {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
index d6a3125ee40b..ab4abc4d3c2c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
@@ -9,12 +9,16 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
 
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 namespace llvm {
+
+class raw_ostream;
 namespace pdb {
+class NativeSession;
 class NativeTypeEnum;
 
 class NativeSymbolEnumerator : public NativeRawSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
index 2068c88fc74a..429c06f29ac7 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
@@ -10,12 +10,14 @@
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
 
 #include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 namespace llvm {
+class raw_ostream;
 namespace pdb {
 
 class NativeTypeBuiltin;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
index 90b5d8068959..47ea722313c3 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -9,17 +9,15 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
 
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
"llvm/DebugInfo/PDB/PDBTypes.h" namespace llvm { namespace pdb { -class NativeTypeUDT; - class NativeTypeFunctionSig : public NativeRawSymbol { protected: void initialize() override; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h index 7a3dfaecefeb..1f357754ac0f 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h @@ -10,10 +10,11 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H #include "llvm/ADT/Optional.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" namespace llvm { namespace pdb { diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h index 292fc48e7b6d..ce4ebcd00c4a 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h @@ -9,14 +9,19 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H -#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" namespace llvm { + +class raw_ostream; + namespace pdb { +class NativeSession; + class NativeTypeTypedef : public NativeRawSymbol { public: // Create a pointer record for a non-simple type. 
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
index e1b31a256c12..a1dd39c0b4be 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
@@ -10,13 +10,17 @@
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H
 
 #include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 namespace llvm {
+
+class raw_ostream;
 namespace pdb {
+class NativeSession;
 
 class NativeTypeUDT : public NativeRawSymbol {
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
index 21995ca665c1..92d51706c1da 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -9,13 +9,15 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
 
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 namespace llvm {
 namespace pdb {
+class NativeSession;
 
 class NativeTypeVTShape : public NativeRawSymbol {
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
index c5ee73280c46..1ea92ed4bf21 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
@@ -9,14 +9,12 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_PDBFILE_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILE_H
 
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/DebugInfo/MSF/IMSFFile.h"
 #include "llvm/DebugInfo/MSF/MSFCommon.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/MathExtras.h"
 #include <memory>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 004d005280d4..c23d958f8ed0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -9,24 +9,28 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H
 
-#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
 #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <memory>
 
 namespace llvm {
+class WritableBinaryStream;
+namespace codeview {
+struct GUID;
+}
+
 namespace msf {
 class MSFBuilder;
+struct MSFLayout;
 }
 namespace pdb {
+struct SrcHeaderBlockEntry;
 class DbiStreamBuilder;
 class InfoStreamBuilder;
 class GSIStreamBuilder;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
index 5cb749c8a747..4336cd398baf 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -9,11 +9,9 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_PDBSTRINGTABLE_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_PDBSTRINGTABLE_H
 
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
 #include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include <cstdint>
@@ -21,10 +19,6 @@
 namespace llvm {
 class BinaryStreamReader;
 
-namespace msf {
-class MappedBlockStream;
-}
-
 namespace pdb {
 
 struct PDBStringTableHeader;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
index bf6da3ea2920..a59a752ff911 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
@@ -9,20 +9,17 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_PUBLICSSTREAM_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_PUBLICSSTREAM_H
 
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
-#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
 namespace pdb {
-class DbiStream;
-struct GSIHashHeader;
-class PDBFile;
+struct PublicsStreamHeader;
+struct SectionOffset;
 
 class PublicsStream {
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
index 1ff6ca173b2b..7c5b6b9e1bdf 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -10,23 +10,29 @@
 #define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include <memory>
 #include <vector>
 
 namespace llvm {
+namespace codeview {
+class InlineSiteSym;
+struct FileChecksumEntry;
+} // namespace codeview
 namespace pdb {
+class IPDBSourceFile;
+class NativeSession;
+class PDBSymbol;
+class PDBSymbolCompiland;
 class DbiStream;
-class PDBFile;
 
 class SymbolCache {
   NativeSession &Session;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
index 839cc8d2c503..c2f7eb04d16e 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLSTREAM_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLSTREAM_H
 
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
 
 #include "llvm/Support/Error.h"
@@ -18,7 +18,6 @@
 namespace msf {
 class MappedBlockStream;
 }
 namespace pdb {
-class PDBFile;
 
 class SymbolStream {
 public:
@@ -41,7 +40,7 @@ private:
   codeview::CVSymbolArray SymbolRecords;
   std::unique_ptr<msf::MappedBlockStream> Stream;
 };
-}
+} // namespace pdb
 }
 
 #endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index e49d58af4421..4c413abb2bf0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -12,22 +12,23 @@
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/PDB/Native/HashTable.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
+class BinaryStream;
 namespace codeview {
+class TypeIndex;
+struct TypeIndexOffset;
 class LazyRandomTypeCollection;
 }
 namespace msf {
 class MappedBlockStream;
 }
 namespace pdb {
+struct TpiStreamHeader;
 class PDBFile;
 
 class TpiStream {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
index f18d38ae0b31..9f320358144c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@@ -10,12 +10,10 @@
 #define LLVM_DEBUGINFO_PDB_NATIVE_TPISTREAMBUILDER_H
 
 #include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryItemStream.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Error.h"
 
@@ -23,7 +21,7 @@
 namespace llvm {
 class BinaryByteStream;
-class WritableBinaryStreamRef;
+template <typename T> struct BinaryItemTraits;
 
 template <> struct BinaryItemTraits<codeview::CVType> {
   static size_t length(const codeview::CVType &Item) { return Item.length(); }
@@ -32,16 +30,11 @@ template <> struct BinaryItemTraits<codeview::CVType> {
   }
 };
 
-namespace codeview {
-class TypeRecord;
-}
 namespace msf {
 class MSFBuilder;
 struct MSFLayout;
 }
 namespace pdb {
-class PDBFile;
-class TpiStream;
 struct TpiStreamHeader;
 
 class TpiStreamBuilder {
@@ -88,7 +81,7 @@
   const TpiStreamHeader *Header;
   uint32_t Idx;
 };
-}
+} // namespace pdb
 }
 
 #endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
index 7b6793f0a639..3163c0a1dae0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -45,6 +45,8 @@ namespace pdb {
     DILineInfo getLineInfoForAddress(
         object::SectionedAddress Address,
        DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+    DILineInfo
+    getLineInfoForDataAddress(object::SectionedAddress Address) override;
     DILineInfoTable getLineInfoForAddressRange(
         object::SectionedAddress Address, uint64_t Size,
         DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 24cf1e459f92..4e34b75b6117 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -9,11 +9,9 @@
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOL_H
 #define LLVM_DEBUGINFO_PDB_PDBSYMBOL_H
 
-#include "ConcreteSymbolEnumerator.h"
 #include "IPDBRawSymbol.h"
 #include "PDBExtras.h"
 #include "PDBTypes.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 
 #define FORWARD_SYMBOL_METHOD(MethodName) \
@@ -43,6 +41,9 @@
 class raw_ostream;
 namespace pdb {
 
 class IPDBSession;
+class PDBSymDumper;
+class PDBSymbol;
+template <typename ChildType> class ConcreteSymbolEnumerator;
 
 #define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \
 private: \
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index c76466a97b66..c8d3d0b7bb96 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -13,7 +13,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolAnnotation : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index cf471450d989..09142227b017 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -13,8 +13,6 @@
 
 namespace llvm {
 
-class raw_ostream;
-
 namespace pdb {
 
 class PDBSymbolBlock : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
index dbd8ba5a63ff..46c159268533 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolCompilandDetails : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
index 61607a03593d..cba082f2ff19 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolCompilandEnv : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CompilandEnv)
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
index 75a86411643a..c78b47ce9924 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
@@ -15,8 +15,6 @@
 
 namespace llvm {
 
-class raw_ostream;
-
 namespace pdb {
 
 /// PDBSymbolCustom represents symbols that are compiler-specific and do not
 /// fit anywhere else in the lexical hierarchy.
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
index 7e9b69d7cf4b..61e67d1368a8 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
@@ -9,16 +9,16 @@
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLDATA_H
 #define LLVM_DEBUGINFO_PDB_PDBSYMBOLDATA_H
 
-#include "IPDBLineNumber.h"
 #include "PDBSymbol.h"
 #include "PDBTypes.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 
 namespace llvm {
 
-class raw_ostream;
-
 namespace pdb {
 
+class PDBSymDumper;
+
 class PDBSymbolData : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Data)
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
index f50057c68406..bfc7f7689718 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
@@ -9,17 +9,20 @@
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNC_H
 #define LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNC_H
 
-#include "IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
 #include "PDBSymbol.h"
-#include "PDBSymbolTypeFunctionSig.h"
 #include "PDBTypes.h"
 
 namespace llvm {
 
-class raw_ostream;
-
 namespace pdb {
 
+class PDBSymDumper;
+class PDBSymbolData;
+class PDBSymbolTypeFunctionSig;
+template <typename ChildType> class IPDBEnumChildren;
+
 class PDBSymbolFunc : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function)
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
index 1cdc1811bb1a..09c6f4728960 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
@@ -14,8 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
-
 namespace pdb {
 
 class PDBSymbolFuncDebugEnd : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
index 021f27c7f0f7..843a8348a2f0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolFuncDebugStart : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
index 33eb36696cc2..148802a47cbc 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolLabel : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
index f8dcb2ba9d5f..a757cc02624b 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolPublicSymbol : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
index a5f795cc1303..2b81a63995e6 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolThunk : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
index d4cd6e71423e..496141e5fa68 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeArray : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
index bd2dbc914725..c74ac3fb9cce 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
@@ -12,14 +12,14 @@
 
 #include "PDBSymbol.h"
 #include "PDBTypes.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
+class PDBSymDumper;
+
 class PDBSymbolTypeBaseClass : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BaseClass)
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index df6309b1545c..b923983095f3 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeBuiltin : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
index 7bf0317ff1ca..b15abf7bedfd 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeCustom : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
index 5d742237bac4..e7570b41dd21 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeDimension : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
index 0aab91039509..ee1f736c17a0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
@@ -9,16 +9,18 @@
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEENUM_H
 #define LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEENUM_H
 
-#include "IPDBLineNumber.h"
 #include "PDBSymbol.h"
-#include "PDBSymbolTypeBuiltin.h"
 #include "PDBTypes.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
+class PDBSymDumper;
+class PDBSymbolTypeBuiltin;
+
 class PDBSymbolTypeEnum : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Enum)
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
index d56a90662dae..9fde42116261 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeFriend : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
index 559ceec5aace..71decff722a5 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeFunctionArg : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
index 5e7b83ce8004..866bf520a3b2 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeManaged : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
index da25eab50f9b..1b43ef9a21bd 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypePointer : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
index 8dc29ca26192..3f37730cf1df 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeTypedef : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
index 3e73ad7ac85a..a3a49a4b619a 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
@@ -9,18 +9,17 @@
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEUDT_H
 #define LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEUDT_H
 
-#include "IPDBLineNumber.h"
-#include "IPDBSession.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
 #include "PDBSymbol.h"
-#include "PDBSymbolTypeBaseClass.h"
 #include "PDBTypes.h"
 
 namespace llvm {
 
-class raw_ostream;
-
 namespace pdb {
 
+class PDBSymDumper;
+
 class PDBSymbolTypeUDT : public PDBSymbol {
   DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UDT)
 public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
index d08728dafa76..6223bee98670 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeVTable : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
index c7e2ac148503..bec0a9970a9f 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolTypeVTableShape : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
index 5b4909b800b9..a53af49bc9e0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
@@ -13,7 +13,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolUnknown : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
index 19a8f414eb43..dde25a023d00 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
 
-class raw_ostream;
 namespace pdb {
 
 class PDBSymbolUsingNamespace : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index e7c2ded1bee1..b6a794ad7e76 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -352,7 +352,8 @@ enum class PDB_BuiltinType {
   BSTR = 30,
   HResult = 31,
   Char16 = 32,
-  Char32 = 33
+  Char32 = 33,
+  Char8 = 34,
 };
 
 /// These values correspond to the flags that can be combined to control the
diff --git a/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h b/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h
index c67b093b63c0..8631c412f114 100644
--- a/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h
+++ b/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h
@@ -18,7 +18,6 @@
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include <cstdint>
 #include <memory>
 #include <string>
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h b/llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h
new file mode 100644
index 000000000000..c5340b5f0460
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h
@@ -0,0 +1,51 @@
+//===-- llvm/DebugInfo/Symbolize/DIFetcher.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares a DIFetcher abstraction for obtaining debug info from an
+/// arbitrary outside source.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_DIFETCHER_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_DIFETCHER_H
+
+#include <cstdint>
+#include <string>
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// The DIFetcher interface provides arbitrary mechanisms for obtaining debug
+/// info from an outside source.
+class DIFetcher {
+public:
+  virtual ~DIFetcher() = default;
+  virtual Optional<std::string>
+  fetchBuildID(ArrayRef<uint8_t> BuildID) const = 0;
+};
+
+/// LocalDIFetcher searches local cache directories for debug info.
+class LocalDIFetcher : public DIFetcher {
+public:
+  LocalDIFetcher(ArrayRef<std::string> DebugFileDirectory)
+      : DebugFileDirectory(DebugFileDirectory){};
+  virtual ~LocalDIFetcher() = default;
+
+  Optional<std::string> fetchBuildID(ArrayRef<uint8_t> BuildID) const override;
+
+private:
+  const ArrayRef<std::string> DebugFileDirectory;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_DIFETCHER_H
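To make the new interface concrete, a client could implement a fetcher along the following lines. This is a hypothetical sketch, not part of the patch: the cache directory layout is invented, and the fetcher would be registered with LLVMSymbolizer::addDIFetcher (declared later in this patch).

// Sketch: a hypothetical DIFetcher that probes a local directory keyed by the
// hex build ID, returning None so the symbolizer can try the next fetcher.
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
#include "llvm/Support/FileSystem.h"

class ExampleDIFetcher : public llvm::symbolize::DIFetcher {
public:
  llvm::Optional<std::string>
  fetchBuildID(llvm::ArrayRef<uint8_t> BuildID) const override {
    // "/var/cache/debuginfo" is an assumed layout for this example only.
    std::string Path = "/var/cache/debuginfo/" +
                       llvm::toHex(BuildID, /*LowerCase=*/true) + ".debug";
    if (llvm::sys::fs::exists(Path))
      return Path;
    return llvm::None;
  }
};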
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
new file mode 100644
index 000000000000..2628b47cf6d3
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -0,0 +1,120 @@
+//===- Markup.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the log symbolizer markup data model and parser.
+///
+/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+
+#include <string>
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Regex.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// A node of symbolizer markup.
+///
+/// If only the Text field is set, this represents a region of text outside a
+/// markup element. ANSI SGR control codes are also reported this way; if
+/// detected, then the control code will be the entirety of the Text field, and
+/// any surrounding text will be reported as preceding and following nodes.
+struct MarkupNode {
+  /// The full text of this node in the input.
+  StringRef Text;
+
+  /// If this represents an element, the tag. Otherwise, empty.
+  StringRef Tag;
+
+  /// If this represents an element with fields, a list of the field contents.
+  /// Otherwise, empty.
+  SmallVector<StringRef> Fields;
+
+  bool operator==(const MarkupNode &Other) const {
+    return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
+  }
+  bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
+};
+
+/// Parses a log containing symbolizer markup into a sequence of nodes.
+class MarkupParser {
+public:
+  MarkupParser(StringSet<> MultilineTags = {});
+
+  /// Parses an individual \p Line of input.
+  ///
+  /// Nodes from the previous parseLine() call that haven't yet been extracted
+  /// by nextNode() are discarded. The nodes returned by nextNode() may
+  /// reference the input string, so it must be retained by the caller until
+  /// its last use.
+  ///
+  /// Note that some elements may span multiple lines. If a line ends with the
+  /// start of one of these elements, then no nodes will be produced until
+  /// either the end or something that cannot be part of an element is
+  /// encountered. This may only occur after multiple calls to parseLine(),
+  /// corresponding to the lines of the multi-line element.
+  void parseLine(StringRef Line);
+
+  /// Informs the parser that the input stream has ended.
+  ///
+  /// This allows the parser to finish any deferred processing (e.g., an
+  /// in-progress multi-line element) and may cause nextNode() to return
+  /// additional nodes.
+  void flush();
+
+  /// Returns the next node in the input sequence.
+  ///
+  /// Calling nextNode() may invalidate the contents of the node returned by
+  /// the previous call.
+  ///
+  /// \returns the next markup node or None if none remain.
+  Optional<MarkupNode> nextNode();
+
+private:
+  Optional<MarkupNode> parseElement(StringRef Line);
+  void parseTextOutsideMarkup(StringRef Text);
+  Optional<StringRef> parseMultiLineBegin(StringRef Line);
+  Optional<StringRef> parseMultiLineEnd(StringRef Line);
+
+  // Tags of elements that can span multiple lines.
+  const StringSet<> MultilineTags;
+
+  // Contents of a multi-line element that has finished being parsed. Retained
+  // to keep returned StringRefs for the contents valid.
+  std::string FinishedMultiline;
+
+  // Contents of a multi-line element that is still in the process of receiving
+  // lines.
+  std::string InProgressMultiline;
+
+  // The line currently being parsed.
+  StringRef Line;
+
+  // Buffer for nodes parsed from the current line.
+  SmallVector<MarkupNode> Buffer;
+
+  // Next buffer index to return.
+  size_t NextIdx;
+
+  // Regular expression matching supported ANSI SGR escape sequences.
+  const Regex SGRSyntax;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
new file mode 100644
index 000000000000..b7d70ccafe66
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -0,0 +1,76 @@
+//===- MarkupFilter.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares a filter that replaces symbolizer markup with
+/// human-readable expressions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
+
+#include "Markup.h"
+
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// Filter to convert parsed log symbolizer markup elements into human-readable
+/// text.
+class MarkupFilter {
+public:
+  MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled = llvm::None);
+
+  /// Begins a logical \p Line of markup.
+  ///
+  /// This must be called for each line of the input stream before calls to
+  /// filter() for elements of that line. The provided \p Line must be the same
+  /// one that was passed to parseLine() to produce the elements to be later
+  /// passed to filter().
+  ///
+  /// This informs the filter that a new line is beginning and establishes a
+  /// context for error location reporting.
+  void beginLine(StringRef Line);
+
+  /// Handle a \p Node of symbolizer markup.
+  ///
+  /// If the node is a recognized, valid markup element, it is replaced with a
+  /// human-readable string. If the node isn't an element or the element isn't
+  /// recognized, it is output verbatim. If the element is recognized but isn't
+  /// valid, it is omitted from the output.
+  void filter(const MarkupNode &Node);
+
+private:
+  bool trySGR(const MarkupNode &Node);
+
+  void highlight();
+  void restoreColor();
+  void resetColor();
+
+  bool checkTag(const MarkupNode &Node) const;
+  bool checkNumFields(const MarkupNode &Node, size_t Size) const;
+
+  void reportTypeError(StringRef Str, StringRef TypeName) const;
+  void reportLocation(StringRef::iterator Loc) const;
+
+  raw_ostream &OS;
+  const bool ColorsEnabled;
+
+  StringRef Line;
+
+  Optional<raw_ostream::Colors> Color;
+  bool Bold = false;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
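The parser and filter above are designed to be driven together, one log line at a time. A minimal driver loop, sketched here with the line source left abstract and not taken from the patch itself, might look like this:

// Sketch: convert symbolizer markup in a log to human-readable text.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
#include "llvm/Support/raw_ostream.h"

void filterLog(llvm::ArrayRef<std::string> Lines) {
  llvm::symbolize::MarkupParser Parser;
  llvm::symbolize::MarkupFilter Filter(llvm::outs());
  for (const std::string &Line : Lines) { // Lines must outlive the nodes
    Parser.parseLine(Line);               // may defer a multi-line element
    Filter.beginLine(Line);               // same line given to parseLine()
    while (llvm::Optional<llvm::symbolize::MarkupNode> Node =
               Parser.nextNode())
      Filter.filter(*Node);
  }
  Parser.flush();                         // finish any in-progress element
  while (llvm::Optional<llvm::symbolize::MarkupNode> Node = Parser.nextNode())
    Filter.filter(*Node);
}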
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
new file mode 100644
index 000000000000..075dbe3e0e37
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -0,0 +1,103 @@
+//===- SymbolizableObjectFile.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class DataExtractor;
+
+namespace symbolize {
+
+class SymbolizableObjectFile : public SymbolizableModule {
+public:
+  static Expected<std::unique_ptr<SymbolizableObjectFile>>
+  create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx,
+         bool UntagAddresses);
+
+  DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset,
+                           DILineInfoSpecifier LineInfoSpecifier,
+                           bool UseSymbolTable) const override;
+  DIInliningInfo symbolizeInlinedCode(object::SectionedAddress ModuleOffset,
+                                      DILineInfoSpecifier LineInfoSpecifier,
+                                      bool UseSymbolTable) const override;
+  DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
+  std::vector<DILocal>
+  symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
+
+  // Return true if this is a 32-bit x86 PE COFF module.
+  bool isWin32Module() const override;
+
+  // Returns the preferred base of the module, i.e. where the loader would
+  // place it in memory assuming there were no conflicts.
+  uint64_t getModulePreferredBase() const override;
+
+private:
+  bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind,
+                                     bool UseSymbolTable) const;
+
+  bool getNameFromSymbolTable(uint64_t Address, std::string &Name,
+                              uint64_t &Addr, uint64_t &Size,
+                              std::string &FileName) const;
+  // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
+  // (function descriptor) section and OpdExtractor refers to its contents.
+  Error addSymbol(const object::SymbolRef &Symbol, uint64_t SymbolSize,
+                  DataExtractor *OpdExtractor = nullptr,
+                  uint64_t OpdAddress = 0);
+  Error addCoffExportSymbols(const object::COFFObjectFile *CoffObj);
+
+  /// Search for the first occurrence of the specified Address in ObjectFile.
+  uint64_t getModuleSectionIndexForAddress(uint64_t Address) const;
+
+  const object::ObjectFile *Module;
+  std::unique_ptr<DIContext> DebugInfoContext;
+  bool UntagAddresses;
+
+  struct SymbolDesc {
+    uint64_t Addr;
+    // If size is 0, assume that symbol occupies the whole memory range up to
+    // the following symbol.
+    uint64_t Size;
+
+    StringRef Name;
+    // Non-zero if this is an ELF local symbol. See the comment in
+    // getNameFromSymbolTable.
+    uint32_t ELFLocalSymIdx;
+
+    bool operator<(const SymbolDesc &RHS) const {
+      return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size;
+    }
+  };
+  std::vector<SymbolDesc> Symbols;
+  // (index, filename) pairs of ELF STT_FILE symbols.
+  std::vector<std::pair<uint32_t, StringRef>> FileSymbols;
+
+  SymbolizableObjectFile(const object::ObjectFile *Obj,
+                         std::unique_ptr<DIContext> DICtx,
+                         bool UntagAddresses);
+};
+
+} // end namespace symbolize
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 4ec333422c4b..00c4bf0a615f 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -13,10 +13,12 @@
 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
 #define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
 
-#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/simple_ilist.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
 #include "llvm/Object/Binary.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
 #include <algorithm>
 #include <cstdint>
@@ -27,13 +29,24 @@
 #include <vector>
 
 namespace llvm {
+namespace object {
+class ELFObjectFileBase;
+class MachOObjectFile;
+class ObjectFile;
+struct SectionedAddress;
+} // namespace object
+
 namespace symbolize {
 
+class SymbolizableModule;
+
 using namespace object;
 using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
 using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
 
+class CachedBinary;
+
 class LLVMSymbolizer {
 public:
   struct Options {
@@ -49,40 +62,63 @@ public:
     std::string FallbackDebugPath;
     std::string DWPName;
     std::vector<std::string> DebugFileDirectory;
+    size_t MaxCacheSize =
+        sizeof(size_t) == 4
+            ? 512 * 1024 * 1024 /* 512 MiB */
+            : static_cast<size_t>(4ULL * 1024 * 1024 * 1024) /* 4 GiB */;
   };
 
-  LLVMSymbolizer() = default;
-  LLVMSymbolizer(const Options &Opts) : Opts(Opts) {}
+  LLVMSymbolizer();
+  LLVMSymbolizer(const Options &Opts);
 
-  ~LLVMSymbolizer() { flush(); }
+  ~LLVMSymbolizer();
 
-  // Overloads accepting ObjectFile does not support COFF currently
+  // Overloads accepting ObjectFile do not support COFF currently
   Expected<DILineInfo> symbolizeCode(const ObjectFile &Obj,
                                      object::SectionedAddress ModuleOffset);
   Expected<DILineInfo> symbolizeCode(const std::string &ModuleName,
                                      object::SectionedAddress ModuleOffset);
+  Expected<DILineInfo> symbolizeCode(ArrayRef<uint8_t> BuildID,
+                                     object::SectionedAddress ModuleOffset);
   Expected<DIInliningInfo>
  symbolizeInlinedCode(const ObjectFile &Obj,
                       object::SectionedAddress ModuleOffset);
  Expected<DIInliningInfo>
  symbolizeInlinedCode(const std::string &ModuleName,
                       object::SectionedAddress ModuleOffset);
+  Expected<DIInliningInfo>
+  symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
+                       object::SectionedAddress ModuleOffset);
  Expected<DIGlobal> symbolizeData(const ObjectFile &Obj,
                                   object::SectionedAddress ModuleOffset);
  Expected<DIGlobal> symbolizeData(const std::string &ModuleName,
                                   object::SectionedAddress ModuleOffset);
+  Expected<DIGlobal> symbolizeData(ArrayRef<uint8_t> BuildID,
+                                   object::SectionedAddress ModuleOffset);
  Expected<std::vector<DILocal>>
  symbolizeFrame(const ObjectFile &Obj, object::SectionedAddress ModuleOffset);
  Expected<std::vector<DILocal>>
  symbolizeFrame(const std::string &ModuleName,
                 object::SectionedAddress ModuleOffset);
+  Expected<std::vector<DILocal>>
+  symbolizeFrame(ArrayRef<uint8_t> BuildID,
+                 object::SectionedAddress ModuleOffset);
  void flush();
+
+  // Evict entries from the binary cache until it is under the maximum size
+  // given in the options. Calling this invalidates references in the DI...
+ void pruneCache(); + static std::string DemangleName(const std::string &Name, const SymbolizableModule *DbiModuleDescriptor); + void addDIFetcher(std::unique_ptr Fetcher) { + DIFetchers.push_back(std::move(Fetcher)); + } + private: // Bundles together object file with code/data and object file with // corresponding debug info. These objects can be the same. @@ -112,6 +148,12 @@ private: getOrCreateModuleInfo(const std::string &ModuleName); Expected getOrCreateModuleInfo(const ObjectFile &Obj); + /// Returns a SymbolizableModule or an error if loading debug info failed. + /// Unlike the above, errors are reported each time, since they are more + /// likely to be transient. + Expected + getOrCreateModuleInfo(ArrayRef BuildID); + Expected createModuleInfo(const ObjectFile *Obj, std::unique_ptr Context, StringRef ModuleName); @@ -126,6 +168,13 @@ private: const ELFObjectFileBase *Obj, const std::string &ArchName); + bool findDebugBinary(const std::string &OrigPath, + const std::string &DebuglinkName, uint32_t CRCHash, + std::string &Result); + + bool getOrFindDebugBinary(const ArrayRef BuildID, + std::string &Result); + /// Returns pair of pointers to object and debug object. Expected getOrCreateObjectPair(const std::string &Path, const std::string &ArchName); @@ -136,15 +185,24 @@ private: Expected getOrCreateObject(const std::string &Path, const std::string &ArchName); + /// Update the LRU cache order when a binary is accessed. + void recordAccess(CachedBinary &Bin); + std::map, std::less<>> Modules; + StringMap BuildIDPaths; /// Contains cached results of getOrCreateObjectPair(). std::map, ObjectPair> ObjectPairForPathArch; /// Contains parsed binary for each path, or parsing error. - std::map> BinaryForPath; + std::map BinaryForPath; + + /// A list of cached binaries in LRU order. + simple_ilist LRUBinaries; + /// Sum of the sizes of the cached binaries. + size_t CacheSize = 0; /// Parsed object file for path/architecture pair, where "path" refers /// to Mach-O universal binary. @@ -152,6 +210,37 @@ private: ObjectForUBPathAndArch; Options Opts; + + SmallVector> DIFetchers; +}; + +// A binary intrusively linked into a LRU cache list. If the binary is empty, +// then the entry marks that an error occurred, and it is not part of the LRU +// list. +class CachedBinary : public ilist_node { +public: + CachedBinary() = default; + CachedBinary(OwningBinary Bin) : Bin(std::move(Bin)) {} + + OwningBinary &operator*() { return Bin; } + OwningBinary *operator->() { return &Bin; } + + // Add an action to be performed when the binary is evicted, before all + // previously registered evictors. + void pushEvictor(std::function Evictor); + + // Run all registered evictors in the reverse of the order in which they were + // added. + void evict() { + if (Evictor) + Evictor(); + } + + size_t size() { return Bin.getBinary()->getData().size(); } + +private: + OwningBinary Bin; + std::function Evictor; }; } // end namespace symbolize diff --git a/llvm/include/llvm/Debuginfod/DIFetcher.h b/llvm/include/llvm/Debuginfod/DIFetcher.h new file mode 100644 index 000000000000..d398fd900051 --- /dev/null +++ b/llvm/include/llvm/Debuginfod/DIFetcher.h @@ -0,0 +1,34 @@ +//===- llvm/DebugInfod/DIFetcher.h - Debug info fetcher----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares a DIFetcher implementation for obtaining debug info from +/// debuginfod. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFOD_DIFETCHER_H +#define LLVM_DEBUGINFOD_DIFETCHER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/Symbolize/DIFetcher.h" + +namespace llvm { + +class DebuginfodDIFetcher : public symbolize::DIFetcher { +public: + virtual ~DebuginfodDIFetcher() = default; + + /// Fetches the given Build ID using debuginfod and returns a local path to + /// the resulting debug binary. + Optional fetchBuildID(ArrayRef BuildID) const override; +}; + +} // namespace llvm + +#endif // LLVM_DEBUGINFOD_DIFETCHER_H diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h index ca3b76ca9f3f..6c94961032e7 100644 --- a/llvm/include/llvm/Debuginfod/HTTPClient.h +++ b/llvm/include/llvm/Debuginfod/HTTPClient.h @@ -7,9 +7,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file contains the declarations of the HTTPClient, HTTPMethod, -/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as -/// the HTTPResponseBuffer and HTTPRequest structs. +/// This file contains the declarations of the HTTPClient library for issuing +/// HTTP requests and handling the responses. /// //===----------------------------------------------------------------------===// @@ -40,43 +39,13 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B); /// of its methods. class HTTPResponseHandler { public: - /// Processes one line of HTTP response headers. - virtual Error handleHeaderLine(StringRef HeaderLine) = 0; - /// Processes an additional chunk of bytes of the HTTP response body. virtual Error handleBodyChunk(StringRef BodyChunk) = 0; - /// Processes the HTTP response status code. - virtual Error handleStatusCode(unsigned Code) = 0; - protected: ~HTTPResponseHandler(); }; -/// An HTTP response status code bundled with a buffer to store the body. -struct HTTPResponseBuffer { - unsigned Code = 0; - std::unique_ptr Body; -}; - -/// A simple handler which writes returned data to an HTTPResponseBuffer. -/// Ignores all headers except the Content-Length, which it uses to -/// allocate an appropriately-sized Body buffer. -class BufferedHTTPResponseHandler final : public HTTPResponseHandler { - size_t Offset = 0; - -public: - /// Stores the data received from the HTTP server. - HTTPResponseBuffer ResponseBuffer; - - /// These callbacks store the body and status code in an HTTPResponseBuffer - /// allocated based on Content-Length. The Content-Length header must be - /// handled by handleHeaderLine before any calls to handleBodyChunk. - Error handleHeaderLine(StringRef HeaderLine) override; - Error handleBodyChunk(StringRef BodyChunk) override; - Error handleStatusCode(unsigned Code) override; -}; - /// A reusable client that can perform HTTPRequests through a network socket. class HTTPClient { #ifdef LLVM_ENABLE_CURL @@ -107,13 +76,8 @@ public: /// Handler method. Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler); - /// Performs the Request with the default BufferedHTTPResponseHandler, and - /// returns its HTTPResponseBuffer or an Error. 
- Expected perform(const HTTPRequest &Request); - - /// Performs an HTTPRequest with the default configuration to make a GET - /// request to the given Url. Returns an HTTPResponseBuffer or an Error. - Expected get(StringRef Url); + /// Returns the last received response code or zero if none. + unsigned responseCode(); }; } // end namespace llvm diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h index 3150e049320b..6133d0b95bbf 100644 --- a/llvm/include/llvm/Demangle/Demangle.h +++ b/llvm/include/llvm/Demangle/Demangle.h @@ -57,8 +57,8 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf, size_t *n_buf, int *status, MSDemangleFlags Flags = MSDF_None); -// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle. -char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status); +// Demangles a Rust v0 mangled symbol. +char *rustDemangle(const char *MangledName); // Demangles a D mangled symbol. char *dlangDemangle(const char *MangledName); diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 760319544a02..959632f13e1e 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -16,10 +16,6 @@ #ifndef DEMANGLE_ITANIUMDEMANGLE_H #define DEMANGLE_ITANIUMDEMANGLE_H -// FIXME: (possibly) incomplete list of features that clang mangles that this -// file does not yet support: -// - C++ modules TS - #include "DemangleConfig.h" #include "StringView.h" #include "Utility.h" @@ -32,85 +28,6 @@ #include #include -#define FOR_EACH_NODE_KIND(X) \ - X(NodeArrayNode) \ - X(DotSuffix) \ - X(VendorExtQualType) \ - X(QualType) \ - X(ConversionOperatorType) \ - X(PostfixQualifiedType) \ - X(ElaboratedTypeSpefType) \ - X(NameType) \ - X(AbiTagAttr) \ - X(EnableIfAttr) \ - X(ObjCProtoName) \ - X(PointerType) \ - X(ReferenceType) \ - X(PointerToMemberType) \ - X(ArrayType) \ - X(FunctionType) \ - X(NoexceptSpec) \ - X(DynamicExceptionSpec) \ - X(FunctionEncoding) \ - X(LiteralOperator) \ - X(SpecialName) \ - X(CtorVtableSpecialName) \ - X(QualifiedName) \ - X(NestedName) \ - X(LocalName) \ - X(VectorType) \ - X(PixelVectorType) \ - X(BinaryFPType) \ - X(SyntheticTemplateParamName) \ - X(TypeTemplateParamDecl) \ - X(NonTypeTemplateParamDecl) \ - X(TemplateTemplateParamDecl) \ - X(TemplateParamPackDecl) \ - X(ParameterPack) \ - X(TemplateArgumentPack) \ - X(ParameterPackExpansion) \ - X(TemplateArgs) \ - X(ForwardTemplateReference) \ - X(NameWithTemplateArgs) \ - X(GlobalQualifiedName) \ - X(StdQualifiedName) \ - X(ExpandedSpecialSubstitution) \ - X(SpecialSubstitution) \ - X(CtorDtorName) \ - X(DtorName) \ - X(UnnamedTypeName) \ - X(ClosureTypeName) \ - X(StructuredBindingName) \ - X(BinaryExpr) \ - X(ArraySubscriptExpr) \ - X(PostfixExpr) \ - X(ConditionalExpr) \ - X(MemberExpr) \ - X(SubobjectExpr) \ - X(EnclosingExpr) \ - X(CastExpr) \ - X(SizeofParamPackExpr) \ - X(CallExpr) \ - X(NewExpr) \ - X(DeleteExpr) \ - X(PrefixExpr) \ - X(FunctionParam) \ - X(ConversionExpr) \ - X(PointerToMemberConversionExpr) \ - X(InitListExpr) \ - X(FoldExpr) \ - X(ThrowExpr) \ - X(BoolExpr) \ - X(StringLiteral) \ - X(LambdaExpr) \ - X(EnumLiteral) \ - X(IntegerLiteral) \ - X(FloatLiteral) \ - X(DoubleLiteral) \ - X(LongDoubleLiteral) \ - X(BracedExpr) \ - X(BracedRangeExpr) - DEMANGLE_NAMESPACE_BEGIN template class PODSmallVector { @@ -238,37 +155,68 @@ public: class Node { public: enum Kind : unsigned char { -#define 
ENUMERATOR(NodeKind) K ## NodeKind, - FOR_EACH_NODE_KIND(ENUMERATOR) -#undef ENUMERATOR +#define NODE(NodeKind) K##NodeKind, +#include "ItaniumNodes.def" }; /// Three-way bool to track a cached value. Unknown is possible if this node /// has an unexpanded parameter pack below it that may affect this cache. enum class Cache : unsigned char { Yes, No, Unknown, }; + /// Operator precedence for expression nodes. Used to determine required + /// parens in expression emission. + enum class Prec { + Primary, + Postfix, + Unary, + Cast, + PtrMem, + Multiplicative, + Additive, + Shift, + Spaceship, + Relational, + Equality, + And, + Xor, + Ior, + AndIf, + OrIf, + Conditional, + Assign, + Comma, + Default, + }; + private: Kind K; + Prec Precedence : 6; + // FIXME: Make these protected. public: /// Tracks if this node has a component on its right side, in which case we /// need to call printRight. - Cache RHSComponentCache; + Cache RHSComponentCache : 2; /// Track if this node is a (possibly qualified) array type. This can affect /// how we format the output string. - Cache ArrayCache; + Cache ArrayCache : 2; /// Track if this node is a (possibly qualified) function type. This can /// affect how we format the output string. - Cache FunctionCache; + Cache FunctionCache : 2; public: - Node(Kind K_, Cache RHSComponentCache_ = Cache::No, - Cache ArrayCache_ = Cache::No, Cache FunctionCache_ = Cache::No) - : K(K_), RHSComponentCache(RHSComponentCache_), ArrayCache(ArrayCache_), - FunctionCache(FunctionCache_) {} + Node(Kind K_, Prec Precedence_ = Prec::Primary, + Cache RHSComponentCache_ = Cache::No, Cache ArrayCache_ = Cache::No, + Cache FunctionCache_ = Cache::No) + : K(K_), Precedence(Precedence_), RHSComponentCache(RHSComponentCache_), + ArrayCache(ArrayCache_), FunctionCache(FunctionCache_) {} + Node(Kind K_, Cache RHSComponentCache_, Cache ArrayCache_ = Cache::No, + Cache FunctionCache_ = Cache::No) + : Node(K_, Prec::Primary, RHSComponentCache_, ArrayCache_, + FunctionCache_) {} /// Visit the most-derived object corresponding to this object. template void visit(Fn F) const; @@ -299,6 +247,8 @@ public: Kind getKind() const { return K; } + Prec getPrecedence() const { return Precedence; } + virtual bool hasRHSComponentSlow(OutputBuffer &) const { return false; } virtual bool hasArraySlow(OutputBuffer &) const { return false; } virtual bool hasFunctionSlow(OutputBuffer &) const { return false; } @@ -307,6 +257,19 @@ public: // get at a node that actually represents some concrete syntax. virtual const Node *getSyntaxNode(OutputBuffer &) const { return this; } + // Print this node as an expression operand, surrounding it in parentheses if + // its precedence is [Strictly] weaker than P. + void printAsOperand(OutputBuffer &OB, Prec P = Prec::Default, + bool StrictlyWorse = false) const { + bool Paren = + unsigned(getPrecedence()) >= unsigned(P) + unsigned(StrictlyWorse); + if (Paren) + OB.printOpen(); + print(OB); + if (Paren) + OB.printClose(); + } + void print(OutputBuffer &OB) const { printLeft(OB); if (RHSComponentCache != Cache::No) @@ -356,7 +319,7 @@ public: if (!FirstElement) OB += ", "; size_t AfterComma = OB.getCurrentPosition(); - Elements[Idx]->print(OB); + Elements[Idx]->printAsOperand(OB, Node::Prec::Comma); // Elements[Idx] is an empty parameter pack expansion, we should erase the // comma we just printed. 
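
The Prec enum and printAsOperand above are the heart of this change: every expression node now carries its operator precedence, and an operand is wrapped in parentheses only when it binds more weakly than its context. The following minimal standalone sketch shows the same comparison in isolation; Expr, Lit, and Bin are illustrative stand-ins, not LLVM's classes, and only the `prec >= P + StrictlyWorse` test mirrors the logic in the hunk above.

#include <iostream>
#include <string>

// Sketch of precedence-driven parenthesization (not LLVM code).
enum class Prec { Primary, Multiplicative, Additive, Comma, Default };

struct Expr {
  virtual ~Expr() = default;
  virtual Prec prec() const { return Prec::Primary; }
  virtual void print(std::ostream &OS) const = 0;
  // Parenthesize when this node binds [strictly] weaker than the context P,
  // mirroring Node::printAsOperand above.
  void printAsOperand(std::ostream &OS, Prec P = Prec::Default,
                      bool StrictlyWorse = false) const {
    bool Paren = static_cast<unsigned>(prec()) >=
                 static_cast<unsigned>(P) + unsigned(StrictlyWorse);
    if (Paren)
      OS << '(';
    print(OS);
    if (Paren)
      OS << ')';
  }
};

struct Lit : Expr {
  std::string Val;
  explicit Lit(std::string V) : Val(std::move(V)) {}
  void print(std::ostream &OS) const override { OS << Val; }
};

struct Bin : Expr {
  const Expr &L, &R;
  const char *Op;
  Prec P;
  Bin(const Expr &L_, const char *Op_, const Expr &R_, Prec P_)
      : L(L_), R(R_), Op(Op_), P(P_) {}
  Prec prec() const override { return P; }
  void print(std::ostream &OS) const override {
    // Left-associative: an equal-precedence LHS needs no parens (strict
    // comparison), but an equal-precedence RHS does (non-strict).
    L.printAsOperand(OS, P, /*StrictlyWorse=*/true);
    OS << ' ' << Op << ' ';
    R.printAsOperand(OS, P, /*StrictlyWorse=*/false);
  }
};

int main() {
  Lit A("a"), B("b"), C("c");
  Bin Sum(A, "+", B, Prec::Additive);
  Bin Prod(Sum, "*", C, Prec::Multiplicative);
  Prod.printAsOperand(std::cout); // prints "(a + b) * c"
  std::cout << '\n';
  Bin Sum2(Sum, "+", C, Prec::Additive);
  Sum2.printAsOperand(std::cout); // prints "a + b + c", no redundant parens
  std::cout << '\n';
}

Because Prec::Default is the weakest context, printing a complete expression never adds an outer pair of parentheses, and left-associative chains such as a + b + c come out without the redundant grouping the old always-parenthesize code emitted.
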
@@ -494,7 +457,7 @@ class PostfixQualifiedType final : public Node { const StringView Postfix; public: - PostfixQualifiedType(Node *Ty_, StringView Postfix_) + PostfixQualifiedType(const Node *Ty_, StringView Postfix_) : Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {} template void match(Fn F) const { F(Ty, Postfix); } @@ -519,6 +482,26 @@ public: void printLeft(OutputBuffer &OB) const override { OB += Name; } }; +class BitIntType final : public Node { + const Node *Size; + bool Signed; + +public: + BitIntType(const Node *Size_, bool Signed_) + : Node(KBitIntType), Size(Size_), Signed(Signed_) {} + + template void match(Fn F) const { F(Size, Signed); } + + void printLeft(OutputBuffer &OB) const override { + if (!Signed) + OB += "unsigned "; + OB += "_BitInt"; + OB.printOpen(); + Size->printAsOperand(OB); + OB.printClose(); + } +}; + class ElaboratedTypeSpefType : public Node { StringView Kind; Node *Child; @@ -693,7 +676,7 @@ public: void printLeft(OutputBuffer &OB) const override { if (Printing) return; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); std::pair Collapsed = collapse(OB); if (!Collapsed.second) return; @@ -708,7 +691,7 @@ public: void printRight(OutputBuffer &OB) const override { if (Printing) return; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); std::pair Collapsed = collapse(OB); if (!Collapsed.second) return; @@ -815,9 +798,9 @@ public: } void printRight(OutputBuffer &OB) const override { - OB += "("; + OB.printOpen(); Params.printWithComma(OB); - OB += ")"; + OB.printClose(); Ret->printRight(OB); if (CVQuals & QualConst) @@ -847,9 +830,10 @@ public: template void match(Fn F) const { F(E); } void printLeft(OutputBuffer &OB) const override { - OB += "noexcept("; - E->print(OB); - OB += ")"; + OB += "noexcept"; + OB.printOpen(); + E->printAsOperand(OB); + OB.printClose(); } }; @@ -862,9 +846,10 @@ public: template void match(Fn F) const { F(Types); } void printLeft(OutputBuffer &OB) const override { - OB += "throw("; + OB += "throw"; + OB.printOpen(); Types.printWithComma(OB); - OB += ')'; + OB.printClose(); } }; @@ -910,9 +895,9 @@ public: } void printRight(OutputBuffer &OB) const override { - OB += "("; + OB.printOpen(); Params.printWithComma(OB); - OB += ")"; + OB.printClose(); if (Ret) Ret->printRight(OB); @@ -1001,6 +986,46 @@ struct NestedName : Node { } }; +struct ModuleName : Node { + ModuleName *Parent; + Node *Name; + bool IsPartition; + + ModuleName(ModuleName *Parent_, Node *Name_, bool IsPartition_ = false) + : Node(KModuleName), Parent(Parent_), Name(Name_), + IsPartition(IsPartition_) {} + + template void match(Fn F) const { + F(Parent, Name, IsPartition); + } + + void printLeft(OutputBuffer &OB) const override { + if (Parent) + Parent->print(OB); + if (Parent || IsPartition) + OB += IsPartition ? 
':' : '.'; + Name->print(OB); + } +}; + +struct ModuleEntity : Node { + ModuleName *Module; + Node *Name; + + ModuleEntity(ModuleName *Module_, Node *Name_) + : Node(KModuleEntity), Module(Module_), Name(Name_) {} + + template void match(Fn F) const { F(Module, Name); } + + StringView getBaseName() const override { return Name->getBaseName(); } + + void printLeft(OutputBuffer &OB) const override { + Name->print(OB); + OB += '@'; + Module->print(OB); + } +}; + struct LocalName : Node { Node *Encoding; Node *Entity; @@ -1042,9 +1067,8 @@ class VectorType final : public Node { const Node *Dimension; public: - VectorType(const Node *BaseType_, Node *Dimension_) - : Node(KVectorType), BaseType(BaseType_), - Dimension(Dimension_) {} + VectorType(const Node *BaseType_, const Node *Dimension_) + : Node(KVectorType), BaseType(BaseType_), Dimension(Dimension_) {} template void match(Fn F) const { F(BaseType, Dimension); } @@ -1176,6 +1200,7 @@ public: template void match(Fn F) const { F(Name, Params); } void printLeft(OutputBuffer &OB) const override { + ScopedOverride LT(OB.GtIsGt, 0); OB += "template<"; Params.printWithComma(OB); OB += "> typename "; @@ -1311,8 +1336,8 @@ public: void printLeft(OutputBuffer &OB) const override { constexpr unsigned Max = std::numeric_limits::max(); - SwapAndRestore SavePackIdx(OB.CurrentPackIndex, Max); - SwapAndRestore SavePackMax(OB.CurrentPackMax, Max); + ScopedOverride SavePackIdx(OB.CurrentPackIndex, Max); + ScopedOverride SavePackMax(OB.CurrentPackMax, Max); size_t StreamPos = OB.getCurrentPosition(); // Print the first element in the pack. If Child contains a ParameterPack, @@ -1353,10 +1378,9 @@ public: NodeArray getParams() { return Params; } void printLeft(OutputBuffer &OB) const override { + ScopedOverride LT(OB.GtIsGt, 0); OB += "<"; Params.printWithComma(OB); - if (OB.back() == '>') - OB += " "; OB += ">"; } }; @@ -1402,38 +1426,38 @@ struct ForwardTemplateReference : Node { bool hasRHSComponentSlow(OutputBuffer &OB) const override { if (Printing) return false; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); return Ref->hasRHSComponent(OB); } bool hasArraySlow(OutputBuffer &OB) const override { if (Printing) return false; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); return Ref->hasArray(OB); } bool hasFunctionSlow(OutputBuffer &OB) const override { if (Printing) return false; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); return Ref->hasFunction(OB); } const Node *getSyntaxNode(OutputBuffer &OB) const override { if (Printing) return this; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); return Ref->getSyntaxNode(OB); } void printLeft(OutputBuffer &OB) const override { if (Printing) return; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); Ref->printLeft(OB); } void printRight(OutputBuffer &OB) const override { if (Printing) return; - SwapAndRestore SavePrinting(Printing, true); + ScopedOverride SavePrinting(Printing, true); Ref->printRight(OB); } }; @@ -1473,21 +1497,6 @@ public: } }; -struct StdQualifiedName : Node { - Node *Child; - - StdQualifiedName(Node *Child_) : Node(KStdQualifiedName), Child(Child_) {} - - template void match(Fn F) const { F(Child); } - - StringView getBaseName() const override { return Child->getBaseName(); } - - void printLeft(OutputBuffer &OB) const override { - OB += "std::"; - Child->print(OB); - 
} -}; - enum class SpecialSubKind { allocator, basic_string, @@ -1497,15 +1506,25 @@ enum class SpecialSubKind { iostream, }; -class ExpandedSpecialSubstitution final : public Node { +class SpecialSubstitution; +class ExpandedSpecialSubstitution : public Node { +protected: SpecialSubKind SSK; + ExpandedSpecialSubstitution(SpecialSubKind SSK_, Kind K_) + : Node(K_), SSK(SSK_) {} public: ExpandedSpecialSubstitution(SpecialSubKind SSK_) - : Node(KExpandedSpecialSubstitution), SSK(SSK_) {} + : ExpandedSpecialSubstitution(SSK_, KExpandedSpecialSubstitution) {} + inline ExpandedSpecialSubstitution(SpecialSubstitution const *); template void match(Fn F) const { F(SSK); } +protected: + bool isInstantiation() const { + return unsigned(SSK) >= unsigned(SpecialSubKind::string); + } + StringView getBaseName() const override { switch (SSK) { case SpecialSubKind::allocator: @@ -1524,82 +1543,44 @@ public: DEMANGLE_UNREACHABLE; } +private: void printLeft(OutputBuffer &OB) const override { - switch (SSK) { - case SpecialSubKind::allocator: - OB += "std::allocator"; - break; - case SpecialSubKind::basic_string: - OB += "std::basic_string"; - break; - case SpecialSubKind::string: - OB += "std::basic_string, " - "std::allocator >"; - break; - case SpecialSubKind::istream: - OB += "std::basic_istream >"; - break; - case SpecialSubKind::ostream: - OB += "std::basic_ostream >"; - break; - case SpecialSubKind::iostream: - OB += "std::basic_iostream >"; - break; + OB << "std::" << getBaseName(); + if (isInstantiation()) { + OB << ""; + if (SSK == SpecialSubKind::string) + OB << ", std::allocator"; + OB << ">"; } } }; -class SpecialSubstitution final : public Node { +class SpecialSubstitution final : public ExpandedSpecialSubstitution { public: - SpecialSubKind SSK; - SpecialSubstitution(SpecialSubKind SSK_) - : Node(KSpecialSubstitution), SSK(SSK_) {} + : ExpandedSpecialSubstitution(SSK_, KSpecialSubstitution) {} template void match(Fn F) const { F(SSK); } StringView getBaseName() const override { - switch (SSK) { - case SpecialSubKind::allocator: - return StringView("allocator"); - case SpecialSubKind::basic_string: - return StringView("basic_string"); - case SpecialSubKind::string: - return StringView("string"); - case SpecialSubKind::istream: - return StringView("istream"); - case SpecialSubKind::ostream: - return StringView("ostream"); - case SpecialSubKind::iostream: - return StringView("iostream"); + auto SV = ExpandedSpecialSubstitution::getBaseName (); + if (isInstantiation()) { + // The instantiations are typedefs that drop the "basic_" prefix. 
+ assert(SV.startsWith("basic_")); + SV = SV.dropFront(sizeof("basic_") - 1); } - DEMANGLE_UNREACHABLE; + return SV; } void printLeft(OutputBuffer &OB) const override { - switch (SSK) { - case SpecialSubKind::allocator: - OB += "std::allocator"; - break; - case SpecialSubKind::basic_string: - OB += "std::basic_string"; - break; - case SpecialSubKind::string: - OB += "std::string"; - break; - case SpecialSubKind::istream: - OB += "std::istream"; - break; - case SpecialSubKind::ostream: - OB += "std::ostream"; - break; - case SpecialSubKind::iostream: - OB += "std::iostream"; - break; - } + OB << "std::" << getBaseName(); } }; +inline ExpandedSpecialSubstitution::ExpandedSpecialSubstitution( + SpecialSubstitution const *SS) + : ExpandedSpecialSubstitution(SS->SSK) {} + class CtorDtorName final : public Node { const Node *Basename; const bool IsDtor; @@ -1665,13 +1646,14 @@ public: void printDeclarator(OutputBuffer &OB) const { if (!TemplateParams.empty()) { + ScopedOverride LT(OB.GtIsGt, 0); OB += "<"; TemplateParams.printWithComma(OB); OB += ">"; } - OB += "("; + OB.printOpen(); Params.printWithComma(OB); - OB += ")"; + OB.printClose(); } void printLeft(OutputBuffer &OB) const override { @@ -1691,9 +1673,9 @@ public: template void match(Fn F) const { F(Bindings); } void printLeft(OutputBuffer &OB) const override { - OB += '['; + OB.printOpen('['); Bindings.printWithComma(OB); - OB += ']'; + OB.printClose(']'); } }; @@ -1705,28 +1687,31 @@ class BinaryExpr : public Node { const Node *RHS; public: - BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_) - : Node(KBinaryExpr), LHS(LHS_), InfixOperator(InfixOperator_), RHS(RHS_) { - } + BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_, + Prec Prec_) + : Node(KBinaryExpr, Prec_), LHS(LHS_), InfixOperator(InfixOperator_), + RHS(RHS_) {} - template void match(Fn F) const { F(LHS, InfixOperator, RHS); } + template void match(Fn F) const { + F(LHS, InfixOperator, RHS, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { - // might be a template argument expression, then we need to disambiguate - // with parens. - if (InfixOperator == ">") - OB += "("; - - OB += "("; - LHS->print(OB); - OB += ") "; + bool ParenAll = OB.isGtInsideTemplateArgs() && + (InfixOperator == ">" || InfixOperator == ">>"); + if (ParenAll) + OB.printOpen(); + // Assignment is right associative, with special LHS precedence. + bool IsAssign = getPrecedence() == Prec::Assign; + LHS->printAsOperand(OB, IsAssign ? 
Prec::OrIf : getPrecedence(), !IsAssign); + // No space before comma operator + if (!(InfixOperator == ",")) + OB += " "; OB += InfixOperator; - OB += " ("; - RHS->print(OB); - OB += ")"; - - if (InfixOperator == ">") - OB += ")"; + OB += " "; + RHS->printAsOperand(OB, getPrecedence(), IsAssign); + if (ParenAll) + OB.printClose(); } }; @@ -1735,17 +1720,18 @@ class ArraySubscriptExpr : public Node { const Node *Op2; public: - ArraySubscriptExpr(const Node *Op1_, const Node *Op2_) - : Node(KArraySubscriptExpr), Op1(Op1_), Op2(Op2_) {} + ArraySubscriptExpr(const Node *Op1_, const Node *Op2_, Prec Prec_) + : Node(KArraySubscriptExpr, Prec_), Op1(Op1_), Op2(Op2_) {} - template void match(Fn F) const { F(Op1, Op2); } + template void match(Fn F) const { + F(Op1, Op2, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { - OB += "("; - Op1->print(OB); - OB += ")["; - Op2->print(OB); - OB += "]"; + Op1->printAsOperand(OB, getPrecedence()); + OB.printOpen('['); + Op2->printAsOperand(OB); + OB.printClose(']'); } }; @@ -1754,15 +1740,15 @@ class PostfixExpr : public Node { const StringView Operator; public: - PostfixExpr(const Node *Child_, StringView Operator_) - : Node(KPostfixExpr), Child(Child_), Operator(Operator_) {} + PostfixExpr(const Node *Child_, StringView Operator_, Prec Prec_) + : Node(KPostfixExpr, Prec_), Child(Child_), Operator(Operator_) {} - template void match(Fn F) const { F(Child, Operator); } + template void match(Fn F) const { + F(Child, Operator, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { - OB += "("; - Child->print(OB); - OB += ")"; + Child->printAsOperand(OB, getPrecedence(), true); OB += Operator; } }; @@ -1773,19 +1759,20 @@ class ConditionalExpr : public Node { const Node *Else; public: - ConditionalExpr(const Node *Cond_, const Node *Then_, const Node *Else_) - : Node(KConditionalExpr), Cond(Cond_), Then(Then_), Else(Else_) {} + ConditionalExpr(const Node *Cond_, const Node *Then_, const Node *Else_, + Prec Prec_) + : Node(KConditionalExpr, Prec_), Cond(Cond_), Then(Then_), Else(Else_) {} - template void match(Fn F) const { F(Cond, Then, Else); } + template void match(Fn F) const { + F(Cond, Then, Else, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { - OB += "("; - Cond->print(OB); - OB += ") ? ("; - Then->print(OB); - OB += ") : ("; - Else->print(OB); - OB += ")"; + Cond->printAsOperand(OB, getPrecedence()); + OB += " ? 
"; + Then->printAsOperand(OB); + OB += " : "; + Else->printAsOperand(OB, Prec::Assign, true); } }; @@ -1795,15 +1782,17 @@ class MemberExpr : public Node { const Node *RHS; public: - MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_) - : Node(KMemberExpr), LHS(LHS_), Kind(Kind_), RHS(RHS_) {} + MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_, Prec Prec_) + : Node(KMemberExpr, Prec_), LHS(LHS_), Kind(Kind_), RHS(RHS_) {} - template void match(Fn F) const { F(LHS, Kind, RHS); } + template void match(Fn F) const { + F(LHS, Kind, RHS, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { - LHS->print(OB); + LHS->printAsOperand(OB, getPrecedence(), true); OB += Kind; - RHS->print(OB); + RHS->printAsOperand(OB, getPrecedence(), false); } }; @@ -1847,15 +1836,19 @@ class EnclosingExpr : public Node { const StringView Postfix; public: - EnclosingExpr(StringView Prefix_, Node *Infix_, StringView Postfix_) - : Node(KEnclosingExpr), Prefix(Prefix_), Infix(Infix_), - Postfix(Postfix_) {} + EnclosingExpr(StringView Prefix_, const Node *Infix_, + Prec Prec_ = Prec::Primary) + : Node(KEnclosingExpr, Prec_), Prefix(Prefix_), Infix(Infix_) {} - template void match(Fn F) const { F(Prefix, Infix, Postfix); } + template void match(Fn F) const { + F(Prefix, Infix, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { OB += Prefix; + OB.printOpen(); Infix->print(OB); + OB.printClose(); OB += Postfix; } }; @@ -1867,18 +1860,24 @@ class CastExpr : public Node { const Node *From; public: - CastExpr(StringView CastKind_, const Node *To_, const Node *From_) - : Node(KCastExpr), CastKind(CastKind_), To(To_), From(From_) {} + CastExpr(StringView CastKind_, const Node *To_, const Node *From_, Prec Prec_) + : Node(KCastExpr, Prec_), CastKind(CastKind_), To(To_), From(From_) {} - template void match(Fn F) const { F(CastKind, To, From); } + template void match(Fn F) const { + F(CastKind, To, From, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { OB += CastKind; - OB += "<"; - To->printLeft(OB); - OB += ">("; - From->printLeft(OB); - OB += ")"; + { + ScopedOverride LT(OB.GtIsGt, 0); + OB += "<"; + To->printLeft(OB); + OB += ">"; + } + OB.printOpen(); + From->printAsOperand(OB); + OB.printClose(); } }; @@ -1892,10 +1891,11 @@ public: template void match(Fn F) const { F(Pack); } void printLeft(OutputBuffer &OB) const override { - OB += "sizeof...("; + OB += "sizeof..."; + OB.printOpen(); ParameterPackExpansion PPE(Pack); PPE.printLeft(OB); - OB += ")"; + OB.printClose(); } }; @@ -1904,16 +1904,18 @@ class CallExpr : public Node { NodeArray Args; public: - CallExpr(const Node *Callee_, NodeArray Args_) - : Node(KCallExpr), Callee(Callee_), Args(Args_) {} + CallExpr(const Node *Callee_, NodeArray Args_, Prec Prec_) + : Node(KCallExpr, Prec_), Callee(Callee_), Args(Args_) {} - template void match(Fn F) const { F(Callee, Args); } + template void match(Fn F) const { + F(Callee, Args, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { Callee->print(OB); - OB += "("; + OB.printOpen(); Args.printWithComma(OB); - OB += ")"; + OB.printClose(); } }; @@ -1926,31 +1928,31 @@ class NewExpr : public Node { bool IsArray; // new[] ? 
public: NewExpr(NodeArray ExprList_, Node *Type_, NodeArray InitList_, bool IsGlobal_, - bool IsArray_) - : Node(KNewExpr), ExprList(ExprList_), Type(Type_), InitList(InitList_), - IsGlobal(IsGlobal_), IsArray(IsArray_) {} + bool IsArray_, Prec Prec_) + : Node(KNewExpr, Prec_), ExprList(ExprList_), Type(Type_), + InitList(InitList_), IsGlobal(IsGlobal_), IsArray(IsArray_) {} template void match(Fn F) const { - F(ExprList, Type, InitList, IsGlobal, IsArray); + F(ExprList, Type, InitList, IsGlobal, IsArray, getPrecedence()); } void printLeft(OutputBuffer &OB) const override { if (IsGlobal) - OB += "::operator "; + OB += "::"; OB += "new"; if (IsArray) OB += "[]"; - OB += ' '; if (!ExprList.empty()) { - OB += "("; + OB.printOpen(); ExprList.printWithComma(OB); - OB += ")"; + OB.printClose(); } + OB += " "; Type->print(OB); if (!InitList.empty()) { - OB += "("; + OB.printOpen(); InitList.printWithComma(OB); - OB += ")"; + OB.printClose(); } } }; @@ -1961,17 +1963,21 @@ class DeleteExpr : public Node { bool IsArray; public: - DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_) - : Node(KDeleteExpr), Op(Op_), IsGlobal(IsGlobal_), IsArray(IsArray_) {} + DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_, Prec Prec_) + : Node(KDeleteExpr, Prec_), Op(Op_), IsGlobal(IsGlobal_), + IsArray(IsArray_) {} - template void match(Fn F) const { F(Op, IsGlobal, IsArray); } + template void match(Fn F) const { + F(Op, IsGlobal, IsArray, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { if (IsGlobal) OB += "::"; OB += "delete"; if (IsArray) - OB += "[] "; + OB += "[]"; + OB += ' '; Op->print(OB); } }; @@ -1981,16 +1987,16 @@ class PrefixExpr : public Node { Node *Child; public: - PrefixExpr(StringView Prefix_, Node *Child_) - : Node(KPrefixExpr), Prefix(Prefix_), Child(Child_) {} + PrefixExpr(StringView Prefix_, Node *Child_, Prec Prec_) + : Node(KPrefixExpr, Prec_), Prefix(Prefix_), Child(Child_) {} - template void match(Fn F) const { F(Prefix, Child); } + template void match(Fn F) const { + F(Prefix, Child, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { OB += Prefix; - OB += "("; - Child->print(OB); - OB += ")"; + Child->printAsOperand(OB, getPrecedence()); } }; @@ -2013,17 +2019,20 @@ class ConversionExpr : public Node { NodeArray Expressions; public: - ConversionExpr(const Node *Type_, NodeArray Expressions_) - : Node(KConversionExpr), Type(Type_), Expressions(Expressions_) {} + ConversionExpr(const Node *Type_, NodeArray Expressions_, Prec Prec_) + : Node(KConversionExpr, Prec_), Type(Type_), Expressions(Expressions_) {} - template void match(Fn F) const { F(Type, Expressions); } + template void match(Fn F) const { + F(Type, Expressions, getPrecedence()); + } void printLeft(OutputBuffer &OB) const override { - OB += "("; + OB.printOpen(); Type->print(OB); - OB += ")("; + OB.printClose(); + OB.printOpen(); Expressions.printWithComma(OB); - OB += ")"; + OB.printClose(); } }; @@ -2034,18 +2043,21 @@ class PointerToMemberConversionExpr : public Node { public: PointerToMemberConversionExpr(const Node *Type_, const Node *SubExpr_, - StringView Offset_) - : Node(KPointerToMemberConversionExpr), Type(Type_), SubExpr(SubExpr_), - Offset(Offset_) {} + StringView Offset_, Prec Prec_) + : Node(KPointerToMemberConversionExpr, Prec_), Type(Type_), + SubExpr(SubExpr_), Offset(Offset_) {} - template void match(Fn F) const { F(Type, SubExpr, Offset); } + template void match(Fn F) const { + F(Type, SubExpr, Offset, getPrecedence()); + } void printLeft(OutputBuffer &OB) const 
override { - OB += "("; + OB.printOpen(); Type->print(OB); - OB += ")("; + OB.printClose(); + OB.printOpen(); SubExpr->print(OB); - OB += ")"; + OB.printClose(); } }; @@ -2131,41 +2143,33 @@ public: void printLeft(OutputBuffer &OB) const override { auto PrintPack = [&] { - OB += '('; + OB.printOpen(); ParameterPackExpansion(Pack).print(OB); - OB += ')'; + OB.printClose(); }; - OB += '('; - - if (IsLeftFold) { - // init op ... op pack - if (Init != nullptr) { - Init->print(OB); - OB += ' '; - OB += OperatorName; - OB += ' '; - } - // ... op pack - OB += "... "; - OB += OperatorName; - OB += ' '; - PrintPack(); - } else { // !IsLeftFold - // pack op ... - PrintPack(); - OB += ' '; - OB += OperatorName; - OB += " ..."; - // pack op ... op init - if (Init != nullptr) { - OB += ' '; - OB += OperatorName; - OB += ' '; - Init->print(OB); - } + OB.printOpen(); + // Either '[init op ]... op pack' or 'pack op ...[ op init]' + // Refactored to '[(init|pack) op ]...[ op (pack|init)]' + // Fold expr operands are cast-expressions + if (!IsLeftFold || Init != nullptr) { + // '(init|pack) op ' + if (IsLeftFold) + Init->printAsOperand(OB, Prec::Cast, true); + else + PrintPack(); + OB << " " << OperatorName << " "; + } + OB << "..."; + if (IsLeftFold || Init != nullptr) { + // ' op (init|pack)' + OB << " " << OperatorName << " "; + if (IsLeftFold) + PrintPack(); + else + Init->printAsOperand(OB, Prec::Cast, true); } - OB += ')'; + OB.printClose(); } }; @@ -2239,9 +2243,9 @@ public: template void match(Fn F) const { F(Ty, Integer); } void printLeft(OutputBuffer &OB) const override { - OB << "("; + OB.printOpen(); Ty->print(OB); - OB << ")"; + OB.printClose(); if (Integer[0] == 'n') OB << "-" << Integer.dropFront(1); @@ -2262,13 +2266,13 @@ public: void printLeft(OutputBuffer &OB) const override { if (Type.size() > 3) { - OB += "("; + OB.printOpen(); OB += Type; - OB += ")"; + OB.printClose(); } if (Value[0] == 'n') { - OB += "-"; + OB += '-'; OB += Value.dropFront(1); } else OB += Value; @@ -2344,24 +2348,22 @@ using LongDoubleLiteral = FloatLiteralImpl; template void Node::visit(Fn F) const { switch (K) { -#define CASE(X) case K ## X: return F(static_cast(this)); - FOR_EACH_NODE_KIND(CASE) -#undef CASE +#define NODE(X) \ + case K##X: \ + return F(static_cast(this)); +#include "ItaniumNodes.def" } assert(0 && "unknown mangling node kind"); } /// Determine the kind of a node from its type. template struct NodeKind; -#define SPECIALIZATION(X) \ - template<> struct NodeKind { \ - static constexpr Node::Kind Kind = Node::K##X; \ - static constexpr const char *name() { return #X; } \ +#define NODE(X) \ + template <> struct NodeKind { \ + static constexpr Node::Kind Kind = Node::K##X; \ + static constexpr const char *name() { return #X; } \ }; -FOR_EACH_NODE_KIND(SPECIALIZATION) -#undef SPECIALIZATION - -#undef FOR_EACH_NODE_KIND +#include "ItaniumNodes.def" template struct AbstractManglingParser { const char *First; @@ -2499,17 +2501,16 @@ template struct AbstractManglingParser { /// Parse the production. 
Node *parseExpr(); - Node *parsePrefixExpr(StringView Kind); - Node *parseBinaryExpr(StringView Kind); + Node *parsePrefixExpr(StringView Kind, Node::Prec Prec); + Node *parseBinaryExpr(StringView Kind, Node::Prec Prec); Node *parseIntegerLiteral(StringView Lit); Node *parseExprPrimary(); template Node *parseFloatingLiteral(); Node *parseFunctionParam(); - Node *parseNewExpr(); Node *parseConversionExpr(); Node *parseBracedExpr(); Node *parseFoldExpr(); - Node *parsePointerToMemberConversionExpr(); + Node *parsePointerToMemberConversionExpr(Node::Prec Prec); Node *parseSubobjectExpr(); /// Parse the production. @@ -2557,17 +2558,80 @@ template struct AbstractManglingParser { Node *parseName(NameState *State = nullptr); Node *parseLocalName(NameState *State); Node *parseOperatorName(NameState *State); - Node *parseUnqualifiedName(NameState *State); + bool parseModuleNameOpt(ModuleName *&Module); + Node *parseUnqualifiedName(NameState *State, Node *Scope, ModuleName *Module); Node *parseUnnamedTypeName(NameState *State); Node *parseSourceName(NameState *State); - Node *parseUnscopedName(NameState *State); + Node *parseUnscopedName(NameState *State, bool *isSubstName); Node *parseNestedName(NameState *State); Node *parseCtorDtorName(Node *&SoFar, NameState *State); Node *parseAbiTags(Node *N); + struct OperatorInfo { + enum OIKind : unsigned char { + Prefix, // Prefix unary: @ expr + Postfix, // Postfix unary: expr @ + Binary, // Binary: lhs @ rhs + Array, // Array index: lhs [ rhs ] + Member, // Member access: lhs @ rhs + New, // New + Del, // Delete + Call, // Function call: expr (expr*) + CCast, // C cast: (type)expr + Conditional, // Conditional: expr ? expr : expr + NameOnly, // Overload only, not allowed in expression. + // Below do not have operator names + NamedCast, // Named cast, @(expr) + OfIdOp, // alignof, sizeof, typeid + + Unnameable = NamedCast, + }; + char Enc[2]; // Encoding + OIKind Kind; // Kind of operator + bool Flag : 1; // Entry-specific flag + Node::Prec Prec : 7; // Precedence + const char *Name; // Spelling + + public: + constexpr OperatorInfo(const char (&E)[3], OIKind K, bool F, Node::Prec P, + const char *N) + : Enc{E[0], E[1]}, Kind{K}, Flag{F}, Prec{P}, Name{N} {} + + public: + bool operator<(const OperatorInfo &Other) const { + return *this < Other.Enc; + } + bool operator<(const char *Peek) const { + return Enc[0] < Peek[0] || (Enc[0] == Peek[0] && Enc[1] < Peek[1]); + } + bool operator==(const char *Peek) const { + return Enc[0] == Peek[0] && Enc[1] == Peek[1]; + } + bool operator!=(const char *Peek) const { return !this->operator==(Peek); } + + public: + StringView getSymbol() const { + StringView Res = Name; + if (Kind < Unnameable) { + assert(Res.startsWith("operator") && + "operator name does not start with 'operator'"); + Res = Res.dropFront(sizeof("operator") - 1); + Res.consumeFront(' '); + } + return Res; + } + StringView getName() const { return Name; } + OIKind getKind() const { return Kind; } + bool getFlag() const { return Flag; } + Node::Prec getPrecedence() const { return Prec; } + }; + static const OperatorInfo Ops[]; + static const size_t NumOps; + const OperatorInfo *parseOperatorEncoding(); + /// Parse the production. 
- Node *parseUnresolvedName(); + Node *parseUnresolvedName(bool Global); Node *parseSimpleId(); Node *parseBaseUnresolvedName(); Node *parseUnresolvedType(); @@ -2588,26 +2652,16 @@ const char* parse_discriminator(const char* first, const char* last); // ::= template Node *AbstractManglingParser::parseName(NameState *State) { - consumeIf('L'); // extension - if (look() == 'N') return getDerived().parseNestedName(State); if (look() == 'Z') return getDerived().parseLocalName(State); Node *Result = nullptr; - bool IsSubst = look() == 'S' && look(1) != 't'; - if (IsSubst) { - // A substitution must lead to: - // ::= - Result = getDerived().parseSubstitution(); - } else { - // An unscoped name can be one of: - // ::= - // ::= - Result = getDerived().parseUnscopedName(State); - } - if (Result == nullptr) + bool IsSubst = false; + + Result = getDerived().parseUnscopedName(State, &IsSubst); + if (!Result) return nullptr; if (look() == 'I') { @@ -2667,38 +2721,63 @@ Node *AbstractManglingParser::parseLocalName(NameState *State) { // ::= // ::= St # ::std:: -// extension ::= StL +// [*] extension template Node * -AbstractManglingParser::parseUnscopedName(NameState *State) { - bool IsStd = consumeIf("St"); - if (IsStd) - consumeIf('L'); +AbstractManglingParser::parseUnscopedName(NameState *State, + bool *IsSubst) { - Node *Result = getDerived().parseUnqualifiedName(State); - if (Result == nullptr) - return nullptr; - if (IsStd) - Result = make(Result); + Node *Std = nullptr; + if (consumeIf("St")) { + Std = make("std"); + if (Std == nullptr) + return nullptr; + } - return Result; + Node *Res = nullptr; + ModuleName *Module = nullptr; + if (look() == 'S') { + Node *S = getDerived().parseSubstitution(); + if (!S) + return nullptr; + if (S->getKind() == Node::KModuleName) + Module = static_cast(S); + else if (IsSubst && Std == nullptr) { + Res = S; + *IsSubst = true; + } else { + return nullptr; + } + } + + if (Res == nullptr || Std != nullptr) { + Res = getDerived().parseUnqualifiedName(State, Std, Module); + } + + return Res; } -// ::= [abi-tags] -// ::= -// ::= -// ::= -// ::= DC + E # structured binding declaration +// ::= [] L? [] +// ::= [] [] +// ::= [] L? [] +// ::= [] L? [] +// # structured binding declaration +// ::= [] L? DC + E template -Node * -AbstractManglingParser::parseUnqualifiedName(NameState *State) { - // s are special-cased in parseNestedName(). +Node *AbstractManglingParser::parseUnqualifiedName( + NameState *State, Node *Scope, ModuleName *Module) { + if (getDerived().parseModuleNameOpt(Module)) + return nullptr; + + consumeIf('L'); + Node *Result; - if (look() == 'U') - Result = getDerived().parseUnnamedTypeName(State); - else if (look() >= '1' && look() <= '9') + if (look() >= '1' && look() <= '9') { Result = getDerived().parseSourceName(State); - else if (consumeIf("DC")) { + } else if (look() == 'U') { + Result = getDerived().parseUnnamedTypeName(State); + } else if (consumeIf("DC")) { + // Structured binding size_t BindingsBegin = Names.size(); do { Node *Binding = getDerived().parseSourceName(State); @@ -2707,13 +2786,46 @@ AbstractManglingParser::parseUnqualifiedName(NameState *State) { Names.push_back(Binding); } while (!consumeIf('E')); Result = make(popTrailingNodeArray(BindingsBegin)); - } else + } else if (look() == 'C' || look() == 'D') { + // A . 
+ if (Scope == nullptr || Module != nullptr) + return nullptr; + Result = getDerived().parseCtorDtorName(Scope, State); + } else { Result = getDerived().parseOperatorName(State); + } + + if (Result != nullptr && Module != nullptr) + Result = make(Module, Result); if (Result != nullptr) Result = getDerived().parseAbiTags(Result); + if (Result != nullptr && Scope != nullptr) + Result = make(Scope, Result); + return Result; } +// ::= +// ::= +// ::= # passed in by caller +// ::= W +// ::= W P +template +bool AbstractManglingParser::parseModuleNameOpt( + ModuleName *&Module) { + while (consumeIf('W')) { + bool IsPartition = consumeIf('P'); + Node *Sub = getDerived().parseSourceName(nullptr); + if (!Sub) + return true; + Module = + static_cast(make(Module, Sub, IsPartition)); + Subs.push_back(Module); + } + + return false; +} + // ::= Ut [] _ // ::= // @@ -2735,7 +2847,7 @@ AbstractManglingParser::parseUnnamedTypeName(NameState *State) { return make(Count); } if (consumeIf("Ul")) { - SwapAndRestore SwapParams(ParsingLambdaParamsAtLevel, + ScopedOverride SwapParams(ParsingLambdaParamsAtLevel, TemplateParams.size()); ScopedTemplateParamList LambdaTemplateParams(this); @@ -2813,97 +2925,124 @@ Node *AbstractManglingParser::parseSourceName(NameState *) { return make(Name); } -// ::= aa # && -// ::= ad # & (unary) -// ::= an # & -// ::= aN # &= -// ::= aS # = -// ::= cl # () -// ::= cm # , -// ::= co # ~ -// ::= cv # (cast) -// ::= da # delete[] -// ::= de # * (unary) -// ::= dl # delete -// ::= dv # / -// ::= dV # /= -// ::= eo # ^ -// ::= eO # ^= -// ::= eq # == -// ::= ge # >= -// ::= gt # > -// ::= ix # [] -// ::= le # <= +// Operator encodings +template +const typename AbstractManglingParser< + Derived, Alloc>::OperatorInfo AbstractManglingParser::Ops[] = { + // Keep ordered by encoding + {"aN", OperatorInfo::Binary, false, Node::Prec::Assign, "operator&="}, + {"aS", OperatorInfo::Binary, false, Node::Prec::Assign, "operator="}, + {"aa", OperatorInfo::Binary, false, Node::Prec::AndIf, "operator&&"}, + {"ad", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator&"}, + {"an", OperatorInfo::Binary, false, Node::Prec::And, "operator&"}, + {"at", OperatorInfo::OfIdOp, /*Type*/ true, Node::Prec::Unary, "alignof "}, + {"aw", OperatorInfo::NameOnly, false, Node::Prec::Primary, + "operator co_await"}, + {"az", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Unary, "alignof "}, + {"cc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "const_cast"}, + {"cl", OperatorInfo::Call, false, Node::Prec::Postfix, "operator()"}, + {"cm", OperatorInfo::Binary, false, Node::Prec::Comma, "operator,"}, + {"co", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator~"}, + {"cv", OperatorInfo::CCast, false, Node::Prec::Cast, "operator"}, // C Cast + {"dV", OperatorInfo::Binary, false, Node::Prec::Assign, "operator/="}, + {"da", OperatorInfo::Del, /*Ary*/ true, Node::Prec::Unary, + "operator delete[]"}, + {"dc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "dynamic_cast"}, + {"de", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator*"}, + {"dl", OperatorInfo::Del, /*Ary*/ false, Node::Prec::Unary, + "operator delete"}, + {"ds", OperatorInfo::Member, /*Named*/ false, Node::Prec::PtrMem, + "operator.*"}, + {"dt", OperatorInfo::Member, /*Named*/ false, Node::Prec::Postfix, + "operator."}, + {"dv", OperatorInfo::Binary, false, Node::Prec::Assign, "operator/"}, + {"eO", OperatorInfo::Binary, false, Node::Prec::Assign, "operator^="}, + {"eo", OperatorInfo::Binary, false, Node::Prec::Xor, 
"operator^"}, + {"eq", OperatorInfo::Binary, false, Node::Prec::Equality, "operator=="}, + {"ge", OperatorInfo::Binary, false, Node::Prec::Relational, "operator>="}, + {"gt", OperatorInfo::Binary, false, Node::Prec::Relational, "operator>"}, + {"ix", OperatorInfo::Array, false, Node::Prec::Postfix, "operator[]"}, + {"lS", OperatorInfo::Binary, false, Node::Prec::Assign, "operator<<="}, + {"le", OperatorInfo::Binary, false, Node::Prec::Relational, "operator<="}, + {"ls", OperatorInfo::Binary, false, Node::Prec::Shift, "operator<<"}, + {"lt", OperatorInfo::Binary, false, Node::Prec::Relational, "operator<"}, + {"mI", OperatorInfo::Binary, false, Node::Prec::Assign, "operator-="}, + {"mL", OperatorInfo::Binary, false, Node::Prec::Assign, "operator*="}, + {"mi", OperatorInfo::Binary, false, Node::Prec::Additive, "operator-"}, + {"ml", OperatorInfo::Binary, false, Node::Prec::Multiplicative, + "operator*"}, + {"mm", OperatorInfo::Postfix, false, Node::Prec::Postfix, "operator--"}, + {"na", OperatorInfo::New, /*Ary*/ true, Node::Prec::Unary, + "operator new[]"}, + {"ne", OperatorInfo::Binary, false, Node::Prec::Equality, "operator!="}, + {"ng", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator-"}, + {"nt", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator!"}, + {"nw", OperatorInfo::New, /*Ary*/ false, Node::Prec::Unary, "operator new"}, + {"oR", OperatorInfo::Binary, false, Node::Prec::Assign, "operator|="}, + {"oo", OperatorInfo::Binary, false, Node::Prec::OrIf, "operator||"}, + {"or", OperatorInfo::Binary, false, Node::Prec::Ior, "operator|"}, + {"pL", OperatorInfo::Binary, false, Node::Prec::Assign, "operator+="}, + {"pl", OperatorInfo::Binary, false, Node::Prec::Additive, "operator+"}, + {"pm", OperatorInfo::Member, /*Named*/ false, Node::Prec::PtrMem, + "operator->*"}, + {"pp", OperatorInfo::Postfix, false, Node::Prec::Postfix, "operator++"}, + {"ps", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator+"}, + {"pt", OperatorInfo::Member, /*Named*/ true, Node::Prec::Postfix, + "operator->"}, + {"qu", OperatorInfo::Conditional, false, Node::Prec::Conditional, + "operator?"}, + {"rM", OperatorInfo::Binary, false, Node::Prec::Assign, "operator%="}, + {"rS", OperatorInfo::Binary, false, Node::Prec::Assign, "operator>>="}, + {"rc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, + "reinterpret_cast"}, + {"rm", OperatorInfo::Binary, false, Node::Prec::Multiplicative, + "operator%"}, + {"rs", OperatorInfo::Binary, false, Node::Prec::Shift, "operator>>"}, + {"sc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "static_cast"}, + {"ss", OperatorInfo::Binary, false, Node::Prec::Spaceship, "operator<=>"}, + {"st", OperatorInfo::OfIdOp, /*Type*/ true, Node::Prec::Unary, "sizeof "}, + {"sz", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Unary, "sizeof "}, + {"te", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Postfix, + "typeid "}, + {"ti", OperatorInfo::OfIdOp, /*Type*/ true, Node::Prec::Postfix, "typeid "}, +}; +template +const size_t AbstractManglingParser::NumOps = sizeof(Ops) / + sizeof(Ops[0]); + +// If the next 2 chars are an operator encoding, consume them and return their +// OperatorInfo. Otherwise return nullptr. 
+template +const typename AbstractManglingParser::OperatorInfo * +AbstractManglingParser::parseOperatorEncoding() { + if (numLeft() < 2) + return nullptr; + + auto Op = std::lower_bound( + &Ops[0], &Ops[NumOps], First, + [](const OperatorInfo &Op_, const char *Enc_) { return Op_ < Enc_; }); + if (Op == &Ops[NumOps] || *Op != First) + return nullptr; + + First += 2; + return Op; +} + +// ::= See parseOperatorEncoding() // ::= li # operator "" -// ::= ls # << -// ::= lS # <<= -// ::= lt # < -// ::= mi # - -// ::= mI # -= -// ::= ml # * -// ::= mL # *= -// ::= mm # -- (postfix in context) -// ::= na # new[] -// ::= ne # != -// ::= ng # - (unary) -// ::= nt # ! -// ::= nw # new -// ::= oo # || -// ::= or # | -// ::= oR # |= -// ::= pm # ->* -// ::= pl # + -// ::= pL # += -// ::= pp # ++ (postfix in context) -// ::= ps # + (unary) -// ::= pt # -> -// ::= qu # ? -// ::= rm # % -// ::= rM # %= -// ::= rs # >> -// ::= rS # >>= -// ::= ss # <=> C++2a -// ::= v # vendor extended operator +// ::= v # vendor extended operator template Node * AbstractManglingParser::parseOperatorName(NameState *State) { - switch (look()) { - case 'a': - switch (look(1)) { - case 'a': - First += 2; - return make("operator&&"); - case 'd': - case 'n': - First += 2; - return make("operator&"); - case 'N': - First += 2; - return make("operator&="); - case 'S': - First += 2; - return make("operator="); - } - return nullptr; - case 'c': - switch (look(1)) { - case 'l': - First += 2; - return make("operator()"); - case 'm': - First += 2; - return make("operator,"); - case 'o': - First += 2; - return make("operator~"); - // ::= cv # (cast) - case 'v': { - First += 2; - SwapAndRestore SaveTemplate(TryToParseTemplateArgs, false); + if (const auto *Op = parseOperatorEncoding()) { + if (Op->getKind() == OperatorInfo::CCast) { + // ::= cv # (cast) + ScopedOverride SaveTemplate(TryToParseTemplateArgs, false); // If we're parsing an encoding, State != nullptr and the conversion // operators' could have a that refers to some // s further ahead in the mangled name. - SwapAndRestore SavePermit(PermitForwardTemplateReferences, + ScopedOverride SavePermit(PermitForwardTemplateReferences, PermitForwardTemplateReferences || State != nullptr); Node *Ty = getDerived().parseType(); @@ -2912,185 +3051,29 @@ AbstractManglingParser::parseOperatorName(NameState *State) { if (State) State->CtorDtorConversion = true; return make(Ty); } - } - return nullptr; - case 'd': - switch (look(1)) { - case 'a': - First += 2; - return make("operator delete[]"); - case 'e': - First += 2; - return make("operator*"); - case 'l': - First += 2; - return make("operator delete"); - case 'v': - First += 2; - return make("operator/"); - case 'V': - First += 2; - return make("operator/="); - } - return nullptr; - case 'e': - switch (look(1)) { - case 'o': - First += 2; - return make("operator^"); - case 'O': - First += 2; - return make("operator^="); - case 'q': - First += 2; - return make("operator=="); - } - return nullptr; - case 'g': - switch (look(1)) { - case 'e': - First += 2; - return make("operator>="); - case 't': - First += 2; - return make("operator>"); - } - return nullptr; - case 'i': - if (look(1) == 'x') { - First += 2; - return make("operator[]"); - } - return nullptr; - case 'l': - switch (look(1)) { - case 'e': - First += 2; - return make("operator<="); + + if (Op->getKind() >= OperatorInfo::Unnameable) + /* Not a nameable operator. 
*/ + return nullptr; + if (Op->getKind() == OperatorInfo::Member && !Op->getFlag()) + /* Not a nameable MemberExpr */ + return nullptr; + + return make(Op->getName()); + } + + if (consumeIf("li")) { // ::= li # operator "" - case 'i': { - First += 2; - Node *SN = getDerived().parseSourceName(State); - if (SN == nullptr) - return nullptr; - return make(SN); - } - case 's': - First += 2; - return make("operator<<"); - case 'S': - First += 2; - return make("operator<<="); - case 't': - First += 2; - return make("operator<"); - } - return nullptr; - case 'm': - switch (look(1)) { - case 'i': - First += 2; - return make("operator-"); - case 'I': - First += 2; - return make("operator-="); - case 'l': - First += 2; - return make("operator*"); - case 'L': - First += 2; - return make("operator*="); - case 'm': - First += 2; - return make("operator--"); - } - return nullptr; - case 'n': - switch (look(1)) { - case 'a': - First += 2; - return make("operator new[]"); - case 'e': - First += 2; - return make("operator!="); - case 'g': - First += 2; - return make("operator-"); - case 't': - First += 2; - return make("operator!"); - case 'w': - First += 2; - return make("operator new"); - } - return nullptr; - case 'o': - switch (look(1)) { - case 'o': - First += 2; - return make("operator||"); - case 'r': - First += 2; - return make("operator|"); - case 'R': - First += 2; - return make("operator|="); - } - return nullptr; - case 'p': - switch (look(1)) { - case 'm': - First += 2; - return make("operator->*"); - case 'l': - First += 2; - return make("operator+"); - case 'L': - First += 2; - return make("operator+="); - case 'p': - First += 2; - return make("operator++"); - case 's': - First += 2; - return make("operator+"); - case 't': - First += 2; - return make("operator->"); - } - return nullptr; - case 'q': - if (look(1) == 'u') { - First += 2; - return make("operator?"); - } - return nullptr; - case 'r': - switch (look(1)) { - case 'm': - First += 2; - return make("operator%"); - case 'M': - First += 2; - return make("operator%="); - case 's': - First += 2; - return make("operator>>"); - case 'S': - First += 2; - return make("operator>>="); - } - return nullptr; - case 's': - if (look(1) == 's') { - First += 2; - return make("operator<=>"); - } - return nullptr; - // ::= v # vendor extended operator - case 'v': - if (std::isdigit(look(1))) { - First += 2; + Node *SN = getDerived().parseSourceName(State); + if (SN == nullptr) + return nullptr; + return make(SN); + } + + if (consumeIf('v')) { + // ::= v # vendor extended operator + if (look() >= '0' && look() <= '9') { + First++; Node *SN = getDerived().parseSourceName(State); if (SN == nullptr) return nullptr; @@ -3098,6 +3081,7 @@ AbstractManglingParser::parseOperatorName(NameState *State) { } return nullptr; } + return nullptr; } @@ -3116,19 +3100,11 @@ Node * AbstractManglingParser::parseCtorDtorName(Node *&SoFar, NameState *State) { if (SoFar->getKind() == Node::KSpecialSubstitution) { - auto SSK = static_cast(SoFar)->SSK; - switch (SSK) { - case SpecialSubKind::string: - case SpecialSubKind::istream: - case SpecialSubKind::ostream: - case SpecialSubKind::iostream: - SoFar = make(SSK); - if (!SoFar) - return nullptr; - break; - default: - break; - } + // Expand the special substitution. 
+ SoFar = make( + static_cast(SoFar)); + if (!SoFar) + return nullptr; } if (consumeIf('C')) { @@ -3157,8 +3133,10 @@ AbstractManglingParser::parseCtorDtorName(Node *&SoFar, return nullptr; } -// ::= N [] [] E -// ::= N [] [] E +// ::= N [] [] +// E +// ::= N [] [] +// E // // ::= // ::= @@ -3167,7 +3145,7 @@ AbstractManglingParser::parseCtorDtorName(Node *&SoFar, // ::= # empty // ::= // ::= -// extension ::= L +// [*] extension // // := [] M // @@ -3187,90 +3165,76 @@ AbstractManglingParser::parseNestedName(NameState *State) { if (State) State->ReferenceQualifier = FrefQualRValue; } else if (consumeIf('R')) { if (State) State->ReferenceQualifier = FrefQualLValue; - } else + } else { if (State) State->ReferenceQualifier = FrefQualNone; - - Node *SoFar = nullptr; - auto PushComponent = [&](Node *Comp) { - if (!Comp) return false; - if (SoFar) SoFar = make(SoFar, Comp); - else SoFar = Comp; - if (State) State->EndsWithTemplateArgs = false; - return SoFar != nullptr; - }; - - if (consumeIf("St")) { - SoFar = make("std"); - if (!SoFar) - return nullptr; } + Node *SoFar = nullptr; while (!consumeIf('E')) { - consumeIf('L'); // extension - - // := [] M - if (consumeIf('M')) { - if (SoFar == nullptr) - return nullptr; - continue; - } + if (State) + // Only set end-with-template on the case that does that. + State->EndsWithTemplateArgs = false; - // ::= if (look() == 'T') { - if (!PushComponent(getDerived().parseTemplateParam())) - return nullptr; - Subs.push_back(SoFar); - continue; - } - - // ::= - if (look() == 'I') { + // ::= + if (SoFar != nullptr) + return nullptr; // Cannot have a prefix. + SoFar = getDerived().parseTemplateParam(); + } else if (look() == 'I') { + // ::= + if (SoFar == nullptr) + return nullptr; // Must have a prefix. Node *TA = getDerived().parseTemplateArgs(State != nullptr); - if (TA == nullptr || SoFar == nullptr) - return nullptr; - SoFar = make(SoFar, TA); - if (!SoFar) - return nullptr; - if (State) State->EndsWithTemplateArgs = true; - Subs.push_back(SoFar); - continue; - } - - // ::= - if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) { - if (!PushComponent(getDerived().parseDecltype())) + if (TA == nullptr) return nullptr; - Subs.push_back(SoFar); - continue; - } - - // ::= - if (look() == 'S' && look(1) != 't') { - Node *S = getDerived().parseSubstitution(); - if (!PushComponent(S)) + if (SoFar->getKind() == Node::KNameWithTemplateArgs) + // Semantically cannot be generated by a + // C++ entity. There will always be [something like] a name between + // them. return nullptr; - if (SoFar != S) - Subs.push_back(S); - continue; - } + if (State) + State->EndsWithTemplateArgs = true; + SoFar = make(SoFar, TA); + } else if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) { + // ::= + if (SoFar != nullptr) + return nullptr; // Cannot have a prefix. + SoFar = getDerived().parseDecltype(); + } else { + ModuleName *Module = nullptr; + + if (look() == 'S') { + // ::= + Node *S = nullptr; + if (look(1) == 't') { + First += 2; + S = make("std"); + } else { + S = getDerived().parseSubstitution(); + } + if (!S) + return nullptr; + if (S->getKind() == Node::KModuleName) { + Module = static_cast(S); + } else if (SoFar != nullptr) { + return nullptr; // Cannot have a prefix. + } else { + SoFar = S; + continue; // Do not push a new substitution. + } + } - // Parse an thats actually a . 
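
Stepping back to parseOperatorEncoding at the top of this hunk: all of the deleted per-character switch cascades in this region collapse into one binary search over a statically sorted OperatorInfo table keyed on the two-character encoding. A minimal standalone sketch of that scheme follows; MiniOperatorInfo and its three-entry table are illustrative stand-ins, not the demangler's real types.

#include <algorithm>
#include <cassert>
#include <cstring>

// Entries are kept sorted by their two-character encoding so a binary
// search can replace a switch-on-two-characters cascade.
struct MiniOperatorInfo {
  char Enc[2];      // mangled encoding, e.g. "pl"
  const char *Name; // printed name, e.g. "operator+"
  bool operator<(const char *Peek) const {
    return Enc[0] < Peek[0] || (Enc[0] == Peek[0] && Enc[1] < Peek[1]);
  }
};

// Must be sorted by Enc (the real table is checked statically).
static const MiniOperatorInfo Ops[] = {
    {{'a', 'a'}, "operator&&"},
    {{'m', 'i'}, "operator-"},
    {{'p', 'l'}, "operator+"},
};

static const MiniOperatorInfo *lookupOperator(const char *First) {
  const auto *End = Ops + sizeof(Ops) / sizeof(Ops[0]);
  const auto *Op = std::lower_bound(
      Ops, End, First,
      [](const MiniOperatorInfo &O, const char *Peek) { return O < Peek; });
  if (Op == End || Op->Enc[0] != First[0] || Op->Enc[1] != First[1])
    return nullptr;
  return Op;
}

int main() {
  assert(std::strcmp(lookupOperator("pl")->Name, "operator+") == 0);
  assert(lookupOperator("zz") == nullptr);
  return 0;
}
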
- if (look() == 'C' || (look() == 'D' && look(1) != 'C')) { - if (SoFar == nullptr) - return nullptr; - if (!PushComponent(getDerived().parseCtorDtorName(SoFar, State))) - return nullptr; - SoFar = getDerived().parseAbiTags(SoFar); - if (SoFar == nullptr) - return nullptr; - Subs.push_back(SoFar); - continue; + // ::= [] + SoFar = getDerived().parseUnqualifiedName(State, SoFar, Module); } - // ::= - if (!PushComponent(getDerived().parseUnqualifiedName(State))) + if (SoFar == nullptr) return nullptr; Subs.push_back(SoFar); + + // No longer used. + // := [] M + consumeIf('M'); } if (SoFar == nullptr || Subs.empty()) @@ -3365,6 +3329,7 @@ Node *AbstractManglingParser::parseBaseUnresolvedName() { // ::= [gs] # x or (with "gs") ::x // ::= [gs] sr + E // # A::x, N::y, A::z; "gs" means leading "::" +// [gs] has been parsed by caller. // ::= sr # T::x / decltype(p)::x // extension ::= sr // # T::N::x /decltype(p)::N::x @@ -3372,7 +3337,7 @@ Node *AbstractManglingParser::parseBaseUnresolvedName() { // // ::= template -Node *AbstractManglingParser::parseUnresolvedName() { +Node *AbstractManglingParser::parseUnresolvedName(bool Global) { Node *SoFar = nullptr; // srN [] * E @@ -3406,8 +3371,6 @@ Node *AbstractManglingParser::parseUnresolvedName() { return make(SoFar, Base); } - bool Global = consumeIf("gs"); - // [gs] # x or (with "gs") ::x if (!consumeIf("sr")) { SoFar = getDerived().parseBaseUnresolvedName(); @@ -3637,7 +3600,7 @@ Node *AbstractManglingParser::parseDecltype() { return nullptr; if (!consumeIf('E')) return nullptr; - return make("decltype(", E, ")"); + return make("decltype", E); } // ::= A _ @@ -3723,8 +3686,8 @@ Node *AbstractManglingParser::parseQualifiedType() { StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto")); StringView Proto; { - SwapAndRestore SaveFirst(First, ProtoSourceName.begin()), - SaveLast(Last, ProtoSourceName.end()); + ScopedOverride SaveFirst(First, ProtoSourceName.begin()), + SaveLast(Last, ProtoSourceName.end()); Proto = parseBareSourceName(); } if (Proto.empty()) @@ -3929,6 +3892,22 @@ Node *AbstractManglingParser::parseType() { return nullptr; return make(DimensionNumber); } + // ::= DB _ # C23 signed _BitInt(N) + // ::= DB _ # C23 signed _BitInt(N) + // ::= DU _ # C23 unsigned _BitInt(N) + // ::= DU _ # C23 unsigned _BitInt(N) + case 'B': + case 'U': { + bool Signed = look(1) == 'B'; + First += 2; + Node *Size = std::isdigit(look()) ? make(parseNumber()) + : getDerived().parseExpr(); + if (!Size) + return nullptr; + if (!consumeIf('_')) + return nullptr; + return make(Size, Signed); + } // ::= Di # char32_t case 'i': First += 2; @@ -4077,8 +4056,9 @@ Node *AbstractManglingParser::parseType() { // ::= # See Compression below case 'S': { if (look(1) != 't') { - Result = getDerived().parseSubstitution(); - if (Result == nullptr) + bool IsSubst = false; + Result = getDerived().parseUnscopedName(nullptr, &IsSubst); + if (!Result) return nullptr; // Sub could be either of: @@ -4091,12 +4071,14 @@ Node *AbstractManglingParser::parseType() { // If this is followed by some , and we're permitted to // parse them, take the second production. - if (TryToParseTemplateArgs && look() == 'I') { + if (look() == 'I' && (!IsSubst || TryToParseTemplateArgs)) { + if (!IsSubst) + Subs.push_back(Result); Node *TA = getDerived().parseTemplateArgs(); if (TA == nullptr) return nullptr; Result = make(Result, TA); - } else { + } else if (IsSubst) { // If all we parsed was a substitution, don't re-insert into the // substitution table. 
return Result; @@ -4121,22 +4103,24 @@ Node *AbstractManglingParser::parseType() { } template -Node *AbstractManglingParser::parsePrefixExpr(StringView Kind) { +Node *AbstractManglingParser::parsePrefixExpr(StringView Kind, + Node::Prec Prec) { Node *E = getDerived().parseExpr(); if (E == nullptr) return nullptr; - return make(Kind, E); + return make(Kind, E, Prec); } template -Node *AbstractManglingParser::parseBinaryExpr(StringView Kind) { +Node *AbstractManglingParser::parseBinaryExpr(StringView Kind, + Node::Prec Prec) { Node *LHS = getDerived().parseExpr(); if (LHS == nullptr) return nullptr; Node *RHS = getDerived().parseExpr(); if (RHS == nullptr) return nullptr; - return make(LHS, Kind, RHS); + return make(LHS, Kind, RHS, Prec); } template @@ -4191,43 +4175,6 @@ Node *AbstractManglingParser::parseFunctionParam() { return nullptr; } -// [gs] nw * _ E # new (expr-list) type -// [gs] nw * _ # new (expr-list) type (init) -// [gs] na * _ E # new[] (expr-list) type -// [gs] na * _ # new[] (expr-list) type (init) -// ::= pi * E # parenthesized initialization -template -Node *AbstractManglingParser::parseNewExpr() { - bool Global = consumeIf("gs"); - bool IsArray = look(1) == 'a'; - if (!consumeIf("nw") && !consumeIf("na")) - return nullptr; - size_t Exprs = Names.size(); - while (!consumeIf('_')) { - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return nullptr; - Names.push_back(Ex); - } - NodeArray ExprList = popTrailingNodeArray(Exprs); - Node *Ty = getDerived().parseType(); - if (Ty == nullptr) - return Ty; - if (consumeIf("pi")) { - size_t InitsBegin = Names.size(); - while (!consumeIf('E')) { - Node *Init = getDerived().parseExpr(); - if (Init == nullptr) - return Init; - Names.push_back(Init); - } - NodeArray Inits = popTrailingNodeArray(InitsBegin); - return make(ExprList, Ty, Inits, Global, IsArray); - } else if (!consumeIf('E')) - return nullptr; - return make(ExprList, Ty, NodeArray(), Global, IsArray); -} - // cv # conversion with one argument // cv _ * E # conversion with a different number of arguments template @@ -4236,7 +4183,7 @@ Node *AbstractManglingParser::parseConversionExpr() { return nullptr; Node *Ty; { - SwapAndRestore SaveTemp(TryToParseTemplateArgs, false); + ScopedOverride SaveTemp(TryToParseTemplateArgs, false); Ty = getDerived().parseType(); } @@ -4353,7 +4300,7 @@ Node *AbstractManglingParser::parseExprPrimary() { return nullptr; } case 'D': - if (consumeIf("DnE")) + if (consumeIf("Dn") && (consumeIf('0'), consumeIf('E'))) return make("nullptr"); return nullptr; case 'T': @@ -4440,55 +4387,38 @@ Node *AbstractManglingParser::parseFoldExpr() { if (!consumeIf('f')) return nullptr; - char FoldKind = look(); - bool IsLeftFold, HasInitializer; - HasInitializer = FoldKind == 'L' || FoldKind == 'R'; - if (FoldKind == 'l' || FoldKind == 'L') - IsLeftFold = true; - else if (FoldKind == 'r' || FoldKind == 'R') - IsLeftFold = false; - else + bool IsLeftFold = false, HasInitializer = false; + switch (look()) { + default: return nullptr; + case 'L': + IsLeftFold = true; + HasInitializer = true; + break; + case 'R': + HasInitializer = true; + break; + case 'l': + IsLeftFold = true; + break; + case 'r': + break; + } ++First; - // FIXME: This map is duplicated in parseOperatorName and parseExpr. 
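
The FIXME above is one of the things the shared OperatorInfo table eliminates: parseFoldExpr now consults the same encoding table as parseOperatorName and parseExpr instead of a third hand-written operator map. The fold-kind character itself decodes as in this standalone sketch; the type and function names here are illustrative only.

#include <cassert>

// fl: (... op pack)           fr: (pack op ...)
// fL: (init op ... op pack)   fR: (pack op ... op init)
struct FoldKind {
  bool IsLeftFold;
  bool HasInitializer;
  bool Valid;
};

inline FoldKind decodeFoldKind(char C) {
  switch (C) {
  case 'l': return {true, false, true};
  case 'r': return {false, false, true};
  case 'L': return {true, true, true};
  case 'R': return {false, true, true};
  default:  return {false, false, false};
  }
}

int main() {
  assert(decodeFoldKind('L').IsLeftFold && decodeFoldKind('L').HasInitializer);
  assert(decodeFoldKind('r').Valid && !decodeFoldKind('r').HasInitializer);
  assert(!decodeFoldKind('x').Valid);
  return 0;
}
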
- StringView OperatorName; - if (consumeIf("aa")) OperatorName = "&&"; - else if (consumeIf("an")) OperatorName = "&"; - else if (consumeIf("aN")) OperatorName = "&="; - else if (consumeIf("aS")) OperatorName = "="; - else if (consumeIf("cm")) OperatorName = ","; - else if (consumeIf("ds")) OperatorName = ".*"; - else if (consumeIf("dv")) OperatorName = "/"; - else if (consumeIf("dV")) OperatorName = "/="; - else if (consumeIf("eo")) OperatorName = "^"; - else if (consumeIf("eO")) OperatorName = "^="; - else if (consumeIf("eq")) OperatorName = "=="; - else if (consumeIf("ge")) OperatorName = ">="; - else if (consumeIf("gt")) OperatorName = ">"; - else if (consumeIf("le")) OperatorName = "<="; - else if (consumeIf("ls")) OperatorName = "<<"; - else if (consumeIf("lS")) OperatorName = "<<="; - else if (consumeIf("lt")) OperatorName = "<"; - else if (consumeIf("mi")) OperatorName = "-"; - else if (consumeIf("mI")) OperatorName = "-="; - else if (consumeIf("ml")) OperatorName = "*"; - else if (consumeIf("mL")) OperatorName = "*="; - else if (consumeIf("ne")) OperatorName = "!="; - else if (consumeIf("oo")) OperatorName = "||"; - else if (consumeIf("or")) OperatorName = "|"; - else if (consumeIf("oR")) OperatorName = "|="; - else if (consumeIf("pl")) OperatorName = "+"; - else if (consumeIf("pL")) OperatorName = "+="; - else if (consumeIf("rm")) OperatorName = "%"; - else if (consumeIf("rM")) OperatorName = "%="; - else if (consumeIf("rs")) OperatorName = ">>"; - else if (consumeIf("rS")) OperatorName = ">>="; - else return nullptr; - - Node *Pack = getDerived().parseExpr(), *Init = nullptr; + const auto *Op = parseOperatorEncoding(); + if (!Op) + return nullptr; + if (!(Op->getKind() == OperatorInfo::Binary + || (Op->getKind() == OperatorInfo::Member + && Op->getName().back() == '*'))) + return nullptr; + + Node *Pack = getDerived().parseExpr(); if (Pack == nullptr) return nullptr; + + Node *Init = nullptr; if (HasInitializer) { Init = getDerived().parseExpr(); if (Init == nullptr) @@ -4498,14 +4428,16 @@ Node *AbstractManglingParser::parseFoldExpr() { if (IsLeftFold && Init) std::swap(Pack, Init); - return make(IsLeftFold, OperatorName, Pack, Init); + return make(IsLeftFold, Op->getSymbol(), Pack, Init); } // ::= mc [] E // // Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/47 template -Node *AbstractManglingParser::parsePointerToMemberConversionExpr() { +Node * +AbstractManglingParser::parsePointerToMemberConversionExpr( + Node::Prec Prec) { Node *Ty = getDerived().parseType(); if (!Ty) return nullptr; @@ -4515,7 +4447,7 @@ Node *AbstractManglingParser::parsePointerToMemberConversionExpr StringView Offset = getDerived().parseNumber(true); if (!consumeIf('E')) return nullptr; - return make(Ty, Expr, Offset); + return make(Ty, Expr, Offset, Prec); } // ::= so [] * [p] E @@ -4592,316 +4524,127 @@ Node *AbstractManglingParser::parseSubobjectExpr() { template Node *AbstractManglingParser::parseExpr() { bool Global = consumeIf("gs"); - if (numLeft() < 2) - return nullptr; - switch (*First) { - case 'L': - return getDerived().parseExprPrimary(); - case 'T': - return getDerived().parseTemplateParam(); - case 'f': { - // Disambiguate a fold expression from a . 
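
Stepping back to the parsePrefixExpr/parseBinaryExpr signatures above: the reason a Node::Prec now rides along is so the printer can decide parenthesization structurally, instead of baking defensive parentheses into every operator string. A toy model of that rule, with made-up precedence levels rather than the demangler's actual Node::Prec values:

#include <cassert>
#include <string>

enum class Prec { Primary = 0, Multiplicative, Additive, Assign };

struct Expr {
  std::string Text;
  Prec P;
};

// Parenthesize only when the child binds more loosely than its context.
static std::string printChild(const Expr &Child, Prec Parent) {
  if (Child.P > Parent)
    return "(" + Child.Text + ")";
  return Child.Text;
}

int main() {
  Expr Sum{"a + b", Prec::Additive};
  // "a + b" as an operand of "*" must be wrapped; as an operand of "="
  // it need not be.
  assert(printChild(Sum, Prec::Multiplicative) == "(a + b)");
  assert(printChild(Sum, Prec::Assign) == "a + b");
  return 0;
}
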
- if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2)))) - return getDerived().parseFunctionParam(); - return getDerived().parseFoldExpr(); - } - case 'a': - switch (First[1]) { - case 'a': - First += 2; - return getDerived().parseBinaryExpr("&&"); - case 'd': - First += 2; - return getDerived().parsePrefixExpr("&"); - case 'n': - First += 2; - return getDerived().parseBinaryExpr("&"); - case 'N': - First += 2; - return getDerived().parseBinaryExpr("&="); - case 'S': - First += 2; - return getDerived().parseBinaryExpr("="); - case 't': { - First += 2; - Node *Ty = getDerived().parseType(); - if (Ty == nullptr) + const auto *Op = parseOperatorEncoding(); + if (Op) { + auto Sym = Op->getSymbol(); + switch (Op->getKind()) { + case OperatorInfo::Binary: + // Binary operator: lhs @ rhs + return getDerived().parseBinaryExpr(Sym, Op->getPrecedence()); + case OperatorInfo::Prefix: + // Prefix unary operator: @ expr + return getDerived().parsePrefixExpr(Sym, Op->getPrecedence()); + case OperatorInfo::Postfix: { + // Postfix unary operator: expr @ + if (consumeIf('_')) + return getDerived().parsePrefixExpr(Sym, Op->getPrecedence()); + Node *Ex = getDerived().parseExpr(); + if (Ex == nullptr) return nullptr; - return make("alignof (", Ty, ")"); + return make(Ex, Sym, Op->getPrecedence()); } - case 'z': { - First += 2; - Node *Ty = getDerived().parseExpr(); - if (Ty == nullptr) + case OperatorInfo::Array: { + // Array Index: lhs [ rhs ] + Node *Base = getDerived().parseExpr(); + if (Base == nullptr) return nullptr; - return make("alignof (", Ty, ")"); - } - } - return nullptr; - case 'c': - switch (First[1]) { - // cc # const_cast(expression) - case 'c': { - First += 2; - Node *Ty = getDerived().parseType(); - if (Ty == nullptr) - return Ty; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make("const_cast", Ty, Ex); + Node *Index = getDerived().parseExpr(); + if (Index == nullptr) + return nullptr; + return make(Base, Index, Op->getPrecedence()); } - // cl + E # call - case 'l': { - First += 2; - Node *Callee = getDerived().parseExpr(); - if (Callee == nullptr) - return Callee; - size_t ExprsBegin = Names.size(); - while (!consumeIf('E')) { - Node *E = getDerived().parseExpr(); - if (E == nullptr) - return E; - Names.push_back(E); - } - return make(Callee, popTrailingNodeArray(ExprsBegin)); - } - case 'm': - First += 2; - return getDerived().parseBinaryExpr(","); - case 'o': - First += 2; - return getDerived().parsePrefixExpr("~"); - case 'v': - return getDerived().parseConversionExpr(); - } - return nullptr; - case 'd': - switch (First[1]) { - case 'a': { - First += 2; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make(Ex, Global, /*is_array=*/true); - } - case 'c': { - First += 2; - Node *T = getDerived().parseType(); - if (T == nullptr) - return T; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make("dynamic_cast", T, Ex); - } - case 'e': - First += 2; - return getDerived().parsePrefixExpr("*"); - case 'l': { - First += 2; - Node *E = getDerived().parseExpr(); - if (E == nullptr) - return E; - return make(E, Global, /*is_array=*/false); - } - case 'n': - return getDerived().parseUnresolvedName(); - case 's': { - First += 2; + case OperatorInfo::Member: { + // Member access lhs @ rhs Node *LHS = getDerived().parseExpr(); if (LHS == nullptr) return nullptr; Node *RHS = getDerived().parseExpr(); if (RHS == nullptr) return nullptr; - return make(LHS, ".*", RHS); - } - case 't': { - First 
+= 2; - Node *LHS = getDerived().parseExpr(); - if (LHS == nullptr) - return LHS; - Node *RHS = getDerived().parseExpr(); - if (RHS == nullptr) - return nullptr; - return make(LHS, ".", RHS); - } - case 'v': - First += 2; - return getDerived().parseBinaryExpr("/"); - case 'V': - First += 2; - return getDerived().parseBinaryExpr("/="); - } - return nullptr; - case 'e': - switch (First[1]) { - case 'o': - First += 2; - return getDerived().parseBinaryExpr("^"); - case 'O': - First += 2; - return getDerived().parseBinaryExpr("^="); - case 'q': - First += 2; - return getDerived().parseBinaryExpr("=="); - } - return nullptr; - case 'g': - switch (First[1]) { - case 'e': - First += 2; - return getDerived().parseBinaryExpr(">="); - case 't': - First += 2; - return getDerived().parseBinaryExpr(">"); - } - return nullptr; - case 'i': - switch (First[1]) { - case 'x': { - First += 2; - Node *Base = getDerived().parseExpr(); - if (Base == nullptr) + return make(LHS, Sym, RHS, Op->getPrecedence()); + } + case OperatorInfo::New: { + // New + // # new (expr-list) type [(init)] + // [gs] nw * _ [pi *] E + // # new[] (expr-list) type [(init)] + // [gs] na * _ [pi *] E + size_t Exprs = Names.size(); + while (!consumeIf('_')) { + Node *Ex = getDerived().parseExpr(); + if (Ex == nullptr) + return nullptr; + Names.push_back(Ex); + } + NodeArray ExprList = popTrailingNodeArray(Exprs); + Node *Ty = getDerived().parseType(); + if (Ty == nullptr) return nullptr; - Node *Index = getDerived().parseExpr(); - if (Index == nullptr) - return Index; - return make(Base, Index); - } - case 'l': { - First += 2; + bool HaveInits = consumeIf("pi"); size_t InitsBegin = Names.size(); while (!consumeIf('E')) { - Node *E = getDerived().parseBracedExpr(); - if (E == nullptr) + if (!HaveInits) return nullptr; - Names.push_back(E); + Node *Init = getDerived().parseExpr(); + if (Init == nullptr) + return Init; + Names.push_back(Init); } - return make(nullptr, popTrailingNodeArray(InitsBegin)); + NodeArray Inits = popTrailingNodeArray(InitsBegin); + return make(ExprList, Ty, Inits, Global, + /*IsArray=*/Op->getFlag(), Op->getPrecedence()); } - } - return nullptr; - case 'l': - switch (First[1]) { - case 'e': - First += 2; - return getDerived().parseBinaryExpr("<="); - case 's': - First += 2; - return getDerived().parseBinaryExpr("<<"); - case 'S': - First += 2; - return getDerived().parseBinaryExpr("<<="); - case 't': - First += 2; - return getDerived().parseBinaryExpr("<"); - } - return nullptr; - case 'm': - switch (First[1]) { - case 'c': - First += 2; - return parsePointerToMemberConversionExpr(); - case 'i': - First += 2; - return getDerived().parseBinaryExpr("-"); - case 'I': - First += 2; - return getDerived().parseBinaryExpr("-="); - case 'l': - First += 2; - return getDerived().parseBinaryExpr("*"); - case 'L': - First += 2; - return getDerived().parseBinaryExpr("*="); - case 'm': - First += 2; - if (consumeIf('_')) - return getDerived().parsePrefixExpr("--"); + case OperatorInfo::Del: { + // Delete Node *Ex = getDerived().parseExpr(); if (Ex == nullptr) return nullptr; - return make(Ex, "--"); - } - return nullptr; - case 'n': - switch (First[1]) { - case 'a': - case 'w': - return getDerived().parseNewExpr(); - case 'e': - First += 2; - return getDerived().parseBinaryExpr("!="); - case 'g': - First += 2; - return getDerived().parsePrefixExpr("-"); - case 't': - First += 2; - return getDerived().parsePrefixExpr("!"); - case 'x': - First += 2; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return 
make("noexcept (", Ex, ")"); - } - return nullptr; - case 'o': - switch (First[1]) { - case 'n': - return getDerived().parseUnresolvedName(); - case 'o': - First += 2; - return getDerived().parseBinaryExpr("||"); - case 'r': - First += 2; - return getDerived().parseBinaryExpr("|"); - case 'R': - First += 2; - return getDerived().parseBinaryExpr("|="); + return make(Ex, Global, /*IsArray=*/Op->getFlag(), + Op->getPrecedence()); } - return nullptr; - case 'p': - switch (First[1]) { - case 'm': - First += 2; - return getDerived().parseBinaryExpr("->*"); - case 'l': - First += 2; - return getDerived().parseBinaryExpr("+"); - case 'L': - First += 2; - return getDerived().parseBinaryExpr("+="); - case 'p': { - First += 2; - if (consumeIf('_')) - return getDerived().parsePrefixExpr("++"); - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make(Ex, "++"); + case OperatorInfo::Call: { + // Function Call + Node *Callee = getDerived().parseExpr(); + if (Callee == nullptr) + return nullptr; + size_t ExprsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseExpr(); + if (E == nullptr) + return nullptr; + Names.push_back(E); + } + return make(Callee, popTrailingNodeArray(ExprsBegin), + Op->getPrecedence()); } - case 's': - First += 2; - return getDerived().parsePrefixExpr("+"); - case 't': { - First += 2; - Node *L = getDerived().parseExpr(); - if (L == nullptr) + case OperatorInfo::CCast: { + // C Cast: (type)expr + Node *Ty; + { + ScopedOverride SaveTemp(TryToParseTemplateArgs, false); + Ty = getDerived().parseType(); + } + if (Ty == nullptr) return nullptr; - Node *R = getDerived().parseExpr(); - if (R == nullptr) + + size_t ExprsBegin = Names.size(); + bool IsMany = consumeIf('_'); + while (!consumeIf('E')) { + Node *E = getDerived().parseExpr(); + if (E == nullptr) + return E; + Names.push_back(E); + if (!IsMany) + break; + } + NodeArray Exprs = popTrailingNodeArray(ExprsBegin); + if (!IsMany && Exprs.size() != 1) return nullptr; - return make(L, "->", R); + return make(Ty, Exprs, Op->getPrecedence()); } - } - return nullptr; - case 'q': - if (First[1] == 'u') { - First += 2; + case OperatorInfo::Conditional: { + // Conditional operator: expr ? 
expr : expr Node *Cond = getDerived().parseExpr(); if (Cond == nullptr) return nullptr; @@ -4911,147 +4654,120 @@ Node *AbstractManglingParser::parseExpr() { Node *RHS = getDerived().parseExpr(); if (RHS == nullptr) return nullptr; - return make(Cond, LHS, RHS); - } - return nullptr; - case 'r': - switch (First[1]) { - case 'c': { - First += 2; - Node *T = getDerived().parseType(); - if (T == nullptr) - return T; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make("reinterpret_cast", T, Ex); - } - case 'm': - First += 2; - return getDerived().parseBinaryExpr("%"); - case 'M': - First += 2; - return getDerived().parseBinaryExpr("%="); - case 's': - First += 2; - return getDerived().parseBinaryExpr(">>"); - case 'S': - First += 2; - return getDerived().parseBinaryExpr(">>="); - } - return nullptr; - case 's': - switch (First[1]) { - case 'c': { - First += 2; - Node *T = getDerived().parseType(); - if (T == nullptr) - return T; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make("static_cast", T, Ex); - } - case 'o': - First += 2; - return parseSubobjectExpr(); - case 'p': { - First += 2; - Node *Child = getDerived().parseExpr(); - if (Child == nullptr) - return nullptr; - return make(Child); + return make(Cond, LHS, RHS, Op->getPrecedence()); } - case 'r': - return getDerived().parseUnresolvedName(); - case 't': { - First += 2; + case OperatorInfo::NamedCast: { + // Named cast operation, @(expr) Node *Ty = getDerived().parseType(); if (Ty == nullptr) - return Ty; - return make("sizeof (", Ty, ")"); - } - case 'z': { - First += 2; + return nullptr; Node *Ex = getDerived().parseExpr(); if (Ex == nullptr) - return Ex; - return make("sizeof (", Ex, ")"); + return nullptr; + return make(Sym, Ty, Ex, Op->getPrecedence()); } - case 'Z': - First += 2; - if (look() == 'T') { - Node *R = getDerived().parseTemplateParam(); - if (R == nullptr) - return nullptr; - return make(R); - } else if (look() == 'f') { - Node *FP = getDerived().parseFunctionParam(); - if (FP == nullptr) - return nullptr; - return make("sizeof... (", FP, ")"); - } - return nullptr; - case 'P': { - First += 2; - size_t ArgsBegin = Names.size(); - while (!consumeIf('E')) { - Node *Arg = getDerived().parseTemplateArg(); - if (Arg == nullptr) - return nullptr; - Names.push_back(Arg); - } - auto *Pack = make(popTrailingNodeArray(ArgsBegin)); - if (!Pack) + case OperatorInfo::OfIdOp: { + // [sizeof/alignof/typeid] ( | ) + Node *Arg = + Op->getFlag() ? getDerived().parseType() : getDerived().parseExpr(); + if (!Arg) return nullptr; - return make("sizeof... (", Pack, ")"); + return make(Sym, Arg, Op->getPrecedence()); } + case OperatorInfo::NameOnly: { + // Not valid as an expression operand. + return nullptr; } - return nullptr; - case 't': - switch (First[1]) { - case 'e': { - First += 2; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) - return Ex; - return make("typeid (", Ex, ")"); } - case 'i': { - First += 2; - Node *Ty = getDerived().parseType(); - if (Ty == nullptr) - return Ty; - return make("typeid (", Ty, ")"); + DEMANGLE_UNREACHABLE; + } + + if (numLeft() < 2) + return nullptr; + + if (look() == 'L') + return getDerived().parseExprPrimary(); + if (look() == 'T') + return getDerived().parseTemplateParam(); + if (look() == 'f') { + // Disambiguate a fold expression from a . 
+ if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2)))) + return getDerived().parseFunctionParam(); + return getDerived().parseFoldExpr(); + } + if (consumeIf("il")) { + size_t InitsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseBracedExpr(); + if (E == nullptr) + return nullptr; + Names.push_back(E); } - case 'l': { - First += 2; - Node *Ty = getDerived().parseType(); - if (Ty == nullptr) + return make(nullptr, popTrailingNodeArray(InitsBegin)); + } + if (consumeIf("mc")) + return parsePointerToMemberConversionExpr(Node::Prec::Unary); + if (consumeIf("nx")) { + Node *Ex = getDerived().parseExpr(); + if (Ex == nullptr) + return Ex; + return make("noexcept ", Ex, Node::Prec::Unary); + } + if (consumeIf("so")) + return parseSubobjectExpr(); + if (consumeIf("sp")) { + Node *Child = getDerived().parseExpr(); + if (Child == nullptr) + return nullptr; + return make(Child); + } + if (consumeIf("sZ")) { + if (look() == 'T') { + Node *R = getDerived().parseTemplateParam(); + if (R == nullptr) return nullptr; - size_t InitsBegin = Names.size(); - while (!consumeIf('E')) { - Node *E = getDerived().parseBracedExpr(); - if (E == nullptr) - return nullptr; - Names.push_back(E); - } - return make(Ty, popTrailingNodeArray(InitsBegin)); + return make(R); } - case 'r': - First += 2; - return make("throw"); - case 'w': { - First += 2; - Node *Ex = getDerived().parseExpr(); - if (Ex == nullptr) + Node *FP = getDerived().parseFunctionParam(); + if (FP == nullptr) + return nullptr; + return make("sizeof... ", FP); + } + if (consumeIf("sP")) { + size_t ArgsBegin = Names.size(); + while (!consumeIf('E')) { + Node *Arg = getDerived().parseTemplateArg(); + if (Arg == nullptr) return nullptr; - return make(Ex); + Names.push_back(Arg); } + auto *Pack = make(popTrailingNodeArray(ArgsBegin)); + if (!Pack) + return nullptr; + return make("sizeof... ", Pack); + } + if (consumeIf("tl")) { + Node *Ty = getDerived().parseType(); + if (Ty == nullptr) + return nullptr; + size_t InitsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseBracedExpr(); + if (E == nullptr) + return nullptr; + Names.push_back(E); } - return nullptr; - case 'u': { - ++First; + return make(Ty, popTrailingNodeArray(InitsBegin)); + } + if (consumeIf("tr")) + return make("throw"); + if (consumeIf("tw")) { + Node *Ex = getDerived().parseExpr(); + if (Ex == nullptr) + return nullptr; + return make(Ex); + } + if (consumeIf('u')) { Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr); if (!Name) return nullptr; @@ -5060,45 +4776,36 @@ Node *AbstractManglingParser::parseExpr() { // interpreted as node 'short' or 'ellipsis'. However, neither // __uuidof(short) nor __uuidof(...) can actually appear, so there is no // actual conflict here. 
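
A practical way to sanity-check any of these expression productions is to round-trip a mangled name through an existing demangler entry point. A minimal example using the portable abi::__cxa_demangle; the simple mangling _Z3fooi, i.e. foo(int), stands in for a more interesting symbol:

#include <cstdio>
#include <cstdlib>
#include <cxxabi.h>

int main() {
  int Status = 0;
  // Demangler allocates the output buffer when none is supplied.
  char *Demangled = abi::__cxa_demangle("_Z3fooi", nullptr, nullptr, &Status);
  if (Status == 0) {
    std::printf("%s\n", Demangled); // prints: foo(int)
    std::free(Demangled);
  }
  return Status;
}
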
+ bool IsUUID = false; + Node *UUID = nullptr; if (Name->getBaseName() == "__uuidof") { - if (numLeft() < 2) - return nullptr; - if (*First == 't') { - ++First; - Node *Ty = getDerived().parseType(); - if (!Ty) - return nullptr; - return make(Name, makeNodeArray(&Ty, &Ty + 1)); - } - if (*First == 'z') { - ++First; - Node *Ex = getDerived().parseExpr(); - if (!Ex) - return nullptr; - return make(Name, makeNodeArray(&Ex, &Ex + 1)); + if (consumeIf('t')) { + UUID = getDerived().parseType(); + IsUUID = true; + } else if (consumeIf('z')) { + UUID = getDerived().parseExpr(); + IsUUID = true; } } size_t ExprsBegin = Names.size(); - while (!consumeIf('E')) { - Node *E = getDerived().parseTemplateArg(); - if (E == nullptr) - return E; - Names.push_back(E); + if (IsUUID) { + if (UUID == nullptr) + return nullptr; + Names.push_back(UUID); + } else { + while (!consumeIf('E')) { + Node *E = getDerived().parseTemplateArg(); + if (E == nullptr) + return E; + Names.push_back(E); + } } - return make(Name, popTrailingNodeArray(ExprsBegin)); - } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return getDerived().parseUnresolvedName(); + return make(Name, popTrailingNodeArray(ExprsBegin), + Node::Prec::Postfix); } - return nullptr; + + // Only unresolved names remain. + return getDerived().parseUnresolvedName(Global); } // ::= h _ @@ -5131,14 +4838,17 @@ bool AbstractManglingParser::parseCallOffset() { // # second call-offset is result adjustment // ::= T // # base is the nominal target function of thunk -// ::= GV # Guard variable for one-time initialization +// # Guard variable for one-time initialization +// ::= GV // # No // ::= TW # Thread-local wrapper // ::= TH # Thread-local initialization // ::= GR _ # First temporary // ::= GR _ # Subsequent temporaries -// extension ::= TC _ # construction vtable for second-in-first +// # construction vtable for second-in-first +// extension ::= TC _ // extension ::= GR # reference temporary for object +// extension ::= GI # module global initializer template Node *AbstractManglingParser::parseSpecialName() { switch (look()) { @@ -5265,6 +4975,16 @@ Node *AbstractManglingParser::parseSpecialName() { return nullptr; return make("reference temporary for ", Name); } + // GI v + case 'I': { + First += 2; + ModuleName *Module = nullptr; + if (getDerived().parseModuleNameOpt(Module)) + return nullptr; + if (Module == nullptr) + return nullptr; + return make("initializer for module ", Module); + } } } return nullptr; @@ -5379,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) + defined(__wasm__) || defined(__riscv) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; @@ -5444,6 +5164,7 @@ bool AbstractManglingParser::parseSeqId(size_t *Out) { // ::= Si # ::std::basic_istream > // ::= So # ::std::basic_ostream > // ::= Sd # ::std::basic_iostream > +// The St case is handled specially in parseNestedName. template Node *AbstractManglingParser::parseSubstitution() { if (!consumeIf('S')) diff --git a/llvm/include/llvm/Demangle/ItaniumNodes.def b/llvm/include/llvm/Demangle/ItaniumNodes.def new file mode 100644 index 000000000000..c0e277d554cc --- /dev/null +++ b/llvm/include/llvm/Demangle/ItaniumNodes.def @@ -0,0 +1,95 @@ +//===--- ItaniumNodes.def ------------*- mode:c++;eval:(read-only-mode) -*-===// +// Do not edit! See README.txt. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Define the demangler's node names + +#ifndef NODE +#error Define NODE to handle nodes +#endif + +NODE(NodeArrayNode) +NODE(DotSuffix) +NODE(VendorExtQualType) +NODE(QualType) +NODE(ConversionOperatorType) +NODE(PostfixQualifiedType) +NODE(ElaboratedTypeSpefType) +NODE(NameType) +NODE(AbiTagAttr) +NODE(EnableIfAttr) +NODE(ObjCProtoName) +NODE(PointerType) +NODE(ReferenceType) +NODE(PointerToMemberType) +NODE(ArrayType) +NODE(FunctionType) +NODE(NoexceptSpec) +NODE(DynamicExceptionSpec) +NODE(FunctionEncoding) +NODE(LiteralOperator) +NODE(SpecialName) +NODE(CtorVtableSpecialName) +NODE(QualifiedName) +NODE(NestedName) +NODE(LocalName) +NODE(ModuleName) +NODE(ModuleEntity) +NODE(VectorType) +NODE(PixelVectorType) +NODE(BinaryFPType) +NODE(BitIntType) +NODE(SyntheticTemplateParamName) +NODE(TypeTemplateParamDecl) +NODE(NonTypeTemplateParamDecl) +NODE(TemplateTemplateParamDecl) +NODE(TemplateParamPackDecl) +NODE(ParameterPack) +NODE(TemplateArgumentPack) +NODE(ParameterPackExpansion) +NODE(TemplateArgs) +NODE(ForwardTemplateReference) +NODE(NameWithTemplateArgs) +NODE(GlobalQualifiedName) +NODE(ExpandedSpecialSubstitution) +NODE(SpecialSubstitution) +NODE(CtorDtorName) +NODE(DtorName) +NODE(UnnamedTypeName) +NODE(ClosureTypeName) +NODE(StructuredBindingName) +NODE(BinaryExpr) +NODE(ArraySubscriptExpr) +NODE(PostfixExpr) +NODE(ConditionalExpr) +NODE(MemberExpr) +NODE(SubobjectExpr) +NODE(EnclosingExpr) +NODE(CastExpr) +NODE(SizeofParamPackExpr) +NODE(CallExpr) +NODE(NewExpr) +NODE(DeleteExpr) +NODE(PrefixExpr) +NODE(FunctionParam) +NODE(ConversionExpr) +NODE(PointerToMemberConversionExpr) +NODE(InitListExpr) +NODE(FoldExpr) +NODE(ThrowExpr) +NODE(BoolExpr) +NODE(StringLiteral) +NODE(LambdaExpr) +NODE(EnumLiteral) +NODE(IntegerLiteral) +NODE(FloatLiteral) +NODE(DoubleLiteral) +NODE(LongDoubleLiteral) +NODE(BracedExpr) +NODE(BracedRangeExpr) + +#undef NODE diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 1cf7e8f1df45..ca7e44b948c7 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -33,43 +33,50 @@ class OutputBuffer { size_t CurrentPosition = 0; size_t BufferCapacity = 0; - // Ensure there is at least n more positions in buffer. + // Ensure there are at least N more positions in the buffer. void grow(size_t N) { - if (N + CurrentPosition >= BufferCapacity) { + size_t Need = N + CurrentPosition; + if (Need > BufferCapacity) { + // Reduce the number of reallocations, with a bit of hysteresis. The + // number here is chosen so the first allocation will more-than-likely not + // allocate more than 1K. + Need += 1024 - 32; BufferCapacity *= 2; - if (BufferCapacity < N + CurrentPosition) - BufferCapacity = N + CurrentPosition; + if (BufferCapacity < Need) + BufferCapacity = Need; Buffer = static_cast(std::realloc(Buffer, BufferCapacity)); if (Buffer == nullptr) std::terminate(); } } - void writeUnsigned(uint64_t N, bool isNeg = false) { - // Handle special case... - if (N == 0) { - *this << '0'; - return; - } - + OutputBuffer &writeUnsigned(uint64_t N, bool isNeg = false) { std::array Temp; char *TempPtr = Temp.data() + Temp.size(); - while (N) { + // Output at least one character. 
+ do { *--TempPtr = char('0' + N % 10); N /= 10; - } + } while (N); - // Add negative sign... + // Add negative sign. if (isNeg) *--TempPtr = '-'; - this->operator<<(StringView(TempPtr, Temp.data() + Temp.size())); + + return operator+=(StringView(TempPtr, Temp.data() + Temp.size())); } public: OutputBuffer(char *StartBuf, size_t Size) : Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {} OutputBuffer() = default; + // Non-copyable + OutputBuffer(const OutputBuffer &) = delete; + OutputBuffer &operator=(const OutputBuffer &) = delete; + + operator StringView() const { return StringView(Buffer, CurrentPosition); } + void reset(char *Buffer_, size_t BufferCapacity_) { CurrentPosition = 0; Buffer = Buffer_; @@ -81,13 +88,27 @@ public: unsigned CurrentPackIndex = std::numeric_limits::max(); unsigned CurrentPackMax = std::numeric_limits::max(); + /// When zero, we're printing template args and '>' needs to be parenthesized. + /// Use a counter so we can simply increment inside parentheses. + unsigned GtIsGt = 1; + + bool isGtInsideTemplateArgs() const { return GtIsGt == 0; } + + void printOpen(char Open = '(') { + GtIsGt++; + *this += Open; + } + void printClose(char Close = ')') { + GtIsGt--; + *this += Close; + } + OutputBuffer &operator+=(StringView R) { - size_t Size = R.size(); - if (Size == 0) - return *this; - grow(Size); - std::memmove(Buffer + CurrentPosition, R.begin(), Size); - CurrentPosition += Size; + if (size_t Size = R.size()) { + grow(Size); + std::memcpy(Buffer + CurrentPosition, R.begin(), Size); + CurrentPosition += Size; + } return *this; } @@ -97,9 +118,7 @@ public: return *this; } - OutputBuffer &operator<<(StringView R) { return (*this += R); } - - OutputBuffer prepend(StringView R) { + OutputBuffer &prepend(StringView R) { size_t Size = R.size(); grow(Size); @@ -110,19 +129,16 @@ public: return *this; } + OutputBuffer &operator<<(StringView R) { return (*this += R); } + OutputBuffer &operator<<(char C) { return (*this += C); } OutputBuffer &operator<<(long long N) { - if (N < 0) - writeUnsigned(static_cast(-N), true); - else - writeUnsigned(static_cast(N)); - return *this; + return writeUnsigned(static_cast(std::abs(N)), N < 0); } OutputBuffer &operator<<(unsigned long long N) { - writeUnsigned(N, false); - return *this; + return writeUnsigned(N, false); } OutputBuffer &operator<<(long N) { @@ -155,7 +171,8 @@ public: void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; } char back() const { - return CurrentPosition ? 
Buffer[CurrentPosition - 1] : '\0'; + assert(CurrentPosition); + return Buffer[CurrentPosition - 1]; } bool empty() const { return CurrentPosition == 0; } @@ -165,35 +182,20 @@ public: size_t getBufferCapacity() const { return BufferCapacity; } }; -template class SwapAndRestore { - T &Restore; - T OriginalValue; - bool ShouldRestore = true; +template class ScopedOverride { + T &Loc; + T Original; public: - SwapAndRestore(T &Restore_) : SwapAndRestore(Restore_, Restore_) {} - - SwapAndRestore(T &Restore_, T NewVal) - : Restore(Restore_), OriginalValue(Restore) { - Restore = std::move(NewVal); - } - ~SwapAndRestore() { - if (ShouldRestore) - Restore = std::move(OriginalValue); - } - - void shouldRestore(bool ShouldRestore_) { ShouldRestore = ShouldRestore_; } - - void restoreNow(bool Force) { - if (!Force && !ShouldRestore) - return; + ScopedOverride(T &Loc_) : ScopedOverride(Loc_, Loc_) {} - Restore = std::move(OriginalValue); - ShouldRestore = false; + ScopedOverride(T &Loc_, T NewVal) : Loc(Loc_), Original(Loc_) { + Loc_ = std::move(NewVal); } + ~ScopedOverride() { Loc = std::move(Original); } - SwapAndRestore(const SwapAndRestore &) = delete; - SwapAndRestore &operator=(const SwapAndRestore &) = delete; + ScopedOverride(const ScopedOverride &) = delete; + ScopedOverride &operator=(const ScopedOverride &) = delete; }; inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h b/llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h new file mode 100644 index 000000000000..d748d4b0fa59 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h @@ -0,0 +1,35 @@ +//===--------- DWARFRecordSectionSplitter.h - JITLink -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_DWARFRECORDSECTIONSPLITTER_H +#define LLVM_EXECUTIONENGINE_JITLINK_DWARFRECORDSECTIONSPLITTER_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// A LinkGraph pass that splits blocks in a section that follows the DWARF +/// Record format into sub-blocks where each header gets its own block. +/// When splitting EHFrames, DWARFRecordSectionSplitter should not be run +/// without EHFrameEdgeFixer, which is responsible for adding FDE-to-CIE edges. +class DWARFRecordSectionSplitter { +public: + DWARFRecordSectionSplitter(StringRef SectionName); + Error operator()(LinkGraph &G); + +private: + Error processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache); + + StringRef SectionName; +}; + +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_DWARFRECORDSECTIONSPLITTER_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 25f1349f15f2..897808c0ee83 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -223,6 +223,11 @@ public: /// Returns the size of this defined addressable. size_t getSize() const { return Size; } + /// Returns the address range of this defined addressable. 
+ orc::ExecutorAddrRange getRange() const { + return orc::ExecutorAddrRange(getAddress(), getSize()); + } + /// Get the content for this block. Block must not be a zero-fill block. ArrayRef getContent() const { assert(Data && "Block does not contain content"); @@ -576,6 +581,11 @@ public: this->Size = Size; } + /// Returns the address range of this symbol. + orc::ExecutorAddrRange getRange() const { + return orc::ExecutorAddrRange(getAddress(), getSize()); + } + /// Returns true if this symbol is backed by a zero-fill block. /// This method may only be called on defined symbols. bool isSymbolZeroFill() const { return getBlock().isZeroFill(); } @@ -1215,8 +1225,11 @@ public: /// Make the given symbol an absolute with the given address (must not already /// be absolute). /// - /// Symbol size, linkage, scope, and callability, and liveness will be left - /// unchanged. Symbol offset will be reset to 0. + /// The symbol's size, linkage, and callability, and liveness will be left + /// unchanged, and its offset will be reset to 0. + /// + /// If the symbol was external then its scope will be set to local, otherwise + /// it will be left unchanged. void makeAbsolute(Symbol &Sym, orc::ExecutorAddr Address) { assert(!Sym.isAbsolute() && "Symbol is already absolute"); if (Sym.isExternal()) { @@ -1225,6 +1238,7 @@ public: assert(Sym.getOffset() == 0 && "External is not at offset 0"); ExternalSymbols.erase(&Sym); Sym.getAddressable().setAbsolute(true); + Sym.setScope(Scope::Local); } else { assert(Sym.isDefined() && "Sym is not a defined symbol"); Section &Sec = Sym.getBlock().getSection(); @@ -1733,6 +1747,9 @@ Error markAllSymbolsLive(LinkGraph &G); Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B, const Edge &E); +Error makeAlignmentError(llvm::orc::ExecutorAddr Loc, uint64_t Value, int N, + const Edge &E); + /// Base case for edge-visitors where the visitor-list is empty. inline void visitEdge(LinkGraph &G, Block *B, Edge &E) {} diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h index aee14c0d1fe5..6f2ff012697d 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -18,30 +18,6 @@ namespace llvm { namespace jitlink { -namespace MachO_arm64_Edges { - -enum MachOARM64RelocationKind : Edge::Kind { - Branch26 = Edge::FirstRelocation, - Pointer32, - Pointer64, - Pointer64Anon, - Page21, - PageOffset12, - GOTPage21, - GOTPageOffset12, - TLVPage21, - TLVPageOffset12, - PointerToGOT, - PairedAddend, - LDRLiteral19, - Delta32, - Delta64, - NegDelta32, - NegDelta64, -}; - -} // namespace MachO_arm64_Edges - /// Create a LinkGraph from a MachO/arm64 relocatable object. /// /// Note: The graph does not take ownership of the underlying buffer, nor copy @@ -62,9 +38,6 @@ createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer); void link_MachO_arm64(std::unique_ptr G, std::unique_ptr Ctx); -/// Return the string name of the given MachO arm64 edge kind. 
-const char *getMachOARM64RelocationKindName(Edge::Kind R);
-
 } // end namespace jitlink
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
index e9771319ef06..a18098e5a1a9 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
@@ -152,13 +152,9 @@ public:
   using iterator = typename VectorTy::iterator;
 
   AllocGroupSmallMap() = default;
-  AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits) {
-    Elems.reserve(Inits.size());
-    for (const auto &E : Inits)
-      Elems.push_back(E);
-    llvm::sort(Elems, [](const ElemT &LHS, const ElemT &RHS) {
-      return LHS.first < RHS.first;
-    });
+  AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits)
+      : Elems(Inits) {
+    llvm::sort(Elems, llvm::less_first());
   }
 
   iterator begin() { return Elems.begin(); }
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
index 994ce783b058..53ff6c7a219e 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
@@ -13,24 +13,353 @@
 #ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
 #define LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
 
+#include "TableManager.h"
 #include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
 
 namespace llvm {
 namespace jitlink {
 namespace aarch64 {
 
-/// Represets aarch64 fixups
 enum EdgeKind_aarch64 : Edge::Kind {
-
-  /// Set a CALL immediate field to bits [27:2] of X = Target - Fixup + Addend
-  R_AARCH64_CALL26 = Edge::FirstRelocation,
-
+  Branch26 = Edge::FirstRelocation,
+  Pointer32,
+  Pointer64,
+  Pointer64Anon,
+  Page21,
+  PageOffset12,
+  MoveWide16,
+  GOTPage21,
+  GOTPageOffset12,
+  TLVPage21,
+  TLVPageOffset12,
+  PointerToGOT,
+  PairedAddend,
+  LDRLiteral19,
+  Delta32,
+  Delta64,
+  NegDelta32,
+  NegDelta64,
 };
 
 /// Returns a string name for the given aarch64 edge. For debugging purposes
 /// only
 const char *getEdgeKindName(Edge::Kind K);
 
+// Returns whether the Instr is LD/ST (imm12)
+inline bool isLoadStoreImm12(uint32_t Instr) {
+  constexpr uint32_t LoadStoreImm12Mask = 0x3b000000;
+  return (Instr & LoadStoreImm12Mask) == 0x39000000;
+}
+
+// Returns the amount by which the address operand of an LD/ST (imm12)
+// instruction should be shifted right.
+//
+// The shift value varies with the data size of the LD/ST instruction.
+// For instance, an LDH instruction needs the address to be shifted
+// right by 1.
+inline unsigned getPageOffset12Shift(uint32_t Instr) {
+  constexpr uint32_t Vec128Mask = 0x04800000;
+
+  if (isLoadStoreImm12(Instr)) {
+    uint32_t ImplicitShift = Instr >> 30;
+    if (ImplicitShift == 0)
+      if ((Instr & Vec128Mask) == Vec128Mask)
+        ImplicitShift = 4;
+
+    return ImplicitShift;
+  }
+
+  return 0;
+}
+
+// Returns whether the Instr is MOVK/MOVZ (imm16) with a zero immediate field
+inline bool isMoveWideImm16(uint32_t Instr) {
+  constexpr uint32_t MoveWideImm16Mask = 0x5f9fffe0;
+  return (Instr & MoveWideImm16Mask) == 0x52800000;
+}
+
+// Returns the amount by which the address operand of a MOVK/MOVZ (imm16)
+// instruction should be shifted right.
+//
+// The shift value is specified in the assembly as LSL #<shift>.
+inline unsigned getMoveWide16Shift(uint32_t Instr) {
+  if (isMoveWideImm16(Instr)) {
+    uint32_t ImplicitShift = (Instr >> 21) & 0b11;
+    return ImplicitShift << 4;
+  }
+
+  return 0;
+}
+
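To make the implicit-shift rules concrete, here is a small self-contained check. The helper bodies are copied from the header for illustration, and the two instruction words are the standard encodings of ldr x0, [x1] and ldrh w0, [x1]:

#include <cassert>
#include <cstdint>

inline bool isLoadStoreImm12(uint32_t Instr) {
  constexpr uint32_t LoadStoreImm12Mask = 0x3b000000;
  return (Instr & LoadStoreImm12Mask) == 0x39000000;
}

inline unsigned getPageOffset12Shift(uint32_t Instr) {
  constexpr uint32_t Vec128Mask = 0x04800000;
  if (isLoadStoreImm12(Instr)) {
    uint32_t ImplicitShift = Instr >> 30; // size field, bits [31:30]
    if (ImplicitShift == 0 && (Instr & Vec128Mask) == Vec128Mask)
      ImplicitShift = 4; // 128-bit vector access
    return ImplicitShift;
  }
  return 0;
}

int main() {
  // ldr x0, [x1] == 0xf9400020: size is 0b11, so the imm12 offset counts
  // 8-byte units and the address operand is shifted right by 3.
  assert(getPageOffset12Shift(0xf9400020) == 3);
  // ldrh w0, [x1] == 0x79400020: 16-bit access, shift by 1.
  assert(getPageOffset12Shift(0x79400020) == 1);
  return 0;
}

+/// Apply fixup expression for edge to block content.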
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { + using namespace support; + + char *BlockWorkingMem = B.getAlreadyMutableContent().data(); + char *FixupPtr = BlockWorkingMem + E.getOffset(); + orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); + + switch (E.getKind()) { + case Branch26: { + assert((FixupAddress.getValue() & 0x3) == 0 && + "Branch-inst is not 32-bit aligned"); + + int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); + + if (static_cast(Value) & 0x3) + return make_error("Branch26 target is not 32-bit " + "aligned"); + + if (Value < -(1 << 27) || Value > ((1 << 27) - 1)) + return makeTargetOutOfRangeError(G, B, E); + + uint32_t RawInstr = *(little32_t *)FixupPtr; + assert((RawInstr & 0x7fffffff) == 0x14000000 && + "RawInstr isn't a B or BR immediate instruction"); + uint32_t Imm = (static_cast(Value) & ((1 << 28) - 1)) >> 2; + uint32_t FixedInstr = RawInstr | Imm; + *(little32_t *)FixupPtr = FixedInstr; + break; + } + case Pointer32: { + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + if (Value > std::numeric_limits::max()) + return makeTargetOutOfRangeError(G, B, E); + *(ulittle32_t *)FixupPtr = Value; + break; + } + case Pointer64: + case Pointer64Anon: { + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + *(ulittle64_t *)FixupPtr = Value; + break; + } + case Page21: { + assert((E.getKind() != GOTPage21 || E.getAddend() == 0) && + "GOTPAGE21 with non-zero addend"); + uint64_t TargetPage = + (E.getTarget().getAddress().getValue() + E.getAddend()) & + ~static_cast(4096 - 1); + uint64_t PCPage = + FixupAddress.getValue() & ~static_cast(4096 - 1); + + int64_t PageDelta = TargetPage - PCPage; + if (!isInt<33>(PageDelta)) + return makeTargetOutOfRangeError(G, B, E); + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert((RawInstr & 0xffffffe0) == 0x90000000 && + "RawInstr isn't an ADRP instruction"); + uint32_t ImmLo = (static_cast(PageDelta) >> 12) & 0x3; + uint32_t ImmHi = (static_cast(PageDelta) >> 14) & 0x7ffff; + uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5); + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case PageOffset12: { + uint64_t TargetOffset = + (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff; + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + unsigned ImmShift = getPageOffset12Shift(RawInstr); + + if (TargetOffset & ((1 << ImmShift) - 1)) + return make_error("PAGEOFF12 target is not aligned"); + + uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case MoveWide16: { + uint64_t TargetOffset = + (E.getTarget().getAddress() + E.getAddend()).getValue(); + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert(isMoveWideImm16(RawInstr) && + "RawInstr isn't a MOVK/MOVZ instruction"); + + unsigned ImmShift = getMoveWide16Shift(RawInstr); + uint32_t Imm = (TargetOffset >> ImmShift) & 0xffff; + uint32_t FixedInstr = RawInstr | (Imm << 5); + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case LDRLiteral19: { + assert((FixupAddress.getValue() & 0x3) == 0 && "LDR is not 32-bit aligned"); + assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); + int64_t Delta = E.getTarget().getAddress() - FixupAddress; + if (Delta & 0x3) + return make_error("LDR literal target is not 32-bit " + 
"aligned"); + if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1)) + return makeTargetOutOfRangeError(G, B, E); + + uint32_t EncodedImm = ((static_cast(Delta) >> 2) & 0x7ffff) << 5; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case Delta32: + case Delta64: + case NegDelta32: + case NegDelta64: { + int64_t Value; + if (E.getKind() == Delta32 || E.getKind() == Delta64) + Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); + else + Value = FixupAddress - E.getTarget().getAddress() + E.getAddend(); + + if (E.getKind() == Delta32 || E.getKind() == NegDelta32) { + if (Value < std::numeric_limits::min() || + Value > std::numeric_limits::max()) + return makeTargetOutOfRangeError(G, B, E); + *(little32_t *)FixupPtr = Value; + } else + *(little64_t *)FixupPtr = Value; + break; + } + case TLVPage21: + case GOTPage21: + case TLVPageOffset12: + case GOTPageOffset12: + case PointerToGOT: { + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + "GOT/TLV edge kinds not lowered: " + getEdgeKindName(E.getKind())); + } + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + "unsupported edge kind" + getEdgeKindName(E.getKind())); + } + + return Error::success(); +} + +/// AArch64 null pointer content. +extern const uint8_t NullGOTEntryContent[8]; + +/// AArch64 PLT stub content. +extern const uint8_t StubContent[8]; + +/// Global Offset Table Builder. +class GOTTableManager : public TableManager { +public: + static StringRef getSectionName() { return "$__GOT"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + Edge::Kind KindToSet = Edge::Invalid; + const char *BlockWorkingMem = B->getContent().data(); + const char *FixupPtr = BlockWorkingMem + E.getOffset(); + + switch (E.getKind()) { + case aarch64::GOTPage21: + case aarch64::TLVPage21: { + KindToSet = aarch64::Page21; + break; + } + case aarch64::GOTPageOffset12: + case aarch64::TLVPageOffset12: { + KindToSet = aarch64::PageOffset12; + uint32_t RawInstr = *(const support::ulittle32_t *)FixupPtr; + (void)RawInstr; + assert(E.getAddend() == 0 && + "GOTPageOffset12/TLVPageOffset12 with non-zero addend"); + assert((RawInstr & 0xfffffc00) == 0xf9400000 && + "RawInstr isn't a 64-bit LDR immediate"); + break; + } + case aarch64::PointerToGOT: { + KindToSet = aarch64::Delta64; + break; + } + default: + return false; + } + assert(KindToSet != Edge::Invalid && + "Fell through switch, but no new kind to set"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setKind(KindToSet); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + auto &GOTEntryBlock = G.createContentBlock( + getGOTSection(G), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0); + GOTEntryBlock.addEdge(aarch64::Pointer64, 0, Target, 0); + return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); + } + +private: + Section &getGOTSection(LinkGraph &G) { + if (!GOTSection) + GOTSection = + &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec); + return *GOTSection; + } + + ArrayRef getGOTEntryBlockContent() { + return {reinterpret_cast(NullGOTEntryContent), + sizeof(NullGOTEntryContent)}; + } + + Section *GOTSection = nullptr; +}; + +/// Procedure Linkage Table Builder. 
+class PLTTableManager : public TableManager { +public: + PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {} + + static StringRef getSectionName() { return "$__STUBS"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getKind() == aarch64::Branch26 && !E.getTarget().isDefined()) { + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + return false; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + auto &StubContentBlock = G.createContentBlock( + getStubsSection(G), getStubBlockContent(), orc::ExecutorAddr(), 1, 0); + // Re-use GOT entries for stub targets. + auto &GOTEntrySymbol = GOT.getEntryForTarget(G, Target); + StubContentBlock.addEdge(aarch64::LDRLiteral19, 0, GOTEntrySymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false); + } + +public: + Section &getStubsSection(LinkGraph &G) { + if (!StubsSection) + StubsSection = + &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec); + return *StubsSection; + } + + ArrayRef getStubBlockContent() { + return {reinterpret_cast(StubContent), sizeof(StubContent)}; + } + + GOTTableManager &GOT; + Section *StubsSection = nullptr; +}; + } // namespace aarch64 } // namespace jitlink } // namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index 5abd4cf11dea..95f45fae91e4 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -37,13 +37,20 @@ enum EdgeKind_riscv : Edge::Kind { /// R_RISCV_64, - /// Low 12 bits of PC-relative branch pointer value relocation + /// PC-relative branch pointer value relocation /// /// Fixup expression: - /// Fixup <- (Target - Fixup + Addend) & 0xFFF + /// Fixup <- (Target - Fixup + Addend) /// R_RISCV_BRANCH, + /// High 20 bits of PC-relative jump pointer value relocation + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend + /// + R_RISCV_JAL, + /// High 20 bits of 32-bit pointer value relocation /// /// Fixup expression @@ -145,6 +152,12 @@ enum EdgeKind_riscv : Edge::Kind { /// Fixup <- (Target - *{1}Fixup - Addend) R_RISCV_SUB8, + /// 6 bits label subtraction + /// + /// Fixup expression + /// Fixup <- (Target - *{1}Fixup - Addend) + R_RISCV_SUB6, + /// Local label assignment /// /// Fixup expression: diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 4a4e8d15be66..9a2bc9b09350 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -447,11 +447,10 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, break; } - default: { - // If you hit this you should check that *constructor and other non-fixup - // edges have been removed prior to applying fixups. 
-    llvm_unreachable("Graph contains edge kind with no fixup expression");
-  }
+  default:
+    return make_error<JITLinkError>(
+        "In graph " + G.getName() + ", section " + B.getSection().getName() +
+        ": unsupported edge kind: " + getEdgeKindName(E.getKind()));
   }
 
   return Error::success();
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index c4647148f287..df2826b50784 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -339,11 +339,7 @@ public:
   /// Sort the lookup set by pointer value. This sort is fast but sensitive to
   /// allocation order and so should not be used where a consistent order is
   /// required.
-  void sortByAddress() {
-    llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) {
-      return LHS.first < RHS.first;
-    });
-  }
+  void sortByAddress() { llvm::sort(Symbols, llvm::less_first()); }
 
   /// Sort the lookup set lexicographically. This sort is slow but the order
   /// is unaffected by allocation order.
@@ -420,12 +416,15 @@ class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> {
 public:
   static char ID;
 
-  FailedToMaterialize(std::shared_ptr<SymbolDependenceMap> Symbols);
+  FailedToMaterialize(std::shared_ptr<SymbolStringPool> SSP,
+                      std::shared_ptr<SymbolDependenceMap> Symbols);
+  ~FailedToMaterialize();
   std::error_code convertToErrorCode() const override;
   void log(raw_ostream &OS) const override;
   const SymbolDependenceMap &getSymbols() const { return *Symbols; }
 
 private:
+  std::shared_ptr<SymbolStringPool> SSP;
   std::shared_ptr<SymbolDependenceMap> Symbols;
 };
 
@@ -1331,7 +1330,7 @@ public:
   lookupInitSymbols(ExecutionSession &ES,
                     const DenseMap<JITDylib *, SymbolLookupSet> &InitSyms);
 
-  /// Performs an async lookup for the the given symbols in each of the given
+  /// Performs an async lookup for the given symbols in each of the given
   /// JITDylibs, calling the given handler once all lookups have completed.
   static void lookupInitSymbolsAsync(unique_function<void(Error)> OnComplete,
@@ -1389,8 +1388,12 @@ public:
   /// object.
   ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC);
 
+  /// Destroy an ExecutionSession. Verifies that endSession was called prior to
+  /// destruction.
+  ~ExecutionSession();
+
   /// End the session. Closes all JITDylibs and disconnects from the
-  /// executor.
+  /// executor. Clients must call this method before destroying the session.
   Error endSession();
 
   /// Get the ExecutorProcessControl object associated with this
@@ -1523,7 +1526,7 @@ public:
   /// after resolution, the function will return a success value, but the
   /// error will be reported via reportErrors.
   Expected<SymbolMap> lookup(const JITDylibSearchOrder &SearchOrder,
-                             const SymbolLookupSet &Symbols,
+                             SymbolLookupSet Symbols,
                              LookupKind K = LookupKind::Static,
                              SymbolState RequiredState = SymbolState::Ready,
                              RegisterDependenciesFunction RegisterDependencies =
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
index 7eb98dfc741e..c4ef06f1fbc6 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
@@ -92,6 +92,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S);
 /// Render a LookupKind.
 raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K);
 
+/// Dump a SymbolStringPool. Useful for debugging dangling-pointer crashes.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP);
+
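A minimal usage sketch of the session-lifetime contract introduced above (editorial, not part of the patch; assumes an existing std::unique_ptr<ExecutorProcessControl> named EPC):

    ExecutionSession ES(std::move(EPC));
    // ... create JITDylibs, add programs, run JIT'd code ...
    if (auto Err = ES.endSession()) // required before ~ExecutionSession() runs
      ES.reportError(std::move(Err));

 /// A function object that can be used as an ObjectTransformLayer transform
 /// to dump object files to disk at a specified path.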
class DumpObjects { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h index 6b12fe990a8a..3804b6dda91f 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -109,7 +109,8 @@ public: /// Returns an AliasMap containing the default aliases for the ELFNixPlatform. /// This can be modified by clients when constructing the platform to add /// or remove aliases. - static SymbolAliasMap standardPlatformAliases(ExecutionSession &ES); + static Expected standardPlatformAliases(ExecutionSession &ES, + JITDylib &PlatformJD); /// Returns the array of required CXX aliases. static ArrayRef> requiredCXXAliases(); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h index ac7051b5b75c..241453320ad5 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -23,8 +23,6 @@ #include #include -using namespace llvm::orc::shared; - namespace llvm { namespace orc { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h index 92de5882bafe..354984b540a9 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h @@ -148,7 +148,7 @@ private: std::mutex EPCUIMutex; ExecutorProcessControl &EPC; std::unique_ptr ABI; - JITTargetAddress ResolverBlockAddr; + JITTargetAddress ResolverBlockAddr = 0; FinalizedAlloc ResolverBlock; std::unique_ptr TP; std::unique_ptr LCTM; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h index 2cc8c29b2813..e6a63707653a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h @@ -125,7 +125,7 @@ public: /// Set TargetOptions. /// /// Note: This operation will overwrite any previously configured options, - /// including EmulatedTLS and ExplicitEmulatedTLS which + /// including EmulatedTLS, ExplicitEmulatedTLS, and UseInitArray which /// the JITTargetMachineBuilder sets by default. Clients are responsible /// for re-enabling these overwritten options. JITTargetMachineBuilder &setOptions(TargetOptions Options) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index d76e6a21a9bb..d67a7f2bfeb2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -56,7 +56,7 @@ public: /// Destruct this instance. If a multi-threaded instance, waits for all /// compile threads to complete. - ~LLJIT(); + virtual ~LLJIT(); /// Returns the ExecutionSession for this instance. ExecutionSession &getExecutionSession() { return *ES; } @@ -110,30 +110,30 @@ public: /// Look up a symbol in JITDylib JD by the symbol's linker-mangled name (to /// look up symbols based on their IR name use the lookup function instead). - Expected lookupLinkerMangled(JITDylib &JD, - SymbolStringPtr Name); + Expected lookupLinkerMangled(JITDylib &JD, + SymbolStringPtr Name); /// Look up a symbol in JITDylib JD by the symbol's linker-mangled name (to /// look up symbols based on their IR name use the lookup function instead). 
- Expected lookupLinkerMangled(JITDylib &JD, - StringRef Name) { + Expected lookupLinkerMangled(JITDylib &JD, + StringRef Name) { return lookupLinkerMangled(JD, ES->intern(Name)); } /// Look up a symbol in the main JITDylib by the symbol's linker-mangled name /// (to look up symbols based on their IR name use the lookup function /// instead). - Expected lookupLinkerMangled(StringRef Name) { + Expected lookupLinkerMangled(StringRef Name) { return lookupLinkerMangled(*Main, Name); } /// Look up a symbol in JITDylib JD based on its IR symbol name. - Expected lookup(JITDylib &JD, StringRef UnmangledName) { + Expected lookup(JITDylib &JD, StringRef UnmangledName) { return lookupLinkerMangled(JD, mangle(UnmangledName)); } /// Look up a symbol in the main JITDylib based on its IR symbol name. - Expected lookup(StringRef UnmangledName) { + Expected lookup(StringRef UnmangledName) { return lookup(*Main, UnmangledName); } @@ -401,7 +401,7 @@ public: std::function()>; Triple TT; - JITTargetAddress LazyCompileFailureAddr = 0; + ExecutorAddr LazyCompileFailureAddr; std::unique_ptr LCTMgr; IndirectStubsManagerBuilderFunction ISMBuilder; @@ -415,7 +415,7 @@ public: /// Set the address in the target address to call if a lazy compile fails. /// /// If this method is not called then the value will default to 0. - SetterImpl &setLazyCompileFailureAddr(JITTargetAddress Addr) { + SetterImpl &setLazyCompileFailureAddr(ExecutorAddr Addr) { this->impl().LazyCompileFailureAddr = Addr; return this->impl(); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index 01f3f1b2ab63..141dd73548c8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -26,30 +26,19 @@ namespace llvm { namespace orc { -struct MachOJITDylibInitializers { - using SectionList = std::vector; - - MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress) - : Name(std::move(Name)), - MachOHeaderAddress(std::move(MachOHeaderAddress)) {} - - std::string Name; - ExecutorAddr MachOHeaderAddress; - ExecutorAddr ObjCImageInfoAddress; - - StringMap InitSections; -}; - -class MachOJITDylibDeinitializers {}; - -using MachOJITDylibInitializerSequence = std::vector; - -using MachOJITDylibDeinitializerSequence = - std::vector; - /// Mediates between MachO initialization and ExecutionSession state. class MachOPlatform : public Platform { public: + // Used internally by MachOPlatform, but made public to enable serialization. + struct MachOJITDylibDepInfo { + bool Sealed = false; + std::vector DepHeaders; + }; + + // Used internally by MachOPlatform, but made public to enable serialization. + using MachOJITDylibDepInfoMap = + std::vector>; + /// Try to create a MachOPlatform instance, adding the ORC runtime to the /// given JITDylib. /// @@ -161,26 +150,28 @@ private: Error processObjCImageInfo(jitlink::LinkGraph &G, MaterializationResponsibility &MR); - Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD); - Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD); - Error registerEHAndTLVSections(jitlink::LinkGraph &G); + Error registerObjectPlatformSections(jitlink::LinkGraph &G, JITDylib &JD); Error registerEHSectionsPhase1(jitlink::LinkGraph &G); std::mutex PluginMutex; MachOPlatform &MP; + + // FIXME: ObjCImageInfos and HeaderAddrs need to be cleared when + // JITDylibs are removed. 
DenseMap> ObjCImageInfos; + DenseMap HeaderAddrs; InitSymbolDepMap InitSymbolDeps; }; - using SendInitializerSequenceFn = - unique_function)>; - - using SendDeinitializerSequenceFn = - unique_function)>; - + using GetJITDylibHeaderSendResultFn = + unique_function)>; + using GetJITDylibNameSendResultFn = + unique_function)>; + using PushInitializersSendResultFn = + unique_function)>; using SendSymbolAddressFn = unique_function)>; static bool supportedTarget(const Triple &TT); @@ -193,28 +184,24 @@ private: // Associate MachOPlatform JIT-side runtime support functions with handlers. Error associateRuntimeSupportFunctions(JITDylib &PlatformJD); - void getInitializersBuildSequencePhase(SendInitializerSequenceFn SendResult, - JITDylib &JD, - std::vector DFSLinkOrder); + // Implements rt_pushInitializers by making repeat async lookups for + // initializer symbols (each lookup may spawn more initializer symbols if + // it pulls in new materializers, e.g. from objects in a static library). + void pushInitializersLoop(PushInitializersSendResultFn SendResult, + JITDylibSP JD); - void getInitializersLookupPhase(SendInitializerSequenceFn SendResult, - JITDylib &JD); - - void rt_getInitializers(SendInitializerSequenceFn SendResult, - StringRef JDName); - - void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, - ExecutorAddr Handle); + // Handle requests from the ORC runtime to push MachO initializer info. + void rt_pushInitializers(PushInitializersSendResultFn SendResult, + ExecutorAddr JDHeaderAddr); + // Handle requests for symbol addresses from the ORC runtime. void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle, StringRef SymbolName); // Records the addresses of runtime symbols used by the platform. Error bootstrapMachORuntime(JITDylib &PlatformJD); - Error registerInitInfo(JITDylib &JD, ExecutorAddr ObjCImageInfoAddr, - ArrayRef InitSections); - + // Call the ORC runtime to create a pthread key. Expected createPThreadKey(); enum PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized }; @@ -229,81 +216,24 @@ private: ExecutorAddr orc_rt_macho_platform_shutdown; ExecutorAddr orc_rt_macho_register_ehframe_section; ExecutorAddr orc_rt_macho_deregister_ehframe_section; - ExecutorAddr orc_rt_macho_register_thread_data_section; - ExecutorAddr orc_rt_macho_deregister_thread_data_section; + ExecutorAddr orc_rt_macho_register_jitdylib; + ExecutorAddr orc_rt_macho_deregister_jitdylib; + ExecutorAddr orc_rt_macho_register_object_platform_sections; + ExecutorAddr orc_rt_macho_deregister_object_platform_sections; ExecutorAddr orc_rt_macho_create_pthread_key; DenseMap RegisteredInitSymbols; - // InitSeqs gets its own mutex to avoid locking the whole session when - // aggregating data from the jitlink. std::mutex PlatformMutex; - DenseMap InitSeqs; - + DenseMap JITDylibToHeaderAddr; DenseMap HeaderAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; namespace shared { -using SPSNamedExecutorAddrRangeSequenceMap = - SPSSequence>; - -using SPSMachOJITDylibInitializers = - SPSTuple; - -using SPSMachOJITDylibInitializerSequence = - SPSSequence; - -/// Serialization traits for MachOJITDylibInitializers. 
-template <>
-class SPSSerializationTraits<SPSMachOJITDylibInitializers,
-                             MachOJITDylibInitializers> {
-public:
-  static size_t size(const MachOJITDylibInitializers &MOJDIs) {
-    return SPSMachOJITDylibInitializers::AsArgList::size(
-        MOJDIs.Name, MOJDIs.MachOHeaderAddress, MOJDIs.ObjCImageInfoAddress,
-        MOJDIs.InitSections);
-  }
-
-  static bool serialize(SPSOutputBuffer &OB,
-                        const MachOJITDylibInitializers &MOJDIs) {
-    return SPSMachOJITDylibInitializers::AsArgList::serialize(
-        OB, MOJDIs.Name, MOJDIs.MachOHeaderAddress, MOJDIs.ObjCImageInfoAddress,
-        MOJDIs.InitSections);
-  }
-
-  static bool deserialize(SPSInputBuffer &IB,
-                          MachOJITDylibInitializers &MOJDIs) {
-    return SPSMachOJITDylibInitializers::AsArgList::deserialize(
-        IB, MOJDIs.Name, MOJDIs.MachOHeaderAddress, MOJDIs.ObjCImageInfoAddress,
-        MOJDIs.InitSections);
-  }
-};
-
-using SPSMachOJITDylibDeinitializers = SPSEmpty;
-
-using SPSMachOJITDylibDeinitializerSequence =
-    SPSSequence<SPSMachOJITDylibDeinitializers>;
-
-template <>
-class SPSSerializationTraits<SPSMachOJITDylibDeinitializers,
-                             MachOJITDylibDeinitializers> {
-public:
-  static size_t size(const MachOJITDylibDeinitializers &MOJDDs) { return 0; }
-
-  static bool serialize(SPSOutputBuffer &OB,
-                        const MachOJITDylibDeinitializers &MOJDDs) {
-    return true;
-  }
-
-  static bool deserialize(SPSInputBuffer &IB,
-                          MachOJITDylibDeinitializers &MOJDDs) {
-    MOJDDs = MachOJITDylibDeinitializers();
-    return true;
-  }
-};
+using SPSNamedExecutorAddrRangeSequence =
+    SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRange>>;
 
 } // end namespace shared
 } // end namespace orc
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h b/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h
new file mode 100644
index 000000000000..d023bfbdb5b6
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h
@@ -0,0 +1,115 @@
+//===- MemoryMapper.h - Cross-process memory mapper -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Cross-process (and in-process) memory mapping and transfer
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H
+#define LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+/// Manages mapping, content transfer and protections for JIT memory
+class MemoryMapper {
+public:
+  /// Represents a single allocation containing multiple segments and
+  /// initialization and deinitialization actions
+  struct AllocInfo {
+    struct SegInfo {
+      ExecutorAddrDiff Offset;
+      const char *WorkingMem;
+      size_t ContentSize;
+      size_t ZeroFillSize;
+      unsigned Prot;
+    };
+
+    ExecutorAddr MappingBase;
+    std::vector<SegInfo> Segments;
+    shared::AllocActions Actions;
+  };
+
+  using OnReservedFunction = unique_function<void(Expected<ExecutorAddrRange>)>;
+
+  /// Reserves address space in executor process
+  virtual void reserve(size_t NumBytes, OnReservedFunction OnReserved) = 0;
+
+  /// Provides working memory
+  virtual char *prepare(ExecutorAddr Addr, size_t ContentSize) = 0;
+
+  using OnInitializedFunction = unique_function<void(Expected<ExecutorAddr>)>;
+
+  /// Ensures executor memory is synchronized with working copy memory, sends
+  /// functions to be called after initialization and before deinitialization,
+  /// and applies memory protections.
+  /// Returns a unique address identifying the allocation.
This address should + /// be passed to deinitialize to run deallocation actions (and reset + /// permissions where possible). + virtual void initialize(AllocInfo &AI, + OnInitializedFunction OnInitialized) = 0; + + using OnDeinitializedFunction = unique_function; + + /// Runs previously specified deinitialization actions + /// Executor addresses returned by initialize should be passed + virtual void deinitialize(ArrayRef Allocations, + OnDeinitializedFunction OnDeInitialized) = 0; + + using OnReleasedFunction = unique_function; + + /// Release address space acquired through reserve() + virtual void release(ArrayRef Reservations, + OnReleasedFunction OnRelease) = 0; + + virtual ~MemoryMapper(); +}; + +class InProcessMemoryMapper final : public MemoryMapper { +public: + InProcessMemoryMapper() {} + + void reserve(size_t NumBytes, OnReservedFunction OnReserved) override; + + void initialize(AllocInfo &AI, OnInitializedFunction OnInitialized) override; + + char *prepare(ExecutorAddr Addr, size_t ContentSize) override; + + void deinitialize(ArrayRef Allocations, + OnDeinitializedFunction OnDeInitialized) override; + + void release(ArrayRef Reservations, + OnReleasedFunction OnRelease) override; + + ~InProcessMemoryMapper() override; + +private: + struct Allocation { + std::vector DeinitializationActions; + }; + using AllocationMap = DenseMap; + + struct Reservation { + size_t Size; + std::vector Allocations; + }; + using ReservationMap = DenseMap; + + std::mutex Mutex; + ReservationMap Reservations; + AllocationMap Allocations; +}; + +} // namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index 82dfdc270128..c5c2780bc9ee 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -330,6 +330,45 @@ public: JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; +// @brief riscv64 support. +// +// RISC-V 64 supports lazy JITing. +class OrcRiscv64 { +public: + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 16; + static constexpr unsigned StubSize = 16; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x148; + + /// Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); + + /// Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, + unsigned NumTrampolines); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. 
+ /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); +}; + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index dc080cfc79d1..5d545f8abdb9 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -43,13 +43,22 @@ public: /// Cast this ExecutorAddr to a pointer of the given type. /// Warning: This should only be used when JITing in-process. - template T toPtr() const { - static_assert(std::is_pointer::value, "T must be a pointer type"); + template + std::enable_if_t::value, T> toPtr() const { uintptr_t IntPtr = static_cast(Addr); assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t"); return reinterpret_cast(IntPtr); } + /// Cast this ExecutorAddr to a pointer of the given function type. + /// Warning: This should only be used when JITing in-process. + template + std::enable_if_t::value, T *> toPtr() const { + uintptr_t IntPtr = static_cast(Addr); + assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t"); + return reinterpret_cast(IntPtr); + } + uint64_t getValue() const { return Addr; } void setValue(uint64_t Addr) { this->Addr = Addr; } bool isNull() const { return Addr == 0; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h index 302b60b80fd0..9be58e9f0fa9 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h @@ -586,7 +586,7 @@ SPSSerializableExpected toSPSSerializable(Expected E) { if (E) return {true, std::move(*E), {}}; else - return {false, {}, toString(E.takeError())}; + return {false, T(), toString(E.takeError())}; } template diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index a138f60a7756..b7bba7a48786 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -88,7 +88,7 @@ private: for (auto &Callee : CandidateSet) { auto ImplSymbol = AliaseeImplTable.getImplFor(Callee); // try to distinguish already compiled & library symbols - if (!ImplSymbol.hasValue()) + if (!ImplSymbol) continue; const auto &ImplSymbolName = ImplSymbol.getPointer()->first; JITDylib *ImplJD = ImplSymbol.getPointer()->second; @@ -175,9 +175,8 @@ public: using ResultEval = std::function; using TargetAndLikelies = DenseMap; - IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer, - Speculator &Spec, MangleAndInterner &Mangle, - ResultEval Interpreter) + IRSpeculationLayer(ExecutionSession &ES, IRLayer &BaseLayer, Speculator &Spec, + MangleAndInterner &Mangle, ResultEval Interpreter) : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} @@ -198,7 +197,7 @@ private: return InternedNames; } - IRCompileLayer &NextLayer; + IRLayer &NextLayer; Speculator &S; MangleAndInterner &Mangle; ResultEval QueryAnalysis; diff --git 
a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h index 63abb196ba49..7e433965c922 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h @@ -19,6 +19,9 @@ #include namespace llvm { + +class raw_ostream; + namespace orc { class SymbolStringPtr; @@ -26,6 +29,10 @@ class SymbolStringPtr; /// String pool for symbol names used by the JIT. class SymbolStringPool { friend class SymbolStringPtr; + + // Implemented in DebugUtils.h. + friend raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP); + public: /// Destroy a SymbolStringPool. ~SymbolStringPool(); diff --git a/llvm/include/llvm/FileCheck/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h index 7a6c98db3029..d6d8dc531e10 100644 --- a/llvm/include/llvm/FileCheck/FileCheck.h +++ b/llvm/include/llvm/FileCheck/FileCheck.h @@ -14,14 +14,17 @@ #define LLVM_FILECHECK_FILECHECK_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/SMLoc.h" #include +#include #include #include namespace llvm { +class MemoryBuffer; +class SourceMgr; +template class SmallVectorImpl; /// Contains info about various FileCheck options. struct FileCheckRequest { @@ -45,6 +48,7 @@ namespace Check { enum FileCheckKind { CheckNone = 0, + CheckMisspelled, CheckPlain, CheckNext, CheckSame, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index c5abb16dd9e5..5f1d335ef04f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -122,13 +122,12 @@ def OMPC_ProcBind : Clause<"proc_bind"> { ]; } -// static and auto are C++ keywords so need a capital to disambiguate. 
-def OMP_SCHEDULE_Static : ClauseVal<"Static", 2, 1> {}
-def OMP_SCHEDULE_Dynamic : ClauseVal<"Dynamic", 3, 1> {}
-def OMP_SCHEDULE_Guided : ClauseVal<"Guided", 4, 1> {}
-def OMP_SCHEDULE_Auto : ClauseVal<"Auto", 5, 1> {}
-def OMP_SCHEDULE_Runtime : ClauseVal<"Runtime", 6, 1> {}
-def OMP_SCHEDULE_Default : ClauseVal<"Default", 7, 0> { let isDefault = 1; }
+def OMP_SCHEDULE_Static : ClauseVal<"static", 2, 1> {}
+def OMP_SCHEDULE_Dynamic : ClauseVal<"dynamic", 3, 1> {}
+def OMP_SCHEDULE_Guided : ClauseVal<"guided", 4, 1> {}
+def OMP_SCHEDULE_Auto : ClauseVal<"auto", 5, 1> {}
+def OMP_SCHEDULE_Runtime : ClauseVal<"runtime", 6, 1> {}
+def OMP_SCHEDULE_Default : ClauseVal<"default", 7, 0> { let isDefault = 1; }
 
 def OMPC_Schedule : Clause<"schedule"> {
   let clangClass = "OMPScheduleClause";
@@ -164,6 +163,25 @@ def OMPC_MemoryOrder : Clause<"memory_order"> {
   ];
 }
 
+def OMP_CANCELLATION_CONSTRUCT_Parallel : ClauseVal<"parallel", 1, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Loop : ClauseVal<"loop", 2, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Sections : ClauseVal<"sections", 3, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Taskgroup : ClauseVal<"taskgroup", 4, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_None : ClauseVal<"none", 5, 0> {
+  let isDefault = 1;
+}
+
+def OMPC_CancellationConstructType : Clause<"cancellation_construct_type"> {
+  let enumClauseValue = "CancellationConstructType";
+  let allowedClauseValues = [
+    OMP_CANCELLATION_CONSTRUCT_Parallel,
+    OMP_CANCELLATION_CONSTRUCT_Loop,
+    OMP_CANCELLATION_CONSTRUCT_Sections,
+    OMP_CANCELLATION_CONSTRUCT_Taskgroup,
+    OMP_CANCELLATION_CONSTRUCT_None
+  ];
+}
+
 def OMPC_Ordered : Clause<"ordered"> {
   let clangClass = "OMPOrderedClause";
   let flangClass = "ScalarIntConstantExpr";
@@ -254,12 +272,18 @@ def OMPC_IsDevicePtr : Clause<"is_device_ptr"> {
   let flangClass = "Name";
   let isValueList = true;
 }
+def OMPC_HasDeviceAddr : Clause<"has_device_addr"> {
+  let clangClass = "OMPHasDeviceAddrClause";
+  let flangClass = "Name";
+  let isValueList = true;
+}
 def OMPC_TaskReduction : Clause<"task_reduction"> {
   let clangClass = "OMPTaskReductionClause";
   let flangClass = "OmpReductionClause";
 }
 def OMPC_InReduction : Clause<"in_reduction"> {
   let clangClass = "OMPInReductionClause";
+  let flangClass = "OmpInReductionClause";
 }
 def OMPC_UnifiedAddress : Clause<"unified_address"> {
   let clangClass = "OMPUnifiedAddressClause";
@@ -557,7 +581,9 @@ def OMP_Target : Directive<"target"> {
     VersionedClause,
     VersionedClause,
     VersionedClause,
+    VersionedClause,
     VersionedClause,
+    VersionedClause,
     VersionedClause,
     VersionedClause
   ];
@@ -590,11 +616,20 @@ def OMP_Requires : Directive<"requires"> {
   let allowedClauses = [
     VersionedClause,
     VersionedClause,
-    VersionedClause,
+    // OpenMP 5.2 Spec: If an implementation does not support a requirement
+    // (reverse offload in this case) then it should terminate with a
+    // compile-time error.
+    // Setting the supported version for reverse_offload to a distant future
+    // version 9.9 so that its partial support can be tested in the meantime.
+    //
+    // TODO: Correct this supported version number whenever the complete
+    // implementation of reverse_offload is available.
+ VersionedClause, VersionedClause, VersionedClause ]; } +def OMP_Nothing : Directive<"nothing"> {} def OMP_TargetData : Directive<"target data"> { let allowedClauses = [ VersionedClause, @@ -645,6 +680,7 @@ def OMP_TargetParallel : Directive<"target parallel"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause ]; @@ -677,6 +713,7 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause @@ -693,6 +730,7 @@ def OMP_TargetParallelDo : Directive<"target parallel do"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -825,6 +863,21 @@ def OMP_ParallelMaster : Directive<"parallel master"> { VersionedClause ]; } +def OMP_ParallelMasked : Directive<"parallel masked"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_ParallelSections : Directive<"parallel sections"> { let allowedClauses = [ VersionedClause, @@ -1126,6 +1179,7 @@ def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1156,6 +1210,7 @@ def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1169,6 +1224,7 @@ def OMP_TargetSimd : Directive<"target simd"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1342,6 +1398,7 @@ def OMP_TargetTeams : Directive<"target teams"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1365,6 +1422,7 @@ def OMP_TargetTeamsDistribute : Directive<"target teams distribute"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1395,6 +1453,7 @@ def OMP_TargetTeamsDistributeParallelFor : VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1420,6 +1479,7 @@ def OMP_TargetTeamsDistributeParallelDo : VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1456,6 +1516,7 @@ def OMP_TargetTeamsDistributeParallelForSimd : VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1485,6 +1546,7 @@ def OMP_TargetTeamsDistributeParallelDoSimd : VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1523,6 +1585,7 @@ def OMP_TargetTeamsDistributeSimd : VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1581,6 +1644,28 @@ def OMP_MasterTaskloop : Directive<"master taskloop"> { VersionedClause ]; } +def OMP_MaskedTaskloop : Directive<"masked taskloop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + 
VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_ParallelMasterTaskloop : Directive<"parallel master taskloop"> { let allowedClauses = [ @@ -1605,6 +1690,31 @@ def OMP_ParallelMasterTaskloop : VersionedClause ]; } +def OMP_ParallelMaskedTaskloop : + Directive<"parallel masked taskloop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> { let allowedClauses = [ VersionedClause, @@ -1632,6 +1742,34 @@ def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> { VersionedClause ]; } +def OMP_MaskedTaskloopSimd : Directive<"masked taskloop simd"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_ParallelMasterTaskloopSimd : Directive<"parallel master taskloop simd"> { let allowedClauses = [ @@ -1662,6 +1800,37 @@ def OMP_ParallelMasterTaskloopSimd : VersionedClause ]; } +def OMP_ParallelMaskedTaskloopSimd : + Directive<"parallel masked taskloop simd"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_Depobj : Directive<"depobj"> { let allowedClauses = [ VersionedClause, @@ -1734,6 +1903,7 @@ def OMP_dispatch : Directive<"dispatch"> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1757,6 +1927,99 @@ def OMP_loop : Directive<"loop"> { VersionedClause, ]; } +def OMP_teams_loop : Directive<"teams loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; +} +def OMP_target_teams_loop : Directive<"target teams loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + 
VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; +} +def OMP_parallel_loop : Directive<"parallel loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; +} +def OMP_target_parallel_loop : Directive<"target parallel loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; +} def OMP_Metadirective : Directive<"metadirective"> { let allowedClauses = [VersionedClause]; let allowedOnceClauses = [VersionedClause]; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index bee90281e086..76104f6bc9cf 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -74,26 +74,114 @@ enum class IdentFlag { /// \note This needs to be kept in sync with kmp.h enum sched_type. /// Todo: Update kmp.h to include this file, and remove the enums in kmp.h -/// To complete this, more enum values will need to be moved here. enum class OMPScheduleType { - StaticChunked = 33, - Static = 34, // static unspecialized - DistributeChunked = 91, - Distribute = 92, - DynamicChunked = 35, - GuidedChunked = 36, // guided unspecialized - Runtime = 37, - Auto = 38, // auto - - StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd) - GuidedSimd = 46, // guided with chunk adjustment - RuntimeSimd = 47, // runtime with chunk adjustment - - ModifierMonotonic = - (1 << 29), // Set if the monotonic schedule modifier was present - ModifierNonmonotonic = - (1 << 30), // Set if the nonmonotonic schedule modifier was present - ModifierMask = ModifierMonotonic | ModifierNonmonotonic, + // For typed comparisons, not a valid schedule + None = 0, + + // Schedule algorithms + BaseStaticChunked = 1, + BaseStatic = 2, + BaseDynamicChunked = 3, + BaseGuidedChunked = 4, + BaseRuntime = 5, + BaseAuto = 6, + BaseTrapezoidal = 7, + BaseGreedy = 8, + BaseBalanced = 9, + BaseGuidedIterativeChunked = 10, + BaseGuidedAnalyticalChunked = 11, + BaseSteal = 12, + + // with chunk adjustment (e.g., simd) + BaseStaticBalancedChunked = 13, + BaseGuidedSimd = 14, + BaseRuntimeSimd = 15, + + // static schedules algorithims for distribute + BaseDistributeChunked = 27, + BaseDistribute = 28, + + // Modifier flags to be combined with schedule algorithms + ModifierUnordered = (1 << 5), + ModifierOrdered = (1 << 6), + ModifierNomerge = (1 << 7), + ModifierMonotonic = (1 << 29), + ModifierNonmonotonic = (1 << 30), + + // Masks combining multiple flags + OrderingMask = ModifierUnordered | ModifierOrdered | ModifierNomerge, + MonotonicityMask = ModifierMonotonic | ModifierNonmonotonic, + ModifierMask = OrderingMask | MonotonicityMask, + + // valid schedule type values, without monotonicity flags + UnorderedStaticChunked = BaseStaticChunked | ModifierUnordered, // 
33 + UnorderedStatic = BaseStatic | ModifierUnordered, // 34 + UnorderedDynamicChunked = BaseDynamicChunked | ModifierUnordered, // 35 + UnorderedGuidedChunked = BaseGuidedChunked | ModifierUnordered, // 36 + UnorderedRuntime = BaseRuntime | ModifierUnordered, // 37 + UnorderedAuto = BaseAuto | ModifierUnordered, // 38 + UnorderedTrapezoidal = BaseTrapezoidal | ModifierUnordered, // 39 + UnorderedGreedy = BaseGreedy | ModifierUnordered, // 40 + UnorderedBalanced = BaseBalanced | ModifierUnordered, // 41 + UnorderedGuidedIterativeChunked = + BaseGuidedIterativeChunked | ModifierUnordered, // 42 + UnorderedGuidedAnalyticalChunked = + BaseGuidedAnalyticalChunked | ModifierUnordered, // 43 + UnorderedSteal = BaseSteal | ModifierUnordered, // 44 + + UnorderedStaticBalancedChunked = + BaseStaticBalancedChunked | ModifierUnordered, // 45 + UnorderedGuidedSimd = BaseGuidedSimd | ModifierUnordered, // 46 + UnorderedRuntimeSimd = BaseRuntimeSimd | ModifierUnordered, // 47 + + OrderedStaticChunked = BaseStaticChunked | ModifierOrdered, // 65 + OrderedStatic = BaseStatic | ModifierOrdered, // 66 + OrderedDynamicChunked = BaseDynamicChunked | ModifierOrdered, // 67 + OrderedGuidedChunked = BaseGuidedChunked | ModifierOrdered, // 68 + OrderedRuntime = BaseRuntime | ModifierOrdered, // 69 + OrderedAuto = BaseAuto | ModifierOrdered, // 70 + OrderdTrapezoidal = BaseTrapezoidal | ModifierOrdered, // 71 + + OrderedDistributeChunked = BaseDistributeChunked | ModifierOrdered, // 91 + OrderedDistribute = BaseDistribute | ModifierOrdered, // 92 + + NomergeUnorderedStaticChunked = + BaseStaticChunked | ModifierUnordered | ModifierNomerge, // 161 + NomergeUnorderedStatic = + BaseStatic | ModifierUnordered | ModifierNomerge, // 162 + NomergeUnorderedDynamicChunked = + BaseDynamicChunked | ModifierUnordered | ModifierNomerge, // 163 + NomergeUnorderedGuidedChunked = + BaseGuidedChunked | ModifierUnordered | ModifierNomerge, // 164 + NomergeUnorderedRuntime = + BaseRuntime | ModifierUnordered | ModifierNomerge, // 165 + NomergeUnorderedAuto = BaseAuto | ModifierUnordered | ModifierNomerge, // 166 + NomergeUnorderedTrapezoidal = + BaseTrapezoidal | ModifierUnordered | ModifierNomerge, // 167 + NomergeUnorderedGreedy = + BaseGreedy | ModifierUnordered | ModifierNomerge, // 168 + NomergeUnorderedBalanced = + BaseBalanced | ModifierUnordered | ModifierNomerge, // 169 + NomergeUnorderedGuidedIterativeChunked = + BaseGuidedIterativeChunked | ModifierUnordered | ModifierNomerge, // 170 + NomergeUnorderedGuidedAnalyticalChunked = + BaseGuidedAnalyticalChunked | ModifierUnordered | ModifierNomerge, // 171 + NomergeUnorderedSteal = + BaseSteal | ModifierUnordered | ModifierNomerge, // 172 + + NomergeOrderedStaticChunked = + BaseStaticChunked | ModifierOrdered | ModifierNomerge, // 193 + NomergeOrderedStatic = BaseStatic | ModifierOrdered | ModifierNomerge, // 194 + NomergeOrderedDynamicChunked = + BaseDynamicChunked | ModifierOrdered | ModifierNomerge, // 195 + NomergeOrderedGuidedChunked = + BaseGuidedChunked | ModifierOrdered | ModifierNomerge, // 196 + NomergeOrderedRuntime = + BaseRuntime | ModifierOrdered | ModifierNomerge, // 197 + NomergeOrderedAuto = BaseAuto | ModifierOrdered | ModifierNomerge, // 198 + NomergeOrderedTrapezoidal = + BaseTrapezoidal | ModifierOrdered | ModifierNomerge, // 199 + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask) }; @@ -116,6 +204,9 @@ enum class AddressSpace : unsigned { /// \note This needs to be kept in sync with interop.h enum kmp_interop_type_t.: enum class OMPInteropType { 
Unknown, Target, TargetSync };
 
+/// Atomic compare operations. Currently OpenMP only supports ==, >, and <.
+enum class OMPAtomicCompareOp : unsigned { EQ, MIN, MAX };
+
 } // end namespace omp
 } // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
index 544f698655a4..b13b74ceab86 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
@@ -15,14 +15,14 @@
 #ifndef LLVM_FRONTEND_OPENMP_OMPCONTEXT_H
 #define LLVM_FRONTEND_OPENMP_OMPCONTEXT_H
 
-#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 namespace llvm {
+class Triple;
 namespace omp {
 
 /// OpenMP Context related IDs and helpers
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f60debe8411c..8a6b1c7d412d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -23,6 +23,52 @@
 namespace llvm {
 class CanonicalLoopInfo;
 
+/// Move the instruction after an InsertPoint to the beginning of another
+/// BasicBlock.
+///
+/// The instructions after \p IP are moved to the beginning of \p New which must
+/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
+/// \p New will be added such that there is no semantic change. Otherwise, the
+/// \p IP insert block remains degenerate and it is up to the caller to insert a
+/// terminator.
+void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
+              bool CreateBranch);
+
+/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
+/// insert location will stick to after the instruction before the insertion
+/// point (instead of moving with the instruction the InsertPoint stores
+/// internally).
+void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
+
+/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
+/// (missing the terminator).
+///
+/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
+/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
+/// is true, a branch to the new successor will be created such that
+/// semantically there is no change; otherwise the block of the insertion point
+/// remains degenerate and it is the caller's responsibility to insert a
+/// terminator. Returns the new successor block.
+BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
+                    llvm::Twine Name = {});
+
+/// Split a BasicBlock at \p Builder's insertion point, even if the block is
+/// degenerate (missing the terminator). Its new insert location will stick to
+/// after the instruction before the insertion point (instead of moving with the
+/// instruction the InsertPoint stores internally).
+BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
+                    llvm::Twine Name = {});
+
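A short hedged sketch of how these split helpers are typically driven (editorial, not part of the patch; Builder names an existing IRBuilder):

    // Split at the current insertion point; the branch keeps semantics
    // unchanged. Builder keeps inserting into the predecessor block, before
    // the new branch, so runtime calls can be emitted there without
    // disturbing the split-off tail.
    BasicBlock *ContBB = splitBB(Builder, /*CreateBranch=*/true, "cont");

+/// Split a BasicBlock at \p Builder's insertion point, even if the block is
+/// degenerate (missing the terminator). Its new insert location will stick to
+/// after the instruction before the insertion point (instead of moving with the
+/// instruction the InsertPoint stores internally).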
+BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
+
+/// Like splitBB, but reuses the current block's name for the new name.
+BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
+                              llvm::Twine Suffix = ".split");
+
 /// An interface to create LLVM-IR for OpenMP directives.
 ///
 /// Each OpenMP directive has a corresponding public generator method.
@@ -87,27 +133,36 @@ public:
   /// Callback type for body (=inner region) code generation
   ///
   /// The callback takes code locations as arguments, each describing a
-  /// location at which code might need to be generated or a location that is
-  /// the target of control transfer.
+  /// location where additional instructions can be inserted.
+  ///
+  /// The CodeGenIP may be in the middle of a basic block or point to the end of
+  /// it. The basic block may have a terminator or be degenerate. The callback
+  /// function may just insert instructions at that position, but may also split
+  /// the block (without the Before argument of BasicBlock::splitBasicBlock,
+  /// such that the identity of the split predecessor block is preserved) and
+  /// insert additional control flow, including branches that do not lead back
+  /// to what follows the CodeGenIP. Note that since the callback is allowed to
+  /// split the block, callers must assume that InsertPoints to positions in the
+  /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
+  /// such InsertPoints need to be preserved, the caller can split the block
+  /// itself before calling the callback.
+  ///
+  /// AllocaIP and CodeGenIP must not point to the same position.
+  ///
   /// \param AllocaIP is the insertion point at which new alloca instructions
-  ///        should be placed.
+  ///        should be placed. The BasicBlock it is pointing to must
+  ///        not be split.
   /// \param CodeGenIP is the insertion point at which the body code should be
   ///        placed.
-  /// \param ContinuationBB is the basic block target to leave the body.
-  ///
-  /// Note that all blocks pointed to by the arguments have terminators.
   using BodyGenCallbackTy =
-      function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                        BasicBlock &ContinuationBB)>;
+      function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
 
   // This is created primarily for sections construct as llvm::function_ref
   // (BodyGenCallbackTy) is not storable (as described in the comments of
   // function_ref class - function_ref contains non-ownable reference
   // to the callable.
   using StorableBodyGenCallbackTy =
-      std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                         BasicBlock &ContinuationBB)>;
+      std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
 
   /// Callback type for loop body code generation.
   ///
@@ -145,8 +200,7 @@ public:
   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
   /// (filename, line, column, ...).
   struct LocationDescription {
-    template <typename T, typename U>
-    LocationDescription(const IRBuilder<T, U> &IRB)
+    LocationDescription(const IRBuilderBase &IRB)
         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
@@ -345,6 +399,7 @@ public:
                                          ArrayRef<CanonicalLoopInfo *> Loops,
                                          InsertPointTy ComputeIP);
 
+private:
   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
   ///
   /// This takes a \p LoopInfo representing a canonical loop, such as the one
   /// created by createCanonicalLoop and emits additional instructions to
   /// turn it into a workshare loop. In particular, it calls to an OpenMP
   /// runtime function in the preheader to obtain the loop bounds to be used in
   /// the current thread, updates the relevant instructions in the canonical
   /// loop and calls to an OpenMP runtime finalization function after the loop.
   ///
-  /// TODO: Workshare loops with static scheduling may contain up to two loops
-  /// that fulfill the requirements of an OpenMP canonical loop. One for
-  /// iterating over all iterations of a chunk and another one for iterating
-  /// over all chunks that are executed on the same thread. Returning
-  /// CanonicalLoopInfo objects representing them may eventually be useful for
-  /// the apply clause planned in OpenMP 6.0, but currently whether these are
-  /// canonical loops is irrelevant.
-  ///
   /// \param DL Debug location for instructions added for the
   ///           workshare-loop construct itself.
   /// \param CLI A descriptor of the canonical loop to workshare.
   /// \param AllocaIP An insertion point for Alloca instructions usable in the
   ///                 preheader of the loop.
   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
   ///                     the loop.
-  /// \param Chunk The size of loop chunk considered as a unit when
-  ///              scheduling. If \p nullptr, defaults to 1.
   ///
   /// \returns Point where to insert code after the workshare construct.
   InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                          InsertPointTy AllocaIP,
-                                         bool NeedsBarrier,
-                                         Value *Chunk = nullptr);
+                                         bool NeedsBarrier);
+
+  /// Modifies the canonical loop to be a statically-scheduled workshare loop
+  /// with a user-specified chunk size.
+  ///
+  /// \param DL Debug location for instructions added for the
+  ///           workshare-loop construct itself.
+  /// \param CLI A descriptor of the canonical loop to workshare.
+  /// \param AllocaIP An insertion point for Alloca instructions usable in
+  ///                 the preheader of the loop.
+  /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
+  ///                     loop.
+  /// \param ChunkSize The user-specified chunk size.
+  ///
+  /// \returns Point where to insert code after the workshare construct.
+  InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
+                                                CanonicalLoopInfo *CLI,
+                                                InsertPointTy AllocaIP,
+                                                bool NeedsBarrier,
+                                                Value *ChunkSize);
 
   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
   ///
@@ -404,6 +467,7 @@ public:
                                           bool NeedsBarrier,
                                           Value *Chunk = nullptr);
 
+public:
   /// Modifies the canonical loop to be a workshare loop.
   ///
   /// This takes a \p LoopInfo representing a canonical loop, such as the one
   /// created by createCanonicalLoop and emits additional instructions to
   /// turn it into a workshare loop. In particular, it calls to an OpenMP
   /// runtime function in the preheader to obtain the loop bounds to be used in
   /// the current thread, updates the relevant instructions in the canonical
   /// loop and calls to an OpenMP runtime finalization function after the loop.
   ///
+  /// The concrete transformation is done by applyStaticWorkshareLoop,
+  /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
+  /// on the value of \p SchedKind and \p ChunkSize.
+  ///
   /// \param DL Debug location for instructions added for the
   ///           workshare-loop construct itself.
   /// \param CLI A descriptor of the canonical loop to workshare.
   /// \param AllocaIP An insertion point for Alloca instructions usable in the
   ///                 preheader of the loop.
   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
   ///                     the loop.
+  /// \param SchedKind Scheduling algorithm to use.
+  /// \param ChunkSize The chunk size for the inner loop.
+  /// \param HasSimdModifier Whether the simd modifier is present in the
+  ///                        schedule clause.
+  /// \param HasMonotonicModifier Whether the monotonic modifier is present in
+  ///                             the schedule clause.
+  /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
+  ///                                present in the schedule clause.
+  /// \param HasOrderedClause Whether the (parameterless) ordered clause is
+  ///                         present.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
-                                   InsertPointTy AllocaIP, bool NeedsBarrier);
+  InsertPointTy applyWorkshareLoop(
+      DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+      bool NeedsBarrier,
+      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
+      Value *ChunkSize = nullptr, bool HasSimdModifier = false,
+      bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
+      bool HasOrderedClause = false);
 
   /// Tile a loop nest.
   ///
@@ -535,6 +618,18 @@ public:
   /// \param Loc The location where the taskyield directive was encountered.
   void createTaskyield(const LocationDescription &Loc);
 
+  /// Generator for `#omp task`
+  ///
+  /// \param Loc The location where the task construct was encountered.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param Tied True if the task is tied, false if the task is untied.
+  /// \param Final i1 value which is `true` if the task is final, `false` if the
+  ///              task is not final.
+  InsertPointTy createTask(const LocationDescription &Loc,
+                           InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
+                           bool Tied = true, Value *Final = nullptr);
+
   /// Functions used to generate reductions. Such functions take two Values
   /// representing LHS and RHS of the reduction, respectively, and a reference
   /// to the value that is updated to refer to the reduction result.
@@ -696,6 +791,27 @@ public:
   /// Value.
   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
 
+  /// Create an offloading section struct used to register this global at
+  /// runtime.
+  ///
+  /// Type struct __tgt_offload_entry{
+  ///   void    *addr;      // Pointer to the offload entry info.
+  ///                       // (function or global)
+  ///   char    *name;      // Name of the function or global.
+  ///   size_t  size;       // Size of the entry info (0 if it is a function).
+  ///   int32_t flags;
+  ///   int32_t reserved;
+  /// };
+  ///
+  /// \param Addr The pointer to the global being registered.
+  /// \param Name The symbol name associated with the global.
+  /// \param Size The size in bytes of the global (0 for functions).
+  /// \param Flags Flags associated with the entry.
+  /// \param SectionName The section this entry will be placed at.
+  void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
+                           int32_t Flags,
+                           StringRef SectionName = "omp_offloading_entries");
+
   /// Generate control flow and cleanup for cancellation.
   ///
   /// \param CancelFlag Flag indicating if the cancellation is performed.
@@ -768,7 +884,7 @@ public:
   struct OutlineInfo {
     using PostOutlineCBTy = std::function<void(Function &)>;
     PostOutlineCBTy PostOutlineCB;
-    BasicBlock *EntryBB, *ExitBB;
+    BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
     SmallVector<Value *> ExcludeArgsFromAggregate;
 
     /// Collect all blocks in between EntryBB and ExitBB in both the given
@@ -851,12 +967,14 @@ public:
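A hedged illustration of the createTask generator declared above (editorial, not part of the patch; OMPB, Builder, and AllocaIP stand for an existing OpenMPIRBuilder, its IRBuilder, and an alloca insertion point):

    auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
                         OpenMPIRBuilder::InsertPointTy CodeGenIP) {
      Builder.restoreIP(CodeGenIP);
      // ... emit the body of the task region here ...
    };
    Builder.restoreIP(OMPB.createTask(
        OpenMPIRBuilder::LocationDescription(Builder), AllocaIP, BodyGenCB,
        /*Tied=*/true));

   /// \param Loc The source location description.
   /// \param BodyGenCB Callback that will generate the region code.
   /// \param FiniCB Callback to finalize variable copies.
+  /// \param IsNowait If false, a barrier is emitted.
   /// \param DidIt Local variable used as a flag to indicate 'single' thread
   ///
   /// \returns The insertion position *after* the single call.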
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, llvm::Value *DidIt); + FinalizeCallbackTy FiniCB, bool IsNowait, + llvm::Value *DidIt); /// Generator for '#omp master' /// @@ -1198,7 +1316,7 @@ private: const function_ref &IRB)>; private: - enum AtomicKind { Read, Write, Update, Capture }; + enum AtomicKind { Read, Write, Update, Capture, Compare }; /// Determine whether to emit flush or not /// @@ -1214,7 +1332,8 @@ private: /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) /// Only Scalar data types. /// - /// \param AllocIP Instruction to create AllocaInst before. + /// \param AllocaIP The insertion point to be used for alloca + /// instructions. /// \param X The target atomic pointer to be updated /// \param XElemTy The element type of the atomic pointer. /// \param Expr The value to update X with. @@ -1234,7 +1353,7 @@ private: /// \returns A pair of the old value of X before the update, and the value /// used for the update. std::pair - emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr, + emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr); @@ -1286,7 +1405,7 @@ public: /// Only Scalar data types. /// /// \param Loc The insert and source location description. - /// \param AllocIP Instruction to create AllocaInst before. + /// \param AllocaIP The insertion point to be used for alloca instructions. /// \param X The target atomic pointer to be updated /// \param Expr The value to update X with. /// \param AO Atomic ordering of the generated atomic instructions. @@ -1302,7 +1421,7 @@ public: /// /// \return Insertion point after generated atomic update IR. InsertPointTy createAtomicUpdate(const LocationDescription &Loc, - Instruction *AllocIP, AtomicOpValue &X, + InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, @@ -1317,7 +1436,7 @@ public: /// X = UpdateOp(X); V = X, /// /// \param Loc The insert and source location description. - /// \param AllocIP Instruction to create AllocaInst before. + /// \param AllocaIP The insertion point to be used for alloca instructions. /// \param X The target atomic pointer to be updated /// \param V Memory address where to store captured value /// \param Expr The value to update X with. @@ -1338,12 +1457,63 @@ public: /// /// \return Insertion point after generated atomic capture IR. InsertPointTy - createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP, + createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr); + /// Emit atomic compare for constructs: --- Only scalar data types + /// cond-expr-stmt: + /// x = x ordop expr ? expr : x; + /// x = expr ordop x ? expr : x; + /// x = x == e ? d : x; + /// x = e == x ? 
d : x; (this one is not in the spec) + /// cond-update-stmt: + /// if (x ordop expr) { x = expr; } + /// if (expr ordop x) { x = expr; } + /// if (x == e) { x = d; } + /// if (e == x) { x = d; } (this one is not in the spec) + /// conditional-update-capture-atomic: + /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false) + /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false) + /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false, + /// IsFailOnly=true) + /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false) + /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false, + /// IsFailOnly=true) + /// + /// \param Loc The insert and source location description. + /// \param X The target atomic pointer to be updated. + /// \param V Memory address where to store captured value (for + /// compare capture only). + /// \param R Memory address where to store comparison result + /// (for compare capture with '==' only). + /// \param E The expected value ('e') for forms that use an + /// equality comparison or an expression ('expr') for + /// forms that use 'ordop' (logically an atomic maximum or + /// minimum). + /// \param D The desired value for forms that use an equality + /// comparison. For forms that use 'ordop', it should be + /// \p nullptr. + /// \param AO Atomic ordering of the generated atomic instructions. + /// \param Op Atomic compare operation. It can only be ==, <, or >. + /// \param IsXBinopExpr True if the conditional statement is in the form where + /// x is on LHS. It only matters for < or >. + /// \param IsPostfixUpdate True if the original value of 'x' must be stored in + /// 'v', not an updated one (for compare capture + /// only). + /// \param IsFailOnly True if the original value of 'x' is stored to 'v' + /// only when the comparison fails. This is only valid for + /// the case where the comparison is '=='. + /// + /// \return Insertion point after generated atomic capture IR. + InsertPointTy + createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, + AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, + AtomicOrdering AO, omp::OMPAtomicCompareOp Op, + bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); + /// Create the control flow structure of a canonical OpenMP loop. /// /// The emitted loop will be disconnected, i.e. no edge to the loop's @@ -1484,6 +1654,27 @@ private: /// Re-evaluate whether this makes sense. void collectControlBlocks(SmallVectorImpl &BBs); + /// Sets the number of loop iterations to the given value. This value must be + /// valid in the condition block (i.e., defined in the preheader) and is + /// interpreted as an unsigned integer. + void setTripCount(Value *TripCount); + + /// Replace all uses of the canonical induction variable in the loop body with + /// a new one. + /// + /// The intended use case is to update the induction variable for an updated + /// iteration space such that it can stay normalized in the 0...tripcount-1 + /// range. + /// + /// The \p Updater is called with the (presumably updated) current normalized + /// induction variable and is expected to return the value that uses of the + /// pre-updated induction values should use instead, typically dependent on + /// the new induction variable. This is a lambda (instead of e.g. just passing + /// the new value) to be able to distinguish the uses of the pre-updated + /// induction variable and uses of the induction variable to compute the + /// updated induction variable value.
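Two minimal usage sketches, not part of the vendored hunks; OMPBuilder, Builder, Loc, X, V, R, E, D, AO, and CLI are invented names, and the function_ref parameter elided in this hunk is assumed to be Value *(Instruction *). The first maps the simplest cond-update-stmt form above; the second shows the Updater callback expected by the mapIndVar() declaration that follows:

    // 'if (x == e) { x = d; }' with no capture: '==' comparison, no postfix
    // update, no fail-only store.
    // OMPBuilder.createAtomicCompare(Loc, X, V, R, E, D, AO,
    //                                omp::OMPAtomicCompareOp::EQ,
    //                                /*IsXBinopExpr=*/false,
    //                                /*IsPostfixUpdate=*/false,
    //                                /*IsFailOnly=*/false);

    // Keep the IV normalized after setTripCount() halved the trip count for
    // an unroll-by-two: uses of the pre-update IV become '2 * NewIV'.
    // CLI->mapIndVar([&](Instruction *NewIV) -> Value * {
    //   return Builder.CreateMul(NewIV, ConstantInt::get(NewIV->getType(), 2));
    // });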
+ void mapIndVar(llvm::function_ref Updater); + public: /// Returns whether this object currently represents the IR of a loop. If /// returning false, it may have been consumed by a loop transformation or not diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 0c3cb3f43105..14aa53a6b08d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -86,6 +86,8 @@ __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8) OMP_STRUCT_TYPE(VarName, "struct." #Name, __VA_ARGS__) __OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr) +__OMP_STRUCT_TYPE(OffloadEntry, __tgt_offload_entry, Int8Ptr, Int8Ptr, SizeTy, + Int32, Int32) __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr) #undef __OMP_STRUCT_TYPE @@ -475,6 +477,7 @@ __OMP_RTL(__last, false, Void, ) #define ParamAttrs(...) ArrayRef({__VA_ARGS__}) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) #define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) +#define AllocSizeAttr(N, M) Attribute::getWithAllocSizeArgs(Ctx, N, M) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -908,8 +911,10 @@ __OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), __OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_alloc_shared, DeviceAllocAttrs, ReturnPtrAttrs, - ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_alloc_shared, AttributeSet( + EnumAttr(NoUnwind), + EnumAttr(NoSync), + AllocSizeAttr(0, None)), ReturnPtrAttrs, ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(NoCaptureAttrs)) @@ -962,6 +967,7 @@ __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), #undef EnumAttr #undef EnumAttrInt #undef ParamAttrs +#undef AllocSizeAttr ///} @@ -1026,6 +1032,7 @@ __OMP_CANCEL_KIND(taskgroup, 4) __OMP_DEFAULT_KIND(none) __OMP_DEFAULT_KIND(shared) +__OMP_DEFAULT_KIND(private) __OMP_DEFAULT_KIND(firstprivate) __OMP_DEFAULT_KIND(unknown) @@ -1153,6 +1160,7 @@ __OMP_TRAIT_PROPERTY(implementation, extension, match_any) __OMP_TRAIT_PROPERTY(implementation, extension, match_none) __OMP_TRAIT_PROPERTY(implementation, extension, disable_implicit_base) __OMP_TRAIT_PROPERTY(implementation, extension, allow_templates) +__OMP_TRAIT_PROPERTY(implementation, extension, bind_to_declaration) __OMP_TRAIT_SET(user) diff --git a/llvm/include/llvm/FuzzMutate/FuzzerCLI.h b/llvm/include/llvm/FuzzMutate/FuzzerCLI.h index 473277396a90..db0168d3e675 100644 --- a/llvm/include/llvm/FuzzMutate/FuzzerCLI.h +++ b/llvm/include/llvm/FuzzMutate/FuzzerCLI.h @@ -14,8 +14,8 @@ #ifndef LLVM_FUZZMUTATE_FUZZERCLI_H #define LLVM_FUZZMUTATE_FUZZERCLI_H -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/DataTypes.h" +#include namespace llvm { @@ -51,29 +51,6 @@ using FuzzerInitFun = int (*)(int *argc, char ***argv); int runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne, FuzzerInitFun Init = [](int *, char ***) { return 0; }); -/// Fuzzer friendly interface for the llvm bitcode parser. -/// -/// \param Data Bitcode we are going to parse -/// \param Size Size of the 'Data' in bytes -/// \return New module or nullptr in case of error -std::unique_ptr parseModule(const uint8_t *Data, size_t Size, - LLVMContext &Context); - -/// Fuzzer friendly interface for the llvm bitcode printer. 
-/// -/// \param M Module to print -/// \param Dest Location to store serialized module -/// \param MaxSize Size of the destination buffer -/// \return Number of bytes that were written. When module size exceeds MaxSize -/// returns 0 and leaves Dest unchanged. -size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize); - -/// Try to parse module and verify it. May output verification errors to the -/// errs(). -/// \return New module or nullptr in case of error. -std::unique_ptr parseAndVerify(const uint8_t *Data, size_t Size, - LLVMContext &Context); - -} // end llvm namespace +} // namespace llvm #endif // LLVM_FUZZMUTATE_FUZZERCLI_H diff --git a/llvm/include/llvm/FuzzMutate/IRMutator.h b/llvm/include/llvm/FuzzMutate/IRMutator.h index 423582eace9b..ade76f1b5845 100644 --- a/llvm/include/llvm/FuzzMutate/IRMutator.h +++ b/llvm/include/llvm/FuzzMutate/IRMutator.h @@ -10,6 +10,9 @@ // configurable set of strategies. Some common strategies are also included // here. // +// Fuzzer-friendly (de)serialization functions are also provided, as these +// are usually needed when mutating IR. +// //===----------------------------------------------------------------------===// #ifndef LLVM_FUZZMUTATE_IRMUTATOR_H @@ -113,6 +116,29 @@ public: void mutate(Instruction &Inst, RandomIRBuilder &IB) override; }; +/// Fuzzer friendly interface for the llvm bitcode parser. +/// +/// \param Data Bitcode we are going to parse +/// \param Size Size of the 'Data' in bytes +/// \return New module or nullptr in case of error +std::unique_ptr parseModule(const uint8_t *Data, size_t Size, + LLVMContext &Context); + +/// Fuzzer friendly interface for the llvm bitcode printer. +/// +/// \param M Module to print +/// \param Dest Location to store serialized module +/// \param MaxSize Size of the destination buffer +/// \return Number of bytes that were written. When module size exceeds MaxSize +/// returns 0 and leaves Dest unchanged. +size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize); + +/// Try to parse module and verify it. May output verification errors to the +/// errs(). +/// \return New module or nullptr in case of error. 
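A rough round-trip sketch for these helpers, assuming the template arguments elided in this hunk are std::unique_ptr<Module> and that Data, Size, and Ctx come from a fuzzer harness; parseAndVerify() is declared immediately below:

    // std::unique_ptr<Module> M = parseAndVerify(Data, Size, Ctx);
    // if (!M)
    //   return 0;                      // reject inputs that fail to parse/verify
    // mutateModule(*M);                // hypothetical mutation step
    // uint8_t Buf[64 << 10];
    // size_t Written = writeModule(*M, Buf, sizeof(Buf));
    // // Written == 0 means the module exceeded MaxSize; Buf is left unchanged.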
+std::unique_ptr parseAndVerify(const uint8_t *Data, size_t Size, + LLVMContext &Context); + } // end llvm namespace #endif // LLVM_FUZZMUTATE_IRMUTATOR_H diff --git a/llvm/include/llvm/FuzzMutate/OpDescriptor.h b/llvm/include/llvm/FuzzMutate/OpDescriptor.h index 43c810920766..847f975571bc 100644 --- a/llvm/include/llvm/FuzzMutate/OpDescriptor.h +++ b/llvm/include/llvm/FuzzMutate/OpDescriptor.h @@ -15,16 +15,15 @@ #define LLVM_FUZZMUTATE_OPDESCRIPTOR_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include namespace llvm { +class Instruction; namespace fuzzerop { /// @{ @@ -146,7 +145,8 @@ static inline SourcePred sizedPtrType() { return false; if (const auto *PtrT = dyn_cast(V->getType())) - return PtrT->getPointerElementType()->isSized(); + return PtrT->isOpaque() || + PtrT->getNonOpaquePointerElementType()->isSized(); return false; }; auto Make = [](ArrayRef, ArrayRef Ts) { diff --git a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h index f3b609702e9d..aeb41baa8e07 100644 --- a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h +++ b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h @@ -13,12 +13,19 @@ #ifndef LLVM_FUZZMUTATE_RANDOMIRBUILDER_H #define LLVM_FUZZMUTATE_RANDOMIRBUILDER_H -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/FuzzMutate/IRMutator.h" -#include "llvm/FuzzMutate/Random.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include namespace llvm { +class BasicBlock; +class Instruction; +class LLVMContext; +class Type; +class Value; +namespace fuzzerop { +class SourcePred; +} using RandomEngine = std::mt19937; diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h index 69048554a05c..50afe016f0d6 100644 --- a/llvm/include/llvm/IR/AbstractCallSite.h +++ b/llvm/include/llvm/IR/AbstractCallSite.h @@ -14,17 +14,17 @@ #ifndef LLVM_IR_ABSTRACTCALLSITE_H #define LLVM_IR_ABSTRACTCALLSITE_H -#include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Use.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include namespace llvm { +class Argument; +class Use; + /// AbstractCallSite /// /// An abstract call site is a wrapper that allows to treat direct, diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index 7cbfa2a7b6ce..3b74853cdafa 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -14,7 +14,6 @@ #define LLVM_IR_ARGUMENT_H #include "llvm/ADT/Twine.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Value.h" diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h index 08e6c8b6f1e0..2d2ecfbde6e6 100644 --- a/llvm/include/llvm/IR/Assumptions.h +++ b/llvm/include/llvm/IR/Assumptions.h @@ -34,6 +34,10 @@ extern StringSet<> KnownAssumptionStrings; /// Helper that allows to insert a new assumption string in the known assumption /// set by creating a (static) object. 
struct KnownAssumptionString { + KnownAssumptionString(const char *AssumptionStr) + : AssumptionStr(AssumptionStr) { + KnownAssumptionStrings.insert(AssumptionStr); + } KnownAssumptionString(StringRef AssumptionStr) : AssumptionStr(AssumptionStr) { KnownAssumptionStrings.insert(AssumptionStr); diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index 74b60f1e3d05..6a4e6d63a973 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -17,11 +17,13 @@ #include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Alignment.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include #include @@ -42,6 +44,18 @@ class Function; class LLVMContext; class Type; +enum class AllocFnKind : uint64_t { + Unknown = 0, + Alloc = 1 << 0, // Allocator function returns a new allocation + Realloc = 1 << 1, // Allocator function resizes the `allocptr` argument + Free = 1 << 2, // Allocator function frees the `allocptr` argument + Uninitialized = 1 << 3, // Allocator function returns uninitialized memory + Zeroed = 1 << 4, // Allocator function returns zeroed memory + Aligned = 1 << 5, // Allocator function aligns allocations per the + // `allocalign` argument + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Aligned) +}; + //===----------------------------------------------------------------------===// /// \class /// Functions, function parameters, and return types can have attributes @@ -130,6 +144,7 @@ public: static Attribute getWithByRefType(LLVMContext &Context, Type *Ty); static Attribute getWithPreallocatedType(LLVMContext &Context, Type *Ty); static Attribute getWithInAllocaType(LLVMContext &Context, Type *Ty); + static Attribute getWithUWTableKind(LLVMContext &Context, UWTableKind Kind); /// For a typed attribute, return the equivalent attribute with the type /// changed to \p ReplacementTy. @@ -223,6 +238,12 @@ public: /// unknown. Optional getVScaleRangeMax() const; + // Returns the unwind table kind. + UWTableKind getUWTableKind() const; + + // Returns the allocator function kind. + AllocFnKind getAllocKind() const; + /// The Attribute is converted to a string of equivalent mnemonic. This /// is, presumably, for writing out the mnemonics for the assembly writer. std::string getAsString(bool InAttrGrp = false) const; @@ -353,6 +374,8 @@ public: std::pair> getAllocSizeArgs() const; unsigned getVScaleRangeMin() const; Optional getVScaleRangeMax() const; + UWTableKind getUWTableKind() const; + AllocFnKind getAllocKind() const; std::string getAsString(bool InAttrGrp = false) const; /// Return true if this attribute set belongs to the LLVMContext. @@ -841,6 +864,11 @@ public: /// arg. uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const; + /// Get the unwind table kind requested for the function. + UWTableKind getUWTableKind() const; + + AllocFnKind getAllocKind() const; + /// Return the attributes at the index as a string. std::string getAsString(unsigned Index, bool InAttrGrp = false) const; @@ -1190,6 +1218,13 @@ public: /// Attribute.getIntValue(). AttrBuilder &addVScaleRangeAttrFromRawRepr(uint64_t RawVScaleRangeRepr); + /// This turns the unwind table kind into the form used internally in + /// Attribute. 
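A minimal sketch of the two builder hooks whose declarations follow below, assuming an invented AttrBuilder B and Function *F; the bitwise composition relies on the LLVM_MARK_AS_BITMASK_ENUM marker given for AllocFnKind earlier in this hunk:

    // AttrBuilder B(Ctx);
    // B.addUWTableAttr(UWTableKind::Default);
    // B.addAllocKindAttr(AllocFnKind::Alloc | AllocFnKind::Uninitialized);
    // F->addFnAttrs(B); // mark F as a malloc-like allocator with unwind info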
+ AttrBuilder &addUWTableAttr(UWTableKind Kind); + + // This turns the allocator kind into the form used internally in Attribute. + AttrBuilder &addAllocKindAttr(AllocFnKind Kind); + ArrayRef attrs() const { return Attrs; } bool operator==(const AttrBuilder &B) const; @@ -1198,8 +1233,17 @@ public: namespace AttributeFuncs { -/// Which attributes cannot be applied to a type. -AttributeMask typeIncompatible(Type *Ty); +enum AttributeSafetyKind : uint8_t { + ASK_SAFE_TO_DROP = 1, + ASK_UNSAFE_TO_DROP = 2, + ASK_ALL = ASK_SAFE_TO_DROP | ASK_UNSAFE_TO_DROP, +}; + +/// Which attributes cannot be applied to a type. The argument \p ASK indicates, +/// if only attributes that are known to be safely droppable are contained in +/// the mask; only attributes that might be unsafe to drop (e.g., ABI-related +/// attributes) are in the mask; or both. +AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK = ASK_ALL); /// Get param/return attributes which imply immediate undefined behavior if an /// invalid value is passed. For example, this includes noundef (where undef @@ -1230,6 +1274,9 @@ void mergeAttributesForInlining(Function &Caller, const Function &Callee); /// \param [in] ToMerge - The function to merge attributes from. void mergeAttributesForOutlining(Function &Base, const Function &ToMerge); +/// Update min-legal-vector-width if it is in Attribute and less than Width. +void updateMinLegalVectorWidthAttr(Function &Fn, uint64_t Width); + } // end namespace AttributeFuncs } // end namespace llvm diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 40c554c269ca..7b955b40b0a8 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -47,6 +47,16 @@ class StrBoolAttr : Attr; /// 0 means unaligned (different from align(1)). def Alignment : IntAttr<"align", [ParamAttr, RetAttr]>; +/// Parameter of a function that tells us the alignment of an allocation, as in +/// aligned_alloc and aligned ::operator::new. +def AllocAlign: EnumAttr<"allocalign", [ParamAttr]>; + +/// Describes behavior of an allocator function in terms of known properties. +def AllocKind: IntAttr<"allockind", [FnAttr]>; + +/// Parameter is the pointer to be manipulated by the allocator function. +def AllocatedPointer : EnumAttr<"allocptr", [ParamAttr]>; + /// The result of the function is guaranteed to point to a number of bytes that /// we can determine if we know the value of the function's arguments. def AllocSize : IntAttr<"allocsize", [FnAttr]>; @@ -175,6 +185,9 @@ def NoProfile : EnumAttr<"noprofile", [FnAttr]>; /// Function doesn't unwind stack. def NoUnwind : EnumAttr<"nounwind", [FnAttr]>; +/// No SanitizeBounds instrumentation. +def NoSanitizeBounds : EnumAttr<"nosanitize_bounds", [FnAttr]>; + /// No SanitizeCoverage instrumentation. def NoSanitizeCoverage : EnumAttr<"nosanitize_coverage", [FnAttr]>; @@ -273,7 +286,7 @@ def SwiftSelf : EnumAttr<"swiftself", [ParamAttr]>; def SwiftAsync : EnumAttr<"swiftasync", [ParamAttr]>; /// Function must be in a unwind table. -def UWTable : EnumAttr<"uwtable", [FnAttr]>; +def UWTable : IntAttr<"uwtable", [FnAttr]>; /// Minimum/Maximum vscale value for function. def VScaleRange : IntAttr<"vscale_range", [FnAttr]>; @@ -290,10 +303,14 @@ def ZExt : EnumAttr<"zeroext", [ParamAttr, RetAttr]>; /// Function is required to make Forward Progress. def MustProgress : EnumAttr<"mustprogress", [FnAttr]>; +/// Function is a presplit coroutine. 
+def PresplitCoroutine : EnumAttr<"presplitcoroutine", [FnAttr]>; + /// Target-independent string attributes. def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">; +def ApproxFuncFPMath : StrBoolAttr<"approx-func-fp-math">; def NoSignedZerosFPMath : StrBoolAttr<"no-signed-zeros-fp-math">; def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">; def NoJumpTables : StrBoolAttr<"no-jump-tables">; @@ -333,6 +350,7 @@ class MergeRule { def : MergeRule<"setAND">; def : MergeRule<"setAND">; def : MergeRule<"setAND">; +def : MergeRule<"setAND">; def : MergeRule<"setAND">; def : MergeRule<"setAND">; def : MergeRule<"setOR">; @@ -345,6 +363,3 @@ def : MergeRule<"adjustCallerStackProbeSize">; def : MergeRule<"adjustMinLegalVectorWidth">; def : MergeRule<"adjustNullPointerValidAttr">; def : MergeRule<"setAND">; - -// Target dependent attributes -include "llvm/IR/AttributesAMDGPU.td" diff --git a/llvm/include/llvm/IR/AttributesAMDGPU.td b/llvm/include/llvm/IR/AttributesAMDGPU.td deleted file mode 100644 index e2a0f045b656..000000000000 --- a/llvm/include/llvm/IR/AttributesAMDGPU.td +++ /dev/null @@ -1,14 +0,0 @@ -//===- AttributesAMDGPU.td - Defines AMDGPU attributes -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines AMDGPU specific attributes. -// -//===----------------------------------------------------------------------===// - -def AMDGPUUnsafeFPAtomics : StrBoolAttr<"amdgpu-unsafe-fp-atomics">; -def : MergeRule<"setAND">; diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index f331fc3c413f..12952f25cbda 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -14,19 +14,24 @@ #define LLVM_IR_AUTOUPGRADE_H #include "llvm/ADT/StringRef.h" +#include namespace llvm { class AttrBuilder; - class CallInst; + class CallBase; class Constant; class Function; class Instruction; + class GlobalVariable; class MDNode; class Module; - class GlobalVariable; + class StringRef; class Type; class Value; + template class OperandBundleDefT; + using OperandBundleDef = OperandBundleDefT; + /// This is a more granular function that simply checks an intrinsic function /// for upgrading, and returns true if it requires upgrading. It may return /// null in NewFn if the all calls to the original intrinsic function @@ -35,7 +40,7 @@ namespace llvm { /// This is the complement to the above, replacing a specific call to an /// intrinsic function with a call to the specified new function. - void UpgradeIntrinsicCall(CallInst *CI, Function *NewFn); + void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn); // This upgrades the comment for objc retain release markers in inline asm // calls @@ -77,7 +82,7 @@ namespace llvm { /// This is an auto-upgrade for bitcast constant expression between pointers /// with different address spaces: the instruction is replaced by a pair /// ptrtoint+inttoptr. - Value *UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy); + Constant *UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy); /// Check the debug info version number, if it is out-dated, drop the debug /// info. Return true if module is modified. 
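The UpgradeIntrinsicCall() change above widens the call-site type from CallInst to CallBase, so invoked intrinsics can be upgraded too. A driver sketch of the header's two-step protocol, assuming UpgradeIntrinsicFunction() (the checker the doc comment above refers to, declared elsewhere in this header) and an invented Function *F:

    // Function *NewFn;
    // if (UpgradeIntrinsicFunction(F, NewFn))
    //   for (User *U : make_early_inc_range(F->users()))
    //     if (auto *CB = dyn_cast<CallBase>(U))
    //       UpgradeIntrinsicCall(CB, NewFn);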
@@ -98,6 +103,9 @@ namespace llvm { /// Upgrade attributes that changed format or kind. void UpgradeAttributes(AttrBuilder &B); + /// Upgrade operand bundles (without knowing about their user instruction). + void UpgradeOperandBundles(std::vector &OperandBundles); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 184ddfc01c29..d487223eca02 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -22,9 +22,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Value.h" -#include "llvm/Support/CBindingWrapping.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include #include #include @@ -119,7 +116,11 @@ public: /// Returns the terminator instruction if the block is well formed or null /// if the block is not well formed. - const Instruction *getTerminator() const LLVM_READONLY; + const Instruction *getTerminator() const LLVM_READONLY { + if (InstList.empty() || !InstList.back().isTerminator()) + return nullptr; + return &InstList.back(); + } Instruction *getTerminator() { return const_cast( static_cast(this)->getTerminator()); diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h index 0ee584f8af7e..28a8d31a4cc6 100644 --- a/llvm/include/llvm/IR/CFG.h +++ b/llvm/include/llvm/IR/CFG.h @@ -25,7 +25,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include #include #include diff --git a/llvm/include/llvm/IR/ConstantFold.h b/llvm/include/llvm/IR/ConstantFold.h new file mode 100644 index 000000000000..d637a180b0ba --- /dev/null +++ b/llvm/include/llvm/IR/ConstantFold.h @@ -0,0 +1,60 @@ +//==-- ConstantFold.h - DL-independent Constant Folding Interface -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the DataLayout-independent constant folding interface. +// When possible, the DataLayout-aware constant folding interface in +// Analysis/ConstantFolding.h should be preferred. +// +// These interfaces are used by the ConstantExpr::get* methods to automatically +// fold constants when possible. +// +// These operators may return a null object if they don't know how to perform +// the specified operation on the specified constant types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_CONSTANTFOLD_H +#define LLVM_IR_CONSTANTFOLD_H + +#include "llvm/ADT/Optional.h" +#include "llvm/IR/InstrTypes.h" + +namespace llvm { + template class ArrayRef; + class Value; + class Constant; + class Type; + + // Constant fold various types of instruction... 
+ Constant *ConstantFoldCastInstruction( + unsigned opcode, ///< The opcode of the cast + Constant *V, ///< The source constant + Type *DestTy ///< The destination type + ); + Constant *ConstantFoldSelectInstruction(Constant *Cond, + Constant *V1, Constant *V2); + Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); + Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, + Constant *Idx); + Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, + ArrayRef Mask); + Constant *ConstantFoldExtractValueInstruction(Constant *Agg, + ArrayRef Idxs); + Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, + ArrayRef Idxs); + Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V); + Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, + Constant *V2); + Constant *ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, + Constant *C1, Constant *C2); + Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds, + Optional InRangeIndex, + ArrayRef Idxs); +} // End llvm namespace + +#endif diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h index 28dc63a5886e..5e7ddb9aa673 100644 --- a/llvm/include/llvm/IR/ConstantFolder.h +++ b/llvm/include/llvm/IR/ConstantFolder.h @@ -19,9 +19,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/ConstantFold.h" #include "llvm/IR/IRBuilderFolder.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Operator.h" namespace llvm { @@ -38,31 +39,46 @@ public: // Return an existing value or a constant if the operation can be simplified. // Otherwise return nullptr. //===--------------------------------------------------------------------===// - Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false, - bool HasNSW = false) const override { + + Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); if (LC && RC) - return ConstantExpr::getAdd(LC, RC, HasNUW, HasNSW); + return ConstantExpr::get(Opc, LC, RC); return nullptr; } - Value *FoldAnd(Value *LHS, Value *RHS) const override { + Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool IsExact) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); if (LC && RC) - return ConstantExpr::getAnd(LC, RC); + return ConstantExpr::get(Opc, LC, RC, + IsExact ? 
PossiblyExactOperator::IsExact : 0); return nullptr; } - Value *FoldOr(Value *LHS, Value *RHS) const override { + Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool HasNUW, bool HasNSW) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); - if (LC && RC) - return ConstantExpr::getOr(LC, RC); + if (LC && RC) { + unsigned Flags = 0; + if (HasNUW) + Flags |= OverflowingBinaryOperator::NoUnsignedWrap; + if (HasNSW) + Flags |= OverflowingBinaryOperator::NoSignedWrap; + return ConstantExpr::get(Opc, LC, RC, Flags); + } return nullptr; } + Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + FastMathFlags FMF) const override { + return FoldBinOp(Opc, LHS, RHS); + } + Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { auto *LC = dyn_cast(LHS); auto *RC = dyn_cast(RHS); @@ -95,103 +111,57 @@ public: return nullptr; } - //===--------------------------------------------------------------------===// - // Binary Operators - //===--------------------------------------------------------------------===// - - Constant *CreateFAdd(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getFAdd(LHS, RHS); - } - - Constant *CreateSub(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const override { - return ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW); - } - - Constant *CreateFSub(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getFSub(LHS, RHS); - } - - Constant *CreateMul(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const override { - return ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW); - } - - Constant *CreateFMul(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getFMul(LHS, RHS); - } - - Constant *CreateUDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstantExpr::getUDiv(LHS, RHS, isExact); - } - - Constant *CreateSDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstantExpr::getSDiv(LHS, RHS, isExact); - } - - Constant *CreateFDiv(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getFDiv(LHS, RHS); - } - - Constant *CreateURem(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getURem(LHS, RHS); - } - - Constant *CreateSRem(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getSRem(LHS, RHS); - } - - Constant *CreateFRem(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getFRem(LHS, RHS); - } - - Constant *CreateShl(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const override { - return ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW); - } - - Constant *CreateLShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstantExpr::getLShr(LHS, RHS, isExact); - } - - Constant *CreateAShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - return ConstantExpr::getAShr(LHS, RHS, isExact); + Value *FoldExtractValue(Value *Agg, + ArrayRef IdxList) const override { + if (auto *CAgg = dyn_cast(Agg)) + return ConstantFoldExtractValueInstruction(CAgg, IdxList); + return nullptr; + }; + + Value *FoldInsertValue(Value *Agg, Value *Val, + ArrayRef IdxList) const override { + auto *CAgg = dyn_cast(Agg); + auto *CVal = dyn_cast(Val); + if (CAgg && CVal) + return ConstantFoldInsertValueInstruction(CAgg, CVal, IdxList); + return nullptr; } - Constant *CreateOr(Constant *LHS, Constant *RHS) 
const { - return ConstantExpr::getOr(LHS, RHS); + Value *FoldExtractElement(Value *Vec, Value *Idx) const override { + auto *CVec = dyn_cast(Vec); + auto *CIdx = dyn_cast(Idx); + if (CVec && CIdx) + return ConstantExpr::getExtractElement(CVec, CIdx); + return nullptr; } - Constant *CreateXor(Constant *LHS, Constant *RHS) const override { - return ConstantExpr::getXor(LHS, RHS); + Value *FoldInsertElement(Value *Vec, Value *NewElt, + Value *Idx) const override { + auto *CVec = dyn_cast(Vec); + auto *CNewElt = dyn_cast(NewElt); + auto *CIdx = dyn_cast(Idx); + if (CVec && CNewElt && CIdx) + return ConstantExpr::getInsertElement(CVec, CNewElt, CIdx); + return nullptr; } - Constant *CreateBinOp(Instruction::BinaryOps Opc, - Constant *LHS, Constant *RHS) const override { - return ConstantExpr::get(Opc, LHS, RHS); + Value *FoldShuffleVector(Value *V1, Value *V2, + ArrayRef Mask) const override { + auto *C1 = dyn_cast(V1); + auto *C2 = dyn_cast(V2); + if (C1 && C2) + return ConstantExpr::getShuffleVector(C1, C2, Mask); + return nullptr; } //===--------------------------------------------------------------------===// // Unary Operators //===--------------------------------------------------------------------===// - Constant *CreateNeg(Constant *C, - bool HasNUW = false, bool HasNSW = false) const override { - return ConstantExpr::getNeg(C, HasNUW, HasNSW); - } - Constant *CreateFNeg(Constant *C) const override { return ConstantExpr::getFNeg(C); } - Constant *CreateNot(Constant *C) const override { - return ConstantExpr::getNot(C); - } - Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { return ConstantExpr::get(Opc, C); } @@ -255,34 +225,6 @@ public: Constant *RHS) const override { return ConstantExpr::getCompare(P, LHS, RHS); } - - //===--------------------------------------------------------------------===// - // Other Instructions - //===--------------------------------------------------------------------===// - - Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const override { - return ConstantExpr::getExtractElement(Vec, Idx); - } - - Constant *CreateInsertElement(Constant *Vec, Constant *NewElt, - Constant *Idx) const override { - return ConstantExpr::getInsertElement(Vec, NewElt, Idx); - } - - Constant *CreateShuffleVector(Constant *V1, Constant *V2, - ArrayRef Mask) const override { - return ConstantExpr::getShuffleVector(V1, V2, Mask); - } - - Constant *CreateExtractValue(Constant *Agg, - ArrayRef IdxList) const override { - return ConstantExpr::getExtractValue(Agg, IdxList); - } - - Constant *CreateInsertValue(Constant *Agg, Constant *Val, - ArrayRef IdxList) const override { - return ConstantExpr::getInsertValue(Agg, Val, IdxList); - } }; } // end namespace llvm diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h index fea4d0da1d0d..68abf4ef555d 100644 --- a/llvm/include/llvm/IR/ConstantRange.h +++ b/llvm/include/llvm/IR/ConstantRange.h @@ -553,6 +553,9 @@ public: /// Return whether unsigned mul of the two ranges always/never overflows. OverflowResult unsignedMulMayOverflow(const ConstantRange &Other) const; + /// Return known bits for values in this range. + KnownBits toKnownBits() const; + /// Print out the bounds to a stream. 
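Two small sketches for the folder and range additions above, with invented LHS, RHS, and bounds; ConstantFolder now funnels binary operators through one opcode-based hook, and toKnownBits() conservatively summarizes a range:

    // ConstantFolder Folder;
    // Value *Sum = Folder.FoldBinOp(Instruction::Add, LHS, RHS);
    // // Sum is nullptr unless both operands are Constants.

    // ConstantRange CR(APInt(8, 16), APInt(8, 32)); // values 16..31: 0001xxxx
    // KnownBits KB = CR.toKnownBits();              // top four bits are known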
void print(raw_ostream &OS) const; diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index fb884912b318..b5445ff71b74 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -289,7 +289,8 @@ public: APInt *Payload = nullptr); static Constant *getSNaN(Type *Ty, bool Negative = false, APInt *Payload = nullptr); - static Constant *getNegativeZero(Type *Ty); + static Constant *getZero(Type *Ty, bool Negative = false); + static Constant *getNegativeZero(Type *Ty) { return getZero(Ty, true); } static Constant *getInfinity(Type *Ty, bool Negative = false); /// Return true if Ty is big enough to represent V. @@ -1120,9 +1121,12 @@ public: /// commutative, callers can acquire the operand 1 identity constant by /// setting AllowRHSConstant to true. For example, any shift has a zero /// identity constant for operand 1: X shift 0 = X. + /// If this is a fadd/fsub operation and we don't care about signed zeros, + /// then setting NSZ to true returns the identity +0.0 instead of -0.0. /// Return nullptr if the operator does not have an identity constant. static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty, - bool AllowRHSConstant = false); + bool AllowRHSConstant = false, + bool NSZ = false); /// Return the absorbing element for the given binary /// operation, i.e. a constant C such that X op C = C and C op X = C for @@ -1160,6 +1164,11 @@ public: Type *Ty ///< The type to trunc or bitcast C to ); + /// Create either an sext, trunc or nothing, depending on whether Ty is + /// wider, narrower or the same as C->getType(). This only works with + /// integer or vector of integer types. + static Constant *getSExtOrTrunc(Constant *C, Type *Ty); + /// Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant /// expression. static Constant * @@ -1285,8 +1294,6 @@ public: static Constant *getShuffleVector(Constant *V1, Constant *V2, ArrayRef Mask, Type *OnlyIfReducedTy = nullptr); - static Constant *getExtractValue(Constant *Agg, ArrayRef Idxs, - Type *OnlyIfReducedTy = nullptr); static Constant *getInsertValue(Constant *Agg, Constant *Val, ArrayRef Idxs, Type *OnlyIfReducedTy = nullptr); diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index fc461fc3f49f..9afa715b650c 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/Support/Casting.h" @@ -220,6 +221,23 @@ namespace llvm { /// \param SizeInBits Size of the type. DIStringType *createStringType(StringRef Name, uint64_t SizeInBits); + /// Create debugging information entry for Fortran + /// assumed length string type. + /// \param Name Type name. + /// \param StringLength String length expressed as DIVariable *. + /// \param StrLocationExp Optional memory location of the string. + DIStringType *createStringType(StringRef Name, DIVariable *StringLength, + DIExpression *StrLocationExp = nullptr); + + /// Create debugging information entry for Fortran + /// assumed length string type. + /// \param Name Type name. + /// \param StringLengthExp String length expressed in DIExpression form. + /// \param StrLocationExp Optional memory location of the string. 
+ DIStringType *createStringType(StringRef Name, + DIExpression *StringLengthExp, + DIExpression *StrLocationExp = nullptr); + /// Create debugging information entry for a qualified /// type, e.g. 'const int'. /// \param Tag Tag identifing type, e.g. dwarf::TAG_volatile_type @@ -734,6 +752,8 @@ namespace llvm { /// \param TParams Function template parameters. /// \param ThrownTypes Exception types this function may throw. /// \param Annotations Attribute Annotations. + /// \param TargetFuncName The name of the target function if this is + /// a trampoline. DISubprogram * createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, @@ -742,7 +762,8 @@ namespace llvm { DITemplateParameterArray TParams = nullptr, DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr, - DINodeArray Annotations = nullptr); + DINodeArray Annotations = nullptr, + StringRef TargetFuncName = ""); /// Identical to createFunction, /// except that the resulting DbgNode is meant to be RAUWed. diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 36438fc4f4e0..a6621c963d85 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -26,10 +26,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Alignment.h" #include "llvm/Support/TrailingObjects.h" #include "llvm/Support/TypeSize.h" #include diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 96569179060f..db1d031a062d 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -22,7 +22,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" @@ -61,6 +60,10 @@ namespace llvm { +namespace dwarf { +enum Tag : uint16_t; +} + extern cl::opt EnableFSDiscriminator; class DITypeRefArray { @@ -156,7 +159,7 @@ protected: void setTag(unsigned Tag) { SubclassData16 = Tag; } public: - dwarf::Tag getTag() const { return (dwarf::Tag)SubclassData16; } + dwarf::Tag getTag() const; /// Debug info flags. /// @@ -267,7 +270,7 @@ public: /// Return a (temporary) clone of this. 
TempGenericDINode clone() const { return cloneImpl(); } - dwarf::Tag getTag() const { return (dwarf::Tag)SubclassData16; } + dwarf::Tag getTag() const; StringRef getHeader() const { return getStringOperand(0); } MDString *getRawHeader() const { return getOperandAs(0); } @@ -298,8 +301,7 @@ class DISubrange : public DINode { friend class LLVMContextImpl; friend class MDNode; - DISubrange(LLVMContext &C, StorageType Storage, ArrayRef Ops) - : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops) {} + DISubrange(LLVMContext &C, StorageType Storage, ArrayRef Ops); ~DISubrange() = default; @@ -363,9 +365,7 @@ class DIGenericSubrange : public DINode { friend class MDNode; DIGenericSubrange(LLVMContext &C, StorageType Storage, - ArrayRef Ops) - : DINode(C, DIGenericSubrangeKind, Storage, - dwarf::DW_TAG_generic_subrange, Ops) {} + ArrayRef Ops); ~DIGenericSubrange() = default; @@ -414,11 +414,7 @@ class DIEnumerator : public DINode { APInt Value; DIEnumerator(LLVMContext &C, StorageType Storage, const APInt &Value, - bool IsUnsigned, ArrayRef Ops) - : DINode(C, DIEnumeratorKind, Storage, dwarf::DW_TAG_enumerator, Ops), - Value(Value) { - SubclassData32 = IsUnsigned; - } + bool IsUnsigned, ArrayRef Ops); DIEnumerator(LLVMContext &C, StorageType Storage, int64_t Value, bool IsUnsigned, ArrayRef Ops) : DIEnumerator(C, Storage, APInt(64, Value, !IsUnsigned), IsUnsigned, @@ -568,9 +564,7 @@ private: DIFile(LLVMContext &C, StorageType Storage, Optional> CS, Optional Src, - ArrayRef Ops) - : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops), - Checksum(CS), Source(Src) {} + ArrayRef Ops); ~DIFile() = default; static DIFile *getImpl(LLVMContext &Context, StringRef Filename, @@ -1021,42 +1015,19 @@ public: /// Get casted version of extra data. 
/// @{ - DIType *getClassType() const { - assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); - return cast_or_null(getExtraData()); - } + DIType *getClassType() const; DIObjCProperty *getObjCProperty() const { return dyn_cast_or_null(getExtraData()); } - uint32_t getVBPtrOffset() const { - assert(getTag() == dwarf::DW_TAG_inheritance); - if (auto *CM = cast_or_null(getExtraData())) - if (auto *CI = dyn_cast_or_null(CM->getValue())) - return static_cast(CI->getZExtValue()); - return 0; - } + uint32_t getVBPtrOffset() const; - Constant *getStorageOffsetInBits() const { - assert(getTag() == dwarf::DW_TAG_member && isBitField()); - if (auto *C = cast_or_null(getExtraData())) - return C->getValue(); - return nullptr; - } + Constant *getStorageOffsetInBits() const; - Constant *getConstant() const { - assert(getTag() == dwarf::DW_TAG_member && isStaticMember()); - if (auto *C = cast_or_null(getExtraData())) - return C->getValue(); - return nullptr; - } - Constant *getDiscriminantValue() const { - assert(getTag() == dwarf::DW_TAG_member && !isStaticMember()); - if (auto *C = cast_or_null(getExtraData())) - return C->getValue(); - return nullptr; - } + Constant *getConstant() const; + + Constant *getDiscriminantValue() const; /// @} static bool classof(const Metadata *MD) { @@ -1300,10 +1271,7 @@ class DISubroutineType : public DIType { uint8_t CC; DISubroutineType(LLVMContext &C, StorageType Storage, DIFlags Flags, - uint8_t CC, ArrayRef Ops) - : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type, - 0, 0, 0, 0, Flags, Ops), - CC(CC) {} + uint8_t CC, ArrayRef Ops); ~DISubroutineType() = default; static DISubroutineType *getImpl(LLVMContext &Context, DIFlags Flags, @@ -1330,6 +1298,12 @@ public: (Flags, CC, TypeArray)) TempDISubroutineType clone() const { return cloneImpl(); } + // Returns a new temporary DISubroutineType with updated CC + TempDISubroutineType cloneWithCC(uint8_t CC) const { + auto NewTy = clone(); + NewTy->CC = CC; + return NewTy; + } uint8_t getCC() const { return CC; } @@ -1385,15 +1359,7 @@ private: bool IsOptimized, unsigned RuntimeVersion, unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling, unsigned NameTableKind, - bool RangesBaseAddress, ArrayRef Ops) - : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops), - SourceLanguage(SourceLanguage), IsOptimized(IsOptimized), - RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), - DWOId(DWOId), SplitDebugInlining(SplitDebugInlining), - DebugInfoForProfiling(DebugInfoForProfiling), - NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) { - assert(Storage != Uniqued); - } + bool RangesBaseAddress, ArrayRef Ops); ~DICompileUnit() = default; static DICompileUnit * @@ -1872,19 +1838,7 @@ public: static DISPFlags toSPFlags(bool IsLocalToUnit, bool IsDefinition, bool IsOptimized, unsigned Virtuality = SPFlagNonvirtual, - bool IsMainSubprogram = false) { - // We're assuming virtuality is the low-order field. - static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) && - int(SPFlagPureVirtual) == - int(dwarf::DW_VIRTUALITY_pure_virtual), - "Virtuality constant mismatch"); - return static_cast( - (Virtuality & SPFlagVirtuality) | - (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) | - (IsDefinition ? SPFlagDefinition : SPFlagZero) | - (IsOptimized ? SPFlagOptimized : SPFlagZero) | - (IsMainSubprogram ? 
SPFlagMainSubprogram : SPFlagZero)); - } + bool IsMainSubprogram = false); private: DIFlags Flags; @@ -1892,13 +1846,7 @@ private: DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line, unsigned ScopeLine, unsigned VirtualIndex, int ThisAdjustment, - DIFlags Flags, DISPFlags SPFlags, ArrayRef Ops) - : DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram, - Ops), - Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex), - ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags) { - static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range"); - } + DIFlags Flags, DISPFlags SPFlags, ArrayRef Ops); ~DISubprogram() = default; static DISubprogram * @@ -1909,13 +1857,14 @@ private: DISPFlags SPFlags, DICompileUnit *Unit, DITemplateParameterArray TemplateParams, DISubprogram *Declaration, DINodeArray RetainedNodes, DITypeArray ThrownTypes, - DINodeArray Annotations, StorageType Storage, - bool ShouldCreate = true) { + DINodeArray Annotations, StringRef TargetFuncName, + StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, LinkageName), File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams.get(), Declaration, RetainedNodes.get(), ThrownTypes.get(), Annotations.get(), + getCanonicalMDString(Context, TargetFuncName), Storage, ShouldCreate); } static DISubprogram * @@ -1925,7 +1874,8 @@ private: int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit, Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes, Metadata *ThrownTypes, Metadata *Annotations, - StorageType Storage, bool ShouldCreate = true); + MDString *TargetFuncName, StorageType Storage, + bool ShouldCreate = true); TempDISubprogram cloneImpl() const { return getTemporary(getContext(), getScope(), getName(), getLinkageName(), @@ -1933,7 +1883,8 @@ private: getContainingType(), getVirtualIndex(), getThisAdjustment(), getFlags(), getSPFlags(), getUnit(), getTemplateParams(), getDeclaration(), - getRetainedNodes(), getThrownTypes(), getAnnotations()); + getRetainedNodes(), getThrownTypes(), getAnnotations(), + getTargetFuncName()); } public: @@ -1945,10 +1896,11 @@ public: DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit, DITemplateParameterArray TemplateParams = nullptr, DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr, - DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr), + DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr, + StringRef TargetFuncName = ""), (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams, - Declaration, RetainedNodes, ThrownTypes, Annotations)) + Declaration, RetainedNodes, ThrownTypes, Annotations, TargetFuncName)) DEFINE_MDNODE_GET( DISubprogram, @@ -1958,10 +1910,10 @@ public: DIFlags Flags, DISPFlags SPFlags, Metadata *Unit, Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr, Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr, - Metadata *Annotations = nullptr), + Metadata *Annotations = nullptr, MDString *TargetFuncName = nullptr), (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams, - Declaration, RetainedNodes, ThrownTypes, Annotations)) + Declaration, RetainedNodes, ThrownTypes, Annotations, TargetFuncName)) 
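A sketch of the new DISubprogram hooks in this hunk, assuming an invented distinct DISubprogram *SP and assuming the usual MDNode::replaceWithUniqued() step to make the temporary returned by cloneWithCC() permanent:

    // if (SP->isDistinct()) {
    //   TempDISubroutineType NewTy =
    //       SP->getType()->cloneWithCC(dwarf::DW_CC_nocall);
    //   SP->replaceType(MDNode::replaceWithUniqued(std::move(NewTy)));
    // }
    // StringRef Target = SP->getTargetFuncName(); // "" unless a trampoline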
TempDISubprogram clone() const { return cloneImpl(); } @@ -2050,6 +2002,10 @@ public: DIType *getContainingType() const { return cast_or_null(getRawContainingType()); } + void replaceType(DISubroutineType *Ty) { + assert(isDistinct() && "Only distinct nodes can mutate"); + replaceOperandWith(4, Ty); + } DICompileUnit *getUnit() const { return cast_or_null(getRawUnit()); @@ -2070,6 +2026,9 @@ public: DINodeArray getAnnotations() const { return cast_or_null(getRawAnnotations()); } + StringRef getTargetFuncName() const { + return (getRawTargetFuncName()) ? getStringOperand(12) : StringRef(); + } Metadata *getRawScope() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs(2); } @@ -2090,6 +2049,9 @@ public: Metadata *getRawAnnotations() const { return getNumOperands() > 11 ? getOperandAs(11) : nullptr; } + MDString *getRawTargetFuncName() const { + return getNumOperands() > 12 ? getOperandAs(12) : nullptr; + } void replaceRawLinkageName(MDString *LinkageName) { replaceOperandWith(3, LinkageName); @@ -2108,8 +2070,7 @@ public: class DILexicalBlockBase : public DILocalScope { protected: DILexicalBlockBase(LLVMContext &C, unsigned ID, StorageType Storage, - ArrayRef Ops) - : DILocalScope(C, ID, Storage, dwarf::DW_TAG_lexical_block, Ops) {} + ArrayRef Ops); ~DILexicalBlockBase() = default; public: @@ -2301,10 +2262,7 @@ class DINamespace : public DIScope { unsigned ExportSymbols : 1; DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols, - ArrayRef Ops) - : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, - Ops), - ExportSymbols(ExportSymbols) {} + ArrayRef Ops); ~DINamespace() = default; static DINamespace *getImpl(LLVMContext &Context, DIScope *Scope, @@ -2353,9 +2311,7 @@ class DIModule : public DIScope { bool IsDecl; DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo, - bool IsDecl, ArrayRef Ops) - : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops), - LineNo(LineNo), IsDecl(IsDecl) {} + bool IsDecl, ArrayRef Ops); ~DIModule() = default; static DIModule *getImpl(LLVMContext &Context, DIFile *File, DIScope *Scope, @@ -2449,10 +2405,7 @@ class DITemplateTypeParameter : public DITemplateParameter { friend class MDNode; DITemplateTypeParameter(LLVMContext &Context, StorageType Storage, - bool IsDefault, ArrayRef Ops) - : DITemplateParameter(Context, DITemplateTypeParameterKind, Storage, - dwarf::DW_TAG_template_type_parameter, IsDefault, - Ops) {} + bool IsDefault, ArrayRef Ops); ~DITemplateTypeParameter() = default; static DITemplateTypeParameter *getImpl(LLVMContext &Context, StringRef Name, @@ -2541,10 +2494,8 @@ class DIVariable : public DINode { uint32_t AlignInBits; protected: - DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Line, - ArrayRef Ops, uint32_t AlignInBits = 0) - : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line), - AlignInBits(AlignInBits) {} + DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, signed Line, + ArrayRef Ops, uint32_t AlignInBits = 0); ~DIVariable() = default; public: @@ -2763,9 +2714,7 @@ public: } /// Return whether the first element a DW_OP_deref. - bool startsWithDeref() const { - return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; - } + bool startsWithDeref() const; /// Holds the characteristics of one fragment of a larger variable. struct FragmentInfo { @@ -2783,7 +2732,7 @@ public: } /// Return whether this is a piece of an aggregate variable. 
- bool isFragment() const { return getFragmentInfo().hasValue(); } + bool isFragment() const { return getFragmentInfo().has_value(); } /// Return whether this is an implicit location description. bool isImplicit() const; @@ -2923,10 +2872,7 @@ public: /// Check if the expression consists of exactly one entry value operand. /// (This is the only configuration of entry values that is supported.) - bool isEntryValue() const { - return getNumElements() > 0 && - getElement(0) == dwarf::DW_OP_LLVM_entry_value; - } + bool isEntryValue() const; /// Try to shorten an expression with an initial constant operand. /// Returns a new expression and constant on success, or the original @@ -3057,10 +3003,7 @@ class DICommonBlock : public DIScope { friend class MDNode; DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo, - ArrayRef Ops) - : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block, - Ops), - LineNo(LineNo) {} + ArrayRef Ops); static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, @@ -3209,8 +3152,7 @@ class DILabel : public DINode { unsigned Line; DILabel(LLVMContext &C, StorageType Storage, unsigned Line, - ArrayRef Ops) - : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {} + ArrayRef Ops); ~DILabel() = default; static DILabel *getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name, @@ -3276,10 +3218,7 @@ class DIObjCProperty : public DINode { unsigned Attributes; DIObjCProperty(LLVMContext &C, StorageType Storage, unsigned Line, - unsigned Attributes, ArrayRef Ops) - : DINode(C, DIObjCPropertyKind, Storage, dwarf::DW_TAG_APPLE_property, - Ops), - Line(Line), Attributes(Attributes) {} + unsigned Attributes, ArrayRef Ops); ~DIObjCProperty() = default; static DIObjCProperty * @@ -3705,7 +3644,7 @@ public: const DILocation *getInlinedAt() const { return InlinedAt; } FragmentInfo getFragmentOrDefault() const { - return Fragment.getValueOr(DefaultFragment); + return Fragment.value_or(DefaultFragment); } static bool isDefaultFragment(const FragmentInfo F) { diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index f52ce3cde318..f505fd3f3e32 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -659,7 +659,7 @@ public: } /// This constructs a pointer type with the same pointee type as input - /// PointerType (or opaque pointer is the input PointerType is opaque) and the + /// PointerType (or opaque pointer if the input PointerType is opaque) and the /// given address space. This is only useful during the opaque pointer /// transition. /// TODO: remove after opaque pointer transition is complete. @@ -670,13 +670,6 @@ public: return get(PT->PointeeTy, AddressSpace); } - [[deprecated("Pointer element types are deprecated. You can *temporarily* " - "use Type::getPointerElementType() instead")]] - Type *getElementType() const { - assert(!isOpaque() && "Attempting to get element type of opaque pointer"); - return PointeeTy; - } - bool isOpaque() const { return !PointeeTy; } /// Return true if the specified type is valid as a element type. 
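With getElementType() removed, code on the opaque-pointer transition path cannot assume a pointee type; the OpDescriptor.h hunk earlier in this patch shows the resulting pattern, repeated here as the canonical shape (V is an invented Value *):

    // if (const auto *PtrT = dyn_cast<PointerType>(V->getType()))
    //   return PtrT->isOpaque() ||
    //          PtrT->getNonOpaquePointerElementType()->isSized();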
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 1ea1d9787d61..da37801b6d19 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -85,6 +85,7 @@ enum DiagnosticKind { DK_Unsupported, DK_SrcMgr, DK_DontCall, + DK_MisExpect, DK_FirstPluginKind // Must be last value to work with // getNextAvailablePluginDiagnosticKind }; @@ -1032,6 +1033,25 @@ public: void print(DiagnosticPrinter &DP) const override; }; +/// Diagnostic information for MisExpect analysis. +class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase { +public: + DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg); + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_MisExpect; + } + + const Twine &getMsg() const { return Msg; } + +private: + /// Message to report. + const Twine &Msg; +}; + static DiagnosticSeverity getDiagnosticSeverity(SourceMgr::DiagKind DK) { switch (DK) { case llvm::SourceMgr::DK_Error: diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index d13a5856df3b..a381c075d77b 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_DOMINATORS_H #define LLVM_IR_DOMINATORS_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" @@ -22,6 +23,8 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/PassManager.h" @@ -31,6 +34,7 @@ #include "llvm/Support/CFGUpdate.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/GenericDomTreeConstruction.h" +#include #include #include diff --git a/llvm/include/llvm/IR/FMF.h b/llvm/include/llvm/IR/FMF.h new file mode 100644 index 000000000000..a49feb5a8946 --- /dev/null +++ b/llvm/include/llvm/IR/FMF.h @@ -0,0 +1,121 @@ +//===-- llvm/FMF.h - Fast math flags subclass -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the fast math flags. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_FMF_H +#define LLVM_IR_FMF_H + +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// Convenience struct for specifying and reasoning about fast-math flags. +class FastMathFlags { +private: + friend class FPMathOperator; + + unsigned Flags = 0; + + FastMathFlags(unsigned F) { + // If all 7 bits are set, turn this into -1. If the number of bits grows, + // this must be updated. This is intended to provide some forward binary + // compatibility insurance for the meaning of 'fast' in case bits are added. + if (F == 0x7F) Flags = ~0U; + else Flags = F; + } + +public: + // This is how the bits are used in Value::SubclassOptionalData so they + // should fit there too. + // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New + // functionality will require a change in how this information is stored. 
+ enum { + AllowReassoc = (1 << 0), + NoNaNs = (1 << 1), + NoInfs = (1 << 2), + NoSignedZeros = (1 << 3), + AllowReciprocal = (1 << 4), + AllowContract = (1 << 5), + ApproxFunc = (1 << 6) + }; + + FastMathFlags() = default; + + static FastMathFlags getFast() { + FastMathFlags FMF; + FMF.setFast(); + return FMF; + } + + bool any() const { return Flags != 0; } + bool none() const { return Flags == 0; } + bool all() const { return Flags == ~0U; } + + void clear() { Flags = 0; } + void set() { Flags = ~0U; } + + /// Flag queries + bool allowReassoc() const { return 0 != (Flags & AllowReassoc); } + bool noNaNs() const { return 0 != (Flags & NoNaNs); } + bool noInfs() const { return 0 != (Flags & NoInfs); } + bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); } + bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); } + bool allowContract() const { return 0 != (Flags & AllowContract); } + bool approxFunc() const { return 0 != (Flags & ApproxFunc); } + /// 'Fast' means all bits are set. + bool isFast() const { return all(); } + + /// Flag setters + void setAllowReassoc(bool B = true) { + Flags = (Flags & ~AllowReassoc) | B * AllowReassoc; + } + void setNoNaNs(bool B = true) { + Flags = (Flags & ~NoNaNs) | B * NoNaNs; + } + void setNoInfs(bool B = true) { + Flags = (Flags & ~NoInfs) | B * NoInfs; + } + void setNoSignedZeros(bool B = true) { + Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros; + } + void setAllowReciprocal(bool B = true) { + Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal; + } + void setAllowContract(bool B = true) { + Flags = (Flags & ~AllowContract) | B * AllowContract; + } + void setApproxFunc(bool B = true) { + Flags = (Flags & ~ApproxFunc) | B * ApproxFunc; + } + void setFast(bool B = true) { B ? set() : clear(); } + + void operator&=(const FastMathFlags &OtherFlags) { + Flags &= OtherFlags.Flags; + } + void operator|=(const FastMathFlags &OtherFlags) { + Flags |= OtherFlags.Flags; + } + bool operator!=(const FastMathFlags &OtherFlags) const { + return Flags != OtherFlags.Flags; + } + + /// Print fast-math flags to \p O. + void print(raw_ostream &O) const; +}; + +inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) { + FMF.print(O); + return O; +} + +} // end namespace llvm + +#endif // LLVM_IR_FMF_H diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h index bf435ec6d109..e598db224211 100644 --- a/llvm/include/llvm/IR/FPEnv.h +++ b/llvm/include/llvm/IR/FPEnv.h @@ -17,10 +17,17 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/Optional.h" +#include "llvm/IR/FMF.h" namespace llvm { class StringRef; +namespace Intrinsic { +typedef unsigned ID; +} + +class Instruction; + namespace fp { /// Exception behavior used for floating point operations. @@ -59,10 +66,22 @@ inline bool isDefaultFPEnvironment(fp::ExceptionBehavior EB, RoundingMode RM) { return EB == fp::ebIgnore && RM == RoundingMode::NearestTiesToEven; } +/// Returns constrained intrinsic id to represent the given instruction in +/// strictfp function. If the instruction is already a constrained intrinsic or +/// does not have a constrained intrinsic counterpart, the function returns +/// zero. +Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr); + /// Returns true if the rounding mode RM may be QRM at compile time or /// at run time. 
inline bool canRoundingModeBe(RoundingMode RM, RoundingMode QRM) { return RM == QRM || RM == RoundingMode::Dynamic; } + +/// Returns true if the possibility of a signaling NaN can be safely +/// ignored. +inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) { + return (EB == fp::ebIgnore || FMF.noNaNs()); +} } #endif diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 31979cd2f9db..7c32c5d13760 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -42,3 +42,5 @@ LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28) LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29) LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) +LLVM_FIXED_MD_KIND(MD_nosanitize, "nosanitize", 31) +LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 32) diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 90095cd1bc77..7945c64c8610 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -32,8 +32,6 @@ #include "llvm/IR/OperandTraits.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include #include #include @@ -290,7 +288,7 @@ public: /// profile annotations. If IncludeSynthetic is false, only return true /// when the profile data is real. bool hasProfileData(bool IncludeSynthetic = false) const { - return getEntryCount(IncludeSynthetic).hasValue(); + return getEntryCount(IncludeSynthetic).has_value(); } /// Returns the set of GUIDs that needs to be imported to the function for @@ -486,11 +484,12 @@ public: return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo); } - /// A function will have the "coroutine.presplit" attribute if it's - /// a coroutine and has not gone through full CoroSplit pass. + /// Determine if the function is presplit coroutine. bool isPresplitCoroutine() const { - return hasFnAttribute("coroutine.presplit"); + return hasFnAttribute(Attribute::PresplitCoroutine); } + void setPresplitCoroutine() { addFnAttr(Attribute::PresplitCoroutine); } + void setSplittedCoroutine() { removeFnAttr(Attribute::PresplitCoroutine); } /// Determine if the function does not access memory. bool doesNotAccessMemory() const { @@ -623,15 +622,19 @@ public: bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); } void setWillReturn() { addFnAttr(Attribute::WillReturn); } + /// Get what kind of unwind table entry to generate for this function. + UWTableKind getUWTableKind() const { + return AttributeSets.getUWTableKind(); + } + /// True if the ABI mandates (or the user requested) that this /// function be in a unwind table. bool hasUWTable() const { - return hasFnAttribute(Attribute::UWTable); + return getUWTableKind() != UWTableKind::None; } - void setHasUWTable() { - addFnAttr(Attribute::UWTable); + void setUWTableKind(UWTableKind K) { + addFnAttr(Attribute::getWithUWTableKind(getContext(), K)); } - /// True if this function needs an unwind table. 
bool needsUnwindTableEntry() const { return hasUWTable() || !doesNotThrow() || hasPersonalityFn(); diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h index 4fa8e3a8dcf4..41024469044f 100644 --- a/llvm/include/llvm/IR/GCStrategy.h +++ b/llvm/include/llvm/IR/GCStrategy.h @@ -38,9 +38,7 @@ // When used with gc.statepoint, information about safepoint and roots can be // found in the binary StackMap section after code generation. Safepoint // placement is currently the responsibility of the frontend, though late -// insertion support is planned. gc.statepoint does not currently support -// custom stack map formats; such can be generated by parsing the standard -// stack map section if desired. +// insertion support is planned. // // The read and write barrier support can be used with either implementation. // @@ -101,6 +99,11 @@ public: } ///@} + /// If set, appropriate metadata tables must be emitted by the back-end + /// (assembler, JIT, or otherwise). The default stackmap information can be + /// found in the StackMap section as described in the documentation. + bool usesMetadata() const { return UsesMetadata; } + /** @name GCRoot Specific Properties * These properties and overrides only apply to collector strategies using * GCRoot. */ @@ -110,12 +113,6 @@ public: /// True if safe points need to be inferred on call sites bool needsSafePoints() const { return NeededSafePoints; } - /// If set, appropriate metadata tables must be emitted by the back-end - /// (assembler, JIT, or otherwise). For statepoint, this method is - /// currently unsupported. The stackmap information can be found in the - /// StackMap section as described in the documentation. - bool usesMetadata() const { return UsesMetadata; } - ///@} }; @@ -126,7 +123,7 @@ public: /// static GCRegistry::Add X("custom-name", /// "my custom super fancy gc strategy"); /// -/// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also +/// Note that to use a custom GCMetadataPrinter, you must also /// register your GCMetadataPrinter subclass with the /// GCMetadataPrinterRegistry as well.
using GCRegistry = Registry; diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h index 10088ee2fff4..976772b343fd 100644 --- a/llvm/include/llvm/IR/GlobalIFunc.h +++ b/llvm/include/llvm/IR/GlobalIFunc.h @@ -84,6 +84,11 @@ public: return FunctionType::get(IFuncValTy->getPointerTo(), false); } + static bool isValidLinkage(LinkageTypes L) { + return isExternalLinkage(L) || isLocalLinkage(L) || isWeakLinkage(L) || + isLinkOnceLinkage(L); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { return V->getValueID() == Value::GlobalIFuncVal; diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 0bb9fd730059..96a270316686 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -43,13 +43,12 @@ protected: GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace = 0) - : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace), - ObjComdat(nullptr) { + : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace) { setGlobalValueSubClassData(0); } ~GlobalObject(); - Comdat *ObjComdat; + Comdat *ObjComdat = nullptr; enum { LastAlignmentBit = 5, HasSectionHashEntryBit, diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index 1818f2a8f3cc..a17423dd965b 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -80,14 +80,14 @@ protected: UnnamedAddrVal(unsigned(UnnamedAddr::None)), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false), - IntID((Intrinsic::ID)0U), Parent(nullptr) { + HasSanitizerMetadata(false) { setLinkage(Linkage); setName(Name); } Type *ValueType; - static const unsigned GlobalValueSubClassDataBits = 16; + static const unsigned GlobalValueSubClassDataBits = 15; // All bitfields use unsigned as the underlying type so that MSVC will pack // them. @@ -112,9 +112,14 @@ protected: /// https://lld.llvm.org/Partitions.html). unsigned HasPartition : 1; + /// True if this symbol has sanitizer metadata available. Should only happen + /// if sanitizers were enabled when building the translation unit which + /// contains this GV. + unsigned HasSanitizerMetadata : 1; + private: // Give subclasses access to what otherwise would be wasted padding. - // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32. + // (15 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1 + 1) == 32. unsigned SubClassData : GlobalValueSubClassDataBits; friend class Constant; @@ -153,7 +158,7 @@ protected: /// Subclasses can use it to store their intrinsic ID, if they have one. /// /// This is stored here to save space in Function on 64-bit hosts. - Intrinsic::ID IntID; + Intrinsic::ID IntID = (Intrinsic::ID)0U; unsigned getGlobalValueSubClassData() const { return SubClassData; @@ -163,7 +168,7 @@ protected: SubClassData = V; } - Module *Parent; // The containing module. + Module *Parent = nullptr; // The containing module. // Used by SymbolTableListTraits. void setParent(Module *parent) { @@ -289,6 +294,43 @@ public: StringRef getPartition() const; void setPartition(StringRef Part); + // ASan, HWASan and Memtag sanitizers have some instrumentation that applies + // specifically to global variables. This instrumentation is implicitly + // applied to all global variables when built with -fsanitize=*. 
What we need + // is a way to persist the information that a certain global variable should + // *not* have sanitizers applied, which occurs if: + // 1. The global variable is in the sanitizer ignore list, or + // 2. The global variable is created by the sanitizers itself for internal + // usage, or + // 3. The global variable has __attribute__((no_sanitize("..."))) or + // __attribute__((disable_sanitizer_instrumentation)). + // + // This is important, as some IR passes like GlobalMerge can delete global + // variables and replace them with new ones. If the old variables were marked + // to be unsanitized, then the new ones should also be. + struct SanitizerMetadata { + SanitizerMetadata() + : NoAddress(false), NoHWAddress(false), NoMemtag(false), + IsDynInit(false) {} + unsigned NoAddress : 1; + unsigned NoHWAddress : 1; + unsigned NoMemtag : 1; + + // ASan-specific metadata. Is this global variable dynamically initialized + // (from a C++ language perspective), and should therefore be checked for + // ODR violations. + unsigned IsDynInit : 1; + }; + + bool hasSanitizerMetadata() const { return HasSanitizerMetadata; } + const SanitizerMetadata &getSanitizerMetadata() const; + // Note: Not byref as it's a POD and otherwise it's too easy to call + // G.setSanitizerMetadata(G2.getSanitizerMetadata()), and the argument becomes + // dangling when the backing storage allocates the metadata for `G`, as the + // storage is shared between `G` and `G2`. + void setSanitizerMetadata(SanitizerMetadata Meta); + void removeSanitizerMetadata(); + static LinkageTypes getLinkOnceLinkage(bool ODR) { return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage; } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index a1789759960d..d8f08934b3d6 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -25,7 +25,6 @@ #include "llvm/IR/ConstantFolder.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/FPEnv.h" @@ -77,7 +76,7 @@ class IRBuilderCallbackInserter : public IRBuilderDefaultInserter { std::function<void(Instruction *)> Callback; public: - virtual ~IRBuilderCallbackInserter(); + ~IRBuilderCallbackInserter() override; IRBuilderCallbackInserter(std::function<void(Instruction *)> Callback) : Callback(std::move(Callback)) {} @@ -125,21 +124,18 @@ protected: MDNode *DefaultFPMathTag; FastMathFlags FMF; - bool IsFPConstrained; - fp::ExceptionBehavior DefaultConstrainedExcept; - RoundingMode DefaultConstrainedRounding; + bool IsFPConstrained = false; + fp::ExceptionBehavior DefaultConstrainedExcept = fp::ebStrict; + RoundingMode DefaultConstrainedRounding = RoundingMode::Dynamic; ArrayRef<OperandBundleDef> DefaultOperandBundles; public: IRBuilderBase(LLVMContext &context, const IRBuilderFolder &Folder, - const IRBuilderDefaultInserter &Inserter, - MDNode *FPMathTag, ArrayRef<OperandBundleDef> OpBundles) + const IRBuilderDefaultInserter &Inserter, MDNode *FPMathTag, + ArrayRef<OperandBundleDef> OpBundles) : Context(context), Folder(Folder), Inserter(Inserter), - DefaultFPMathTag(FPMathTag), IsFPConstrained(false), - DefaultConstrainedExcept(fp::ebStrict), - DefaultConstrainedRounding(RoundingMode::Dynamic), - DefaultOperandBundles(OpBundles) { + DefaultFPMathTag(FPMathTag), DefaultOperandBundles(OpBundles) { ClearInsertionPoint(); } @@ -218,23 +214,11 @@ public: } /// Get location information used by debugging information.
- DebugLoc getCurrentDebugLocation() const { - for (auto &KV : MetadataToCopy) - if (KV.first == LLVMContext::MD_dbg) - return {cast(KV.second)}; - - return {}; - } + DebugLoc getCurrentDebugLocation() const; /// If this builder has a current debug location, set it on the /// specified instruction. - void SetInstDebugLocation(Instruction *I) const { - for (const auto &KV : MetadataToCopy) - if (KV.first == LLVMContext::MD_dbg) { - I->setDebugLoc(DebugLoc(KV.second)); - return; - } - } + void SetInstDebugLocation(Instruction *I) const; /// Add all entries in MetadataToCopy to \p I. void AddMetadataToInst(Instruction *I) const { @@ -316,7 +300,7 @@ public: void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) { #ifndef NDEBUG Optional ExceptStr = convertExceptionBehaviorToStr(NewExcept); - assert(ExceptStr.hasValue() && "Garbage strict exception behavior!"); + assert(ExceptStr && "Garbage strict exception behavior!"); #endif DefaultConstrainedExcept = NewExcept; } @@ -325,7 +309,7 @@ public: void setDefaultConstrainedRounding(RoundingMode NewRounding) { #ifndef NDEBUG Optional RoundingStr = convertRoundingModeToStr(NewRounding); - assert(RoundingStr.hasValue() && "Garbage strict rounding mode!"); + assert(RoundingStr && "Garbage strict rounding mode!"); #endif DefaultConstrainedRounding = NewRounding; } @@ -556,6 +540,11 @@ public: return Type::getVoidTy(Context); } + /// Fetch the type representing a pointer. + PointerType *getPtrTy(unsigned AddrSpace = 0) { + return PointerType::get(Context, AddrSpace); + } + /// Fetch the type representing a pointer to an 8-bit integer value. PointerType *getInt8PtrTy(unsigned AddrSpace = 0) { return Type::getInt8PtrTy(Context, AddrSpace); @@ -589,6 +578,12 @@ public: MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + CallInst *CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val, + Value *Size, bool IsVolatile = false, + MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); + /// Create and insert an element unordered-atomic memset of the region of /// memory starting at the given pointer to the given value. /// @@ -789,7 +784,7 @@ public: /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, - Value *ActualCallee, + FunctionCallee ActualCallee, ArrayRef CallArgs, Optional> DeoptArgs, ArrayRef GCArgs, @@ -798,7 +793,7 @@ public: /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, - Value *ActualCallee, uint32_t Flags, + FunctionCallee ActualCallee, uint32_t Flags, ArrayRef CallArgs, Optional> TransitionArgs, Optional> DeoptArgs, @@ -809,7 +804,8 @@ public: /// in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be /// .get()'ed to get the Value pointer. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, - Value *ActualCallee, ArrayRef CallArgs, + FunctionCallee ActualCallee, + ArrayRef CallArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); @@ -818,7 +814,7 @@ public: /// start a new statepoint sequence. 
InvokeInst * CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes, - Value *ActualInvokee, BasicBlock *NormalDest, + FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); @@ -826,7 +822,7 @@ public: /// Create an invoke to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. InvokeInst *CreateGCStatepointInvoke( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, ArrayRef InvokeArgs, Optional> TransitionArgs, Optional> DeoptArgs, ArrayRef GCArgs, @@ -837,7 +833,7 @@ public: // get the Value *. InvokeInst * CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes, - Value *ActualInvokee, BasicBlock *NormalDest, + FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); @@ -918,18 +914,18 @@ public: Name); } - /// Create a call to the experimental.vector.extract intrinsic. + /// Create a call to the vector.extract intrinsic. CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name = "") { - return CreateIntrinsic(Intrinsic::experimental_vector_extract, + return CreateIntrinsic(Intrinsic::vector_extract, {DstType, SrcVec->getType()}, {SrcVec, Idx}, nullptr, Name); } - /// Create a call to the experimental.vector.insert intrinsic. + /// Create a call to the vector.insert intrinsic. CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name = "") { - return CreateIntrinsic(Intrinsic::experimental_vector_insert, + return CreateIntrinsic(Intrinsic::vector_insert, {DstType, SubVec->getType()}, {SrcVec, SubVec, Idx}, nullptr, Name); } @@ -1162,21 +1158,14 @@ private: return I; } - Value *foldConstant(Instruction::BinaryOps Opc, Value *L, - Value *R, const Twine &Name) const { - auto *LC = dyn_cast(L); - auto *RC = dyn_cast(R); - return (LC && RC) ? 
Insert(Folder.CreateBinOp(Opc, LC, RC), Name) : nullptr; - } - Value *getConstrainedFPRounding(Optional Rounding) { RoundingMode UseRounding = DefaultConstrainedRounding; - if (Rounding.hasValue()) + if (Rounding) UseRounding = Rounding.getValue(); Optional RoundingStr = convertRoundingModeToStr(UseRounding); - assert(RoundingStr.hasValue() && "Garbage strict rounding mode!"); + assert(RoundingStr && "Garbage strict rounding mode!"); auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue()); return MetadataAsValue::get(Context, RoundingMDS); @@ -1185,11 +1174,11 @@ private: Value *getConstrainedFPExcept(Optional Except) { fp::ExceptionBehavior UseExcept = DefaultConstrainedExcept; - if (Except.hasValue()) + if (Except) UseExcept = Except.getValue(); Optional ExceptStr = convertExceptionBehaviorToStr(UseExcept); - assert(ExceptStr.hasValue() && "Garbage strict exception behavior!"); + assert(ExceptStr && "Garbage strict exception behavior!"); auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue()); return MetadataAsValue::get(Context, ExceptMDS); @@ -1210,10 +1199,11 @@ private: public: Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { - if (auto *V = Folder.FoldAdd(LHS, RHS, HasNUW, HasNSW)) + if (Value *V = + Folder.FoldNoWrapBinOp(Instruction::Add, LHS, RHS, HasNUW, HasNSW)) return V; - return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name, - HasNUW, HasNSW); + return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name, HasNUW, + HasNSW); } Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") { @@ -1226,11 +1216,11 @@ public: Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateSub(LC, RC, HasNUW, HasNSW), Name); - return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name, - HasNUW, HasNSW); + if (Value *V = + Folder.FoldNoWrapBinOp(Instruction::Sub, LHS, RHS, HasNUW, HasNSW)) + return V; + return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name, HasNUW, + HasNSW); } Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") { @@ -1243,11 +1233,11 @@ public: Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateMul(LC, RC, HasNUW, HasNSW), Name); - return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name, - HasNUW, HasNSW); + if (Value *V = + Folder.FoldNoWrapBinOp(Instruction::Mul, LHS, RHS, HasNUW, HasNSW)) + return V; + return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name, HasNUW, + HasNSW); } Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") { @@ -1260,9 +1250,8 @@ public: Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "", bool isExact = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateUDiv(LC, RC, isExact), Name); + if (Value *V = Folder.FoldExactBinOp(Instruction::UDiv, LHS, RHS, isExact)) + return V; if (!isExact) return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name); return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name); @@ -1274,9 +1263,8 @@ public: Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "", bool isExact = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateSDiv(LC, RC, 
isExact), Name); + if (Value *V = Folder.FoldExactBinOp(Instruction::SDiv, LHS, RHS, isExact)) + return V; if (!isExact) return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name); return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name); @@ -1287,20 +1275,22 @@ public: } Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") { - if (Value *V = foldConstant(Instruction::URem, LHS, RHS, Name)) return V; + if (Value *V = Folder.FoldBinOp(Instruction::URem, LHS, RHS)) + return V; return Insert(BinaryOperator::CreateURem(LHS, RHS), Name); } Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") { - if (Value *V = foldConstant(Instruction::SRem, LHS, RHS, Name)) return V; + if (Value *V = Folder.FoldBinOp(Instruction::SRem, LHS, RHS)) + return V; return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name); } Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name); + if (Value *V = + Folder.FoldNoWrapBinOp(Instruction::Shl, LHS, RHS, HasNUW, HasNSW)) + return V; return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name, HasNUW, HasNSW); } @@ -1319,9 +1309,8 @@ public: Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "", bool isExact = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateLShr(LC, RC, isExact), Name); + if (Value *V = Folder.FoldExactBinOp(Instruction::LShr, LHS, RHS, isExact)) + return V; if (!isExact) return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name); return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name); @@ -1339,9 +1328,8 @@ public: Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "", bool isExact = false) { - if (auto *LC = dyn_cast(LHS)) - if (auto *RC = dyn_cast(RHS)) - return Insert(Folder.CreateAShr(LC, RC, isExact), Name); + if (Value *V = Folder.FoldExactBinOp(Instruction::AShr, LHS, RHS, isExact)) + return V; if (!isExact) return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name); return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name); @@ -1358,7 +1346,7 @@ public: } Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") { - if (auto *V = Folder.FoldAnd(LHS, RHS)) + if (auto *V = Folder.FoldBinOp(Instruction::And, LHS, RHS)) return V; return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name); } @@ -1380,7 +1368,7 @@ public: } Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") { - if (auto *V = Folder.FoldOr(LHS, RHS)) + if (auto *V = Folder.FoldBinOp(Instruction::Or, LHS, RHS)) return V; return Insert(BinaryOperator::CreateOr(LHS, RHS), Name); } @@ -1402,7 +1390,8 @@ public: } Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") { - if (Value *V = foldConstant(Instruction::Xor, LHS, RHS, Name)) return V; + if (Value *V = Folder.FoldBinOp(Instruction::Xor, LHS, RHS)) + return V; return Insert(BinaryOperator::CreateXor(LHS, RHS), Name); } @@ -1420,7 +1409,8 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd, L, R, nullptr, Name, FPMD); - if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V; + if (Value *V = Folder.FoldBinOpFMF(Instruction::FAdd, L, R, FMF)) + return V; Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), FPMD, FMF); return Insert(I, Name); } @@ -1433,9 +1423,10 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd, L, R, 
FMFSource, Name); - if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V; - Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr, - FMFSource->getFastMathFlags()); + FastMathFlags FMF = FMFSource->getFastMathFlags(); + if (Value *V = Folder.FoldBinOpFMF(Instruction::FAdd, L, R, FMF)) + return V; + Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr, FMF); return Insert(I, Name); } @@ -1445,7 +1436,8 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub, L, R, nullptr, Name, FPMD); - if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V; + if (Value *V = Folder.FoldBinOpFMF(Instruction::FSub, L, R, FMF)) + return V; Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), FPMD, FMF); return Insert(I, Name); } @@ -1458,9 +1450,10 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub, L, R, FMFSource, Name); - if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V; - Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr, - FMFSource->getFastMathFlags()); + FastMathFlags FMF = FMFSource->getFastMathFlags(); + if (Value *V = Folder.FoldBinOpFMF(Instruction::FSub, L, R, FMF)) + return V; + Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr, FMF); return Insert(I, Name); } @@ -1470,7 +1463,8 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul, L, R, nullptr, Name, FPMD); - if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V; + if (Value *V = Folder.FoldBinOpFMF(Instruction::FMul, L, R, FMF)) + return V; Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), FPMD, FMF); return Insert(I, Name); } @@ -1483,9 +1477,10 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul, L, R, FMFSource, Name); - if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V; - Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr, - FMFSource->getFastMathFlags()); + FastMathFlags FMF = FMFSource->getFastMathFlags(); + if (Value *V = Folder.FoldBinOpFMF(Instruction::FMul, L, R, FMF)) + return V; + Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr, FMF); return Insert(I, Name); } @@ -1495,7 +1490,8 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv, L, R, nullptr, Name, FPMD); - if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V; + if (Value *V = Folder.FoldBinOpFMF(Instruction::FDiv, L, R, FMF)) + return V; Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), FPMD, FMF); return Insert(I, Name); } @@ -1508,9 +1504,9 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv, L, R, FMFSource, Name); - if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V; - Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr, - FMFSource->getFastMathFlags()); + if (Value *V = Folder.FoldBinOpFMF(Instruction::FDiv, L, R, FMF)) + return V; + Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr, FMF); return Insert(I, Name); } @@ -1520,7 +1516,7 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem, L, R, nullptr, Name, FPMD); - if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V; + if (Value *V = Folder.FoldBinOpFMF(Instruction::FRem, L, R, FMF)) return V; Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), FPMD, FMF); return Insert(I, Name); } 
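The FRem builders above complete the pattern used throughout this hunk: every arithmetic CreateXxx() now consults the folder's FoldBinOp/FoldBinOpFMF hooks instead of the removed foldConstant() helper, so folding can also honor fast-math flags. A minimal caller-side sketch, assuming a builder already positioned at an insertion point (the function name is illustrative):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *emitFastFAdd(IRBuilderBase &B, Value *L, Value *R) {
  FastMathFlags FMF;
  FMF.setNoNaNs();
  FMF.setAllowContract();
  B.setFastMathFlags(FMF); // picked up by FoldBinOpFMF before insertion
  // CreateFAdd asks the folder first; only if it returns nullptr is a real
  // fadd instruction created and inserted.
  return B.CreateFAdd(L, R);
}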
@@ -1533,16 +1529,16 @@ public: return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem, L, R, FMFSource, Name); - if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V; - Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr, - FMFSource->getFastMathFlags()); + FastMathFlags FMF = FMFSource->getFastMathFlags(); + if (Value *V = Folder.FoldBinOpFMF(Instruction::FRem, L, R, FMF)) return V; + Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr, FMF); return Insert(I, Name); } Value *CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name = "", MDNode *FPMathTag = nullptr) { - if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V; + if (Value *V = Folder.FoldBinOp(Opc, LHS, RHS)) return V; Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS); if (isa(BinOp)) setFPAttrs(BinOp, FPMathTag, FMF); @@ -1576,14 +1572,10 @@ public: Optional Rounding = None, Optional Except = None); - Value *CreateNeg(Value *V, const Twine &Name = "", - bool HasNUW = false, bool HasNSW = false) { - if (auto *VC = dyn_cast(V)) - return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name); - BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name); - if (HasNUW) BO->setHasNoUnsignedWrap(); - if (HasNSW) BO->setHasNoSignedWrap(); - return BO; + Value *CreateNeg(Value *V, const Twine &Name = "", bool HasNUW = false, + bool HasNSW = false) { + return CreateSub(Constant::getNullValue(V->getType()), V, Name, HasNUW, + HasNSW); } Value *CreateNSWNeg(Value *V, const Twine &Name = "") { @@ -1614,9 +1606,7 @@ public: } Value *CreateNot(Value *V, const Twine &Name = "") { - if (auto *VC = dyn_cast(V)) - return Insert(Folder.CreateNot(VC), Name); - return Insert(BinaryOperator::CreateNot(V), Name); + return CreateXor(V, Constant::getAllOnesValue(V->getType()), Name); } Value *CreateUnOp(Instruction::UnaryOps Opc, @@ -1733,30 +1723,18 @@ public: } Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef IdxList, - const Twine &Name = "") { - if (auto *V = Folder.FoldGEP(Ty, Ptr, IdxList, /*IsInBounds=*/false)) + const Twine &Name = "", bool IsInBounds = false) { + if (auto *V = Folder.FoldGEP(Ty, Ptr, IdxList, IsInBounds)) return V; - return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name); + return Insert(IsInBounds + ? 
GetElementPtrInst::CreateInBounds(Ty, Ptr, IdxList) + : GetElementPtrInst::Create(Ty, Ptr, IdxList), + Name); } Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef IdxList, const Twine &Name = "") { - if (auto *V = Folder.FoldGEP(Ty, Ptr, IdxList, /*IsInBounds=*/true)) - return V; - return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, IdxList), Name); - } - - Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") { - if (auto *V = Folder.FoldGEP(Ty, Ptr, {Idx}, /*IsInBounds=*/false)) - return V; - return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name); - } - - Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx, - const Twine &Name = "") { - if (auto *V = Folder.FoldGEP(Ty, Ptr, {Idx}, /*IsInBounds=*/true)) - return V; - return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name); + return CreateGEP(Ty, Ptr, IdxList, Name, /* IsInBounds */ true); } Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, @@ -2297,9 +2275,8 @@ public: Value *CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name = "") { - if (auto *VC = dyn_cast(Vec)) - if (auto *IC = dyn_cast(Idx)) - return Insert(Folder.CreateExtractElement(VC, IC), Name); + if (Value *V = Folder.FoldExtractElement(Vec, Idx)) + return V; return Insert(ExtractElementInst::Create(Vec, Idx), Name); } @@ -2320,10 +2297,8 @@ public: Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name = "") { - if (auto *VC = dyn_cast(Vec)) - if (auto *NC = dyn_cast(NewElt)) - if (auto *IC = dyn_cast(Idx)) - return Insert(Folder.CreateInsertElement(VC, NC, IC), Name); + if (Value *V = Folder.FoldInsertElement(Vec, NewElt, Idx)) + return V; return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name); } @@ -2339,21 +2314,11 @@ public: return CreateShuffleVector(V1, V2, IntMask, Name); } - LLVM_ATTRIBUTE_DEPRECATED(Value *CreateShuffleVector(Value *V1, Value *V2, - ArrayRef Mask, - const Twine &Name = ""), - "Pass indices as 'int' instead") { - SmallVector IntMask; - IntMask.assign(Mask.begin(), Mask.end()); - return CreateShuffleVector(V1, V2, IntMask, Name); - } - /// See class ShuffleVectorInst for a description of the mask representation. 
Value *CreateShuffleVector(Value *V1, Value *V2, ArrayRef Mask, const Twine &Name = "") { - if (auto *V1C = dyn_cast(V1)) - if (auto *V2C = dyn_cast(V2)) - return Insert(Folder.CreateShuffleVector(V1C, V2C, Mask), Name); + if (Value *V = Folder.FoldShuffleVector(V1, V2, Mask)) + return V; return Insert(new ShuffleVectorInst(V1, V2, Mask), Name); } @@ -2364,20 +2329,17 @@ public: return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name); } - Value *CreateExtractValue(Value *Agg, - ArrayRef Idxs, + Value *CreateExtractValue(Value *Agg, ArrayRef Idxs, const Twine &Name = "") { - if (auto *AggC = dyn_cast(Agg)) - return Insert(Folder.CreateExtractValue(AggC, Idxs), Name); + if (auto *V = Folder.FoldExtractValue(Agg, Idxs)) + return V; return Insert(ExtractValueInst::Create(Agg, Idxs), Name); } - Value *CreateInsertValue(Value *Agg, Value *Val, - ArrayRef Idxs, + Value *CreateInsertValue(Value *Agg, Value *Val, ArrayRef Idxs, const Twine &Name = "") { - if (auto *AggC = dyn_cast(Agg)) - if (auto *ValC = dyn_cast(Val)) - return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name); + if (auto *V = Folder.FoldInsertValue(Agg, Val, Idxs)) + return V; return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name); } @@ -2394,16 +2356,25 @@ public: // Utility creation methods //===--------------------------------------------------------------------===// - /// Return an i1 value testing if \p Arg is null. + /// Return a boolean value testing if \p Arg == 0. Value *CreateIsNull(Value *Arg, const Twine &Name = "") { - return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()), - Name); + return CreateICmpEQ(Arg, ConstantInt::getNullValue(Arg->getType()), Name); } - /// Return an i1 value testing if \p Arg is not null. + /// Return a boolean value testing if \p Arg != 0. Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") { - return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()), - Name); + return CreateICmpNE(Arg, ConstantInt::getNullValue(Arg->getType()), Name); + } + + /// Return a boolean value testing if \p Arg < 0. + Value *CreateIsNeg(Value *Arg, const Twine &Name = "") { + return CreateICmpSLT(Arg, ConstantInt::getNullValue(Arg->getType()), Name); + } + + /// Return a boolean value testing if \p Arg > -1. + Value *CreateIsNotNeg(Value *Arg, const Twine &Name = "") { + return CreateICmpSGT(Arg, ConstantInt::getAllOnesValue(Arg->getType()), + Name); } /// Return the i64 difference between two pointer values, dividing out diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h index 2827ab553adc..9505f1e3be2a 100644 --- a/llvm/include/llvm/IR/IRBuilderFolder.h +++ b/llvm/include/llvm/IR/IRBuilderFolder.h @@ -31,12 +31,19 @@ public: // Return an existing value or a constant if the operation can be simplified. // Otherwise return nullptr. 
//===--------------------------------------------------------------------===// - virtual Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false, - bool HasNSW = false) const = 0; - virtual Value *FoldAnd(Value *LHS, Value *RHS) const = 0; + virtual Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const = 0; - virtual Value *FoldOr(Value *LHS, Value *RHS) const = 0; + virtual Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS, bool IsExact) const = 0; + + virtual Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS, bool HasNUW, + bool HasNSW) const = 0; + + virtual Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS, FastMathFlags FMF) const = 0; virtual Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const = 0; @@ -46,43 +53,25 @@ public: virtual Value *FoldSelect(Value *C, Value *True, Value *False) const = 0; - //===--------------------------------------------------------------------===// - // Binary Operators - //===--------------------------------------------------------------------===// + virtual Value *FoldExtractValue(Value *Agg, + ArrayRef IdxList) const = 0; + + virtual Value *FoldInsertValue(Value *Agg, Value *Val, + ArrayRef IdxList) const = 0; + + virtual Value *FoldExtractElement(Value *Vec, Value *Idx) const = 0; - virtual Value *CreateFAdd(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateSub(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const = 0; - virtual Value *CreateFSub(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateMul(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const = 0; - virtual Value *CreateFMul(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateUDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const = 0; - virtual Value *CreateSDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const = 0; - virtual Value *CreateFDiv(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateURem(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateSRem(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateFRem(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateShl(Constant *LHS, Constant *RHS, - bool HasNUW = false, bool HasNSW = false) const = 0; - virtual Value *CreateLShr(Constant *LHS, Constant *RHS, - bool isExact = false) const = 0; - virtual Value *CreateAShr(Constant *LHS, Constant *RHS, - bool isExact = false) const = 0; - virtual Value *CreateXor(Constant *LHS, Constant *RHS) const = 0; - virtual Value *CreateBinOp(Instruction::BinaryOps Opc, - Constant *LHS, Constant *RHS) const = 0; + virtual Value *FoldInsertElement(Value *Vec, Value *NewElt, + Value *Idx) const = 0; + + virtual Value *FoldShuffleVector(Value *V1, Value *V2, + ArrayRef Mask) const = 0; //===--------------------------------------------------------------------===// // Unary Operators //===--------------------------------------------------------------------===// - virtual Value *CreateNeg(Constant *C, - bool HasNUW = false, bool HasNSW = false) const = 0; virtual Value *CreateFNeg(Constant *C) const = 0; - virtual Value *CreateNot(Constant *C) const = 0; virtual Value *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const = 0; //===--------------------------------------------------------------------===// @@ -110,20 +99,6 @@ public: virtual Value *CreateFCmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const = 0; - - 
//===--------------------------------------------------------------------===// - // Other Instructions - //===--------------------------------------------------------------------===// - - virtual Value *CreateExtractElement(Constant *Vec, Constant *Idx) const = 0; - virtual Value *CreateInsertElement(Constant *Vec, Constant *NewElt, - Constant *Idx) const = 0; - virtual Value *CreateShuffleVector(Constant *V1, Constant *V2, - ArrayRef<int> Mask) const = 0; - virtual Value *CreateExtractValue(Constant *Agg, - ArrayRef<unsigned> IdxList) const = 0; - virtual Value *CreateInsertValue(Constant *Agg, Constant *Val, - ArrayRef<unsigned> IdxList) const = 0; }; } // end namespace llvm diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index cf6b7af96980..57f2da27e04e 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -240,12 +240,15 @@ public: Kind_RegDefEarlyClobber = 3, // Early-clobber output register, "=&r". Kind_Clobber = 4, // Clobbered register, "~r". Kind_Imm = 5, // Immediate. - Kind_Mem = 6, // Memory operand, "m". + Kind_Mem = 6, // Memory operand, "m", or an address, "p". // Memory constraint codes. // These could be tablegenerated but there's little need to do that since // there's plenty of space in the encoding to support the union of all // constraint codes for all targets. + // Addresses are included here as they need to be treated the same by the + // backend; the only difference is that they are not used to actually + // access memory by the instruction. Constraint_Unknown = 0, Constraint_es, Constraint_i, @@ -268,7 +271,15 @@ public: Constraint_Z, Constraint_ZC, Constraint_Zy, - Constraints_Max = Constraint_Zy, + + // Address constraints + Constraint_p, + Constraint_ZQ, + Constraint_ZR, + Constraint_ZS, + Constraint_ZT, + + Constraints_Max = Constraint_ZT, Constraints_ShiftAmount = 16, Flag_MatchingOperand = 0x80000000 @@ -453,6 +464,16 @@ public: return "ZC"; case InlineAsm::Constraint_Zy: return "Zy"; + case InlineAsm::Constraint_p: + return "p"; + case InlineAsm::Constraint_ZQ: + return "ZQ"; + case InlineAsm::Constraint_ZR: + return "ZR"; + case InlineAsm::Constraint_ZS: + return "ZS"; + case InlineAsm::Constraint_ZT: + return "ZT"; default: llvm_unreachable("Unknown memory constraint"); } diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h index 585129904dd4..7fec081d8155 100644 --- a/llvm/include/llvm/IR/InstVisitor.h +++ b/llvm/include/llvm/IR/InstVisitor.h @@ -15,7 +15,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -200,7 +199,7 @@ public: RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); } RetTy visitFreezeInst(FreezeInst &I) { DELEGATE(Instruction); } - // Handle the special instrinsic instruction classes. + // Handle the special intrinsic instruction classes.
RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgVariableIntrinsic);} RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgVariableIntrinsic);} RetTy visitDbgVariableIntrinsic(DbgVariableIntrinsic &I) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 589926c0faf1..eb6f89d740c6 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -21,22 +21,16 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/OperandTraits.h" -#include "llvm/IR/Type.h" #include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -47,6 +41,10 @@ namespace llvm { +class StringRef; +class Type; +class Value; + namespace Intrinsic { typedef unsigned ID; } @@ -1615,12 +1613,18 @@ public: /// Get the attribute of a given kind for the function. Attribute getFnAttr(StringRef Kind) const { - return getAttributes().getFnAttr(Kind); + Attribute Attr = getAttributes().getFnAttr(Kind); + if (Attr.isValid()) + return Attr; + return getFnAttrOnCalledFunction(Kind); } /// Get the attribute of a given kind for the function. Attribute getFnAttr(Attribute::AttrKind Kind) const { - return getAttributes().getFnAttr(Kind); + Attribute A = getAttributes().getFnAttr(Kind); + if (A.isValid()) + return A; + return getFnAttrOnCalledFunction(Kind); } /// Get the attribute of a given kind from a given arg @@ -1761,7 +1765,7 @@ public: return nullptr; } - /// Extract the preallocated type for a call or parameter. + /// Extract the inalloca type for a call or parameter. Type *getParamInAllocaType(unsigned ArgNo) const { if (auto *Ty = Attrs.getParamInAllocaType(ArgNo)) return Ty; @@ -1770,6 +1774,22 @@ public: return nullptr; } + /// Extract the sret type for a call or parameter. + Type *getParamStructRetType(unsigned ArgNo) const { + if (auto *Ty = Attrs.getParamStructRetType(ArgNo)) + return Ty; + if (const Function *F = getCalledFunction()) + return F->getAttributes().getParamStructRetType(ArgNo); + return nullptr; + } + + /// Extract the elementtype type for a parameter. + /// Note that elementtype() can only be applied to call arguments, not + /// function declaration parameters. + Type *getParamElementType(unsigned ArgNo) const { + return Attrs.getParamElementType(ArgNo); + } + /// Extract the number of dereferenceable bytes for a call or /// parameter (0=unknown). uint64_t getRetDereferenceableBytes() const { @@ -1806,7 +1826,13 @@ public: /// If one of the arguments has the 'returned' attribute, returns its /// operand value. Otherwise, return nullptr. - Value *getReturnedArgOperand() const; + Value *getReturnedArgOperand() const { + return getArgOperandWithAttribute(Attribute::Returned); + } + + /// If one of the arguments has the specified attribute, returns its + /// operand value. Otherwise, return nullptr. + Value *getArgOperandWithAttribute(Attribute::AttrKind Kind) const; /// Return true if the call should not be treated as a call to a /// builtin. 
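With the getFnAttr() changes above, attribute queries on a call site now fall back to the attribute list of the statically called function when the call site itself carries nothing. A hedged sketch of what a client observes (the helper name is illustrative):

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

bool callOrCalleeIsCold(const CallBase &CB) {
  // Checks call-site attributes first; if absent, the lookup continues on
  // the called function. Indirect calls simply yield an invalid attribute.
  return CB.getFnAttr(Attribute::Cold).isValid();
}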
@@ -2052,7 +2078,8 @@ public: bool hasClobberingOperandBundles() const { for (auto &BOI : bundle_op_infos()) { if (BOI.Tag->second == LLVMContext::OB_deopt || - BOI.Tag->second == LLVMContext::OB_funclet) + BOI.Tag->second == LLVMContext::OB_funclet || + BOI.Tag->second == LLVMContext::OB_ptrauth) continue; // This instruction has an operand bundle that is not known to us. @@ -2296,6 +2323,7 @@ private: return hasFnAttrOnCalledFunction(Kind); } + template Attribute getFnAttrOnCalledFunction(AK Kind) const; /// A specialized version of hasFnAttrImpl for when the caller wants to /// know if an attribute's semantics are implied, not whether the attribute diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 1937ffd36f7b..8d0a8363cdfb 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -24,7 +24,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Casting.h" #include #include diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 5929cff3b4fb..d152e86488e1 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -21,24 +21,18 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/OperandTraits.h" -#include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" -#include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include #include @@ -47,9 +41,14 @@ namespace llvm { +class APFloat; class APInt; +class BasicBlock; class ConstantInt; class DataLayout; +class StringRef; +class Type; +class Value; //===----------------------------------------------------------------------===// // AllocaInst Class @@ -127,9 +126,6 @@ public: setSubclassData(Log2(Align)); } - // FIXME: Remove this one transition to Align is over. - uint64_t getAlignment() const { return getAlign().value(); } - /// Return true if this alloca is in the entry block of the function and is a /// constant size. If so, the code generator will fold it into the /// prolog/epilog code, so it is basically free. @@ -216,11 +212,6 @@ public: /// Specify whether this is a volatile load or not. void setVolatile(bool V) { setSubclassData(V); } - /// Return the alignment of the access that is being performed. - /// FIXME: Remove this function once transition to Align is over. - /// Use getAlign() instead. - uint64_t getAlignment() const { return getAlign().value(); } - /// Return the alignment of the access that is being performed. Align getAlign() const { return Align(1ULL << (getSubclassData())); @@ -347,11 +338,6 @@ public: /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// Return the alignment of the access that is being performed - /// FIXME: Remove this function once transition to Align is over. - /// Use getAlign() instead. 
- uint64_t getAlignment() const { return getAlign().value(); } - Align getAlign() const { return Align(1ULL << (getSubclassData())); } @@ -2138,6 +2124,12 @@ public: static bool isIdentityMask(ArrayRef Mask); static bool isIdentityMask(const Constant *Mask) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); + + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa(Mask->getType())) + return false; + SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); return isIdentityMask(MaskAsInts); @@ -2148,6 +2140,11 @@ public: /// from its input vectors. /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef> bool isIdentity() const { + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa(getType())) + return false; + return !changesLength() && isIdentityMask(ShuffleMask); } @@ -5311,6 +5308,10 @@ public: } }; +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + /// A helper function that returns the pointer operand of a load or store /// instruction. Returns nullptr if not load or store. inline const Value *getLoadStorePointerOperand(const Value *V) { @@ -5366,6 +5367,24 @@ inline Type *getLoadStoreType(Value *I) { return cast(I)->getValueOperand()->getType(); } +/// A helper function that returns an atomic operation's sync scope; returns +/// None if it is not an atomic operation. +inline Optional getAtomicSyncScopeID(const Instruction *I) { + if (!I->isAtomic()) + return None; + if (auto *AI = dyn_cast(I)) + return AI->getSyncScopeID(); + if (auto *AI = dyn_cast(I)) + return AI->getSyncScopeID(); + if (auto *AI = dyn_cast(I)) + return AI->getSyncScopeID(); + if (auto *AI = dyn_cast(I)) + return AI->getSyncScopeID(); + if (auto *AI = dyn_cast(I)) + return AI->getSyncScopeID(); + llvm_unreachable("unhandled atomic operation"); +} + //===----------------------------------------------------------------------===// // FreezeInst Class //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 01dada25a285..06d2335821d3 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -31,7 +31,6 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include @@ -39,6 +38,8 @@ namespace llvm { +class Metadata; + /// A wrapper class for inspecting calls to intrinsic functions. /// This allows the standard isa/dyncast/cast functionality to work with calls /// to intrinsic functions. 
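The getAtomicSyncScopeID() helper added above folds the five atomic instruction kinds (load, store, fence, cmpxchg, atomicrmw) behind a single query. A minimal sketch of its use (the wrapper function name is illustrative):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

bool isSingleThreadAtomic(const Instruction &I) {
  // Returns None for non-atomic instructions, so callers need no isa<>
  // dispatch of their own.
  if (Optional<SyncScope::ID> SSID = getAtomicSyncScopeID(&I))
    return *SSID == SyncScope::SingleThread;
  return false;
}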
@@ -472,6 +473,38 @@ public: /// @} }; +class VPCastIntrinsic : public VPIntrinsic { +public: + static bool isVPCast(Intrinsic::ID ID); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// @{ + static bool classof(const IntrinsicInst *I) { + return VPCastIntrinsic::isVPCast(I->getIntrinsicID()); + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + /// @} +}; + +class VPCmpIntrinsic : public VPIntrinsic { +public: + static bool isVPCmp(Intrinsic::ID ID); + + CmpInst::Predicate getPredicate() const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// @{ + static bool classof(const IntrinsicInst *I) { + return VPCmpIntrinsic::isVPCmp(I->getIntrinsicID()); + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + /// @} +}; + /// This is the common base class for constrained floating point intrinsics. class ConstrainedFPIntrinsic : public IntrinsicInst { public: @@ -492,6 +525,9 @@ public: class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic { public: FCmpInst::Predicate getPredicate() const; + bool isSignaling() const { + return getIntrinsicID() == Intrinsic::experimental_constrained_fcmps; + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { @@ -723,11 +759,6 @@ public: setArgOperand(ARG_DEST, Ptr); } - /// FIXME: Remove this function once transition to Align is over. - /// Use the version that takes MaybeAlign instead of this one. - void setDestAlignment(unsigned Alignment) { - setDestAlignment(MaybeAlign(Alignment)); - } void setDestAlignment(MaybeAlign Alignment) { removeParamAttr(ARG_DEST, Attribute::Alignment); if (Alignment) @@ -942,6 +973,7 @@ public: case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + case Intrinsic::memset_inline: case Intrinsic::memcpy_inline: return true; default: @@ -953,12 +985,33 @@ public: } }; -/// This class wraps the llvm.memset intrinsic. +/// This class wraps the llvm.memset and llvm.memset.inline intrinsics. class MemSetInst : public MemSetBase { public: // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memset; + switch (I->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memset_inline: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This class wraps the llvm.memset.inline intrinsic. 
+class MemSetInlineInst : public MemSetInst {
+public:
+  ConstantInt *getLength() const {
+    return cast<ConstantInt>(MemSetInst::getLength());
+  }
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::memset_inline;
   }
   static bool classof(const Value *V) {
     return isa<CallInst>(V) && classof(cast<CallInst>(V));
@@ -1043,6 +1096,7 @@ public:
     case Intrinsic::memcpy_inline:
     case Intrinsic::memmove:
     case Intrinsic::memset:
+    case Intrinsic::memset_inline:
     case Intrinsic::memcpy_element_unordered_atomic:
     case Intrinsic::memmove_element_unordered_atomic:
     case Intrinsic::memset_element_unordered_atomic:
@@ -1064,6 +1118,7 @@ public:
   static bool classof(const IntrinsicInst *I) {
     switch (I->getIntrinsicID()) {
     case Intrinsic::memset:
+    case Intrinsic::memset_inline:
     case Intrinsic::memset_element_unordered_atomic:
       return true;
     default:
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 2ff48380ac28..a3db2fa59399 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -104,8 +104,8 @@ namespace Intrinsic {
   int lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
                                 StringRef Name);

-  /// Map a GCC builtin name to an intrinsic ID.
-  ID getIntrinsicForGCCBuiltin(const char *Prefix, StringRef BuiltinName);
+  /// Map a Clang builtin name to an intrinsic ID.
+  ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);

   /// Map a MS builtin name to an intrinsic ID.
   ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
@@ -142,6 +142,7 @@ namespace Intrinsic {
       VecOfBitcastsToInt,
       AMX,
       PPCQuad,
+      AnyPtrToElt,
     } Kind;

     union {
@@ -180,14 +181,15 @@ namespace Intrinsic {
       return (ArgKind)(Argument_Info & 7);
     }

-    // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
-    // and a reference argument (for matching vector width and element types)
+    // VecOfAnyPtrsToElt and AnyPtrToElt use both an overloaded argument (for
+    // address space) and a reference argument (for matching vector width and
+    // element types)
     unsigned getOverloadArgNumber() const {
-      assert(Kind == VecOfAnyPtrsToElt);
+      assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
       return Argument_Info >> 16;
     }
     unsigned getRefArgNumber() const {
-      assert(Kind == VecOfAnyPtrsToElt);
+      assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
       return Argument_Info & 0xFFFF;
     }
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index f5248e82ad21..0dceea13ea36 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -120,6 +120,9 @@ class ReadNone : IntrinsicProperty {

 def IntrNoReturn : IntrinsicProperty;

+// Applied by default.
+def IntrNoCallback : IntrinsicProperty<1>;
+
 // IntrNoSync - Threads executing the intrinsic will not synchronize using
 // memory or other means. Applied by default.
def IntrNoSync : IntrinsicProperty<1>; @@ -212,6 +215,7 @@ class LLVMScalarOrSameVectorWidth class LLVMPointerTo : LLVMMatchType; class LLVMPointerToElt : LLVMMatchType; +class LLVMAnyPointerToElt : LLVMMatchType; class LLVMVectorOfAnyPointersToElt : LLVMMatchType; class LLVMVectorElementType : LLVMMatchType; @@ -241,6 +245,7 @@ def llvm_i8_ty : LLVMType; def llvm_i16_ty : LLVMType; def llvm_i32_ty : LLVMType; def llvm_i64_ty : LLVMType; +def llvm_i128_ty : LLVMType; def llvm_half_ty : LLVMType; def llvm_bfloat_ty : LLVMType; def llvm_float_ty : LLVMType; @@ -380,11 +385,11 @@ class DefaultAttrsIntrinsic ret_types, intr_properties, name, sd_properties, /*disable_default_attributes*/ 0> {} -/// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this +/// ClangBuiltin - If this intrinsic exactly corresponds to a Clang builtin, this /// specifies the name of the builtin. This provides automatic CBE and CFE /// support. -class GCCBuiltin { - string GCCBuiltinName = name; +class ClangBuiltin { + string ClangBuiltinName = name; } class MSBuiltin { @@ -540,14 +545,14 @@ def int_seh_scope_end : Intrinsic<[], [], [IntrNoMem]>; // Note: we treat stacksave/stackrestore as writemem because we don't otherwise // model their dependencies on allocas. def int_stacksave : DefaultAttrsIntrinsic<[llvm_ptr_ty]>, - GCCBuiltin<"__builtin_stack_save">; + ClangBuiltin<"__builtin_stack_save">; def int_stackrestore : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>, - GCCBuiltin<"__builtin_stack_restore">; + ClangBuiltin<"__builtin_stack_restore">; def int_get_dynamic_area_offset : DefaultAttrsIntrinsic<[llvm_anyint_ty]>; def int_thread_pointer : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, - GCCBuiltin<"__builtin_thread_pointer">; + ClangBuiltin<"__builtin_thread_pointer">; // IntrInaccessibleMemOrArgMemOnly is a little more pessimistic than strictly // necessary for prefetch, however it does conveniently prevent the prefetch @@ -647,6 +652,17 @@ def int_memset : Intrinsic<[], NoCapture>, WriteOnly>, ImmArg>]>; +// Memset version that is guaranteed to be inlined. +// In particular this means that the generated code is not allowed to call any +// external function. +// The third argument (specifying the size) must be a constant. +def int_memset_inline + : Intrinsic<[], + [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, + NoCapture>, WriteOnly>, + ImmArg>, ImmArg>]>; + // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. 
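
Since llvm.memset.inline above is guaranteed to be expanded inline, its size operand must be a compile-time constant, and the expansion may not call any external function. A short sketch of how a pass can rely on that through the MemSetInlineInst wrapper added earlier in this patch; getKnownMemSetSize is illustrative, not part of the patch:

    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // For llvm.memset.inline the length operand is always a ConstantInt, so
    // MemSetInlineInst::getLength returns it directly, without a dyn_cast.
    static uint64_t getKnownMemSetSize(const IntrinsicInst &II) {
      const auto &MSI = cast<MemSetInlineInst>(II); // asserts memset.inline
      return MSI.getLength()->getZExtValue();       // constant byte count
    }
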
@@ -715,7 +731,7 @@ def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>, ImmArg>, ImmArg>]>, - GCCBuiltin<"__builtin_object_size">; + ClangBuiltin<"__builtin_object_size">; //===--------------- Access to Floating Point Environment -----------------===// // @@ -725,6 +741,14 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { def int_set_rounding : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; } +//===--------------- Floating Point Properties ----------------------------===// +// + +def int_is_fpclass + : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [llvm_anyfloat_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg>]>; + //===--------------- Constrained Floating Point Intrinsics ----------------===// // @@ -909,6 +933,12 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { } // FIXME: Consider maybe adding intrinsics for sitofp, uitofp. + +// Truncate a floating point number with a specific rounding mode +def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, llvm_metadata_ty ], + [ IntrNoMem, IntrWillReturn ]>; + //===------------------------- Expect Intrinsics --------------------------===// // def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty], @@ -984,12 +1014,12 @@ def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrNoMem] // callee-saved registers to be saved and restored (regardless of whether they // are used) in the calling function. It is used by libgcc_eh. def int_eh_unwind_init: Intrinsic<[]>, - GCCBuiltin<"__builtin_unwind_init">; + ClangBuiltin<"__builtin_unwind_init">; def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>; def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; -def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; +def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>; def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; @@ -1025,11 +1055,11 @@ def int_init_trampoline : DefaultAttrsIntrinsic< [], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [IntrArgMemOnly, NoCapture>, WriteOnly>, ReadNone>, ReadNone>]>, - GCCBuiltin<"__builtin_init_trampoline">; + ClangBuiltin<"__builtin_init_trampoline">; def int_adjust_trampoline : DefaultAttrsIntrinsic< [llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>, - GCCBuiltin<"__builtin_adjust_trampoline">; + ClangBuiltin<"__builtin_adjust_trampoline">; //===------------------------ Overflow Intrinsics -------------------------===// // @@ -1309,9 +1339,9 @@ def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty], ///===-------------------------- Other Intrinsics --------------------------===// // def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>, - GCCBuiltin<"__builtin_trap">; + ClangBuiltin<"__builtin_trap">; def int_debugtrap : Intrinsic<[]>, - GCCBuiltin<"__builtin_debugtrap">; + ClangBuiltin<"__builtin_debugtrap">; def int_ubsantrap : Intrinsic<[], [llvm_i8_ty], [IntrNoReturn, IntrCold, ImmArg>]>; @@ -1397,14 +1427,31 @@ def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty], [ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>; def int_vp_scatter: DefaultAttrsIntrinsic<[], - [ llvm_anyvector_ty, - LLVMVectorOfAnyPointersToElt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty], - [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow 
IntrNoCapture for vectors of pointers - -// Speculatable Binary operators -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + [ llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers + +// Experimental strided memory accesses +def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[], + [ llvm_anyvector_ty, + LLVMAnyPointerToElt<0>, + llvm_anyint_ty, // Stride in bytes + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ NoCapture>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>; + +def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [ LLVMAnyPointerToElt<0>, + llvm_anyint_ty, // Stride in bytes + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; + +// Operators +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Integer arithmetic def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1416,30 +1463,30 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1450,35 +1497,28 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; -} - -// Non-speculatable binary operators. 
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Floating-point arithmetic. -let IntrProperties = - [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Floating-point arithmetic def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1490,101 +1530,169 @@ let IntrProperties = LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_fmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_frem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} -// Shuffles. 
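
The regrouping above is not purely cosmetic: IntrSpeculatable is dropped from the whole VP block, since with an explicit mask and vector length even the previously speculatable operations are only well defined on enabled lanes. Every llvm.vp.* intrinsic here still ends with the mask/evl operand pair, whose positions the existing VPIntrinsic helpers expose; a small C++ sketch (the diagnostic printout is illustrative):

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Query where a VP intrinsic keeps its mask and explicit-vector-length
    // operands; both helpers return None for non-VP intrinsic IDs.
    static void printVPParamPositions(Intrinsic::ID ID) {
      if (Optional<unsigned> MaskPos = VPIntrinsic::getMaskParamPos(ID))
        errs() << "mask operand at index " << *MaskPos << "\n";
      if (Optional<unsigned> EVLPos = VPIntrinsic::getVectorLengthParamPos(ID))
        errs() << "evl operand at index " << *EVLPos << "\n";
    }
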
-def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - -def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - -// Reductions -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + + // Casts + def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + + // Shuffles + def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + + // Comparisons + def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + + // Reductions def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmin : 
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: @@ -1840,28 +1948,26 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty], //===------------ Intrinsics to perform common vector shuffles ------------===// def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>], - [IntrNoMem]>; + [LLVMMatchType<0>], + [IntrNoMem]>; -//===---------- Intrinsics to query properties of scalable vectors --------===// -def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; - -//===---------- Intrinsics to perform subvector insertion/extraction ------===// -def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty], - [IntrNoMem, ImmArg>]>; - -def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_i64_ty], - [IntrNoMem, ImmArg>]>; - -//===---------- Named shufflevector intrinsics ------===// def int_experimental_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg>]>; +//===---------- Intrinsics to query properties of scalable vectors --------===// +def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; + +//===---------- Intrinsics to perform subvector insertion/extraction ------===// +def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty], + [IntrNoMem, ImmArg>]>; + +def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_i64_ty], + [IntrNoMem, ImmArg>]>; //===----------------- Pointer Authentication Intrinsics ------------------===// // @@ -1936,4 +2042,6 @@ include "llvm/IR/IntrinsicsBPF.td" include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" +include "llvm/IR/IntrinsicsSPIRV.td" include "llvm/IR/IntrinsicsVE.td" +include "llvm/IR/IntrinsicsDirectX.td" diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index a65ddff07a29..1256ab2c9f84 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -62,14 +62,17 @@ def int_aarch64_frint64x def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; +def int_aarch64_break : Intrinsic<[], [llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg>]>; + //===----------------------------------------------------------------------===// // Data Barrier Instructions -def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, +def int_aarch64_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>; -def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, +def int_aarch64_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>; -def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, +def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>; // A space-consuming 
intrinsic primarily for testing block and jump table @@ -907,15 +910,15 @@ let TargetPrefix = "aarch64" in { // Transactional Memory Extension (TME) Intrinsics let TargetPrefix = "aarch64" in { -def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, +def int_aarch64_tstart : ClangBuiltin<"__builtin_arm_tstart">, Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>; -def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>; +def int_aarch64_tcommit : ClangBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>; -def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">, +def int_aarch64_tcancel : ClangBuiltin<"__builtin_arm_tcancel">, Intrinsic<[], [llvm_i64_ty], [IntrWillReturn, ImmArg>]>; -def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, +def int_aarch64_ttest : ClangBuiltin<"__builtin_arm_ttest">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; @@ -1759,10 +1762,10 @@ def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic; // FFR manipulation // -def int_aarch64_sve_rdffr : GCCBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>; -def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; -def int_aarch64_sve_setffr : GCCBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>; -def int_aarch64_sve_wrffr : GCCBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; +def int_aarch64_sve_rdffr : ClangBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>; +def int_aarch64_sve_rdffr_z : ClangBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; +def int_aarch64_sve_setffr : ClangBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>; +def int_aarch64_sve_wrffr : ClangBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; // // Saturating scalar arithmetic @@ -2493,31 +2496,31 @@ def int_aarch64_sve_xar : AdvSIMD_2VectorArgIndexed_Intrinsic; // SVE2 - Optional AES, SHA-3 and SM4 // -def int_aarch64_sve_aesd : GCCBuiltin<"__builtin_sve_svaesd_u8">, +def int_aarch64_sve_aesd : ClangBuiltin<"__builtin_sve_svaesd_u8">, DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrNoMem]>; -def int_aarch64_sve_aesimc : GCCBuiltin<"__builtin_sve_svaesimc_u8">, +def int_aarch64_sve_aesimc : ClangBuiltin<"__builtin_sve_svaesimc_u8">, DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty], [IntrNoMem]>; -def int_aarch64_sve_aese : GCCBuiltin<"__builtin_sve_svaese_u8">, +def int_aarch64_sve_aese : ClangBuiltin<"__builtin_sve_svaese_u8">, DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrNoMem]>; -def int_aarch64_sve_aesmc : GCCBuiltin<"__builtin_sve_svaesmc_u8">, +def int_aarch64_sve_aesmc : ClangBuiltin<"__builtin_sve_svaesmc_u8">, DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty], [IntrNoMem]>; -def int_aarch64_sve_rax1 : GCCBuiltin<"__builtin_sve_svrax1_u64">, +def int_aarch64_sve_rax1 : ClangBuiltin<"__builtin_sve_svrax1_u64">, DefaultAttrsIntrinsic<[llvm_nxv2i64_ty], [llvm_nxv2i64_ty, 
llvm_nxv2i64_ty], [IntrNoMem]>; -def int_aarch64_sve_sm4e : GCCBuiltin<"__builtin_sve_svsm4e_u32">, +def int_aarch64_sve_sm4e : ClangBuiltin<"__builtin_sve_svsm4e_u32">, DefaultAttrsIntrinsic<[llvm_nxv4i32_ty], [llvm_nxv4i32_ty, llvm_nxv4i32_ty], [IntrNoMem]>; -def int_aarch64_sve_sm4ekey : GCCBuiltin<"__builtin_sve_svsm4ekey_u32">, +def int_aarch64_sve_sm4ekey : ClangBuiltin<"__builtin_sve_svsm4ekey_u32">, DefaultAttrsIntrinsic<[llvm_nxv4i32_ty], [llvm_nxv4i32_ty, llvm_nxv4i32_ty], [IntrNoMem]>; @@ -2580,3 +2583,130 @@ def int_aarch64_sve_whilewr_b : SVE2_CONFLICT_DETECT_Intrinsic; def int_aarch64_sve_whilewr_h : SVE2_CONFLICT_DETECT_Intrinsic; def int_aarch64_sve_whilewr_s : SVE2_CONFLICT_DETECT_Intrinsic; def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic; + +// Scalable Matrix Extension (SME) Intrinsics +let TargetPrefix = "aarch64" in { + class SME_Load_Store_Intrinsic + : DefaultAttrsIntrinsic<[], + [pred_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; + + // Loads + def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1b_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic; + + // Stores + def int_aarch64_sme_st1b_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1b_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic; + def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic; + + // Spill + fill + def int_aarch64_sme_ldr : DefaultAttrsIntrinsic< + [], [llvm_i32_ty, llvm_ptr_ty]>; + def int_aarch64_sme_str : DefaultAttrsIntrinsic< + [], [llvm_i32_ty, llvm_ptr_ty]>; + + class SME_TileToVector_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i64_ty, llvm_i32_ty]>; + class SME_VectorToTile_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i64_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty]>; + + def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic; + def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic; + def int_aarch64_sme_write_horiz : SME_VectorToTile_Intrinsic; + def int_aarch64_sme_write_vert : SME_VectorToTile_Intrinsic; + + def int_aarch64_sme_readq_horiz : SME_TileToVector_Intrinsic; + def int_aarch64_sme_readq_vert : SME_TileToVector_Intrinsic; + def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic; + def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic; + + def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>; + + class SME_OuterProduct_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i64_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + 
llvm_anyvector_ty]>; + + def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic; + + def int_aarch64_sme_mopa_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_mops_wide : SME_OuterProduct_Intrinsic; + + def int_aarch64_sme_smopa_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_smops_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_umopa_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_umops_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_sumopa_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_sumops_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic; + def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; + + // + // Counting elements + // + + class AdvSIMD_SME_CNTSB_Intrinsic + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + + def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic; + + // + // PSTATE Functions + // + + def int_aarch64_sme_get_pstatesm + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sme_get_tpidr2 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], + [IntrNoMem, IntrHasSideEffects]>; + def int_aarch64_sme_set_tpidr2 + : DefaultAttrsIntrinsic<[], [llvm_i64_ty], + [IntrNoMem, IntrHasSideEffects]>; + // Clamp + // + + def int_aarch64_sve_sclamp : AdvSIMD_3VectorArg_Intrinsic; + def int_aarch64_sve_uclamp : AdvSIMD_3VectorArg_Intrinsic; + + // + // Reversal + // + + def int_aarch64_sve_revd : AdvSIMD_Merged1VectorArg_Intrinsic; + + // + // Predicate selection + // + + def int_aarch64_sve_psel + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, llvm_i32_ty]>; +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c5d266eb57ec..c2dcfc254568 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -14,7 +14,7 @@ class AMDGPUReadPreloadRegisterIntrinsic : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; class AMDGPUReadPreloadRegisterIntrinsicNamed - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, GCCBuiltin; + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, ClangBuiltin; // Used to tag image and resource intrinsics with information used to generate // mem operands. 
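
The GCCBuiltin-to-ClangBuiltin rename that runs through this file (and through the AArch64 definitions above) pairs with the renamed lookup entry point declared in Intrinsics.h earlier in this patch. A sketch of resolving a builtin name to an intrinsic ID with the new name; the prefix and builtin strings are just examples:

    #include "llvm/IR/Intrinsics.h"

    using namespace llvm;

    // Map a Clang builtin name to its target intrinsic; the lookup yields
    // Intrinsic::not_intrinsic when no intrinsic declares that builtin name.
    static Intrinsic::ID lookupAMDGCNBarrier() {
      return Intrinsic::getIntrinsicForClangBuiltin(
          "amdgcn", "__builtin_amdgcn_s_barrier");
    }
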
@@ -47,12 +47,12 @@ defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz; defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz; -def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">, +def int_r600_group_barrier : ClangBuiltin<"__builtin_r600_group_barrier">, Intrinsic<[], [], [IntrConvergent, IntrWillReturn]>; // AS 7 is PARAM_I_ADDRESS, used for kernel arguments def int_r600_implicitarg_ptr : - GCCBuiltin<"__builtin_r600_implicitarg_ptr">, + ClangBuiltin<"__builtin_r600_implicitarg_ptr">, Intrinsic<[LLVMQualPointerType], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; @@ -61,7 +61,7 @@ def int_r600_rat_store_typed : // 2nd parameter: Index // 3rd parameter: Constant RAT ID Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>, - GCCBuiltin<"__builtin_r600_rat_store_typed">; + ClangBuiltin<"__builtin_r600_rat_store_typed">; def int_r600_recipsqrt_ieee : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] @@ -145,30 +145,30 @@ def int_amdgcn_dispatch_ptr : [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_amdgcn_queue_ptr : - GCCBuiltin<"__builtin_amdgcn_queue_ptr">, + ClangBuiltin<"__builtin_amdgcn_queue_ptr">, Intrinsic<[LLVMQualPointerType], [], [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_amdgcn_kernarg_segment_ptr : - GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, + ClangBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, Intrinsic<[LLVMQualPointerType], [], [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_amdgcn_implicitarg_ptr : - GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, + ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">, Intrinsic<[LLVMQualPointerType], [], [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_amdgcn_groupstaticsize : - GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, + ClangBuiltin<"__builtin_amdgcn_groupstaticsize">, Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_amdgcn_dispatch_id : - GCCBuiltin<"__builtin_amdgcn_dispatch_id">, + ClangBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_amdgcn_implicit_buffer_ptr : - GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, + ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, Intrinsic<[LLVMQualPointerType], [], [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; @@ -190,7 +190,7 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[], [IntrConvergent, ImmArg>]>; def int_amdgcn_wavefrontsize : - GCCBuiltin<"__builtin_amdgcn_wavefrontsize">, + ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; @@ -200,20 +200,44 @@ def int_amdgcn_wavefrontsize : // The first parameter is s_sendmsg immediate (i16), // the second one is copied to m0 -def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, +def int_amdgcn_s_sendmsg : ClangBuiltin<"__builtin_amdgcn_s_sendmsg">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; -def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, +def int_amdgcn_s_sendmsghalt : ClangBuiltin<"__builtin_amdgcn_s_sendmsghalt">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; -def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, + +// gfx11 intrinsic 
+// The first parameter is s_sendmsg immediate (i16). Return type is i32 or i64.
+def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty],
+  [ImmArg>, IntrNoMem, IntrHasSideEffects]>;
+
+def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">,
   Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

-def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
+def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">,
   Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

-def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
+// The 1st parameter is a mask for the types of instructions that may be allowed
+// to cross the SCHED_BARRIER during scheduling.
+// MASK = 0x0000 0000: No instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0001: ALL, non-memory, non-side-effect producing instructions may be
+//                     scheduled across SCHED_BARRIER, i.e. allow ALU instructions to pass.
+// MASK = 0x0000 0002: VALU instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0004: SALU instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0008: MFMA instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0010: ALL VMEM instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0020: VMEM read instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0040: VMEM write instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0080: ALL DS instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0100: ALL DS read instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER.
+def int_amdgcn_sched_barrier : ClangBuiltin<"__builtin_amdgcn_sched_barrier">, + Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrConvergent, + IntrWillReturn]>; + +def int_amdgcn_s_waitcnt : ClangBuiltin<"__builtin_amdgcn_s_waitcnt">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_div_scale : Intrinsic< @@ -255,7 +279,7 @@ def int_amdgcn_log_clamp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">, +def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; @@ -274,7 +298,7 @@ def int_amdgcn_rcp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">, +def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">, Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; @@ -287,7 +311,7 @@ def int_amdgcn_rsq : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">, +def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">, Intrinsic< [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; @@ -316,31 +340,31 @@ def int_amdgcn_fract : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">, +def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">, Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_cvt_pknorm_i16 : - GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, + ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_cvt_pknorm_u16 : - GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, + ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_cvt_pk_i16 : - GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">, Intrinsic< [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">, +def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">, Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; @@ -350,31 +374,31 @@ def int_amdgcn_class : Intrinsic< [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">, +def int_amdgcn_fmed3 : ClangBuiltin<"__builtin_amdgcn_fmed3">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">, +def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def 
int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">, +def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">, +def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; -def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">, +def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] @@ -838,6 +862,13 @@ defset list AMDGPUImageDimIntrinsics = { [IntrReadMem], [SDNPMemOperand]>; } + foreach dim = AMDGPUDims.Msaa in { + def int_amdgcn_image_msaa_load # _ # dim.Name: + AMDGPUImageDimIntrinsic< + AMDGPUDimNoSampleProfile<"MSAA_LOAD", dim, [llvm_any_ty], []>, + [IntrReadMem], [SDNPMemOperand]>; + } + ////////////////////////////////////////////////////////////////////////// // sample and getlod intrinsics ////////////////////////////////////////////////////////////////////////// @@ -949,10 +980,12 @@ class AMDGPUBufferLoad : Intrinsic < def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; +// Generate a buffer_load instruction that may be optimized to s_buffer_load if +// the offset argument is uniform. def int_amdgcn_s_buffer_load : Intrinsic < [llvm_any_ty], [llvm_v4i32_ty, // rsrc(SGPR) - llvm_i32_ty, // byte offset(SGPR/imm) + llvm_i32_ty, // byte offset llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc) [IntrNoMem, IntrWillReturn, ImmArg>]>, AMDGPURsrcIntrinsic<0>; @@ -1259,6 +1292,40 @@ class AMDGPUBufferAtomicFP : Intrinsic < // Legacy form of the intrinsic. raw and struct forms should be preferred. 
def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP; + +class AMDGPURawBufferLoadLDS : Intrinsic < + [], + [llvm_v4i32_ty, // rsrc(SGPR) + LLVMQualPointerType, // LDS base offset + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+)) + // swizzled buffer (bit 3 = swz)) + [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, + ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; +def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS; + +class AMDGPUStructBufferLoadLDS : Intrinsic < + [], + [llvm_v4i32_ty, // rsrc(SGPR) + LLVMQualPointerType, // LDS base offset + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+)) + // swizzled buffer (bit 3 = swz)) + [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, + ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; +def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS; + } // defset AMDGPUBufferIntrinsics // Uses that do not set the done bit should set IntrWriteMem on the @@ -1278,7 +1345,21 @@ def int_amdgcn_exp : Intrinsic <[], [ IntrWillReturn] >; -// exp with compr bit set. +// exp with row_en bit set. Only supported on GFX11+. +def int_amdgcn_exp_row : Intrinsic <[], [ + llvm_i32_ty, // tgt, + llvm_i32_ty, // en + llvm_any_ty, // src0 (f32 or i32) + LLVMMatchType<0>, // src1 + LLVMMatchType<0>, // src2 + LLVMMatchType<0>, // src3 + llvm_i1_ty, // done + llvm_i32_ty], // row number + [ImmArg>, ImmArg>, ImmArg>, + IntrWriteMem, IntrInaccessibleMemOnly, IntrWillReturn] +>; + +// exp with compr bit set. Not supported on GFX11+. 
def int_amdgcn_exp_compr : Intrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en @@ -1292,35 +1373,35 @@ def int_amdgcn_exp_compr : Intrinsic <[], [ >; def int_amdgcn_buffer_wbinvl1_sc : - GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, + ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_buffer_wbinvl1 : - GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, + ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_s_dcache_inv : - GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, + ClangBuiltin<"__builtin_amdgcn_s_dcache_inv">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_s_memtime : - GCCBuiltin<"__builtin_amdgcn_s_memtime">, + ClangBuiltin<"__builtin_amdgcn_s_memtime">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_s_sleep : - GCCBuiltin<"__builtin_amdgcn_s_sleep">, + ClangBuiltin<"__builtin_amdgcn_s_sleep">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]> { } def int_amdgcn_s_incperflevel : - GCCBuiltin<"__builtin_amdgcn_s_incperflevel">, + ClangBuiltin<"__builtin_amdgcn_s_incperflevel">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]> { } def int_amdgcn_s_decperflevel : - GCCBuiltin<"__builtin_amdgcn_s_decperflevel">, + ClangBuiltin<"__builtin_amdgcn_s_decperflevel">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]> { } @@ -1329,11 +1410,16 @@ def int_amdgcn_s_sethalt : Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; +def int_amdgcn_s_setprio : + ClangBuiltin<"__builtin_amdgcn_s_setprio">, + Intrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects, IntrWillReturn]>; + +// This is IntrHasSideEffects so it can be used to read cycle counters. def int_amdgcn_s_getreg : - GCCBuiltin<"__builtin_amdgcn_s_getreg">, + ClangBuiltin<"__builtin_amdgcn_s_getreg">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, - IntrWillReturn, ImmArg>] + [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] >; // Note this can be used to set FP environment properties that are @@ -1341,7 +1427,7 @@ def int_amdgcn_s_getreg : // available (and value required to access them) may differ per // subtarget. llvm.amdgcn.s.setreg(hwmode, value) def int_amdgcn_s_setreg : - GCCBuiltin<"__builtin_amdgcn_s_setreg">, + ClangBuiltin<"__builtin_amdgcn_s_setreg">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] >; @@ -1353,14 +1439,14 @@ def int_amdgcn_s_setreg : // produce the desired results as optimizations may cause code movement, // especially as we explicitly use IntrNoMem to allow optimizations. 
def int_amdgcn_s_getpc : - GCCBuiltin<"__builtin_amdgcn_s_getpc">, + ClangBuiltin<"__builtin_amdgcn_s_getpc">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; // __builtin_amdgcn_interp_mov , , , // param values: 0 = P10, 1 = P20, 2 = P0 def int_amdgcn_interp_mov : - GCCBuiltin<"__builtin_amdgcn_interp_mov">, + ClangBuiltin<"__builtin_amdgcn_interp_mov">, Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, @@ -1370,7 +1456,7 @@ def int_amdgcn_interp_mov : // This intrinsic reads from lds, but the memory values are constant, // so it behaves like IntrNoMem. def int_amdgcn_interp_p1 : - GCCBuiltin<"__builtin_amdgcn_interp_p1">, + ClangBuiltin<"__builtin_amdgcn_interp_p1">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, @@ -1378,7 +1464,7 @@ def int_amdgcn_interp_p1 : // __builtin_amdgcn_interp_p2 , , , , def int_amdgcn_interp_p2 : - GCCBuiltin<"__builtin_amdgcn_interp_p2">, + ClangBuiltin<"__builtin_amdgcn_interp_p2">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, @@ -1388,7 +1474,7 @@ def int_amdgcn_interp_p2 : // __builtin_amdgcn_interp_p1_f16 , , , , // high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p1_f16 : - GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">, + ClangBuiltin<"__builtin_amdgcn_interp_p1_f16">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, @@ -1397,12 +1483,57 @@ def int_amdgcn_interp_p1_f16 : // __builtin_amdgcn_interp_p2_f16 , , , , , // high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p2_f16 : - GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">, + ClangBuiltin<"__builtin_amdgcn_interp_p2_f16">, Intrinsic<[llvm_half_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>, ImmArg>, ImmArg>]>; +// llvm.amdgcn.lds.direct.load +// The input argument is m0, which contains a packed combination of address +// offset and flags describing the data type. +def int_amdgcn_lds_direct_load : + Intrinsic<[llvm_any_ty], // overloaded for types u8, u16, i32/f32, i8, i16 + [llvm_i32_ty], + [IntrReadMem, IntrSpeculatable, IntrWillReturn]>; + +// llvm.amdgcn.lds.param.load , , +// Like interp intrinsics, this reads from lds, but the memory values are constant, +// so it behaves like IntrNoMem. +def int_amdgcn_lds_param_load : + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + ImmArg>, ImmArg>]>; + +// llvm.amdgcn.interp.inreg.p10
 <p>, <i>, <p0>
+def int_amdgcn_interp_inreg_p10 :
+  Intrinsic<[llvm_float_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
+// llvm.amdgcn.interp.inreg.p2 <p>, <j>, <tmp>
+def int_amdgcn_interp_inreg_p2 :
+  Intrinsic<[llvm_float_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
+// llvm.amdgcn.interp.inreg.p10.f16 <p>, <i>, <p0>, <high>
+// high selects whether high or low 16-bits are used for p and p0 operands
+def int_amdgcn_interp_inreg_p10_f16:
+  Intrinsic<[llvm_float_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty],
+            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+             ImmArg<ArgIndex<3>>]>;
+
+// llvm.amdgcn.interp.inreg.p2.f16 <p>
, , , +// high selects whether high or low 16-bits are used for p operand +def int_amdgcn_interp_inreg_p2_f16 : + Intrinsic<[llvm_half_ty], + [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + ImmArg>]>; + // Deprecated: use llvm.amdgcn.live.mask instead. def int_amdgcn_ps_live : Intrinsic < [llvm_i1_ty], @@ -1416,18 +1547,18 @@ def int_amdgcn_live_mask : Intrinsic <[llvm_i1_ty], >; def int_amdgcn_mbcnt_lo : - GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">, + ClangBuiltin<"__builtin_amdgcn_mbcnt_lo">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; def int_amdgcn_mbcnt_hi : - GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">, + ClangBuiltin<"__builtin_amdgcn_mbcnt_hi">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; // llvm.amdgcn.ds.swizzle src offset def int_amdgcn_ds_swizzle : - GCCBuiltin<"__builtin_amdgcn_ds_swizzle">, + ClangBuiltin<"__builtin_amdgcn_ds_swizzle">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>]>; @@ -1443,55 +1574,55 @@ def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], >; def int_amdgcn_lerp : - GCCBuiltin<"__builtin_amdgcn_lerp">, + ClangBuiltin<"__builtin_amdgcn_lerp">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_sad_u8 : - GCCBuiltin<"__builtin_amdgcn_sad_u8">, + ClangBuiltin<"__builtin_amdgcn_sad_u8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_msad_u8 : - GCCBuiltin<"__builtin_amdgcn_msad_u8">, + ClangBuiltin<"__builtin_amdgcn_msad_u8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_sad_hi_u8 : - GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">, + ClangBuiltin<"__builtin_amdgcn_sad_hi_u8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_sad_u16 : - GCCBuiltin<"__builtin_amdgcn_sad_u16">, + ClangBuiltin<"__builtin_amdgcn_sad_u16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_qsad_pk_u16_u8 : - GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, + ClangBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_mqsad_pk_u16_u8 : - GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, + ClangBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_mqsad_u32_u8 : - GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, + ClangBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; def int_amdgcn_cvt_pk_u8_f32 : - GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; @@ -1511,14 +1642,14 @@ def int_amdgcn_ballot : [IntrNoMem, IntrConvergent, IntrWillReturn]>; def int_amdgcn_readfirstlane : - GCCBuiltin<"__builtin_amdgcn_readfirstlane">, + ClangBuiltin<"__builtin_amdgcn_readfirstlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], 
[IntrNoMem, IntrConvergent, IntrWillReturn]>; // The lane argument must be uniform across the currently active threads of the // current wave. Otherwise, the result is undefined. def int_amdgcn_readlane : - GCCBuiltin<"__builtin_amdgcn_readlane">, + ClangBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent, IntrWillReturn]>; @@ -1526,7 +1657,7 @@ def int_amdgcn_readlane : // currently active threads of the current wave. Otherwise, the result is // undefined. def int_amdgcn_writelane : - GCCBuiltin<"__builtin_amdgcn_writelane">, + ClangBuiltin<"__builtin_amdgcn_writelane">, Intrinsic<[llvm_i32_ty], [ llvm_i32_ty, // uniform value to write: returned by the selected lane llvm_i32_ty, // uniform lane select @@ -1535,7 +1666,7 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn] >; -def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">, +def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; @@ -1565,7 +1696,7 @@ def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty], // bar_val is the total number of waves that will wait on this // barrier, minus 1. def int_amdgcn_ds_gws_init : - GCCBuiltin<"__builtin_amdgcn_ds_gws_init">, + ClangBuiltin<"__builtin_amdgcn_ds_gws_init">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrWriteMem, @@ -1577,7 +1708,7 @@ def int_amdgcn_ds_gws_init : // bar_val is the total number of waves that will wait on this // barrier, minus 1. def int_amdgcn_ds_gws_barrier : - GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">, + ClangBuiltin<"__builtin_amdgcn_ds_gws_barrier">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", @@ -1586,7 +1717,7 @@ def int_amdgcn_ds_gws_barrier : // llvm.amdgcn.ds.gws.sema.v(i32 resource_id) def int_amdgcn_ds_gws_sema_v : - GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">, + ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_v">, Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", @@ -1595,7 +1726,7 @@ def int_amdgcn_ds_gws_sema_v : // llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id) def int_amdgcn_ds_gws_sema_br : - GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">, + ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_br">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", @@ -1604,7 +1735,7 @@ def int_amdgcn_ds_gws_sema_br : // llvm.amdgcn.ds.gws.sema.p(i32 resource_id) def int_amdgcn_ds_gws_sema_p : - GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">, + ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_p">, Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", @@ -1613,7 +1744,7 @@ def int_amdgcn_ds_gws_sema_p : // llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id) def int_amdgcn_ds_gws_sema_release_all : - GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">, + ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">, Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", @@ -1644,7 +1775,7 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn? 
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; -def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">, +def int_amdgcn_endpgm : ClangBuiltin<"__builtin_amdgcn_endpgm">, Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects] >; @@ -1683,13 +1814,13 @@ def int_amdgcn_set_inactive : [IntrNoMem, IntrConvergent, IntrWillReturn]>; // Return if the given flat pointer points to a local memory address. -def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">, +def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">, Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable, NoCapture>, IntrWillReturn] >; // Return if the given flat pointer points to a private memory address. -def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">, +def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable, NoCapture>, IntrWillReturn] >; @@ -1699,11 +1830,11 @@ def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">, //===----------------------------------------------------------------------===// def int_amdgcn_s_dcache_inv_vol : - GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, + ClangBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_buffer_wbinvl1_vol : - GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, + ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; //===----------------------------------------------------------------------===// @@ -1732,48 +1863,67 @@ def int_amdgcn_update_dpp : ImmArg>, ImmArg>]>; def int_amdgcn_s_dcache_wb : - GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, + ClangBuiltin<"__builtin_amdgcn_s_dcache_wb">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_s_dcache_wb_vol : - GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, + ClangBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; def int_amdgcn_s_memrealtime : - GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, + ClangBuiltin<"__builtin_amdgcn_s_memrealtime">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; // llvm.amdgcn.ds.permute def int_amdgcn_ds_permute : - GCCBuiltin<"__builtin_amdgcn_ds_permute">, + ClangBuiltin<"__builtin_amdgcn_ds_permute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent, IntrWillReturn]>; // llvm.amdgcn.ds.bpermute def int_amdgcn_ds_bpermute : - GCCBuiltin<"__builtin_amdgcn_ds_bpermute">, + ClangBuiltin<"__builtin_amdgcn_ds_bpermute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent, IntrWillReturn]>; // llvm.amdgcn.perm def int_amdgcn_perm : - GCCBuiltin<"__builtin_amdgcn_perm">, + ClangBuiltin<"__builtin_amdgcn_perm">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +//===----------------------------------------------------------------------===// +// GFX9 Intrinsics +//===----------------------------------------------------------------------===// + +class AMDGPUGlobalLoadLDS : Intrinsic < + [], + [LLVMQualPointerType, // Base global pointer to load from + LLVMQualPointerType, // LDS base pointer to store to + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // imm offset (applied to both global and LDS address) + llvm_i32_ty], //
auxiliary data (imm, cachepolicy (bit 0 = glc/sc0, + // bit 1 = slc/sc1, + // bit 2 = dlc on gfx10+, + // bit 4 = scc/nt on gfx90a+)) + [IntrWillReturn, NoCapture>, NoCapture>, + ImmArg>, ImmArg>, ImmArg>, ImmArg>], + "", [SDNPMemOperand]>; +def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; + //===----------------------------------------------------------------------===// // GFX10 Intrinsics //===----------------------------------------------------------------------===// // llvm.amdgcn.permlane16 -def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">, +def int_amdgcn_permlane16 : ClangBuiltin<"__builtin_amdgcn_permlane16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>]>; // llvm.amdgcn.permlanex16 -def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">, +def int_amdgcn_permlanex16 : ClangBuiltin<"__builtin_amdgcn_permlanex16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, @@ -1789,9 +1939,9 @@ def int_amdgcn_mov_dpp8 : ImmArg>]>; def int_amdgcn_s_get_waveid_in_workgroup : - GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">, + ClangBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">, Intrinsic<[llvm_i32_ty], [], - [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]>; + [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; class AMDGPUGlobalAtomicRtn : Intrinsic < [vt], @@ -1812,6 +1962,75 @@ def int_amdgcn_image_bvh_intersect_ray : LLVMMatchType<1>, llvm_v4i32_ty], [IntrReadMem, IntrWillReturn]>; +//===----------------------------------------------------------------------===// +// GFX11 Intrinsics +//===----------------------------------------------------------------------===// + +// llvm.amdgcn.permlane64 +def int_amdgcn_permlane64 : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn]>; + +def int_amdgcn_ds_add_gs_reg_rtn : + ClangBuiltin<"__builtin_amdgcn_ds_add_gs_reg_rtn">, + Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], + [ImmArg>, IntrHasSideEffects, IntrWillReturn]>; + +def int_amdgcn_ds_sub_gs_reg_rtn : + ClangBuiltin<"__builtin_amdgcn_ds_sub_gs_reg_rtn">, + Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], + [ImmArg>, IntrHasSideEffects, IntrWillReturn]>; + +// WMMA (Wave Matrix Multiply-Accumulate) intrinsics +// +// These operations perform a matrix multiplication and accumulation of +// the form: D = A * B + C.
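As a scalar reference for the D = A * B + C semantics just stated, the following minimal C sketch spells out the computation for the f32 16x16x16 variant. It is illustrative only: it assumes square M = N = K = 16 tiles and plain float arithmetic, and it ignores how the real instruction distributes the tile across the lanes of a wave.

    /* Scalar model of D = A * B + C for one 16x16x16 WMMA tile (sketch only). */
    enum { M = 16, N = 16, K = 16 };

    static void wmma_ref(const float A[M][K], const float B[K][N],
                         const float C[M][N], float D[M][N]) {
      for (int i = 0; i < M; ++i)
        for (int j = 0; j < N; ++j) {
          float acc = C[i][j];          /* start from the accumulator tile */
          for (int k = 0; k < K; ++k)
            acc += A[i][k] * B[k][j];   /* multiply-accumulate */
          D[i][j] = acc;
        }
    }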
+ +class AMDGPUWmmaIntrinsic : + Intrinsic< + [CD], // %D + [ + AB, // %A + AB, // %B + LLVMMatchType<0>, // %C + ], + [IntrNoMem, IntrConvergent, IntrWillReturn] +>; + +class AMDGPUWmmaIntrinsicOPSEL : + Intrinsic< + [CD], // %D + [ + AB, // %A + AB, // %B + LLVMMatchType<0>, // %C + llvm_i1_ty, // %high + ], + [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>] +>; + +class AMDGPUWmmaIntrinsicIU : + Intrinsic< + [CD], // %D + [ + llvm_i1_ty, // %A_sign + AB, // %A + llvm_i1_ty, // %B_sign + AB, // %B + LLVMMatchType<0>, // %C + llvm_i1_ty, // %clamp + ], + [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, ImmArg>] +>; + +def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic; +def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic; +def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL; +def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL; +def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU; +def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU; + + //===----------------------------------------------------------------------===// // Deep learning intrinsics. //===----------------------------------------------------------------------===// @@ -1819,7 +2038,7 @@ def int_amdgcn_image_bvh_intersect_ray : // f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2 : - GCCBuiltin<"__builtin_amdgcn_fdot2">, + ClangBuiltin<"__builtin_amdgcn_fdot2">, Intrinsic< [llvm_float_ty], // %r [ @@ -1831,10 +2050,53 @@ def int_amdgcn_fdot2 : [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] >; +// f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c) +// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c +def int_amdgcn_fdot2_f16_f16 : + ClangBuiltin<"__builtin_amdgcn_fdot2_f16_f16">, + Intrinsic< + [llvm_half_ty], // %r + [ + llvm_v2f16_ty, // %a + llvm_v2f16_ty, // %b + llvm_half_ty // %c + ], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] + >; + +// bf16 %r = llvm.amdgcn.fdot2.bf16.bf16(v2bf16 %a, v2bf16 %b, bf16 %c) +// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c +def int_amdgcn_fdot2_bf16_bf16 : + ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">, + Intrinsic< + [llvm_i16_ty], // %r + [ + llvm_v2i16_ty, // %a + llvm_v2i16_ty, // %b + llvm_i16_ty // %c + ], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] + >; + +// f32 %r = llvm.amdgcn.fdot2.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) +// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c +def int_amdgcn_fdot2_f32_bf16 : + ClangBuiltin<"__builtin_amdgcn_fdot2_f32_bf16">, + Intrinsic< + [llvm_float_ty], // %r + [ + llvm_v2i16_ty, // %a + llvm_v2i16_ty, // %b + llvm_float_ty, // %c + llvm_i1_ty // %clamp + ], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + >; + // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_sdot2 : - GCCBuiltin<"__builtin_amdgcn_sdot2">, + ClangBuiltin<"__builtin_amdgcn_sdot2">, Intrinsic< [llvm_i32_ty], // %r [ @@ -1849,7 +2111,7 @@ def int_amdgcn_sdot2 : // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_udot2 : - GCCBuiltin<"__builtin_amdgcn_udot2">, + ClangBuiltin<"__builtin_amdgcn_udot2">, Intrinsic< [llvm_i32_ty], // %r [ @@ -1864,7 +2126,7 @@ def int_amdgcn_udot2 : // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def 
int_amdgcn_sdot4 : - GCCBuiltin<"__builtin_amdgcn_sdot4">, + ClangBuiltin<"__builtin_amdgcn_sdot4">, Intrinsic< [llvm_i32_ty], // %r [ @@ -1879,7 +2141,7 @@ // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_udot4 : - GCCBuiltin<"__builtin_amdgcn_udot4">, + ClangBuiltin<"__builtin_amdgcn_udot4">, Intrinsic< [llvm_i32_ty], // %r [ @@ -1891,11 +2153,32 @@ def int_amdgcn_udot4 : [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] >; +// i32 %r = llvm.amdgcn.sudot4(i1 %a_sign, v4i8 (as i32) %a, i1 %b_sign, v4i8 (as i32) %b, i32 %c, i1 %clamp) +// Treat input as signed (_sign = 1) or unsigned (_sign = 0). +// a[i in 0...3] = (%a_sign ? a.i8[i] : promoteToSigned(a.u8[i])); +// b[i in 0...3] = (%b_sign ? b.i8[i] : promoteToSigned(b.u8[i])); +// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c +def int_amdgcn_sudot4 : + ClangBuiltin<"__builtin_amdgcn_sudot4">, + Intrinsic< + [llvm_i32_ty], // %r + [ + llvm_i1_ty, // %a_sign + llvm_i32_ty, // %a + llvm_i1_ty, // %b_sign + llvm_i32_ty, // %b + llvm_i32_ty, // %c + llvm_i1_ty // %clamp + ], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + ImmArg>, ImmArg>, ImmArg>] + >; + // i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_sdot8 : - GCCBuiltin<"__builtin_amdgcn_sdot8">, + ClangBuiltin<"__builtin_amdgcn_sdot8">, Intrinsic< [llvm_i32_ty], // %r [ @@ -1911,7 +2194,7 @@ def int_amdgcn_sdot8 : // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_udot8 : - GCCBuiltin<"__builtin_amdgcn_udot8">, + ClangBuiltin<"__builtin_amdgcn_udot8">, Intrinsic< [llvm_i32_ty], // %r [ @@ -1923,6 +2206,28 @@ def int_amdgcn_udot8 : [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] >; +// i32 %r = llvm.amdgcn.sudot8(i1 %a_sign, v8i4 (as i32) %a, i1 %b_sign, v8i4 (as i32) %b, i32 %c, i1 %clamp) +// Treat input as signed (_sign = 1) or unsigned (_sign = 0). +// a[i in 0...7] = (%a_sign ? a.i4[i] : promoteToSigned(a.u4[i])); +// b[i in 0...7] = (%b_sign ?
b.i4[i] : promoteToSigned(b.u4[i])); +// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + +// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c + def int_amdgcn_sudot8 : + ClangBuiltin<"__builtin_amdgcn_sudot8">, + Intrinsic< + [llvm_i32_ty], // %r + [ + llvm_i1_ty, // %a_sign + llvm_i32_ty, // %a + llvm_i1_ty, // %b_sign + llvm_i32_ty, // %b + llvm_i32_ty, // %c + llvm_i1_ty // %clamp + ], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + ImmArg>, ImmArg>, ImmArg>] + >; + //===----------------------------------------------------------------------===// // gfx908 intrinsics // ===----------------------------------------------------------------------===// @@ -1931,7 +2236,7 @@ def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn; // llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp class AMDGPUMfmaIntrinsic : - GCCBuiltin, + ClangBuiltin, Intrinsic<[DestTy], [SrcABTy, SrcABTy, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], @@ -1975,9 +2280,46 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic; +// Note: in gfx940 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA. +// Three bits corresponding to the neg modifier applied to the respective +// source operand. def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic; +//===----------------------------------------------------------------------===// +// gfx940 intrinsics +// ===----------------------------------------------------------------------===// + +// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. +def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn; +def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn; +def int_amdgcn_ds_fadd_v2bf16 : Intrinsic< + [llvm_v2i16_ty], + [LLVMQualPointerType, llvm_v2i16_ty], + [IntrArgMemOnly, IntrWillReturn, NoCapture>]>, + ClangBuiltin<"__builtin_amdgcn_ds_atomic_fadd_v2bf16">; + +def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; +def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; +def int_amdgcn_mfma_f32_16x16x8_xf32 : AMDGPUMfmaIntrinsic; +def int_amdgcn_mfma_f32_32x32x4_xf32 : AMDGPUMfmaIntrinsic; + +// llvm.amdgcn.smfmac.?32.* vdst, srcA, srcB, srcC, index, cbsz, abid +class AMDGPUMSmfmacIntrinsic : + ClangBuiltin, + Intrinsic<[DestTy], + [SrcA, SrcB, DestTy, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, IntrWillReturn, + ImmArg>, ImmArg>]>; + +def int_amdgcn_smfmac_f32_16x16x32_f16 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_32x32x16_f16 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_16x16x32_bf16 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index a42484757592..3d905dbca6b9 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -22,199 +22,199 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". 
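The ARM definitions that follow are again mechanical GCCBuiltin-to-ClangBuiltin renames. As a reminder of what the smul{b,t}{b,t} family defined just below computes (B selects the bottom signed halfword of an operand, T the top), here is a minimal C sketch of the reference semantics; the helper names are illustrative and not part of the ACLE:

    #include <stdint.h>

    /* Select the bottom (b) or top (t) signed halfword of a 32-bit value. */
    static int32_t half_b(int32_t x) { return (int16_t)x; }
    static int32_t half_t(int32_t x) { return (int16_t)(x >> 16); }

    /* SMULBB/SMULBT/SMULTB/SMULTT: signed 16 x 16 -> 32-bit multiply of the
     * selected halfwords (reference model only). */
    static int32_t smulbb_ref(int32_t a, int32_t b) { return half_b(a) * half_b(b); }
    static int32_t smulbt_ref(int32_t a, int32_t b) { return half_b(a) * half_t(b); }
    static int32_t smultb_ref(int32_t a, int32_t b) { return half_t(a) * half_b(b); }
    static int32_t smultt_ref(int32_t a, int32_t b) { return half_t(a) * half_t(b); }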
def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; // 16-bit multiplications -def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">, +def int_arm_smulbb : ClangBuiltin<"__builtin_arm_smulbb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">, +def int_arm_smulbt : ClangBuiltin<"__builtin_arm_smulbt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">, +def int_arm_smultb : ClangBuiltin<"__builtin_arm_smultb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">, +def int_arm_smultt : ClangBuiltin<"__builtin_arm_smultt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">, +def int_arm_smulwb : ClangBuiltin<"__builtin_arm_smulwb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">, +def int_arm_smulwt : ClangBuiltin<"__builtin_arm_smulwt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; //===----------------------------------------------------------------------===// // Saturating Arithmetic -def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">, +def int_arm_qadd : ClangBuiltin<"__builtin_arm_qadd">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative, IntrNoMem]>; -def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">, +def int_arm_qsub : ClangBuiltin<"__builtin_arm_qsub">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">, +def int_arm_ssat : ClangBuiltin<"__builtin_arm_ssat">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">, +def int_arm_usat : ClangBuiltin<"__builtin_arm_usat">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Accumulating multiplications -def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">, +def int_arm_smlabb : ClangBuiltin<"__builtin_arm_smlabb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">, +def int_arm_smlabt : ClangBuiltin<"__builtin_arm_smlabt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">, +def int_arm_smlatb : ClangBuiltin<"__builtin_arm_smlatb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">, +def int_arm_smlatt : ClangBuiltin<"__builtin_arm_smlatt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">, +def int_arm_smlawb : ClangBuiltin<"__builtin_arm_smlawb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">, +def int_arm_smlawt : ClangBuiltin<"__builtin_arm_smlawt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Parallel 16-bit saturation -def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">, +def int_arm_ssat16 : ClangBuiltin<"__builtin_arm_ssat16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">, +def 
int_arm_usat16 : ClangBuiltin<"__builtin_arm_usat16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Packing and unpacking -def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">, +def int_arm_sxtab16 : ClangBuiltin<"__builtin_arm_sxtab16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">, +def int_arm_sxtb16 : ClangBuiltin<"__builtin_arm_sxtb16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">, +def int_arm_uxtab16 : ClangBuiltin<"__builtin_arm_uxtab16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">, +def int_arm_uxtb16 : ClangBuiltin<"__builtin_arm_uxtb16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; // Parallel selection, reads the GE flags. -def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">, +def int_arm_sel : ClangBuiltin<"__builtin_arm_sel">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; // Parallel 8-bit addition and subtraction -def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">, +def int_arm_qadd8 : ClangBuiltin<"__builtin_arm_qadd8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">, +def int_arm_qsub8 : ClangBuiltin<"__builtin_arm_qsub8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Writes to the GE bits. -def int_arm_sadd8 : GCCBuiltin<"__builtin_arm_sadd8">, +def int_arm_sadd8 : ClangBuiltin<"__builtin_arm_sadd8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; -def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">, +def int_arm_shadd8 : ClangBuiltin<"__builtin_arm_shadd8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">, +def int_arm_shsub8 : ClangBuiltin<"__builtin_arm_shsub8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Writes to the GE bits. -def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">, +def int_arm_ssub8 : ClangBuiltin<"__builtin_arm_ssub8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Writes to the GE bits. -def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">, +def int_arm_uadd8 : ClangBuiltin<"__builtin_arm_uadd8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; -def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">, +def int_arm_uhadd8 : ClangBuiltin<"__builtin_arm_uhadd8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">, +def int_arm_uhsub8 : ClangBuiltin<"__builtin_arm_uhsub8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">, +def int_arm_uqadd8 : ClangBuiltin<"__builtin_arm_uqadd8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">, +def int_arm_uqsub8 : ClangBuiltin<"__builtin_arm_uqsub8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Writes to the GE bits. 
-def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">, +def int_arm_usub8 : ClangBuiltin<"__builtin_arm_usub8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Sum of 8-bit absolute differences -def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">, +def int_arm_usad8 : ClangBuiltin<"__builtin_arm_usad8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">, +def int_arm_usada8 : ClangBuiltin<"__builtin_arm_usada8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Parallel 16-bit addition and subtraction -def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">, +def int_arm_qadd16 : ClangBuiltin<"__builtin_arm_qadd16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">, +def int_arm_qasx : ClangBuiltin<"__builtin_arm_qasx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">, +def int_arm_qsax : ClangBuiltin<"__builtin_arm_qsax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">, +def int_arm_qsub16 : ClangBuiltin<"__builtin_arm_qsub16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Writes to the GE bits. -def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">, +def int_arm_sadd16 : ClangBuiltin<"__builtin_arm_sadd16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Writes to the GE bits. -def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">, +def int_arm_sasx : ClangBuiltin<"__builtin_arm_sasx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; -def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">, +def int_arm_shadd16 : ClangBuiltin<"__builtin_arm_shadd16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">, +def int_arm_shasx : ClangBuiltin<"__builtin_arm_shasx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">, +def int_arm_shsax : ClangBuiltin<"__builtin_arm_shsax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">, +def int_arm_shsub16 : ClangBuiltin<"__builtin_arm_shsub16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Writes to the GE bits. -def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">, +def int_arm_ssax : ClangBuiltin<"__builtin_arm_ssax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Writes to the GE bits. -def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">, +def int_arm_ssub16 : ClangBuiltin<"__builtin_arm_ssub16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Writes to the GE bits. -def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">, +def int_arm_uadd16 : ClangBuiltin<"__builtin_arm_uadd16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Writes to the GE bits. 
-def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">, +def int_arm_uasx : ClangBuiltin<"__builtin_arm_uasx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; -def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">, +def int_arm_uhadd16 : ClangBuiltin<"__builtin_arm_uhadd16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">, +def int_arm_uhasx : ClangBuiltin<"__builtin_arm_uhasx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">, +def int_arm_uhsax : ClangBuiltin<"__builtin_arm_uhsax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">, +def int_arm_uhsub16 : ClangBuiltin<"__builtin_arm_uhsub16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">, +def int_arm_uqadd16 : ClangBuiltin<"__builtin_arm_uqadd16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">, +def int_arm_uqasx : ClangBuiltin<"__builtin_arm_uqasx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">, +def int_arm_uqsax : ClangBuiltin<"__builtin_arm_uqsax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">, +def int_arm_uqsub16 : ClangBuiltin<"__builtin_arm_uqsub16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Writes to the GE bits. -def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">, +def int_arm_usax : ClangBuiltin<"__builtin_arm_usax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Writes to the GE bits. 
-def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">, +def int_arm_usub16 : ClangBuiltin<"__builtin_arm_usub16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; // Parallel 16-bit multiplication -def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">, +def int_arm_smlad : ClangBuiltin<"__builtin_arm_smlad">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">, +def int_arm_smladx : ClangBuiltin<"__builtin_arm_smladx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">, +def int_arm_smlald : ClangBuiltin<"__builtin_arm_smlald">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">, +def int_arm_smlaldx : ClangBuiltin<"__builtin_arm_smlaldx">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">, +def int_arm_smlsd : ClangBuiltin<"__builtin_arm_smlsd">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">, +def int_arm_smlsdx : ClangBuiltin<"__builtin_arm_smlsdx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">, +def int_arm_smlsld : ClangBuiltin<"__builtin_arm_smlsld">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">, +def int_arm_smlsldx : ClangBuiltin<"__builtin_arm_smlsldx">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">, +def int_arm_smuad : ClangBuiltin<"__builtin_arm_smuad">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">, +def int_arm_smuadx : ClangBuiltin<"__builtin_arm_smuadx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">, +def int_arm_smusd : ClangBuiltin<"__builtin_arm_smusd">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">, +def int_arm_smusdx : ClangBuiltin<"__builtin_arm_smusdx">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -239,19 +239,19 @@ def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>; //===----------------------------------------------------------------------===// // Data barrier instructions -def int_arm_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, +def int_arm_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intrinsic<[], [llvm_i32_ty]>; -def int_arm_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, +def int_arm_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>; -def int_arm_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, +def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>; //===----------------------------------------------------------------------===// // VFP -def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">, +def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">, Intrinsic<[llvm_i32_ty], [], []>; -def int_arm_set_fpscr : 
GCCBuiltin<"__builtin_arm_set_fpscr">, +def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">, Intrinsic<[], [llvm_i32_ty], []>; def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], [IntrNoMem]>; @@ -261,47 +261,47 @@ def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], //===----------------------------------------------------------------------===// // Coprocessor -def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">, +def int_arm_ldc : ClangBuiltin<"__builtin_arm_ldc">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">, +def int_arm_ldcl : ClangBuiltin<"__builtin_arm_ldcl">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">, +def int_arm_ldc2 : ClangBuiltin<"__builtin_arm_ldc2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">, +def int_arm_ldc2l : ClangBuiltin<"__builtin_arm_ldc2l">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">, +def int_arm_stc : ClangBuiltin<"__builtin_arm_stc">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">, +def int_arm_stcl : ClangBuiltin<"__builtin_arm_stcl">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">, +def int_arm_stc2 : ClangBuiltin<"__builtin_arm_stc2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; -def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">, +def int_arm_stc2l : ClangBuiltin<"__builtin_arm_stc2l">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; // Move to coprocessor -def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">, +def int_arm_mcr : ClangBuiltin<"__builtin_arm_mcr">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; -def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">, +def int_arm_mcr2 : ClangBuiltin<"__builtin_arm_mcr2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; // Move from coprocessor -def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">, +def int_arm_mrc : ClangBuiltin<"__builtin_arm_mrc">, MSBuiltin<"_MoveFromCoprocessor">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; -def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">, +def int_arm_mrc2 : ClangBuiltin<"__builtin_arm_mrc2">, MSBuiltin<"_MoveFromCoprocessor2">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; // Coprocessor data processing -def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">, +def int_arm_cdp : ClangBuiltin<"__builtin_arm_cdp">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; -def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">, +def int_arm_cdp2 : ClangBuiltin<"__builtin_arm_cdp2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, 
ImmArg>]>; @@ -335,13 +335,13 @@ def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], //===----------------------------------------------------------------------===// // CMSE -def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">, +def int_arm_cmse_tt : ClangBuiltin<"__builtin_arm_cmse_TT">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; -def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">, +def int_arm_cmse_ttt : ClangBuiltin<"__builtin_arm_cmse_TTT">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; -def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">, +def int_arm_cmse_tta : ClangBuiltin<"__builtin_arm_cmse_TTA">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; -def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">, +def int_arm_cmse_ttat : ClangBuiltin<"__builtin_arm_cmse_TTAT">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; //===----------------------------------------------------------------------===// @@ -1158,7 +1158,7 @@ defm int_arm_mve_vabav: MVEPredicated< [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>; -// The following 3 instrinsics are MVE vector reductions with two vector +// The following 3 intrinsics are MVE vector reductions with two vector // operands. // The first 3 operands are boolean flags (must be compile-time constants): // * unsigned - the instruction operates on vectors of unsigned values and diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index a6bd6f841aab..8916b60d2be3 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -12,29 +12,29 @@ // Specialized loads from packet let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." 
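The BPF renames below follow the same pattern. For context, the llvm.bpf.load.{byte,half,word} intrinsics model the classic absolute packet loads: read 1, 2, or 4 bytes at an offset into the packet and zero-extend the value, which is in network byte order, to 64 bits. A minimal sketch of that semantics, assuming an illustrative packet struct that is not any real kernel API:

    #include <stdint.h>

    /* Illustrative packet view; real BPF programs receive a context pointer. */
    struct pkt { const uint8_t *data; uint64_t len; };

    /* Reference model of llvm.bpf.load.word: 4 bytes at 'off', read in network
     * byte order and zero-extended. The real instruction aborts the program on
     * an out-of-bounds access; this sketch just returns 0. */
    static uint64_t bpf_load_word_ref(const struct pkt *p, uint64_t off) {
      if (off + 4 > p->len)
        return 0;
      return ((uint64_t)p->data[off] << 24) | ((uint64_t)p->data[off + 1] << 16) |
             ((uint64_t)p->data[off + 2] << 8) | (uint64_t)p->data[off + 3];
    }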
- def int_bpf_load_byte : GCCBuiltin<"__builtin_bpf_load_byte">, + def int_bpf_load_byte : ClangBuiltin<"__builtin_bpf_load_byte">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; - def int_bpf_load_half : GCCBuiltin<"__builtin_bpf_load_half">, + def int_bpf_load_half : ClangBuiltin<"__builtin_bpf_load_half">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; - def int_bpf_load_word : GCCBuiltin<"__builtin_bpf_load_word">, + def int_bpf_load_word : ClangBuiltin<"__builtin_bpf_load_word">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; - def int_bpf_pseudo : GCCBuiltin<"__builtin_bpf_pseudo">, + def int_bpf_pseudo : ClangBuiltin<"__builtin_bpf_pseudo">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; - def int_bpf_preserve_field_info : GCCBuiltin<"__builtin_bpf_preserve_field_info">, + def int_bpf_preserve_field_info : ClangBuiltin<"__builtin_bpf_preserve_field_info">, Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty], [IntrNoMem, ImmArg>]>; - def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">, + def int_bpf_btf_type_id : ClangBuiltin<"__builtin_bpf_btf_type_id">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; - def int_bpf_preserve_type_info : GCCBuiltin<"__builtin_bpf_preserve_type_info">, + def int_bpf_preserve_type_info : ClangBuiltin<"__builtin_bpf_preserve_type_info">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; - def int_bpf_preserve_enum_value : GCCBuiltin<"__builtin_bpf_preserve_enum_value">, + def int_bpf_preserve_enum_value : ClangBuiltin<"__builtin_bpf_preserve_enum_value">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_i64_ty], [IntrNoMem]>; - def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">, + def int_bpf_passthrough : ClangBuiltin<"__builtin_bpf_passthrough">, Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>; - def int_bpf_compare : GCCBuiltin<"__builtin_bpf_compare">, + def int_bpf_compare : ClangBuiltin<"__builtin_bpf_compare">, Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td new file mode 100644 index 000000000000..4a21cf1eb7fc --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -0,0 +1,20 @@ +//===- IntrinsicsDirectX.td - Defines DirectX intrinsics ---*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the DirectX-specific intrinsics. 
+// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "dxil" in { + +def int_dxil_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; +def int_dxil_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; +def int_dxil_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; +def int_dxil_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrWillReturn]>; + +} diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td index 212262c28706..52c29ef31f0a 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagon.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td @@ -18,7 +18,7 @@ let TargetPrefix = "hexagon" in { class Hexagon_Intrinsic ret_types, list param_types, list properties> - : GCCBuiltin, + : ClangBuiltin, Intrinsic; /// Hexagon_NonGCC_Intrinsic - Base class for bitcode convertible Hexagon @@ -404,4 +404,15 @@ def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B; def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B; def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B; + +// Intrinsic for instrumentation based profiling using a custom handler. The +// name of the handler is passed as the first operand to the intrinsic. The +// handler can take only one int32 input which is passed as the second +// operand to the intrinsic. +def int_hexagon_instrprof_custom + : Hexagon_NonGCC_Intrinsic<[], + [llvm_ptr_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly]>; + + include "llvm/IR/IntrinsicsHexagonDep.td" diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td index 271142ca7788..3056f37b9d87 100644 --- a/llvm/include/llvm/IR/IntrinsicsMips.td +++ b/llvm/include/llvm/IR/IntrinsicsMips.td @@ -24,370 +24,370 @@ let TargetPrefix = "mips" in { // All intrinsics start with "llvm.mips.". 
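The MIPS DSP block below is one more pure rename. As a reference for the naming convention in these definitions (the _qb suffix means four packed unsigned bytes, and an extra _s_ means each lane saturates instead of wrapping), here is a minimal per-lane C sketch contrasting addu.qb with addu_s.qb:

    #include <stdint.h>

    /* Per-lane model of ADDU.QB (wrapping) and ADDU_S.QB (saturating) on four
     * packed unsigned bytes (sketch only). */
    static void addu_qb_ref(const uint8_t a[4], const uint8_t b[4], uint8_t r[4]) {
      for (int i = 0; i < 4; ++i)
        r[i] = (uint8_t)(a[i] + b[i]);            /* wraps modulo 256 */
    }

    static void addu_s_qb_ref(const uint8_t a[4], const uint8_t b[4], uint8_t r[4]) {
      for (int i = 0; i < 4; ++i) {
        unsigned sum = (unsigned)a[i] + b[i];
        r[i] = (uint8_t)(sum > 255 ? 255 : sum);  /* clamps at 255 */
      }
    }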
//===----------------------------------------------------------------------===// // Addition/subtraction -def int_mips_addu_qb : GCCBuiltin<"__builtin_mips_addu_qb">, +def int_mips_addu_qb : ClangBuiltin<"__builtin_mips_addu_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative, IntrNoMem]>; -def int_mips_addu_s_qb : GCCBuiltin<"__builtin_mips_addu_s_qb">, +def int_mips_addu_s_qb : ClangBuiltin<"__builtin_mips_addu_s_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative, IntrNoMem]>; -def int_mips_subu_qb : GCCBuiltin<"__builtin_mips_subu_qb">, +def int_mips_subu_qb : ClangBuiltin<"__builtin_mips_subu_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_subu_s_qb : GCCBuiltin<"__builtin_mips_subu_s_qb">, +def int_mips_subu_s_qb : ClangBuiltin<"__builtin_mips_subu_s_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_addq_ph : GCCBuiltin<"__builtin_mips_addq_ph">, +def int_mips_addq_ph : ClangBuiltin<"__builtin_mips_addq_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative, IntrNoMem]>; -def int_mips_addq_s_ph : GCCBuiltin<"__builtin_mips_addq_s_ph">, +def int_mips_addq_s_ph : ClangBuiltin<"__builtin_mips_addq_s_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative, IntrNoMem]>; -def int_mips_subq_ph : GCCBuiltin<"__builtin_mips_subq_ph">, +def int_mips_subq_ph : ClangBuiltin<"__builtin_mips_subq_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; -def int_mips_subq_s_ph : GCCBuiltin<"__builtin_mips_subq_s_ph">, +def int_mips_subq_s_ph : ClangBuiltin<"__builtin_mips_subq_s_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; -def int_mips_madd: GCCBuiltin<"__builtin_mips_madd">, +def int_mips_madd: ClangBuiltin<"__builtin_mips_madd">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; -def int_mips_maddu: GCCBuiltin<"__builtin_mips_maddu">, +def int_mips_maddu: ClangBuiltin<"__builtin_mips_maddu">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; -def int_mips_msub: GCCBuiltin<"__builtin_mips_msub">, +def int_mips_msub: ClangBuiltin<"__builtin_mips_msub">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_msubu: GCCBuiltin<"__builtin_mips_msubu">, +def int_mips_msubu: ClangBuiltin<"__builtin_mips_msubu">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_addq_s_w: GCCBuiltin<"__builtin_mips_addq_s_w">, +def int_mips_addq_s_w: ClangBuiltin<"__builtin_mips_addq_s_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>; -def int_mips_subq_s_w: GCCBuiltin<"__builtin_mips_subq_s_w">, +def int_mips_subq_s_w: ClangBuiltin<"__builtin_mips_subq_s_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], []>; -def int_mips_addsc: GCCBuiltin<"__builtin_mips_addsc">, +def int_mips_addsc: ClangBuiltin<"__builtin_mips_addsc">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative]>; -def int_mips_addwc: GCCBuiltin<"__builtin_mips_addwc">, +def int_mips_addwc: ClangBuiltin<"__builtin_mips_addwc">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative]>; -def int_mips_modsub: GCCBuiltin<"__builtin_mips_modsub">, +def int_mips_modsub: ClangBuiltin<"__builtin_mips_modsub">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_raddu_w_qb: 
GCCBuiltin<"__builtin_mips_raddu_w_qb">, +def int_mips_raddu_w_qb: ClangBuiltin<"__builtin_mips_raddu_w_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty], [IntrNoMem]>; //===----------------------------------------------------------------------===// // Absolute value -def int_mips_absq_s_ph: GCCBuiltin<"__builtin_mips_absq_s_ph">, +def int_mips_absq_s_ph: ClangBuiltin<"__builtin_mips_absq_s_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty], []>; -def int_mips_absq_s_w: GCCBuiltin<"__builtin_mips_absq_s_w">, +def int_mips_absq_s_w: ClangBuiltin<"__builtin_mips_absq_s_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty], []>; //===----------------------------------------------------------------------===// // Precision reduce/expand -def int_mips_precrq_qb_ph: GCCBuiltin<"__builtin_mips_precrq_qb_ph">, +def int_mips_precrq_qb_ph: ClangBuiltin<"__builtin_mips_precrq_qb_ph">, Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; -def int_mips_precrqu_s_qb_ph: GCCBuiltin<"__builtin_mips_precrqu_s_qb_ph">, +def int_mips_precrqu_s_qb_ph: ClangBuiltin<"__builtin_mips_precrqu_s_qb_ph">, Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_precrq_ph_w: GCCBuiltin<"__builtin_mips_precrq_ph_w">, +def int_mips_precrq_ph_w: ClangBuiltin<"__builtin_mips_precrq_ph_w">, Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; -def int_mips_precrq_rs_ph_w: GCCBuiltin<"__builtin_mips_precrq_rs_ph_w">, +def int_mips_precrq_rs_ph_w: ClangBuiltin<"__builtin_mips_precrq_rs_ph_w">, Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], []>; -def int_mips_preceq_w_phl: GCCBuiltin<"__builtin_mips_preceq_w_phl">, +def int_mips_preceq_w_phl: ClangBuiltin<"__builtin_mips_preceq_w_phl">, Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>; -def int_mips_preceq_w_phr: GCCBuiltin<"__builtin_mips_preceq_w_phr">, +def int_mips_preceq_w_phr: ClangBuiltin<"__builtin_mips_preceq_w_phr">, Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>; -def int_mips_precequ_ph_qbl: GCCBuiltin<"__builtin_mips_precequ_ph_qbl">, +def int_mips_precequ_ph_qbl: ClangBuiltin<"__builtin_mips_precequ_ph_qbl">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_precequ_ph_qbr: GCCBuiltin<"__builtin_mips_precequ_ph_qbr">, +def int_mips_precequ_ph_qbr: ClangBuiltin<"__builtin_mips_precequ_ph_qbr">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_precequ_ph_qbla: GCCBuiltin<"__builtin_mips_precequ_ph_qbla">, +def int_mips_precequ_ph_qbla: ClangBuiltin<"__builtin_mips_precequ_ph_qbla">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_precequ_ph_qbra: GCCBuiltin<"__builtin_mips_precequ_ph_qbra">, +def int_mips_precequ_ph_qbra: ClangBuiltin<"__builtin_mips_precequ_ph_qbra">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_preceu_ph_qbl: GCCBuiltin<"__builtin_mips_preceu_ph_qbl">, +def int_mips_preceu_ph_qbl: ClangBuiltin<"__builtin_mips_preceu_ph_qbl">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_preceu_ph_qbr: GCCBuiltin<"__builtin_mips_preceu_ph_qbr">, +def int_mips_preceu_ph_qbr: ClangBuiltin<"__builtin_mips_preceu_ph_qbr">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_preceu_ph_qbla: GCCBuiltin<"__builtin_mips_preceu_ph_qbla">, +def int_mips_preceu_ph_qbla: ClangBuiltin<"__builtin_mips_preceu_ph_qbla">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_preceu_ph_qbra: GCCBuiltin<"__builtin_mips_preceu_ph_qbra">, +def int_mips_preceu_ph_qbra: 
ClangBuiltin<"__builtin_mips_preceu_ph_qbra">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; //===----------------------------------------------------------------------===// // Shift -def int_mips_shll_qb: GCCBuiltin<"__builtin_mips_shll_qb">, +def int_mips_shll_qb: ClangBuiltin<"__builtin_mips_shll_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], []>; -def int_mips_shrl_qb: GCCBuiltin<"__builtin_mips_shrl_qb">, +def int_mips_shrl_qb: ClangBuiltin<"__builtin_mips_shrl_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_shll_ph: GCCBuiltin<"__builtin_mips_shll_ph">, +def int_mips_shll_ph: ClangBuiltin<"__builtin_mips_shll_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], []>; -def int_mips_shll_s_ph: GCCBuiltin<"__builtin_mips_shll_s_ph">, +def int_mips_shll_s_ph: ClangBuiltin<"__builtin_mips_shll_s_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], []>; -def int_mips_shra_ph: GCCBuiltin<"__builtin_mips_shra_ph">, +def int_mips_shra_ph: ClangBuiltin<"__builtin_mips_shra_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_shra_r_ph: GCCBuiltin<"__builtin_mips_shra_r_ph">, +def int_mips_shra_r_ph: ClangBuiltin<"__builtin_mips_shra_r_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_shll_s_w: GCCBuiltin<"__builtin_mips_shll_s_w">, +def int_mips_shll_s_w: ClangBuiltin<"__builtin_mips_shll_s_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], []>; -def int_mips_shra_r_w: GCCBuiltin<"__builtin_mips_shra_r_w">, +def int_mips_shra_r_w: ClangBuiltin<"__builtin_mips_shra_r_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_shilo: GCCBuiltin<"__builtin_mips_shilo">, +def int_mips_shilo: ClangBuiltin<"__builtin_mips_shilo">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; //===----------------------------------------------------------------------===// // Multiplication -def int_mips_muleu_s_ph_qbl: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbl">, +def int_mips_muleu_s_ph_qbl: ClangBuiltin<"__builtin_mips_muleu_s_ph_qbl">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], []>; -def int_mips_muleu_s_ph_qbr: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbr">, +def int_mips_muleu_s_ph_qbr: ClangBuiltin<"__builtin_mips_muleu_s_ph_qbr">, Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], []>; -def int_mips_mulq_rs_ph: GCCBuiltin<"__builtin_mips_mulq_rs_ph">, +def int_mips_mulq_rs_ph: ClangBuiltin<"__builtin_mips_mulq_rs_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>; -def int_mips_muleq_s_w_phl: GCCBuiltin<"__builtin_mips_muleq_s_w_phl">, +def int_mips_muleq_s_w_phl: ClangBuiltin<"__builtin_mips_muleq_s_w_phl">, Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>; -def int_mips_muleq_s_w_phr: GCCBuiltin<"__builtin_mips_muleq_s_w_phr">, +def int_mips_muleq_s_w_phr: ClangBuiltin<"__builtin_mips_muleq_s_w_phr">, Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>; -def int_mips_mulsaq_s_w_ph: GCCBuiltin<"__builtin_mips_mulsaq_s_w_ph">, +def int_mips_mulsaq_s_w_ph: ClangBuiltin<"__builtin_mips_mulsaq_s_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_maq_s_w_phl: GCCBuiltin<"__builtin_mips_maq_s_w_phl">, +def int_mips_maq_s_w_phl: ClangBuiltin<"__builtin_mips_maq_s_w_phl">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def 
int_mips_maq_s_w_phr: GCCBuiltin<"__builtin_mips_maq_s_w_phr">, +def int_mips_maq_s_w_phr: ClangBuiltin<"__builtin_mips_maq_s_w_phr">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_maq_sa_w_phl: GCCBuiltin<"__builtin_mips_maq_sa_w_phl">, +def int_mips_maq_sa_w_phl: ClangBuiltin<"__builtin_mips_maq_sa_w_phl">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_maq_sa_w_phr: GCCBuiltin<"__builtin_mips_maq_sa_w_phr">, +def int_mips_maq_sa_w_phr: ClangBuiltin<"__builtin_mips_maq_sa_w_phr">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_mult: GCCBuiltin<"__builtin_mips_mult">, +def int_mips_mult: ClangBuiltin<"__builtin_mips_mult">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; -def int_mips_multu: GCCBuiltin<"__builtin_mips_multu">, +def int_mips_multu: ClangBuiltin<"__builtin_mips_multu">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; //===----------------------------------------------------------------------===// // Dot product with accumulate/subtract -def int_mips_dpau_h_qbl: GCCBuiltin<"__builtin_mips_dpau_h_qbl">, +def int_mips_dpau_h_qbl: ClangBuiltin<"__builtin_mips_dpau_h_qbl">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_dpau_h_qbr: GCCBuiltin<"__builtin_mips_dpau_h_qbr">, +def int_mips_dpau_h_qbr: ClangBuiltin<"__builtin_mips_dpau_h_qbr">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_dpsu_h_qbl: GCCBuiltin<"__builtin_mips_dpsu_h_qbl">, +def int_mips_dpsu_h_qbl: ClangBuiltin<"__builtin_mips_dpsu_h_qbl">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_dpsu_h_qbr: GCCBuiltin<"__builtin_mips_dpsu_h_qbr">, +def int_mips_dpsu_h_qbr: ClangBuiltin<"__builtin_mips_dpsu_h_qbr">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_dpaq_s_w_ph: GCCBuiltin<"__builtin_mips_dpaq_s_w_ph">, +def int_mips_dpaq_s_w_ph: ClangBuiltin<"__builtin_mips_dpaq_s_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_dpsq_s_w_ph: GCCBuiltin<"__builtin_mips_dpsq_s_w_ph">, +def int_mips_dpsq_s_w_ph: ClangBuiltin<"__builtin_mips_dpsq_s_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_dpaq_sa_l_w: GCCBuiltin<"__builtin_mips_dpaq_sa_l_w">, +def int_mips_dpaq_sa_l_w: ClangBuiltin<"__builtin_mips_dpaq_sa_l_w">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], []>; -def int_mips_dpsq_sa_l_w: GCCBuiltin<"__builtin_mips_dpsq_sa_l_w">, +def int_mips_dpsq_sa_l_w: ClangBuiltin<"__builtin_mips_dpsq_sa_l_w">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], []>; //===----------------------------------------------------------------------===// // Comparison -def int_mips_cmpu_eq_qb: GCCBuiltin<"__builtin_mips_cmpu_eq_qb">, +def int_mips_cmpu_eq_qb: ClangBuiltin<"__builtin_mips_cmpu_eq_qb">, Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; -def int_mips_cmpu_lt_qb: GCCBuiltin<"__builtin_mips_cmpu_lt_qb">, +def int_mips_cmpu_lt_qb: ClangBuiltin<"__builtin_mips_cmpu_lt_qb">, Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], []>; -def int_mips_cmpu_le_qb: GCCBuiltin<"__builtin_mips_cmpu_le_qb">, +def int_mips_cmpu_le_qb: ClangBuiltin<"__builtin_mips_cmpu_le_qb">, Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], []>; -def 
int_mips_cmpgu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgu_eq_qb">, +def int_mips_cmpgu_eq_qb: ClangBuiltin<"__builtin_mips_cmpgu_eq_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; -def int_mips_cmpgu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgu_lt_qb">, +def int_mips_cmpgu_lt_qb: ClangBuiltin<"__builtin_mips_cmpgu_lt_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>; -def int_mips_cmpgu_le_qb: GCCBuiltin<"__builtin_mips_cmpgu_le_qb">, +def int_mips_cmpgu_le_qb: ClangBuiltin<"__builtin_mips_cmpgu_le_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>; -def int_mips_cmp_eq_ph: GCCBuiltin<"__builtin_mips_cmp_eq_ph">, +def int_mips_cmp_eq_ph: ClangBuiltin<"__builtin_mips_cmp_eq_ph">, Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>; -def int_mips_cmp_lt_ph: GCCBuiltin<"__builtin_mips_cmp_lt_ph">, +def int_mips_cmp_lt_ph: ClangBuiltin<"__builtin_mips_cmp_lt_ph">, Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_cmp_le_ph: GCCBuiltin<"__builtin_mips_cmp_le_ph">, +def int_mips_cmp_le_ph: ClangBuiltin<"__builtin_mips_cmp_le_ph">, Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], []>; //===----------------------------------------------------------------------===// // Extracting -def int_mips_extr_s_h: GCCBuiltin<"__builtin_mips_extr_s_h">, +def int_mips_extr_s_h: ClangBuiltin<"__builtin_mips_extr_s_h">, Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; -def int_mips_extr_w: GCCBuiltin<"__builtin_mips_extr_w">, +def int_mips_extr_w: ClangBuiltin<"__builtin_mips_extr_w">, Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; -def int_mips_extr_rs_w: GCCBuiltin<"__builtin_mips_extr_rs_w">, +def int_mips_extr_rs_w: ClangBuiltin<"__builtin_mips_extr_rs_w">, Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; -def int_mips_extr_r_w: GCCBuiltin<"__builtin_mips_extr_r_w">, +def int_mips_extr_r_w: ClangBuiltin<"__builtin_mips_extr_r_w">, Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; -def int_mips_extp: GCCBuiltin<"__builtin_mips_extp">, +def int_mips_extp: ClangBuiltin<"__builtin_mips_extp">, Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; -def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">, +def int_mips_extpdp: ClangBuiltin<"__builtin_mips_extpdp">, Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; //===----------------------------------------------------------------------===// // Misc -def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">, +def int_mips_wrdsp: ClangBuiltin<"__builtin_mips_wrdsp">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; -def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">, +def int_mips_rddsp: ClangBuiltin<"__builtin_mips_rddsp">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg>]>; -def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">, +def int_mips_insv: ClangBuiltin<"__builtin_mips_insv">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; -def int_mips_bitrev: GCCBuiltin<"__builtin_mips_bitrev">, +def int_mips_bitrev: ClangBuiltin<"__builtin_mips_bitrev">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_mips_packrl_ph: GCCBuiltin<"__builtin_mips_packrl_ph">, +def int_mips_packrl_ph: ClangBuiltin<"__builtin_mips_packrl_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; -def int_mips_repl_qb: GCCBuiltin<"__builtin_mips_repl_qb">, +def int_mips_repl_qb: ClangBuiltin<"__builtin_mips_repl_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_mips_repl_ph: 
GCCBuiltin<"__builtin_mips_repl_ph">, +def int_mips_repl_ph: ClangBuiltin<"__builtin_mips_repl_ph">, Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_mips_pick_qb: GCCBuiltin<"__builtin_mips_pick_qb">, +def int_mips_pick_qb: ClangBuiltin<"__builtin_mips_pick_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrReadMem]>; -def int_mips_pick_ph: GCCBuiltin<"__builtin_mips_pick_ph">, +def int_mips_pick_ph: ClangBuiltin<"__builtin_mips_pick_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrReadMem]>; -def int_mips_mthlip: GCCBuiltin<"__builtin_mips_mthlip">, +def int_mips_mthlip: ClangBuiltin<"__builtin_mips_mthlip">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], []>; -def int_mips_bposge32: GCCBuiltin<"__builtin_mips_bposge32">, +def int_mips_bposge32: ClangBuiltin<"__builtin_mips_bposge32">, Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>; -def int_mips_lbux: GCCBuiltin<"__builtin_mips_lbux">, +def int_mips_lbux: ClangBuiltin<"__builtin_mips_lbux">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_lhx: GCCBuiltin<"__builtin_mips_lhx">, +def int_mips_lhx: ClangBuiltin<"__builtin_mips_lhx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_lwx: GCCBuiltin<"__builtin_mips_lwx">, +def int_mips_lwx: ClangBuiltin<"__builtin_mips_lwx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; //===----------------------------------------------------------------------===// // MIPS DSP Rev 2 -def int_mips_absq_s_qb: GCCBuiltin<"__builtin_mips_absq_s_qb">, +def int_mips_absq_s_qb: ClangBuiltin<"__builtin_mips_absq_s_qb">, Intrinsic<[mips_v4q7_ty], [mips_v4q7_ty], []>; -def int_mips_addqh_ph: GCCBuiltin<"__builtin_mips_addqh_ph">, +def int_mips_addqh_ph: ClangBuiltin<"__builtin_mips_addqh_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>; -def int_mips_addqh_r_ph: GCCBuiltin<"__builtin_mips_addqh_r_ph">, +def int_mips_addqh_r_ph: ClangBuiltin<"__builtin_mips_addqh_r_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>; -def int_mips_addqh_w: GCCBuiltin<"__builtin_mips_addqh_w">, +def int_mips_addqh_w: ClangBuiltin<"__builtin_mips_addqh_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem, Commutative]>; -def int_mips_addqh_r_w: GCCBuiltin<"__builtin_mips_addqh_r_w">, +def int_mips_addqh_r_w: ClangBuiltin<"__builtin_mips_addqh_r_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem, Commutative]>; -def int_mips_addu_ph: GCCBuiltin<"__builtin_mips_addu_ph">, +def int_mips_addu_ph: ClangBuiltin<"__builtin_mips_addu_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; -def int_mips_addu_s_ph: GCCBuiltin<"__builtin_mips_addu_s_ph">, +def int_mips_addu_s_ph: ClangBuiltin<"__builtin_mips_addu_s_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; -def int_mips_adduh_qb: GCCBuiltin<"__builtin_mips_adduh_qb">, +def int_mips_adduh_qb: ClangBuiltin<"__builtin_mips_adduh_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>; -def int_mips_adduh_r_qb: GCCBuiltin<"__builtin_mips_adduh_r_qb">, +def int_mips_adduh_r_qb: ClangBuiltin<"__builtin_mips_adduh_r_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>; -def int_mips_append: GCCBuiltin<"__builtin_mips_append">, +def int_mips_append: 
ClangBuiltin<"__builtin_mips_append">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">, +def int_mips_balign: ClangBuiltin<"__builtin_mips_balign">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">, +def int_mips_cmpgdu_eq_qb: ClangBuiltin<"__builtin_mips_cmpgdu_eq_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; -def int_mips_cmpgdu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgdu_lt_qb">, +def int_mips_cmpgdu_lt_qb: ClangBuiltin<"__builtin_mips_cmpgdu_lt_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>; -def int_mips_cmpgdu_le_qb: GCCBuiltin<"__builtin_mips_cmpgdu_le_qb">, +def int_mips_cmpgdu_le_qb: ClangBuiltin<"__builtin_mips_cmpgdu_le_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>; -def int_mips_dpa_w_ph: GCCBuiltin<"__builtin_mips_dpa_w_ph">, +def int_mips_dpa_w_ph: ClangBuiltin<"__builtin_mips_dpa_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], [IntrNoMem]>; -def int_mips_dps_w_ph: GCCBuiltin<"__builtin_mips_dps_w_ph">, +def int_mips_dps_w_ph: ClangBuiltin<"__builtin_mips_dps_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], [IntrNoMem]>; -def int_mips_dpaqx_s_w_ph: GCCBuiltin<"__builtin_mips_dpaqx_s_w_ph">, +def int_mips_dpaqx_s_w_ph: ClangBuiltin<"__builtin_mips_dpaqx_s_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_dpaqx_sa_w_ph: GCCBuiltin<"__builtin_mips_dpaqx_sa_w_ph">, +def int_mips_dpaqx_sa_w_ph: ClangBuiltin<"__builtin_mips_dpaqx_sa_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_dpax_w_ph: GCCBuiltin<"__builtin_mips_dpax_w_ph">, +def int_mips_dpax_w_ph: ClangBuiltin<"__builtin_mips_dpax_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], [IntrNoMem]>; -def int_mips_dpsx_w_ph: GCCBuiltin<"__builtin_mips_dpsx_w_ph">, +def int_mips_dpsx_w_ph: ClangBuiltin<"__builtin_mips_dpsx_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], [IntrNoMem]>; -def int_mips_dpsqx_s_w_ph: GCCBuiltin<"__builtin_mips_dpsqx_s_w_ph">, +def int_mips_dpsqx_s_w_ph: ClangBuiltin<"__builtin_mips_dpsqx_s_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_dpsqx_sa_w_ph: GCCBuiltin<"__builtin_mips_dpsqx_sa_w_ph">, +def int_mips_dpsqx_sa_w_ph: ClangBuiltin<"__builtin_mips_dpsqx_sa_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; -def int_mips_mul_ph: GCCBuiltin<"__builtin_mips_mul_ph">, +def int_mips_mul_ph: ClangBuiltin<"__builtin_mips_mul_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; -def int_mips_mul_s_ph: GCCBuiltin<"__builtin_mips_mul_s_ph">, +def int_mips_mul_s_ph: ClangBuiltin<"__builtin_mips_mul_s_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; -def int_mips_mulq_rs_w: GCCBuiltin<"__builtin_mips_mulq_rs_w">, +def int_mips_mulq_rs_w: ClangBuiltin<"__builtin_mips_mulq_rs_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>; -def int_mips_mulq_s_ph: GCCBuiltin<"__builtin_mips_mulq_s_ph">, +def int_mips_mulq_s_ph: ClangBuiltin<"__builtin_mips_mulq_s_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>; -def int_mips_mulq_s_w: 
GCCBuiltin<"__builtin_mips_mulq_s_w">, +def int_mips_mulq_s_w: ClangBuiltin<"__builtin_mips_mulq_s_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>; -def int_mips_mulsa_w_ph: GCCBuiltin<"__builtin_mips_mulsa_w_ph">, +def int_mips_mulsa_w_ph: ClangBuiltin<"__builtin_mips_mulsa_w_ph">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], [IntrNoMem]>; -def int_mips_precr_qb_ph: GCCBuiltin<"__builtin_mips_precr_qb_ph">, +def int_mips_precr_qb_ph: ClangBuiltin<"__builtin_mips_precr_qb_ph">, Intrinsic<[llvm_v4i8_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; -def int_mips_precr_sra_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_ph_w">, +def int_mips_precr_sra_ph_w: ClangBuiltin<"__builtin_mips_precr_sra_ph_w">, Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_precr_sra_r_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_r_ph_w">, +def int_mips_precr_sra_r_ph_w: ClangBuiltin<"__builtin_mips_precr_sra_r_ph_w">, Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_prepend: GCCBuiltin<"__builtin_mips_prepend">, +def int_mips_prepend: ClangBuiltin<"__builtin_mips_prepend">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_shra_qb: GCCBuiltin<"__builtin_mips_shra_qb">, +def int_mips_shra_qb: ClangBuiltin<"__builtin_mips_shra_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_shra_r_qb: GCCBuiltin<"__builtin_mips_shra_r_qb">, +def int_mips_shra_r_qb: ClangBuiltin<"__builtin_mips_shra_r_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_shrl_ph: GCCBuiltin<"__builtin_mips_shrl_ph">, +def int_mips_shrl_ph: ClangBuiltin<"__builtin_mips_shrl_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_subqh_ph: GCCBuiltin<"__builtin_mips_subqh_ph">, +def int_mips_subqh_ph: ClangBuiltin<"__builtin_mips_subqh_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; -def int_mips_subqh_r_ph: GCCBuiltin<"__builtin_mips_subqh_r_ph">, +def int_mips_subqh_r_ph: ClangBuiltin<"__builtin_mips_subqh_r_ph">, Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; -def int_mips_subqh_w: GCCBuiltin<"__builtin_mips_subqh_w">, +def int_mips_subqh_w: ClangBuiltin<"__builtin_mips_subqh_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; -def int_mips_subqh_r_w: GCCBuiltin<"__builtin_mips_subqh_r_w">, +def int_mips_subqh_r_w: ClangBuiltin<"__builtin_mips_subqh_r_w">, Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; -def int_mips_subu_ph: GCCBuiltin<"__builtin_mips_subu_ph">, +def int_mips_subu_ph: ClangBuiltin<"__builtin_mips_subu_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; -def int_mips_subu_s_ph: GCCBuiltin<"__builtin_mips_subu_s_ph">, +def int_mips_subu_s_ph: ClangBuiltin<"__builtin_mips_subu_s_ph">, Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; -def int_mips_subuh_qb: GCCBuiltin<"__builtin_mips_subuh_qb">, +def int_mips_subuh_qb: ClangBuiltin<"__builtin_mips_subuh_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; -def int_mips_subuh_r_qb: GCCBuiltin<"__builtin_mips_subuh_r_qb">, +def int_mips_subuh_r_qb: ClangBuiltin<"__builtin_mips_subuh_r_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; 
 //===----------------------------------------------------------------------===//
@@ -396,1389 +396,1389 @@ def int_mips_subuh_r_qb: GCCBuiltin<"__builtin_mips_subuh_r_qb">,
 //===----------------------------------------------------------------------===//
 // Addition/subtraction
-def int_mips_add_a_b : GCCBuiltin<"__builtin_msa_add_a_b">,
+def int_mips_add_a_b : ClangBuiltin<"__builtin_msa_add_a_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_add_a_h : GCCBuiltin<"__builtin_msa_add_a_h">,
+def int_mips_add_a_h : ClangBuiltin<"__builtin_msa_add_a_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_add_a_w : GCCBuiltin<"__builtin_msa_add_a_w">,
+def int_mips_add_a_w : ClangBuiltin<"__builtin_msa_add_a_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_add_a_d : GCCBuiltin<"__builtin_msa_add_a_d">,
+def int_mips_add_a_d : ClangBuiltin<"__builtin_msa_add_a_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_a_b : GCCBuiltin<"__builtin_msa_adds_a_b">,
+def int_mips_adds_a_b : ClangBuiltin<"__builtin_msa_adds_a_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_a_h : GCCBuiltin<"__builtin_msa_adds_a_h">,
+def int_mips_adds_a_h : ClangBuiltin<"__builtin_msa_adds_a_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_a_w : GCCBuiltin<"__builtin_msa_adds_a_w">,
+def int_mips_adds_a_w : ClangBuiltin<"__builtin_msa_adds_a_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_a_d : GCCBuiltin<"__builtin_msa_adds_a_d">,
+def int_mips_adds_a_d : ClangBuiltin<"__builtin_msa_adds_a_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_s_b : GCCBuiltin<"__builtin_msa_adds_s_b">,
+def int_mips_adds_s_b : ClangBuiltin<"__builtin_msa_adds_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_s_h : GCCBuiltin<"__builtin_msa_adds_s_h">,
+def int_mips_adds_s_h : ClangBuiltin<"__builtin_msa_adds_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_s_w : GCCBuiltin<"__builtin_msa_adds_s_w">,
+def int_mips_adds_s_w : ClangBuiltin<"__builtin_msa_adds_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_s_d : GCCBuiltin<"__builtin_msa_adds_s_d">,
+def int_mips_adds_s_d : ClangBuiltin<"__builtin_msa_adds_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_u_b : GCCBuiltin<"__builtin_msa_adds_u_b">,
+def int_mips_adds_u_b : ClangBuiltin<"__builtin_msa_adds_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_u_h : GCCBuiltin<"__builtin_msa_adds_u_h">,
+def int_mips_adds_u_h : ClangBuiltin<"__builtin_msa_adds_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_u_w : GCCBuiltin<"__builtin_msa_adds_u_w">,
+def int_mips_adds_u_w : ClangBuiltin<"__builtin_msa_adds_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_adds_u_d : GCCBuiltin<"__builtin_msa_adds_u_d">,
+def int_mips_adds_u_d : ClangBuiltin<"__builtin_msa_adds_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_addv_b : GCCBuiltin<"__builtin_msa_addv_b">,
+def int_mips_addv_b : ClangBuiltin<"__builtin_msa_addv_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_addv_h : GCCBuiltin<"__builtin_msa_addv_h">,
+def int_mips_addv_h : ClangBuiltin<"__builtin_msa_addv_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_addv_w : GCCBuiltin<"__builtin_msa_addv_w">,
+def int_mips_addv_w : ClangBuiltin<"__builtin_msa_addv_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_addv_d : GCCBuiltin<"__builtin_msa_addv_d">,
+def int_mips_addv_d : ClangBuiltin<"__builtin_msa_addv_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_addvi_b : GCCBuiltin<"__builtin_msa_addvi_b">,
+def int_mips_addvi_b : ClangBuiltin<"__builtin_msa_addvi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_addvi_h : GCCBuiltin<"__builtin_msa_addvi_h">,
+def int_mips_addvi_h : ClangBuiltin<"__builtin_msa_addvi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_addvi_w : GCCBuiltin<"__builtin_msa_addvi_w">,
+def int_mips_addvi_w : ClangBuiltin<"__builtin_msa_addvi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_addvi_d : GCCBuiltin<"__builtin_msa_addvi_d">,
+def int_mips_addvi_d : ClangBuiltin<"__builtin_msa_addvi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_and_v : GCCBuiltin<"__builtin_msa_and_v">,
+def int_mips_and_v : ClangBuiltin<"__builtin_msa_and_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_andi_b : GCCBuiltin<"__builtin_msa_andi_b">,
+def int_mips_andi_b : ClangBuiltin<"__builtin_msa_andi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_asub_s_b : GCCBuiltin<"__builtin_msa_asub_s_b">,
+def int_mips_asub_s_b : ClangBuiltin<"__builtin_msa_asub_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_asub_s_h : GCCBuiltin<"__builtin_msa_asub_s_h">,
+def int_mips_asub_s_h : ClangBuiltin<"__builtin_msa_asub_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_asub_s_w : GCCBuiltin<"__builtin_msa_asub_s_w">,
+def int_mips_asub_s_w : ClangBuiltin<"__builtin_msa_asub_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_asub_s_d : GCCBuiltin<"__builtin_msa_asub_s_d">,
+def int_mips_asub_s_d : ClangBuiltin<"__builtin_msa_asub_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_asub_u_b : GCCBuiltin<"__builtin_msa_asub_u_b">,
+def int_mips_asub_u_b : ClangBuiltin<"__builtin_msa_asub_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_asub_u_h : GCCBuiltin<"__builtin_msa_asub_u_h">,
+def int_mips_asub_u_h : ClangBuiltin<"__builtin_msa_asub_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_asub_u_w : GCCBuiltin<"__builtin_msa_asub_u_w">,
+def int_mips_asub_u_w : ClangBuiltin<"__builtin_msa_asub_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_asub_u_d : GCCBuiltin<"__builtin_msa_asub_u_d">,
+def int_mips_asub_u_d : ClangBuiltin<"__builtin_msa_asub_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ave_s_b : GCCBuiltin<"__builtin_msa_ave_s_b">,
+def int_mips_ave_s_b : ClangBuiltin<"__builtin_msa_ave_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_s_h : GCCBuiltin<"__builtin_msa_ave_s_h">,
+def int_mips_ave_s_h : ClangBuiltin<"__builtin_msa_ave_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_s_w : GCCBuiltin<"__builtin_msa_ave_s_w">,
+def int_mips_ave_s_w : ClangBuiltin<"__builtin_msa_ave_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_s_d : GCCBuiltin<"__builtin_msa_ave_s_d">,
+def int_mips_ave_s_d : ClangBuiltin<"__builtin_msa_ave_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_u_b : GCCBuiltin<"__builtin_msa_ave_u_b">,
+def int_mips_ave_u_b : ClangBuiltin<"__builtin_msa_ave_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_u_h : GCCBuiltin<"__builtin_msa_ave_u_h">,
+def int_mips_ave_u_h : ClangBuiltin<"__builtin_msa_ave_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_u_w : GCCBuiltin<"__builtin_msa_ave_u_w">,
+def int_mips_ave_u_w : ClangBuiltin<"__builtin_msa_ave_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_ave_u_d : GCCBuiltin<"__builtin_msa_ave_u_d">,
+def int_mips_ave_u_d : ClangBuiltin<"__builtin_msa_ave_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_s_b : GCCBuiltin<"__builtin_msa_aver_s_b">,
+def int_mips_aver_s_b : ClangBuiltin<"__builtin_msa_aver_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_s_h : GCCBuiltin<"__builtin_msa_aver_s_h">,
+def int_mips_aver_s_h : ClangBuiltin<"__builtin_msa_aver_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_s_w : GCCBuiltin<"__builtin_msa_aver_s_w">,
+def int_mips_aver_s_w : ClangBuiltin<"__builtin_msa_aver_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_s_d : GCCBuiltin<"__builtin_msa_aver_s_d">,
+def int_mips_aver_s_d : ClangBuiltin<"__builtin_msa_aver_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_u_b : GCCBuiltin<"__builtin_msa_aver_u_b">,
+def int_mips_aver_u_b : ClangBuiltin<"__builtin_msa_aver_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_u_h : GCCBuiltin<"__builtin_msa_aver_u_h">,
+def int_mips_aver_u_h : ClangBuiltin<"__builtin_msa_aver_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_u_w : GCCBuiltin<"__builtin_msa_aver_u_w">,
+def int_mips_aver_u_w : ClangBuiltin<"__builtin_msa_aver_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [Commutative, IntrNoMem]>;
-def int_mips_aver_u_d : GCCBuiltin<"__builtin_msa_aver_u_d">,
+def int_mips_aver_u_d : ClangBuiltin<"__builtin_msa_aver_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [Commutative, IntrNoMem]>;
-def int_mips_bclr_b : GCCBuiltin<"__builtin_msa_bclr_b">,
+def int_mips_bclr_b : ClangBuiltin<"__builtin_msa_bclr_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bclr_h : GCCBuiltin<"__builtin_msa_bclr_h">,
+def int_mips_bclr_h : ClangBuiltin<"__builtin_msa_bclr_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bclr_w : GCCBuiltin<"__builtin_msa_bclr_w">,
+def int_mips_bclr_w : ClangBuiltin<"__builtin_msa_bclr_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bclr_d : GCCBuiltin<"__builtin_msa_bclr_d">,
+def int_mips_bclr_d : ClangBuiltin<"__builtin_msa_bclr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bclri_b : GCCBuiltin<"__builtin_msa_bclri_b">,
+def int_mips_bclri_b : ClangBuiltin<"__builtin_msa_bclri_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bclri_h : GCCBuiltin<"__builtin_msa_bclri_h">,
+def int_mips_bclri_h : ClangBuiltin<"__builtin_msa_bclri_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bclri_w : GCCBuiltin<"__builtin_msa_bclri_w">,
+def int_mips_bclri_w : ClangBuiltin<"__builtin_msa_bclri_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bclri_d : GCCBuiltin<"__builtin_msa_bclri_d">,
+def int_mips_bclri_d : ClangBuiltin<"__builtin_msa_bclri_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_binsl_b : GCCBuiltin<"__builtin_msa_binsl_b">,
+def int_mips_binsl_b : ClangBuiltin<"__builtin_msa_binsl_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_binsl_h : GCCBuiltin<"__builtin_msa_binsl_h">,
+def int_mips_binsl_h : ClangBuiltin<"__builtin_msa_binsl_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_binsl_w : GCCBuiltin<"__builtin_msa_binsl_w">,
+def int_mips_binsl_w : ClangBuiltin<"__builtin_msa_binsl_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_binsl_d : GCCBuiltin<"__builtin_msa_binsl_d">,
+def int_mips_binsl_d : ClangBuiltin<"__builtin_msa_binsl_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_binsli_b : GCCBuiltin<"__builtin_msa_binsli_b">,
+def int_mips_binsli_b : ClangBuiltin<"__builtin_msa_binsli_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsli_h : GCCBuiltin<"__builtin_msa_binsli_h">,
+def int_mips_binsli_h : ClangBuiltin<"__builtin_msa_binsli_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsli_w : GCCBuiltin<"__builtin_msa_binsli_w">,
+def int_mips_binsli_w : ClangBuiltin<"__builtin_msa_binsli_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsli_d : GCCBuiltin<"__builtin_msa_binsli_d">,
+def int_mips_binsli_d : ClangBuiltin<"__builtin_msa_binsli_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsr_b : GCCBuiltin<"__builtin_msa_binsr_b">,
+def int_mips_binsr_b : ClangBuiltin<"__builtin_msa_binsr_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_binsr_h : GCCBuiltin<"__builtin_msa_binsr_h">,
+def int_mips_binsr_h : ClangBuiltin<"__builtin_msa_binsr_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_binsr_w : GCCBuiltin<"__builtin_msa_binsr_w">,
+def int_mips_binsr_w : ClangBuiltin<"__builtin_msa_binsr_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_binsr_d : GCCBuiltin<"__builtin_msa_binsr_d">,
+def int_mips_binsr_d : ClangBuiltin<"__builtin_msa_binsr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_binsri_b : GCCBuiltin<"__builtin_msa_binsri_b">,
+def int_mips_binsri_b : ClangBuiltin<"__builtin_msa_binsri_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsri_h : GCCBuiltin<"__builtin_msa_binsri_h">,
+def int_mips_binsri_h : ClangBuiltin<"__builtin_msa_binsri_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsri_w : GCCBuiltin<"__builtin_msa_binsri_w">,
+def int_mips_binsri_w : ClangBuiltin<"__builtin_msa_binsri_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">,
+def int_mips_binsri_d : ClangBuiltin<"__builtin_msa_binsri_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
+def int_mips_bmnz_v : ClangBuiltin<"__builtin_msa_bmnz_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bmnzi_b : GCCBuiltin<"__builtin_msa_bmnzi_b">,
+def int_mips_bmnzi_b : ClangBuiltin<"__builtin_msa_bmnzi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
+def int_mips_bmz_v : ClangBuiltin<"__builtin_msa_bmz_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bmzi_b : GCCBuiltin<"__builtin_msa_bmzi_b">,
+def int_mips_bmzi_b : ClangBuiltin<"__builtin_msa_bmzi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bneg_b : GCCBuiltin<"__builtin_msa_bneg_b">,
+def int_mips_bneg_b : ClangBuiltin<"__builtin_msa_bneg_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bneg_h : GCCBuiltin<"__builtin_msa_bneg_h">,
+def int_mips_bneg_h : ClangBuiltin<"__builtin_msa_bneg_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bneg_w : GCCBuiltin<"__builtin_msa_bneg_w">,
+def int_mips_bneg_w : ClangBuiltin<"__builtin_msa_bneg_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bneg_d : GCCBuiltin<"__builtin_msa_bneg_d">,
+def int_mips_bneg_d : ClangBuiltin<"__builtin_msa_bneg_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bnegi_b : GCCBuiltin<"__builtin_msa_bnegi_b">,
+def int_mips_bnegi_b : ClangBuiltin<"__builtin_msa_bnegi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnegi_h : GCCBuiltin<"__builtin_msa_bnegi_h">,
+def int_mips_bnegi_h : ClangBuiltin<"__builtin_msa_bnegi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnegi_w : GCCBuiltin<"__builtin_msa_bnegi_w">,
+def int_mips_bnegi_w : ClangBuiltin<"__builtin_msa_bnegi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnegi_d : GCCBuiltin<"__builtin_msa_bnegi_d">,
+def int_mips_bnegi_d : ClangBuiltin<"__builtin_msa_bnegi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnz_b : GCCBuiltin<"__builtin_msa_bnz_b">,
+def int_mips_bnz_b : ClangBuiltin<"__builtin_msa_bnz_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bnz_h : GCCBuiltin<"__builtin_msa_bnz_h">,
+def int_mips_bnz_h : ClangBuiltin<"__builtin_msa_bnz_h">,
   Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bnz_w : GCCBuiltin<"__builtin_msa_bnz_w">,
+def int_mips_bnz_w : ClangBuiltin<"__builtin_msa_bnz_w">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bnz_d : GCCBuiltin<"__builtin_msa_bnz_d">,
+def int_mips_bnz_d : ClangBuiltin<"__builtin_msa_bnz_d">,
   Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bnz_v : GCCBuiltin<"__builtin_msa_bnz_v">,
+def int_mips_bnz_v : ClangBuiltin<"__builtin_msa_bnz_v">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bsel_v : GCCBuiltin<"__builtin_msa_bsel_v">,
+def int_mips_bsel_v : ClangBuiltin<"__builtin_msa_bsel_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bseli_b : GCCBuiltin<"__builtin_msa_bseli_b">,
+def int_mips_bseli_b : ClangBuiltin<"__builtin_msa_bseli_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bset_b : GCCBuiltin<"__builtin_msa_bset_b">,
+def int_mips_bset_b : ClangBuiltin<"__builtin_msa_bset_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bset_h : GCCBuiltin<"__builtin_msa_bset_h">,
+def int_mips_bset_h : ClangBuiltin<"__builtin_msa_bset_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bset_w : GCCBuiltin<"__builtin_msa_bset_w">,
+def int_mips_bset_w : ClangBuiltin<"__builtin_msa_bset_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bset_d : GCCBuiltin<"__builtin_msa_bset_d">,
+def int_mips_bset_d : ClangBuiltin<"__builtin_msa_bset_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bseti_b : GCCBuiltin<"__builtin_msa_bseti_b">,
+def int_mips_bseti_b : ClangBuiltin<"__builtin_msa_bseti_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bseti_h : GCCBuiltin<"__builtin_msa_bseti_h">,
+def int_mips_bseti_h : ClangBuiltin<"__builtin_msa_bseti_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bseti_w : GCCBuiltin<"__builtin_msa_bseti_w">,
+def int_mips_bseti_w : ClangBuiltin<"__builtin_msa_bseti_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bseti_d : GCCBuiltin<"__builtin_msa_bseti_d">,
+def int_mips_bseti_d : ClangBuiltin<"__builtin_msa_bseti_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bz_b : GCCBuiltin<"__builtin_msa_bz_b">,
+def int_mips_bz_b : ClangBuiltin<"__builtin_msa_bz_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bz_h : GCCBuiltin<"__builtin_msa_bz_h">,
+def int_mips_bz_h : ClangBuiltin<"__builtin_msa_bz_h">,
   Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bz_w : GCCBuiltin<"__builtin_msa_bz_w">,
+def int_mips_bz_w : ClangBuiltin<"__builtin_msa_bz_w">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bz_d : GCCBuiltin<"__builtin_msa_bz_d">,
+def int_mips_bz_d : ClangBuiltin<"__builtin_msa_bz_d">,
   Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bz_v : GCCBuiltin<"__builtin_msa_bz_v">,
+def int_mips_bz_v : ClangBuiltin<"__builtin_msa_bz_v">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ceq_b : GCCBuiltin<"__builtin_msa_ceq_b">,
+def int_mips_ceq_b : ClangBuiltin<"__builtin_msa_ceq_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ceq_h : GCCBuiltin<"__builtin_msa_ceq_h">,
+def int_mips_ceq_h : ClangBuiltin<"__builtin_msa_ceq_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ceq_w : GCCBuiltin<"__builtin_msa_ceq_w">,
+def int_mips_ceq_w : ClangBuiltin<"__builtin_msa_ceq_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ceq_d : GCCBuiltin<"__builtin_msa_ceq_d">,
+def int_mips_ceq_d : ClangBuiltin<"__builtin_msa_ceq_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ceqi_b : GCCBuiltin<"__builtin_msa_ceqi_b">,
+def int_mips_ceqi_b : ClangBuiltin<"__builtin_msa_ceqi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ceqi_h : GCCBuiltin<"__builtin_msa_ceqi_h">,
+def int_mips_ceqi_h : ClangBuiltin<"__builtin_msa_ceqi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ceqi_w : GCCBuiltin<"__builtin_msa_ceqi_w">,
+def int_mips_ceqi_w : ClangBuiltin<"__builtin_msa_ceqi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ceqi_d : GCCBuiltin<"__builtin_msa_ceqi_d">,
+def int_mips_ceqi_d : ClangBuiltin<"__builtin_msa_ceqi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_cfcmsa : GCCBuiltin<"__builtin_msa_cfcmsa">,
+def int_mips_cfcmsa : ClangBuiltin<"__builtin_msa_cfcmsa">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
-def int_mips_cle_s_b : GCCBuiltin<"__builtin_msa_cle_s_b">,
+def int_mips_cle_s_b : ClangBuiltin<"__builtin_msa_cle_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_cle_s_h : GCCBuiltin<"__builtin_msa_cle_s_h">,
+def int_mips_cle_s_h : ClangBuiltin<"__builtin_msa_cle_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_cle_s_w : GCCBuiltin<"__builtin_msa_cle_s_w">,
+def int_mips_cle_s_w : ClangBuiltin<"__builtin_msa_cle_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_cle_s_d : GCCBuiltin<"__builtin_msa_cle_s_d">,
+def int_mips_cle_s_d : ClangBuiltin<"__builtin_msa_cle_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_cle_u_b : GCCBuiltin<"__builtin_msa_cle_u_b">,
+def int_mips_cle_u_b : ClangBuiltin<"__builtin_msa_cle_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_cle_u_h : GCCBuiltin<"__builtin_msa_cle_u_h">,
+def int_mips_cle_u_h : ClangBuiltin<"__builtin_msa_cle_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_cle_u_w : GCCBuiltin<"__builtin_msa_cle_u_w">,
+def int_mips_cle_u_w : ClangBuiltin<"__builtin_msa_cle_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_cle_u_d : GCCBuiltin<"__builtin_msa_cle_u_d">,
+def int_mips_cle_u_d : ClangBuiltin<"__builtin_msa_cle_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_clei_s_b : GCCBuiltin<"__builtin_msa_clei_s_b">,
+def int_mips_clei_s_b : ClangBuiltin<"__builtin_msa_clei_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_s_h : GCCBuiltin<"__builtin_msa_clei_s_h">,
+def int_mips_clei_s_h : ClangBuiltin<"__builtin_msa_clei_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_s_w : GCCBuiltin<"__builtin_msa_clei_s_w">,
+def int_mips_clei_s_w : ClangBuiltin<"__builtin_msa_clei_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_s_d : GCCBuiltin<"__builtin_msa_clei_s_d">,
+def int_mips_clei_s_d : ClangBuiltin<"__builtin_msa_clei_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_b : GCCBuiltin<"__builtin_msa_clei_u_b">,
+def int_mips_clei_u_b : ClangBuiltin<"__builtin_msa_clei_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_h : GCCBuiltin<"__builtin_msa_clei_u_h">,
+def int_mips_clei_u_h : ClangBuiltin<"__builtin_msa_clei_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_w : GCCBuiltin<"__builtin_msa_clei_u_w">,
+def int_mips_clei_u_w : ClangBuiltin<"__builtin_msa_clei_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_d : GCCBuiltin<"__builtin_msa_clei_u_d">,
+def int_mips_clei_u_d : ClangBuiltin<"__builtin_msa_clei_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clt_s_b : GCCBuiltin<"__builtin_msa_clt_s_b">,
+def int_mips_clt_s_b : ClangBuiltin<"__builtin_msa_clt_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_clt_s_h : GCCBuiltin<"__builtin_msa_clt_s_h">,
+def int_mips_clt_s_h : ClangBuiltin<"__builtin_msa_clt_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_clt_s_w : GCCBuiltin<"__builtin_msa_clt_s_w">,
+def int_mips_clt_s_w : ClangBuiltin<"__builtin_msa_clt_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_clt_s_d : GCCBuiltin<"__builtin_msa_clt_s_d">,
+def int_mips_clt_s_d : ClangBuiltin<"__builtin_msa_clt_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_clt_u_b : GCCBuiltin<"__builtin_msa_clt_u_b">,
+def int_mips_clt_u_b : ClangBuiltin<"__builtin_msa_clt_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_clt_u_h : GCCBuiltin<"__builtin_msa_clt_u_h">,
+def int_mips_clt_u_h : ClangBuiltin<"__builtin_msa_clt_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_clt_u_w : GCCBuiltin<"__builtin_msa_clt_u_w">,
+def int_mips_clt_u_w : ClangBuiltin<"__builtin_msa_clt_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_clt_u_d : GCCBuiltin<"__builtin_msa_clt_u_d">,
+def int_mips_clt_u_d : ClangBuiltin<"__builtin_msa_clt_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_clti_s_b : GCCBuiltin<"__builtin_msa_clti_s_b">,
+def int_mips_clti_s_b : ClangBuiltin<"__builtin_msa_clti_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_s_h : GCCBuiltin<"__builtin_msa_clti_s_h">,
+def int_mips_clti_s_h : ClangBuiltin<"__builtin_msa_clti_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_s_w : GCCBuiltin<"__builtin_msa_clti_s_w">,
+def int_mips_clti_s_w : ClangBuiltin<"__builtin_msa_clti_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_s_d : GCCBuiltin<"__builtin_msa_clti_s_d">,
+def int_mips_clti_s_d : ClangBuiltin<"__builtin_msa_clti_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_b : GCCBuiltin<"__builtin_msa_clti_u_b">,
+def int_mips_clti_u_b : ClangBuiltin<"__builtin_msa_clti_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_h : GCCBuiltin<"__builtin_msa_clti_u_h">,
+def int_mips_clti_u_h : ClangBuiltin<"__builtin_msa_clti_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_w : GCCBuiltin<"__builtin_msa_clti_u_w">,
+def int_mips_clti_u_w : ClangBuiltin<"__builtin_msa_clti_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">,
+def int_mips_clti_u_d : ClangBuiltin<"__builtin_msa_clti_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">,
+def int_mips_copy_s_b : ClangBuiltin<"__builtin_msa_copy_s_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_s_h : GCCBuiltin<"__builtin_msa_copy_s_h">,
+def int_mips_copy_s_h : ClangBuiltin<"__builtin_msa_copy_s_h">,
   Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_s_w : GCCBuiltin<"__builtin_msa_copy_s_w">,
+def int_mips_copy_s_w : ClangBuiltin<"__builtin_msa_copy_s_w">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_s_d : GCCBuiltin<"__builtin_msa_copy_s_d">,
+def int_mips_copy_s_d : ClangBuiltin<"__builtin_msa_copy_s_d">,
   Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_b : GCCBuiltin<"__builtin_msa_copy_u_b">,
+def int_mips_copy_u_b : ClangBuiltin<"__builtin_msa_copy_u_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_h : GCCBuiltin<"__builtin_msa_copy_u_h">,
+def int_mips_copy_u_h : ClangBuiltin<"__builtin_msa_copy_u_h">,
   Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_w : GCCBuiltin<"__builtin_msa_copy_u_w">,
+def int_mips_copy_u_w : ClangBuiltin<"__builtin_msa_copy_u_w">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">,
+def int_mips_copy_u_d : ClangBuiltin<"__builtin_msa_copy_u_d">,
   Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">,
+def int_mips_ctcmsa : ClangBuiltin<"__builtin_msa_ctcmsa">,
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
-def int_mips_div_s_b : GCCBuiltin<"__builtin_msa_div_s_b">,
+def int_mips_div_s_b : ClangBuiltin<"__builtin_msa_div_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_div_s_h : GCCBuiltin<"__builtin_msa_div_s_h">,
+def int_mips_div_s_h : ClangBuiltin<"__builtin_msa_div_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_div_s_w : GCCBuiltin<"__builtin_msa_div_s_w">,
+def int_mips_div_s_w : ClangBuiltin<"__builtin_msa_div_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_div_s_d : GCCBuiltin<"__builtin_msa_div_s_d">,
+def int_mips_div_s_d : ClangBuiltin<"__builtin_msa_div_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_div_u_b : GCCBuiltin<"__builtin_msa_div_u_b">,
+def int_mips_div_u_b : ClangBuiltin<"__builtin_msa_div_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_div_u_h : GCCBuiltin<"__builtin_msa_div_u_h">,
+def int_mips_div_u_h : ClangBuiltin<"__builtin_msa_div_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_div_u_w : GCCBuiltin<"__builtin_msa_div_u_w">,
+def int_mips_div_u_w : ClangBuiltin<"__builtin_msa_div_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_div_u_d : GCCBuiltin<"__builtin_msa_div_u_d">,
+def int_mips_div_u_d : ClangBuiltin<"__builtin_msa_div_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 // This instruction is part of the MSA spec but it does not share the
 // __builtin_msa prefix because it operates on GP registers.
-def int_mips_dlsa : GCCBuiltin<"__builtin_mips_dlsa">,
+def int_mips_dlsa : ClangBuiltin<"__builtin_mips_dlsa">,
   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_dotp_s_h : GCCBuiltin<"__builtin_msa_dotp_s_h">,
+def int_mips_dotp_s_h : ClangBuiltin<"__builtin_msa_dotp_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dotp_s_w : GCCBuiltin<"__builtin_msa_dotp_s_w">,
+def int_mips_dotp_s_w : ClangBuiltin<"__builtin_msa_dotp_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dotp_s_d : GCCBuiltin<"__builtin_msa_dotp_s_d">,
+def int_mips_dotp_s_d : ClangBuiltin<"__builtin_msa_dotp_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dotp_u_h : GCCBuiltin<"__builtin_msa_dotp_u_h">,
+def int_mips_dotp_u_h : ClangBuiltin<"__builtin_msa_dotp_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dotp_u_w : GCCBuiltin<"__builtin_msa_dotp_u_w">,
+def int_mips_dotp_u_w : ClangBuiltin<"__builtin_msa_dotp_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dotp_u_d : GCCBuiltin<"__builtin_msa_dotp_u_d">,
+def int_mips_dotp_u_d : ClangBuiltin<"__builtin_msa_dotp_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dpadd_s_h : GCCBuiltin<"__builtin_msa_dpadd_s_h">,
+def int_mips_dpadd_s_h : ClangBuiltin<"__builtin_msa_dpadd_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dpadd_s_w : GCCBuiltin<"__builtin_msa_dpadd_s_w">,
+def int_mips_dpadd_s_w : ClangBuiltin<"__builtin_msa_dpadd_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dpadd_s_d : GCCBuiltin<"__builtin_msa_dpadd_s_d">,
+def int_mips_dpadd_s_d : ClangBuiltin<"__builtin_msa_dpadd_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dpadd_u_h : GCCBuiltin<"__builtin_msa_dpadd_u_h">,
+def int_mips_dpadd_u_h : ClangBuiltin<"__builtin_msa_dpadd_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dpadd_u_w : GCCBuiltin<"__builtin_msa_dpadd_u_w">,
+def int_mips_dpadd_u_w : ClangBuiltin<"__builtin_msa_dpadd_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dpadd_u_d : GCCBuiltin<"__builtin_msa_dpadd_u_d">,
+def int_mips_dpadd_u_d : ClangBuiltin<"__builtin_msa_dpadd_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dpsub_s_h : GCCBuiltin<"__builtin_msa_dpsub_s_h">,
+def int_mips_dpsub_s_h : ClangBuiltin<"__builtin_msa_dpsub_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dpsub_s_w : GCCBuiltin<"__builtin_msa_dpsub_s_w">,
+def int_mips_dpsub_s_w : ClangBuiltin<"__builtin_msa_dpsub_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dpsub_s_d : GCCBuiltin<"__builtin_msa_dpsub_s_d">,
+def int_mips_dpsub_s_d : ClangBuiltin<"__builtin_msa_dpsub_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dpsub_u_h : GCCBuiltin<"__builtin_msa_dpsub_u_h">,
+def int_mips_dpsub_u_h : ClangBuiltin<"__builtin_msa_dpsub_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dpsub_u_w : GCCBuiltin<"__builtin_msa_dpsub_u_w">,
+def int_mips_dpsub_u_w : ClangBuiltin<"__builtin_msa_dpsub_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dpsub_u_d : GCCBuiltin<"__builtin_msa_dpsub_u_d">,
+def int_mips_dpsub_u_d : ClangBuiltin<"__builtin_msa_dpsub_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_fadd_w : GCCBuiltin<"__builtin_msa_fadd_w">,
+def int_mips_fadd_w : ClangBuiltin<"__builtin_msa_fadd_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fadd_d : GCCBuiltin<"__builtin_msa_fadd_d">,
+def int_mips_fadd_d : ClangBuiltin<"__builtin_msa_fadd_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcaf_w : GCCBuiltin<"__builtin_msa_fcaf_w">,
+def int_mips_fcaf_w : ClangBuiltin<"__builtin_msa_fcaf_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcaf_d : GCCBuiltin<"__builtin_msa_fcaf_d">,
+def int_mips_fcaf_d : ClangBuiltin<"__builtin_msa_fcaf_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fceq_w : GCCBuiltin<"__builtin_msa_fceq_w">,
+def int_mips_fceq_w : ClangBuiltin<"__builtin_msa_fceq_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fceq_d : GCCBuiltin<"__builtin_msa_fceq_d">,
+def int_mips_fceq_d : ClangBuiltin<"__builtin_msa_fceq_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcle_w : GCCBuiltin<"__builtin_msa_fcle_w">,
+def int_mips_fcle_w : ClangBuiltin<"__builtin_msa_fcle_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcle_d : GCCBuiltin<"__builtin_msa_fcle_d">,
+def int_mips_fcle_d : ClangBuiltin<"__builtin_msa_fcle_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fclt_w : GCCBuiltin<"__builtin_msa_fclt_w">,
+def int_mips_fclt_w : ClangBuiltin<"__builtin_msa_fclt_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fclt_d : GCCBuiltin<"__builtin_msa_fclt_d">,
+def int_mips_fclt_d : ClangBuiltin<"__builtin_msa_fclt_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fclass_w : GCCBuiltin<"__builtin_msa_fclass_w">,
+def int_mips_fclass_w : ClangBuiltin<"__builtin_msa_fclass_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fclass_d : GCCBuiltin<"__builtin_msa_fclass_d">,
+def int_mips_fclass_d : ClangBuiltin<"__builtin_msa_fclass_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcne_w : GCCBuiltin<"__builtin_msa_fcne_w">,
+def int_mips_fcne_w : ClangBuiltin<"__builtin_msa_fcne_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcne_d : GCCBuiltin<"__builtin_msa_fcne_d">,
+def int_mips_fcne_d : ClangBuiltin<"__builtin_msa_fcne_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcor_w : GCCBuiltin<"__builtin_msa_fcor_w">,
+def int_mips_fcor_w : ClangBuiltin<"__builtin_msa_fcor_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcor_d : GCCBuiltin<"__builtin_msa_fcor_d">,
+def int_mips_fcor_d : ClangBuiltin<"__builtin_msa_fcor_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcueq_w : GCCBuiltin<"__builtin_msa_fcueq_w">,
+def int_mips_fcueq_w : ClangBuiltin<"__builtin_msa_fcueq_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcueq_d : GCCBuiltin<"__builtin_msa_fcueq_d">,
+def int_mips_fcueq_d : ClangBuiltin<"__builtin_msa_fcueq_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcule_w : GCCBuiltin<"__builtin_msa_fcule_w">,
+def int_mips_fcule_w : ClangBuiltin<"__builtin_msa_fcule_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcule_d : GCCBuiltin<"__builtin_msa_fcule_d">,
+def int_mips_fcule_d : ClangBuiltin<"__builtin_msa_fcule_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcult_w : GCCBuiltin<"__builtin_msa_fcult_w">,
+def int_mips_fcult_w : ClangBuiltin<"__builtin_msa_fcult_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcult_d : GCCBuiltin<"__builtin_msa_fcult_d">,
+def int_mips_fcult_d : ClangBuiltin<"__builtin_msa_fcult_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcun_w : GCCBuiltin<"__builtin_msa_fcun_w">,
+def int_mips_fcun_w : ClangBuiltin<"__builtin_msa_fcun_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcun_d : GCCBuiltin<"__builtin_msa_fcun_d">,
+def int_mips_fcun_d : ClangBuiltin<"__builtin_msa_fcun_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcune_w : GCCBuiltin<"__builtin_msa_fcune_w">,
+def int_mips_fcune_w : ClangBuiltin<"__builtin_msa_fcune_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcune_d : GCCBuiltin<"__builtin_msa_fcune_d">,
+def int_mips_fcune_d : ClangBuiltin<"__builtin_msa_fcune_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fdiv_w : GCCBuiltin<"__builtin_msa_fdiv_w">,
+def int_mips_fdiv_w : ClangBuiltin<"__builtin_msa_fdiv_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fdiv_d : GCCBuiltin<"__builtin_msa_fdiv_d">,
+def int_mips_fdiv_d : ClangBuiltin<"__builtin_msa_fdiv_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fexdo_h : GCCBuiltin<"__builtin_msa_fexdo_h">,
+def int_mips_fexdo_h : ClangBuiltin<"__builtin_msa_fexdo_h">,
   Intrinsic<[llvm_v8f16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fexdo_w : GCCBuiltin<"__builtin_msa_fexdo_w">,
+def int_mips_fexdo_w : ClangBuiltin<"__builtin_msa_fexdo_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fexp2_w : GCCBuiltin<"__builtin_msa_fexp2_w">,
+def int_mips_fexp2_w : ClangBuiltin<"__builtin_msa_fexp2_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_fexp2_d : GCCBuiltin<"__builtin_msa_fexp2_d">,
+def int_mips_fexp2_d : ClangBuiltin<"__builtin_msa_fexp2_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_fexupl_w : GCCBuiltin<"__builtin_msa_fexupl_w">,
+def int_mips_fexupl_w : ClangBuiltin<"__builtin_msa_fexupl_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v8f16_ty], [IntrNoMem]>;
-def int_mips_fexupl_d : GCCBuiltin<"__builtin_msa_fexupl_d">,
+def int_mips_fexupl_d : ClangBuiltin<"__builtin_msa_fexupl_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fexupr_w : GCCBuiltin<"__builtin_msa_fexupr_w">,
+def int_mips_fexupr_w : ClangBuiltin<"__builtin_msa_fexupr_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v8f16_ty], [IntrNoMem]>;
-def int_mips_fexupr_d : GCCBuiltin<"__builtin_msa_fexupr_d">,
+def int_mips_fexupr_d : ClangBuiltin<"__builtin_msa_fexupr_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ffint_s_w : GCCBuiltin<"__builtin_msa_ffint_s_w">,
+def int_mips_ffint_s_w : ClangBuiltin<"__builtin_msa_ffint_s_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ffint_s_d : GCCBuiltin<"__builtin_msa_ffint_s_d">,
+def int_mips_ffint_s_d : ClangBuiltin<"__builtin_msa_ffint_s_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ffint_u_w : GCCBuiltin<"__builtin_msa_ffint_u_w">,
+def int_mips_ffint_u_w : ClangBuiltin<"__builtin_msa_ffint_u_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ffint_u_d : GCCBuiltin<"__builtin_msa_ffint_u_d">,
+def int_mips_ffint_u_d : ClangBuiltin<"__builtin_msa_ffint_u_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ffql_w : GCCBuiltin<"__builtin_msa_ffql_w">,
+def int_mips_ffql_w : ClangBuiltin<"__builtin_msa_ffql_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ffql_d : GCCBuiltin<"__builtin_msa_ffql_d">,
+def int_mips_ffql_d : ClangBuiltin<"__builtin_msa_ffql_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ffqr_w : GCCBuiltin<"__builtin_msa_ffqr_w">,
+def int_mips_ffqr_w : ClangBuiltin<"__builtin_msa_ffqr_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ffqr_d : GCCBuiltin<"__builtin_msa_ffqr_d">,
+def int_mips_ffqr_d : ClangBuiltin<"__builtin_msa_ffqr_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_fill_b : GCCBuiltin<"__builtin_msa_fill_b">,
+def int_mips_fill_b : ClangBuiltin<"__builtin_msa_fill_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_fill_h : GCCBuiltin<"__builtin_msa_fill_h">,
+def int_mips_fill_h : ClangBuiltin<"__builtin_msa_fill_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_fill_w : GCCBuiltin<"__builtin_msa_fill_w">,
+def int_mips_fill_w : ClangBuiltin<"__builtin_msa_fill_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_fill_d : GCCBuiltin<"__builtin_msa_fill_d">,
+def int_mips_fill_d : ClangBuiltin<"__builtin_msa_fill_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
-def int_mips_flog2_w : GCCBuiltin<"__builtin_msa_flog2_w">,
+def int_mips_flog2_w : ClangBuiltin<"__builtin_msa_flog2_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_flog2_d : GCCBuiltin<"__builtin_msa_flog2_d">,
+def int_mips_flog2_d : ClangBuiltin<"__builtin_msa_flog2_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmadd_w : GCCBuiltin<"__builtin_msa_fmadd_w">,
+def int_mips_fmadd_w : ClangBuiltin<"__builtin_msa_fmadd_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmadd_d : GCCBuiltin<"__builtin_msa_fmadd_d">,
+def int_mips_fmadd_d : ClangBuiltin<"__builtin_msa_fmadd_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmax_w : GCCBuiltin<"__builtin_msa_fmax_w">,
+def int_mips_fmax_w : ClangBuiltin<"__builtin_msa_fmax_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmax_d : GCCBuiltin<"__builtin_msa_fmax_d">,
+def int_mips_fmax_d : ClangBuiltin<"__builtin_msa_fmax_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmax_a_w : GCCBuiltin<"__builtin_msa_fmax_a_w">,
+def int_mips_fmax_a_w : ClangBuiltin<"__builtin_msa_fmax_a_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmax_a_d : GCCBuiltin<"__builtin_msa_fmax_a_d">,
+def int_mips_fmax_a_d : ClangBuiltin<"__builtin_msa_fmax_a_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmin_w : GCCBuiltin<"__builtin_msa_fmin_w">,
+def int_mips_fmin_w : ClangBuiltin<"__builtin_msa_fmin_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmin_d : GCCBuiltin<"__builtin_msa_fmin_d">,
+def int_mips_fmin_d : ClangBuiltin<"__builtin_msa_fmin_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmin_a_w : GCCBuiltin<"__builtin_msa_fmin_a_w">,
+def int_mips_fmin_a_w : ClangBuiltin<"__builtin_msa_fmin_a_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmin_a_d : GCCBuiltin<"__builtin_msa_fmin_a_d">,
+def int_mips_fmin_a_d : ClangBuiltin<"__builtin_msa_fmin_a_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmsub_w : GCCBuiltin<"__builtin_msa_fmsub_w">,
+def int_mips_fmsub_w : ClangBuiltin<"__builtin_msa_fmsub_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmsub_d : GCCBuiltin<"__builtin_msa_fmsub_d">,
+def int_mips_fmsub_d : ClangBuiltin<"__builtin_msa_fmsub_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmul_w : GCCBuiltin<"__builtin_msa_fmul_w">,
+def int_mips_fmul_w : ClangBuiltin<"__builtin_msa_fmul_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmul_d : GCCBuiltin<"__builtin_msa_fmul_d">,
+def int_mips_fmul_d : ClangBuiltin<"__builtin_msa_fmul_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_frint_w : GCCBuiltin<"__builtin_msa_frint_w">,
+def int_mips_frint_w : ClangBuiltin<"__builtin_msa_frint_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_frint_d : GCCBuiltin<"__builtin_msa_frint_d">,
+def int_mips_frint_d : ClangBuiltin<"__builtin_msa_frint_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_frcp_w : GCCBuiltin<"__builtin_msa_frcp_w">,
+def int_mips_frcp_w : ClangBuiltin<"__builtin_msa_frcp_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_frcp_d : GCCBuiltin<"__builtin_msa_frcp_d">,
+def int_mips_frcp_d : ClangBuiltin<"__builtin_msa_frcp_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_frsqrt_w : GCCBuiltin<"__builtin_msa_frsqrt_w">,
+def int_mips_frsqrt_w : ClangBuiltin<"__builtin_msa_frsqrt_w">,
   Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_frsqrt_d : GCCBuiltin<"__builtin_msa_frsqrt_d">,
+def int_mips_frsqrt_d : ClangBuiltin<"__builtin_msa_frsqrt_d">,
   Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsaf_w : GCCBuiltin<"__builtin_msa_fsaf_w">,
+def int_mips_fsaf_w : ClangBuiltin<"__builtin_msa_fsaf_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsaf_d :
ClangBuiltin<"__builtin_msa_fsaf_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fseq_w : GCCBuiltin<"__builtin_msa_fseq_w">, +def int_mips_fseq_w : ClangBuiltin<"__builtin_msa_fseq_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fseq_d : GCCBuiltin<"__builtin_msa_fseq_d">, +def int_mips_fseq_d : ClangBuiltin<"__builtin_msa_fseq_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsle_w : GCCBuiltin<"__builtin_msa_fsle_w">, +def int_mips_fsle_w : ClangBuiltin<"__builtin_msa_fsle_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsle_d : GCCBuiltin<"__builtin_msa_fsle_d">, +def int_mips_fsle_d : ClangBuiltin<"__builtin_msa_fsle_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fslt_w : GCCBuiltin<"__builtin_msa_fslt_w">, +def int_mips_fslt_w : ClangBuiltin<"__builtin_msa_fslt_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fslt_d : GCCBuiltin<"__builtin_msa_fslt_d">, +def int_mips_fslt_d : ClangBuiltin<"__builtin_msa_fslt_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsne_w : GCCBuiltin<"__builtin_msa_fsne_w">, +def int_mips_fsne_w : ClangBuiltin<"__builtin_msa_fsne_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsne_d : GCCBuiltin<"__builtin_msa_fsne_d">, +def int_mips_fsne_d : ClangBuiltin<"__builtin_msa_fsne_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsor_w : GCCBuiltin<"__builtin_msa_fsor_w">, +def int_mips_fsor_w : ClangBuiltin<"__builtin_msa_fsor_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsor_d : GCCBuiltin<"__builtin_msa_fsor_d">, +def int_mips_fsor_d : ClangBuiltin<"__builtin_msa_fsor_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsqrt_w : GCCBuiltin<"__builtin_msa_fsqrt_w">, +def int_mips_fsqrt_w : ClangBuiltin<"__builtin_msa_fsqrt_w">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsqrt_d : GCCBuiltin<"__builtin_msa_fsqrt_d">, +def int_mips_fsqrt_d : ClangBuiltin<"__builtin_msa_fsqrt_d">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsub_w : GCCBuiltin<"__builtin_msa_fsub_w">, +def int_mips_fsub_w : ClangBuiltin<"__builtin_msa_fsub_w">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsub_d : GCCBuiltin<"__builtin_msa_fsub_d">, +def int_mips_fsub_d : ClangBuiltin<"__builtin_msa_fsub_d">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsueq_w : GCCBuiltin<"__builtin_msa_fsueq_w">, +def int_mips_fsueq_w : ClangBuiltin<"__builtin_msa_fsueq_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsueq_d : GCCBuiltin<"__builtin_msa_fsueq_d">, +def int_mips_fsueq_d : ClangBuiltin<"__builtin_msa_fsueq_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsule_w : GCCBuiltin<"__builtin_msa_fsule_w">, +def int_mips_fsule_w : ClangBuiltin<"__builtin_msa_fsule_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsule_d : GCCBuiltin<"__builtin_msa_fsule_d">, +def int_mips_fsule_d : ClangBuiltin<"__builtin_msa_fsule_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, 
llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsult_w : GCCBuiltin<"__builtin_msa_fsult_w">, +def int_mips_fsult_w : ClangBuiltin<"__builtin_msa_fsult_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsult_d : GCCBuiltin<"__builtin_msa_fsult_d">, +def int_mips_fsult_d : ClangBuiltin<"__builtin_msa_fsult_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsun_w : GCCBuiltin<"__builtin_msa_fsun_w">, +def int_mips_fsun_w : ClangBuiltin<"__builtin_msa_fsun_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsun_d : GCCBuiltin<"__builtin_msa_fsun_d">, +def int_mips_fsun_d : ClangBuiltin<"__builtin_msa_fsun_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_fsune_w : GCCBuiltin<"__builtin_msa_fsune_w">, +def int_mips_fsune_w : ClangBuiltin<"__builtin_msa_fsune_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_fsune_d : GCCBuiltin<"__builtin_msa_fsune_d">, +def int_mips_fsune_d : ClangBuiltin<"__builtin_msa_fsune_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_ftint_s_w : GCCBuiltin<"__builtin_msa_ftint_s_w">, +def int_mips_ftint_s_w : ClangBuiltin<"__builtin_msa_ftint_s_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_ftint_s_d : GCCBuiltin<"__builtin_msa_ftint_s_d">, +def int_mips_ftint_s_d : ClangBuiltin<"__builtin_msa_ftint_s_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_ftint_u_w : GCCBuiltin<"__builtin_msa_ftint_u_w">, +def int_mips_ftint_u_w : ClangBuiltin<"__builtin_msa_ftint_u_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_ftint_u_d : GCCBuiltin<"__builtin_msa_ftint_u_d">, +def int_mips_ftint_u_d : ClangBuiltin<"__builtin_msa_ftint_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_ftq_h : GCCBuiltin<"__builtin_msa_ftq_h">, +def int_mips_ftq_h : ClangBuiltin<"__builtin_msa_ftq_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_ftq_w : GCCBuiltin<"__builtin_msa_ftq_w">, +def int_mips_ftq_w : ClangBuiltin<"__builtin_msa_ftq_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_ftrunc_s_w : GCCBuiltin<"__builtin_msa_ftrunc_s_w">, +def int_mips_ftrunc_s_w : ClangBuiltin<"__builtin_msa_ftrunc_s_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_ftrunc_s_d : GCCBuiltin<"__builtin_msa_ftrunc_s_d">, +def int_mips_ftrunc_s_d : ClangBuiltin<"__builtin_msa_ftrunc_s_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_ftrunc_u_w : GCCBuiltin<"__builtin_msa_ftrunc_u_w">, +def int_mips_ftrunc_u_w : ClangBuiltin<"__builtin_msa_ftrunc_u_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_mips_ftrunc_u_d : GCCBuiltin<"__builtin_msa_ftrunc_u_d">, +def int_mips_ftrunc_u_d : ClangBuiltin<"__builtin_msa_ftrunc_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; -def int_mips_hadd_s_h : GCCBuiltin<"__builtin_msa_hadd_s_h">, +def int_mips_hadd_s_h : ClangBuiltin<"__builtin_msa_hadd_s_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_hadd_s_w : GCCBuiltin<"__builtin_msa_hadd_s_w">, +def int_mips_hadd_s_w : ClangBuiltin<"__builtin_msa_hadd_s_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_hadd_s_d : 
GCCBuiltin<"__builtin_msa_hadd_s_d">, +def int_mips_hadd_s_d : ClangBuiltin<"__builtin_msa_hadd_s_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_hadd_u_h : GCCBuiltin<"__builtin_msa_hadd_u_h">, +def int_mips_hadd_u_h : ClangBuiltin<"__builtin_msa_hadd_u_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_hadd_u_w : GCCBuiltin<"__builtin_msa_hadd_u_w">, +def int_mips_hadd_u_w : ClangBuiltin<"__builtin_msa_hadd_u_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_hadd_u_d : GCCBuiltin<"__builtin_msa_hadd_u_d">, +def int_mips_hadd_u_d : ClangBuiltin<"__builtin_msa_hadd_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_hsub_s_h : GCCBuiltin<"__builtin_msa_hsub_s_h">, +def int_mips_hsub_s_h : ClangBuiltin<"__builtin_msa_hsub_s_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_hsub_s_w : GCCBuiltin<"__builtin_msa_hsub_s_w">, +def int_mips_hsub_s_w : ClangBuiltin<"__builtin_msa_hsub_s_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_hsub_s_d : GCCBuiltin<"__builtin_msa_hsub_s_d">, +def int_mips_hsub_s_d : ClangBuiltin<"__builtin_msa_hsub_s_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_hsub_u_h : GCCBuiltin<"__builtin_msa_hsub_u_h">, +def int_mips_hsub_u_h : ClangBuiltin<"__builtin_msa_hsub_u_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_hsub_u_w : GCCBuiltin<"__builtin_msa_hsub_u_w">, +def int_mips_hsub_u_w : ClangBuiltin<"__builtin_msa_hsub_u_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_hsub_u_d : GCCBuiltin<"__builtin_msa_hsub_u_d">, +def int_mips_hsub_u_d : ClangBuiltin<"__builtin_msa_hsub_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_ilvev_b : GCCBuiltin<"__builtin_msa_ilvev_b">, +def int_mips_ilvev_b : ClangBuiltin<"__builtin_msa_ilvev_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_ilvev_h : GCCBuiltin<"__builtin_msa_ilvev_h">, +def int_mips_ilvev_h : ClangBuiltin<"__builtin_msa_ilvev_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_ilvev_w : GCCBuiltin<"__builtin_msa_ilvev_w">, +def int_mips_ilvev_w : ClangBuiltin<"__builtin_msa_ilvev_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_ilvev_d : GCCBuiltin<"__builtin_msa_ilvev_d">, +def int_mips_ilvev_d : ClangBuiltin<"__builtin_msa_ilvev_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; -def int_mips_ilvl_b : GCCBuiltin<"__builtin_msa_ilvl_b">, +def int_mips_ilvl_b : ClangBuiltin<"__builtin_msa_ilvl_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_ilvl_h : GCCBuiltin<"__builtin_msa_ilvl_h">, +def int_mips_ilvl_h : ClangBuiltin<"__builtin_msa_ilvl_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_ilvl_w : GCCBuiltin<"__builtin_msa_ilvl_w">, +def int_mips_ilvl_w : ClangBuiltin<"__builtin_msa_ilvl_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_ilvl_d : GCCBuiltin<"__builtin_msa_ilvl_d">, +def int_mips_ilvl_d : ClangBuiltin<"__builtin_msa_ilvl_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; -def 
int_mips_ilvod_b : GCCBuiltin<"__builtin_msa_ilvod_b">, +def int_mips_ilvod_b : ClangBuiltin<"__builtin_msa_ilvod_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_ilvod_h : GCCBuiltin<"__builtin_msa_ilvod_h">, +def int_mips_ilvod_h : ClangBuiltin<"__builtin_msa_ilvod_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_ilvod_w : GCCBuiltin<"__builtin_msa_ilvod_w">, +def int_mips_ilvod_w : ClangBuiltin<"__builtin_msa_ilvod_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_ilvod_d : GCCBuiltin<"__builtin_msa_ilvod_d">, +def int_mips_ilvod_d : ClangBuiltin<"__builtin_msa_ilvod_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; -def int_mips_ilvr_b : GCCBuiltin<"__builtin_msa_ilvr_b">, +def int_mips_ilvr_b : ClangBuiltin<"__builtin_msa_ilvr_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_ilvr_h : GCCBuiltin<"__builtin_msa_ilvr_h">, +def int_mips_ilvr_h : ClangBuiltin<"__builtin_msa_ilvr_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_ilvr_w : GCCBuiltin<"__builtin_msa_ilvr_w">, +def int_mips_ilvr_w : ClangBuiltin<"__builtin_msa_ilvr_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_ilvr_d : GCCBuiltin<"__builtin_msa_ilvr_d">, +def int_mips_ilvr_d : ClangBuiltin<"__builtin_msa_ilvr_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; -def int_mips_insert_b : GCCBuiltin<"__builtin_msa_insert_b">, +def int_mips_insert_b : ClangBuiltin<"__builtin_msa_insert_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_insert_h : GCCBuiltin<"__builtin_msa_insert_h">, +def int_mips_insert_h : ClangBuiltin<"__builtin_msa_insert_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_insert_w : GCCBuiltin<"__builtin_msa_insert_w">, +def int_mips_insert_w : ClangBuiltin<"__builtin_msa_insert_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">, +def int_mips_insert_d : ClangBuiltin<"__builtin_msa_insert_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">, +def int_mips_insve_b : ClangBuiltin<"__builtin_msa_insve_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem, ImmArg>]>; -def int_mips_insve_h : GCCBuiltin<"__builtin_msa_insve_h">, +def int_mips_insve_h : ClangBuiltin<"__builtin_msa_insve_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem, ImmArg>]>; -def int_mips_insve_w : GCCBuiltin<"__builtin_msa_insve_w">, +def int_mips_insve_w : ClangBuiltin<"__builtin_msa_insve_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">, +def int_mips_insve_d : ClangBuiltin<"__builtin_msa_insve_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty], [IntrNoMem, ImmArg>]>; -def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">, +def int_mips_ld_b : ClangBuiltin<"__builtin_msa_ld_b">, Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">, +def int_mips_ld_h : 
ClangBuiltin<"__builtin_msa_ld_h">, Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">, +def int_mips_ld_w : ClangBuiltin<"__builtin_msa_ld_w">, Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">, +def int_mips_ld_d : ClangBuiltin<"__builtin_msa_ld_d">, Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_ldr_d : GCCBuiltin<"__builtin_msa_ldr_d">, +def int_mips_ldr_d : ClangBuiltin<"__builtin_msa_ldr_d">, Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_ldr_w : GCCBuiltin<"__builtin_msa_ldr_w">, +def int_mips_ldr_w : ClangBuiltin<"__builtin_msa_ldr_w">, Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">, +def int_mips_ldi_b : ClangBuiltin<"__builtin_msa_ldi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">, +def int_mips_ldi_h : ClangBuiltin<"__builtin_msa_ldi_h">, Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_ldi_w : GCCBuiltin<"__builtin_msa_ldi_w">, +def int_mips_ldi_w : ClangBuiltin<"__builtin_msa_ldi_w">, Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; -def int_mips_ldi_d : GCCBuiltin<"__builtin_msa_ldi_d">, +def int_mips_ldi_d : ClangBuiltin<"__builtin_msa_ldi_d">, Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; // This instruction is part of the MSA spec but it does not share the // __builtin_msa prefix because it operates on the GPR registers. 
-def int_mips_lsa : GCCBuiltin<"__builtin_mips_lsa">,
+def int_mips_lsa : ClangBuiltin<"__builtin_mips_lsa">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_madd_q_h : GCCBuiltin<"__builtin_msa_madd_q_h">,
+def int_mips_madd_q_h : ClangBuiltin<"__builtin_msa_madd_q_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_madd_q_w : GCCBuiltin<"__builtin_msa_madd_q_w">,
+def int_mips_madd_q_w : ClangBuiltin<"__builtin_msa_madd_q_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_maddr_q_h : GCCBuiltin<"__builtin_msa_maddr_q_h">,
+def int_mips_maddr_q_h : ClangBuiltin<"__builtin_msa_maddr_q_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_maddr_q_w : GCCBuiltin<"__builtin_msa_maddr_q_w">,
+def int_mips_maddr_q_w : ClangBuiltin<"__builtin_msa_maddr_q_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_maddv_b : GCCBuiltin<"__builtin_msa_maddv_b">,
+def int_mips_maddv_b : ClangBuiltin<"__builtin_msa_maddv_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_maddv_h : GCCBuiltin<"__builtin_msa_maddv_h">,
+def int_mips_maddv_h : ClangBuiltin<"__builtin_msa_maddv_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_maddv_w : GCCBuiltin<"__builtin_msa_maddv_w">,
+def int_mips_maddv_w : ClangBuiltin<"__builtin_msa_maddv_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_maddv_d : GCCBuiltin<"__builtin_msa_maddv_d">,
+def int_mips_maddv_d : ClangBuiltin<"__builtin_msa_maddv_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_max_a_b : GCCBuiltin<"__builtin_msa_max_a_b">,
+def int_mips_max_a_b : ClangBuiltin<"__builtin_msa_max_a_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_max_a_h : GCCBuiltin<"__builtin_msa_max_a_h">,
+def int_mips_max_a_h : ClangBuiltin<"__builtin_msa_max_a_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_max_a_w : GCCBuiltin<"__builtin_msa_max_a_w">,
+def int_mips_max_a_w : ClangBuiltin<"__builtin_msa_max_a_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_max_a_d : GCCBuiltin<"__builtin_msa_max_a_d">,
+def int_mips_max_a_d : ClangBuiltin<"__builtin_msa_max_a_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_max_s_b : GCCBuiltin<"__builtin_msa_max_s_b">,
+def int_mips_max_s_b : ClangBuiltin<"__builtin_msa_max_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_max_s_h : GCCBuiltin<"__builtin_msa_max_s_h">,
+def int_mips_max_s_h : ClangBuiltin<"__builtin_msa_max_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_max_s_w : GCCBuiltin<"__builtin_msa_max_s_w">,
+def int_mips_max_s_w : ClangBuiltin<"__builtin_msa_max_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_max_s_d : GCCBuiltin<"__builtin_msa_max_s_d">,
+def int_mips_max_s_d : ClangBuiltin<"__builtin_msa_max_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_max_u_b : GCCBuiltin<"__builtin_msa_max_u_b">,
+def int_mips_max_u_b : ClangBuiltin<"__builtin_msa_max_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_max_u_h : GCCBuiltin<"__builtin_msa_max_u_h">,
+def int_mips_max_u_h : ClangBuiltin<"__builtin_msa_max_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_max_u_w : GCCBuiltin<"__builtin_msa_max_u_w">,
+def int_mips_max_u_w : ClangBuiltin<"__builtin_msa_max_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_max_u_d : GCCBuiltin<"__builtin_msa_max_u_d">,
+def int_mips_max_u_d : ClangBuiltin<"__builtin_msa_max_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_maxi_s_b : GCCBuiltin<"__builtin_msa_maxi_s_b">,
+def int_mips_maxi_s_b : ClangBuiltin<"__builtin_msa_maxi_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_s_h : GCCBuiltin<"__builtin_msa_maxi_s_h">,
+def int_mips_maxi_s_h : ClangBuiltin<"__builtin_msa_maxi_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_s_w : GCCBuiltin<"__builtin_msa_maxi_s_w">,
+def int_mips_maxi_s_w : ClangBuiltin<"__builtin_msa_maxi_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_s_d : GCCBuiltin<"__builtin_msa_maxi_s_d">,
+def int_mips_maxi_s_d : ClangBuiltin<"__builtin_msa_maxi_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_u_b : GCCBuiltin<"__builtin_msa_maxi_u_b">,
+def int_mips_maxi_u_b : ClangBuiltin<"__builtin_msa_maxi_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_u_h : GCCBuiltin<"__builtin_msa_maxi_u_h">,
+def int_mips_maxi_u_h : ClangBuiltin<"__builtin_msa_maxi_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_u_w : GCCBuiltin<"__builtin_msa_maxi_u_w">,
+def int_mips_maxi_u_w : ClangBuiltin<"__builtin_msa_maxi_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_maxi_u_d : GCCBuiltin<"__builtin_msa_maxi_u_d">,
+def int_mips_maxi_u_d : ClangBuiltin<"__builtin_msa_maxi_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_min_a_b : GCCBuiltin<"__builtin_msa_min_a_b">,
+def int_mips_min_a_b : ClangBuiltin<"__builtin_msa_min_a_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_min_a_h : GCCBuiltin<"__builtin_msa_min_a_h">,
+def int_mips_min_a_h : ClangBuiltin<"__builtin_msa_min_a_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_min_a_w : GCCBuiltin<"__builtin_msa_min_a_w">,
+def int_mips_min_a_w : ClangBuiltin<"__builtin_msa_min_a_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_min_a_d : GCCBuiltin<"__builtin_msa_min_a_d">,
+def int_mips_min_a_d : ClangBuiltin<"__builtin_msa_min_a_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_min_s_b : GCCBuiltin<"__builtin_msa_min_s_b">,
+def int_mips_min_s_b : ClangBuiltin<"__builtin_msa_min_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_min_s_h : GCCBuiltin<"__builtin_msa_min_s_h">,
+def int_mips_min_s_h : ClangBuiltin<"__builtin_msa_min_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_min_s_w : GCCBuiltin<"__builtin_msa_min_s_w">,
+def int_mips_min_s_w : ClangBuiltin<"__builtin_msa_min_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_min_s_d : GCCBuiltin<"__builtin_msa_min_s_d">,
+def int_mips_min_s_d : ClangBuiltin<"__builtin_msa_min_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_min_u_b : GCCBuiltin<"__builtin_msa_min_u_b">,
+def int_mips_min_u_b : ClangBuiltin<"__builtin_msa_min_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_min_u_h : GCCBuiltin<"__builtin_msa_min_u_h">,
+def int_mips_min_u_h : ClangBuiltin<"__builtin_msa_min_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_min_u_w : GCCBuiltin<"__builtin_msa_min_u_w">,
+def int_mips_min_u_w : ClangBuiltin<"__builtin_msa_min_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_min_u_d : GCCBuiltin<"__builtin_msa_min_u_d">,
+def int_mips_min_u_d : ClangBuiltin<"__builtin_msa_min_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_mini_s_b : GCCBuiltin<"__builtin_msa_mini_s_b">,
+def int_mips_mini_s_b : ClangBuiltin<"__builtin_msa_mini_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_s_h : GCCBuiltin<"__builtin_msa_mini_s_h">,
+def int_mips_mini_s_h : ClangBuiltin<"__builtin_msa_mini_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_s_w : GCCBuiltin<"__builtin_msa_mini_s_w">,
+def int_mips_mini_s_w : ClangBuiltin<"__builtin_msa_mini_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_s_d : GCCBuiltin<"__builtin_msa_mini_s_d">,
+def int_mips_mini_s_d : ClangBuiltin<"__builtin_msa_mini_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_u_b : GCCBuiltin<"__builtin_msa_mini_u_b">,
+def int_mips_mini_u_b : ClangBuiltin<"__builtin_msa_mini_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_u_h : GCCBuiltin<"__builtin_msa_mini_u_h">,
+def int_mips_mini_u_h : ClangBuiltin<"__builtin_msa_mini_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_u_w : GCCBuiltin<"__builtin_msa_mini_u_w">,
+def int_mips_mini_u_w : ClangBuiltin<"__builtin_msa_mini_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mini_u_d : GCCBuiltin<"__builtin_msa_mini_u_d">,
+def int_mips_mini_u_d : ClangBuiltin<"__builtin_msa_mini_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_mod_s_b : GCCBuiltin<"__builtin_msa_mod_s_b">,
+def int_mips_mod_s_b : ClangBuiltin<"__builtin_msa_mod_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_mod_s_h : GCCBuiltin<"__builtin_msa_mod_s_h">,
+def int_mips_mod_s_h : ClangBuiltin<"__builtin_msa_mod_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mod_s_w : GCCBuiltin<"__builtin_msa_mod_s_w">,
+def int_mips_mod_s_w : ClangBuiltin<"__builtin_msa_mod_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mod_s_d : GCCBuiltin<"__builtin_msa_mod_s_d">,
+def int_mips_mod_s_d : ClangBuiltin<"__builtin_msa_mod_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_mod_u_b : GCCBuiltin<"__builtin_msa_mod_u_b">,
+def int_mips_mod_u_b : ClangBuiltin<"__builtin_msa_mod_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_mod_u_h : GCCBuiltin<"__builtin_msa_mod_u_h">,
+def int_mips_mod_u_h : ClangBuiltin<"__builtin_msa_mod_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mod_u_w : GCCBuiltin<"__builtin_msa_mod_u_w">,
+def int_mips_mod_u_w : ClangBuiltin<"__builtin_msa_mod_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mod_u_d : GCCBuiltin<"__builtin_msa_mod_u_d">,
+def int_mips_mod_u_d : ClangBuiltin<"__builtin_msa_mod_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_move_v : GCCBuiltin<"__builtin_msa_move_v">,
+def int_mips_move_v : ClangBuiltin<"__builtin_msa_move_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_msub_q_h : GCCBuiltin<"__builtin_msa_msub_q_h">,
+def int_mips_msub_q_h : ClangBuiltin<"__builtin_msa_msub_q_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_msub_q_w : GCCBuiltin<"__builtin_msa_msub_q_w">,
+def int_mips_msub_q_w : ClangBuiltin<"__builtin_msa_msub_q_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_msubr_q_h : GCCBuiltin<"__builtin_msa_msubr_q_h">,
+def int_mips_msubr_q_h : ClangBuiltin<"__builtin_msa_msubr_q_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_msubr_q_w : GCCBuiltin<"__builtin_msa_msubr_q_w">,
+def int_mips_msubr_q_w : ClangBuiltin<"__builtin_msa_msubr_q_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_msubv_b : GCCBuiltin<"__builtin_msa_msubv_b">,
+def int_mips_msubv_b : ClangBuiltin<"__builtin_msa_msubv_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_msubv_h : GCCBuiltin<"__builtin_msa_msubv_h">,
+def int_mips_msubv_h : ClangBuiltin<"__builtin_msa_msubv_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_msubv_w : GCCBuiltin<"__builtin_msa_msubv_w">,
+def int_mips_msubv_w : ClangBuiltin<"__builtin_msa_msubv_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_msubv_d : GCCBuiltin<"__builtin_msa_msubv_d">,
+def int_mips_msubv_d : ClangBuiltin<"__builtin_msa_msubv_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_mul_q_h : GCCBuiltin<"__builtin_msa_mul_q_h">,
+def int_mips_mul_q_h : ClangBuiltin<"__builtin_msa_mul_q_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mul_q_w : GCCBuiltin<"__builtin_msa_mul_q_w">,
+def int_mips_mul_q_w : ClangBuiltin<"__builtin_msa_mul_q_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mulr_q_h : GCCBuiltin<"__builtin_msa_mulr_q_h">,
+def int_mips_mulr_q_h : ClangBuiltin<"__builtin_msa_mulr_q_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mulr_q_w : GCCBuiltin<"__builtin_msa_mulr_q_w">,
+def int_mips_mulr_q_w : ClangBuiltin<"__builtin_msa_mulr_q_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mulv_b : GCCBuiltin<"__builtin_msa_mulv_b">,
+def int_mips_mulv_b : ClangBuiltin<"__builtin_msa_mulv_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_mulv_h : GCCBuiltin<"__builtin_msa_mulv_h">,
+def int_mips_mulv_h : ClangBuiltin<"__builtin_msa_mulv_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mulv_w : GCCBuiltin<"__builtin_msa_mulv_w">,
+def int_mips_mulv_w : ClangBuiltin<"__builtin_msa_mulv_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mulv_d : GCCBuiltin<"__builtin_msa_mulv_d">,
+def int_mips_mulv_d : ClangBuiltin<"__builtin_msa_mulv_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_nloc_b : GCCBuiltin<"__builtin_msa_nloc_b">,
+def int_mips_nloc_b : ClangBuiltin<"__builtin_msa_nloc_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_nloc_h : GCCBuiltin<"__builtin_msa_nloc_h">,
+def int_mips_nloc_h : ClangBuiltin<"__builtin_msa_nloc_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_nloc_w : GCCBuiltin<"__builtin_msa_nloc_w">,
+def int_mips_nloc_w : ClangBuiltin<"__builtin_msa_nloc_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_nloc_d : GCCBuiltin<"__builtin_msa_nloc_d">,
+def int_mips_nloc_d : ClangBuiltin<"__builtin_msa_nloc_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_nlzc_b : GCCBuiltin<"__builtin_msa_nlzc_b">,
+def int_mips_nlzc_b : ClangBuiltin<"__builtin_msa_nlzc_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_nlzc_h : GCCBuiltin<"__builtin_msa_nlzc_h">,
+def int_mips_nlzc_h : ClangBuiltin<"__builtin_msa_nlzc_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_nlzc_w : GCCBuiltin<"__builtin_msa_nlzc_w">,
+def int_mips_nlzc_w : ClangBuiltin<"__builtin_msa_nlzc_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_nlzc_d : GCCBuiltin<"__builtin_msa_nlzc_d">,
+def int_mips_nlzc_d : ClangBuiltin<"__builtin_msa_nlzc_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_nor_v : GCCBuiltin<"__builtin_msa_nor_v">,
+def int_mips_nor_v : ClangBuiltin<"__builtin_msa_nor_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_nori_b : GCCBuiltin<"__builtin_msa_nori_b">,
+def int_mips_nori_b : ClangBuiltin<"__builtin_msa_nori_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_or_v : GCCBuiltin<"__builtin_msa_or_v">,
+def int_mips_or_v : ClangBuiltin<"__builtin_msa_or_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ori_b : GCCBuiltin<"__builtin_msa_ori_b">,
+def int_mips_ori_b : ClangBuiltin<"__builtin_msa_ori_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_pckev_b : GCCBuiltin<"__builtin_msa_pckev_b">,
+def int_mips_pckev_b : ClangBuiltin<"__builtin_msa_pckev_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_pckev_h : GCCBuiltin<"__builtin_msa_pckev_h">,
+def int_mips_pckev_h : ClangBuiltin<"__builtin_msa_pckev_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_pckev_w : GCCBuiltin<"__builtin_msa_pckev_w">,
+def int_mips_pckev_w : ClangBuiltin<"__builtin_msa_pckev_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_pckev_d : GCCBuiltin<"__builtin_msa_pckev_d">,
+def int_mips_pckev_d : ClangBuiltin<"__builtin_msa_pckev_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_pckod_b : GCCBuiltin<"__builtin_msa_pckod_b">,
+def int_mips_pckod_b : ClangBuiltin<"__builtin_msa_pckod_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_pckod_h : GCCBuiltin<"__builtin_msa_pckod_h">,
+def int_mips_pckod_h : ClangBuiltin<"__builtin_msa_pckod_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_pckod_w : GCCBuiltin<"__builtin_msa_pckod_w">,
+def int_mips_pckod_w : ClangBuiltin<"__builtin_msa_pckod_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_pckod_d : GCCBuiltin<"__builtin_msa_pckod_d">,
+def int_mips_pckod_d : ClangBuiltin<"__builtin_msa_pckod_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_pcnt_b : GCCBuiltin<"__builtin_msa_pcnt_b">,
+def int_mips_pcnt_b : ClangBuiltin<"__builtin_msa_pcnt_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_pcnt_h : GCCBuiltin<"__builtin_msa_pcnt_h">,
+def int_mips_pcnt_h : ClangBuiltin<"__builtin_msa_pcnt_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_pcnt_w : GCCBuiltin<"__builtin_msa_pcnt_w">,
+def int_mips_pcnt_w : ClangBuiltin<"__builtin_msa_pcnt_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_pcnt_d : GCCBuiltin<"__builtin_msa_pcnt_d">,
+def int_mips_pcnt_d : ClangBuiltin<"__builtin_msa_pcnt_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_sat_s_b : GCCBuiltin<"__builtin_msa_sat_s_b">,
+def int_mips_sat_s_b : ClangBuiltin<"__builtin_msa_sat_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_s_h : GCCBuiltin<"__builtin_msa_sat_s_h">,
+def int_mips_sat_s_h : ClangBuiltin<"__builtin_msa_sat_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_s_w : GCCBuiltin<"__builtin_msa_sat_s_w">,
+def int_mips_sat_s_w : ClangBuiltin<"__builtin_msa_sat_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_s_d : GCCBuiltin<"__builtin_msa_sat_s_d">,
+def int_mips_sat_s_d : ClangBuiltin<"__builtin_msa_sat_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_u_b : GCCBuiltin<"__builtin_msa_sat_u_b">,
+def int_mips_sat_u_b : ClangBuiltin<"__builtin_msa_sat_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_u_h : GCCBuiltin<"__builtin_msa_sat_u_h">,
+def int_mips_sat_u_h : ClangBuiltin<"__builtin_msa_sat_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_u_w : GCCBuiltin<"__builtin_msa_sat_u_w">,
+def int_mips_sat_u_w : ClangBuiltin<"__builtin_msa_sat_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sat_u_d : GCCBuiltin<"__builtin_msa_sat_u_d">,
+def int_mips_sat_u_d : ClangBuiltin<"__builtin_msa_sat_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_shf_b : GCCBuiltin<"__builtin_msa_shf_b">,
+def int_mips_shf_b : ClangBuiltin<"__builtin_msa_shf_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_shf_h : GCCBuiltin<"__builtin_msa_shf_h">,
+def int_mips_shf_h : ClangBuiltin<"__builtin_msa_shf_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_shf_w : GCCBuiltin<"__builtin_msa_shf_w">,
+def int_mips_shf_w : ClangBuiltin<"__builtin_msa_shf_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sld_b : GCCBuiltin<"__builtin_msa_sld_b">,
+def int_mips_sld_b : ClangBuiltin<"__builtin_msa_sld_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sld_h : GCCBuiltin<"__builtin_msa_sld_h">,
+def int_mips_sld_h : ClangBuiltin<"__builtin_msa_sld_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sld_w : GCCBuiltin<"__builtin_msa_sld_w">,
+def int_mips_sld_w : ClangBuiltin<"__builtin_msa_sld_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sld_d : GCCBuiltin<"__builtin_msa_sld_d">,
+def int_mips_sld_d : ClangBuiltin<"__builtin_msa_sld_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sldi_b : GCCBuiltin<"__builtin_msa_sldi_b">,
+def int_mips_sldi_b : ClangBuiltin<"__builtin_msa_sldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sldi_h : GCCBuiltin<"__builtin_msa_sldi_h">,
+def int_mips_sldi_h : ClangBuiltin<"__builtin_msa_sldi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sldi_w : GCCBuiltin<"__builtin_msa_sldi_w">,
+def int_mips_sldi_w : ClangBuiltin<"__builtin_msa_sldi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sldi_d : GCCBuiltin<"__builtin_msa_sldi_d">,
+def int_mips_sldi_d : ClangBuiltin<"__builtin_msa_sldi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sll_b : GCCBuiltin<"__builtin_msa_sll_b">,
+def int_mips_sll_b : ClangBuiltin<"__builtin_msa_sll_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_sll_h : GCCBuiltin<"__builtin_msa_sll_h">,
+def int_mips_sll_h : ClangBuiltin<"__builtin_msa_sll_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_sll_w : GCCBuiltin<"__builtin_msa_sll_w">,
+def int_mips_sll_w : ClangBuiltin<"__builtin_msa_sll_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_sll_d : GCCBuiltin<"__builtin_msa_sll_d">,
+def int_mips_sll_d : ClangBuiltin<"__builtin_msa_sll_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_slli_b : GCCBuiltin<"__builtin_msa_slli_b">,
+def int_mips_slli_b : ClangBuiltin<"__builtin_msa_slli_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_slli_h : GCCBuiltin<"__builtin_msa_slli_h">,
+def int_mips_slli_h : ClangBuiltin<"__builtin_msa_slli_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_slli_w : GCCBuiltin<"__builtin_msa_slli_w">,
+def int_mips_slli_w : ClangBuiltin<"__builtin_msa_slli_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_slli_d : GCCBuiltin<"__builtin_msa_slli_d">,
+def int_mips_slli_d : ClangBuiltin<"__builtin_msa_slli_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_splat_b : GCCBuiltin<"__builtin_msa_splat_b">,
+def int_mips_splat_b : ClangBuiltin<"__builtin_msa_splat_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splat_h : GCCBuiltin<"__builtin_msa_splat_h">,
+def int_mips_splat_h : ClangBuiltin<"__builtin_msa_splat_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splat_w : GCCBuiltin<"__builtin_msa_splat_w">,
+def int_mips_splat_w : ClangBuiltin<"__builtin_msa_splat_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splat_d : GCCBuiltin<"__builtin_msa_splat_d">,
+def int_mips_splat_d : ClangBuiltin<"__builtin_msa_splat_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splati_b : GCCBuiltin<"__builtin_msa_splati_b">,
+def int_mips_splati_b : ClangBuiltin<"__builtin_msa_splati_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_splati_h : GCCBuiltin<"__builtin_msa_splati_h">,
+def int_mips_splati_h : ClangBuiltin<"__builtin_msa_splati_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_splati_w : GCCBuiltin<"__builtin_msa_splati_w">,
+def int_mips_splati_w : ClangBuiltin<"__builtin_msa_splati_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_splati_d : GCCBuiltin<"__builtin_msa_splati_d">,
+def int_mips_splati_d : ClangBuiltin<"__builtin_msa_splati_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_sra_b : GCCBuiltin<"__builtin_msa_sra_b">,
+def int_mips_sra_b : ClangBuiltin<"__builtin_msa_sra_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_sra_h : GCCBuiltin<"__builtin_msa_sra_h">,
+def int_mips_sra_h : ClangBuiltin<"__builtin_msa_sra_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_sra_w : GCCBuiltin<"__builtin_msa_sra_w">,
+def int_mips_sra_w : ClangBuiltin<"__builtin_msa_sra_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_sra_d : GCCBuiltin<"__builtin_msa_sra_d">,
+def int_mips_sra_d : ClangBuiltin<"__builtin_msa_sra_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srai_b : GCCBuiltin<"__builtin_msa_srai_b">,
+def int_mips_srai_b : ClangBuiltin<"__builtin_msa_srai_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srai_h : GCCBuiltin<"__builtin_msa_srai_h">,
+def int_mips_srai_h : ClangBuiltin<"__builtin_msa_srai_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srai_w : GCCBuiltin<"__builtin_msa_srai_w">,
+def int_mips_srai_w : ClangBuiltin<"__builtin_msa_srai_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srai_d : GCCBuiltin<"__builtin_msa_srai_d">,
+def int_mips_srai_d : ClangBuiltin<"__builtin_msa_srai_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srar_b : GCCBuiltin<"__builtin_msa_srar_b">,
+def int_mips_srar_b : ClangBuiltin<"__builtin_msa_srar_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_srar_h : GCCBuiltin<"__builtin_msa_srar_h">,
+def int_mips_srar_h : ClangBuiltin<"__builtin_msa_srar_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_srar_w : GCCBuiltin<"__builtin_msa_srar_w">,
+def int_mips_srar_w : ClangBuiltin<"__builtin_msa_srar_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_srar_d : GCCBuiltin<"__builtin_msa_srar_d">,
+def int_mips_srar_d : ClangBuiltin<"__builtin_msa_srar_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srari_b : GCCBuiltin<"__builtin_msa_srari_b">,
+def int_mips_srari_b : ClangBuiltin<"__builtin_msa_srari_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srari_h : GCCBuiltin<"__builtin_msa_srari_h">,
+def int_mips_srari_h : ClangBuiltin<"__builtin_msa_srari_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srari_w : GCCBuiltin<"__builtin_msa_srari_w">,
+def int_mips_srari_w : ClangBuiltin<"__builtin_msa_srari_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srari_d : GCCBuiltin<"__builtin_msa_srari_d">,
+def int_mips_srari_d : ClangBuiltin<"__builtin_msa_srari_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srl_b : GCCBuiltin<"__builtin_msa_srl_b">,
+def int_mips_srl_b : ClangBuiltin<"__builtin_msa_srl_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_srl_h : GCCBuiltin<"__builtin_msa_srl_h">,
+def int_mips_srl_h : ClangBuiltin<"__builtin_msa_srl_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_srl_w : GCCBuiltin<"__builtin_msa_srl_w">,
+def int_mips_srl_w : ClangBuiltin<"__builtin_msa_srl_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_srl_d : GCCBuiltin<"__builtin_msa_srl_d">,
+def int_mips_srl_d : ClangBuiltin<"__builtin_msa_srl_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srli_b : GCCBuiltin<"__builtin_msa_srli_b">,
+def int_mips_srli_b : ClangBuiltin<"__builtin_msa_srli_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srli_h : GCCBuiltin<"__builtin_msa_srli_h">,
+def int_mips_srli_h : ClangBuiltin<"__builtin_msa_srli_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srli_w : GCCBuiltin<"__builtin_msa_srli_w">,
+def int_mips_srli_w : ClangBuiltin<"__builtin_msa_srli_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srli_d : GCCBuiltin<"__builtin_msa_srli_d">,
+def int_mips_srli_d : ClangBuiltin<"__builtin_msa_srli_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srlr_b : GCCBuiltin<"__builtin_msa_srlr_b">,
+def int_mips_srlr_b : ClangBuiltin<"__builtin_msa_srlr_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_srlr_h : GCCBuiltin<"__builtin_msa_srlr_h">,
+def int_mips_srlr_h : ClangBuiltin<"__builtin_msa_srlr_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_srlr_w : GCCBuiltin<"__builtin_msa_srlr_w">,
+def int_mips_srlr_w : ClangBuiltin<"__builtin_msa_srlr_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_srlr_d : GCCBuiltin<"__builtin_msa_srlr_d">,
+def int_mips_srlr_d : ClangBuiltin<"__builtin_msa_srlr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srlri_b : GCCBuiltin<"__builtin_msa_srlri_b">,
+def int_mips_srlri_b : ClangBuiltin<"__builtin_msa_srlri_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srlri_h : GCCBuiltin<"__builtin_msa_srlri_h">,
+def int_mips_srlri_h : ClangBuiltin<"__builtin_msa_srlri_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srlri_w : GCCBuiltin<"__builtin_msa_srlri_w">,
+def int_mips_srlri_w : ClangBuiltin<"__builtin_msa_srlri_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">,
+def int_mips_srlri_d : ClangBuiltin<"__builtin_msa_srlri_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">,
+def int_mips_st_b : ClangBuiltin<"__builtin_msa_st_b">,
   Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>;
-def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">,
+def int_mips_st_h : ClangBuiltin<"__builtin_msa_st_h">,
   Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>;
-def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">,
+def int_mips_st_w : ClangBuiltin<"__builtin_msa_st_w">,
   Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>;
-def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">,
+def int_mips_st_d : ClangBuiltin<"__builtin_msa_st_d">,
   Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>;
-def int_mips_str_d : GCCBuiltin<"__builtin_msa_str_d">,
+def int_mips_str_d : ClangBuiltin<"__builtin_msa_str_d">,
   Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>;
-def int_mips_str_w : GCCBuiltin<"__builtin_msa_str_w">,
+def int_mips_str_w : ClangBuiltin<"__builtin_msa_str_w">,
   Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>;
-def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
+def int_mips_subs_s_b : ClangBuiltin<"__builtin_msa_subs_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subs_s_h : GCCBuiltin<"__builtin_msa_subs_s_h">,
+def int_mips_subs_s_h : ClangBuiltin<"__builtin_msa_subs_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subs_s_w : GCCBuiltin<"__builtin_msa_subs_s_w">,
+def int_mips_subs_s_w : ClangBuiltin<"__builtin_msa_subs_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subs_s_d : GCCBuiltin<"__builtin_msa_subs_s_d">,
+def int_mips_subs_s_d : ClangBuiltin<"__builtin_msa_subs_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subs_u_b : GCCBuiltin<"__builtin_msa_subs_u_b">,
+def int_mips_subs_u_b : ClangBuiltin<"__builtin_msa_subs_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subs_u_h : GCCBuiltin<"__builtin_msa_subs_u_h">,
+def int_mips_subs_u_h : ClangBuiltin<"__builtin_msa_subs_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subs_u_w : GCCBuiltin<"__builtin_msa_subs_u_w">,
+def int_mips_subs_u_w : ClangBuiltin<"__builtin_msa_subs_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subs_u_d : GCCBuiltin<"__builtin_msa_subs_u_d">,
+def int_mips_subs_u_d : ClangBuiltin<"__builtin_msa_subs_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subsus_u_b : GCCBuiltin<"__builtin_msa_subsus_u_b">,
+def int_mips_subsus_u_b : ClangBuiltin<"__builtin_msa_subsus_u_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subsus_u_h : GCCBuiltin<"__builtin_msa_subsus_u_h">,
+def int_mips_subsus_u_h : ClangBuiltin<"__builtin_msa_subsus_u_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subsus_u_w : GCCBuiltin<"__builtin_msa_subsus_u_w">,
+def int_mips_subsus_u_w : ClangBuiltin<"__builtin_msa_subsus_u_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subsus_u_d : GCCBuiltin<"__builtin_msa_subsus_u_d">,
+def int_mips_subsus_u_d : ClangBuiltin<"__builtin_msa_subsus_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_b : GCCBuiltin<"__builtin_msa_subsuu_s_b">,
+def int_mips_subsuu_s_b : ClangBuiltin<"__builtin_msa_subsuu_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_h : GCCBuiltin<"__builtin_msa_subsuu_s_h">,
+def int_mips_subsuu_s_h : ClangBuiltin<"__builtin_msa_subsuu_s_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_w : GCCBuiltin<"__builtin_msa_subsuu_s_w">,
+def int_mips_subsuu_s_w : ClangBuiltin<"__builtin_msa_subsuu_s_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_d : GCCBuiltin<"__builtin_msa_subsuu_s_d">,
+def int_mips_subsuu_s_d : ClangBuiltin<"__builtin_msa_subsuu_s_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subv_b : GCCBuiltin<"__builtin_msa_subv_b">,
+def int_mips_subv_b : ClangBuiltin<"__builtin_msa_subv_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subv_h : GCCBuiltin<"__builtin_msa_subv_h">,
+def int_mips_subv_h : ClangBuiltin<"__builtin_msa_subv_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subv_w : GCCBuiltin<"__builtin_msa_subv_w">,
+def int_mips_subv_w : ClangBuiltin<"__builtin_msa_subv_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subv_d : GCCBuiltin<"__builtin_msa_subv_d">,
+def int_mips_subv_d : ClangBuiltin<"__builtin_msa_subv_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subvi_b : GCCBuiltin<"__builtin_msa_subvi_b">,
+def int_mips_subvi_b : ClangBuiltin<"__builtin_msa_subvi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_subvi_h : GCCBuiltin<"__builtin_msa_subvi_h">,
+def int_mips_subvi_h : ClangBuiltin<"__builtin_msa_subvi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_subvi_w : GCCBuiltin<"__builtin_msa_subvi_w">,
+def int_mips_subvi_w : ClangBuiltin<"__builtin_msa_subvi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_subvi_d : GCCBuiltin<"__builtin_msa_subvi_d">,
+def int_mips_subvi_d : ClangBuiltin<"__builtin_msa_subvi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>;
-def int_mips_vshf_b : GCCBuiltin<"__builtin_msa_vshf_b">,
+def int_mips_vshf_b : ClangBuiltin<"__builtin_msa_vshf_b">,
   Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_vshf_h : GCCBuiltin<"__builtin_msa_vshf_h">, +def int_mips_vshf_h : ClangBuiltin<"__builtin_msa_vshf_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; -def int_mips_vshf_w : GCCBuiltin<"__builtin_msa_vshf_w">, +def int_mips_vshf_w : ClangBuiltin<"__builtin_msa_vshf_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_mips_vshf_d : GCCBuiltin<"__builtin_msa_vshf_d">, +def int_mips_vshf_d : ClangBuiltin<"__builtin_msa_vshf_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; -def int_mips_xor_v : GCCBuiltin<"__builtin_msa_xor_v">, +def int_mips_xor_v : ClangBuiltin<"__builtin_msa_xor_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_mips_xori_b : GCCBuiltin<"__builtin_msa_xori_b">, +def int_mips_xori_b : ClangBuiltin<"__builtin_msa_xori_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 41b28db56c75..9c3813128364 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -556,95 +556,124 @@ class SHFL_INFO { } let TargetPrefix = "nvvm" in { - def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">, + def int_nvvm_prmt : ClangBuiltin<"__nvvm_prmt">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; // // Min Max // - def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + foreach operation = ["min", "max"] in { + def int_nvvm_f # operation # _d : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty] - , [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + foreach variant = ["_f", "_ftz_f", "_nan_f", "_ftz_nan_f", + "_xorsign_abs_f", "_ftz_xorsign_abs_f", "_nan_xorsign_abs_f", + "_ftz_nan_xorsign_abs_f"] in { + def int_nvvm_f # operation # variant : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } - def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + foreach variant = ["_f16", "_ftz_f16", "_nan_f16", "_ftz_nan_f16", + "_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16", + "_ftz_nan_xorsign_abs_f16"] in { + def int_nvvm_f # operation # variant : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } + + 
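// Illustrative aside, not part of the vendored change: ClangBuiltin exposes
// each generated intrinsic to Clang under the quoted builtin name, so the
// records the foreach above produces are directly callable from CUDA device
// code. A minimal sketch, assuming only the (float, float) -> float
// signatures declared above; clamp01_ftz is a hypothetical helper name:
//
//   __device__ float clamp01_ftz(float x) {
//     // Lowers to llvm.nvvm.fmax.ftz.f / llvm.nvvm.fmin.ftz.f.
//     return __nvvm_fmax_ftz_f(0.0f, __nvvm_fmin_ftz_f(x, 1.0f));
//   }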
foreach variant = ["_f16x2", "_ftz_f16x2", "_nan_f16x2", + "_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2", + "_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in { + def int_nvvm_f # operation # variant : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } + + foreach variant = ["_bf16", "_nan_bf16", "_xorsign_abs_bf16", + "_nan_xorsign_abs_bf16"] in { + def int_nvvm_f # operation # variant : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } + + foreach variant = ["_bf16x2", "_nan_bf16x2", "_xorsign_abs_bf16x2", + "_nan_xorsign_abs_bf16x2"] in { + def int_nvvm_f # operation # variant : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } + } // // Multiplication // - def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">, + def int_nvvm_mulhi_i : ClangBuiltin<"__nvvm_mulhi_i">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">, + def int_nvvm_mulhi_ui : ClangBuiltin<"__nvvm_mulhi_ui">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">, + def int_nvvm_mulhi_ll : ClangBuiltin<"__nvvm_mulhi_ll">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">, + def int_nvvm_mulhi_ull : ClangBuiltin<"__nvvm_mulhi_ull">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">, + def int_nvvm_mul_rn_ftz_f : ClangBuiltin<"__nvvm_mul_rn_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">, + def int_nvvm_mul_rn_f : ClangBuiltin<"__nvvm_mul_rn_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">, + def int_nvvm_mul_rz_ftz_f : ClangBuiltin<"__nvvm_mul_rz_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">, + def int_nvvm_mul_rz_f : ClangBuiltin<"__nvvm_mul_rz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">, + def int_nvvm_mul_rm_ftz_f : ClangBuiltin<"__nvvm_mul_rm_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">, + def int_nvvm_mul_rm_f : ClangBuiltin<"__nvvm_mul_rm_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">, + def int_nvvm_mul_rp_ftz_f : ClangBuiltin<"__nvvm_mul_rp_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def 
int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">, + def int_nvvm_mul_rp_f : ClangBuiltin<"__nvvm_mul_rp_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">, + def int_nvvm_mul_rn_d : ClangBuiltin<"__nvvm_mul_rn_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">, + def int_nvvm_mul_rz_d : ClangBuiltin<"__nvvm_mul_rz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">, + def int_nvvm_mul_rm_d : ClangBuiltin<"__nvvm_mul_rm_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">, + def int_nvvm_mul_rp_d : ClangBuiltin<"__nvvm_mul_rp_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">, + def int_nvvm_mul24_i : ClangBuiltin<"__nvvm_mul24_i">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">, + def int_nvvm_mul24_ui : ClangBuiltin<"__nvvm_mul24_ui">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; @@ -652,51 +681,51 @@ let TargetPrefix = "nvvm" in { // Div // - def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">, + def int_nvvm_div_approx_ftz_f : ClangBuiltin<"__nvvm_div_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">, + def int_nvvm_div_approx_f : ClangBuiltin<"__nvvm_div_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">, + def int_nvvm_div_rn_ftz_f : ClangBuiltin<"__nvvm_div_rn_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">, + def int_nvvm_div_rn_f : ClangBuiltin<"__nvvm_div_rn_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">, + def int_nvvm_div_rz_ftz_f : ClangBuiltin<"__nvvm_div_rz_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">, + def int_nvvm_div_rz_f : ClangBuiltin<"__nvvm_div_rz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">, + def int_nvvm_div_rm_ftz_f : ClangBuiltin<"__nvvm_div_rm_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">, + def int_nvvm_div_rm_f : ClangBuiltin<"__nvvm_div_rm_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">, + def int_nvvm_div_rp_ftz_f : ClangBuiltin<"__nvvm_div_rp_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], 
[llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">, + def int_nvvm_div_rp_f : ClangBuiltin<"__nvvm_div_rp_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">, + def int_nvvm_div_rn_d : ClangBuiltin<"__nvvm_div_rn_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">, + def int_nvvm_div_rz_d : ClangBuiltin<"__nvvm_div_rz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">, + def int_nvvm_div_rm_d : ClangBuiltin<"__nvvm_div_rm_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">, + def int_nvvm_div_rp_d : ClangBuiltin<"__nvvm_div_rp_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; @@ -704,10 +733,10 @@ let TargetPrefix = "nvvm" in { // Sad // - def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">, + def int_nvvm_sad_i : ClangBuiltin<"__nvvm_sad_i">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; - def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">, + def int_nvvm_sad_ui : ClangBuiltin<"__nvvm_sad_ui">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; @@ -715,264 +744,286 @@ let TargetPrefix = "nvvm" in { // Floor Ceil // - def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">, + def int_nvvm_floor_ftz_f : ClangBuiltin<"__nvvm_floor_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">, + def int_nvvm_floor_f : ClangBuiltin<"__nvvm_floor_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">, + def int_nvvm_floor_d : ClangBuiltin<"__nvvm_floor_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">, + def int_nvvm_ceil_ftz_f : ClangBuiltin<"__nvvm_ceil_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">, + def int_nvvm_ceil_f : ClangBuiltin<"__nvvm_ceil_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">, + def int_nvvm_ceil_d : ClangBuiltin<"__nvvm_ceil_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Abs // - def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">, + def int_nvvm_fabs_ftz_f : ClangBuiltin<"__nvvm_fabs_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">, + def int_nvvm_fabs_f : ClangBuiltin<"__nvvm_fabs_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">, + def int_nvvm_fabs_d : ClangBuiltin<"__nvvm_fabs_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; +// +// Abs, Neg bf16, bf16x2 +// + + foreach unary = ["abs", "neg"] in { + def int_nvvm_ # 
unary # _bf16 : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_nvvm_ # unary # _bf16x2 : + ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + } + // // Round // - def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">, + def int_nvvm_round_ftz_f : ClangBuiltin<"__nvvm_round_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">, + def int_nvvm_round_f : ClangBuiltin<"__nvvm_round_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">, + def int_nvvm_round_d : ClangBuiltin<"__nvvm_round_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Trunc // - def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">, + def int_nvvm_trunc_ftz_f : ClangBuiltin<"__nvvm_trunc_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">, + def int_nvvm_trunc_f : ClangBuiltin<"__nvvm_trunc_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">, + def int_nvvm_trunc_d : ClangBuiltin<"__nvvm_trunc_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Saturate // - def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">, + def int_nvvm_saturate_ftz_f : ClangBuiltin<"__nvvm_saturate_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">, + def int_nvvm_saturate_f : ClangBuiltin<"__nvvm_saturate_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">, + def int_nvvm_saturate_d : ClangBuiltin<"__nvvm_saturate_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Exp2 Log2 // - def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">, + def int_nvvm_ex2_approx_ftz_f : ClangBuiltin<"__nvvm_ex2_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">, + def int_nvvm_ex2_approx_f : ClangBuiltin<"__nvvm_ex2_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">, + def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + def int_nvvm_ex2_approx_f16 : ClangBuiltin<"__nvvm_ex2_approx_f16">, + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>; + def int_nvvm_ex2_approx_f16x2 : ClangBuiltin<"__nvvm_ex2_approx_f16x2">, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>; - def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">, + def int_nvvm_lg2_approx_ftz_f : ClangBuiltin<"__nvvm_lg2_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">, + def int_nvvm_lg2_approx_f : ClangBuiltin<"__nvvm_lg2_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_lg2_approx_d : 
GCCBuiltin<"__nvvm_lg2_approx_d">, + def int_nvvm_lg2_approx_d : ClangBuiltin<"__nvvm_lg2_approx_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sin Cos // - def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">, + def int_nvvm_sin_approx_ftz_f : ClangBuiltin<"__nvvm_sin_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">, + def int_nvvm_sin_approx_f : ClangBuiltin<"__nvvm_sin_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">, + def int_nvvm_cos_approx_ftz_f : ClangBuiltin<"__nvvm_cos_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">, + def int_nvvm_cos_approx_f : ClangBuiltin<"__nvvm_cos_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; // // Fma // - def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + foreach variant = ["_rn_f16", "_rn_ftz_f16", "_rn_sat_f16", + "_rn_ftz_sat_f16", "_rn_relu_f16", "_rn_ftz_relu_f16"] in { + def int_nvvm_fma # variant : ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_half_ty], + [llvm_half_ty, llvm_half_ty, llvm_half_ty], + [IntrNoMem, IntrSpeculatable]>; + } + + foreach variant = ["_rn_f16x2", "_rn_ftz_f16x2", "_rn_sat_f16x2", + "_rn_ftz_sat_f16x2", "_rn_relu_f16x2", "_rn_ftz_relu_f16x2"] in { + def int_nvvm_fma # variant : ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], + [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty], [IntrNoMem, IntrSpeculatable]>; + } - def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">, - DefaultAttrsIntrinsic<[llvm_double_ty], - [llvm_double_ty, llvm_double_ty, llvm_double_ty], + foreach variant = ["_rn_bf16", "_rn_relu_bf16"] in { + def int_nvvm_fma # variant : ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], + [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">, - DefaultAttrsIntrinsic<[llvm_double_ty], - 
[llvm_double_ty, llvm_double_ty, llvm_double_ty], + } + + foreach variant = ["_rn_bf16x2", "_rn_relu_bf16x2"] in { + def int_nvvm_fma # variant : ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">, - DefaultAttrsIntrinsic<[llvm_double_ty], - [llvm_double_ty, llvm_double_ty, llvm_double_ty], + } + + foreach variant = ["_rn_ftz_f", "_rn_f", "_rz_ftz_f", "_rz_f", "_rm_ftz_f", + "_rm_f", "_rp_ftz_f", "_rp_f"] in { + def int_nvvm_fma # variant : ClangBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">, + } + + foreach variant = ["_rn_d", "_rz_d", "_rm_d", "_rp_d"] in { + def int_nvvm_fma # variant : ClangBuiltin, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + } // // Rcp // - def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">, + def int_nvvm_rcp_rn_ftz_f : ClangBuiltin<"__nvvm_rcp_rn_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">, + def int_nvvm_rcp_rn_f : ClangBuiltin<"__nvvm_rcp_rn_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">, + def int_nvvm_rcp_rz_ftz_f : ClangBuiltin<"__nvvm_rcp_rz_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">, + def int_nvvm_rcp_rz_f : ClangBuiltin<"__nvvm_rcp_rz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">, + def int_nvvm_rcp_rm_ftz_f : ClangBuiltin<"__nvvm_rcp_rm_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">, + def int_nvvm_rcp_rm_f : ClangBuiltin<"__nvvm_rcp_rm_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">, + def int_nvvm_rcp_rp_ftz_f : ClangBuiltin<"__nvvm_rcp_rp_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">, + def int_nvvm_rcp_rp_f : ClangBuiltin<"__nvvm_rcp_rp_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">, + def int_nvvm_rcp_rn_d : ClangBuiltin<"__nvvm_rcp_rn_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">, + def int_nvvm_rcp_rz_d : ClangBuiltin<"__nvvm_rcp_rz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">, + def int_nvvm_rcp_rm_d : ClangBuiltin<"__nvvm_rcp_rm_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">, + def int_nvvm_rcp_rp_d : ClangBuiltin<"__nvvm_rcp_rp_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">, + def int_nvvm_rcp_approx_ftz_f : ClangBuiltin<"__nvvm_rcp_approx_ftz_f">, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def 
int_nvvm_rcp_approx_ftz_d : ClangBuiltin<"__nvvm_rcp_approx_ftz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sqrt // - def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">, + def int_nvvm_sqrt_f : ClangBuiltin<"__nvvm_sqrt_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">, + def int_nvvm_sqrt_rn_ftz_f : ClangBuiltin<"__nvvm_sqrt_rn_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">, + def int_nvvm_sqrt_rn_f : ClangBuiltin<"__nvvm_sqrt_rn_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">, + def int_nvvm_sqrt_rz_ftz_f : ClangBuiltin<"__nvvm_sqrt_rz_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">, + def int_nvvm_sqrt_rz_f : ClangBuiltin<"__nvvm_sqrt_rz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">, + def int_nvvm_sqrt_rm_ftz_f : ClangBuiltin<"__nvvm_sqrt_rm_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">, + def int_nvvm_sqrt_rm_f : ClangBuiltin<"__nvvm_sqrt_rm_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">, + def int_nvvm_sqrt_rp_ftz_f : ClangBuiltin<"__nvvm_sqrt_rp_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">, + def int_nvvm_sqrt_rp_f : ClangBuiltin<"__nvvm_sqrt_rp_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">, + def int_nvvm_sqrt_approx_ftz_f : ClangBuiltin<"__nvvm_sqrt_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">, + def int_nvvm_sqrt_approx_f : ClangBuiltin<"__nvvm_sqrt_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">, + def int_nvvm_sqrt_rn_d : ClangBuiltin<"__nvvm_sqrt_rn_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">, + def int_nvvm_sqrt_rz_d : ClangBuiltin<"__nvvm_sqrt_rz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">, + def int_nvvm_sqrt_rm_d : ClangBuiltin<"__nvvm_sqrt_rm_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">, + def int_nvvm_sqrt_rp_d : ClangBuiltin<"__nvvm_sqrt_rp_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Rsqrt // - def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">, + def int_nvvm_rsqrt_approx_ftz_f : ClangBuiltin<"__nvvm_rsqrt_approx_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">, + def int_nvvm_rsqrt_approx_f : ClangBuiltin<"__nvvm_rsqrt_approx_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], 
[IntrNoMem]>; - def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">, + def int_nvvm_rsqrt_approx_d : ClangBuiltin<"__nvvm_rsqrt_approx_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Add // - def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">, + def int_nvvm_add_rn_ftz_f : ClangBuiltin<"__nvvm_add_rn_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">, + def int_nvvm_add_rn_f : ClangBuiltin<"__nvvm_add_rn_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">, + def int_nvvm_add_rz_ftz_f : ClangBuiltin<"__nvvm_add_rz_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">, + def int_nvvm_add_rz_f : ClangBuiltin<"__nvvm_add_rz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">, + def int_nvvm_add_rm_ftz_f : ClangBuiltin<"__nvvm_add_rm_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">, + def int_nvvm_add_rm_f : ClangBuiltin<"__nvvm_add_rm_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">, + def int_nvvm_add_rp_ftz_f : ClangBuiltin<"__nvvm_add_rp_ftz_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">, + def int_nvvm_add_rp_f : ClangBuiltin<"__nvvm_add_rp_f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">, + def int_nvvm_add_rn_d : ClangBuiltin<"__nvvm_add_rn_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">, + def int_nvvm_add_rz_d : ClangBuiltin<"__nvvm_add_rz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">, + def int_nvvm_add_rm_d : ClangBuiltin<"__nvvm_add_rm_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">, + def int_nvvm_add_rp_d : ClangBuiltin<"__nvvm_add_rp_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; @@ -980,278 +1031,278 @@ let TargetPrefix = "nvvm" in { // Convert // - def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">, + def int_nvvm_d2f_rn_ftz : ClangBuiltin<"__nvvm_d2f_rn_ftz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">, + def int_nvvm_d2f_rn : ClangBuiltin<"__nvvm_d2f_rn">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], 
[IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">, + def int_nvvm_d2f_rz_ftz : ClangBuiltin<"__nvvm_d2f_rz_ftz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">, + def int_nvvm_d2f_rz : ClangBuiltin<"__nvvm_d2f_rz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">, + def int_nvvm_d2f_rm_ftz : ClangBuiltin<"__nvvm_d2f_rm_ftz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">, + def int_nvvm_d2f_rm : ClangBuiltin<"__nvvm_d2f_rm">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">, + def int_nvvm_d2f_rp_ftz : ClangBuiltin<"__nvvm_d2f_rp_ftz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">, + def int_nvvm_d2f_rp : ClangBuiltin<"__nvvm_d2f_rp">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">, + def int_nvvm_d2i_rn : ClangBuiltin<"__nvvm_d2i_rn">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">, + def int_nvvm_d2i_rz : ClangBuiltin<"__nvvm_d2i_rz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">, + def int_nvvm_d2i_rm : ClangBuiltin<"__nvvm_d2i_rm">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">, + def int_nvvm_d2i_rp : ClangBuiltin<"__nvvm_d2i_rp">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">, + def int_nvvm_d2ui_rn : ClangBuiltin<"__nvvm_d2ui_rn">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">, + def int_nvvm_d2ui_rz : ClangBuiltin<"__nvvm_d2ui_rz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">, + def int_nvvm_d2ui_rm : ClangBuiltin<"__nvvm_d2ui_rm">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">, + def int_nvvm_d2ui_rp : ClangBuiltin<"__nvvm_d2ui_rp">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">, + def int_nvvm_i2d_rn : ClangBuiltin<"__nvvm_i2d_rn">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">, + def int_nvvm_i2d_rz : ClangBuiltin<"__nvvm_i2d_rz">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">, + def int_nvvm_i2d_rm : ClangBuiltin<"__nvvm_i2d_rm">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">, + def int_nvvm_i2d_rp : ClangBuiltin<"__nvvm_i2d_rp">, 
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">, + def int_nvvm_ui2d_rn : ClangBuiltin<"__nvvm_ui2d_rn">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">, + def int_nvvm_ui2d_rz : ClangBuiltin<"__nvvm_ui2d_rz">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">, + def int_nvvm_ui2d_rm : ClangBuiltin<"__nvvm_ui2d_rm">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">, + def int_nvvm_ui2d_rp : ClangBuiltin<"__nvvm_ui2d_rp">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">, + def int_nvvm_f2i_rn_ftz : ClangBuiltin<"__nvvm_f2i_rn_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">, + def int_nvvm_f2i_rn : ClangBuiltin<"__nvvm_f2i_rn">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">, + def int_nvvm_f2i_rz_ftz : ClangBuiltin<"__nvvm_f2i_rz_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">, + def int_nvvm_f2i_rz : ClangBuiltin<"__nvvm_f2i_rz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">, + def int_nvvm_f2i_rm_ftz : ClangBuiltin<"__nvvm_f2i_rm_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">, + def int_nvvm_f2i_rm : ClangBuiltin<"__nvvm_f2i_rm">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">, + def int_nvvm_f2i_rp_ftz : ClangBuiltin<"__nvvm_f2i_rp_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">, + def int_nvvm_f2i_rp : ClangBuiltin<"__nvvm_f2i_rp">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">, + def int_nvvm_f2ui_rn_ftz : ClangBuiltin<"__nvvm_f2ui_rn_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">, + def int_nvvm_f2ui_rn : ClangBuiltin<"__nvvm_f2ui_rn">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">, + def int_nvvm_f2ui_rz_ftz : ClangBuiltin<"__nvvm_f2ui_rz_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">, + def int_nvvm_f2ui_rz : ClangBuiltin<"__nvvm_f2ui_rz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">, + def int_nvvm_f2ui_rm_ftz : ClangBuiltin<"__nvvm_f2ui_rm_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def 
int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">, + def int_nvvm_f2ui_rm : ClangBuiltin<"__nvvm_f2ui_rm">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">, + def int_nvvm_f2ui_rp_ftz : ClangBuiltin<"__nvvm_f2ui_rp_ftz">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">, + def int_nvvm_f2ui_rp : ClangBuiltin<"__nvvm_f2ui_rp">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">, + def int_nvvm_i2f_rn : ClangBuiltin<"__nvvm_i2f_rn">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">, + def int_nvvm_i2f_rz : ClangBuiltin<"__nvvm_i2f_rz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">, + def int_nvvm_i2f_rm : ClangBuiltin<"__nvvm_i2f_rm">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">, + def int_nvvm_i2f_rp : ClangBuiltin<"__nvvm_i2f_rp">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">, + def int_nvvm_ui2f_rn : ClangBuiltin<"__nvvm_ui2f_rn">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">, + def int_nvvm_ui2f_rz : ClangBuiltin<"__nvvm_ui2f_rz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">, + def int_nvvm_ui2f_rm : ClangBuiltin<"__nvvm_ui2f_rm">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">, + def int_nvvm_ui2f_rp : ClangBuiltin<"__nvvm_ui2f_rp">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">, + def int_nvvm_lohi_i2d : ClangBuiltin<"__nvvm_lohi_i2d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">, + def int_nvvm_d2i_lo : ClangBuiltin<"__nvvm_d2i_lo">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">, + def int_nvvm_d2i_hi : ClangBuiltin<"__nvvm_d2i_hi">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">, + def int_nvvm_f2ll_rn_ftz : ClangBuiltin<"__nvvm_f2ll_rn_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">, + def int_nvvm_f2ll_rn : ClangBuiltin<"__nvvm_f2ll_rn">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">, + def int_nvvm_f2ll_rz_ftz : ClangBuiltin<"__nvvm_f2ll_rz_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">, + def int_nvvm_f2ll_rz : ClangBuiltin<"__nvvm_f2ll_rz">, 
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">, + def int_nvvm_f2ll_rm_ftz : ClangBuiltin<"__nvvm_f2ll_rm_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">, + def int_nvvm_f2ll_rm : ClangBuiltin<"__nvvm_f2ll_rm">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">, + def int_nvvm_f2ll_rp_ftz : ClangBuiltin<"__nvvm_f2ll_rp_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">, + def int_nvvm_f2ll_rp : ClangBuiltin<"__nvvm_f2ll_rp">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">, + def int_nvvm_f2ull_rn_ftz : ClangBuiltin<"__nvvm_f2ull_rn_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">, + def int_nvvm_f2ull_rn : ClangBuiltin<"__nvvm_f2ull_rn">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">, + def int_nvvm_f2ull_rz_ftz : ClangBuiltin<"__nvvm_f2ull_rz_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">, + def int_nvvm_f2ull_rz : ClangBuiltin<"__nvvm_f2ull_rz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">, + def int_nvvm_f2ull_rm_ftz : ClangBuiltin<"__nvvm_f2ull_rm_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">, + def int_nvvm_f2ull_rm : ClangBuiltin<"__nvvm_f2ull_rm">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">, + def int_nvvm_f2ull_rp_ftz : ClangBuiltin<"__nvvm_f2ull_rp_ftz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">, + def int_nvvm_f2ull_rp : ClangBuiltin<"__nvvm_f2ull_rp">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">, + def int_nvvm_d2ll_rn : ClangBuiltin<"__nvvm_d2ll_rn">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">, + def int_nvvm_d2ll_rz : ClangBuiltin<"__nvvm_d2ll_rz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">, + def int_nvvm_d2ll_rm : ClangBuiltin<"__nvvm_d2ll_rm">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">, + def int_nvvm_d2ll_rp : ClangBuiltin<"__nvvm_d2ll_rp">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">, + def int_nvvm_d2ull_rn : ClangBuiltin<"__nvvm_d2ull_rn">, DefaultAttrsIntrinsic<[llvm_i64_ty], 
[llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">, + def int_nvvm_d2ull_rz : ClangBuiltin<"__nvvm_d2ull_rz">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">, + def int_nvvm_d2ull_rm : ClangBuiltin<"__nvvm_d2ull_rm">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">, + def int_nvvm_d2ull_rp : ClangBuiltin<"__nvvm_d2ull_rp">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">, + def int_nvvm_ll2f_rn : ClangBuiltin<"__nvvm_ll2f_rn">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">, + def int_nvvm_ll2f_rz : ClangBuiltin<"__nvvm_ll2f_rz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">, + def int_nvvm_ll2f_rm : ClangBuiltin<"__nvvm_ll2f_rm">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">, + def int_nvvm_ll2f_rp : ClangBuiltin<"__nvvm_ll2f_rp">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">, + def int_nvvm_ull2f_rn : ClangBuiltin<"__nvvm_ull2f_rn">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">, + def int_nvvm_ull2f_rz : ClangBuiltin<"__nvvm_ull2f_rz">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">, + def int_nvvm_ull2f_rm : ClangBuiltin<"__nvvm_ull2f_rm">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">, + def int_nvvm_ull2f_rp : ClangBuiltin<"__nvvm_ull2f_rp">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">, + def int_nvvm_ll2d_rn : ClangBuiltin<"__nvvm_ll2d_rn">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">, + def int_nvvm_ll2d_rz : ClangBuiltin<"__nvvm_ll2d_rz">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">, + def int_nvvm_ll2d_rm : ClangBuiltin<"__nvvm_ll2d_rm">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">, + def int_nvvm_ll2d_rp : ClangBuiltin<"__nvvm_ll2d_rp">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">, + def int_nvvm_ull2d_rn : ClangBuiltin<"__nvvm_ull2d_rn">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">, + def int_nvvm_ull2d_rz : ClangBuiltin<"__nvvm_ull2d_rz">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">, + def int_nvvm_ull2d_rm : 
ClangBuiltin<"__nvvm_ull2d_rm">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">, + def int_nvvm_ull2d_rp : ClangBuiltin<"__nvvm_ull2d_rp">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">, + def int_nvvm_f2h_rn_ftz : ClangBuiltin<"__nvvm_f2h_rn_ftz">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">, + def int_nvvm_f2h_rn : ClangBuiltin<"__nvvm_f2h_rn">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ff2bf16x2_rn : GCCBuiltin<"__nvvm_ff2bf16x2_rn">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2bf16x2_rn_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rn_relu">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2bf16x2_rz : GCCBuiltin<"__nvvm_ff2bf16x2_rz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2bf16x2_rz_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rz_relu">, + def int_nvvm_ff2bf16x2_rn : ClangBuiltin<"__nvvm_ff2bf16x2_rn">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2bf16x2_rn_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rn_relu">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2bf16x2_rz : ClangBuiltin<"__nvvm_ff2bf16x2_rz">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2bf16x2_rz_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rz_relu">, Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2f16x2_rn : GCCBuiltin<"__nvvm_ff2f16x2_rn">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2f16x2_rn_relu : GCCBuiltin<"__nvvm_ff2f16x2_rn_relu">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2f16x2_rz : GCCBuiltin<"__nvvm_ff2f16x2_rz">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_nvvm_ff2f16x2_rz_relu : GCCBuiltin<"__nvvm_ff2f16x2_rz_relu">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - - def int_nvvm_f2bf16_rn : GCCBuiltin<"__nvvm_f2bf16_rn">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_f2bf16_rn_relu : GCCBuiltin<"__nvvm_f2bf16_rn_relu">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_f2bf16_rz : GCCBuiltin<"__nvvm_f2bf16_rz">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_f2bf16_rz_relu : GCCBuiltin<"__nvvm_f2bf16_rz_relu">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; - - def int_nvvm_f2tf32_rna : GCCBuiltin<"__nvvm_f2tf32_rna">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + def int_nvvm_ff2f16x2_rn : ClangBuiltin<"__nvvm_ff2f16x2_rn">, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2f16x2_rn_relu : ClangBuiltin<"__nvvm_ff2f16x2_rn_relu">, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2f16x2_rz : ClangBuiltin<"__nvvm_ff2f16x2_rz">, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2f16x2_rz_relu : 
ClangBuiltin<"__nvvm_ff2f16x2_rz_relu">, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + + def int_nvvm_f2bf16_rn : ClangBuiltin<"__nvvm_f2bf16_rn">, + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2bf16_rn_relu : ClangBuiltin<"__nvvm_f2bf16_rn_relu">, + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2bf16_rz : ClangBuiltin<"__nvvm_f2bf16_rz">, + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2bf16_rz_relu : ClangBuiltin<"__nvvm_f2bf16_rz_relu">, + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + + def int_nvvm_f2tf32_rna : ClangBuiltin<"__nvvm_f2tf32_rna">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; // // Bitcast // - def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">, + def int_nvvm_bitcast_f2i : ClangBuiltin<"__nvvm_bitcast_f2i">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">, + def int_nvvm_bitcast_i2f : ClangBuiltin<"__nvvm_bitcast_i2f">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">, + def int_nvvm_bitcast_ll2d : ClangBuiltin<"__nvvm_bitcast_ll2d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, + def int_nvvm_bitcast_d2ll : ClangBuiltin<"__nvvm_bitcast_d2ll">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // FNS - def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, + def int_nvvm_fns : ClangBuiltin<"__nvvm_fns">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Atomics not available as llvm intrinsics. def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType, llvm_i32_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType, llvm_i32_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; class SCOPED_ATOMIC2_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; class SCOPED_ATOMIC3_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; multiclass PTXAtomicWithScope2 { def _cta : SCOPED_ATOMIC2_impl; @@ -1280,177 +1331,179 @@ let TargetPrefix = "nvvm" in { // The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the // intrinsics in this file, this one is a user-facing API. - def int_nvvm_barrier0 : GCCBuiltin<"__syncthreads">, - Intrinsic<[], [], [IntrConvergent]>; + def int_nvvm_barrier0 : ClangBuiltin<"__syncthreads">, + Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>; // Synchronize all threads in the CTA at barrier 'n'. - def int_nvvm_barrier_n : GCCBuiltin<"__nvvm_bar_n">, - Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>; + def int_nvvm_barrier_n : ClangBuiltin<"__nvvm_bar_n">, + Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>; // Synchronize 'm', a multiple of warp size, (arg 2) threads in // the CTA at barrier 'n' (arg 1). 
   // Synchronize 'm', a multiple of warp size, (arg 2) threads in
   // the CTA at barrier 'n' (arg 1).
-  def int_nvvm_barrier : GCCBuiltin<"__nvvm_bar">,
-      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>;
-  def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
-      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
-  def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
-      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
-  def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
-      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
+  def int_nvvm_barrier : ClangBuiltin<"__nvvm_bar">,
+      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
+  def int_nvvm_barrier0_popc : ClangBuiltin<"__nvvm_bar0_popc">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
+  def int_nvvm_barrier0_and : ClangBuiltin<"__nvvm_bar0_and">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
+  def int_nvvm_barrier0_or : ClangBuiltin<"__nvvm_bar0_or">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
   def int_nvvm_bar_sync :
-      Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
-      GCCBuiltin<"__nvvm_bar_sync">;
+      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+      ClangBuiltin<"__nvvm_bar_sync">;
   def int_nvvm_bar_warp_sync :
-      Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
-      GCCBuiltin<"__nvvm_bar_warp_sync">;
+      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+      ClangBuiltin<"__nvvm_bar_warp_sync">;
   // barrier.sync id[, cnt]
   def int_nvvm_barrier_sync :
-      Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
-      GCCBuiltin<"__nvvm_barrier_sync">;
+      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+      ClangBuiltin<"__nvvm_barrier_sync">;
   def int_nvvm_barrier_sync_cnt :
-      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>,
-      GCCBuiltin<"__nvvm_barrier_sync_cnt">;
+      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+      ClangBuiltin<"__nvvm_barrier_sync_cnt">;
   // Membar
-  def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
-      Intrinsic<[], [], []>;
-  def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
-      Intrinsic<[], [], []>;
-  def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
-      Intrinsic<[], [], []>;
+  def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
+      Intrinsic<[], [], [IntrNoCallback]>;
+  def int_nvvm_membar_gl : ClangBuiltin<"__nvvm_membar_gl">,
+      Intrinsic<[], [], [IntrNoCallback]>;
+  def int_nvvm_membar_sys : ClangBuiltin<"__nvvm_membar_sys">,
+      Intrinsic<[], [], [IntrNoCallback]>;
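The bar0_popc/and/or forms above are aggregating barriers: every thread contributes a predicate and all threads receive the combined result. Clang's CUDA headers surface them as __syncthreads_count/_and/_or; a hedged sketch (that header mapping is an assumption here, only the __nvvm_bar0_popc builtin name comes from the patch):

    __global__ void count_positive(const float *v, int *n) {
      // All threads must reach the barrier; each passes a predicate, and the
      // popc variant returns, to every thread, how many predicates were true.
      int live = __syncthreads_count(v[threadIdx.x] > 0.0f);  // ~ __nvvm_bar0_popc
      if (threadIdx.x == 0)
        *n = live;
    }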
ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">, + Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_cp_async_ca_shared_global_4 : - GCCBuiltin<"__nvvm_cp_async_ca_shared_global_4">, + ClangBuiltin<"__nvvm_cp_async_ca_shared_global_4">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.ca.shared.global.4">; def int_nvvm_cp_async_ca_shared_global_8 : - GCCBuiltin<"__nvvm_cp_async_ca_shared_global_8">, + ClangBuiltin<"__nvvm_cp_async_ca_shared_global_8">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.ca.shared.global.8">; def int_nvvm_cp_async_ca_shared_global_16 : - GCCBuiltin<"__nvvm_cp_async_ca_shared_global_16">, + ClangBuiltin<"__nvvm_cp_async_ca_shared_global_16">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.ca.shared.global.16">; def int_nvvm_cp_async_cg_shared_global_16 : - GCCBuiltin<"__nvvm_cp_async_cg_shared_global_16">, + ClangBuiltin<"__nvvm_cp_async_cg_shared_global_16">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.cg.shared.global.16">; def int_nvvm_cp_async_commit_group : - GCCBuiltin<"__nvvm_cp_async_commit_group">, + ClangBuiltin<"__nvvm_cp_async_commit_group">, Intrinsic<[],[],[]>; def int_nvvm_cp_async_wait_group : - GCCBuiltin<"__nvvm_cp_async_wait_group">, + ClangBuiltin<"__nvvm_cp_async_wait_group">, Intrinsic<[],[llvm_i32_ty],[ImmArg>]>; def int_nvvm_cp_async_wait_all : - GCCBuiltin<"__nvvm_cp_async_wait_all">, + ClangBuiltin<"__nvvm_cp_async_wait_all">, Intrinsic<[],[],[]>; // mbarrier -def int_nvvm_mbarrier_init : GCCBuiltin<"__nvvm_mbarrier_init">, - Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; +def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">, + Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_init_shared : - GCCBuiltin<"__nvvm_mbarrier_init_shared">, - Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + ClangBuiltin<"__nvvm_mbarrier_init_shared">, + Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; -def int_nvvm_mbarrier_inval : GCCBuiltin<"__nvvm_mbarrier_inval">, +def int_nvvm_mbarrier_inval : ClangBuiltin<"__nvvm_mbarrier_inval">, Intrinsic<[],[llvm_i64ptr_ty], - [IntrConvergent, IntrWriteMem, IntrArgMemOnly, + [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>]>; def int_nvvm_mbarrier_inval_shared : - GCCBuiltin<"__nvvm_mbarrier_inval_shared">, + ClangBuiltin<"__nvvm_mbarrier_inval_shared">, Intrinsic<[],[llvm_shared_i64ptr_ty], - [IntrConvergent, IntrWriteMem, IntrArgMemOnly, + [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>]>; -def int_nvvm_mbarrier_arrive : GCCBuiltin<"__nvvm_mbarrier_arrive">, - Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>; +def int_nvvm_mbarrier_arrive : ClangBuiltin<"__nvvm_mbarrier_arrive">, + Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def 
 // mbarrier
-def int_nvvm_mbarrier_init : GCCBuiltin<"__nvvm_mbarrier_init">,
-    Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
+    Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_init_shared :
-    GCCBuiltin<"__nvvm_mbarrier_init_shared">,
-    Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_init_shared">,
+    Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
-def int_nvvm_mbarrier_inval : GCCBuiltin<"__nvvm_mbarrier_inval">,
+def int_nvvm_mbarrier_inval : ClangBuiltin<"__nvvm_mbarrier_inval">,
     Intrinsic<[],[llvm_i64ptr_ty],
-              [IntrConvergent, IntrWriteMem, IntrArgMemOnly,
+              [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
                WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
 def int_nvvm_mbarrier_inval_shared :
-    GCCBuiltin<"__nvvm_mbarrier_inval_shared">,
+    ClangBuiltin<"__nvvm_mbarrier_inval_shared">,
     Intrinsic<[],[llvm_shared_i64ptr_ty],
-              [IntrConvergent, IntrWriteMem, IntrArgMemOnly,
+              [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
                WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
-def int_nvvm_mbarrier_arrive : GCCBuiltin<"__nvvm_mbarrier_arrive">,
-    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>;
+def int_nvvm_mbarrier_arrive : ClangBuiltin<"__nvvm_mbarrier_arrive">,
+    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_shared :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_shared">,
-    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_shared">,
+    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_noComplete :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
-    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
+    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_noComplete_shared :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
-    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
+    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
+              llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_drop :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_drop">,
-    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_drop">,
+    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_drop_shared :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
-    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
+    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_drop_noComplete :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
-    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
+    Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_arrive_drop_noComplete_shared :
-    GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
-    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
+    Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
+              llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_test_wait :
-    GCCBuiltin<"__nvvm_mbarrier_test_wait">,
-    Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_test_wait">,
+    Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_test_wait_shared :
-    GCCBuiltin<"__nvvm_mbarrier_test_wait_shared">,
-    Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_test_wait_shared">,
+    Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
 def int_nvvm_mbarrier_pending_count :
-    GCCBuiltin<"__nvvm_mbarrier_pending_count">,
-    Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent]>;
+    ClangBuiltin<"__nvvm_mbarrier_pending_count">,
+    Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent, IntrNoCallback]>;
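Per the signatures above, an mbarrier is an i64 object in memory: init takes an expected arrival count, arrive returns an i64 phase token, and test_wait polls that token. A hedged CUDA sketch (the builtin spellings are the ClangBuiltin names above; passing a __shared__ address straight to the *_shared variants is an assumption about clang's pointer handling):

    __global__ void mbar_demo() {
      __shared__ long long bar;                          // the i64 mbarrier object
      if (threadIdx.x == 0)
        __nvvm_mbarrier_init_shared(&bar, blockDim.x);   // expect one arrival per thread
      __syncthreads();
      long long token = __nvvm_mbarrier_arrive_shared(&bar);
      while (!__nvvm_mbarrier_test_wait_shared(&bar, token))
        ;                                                // spin until this phase completes
    }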
 // Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
 // pointer's alignment.
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.i">;
 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.f">;
 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.p">;
 // Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
 // pointer's alignment.
 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.i">;
 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.f">;
 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.p">;
 // Use for generic pointers
@@ -1491,7 +1544,7 @@ def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
 // This is for params that are passed to kernel functions by pointer by-val.
 def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
                                      [llvm_anyptr_ty],
-                                     [IntrNoMem, IntrSpeculatable],
+                                     [IntrNoMem, IntrSpeculatable, IntrNoCallback],
                                      "llvm.nvvm.ptr.gen.to.param">;
 // Move intrinsics, used in nvvm internally
@@ -1531,149 +1584,149 @@ def int_nvvm_reflect :
 def int_nvvm_isspacep_const :
     DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
                           "llvm.nvvm.isspacep.const">,
-    GCCBuiltin<"__nvvm_isspacep_const">;
+    ClangBuiltin<"__nvvm_isspacep_const">;
 def int_nvvm_isspacep_global :
     DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
                           "llvm.nvvm.isspacep.global">,
-    GCCBuiltin<"__nvvm_isspacep_global">;
+    ClangBuiltin<"__nvvm_isspacep_global">;
 def int_nvvm_isspacep_local :
     DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
                           "llvm.nvvm.isspacep.local">,
-    GCCBuiltin<"__nvvm_isspacep_local">;
+    ClangBuiltin<"__nvvm_isspacep_local">;
 def int_nvvm_isspacep_shared :
     DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
                           "llvm.nvvm.isspacep.shared">,
-    GCCBuiltin<"__nvvm_isspacep_shared">;
+    ClangBuiltin<"__nvvm_isspacep_shared">;
 // Environment register read
 def int_nvvm_read_ptx_sreg_envreg0 :
     DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
                           "llvm.nvvm.read.ptx.sreg.envreg0">,
-    GCCBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
+    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
 def int_nvvm_read_ptx_sreg_envreg1 :
    DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
                          "llvm.nvvm.read.ptx.sreg.envreg1">,
-    GCCBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
+    ClangBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
 def int_nvvm_read_ptx_sreg_envreg2 :
    DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
                          "llvm.nvvm.read.ptx.sreg.envreg2">,
-    GCCBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
+
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg2">; def int_nvvm_read_ptx_sreg_envreg3 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg3">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg3">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg3">; def int_nvvm_read_ptx_sreg_envreg4 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg4">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg4">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg4">; def int_nvvm_read_ptx_sreg_envreg5 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg5">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg5">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg5">; def int_nvvm_read_ptx_sreg_envreg6 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg6">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg6">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg6">; def int_nvvm_read_ptx_sreg_envreg7 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg7">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg7">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg7">; def int_nvvm_read_ptx_sreg_envreg8 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg8">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg8">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg8">; def int_nvvm_read_ptx_sreg_envreg9 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg9">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg9">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg9">; def int_nvvm_read_ptx_sreg_envreg10 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg10">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg10">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg10">; def int_nvvm_read_ptx_sreg_envreg11 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg11">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg11">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg11">; def int_nvvm_read_ptx_sreg_envreg12 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg12">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg12">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg12">; def int_nvvm_read_ptx_sreg_envreg13 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg13">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg13">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg13">; def int_nvvm_read_ptx_sreg_envreg14 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg14">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg14">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg14">; def int_nvvm_read_ptx_sreg_envreg15 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg15">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg15">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg15">; def int_nvvm_read_ptx_sreg_envreg16 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg16">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg16">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg16">; def int_nvvm_read_ptx_sreg_envreg17 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], 
"llvm.nvvm.read.ptx.sreg.envreg17">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg17">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg17">; def int_nvvm_read_ptx_sreg_envreg18 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg18">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg18">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg18">; def int_nvvm_read_ptx_sreg_envreg19 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg19">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg19">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg19">; def int_nvvm_read_ptx_sreg_envreg20 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg20">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg20">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg20">; def int_nvvm_read_ptx_sreg_envreg21 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg21">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg21">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg21">; def int_nvvm_read_ptx_sreg_envreg22 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg22">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg22">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg22">; def int_nvvm_read_ptx_sreg_envreg23 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg23">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg23">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg23">; def int_nvvm_read_ptx_sreg_envreg24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg24">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg24">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg24">; def int_nvvm_read_ptx_sreg_envreg25 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg25">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg25">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg25">; def int_nvvm_read_ptx_sreg_envreg26 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg26">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg26">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg26">; def int_nvvm_read_ptx_sreg_envreg27 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg27">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg27">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg27">; def int_nvvm_read_ptx_sreg_envreg28 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg28">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg28">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg28">; def int_nvvm_read_ptx_sreg_envreg29 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg29">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg29">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg29">; def int_nvvm_read_ptx_sreg_envreg30 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg30">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg30">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg30">; def int_nvvm_read_ptx_sreg_envreg31 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg31">, - GCCBuiltin<"__nvvm_read_ptx_sreg_envreg31">; + ClangBuiltin<"__nvvm_read_ptx_sreg_envreg31">; // Texture Fetch @@ 
-3161,62 +3214,62 @@ def int_nvvm_suld_3d_v4i32_zero def int_nvvm_txq_channel_order : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.channel.order">, - GCCBuiltin<"__nvvm_txq_channel_order">; + ClangBuiltin<"__nvvm_txq_channel_order">; def int_nvvm_txq_channel_data_type : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.channel.data.type">, - GCCBuiltin<"__nvvm_txq_channel_data_type">; + ClangBuiltin<"__nvvm_txq_channel_data_type">; def int_nvvm_txq_width : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.width">, - GCCBuiltin<"__nvvm_txq_width">; + ClangBuiltin<"__nvvm_txq_width">; def int_nvvm_txq_height : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.height">, - GCCBuiltin<"__nvvm_txq_height">; + ClangBuiltin<"__nvvm_txq_height">; def int_nvvm_txq_depth : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.depth">, - GCCBuiltin<"__nvvm_txq_depth">; + ClangBuiltin<"__nvvm_txq_depth">; def int_nvvm_txq_array_size : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.array.size">, - GCCBuiltin<"__nvvm_txq_array_size">; + ClangBuiltin<"__nvvm_txq_array_size">; def int_nvvm_txq_num_samples : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.num.samples">, - GCCBuiltin<"__nvvm_txq_num_samples">; + ClangBuiltin<"__nvvm_txq_num_samples">; def int_nvvm_txq_num_mipmap_levels : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.txq.num.mipmap.levels">, - GCCBuiltin<"__nvvm_txq_num_mipmap_levels">; + ClangBuiltin<"__nvvm_txq_num_mipmap_levels">; //===- Surface Query ------------------------------------------------------===// def int_nvvm_suq_channel_order : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.suq.channel.order">, - GCCBuiltin<"__nvvm_suq_channel_order">; + ClangBuiltin<"__nvvm_suq_channel_order">; def int_nvvm_suq_channel_data_type : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.suq.channel.data.type">, - GCCBuiltin<"__nvvm_suq_channel_data_type">; + ClangBuiltin<"__nvvm_suq_channel_data_type">; def int_nvvm_suq_width : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.suq.width">, - GCCBuiltin<"__nvvm_suq_width">; + ClangBuiltin<"__nvvm_suq_width">; def int_nvvm_suq_height : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.suq.height">, - GCCBuiltin<"__nvvm_suq_height">; + ClangBuiltin<"__nvvm_suq_height">; def int_nvvm_suq_depth : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.suq.depth">, - GCCBuiltin<"__nvvm_suq_depth">; + ClangBuiltin<"__nvvm_suq_depth">; def int_nvvm_suq_array_size : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.suq.array.size">, - GCCBuiltin<"__nvvm_suq_array_size">; + ClangBuiltin<"__nvvm_suq_array_size">; //===- Handle Query -------------------------------------------------------===// @@ -3224,15 +3277,15 @@ def int_nvvm_suq_array_size def int_nvvm_istypep_sampler : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.istypep.sampler">, - GCCBuiltin<"__nvvm_istypep_sampler">; + ClangBuiltin<"__nvvm_istypep_sampler">; def int_nvvm_istypep_surface : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.istypep.surface">, - GCCBuiltin<"__nvvm_istypep_surface">; + ClangBuiltin<"__nvvm_istypep_surface">; def int_nvvm_istypep_texture : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem], "llvm.nvvm.istypep.texture">, - GCCBuiltin<"__nvvm_istypep_texture">; + ClangBuiltin<"__nvvm_istypep_texture">; @@ -3243,810 
+3296,810 @@ def int_nvvm_istypep_texture def int_nvvm_sust_b_1d_i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_i8_clamp">; def int_nvvm_sust_b_1d_i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_i16_clamp">; def int_nvvm_sust_b_1d_i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_i32_clamp">; def int_nvvm_sust_b_1d_i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_i64_clamp">; def int_nvvm_sust_b_1d_v2i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">; def int_nvvm_sust_b_1d_v2i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">; def int_nvvm_sust_b_1d_v2i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v2i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">; def int_nvvm_sust_b_1d_v2i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.v2i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">; def int_nvvm_sust_b_1d_v4i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v4i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">; def int_nvvm_sust_b_1d_v4i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v4i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">; def int_nvvm_sust_b_1d_v4i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v4i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">; def int_nvvm_sust_b_1d_array_i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">; def int_nvvm_sust_b_1d_array_i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">; def int_nvvm_sust_b_1d_array_i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">; def int_nvvm_sust_b_1d_array_i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], 
"llvm.nvvm.sust.b.1d.array.i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">; def int_nvvm_sust_b_1d_array_v2i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v2i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">; def int_nvvm_sust_b_1d_array_v2i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v2i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">; def int_nvvm_sust_b_1d_array_v2i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v2i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">; def int_nvvm_sust_b_1d_array_v2i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.array.v2i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">; def int_nvvm_sust_b_1d_array_v4i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v4i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">; def int_nvvm_sust_b_1d_array_v4i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v4i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">; def int_nvvm_sust_b_1d_array_v4i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v4i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">; def int_nvvm_sust_b_2d_i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_i8_clamp">; def int_nvvm_sust_b_2d_i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_i16_clamp">; def int_nvvm_sust_b_2d_i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_i32_clamp">; def int_nvvm_sust_b_2d_i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_i64_clamp">; def int_nvvm_sust_b_2d_v2i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v2i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">; def int_nvvm_sust_b_2d_v2i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v2i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">; + 
ClangBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">; def int_nvvm_sust_b_2d_v2i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v2i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">; def int_nvvm_sust_b_2d_v2i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.v2i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">; def int_nvvm_sust_b_2d_v4i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v4i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">; def int_nvvm_sust_b_2d_v4i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v4i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">; def int_nvvm_sust_b_2d_v4i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v4i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">; def int_nvvm_sust_b_2d_array_i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">; def int_nvvm_sust_b_2d_array_i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">; def int_nvvm_sust_b_2d_array_i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">; def int_nvvm_sust_b_2d_array_i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.array.i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">; def int_nvvm_sust_b_2d_array_v2i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v2i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">; def int_nvvm_sust_b_2d_array_v2i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v2i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">; def int_nvvm_sust_b_2d_array_v2i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v2i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">; def int_nvvm_sust_b_2d_array_v2i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.array.v2i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">; + 
ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">; def int_nvvm_sust_b_2d_array_v4i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v4i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">; def int_nvvm_sust_b_2d_array_v4i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v4i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">; def int_nvvm_sust_b_2d_array_v4i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v4i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">; def int_nvvm_sust_b_3d_i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_i8_clamp">; def int_nvvm_sust_b_3d_i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_i16_clamp">; def int_nvvm_sust_b_3d_i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_i32_clamp">; def int_nvvm_sust_b_3d_i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.3d.i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_i64_clamp">; def int_nvvm_sust_b_3d_v2i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v2i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">; def int_nvvm_sust_b_3d_v2i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v2i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">; def int_nvvm_sust_b_3d_v2i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v2i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">; def int_nvvm_sust_b_3d_v2i64_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.3d.v2i64.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">; def int_nvvm_sust_b_3d_v4i8_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v4i8.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">; def int_nvvm_sust_b_3d_v4i16_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v4i16.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">; + 
ClangBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">; def int_nvvm_sust_b_3d_v4i32_clamp : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v4i32.clamp">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">; // .trap variant def int_nvvm_sust_b_1d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i8.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_i8_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_i8_trap">; def int_nvvm_sust_b_1d_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i16.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_i16_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_i16_trap">; def int_nvvm_sust_b_1d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.i32.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_i32_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_i32_trap">; def int_nvvm_sust_b_1d_i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.i64.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_i64_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_i64_trap">; def int_nvvm_sust_b_1d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i8_trap">; def int_nvvm_sust_b_1d_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i16_trap">; def int_nvvm_sust_b_1d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i32_trap">; def int_nvvm_sust_b_1d_v2i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.v2i64.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i64_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i64_trap">; def int_nvvm_sust_b_1d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i8_trap">; def int_nvvm_sust_b_1d_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i16_trap">; def int_nvvm_sust_b_1d_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i32_trap">; def int_nvvm_sust_b_1d_array_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.i8.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i8_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i8_trap">; def int_nvvm_sust_b_1d_array_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.i16.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i16_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i16_trap">; def int_nvvm_sust_b_1d_array_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], 
"llvm.nvvm.sust.b.1d.array.i32.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i32_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i32_trap">; def int_nvvm_sust_b_1d_array_i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.array.i64.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i64_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i64_trap">; def int_nvvm_sust_b_1d_array_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">; def int_nvvm_sust_b_1d_array_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">; def int_nvvm_sust_b_1d_array_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">; def int_nvvm_sust_b_1d_array_v2i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.array.v2i64.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">; def int_nvvm_sust_b_1d_array_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">; def int_nvvm_sust_b_1d_array_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">; def int_nvvm_sust_b_1d_array_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">; def int_nvvm_sust_b_2d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.i8.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_i8_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_i8_trap">; def int_nvvm_sust_b_2d_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.i16.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_i16_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_i16_trap">; def int_nvvm_sust_b_2d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.i32.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_i32_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_i32_trap">; def int_nvvm_sust_b_2d_i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.i64.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_i64_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_i64_trap">; def int_nvvm_sust_b_2d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i8_trap">; def int_nvvm_sust_b_2d_v2i16_trap : 
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i16_trap">; def int_nvvm_sust_b_2d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i32_trap">; def int_nvvm_sust_b_2d_v2i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.v2i64.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i64_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i64_trap">; def int_nvvm_sust_b_2d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i8_trap">; def int_nvvm_sust_b_2d_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i16_trap">; def int_nvvm_sust_b_2d_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i32_trap">; def int_nvvm_sust_b_2d_array_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.i8.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i8_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i8_trap">; def int_nvvm_sust_b_2d_array_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.i16.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i16_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i16_trap">; def int_nvvm_sust_b_2d_array_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.i32.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i32_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i32_trap">; def int_nvvm_sust_b_2d_array_i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.array.i64.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i64_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i64_trap">; def int_nvvm_sust_b_2d_array_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">; def int_nvvm_sust_b_2d_array_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">; def int_nvvm_sust_b_2d_array_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">; def int_nvvm_sust_b_2d_array_v2i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, 
llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.array.v2i64.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">; def int_nvvm_sust_b_2d_array_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">; def int_nvvm_sust_b_2d_array_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">; def int_nvvm_sust_b_2d_array_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">; def int_nvvm_sust_b_3d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.i8.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_i8_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_i8_trap">; def int_nvvm_sust_b_3d_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.i16.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_i16_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_i16_trap">; def int_nvvm_sust_b_3d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.i32.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_i32_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_i32_trap">; def int_nvvm_sust_b_3d_i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.3d.i64.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_i64_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_i64_trap">; def int_nvvm_sust_b_3d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i8_trap">; def int_nvvm_sust_b_3d_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i16_trap">; def int_nvvm_sust_b_3d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i32_trap">; def int_nvvm_sust_b_3d_v2i64_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.3d.v2i64.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i64_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i64_trap">; def int_nvvm_sust_b_3d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i8_trap">; def int_nvvm_sust_b_3d_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], 
"llvm.nvvm.sust.b.3d.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i16_trap">; def int_nvvm_sust_b_3d_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i32_trap">; // .zero variant def int_nvvm_sust_b_1d_i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i8.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_i8_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_i8_zero">; def int_nvvm_sust_b_1d_i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i16.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_i16_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_i16_zero">; def int_nvvm_sust_b_1d_i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.i32.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_i32_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_i32_zero">; def int_nvvm_sust_b_1d_i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.i64.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_i64_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_i64_zero">; def int_nvvm_sust_b_1d_v2i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i8.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i8_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i8_zero">; def int_nvvm_sust_b_1d_v2i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i16.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i16_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i16_zero">; def int_nvvm_sust_b_1d_v2i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v2i32.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i32_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i32_zero">; def int_nvvm_sust_b_1d_v2i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.v2i64.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v2i64_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v2i64_zero">; def int_nvvm_sust_b_1d_v4i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v4i8.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i8_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i8_zero">; def int_nvvm_sust_b_1d_v4i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v4i16.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i16_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i16_zero">; def int_nvvm_sust_b_1d_v4i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v4i32.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_v4i32_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_v4i32_zero">; def int_nvvm_sust_b_1d_array_i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.i8.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i8_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i8_zero">; def int_nvvm_sust_b_1d_array_i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.i16.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i16_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i16_zero">; def int_nvvm_sust_b_1d_array_i32_zero : Intrinsic<[], 
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.i32.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i32_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i32_zero">; def int_nvvm_sust_b_1d_array_i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.array.i64.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_i64_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_i64_zero">; def int_nvvm_sust_b_1d_array_v2i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v2i8.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">; def int_nvvm_sust_b_1d_array_v2i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v2i16.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">; def int_nvvm_sust_b_1d_array_v2i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v2i32.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">; def int_nvvm_sust_b_1d_array_v2i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.1d.array.v2i64.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">; def int_nvvm_sust_b_1d_array_v4i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v4i8.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">; def int_nvvm_sust_b_1d_array_v4i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.array.v4i16.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">; def int_nvvm_sust_b_1d_array_v4i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v4i32.zero">, - GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">; + ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">; def int_nvvm_sust_b_2d_i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.i8.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_i8_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_i8_zero">; def int_nvvm_sust_b_2d_i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.i16.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_i16_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_i16_zero">; def int_nvvm_sust_b_2d_i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.i32.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_i32_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_i32_zero">; def int_nvvm_sust_b_2d_i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.i64.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_i64_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_i64_zero">; def int_nvvm_sust_b_2d_v2i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v2i8.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i8_zero">; + 
ClangBuiltin<"__nvvm_sust_b_2d_v2i8_zero">; def int_nvvm_sust_b_2d_v2i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v2i16.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i16_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i16_zero">; def int_nvvm_sust_b_2d_v2i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v2i32.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i32_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i32_zero">; def int_nvvm_sust_b_2d_v2i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.v2i64.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v2i64_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_v2i64_zero">; def int_nvvm_sust_b_2d_v4i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v4i8.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i8_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i8_zero">; def int_nvvm_sust_b_2d_v4i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.v4i16.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i16_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i16_zero">; def int_nvvm_sust_b_2d_v4i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v4i32.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_v4i32_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_v4i32_zero">; def int_nvvm_sust_b_2d_array_i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.i8.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i8_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i8_zero">; def int_nvvm_sust_b_2d_array_i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.i16.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i16_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i16_zero">; def int_nvvm_sust_b_2d_array_i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.i32.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i32_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i32_zero">; def int_nvvm_sust_b_2d_array_i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.array.i64.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_i64_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_i64_zero">; def int_nvvm_sust_b_2d_array_v2i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v2i8.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">; def int_nvvm_sust_b_2d_array_v2i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v2i16.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">; def int_nvvm_sust_b_2d_array_v2i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v2i32.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">; def int_nvvm_sust_b_2d_array_v2i64_zero : 
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.2d.array.v2i64.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">; def int_nvvm_sust_b_2d_array_v4i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v4i8.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">; def int_nvvm_sust_b_2d_array_v4i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.2d.array.v4i16.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">; def int_nvvm_sust_b_2d_array_v4i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v4i32.zero">, - GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">; + ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">; def int_nvvm_sust_b_3d_i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.i8.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_i8_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_i8_zero">; def int_nvvm_sust_b_3d_i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.i16.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_i16_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_i16_zero">; def int_nvvm_sust_b_3d_i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.i32.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_i32_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_i32_zero">; def int_nvvm_sust_b_3d_i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.3d.i64.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_i64_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_i64_zero">; def int_nvvm_sust_b_3d_v2i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v2i8.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i8_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i8_zero">; def int_nvvm_sust_b_3d_v2i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v2i16.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i16_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i16_zero">; def int_nvvm_sust_b_3d_v2i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v2i32.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i32_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i32_zero">; def int_nvvm_sust_b_3d_v2i64_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], "llvm.nvvm.sust.b.3d.v2i64.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v2i64_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v2i64_zero">; def int_nvvm_sust_b_3d_v4i8_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v4i8.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i8_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i8_zero">; def int_nvvm_sust_b_3d_v4i16_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.3d.v4i16.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i16_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i16_zero">; def int_nvvm_sust_b_3d_v4i32_zero : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v4i32.zero">, - GCCBuiltin<"__nvvm_sust_b_3d_v4i32_zero">; + ClangBuiltin<"__nvvm_sust_b_3d_v4i32_zero">; @@ -4055,245 +4108,245 @@ def int_nvvm_sust_b_3d_v4i32_zero def int_nvvm_sust_p_1d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.i8.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_i8_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_i8_trap">; def int_nvvm_sust_p_1d_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.i16.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_i16_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_i16_trap">; def int_nvvm_sust_p_1d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.1d.i32.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_i32_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_i32_trap">; def int_nvvm_sust_p_1d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_v2i8_trap">; def int_nvvm_sust_p_1d_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_v2i16_trap">; def int_nvvm_sust_p_1d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.1d.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_v2i32_trap">; def int_nvvm_sust_p_1d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_v4i8_trap">; def int_nvvm_sust_p_1d_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_v4i16_trap">; def int_nvvm_sust_p_1d_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.1d.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_v4i32_trap">; def int_nvvm_sust_p_1d_array_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.array.i8.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_i8_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_i8_trap">; def int_nvvm_sust_p_1d_array_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.array.i16.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_i16_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_i16_trap">; def int_nvvm_sust_p_1d_array_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.1d.array.i32.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_i32_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_i32_trap">; def int_nvvm_sust_p_1d_array_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], 
"llvm.nvvm.sust.p.1d.array.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">; def int_nvvm_sust_p_1d_array_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.array.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">; def int_nvvm_sust_p_1d_array_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.1d.array.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">; def int_nvvm_sust_p_1d_array_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.array.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">; def int_nvvm_sust_p_1d_array_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.1d.array.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">; def int_nvvm_sust_p_1d_array_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.1d.array.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">; def int_nvvm_sust_p_2d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.i8.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_i8_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_i8_trap">; def int_nvvm_sust_p_2d_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.i16.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_i16_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_i16_trap">; def int_nvvm_sust_p_2d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.2d.i32.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_i32_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_i32_trap">; def int_nvvm_sust_p_2d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_v2i8_trap">; def int_nvvm_sust_p_2d_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_v2i16_trap">; def int_nvvm_sust_p_2d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.2d.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_v2i32_trap">; def int_nvvm_sust_p_2d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_v4i8_trap">; def int_nvvm_sust_p_2d_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_v4i16_trap">; def 
int_nvvm_sust_p_2d_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.2d.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_v4i32_trap">; def int_nvvm_sust_p_2d_array_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.array.i8.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_i8_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_i8_trap">; def int_nvvm_sust_p_2d_array_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.array.i16.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_i16_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_i16_trap">; def int_nvvm_sust_p_2d_array_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.2d.array.i32.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_i32_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_i32_trap">; def int_nvvm_sust_p_2d_array_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.array.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">; def int_nvvm_sust_p_2d_array_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.array.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">; def int_nvvm_sust_p_2d_array_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.2d.array.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">; def int_nvvm_sust_p_2d_array_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.array.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">; def int_nvvm_sust_p_2d_array_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.2d.array.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">; def int_nvvm_sust_p_2d_array_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.2d.array.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">; def int_nvvm_sust_p_3d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.3d.i8.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_i8_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_i8_trap">; def int_nvvm_sust_p_3d_i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.3d.i16.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_i16_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_i16_trap">; def int_nvvm_sust_p_3d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.3d.i32.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_i32_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_i32_trap">; 
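Every ClangBuiltin<> string in these entries is the name under which clang exposes the intrinsic when compiling for the NVPTX target, so the surface-store intrinsics can be exercised directly from C. A minimal sketch, assuming clang invoked with --target=nvptx64-nvidia-cuda and an opaque 64-bit surface handle obtained on the host side:

  /* Unformatted (.b) 1-D surface store; per the PTX ISA the .trap
     variants fault on out-of-range coordinates, while the .clamp and
     .zero variants clamp the coordinate or drop the access instead. */
  void store_word(long long surf, int byte_x, int value) {
    __nvvm_sust_b_1d_i32_trap(surf, byte_x, value);  /* llvm.nvvm.sust.b.1d.i32.trap */
  }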
def int_nvvm_sust_p_3d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.3d.v2i8.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_v2i8_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_v2i8_trap">; def int_nvvm_sust_p_3d_v2i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.3d.v2i16.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_v2i16_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_v2i16_trap">; def int_nvvm_sust_p_3d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.3d.v2i32.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_v2i32_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_v2i32_trap">; def int_nvvm_sust_p_3d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.3d.v4i8.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_v4i8_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_v4i8_trap">; def int_nvvm_sust_p_3d_v4i16_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.p.3d.v4i16.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_v4i16_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_v4i16_trap">; def int_nvvm_sust_p_3d_v4i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.p.3d.v4i32.trap">, - GCCBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; + ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; def int_nvvm_rotate_b32 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">, - GCCBuiltin<"__nvvm_rotate_b32">; + ClangBuiltin<"__nvvm_rotate_b32">; def int_nvvm_rotate_b64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">, - GCCBuiltin<"__nvvm_rotate_b64">; + ClangBuiltin<"__nvvm_rotate_b64">; def int_nvvm_rotate_right_b64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">, - GCCBuiltin<"__nvvm_rotate_right_b64">; + ClangBuiltin<"__nvvm_rotate_right_b64">; def int_nvvm_swap_lo_hi_b64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, - GCCBuiltin<"__nvvm_swap_lo_hi_b64">; + ClangBuiltin<"__nvvm_swap_lo_hi_b64">; // Accessing special registers. @@ -4304,31 +4357,31 @@ multiclass PTXReadSRegIntrinsic_v4i32 { // FIXME: Enable this once v4i32 support is enabled in back-end. 
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>; - def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">; - def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">; - def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">; - def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">; + def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">; + def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">; + def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">; + def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">; } class PTXReadSRegIntrinsic_r32 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; + ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>; class PTXReadSRegIntrinsic_r64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; + ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>; // Intrinsics to read registers with non-constant values. E.g. the values that // do change over the kernel lifetime. Such reads should not be CSE'd. class PTXReadNCSRegIntrinsic_r32 - : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; + : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>, + ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>; class PTXReadNCSRegIntrinsic_r64 - : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>, - GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; + : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>, + ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>; defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">; defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">; @@ -4375,14 +4428,16 @@ foreach sync = [false, true] in { foreach return_pred = [false, true] in { foreach i = [SHFL_INFO] in { if i.withGccBuiltin then { - def i.Name : GCCBuiltin, + def i.Name : ClangBuiltin, Intrinsic; } if i.withoutGccBuiltin then { def i.Name : Intrinsic; + [IntrInaccessibleMemOnly, IntrConvergent, + IntrNoCallback], i.IntrName>; } } } @@ -4397,23 +4452,23 @@ foreach sync = [false, true] in { // vote.all pred def int_nvvm_vote_all : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all">, - GCCBuiltin<"__nvvm_vote_all">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all">, + ClangBuiltin<"__nvvm_vote_all">; // vote.any pred def int_nvvm_vote_any : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any">, - GCCBuiltin<"__nvvm_vote_any">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any">, + ClangBuiltin<"__nvvm_vote_any">; // vote.uni pred def int_nvvm_vote_uni : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni">, - 
GCCBuiltin<"__nvvm_vote_uni">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni">, + ClangBuiltin<"__nvvm_vote_uni">; // vote.ballot pred def int_nvvm_vote_ballot : Intrinsic<[llvm_i32_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot">, - GCCBuiltin<"__nvvm_vote_ballot">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot">, + ClangBuiltin<"__nvvm_vote_ballot">; // // VOTE.SYNC @@ -4422,23 +4477,23 @@ def int_nvvm_vote_ballot : // vote.sync.all mask, pred def int_nvvm_vote_all_sync : Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all.sync">, - GCCBuiltin<"__nvvm_vote_all_sync">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all.sync">, + ClangBuiltin<"__nvvm_vote_all_sync">; // vote.sync.any mask, pred def int_nvvm_vote_any_sync : Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any.sync">, - GCCBuiltin<"__nvvm_vote_any_sync">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any.sync">, + ClangBuiltin<"__nvvm_vote_any_sync">; // vote.sync.uni mask, pred def int_nvvm_vote_uni_sync : Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni.sync">, - GCCBuiltin<"__nvvm_vote_uni_sync">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni.sync">, + ClangBuiltin<"__nvvm_vote_uni_sync">; // vote.sync.ballot mask, pred def int_nvvm_vote_ballot_sync : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot.sync">, - GCCBuiltin<"__nvvm_vote_ballot_sync">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot.sync">, + ClangBuiltin<"__nvvm_vote_ballot_sync">; // // MATCH.SYNC @@ -4446,13 +4501,13 @@ def int_nvvm_vote_ballot_sync : // match.any.sync.b32 mask, value def int_nvvm_match_any_sync_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i32">, - GCCBuiltin<"__nvvm_match_any_sync_i32">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i32">, + ClangBuiltin<"__nvvm_match_any_sync_i32">; // match.any.sync.b64 mask, value def int_nvvm_match_any_sync_i64 : - Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">, - GCCBuiltin<"__nvvm_match_any_sync_i64">; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i64">, + ClangBuiltin<"__nvvm_match_any_sync_i64">; // match.all instruction have two variants -- one returns a single value, another // returns a pair {value, predicate}. 
We currently only implement the latter as @@ -4461,54 +4516,54 @@ def int_nvvm_match_any_sync_i64 : // match.all.sync.b32p mask, value def int_nvvm_match_all_sync_i32p : Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i32p">; // match.all.sync.b64p mask, value def int_nvvm_match_all_sync_i64p : - Intrinsic<[llvm_i64_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">; + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i64p">; // // REDUX.SYNC // // redux.sync.min.u32 dst, src, membermask; -def int_nvvm_redux_sync_umin : GCCBuiltin<"__nvvm_redux_sync_umin">, +def int_nvvm_redux_sync_umin : ClangBuiltin<"__nvvm_redux_sync_umin">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.max.u32 dst, src, membermask; -def int_nvvm_redux_sync_umax : GCCBuiltin<"__nvvm_redux_sync_umax">, +def int_nvvm_redux_sync_umax : ClangBuiltin<"__nvvm_redux_sync_umax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.add.s32 dst, src, membermask; -def int_nvvm_redux_sync_add : GCCBuiltin<"__nvvm_redux_sync_add">, +def int_nvvm_redux_sync_add : ClangBuiltin<"__nvvm_redux_sync_add">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.min.s32 dst, src, membermask; -def int_nvvm_redux_sync_min : GCCBuiltin<"__nvvm_redux_sync_min">, +def int_nvvm_redux_sync_min : ClangBuiltin<"__nvvm_redux_sync_min">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.max.s32 dst, src, membermask; -def int_nvvm_redux_sync_max : GCCBuiltin<"__nvvm_redux_sync_max">, +def int_nvvm_redux_sync_max : ClangBuiltin<"__nvvm_redux_sync_max">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.and.b32 dst, src, membermask; -def int_nvvm_redux_sync_and : GCCBuiltin<"__nvvm_redux_sync_and">, +def int_nvvm_redux_sync_and : ClangBuiltin<"__nvvm_redux_sync_and">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.xor.b32 dst, src, membermask; -def int_nvvm_redux_sync_xor : GCCBuiltin<"__nvvm_redux_sync_xor">, +def int_nvvm_redux_sync_xor : ClangBuiltin<"__nvvm_redux_sync_xor">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.or.b32 dst, src, membermask; -def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">, +def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; 
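The match.any.sync.i64 and match.all.sync.i64p changes in the hunks above are genuine fixes riding along with the rename: per the PTX ISA, match.sync returns a 32-bit lane mask even when matching 64-bit values, so the result type becomes llvm_i32_ty. As a rough illustration of how the vote/match/redux families surface in user code, a sketch assuming clang targeting NVPTX at sm_80 or newer (match.sync needs sm_70, redux.sync needs sm_80), where mask names the participating lanes:

  /* Convergent warp-level primitives; every lane named in `mask` must
     reach the call together. */
  unsigned warp_demo(unsigned mask, int v) {
    int all_pos    = __nvvm_vote_all_sync(mask, v > 0);   /* vote.all.sync      */
    unsigned peers = __nvvm_match_any_sync_i32(mask, v);  /* lanes with equal v */
    int sum        = __nvvm_redux_sync_add(v, mask);      /* warp-wide add      */
    return all_pos ? (unsigned)sum : peers;
  }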
// // WMMA instructions @@ -4517,7 +4572,7 @@ def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">, class NVVM_WMMA_LD : Intrinsic>, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly>, NoCapture>], WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>; // WMMA.STORE.D @@ -4527,7 +4582,7 @@ class NVVM_WMMA_ST [llvm_anyptr_ty], Frag.regs, !if(WithStride, [llvm_i32_ty], [])), - [IntrWriteMem, IntrArgMemOnly, WriteOnly>, NoCapture>], + [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>], WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>; // Create all load/store variants @@ -4550,7 +4605,7 @@ class NVVM_WMMA_MMA : Intrinsic.llvm>; foreach layout_a = ["row", "col"] in { @@ -4577,7 +4632,7 @@ class NVVM_MMA : Intrinsic.llvm>; foreach layout_a = ["row", "col"] in { @@ -4598,7 +4653,7 @@ foreach layout_a = ["row", "col"] in { // LDMATRIX class NVVM_LDMATRIX : Intrinsic>, + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly>, NoCapture>], LDMATRIX_NAME.intr>; diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index b01fa10763b8..577122328dd2 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -18,7 +18,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions. def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">, + def int_ppc_dcbf : ClangBuiltin<"__builtin_dcbf">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; def int_ppc_dcbfps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; def int_ppc_dcbstps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; @@ -30,136 +30,170 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>; // Get content from current FPSCR register - def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">, + def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">, Intrinsic<[llvm_double_ty], [], [IntrNoMerge, IntrHasSideEffects]>; // Set FPSCR register, and return previous content - def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">, + def int_ppc_setflm : ClangBuiltin<"__builtin_setflm">, Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrHasSideEffects]>; // Intrinsics for [double]word extended forms of divide instructions - def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">, + def int_ppc_divwe : ClangBuiltin<"__builtin_divwe">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_divweu : GCCBuiltin<"__builtin_divweu">, + def int_ppc_divweu : ClangBuiltin<"__builtin_divweu">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_divde : GCCBuiltin<"__builtin_divde">, + def int_ppc_divde : ClangBuiltin<"__builtin_divde">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_ppc_divdeu : GCCBuiltin<"__builtin_divdeu">, + def int_ppc_divdeu : ClangBuiltin<"__builtin_divdeu">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_ppc_unpack_longdouble : GCCBuiltin<"__builtin_unpack_longdouble">, + def int_ppc_unpack_longdouble : ClangBuiltin<"__builtin_unpack_longdouble">, Intrinsic<[llvm_double_ty], [llvm_ppcf128_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_pack_longdouble : GCCBuiltin<"__builtin_pack_longdouble">, + def int_ppc_pack_longdouble : ClangBuiltin<"__builtin_pack_longdouble">, Intrinsic<[llvm_ppcf128_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; // Generate a random number - def int_ppc_darn : GCCBuiltin<"__builtin_darn">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; - def int_ppc_darnraw : GCCBuiltin<"__builtin_darn_raw">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; - def int_ppc_darn32 : GCCBuiltin<"__builtin_darn_32">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + def int_ppc_darn : ClangBuiltin<"__builtin_darn">, + Intrinsic<[llvm_i64_ty], [], + [IntrNoMerge, IntrHasSideEffects]>; + def int_ppc_darnraw : ClangBuiltin<"__builtin_darn_raw">, + Intrinsic<[llvm_i64_ty], [], + [IntrNoMerge, IntrHasSideEffects]>; + def int_ppc_darn32 : ClangBuiltin<"__builtin_darn_32">, + Intrinsic<[llvm_i32_ty], [], + [IntrNoMerge, IntrHasSideEffects]>; // Bit permute doubleword - def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">, + def int_ppc_bpermd : ClangBuiltin<"__builtin_bpermd">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // Parallel Bits Deposit/Extract Doubleword Builtins. def int_ppc_pdepd - : GCCBuiltin<"__builtin_pdepd">, + : ClangBuiltin<"__builtin_pdepd">, Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_pextd - : GCCBuiltin<"__builtin_pextd">, + : ClangBuiltin<"__builtin_pextd">, Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // Centrifuge Doubleword Builtin. def int_ppc_cfuged - : GCCBuiltin<"__builtin_cfuged">, + : ClangBuiltin<"__builtin_cfuged">, Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // Count Leading / Trailing Zeroes under bit Mask Builtins. 
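The darn hunk above is another semantic change hiding among the renames: the random-number builtins drop IntrNoMem in favour of IntrNoMerge and IntrHasSideEffects, so identical calls can no longer be CSE'd or merged into a single hardware read. A sketch of why that matters, assuming clang targeting powerpc64le with -mcpu=pwr9 or newer:

  #include <stdint.h>

  /* With the old IntrNoMem attribute the optimizer was allowed to fold
     the two calls into one `darn` instruction, making a == b always;
     IntrNoMerge/IntrHasSideEffects forces two distinct reads. */
  uint64_t random_pair_xor(void) {
    uint64_t a = (uint64_t)__builtin_darn();
    uint64_t b = (uint64_t)__builtin_darn();
    return a ^ b;
  }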
def int_ppc_cntlzdm - : GCCBuiltin<"__builtin_cntlzdm">, + : ClangBuiltin<"__builtin_cntlzdm">, Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_cnttzdm - : GCCBuiltin<"__builtin_cnttzdm">, + : ClangBuiltin<"__builtin_cnttzdm">, Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_truncf128_round_to_odd - : GCCBuiltin<"__builtin_truncf128_round_to_odd">, + : ClangBuiltin<"__builtin_truncf128_round_to_odd">, Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>; def int_ppc_sqrtf128_round_to_odd - : GCCBuiltin<"__builtin_sqrtf128_round_to_odd">, + : ClangBuiltin<"__builtin_sqrtf128_round_to_odd">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty], [IntrNoMem]>; def int_ppc_addf128_round_to_odd - : GCCBuiltin<"__builtin_addf128_round_to_odd">, + : ClangBuiltin<"__builtin_addf128_round_to_odd">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>; def int_ppc_subf128_round_to_odd - : GCCBuiltin<"__builtin_subf128_round_to_odd">, + : ClangBuiltin<"__builtin_subf128_round_to_odd">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>; def int_ppc_mulf128_round_to_odd - : GCCBuiltin<"__builtin_mulf128_round_to_odd">, + : ClangBuiltin<"__builtin_mulf128_round_to_odd">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>; def int_ppc_divf128_round_to_odd - : GCCBuiltin<"__builtin_divf128_round_to_odd">, + : ClangBuiltin<"__builtin_divf128_round_to_odd">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>; def int_ppc_fmaf128_round_to_odd - : GCCBuiltin<"__builtin_fmaf128_round_to_odd">, + : ClangBuiltin<"__builtin_fmaf128_round_to_odd">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>; def int_ppc_scalar_extract_expq - : GCCBuiltin<"__builtin_vsx_scalar_extract_expq">, + : ClangBuiltin<"__builtin_vsx_scalar_extract_expq">, Intrinsic <[llvm_i64_ty], [llvm_f128_ty], [IntrNoMem]>; def int_ppc_scalar_insert_exp_qp - : GCCBuiltin<"__builtin_vsx_scalar_insert_exp_qp">, + : ClangBuiltin<"__builtin_vsx_scalar_insert_exp_qp">, Intrinsic <[llvm_f128_ty], [llvm_f128_ty, llvm_i64_ty], [IntrNoMem]>; // Intrinsics defined to maintain XL compatibility def int_ppc_tdw - : GCCBuiltin<"__builtin_ppc_tdw">, + : ClangBuiltin<"__builtin_ppc_tdw">, Intrinsic <[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [ImmArg>]>; def int_ppc_tw - : GCCBuiltin<"__builtin_ppc_tw">, + : ClangBuiltin<"__builtin_ppc_tw">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; def int_ppc_trapd - : GCCBuiltin<"__builtin_ppc_trapd">, + : ClangBuiltin<"__builtin_ppc_trapd">, Intrinsic <[], [llvm_i64_ty], []>; def int_ppc_trap - : GCCBuiltin<"__builtin_ppc_trap">, + : ClangBuiltin<"__builtin_ppc_trap">, Intrinsic <[], [llvm_i32_ty], []>; def int_ppc_fcfid - : GCCBuiltin<"__builtin_ppc_fcfid">, + : ClangBuiltin<"__builtin_ppc_fcfid">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fcfud - : GCCBuiltin<"__builtin_ppc_fcfud">, + : ClangBuiltin<"__builtin_ppc_fcfud">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fctid - : GCCBuiltin<"__builtin_ppc_fctid">, + : ClangBuiltin<"__builtin_ppc_fctid">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fctidz - : GCCBuiltin<"__builtin_ppc_fctidz">, + : ClangBuiltin<"__builtin_ppc_fctidz">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fctiw - : GCCBuiltin<"__builtin_ppc_fctiw">, + : ClangBuiltin<"__builtin_ppc_fctiw">, Intrinsic 
<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fctiwz - : GCCBuiltin<"__builtin_ppc_fctiwz">, + : ClangBuiltin<"__builtin_ppc_fctiwz">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fctudz - : GCCBuiltin<"__builtin_ppc_fctudz">, + : ClangBuiltin<"__builtin_ppc_fctudz">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fctuwz - : GCCBuiltin<"__builtin_ppc_fctuwz">, + : ClangBuiltin<"__builtin_ppc_fctuwz">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + + // XL compatible select functions + // TODO: Add llvm_f128_ty support. + def int_ppc_maxfe + : Intrinsic< + [llvm_ppcf128_ty], + [llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_vararg_ty], + [IntrNoMem]>; + def int_ppc_maxfl + : Intrinsic< + [llvm_double_ty], + [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_vararg_ty], + [IntrNoMem]>; + def int_ppc_maxfs + : Intrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_vararg_ty], + [IntrNoMem]>; + def int_ppc_minfe + : Intrinsic< + [llvm_ppcf128_ty], + [llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_vararg_ty], + [IntrNoMem]>; + def int_ppc_minfl + : Intrinsic< + [llvm_double_ty], + [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_vararg_ty], + [IntrNoMem]>; + def int_ppc_minfs + : Intrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_vararg_ty], + [IntrNoMem]>; } let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". @@ -167,14 +201,14 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". class PowerPC_Vec_Intrinsic ret_types, list param_types, list properties> - : GCCBuiltin, + : ClangBuiltin, Intrinsic; /// PowerPC_VSX_Intrinsic - Base class for all VSX intrinsics. class PowerPC_VSX_Intrinsic ret_types, list param_types, list properties> - : GCCBuiltin, + : ClangBuiltin, Intrinsic; } @@ -289,31 +323,31 @@ class PowerPC_VSX_Sca_DDD_Intrinsic let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // Data Stream Control. - def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">, + def int_ppc_altivec_dss : ClangBuiltin<"__builtin_altivec_dss">, Intrinsic<[], [llvm_i32_ty], []>; - def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">, + def int_ppc_altivec_dssall : ClangBuiltin<"__builtin_altivec_dssall">, Intrinsic<[], [], []>; - def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">, + def int_ppc_altivec_dst : ClangBuiltin<"__builtin_altivec_dst">, Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">, + def int_ppc_altivec_dstt : ClangBuiltin<"__builtin_altivec_dstt">, Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">, + def int_ppc_altivec_dstst : ClangBuiltin<"__builtin_altivec_dstst">, Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">, + def int_ppc_altivec_dststt : ClangBuiltin<"__builtin_altivec_dststt">, Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; // VSCR access. 
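The fcfid/fcti* block above models XL's direct access to the FP convert instructions: each takes and returns llvm_double_ty because, as in the hardware, the converted integer is produced in a floating-point register. A sketch of pulling the integer back out, assuming clang targeting powerpc64le (per the ISA, the converted word sits in the low 32 bits of the result's bit pattern):

  #include <stdint.h>
  #include <string.h>

  /* fctiwz: double -> int32 rounding toward zero, with the result left
     in the low word of an FPR image. */
  int32_t trunc_to_i32(double x) {
    double d = __builtin_ppc_fctiwz(x);
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);
    return (int32_t)(uint32_t)bits;
  }

The new maxfe/maxfl/maxfs and minfe/minfl/minfs definitions at the end of that hunk model XL's variadic min/max select functions; note that they carry no ClangBuiltin<> mapping in this file, so clang would have to reach them through custom lowering rather than a direct name match.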
- def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">, + def int_ppc_altivec_mfvscr : ClangBuiltin<"__builtin_altivec_mfvscr">, Intrinsic<[llvm_v8i16_ty], [], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">, + def int_ppc_altivec_mtvscr : ClangBuiltin<"__builtin_altivec_mtvscr">, Intrinsic<[], [llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; @@ -349,354 +383,354 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". [IntrWriteMem, IntrArgMemOnly]>; // Comparisons setting a vector. - def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">, + def int_ppc_altivec_vcmpbfp : ClangBuiltin<"__builtin_altivec_vcmpbfp">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpeqfp : GCCBuiltin<"__builtin_altivec_vcmpeqfp">, + def int_ppc_altivec_vcmpeqfp : ClangBuiltin<"__builtin_altivec_vcmpeqfp">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgefp : GCCBuiltin<"__builtin_altivec_vcmpgefp">, + def int_ppc_altivec_vcmpgefp : ClangBuiltin<"__builtin_altivec_vcmpgefp">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">, + def int_ppc_altivec_vcmpgtfp : ClangBuiltin<"__builtin_altivec_vcmpgtfp">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequd : GCCBuiltin<"__builtin_altivec_vcmpequd">, + def int_ppc_altivec_vcmpequd : ClangBuiltin<"__builtin_altivec_vcmpequd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsd : GCCBuiltin<"__builtin_altivec_vcmpgtsd">, + def int_ppc_altivec_vcmpgtsd : ClangBuiltin<"__builtin_altivec_vcmpgtsd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">, + def int_ppc_altivec_vcmpgtud : ClangBuiltin<"__builtin_altivec_vcmpgtud">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">, + def int_ppc_altivec_vcmpequw : ClangBuiltin<"__builtin_altivec_vcmpequw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsw : GCCBuiltin<"__builtin_altivec_vcmpgtsw">, + def int_ppc_altivec_vcmpgtsw : ClangBuiltin<"__builtin_altivec_vcmpgtsw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">, + def int_ppc_altivec_vcmpgtuw : ClangBuiltin<"__builtin_altivec_vcmpgtuw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnew : GCCBuiltin<"__builtin_altivec_vcmpnew">, + def int_ppc_altivec_vcmpnew : ClangBuiltin<"__builtin_altivec_vcmpnew">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnezw : GCCBuiltin<"__builtin_altivec_vcmpnezw">, + def int_ppc_altivec_vcmpnezw : ClangBuiltin<"__builtin_altivec_vcmpnezw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">, + def int_ppc_altivec_vcmpequh : ClangBuiltin<"__builtin_altivec_vcmpequh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsh : 
GCCBuiltin<"__builtin_altivec_vcmpgtsh">, + def int_ppc_altivec_vcmpgtsh : ClangBuiltin<"__builtin_altivec_vcmpgtsh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">, + def int_ppc_altivec_vcmpgtuh : ClangBuiltin<"__builtin_altivec_vcmpgtuh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpneh : GCCBuiltin<"__builtin_altivec_vcmpneh">, + def int_ppc_altivec_vcmpneh : ClangBuiltin<"__builtin_altivec_vcmpneh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnezh : GCCBuiltin<"__builtin_altivec_vcmpnezh">, + def int_ppc_altivec_vcmpnezh : ClangBuiltin<"__builtin_altivec_vcmpnezh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">, + def int_ppc_altivec_vcmpequb : ClangBuiltin<"__builtin_altivec_vcmpequb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsb : GCCBuiltin<"__builtin_altivec_vcmpgtsb">, + def int_ppc_altivec_vcmpgtsb : ClangBuiltin<"__builtin_altivec_vcmpgtsb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">, + def int_ppc_altivec_vcmpgtub : ClangBuiltin<"__builtin_altivec_vcmpgtub">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpneb : GCCBuiltin<"__builtin_altivec_vcmpneb">, + def int_ppc_altivec_vcmpneb : ClangBuiltin<"__builtin_altivec_vcmpneb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnezb : GCCBuiltin<"__builtin_altivec_vcmpnezb">, + def int_ppc_altivec_vcmpnezb : ClangBuiltin<"__builtin_altivec_vcmpnezb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequq : GCCBuiltin<"__builtin_altivec_vcmpequq">, + def int_ppc_altivec_vcmpequq : ClangBuiltin<"__builtin_altivec_vcmpequq">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsq : GCCBuiltin<"__builtin_altivec_vcmpgtsq">, + def int_ppc_altivec_vcmpgtsq : ClangBuiltin<"__builtin_altivec_vcmpgtsq">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtuq : GCCBuiltin<"__builtin_altivec_vcmpgtuq">, + def int_ppc_altivec_vcmpgtuq : ClangBuiltin<"__builtin_altivec_vcmpgtuq">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequq_p : GCCBuiltin<"__builtin_altivec_vcmpequq_p">, + def int_ppc_altivec_vcmpequq_p : ClangBuiltin<"__builtin_altivec_vcmpequq_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsq_p : GCCBuiltin<"__builtin_altivec_vcmpgtsq_p">, + def int_ppc_altivec_vcmpgtsq_p : ClangBuiltin<"__builtin_altivec_vcmpgtsq_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtuq_p : GCCBuiltin<"__builtin_altivec_vcmpgtuq_p">, + def int_ppc_altivec_vcmpgtuq_p : ClangBuiltin<"__builtin_altivec_vcmpgtuq_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty], [IntrNoMem]>; // Predicate Comparisons. The first operand specifies interpretation of CR6. 
- def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">, + def int_ppc_altivec_vcmpbfp_p : ClangBuiltin<"__builtin_altivec_vcmpbfp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpeqfp_p : GCCBuiltin<"__builtin_altivec_vcmpeqfp_p">, + def int_ppc_altivec_vcmpeqfp_p : ClangBuiltin<"__builtin_altivec_vcmpeqfp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgefp_p : GCCBuiltin<"__builtin_altivec_vcmpgefp_p">, + def int_ppc_altivec_vcmpgefp_p : ClangBuiltin<"__builtin_altivec_vcmpgefp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">, + def int_ppc_altivec_vcmpgtfp_p : ClangBuiltin<"__builtin_altivec_vcmpgtfp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequd_p : GCCBuiltin<"__builtin_altivec_vcmpequd_p">, + def int_ppc_altivec_vcmpequd_p : ClangBuiltin<"__builtin_altivec_vcmpequd_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsd_p : GCCBuiltin<"__builtin_altivec_vcmpgtsd_p">, + def int_ppc_altivec_vcmpgtsd_p : ClangBuiltin<"__builtin_altivec_vcmpgtsd_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtud_p : GCCBuiltin<"__builtin_altivec_vcmpgtud_p">, + def int_ppc_altivec_vcmpgtud_p : ClangBuiltin<"__builtin_altivec_vcmpgtud_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">, + def int_ppc_altivec_vcmpequw_p : ClangBuiltin<"__builtin_altivec_vcmpequw_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsw_p : GCCBuiltin<"__builtin_altivec_vcmpgtsw_p">, + def int_ppc_altivec_vcmpgtsw_p : ClangBuiltin<"__builtin_altivec_vcmpgtsw_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">, + def int_ppc_altivec_vcmpgtuw_p : ClangBuiltin<"__builtin_altivec_vcmpgtuw_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnew_p : GCCBuiltin<"__builtin_altivec_vcmpnew_p">, + def int_ppc_altivec_vcmpnew_p : ClangBuiltin<"__builtin_altivec_vcmpnew_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnezw_p : GCCBuiltin<"__builtin_altivec_vcmpnezw_p">, + def int_ppc_altivec_vcmpnezw_p : ClangBuiltin<"__builtin_altivec_vcmpnezw_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">, + def int_ppc_altivec_vcmpequh_p : ClangBuiltin<"__builtin_altivec_vcmpequh_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsh_p : GCCBuiltin<"__builtin_altivec_vcmpgtsh_p">, + def int_ppc_altivec_vcmpgtsh_p : ClangBuiltin<"__builtin_altivec_vcmpgtsh_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">, + def int_ppc_altivec_vcmpgtuh_p : ClangBuiltin<"__builtin_altivec_vcmpgtuh_p">, 
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpneh_p : GCCBuiltin<"__builtin_altivec_vcmpneh_p">, + def int_ppc_altivec_vcmpneh_p : ClangBuiltin<"__builtin_altivec_vcmpneh_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnezh_p : GCCBuiltin<"__builtin_altivec_vcmpnezh_p">, + def int_ppc_altivec_vcmpnezh_p : ClangBuiltin<"__builtin_altivec_vcmpnezh_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">, + def int_ppc_altivec_vcmpequb_p : ClangBuiltin<"__builtin_altivec_vcmpequb_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtsb_p : GCCBuiltin<"__builtin_altivec_vcmpgtsb_p">, + def int_ppc_altivec_vcmpgtsb_p : ClangBuiltin<"__builtin_altivec_vcmpgtsb_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">, + def int_ppc_altivec_vcmpgtub_p : ClangBuiltin<"__builtin_altivec_vcmpgtub_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpneb_p : GCCBuiltin<"__builtin_altivec_vcmpneb_p">, + def int_ppc_altivec_vcmpneb_p : ClangBuiltin<"__builtin_altivec_vcmpneb_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vcmpnezb_p : GCCBuiltin<"__builtin_altivec_vcmpnezb_p">, + def int_ppc_altivec_vcmpnezb_p : ClangBuiltin<"__builtin_altivec_vcmpnezb_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vclzlsbb : GCCBuiltin<"__builtin_altivec_vclzlsbb">, + def int_ppc_altivec_vclzlsbb : ClangBuiltin<"__builtin_altivec_vclzlsbb">, Intrinsic<[llvm_i32_ty],[llvm_v16i8_ty],[IntrNoMem]>; - def int_ppc_altivec_vctzlsbb : GCCBuiltin<"__builtin_altivec_vctzlsbb">, + def int_ppc_altivec_vctzlsbb : ClangBuiltin<"__builtin_altivec_vctzlsbb">, Intrinsic<[llvm_i32_ty],[llvm_v16i8_ty],[IntrNoMem]>; - def int_ppc_altivec_vprtybw : GCCBuiltin<"__builtin_altivec_vprtybw">, + def int_ppc_altivec_vprtybw : ClangBuiltin<"__builtin_altivec_vprtybw">, Intrinsic<[llvm_v4i32_ty],[llvm_v4i32_ty],[IntrNoMem]>; - def int_ppc_altivec_vprtybd : GCCBuiltin<"__builtin_altivec_vprtybd">, + def int_ppc_altivec_vprtybd : ClangBuiltin<"__builtin_altivec_vprtybd">, Intrinsic<[llvm_v2i64_ty],[llvm_v2i64_ty],[IntrNoMem]>; - def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, + def int_ppc_altivec_vprtybq : ClangBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; // BCD intrinsics. 
- def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic< + def int_ppc_bcdadd : ClangBuiltin<"__builtin_ppc_bcdadd">, Intrinsic< [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic< + def int_ppc_bcdadd_p : ClangBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic< [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic< + def int_ppc_bcdsub : ClangBuiltin<"__builtin_ppc_bcdsub">, Intrinsic< [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic< + def int_ppc_bcdsub_p : ClangBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic< [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, ImmArg>]>; // P10 Vector Extract with Mask - def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">, + def int_ppc_altivec_vextractbm : ClangBuiltin<"__builtin_altivec_vextractbm">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vextracthm : GCCBuiltin<"__builtin_altivec_vextracthm">, + def int_ppc_altivec_vextracthm : ClangBuiltin<"__builtin_altivec_vextracthm">, Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vextractwm : GCCBuiltin<"__builtin_altivec_vextractwm">, + def int_ppc_altivec_vextractwm : ClangBuiltin<"__builtin_altivec_vextractwm">, Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextractdm : GCCBuiltin<"__builtin_altivec_vextractdm">, + def int_ppc_altivec_vextractdm : ClangBuiltin<"__builtin_altivec_vextractdm">, Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">, + def int_ppc_altivec_vextractqm : ClangBuiltin<"__builtin_altivec_vextractqm">, Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>; // P10 Vector Expand with Mask - def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">, + def int_ppc_altivec_vexpandbm : ClangBuiltin<"__builtin_altivec_vexpandbm">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">, + def int_ppc_altivec_vexpandhm : ClangBuiltin<"__builtin_altivec_vexpandhm">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">, + def int_ppc_altivec_vexpandwm : ClangBuiltin<"__builtin_altivec_vexpandwm">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">, + def int_ppc_altivec_vexpanddm : ClangBuiltin<"__builtin_altivec_vexpanddm">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">, + def int_ppc_altivec_vexpandqm : ClangBuiltin<"__builtin_altivec_vexpandqm">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>; // P10 Vector Count with Mask intrinsics. 
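The P10 Extract-with-Mask family above gathers the most-significant bit of every element into a scalar mask, and Expand-with-Mask is its inverse; altivec.h wraps them as vec_extractm and vec_expandm. A sketch, assuming clang with -mcpu=pwr10:

  #include <altivec.h>

  /* Lowers to vextractbm (int_ppc_altivec_vextractbm above): bit i of
     the result is the most-significant bit of byte element i. */
  unsigned int byte_msb_mask(vector unsigned char v) {
    return vec_extractm(v);
  }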
- def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">, + def int_ppc_altivec_vcntmbb : ClangBuiltin<"__builtin_altivec_vcntmbb">, Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">, + def int_ppc_altivec_vcntmbh : ClangBuiltin<"__builtin_altivec_vcntmbh">, Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">, + def int_ppc_altivec_vcntmbw : ClangBuiltin<"__builtin_altivec_vcntmbw">, Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">, + def int_ppc_altivec_vcntmbd : ClangBuiltin<"__builtin_altivec_vcntmbd">, Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; // P10 Move to VSR with Mask Intrinsics. - def int_ppc_altivec_mtvsrbm : GCCBuiltin<"__builtin_altivec_mtvsrbm">, + def int_ppc_altivec_mtvsrbm : ClangBuiltin<"__builtin_altivec_mtvsrbm">, Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_ppc_altivec_mtvsrhm : GCCBuiltin<"__builtin_altivec_mtvsrhm">, + def int_ppc_altivec_mtvsrhm : ClangBuiltin<"__builtin_altivec_mtvsrhm">, Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_ppc_altivec_mtvsrwm : GCCBuiltin<"__builtin_altivec_mtvsrwm">, + def int_ppc_altivec_mtvsrwm : ClangBuiltin<"__builtin_altivec_mtvsrwm">, Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_ppc_altivec_mtvsrdm : GCCBuiltin<"__builtin_altivec_mtvsrdm">, + def int_ppc_altivec_mtvsrdm : ClangBuiltin<"__builtin_altivec_mtvsrdm">, Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_ppc_altivec_mtvsrqm : GCCBuiltin<"__builtin_altivec_mtvsrqm">, + def int_ppc_altivec_mtvsrqm : ClangBuiltin<"__builtin_altivec_mtvsrqm">, Intrinsic<[llvm_v1i128_ty], [llvm_i64_ty], [IntrNoMem]>; // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins. - def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, + def int_ppc_altivec_vpdepd : ClangBuiltin<"__builtin_altivec_vpdepd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vpextd : GCCBuiltin<"__builtin_altivec_vpextd">, + def int_ppc_altivec_vpextd : ClangBuiltin<"__builtin_altivec_vpextd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; // P10 Vector String Isolate Intrinsics. - def int_ppc_altivec_vstribr : GCCBuiltin<"__builtin_altivec_vstribr">, + def int_ppc_altivec_vstribr : ClangBuiltin<"__builtin_altivec_vstribr">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vstribl : GCCBuiltin<"__builtin_altivec_vstribl">, + def int_ppc_altivec_vstribl : ClangBuiltin<"__builtin_altivec_vstribl">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vstrihr : GCCBuiltin<"__builtin_altivec_vstrihr">, + def int_ppc_altivec_vstrihr : ClangBuiltin<"__builtin_altivec_vstrihr">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vstrihl : GCCBuiltin<"__builtin_altivec_vstrihl">, + def int_ppc_altivec_vstrihl : ClangBuiltin<"__builtin_altivec_vstrihl">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; // Predicate Intrinsics: The first operand specifies interpretation of CR6. 
- def int_ppc_altivec_vstribr_p : GCCBuiltin<"__builtin_altivec_vstribr_p">, + def int_ppc_altivec_vstribr_p : ClangBuiltin<"__builtin_altivec_vstribr_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vstribl_p : GCCBuiltin<"__builtin_altivec_vstribl_p">, + def int_ppc_altivec_vstribl_p : ClangBuiltin<"__builtin_altivec_vstribl_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vstrihr_p : GCCBuiltin<"__builtin_altivec_vstrihr_p">, + def int_ppc_altivec_vstrihr_p : ClangBuiltin<"__builtin_altivec_vstrihr_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vstrihl_p : GCCBuiltin<"__builtin_altivec_vstrihl_p">, + def int_ppc_altivec_vstrihl_p : ClangBuiltin<"__builtin_altivec_vstrihl_p">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; // P10 Vector Centrifuge Builtin. - def int_ppc_altivec_vcfuged : GCCBuiltin<"__builtin_altivec_vcfuged">, + def int_ppc_altivec_vcfuged : ClangBuiltin<"__builtin_altivec_vcfuged">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; // P10 Vector Gather Every Nth Bit Builtin. - def int_ppc_altivec_vgnb : GCCBuiltin<"__builtin_altivec_vgnb">, + def int_ppc_altivec_vgnb : ClangBuiltin<"__builtin_altivec_vgnb">, Intrinsic<[llvm_i64_ty], [llvm_v1i128_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; // P10 Vector Clear Bytes - def int_ppc_altivec_vclrlb : GCCBuiltin<"__builtin_altivec_vclrlb">, + def int_ppc_altivec_vclrlb : ClangBuiltin<"__builtin_altivec_vclrlb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vclrrb : GCCBuiltin<"__builtin_altivec_vclrrb">, + def int_ppc_altivec_vclrrb : ClangBuiltin<"__builtin_altivec_vclrrb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; // P10 Vector Shift Double Bit Immediate. - def int_ppc_altivec_vsldbi : GCCBuiltin<"__builtin_altivec_vsldbi">, + def int_ppc_altivec_vsldbi : ClangBuiltin<"__builtin_altivec_vsldbi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vsrdbi : GCCBuiltin<"__builtin_altivec_vsrdbi">, + def int_ppc_altivec_vsrdbi : ClangBuiltin<"__builtin_altivec_vsrdbi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; // P10 Vector Insert. 
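(An aside before the vector-insert group: several of the definitions above, vgnb and vsldbi/vsrdbi among them, attach an ImmArg attribute to their trailing llvm_i32_ty operand, which tells the IR verifier and the backend that the argument must be a compile-time constant, matching the bit field encoded directly in the instruction. Written out in full, the attribute names the operand index; the sketch below uses the upstream ImmArg<ArgIndex<...>> spelling and a hypothetical intrinsic name:

  // The third operand (index 2) must be an immediate; the verifier
  // rejects calls that pass it a runtime value.
  def int_ppc_example_imm : ClangBuiltin<"__builtin_ppc_example_imm">,
      Intrinsic<[llvm_v16i8_ty],
                [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
                [IntrNoMem, ImmArg<ArgIndex<2>>]>;
)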
- def int_ppc_altivec_vinsblx : GCCBuiltin<"__builtin_altivec_vinsblx">, + def int_ppc_altivec_vinsblx : ClangBuiltin<"__builtin_altivec_vinsblx">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinsbrx : GCCBuiltin<"__builtin_altivec_vinsbrx">, + def int_ppc_altivec_vinsbrx : ClangBuiltin<"__builtin_altivec_vinsbrx">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinshlx : GCCBuiltin<"__builtin_altivec_vinshlx">, + def int_ppc_altivec_vinshlx : ClangBuiltin<"__builtin_altivec_vinshlx">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinshrx : GCCBuiltin<"__builtin_altivec_vinshrx">, + def int_ppc_altivec_vinshrx : ClangBuiltin<"__builtin_altivec_vinshrx">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinswlx : GCCBuiltin<"__builtin_altivec_vinswlx">, + def int_ppc_altivec_vinswlx : ClangBuiltin<"__builtin_altivec_vinswlx">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinswrx : GCCBuiltin<"__builtin_altivec_vinswrx">, + def int_ppc_altivec_vinswrx : ClangBuiltin<"__builtin_altivec_vinswrx">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinsdlx : GCCBuiltin<"__builtin_altivec_vinsdlx">, + def int_ppc_altivec_vinsdlx : ClangBuiltin<"__builtin_altivec_vinsdlx">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vinsdrx : GCCBuiltin<"__builtin_altivec_vinsdrx">, + def int_ppc_altivec_vinsdrx : ClangBuiltin<"__builtin_altivec_vinsdrx">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vinsbvlx : GCCBuiltin<"__builtin_altivec_vinsbvlx">, + def int_ppc_altivec_vinsbvlx : ClangBuiltin<"__builtin_altivec_vinsbvlx">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vinsbvrx : GCCBuiltin<"__builtin_altivec_vinsbvrx">, + def int_ppc_altivec_vinsbvrx : ClangBuiltin<"__builtin_altivec_vinsbvrx">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vinshvlx : GCCBuiltin<"__builtin_altivec_vinshvlx">, + def int_ppc_altivec_vinshvlx : ClangBuiltin<"__builtin_altivec_vinshvlx">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vinshvrx : GCCBuiltin<"__builtin_altivec_vinshvrx">, + def int_ppc_altivec_vinshvrx : ClangBuiltin<"__builtin_altivec_vinshvrx">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vinswvlx : GCCBuiltin<"__builtin_altivec_vinswvlx">, + def int_ppc_altivec_vinswvlx : ClangBuiltin<"__builtin_altivec_vinswvlx">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vinswvrx : GCCBuiltin<"__builtin_altivec_vinswvrx">, + def int_ppc_altivec_vinswvrx : ClangBuiltin<"__builtin_altivec_vinswvrx">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; @@ -710,35 +744,35 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; // P10 Vector Extract. 
- def int_ppc_altivec_vextdubvlx : GCCBuiltin<"__builtin_altivec_vextdubvlx">, + def int_ppc_altivec_vextdubvlx : ClangBuiltin<"__builtin_altivec_vextdubvlx">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextdubvrx : GCCBuiltin<"__builtin_altivec_vextdubvrx">, + def int_ppc_altivec_vextdubvrx : ClangBuiltin<"__builtin_altivec_vextdubvrx">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextduhvlx : GCCBuiltin<"__builtin_altivec_vextduhvlx">, + def int_ppc_altivec_vextduhvlx : ClangBuiltin<"__builtin_altivec_vextduhvlx">, Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextduhvrx : GCCBuiltin<"__builtin_altivec_vextduhvrx">, + def int_ppc_altivec_vextduhvrx : ClangBuiltin<"__builtin_altivec_vextduhvrx">, Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextduwvlx : GCCBuiltin<"__builtin_altivec_vextduwvlx">, + def int_ppc_altivec_vextduwvlx : ClangBuiltin<"__builtin_altivec_vextduwvlx">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextduwvrx : GCCBuiltin<"__builtin_altivec_vextduwvrx">, + def int_ppc_altivec_vextduwvrx : ClangBuiltin<"__builtin_altivec_vextduwvrx">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextddvlx : GCCBuiltin<"__builtin_altivec_vextddvlx">, + def int_ppc_altivec_vextddvlx : ClangBuiltin<"__builtin_altivec_vextddvlx">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextddvrx : GCCBuiltin<"__builtin_altivec_vextddvrx">, + def int_ppc_altivec_vextddvrx : ClangBuiltin<"__builtin_altivec_vextddvrx">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; @@ -796,229 +830,229 @@ def int_ppc_altivec_vsubcuq : PowerPC_Vec_QQQ_Intrinsic<"vsubcuq">; let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Saturating multiply-adds. - def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">, + def int_ppc_altivec_vmhaddshs : ClangBuiltin<"__builtin_altivec_vmhaddshs">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">, + def int_ppc_altivec_vmhraddshs : ClangBuiltin<"__builtin_altivec_vmhraddshs">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">, + def int_ppc_altivec_vmaddfp : ClangBuiltin<"__builtin_altivec_vmaddfp">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vnmsubfp : GCCBuiltin<"__builtin_altivec_vnmsubfp">, + def int_ppc_altivec_vnmsubfp : ClangBuiltin<"__builtin_altivec_vnmsubfp">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; // Vector Multiply Sum Instructions. 
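(Before the multiply-sum group, a note on the attribute pairing above: the saturating definitions vmhaddshs and vmhraddshs combine IntrNoMem with IntrHasSideEffects. The conventional reading, hedged here since the file does not spell it out, is that these operations touch no memory but may set the saturation bit in the VSCR, so IntrHasSideEffects is needed to stop the optimizer from deleting or speculating them as pure computations. A sketch with a hypothetical name:

  // No memory access, but the instruction may update VSCR[SAT];
  // IntrHasSideEffects keeps it from being treated as removable.
  def int_ppc_example_sat : ClangBuiltin<"__builtin_ppc_example_sat">,
      Intrinsic<[llvm_v8i16_ty],
                [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
                [IntrNoMem, IntrHasSideEffects]>;
)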
- def int_ppc_altivec_vmsummbm : GCCBuiltin<"__builtin_altivec_vmsummbm">, + def int_ppc_altivec_vmsummbm : ClangBuiltin<"__builtin_altivec_vmsummbm">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vmsumshm : GCCBuiltin<"__builtin_altivec_vmsumshm">, + def int_ppc_altivec_vmsumshm : ClangBuiltin<"__builtin_altivec_vmsumshm">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">, + def int_ppc_altivec_vmsumshs : ClangBuiltin<"__builtin_altivec_vmsumshs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">, + def int_ppc_altivec_vmsumubm : ClangBuiltin<"__builtin_altivec_vmsumubm">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">, + def int_ppc_altivec_vmsumuhm : ClangBuiltin<"__builtin_altivec_vmsumuhm">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">, + def int_ppc_altivec_vmsumudm : ClangBuiltin<"__builtin_altivec_vmsumudm">, Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, + def int_ppc_altivec_vmsumuhs : ClangBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">, + def int_ppc_altivec_vmsumcud : ClangBuiltin<"__builtin_altivec_vmsumcud">, Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>; // Vector Multiply Instructions. 
- def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">, + def int_ppc_altivec_vmulesb : ClangBuiltin<"__builtin_altivec_vmulesb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">, + def int_ppc_altivec_vmulesh : ClangBuiltin<"__builtin_altivec_vmulesh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">, + def int_ppc_altivec_vmulesw : ClangBuiltin<"__builtin_altivec_vmulesw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">; - def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">, + def int_ppc_altivec_vmuleub : ClangBuiltin<"__builtin_altivec_vmuleub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">, + def int_ppc_altivec_vmuleuh : ClangBuiltin<"__builtin_altivec_vmuleuh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">, + def int_ppc_altivec_vmuleuw : ClangBuiltin<"__builtin_altivec_vmuleuw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">; - def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">, + def int_ppc_altivec_vmulosb : ClangBuiltin<"__builtin_altivec_vmulosb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">, + def int_ppc_altivec_vmulosh : ClangBuiltin<"__builtin_altivec_vmulosh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">, + def int_ppc_altivec_vmulosw : ClangBuiltin<"__builtin_altivec_vmulosw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">; - def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">, + def int_ppc_altivec_vmuloub : ClangBuiltin<"__builtin_altivec_vmuloub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">, + def int_ppc_altivec_vmulouh : ClangBuiltin<"__builtin_altivec_vmulouh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">, + def int_ppc_altivec_vmulouw : ClangBuiltin<"__builtin_altivec_vmulouw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">; // Vector Sum Instructions. 
- def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">, + def int_ppc_altivec_vsumsws : ClangBuiltin<"__builtin_altivec_vsumsws">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">, + def int_ppc_altivec_vsum2sws : ClangBuiltin<"__builtin_altivec_vsum2sws">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">, + def int_ppc_altivec_vsum4sbs : ClangBuiltin<"__builtin_altivec_vsum4sbs">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">, + def int_ppc_altivec_vsum4shs : ClangBuiltin<"__builtin_altivec_vsum4shs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">, + def int_ppc_altivec_vsum4ubs : ClangBuiltin<"__builtin_altivec_vsum4ubs">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; // Vector Sign Extension Instructions - def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">, + def int_ppc_altivec_vextsb2w : ClangBuiltin<"__builtin_altivec_vextsb2w">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">, + def int_ppc_altivec_vextsb2d : ClangBuiltin<"__builtin_altivec_vextsb2d">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">, + def int_ppc_altivec_vextsh2w : ClangBuiltin<"__builtin_altivec_vextsh2w">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">, + def int_ppc_altivec_vextsh2d : ClangBuiltin<"__builtin_altivec_vextsh2d">, Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">, + def int_ppc_altivec_vextsw2d : ClangBuiltin<"__builtin_altivec_vextsw2d">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">, + def int_ppc_altivec_vextsd2q : ClangBuiltin<"__builtin_altivec_vextsd2q">, Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>; // Other multiplies. - def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">, + def int_ppc_altivec_vmladduhm : ClangBuiltin<"__builtin_altivec_vmladduhm">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; // Packs. 
- def int_ppc_altivec_vpkpx : GCCBuiltin<"__builtin_altivec_vpkpx">, + def int_ppc_altivec_vpkpx : ClangBuiltin<"__builtin_altivec_vpkpx">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">, + def int_ppc_altivec_vpkshss : ClangBuiltin<"__builtin_altivec_vpkshss">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">, + def int_ppc_altivec_vpkshus : ClangBuiltin<"__builtin_altivec_vpkshus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">, + def int_ppc_altivec_vpkswss : ClangBuiltin<"__builtin_altivec_vpkswss">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">, + def int_ppc_altivec_vpkswus : ClangBuiltin<"__builtin_altivec_vpkswus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">, + def int_ppc_altivec_vpksdss : ClangBuiltin<"__builtin_altivec_vpksdss">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem, IntrHasSideEffects]>; - def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">, + def int_ppc_altivec_vpksdus : ClangBuiltin<"__builtin_altivec_vpksdus">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem, IntrHasSideEffects]>; // vpkuhum is lowered to a shuffle. - def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">, + def int_ppc_altivec_vpkuhus : ClangBuiltin<"__builtin_altivec_vpkuhus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrHasSideEffects]>; // vpkuwum is lowered to a shuffle. - def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">, + def int_ppc_altivec_vpkuwus : ClangBuiltin<"__builtin_altivec_vpkuwus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; // vpkudum is lowered to a shuffle. - def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">, + def int_ppc_altivec_vpkudus : ClangBuiltin<"__builtin_altivec_vpkudus">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem, IntrHasSideEffects]>; // Unpacks. 
- def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">, + def int_ppc_altivec_vupkhpx : ClangBuiltin<"__builtin_altivec_vupkhpx">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vupkhsb : GCCBuiltin<"__builtin_altivec_vupkhsb">, + def int_ppc_altivec_vupkhsb : ClangBuiltin<"__builtin_altivec_vupkhsb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">, + def int_ppc_altivec_vupkhsh : ClangBuiltin<"__builtin_altivec_vupkhsh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vupkhsw : GCCBuiltin<"__builtin_altivec_vupkhsw">, + def int_ppc_altivec_vupkhsw : ClangBuiltin<"__builtin_altivec_vupkhsw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">, + def int_ppc_altivec_vupklpx : ClangBuiltin<"__builtin_altivec_vupklpx">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">, + def int_ppc_altivec_vupklsb : ClangBuiltin<"__builtin_altivec_vupklsb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">, + def int_ppc_altivec_vupklsh : ClangBuiltin<"__builtin_altivec_vupklsh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_ppc_altivec_vupklsw : GCCBuiltin<"__builtin_altivec_vupklsw">, + def int_ppc_altivec_vupklsw : ClangBuiltin<"__builtin_altivec_vupklsw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; // FP <-> integer conversion. - def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">, + def int_ppc_altivec_vcfsx : ClangBuiltin<"__builtin_altivec_vcfsx">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">, + def int_ppc_altivec_vcfux : ClangBuiltin<"__builtin_altivec_vcfux">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">, + def int_ppc_altivec_vctsxs : ClangBuiltin<"__builtin_altivec_vctsxs">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">, + def int_ppc_altivec_vctuxs : ClangBuiltin<"__builtin_altivec_vctuxs">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">, + def int_ppc_altivec_vrfim : ClangBuiltin<"__builtin_altivec_vrfim">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vrfin : GCCBuiltin<"__builtin_altivec_vrfin">, + def int_ppc_altivec_vrfin : ClangBuiltin<"__builtin_altivec_vrfin">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vrfip : GCCBuiltin<"__builtin_altivec_vrfip">, + def int_ppc_altivec_vrfip : ClangBuiltin<"__builtin_altivec_vrfip">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">, + def int_ppc_altivec_vrfiz : ClangBuiltin<"__builtin_altivec_vrfiz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; // Add Extended Quadword - def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">, + def int_ppc_altivec_vaddeuqm : ClangBuiltin<"__builtin_altivec_vaddeuqm">, 
Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">, + def int_ppc_altivec_vaddecuq : ClangBuiltin<"__builtin_altivec_vaddecuq">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; // Sub Extended Quadword - def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">, + def int_ppc_altivec_vsubeuqm : ClangBuiltin<"__builtin_altivec_vsubeuqm">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; - def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">, + def int_ppc_altivec_vsubecuq : ClangBuiltin<"__builtin_altivec_vsubecuq">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; // P10 Vector Count Leading / Trailing Zeroes under bit Mask Builtins. - def int_ppc_altivec_vclzdm : GCCBuiltin<"__builtin_altivec_vclzdm">, + def int_ppc_altivec_vclzdm : ClangBuiltin<"__builtin_altivec_vclzdm">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_ppc_altivec_vctzdm : GCCBuiltin<"__builtin_altivec_vctzdm">, + def int_ppc_altivec_vctzdm : ClangBuiltin<"__builtin_altivec_vctzdm">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } @@ -1056,18 +1090,18 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_lvsr : Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">, + def int_ppc_altivec_vperm : ClangBuiltin<"__builtin_altivec_vperm_4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">, + def int_ppc_altivec_vsel : ClangBuiltin<"__builtin_altivec_vsel_4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">, + def int_ppc_altivec_vgbbd : ClangBuiltin<"__builtin_altivec_vgbbd">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">, + def int_ppc_altivec_vbpermq : ClangBuiltin<"__builtin_altivec_vbpermq">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_ppc_altivec_vbpermd : GCCBuiltin<"__builtin_altivec_vbpermd">, + def int_ppc_altivec_vbpermd : ClangBuiltin<"__builtin_altivec_vbpermd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>; } @@ -1081,23 +1115,23 @@ def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">; // Crypto let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_crypto_vsbox : - GCCBuiltin<"__builtin_altivec_crypto_vsbox">, + ClangBuiltin<"__builtin_altivec_crypto_vsbox">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_ppc_altivec_crypto_vpermxor : - GCCBuiltin<"__builtin_altivec_crypto_vpermxor">, + ClangBuiltin<"__builtin_altivec_crypto_vpermxor">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_crypto_vpermxor_be : - GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">, + ClangBuiltin<"__builtin_altivec_crypto_vpermxor_be">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_crypto_vshasigmad : - GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">, + ClangBuiltin<"__builtin_altivec_crypto_vshasigmad">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_ppc_altivec_crypto_vshasigmaw : - GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">, + ClangBuiltin<"__builtin_altivec_crypto_vshasigmaw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; } @@ -1224,52 +1258,52 @@ def int_ppc_vsx_xvrdpip : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; // Vector reciprocal estimate -def int_ppc_vsx_xvresp : GCCBuiltin<"__builtin_vsx_xvresp">, +def int_ppc_vsx_xvresp : ClangBuiltin<"__builtin_vsx_xvresp">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_ppc_vsx_xvredp : GCCBuiltin<"__builtin_vsx_xvredp">, +def int_ppc_vsx_xvredp : ClangBuiltin<"__builtin_vsx_xvredp">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; // Vector rsqrte -def int_ppc_vsx_xvrsqrtesp : GCCBuiltin<"__builtin_vsx_xvrsqrtesp">, +def int_ppc_vsx_xvrsqrtesp : ClangBuiltin<"__builtin_vsx_xvrsqrtesp">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_ppc_vsx_xvrsqrtedp : GCCBuiltin<"__builtin_vsx_xvrsqrtedp">, +def int_ppc_vsx_xvrsqrtedp : ClangBuiltin<"__builtin_vsx_xvrsqrtedp">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; // Vector compare def int_ppc_vsx_xvcmpeqdp : PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_ppc_vsx_xvcmpeqdp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqdp_p">, +def int_ppc_vsx_xvcmpeqdp_p : ClangBuiltin<"__builtin_vsx_xvcmpeqdp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], [IntrNoMem]>; def int_ppc_vsx_xvcmpeqsp : PowerPC_VSX_Intrinsic<"xvcmpeqsp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_ppc_vsx_xvcmpeqsp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqsp_p">, +def int_ppc_vsx_xvcmpeqsp_p : ClangBuiltin<"__builtin_vsx_xvcmpeqsp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xvcmpgedp : PowerPC_VSX_Intrinsic<"xvcmpgedp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_ppc_vsx_xvcmpgedp_p : GCCBuiltin<"__builtin_vsx_xvcmpgedp_p">, +def int_ppc_vsx_xvcmpgedp_p : ClangBuiltin<"__builtin_vsx_xvcmpgedp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], [IntrNoMem]>; def int_ppc_vsx_xvcmpgesp : PowerPC_VSX_Intrinsic<"xvcmpgesp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_ppc_vsx_xvcmpgesp_p : GCCBuiltin<"__builtin_vsx_xvcmpgesp_p">, +def int_ppc_vsx_xvcmpgesp_p : ClangBuiltin<"__builtin_vsx_xvcmpgesp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xvcmpgtdp : 
PowerPC_VSX_Intrinsic<"xvcmpgtdp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_ppc_vsx_xvcmpgtdp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtdp_p">, +def int_ppc_vsx_xvcmpgtdp_p : ClangBuiltin<"__builtin_vsx_xvcmpgtdp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], [IntrNoMem]>; def int_ppc_vsx_xvcmpgtsp : PowerPC_VSX_Intrinsic<"xvcmpgtsp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -def int_ppc_vsx_xvcmpgtsp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtsp_p">, +def int_ppc_vsx_xvcmpgtsp_p : ClangBuiltin<"__builtin_vsx_xvcmpgtsp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xxleqv : @@ -1381,21 +1415,21 @@ def int_ppc_vsx_xxgenpcvdm : // P10 VSX Vector permute extended. def int_ppc_vsx_xxpermx : - GCCBuiltin<"__builtin_vsx_xxpermx">, + ClangBuiltin<"__builtin_vsx_xxpermx">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,llvm_v16i8_ty,llvm_v16i8_ty,llvm_i32_ty], [IntrNoMem, ImmArg>]>; // P10 VSX Vector Blend Variable. -def int_ppc_vsx_xxblendvb: GCCBuiltin<"__builtin_vsx_xxblendvb">, +def int_ppc_vsx_xxblendvb: ClangBuiltin<"__builtin_vsx_xxblendvb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; -def int_ppc_vsx_xxblendvh: GCCBuiltin<"__builtin_vsx_xxblendvh">, +def int_ppc_vsx_xxblendvh: ClangBuiltin<"__builtin_vsx_xxblendvh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; -def int_ppc_vsx_xxblendvw: GCCBuiltin<"__builtin_vsx_xxblendvw">, +def int_ppc_vsx_xxblendvw: ClangBuiltin<"__builtin_vsx_xxblendvw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; -def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">, +def int_ppc_vsx_xxblendvd: ClangBuiltin<"__builtin_vsx_xxblendvd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } @@ -1405,64 +1439,68 @@ def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">, let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
-def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">, +def int_ppc_tbegin : ClangBuiltin<"__builtin_tbegin">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg>]>; -def int_ppc_tend : GCCBuiltin<"__builtin_tend">, +def int_ppc_tend : ClangBuiltin<"__builtin_tend">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg>]>; -def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">, +def int_ppc_tabort : ClangBuiltin<"__builtin_tabort">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; -def int_ppc_tabortwc : GCCBuiltin<"__builtin_tabortwc">, +def int_ppc_tabortwc : ClangBuiltin<"__builtin_tabortwc">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_ppc_tabortwci : GCCBuiltin<"__builtin_tabortwci">, +def int_ppc_tabortwci : ClangBuiltin<"__builtin_tabortwci">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_ppc_tabortdc : GCCBuiltin<"__builtin_tabortdc">, +def int_ppc_tabortdc : ClangBuiltin<"__builtin_tabortdc">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_ppc_tabortdci : GCCBuiltin<"__builtin_tabortdci">, +def int_ppc_tabortdci : ClangBuiltin<"__builtin_tabortdci">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_ppc_tcheck : GCCBuiltin<"__builtin_tcheck">, +def int_ppc_tcheck : ClangBuiltin<"__builtin_tcheck">, Intrinsic<[llvm_i32_ty], [], []>; -def int_ppc_treclaim : GCCBuiltin<"__builtin_treclaim">, +def int_ppc_treclaim : ClangBuiltin<"__builtin_treclaim">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; -def int_ppc_trechkpt : GCCBuiltin<"__builtin_trechkpt">, +def int_ppc_trechkpt : ClangBuiltin<"__builtin_trechkpt">, Intrinsic<[llvm_i32_ty], [], []>; -def int_ppc_tsr : GCCBuiltin<"__builtin_tsr">, +def int_ppc_tsr : ClangBuiltin<"__builtin_tsr">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; -def int_ppc_get_texasr : GCCBuiltin<"__builtin_get_texasr">, +def int_ppc_get_texasr : ClangBuiltin<"__builtin_get_texasr">, Intrinsic<[llvm_i64_ty], [], []>; -def int_ppc_get_texasru : GCCBuiltin<"__builtin_get_texasru">, +def int_ppc_get_texasru : ClangBuiltin<"__builtin_get_texasru">, Intrinsic<[llvm_i64_ty], [], []>; -def int_ppc_get_tfhar : GCCBuiltin<"__builtin_get_tfhar">, +def int_ppc_get_tfhar : ClangBuiltin<"__builtin_get_tfhar">, Intrinsic<[llvm_i64_ty], [], []>; -def int_ppc_get_tfiar : GCCBuiltin<"__builtin_get_tfiar">, +def int_ppc_get_tfiar : ClangBuiltin<"__builtin_get_tfiar">, Intrinsic<[llvm_i64_ty], [], []>; -def int_ppc_set_texasr : GCCBuiltin<"__builtin_set_texasr">, +def int_ppc_set_texasr : ClangBuiltin<"__builtin_set_texasr">, Intrinsic<[], [llvm_i64_ty], []>; -def int_ppc_set_texasru : GCCBuiltin<"__builtin_set_texasru">, +def int_ppc_set_texasru : ClangBuiltin<"__builtin_set_texasru">, Intrinsic<[], [llvm_i64_ty], []>; -def int_ppc_set_tfhar : GCCBuiltin<"__builtin_set_tfhar">, +def int_ppc_set_tfhar : ClangBuiltin<"__builtin_set_tfhar">, Intrinsic<[], [llvm_i64_ty], []>; -def int_ppc_set_tfiar : GCCBuiltin<"__builtin_set_tfiar">, +def int_ppc_set_tfiar : ClangBuiltin<"__builtin_set_tfiar">, Intrinsic<[], [llvm_i64_ty], []>; // Extended mnemonics -def int_ppc_tendall : GCCBuiltin<"__builtin_tendall">, +def int_ppc_tendall : ClangBuiltin<"__builtin_tendall">, Intrinsic<[llvm_i32_ty], [], []>; -def int_ppc_tresume : GCCBuiltin<"__builtin_tresume">, +def int_ppc_tresume : ClangBuiltin<"__builtin_tresume">, Intrinsic<[llvm_i32_ty], [], []>; -def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">, +def int_ppc_tsuspend : ClangBuiltin<"__builtin_tsuspend">, 
Intrinsic<[llvm_i32_ty], [], []>; -def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">, +def int_ppc_ttest : ClangBuiltin<"__builtin_ttest">, Intrinsic<[llvm_i64_ty], [], []>; -def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>; +// We currently use llvm.ppc.cfence in the context of atomic loads, which +// in LLVM IR require their type to be an integer, pointer, or +// floating-point type. So llvm_any_ty here refers to the types mentioned +// above. The backend is expected to lower these types to appropriate MVTs. +def int_ppc_cfence : Intrinsic<[], [llvm_any_ty], []>; // PowerPC set FPSCR Intrinsic Definitions. -def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">, +def int_ppc_setrnd : ClangBuiltin<"__builtin_setrnd">, Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; } @@ -1552,218 +1590,212 @@ let TargetPrefix = "ppc" in { // XL Compat intrinsics. let TargetPrefix = "ppc" in { - def int_ppc_dcbfl : GCCBuiltin<"__builtin_ppc_dcbfl">, + def int_ppc_dcbfl : ClangBuiltin<"__builtin_ppc_dcbfl">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; - def int_ppc_dcbflp : GCCBuiltin<"__builtin_ppc_dcbflp">, + def int_ppc_dcbflp : ClangBuiltin<"__builtin_ppc_dcbflp">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; - def int_ppc_dcbst : GCCBuiltin<"__builtin_ppc_dcbst">, + def int_ppc_dcbst : ClangBuiltin<"__builtin_ppc_dcbst">, Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbt : GCCBuiltin<"__builtin_ppc_dcbt">, + def int_ppc_dcbt : ClangBuiltin<"__builtin_ppc_dcbt">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly, NoCapture>]>; - def int_ppc_dcbtst : GCCBuiltin<"__builtin_ppc_dcbtst">, + def int_ppc_dcbtst : ClangBuiltin<"__builtin_ppc_dcbtst">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly, NoCapture>]>; - def int_ppc_dcbz : GCCBuiltin<"__builtin_ppc_dcbz">, + def int_ppc_dcbz : ClangBuiltin<"__builtin_ppc_dcbz">, Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_icbt : GCCBuiltin<"__builtin_ppc_icbt">, + def int_ppc_icbt : ClangBuiltin<"__builtin_ppc_icbt">, Intrinsic<[], [llvm_ptr_ty], []>; // Population Count in each Byte. def int_ppc_popcntb : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>; // sync instruction (i.e.
sync 0, a.k.a hwsync) - def int_ppc_sync : GCCBuiltin<"__builtin_ppc_sync">, + def int_ppc_sync : ClangBuiltin<"__builtin_ppc_sync">, Intrinsic<[], [], []>; - def int_ppc_iospace_sync : GCCBuiltin<"__builtin_ppc_iospace_sync">, + def int_ppc_iospace_sync : ClangBuiltin<"__builtin_ppc_iospace_sync">, Intrinsic<[], [], []>; // isync instruction - def int_ppc_isync : GCCBuiltin<"__builtin_ppc_isync">, + def int_ppc_isync : ClangBuiltin<"__builtin_ppc_isync">, Intrinsic<[], [], []>; // lwsync is sync 1 - def int_ppc_lwsync : GCCBuiltin<"__builtin_ppc_lwsync">, + def int_ppc_lwsync : ClangBuiltin<"__builtin_ppc_lwsync">, Intrinsic<[], [], []>; - def int_ppc_iospace_lwsync : GCCBuiltin<"__builtin_ppc_iospace_lwsync">, + def int_ppc_iospace_lwsync : ClangBuiltin<"__builtin_ppc_iospace_lwsync">, Intrinsic<[], [], []>; // eieio instruction - def int_ppc_eieio : GCCBuiltin<"__builtin_ppc_eieio">, + def int_ppc_eieio : ClangBuiltin<"__builtin_ppc_eieio">, Intrinsic<[],[],[]>; - def int_ppc_iospace_eieio : GCCBuiltin<"__builtin_ppc_iospace_eieio">, + def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">, Intrinsic<[],[],[]>; - def int_ppc_stdcx : GCCBuiltin<"__builtin_ppc_stdcx">, + def int_ppc_stdcx : ClangBuiltin<"__builtin_ppc_stdcx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWriteMem]>; - def int_ppc_stwcx : GCCBuiltin<"__builtin_ppc_stwcx">, + def int_ppc_stwcx : ClangBuiltin<"__builtin_ppc_stwcx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWriteMem]>; def int_ppc_sthcx : Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ], [IntrWriteMem]>; - def int_ppc_stbcx : GCCBuiltin<"__builtin_ppc_stbcx">, + def int_ppc_stbcx : ClangBuiltin<"__builtin_ppc_stbcx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWriteMem]>; - def int_ppc_dcbtstt : GCCBuiltin<"__builtin_ppc_dcbtstt">, + def int_ppc_dcbtstt : ClangBuiltin<"__builtin_ppc_dcbtstt">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly, NoCapture>]>; - def int_ppc_dcbtt : GCCBuiltin<"__builtin_ppc_dcbtt">, + def int_ppc_dcbtt : ClangBuiltin<"__builtin_ppc_dcbtt">, Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly, NoCapture>]>; - def int_ppc_mftbu : GCCBuiltin<"__builtin_ppc_mftbu">, + def int_ppc_mftbu : ClangBuiltin<"__builtin_ppc_mftbu">, Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; - def int_ppc_mfmsr : GCCBuiltin<"__builtin_ppc_mfmsr">, + def int_ppc_mfmsr : ClangBuiltin<"__builtin_ppc_mfmsr">, Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; def int_ppc_mfspr : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [ImmArg>]>; def int_ppc_mtmsr - : GCCBuiltin<"__builtin_ppc_mtmsr">, Intrinsic<[], [llvm_i32_ty], []>; + : ClangBuiltin<"__builtin_ppc_mtmsr">, Intrinsic<[], [llvm_i32_ty], []>; def int_ppc_mtspr : Intrinsic<[], [llvm_i32_ty, llvm_anyint_ty], [ImmArg>]>; - def int_ppc_stfiw : GCCBuiltin<"__builtin_ppc_stfiw">, + def int_ppc_stfiw : ClangBuiltin<"__builtin_ppc_stfiw">, Intrinsic<[], [llvm_ptr_ty, llvm_double_ty], [IntrWriteMem]>; // compare def int_ppc_cmpeqb - : GCCBuiltin<"__builtin_ppc_cmpeqb">, + : ClangBuiltin<"__builtin_ppc_cmpeqb">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_cmprb - : GCCBuiltin<"__builtin_ppc_cmprb">, + : ClangBuiltin<"__builtin_ppc_cmprb">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_ppc_setb - : GCCBuiltin<"__builtin_ppc_setb">, + : ClangBuiltin<"__builtin_ppc_setb">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_cmpb : Intrinsic<[llvm_anyint_ty], 
[llvm_anyint_ty, llvm_anyint_ty], [IntrNoMem]>; // multiply def int_ppc_mulhd - : GCCBuiltin<"__builtin_ppc_mulhd">, + : ClangBuiltin<"__builtin_ppc_mulhd">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_mulhdu - : GCCBuiltin<"__builtin_ppc_mulhdu">, + : ClangBuiltin<"__builtin_ppc_mulhdu">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_mulhw - : GCCBuiltin<"__builtin_ppc_mulhw">, + : ClangBuiltin<"__builtin_ppc_mulhw">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_mulhwu - : GCCBuiltin<"__builtin_ppc_mulhwu">, + : ClangBuiltin<"__builtin_ppc_mulhwu">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_maddhd - : GCCBuiltin<"__builtin_ppc_maddhd">, + : ClangBuiltin<"__builtin_ppc_maddhd">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_maddhdu - : GCCBuiltin<"__builtin_ppc_maddhdu">, + : ClangBuiltin<"__builtin_ppc_maddhdu">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_maddld - : GCCBuiltin<"__builtin_ppc_maddld">, + : ClangBuiltin<"__builtin_ppc_maddld">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // load def int_ppc_load2r : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; def int_ppc_load4r - : GCCBuiltin<"__builtin_ppc_load4r">, + : ClangBuiltin<"__builtin_ppc_load4r">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; def int_ppc_load8r - : GCCBuiltin<"__builtin_ppc_load8r">, + : ClangBuiltin<"__builtin_ppc_load8r">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; // store def int_ppc_store2r - : GCCBuiltin<"__builtin_ppc_store2r">, + : ClangBuiltin<"__builtin_ppc_store2r">, Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [IntrWriteMem]>; def int_ppc_store4r - : GCCBuiltin<"__builtin_ppc_store4r">, + : ClangBuiltin<"__builtin_ppc_store4r">, Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [IntrWriteMem]>; def int_ppc_store8r - : GCCBuiltin<"__builtin_ppc_store8r">, + : ClangBuiltin<"__builtin_ppc_store8r">, Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], [IntrWriteMem]>; def int_ppc_insert_exp - : GCCBuiltin<"__builtin_ppc_insert_exp">, + : ClangBuiltin<"__builtin_ppc_insert_exp">, Intrinsic <[llvm_double_ty], [llvm_double_ty, llvm_i64_ty], [IntrNoMem]>; def int_ppc_extract_exp - : GCCBuiltin<"__builtin_ppc_extract_exp">, + : ClangBuiltin<"__builtin_ppc_extract_exp">, Intrinsic <[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_extract_sig - : GCCBuiltin<"__builtin_ppc_extract_sig">, + : ClangBuiltin<"__builtin_ppc_extract_sig">, Intrinsic <[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_mtfsb0 - : GCCBuiltin<"__builtin_ppc_mtfsb0">, + : ClangBuiltin<"__builtin_ppc_mtfsb0">, Intrinsic <[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, ImmArg>]>; def int_ppc_mtfsb1 - : GCCBuiltin<"__builtin_ppc_mtfsb1">, + : ClangBuiltin<"__builtin_ppc_mtfsb1">, Intrinsic <[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, ImmArg>]>; def int_ppc_mtfsf : Intrinsic <[], [llvm_i32_ty, llvm_double_ty], [IntrNoMem, IntrHasSideEffects, ImmArg>]>; def int_ppc_mtfsfi - : GCCBuiltin<"__builtin_ppc_mtfsfi">, + : ClangBuiltin<"__builtin_ppc_mtfsfi">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, ImmArg>,ImmArg>]>; def int_ppc_fmsub - : GCCBuiltin<"__builtin_ppc_fmsub">, + : ClangBuiltin<"__builtin_ppc_fmsub">, Intrinsic <[llvm_double_ty], [llvm_double_ty, 
llvm_double_ty, llvm_double_ty], [IntrNoMem]>; def int_ppc_fmsubs - : GCCBuiltin<"__builtin_ppc_fmsubs">, + : ClangBuiltin<"__builtin_ppc_fmsubs">, Intrinsic <[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_ppc_fnmadd - : GCCBuiltin<"__builtin_ppc_fnmadd">, + : ClangBuiltin<"__builtin_ppc_fnmadd">, Intrinsic <[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem]>; def int_ppc_fnmadds - : GCCBuiltin<"__builtin_ppc_fnmadds">, + : ClangBuiltin<"__builtin_ppc_fnmadds">, Intrinsic <[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_ppc_fnmsub - : GCCBuiltin<"__builtin_ppc_fnmsub">, - Intrinsic <[llvm_double_ty], - [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem]>; - def int_ppc_fnmsubs - : GCCBuiltin<"__builtin_ppc_fnmsubs">, - Intrinsic <[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem]>; + : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_ppc_fre - : GCCBuiltin<"__builtin_ppc_fre">, + : ClangBuiltin<"__builtin_ppc_fre">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_fres - : GCCBuiltin<"__builtin_ppc_fres">, + : ClangBuiltin<"__builtin_ppc_fres">, Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_ppc_addex - : GCCBuiltin<"__builtin_ppc_addex">, + : ClangBuiltin<"__builtin_ppc_addex">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, ImmArg>]>; - def int_ppc_fsel : GCCBuiltin<"__builtin_ppc_fsel">, + def int_ppc_fsel : ClangBuiltin<"__builtin_ppc_fsel">, Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_ppc_fsels : GCCBuiltin<"__builtin_ppc_fsels">, + def int_ppc_fsels : ClangBuiltin<"__builtin_ppc_fsels">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_ppc_frsqrte : GCCBuiltin<"__builtin_ppc_frsqrte">, + def int_ppc_frsqrte : ClangBuiltin<"__builtin_ppc_frsqrte">, Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_ppc_frsqrtes : GCCBuiltin<"__builtin_ppc_frsqrtes">, + def int_ppc_frsqrtes : ClangBuiltin<"__builtin_ppc_frsqrtes">, Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_ppc_compare_exp_uo : GCCBuiltin<"__builtin_ppc_compare_exp_uo">, + def int_ppc_compare_exp_uo : ClangBuiltin<"__builtin_ppc_compare_exp_uo">, Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_ppc_compare_exp_lt : GCCBuiltin<"__builtin_ppc_compare_exp_lt">, + def int_ppc_compare_exp_lt : ClangBuiltin<"__builtin_ppc_compare_exp_lt">, Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_ppc_compare_exp_gt : GCCBuiltin<"__builtin_ppc_compare_exp_gt">, + def int_ppc_compare_exp_gt : ClangBuiltin<"__builtin_ppc_compare_exp_gt">, Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; - def int_ppc_compare_exp_eq : GCCBuiltin<"__builtin_ppc_compare_exp_eq">, + def int_ppc_compare_exp_eq : ClangBuiltin<"__builtin_ppc_compare_exp_eq">, Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; @@ -1773,6 +1805,12 @@ let TargetPrefix = "ppc" in { def int_ppc_test_data_class_f : Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; + def int_ppc_fnabs + : ClangBuiltin<"__builtin_ppc_fnabs">, + Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + def int_ppc_fnabss + : 
ClangBuiltin<"__builtin_ppc_fnabss">, + Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_ppc_convert_f128_to_ppcf128 : Intrinsic<[llvm_ppcf128_ty], [llvm_f128_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 6780436bd701..098ca1bc6cfb 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -140,7 +140,7 @@ let TargetPrefix = "riscv" in { // Vectors // The intrinsic does not have any operand that must be extended. -defvar NoSplatOperand = 0xF; +defvar NoScalarOperand = 0xF; // The intrinsic does not have a VL operand. // (e.g., riscv_vmv_x_s and riscv_vfmv_f_s) @@ -150,7 +150,7 @@ class RISCVVIntrinsic { // These intrinsics may accept illegal integer values in their llvm_any_ty // operand, so they have to be extended. Intrinsic IntrinsicID = !cast(NAME); - bits<4> SplatOperand = NoSplatOperand; + bits<4> ScalarOperand = NoScalarOperand; bits<5> VLOperand = NoVLOperand; } @@ -219,8 +219,8 @@ let TargetPrefix = "riscv" in { let VLOperand = 2; } // For unit stride load with mask - // Input: (maskedoff, pointer, mask, vl, ta) - class RISCVUSLoadMask + // Input: (maskedoff, pointer, mask, vl, policy) + class RISCVUSLoadMasked : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType>, @@ -231,11 +231,11 @@ let TargetPrefix = "riscv" in { let VLOperand = 3; } // For unit stride fault-only-first load with mask - // Input: (maskedoff, pointer, mask, vl, ta) + // Input: (maskedoff, pointer, mask, vl, policy) // Output: (data, vl) // NOTE: We model this with default memory properties since we model writing // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. - class RISCVUSLoadFFMask + class RISCVUSLoadFFMasked : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty], [LLVMMatchType<0>, LLVMPointerType>, @@ -255,8 +255,8 @@ let TargetPrefix = "riscv" in { let VLOperand = 3; } // For strided load with mask - // Input: (maskedoff, pointer, stride, mask, vl, ta) - class RISCVSLoadMask + // Input: (maskedoff, pointer, stride, mask, vl, policy) + class RISCVSLoadMasked : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType>, llvm_anyint_ty, @@ -277,8 +277,8 @@ let TargetPrefix = "riscv" in { let VLOperand = 3; } // For indexed load with mask - // Input: (maskedoff, pointer, index, mask, vl, ta) - class RISCVILoadMask + // Input: (maskedoff, pointer, index, mask, vl, policy) + class RISCVILoadMasked : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType>, llvm_anyvector_ty, @@ -300,7 +300,7 @@ let TargetPrefix = "riscv" in { } // For unit stride store with mask // Input: (vector_in, pointer, mask, vl) - class RISCVUSStoreMask + class RISCVUSStoreMasked : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerType>, @@ -321,7 +321,7 @@ let TargetPrefix = "riscv" in { } // For stride store with mask // Input: (vector_in, pointer, stirde, mask, vl) - class RISCVSStoreMask + class RISCVSStoreMasked : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerType>, llvm_anyint_ty, @@ -341,7 +341,7 @@ let TargetPrefix = "riscv" in { } // For indexed store with mask // Input: (vector_in, pointer, index, mask, vl) - class RISCVIStoreMask + class RISCVIStoreMasked : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerType>, llvm_anyvector_ty, @@ -350,16 +350,16 @@ let TargetPrefix = "riscv" in { let VLOperand = 4; } // For destination vector type is the same as source vector. 
- // Input: (vector_in, vl) - class RISCVUnaryAANoMask + // Input: (passthru, vector_in, vl) + class RISCVUnaryAAUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } // For destination vector type is the same as first source vector (with mask). - // Input: (vector_in, mask, vl, ta) - class RISCVUnaryAAMask + // Input: (vector_in, vector_in, mask, vl, policy) + class RISCVUnaryAAMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -367,7 +367,8 @@ let TargetPrefix = "riscv" in { [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 3; } - class RISCVUnaryAAMaskNoTA + // Input: (passthru, vector_in, vector_in, mask, vl) + class RISCVCompress : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], @@ -376,23 +377,24 @@ let TargetPrefix = "riscv" in { } // For destination vector type is the same as first and second source vector. // Input: (vector_in, vector_in, vl) - class RISCVBinaryAAANoMask + class RISCVBinaryAAAUnMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 2; } // For destination vector type is the same as first and second source vector. - // Input: (vector_in, int_vector_in, vl) - class RISCVRGatherVVNoMask + // Input: (passthru, vector_in, int_vector_in, vl) + class RISCVRGatherVVUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first and second source vector. - // Input: (vector_in, vector_in, int_vector_in, vl, ta) - class RISCVRGatherVVMask + // Input: (vector_in, vector_in, int_vector_in, vl, policy) + class RISCVRGatherVVMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -400,17 +402,18 @@ let TargetPrefix = "riscv" in { [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 4; } - // Input: (vector_in, int16_vector_in, vl) - class RISCVRGatherEI16VVNoMask + // Input: (passthru, vector_in, int16_vector_in, vl) + class RISCVRGatherEI16VVUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first and second source vector. - // Input: (vector_in, vector_in, int16_vector_in, vl, ta) - class RISCVRGatherEI16VVMask + // Input: (vector_in, vector_in, int16_vector_in, vl, policy) + class RISCVRGatherEI16VVMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, @@ -421,17 +424,18 @@ let TargetPrefix = "riscv" in { } // For destination vector type is the same as first source vector, and the // second operand is XLen. 
- // Input: (vector_in, xlen_in, vl) - class RISCVGatherVXNoMask + // Input: (passthru, vector_in, xlen_in, vl) + class RISCVGatherVXUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyint_ty, LLVMMatchType<1>], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, + LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // Second operand is XLen. - // Input: (maskedoff, vector_in, xlen_in, mask, vl, ta) - class RISCVGatherVXMask + // Input: (maskedoff, vector_in, xlen_in, mask, vl, policy) + class RISCVGatherVXMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>, @@ -440,38 +444,40 @@ let TargetPrefix = "riscv" in { let VLOperand = 4; } // For destination vector type is the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVBinaryAAXNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryAAXUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let ScalarOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVBinaryAAXMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVBinaryAAXMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, LLVMMatchType<2>], [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 2; + let ScalarOperand = 2; let VLOperand = 4; } // For destination vector type is the same as first source vector. The // second source operand must match the destination type or be an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVBinaryAAShiftNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryAAShiftUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVBinaryAAShiftMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVBinaryAAShiftMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -480,38 +486,40 @@ let TargetPrefix = "riscv" in { let VLOperand = 4; } // For destination vector type is NOT the same as first source vector. 
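The passthru change to the unmasked binary classes above is easiest to see in IR; a sketch, assuming a vadd.vv instantiation at nxv2i32 with an i64 XLEN:

    ; RISCVBinaryAAXUnMasked: (passthru, vector_in, vector_in/scalar_in, vl)
    declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(
        <vscale x 2 x i32>,  ; passthru (undef requests no merging)
        <vscale x 2 x i32>,  ; vector_in
        <vscale x 2 x i32>,  ; vector_in/scalar_in (vector form shown)
        i64)                 ; vl, hence VLOperand moving from 2 to 3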
- // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVBinaryABXNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryABXUnMasked : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let ScalarOperand = 2; + let VLOperand = 3; } // For destination vector type is NOT the same as first source vector (with mask). - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVBinaryABXMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVBinaryABXMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, LLVMMatchType<3>], [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 2; + let ScalarOperand = 2; let VLOperand = 4; } // For destination vector type is NOT the same as first source vector. The // second source operand must match the destination type or be an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVBinaryABShiftNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryABShiftUnMasked : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is NOT the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVBinaryABShiftMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVBinaryABShiftMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -520,15 +528,15 @@ let TargetPrefix = "riscv" in { let VLOperand = 4; } // For binary operations with V0 as input. - // Input: (vector_in, vector_in/scalar_in, V0, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, V0, vl) class RISCVBinaryWithV0 : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 3; + let ScalarOperand = 2; + let VLOperand = 4; } // For binary operations with mask type output and V0 as input. // Output: (mask type output) @@ -539,7 +547,7 @@ let TargetPrefix = "riscv" in { LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; + let ScalarOperand = 1; let VLOperand = 3; } // For binary operations with mask type output. @@ -549,87 +557,91 @@ let TargetPrefix = "riscv" in { : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; + let ScalarOperand = 1; let VLOperand = 2; } // For binary operations with mask type output without mask. 
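RISCVBinaryWithV0 gains the same leading passthru; sketched below for vmerge, which is defined through this class later in the file, with the mangled suffix assumed:

    ; RISCVBinaryWithV0: (passthru, vector_in, vector_in/scalar_in, V0, vl)
    declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8.i64(
        <vscale x 1 x i8>,   ; passthru
        <vscale x 1 x i8>,   ; vector_in
        <vscale x 1 x i8>,   ; vector_in/scalar_in
        <vscale x 1 x i1>,   ; V0 (selector mask)
        i64)                 ; vl, hence VLOperand moving from 3 to 4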
// Output: (mask type output) // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVCompareNoMask + class RISCVCompareUnMasked : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; + let ScalarOperand = 1; let VLOperand = 2; } // For binary operations with mask type output with mask. // Output: (mask type output) // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) - class RISCVCompareMask + class RISCVCompareMasked : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 2; + let ScalarOperand = 2; let VLOperand = 4; } // For FP classify operations. // Output: (bit mask type output) - // Input: (vector_in, vl) - class RISCVClassifyNoMask + // Input: (passthru, vector_in, vl) + class RISCVClassifyUnMasked : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], - [llvm_anyvector_ty, llvm_anyint_ty], + [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 1; } // For FP classify operations with mask. // Output: (bit mask type output) - // Input: (maskedoff, vector_in, mask, vl) - class RISCVClassifyMask + // Input: (maskedoff, vector_in, mask, vl, policy) + class RISCVClassifyMasked : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty, LLVMMatchType<1>], + [IntrNoMem, ImmArg>]>, RISCVVIntrinsic { let VLOperand = 3; } // For Saturating binary operations. // The destination vector type is the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVSaturatingBinaryAAXNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryAAXUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let ScalarOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVSaturatingBinaryAAXMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVSaturatingBinaryAAXMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, LLVMMatchType<2>], [ImmArg>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let SplatOperand = 2; + let ScalarOperand = 2; let VLOperand = 4; } // For Saturating binary operations. // The destination vector type is the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. 
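At a call site the trailing policy operand must be a constant; a sketch of a masked call, where the vadd.mask instantiation and the bit encoding of the policy value (bit 0 = tail agnostic, bit 1 = mask agnostic) are assumptions:

    declare <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32.i64(
        <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>,
        <vscale x 2 x i1>, i64, i64 immarg)

    define <vscale x 2 x i32> @demo(<vscale x 2 x i32> %maskedoff,
                                    <vscale x 2 x i32> %a,
                                    <vscale x 2 x i32> %b,
                                    <vscale x 2 x i1> %m, i64 %vl) {
      ; policy = 1: tail agnostic, mask undisturbed (assumed encoding)
      %r = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32.i64(
              <vscale x 2 x i32> %maskedoff, <vscale x 2 x i32> %a,
              <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i64 %vl, i64 1)
      ret <vscale x 2 x i32> %r
    }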
- // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVSaturatingBinaryAAShiftNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryAAShiftUnMasked : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVSaturatingBinaryAAShiftMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVSaturatingBinaryAAShiftMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -640,18 +652,19 @@ let TargetPrefix = "riscv" in { // For Saturating binary operations. // The destination vector type is NOT the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) - class RISCVSaturatingBinaryABShiftNoMask + // Input: (passthru, vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryABShiftUnMasked : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is NOT the same as first source vector (with mask). // The second source operand matches the destination type or is an XLen scalar. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) - class RISCVSaturatingBinaryABShiftMask + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy) + class RISCVSaturatingBinaryABShiftMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -659,56 +672,69 @@ let TargetPrefix = "riscv" in { [ImmArg>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { let VLOperand = 4; } - class RISCVTernaryAAAXNoMask + // Input: (vector_in, vector_in, scalar_in, vl, policy) + class RVVSlideUnMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, - LLVMMatchType<1>], - [IntrNoMem]>, RISCVVIntrinsic { + LLVMMatchType<1>, LLVMMatchType<1>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 3; } - class RISCVTernaryAAAXMask + // Input: (vector_in, vector_in, vector_in/scalar_in, mask, vl, policy) + class RVVSlideMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], - [IntrNoMem]>, RISCVVIntrinsic { + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<1>, LLVMMatchType<1>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 4; } - class RISCVTernaryAAXANoMask + // UnMasked Vector Multiply-Add operations; their first operand cannot be undef.
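The renamed RVVSlide classes above make the policy operand explicit for the slide intrinsics; a sketch assuming a vslideup instantiation:

    ; RVVSlideUnMasked: (vector_in, vector_in, scalar_in, vl, policy)
    declare <vscale x 1 x i8> @llvm.riscv.vslideup.nxv1i8.i64(
        <vscale x 1 x i8>,   ; vector_in (destination being merged into)
        <vscale x 1 x i8>,   ; vector_in (slide source)
        i64,                 ; scalar_in (slide offset)
        i64,                 ; vl
        i64 immarg)          ; policy (a new trailing immediate, per the added ImmArg)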
+ // Input: (vector_in, vector_in/scalar, vector_in, vl, policy) + class RISCVTernaryAAXAUnMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>, - llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; + llvm_anyint_ty, LLVMMatchType<2>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { + let ScalarOperand = 1; let VLOperand = 3; } - class RISCVTernaryAAXAMask + // Masked Vector Multiply-Add operations; their first operand cannot be undef. + // Input: (vector_in, vector_in/scalar, vector_in, mask, vl, policy) + class RISCVTernaryAAXAMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty, LLVMMatchType<2>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { + let ScalarOperand = 1; let VLOperand = 4; } - class RISCVTernaryWideNoMask + // UnMasked Widening Vector Multiply-Add operations; their first operand cannot be undef. + // Input: (vector_in, vector_in/scalar, vector_in, vl, policy) + class RISCVTernaryWideUnMasked : Intrinsic< [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty, - llvm_anyint_ty], - [IntrNoMem] >, RISCVVIntrinsic { - let SplatOperand = 1; + llvm_anyint_ty, LLVMMatchType<3>], + [ImmArg>, IntrNoMem] >, RISCVVIntrinsic { + let ScalarOperand = 1; let VLOperand = 3; } - class RISCVTernaryWideMask + // Masked Widening Vector Multiply-Add operations; their first operand cannot be undef. + // Input: (vector_in, vector_in/scalar, vector_in, mask, vl, policy) + class RISCVTernaryWideMasked : Intrinsic< [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty, LLVMMatchType<3>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { + let ScalarOperand = 1; let VLOperand = 4; } // For Reduction ternary operations. // For destination vector type is the same as first and third source vector. // Input: (vector_in, vector_in, vector_in, vl) - class RISCVReductionNoMask + class RISCVReductionUnMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>, llvm_anyint_ty], @@ -719,7 +745,7 @@ let TargetPrefix = "riscv" in { // For destination vector type is the same as first and third source vector. // The mask type comes from the second source vector.
// Input: (maskedoff, vector_in, vector_in, vector_in, mask, vl) - class RISCVReductionMask + class RISCVReductionMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, llvm_anyint_ty], @@ -729,7 +755,7 @@ let TargetPrefix = "riscv" in { // For unary operations with scalar type output without mask // Output: (scalar type) // Input: (vector_in, vl) - class RISCVMaskUnarySOutNoMask + class RISCVMaskedUnarySOutUnMasked : Intrinsic<[LLVMMatchType<1>], [llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { @@ -738,23 +764,23 @@ let TargetPrefix = "riscv" in { // For unary operations with scalar type output with mask // Output: (scalar type) // Input: (vector_in, mask, vl) - class RISCVMaskUnarySOutMask + class RISCVMaskedUnarySOutMasked : Intrinsic<[LLVMMatchType<1>], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 2; } // For destination vector type is NOT the same as source vector. - // Input: (vector_in, vl) - class RISCVUnaryABNoMask + // Input: (passthru, vector_in, vl) + class RISCVUnaryABUnMasked : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } // For destination vector type is NOT the same as source vector (with mask). - // Input: (maskedoff, vector_in, mask, vl, ta) - class RISCVUnaryABMask + // Input: (maskedoff, vector_in, mask, vl, policy) + class RISCVUnaryABMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, @@ -765,7 +791,7 @@ let TargetPrefix = "riscv" in { // For unary operations with the same vector type in/out without mask // Output: (vector) // Input: (vector_in, vl) - class RISCVUnaryNoMask + class RISCVUnaryUnMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { @@ -774,7 +800,7 @@ let TargetPrefix = "riscv" in { // For mask unary operations with mask type in/out with mask // Output: (mask type output) // Input: (mask type maskedoff, mask type vector_in, mask, vl) - class RISCVMaskUnaryMOutMask + class RISCVMaskedUnaryMOutMasked : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], @@ -785,21 +811,28 @@ let TargetPrefix = "riscv" in { // Input: (vl) class RISCVNullaryIntrinsic : Intrinsic<[llvm_anyvector_ty], - [llvm_anyint_ty], + [llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { + let VLOperand = 1; + } + // Output: (vector) + // Input: (passthru, vl) + class RISCVID + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 0; + let VLOperand = 1; } // For Conversion unary operations. - // Input: (vector_in, vl) - class RISCVConversionNoMask + // Input: (passthru, vector_in, vl) + class RISCVConversionUnMasked : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } // For Conversion unary operations with mask. 
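The new RISCVID class gives vid.v a passthru as well; sketched assuming an nxv2i32/i64 instantiation:

    ; RISCVID: (passthru, vl)
    declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32.i64(
        <vscale x 2 x i32>,  ; passthru
        i64)                 ; vl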
- // Input: (maskedoff, vector_in, mask, vl, ta) - class RISCVConversionMask + // Input: (maskedoff, vector_in, mask, vl, policy) + class RISCVConversionMasked : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, @@ -809,17 +842,18 @@ let TargetPrefix = "riscv" in { } // For unit stride segment load - // Input: (pointer, vl) + // Input: (passthru, pointer, vl) class RISCVUSSegLoad : Intrinsic, !add(nf, -1))), - [LLVMPointerToElt<0>, llvm_anyint_ty], - [NoCapture>, IntrReadMem]>, RISCVVIntrinsic { - let VLOperand = 1; + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, llvm_anyint_ty]), + [NoCapture>, IntrReadMem]>, RISCVVIntrinsic { + let VLOperand = !add(nf, 1); } // For unit stride segment load with mask - // Input: (maskedoff, pointer, mask, vl, ta) - class RISCVUSSegLoadMask + // Input: (maskedoff, pointer, mask, vl, policy) + class RISCVUSSegLoadMasked : Intrinsic, !add(nf, -1))), !listconcat(!listsplat(LLVMMatchType<0>, nf), @@ -832,23 +866,24 @@ let TargetPrefix = "riscv" in { } // For unit stride fault-only-first segment load - // Input: (pointer, vl) + // Input: (passthru, pointer, vl) // Output: (data, vl) // NOTE: We model this with default memory properties since we model writing // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. class RISCVUSSegLoadFF : Intrinsic, !add(nf, -1)), [llvm_anyint_ty]), - [LLVMPointerToElt<0>, LLVMMatchType<1>], - [NoCapture>]>, RISCVVIntrinsic { - let VLOperand = 1; + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, LLVMMatchType<1>]), + [NoCapture>]>, RISCVVIntrinsic { + let VLOperand = !add(nf, 1); } // For unit stride fault-only-first segment load with mask - // Input: (maskedoff, pointer, mask, vl, ta) + // Input: (maskedoff, pointer, mask, vl, policy) // Output: (data, vl) // NOTE: We model this with default memory properties since we model writing // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. 
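Segment loads now take one passthru per field, which is why VLOperand becomes !add(nf, 1); a sketch for nf = 2, with the vlseg2 name and mangled suffix assumed:

    ; RISCVUSSegLoad, nf = 2: (passthru x 2, pointer, vl)
    declare { <vscale x 1 x i8>, <vscale x 1 x i8> }
        @llvm.riscv.vlseg2.nxv1i8.i64(
        <vscale x 1 x i8>,   ; passthru for field 0
        <vscale x 1 x i8>,   ; passthru for field 1
        i8*,                 ; pointer (to the element type)
        i64)                 ; vl, at operand index nf + 1 = 3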
- class RISCVUSSegLoadFFMask + class RISCVUSSegLoadFFMasked : Intrinsic, !add(nf, -1)), [llvm_anyint_ty]), !listconcat(!listsplat(LLVMMatchType<0>, nf), @@ -861,17 +896,18 @@ let TargetPrefix = "riscv" in { } // For stride segment load - // Input: (pointer, offset, vl) + // Input: (passthru, pointer, offset, vl) class RISCVSSegLoad : Intrinsic, !add(nf, -1))), - [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>], - [NoCapture>, IntrReadMem]>, RISCVVIntrinsic { - let VLOperand = 2; + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>]), + [NoCapture>, IntrReadMem]>, RISCVVIntrinsic { + let VLOperand = !add(nf, 2); } // For stride segment load with mask - // Input: (maskedoff, pointer, offset, mask, vl, ta) - class RISCVSSegLoadMask + // Input: (maskedoff, pointer, offset, mask, vl, policy) + class RISCVSSegLoadMasked : Intrinsic, !add(nf, -1))), !listconcat(!listsplat(LLVMMatchType<0>, nf), @@ -885,17 +921,18 @@ let TargetPrefix = "riscv" in { } // For indexed segment load - // Input: (pointer, index, vl) + // Input: (passthru, pointer, index, vl) class RISCVISegLoad : Intrinsic, !add(nf, -1))), - [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty], - [NoCapture>, IntrReadMem]>, RISCVVIntrinsic { - let VLOperand = 2; + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty]), + [NoCapture>, IntrReadMem]>, RISCVVIntrinsic { + let VLOperand = !add(nf, 2); } // For indexed segment load with mask - // Input: (maskedoff, pointer, index, mask, vl, ta) - class RISCVISegLoadMask + // Input: (maskedoff, pointer, index, mask, vl, policy) + class RISCVISegLoadMasked : Intrinsic, !add(nf, -1))), !listconcat(!listsplat(LLVMMatchType<0>, nf), @@ -920,7 +957,7 @@ let TargetPrefix = "riscv" in { } // For unit stride segment store with mask // Input: (value, pointer, mask, vl) - class RISCVUSSegStoreMask + class RISCVUSSegStoreMasked : Intrinsic<[], !listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, !add(nf, -1)), @@ -944,7 +981,7 @@ let TargetPrefix = "riscv" in { } // For stride segment store with mask // Input: (value, pointer, offset, mask, vl) - class RISCVSSegStoreMask + class RISCVSSegStoreMasked : Intrinsic<[], !listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, !add(nf, -1)), @@ -968,7 +1005,7 @@ let TargetPrefix = "riscv" in { } // For indexed segment store with mask // Input: (value, pointer, offset, mask, vl) - class RISCVISegStoreMask + class RISCVISegStoreMasked : Intrinsic<[], !listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, !add(nf, -1)), @@ -981,76 +1018,76 @@ let TargetPrefix = "riscv" in { multiclass RISCVUSLoad { def "int_riscv_" # NAME : RISCVUSLoad; - def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask; + def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMasked; } multiclass RISCVUSLoadFF { def "int_riscv_" # NAME : RISCVUSLoadFF; - def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask; + def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMasked; } multiclass RISCVSLoad { def "int_riscv_" # NAME : RISCVSLoad; - def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask; + def "int_riscv_" # NAME # "_mask" : RISCVSLoadMasked; } multiclass RISCVILoad { def "int_riscv_" # NAME : RISCVILoad; - def "int_riscv_" # NAME # "_mask" : RISCVILoadMask; + def "int_riscv_" # NAME # "_mask" : RISCVILoadMasked; } multiclass RISCVUSStore { def "int_riscv_" # NAME : RISCVUSStore; - def "int_riscv_" # NAME # "_mask" : RISCVUSStoreMask; + def "int_riscv_" # NAME # "_mask" : 
RISCVUSStoreMasked; } multiclass RISCVSStore { def "int_riscv_" # NAME : RISCVSStore; - def "int_riscv_" # NAME # "_mask" : RISCVSStoreMask; + def "int_riscv_" # NAME # "_mask" : RISCVSStoreMasked; } multiclass RISCVIStore { def "int_riscv_" # NAME : RISCVIStore; - def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask; + def "int_riscv_" # NAME # "_mask" : RISCVIStoreMasked; } multiclass RISCVUnaryAA { - def "int_riscv_" # NAME : RISCVUnaryAANoMask; - def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask; + def "int_riscv_" # NAME : RISCVUnaryAAUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMasked; } multiclass RISCVUnaryAB { - def "int_riscv_" # NAME : RISCVUnaryABNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMask; + def "int_riscv_" # NAME : RISCVUnaryABUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMasked; } // AAX means the destination type(A) is the same as the first source // type(A). X means any type for the second source operand. multiclass RISCVBinaryAAX { - def "int_riscv_" # NAME : RISCVBinaryAAXNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMask; + def "int_riscv_" # NAME : RISCVBinaryAAXUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMasked; } // Like RISCVBinaryAAX, but the second operand is used as a shift amount so it // must be a vector or an XLen scalar. multiclass RISCVBinaryAAShift { - def "int_riscv_" # NAME : RISCVBinaryAAShiftNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAShiftMask; + def "int_riscv_" # NAME : RISCVBinaryAAShiftUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAShiftMasked; } multiclass RISCVRGatherVV { - def "int_riscv_" # NAME : RISCVRGatherVVNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVRGatherVVMask; + def "int_riscv_" # NAME : RISCVRGatherVVUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVRGatherVVMasked; } multiclass RISCVRGatherVX { - def "int_riscv_" # NAME : RISCVGatherVXNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVGatherVXMask; + def "int_riscv_" # NAME : RISCVGatherVXUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVGatherVXMasked; } multiclass RISCVRGatherEI16VV { - def "int_riscv_" # NAME : RISCVRGatherEI16VVNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVRGatherEI16VVMask; + def "int_riscv_" # NAME : RISCVRGatherEI16VVUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVRGatherEI16VVMasked; } // ABX means the destination type(A) is different from the first source // type(B). X means any type for the second source operand. multiclass RISCVBinaryABX { - def "int_riscv_" # NAME : RISCVBinaryABXNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMask; + def "int_riscv_" # NAME : RISCVBinaryABXUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMasked; } // Like RISCVBinaryABX, but the second operand is used as a shift amount so it // must be a vector or an XLen scalar.
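Each multiclass pairs an UnMasked class with its Masked counterpart under the same user-visible name plus a "_mask" suffix. For the shift flavors the second source keeps its vector-or-XLen form and needs no ScalarOperand extension; a sketch of the vv shape, with the vsll instantiation assumed:

    ; RISCVBinaryAAShiftUnMasked: (passthru, vector_in, vector_in/scalar_in, vl)
    declare <vscale x 2 x i32> @llvm.riscv.vsll.nxv2i32.nxv2i32.i64(
        <vscale x 2 x i32>,  ; passthru
        <vscale x 2 x i32>,  ; vector_in
        <vscale x 2 x i32>,  ; shift amounts (vector form; an XLen scalar is also allowed)
        i64)                 ; vl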
multiclass RISCVBinaryABShift { - def "int_riscv_" # NAME : RISCVBinaryABShiftNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVBinaryABShiftMask; + def "int_riscv_" # NAME : RISCVBinaryABShiftUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVBinaryABShiftMasked; } multiclass RISCVBinaryWithV0 { def "int_riscv_" # NAME : RISCVBinaryWithV0; @@ -1062,80 +1099,80 @@ let TargetPrefix = "riscv" in { def "int_riscv_" # NAME : RISCVBinaryMOut; } multiclass RISCVSaturatingBinaryAAX { - def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMask; + def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMasked; } multiclass RISCVSaturatingBinaryAAShift { - def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMask; + def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMasked; } multiclass RISCVSaturatingBinaryABShift { - def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMask; + def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMasked; } - multiclass RISCVTernaryAAAX { - def "int_riscv_" # NAME : RISCVTernaryAAAXNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAAXMask; + multiclass RVVSlide { + def "int_riscv_" # NAME : RVVSlideUnMasked; + def "int_riscv_" # NAME # "_mask" : RVVSlideMasked; } multiclass RISCVTernaryAAXA { - def "int_riscv_" # NAME : RISCVTernaryAAXANoMask; - def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask; + def "int_riscv_" # NAME : RISCVTernaryAAXAUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMasked; } multiclass RISCVCompare { - def "int_riscv_" # NAME : RISCVCompareNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVCompareMask; + def "int_riscv_" # NAME : RISCVCompareUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVCompareMasked; } multiclass RISCVClassify { - def "int_riscv_" # NAME : RISCVClassifyNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVClassifyMask; + def "int_riscv_" # NAME : RISCVClassifyUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVClassifyMasked; } multiclass RISCVTernaryWide { - def "int_riscv_" # NAME : RISCVTernaryWideNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMask; + def "int_riscv_" # NAME : RISCVTernaryWideUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMasked; } multiclass RISCVReduction { - def "int_riscv_" # NAME : RISCVReductionNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVReductionMask; + def "int_riscv_" # NAME : RISCVReductionUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVReductionMasked; } - multiclass RISCVMaskUnarySOut { - def "int_riscv_" # NAME : RISCVMaskUnarySOutNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVMaskUnarySOutMask; + multiclass RISCVMaskedUnarySOut { + def "int_riscv_" # NAME : RISCVMaskedUnarySOutUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVMaskedUnarySOutMasked; } - multiclass RISCVMaskUnaryMOut { - def "int_riscv_" # NAME : RISCVUnaryNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVMaskUnaryMOutMask; + multiclass RISCVMaskedUnaryMOut { + def "int_riscv_" # NAME : RISCVUnaryUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVMaskedUnaryMOutMasked; } multiclass RISCVConversion { - def 
"int_riscv_" #NAME :RISCVConversionNoMask; - def "int_riscv_" # NAME # "_mask" : RISCVConversionMask; + def "int_riscv_" #NAME :RISCVConversionUnMasked; + def "int_riscv_" # NAME # "_mask" : RISCVConversionMasked; } multiclass RISCVUSSegLoad { def "int_riscv_" # NAME : RISCVUSSegLoad; - def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMask; + def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMasked; } multiclass RISCVUSSegLoadFF { def "int_riscv_" # NAME : RISCVUSSegLoadFF; - def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadFFMask; + def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadFFMasked; } multiclass RISCVSSegLoad { def "int_riscv_" # NAME : RISCVSSegLoad; - def "int_riscv_" # NAME # "_mask" : RISCVSSegLoadMask; + def "int_riscv_" # NAME # "_mask" : RISCVSSegLoadMasked; } multiclass RISCVISegLoad { def "int_riscv_" # NAME : RISCVISegLoad; - def "int_riscv_" # NAME # "_mask" : RISCVISegLoadMask; + def "int_riscv_" # NAME # "_mask" : RISCVISegLoadMasked; } multiclass RISCVUSSegStore { def "int_riscv_" # NAME : RISCVUSSegStore; - def "int_riscv_" # NAME # "_mask" : RISCVUSSegStoreMask; + def "int_riscv_" # NAME # "_mask" : RISCVUSSegStoreMasked; } multiclass RISCVSSegStore { def "int_riscv_" # NAME : RISCVSSegStore; - def "int_riscv_" # NAME # "_mask" : RISCVSSegStoreMask; + def "int_riscv_" # NAME # "_mask" : RISCVSSegStoreMasked; } multiclass RISCVISegStore { def "int_riscv_" # NAME : RISCVISegStore; - def "int_riscv_" # NAME # "_mask" : RISCVISegStoreMask; + def "int_riscv_" # NAME # "_mask" : RISCVISegStoreMasked; } defm vle : RISCVUSLoad; @@ -1242,20 +1279,29 @@ let TargetPrefix = "riscv" in { defm vmerge : RISCVBinaryWithV0; + // Output: (vector) + // Input: (passthru, vector_in, vl) def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } + // Output: (vector) + // Input: (passthru, scalar, vl) def int_riscv_vmv_v_x : Intrinsic<[llvm_anyint_ty], - [LLVMVectorElementType<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMVectorElementType<0>, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } + // Output: (vector) + // Input: (passthru, scalar, vl) def int_riscv_vfmv_v_f : Intrinsic<[llvm_anyfloat_ty], - [LLVMVectorElementType<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMVectorElementType<0>, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>], @@ -1313,8 +1359,8 @@ let TargetPrefix = "riscv" in { defm vfmerge : RISCVBinaryWithV0; - defm vslideup : RISCVTernaryAAAX; - defm vslidedown : RISCVTernaryAAAX; + defm vslideup : RVVSlide; + defm vslidedown : RVVSlide; defm vslide1up : RISCVBinaryAAX; defm vslide1down : RISCVBinaryAAX; @@ -1325,7 +1371,7 @@ let TargetPrefix = "riscv" in { defm vrgather_vx : RISCVRGatherVX; defm vrgatherei16_vv : RISCVRGatherEI16VV; - def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA; + def "int_riscv_vcompress" : RISCVCompress; defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; @@ -1367,22 +1413,22 @@ let TargetPrefix = "riscv" in { defm vfwredusum : RISCVReduction; defm vfwredosum : RISCVReduction; - def int_riscv_vmand: RISCVBinaryAAANoMask; - def int_riscv_vmnand: RISCVBinaryAAANoMask; - def int_riscv_vmandn: RISCVBinaryAAANoMask; - def int_riscv_vmxor: RISCVBinaryAAANoMask; - def int_riscv_vmor: RISCVBinaryAAANoMask; 
- def int_riscv_vmnor: RISCVBinaryAAANoMask; - def int_riscv_vmorn: RISCVBinaryAAANoMask; - def int_riscv_vmxnor: RISCVBinaryAAANoMask; + def int_riscv_vmand: RISCVBinaryAAAUnMasked; + def int_riscv_vmnand: RISCVBinaryAAAUnMasked; + def int_riscv_vmandn: RISCVBinaryAAAUnMasked; + def int_riscv_vmxor: RISCVBinaryAAAUnMasked; + def int_riscv_vmor: RISCVBinaryAAAUnMasked; + def int_riscv_vmnor: RISCVBinaryAAAUnMasked; + def int_riscv_vmorn: RISCVBinaryAAAUnMasked; + def int_riscv_vmxnor: RISCVBinaryAAAUnMasked; def int_riscv_vmclr : RISCVNullaryIntrinsic; def int_riscv_vmset : RISCVNullaryIntrinsic; - defm vcpop : RISCVMaskUnarySOut; - defm vfirst : RISCVMaskUnarySOut; - defm vmsbf : RISCVMaskUnaryMOut; - defm vmsof : RISCVMaskUnaryMOut; - defm vmsif : RISCVMaskUnaryMOut; + defm vcpop : RISCVMaskedUnarySOut; + defm vfirst : RISCVMaskedUnarySOut; + defm vmsbf : RISCVMaskedUnaryMOut; + defm vmsof : RISCVMaskedUnaryMOut; + defm vmsif : RISCVMaskedUnaryMOut; defm vfcvt_xu_f_v : RISCVConversion; defm vfcvt_x_f_v : RISCVConversion; @@ -1409,34 +1455,35 @@ let TargetPrefix = "riscv" in { defm vfncvt_rod_f_f_w : RISCVConversion; // Output: (vector) - // Input: (mask type input, vl) + // Input: (passthru, mask type input, vl) def int_riscv_viota : Intrinsic<[llvm_anyvector_ty], - [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 1; + let VLOperand = 2; } // Output: (vector) - // Input: (maskedoff, mask type vector_in, mask, vl) + // Input: (maskedoff, mask type vector_in, mask, vl, policy) def int_riscv_viota_mask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { + llvm_anyint_ty, LLVMMatchType<1>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 3; } // Output: (vector) - // Input: (vl) - def int_riscv_vid : RISCVNullaryIntrinsic; + // Input: (passthru, vl) + def int_riscv_vid : RISCVID; // Output: (vector) - // Input: (maskedoff, mask, vl) + // Input: (maskedoff, mask, vl, policy) def int_riscv_vid_mask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { + llvm_anyint_ty, LLVMMatchType<1>], + [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 2; } @@ -1463,6 +1510,16 @@ let TargetPrefix = "riscv" in { [llvm_anyvector_ty, llvm_anyptr_ty, llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [NoCapture>, IntrWriteMem]>; + + // Segment loads for fixed vectors. 
+ foreach nf = [2, 3, 4, 5, 6, 7, 8] in { + def int_riscv_seg # nf # _load + : Intrinsic, + !add(nf, -1))), + [llvm_anyptr_ty, llvm_anyint_ty], + [NoCapture>, IntrReadMem]>; + } + } // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// @@ -1503,7 +1560,7 @@ class ScalarCryptoByteSelectAny : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, - ImmArg>, Returned>]>; + ImmArg>]>; // Zknd def int_riscv_aes32dsi : ScalarCryptoByteSelect32; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td new file mode 100644 index 000000000000..14c628595d30 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -0,0 +1,31 @@ +//===- IntrinsicsSPIRV.td - Defines SPIRV intrinsics -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the SPIRV-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "spv" in { + def int_spv_assign_type : Intrinsic<[], [llvm_any_ty, llvm_metadata_ty]>; + def int_spv_assign_name : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>; + + def int_spv_track_constant : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_metadata_ty]>; + def int_spv_init_global : Intrinsic<[], [llvm_any_ty, llvm_any_ty]>; + def int_spv_unref_global : Intrinsic<[], [llvm_any_ty]>; + + def int_spv_gep : Intrinsic<[llvm_anyptr_ty], [llvm_i1_ty, llvm_any_ty, llvm_vararg_ty], [ImmArg>]>; + def int_spv_load : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg>, ImmArg>]>; + def int_spv_store : Intrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg>, ImmArg>]>; + def int_spv_extractv : Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_vararg_ty]>; + def int_spv_insertv : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_any_ty, llvm_vararg_ty]>; + def int_spv_extractelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_anyint_ty]>; + def int_spv_insertelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_any_ty, llvm_anyint_ty]>; + def int_spv_const_composite : Intrinsic<[llvm_i32_ty], [llvm_vararg_ty]>; + def int_spv_bitcast : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; + def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>; +} diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td index a149b571072c..d881a1126bf2 100644 --- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td +++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// class SystemZUnaryConv - : GCCBuiltin<"__builtin_s390_" # name>, + : ClangBuiltin<"__builtin_s390_" # name>, Intrinsic<[result], [arg], [IntrNoMem]>; class SystemZUnary @@ -24,14 +24,14 @@ class SystemZUnaryCC : SystemZUnaryConvCC; class SystemZBinaryConv - : GCCBuiltin<"__builtin_s390_" # name>, + : ClangBuiltin<"__builtin_s390_" # name>, Intrinsic<[result], [arg, arg], [IntrNoMem]>; class SystemZBinary : SystemZBinaryConv; class SystemZBinaryInt - : GCCBuiltin<"__builtin_s390_" # name>, + : ClangBuiltin<"__builtin_s390_" # name>, Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>; class 
SystemZBinaryConvCC @@ -45,7 +45,7 @@ class SystemZBinaryCC : SystemZBinaryConvCC; class SystemZTernaryConv - : GCCBuiltin<"__builtin_s390_" # name>, + : ClangBuiltin<"__builtin_s390_" # name>, Intrinsic<[result], [arg, arg, result], [IntrNoMem]>; class SystemZTernaryConvCC @@ -55,7 +55,7 @@ class SystemZTernary : SystemZTernaryConv; class SystemZTernaryInt - : GCCBuiltin<"__builtin_s390_" # name>, + : ClangBuiltin<"__builtin_s390_" # name>, Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem, ImmArg>]>; class SystemZTernaryIntCC @@ -63,7 +63,7 @@ class SystemZTernaryIntCC [IntrNoMem, ImmArg>]>; class SystemZQuaternaryInt - : GCCBuiltin<"__builtin_s390_" # name>, + : ClangBuiltin<"__builtin_s390_" # name>, Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -216,16 +216,16 @@ let TargetPrefix = "s390" in { def int_s390_tabort : Intrinsic<[], [llvm_i64_ty], [IntrNoReturn, Throws, IntrWriteMem]>; - def int_s390_tend : GCCBuiltin<"__builtin_tend">, + def int_s390_tend : ClangBuiltin<"__builtin_tend">, Intrinsic<[llvm_i32_ty], []>; - def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">, + def int_s390_etnd : ClangBuiltin<"__builtin_tx_nesting_depth">, Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty], [IntrArgMemOnly, IntrWriteMem]>; - def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">, + def int_s390_ppa_txassist : ClangBuiltin<"__builtin_tx_assist">, Intrinsic<[], [llvm_i32_ty]>; } @@ -236,24 +236,24 @@ let TargetPrefix = "s390" in { //===----------------------------------------------------------------------===// let TargetPrefix = "s390" in { - def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">, + def int_s390_lcbb : ClangBuiltin<"__builtin_s390_lcbb">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">, + def int_s390_vlbb : ClangBuiltin<"__builtin_s390_vlbb">, Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly, ImmArg>]>; - def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">, + def int_s390_vll : ClangBuiltin<"__builtin_s390_vll">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">, + def int_s390_vpdi : ClangBuiltin<"__builtin_s390_vpdi">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">, + def int_s390_vperm : ClangBuiltin<"__builtin_s390_vperm">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -264,7 +264,7 @@ let TargetPrefix = "s390" in { defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">; defm int_s390_vpkls : SystemZBinaryTruncCCHFG; - def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">, + def int_s390_vstl : ClangBuiltin<"__builtin_s390_vstl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrWriteMem]>; @@ -314,7 +314,7 @@ let TargetPrefix = "s390" in { def int_s390_vsrl : SystemZBinary<"vsrl", llvm_v16i8_ty>; def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>; - def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">, + def int_s390_vsldb : ClangBuiltin<"__builtin_s390_vsldb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -382,7 +382,7 @@ let TargetPrefix = "s390" in { def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty, 
llvm_v16i8_ty>; - def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">, + def int_s390_vmslg : ClangBuiltin<"__builtin_s390_vmslg">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -411,21 +411,21 @@ let TargetPrefix = "s390" in { [IntrNoMem, ImmArg>, ImmArg>]>; // Instructions from the Vector Packed Decimal Facility - def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">, + def int_s390_vlrl : ClangBuiltin<"__builtin_s390_vlrl">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">, + def int_s390_vstrl : ClangBuiltin<"__builtin_s390_vstrl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrWriteMem]>; // Instructions from the Vector Enhancements Facility 2 - def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">, + def int_s390_vsld : ClangBuiltin<"__builtin_s390_vsld">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">, + def int_s390_vsrd : ClangBuiltin<"__builtin_s390_vsrd">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -438,23 +438,23 @@ let TargetPrefix = "s390" in { def int_s390_vstrszf : SystemZTernaryConvCC; // Instructions from the NNP-assist Facility - def int_s390_vclfnhs : GCCBuiltin<"__builtin_s390_vclfnhs">, + def int_s390_vclfnhs : ClangBuiltin<"__builtin_s390_vclfnhs">, Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vclfnls : GCCBuiltin<"__builtin_s390_vclfnls">, + def int_s390_vclfnls : ClangBuiltin<"__builtin_s390_vclfnls">, Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vcrnfs : GCCBuiltin<"__builtin_s390_vcrnfs">, + def int_s390_vcrnfs : ClangBuiltin<"__builtin_s390_vcrnfs">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vcfn : GCCBuiltin<"__builtin_s390_vcfn">, + def int_s390_vcfn : ClangBuiltin<"__builtin_s390_vcfn">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_s390_vcnf : GCCBuiltin<"__builtin_s390_vcnf">, + def int_s390_vcnf : ClangBuiltin<"__builtin_s390_vcnf">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -467,9 +467,9 @@ let TargetPrefix = "s390" in { //===----------------------------------------------------------------------===// let TargetPrefix = "s390" in { - def int_s390_sfpc : GCCBuiltin<"__builtin_s390_sfpc">, + def int_s390_sfpc : ClangBuiltin<"__builtin_s390_sfpc">, Intrinsic<[], [llvm_i32_ty], []>; - def int_s390_efpc : GCCBuiltin<"__builtin_s390_efpc">, + def int_s390_efpc : ClangBuiltin<"__builtin_s390_efpc">, Intrinsic<[llvm_i32_ty], [], []>; def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty], diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td index be4bccef0cc1..15b828b320ea 100644 --- a/llvm/include/llvm/IR/IntrinsicsVE.td +++ b/llvm/include/llvm/IR/IntrinsicsVE.td @@ -2,31 +2,28 @@ // VEL Intrinsic instructions. 
let TargetPrefix = "ve" in { - def int_ve_vl_svob : GCCBuiltin<"__builtin_ve_vl_svob">, - Intrinsic<[], [], [IntrHasSideEffects]>; - - def int_ve_vl_pack_f32p : GCCBuiltin<"__builtin_ve_vl_pack_f32p">, + def int_ve_vl_pack_f32p : ClangBuiltin<"__builtin_ve_vl_pack_f32p">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrReadMem]>; - def int_ve_vl_pack_f32a : GCCBuiltin<"__builtin_ve_vl_pack_f32a">, + def int_ve_vl_pack_f32a : ClangBuiltin<"__builtin_ve_vl_pack_f32a">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_ve_vl_extract_vm512u : - GCCBuiltin<"__builtin_ve_vl_extract_vm512u">, + ClangBuiltin<"__builtin_ve_vl_extract_vm512u">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; def int_ve_vl_extract_vm512l : - GCCBuiltin<"__builtin_ve_vl_extract_vm512l">, + ClangBuiltin<"__builtin_ve_vl_extract_vm512l">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; def int_ve_vl_insert_vm512u : - GCCBuiltin<"__builtin_ve_vl_insert_vm512u">, + ClangBuiltin<"__builtin_ve_vl_insert_vm512u">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; def int_ve_vl_insert_vm512l : - GCCBuiltin<"__builtin_ve_vl_insert_vm512l">, + ClangBuiltin<"__builtin_ve_vl_insert_vm512l">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td index 67cbd307903d..554dd8557200 100644 --- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td +++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td @@ -1,1213 +1,1257 @@ -let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : GCCBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : GCCBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : GCCBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : 
GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], 
[IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : GCCBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : GCCBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : GCCBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : GCCBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : GCCBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : GCCBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : GCCBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : GCCBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : GCCBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : GCCBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : GCCBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : GCCBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pfchv_ssl : GCCBuiltin<"__builtin_ve_vl_pfchv_ssl">, Intrinsic<[], [LLVMType, llvm_ptr_ty, LLVMType], [IntrInaccessibleMemOrArgMemOnly]>;
-let TargetPrefix = "ve" in def int_ve_vl_pfchvnc_ssl : GCCBuiltin<"__builtin_ve_vl_pfchvnc_ssl">, Intrinsic<[], [LLVMType, llvm_ptr_ty, LLVMType], [IntrInaccessibleMemOrArgMemOnly]>;
-let TargetPrefix = "ve" in def int_ve_vl_lsv_vvss : GCCBuiltin<"__builtin_ve_vl_lsv_vvss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : GCCBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : GCCBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : GCCBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : GCCBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : GCCBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_svm_sms : GCCBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : GCCBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsMvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vvvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vsvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vsvvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vsvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vseq_vl : GCCBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : GCCBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vvl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseq_vl : GCCBuiltin<"__builtin_ve_vl_pvseq_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseq_vvl : GCCBuiltin<"__builtin_ve_vl_pvseq_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let
TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let 
TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; 
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, 
LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, 
Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl 
: GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_vfdivd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : 
GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_vfmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix 
= "ve" in def int_ve_vl_vfmins_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in 
def int_ve_vl_vfmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, 
LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvvl : 
GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, 
LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], 
[IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vcp_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcp_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vex_vvmvl : GCCBuiltin<"__builtin_ve_vl_vex_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklat_ml : 
GCCBuiltin<"__builtin_ve_vl_vfmklat_ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklaf_ml : GCCBuiltin<"__builtin_ve_vl_vfmklaf_ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkat_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkat_Ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkaf_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkaf_Ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvl : 
GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvml">, Intrinsic<[LLVMType], [LLVMType, 
LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], 
[IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], 
[IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvml : 
GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_pvfmkwle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_vfmkdne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvml">, 
Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_vfmksltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_pvfmksloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = 
"ve" in def int_ve_vl_pvfmksupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvl : 
GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_Mvl : 
GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : GCCBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : GCCBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : GCCBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : GCCBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvvl">, 
Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def 
int_ve_vl_vrminsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrand_vvl : GCCBuiltin<"__builtin_ve_vl_vrand_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrand_vvml : GCCBuiltin<"__builtin_ve_vl_vrand_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : GCCBuiltin<"__builtin_ve_vl_vror_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : 
GCCBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : GCCBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : GCCBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : GCCBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : 
GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType, 
LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssl : GCCBuiltin<"__builtin_ve_vl_vscu_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssml : GCCBuiltin<"__builtin_ve_vl_vscu_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssl : GCCBuiltin<"__builtin_ve_vl_vscl_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssml : GCCBuiltin<"__builtin_ve_vl_vscl_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssml : 
GCCBuiltin<"__builtin_ve_vl_vsclot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; -let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : GCCBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_andm_MMM : GCCBuiltin<"__builtin_ve_vl_andm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_orm_mmm : GCCBuiltin<"__builtin_ve_vl_orm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_orm_MMM : GCCBuiltin<"__builtin_ve_vl_orm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_xorm_mmm : GCCBuiltin<"__builtin_ve_vl_xorm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_xorm_MMM : GCCBuiltin<"__builtin_ve_vl_xorm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_eqvm_mmm : GCCBuiltin<"__builtin_ve_vl_eqvm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_eqvm_MMM : GCCBuiltin<"__builtin_ve_vl_eqvm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_nndm_mmm : GCCBuiltin<"__builtin_ve_vl_nndm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_nndm_MMM : GCCBuiltin<"__builtin_ve_vl_nndm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_negm_mm : GCCBuiltin<"__builtin_ve_vl_negm_mm">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_negm_MM : GCCBuiltin<"__builtin_ve_vl_negm_MM">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : GCCBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : GCCBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; -let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : GCCBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : 
ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], 
[IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType], [LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : ClangBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : ClangBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : ClangBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : ClangBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : ClangBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : ClangBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : ClangBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : 
ClangBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : ClangBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : ClangBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : ClangBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : ClangBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : ClangBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : ClangBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : ClangBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : ClangBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : ClangBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : ClangBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : ClangBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : ClangBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, 
Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssml : ClangBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType, LLVMType, llvm_ptr_ty, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pfchv_ssl : ClangBuiltin<"__builtin_ve_vl_pfchv_ssl">, Intrinsic<[], [LLVMType, llvm_ptr_ty, LLVMType], [IntrInaccessibleMemOrArgMemOnly]>; +let TargetPrefix = "ve" in def int_ve_vl_pfchvnc_ssl : ClangBuiltin<"__builtin_ve_vl_pfchvnc_ssl">, 
Intrinsic<[], [LLVMType, llvm_ptr_ty, LLVMType], [IntrInaccessibleMemOrArgMemOnly]>; +let TargetPrefix = "ve" in def int_ve_vl_lsv_vvss : ClangBuiltin<"__builtin_ve_vl_lsv_vvss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : ClangBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : ClangBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : ClangBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : ClangBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : ClangBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_svm_sms : ClangBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : ClangBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsl : ClangBuiltin<"__builtin_ve_vl_vbrdd_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrdd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrdd_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsl : ClangBuiltin<"__builtin_ve_vl_vbrdl_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrdl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrdl_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsl : ClangBuiltin<"__builtin_ve_vl_vbrds_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrds_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrds_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsl : ClangBuiltin<"__builtin_ve_vl_vbrdw_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrdw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrdw_vsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsl : ClangBuiltin<"__builtin_ve_vl_pvbrd_vsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsvl : 
ClangBuiltin<"__builtin_ve_vl_pvbrd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsMvl : ClangBuiltin<"__builtin_ve_vl_pvbrd_vsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvl : ClangBuiltin<"__builtin_ve_vl_vmv_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmv_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvaddu_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvMvl : 
ClangBuiltin<"__builtin_ve_vl_pvadds_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, 
LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvl : ClangBuiltin<"__builtin_ve_vl_vand_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvvl : ClangBuiltin<"__builtin_ve_vl_vand_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvl : ClangBuiltin<"__builtin_ve_vl_vand_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvvl : ClangBuiltin<"__builtin_ve_vl_vand_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vand_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vand_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvl : ClangBuiltin<"__builtin_ve_vl_pvand_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvand_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvl : ClangBuiltin<"__builtin_ve_vl_pvand_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvand_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvand_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvand_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvl : ClangBuiltin<"__builtin_ve_vl_vor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvvl : ClangBuiltin<"__builtin_ve_vl_vor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvl : ClangBuiltin<"__builtin_ve_vl_vor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvvl : ClangBuiltin<"__builtin_ve_vl_vor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vor_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vor_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvl : ClangBuiltin<"__builtin_ve_vl_pvor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvl : ClangBuiltin<"__builtin_ve_vl_pvor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvor_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvor_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvl : ClangBuiltin<"__builtin_ve_vl_vxor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvvl : ClangBuiltin<"__builtin_ve_vl_vxor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvl : ClangBuiltin<"__builtin_ve_vl_vxor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvvl : ClangBuiltin<"__builtin_ve_vl_vxor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vxor_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vxor_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvl : ClangBuiltin<"__builtin_ve_vl_veqv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvvl : ClangBuiltin<"__builtin_ve_vl_veqv_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvl : ClangBuiltin<"__builtin_ve_vl_veqv_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvvl : ClangBuiltin<"__builtin_ve_vl_veqv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvmvl : ClangBuiltin<"__builtin_ve_vl_veqv_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvmvl : ClangBuiltin<"__builtin_ve_vl_veqv_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldz_vvl : ClangBuiltin<"__builtin_ve_vl_vldz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldz_vvvl : ClangBuiltin<"__builtin_ve_vl_vldz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vldz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvldzlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvvl : ClangBuiltin<"__builtin_ve_vl_pvldzlo_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvldzlo_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvl : ClangBuiltin<"__builtin_ve_vl_pvldzup_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvvl : ClangBuiltin<"__builtin_ve_vl_pvldzup_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvldzup_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvl : ClangBuiltin<"__builtin_ve_vl_pvldz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvvl : ClangBuiltin<"__builtin_ve_vl_pvldz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvldz_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvl : ClangBuiltin<"__builtin_ve_vl_vpcnt_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvvl : ClangBuiltin<"__builtin_ve_vl_vpcnt_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvmvl : ClangBuiltin<"__builtin_ve_vl_vpcnt_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvpcntlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvvl : ClangBuiltin<"__builtin_ve_vl_pvpcntlo_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvpcntlo_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvl : ClangBuiltin<"__builtin_ve_vl_pvpcntup_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvvl : ClangBuiltin<"__builtin_ve_vl_pvpcntup_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvpcntup_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvl : ClangBuiltin<"__builtin_ve_vl_pvpcnt_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvvl : ClangBuiltin<"__builtin_ve_vl_pvpcnt_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvpcnt_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvl : ClangBuiltin<"__builtin_ve_vl_vbrv_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvvl : ClangBuiltin<"__builtin_ve_vl_vbrv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvmvl : ClangBuiltin<"__builtin_ve_vl_vbrv_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvbrvlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvvl : ClangBuiltin<"__builtin_ve_vl_pvbrvlo_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvbrvlo_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvl : ClangBuiltin<"__builtin_ve_vl_pvbrvup_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvvl : ClangBuiltin<"__builtin_ve_vl_pvbrvup_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvbrvup_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvl : ClangBuiltin<"__builtin_ve_vl_pvbrv_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvvl : ClangBuiltin<"__builtin_ve_vl_pvbrv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvbrv_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vseq_vl : ClangBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : ClangBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : ClangBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvseqlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vl : ClangBuiltin<"__builtin_ve_vl_pvsequp_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vvl : ClangBuiltin<"__builtin_ve_vl_pvsequp_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseq_vl : ClangBuiltin<"__builtin_ve_vl_pvseq_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseq_vvl : ClangBuiltin<"__builtin_ve_vl_pvseq_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsl : ClangBuiltin<"__builtin_ve_vl_vsll_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsl : ClangBuiltin<"__builtin_ve_vl_vslal_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvsMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsl : ClangBuiltin<"__builtin_ve_vl_vsral_vvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvsmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : ClangBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : ClangBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl :
ClangBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : ClangBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = 
"ve" in def int_ve_vl_vfsqrts_vvl : ClangBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, 
Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let 
TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvvl : 
ClangBuiltin<"__builtin_ve_vl_pvfmad_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, 
LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix 
= "ve" in def int_ve_vl_vfnmadd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvl : 
ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvl : 
ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : ClangBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : ClangBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : ClangBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : ClangBuiltin<"__builtin_ve_vl_vrcps_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : ClangBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : 
ClangBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vcvtwdzx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : ClangBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : ClangBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : 
ClangBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : ClangBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : ClangBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : 
ClangBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : ClangBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : ClangBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : ClangBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcp_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcp_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vex_vvmvl : ClangBuiltin<"__builtin_ve_vl_vex_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklat_ml : ClangBuiltin<"__builtin_ve_vl_vfmklat_ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklaf_ml : ClangBuiltin<"__builtin_ve_vl_vfmklaf_ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkat_Ml : ClangBuiltin<"__builtin_ve_vl_pvfmkat_Ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkaf_Ml : ClangBuiltin<"__builtin_ve_vl_pvfmkaf_Ml">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkllt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkllt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = 
"ve" in def int_ve_vl_vfmkleq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkleq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkleq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkleqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkleqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkllenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vfmkllenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkllenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwlt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwlt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkweq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkweq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvl : 
ClangBuiltin<"__builtin_ve_vl_vfmkwnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkweqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkweqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwlenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwlenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlone_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlone_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in 
def int_ve_vl_pvfmkwupeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlole_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwuple_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlole_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwuple_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvl">, Intrinsic<[LLVMType], 
[LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvl">, Intrinsic<[LLVMType], [LLVMType, 
LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwne_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwne_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkweq_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkweq_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwge_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwge_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwle_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwle_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnum_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnum_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgtnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgtnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwltnan_Mvl">, Intrinsic<[LLVMType], 
[LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwltnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkweqnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkweqnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdlt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdlt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, 
LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdlenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdlenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkslt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkslt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkseq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkseq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksle_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksle_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkseqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkseqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; 
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkslenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkslenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslogt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupgt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslogt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupgt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslolt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksuplt_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslolt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksuplt_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslone_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupne_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslone_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupne_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupeq_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupeq_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupge_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloge_mvml">, Intrinsic<[LLVMType], [LLVMType, 
LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupge_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslole_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksuple_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslole_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksuple_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslonum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupnum_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslonum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupnum_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslonan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslonan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvfmksupltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksgt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksgt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslt_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_MvMl : 
ClangBuiltin<"__builtin_ve_vl_pvfmkslt_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksne_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksne_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkseq_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkseq_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksge_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksge_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksle_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksle_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksnum_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksnum_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksgtnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksgtnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksltnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksltnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksnenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksnenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkseqnan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkseqnan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvfmksgenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksgenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : ClangBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : ClangBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : ClangBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : ClangBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : ClangBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : ClangBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : ClangBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : ClangBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : ClangBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : ClangBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vrmaxswlstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstsx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstsx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstzx_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstzx_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxslfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxslfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxsllst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxsllst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvl : ClangBuiltin<"__builtin_ve_vl_vrminslfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminslfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvl : ClangBuiltin<"__builtin_ve_vl_vrminsllst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminsllst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvvl">, Intrinsic<[LLVMType], 
[LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxslst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxslst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmindfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmindfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmindlst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmindlst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrminsfst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrminsfst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrminslst_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrminslst_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrand_vvl : ClangBuiltin<"__builtin_ve_vl_vrand_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrand_vvml : ClangBuiltin<"__builtin_ve_vl_vrand_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : ClangBuiltin<"__builtin_ve_vl_vror_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : ClangBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : ClangBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : ClangBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType], 
[LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; 
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscncot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vscncot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscncot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssl : ClangBuiltin<"__builtin_ve_vl_vscu_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssml : ClangBuiltin<"__builtin_ve_vl_vscu_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscunc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vscunc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscuot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscuot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscuncot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscuncot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssl : ClangBuiltin<"__builtin_ve_vl_vscl_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssml : ClangBuiltin<"__builtin_ve_vl_vscl_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsclnc_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsclnc_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssl : ClangBuiltin<"__builtin_ve_vl_vsclot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssml : ClangBuiltin<"__builtin_ve_vl_vsclot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssl : ClangBuiltin<"__builtin_ve_vl_vsclncot_vvssl">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssml : ClangBuiltin<"__builtin_ve_vl_vsclncot_vvssml">, Intrinsic<[], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : ClangBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_andm_MMM : ClangBuiltin<"__builtin_ve_vl_andm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let 
TargetPrefix = "ve" in def int_ve_vl_orm_mmm : ClangBuiltin<"__builtin_ve_vl_orm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_orm_MMM : ClangBuiltin<"__builtin_ve_vl_orm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_xorm_mmm : ClangBuiltin<"__builtin_ve_vl_xorm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_xorm_MMM : ClangBuiltin<"__builtin_ve_vl_xorm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_eqvm_mmm : ClangBuiltin<"__builtin_ve_vl_eqvm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_eqvm_MMM : ClangBuiltin<"__builtin_ve_vl_eqvm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_nndm_mmm : ClangBuiltin<"__builtin_ve_vl_nndm_mmm">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_nndm_MMM : ClangBuiltin<"__builtin_ve_vl_nndm_MMM">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_negm_mm : ClangBuiltin<"__builtin_ve_vl_negm_mm">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_negm_MM : ClangBuiltin<"__builtin_ve_vl_negm_MM">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : ClangBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : ClangBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : ClangBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lcr_sss : ClangBuiltin<"__builtin_ve_vl_lcr_sss">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_scr_sss : ClangBuiltin<"__builtin_ve_vl_scr_sss">, Intrinsic<[], [LLVMType, LLVMType, LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_tscr_ssss : ClangBuiltin<"__builtin_ve_vl_tscr_ssss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fidcr_sss : ClangBuiltin<"__builtin_ve_vl_fidcr_sss">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fencei : ClangBuiltin<"__builtin_ve_vl_fencei">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fencem_s : ClangBuiltin<"__builtin_ve_vl_fencem_s">, Intrinsic<[], [LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fencec_s : ClangBuiltin<"__builtin_ve_vl_fencec_s">, Intrinsic<[], [LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_svob : ClangBuiltin<"__builtin_ve_vl_svob">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index aecc3d91fae7..f313be1b2235 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -31,6 +31,10 @@ def int_wasm_memory_grow : 
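Each record above pairs a __builtin_ve_vl_* Clang builtin with an LLVM intrinsic; tblgen derives the IR name and enum from the record name, so int_ve_vl_vgt_vvssl becomes llvm.ve.vl.vgt.vvssl and Intrinsic::ve_vl_vgt_vvssl. A minimal sketch of emitting one of the gathers through the C++ API follows; it is illustrative only, and the operand types are an assumption, since the LLVMType<...> parameters were elided in the text above.

// Hedged sketch, not part of the patch: emit llvm.ve.vl.vgt.vvssl (vector
// gather) with IRBuilder.  The enum name follows the usual tblgen mapping;
// the operand order (vector of addresses, two scalars, vector length) is
// inferred from the vvssl suffix and is an assumption here.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/Module.h"

llvm::Value *emitVEGather(llvm::IRBuilder<> &B, llvm::Module &M,
                          llvm::Value *Addrs, llvm::Value *Sy, llvm::Value *Sz,
                          llvm::Value *VL) {
  llvm::Function *VGT =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::ve_vl_vgt_vvssl);
  return B.CreateCall(VGT, {Addrs, Sy, Sz, VL});
}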
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index aecc3d91fae7..f313be1b2235 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -31,6 +31,10 @@ def int_wasm_memory_grow :
   Intrinsic<[llvm_anyint_ty],
 //===----------------------------------------------------------------------===//
 def int_wasm_ref_null_extern : Intrinsic<[llvm_externref_ty], [], [IntrNoMem]>;
 def int_wasm_ref_null_func : Intrinsic<[llvm_funcref_ty], [], [IntrNoMem]>;
+def int_wasm_ref_is_null_extern : Intrinsic<[llvm_i32_ty], [llvm_externref_ty],
+                                  [IntrNoMem], "llvm.wasm.ref.is_null.extern">;
+def int_wasm_ref_is_null_func : Intrinsic<[llvm_i32_ty], [llvm_funcref_ty],
+                                [IntrNoMem], "llvm.wasm.ref.is_null.func">;
 
 //===----------------------------------------------------------------------===//
 // Table intrinsics
@@ -256,16 +260,30 @@ def int_wasm_relaxed_trunc_unsigned:
             [llvm_v4f32_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
-def int_wasm_relaxed_trunc_zero_signed:
+def int_wasm_relaxed_trunc_signed_zero:
   Intrinsic<[llvm_v4i32_ty],
             [llvm_v2f64_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
-def int_wasm_relaxed_trunc_zero_unsigned:
+def int_wasm_relaxed_trunc_unsigned_zero:
   Intrinsic<[llvm_v4i32_ty],
             [llvm_v2f64_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
+def int_wasm_relaxed_q15mulr_signed:
+  Intrinsic<[llvm_v8i16_ty],
+            [llvm_v8i16_ty, llvm_v8i16_ty],
+            [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_dot_i8x16_i7x16_signed:
+  Intrinsic<[llvm_v8i16_ty],
+            [llvm_v16i8_ty, llvm_v16i8_ty],
+            [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_dot_i8x16_i7x16_add_signed:
+  Intrinsic<[llvm_v4i32_ty],
+            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty],
+            [IntrNoMem, IntrSpeculatable]>;
 
 //===----------------------------------------------------------------------===//
 // Thread-local storage intrinsics
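The hunks above rename the double-precision relaxed truncations (zero_signed becomes signed_zero, zero_unsigned becomes unsigned_zero) and add the q15mulr and i8x16.i7x16 dot-product intrinsics; the ref.is_null records spell their IR names out explicitly so that is_null keeps its underscore instead of being split at every '_' by the default name mangling. A minimal sketch, outside the patch proper, of emitting the renamed truncation through the C++ API, assuming the usual tblgen enum mapping:

// Hedged sketch, not part of the patch: emit
//   <4 x i32> @llvm.wasm.relaxed.trunc.signed.zero(<2 x double>)
// under its post-rename name.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Module.h"

llvm::Value *emitRelaxedTruncSignedZero(llvm::IRBuilder<> &B, llvm::Module &M,
                                        llvm::Value *V2F64) {
  llvm::Function *F = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::wasm_relaxed_trunc_signed_zero);
  return B.CreateCall(F, {V2F64});
}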
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 8de737a1c7a5..0930abcc0993 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -31,20 +31,20 @@ let TargetPrefix = "x86" in {
 //===----------------------------------------------------------------------===//
 // FLAGS.
 let TargetPrefix = "x86" in {
-  def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
+  def int_x86_flags_read_u32 : ClangBuiltin<"__builtin_ia32_readeflags_u32">,
       Intrinsic<[llvm_i32_ty], [], []>;
-  def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
+  def int_x86_flags_read_u64 : ClangBuiltin<"__builtin_ia32_readeflags_u64">,
       Intrinsic<[llvm_i64_ty], [], []>;
-  def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
+  def int_x86_flags_write_u32 : ClangBuiltin<"__builtin_ia32_writeeflags_u32">,
      Intrinsic<[], [llvm_i32_ty], []>;
-  def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
+  def int_x86_flags_write_u64 : ClangBuiltin<"__builtin_ia32_writeeflags_u64">,
      Intrinsic<[], [llvm_i64_ty], []>;
 }
 
 //===----------------------------------------------------------------------===//
 // Read Time Stamp Counter.
 let TargetPrefix = "x86" in {
-  def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
+  def int_x86_rdtsc : ClangBuiltin<"__builtin_ia32_rdtsc">,
       Intrinsic<[llvm_i64_ty], [], []>;
   def int_x86_rdtscp : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
@@ -52,42 +52,52 @@ let TargetPrefix = "x86" in {
 // Read Performance-Monitoring Counter.
 let TargetPrefix = "x86" in {
-  def int_x86_rdpmc : GCCBuiltin<"__builtin_ia32_rdpmc">,
+  def int_x86_rdpmc : ClangBuiltin<"__builtin_ia32_rdpmc">,
       Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
 }
 
 // Read processor ID.
 let TargetPrefix = "x86" in {
-  def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">,
+  def int_x86_rdpid : ClangBuiltin<"__builtin_ia32_rdpid">,
       Intrinsic<[llvm_i32_ty], [], []>;
 }
 
+// Lock bit test.
+let TargetPrefix = "x86" in {
+  def int_x86_atomic_bts : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+                                     [ImmArg>]>;
+  def int_x86_atomic_btc : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+                                     [ImmArg>]>;
+  def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+                                     [ImmArg>]>;
+}
+
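The int_x86_atomic_bts/btc/btr records are new in this import and are overloaded on the integer width (llvm_anyint_ty), so a concrete type must be supplied when the declaration is materialized; the bit index is an i8 immediate (the ImmArg argument index is elided in the text above). A minimal sketch, outside the patch proper:

// Hedged sketch, not part of the patch: emit llvm.x86.atomic.bts.i32, the
// lock bit-test-and-set intrinsic declared above.  The intrinsic is
// overloaded on its anyint result, so the concrete type is passed to
// getDeclaration(); the bit index must be a constant because of ImmArg.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Module.h"

llvm::Value *emitAtomicBTS32(llvm::IRBuilder<> &B, llvm::Module &M,
                             llvm::Value *Ptr, unsigned Bit) {
  llvm::Function *BTS = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::x86_atomic_bts, {B.getInt32Ty()});
  return B.CreateCall(BTS, {Ptr, B.getInt8(Bit)});
}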
 //===----------------------------------------------------------------------===//
 // CET SS
 let TargetPrefix = "x86" in {
-  def int_x86_incsspd : GCCBuiltin<"__builtin_ia32_incsspd">,
+  def int_x86_incsspd : ClangBuiltin<"__builtin_ia32_incsspd">,
       Intrinsic<[], [llvm_i32_ty], []>;
-  def int_x86_incsspq : GCCBuiltin<"__builtin_ia32_incsspq">,
+  def int_x86_incsspq : ClangBuiltin<"__builtin_ia32_incsspq">,
       Intrinsic<[], [llvm_i64_ty], []>;
-  def int_x86_rdsspd : GCCBuiltin<"__builtin_ia32_rdsspd">,
+  def int_x86_rdsspd : ClangBuiltin<"__builtin_ia32_rdsspd">,
       Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-  def int_x86_rdsspq : GCCBuiltin<"__builtin_ia32_rdsspq">,
+  def int_x86_rdsspq : ClangBuiltin<"__builtin_ia32_rdsspq">,
       Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
-  def int_x86_saveprevssp : GCCBuiltin<"__builtin_ia32_saveprevssp">,
+  def int_x86_saveprevssp : ClangBuiltin<"__builtin_ia32_saveprevssp">,
       Intrinsic<[], [], []>;
-  def int_x86_rstorssp : GCCBuiltin<"__builtin_ia32_rstorssp">,
+  def int_x86_rstorssp : ClangBuiltin<"__builtin_ia32_rstorssp">,
       Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_x86_wrssd : GCCBuiltin<"__builtin_ia32_wrssd">,
+  def int_x86_wrssd : ClangBuiltin<"__builtin_ia32_wrssd">,
       Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
-  def int_x86_wrssq : GCCBuiltin<"__builtin_ia32_wrssq">,
+  def int_x86_wrssq : ClangBuiltin<"__builtin_ia32_wrssq">,
       Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>;
-  def int_x86_wrussd : GCCBuiltin<"__builtin_ia32_wrussd">,
+  def int_x86_wrussd : ClangBuiltin<"__builtin_ia32_wrussd">,
       Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
-  def int_x86_wrussq : GCCBuiltin<"__builtin_ia32_wrussq">,
+  def int_x86_wrussq : ClangBuiltin<"__builtin_ia32_wrussq">,
       Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>;
-  def int_x86_setssbsy : GCCBuiltin<"__builtin_ia32_setssbsy">,
+  def int_x86_setssbsy : ClangBuiltin<"__builtin_ia32_setssbsy">,
       Intrinsic<[], [], []>;
-  def int_x86_clrssbsy : GCCBuiltin<"__builtin_ia32_clrssbsy">,
+  def int_x86_clrssbsy : ClangBuiltin<"__builtin_ia32_clrssbsy">,
       Intrinsic<[], [llvm_ptr_ty], []>;
 }
 
@@ -95,57 +105,57 @@ let TargetPrefix = "x86" in {
 // 3DNow!
 let TargetPrefix = "x86" in {
-  def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
+  def int_x86_3dnow_pavgusb : ClangBuiltin<"__builtin_ia32_pavgusb">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
+  def int_x86_3dnow_pf2id : ClangBuiltin<"__builtin_ia32_pf2id">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
+  def int_x86_3dnow_pfacc : ClangBuiltin<"__builtin_ia32_pfacc">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
+  def int_x86_3dnow_pfadd : ClangBuiltin<"__builtin_ia32_pfadd">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
+  def int_x86_3dnow_pfcmpeq : ClangBuiltin<"__builtin_ia32_pfcmpeq">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
+  def int_x86_3dnow_pfcmpge : ClangBuiltin<"__builtin_ia32_pfcmpge">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
+  def int_x86_3dnow_pfcmpgt : ClangBuiltin<"__builtin_ia32_pfcmpgt">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
+  def int_x86_3dnow_pfmax : ClangBuiltin<"__builtin_ia32_pfmax">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
+  def int_x86_3dnow_pfmin : ClangBuiltin<"__builtin_ia32_pfmin">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
+  def int_x86_3dnow_pfmul : ClangBuiltin<"__builtin_ia32_pfmul">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
+  def int_x86_3dnow_pfrcp : ClangBuiltin<"__builtin_ia32_pfrcp">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
+  def int_x86_3dnow_pfrcpit1 : ClangBuiltin<"__builtin_ia32_pfrcpit1">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
+  def int_x86_3dnow_pfrcpit2 : ClangBuiltin<"__builtin_ia32_pfrcpit2">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
+  def int_x86_3dnow_pfrsqrt : ClangBuiltin<"__builtin_ia32_pfrsqrt">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
+  def int_x86_3dnow_pfrsqit1 : ClangBuiltin<"__builtin_ia32_pfrsqit1">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
+  def int_x86_3dnow_pfsub : ClangBuiltin<"__builtin_ia32_pfsub">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
+  def int_x86_3dnow_pfsubr : ClangBuiltin<"__builtin_ia32_pfsubr">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
+  def int_x86_3dnow_pi2fd : ClangBuiltin<"__builtin_ia32_pi2fd">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
+  def int_x86_3dnow_pmulhrw : ClangBuiltin<"__builtin_ia32_pmulhrw">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
@@ -154,15 +164,15 @@ let TargetPrefix = "x86" in {
 // 3DNow! extensions
 let TargetPrefix = "x86" in {
-  def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
+  def int_x86_3dnowa_pf2iw : ClangBuiltin<"__builtin_ia32_pf2iw">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
+  def int_x86_3dnowa_pfnacc : ClangBuiltin<"__builtin_ia32_pfnacc">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
+  def int_x86_3dnowa_pfpnacc : ClangBuiltin<"__builtin_ia32_pfpnacc">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
+  def int_x86_3dnowa_pi2fw : ClangBuiltin<"__builtin_ia32_pi2fw">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_3dnowa_pswapd :
       Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
@@ -173,35 +183,35 @@ let TargetPrefix = "x86" in {
 // Arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
+  def int_x86_sse_rcp_ss : ClangBuiltin<"__builtin_ia32_rcpss">,
       Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,
+  def int_x86_sse_rcp_ps : ClangBuiltin<"__builtin_ia32_rcpps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,
+  def int_x86_sse_rsqrt_ss : ClangBuiltin<"__builtin_ia32_rsqrtss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">,
+  def int_x86_sse_rsqrt_ps : ClangBuiltin<"__builtin_ia32_rsqrtps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">,
+  def int_x86_sse_min_ss : ClangBuiltin<"__builtin_ia32_minss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">,
+  def int_x86_sse_min_ps : ClangBuiltin<"__builtin_ia32_minps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">,
+  def int_x86_sse_max_ss : ClangBuiltin<"__builtin_ia32_maxss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">,
+  def int_x86_sse_max_ps : ClangBuiltin<"__builtin_ia32_maxps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
 }
 
 // Comparison ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
+  def int_x86_sse_cmp_ss : ClangBuiltin<"__builtin_ia32_cmpss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
                [IntrNoMem, ImmArg>]>;
   // NOTE: This comparison intrinsic is not used by clang as long as the
   def int_x86_sse_cmp_ps :
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
               [IntrNoMem, ImmArg>]>;
-  def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
+  def int_x86_sse_comieq_ss : ClangBuiltin<"__builtin_ia32_comieq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">,
+  def int_x86_sse_comilt_ss : ClangBuiltin<"__builtin_ia32_comilt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">,
+  def int_x86_sse_comile_ss : ClangBuiltin<"__builtin_ia32_comile">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">,
+  def int_x86_sse_comigt_ss : ClangBuiltin<"__builtin_ia32_comigt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">,
+  def int_x86_sse_comige_ss : ClangBuiltin<"__builtin_ia32_comige">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">,
+  def int_x86_sse_comineq_ss : ClangBuiltin<"__builtin_ia32_comineq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">,
+  def int_x86_sse_ucomieq_ss : ClangBuiltin<"__builtin_ia32_ucomieq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">,
+  def int_x86_sse_ucomilt_ss : ClangBuiltin<"__builtin_ia32_ucomilt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">,
+  def int_x86_sse_ucomile_ss : ClangBuiltin<"__builtin_ia32_ucomile">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">,
+  def int_x86_sse_ucomigt_ss : ClangBuiltin<"__builtin_ia32_ucomigt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">,
+  def int_x86_sse_ucomige_ss : ClangBuiltin<"__builtin_ia32_ucomige">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">,
+  def int_x86_sse_ucomineq_ss : ClangBuiltin<"__builtin_ia32_ucomineq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
 }
 
@@ -250,27 +260,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
+  def int_x86_sse_cvtss2si : ClangBuiltin<"__builtin_ia32_cvtss2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">,
+  def int_x86_sse_cvtss2si64 : ClangBuiltin<"__builtin_ia32_cvtss2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
+  def int_x86_sse_cvttss2si : ClangBuiltin<"__builtin_ia32_cvttss2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
+  def int_x86_sse_cvttss2si64 : ClangBuiltin<"__builtin_ia32_cvttss2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
+  def int_x86_sse_cvtps2pi : ClangBuiltin<"__builtin_ia32_cvtps2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
+  def int_x86_sse_cvttps2pi: ClangBuiltin<"__builtin_ia32_cvttps2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
+  def int_x86_sse_cvtpi2ps : ClangBuiltin<"__builtin_ia32_cvtpi2ps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
 // Cacheability support ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
+  def int_x86_sse_sfence : ClangBuiltin<"__builtin_ia32_sfence">,
      Intrinsic<[], [], []>;
 }
 
@@ -291,7 +301,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Misc.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">,
+  def int_x86_sse_movmsk_ps : ClangBuiltin<"__builtin_ia32_movmskps">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
 }
 
@@ -300,23 +310,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // FP arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
+  def int_x86_sse2_min_sd : ClangBuiltin<"__builtin_ia32_minsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
+  def int_x86_sse2_min_pd : ClangBuiltin<"__builtin_ia32_minpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
+  def int_x86_sse2_max_sd : ClangBuiltin<"__builtin_ia32_maxsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">,
+  def int_x86_sse2_max_pd : ClangBuiltin<"__builtin_ia32_maxpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
 }
 
 // FP comparison ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
+  def int_x86_sse2_cmp_sd : ClangBuiltin<"__builtin_ia32_cmpsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
               [IntrNoMem, ImmArg>]>;
   // NOTE: This comparison intrinsic is not used by clang as long as the
@@ -324,176 +334,176 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 def int_x86_sse2_cmp_pd :
     Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
               [IntrNoMem, ImmArg>]>;
-  def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
+  def int_x86_sse2_comieq_sd : ClangBuiltin<"__builtin_ia32_comisdeq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">,
+  def int_x86_sse2_comilt_sd : ClangBuiltin<"__builtin_ia32_comisdlt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">,
+  def int_x86_sse2_comile_sd : ClangBuiltin<"__builtin_ia32_comisdle">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">,
+  def int_x86_sse2_comigt_sd : ClangBuiltin<"__builtin_ia32_comisdgt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">,
+  def int_x86_sse2_comige_sd : ClangBuiltin<"__builtin_ia32_comisdge">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">,
+  def int_x86_sse2_comineq_sd : ClangBuiltin<"__builtin_ia32_comisdneq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">,
+  def int_x86_sse2_ucomieq_sd : ClangBuiltin<"__builtin_ia32_ucomisdeq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">,
+  def int_x86_sse2_ucomilt_sd : ClangBuiltin<"__builtin_ia32_ucomisdlt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">,
+  def int_x86_sse2_ucomile_sd : ClangBuiltin<"__builtin_ia32_ucomisdle">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">,
+  def int_x86_sse2_ucomigt_sd : ClangBuiltin<"__builtin_ia32_ucomisdgt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">,
+  def int_x86_sse2_ucomige_sd : ClangBuiltin<"__builtin_ia32_ucomisdge">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">,
+  def int_x86_sse2_ucomineq_sd : ClangBuiltin<"__builtin_ia32_ucomisdneq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
 }
 
 // Integer arithmetic ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
+  def int_x86_sse2_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
               [IntrNoMem, Commutative]>;
-  def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
+  def int_x86_sse2_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
               [IntrNoMem, Commutative]>;
-  def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
+  def int_x86_sse2_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
               [IntrNoMem, Commutative]>;
-  def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
+  def int_x86_sse2_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
               [IntrNoMem, Commutative]>;
-  def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
+  def int_x86_sse2_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
               [IntrNoMem, Commutative]>;
-  def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
+  def int_x86_sse2_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
               [IntrNoMem, Commutative]>;
 }
 
 // Integer shift ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
+  def int_x86_sse2_psll_w : ClangBuiltin<"__builtin_ia32_psllw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
+  def int_x86_sse2_psll_d : ClangBuiltin<"__builtin_ia32_pslld128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
+  def int_x86_sse2_psll_q : ClangBuiltin<"__builtin_ia32_psllq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
+  def int_x86_sse2_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">,
+  def int_x86_sse2_psrl_d : ClangBuiltin<"__builtin_ia32_psrld128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
+  def int_x86_sse2_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
+  def int_x86_sse2_psra_w : ClangBuiltin<"__builtin_ia32_psraw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
+  def int_x86_sse2_psra_d : ClangBuiltin<"__builtin_ia32_psrad128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
 
   // Oddly these don't require an immediate due to a gcc compatibility issue.
-  def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
+  def int_x86_sse2_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
+  def int_x86_sse2_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
+  def int_x86_sse2_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
+  def int_x86_sse2_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
+  def int_x86_sse2_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
+  def int_x86_sse2_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
+  def int_x86_sse2_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
+  def int_x86_sse2_psrai_d : ClangBuiltin<"__builtin_ia32_psradi128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
 }
 
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
+  def int_x86_sse2_cvtpd2dq : ClangBuiltin<"__builtin_ia32_cvtpd2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
+  def int_x86_sse2_cvttpd2dq : ClangBuiltin<"__builtin_ia32_cvttpd2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
+  def int_x86_sse2_cvtpd2ps : ClangBuiltin<"__builtin_ia32_cvtpd2ps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
+  def int_x86_sse2_cvtps2dq : ClangBuiltin<"__builtin_ia32_cvtps2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
+  def int_x86_sse2_cvttps2dq : ClangBuiltin<"__builtin_ia32_cvttps2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
+  def int_x86_sse2_cvtsd2si : ClangBuiltin<"__builtin_ia32_cvtsd2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
+  def int_x86_sse2_cvtsd2si64 : ClangBuiltin<"__builtin_ia32_cvtsd2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
+  def int_x86_sse2_cvttsd2si : ClangBuiltin<"__builtin_ia32_cvttsd2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
+  def int_x86_sse2_cvttsd2si64 : ClangBuiltin<"__builtin_ia32_cvttsd2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
+  def int_x86_sse2_cvtsd2ss : ClangBuiltin<"__builtin_ia32_cvtsd2ss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
+  def int_x86_sse_cvtpd2pi : ClangBuiltin<"__builtin_ia32_cvtpd2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
+  def int_x86_sse_cvttpd2pi: ClangBuiltin<"__builtin_ia32_cvttpd2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
+  def int_x86_sse_cvtpi2pd : ClangBuiltin<"__builtin_ia32_cvtpi2pd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
 // Misc.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
+  def int_x86_sse2_packsswb_128 : ClangBuiltin<"__builtin_ia32_packsswb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
+  def int_x86_sse2_packssdw_128 : ClangBuiltin<"__builtin_ia32_packssdw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
+  def int_x86_sse2_packuswb_128 : ClangBuiltin<"__builtin_ia32_packuswb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
+  def int_x86_sse2_movmsk_pd : ClangBuiltin<"__builtin_ia32_movmskpd">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
+  def int_x86_sse2_pmovmskb_128 : ClangBuiltin<"__builtin_ia32_pmovmskb128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
+  def int_x86_sse2_maskmov_dqu : ClangBuiltin<"__builtin_ia32_maskmovdqu">,
      Intrinsic<[], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty], []>;
-  def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
+  def int_x86_sse2_clflush : ClangBuiltin<"__builtin_ia32_clflush">,
      Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
+  def int_x86_sse2_lfence : ClangBuiltin<"__builtin_ia32_lfence">,
      Intrinsic<[], [], []>;
-  def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
+  def int_x86_sse2_mfence : ClangBuiltin<"__builtin_ia32_mfence">,
      Intrinsic<[], [], []>;
-  def int_x86_sse2_pause : GCCBuiltin<"__builtin_ia32_pause">,
+  def int_x86_sse2_pause : ClangBuiltin<"__builtin_ia32_pause">,
      Intrinsic<[], [], []>;
 }
 
@@ -502,42 +512,42 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Addition / subtraction ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
+  def int_x86_sse3_addsub_ps : ClangBuiltin<"__builtin_ia32_addsubps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
+  def int_x86_sse3_addsub_pd : ClangBuiltin<"__builtin_ia32_addsubpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
 }
 
 // Horizontal ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
+  def int_x86_sse3_hadd_ps : ClangBuiltin<"__builtin_ia32_haddps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">,
+  def int_x86_sse3_hadd_pd : ClangBuiltin<"__builtin_ia32_haddpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">,
+  def int_x86_sse3_hsub_ps : ClangBuiltin<"__builtin_ia32_hsubps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">,
+  def int_x86_sse3_hsub_pd : ClangBuiltin<"__builtin_ia32_hsubpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
 }
 
 // Specialized unaligned load.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
+  def int_x86_sse3_ldu_dq : ClangBuiltin<"__builtin_ia32_lddqu">,
      Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
 }
 
 // Thread synchronization ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
+  def int_x86_sse3_monitor : ClangBuiltin<"__builtin_ia32_monitor">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
-  def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
+  def int_x86_sse3_mwait : ClangBuiltin<"__builtin_ia32_mwait">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
 }
 
@@ -547,112 +557,112 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Horizontal arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">,
+  def int_x86_ssse3_phadd_w : ClangBuiltin<"__builtin_ia32_phaddw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">,
+  def int_x86_ssse3_phadd_w_128 : ClangBuiltin<"__builtin_ia32_phaddw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">,
+  def int_x86_ssse3_phadd_d : ClangBuiltin<"__builtin_ia32_phaddd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">,
+  def int_x86_ssse3_phadd_d_128 : ClangBuiltin<"__builtin_ia32_phaddd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">,
+  def int_x86_ssse3_phadd_sw : ClangBuiltin<"__builtin_ia32_phaddsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
+  def int_x86_ssse3_phadd_sw_128 : ClangBuiltin<"__builtin_ia32_phaddsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
+  def int_x86_ssse3_phsub_w : ClangBuiltin<"__builtin_ia32_phsubw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">,
+  def int_x86_ssse3_phsub_w_128 : ClangBuiltin<"__builtin_ia32_phsubw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">,
+  def int_x86_ssse3_phsub_d : ClangBuiltin<"__builtin_ia32_phsubd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">,
+  def int_x86_ssse3_phsub_d_128 : ClangBuiltin<"__builtin_ia32_phsubd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">,
+  def int_x86_ssse3_phsub_sw : ClangBuiltin<"__builtin_ia32_phsubsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">,
+  def int_x86_ssse3_phsub_sw_128 : ClangBuiltin<"__builtin_ia32_phsubsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
+  def int_x86_ssse3_pmadd_ub_sw : ClangBuiltin<"__builtin_ia32_pmaddubsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
+  def int_x86_ssse3_pmadd_ub_sw_128 : ClangBuiltin<"__builtin_ia32_pmaddubsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 }
 
 // Packed multiply high with round and scale
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
+  def int_x86_ssse3_pmul_hr_sw : ClangBuiltin<"__builtin_ia32_pmulhrsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
               [IntrNoMem, Commutative]>;
-  def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
+  def int_x86_ssse3_pmul_hr_sw_128 : ClangBuiltin<"__builtin_ia32_pmulhrsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
               [IntrNoMem, Commutative]>;
 }
 
 // Shuffle ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">,
+  def int_x86_ssse3_pshuf_b : ClangBuiltin<"__builtin_ia32_pshufb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
+  def int_x86_ssse3_pshuf_b_128 : ClangBuiltin<"__builtin_ia32_pshufb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
+  def int_x86_sse_pshuf_w : ClangBuiltin<"__builtin_ia32_pshufw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
               [IntrNoMem, ImmArg>]>;
 }
 
 // Sign ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">,
+  def int_x86_ssse3_psign_b : ClangBuiltin<"__builtin_ia32_psignb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">,
+  def int_x86_ssse3_psign_b_128 : ClangBuiltin<"__builtin_ia32_psignb128">,
     Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">,
+  def int_x86_ssse3_psign_w : ClangBuiltin<"__builtin_ia32_psignw">,
     Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">,
+  def int_x86_ssse3_psign_w_128 : ClangBuiltin<"__builtin_ia32_psignw128">,
    Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">,
+  def int_x86_ssse3_psign_d : ClangBuiltin<"__builtin_ia32_psignd">,
    Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">,
+  def int_x86_ssse3_psign_d_128 : ClangBuiltin<"__builtin_ia32_psignd128">,
    Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
 }
 
 // Absolute value ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
+  def int_x86_ssse3_pabs_b : ClangBuiltin<"__builtin_ia32_pabsb">,
    Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
+  def int_x86_ssse3_pabs_w : ClangBuiltin<"__builtin_ia32_pabsw">,
    Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
+  def int_x86_ssse3_pabs_d : ClangBuiltin<"__builtin_ia32_pabsd">,
    Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
@@ -661,149 +671,149 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // FP rounding ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
+  def int_x86_sse41_round_ss : ClangBuiltin<"__builtin_ia32_roundss">,
    Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
+  def int_x86_sse41_round_ps : ClangBuiltin<"__builtin_ia32_roundps">,
    Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
+  def int_x86_sse41_round_sd : ClangBuiltin<"__builtin_ia32_roundsd">,
    Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
+  def int_x86_sse41_round_pd : ClangBuiltin<"__builtin_ia32_roundpd">,
    Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty],
              [IntrNoMem, ImmArg>]>;
 }
 
 // Vector min element
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
+  def int_x86_sse41_phminposuw : ClangBuiltin<"__builtin_ia32_phminposuw128">,
    Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
 }
 
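The sse41 rounding intrinsics a few hunks up take their rounding control as an ImmArg operand, so it must be a compile-time constant (an immediate of 0 selects round-to-nearest; the exact ImmArg argument indices are elided in the text above). A minimal sketch, outside the patch proper:

// Hedged sketch, not part of the patch: emit llvm.x86.sse41.round.ps with a
// constant rounding-control immediate, as required by ImmArg.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Module.h"

llvm::Value *emitRoundPSNearest(llvm::IRBuilder<> &B, llvm::Module &M,
                                llvm::Value *V4F32) {
  llvm::Function *Round = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::x86_sse41_round_ps);
  return B.CreateCall(Round, {V4F32, B.getInt32(0)});
}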
 // Advanced Encryption Standard (AES) Instructions
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
+  def int_x86_aesni_aesimc : ClangBuiltin<"__builtin_ia32_aesimc128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">,
+  def int_x86_aesni_aesenc : ClangBuiltin<"__builtin_ia32_aesenc128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesenc_256 : GCCBuiltin<"__builtin_ia32_aesenc256">,
+  def int_x86_aesni_aesenc_256 : ClangBuiltin<"__builtin_ia32_aesenc256">,
    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesenc_512 : GCCBuiltin<"__builtin_ia32_aesenc512">,
+  def int_x86_aesni_aesenc_512 : ClangBuiltin<"__builtin_ia32_aesenc512">,
    Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">,
+  def int_x86_aesni_aesenclast : ClangBuiltin<"__builtin_ia32_aesenclast128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
   def int_x86_aesni_aesenclast_256 :
-      GCCBuiltin<"__builtin_ia32_aesenclast256">,
+      ClangBuiltin<"__builtin_ia32_aesenclast256">,
    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
   def int_x86_aesni_aesenclast_512 :
-      GCCBuiltin<"__builtin_ia32_aesenclast512">,
+      ClangBuiltin<"__builtin_ia32_aesenclast512">,
    Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">,
+  def int_x86_aesni_aesdec : ClangBuiltin<"__builtin_ia32_aesdec128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesdec_256 : GCCBuiltin<"__builtin_ia32_aesdec256">,
+  def int_x86_aesni_aesdec_256 : ClangBuiltin<"__builtin_ia32_aesdec256">,
    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesdec_512 : GCCBuiltin<"__builtin_ia32_aesdec512">,
+  def int_x86_aesni_aesdec_512 : ClangBuiltin<"__builtin_ia32_aesdec512">,
    Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
-  def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
+  def int_x86_aesni_aesdeclast : ClangBuiltin<"__builtin_ia32_aesdeclast128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
   def int_x86_aesni_aesdeclast_256 :
-      GCCBuiltin<"__builtin_ia32_aesdeclast256">,
+      ClangBuiltin<"__builtin_ia32_aesdeclast256">,
    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
   def int_x86_aesni_aesdeclast_512 :
-      GCCBuiltin<"__builtin_ia32_aesdeclast512">,
+      ClangBuiltin<"__builtin_ia32_aesdeclast512">,
    Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
   def int_x86_aesni_aeskeygenassist :
-      GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
+      ClangBuiltin<"__builtin_ia32_aeskeygenassist128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
 }
 
 // PCLMUL instructions
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
+  def int_x86_pclmulqdq : ClangBuiltin<"__builtin_ia32_pclmulqdq128">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">,
+  def int_x86_pclmulqdq_256 : ClangBuiltin<"__builtin_ia32_pclmulqdq256">,
    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">,
+  def int_x86_pclmulqdq_512 : ClangBuiltin<"__builtin_ia32_pclmulqdq512">,
    Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
 }
 
 // Vector pack
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">,
+  def int_x86_sse41_packusdw : ClangBuiltin<"__builtin_ia32_packusdw128">,
    Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
 }
 
 // Vector insert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
+  def int_x86_sse41_insertps : ClangBuiltin<"__builtin_ia32_insertps128">,
    Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
 }
 
 // Vector blend
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
+  def int_x86_sse41_pblendvb : ClangBuiltin<"__builtin_ia32_pblendvb128">,
    Intrinsic<[llvm_v16i8_ty],
              [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
+  def int_x86_sse41_blendvpd : ClangBuiltin<"__builtin_ia32_blendvpd">,
    Intrinsic<[llvm_v2f64_ty],
              [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">,
+  def int_x86_sse41_blendvps : ClangBuiltin<"__builtin_ia32_blendvps">,
    Intrinsic<[llvm_v4f32_ty],
              [llvm_v4f32_ty, llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>;
 }
 
 // Vector dot product
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
+  def int_x86_sse41_dppd : ClangBuiltin<"__builtin_ia32_dppd">,
    Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
              [IntrNoMem, Commutative, ImmArg>]>;
-  def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
+  def int_x86_sse41_dpps : ClangBuiltin<"__builtin_ia32_dpps">,
    Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
              [IntrNoMem, Commutative, ImmArg>]>;
 }
 
 // Vector sum of absolute differences
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
+  def int_x86_sse41_mpsadbw : ClangBuiltin<"__builtin_ia32_mpsadbw128">,
    Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
 }
 
 // Test instruction with bitwise comparison.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
+  def int_x86_sse41_ptestz : ClangBuiltin<"__builtin_ia32_ptestz128">,
    Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
+  def int_x86_sse41_ptestc : ClangBuiltin<"__builtin_ia32_ptestc128">,
    Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
+  def int_x86_sse41_ptestnzc : ClangBuiltin<"__builtin_ia32_ptestnzc128">,
    Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 }
 
@@ -814,81 +824,81 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Miscellaneous
 // CRC Instruction
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse42_crc32_32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
+  def int_x86_sse42_crc32_32_8 : ClangBuiltin<"__builtin_ia32_crc32qi">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_sse42_crc32_32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
+  def int_x86_sse42_crc32_32_16 : ClangBuiltin<"__builtin_ia32_crc32hi">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
+  def int_x86_sse42_crc32_32_32 : ClangBuiltin<"__builtin_ia32_crc32si">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
+  def int_x86_sse42_crc32_64_64 : ClangBuiltin<"__builtin_ia32_crc32di">,
    Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
 }
 
 // String/text processing ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
+  def int_x86_sse42_pcmpistrm128 : ClangBuiltin<"__builtin_ia32_pcmpistrm128">,
    Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
+  def int_x86_sse42_pcmpistri128 : ClangBuiltin<"__builtin_ia32_pcmpistri128">,
    Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
+  def int_x86_sse42_pcmpistria128 : ClangBuiltin<"__builtin_ia32_pcmpistria128">,
    Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
+  def int_x86_sse42_pcmpistric128 : ClangBuiltin<"__builtin_ia32_pcmpistric128">,
    Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
+  def int_x86_sse42_pcmpistrio128 : ClangBuiltin<"__builtin_ia32_pcmpistrio128">,
    Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
+  def int_x86_sse42_pcmpistris128 : ClangBuiltin<"__builtin_ia32_pcmpistris128">,
    Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
+  def int_x86_sse42_pcmpistriz128 : ClangBuiltin<"__builtin_ia32_pcmpistriz128">,
    Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
+  def int_x86_sse42_pcmpestrm128 : ClangBuiltin<"__builtin_ia32_pcmpestrm128">,
    Intrinsic<[llvm_v16i8_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
+  def int_x86_sse42_pcmpestri128 : ClangBuiltin<"__builtin_ia32_pcmpestri128">,
    Intrinsic<[llvm_i32_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
+  def int_x86_sse42_pcmpestria128 : ClangBuiltin<"__builtin_ia32_pcmpestria128">,
    Intrinsic<[llvm_i32_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
+  def int_x86_sse42_pcmpestric128 : ClangBuiltin<"__builtin_ia32_pcmpestric128">,
    Intrinsic<[llvm_i32_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
             [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
+  def int_x86_sse42_pcmpestrio128 : ClangBuiltin<"__builtin_ia32_pcmpestrio128">,
    Intrinsic<[llvm_i32_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
+  def int_x86_sse42_pcmpestris128 : ClangBuiltin<"__builtin_ia32_pcmpestris128">,
    Intrinsic<[llvm_i32_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
+  def int_x86_sse42_pcmpestriz128 : ClangBuiltin<"__builtin_ia32_pcmpestriz128">,
    Intrinsic<[llvm_i32_ty],
              [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
@@ -899,17 +909,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // SSE4A
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
+  def int_x86_sse4a_extrqi : ClangBuiltin<"__builtin_ia32_extrqi">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
              [IntrNoMem, ImmArg>, ImmArg>]>;
-  def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
+  def int_x86_sse4a_extrq : ClangBuiltin<"__builtin_ia32_extrq">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
+  def int_x86_sse4a_insertqi : ClangBuiltin<"__builtin_ia32_insertqi">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty,
               llvm_i8_ty], [IntrNoMem, ImmArg>, ImmArg>]>;
-  def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
+  def int_x86_sse4a_insertq : ClangBuiltin<"__builtin_ia32_insertq">,
    Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 }
 
@@ -918,177 +928,177 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
+  def int_x86_avx_addsub_pd_256 : ClangBuiltin<"__builtin_ia32_addsubpd256">,
    Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
-  def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
+  def int_x86_avx_addsub_ps_256 : ClangBuiltin<"__builtin_ia32_addsubps256">,
    Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
+  def int_x86_avx_max_pd_256 : ClangBuiltin<"__builtin_ia32_maxpd256">,
    Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
-  def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
+  def int_x86_avx_max_ps_256 : ClangBuiltin<"__builtin_ia32_maxps256">,
    Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
+  def int_x86_avx_min_pd_256 : ClangBuiltin<"__builtin_ia32_minpd256">,
    Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
-  def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
+  def int_x86_avx_min_ps_256 : ClangBuiltin<"__builtin_ia32_minps256">,
    Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
+  def int_x86_avx_rsqrt_ps_256 : ClangBuiltin<"__builtin_ia32_rsqrtps256">,
    Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
+  def int_x86_avx_rcp_ps_256 : ClangBuiltin<"__builtin_ia32_rcpps256">,
    Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
+  def int_x86_avx_round_pd_256 : ClangBuiltin<"__builtin_ia32_roundpd256">,
    Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty],
              [IntrNoMem, ImmArg>]>;
-  def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
+  def int_x86_avx_round_ps_256 : ClangBuiltin<"__builtin_ia32_roundps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; } // Horizontal ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">, + def int_x86_avx_hadd_pd_256 : ClangBuiltin<"__builtin_ia32_haddpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">, + def int_x86_avx_hsub_ps_256 : ClangBuiltin<"__builtin_ia32_hsubps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">, + def int_x86_avx_hsub_pd_256 : ClangBuiltin<"__builtin_ia32_hsubpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">, + def int_x86_avx_hadd_ps_256 : ClangBuiltin<"__builtin_ia32_haddps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; } // Vector permutation let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">, + def int_x86_avx_vpermilvar_pd : ClangBuiltin<"__builtin_ia32_vpermilvarpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">, + def int_x86_avx_vpermilvar_ps : ClangBuiltin<"__builtin_ia32_vpermilvarps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx_vpermilvar_pd_256 : - GCCBuiltin<"__builtin_ia32_vpermilvarpd256">, + ClangBuiltin<"__builtin_ia32_vpermilvarpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx_vpermilvar_ps_256 : - GCCBuiltin<"__builtin_ia32_vpermilvarps256">, + ClangBuiltin<"__builtin_ia32_vpermilvarps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_d_128 : - GCCBuiltin<"__builtin_ia32_vpermi2vard128">, + ClangBuiltin<"__builtin_ia32_vpermi2vard128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_d_256 : - GCCBuiltin<"__builtin_ia32_vpermi2vard256">, + ClangBuiltin<"__builtin_ia32_vpermi2vard256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_d_512 : - GCCBuiltin<"__builtin_ia32_vpermi2vard512">, + ClangBuiltin<"__builtin_ia32_vpermi2vard512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_hi_128 : - GCCBuiltin<"__builtin_ia32_vpermi2varhi128">, + ClangBuiltin<"__builtin_ia32_vpermi2varhi128">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_hi_256 : - GCCBuiltin<"__builtin_ia32_vpermi2varhi256">, + ClangBuiltin<"__builtin_ia32_vpermi2varhi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_hi_512 : - GCCBuiltin<"__builtin_ia32_vpermi2varhi512">, + ClangBuiltin<"__builtin_ia32_vpermi2varhi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_pd_128 : - GCCBuiltin<"__builtin_ia32_vpermi2varpd128">, + ClangBuiltin<"__builtin_ia32_vpermi2varpd128">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, 
llvm_v2i64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_pd_256 : - GCCBuiltin<"__builtin_ia32_vpermi2varpd256">, + ClangBuiltin<"__builtin_ia32_vpermi2varpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_pd_512 : - GCCBuiltin<"__builtin_ia32_vpermi2varpd512">, + ClangBuiltin<"__builtin_ia32_vpermi2varpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_ps_128 : - GCCBuiltin<"__builtin_ia32_vpermi2varps128">, + ClangBuiltin<"__builtin_ia32_vpermi2varps128">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_ps_256 : - GCCBuiltin<"__builtin_ia32_vpermi2varps256">, + ClangBuiltin<"__builtin_ia32_vpermi2varps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_ps_512 : - GCCBuiltin<"__builtin_ia32_vpermi2varps512">, + ClangBuiltin<"__builtin_ia32_vpermi2varps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_q_128 : - GCCBuiltin<"__builtin_ia32_vpermi2varq128">, + ClangBuiltin<"__builtin_ia32_vpermi2varq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_q_256 : - GCCBuiltin<"__builtin_ia32_vpermi2varq256">, + ClangBuiltin<"__builtin_ia32_vpermi2varq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_q_512 : - GCCBuiltin<"__builtin_ia32_vpermi2varq512">, + ClangBuiltin<"__builtin_ia32_vpermi2varq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_qi_128 : - GCCBuiltin<"__builtin_ia32_vpermi2varqi128">, + ClangBuiltin<"__builtin_ia32_vpermi2varqi128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_qi_256 : - GCCBuiltin<"__builtin_ia32_vpermi2varqi256">, + ClangBuiltin<"__builtin_ia32_vpermi2varqi256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx512_vpermi2var_qi_512 : - GCCBuiltin<"__builtin_ia32_vpermi2varqi512">, + ClangBuiltin<"__builtin_ia32_vpermi2varqi512">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; def int_x86_avx512_vpermilvar_pd_512 : - GCCBuiltin<"__builtin_ia32_vpermilvarpd512">, + ClangBuiltin<"__builtin_ia32_vpermilvarpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty], [IntrNoMem]>; def int_x86_avx512_vpermilvar_ps_512 : - GCCBuiltin<"__builtin_ia32_vpermilvarps512">, + ClangBuiltin<"__builtin_ia32_vpermilvarps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_pshuf_b_512 : - GCCBuiltin<"__builtin_ia32_pshufb512">, + ClangBuiltin<"__builtin_ia32_pshufb512">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; @@ -1097,49 +1107,49 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // GFNI Instructions let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
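(The vpermi2var family above implements two-source permutes: an index vector selects elements from either of two source tables. Clang's <immintrin.h> wrapper for the 512-bit dword form is _mm512_permutex2var_epi32; a sketch assuming -mavx512f, not part of the patch:

    #include <immintrin.h>

    /* Each idx lane picks from a (table-select bit clear) or b (set). */
    __m512i pick2(__m512i a, __m512i idx, __m512i b) {
      return _mm512_permutex2var_epi32(a, idx, b);
    }
)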
def int_x86_vgf2p8affineinvqb_128 : - GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineinvqb_256 : - GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineinvqb_512 : - GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineqb_128 : - GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineqb_256 : - GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineqb_512 : - GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8mulb_128 : - GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_vgf2p8mulb_256 : - GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v32qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8mulb_v32qi">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_vgf2p8mulb_512 : - GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v64qi">, + ClangBuiltin<"__builtin_ia32_vgf2p8mulb_v64qi">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; @@ -1147,17 +1157,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">, + def int_x86_avx_blendv_pd_256 : ClangBuiltin<"__builtin_ia32_blendvpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">, + def int_x86_avx_blendv_ps_256 : ClangBuiltin<"__builtin_ia32_blendvps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; } // Vector dot product let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">, + def int_x86_avx_dp_ps_256 : ClangBuiltin<"__builtin_ia32_dpps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, Commutative, ImmArg>]>; @@ -1175,63 +1185,63 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
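(The GFNI defs above cover affine transforms and multiplies in GF(2^8) over the AES reduction polynomial. The vgf2p8mulb builtin named above is reachable through _mm_gf2p8mul_epi8; a sketch assuming -mgfni -msse2, not part of the patch:

    #include <immintrin.h>

    /* Lane-wise GF(2^8) multiply, polynomial x^8 + x^4 + x^3 + x + 1. */
    __m128i gf_mul(__m128i a, __m128i b) {
      return _mm_gf2p8mul_epi8(a, b);  /* llvm.x86.vgf2p8mulb.128 */
    }
)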
- def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">, + def int_x86_avx_cvt_pd2_ps_256 : ClangBuiltin<"__builtin_ia32_cvtpd2ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">, + def int_x86_avx_cvt_ps2dq_256 : ClangBuiltin<"__builtin_ia32_cvtps2dq256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">, + def int_x86_avx_cvtt_pd2dq_256 : ClangBuiltin<"__builtin_ia32_cvttpd2dq256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">, + def int_x86_avx_cvt_pd2dq_256 : ClangBuiltin<"__builtin_ia32_cvtpd2dq256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">, + def int_x86_avx_cvtt_ps2dq_256 : ClangBuiltin<"__builtin_ia32_cvttps2dq256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; } // Vector bit test let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">, + def int_x86_avx_vtestz_pd : ClangBuiltin<"__builtin_ia32_vtestzpd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">, + def int_x86_avx_vtestc_pd : ClangBuiltin<"__builtin_ia32_vtestcpd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">, + def int_x86_avx_vtestnzc_pd : ClangBuiltin<"__builtin_ia32_vtestnzcpd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">, + def int_x86_avx_vtestz_ps : ClangBuiltin<"__builtin_ia32_vtestzps">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">, + def int_x86_avx_vtestc_ps : ClangBuiltin<"__builtin_ia32_vtestcps">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">, + def int_x86_avx_vtestnzc_ps : ClangBuiltin<"__builtin_ia32_vtestnzcps">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">, + def int_x86_avx_vtestz_pd_256 : ClangBuiltin<"__builtin_ia32_vtestzpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">, + def int_x86_avx_vtestc_pd_256 : ClangBuiltin<"__builtin_ia32_vtestcpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">, + def int_x86_avx_vtestnzc_pd_256 : ClangBuiltin<"__builtin_ia32_vtestnzcpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">, + def int_x86_avx_vtestz_ps_256 : ClangBuiltin<"__builtin_ia32_vtestzps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">, + def int_x86_avx_vtestc_ps_256 : ClangBuiltin<"__builtin_ia32_vtestcps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, 
llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">, + def int_x86_avx_vtestnzc_ps_256 : ClangBuiltin<"__builtin_ia32_vtestnzcps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">, + def int_x86_avx_ptestz_256 : ClangBuiltin<"__builtin_ia32_ptestz256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">, + def int_x86_avx_ptestc_256 : ClangBuiltin<"__builtin_ia32_ptestc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, + def int_x86_avx_ptestnzc_256 : ClangBuiltin<"__builtin_ia32_ptestnzc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; @@ -1254,67 +1264,67 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fpclass_sd : - GCCBuiltin<"__builtin_ia32_fpclasssd_mask">, + ClangBuiltin<"__builtin_ia32_fpclasssd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fpclass_ss : - GCCBuiltin<"__builtin_ia32_fpclassss_mask">, + ClangBuiltin<"__builtin_ia32_fpclassss_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; } // Vector extract sign mask let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">, + def int_x86_avx_movmsk_pd_256 : ClangBuiltin<"__builtin_ia32_movmskpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">, + def int_x86_avx_movmsk_ps_256 : ClangBuiltin<"__builtin_ia32_movmskps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; } // Vector zero let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">, + def int_x86_avx_vzeroall : ClangBuiltin<"__builtin_ia32_vzeroall">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; - def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">, + def int_x86_avx_vzeroupper : ClangBuiltin<"__builtin_ia32_vzeroupper">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; } // SIMD load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">, + def int_x86_avx_ldu_dq_256 : ClangBuiltin<"__builtin_ia32_lddqu256">, Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; } // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
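(The vtest/ptest and movmsk defs above return scalar flags or sign masks, so they feed branches directly. A sketch with the usual Clang wrappers, assuming -mavx, not part of the patch:

    #include <immintrin.h>

    /* Nonzero result iff v has no bits set (llvm.x86.avx.ptestz.256). */
    int all_zero(__m256i v) {
      return _mm256_testz_si256(v, v);
    }

    /* One bit per double's sign bit (llvm.x86.avx.movmsk.pd.256). */
    int sign_mask(__m256d v) {
      return _mm256_movemask_pd(v);
    }
)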
- def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">, + def int_x86_avx_maskload_pd : ClangBuiltin<"__builtin_ia32_maskloadpd">, Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">, + def int_x86_avx_maskload_ps : ClangBuiltin<"__builtin_ia32_maskloadps">, Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">, + def int_x86_avx_maskload_pd_256 : ClangBuiltin<"__builtin_ia32_maskloadpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, + def int_x86_avx_maskload_ps_256 : ClangBuiltin<"__builtin_ia32_maskloadps256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem, IntrArgMemOnly]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, + def int_x86_avx_maskstore_pd : ClangBuiltin<"__builtin_ia32_maskstorepd">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty], [IntrArgMemOnly]>; - def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">, + def int_x86_avx_maskstore_ps : ClangBuiltin<"__builtin_ia32_maskstoreps">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty], [IntrArgMemOnly]>; def int_x86_avx_maskstore_pd_256 : - GCCBuiltin<"__builtin_ia32_maskstorepd256">, + ClangBuiltin<"__builtin_ia32_maskstorepd256">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty], [IntrArgMemOnly]>; def int_x86_avx_maskstore_ps_256 : - GCCBuiltin<"__builtin_ia32_maskstoreps256">, + ClangBuiltin<"__builtin_ia32_maskstoreps256">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty], [IntrArgMemOnly]>; } @@ -1334,229 +1344,229 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Integer arithmetic ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">, + def int_x86_avx2_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">, + def int_x86_avx2_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">, + def int_x86_avx2_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">, + def int_x86_avx2_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">, + def int_x86_avx2_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">, + def int_x86_avx2_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw256">, Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; } // Integer shift ops. 
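(The conditional load/store defs above touch only the lanes whose mask element has its sign bit set, which is why the loads carry IntrReadMem plus IntrArgMemOnly and the stores carry IntrArgMemOnly rather than IntrNoMem. A tail-handling sketch, assuming -mavx, not part of the patch:

    #include <immintrin.h>

    /* Scale up to four doubles in place, skipping masked-off lanes. */
    void scale_tail(double *p, __m256i mask, double s) {
      __m256d v = _mm256_maskload_pd(p, mask);
      _mm256_maskstore_pd(p, mask, _mm256_mul_pd(v, _mm256_set1_pd(s)));
    }
)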
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">, + def int_x86_avx2_psll_w : ClangBuiltin<"__builtin_ia32_psllw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">, + def int_x86_avx2_psll_d : ClangBuiltin<"__builtin_ia32_pslld256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">, + def int_x86_avx2_psll_q : ClangBuiltin<"__builtin_ia32_psllq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">, + def int_x86_avx2_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">, + def int_x86_avx2_psrl_d : ClangBuiltin<"__builtin_ia32_psrld256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">, + def int_x86_avx2_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">, + def int_x86_avx2_psra_w : ClangBuiltin<"__builtin_ia32_psraw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">, + def int_x86_avx2_psra_d : ClangBuiltin<"__builtin_ia32_psrad256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v4i32_ty], [IntrNoMem]>; // Oddly these don't require an immediate due to a gcc compatibility issue. 
- def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">, + def int_x86_avx2_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">, + def int_x86_avx2_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">, + def int_x86_avx2_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">, + def int_x86_avx2_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">, + def int_x86_avx2_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">, + def int_x86_avx2_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">, + def int_x86_avx2_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">, + def int_x86_avx2_psrai_d : ClangBuiltin<"__builtin_ia32_psradi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128">, + def int_x86_avx512_psra_q_128 : ClangBuiltin<"__builtin_ia32_psraq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256">, + def int_x86_avx512_psra_q_256 : ClangBuiltin<"__builtin_ia32_psraq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; // Oddly these don't require an immediate due to a gcc compatibility issue. 
- def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">, + def int_x86_avx512_psrai_q_128 : ClangBuiltin<"__builtin_ia32_psraqi128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrai_q_256 : GCCBuiltin<"__builtin_ia32_psraqi256">, + def int_x86_avx512_psrai_q_256 : ClangBuiltin<"__builtin_ia32_psraqi256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512">, + def int_x86_avx512_psll_w_512 : ClangBuiltin<"__builtin_ia32_psllw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_psll_d_512 : GCCBuiltin<"__builtin_ia32_pslld512">, + def int_x86_avx512_psll_d_512 : ClangBuiltin<"__builtin_ia32_pslld512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_psll_q_512 : GCCBuiltin<"__builtin_ia32_psllq512">, + def int_x86_avx512_psll_q_512 : ClangBuiltin<"__builtin_ia32_psllq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512">, + def int_x86_avx512_psrl_w_512 : ClangBuiltin<"__builtin_ia32_psrlw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrl_d_512 : GCCBuiltin<"__builtin_ia32_psrld512">, + def int_x86_avx512_psrl_d_512 : ClangBuiltin<"__builtin_ia32_psrld512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrl_q_512 : GCCBuiltin<"__builtin_ia32_psrlq512">, + def int_x86_avx512_psrl_q_512 : ClangBuiltin<"__builtin_ia32_psrlq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512">, + def int_x86_avx512_psra_w_512 : ClangBuiltin<"__builtin_ia32_psraw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_psra_d_512 : GCCBuiltin<"__builtin_ia32_psrad512">, + def int_x86_avx512_psra_d_512 : ClangBuiltin<"__builtin_ia32_psrad512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_psra_q_512 : GCCBuiltin<"__builtin_ia32_psraq512">, + def int_x86_avx512_psra_q_512 : ClangBuiltin<"__builtin_ia32_psraq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty], [IntrNoMem]>; // Oddly these don't require an immediate due to a gcc compatibility issue. 
- def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">, + def int_x86_avx512_pslli_w_512 : ClangBuiltin<"__builtin_ia32_psllwi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pslli_d_512 : GCCBuiltin<"__builtin_ia32_pslldi512">, + def int_x86_avx512_pslli_d_512 : ClangBuiltin<"__builtin_ia32_pslldi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pslli_q_512 : GCCBuiltin<"__builtin_ia32_psllqi512">, + def int_x86_avx512_pslli_q_512 : ClangBuiltin<"__builtin_ia32_psllqi512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrli_w_512 : GCCBuiltin<"__builtin_ia32_psrlwi512">, + def int_x86_avx512_psrli_w_512 : ClangBuiltin<"__builtin_ia32_psrlwi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrli_d_512 : GCCBuiltin<"__builtin_ia32_psrldi512">, + def int_x86_avx512_psrli_d_512 : ClangBuiltin<"__builtin_ia32_psrldi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrli_q_512 : GCCBuiltin<"__builtin_ia32_psrlqi512">, + def int_x86_avx512_psrli_q_512 : ClangBuiltin<"__builtin_ia32_psrlqi512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrai_w_512 : GCCBuiltin<"__builtin_ia32_psrawi512">, + def int_x86_avx512_psrai_w_512 : ClangBuiltin<"__builtin_ia32_psrawi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrai_d_512 : GCCBuiltin<"__builtin_ia32_psradi512">, + def int_x86_avx512_psrai_d_512 : ClangBuiltin<"__builtin_ia32_psradi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrai_q_512 : GCCBuiltin<"__builtin_ia32_psraqi512">, + def int_x86_avx512_psrai_q_512 : ClangBuiltin<"__builtin_ia32_psraqi512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_pmultishift_qb_128: - GCCBuiltin<"__builtin_ia32_vpmultishiftqb128">, + ClangBuiltin<"__builtin_ia32_vpmultishiftqb128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_avx512_pmultishift_qb_256: - GCCBuiltin<"__builtin_ia32_vpmultishiftqb256">, + ClangBuiltin<"__builtin_ia32_vpmultishiftqb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx512_pmultishift_qb_512: - GCCBuiltin<"__builtin_ia32_vpmultishiftqb512">, + ClangBuiltin<"__builtin_ia32_vpmultishiftqb512">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; } // Pack ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
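(The repeated note about gcc compatibility above is why the pslli/psrli/psrai defs take llvm_i32_ty with no ImmArg: those builtins historically accept a non-constant shift count, so the operand cannot be required to be an immediate. A sketch showing a runtime count, assuming -mavx2, not part of the patch:

    #include <immintrin.h>

    /* count may be a runtime value; llvm.x86.avx2.pslli.d permits it. */
    __m256i shl_all(__m256i v, int count) {
      return _mm256_slli_epi32(v, count);
    }
)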
- def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">, + def int_x86_avx2_packsswb : ClangBuiltin<"__builtin_ia32_packsswb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">, + def int_x86_avx2_packssdw : ClangBuiltin<"__builtin_ia32_packssdw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">, + def int_x86_avx2_packuswb : ClangBuiltin<"__builtin_ia32_packuswb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">, + def int_x86_avx2_packusdw : ClangBuiltin<"__builtin_ia32_packusdw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; } // Horizontal arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">, + def int_x86_avx2_phadd_w : ClangBuiltin<"__builtin_ia32_phaddw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">, + def int_x86_avx2_phadd_d : ClangBuiltin<"__builtin_ia32_phaddd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">, + def int_x86_avx2_phadd_sw : ClangBuiltin<"__builtin_ia32_phaddsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">, + def int_x86_avx2_phsub_w : ClangBuiltin<"__builtin_ia32_phsubw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">, + def int_x86_avx2_phsub_d : ClangBuiltin<"__builtin_ia32_phsubd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">, + def int_x86_avx2_phsub_sw : ClangBuiltin<"__builtin_ia32_phsubsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">, + def int_x86_avx2_pmadd_ub_sw : ClangBuiltin<"__builtin_ia32_pmaddubsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; } // Sign ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">, + def int_x86_avx2_psign_b : ClangBuiltin<"__builtin_ia32_psignb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">, + def int_x86_avx2_psign_w : ClangBuiltin<"__builtin_ia32_psignw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">, + def int_x86_avx2_psign_d : ClangBuiltin<"__builtin_ia32_psignd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; } // Packed multiply high with round and scale let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
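(The pack and horizontal-arithmetic defs above operate within each 128-bit lane of the 256-bit vector, the usual AVX2 caveat. A sketch, assuming -mavx2, not part of the patch:

    #include <immintrin.h>

    /* Pairwise 16-bit sums of a then b, independently per 128-bit lane
       (llvm.x86.avx2.phadd.w). */
    __m256i pair_sums(__m256i a, __m256i b) {
      return _mm256_hadd_epi16(a, b);
    }
)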
- def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">, + def int_x86_avx2_pmul_hr_sw : ClangBuiltin<"__builtin_ia32_pmulhrsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx512_pmul_hr_sw_512 : GCCBuiltin<"__builtin_ia32_pmulhrsw512">, + def int_x86_avx512_pmul_hr_sw_512 : ClangBuiltin<"__builtin_ia32_pmulhrsw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem, Commutative]>; } // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">, + def int_x86_avx2_pblendvb : ClangBuiltin<"__builtin_ia32_pblendvb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; } @@ -1564,137 +1574,137 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector permutation let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">, + def int_x86_avx2_permd : ClangBuiltin<"__builtin_ia32_permvarsi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, + def int_x86_avx2_permps : ClangBuiltin<"__builtin_ia32_permvarsf256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; } // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, + def int_x86_avx2_maskload_d : ClangBuiltin<"__builtin_ia32_maskloadd">, Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">, + def int_x86_avx2_maskload_q : ClangBuiltin<"__builtin_ia32_maskloadq">, Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">, + def int_x86_avx2_maskload_d_256 : ClangBuiltin<"__builtin_ia32_maskloadd256">, Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">, + def int_x86_avx2_maskload_q_256 : ClangBuiltin<"__builtin_ia32_maskloadq256">, Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem, IntrArgMemOnly]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
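(pmul.hr.sw above is the rounded Q15 fixed-point multiply: per 16-bit lane it computes (a*b + 2^14) >> 15. A sketch, assuming -mavx2, not part of the patch:

    #include <immintrin.h>

    /* Fixed-point Q15 product with rounding (llvm.x86.avx2.pmul.hr.sw). */
    __m256i q15_mul(__m256i a, __m256i b) {
      return _mm256_mulhrs_epi16(a, b);
    }
)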
- def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">, + def int_x86_avx2_maskstore_d : ClangBuiltin<"__builtin_ia32_maskstored">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrArgMemOnly]>; - def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">, + def int_x86_avx2_maskstore_q : ClangBuiltin<"__builtin_ia32_maskstoreq">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrArgMemOnly]>; def int_x86_avx2_maskstore_d_256 : - GCCBuiltin<"__builtin_ia32_maskstored256">, + ClangBuiltin<"__builtin_ia32_maskstored256">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrArgMemOnly]>; def int_x86_avx2_maskstore_q_256 : - GCCBuiltin<"__builtin_ia32_maskstoreq256">, + ClangBuiltin<"__builtin_ia32_maskstoreq256">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrArgMemOnly]>; } // Variable bit shift ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">, + def int_x86_avx2_psllv_d : ClangBuiltin<"__builtin_ia32_psllv4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">, + def int_x86_avx2_psllv_d_256 : ClangBuiltin<"__builtin_ia32_psllv8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">, + def int_x86_avx2_psllv_q : ClangBuiltin<"__builtin_ia32_psllv2di">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">, + def int_x86_avx2_psllv_q_256 : ClangBuiltin<"__builtin_ia32_psllv4di">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_psllv_d_512 : GCCBuiltin<"__builtin_ia32_psllv16si">, + def int_x86_avx512_psllv_d_512 : ClangBuiltin<"__builtin_ia32_psllv16si">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; - def int_x86_avx512_psllv_q_512 : GCCBuiltin<"__builtin_ia32_psllv8di">, + def int_x86_avx512_psllv_q_512 : ClangBuiltin<"__builtin_ia32_psllv8di">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; - def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">, + def int_x86_avx2_psrlv_d : ClangBuiltin<"__builtin_ia32_psrlv4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">, + def int_x86_avx2_psrlv_d_256 : ClangBuiltin<"__builtin_ia32_psrlv8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">, + def int_x86_avx2_psrlv_q : ClangBuiltin<"__builtin_ia32_psrlv2di">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">, + def int_x86_avx2_psrlv_q_256 : ClangBuiltin<"__builtin_ia32_psrlv4di">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_psrlv_d_512 : GCCBuiltin<"__builtin_ia32_psrlv16si">, + def int_x86_avx512_psrlv_d_512 : ClangBuiltin<"__builtin_ia32_psrlv16si">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrlv_q_512 : GCCBuiltin<"__builtin_ia32_psrlv8di">, + def int_x86_avx512_psrlv_q_512 : ClangBuiltin<"__builtin_ia32_psrlv8di">, Intrinsic<[llvm_v8i64_ty], 
[llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; - def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">, + def int_x86_avx2_psrav_d : ClangBuiltin<"__builtin_ia32_psrav4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">, + def int_x86_avx2_psrav_d_256 : ClangBuiltin<"__builtin_ia32_psrav8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_d_512 : GCCBuiltin<"__builtin_ia32_psrav16si">, + def int_x86_avx512_psrav_d_512 : ClangBuiltin<"__builtin_ia32_psrav16si">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128">, + def int_x86_avx512_psrav_q_128 : ClangBuiltin<"__builtin_ia32_psravq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256">, + def int_x86_avx512_psrav_q_256 : ClangBuiltin<"__builtin_ia32_psravq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_q_512 : GCCBuiltin<"__builtin_ia32_psrav8di">, + def int_x86_avx512_psrav_q_512 : ClangBuiltin<"__builtin_ia32_psrav8di">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; - def int_x86_avx512_psllv_w_128 : GCCBuiltin<"__builtin_ia32_psllv8hi">, + def int_x86_avx512_psllv_w_128 : ClangBuiltin<"__builtin_ia32_psllv8hi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_psllv_w_256 : GCCBuiltin<"__builtin_ia32_psllv16hi">, + def int_x86_avx512_psllv_w_256 : ClangBuiltin<"__builtin_ia32_psllv16hi">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx512_psllv_w_512 : GCCBuiltin<"__builtin_ia32_psllv32hi">, + def int_x86_avx512_psllv_w_512 : ClangBuiltin<"__builtin_ia32_psllv32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrlv_w_128 : GCCBuiltin<"__builtin_ia32_psrlv8hi">, + def int_x86_avx512_psrlv_w_128 : ClangBuiltin<"__builtin_ia32_psrlv8hi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrlv_w_256 : GCCBuiltin<"__builtin_ia32_psrlv16hi">, + def int_x86_avx512_psrlv_w_256 : ClangBuiltin<"__builtin_ia32_psrlv16hi">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrlv_w_512 : GCCBuiltin<"__builtin_ia32_psrlv32hi">, + def int_x86_avx512_psrlv_w_512 : ClangBuiltin<"__builtin_ia32_psrlv32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_w_128 : GCCBuiltin<"__builtin_ia32_psrav8hi">, + def int_x86_avx512_psrav_w_128 : ClangBuiltin<"__builtin_ia32_psrav8hi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_w_256 : GCCBuiltin<"__builtin_ia32_psrav16hi">, + def int_x86_avx512_psrav_w_256 : ClangBuiltin<"__builtin_ia32_psrav16hi">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">, + def int_x86_avx512_psrav_w_512 : ClangBuiltin<"__builtin_ia32_psrav32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; } @@ -1703,68 +1713,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
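(Unlike the shift-by-scalar ops earlier, the psllv/psrlv/psrav defs above take a full vector of per-lane shift counts. A sketch, assuming -mavx2, not part of the patch:

    #include <immintrin.h>

    /* Independent left shift per 32-bit lane; counts >= 32 yield zero
       (llvm.x86.avx2.psllv.d.256). */
    __m256i shl_lanes(__m256i v, __m256i counts) {
      return _mm256_sllv_epi32(v, counts);
    }
)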
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // NOTE: These can't be ArgMemOnly because you can put the address completely // in the index register. - def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">, + def int_x86_avx2_gather_d_pd : ClangBuiltin<"__builtin_ia32_gatherd_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, + def int_x86_avx2_gather_d_pd_256 : ClangBuiltin<"__builtin_ia32_gatherd_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, + def int_x86_avx2_gather_q_pd : ClangBuiltin<"__builtin_ia32_gatherq_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">, + def int_x86_avx2_gather_q_pd_256 : ClangBuiltin<"__builtin_ia32_gatherq_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">, + def int_x86_avx2_gather_d_ps : ClangBuiltin<"__builtin_ia32_gatherd_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">, + def int_x86_avx2_gather_d_ps_256 : ClangBuiltin<"__builtin_ia32_gatherd_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">, + def int_x86_avx2_gather_q_ps : ClangBuiltin<"__builtin_ia32_gatherq_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, + def int_x86_avx2_gather_q_ps_256 : ClangBuiltin<"__builtin_ia32_gatherq_ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, + def int_x86_avx2_gather_d_q : ClangBuiltin<"__builtin_ia32_gatherd_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, + def int_x86_avx2_gather_d_q_256 : ClangBuiltin<"__builtin_ia32_gatherd_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, + def int_x86_avx2_gather_q_q : ClangBuiltin<"__builtin_ia32_gatherq_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, + def int_x86_avx2_gather_q_q_256 : ClangBuiltin<"__builtin_ia32_gatherq_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def 
int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">, + def int_x86_avx2_gather_d_d : ClangBuiltin<"__builtin_ia32_gatherd_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, + def int_x86_avx2_gather_d_d_256 : ClangBuiltin<"__builtin_ia32_gatherd_d256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, + def int_x86_avx2_gather_q_d : ClangBuiltin<"__builtin_ia32_gatherq_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; - def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, + def int_x86_avx2_gather_q_d_256 : ClangBuiltin<"__builtin_ia32_gatherq_d256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem, ImmArg>]>; @@ -1772,12 +1782,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Misc. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, + def int_x86_avx2_pmovmskb : ClangBuiltin<"__builtin_ia32_pmovmskb256">, Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">, + def int_x86_avx2_pshuf_b : ClangBuiltin<"__builtin_ia32_pshufb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, + def int_x86_avx2_mpsadbw : ClangBuiltin<"__builtin_ia32_mpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; } @@ -1786,21 +1796,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // FMA3 and FMA4 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, + def int_x86_fma_vfmaddsub_ps : ClangBuiltin<"__builtin_ia32_vfmaddsubps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">, + def int_x86_fma_vfmaddsub_pd : ClangBuiltin<"__builtin_ia32_vfmaddsubpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmaddsub_ps_256 : - GCCBuiltin<"__builtin_ia32_vfmaddsubps256">, + ClangBuiltin<"__builtin_ia32_vfmaddsubps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfmaddsub_pd_256 : - GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">, + ClangBuiltin<"__builtin_ia32_vfmaddsubpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; @@ -1835,27 +1845,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
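(The gather defs above take a base pointer, an index vector, a merge source, a mask, and a byte scale; per the NOTE they can only be IntrReadMem, not ArgMemOnly, because the indices can reach arbitrary memory. A sketch with the usual Clang wrapper, assuming -mavx2, not part of the patch:

    #include <immintrin.h>

    /* Loads base[idx[i]] where mask lane i's sign bit is set, else keeps
       src lane i; the scale (element stride in bytes) must be a constant,
       which is the ImmArg operand in the defs above. */
    __m256d gather4(const double *base, __m128i idx,
                    __m256d src, __m256d mask) {
      return _mm256_mask_i32gather_pd(src, base, idx, mask, 8);
    }
)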
[IntrNoMem, ImmArg>]>; def int_x86_avx512_vpmadd52h_uq_128 : - GCCBuiltin<"__builtin_ia32_vpmadd52huq128">, + ClangBuiltin<"__builtin_ia32_vpmadd52huq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_vpmadd52l_uq_128 : - GCCBuiltin<"__builtin_ia32_vpmadd52luq128">, + ClangBuiltin<"__builtin_ia32_vpmadd52luq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_vpmadd52h_uq_256 : - GCCBuiltin<"__builtin_ia32_vpmadd52huq256">, + ClangBuiltin<"__builtin_ia32_vpmadd52huq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_vpmadd52l_uq_256 : - GCCBuiltin<"__builtin_ia32_vpmadd52luq256">, + ClangBuiltin<"__builtin_ia32_vpmadd52luq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_vpmadd52h_uq_512 : - GCCBuiltin<"__builtin_ia32_vpmadd52huq512">, + ClangBuiltin<"__builtin_ia32_vpmadd52huq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; def int_x86_avx512_vpmadd52l_uq_512 : - GCCBuiltin<"__builtin_ia32_vpmadd52luq512">, + ClangBuiltin<"__builtin_ia32_vpmadd52luq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; } @@ -1863,54 +1873,54 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // VNNI let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_vpdpbusd_128 : - GCCBuiltin<"__builtin_ia32_vpdpbusd128">, + ClangBuiltin<"__builtin_ia32_vpdpbusd128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpbusd_256 : - GCCBuiltin<"__builtin_ia32_vpdpbusd256">, + ClangBuiltin<"__builtin_ia32_vpdpbusd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpbusd_512 : - GCCBuiltin<"__builtin_ia32_vpdpbusd512">, + ClangBuiltin<"__builtin_ia32_vpdpbusd512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpbusds_128 : - GCCBuiltin<"__builtin_ia32_vpdpbusds128">, + ClangBuiltin<"__builtin_ia32_vpdpbusds128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpbusds_256 : - GCCBuiltin<"__builtin_ia32_vpdpbusds256">, + ClangBuiltin<"__builtin_ia32_vpdpbusds256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpbusds_512 : - GCCBuiltin<"__builtin_ia32_vpdpbusds512">, + ClangBuiltin<"__builtin_ia32_vpdpbusds512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpwssd_128 : - GCCBuiltin<"__builtin_ia32_vpdpwssd128">, + ClangBuiltin<"__builtin_ia32_vpdpwssd128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpwssd_256 : - GCCBuiltin<"__builtin_ia32_vpdpwssd256">, + ClangBuiltin<"__builtin_ia32_vpdpwssd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpwssd_512 : - GCCBuiltin<"__builtin_ia32_vpdpwssd512">, + ClangBuiltin<"__builtin_ia32_vpdpwssd512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpwssds_128 : - GCCBuiltin<"__builtin_ia32_vpdpwssds128">, + 
ClangBuiltin<"__builtin_ia32_vpdpwssds128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpwssds_256 : - GCCBuiltin<"__builtin_ia32_vpdpwssds256">, + ClangBuiltin<"__builtin_ia32_vpdpwssds256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_vpdpwssds_512 : - GCCBuiltin<"__builtin_ia32_vpdpwssds512">, + ClangBuiltin<"__builtin_ia32_vpdpwssds512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; } @@ -1919,180 +1929,180 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // XOP let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">, + def int_x86_xop_vpermil2pd : ClangBuiltin<"__builtin_ia32_vpermil2pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_xop_vpermil2pd_256 : - GCCBuiltin<"__builtin_ia32_vpermil2pd256">, + ClangBuiltin<"__builtin_ia32_vpermil2pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">, + def int_x86_xop_vpermil2ps : ClangBuiltin<"__builtin_ia32_vpermil2ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_xop_vpermil2ps_256 : - GCCBuiltin<"__builtin_ia32_vpermil2ps256">, + ClangBuiltin<"__builtin_ia32_vpermil2ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">, + def int_x86_xop_vfrcz_pd : ClangBuiltin<"__builtin_ia32_vfrczpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">, + def int_x86_xop_vfrcz_ps : ClangBuiltin<"__builtin_ia32_vfrczps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">, + def int_x86_xop_vfrcz_sd : ClangBuiltin<"__builtin_ia32_vfrczsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">, + def int_x86_xop_vfrcz_ss : ClangBuiltin<"__builtin_ia32_vfrczss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">, + def int_x86_xop_vfrcz_pd_256 : ClangBuiltin<"__builtin_ia32_vfrczpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">, + def int_x86_xop_vfrcz_ps_256 : ClangBuiltin<"__builtin_ia32_vfrczps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; def int_x86_xop_vphaddbd : - GCCBuiltin<"__builtin_ia32_vphaddbd">, + ClangBuiltin<"__builtin_ia32_vphaddbd">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddbq : - GCCBuiltin<"__builtin_ia32_vphaddbq">, + ClangBuiltin<"__builtin_ia32_vphaddbq">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddbw : - GCCBuiltin<"__builtin_ia32_vphaddbw">, + ClangBuiltin<"__builtin_ia32_vphaddbw">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphadddq : - GCCBuiltin<"__builtin_ia32_vphadddq">, + ClangBuiltin<"__builtin_ia32_vphadddq">, 
       Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_xop_vphaddubd :
-      GCCBuiltin<"__builtin_ia32_vphaddubd">,
+      ClangBuiltin<"__builtin_ia32_vphaddubd">,
       Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
   def int_x86_xop_vphaddubq :
-      GCCBuiltin<"__builtin_ia32_vphaddubq">,
+      ClangBuiltin<"__builtin_ia32_vphaddubq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
   def int_x86_xop_vphaddubw :
-      GCCBuiltin<"__builtin_ia32_vphaddubw">,
+      ClangBuiltin<"__builtin_ia32_vphaddubw">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
   def int_x86_xop_vphaddudq :
-      GCCBuiltin<"__builtin_ia32_vphaddudq">,
+      ClangBuiltin<"__builtin_ia32_vphaddudq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_xop_vphadduwd :
-      GCCBuiltin<"__builtin_ia32_vphadduwd">,
+      ClangBuiltin<"__builtin_ia32_vphadduwd">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_xop_vphadduwq :
-      GCCBuiltin<"__builtin_ia32_vphadduwq">,
+      ClangBuiltin<"__builtin_ia32_vphadduwq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_xop_vphaddwd :
-      GCCBuiltin<"__builtin_ia32_vphaddwd">,
+      ClangBuiltin<"__builtin_ia32_vphaddwd">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_xop_vphaddwq :
-      GCCBuiltin<"__builtin_ia32_vphaddwq">,
+      ClangBuiltin<"__builtin_ia32_vphaddwq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_xop_vphsubbw :
-      GCCBuiltin<"__builtin_ia32_vphsubbw">,
+      ClangBuiltin<"__builtin_ia32_vphsubbw">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
   def int_x86_xop_vphsubdq :
-      GCCBuiltin<"__builtin_ia32_vphsubdq">,
+      ClangBuiltin<"__builtin_ia32_vphsubdq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_xop_vphsubwd :
-      GCCBuiltin<"__builtin_ia32_vphsubwd">,
+      ClangBuiltin<"__builtin_ia32_vphsubwd">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_xop_vpmacsdd :
-      GCCBuiltin<"__builtin_ia32_vpmacsdd">,
+      ClangBuiltin<"__builtin_ia32_vpmacsdd">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsdqh :
-      GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
+      ClangBuiltin<"__builtin_ia32_vpmacsdqh">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsdql :
-      GCCBuiltin<"__builtin_ia32_vpmacsdql">,
+      ClangBuiltin<"__builtin_ia32_vpmacsdql">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssdd :
-      GCCBuiltin<"__builtin_ia32_vpmacssdd">,
+      ClangBuiltin<"__builtin_ia32_vpmacssdd">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssdqh :
-      GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
+      ClangBuiltin<"__builtin_ia32_vpmacssdqh">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssdql :
-      GCCBuiltin<"__builtin_ia32_vpmacssdql">,
+      ClangBuiltin<"__builtin_ia32_vpmacssdql">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsswd :
-      GCCBuiltin<"__builtin_ia32_vpmacsswd">,
+      ClangBuiltin<"__builtin_ia32_vpmacsswd">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssww :
-      GCCBuiltin<"__builtin_ia32_vpmacssww">,
+      ClangBuiltin<"__builtin_ia32_vpmacssww">,
ClangBuiltin<"__builtin_ia32_vpmacssww">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_xop_vpmacswd : - GCCBuiltin<"__builtin_ia32_vpmacswd">, + ClangBuiltin<"__builtin_ia32_vpmacswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>; def int_x86_xop_vpmacsww : - GCCBuiltin<"__builtin_ia32_vpmacsww">, + ClangBuiltin<"__builtin_ia32_vpmacsww">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_xop_vpmadcsswd : - GCCBuiltin<"__builtin_ia32_vpmadcsswd">, + ClangBuiltin<"__builtin_ia32_vpmadcsswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>; def int_x86_xop_vpmadcswd : - GCCBuiltin<"__builtin_ia32_vpmadcswd">, + ClangBuiltin<"__builtin_ia32_vpmadcswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>; def int_x86_xop_vpperm : - GCCBuiltin<"__builtin_ia32_vpperm">, + ClangBuiltin<"__builtin_ia32_vpperm">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vpshab : - GCCBuiltin<"__builtin_ia32_vpshab">, + ClangBuiltin<"__builtin_ia32_vpshab">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vpshad : - GCCBuiltin<"__builtin_ia32_vpshad">, + ClangBuiltin<"__builtin_ia32_vpshad">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpshaq : - GCCBuiltin<"__builtin_ia32_vpshaq">, + ClangBuiltin<"__builtin_ia32_vpshaq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpshaw : - GCCBuiltin<"__builtin_ia32_vpshaw">, + ClangBuiltin<"__builtin_ia32_vpshaw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vpshlb : - GCCBuiltin<"__builtin_ia32_vpshlb">, + ClangBuiltin<"__builtin_ia32_vpshlb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vpshld : - GCCBuiltin<"__builtin_ia32_vpshld">, + ClangBuiltin<"__builtin_ia32_vpshld">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpshlq : - GCCBuiltin<"__builtin_ia32_vpshlq">, + ClangBuiltin<"__builtin_ia32_vpshlq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpshlw : - GCCBuiltin<"__builtin_ia32_vpshlw">, + ClangBuiltin<"__builtin_ia32_vpshlw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; } @@ -2101,25 +2111,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // LWP let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
   def int_x86_llwpcb :
-      GCCBuiltin<"__builtin_ia32_llwpcb">,
+      ClangBuiltin<"__builtin_ia32_llwpcb">,
      Intrinsic<[], [llvm_ptr_ty], []>;
   def int_x86_slwpcb :
-      GCCBuiltin<"__builtin_ia32_slwpcb">,
+      ClangBuiltin<"__builtin_ia32_slwpcb">,
      Intrinsic<[llvm_ptr_ty], [], []>;
   def int_x86_lwpins32 :
-      GCCBuiltin<"__builtin_ia32_lwpins32">,
+      ClangBuiltin<"__builtin_ia32_lwpins32">,
      Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
   def int_x86_lwpins64 :
-      GCCBuiltin<"__builtin_ia32_lwpins64">,
+      ClangBuiltin<"__builtin_ia32_lwpins64">,
      Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
   def int_x86_lwpval32 :
-      GCCBuiltin<"__builtin_ia32_lwpval32">,
+      ClangBuiltin<"__builtin_ia32_lwpval32">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
   def int_x86_lwpval64 :
-      GCCBuiltin<"__builtin_ia32_lwpval64">,
+      ClangBuiltin<"__builtin_ia32_lwpval64">,
      Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
 }
@@ -2129,127 +2139,127 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // Empty MMX state op.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">,
+  def int_x86_mmx_emms : ClangBuiltin<"__builtin_ia32_emms">,
      Intrinsic<[], [], []>;
-  def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">,
+  def int_x86_mmx_femms : ClangBuiltin<"__builtin_ia32_femms">,
      Intrinsic<[], [], []>;
 }

 // Integer arithmetic ops.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   // Addition
-  def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
+  def int_x86_mmx_padd_b : ClangBuiltin<"__builtin_ia32_paddb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
+  def int_x86_mmx_padd_w : ClangBuiltin<"__builtin_ia32_paddw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
+  def int_x86_mmx_padd_d : ClangBuiltin<"__builtin_ia32_paddd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
+  def int_x86_mmx_padd_q : ClangBuiltin<"__builtin_ia32_paddq">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
+  def int_x86_mmx_padds_b : ClangBuiltin<"__builtin_ia32_paddsb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
+  def int_x86_mmx_padds_w : ClangBuiltin<"__builtin_ia32_paddsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
+  def int_x86_mmx_paddus_b : ClangBuiltin<"__builtin_ia32_paddusb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
+  def int_x86_mmx_paddus_w : ClangBuiltin<"__builtin_ia32_paddusw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;

   // Subtraction
-  def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
+  def int_x86_mmx_psub_b : ClangBuiltin<"__builtin_ia32_psubb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                [IntrNoMem]>;
-  def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
+  def int_x86_mmx_psub_w : ClangBuiltin<"__builtin_ia32_psubw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
+  def int_x86_mmx_psub_d : ClangBuiltin<"__builtin_ia32_psubd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
+  def int_x86_mmx_psub_q : ClangBuiltin<"__builtin_ia32_psubq">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
+  def int_x86_mmx_psubs_b : ClangBuiltin<"__builtin_ia32_psubsb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">,
+  def int_x86_mmx_psubs_w : ClangBuiltin<"__builtin_ia32_psubsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">,
+  def int_x86_mmx_psubus_b : ClangBuiltin<"__builtin_ia32_psubusb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">,
+  def int_x86_mmx_psubus_w : ClangBuiltin<"__builtin_ia32_psubusw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;

   // Multiplication
-  def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
+  def int_x86_mmx_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
+  def int_x86_mmx_pmull_w : ClangBuiltin<"__builtin_ia32_pmullw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
+  def int_x86_mmx_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
+  def int_x86_mmx_pmulu_dq : ClangBuiltin<"__builtin_ia32_pmuludq">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
+  def int_x86_mmx_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;

   // Bitwise operations
-  def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
+  def int_x86_mmx_pand : ClangBuiltin<"__builtin_ia32_pand">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
+  def int_x86_mmx_pandn : ClangBuiltin<"__builtin_ia32_pandn">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
+  def int_x86_mmx_por : ClangBuiltin<"__builtin_ia32_por">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
+  def int_x86_mmx_pxor : ClangBuiltin<"__builtin_ia32_pxor">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;

   // Averages
-  def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
+  def int_x86_mmx_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb">,
: ClangBuiltin<"__builtin_ia32_pavgb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; - def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">, + def int_x86_mmx_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Maximum - def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">, + def int_x86_mmx_pmaxu_b : ClangBuiltin<"__builtin_ia32_pmaxub">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; - def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">, + def int_x86_mmx_pmaxs_w : ClangBuiltin<"__builtin_ia32_pmaxsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Minimum - def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">, + def int_x86_mmx_pminu_b : ClangBuiltin<"__builtin_ia32_pminub">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; - def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">, + def int_x86_mmx_pmins_w : ClangBuiltin<"__builtin_ia32_pminsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Packed sum of absolute differences - def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">, + def int_x86_mmx_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; } @@ -2257,178 +2267,178 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Integer shift ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Shift left logical - def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">, + def int_x86_mmx_psll_w : ClangBuiltin<"__builtin_ia32_psllw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">, + def int_x86_mmx_psll_d : ClangBuiltin<"__builtin_ia32_pslld">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">, + def int_x86_mmx_psll_q : ClangBuiltin<"__builtin_ia32_psllq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">, + def int_x86_mmx_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">, + def int_x86_mmx_psrl_d : ClangBuiltin<"__builtin_ia32_psrld">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">, + def int_x86_mmx_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">, + def int_x86_mmx_psra_w : ClangBuiltin<"__builtin_ia32_psraw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">, + def int_x86_mmx_psra_d : ClangBuiltin<"__builtin_ia32_psrad">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; // Oddly these don't require an immediate due to a gcc compatibility issue. 
-  def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
+  def int_x86_mmx_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
+  def int_x86_mmx_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
+  def int_x86_mmx_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
+  def int_x86_mmx_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
+  def int_x86_mmx_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
+  def int_x86_mmx_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
+  def int_x86_mmx_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
+  def int_x86_mmx_psrai_d : ClangBuiltin<"__builtin_ia32_psradi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>;
 }

 // Permute
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256">,
+  def int_x86_avx512_permvar_df_256 : ClangBuiltin<"__builtin_ia32_permvardf256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512">,
+  def int_x86_avx512_permvar_df_512 : ClangBuiltin<"__builtin_ia32_permvardf512">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256">,
+  def int_x86_avx512_permvar_di_256 : ClangBuiltin<"__builtin_ia32_permvardi256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512">,
+  def int_x86_avx512_permvar_di_512 : ClangBuiltin<"__builtin_ia32_permvardi512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128">,
+  def int_x86_avx512_permvar_hi_128 : ClangBuiltin<"__builtin_ia32_permvarhi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256">,
+  def int_x86_avx512_permvar_hi_256 : ClangBuiltin<"__builtin_ia32_permvarhi256">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512">,
+  def int_x86_avx512_permvar_hi_512 : ClangBuiltin<"__builtin_ia32_permvarhi512">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128">,
+  def int_x86_avx512_permvar_qi_128 : ClangBuiltin<"__builtin_ia32_permvarqi128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256">,
+  def int_x86_avx512_permvar_qi_256 : ClangBuiltin<"__builtin_ia32_permvarqi256">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512">,
+  def int_x86_avx512_permvar_qi_512 : ClangBuiltin<"__builtin_ia32_permvarqi512">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512">,
+  def int_x86_avx512_permvar_sf_512 : ClangBuiltin<"__builtin_ia32_permvarsf512">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512">,
+  def int_x86_avx512_permvar_si_512 : ClangBuiltin<"__builtin_ia32_permvarsi512">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
 }

 // Pack ops.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">,
+  def int_x86_mmx_packsswb : ClangBuiltin<"__builtin_ia32_packsswb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">,
+  def int_x86_mmx_packssdw : ClangBuiltin<"__builtin_ia32_packssdw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">,
+  def int_x86_mmx_packuswb : ClangBuiltin<"__builtin_ia32_packuswb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
 }

 // Unpacking ops.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
+  def int_x86_mmx_punpckhbw : ClangBuiltin<"__builtin_ia32_punpckhbw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
+  def int_x86_mmx_punpckhwd : ClangBuiltin<"__builtin_ia32_punpckhwd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
+  def int_x86_mmx_punpckhdq : ClangBuiltin<"__builtin_ia32_punpckhdq">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
+  def int_x86_mmx_punpcklbw : ClangBuiltin<"__builtin_ia32_punpcklbw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
+  def int_x86_mmx_punpcklwd : ClangBuiltin<"__builtin_ia32_punpcklwd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
+  def int_x86_mmx_punpckldq : ClangBuiltin<"__builtin_ia32_punpckldq">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
 }

 // Integer comparison ops
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
+  def int_x86_mmx_pcmpeq_b : ClangBuiltin<"__builtin_ia32_pcmpeqb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
+  def int_x86_mmx_pcmpeq_w : ClangBuiltin<"__builtin_ia32_pcmpeqw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
+  def int_x86_mmx_pcmpeq_d : ClangBuiltin<"__builtin_ia32_pcmpeqd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-  def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
+  def int_x86_mmx_pcmpgt_b : ClangBuiltin<"__builtin_ia32_pcmpgtb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">,
+  def int_x86_mmx_pcmpgt_w : ClangBuiltin<"__builtin_ia32_pcmpgtw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">,
+  def int_x86_mmx_pcmpgt_d : ClangBuiltin<"__builtin_ia32_pcmpgtd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
 }

 // Misc.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
+  def int_x86_mmx_maskmovq : ClangBuiltin<"__builtin_ia32_maskmovq">,
      Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
-  def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
+  def int_x86_mmx_pmovmskb : ClangBuiltin<"__builtin_ia32_pmovmskb">,
      Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
+  def int_x86_mmx_movnt_dq : ClangBuiltin<"__builtin_ia32_movntq">,
      Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
-  def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
+  def int_x86_mmx_palignr_b : ClangBuiltin<"__builtin_ia32_palignr">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-  def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
+  def int_x86_mmx_pextr_w : ClangBuiltin<"__builtin_ia32_vec_ext_v4hi">,
      Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
+  def int_x86_mmx_pinsr_w : ClangBuiltin<"__builtin_ia32_vec_set_v4hi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
@@ -2437,21 +2447,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // BMI
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">,
+  def int_x86_bmi_bextr_32 : ClangBuiltin<"__builtin_ia32_bextr_u32">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">,
+  def int_x86_bmi_bextr_64 : ClangBuiltin<"__builtin_ia32_bextr_u64">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">,
+  def int_x86_bmi_bzhi_32 : ClangBuiltin<"__builtin_ia32_bzhi_si">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">,
+  def int_x86_bmi_bzhi_64 : ClangBuiltin<"__builtin_ia32_bzhi_di">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">,
+  def int_x86_bmi_pdep_32 : ClangBuiltin<"__builtin_ia32_pdep_si">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">,
+  def int_x86_bmi_pdep_64 : ClangBuiltin<"__builtin_ia32_pdep_di">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">,
+  def int_x86_bmi_pext_32 : ClangBuiltin<"__builtin_ia32_pext_si">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">,
+  def int_x86_bmi_pext_64 : ClangBuiltin<"__builtin_ia32_pext_di">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
 }
@@ -2459,34 +2469,34 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // FS/GS Base
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
+  def int_x86_rdfsbase_32 : ClangBuiltin<"__builtin_ia32_rdfsbase32">,
      Intrinsic<[llvm_i32_ty], []>;
-  def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">,
+  def int_x86_rdgsbase_32 : ClangBuiltin<"__builtin_ia32_rdgsbase32">,
      Intrinsic<[llvm_i32_ty], []>;
-  def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
+  def int_x86_rdfsbase_64 : ClangBuiltin<"__builtin_ia32_rdfsbase64">,
      Intrinsic<[llvm_i64_ty], []>;
-  def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">,
+  def int_x86_rdgsbase_64 : ClangBuiltin<"__builtin_ia32_rdgsbase64">,
      Intrinsic<[llvm_i64_ty], []>;
-  def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
+  def int_x86_wrfsbase_32 : ClangBuiltin<"__builtin_ia32_wrfsbase32">,
      Intrinsic<[], [llvm_i32_ty]>;
-  def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">,
+  def int_x86_wrgsbase_32 : ClangBuiltin<"__builtin_ia32_wrgsbase32">,
      Intrinsic<[], [llvm_i32_ty]>;
-  def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
+  def int_x86_wrfsbase_64 : ClangBuiltin<"__builtin_ia32_wrfsbase64">,
      Intrinsic<[], [llvm_i64_ty]>;
-  def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
+  def int_x86_wrgsbase_64 : ClangBuiltin<"__builtin_ia32_wrgsbase64">,
      Intrinsic<[], [llvm_i64_ty]>;
 }

 //===----------------------------------------------------------------------===//
 // FXSR
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_fxrstor : GCCBuiltin<"__builtin_ia32_fxrstor">,
+  def int_x86_fxrstor : ClangBuiltin<"__builtin_ia32_fxrstor">,
      Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_x86_fxrstor64 : GCCBuiltin<"__builtin_ia32_fxrstor64">,
+  def int_x86_fxrstor64 : ClangBuiltin<"__builtin_ia32_fxrstor64">,
      Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_x86_fxsave : GCCBuiltin<"__builtin_ia32_fxsave">,
+  def int_x86_fxsave : ClangBuiltin<"__builtin_ia32_fxsave">,
      Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_x86_fxsave64 : GCCBuiltin<"__builtin_ia32_fxsave64">,
+  def int_x86_fxsave64 : ClangBuiltin<"__builtin_ia32_fxsave64">,
      Intrinsic<[], [llvm_ptr_ty], []>;
 }
@@ -2526,44 +2536,44 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 //===----------------------------------------------------------------------===//
 // CLFLUSHOPT and CLWB
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_clflushopt : GCCBuiltin<"__builtin_ia32_clflushopt">,
+  def int_x86_clflushopt : ClangBuiltin<"__builtin_ia32_clflushopt">,
      Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_x86_clwb : GCCBuiltin<"__builtin_ia32_clwb">,
+  def int_x86_clwb : ClangBuiltin<"__builtin_ia32_clwb">,
      Intrinsic<[], [llvm_ptr_ty], []>;
 }

 //===----------------------------------------------------------------------===//
 // Support protection key
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
+  def int_x86_rdpkru : ClangBuiltin <"__builtin_ia32_rdpkru">,
      Intrinsic<[llvm_i32_ty], [], []>;
-  def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
+  def int_x86_wrpkru : ClangBuiltin<"__builtin_ia32_wrpkru">,
      Intrinsic<[], [llvm_i32_ty], []>;
 }

 //===----------------------------------------------------------------------===//
 // Half float conversion
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
+  def int_x86_vcvtps2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtps2ph">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
+  def int_x86_vcvtps2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtps2ph256">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
   def int_x86_avx512_mask_vcvtph2ps_512 :
      Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-  def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
+  def int_x86_avx512_mask_vcvtps2ph_512 : ClangBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
+  def int_x86_avx512_mask_vcvtps2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
+  def int_x86_avx512_mask_vcvtps2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
@@ -2573,10 +2583,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // TBM
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">,
+  def int_x86_tbm_bextri_u32 : ClangBuiltin<"__builtin_ia32_bextri_u32">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
+  def int_x86_tbm_bextri_u64 : ClangBuiltin<"__builtin_ia32_bextri_u64">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 }
@@ -2619,13 +2629,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // RTM intrinsics. Transactional Memory support.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">,
+  def int_x86_xbegin : ClangBuiltin<"__builtin_ia32_xbegin">,
      Intrinsic<[llvm_i32_ty], [], []>;
-  def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
+  def int_x86_xend : ClangBuiltin<"__builtin_ia32_xend">,
      Intrinsic<[], [], []>;
-  def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
+  def int_x86_xabort : ClangBuiltin<"__builtin_ia32_xabort">,
      Intrinsic<[], [llvm_i8_ty], [ImmArg<ArgIndex<0>>]>;
-  def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
+  def int_x86_xtest : ClangBuiltin<"__builtin_ia32_xtest">,
      Intrinsic<[llvm_i32_ty], [], []>;
 }
@@ -2664,86 +2674,86 @@ let TargetPrefix = "x86" in {
 // Conversion ops
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
+  def int_x86_avx512_cvttss2si : ClangBuiltin<"__builtin_ia32_vcvttss2si32">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
+  def int_x86_avx512_cvttss2si64 : ClangBuiltin<"__builtin_ia32_vcvttss2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
+  def int_x86_avx512_cvttss2usi : ClangBuiltin<"__builtin_ia32_vcvttss2usi32">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
+  def int_x86_avx512_cvttss2usi64 : ClangBuiltin<"__builtin_ia32_vcvttss2usi64">,
      Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
+  def int_x86_avx512_cvtusi2ss : ClangBuiltin<"__builtin_ia32_cvtusi2ss32">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-  def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">,
+  def int_x86_avx512_cvtusi642ss : ClangBuiltin<"__builtin_ia32_cvtusi2ss64">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-  def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
+  def int_x86_avx512_cvttsd2si : ClangBuiltin<"__builtin_ia32_vcvttsd2si32">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
+  def int_x86_avx512_cvttsd2si64 : ClangBuiltin<"__builtin_ia32_vcvttsd2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-  def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
+  def int_x86_avx512_cvttsd2usi : ClangBuiltin<"__builtin_ia32_vcvttsd2usi32">,
ClangBuiltin<"__builtin_ia32_vcvttsd2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">, + def int_x86_avx512_cvttsd2usi64 : ClangBuiltin<"__builtin_ia32_vcvttsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">, + def int_x86_avx512_cvtusi642sd : ClangBuiltin<"__builtin_ia32_cvtusi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">, + def int_x86_avx512_vcvtss2usi32 : ClangBuiltin<"__builtin_ia32_vcvtss2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">, + def int_x86_avx512_vcvtss2usi64 : ClangBuiltin<"__builtin_ia32_vcvtss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">, + def int_x86_avx512_vcvtss2si32 : ClangBuiltin<"__builtin_ia32_vcvtss2si32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">, + def int_x86_avx512_vcvtss2si64 : ClangBuiltin<"__builtin_ia32_vcvtss2si64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">, + def int_x86_avx512_vcvtsd2usi32 : ClangBuiltin<"__builtin_ia32_vcvtsd2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">, + def int_x86_avx512_vcvtsd2usi64 : ClangBuiltin<"__builtin_ia32_vcvtsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">, + def int_x86_avx512_vcvtsd2si32 : ClangBuiltin<"__builtin_ia32_vcvtsd2si32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">, + def int_x86_avx512_vcvtsd2si64 : ClangBuiltin<"__builtin_ia32_vcvtsd2si64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, + def int_x86_avx512_cvtsi2ss32 : ClangBuiltin<"__builtin_ia32_cvtsi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">, + def int_x86_avx512_cvtsi2ss64 : ClangBuiltin<"__builtin_ia32_cvtsi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, + def int_x86_avx512_cvtsi2sd64 : ClangBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; } // Pack ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
-  def int_x86_avx512_packsswb_512 : GCCBuiltin<"__builtin_ia32_packsswb512">,
+  def int_x86_avx512_packsswb_512 : ClangBuiltin<"__builtin_ia32_packsswb512">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_packssdw_512 : GCCBuiltin<"__builtin_ia32_packssdw512">,
+  def int_x86_avx512_packssdw_512 : ClangBuiltin<"__builtin_ia32_packssdw512">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_packuswb_512 : GCCBuiltin<"__builtin_ia32_packuswb512">,
+  def int_x86_avx512_packuswb_512 : ClangBuiltin<"__builtin_ia32_packuswb512">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_packusdw_512 : GCCBuiltin<"__builtin_ia32_packusdw512">,
+  def int_x86_avx512_packusdw_512 : ClangBuiltin<"__builtin_ia32_packusdw512">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
 }
@@ -2759,380 +2769,380 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
                [IntrNoMem, ImmArg>]>;
   def int_x86_avx512_mask_cvtpd2dq_128 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cvtpd2dq_512 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
   def int_x86_avx512_mask_cvtpd2ps_512 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
   def int_x86_avx512_mask_cvtsd2ss_round :
-      GCCBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
   def int_x86_avx512_mask_cvtss2sd_round :
-      GCCBuiltin<"__builtin_ia32_cvtss2sd_round_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtss2sd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
   def int_x86_avx512_mask_cvtpd2ps :
-      GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cvtpd2qq_128 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2qq128_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2qq128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cvtpd2qq_256 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2qq256_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2qq256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cvtpd2qq_512 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
   def int_x86_avx512_mask_cvtpd2udq_128 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cvtpd2udq_256 :
-      GCCBuiltin<"__builtin_ia32_cvtpd2udq256_mask">,
+      ClangBuiltin<"__builtin_ia32_cvtpd2udq256_mask">,
ClangBuiltin<"__builtin_ia32_cvtpd2udq256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2udq_512 : - GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, + ClangBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtpd2uqq_128 : - GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">, + ClangBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2uqq_256 : - GCCBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">, + ClangBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2uqq_512 : - GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">, + ClangBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2dq_128 : - GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2dq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2dq_256 : - GCCBuiltin<"__builtin_ia32_cvtps2dq256_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2dq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2dq_512 : - GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2dq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2pd_512 : - GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2qq_128 : - GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2qq_256 : - GCCBuiltin<"__builtin_ia32_cvtps2qq256_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2qq_512 : - GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2udq_128 : - GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2udq_256 : - GCCBuiltin<"__builtin_ia32_cvtps2udq256_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2udq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2udq_512 : - GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2udq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2uqq_128 : - GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">, + 
ClangBuiltin<"__builtin_ia32_cvtps2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2uqq_256 : - GCCBuiltin<"__builtin_ia32_cvtps2uqq256_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2uqq_512 : - GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">, + ClangBuiltin<"__builtin_ia32_cvtps2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtqq2ps_128 : - GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, + ClangBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2dq_128 : - GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2dq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2dq_512 : - GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttpd2qq_128 : - GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2qq_256 : - GCCBuiltin<"__builtin_ia32_cvttpd2qq256_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2qq_512 : - GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttpd2udq_128 : - GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2udq_256 : - GCCBuiltin<"__builtin_ia32_cvttpd2udq256_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2udq256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2udq_512 : - GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttpd2uqq_128 : - GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2uqq_256 : - GCCBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2uqq_512 : - GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2dq_512 : - GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, + 
ClangBuiltin<"__builtin_ia32_cvttps2dq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2qq_128 : - GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2qq_256 : - GCCBuiltin<"__builtin_ia32_cvttps2qq256_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2qq_512 : - GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2udq_128 : - GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2udq_256 : - GCCBuiltin<"__builtin_ia32_cvttps2udq256_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2udq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2udq_512 : - GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2udq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2uqq_128 : - GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2uqq_256 : - GCCBuiltin<"__builtin_ia32_cvttps2uqq256_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2uqq_512 : - GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">, + ClangBuiltin<"__builtin_ia32_cvttps2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtuqq2ps_128 : - GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, + ClangBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, + def int_x86_avx512_mask_rndscale_pd_128 : ClangBuiltin<"__builtin_ia32_rndscalepd_128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">, + def int_x86_avx512_mask_rndscale_pd_256 : ClangBuiltin<"__builtin_ia32_rndscalepd_256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, + def int_x86_avx512_mask_rndscale_pd_512 : ClangBuiltin<"__builtin_ia32_rndscalepd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">, + def 
int_x86_avx512_mask_rndscale_ps_128 : ClangBuiltin<"__builtin_ia32_rndscaleps_128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">, + def int_x86_avx512_mask_rndscale_ps_256 : ClangBuiltin<"__builtin_ia32_rndscaleps_256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, + def int_x86_avx512_mask_rndscale_ps_512 : ClangBuiltin<"__builtin_ia32_rndscaleps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">, + def int_x86_avx512_mask_reduce_pd_128 : ClangBuiltin<"__builtin_ia32_reducepd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">, + def int_x86_avx512_mask_reduce_pd_256 : ClangBuiltin<"__builtin_ia32_reducepd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">, + def int_x86_avx512_mask_reduce_pd_512 : ClangBuiltin<"__builtin_ia32_reducepd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">, + def int_x86_avx512_mask_reduce_ps_128 : ClangBuiltin<"__builtin_ia32_reduceps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">, + def int_x86_avx512_mask_reduce_ps_256 : ClangBuiltin<"__builtin_ia32_reduceps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">, + def int_x86_avx512_mask_reduce_ps_512 : ClangBuiltin<"__builtin_ia32_reduceps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; -def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">, +def int_x86_avx512_mask_range_pd_128 : ClangBuiltin<"__builtin_ia32_rangepd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; -def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">, +def int_x86_avx512_mask_range_pd_256 : ClangBuiltin<"__builtin_ia32_rangepd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; -def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">, +def int_x86_avx512_mask_range_pd_512 : ClangBuiltin<"__builtin_ia32_rangepd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; -def int_x86_avx512_mask_range_ps_128 : 
GCCBuiltin<"__builtin_ia32_rangeps128_mask">, +def int_x86_avx512_mask_range_ps_128 : ClangBuiltin<"__builtin_ia32_rangeps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; -def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">, +def int_x86_avx512_mask_range_ps_256 : ClangBuiltin<"__builtin_ia32_rangeps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; -def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">, +def int_x86_avx512_mask_range_ps_512 : ClangBuiltin<"__builtin_ia32_rangeps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; @@ -3141,152 +3151,152 @@ def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mas // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_broadcastmw_512 : - GCCBuiltin<"__builtin_ia32_broadcastmw512">, + ClangBuiltin<"__builtin_ia32_broadcastmw512">, Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_broadcastmw_256 : - GCCBuiltin<"__builtin_ia32_broadcastmw256">, + ClangBuiltin<"__builtin_ia32_broadcastmw256">, Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_broadcastmw_128 : - GCCBuiltin<"__builtin_ia32_broadcastmw128">, + ClangBuiltin<"__builtin_ia32_broadcastmw128">, Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_broadcastmb_512 : - GCCBuiltin<"__builtin_ia32_broadcastmb512">, + ClangBuiltin<"__builtin_ia32_broadcastmb512">, Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_broadcastmb_256 : - GCCBuiltin<"__builtin_ia32_broadcastmb256">, + ClangBuiltin<"__builtin_ia32_broadcastmb256">, Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_broadcastmb_128 : - GCCBuiltin<"__builtin_ia32_broadcastmb128">, + ClangBuiltin<"__builtin_ia32_broadcastmb128">, Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>; } // Arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">, + def int_x86_avx512_add_ps_512 : ClangBuiltin<"__builtin_ia32_addps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">, + def int_x86_avx512_add_pd_512 : ClangBuiltin<"__builtin_ia32_addpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">, + def int_x86_avx512_sub_ps_512 : ClangBuiltin<"__builtin_ia32_subps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">, + def int_x86_avx512_sub_pd_512 : ClangBuiltin<"__builtin_ia32_subpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">, + def int_x86_avx512_mul_ps_512 : ClangBuiltin<"__builtin_ia32_mulps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">, + def int_x86_avx512_mul_pd_512 : ClangBuiltin<"__builtin_ia32_mulpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">, + def int_x86_avx512_div_ps_512 : ClangBuiltin<"__builtin_ia32_divps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">, + def int_x86_avx512_div_pd_512 : ClangBuiltin<"__builtin_ia32_divpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">, + def int_x86_avx512_max_ps_512 : ClangBuiltin<"__builtin_ia32_maxps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">, + def int_x86_avx512_max_pd_512 : ClangBuiltin<"__builtin_ia32_maxpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">, + def int_x86_avx512_min_ps_512 : ClangBuiltin<"__builtin_ia32_minps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">, + def int_x86_avx512_min_pd_512 : ClangBuiltin<"__builtin_ia32_minpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">, + def int_x86_avx512_mask_add_ss_round : ClangBuiltin<"__builtin_ia32_addss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round_mask">, + def int_x86_avx512_mask_div_ss_round : ClangBuiltin<"__builtin_ia32_divss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def 
int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round_mask">, + def int_x86_avx512_mask_mul_ss_round : ClangBuiltin<"__builtin_ia32_mulss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round_mask">, + def int_x86_avx512_mask_sub_ss_round : ClangBuiltin<"__builtin_ia32_subss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round_mask">, + def int_x86_avx512_mask_max_ss_round : ClangBuiltin<"__builtin_ia32_maxss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round_mask">, + def int_x86_avx512_mask_min_ss_round : ClangBuiltin<"__builtin_ia32_minss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round_mask">, + def int_x86_avx512_mask_add_sd_round : ClangBuiltin<"__builtin_ia32_addsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round_mask">, + def int_x86_avx512_mask_div_sd_round : ClangBuiltin<"__builtin_ia32_divsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round_mask">, + def int_x86_avx512_mask_mul_sd_round : ClangBuiltin<"__builtin_ia32_mulsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round_mask">, + def int_x86_avx512_mask_sub_sd_round : ClangBuiltin<"__builtin_ia32_subsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round_mask">, + def int_x86_avx512_mask_max_sd_round : ClangBuiltin<"__builtin_ia32_maxsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round_mask">, + def int_x86_avx512_mask_min_sd_round : ClangBuiltin<"__builtin_ia32_minsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round_mask">, + def int_x86_avx512_mask_rndscale_ss : ClangBuiltin<"__builtin_ia32_rndscaless_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round_mask">, + def int_x86_avx512_mask_rndscale_sd : ClangBuiltin<"__builtin_ia32_rndscalesd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, 
llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round_mask">, + def int_x86_avx512_mask_range_ss : ClangBuiltin<"__builtin_ia32_rangess128_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round_mask">, + def int_x86_avx512_mask_range_sd : ClangBuiltin<"__builtin_ia32_rangesd128_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">, + def int_x86_avx512_mask_reduce_ss : ClangBuiltin<"__builtin_ia32_reducess_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">, + def int_x86_avx512_mask_reduce_sd : ClangBuiltin<"__builtin_ia32_reducesd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round_mask">, + def int_x86_avx512_mask_scalef_sd : ClangBuiltin<"__builtin_ia32_scalefsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round_mask">, + def int_x86_avx512_mask_scalef_ss : ClangBuiltin<"__builtin_ia32_scalefss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, + def int_x86_avx512_mask_scalef_pd_128 : ClangBuiltin<"__builtin_ia32_scalefpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_scalef_pd_256 : GCCBuiltin<"__builtin_ia32_scalefpd256_mask">, + def int_x86_avx512_mask_scalef_pd_256 : ClangBuiltin<"__builtin_ia32_scalefpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],[IntrNoMem]>; - def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">, + def int_x86_avx512_mask_scalef_pd_512 : ClangBuiltin<"__builtin_ia32_scalefpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">, + def int_x86_avx512_mask_scalef_ps_128 : ClangBuiltin<"__builtin_ia32_scalefps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">, + def int_x86_avx512_mask_scalef_ps_256 : ClangBuiltin<"__builtin_ia32_scalefps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">, + def int_x86_avx512_mask_scalef_ps_512 : 
ClangBuiltin<"__builtin_ia32_scalefps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -3307,290 +3317,290 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_pd_128 : - GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_pd_128 : - GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmpd128_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_pd_256 : - GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_pd_256 : - GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmpd256_maskz">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_pd_512 : - GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_pd_512 : - GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmpd512_maskz">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ps_128 : - GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ps_128 : - GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmps128_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ps_256 : - GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ps_256 : - GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmps256_maskz">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ps_512 : - GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ps_512 : - GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmps512_maskz">, 
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_fixupimm_sd : - GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmsd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_sd : - GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmsd_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ss : - GCCBuiltin<"__builtin_ia32_fixupimmss_mask">, + ClangBuiltin<"__builtin_ia32_fixupimmss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ss : - GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">, + ClangBuiltin<"__builtin_ia32_fixupimmss_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">, + def int_x86_avx512_mask_getexp_pd_128 : ClangBuiltin<"__builtin_ia32_getexppd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_getexp_pd_256 : GCCBuiltin<"__builtin_ia32_getexppd256_mask">, + def int_x86_avx512_mask_getexp_pd_256 : ClangBuiltin<"__builtin_ia32_getexppd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">, + def int_x86_avx512_mask_getexp_pd_512 : ClangBuiltin<"__builtin_ia32_getexppd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">, + def int_x86_avx512_mask_getexp_ps_128 : ClangBuiltin<"__builtin_ia32_getexpps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_getexp_ps_256 : GCCBuiltin<"__builtin_ia32_getexpps256_mask">, + def int_x86_avx512_mask_getexp_ps_256 : ClangBuiltin<"__builtin_ia32_getexpps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">, + def int_x86_avx512_mask_getexp_ps_512 : ClangBuiltin<"__builtin_ia32_getexpps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss128_round_mask">, + def int_x86_avx512_mask_getexp_ss : ClangBuiltin<"__builtin_ia32_getexpss128_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd128_round_mask">, + def int_x86_avx512_mask_getexp_sd : ClangBuiltin<"__builtin_ia32_getexpsd128_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def 
int_x86_avx512_mask_getmant_pd_128 : - GCCBuiltin<"__builtin_ia32_getmantpd128_mask">, + ClangBuiltin<"__builtin_ia32_getmantpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_pd_256 : - GCCBuiltin<"__builtin_ia32_getmantpd256_mask">, + ClangBuiltin<"__builtin_ia32_getmantpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_pd_512 : - GCCBuiltin<"__builtin_ia32_getmantpd512_mask">, + ClangBuiltin<"__builtin_ia32_getmantpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getmant_ps_128 : - GCCBuiltin<"__builtin_ia32_getmantps128_mask">, + ClangBuiltin<"__builtin_ia32_getmantps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_ps_256 : - GCCBuiltin<"__builtin_ia32_getmantps256_mask">, + ClangBuiltin<"__builtin_ia32_getmantps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_ps_512 : - GCCBuiltin<"__builtin_ia32_getmantps512_mask">, + ClangBuiltin<"__builtin_ia32_getmantps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getmant_ss : - GCCBuiltin<"__builtin_ia32_getmantss_round_mask">, + ClangBuiltin<"__builtin_ia32_getmantss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getmant_sd : - GCCBuiltin<"__builtin_ia32_getmantsd_round_mask">, + ClangBuiltin<"__builtin_ia32_getmantsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + def int_x86_avx512_rsqrt14_ss : ClangBuiltin<"__builtin_ia32_rsqrt14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + def int_x86_avx512_rsqrt14_sd : ClangBuiltin<"__builtin_ia32_rsqrt14sd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_pd_128 : GCCBuiltin<"__builtin_ia32_rsqrt14pd128_mask">, + def int_x86_avx512_rsqrt14_pd_128 : ClangBuiltin<"__builtin_ia32_rsqrt14pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_pd_256 : GCCBuiltin<"__builtin_ia32_rsqrt14pd256_mask">, + def int_x86_avx512_rsqrt14_pd_256 : ClangBuiltin<"__builtin_ia32_rsqrt14pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, + def int_x86_avx512_rsqrt14_pd_512 : ClangBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ps_128 : GCCBuiltin<"__builtin_ia32_rsqrt14ps128_mask">, + def int_x86_avx512_rsqrt14_ps_128 : 
ClangBuiltin<"__builtin_ia32_rsqrt14ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrt14ps256_mask">, + def int_x86_avx512_rsqrt14_ps_256 : ClangBuiltin<"__builtin_ia32_rsqrt14ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, + def int_x86_avx512_rsqrt14_ps_512 : ClangBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + def int_x86_avx512_rcp14_ss : ClangBuiltin<"__builtin_ia32_rcp14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + def int_x86_avx512_rcp14_sd : ClangBuiltin<"__builtin_ia32_rcp14sd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_pd_128 : GCCBuiltin<"__builtin_ia32_rcp14pd128_mask">, + def int_x86_avx512_rcp14_pd_128 : ClangBuiltin<"__builtin_ia32_rcp14pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_pd_256 : GCCBuiltin<"__builtin_ia32_rcp14pd256_mask">, + def int_x86_avx512_rcp14_pd_256 : ClangBuiltin<"__builtin_ia32_rcp14pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">, + def int_x86_avx512_rcp14_pd_512 : ClangBuiltin<"__builtin_ia32_rcp14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ps_128 : GCCBuiltin<"__builtin_ia32_rcp14ps128_mask">, + def int_x86_avx512_rcp14_ps_128 : ClangBuiltin<"__builtin_ia32_rcp14ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ps_256 : GCCBuiltin<"__builtin_ia32_rcp14ps256_mask">, + def int_x86_avx512_rcp14_ps_256 : ClangBuiltin<"__builtin_ia32_rcp14ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">, + def int_x86_avx512_rcp14_ps_512 : ClangBuiltin<"__builtin_ia32_rcp14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">, + def int_x86_avx512_rcp28_ps : ClangBuiltin<"__builtin_ia32_rcp28ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">, + def int_x86_avx512_rcp28_pd : ClangBuiltin<"__builtin_ia32_rcp28pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">, + def int_x86_avx512_exp2_ps : ClangBuiltin<"__builtin_ia32_exp2ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">, + def int_x86_avx512_exp2_pd 
: ClangBuiltin<"__builtin_ia32_exp2pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round_mask">, + def int_x86_avx512_rcp28_ss : ClangBuiltin<"__builtin_ia32_rcp28ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round_mask">, + def int_x86_avx512_rcp28_sd : ClangBuiltin<"__builtin_ia32_rcp28sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">, + def int_x86_avx512_rsqrt28_ps : ClangBuiltin<"__builtin_ia32_rsqrt28ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">, + def int_x86_avx512_rsqrt28_pd : ClangBuiltin<"__builtin_ia32_rsqrt28pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">, + def int_x86_avx512_rsqrt28_ss : ClangBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">, + def int_x86_avx512_rsqrt28_sd : ClangBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; - def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">, + def int_x86_avx512_psad_bw_512 : ClangBuiltin<"__builtin_ia32_psadbw512">, Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem, Commutative]>; } // Integer arithmetic ops let TargetPrefix = "x86" in { - def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">, + def int_x86_avx512_pmulhu_w_512 : ClangBuiltin<"__builtin_ia32_pmulhuw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx512_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512">, + def int_x86_avx512_pmulh_w_512 : ClangBuiltin<"__builtin_ia32_pmulhw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx512_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512">, + def int_x86_avx512_pavg_b_512 : ClangBuiltin<"__builtin_ia32_pavgb512">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; - def int_x86_avx512_pavg_w_512 : GCCBuiltin<"__builtin_ia32_pavgw512">, + def int_x86_avx512_pavg_w_512 : ClangBuiltin<"__builtin_ia32_pavgw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; - def int_x86_avx512_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512">, + def int_x86_avx512_pmaddw_d_512 : ClangBuiltin<"__builtin_ia32_pmaddwd512">, Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx512_pmaddubs_w_512 : GCCBuiltin<"__builtin_ia32_pmaddubsw512">, + def int_x86_avx512_pmaddubs_w_512 : ClangBuiltin<"__builtin_ia32_pmaddubsw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty, llvm_v64i8_ty], 
[IntrNoMem]>; def int_x86_avx512_dbpsadbw_128 : - GCCBuiltin<"__builtin_ia32_dbpsadbw128">, + ClangBuiltin<"__builtin_ia32_dbpsadbw128">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_dbpsadbw_256 : - GCCBuiltin<"__builtin_ia32_dbpsadbw256">, + ClangBuiltin<"__builtin_ia32_dbpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_dbpsadbw_512 : - GCCBuiltin<"__builtin_ia32_dbpsadbw512">, + ClangBuiltin<"__builtin_ia32_dbpsadbw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -3838,32 +3848,32 @@ let TargetPrefix = "x86" in { // gather prefetch // NOTE: These can't be ArgMemOnly because you can put the address completely // in the index register. - def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">, + def int_x86_avx512_gatherpf_dpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfdpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_gatherpf_dps_512 : GCCBuiltin<"__builtin_ia32_gatherpfdps">, + def int_x86_avx512_gatherpf_dps_512 : ClangBuiltin<"__builtin_ia32_gatherpfdps">, Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_gatherpf_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfqpd">, + def int_x86_avx512_gatherpf_qpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfqpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">, + def int_x86_avx512_gatherpf_qps_512 : ClangBuiltin<"__builtin_ia32_gatherpfqps">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; // scatter prefetch // NOTE: These can't be ArgMemOnly because you can put the address completely // in the index register. 
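[Illustrative aside, not part of the patch: the NOTE above is the reason the gather/scatter prefetch intrinsics cannot be ArgMemOnly: with a null base the effective addresses can come entirely from the index vector, so the pointer argument does not bound the memory touched. A minimal sketch of the C-level usage, assuming the AVX512PF wrapper from <immintrin.h>, whose unmasked form expands to __builtin_ia32_gatherpfdpd, the builtin bound to int_x86_avx512_gatherpf_dpd_512 above:

    #include <immintrin.h>

    /* Build with e.g. clang -O2 -mavx512pf. Prefetches the eight doubles at
       base + vindex[i] * 8 into L1 (_MM_HINT_T0); the addresses are formed
       from base plus a per-lane index, so base alone does not bound them. */
    void prefetch_gather(const double *base, __m256i vindex) {
      _mm512_prefetch_i32gather_pd(vindex, base, 8, _MM_HINT_T0);
    }
]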
- def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">, + def int_x86_avx512_scatterpf_dpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfdpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">, + def int_x86_avx512_scatterpf_dps_512 : ClangBuiltin<"__builtin_ia32_scatterpfdps">, Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">, + def int_x86_avx512_scatterpf_qpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfqpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">, + def int_x86_avx512_scatterpf_qps_512 : ClangBuiltin<"__builtin_ia32_scatterpfqps">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; } @@ -4109,34 +4119,34 @@ let TargetPrefix = "x86" in { // Instructions that count the number of leading zero bits let TargetPrefix = "x86" in { def int_x86_avx512_conflict_d_128 : - GCCBuiltin<"__builtin_ia32_vpconflictsi_128">, + ClangBuiltin<"__builtin_ia32_vpconflictsi_128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_conflict_d_256 : - GCCBuiltin<"__builtin_ia32_vpconflictsi_256">, + ClangBuiltin<"__builtin_ia32_vpconflictsi_256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_conflict_d_512 : - GCCBuiltin<"__builtin_ia32_vpconflictsi_512">, + ClangBuiltin<"__builtin_ia32_vpconflictsi_512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_128 : - GCCBuiltin<"__builtin_ia32_vpconflictdi_128">, + ClangBuiltin<"__builtin_ia32_vpconflictdi_128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_256 : - GCCBuiltin<"__builtin_ia32_vpconflictdi_256">, + ClangBuiltin<"__builtin_ia32_vpconflictdi_256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_512 : - GCCBuiltin<"__builtin_ia32_vpconflictdi_512">, + ClangBuiltin<"__builtin_ia32_vpconflictdi_512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>; } // Compares let TargetPrefix = "x86" in { // 512-bit - def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">, + def int_x86_avx512_vcomi_sd : ClangBuiltin<"__builtin_ia32_vcomisd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, + def int_x86_avx512_vcomi_ss : ClangBuiltin<"__builtin_ia32_vcomiss">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; @@ -4159,152 +4169,152 @@ let TargetPrefix = "x86" in { // truncate let TargetPrefix = "x86" in { def int_x86_avx512_mask_pmov_qb_128 : - GCCBuiltin<"__builtin_ia32_pmovqb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_128 : - 
GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_128 : - GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_256 : - GCCBuiltin<"__builtin_ia32_pmovqb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovqb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_256 : - GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_256 : - GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_512 : - GCCBuiltin<"__builtin_ia32_pmovqb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovqb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_512 : - GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_512 : - GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_128 : 
- GCCBuiltin<"__builtin_ia32_pmovqw128_mask">, + ClangBuiltin<"__builtin_ia32_pmovqw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_128 : - GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_128 : - GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_256 : - GCCBuiltin<"__builtin_ia32_pmovqw256_mask">, + ClangBuiltin<"__builtin_ia32_pmovqw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_256 : - GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_256 : - GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; @@ -4313,167 +4323,167 @@ let TargetPrefix = "x86" in { [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_512 : - GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqw512_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_512 : - GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqw512_mask">, 
Intrinsic<[llvm_v8i16_ty], [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_128 : - GCCBuiltin<"__builtin_ia32_pmovqd128_mask">, + ClangBuiltin<"__builtin_ia32_pmovqd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qd_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqd128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_128 : - GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_128 : - GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqd256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_256 : - GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqd256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_256 : - GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqd256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovqd512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_512 : - GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqd512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_512 : - GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">, + 
ClangBuiltin<"__builtin_ia32_pmovusqd512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_128 : - GCCBuiltin<"__builtin_ia32_pmovdb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovdb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovdb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_128 : - GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_128 : - GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_256 : - GCCBuiltin<"__builtin_ia32_pmovdb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovdb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovdb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_256 : - GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_256 : - GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; @@ -4482,87 +4492,87 @@ let TargetPrefix = "x86" in { [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovdb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_512 : - GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i32_ty, llvm_v16i8_ty, 
llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_512 : - GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_128 : - GCCBuiltin<"__builtin_ia32_pmovdw128_mask">, + ClangBuiltin<"__builtin_ia32_pmovdw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovdw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_128 : - GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_128 : - GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_256 : - GCCBuiltin<"__builtin_ia32_pmovdw256_mask">, + ClangBuiltin<"__builtin_ia32_pmovdw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovdw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_256 : - GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_256 : - GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; @@ -4571,107 +4581,107 @@ let TargetPrefix = "x86" in { [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_512 : - 
GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovdw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_512 : - GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdw512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_512 : - GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdw512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_128 : - GCCBuiltin<"__builtin_ia32_pmovwb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovwb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_wb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovwb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_128 : - GCCBuiltin<"__builtin_ia32_pmovswb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovswb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovswb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_128 : - GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">, + ClangBuiltin<"__builtin_ia32_pmovuswb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovwb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_256 : - GCCBuiltin<"__builtin_ia32_pmovswb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovswb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovswb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_256 : - GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">, + ClangBuiltin<"__builtin_ia32_pmovuswb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>; def 
int_x86_avx512_mask_pmov_wb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovwb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_512 : - GCCBuiltin<"__builtin_ia32_pmovswb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovswb512_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovswb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_512 : - GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">, + ClangBuiltin<"__builtin_ia32_pmovuswb512_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, + ClangBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>; @@ -4680,37 +4690,37 @@ let TargetPrefix = "x86" in { // Bitwise ternary logic let TargetPrefix = "x86" in { def int_x86_avx512_pternlog_d_128 : - GCCBuiltin<"__builtin_ia32_pternlogd128">, + ClangBuiltin<"__builtin_ia32_pternlogd128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_256 : - GCCBuiltin<"__builtin_ia32_pternlogd256">, + ClangBuiltin<"__builtin_ia32_pternlogd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_512 : - GCCBuiltin<"__builtin_ia32_pternlogd512">, + ClangBuiltin<"__builtin_ia32_pternlogd512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_128 : - GCCBuiltin<"__builtin_ia32_pternlogq128">, + ClangBuiltin<"__builtin_ia32_pternlogq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_256 : - GCCBuiltin<"__builtin_ia32_pternlogq256">, + ClangBuiltin<"__builtin_ia32_pternlogq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_512 : - GCCBuiltin<"__builtin_ia32_pternlogq512">, + ClangBuiltin<"__builtin_ia32_pternlogq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; @@ -4770,12 +4780,12 @@ let TargetPrefix = "x86" in { llvm_i32_ty, llvm_v2i1_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ss : - GCCBuiltin<"__builtin_ia32_cmpss_mask">, + ClangBuiltin<"__builtin_ia32_cmpss_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_cmp_sd : - GCCBuiltin<"__builtin_ia32_cmpsd_mask">, + ClangBuiltin<"__builtin_ia32_cmpsd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; @@ -4784,21 +4794,21 @@ let TargetPrefix = "x86" in { //===----------------------------------------------------------------------===// // SHA intrinsics let TargetPrefix = "x86" in { - def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">, + def int_x86_sha1rnds4 : 
ClangBuiltin<"__builtin_ia32_sha1rnds4">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; - def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">, + def int_x86_sha1nexte : ClangBuiltin<"__builtin_ia32_sha1nexte">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">, + def int_x86_sha1msg1 : ClangBuiltin<"__builtin_ia32_sha1msg1">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_sha1msg2 : GCCBuiltin<"__builtin_ia32_sha1msg2">, + def int_x86_sha1msg2 : ClangBuiltin<"__builtin_ia32_sha1msg2">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_sha256rnds2 : GCCBuiltin<"__builtin_ia32_sha256rnds2">, + def int_x86_sha256rnds2 : ClangBuiltin<"__builtin_ia32_sha256rnds2">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_sha256msg1 : GCCBuiltin<"__builtin_ia32_sha256msg1">, + def int_x86_sha256msg1 : ClangBuiltin<"__builtin_ia32_sha256msg1">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_sha256msg2 : GCCBuiltin<"__builtin_ia32_sha256msg2">, + def int_x86_sha256msg2 : ClangBuiltin<"__builtin_ia32_sha256msg2">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; } @@ -4806,17 +4816,17 @@ let TargetPrefix = "x86" in { // Thread synchronization ops with timer. let TargetPrefix = "x86" in { def int_x86_monitorx - : GCCBuiltin<"__builtin_ia32_monitorx">, + : ClangBuiltin<"__builtin_ia32_monitorx">, Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty ], []>; def int_x86_mwaitx - : GCCBuiltin<"__builtin_ia32_mwaitx">, + : ClangBuiltin<"__builtin_ia32_mwaitx">, Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>; } //===----------------------------------------------------------------------===// // Cache-line zero let TargetPrefix = "x86" in { - def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">, + def int_x86_clzero : ClangBuiltin<"__builtin_ia32_clzero">, Intrinsic<[], [llvm_ptr_ty], []>; } @@ -4825,11 +4835,11 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { // Write back and invalidate - def int_x86_wbinvd : GCCBuiltin<"__builtin_ia32_wbinvd">, + def int_x86_wbinvd : ClangBuiltin<"__builtin_ia32_wbinvd">, Intrinsic<[], [], []>; // Write back no-invalidate - def int_x86_wbnoinvd : GCCBuiltin<"__builtin_ia32_wbnoinvd">, + def int_x86_wbnoinvd : ClangBuiltin<"__builtin_ia32_wbnoinvd">, Intrinsic<[], [], []>; } @@ -4837,18 +4847,18 @@ let TargetPrefix = "x86" in { // Cache-line demote let TargetPrefix = "x86" in { - def int_x86_cldemote : GCCBuiltin<"__builtin_ia32_cldemote">, + def int_x86_cldemote : ClangBuiltin<"__builtin_ia32_cldemote">, Intrinsic<[], [llvm_ptr_ty], []>; } //===----------------------------------------------------------------------===// // Wait and pause enhancements let TargetPrefix = "x86" in { - def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">, + def int_x86_umonitor : ClangBuiltin<"__builtin_ia32_umonitor">, Intrinsic<[], [llvm_ptr_ty], []>; - def int_x86_umwait : GCCBuiltin<"__builtin_ia32_umwait">, + def int_x86_umwait : ClangBuiltin<"__builtin_ia32_umwait">, Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">, + def int_x86_tpause : ClangBuiltin<"__builtin_ia32_tpause">, Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], 
[]>; } @@ -4856,11 +4866,11 @@ let TargetPrefix = "x86" in { // Direct Move Instructions let TargetPrefix = "x86" in { - def int_x86_directstore32 : GCCBuiltin<"__builtin_ia32_directstore_u32">, + def int_x86_directstore32 : ClangBuiltin<"__builtin_ia32_directstore_u32">, Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_directstore64 : GCCBuiltin<"__builtin_ia32_directstore_u64">, + def int_x86_directstore64 : ClangBuiltin<"__builtin_ia32_directstore_u64">, Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_movdir64b : GCCBuiltin<"__builtin_ia32_movdir64b">, + def int_x86_movdir64b : ClangBuiltin<"__builtin_ia32_movdir64b">, Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>; } @@ -4868,9 +4878,9 @@ let TargetPrefix = "x86" in { // PTWrite - Write data to a processor trace packet let TargetPrefix = "x86" in { - def int_x86_ptwrite32 : GCCBuiltin<"__builtin_ia32_ptwrite32">, + def int_x86_ptwrite32 : ClangBuiltin<"__builtin_ia32_ptwrite32">, Intrinsic<[], [llvm_i32_ty], []>; - def int_x86_ptwrite64 : GCCBuiltin<"__builtin_ia32_ptwrite64">, + def int_x86_ptwrite64 : ClangBuiltin<"__builtin_ia32_ptwrite64">, Intrinsic<[], [llvm_i64_ty], []>; } @@ -4878,21 +4888,21 @@ let TargetPrefix = "x86" in { // INVPCID - Invalidate Process-Context Identifier let TargetPrefix = "x86" in { - def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">, + def int_x86_invpcid : ClangBuiltin<"__builtin_ia32_invpcid">, Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>; } let TargetPrefix = "x86" in { def int_x86_avx512bf16_cvtne2ps2bf16_128: - GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_cvtne2ps2bf16_256: - GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_cvtne2ps2bf16_512: - GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty], [IntrNoMem]>; // Intrinsic must be masked due to it producing less than 128 bits of results.
@@ -4901,21 +4911,21 @@ let TargetPrefix = "x86" in { [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty], [IntrNoMem]>; def int_x86_avx512bf16_cvtneps2bf16_256: - GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">, + ClangBuiltin<"__builtin_ia32_cvtneps2bf16_256">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_cvtneps2bf16_512: - GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">, + ClangBuiltin<"__builtin_ia32_cvtneps2bf16_512">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_128: - GCCBuiltin<"__builtin_ia32_dpbf16ps_128">, + ClangBuiltin<"__builtin_ia32_dpbf16ps_128">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_256: - GCCBuiltin<"__builtin_ia32_dpbf16ps_256">, + ClangBuiltin<"__builtin_ia32_dpbf16ps_256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_512: - GCCBuiltin<"__builtin_ia32_dpbf16ps_512">, + ClangBuiltin<"__builtin_ia32_dpbf16ps_512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; } @@ -4924,9 +4934,9 @@ let TargetPrefix = "x86" in { // ENQCMD - Enqueue Stores Instructions let TargetPrefix = "x86" in { - def int_x86_enqcmd : GCCBuiltin<"__builtin_ia32_enqcmd">, + def int_x86_enqcmd : ClangBuiltin<"__builtin_ia32_enqcmd">, Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>; - def int_x86_enqcmds : GCCBuiltin<"__builtin_ia32_enqcmds">, + def int_x86_enqcmds : ClangBuiltin<"__builtin_ia32_enqcmds">, Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>; } @@ -4934,7 +4944,7 @@ let TargetPrefix = "x86" in { // SERIALIZE - Serialize instruction fetch and execution let TargetPrefix = "x86" in { - def int_x86_serialize : GCCBuiltin<"__builtin_ia32_serialize">, + def int_x86_serialize : ClangBuiltin<"__builtin_ia32_serialize">, Intrinsic<[], [], []>; } @@ -4942,16 +4952,16 @@ let TargetPrefix = "x86" in { // TSXLDTRK - TSX Suspend Load Address Tracking let TargetPrefix = "x86" in { - def int_x86_xsusldtrk : GCCBuiltin<"__builtin_ia32_xsusldtrk">, + def int_x86_xsusldtrk : ClangBuiltin<"__builtin_ia32_xsusldtrk">, Intrinsic<[], [], []>; - def int_x86_xresldtrk : GCCBuiltin<"__builtin_ia32_xresldtrk">, + def int_x86_xresldtrk : ClangBuiltin<"__builtin_ia32_xresldtrk">, Intrinsic<[], [], []>; } //===----------------------------------------------------------------------===// // Key Locker let TargetPrefix = "x86" in { - def int_x86_loadiwkey : GCCBuiltin<"__builtin_ia32_loadiwkey">, + def int_x86_loadiwkey : ClangBuiltin<"__builtin_ia32_loadiwkey">, Intrinsic<[], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], []>; def int_x86_encodekey128 : @@ -5004,91 +5014,91 @@ let TargetPrefix = "x86" in { // AMX - Intel AMX extensions let TargetPrefix = "x86" in { - def int_x86_ldtilecfg : GCCBuiltin<"__builtin_ia32_tile_loadconfig">, + def int_x86_ldtilecfg : ClangBuiltin<"__builtin_ia32_tile_loadconfig">, Intrinsic<[], [llvm_ptr_ty], []>; - def int_x86_sttilecfg : GCCBuiltin<"__builtin_ia32_tile_storeconfig">, + def int_x86_sttilecfg : ClangBuiltin<"__builtin_ia32_tile_storeconfig">, Intrinsic<[], [llvm_ptr_ty], []>; - def int_x86_tilerelease : GCCBuiltin<"__builtin_ia32_tilerelease">, + def int_x86_tilerelease : ClangBuiltin<"__builtin_ia32_tilerelease">, Intrinsic<[], [], []>; - def int_x86_tilezero : GCCBuiltin<"__builtin_ia32_tilezero">, + def int_x86_tilezero : ClangBuiltin<"__builtin_ia32_tilezero">, Intrinsic<[], 
[llvm_i8_ty], [ImmArg>]>; - def int_x86_tileloadd64 : GCCBuiltin<"__builtin_ia32_tileloadd64">, + def int_x86_tileloadd64 : ClangBuiltin<"__builtin_ia32_tileloadd64">, Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], [ImmArg>]>; - def int_x86_tileloaddt164 : GCCBuiltin<"__builtin_ia32_tileloaddt164">, + def int_x86_tileloaddt164 : ClangBuiltin<"__builtin_ia32_tileloaddt164">, Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], [ImmArg>]>; - def int_x86_tilestored64 : GCCBuiltin<"__builtin_ia32_tilestored64">, + def int_x86_tilestored64 : ClangBuiltin<"__builtin_ia32_tilestored64">, Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], [ImmArg>]>; - def int_x86_tdpbssd : GCCBuiltin<"__builtin_ia32_tdpbssd">, + def int_x86_tdpbssd : ClangBuiltin<"__builtin_ia32_tdpbssd">, Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; - def int_x86_tdpbsud : GCCBuiltin<"__builtin_ia32_tdpbsud">, + def int_x86_tdpbsud : ClangBuiltin<"__builtin_ia32_tdpbsud">, Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; - def int_x86_tdpbusd : GCCBuiltin<"__builtin_ia32_tdpbusd">, + def int_x86_tdpbusd : ClangBuiltin<"__builtin_ia32_tdpbusd">, Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; - def int_x86_tdpbuud : GCCBuiltin<"__builtin_ia32_tdpbuud">, + def int_x86_tdpbuud : ClangBuiltin<"__builtin_ia32_tdpbuud">, Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; - def int_x86_tdpbf16ps : GCCBuiltin<"__builtin_ia32_tdpbf16ps">, + def int_x86_tdpbf16ps : ClangBuiltin<"__builtin_ia32_tdpbf16ps">, Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; // AMX - internal intrinsics def int_x86_ldtilecfg_internal : - GCCBuiltin<"__builtin_ia32_tile_loadconfig_internal">, + ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">, Intrinsic<[], [llvm_ptr_ty], []>; def int_x86_tileloadd64_internal : - GCCBuiltin<"__builtin_ia32_tileloadd64_internal">, + ClangBuiltin<"__builtin_ia32_tileloadd64_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty], []>; def int_x86_tileloaddt164_internal : - GCCBuiltin<"__builtin_ia32_tileloaddt164_internal">, + ClangBuiltin<"__builtin_ia32_tileloaddt164_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty], []>; def int_x86_tdpbssd_internal : - GCCBuiltin<"__builtin_ia32_tdpbssd_internal">, + ClangBuiltin<"__builtin_ia32_tdpbssd_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_x86amx_ty, llvm_x86amx_ty], []>; def int_x86_tdpbsud_internal : - GCCBuiltin<"__builtin_ia32_tdpbsud_internal">, + ClangBuiltin<"__builtin_ia32_tdpbsud_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_x86amx_ty, llvm_x86amx_ty], []>; def int_x86_tdpbusd_internal : - GCCBuiltin<"__builtin_ia32_tdpbusd_internal">, + ClangBuiltin<"__builtin_ia32_tdpbusd_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_x86amx_ty, llvm_x86amx_ty], []>; def int_x86_tdpbuud_internal : - GCCBuiltin<"__builtin_ia32_tdpbuud_internal">, + ClangBuiltin<"__builtin_ia32_tdpbuud_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_x86amx_ty, llvm_x86amx_ty], []>; def int_x86_tilestored64_internal : - GCCBuiltin<"__builtin_ia32_tilestored64_internal">, + ClangBuiltin<"__builtin_ia32_tilestored64_internal">, 
Intrinsic<[], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty, llvm_x86amx_ty], []>; def int_x86_tilezero_internal : - GCCBuiltin<"__builtin_ia32_tilezero_internal">, + ClangBuiltin<"__builtin_ia32_tilezero_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty], []>; def int_x86_tdpbf16ps_internal : - GCCBuiltin<"__builtin_ia32_tdpbf16ps_internal">, + ClangBuiltin<"__builtin_ia32_tdpbf16ps_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_x86amx_ty, @@ -5103,13 +5113,13 @@ let TargetPrefix = "x86" in { // UINTR - User Level Interrupt let TargetPrefix = "x86" in { - def int_x86_clui : GCCBuiltin<"__builtin_ia32_clui">, + def int_x86_clui : ClangBuiltin<"__builtin_ia32_clui">, Intrinsic<[], [], []>; - def int_x86_stui : GCCBuiltin<"__builtin_ia32_stui">, + def int_x86_stui : ClangBuiltin<"__builtin_ia32_stui">, Intrinsic<[], [], []>; - def int_x86_testui : GCCBuiltin<"__builtin_ia32_testui">, + def int_x86_testui : ClangBuiltin<"__builtin_ia32_testui">, Intrinsic<[llvm_i8_ty], [], []>; - def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">, + def int_x86_senduipi : ClangBuiltin<"__builtin_ia32_senduipi">, Intrinsic<[], [llvm_i64_ty], []>; } @@ -5117,48 +5127,48 @@ let TargetPrefix = "x86" in { // avx512_fp16: vaddph let TargetPrefix = "x86" in { def int_x86_avx512fp16_add_ph_512 - : GCCBuiltin<"__builtin_ia32_addph512">, + : ClangBuiltin<"__builtin_ia32_addph512">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_sub_ph_512 - : GCCBuiltin<"__builtin_ia32_subph512">, + : ClangBuiltin<"__builtin_ia32_subph512">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mul_ph_512 - : GCCBuiltin<"__builtin_ia32_mulph512">, + : ClangBuiltin<"__builtin_ia32_mulph512">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_div_ph_512 - : GCCBuiltin<"__builtin_ia32_divph512">, + : ClangBuiltin<"__builtin_ia32_divph512">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_max_ph_128 - : GCCBuiltin<"__builtin_ia32_maxph128">, + : ClangBuiltin<"__builtin_ia32_maxph128">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_max_ph_256 - : GCCBuiltin<"__builtin_ia32_maxph256">, + : ClangBuiltin<"__builtin_ia32_maxph256">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_max_ph_512 - : GCCBuiltin<"__builtin_ia32_maxph512">, + : ClangBuiltin<"__builtin_ia32_maxph512">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_min_ph_128 - : GCCBuiltin<"__builtin_ia32_minph128">, + : ClangBuiltin<"__builtin_ia32_minph128">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_min_ph_256 - : GCCBuiltin<"__builtin_ia32_minph256">, + : ClangBuiltin<"__builtin_ia32_minph256">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_min_ph_512 - : GCCBuiltin<"__builtin_ia32_minph512">, + : ClangBuiltin<"__builtin_ia32_minph512">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; @@ -5178,367 +5188,367 @@ let TargetPrefix = 
"x86" in { [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_add_sh_round - : GCCBuiltin<"__builtin_ia32_addsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_addsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_sub_sh_round - : GCCBuiltin<"__builtin_ia32_subsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_subsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_mul_sh_round - : GCCBuiltin<"__builtin_ia32_mulsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_mulsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_div_sh_round - : GCCBuiltin<"__builtin_ia32_divsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_divsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_min_sh_round - : GCCBuiltin<"__builtin_ia32_minsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_minsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_max_sh_round - : GCCBuiltin<"__builtin_ia32_maxsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_maxsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_sh - : GCCBuiltin<"__builtin_ia32_cmpsh_mask">, + : ClangBuiltin<"__builtin_ia32_cmpsh_mask">, Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_vcomi_sh - : GCCBuiltin<"__builtin_ia32_vcomish">, + : ClangBuiltin<"__builtin_ia32_vcomish">, Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2psx_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2psx128_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2psx_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2psx256_mask">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2psx_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2psx512_mask">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtps2phx_128 - : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtps2phx128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtps2phx_256 - : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtps2phx256_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtps2phx_512 - : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">, + : 
ClangBuiltin<"__builtin_ia32_vcvtps2phx512_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_128 - : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_256 - : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_512 - : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2pd_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2pd128_mask">, Intrinsic<[ llvm_v2f64_ty ], [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2pd_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2pd256_mask">, Intrinsic<[ llvm_v4f64_ty ], [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2pd_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2pd512_mask">, Intrinsic<[ llvm_v8f64_ty ], [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsh2ss_round - : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtss2sh_round - : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsd2sh_round - : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsh2sd_round - : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">, Intrinsic<[ llvm_v2f64_ty ], [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2w_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2w128_mask">, Intrinsic<[ llvm_v8i16_ty ], [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2w_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2w256_mask">, Intrinsic<[ llvm_v16i16_ty ], [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2w_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2w512_mask">, Intrinsic<[ llvm_v32i16_ty ], [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_mask_vcvttph2w_128 - : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2w128_mask">, Intrinsic<[ llvm_v8i16_ty ], [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2w_256 - : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2w256_mask">, Intrinsic<[ llvm_v16i16_ty ], [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2w_512 - : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2w512_mask">, Intrinsic<[ llvm_v32i16_ty ], [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2uw_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2uw128_mask">, Intrinsic<[ llvm_v8i16_ty ], [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uw_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2uw256_mask">, Intrinsic<[ llvm_v16i16_ty ], [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uw_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2uw512_mask">, Intrinsic<[ llvm_v32i16_ty ], [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2uw_128 - : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2uw128_mask">, Intrinsic<[ llvm_v8i16_ty ], [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uw_256 - : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2uw256_mask">, Intrinsic<[ llvm_v16i16_ty ], [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uw_512 - : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2uw512_mask">, Intrinsic<[ llvm_v32i16_ty ], [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2dq_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2dq128_mask">, Intrinsic<[ llvm_v4i32_ty ], [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2dq_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2dq256_mask">, Intrinsic<[ llvm_v8i32_ty ], [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2dq_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2dq512_mask">, Intrinsic<[ llvm_v16i32_ty ], [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2udq_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2udq128_mask">, Intrinsic<[ llvm_v4i32_ty ], [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2udq_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2udq256_mask">, Intrinsic<[ llvm_v8i32_ty ], [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2udq_512 - : 
GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2udq512_mask">, Intrinsic<[ llvm_v16i32_ty ], [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtdq2ph_128 - : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtudq2ph_128 - : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_128 - : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2dq128_mask">, Intrinsic<[ llvm_v4i32_ty ], [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_256 - : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2dq256_mask">, Intrinsic<[ llvm_v8i32_ty ], [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_512 - : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2dq512_mask">, Intrinsic<[ llvm_v16i32_ty ], [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2udq_128 - : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2udq128_mask">, Intrinsic<[ llvm_v4i32_ty ], [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2udq_256 - : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2udq256_mask">, Intrinsic<[ llvm_v8i32_ty ], [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2udq_512 - : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2udq512_mask">, Intrinsic<[ llvm_v16i32_ty ], [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtqq2ph_128 - : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtqq2ph_256 - : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2qq128_mask">, Intrinsic<[ llvm_v2i64_ty ], [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2qq256_mask">, Intrinsic<[ llvm_v4i64_ty ], [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2qq512_mask">, Intrinsic<[ llvm_v8i64_ty ], [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtuqq2ph_128 - : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">, + : 
ClangBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtuqq2ph_256 - : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_128 - : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">, Intrinsic<[ llvm_v2i64_ty ], [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_256 - : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">, Intrinsic<[ llvm_v4i64_ty ], [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_512 - : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">, Intrinsic<[ llvm_v8i64_ty ], [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2qq_128 - : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2qq128_mask">, Intrinsic<[ llvm_v2i64_ty ], [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2qq_256 - : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2qq256_mask">, Intrinsic<[ llvm_v4i64_ty ], [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2qq_512 - : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2qq512_mask">, Intrinsic<[ llvm_v8i64_ty ], [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_128 - : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">, Intrinsic<[ llvm_v2i64_ty ], [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_256 - : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">, Intrinsic<[ llvm_v4i64_ty ], [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_512 - : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">, + : ClangBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">, Intrinsic<[ llvm_v8i64_ty ], [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2si32 - : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">, + : ClangBuiltin<"__builtin_ia32_vcvtsh2si32">, Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2usi32 - : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">, + : ClangBuiltin<"__builtin_ia32_vcvtsh2usi32">, Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2si64 - : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">, + : ClangBuiltin<"__builtin_ia32_vcvtsh2si64">, Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2usi64 - : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">, + : ClangBuiltin<"__builtin_ia32_vcvtsh2usi64">, Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtusi2sh - : 
GCCBuiltin<"__builtin_ia32_vcvtusi2sh">, + : ClangBuiltin<"__builtin_ia32_vcvtusi2sh">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtusi642sh - : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">, + : ClangBuiltin<"__builtin_ia32_vcvtusi642sh">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsi2sh - : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">, + : ClangBuiltin<"__builtin_ia32_vcvtsi2sh">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsi642sh - : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">, + : ClangBuiltin<"__builtin_ia32_vcvtsi642sh">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2si32 - : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">, + : ClangBuiltin<"__builtin_ia32_vcvttsh2si32">, Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2si64 - : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">, + : ClangBuiltin<"__builtin_ia32_vcvttsh2si64">, Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2usi32 - : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">, + : ClangBuiltin<"__builtin_ia32_vcvttsh2usi32">, Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2usi64 - : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">, + : ClangBuiltin<"__builtin_ia32_vcvttsh2usi64">, Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; @@ -5551,61 +5561,61 @@ let TargetPrefix = "x86" in { llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rsqrt_ph_128 - : GCCBuiltin<"__builtin_ia32_rsqrtph128_mask">, + : ClangBuiltin<"__builtin_ia32_rsqrtph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_ph_256 - : GCCBuiltin<"__builtin_ia32_rsqrtph256_mask">, + : ClangBuiltin<"__builtin_ia32_rsqrtph256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_ph_512 - : GCCBuiltin<"__builtin_ia32_rsqrtph512_mask">, + : ClangBuiltin<"__builtin_ia32_rsqrtph512_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_sh - : GCCBuiltin<"__builtin_ia32_rsqrtsh_mask">, + : ClangBuiltin<"__builtin_ia32_rsqrtsh_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_128 - : GCCBuiltin<"__builtin_ia32_rcpph128_mask">, + : ClangBuiltin<"__builtin_ia32_rcpph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_256 - : GCCBuiltin<"__builtin_ia32_rcpph256_mask">, + : ClangBuiltin<"__builtin_ia32_rcpph256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_512 - : GCCBuiltin<"__builtin_ia32_rcpph512_mask">, + : ClangBuiltin<"__builtin_ia32_rcpph512_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_sh - : GCCBuiltin<"__builtin_ia32_rcpsh_mask">, + 
: ClangBuiltin<"__builtin_ia32_rcpsh_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_reduce_ph_128 - : GCCBuiltin<"__builtin_ia32_reduceph128_mask">, + : ClangBuiltin<"__builtin_ia32_reduceph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_ph_256 - : GCCBuiltin<"__builtin_ia32_reduceph256_mask">, + : ClangBuiltin<"__builtin_ia32_reduceph256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_ph_512 - : GCCBuiltin<"__builtin_ia32_reduceph512_mask">, + : ClangBuiltin<"__builtin_ia32_reduceph512_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_sh - : GCCBuiltin<"__builtin_ia32_reducesh_mask">, + : ClangBuiltin<"__builtin_ia32_reducesh_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty ], @@ -5620,91 +5630,91 @@ let TargetPrefix = "x86" in { : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_fpclass_sh - : GCCBuiltin<"__builtin_ia32_fpclasssh_mask">, + : ClangBuiltin<"__builtin_ia32_fpclasssh_mask">, Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getexp_ph_128 - : GCCBuiltin<"__builtin_ia32_getexpph128_mask">, + : ClangBuiltin<"__builtin_ia32_getexpph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_getexp_ph_256 - : GCCBuiltin<"__builtin_ia32_getexpph256_mask">, + : ClangBuiltin<"__builtin_ia32_getexpph256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_getexp_ph_512 - : GCCBuiltin<"__builtin_ia32_getexpph512_mask">, + : ClangBuiltin<"__builtin_ia32_getexpph512_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getexp_sh - : GCCBuiltin<"__builtin_ia32_getexpsh128_round_mask">, + : ClangBuiltin<"__builtin_ia32_getexpsh128_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_128 - : GCCBuiltin<"__builtin_ia32_getmantph128_mask">, + : ClangBuiltin<"__builtin_ia32_getmantph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_256 - : GCCBuiltin<"__builtin_ia32_getmantph256_mask">, + : ClangBuiltin<"__builtin_ia32_getmantph256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_512 - : GCCBuiltin<"__builtin_ia32_getmantph512_mask">, + : ClangBuiltin<"__builtin_ia32_getmantph512_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_sh - : GCCBuiltin<"__builtin_ia32_getmantsh_round_mask">, + : 
ClangBuiltin<"__builtin_ia32_getmantsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_128 - : GCCBuiltin<"__builtin_ia32_rndscaleph_128_mask">, + : ClangBuiltin<"__builtin_ia32_rndscaleph_128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_256 - : GCCBuiltin<"__builtin_ia32_rndscaleph_256_mask">, + : ClangBuiltin<"__builtin_ia32_rndscaleph_256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_512 - : GCCBuiltin<"__builtin_ia32_rndscaleph_mask">, + : ClangBuiltin<"__builtin_ia32_rndscaleph_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_sh - : GCCBuiltin<"__builtin_ia32_rndscalesh_round_mask">, + : ClangBuiltin<"__builtin_ia32_rndscalesh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_scalef_ph_128 - : GCCBuiltin<"__builtin_ia32_scalefph128_mask">, + : ClangBuiltin<"__builtin_ia32_scalefph128_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_scalef_ph_256 - : GCCBuiltin<"__builtin_ia32_scalefph256_mask">, + : ClangBuiltin<"__builtin_ia32_scalefph256_mask">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_scalef_ph_512 - : GCCBuiltin<"__builtin_ia32_scalefph512_mask">, + : ClangBuiltin<"__builtin_ia32_scalefph512_mask">, Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_scalef_sh - : GCCBuiltin<"__builtin_ia32_scalefsh_round_mask">, + : ClangBuiltin<"__builtin_ia32_scalefsh_round_mask">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], @@ -5715,12 +5725,12 @@ let TargetPrefix = "x86" in { [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmaddsub_ph_128 - : GCCBuiltin<"__builtin_ia32_vfmaddsubph">, + : ClangBuiltin<"__builtin_ia32_vfmaddsubph">, Intrinsic<[ llvm_v8f16_ty ], [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_vfmaddsub_ph_256 - : GCCBuiltin<"__builtin_ia32_vfmaddsubph256">, + : ClangBuiltin<"__builtin_ia32_vfmaddsubph256">, Intrinsic<[ llvm_v16f16_ty ], [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; @@ -5734,133 +5744,133 @@ let TargetPrefix = "x86" in { [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_128 - : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_128 - : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, 
llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_256 - : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_mask">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_256 - : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_512 - : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_512 - : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmadd_cph_128 - : GCCBuiltin<"__builtin_ia32_vfmaddcph128_mask">, + : ClangBuiltin<"__builtin_ia32_vfmaddcph128_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_128 - : GCCBuiltin<"__builtin_ia32_vfmaddcph128_maskz">, + : ClangBuiltin<"__builtin_ia32_vfmaddcph128_maskz">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmadd_cph_256 - : GCCBuiltin<"__builtin_ia32_vfmaddcph256_mask">, + : ClangBuiltin<"__builtin_ia32_vfmaddcph256_mask">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_256 - : GCCBuiltin<"__builtin_ia32_vfmaddcph256_maskz">, + : ClangBuiltin<"__builtin_ia32_vfmaddcph256_maskz">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmadd_cph_512 - : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask3">, + : ClangBuiltin<"__builtin_ia32_vfmaddcph512_mask3">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_512 - : GCCBuiltin<"__builtin_ia32_vfmaddcph512_maskz">, + : ClangBuiltin<"__builtin_ia32_vfmaddcph512_maskz">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmadd_csh - : GCCBuiltin<"__builtin_ia32_vfmaddcsh_mask">, + : ClangBuiltin<"__builtin_ia32_vfmaddcsh_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfmadd_csh - : GCCBuiltin<"__builtin_ia32_vfmaddcsh_maskz">, + : ClangBuiltin<"__builtin_ia32_vfmaddcsh_maskz">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmadd_csh - : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_maskz_vfcmadd_csh - : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">, + : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmul_cph_128 - : GCCBuiltin<"__builtin_ia32_vfmulcph128_mask">, + : ClangBuiltin<"__builtin_ia32_vfmulcph128_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmul_cph_128 - : GCCBuiltin<"__builtin_ia32_vfcmulcph128_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmulcph128_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmul_cph_256 - : GCCBuiltin<"__builtin_ia32_vfmulcph256_mask">, + : ClangBuiltin<"__builtin_ia32_vfmulcph256_mask">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmul_cph_256 - : GCCBuiltin<"__builtin_ia32_vfcmulcph256_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmulcph256_mask">, Intrinsic<[ llvm_v8f32_ty ], [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmul_cph_512 - : GCCBuiltin<"__builtin_ia32_vfmulcph512_mask">, + : ClangBuiltin<"__builtin_ia32_vfmulcph512_mask">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmul_cph_512 - : GCCBuiltin<"__builtin_ia32_vfcmulcph512_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmulcph512_mask">, Intrinsic<[ llvm_v16f32_ty ], [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmul_csh - : GCCBuiltin<"__builtin_ia32_vfmulcsh_mask">, + : ClangBuiltin<"__builtin_ia32_vfmulcsh_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmul_csh - : GCCBuiltin<"__builtin_ia32_vfcmulcsh_mask">, + : ClangBuiltin<"__builtin_ia32_vfcmulcsh_mask">, Intrinsic<[ llvm_v4f32_ty ], [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty ], diff --git a/llvm/include/llvm/IR/IntrinsicsXCore.td b/llvm/include/llvm/IR/IntrinsicsXCore.td index 89dbc65fea44..d2afc3497833 100644 --- a/llvm/include/llvm/IR/IntrinsicsXCore.td +++ b/llvm/include/llvm/IR/IntrinsicsXCore.td @@ -13,7 +13,7 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". // Miscellaneous instructions. def int_xcore_bitrev : Intrinsic<[llvm_i32_ty],[llvm_i32_ty],[IntrNoMem]>, - GCCBuiltin<"__builtin_bitrev">; + ClangBuiltin<"__builtin_bitrev">; def int_xcore_crc8 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty], [IntrNoMem]>; @@ -25,11 +25,11 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". 
def int_xcore_zext : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_xcore_getid : Intrinsic<[llvm_i32_ty],[],[IntrNoMem]>, - GCCBuiltin<"__builtin_getid">; + ClangBuiltin<"__builtin_getid">; def int_xcore_getps : Intrinsic<[llvm_i32_ty],[llvm_i32_ty]>, - GCCBuiltin<"__builtin_getps">; + ClangBuiltin<"__builtin_getps">; def int_xcore_setps : Intrinsic<[],[llvm_i32_ty, llvm_i32_ty]>, - GCCBuiltin<"__builtin_setps">; + ClangBuiltin<"__builtin_setps">; def int_xcore_geted : Intrinsic<[llvm_i32_ty],[]>; def int_xcore_getet : Intrinsic<[llvm_i32_ty],[]>; def int_xcore_setsr : Intrinsic<[],[llvm_i32_ty]>; diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 446bcecf1c64..91712df153a0 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -24,6 +24,7 @@ namespace llvm { +class Any; class DiagnosticInfo; enum DiagnosticSeverity : char; class Function; @@ -93,6 +94,7 @@ public: OB_preallocated = 4, // "preallocated" OB_gc_live = 5, // "gc-live" OB_clang_arc_attachedcall = 6, // "clang.arc.attachedcall" + OB_ptrauth = 7, // "ptrauth" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. @@ -201,6 +203,11 @@ public: /// diagnostics. void setDiagnosticsHotnessRequested(bool Requested); + bool getMisExpectWarningRequested() const; + void setMisExpectWarningRequested(bool Requested); + void setDiagnosticsMisExpectTolerance(Optional<uint64_t> Tolerance); + uint64_t getDiagnosticsMisExpectTolerance() const; + /// Return the minimum hotness value a diagnostic would need in order /// to be included in optimization diagnostics. /// @@ -304,13 +311,22 @@ public: /// LLVMContext is used by compilation. void setOptPassGate(OptPassGate&); - /// Enable opaque pointers. Can only be called before creating the first - /// pointer type. - void enableOpaquePointers() const; + /// Whether we've decided on using opaque pointers or typed pointers yet. + bool hasSetOpaquePointersValue() const; + + /// Set whether opaque pointers are enabled. The method may be called multiple + /// times, but only with the same value. Note that creating a pointer type or + /// otherwise querying the opaque pointer mode performs an implicit set to + /// the default value. + void setOpaquePointers(bool Enable) const; /// Whether typed pointers are supported. If false, all pointers are opaque. bool supportsTypedPointers() const; + /// Optionally target-specific data can be attached to the context for lifetime + /// management and bypassing layering restrictions. + llvm::Any &getTargetData() const; + private: // Module needs access to the add/removeModule methods. friend class Module; diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index 311a407f1a19..41c11d26aa45 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -294,9 +294,7 @@ private: /// used by pass managers. class PMDataManager { public: - explicit PMDataManager() : TPM(nullptr), Depth(0) { - initializeAnalysisInfo(); - } + explicit PMDataManager() { initializeAnalysisInfo(); } virtual ~PMDataManager(); @@ -418,7 +416,7 @@ public: protected: // Top level manager. - PMTopLevelManager *TPM; + PMTopLevelManager *TPM = nullptr; // Collection of pass that are managed by this manager SmallVector PassVector; @@ -446,7 +444,7 @@ private: // this manager.
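The LLVMContext.h hunk above replaces the one-shot enableOpaquePointers() with an idempotent setOpaquePointers(bool). A minimal caller-side sketch under that API (the helper name and the guard are assumptions, not code from this patch):

    #include "llvm/IR/LLVMContext.h"

    // Opt into opaque pointers before any pointer type is created; creating
    // a pointer type or querying the mode first would lock in the default.
    void enableOpaquePtrMode(llvm::LLVMContext &Ctx) {
      if (!Ctx.hasSetOpaquePointersValue())
        Ctx.setOpaquePointers(true); // may repeat, but only with 'true' now
    }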
SmallVector HigherLevelAnalysis; - unsigned Depth; + unsigned Depth = 0; }; //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 42829388b79a..21d7b8b6da71 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -108,6 +108,10 @@ public: /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks. MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB); + /// Return metadata feeding to the CodeGen about how to generate a function + /// prologue for the "function" sanitizer. + MDNode *createRTTIPointerPrologue(Constant *PrologueSig, Constant *RTTI); + //===------------------------------------------------------------------===// // AA metadata. //===------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h index 4c8286692ebf..dbf2cfb7c5e9 100644 --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -30,8 +30,8 @@ class Function; class Twine; class Module; -template <class IRBuilderTy> class MatrixBuilder { - IRBuilderTy &B; +class MatrixBuilder { + IRBuilderBase &B; Module *getModule() { return B.GetInsertBlock()->getParent()->getParent(); } std::pair<Value *, Value *> splatScalarOperandIfNeeded(Value *LHS, @@ -55,21 +55,17 @@ template <class IRBuilderTy> class MatrixBuilder { } public: - MatrixBuilder(IRBuilderTy &Builder) : B(Builder) {} + MatrixBuilder(IRBuilderBase &Builder) : B(Builder) {} /// Create a column major, strided matrix load. + /// \p EltTy - Matrix element type /// \p DataPtr - Start address of the matrix read /// \p Rows - Number of rows in matrix (must be a constant) /// \p Columns - Number of columns in matrix (must be a constant) /// \p Stride - Space between columns - CallInst *CreateColumnMajorLoad(Value *DataPtr, Align Alignment, + CallInst *CreateColumnMajorLoad(Type *EltTy, Value *DataPtr, Align Alignment, Value *Stride, bool IsVolatile, unsigned Rows, unsigned Columns, const Twine &Name = "") { - - // Deal with the pointer - PointerType *PtrTy = cast<PointerType>(DataPtr->getType()); - Type *EltTy = PtrTy->getPointerElementType(); - auto *RetType = FixedVectorType::get(EltTy, Rows * Columns); Value *Ops[] = {DataPtr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows), @@ -234,12 +230,11 @@ public: /// Create an assumption that \p Idx is less than \p NumElements. void CreateIndexAssumption(Value *Idx, unsigned NumElements, Twine const &Name = "") { - Value *NumElts = B.getIntN(Idx->getType()->getScalarSizeInBits(), NumElements); auto *Cmp = B.CreateICmpULT(Idx, NumElts); - if (auto *ConstCond = dyn_cast<ConstantInt>(Cmp)) - assert(ConstCond->isOne() && "Index must be valid!"); + if (isa<ConstantInt>(Cmp)) + assert(cast<ConstantInt>(Cmp)->isOne() && "Index must be valid!"); else B.CreateAssumption(Cmp); } @@ -248,7 +243,6 @@ public: /// a matrix with \p NumRows embedded in a vector.
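Since MatrixBuilder can no longer read the element type off the pointer operand, callers of CreateColumnMajorLoad shown above now supply it explicitly. A hedged sketch of the migrated call, with assumed names (DataPtr, Stride) and an assumed float element type:

    #include "llvm/IR/MatrixBuilder.h"

    // Sketch: load a 3x3 float matrix; EltTy is now an explicit argument
    // instead of DataPtr->getType()->getPointerElementType().
    llvm::CallInst *loadMat3x3(llvm::IRBuilderBase &Builder,
                               llvm::Value *DataPtr, llvm::Value *Stride) {
      llvm::MatrixBuilder MB(Builder); // a plain class after this change
      return MB.CreateColumnMajorLoad(
          llvm::Type::getFloatTy(Builder.getContext()), DataPtr,
          llvm::Align(16), Stride, /*IsVolatile=*/false, /*Rows=*/3,
          /*Columns=*/3, "mat");
    }

The CreateIndex helper documented just above continues below.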
Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows, Twine const &Name = "") { - unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(), ColumnIdx->getType()->getScalarSizeInBits()); Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth); diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 7965884990e5..be359d94f812 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -169,7 +169,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Metadata &MD) { /// Metadata wrapper in the Value hierarchy. /// /// A member of the \a Value hierarchy to represent a reference to metadata. -/// This allows, e.g., instrinsics to have metadata as operands. +/// This allows, e.g., intrinsics to have metadata as operands. /// /// Notably, this is the only thing in either hierarchy that is allowed to /// reference \a LocalAsMetadata. @@ -302,7 +302,8 @@ public: /// /// Replace all uses of this with \c MD, which is allowed to be null. void replaceAllUsesWith(Metadata *MD); - + /// Replace all uses of the constant with Undef in debug info metadata. + static void SalvageDebugInfo(const Constant &C); /// Returns the list of all DIArgList users of this. SmallVector getAllArgListUsers(); @@ -774,10 +775,21 @@ class MDOperand { public: MDOperand() = default; - MDOperand(MDOperand &&) = delete; MDOperand(const MDOperand &) = delete; - MDOperand &operator=(MDOperand &&) = delete; + MDOperand(MDOperand &&Op) { + MD = Op.MD; + if (MD) + (void)MetadataTracking::retrack(Op.MD, MD); + Op.MD = nullptr; + } MDOperand &operator=(const MDOperand &) = delete; + MDOperand &operator=(MDOperand &&Op) { + MD = Op.MD; + if (MD) + (void)MetadataTracking::retrack(Op.MD, MD); + Op.MD = nullptr; + return *this; + } ~MDOperand() { untrack(); } Metadata *get() const { return MD; } @@ -922,13 +934,109 @@ struct TempMDNodeDeleter { /// If an unresolved node is part of a cycle, \a resolveCycles() needs /// to be called on some member of the cycle once all temporary nodes have been /// replaced. +/// +/// MDNodes can be large or small, as well as resizable or non-resizable. +/// Large MDNodes' operands are allocated in a separate storage vector, +/// whereas small MDNodes' operands are co-allocated. Distinct and temporary +/// MDNodes are resizable, but only MDTuples support this capability. +/// +/// Clients can add operands to resizable MDNodes using push_back(). class MDNode : public Metadata { friend class ReplaceableMetadataImpl; friend class LLVMContextImpl; friend class DIArgList; - unsigned NumOperands; - unsigned NumUnresolved; + /// The header that is co-allocated with an MDNode along with its "small" + /// operands. It is located immediately before the main body of the node. + /// The operands are in turn located immediately before the header. + /// For resizable MDNodes, the space for the storage vector is also allocated + /// immediately before the header, overlapping with the operands.
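The co-allocation scheme described in the comment above can be pictured with a deliberately simplified model (illustrative only; the real Header below additionally packs bitfields, respects alignment, and switches to a heap-backed vector in large mode):

    // Toy model, not LLVM code: the header sits immediately before the
    // node, and the operands sit immediately before the header, so both
    // are reachable from 'this' by pointer arithmetic alone.
    struct ToyHeader { unsigned NumOps; };
    struct ToyNode {
      ToyHeader &header() {
        return *(reinterpret_cast<ToyHeader *>(this) - 1);
      }
      void **operands() {
        ToyHeader &H = header();
        return reinterpret_cast<void **>(&H) - H.NumOps;
      }
    };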
+ struct Header { + bool IsResizable : 1; + bool IsLarge : 1; + size_t SmallSize : 4; + size_t SmallNumOps : 4; + size_t : sizeof(size_t) * CHAR_BIT - 10; + + unsigned NumUnresolved = 0; + using LargeStorageVector = SmallVector<MDOperand, 0>; + + static constexpr size_t NumOpsFitInVector = + sizeof(LargeStorageVector) / sizeof(MDOperand); + static_assert( + NumOpsFitInVector * sizeof(MDOperand) == sizeof(LargeStorageVector), + "sizeof(LargeStorageVector) must be a multiple of sizeof(MDOperand)"); + + static constexpr size_t MaxSmallSize = 15; + + static constexpr size_t getOpSize(unsigned NumOps) { + return sizeof(MDOperand) * NumOps; + } + /// Returns the number of operands the node has space for based on its + /// allocation characteristics. + static size_t getSmallSize(size_t NumOps, bool IsResizable, bool IsLarge) { + return IsLarge ? NumOpsFitInVector + : std::max(NumOps, NumOpsFitInVector * IsResizable); + } + /// Returns the number of bytes allocated for operands and header. + static size_t getAllocSize(StorageType Storage, size_t NumOps) { + return getOpSize( + getSmallSize(NumOps, isResizable(Storage), isLarge(NumOps))) + + sizeof(Header); + } + + /// Only temporary and distinct nodes are resizable. + static bool isResizable(StorageType Storage) { return Storage != Uniqued; } + static bool isLarge(size_t NumOps) { return NumOps > MaxSmallSize; } + + size_t getAllocSize() const { + return getOpSize(SmallSize) + sizeof(Header); + } + void *getAllocation() { + return reinterpret_cast<char *>(this + 1) - + alignTo(getAllocSize(), alignof(uint64_t)); + } + + void *getLargePtr() const; + void *getSmallPtr(); + + LargeStorageVector &getLarge() { + assert(IsLarge); + return *reinterpret_cast<LargeStorageVector *>(getLargePtr()); + } + + const LargeStorageVector &getLarge() const { + assert(IsLarge); + return *reinterpret_cast<const LargeStorageVector *>(getLargePtr()); + } + + void resizeSmall(size_t NumOps); + void resizeSmallToLarge(size_t NumOps); + void resize(size_t NumOps); + + explicit Header(size_t NumOps, StorageType Storage); + ~Header(); + + MutableArrayRef<MDOperand> operands() { + if (IsLarge) + return getLarge(); + return makeMutableArrayRef( + reinterpret_cast<MDOperand *>(this) - SmallSize, SmallNumOps); + } + + ArrayRef<MDOperand> operands() const { + if (IsLarge) + return getLarge(); + return makeArrayRef(reinterpret_cast<const MDOperand *>(this) - SmallSize, + SmallNumOps); + } + }; + + Header &getHeader() { return *(reinterpret_cast<Header *>(this) - 1); } + + const Header &getHeader() const { + return *(reinterpret_cast<const Header *>(this) - 1); + } ContextAndReplaceableUses Context; @@ -937,7 +1045,7 @@ protected: ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2 = None); ~MDNode() = default; - void *operator new(size_t Size, unsigned NumOps); + void *operator new(size_t Size, size_t NumOps, StorageType Storage); void operator delete(void *Mem); /// Required by std, but never called. @@ -952,8 +1060,8 @@ protected: void dropAllReferences(); - MDOperand *mutable_begin() { return mutable_end() - NumOperands; } - MDOperand *mutable_end() { return reinterpret_cast<MDOperand *>(this); } + MDOperand *mutable_begin() { return getHeader().operands().begin(); } + MDOperand *mutable_end() { return getHeader().operands().end(); } using mutable_op_range = iterator_range<MDOperand *>; @@ -999,7 +1107,7 @@ public: /// As forward declarations are resolved, their containers should get /// resolved automatically. However, if this (or one of its operands) is /// involved in a cycle, \a resolveCycles() needs to be called explicitly. - bool isResolved() const { return !isTemporary() && !NumUnresolved; } + bool isResolved() const { return !isTemporary() && !getNumUnresolved(); } bool isUniqued() const { return Storage == Uniqued; } bool isDistinct() const { return Storage == Distinct; } @@ -1093,11 +1201,25 @@ protected: /// Sets the operand directly, without worrying about uniquing. void setOperand(unsigned I, Metadata *New); + unsigned getNumUnresolved() const { return getHeader().NumUnresolved; } + + void setNumUnresolved(unsigned N) { getHeader().NumUnresolved = N; } void storeDistinctInContext(); template <class T, class StoreT> static T *storeImpl(T *N, StorageType Storage, StoreT &Store); template <class T> static T *storeImpl(T *N, StorageType Storage); + /// Resize the node to hold \a NumOps operands. + /// + /// \pre \a isTemporary() or \a isDistinct() + /// \pre MetadataID == MDTupleKind + void resize(size_t NumOps) { + assert(!isUniqued() && "Resizing is not supported for uniqued nodes"); + assert(getMetadataID() == MDTupleKind && + "Resizing is not supported for this node kind"); + getHeader().resize(NumOps); + } + private: void handleChangedOperand(void *Ref, Metadata *New); @@ -1154,12 +1276,12 @@ public: op_range operands() const { return op_range(op_begin(), op_end()); } const MDOperand &getOperand(unsigned I) const { - assert(I < NumOperands && "Out of range"); - return op_begin()[I]; + assert(I < getNumOperands() && "Out of range"); + return getHeader().operands()[I]; } /// Return number of MDNode operands. - unsigned getNumOperands() const { return NumOperands; } + unsigned getNumOperands() const { return getHeader().operands().size(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Metadata *MD) { @@ -1244,6 +1366,16 @@ public: /// Return a (temporary) clone of this. TempMDTuple clone() const { return cloneImpl(); } + /// Append an element to the tuple. This will resize the node. + void push_back(Metadata *MD) { + size_t NumOps = getNumOperands(); + resize(NumOps + 1); + setOperand(NumOps, MD); + } + + /// Shrink the operands by 1.
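Together with pop_back(), declared next, this gives MDTuple a grow-and-shrink interface; only distinct and temporary (non-uniqued) tuples accept it, per the resize() preconditions above. A small usage sketch (assumed context):

    #include "llvm/IR/Metadata.h"

    void growTuple(llvm::LLVMContext &Ctx) {
      // Distinct nodes are resizable; a uniqued tuple would assert.
      llvm::MDTuple *T = llvm::MDTuple::getDistinct(Ctx, {});
      T->push_back(llvm::MDString::get(Ctx, "tag")); // one operand now
      T->pop_back();                                 // and back to zero
    }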
+ void pop_back() { resize(getNumOperands() - 1); } + static bool classof(const Metadata *MD) { return MD->getMetadataID() == MDTupleKind; } diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 7b834fbeeebf..fc2d60947118 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -58,9 +58,9 @@ class VersionTuple; /// other modules) this module depends on, a symbol table, and various data /// about the target's characteristics. /// -/// A module maintains a GlobalValRefMap object that is used to hold all +/// A module maintains a GlobalList object that is used to hold all /// constant references to global variables in the module. When a global -/// variable is destroyed, it should have no entries in the GlobalValueRefMap. +/// variable is destroyed, it should have no entries in the GlobalList. /// The main container class for the LLVM Intermediate Representation. class LLVM_EXTERNAL_VISIBILITY Module { /// @name Types And Enumerations @@ -146,9 +146,12 @@ public: /// Takes the max of the two values, which are required to be integers. Max = 7, + /// Takes the min of the two values, which are required to be integers. + Min = 8, + // Markers: ModFlagBehaviorFirstVal = Error, - ModFlagBehaviorLastVal = Max + ModFlagBehaviorLastVal = Min }; /// Checks if Metadata represents a valid ModFlagBehavior, and stores the @@ -360,6 +363,8 @@ public: /// In all cases, the returned value is a FunctionCallee wrapper around the /// 'FunctionType *T' passed in, as well as a 'Value*' either of the Function or /// the bitcast to the function. + /// + /// Note: For library calls getOrInsertLibFunc() should be used instead. FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList); @@ -888,8 +893,8 @@ public: void setRtLibUseGOT(); /// Get/set whether synthesized functions should get the uwtable attribute. - bool getUwtable() const; - void setUwtable(); + UWTableKind getUwtable() const; + void setUwtable(UWTableKind Kind); /// Get/set whether synthesized functions should get the "frame-pointer" /// attribute. @@ -939,10 +944,17 @@ public: /// @returns a string containing the target variant triple. StringRef getDarwinTargetVariantTriple() const; + /// Set the target variant triple which is a string describing a variant of + /// the target host platform. + void setDarwinTargetVariantTriple(StringRef T); + /// Get the target variant version build SDK version metadata. /// /// An empty version is returned if no such metadata is attached. VersionTuple getDarwinTargetVariantSDKVersion() const; + + /// Set the target variant version build SDK version metadata. + void setDarwinTargetVariantSDKVersion(VersionTuple Version); }; /// Given "llvm.used" or "llvm.compiler.used" as a global name, collect the diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h index ec149747e3f4..4e9f772dfdb6 100644 --- a/llvm/include/llvm/IR/NoFolder.h +++ b/llvm/include/llvm/IR/NoFolder.h @@ -23,10 +23,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/FMF.h" +#include "llvm/IR/IRBuilderFolder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IRBuilderFolder.h" namespace llvm { @@ -43,144 +44,72 @@ public: // Return an existing value or a constant if the operation can be simplified. // Otherwise return nullptr. 
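The comment above states the entire contract of the reworked folder interface: each Fold* hook either returns an existing simplified value or nullptr, leaving instruction creation to the caller. A caller-side sketch of that contract (assumed glue code, not part of this patch):

    #include "llvm/IR/IRBuilderFolder.h"
    #include "llvm/IR/InstrTypes.h"

    // Try the folder first; materialize a real instruction on nullptr.
    llvm::Value *foldOrCreateAdd(const llvm::IRBuilderFolder &Folder,
                                 llvm::Value *L, llvm::Value *R) {
      if (llvm::Value *V = Folder.FoldBinOp(llvm::Instruction::Add, L, R))
        return V; // folded to an existing value or constant
      return llvm::BinaryOperator::Create(llvm::Instruction::Add, L, R);
    }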
//===--------------------------------------------------------------------===// - Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false, - bool HasNSW = false) const override { - return nullptr; - } - Value *FoldAnd(Value *LHS, Value *RHS) const override { return nullptr; } - - Value *FoldOr(Value *LHS, Value *RHS) const override { return nullptr; } - - Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { + Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const override { return nullptr; } - Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef IdxList, - bool IsInBounds = false) const override { + Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool IsExact) const override { return nullptr; } - Value *FoldSelect(Value *C, Value *True, Value *False) const override { + Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool HasNUW, bool HasNSW) const override { return nullptr; } - //===--------------------------------------------------------------------===// - // Binary Operators - //===--------------------------------------------------------------------===// - - Instruction *CreateFAdd(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateFAdd(LHS, RHS); - } - - Instruction *CreateSub(Constant *LHS, Constant *RHS, - bool HasNUW = false, - bool HasNSW = false) const override { - BinaryOperator *BO = BinaryOperator::CreateSub(LHS, RHS); - if (HasNUW) BO->setHasNoUnsignedWrap(); - if (HasNSW) BO->setHasNoSignedWrap(); - return BO; - } - - Instruction *CreateFSub(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateFSub(LHS, RHS); - } - - Instruction *CreateMul(Constant *LHS, Constant *RHS, - bool HasNUW = false, - bool HasNSW = false) const override { - BinaryOperator *BO = BinaryOperator::CreateMul(LHS, RHS); - if (HasNUW) BO->setHasNoUnsignedWrap(); - if (HasNSW) BO->setHasNoSignedWrap(); - return BO; - } - - Instruction *CreateFMul(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateFMul(LHS, RHS); - } - - Instruction *CreateUDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - if (!isExact) - return BinaryOperator::CreateUDiv(LHS, RHS); - return BinaryOperator::CreateExactUDiv(LHS, RHS); - } - - Instruction *CreateSDiv(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - if (!isExact) - return BinaryOperator::CreateSDiv(LHS, RHS); - return BinaryOperator::CreateExactSDiv(LHS, RHS); - } - - Instruction *CreateFDiv(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateFDiv(LHS, RHS); + Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + FastMathFlags FMF) const override { + return nullptr; } - Instruction *CreateURem(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateURem(LHS, RHS); + Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { + return nullptr; } - Instruction *CreateSRem(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateSRem(LHS, RHS); + Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef IdxList, + bool IsInBounds = false) const override { + return nullptr; } - Instruction *CreateFRem(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateFRem(LHS, RHS); + Value *FoldSelect(Value *C, Value *True, Value *False) const override { + return nullptr; } - Instruction *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false, - 
bool HasNSW = false) const override { - BinaryOperator *BO = BinaryOperator::CreateShl(LHS, RHS); - if (HasNUW) BO->setHasNoUnsignedWrap(); - if (HasNSW) BO->setHasNoSignedWrap(); - return BO; + Value *FoldExtractValue(Value *Agg, + ArrayRef IdxList) const override { + return nullptr; } - Instruction *CreateLShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - if (!isExact) - return BinaryOperator::CreateLShr(LHS, RHS); - return BinaryOperator::CreateExactLShr(LHS, RHS); + Value *FoldInsertValue(Value *Agg, Value *Val, + ArrayRef IdxList) const override { + return nullptr; } - Instruction *CreateAShr(Constant *LHS, Constant *RHS, - bool isExact = false) const override { - if (!isExact) - return BinaryOperator::CreateAShr(LHS, RHS); - return BinaryOperator::CreateExactAShr(LHS, RHS); + Value *FoldExtractElement(Value *Vec, Value *Idx) const override { + return nullptr; } - Instruction *CreateXor(Constant *LHS, Constant *RHS) const override { - return BinaryOperator::CreateXor(LHS, RHS); + Value *FoldInsertElement(Value *Vec, Value *NewElt, + Value *Idx) const override { + return nullptr; } - Instruction *CreateBinOp(Instruction::BinaryOps Opc, - Constant *LHS, Constant *RHS) const override { - return BinaryOperator::Create(Opc, LHS, RHS); + Value *FoldShuffleVector(Value *V1, Value *V2, + ArrayRef Mask) const override { + return nullptr; } //===--------------------------------------------------------------------===// // Unary Operators //===--------------------------------------------------------------------===// - Instruction *CreateNeg(Constant *C, - bool HasNUW = false, - bool HasNSW = false) const override { - BinaryOperator *BO = BinaryOperator::CreateNeg(C); - if (HasNUW) BO->setHasNoUnsignedWrap(); - if (HasNSW) BO->setHasNoSignedWrap(); - return BO; - } - Instruction *CreateFNeg(Constant *C) const override { return UnaryOperator::CreateFNeg(C); } - Instruction *CreateNot(Constant *C) const override { - return BinaryOperator::CreateNot(C); - } - Instruction *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { return UnaryOperator::Create(Opc, C); @@ -245,35 +174,6 @@ public: Constant *LHS, Constant *RHS) const override { return new FCmpInst(P, LHS, RHS); } - - //===--------------------------------------------------------------------===// - // Other Instructions - //===--------------------------------------------------------------------===// - - Instruction *CreateExtractElement(Constant *Vec, - Constant *Idx) const override { - return ExtractElementInst::Create(Vec, Idx); - } - - Instruction *CreateInsertElement(Constant *Vec, Constant *NewElt, - Constant *Idx) const override { - return InsertElementInst::Create(Vec, NewElt, Idx); - } - - Instruction *CreateShuffleVector(Constant *V1, Constant *V2, - ArrayRef Mask) const override { - return new ShuffleVectorInst(V1, V2, Mask); - } - - Instruction *CreateExtractValue(Constant *Agg, - ArrayRef IdxList) const override { - return ExtractValueInst::Create(Agg, IdxList); - } - - Instruction *CreateInsertValue(Constant *Agg, Constant *Val, - ArrayRef IdxList) const override { - return InsertValueInst::Create(Agg, Val, IdxList); - } }; } // end namespace llvm diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 7d232bba0864..1a234e273eff 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -18,6 +18,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/FMF.h" #include 
"llvm/IR/Instruction.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -161,105 +162,6 @@ public: } }; -/// Convenience struct for specifying and reasoning about fast-math flags. -class FastMathFlags { -private: - friend class FPMathOperator; - - unsigned Flags = 0; - - FastMathFlags(unsigned F) { - // If all 7 bits are set, turn this into -1. If the number of bits grows, - // this must be updated. This is intended to provide some forward binary - // compatibility insurance for the meaning of 'fast' in case bits are added. - if (F == 0x7F) Flags = ~0U; - else Flags = F; - } - -public: - // This is how the bits are used in Value::SubclassOptionalData so they - // should fit there too. - // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New - // functionality will require a change in how this information is stored. - enum { - AllowReassoc = (1 << 0), - NoNaNs = (1 << 1), - NoInfs = (1 << 2), - NoSignedZeros = (1 << 3), - AllowReciprocal = (1 << 4), - AllowContract = (1 << 5), - ApproxFunc = (1 << 6) - }; - - FastMathFlags() = default; - - static FastMathFlags getFast() { - FastMathFlags FMF; - FMF.setFast(); - return FMF; - } - - bool any() const { return Flags != 0; } - bool none() const { return Flags == 0; } - bool all() const { return Flags == ~0U; } - - void clear() { Flags = 0; } - void set() { Flags = ~0U; } - - /// Flag queries - bool allowReassoc() const { return 0 != (Flags & AllowReassoc); } - bool noNaNs() const { return 0 != (Flags & NoNaNs); } - bool noInfs() const { return 0 != (Flags & NoInfs); } - bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); } - bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); } - bool allowContract() const { return 0 != (Flags & AllowContract); } - bool approxFunc() const { return 0 != (Flags & ApproxFunc); } - /// 'Fast' means all bits are set. - bool isFast() const { return all(); } - - /// Flag setters - void setAllowReassoc(bool B = true) { - Flags = (Flags & ~AllowReassoc) | B * AllowReassoc; - } - void setNoNaNs(bool B = true) { - Flags = (Flags & ~NoNaNs) | B * NoNaNs; - } - void setNoInfs(bool B = true) { - Flags = (Flags & ~NoInfs) | B * NoInfs; - } - void setNoSignedZeros(bool B = true) { - Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros; - } - void setAllowReciprocal(bool B = true) { - Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal; - } - void setAllowContract(bool B = true) { - Flags = (Flags & ~AllowContract) | B * AllowContract; - } - void setApproxFunc(bool B = true) { - Flags = (Flags & ~ApproxFunc) | B * ApproxFunc; - } - void setFast(bool B = true) { B ? set() : clear(); } - - void operator&=(const FastMathFlags &OtherFlags) { - Flags &= OtherFlags.Flags; - } - void operator|=(const FastMathFlags &OtherFlags) { - Flags |= OtherFlags.Flags; - } - bool operator!=(const FastMathFlags &OtherFlags) const { - return Flags != OtherFlags.Flags; - } - - /// Print fast-math flags to \p O. - void print(raw_ostream &O) const; -}; - -inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) { - FMF.print(O); - return O; -} - /// Utility class for floating point operations which can have /// information about relaxed accuracy requirements attached to them. 
class FPMathOperator : public Operator { diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index f9f4f1603861..7f0695b552e1 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -136,7 +136,9 @@ struct undef_match { inline auto m_Undef() { return undef_match(); } /// Match an arbitrary poison constant. -inline class_match m_Poison() { return class_match(); } +inline class_match m_Poison() { + return class_match(); +} /// Match an arbitrary Constant and ignore it. inline class_match m_Constant() { return class_match(); } @@ -222,7 +224,7 @@ struct apint_match { bool AllowUndef; apint_match(const APInt *&Res, bool AllowUndef) - : Res(Res), AllowUndef(AllowUndef) {} + : Res(Res), AllowUndef(AllowUndef) {} template bool match(ITy *V) { if (auto *CI = dyn_cast(V)) { @@ -231,8 +233,8 @@ struct apint_match { } if (V->getType()->isVectorTy()) if (const auto *C = dyn_cast(V)) - if (auto *CI = dyn_cast_or_null( - C->getSplatValue(AllowUndef))) { + if (auto *CI = + dyn_cast_or_null(C->getSplatValue(AllowUndef))) { Res = &CI->getValue(); return true; } @@ -256,8 +258,8 @@ struct apfloat_match { } if (V->getType()->isVectorTy()) if (const auto *C = dyn_cast(V)) - if (auto *CI = dyn_cast_or_null( - C->getSplatValue(AllowUndef))) { + if (auto *CI = + dyn_cast_or_null(C->getSplatValue(AllowUndef))) { Res = &CI->getValueAPF(); return true; } @@ -467,9 +469,7 @@ struct is_negative { inline cst_pred_ty m_Negative() { return cst_pred_ty(); } -inline api_pred_ty m_Negative(const APInt *&V) { - return V; -} +inline api_pred_ty m_Negative(const APInt *&V) { return V; } struct is_nonnegative { bool isValue(const APInt &C) { return C.isNonNegative(); } @@ -479,9 +479,7 @@ struct is_nonnegative { inline cst_pred_ty m_NonNegative() { return cst_pred_ty(); } -inline api_pred_ty m_NonNegative(const APInt *&V) { - return V; -} +inline api_pred_ty m_NonNegative(const APInt *&V) { return V; } struct is_strictlypositive { bool isValue(const APInt &C) { return C.isStrictlyPositive(); } @@ -510,9 +508,7 @@ struct is_one { }; /// Match an integer 1 or a vector with all elements equal to 1. /// For vectors, this includes constants with undefined elements. -inline cst_pred_ty m_One() { - return cst_pred_ty(); -} +inline cst_pred_ty m_One() { return cst_pred_ty(); } struct is_zero_int { bool isValue(const APInt &C) { return C.isZero(); } @@ -532,21 +528,15 @@ struct is_zero { }; /// Match any null constant or a vector with all elements equal to 0. /// For vectors, this includes constants with undefined elements. -inline is_zero m_Zero() { - return is_zero(); -} +inline is_zero m_Zero() { return is_zero(); } struct is_power2 { bool isValue(const APInt &C) { return C.isPowerOf2(); } }; /// Match an integer or vector power-of-2. /// For vectors, this includes constants with undefined elements. 
-inline cst_pred_ty m_Power2() { - return cst_pred_ty(); -} -inline api_pred_ty m_Power2(const APInt *&V) { - return V; -} +inline cst_pred_ty m_Power2() { return cst_pred_ty(); } +inline api_pred_ty m_Power2(const APInt *&V) { return V; } struct is_negated_power2 { bool isValue(const APInt &C) { return C.isNegatedPowerOf2(); } @@ -589,9 +579,7 @@ struct is_lowbit_mask { inline cst_pred_ty m_LowBitMask() { return cst_pred_ty(); } -inline api_pred_ty m_LowBitMask(const APInt *&V) { - return V; -} +inline api_pred_ty m_LowBitMask(const APInt *&V) { return V; } struct icmp_pred_with_threshold { ICmpInst::Predicate Pred; @@ -613,9 +601,7 @@ struct is_nan { }; /// Match an arbitrary NaN constant. This includes quiet and signalling nans. /// For vectors, this includes constants with undefined elements. -inline cstfp_pred_ty m_NaN() { - return cstfp_pred_ty(); -} +inline cstfp_pred_ty m_NaN() { return cstfp_pred_ty(); } struct is_nonnan { bool isValue(const APFloat &C) { return !C.isNaN(); } @@ -631,9 +617,7 @@ struct is_inf { }; /// Match a positive or negative infinity FP constant. /// For vectors, this includes constants with undefined elements. -inline cstfp_pred_ty m_Inf() { - return cstfp_pred_ty(); -} +inline cstfp_pred_ty m_Inf() { return cstfp_pred_ty(); } struct is_noninf { bool isValue(const APFloat &C) { return !C.isInfinity(); } @@ -729,7 +713,9 @@ inline bind_ty m_UnOp(UnaryOperator *&I) { return I; } /// Match a binary operator, capturing it if we match. inline bind_ty m_BinOp(BinaryOperator *&I) { return I; } /// Match a with overflow intrinsic, capturing it if we match. -inline bind_ty m_WithOverflowInst(WithOverflowInst *&I) { return I; } +inline bind_ty m_WithOverflowInst(WithOverflowInst *&I) { + return I; +} inline bind_ty m_WithOverflowInst(const WithOverflowInst *&I) { return I; @@ -842,8 +828,7 @@ struct bind_const_intval_ty { /// Match a specified integer value or vector of all elements of that /// value. -template -struct specific_intval { +template struct specific_intval { APInt Val; specific_intval(APInt V) : Val(std::move(V)) {} @@ -1014,7 +999,8 @@ template struct FNeg_match { FNeg_match(const Op_t &Op) : X(Op) {} template bool match(OpTy *V) { auto *FPMO = dyn_cast(V); - if (!FPMO) return false; + if (!FPMO) + return false; if (FPMO->getOpcode() == Instruction::FNeg) return X.match(FPMO->getOperand(0)); @@ -1038,9 +1024,7 @@ template struct FNeg_match { }; /// Match 'fneg X' as 'fsub -0.0, X'. 
-template -inline FNeg_match -m_FNeg(const OpTy &X) { +template inline FNeg_match m_FNeg(const OpTy &X) { return FNeg_match(X); } @@ -1165,32 +1149,32 @@ inline OverflowingBinaryOp_match m_NSWAdd(const LHS &L, const RHS &R) { return OverflowingBinaryOp_match( - L, R); + OverflowingBinaryOperator::NoSignedWrap>(L, + R); } template inline OverflowingBinaryOp_match m_NSWSub(const LHS &L, const RHS &R) { return OverflowingBinaryOp_match( - L, R); + OverflowingBinaryOperator::NoSignedWrap>(L, + R); } template inline OverflowingBinaryOp_match m_NSWMul(const LHS &L, const RHS &R) { return OverflowingBinaryOp_match( - L, R); + OverflowingBinaryOperator::NoSignedWrap>(L, + R); } template inline OverflowingBinaryOp_match m_NSWShl(const LHS &L, const RHS &R) { return OverflowingBinaryOp_match( - L, R); + OverflowingBinaryOperator::NoSignedWrap>(L, + R); } template @@ -1384,7 +1368,7 @@ struct CmpClass_match { Predicate = I->getPredicate(); return true; } else if (Commutable && L.match(I->getOperand(1)) && - R.match(I->getOperand(0))) { + R.match(I->getOperand(0))) { Predicate = I->getSwappedPredicate(); return true; } @@ -2080,15 +2064,13 @@ template struct m_Intrinsic_Ty { }; template struct m_Intrinsic_Ty { - using Ty = - match_combine_and::Ty, - Argument_match>; + using Ty = match_combine_and::Ty, + Argument_match>; }; template struct m_Intrinsic_Ty { - using Ty = - match_combine_and::Ty, - Argument_match>; + using Ty = match_combine_and::Ty, + Argument_match>; }; template @@ -2097,7 +2079,8 @@ struct m_Intrinsic_Ty { Argument_match>; }; -template +template struct m_Intrinsic_Ty { using Ty = match_combine_and::Ty, Argument_match>; @@ -2117,6 +2100,14 @@ m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2, return m_Intrinsic(Op0, Op1, Op2, Op3); } +/// Matches MaskedGather Intrinsic. +template +inline typename m_Intrinsic_Ty::Ty +m_MaskedGather(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2, + const Opnd3 &Op3) { + return m_Intrinsic(Op0, Op1, Op2, Op3); +} + template inline typename m_Intrinsic_Ty::Ty m_Intrinsic(const T0 &Op0) { return m_CombineAnd(m_Intrinsic(), m_Argument<0>(Op0)); @@ -2204,6 +2195,11 @@ m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { return m_Intrinsic(Op0, Op1, Op2); } +template +inline typename m_Intrinsic_Ty::Ty m_Sqrt(const Opnd0 &Op0) { + return m_Intrinsic(Op0); +} + //===----------------------------------------------------------------------===// // Matchers for two-operands operators with the operators in either order // @@ -2532,8 +2528,8 @@ struct LogicalOp_match { /// Matches L && R either in the form of L & R or L ? R : false. /// Note that the latter form is poison-blocking. template -inline LogicalOp_match -m_LogicalAnd(const LHS &L, const RHS &R) { +inline LogicalOp_match m_LogicalAnd(const LHS &L, + const RHS &R) { return LogicalOp_match(L, R); } @@ -2550,8 +2546,8 @@ m_c_LogicalAnd(const LHS &L, const RHS &R) { /// Matches L || R either in the form of L | R or L ? true : R. /// Note that the latter form is poison-blocking. 
template -inline LogicalOp_match -m_LogicalOr(const LHS &L, const RHS &R) { +inline LogicalOp_match m_LogicalOr(const LHS &L, + const RHS &R) { return LogicalOp_match(L, R); } diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 62d67308114f..39c11771ff41 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -47,6 +47,8 @@ HANDLE_LIBCALL(MUL_I16, "__mulhi3") HANDLE_LIBCALL(MUL_I32, "__mulsi3") HANDLE_LIBCALL(MUL_I64, "__muldi3") HANDLE_LIBCALL(MUL_I128, "__multi3") +HANDLE_LIBCALL(MUL_IEXT, nullptr) + HANDLE_LIBCALL(MULO_I32, "__mulosi4") HANDLE_LIBCALL(MULO_I64, "__mulodi4") HANDLE_LIBCALL(MULO_I128, "__muloti4") @@ -55,31 +57,43 @@ HANDLE_LIBCALL(SDIV_I16, "__divhi3") HANDLE_LIBCALL(SDIV_I32, "__divsi3") HANDLE_LIBCALL(SDIV_I64, "__divdi3") HANDLE_LIBCALL(SDIV_I128, "__divti3") +HANDLE_LIBCALL(SDIV_IEXT, "__divei4") + HANDLE_LIBCALL(UDIV_I8, "__udivqi3") HANDLE_LIBCALL(UDIV_I16, "__udivhi3") HANDLE_LIBCALL(UDIV_I32, "__udivsi3") HANDLE_LIBCALL(UDIV_I64, "__udivdi3") HANDLE_LIBCALL(UDIV_I128, "__udivti3") +HANDLE_LIBCALL(UDIV_IEXT, "__udivei4") + HANDLE_LIBCALL(SREM_I8, "__modqi3") HANDLE_LIBCALL(SREM_I16, "__modhi3") HANDLE_LIBCALL(SREM_I32, "__modsi3") HANDLE_LIBCALL(SREM_I64, "__moddi3") HANDLE_LIBCALL(SREM_I128, "__modti3") +HANDLE_LIBCALL(SREM_IEXT, "__modei4") + HANDLE_LIBCALL(UREM_I8, "__umodqi3") HANDLE_LIBCALL(UREM_I16, "__umodhi3") HANDLE_LIBCALL(UREM_I32, "__umodsi3") HANDLE_LIBCALL(UREM_I64, "__umoddi3") HANDLE_LIBCALL(UREM_I128, "__umodti3") +HANDLE_LIBCALL(UREM_IEXT, "__umodei4") + HANDLE_LIBCALL(SDIVREM_I8, nullptr) HANDLE_LIBCALL(SDIVREM_I16, nullptr) HANDLE_LIBCALL(SDIVREM_I32, nullptr) HANDLE_LIBCALL(SDIVREM_I64, nullptr) HANDLE_LIBCALL(SDIVREM_I128, nullptr) +HANDLE_LIBCALL(SDIVREM_IEXT, nullptr) + HANDLE_LIBCALL(UDIVREM_I8, nullptr) HANDLE_LIBCALL(UDIVREM_I16, nullptr) HANDLE_LIBCALL(UDIVREM_I32, nullptr) HANDLE_LIBCALL(UDIVREM_I64, nullptr) HANDLE_LIBCALL(UDIVREM_I128, nullptr) +HANDLE_LIBCALL(UDIVREM_IEXT, nullptr) + HANDLE_LIBCALL(NEG_I32, "__negsi2") HANDLE_LIBCALL(NEG_I64, "__negdi2") HANDLE_LIBCALL(CTLZ_I32, "__clzsi2") @@ -296,6 +310,8 @@ HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2") HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2") HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2") +HANDLE_LIBCALL(FPROUND_F32_BF16, "__truncsfbf2") +HANDLE_LIBCALL(FPROUND_F64_BF16, "__truncdfbf2") HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2") HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2") HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2") diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h index da9c732ad818..ba8ffbbaf397 100644 --- a/llvm/include/llvm/IR/Statepoint.h +++ b/llvm/include/llvm/IR/Statepoint.h @@ -121,9 +121,8 @@ public: /// Return the type of the value returned by the call underlying the /// statepoint. Type *getActualReturnType() const { - auto *CalleeTy = - getActualCalledOperand()->getType()->getPointerElementType(); - return cast(CalleeTy)->getReturnType(); + auto *FT = cast(getParamElementType(CalledFunctionPos)); + return FT->getReturnType(); } diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index e4e8a5529c87..51263c6b8fcc 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -68,13 +68,14 @@ public: TokenTyID, ///< Tokens // Derived types... see DerivedTypes.h file. 
- IntegerTyID, ///< Arbitrary bit width integers - FunctionTyID, ///< Functions - PointerTyID, ///< Pointers - StructTyID, ///< Structures - ArrayTyID, ///< Arrays - FixedVectorTyID, ///< Fixed width SIMD vector type - ScalableVectorTyID ///< Scalable SIMD vector type + IntegerTyID, ///< Arbitrary bit width integers + FunctionTyID, ///< Functions + PointerTyID, ///< Pointers + StructTyID, ///< Structures + ArrayTyID, ///< Arrays + FixedVectorTyID, ///< Fixed width SIMD vector type + ScalableVectorTyID, ///< Scalable SIMD vector type + DXILPointerTyID, ///< DXIL typed pointer used by DirectX target }; private: @@ -368,6 +369,9 @@ public: /// This method is deprecated without replacement. Pointer element types are /// not available with opaque pointers. + [[deprecated("Deprecated without replacement, see " + "https://llvm.org/docs/OpaquePointers.html for context and " + "migration instructions")]] Type *getPointerElementType() const { return getNonOpaquePointerElementType(); } diff --git a/llvm/include/llvm/IR/User.h b/llvm/include/llvm/IR/User.h index 221bb5b2cb1c..a9cf60151e5d 100644 --- a/llvm/include/llvm/IR/User.h +++ b/llvm/include/llvm/IR/User.h @@ -304,8 +304,8 @@ public: /// Replace uses of one Value with another. /// /// Replaces all references to the "From" definition with references to the - /// "To" definition. - void replaceUsesOfWith(Value *From, Value *To); + /// "To" definition. Returns whether any uses were replaced. + bool replaceUsesOfWith(Value *From, Value *To); // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 1abcbb874a8d..1d639e8aeb01 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -54,6 +54,12 @@ #define END_REGISTER_VP_SDNODE(VPSD) #endif +// Helper macro to set up the mapping from VP intrinsic to ISD opcode. +// Note: More than one VP intrinsic may map to one ISD opcode. +#ifndef HELPER_MAP_VPID_TO_VPSD +#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) +#endif + // Helper macros for the common "1:1 - Intrinsic : SDNode" case. // // There is one VP intrinsic that maps directly to one SDNode that goes by the @@ -70,7 +76,8 @@ // the SDNode is used. #define BEGIN_REGISTER_VP(VPID, MASKPOS, EVLPOS, VPSD, LEGALPOS) \ BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \ - BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS) + BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS) \ + HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) #define END_REGISTER_VP(VPID, VPSD) \ END_REGISTER_VP_INTRINSIC(VPID) \ @@ -121,6 +128,18 @@ #define VP_PROPERTY_BINARYOP #endif +// A property to infer VP type casts automatically. +#ifndef VP_PROPERTY_CASTOP +#define VP_PROPERTY_CASTOP +#endif + +// This VP Intrinsic is a comparison operation +// The condition code arg is at CCPOS and accepts floating-point condition +// codes if ISFP is set, else it accepts integer condition codes. 
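Consumers of VPIntrinsics.def stamp out code by defining the macros they care about before including the file; anything left undefined, such as the VP_PROPERTY_CMP guard that follows, defaults to a no-op. A sketch in the style LLVM itself uses (assumed consumer code, not part of the patch):

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Intrinsics.h"
    using llvm::None;

    // Recover the mask operand position of a VP intrinsic. Entries whose
    // MASKPOS is None (vp_select/vp_merge further down) work because the
    // return type is Optional<unsigned>.
    llvm::Optional<unsigned> vpMaskParamPos(llvm::Intrinsic::ID ID) {
      switch (ID) {
      default:
        return None;
    #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS)                \
      case llvm::Intrinsic::VPID:                                             \
        return MASKPOS;
    #include "llvm/IR/VPIntrinsics.def"
      }
    }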
+#ifndef VP_PROPERTY_CMP +#define VP_PROPERTY_CMP(CCPOS, ISFP) +#endif + /// } Property Macros ///// Integer Arithmetic { @@ -211,22 +230,130 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem) #undef HELPER_REGISTER_BINARY_FP_VP +// llvm.vp.fneg(x,mask,vlen) +BEGIN_REGISTER_VP(vp_fneg, 1, 2, VP_FNEG, -1) +VP_PROPERTY_FUNCTIONAL_OPC(FNeg) +END_REGISTER_VP(vp_fneg, VP_FNEG) + +// llvm.vp.fma(x,y,z,mask,vlen) +BEGIN_REGISTER_VP(vp_fma, 3, 4, VP_FMA, -1) +VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fma) +END_REGISTER_VP(vp_fma, VP_FMA) + ///// } Floating-Point Arithmetic +///// Type Casts { +// Specialized helper macro for type conversions. +// (%x, %mask, %evl). +#ifdef HELPER_REGISTER_FP_CAST_VP +#error \ + "The internal helper macro HELPER_REGISTER_FP_CAST_VP is already defined!" +#endif +#define HELPER_REGISTER_FP_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND) \ + BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \ + VP_PROPERTY_CASTOP \ + END_REGISTER_VP(vp_##OPSUFFIX, VPSD) + +// llvm.vp.fptoui(x,mask,vlen) +HELPER_REGISTER_FP_CAST_VP(fptoui, VP_FPTOUI, FPToUI, 0) + +// llvm.vp.fptosi(x,mask,vlen) +HELPER_REGISTER_FP_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0) + +// llvm.vp.uitofp(x,mask,vlen) +HELPER_REGISTER_FP_CAST_VP(uitofp, VP_UITOFP, UIToFP, 1) + +// llvm.vp.sitofp(x,mask,vlen) +HELPER_REGISTER_FP_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1) + +// llvm.vp.fptrunc(x,mask,vlen) +HELPER_REGISTER_FP_CAST_VP(fptrunc, VP_FP_ROUND, FPTrunc, 1) + +// llvm.vp.fpext(x,mask,vlen) +HELPER_REGISTER_FP_CAST_VP(fpext, VP_FP_EXTEND, FPExt, 0) + +#undef HELPER_REGISTER_FP_CAST_VP + +// Specialized helper macro for integer type conversions. +// (%x, %mask, %evl). +#ifdef HELPER_REGISTER_INT_CAST_VP +#error \ + "The internal helper macro HELPER_REGISTER_INT_CAST_VP is already defined!" 
+#endif +#define HELPER_REGISTER_INT_CAST_VP(OPSUFFIX, VPSD, IROPC) \ + BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_CASTOP \ + END_REGISTER_VP(vp_##OPSUFFIX, VPSD) + +// llvm.vp.trunc(x,mask,vlen) +HELPER_REGISTER_INT_CAST_VP(trunc, VP_TRUNCATE, Trunc) + +// llvm.vp.zext(x,mask,vlen) +HELPER_REGISTER_INT_CAST_VP(zext, VP_ZERO_EXTEND, ZExt) + +// llvm.vp.sext(x,mask,vlen) +HELPER_REGISTER_INT_CAST_VP(sext, VP_SIGN_EXTEND, SExt) + +// llvm.vp.ptrtoint(x,mask,vlen) +HELPER_REGISTER_INT_CAST_VP(ptrtoint, VP_PTRTOINT, PtrToInt) + +// llvm.vp.inttoptr(x,mask,vlen) +HELPER_REGISTER_INT_CAST_VP(inttoptr, VP_INTTOPTR, IntToPtr) + +#undef HELPER_REGISTER_INT_CAST_VP + +///// } Type Casts + +///// Comparisons { + +// VP_SETCC (ISel only) +BEGIN_REGISTER_VP_SDNODE(VP_SETCC, 0, vp_setcc, 3, 4) +END_REGISTER_VP_SDNODE(VP_SETCC) + +// llvm.vp.fcmp(x,y,cc,mask,vlen) +BEGIN_REGISTER_VP_INTRINSIC(vp_fcmp, 3, 4) +HELPER_MAP_VPID_TO_VPSD(vp_fcmp, VP_SETCC) +VP_PROPERTY_FUNCTIONAL_OPC(FCmp) +VP_PROPERTY_CMP(2, true) +VP_PROPERTY_CONSTRAINEDFP(0, 1, experimental_constrained_fcmp) +END_REGISTER_VP_INTRINSIC(vp_fcmp) + +// llvm.vp.icmp(x,y,cc,mask,vlen) +BEGIN_REGISTER_VP_INTRINSIC(vp_icmp, 3, 4) +HELPER_MAP_VPID_TO_VPSD(vp_icmp, VP_SETCC) +VP_PROPERTY_FUNCTIONAL_OPC(ICmp) +VP_PROPERTY_CMP(2, false) +END_REGISTER_VP_INTRINSIC(vp_icmp) + +///// } Comparisons + ///// Memory Operations { // llvm.vp.store(val,ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3) // chain = VP_STORE chain,val,base,offset,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5) +HELPER_MAP_VPID_TO_VPSD(vp_store, VP_STORE) VP_PROPERTY_FUNCTIONAL_OPC(Store) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_store) VP_PROPERTY_MEMOP(1, 0) END_REGISTER_VP(vp_store, VP_STORE) +// llvm.experimental.vp.strided.store(val,ptr,stride,mask,vlen) +BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_store, 3, 4) +// chain = EXPERIMENTAL_VP_STRIDED_STORE chain,val,base,offset,stride,mask,evl +BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_STORE, 0, experimental_vp_strided_store, 5, 6) +HELPER_MAP_VPID_TO_VPSD(experimental_vp_strided_store, EXPERIMENTAL_VP_STRIDED_STORE) +VP_PROPERTY_MEMOP(1, 0) +END_REGISTER_VP(experimental_vp_strided_store, EXPERIMENTAL_VP_STRIDED_STORE) + // llvm.vp.scatter(ptr,val,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3) // chain = VP_SCATTER chain,val,base,indices,scale,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6) +HELPER_MAP_VPID_TO_VPSD(vp_scatter, VP_SCATTER) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_scatter) VP_PROPERTY_MEMOP(1, 0) END_REGISTER_VP(vp_scatter, VP_SCATTER) @@ -235,15 +362,25 @@ END_REGISTER_VP(vp_scatter, VP_SCATTER) BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2) // val,chain = VP_LOAD chain,base,offset,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4) +HELPER_MAP_VPID_TO_VPSD(vp_load, VP_LOAD) VP_PROPERTY_FUNCTIONAL_OPC(Load) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load) VP_PROPERTY_MEMOP(0, None) END_REGISTER_VP(vp_load, VP_LOAD) +// llvm.experimental.vp.strided.load(ptr,stride,mask,vlen) +BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3) +// chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl +BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_LOAD, -1, experimental_vp_strided_load, 4, 5) +HELPER_MAP_VPID_TO_VPSD(experimental_vp_strided_load, EXPERIMENTAL_VP_STRIDED_LOAD) +VP_PROPERTY_MEMOP(0, None) +END_REGISTER_VP(experimental_vp_strided_load, 
EXPERIMENTAL_VP_STRIDED_LOAD) + // llvm.vp.gather(ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2) // val,chain = VP_GATHER chain,base,indices,scale,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5) +HELPER_MAP_VPID_TO_VPSD(vp_gather, VP_GATHER) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather) VP_PROPERTY_MEMOP(0, None) END_REGISTER_VP(vp_gather, VP_GATHER) @@ -313,6 +450,8 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN, // sequential and reassociative. These manifest as the presence of 'reassoc' // fast-math flags in the IR and as two distinct ISD opcodes in the // SelectionDAG. +// Note we by default map from the VP intrinsic to the SEQ ISD opcode, which +// can then be relaxed to the non-SEQ ISD opcode if the 'reassoc' flag is set. #ifdef HELPER_REGISTER_REDUCTION_SEQ_VP #error \ "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!" @@ -323,6 +462,7 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN, VP_PROPERTY_REDUCTION(0, 1) \ END_REGISTER_VP_SDNODE(VPSD) \ BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, -1, VPID, 2, 3) \ + HELPER_MAP_VPID_TO_VPSD(VPID, SEQ_VPSD) \ VP_PROPERTY_REDUCTION(0, 1) \ END_REGISTER_VP_SDNODE(SEQ_VPSD) \ VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \ @@ -344,13 +484,18 @@ HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL, ///// Shuffles { -// llvm.vp.select(mask,on_true,on_false,vlen) -BEGIN_REGISTER_VP(vp_select, 0, 3, VP_SELECT, -1) +// The mask 'cond' operand of llvm.vp.select and llvm.vp.merge are not reported +// as masks with the BEGIN_REGISTER_VP_* macros. This is because, unlike other +// VP intrinsics, these two have a defined result on lanes where the mask is +// false. +// +// llvm.vp.select(cond,on_true,on_false,vlen) +BEGIN_REGISTER_VP(vp_select, None, 3, VP_SELECT, -1) VP_PROPERTY_FUNCTIONAL_OPC(Select) END_REGISTER_VP(vp_select, VP_SELECT) -// llvm.vp.merge(mask,on_true,on_false,pivot) -BEGIN_REGISTER_VP(vp_merge, 0, 3, VP_MERGE, -1) +// llvm.vp.merge(cond,on_true,on_false,pivot) +BEGIN_REGISTER_VP(vp_merge, None, 3, VP_MERGE, -1) END_REGISTER_VP(vp_merge, VP_MERGE) BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1) @@ -364,7 +509,10 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE) #undef END_REGISTER_VP #undef END_REGISTER_VP_INTRINSIC #undef END_REGISTER_VP_SDNODE +#undef HELPER_MAP_VPID_TO_VPSD #undef VP_PROPERTY_BINARYOP +#undef VP_PROPERTY_CASTOP +#undef VP_PROPERTY_CMP #undef VP_PROPERTY_CONSTRAINEDFP #undef VP_PROPERTY_FUNCTIONAL_INTRINSIC #undef VP_PROPERTY_FUNCTIONAL_OPC diff --git a/llvm/include/llvm/IR/ValueMap.h b/llvm/include/llvm/IR/ValueMap.h index 67f275cc06d9..a4b6091cf115 100644 --- a/llvm/include/llvm/IR/ValueMap.h +++ b/llvm/include/llvm/IR/ValueMap.h @@ -104,8 +104,8 @@ public: : Map(NumInitBuckets), Data() {} explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64) : Map(NumInitBuckets), Data(Data) {} - // ValueMap can't be copied nor moved, beucase the callbacks store pointer - // to it. + // ValueMap can't be copied nor moved, because the callbacks store pointer to + // it. ValueMap(const ValueMap &) = delete; ValueMap(ValueMap &&) = delete; ValueMap &operator=(const ValueMap &) = delete; @@ -141,7 +141,7 @@ public: size_type size() const { return Map.size(); } /// Grow the map so that it has at least Size buckets. 
Does not shrink - void resize(size_t Size) { Map.resize(Size); } + void reserve(size_t Size) { Map.reserve(Size); } void clear() { Map.clear(); diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h new file mode 100644 index 000000000000..301edaed70fe --- /dev/null +++ b/llvm/include/llvm/IR/VectorBuilder.h @@ -0,0 +1,99 @@ +//===- llvm/VectorBuilder.h - Builder for VP Intrinsics ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the VectorBuilder class, which is used as a convenient way +// to create VP intrinsics as if they were LLVM instructions with a consistent +// and simplified interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_VECTORBUILDER_H +#define LLVM_IR_VECTORBUILDER_H + +#include +#include +#include +#include + +namespace llvm { + +class VectorBuilder { +public: + enum class Behavior { + // Abort if the requested VP intrinsic could not be created. + // This is useful for strict consistency. + ReportAndAbort = 0, + + // Return a default-initialized value if the requested VP intrinsic could + // not be created. + // This is useful for a defensive fallback to non-VP code. + SilentlyReturnNone = 1, + }; + +private: + IRBuilderBase &Builder; + Behavior ErrorHandling; + + // Explicit mask parameter. + Value *Mask; + // Explicit vector length parameter. + Value *ExplicitVectorLength; + // Compile-time vector length. + ElementCount StaticVectorLength; + + // Get mask/evl value handles for the current configuration. + Value &requestMask(); + Value &requestEVL(); + + void handleError(const char *ErrorMsg) const; + template + RetType returnWithError(const char *ErrorMsg) const { + handleError(ErrorMsg); + return RetType(); + } + +public: + VectorBuilder(IRBuilderBase &Builder, + Behavior ErrorHandling = Behavior::ReportAndAbort) + : Builder(Builder), ErrorHandling(ErrorHandling), Mask(nullptr), + ExplicitVectorLength(nullptr), + StaticVectorLength(ElementCount::getFixed(0)) {} + + Module &getModule() const; + LLVMContext &getContext() const { return Builder.getContext(); } + + // All-true mask for the currently configured explicit vector length. + Value *getAllTrueMask(); + + VectorBuilder &setMask(Value *NewMask) { + Mask = NewMask; + return *this; + } + VectorBuilder &setEVL(Value *NewExplicitVectorLength) { + ExplicitVectorLength = NewExplicitVectorLength; + return *this; + } + VectorBuilder &setStaticVL(unsigned NewFixedVL) { + StaticVectorLength = ElementCount::getFixed(NewFixedVL); + return *this; + } + // TODO: setStaticVL(ElementCount) for scalable types. + + // Emit a VP intrinsic call that mimics a regular instruction. + // This operation behaves according to the VectorBuilderBehavior. + // \p Opcode The functional instruction opcode of the emitted intrinsic. + // \p ReturnTy The return type of the operation. + // \p VecOpArray The operand list. 
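Given the fluent setters above and the createVectorInstruction entry point declared immediately below, typical use looks like the following sketch (function name and operand choice are assumptions):

    #include "llvm/IR/VectorBuilder.h"

    // Emit llvm.vp.fadd(%a, %b, %mask, %evl) via the new builder.
    llvm::Value *emitVPFAdd(llvm::IRBuilderBase &B, llvm::Type *VecTy,
                            llvm::Value *A, llvm::Value *X,
                            llvm::Value *Mask, llvm::Value *EVL) {
      llvm::VectorBuilder VB(B); // ReportAndAbort error handling by default
      VB.setMask(Mask).setEVL(EVL);
      return VB.createVectorInstruction(llvm::Instruction::FAdd, VecTy,
                                        {A, X}, "vp.fadd");
    }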
+ Value *createVectorInstruction(unsigned Opcode, Type *ReturnTy, + ArrayRef VecOpArray, + const Twine &Name = Twine()); +}; + +} // namespace llvm + +#endif // LLVM_IR_VECTORBUILDER_H diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h index a14e46e2edc8..3f2a01fdc54a 100644 --- a/llvm/include/llvm/IRReader/IRReader.h +++ b/llvm/include/llvm/IRReader/IRReader.h @@ -14,7 +14,9 @@ #ifndef LLVM_IRREADER_IRREADER_H #define LLVM_IRREADER_IRREADER_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLForwardCompat.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 489ef045796f..77f2c6330788 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -48,9 +48,6 @@ void initializeInstrumentation(PassRegistry&); /// Initialize all passes linked into the Analysis library. void initializeAnalysis(PassRegistry&); -/// Initialize all passes linked into the Coroutines library. -void initializeCoroutines(PassRegistry&); - /// Initialize all passes linked into the CodeGen library. void initializeCodeGen(PassRegistry&); @@ -65,9 +62,6 @@ void initializeAAResultsWrapperPassPass(PassRegistry&); void initializeADCELegacyPassPass(PassRegistry&); void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&); void initializeAddFSDiscriminatorsPass(PassRegistry &); -void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &); -void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &); -void initializeAddressSanitizerLegacyPassPass(PassRegistry &); void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); @@ -77,11 +71,11 @@ void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); void initializeAnnotation2MetadataLegacyPass(PassRegistry &); void initializeAnnotationRemarksLegacyPass(PassRegistry &); void initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &); -void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); void initializeAttributorCGSCCLegacyPassPass(PassRegistry &); +void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); @@ -103,6 +97,7 @@ void initializeCFGSimplifyPassPass(PassRegistry&); void initializeCFGuardPass(PassRegistry&); void initializeCFGuardLongjmpPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&); +void initializeCFIFixupPass(PassRegistry&); void initializeCFIInstrInserterPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); @@ -137,10 +132,10 @@ void initializeDependenceAnalysisPass(PassRegistry&); void initializeDependenceAnalysisWrapperPassPass(PassRegistry&); void initializeDetectDeadLanesPass(PassRegistry&); void initializeDivRemPairsLegacyPassPass(PassRegistry&); -void initializeDomOnlyPrinterPass(PassRegistry&); -void initializeDomOnlyViewerPass(PassRegistry&); -void initializeDomPrinterPass(PassRegistry&); -void initializeDomViewerPass(PassRegistry&); +void 
initializeDomOnlyPrinterWrapperPassPass(PassRegistry &); +void initializeDomOnlyViewerWrapperPassPass(PassRegistry &); +void initializeDomPrinterWrapperPassPass(PassRegistry &); +void initializeDomViewerWrapperPassPass(PassRegistry &); void initializeDominanceFrontierWrapperPassPass(PassRegistry&); void initializeDominatorTreeWrapperPassPass(PassRegistry&); void initializeDwarfEHPrepareLegacyPassPass(PassRegistry &); @@ -174,7 +169,6 @@ void initializeFunctionImportLegacyPassPass(PassRegistry&); void initializeFunctionSpecializationLegacyPassPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); -void initializeGCOVProfilerLegacyPassPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); void initializeGVNSinkLegacyPassPass(PassRegistry&); @@ -188,7 +182,6 @@ void initializeHardwareLoopsPass(PassRegistry&); void initializeMIRProfileLoaderPassPass(PassRegistry &); void initializeMemProfilerLegacyPassPass(PassRegistry &); void initializeHotColdSplittingLegacyPassPass(PassRegistry&); -void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); void initializeIPSCCPLegacyPassPass(PassRegistry&); void initializeIRCELegacyPassPass(PassRegistry&); void initializeIROutlinerLegacyPassPass(PassRegistry&); @@ -215,6 +208,7 @@ void initializeInterleavedAccessPass(PassRegistry&); void initializeInterleavedLoadCombinePass(PassRegistry &); void initializeInternalizeLegacyPassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); +void initializeJMCInstrumenterPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); void initializeLCSSAWrapperPassPass(PassRegistry&); @@ -273,6 +267,7 @@ void initializeLowerAtomicLegacyPassPass(PassRegistry&); void initializeLowerConstantIntrinsicsPass(PassRegistry&); void initializeLowerEmuTLSPass(PassRegistry&); void initializeLowerExpectIntrinsicPass(PassRegistry&); +void initializeLowerGlobalDtorsLegacyPassPass(PassRegistry &); void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&); void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); @@ -316,7 +311,6 @@ void initializeMemDerefPrinterPass(PassRegistry&); void initializeMemoryDependenceWrapperPassPass(PassRegistry&); void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&); void initializeMemorySSAWrapperPassPass(PassRegistry&); -void initializeMemorySanitizerLegacyPassPass(PassRegistry&); void initializeMergeFunctionsLegacyPassPass(PassRegistry&); void initializeMergeICmpsLegacyPassPass(PassRegistry &); void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&); @@ -339,11 +333,6 @@ void initializeOptimizationRemarkEmitterWrapperPassPass(PassRegistry&); void initializeOptimizePHIsPass(PassRegistry&); void initializePAEvalPass(PassRegistry&); void initializePEIPass(PassRegistry&); -void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&); -void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&); -void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&); -void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&); -void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&); void initializePHIEliminationPass(PassRegistry&); void initializePartialInlinerLegacyPassPass(PassRegistry&); void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry&); @@ 
-353,10 +342,10 @@ void initializePhiValuesWrapperPassPass(PassRegistry&); void initializePhysicalRegisterUsageInfoPass(PassRegistry&); void initializePlaceBackedgeSafepointsImplPass(PassRegistry&); void initializePlaceSafepointsPass(PassRegistry&); -void initializePostDomOnlyPrinterPass(PassRegistry&); -void initializePostDomOnlyViewerPass(PassRegistry&); -void initializePostDomPrinterPass(PassRegistry&); -void initializePostDomViewerPass(PassRegistry&); +void initializePostDomOnlyPrinterWrapperPassPass(PassRegistry &); +void initializePostDomOnlyViewerWrapperPassPass(PassRegistry &); +void initializePostDomPrinterWrapperPassPass(PassRegistry &); +void initializePostDomViewerWrapperPassPass(PassRegistry &); void initializePostDominatorTreeWrapperPassPass(PassRegistry&); void initializePostInlineEntryExitInstrumenterPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); @@ -405,6 +394,7 @@ void initializeSROALegacyPassPass(PassRegistry&); void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); +void initializeSelectOptimizePass(PassRegistry &); void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); @@ -443,7 +433,7 @@ void initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); void initializeTargetPassConfigPass(PassRegistry&); void initializeTargetTransformInfoWrapperPassPass(PassRegistry&); -void initializeThreadSanitizerLegacyPassPass(PassRegistry&); +void initializeTLSVariableHoistLegacyPassPass(PassRegistry &); void initializeTwoAddressInstructionPassPass(PassRegistry&); void initializeTypeBasedAAWrapperPassPass(PassRegistry&); void initializeTypePromotionPass(PassRegistry&); diff --git a/llvm/include/llvm/InterfaceStub/ELFObjHandler.h b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h index 20a02c6d5445..c15838c4ae0a 100644 --- a/llvm/include/llvm/InterfaceStub/ELFObjHandler.h +++ b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h @@ -13,16 +13,15 @@ #ifndef LLVM_INTERFACESTUB_ELFOBJHANDLER_H #define LLVM_INTERFACESTUB_ELFOBJHANDLER_H -#include "llvm/InterfaceStub/IFSStub.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/ELFTypes.h" -#include "llvm/Support/FileSystem.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBufferRef.h" +#include namespace llvm { -class MemoryBuffer; - namespace ifs { +struct IFSStub; /// Attempt to read a binary ELF file from a MemoryBuffer. Expected> readELFFile(MemoryBufferRef Buf); diff --git a/llvm/include/llvm/InterfaceStub/IFSHandler.h b/llvm/include/llvm/InterfaceStub/IFSHandler.h index 6ae6a421318e..bfa5692811d7 100644 --- a/llvm/include/llvm/InterfaceStub/IFSHandler.h +++ b/llvm/include/llvm/InterfaceStub/IFSHandler.h @@ -19,6 +19,8 @@ #include "llvm/Support/Error.h" #include "llvm/Support/VersionTuple.h" #include +#include +#include namespace llvm { @@ -51,8 +53,8 @@ Error validateIFSTarget(IFSStub &Stub, bool ParseTriple); void stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch, bool StripEndianness, bool StripBitWidth); -/// Strips symbols from IFS symbol table that are undefined. 
-void stripIFSUndefinedSymbols(IFSStub &Stub); +Error filterIFSSyms(IFSStub &Stub, bool StripUndefined, + const std::vector<std::string> &Exclude = {}); /// Parse llvm triple string into an IFSTarget struct. IFSTarget parseTriple(StringRef TripleStr); diff --git a/llvm/include/llvm/InterfaceStub/IFSStub.h b/llvm/include/llvm/InterfaceStub/IFSStub.h index 8c3cd171b1a2..0f935cd478d5 100644 --- a/llvm/include/llvm/InterfaceStub/IFSStub.h +++ b/llvm/include/llvm/InterfaceStub/IFSStub.h @@ -14,9 +14,8 @@ #ifndef LLVM_INTERFACESTUB_IFSSTUB_H #define LLVM_INTERFACESTUB_IFSSTUB_H -#include "llvm/Support/Error.h" +#include "llvm/ADT/Optional.h" #include "llvm/Support/VersionTuple.h" -#include #include namespace llvm { @@ -54,7 +53,7 @@ struct IFSSymbol { IFSSymbol() = default; explicit IFSSymbol(std::string SymbolName) : Name(std::move(SymbolName)) {} std::string Name; - uint64_t Size; + Optional<uint64_t> Size; IFSSymbolType Type; bool Undefined; bool Weak; diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index eb793d62907e..54bb82d84d96 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -57,8 +57,8 @@ struct Config { unsigned OptLevel = 2; bool DisableVerify = false; - /// Use the new pass manager - bool UseNewPM = LLVM_ENABLE_NEW_PASS_MANAGER; + /// Use the standard optimization pipeline. + bool UseDefaultPipeline = false; /// Flag to indicate that the optimizer should not assume builtins are present /// on the target. @@ -177,6 +177,10 @@ struct Config { /// Add FSAFDO discriminators. bool AddFSDiscriminator = false; + /// Use opaque pointer types. Used to call LLVMContext::setOpaquePointers + /// unless already set by the `-opaque-pointers` command-line option. + bool OpaquePointers = true; + /// If this field is set, LTO will write input file paths and symbol /// resolutions here in llvm-lto2 command line flag format. This can be /// used for testing and for running the LTO pipeline outside of the linker @@ -288,6 +292,8 @@ struct LTOLLVMContext : LLVMContext { enableDebugTypeODRUniquing(); setDiagnosticHandler( std::make_unique<LTOLLVMDiagnosticHandler>(&DiagHandler), true); + if (!hasSetOpaquePointersValue()) + setOpaquePointers(C.OpaquePointers); } DiagnosticHandlerFunction DiagHandler; }; diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 0d085a88a193..ea52226dca16 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -197,7 +197,17 @@ using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>( /// This ThinBackend runs the individual backend jobs in-process. /// The default value means to use one job per hardware core (not hyper-thread). -ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism); +/// OnWrite is a callback which receives a module identifier and notifies the +/// LTO user that the index file for the module (and optionally the imports +/// file) was created. +/// If ShouldEmitIndexFiles is true, sharded ThinLTO index files are written +/// to the same path as the input module, with the suffix ".thinlto.bc". +/// If ShouldEmitImportsFiles is true, a list of imported files is also written +/// to a similar path with ".imports" appended instead. +using IndexWriteCallback = std::function<void(const std::string &)>; +ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism, + IndexWriteCallback OnWrite = nullptr, + bool ShouldEmitIndexFiles = false, + bool ShouldEmitImportsFiles = false); /// This ThinBackend writes individual module indexes to files, instead of /// running the individual backend jobs.
This backend is for distributed builds @@ -212,7 +222,6 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism); /// the final ThinLTO linking. Can be nullptr. /// OnWrite is callback which receives module identifier and notifies LTO user /// that index file for the module (and optionally imports file) was created. -using IndexWriteCallback = std::function; ThinBackend createWriteIndexesThinBackend(std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles, diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h index 333f483f29c5..96f82a9276e0 100644 --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -184,7 +184,7 @@ struct LTOCodeGenerator { void setDisableVerify(bool Value) { Config.DisableVerify = Value; } - void setUseNewPM(bool Value) { Config.UseNewPM = Value; } + void setDebugPassManager(bool Enabled) { Config.DebugPassManager = Enabled; } void setDiagnosticHandler(lto_diagnostic_handler_t, void *); diff --git a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h index be1f3154029c..ab40d88af8c1 100644 --- a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h @@ -225,9 +225,6 @@ public: OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel; } - /// Enable or disable the new pass manager. - void setUseNewPM(unsigned Enabled) { UseNewPM = Enabled; } - /// Enable or disable debug output for the new pass manager. void setDebugPassManager(unsigned Enabled) { DebugPassManager = Enabled; } @@ -347,10 +344,6 @@ private: /// IR Optimization Level [0-3]. unsigned OptLevel = 3; - /// Flag to indicate whether the new pass manager should be used for IR - /// optimizations. - bool UseNewPM = LLVM_ENABLE_NEW_PASS_MANAGER; - /// Flag to indicate whether debug output should be enabled for the new pass /// manager. 
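Stepping back to the LTO.h hunk above: the extended createInProcessThinBackend can now emit the sharded index and imports files itself. A minimal sketch of wiring it up follows; the helper name makeBackend and the callback body are illustrative assumptions, not part of the patch.

// Sketch: an in-process ThinLTO backend that also writes ".thinlto.bc"
// shards and ".imports" lists, via the new optional parameters.
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

llvm::lto::ThinBackend makeBackend() {
  // One backend job per hardware core, matching the documented default.
  llvm::ThreadPoolStrategy Strategy = llvm::hardware_concurrency();
  // Called once per module after its index (and imports) file is written.
  llvm::lto::IndexWriteCallback OnWrite = [](const std::string &ModuleId) {
    llvm::errs() << "index written for " << ModuleId << "\n";
  };
  return llvm::lto::createInProcessThinBackend(
      Strategy, OnWrite,
      /*ShouldEmitIndexFiles=*/true,
      /*ShouldEmitImportsFiles=*/true);
}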
bool DebugPassManager = false; diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index c8b9aaeed76a..af5926dcb38b 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -75,7 +75,6 @@ namespace { (void) llvm::createAggressiveInstCombinerPass(); (void) llvm::createBitTrackingDCEPass(); (void)llvm::createOpenMPOptCGSCCLegacyPass(); - (void) llvm::createArgumentPromotionPass(); (void) llvm::createAlignmentFromAssumptionsPass(); (void) llvm::createBasicAAWrapperPass(); (void) llvm::createSCEVAAWrapperPass(); @@ -98,16 +97,10 @@ namespace { (void) llvm::createDeadCodeEliminationPass(); (void) llvm::createDeadStoreEliminationPass(); (void) llvm::createDependenceAnalysisWrapperPass(); - (void) llvm::createDomOnlyPrinterPass(); - (void) llvm::createDomPrinterPass(); - (void) llvm::createDomOnlyViewerPass(); - (void) llvm::createDomViewerPass(); - (void) llvm::createGCOVProfilerPass(); - (void) llvm::createPGOInstrumentationGenLegacyPass(); - (void) llvm::createPGOInstrumentationUseLegacyPass(); - (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass(); - (void) llvm::createPGOIndirectCallPromotionLegacyPass(); - (void) llvm::createPGOMemOPSizeOptLegacyPass(); + (void) llvm::createDomOnlyPrinterWrapperPassPass(); + (void) llvm::createDomPrinterWrapperPassPass(); + (void) llvm::createDomOnlyViewerWrapperPassPass(); + (void) llvm::createDomViewerWrapperPassPass(); (void) llvm::createInstrProfilingLegacyPass(); (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); @@ -123,6 +116,7 @@ namespace { (void) llvm::createInstSimplifyLegacyPass(); (void) llvm::createInstructionCombiningPass(); (void) llvm::createInternalizePass(); + (void) llvm::createJMCInstrumenterPass(); (void) llvm::createLCSSAPass(); (void) llvm::createLegacyDivergenceAnalysisPass(); (void) llvm::createLICMPass(); @@ -138,12 +132,12 @@ namespace { (void) llvm::createLoopRerollPass(); (void) llvm::createLoopUnrollPass(); (void) llvm::createLoopUnrollAndJamPass(); - (void) llvm::createLoopUnswitchPass(); (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerConstantIntrinsicsPass(); (void) llvm::createLowerExpectIntrinsicPass(); + (void) llvm::createLowerGlobalDtorsLegacyPass(); (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); (void) llvm::createNaryReassociatePass(); @@ -156,10 +150,10 @@ namespace { (void) llvm::createPromoteMemoryToRegisterPass(); (void) llvm::createDemoteRegisterToMemoryPass(); (void) llvm::createPruneEHPass(); - (void) llvm::createPostDomOnlyPrinterPass(); - (void) llvm::createPostDomPrinterPass(); - (void) llvm::createPostDomOnlyViewerPass(); - (void) llvm::createPostDomViewerPass(); + (void)llvm::createPostDomOnlyPrinterWrapperPassPass(); + (void)llvm::createPostDomPrinterWrapperPassPass(); + (void)llvm::createPostDomOnlyViewerWrapperPassPass(); + (void)llvm::createPostDomViewerWrapperPassPass(); (void) llvm::createReassociatePass(); (void) llvm::createRedundantDbgInstEliminationPass(); (void) llvm::createRegionInfoPass(); @@ -176,6 +170,7 @@ namespace { (void) llvm::createStripDeadDebugInfoPass(); (void) llvm::createStripDeadPrototypesPass(); (void) llvm::createTailCallEliminationPass(); + (void)llvm::createTLSVariableHoistPass(); (void) llvm::createJumpThreadingPass(); (void) llvm::createDFAJumpThreadingPass(); (void) llvm::createUnifyFunctionExitNodesPass(); @@ -236,6 +231,7 @@ namespace { (void) 
llvm::createUnifyLoopExitsPass(); (void) llvm::createFixIrreduciblePass(); (void)llvm::createFunctionSpecializationPass(); + (void)llvm::createSelectOptimizePass(); (void)new llvm::IntervalPartition(); (void)new llvm::ScalarEvolutionWrapperPass(); diff --git a/llvm/include/llvm/Linker/IRMover.h b/llvm/include/llvm/Linker/IRMover.h index e5df83f01fe3..1e3c5394ffa2 100644 --- a/llvm/include/llvm/Linker/IRMover.h +++ b/llvm/include/llvm/Linker/IRMover.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FunctionExtras.h" #include <functional> namespace llvm { @@ -62,6 +63,8 @@ public: IRMover(Module &M); typedef std::function<void(GlobalValue &)> ValueAdder; + using LazyCallback = + llvm::unique_function<void(GlobalValue &GV, ValueAdder Add)>; /// Move in the provided values in \p ValuesToLink from \p Src. /// @@ -70,11 +73,11 @@ public: /// not present in ValuesToLink. The GlobalValue and a ValueAdder callback /// are passed as an argument, and the callback is expected to be called /// if the GlobalValue needs to be added to the \p ValuesToLink and linked. + /// Pass nullptr if there's no work to be done in such cases. /// - \p IsPerformingImport is true when this IR link is to perform ThinLTO /// function importing from Src. Error move(std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink, - std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor, - bool IsPerformingImport); + LazyCallback AddLazyFor, bool IsPerformingImport); Module &getModule() { return Composite; } private: diff --git a/llvm/include/llvm/MC/ConstantPools.h b/llvm/include/llvm/MC/ConstantPools.h index 9fe0cce8d68c..7eac75362eff 100644 --- a/llvm/include/llvm/MC/ConstantPools.h +++ b/llvm/include/llvm/MC/ConstantPools.h @@ -43,7 +43,8 @@ struct ConstantPoolEntry { class ConstantPool { using EntryVecTy = SmallVector<ConstantPoolEntry, 4>; EntryVecTy Entries; - std::map<int64_t, const MCSymbolRefExpr *> CachedEntries; + std::map<int64_t, const MCSymbolRefExpr *> CachedConstantEntries; + DenseMap<const MCSymbol *, const MCSymbolRefExpr *> CachedSymbolEntries; public: // Initialize a new empty constant pool diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index bb57c3453d10..a5e7b3f504f5 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -13,12 +13,17 @@ #include "llvm/ADT/Optional.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCFragment.h" #include "llvm/Support/Endian.h" #include namespace llvm { +class MCAlignFragment; +class MCDwarfCallFrameFragment; +class MCDwarfLineAddrFragment; +class MCFragment; +class MCRelaxableFragment; +class MCSymbol; class MCAsmLayout; class MCAssembler; class MCCFIInstruction; @@ -31,6 +36,7 @@ class MCSubtargetInfo; class MCValue; class raw_pwrite_stream; class StringRef; +class raw_ostream; /// Generic interface to target specific assembler backends. class MCAsmBackend { diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 355f569861d8..ec17131e17e8 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -430,6 +430,10 @@ protected: /// hidden visibility. Defaults to MCSA_Hidden. MCSymbolAttr HiddenVisibilityAttr = MCSA_Hidden; + /// This attribute, if not MCSA_Invalid, is used to declare a symbol as having + /// exported visibility. Defaults to MCSA_Exported. + MCSymbolAttr ExportedVisibilityAttr = MCSA_Exported; + /// This attribute, if not MCSA_Invalid, is used to declare an undefined /// symbol as having hidden visibility. Defaults to MCSA_Hidden. MCSymbolAttr HiddenDeclarationVisibilityAttr = MCSA_Hidden; @@ -466,6 +470,10 @@ protected: /// the .loc/.file directives. Defaults to true.
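The IRMover.h hunk above turns AddLazyFor into a move-only llvm::unique_function (LazyCallback) that may also be passed as nullptr. A minimal sketch of a caller; linkValues is an illustrative name and the module setup is assumed to happen elsewhere.

// Sketch: calling IRMover::move with the new move-only callback type.
#include "llvm/IR/Module.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Support/Error.h"
#include <memory>

using namespace llvm;

Error linkValues(IRMover &Mover, std::unique_ptr<Module> Src,
                 ArrayRef<GlobalValue *> ValuesToLink) {
  // LazyCallback is a unique_function, so move-only captures are allowed;
  // pass nullptr instead when no lazy linking work is needed.
  IRMover::LazyCallback AddLazyFor =
      [](GlobalValue &GV, IRMover::ValueAdder Add) {
        // Decide here whether GV must be pulled in; illustrative no-op.
      };
  return Mover.move(std::move(Src), ValuesToLink, std::move(AddLazyFor),
                    /*IsPerformingImport=*/false);
}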
bool UsesDwarfFileAndLocDirectives = true; + /// True if DWARF `.file directory' directive syntax is used by + /// default. + bool EnableDwarfFileDirectoryDefault = true; + /// True if the target needs the DWARF section length in the header (if any) /// of the DWARF section in the assembly file. Defaults to true. bool DwarfSectionSizeRequired = true; @@ -478,6 +486,10 @@ protected: /// For example, foo(plt) instead of foo@plt. Defaults to false. bool UseParensForSymbolVariant = false; + /// True if the target uses parens for symbol names starting with + /// '$' character to distinguish them from absolute names. + bool UseParensForDollarSignNames = true; + /// True if the target supports flags in ".loc" directive, false if only /// location is allowed. bool SupportsExtendedDwarfLocDirective = true; @@ -671,6 +683,7 @@ public: const char *getCode64Directive() const { return Code64Directive; } unsigned getAssemblerDialect() const { return AssemblerDialect; } bool doesAllowAtInName() const { return AllowAtInName; } + void setAllowAtInName(bool V) { AllowAtInName = V; } bool doesAllowQuestionAtStartOfIdentifier() const { return AllowQuestionAtStartOfIdentifier; } @@ -749,6 +762,8 @@ public: MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr; } + MCSymbolAttr getExportedVisibilityAttr() const { return ExportedVisibilityAttr; } + MCSymbolAttr getHiddenDeclarationVisibilityAttr() const { return HiddenDeclarationVisibilityAttr; } @@ -788,6 +803,9 @@ public: bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; } bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; } bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; } + bool useParensForDollarSignNames() const { + return UseParensForDollarSignNames; + } bool supportsExtendedDwarfLocDirective() const { return SupportsExtendedDwarfLocDirective; } @@ -800,6 +818,10 @@ public: return DwarfSectionSizeRequired; } + bool enableDwarfFileDirectoryDefault() const { + return EnableDwarfFileDirectoryDefault; + } + void addInitialFrameState(const MCCFIInstruction &Inst); const std::vector &getInitialFrameState() const { diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index 9d5cb620c9de..80aa97c315da 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -10,7 +10,6 @@ #define LLVM_MC_MCASSEMBLER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" @@ -18,20 +17,34 @@ #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/VersionTuple.h" +#include #include #include #include +#include #include +#include #include #include namespace llvm { +class MCBoundaryAlignFragment; +class MCCVDefRangeFragment; +class MCCVInlineLineTableFragment; +class MCDwarfCallFrameFragment; +class MCDwarfLineAddrFragment; +class MCEncodedFragment; +class MCFixup; +class MCLEBFragment; +class MCPseudoProbeAddrFragment; +class MCRelaxableFragment; +class MCSymbolRefExpr; +class raw_ostream; class MCAsmBackend; class MCAsmLayout; class MCContext; diff --git a/llvm/include/llvm/MC/MCCodeView.h b/llvm/include/llvm/MC/MCCodeView.h index 5770f370341d..3d15c4009e43 100644 --- 
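Several of the MCAsmInfo additions above are plain protected knobs that a target's AsmInfo subclass flips in its constructor. A hypothetical sketch, assuming such a subclass; ExampleAsmInfo is not a real target and the chosen values are illustrative.

// Sketch: a hypothetical target MCAsmInfo exercising the new knobs.
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDirectives.h"

namespace {
class ExampleAsmInfo : public llvm::MCAsmInfo {
public:
  ExampleAsmInfo() {
    // Declare exported symbols via the new visibility attribute hook.
    ExportedVisibilityAttr = llvm::MCSA_Exported;
    // Print $-prefixed names as ($name) to keep them distinct from
    // absolute expressions.
    UseParensForDollarSignNames = true;
    // Opt out of the DWARF `.file directory' directive syntax by default.
    EnableDwarfFileDirectoryDefault = false;
  }
};
} // namespace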
a/llvm/include/llvm/MC/MCCodeView.h +++ b/llvm/include/llvm/MC/MCCodeView.h @@ -13,18 +13,25 @@ #ifndef LLVM_MC_MCCODEVIEW_H #define LLVM_MC_MCCODEVIEW_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCObjectStreamer.h" #include #include namespace llvm { +class MCAsmLayout; +class MCCVDefRangeFragment; +class MCCVInlineLineTableFragment; +class MCDataFragment; +class MCFragment; +class MCSection; +class MCSymbol; class MCContext; class MCObjectStreamer; class MCStreamer; -class CodeViewContext; /// Instances of this class represent the information from a /// .cv_loc directive. diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index d2307d692278..a0e18891ed90 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -13,18 +13,15 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCPseudoProbe.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -44,798 +41,825 @@ namespace llvm { - class CodeViewContext; - class MCAsmInfo; - class MCLabel; - class MCObjectFileInfo; - class MCRegisterInfo; - class MCSection; - class MCSectionCOFF; - class MCSectionELF; - class MCSectionGOFF; - class MCSectionMachO; - class MCSectionWasm; - class MCSectionXCOFF; - class MCStreamer; - class MCSymbol; - class MCSymbolELF; - class MCSymbolWasm; - class MCSymbolXCOFF; - class MDNode; - class SMDiagnostic; - class SMLoc; - class SourceMgr; - - /// Context object for machine code objects. This class owns all of the - /// sections that it creates. - /// - class MCContext { - public: - using SymbolTable = StringMap; - using DiagHandlerTy = - std::function &)>; - enum Environment { IsMachO, IsELF, IsGOFF, IsCOFF, IsWasm, IsXCOFF }; - - private: - Environment Env; - - /// The name of the Segment where Swift5 Reflection Section data will be - /// outputted - StringRef Swift5ReflectionSegmentName; - - /// The triple for this object. - Triple TT; - - /// The SourceMgr for this object, if any. - const SourceMgr *SrcMgr; - - /// The SourceMgr for inline assembly, if any. - std::unique_ptr InlineSrcMgr; - std::vector LocInfos; - - DiagHandlerTy DiagHandler; - - /// The MCAsmInfo for this target. - const MCAsmInfo *MAI; - - /// The MCRegisterInfo for this target. - const MCRegisterInfo *MRI; - - /// The MCObjectFileInfo for this target. - const MCObjectFileInfo *MOFI; - - /// The MCSubtargetInfo for this target. - const MCSubtargetInfo *MSTI; - - std::unique_ptr CVContext; - - /// Allocator object used for creating machine code objects. - /// - /// We use a bump pointer allocator to avoid the need to track all allocated - /// objects. 
- BumpPtrAllocator Allocator; - - SpecificBumpPtrAllocator COFFAllocator; - SpecificBumpPtrAllocator ELFAllocator; - SpecificBumpPtrAllocator MachOAllocator; - SpecificBumpPtrAllocator GOFFAllocator; - SpecificBumpPtrAllocator WasmAllocator; - SpecificBumpPtrAllocator XCOFFAllocator; - SpecificBumpPtrAllocator MCInstAllocator; - - /// Bindings of names to symbols. - SymbolTable Symbols; - - /// A mapping from a local label number and an instance count to a symbol. - /// For example, in the assembly - /// 1: - /// 2: - /// 1: - /// We have three labels represented by the pairs (1, 0), (2, 0) and (1, 1) - DenseMap, MCSymbol *> LocalSymbols; - - /// Keeps tracks of names that were used both for used declared and - /// artificial symbols. The value is "true" if the name has been used for a - /// non-section symbol (there can be at most one of those, plus an unlimited - /// number of section symbols with the same name). - StringMap UsedNames; - - /// Keeps track of labels that are used in inline assembly. - SymbolTable InlineAsmUsedLabelNames; - - /// The next ID to dole out to an unnamed assembler temporary symbol with - /// a given prefix. - StringMap NextID; - - /// Instances of directional local labels. - DenseMap Instances; - /// NextInstance() creates the next instance of the directional local label - /// for the LocalLabelVal and adds it to the map if needed. - unsigned NextInstance(unsigned LocalLabelVal); - /// GetInstance() gets the current instance of the directional local label - /// for the LocalLabelVal and adds it to the map if needed. - unsigned GetInstance(unsigned LocalLabelVal); - - /// The file name of the log file from the environment variable - /// AS_SECURE_LOG_FILE. Which must be set before the .secure_log_unique - /// directive is used or it is an error. - char *SecureLogFile; - /// The stream that gets written to for the .secure_log_unique directive. - std::unique_ptr SecureLog; - /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to - /// catch errors if .secure_log_unique appears twice without - /// .secure_log_reset appearing between them. - bool SecureLogUsed = false; - - /// The compilation directory to use for DW_AT_comp_dir. - SmallString<128> CompilationDir; - - /// Prefix replacement map for source file information. - std::map DebugPrefixMap; - - /// The main file name if passed in explicitly. - std::string MainFileName; - - /// The dwarf file and directory tables from the dwarf .file directive. - /// We now emit a line table for each compile unit. To reduce the prologue - /// size of each line table, the files and directories used by each compile - /// unit are separated. - std::map MCDwarfLineTablesCUMap; - - /// The current dwarf line information from the last dwarf .loc directive. - MCDwarfLoc CurrentDwarfLoc; - bool DwarfLocSeen = false; - - /// Generate dwarf debugging info for assembly source files. - bool GenDwarfForAssembly = false; - - /// The current dwarf file number when generate dwarf debugging info for - /// assembly source files. - unsigned GenDwarfFileNumber = 0; - - /// Sections for generating the .debug_ranges and .debug_aranges sections. - SetVector SectionsForRanges; - - /// The information gathered from labels that will have dwarf label - /// entries when generating dwarf assembly source files. - std::vector MCGenDwarfLabelEntries; - - /// The string to embed in the debug information for the compile unit, if - /// non-empty. 
- StringRef DwarfDebugFlags; - - /// The string to embed in as the dwarf AT_producer for the compile unit, if - /// non-empty. - StringRef DwarfDebugProducer; - - /// The maximum version of dwarf that we should emit. - uint16_t DwarfVersion = 4; - - /// The format of dwarf that we emit. - dwarf::DwarfFormat DwarfFormat = dwarf::DWARF32; - - /// Honor temporary labels, this is useful for debugging semantic - /// differences between temporary and non-temporary labels (primarily on - /// Darwin). - bool AllowTemporaryLabels = true; - bool UseNamesOnTempLabels = false; - - /// The Compile Unit ID that we are currently processing. - unsigned DwarfCompileUnitID = 0; - - /// A collection of MCPseudoProbe in the current module - MCPseudoProbeTable PseudoProbeTable; - - // Sections are differentiated by the quadruple (section_name, group_name, - // unique_id, link_to_symbol_name). Sections sharing the same quadruple are - // combined into one section. - struct ELFSectionKey { - std::string SectionName; - StringRef GroupName; - StringRef LinkedToName; - unsigned UniqueID; - - ELFSectionKey(StringRef SectionName, StringRef GroupName, - StringRef LinkedToName, unsigned UniqueID) - : SectionName(SectionName), GroupName(GroupName), - LinkedToName(LinkedToName), UniqueID(UniqueID) {} - - bool operator<(const ELFSectionKey &Other) const { - if (SectionName != Other.SectionName) - return SectionName < Other.SectionName; - if (GroupName != Other.GroupName) - return GroupName < Other.GroupName; - if (int O = LinkedToName.compare(Other.LinkedToName)) - return O < 0; - return UniqueID < Other.UniqueID; - } - }; - - struct COFFSectionKey { - std::string SectionName; - StringRef GroupName; - int SelectionKey; - unsigned UniqueID; - - COFFSectionKey(StringRef SectionName, StringRef GroupName, - int SelectionKey, unsigned UniqueID) - : SectionName(SectionName), GroupName(GroupName), - SelectionKey(SelectionKey), UniqueID(UniqueID) {} - - bool operator<(const COFFSectionKey &Other) const { - if (SectionName != Other.SectionName) - return SectionName < Other.SectionName; - if (GroupName != Other.GroupName) - return GroupName < Other.GroupName; - if (SelectionKey != Other.SelectionKey) - return SelectionKey < Other.SelectionKey; - return UniqueID < Other.UniqueID; - } - }; - - struct WasmSectionKey { - std::string SectionName; - StringRef GroupName; - unsigned UniqueID; - - WasmSectionKey(StringRef SectionName, StringRef GroupName, - unsigned UniqueID) - : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) { - } - - bool operator<(const WasmSectionKey &Other) const { - if (SectionName != Other.SectionName) - return SectionName < Other.SectionName; - if (GroupName != Other.GroupName) - return GroupName < Other.GroupName; - return UniqueID < Other.UniqueID; - } - }; - - struct XCOFFSectionKey { - // Section name. - std::string SectionName; - // Section property. - // For csect section, it is storage mapping class. - // For debug section, it is section type flags. 
- union { - XCOFF::StorageMappingClass MappingClass; - XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags; - }; - bool IsCsect; - - XCOFFSectionKey(StringRef SectionName, - XCOFF::StorageMappingClass MappingClass) - : SectionName(SectionName), MappingClass(MappingClass), - IsCsect(true) {} - - XCOFFSectionKey(StringRef SectionName, - XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags) - : SectionName(SectionName), DwarfSubtypeFlags(DwarfSubtypeFlags), - IsCsect(false) {} - - bool operator<(const XCOFFSectionKey &Other) const { - if (IsCsect && Other.IsCsect) - return std::tie(SectionName, MappingClass) < - std::tie(Other.SectionName, Other.MappingClass); - if (IsCsect != Other.IsCsect) - return IsCsect; - return std::tie(SectionName, DwarfSubtypeFlags) < - std::tie(Other.SectionName, Other.DwarfSubtypeFlags); - } - }; - - StringMap MachOUniquingMap; - std::map ELFUniquingMap; - std::map COFFUniquingMap; - std::map GOFFUniquingMap; - std::map WasmUniquingMap; - std::map XCOFFUniquingMap; - StringMap RelSecNames; - - SpecificBumpPtrAllocator MCSubtargetAllocator; +class CodeViewContext; +class MCAsmInfo; +class MCInst; +class MCLabel; +class MCObjectFileInfo; +class MCRegisterInfo; +class MCSection; +class MCSectionCOFF; +class MCSectionDXContainer; +class MCSectionELF; +class MCSectionGOFF; +class MCSectionMachO; +class MCSectionSPIRV; +class MCSectionWasm; +class MCSectionXCOFF; +class MCStreamer; +class MCSubtargetInfo; +class MCSymbol; +class MCSymbolELF; +class MCSymbolWasm; +class MCSymbolXCOFF; +class MCTargetOptions; +class MDNode; +template class SmallVectorImpl; +class SMDiagnostic; +class SMLoc; +class SourceMgr; +enum class EmitDwarfUnwindType; + +/// Context object for machine code objects. This class owns all of the +/// sections that it creates. +/// +class MCContext { +public: + using SymbolTable = StringMap; + using DiagHandlerTy = + std::function &)>; + enum Environment { + IsMachO, + IsELF, + IsGOFF, + IsCOFF, + IsSPIRV, + IsWasm, + IsXCOFF, + IsDXContainer + }; - /// Do automatic reset in destructor - bool AutoReset; +private: + Environment Env; - MCTargetOptions const *TargetOptions; + /// The name of the Segment where Swift5 Reflection Section data will be + /// outputted + StringRef Swift5ReflectionSegmentName; - bool HadError = false; + /// The triple for this object. + Triple TT; - void reportCommon(SMLoc Loc, - std::function); + /// The SourceMgr for this object, if any. + const SourceMgr *SrcMgr; - MCSymbol *createSymbolImpl(const StringMapEntry *Name, - bool CanBeUnnamed); - MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, - bool IsTemporary); + /// The SourceMgr for inline assembly, if any. + std::unique_ptr InlineSrcMgr; + std::vector LocInfos; - MCSymbol *getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal, - unsigned Instance); + DiagHandlerTy DiagHandler; - MCSectionELF *createELFSectionImpl(StringRef Section, unsigned Type, - unsigned Flags, SectionKind K, - unsigned EntrySize, - const MCSymbolELF *Group, bool IsComdat, - unsigned UniqueID, - const MCSymbolELF *LinkedToSym); + /// The MCAsmInfo for this target. + const MCAsmInfo *MAI; - MCSymbolXCOFF *createXCOFFSymbolImpl(const StringMapEntry *Name, - bool IsTemporary); + /// The MCRegisterInfo for this target. + const MCRegisterInfo *MRI; - /// Map of currently defined macros. - StringMap MacroMap; + /// The MCObjectFileInfo for this target. 
+ const MCObjectFileInfo *MOFI; - struct ELFEntrySizeKey { - std::string SectionName; - unsigned Flags; - unsigned EntrySize; + /// The MCSubtargetInfo for this target. + const MCSubtargetInfo *MSTI; - ELFEntrySizeKey(StringRef SectionName, unsigned Flags, unsigned EntrySize) - : SectionName(SectionName), Flags(Flags), EntrySize(EntrySize) {} + std::unique_ptr CVContext; - bool operator<(const ELFEntrySizeKey &Other) const { - if (SectionName != Other.SectionName) - return SectionName < Other.SectionName; - if (Flags != Other.Flags) - return Flags < Other.Flags; - return EntrySize < Other.EntrySize; - } - }; - - // Symbols must be assigned to a section with a compatible entry size and - // flags. This map is used to assign unique IDs to sections to distinguish - // between sections with identical names but incompatible entry sizes and/or - // flags. This can occur when a symbol is explicitly assigned to a section, - // e.g. via __attribute__((section("myname"))). - std::map ELFEntrySizeMap; - - // This set is used to record the generic mergeable section names seen. - // These are sections that are created as mergeable e.g. .debug_str. We need - // to avoid assigning non-mergeable symbols to these sections. It is used - // to prevent non-mergeable symbols being explicitly assigned to mergeable - // sections (e.g. via _attribute_((section("myname")))). - DenseSet ELFSeenGenericMergeableSections; - - public: - explicit MCContext(const Triple &TheTriple, const MCAsmInfo *MAI, - const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI, - const SourceMgr *Mgr = nullptr, - MCTargetOptions const *TargetOpts = nullptr, - bool DoAutoReset = true, - StringRef Swift5ReflSegmentName = {}); - MCContext(const MCContext &) = delete; - MCContext &operator=(const MCContext &) = delete; - ~MCContext(); - - Environment getObjectFileType() const { return Env; } - - const StringRef &getSwift5ReflectionSegmentName() const { - return Swift5ReflectionSegmentName; + /// Allocator object used for creating machine code objects. + /// + /// We use a bump pointer allocator to avoid the need to track all allocated + /// objects. + BumpPtrAllocator Allocator; + + SpecificBumpPtrAllocator COFFAllocator; + SpecificBumpPtrAllocator DXCAllocator; + SpecificBumpPtrAllocator ELFAllocator; + SpecificBumpPtrAllocator MachOAllocator; + SpecificBumpPtrAllocator GOFFAllocator; + SpecificBumpPtrAllocator SPIRVAllocator; + SpecificBumpPtrAllocator WasmAllocator; + SpecificBumpPtrAllocator XCOFFAllocator; + SpecificBumpPtrAllocator MCInstAllocator; + + /// Bindings of names to symbols. + SymbolTable Symbols; + + /// A mapping from a local label number and an instance count to a symbol. + /// For example, in the assembly + /// 1: + /// 2: + /// 1: + /// We have three labels represented by the pairs (1, 0), (2, 0) and (1, 1) + DenseMap, MCSymbol *> LocalSymbols; + + /// Keeps tracks of names that were used both for used declared and + /// artificial symbols. The value is "true" if the name has been used for a + /// non-section symbol (there can be at most one of those, plus an unlimited + /// number of section symbols with the same name). + StringMap UsedNames; + + /// Keeps track of labels that are used in inline assembly. + SymbolTable InlineAsmUsedLabelNames; + + /// The next ID to dole out to an unnamed assembler temporary symbol with + /// a given prefix. + StringMap NextID; + + /// Instances of directional local labels. 
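With IsSPIRV and IsDXContainer added to MCContext::Environment, clients that dispatch on the object format need two more cases. A small sketch; the helper name objectFormatName is illustrative.

// Sketch: exhaustive dispatch over the extended Environment enum.
#include "llvm/MC/MCContext.h"
#include "llvm/Support/ErrorHandling.h"

static const char *objectFormatName(const llvm::MCContext &Ctx) {
  switch (Ctx.getObjectFileType()) {
  case llvm::MCContext::IsMachO:       return "Mach-O";
  case llvm::MCContext::IsELF:         return "ELF";
  case llvm::MCContext::IsGOFF:        return "GOFF";
  case llvm::MCContext::IsCOFF:        return "COFF";
  case llvm::MCContext::IsSPIRV:       return "SPIR-V";
  case llvm::MCContext::IsWasm:        return "Wasm";
  case llvm::MCContext::IsXCOFF:       return "XCOFF";
  case llvm::MCContext::IsDXContainer: return "DXContainer";
  }
  llvm_unreachable("unknown object file type");
}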
+ DenseMap Instances; + /// NextInstance() creates the next instance of the directional local label + /// for the LocalLabelVal and adds it to the map if needed. + unsigned NextInstance(unsigned LocalLabelVal); + /// GetInstance() gets the current instance of the directional local label + /// for the LocalLabelVal and adds it to the map if needed. + unsigned GetInstance(unsigned LocalLabelVal); + + /// LLVM_BB_ADDR_MAP version to emit. + uint8_t BBAddrMapVersion = 1; + + /// The file name of the log file from the environment variable + /// AS_SECURE_LOG_FILE. Which must be set before the .secure_log_unique + /// directive is used or it is an error. + char *SecureLogFile; + /// The stream that gets written to for the .secure_log_unique directive. + std::unique_ptr SecureLog; + /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to + /// catch errors if .secure_log_unique appears twice without + /// .secure_log_reset appearing between them. + bool SecureLogUsed = false; + + /// The compilation directory to use for DW_AT_comp_dir. + SmallString<128> CompilationDir; + + /// Prefix replacement map for source file information. + std::map DebugPrefixMap; + + /// The main file name if passed in explicitly. + std::string MainFileName; + + /// The dwarf file and directory tables from the dwarf .file directive. + /// We now emit a line table for each compile unit. To reduce the prologue + /// size of each line table, the files and directories used by each compile + /// unit are separated. + std::map MCDwarfLineTablesCUMap; + + /// The current dwarf line information from the last dwarf .loc directive. + MCDwarfLoc CurrentDwarfLoc; + bool DwarfLocSeen = false; + + /// Generate dwarf debugging info for assembly source files. + bool GenDwarfForAssembly = false; + + /// The current dwarf file number when generate dwarf debugging info for + /// assembly source files. + unsigned GenDwarfFileNumber = 0; + + /// Sections for generating the .debug_ranges and .debug_aranges sections. + SetVector SectionsForRanges; + + /// The information gathered from labels that will have dwarf label + /// entries when generating dwarf assembly source files. + std::vector MCGenDwarfLabelEntries; + + /// The string to embed in the debug information for the compile unit, if + /// non-empty. + StringRef DwarfDebugFlags; + + /// The string to embed in as the dwarf AT_producer for the compile unit, if + /// non-empty. + StringRef DwarfDebugProducer; + + /// The maximum version of dwarf that we should emit. + uint16_t DwarfVersion = 4; + + /// The format of dwarf that we emit. + dwarf::DwarfFormat DwarfFormat = dwarf::DWARF32; + + /// Honor temporary labels, this is useful for debugging semantic + /// differences between temporary and non-temporary labels (primarily on + /// Darwin). + bool AllowTemporaryLabels = true; + bool UseNamesOnTempLabels = false; + + /// The Compile Unit ID that we are currently processing. + unsigned DwarfCompileUnitID = 0; + + /// A collection of MCPseudoProbe in the current module + MCPseudoProbeTable PseudoProbeTable; + + // Sections are differentiated by the quadruple (section_name, group_name, + // unique_id, link_to_symbol_name). Sections sharing the same quadruple are + // combined into one section. 
+ struct ELFSectionKey { + std::string SectionName; + StringRef GroupName; + StringRef LinkedToName; + unsigned UniqueID; + + ELFSectionKey(StringRef SectionName, StringRef GroupName, + StringRef LinkedToName, unsigned UniqueID) + : SectionName(SectionName), GroupName(GroupName), + LinkedToName(LinkedToName), UniqueID(UniqueID) {} + + bool operator<(const ELFSectionKey &Other) const { + if (SectionName != Other.SectionName) + return SectionName < Other.SectionName; + if (GroupName != Other.GroupName) + return GroupName < Other.GroupName; + if (int O = LinkedToName.compare(Other.LinkedToName)) + return O < 0; + return UniqueID < Other.UniqueID; } - const Triple &getTargetTriple() const { return TT; } - const SourceMgr *getSourceManager() const { return SrcMgr; } + }; - void initInlineSourceManager(); - SourceMgr *getInlineSourceManager() { - return InlineSrcMgr.get(); - } - std::vector &getLocInfos() { return LocInfos; } - void setDiagnosticHandler(DiagHandlerTy DiagHandler) { - this->DiagHandler = DiagHandler; + struct COFFSectionKey { + std::string SectionName; + StringRef GroupName; + int SelectionKey; + unsigned UniqueID; + + COFFSectionKey(StringRef SectionName, StringRef GroupName, int SelectionKey, + unsigned UniqueID) + : SectionName(SectionName), GroupName(GroupName), + SelectionKey(SelectionKey), UniqueID(UniqueID) {} + + bool operator<(const COFFSectionKey &Other) const { + if (SectionName != Other.SectionName) + return SectionName < Other.SectionName; + if (GroupName != Other.GroupName) + return GroupName < Other.GroupName; + if (SelectionKey != Other.SelectionKey) + return SelectionKey < Other.SelectionKey; + return UniqueID < Other.UniqueID; } + }; - void setObjectFileInfo(const MCObjectFileInfo *Mofi) { MOFI = Mofi; } - - const MCAsmInfo *getAsmInfo() const { return MAI; } - - const MCRegisterInfo *getRegisterInfo() const { return MRI; } - - const MCObjectFileInfo *getObjectFileInfo() const { return MOFI; } - - const MCSubtargetInfo *getSubtargetInfo() const { return MSTI; } - - CodeViewContext &getCVContext(); - - void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; } - void setUseNamesOnTempLabels(bool Value) { UseNamesOnTempLabels = Value; } - - /// \name Module Lifetime Management - /// @{ - - /// reset - return object to right after construction state to prepare - /// to process a new module - void reset(); - - /// @} - - /// \name McInst Management - - /// Create and return a new MC instruction. - MCInst *createMCInst(); - - /// \name Symbol Management - /// @{ - - /// Create and return a new linker temporary symbol with a unique but - /// unspecified name. - MCSymbol *createLinkerPrivateTempSymbol(); - - /// Create a temporary symbol with a unique name. The name will be omitted - /// in the symbol table if UseNamesOnTempLabels is false (default except - /// MCAsmStreamer). The overload without Name uses an unspecified name. - MCSymbol *createTempSymbol(); - MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true); - - /// Create a temporary symbol with a unique name whose name cannot be - /// omitted in the symbol table. This is rarely used. - MCSymbol *createNamedTempSymbol(); - MCSymbol *createNamedTempSymbol(const Twine &Name); - - /// Create the definition of a directional local symbol for numbered label - /// (used for "1:" definitions). - MCSymbol *createDirectionalLocalSymbol(unsigned LocalLabelVal); - - /// Create and return a directional local symbol for numbered label (used - /// for "1b" or 1f" references). 
- MCSymbol *getDirectionalLocalSymbol(unsigned LocalLabelVal, bool Before); - - /// Lookup the symbol inside with the specified \p Name. If it exists, - /// return it. If not, create a forward reference and return it. - /// - /// \param Name - The symbol name, which must be unique across all symbols. - MCSymbol *getOrCreateSymbol(const Twine &Name); + struct WasmSectionKey { + std::string SectionName; + StringRef GroupName; + unsigned UniqueID; + + WasmSectionKey(StringRef SectionName, StringRef GroupName, + unsigned UniqueID) + : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) {} + + bool operator<(const WasmSectionKey &Other) const { + if (SectionName != Other.SectionName) + return SectionName < Other.SectionName; + if (GroupName != Other.GroupName) + return GroupName < Other.GroupName; + return UniqueID < Other.UniqueID; + } + }; - /// Gets a symbol that will be defined to the final stack offset of a local - /// variable after codegen. - /// - /// \param Idx - The index of a local variable passed to \@llvm.localescape. - MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx); + struct XCOFFSectionKey { + // Section name. + std::string SectionName; + // Section property. + // For csect section, it is storage mapping class. + // For debug section, it is section type flags. + union { + XCOFF::StorageMappingClass MappingClass; + XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags; + }; + bool IsCsect; + + XCOFFSectionKey(StringRef SectionName, + XCOFF::StorageMappingClass MappingClass) + : SectionName(SectionName), MappingClass(MappingClass), IsCsect(true) {} + + XCOFFSectionKey(StringRef SectionName, + XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags) + : SectionName(SectionName), DwarfSubtypeFlags(DwarfSubtypeFlags), + IsCsect(false) {} + + bool operator<(const XCOFFSectionKey &Other) const { + if (IsCsect && Other.IsCsect) + return std::tie(SectionName, MappingClass) < + std::tie(Other.SectionName, Other.MappingClass); + if (IsCsect != Other.IsCsect) + return IsCsect; + return std::tie(SectionName, DwarfSubtypeFlags) < + std::tie(Other.SectionName, Other.DwarfSubtypeFlags); + } + }; - MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName); + StringMap MachOUniquingMap; + std::map ELFUniquingMap; + std::map COFFUniquingMap; + std::map GOFFUniquingMap; + std::map WasmUniquingMap; + std::map XCOFFUniquingMap; + StringMap DXCUniquingMap; + StringMap RelSecNames; - MCSymbol *getOrCreateLSDASymbol(StringRef FuncName); + SpecificBumpPtrAllocator MCSubtargetAllocator; - /// Get the symbol for \p Name, or null. - MCSymbol *lookupSymbol(const Twine &Name) const; + /// Do automatic reset in destructor + bool AutoReset; - /// Set value for a symbol. - void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val); + MCTargetOptions const *TargetOptions; - /// getSymbols - Get a reference for the symbol table for clients that - /// want to, for example, iterate over all symbols. 'const' because we - /// still want any modifications to the table itself to use the MCContext - /// APIs. - const SymbolTable &getSymbols() const { return Symbols; } + bool HadError = false; - /// isInlineAsmLabel - Return true if the name is a label referenced in - /// inline assembly. - MCSymbol *getInlineAsmLabel(StringRef Name) const { - return InlineAsmUsedLabelNames.lookup(Name); - } + void reportCommon(SMLoc Loc, + std::function); - /// registerInlineAsmLabel - Records that the name is a label referenced in - /// inline assembly. 
- void registerInlineAsmLabel(MCSymbol *Sym); + MCSymbol *createSymbolImpl(const StringMapEntry *Name, + bool CanBeUnnamed); + MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, + bool IsTemporary); - /// @} + MCSymbol *getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal, + unsigned Instance); - /// \name Section Management - /// @{ + MCSectionELF *createELFSectionImpl(StringRef Section, unsigned Type, + unsigned Flags, SectionKind K, + unsigned EntrySize, + const MCSymbolELF *Group, bool IsComdat, + unsigned UniqueID, + const MCSymbolELF *LinkedToSym); - enum : unsigned { - /// Pass this value as the UniqueID during section creation to get the - /// generic section with the given name and characteristics. The usual - /// sections such as .text use this ID. - GenericSectionID = ~0U - }; + MCSymbolXCOFF *createXCOFFSymbolImpl(const StringMapEntry *Name, + bool IsTemporary); - /// Return the MCSection for the specified mach-o section. This requires - /// the operands to be valid. - MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind K, - const char *BeginSymName = nullptr); - - MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, SectionKind K, - const char *BeginSymName = nullptr) { - return getMachOSection(Segment, Section, TypeAndAttributes, 0, K, - BeginSymName); - } + /// Map of currently defined macros. + StringMap MacroMap; - MCSectionELF *getELFSection(const Twine &Section, unsigned Type, - unsigned Flags) { - return getELFSection(Section, Type, Flags, 0, "", false); - } + struct ELFEntrySizeKey { + std::string SectionName; + unsigned Flags; + unsigned EntrySize; - MCSectionELF *getELFSection(const Twine &Section, unsigned Type, - unsigned Flags, unsigned EntrySize) { - return getELFSection(Section, Type, Flags, EntrySize, "", false, - MCSection::NonUniqueID, nullptr); - } + ELFEntrySizeKey(StringRef SectionName, unsigned Flags, unsigned EntrySize) + : SectionName(SectionName), Flags(Flags), EntrySize(EntrySize) {} - MCSectionELF *getELFSection(const Twine &Section, unsigned Type, - unsigned Flags, unsigned EntrySize, - const Twine &Group, bool IsComdat) { - return getELFSection(Section, Type, Flags, EntrySize, Group, IsComdat, - MCSection::NonUniqueID, nullptr); + bool operator<(const ELFEntrySizeKey &Other) const { + if (SectionName != Other.SectionName) + return SectionName < Other.SectionName; + if (Flags != Other.Flags) + return Flags < Other.Flags; + return EntrySize < Other.EntrySize; } + }; - MCSectionELF *getELFSection(const Twine &Section, unsigned Type, - unsigned Flags, unsigned EntrySize, - const Twine &Group, bool IsComdat, - unsigned UniqueID, - const MCSymbolELF *LinkedToSym); - - MCSectionELF *getELFSection(const Twine &Section, unsigned Type, - unsigned Flags, unsigned EntrySize, - const MCSymbolELF *Group, bool IsComdat, - unsigned UniqueID, - const MCSymbolELF *LinkedToSym); - - /// Get a section with the provided group identifier. This section is - /// named by concatenating \p Prefix with '.' then \p Suffix. The \p Type - /// describes the type of the section and \p Flags are used to further - /// configure this named section. 
- MCSectionELF *getELFNamedSection(const Twine &Prefix, const Twine &Suffix, - unsigned Type, unsigned Flags, - unsigned EntrySize = 0); - - MCSectionELF *createELFRelSection(const Twine &Name, unsigned Type, - unsigned Flags, unsigned EntrySize, - const MCSymbolELF *Group, - const MCSectionELF *RelInfoSection); - - void renameELFSection(MCSectionELF *Section, StringRef Name); + // Symbols must be assigned to a section with a compatible entry size and + // flags. This map is used to assign unique IDs to sections to distinguish + // between sections with identical names but incompatible entry sizes and/or + // flags. This can occur when a symbol is explicitly assigned to a section, + // e.g. via __attribute__((section("myname"))). + std::map<std::tuple<std::string, unsigned, unsigned>, unsigned> ELFEntrySizeMap; - MCSectionELF *createELFGroupSection(const MCSymbolELF *Group, - bool IsComdat); + // This set is used to record the generic mergeable section names seen. + // These are sections that are created as mergeable e.g. .debug_str. We need + // to avoid assigning non-mergeable symbols to these sections. It is used + // to prevent non-mergeable symbols being explicitly assigned to mergeable + // sections (e.g. via __attribute__((section("myname")))). + DenseSet<StringRef> ELFSeenGenericMergeableSections; - void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags, - unsigned UniqueID, unsigned EntrySize); +public: + explicit MCContext(const Triple &TheTriple, const MCAsmInfo *MAI, + const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI, + const SourceMgr *Mgr = nullptr, + MCTargetOptions const *TargetOpts = nullptr, + bool DoAutoReset = true, + StringRef Swift5ReflSegmentName = {}); + MCContext(const MCContext &) = delete; + MCContext &operator=(const MCContext &) = delete; + ~MCContext(); - bool isELFImplicitMergeableSectionNamePrefix(StringRef Name); + Environment getObjectFileType() const { return Env; } - bool isELFGenericMergeableSection(StringRef Name); + const StringRef &getSwift5ReflectionSegmentName() const { + return Swift5ReflectionSegmentName; + } + const Triple &getTargetTriple() const { return TT; } + const SourceMgr *getSourceManager() const { return SrcMgr; } - /// Return the unique ID of the section with the given name, flags and entry - /// size, if it exists. - Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName, - unsigned Flags, - unsigned EntrySize); + void initInlineSourceManager(); + SourceMgr *getInlineSourceManager() { return InlineSrcMgr.get(); } + std::vector<const MDNode *> &getLocInfos() { return LocInfos; } + void setDiagnosticHandler(DiagHandlerTy DiagHandler) { + this->DiagHandler = DiagHandler; + } - MCSectionGOFF *getGOFFSection(StringRef Section, SectionKind Kind); + void setObjectFileInfo(const MCObjectFileInfo *Mofi) { MOFI = Mofi; } - MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics, - SectionKind Kind, StringRef COMDATSymName, - int Selection, - unsigned UniqueID = GenericSectionID, - const char *BeginSymName = nullptr); + const MCAsmInfo *getAsmInfo() const { return MAI; } - MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics, - SectionKind Kind, - const char *BeginSymName = nullptr); - - /// Gets or creates a section equivalent to Sec that is associated with the - /// section containing KeySym. For example, to create a debug info section - /// associated with an inline function, pass the normal debug info section - /// as Sec and the function symbol as KeySym.
- MCSectionCOFF * - getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym, - unsigned UniqueID = GenericSectionID); - - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - unsigned Flags = 0) { - return getWasmSection(Section, K, Flags, nullptr); - } + const MCRegisterInfo *getRegisterInfo() const { return MRI; } - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - unsigned Flags, const char *BeginSymName) { - return getWasmSection(Section, K, Flags, "", ~0, BeginSymName); - } + const MCObjectFileInfo *getObjectFileInfo() const { return MOFI; } - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - unsigned Flags, const Twine &Group, - unsigned UniqueID) { - return getWasmSection(Section, K, Flags, Group, UniqueID, nullptr); - } + const MCSubtargetInfo *getSubtargetInfo() const { return MSTI; } - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - unsigned Flags, const Twine &Group, - unsigned UniqueID, const char *BeginSymName); + CodeViewContext &getCVContext(); - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - unsigned Flags, const MCSymbolWasm *Group, - unsigned UniqueID, const char *BeginSymName); + void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; } + void setUseNamesOnTempLabels(bool Value) { UseNamesOnTempLabels = Value; } - MCSectionXCOFF *getXCOFFSection( - StringRef Section, SectionKind K, - Optional<XCOFF::CsectProperties> CsectProp = None, - bool MultiSymbolsAllowed = false, const char *BeginSymName = nullptr, - Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSubtypeFlags = None); + /// \name Module Lifetime Management + /// @{ - // Create and save a copy of STI and return a reference to the copy. - MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI); + /// reset - return object to right after construction state to prepare + /// to process a new module + void reset(); - /// @} + /// @} - /// \name Dwarf Management - /// @{ + /// \name McInst Management - /// Get the compilation directory for DW_AT_comp_dir - /// The compilation directory should be set with \c setCompilationDir before - /// calling this function. If it is unset, an empty string will be returned. - StringRef getCompilationDir() const { return CompilationDir; } + /// Create and return a new MC instruction. + MCInst *createMCInst(); - /// Set the compilation directory for DW_AT_comp_dir - void setCompilationDir(StringRef S) { CompilationDir = S.str(); } + /// \name Symbol Management + /// @{ - /// Add an entry to the debug prefix map. - void addDebugPrefixMapEntry(const std::string &From, const std::string &To); + /// Create and return a new linker temporary symbol with a unique but + /// unspecified name. + MCSymbol *createLinkerPrivateTempSymbol(); - // Remaps all debug directory paths in-place as per the debug prefix map. - void RemapDebugPaths(); + /// Create a temporary symbol with a unique name. The name will be omitted + /// in the symbol table if UseNamesOnTempLabels is false (default except + /// MCAsmStreamer). The overload without Name uses an unspecified name. + MCSymbol *createTempSymbol(); + MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true); - /// Get the main file name for use in error messages and debug - /// info. This can be set to ensure we've got the correct file name - /// after preprocessing or for -save-temps. - const std::string &getMainFileName() const { return MainFileName; } + /// Create a temporary symbol with a unique name whose name cannot be + /// omitted in the symbol table.
This is rarely used. + MCSymbol *createNamedTempSymbol(); + MCSymbol *createNamedTempSymbol(const Twine &Name); - /// Set the main file name and override the default. - void setMainFileName(StringRef S) { MainFileName = std::string(S); } + /// Create the definition of a directional local symbol for numbered label + /// (used for "1:" definitions). + MCSymbol *createDirectionalLocalSymbol(unsigned LocalLabelVal); - /// Creates an entry in the dwarf file and directory tables. - Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName, - unsigned FileNumber, - Optional<MD5::MD5Hash> Checksum, - Optional<StringRef> Source, unsigned CUID); + /// Create and return a directional local symbol for numbered label (used + /// for "1b" or "1f" references). + MCSymbol *getDirectionalLocalSymbol(unsigned LocalLabelVal, bool Before); - bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0); + /// Lookup the symbol inside with the specified \p Name. If it exists, + /// return it. If not, create a forward reference and return it. + /// + /// \param Name - The symbol name, which must be unique across all symbols. + MCSymbol *getOrCreateSymbol(const Twine &Name); - const std::map<unsigned, MCDwarfLineTable> &getMCDwarfLineTables() const { - return MCDwarfLineTablesCUMap; - } + /// Gets a symbol that will be defined to the final stack offset of a local + /// variable after codegen. + /// + /// \param Idx - The index of a local variable passed to \@llvm.localescape. + MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx); - MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) { - return MCDwarfLineTablesCUMap[CUID]; - } + MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName); - const MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) const { - auto I = MCDwarfLineTablesCUMap.find(CUID); - assert(I != MCDwarfLineTablesCUMap.end()); - return I->second; - } + MCSymbol *getOrCreateLSDASymbol(StringRef FuncName); - const SmallVectorImpl<MCDwarfFile> &getMCDwarfFiles(unsigned CUID = 0) { - return getMCDwarfLineTable(CUID).getMCDwarfFiles(); - } + /// Get the symbol for \p Name, or null. + MCSymbol *lookupSymbol(const Twine &Name) const; - const SmallVectorImpl<std::string> &getMCDwarfDirs(unsigned CUID = 0) { - return getMCDwarfLineTable(CUID).getMCDwarfDirs(); - } + /// Set value for a symbol. + void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val); - unsigned getDwarfCompileUnitID() { return DwarfCompileUnitID; } + /// getSymbols - Get a reference for the symbol table for clients that + /// want to, for example, iterate over all symbols. 'const' because we + /// still want any modifications to the table itself to use the MCContext + /// APIs. + const SymbolTable &getSymbols() const { return Symbols; } - void setDwarfCompileUnitID(unsigned CUIndex) { - DwarfCompileUnitID = CUIndex; - } + /// isInlineAsmLabel - Return true if the name is a label referenced in + /// inline assembly. + MCSymbol *getInlineAsmLabel(StringRef Name) const { + return InlineAsmUsedLabelNames.lookup(Name); + } - /// Specifies the "root" file and directory of the compilation unit. - /// These are "file 0" and "directory 0" in DWARF v5. - void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir, - StringRef Filename, - Optional<MD5::MD5Hash> Checksum, - Optional<StringRef> Source) { - getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum, - Source); - } + /// registerInlineAsmLabel - Records that the name is a label referenced in + /// inline assembly.
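As an illustrative sketch only (Ctx is assumed to be an initialized MCContext; none of this is part of the patch), the symbol-management interface above is typically used like so:

    MCSymbol *Tmp = Ctx.createTempSymbol();              // e.g. ".Ltmp0"
    MCSymbol *Fn = Ctx.getOrCreateSymbol("my_func");     // lookup or forward ref
    MCSymbol *Def = Ctx.createDirectionalLocalSymbol(1); // defines "1:"
    MCSymbol *Ref = Ctx.getDirectionalLocalSymbol(1, /*Before=*/true); // "1b"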
+ void registerInlineAsmLabel(MCSymbol *Sym); - /// Reports whether MD5 checksum usage is consistent (all-or-none). - bool isDwarfMD5UsageConsistent(unsigned CUID) const { - return getMCDwarfLineTable(CUID).isMD5UsageConsistent(); - } + /// @} - /// Saves the information from the currently parsed dwarf .loc directive - /// and sets DwarfLocSeen. When the next instruction is assembled an entry - /// in the line number table with this information and the address of the - /// instruction will be created. - void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column, - unsigned Flags, unsigned Isa, - unsigned Discriminator) { - CurrentDwarfLoc.setFileNum(FileNum); - CurrentDwarfLoc.setLine(Line); - CurrentDwarfLoc.setColumn(Column); - CurrentDwarfLoc.setFlags(Flags); - CurrentDwarfLoc.setIsa(Isa); - CurrentDwarfLoc.setDiscriminator(Discriminator); - DwarfLocSeen = true; - } + /// \name Section Management + /// @{ - void clearDwarfLocSeen() { DwarfLocSeen = false; } + enum : unsigned { + /// Pass this value as the UniqueID during section creation to get the + /// generic section with the given name and characteristics. The usual + /// sections such as .text use this ID. + GenericSectionID = ~0U + }; - bool getDwarfLocSeen() { return DwarfLocSeen; } - const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; } + /// Return the MCSection for the specified mach-o section. This requires + /// the operands to be valid. + MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind K, + const char *BeginSymName = nullptr); - bool getGenDwarfForAssembly() { return GenDwarfForAssembly; } - void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; } - unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; } + MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, SectionKind K, + const char *BeginSymName = nullptr) { + return getMachOSection(Segment, Section, TypeAndAttributes, 0, K, + BeginSymName); + } + + MCSectionELF *getELFSection(const Twine &Section, unsigned Type, + unsigned Flags) { + return getELFSection(Section, Type, Flags, 0, "", false); + } + + MCSectionELF *getELFSection(const Twine &Section, unsigned Type, + unsigned Flags, unsigned EntrySize) { + return getELFSection(Section, Type, Flags, EntrySize, "", false, + MCSection::NonUniqueID, nullptr); + } + + MCSectionELF *getELFSection(const Twine &Section, unsigned Type, + unsigned Flags, unsigned EntrySize, + const Twine &Group, bool IsComdat) { + return getELFSection(Section, Type, Flags, EntrySize, Group, IsComdat, + MCSection::NonUniqueID, nullptr); + } + + MCSectionELF *getELFSection(const Twine &Section, unsigned Type, + unsigned Flags, unsigned EntrySize, + const Twine &Group, bool IsComdat, + unsigned UniqueID, + const MCSymbolELF *LinkedToSym); + + MCSectionELF *getELFSection(const Twine &Section, unsigned Type, + unsigned Flags, unsigned EntrySize, + const MCSymbolELF *Group, bool IsComdat, + unsigned UniqueID, + const MCSymbolELF *LinkedToSym); + + /// Get a section with the provided group identifier. This section is + /// named by concatenating \p Prefix with '.' then \p Suffix. The \p Type + /// describes the type of the section and \p Flags are used to further + /// configure this named section. 
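A caller-side sketch of the getELFSection convenience overloads declared above (assumes an initialized MCContext Ctx and llvm/BinaryFormat/ELF.h; illustrative, not from the patch):

    // Resolves to the full overload with MCSection::NonUniqueID and no
    // linked-to symbol; "foo" becomes the COMDAT group signature.
    MCSectionELF *S = Ctx.getELFSection(
        ".text.foo", ELF::SHT_PROGBITS,
        ELF::SHF_ALLOC | ELF::SHF_EXECINSTR | ELF::SHF_GROUP,
        /*EntrySize=*/0, /*Group=*/"foo", /*IsComdat=*/true);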
+ MCSectionELF *getELFNamedSection(const Twine &Prefix, const Twine &Suffix, + unsigned Type, unsigned Flags, + unsigned EntrySize = 0); + + MCSectionELF *createELFRelSection(const Twine &Name, unsigned Type, + unsigned Flags, unsigned EntrySize, + const MCSymbolELF *Group, + const MCSectionELF *RelInfoSection); + + void renameELFSection(MCSectionELF *Section, StringRef Name); + + MCSectionELF *createELFGroupSection(const MCSymbolELF *Group, bool IsComdat); + + void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags, + unsigned UniqueID, unsigned EntrySize); + + bool isELFImplicitMergeableSectionNamePrefix(StringRef Name); + + bool isELFGenericMergeableSection(StringRef Name); + + /// Return the unique ID of the section with the given name, flags and entry + /// size, if it exists. + Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName, + unsigned Flags, + unsigned EntrySize); + + MCSectionGOFF *getGOFFSection(StringRef Section, SectionKind Kind, + MCSection *Parent, const MCExpr *SubsectionId); + + MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics, + SectionKind Kind, StringRef COMDATSymName, + int Selection, + unsigned UniqueID = GenericSectionID, + const char *BeginSymName = nullptr); + + MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics, + SectionKind Kind, + const char *BeginSymName = nullptr); + + /// Gets or creates a section equivalent to Sec that is associated with the + /// section containing KeySym. For example, to create a debug info section + /// associated with an inline function, pass the normal debug info section + /// as Sec and the function symbol as KeySym. + MCSectionCOFF * + getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym, + unsigned UniqueID = GenericSectionID); + + MCSectionSPIRV *getSPIRVSection(); + + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags = 0) { + return getWasmSection(Section, K, Flags, nullptr); + } + + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags, const char *BeginSymName) { + return getWasmSection(Section, K, Flags, "", ~0, BeginSymName); + } + + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags, const Twine &Group, + unsigned UniqueID) { + return getWasmSection(Section, K, Flags, Group, UniqueID, nullptr); + } + + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags, const Twine &Group, + unsigned UniqueID, const char *BeginSymName); + + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags, const MCSymbolWasm *Group, + unsigned UniqueID, const char *BeginSymName); + + /// Get the section for the provided Section name + MCSectionDXContainer *getDXContainerSection(StringRef Section, SectionKind K); + + bool hasXCOFFSection(StringRef Section, + XCOFF::CsectProperties CsectProp) const; + + MCSectionXCOFF *getXCOFFSection( + StringRef Section, SectionKind K, + Optional<XCOFF::CsectProperties> CsectProp = None, + bool MultiSymbolsAllowed = false, const char *BeginSymName = nullptr, + Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSubtypeFlags = None); + + // Create and save a copy of STI and return a reference to the copy.
+ MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI); + + uint8_t getBBAddrMapVersion() const { return BBAddrMapVersion; } + + /// @} + + /// \name Dwarf Management + /// @{ + + /// Get the compilation directory for DW_AT_comp_dir + /// The compilation directory should be set with \c setCompilationDir before + /// calling this function. If it is unset, an empty string will be returned. + StringRef getCompilationDir() const { return CompilationDir; } + + /// Set the compilation directory for DW_AT_comp_dir + void setCompilationDir(StringRef S) { CompilationDir = S.str(); } + + /// Add an entry to the debug prefix map. + void addDebugPrefixMapEntry(const std::string &From, const std::string &To); + + // Remaps all debug directory paths in-place as per the debug prefix map. + void RemapDebugPaths(); + + /// Get the main file name for use in error messages and debug + /// info. This can be set to ensure we've got the correct file name + /// after preprocessing or for -save-temps. + const std::string &getMainFileName() const { return MainFileName; } + + /// Set the main file name and override the default. + void setMainFileName(StringRef S) { MainFileName = std::string(S); } + + /// Creates an entry in the dwarf file and directory tables. + Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName, + unsigned FileNumber, + Optional<MD5::MD5Hash> Checksum, + Optional<StringRef> Source, unsigned CUID); + + bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0); + + const std::map<unsigned, MCDwarfLineTable> &getMCDwarfLineTables() const { + return MCDwarfLineTablesCUMap; + } + + MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) { + return MCDwarfLineTablesCUMap[CUID]; + } + + const MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) const { + auto I = MCDwarfLineTablesCUMap.find(CUID); + assert(I != MCDwarfLineTablesCUMap.end()); + return I->second; + } + + const SmallVectorImpl<MCDwarfFile> &getMCDwarfFiles(unsigned CUID = 0) { + return getMCDwarfLineTable(CUID).getMCDwarfFiles(); + } + + const SmallVectorImpl<std::string> &getMCDwarfDirs(unsigned CUID = 0) { + return getMCDwarfLineTable(CUID).getMCDwarfDirs(); + } - void setGenDwarfFileNumber(unsigned FileNumber) { - GenDwarfFileNumber = FileNumber; - } + unsigned getDwarfCompileUnitID() { return DwarfCompileUnitID; } - /// Specifies information about the "root file" for assembler clients - /// (e.g., llvm-mc). Assumes compilation dir etc. have been set up. - void setGenDwarfRootFile(StringRef FileName, StringRef Buffer); + void setDwarfCompileUnitID(unsigned CUIndex) { DwarfCompileUnitID = CUIndex; } + + /// Specifies the "root" file and directory of the compilation unit. + /// These are "file 0" and "directory 0" in DWARF v5. + void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir, + StringRef Filename, + Optional<MD5::MD5Hash> Checksum, + Optional<StringRef> Source) { + getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum, + Source); + } + + /// Reports whether MD5 checksum usage is consistent (all-or-none). + bool isDwarfMD5UsageConsistent(unsigned CUID) const { + return getMCDwarfLineTable(CUID).isMD5UsageConsistent(); + } - const SetVector<MCSection *> &getGenDwarfSectionSyms() { - return SectionsForRanges; - } + /// Saves the information from the currently parsed dwarf .loc directive + /// and sets DwarfLocSeen. When the next instruction is assembled an entry + /// in the line number table with this information and the address of the + /// instruction will be created.
+ void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column, + unsigned Flags, unsigned Isa, + unsigned Discriminator) { + CurrentDwarfLoc.setFileNum(FileNum); + CurrentDwarfLoc.setLine(Line); + CurrentDwarfLoc.setColumn(Column); + CurrentDwarfLoc.setFlags(Flags); + CurrentDwarfLoc.setIsa(Isa); + CurrentDwarfLoc.setDiscriminator(Discriminator); + DwarfLocSeen = true; + } - bool addGenDwarfSection(MCSection *Sec) { - return SectionsForRanges.insert(Sec); - } + void clearDwarfLocSeen() { DwarfLocSeen = false; } - void finalizeDwarfSections(MCStreamer &MCOS); + bool getDwarfLocSeen() { return DwarfLocSeen; } + const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; } - const std::vector<MCGenDwarfLabelEntry> &getMCGenDwarfLabelEntries() const { - return MCGenDwarfLabelEntries; - } + bool getGenDwarfForAssembly() { return GenDwarfForAssembly; } + void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; } + unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; } + EmitDwarfUnwindType emitDwarfUnwindInfo() const; - void addMCGenDwarfLabelEntry(const MCGenDwarfLabelEntry &E) { - MCGenDwarfLabelEntries.push_back(E); - } + void setGenDwarfFileNumber(unsigned FileNumber) { + GenDwarfFileNumber = FileNumber; + } - void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; } - StringRef getDwarfDebugFlags() { return DwarfDebugFlags; } + /// Specifies information about the "root file" for assembler clients + /// (e.g., llvm-mc). Assumes compilation dir etc. have been set up. + void setGenDwarfRootFile(StringRef FileName, StringRef Buffer); - void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; } - StringRef getDwarfDebugProducer() { return DwarfDebugProducer; } + const SetVector<MCSection *> &getGenDwarfSectionSyms() { + return SectionsForRanges; + } - void setDwarfFormat(dwarf::DwarfFormat f) { DwarfFormat = f; } - dwarf::DwarfFormat getDwarfFormat() const { return DwarfFormat; } + bool addGenDwarfSection(MCSection *Sec) { + return SectionsForRanges.insert(Sec); + } - void setDwarfVersion(uint16_t v) { DwarfVersion = v; } - uint16_t getDwarfVersion() const { return DwarfVersion; } + void finalizeDwarfSections(MCStreamer &MCOS); + + const std::vector<MCGenDwarfLabelEntry> &getMCGenDwarfLabelEntries() const { + return MCGenDwarfLabelEntries; + } - /// @} + void addMCGenDwarfLabelEntry(const MCGenDwarfLabelEntry &E) { + MCGenDwarfLabelEntries.push_back(E); + } + + void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; } + StringRef getDwarfDebugFlags() { return DwarfDebugFlags; } - char *getSecureLogFile() { return SecureLogFile; } - raw_fd_ostream *getSecureLog() { return SecureLog.get(); } + void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; } + StringRef getDwarfDebugProducer() { return DwarfDebugProducer; } - void setSecureLog(std::unique_ptr<raw_fd_ostream> Value) { - SecureLog = std::move(Value); - } + void setDwarfFormat(dwarf::DwarfFormat f) { DwarfFormat = f; } + dwarf::DwarfFormat getDwarfFormat() const { return DwarfFormat; } + + void setDwarfVersion(uint16_t v) { DwarfVersion = v; } + uint16_t getDwarfVersion() const { return DwarfVersion; } + + /// @} - bool getSecureLogUsed() { return SecureLogUsed; } - void setSecureLogUsed(bool Value) { SecureLogUsed = Value; } + char *getSecureLogFile() { return SecureLogFile; } + raw_fd_ostream *getSecureLog() { return SecureLog.get(); } + + void setSecureLog(std::unique_ptr<raw_fd_ostream> Value) { + SecureLog = std::move(Value); + } - void *allocate(unsigned Size, unsigned Align = 8) { - return Allocator.Allocate(Size, Align); - } + bool getSecureLogUsed() {
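A sketch of how the DWARF file and directory accessors above fit together (illustrative only; Ctx is an assumed MCContext, paths invented):

    // DWARF v5: the root file is "file 0"; further files get explicit entries.
    Ctx.setMCLineTableRootFile(/*CUID=*/0, "/src", "main.c",
                               /*Checksum=*/None, /*Source=*/None);
    Expected<unsigned> FileNo =
        Ctx.getDwarfFile("/src", "util.c", /*FileNumber=*/0,
                         /*Checksum=*/None, /*Source=*/None, /*CUID=*/0);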
return SecureLogUsed; } + void setSecureLogUsed(bool Value) { SecureLogUsed = Value; } - void deallocate(void *Ptr) {} + void *allocate(unsigned Size, unsigned Align = 8) { + return Allocator.Allocate(Size, Align); + } - bool hadError() { return HadError; } - void diagnose(const SMDiagnostic &SMD); - void reportError(SMLoc L, const Twine &Msg); - void reportWarning(SMLoc L, const Twine &Msg); + void deallocate(void *Ptr) {} - const MCAsmMacro *lookupMacro(StringRef Name) { - StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name); - return (I == MacroMap.end()) ? nullptr : &I->getValue(); - } + bool hadError() { return HadError; } + void diagnose(const SMDiagnostic &SMD); + void reportError(SMLoc L, const Twine &Msg); + void reportWarning(SMLoc L, const Twine &Msg); + + const MCAsmMacro *lookupMacro(StringRef Name) { + StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name); + return (I == MacroMap.end()) ? nullptr : &I->getValue(); + } - void defineMacro(StringRef Name, MCAsmMacro Macro) { - MacroMap.insert(std::make_pair(Name, std::move(Macro))); - } + void defineMacro(StringRef Name, MCAsmMacro Macro) { + MacroMap.insert(std::make_pair(Name, std::move(Macro))); + } - void undefineMacro(StringRef Name) { MacroMap.erase(Name); } + void undefineMacro(StringRef Name) { MacroMap.erase(Name); } - MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; } - }; + MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; } +}; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCDXContainerStreamer.h b/llvm/include/llvm/MC/MCDXContainerStreamer.h new file mode 100644 index 000000000000..ef1a95f71778 --- /dev/null +++ b/llvm/include/llvm/MC/MCDXContainerStreamer.h @@ -0,0 +1,49 @@ +//===- MCDXContainerStreamer.h - MCDXContainerStreamer Interface ---*- C++ ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Overrides MCObjectStreamer to disable all unnecessary features with stubs. +// The DXContainer format isn't a fully featured object format. It doesn't +// support symbols, and initially it will not support instruction data since it +// is used as a bitcode container for DXIL.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDXCONTAINERSTREAMER_H +#define LLVM_MC_MCDXCONTAINERSTREAMER_H + +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCObjectWriter.h" + +namespace llvm { +class MCAssembler; +class MCExpr; +class MCInst; +class raw_ostream; + +class MCDXContainerStreamer : public MCObjectStreamer { +public: + MCDXContainerStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, + std::unique_ptr<MCObjectWriter> OW, + std::unique_ptr<MCCodeEmitter> Emitter) + : MCObjectStreamer(Context, std::move(TAB), std::move(OW), + std::move(Emitter)) {} + + bool emitSymbolAttribute(MCSymbol *, MCSymbolAttr) override { return false; } + void emitCommonSymbol(MCSymbol *, uint64_t, unsigned) override {} + void emitZerofill(MCSection *, MCSymbol *Symbol = nullptr, uint64_t Size = 0, + unsigned ByteAlignment = 0, SMLoc Loc = SMLoc()) override {} + +private: + void emitInstToData(const MCInst &, const MCSubtargetInfo &) override; +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCDXCONTAINERSTREAMER_H diff --git a/llvm/include/llvm/MC/MCDXContainerWriter.h b/llvm/include/llvm/MC/MCDXContainerWriter.h new file mode 100644 index 000000000000..8ecb86c8a16f --- /dev/null +++ b/llvm/include/llvm/MC/MCDXContainerWriter.h @@ -0,0 +1,45 @@ +//===- llvm/MC/MCDXContainerWriter.h - DXContainer Writer -*- C++ -------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDXCONTAINERWRITER_H +#define LLVM_MC_MCDXCONTAINERWRITER_H + +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCObjectWriter.h" + +namespace llvm { + +class raw_pwrite_stream; + +class MCDXContainerTargetWriter : public MCObjectTargetWriter { +protected: + MCDXContainerTargetWriter() {} + +public: + virtual ~MCDXContainerTargetWriter(); + + Triple::ObjectFormatType getFormat() const override { + return Triple::DXContainer; + } + static bool classof(const MCObjectTargetWriter *W) { + return W->getFormat() == Triple::DXContainer; + } +}; + +/// Construct a new DXContainer writer instance. +/// +/// \param MOTW - The target specific DXContainer writer subclass. +/// \param OS - The stream to write to. +/// \returns The constructed object writer. +std::unique_ptr<MCObjectWriter> +createDXContainerObjectWriter(std::unique_ptr<MCDXContainerTargetWriter> MOTW, + raw_pwrite_stream &OS); + +} // end namespace llvm + +#endif // LLVM_MC_MCDXCONTAINERWRITER_H diff --git a/llvm/include/llvm/MC/MCDecoderOps.h b/llvm/include/llvm/MC/MCDecoderOps.h new file mode 100644 index 000000000000..c1956993fca2 --- /dev/null +++ b/llvm/include/llvm/MC/MCDecoderOps.h @@ -0,0 +1,33 @@ +//===------------ llvm/MC/MCDecoderOps.h - Decoder driver -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Disassembler decoder state machine driver. +//===----------------------------------------------------------------------===// +#ifndef LLVM_MC_MCDECODEROPS_H +#define LLVM_MC_MCDECODEROPS_H + +namespace llvm { + +namespace MCD { +// Disassembler state machine opcodes.
+enum DecoderOps { + OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len) + OPC_FilterValue, // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip) + OPC_CheckField, // OPC_CheckField(uint8_t Start, uint8_t Len, + // uleb128 Val, uint16_t NumToSkip) + OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip) + OPC_Decode, // OPC_Decode(uleb128 Opcode, uleb128 DIdx) + OPC_TryDecode, // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx, + // uint16_t NumToSkip) + OPC_SoftFail, // OPC_SoftFail(uleb128 PMask, uleb128 NMask) + OPC_Fail // OPC_Fail() +}; + +} // namespace MCD +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/MC/MCDirectives.h b/llvm/include/llvm/MC/MCDirectives.h index 51e57ad37021..d6ab29febeeb 100644 --- a/llvm/include/llvm/MC/MCDirectives.h +++ b/llvm/include/llvm/MC/MCDirectives.h @@ -31,6 +31,7 @@ enum MCSymbolAttr { MCSA_LGlobal, ///< .lglobl (XCOFF) MCSA_Extern, ///< .extern (XCOFF) MCSA_Hidden, ///< .hidden (ELF) + MCSA_Exported, ///< .globl _foo, exported (XCOFF) MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) MCSA_Internal, ///< .internal (ELF) MCSA_LazyReference, ///< .lazy_reference (MachO) diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h index 10037cd66ef1..de069ff95c2f 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -40,26 +40,35 @@ struct SymbolInfoTy { private: bool IsXCOFF; + bool HasType; public: SymbolInfoTy(uint64_t Addr, StringRef Name, Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx, bool Label) - : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {} - SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type) - : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {} + : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true), + HasType(false) {} + SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, + bool IsXCOFF = false) + : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {} bool isXCOFF() const { return IsXCOFF; } private: friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { - assert(P1.IsXCOFF == P2.IsXCOFF && - "P1.IsXCOFF should be equal to P2.IsXCOFF."); + assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && + "The value of IsXCOFF and HasType in P1 and P2 should be the same " + "respectively."); + + if (P1.IsXCOFF && P1.HasType) + return std::tie(P1.Addr, P1.Type, P1.Name) < + std::tie(P2.Addr, P2.Type, P2.Name); + + if (P1.IsXCOFF) return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); return std::tie(P1.Addr, P1.Name, P1.Type) < - std::tie(P2.Addr, P2.Name, P2.Type); + std::tie(P2.Addr, P2.Name, P2.Type); } }; @@ -172,10 +181,9 @@ protected: public: // Helpers around MCSymbolizer - bool tryAddingSymbolicOperand(MCInst &Inst, - int64_t Value, - uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) const; + bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) const; void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; diff --git a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h index ffac5ee5cb1f..8af3bb2296ec 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h @@
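The DecoderOps enum that follows drives the TableGen-generated disassembler tables. Purely as an illustration of the table encoding (byte values invented, not from the patch):

    static const uint8_t DecoderTable32[] = {
        MCD::OPC_ExtractField, 28, 4,    // Field = Inst[31:28]
        MCD::OPC_FilterValue, 0xE, 3, 0, // uleb128 0xE; NumToSkip = 3 (uint16)
        MCD::OPC_Decode, 0x90, 0x02, 1,  // uleb128 opcode 0x110, decoder idx 1
        MCD::OPC_Fail,                   // reached on filter mismatch
    };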
-15,7 +15,7 @@ #ifndef LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H #define LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H -#include "llvm-c/Disassembler.h" +#include "llvm-c/DisassemblerTypes.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" #include @@ -46,7 +46,8 @@ public: bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) override; + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream, int64_t Value, uint64_t Address) override; diff --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h index b966106007db..1efb63f1a142 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h @@ -17,9 +17,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" -#include #include #include +#include namespace llvm { @@ -63,12 +63,13 @@ public: /// \param Address - Load address of the instruction. /// \param IsBranch - Is the instruction a branch? /// \param Offset - Byte offset of the operand inside the inst. + /// \param OpSize - Size of the operand in bytes. /// \param InstSize - Size of the instruction in bytes. /// \return Whether a symbolic operand was added. virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, - uint64_t InstSize) = 0; + uint64_t OpSize, uint64_t InstSize) = 0; /// Try to add a comment on the PC-relative load. /// For instance, in Mach-O, this is used to add annotations to instructions diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 7e72d56f3097..ce65b173b3d2 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -19,14 +19,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Error.h" #include "llvm/Support/MD5.h" #include #include #include -#include #include #include @@ -36,6 +34,7 @@ template <typename T> class ArrayRef; class MCAsmBackend; class MCContext; class MCObjectStreamer; +class MCSection; class MCStreamer; class MCSymbol; class raw_ostream; @@ -63,6 +62,9 @@ public: /// Emit the .debug_line_str section if appropriate. void emitSection(MCStreamer *MCOS); + + /// Returns finalized section.
+ SmallString<0> getFinalizedData(); }; /// Instances of this class represent the name of the dwarf .file directive and @@ -294,8 +296,8 @@ public: RootFile.DirIndex = 0; RootFile.Checksum = Checksum; RootFile.Source = Source; - trackMD5Usage(Checksum.hasValue()); - HasSource = Source.hasValue(); + trackMD5Usage(Checksum.has_value()); + HasSource = Source.has_value(); } void resetFileTable() { @@ -686,6 +688,7 @@ struct MCDwarfFrameInfo { bool IsSimple = false; unsigned RAReg = static_cast<unsigned>(INT_MAX); bool IsBKeyFrame = false; + bool IsMTETaggedFrame = false; }; class MCDwarfFrameEmitter { diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h index 8f2b176862c8..eac807aad908 100644 --- a/llvm/include/llvm/MC/MCELFStreamer.h +++ b/llvm/include/llvm/MC/MCELFStreamer.h @@ -10,12 +10,19 @@ #define LLVM_MC_MCELFSTREAMER_H #include "llvm/ADT/SmallVector.h" -#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectStreamer.h" namespace llvm { +class MCContext; +class MCDataFragment; +class MCFragment; +class MCObjectWriter; +class MCSection; +class MCSubtargetInfo; +class MCSymbol; +class MCSymbolRefExpr; class MCAsmBackend; class MCCodeEmitter; class MCExpr; diff --git a/llvm/include/llvm/MC/MCFixedLenDisassembler.h b/llvm/include/llvm/MC/MCFixedLenDisassembler.h deleted file mode 100644 index 1edf3899c130..000000000000 --- a/llvm/include/llvm/MC/MCFixedLenDisassembler.h +++ /dev/null @@ -1,33 +0,0 @@ -//===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Fixed length disassembler decoder state machine driver. -//===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCFIXEDLENDISASSEMBLER_H -#define LLVM_MC_MCFIXEDLENDISASSEMBLER_H - -namespace llvm { - -namespace MCD { -// Disassembler state machine opcodes. -enum DecoderOps { - OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len) - OPC_FilterValue, // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip) - OPC_CheckField, // OPC_CheckField(uint8_t Start, uint8_t Len, - // uleb128 Val, uint16_t NumToSkip) - OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip) - OPC_Decode, // OPC_Decode(uleb128 Opcode, uleb128 DIdx) - OPC_TryDecode, // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx, - // uint16_t NumToSkip) - OPC_SoftFail, // OPC_SoftFail(uleb128 PMask, uleb128 NMask) - OPC_Fail // OPC_Fail() -}; - -} // namespace MCD -} // namespace llvm - -#endif diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index 736fdd992063..b6329b131624 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -17,7 +17,6 @@ #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Alignment.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/SMLoc.h" #include #include @@ -294,7 +293,7 @@ public: class MCAlignFragment : public MCFragment { /// The alignment to ensure, in bytes. - unsigned Alignment; + Align Alignment; /// Flag to indicate that (optimal) NOPs should be emitted instead /// of using the provided value.
The exact interpretation of this flag is @@ -315,12 +314,12 @@ class MCAlignFragment : public MCFragment { const MCSubtargetInfo *STI; public: - MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, + MCAlignFragment(Align Alignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit, MCSection *Sec = nullptr) : MCFragment(FT_Align, false, Sec), Alignment(Alignment), EmitNops(false), Value(Value), ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {} - unsigned getAlignment() const { return Alignment; } + Align getAlignment() const { return Alignment; } int64_t getValue() const { return Value; } diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index 632a7d8f820e..a937f8203a0d 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -14,10 +14,13 @@ #ifndef LLVM_MC_MCINSTRANALYSIS_H #define LLVM_MC_MCINSTRANALYSIS_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include +#include namespace llvm { diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index e8ffd29170e6..120c3482ce70 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -14,10 +14,11 @@ #ifndef LLVM_MC_MCINSTRDESC_H #define LLVM_MC_MCINSTRDESC_H -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/MC/MCRegister.h" namespace llvm { +class MCRegisterInfo; class MCInst; @@ -148,6 +149,7 @@ enum Flag { Variadic, HasOptionalDef, Pseudo, + Meta, Return, EHScopeReturn, Call, @@ -263,6 +265,10 @@ public: /// correspond to a real machine instruction. bool isPseudo() const { return Flags & (1ULL << MCID::Pseudo); } + /// Return true if this is a meta instruction that doesn't + /// produce any output in the form of executable instructions. + bool isMetaInstruction() const { return Flags & (1ULL << MCID::Meta); } + /// Return true if the instruction is a return. bool isReturn() const { return Flags & (1ULL << MCID::Return); } diff --git a/llvm/include/llvm/MC/MCInstrInfo.h b/llvm/include/llvm/MC/MCInstrInfo.h index 598e24257e5d..84995b1e93fe 100644 --- a/llvm/include/llvm/MC/MCInstrInfo.h +++ b/llvm/include/llvm/MC/MCInstrInfo.h @@ -13,6 +13,7 @@ #ifndef LLVM_MC_MCINSTRINFO_H #define LLVM_MC_MCINSTRINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstrDesc.h" #include diff --git a/llvm/include/llvm/MC/MCLinkerOptimizationHint.h b/llvm/include/llvm/MC/MCLinkerOptimizationHint.h index 003491f32f75..b91fbc62aa75 100644 --- a/llvm/include/llvm/MC/MCLinkerOptimizationHint.h +++ b/llvm/include/llvm/MC/MCLinkerOptimizationHint.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/raw_ostream.h" #include #include @@ -28,6 +27,7 @@ namespace llvm { class MachObjectWriter; class MCAsmLayout; class MCSymbol; +class raw_ostream; /// Linker Optimization Hint Type. 
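The MCAlignFragment hunk above switches the stored alignment from a raw unsigned to llvm::Align; a caller-side sketch (Sec is an assumed MCSection*, not from the patch):

    // Request 16-byte alignment, padding with single zero bytes, and emit at
    // most 15 bytes of padding.
    auto *AF = new MCAlignFragment(Align(16), /*Value=*/0, /*ValueSize=*/1,
                                   /*MaxBytesToEmit=*/15, Sec);
    Align A = AF->getAlignment(); // now llvm::Align rather than unsigned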
enum MCLOHType { diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h index f4f9c474cdcd..149373dd2b54 100644 --- a/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -264,6 +264,8 @@ public: bool IsPCRel) const override; uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + + void writeAddrsigSection(MCAssembler &Asm); }; /// Construct a new Mach-O writer instance. diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 3c1d10c4e62f..ebc9b95d6d4e 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -13,13 +13,13 @@ #ifndef LLVM_MC_MCOBJECTFILEINFO_H #define LLVM_MC_MCOBJECTFILEINFO_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/Swift.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CodeGen.h" #include "llvm/Support/VersionTuple.h" +#include + namespace llvm { class MCContext; class MCSection; @@ -213,6 +213,7 @@ protected: MCSection *LazySymbolPointerSection = nullptr; MCSection *NonLazySymbolPointerSection = nullptr; MCSection *ThreadLocalPointerSection = nullptr; + MCSection *AddrSigSection = nullptr; /// COFF specific sections. MCSection *DrectveSection = nullptr; @@ -224,6 +225,9 @@ protected: MCSection *GIATsSection = nullptr; MCSection *GLJMPSection = nullptr; + // GOFF specific sections. + MCSection *PPA1Section = nullptr; + // XCOFF specific sections MCSection *TOCBaseSection = nullptr; MCSection *ReadOnly8Section = nullptr; @@ -410,6 +414,7 @@ public: MCSection *getThreadLocalPointerSection() const { return ThreadLocalPointerSection; } + MCSection *getAddrSigSection() const { return AddrSigSection; } // COFF specific sections. MCSection *getDrectveSection() const { return DrectveSection; } @@ -421,6 +426,9 @@ public: MCSection *getGIATsSection() const { return GIATsSection; } MCSection *getGLJMPSection() const { return GLJMPSection; } + // GOFF specific sections. 
+ MCSection *getPPA1Section() const { return PPA1Section; } + // XCOFF specific sections MCSection *getTOCBaseSection() const { return TOCBaseSection; } @@ -448,8 +456,10 @@ private: void initELFMCObjectFileInfo(const Triple &T, bool Large); void initGOFFMCObjectFileInfo(const Triple &T); void initCOFFMCObjectFileInfo(const Triple &T); + void initSPIRVMCObjectFileInfo(const Triple &T); void initWasmMCObjectFileInfo(const Triple &T); void initXCOFFMCObjectFileInfo(const Triple &T); + void initDXContainerObjectFileInfo(const Triple &T); MCSection *getDwarfComdatSection(const char *Name, uint64_t Hash) const; public: diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index 183fd79fb9fc..6536c81d4aac 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -11,11 +11,17 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" namespace llvm { +class MCContext; +class MCInst; +class MCObjectWriter; +class MCSymbol; +struct MCDwarfFrameInfo; class MCAssembler; class MCCodeEmitter; class MCSubtargetInfo; diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h index d2a2f1a13ff5..a8e24a0c56ba 100644 --- a/llvm/include/llvm/MC/MCObjectWriter.h +++ b/llvm/include/llvm/MC/MCObjectWriter.h @@ -10,6 +10,7 @@ #define LLVM_MC_MCOBJECTWRITER_H #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCSymbol.h" #include namespace llvm { @@ -32,6 +33,9 @@ class MCValue; /// should be emitted as part of writeObject(). class MCObjectWriter { protected: + std::vector<const MCSymbol *> AddrsigSyms; + bool EmitAddrsigSection = false; + MCObjectWriter() = default; public: @@ -91,11 +95,15 @@ public: /// Tell the object writer to emit an address-significance table during /// writeObject(). If this function is not called, all symbols are treated as /// address-significant. - virtual void emitAddrsigSection() {} + void emitAddrsigSection() { EmitAddrsigSection = true; } + + bool getEmitAddrsigSection() { return EmitAddrsigSection; } /// Record the given symbol in the address-significance table to be written /// during writeObject(). - virtual void addAddrsigSymbol(const MCSymbol *Sym) {} + void addAddrsigSymbol(const MCSymbol *Sym) { AddrsigSyms.push_back(Sym); } + + std::vector<const MCSymbol *> &getAddrsigSyms() { return AddrsigSyms; } /// Write the object file and returns the number of bytes written.
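With the address-significance hooks above now concrete rather than virtual stubs, any client holding an MCObjectWriter can record symbols directly; a hypothetical helper as a sketch:

    // Request .llvm_addrsig emission and mark one symbol address-significant.
    void recordAddrsig(MCObjectWriter &W, const MCSymbol *Sym) {
      W.emitAddrsigSection();
      W.addAddrsigSymbol(Sym);
    }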
/// diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h index 06796979b4fc..850a9cffe73a 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -12,10 +12,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCAsmMacro.h" -#include #include #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index 29386ffc45ac..4a1291856a20 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -10,20 +10,20 @@ #define LLVM_MC_MCPARSER_MCASMPARSER_H #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCAsmMacro.h" #include "llvm/Support/SMLoc.h" #include -#include #include #include namespace llvm { +class MCAsmLexer; class MCAsmInfo; class MCAsmParserExtension; class MCContext; diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h b/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h index fc10e33bcf6b..cbabc2c9d69d 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h @@ -9,9 +9,8 @@ #ifndef LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H #define LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Support/SMLoc.h" diff --git a/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h index faf0a4474c8a..22f66a011ece 100644 --- a/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h +++ b/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h @@ -10,7 +10,6 @@ #define LLVM_MC_MCPARSER_MCPARSEDASMOPERAND_H #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/SMLoc.h" #include @@ -63,6 +62,13 @@ public: /// isMem - Is this a memory operand? virtual bool isMem() const = 0; + /// isMemUseUpRegs - Is memory operand use up regs, for example, intel MS + /// inline asm may use ARR[baseReg + IndexReg + ...] which may use up regs + /// in [...] expr, so ARR[baseReg + IndexReg + ...] can not use extra reg + /// for ARR. For example, calculating ARR address to a reg or use another + /// base reg in PIC model. + virtual bool isMemUseUpRegs() const { return false; } + /// getStartLoc - Get the location of the first token of this operand. virtual SMLoc getStartLoc() const = 0; /// getEndLoc - Get the location of the last token of this operand. @@ -77,10 +83,6 @@ public: /// assembly. virtual bool isOffsetOfLocal() const { return false; } - /// isMemPlaceholder - Do we need to ignore the constraint, rather than emit - /// code? Only valid when parsing MS-style inline assembly. - virtual bool isMemPlaceholder(const MCInstrDesc &Desc) const { return false; } - /// getOffsetOfLoc - Get the location of the offset operator. 
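The isMemUseUpRegs hook above concerns MS-style inline-asm memory operands whose bracketed expression consumes the registers itself; an illustrative (invented) 32-bit example:

    // ARR[ebx + ecx] folds ebx/ecx into the memory operand, so no extra
    // register can be allocated to materialize ARR's address separately.
    __asm mov eax, ARR[ebx + ecx]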
virtual SMLoc getOffsetOfLoc() const { return SMLoc(); } diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 908ee30e4060..1d380c6a00b7 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -11,10 +11,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/SMLoc.h" @@ -23,9 +21,12 @@ namespace llvm { +class MCContext; class MCInst; +class MCInstrInfo; class MCStreamer; class MCSubtargetInfo; +class MCSymbol; template <typename T> class SmallVectorImpl; using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; @@ -100,10 +101,14 @@ struct AsmRewrite { int64_t Val; StringRef Label; IntelExpr IntelExp; + bool IntelExpRestricted; public: - AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) - : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {} + AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0, + bool Restricted = false) + : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) { + IntelExpRestricted = Restricted; + } AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) : AsmRewrite(kind, loc, len) { Label = label; } AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 9ff68f4236ca..d10d6015cd3c 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -55,6 +55,7 @@ #include #include #include +#include <unordered_set> #include namespace llvm { @@ -82,10 +83,9 @@ struct MCPseudoProbeFuncDesc { void print(raw_ostream &OS); }; -class MCPseudoProbe; class MCDecodedPseudoProbe; -// An inline frame has the form <Guid, ProbeID> +// An inline frame has the form <CalleeGuid, ProbeId> using InlineSite = std::tuple<uint64_t, uint32_t>; using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; // GUID to PseudoProbeFuncDesc map using GUIDProbeFunctionMap = std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; using AddressProbesMap = std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>; -class MCPseudoProbeInlineTree; class MCDecodedPseudoProbeInlineTree; class MCPseudoProbeBase { @@ -272,7 +271,7 @@ public: MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; // Return false if it's a dummy inline site - bool hasInlineSite() const { return std::get<0>(ISite) != 0; } + bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } }; /// Instances of this class represent the pseudo probes inserted into a compile @@ -355,6 +354,15 @@ public: // Decode pseudo_probe section to build address to probes map. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); + // Decode pseudo_probe section to build address to probes map for specified + // functions only.
+ bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, + std::unordered_set<uint64_t> &GuildFilter); + + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, + std::unordered_set<uint64_t> &GuildFilter); + + // Print pseudo_probe_desc section info + void printGUID2FuncDescMap(raw_ostream &OS); diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index 65436dc74c3e..7165a2982d1b 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -580,6 +580,9 @@ public: bool isSuperOrSubRegisterEq(MCRegister RegA, MCRegister RegB) const { return isSubRegisterEq(RegA, RegB) || isSuperRegister(RegA, RegB); } + + /// Returns true if the two registers are equal or alias each other. + bool regsOverlap(MCRegister RegA, MCRegister RegB) const; }; //===----------------------------------------------------------------------===// @@ -698,6 +701,11 @@ public: // unit, we can allow a 0 differential here. advance(); } + + MCRegUnitIterator &operator++() { + MCRegisterInfo::DiffListIterator::operator++(); + return *this; + } }; /// MCRegUnitMaskIterator enumerates a list of register units and their diff --git a/llvm/include/llvm/MC/MCSPIRVObjectWriter.h b/llvm/include/llvm/MC/MCSPIRVObjectWriter.h new file mode 100644 index 000000000000..a8baf96b8384 --- /dev/null +++ b/llvm/include/llvm/MC/MCSPIRVObjectWriter.h @@ -0,0 +1,40 @@ +//===-- llvm/MC/MCSPIRVObjectWriter.h - SPIR-V Object Writer -----*- C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSPIRVOBJECTWRITER_H +#define LLVM_MC_MCSPIRVOBJECTWRITER_H + +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> + +namespace llvm { + +class MCSPIRVObjectTargetWriter : public MCObjectTargetWriter { +protected: + explicit MCSPIRVObjectTargetWriter() {} + +public: + Triple::ObjectFormatType getFormat() const override { return Triple::SPIRV; } + static bool classof(const MCObjectTargetWriter *W) { + return W->getFormat() == Triple::SPIRV; + } +}; + +/// Construct a new SPIR-V writer instance. +/// +/// \param MOTW - The target specific SPIR-V writer subclass. +/// \param OS - The stream to write to. +/// \returns The constructed object writer. +std::unique_ptr<MCObjectWriter> +createSPIRVObjectWriter(std::unique_ptr<MCSPIRVObjectTargetWriter> MOTW, + raw_pwrite_stream &OS); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/MC/MCSPIRVStreamer.h b/llvm/include/llvm/MC/MCSPIRVStreamer.h new file mode 100644 index 000000000000..7366e0a9d82c --- /dev/null +++ b/llvm/include/llvm/MC/MCSPIRVStreamer.h @@ -0,0 +1,50 @@ +//===- MCSPIRVStreamer.h - MCStreamer SPIR-V Object File Interface -*- C++ ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Overrides MCObjectStreamer to disable all unnecessary features with stubs.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSPIRVSTREAMER_H +#define LLVM_MC_MCSPIRVSTREAMER_H + +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCObjectWriter.h" + +namespace llvm { +class MCAssembler; +class MCExpr; +class MCInst; +class raw_ostream; + +class MCSPIRVStreamer : public MCObjectStreamer { +public: + MCSPIRVStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, + std::unique_ptr<MCObjectWriter> OW, + std::unique_ptr<MCCodeEmitter> Emitter) + : MCObjectStreamer(Context, std::move(TAB), std::move(OW), + std::move(Emitter)) {} + + bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override { + return false; + } + void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override {} + void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr, + uint64_t Size = 0, unsigned ByteAlignment = 0, + SMLoc Loc = SMLoc()) override {} + +private: + void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 4335092f0920..2f7e17123c19 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -46,7 +46,9 @@ public: SV_GOFF, SV_MachO, SV_Wasm, - SV_XCOFF + SV_XCOFF, + SV_SPIRV, + SV_DXContainer, }; /// Express the state of bundle locked groups while emitting code. @@ -184,13 +186,13 @@ public: void dump() const; - virtual void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + virtual void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const = 0; /// Return true if a .align directive should use "optimized nops" to fill /// instead of 0s. - virtual bool UseCodeAlign() const = 0; + virtual bool useCodeAlign() const = 0; /// Check whether this section is "virtual", that is has no actual object /// file contents. diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h index 3ece6eb904bc..373863e21ff0 100644 --- a/llvm/include/llvm/MC/MCSectionCOFF.h +++ b/llvm/include/llvm/MC/MCSectionCOFF.h @@ -61,7 +61,7 @@ private: public: /// Decides whether a '.section' directive should be printed before the /// section name - bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; + bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; unsigned getCharacteristics() const { return Characteristics; } MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; } @@ -69,10 +69,10 @@ public: void setSelection(int Selection) const; - void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; + bool useCodeAlign() const override; bool isVirtualSection() const override; StringRef getVirtualSectionKind() const override; diff --git a/llvm/include/llvm/MC/MCSectionDXContainer.h b/llvm/include/llvm/MC/MCSectionDXContainer.h new file mode 100644 index 000000000000..014684a93529 --- /dev/null +++ b/llvm/include/llvm/MC/MCSectionDXContainer.h @@ -0,0 +1,38 @@ +//===- MCSectionDXContainer.h - DXContainer MC Sections ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCSectionDXContainer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSECTIONDXCONTAINER_H +#define LLVM_MC_MCSECTIONDXCONTAINER_H + +#include "llvm/MC/MCSection.h" +#include "llvm/MC/SectionKind.h" + +namespace llvm { + +class MCSymbol; + +class MCSectionDXContainer final : public MCSection { + friend class MCContext; + + MCSectionDXContainer(StringRef Name, SectionKind K, MCSymbol *Begin) + : MCSection(SV_DXContainer, Name, K, Begin) {} + +public: + void printSwitchToSection(const MCAsmInfo &, const Triple &, raw_ostream &, + const MCExpr *) const override; + bool useCodeAlign() const override { return false; } + bool isVirtualSection() const override { return false; } +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCSECTIONDXCONTAINER_H diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h index 8b17df25a158..3b5239394493 100644 --- a/llvm/include/llvm/MC/MCSectionELF.h +++ b/llvm/include/llvm/MC/MCSectionELF.h @@ -21,8 +21,6 @@ namespace llvm { -class MCSymbol; - /// This represents a section on linux, lots of unix variants and some bare /// metal systems. class MCSectionELF final : public MCSection { @@ -69,7 +67,7 @@ private: public: /// Decides whether a '.section' directive should be printed before the /// section name - bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; + bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; unsigned getType() const { return Type; } unsigned getFlags() const { return Flags; } @@ -78,10 +76,10 @@ public: const MCSymbolELF *getGroup() const { return Group.getPointer(); } bool isComdat() const { return Group.getInt(); } - void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; + bool useCodeAlign() const override; bool isVirtualSection() const override; StringRef getVirtualSectionKind() const override; diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h index 4ba7f79f9696..d866329461ce 100644 --- a/llvm/include/llvm/MC/MCSectionGOFF.h +++ b/llvm/include/llvm/MC/MCSectionGOFF.h @@ -15,6 +15,7 @@ #ifndef LLVM_MC_MCSECTIONGOFF_H #define LLVM_MC_MCSECTIONGOFF_H +#include "llvm/BinaryFormat/GOFF.h" #include "llvm/MC/MCSection.h" #include "llvm/Support/raw_ostream.h" @@ -24,21 +25,27 @@ class MCExpr; class MCSectionGOFF final : public MCSection { private: + MCSection *Parent; + const MCExpr *SubsectionId; + friend class MCContext; - MCSectionGOFF(StringRef Name, SectionKind K) - : MCSection(SV_GOFF, Name, K, nullptr) {} + MCSectionGOFF(StringRef Name, SectionKind K, MCSection *P, const MCExpr *Sub) + : MCSection(SV_GOFF, Name, K, nullptr), Parent(P), SubsectionId(Sub) {} public: - void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const override { OS << "\t.section\t\"" << getName() << "\"\n"; } - bool UseCodeAlign() const override { return false; } + bool useCodeAlign() const override { return false; } bool isVirtualSection() const override { return false; } + MCSection *getParent() const { return Parent; 
} + const MCExpr *getSubsectionId() const { return SubsectionId; } + static bool classof(const MCSection *S) { return S->getVariant() == SV_GOFF; } }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCSectionMachO.h b/llvm/include/llvm/MC/MCSectionMachO.h index bf8940524e5a..fdf1773d4002 100644 --- a/llvm/include/llvm/MC/MCSectionMachO.h +++ b/llvm/include/llvm/MC/MCSectionMachO.h @@ -68,10 +68,10 @@ public: bool &TAAParsed, // Out. unsigned &StubSize); // Out. - void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; + bool useCodeAlign() const override; bool isVirtualSection() const override; static bool classof(const MCSection *S) { diff --git a/llvm/include/llvm/MC/MCSectionSPIRV.h b/llvm/include/llvm/MC/MCSectionSPIRV.h new file mode 100644 index 000000000000..6534599d2091 --- /dev/null +++ b/llvm/include/llvm/MC/MCSectionSPIRV.h @@ -0,0 +1,41 @@ +//===- MCSectionSPIRV.h - SPIR-V Machine Code Sections ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCSectionSPIRV class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSECTIONSPIRV_H +#define LLVM_MC_MCSECTIONSPIRV_H + +#include "llvm/MC/MCSection.h" +#include "llvm/MC/SectionKind.h" + +namespace llvm { + +class MCSymbol; + +class MCSectionSPIRV final : public MCSection { + friend class MCContext; + + MCSectionSPIRV(SectionKind K, MCSymbol *Begin) + : MCSection(SV_SPIRV, "", K, Begin) {} + // TODO: Add StringRef Name to MCSectionSPIRV. 
+ +public: + ~MCSectionSPIRV() = default; + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + raw_ostream &OS, + const MCExpr *Subsection) const override {} + bool useCodeAlign() const override { return false; } + bool isVirtualSection() const override { return false; } +}; + +} // end namespace llvm + +#endif // LLVM_MC_MCSECTIONSPIRV_H diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h index f34dd6b3507c..579f92a75056 100644 --- a/llvm/include/llvm/MC/MCSectionWasm.h +++ b/llvm/include/llvm/MC/MCSectionWasm.h @@ -58,10 +58,10 @@ public: const MCSymbolWasm *getGroup() const { return Group; } unsigned getSegmentFlags() const { return SegmentFlags; } - void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; + bool useCodeAlign() const override; bool isVirtualSection() const override; bool isWasmData() const { diff --git a/llvm/include/llvm/MC/MCSectionXCOFF.h b/llvm/include/llvm/MC/MCSectionXCOFF.h index 1dafdd3ac500..95332647c9be 100644 --- a/llvm/include/llvm/MC/MCSectionXCOFF.h +++ b/llvm/include/llvm/MC/MCSectionXCOFF.h @@ -38,6 +38,7 @@ class MCSectionXCOFF final : public MCSection { Optional DwarfSubtypeFlags; bool MultiSymbolsAllowed; static constexpr unsigned DefaultAlignVal = 4; + static constexpr unsigned DefaultTextAlignVal = 32; MCSectionXCOFF(StringRef Name, XCOFF::StorageMappingClass SMC, XCOFF::SymbolType ST, SectionKind K, MCSymbolXCOFF *QualName, @@ -57,9 +58,14 @@ class MCSectionXCOFF final : public MCSection { QualName->setRepresentedCsect(this); QualName->setStorageClass(XCOFF::C_HIDEXT); - // A csect is 4 byte aligned by default, except for undefined symbol csects. - if (ST != XCOFF::XTY_ER) - setAlignment(Align(DefaultAlignVal)); + if (ST != XCOFF::XTY_ER) { + // For a csect for program code, set the alignment to 32 bytes by default. + // For other csects, set the alignment to 4 bytes by default. + if (SMC == XCOFF::XMC_PR) + setAlignment(Align(DefaultTextAlignVal)); + else + setAlignment(Align(DefaultAlignVal)); + } } MCSectionXCOFF(StringRef Name, SectionKind K, MCSymbolXCOFF *QualName, @@ -74,9 +80,8 @@ class MCSectionXCOFF final : public MCSection { // FIXME: use a more meaningful name for non csect sections. QualName->setRepresentedCsect(this); - // Set default alignment 4 for all non csect sections for now. - // FIXME: set different alignments according to section types. - setAlignment(Align(DefaultAlignVal)); + // Use default text alignment as the alignment for DWARF sections. 
+ setAlignment(Align(DefaultTextAlignVal)); } void printCsectDirective(raw_ostream &OS) const; @@ -95,24 +100,28 @@ public: XCOFF::StorageClass getStorageClass() const { return QualName->getStorageClass(); } + XCOFF::VisibilityType getVisibilityType() const { + return QualName->getVisibilityType(); + } XCOFF::SymbolType getCSectType() const { assert(isCsect() && "Only csect section has symbol type property!"); return CsectProp->Type; } MCSymbolXCOFF *getQualNameSymbol() const { return QualName; } - void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; + bool useCodeAlign() const override; bool isVirtualSection() const override; StringRef getSymbolTableName() const { return SymbolTableName; } bool isMultiSymbolsAllowed() const { return MultiSymbolsAllowed; } - bool isCsect() const { return CsectProp.hasValue(); } - bool isDwarfSect() const { return DwarfSubtypeFlags.hasValue(); } + bool isCsect() const { return CsectProp.has_value(); } + bool isDwarfSect() const { return DwarfSubtypeFlags.has_value(); } Optional getDwarfSubtypeFlags() const { return DwarfSubtypeFlags; } + Optional getCsectProp() const { return CsectProp; } }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 3d6c512bfe73..e71014b8cccf 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -13,22 +13,20 @@ #ifndef LLVM_MC_MCSTREAMER_H #define LLVM_MC_MCSTREAMER_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCPseudoProbe.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWinEH.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/Error.h" #include "llvm/Support/MD5.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/ARMTargetParser.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/VersionTuple.h" #include #include @@ -39,20 +37,24 @@ namespace llvm { +class APInt; class AssemblerConstantPools; class MCAsmBackend; +class MCAssembler; class MCContext; -struct MCDwarfFrameInfo; class MCExpr; +class MCFragment; class MCInst; class MCInstPrinter; class MCRegister; class MCSection; class MCStreamer; -class MCSymbolRefExpr; class MCSubtargetInfo; -class raw_ostream; +class MCSymbol; +class MCSymbolRefExpr; +class Triple; class Twine; +class raw_ostream; namespace codeview { struct DefRangeRegisterRelHeader; @@ -111,7 +113,7 @@ public: /// Update streamer for a new active section. /// - /// This is called by PopSection and SwitchSection, if the current + /// This is called by popSection and switchSection, if the current /// section changes. 
virtual void changeSection(const MCSection *CurSection, MCSection *Section,
                           const MCExpr *SubSection, raw_ostream &OS);
@@ -163,12 +165,23 @@ public:
   virtual void finishAttributeSection();
   virtual void emitInst(uint32_t Inst, char Suffix = '\0');
 
-  virtual void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE);
+  virtual void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE);
 
   virtual void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value);
 
   void emitConstantPools() override;
 
+  virtual void emitARMWinCFIAllocStack(unsigned Size, bool Wide);
+  virtual void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide);
+  virtual void emitARMWinCFISaveSP(unsigned Reg);
+  virtual void emitARMWinCFISaveFRegs(unsigned First, unsigned Last);
+  virtual void emitARMWinCFISaveLR(unsigned Offset);
+  virtual void emitARMWinCFIPrologEnd(bool Fragment);
+  virtual void emitARMWinCFINop(bool Wide);
+  virtual void emitARMWinCFIEpilogStart(unsigned Condition);
+  virtual void emitARMWinCFIEpilogEnd();
+  virtual void emitARMWinCFICustom(unsigned Opcode);
+
   /// Reset any state between object emissions, i.e. the equivalent of
   /// MCStreamer's reset method.
   virtual void reset();
@@ -215,7 +228,7 @@ class MCStreamer {
   DenseMap<const MCSymbol *, unsigned> SymbolOrdering;
 
   /// This is a stack of current and previous section values saved by
-  /// PushSection.
+  /// pushSection.
   SmallVector<std::pair<MCSectionSubPair, MCSectionSubPair>, 4> SectionStack;
 
   /// Pointer to the parser's SMLoc if available. This is used to provide
@@ -247,9 +260,9 @@ protected:
     return CurrentWinFrameInfo;
   }
 
-  virtual void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame);
+  virtual void emitWindowsUnwindTables(WinEH::FrameInfo *Frame);
 
-  virtual void EmitWindowsUnwindTables();
+  virtual void emitWindowsUnwindTables();
 
   virtual void emitRawTextImpl(StringRef String);
 
@@ -344,7 +357,7 @@ public:
   /// Return a raw_ostream that comments can be written to. Unlike
   /// AddComment, you are required to terminate comments with \n if you use this
   /// method.
-  virtual raw_ostream &GetCommentOS();
+  virtual raw_ostream &getCommentOS();
 
   /// Print T and prefix it with the comment string (normally #) and
   /// optionally a tab. This prints the comment immediately, not at the end of
@@ -359,8 +372,8 @@ public:
   /// Emit added explicit comments.
   virtual void emitExplicitComments();
 
-  /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
-  virtual void AddBlankLine() {}
+  /// Emit a blank line to a .s file to pretty it up.
+  virtual void addBlankLine() {}
 
   /// @}
 
@@ -384,18 +397,18 @@ public:
   /// Returns an index to represent the order a symbol was emitted in.
   /// (zero if we did not emit that symbol)
-  unsigned GetSymbolOrder(const MCSymbol *Sym) const {
+  unsigned getSymbolOrder(const MCSymbol *Sym) const {
     return SymbolOrdering.lookup(Sym);
   }
 
   /// Update streamer for a new active section.
   ///
-  /// This is called by PopSection and SwitchSection, if the current
+  /// This is called by popSection and switchSection, if the current
   /// section changes.
   virtual void changeSection(MCSection *, const MCExpr *);
 
   /// Save the current and previous section on the section stack.
-  void PushSection() {
+  void pushSection() {
     SectionStack.push_back(
         std::make_pair(getCurrentSection(), getPreviousSection()));
   }
 
@@ -404,7 +417,7 @@ public:
   /// Calls changeSection as needed.
   ///
   /// Returns false if the stack was empty.
-  bool PopSection() {
+  bool popSection() {
     if (SectionStack.size() <= 1)
       return false;
     auto I = SectionStack.end();
@@ -419,11 +432,11 @@ public:
     return true;
   }
 
-  bool SubSection(const MCExpr *Subsection) {
+  bool subSection(const MCExpr *Subsection) {
     if (SectionStack.empty())
       return false;
 
-    SwitchSection(SectionStack.back().first.first, Subsection);
+    switchSection(SectionStack.back().first.first, Subsection);
     return true;
   }
 
@@ -431,13 +444,13 @@ public:
   /// is required to update CurSection.
   ///
   /// This corresponds to assembler directives like .section, .text, etc.
-  virtual void SwitchSection(MCSection *Section,
+  virtual void switchSection(MCSection *Section,
                              const MCExpr *Subsection = nullptr);
 
   /// Set the current section where code is being emitted to \p Section.
   /// This is required to update CurSection. This version does not call
   /// changeSection.
-  void SwitchSectionNoChange(MCSection *Section,
+  void switchSectionNoChange(MCSection *Section,
                              const MCExpr *Subsection = nullptr) {
     assert(Section && "Cannot switch to a null section!");
     MCSectionSubPair curSection = SectionStack.back().first;
@@ -455,7 +468,7 @@ public:
   ///
   /// Each emitted symbol will be tracked in the ordering table,
   /// so we can sort on them later.
-  void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment);
+  void assignFragment(MCSymbol *Symbol, MCFragment *Fragment);
 
   /// Returns the mnemonic for \p MI, if the streamer has access to an
   /// instruction printer and returns an empty string otherwise.
@@ -550,40 +563,40 @@ public:
   /// Start emitting COFF symbol definition
   ///
   /// \param Symbol - The symbol to have its External & Type fields set.
-  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+  virtual void beginCOFFSymbolDef(const MCSymbol *Symbol);
 
   /// Emit the storage class of the symbol.
   ///
   /// \param StorageClass - The storage class the symbol should have.
-  virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+  virtual void emitCOFFSymbolStorageClass(int StorageClass);
 
   /// Emit the type of the symbol.
   ///
   /// \param Type - A COFF type identifier (see COFF::SymbolType in X86COFF.h)
-  virtual void EmitCOFFSymbolType(int Type);
+  virtual void emitCOFFSymbolType(int Type);
 
   /// Marks the end of the symbol definition.
-  virtual void EndCOFFSymbolDef();
+  virtual void endCOFFSymbolDef();
 
-  virtual void EmitCOFFSafeSEH(MCSymbol const *Symbol);
+  virtual void emitCOFFSafeSEH(MCSymbol const *Symbol);
 
   /// Emits the symbol table index of a Symbol into the current section.
-  virtual void EmitCOFFSymbolIndex(MCSymbol const *Symbol);
+  virtual void emitCOFFSymbolIndex(MCSymbol const *Symbol);
 
   /// Emits a COFF section index.
   ///
   /// \param Symbol - Symbol the section number relocation should point to.
-  virtual void EmitCOFFSectionIndex(MCSymbol const *Symbol);
+  virtual void emitCOFFSectionIndex(MCSymbol const *Symbol);
 
   /// Emits a COFF section relative relocation.
   ///
   /// \param Symbol - Symbol the section relative relocation should point to.
-  virtual void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset);
+  virtual void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset);
 
   /// Emits a COFF image relative relocation.
   ///
   /// \param Symbol - Symbol the image relative relocation should point to.
-  virtual void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
+  virtual void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
 
   /// Emits an lcomm directive with XCOFF csect information.
   ///
@@ -615,6 +628,12 @@ public:
   /// changed at the end of assembly.
virtual void emitXCOFFRenameDirective(const MCSymbol *Name,
                                       StringRef Rename);
 
+  /// Emit an XCOFF .ref directive which creates an R_REF type entry in the
+  /// relocation table for one or more symbols.
+  ///
+  /// \param Sym - The symbol on the .ref directive.
+  virtual void emitXCOFFRefDirective(StringRef Sym);
+
   /// Emit an ELF .size directive.
   ///
   /// This corresponds to an assembler statement such as:
@@ -907,6 +926,7 @@ public:
                                unsigned CUID = 0);
 
   virtual void emitCFIBKeyFrame();
+  virtual void emitCFIMTETaggedFrame();
 
   /// This implements the DWARF2 '.loc fileno lineno ...' assembler
   /// directive.
@@ -918,16 +938,16 @@ public:
   /// Associate a filename with a specified logical file number, and also
   /// specify that file's checksum information. This implements the '.cv_file 4
   /// "foo.c"' assembler directive. Returns true on success.
-  virtual bool EmitCVFileDirective(unsigned FileNo, StringRef Filename,
+  virtual bool emitCVFileDirective(unsigned FileNo, StringRef Filename,
                                    ArrayRef<uint8_t> Checksum,
                                    unsigned ChecksumKind);
 
   /// Introduces a function id for use with .cv_loc.
-  virtual bool EmitCVFuncIdDirective(unsigned FunctionId);
+  virtual bool emitCVFuncIdDirective(unsigned FunctionId);
 
   /// Introduces an inline call site id for use with .cv_loc. Includes
   /// extra information for inline line table generation.
-  virtual bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
+  virtual bool emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
                                            unsigned IAFile, unsigned IALine,
                                            unsigned IACol, SMLoc Loc);
 
@@ -983,7 +1003,7 @@ public:
   virtual void emitCVFileChecksumOffsetDirective(unsigned FileNo) {}
 
   /// This implements the CodeView '.cv_fpo_data' assembler directive.
-  virtual void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {}
+  virtual void emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {}
 
   /// Emit the absolute difference between two symbols.
   ///
@@ -1022,28 +1042,28 @@ public:
   virtual void emitCFIWindowSave();
   virtual void emitCFINegateRAState();
 
-  virtual void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
-  virtual void EmitWinCFIEndProc(SMLoc Loc = SMLoc());
+  virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
+  virtual void emitWinCFIEndProc(SMLoc Loc = SMLoc());
 
   /// This is used on platforms, such as Windows on ARM64, that require function
   /// or funclet sizes to be emitted in .xdata before the End marker is emitted
   /// for the frame. We cannot use the End marker, as it is not set at the
   /// point of emitting .xdata, in order to indicate that the frame is active.
- virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, + virtual void emitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc()); + virtual void emitWinCFIStartChained(SMLoc Loc = SMLoc()); + virtual void emitWinCFIEndChained(SMLoc Loc = SMLoc()); + virtual void emitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc()); + virtual void emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, + virtual void emitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); + virtual void emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, + virtual void emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc()); - virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + virtual void emitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); + virtual void emitWinCFIEndProlog(SMLoc Loc = SMLoc()); + virtual void emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc = SMLoc()); - virtual void EmitWinEHHandlerData(SMLoc Loc = SMLoc()); + virtual void emitWinEHHandlerData(SMLoc Loc = SMLoc()); virtual void emitCGProfileEntry(const MCSymbolRefExpr *From, const MCSymbolRefExpr *To, uint64_t Count); @@ -1099,7 +1119,7 @@ public: /// Streamer specific finalization. virtual void finishImpl(); /// Finish emission of machine code. 
- void Finish(SMLoc EndLoc = SMLoc()); + void finish(SMLoc EndLoc = SMLoc()); virtual bool mayHaveInstructions(MCSection &Sec) const { return true; } diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h index 839a3bd85829..e1f0a86141e3 100644 --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -14,12 +14,13 @@ #define LLVM_MC_MCSUBTARGETINFO_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/MC/SubtargetFeature.h" -#include #include #include #include diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index d8fc4505d446..91ef6ee31d8d 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -14,7 +14,7 @@ #define LLVM_MC_MCSYMBOL_H #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringMapEntry.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h index 5a4852e0e895..5eab32cb5c12 100644 --- a/llvm/include/llvm/MC/MCSymbolWasm.h +++ b/llvm/include/llvm/MC/MCSymbolWasm.h @@ -86,9 +86,9 @@ public: bool omitFromLinkingSection() const { return OmitFromLinkingSection; } void setOmitFromLinkingSection() { OmitFromLinkingSection = true; } - bool hasImportModule() const { return ImportModule.hasValue(); } + bool hasImportModule() const { return ImportModule.has_value(); } StringRef getImportModule() const { - if (ImportModule.hasValue()) + if (ImportModule) return ImportModule.getValue(); // Use a default module name of "env" for now, for compatibility with // existing tools. 
@@ -98,15 +98,15 @@ public:
   }
   void setImportModule(StringRef Name) { ImportModule = Name; }
 
-  bool hasImportName() const { return ImportName.hasValue(); }
+  bool hasImportName() const { return ImportName.has_value(); }
   StringRef getImportName() const {
-    if (ImportName.hasValue())
+    if (ImportName)
       return ImportName.getValue();
     return getName();
   }
   void setImportName(StringRef Name) { ImportName = Name; }
 
-  bool hasExportName() const { return ExportName.hasValue(); }
+  bool hasExportName() const { return ExportName.has_value(); }
   StringRef getExportName() const { return ExportName.getValue(); }
   void setExportName(StringRef Name) { ExportName = Name; }
 
@@ -129,12 +129,12 @@ public:
   void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
 
   const wasm::WasmGlobalType &getGlobalType() const {
-    assert(GlobalType.hasValue());
+    assert(GlobalType);
     return GlobalType.getValue();
   }
   void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; }
 
-  bool hasTableType() const { return TableType.hasValue(); }
+  bool hasTableType() const { return TableType.has_value(); }
   const wasm::WasmTableType &getTableType() const {
     assert(hasTableType());
     return TableType.getValue();
diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h
index 752e1e7bba0f..2ec265e66300 100644
--- a/llvm/include/llvm/MC/MCSymbolXCOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h
@@ -39,8 +39,7 @@ public:
   };
 
   XCOFF::StorageClass getStorageClass() const {
-    assert(StorageClass.hasValue() &&
-           "StorageClass not set on XCOFF MCSymbol.");
+    assert(StorageClass && "StorageClass not set on XCOFF MCSymbol.");
     return StorageClass.getValue();
   }
 
diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h
index db50dc6749e2..9c906cdc90d0 100644
--- a/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/llvm/include/llvm/MC/MCTargetOptions.h
@@ -31,6 +31,12 @@ enum class DebugCompressionType {
   Z,    ///< zlib style compression
 };
 
+enum class EmitDwarfUnwindType {
+  Always,          // Always emit dwarf unwind
+  NoCompactUnwind, // Only emit if compact unwind isn't available
+  Default,         // Default behavior is based on the target
+};
+
 class StringRef;
 
 class MCTargetOptions {
@@ -47,7 +53,6 @@ public:
   bool MCNoDeprecatedWarn : 1;
   bool MCNoTypeCheck : 1;
   bool MCSaveTempLabels : 1;
-  bool MCUseDwarfDirectory : 1;
   bool MCIncrementalLinkerCompatible : 1;
   bool ShowMCEncoding : 1;
   bool ShowMCInst : 1;
@@ -57,8 +62,22 @@ public:
   bool PreserveAsmComments : 1;
   bool Dwarf64 : 1;
 
+  EmitDwarfUnwindType EmitDwarfUnwind;
+
   int DwarfVersion = 0;
 
+  enum DwarfDirectory {
+    // Force disable
+    DisableDwarfDirectory,
+    // Force enable, for assemblers that support
+    // `.file fileno directory filename' syntax
+    EnableDwarfDirectory,
+    // Default is based on the target
+    DefaultDwarfDirectory
+  };
+  DwarfDirectory MCUseDwarfDirectory;
+
   std::string ABIName;
   std::string AssemblyLanguage;
   std::string SplitDwarfFile;
diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index 189484198916..d51e740177f7 100644
--- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -20,6 +20,7 @@
 namespace llvm {
 
 class MCTargetOptions;
+enum class EmitDwarfUnwindType;
 
 namespace mc {
 
@@ -32,6 +33,8 @@ int getDwarfVersion();
 
 bool getDwarf64();
 
+EmitDwarfUnwindType getEmitDwarfUnwind();
+
 bool getShowMCInst();
 
 bool getFatalWarnings();
 
diff --git a/llvm/include/llvm/MC/MCValue.h b/llvm/include/llvm/MC/MCValue.h
index 37feee4c9ea8..37265d72c9df 100644
--- a/llvm/include/llvm/MC/MCValue.h
+++ b/llvm/include/llvm/MC/MCValue.h
@@ -15,7 +15,6 @@
 
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/DataTypes.h"
-#include
 
 namespace llvm {
 class raw_ostream;
diff --git a/llvm/include/llvm/MC/MCWin64EH.h b/llvm/include/llvm/MC/MCWin64EH.h
index 065161d1759e..622a666b78dd 100644
--- a/llvm/include/llvm/MC/MCWin64EH.h
+++ b/llvm/include/llvm/MC/MCWin64EH.h
@@ -57,13 +57,19 @@ public:
                       bool HandlerData) const override;
 };
 
-class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
+class ARMUnwindEmitter : public WinEH::UnwindEmitter {
 public:
   void Emit(MCStreamer &Streamer) const override;
   void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI,
                       bool HandlerData) const override;
 };
 
+class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
+public:
+  void Emit(MCStreamer &Streamer) const override;
+  void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI,
+                      bool HandlerData) const override;
+};
 }
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index af1ed6faf753..0778c4d52c5e 100644
--- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -45,15 +45,15 @@ public:
   void emitThumbFunc(MCSymbol *Func) override;
   bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
   void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
-  void BeginCOFFSymbolDef(MCSymbol const *Symbol) override;
-  void EmitCOFFSymbolStorageClass(int StorageClass) override;
-  void EmitCOFFSymbolType(int Type) override;
-  void EndCOFFSymbolDef() override;
-  void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
-  void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override;
-  void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
-  void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
-  void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+  void beginCOFFSymbolDef(MCSymbol const *Symbol) override;
+  void emitCOFFSymbolStorageClass(int StorageClass) override;
+  void emitCOFFSymbolType(int Type) override;
+  void endCOFFSymbolDef() override;
+  void emitCOFFSafeSEH(MCSymbol const *Symbol) override;
+  void emitCOFFSymbolIndex(MCSymbol const *Symbol) override;
+  void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
+  void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
+  void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
   void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         unsigned ByteAlignment) override;
   void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -64,7 +64,7 @@ public:
   void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
                       unsigned ByteAlignment) override;
   void emitIdent(StringRef IdentString) override;
-  void EmitWinEHHandlerData(SMLoc Loc) override;
+  void emitWinEHHandlerData(SMLoc Loc) override;
   void emitCGProfileEntry(const MCSymbolRefExpr *From,
                           const MCSymbolRefExpr *To, uint64_t Count) override;
   void finishImpl() override;
diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h
index 5688255810d0..c16396ea5e71 100644
--- a/llvm/include/llvm/MC/MCWinEH.h
+++ b/llvm/include/llvm/MC/MCWinEH.h
@@ -50,11 +50,17 @@ struct FrameInfo {
   bool HandlesUnwind = false;
   bool HandlesExceptions = false;
   bool EmitAttempted = false;
+  bool Fragment = false;
 
   int LastFrameInst = -1;
   const FrameInfo *ChainedParent = nullptr;
   std::vector<Instruction> Instructions;
-  MapVector<MCSymbol *, std::vector<Instruction>> EpilogMap;
+  struct Epilog {
+    std::vector<Instruction> Instructions;
+    unsigned Condition;
+    MCSymbol *End;
+  };
+  MapVector<MCSymbol *, Epilog> EpilogMap;
 
   FrameInfo() = default;
   FrameInfo(const MCSymbol *Function, const MCSymbol *BeginFuncEHLabel)
@@ -68,7 +74,7 @@ struct FrameInfo {
     if (!Instructions.empty())
       return false;
     for (const auto &E : EpilogMap)
-      if (!E.second.empty())
+      if (!E.second.Instructions.empty())
         return false;
     return true;
   }
diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h
index 5fc2efbe5284..3faa03fa69e9 100644
--- a/llvm/include/llvm/MC/MCXCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h
@@ -32,6 +32,10 @@ public:
   void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
                                             MCSymbolAttr Linkage,
                                             MCSymbolAttr Visibility) override;
+  void emitXCOFFRefDirective(StringRef Name) override {
+    report_fatal_error("emitXCOFFRefDirective is not implemented yet on "
+                       "object generation path");
+  }
   void emitXCOFFRenameDirective(const MCSymbol *Name,
                                 StringRef Rename) override {
     report_fatal_error("emitXCOFFRenameDirective is not implemented yet on "
diff --git a/llvm/include/llvm/MC/SectionKind.h b/llvm/include/llvm/MC/SectionKind.h
index 0fd86cc457de..61e400fe9ede 100644
--- a/llvm/include/llvm/MC/SectionKind.h
+++ b/llvm/include/llvm/MC/SectionKind.h
@@ -24,6 +24,10 @@ class SectionKind {
     /// Metadata - Debug info sections or other metadata.
     Metadata,
 
+    /// Exclude - This section will be excluded from the final executable or
+    /// shared library. Only valid for ELF / COFF targets.
+    Exclude,
+
     /// Text - Text section, used for functions and other executable code.
     Text,
 
@@ -118,6 +122,8 @@ public:
 
   bool isMetadata() const { return K == Metadata; }
 
+  bool isExclude() const { return K == Exclude; }
+
   bool isText() const { return K == Text || K == ExecuteOnly; }
 
   bool isExecuteOnly() const { return K == ExecuteOnly; }
@@ -180,6 +186,7 @@ private:
 
 public:
 
   static SectionKind getMetadata() { return get(Metadata); }
+  static SectionKind getExclude() { return get(Exclude); }
   static SectionKind getText() { return get(Text); }
   static SectionKind getExecuteOnly() { return get(ExecuteOnly); }
   static SectionKind getReadOnly() { return get(ReadOnly); }
diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h
index 3f9c91be05d3..42133f3f7726 100644
--- a/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/llvm/include/llvm/MC/StringTableBuilder.h
@@ -85,7 +85,6 @@ public:
   void write(raw_ostream &OS) const;
   void write(uint8_t *Buf) const;
 
-private:
   bool isFinalized() const { return Finalized; }
 };
 
diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h
index 032e2a7df1f2..799912d4bacb 100644
--- a/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/llvm/include/llvm/MC/SubtargetFeature.h
@@ -17,11 +17,10 @@
 #ifndef LLVM_MC_SUBTARGETFEATURE_H
 #define LLVM_MC_SUBTARGETFEATURE_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MathExtras.h"
 #include
-#include
 #include
 #include
 #include
diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index da9a9269edbf..eeac559f81b1 100644
--- a/llvm/include/llvm/MC/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -27,7 +27,6 @@
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include
 #include
 #include
 #include
@@ -56,13 +55,12 @@ class MCTargetAsmParser;
 class MCTargetOptions;
class MCTargetStreamer; class raw_ostream; -class raw_pwrite_stream; class TargetMachine; class TargetOptions; namespace mca { class CustomBehaviour; class InstrPostProcess; -class SourceMgr; +struct SourceMgr; } // namespace mca MCStreamer *createNullStreamer(MCContext &Ctx); @@ -111,6 +109,16 @@ MCStreamer *createXCOFFStreamer(MCContext &Ctx, std::unique_ptr &&OW, std::unique_ptr &&CE, bool RelaxAll); +MCStreamer *createSPIRVStreamer(MCContext &Ctx, + std::unique_ptr &&TAB, + std::unique_ptr &&OW, + std::unique_ptr &&CE, + bool RelaxAll); +MCStreamer *createDXContainerStreamer(MCContext &Ctx, + std::unique_ptr &&TAB, + std::unique_ptr &&OW, + std::unique_ptr &&CE, + bool RelaxAll); MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx); @@ -177,7 +185,6 @@ public: const MCInstrInfo &MII, const MCRegisterInfo &MRI); using MCCodeEmitterCtorTy = MCCodeEmitter *(*)(const MCInstrInfo &II, - const MCRegisterInfo &MRI, MCContext &Ctx); using ELFStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, @@ -204,6 +211,17 @@ public: std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, bool RelaxAll); + using SPIRVStreamerCtorTy = + MCStreamer *(*)(const Triple &T, MCContext &Ctx, + std::unique_ptr &&TAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, bool RelaxAll); + + using DXContainerStreamerCtorTy = + MCStreamer *(*)(const Triple &T, MCContext &Ctx, + std::unique_ptr &&TAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, bool RelaxAll); using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S); using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)( @@ -305,6 +323,8 @@ private: ELFStreamerCtorTy ELFStreamerCtorFn = nullptr; WasmStreamerCtorTy WasmStreamerCtorFn = nullptr; XCOFFStreamerCtorTy XCOFFStreamerCtorFn = nullptr; + SPIRVStreamerCtorTy SPIRVStreamerCtorFn = nullptr; + DXContainerStreamerCtorTy DXContainerStreamerCtorFn = nullptr; /// Construction function for this target's null TargetStreamer, if /// registered (default = nullptr). @@ -508,11 +528,10 @@ public: /// createMCCodeEmitter - Create a target specific code emitter. MCCodeEmitter *createMCCodeEmitter(const MCInstrInfo &II, - const MCRegisterInfo &MRI, MCContext &Ctx) const { if (!MCCodeEmitterCtorFn) return nullptr; - return MCCodeEmitterCtorFn(II, MRI, Ctx); + return MCCodeEmitterCtorFn(II, Ctx); } /// Create a target specific MCStreamer. 
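The TargetRegistry changes above drop the unused MCRegisterInfo parameter from MCCodeEmitterCtorTy and Target::createMCCodeEmitter, so every backend's emitter factory and every call site lose one argument. A minimal sketch of the new shape follows; HypotheticalMCCodeEmitter and getTheHypotheticalTarget() are illustrative stand-ins for a real backend, not part of this patch.

#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"

using namespace llvm;

namespace {
// Hypothetical do-nothing emitter, only here to give the factory a type.
class HypotheticalMCCodeEmitter : public MCCodeEmitter {
public:
  HypotheticalMCCodeEmitter(const MCInstrInfo &, MCContext &) {}
  void encodeInstruction(const MCInst &, raw_ostream &,
                         SmallVectorImpl<MCFixup> &,
                         const MCSubtargetInfo &) const override {}
};
} // namespace

// Assumed to exist in the backend's MCTargetDesc, as in any real target.
Target &getTheHypotheticalTarget();

// After this patch the factory matches the two-argument MCCodeEmitterCtorTy;
// the MCRegisterInfo parameter is gone.
static MCCodeEmitter *createHypotheticalMCCodeEmitter(const MCInstrInfo &MCII,
                                                      MCContext &Ctx) {
  return new HypotheticalMCCodeEmitter(MCII, Ctx);
}

extern "C" void LLVMInitializeHypotheticalTargetMC() {
  // Registration itself is unchanged; only the callback's arity differs.
  TargetRegistry::RegisterMCCodeEmitter(getTheHypotheticalTarget(),
                                        createHypotheticalMCCodeEmitter);
}

// Call sites shrink accordingly:
//   MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
//   (previously: TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx))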
@@ -576,6 +595,22 @@ public: S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), std::move(Emitter), RelaxAll); break; + case Triple::SPIRV: + if (SPIRVStreamerCtorFn) + S = SPIRVStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), RelaxAll); + else + S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), RelaxAll); + break; + case Triple::DXContainer: + if (DXContainerStreamerCtorFn) + S = DXContainerStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), RelaxAll); + else + S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), RelaxAll); + break; } if (ObjectTargetStreamerCtorFn) ObjectTargetStreamerCtorFn(*S, STI); @@ -956,6 +991,14 @@ struct TargetRegistry { T.ELFStreamerCtorFn = Fn; } + static void RegisterSPIRVStreamer(Target &T, Target::SPIRVStreamerCtorTy Fn) { + T.SPIRVStreamerCtorFn = Fn; + } + + static void RegisterDXContainerStreamer(Target &T, Target::DXContainerStreamerCtorTy Fn) { + T.DXContainerStreamerCtorFn = Fn; + } + static void RegisterWasmStreamer(Target &T, Target::WasmStreamerCtorTy Fn) { T.WasmStreamerCtorFn = Fn; } @@ -1362,7 +1405,6 @@ template struct RegisterMCCodeEmitter { private: static MCCodeEmitter *Allocator(const MCInstrInfo & /*II*/, - const MCRegisterInfo & /*MRI*/, MCContext & /*Ctx*/) { return new MCCodeEmitterImpl(); } diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h index c4be5312ea19..527dc766b739 100644 --- a/llvm/include/llvm/MCA/CustomBehaviour.h +++ b/llvm/include/llvm/MCA/CustomBehaviour.h @@ -49,6 +49,11 @@ public: /// scheduling model. virtual void postProcessInstruction(std::unique_ptr &Inst, const MCInst &MCI) {} + + // The resetState() method gets invoked at the beginning of each code region + // so that targets that override this function can clear any state that they + // have left from the previous code region. + virtual void resetState() {} }; /// Class which can be overriden by targets to enforce instruction diff --git a/llvm/include/llvm/MCA/IncrementalSourceMgr.h b/llvm/include/llvm/MCA/IncrementalSourceMgr.h new file mode 100644 index 000000000000..d91cc5f23311 --- /dev/null +++ b/llvm/include/llvm/MCA/IncrementalSourceMgr.h @@ -0,0 +1,92 @@ +//===---------------- IncrementalSourceMgr.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file contains IncrementalSourceMgr, an implementation of SourceMgr +/// that allows users to add new instructions incrementally / dynamically. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_INCREMENTALSOURCEMGR_H +#define LLVM_MCA_INCREMENTALSOURCEMGR_H + +#include "llvm/MCA/SourceMgr.h" +#include + +namespace llvm { +namespace mca { + +/// An implementation of \a SourceMgr that allows users to add new instructions +/// incrementally / dynamically. +/// Note that this SourceMgr takes ownership of all \a mca::Instruction. +class IncrementalSourceMgr : public SourceMgr { + /// Owner of all mca::Instruction instances. 
Note that we use std::deque here
+  /// to get better throughput than std::vector or llvm::SmallVector, which
+  /// usually pay a higher re-allocation cost when there is a large number
+  /// of instructions.
+  std::deque<UniqueInst> InstStorage;
+
+  /// Instructions that are ready to be used. Each of them is a pointer to an
+  /// \a UniqueInst inside InstStorage.
+  std::deque<Instruction *> Staging;
+
+  /// Current instruction index.
+  unsigned TotalCounter;
+
+  /// End-of-stream flag.
+  bool EOS;
+
+  /// Called when an instruction is no longer needed.
+  using InstFreedCallback = llvm::function_ref<void(Instruction *)>;
+  InstFreedCallback InstFreedCB;
+
+public:
+  IncrementalSourceMgr() : TotalCounter(0U), EOS(false) {}
+
+  void clear();
+
+  /// Set a callback that is invoked when a mca::Instruction is
+  /// no longer needed. This is usually used for recycling the
+  /// instruction.
+  void setOnInstFreedCallback(InstFreedCallback CB) { InstFreedCB = CB; }
+
+  ArrayRef<UniqueInst> getInstructions() const override {
+    llvm_unreachable("Not applicable");
+  }
+
+  bool hasNext() const override { return !Staging.empty(); }
+  bool isEnd() const override { return EOS; }
+
+  SourceRef peekNext() const override {
+    assert(hasNext());
+    return SourceRef(TotalCounter, *Staging.front());
+  }
+
+  /// Add a new instruction.
+  void addInst(UniqueInst &&Inst) {
+    InstStorage.emplace_back(std::move(Inst));
+    Staging.push_back(InstStorage.back().get());
+  }
+
+  /// Add a recycled instruction.
+  void addRecycledInst(Instruction *Inst) { Staging.push_back(Inst); }
+
+  void updateNext() override;
+
+  /// Mark the end of the instruction stream.
+  void endOfStream() { EOS = true; }
+
+#ifndef NDEBUG
+  /// Print statistics about instruction recycling.
+  void printStatistic(raw_ostream &OS);
+#endif
+};
+
+} // end namespace mca
+} // end namespace llvm
+
+#endif // LLVM_MCA_INCREMENTALSOURCEMGR_H
diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h
index 04b5cf590d70..92b92a515db9 100644
--- a/llvm/include/llvm/MCA/InstrBuilder.h
+++ b/llvm/include/llvm/MCA/InstrBuilder.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_MCA_INSTRBUILDER_H
 #define LLVM_MCA_INSTRBUILDER_H
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -25,6 +26,27 @@
 namespace llvm {
 namespace mca {
 
+class RecycledInstErr : public ErrorInfo<RecycledInstErr> {
+  Instruction *RecycledInst;
+
+public:
+  static char ID;
+
+  explicit RecycledInstErr(Instruction *Inst) : RecycledInst(Inst) {}
+  // Always need to carry an Instruction
+  RecycledInstErr() = delete;
+
+  Instruction *getInst() const { return RecycledInst; }
+
+  void log(raw_ostream &OS) const override {
+    OS << "Instruction is recycled\n";
+  }
+
+  std::error_code convertToErrorCode() const override {
+    return llvm::inconvertibleErrorCode();
+  }
+};
+
 /// A builder class that knows how to construct Instruction objects.
 ///
 /// Every llvm-mca Instruction is described by an object of class InstrDesc.
@@ -48,6 +70,10 @@ class InstrBuilder {
   bool FirstCallInst;
   bool FirstReturnInst;
 
+  using InstRecycleCallback =
+      llvm::function_ref<Instruction *(const InstrDesc &)>;
+  InstRecycleCallback InstRecycleCB;
+
   Expected<const InstrDesc &> createInstrDescImpl(const MCInst &MCI);
   Expected<const InstrDesc &> getOrCreateInstrDesc(const MCInst &MCI);
 
@@ -69,6 +95,10 @@ public:
     FirstReturnInst = true;
   }
 
+  /// Set a callback which is invoked to retrieve a recycled mca::Instruction
+  /// or null if there isn't any.
+ void setInstRecycleCallback(InstRecycleCallback CB) { InstRecycleCB = CB; } + Expected> createInstruction(const MCInst &MCI); }; } // namespace mca diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h index 33e3c8a2e630..86f2d7ade161 100644 --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -472,17 +472,15 @@ struct InstrDesc { // subtarget when computing the reciprocal throughput. unsigned SchedClassID; - unsigned MayLoad : 1; - unsigned MayStore : 1; - unsigned HasSideEffects : 1; - unsigned BeginGroup : 1; - unsigned EndGroup : 1; - unsigned RetireOOO : 1; - // True if all buffered resources are in-order, and there is at least one // buffer which is a dispatch hazard (BufferSize = 0). unsigned MustIssueImmediately : 1; + // True if the corresponding mca::Instruction can be recycled. Currently only + // instructions that are neither variadic nor have any variant can be + // recycled. + unsigned IsRecyclable : 1; + // A zero latency instruction doesn't consume any scheduler resources. bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } @@ -518,8 +516,16 @@ class InstructionBase { unsigned Opcode; // Flags used by the LSUnit. - bool IsALoadBarrier; - bool IsAStoreBarrier; + bool IsALoadBarrier : 1; + bool IsAStoreBarrier : 1; + // Flags copied from the InstrDesc and potentially modified by + // CustomBehaviour or (more likely) InstrPostProcess. + bool MayLoad : 1; + bool MayStore : 1; + bool HasSideEffects : 1; + bool BeginGroup : 1; + bool EndGroup : 1; + bool RetireOOO : 1; public: InstructionBase(const InstrDesc &D, const unsigned Opcode) @@ -568,7 +574,23 @@ public: // Returns true if this instruction is a candidate for move elimination. bool isOptimizableMove() const { return IsOptimizableMove; } void setOptimizableMove() { IsOptimizableMove = true; } - bool isMemOp() const { return Desc.MayLoad || Desc.MayStore; } + void clearOptimizableMove() { IsOptimizableMove = false; } + bool isMemOp() const { return MayLoad || MayStore; } + + // Getters and setters for general instruction flags. + void setMayLoad(bool newVal) { MayLoad = newVal; } + void setMayStore(bool newVal) { MayStore = newVal; } + void setHasSideEffects(bool newVal) { HasSideEffects = newVal; } + void setBeginGroup(bool newVal) { BeginGroup = newVal; } + void setEndGroup(bool newVal) { EndGroup = newVal; } + void setRetireOOO(bool newVal) { RetireOOO = newVal; } + + bool getMayLoad() const { return MayLoad; } + bool getMayStore() const { return MayStore; } + bool getHasSideEffects() const { return HasSideEffects; } + bool getBeginGroup() const { return BeginGroup; } + bool getEndGroup() const { return EndGroup; } + bool getRetireOOO() const { return RetireOOO; } }; /// An instruction propagated through the simulated instruction pipeline. 
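Taken together, these hooks form the new recycling loop: IncrementalSourceMgr::setOnInstFreedCallback announces instructions that are no longer needed, setInstRecycleCallback lets the builder ask for one of them before allocating, and RecycledInstErr carries the reused object back to the driver. A minimal sketch of the wiring, assuming the stripped function_ref signatures are void(Instruction *) and Instruction *(const InstrDesc &) as the surrounding comments suggest:

#include "llvm/MCA/IncrementalSourceMgr.h"
#include "llvm/MCA/InstrBuilder.h"
#include <deque>

using namespace llvm;
using namespace llvm::mca;

// Pool of retired instructions available for reuse. File-scope because both
// callbacks are stored as non-owning function_refs, so the callables and
// their state must outlive the source manager and the builder.
static std::deque<Instruction *> FreePool;

void wireRecycling(IncrementalSourceMgr &SM, InstrBuilder &IB) {
  // Retired instructions land in the pool instead of being thrown away.
  static auto OnInstFreed = [](Instruction *I) { FreePool.push_back(I); };
  SM.setOnInstFreedCallback(OnInstFreed);

  // Before allocating, the builder asks for a pooled instruction; returning
  // nullptr makes it fall back to a normal allocation.
  static auto GetRecycled = [](const InstrDesc &) -> Instruction * {
    if (FreePool.empty())
      return nullptr;
    Instruction *I = FreePool.front();
    FreePool.pop_front();
    return I;
  };
  IB.setInstRecycleCallback(GetRecycled);
}

// When createInstruction() reuses a pooled object, it signals this with a
// RecycledInstErr; the driver is expected to handle that error, take
// getInst(), and feed the instruction back via addRecycledInst().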
@@ -628,6 +650,8 @@ public:
         UsedBuffers(D.UsedBuffers), CriticalRegDep(), CriticalMemDep(),
         CriticalResourceMask(0), IsEliminated(false) {}
 
+  void reset();
+
   unsigned getRCUTokenID() const { return RCUTokenID; }
   unsigned getLSUTokenID() const { return LSUTokenID; }
   void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
@@ -657,6 +681,7 @@ public:
   bool updateDispatched();
   bool updatePending();
 
+  bool isInvalid() const { return Stage == IS_INVALID; }
   bool isDispatched() const { return Stage == IS_DISPATCHED; }
   bool isPending() const { return Stage == IS_PENDING; }
   bool isReady() const { return Stage == IS_READY; }
diff --git a/llvm/include/llvm/MCA/Pipeline.h b/llvm/include/llvm/MCA/Pipeline.h
index 0ac988c52dc1..92c3836124ad 100644
--- a/llvm/include/llvm/MCA/Pipeline.h
+++ b/llvm/include/llvm/MCA/Pipeline.h
@@ -51,6 +51,13 @@ class Pipeline {
   Pipeline(const Pipeline &P) = delete;
   Pipeline &operator=(const Pipeline &P) = delete;
 
+  enum class State {
+    Created, // Pipeline was just created. The default state.
+    Started, // Pipeline has started running.
+    Paused   // Pipeline is paused.
+  };
+  State CurrentState;
+
   /// An ordered list of stages that define this instruction pipeline.
   SmallVector<std::unique_ptr<Stage>, 8> Stages;
   std::set<HWEventListener *> Listeners;
@@ -62,13 +69,16 @@ class Pipeline {
   void notifyCycleEnd();
 
 public:
-  Pipeline() : Cycles(0) {}
+  Pipeline() : CurrentState(State::Created), Cycles(0) {}
   void appendStage(std::unique_ptr<Stage> S);
 
   /// Returns the total number of simulated cycles.
   Expected<unsigned> run();
 
   void addEventListener(HWEventListener *Listener);
+
+  /// Returns whether the pipeline is currently paused.
+  bool isPaused() const { return CurrentState == State::Paused; }
 };
 } // namespace mca
 } // namespace llvm
diff --git a/llvm/include/llvm/MCA/SourceMgr.h b/llvm/include/llvm/MCA/SourceMgr.h
index e844171bdcab..16a60d1116ad 100644
--- a/llvm/include/llvm/MCA/SourceMgr.h
+++ b/llvm/include/llvm/MCA/SourceMgr.h
@@ -6,9 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 /// \file
-/// This file implements class SourceMgr. Class SourceMgr abstracts the input
-/// code sequence (a sequence of MCInst), and assings unique identifiers to
-/// every instruction in the sequence.
+/// This file contains abstract class SourceMgr and the default implementation,
+/// CircularSourceMgr.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -25,30 +24,62 @@ namespace mca {
 // prevent compiler error C2139 about intrinsic type trait '__is_assignable'.
 typedef std::pair<unsigned, const Instruction &> SourceRef;
 
-class SourceMgr {
+/// Abstracting the input code sequence (a sequence of MCInst) and assigning
+/// unique identifiers to every instruction in the sequence.
+struct SourceMgr {
   using UniqueInst = std::unique_ptr<Instruction>;
+
+  /// Provides a fixed range of \a UniqueInst to iterate.
+  virtual ArrayRef<UniqueInst> getInstructions() const = 0;
+
+  /// (Fixed) Number of \a UniqueInst. Returns the size of
+  /// \a getInstructions by default.
+  virtual size_t size() const { return getInstructions().size(); }
+
+  /// Whether there is any \a SourceRef to inspect / peek next.
+  /// Note that returning false from this doesn't mean the instruction
+  /// stream has ended.
+  virtual bool hasNext() const = 0;
+
+  /// Whether the instruction stream has ended.
+  virtual bool isEnd() const = 0;
+
+  /// The next \a SourceRef.
+  virtual SourceRef peekNext() const = 0;
+
+  /// Advance to the next \a SourceRef.
+  virtual void updateNext() = 0;
+
+  virtual ~SourceMgr() {}
+};
+
+/// The default implementation of \a SourceMgr. It always takes a fixed number
+/// of instructions and provides an option to loop the given sequence for a
+/// certain number of iterations.
+class CircularSourceMgr : public SourceMgr {
   ArrayRef<UniqueInst> Sequence;
   unsigned Current;
   const unsigned Iterations;
   static const unsigned DefaultIterations = 100;
 
 public:
-  SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
-      : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+  CircularSourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+      : Sequence(S), Current(0U), Iterations(Iter ? Iter : DefaultIterations) {}
+
+  ArrayRef<UniqueInst> getInstructions() const override { return Sequence; }
 
   unsigned getNumIterations() const { return Iterations; }
-  unsigned size() const { return Sequence.size(); }
-  bool hasNext() const { return Current < (Iterations * Sequence.size()); }
-  void updateNext() { ++Current; }
+  bool hasNext() const override {
+    return Current < (Iterations * Sequence.size());
+  }
+  bool isEnd() const override { return !hasNext(); }
 
-  SourceRef peekNext() const {
+  SourceRef peekNext() const override {
     assert(hasNext() && "Already at end of sequence!");
     return SourceRef(Current, *Sequence[Current % Sequence.size()]);
   }
 
-  using const_iterator = ArrayRef<UniqueInst>::const_iterator;
-  const_iterator begin() const { return Sequence.begin(); }
-  const_iterator end() const { return Sequence.end(); }
+  void updateNext() override { ++Current; }
 };
 
 } // namespace mca
diff --git a/llvm/include/llvm/MCA/Stages/EntryStage.h b/llvm/include/llvm/MCA/Stages/EntryStage.h
index 4c50838bef4b..fb1244aa1933 100644
--- a/llvm/include/llvm/MCA/Stages/EntryStage.h
+++ b/llvm/include/llvm/MCA/Stages/EntryStage.h
@@ -30,7 +30,7 @@ class EntryStage final : public Stage {
   unsigned NumRetired;
 
   // Updates the program counter, and sets 'CurrentInstruction'.
-  void getNextInstruction();
+  Error getNextInstruction();
 
   EntryStage(const EntryStage &Other) = delete;
   EntryStage &operator=(const EntryStage &Other) = delete;
@@ -42,6 +42,7 @@ public:
   bool hasWorkToComplete() const override;
   Error execute(InstRef &IR) override;
   Error cycleStart() override;
+  Error cycleResume() override;
   Error cycleEnd() override;
 };
 
diff --git a/llvm/include/llvm/MCA/Stages/Stage.h b/llvm/include/llvm/MCA/Stages/Stage.h
index 84868e89ac29..2477b9b3d69c 100644
--- a/llvm/include/llvm/MCA/Stages/Stage.h
+++ b/llvm/include/llvm/MCA/Stages/Stage.h
@@ -48,6 +48,9 @@ public:
   /// phase to prepare for the executions during the cycle.
   virtual Error cycleStart() { return ErrorSuccess(); }
 
+  /// Called after the pipeline is resumed from a paused state.
+  virtual Error cycleResume() { return ErrorSuccess(); }
+
   /// Called once at the end of each cycle.
   virtual Error cycleEnd() { return ErrorSuccess(); }
 
@@ -82,6 +85,16 @@ public:
   }
 };
 
+/// This is actually not an error but a marker to indicate that
+/// the instruction stream is paused.
+struct InstStreamPause : public ErrorInfo<InstStreamPause> {
+  static char ID;
+
+  std::error_code convertToErrorCode() const override {
+    return llvm::inconvertibleErrorCode();
+  }
+  void log(raw_ostream &OS) const override { OS << "Stream is paused"; }
+};
 } // namespace mca
 } // namespace llvm
 #endif // LLVM_MCA_STAGES_STAGE_H
diff --git a/llvm/include/llvm/ObjCopy/COFF/COFFConfig.h b/llvm/include/llvm/ObjCopy/COFF/COFFConfig.h
new file mode 100644
index 000000000000..29d56d75698b
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/COFF/COFFConfig.h
@@ -0,0 +1,27 @@
+//===- COFFConfig.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_COFF_COFFCONFIG_H
+#define LLVM_OBJCOPY_COFF_COFFCONFIG_H
+
+#include "llvm/ADT/Optional.h"
+
+namespace llvm {
+namespace objcopy {
+
+// COFF specific configuration for copying/stripping a single file.
+struct COFFConfig {
+  Optional Subsystem;
+  Optional MajorSubsystemVersion;
+  Optional MinorSubsystemVersion;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_COFF_COFFCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h b/llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h
new file mode 100644
index 000000000000..d9043d6c5d01
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h
@@ -0,0 +1,36 @@
+//===- COFFObjcopy.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_COFF_COFFOBJCOPY_H
+#define LLVM_OBJCOPY_COFF_COFFOBJCOPY_H
+
+namespace llvm {
+class Error;
+class raw_ostream;
+
+namespace object {
+class COFFObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CommonConfig;
+struct COFFConfig;
+
+namespace coff {
+
+/// Apply the transformations described by \p Config and \p COFFConfig
+/// to \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const CommonConfig &Config, const COFFConfig &,
+                             object::COFFObjectFile &In, raw_ostream &Out);
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_COFF_COFFOBJCOPY_H
diff --git a/llvm/include/llvm/ObjCopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h
new file mode 100644
index 000000000000..24503caed342
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/CommonConfig.h
@@ -0,0 +1,271 @@
+//===- CommonConfig.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJCOPY_COMMONCONFIG_H +#define LLVM_OBJCOPY_COMMONCONFIG_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Support/GlobPattern.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +// Necessary for llvm::DebugCompressionType::None +#include "llvm/Target/TargetOptions.h" +#include + +namespace llvm { +namespace objcopy { + +enum class FileFormat { + Unspecified, + ELF, + Binary, + IHex, +}; + +// This type keeps track of the machine info for various architectures. This +// lets us map architecture names to ELF types and the e_machine value of the +// ELF file. +struct MachineInfo { + MachineInfo(uint16_t EM, uint8_t ABI, bool Is64, bool IsLittle) + : EMachine(EM), OSABI(ABI), Is64Bit(Is64), IsLittleEndian(IsLittle) {} + // Alternative constructor that defaults to NONE for OSABI. + MachineInfo(uint16_t EM, bool Is64, bool IsLittle) + : MachineInfo(EM, ELF::ELFOSABI_NONE, Is64, IsLittle) {} + // Default constructor for unset fields. + MachineInfo() : MachineInfo(0, 0, false, false) {} + uint16_t EMachine; + uint8_t OSABI; + bool Is64Bit; + bool IsLittleEndian; +}; + +// Flags set by --set-section-flags or --rename-section. Interpretation of these +// is format-specific and not all flags are meaningful for all object file +// formats. This is a bitmask; many section flags may be set. +enum SectionFlag { + SecNone = 0, + SecAlloc = 1 << 0, + SecLoad = 1 << 1, + SecNoload = 1 << 2, + SecReadonly = 1 << 3, + SecDebug = 1 << 4, + SecCode = 1 << 5, + SecData = 1 << 6, + SecRom = 1 << 7, + SecMerge = 1 << 8, + SecStrings = 1 << 9, + SecContents = 1 << 10, + SecShare = 1 << 11, + SecExclude = 1 << 12, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/SecExclude) +}; + +struct SectionRename { + StringRef OriginalName; + StringRef NewName; + Optional NewFlags; +}; + +struct SectionFlagsUpdate { + StringRef Name; + SectionFlag NewFlags; +}; + +enum class DiscardType { + None, // Default + All, // --discard-all (-x) + Locals, // --discard-locals (-X) +}; + +enum class MatchStyle { + Literal, // Default for symbols. + Wildcard, // Default for sections, or enabled with --wildcard (-w). + Regex, // Enabled with --regex. +}; + +class NameOrPattern { + StringRef Name; + // Regex is shared between multiple CommonConfig instances. + std::shared_ptr R; + std::shared_ptr G; + bool IsPositiveMatch = true; + + NameOrPattern(StringRef N) : Name(N) {} + NameOrPattern(std::shared_ptr R) : R(R) {} + NameOrPattern(std::shared_ptr G, bool IsPositiveMatch) + : G(G), IsPositiveMatch(IsPositiveMatch) {} + +public: + // ErrorCallback is used to handle recoverable errors. An Error returned + // by the callback aborts the parsing and is then returned by this function. + static Expected + create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback); + + bool isPositiveMatch() const { return IsPositiveMatch; } + Optional getName() const { + if (!R && !G) + return Name; + return None; + } + bool operator==(StringRef S) const { + return R ? R->match(S) : G ? 
G->match(S) : Name == S; + } + bool operator!=(StringRef S) const { return !operator==(S); } +}; + +// Matcher that checks symbol or section names against the command line flags +// provided for that option. +class NameMatcher { + DenseSet PosNames; + std::vector PosPatterns; + std::vector NegMatchers; + +public: + Error addMatcher(Expected Matcher) { + if (!Matcher) + return Matcher.takeError(); + if (Matcher->isPositiveMatch()) { + if (Optional MaybeName = Matcher->getName()) + PosNames.insert(CachedHashStringRef(*MaybeName)); + else + PosPatterns.push_back(std::move(*Matcher)); + } else { + NegMatchers.push_back(std::move(*Matcher)); + } + return Error::success(); + } + bool matches(StringRef S) const { + return (PosNames.contains(CachedHashStringRef(S)) || + is_contained(PosPatterns, S)) && + !is_contained(NegMatchers, S); + } + bool empty() const { + return PosNames.empty() && PosPatterns.empty() && NegMatchers.empty(); + } +}; + +enum class SymbolFlag { + Global, + Local, + Weak, + Default, + Hidden, + Protected, + File, + Section, + Object, + Function, + IndirectFunction, + Debug, + Constructor, + Warning, + Indirect, + Synthetic, + UniqueObject, +}; + +// Symbol info specified by --add-symbol option. Symbol flags not supported +// by a concrete format should be ignored. +struct NewSymbolInfo { + StringRef SymbolName; + StringRef SectionName; + uint64_t Value = 0; + std::vector Flags; + std::vector BeforeSyms; +}; + +// Specify section name and section body for newly added or updated section. +struct NewSectionInfo { + NewSectionInfo() = default; + NewSectionInfo(StringRef Name, std::unique_ptr &&Buffer) + : SectionName(Name), SectionData(std::move(Buffer)) {} + + StringRef SectionName; + std::shared_ptr SectionData; +}; + +// Configuration for copying/stripping a single file. +struct CommonConfig { + // Main input/output options + StringRef InputFilename; + FileFormat InputFormat = FileFormat::Unspecified; + StringRef OutputFilename; + FileFormat OutputFormat = FileFormat::Unspecified; + + // Only applicable when --output-format!=binary (e.g. elf64-x86-64). + Optional OutputArch; + + // Advanced options + StringRef AddGnuDebugLink; + // Cached gnu_debuglink's target CRC + uint32_t GnuDebugLinkCRC32; + Optional ExtractPartition; + StringRef SplitDWO; + StringRef SymbolsPrefix; + StringRef AllocSectionsPrefix; + DiscardType DiscardMode = DiscardType::None; + + // Repeated options + std::vector AddSection; + std::vector DumpSection; + std::vector UpdateSection; + + // Section matchers + NameMatcher KeepSection; + NameMatcher OnlySection; + NameMatcher ToRemove; + + // Symbol matchers + NameMatcher SymbolsToGlobalize; + NameMatcher SymbolsToKeep; + NameMatcher SymbolsToLocalize; + NameMatcher SymbolsToRemove; + NameMatcher UnneededSymbolsToRemove; + NameMatcher SymbolsToWeaken; + NameMatcher SymbolsToKeepGlobal; + + // Map options + StringMap SectionsToRename; + StringMap SetSectionAlignment; + StringMap SetSectionFlags; + StringMap SymbolsToRename; + + // Symbol info specified by --add-symbol option. 
+
+enum class SymbolFlag {
+  Global,
+  Local,
+  Weak,
+  Default,
+  Hidden,
+  Protected,
+  File,
+  Section,
+  Object,
+  Function,
+  IndirectFunction,
+  Debug,
+  Constructor,
+  Warning,
+  Indirect,
+  Synthetic,
+  UniqueObject,
+};
+
+// Symbol info specified by --add-symbol option. Symbol flags not supported
+// by a concrete format should be ignored.
+struct NewSymbolInfo {
+  StringRef SymbolName;
+  StringRef SectionName;
+  uint64_t Value = 0;
+  std::vector<SymbolFlag> Flags;
+  std::vector<StringRef> BeforeSyms;
+};
+
+// Specify section name and section body for newly added or updated section.
+struct NewSectionInfo {
+  NewSectionInfo() = default;
+  NewSectionInfo(StringRef Name, std::unique_ptr<MemoryBuffer> &&Buffer)
+      : SectionName(Name), SectionData(std::move(Buffer)) {}
+
+  StringRef SectionName;
+  std::shared_ptr<MemoryBuffer> SectionData;
+};
+
+// Configuration for copying/stripping a single file.
+struct CommonConfig {
+  // Main input/output options
+  StringRef InputFilename;
+  FileFormat InputFormat = FileFormat::Unspecified;
+  StringRef OutputFilename;
+  FileFormat OutputFormat = FileFormat::Unspecified;
+
+  // Only applicable when --output-format!=binary (e.g. elf64-x86-64).
+  Optional<MachineInfo> OutputArch;
+
+  // Advanced options
+  StringRef AddGnuDebugLink;
+  // Cached gnu_debuglink's target CRC
+  uint32_t GnuDebugLinkCRC32;
+  Optional<StringRef> ExtractPartition;
+  StringRef SplitDWO;
+  StringRef SymbolsPrefix;
+  StringRef AllocSectionsPrefix;
+  DiscardType DiscardMode = DiscardType::None;
+
+  // Repeated options
+  std::vector<NewSectionInfo> AddSection;
+  std::vector<StringRef> DumpSection;
+  std::vector<NewSectionInfo> UpdateSection;
+
+  // Section matchers
+  NameMatcher KeepSection;
+  NameMatcher OnlySection;
+  NameMatcher ToRemove;
+
+  // Symbol matchers
+  NameMatcher SymbolsToGlobalize;
+  NameMatcher SymbolsToKeep;
+  NameMatcher SymbolsToLocalize;
+  NameMatcher SymbolsToRemove;
+  NameMatcher UnneededSymbolsToRemove;
+  NameMatcher SymbolsToWeaken;
+  NameMatcher SymbolsToKeepGlobal;
+
+  // Map options
+  StringMap<SectionRename> SectionsToRename;
+  StringMap<uint64_t> SetSectionAlignment;
+  StringMap<SectionFlagsUpdate> SetSectionFlags;
+  StringMap<StringRef> SymbolsToRename;
+
+  // Symbol info specified by --add-symbol option.
+  std::vector<NewSymbolInfo> SymbolsToAdd;
+
+  // Boolean options
+  bool DeterministicArchives = true;
+  bool ExtractDWO = false;
+  bool ExtractMainPartition = false;
+  bool OnlyKeepDebug = false;
+  bool PreserveDates = false;
+  bool StripAll = false;
+  bool StripAllGNU = false;
+  bool StripDWO = false;
+  bool StripDebug = false;
+  bool StripNonAlloc = false;
+  bool StripSections = false;
+  bool StripUnneeded = false;
+  bool Weaken = false;
+  bool DecompressDebugSections = false;
+
+  DebugCompressionType CompressionType = DebugCompressionType::None;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_COMMONCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/ConfigManager.h b/llvm/include/llvm/ObjCopy/ConfigManager.h
new file mode 100644
index 000000000000..2962cf99b270
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/ConfigManager.h
@@ -0,0 +1,50 @@
+//===- ConfigManager.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_CONFIGMANAGER_H
+#define LLVM_OBJCOPY_CONFIGMANAGER_H
+
+#include "llvm/ObjCopy/COFF/COFFConfig.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/wasm/WasmConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h"
+
+namespace llvm {
+namespace objcopy {
+
+struct ConfigManager : public MultiFormatConfig {
+  virtual ~ConfigManager() {}
+
+  const CommonConfig &getCommonConfig() const override { return Common; }
+
+  Expected<const ELFConfig &> getELFConfig() const override { return ELF; }
+
+  Expected<const COFFConfig &> getCOFFConfig() const override;
+
+  Expected<const MachOConfig &> getMachOConfig() const override;
+
+  Expected<const WasmConfig &> getWasmConfig() const override;
+
+  Expected<const XCOFFConfig &> getXCOFFConfig() const override;
+
+  // All configs.
+  CommonConfig Common;
+  ELFConfig ELF;
+  COFFConfig COFF;
+  MachOConfig MachO;
+  WasmConfig Wasm;
+  XCOFFConfig XCOFF;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_CONFIGMANAGER_H
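ConfigManager is the in-tree implementation of MultiFormatConfig: Common holds the format-agnostic options and the per-format configs sit alongside it, with the out-of-line getters presumably rejecting option combinations the requested format cannot honor. A hedged sketch of driver-style use (option values are made up):

  ConfigManager CM;
  CM.Common.InputFilename = "in.o";
  CM.Common.OutputFilename = "out.o";
  CM.Common.StripDebug = true;
  CM.ELF.LocalizeHidden = true; // ELF-only knob
  if (Expected<const ELFConfig &> E = CM.getELFConfig())
    (void)*E; // use the validated ELF config
  else
    consumeError(E.takeError());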
diff --git a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
new file mode 100644
index 000000000000..52bc728e36ff
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
@@ -0,0 +1,38 @@
+//===- ELFConfig.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_ELF_ELFCONFIG_H
+#define LLVM_OBJCOPY_ELF_ELFCONFIG_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELFTypes.h"
+#include <functional>
+
+namespace llvm {
+namespace objcopy {
+
+// ELF specific configuration for copying/stripping a single file.
+struct ELFConfig {
+  uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT;
+
+  // ELF entry point address expression. The input parameter is an entry point
+  // address in the input ELF file. The entry address in the output file is
+  // calculated with EntryExpr(input_address), when either --set-start or
+  // --change-start is used.
+  std::function<uint64_t(uint64_t)> EntryExpr;
+
+  bool AllowBrokenLinks = false;
+  bool KeepFileSymbols = false;
+  bool LocalizeHidden = false;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_ELF_ELFCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h b/llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h
new file mode 100644
index 000000000000..552b6fb655f1
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h
@@ -0,0 +1,53 @@
+//===- ELFObjcopy.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_ELF_ELFOBJCOPY_H
+#define LLVM_OBJCOPY_ELF_ELFOBJCOPY_H
+
+namespace llvm {
+class Error;
+class MemoryBuffer;
+class raw_ostream;
+
+namespace object {
+class ELFObjectFileBase;
+} // end namespace object
+
+namespace objcopy {
+struct CommonConfig;
+struct ELFConfig;
+
+namespace elf {
+/// Apply the transformations described by \p Config and \p ELFConfig to
+/// \p In, which must represent an IHex file, and writes the result
+/// into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnIHex(const CommonConfig &Config,
+                           const ELFConfig &ELFConfig, MemoryBuffer &In,
+                           raw_ostream &Out);
+
+/// Apply the transformations described by \p Config and \p ELFConfig to
+/// \p In, which is treated as a raw binary input, and writes the result
+/// into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnRawBinary(const CommonConfig &Config,
+                                const ELFConfig &ELFConfig, MemoryBuffer &In,
+                                raw_ostream &Out);
+
+/// Apply the transformations described by \p Config and \p ELFConfig to
+/// \p In and writes the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const CommonConfig &Config,
+                             const ELFConfig &ELFConfig,
+                             object::ELFObjectFileBase &In, raw_ostream &Out);
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_ELF_ELFOBJCOPY_H
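EntryExpr folds both entry-point flags into one hook: --set-start installs a constant function, while --change-start wraps an adjustment around the input address. A small sketch, with made-up values:

  ELFConfig ECfg;
  // --set-start 0x400000: ignore the old entry point entirely.
  ECfg.EntryExpr = [](uint64_t) { return UINT64_C(0x400000); };
  // --change-start 0x1000: bias the existing entry point instead.
  ECfg.EntryExpr = [](uint64_t A) { return A + 0x1000; };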
diff --git a/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h b/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h
new file mode 100644
index 000000000000..c5f861363297
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h
@@ -0,0 +1,46 @@
+//===- MachOConfig.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+#define LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+
+// Mach-O specific configuration for copying/stripping a single file.
+struct MachOConfig {
+  // Repeated options
+  std::vector<StringRef> RPathToAdd;
+  std::vector<StringRef> RPathToPrepend;
+  DenseMap<StringRef, StringRef> RPathsToUpdate;
+  DenseMap<StringRef, StringRef> InstallNamesToUpdate;
+  DenseSet<StringRef> RPathsToRemove;
+
+  // install-name-tool's id option
+  Optional<StringRef> SharedLibId;
+
+  // Segments to remove if they are empty
+  DenseSet<StringRef> EmptySegmentsToRemove;
+
+  // Boolean options
+  bool StripSwiftSymbols = false;
+  bool KeepUndefined = false;
+
+  // install-name-tool's --delete_all_rpaths
+  bool RemoveAllRpaths = false;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h b/llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h
new file mode 100644
index 000000000000..73690d7ace8a
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h
@@ -0,0 +1,45 @@
+//===- MachOObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_MACHOOBJCOPY_H
+#define LLVM_OBJCOPY_MACHO_MACHOOBJCOPY_H
+
+namespace llvm {
+class Error;
+class raw_ostream;
+
+namespace object {
+class MachOObjectFile;
+class MachOUniversalBinary;
+} // end namespace object
+
+namespace objcopy {
+struct CommonConfig;
+struct MachOConfig;
+class MultiFormatConfig;
+
+namespace macho {
+/// Apply the transformations described by \p Config and \p MachOConfig to
+/// \p In and writes the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const CommonConfig &Config,
+                             const MachOConfig &MachOConfig,
+                             object::MachOObjectFile &In, raw_ostream &Out);
+
+/// Apply the transformations described by \p Config and \p MachOConfig to
+/// \p In and writes the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnMachOUniversalBinary(
+    const MultiFormatConfig &Config, const object::MachOUniversalBinary &In,
+    raw_ostream &Out);
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_MACHOOBJCOPY_H
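The MachOConfig members correspond closely to install_name_tool options, so a driver mostly copies parsed arguments into them. A hedged sketch with invented paths:

  MachOConfig MC;
  MC.RPathToAdd.push_back("@loader_path/../lib");         // -add_rpath
  MC.RPathsToUpdate.insert({"/old/rpath", "/new/rpath"}); // -rpath OLD NEW
  MC.RPathsToRemove.insert("/unwanted/rpath");            // -delete_rpath
  MC.SharedLibId = "@rpath/libfoo.dylib";                 // -id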
diff --git a/llvm/include/llvm/ObjCopy/MultiFormatConfig.h b/llvm/include/llvm/ObjCopy/MultiFormatConfig.h
new file mode 100644
index 000000000000..180f2f82a908
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/MultiFormatConfig.h
@@ -0,0 +1,39 @@
+//===- MultiFormatConfig.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MULTIFORMATCONFIG_H
+#define LLVM_OBJCOPY_MULTIFORMATCONFIG_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+
+struct CommonConfig;
+struct ELFConfig;
+struct COFFConfig;
+struct MachOConfig;
+struct WasmConfig;
+struct XCOFFConfig;
+
+class MultiFormatConfig {
+public:
+  virtual ~MultiFormatConfig() {}
+
+  virtual const CommonConfig &getCommonConfig() const = 0;
+  virtual Expected<const ELFConfig &> getELFConfig() const = 0;
+  virtual Expected<const COFFConfig &> getCOFFConfig() const = 0;
+  virtual Expected<const MachOConfig &> getMachOConfig() const = 0;
+  virtual Expected<const WasmConfig &> getWasmConfig() const = 0;
+  virtual Expected<const XCOFFConfig &> getXCOFFConfig() const = 0;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_MULTIFORMATCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/ObjCopy.h b/llvm/include/llvm/ObjCopy/ObjCopy.h
new file mode 100644
index 000000000000..023814002c72
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/ObjCopy.h
@@ -0,0 +1,42 @@
+//===- ObjCopy.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_OBJCOPY_H
+#define LLVM_OBJCOPY_OBJCOPY_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+class raw_ostream;
+
+namespace object {
+class Archive;
+class Binary;
+} // end namespace object
+
+namespace objcopy {
+class MultiFormatConfig;
+
+/// Applies the transformations described by \p Config to
+/// each member in archive \p Ar.
+/// Writes a result in a file specified by \p Config.OutputFilename.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnArchive(const MultiFormatConfig &Config,
+                              const object::Archive &Ar);
+
+/// Applies the transformations described by \p Config to \p In and writes
+/// the result into \p Out. This function does the dispatch based on the
+/// format of the input binary (COFF, ELF, MachO or wasm).
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const MultiFormatConfig &Config,
+                             object::Binary &In, raw_ostream &Out);
+
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_OBJCOPY_H
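Because executeObjcopyOnBinary dispatches on the dynamic type of \p In, a caller only needs to open the file and hand over a MultiFormatConfig. A hedged sketch, assuming the llvm and llvm::objcopy namespaces are in scope and the file names are invented:

  ConfigManager CM;
  CM.Common.StripAll = true;
  Expected<object::OwningBinary<object::Binary>> BinOrErr =
      object::createBinary("input.o");
  if (!BinOrErr)
    return BinOrErr.takeError();
  std::error_code EC;
  raw_fd_ostream Out("output.o", EC);
  if (EC)
    return errorCodeToError(EC);
  return executeObjcopyOnBinary(CM, *BinOrErr->getBinary(), Out);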
diff --git a/llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h
new file mode 100644
index 000000000000..adaeedc82b73
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h
@@ -0,0 +1,21 @@
+//===- XCOFFConfig.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_XCOFF_XCOFFCONFIG_H
+#define LLVM_OBJCOPY_XCOFF_XCOFFCONFIG_H
+
+namespace llvm {
+namespace objcopy {
+
+// XCOFF specific configuration for copying/stripping a single file.
+struct XCOFFConfig {};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_XCOFF_XCOFFCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h
new file mode 100644
index 000000000000..9fc85cb39fa5
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h
@@ -0,0 +1,35 @@
+//===- XCOFFObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_XCOFF_XCOFFOBJCOPY_H
+#define LLVM_OBJCOPY_XCOFF_XCOFFOBJCOPY_H
+
+namespace llvm {
+class Error;
+class raw_ostream;
+
+namespace object {
+class XCOFFObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CommonConfig;
+struct XCOFFConfig;
+
+namespace xcoff {
+/// Apply the transformations described by \p Config and \p XCOFFConfig
+/// to \p In and writes the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const CommonConfig &Config, const XCOFFConfig &,
+                             object::XCOFFObjectFile &In, raw_ostream &Out);
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_XCOFF_XCOFFOBJCOPY_H
diff --git a/llvm/include/llvm/ObjCopy/wasm/WasmConfig.h b/llvm/include/llvm/ObjCopy/wasm/WasmConfig.h
new file mode 100644
index 000000000000..56a7055da9a7
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/wasm/WasmConfig.h
@@ -0,0 +1,21 @@
+//===- WasmConfig.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_WASM_WASMCONFIG_H
+#define LLVM_OBJCOPY_WASM_WASMCONFIG_H
+
+namespace llvm {
+namespace objcopy {
+
+// Wasm specific configuration for copying/stripping a single file.
+struct WasmConfig {};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_WASM_WASMCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h b/llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h
new file mode 100644
index 000000000000..5b4181c22b97
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h
@@ -0,0 +1,35 @@
+//===- WasmObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+#define LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+
+namespace llvm {
+class Error;
+class raw_ostream;
+
+namespace object {
+class WasmObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CommonConfig;
+struct WasmConfig;
+
+namespace wasm {
+/// Apply the transformations described by \p Config and \p WasmConfig
+/// to \p In and writes the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &, + object::WasmObjectFile &In, raw_ostream &Out); + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_OBJCOPY_WASM_WASMOBJCOPY_H diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h index b792cbc3d9ac..a36c9bd6163b 100644 --- a/llvm/include/llvm/Object/Archive.h +++ b/llvm/include/llvm/Object/Archive.h @@ -13,7 +13,6 @@ #ifndef LLVM_OBJECT_ARCHIVE_H #define LLVM_OBJECT_ARCHIVE_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/fallible_iterator.h" #include "llvm/ADT/iterator_range.h" @@ -22,7 +21,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include #include #include #include @@ -30,6 +28,9 @@ #include namespace llvm { + +template class Optional; + namespace object { const char ArchiveMagic[] = "!\n"; @@ -339,6 +340,7 @@ public: Kind kind() const { return (Kind)Format; } bool isThin() const { return IsThin; } + static object::Archive::Kind getDefaultKindForHost(); child_iterator child_begin(Error &Err, bool SkipInternal = true) const; child_iterator child_end() const; @@ -358,7 +360,7 @@ public: // check if a symbol is in the archive Expected> findSym(StringRef name) const; - bool isEmpty() const; + virtual bool isEmpty() const; bool hasSymbolTable() const; StringRef getSymbolTable() const { return SymbolTable; } StringRef getStringTable() const { return StringTable; } @@ -390,6 +392,7 @@ private: }; class BigArchive : public Archive { +public: /// Fixed-Length Header. struct FixLenHdr { char Magic[sizeof(BigArchiveMagic) - 1]; ///< Big archive magic string. @@ -410,6 +413,9 @@ public: BigArchive(MemoryBufferRef Source, Error &Err); uint64_t getFirstChildOffset() const override { return FirstChildOffset; } uint64_t getLastChildOffset() const { return LastChildOffset; } + bool isEmpty() const override { + return Data.getBufferSize() == sizeof(FixLenHdr); + }; }; } // end namespace object diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h index 7eaf13e8fb22..6acab45215da 100644 --- a/llvm/include/llvm/Object/ArchiveWriter.h +++ b/llvm/include/llvm/Object/ArchiveWriter.h @@ -26,6 +26,11 @@ struct NewArchiveMember { NewArchiveMember() = default; NewArchiveMember(MemoryBufferRef BufRef); + // Detect the archive format from the object or bitcode file. This helps + // assume the archive format when creating or editing archives in the case + // one isn't explicitly set. + object::Archive::Kind detectKindFromObject() const; + static Expected getOldMember(const object::Archive::Child &OldMember, bool Deterministic); diff --git a/llvm/include/llvm/Object/Binary.h b/llvm/include/llvm/Object/Binary.h index a8f4437d5dbb..53b299ae8612 100644 --- a/llvm/include/llvm/Object/Binary.h +++ b/llvm/include/llvm/Object/Binary.h @@ -16,9 +16,9 @@ #include "llvm-c/Types.h" #include "llvm/ADT/Triple.h" #include "llvm/Object/Error.h" +#include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" -#include #include #include @@ -50,6 +50,8 @@ protected: ID_WinRes, // Windows resource (.res) file. + ID_Offload, // Offloading binary file. + // Object and children. 
ID_StartObjects, ID_COFF, @@ -133,6 +135,8 @@ public: bool isWasm() const { return TypeID == ID_Wasm; } + bool isOffloadFile() const { return TypeID == ID_Offload; } + bool isCOFFImportFile() const { return TypeID == ID_COFFImportFile; } diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h index 3add3811069b..0b6975b9590f 100644 --- a/llvm/include/llvm/Object/COFF.h +++ b/llvm/include/llvm/Object/COFF.h @@ -1079,13 +1079,15 @@ public: uint64_t getImageBase() const; Error getVaPtr(uint64_t VA, uintptr_t &Res) const; - Error getRvaPtr(uint32_t Rva, uintptr_t &Res) const; + Error getRvaPtr(uint32_t Rva, uintptr_t &Res, + const char *ErrorContext = nullptr) const; /// Given an RVA base and size, returns a valid array of bytes or an error /// code if the RVA and size is not contained completely within a valid /// section. Error getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, - ArrayRef &Contents) const; + ArrayRef &Contents, + const char *ErrorContext = nullptr) const; Error getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const; @@ -1296,6 +1298,12 @@ struct FpoData { frame_type getFP() const { return static_cast(Attributes >> 14); } }; +class SectionStrippedError + : public ErrorInfo { +public: + SectionStrippedError() { setErrorCode(object_error::section_stripped); } +}; + } // end namespace object } // end namespace llvm diff --git a/llvm/include/llvm/Object/COFFImportFile.h b/llvm/include/llvm/Object/COFFImportFile.h index 0da0d8fa70c9..f8f0e0343b22 100644 --- a/llvm/include/llvm/Object/COFFImportFile.h +++ b/llvm/include/llvm/Object/COFFImportFile.h @@ -18,10 +18,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/COFF.h" -#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/llvm/include/llvm/Object/COFFModuleDefinition.h b/llvm/include/llvm/Object/COFFModuleDefinition.h index fb3d0952e3a3..8e14dd61472d 100644 --- a/llvm/include/llvm/Object/COFFModuleDefinition.h +++ b/llvm/include/llvm/Object/COFFModuleDefinition.h @@ -18,7 +18,7 @@ #ifndef LLVM_OBJECT_COFFMODULEDEFINITION_H #define LLVM_OBJECT_COFFMODULEDEFINITION_H -#include "llvm/Object/COFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFFImportFile.h" namespace llvm { diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h new file mode 100644 index 000000000000..7aa7d8ecf4c7 --- /dev/null +++ b/llvm/include/llvm/Object/DXContainer.h @@ -0,0 +1,124 @@ +//===- DXContainer.h - DXContainer file implementation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the DXContainerFile class, which implements the ObjectFile +// interface for DXContainer files. 
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_DXCONTAINER_H
+#define LLVM_OBJECT_DXCONTAINER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+namespace llvm {
+namespace object {
+class DXContainer {
+public:
+  using DXILData = std::pair<dxbc::ProgramHeader, const char *>;
+
+private:
+  DXContainer(MemoryBufferRef O);
+
+  MemoryBufferRef Data;
+  dxbc::Header Header;
+  SmallVector<uint32_t, 4> PartOffsets;
+  Optional<DXILData> DXIL;
+
+  Error parseHeader();
+  Error parsePartOffsets();
+  Error parseDXILHeader(uint32_t Offset);
+  friend class PartIterator;
+
+public:
+  // The PartIterator is a wrapper around the iterator for the PartOffsets
+  // member of the DXContainer. It contains a reference to the container, and
+  // the current iterator value, as well as storage for a parsed part header.
+  class PartIterator {
+    const DXContainer &Container;
+    SmallVectorImpl<uint32_t>::const_iterator OffsetIt;
+    struct PartData {
+      dxbc::PartHeader Part;
+      uint32_t Offset;
+      StringRef Data;
+    } IteratorState;
+
+    friend class DXContainer;
+
+    PartIterator(const DXContainer &C,
+                 SmallVectorImpl<uint32_t>::const_iterator It)
+        : Container(C), OffsetIt(It) {
+      if (OffsetIt == Container.PartOffsets.end())
+        updateIteratorImpl(Container.PartOffsets.back());
+      else
+        updateIterator();
+    }
+
+    // Updates the iterator's state data. This results in copying the part
+    // header into the iterator and handling any required byte swapping. This
+    // is called when incrementing or decrementing the iterator.
+    void updateIterator() {
+      if (OffsetIt != Container.PartOffsets.end())
+        updateIteratorImpl(*OffsetIt);
+    }
+
+    // Implementation for updating the iterator state based on a specified
+    // offset.
+    void updateIteratorImpl(const uint32_t Offset);
+
+  public:
+    PartIterator &operator++() {
+      if (OffsetIt == Container.PartOffsets.end())
+        return *this;
+      ++OffsetIt;
+      updateIterator();
+      return *this;
+    }
+
+    PartIterator operator++(int) {
+      PartIterator Tmp = *this;
+      ++(*this);
+      return Tmp;
+    }
+
+    bool operator==(const PartIterator &RHS) const {
+      return OffsetIt == RHS.OffsetIt;
+    }
+
+    bool operator!=(const PartIterator &RHS) const {
+      return OffsetIt != RHS.OffsetIt;
+    }
+
+    const PartData &operator*() { return IteratorState; }
+    const PartData *operator->() { return &IteratorState; }
+  };
+
+  PartIterator begin() const {
+    return PartIterator(*this, PartOffsets.begin());
+  }
+
+  PartIterator end() const { return PartIterator(*this, PartOffsets.end()); }
+
+  StringRef getData() const { return Data.getBuffer(); }
+  static Expected<DXContainer> create(MemoryBufferRef Object);
+
+  const dxbc::Header &getHeader() const { return Header; }
+
+  Optional<DXILData> getDXIL() const { return DXIL; }
+};
+
+} // namespace object
+} // namespace llvm
+
+#endif // LLVM_OBJECT_DXCONTAINER_H
diff --git a/llvm/include/llvm/Object/Decompressor.h b/llvm/include/llvm/Object/Decompressor.h
index cc918481b308..e04ee3c3e4c0 100644
--- a/llvm/include/llvm/Object/Decompressor.h
+++ b/llvm/include/llvm/Object/Decompressor.h
@@ -9,13 +9,15 @@
 #ifndef LLVM_OBJECT_DECOMPRESSOR_H
 #define LLVM_OBJECT_DECOMPRESSOR_H
 
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
 
 namespace llvm {
 namespace object {
 
+class SectionRef;
+
 /// Decompressor helps to handle decompression of compressed sections.
 class Decompressor {
 public:
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 37f23c435ae1..1a59ba94098f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -855,7 +855,7 @@ Expected<StringRef> ELFFile<ELFT>::getSymbolVersionByIndex(
   const VersionEntry &Entry = *VersionMap[VersionIndex];
 
   // A default version (@@) is only available for defined symbols.
-  if (!Entry.IsVerDef || IsSymHidden.getValueOr(false))
+  if (!Entry.IsVerDef || IsSymHidden.value_or(false))
     IsDefault = false;
   else
     IsDefault = !(SymbolVersionIndex & llvm::ELF::VERSYM_HIDDEN);
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index e2d2784d4f23..c449a3dafc0c 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -15,7 +15,6 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
@@ -27,19 +26,21 @@
 #include "llvm/Object/Error.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/SymbolicFile.h"
-#include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/ELFAttributeParser.h"
 #include "llvm/Support/ELFAttributes.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include <cassert>
 #include <cstdint>
-#include <cstring>
 
 namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+
 namespace object {
 
 constexpr int NumElfSymbolTypes = 16;
@@ -101,6 +102,12 @@ public:
   /// Returns a vector containing a symbol version for each dynamic symbol.
/// Returns an empty vector if version sections do not exist. Expected> readDynsymVersions() const; + + /// Returns a vector of all BB address maps in the object file. When + // `TextSectionIndex` is specified, only returns the BB address maps + // corresponding to the section with that index. + Expected> + readBBAddrMap(Optional TextSectionIndex = None) const; }; class ELFSectionRef : public SectionRef { @@ -1167,7 +1174,7 @@ uint8_t ELFObjectFile::getBytesInAddress() const { template StringRef ELFObjectFile::getFileFormatName() const { - bool IsLittleEndian = ELFT::TargetEndianness == support::little; + constexpr bool IsLittleEndian = ELFT::TargetEndianness == support::little; switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: switch (EF.getHeader().e_machine) { @@ -1202,6 +1209,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf32-sparc"; case ELF::EM_AMDGPU: return "elf32-amdgpu"; + case ELF::EM_LOONGARCH: + return "elf32-loongarch"; default: return "elf32-unknown"; } @@ -1229,6 +1238,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf64-bpf"; case ELF::EM_VE: return "elf64-ve"; + case ELF::EM_LOONGARCH: + return "elf64-loongarch"; default: return "elf64-unknown"; } @@ -1313,6 +1324,17 @@ template Triple::ArchType ELFObjectFile::getArch() const { return Triple::ve; case ELF::EM_CSKY: return Triple::csky; + + case ELF::EM_LOONGARCH: + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { + case ELF::ELFCLASS32: + return Triple::loongarch32; + case ELF::ELFCLASS64: + return Triple::loongarch64; + default: + report_fatal_error("Invalid ELFCLASS!"); + } + default: return Triple::UnknownArch; } diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index c674b80c814d..5942b6f1d0a1 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -812,8 +812,20 @@ struct BBAddrMap { : Offset(Offset), Size(Size), HasReturn(Metadata & 1), HasTailCall(Metadata & (1 << 1)), IsEHPad(Metadata & (1 << 2)), CanFallThrough(Metadata & (1 << 3)){}; + + bool operator==(const BBEntry &Other) const { + return Offset == Other.Offset && Size == Other.Size && + HasReturn == Other.HasReturn && HasTailCall == Other.HasTailCall && + IsEHPad == Other.IsEHPad && CanFallThrough == Other.CanFallThrough; + } }; std::vector BBEntries; // Basic block entries for this function. + + // Equality operator for unit testing. + bool operator==(const BBAddrMap &Other) const { + return Addr == Other.Addr && std::equal(BBEntries.begin(), BBEntries.end(), + Other.BBEntries.begin()); + } }; } // end namespace object. 
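The readBBAddrMap entry point added above (whose stripped template arguments read as Expected<std::vector<BBAddrMap>> and Optional<unsigned> upstream) pairs naturally with the new BBAddrMap equality operators. A hedged sketch of dumping the decoded maps, assuming Obj is an ELFObjectFileBase obtained elsewhere:

  Expected<std::vector<object::BBAddrMap>> MapsOrErr = Obj.readBBAddrMap();
  if (!MapsOrErr)
    return MapsOrErr.takeError();
  for (const object::BBAddrMap &Fn : *MapsOrErr)
    outs() << "func @ 0x" << Twine::utohexstr(Fn.Addr) << " has "
           << Fn.BBEntries.size() << " blocks\n";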
diff --git a/llvm/include/llvm/Object/Error.h b/llvm/include/llvm/Object/Error.h index af334fc42658..8875fb6e1a20 100644 --- a/llvm/include/llvm/Object/Error.h +++ b/llvm/include/llvm/Object/Error.h @@ -34,6 +34,7 @@ enum class object_error { invalid_section_index, bitcode_section_not_found, invalid_symbol_index, + section_stripped, }; inline std::error_code make_error_code(object_error e) { diff --git a/llvm/include/llvm/Object/IRObjectFile.h b/llvm/include/llvm/Object/IRObjectFile.h index db47960237a0..6b3f2cd5671c 100644 --- a/llvm/include/llvm/Object/IRObjectFile.h +++ b/llvm/include/llvm/Object/IRObjectFile.h @@ -13,7 +13,6 @@ #ifndef LLVM_OBJECT_IROBJECTFILE_H #define LLVM_OBJECT_IROBJECTFILE_H -#include "llvm/ADT/PointerUnion.h" #include "llvm/Object/IRSymtab.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index 49a0706b84be..4ec366055db6 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -260,6 +260,124 @@ private: }; using bind_iterator = content_iterator; +/// ChainedFixupTarget holds all the information about an external symbol +/// necessary to bind this binary to that symbol. These values are referenced +/// indirectly by chained fixup binds. This structure captures values from all +/// import and symbol formats. +/// +/// Be aware there are two notions of weak here: +/// WeakImport == true +/// The associated bind may be set to 0 if this symbol is missing from its +/// parent library. This is called a "weak import." +/// LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP +/// This symbol may be coalesced with other libraries vending the same +/// symbol. E.g., C++'s "operator new". This is called a "weak bind." +struct ChainedFixupTarget { +public: + ChainedFixupTarget(int LibOrdinal, StringRef Symbol, uint64_t Addend, + bool WeakImport) + : LibOrdinal(LibOrdinal), SymbolName(Symbol), Addend(Addend), + WeakImport(WeakImport) {} + + int libOrdinal() { return LibOrdinal; } + StringRef symbolName() { return SymbolName; } + uint64_t addend() { return Addend; } + bool weakImport() { return WeakImport; } + bool weakBind() { + return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP; + } + +private: + int LibOrdinal; + StringRef SymbolName; + uint64_t Addend; + bool WeakImport; +}; + +/// MachOAbstractFixupEntry is an abstract class representing a fixup in a +/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also +/// subdivide into additional subtypes (weak, lazy, reexport). +/// +/// The two concrete subclasses of MachOAbstractFixupEntry are: +/// +/// MachORebaseBindEntry - for dyld opcode-based tables, including threaded- +/// rebase, where rebases are mixed in with other +/// bind opcodes. +/// MachOChainedFixupEntry - for pointer chains embedded in data pages. +class MachOAbstractFixupEntry { +public: + MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O); + + int32_t segmentIndex() const; + uint64_t segmentOffset() const; + uint64_t segmentAddress() const; + StringRef segmentName() const; + StringRef sectionName() const; + StringRef typeName() const; + StringRef symbolName() const; + uint32_t flags() const; + int64_t addend() const; + int ordinal() const; + + /// \return the location of this fixup as a VM Address. For the VM + /// Address this fixup is pointing to, use pointerValue(). + uint64_t address() const; + + /// \return the VM Address pointed to by this fixup. 
Use + /// pointerValue() to compare against other VM Addresses, such as + /// section addresses or segment vmaddrs. + uint64_t pointerValue() const { return PointerValue; } + + /// \return the raw "on-disk" representation of the fixup. For + /// Threaded rebases and Chained pointers these values are generally + /// encoded into various different pointer formats. This value is + /// exposed in API for tools that want to display and annotate the + /// raw bits. + uint64_t rawValue() const { return RawValue; } + + void moveNext(); + +protected: + Error *E; + const MachOObjectFile *O; + uint64_t SegmentOffset = 0; + int32_t SegmentIndex = -1; + StringRef SymbolName; + int32_t Ordinal = 0; + uint32_t Flags = 0; + int64_t Addend = 0; + uint64_t PointerValue = 0; + uint64_t RawValue = 0; + bool Done = false; + + void moveToFirst(); + void moveToEnd(); + + /// \return the vm address of the start of __TEXT segment. + uint64_t textAddress() const { return TextAddress; } + +private: + uint64_t TextAddress; +}; + +class MachOChainedFixupEntry : public MachOAbstractFixupEntry { +public: + enum class FixupKind { All, Bind, WeakBind, Rebase }; + + MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse); + + bool operator==(const MachOChainedFixupEntry &) const; + + void moveNext(); + void moveToFirst(); + void moveToEnd(); + +private: + std::vector FixupTargets; + uint32_t FixupIndex = 0; +}; +using fixup_iterator = content_iterator; + class MachOObjectFile : public ObjectFile { public: struct LoadCommandInfo { @@ -273,6 +391,8 @@ public: create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0); + static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch); + void moveSymbolNext(DataRefImpl &Symb) const override; uint64_t getNValue(DataRefImpl Sym) const; @@ -402,6 +522,9 @@ public: /// For use iterating over all bind table entries. iterator_range bindTable(Error &Err); + /// For iterating over all chained fixups. + iterator_range fixupTable(Error &Err); + /// For use iterating over all lazy bind table entries. iterator_range lazyBindTable(Error &Err); @@ -562,7 +685,12 @@ public: ArrayRef getDyldInfoBindOpcodes() const; ArrayRef getDyldInfoWeakBindOpcodes() const; ArrayRef getDyldInfoLazyBindOpcodes() const; + /// If the optional is None, no header was found, but the object was well-formed. 
+ Expected> + getChainedFixupsHeader() const; + Expected> getDyldChainedFixupTargets() const; ArrayRef getDyldInfoExportsTrie() const; + SmallVector getFunctionStarts() const; ArrayRef getUuid() const; StringRef getStringTableData() const; @@ -689,6 +817,8 @@ private: const char *DataInCodeLoadCmd = nullptr; const char *LinkOptHintsLoadCmd = nullptr; const char *DyldInfoLoadCmd = nullptr; + const char *FuncStartsLoadCmd = nullptr; + const char *DyldChainedFixupsLoadCmd = nullptr; const char *UuidLoadCmd = nullptr; bool HasPageZeroSegment = false; }; diff --git a/llvm/include/llvm/Object/MachOUniversal.h b/llvm/include/llvm/Object/MachOUniversal.h index e87eb31aad4e..4fe7a68d9680 100644 --- a/llvm/include/llvm/Object/MachOUniversal.h +++ b/llvm/include/llvm/Object/MachOUniversal.h @@ -16,7 +16,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/MachO.h" @@ -25,6 +24,7 @@ class StringRef; class LLVMContext; namespace object { +class Archive; class IRObjectFile; class MachOUniversalBinary : public Binary { diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h index 8d095766cf48..4004f25f3fb7 100644 --- a/llvm/include/llvm/Object/MachOUniversalWriter.h +++ b/llvm/include/llvm/Object/MachOUniversalWriter.h @@ -14,15 +14,22 @@ #ifndef LLVM_OBJECT_MACHOUNIVERSALWRITER_H #define LLVM_OBJECT_MACHOUNIVERSALWRITER_H -#include "llvm/Object/Archive.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/MachO.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Error.h" +#include +#include namespace llvm { class LLVMContext; namespace object { +class Archive; +class Binary; class IRObjectFile; +class MachOObjectFile; class Slice { const Binary *B; diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 950c38a599d5..8754c229bd4b 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -13,7 +13,8 @@ #ifndef LLVM_OBJECT_OBJECTFILE_H #define LLVM_OBJECT_OBJECTFILE_H -#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" @@ -24,11 +25,10 @@ #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include #include #include -#include namespace llvm { @@ -170,11 +170,11 @@ class SymbolRef : public BasicSymbolRef { public: enum Type { ST_Unknown, // Type not specified + ST_Other, ST_Data, ST_Debug, ST_File, ST_Function, - ST_Other }; SymbolRef() = default; @@ -350,6 +350,11 @@ public: /// True if this is a relocatable object (.o/.obj). virtual bool isRelocatableObject() const = 0; + /// True if the reflection section can be stripped by the linker. + bool isReflectionSectionStrippable( + llvm::binaryformat::Swift5ReflectionSectionKind ReflectionSectionKind) + const; + /// @returns Pointer to ObjectFile subclass to handle this type of object. /// @param ObjectPath The path to the object file. ObjectPath.isObject must /// return true. 
diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h
new file mode 100644
index 000000000000..5afc3ed295ae
--- /dev/null
+++ b/llvm/include/llvm/Object/OffloadBinary.h
@@ -0,0 +1,169 @@
+//===--- Offloading.h - Utilities for handling offloading code -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the binary format used for bundling device metadata with
+// an associated device image. The data can then be stored inside a host object
+// file to create a fat binary and read by the linker. This is intended to be a
+// thin wrapper around the image itself. If this format becomes sufficiently
+// complex it should be moved to a standard binary format like msgpack or ELF.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_OFFLOADING_H
+#define LLVM_BINARYFORMAT_OFFLOADING_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+
+namespace object {
+
+/// The producer of the associated offloading image.
+enum OffloadKind : uint16_t {
+  OFK_None = 0,
+  OFK_OpenMP,
+  OFK_Cuda,
+  OFK_HIP,
+  OFK_LAST,
+};
+
+/// The type of contents the offloading image contains.
+enum ImageKind : uint16_t {
+  IMG_None = 0,
+  IMG_Object,
+  IMG_Bitcode,
+  IMG_Cubin,
+  IMG_Fatbinary,
+  IMG_PTX,
+  IMG_LAST,
+};
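Together with the OffloadBinary class that follows, these enums support a simple write/parse round trip. A hedged sketch (triple, arch, and payload are invented):

  OffloadBinary::OffloadingImage Img;
  Img.TheImageKind = IMG_Cubin;
  Img.TheOffloadKind = OFK_Cuda;
  Img.Flags = 0;
  Img.StringData["triple"] = "nvptx64-nvidia-cuda";
  Img.StringData["arch"] = "sm_70";
  Img.Image = MemoryBuffer::getMemBuffer("<device image bytes>");
  std::unique_ptr<MemoryBuffer> Buf = OffloadBinary::write(Img);
  Expected<std::unique_ptr<OffloadBinary>> OB =
      OffloadBinary::create(Buf->getMemBufferRef());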
+
+/// A simple binary serialization of an offloading file. We use this format to
+/// embed the offloading image into the host executable so it can be extracted
+/// and used by the linker.
+///
+/// Many of these could be stored in the same section by the time the linker
+/// sees it so we mark this information with a header. The version is used to
+/// detect ABI stability and the size is used to find other offloading entries
+/// that may exist in the same section. All offsets are given as absolute byte
+/// offsets from the beginning of the file.
+class OffloadBinary : public Binary {
+public:
+  using string_iterator = StringMap<StringRef>::const_iterator;
+  using string_iterator_range = iterator_range<string_iterator>;
+
+  /// The current version of the binary used for backwards compatibility.
+  static const uint32_t Version = 1;
+
+  /// The offloading metadata that will be serialized to a memory buffer.
+  struct OffloadingImage {
+    ImageKind TheImageKind;
+    OffloadKind TheOffloadKind;
+    uint32_t Flags;
+    StringMap<StringRef> StringData;
+    std::unique_ptr<MemoryBuffer> Image;
+  };
+
+  /// Attempt to parse the offloading binary stored in \p Data.
+  static Expected<std::unique_ptr<OffloadBinary>> create(MemoryBufferRef);
+
+  /// Serialize the contents of \p File to a binary buffer to be read later.
+  static std::unique_ptr<MemoryBuffer> write(const OffloadingImage &);
+
+  static uint64_t getAlignment() { return alignof(Header); }
+
+  ImageKind getImageKind() const { return TheEntry->TheImageKind; }
+  OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; }
+  uint32_t getVersion() const { return TheHeader->Version; }
+  uint32_t getFlags() const { return TheEntry->Flags; }
+  uint64_t getSize() const { return TheHeader->Size; }
+
+  StringRef getTriple() const { return getString("triple"); }
+  StringRef getArch() const { return getString("arch"); }
+  StringRef getImage() const {
+    return StringRef(&Buffer[TheEntry->ImageOffset], TheEntry->ImageSize);
+  }
+
+  // Iterator over all the key and value pairs in the binary.
+  string_iterator_range strings() const {
+    return string_iterator_range(StringData.begin(), StringData.end());
+  }
+
+  StringRef getString(StringRef Key) const { return StringData.lookup(Key); }
+
+  static bool classof(const Binary *V) { return V->isOffloadFile(); }
+
+  struct Header {
+    uint8_t Magic[4] = {0x10, 0xFF, 0x10, 0xAD}; // 0x10FF10AD magic bytes.
+    uint32_t Version = OffloadBinary::Version;   // Version identifier.
+    uint64_t Size;        // Size in bytes of this entire binary.
+    uint64_t EntryOffset; // Offset of the metadata entry in bytes.
+    uint64_t EntrySize;   // Size of the metadata entry in bytes.
+  };
+
+  struct Entry {
+    ImageKind TheImageKind;     // The kind of the image stored.
+    OffloadKind TheOffloadKind; // The producer of this image.
+    uint32_t Flags;             // Additional flags associated with the image.
+    uint64_t StringOffset;      // Offset in bytes to the string map.
+    uint64_t NumStrings;        // Number of entries in the string map.
+    uint64_t ImageOffset;       // Offset in bytes of the actual binary image.
+    uint64_t ImageSize;         // Size in bytes of the binary image.
+  };
+
+  struct StringEntry {
+    uint64_t KeyOffset;
+    uint64_t ValueOffset;
+  };
+
+private:
+  OffloadBinary(MemoryBufferRef Source, const Header *TheHeader,
+                const Entry *TheEntry)
+      : Binary(Binary::ID_Offload, Source), Buffer(Source.getBufferStart()),
+        TheHeader(TheHeader), TheEntry(TheEntry) {
+    const StringEntry *StringMapBegin =
+        reinterpret_cast<const StringEntry *>(&Buffer[TheEntry->StringOffset]);
+    for (uint64_t I = 0, E = TheEntry->NumStrings; I != E; ++I) {
+      StringRef Key = &Buffer[StringMapBegin[I].KeyOffset];
+      StringData[Key] = &Buffer[StringMapBegin[I].ValueOffset];
+    }
+  }
+
+  OffloadBinary(const OffloadBinary &Other) = delete;
+
+  /// Map from keys to offsets in the binary.
+  StringMap<StringRef> StringData;
+  /// Raw pointer to the MemoryBufferRef for convenience.
+  const char *Buffer;
+  /// Location of the header within the binary.
+  const Header *TheHeader;
+  /// Location of the metadata entries within the binary.
+  const Entry *TheEntry;
+};
+
+/// Convert a string \p Name to an image kind.
+ImageKind getImageKind(StringRef Name);
+
+/// Convert an image kind to its string representation.
+StringRef getImageKindName(ImageKind Name);
+
+/// Convert a string \p Name to an offload kind.
+OffloadKind getOffloadKind(StringRef Name);
+
+/// Convert an offload kind to its string representation.
+StringRef getOffloadKindName(OffloadKind Name); + +} // namespace object + +} // namespace llvm +#endif diff --git a/llvm/include/llvm/Object/RelocationResolver.h b/llvm/include/llvm/Object/RelocationResolver.h index d3b604018e89..2acdf5ed2fe1 100644 --- a/llvm/include/llvm/Object/RelocationResolver.h +++ b/llvm/include/llvm/Object/RelocationResolver.h @@ -15,22 +15,15 @@ #ifndef LLVM_OBJECT_RELOCATIONRESOLVER_H #define LLVM_OBJECT_RELOCATIONRESOLVER_H -#include "llvm/ADT/Triple.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/COFF.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/MachO.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Object/Wasm.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" #include -#include +#include namespace llvm { namespace object { +class ObjectFile; +class RelocationRef; + using SupportsRelocation = bool (*)(uint64_t); using RelocationResolver = uint64_t (*)(uint64_t Type, uint64_t Offset, uint64_t S, uint64_t LocData, diff --git a/llvm/include/llvm/Object/SymbolicFile.h b/llvm/include/llvm/Object/SymbolicFile.h index 284302c5e042..ea51afce5d2a 100644 --- a/llvm/include/llvm/Object/SymbolicFile.h +++ b/llvm/include/llvm/Object/SymbolicFile.h @@ -13,21 +13,23 @@ #ifndef LLVM_OBJECT_SYMBOLICFILE_H #define LLVM_OBJECT_SYMBOLICFILE_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include #include #include #include #include -#include namespace llvm { + +class LLVMContext; +class raw_ostream; + namespace object { union DataRefImpl { diff --git a/llvm/include/llvm/Object/TapiFile.h b/llvm/include/llvm/Object/TapiFile.h index ffa27fdf9654..410e58dceaf4 100644 --- a/llvm/include/llvm/Object/TapiFile.h +++ b/llvm/include/llvm/Object/TapiFile.h @@ -14,13 +14,22 @@ #define LLVM_OBJECT_TAPIFILE_H #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/TextAPI/InterfaceFile.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/TextAPI/Architecture.h" namespace llvm { + +class raw_ostream; + +namespace MachO { + +class InterfaceFile; + +} + namespace object { class TapiFile : public SymbolicFile { diff --git a/llvm/include/llvm/Object/TapiUniversal.h b/llvm/include/llvm/Object/TapiUniversal.h index ab548aa5bb2a..fff66c28c1a4 100644 --- a/llvm/include/llvm/Object/TapiUniversal.h +++ b/llvm/include/llvm/Object/TapiUniversal.h @@ -13,16 +13,18 @@ #ifndef LLVM_OBJECT_TAPIUNIVERSAL_H #define LLVM_OBJECT_TAPIUNIVERSAL_H +#include "llvm/ADT/StringRef.h" #include "llvm/Object/Binary.h" -#include "llvm/Object/TapiFile.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include "llvm/TextAPI/Architecture.h" #include "llvm/TextAPI/InterfaceFile.h" namespace llvm { namespace object { +class TapiFile; + class TapiUniversal : public Binary { public: class ObjectForArch { diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index e4802c087b8b..abe0f6f528cc 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -287,7 +287,6 @@ private: uint32_t 
StartFunction = -1; bool HasLinkingSection = false; bool HasDylinkSection = false; - bool SeenCodeSection = false; bool HasMemory64 = false; wasm::WasmLinkingData LinkingData; uint32_t NumImportedGlobals = 0; diff --git a/llvm/include/llvm/Object/WindowsResource.h b/llvm/include/llvm/Object/WindowsResource.h index b8fad299c693..acda9e2659b1 100644 --- a/llvm/include/llvm/Object/WindowsResource.h +++ b/llvm/include/llvm/Object/WindowsResource.h @@ -31,7 +31,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" -#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" @@ -50,6 +49,7 @@ namespace object { class WindowsResource; class ResourceSectionRef; +struct coff_resource_dir_table; const size_t WIN_RES_MAGIC_SIZE = 16; const size_t WIN_RES_NULL_ENTRY_SIZE = 16; diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h index ac911e534f34..68d9afff887c 100644 --- a/llvm/include/llvm/Object/XCOFFObjectFile.h +++ b/llvm/include/llvm/Object/XCOFFObjectFile.h @@ -60,10 +60,13 @@ public: return static_cast(this)->FlagAndTDataAlignment & AuxiHeaderFlagMask; } + uint8_t getTDataAlignment() const { return static_cast(this)->FlagAndTDataAlignment & AuxiHeaderTDataAlignmentMask; } + + uint16_t getVersion() const { return static_cast(this)->Version; } }; struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader { @@ -113,7 +116,7 @@ struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader { support::ubig16_t SecNumOfTBSS; }; -struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader { +struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader { support::ubig16_t AuxMagic; support::ubig16_t Version; support::ubig32_t ReservedForDebugger; @@ -448,9 +451,6 @@ private: const void *SymbolTblPtr = nullptr; XCOFFStringTable StringTable = {0, nullptr}; - const XCOFFFileHeader32 *fileHeader32() const; - const XCOFFFileHeader64 *fileHeader64() const; - const XCOFFSectionHeader32 *sectionHeaderTable32() const; const XCOFFSectionHeader64 *sectionHeaderTable64() const; template const T *sectionHeaderTable() const; @@ -548,6 +548,8 @@ public: // Below here is the non-inherited interface. bool is64Bit() const; + Expected getRawData(const char *Start, uint64_t Size, + StringRef Name) const; const XCOFFAuxiliaryHeader32 *auxiliaryHeader32() const; const XCOFFAuxiliaryHeader64 *auxiliaryHeader64() const; @@ -559,6 +561,8 @@ public: XCOFFSymbolRef toSymbolRef(DataRefImpl Ref) const; // File header related interfaces. + const XCOFFFileHeader32 *fileHeader32() const; + const XCOFFFileHeader64 *fileHeader64() const; uint16_t getMagic() const; uint16_t getNumberOfSections() const; int32_t getTimeStamp() const; @@ -687,6 +691,9 @@ public: Entry32 = reinterpret_cast(SymEntDataRef.p); } + const XCOFFSymbolEntry32 *getSymbol32() { return Entry32; } + const XCOFFSymbolEntry64 *getSymbol64() { return Entry64; } + uint64_t getValue() const { return Entry32 ? getValue32() : getValue64(); } uint32_t getValue32() const { return Entry32->Value; } diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h new file mode 100644 index 000000000000..d1c0cd912d97 --- /dev/null +++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h @@ -0,0 +1,101 @@ +//===- DXContainerYAML.h - DXContainer YAMLIO implementation ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares classes for handling the YAML representation +/// of DXContainer. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_DXCONTAINERYAML_H +#define LLVM_OBJECTYAML_DXCONTAINERYAML_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include + +namespace llvm { +namespace DXContainerYAML { + +struct VersionTuple { + uint16_t Major; + uint16_t Minor; +}; + +// The optional header fields are required in the binary and will be populated +// when reading from binary, but can be omitted in the YAML text because the +// emitter can calculate them. +struct FileHeader { + std::vector Hash; + VersionTuple Version; + Optional FileSize; + uint32_t PartCount; + Optional> PartOffsets; +}; + +struct DXILProgram { + uint8_t MajorVersion; + uint8_t MinorVersion; + uint16_t ShaderKind; + Optional Size; + uint16_t DXILMajorVersion; + uint16_t DXILMinorVersion; + Optional DXILOffset; + Optional DXILSize; + Optional> DXIL; +}; + +struct Part { + std::string Name; + uint32_t Size; + Optional Program; +}; + +struct Object { + FileHeader Header; + std::vector Parts; +}; + +} // namespace DXContainerYAML +} // namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DXContainerYAML::Part) +namespace llvm { + +class raw_ostream; + +namespace yaml { + +template <> struct MappingTraits { + static void mapping(IO &IO, DXContainerYAML::VersionTuple &Version); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, DXContainerYAML::FileHeader &Header); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, DXContainerYAML::DXILProgram &Program); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, DXContainerYAML::Part &Version); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, DXContainerYAML::Object &Obj); +}; + +} // namespace yaml + +} // namespace llvm + +#endif // LLVM_OBJECTYAML_DXCONTAINERYAML_H diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 92a9f78ce7bf..ddd5dd9cf3c9 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -161,6 +161,8 @@ struct BBAddrMapEntry { llvm::yaml::Hex64 Size; llvm::yaml::Hex64 Metadata; }; + uint8_t Version; + llvm::yaml::Hex8 Feature; llvm::yaml::Hex64 Address; Optional NumBlocks; Optional> BBEntries; @@ -317,7 +319,7 @@ struct BBAddrMapSection : Section { BBAddrMapSection() : Section(ChunkKind::BBAddrMap) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { @@ -331,7 +333,7 @@ struct StackSizesSection : Section { StackSizesSection() : Section(ChunkKind::StackSizes) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { @@ -349,7 +351,7 @@ struct DynamicSection : Section { DynamicSection() : Section(ChunkKind::Dynamic) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { return 
S->Kind == ChunkKind::Dynamic; } @@ -380,7 +382,7 @@ struct NoteSection : Section { NoteSection() : Section(ChunkKind::Note) {} std::vector> getEntries() const override { - return {{"Notes", Notes.hasValue()}}; + return {{"Notes", Notes.has_value()}}; }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Note; } @@ -391,7 +393,7 @@ struct HashSection : Section { Optional> Chain; std::vector> getEntries() const override { - return {{"Bucket", Bucket.hasValue()}, {"Chain", Chain.hasValue()}}; + return {{"Bucket", Bucket.has_value()}, {"Chain", Chain.has_value()}}; }; // The following members are used to override section fields. @@ -433,10 +435,10 @@ struct GnuHashSection : Section { GnuHashSection() : Section(ChunkKind::GnuHash) {} std::vector> getEntries() const override { - return {{"Header", Header.hasValue()}, - {"BloomFilter", BloomFilter.hasValue()}, - {"HashBuckets", HashBuckets.hasValue()}, - {"HashValues", HashValues.hasValue()}}; + return {{"Header", Header.has_value()}, + {"BloomFilter", BloomFilter.has_value()}, + {"HashBuckets", HashBuckets.has_value()}, + {"HashValues", HashValues.has_value()}}; }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::GnuHash; } @@ -462,7 +464,7 @@ struct VerneedSection : Section { VerneedSection() : Section(ChunkKind::Verneed) {} std::vector> getEntries() const override { - return {{"Dependencies", VerneedV.hasValue()}}; + return {{"Dependencies", VerneedV.has_value()}}; }; static bool classof(const Chunk *S) { @@ -476,7 +478,7 @@ struct AddrsigSection : Section { AddrsigSection() : Section(ChunkKind::Addrsig) {} std::vector> getEntries() const override { - return {{"Symbols", Symbols.hasValue()}}; + return {{"Symbols", Symbols.has_value()}}; }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Addrsig; } @@ -493,7 +495,7 @@ struct LinkerOptionsSection : Section { LinkerOptionsSection() : Section(ChunkKind::LinkerOptions) {} std::vector> getEntries() const override { - return {{"Options", Options.hasValue()}}; + return {{"Options", Options.has_value()}}; }; static bool classof(const Chunk *S) { @@ -507,7 +509,7 @@ struct DependentLibrariesSection : Section { DependentLibrariesSection() : Section(ChunkKind::DependentLibraries) {} std::vector> getEntries() const override { - return {{"Libraries", Libs.hasValue()}}; + return {{"Libraries", Libs.has_value()}}; }; static bool classof(const Chunk *S) { @@ -527,7 +529,7 @@ struct CallGraphProfileSection : Section { CallGraphProfileSection() : Section(ChunkKind::CallGraphProfile) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { @@ -541,7 +543,7 @@ struct SymverSection : Section { SymverSection() : Section(ChunkKind::Symver) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Symver; } @@ -562,7 +564,7 @@ struct VerdefSection : Section { VerdefSection() : Section(ChunkKind::Verdef) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verdef; } @@ -577,7 +579,7 @@ struct GroupSection : Section { GroupSection() : Section(ChunkKind::Group) {} std::vector> getEntries() const override { - return {{"Members", Members.hasValue()}}; + return 
{{"Members", Members.has_value()}}; }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Group; } @@ -597,7 +599,7 @@ struct RelocationSection : Section { RelocationSection() : Section(ChunkKind::Relocation) {} std::vector> getEntries() const override { - return {{"Relocations", Relocations.hasValue()}}; + return {{"Relocations", Relocations.has_value()}}; }; static bool classof(const Chunk *S) { @@ -611,7 +613,7 @@ struct RelrSection : Section { RelrSection() : Section(ChunkKind::Relr) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { @@ -625,7 +627,7 @@ struct SymtabShndxSection : Section { SymtabShndxSection() : Section(ChunkKind::SymtabShndxSection) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { @@ -644,7 +646,7 @@ struct ARMIndexTableSection : Section { ARMIndexTableSection() : Section(ChunkKind::ARMIndexTable) {} std::vector> getEntries() const override { - return {{"Entries", Entries.hasValue()}}; + return {{"Entries", Entries.has_value()}}; }; static bool classof(const Chunk *S) { @@ -720,6 +722,7 @@ struct Object { llvm_unreachable("the section header table chunk must always be present"); } + ELF_ELFOSABI getOSAbi() const; unsigned getMachine() const; }; diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index 38a7de3d6131..095377c1b824 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -122,6 +122,7 @@ struct LinkEditData { std::vector NameList; std::vector StringTable; std::vector IndirectSymbols; + std::vector FunctionStarts; bool isEmpty() const; }; diff --git a/llvm/include/llvm/ObjectYAML/ObjectYAML.h b/llvm/include/llvm/ObjectYAML/ObjectYAML.h index 312777aadd4c..b63607e6796b 100644 --- a/llvm/include/llvm/ObjectYAML/ObjectYAML.h +++ b/llvm/include/llvm/ObjectYAML/ObjectYAML.h @@ -11,9 +11,11 @@ #include "llvm/ObjectYAML/ArchiveYAML.h" #include "llvm/ObjectYAML/COFFYAML.h" +#include "llvm/ObjectYAML/DXContainerYAML.h" #include "llvm/ObjectYAML/ELFYAML.h" #include "llvm/ObjectYAML/MachOYAML.h" #include "llvm/ObjectYAML/MinidumpYAML.h" +#include "llvm/ObjectYAML/OffloadYAML.h" #include "llvm/ObjectYAML/WasmYAML.h" #include "llvm/ObjectYAML/XCOFFYAML.h" #include "llvm/Support/YAMLTraits.h" @@ -31,8 +33,10 @@ struct YamlObjectFile { std::unique_ptr MachO; std::unique_ptr FatMachO; std::unique_ptr Minidump; + std::unique_ptr Offload; std::unique_ptr Wasm; std::unique_ptr Xcoff; + std::unique_ptr DXContainer; }; template <> struct MappingTraits { diff --git a/llvm/include/llvm/ObjectYAML/OffloadYAML.h b/llvm/include/llvm/ObjectYAML/OffloadYAML.h new file mode 100644 index 000000000000..a4fdbce0b320 --- /dev/null +++ b/llvm/include/llvm/ObjectYAML/OffloadYAML.h @@ -0,0 +1,79 @@ +//===- OffloadYAML.h - Offload Binary YAMLIO implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares classes for handling the YAML representation of +/// offloading binaries. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_OFFLOADYAML_H +#define LLVM_OBJECTYAML_OFFLOADYAML_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/Object/OffloadBinary.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace OffloadYAML { + +struct Binary { + struct StringEntry { + StringRef Key; + StringRef Value; + }; + + struct Member { + Optional ImageKind; + Optional OffloadKind; + Optional Flags; + Optional> StringEntries; + Optional Content; + }; + + Optional Version; + Optional Size; + Optional EntryOffset; + Optional EntrySize; + std::vector Members; +}; + +} // end namespace OffloadYAML +} // end namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::OffloadYAML::Binary::Member) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::OffloadYAML::Binary::StringEntry) + +namespace llvm { +namespace yaml { + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, object::ImageKind &Value); +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, object::OffloadKind &Value); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, OffloadYAML::Binary &O); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, OffloadYAML::Binary::StringEntry &M); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, OffloadYAML::Binary::Member &M); +}; + +} // end namespace yaml +} // end namespace llvm + +#endif // LLVM_OBJECTYAML_ARCHIVEYAML_H diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h index e3a1ba0d58a6..0f6c4f06665f 100644 --- a/llvm/include/llvm/ObjectYAML/WasmYAML.h +++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h @@ -62,11 +62,20 @@ struct Export { uint32_t Index; }; +struct InitExpr { + InitExpr() {} + bool Extended; + union { + wasm::WasmInitExprMVP Inst; + yaml::BinaryRef Body; + }; +}; + struct ElemSegment { uint32_t Flags; uint32_t TableNumber; ValueType ElemKind; - wasm::WasmInitExpr Offset; + InitExpr Offset; std::vector Functions; }; @@ -74,19 +83,20 @@ struct Global { uint32_t Index; ValueType Type; bool Mutable; - wasm::WasmInitExpr InitExpr; + InitExpr Init; }; struct Import { + Import() {} StringRef Module; StringRef Field; ExportKind Kind; union { uint32_t SigIndex; - Global GlobalImport; Table TableImport; Limits Memory; uint32_t TagIndex; + Global GlobalImport; }; }; @@ -114,7 +124,7 @@ struct DataSegment { uint32_t SectionOffset; uint32_t InitFlags; uint32_t MemoryIndex; - wasm::WasmInitExpr Offset; + InitExpr Offset; yaml::BinaryRef Content; }; @@ -526,8 +536,8 @@ template <> struct MappingTraits { static void mapping(IO &IO, WasmYAML::LocalDecl &LocalDecl); }; -template <> struct MappingTraits { - static void mapping(IO &IO, wasm::WasmInitExpr &Expr); +template <> struct MappingTraits { + static void mapping(IO &IO, WasmYAML::InitExpr &Expr); }; template <> struct MappingTraits { diff --git a/llvm/include/llvm/ObjectYAML/yaml2obj.h b/llvm/include/llvm/ObjectYAML/yaml2obj.h index 468f673fd451..000da077bb18 100644 --- a/llvm/include/llvm/ObjectYAML/yaml2obj.h +++ b/llvm/include/llvm/ObjectYAML/yaml2obj.h @@ -36,6 +36,10 @@ namespace MinidumpYAML { struct Object; } +namespace OffloadYAML { +struct Binary; +} + namespace WasmYAML { struct Object; } @@ -48,6 +52,10 @@ namespace ArchYAML { struct Archive; } +namespace DXContainerYAML { +struct Object; +} // namespace DXContainerYAML + namespace yaml { class Input; struct 
YamlObjectFile; @@ -61,8 +69,11 @@ bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH, bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler EH); bool yaml2minidump(MinidumpYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2offload(OffloadYAML::Binary &Doc, raw_ostream &Out, ErrorHandler EH); bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); bool yaml2xcoff(XCOFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2dxcontainer(DXContainerYAML::Object &Doc, raw_ostream &Out, + ErrorHandler EH); bool convertYAML(Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler, unsigned DocNum = 1, uint64_t MaxSize = UINT64_MAX); diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 74897de52a93..6a07e1c657dc 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -298,14 +298,24 @@ public: /// true if the option is present, false if the negation is present, and /// \p Default if neither option is given. If both the option and its /// negation are present, the last one wins. - bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default=true) const; + bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; /// hasFlag - Given an option \p Pos, an alias \p PosAlias and its negative /// form \p Neg, return true if the option or its alias is present, false if /// the negation is present, and \p Default if none of the options are /// given. If multiple options are present, the last one wins. bool hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg, - bool Default = true) const; + bool Default) const; + + /// Given an option Pos and its negative form Neg, render the option if Pos is + /// present. + void addOptInFlag(ArgStringList &Output, OptSpecifier Pos, + OptSpecifier Neg) const; + /// Render the option if Neg is present. + void addOptOutFlag(ArgStringList &Output, OptSpecifier Pos, + OptSpecifier Neg) const { + addOptInFlag(Output, Neg, Pos); + } /// Render only the last argument match \p Id0, if present. template diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h index 8aa9ba90a9ca..6445e16ab68f 100644 --- a/llvm/include/llvm/Pass.h +++ b/llvm/include/llvm/Pass.h @@ -228,6 +228,16 @@ public: template AnalysisType &getAnalysisID(AnalysisID PI, Function &F, bool *Changed = nullptr); + +#ifdef EXPENSIVE_CHECKS + /// Hash a module in order to detect when a module (or more specific) pass has + /// modified it. + uint64_t structuralHash(Module &M) const; + + /// Hash a function in order to detect when a function (or more specific) pass + /// has modified it. + virtual uint64_t structuralHash(Function &F) const; +#endif }; //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 66b0b149fa25..0cbbdf7f3ce8 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -215,8 +215,9 @@ public: /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. - ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, - bool LTOPreLink = false); + ModulePassManager + buildModuleOptimizationPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase LTOPhase); /// Build a per-module default optimization pipeline. 
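(Aside: the ArgList hunk above folds the common "query hasFlag, then render by hand" pattern into addOptInFlag/addOptOutFlag, and makes the Default parameter explicit at every hasFlag call site. A hedged sketch of both styles; OPT_ffoo and OPT_fno_foo are placeholder option IDs, a real driver takes these from its generated Options.inc table.)

#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"

// Placeholder option IDs for illustration only.
const llvm::opt::OptSpecifier OPT_ffoo(1);
const llvm::opt::OptSpecifier OPT_fno_foo(2);

// Before: query the flag pair, then render the positive flag by hand.
void renderManually(const llvm::opt::ArgList &Args,
                    llvm::opt::ArgStringList &CmdArgs) {
  if (Args.hasFlag(OPT_ffoo, OPT_fno_foo, /*Default=*/false))
    CmdArgs.push_back("-ffoo");
}

// After: one call renders the option when the positive form is in effect;
// addOptOutFlag mirrors it for "render only when the negative form wins".
void renderWithHelper(const llvm::opt::ArgList &Args,
                      llvm::opt::ArgStringList &CmdArgs) {
  Args.addOptInFlag(CmdArgs, OPT_ffoo, OPT_fno_foo);
}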
/// @@ -468,6 +469,15 @@ public: PipelineEarlySimplificationEPCallbacks.push_back(C); } + /// Register a callback for a default optimizer pipeline extension point + /// + /// This extension point allows adding optimizations before the function + /// optimization pipeline. + void registerOptimizerEarlyEPCallback( + const std::function &C) { + OptimizerEarlyEPCallbacks.push_back(C); + } + /// Register a callback for a default optimizer pipeline extension point /// /// This extension point allows adding optimizations at the very end of the @@ -477,6 +487,24 @@ public: OptimizerLastEPCallbacks.push_back(C); } + /// Register a callback for a default optimizer pipeline extension point + /// + /// This extension point allows adding optimizations at the start of the full + /// LTO pipeline. + void registerFullLinkTimeOptimizationEarlyEPCallback( + const std::function &C) { + FullLinkTimeOptimizationEarlyEPCallbacks.push_back(C); + } + + /// Register a callback for a default optimizer pipeline extension point + /// + /// This extension point allows adding optimizations at the end of the full + /// LTO pipeline. + void registerFullLinkTimeOptimizationLastEPCallback( + const std::function &C) { + FullLinkTimeOptimizationLastEPCallbacks.push_back(C); + } + /// Register a callback for parsing an AliasAnalysis Name to populate /// the given AAManager \p AA void registerParseAACallback( @@ -582,7 +610,8 @@ private: void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, - std::string ProfileRemappingFile); + std::string ProfileRemappingFile, + ThinOrFullLTOPhase LTOPhase); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); // Extension Point callbacks @@ -598,9 +627,15 @@ private: CGSCCOptimizerLateEPCallbacks; SmallVector, 2> VectorizerStartEPCallbacks; + // Module callbacks + SmallVector, 2> + OptimizerEarlyEPCallbacks; SmallVector, 2> OptimizerLastEPCallbacks; - // Module callbacks + SmallVector, 2> + FullLinkTimeOptimizationEarlyEPCallbacks; + SmallVector, 2> + FullLinkTimeOptimizationLastEPCallbacks; SmallVector, 2> PipelineStartEPCallbacks; SmallVector, 2> diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 561cd54fa998..32ecc9ec5fb0 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -187,17 +187,6 @@ protected: // Register required callbacks. void registerRequiredCallbacks(PassInstrumentationCallbacks &PIC); - // Return true when this is a defined function for which printing - // of changes is desired. - bool isInterestingFunction(const Function &F); - - // Return true when this is a pass for which printing of changes is desired. - bool isInterestingPass(StringRef PassID); - - // Return true when this is a pass on IR for which printing - // of changes is desired. - bool isInteresting(Any IR, StringRef PassID); - // Called on the first IR processed. virtual void handleInitialIR(Any IR) = 0; // Called before and after a pass to get the representation of the IR. @@ -491,6 +480,25 @@ protected: std::unique_ptr HTML; }; +// Print IR on crash. 
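(Aside: the new extension points above follow the existing registration idiom; each callback receives the pass manager for that point plus the optimization level. A minimal sketch of hooking the new OptimizerEarlyEP point from a plugin, assuming LLVM 15 headers; VerifierPass stands in for any module pass.)

#include "llvm/IR/Verifier.h"
#include "llvm/Passes/PassBuilder.h"

void hookOptimizerEarly(llvm::PassBuilder &PB) {
  PB.registerOptimizerEarlyEPCallback(
      [](llvm::ModulePassManager &MPM, llvm::OptimizationLevel Level) {
        // Queued to run before the function optimization pipeline at every -O level.
        MPM.addPass(llvm::VerifierPass());
      });
}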
+class PrintCrashIRInstrumentation { +public: + PrintCrashIRInstrumentation() + : SavedIR("*** Dump of IR Before Last Pass Unknown ***") {} + ~PrintCrashIRInstrumentation(); + void registerCallbacks(PassInstrumentationCallbacks &PIC); + void reportCrashIR(); + +protected: + std::string SavedIR; + +private: + // The crash reporter that will report on a crash. + static PrintCrashIRInstrumentation *CrashReporter; + // Crash handler registered when print-on-crash is specified. + static void SignalHandler(void *); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -504,6 +512,7 @@ class StandardInstrumentations { PseudoProbeVerifier PseudoProbeVerification; InLineChangePrinter PrintChangedDiff; DotCfgChangeReporter WebsiteChangeReporter; + PrintCrashIRInstrumentation PrintCrashIR; VerifyInstrumentation Verify; bool VerifyEach; diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index e1f45019b1a9..e35751512245 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -195,11 +195,11 @@ public: ArrayRef getExpressions() const { return Expressions; } /// Return a counter that represents the expression that adds LHS and RHS. - Counter add(Counter LHS, Counter RHS); + Counter add(Counter LHS, Counter RHS, bool Simplify = true); /// Return a counter that represents the expression that subtracts RHS from /// LHS. - Counter subtract(Counter LHS, Counter RHS); + Counter subtract(Counter LHS, Counter RHS, bool Simplify = true); }; using LineColPair = std::pair; diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index ef6515d39144..fe56f84f28b6 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -14,9 +14,7 @@ #ifndef LLVM_PROFILEDATA_GCOV_H #define LLVM_PROFILEDATA_GCOV_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -26,10 +24,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include -#include #include #include #include diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index a416eb28906e..401d278cbd06 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -281,13 +281,21 @@ bool needsComdatForCounter(const Function &F, const Module &M); /// An enum describing the attributes of an instrumented profile. enum class InstrProfKind { Unknown = 0x0, - FE = 0x1, // A frontend clang profile, incompatible with other attrs. - IR = 0x2, // An IR-level profile (default when -fprofile-generate is used). - BB = 0x4, // A profile with entry basic block instrumentation. - CS = 0x8, // A context sensitive IR-level profile. - SingleByteCoverage = 0x10, // Use single byte probes for coverage. - FunctionEntryOnly = 0x20, // Only instrument the function entry basic block. - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly) + // A frontend clang profile, incompatible with other attrs. + FrontendInstrumentation = 0x1, + // An IR-level profile (default when -fprofile-generate is used). 
+ IRInstrumentation = 0x2, + // A profile with entry basic block instrumentation. + FunctionEntryInstrumentation = 0x4, + // A context sensitive IR-level profile. + ContextSensitive = 0x8, + // Use single byte probes for coverage. + SingleByteCoverage = 0x10, + // Only instrument the function entry basic block. + FunctionEntryOnly = 0x20, + // A memory profile collected using -fprofile=memory. + MemProf = 0x40, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf) }; const std::error_category &instrprof_category(); @@ -1011,7 +1019,9 @@ enum ProfVersion { Version6 = 6, // An additional counter is added around logical operators. Version7 = 7, - // The current version is 7. + // An additional (optional) memory profile type is added. + Version8 = 8, + // The current version is 8. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1028,6 +1038,21 @@ struct Header { uint64_t Unused; // Becomes unused since version 4 uint64_t HashType; uint64_t HashOffset; + uint64_t MemProfOffset; + // New fields should only be added at the end to ensure that the size + // computation is correct. The methods below need to be updated to ensure that + // the new field is read correctly. + + // Reads a header struct from the buffer. + static Expected
readFromBuffer(const unsigned char *Buffer); + + // Returns the size of the header in bytes for all valid fields based on the + // version. I.e a older version header will return a smaller size. + size_t size() const; + + // Returns the format version in little endian. The header retains the version + // in native endian of the compiler runtime. + uint64_t formatVersion() const; }; // Profile summary data recorded in the profile data file in indexed diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h index 3d0076fd9035..79995c813266 100644 --- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h +++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h @@ -13,16 +13,17 @@ #define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H #include "llvm/ADT/DenseSet.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" #include namespace llvm { +class DWARFContext; +class DWARFDie; +namespace object { +class ObjectFile; +} /// InstrProfCorrelator - A base class used to create raw instrumentation data /// to their functions. diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 62054a6a3df5..282620d8b5dc 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 7 +#define INSTR_PROF_INDEX_VERSION 8 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 @@ -662,6 +662,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. + * The 62nd bit indicates whether memory profile information is present. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -671,6 +672,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59) #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) +#define VARIANT_MASK_MEMPROF (0x1ULL << 62) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index e9dd19a69792..3a25de05bbf1 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -19,6 +19,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" @@ -39,25 +40,36 @@ namespace llvm { class InstrProfReader; /// A file format agnostic iterator over profiling data. 
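(Aside: a sketch of how tools consume the templated iterator defined just below, through the reader's begin()/end() pair; assumes the LLVM 15 reader API and abbreviates error handling.)

#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

llvm::Error dumpFunctionCounts(const llvm::Twine &Path) {
  auto ReaderOrErr = llvm::InstrProfReader::create(Path);
  if (!ReaderOrErr)
    return ReaderOrErr.takeError();
  std::unique_ptr<llvm::InstrProfReader> Reader = std::move(*ReaderOrErr);
  // Range-for uses InstrProfIterator<> and readNextRecord() under the hood.
  for (llvm::NamedInstrProfRecord &Record : *Reader)
    llvm::outs() << Record.Name << ": " << Record.Counts.size()
                 << " counters\n";
  return llvm::Error::success();
}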
+template <class record_type = NamedInstrProfRecord,
+          class reader_type = InstrProfReader>
 class InstrProfIterator {
 public:
   using iterator_category = std::input_iterator_tag;
-  using value_type = NamedInstrProfRecord;
+  using value_type = record_type;
   using difference_type = std::ptrdiff_t;
   using pointer = value_type *;
   using reference = value_type &;

 private:
-  InstrProfReader *Reader = nullptr;
+  reader_type *Reader = nullptr;
   value_type Record;

-  void Increment();
+  void increment() {
+    if (Error E = Reader->readNextRecord(Record)) {
+      // Handle errors in the reader.
+      InstrProfError::take(std::move(E));
+      *this = InstrProfIterator();
+    }
+  }

 public:
   InstrProfIterator() = default;
-  InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
+  InstrProfIterator(reader_type *Reader) : Reader(Reader) { increment(); }

-  InstrProfIterator &operator++() { Increment(); return *this; }
+  InstrProfIterator &operator++() {
+    increment();
+    return *this;
+  }
   bool operator==(const InstrProfIterator &RHS) const {
     return Reader == RHS.Reader;
   }
@@ -88,8 +100,8 @@ public:
   virtual Error printBinaryIds(raw_ostream &OS) { return Error::success(); };

   /// Iterator over profile data.
-  InstrProfIterator begin() { return InstrProfIterator(this); }
-  InstrProfIterator end() { return InstrProfIterator(); }
+  InstrProfIterator<> begin() { return InstrProfIterator<>(this); }
+  InstrProfIterator<> end() { return InstrProfIterator<>(); }

   virtual bool isIRLevelProfile() const = 0;
@@ -201,15 +213,16 @@ public:
   static bool hasFormat(const MemoryBuffer &Buffer);

   bool isIRLevelProfile() const override {
-    return static_cast<bool>(ProfileKind & InstrProfKind::IR);
+    return static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation);
   }

   bool hasCSIRLevelProfile() const override {
-    return static_cast<bool>(ProfileKind & InstrProfKind::CS);
+    return static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive);
   }

   bool instrEntryBBEnabled() const override {
-    return static_cast<bool>(ProfileKind & InstrProfKind::BB);
+    return static_cast<bool>(ProfileKind &
+                             InstrProfKind::FunctionEntryInstrumentation);
   }

   bool hasSingleByteCoverage() const override {
@@ -460,6 +473,11 @@ struct InstrProfReaderIndexBase {
 using OnDiskHashTableImplV3 =
     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;

+using MemProfRecordHashTable =
+    OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
+using MemProfFrameHashTable =
+    OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
+
 template <typename HashTableImpl>
 class InstrProfReaderItaniumRemapper;
@@ -545,6 +563,13 @@ private:
   std::unique_ptr<ProfileSummary> Summary;
   /// Context sensitive profile summary data.
   std::unique_ptr<ProfileSummary> CS_Summary;
+  /// MemProf profile schema (if available).
+  memprof::MemProfSchema Schema;
+  /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
+  std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
+  /// MemProf frame profile data on-disk indexed via frame id.
+  std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+
   // Index to the current record in the record array.
   unsigned RecordIndex;
@@ -598,6 +623,10 @@ public:
   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
                                                uint64_t FuncHash);

+  /// Return the memprof record for the function identified by
+  /// llvm::md5(Name).
+  Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash);
+
   /// Fill Counts with the profile data for the given function name.
Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index af1e46cf4fc2..29e07961a2f4 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -15,11 +15,13 @@ #define LLVM_PROFILEDATA_INSTRPROFWRITER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" #include #include @@ -28,6 +30,7 @@ namespace llvm { /// Writer for instrumentation based profile data. class InstrProfRecordWriterTrait; class ProfOStream; +class MemoryBuffer; class raw_fd_ostream; class InstrProfWriter { @@ -37,6 +40,16 @@ public: private: bool Sparse; StringMap FunctionData; + + // A map to hold memprof data per function. The lower 64 bits obtained from + // the md5 hash of the function name is used to index into the map. + llvm::MapVector + MemProfRecordData; + // A map to hold frame id to frame mappings. The mappings are used to + // convert IndexedMemProfRecord to MemProfRecords with frame information + // inline. + llvm::MapVector MemProfFrameData; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -57,6 +70,15 @@ public: addRecord(std::move(I), 1, Warn); } + /// Add a memprof record for a function identified by its \p Id. + void addMemProfRecord(const GlobalValue::GUID Id, + const memprof::IndexedMemProfRecord &Record); + + /// Add a memprof frame identified by the hash of the contents of the frame in + /// \p FrameId. + bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, + function_ref Warn); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn); @@ -97,11 +119,13 @@ public: // Check if the profiles are in-compatible. Clang frontend profiles can't be // merged with other profile types. - if (static_cast((ProfileKind & InstrProfKind::FE) ^ - (Other & InstrProfKind::FE))) { + if (static_cast( + (ProfileKind & InstrProfKind::FrontendInstrumentation) ^ + (Other & InstrProfKind::FrontendInstrumentation))) { return make_error(instrprof_error::unsupported_version); } - if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) { + if (testIncompatible(InstrProfKind::FunctionEntryOnly, + InstrProfKind::FunctionEntryInstrumentation)) { return make_error( instrprof_error::unsupported_version, "cannot merge FunctionEntryOnly profiles and BB profiles together"); @@ -112,6 +136,8 @@ public: return Error::success(); } + InstrProfKind getProfileKind() const { return ProfileKind; } + // Internal interface for testing purpose only. void setValueProfDataEndianness(support::endianness Endianness); void setOutputSparse(bool Sparse); diff --git a/llvm/include/llvm/ProfileData/MIBEntryDef.inc b/llvm/include/llvm/ProfileData/MIBEntryDef.inc new file mode 100644 index 000000000000..f5c6f0e4924b --- /dev/null +++ b/llvm/include/llvm/ProfileData/MIBEntryDef.inc @@ -0,0 +1,47 @@ +/*===-- MemEntryDef.inc - MemProf profiling runtime macros -*- C++ -*-======== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This file defines the macros for memprof profiling data structures. + * Eg. usage to define the memprof meminfoblock struct: + * + * struct MemInfoBlock { + * #define MIBEntryDef(NameTag, Name, Type) Type Name; + * #include MIBEntryDef.inc + * #undef MIBEntryDef + * }; + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ +#ifndef MIBEntryDef +#define MIBEntryDef(NameTag, Name, Type) +#endif + +MIBEntryDef(AllocCount = 1, AllocCount, uint32_t) +MIBEntryDef(TotalAccessCount = 2, TotalAccessCount, uint64_t) +MIBEntryDef(MinAccessCount = 3, MinAccessCount, uint64_t) +MIBEntryDef(MaxAccessCount = 4, MaxAccessCount, uint64_t) +MIBEntryDef(TotalSize = 5, TotalSize, uint64_t) +MIBEntryDef(MinSize = 6, MinSize, uint32_t) +MIBEntryDef(MaxSize = 7, MaxSize, uint32_t) +MIBEntryDef(AllocTimestamp = 8, AllocTimestamp, uint32_t) +MIBEntryDef(DeallocTimestamp = 9, DeallocTimestamp, uint32_t) +MIBEntryDef(TotalLifetime = 10, TotalLifetime, uint64_t) +MIBEntryDef(MinLifetime = 11, MinLifetime, uint32_t) +MIBEntryDef(MaxLifetime = 12, MaxLifetime, uint32_t) +MIBEntryDef(AllocCpuId = 13, AllocCpuId, uint32_t) +MIBEntryDef(DeallocCpuId = 14, DeallocCpuId, uint32_t) +MIBEntryDef(NumMigratedCpu = 15, NumMigratedCpu, uint32_t) +MIBEntryDef(NumLifetimeOverlaps = 16, NumLifetimeOverlaps, uint32_t) +MIBEntryDef(NumSameAllocCpu = 17, NumSameAllocCpu, uint32_t) +MIBEntryDef(NumSameDeallocCpu = 18, NumSameDeallocCpu, uint32_t) +MIBEntryDef(DataTypeId = 19, DataTypeId, uint64_t) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h new file mode 100644 index 000000000000..bcee3b25bf87 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -0,0 +1,613 @@ +#ifndef LLVM_PROFILEDATA_MEMPROF_H_ +#define LLVM_PROFILEDATA_MEMPROF_H_ + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/ProfileData/MemProfData.inc" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { +namespace memprof { + +enum class Meta : uint64_t { + Start = 0, +#define MIBEntryDef(NameTag, Name, Type) NameTag, +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + Size +}; + +using MemProfSchema = llvm::SmallVector(Meta::Size)>; + +// Holds the actual MemInfoBlock data with all fields. Contents may be read or +// written partially by providing an appropriate schema to the serialize and +// deserialize methods. 
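(Aside: MIBEntryDef.inc is a classic X-macro; every include site defines MIBEntryDef to stamp out one line per field, which keeps the struct, the Meta enum, the getters, and the (de)serializers in sync from a single list. A sketch of the struct-field expansion for the first two entries; the real expansion covers all 19.)

#include <cstdint>

// Equivalent to:
//   #define MIBEntryDef(NameTag, Name, Type) Type Name;
//   #include "MIBEntryDef.inc"
//   #undef MIBEntryDef
struct MemInfoBlockSketch {
  uint32_t AllocCount;       // from MIBEntryDef(AllocCount = 1, AllocCount, uint32_t)
  uint64_t TotalAccessCount; // from MIBEntryDef(TotalAccessCount = 2, TotalAccessCount, uint64_t)
  // ...remaining entries expand the same way.
};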
+struct PortableMemInfoBlock { + PortableMemInfoBlock() = default; + explicit PortableMemInfoBlock(const MemInfoBlock &Block) { +#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name; +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + } + + PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) { + deserialize(Schema, Ptr); + } + + // Read the contents of \p Ptr based on the \p Schema to populate the + // MemInfoBlock member. + void deserialize(const MemProfSchema &Schema, const unsigned char *Ptr) { + using namespace support; + + for (const Meta Id : Schema) { + switch (Id) { +#define MIBEntryDef(NameTag, Name, Type) \ + case Meta::Name: { \ + Name = endian::readNext(Ptr); \ + } break; +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + default: + llvm_unreachable("Unknown meta type id, is the profile collected from " + "a newer version of the runtime?"); + } + } + } + + // Write the contents of the MemInfoBlock based on the \p Schema provided to + // the raw_ostream \p OS. + void serialize(const MemProfSchema &Schema, raw_ostream &OS) const { + using namespace support; + + endian::Writer LE(OS, little); + for (const Meta Id : Schema) { + switch (Id) { +#define MIBEntryDef(NameTag, Name, Type) \ + case Meta::Name: { \ + LE.write(Name); \ + } break; +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + default: + llvm_unreachable("Unknown meta type id, invalid input?"); + } + } + } + + // Print out the contents of the MemInfoBlock in YAML format. + void printYAML(raw_ostream &OS) const { + OS << " MemInfoBlock:\n"; +#define MIBEntryDef(NameTag, Name, Type) \ + OS << " " << #Name << ": " << Name << "\n"; +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + } + + // Define getters for each type which can be called by analyses. +#define MIBEntryDef(NameTag, Name, Type) \ + Type get##Name() const { return Name; } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + + void clear() { *this = PortableMemInfoBlock(); } + + // Returns the full schema currently in use. + static MemProfSchema getSchema() { + MemProfSchema List; +#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return List; + } + + bool operator==(const PortableMemInfoBlock &Other) const { +#define MIBEntryDef(NameTag, Name, Type) \ + if (Other.get##Name() != get##Name()) \ + return false; +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return true; + } + + bool operator!=(const PortableMemInfoBlock &Other) const { + return !operator==(Other); + } + + static constexpr size_t serializedSize() { + size_t Result = 0; +#define MIBEntryDef(NameTag, Name, Type) Result += sizeof(Type); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return Result; + } + +private: +#define MIBEntryDef(NameTag, Name, Type) Type Name = Type(); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef +}; + +// A type representing the id generated by hashing the contents of the Frame. +using FrameId = uint64_t; +// Describes a call frame for a dynamic allocation context. The contents of +// the frame are populated by symbolizing the stack depot call frame from the +// compiler runtime. +struct Frame { + // A uuid (uint64_t) identifying the function. It is obtained by + // llvm::md5(FunctionName) which returns the lower 64 bits. + GlobalValue::GUID Function; + // The symbol name for the function. 
Only populated in the Frame by the reader + // if requested during initialization. This field should not be serialized. + llvm::Optional SymbolName; + // The source line offset of the call from the beginning of parent function. + uint32_t LineOffset; + // The source column number of the call to help distinguish multiple calls + // on the same line. + uint32_t Column; + // Whether the current frame is inlined. + bool IsInlineFrame; + + Frame(const Frame &Other) { + Function = Other.Function; + SymbolName = Other.SymbolName; + LineOffset = Other.LineOffset; + Column = Other.Column; + IsInlineFrame = Other.IsInlineFrame; + } + + Frame(uint64_t Hash, uint32_t Off, uint32_t Col, bool Inline) + : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} + + bool operator==(const Frame &Other) const { + // Ignore the SymbolName field to avoid a string compare. Comparing the + // function hash serves the same purpose. + return Other.Function == Function && Other.LineOffset == LineOffset && + Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; + } + + Frame &operator=(const Frame &Other) { + Function = Other.Function; + SymbolName = Other.SymbolName; + LineOffset = Other.LineOffset; + Column = Other.Column; + IsInlineFrame = Other.IsInlineFrame; + return *this; + } + + bool operator!=(const Frame &Other) const { return !operator==(Other); } + + // Write the contents of the frame to the ostream \p OS. + void serialize(raw_ostream &OS) const { + using namespace support; + + endian::Writer LE(OS, little); + + // If the type of the GlobalValue::GUID changes, then we need to update + // the reader and the writer. + static_assert(std::is_same::value, + "Expect GUID to be uint64_t."); + LE.write(Function); + + LE.write(LineOffset); + LE.write(Column); + LE.write(IsInlineFrame); + } + + // Read a frame from char data which has been serialized as little endian. + static Frame deserialize(const unsigned char *Ptr) { + using namespace support; + + const uint64_t F = endian::readNext(Ptr); + const uint32_t L = endian::readNext(Ptr); + const uint32_t C = endian::readNext(Ptr); + const bool I = endian::readNext(Ptr); + return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C, + /*IsInlineFrame=*/I); + } + + // Returns the size of the frame information. + static constexpr size_t serializedSize() { + return sizeof(Frame::Function) + sizeof(Frame::LineOffset) + + sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame); + } + + // Print the frame information in YAML format. + void printYAML(raw_ostream &OS) const { + OS << " -\n" + << " Function: " << Function << "\n" + << " SymbolName: " << SymbolName.value_or("") << "\n" + << " LineOffset: " << LineOffset << "\n" + << " Column: " << Column << "\n" + << " Inline: " << IsInlineFrame << "\n"; + } + + // Return a hash value based on the contents of the frame. Here we don't use + // hashing from llvm ADT since we are going to persist the hash id, the hash + // combine algorithm in ADT uses a new randomized seed each time. + inline FrameId hash() const { + auto HashCombine = [](auto Value, size_t Seed) { + std::hash Hasher; + // The constant used below is the 64 bit representation of the fractional + // part of the golden ratio. Used here for the randomness in their bit + // pattern. 
+ return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2); + }; + + size_t Result = 0; + Result ^= HashCombine(Function, Result); + Result ^= HashCombine(LineOffset, Result); + Result ^= HashCombine(Column, Result); + Result ^= HashCombine(IsInlineFrame, Result); + return static_cast(Result); + } +}; + +// Holds allocation information in a space efficient format where frames are +// represented using unique identifiers. +struct IndexedAllocationInfo { + // The dynamic calling context for the allocation in bottom-up (leaf-to-root) + // order. Frame contents are stored out-of-line. + llvm::SmallVector CallStack; + // The statistics obtained from the runtime for the allocation. + PortableMemInfoBlock Info; + + IndexedAllocationInfo() = default; + IndexedAllocationInfo(ArrayRef CS, const MemInfoBlock &MB) + : CallStack(CS.begin(), CS.end()), Info(MB) {} + + // Returns the size in bytes when this allocation info struct is serialized. + size_t serializedSize() const { + return sizeof(uint64_t) + // The number of frames to serialize. + sizeof(FrameId) * CallStack.size() + // The callstack frame ids. + PortableMemInfoBlock::serializedSize(); // The size of the payload. + } + + bool operator==(const IndexedAllocationInfo &Other) const { + if (Other.Info != Info) + return false; + + if (Other.CallStack.size() != CallStack.size()) + return false; + + for (size_t J = 0; J < Other.CallStack.size(); J++) { + if (Other.CallStack[J] != CallStack[J]) + return false; + } + return true; + } + + bool operator!=(const IndexedAllocationInfo &Other) const { + return !operator==(Other); + } +}; + +// Holds allocation information with frame contents inline. The type should +// be used for temporary in-memory instances. +struct AllocationInfo { + // Same as IndexedAllocationInfo::CallStack with the frame contents inline. + llvm::SmallVector CallStack; + // Same as IndexedAllocationInfo::Info; + PortableMemInfoBlock Info; + + AllocationInfo() = default; + AllocationInfo( + const IndexedAllocationInfo &IndexedAI, + llvm::function_ref IdToFrameCallback) { + for (const FrameId &Id : IndexedAI.CallStack) { + CallStack.push_back(IdToFrameCallback(Id)); + } + Info = IndexedAI.Info; + } + + void printYAML(raw_ostream &OS) const { + OS << " -\n"; + OS << " Callstack:\n"; + // TODO: Print out the frame on one line with to make it easier for deep + // callstacks once we have a test to check valid YAML is generated. + for (const Frame &F : CallStack) { + F.printYAML(OS); + } + Info.printYAML(OS); + } +}; + +// Holds the memprof profile information for a function. The internal +// representation stores frame ids for efficiency. This representation should +// be used in the profile conversion and manipulation tools. +struct IndexedMemProfRecord { + // Memory allocation sites in this function for which we have memory + // profiling data. + llvm::SmallVector AllocSites; + // Holds call sites in this function which are part of some memory + // allocation context. We store this as a list of locations, each with its + // list of inline locations in bottom-up order i.e. from leaf to root. The + // inline location list may include additional entries, users should pick + // the last entry in the list with the same function GUID. + llvm::SmallVector> CallSites; + + void clear() { + AllocSites.clear(); + CallSites.clear(); + } + + void merge(const IndexedMemProfRecord &Other) { + // TODO: Filter out duplicates which may occur if multiple memprof + // profiles are merged together using llvm-profdata. 
+ AllocSites.append(Other.AllocSites); + CallSites.append(Other.CallSites); + } + + size_t serializedSize() const { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : AllocSites) + Result += N.serializedSize(); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : CallSites) { + // The number of frame ids to serialize. + Result += sizeof(uint64_t); + Result += Frames.size() * sizeof(FrameId); + } + return Result; + } + + bool operator==(const IndexedMemProfRecord &Other) const { + if (Other.AllocSites.size() != AllocSites.size()) + return false; + + if (Other.CallSites.size() != CallSites.size()) + return false; + + for (size_t I = 0; I < AllocSites.size(); I++) { + if (AllocSites[I] != Other.AllocSites[I]) + return false; + } + + for (size_t I = 0; I < CallSites.size(); I++) { + if (CallSites[I] != Other.CallSites[I]) + return false; + } + return true; + } + + // Serializes the memprof records in \p Records to the ostream \p OS based + // on the schema provided in \p Schema. + void serialize(const MemProfSchema &Schema, raw_ostream &OS); + + // Deserializes memprof records from the Buffer. + static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, + const unsigned char *Buffer); + + // Returns the GUID for the function name after canonicalization. For + // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are + // mapped to functions using this GUID. + static GlobalValue::GUID getGUID(const StringRef FunctionName); +}; + +// Holds the memprof profile information for a function. The internal +// representation stores frame contents inline. This representation should +// be used for small amount of temporary, in memory instances. +struct MemProfRecord { + // Same as IndexedMemProfRecord::AllocSites with frame contents inline. + llvm::SmallVector AllocSites; + // Same as IndexedMemProfRecord::CallSites with frame contents inline. + llvm::SmallVector> CallSites; + + MemProfRecord() = default; + MemProfRecord( + const IndexedMemProfRecord &Record, + llvm::function_ref IdToFrameCallback) { + for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) { + AllocSites.emplace_back(IndexedAI, IdToFrameCallback); + } + for (const ArrayRef Site : Record.CallSites) { + llvm::SmallVector Frames; + for (const FrameId Id : Site) { + Frames.push_back(IdToFrameCallback(Id)); + } + CallSites.push_back(Frames); + } + } + + // Prints out the contents of the memprof record in YAML. + void print(llvm::raw_ostream &OS) const { + if (!AllocSites.empty()) { + OS << " AllocSites:\n"; + for (const AllocationInfo &N : AllocSites) + N.printYAML(OS); + } + + if (!CallSites.empty()) { + OS << " CallSites:\n"; + for (const llvm::SmallVector &Frames : CallSites) { + for (const Frame &F : Frames) { + OS << " -\n"; + F.printYAML(OS); + } + } + } + } +}; + +// Reads a memprof schema from a buffer. All entries in the buffer are +// interpreted as uint64_t. The first entry in the buffer denotes the number of +// ids in the schema. Subsequent entries are integers which map to memprof::Meta +// enum class entries. After successfully reading the schema, the pointer is one +// byte past the schema contents. +Expected readMemProfSchema(const unsigned char *&Buffer); + +// Trait for reading IndexedMemProfRecord data from the on-disk hash table. 
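(Aside: the schema-driven serialize/deserialize pair above is what makes the indexed format self-describing; a reader walks only the fields its schema names, in order. A round-trip sketch using only declarations from this file.)

#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>

void roundTrip(const llvm::memprof::MemInfoBlock &Block) {
  using namespace llvm::memprof;
  const MemProfSchema Schema = PortableMemInfoBlock::getSchema();

  // Serialize exactly the fields named by the (here, full) schema.
  std::string Buffer;
  llvm::raw_string_ostream OS(Buffer);
  PortableMemInfoBlock(Block).serialize(Schema, OS);
  OS.flush();

  // Deserialize with the same schema the writer used.
  PortableMemInfoBlock Copy(
      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
  assert(Copy == PortableMemInfoBlock(Block) && "round trip should be lossless");
}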
+class RecordLookupTrait { +public: + using data_type = const IndexedMemProfRecord &; + using internal_key_type = uint64_t; + using external_key_type = uint64_t; + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + RecordLookupTrait() = delete; + RecordLookupTrait(const MemProfSchema &S) : Schema(S) {} + + static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } + static uint64_t GetInternalKey(uint64_t K) { return K; } + static uint64_t GetExternalKey(uint64_t K) { return K; } + + hash_value_type ComputeHash(uint64_t K) { return K; } + + static std::pair + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = endian::readNext(D); + offset_type DataLen = endian::readNext(D); + return std::make_pair(KeyLen, DataLen); + } + + uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { + using namespace support; + return endian::readNext(D); + } + + data_type ReadData(uint64_t K, const unsigned char *D, + offset_type /*Unused*/) { + Record = IndexedMemProfRecord::deserialize(Schema, D); + return Record; + } + +private: + // Holds the memprof schema used to deserialize records. + MemProfSchema Schema; + // Holds the records from one function deserialized from the indexed format. + IndexedMemProfRecord Record; +}; + +// Trait for writing IndexedMemProfRecord data to the on-disk hash table. +class RecordWriterTrait { +public: + using key_type = uint64_t; + using key_type_ref = uint64_t; + + using data_type = IndexedMemProfRecord; + using data_type_ref = IndexedMemProfRecord &; + + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + // Pointer to the memprof schema to use for the generator. Unlike the reader + // we must use a default constructor with no params for the writer trait so we + // have a public member which must be initialized by the user. + MemProfSchema *Schema = nullptr; + + RecordWriterTrait() = default; + + static hash_value_type ComputeHash(key_type_ref K) { return K; } + + static std::pair + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace support; + + endian::Writer LE(Out, little); + offset_type N = sizeof(K); + LE.write(N); + offset_type M = V.serializedSize(); + LE.write(M); + return std::make_pair(N, M); + } + + void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { + using namespace support; + endian::Writer LE(Out, little); + LE.write(K); + } + + void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, + offset_type /*Unused*/) { + assert(Schema != nullptr && "MemProf schema is not initialized!"); + V.serialize(*Schema, Out); + } +}; + +// Trait for writing frame mappings to the on-disk hash table. 
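(Aside: these traits plug into the generic on-disk hash table machinery in llvm/Support/OnDiskHashTable.h. A sketch of emitting records keyed by function GUID through the RecordWriterTrait above; this mirrors how an indexed writer would use it, but is illustrative only.)

#include "llvm/ADT/MapVector.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"

uint64_t writeRecords(
    llvm::raw_ostream &OS,
    llvm::MapVector<llvm::GlobalValue::GUID,
                    llvm::memprof::IndexedMemProfRecord> &Records,
    llvm::memprof::MemProfSchema &Schema) {
  llvm::OnDiskChainedHashTableGenerator<llvm::memprof::RecordWriterTrait>
      Generator;
  llvm::memprof::RecordWriterTrait Trait;
  Trait.Schema = &Schema; // the writer trait needs the schema injected
  for (auto &[GUID, Record] : Records)
    Generator.insert(GUID, Record, Trait);
  // Emit returns the table offset; a reader seeks here to find the header.
  return Generator.Emit(OS, Trait);
}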
+class FrameWriterTrait { +public: + using key_type = FrameId; + using key_type_ref = FrameId; + + using data_type = Frame; + using data_type_ref = Frame &; + + using hash_value_type = FrameId; + using offset_type = uint64_t; + + static hash_value_type ComputeHash(key_type_ref K) { return K; } + + static std::pair + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace support; + endian::Writer LE(Out, little); + offset_type N = sizeof(K); + LE.write(N); + offset_type M = V.serializedSize(); + LE.write(M); + return std::make_pair(N, M); + } + + void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { + using namespace support; + endian::Writer LE(Out, little); + LE.write(K); + } + + void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, + offset_type /*Unused*/) { + V.serialize(Out); + } +}; + +// Trait for reading frame mappings from the on-disk hash table. +class FrameLookupTrait { +public: + using data_type = const Frame; + using internal_key_type = FrameId; + using external_key_type = FrameId; + using hash_value_type = FrameId; + using offset_type = uint64_t; + + static bool EqualKey(internal_key_type A, internal_key_type B) { + return A == B; + } + static uint64_t GetInternalKey(internal_key_type K) { return K; } + static uint64_t GetExternalKey(external_key_type K) { return K; } + + hash_value_type ComputeHash(internal_key_type K) { return K; } + + static std::pair + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = endian::readNext(D); + offset_type DataLen = endian::readNext(D); + return std::make_pair(KeyLen, DataLen); + } + + uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { + using namespace support; + return endian::readNext(D); + } + + data_type ReadData(uint64_t K, const unsigned char *D, + offset_type /*Unused*/) { + return Frame::deserialize(D); + } +}; +} // namespace memprof +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROF_H_ diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index ff22a697965c..6433cef84865 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -1,5 +1,5 @@ -#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC -#define LLVM_PROFILEDATA_MEMPROFDATA_INC +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC /*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ |* |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -80,71 +80,90 @@ PACKED(struct SegmentEntry { } }); +// Packed struct definition for MSVC. We can't use the PACKED macro defined in +// MemProfData.inc since it would mean we are embedding a directive (the +// #include for MIBEntryDef) into the macros which is undefined behaviour. +#ifdef _MSC_VER +__pragma(pack(push,1)) +#endif + // A struct representing the heap allocation characteristics of a particular // runtime context. This struct is shared between the compiler-rt runtime and // the raw profile reader. The indexed format uses a separate, self-describing // backwards compatible format. 
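(Aside: the PACKED wrapper MemProfData.inc applies to these structs expands roughly as below; the macro body is reconstructed from its visible MSVC/GCC usage, so treat it as a sketch. Packing matters because the raw profile bytes are shared between the compiler-rt runtime and this reader, so layouts must match byte for byte.)

#include <cstdint>

#ifdef _MSC_VER
#define PACKED(...) __pragma(pack(push, 1)) __VA_ARGS__ __pragma(pack(pop))
#else
#define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
#endif

PACKED(struct Example {
  uint32_t A; // without packing, 4 padding bytes would follow this field
  uint64_t B;
});

static_assert(sizeof(Example) == 12, "4 + 8 bytes, no padding");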
-PACKED(struct MemInfoBlock { - uint32_t alloc_count; - uint64_t total_access_count, min_access_count, max_access_count; - uint64_t total_size; - uint32_t min_size, max_size; - uint32_t alloc_timestamp, dealloc_timestamp; - uint64_t total_lifetime; - uint32_t min_lifetime, max_lifetime; - uint32_t alloc_cpu_id, dealloc_cpu_id; - uint32_t num_migrated_cpu; - - // Only compared to prior deallocated object currently. - uint32_t num_lifetime_overlaps; - uint32_t num_same_alloc_cpu; - uint32_t num_same_dealloc_cpu; - - uint64_t data_type_id; // TODO: hash of type name - - MemInfoBlock() : alloc_count(0) {} - - MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp, - uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu) - : alloc_count(1), total_access_count(access_count), - min_access_count(access_count), max_access_count(access_count), - total_size(size), min_size(size), max_size(size), - alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), - total_lifetime(dealloc_timestamp - alloc_timestamp), - min_lifetime(total_lifetime), max_lifetime(total_lifetime), - alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), - num_lifetime_overlaps(0), num_same_alloc_cpu(0), - num_same_dealloc_cpu(0) { - num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; - } - - void Merge(const MemInfoBlock &newMIB) { - alloc_count += newMIB.alloc_count; - - total_access_count += newMIB.total_access_count; - min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count; - max_access_count = newMIB.max_access_count < max_access_count ? newMIB.max_access_count : max_access_count; - - total_size += newMIB.total_size; - min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size; - max_size = newMIB.max_size < max_size ? newMIB.max_size : max_size; +struct MemInfoBlock{ + +#define MIBEntryDef(NameTag, Name, Type) Type Name; +#include "MIBEntryDef.inc" +#undef MIBEntryDef + +bool operator==(const MemInfoBlock& Other) const { + bool IsEqual = true; +#define MIBEntryDef(NameTag, Name, Type) \ + IsEqual = (IsEqual && Name == Other.Name); +#include "MIBEntryDef.inc" +#undef MIBEntryDef + return IsEqual; +} + +MemInfoBlock() { +#define MIBEntryDef(NameTag, Name, Type) Name = Type(); +#include "MIBEntryDef.inc" +#undef MIBEntryDef +} + +MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs, + uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu) + : MemInfoBlock() { + AllocCount = 1U; + TotalAccessCount = AccessCount; + MinAccessCount = AccessCount; + MaxAccessCount = AccessCount; + TotalSize = Size; + MinSize = Size; + MaxSize = Size; + AllocTimestamp = AllocTs; + DeallocTimestamp = DeallocTs; + TotalLifetime = DeallocTimestamp - AllocTimestamp; + MinLifetime = TotalLifetime; + MaxLifetime = TotalLifetime; + AllocCpuId = AllocCpu; + DeallocCpuId = DeallocCpu; + NumMigratedCpu = AllocCpuId != DeallocCpuId; +} + +void Merge(const MemInfoBlock &newMIB) { + AllocCount += newMIB.AllocCount; + + TotalAccessCount += newMIB.TotalAccessCount; + MinAccessCount = newMIB.MinAccessCount < MinAccessCount ? newMIB.MinAccessCount : MinAccessCount; + MaxAccessCount = newMIB.MaxAccessCount < MaxAccessCount ? newMIB.MaxAccessCount : MaxAccessCount; + + TotalSize += newMIB.TotalSize; + MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize; + MaxSize = newMIB.MaxSize < MaxSize ? newMIB.MaxSize : MaxSize; + + TotalLifetime += newMIB.TotalLifetime; + MinLifetime = newMIB.MinLifetime < MinLifetime ? 
newMIB.MinLifetime : MinLifetime;
+  MaxLifetime = newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
+
+  // We know newMIB was deallocated later, so just need to check if it was
+  // allocated before last one deallocated.
+  NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
+  AllocTimestamp = newMIB.AllocTimestamp;
+  DeallocTimestamp = newMIB.DeallocTimestamp;
+
+  NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
+  NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
+  AllocCpuId = newMIB.AllocCpuId;
+  DeallocCpuId = newMIB.DeallocCpuId;
+}
-    total_lifetime += newMIB.total_lifetime;
-    min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime;
-    max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime;
-
-    // We know newMIB was deallocated later, so just need to check if it was
-    // allocated before last one deallocated.
-    num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
-    alloc_timestamp = newMIB.alloc_timestamp;
-    dealloc_timestamp = newMIB.dealloc_timestamp;
-
-    num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
-    num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
-    alloc_cpu_id = newMIB.alloc_cpu_id;
-    dealloc_cpu_id = newMIB.dealloc_cpu_id;
-  }
-});
+#ifdef _MSC_VER
+} __pragma(pack(pop));
+#else
+} __attribute__((__packed__));
+#endif
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 45544927a86f..34f78063aa42 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -12,31 +12,142 @@
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include
+
 namespace llvm {
 namespace memprof {
+// Map from id (recorded from sanitizer stack depot) to virtual addresses for
+// each program counter address in the callstack.
+using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
+
 class RawMemProfReader {
 public:
-  RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
-      : DataBuffer(std::move(DataBuffer)) {}
-  // Prints aggregate counts for each raw profile parsed from the DataBuffer.
-  void printSummaries(raw_ostream &OS) const;
+  RawMemProfReader(const RawMemProfReader &) = delete;
+  RawMemProfReader &operator=(const RawMemProfReader &) = delete;
+
+  // Prints the contents of the profile in YAML format.
+  void printYAML(raw_ostream &OS);
   // Return true if the \p DataBuffer starts with magic bytes indicating it is
   // a raw binary memprof profile.
   static bool hasFormat(const MemoryBuffer &DataBuffer);
+  // Return true if the file at \p Path starts with magic bytes indicating it is
+  // a raw binary memprof profile.
+  static bool hasFormat(const StringRef Path);
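// Illustrative sketch (not from the vendored commit): the two hasFormat
// overloads let callers cheaply dispatch on the profile kind before they
// construct a reader. Assuming `Path` names a profile on disk:
// \code
//   if (memprof::RawMemProfReader::hasFormat(Path)) {
//     // Treat as a raw binary memprof profile (see create() below).
//   } else {
//     // Fall back to another profile reader.
//   }
// \endcode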
   // Create a RawMemProfReader after sanity checking the contents of the file at
-  // \p Path.
-  static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path);
+  // \p Path. The binary from which the profile has been collected is specified
+  // via a path in \p ProfiledBinary.
+  static Expected<std::unique_ptr<RawMemProfReader>>
+  create(const Twine &Path, const StringRef ProfiledBinary,
+         bool KeepName = false);
+
+  using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
+  using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
+  Iterator end() { return Iterator(); }
+  Iterator begin() {
+    Iter = FunctionProfileData.begin();
+    return Iterator(this);
+  }
+
+  Error readNextRecord(GuidMemProfRecordPair &GuidRecord);
+
+  // The RawMemProfReader only holds memory profile information.
+  InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
+
+  // Constructor for unittests only.
+  RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
+                   llvm::SmallVectorImpl<SegmentEntry> &Seg,
+                   llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
+                   CallStackMap &SM, bool KeepName = false)
+      : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
+        CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) {
+    // We don't call initialize here since there is no raw profile to read. The
+    // test should pass in the raw profile as structured data.
+
+    // If there is an error here then the mock symbolizer has not been
+    // initialized properly.
+    if (Error E = symbolizeAndFilterStackFrames())
+      report_fatal_error(std::move(E));
+    if (Error E = mapRawProfileToRecords())
+      report_fatal_error(std::move(E));
+  }
+
+  // Return a const reference to the internal Id to Frame mappings.
+  const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
+    return IdToFrame;
+  }
+
+  // Return a const reference to the internal function profile data.
+  const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
+  getProfileData() const {
+    return FunctionProfileData;
+  }
 private:
-  std::unique_ptr<MemoryBuffer> DataBuffer;
-};
+  RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
+      : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
+  // Initializes the RawMemProfReader with the contents in `DataBuffer`.
+  Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
+  // Read and parse the contents of the `DataBuffer` as a binary format profile.
+  Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
+  // Symbolize and cache all the virtual addresses we encounter in the
+  // callstacks from the raw profile. Also prune callstack frames which we can't
+  // symbolize or those that belong to the runtime. For profile entries where
+  // the entire callstack is pruned, we drop the entry from the profile.
+  Error symbolizeAndFilterStackFrames();
+  // Construct memprof records for each function and store it in the
+  // `FunctionProfileData` map. A function may have allocation profile data or
+  // callsite data or both.
+  Error mapRawProfileToRecords();
+
+  // A helper method to extract the frame from the IdToFrame map.
+  const Frame &idToFrame(const FrameId Id) const {
+    auto It = IdToFrame.find(Id);
+    assert(It != IdToFrame.end() && "Id not found in map.");
+    return It->getSecond();
+  }
+
+  object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
+
+  object::OwningBinary<object::Binary> Binary;
+  std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer;
+  // The contents of the raw profile.
+  llvm::SmallVector<SegmentEntry, 16> SegmentInfo;
+  // A map from callstack id (same as key in CallStackMap below) to the heap
+  // information recorded for that allocation context.
+  llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
+  CallStackMap StackMap;
+
+  // Cached symbolization from PC to Frame.
+  llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
+  llvm::DenseMap<FrameId, Frame> IdToFrame;
+
+  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
+  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
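// Illustrative sketch (not from the vendored commit): with begin()/end()
// returning InstrProfIterator, a reader can be consumed with a range-for.
// Assuming `ProfilePath` and `BinaryPath` are valid paths:
// \code
//   auto ReaderOr = memprof::RawMemProfReader::create(ProfilePath, BinaryPath);
//   if (!ReaderOr)
//     report_fatal_error(ReaderOr.takeError());
//   for (const auto &KV : **ReaderOr) {
//     // KV.first is the function GUID, KV.second the MemProfRecord with its
//     // allocation site and callsite information.
//   }
// \endcode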
+
+  // Whether to keep the symbol name for each frame after hashing.
+  bool KeepSymbolName = false;
+  // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
+  llvm::DenseMap<GlobalValue::GUID, std::string> GuidToSymbolName;
+};
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index bad2139fe8f0..f11392c05318 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -18,15 +18,12 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Module.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include
 #include
 #include
@@ -40,6 +37,9 @@
 namespace llvm {
+class DILocation;
+class raw_ostream;
+
 const std::error_category &sampleprof_category();
 enum class sampleprof_error {
@@ -55,7 +55,6 @@ enum class sampleprof_error {
   not_implemented,
   counter_overflow,
   ostream_seek_unsupported,
-  compress_failed,
   uncompress_failed,
   zlib_unavailable,
   hash_mismatch
 }
@@ -201,9 +200,9 @@ enum class SecProfSummaryFlags : uint32_t {
   /// SecFlagFSDiscriminator means this profile uses flow-sensitive
   /// discriminators.
   SecFlagFSDiscriminator = (1 << 2),
-  /// SecFlagIsCSNested means this is context-sensitive nested profile for
-  /// CSSPGO
-  SecFlagIsCSNested = (1 << 4),
+  /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
+  /// contexts thus this is CS preinliner computed.
+  SecFlagIsPreInlined = (1 << 4),
 };
 enum class SecFuncMetadataFlags : uint32_t {
@@ -343,6 +342,15 @@ public:
                : sampleprof_error::success;
   }
+  /// Decrease the number of samples for this record by \p S. Return the amount
+  /// of samples actually decreased.
+  uint64_t removeSamples(uint64_t S) {
+    if (S > NumSamples)
+      S = NumSamples;
+    NumSamples -= S;
+    return S;
+  }
+
   /// Add called function \p F with samples \p S.
   /// Optionally scale sample count \p S by \p Weight.
   ///
@@ -358,6 +366,18 @@ public:
                : sampleprof_error::success;
   }
+  /// Remove called function from the call target map. Return the target sample
+  /// count of the called function.
+  uint64_t removeCalledTarget(StringRef F) {
+    uint64_t Count = 0;
+    auto I = CallTargets.find(F);
+    if (I != CallTargets.end()) {
+      Count = I->second;
+      CallTargets.erase(I);
+    }
+    return Count;
+  }
+
   /// Return true if this sample record contains function calls.
   bool hasCalls() const { return !CallTargets.empty(); }
@@ -367,6 +387,13 @@ public:
     return SortCallTargets(CallTargets);
   }
+  uint64_t getCallTargetSum() const {
+    uint64_t Sum = 0;
+    for (const auto &I : CallTargets)
+      Sum += I.second;
+    return Sum;
+  }
+
   /// Sort call targets in descending order of call frequency.
   static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
     SortedCallTargetSet SortedTargets;
@@ -413,6 +440,8 @@ enum ContextAttributeMask {
   ContextNone = 0x0,
   ContextWasInlined = 0x1,      // Leaf of context was inlined in previous build
   ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
+  ContextDuplicatedIntoBase =
+      0x4, // Leaf of context is duplicated into the base profile
 };
 // Represents a context frame with function name and line location
@@ -524,16 +553,6 @@ public:
     }
   }
-  // Promote context by removing top frames with the length of
-  // `ContextFramesToRemove`.
Note that with array representation of context, - // the promotion is effectively a slice operation with first - // `ContextFramesToRemove` elements removed from left. - void promoteOnPath(uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove <= FullContext.size() && - "Cannot remove more than the whole context"); - FullContext = FullContext.drop_front(ContextFramesToRemove); - } - // Decode context string for a frame to get function name and location. // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. static void decodeContextString(StringRef ContextStr, StringRef &FName, @@ -703,6 +722,13 @@ public: : sampleprof_error::success; } + void removeTotalSamples(uint64_t Num) { + if (TotalSamples < Num) + TotalSamples = 0; + else + TotalSamples -= Num; + } + void setTotalSamples(uint64_t Num) { TotalSamples = Num; } sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { @@ -727,6 +753,22 @@ public: FName, Num, Weight); } + // Remove a call target and decrease the body sample correspondingly. Return + // the number of body samples actually decreased. + uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, + uint32_t Discriminator, + StringRef FName) { + uint64_t Count = 0; + auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (I != BodySamples.end()) { + Count = I->second.removeCalledTarget(FName); + Count = I->second.removeSamples(Count); + if (!I->second.getSamples()) + BodySamples.erase(I); + } + return Count; + } + sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, uint64_t Weight = 1) { SampleRecord S; @@ -734,6 +776,19 @@ public: return BodySamples[LineLocation(Index, 0)].merge(S, Weight); } + // Accumulate all call target samples to update the body samples. + void updateCallsiteSamples() { + for (auto &I : BodySamples) { + uint64_t TargetSamples = I.second.getCallTargetSum(); + // It's possible that the body sample count can be greater than the call + // target sum. E.g, if some call targets are external targets, they won't + // be considered valid call targets, but the body sample count which is + // from lbr ranges can actually include them. + if (TargetSamples > I.second.getSamples()) + I.second.addSamples(TargetSamples - I.second.getSamples()); + } + } + // Accumulate all body samples to set total samples. void updateTotalSamples() { setTotalSamples(0); @@ -829,7 +884,7 @@ public: /// Return the sample count of the first instruction of the function. /// The function can be either a standalone symbol or an inlined function. uint64_t getEntrySamples() const { - if (FunctionSamples::ProfileIsCSFlat && getHeadSamples()) { + if (FunctionSamples::ProfileIsCS && getHeadSamples()) { // For CS profile, if we already have more accurate head samples // counted by branch sample from caller, use them as entry samples. return getHeadSamples(); @@ -1046,16 +1101,14 @@ public: static bool ProfileIsProbeBased; - static bool ProfileIsCSFlat; + static bool ProfileIsCS; - static bool ProfileIsCSNested; + static bool ProfileIsPreInlined; SampleContext &getContext() const { return Context; } void setContext(const SampleContext &FContext) { Context = FContext; } - static SampleProfileFormat Format; - /// Whether the profile uses MD5 to represent string. 
  static bool UseMD5;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index a2caca246d93..7da336b9f61b 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -227,10 +227,8 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/GCOV.h"
@@ -240,7 +238,6 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SymbolRemappingReader.h"
-#include
 #include
 #include
 #include
@@ -473,11 +470,11 @@ public:
   /// Whether input profile is based on pseudo probes.
   bool profileIsProbeBased() const { return ProfileIsProbeBased; }
-  /// Whether input profile is fully context-sensitive and flat.
-  bool profileIsCSFlat() const { return ProfileIsCSFlat; }
+  /// Whether input profile is fully context-sensitive.
+  bool profileIsCS() const { return ProfileIsCS; }
-  /// Whether input profile is fully context-sensitive and nested.
-  bool profileIsCSNested() const { return ProfileIsCSNested; }
+  /// Whether input profile contains ShouldBeInlined contexts.
+  bool profileIsPreInlined() const { return ProfileIsPreInlined; }
   virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
     return nullptr;
@@ -537,10 +534,10 @@ protected:
   bool ProfileIsProbeBased = false;
   /// Whether function profiles are context-sensitive flat profiles.
-  bool ProfileIsCSFlat = false;
+  bool ProfileIsCS = false;
-  /// Whether function profiles are context-sensitive nested profiles.
-  bool ProfileIsCSNested = false;
+  /// Whether function profile contains ShouldBeInlined contexts.
+  bool ProfileIsPreInlined = false;
   /// Number of context-sensitive profiles.
uint32_t CSProfileCount = 0; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index 42decd255203..aa7f1cbdd7e8 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -13,19 +13,15 @@ #define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include #include -#include namespace llvm { namespace sampleprof { diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h index 6217bd98d1a5..b971173ad2c6 100644 --- a/llvm/include/llvm/Remarks/RemarkSerializer.h +++ b/llvm/include/llvm/Remarks/RemarkSerializer.h @@ -13,7 +13,6 @@ #ifndef LLVM_REMARKS_REMARKSERIALIZER_H #define LLVM_REMARKS_REMARKSERIALIZER_H -#include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStringTable.h" diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index a953e9439db4..e2f949856d9f 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -168,10 +168,10 @@ AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cortex-a65ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) @@ -190,10 +190,11 @@ AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | - AArch64::AEK_SSBS)) + AArch64::AEK_SSBS | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | - AArch64::AEK_SSBS)) + AArch64::AEK_SSBS | AArch64::AEK_PROFILE | AArch64::AEK_FLAGM | + AArch64::AEK_PAUTH | AArch64::AEK_FP16FML)) AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, (AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM | AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML | @@ -203,35 +204,37 @@ AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | - AArch64::AEK_SSBS)) + AArch64::AEK_SSBS | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | - AArch64::AEK_SSBS | AArch64::AEK_PAUTH)) + AArch64::AEK_SSBS | AArch64::AEK_PAUTH | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("cortex-x2", 
ARMV9A, FK_NEON_FP_ARMV8, false, (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB | AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | AArch64::AEK_FP16FML)) AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | - AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_PROFILE | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) AARCH64_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_BF16 | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | - AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS | - AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS | + AArch64::AEK_I8MM | AArch64::AEK_MTE | + AArch64::AEK_SB | AArch64::AEK_SSBS | AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM)) AARCH64_CPU_NAME("neoverse-512tvb", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | - AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 | - AArch64::AEK_DOTPROD )) + (AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_FP16 | AArch64::AEK_BF16 | + AArch64::AEK_DOTPROD | AArch64::AEK_PROFILE | + AArch64::AEK_RAND | AArch64::AEK_FP16FML | AArch64::AEK_I8MM)) AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | - AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 | - AArch64::AEK_DOTPROD )) + (AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_FP16 | AArch64::AEK_BF16 | + AArch64::AEK_DOTPROD | AArch64::AEK_PROFILE | + AArch64::AEK_RAND | AArch64::AEK_FP16FML | AArch64::AEK_I8MM)) AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) AARCH64_CPU_NAME("apple-a7", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, @@ -247,11 +250,11 @@ AARCH64_CPU_NAME("apple-a11", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("apple-a12", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16)) AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_FP16 | AArch64::AEK_FP16FML)) + (AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3)) AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_FP16 | AArch64::AEK_FP16FML)) + (AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3)) AARCH64_CPU_NAME("apple-m1", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_FP16 | AArch64::AEK_FP16FML)) + (AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3)) AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16)) AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, @@ -271,17 +274,15 @@ AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) AARCH64_CPU_NAME("thunderx3t110", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_CRC | AEK_CRYPTO | AEK_FP | AEK_SIMD | - AEK_LSE | AEK_RAND | AArch64::AEK_PROFILE | - AArch64::AEK_RAS)) + (AArch64::AEK_NONE)) AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) + 
(AArch64::AEK_CRC)) AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_FP16FML | @@ -290,6 +291,8 @@ AARCH64_CPU_NAME("a64fx", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_SVE)) AARCH64_CPU_NAME("carmel", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AArch64::AEK_FP16) +AARCH64_CPU_NAME("ampere1", ARMV8_6A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_FP16 | AArch64::AEK_SB | AArch64::AEK_SSBS)) // Invalid CPU AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID) #undef AARCH64_CPU_NAME diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index aec80291f01f..41d144cfd5c4 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -136,13 +136,17 @@ enum : int32_t { // Compute program resource register 3 for GFX10+. Must match hardware // definition. -#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \ - AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH) +#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) enum : int32_t { - COMPUTE_PGM_RSRC3_GFX10(SHARED_VGPR_COUNT, 0, 4), // GFX10+ - COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 28), + COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+ + COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+ + COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+ + COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+ + COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19), + COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+ }; -#undef COMPUTE_PGM_RSRC3_GFX10 +#undef COMPUTE_PGM_RSRC3_GFX10_PLUS // Kernel code properties. Must be kept backwards compatible. #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \ diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h index b4405e7d4908..35f8992ca932 100644 --- a/llvm/include/llvm/Support/ARMBuildAttributes.h +++ b/llvm/include/llvm/Support/ARMBuildAttributes.h @@ -90,25 +90,26 @@ enum AttrType : unsigned { // Legal Values for CPU_arch, (=6), uleb128 enum CPUArch { - Pre_v4 = 0, - v4 = 1, // e.g. SA110 - v4T = 2, // e.g. ARM7TDMI - v5T = 3, // e.g. ARM9TDMI - v5TE = 4, // e.g. ARM946E_S - v5TEJ = 5, // e.g. ARM926EJ_S - v6 = 6, // e.g. ARM1136J_S - v6KZ = 7, // e.g. ARM1176JZ_S - v6T2 = 8, // e.g. ARM1156T2_S - v6K = 9, // e.g. ARM1176JZ_S - v7 = 10, // e.g. Cortex A8, Cortex M3 - v6_M = 11, // e.g. Cortex M1 - v6S_M = 12, // v6_M with the System extensions - v7E_M = 13, // v7_M with DSP extensions - v8_A = 14, // v8_A AArch32 - v8_R = 15, // e.g. Cortex R52 - v8_M_Base= 16, // v8_M_Base AArch32 - v8_M_Main= 17, // v8_M_Main AArch32 - v8_1_M_Main=21, // v8_1_M_Main AArch32 + Pre_v4 = 0, + v4 = 1, // e.g. SA110 + v4T = 2, // e.g. ARM7TDMI + v5T = 3, // e.g. ARM9TDMI + v5TE = 4, // e.g. ARM946E_S + v5TEJ = 5, // e.g. ARM926EJ_S + v6 = 6, // e.g. 
ARM1136J_S
+  v6KZ        = 7,  // e.g. ARM1176JZ_S
+  v6T2        = 8,  // e.g. ARM1156T2_S
+  v6K         = 9,  // e.g. ARM1176JZ_S
+  v7          = 10, // e.g. Cortex A8, Cortex M3
+  v6_M        = 11, // e.g. Cortex M1
+  v6S_M       = 12, // v6_M with the System extensions
+  v7E_M       = 13, // v7_M with DSP extensions
+  v8_A        = 14, // v8_A AArch32
+  v8_R        = 15, // e.g. Cortex R52
+  v8_M_Base   = 16, // v8_M_Base AArch32
+  v8_M_Main   = 17, // v8_M_Main AArch32
+  v8_1_M_Main = 21, // v8_1_M_Main AArch32
+  v9_A        = 22, // v9_A AArch32
 };
 enum CPUArchProfile { // (=7), uleb128
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 80deeb2a6e9d..6a1ac7213dad 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -129,22 +129,22 @@ ARM_ARCH("armv8.8-a", ARMV8_8A, "8.8-A", "v8.8a",
           ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
           ARM::AEK_I8MM))
 ARM_ARCH("armv9-a", ARMV9A, "9-A", "v9a",
-         ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+         ARMBuildAttrs::CPUArch::v9_A, FK_NEON_FP_ARMV8,
          (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
           ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
           ARM::AEK_DOTPROD))
 ARM_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a",
-         ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+         ARMBuildAttrs::CPUArch::v9_A, FK_NEON_FP_ARMV8,
          (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
           ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
          ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
 ARM_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a",
-         ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+         ARMBuildAttrs::CPUArch::v9_A, FK_NEON_FP_ARMV8,
          (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
           ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
           ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
 ARM_ARCH("armv9.3-a", ARMV9_3A, "9.3-A", "v9.3a",
-         ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+         ARMBuildAttrs::CPUArch::v9_A, FK_CRYPTO_NEON_FP_ARMV8,
          (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
           ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
           ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
diff --git a/llvm/include/llvm/Support/ARMWinEH.h b/llvm/include/llvm/Support/ARMWinEH.h
index 327aa9804849..dee2f31fb127 100644
--- a/llvm/include/llvm/Support/ARMWinEH.h
+++ b/llvm/include/llvm/Support/ARMWinEH.h
@@ -199,13 +199,14 @@ inline bool EpilogueFolding(const RuntimeFunction &RF) {
 inline uint16_t StackAdjustment(const RuntimeFunction &RF) {
   uint16_t Adjustment = RF.StackAdjust();
   if (Adjustment >= 0x3f4)
-    return (Adjustment & 0x3) ? ((Adjustment & 0x3) << 2) - 1 : 0;
+    return (Adjustment & 0x3) + 1;
   return Adjustment;
 }
 /// SavedRegisterMask - Utility function to calculate the set of saved general
 /// purpose (r0-r15) and VFP (d0-d31) registers.
-std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
+std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF,
+                                                bool Prologue = true);
 /// RuntimeFunctionARM64 - An entry in the table of procedure data (.pdata)
 ///
diff --git a/llvm/include/llvm/Support/Alignment.h b/llvm/include/llvm/Support/Alignment.h
index 1176c026ba99..1543a5713d73 100644
--- a/llvm/include/llvm/Support/Alignment.h
+++ b/llvm/include/llvm/Support/Alignment.h
@@ -84,6 +84,14 @@ public:
   /// Needed to interact with C for instance.
   uint64_t value() const { return uint64_t(1) << ShiftValue; }
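// Illustrative note (not from the vendored commit): previous() below steps an
// alignment down one power of two, so e.g. Align(16).previous() == Align(8),
// and it asserts when called on Align(1), which has no predecessor.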
+  // Returns the previous alignment.
+  Align previous() const {
+    assert(ShiftValue != 0 && "Undefined operation");
+    Align Out;
+    Out.ShiftValue = ShiftValue - 1;
+    return Out;
+  }
+
   /// Allow constructions of constexpr Align.
   template <size_t kValue> constexpr static LogValue Constant() {
     return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
   }
@@ -131,7 +139,7 @@ public:
   }
   /// For convenience, returns a valid alignment or 1 if undefined.
-  Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
+  Align valueOrOne() const { return value_or(Align()); }
 };
 /// Checks that SizeInBytes is a multiple of the alignment.
@@ -173,13 +181,7 @@ inline uint64_t alignTo(uint64_t Size, Align A) {
 inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) {
   const uint64_t Value = A.value();
   Skew %= Value;
-  return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew;
-}
-
-/// Returns a multiple of A needed to store `Size` bytes.
-/// Returns `Size` if current alignment is undefined.
-inline uint64_t alignTo(uint64_t Size, MaybeAlign A) {
-  return A ? alignTo(Size, A.getValue()) : Size;
+  return alignTo(Size - Skew, A) + Skew;
 }
 /// Aligns `Addr` to `Alignment` bytes, rounding up.
@@ -206,28 +208,12 @@ inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) {
 /// Returns the log2 of the alignment.
 inline unsigned Log2(Align A) { return A.ShiftValue; }
-/// Returns the alignment that satisfies both alignments.
-/// Same semantic as MinAlign.
-inline Align commonAlignment(Align A, Align B) { return std::min(A, B); }
-
 /// Returns the alignment that satisfies both alignments.
 /// Same semantic as MinAlign.
 inline Align commonAlignment(Align A, uint64_t Offset) {
   return Align(MinAlign(A.value(), Offset));
 }
-/// Returns the alignment that satisfies both alignments.
-/// Same semantic as MinAlign.
-inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) {
-  return A && B ? commonAlignment(*A, *B) : A ? A : B;
-}
-
-/// Returns the alignment that satisfies both alignments.
-/// Same semantic as MinAlign.
-inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
-  return MaybeAlign(MinAlign((*A).value(), Offset));
-}
-
 /// Returns a representation of the alignment that encodes undefined as 0.
 inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; }
@@ -270,14 +256,6 @@ inline bool operator>(Align Lhs, uint64_t Rhs) {
   return Lhs.value() > Rhs;
 }
-/// Comparisons between MaybeAlign and scalars.
-inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) {
-  return Lhs ? (*Lhs).value() == Rhs : Rhs == 0;
-}
-inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) {
-  return Lhs ? (*Lhs).value() != Rhs : Rhs != 0;
-}
-
 /// Comparisons operators between Align.
 inline bool operator==(Align Lhs, Align Rhs) {
   return Lhs.ShiftValue == Rhs.ShiftValue;
 }
@@ -314,37 +292,6 @@ bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
 bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
 bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
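// Worked example (editorial, not from the vendored commit): the rewritten
// skewed alignTo above agrees with the old bit-twiddling form. For Size = 10,
// A = Align(8), Skew = 3:
//   old: ((10 + 8 - 1 - 3) & ~7) + 3 == (14 & ~7) + 3 == 8 + 3 == 11
//   new: alignTo(10 - 3, Align(8)) + 3 == alignTo(7, Align(8)) + 3 == 8 + 3 == 11
// i.e. the result is the smallest value >= Size that is Skew more than a
// multiple of the alignment.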
-inline Align operator*(Align Lhs, uint64_t Rhs) {
-  assert(Rhs > 0 && "Rhs must be positive");
-  return Align(Lhs.value() * Rhs);
-}
-
-inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) {
-  assert(Rhs > 0 && "Rhs must be positive");
-  return Lhs ? Lhs.getValue() * Rhs : MaybeAlign();
-}
-
-inline Align operator/(Align Lhs, uint64_t Divisor) {
-  assert(llvm::isPowerOf2_64(Divisor) &&
-         "Divisor must be positive and a power of 2");
-  assert(Lhs != 1 && "Can't halve byte alignment");
-  return Align(Lhs.value() / Divisor);
-}
-
-inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) {
-  assert(llvm::isPowerOf2_64(Divisor) &&
-         "Divisor must be positive and a power of 2");
-  return Lhs ? Lhs.getValue() / Divisor : MaybeAlign();
-}
-
-inline Align max(MaybeAlign Lhs, Align Rhs) {
-  return Lhs && *Lhs > Rhs ? *Lhs : Rhs;
-}
-
-inline Align max(Align Lhs, MaybeAlign Rhs) {
-  return Rhs && *Rhs > Lhs ? *Rhs : Lhs;
-}
-
 #ifndef NDEBUG
 // For usage in LLVM_DEBUG macros.
 inline std::string DebugStr(const Align &A) {
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index ec5ed06b7fa4..5ca0c9decac3 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -140,6 +140,9 @@ public:
   // This method is *not* marked noalias, because
   // SpecificBumpPtrAllocator::DestroyAll() loops over all allocations, and
   // that loop is not based on the Allocate() return value.
+  //
+  // Allocate(0, N) is valid, it returns a non-null pointer (which should not
+  // be dereferenced).
   LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) {
     // Keep track of how many bytes we've allocated.
     BytesAllocated += Size;
@@ -154,7 +157,9 @@ public:
 #endif
     // Check if we have enough space.
-    if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)) {
+    if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)
+        // We can't return nullptr even for a zero-sized allocation!
+        && CurPtr != nullptr) {
       char *AlignedPtr = CurPtr + Adjustment;
       CurPtr = AlignedPtr + SizeToAllocate;
       // Update the allocation point of this memory block in MemorySanitizer.
diff --git a/llvm/include/llvm/Support/BLAKE3.h b/llvm/include/llvm/Support/BLAKE3.h
new file mode 100644
index 000000000000..7b30dbccd173
--- /dev/null
+++ b/llvm/include/llvm/Support/BLAKE3.h
@@ -0,0 +1,124 @@
+//==- BLAKE3.h - BLAKE3 C++ wrapper for LLVM ---------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a C++ wrapper of the BLAKE3 C interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BLAKE3_H
+#define LLVM_SUPPORT_BLAKE3_H
+
+#include "llvm-c/blake3.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+/// The constant \p LLVM_BLAKE3_OUT_LEN provides the default output length,
+/// 32 bytes, which is recommended for most callers.
+///
+/// Outputs shorter than the default length of 32 bytes (256 bits) provide
+/// less security. An N-bit BLAKE3 output is intended to provide N bits of
+/// first and second preimage resistance and N/2 bits of collision
+/// resistance, for any N up to 256. Longer outputs don't provide any
+/// additional security.
+///
+/// Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly
+/// requesting a short output is equivalent to truncating the default-length
+/// output.
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+using BLAKE3Result = std::array<uint8_t, NumBytes>;
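// Illustrative sketch (not from the vendored commit): since shorter outputs
// are prefixes of longer ones, truncation really is just truncation. Using
// the hash() helper of the class defined below, with `Data` an assumed
// ArrayRef<uint8_t>:
// \code
//   BLAKE3Result<32> Full = BLAKE3::hash(Data);      // default length
//   BLAKE3Result<16> Short = BLAKE3::hash<16>(Data); // first 16 bytes of Full
//   assert(memcmp(Short.data(), Full.data(), 16) == 0);
// \endcode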
+
+/// A class that wraps the BLAKE3 algorithm.
+class BLAKE3 {
+public:
+  BLAKE3() { init(); }
+
+  /// Reinitialize the internal state
+  void init() { llvm_blake3_hasher_init(&Hasher); }
+
+  /// Digest more data.
+  void update(ArrayRef<uint8_t> Data) {
+    llvm_blake3_hasher_update(&Hasher, Data.data(), Data.size());
+  }
+
+  /// Digest more data.
+  void update(StringRef Str) {
+    llvm_blake3_hasher_update(&Hasher, Str.data(), Str.size());
+  }
+
+  /// Finalize the hasher and put the result in \p Result.
+  /// This doesn't modify the hasher itself, and it's possible to finalize again
+  /// after adding more input.
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+  void final(BLAKE3Result<NumBytes> &Result) {
+    llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
+  }
+
+  /// Finalize the hasher and return an output of any length, given in bytes.
+  /// This doesn't modify the hasher itself, and it's possible to finalize again
+  /// after adding more input.
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+  BLAKE3Result<NumBytes> final() {
+    BLAKE3Result<NumBytes> Result;
+    llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
+    return Result;
+  }
+
+  /// Return the current output for the digested data since the last call to
+  /// init().
+  ///
+  /// Other hash functions distinguish between \p result() and \p final(), with
+  /// \p result() allowing more calls into \p update(), but there's no
+  // difference for the BLAKE3 hash function.
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+  BLAKE3Result<NumBytes> result() {
+    return final<NumBytes>();
+  }
+
+  /// Returns a BLAKE3 hash for the given data.
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+  static BLAKE3Result<NumBytes> hash(ArrayRef<uint8_t> Data) {
+    BLAKE3 Hasher;
+    Hasher.update(Data);
+    return Hasher.final<NumBytes>();
+  }
+
+private:
+  llvm_blake3_hasher Hasher;
+};
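// Illustrative sketch (not from the vendored commit): typical incremental use
// of the wrapper above, assuming `Part1` and `Part2` are StringRefs and
// `Bytes` is an ArrayRef<uint8_t>:
// \code
//   BLAKE3 Hash;
//   Hash.update(Part1);
//   Hash.update(Part2);
//   BLAKE3Result<> Digest = Hash.final(); // 32-byte digest
//   auto Short = BLAKE3::hash<8>(Bytes);  // one-shot, 8-byte digest
// \endcode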
+
+/// Like \p BLAKE3 but using a class-level template parameter for specifying the
+/// hash size of the \p final() and \p result() functions.
+///
+/// This is useful for using BLAKE3 as the hasher type for \p HashBuilder with
+/// non-default hash sizes.
+template <size_t NumBytes> class TruncatedBLAKE3 : public BLAKE3 {
+public:
+  /// Finalize the hasher and put the result in \p Result.
+  /// This doesn't modify the hasher itself, and it's possible to finalize again
+  /// after adding more input.
+  void final(BLAKE3Result<NumBytes> &Result) { return BLAKE3::final(Result); }
+
+  /// Finalize the hasher and return an output of any length, given in bytes.
+  /// This doesn't modify the hasher itself, and it's possible to finalize again
+  /// after adding more input.
+  BLAKE3Result<NumBytes> final() { return BLAKE3::final<NumBytes>(); }
+
+  /// Return the current output for the digested data since the last call to
+  /// init().
+  ///
+  /// Other hash functions distinguish between \p result() and \p final(), with
+  /// \p result() allowing more calls into \p update(), but there's no
+  // difference for the BLAKE3 hash function.
+  BLAKE3Result<NumBytes> result() { return BLAKE3::result<NumBytes>(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
index 62064a35aa34..da4ae1688574 100644
--- a/llvm/include/llvm/Support/Base64.h
+++ b/llvm/include/llvm/Support/Base64.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_SUPPORT_BASE64_H
 #define LLVM_SUPPORT_BASE64_H
+#include
 #include
 namespace llvm {
diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h
index c3e0db4dcff0..ef2233c53ec2 100644
--- a/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -111,6 +111,8 @@ public:
   bool valid() const { return Stream.valid(); }
+  bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); }
+
   uint32_t skew() const { return Skew; }
   Iterator end() const { return Iterator(E); }
diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h
index bc8c6a496ecf..46fc9fb293df 100644
--- a/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -48,7 +48,7 @@ public:
   }
   uint64_t getLength() const {
-    if (Length.hasValue())
+    if (Length)
       return *Length;
     return BorrowedImpl ? (BorrowedImpl->getLength() - ViewOffset) : 0;
@@ -67,7 +67,7 @@ public:
       return Result;
     Result.ViewOffset += N;
-    if (Result.Length.hasValue())
+    if (Result.Length)
       *Result.Length -= N;
     return Result;
   }
@@ -87,7 +87,7 @@ public:
     // Since we're dropping non-zero bytes from the end, stop length-tracking
     // by setting the length of the resulting StreamRef to an explicit value.
-    if (!Result.Length.hasValue())
+    if (!Result.Length)
       Result.Length = getLength();
     *Result.Length -= N;
diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h
index 6f071c15421f..79d70cf611d4 100644
--- a/llvm/include/llvm/Support/BranchProbability.h
+++ b/llvm/include/llvm/Support/BranchProbability.h
@@ -16,6 +16,7 @@
 #include "llvm/Support/DataTypes.h"
 #include
 #include
+#include
 #include
 namespace llvm {
diff --git a/llvm/include/llvm/Support/CSKYAttributeParser.h b/llvm/include/llvm/Support/CSKYAttributeParser.h
new file mode 100644
index 000000000000..e926ebe5e306
--- /dev/null
+++ b/llvm/include/llvm/Support/CSKYAttributeParser.h
@@ -0,0 +1,43 @@
+//===---- CSKYAttributeParser.h - CSKY Attribute Parser ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CSKYATTRIBUTEPARSER_H +#define LLVM_SUPPORT_CSKYATTRIBUTEPARSER_H + +#include "llvm/Support/CSKYAttributes.h" +#include "llvm/Support/ELFAttributeParser.h" + +namespace llvm { +class CSKYAttributeParser : public ELFAttributeParser { + struct DisplayHandler { + CSKYAttrs::AttrType attribute; + Error (CSKYAttributeParser::*routine)(unsigned); + }; + static const DisplayHandler displayRoutines[]; + + Error dspVersion(unsigned tag); + Error vdspVersion(unsigned tag); + Error fpuVersion(unsigned tag); + Error fpuABI(unsigned tag); + Error fpuRounding(unsigned tag); + Error fpuDenormal(unsigned tag); + Error fpuException(unsigned tag); + Error fpuHardFP(unsigned tag); + + Error handler(uint64_t tag, bool &handled) override; + +public: + CSKYAttributeParser(ScopedPrinter *sw) + : ELFAttributeParser(sw, CSKYAttrs::getCSKYAttributeTags(), "csky") {} + CSKYAttributeParser() + : ELFAttributeParser(CSKYAttrs::getCSKYAttributeTags(), "csky") {} +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/CSKYAttributes.h b/llvm/include/llvm/Support/CSKYAttributes.h new file mode 100644 index 000000000000..723f2ceee8fb --- /dev/null +++ b/llvm/include/llvm/Support/CSKYAttributes.h @@ -0,0 +1,95 @@ +//===---- CSKYAttributes.h - CSKY Attributes --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains enumerations for CSKY attributes. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_SUPPORT_CSKYATTRIBUTES_H +#define LLVM_SUPPORT_CSKYATTRIBUTES_H + +#include "llvm/Support/ELFAttributes.h" + +namespace llvm { +namespace CSKYAttrs { + +const TagNameMap &getCSKYAttributeTags(); + +enum AttrType { + CSKY_ARCH_NAME = 4, + CSKY_CPU_NAME = 5, + CSKY_ISA_FLAGS = 6, + CSKY_ISA_EXT_FLAGS = 7, + CSKY_DSP_VERSION = 8, + CSKY_VDSP_VERSION = 9, + CSKY_FPU_VERSION = 16, + CSKY_FPU_ABI = 17, + CSKY_FPU_ROUNDING = 18, + CSKY_FPU_DENORMAL = 19, + CSKY_FPU_EXCEPTION = 20, + CSKY_FPU_NUMBER_MODULE = 21, + CSKY_FPU_HARDFP = 22 +}; + +enum ISA_FLAGS { + V2_ISA_E1 = 1 << 1, + V2_ISA_1E2 = 1 << 2, + V2_ISA_2E3 = 1 << 3, + V2_ISA_3E7 = 1 << 4, + V2_ISA_7E10 = 1 << 5, + V2_ISA_3E3R1 = 1 << 6, + V2_ISA_3E3R2 = 1 << 7, + V2_ISA_10E60 = 1 << 8, + V2_ISA_3E3R3 = 1 << 9, + ISA_TRUST = 1 << 11, + ISA_CACHE = 1 << 12, + ISA_NVIC = 1 << 13, + ISA_CP = 1 << 14, + ISA_MP = 1 << 15, + ISA_MP_1E2 = 1 << 16, + ISA_JAVA = 1 << 17, + ISA_MAC = 1 << 18, + ISA_MAC_DSP = 1 << 19, + ISA_DSP = 1 << 20, + ISA_DSP_1E2 = 1 << 21, + ISA_DSP_ENHANCE = 1 << 22, + ISA_DSP_SILAN = 1 << 23, + ISA_VDSP = 1 << 24, + ISA_VDSP_2 = 1 << 25, + ISA_VDSP_2E3 = 1 << 26, + V2_ISA_DSPE60 = 1 << 27, + ISA_VDSP_2E60F = 1 << 28 +}; + +enum ISA_EXT_FLAGS { + ISA_FLOAT_E1 = 1 << 0, + ISA_FLOAT_1E2 = 1 << 1, + ISA_FLOAT_1E3 = 1 << 2, + ISA_FLOAT_3E4 = 1 << 3, + ISA_FLOAT_7E60 = 1 << 4 +}; + +enum { NONE = 0, NEEDED = 1 }; + +enum DSP_VERSION { DSP_VERSION_EXTENSION = 1, DSP_VERSION_2 = 2 }; + +enum VDSP_VERSION { VDSP_VERSION_1 = 1, VDSP_VERSION_2 = 2 }; + +enum FPU_VERSION { FPU_VERSION_1 = 1, FPU_VERSION_2 = 2, FPU_VERSION_3 = 3 }; + +enum FPU_ABI { FPU_ABI_SOFT = 1, FPU_ABI_SOFTFP = 2, FPU_ABI_HARD = 3 }; + +enum FPU_HARDFP { + FPU_HARDFP_HALF = 1, + FPU_HARDFP_SINGLE = 2, + FPU_HARDFP_DOUBLE = 4 +}; + +} // namespace CSKYAttrs +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/CSKYTargetParser.def b/llvm/include/llvm/Support/CSKYTargetParser.def new file mode 100644 index 000000000000..c93d6fdf8cce --- /dev/null +++ b/llvm/include/llvm/Support/CSKYTargetParser.def @@ -0,0 +1,524 @@ +//===- CSKYTargetParser.def - CSKY target parsing defines -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides defines to build up the CSKY target parser's logic. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! 
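// Illustrative sketch (not part of the vendored file): like the other LLVM
// target-parser .def files, this one is meant to be included repeatedly, each
// time with only the macros the consumer cares about defined, e.g.
// \code
//   #define CSKY_FPU(NAME, KIND, VERSION) FPUNames.push_back(NAME); // hypothetical consumer
//   #include "llvm/Support/CSKYTargetParser.def"
// \endcode
// Every block below #undefs its macro and supplies an empty fallback
// definition, which is why an include guard would defeat the design.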
+ +#ifndef CSKY_FPU +#define CSKY_FPU(NAME, KIND, VERSION) +#endif +CSKY_FPU("invalid", FK_INVALID, FPUVersion::NONE) +CSKY_FPU("auto", FK_AUTO, FPUVersion::FPV2) +CSKY_FPU("fpv2", FK_FPV2, FPUVersion::FPV2) +CSKY_FPU("fpv2_divd", FK_FPV2_DIVD, FPUVersion::FPV2) +CSKY_FPU("fpv2_sf", FK_FPV2_SF, FPUVersion::FPV2) +CSKY_FPU("fpv3", FK_FPV3, FPUVersion::FPV3) +CSKY_FPU("fpv3_hf", FK_FPV3_HF, FPUVersion::FPV3) +CSKY_FPU("fpv3_hsf", FK_FPV3_HSF, FPUVersion::FPV3) +CSKY_FPU("fpv3_sdf", FK_FPV3_SDF, FPUVersion::FPV3) + +#undef CSKY_FPU + +#ifndef CSKY_ARCH +#define CSKY_ARCH(NAME, ID, ARCH_BASE_EXT) +#endif +CSKY_ARCH("invalid", INVALID, CSKY::AEK_INVALID) +CSKY_ARCH("ck801", CK801, CSKY::MAEK_E1 | CSKY::AEK_TRUST) +CSKY_ARCH("ck802", CK802, CSKY::MAEK_E2 | CSKY::AEK_TRUST | CSKY::AEK_NVIC) +CSKY_ARCH("ck803", CK803, + CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC | + CSKY::AEK_HWDIV) +CSKY_ARCH("ck803s", CK803S, + CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC | + CSKY::AEK_HWDIV) +CSKY_ARCH("ck804", CK804, + CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC | + CSKY::AEK_HWDIV | CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3) +CSKY_ARCH("ck805", CK805, + CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC | + CSKY::AEK_HWDIV | CSKY::AEK_HIGHREG | CSKY::MAEK_3E3R2 | + CSKY::AEK_3E3R3 | CSKY::AEK_VDSPV2 | CSKY::AEK_VDSP2E3) +CSKY_ARCH("ck807", CK807, + CSKY::MAEK_3E7 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | CSKY::AEK_TRUST | + CSKY::AEK_HWDIV | CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | + CSKY::AEK_DSPE60 | CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | + CSKY::AEK_NVIC | CSKY::AEK_CACHE) +CSKY_ARCH("ck810", CK810, + CSKY::MAEK_7E10 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | CSKY::AEK_TRUST | + CSKY::AEK_HWDIV | CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | + CSKY::AEK_DSPE60 | CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | + CSKY::AEK_NVIC | CSKY::AEK_CACHE) +CSKY_ARCH("ck810v", CK810V, + CSKY::MAEK_7E10 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | CSKY::AEK_TRUST | + CSKY::AEK_HWDIV | CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | + CSKY::AEK_DSPE60 | CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | + CSKY::AEK_NVIC | CSKY::AEK_CACHE | CSKY::AEK_VDSPV1) +CSKY_ARCH("ck860", CK860, + CSKY::MAEK_10E60 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | + CSKY::AEK_TRUST | CSKY::AEK_HWDIV | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | CSKY::AEK_NVIC | + CSKY::AEK_CACHE | CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3) +CSKY_ARCH("ck860v", CK860V, + CSKY::MAEK_10E60 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | + CSKY::AEK_TRUST | CSKY::AEK_HWDIV | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | CSKY::AEK_NVIC | + CSKY::AEK_CACHE | CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | + CSKY::AEK_VDSPV2 | CSKY::AEK_VDSP2E60F) +#undef CSKY_ARCH + +#ifndef CSKY_ARCH_EXT_NAME +#define CSKY_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) +#endif +CSKY_ARCH_EXT_NAME("invalid", CSKY::AEK_INVALID, nullptr, nullptr) +CSKY_ARCH_EXT_NAME("none", CSKY::AEK_NONE, nullptr, nullptr) +CSKY_ARCH_EXT_NAME("fpuv2_sf", CSKY::AEK_FPUV2SF, "+fpuv2_sf", "-fpuv2_sf") +CSKY_ARCH_EXT_NAME("fpuv2_df", CSKY::AEK_FPUV2DF, "+fpuv2_df", "-fpuv2_df") +CSKY_ARCH_EXT_NAME("fdivdu", CSKY::AEK_FDIVDU, "+fdivdu", "-fdivdu") +CSKY_ARCH_EXT_NAME("fpuv3_hi", CSKY::AEK_FPUV3HI, "+fpuv3_hi", "-fpuv3_hi") +CSKY_ARCH_EXT_NAME("fpuv3_hf", CSKY::AEK_FPUV3HF, "+fpuv3_hf", "-fpuv3_hf") +CSKY_ARCH_EXT_NAME("fpuv3_sf", CSKY::AEK_FPUV3SF, "+fpuv3_sf", "-fpuv3_sf") +CSKY_ARCH_EXT_NAME("fpuv3_df", CSKY::AEK_FPUV3DF, "+fpuv3_df", "-fpuv3_df") +CSKY_ARCH_EXT_NAME("floate1", CSKY::AEK_FLOATE1, 
"+floate1", "-floate1") +CSKY_ARCH_EXT_NAME("float1e2", CSKY::AEK_FLOAT1E2, "+float1e2", "-float1e2") +CSKY_ARCH_EXT_NAME("float1e3", CSKY::AEK_FLOAT1E3, "+float1e3", "-float1e3") +CSKY_ARCH_EXT_NAME("float3e4", CSKY::AEK_FLOAT3E4, "+float3e4", "-float3e4") +CSKY_ARCH_EXT_NAME("float7e60", CSKY::AEK_FLOAT7E60, "+float7e60", "-float7e60") +CSKY_ARCH_EXT_NAME("hwdiv", CSKY::AEK_HWDIV, "+hwdiv", "-hwdiv") +CSKY_ARCH_EXT_NAME("multiple_stld", CSKY::AEK_STLD, "+multiple_stld", + "-multiple_stld") +CSKY_ARCH_EXT_NAME("pushpop", CSKY::AEK_PUSHPOP, "+pushpop", "-pushpop") +CSKY_ARCH_EXT_NAME("edsp", CSKY::AEK_EDSP, "+edsp", "-edsp") +CSKY_ARCH_EXT_NAME("dsp1e2", CSKY::AEK_DSP1E2, "+dsp1e2", "-dsp1e2") +CSKY_ARCH_EXT_NAME("dspe60", CSKY::AEK_DSPE60, "+dspe60", "-dspe60") +CSKY_ARCH_EXT_NAME("dspv2", CSKY::AEK_DSPV2, "+dspv2", "-dspv2") +CSKY_ARCH_EXT_NAME("dsp_silan", CSKY::AEK_DSPSILAN, "+dsp_silan", "-dsp_silan") +CSKY_ARCH_EXT_NAME("elrw", CSKY::AEK_ELRW, "+elrw", "-elrw") +CSKY_ARCH_EXT_NAME("trust", CSKY::AEK_TRUST, "+trust", "-trust") +CSKY_ARCH_EXT_NAME("java", CSKY::AEK_JAVA, "+java", "-java") +CSKY_ARCH_EXT_NAME("cache", CSKY::AEK_CACHE, "+cache", "-cache") +CSKY_ARCH_EXT_NAME("nvic", CSKY::AEK_NVIC, "+nvic", "-nvic") +CSKY_ARCH_EXT_NAME("doloop", CSKY::AEK_DOLOOP, "+doloop", "-doloop") +CSKY_ARCH_EXT_NAME("high-registers", CSKY::AEK_HIGHREG, "+high-registers", + "-high-registers") +CSKY_ARCH_EXT_NAME("smart", CSKY::AEK_SMART, "+smart", "-smart") +CSKY_ARCH_EXT_NAME("vdsp2e3", CSKY::AEK_VDSP2E3, "+vdsp2e3", "-vdsp2e3") +CSKY_ARCH_EXT_NAME("vdsp2e60f", CSKY::AEK_VDSP2E60F, "+vdsp2e60f", "-vdsp2e60f") +CSKY_ARCH_EXT_NAME("vdspv2", CSKY::AEK_VDSPV2, "+vdspv2", "-vdspv2") +CSKY_ARCH_EXT_NAME("hard-tp", CSKY::AEK_HARDTP, "+hard-tp", "-hard-tp") +CSKY_ARCH_EXT_NAME("soft-tp", CSKY::AEK_SOFTTP, "+soft-tp", "-soft-tp") +CSKY_ARCH_EXT_NAME("istack", CSKY::AEK_ISTACK, "+istack", "-istack") +CSKY_ARCH_EXT_NAME("constpool", CSKY::AEK_CONSTPOOL, "+constpool", "-constpool") +CSKY_ARCH_EXT_NAME("stack-size", CSKY::AEK_STACKSIZE, "+stack-size", + "-stack-size") +CSKY_ARCH_EXT_NAME("ccrt", CSKY::AEK_CCRT, "+ccrt", "-ccrt") +CSKY_ARCH_EXT_NAME("vdspv1", CSKY::AEK_VDSPV1, "+vdspv1", "-vdspv1") + +CSKY_ARCH_EXT_NAME("e1", CSKY::AEK_E1, "+e1", "-e1") +CSKY_ARCH_EXT_NAME("e2", CSKY::AEK_E2, "+e2", "-e2") +CSKY_ARCH_EXT_NAME("2e3", CSKY::AEK_2E3, "+2e3", "-2e3") +CSKY_ARCH_EXT_NAME("mp", CSKY::AEK_MP, "+mp", "-mp") +CSKY_ARCH_EXT_NAME("3e3r1", CSKY::AEK_3E3R1, "+3e3r1", "-3e3r1") +CSKY_ARCH_EXT_NAME("3e3r2", CSKY::AEK_3E3R2, "+3e3r2", "-3e3r2") +CSKY_ARCH_EXT_NAME("3e3r3", CSKY::AEK_3E3R3, "+3e3r3", "-3e3r3") +CSKY_ARCH_EXT_NAME("3e7", CSKY::AEK_3E7, "+3e7", "-3e7") +CSKY_ARCH_EXT_NAME("mp1e2", CSKY::AEK_MP1E2, "+mp1e2", "-mp1e2") +CSKY_ARCH_EXT_NAME("7e10", CSKY::AEK_7E10, "+7e10", "-7e10") +CSKY_ARCH_EXT_NAME("10e60", CSKY::AEK_10E60, "+10e60", "-10e60") + +#undef CSKY_ARCH_EXT_NAME + +#ifndef CSKY_CPU_NAME +#define CSKY_CPU_NAME(NAME, ARCH_ID, DEFAULT_EXT) +#endif + +CSKY_CPU_NAME("ck801", CK801, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck801t", CK801, CSKY::AEK_NONE) +CSKY_CPU_NAME("e801", CK801, CSKY::AEK_NONE) + +CSKY_CPU_NAME("ck802", CK802, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck802t", CK802, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck802j", CK802, CSKY::AEK_JAVA) +CSKY_CPU_NAME("e802", CK802, CSKY::AEK_NONE) +CSKY_CPU_NAME("e802t", CK802, CSKY::AEK_NONE) +CSKY_CPU_NAME("s802", CK802, CSKY::AEK_NONE) +CSKY_CPU_NAME("s802t", CK802, CSKY::AEK_NONE) + +CSKY_CPU_NAME("ck803", CK803, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck803h", CK803, 
CSKY::AEK_NONE) +CSKY_CPU_NAME("ck803t", CK803, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck803ht", CK803, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck803f", CK803, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803fh", CK803, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803e", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60) +CSKY_CPU_NAME("ck803eh", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60) +CSKY_CPU_NAME("ck803et", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60) +CSKY_CPU_NAME("ck803eht", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60) +CSKY_CPU_NAME("ck803ef", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803efh", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803ft", CK803, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803eft", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803efht", CK803, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803r1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803r2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803r3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803hr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803hr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803hr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803tr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803tr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803tr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803htr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803htr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803htr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2) +CSKY_CPU_NAME("ck803fr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803fr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803fr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803fhr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803fhr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803fhr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803er1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) 
+CSKY_CPU_NAME("ck803er2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803er3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ehr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ehr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ehr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803etr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803etr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803etr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ehtr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ehtr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ehtr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efhr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efhr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efhr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803ftr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803ftr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | 
CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803ftr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803eftr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803eftr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803eftr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efhtr1", CK803, + CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efhtr2", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck803efhtr3", CK803, + CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("s803", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3) +CSKY_CPU_NAME("s803t", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3) +CSKY_CPU_NAME("e803", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3) +CSKY_CPU_NAME("e803t", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3) + +CSKY_CPU_NAME("ck803s", CK803S, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck803st", CK803S, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck803se", CK803S, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60) +CSKY_CPU_NAME("ck803sf", CK803S, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803sef", CK803S, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck803seft", CK803S, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) + +CSKY_CPU_NAME("ck804", CK804, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck804h", CK804, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck804t", CK804, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck804ht", CK804, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck804f", CK804, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck804fh", CK804, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck804e", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804eh", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804et", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804eht", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804ef", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804efh", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + 
CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804ft", CK804, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck804eft", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("ck804efht", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("e804d", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("e804dt", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("e804f", CK804, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("e804ft", CK804, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("e804df", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_HIGHREG) +CSKY_CPU_NAME("e804dft", CK804, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_HIGHREG) + +CSKY_CPU_NAME("ck805", CK805, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck805e", CK805, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3) +CSKY_CPU_NAME("ck805f", CK805, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck805t", CK805, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck805ef", CK805, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck805et", CK805, + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3) +CSKY_CPU_NAME("ck805ft", CK805, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) +CSKY_CPU_NAME("ck805eft", CK805, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3) +CSKY_CPU_NAME("i805", CK805, CSKY::AEK_NONE) +CSKY_CPU_NAME("i805f", CK805, + CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3) + +CSKY_CPU_NAME("ck807", CK807, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck807e", CK807, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60) +CSKY_CPU_NAME("ck807f", CK807, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_FLOAT3E4) +CSKY_CPU_NAME("ck807ef", CK807, + CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 | + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_FLOAT3E4) +CSKY_CPU_NAME("c807", CK807, CSKY::AEK_NONE) +CSKY_CPU_NAME("c807f", CK807, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_FLOAT3E4) +CSKY_CPU_NAME("r807", CK807, CSKY::AEK_NONE) +CSKY_CPU_NAME("r807f", CK807, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 | + CSKY::AEK_FLOAT3E4) + +CSKY_CPU_NAME("ck810e", CK810, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810et", CK810, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810ef", CK810, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("ck810eft", CK810, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) 
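[Aside: the CSKY_CPU_NAME entries above and below follow LLVM's X-macro convention: the .def file is pure data, and each includer defines the macro to expand the rows into whatever structure it needs. A minimal self-contained sketch of that pattern follows; it is not part of the patch, DemoCpu/DEMO_CPU_NAME/lookup are invented stand-ins, and the real consumer is the CpuNames table in CSKYTargetParser.h further down.]

// Self-contained illustration of the X-macro pattern (hypothetical names).
#include <cstdint>
#include <cstdio>
#include <cstring>

struct DemoCpu {
  const char *Name; // CPU name, e.g. "ck803f"
  const char *Arch; // base architecture, stringized
  uint64_t Ext;     // default extension mask (CSKY::AEK_* bits)
};

// Stand-in for '#include "CSKYTargetParser.def"': two sample rows.
#define DEMO_CPU_NAME(NAME, ARCH, EXT) {NAME, #ARCH, EXT},
static const DemoCpu Cpus[] = {
    DEMO_CPU_NAME("ck801", CK801, 0x0)
    DEMO_CPU_NAME("ck803f", CK803, 0x502) // AEK_FPUV2SF | AEK_FLOATE1 | AEK_FLOAT1E3
};
#undef DEMO_CPU_NAME

// Linear scan, mirroring how -mcpu= names are resolved against the table.
static const DemoCpu *lookup(const char *Name) {
  for (const DemoCpu &C : Cpus)
    if (std::strcmp(C.Name, Name) == 0)
      return &C;
  return nullptr;
}

int main() {
  if (const DemoCpu *C = lookup("ck803f"))
    std::printf("%s -> %s, ext=0x%llx\n", C->Name, C->Arch,
                (unsigned long long)C->Ext);
  return 0;
}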
+CSKY_CPU_NAME("ck810", CK810, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810f", CK810, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("ck810t", CK810, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810ft", CK810, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("c810", CK810, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("c810t", CK810, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) + +CSKY_CPU_NAME("ck810v", CK810V, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810ev", CK810V, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810tv", CK810V, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck810etv", CK810V, CSKY::AEK_NONE) +CSKY_CPU_NAME("c810v", CK810V, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("ck810fv", CK810V, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("ck810efv", CK810V, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("ck810ftv", CK810V, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("c810tv", CK810V, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) +CSKY_CPU_NAME("c810eftv", CK810V, + CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU | + CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2) + +CSKY_CPU_NAME("ck860", CK860, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck860f", CK860, + CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF | + CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60) +CSKY_CPU_NAME("c860", CK860, + CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF | + CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60) + +CSKY_CPU_NAME("ck860v", CK860V, CSKY::AEK_NONE) +CSKY_CPU_NAME("ck860fv", CK860V, + CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF | + CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60) +CSKY_CPU_NAME("c860v", CK860V, + CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF | + CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60) +// Invalid CPU +CSKY_CPU_NAME("invalid", INVALID, CSKY::AEK_INVALID) +#undef CSKY_CPU_NAME diff --git a/llvm/include/llvm/Support/CSKYTargetParser.h b/llvm/include/llvm/Support/CSKYTargetParser.h new file mode 100644 index 000000000000..ca33a7ee406c --- /dev/null +++ b/llvm/include/llvm/Support/CSKYTargetParser.h @@ -0,0 +1,203 @@ +//===-- CSKYTargetParser - Parser for CSKY target features --------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise CSKY hardware features +// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CSKYTARGETPARSER_H +#define LLVM_SUPPORT_CSKYTARGETPARSER_H + +#include "llvm/ADT/Triple.h" +#include + +namespace llvm { +class StringRef; + +namespace CSKY { + +// Arch extension modifiers for CPUs. 
+enum ArchExtKind : uint64_t { + AEK_INVALID = 0, + AEK_NONE = 1, + AEK_FPUV2SF = 1 << 1, + AEK_FPUV2DF = 1 << 2, + AEK_FDIVDU = 1 << 3, + AEK_FPUV3HI = 1 << 4, + AEK_FPUV3HF = 1 << 5, + AEK_FPUV3SF = 1 << 6, + AEK_FPUV3DF = 1 << 7, + AEK_FLOATE1 = 1 << 8, + AEK_FLOAT1E2 = 1 << 9, + AEK_FLOAT1E3 = 1 << 10, + AEK_FLOAT3E4 = 1 << 11, + AEK_FLOAT7E60 = 1 << 12, + AEK_HWDIV = 1 << 13, + AEK_STLD = 1 << 14, + AEK_PUSHPOP = 1 << 15, + AEK_EDSP = 1 << 16, + AEK_DSP1E2 = 1 << 17, + AEK_DSPE60 = 1 << 18, + AEK_DSPV2 = 1 << 19, + AEK_DSPSILAN = 1 << 20, + AEK_ELRW = 1 << 21, + AEK_TRUST = 1 << 22, + AEK_JAVA = 1 << 23, + AEK_CACHE = 1 << 24, + AEK_NVIC = 1 << 25, + AEK_DOLOOP = 1 << 26, + AEK_HIGHREG = 1 << 27, + AEK_SMART = 1 << 28, + AEK_VDSP2E3 = 1 << 29, + AEK_VDSP2E60F = 1 << 30, + AEK_VDSPV2 = 1ULL << 31, + AEK_HARDTP = 1ULL << 32, + AEK_SOFTTP = 1ULL << 33, + AEK_ISTACK = 1ULL << 34, + AEK_CONSTPOOL = 1ULL << 35, + AEK_STACKSIZE = 1ULL << 36, + AEK_CCRT = 1ULL << 37, + AEK_VDSPV1 = 1ULL << 38, + AEK_E1 = 1ULL << 39, + AEK_E2 = 1ULL << 40, + AEK_2E3 = 1ULL << 41, + AEK_MP = 1ULL << 42, + AEK_3E3R1 = 1ULL << 43, + AEK_3E3R2 = 1ULL << 44, + AEK_3E3R3 = 1ULL << 45, + AEK_3E7 = 1ULL << 46, + AEK_MP1E2 = 1ULL << 47, + AEK_7E10 = 1ULL << 48, + AEK_10E60 = 1ULL << 49 + +}; + +// Arch extension modifiers for CPUs. +enum MultiArchExtKind : uint64_t { + MAEK_E1 = CSKY::AEK_E1 | CSKY::AEK_ELRW, + MAEK_E2 = CSKY::AEK_E2 | CSKY::MAEK_E1, + MAEK_2E3 = CSKY::AEK_2E3 | CSKY::MAEK_E2, + MAEK_MP = CSKY::AEK_MP | CSKY::MAEK_2E3, + MAEK_3E3R1 = CSKY::AEK_3E3R1, + MAEK_3E3R2 = CSKY::AEK_3E3R1 | CSKY::AEK_3E3R2 | CSKY::AEK_DOLOOP, + MAEK_3E7 = CSKY::AEK_3E7 | CSKY::MAEK_2E3, + MAEK_MP1E2 = CSKY::AEK_MP1E2 | CSKY::MAEK_3E7, + MAEK_7E10 = CSKY::AEK_7E10 | CSKY::MAEK_3E7, + MAEK_10E60 = CSKY::AEK_10E60 | CSKY::MAEK_7E10, +}; +// FPU names. +enum CSKYFPUKind { +#define CSKY_FPU(NAME, KIND, VERSION) KIND, +#include "CSKYTargetParser.def" + FK_LAST +}; + +// FPU Version +enum class FPUVersion { + NONE, + FPV2, + FPV3, +}; + +// Arch names. +enum class ArchKind { +#define CSKY_ARCH(NAME, ID, ARCH_BASE_EXT) ID, +#include "CSKYTargetParser.def" +}; + +// List of Arch Extension names. +// FIXME: TableGen this. +struct ExtName { + const char *NameCStr; + size_t NameLength; + uint64_t ID; + const char *Feature; + const char *NegFeature; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } +}; + +const CSKY::ExtName CSKYARCHExtNames[] = { +#define CSKY_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \ + {NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE}, +#include "CSKYTargetParser.def" +}; + +// List of CPU names and their arches. +template <typename T> struct CpuNames { + const char *NameCStr; + size_t NameLength; + T ArchID; + uint64_t defaultExt; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } +}; +const CpuNames<CSKY::ArchKind> CPUNames[] = { +#define CSKY_CPU_NAME(NAME, ARCH_ID, DEFAULT_EXT) \ + {NAME, sizeof(NAME) - 1, CSKY::ArchKind::ARCH_ID, DEFAULT_EXT}, +#include "llvm/Support/CSKYTargetParser.def" +}; + +// FIXME: TableGen this. +// The entries must appear in the order listed in CSKY::CSKYFPUKind for correct +// indexing +struct FPUName { + const char *NameCStr; + size_t NameLength; + CSKYFPUKind ID; + FPUVersion FPUVer; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } +}; + +static const FPUName FPUNames[] = { +#define CSKY_FPU(NAME, KIND, VERSION) {NAME, sizeof(NAME) - 1, KIND, VERSION}, +#include "llvm/Support/CSKYTargetParser.def" +}; + +// List of canonical arch names.
+template <typename T> struct ArchNames { + const char *NameCStr; + size_t NameLength; + T ID; + uint64_t archBaseExt; + StringRef getName() const { return StringRef(NameCStr, NameLength); } +}; +const ArchNames<CSKY::ArchKind> ARCHNames[] = { +#define CSKY_ARCH(NAME, ID, ARCH_BASE_EXT) \ + {NAME, sizeof(NAME) - 1, CSKY::ArchKind::ID, ARCH_BASE_EXT}, +#include "llvm/Support/CSKYTargetParser.def" +}; + +StringRef getArchName(ArchKind AK); +StringRef getDefaultCPU(StringRef Arch); +StringRef getArchExtName(uint64_t ArchExtKind); +StringRef getArchExtFeature(StringRef ArchExt); +uint64_t getDefaultExtensions(StringRef CPU); +bool getExtensionFeatures(uint64_t Extensions, + std::vector<StringRef> &Features); + +// Information by ID +StringRef getFPUName(unsigned FPUKind); +FPUVersion getFPUVersion(unsigned FPUKind); + +bool getFPUFeatures(CSKYFPUKind Kind, std::vector<StringRef> &Features); + +// Parser +ArchKind parseArch(StringRef Arch); +ArchKind parseCPUArch(StringRef CPU); +uint64_t parseArchExt(StringRef ArchExt); +void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values); + +} // namespace CSKY + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h index d6f7793d5df0..894c1f439b64 100644 --- a/llvm/include/llvm/Support/Casting.h +++ b/llvm/include/llvm/Support/Casting.h @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// // -// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(), -// and dyn_cast_or_null<X>() templates. +// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), +// cast_if_present<X>(), and dyn_cast_if_present<X>() templates. // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_CASTING_H #define LLVM_SUPPORT_CASTING_H +#include "llvm/ADT/Optional.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/type_traits.h" #include <cassert> @@ -23,43 +24,47 @@ namespace llvm { //===----------------------------------------------------------------------===// -// isa Support Templates +// simplify_type //===----------------------------------------------------------------------===// -// Define a template that can be specialized by smart pointers to reflect the -// fact that they are automatically dereferenced, and are not involved with the -// template selection process... the default implementation is a noop. -// -template<typename From> struct simplify_type { +/// Define a template that can be specialized by smart pointers to reflect the +/// fact that they are automatically dereferenced, and are not involved with the +/// template selection process... the default implementation is a noop. +// TODO: rename this and/or replace it with other cast traits. +template <typename From> struct simplify_type { using SimpleType = From; // The real type this represents... // An accessor to get the real value... static SimpleType &getSimplifiedValue(From &Val) { return Val; } }; -template<typename From> struct simplify_type<const From> { +template <typename From> struct simplify_type<const From> { using NonConstSimpleType = typename simplify_type<From>::SimpleType; - using SimpleType = - typename add_const_past_pointer<NonConstSimpleType>::type; + using SimpleType = typename add_const_past_pointer<NonConstSimpleType>::type; using RetType = typename add_lvalue_reference_if_not_pointer<SimpleType>::type; - static RetType getSimplifiedValue(const From& Val) { - return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val)); + static RetType getSimplifiedValue(const From &Val) { + return simplify_type<From>::getSimplifiedValue(const_cast<From &>(Val)); } }; +// TODO: add this namespace once everyone is switched to using the new +// interface.
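[Aside: the simplify_type hunk above is the hook that lets the cast machinery see through wrapper types — a specialization reduces the wrapper to an underlying value before isa<>/cast<> dispatch on it. Below is a minimal sketch of such a specialization; demo::Node and demo::Handle are invented for illustration (in-tree, IntrusiveRefCntPtr ships essentially this specialization), and the usual classof-based checks still apply to the unwrapped Node.]

#include "llvm/Support/Casting.h"

namespace demo {
struct Node { int Kind; };
// A toy smart-pointer-like handle around Node.
struct Handle {
  Node *Ptr = nullptr;
  Node *get() const { return Ptr; }
};
} // namespace demo

namespace llvm {
// Teach the cast machinery to reduce a Handle to its Node* before any
// isa<>/cast<> logic runs.
template <> struct simplify_type<demo::Handle> {
  using SimpleType = demo::Node *;
  static SimpleType getSimplifiedValue(demo::Handle &H) { return H.get(); }
};
} // namespace llvm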
+// namespace detail { + +//===----------------------------------------------------------------------===// +// isa_impl +//===----------------------------------------------------------------------===// + // The core of the implementation of isa is here; To and From should be // the names of classes. This template can be specialized to customize the // implementation of isa<> without rewriting it from scratch. -template -struct isa_impl { - static inline bool doit(const From &Val) { - return To::classof(&Val); - } +template struct isa_impl { + static inline bool doit(const From &Val) { return To::classof(&Val); } }; -/// Always allow upcasts, and perform no dynamic check for them. +// Always allow upcasts, and perform no dynamic check for them. template struct isa_impl::value>> { static inline bool doit(const From &) { return true; } @@ -85,103 +90,78 @@ struct isa_impl_cl> { } }; -template struct isa_impl_cl { +template struct isa_impl_cl { static inline bool doit(const From *Val) { assert(Val && "isa<> used on a null pointer"); return isa_impl::doit(*Val); } }; -template struct isa_impl_cl { +template struct isa_impl_cl { static inline bool doit(const From *Val) { assert(Val && "isa<> used on a null pointer"); return isa_impl::doit(*Val); } }; -template struct isa_impl_cl { +template struct isa_impl_cl { static inline bool doit(const From *Val) { assert(Val && "isa<> used on a null pointer"); return isa_impl::doit(*Val); } }; -template struct isa_impl_cl { +template +struct isa_impl_cl { static inline bool doit(const From *Val) { assert(Val && "isa<> used on a null pointer"); return isa_impl::doit(*Val); } }; -template +template struct isa_impl_wrap { // When From != SimplifiedType, we can simplify the type some more by using // the simplify_type template. static bool doit(const From &Val) { return isa_impl_wrap::SimpleType>::doit( - simplify_type::getSimplifiedValue(Val)); + typename simplify_type::SimpleType>:: + doit(simplify_type::getSimplifiedValue(Val)); } }; -template +template struct isa_impl_wrap { // When From == SimpleType, we are as simple as we are going to get. static bool doit(const FromTy &Val) { - return isa_impl_cl::doit(Val); + return isa_impl_cl::doit(Val); } }; -// isa - Return true if the parameter to the template is an instance of one -// of the template type arguments. Used like this: -// -// if (isa(myVal)) { ... } -// if (isa(myVal)) { ... } -// -template LLVM_NODISCARD inline bool isa(const Y &Val) { - return isa_impl_wrap::SimpleType>::doit(Val); -} - -template -LLVM_NODISCARD inline bool isa(const Y &Val) { - return isa(Val) || isa(Val); -} - -// isa_and_nonnull - Functionally identical to isa, except that a null value -// is accepted. -// -template -LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) { - if (!Val) - return false; - return isa(Val); -} - //===----------------------------------------------------------------------===// -// cast Support Templates +// cast_retty + cast_retty_impl //===----------------------------------------------------------------------===// -template struct cast_retty; +template struct cast_retty; // Calculate what type the 'cast' function should return, based on a requested // type of To and a source type of From. 
-template struct cast_retty_impl { - using ret_type = To &; // Normal case, return Ty& +template struct cast_retty_impl { + using ret_type = To &; // Normal case, return Ty& }; -template struct cast_retty_impl { +template struct cast_retty_impl { using ret_type = const To &; // Normal case, return Ty& }; -template struct cast_retty_impl { - using ret_type = To *; // Pointer arg case, return Ty* +template struct cast_retty_impl { + using ret_type = To *; // Pointer arg case, return Ty* }; -template struct cast_retty_impl { +template struct cast_retty_impl { using ret_type = const To *; // Constant pointer arg case, return const Ty* }; -template struct cast_retty_impl { +template struct cast_retty_impl { using ret_type = const To *; // Constant pointer arg case, return const Ty* }; @@ -195,187 +175,604 @@ public: using ret_type = std::unique_ptr; }; -template -struct cast_retty_wrap { +template struct cast_retty_wrap { // When the simplified type and the from type are not the same, use the type // simplifier to reduce the type, then reuse cast_retty_impl to get the // resultant type. using ret_type = typename cast_retty::ret_type; }; -template -struct cast_retty_wrap { +template struct cast_retty_wrap { // When the simplified type is equal to the from type, use it directly. - using ret_type = typename cast_retty_impl::ret_type; + using ret_type = typename cast_retty_impl::ret_type; }; -template -struct cast_retty { +template struct cast_retty { using ret_type = typename cast_retty_wrap< To, From, typename simplify_type::SimpleType>::ret_type; }; +//===----------------------------------------------------------------------===// +// cast_convert_val +//===----------------------------------------------------------------------===// + // Ensure the non-simple values are converted using the simplify_type template // that may be specialized by smart pointers... // -template struct cast_convert_val { +template struct cast_convert_val { // This is not a simple type, use the template to simplify it... - static typename cast_retty::ret_type doit(From &Val) { + static typename cast_retty::ret_type doit(const From &Val) { return cast_convert_val::SimpleType>::doit( - simplify_type::getSimplifiedValue(Val)); + typename simplify_type::SimpleType>:: + doit(simplify_type::getSimplifiedValue(const_cast(Val))); } }; -template struct cast_convert_val { - // This _is_ a simple type, just cast it. +template struct cast_convert_val { + // If it's a reference, switch to a pointer to do the cast and then deref it. static typename cast_retty::ret_type doit(const FromTy &Val) { - typename cast_retty::ret_type Res2 - = (typename cast_retty::ret_type)const_cast(Val); - return Res2; + return *(std::remove_reference_t::ret_type> + *)&const_cast(Val); + } +}; + +template +struct cast_convert_val { + // If it's a pointer, we can use c-style casting directly. + static typename cast_retty::ret_type doit(const FromTy *Val) { + return (typename cast_retty::ret_type) const_cast( + Val); } }; +//===----------------------------------------------------------------------===// +// is_simple_type +//===----------------------------------------------------------------------===// + template struct is_simple_type { static const bool value = std::is_same::SimpleType>::value; }; -// cast - Return the argument parameter cast to the specified type. This -// casting operator asserts that the type is correct, so it does not return null -// on failure. It does not allow a null argument (use cast_or_null for that). 
-// It is typically used like this: -// -// cast(myVal)->getParent() -// -template -inline std::enable_if_t::value, - typename cast_retty::ret_type> -cast(const Y &Val) { - assert(isa(Val) && "cast() argument of incompatible type!"); - return cast_convert_val< - X, const Y, typename simplify_type::SimpleType>::doit(Val); +// } // namespace detail + +//===----------------------------------------------------------------------===// +// CastIsPossible +//===----------------------------------------------------------------------===// + +/// This struct provides a way to check if a given cast is possible. It provides +/// a static function called isPossible that is used to check if a cast can be +/// performed. It should be overridden like this: +/// +/// template<> struct CastIsPossible { +/// static inline bool isPossible(const bar &b) { +/// return bar.isFoo(); +/// } +/// }; +template +struct CastIsPossible { + static inline bool isPossible(const From &f) { + return isa_impl_wrap< + To, const From, + typename simplify_type::SimpleType>::doit(f); + } +}; + +// Needed for optional unwrapping. This could be implemented with isa_impl, but +// we want to implement things in the new method and move old implementations +// over. In fact, some of the isa_impl templates should be moved over to +// CastIsPossible. +template +struct CastIsPossible> { + static inline bool isPossible(const Optional &f) { + assert(f.hasValue() && "CastIsPossible::isPossible called on a nullopt!"); + return isa_impl_wrap< + To, const From, + typename simplify_type::SimpleType>::doit(*f); + } +}; + +/// Upcasting (from derived to base) and casting from a type to itself should +/// always be possible. +template +struct CastIsPossible::value>> { + static inline bool isPossible(const From &f) { return true; } +}; + +//===----------------------------------------------------------------------===// +// Cast traits +//===----------------------------------------------------------------------===// + +/// All of these cast traits are meant to be implementations for useful casts +/// that users may want to use that are outside the standard behavior. An +/// example of how to use a special cast called `CastTrait` is: +/// +/// template<> struct CastInfo : public CastTrait {}; +/// +/// Essentially, if your use case falls directly into one of the use cases +/// supported by a given cast trait, simply inherit your special CastInfo +/// directly from one of these to avoid having to reimplement the boilerplate +/// `isPossible/castFailed/doCast/doCastIfPossible`. A cast trait can also +/// provide a subset of those functions. + +/// This cast trait just provides castFailed for the specified `To` type to make +/// CastInfo specializations more declarative. In order to use this, the target +/// result type must be `To` and `To` must be constructible from `nullptr`. +template struct NullableValueCastFailed { + static To castFailed() { return To(nullptr); } +}; + +/// This cast trait just provides the default implementation of doCastIfPossible +/// to make CastInfo specializations more declarative. The `Derived` template +/// parameter *must* be provided for forwarding castFailed and doCast. +template +struct DefaultDoCastIfPossible { + static To doCastIfPossible(From f) { + if (!Derived::isPossible(f)) + return Derived::castFailed(); + return Derived::doCast(f); + } +}; + +namespace detail { +/// A helper to derive the type to use with `Self` for cast traits, when the +/// provided CRTP derived type is allowed to be void. 
+template +using SelfType = std::conditional_t::value, + Default, OptionalDerived>; +} // namespace detail + +/// This cast trait provides casting for the specific case of casting to a +/// value-typed object from a pointer-typed object. Note that `To` must be +/// nullable/constructible from a pointer to `From` to use this cast. +template +struct ValueFromPointerCast + : public CastIsPossible, + public NullableValueCastFailed, + public DefaultDoCastIfPossible< + To, From *, + detail::SelfType>> { + static inline To doCast(From *f) { return To(f); } +}; + +/// This cast trait provides std::unique_ptr casting. It has the semantics of +/// moving the contents of the input unique_ptr into the output unique_ptr +/// during the cast. It's also a good example of how to implement a move-only +/// cast. +template +struct UniquePtrCast : public CastIsPossible { + using Self = detail::SelfType>; + using CastResultType = std::unique_ptr< + std::remove_reference_t::ret_type>>; + + static inline CastResultType doCast(std::unique_ptr &&f) { + return CastResultType((typename CastResultType::element_type *)f.release()); + } + + static inline CastResultType castFailed() { return CastResultType(nullptr); } + + static inline CastResultType doCastIfPossible(std::unique_ptr &&f) { + if (!Self::isPossible(f)) + return castFailed(); + return doCast(f); + } +}; + +/// This cast trait provides Optional casting. This means that if you have a +/// value type, you can cast it to another value type and have dyn_cast return +/// an Optional. +template +struct OptionalValueCast + : public CastIsPossible, + public DefaultDoCastIfPossible< + Optional, From, + detail::SelfType>> { + static inline Optional castFailed() { return Optional{}; } + + static inline Optional doCast(const From &f) { return To(f); } +}; + +/// Provides a cast trait that strips `const` from types to make it easier to +/// implement a const-version of a non-const cast. It just removes boilerplate +/// and reduces the amount of code you as the user need to implement. You can +/// use it like this: +/// +/// template<> struct CastInfo { +/// ...verbose implementation... +/// }; +/// +/// template<> struct CastInfo : public +/// ConstStrippingForwardingCast> {}; +/// +template +struct ConstStrippingForwardingCast { + // Remove the pointer if it exists, then we can get rid of consts/volatiles. + using DecayedFrom = std::remove_cv_t>; + // Now if it's a pointer, add it back. Otherwise, we want a ref. + using NonConstFrom = std::conditional_t::value, + DecayedFrom *, DecayedFrom &>; + + static inline bool isPossible(const From &f) { + return ForwardTo::isPossible(const_cast(f)); + } + + static inline decltype(auto) castFailed() { return ForwardTo::castFailed(); } + + static inline decltype(auto) doCast(const From &f) { + return ForwardTo::doCast(const_cast(f)); + } + + static inline decltype(auto) doCastIfPossible(const From &f) { + return ForwardTo::doCastIfPossible(const_cast(f)); + } +}; + +/// Provides a cast trait that uses a defined pointer to pointer cast as a base +/// for reference-to-reference casts. Note that it does not provide castFailed +/// and doCastIfPossible because a pointer-to-pointer cast would likely just +/// return `nullptr` which could cause nullptr dereference. You can use it like +/// this: +/// +/// template <> struct CastInfo { ... verbose implementation... 
}; +/// +/// template <> +/// struct CastInfo<foo, bar> +/// : public ForwardToPointerCast<foo, bar, CastInfo<foo, bar *>> {}; +/// +template <typename To, typename From, typename ForwardTo> +struct ForwardToPointerCast { + static inline bool isPossible(const From &f) { + return ForwardTo::isPossible(&f); + } + + static inline decltype(auto) doCast(const From &f) { + return *ForwardTo::doCast(&f); + } +}; + +//===----------------------------------------------------------------------===// +// CastInfo +//===----------------------------------------------------------------------===// + +/// This struct provides a method for customizing the way a cast is performed. +/// It inherits from CastIsPossible, to support the case of declaring many +/// CastIsPossible specializations without having to specialize the full +/// CastInfo. +/// +/// In order to specialize different behaviors, specify different functions in +/// your CastInfo specialization. +/// For isa<> customization, provide: +/// +/// `static bool isPossible(const From &f)` +/// +/// For cast<> customization, provide: +/// +/// `static To doCast(const From &f)` +/// +/// For dyn_cast<> and the *_if_present<> variants' customization, provide: +/// +/// `static To castFailed()` and `static To doCastIfPossible(const From &f)` +/// +/// Your specialization might look something like this: +/// +/// template<> struct CastInfo<foo, bar> : public CastIsPossible<foo, bar> { +/// static inline foo doCast(const bar &b) { +/// return foo(const_cast<bar &>(b)); +/// } +/// static inline foo castFailed() { return foo(); } +/// static inline foo doCastIfPossible(const bar &b) { +/// if (!CastInfo<foo, bar>::isPossible(b)) +/// return castFailed(); +/// return doCast(b); +/// } +/// }; + +// The default implementations of CastInfo don't use cast traits for now because +// we need to specify types all over the place due to the current expected +// casting behavior and the way cast_retty works. New use cases can and should +// take advantage of the cast traits whenever possible! + +template <typename To, typename From, typename Enable = void> +struct CastInfo : public CastIsPossible<To, From> { + using Self = CastInfo<To, From, Enable>; + + using CastReturnType = typename cast_retty<To, From>::ret_type; + + static inline CastReturnType doCast(const From &f) { + return cast_convert_val< + To, From, + typename simplify_type<From>::SimpleType>::doit(const_cast<From &>(f)); + } + + // This assumes that you can construct the cast return type from `nullptr`. + // This is largely to support legacy use cases - if you don't want this + // behavior you should specialize CastInfo for your use case. + static inline CastReturnType castFailed() { return CastReturnType(nullptr); } + + static inline CastReturnType doCastIfPossible(const From &f) { + if (!Self::isPossible(f)) + return castFailed(); + return doCast(f); + } +}; + +/// This struct provides an overload for CastInfo where From has simplify_type +/// defined. This simply forwards to the appropriate CastInfo with the +/// simplified type/value, so you don't have to implement both.
+template +struct CastInfo::value>> { + using Self = CastInfo; + using SimpleFrom = typename simplify_type::SimpleType; + using SimplifiedSelf = CastInfo; + + static inline bool isPossible(From &f) { + return SimplifiedSelf::isPossible( + simplify_type::getSimplifiedValue(f)); + } + + static inline decltype(auto) doCast(From &f) { + return SimplifiedSelf::doCast(simplify_type::getSimplifiedValue(f)); + } + + static inline decltype(auto) castFailed() { + return SimplifiedSelf::castFailed(); + } + + static inline decltype(auto) doCastIfPossible(From &f) { + return SimplifiedSelf::doCastIfPossible( + simplify_type::getSimplifiedValue(f)); + } +}; + +//===----------------------------------------------------------------------===// +// Pre-specialized CastInfo +//===----------------------------------------------------------------------===// + +/// Provide a CastInfo specialized for std::unique_ptr. +template +struct CastInfo> : public UniquePtrCast {}; + +/// Provide a CastInfo specialized for Optional. It's assumed that if the +/// input is Optional that the output can be Optional. If that's not +/// the case, specialize CastInfo for your use case. +template +struct CastInfo> : public OptionalValueCast {}; + +/// isa - Return true if the parameter to the template is an instance of one +/// of the template type arguments. Used like this: +/// +/// if (isa(myVal)) { ... } +/// if (isa(myVal)) { ... } +template +LLVM_NODISCARD inline bool isa(const From &Val) { + return CastInfo::isPossible(Val); } -template -inline typename cast_retty::ret_type cast(Y &Val) { - assert(isa(Val) && "cast() argument of incompatible type!"); - return cast_convert_val::SimpleType>::doit(Val); +template +LLVM_NODISCARD inline bool isa(const From &Val) { + return isa(Val) || isa(Val); } -template -inline typename cast_retty::ret_type cast(Y *Val) { - assert(isa(Val) && "cast() argument of incompatible type!"); - return cast_convert_val::SimpleType>::doit(Val); +/// cast - Return the argument parameter cast to the specified type. This +/// casting operator asserts that the type is correct, so it does not return +/// null on failure. It does not allow a null argument (use cast_if_present for +/// that). It is typically used like this: +/// +/// cast(myVal)->getParent() + +template +LLVM_NODISCARD inline decltype(auto) cast(const From &Val) { + assert(isa(Val) && "cast() argument of incompatible type!"); + return CastInfo::doCast(Val); } -template -inline typename cast_retty>::ret_type -cast(std::unique_ptr &&Val) { - assert(isa(Val.get()) && "cast() argument of incompatible type!"); - using ret_type = typename cast_retty>::ret_type; - return ret_type( - cast_convert_val::SimpleType>::doit( - Val.release())); +template +LLVM_NODISCARD inline decltype(auto) cast(From &Val) { + assert(isa(Val) && "cast() argument of incompatible type!"); + return CastInfo::doCast(Val); } -// cast_or_null - Functionally identical to cast, except that a null value is -// accepted. 
-// -template -LLVM_NODISCARD inline std::enable_if_t< - !is_simple_type::value, typename cast_retty::ret_type> -cast_or_null(const Y &Val) { - if (!Val) - return nullptr; - assert(isa(Val) && "cast_or_null() argument of incompatible type!"); - return cast(Val); +template +LLVM_NODISCARD inline decltype(auto) cast(From *Val) { + assert(isa(Val) && "cast() argument of incompatible type!"); + return CastInfo::doCast(Val); } -template -LLVM_NODISCARD inline std::enable_if_t::value, - typename cast_retty::ret_type> -cast_or_null(Y &Val) { - if (!Val) - return nullptr; - assert(isa(Val) && "cast_or_null() argument of incompatible type!"); - return cast(Val); +template +LLVM_NODISCARD inline decltype(auto) cast(std::unique_ptr &&Val) { + assert(isa(Val) && "cast() argument of incompatible type!"); + return CastInfo>::doCast(std::move(Val)); } -template -LLVM_NODISCARD inline typename cast_retty::ret_type -cast_or_null(Y *Val) { - if (!Val) return nullptr; - assert(isa(Val) && "cast_or_null() argument of incompatible type!"); - return cast(Val); +/// dyn_cast - Return the argument parameter cast to the specified type. This +/// casting operator returns null if the argument is of the wrong type, so it +/// can be used to test for a type as well as cast if successful. The value +/// passed in must be present, if not, use dyn_cast_if_present. This should be +/// used in the context of an if statement like this: +/// +/// if (const Instruction *I = dyn_cast(myVal)) { ... } + +template +LLVM_NODISCARD inline decltype(auto) dyn_cast(const From &Val) { + return CastInfo::doCastIfPossible(Val); } -template -inline typename cast_retty>::ret_type -cast_or_null(std::unique_ptr &&Val) { - if (!Val) - return nullptr; - return cast(std::move(Val)); +template +LLVM_NODISCARD inline decltype(auto) dyn_cast(From &Val) { + return CastInfo::doCastIfPossible(Val); } -// dyn_cast - Return the argument parameter cast to the specified type. This -// casting operator returns null if the argument is of the wrong type, so it can -// be used to test for a type as well as cast if successful. This should be -// used in the context of an if statement like this: -// -// if (const Instruction *I = dyn_cast(myVal)) { ... } -// +template +LLVM_NODISCARD inline decltype(auto) dyn_cast(From *Val) { + return CastInfo::doCastIfPossible(Val); +} -template -LLVM_NODISCARD inline std::enable_if_t< - !is_simple_type::value, typename cast_retty::ret_type> -dyn_cast(const Y &Val) { - return isa(Val) ? cast(Val) : nullptr; +template +LLVM_NODISCARD inline decltype(auto) dyn_cast(std::unique_ptr &&Val) { + return CastInfo>::doCastIfPossible(std::move(Val)); } -template -LLVM_NODISCARD inline typename cast_retty::ret_type dyn_cast(Y &Val) { - return isa(Val) ? cast(Val) : nullptr; +//===----------------------------------------------------------------------===// +// ValueIsPresent +//===----------------------------------------------------------------------===// + +template +constexpr bool IsNullable = std::is_pointer::value || + std::is_constructible::value; + +/// ValueIsPresent provides a way to check if a value is, well, present. For +/// pointers, this is the equivalent of checking against nullptr, for +/// Optionals this is the equivalent of checking hasValue(). It also +/// provides a method for unwrapping a value (think dereferencing a +/// pointer). + +// Generic values can't *not* be present. 
+template struct ValueIsPresent { + using UnwrappedType = T; + static inline bool isPresent(const T &t) { return true; } + static inline decltype(auto) unwrapValue(T &t) { return t; } +}; + +// Optional provides its own way to check if something is present. +template struct ValueIsPresent> { + using UnwrappedType = T; + static inline bool isPresent(const Optional &t) { return t.has_value(); } + static inline decltype(auto) unwrapValue(Optional &t) { + return t.getValue(); + } +}; + +// If something is "nullable" then we just compare it to nullptr to see if it +// exists. +template +struct ValueIsPresent>> { + using UnwrappedType = T; + static inline bool isPresent(const T &t) { return t != nullptr; } + static inline decltype(auto) unwrapValue(T &t) { return t; } +}; + +namespace detail { +// Convenience function we can use to check if a value is present. Because of +// simplify_type, we have to call it on the simplified type for now. +template inline bool isPresent(const T &t) { + return ValueIsPresent::SimpleType>::isPresent( + simplify_type::getSimplifiedValue(const_cast(t))); } -template -LLVM_NODISCARD inline typename cast_retty::ret_type dyn_cast(Y *Val) { - return isa(Val) ? cast(Val) : nullptr; +// Convenience function we can use to unwrap a value. +template inline decltype(auto) unwrapValue(T &t) { + return ValueIsPresent::unwrapValue(t); } +} // namespace detail -// dyn_cast_or_null - Functionally identical to dyn_cast, except that a null -// value is accepted. -// -template -LLVM_NODISCARD inline std::enable_if_t< - !is_simple_type::value, typename cast_retty::ret_type> -dyn_cast_or_null(const Y &Val) { - return (Val && isa(Val)) ? cast(Val) : nullptr; +/// isa_and_present - Functionally identical to isa, except that a null value +/// is accepted. +template +LLVM_NODISCARD inline bool isa_and_present(const Y &Val) { + if (!detail::isPresent(Val)) + return false; + return isa(Val); } +template +LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) { + return isa_and_present(Val); +} + +/// cast_if_present - Functionally identical to cast, except that a null +/// value is accepted. template -LLVM_NODISCARD inline std::enable_if_t::value, - typename cast_retty::ret_type> -dyn_cast_or_null(Y &Val) { - return (Val && isa(Val)) ? cast(Val) : nullptr; +LLVM_NODISCARD inline auto cast_if_present(const Y &Val) { + if (!detail::isPresent(Val)) + return CastInfo::castFailed(); + assert(isa(Val) && "cast_if_present() argument of incompatible type!"); + return cast(detail::unwrapValue(Val)); +} + +template LLVM_NODISCARD inline auto cast_if_present(Y &Val) { + if (!detail::isPresent(Val)) + return CastInfo::castFailed(); + assert(isa(Val) && "cast_if_present() argument of incompatible type!"); + return cast(detail::unwrapValue(Val)); +} + +template LLVM_NODISCARD inline auto cast_if_present(Y *Val) { + if (!detail::isPresent(Val)) + return CastInfo::castFailed(); + assert(isa(Val) && "cast_if_present() argument of incompatible type!"); + return cast(detail::unwrapValue(Val)); } template -LLVM_NODISCARD inline typename cast_retty::ret_type -dyn_cast_or_null(Y *Val) { - return (Val && isa(Val)) ? cast(Val) : nullptr; +LLVM_NODISCARD inline auto cast_if_present(std::unique_ptr &&Val) { + if (!detail::isPresent(Val)) + return UniquePtrCast::castFailed(); + return UniquePtrCast::doCast(std::move(Val)); +} + +// Provide a forwarding from cast_or_null to cast_if_present for current +// users. This is deprecated and will be removed in a future patch, use +// cast_if_present instead. 
+template auto cast_or_null(const Y &Val) { + return cast_if_present(Val); +} + +template auto cast_or_null(Y &Val) { + return cast_if_present(Val); +} + +template auto cast_or_null(Y *Val) { + return cast_if_present(Val); +} + +template auto cast_or_null(std::unique_ptr &&Val) { + return cast_if_present(std::move(Val)); +} + +/// dyn_cast_if_present - Functionally identical to dyn_cast, except that a +/// null (or none in the case of optionals) value is accepted. +template auto dyn_cast_if_present(const Y &Val) { + if (!detail::isPresent(Val)) + return CastInfo::castFailed(); + return CastInfo::doCastIfPossible(detail::unwrapValue(Val)); +} + +template auto dyn_cast_if_present(Y &Val) { + if (!detail::isPresent(Val)) + return CastInfo::castFailed(); + return CastInfo::doCastIfPossible(detail::unwrapValue(Val)); +} + +template auto dyn_cast_if_present(Y *Val) { + if (!detail::isPresent(Val)) + return CastInfo::castFailed(); + return CastInfo::doCastIfPossible(detail::unwrapValue(Val)); +} + +// Forwards to dyn_cast_if_present to avoid breaking current users. This is +// deprecated and will be removed in a future patch, use +// cast_if_present instead. +template auto dyn_cast_or_null(const Y &Val) { + return dyn_cast_if_present(Val); +} + +template auto dyn_cast_or_null(Y &Val) { + return dyn_cast_if_present(Val); +} + +template auto dyn_cast_or_null(Y *Val) { + return dyn_cast_if_present(Val); } -// unique_dyn_cast - Given a unique_ptr, try to return a unique_ptr, -// taking ownership of the input pointer iff isa(Val) is true. If the -// cast is successful, From refers to nullptr on exit and the casted value -// is returned. If the cast is unsuccessful, the function returns nullptr -// and From is unchanged. +/// unique_dyn_cast - Given a unique_ptr, try to return a unique_ptr, +/// taking ownership of the input pointer iff isa(Val) is true. If the +/// cast is successful, From refers to nullptr on exit and the casted value +/// is returned. If the cast is unsuccessful, the function returns nullptr +/// and From is unchanged. template -LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr &Val) - -> decltype(cast(Val)) { +LLVM_NODISCARD inline typename CastInfo>::CastResultType +unique_dyn_cast(std::unique_ptr &Val) { if (!isa(Val)) return nullptr; return cast(std::move(Val)); @@ -386,11 +783,11 @@ LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr &&Val) { return unique_dyn_cast(Val); } -// dyn_cast_or_null - Functionally identical to unique_dyn_cast, except that -// a null value is accepted. +// unique_dyn_cast_or_null - Functionally identical to unique_dyn_cast, +// except that a null value is accepted. template -LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr &Val) - -> decltype(cast(Val)) { +LLVM_NODISCARD inline typename CastInfo>::CastResultType +unique_dyn_cast_or_null(std::unique_ptr &Val) { if (!Val) return nullptr; return unique_dyn_cast(Val); diff --git a/llvm/include/llvm/Support/CodeGen.h b/llvm/include/llvm/Support/CodeGen.h index 9e66d84e185d..71d0ddbfe05e 100644 --- a/llvm/include/llvm/Support/CodeGen.h +++ b/llvm/include/llvm/Support/CodeGen.h @@ -69,6 +69,40 @@ namespace llvm { // Specify what functions should keep the frame pointer. enum class FramePointerKind { None, NonLeaf, All }; -} // end llvm namespace + // Specify what type of zeroing callee-used registers. 
+ namespace ZeroCallUsedRegs { + const unsigned ONLY_USED = 1U << 1; + const unsigned ONLY_GPR = 1U << 2; + const unsigned ONLY_ARG = 1U << 3; + + enum class ZeroCallUsedRegsKind : unsigned int { + // Don't zero any call-used regs. + Skip = 1U << 0, + // Only zeros call-used GPRs used in the fn and pass args. + UsedGPRArg = ONLY_USED | ONLY_GPR | ONLY_ARG, + // Only zeros call-used GPRs used in the fn. + UsedGPR = ONLY_USED | ONLY_GPR, + // Only zeros call-used regs used in the fn and pass args. + UsedArg = ONLY_USED | ONLY_ARG, + // Only zeros call-used regs used in the fn. + Used = ONLY_USED, + // Zeros all call-used GPRs that pass args. + AllGPRArg = ONLY_GPR | ONLY_ARG, + // Zeros all call-used GPRs. + AllGPR = ONLY_GPR, + // Zeros all call-used regs that pass args. + AllArg = ONLY_ARG, + // Zeros all call-used regs. + All = 0, + }; + } // namespace ZeroCallUsedRegs + + enum class UWTableKind { + None = 0, ///< No unwind table requested + Sync = 1, ///< "Synchronous" unwind tables + Async = 2, ///< "Asynchronous" unwind tables (instr precise) + Default = 2, + }; + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index c8e29ac42559..6461164fceff 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -49,13 +49,12 @@ class FileSystem; class StringSaver; -/// cl Namespace - This namespace contains all of the command line option -/// processing machinery. It is intentionally a short name to make qualified -/// usage concise. +/// This namespace contains all of the command line option processing machinery. +/// It is intentionally a short name to make qualified usage concise. namespace cl { //===----------------------------------------------------------------------===// -// ParseCommandLineOptions - Command line option processing entry point. +// Command line option processing entry point. // // Returns true on success. Otherwise, this will print the error message to // stderr and exit if \p Errs is not set (nullptr by default), or print the @@ -78,22 +77,19 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, using VersionPrinterTy = std::function; ///===---------------------------------------------------------------------===// -/// SetVersionPrinter - Override the default (LLVM specific) version printer -/// used to print out the version when --version is given -/// on the command line. This allows other systems using the -/// CommandLine utilities to print their own version string. +/// Override the default (LLVM specific) version printer used to print out the +/// version when --version is given on the command line. This allows other +/// systems using the CommandLine utilities to print their own version string. void SetVersionPrinter(VersionPrinterTy func); ///===---------------------------------------------------------------------===// -/// AddExtraVersionPrinter - Add an extra printer to use in addition to the -/// default one. This can be called multiple times, -/// and each time it adds a new function to the list -/// which will be called after the basic LLVM version -/// printing is complete. Each can then add additional -/// information specific to the tool. +/// Add an extra printer to use in addition to the default one. This can be +/// called multiple times, and each time it adds a new function to the list +/// which will be called after the basic LLVM version printing is complete. 
+/// Each can then add additional information specific to the tool. void AddExtraVersionPrinter(VersionPrinterTy func); -// PrintOptionValues - Print option values. +// Print option values. // With -print-options print the difference between option values and defaults. // With -print-all-options print all option values. // (Currently not perfect, but best-effort.) @@ -121,9 +117,9 @@ enum NumOccurrencesFlag { // Flags for the number of occurrences allowed Required = 0x02, // One occurrence required OneOrMore = 0x03, // One or more occurrences required - // ConsumeAfter - Indicates that this option is fed anything that follows the - // last positional argument required by the application (it is an error if - // there are zero positional arguments, and a ConsumeAfter option is used). + // Indicates that this option is fed anything that follows the last positional + // argument required by the application (it is an error if there are zero + // positional arguments, and a ConsumeAfter option is used). // Thus, for example, all arguments to LLI are processed until a filename is // found. Once a filename is found, all of the succeeding arguments are // passed, unprocessed, to the ConsumeAfter option. @@ -144,8 +140,8 @@ enum OptionHidden { // Control whether -help shows this option ReallyHidden = 0x02 // Neither -help nor -help-hidden show this arg }; -// Formatting flags - This controls special features that the option might have -// that cause it to be parsed differently... +// This controls special features that the option might have that cause it to be +// parsed differently... // // Prefix - This option allows arguments that are otherwise unrecognized to be // matched by options that are a prefix of the actual value. This is useful for @@ -170,7 +166,7 @@ enum MiscFlags { // Miscellaneous flags to adjust argument PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args? Sink = 0x04, // Should this cl::list eat all unknown options? - // Grouping - Can this option group with other options? + // Can this option group with other options? // If this is enabled, multiple letter options are allowed to bunch together // with only a single hyphen for the whole group. This allows emulation // of the behavior that ls uses for example: ls -la === ls -l -a @@ -181,7 +177,6 @@ enum MiscFlags { // Miscellaneous flags to adjust argument }; //===----------------------------------------------------------------------===// -// Option Category class // class OptionCategory { private: @@ -205,7 +200,6 @@ public: OptionCategory &getGeneralCategory(); //===----------------------------------------------------------------------===// -// SubCommand class // class SubCommand { private: @@ -244,14 +238,13 @@ extern ManagedStatic<SubCommand> TopLevelSubCommand; extern ManagedStatic<SubCommand> AllSubCommands; //===----------------------------------------------------------------------===// -// Option Base class // class Option { friend class alias; - // handleOccurrences - Overriden by subclasses to handle the value passed into - // an argument. Should return true if there was an error processing the - // argument and the program should exit. + // Overridden by subclasses to handle the value passed into an argument. Should + // return true if there was an error processing the argument and the program + // should exit.
// virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg) = 0; @@ -305,7 +298,7 @@ public: inline unsigned getPosition() const { return Position; } inline unsigned getNumAdditionalVals() const { return AdditionalVals; } - // hasArgStr - Return true if the argstr != "" + // Return true if the argstr != "" bool hasArgStr() const { return !ArgStr.empty(); } bool isPositional() const { return getFormattingFlag() == cl::Positional; } bool isSink() const { return getMiscFlags() & cl::Sink; } @@ -348,7 +341,7 @@ protected: public: virtual ~Option() = default; - // addArgument - Register this argument with the commandline system. + // Register this argument with the commandline system. // void addArgument(); @@ -361,8 +354,8 @@ public: // Return the width of the option tag for printing... virtual size_t getOptionWidth() const = 0; - // printOptionInfo - Print out information about this option. The - // to-be-maintained width is specified. + // Print out information about this option. The to-be-maintained width is + // specified. // virtual void printOptionInfo(size_t GlobalWidth) const = 0; @@ -388,7 +381,7 @@ public: virtual void getExtraOptionNames(SmallVectorImpl &) {} - // addOccurrence - Wrapper around handleOccurrence that enforces Flags. + // Wrapper around handleOccurrence that enforces Flags. // virtual bool addOccurrence(unsigned pos, StringRef ArgName, StringRef Value, bool MultiArg = false); @@ -408,7 +401,7 @@ public: // command line option parsers... // -// desc - Modifier to set the description shown in the -help output... +// Modifier to set the description shown in the -help output... struct desc { StringRef Desc; @@ -417,8 +410,7 @@ struct desc { void apply(Option &O) const { O.setDescription(Desc); } }; -// value_desc - Modifier to set the value description shown in the -help -// output... +// Modifier to set the value description shown in the -help output... struct value_desc { StringRef Desc; @@ -427,10 +419,9 @@ struct value_desc { void apply(Option &O) const { O.setValueStr(Desc); } }; -// init - Specify a default (initial) value for the command line argument, if -// the default constructor for the argument type does not give you what you -// want. This is only valid on "opt" arguments, not on "list" arguments. -// +// Specify a default (initial) value for the command line argument, if the +// default constructor for the argument type does not give you what you want. +// This is only valid on "opt" arguments, not on "list" arguments. template struct initializer { const Ty &Init; initializer(const Ty &Val) : Init(Val) {} @@ -442,10 +433,9 @@ template initializer init(const Ty &Val) { return initializer(Val); } -// location - Allow the user to specify which external variable they want to -// store the results of the command line argument processing into, if they don't -// want to store it in the option itself. -// +// Allow the user to specify which external variable they want to store the +// results of the command line argument processing into, if they don't want to +// store it in the option itself. template struct LocationClass { Ty &Loc; @@ -458,8 +448,7 @@ template LocationClass location(Ty &L) { return LocationClass(L); } -// cat - Specifiy the Option category for the command line argument to belong -// to. +// Specify the Option category for the command line argument to belong to. 
struct cat { OptionCategory &Category; @@ -468,7 +457,7 @@ struct cat { template void apply(Opt &O) const { O.addCategory(Category); } }; -// sub - Specify the subcommand that this option belongs to. +// Specify the subcommand that this option belongs to. struct sub { SubCommand ⋐ @@ -514,7 +503,6 @@ callback(F CB) { } //===----------------------------------------------------------------------===// -// OptionValue class // Support value comparison outside the template. struct GenericOptionValue { @@ -672,8 +660,8 @@ struct OptionEnumValue { #define clEnumValN(ENUMVAL, FLAGNAME, DESC) \ llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC } -// values - For custom data types, allow specifying a group of values together -// as the values that go into the mapping that the option handler uses. +// For custom data types, allow specifying a group of values together as the +// values that go into the mapping that the option handler uses. // class ValuesClass { // Use a vector instead of a map, because the lists should be short, @@ -699,16 +687,16 @@ template ValuesClass values(OptsTy... Options) { } //===----------------------------------------------------------------------===// -// parser class - Parameterizable parser for different data types. By default, -// known data types (string, int, bool) have specialized parsers, that do what -// you would expect. The default parser, used for data types that are not -// built-in, uses a mapping table to map specific options to values, which is -// used, among other things, to handle enum types. +// Parameterizable parser for different data types. By default, known data types +// (string, int, bool) have specialized parsers, that do what you would expect. +// The default parser, used for data types that are not built-in, uses a mapping +// table to map specific options to values, which is used, among other things, +// to handle enum types. //-------------------------------------------------- -// generic_parser_base - This class holds all the non-generic code that we do -// not need replicated for every instance of the generic parser. This also -// allows us to put stuff into CommandLine.cpp +// This class holds all the non-generic code that we do not need replicated for +// every instance of the generic parser. This also allows us to put stuff into +// CommandLine.cpp // class generic_parser_base { protected: @@ -726,15 +714,15 @@ public: virtual ~generic_parser_base() = default; // Base class should have virtual-destructor - // getNumOptions - Virtual function implemented by generic subclass to - // indicate how many entries are in Values. + // Virtual function implemented by generic subclass to indicate how many + // entries are in Values. // virtual unsigned getNumOptions() const = 0; - // getOption - Return option name N. + // Return option name N. virtual StringRef getOption(unsigned N) const = 0; - // getDescription - Return description N + // Return description N virtual StringRef getDescription(unsigned N) const = 0; // Return the width of the option tag for printing... @@ -742,8 +730,8 @@ public: virtual const GenericOptionValue &getOptionValue(unsigned N) const = 0; - // printOptionInfo - Print out information about this option. The - // to-be-maintained width is specified. + // Print out information about this option. The to-be-maintained width is + // specified. 
//
virtual void printOptionInfo(const Option &O, size_t GlobalWidth) const;

@@ -751,7 +739,7 @@ public:
const GenericOptionValue &Default, size_t GlobalWidth) const;

- // printOptionDiff - print the value of an option and it's default.
+ // Print the value of an option and its default.
//
// Template definition ensures that the option and default have the same
// DataType (via the same AnyOptionValue).
@@ -791,7 +779,7 @@ public:
return ValueDisallowed;
}

- // findOption - Return the option number corresponding to the specified
+ // Return the option number corresponding to the specified
// argument string. If the option is not found, getNumOptions() is returned.
//
unsigned findOption(StringRef Name);
@@ -829,12 +817,12 @@ public:
return Values[N].HelpStr;
}

- // getOptionValue - Return the value of option name N.
+ // Return the value of option name N.
const GenericOptionValue &getOptionValue(unsigned N) const override {
return Values[N].V;
}

- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) {
StringRef ArgVal;
if (Owner.hasArgStr())
@@ -851,7 +839,7 @@ public:
return O.error("Cannot find option named '" + ArgVal + "'!");
}

- /// addLiteralOption - Add an entry to the mapping table.
+ /// Add an entry to the mapping table.
///
template <class DT>
void addLiteralOption(StringRef Name, const DT &V, StringRef HelpStr) {
@@ -861,7 +849,7 @@ public:
AddLiteralOption(Owner, Name);
}

- /// removeLiteralOption - Remove the specified option.
+ /// Remove the specified option.
///
void removeLiteralOption(StringRef Name) {
unsigned N = findOption(Name);
@@ -871,7 +859,7 @@ public:
};

//--------------------------------------------------
-// basic_parser - Super class of parsers to provide boilerplate code
+// Super class of parsers to provide boilerplate code
//
class basic_parser_impl { // non-template implementation of basic_parser
public:
@@ -890,16 +878,15 @@ public:
// Return the width of the option tag for printing...
size_t getOptionWidth(const Option &O) const;

- // printOptionInfo - Print out information about this option. The
- // to-be-maintained width is specified.
+ // Print out information about this option. The to-be-maintained width is
+ // specified.
//
void printOptionInfo(const Option &O, size_t GlobalWidth) const;

- // printOptionNoValue - Print a placeholder for options that don't yet support
- // printOptionDiff().
+ // Print a placeholder for options that don't yet support printOptionDiff().
void printOptionNoValue(const Option &O, size_t GlobalWidth) const;

- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
virtual StringRef getValueName() const { return "value"; }

// An out-of-line virtual method to provide a 'home' for this class.
@@ -910,8 +897,8 @@ protected:
void printOptionName(const Option &O, size_t GlobalWidth) const;
};

-// basic_parser - The real basic parser is just a template wrapper that provides
-// a typedef for the provided data type.
+// The real basic parser is just a template wrapper that provides a typedef for
+// the provided data type.
//
template <class DataType> class basic_parser : public basic_parser_impl {
public:
@@ -922,8 +909,6 @@ public:
};

//--------------------------------------------------
-// parser<bool>
-//

extern template class basic_parser<bool>;

@@ -931,7 +916,7 @@ template <> class parser<bool> : public basic_parser<bool> {
public:
parser(Option &O) : basic_parser(O) {}

- // parse - Return true on error.
+ // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, bool &Val); void initialize() {} @@ -940,7 +925,7 @@ public: return ValueOptional; } - // getValueName - Do not print = at all. + // Do not print = at all. StringRef getValueName() const override { return StringRef(); } void printOptionDiff(const Option &O, bool V, OptVal Default, @@ -951,7 +936,6 @@ public: }; //-------------------------------------------------- -// parser extern template class basic_parser; @@ -959,14 +943,14 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, boolOrDefault &Val); enum ValueExpected getValueExpectedFlagDefault() const { return ValueOptional; } - // getValueName - Do not print = at all. + // Do not print = at all. StringRef getValueName() const override { return StringRef(); } void printOptionDiff(const Option &O, boolOrDefault V, OptVal Default, @@ -977,8 +961,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -986,10 +968,10 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, int &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "int"; } void printOptionDiff(const Option &O, int V, OptVal Default, @@ -1000,8 +982,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1009,10 +989,10 @@ template <> class parser final : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, long &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "long"; } void printOptionDiff(const Option &O, long V, OptVal Default, @@ -1023,8 +1003,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1032,10 +1010,10 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, long long &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "long"; } void printOptionDiff(const Option &O, long long V, OptVal Default, @@ -1046,8 +1024,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1055,10 +1031,10 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. 
StringRef getValueName() const override { return "uint"; } void printOptionDiff(const Option &O, unsigned V, OptVal Default, @@ -1069,8 +1045,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1079,10 +1053,10 @@ class parser final : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned long &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "ulong"; } void printOptionDiff(const Option &O, unsigned long V, OptVal Default, @@ -1093,8 +1067,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1103,11 +1075,11 @@ class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned long long &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "ulong"; } void printOptionDiff(const Option &O, unsigned long long V, OptVal Default, @@ -1118,8 +1090,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1127,10 +1097,10 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, double &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "number"; } void printOptionDiff(const Option &O, double V, OptVal Default, @@ -1141,8 +1111,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1150,10 +1118,10 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &O, StringRef ArgName, StringRef Arg, float &Val); - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "number"; } void printOptionDiff(const Option &O, float V, OptVal Default, @@ -1164,8 +1132,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1173,13 +1139,13 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &, StringRef, StringRef Arg, std::string &Value) { Value = Arg.str(); return false; } - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. 
StringRef getValueName() const override { return "string"; } void printOptionDiff(const Option &O, StringRef V, const OptVal &Default, @@ -1190,8 +1156,6 @@ public: }; //-------------------------------------------------- -// parser -// extern template class basic_parser; @@ -1199,13 +1163,13 @@ template <> class parser : public basic_parser { public: parser(Option &O) : basic_parser(O) {} - // parse - Return true on error. + // Return true on error. bool parse(Option &, StringRef, StringRef Arg, char &Value) { Value = Arg[0]; return false; } - // getValueName - Overload in subclass to provide a better default value. + // Overload in subclass to provide a better default value. StringRef getValueName() const override { return "char"; } void printOptionDiff(const Option &O, char V, OptVal Default, @@ -1216,8 +1180,6 @@ public: }; //-------------------------------------------------- -// PrintOptionDiff -// // This collection of wrappers is the intermediary between class opt and class // parser to handle all the template nastiness. @@ -1261,10 +1223,10 @@ void printOptionDiff( } //===----------------------------------------------------------------------===// -// applicator class - This class is used because we must use partial -// specialization to handle literal string arguments specially (const char* does -// not correctly respond to the apply method). Because the syntax to use this -// is a pain, we have the 'apply' method below to handle the nastiness... +// This class is used because we must use partial specialization to handle +// literal string arguments specially (const char* does not correctly respond to +// the apply method). Because the syntax to use this is a pain, we have the +// 'apply' method below to handle the nastiness... // template struct applicator { template static void opt(const Mod &M, Opt &O) { M.apply(O); } @@ -1313,7 +1275,7 @@ template <> struct applicator { } }; -// apply method - Apply modifiers to an option in a type safe way. +// Apply modifiers to an option in a type safe way. template void apply(Opt *O, const Mod &M, const Mods &... Ms) { applicator::opt(M, *O); @@ -1325,8 +1287,6 @@ template void apply(Opt *O, const Mod &M) { } //===----------------------------------------------------------------------===// -// opt_storage class - // Default storage class definition: external storage. This implementation // assumes the user will specify a variable to store the data into with the // cl::location(x) modifier. @@ -1406,7 +1366,7 @@ public: // Make sure we initialize the value with the default constructor for the // type. - opt_storage() : Value(DataType()), Default(DataType()) {} + opt_storage() : Value(DataType()), Default() {} template void setValue(const T &V, bool initial = false) { Value = V; @@ -1425,7 +1385,7 @@ public: }; //===----------------------------------------------------------------------===// -// opt - A scalar command line option. +// A scalar command line option. // template > @@ -1476,6 +1436,8 @@ class opt : public Option, const OptionValue &V = this->getDefault(); if (V.hasValue()) this->setValue(V.getValue()); + else + this->setValue(T()); } template ; extern template class opt; //===----------------------------------------------------------------------===// -// list_storage class - // Default storage class definition: external storage. This implementation // assumes the user will specify a variable to store the data into with the // cl::location(x) modifier. 
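For orientation, the cl:: modifiers documented above (cl::desc, cl::value_desc, cl::init, cl::location, cl::cat) compose declaratively when an option is defined. A minimal usage sketch follows; it is not part of this patch, and the tool name, category, and option names are hypothetical:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical category that groups this tool's options in -help output.
static cl::OptionCategory MyToolCategory("my-tool options");

// Scalar option with a description, value name, default, and category.
static cl::opt<std::string> OutputFilename(
    "o", cl::desc("Output filename"), cl::value_desc("filename"),
    cl::init("-"), cl::cat(MyToolCategory));

// External storage via cl::location: parsed results land in VerboseFlag
// (the second template argument enables external storage).
static bool VerboseFlag;
static cl::opt<bool, true> Verbose("verbose",
                                   cl::desc("Enable verbose output"),
                                   cl::location(VerboseFlag),
                                   cl::cat(MyToolCategory));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "my-tool sketch\n");
  return 0;
}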
@@ -1634,7 +1594,7 @@ public: }; //===----------------------------------------------------------------------===// -// list - A list of command line options. +// A list of command line options. // template > @@ -1716,7 +1676,7 @@ public: [](const typename ParserClass::parser_data_type &) {}; }; -// multi_val - Modifier to set the number of additional values. +// Modifier to set the number of additional values. struct multi_val { unsigned AdditionalVals; explicit multi_val(unsigned N) : AdditionalVals(N) {} @@ -1728,8 +1688,6 @@ struct multi_val { }; //===----------------------------------------------------------------------===// -// bits_storage class - // Default storage class definition: external storage. This implementation // assumes the user will specify a variable to store the data into with the // cl::location(x) modifier. @@ -1738,7 +1696,7 @@ template class bits_storage { unsigned *Location = nullptr; // Where to store the bits... template static unsigned Bit(const T &V) { - unsigned BitPos = reinterpret_cast(V); + unsigned BitPos = static_cast(V); assert(BitPos < sizeof(unsigned) * CHAR_BIT && "enum exceeds width of bit vector!"); return 1 << BitPos; @@ -1763,6 +1721,11 @@ public: unsigned getBits() { return *Location; } + void clear() { + if (Location) + *Location = 0; + } + template bool isSet(const T &V) { return (*Location & Bit(V)) != 0; } @@ -1772,10 +1735,10 @@ public: // This makes us exactly compatible with the bits in all cases that it is used. // template class bits_storage { - unsigned Bits; // Where to store the bits... + unsigned Bits{0}; // Where to store the bits... template static unsigned Bit(const T &V) { - unsigned BitPos = (unsigned)V; + unsigned BitPos = static_cast(V); assert(BitPos < sizeof(unsigned) * CHAR_BIT && "enum exceeds width of bit vector!"); return 1 << BitPos; @@ -1786,11 +1749,13 @@ public: unsigned getBits() { return Bits; } + void clear() { Bits = 0; } + template bool isSet(const T &V) { return (Bits & Bit(V)) != 0; } }; //===----------------------------------------------------------------------===// -// bits - A bit vector of command options. +// A bit vector of command options. // template > @@ -1832,7 +1797,7 @@ class bits : public Option, public bits_storage { void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const override { } - void setDefault() override {} + void setDefault() override { bits_storage::clear(); } void done() { addArgument(); @@ -1929,7 +1894,7 @@ public: } }; -// aliasfor - Modifier to set the option an alias aliases. +// Modifier to set the option an alias aliases. struct aliasopt { Option &Opt; @@ -1938,10 +1903,9 @@ struct aliasopt { void apply(alias &A) const { A.setAliasFor(Opt); } }; -// extrahelp - provide additional help at the end of the normal help -// output. All occurrences of cl::extrahelp will be accumulated and -// printed to stderr at the end of the regular help, just before -// exit is called. +// Provide additional help at the end of the normal help output. All occurrences +// of cl::extrahelp will be accumulated and printed to stderr at the end of the +// regular help, just before exit is called. struct extrahelp { StringRef morehelp; @@ -2032,12 +1996,15 @@ void TokenizeGNUCommandLine(StringRef Source, StringSaver &Saver, SmallVectorImpl &NewArgv, bool MarkEOLs = false); -/// Tokenizes a Windows command line which may contain quotes and escaped -/// quotes. +/// Tokenizes a string of Windows command line arguments, which may contain +/// quotes and escaped quotes. 
/// /// See MSDN docs for CommandLineToArgvW for information on the quoting rules. /// http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx /// +/// For handling a full Windows command line including the executable name at +/// the start, see TokenizeWindowsCommandLineFull below. +/// /// \param [in] Source The string to be split on whitespace with quotes. /// \param [in] Saver Delegates back to the caller for saving parsed strings. /// \param [in] MarkEOLs true if tokenizing a response file and you want end of @@ -2054,6 +2021,23 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver, void TokenizeWindowsCommandLineNoCopy(StringRef Source, StringSaver &Saver, SmallVectorImpl &NewArgv); +/// Tokenizes a Windows full command line, including command name at the start. +/// +/// This uses the same syntax rules as TokenizeWindowsCommandLine for all but +/// the first token. But the first token is expected to be parsed as the +/// executable file name in the way CreateProcess would do it, rather than the +/// way the C library startup code would do it: CreateProcess does not consider +/// that \ is ever an escape character (because " is not a valid filename char, +/// hence there's never a need to escape it to be used literally). +/// +/// Parameters are the same as for TokenizeWindowsCommandLine. In particular, +/// if you set MarkEOLs = true, then the first word of every line will be +/// parsed using the special rules for command names, making this function +/// suitable for parsing a file full of commands to execute. +void TokenizeWindowsCommandLineFull(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs = false); + /// String tokenization function type. Should be compatible with either /// Windows or Unix command line tokenizers. using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver, diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index f3317049524f..6708b7cc95cc 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -39,6 +39,10 @@ # define __has_builtin(x) 0 #endif +#ifndef __has_include +# define __has_include(x) 0 +#endif + // Only use __has_cpp_attribute in C++ mode. GCC defines __has_cpp_attribute in // C mode, but the :: in __has_cpp_attribute(scoped::attribute) is invalid. #ifndef LLVM_HAS_CPP_ATTRIBUTE @@ -90,30 +94,14 @@ #define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version)) // We require at least VS 2019. +#if !defined(LLVM_FORCE_USE_OLD_TOOLCHAIN) #if !LLVM_MSC_PREREQ(1920) #error LLVM requires at least VS 2019. #endif - -#else -#define LLVM_MSC_PREREQ(version) 0 #endif -/// Does the compiler support ref-qualifiers for *this? -/// -/// Sadly, this is separate from just rvalue reference support because GCC -/// and MSVC implemented this later than everything else. This appears to be -/// corrected in MSVC 2019 but not MSVC 2017. -/// FIXME: Remove LLVM_HAS_RVALUE_REFERENCE_THIS macro -#define LLVM_HAS_RVALUE_REFERENCE_THIS 1 - -/// Expands to '&' if ref-qualifiers for *this are supported. -/// -/// This can be used to provide lvalue/rvalue overrides of member functions. 
-/// The rvalue override should be guarded by LLVM_HAS_RVALUE_REFERENCE_THIS -#if LLVM_HAS_RVALUE_REFERENCE_THIS -#define LLVM_LVALUE_FUNCTION & #else -#define LLVM_LVALUE_FUNCTION +#define LLVM_MSC_PREREQ(version) 0 #endif /// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked @@ -325,20 +313,17 @@ #define LLVM_EXTENSION #endif -// LLVM_ATTRIBUTE_DEPRECATED(decl, "message") -// This macro will be removed. -// Use C++14's attribute instead: [[deprecated("message")]] -#define LLVM_ATTRIBUTE_DEPRECATED(decl, message) [[deprecated(message)]] decl - /// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands /// to an expression which states that it is undefined behavior for the /// compiler to reach this point. Otherwise is not defined. +/// +/// '#else' is intentionally left out so that other macro logic (e.g., +/// LLVM_ASSUME_ALIGNED and llvm_unreachable()) can detect whether +/// LLVM_BUILTIN_UNREACHABLE has a definition. #if __has_builtin(__builtin_unreachable) || defined(__GNUC__) # define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable() #elif defined(_MSC_VER) # define LLVM_BUILTIN_UNREACHABLE __assume(false) -#else -# define LLVM_BUILTIN_UNREACHABLE #endif /// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression @@ -411,22 +396,6 @@ # define LLVM_PACKED_END _Pragma("pack(pop)") #endif -/// \macro LLVM_PTR_SIZE -/// A constant integer equivalent to the value of sizeof(void*). -/// Generally used in combination with alignas or when doing computation in the -/// preprocessor. -#ifdef __SIZEOF_POINTER__ -# define LLVM_PTR_SIZE __SIZEOF_POINTER__ -#elif defined(_WIN64) -# define LLVM_PTR_SIZE 8 -#elif defined(_WIN32) -# define LLVM_PTR_SIZE 4 -#elif defined(_MSC_VER) -# error "could not determine LLVM_PTR_SIZE as a constant int for MSVC" -#else -# define LLVM_PTR_SIZE sizeof(void *) -#endif - /// \macro LLVM_MEMORY_SANITIZER_BUILD /// Whether LLVM itself is built with MemorySanitizer instrumentation. #if __has_feature(memory_sanitizer) @@ -444,8 +413,21 @@ /// Whether LLVM itself is built with AddressSanitizer instrumentation. #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) # define LLVM_ADDRESS_SANITIZER_BUILD 1 +#if __has_include() # include #else +// These declarations exist to support ASan with MSVC. If MSVC eventually ships +// asan_interface.h in their headers, then we can remove this. 
+#ifdef __cplusplus +extern "C" { +#endif +void __asan_poison_memory_region(void const volatile *addr, size_t size); +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#ifdef __cplusplus +} // extern "C" +#endif +#endif +#else # define LLVM_ADDRESS_SANITIZER_BUILD 0 # define __asan_poison_memory_region(p, size) # define __asan_unpoison_memory_region(p, size) diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index 5bc0e56913fe..e6f898229412 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -29,8 +29,8 @@ static constexpr int BestSizeCompression = 9; bool isAvailable(); -Error compress(StringRef InputBuffer, SmallVectorImpl &CompressedBuffer, - int Level = DefaultCompression); +void compress(StringRef InputBuffer, SmallVectorImpl &CompressedBuffer, + int Level = DefaultCompression); Error uncompress(StringRef InputBuffer, char *UncompressedBuffer, size_t &UncompressedSize); diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h index 374cdb907fdc..662f3aca5b54 100644 --- a/llvm/include/llvm/Support/ConvertUTF.h +++ b/llvm/include/llvm/Support/ConvertUTF.h @@ -126,6 +126,9 @@ typedef unsigned char Boolean; /* 0 or 1 */ #define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF #define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE +#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF +#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000 + typedef enum { conversionOK, /* conversion successful */ sourceExhausted, /* partial character in source, but hit end */ @@ -281,6 +284,24 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out); */ bool convertUTF16ToUTF8String(ArrayRef Src, std::string &Out); +/** + * Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string. + * + * \param [in] SrcBytes A buffer of what is assumed to be UTF-32 encoded text. + * \param [out] Out Converted UTF-8 is stored here on success. + * \returns true on success + */ +bool convertUTF32ToUTF8String(ArrayRef SrcBytes, std::string &Out); + +/** + * Converts a UTF32 string into a UTF8 std::string. + * + * \param [in] Src A buffer of UTF-32 encoded text. + * \param [out] Out Converted UTF-8 is stored here on success. + * \returns true on success + */ +bool convertUTF32ToUTF8String(ArrayRef Src, std::string &Out); + /** * Converts a UTF-8 string into a UTF-16 string with native endianness. * diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h index f60e7335e197..26ddf97b3ef0 100644 --- a/llvm/include/llvm/Support/CrashRecoveryContext.h +++ b/llvm/include/llvm/Support/CrashRecoveryContext.h @@ -101,6 +101,9 @@ public: /// return failure from RunSafely(). This function does not return. [[noreturn]] void HandleExit(int RetCode); + /// Return true if RetCode indicates that a signal or an exception occurred. + static bool isCrash(int RetCode); + /// Throw again a signal or an exception, after it was catched once by a /// CrashRecoveryContext. 
static bool throwIfCrash(int RetCode);
diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h
index 2ff978476c79..5788ab3b2138 100644
--- a/llvm/include/llvm/Support/Debug.h
+++ b/llvm/include/llvm/Support/Debug.h
@@ -67,8 +67,8 @@ void setCurrentDebugTypes(const char **Types, unsigned Count);
#else
#define isCurrentDebugType(X) (false)
-#define setCurrentDebugType(X)
-#define setCurrentDebugTypes(X, N)
+#define setCurrentDebugType(X) do { (void)(X); } while (false)
+#define setCurrentDebugTypes(X, N) do { (void)(X); (void)(N); } while (false)
#define DEBUG_WITH_TYPE(TYPE, X) do { } while (false)
#endif
diff --git a/llvm/include/llvm/Support/Errno.h b/llvm/include/llvm/Support/Errno.h
index 07df6765d9db..e095c66b9086 100644
--- a/llvm/include/llvm/Support/Errno.h
+++ b/llvm/include/llvm/Support/Errno.h
@@ -15,7 +15,6 @@
#include <cerrno>
#include <string>
-#include <type_traits>

namespace llvm {
namespace sys {
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 881049b15b0d..1a801b6f2c7a 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -1269,7 +1269,7 @@ public:
void log(raw_ostream &OS) const override {
assert(Err && "Trying to log after takeError().");
OS << "'" << FileName << "': ";
- if (Line.hasValue())
+ if (Line)
OS << "line " << Line.getValue() << ": ";
Err->log(OS);
}
@@ -1281,7 +1281,7 @@ public:
return OS.str();
}

- StringRef getFileName() { return FileName; }
+ StringRef getFileName() const { return FileName; }

Error takeError() { return Error(std::move(Err)); }
diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h
index f980510d37f0..004b3b7868fb 100644
--- a/llvm/include/llvm/Support/ErrorHandling.h
+++ b/llvm/include/llvm/Support/ErrorHandling.h
@@ -124,19 +124,30 @@
llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr,

/// Marks that the current location is not supposed to be reachable.
/// In !NDEBUG builds, prints the message and location info to stderr.
-/// In NDEBUG builds, becomes an optimizer hint that the current location
-/// is not supposed to be reachable. On compilers that don't support
-/// such hints, prints a reduced message instead and aborts the program.
+/// In NDEBUG builds, if the platform does not support a builtin unreachable
+/// then we call an internal LLVM runtime function. Otherwise the behavior is
+/// controlled by the CMake flag
+///     -DLLVM_UNREACHABLE_OPTIMIZE
+/// * When "ON" (default) llvm_unreachable() becomes an optimizer hint
+///   that the current location is not supposed to be reachable: the hint
+///   turns such code path into undefined behavior. On compilers that don't
+///   support such hints, prints a reduced message instead and aborts the
+///   program.
+/// * When "OFF", a builtin_trap is emitted instead of an
+///   optimizer hint or printing a reduced message.
///
-/// Use this instead of assert(0). It conveys intent more clearly and
-/// allows compilers to omit some unnecessary code.
+/// Use this instead of assert(0). It conveys intent more clearly, suppresses
+/// diagnostics for unreachable code paths, and allows compilers to omit
+/// unnecessary code.
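A usage sketch, not part of this patch; the Color enum and toString function are hypothetical. After a switch that covers every enumerator, llvm_unreachable marks the fall-through path as impossible, which is the intended replacement for assert(0):

#include "llvm/Support/ErrorHandling.h"

enum class Color { Red, Green }; // hypothetical

static const char *toString(Color C) {
  switch (C) {
  case Color::Red:
    return "red";
  case Color::Green:
    return "green";
  }
  // All enumerators are handled above; in NDEBUG builds this line becomes an
  // optimizer hint (or a trap when built with -DLLVM_UNREACHABLE_OPTIMIZE=OFF).
  llvm_unreachable("unknown Color");
}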
#ifndef NDEBUG
#define llvm_unreachable(msg) \
  ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
-#elif defined(LLVM_BUILTIN_UNREACHABLE)
+#elif !defined(LLVM_BUILTIN_UNREACHABLE)
+#define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
+#elif LLVM_UNREACHABLE_OPTIMIZE
#define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE
#else
-#define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
+#define llvm_unreachable(msg) LLVM_BUILTIN_TRAP, LLVM_BUILTIN_UNREACHABLE
#endif
#endif
diff --git a/llvm/include/llvm/Support/FileUtilities.h b/llvm/include/llvm/Support/FileUtilities.h
index f8a37fe1177d..0033638c6804 100644
--- a/llvm/include/llvm/Support/FileUtilities.h
+++ b/llvm/include/llvm/Support/FileUtilities.h
@@ -110,6 +110,27 @@ namespace llvm {
llvm::Error writeFileAtomically(StringRef TempPathModel, StringRef FinalPath,
std::function<llvm::Error(llvm::raw_ostream &)> Writer);
+
+ /// FilePermissionsApplier helps to copy permissions from an input file to
+ /// an output one. It memorizes the status of the input file and can apply
+ /// permissions and dates to the output file.
+ class FilePermissionsApplier {
+ public:
+ static Expected<FilePermissionsApplier> create(StringRef InputFilename);
+
+ /// Apply stored permissions to the \p OutputFilename.
+ /// Copy LastAccess and ModificationTime if \p CopyDates is true.
+ /// Overwrite stored permissions if \p OverwritePermissions is specified.
+ Error apply(StringRef OutputFilename, bool CopyDates = false,
+             Optional<sys::fs::perms> OverwritePermissions = None);
+
+ private:
+ FilePermissionsApplier(StringRef InputFilename, sys::fs::file_status Status)
+     : InputFilename(InputFilename), InputStatus(Status) {}
+
+ StringRef InputFilename;
+ sys::fs::file_status InputStatus;
+ };
} // End llvm namespace

#endif
diff --git a/llvm/include/llvm/Support/FormatProviders.h b/llvm/include/llvm/Support/FormatProviders.h
index 3edd8844bc7a..8101ed7968ad 100644
--- a/llvm/include/llvm/Support/FormatProviders.h
+++ b/llvm/include/llvm/Support/FormatProviders.h
@@ -313,7 +313,7 @@ struct format_provider
Precision = parseNumericPrecision(Style);
- if (!Precision.hasValue())
+ if (!Precision)
Precision = getDefaultPrecision(S);

write_double(Stream, static_cast<double>(V), S, Precision);
diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h
index a872afb5e45e..c1707b4fe9cb 100644
--- a/llvm/include/llvm/Support/FormatVariadic.h
+++ b/llvm/include/llvm/Support/FormatVariadic.h
@@ -172,7 +172,7 @@ public:
// Formats textual output. `Fmt` is a string consisting of one or more
// replacement sequences with the following grammar:
//
-// rep_field ::= "{" [index] ["," layout] [":" format] "}"
+// rep_field ::= "{" index ["," layout] [":" format] "}"
// index ::= <non-negative integer>
// layout ::= [[[char]loc]width]
// format ::= <any string not containing "{" or "}">
diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
index bf93a0d22da7..9d7680d2b667 100644
--- a/llvm/include/llvm/Support/HashBuilder.h
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -39,6 +39,9 @@ struct IsHashableData
/// Declares the hasher member, and functions forwarding directly to the hasher.
template <typename HasherT> class HashBuilderBase {
public:
+ template <typename HasherT_ = HasherT>
+ using HashResultTy = decltype(std::declval<HasherT_ &>().final());
+
HasherT &getHasher() { return Hasher; }

/// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
@@ -59,12 +62,12 @@ public:
}

/// Forward to `HasherT::final()` if available.
- template <typename HasherT_ = HasherT> StringRef final() {
+ template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
return this->getHasher().final();
}

/// Forward to `HasherT::result()` if available.
- template StringRef result() { + template HashResultTy result() { return this->getHasher().result(); } diff --git a/llvm/include/llvm/Support/Host.h b/llvm/include/llvm/Support/Host.h index b3c15f0683b9..f683371ad1d3 100644 --- a/llvm/include/llvm/Support/Host.h +++ b/llvm/include/llvm/Support/Host.h @@ -64,6 +64,7 @@ namespace sys { StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent); + StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); /// Helper functions to extract CPU details from CPUID on x86. diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 96b7753e9b20..84e095e2bbab 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -324,7 +324,7 @@ public: /// Compute known bits resulting from multiplying LHS and RHS. static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, - bool SelfMultiply = false); + bool NoUndefSelfMultiply = false); /// Compute known bits from sign-extended multiply-hi. static KnownBits mulhs(const KnownBits &LHS, const KnownBits &RHS); @@ -415,6 +415,12 @@ public: return KnownBits(Zero.reverseBits(), One.reverseBits()); } + bool operator==(const KnownBits &Other) const { + return Zero == Other.Zero && One == Other.One; + } + + bool operator!=(const KnownBits &Other) const { return !(*this == Other); } + void print(raw_ostream &OS) const; void dump() const; }; diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h index dd286f5228fe..186a7e5930ec 100644 --- a/llvm/include/llvm/Support/LowLevelTypeImpl.h +++ b/llvm/include/llvm/Support/LowLevelTypeImpl.h @@ -207,6 +207,18 @@ public: return scalar(getScalarSizeInBits() / Factor); } + /// Produce a vector type that is \p Factor times bigger, preserving the + /// element type. For a scalar or pointer, this will produce a new vector with + /// \p Factor elements. + LLT multiplyElements(int Factor) const { + if (isVector()) { + return scalarOrVector(getElementCount().multiplyCoefficientBy(Factor), + getElementType()); + } + + return fixed_vector(Factor, *this); + } + bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); } unsigned getScalarSizeInBits() const { diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h index 70d046601346..fa2f477261dd 100644 --- a/llvm/include/llvm/Support/MD5.h +++ b/llvm/include/llvm/Support/MD5.h @@ -40,26 +40,19 @@ template class ArrayRef; class MD5 { public: - struct MD5Result { - std::array Bytes; - - operator std::array() const { return Bytes; } - - const uint8_t &operator[](size_t I) const { return Bytes[I]; } - uint8_t &operator[](size_t I) { return Bytes[I]; } - + struct MD5Result : public std::array { SmallString<32> digest() const; uint64_t low() const { // Our MD5 implementation returns the result in little endian, so the low // word is first. using namespace support; - return endian::read(Bytes.data()); + return endian::read(data()); } uint64_t high() const { using namespace support; - return endian::read(Bytes.data() + 8); + return endian::read(data() + 8); } std::pair words() const { using namespace support; @@ -78,20 +71,20 @@ public: /// Finishes off the hash and puts the result in result. void final(MD5Result &Result); - /// Finishes off the hash, and returns a reference to the 16-byte hash data. 
- StringRef final(); + /// Finishes off the hash, and returns the 16-byte hash data. + MD5Result final(); - /// Finishes off the hash, and returns a reference to the 16-byte hash data. + /// Finishes off the hash, and returns the 16-byte hash data. /// This is suitable for getting the MD5 at any time without invalidating the /// internal state, so that more calls can be made into `update`. - StringRef result(); + MD5Result result(); /// Translates the bytes in \p Res to a hex string that is /// deposited into \p Str. The result will be of length 32. static void stringifyResult(MD5Result &Result, SmallVectorImpl &Str); /// Computes the hash for a given bytes. - static std::array hash(ArrayRef Data); + static MD5Result hash(ArrayRef Data); private: // Any 32-bit or wider unsigned integer data type will do. @@ -109,15 +102,9 @@ private: MD5_u32plus block[16]; } InternalState; - MD5Result Result; - const uint8_t *body(ArrayRef Data); }; -inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) { - return LHS.Bytes == RHS.Bytes; -} - /// Helper to compute and return lower 64 bits of the given string's MD5 hash. inline uint64_t MD5Hash(StringRef Str) { using namespace support; diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 643c2d8ce981..5355c50bb762 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -41,143 +41,149 @@ namespace llvm { // ValueTypes.td as well! Other = 1, // This is a non-standard value i1 = 2, // This is a 1 bit integer value - i8 = 3, // This is an 8 bit integer value - i16 = 4, // This is a 16 bit integer value - i32 = 5, // This is a 32 bit integer value - i64 = 6, // This is a 64 bit integer value - i128 = 7, // This is a 128 bit integer value + i2 = 3, // This is a 2 bit integer value + i4 = 4, // This is a 4 bit integer value + i8 = 5, // This is an 8 bit integer value + i16 = 6, // This is a 16 bit integer value + i32 = 7, // This is a 32 bit integer value + i64 = 8, // This is a 64 bit integer value + i128 = 9, // This is a 128 bit integer value FIRST_INTEGER_VALUETYPE = i1, LAST_INTEGER_VALUETYPE = i128, - bf16 = 8, // This is a 16 bit brain floating point value - f16 = 9, // This is a 16 bit floating point value - f32 = 10, // This is a 32 bit floating point value - f64 = 11, // This is a 64 bit floating point value - f80 = 12, // This is a 80 bit floating point value - f128 = 13, // This is a 128 bit floating point value - ppcf128 = 14, // This is a PPC 128-bit floating point value + bf16 = 10, // This is a 16 bit brain floating point value + f16 = 11, // This is a 16 bit floating point value + f32 = 12, // This is a 32 bit floating point value + f64 = 13, // This is a 64 bit floating point value + f80 = 14, // This is a 80 bit floating point value + f128 = 15, // This is a 128 bit floating point value + ppcf128 = 16, // This is a PPC 128-bit floating point value FIRST_FP_VALUETYPE = bf16, LAST_FP_VALUETYPE = ppcf128, - v1i1 = 15, // 1 x i1 - v2i1 = 16, // 2 x i1 - v4i1 = 17, // 4 x i1 - v8i1 = 18, // 8 x i1 - v16i1 = 19, // 16 x i1 - v32i1 = 20, // 32 x i1 - v64i1 = 21, // 64 x i1 - v128i1 = 22, // 128 x i1 - v256i1 = 23, // 256 x i1 - v512i1 = 24, // 512 x i1 - v1024i1 = 25, // 1024 x i1 - - v1i8 = 26, // 1 x i8 - v2i8 = 27, // 2 x i8 - v4i8 = 28, // 4 x i8 - v8i8 = 29, // 8 x i8 - v16i8 = 30, // 16 x i8 - v32i8 = 31, // 32 x i8 - v64i8 = 32, // 64 x i8 - v128i8 = 33, // 128 x i8 - v256i8 = 34, // 256 x i8 - v512i8 = 35, // 
512 x i8 - v1024i8 = 36, // 1024 x i8 - - v1i16 = 37, // 1 x i16 - v2i16 = 38, // 2 x i16 - v3i16 = 39, // 3 x i16 - v4i16 = 40, // 4 x i16 - v8i16 = 41, // 8 x i16 - v16i16 = 42, // 16 x i16 - v32i16 = 43, // 32 x i16 - v64i16 = 44, // 64 x i16 - v128i16 = 45, // 128 x i16 - v256i16 = 46, // 256 x i16 - v512i16 = 47, // 512 x i16 - - v1i32 = 48, // 1 x i32 - v2i32 = 49, // 2 x i32 - v3i32 = 50, // 3 x i32 - v4i32 = 51, // 4 x i32 - v5i32 = 52, // 5 x i32 - v6i32 = 53, // 6 x i32 - v7i32 = 54, // 7 x i32 - v8i32 = 55, // 8 x i32 - v16i32 = 56, // 16 x i32 - v32i32 = 57, // 32 x i32 - v64i32 = 58, // 64 x i32 - v128i32 = 59, // 128 x i32 - v256i32 = 60, // 256 x i32 - v512i32 = 61, // 512 x i32 - v1024i32 = 62, // 1024 x i32 - v2048i32 = 63, // 2048 x i32 - - v1i64 = 64, // 1 x i64 - v2i64 = 65, // 2 x i64 - v3i64 = 66, // 3 x i64 - v4i64 = 67, // 4 x i64 - v8i64 = 68, // 8 x i64 - v16i64 = 69, // 16 x i64 - v32i64 = 70, // 32 x i64 - v64i64 = 71, // 64 x i64 - v128i64 = 72, // 128 x i64 - v256i64 = 73, // 256 x i64 - - v1i128 = 74, // 1 x i128 + v1i1 = 17, // 1 x i1 + v2i1 = 18, // 2 x i1 + v4i1 = 19, // 4 x i1 + v8i1 = 20, // 8 x i1 + v16i1 = 21, // 16 x i1 + v32i1 = 22, // 32 x i1 + v64i1 = 23, // 64 x i1 + v128i1 = 24, // 128 x i1 + v256i1 = 25, // 256 x i1 + v512i1 = 26, // 512 x i1 + v1024i1 = 27, // 1024 x i1 + + v128i2 = 28, // 128 x i2 + + v64i4 = 29, // 64 x i4 + + v1i8 = 30, // 1 x i8 + v2i8 = 31, // 2 x i8 + v4i8 = 32, // 4 x i8 + v8i8 = 33, // 8 x i8 + v16i8 = 34, // 16 x i8 + v32i8 = 35, // 32 x i8 + v64i8 = 36, // 64 x i8 + v128i8 = 37, // 128 x i8 + v256i8 = 38, // 256 x i8 + v512i8 = 39, // 512 x i8 + v1024i8 = 40, // 1024 x i8 + + v1i16 = 41, // 1 x i16 + v2i16 = 42, // 2 x i16 + v3i16 = 43, // 3 x i16 + v4i16 = 44, // 4 x i16 + v8i16 = 45, // 8 x i16 + v16i16 = 46, // 16 x i16 + v32i16 = 47, // 32 x i16 + v64i16 = 48, // 64 x i16 + v128i16 = 49, // 128 x i16 + v256i16 = 50, // 256 x i16 + v512i16 = 51, // 512 x i16 + + v1i32 = 52, // 1 x i32 + v2i32 = 53, // 2 x i32 + v3i32 = 54, // 3 x i32 + v4i32 = 55, // 4 x i32 + v5i32 = 56, // 5 x i32 + v6i32 = 57, // 6 x i32 + v7i32 = 58, // 7 x i32 + v8i32 = 59, // 8 x i32 + v16i32 = 60, // 16 x i32 + v32i32 = 61, // 32 x i32 + v64i32 = 62, // 64 x i32 + v128i32 = 63, // 128 x i32 + v256i32 = 64, // 256 x i32 + v512i32 = 65, // 512 x i32 + v1024i32 = 66, // 1024 x i32 + v2048i32 = 67, // 2048 x i32 + + v1i64 = 68, // 1 x i64 + v2i64 = 69, // 2 x i64 + v3i64 = 70, // 3 x i64 + v4i64 = 71, // 4 x i64 + v8i64 = 72, // 8 x i64 + v16i64 = 73, // 16 x i64 + v32i64 = 74, // 32 x i64 + v64i64 = 75, // 64 x i64 + v128i64 = 76, // 128 x i64 + v256i64 = 77, // 256 x i64 + + v1i128 = 78, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v1f16 = 75, // 1 x f16 - v2f16 = 76, // 2 x f16 - v3f16 = 77, // 3 x f16 - v4f16 = 78, // 4 x f16 - v8f16 = 79, // 8 x f16 - v16f16 = 80, // 16 x f16 - v32f16 = 81, // 32 x f16 - v64f16 = 82, // 64 x f16 - v128f16 = 83, // 128 x f16 - v256f16 = 84, // 256 x f16 - v512f16 = 85, // 256 x f16 - - v2bf16 = 86, // 2 x bf16 - v3bf16 = 87, // 3 x bf16 - v4bf16 = 88, // 4 x bf16 - v8bf16 = 89, // 8 x bf16 - v16bf16 = 90, // 16 x bf16 - v32bf16 = 91, // 32 x bf16 - v64bf16 = 92, // 64 x bf16 - v128bf16 = 93, // 128 x bf16 - - v1f32 = 94, // 1 x f32 - v2f32 = 95, // 2 x f32 - v3f32 = 96, // 3 x f32 - v4f32 = 97, // 4 x f32 - v5f32 = 98, // 5 x f32 - v6f32 = 99, // 6 x f32 - v7f32 = 100, // 7 x f32 - v8f32 = 101, // 8 x f32 - v16f32 = 102, // 16 x f32 - v32f32 = 
103, // 32 x f32 - v64f32 = 104, // 64 x f32 - v128f32 = 105, // 128 x f32 - v256f32 = 106, // 256 x f32 - v512f32 = 107, // 512 x f32 - v1024f32 = 108, // 1024 x f32 - v2048f32 = 109, // 2048 x f32 - - v1f64 = 110, // 1 x f64 - v2f64 = 111, // 2 x f64 - v3f64 = 112, // 3 x f64 - v4f64 = 113, // 4 x f64 - v8f64 = 114, // 8 x f64 - v16f64 = 115, // 16 x f64 - v32f64 = 116, // 32 x f64 - v64f64 = 117, // 64 x f64 - v128f64 = 118, // 128 x f64 - v256f64 = 119, // 256 x f64 + v1f16 = 79, // 1 x f16 + v2f16 = 80, // 2 x f16 + v3f16 = 81, // 3 x f16 + v4f16 = 82, // 4 x f16 + v8f16 = 83, // 8 x f16 + v16f16 = 84, // 16 x f16 + v32f16 = 85, // 32 x f16 + v64f16 = 86, // 64 x f16 + v128f16 = 87, // 128 x f16 + v256f16 = 88, // 256 x f16 + v512f16 = 89, // 256 x f16 + + v2bf16 = 90, // 2 x bf16 + v3bf16 = 91, // 3 x bf16 + v4bf16 = 92, // 4 x bf16 + v8bf16 = 93, // 8 x bf16 + v16bf16 = 94, // 16 x bf16 + v32bf16 = 95, // 32 x bf16 + v64bf16 = 96, // 64 x bf16 + v128bf16 = 97, // 128 x bf16 + + v1f32 = 98, // 1 x f32 + v2f32 = 99, // 2 x f32 + v3f32 = 100, // 3 x f32 + v4f32 = 101, // 4 x f32 + v5f32 = 102, // 5 x f32 + v6f32 = 103, // 6 x f32 + v7f32 = 104, // 7 x f32 + v8f32 = 105, // 8 x f32 + v16f32 = 106, // 16 x f32 + v32f32 = 107, // 32 x f32 + v64f32 = 108, // 64 x f32 + v128f32 = 109, // 128 x f32 + v256f32 = 110, // 256 x f32 + v512f32 = 111, // 512 x f32 + v1024f32 = 112, // 1024 x f32 + v2048f32 = 113, // 2048 x f32 + + v1f64 = 114, // 1 x f64 + v2f64 = 115, // 2 x f64 + v3f64 = 116, // 3 x f64 + v4f64 = 117, // 4 x f64 + v8f64 = 118, // 8 x f64 + v16f64 = 119, // 16 x f64 + v32f64 = 120, // 32 x f64 + v64f64 = 121, // 64 x f64 + v128f64 = 122, // 128 x f64 + v256f64 = 123, // 256 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16, LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, @@ -185,68 +191,70 @@ namespace llvm { FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, - nxv1i1 = 120, // n x 1 x i1 - nxv2i1 = 121, // n x 2 x i1 - nxv4i1 = 122, // n x 4 x i1 - nxv8i1 = 123, // n x 8 x i1 - nxv16i1 = 124, // n x 16 x i1 - nxv32i1 = 125, // n x 32 x i1 - nxv64i1 = 126, // n x 64 x i1 - - nxv1i8 = 127, // n x 1 x i8 - nxv2i8 = 128, // n x 2 x i8 - nxv4i8 = 129, // n x 4 x i8 - nxv8i8 = 130, // n x 8 x i8 - nxv16i8 = 131, // n x 16 x i8 - nxv32i8 = 132, // n x 32 x i8 - nxv64i8 = 133, // n x 64 x i8 - - nxv1i16 = 134, // n x 1 x i16 - nxv2i16 = 135, // n x 2 x i16 - nxv4i16 = 136, // n x 4 x i16 - nxv8i16 = 137, // n x 8 x i16 - nxv16i16 = 138, // n x 16 x i16 - nxv32i16 = 139, // n x 32 x i16 - - nxv1i32 = 140, // n x 1 x i32 - nxv2i32 = 141, // n x 2 x i32 - nxv4i32 = 142, // n x 4 x i32 - nxv8i32 = 143, // n x 8 x i32 - nxv16i32 = 144, // n x 16 x i32 - nxv32i32 = 145, // n x 32 x i32 - - nxv1i64 = 146, // n x 1 x i64 - nxv2i64 = 147, // n x 2 x i64 - nxv4i64 = 148, // n x 4 x i64 - nxv8i64 = 149, // n x 8 x i64 - nxv16i64 = 150, // n x 16 x i64 - nxv32i64 = 151, // n x 32 x i64 + nxv1i1 = 124, // n x 1 x i1 + nxv2i1 = 125, // n x 2 x i1 + nxv4i1 = 126, // n x 4 x i1 + nxv8i1 = 127, // n x 8 x i1 + nxv16i1 = 128, // n x 16 x i1 + nxv32i1 = 129, // n x 32 x i1 + nxv64i1 = 130, // n x 64 x i1 + + nxv1i8 = 131, // n x 1 x i8 + nxv2i8 = 132, // n x 2 x i8 + nxv4i8 = 133, // n x 4 x i8 + nxv8i8 = 134, // n x 8 x i8 + nxv16i8 = 135, // n x 16 x i8 + nxv32i8 = 136, // n x 32 x i8 + nxv64i8 = 137, // n x 64 x i8 + + nxv1i16 = 138, // n x 1 x i16 + nxv2i16 = 139, // n x 2 x i16 + nxv4i16 = 140, // n x 4 x i16 + nxv8i16 = 141, // n x 8 x i16 + nxv16i16 = 142, // n x 16 x i16 
+ nxv32i16 = 143, // n x 32 x i16 + + nxv1i32 = 144, // n x 1 x i32 + nxv2i32 = 145, // n x 2 x i32 + nxv4i32 = 146, // n x 4 x i32 + nxv8i32 = 147, // n x 8 x i32 + nxv16i32 = 148, // n x 16 x i32 + nxv32i32 = 149, // n x 32 x i32 + + nxv1i64 = 150, // n x 1 x i64 + nxv2i64 = 151, // n x 2 x i64 + nxv4i64 = 152, // n x 4 x i64 + nxv8i64 = 153, // n x 8 x i64 + nxv16i64 = 154, // n x 16 x i64 + nxv32i64 = 155, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 152, // n x 1 x f16 - nxv2f16 = 153, // n x 2 x f16 - nxv4f16 = 154, // n x 4 x f16 - nxv8f16 = 155, // n x 8 x f16 - nxv16f16 = 156, // n x 16 x f16 - nxv32f16 = 157, // n x 32 x f16 - - nxv1bf16 = 158, // n x 1 x bf16 - nxv2bf16 = 159, // n x 2 x bf16 - nxv4bf16 = 160, // n x 4 x bf16 - nxv8bf16 = 161, // n x 8 x bf16 - - nxv1f32 = 162, // n x 1 x f32 - nxv2f32 = 163, // n x 2 x f32 - nxv4f32 = 164, // n x 4 x f32 - nxv8f32 = 165, // n x 8 x f32 - nxv16f32 = 166, // n x 16 x f32 - - nxv1f64 = 167, // n x 1 x f64 - nxv2f64 = 168, // n x 2 x f64 - nxv4f64 = 169, // n x 4 x f64 - nxv8f64 = 170, // n x 8 x f64 + nxv1f16 = 156, // n x 1 x f16 + nxv2f16 = 157, // n x 2 x f16 + nxv4f16 = 158, // n x 4 x f16 + nxv8f16 = 159, // n x 8 x f16 + nxv16f16 = 160, // n x 16 x f16 + nxv32f16 = 161, // n x 32 x f16 + + nxv1bf16 = 162, // n x 1 x bf16 + nxv2bf16 = 163, // n x 2 x bf16 + nxv4bf16 = 164, // n x 4 x bf16 + nxv8bf16 = 165, // n x 8 x bf16 + nxv16bf16 = 166, // n x 16 x bf16 + nxv32bf16 = 167, // n x 32 x bf16 + + nxv1f32 = 168, // n x 1 x f32 + nxv2f32 = 169, // n x 2 x f32 + nxv4f32 = 170, // n x 4 x f32 + nxv8f32 = 171, // n x 8 x f32 + nxv16f32 = 172, // n x 16 x f32 + + nxv1f64 = 173, // n x 1 x f64 + nxv2f64 = 174, // n x 2 x f64 + nxv4f64 = 175, // n x 4 x f64 + nxv8f64 = 176, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -257,20 +265,20 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 171, // This is an X86 MMX value + x86mmx = 177, // This is an X86 MMX value - Glue = 172, // This glues nodes together during pre-RA sched + Glue = 178, // This glues nodes together during pre-RA sched - isVoid = 173, // This has no value + isVoid = 179, // This has no value - Untyped = 174, // This value takes a register, but has + Untyped = 180, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - funcref = 175, // WebAssembly's funcref type - externref = 176, // WebAssembly's externref type - x86amx = 177, // This is an X86 AMX value - i64x8 = 178, // 8 Consecutive GPRs (AArch64) + funcref = 181, // WebAssembly's funcref type + externref = 182, // WebAssembly's externref type + x86amx = 183, // This is an X86 AMX value + i64x8 = 184, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. LAST_VALUETYPE = i64x8, // This always remains at the end of the list. @@ -415,10 +423,11 @@ namespace llvm { /// Return true if this is a 256-bit vector type. 
bool is256BitVector() const { return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 || - SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 || - SimpleTy == MVT::v256i1); + SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || + SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || + SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 || + SimpleTy == MVT::v256i1 || SimpleTy == MVT::v128i2 || + SimpleTy == MVT::v64i4); } /// Return true if this is a 512-bit vector type. @@ -517,6 +526,7 @@ namespace llvm { } MVT getVectorElementType() const { + // clang-format off switch (SimpleTy) { default: llvm_unreachable("Not a vector MVT!"); @@ -538,6 +548,8 @@ namespace llvm { case nxv16i1: case nxv32i1: case nxv64i1: return i1; + case v128i2: return i2; + case v64i4: return i4; case v1i8: case v2i8: case v4i8: @@ -640,7 +652,9 @@ namespace llvm { case nxv1bf16: case nxv2bf16: case nxv4bf16: - case nxv8bf16: return bf16; + case nxv8bf16: + case nxv16bf16: + case nxv32bf16: return bf16; case v1f32: case v2f32: case v3f32: @@ -677,6 +691,7 @@ namespace llvm { case nxv4f64: case nxv8f64: return f64; } + // clang-format on } /// Given a vector type, return the minimum number of elements it contains. @@ -705,6 +720,7 @@ namespace llvm { case v256f32: case v256f64: return 256; case v128i1: + case v128i2: case v128i8: case v128i16: case v128i32: @@ -714,6 +730,7 @@ namespace llvm { case v128f32: case v128f64: return 128; case v64i1: + case v64i4: case v64i8: case v64i16: case v64i32: @@ -738,7 +755,8 @@ namespace llvm { case nxv32i16: case nxv32i32: case nxv32i64: - case nxv32f16: return 32; + case nxv32f16: + case nxv32bf16: return 32; case v16i1: case v16i8: case v16i16: @@ -754,6 +772,7 @@ namespace llvm { case nxv16i32: case nxv16i64: case nxv16f16: + case nxv16bf16: case nxv16f32: return 16; case v8i1: case v8i8: @@ -883,8 +902,10 @@ namespace llvm { case i1: case v1i1: return TypeSize::Fixed(1); case nxv1i1: return TypeSize::Scalable(1); + case i2: case v2i1: return TypeSize::Fixed(2); case nxv2i1: return TypeSize::Scalable(2); + case i4: case v4i1: return TypeSize::Fixed(4); case nxv4i1: return TypeSize::Scalable(4); case i8 : @@ -977,6 +998,8 @@ namespace llvm { case v7i32: case v7f32: return TypeSize::Fixed(224); case v256i1: + case v128i2: + case v64i4: case v32i8: case v16i16: case v8i32: @@ -990,6 +1013,7 @@ namespace llvm { case nxv8i32: case nxv4i64: case nxv16f16: + case nxv16bf16: case nxv8f32: case nxv4f64: return TypeSize::Scalable(256); case i64x8: @@ -1007,6 +1031,7 @@ namespace llvm { case nxv16i32: case nxv8i64: case nxv32f16: + case nxv32bf16: case nxv16f32: case nxv8f64: return TypeSize::Scalable(512); case v1024i1: @@ -1078,6 +1103,12 @@ namespace llvm { return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()}; } + // Return the number of bytes overwritten by a store of this value type or + // this value type's element type in the case of a vector. + uint64_t getScalarStoreSize() const { + return getScalarType().getStoreSize().getFixedSize(); + } + /// Return the number of bits overwritten by a store of the specified value /// type. 
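As a sanity check on the new i2/i4 vector types, here is a minimal sketch (not part of the patch; it only assumes the headers in this tree) of how they behave under the MVT query helpers, including the getScalarStoreSize() addition above:

#include "llvm/Support/MachineValueType.h"
#include <cassert>

int main() {
  llvm::MVT VT = llvm::MVT::v128i2;               // one of the new i2 vectors
  assert(VT.getVectorElementType() == llvm::MVT::i2);
  assert(VT.getVectorNumElements() == 128);
  assert(VT.getSizeInBits() == 256);              // same bit width as v256i1
  assert(VT.getScalarStoreSize() == 1);           // a 2-bit scalar still stores as one byte
  return 0;
}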
/// @@ -1165,6 +1196,10 @@ namespace llvm { return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); case 1: return MVT::i1; + case 2: + return MVT::i2; + case 4: + return MVT::i4; case 8: return MVT::i8; case 16: @@ -1179,6 +1214,7 @@ namespace llvm { } static MVT getVectorVT(MVT VT, unsigned NumElements) { + // clang-format off switch (VT.SimpleTy) { default: break; @@ -1195,6 +1231,12 @@ namespace llvm { if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; break; + case MVT::i2: + if (NumElements == 128) return MVT::v128i2; + break; + case MVT::i4: + if (NumElements == 64) return MVT::v64i4; + break; case MVT::i8: if (NumElements == 1) return MVT::v1i8; if (NumElements == 2) return MVT::v2i8; @@ -1309,6 +1351,7 @@ namespace llvm { break; } return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + // clang-format on } static MVT getScalableVectorVT(MVT VT, unsigned NumElements) { @@ -1370,6 +1413,8 @@ namespace llvm { if (NumElements == 2) return MVT::nxv2bf16; if (NumElements == 4) return MVT::nxv4bf16; if (NumElements == 8) return MVT::nxv8bf16; + if (NumElements == 16) return MVT::nxv16bf16; + if (NumElements == 32) return MVT::nxv32bf16; break; case MVT::f32: if (NumElements == 1) return MVT::nxv1f32; diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 753b1998c40c..8079aa436933 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -571,6 +571,33 @@ inline unsigned countPopulation(T Value) { return detail::PopulationCounter::count(Value); } +/// Return true if the argument contains a non-empty sequence of ones with the +/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. +/// If true, \p MaskIdx will specify the index of the lowest set bit and \p +/// MaskLen is updated to specify the length of the mask, else neither are +/// updated. +inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx, + unsigned &MaskLen) { + if (!isShiftedMask_32(Value)) + return false; + MaskIdx = countTrailingZeros(Value); + MaskLen = countPopulation(Value); + return true; +} + +/// Return true if the argument contains a non-empty sequence of ones with the +/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index +/// of the lowest set bit and \p MaskLen is updated to specify the length of the +/// mask, else neither are updated. +inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx, + unsigned &MaskLen) { + if (!isShiftedMask_64(Value)) + return false; + MaskIdx = countTrailingZeros(Value); + MaskLen = countPopulation(Value); + return true; +} + /// Compile time Log2. /// Valid only for positive powers of two. template constexpr inline size_t CTLog2() { @@ -680,7 +707,7 @@ constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { /// Returns the next power of two (in 64-bits) that is strictly greater than A. /// Returns zero on overflow. -inline uint64_t NextPowerOf2(uint64_t A) { +constexpr inline uint64_t NextPowerOf2(uint64_t A) { A |= (A >> 1); A |= (A >> 2); A |= (A >> 4); @@ -708,27 +735,34 @@ inline uint64_t PowerOf2Ceil(uint64_t A) { /// Returns the next integer (mod 2**64) that is greater than or equal to /// \p Value and is a multiple of \p Align. \p Align must be non-zero. /// -/// If non-zero \p Skew is specified, the return value will be a minimal -/// integer that is greater than or equal to \p Value and equal to -/// \p Align * N + \p Skew for some integer N. 
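A small usage sketch, not from the patch itself, of the isShiftedMask_32 overload added above; the mask value is arbitrary:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  unsigned MaskIdx = 0, MaskLen = 0;
  // 0x0000FF00 has ones exactly in bits [8, 16).
  assert(llvm::isShiftedMask_32(0x0000FF00U, MaskIdx, MaskLen));
  assert(MaskIdx == 8 && MaskLen == 8);
  // A non-contiguous value fails and leaves MaskIdx/MaskLen untouched.
  assert(!llvm::isShiftedMask_32(0x0000FF0FU, MaskIdx, MaskLen));
  return 0;
}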
If \p Skew is larger than -/// \p Align, its value is adjusted to '\p Skew mod \p Align'. -/// /// Examples: /// \code /// alignTo(5, 8) = 8 /// alignTo(17, 8) = 24 /// alignTo(~0LL, 8) = 0 /// alignTo(321, 255) = 510 +/// \endcode +inline uint64_t alignTo(uint64_t Value, uint64_t Align) { + assert(Align != 0u && "Align can't be 0."); + return (Value + Align - 1) / Align * Align; +} + +/// If non-zero \p Skew is specified, the return value will be a minimal integer +/// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for +/// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p +/// Skew mod \p A'. \p Align must be non-zero. /// +/// Examples: +/// \code /// alignTo(5, 8, 7) = 7 /// alignTo(17, 8, 1) = 17 /// alignTo(~0LL, 8, 3) = 3 /// alignTo(321, 255, 42) = 552 /// \endcode -inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { +inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) { assert(Align != 0u && "Align can't be 0."); Skew %= Align; - return (Value + Align - 1 - Skew) / Align * Align + Skew; + return alignTo(Value - Skew, Align) + Skew; } /// Returns the next integer (mod 2**64) that is greater than or equal to @@ -879,7 +913,7 @@ extern const float huge_valf; /// Add two signed integers, computing the two's complement truncated result, -/// returning true if overflow occured. +/// returning true if overflow occurred. template std::enable_if_t::value, T> AddOverflow(T X, T Y, T &Result) { #if __has_builtin(__builtin_add_overflow) diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h index 04caf5eac961..ff113f9b44c4 100644 --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -193,11 +193,11 @@ void parallelSort(RandomAccessIterator Start, RandomAccessIterator End, llvm::sort(Start, End, Comp); } -void parallelForEachN(size_t Begin, size_t End, function_ref Fn); +void parallelFor(size_t Begin, size_t End, function_ref Fn); template void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) { - parallelForEachN(0, End - Begin, [&](size_t I) { Fn(Begin[I]); }); + parallelFor(0, End - Begin, [&](size_t I) { Fn(Begin[I]); }); } template -#include namespace llvm { namespace sys { diff --git a/llvm/include/llvm/Support/PluginLoader.h b/llvm/include/llvm/Support/PluginLoader.h index 95c087f03d9b..bdd36366d1cf 100644 --- a/llvm/include/llvm/Support/PluginLoader.h +++ b/llvm/include/llvm/Support/PluginLoader.h @@ -31,9 +31,9 @@ namespace llvm { #ifndef DONT_GET_PLUGIN_LOADER_OPTION // This causes operator= above to be invoked for every -load option. - static cl::opt > - LoadOpt("load", cl::ZeroOrMore, cl::value_desc("pluginfilename"), - cl::desc("Load the specified plugin")); + static cl::opt> + LoadOpt("load", cl::value_desc("pluginfilename"), + cl::desc("Load the specified plugin")); #endif } diff --git a/llvm/include/llvm/Support/Printable.h b/llvm/include/llvm/Support/Printable.h index 6403c32aad67..8e76f01f6ba2 100644 --- a/llvm/include/llvm/Support/Printable.h +++ b/llvm/include/llvm/Support/Printable.h @@ -24,12 +24,12 @@ class raw_ostream; /// This class is useful to construct print helpers for raw_ostream. /// /// Example: -/// Printable PrintRegister(unsigned Register) { +/// Printable printRegister(unsigned Register) { /// return Printable([Register](raw_ostream &OS) { /// OS << getRegisterName(Register); -/// } +/// }); /// } -/// ... OS << PrintRegister(Register); ... +/// ... OS << printRegister(Register); ... 
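The skew arithmetic in the alignTo overloads above is easy to misread; this illustrative check reuses the exact values from the doc comment:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::alignTo(17, 8) == 24);          // plain overload
  assert(llvm::alignTo(5, 8, 7) == 7);         // 8 * 0 + 7
  assert(llvm::alignTo(17, 8, 1) == 17);       // 8 * 2 + 1
  assert(llvm::alignTo(321, 255, 42) == 552);  // 255 * 2 + 42
  return 0;
}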
/// /// Implementation note: Ideally this would just be a typedef, but doing so /// leads to operator << being ambiguous as function has matching constructors @@ -47,6 +47,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) { return OS; } -} +} // namespace llvm #endif diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h index ee03efeed9b2..9f56bd9b6e61 100644 --- a/llvm/include/llvm/Support/Process.h +++ b/llvm/include/llvm/Support/Process.h @@ -25,7 +25,6 @@ #define LLVM_SUPPORT_PROCESS_H #include "llvm/ADT/Optional.h" -#include "llvm/Support/AllocatorBase.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h index f91fca1c4464..4cb55c42c377 100644 --- a/llvm/include/llvm/Support/Program.h +++ b/llvm/include/llvm/Support/Program.h @@ -14,7 +14,6 @@ #define LLVM_SUPPORT_PROGRAM_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -24,6 +23,7 @@ #include <system_error> namespace llvm { +class BitVector; namespace sys { /// This is the OS-specific separator for PATH like environment variables: diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h index 7fa0e6ee3acf..eac6cc0925fb 100644 --- a/llvm/include/llvm/Support/RISCVISAInfo.h +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -66,6 +66,7 @@ public: bool hasExtension(StringRef Ext) const; std::string toString() const; std::vector<std::string> toFeatureVector() const; + StringRef computeDefaultABI() const; static bool isSupportedExtensionFeature(StringRef Ext); static bool isSupportedExtension(StringRef Ext); @@ -89,6 +90,7 @@ private: Error checkDependency(); void updateImplication(); + void updateCombination(); void updateFLen(); void updateMinVLen(); void updateMaxELen(); diff --git a/llvm/include/llvm/Support/RWMutex.h b/llvm/include/llvm/Support/RWMutex.h index 33a5d3efffee..3dd962586c36 100644 --- a/llvm/include/llvm/Support/RWMutex.h +++ b/llvm/include/llvm/Support/RWMutex.h @@ -93,8 +93,8 @@ private: /// running in multithreaded mode. template <bool mt_only> class SmartRWMutex { // shared_mutex (C++17) is more efficient than shared_timed_mutex (C++14) - // on Windows and always available on MSVC. -#if defined(_MSC_VER) || __cplusplus > 201402L + // on Windows and always available on MSVC except with libc++. +#if (defined(_MSC_VER) && !defined(_LIBCPP_VERSION)) || __cplusplus > 201402L std::shared_mutex impl; #else #if !defined(LLVM_USE_RW_MUTEX_IMPL) diff --git a/llvm/include/llvm/Support/SHA1.h b/llvm/include/llvm/Support/SHA1.h index efd8513cc201..ae6d62aed723 100644 --- a/llvm/include/llvm/Support/SHA1.h +++ b/llvm/include/llvm/Support/SHA1.h @@ -36,17 +36,17 @@ public: /// Digest more data. void update(StringRef Str); - /// Return a reference to the current raw 160-bits SHA1 for the digested data + /// Return the current raw 160-bits SHA1 for the digested data /// since the last call to init(). This call will add data to the internal /// state and as such is not suited for getting an intermediate result /// (see result()). - StringRef final(); + std::array<uint8_t, 20> final(); - /// Return a reference to the current raw 160-bits SHA1 for the digested data + /// Return the current raw 160-bits SHA1 for the digested data /// since the last call to init().
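With final() now returning the digest by value, a caller looks roughly like the following sketch (not part of the patch; llvm::toHex is used only for display):

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/SHA1.h"
#include <cstdio>

int main() {
  llvm::SHA1 Hasher;
  Hasher.update("abc");
  std::array<uint8_t, 20> Digest = Hasher.final();  // digest returned by value now
  std::printf("%s\n", llvm::toHex(Digest).c_str());
  return 0;
}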
This is suitable for getting the SHA1 at /// any time without invalidating the internal state so that more calls can be /// made into update. - StringRef result(); + std::array<uint8_t, 20> result(); /// Returns a raw 160-bit SHA1 hash for the given data. static std::array<uint8_t, 20> hash(ArrayRef<uint8_t> Data); @@ -68,14 +68,13 @@ private: uint8_t BufferOffset; } InternalState; - // Internal copy of the hash, populated and accessed on calls to result() - uint32_t HashResult[HASH_LENGTH / 4]; - // Helper void writebyte(uint8_t data); void hashBlock(); void addUncounted(uint8_t data); void pad(); + + void final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult); }; } // end llvm namespace diff --git a/llvm/include/llvm/Support/SHA256.h b/llvm/include/llvm/Support/SHA256.h index 9e295b0b9fae..68b32c7b4834 100644 --- a/llvm/include/llvm/Support/SHA256.h +++ b/llvm/include/llvm/Support/SHA256.h @@ -43,17 +43,17 @@ public: /// Digest more data. void update(StringRef Str); - /// Return a reference to the current raw 256-bits SHA256 for the digested + /// Return the current raw 256-bits SHA256 for the digested /// data since the last call to init(). This call will add data to the /// internal state and as such is not suited for getting an intermediate /// result (see result()). - StringRef final(); + std::array<uint8_t, 32> final(); - /// Return a reference to the current raw 256-bits SHA256 for the digested + /// Return the current raw 256-bits SHA256 for the digested /// data since the last call to init(). This is suitable for getting the /// SHA256 at any time without invalidating the internal state so that more /// calls can be made into update. - StringRef result(); + std::array<uint8_t, 32> result(); /// Returns a raw 256-bit SHA256 hash for the given data. static std::array<uint8_t, 32> hash(ArrayRef<uint8_t> Data); @@ -75,14 +75,13 @@ private: uint8_t BufferOffset; } InternalState; - // Internal copy of the hash, populated and accessed on calls to result() - uint32_t HashResult[HASH_LENGTH / 4]; - // Helper void writebyte(uint8_t data); void hashBlock(); void addUncounted(uint8_t data); void pad(); + + void final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult); }; } // namespace llvm diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h index 6b5daf710c9f..c9eabfb3788c 100644 --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -81,7 +81,6 @@ struct FlagEntry { }; raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value); -std::string to_hexString(uint64_t Value, bool UpperCase = true); template <typename T> std::string to_string(const T &Value) { std::string number; @@ -95,7 +94,7 @@ std::string enumToString(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues) { for (const EnumEntry<TEnum> &EnumItem : EnumValues) if (EnumItem.Value == Value) return std::string(EnumItem.AltName); - return to_hexString(Value, false); + return utohexstr(Value, true); } class ScopedPrinter { @@ -107,7 +106,7 @@ public: ScopedPrinter(raw_ostream &OS, ScopedPrinterKind Kind = ScopedPrinterKind::Base) - : OS(OS), IndentLevel(0), Kind(Kind) {} + : OS(OS), Kind(Kind) {} ScopedPrinterKind getKind() const { return Kind; } @@ -498,7 +497,7 @@ private: } raw_ostream &OS; - int IndentLevel; + int IndentLevel = 0; StringRef Prefix; ScopedPrinterKind Kind; }; diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h index 44f5a750ff5c..937e0572d4a7 100644 --- a/llvm/include/llvm/Support/Signals.h +++ b/llvm/include/llvm/Support/Signals.h @@ -14,6 +14,7 @@ #ifndef LLVM_SUPPORT_SIGNALS_H #define LLVM_SUPPORT_SIGNALS_H +#include <cstdint> #include <string> namespace llvm { diff --git
a/llvm/include/llvm/Support/Signposts.h b/llvm/include/llvm/Support/Signposts.h index dabbba6f89d1..37089bd1c17d 100644 --- a/llvm/include/llvm/Support/Signposts.h +++ b/llvm/include/llvm/Support/Signposts.h @@ -16,11 +16,11 @@ #ifndef LLVM_SUPPORT_SIGNPOSTS_H #define LLVM_SUPPORT_SIGNPOSTS_H -#include "llvm/ADT/StringRef.h" #include namespace llvm { class SignpostEmitterImpl; +class StringRef; /// Manages the emission of signposts into the recording method supported by /// the OS. diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h index 28716b42f4ab..eced4574c82e 100644 --- a/llvm/include/llvm/Support/SourceMgr.h +++ b/llvm/include/llvm/Support/SourceMgr.h @@ -100,6 +100,9 @@ public: SourceMgr &operator=(SourceMgr &&) = default; ~SourceMgr() = default; + /// Return the include directories of this source manager. + ArrayRef getIncludeDirs() const { return IncludeDirectories; } + void setIncludeDirs(const std::vector &Dirs) { IncludeDirectories = Dirs; } @@ -147,6 +150,22 @@ public: return Buffers.size(); } + /// Takes the source buffers from the given source manager and append them to + /// the current manager. `MainBufferIncludeLoc` is an optional include + /// location to attach to the main buffer of `SrcMgr` after it gets moved to + /// the current manager. + void takeSourceBuffersFrom(SourceMgr &SrcMgr, + SMLoc MainBufferIncludeLoc = SMLoc()) { + if (SrcMgr.Buffers.empty()) + return; + + size_t OldNumBuffers = getNumBuffers(); + std::move(SrcMgr.Buffers.begin(), SrcMgr.Buffers.end(), + std::back_inserter(Buffers)); + SrcMgr.Buffers.clear(); + Buffers[OldNumBuffers].IncludeLoc = MainBufferIncludeLoc; + } + /// Search for a file with the specified name in the current directory or in /// one of the IncludeDirs. /// @@ -156,6 +175,17 @@ public: unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile); + /// Search for a file with the specified name in the current directory or in + /// one of the IncludeDirs, and try to open it **without** adding to the + /// SourceMgr. If the opened file is intended to be added to the source + /// manager, prefer `AddIncludeFile` instead. + /// + /// If no file is found, this returns an Error, otherwise it returns the + /// buffer of the stacked file. The full path to the included file can be + /// found in \p IncludedFile. + ErrorOr> + OpenIncludeFile(const std::string &Filename, std::string &IncludedFile); + /// Return the ID of the buffer containing the specified location. /// /// 0 is returned if the buffer is not found. diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 428cbb44705d..8df7ced0029d 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -322,6 +322,9 @@ HANDLE_TARGET_OPCODE(G_BITCAST) /// Generic freeze. HANDLE_TARGET_OPCODE(G_FREEZE) +// INTRINSIC fptrunc_round intrinsic. +HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND) + /// INTRINSIC trunc intrinsic. HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC) @@ -617,6 +620,9 @@ HANDLE_TARGET_OPCODE(G_FABS) /// f64) is allowed. HANDLE_TARGET_OPCODE(G_FCOPYSIGN) +/// Generic test for floating-point class. +HANDLE_TARGET_OPCODE(G_IS_FPCLASS) + /// Generic FP canonicalize value. 
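A hypothetical use of the new SourceMgr::takeSourceBuffersFrom above; the buffer contents here are invented:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"

int main() {
  llvm::SourceMgr Inner, Outer;
  Inner.AddNewSourceBuffer(
      llvm::MemoryBuffer::getMemBuffer("some included text"), llvm::SMLoc());
  // Moves Inner's buffers to the back of Outer's list; Inner ends up empty.
  Outer.takeSourceBuffersFrom(Inner);
  return 0;
}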
HANDLE_TARGET_OPCODE(G_FCANONICALIZE) diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index 02a8d72483db..c3a6cceaee6b 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -14,14 +14,14 @@ #ifndef LLVM_SUPPORT_TARGETPARSER_H #define LLVM_SUPPORT_TARGETPARSER_H +#include "llvm/ADT/StringRef.h" +#include // FIXME: vector is used because that's what clang uses for subtarget feature // lists, but SmallVector would probably be better -#include "llvm/Support/RISCVISAInfo.h" #include namespace llvm { -class StringRef; template class SmallVectorImpl; class Triple; @@ -86,6 +86,7 @@ enum GPUKind : uint32_t { GK_GFX909 = 65, GK_GFX90A = 66, GK_GFX90C = 67, + GK_GFX940 = 68, GK_GFX1010 = 71, GK_GFX1011 = 72, @@ -97,9 +98,15 @@ enum GPUKind : uint32_t { GK_GFX1033 = 78, GK_GFX1034 = 79, GK_GFX1035 = 80, + GK_GFX1036 = 81, + + GK_GFX1100 = 90, + GK_GFX1101 = 91, + GK_GFX1102 = 92, + GK_GFX1103 = 93, GK_AMDGCN_FIRST = GK_GFX600, - GK_AMDGCN_LAST = GK_GFX1035, + GK_AMDGCN_LAST = GK_GFX1103, }; /// Instruction set architecture version. @@ -170,7 +177,6 @@ void fillValidCPUArchList(SmallVectorImpl &Values, bool IsRV64); void fillValidTuneCPUArchList(SmallVectorImpl &Values, bool IsRV64); bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector &Features); StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64); -StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo); } // namespace RISCV diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index 868dd2819f83..5e67a312d5c7 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -13,26 +13,42 @@ #ifndef LLVM_SUPPORT_THREADPOOL_H #define LLVM_SUPPORT_THREADPOOL_H +#include "llvm/ADT/DenseMap.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/RWMutex.h" #include "llvm/Support/Threading.h" #include "llvm/Support/thread.h" #include #include +#include #include #include #include -#include #include namespace llvm { +class ThreadPoolTaskGroup; + /// A ThreadPool for asynchronous parallel execution on a defined number of /// threads. /// /// The pool keeps a vector of threads alive, waiting on a condition variable /// for some work to become available. +/// +/// It is possible to reuse one thread pool for different groups of tasks +/// by grouping tasks using ThreadPoolTaskGroup. All tasks are processed using +/// the same queue, but it is possible to wait only for a specific group of +/// tasks to finish. +/// +/// It is also possible for worker threads to submit new tasks and wait for +/// them. Note that this may result in a deadlock in cases such as when a task +/// (directly or indirectly) tries to wait for its own completion, or when all +/// available threads are used up by tasks waiting for a task that has no thread +/// left to run on (this includes waiting on the returned future). It should be +/// generally safe to wait() for a group as long as groups do not form a cycle. class ThreadPool { public: /// Construct a pool using the hardware strategy \p S for mapping hardware @@ -47,23 +63,47 @@ public: /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. 
template - inline auto async(Function &&F, Args &&...ArgList) { + auto async(Function &&F, Args &&...ArgList) { auto Task = std::bind(std::forward(F), std::forward(ArgList)...); return async(std::move(Task)); } + /// Overload, task will be in the given task group. + template + auto async(ThreadPoolTaskGroup &Group, Function &&F, Args &&...ArgList) { + auto Task = + std::bind(std::forward(F), std::forward(ArgList)...); + return async(Group, std::move(Task)); + } + /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template auto async(Func &&F) -> std::shared_future { - return asyncImpl(std::function(std::forward(F))); + return asyncImpl(std::function(std::forward(F)), + nullptr); + } + + template + auto async(ThreadPoolTaskGroup &Group, Func &&F) + -> std::shared_future { + return asyncImpl(std::function(std::forward(F)), + &Group); } /// Blocking wait for all the threads to complete and the queue to be empty. /// It is an error to try to add new tasks while blocking on this call. + /// Calling wait() from a task would deadlock waiting for itself. void wait(); + /// Blocking wait for only all the threads in the given group to complete. + /// It is possible to wait even inside a task, but waiting (directly or + /// indirectly) on itself will deadlock. If called from a task running on a + /// worker thread, the call may process pending tasks while waiting in order + /// not to waste the thread. + void wait(ThreadPoolTaskGroup &Group); + // TODO: misleading legacy name warning! // Returns the maximum number of worker threads in the pool, not the current // number of threads! @@ -98,12 +138,15 @@ private: std::move(F)}; } - bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); } + /// Returns true if all tasks in the given group have finished (nullptr means + /// all tasks regardless of their group). QueueLock must be locked. + bool workCompletedUnlocked(ThreadPoolTaskGroup *Group) const; /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template - std::shared_future asyncImpl(std::function Task) { + std::shared_future asyncImpl(std::function Task, + ThreadPoolTaskGroup *Group) { #if LLVM_ENABLE_THREADS /// Wrap the Task in a std::function that sets the result of the @@ -117,7 +160,7 @@ private: // Don't allow enqueueing after disabling the pool assert(EnableFlag && "Queuing a thread during ThreadPool destruction"); - Tasks.push(std::move(R.first)); + Tasks.emplace_back(std::make_pair(std::move(R.first), Group)); requestedThreads = ActiveThreads + Tasks.size(); } QueueCondition.notify_one(); @@ -130,7 +173,7 @@ private: auto Future = std::async(std::launch::deferred, std::move(Task)).share(); // Wrap the future so that both ThreadPool::wait() can operate and the // returned future can be sync'ed on. - Tasks.push([Future]() { Future.get(); }); + Tasks.emplace_back(std::make_pair([Future]() { Future.get(); }, Group)); return Future; #endif } @@ -139,25 +182,29 @@ private: // Grow to ensure that we have at least `requested` Threads, but do not go // over MaxThreadCount. void grow(int requested); + + void processTasks(ThreadPoolTaskGroup *WaitingForGroup); #endif /// Threads in flight std::vector Threads; /// Lock protecting access to the Threads vector. - mutable std::mutex ThreadsLock; + mutable llvm::sys::RWMutex ThreadsLock; /// Tasks waiting for execution in the pool. 
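For reference, a minimal sketch of the async/future flow documented above; the callable is invented, and tasks return void through this interface:

#include "llvm/Support/ThreadPool.h"
#include <atomic>

int main() {
  llvm::ThreadPool Pool;
  std::atomic<int> Result{0};
  auto Future = Pool.async([&Result] { Result = 42; });
  Future.wait();  // the returned future is non-blocking on destruction
  return Result == 42 ? 0 : 1;
}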
- std::queue> Tasks; + std::deque, ThreadPoolTaskGroup *>> Tasks; /// Locking and signaling for accessing the Tasks queue. std::mutex QueueLock; std::condition_variable QueueCondition; - /// Signaling for job completion + /// Signaling for job completion (all tasks or all tasks in a group). std::condition_variable CompletionCondition; /// Keep track of the number of thread actually busy unsigned ActiveThreads = 0; + /// Number of threads active for tasks in the given group (only non-zero). + DenseMap ActiveGroups; #if LLVM_ENABLE_THREADS // avoids warning for unused variable /// Signal for the destruction of the pool, asking thread to exit. @@ -169,6 +216,34 @@ private: /// Maximum number of threads to potentially grow this pool to. const unsigned MaxThreadCount; }; -} + +/// A group of tasks to be run on a thread pool. Thread pool tasks in different +/// groups can run on the same threadpool but can be waited for separately. +/// It is even possible for tasks of one group to submit and wait for tasks +/// of another group, as long as this does not form a loop. +class ThreadPoolTaskGroup { +public: + /// The ThreadPool argument is the thread pool to forward calls to. + ThreadPoolTaskGroup(ThreadPool &Pool) : Pool(Pool) {} + + /// Blocking destructor: will wait for all the tasks in the group to complete + /// by calling ThreadPool::wait(). + ~ThreadPoolTaskGroup() { wait(); } + + /// Calls ThreadPool::async() for this group. + template + inline auto async(Function &&F, Args &&...ArgList) { + return Pool.async(*this, std::forward(F), + std::forward(ArgList)...); + } + + /// Calls ThreadPool::wait() for this group. + void wait() { Pool.wait(*this); } + +private: + ThreadPool &Pool; +}; + +} // namespace llvm #endif // LLVM_SUPPORT_THREADPOOL_H diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index 94de950d4470..1e7e5f7b8f50 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -15,13 +15,10 @@ #define LLVM_SUPPORT_THREADING_H #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/FunctionExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/Support/Compiler.h" #include // So we can check the C++ standard lib macros. -#include #if defined(_MSC_VER) // MSVC's call_once implementation worked since VS 2015, which is the minimum @@ -236,15 +233,20 @@ bool llvm_is_multithreaded(); unsigned get_cpus(); enum class ThreadPriority { + /// Lower the current thread's priority as much as possible. Can be used + /// for long-running tasks that are not time critical; more energy- + /// efficient than Low. Background = 0, - Default = 1, + + /// Lower the current thread's priority such that it does not affect + /// foreground tasks significantly. This is a good default for long- + /// running, latency-insensitive tasks to make sure cpu is not hogged + /// by this task. + Low = 1, + + /// Restore the current thread's priority to default scheduling priority. + Default = 2, }; - /// If priority is Background tries to lower current threads priority such - /// that it does not affect foreground tasks significantly. Can be used for - /// long-running, latency-insensitive tasks to make sure cpu is not hogged by - /// this task. - /// If the priority is default tries to restore current threads priority to - /// default scheduling priority. 
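A minimal sketch, not part of the patch, of the task-group mechanics described above: two groups share one pool but are waited on independently:

#include "llvm/Support/ThreadPool.h"

int main() {
  llvm::ThreadPool Pool;
  llvm::ThreadPoolTaskGroup GroupA(Pool);
  llvm::ThreadPoolTaskGroup GroupB(Pool);
  GroupA.async([] { /* work for A */ });
  GroupB.async([] { /* work for B */ });
  GroupA.wait();  // blocks only until A's tasks are done
  Pool.wait();    // drains everything, including B
  return 0;
}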
enum class SetThreadPriorityResult { FAILURE, SUCCESS }; SetThreadPriorityResult set_thread_priority(ThreadPriority Priority); } diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h index f772deca0301..0bfac498393f 100644 --- a/llvm/include/llvm/Support/TrigramIndex.h +++ b/llvm/include/llvm/Support/TrigramIndex.h @@ -27,12 +27,12 @@ #define LLVM_SUPPORT_TRIGRAMINDEX_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include #include #include namespace llvm { +class StringRef; class TrigramIndex { public: diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 6bddb602e8c1..0b40e970e8c9 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -362,12 +362,31 @@ public: LinearPolySize::get(getKnownMinValue() / RHS, isScalable())); } + LeafTy multiplyCoefficientBy(ScalarTy RHS) const { + return static_cast( + LinearPolySize::get(getKnownMinValue() * RHS, isScalable())); + } + LeafTy coefficientNextPowerOf2() const { return static_cast(LinearPolySize::get( static_cast(llvm::NextPowerOf2(getKnownMinValue())), isScalable())); } + /// Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) + /// will result in a value whose size matches our own. + bool hasKnownScalarFactor(const LinearPolySize &RHS) const { + return isScalable() == RHS.isScalable() && + getKnownMinValue() % RHS.getKnownMinValue() == 0; + } + + /// Returns a value X where RHS.multiplyCoefficientBy(X) will result in a + /// value whose size matches our own. + ScalarTy getKnownScalarFactor(const LinearPolySize &RHS) const { + assert(hasKnownScalarFactor(RHS) && "Expected RHS to be a known factor!"); + return getKnownMinValue() / RHS.getKnownMinValue(); + } + /// Printing function. void print(raw_ostream &OS) const { if (isScalable()) diff --git a/llvm/include/llvm/Support/Unicode.h b/llvm/include/llvm/Support/Unicode.h index ca17bba2fbb4..729775431e16 100644 --- a/llvm/include/llvm/Support/Unicode.h +++ b/llvm/include/llvm/Support/Unicode.h @@ -14,6 +14,10 @@ #ifndef LLVM_SUPPORT_UNICODE_H #define LLVM_SUPPORT_UNICODE_H +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include + namespace llvm { class StringRef; @@ -30,19 +34,13 @@ enum ColumnWidthErrors { /// terminal, so we define the semantic that should be suitable for generic case /// of a terminal capable to output Unicode characters. /// -/// All characters from the Unicode code point range are considered printable -/// except for: -/// * C0 and C1 control character ranges; -/// * default ignorable code points as per 5.21 of -/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf -/// except for U+00AD SOFT HYPHEN, as it's actually displayed on most -/// terminals; -/// * format characters (category = Cf); -/// * surrogates (category = Cs); -/// * unassigned characters (category = Cn). +/// Printable codepoints are those in the categories L, M, N, P, S and Zs /// \return true if the character is considered printable. bool isPrintable(int UCS); +// Formatting codepoints are codepoints in the Cf category. +bool isFormatting(int UCS); + /// Gets the number of positions the UTF8-encoded \p Text is likely to occupy /// when output on a terminal ("character width"). This depends on the /// implementation of the terminal, and there's no standard definition of @@ -63,6 +61,30 @@ int columnWidthUTF8(StringRef Text); /// rules. 
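The scalar-factor helpers added to LinearPolySize above answer "how many times does RHS fit in this size"; an illustrative check using ElementCount, one of its instantiations:

#include "llvm/Support/TypeSize.h"
#include <cassert>

int main() {
  auto EC8 = llvm::ElementCount::getScalable(8);  // vscale x 8
  auto EC2 = llvm::ElementCount::getScalable(2);  // vscale x 2
  assert(EC8.hasKnownScalarFactor(EC2));
  assert(EC8.getKnownScalarFactor(EC2) == 4);     // 8 == 4 * 2
  // Fixed and scalable quantities never share a known factor.
  assert(!EC8.hasKnownScalarFactor(llvm::ElementCount::getFixed(2)));
  return 0;
}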
int foldCharSimple(int C); +/// Maps the name or the alias of a Unicode character to its associated +/// codepoints. +/// The names and aliases are derived from UnicodeData.txt and NameAliases.txt +/// For compatibility with the semantics of named character escape sequences in +/// C++, this mapping does an exact match sensitive to casing and spacing. +/// \return The codepoint of the corresponding character, if any. +Optional<char32_t> nameToCodepointStrict(StringRef Name); + +struct LooseMatchingResult { + char32_t CodePoint; + SmallString<64> Name; +}; + +Optional<LooseMatchingResult> nameToCodepointLooseMatching(StringRef Name); + +struct MatchForCodepointName { + std::string Name; + uint32_t Distance = 0; + char32_t Value = 0; +}; + +SmallVector<MatchForCodepointName> +nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount); + } // namespace unicode } // namespace sys } // namespace llvm diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h index 1a1072d228f1..2020a5c06f56 100644 --- a/llvm/include/llvm/Support/VersionTuple.h +++ b/llvm/include/llvm/Support/VersionTuple.h @@ -17,11 +17,13 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" -#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/Endian.h" #include <string> #include <tuple> namespace llvm { +template <typename HasherT, support::endianness Endianness> +class HashBuilderImpl; class raw_ostream; class StringRef; @@ -97,6 +99,12 @@ public: return *this; } + /// Return a version tuple that contains a different major version but + /// everything else is the same. + VersionTuple withMajorReplaced(unsigned NewMajor) const { + return VersionTuple(NewMajor, Minor, Subminor, Build); + } + /// Return a version tuple that contains only components that are non-zero. VersionTuple normalize() const { VersionTuple Result = *this; @@ -161,8 +169,8 @@ public: return !(X < Y); } - friend llvm::hash_code hash_value(const VersionTuple &VT) { - return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build); + friend hash_code hash_value(const VersionTuple &VT) { + return hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build); } template <typename HasherT, support::endianness Endianness> diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index f5dde334b0a7..3c99b0d8efdb 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -22,6 +22,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" @@ -58,6 +59,17 @@ public: // FIXME: remove when files support multiple names bool IsVFSMapped = false; + /// Whether this entity has an external path different from the virtual path, + /// and the external path is exposed by leaking it through the abstraction. + /// For example, a RedirectingFileSystem will set this for paths where + /// UseExternalName is true. + /// + /// FIXME: Currently the external path is exposed by replacing the virtual + /// path in this Status object. Instead, we should leave the path in the + /// Status intact (matching the requested virtual path) - see + /// FileManager::getFileRef for how we plan to fix this.
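A usage sketch for the Unicode name lookup declared above; "LATIN SMALL LETTER A" is the UnicodeData.txt name of U+0061, and the strict variant is case- and spacing-sensitive by design:

#include "llvm/Support/Unicode.h"
#include <cassert>

int main() {
  using namespace llvm::sys::unicode;
  auto CP = nameToCodepointStrict("LATIN SMALL LETTER A");
  assert(CP && *CP == U'a');
  assert(!nameToCodepointStrict("latin small letter a"));  // wrong casing
  return 0;
}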
+ bool ExposesExternalVFSPath = false; + Status() = default; Status(const llvm::sys::fs::file_status &Status); Status(const Twine &Name, llvm::sys::fs::UniqueID UID, @@ -306,6 +318,28 @@ public: /// \returns success if \a path has been made absolute, otherwise a /// platform-specific error_code. virtual std::error_code makeAbsolute(SmallVectorImpl &Path) const; + + enum class PrintType { Summary, Contents, RecursiveContents }; + void print(raw_ostream &OS, PrintType Type = PrintType::Contents, + unsigned IndentLevel = 0) const { + printImpl(OS, Type, IndentLevel); + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const; +#endif + +protected: + virtual void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "FileSystem\n"; + } + + void printIndent(raw_ostream &OS, unsigned IndentLevel) const { + for (unsigned i = 0; i < IndentLevel; ++i) + OS << " "; + } }; /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by @@ -357,6 +391,8 @@ public: using const_iterator = FileSystemList::const_reverse_iterator; using reverse_iterator = FileSystemList::iterator; using const_reverse_iterator = FileSystemList::const_iterator; + using range = iterator_range; + using const_range = iterator_range; /// Get an iterator pointing to the most recently added file system. iterator overlays_begin() { return FSList.rbegin(); } @@ -373,6 +409,13 @@ public: /// Get an iterator pointing one-past the most recently added file system. reverse_iterator overlays_rend() { return FSList.end(); } const_reverse_iterator overlays_rend() const { return FSList.end(); } + + range overlays_range() { return llvm::reverse(FSList); } + const_range overlays_range() const { return llvm::reverse(FSList); } + +protected: + void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const override; }; /// By default, this delegates all calls to the underlying file system. This @@ -436,6 +479,24 @@ struct NewInMemoryNodeInfo { Status makeStatus() const; }; +class NamedNodeOrError { + ErrorOr, const detail::InMemoryNode *>> + Value; + +public: + NamedNodeOrError(llvm::SmallString<128> Name, + const detail::InMemoryNode *Node) + : Value(std::make_pair(Name, Node)) {} + NamedNodeOrError(std::error_code EC) : Value(EC) {} + NamedNodeOrError(llvm::errc EC) : Value(EC) {} + + StringRef getName() const { return (*Value).first; } + explicit operator bool() const { return static_cast(Value); } + operator std::error_code() const { return Value.getError(); } + std::error_code getError() const { return Value.getError(); } + const detail::InMemoryNode *operator*() const { return (*Value).second; } +}; + } // namespace detail /// An in-memory file system. @@ -454,6 +515,14 @@ class InMemoryFileSystem : public FileSystem { Optional Type, Optional Perms, MakeNodeFn MakeNode); + /// Looks up the in-memory node for the path \param P. + /// If \param FollowFinalSymlink is true, the returned node is guaranteed to + /// not be a symlink and its path may differ from \param P. + detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink, + size_t SymlinkDepth = 0) const; + + class DirIterator; + public: explicit InMemoryFileSystem(bool UseNormalizedPaths = true); ~InMemoryFileSystem() override; @@ -471,18 +540,32 @@ public: Optional Perms = None); /// Add a hard link to a file. + /// /// Here hard links are not intended to be fully equivalent to the classical /// filesystem. 
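A short sketch (paths invented) of the new FileSystem::print entry point above, dumping an in-memory VFS recursively:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::vfs::InMemoryFileSystem FS;
  FS.addFile("/a.txt", /*ModificationTime=*/0,
             llvm::MemoryBuffer::getMemBuffer("hello"));
  FS.print(llvm::errs(), llvm::vfs::FileSystem::PrintType::RecursiveContents);
  return 0;
}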
Both the hard link and the file share the same buffer and /// status (and thus have the same UniqueID). Because of this there is no way /// to distinguish between the link and the file after the link has been /// added. /// - /// The To path must be an existing file or a hardlink. The From file must not - /// have been added before. The To Path must not be a directory. The From Node - /// is added as a hard link which points to the resolved file of To Node. + /// The \param Target path must be an existing file or a hardlink. The + /// \param NewLink file must not have been added before. The \param Target + /// path must not be a directory. The \param NewLink node is added as a hard + /// link which points to the resolved file of \param Target node. /// \return true if the above condition is satisfied and hardlink was /// successfully created, false otherwise. - bool addHardLink(const Twine &From, const Twine &To); + bool addHardLink(const Twine &NewLink, const Twine &Target); + + /// Arbitrary max depth to search through symlinks. We can get into problems + /// if a link links to a link that links back to the link, for example. + static constexpr size_t MaxSymlinkDepth = 16; + + /// Add a symbolic link. Unlike a HardLink, because \param Target doesn't need + /// to refer to a file (or refer to anything, as it happens). Also, an + /// in-memory directory for \param Target isn't automatically created. + bool addSymbolicLink(const Twine &NewLink, const Twine &Target, + time_t ModificationTime, Optional User = None, + Optional Group = None, + Optional Perms = None); /// Add a buffer to the VFS with a path. The VFS does not own the buffer. /// If present, User, Group, Type and Perms apply to the newly-created file @@ -520,6 +603,10 @@ public: SmallVectorImpl &Output) const override; std::error_code isLocal(const Twine &Path, bool &Result) override; std::error_code setCurrentWorkingDirectory(const Twine &Path) override; + +protected: + void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const override; }; /// Get a globally unique ID for a virtual file or directory. @@ -571,7 +658,10 @@ class RedirectingFileSystemParser; /// 'case-sensitive': /// 'use-external-names': /// 'overlay-relative': -/// 'fallthrough': +/// 'fallthrough': +/// 'redirecting-with': /// /// Virtual directories that list their contents are represented as /// \verbatim @@ -642,6 +732,20 @@ public: enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File }; enum NameKind { NK_NotSet, NK_External, NK_Virtual }; + /// The type of redirection to perform. + enum class RedirectKind { + /// Lookup the redirected path first (ie. the one specified in + /// 'external-contents') and if that fails "fallthrough" to a lookup of the + /// originally provided path. + Fallthrough, + /// Lookup the provided path first and if that fails, "fallback" to a + /// lookup of the redirected path. + Fallback, + /// Only lookup the redirected path, do not lookup the originally provided + /// path. + RedirectOnly + }; + /// A single file or directory in the VFS. class Entry { EntryKind Kind; @@ -776,17 +880,11 @@ private: friend class RedirectingFSDirIterImpl; friend class RedirectingFileSystemParser; - bool shouldUseExternalFS() const { return IsFallthrough; } - /// Canonicalize path by removing ".", "..", "./", components. This is /// a VFS request, do not bother about symlinks in the path components /// but canonicalize in order to perform the correct entry search. 
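And a sketch of the new addSymbolicLink described above, with invented names; note the target does not have to exist when the link is created:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

int main() {
  llvm::vfs::InMemoryFileSystem FS;
  FS.addFile("/real.txt", /*ModificationTime=*/0,
             llvm::MemoryBuffer::getMemBuffer("data"));
  FS.addSymbolicLink("/link.txt", "/real.txt", /*ModificationTime=*/0);
  auto Buf = FS.getBufferForFile("/link.txt");  // resolves through the link
  return Buf ? 0 : 1;
}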
std::error_code makeCanonical(SmallVectorImpl &Path) const; - /// Whether to fall back to the external file system when an operation fails - /// with the given error code on a path associated with the provided Entry. - bool shouldFallBackToExternalFS(std::error_code EC, Entry *E = nullptr) const; - /// Get the File status, or error, from the underlying external file system. /// This returns the status with the originally requested name, while looking /// up the entry using the canonical path. @@ -834,9 +932,9 @@ private: /// names of files. This global value is overridable on a per-file basis. bool UseExternalNames = true; - /// Whether to attempt a file lookup in external file system after it wasn't - /// found in VFS. - bool IsFallthrough = true; + /// Determines the lookups to perform, as well as their order. See + /// \c RedirectKind for details. + RedirectKind Redirection = RedirectKind::Fallthrough; /// @} RedirectingFileSystem(IntrusiveRefCntPtr ExternalFS); @@ -891,15 +989,19 @@ public: StringRef getExternalContentsPrefixDir() const; + /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly + /// otherwise. Will removed in the future, use \c setRedirection instead. void setFallthrough(bool Fallthrough); + void setRedirection(RedirectingFileSystem::RedirectKind Kind); + std::vector getRoots() const; - void dump(raw_ostream &OS) const; - void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const; + +protected: + void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const override; }; /// Collect all pairs of entries from the diff --git a/llvm/include/llvm/Support/Win64EH.h b/llvm/include/llvm/Support/Win64EH.h index 9359fcb4286a..31345beaa66a 100644 --- a/llvm/include/llvm/Support/Win64EH.h +++ b/llvm/include/llvm/Support/Win64EH.h @@ -24,6 +24,9 @@ namespace Win64EH { /// UnwindOpcodes - Enumeration whose values specify a single operation in /// the prolog of a function. enum UnwindOpcodes { + // The following set of unwind opcodes is for x86_64. They are documented at + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64. + // Some generic values from this set are used for other architectures too. UOP_PushNonVol = 0, UOP_AllocLarge, UOP_AllocSmall, @@ -57,7 +60,38 @@ enum UnwindOpcodes { UOP_SaveNext, UOP_TrapFrame, UOP_Context, - UOP_ClearUnwoundToCall + UOP_ClearUnwoundToCall, + // The following set of unwind opcodes is for ARM. They are documented at + // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling + + // Stack allocations use UOP_AllocSmall, UOP_AllocLarge from above, plus + // the following. AllocSmall, AllocLarge and AllocHuge represent a 16 bit + // instruction, while the WideAlloc* opcodes represent a 32 bit instruction. + // Small can represent a stack offset of 0x7f*4 (252) bytes, Medium can + // represent up to 0x3ff*4 (4092) bytes, Large up to 0xffff*4 (262140) bytes, + // and Huge up to 0xffffff*4 (67108860) bytes. 
+ UOP_AllocHuge, + UOP_WideAllocMedium, + UOP_WideAllocLarge, + UOP_WideAllocHuge, + + UOP_WideSaveRegMask, + UOP_SaveSP, + UOP_SaveRegsR4R7LR, + UOP_WideSaveRegsR4R11LR, + UOP_SaveFRegD8D15, + UOP_SaveRegMask, + UOP_SaveLR, + UOP_SaveFRegD0D15, + UOP_SaveFRegD16D31, + // Using UOP_Nop from above + UOP_WideNop, + // Using UOP_End from above + UOP_EndNop, + UOP_WideEndNop, + // A custom unspecified opcode, consisting of one or more bytes. This + // allows producing opcodes in the implementation defined/reserved range. + UOP_Custom, }; /// UnwindCode - This union describes a single operation in a function prolog, diff --git a/llvm/include/llvm/Support/WithColor.h b/llvm/include/llvm/Support/WithColor.h index e772ea667f4f..b249f34da1fa 100644 --- a/llvm/include/llvm/Support/WithColor.h +++ b/llvm/include/llvm/Support/WithColor.h @@ -51,10 +51,9 @@ enum class ColorMode { /// An RAII object that temporarily switches an output stream to a specific /// color. class WithColor { - raw_ostream &OS; - ColorMode Mode; - public: + using AutoDetectFunctionType = bool (*)(const raw_ostream &OS); + /// To be used like this: WithColor(OS, HighlightColor::String) << "text"; /// @param OS The output stream /// @param S Symbolic name for syntax element to color @@ -132,6 +131,19 @@ public: /// Implement default handling for Warning. /// Print "warning: " to stderr. static void defaultWarningHandler(Error Warning); + + /// Retrieve the default color auto detection function. + static AutoDetectFunctionType defaultAutoDetectFunction(); + + /// Change the global auto detection function. + static void + setAutoDetectFunction(AutoDetectFunctionType NewAutoDetectFunction); + +private: + raw_ostream &OS; + ColorMode Mode; + + static AutoDetectFunctionType AutoDetectFunction; }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index aca717a9f6cb..169b8e97986e 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -120,8 +120,6 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ - ENUM_ENTRY(IC_64BIT_VEX_OPSIZE, 4, "requires 64-bit mode and VEX") \ - ENUM_ENTRY(IC_64BIT_VEX_OPSIZE_ADSIZE, 5, "requires 64-bit mode, VEX, and AdSize")\ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 4443d822d3e8..58fa3b3842e7 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -211,47 +211,47 @@ X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening") #undef X86_FEATURE #ifndef CPU_SPECIFIC -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) #endif #ifndef CPU_SPECIFIC_ALIAS -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) #endif -CPU_SPECIFIC("generic", 'A', "") -CPU_SPECIFIC("pentium", 'B', "") -CPU_SPECIFIC("pentium_pro", 'C', "+cmov") -CPU_SPECIFIC("pentium_mmx", 'D', "+mmx") -CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx") -CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse") 
-CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii") -CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2") -CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2") -CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3") -CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") -CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") -CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") -CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") -CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") -CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") -CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge") -CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") -CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge") -CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") -CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell") -CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") -CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") -CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell") -CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") -CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") -CPU_SPECIFIC_ALIAS("mic_avx512", "knl") -CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") -CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") -CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") -CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") +CPU_SPECIFIC("generic", "generic", 'A', "") +CPU_SPECIFIC("pentium", "pentium", 'B', "") +CPU_SPECIFIC("pentium_pro", "pentiumpro", 'C', "+cmov") +CPU_SPECIFIC("pentium_mmx", "pentium-mmx", 'D', "+mmx") +CPU_SPECIFIC("pentium_ii", "pentium2", 'E', "+cmov,+mmx") +CPU_SPECIFIC("pentium_iii", "pentium3", 'H', "+cmov,+mmx,+sse") +CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium3", "pentium_iii") +CPU_SPECIFIC("pentium_4", "pentium4", 'J', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_m", "pentium-m", 'K', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_4_sse3", "prescott", 'L', 
"+cmov,+mmx,+sse,+sse2,+sse3") +CPU_SPECIFIC("core_2_duo_ssse3", "core2", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") +CPU_SPECIFIC("core_2_duo_sse4_1", "penryn", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") +CPU_SPECIFIC("atom", "atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") +CPU_SPECIFIC("atom_sse4_2", "silvermont", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_i7_sse4_2", "nehalem", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_aes_pclmulqdq", "westmere", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("atom_sse4_2_movbe", "silvermont", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("goldmont", "goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("sandybridge", "sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") +CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge", "sandybridge") +CPU_SPECIFIC("ivybridge", "ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") +CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge", "ivybridge") +CPU_SPECIFIC("haswell", "haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell", "haswell") +CPU_SPECIFIC("core_4th_gen_avx_tsx", "haswell", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC("broadwell", "broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell", "broadwell") +CPU_SPECIFIC("core_5th_gen_avx_tsx", "broadwell", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC("knl", "knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") +CPU_SPECIFIC_ALIAS("mic_avx512", "knl", "knl") +CPU_SPECIFIC("skylake", "skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") +CPU_SPECIFIC( "skylake_avx512", "skylake-avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") +CPU_SPECIFIC("cannonlake", "cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") +CPU_SPECIFIC("knm", "knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") #undef CPU_SPECIFIC_ALIAS #undef CPU_SPECIFIC diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h index a4b2ab5e49ec..231cc1d28c9a 100644 --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -11,7 +11,6 @@ // See http://www.yaml.org/spec/1.2/spec.html for the full standard. // // This currently does not implement the following: -// * Multi-line literal folding. // * Tag resolution. // * UTF-16. 
// * BOMs anywhere other than the first Unicode scalar value in the file. diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index 7ad73543fc6e..8ade9b15642b 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -24,7 +24,6 @@ #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include #include @@ -63,6 +62,7 @@ struct MappingTraits { // static void mapping(IO &io, T &fields); // Optionally may provide: // static std::string validate(IO &io, T &fields); + // static void enumInput(IO &io, T &value); // // The optional flow flag will cause generated YAML to use a flow mapping // (e.g. { a: 0, b: 1 }): @@ -446,6 +446,31 @@ template struct has_MappingValidateTraits { static bool const value = (sizeof(test>(nullptr)) == 1); }; +// Test if MappingContextTraits::enumInput() is defined on type T. +template struct has_MappingEnumInputTraits { + using Signature_validate = void (*)(class IO &, T &); + + template + static char test(SameType *); + + template static double test(...); + + static bool const value = + (sizeof(test>(nullptr)) == 1); +}; + +// Test if MappingTraits::enumInput() is defined on type T. +template struct has_MappingEnumInputTraits { + using Signature_validate = void (*)(class IO &, T &); + + template + static char test(SameType *); + + template static double test(...); + + static bool const value = (sizeof(test>(nullptr)) == 1); +}; + // Test if SequenceTraits is defined on type T. template struct has_SequenceMethodTraits @@ -537,9 +562,8 @@ template struct has_PolymorphicTraits { }; inline bool isNumeric(StringRef S) { - const static auto skipDigits = [](StringRef Input) { - return Input.drop_front( - std::min(Input.find_first_not_of("0123456789"), Input.size())); + const auto skipDigits = [](StringRef Input) { + return Input.ltrim("0123456789"); }; // Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls @@ -666,8 +690,7 @@ inline QuotingType needsQuotes(StringRef S) { // 7.3.3 Plain Style // Plain scalars must not begin with most indicators, as this would cause // ambiguity with other YAML constructs. 
- static constexpr char Indicators[] = R"(-?:\,[]{}#&*!|>'"%@`)"; - if (S.find_first_of(Indicators) == 0) + if (std::strchr(R"(-?:\,[]{}#&*!|>'"%@`)", S[0]) != nullptr) MaxQuotingNeeded = QuotingType::Single; for (unsigned char C : S) { @@ -1061,9 +1084,30 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { io.endMapping(); } +template +std::enable_if_t::value, bool> +yamlizeMappingEnumInput(IO &io, T &Val) { + return false; +} + +template +std::enable_if_t::value, bool> +yamlizeMappingEnumInput(IO &io, T &Val) { + if (io.outputting()) + return false; + + io.beginEnumScalar(); + MappingTraits::enumInput(io, Val); + bool Matched = !io.matchEnumFallback(); + io.endEnumScalar(); + return Matched; +} + template std::enable_if_t::value, void> yamlize(IO &io, T &Val, bool, Context &Ctx) { + if (yamlizeMappingEnumInput(io, Val)) + return; if (has_FlowTraits>::value) { io.beginFlowMapping(); detail::doMapping(io, Val, Ctx); @@ -1624,14 +1668,13 @@ template void IO::processKeyWithDefault(const char *Key, Optional &Val, const Optional &DefaultValue, bool Required, Context &Ctx) { - assert(DefaultValue.hasValue() == false && - "Optional shouldn't have a value!"); + assert(!DefaultValue && "Optional shouldn't have a value!"); void *SaveInfo; bool UseDefault = true; - const bool sameAsDefault = outputting() && !Val.hasValue(); - if (!outputting() && !Val.hasValue()) + const bool sameAsDefault = outputting() && !Val; + if (!outputting() && !Val) Val = T(); - if (Val.hasValue() && + if (Val && this->preflightKey(Key, Required, sameAsDefault, UseDefault, SaveInfo)) { // When reading an Optional key from a YAML description, we allow the @@ -1648,7 +1691,7 @@ void IO::processKeyWithDefault(const char *Key, Optional &Val, if (IsNone) Val = DefaultValue; else - yamlize(*this, Val.getValue(), Required, Ctx); + yamlize(*this, *Val, Required, Ctx); this->postflightKey(SaveInfo); } else { if (UseDefault) diff --git a/llvm/include/llvm/Support/circular_raw_ostream.h b/llvm/include/llvm/Support/circular_raw_ostream.h index d2f01ea6a7f2..17fb8fa0e476 100644 --- a/llvm/include/llvm/Support/circular_raw_ostream.h +++ b/llvm/include/llvm/Support/circular_raw_ostream.h @@ -38,7 +38,7 @@ namespace llvm { /// TheStream - The real stream we output to. We set it to be /// unbuffered, since we're already doing our own buffering. /// - raw_ostream *TheStream; + raw_ostream *TheStream = nullptr; /// OwnsStream - Are we responsible for managing the underlying /// stream? @@ -51,7 +51,7 @@ namespace llvm { /// BufferArray - The actual buffer storage. /// - char *BufferArray; + char *BufferArray = nullptr; /// Cur - Pointer to the current output point in BufferArray. /// @@ -60,7 +60,7 @@ namespace llvm { /// Filled - Indicate whether the buffer has been completely /// filled. This helps avoid garbage output. /// - bool Filled; + bool Filled = false; /// Banner - A pointer to a banner to print before dumping the /// log. 
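The MappingTraits<T>::enumInput hook introduced above gives a mapping-typed value an alternate scalar spelling on input: yamlize tries enumInput first and only falls through to the usual mapping() path when no enumCase matches (output always goes through mapping()). A minimal sketch of a specialization using the hook; the Access type and its YAML spellings are hypothetical, not part of this patch, and the enumCase-based body is an assumption inferred from the beginEnumScalar/matchEnumFallback machinery:

  #include "llvm/Support/YAMLTraits.h"
  #include <string>

  struct Access {
    std::string Kind = "custom";
    unsigned Flags = 0;
    // enumCase() compares candidate values when outputting, so the type
    // needs an equality operator to compile.
    bool operator==(const Access &O) const {
      return Kind == O.Kind && Flags == O.Flags;
    }
  };

  namespace llvm {
  namespace yaml {
  template <> struct MappingTraits<Access> {
    // Tried first on input: lets a bare scalar like `readonly` stand in
    // for the whole mapping. Unmatched scalars fall back to mapping().
    static void enumInput(IO &io, Access &A) {
      io.enumCase(A, "readonly", Access{"readonly", 1});
      io.enumCase(A, "readwrite", Access{"readwrite", 3});
    }
    static void mapping(IO &io, Access &A) {
      io.mapRequired("kind", A.Kind);
      io.mapOptional("flags", A.Flags);
    }
  };
  } // end namespace yaml
  } // end namespace llvm

With traits along these lines, both `access: readonly` and `access: { kind: custom, flags: 6 }` would parse into the same field.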
@@ -106,9 +106,8 @@ namespace llvm { /// circular_raw_ostream(raw_ostream &Stream, const char *Header, size_t BuffSize = 0, bool Owns = REFERENCE_ONLY) - : raw_ostream(/*unbuffered*/ true), TheStream(nullptr), - OwnsStream(Owns), BufferSize(BuffSize), BufferArray(nullptr), - Filled(false), Banner(Header) { + : raw_ostream(/*unbuffered*/ true), OwnsStream(Owns), + BufferSize(BuffSize), Banner(Header) { if (BufferSize != 0) BufferArray = new char[BufferSize]; Cur = BufferArray; diff --git a/llvm/include/llvm/Support/raw_sha1_ostream.h b/llvm/include/llvm/Support/raw_sha1_ostream.h index 3991691796b5..299f6e6b5e88 100644 --- a/llvm/include/llvm/Support/raw_sha1_ostream.h +++ b/llvm/include/llvm/Support/raw_sha1_ostream.h @@ -30,7 +30,7 @@ class raw_sha1_ostream : public raw_ostream { public: /// Return the current SHA1 hash for the content of the stream - StringRef sha1() { + std::array sha1() { flush(); return State.result(); } diff --git a/llvm/include/llvm/TableGen/Parser.h b/llvm/include/llvm/TableGen/Parser.h new file mode 100644 index 000000000000..411259e4033c --- /dev/null +++ b/llvm/include/llvm/TableGen/Parser.h @@ -0,0 +1,34 @@ +//===- llvm/TableGen/Parser.h - tblgen parser entry point -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares an entry point into the tablegen parser for use by tools. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TABLEGEN_PARSER_H +#define LLVM_TABLEGEN_PARSER_H + +#include "llvm/ADT/STLExtras.h" +#include +#include + +namespace llvm { +class RecordKeeper; +class SourceMgr; + +/// Parse the TableGen file defined within the main buffer of the given +/// SourceMgr. On success, populates the provided RecordKeeper with the parsed +/// records and returns false. On failure, returns true. +/// +/// NOTE: TableGen currently relies on global state within a given parser +/// invocation, so this function is not thread-safe. +bool TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records); + +} // end namespace llvm + +#endif // LLVM_TABLEGEN_PARSER_H diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 1157487eced3..44daad976c12 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -28,7 +28,6 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include @@ -40,7 +39,7 @@ namespace llvm { namespace detail { -struct RecordContext; +struct RecordKeeperImpl; } // namespace detail class ListRecTy; @@ -70,15 +69,20 @@ public: private: RecTyKind Kind; + /// The RecordKeeper that uniqued this Type. + RecordKeeper &RK; /// ListRecTy of the list that has elements of this type. ListRecTy *ListTy = nullptr; public: - RecTy(RecTyKind K) : Kind(K) {} + RecTy(RecTyKind K, RecordKeeper &RK) : Kind(K), RK(RK) {} virtual ~RecTy() = default; RecTyKind getRecTyKind() const { return Kind; } + /// Return the RecordKeeper that uniqued this Type. 
+ RecordKeeper &getRecordKeeper() const { return RK; } + virtual std::string getAsString() const = 0; void print(raw_ostream &OS) const { OS << getAsString(); } void dump() const; @@ -102,16 +106,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) { /// 'bit' - Represent a single bit class BitRecTy : public RecTy { - friend detail::RecordContext; + friend detail::RecordKeeperImpl; - BitRecTy() : RecTy(BitRecTyKind) {} + BitRecTy(RecordKeeper &RK) : RecTy(BitRecTyKind, RK) {} public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == BitRecTyKind; } - static BitRecTy *get(); + static BitRecTy *get(RecordKeeper &RK); std::string getAsString() const override { return "bit"; } @@ -122,14 +126,15 @@ public: class BitsRecTy : public RecTy { unsigned Size; - explicit BitsRecTy(unsigned Sz) : RecTy(BitsRecTyKind), Size(Sz) {} + explicit BitsRecTy(RecordKeeper &RK, unsigned Sz) + : RecTy(BitsRecTyKind, RK), Size(Sz) {} public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == BitsRecTyKind; } - static BitsRecTy *get(unsigned Sz); + static BitsRecTy *get(RecordKeeper &RK, unsigned Sz); unsigned getNumBits() const { return Size; } @@ -142,16 +147,16 @@ public: /// 'int' - Represent an integer value of no particular size class IntRecTy : public RecTy { - friend detail::RecordContext; + friend detail::RecordKeeperImpl; - IntRecTy() : RecTy(IntRecTyKind) {} + IntRecTy(RecordKeeper &RK) : RecTy(IntRecTyKind, RK) {} public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == IntRecTyKind; } - static IntRecTy *get(); + static IntRecTy *get(RecordKeeper &RK); std::string getAsString() const override { return "int"; } @@ -160,16 +165,16 @@ public: /// 'string' - Represent an string value class StringRecTy : public RecTy { - friend detail::RecordContext; + friend detail::RecordKeeperImpl; - StringRecTy() : RecTy(StringRecTyKind) {} + StringRecTy(RecordKeeper &RK) : RecTy(StringRecTyKind, RK) {} public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == StringRecTyKind; } - static StringRecTy *get(); + static StringRecTy *get(RecordKeeper &RK); std::string getAsString() const override; @@ -183,7 +188,8 @@ class ListRecTy : public RecTy { RecTy *ElementTy; - explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), ElementTy(T) {} + explicit ListRecTy(RecTy *T) + : RecTy(ListRecTyKind, T->getRecordKeeper()), ElementTy(T) {} public: static bool classof(const RecTy *RT) { @@ -202,16 +208,16 @@ public: /// 'dag' - Represent a dag fragment class DagRecTy : public RecTy { - friend detail::RecordContext; + friend detail::RecordKeeperImpl; - DagRecTy() : RecTy(DagRecTyKind) {} + DagRecTy(RecordKeeper &RK) : RecTy(DagRecTyKind, RK) {} public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == DagRecTyKind; } - static DagRecTy *get(); + static DagRecTy *get(RecordKeeper &RK); std::string getAsString() const override; }; @@ -223,12 +229,12 @@ public: class RecordRecTy final : public RecTy, public FoldingSetNode, public TrailingObjects { friend class Record; - friend detail::RecordContext; + friend detail::RecordKeeperImpl; unsigned NumClasses; - explicit RecordRecTy(unsigned Num) - : RecTy(RecordRecTyKind), NumClasses(Num) {} + explicit RecordRecTy(RecordKeeper &RK, unsigned Num) + : RecTy(RecordRecTyKind, RK), NumClasses(Num) {} public: RecordRecTy(const RecordRecTy &) = delete; @@ -242,7 +248,8 @@ public: } /// Get the record type with the given non-redundant list of superclasses. 
- static RecordRecTy *get(ArrayRef Classes); + static RecordRecTy *get(RecordKeeper &RK, ArrayRef Classes); + static RecordRecTy *get(Record *Class); void Profile(FoldingSetNodeID &ID) const; @@ -304,6 +311,7 @@ protected: IK_CondOpInit, IK_FoldOpInit, IK_IsAOpInit, + IK_ExistsOpInit, IK_AnonymousNameInit, IK_StringInit, IK_VarInit, @@ -327,6 +335,9 @@ public: /// Get the kind (type) of the value. InitKind getKind() const { return Kind; } + /// Get the record keeper that initialized this Init. + RecordKeeper &getRecordKeeper() const; + protected: explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {} @@ -426,6 +437,9 @@ public: /// Get the type of the Init as a RecTy. RecTy *getType() const { return ValueTy; } + /// Get the record keeper that initialized this Init. + RecordKeeper &getRecordKeeper() const { return ValueTy->getRecordKeeper(); } + Init *getCastTo(RecTy *Ty) const override; Init *convertInitializerTo(RecTy *Ty) const override; @@ -440,9 +454,12 @@ public: /// '?' - Represents an uninitialized value. class UnsetInit : public Init { - friend detail::RecordContext; + friend detail::RecordKeeperImpl; - UnsetInit() : Init(IK_UnsetInit) {} + /// The record keeper that initialized this Init. + RecordKeeper &RK; + + UnsetInit(RecordKeeper &RK) : Init(IK_UnsetInit), RK(RK) {} public: UnsetInit(const UnsetInit &) = delete; @@ -453,7 +470,10 @@ public: } /// Get the singleton unset Init. - static UnsetInit *get(); + static UnsetInit *get(RecordKeeper &RK); + + /// Get the record keeper that initialized this Init. + RecordKeeper &getRecordKeeper() const { return RK; } Init *getCastTo(RecTy *Ty) const override; Init *convertInitializerTo(RecTy *Ty) const override; @@ -473,7 +493,7 @@ public: /// 'true'/'false' - Represent a concrete initializer for a bit. 
class BitInit final : public TypedInit { - friend detail::RecordContext; + friend detail::RecordKeeperImpl; bool Value; @@ -487,7 +507,7 @@ public: return I->getKind() == IK_BitInit; } - static BitInit *get(bool V); + static BitInit *get(RecordKeeper &RK, bool V); bool getValue() const { return Value; } @@ -508,8 +528,8 @@ class BitsInit final : public TypedInit, public FoldingSetNode, public TrailingObjects { unsigned NumBits; - BitsInit(unsigned N) - : TypedInit(IK_BitsInit, BitsRecTy::get(N)), NumBits(N) {} + BitsInit(RecordKeeper &RK, unsigned N) + : TypedInit(IK_BitsInit, BitsRecTy::get(RK, N)), NumBits(N) {} public: BitsInit(const BitsInit &) = delete; @@ -522,7 +542,7 @@ public: return I->getKind() == IK_BitsInit; } - static BitsInit *get(ArrayRef Range); + static BitsInit *get(RecordKeeper &RK, ArrayRef Range); void Profile(FoldingSetNodeID &ID) const; @@ -558,8 +578,8 @@ public: class IntInit : public TypedInit { int64_t Value; - explicit IntInit(int64_t V) - : TypedInit(IK_IntInit, IntRecTy::get()), Value(V) {} + explicit IntInit(RecordKeeper &RK, int64_t V) + : TypedInit(IK_IntInit, IntRecTy::get(RK)), Value(V) {} public: IntInit(const IntInit &) = delete; @@ -569,7 +589,7 @@ public: return I->getKind() == IK_IntInit; } - static IntInit *get(int64_t V); + static IntInit *get(RecordKeeper &RK, int64_t V); int64_t getValue() const { return Value; } @@ -580,7 +600,7 @@ public: std::string getAsString() const override; Init *getBit(unsigned Bit) const override { - return BitInit::get((Value & (1ULL << Bit)) != 0); + return BitInit::get(getRecordKeeper(), (Value & (1ULL << Bit)) != 0); } }; @@ -588,8 +608,8 @@ public: class AnonymousNameInit : public TypedInit { unsigned Value; - explicit AnonymousNameInit(unsigned V) - : TypedInit(IK_AnonymousNameInit, StringRecTy::get()), Value(V) {} + explicit AnonymousNameInit(RecordKeeper &RK, unsigned V) + : TypedInit(IK_AnonymousNameInit, StringRecTy::get(RK)), Value(V) {} public: AnonymousNameInit(const AnonymousNameInit &) = delete; @@ -599,7 +619,7 @@ public: return I->getKind() == IK_AnonymousNameInit; } - static AnonymousNameInit *get(unsigned); + static AnonymousNameInit *get(RecordKeeper &RK, unsigned); unsigned getValue() const { return Value; } @@ -626,8 +646,8 @@ private: StringRef Value; StringFormat Format; - explicit StringInit(StringRef V, StringFormat Fmt) - : TypedInit(IK_StringInit, StringRecTy::get()), Value(V), Format(Fmt) {} + explicit StringInit(RecordKeeper &RK, StringRef V, StringFormat Fmt) + : TypedInit(IK_StringInit, StringRecTy::get(RK)), Value(V), Format(Fmt) {} public: StringInit(const StringInit &) = delete; @@ -637,7 +657,8 @@ public: return I->getKind() == IK_StringInit; } - static StringInit *get(StringRef, StringFormat Fmt = SF_String); + static StringInit *get(RecordKeeper &RK, StringRef, + StringFormat Fmt = SF_String); static StringFormat determineFormat(StringFormat Fmt1, StringFormat Fmt2) { return (Fmt1 == SF_Code || Fmt2 == SF_Code) ? 
SF_Code : SF_String; @@ -678,7 +699,7 @@ public: private: explicit ListInit(unsigned N, RecTy *EltTy) - : TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {} + : TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {} public: ListInit(const ListInit &) = delete; @@ -1049,8 +1070,8 @@ private: Init *Expr; IsAOpInit(RecTy *CheckType, Init *Expr) - : TypedInit(IK_IsAOpInit, IntRecTy::get()), CheckType(CheckType), - Expr(Expr) {} + : TypedInit(IK_IsAOpInit, IntRecTy::get(CheckType->getRecordKeeper())), + CheckType(CheckType), Expr(Expr) {} public: IsAOpInit(const IsAOpInit &) = delete; @@ -1075,6 +1096,40 @@ public: std::string getAsString() const override; }; +/// !exists(expr) - Dynamically determine if a record of `type` named +/// `expr` exists. +class ExistsOpInit : public TypedInit, public FoldingSetNode { +private: + RecTy *CheckType; + Init *Expr; + + ExistsOpInit(RecTy *CheckType, Init *Expr) + : TypedInit(IK_ExistsOpInit, IntRecTy::get(CheckType->getRecordKeeper())), + CheckType(CheckType), Expr(Expr) {} + +public: + ExistsOpInit(const ExistsOpInit &) = delete; + ExistsOpInit &operator=(const ExistsOpInit &) = delete; + + static bool classof(const Init *I) { return I->getKind() == IK_ExistsOpInit; } + + static ExistsOpInit *get(RecTy *CheckType, Init *Expr); + + void Profile(FoldingSetNodeID &ID) const; + + // Fold - If possible, fold this to a simpler init. Return this if not + // possible to fold. + Init *Fold(Record *CurRec, bool IsFinal = false) const; + + bool isComplete() const override { return false; } + + Init *resolveReferences(Resolver &R) const override; + + Init *getBit(unsigned Bit) const override; + + std::string getAsString() const override; +}; + /// 'Opcode' - Represent a reference to an entire variable object. class VarInit : public TypedInit { Init *VarName; @@ -1118,7 +1173,8 @@ class VarBitInit final : public TypedInit { unsigned Bit; VarBitInit(TypedInit *T, unsigned B) - : TypedInit(IK_VarBitInit, BitRecTy::get()), TI(T), Bit(B) { + : TypedInit(IK_VarBitInit, BitRecTy::get(T->getRecordKeeper())), TI(T), + Bit(B) { assert(T->getType() && (isa(T->getType()) || (isa(T->getType()) && @@ -1223,8 +1279,7 @@ class VarDefInit final : public TypedInit, public FoldingSetNode, DefInit *Def = nullptr; // after instantiation unsigned NumArgs; - explicit VarDefInit(Record *Class, unsigned N) - : TypedInit(IK_VarDefInit, RecordRecTy::get(Class)), Class(Class), NumArgs(N) {} + explicit VarDefInit(Record *Class, unsigned N); DefInit *instantiate(); @@ -1321,8 +1376,8 @@ class DagInit final : public TypedInit, public FoldingSetNode, unsigned NumArgNames; DagInit(Init *V, StringInit *VN, unsigned NumArgs, unsigned NumArgNames) - : TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN), - NumArgs(NumArgs), NumArgNames(NumArgNames) {} + : TypedInit(IK_DagInit, DagRecTy::get(V->getRecordKeeper())), Val(V), + ValName(VN), NumArgs(NumArgs), NumArgNames(NumArgNames) {} size_t numTrailingObjects(OverloadToken) const { return NumArgs; } @@ -1427,6 +1482,9 @@ public: RecordVal(Init *N, RecTy *T, FieldKind K); RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K); + /// Get the record keeper used to unique this value. + RecordKeeper &getRecordKeeper() const { return Name->getRecordKeeper(); } + /// Get the name of the field as a StringRef. 
StringRef getName() const; @@ -1527,13 +1585,14 @@ public: explicit Record(Init *N, ArrayRef locs, RecordKeeper &records, bool Anonymous = false, bool Class = false) : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records), - ID(getNewUID()), IsAnonymous(Anonymous), IsClass(Class) { + ID(getNewUID(N->getRecordKeeper())), IsAnonymous(Anonymous), + IsClass(Class) { checkName(); } explicit Record(StringRef N, ArrayRef locs, RecordKeeper &records, bool Class = false) - : Record(StringInit::get(N), locs, records, false, Class) {} + : Record(StringInit::get(records, N), locs, records, false, Class) {} // When copy-constructing a Record, we must still guarantee a globally unique // ID number. Don't copy CorrespondingDefInit either, since it's owned by the @@ -1542,9 +1601,10 @@ public: : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs), Values(O.Values), Assertions(O.Assertions), SuperClasses(O.SuperClasses), TrackedRecords(O.TrackedRecords), - ID(getNewUID()), IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) {} + ID(getNewUID(O.getRecords())), IsAnonymous(O.IsAnonymous), + IsClass(O.IsClass) {} - static unsigned getNewUID(); + static unsigned getNewUID(RecordKeeper &RK); unsigned getID() const { return ID; } @@ -1600,7 +1660,7 @@ public: } const RecordVal *getValue(StringRef Name) const { - return getValue(StringInit::get(Name)); + return getValue(StringInit::get(getRecords(), Name)); } RecordVal *getValue(const Init *Name) { @@ -1631,7 +1691,7 @@ public: } void removeValue(StringRef Name) { - removeValue(StringInit::get(Name)); + removeValue(StringInit::get(getRecords(), Name)); } void addAssertion(SMLoc Loc, Init *Condition, Init *Message) { @@ -1671,11 +1731,11 @@ public: SuperClasses.push_back(std::make_pair(R, Range)); } - /// If there are any field references that refer to fields - /// that have been filled in, we can propagate the values now. + /// If there are any field references that refer to fields that have been + /// filled in, we can propagate the values now. /// - /// This is a final resolve: any error messages, e.g. due to undefined - /// !cast references, are generated now. + /// This is a final resolve: any error messages, e.g. due to undefined !cast + /// references, are generated now. void resolveReferences(Init *NewName = nullptr); /// Apply the resolver to the name of the record as well as to the @@ -1699,11 +1759,11 @@ public: // High-level methods useful to tablegen back-ends // - ///Return the source location for the named field. + /// Return the source location for the named field. SMLoc getFieldLoc(StringRef FieldName) const; - /// Return the initializer for a value with the specified name, - /// or throw an exception if the field does not exist. + /// Return the initializer for a value with the specified name, or throw an + /// exception if the field does not exist. Init *getValueInit(StringRef FieldName) const; /// Return true if the named field is unset. @@ -1711,96 +1771,85 @@ public: return isa(getValueInit(FieldName)); } - /// This method looks up the specified field and returns - /// its value as a string, throwing an exception if the field does not exist - /// or if the value is not a string. + /// This method looks up the specified field and returns its value as a + /// string, throwing an exception if the field does not exist or if the value + /// is not a string. 
StringRef getValueAsString(StringRef FieldName) const; - /// This method looks up the specified field and returns - /// its value as a string, throwing an exception if the field if the value is - /// not a string and llvm::Optional() if the field does not exist. + /// This method looks up the specified field and returns its value as a + /// string, throwing an exception if the value is not a string and + /// llvm::Optional() if the field does not exist. llvm::Optional getValueAsOptionalString(StringRef FieldName) const; - /// This method looks up the specified field and returns - /// its value as a BitsInit, throwing an exception if the field does not exist - /// or if the value is not the right type. + /// This method looks up the specified field and returns its value as a + /// BitsInit, throwing an exception if the field does not exist or if the + /// value is not the right type. BitsInit *getValueAsBitsInit(StringRef FieldName) const; - /// This method looks up the specified field and returns - /// its value as a ListInit, throwing an exception if the field does not exist - /// or if the value is not the right type. + /// This method looks up the specified field and returns its value as a + /// ListInit, throwing an exception if the field does not exist or if the + /// value is not the right type. ListInit *getValueAsListInit(StringRef FieldName) const; - /// This method looks up the specified field and - /// returns its value as a vector of records, throwing an exception if the - /// field does not exist or if the value is not the right type. + /// This method looks up the specified field and returns its value as a + /// vector of records, throwing an exception if the field does not exist or + /// if the value is not the right type. std::vector getValueAsListOfDefs(StringRef FieldName) const; - /// This method looks up the specified field and - /// returns its value as a vector of integers, throwing an exception if the - /// field does not exist or if the value is not the right type. + /// This method looks up the specified field and returns its value as a + /// vector of integers, throwing an exception if the field does not exist or + /// if the value is not the right type. std::vector getValueAsListOfInts(StringRef FieldName) const; - /// This method looks up the specified field and - /// returns its value as a vector of strings, throwing an exception if the - /// field does not exist or if the value is not the right type. + /// This method looks up the specified field and returns its value as a + /// vector of strings, throwing an exception if the field does not exist or + /// if the value is not the right type. std::vector getValueAsListOfStrings(StringRef FieldName) const; - /// This method looks up the specified field and returns its - /// value as a Record, throwing an exception if the field does not exist or if - /// the value is not the right type. + /// This method looks up the specified field and returns its value as a + /// Record, throwing an exception if the field does not exist or if the value + /// is not the right type. Record *getValueAsDef(StringRef FieldName) const; /// This method looks up the specified field and returns its value as a - /// Record, returning null if the field exists but is "uninitialized" - /// (i.e. set to `?`), and throwing an exception if the field does not - /// exist or if its value is not the right type. + /// Record, returning null if the field exists but is "uninitialized" (i.e. 
+ /// set to `?`), and throwing an exception if the field does not exist or if + /// its value is not the right type. Record *getValueAsOptionalDef(StringRef FieldName) const; - /// This method looks up the specified field and returns its - /// value as a bit, throwing an exception if the field does not exist or if - /// the value is not the right type. + /// This method looks up the specified field and returns its value as a bit, + /// throwing an exception if the field does not exist or if the value is not + /// the right type. bool getValueAsBit(StringRef FieldName) const; - /// This method looks up the specified field and - /// returns its value as a bit. If the field is unset, sets Unset to true and - /// returns false. + /// This method looks up the specified field and returns its value as a bit. + /// If the field is unset, sets Unset to true and returns false. bool getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const; - /// This method looks up the specified field and returns its - /// value as an int64_t, throwing an exception if the field does not exist or - /// if the value is not the right type. + /// This method looks up the specified field and returns its value as an + /// int64_t, throwing an exception if the field does not exist or if the + /// value is not the right type. int64_t getValueAsInt(StringRef FieldName) const; - /// This method looks up the specified field and returns its - /// value as an Dag, throwing an exception if the field does not exist or if - /// the value is not the right type. + /// This method looks up the specified field and returns its value as an Dag, + /// throwing an exception if the field does not exist or if the value is not + /// the right type. DagInit *getValueAsDag(StringRef FieldName) const; }; raw_ostream &operator<<(raw_ostream &OS, const Record &R); class RecordKeeper { - friend class RecordRecTy; - using RecordMap = std::map, std::less<>>; using GlobalMap = std::map>; - std::string InputFilename; - RecordMap Classes, Defs; - mutable StringMap> ClassRecordsMap; - FoldingSet RecordTypePool; - std::map> ExtraGlobals; - unsigned AnonCounter = 0; +public: + RecordKeeper(); + ~RecordKeeper(); - // These members are for the phase timing feature. We need a timer group, - // the last timer started, and a flag to say whether the last timer - // is the special "backend overall timer." - TimerGroup *TimingGroup = nullptr; - Timer *LastTimer = nullptr; - bool BackendTimer = false; + /// Return the internal implementation of the RecordKeeper. + detail::RecordKeeperImpl &getImpl() { return *Impl; } -public: /// Get the main TableGen input file's name. const std::string getInputFilename() const { return InputFilename; } @@ -1896,7 +1945,33 @@ public: std::vector getAllDerivedDefinitions( ArrayRef ClassNames) const; + /// Get all the concrete records that inherit from specified class, if the + /// class is defined. Returns an empty vector if the class is not defined. + std::vector + getAllDerivedDefinitionsIfDefined(StringRef ClassName) const; + void dump() const; + +private: + RecordKeeper(RecordKeeper &&) = delete; + RecordKeeper(const RecordKeeper &) = delete; + RecordKeeper &operator=(RecordKeeper &&) = delete; + RecordKeeper &operator=(const RecordKeeper &) = delete; + + std::string InputFilename; + RecordMap Classes, Defs; + mutable StringMap> ClassRecordsMap; + GlobalMap ExtraGlobals; + + // These members are for the phase timing feature. 
We need a timer group, + // the last timer started, and a flag to say whether the last timer + // is the special "backend overall timer." + TimerGroup *TimingGroup = nullptr; + Timer *LastTimer = nullptr; + bool BackendTimer = false; + + /// The internal uniquer implementation of the RecordKeeper. + std::unique_ptr Impl; }; /// Sorting predicate to sort record pointers by name. diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index f84889392d13..7a6d91061701 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -42,6 +42,7 @@ struct CGPassBuilderOption { bool DisableMergeICmps = false; bool DisablePartialLibcallInlining = false; bool DisableConstantHoisting = false; + bool DisableSelectOptimize = true; bool PrintISelInput = false; bool PrintGCInfo = false; bool RequiresCodeGenSCCOrder = false; diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 2af20ab6a53f..3e2f18b57d1e 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -745,6 +745,13 @@ def G_FCANONICALIZE : GenericInstruction { let hasSideEffects = false; } +// Generic opcode equivalent to the llvm.is_fpclass intrinsic. +def G_IS_FPCLASS: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src, unknown:$test, unknown:$fpsem); + let hasSideEffects = false; +} + // FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two // values. // @@ -965,6 +972,12 @@ def G_FNEARBYINT : GenericInstruction { //------------------------------------------------------------------------------ // Opcodes for LLVM Intrinsics //------------------------------------------------------------------------------ +def G_INTRINSIC_FPTRUNC_ROUND : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src1, i32imm:$round_mode); + let hasSideEffects = false; +} + def G_INTRINSIC_TRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 4859cf6b57b7..89f08d200021 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -118,6 +118,7 @@ def int64_matchinfo: GIDefMatchData<"int64_t">; def apint_matchinfo : GIDefMatchData<"APInt">; def build_fn_matchinfo : GIDefMatchData<"std::function">; +def unsigned_matchinfo: GIDefMatchData<"unsigned">; def copy_prop : GICombineRule< (defs root:$d), @@ -234,6 +235,12 @@ def binop_left_undef_to_zero: GICombineRule< [{ return Helper.matchOperandIsUndef(*${root}, 1); }]), (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>; +def binop_right_undef_to_undef: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root, + [{ return Helper.matchOperandIsUndef(*${root}, 2); }]), + (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; + // Instructions where if any source operand is undef, the instruction can be // replaced with undef. 
def propagate_undef_any_op: GICombineRule< @@ -283,6 +290,13 @@ def select_constant_cmp: GICombineRule< (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }]) >; +def select_to_logical : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SELECT):$root, + [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }]) +>; + // Fold x op 0 -> x def right_identity_zero: GICombineRule< (defs root:$root), @@ -323,6 +337,26 @@ def urem_pow2_to_mask : GICombineRule< (apply [{ Helper.applySimplifyURemByPow2(*${root}); }]) >; +// Push a binary operator through a select on constants. +// +// binop (select cond, K0, K1), K2 -> +// select cond, (binop K0, K2), (binop K1, K2) + +// Every binary operator that has constant folding. We currently do +// not have constant folding for G_FPOW, G_FMAXNUM_IEEE or +// G_FMINNUM_IEEE. +def fold_binop_into_select : GICombineRule< + (defs root:$root, unsigned_matchinfo:$select_op_no), + (match (wip_match_opcode + G_ADD, G_SUB, G_PTR_ADD, G_AND, G_OR, G_XOR, + G_SDIV, G_SREM, G_UDIV, G_UREM, G_LSHR, G_ASHR, G_SHL, + G_SMIN, G_SMAX, G_UMIN, G_UMAX, + G_FMUL, G_FADD, G_FSUB, G_FDIV, G_FREM, + G_FMINNUM, G_FMAXNUM, G_FMINIMUM, G_FMAXIMUM):$root, + [{ return Helper.matchFoldBinOpIntoSelect(*${root}, ${select_op_no}); }]), + (apply [{ return Helper.applyFoldBinOpIntoSelect(*${root}, ${select_op_no}); }]) +>; + // Transform d = [su]div(x, y) and r = [su]rem(x, y) - > d, r = [su]divrem(x, y) def div_rem_to_divrem_matchdata : GIDefMatchData<"MachineInstr *">; def div_rem_to_divrem : GICombineRule< @@ -753,6 +787,18 @@ def mulo_by_2: GICombineRule< [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +def mulo_by_0: GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_UMULO, G_SMULO):$root, + [{ return Helper.matchMulOBy0(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + +def addo_by_0: GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_UADDO, G_SADDO):$root, + [{ return Helper.matchAddOBy0(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def mulh_to_lshr : GICombineRule< (defs root:$root), (match (wip_match_opcode G_UMULH):$root, @@ -845,10 +891,26 @@ def combine_fsub_fpext_fneg_fmul_to_fmad_or_fma: GICombineRule< *${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; +def combine_minmax_nan: GICombineRule< + (defs root:$root, unsigned_matchinfo:$info), + (match (wip_match_opcode G_FMINNUM, G_FMAXNUM, G_FMINIMUM, G_FMAXIMUM):$root, + [{ return Helper.matchCombineFMinMaxNaN(*${root}, ${info}); }]), + (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${info}); }])>; + +// Transform (add x, (sub y, x)) -> y +// Transform (add (sub y, x), x) -> y +def add_sub_reg: GICombineRule < + (defs root:$root, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_ADD):$root, + [{ return Helper.matchAddSubSameReg(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, + ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
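The fold_binop_into_select rule above pushes a binary operator through a select whose arms are constants, so that both arms constant-fold and only the select survives. An illustrative source-level analogue of the rewrite (plain C++ with made-up constants; the combine itself operates on generic MachineIR, not on source code):

  // binop (select cond, K0, K1), K2
  int before(bool Cond) {
    const int K0 = 3, K1 = 7, K2 = 5;
    return (Cond ? K0 : K1) + K2;
  }

  // select cond, (binop K0, K2), (binop K1, K2), with both arms folded
  int after(bool Cond) { return Cond ? 8 : 12; }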
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, binop_left_undef_to_zero, + binop_right_undef_to_undef, propagate_undef_any_op, propagate_undef_all_ops, propagate_undef_shuffle_mask, @@ -859,10 +921,12 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, binop_same_val, binop_left_to_zero, binop_right_to_zero, p2i_to_i2p, i2p_to_p2i, anyext_trunc_fold, - fneg_fneg_fold, right_identity_one]>; + fneg_fneg_fold, right_identity_one, + add_sub_reg]>; def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p, - overlapping_and, mulo_by_2]>; + overlapping_and, mulo_by_2, mulo_by_0, + addo_by_0, combine_minmax_nan]>; def known_bits_simplifications : GICombineGroup<[ redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask, @@ -873,7 +937,8 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, def phi_combines : GICombineGroup<[extend_through_phis]>; -def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>; +def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp, + select_to_logical]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one]>; @@ -900,7 +965,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, truncstore_merge, div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, - and_or_disjoint_mask, fma_combines]>; + and_or_disjoint_mask, fma_combines, fold_binop_into_select]>; // A combine group used for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index d8faa63ee877..c5b2462dc868 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -279,6 +279,8 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, // heuristic. Classes with higher priority values are assigned first. This is // useful as it is sometimes beneficial to assign registers to highly // constrained classes first. The value has to be in the range [0,63]. + // Values >= 32 should be used with care since they may overlap with other + // fields in the allocator's priority heuristics. int AllocationPriority = 0; // Generate register pressure set for this register class and any class @@ -389,6 +391,14 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs, list<string> RegAsmNames = RegNames; } +// RegisterCategory - This class is a list of RegisterClasses that belong to a +// general category --- e.g. "general purpose" or "fixed" registers. This is +// useful for identifying registers in a generic way instead of having +// information about a specific target's registers. +class RegisterCategory<list<RegisterClass> classes> { + // Classes - A list of register classes that fall within the category. + list<RegisterClass> Classes = classes; +} //===----------------------------------------------------------------------===// // DwarfRegNum - This class provides a mapping of the llvm register enumeration @@ -560,6 +570,9 @@ class Instruction : InstructionEncoding { bit isPseudo = false; // Is this instruction a pseudo-instruction? // If so, won't have encoding information for // the [MC]CodeEmitter stuff. + bit isMeta = false; // Is this instruction a meta-instruction?
+ // If so, won't produce any output in the form of + // executable instructions. bit isExtractSubreg = false; // Is this instruction a kind of extract subreg? // If so, make sure to override // TargetInstrInfo::getExtractSubregLikeInputs. @@ -748,6 +761,33 @@ def ins; /// of operands. def variable_ops; +/// variable-length instruction encoding utilities. /// The `ascend` operator should be used like this: /// (ascend 0b0010, 0b1101) /// This represents a sequence of encoding fragments placed from LSB to MSB. /// Thus, in this case the final encoding will be 0b1101_0010. /// The arguments for `ascend` can either be `bits` or another DAG. def ascend; /// In addition, we can use `descend` to describe an encoding that places /// its arguments (i.e. encoding fragments) from MSB to LSB. For instance: /// (descend 0b0010, 0b1101) /// This results in an encoding of 0b0010_1101. def descend; /// The `operand` operator should be used like this: /// (operand "$src", 4) /// This represents a 4-bit encoding for an instruction operand named `$src`. def operand; /// Similar to `operand`, we can reference only part of the operand's encoding: /// (slice "$src", 6, 8) /// (slice "$src", 8, 6) /// Both DAGs represent bits 6 to 8 (total of 3 bits) in the encoding of operand /// `$src`. def slice; /// You can use `encoder` to specify a custom encoder function for a specific /// `operand` or `slice` directive. For example: /// (operand "$src", 4, (encoder "encodeMyImm")) /// (slice "$src", 8, 6, (encoder "encodeMyReg")) def encoder; /// PointerLikeRegClass - Values that are designed to have pointer width are /// derived from this. TableGen treats the register class as having a symbolic @@ -1064,6 +1104,7 @@ def CFI_INSTRUCTION : StandardPseudoInstruction { let hasCtrlDep = true; let hasSideEffects = false; let isNotDuplicable = true; + let isMeta = true; } def EH_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); @@ -1072,6 +1113,7 @@ def EH_LABEL : StandardPseudoInstruction { let hasCtrlDep = true; let hasSideEffects = false; let isNotDuplicable = true; + let isMeta = true; } def GC_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); @@ -1080,6 +1122,7 @@ def GC_LABEL : StandardPseudoInstruction { let hasCtrlDep = true; let hasSideEffects = false; let isNotDuplicable = true; + let isMeta = true; } def ANNOTATION_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); @@ -1094,6 +1137,7 @@ def KILL : StandardPseudoInstruction { let InOperandList = (ins variable_ops); let AsmString = ""; let hasSideEffects = false; + let isMeta = true; } def EXTRACT_SUBREG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); @@ -1115,6 +1159,7 @@ def IMPLICIT_DEF : StandardPseudoInstruction { let hasSideEffects = false; let isReMaterializable = true; let isAsCheapAsAMove = true; + let isMeta = true; } def SUBREG_TO_REG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); @@ -1134,30 +1179,35 @@ def DBG_VALUE : StandardPseudoInstruction { let InOperandList = (ins variable_ops); let AsmString = "DBG_VALUE"; let hasSideEffects = false; + let isMeta = true; } def DBG_VALUE_LIST : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "DBG_VALUE_LIST"; let hasSideEffects = 0; + let isMeta = true; } def DBG_INSTR_REF : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "DBG_INSTR_REF"; let hasSideEffects = false; +
let isMeta = true; } def DBG_PHI : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "DBG_PHI"; let hasSideEffects = 0; + let isMeta = true; } def DBG_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$label); let AsmString = "DBG_LABEL"; let hasSideEffects = false; + let isMeta = true; } def REG_SEQUENCE : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); @@ -1185,18 +1235,21 @@ def LIFETIME_START : StandardPseudoInstruction { let InOperandList = (ins i32imm:$id); let AsmString = "LIFETIME_START"; let hasSideEffects = false; + let isMeta = true; } def LIFETIME_END : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = "LIFETIME_END"; let hasSideEffects = false; + let isMeta = true; } def PSEUDO_PROBE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i64imm:$guid, i64imm:$index, i8imm:$type, i32imm:$attr); let AsmString = "PSEUDO_PROBE"; let hasSideEffects = 1; + let isMeta = true; } def ARITH_FENCE : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); @@ -1204,6 +1257,7 @@ def ARITH_FENCE : StandardPseudoInstruction { let AsmString = ""; let hasSideEffects = false; let Constraints = "$src = $dst"; + let isMeta = true; } def STACKMAP : StandardPseudoInstruction { diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 392ee4334cb5..0c09cfe68478 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -20,6 +20,7 @@ namespace llvm { +struct Align; class Constant; class DataLayout; class Function; @@ -276,7 +277,7 @@ public: } /// If supported, return the function entry point symbol. - /// Otherwise, returns nulltpr. + /// Otherwise, returns nullptr. /// Func must be a function or an alias which has a function as base object. virtual MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const { diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index acfb265a9ff9..bf37ad7010ec 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -18,7 +18,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" #include "llvm/Support/PGOOptions.h" @@ -30,8 +29,6 @@ namespace llvm { class AAManager; -template -class PassManager; using ModulePassManager = PassManager; class Function; @@ -225,7 +222,10 @@ public: /// Returns the code model. The choices are small, kernel, medium, large, and /// target default. - CodeModel::Model getCodeModel() const; + CodeModel::Model getCodeModel() const { return CMModel; } + + /// Set the code model. + void setCodeModel(CodeModel::Model CM) { CMModel = CM; } bool isPositionIndependent() const; @@ -260,6 +260,8 @@ public: Options.SupportsDebugEntryValues = Enable; } + void setCFIFixup(bool Enable) { Options.EnableCFIFixup = Enable; } + bool getAIXExtendedAltivecABI() const { return Options.EnableAIXExtendedAltivecABI; } @@ -337,13 +339,13 @@ public: /// This is used to construct the new pass manager's target IR analysis pass, /// set up appropriately for this target machine. Even the old pass manager /// uses this to answer queries about the IR. 
- TargetIRAnalysis getTargetIRAnalysis(); + TargetIRAnalysis getTargetIRAnalysis() const; /// Return a TargetTransformInfo for a given function. /// /// The returned TargetTransformInfo is specialized to the subtarget /// corresponding to \p F. - virtual TargetTransformInfo getTargetTransformInfo(const Function &F); + virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const; /// Allow the target to modify the pass manager, e.g. by calling /// PassManagerBuilder::addExtension. @@ -398,6 +400,12 @@ public: virtual unsigned getSjLjDataSize() const { return DefaultSjLjDataSize; } static std::pair parseBinutilsVersion(StringRef Version); + + /// getAddressSpaceForPseudoSourceKind - Given the kind of memory + /// (e.g. stack) the target returns the corresponding address space. + virtual unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const { + return 0; + } }; /// This class describes a target machine that is implemented with the LLVM @@ -417,7 +425,7 @@ public: /// /// The TTI returned uses the common code generator to answer queries about /// the IR. - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; /// Create a pass configuration object to be used by addPassToEmitX methods /// for generating a pipeline of CodeGen passes. diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index a636c4822832..6083d18d96f7 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -130,19 +130,21 @@ namespace llvm { HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), - DisableIntegratedAS(false), RelaxELFRelocations(false), - FunctionSections(false), DataSections(false), - IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true), - UniqueSectionNames(true), UniqueBasicBlockSectionNames(false), - TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), - EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), + LowerGlobalDtorsViaCxaAtExit(false), DisableIntegratedAS(false), + RelaxELFRelocations(false), FunctionSections(false), + DataSections(false), IgnoreXCOFFVisibility(false), + XCOFFTracebackTable(true), UniqueSectionNames(true), + UniqueBasicBlockSectionNames(false), TrapUnreachable(false), + NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), + ExplicitEmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), SupportsDebugEntryValues(false), EnableDebugEntryValues(false), ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), DebugStrictDwarf(false), - Hotpatch(false), + Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false), + EnableCFIFixup(false), MisExpect(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} /// DisableFramePointerElim - This returns true if frame pointer elimination @@ -245,6 +247,10 @@ namespace llvm { /// constructors. unsigned UseInitArray : 1; + /// Use __cxa_atexit to register global destructors; determines how + /// llvm.global_dtors is lowered. + unsigned LowerGlobalDtorsViaCxaAtExit : 1; + /// Disable the integrated assembler. 
unsigned DisableIntegratedAS : 1; @@ -345,6 +351,19 @@ namespace llvm { /// Emit the hotpatch flag in CodeView debug. unsigned Hotpatch : 1; + /// Enables scalar MASS conversions + unsigned PPCGenScalarMASSEntries : 1; + + /// Enable JustMyCode instrumentation. + unsigned JMCInstrument : 1; + + /// Enable the CFIFixup pass. + unsigned EnableCFIFixup : 1; + + /// When set to true, enable MisExpect Diagnostics + /// By default, it is set to false + unsigned MisExpect : 1; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index d8ef7c49a5f9..47b686aca7b5 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -238,6 +238,16 @@ def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load SDTCisSameNumEltsAs<0, 3> ]>; +def SDTMaskedGather : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisPtrTy<3>, SDTCisVec<4>, + SDTCisSameNumEltsAs<0, 2>, SDTCisSameNumEltsAs<0, 4> +]>; + +def SDTMaskedScatter : SDTypeProfile<0, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, + SDTCisSameNumEltsAs<0, 1>, SDTCisSameNumEltsAs<0, 3> +]>; + def SDTVecShuffle : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; @@ -365,6 +375,10 @@ def mul : SDNode<"ISD::MUL" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def mulhs : SDNode<"ISD::MULHS" , SDTIntBinOp, [SDNPCommutative]>; def mulhu : SDNode<"ISD::MULHU" , SDTIntBinOp, [SDNPCommutative]>; +def avgfloors : SDNode<"ISD::AVGFLOORS" , SDTIntBinOp, [SDNPCommutative]>; +def avgflooru : SDNode<"ISD::AVGFLOORU" , SDTIntBinOp, [SDNPCommutative]>; +def avgceils : SDNode<"ISD::AVGCEILS" , SDTIntBinOp, [SDNPCommutative]>; +def avgceilu : SDNode<"ISD::AVGCEILU" , SDTIntBinOp, [SDNPCommutative]>; def abds : SDNode<"ISD::ABDS" , SDTIntBinOp, [SDNPCommutative]>; def abdu : SDNode<"ISD::ABDU" , SDTIntBinOp, [SDNPCommutative]>; def smullohi : SDNode<"ISD::SMUL_LOHI" , SDTIntBinHiLoOp, [SDNPCommutative]>; @@ -648,6 +662,12 @@ def masked_st : SDNode<"ISD::MSTORE", SDTMaskedStore, def masked_ld : SDNode<"ISD::MLOAD", SDTMaskedLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + // Do not use ld, st directly. Use load, extload, sextload, zextload, store, // and truncst (see below). def ld : SDNode<"ISD::LOAD" , SDTLoad, @@ -1624,6 +1644,124 @@ def atomic_load_64 : let MemoryVT = i64; } +def nonext_masked_gather : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + return cast(N)->getExtensionType() == ISD::NON_EXTLOAD; +}]>; + +// Any extending masked gather fragments. 
+def ext_masked_gather_i8 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::EXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def ext_masked_gather_i16 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::EXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def ext_masked_gather_i32 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::EXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +// Sign extending masked gather fragments. +def sext_masked_gather_i8 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::SEXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def sext_masked_gather_i16 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::SEXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def sext_masked_gather_i32 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::SEXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +// Zero extending masked gather fragments. +def zext_masked_gather_i8 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::ZEXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def zext_masked_gather_i16 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::ZEXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def zext_masked_gather_i32 : + PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), + (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ + auto MGN = cast(N); + return MGN->getExtensionType() == ISD::ZEXTLOAD && + MGN->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +// Any/Zero extending masked gather fragments. 
+def azext_masked_gather_i8 : + PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx), + [(ext_masked_gather_i8 node:$def, node:$pred, node:$ptr, node:$idx), + (zext_masked_gather_i8 node:$def, node:$pred, node:$ptr, node:$idx)]>; +def azext_masked_gather_i16 : + PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx), + [(ext_masked_gather_i16 node:$def, node:$pred, node:$ptr, node:$idx), + (zext_masked_gather_i16 node:$def, node:$pred, node:$ptr, node:$idx)]>; +def azext_masked_gather_i32 : + PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx), + [(ext_masked_gather_i32 node:$def, node:$pred, node:$ptr, node:$idx), + (zext_masked_gather_i32 node:$def, node:$pred, node:$ptr, node:$idx)]>; + +def nontrunc_masked_scatter : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ + return !cast(N)->isTruncatingStore(); +}]>; + +// Truncating masked scatter fragments. +def trunc_masked_scatter_i8 : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ + auto MSN = cast(N); + return MSN->isTruncatingStore() && + MSN->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def trunc_masked_scatter_i16 : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ + auto MSN = cast(N); + return MSN->isTruncatingStore() && + MSN->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def trunc_masked_scatter_i32 : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ + auto MSN = cast(N); + return MSN->isTruncatingStore() && + MSN->getMemoryVT().getScalarType() == MVT::i32; +}]>; + //===----------------------------------------------------------------------===// // Selection DAG Pattern Support. // diff --git a/llvm/include/llvm/Testing/Support/SupportHelpers.h b/llvm/include/llvm/Testing/Support/SupportHelpers.h index 2419fc95d817..b1c59cf97f7f 100644 --- a/llvm/include/llvm/Testing/Support/SupportHelpers.h +++ b/llvm/include/llvm/Testing/Support/SupportHelpers.h @@ -77,7 +77,7 @@ public: bool MatchAndExplain(const llvm::Optional &Input, testing::MatchResultListener *L) const override { - return Input && ValueMatcher.MatchAndExplain(Input.getValue(), L); + return Input && ValueMatcher.MatchAndExplain(*Input, L); } void DescribeTo(std::ostream *OS) const override { @@ -238,6 +238,12 @@ public: } } + TempFile(const TempFile &) = delete; + TempFile &operator=(const TempFile &) = delete; + + TempFile(TempFile &&) = default; + TempFile &operator=(TempFile &&) = default; + /// The path to the file. 
diff --git a/llvm/include/llvm/TextAPI/Symbol.h b/llvm/include/llvm/TextAPI/Symbol.h
index dfc84908bba2..1c25295b299d 100644
--- a/llvm/include/llvm/TextAPI/Symbol.h
+++ b/llvm/include/llvm/TextAPI/Symbol.h
@@ -11,7 +11,6 @@

 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TextAPI/ArchitectureSet.h"
 #include "llvm/TextAPI/Target.h"
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 072ccf7320e8..3931c9c55c07 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -18,10 +18,12 @@
 #define LLVM_TRANSFORMS_AGGRESSIVEINSTCOMBINE_AGGRESSIVEINSTCOMBINE_H

 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"

 namespace llvm {

+class Function;
+class FunctionPass;
+
 class AggressiveInstCombinePass
     : public PassInfoMixin<AggressiveInstCombinePass> {
diff --git a/llvm/include/llvm/Transforms/Coroutines.h b/llvm/include/llvm/Transforms/Coroutines.h
deleted file mode 100644
index f68ef705fdef..000000000000
--- a/llvm/include/llvm/Transforms/Coroutines.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- Coroutines.h - Coroutine Transformations ----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Declare accessor functions for coroutine lowering passes.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_COROUTINES_H
-#define LLVM_TRANSFORMS_COROUTINES_H
-
-namespace llvm {
-
-class Pass;
-class PassManagerBuilder;
-
-/// Add all coroutine passes to appropriate extension points.
-void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder);
-
-/// Lower coroutine intrinsics that are not needed by later passes.
-Pass *createCoroEarlyLegacyPass();
-
-/// Split up coroutines into multiple functions driving their state machines.
-Pass *createCoroSplitLegacyPass(bool IsOptimizing = false);
-
-/// Analyze coroutines use sites, devirtualize resume/destroy calls and elide
-/// heap allocation for coroutine frame where possible.
-Pass *createCoroElideLegacyPass();
-
-/// Lower all remaining coroutine intrinsics.
-Pass *createCoroCleanupLegacyPass();
-
-}
-
-#endif
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h b/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
index 7ecdc050335d..3000a38258f4 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
@@ -18,10 +18,10 @@

 namespace llvm {

-class Function;
+class Module;

 struct CoroCleanupPass : PassInfoMixin<CoroCleanupPass> {
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
   static bool isRequired() { return true; }
 };
 } // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h b/llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h
new file mode 100644
index 000000000000..ea19ec533c4d
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h
@@ -0,0 +1,30 @@
+//===---- CoroConditionalWrapper.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_COROCONDITIONALWRAPPER_H
+#define LLVM_TRANSFORMS_COROUTINES_COROCONDITIONALWRAPPER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+// Only runs passes in the contained pass manager if the module contains any
+// coroutine intrinsic declarations.
+struct CoroConditionalWrapper : PassInfoMixin<CoroConditionalWrapper> {
+  CoroConditionalWrapper(ModulePassManager &&);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  static bool isRequired() { return true; }
+
+private:
+  ModulePassManager PM;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_COROCONDITIONALWRAPPER_H
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h b/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
index 3f5ec2abd172..d55dcc6dfa6d 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
@@ -21,10 +21,10 @@

 namespace llvm {

-class Function;
+class Module;

 struct CoroEarlyPass : PassInfoMixin<CoroEarlyPass> {
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
   static bool isRequired() { return true; }
 };
 } // end namespace llvm
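// Hedged illustration (not part of the patch): with CoroEarlyPass and
// CoroCleanupPass now running on whole modules, a coroutine sub-pipeline can
// be gated behind CoroConditionalWrapper, roughly:
//
//   ModulePassManager CoroPM;
//   CoroPM.addPass(CoroEarlyPass());
//   CoroPM.addPass(CoroCleanupPass());
//   ModulePassManager MPM;
//   MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
//
// The wrapped passes then only run when the module declares any llvm.coro.*
// intrinsics, keeping coroutine lowering cheap for non-coroutine code.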
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 67b9a93c47b2..6b7d4f4821f0 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -151,13 +151,6 @@ ModulePass *createDeadArgEliminationPass();
 /// bugpoint.
 ModulePass *createDeadArgHackingPass();

-//===----------------------------------------------------------------------===//
-/// createArgumentPromotionPass - This pass promotes "by reference" arguments to
-/// be passed by value if the number of elements passed is smaller or
-/// equal to maxElements (maxElements == 0 means always promote).
-///
-Pass *createArgumentPromotionPass(unsigned maxElements = 3);
-
 //===----------------------------------------------------------------------===//
 /// createOpenMPOptLegacyPass - OpenMP specific optimizations.
 Pass *createOpenMPOptCGSCCLegacyPass();
diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
index 78b2f909f1c9..252cfd4dc5f3 100644
--- a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -15,10 +15,12 @@
 #define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H

 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"

 namespace llvm {

+class Module;
+class Pass;
+
 /// Inlines functions marked as "always_inline".
 ///
 /// Note that this does not inline call sites marked as always_inline and does
diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 225def99678a..3865f098b8de 100644
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -14,7 +14,6 @@
 #include "llvm/IR/PassManager.h"

 namespace llvm {
-class TargetTransformInfo;

 /// Argument promotion pass.
 ///
@@ -25,10 +24,7 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
   unsigned MaxElements;

 public:
-  ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
-
-  /// Checks if a type could have padding bytes.
-  static bool isDenselyPacked(Type *type, const DataLayout &DL);
+  ArgumentPromotionPass(unsigned MaxElements = 2u) : MaxElements(MaxElements) {}

   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                         LazyCallGraph &CG, CGSCCUpdateResult &UR);
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 7eee16f71d64..17e29695ab73 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -116,15 +116,24 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/AbstractCallSite.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"

+#include
+
 namespace llvm {

+class DataLayout;
+class LLVMContext;
+class Pass;
+template <typename Fn> class function_ref;
 struct AADepGraphNode;
 struct AADepGraph;
 struct Attributor;
@@ -140,6 +149,24 @@ class Function;

 /// Abstract Attribute helper functions.
 namespace AA {
+/// Flags to distinguish intra-procedural queries from *potentially*
+/// inter-procedural queries. Note that information can be valid for both and
+/// therefore both bits might be set.
+enum ValueScope : uint8_t {
+  Intraprocedural = 1,
+  Interprocedural = 2,
+};
+
+struct ValueAndContext : public std::pair<Value *, const Instruction *> {
+  using Base = std::pair<Value *, const Instruction *>;
+  ValueAndContext(const Base &B) : Base(B) {}
+  ValueAndContext(Value &V, const Instruction *CtxI) : Base(&V, CtxI) {}
+  ValueAndContext(Value &V, const Instruction &CtxI) : Base(&V, &CtxI) {}
+
+  Value *getValue() const { return this->first; }
+  const Instruction *getCtxI() const { return this->second; }
+};
+
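// Hedged illustration (not part of the patch): since ValueAndContext derives
// from std::pair and a DenseMapInfo specialization is provided further down
// in this header, a value/context pair can key a DenseMap directly; V and
// CtxI are assumed locals:
//
//   DenseMap<AA::ValueAndContext, Constant *> SimplifiedByContext;
//   AA::ValueAndContext VAC(*V, CtxI);
//   SimplifiedByContext[VAC] = nullptr;   // cache "not simplifiable here"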
 /// Return true if \p I is a `nosync` instruction. Use generic reasoning and
 /// potentially the corresponding AANoSync.
 bool isNoSyncInst(Attributor &A, const Instruction &I,
@@ -147,18 +174,20 @@ bool isNoSyncInst(Attributor &A, const Instruction &I,

 /// Return true if \p V is dynamically unique, that is, there are no two
 /// "instances" of \p V at runtime with different values.
+/// Note: If \p ForAnalysisOnly is set we only check that the Attributor will
+/// never use \p V to represent two "instances" not that \p V could not
+/// technically represent them.
 bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
-                         const Value &V);
+                         const Value &V, bool ForAnalysisOnly = true);

 /// Return true if \p V is a valid value in \p Scope, that is a constant or an
 /// instruction/argument of \p Scope.
 bool isValidInScope(const Value &V, const Function *Scope);

-/// Return true if \p V is a valid value at position \p CtxI, that is a
-/// constant, an argument of the same function as \p CtxI, or an instruction in
-/// that function that dominates \p CtxI.
-bool isValidAtPosition(const Value &V, const Instruction &CtxI,
-                       InformationCache &InfoCache);
+/// Return true if the value of \p VAC is valid at the position of \p VAC,
+/// that is a constant, an argument of the same function, or an instruction in
+/// that function that dominates the position.
+bool isValidAtPosition(const ValueAndContext &VAC, InformationCache &InfoCache);

 /// Try to convert \p V to type \p Ty without introducing new instructions. If
 /// this is not possible return `nullptr`. Note: this function basically knows
@@ -192,11 +221,29 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
                                  SmallVectorImpl<Value *> &Objects,
                                  const AbstractAttribute &QueryingAA,
                                  const Instruction *CtxI,
-                                 bool Intraprocedural = false);
+                                 bool &UsedAssumedInformation,
+                                 AA::ValueScope VS = Interprocedural);
+
+/// Collect all potential values \p LI could read into \p PotentialValues. That
+/// is, the only values read by \p LI are assumed to be known and all are in
+/// \p PotentialValues. \p PotentialValueOrigins will contain all the
+/// instructions that might have put a potential value into \p PotentialValues.
+/// Dependences onto \p QueryingAA are properly tracked, \p
+/// UsedAssumedInformation will inform the caller if assumed information was
+/// used.
+///
+/// \returns True if the assumed potential copies are all in \p PotentialValues,
+///          false if something went wrong and the copies could not be
+///          determined.
+bool getPotentiallyLoadedValues(
+    Attributor &A, LoadInst &LI, SmallSetVector<Value *, 4> &PotentialValues,
+    SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+    const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+    bool OnlyExact = false);

 /// Collect all potential values of the one stored by \p SI into
 /// \p PotentialCopies. That is, the only copies that were made via the
-/// store are assumed to be known and all in \p PotentialCopies. Dependences
+/// store are assumed to be known and all are in \p PotentialCopies. Dependences
 /// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
 /// inform the caller if assumed information was used.
 ///
@@ -205,7 +252,8 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
 ///          determined.
 bool getPotentialCopiesOfStoredValue(
     Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
-    const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
+    const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+    bool OnlyExact = false);

 /// Return true if \p IRP is readonly. This will query respective AAs that
 /// deduce the information and introduce dependences for \p QueryingAA.
@@ -237,6 +285,26 @@ bool isPotentiallyReachable(

 } // namespace AA

+template <>
+struct DenseMapInfo<AA::ValueAndContext>
+    : public DenseMapInfo<AA::ValueAndContext::Base> {
+  using Base = DenseMapInfo<AA::ValueAndContext::Base>;
+  static inline AA::ValueAndContext getEmptyKey() {
+    return Base::getEmptyKey();
+  }
+  static inline AA::ValueAndContext getTombstoneKey() {
+    return Base::getTombstoneKey();
+  }
+  static unsigned getHashValue(const AA::ValueAndContext &VAC) {
+    return Base::getHashValue(VAC);
+  }
+
+  static bool isEqual(const AA::ValueAndContext &LHS,
+                      const AA::ValueAndContext &RHS) {
+    return Base::isEqual(LHS, RHS);
+  }
+};
+
 /// The value passed to the line option that defines the maximal initialization
 /// chain length.
 extern unsigned MaxInitializationChainLength;
@@ -1033,6 +1101,10 @@ struct InformationCache {
     return FI.CalledViaMustTail || FI.ContainsMustTailCall;
   }

+  bool isOnlyUsedByAssume(const Instruction &I) const {
+    return AssumeOnlyValues.contains(&I);
+  }
+
   /// Return the analysis result from a pass \p AP for function \p F.
   template <typename AP>
   typename AP::Result *getAnalysisResultForFunction(const Function &F) {
@@ -1125,6 +1197,9 @@ private:
   /// A map with knowledge retained in `llvm.assume` instructions.
   RetainedKnowledgeMap KnowledgeMap;

+  /// A container for all instructions that are only used by `llvm.assume`.
+  SetVector<const Instruction *> AssumeOnlyValues;
+
   /// Getters for analysis.
   AnalysisGetter &AG;

@@ -1143,6 +1218,53 @@ private:
   friend struct Attributor;
 };

+/// Configuration for the Attributor.
+struct AttributorConfig {
+
+  AttributorConfig(CallGraphUpdater &CGUpdater) : CGUpdater(CGUpdater) {}
+
+  /// Is the user of the Attributor a module pass or not. This determines what
+  /// IR we can look at and modify. If it is a module pass we might deduce facts
+  /// outside the initial function set and modify functions outside that set,
+  /// but only as part of the optimization of the functions in the initial
+  /// function set. For CGSCC passes we can look at the IR of the module slice
+  /// but never run any deduction, or perform any modification, outside the
+  /// initial function set (which we assume is the SCC).
+  bool IsModulePass = true;
+
+  /// Flag to determine if we can delete functions or keep dead ones around.
+  bool DeleteFns = true;
+
+  /// Flag to determine if we rewrite function signatures.
+  bool RewriteSignatures = true;
+
+  /// Flag to determine if we want to initialize all default AAs for an internal
+  /// function marked live.
+  /// TODO: This should probably be a callback, or maybe
+  /// identifyDefaultAbstractAttributes should be virtual, something to allow
+  /// customizable lazy initialization for internal functions.
+  bool DefaultInitializeLiveInternals = true;
+
+  /// Helper to update an underlying call graph and to delete functions.
+  CallGraphUpdater &CGUpdater;
+
+  /// If not null, a set limiting the attribute opportunities.
+  DenseSet<const char *> *Allowed = nullptr;
+
+  /// Maximum number of iterations to run until fixpoint.
+  Optional<unsigned> MaxFixpointIterations = None;
+
+  /// A callback function that returns an ORE object from a Function pointer.
+  ///{
+  using OptimizationRemarkGetter =
+      function_ref<OptimizationRemarkEmitter &(Function *)>;
+  OptimizationRemarkGetter OREGetter = nullptr;
+  ///}
+
+  /// The name of the pass running the attributor, used to emit remarks.
+  const char *PassName = nullptr;
+};
+
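// Hedged illustration (not part of the patch): a typical migration from the
// removed multi-argument Attributor constructors to the configuration object
// above; Functions and InfoCache are assumed to be set up as before:
//
//   CallGraphUpdater CGUpdater;
//   AttributorConfig AC(CGUpdater);
//   AC.IsModulePass = true;
//   AC.DeleteFns = false;             // keep dead functions around
//   AC.MaxFixpointIterations = 32;
//   Attributor A(Functions, InfoCache, AC);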
 /// The fixpoint analysis framework that orchestrates the attribute deduction.
 ///
 /// The Attributor provides a general abstract analysis framework (guided
@@ -1172,52 +1294,17 @@ private:
 /// described in the file comment.
 struct Attributor {

-  using OptimizationRemarkGetter =
-      function_ref<OptimizationRemarkEmitter &(Function *)>;
-
   /// Constructor
   ///
   /// \param Functions The set of functions we are deriving attributes for.
   /// \param InfoCache Cache to hold various information accessible for
   ///                  the abstract attributes.
-  /// \param CGUpdater Helper to update an underlying call graph.
-  /// \param Allowed If not null, a set limiting the attribute opportunities.
-  /// \param DeleteFns Whether to delete functions.
-  /// \param RewriteSignatures Whether to rewrite function signatures.
+  /// \param Configuration The Attributor configuration which determines what
+  ///                      generic features to use.
   Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
-             CallGraphUpdater &CGUpdater,
-             DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
-             bool RewriteSignatures = true)
+             AttributorConfig Configuration)
       : Allocator(InfoCache.Allocator), Functions(Functions),
-        InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
-        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
-        MaxFixpointIterations(None), OREGetter(None), PassName("") {}
-
-  /// Constructor
-  ///
-  /// \param Functions The set of functions we are deriving attributes for.
-  /// \param InfoCache Cache to hold various information accessible for
-  ///                  the abstract attributes.
-  /// \param CGUpdater Helper to update an underlying call graph.
-  /// \param Allowed If not null, a set limiting the attribute opportunities.
-  /// \param DeleteFns Whether to delete functions
-  /// \param RewriteSignatures Whether to rewrite function signatures.
-  /// \param MaxFixpointIterations Maximum number of iterations to run until
-  ///                              fixpoint.
-  /// \param OREGetter A callback function that returns an ORE object from a
-  ///                  Function pointer.
-  /// \param PassName  The name of the pass emitting remarks.
-  Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
-             CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
-             bool DeleteFns, bool RewriteSignatures,
-             Optional<unsigned> MaxFixpointIterations,
-             OptimizationRemarkGetter OREGetter, const char *PassName)
-      : Allocator(InfoCache.Allocator), Functions(Functions),
-        InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
-        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
-        MaxFixpointIterations(MaxFixpointIterations),
-        OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
-        PassName(PassName) {}
+        InfoCache(InfoCache), Configuration(Configuration) {}

   ~Attributor();

@@ -1301,11 +1388,15 @@ struct Attributor {
     registerAA(AA);

     // For now we ignore naked and optnone functions.
-    bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
-    const Function *FnScope = IRP.getAnchorScope();
-    if (FnScope)
-      Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
-                    FnScope->hasFnAttribute(Attribute::OptimizeNone);
+    bool Invalidate =
+        Configuration.Allowed && !Configuration.Allowed->count(&AAType::ID);
+    const Function *AnchorFn = IRP.getAnchorScope();
+    if (AnchorFn) {
+      Invalidate |=
+          AnchorFn->hasFnAttribute(Attribute::Naked) ||
+          AnchorFn->hasFnAttribute(Attribute::OptimizeNone) ||
+          (!isModulePass() && !getInfoCache().isInModuleSlice(*AnchorFn));
+    }

     // Avoid too many nested initializations to prevent a stack overflow.
     Invalidate |= InitializationChainLength > MaxInitializationChainLength;
@@ -1325,15 +1416,12 @@ struct Attributor {
       --InitializationChainLength;
     }

-    // Initialize and update is allowed for code outside of the current function
-    // set, but only if it is part of module slice we are allowed to look at.
-    // Only exception is AAIsDeadFunction whose initialization is prevented
-    // directly, since we don't to compute it twice.
-    if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
-      if (!getInfoCache().isInModuleSlice(*FnScope)) {
-        AA.getState().indicatePessimisticFixpoint();
-        return AA;
-      }
+    // We update only AAs associated with functions in the Functions set or
+    // call sites of them.
+    if ((AnchorFn && !Functions.count(const_cast<Function *>(AnchorFn))) &&
+        !Functions.count(IRP.getAssociatedFunction())) {
+      AA.getState().indicatePessimisticFixpoint();
+      return AA;
     }

     // If this is queried in the manifest stage, we force the AA to indicate
@@ -1443,10 +1531,7 @@ struct Attributor {
   InformationCache &getInfoCache() { return InfoCache; }

   /// Return true if this is a module pass, false otherwise.
-  bool isModulePass() const {
-    return !Functions.empty() &&
-           Functions.size() == Functions.front()->getParent()->size();
-  }
+  bool isModulePass() const { return Configuration.IsModulePass; }

   /// Return true if we derive attributes for \p Fn
   bool isRunOn(Function &Fn) const {
@@ -1481,7 +1566,8 @@ struct Attributor {
     assert(F.hasLocalLinkage() &&
            "Only local linkage is assumed dead initially.");

-    identifyDefaultAbstractAttributes(const_cast<Function &>(F));
+    if (Configuration.DefaultInitializeLiveInternals)
+      identifyDefaultAbstractAttributes(const_cast<Function &>(F));
   }

   /// Helper function to remove callsite.
@@ -1489,7 +1575,7 @@ struct Attributor {
     if (!CI)
       return;

-    CGUpdater.removeCallSite(*CI);
+    Configuration.CGUpdater.removeCallSite(*CI);
   }

   /// Record that \p U is to be replaced with \p NV after information was
@@ -1505,11 +1591,17 @@ struct Attributor {
     return true;
   }

-  /// Helper function to replace all uses of \p V with \p NV. Return true if
-  /// there is any change. The flag \p ChangeDroppable indicates if dropppable
-  /// uses should be changed too.
-  bool changeValueAfterManifest(Value &V, Value &NV,
-                                bool ChangeDroppable = true) {
+  /// Helper function to replace all uses associated with \p IRP with \p NV.
+  /// Return true if there is any change. The flag \p ChangeDroppable indicates
+  /// if droppable uses should be changed too.
+  bool changeAfterManifest(const IRPosition IRP, Value &NV,
+                           bool ChangeDroppable = true) {
+    if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT) {
+      auto *CB = cast<CallBase>(IRP.getCtxI());
+      return changeUseAfterManifest(
+          CB->getArgOperandUse(IRP.getCallSiteArgNo()), NV);
+    }
+    Value &V = IRP.getAssociatedValue();
     auto &Entry = ToBeChangedValues[&V];
     Value *&CurNV = Entry.first;
     if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() ||
@@ -1532,7 +1624,7 @@ struct Attributor {
   /// is used, e.g., to replace \p II with a call, after information was
   /// manifested.
   void registerInvokeWithDeadSuccessor(InvokeInst &II) {
-    InvokeWithDeadSuccessor.push_back(&II);
+    InvokeWithDeadSuccessor.insert(&II);
   }

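// Hedged illustration (not part of the patch): using the position-based
// replacement helper shown above; CB and NewV are assumed locals. For a
// call-site argument position only that operand's use is rewritten, while any
// other position falls back to replacing all uses of the associated value:
//
//   IRPosition ArgPos = IRPosition::callsite_argument(*CB, /*ArgNo=*/0);
//   A.changeAfterManifest(ArgPos, *NewV);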
   /// Record that \p I is deleted after information was manifested. This also
@@ -1551,7 +1643,9 @@ struct Attributor {

   /// Record that \p F is deleted after information was manifested.
   void deleteAfterManifest(Function &F) {
-    if (DeleteFns)
+    if (Configuration.DeleteFns)
       ToBeDeletedFunctions.insert(&F);
   }

@@ -1668,6 +1762,7 @@ public:
                         const AbstractAttribute &QueryingAA, const Value &V,
                         bool CheckBBLivenessOnly = false,
                         DepClassTy LivenessDepClass = DepClassTy::OPTIONAL,
+                        bool IgnoreDroppableUses = true,
                         function_ref<bool(const Use &OldU, const Use &NewU)>
                             EquivalentUseCB = nullptr);

@@ -1685,37 +1780,41 @@ public:
   template <typename RemarkKind, typename RemarkCallBack>
   void emitRemark(Instruction *I, StringRef RemarkName,
                   RemarkCallBack &&RemarkCB) const {
-    if (!OREGetter)
+    if (!Configuration.OREGetter)
       return;

     Function *F = I->getFunction();
-    auto &ORE = OREGetter.getValue()(F);
+    auto &ORE = Configuration.OREGetter(F);

     if (RemarkName.startswith("OMP"))
       ORE.emit([&]() {
-        return RemarkCB(RemarkKind(PassName, RemarkName, I))
+        return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, I))
                << " [" << RemarkName << "]";
       });
     else
-      ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
+      ORE.emit([&]() {
+        return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, I));
+      });
   }

   /// Emit a remark on a function.
   template <typename RemarkKind, typename RemarkCallBack>
   void emitRemark(Function *F, StringRef RemarkName,
                   RemarkCallBack &&RemarkCB) const {
-    if (!OREGetter)
+    if (!Configuration.OREGetter)
       return;

-    auto &ORE = OREGetter.getValue()(F);
+    auto &ORE = Configuration.OREGetter(F);

     if (RemarkName.startswith("OMP"))
       ORE.emit([&]() {
-        return RemarkCB(RemarkKind(PassName, RemarkName, F))
+        return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, F))
                << " [" << RemarkName << "]";
       });
     else
-      ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
+      ORE.emit([&]() {
+        return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, F));
+      });
   }

   /// Helper struct used in the communication between an abstract attribute (AA)
@@ -1824,23 +1923,24 @@ public:
   /// This method will evaluate \p Pred on call sites and return
   /// true if \p Pred holds at every call site. However, this is only possible
   /// if all call sites are known, hence the function has internal linkage.
-  /// If true is returned, \p AllCallSitesKnown is set if all possible call
-  /// sites of the function have been visited.
+  /// If true is returned, \p UsedAssumedInformation is set if assumed
+  /// information was used to skip or simplify potential call sites.
   bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
                             const AbstractAttribute &QueryingAA,
-                            bool RequireAllCallSites, bool &AllCallSitesKnown);
+                            bool RequireAllCallSites,
+                            bool &UsedAssumedInformation);

   /// Check \p Pred on all call sites of \p Fn.
   ///
   /// This method will evaluate \p Pred on call sites and return
   /// true if \p Pred holds at every call site. However, this is only possible
   /// if all call sites are known, hence the function has internal linkage.
-  /// If true is returned, \p AllCallSitesKnown is set if all possible call
-  /// sites of the function have been visited.
+  /// If true is returned, \p UsedAssumedInformation is set if assumed
+  /// information was used to skip or simplify potential call sites.
   bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
                             const Function &Fn, bool RequireAllCallSites,
                             const AbstractAttribute *QueryingAA,
-                            bool &AllCallSitesKnown);
+                            bool &UsedAssumedInformation);

   /// Check \p Pred on all values potentially returned by \p F.
   ///
@@ -1859,6 +1959,19 @@ public:
   bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
                                  const AbstractAttribute &QueryingAA);

+  /// Check \p Pred on all instructions in \p Fn with an opcode present in
+  /// \p Opcodes.
+  ///
+  /// This method will evaluate \p Pred on all instructions with an opcode
+  /// present in \p Opcode and return true if \p Pred holds on all of them.
+  bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+                               const Function *Fn,
+                               const AbstractAttribute &QueryingAA,
+                               const ArrayRef<unsigned> &Opcodes,
+                               bool &UsedAssumedInformation,
+                               bool CheckBBLivenessOnly = false,
+                               bool CheckPotentiallyDead = false);
+
   /// Check \p Pred on all instructions with an opcode present in \p Opcodes.
   ///
   /// This method will evaluate \p Pred on all instructions with an opcode
@@ -1987,7 +2100,7 @@ private:
   /// (\see registerFunctionSignatureRewrite) and return Changed if the module
   /// was altered.
   ChangeStatus
-  rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);
+  rewriteFunctionSignatures(SmallSetVector<Function *, 8> &ModifiedFns);

   /// Check if the Attribute \p AA should be seeded.
   /// See getOrCreateAAFor.
@@ -2011,15 +2124,12 @@ private:
   /// The information cache that holds pre-processed (LLVM-IR) information.
   InformationCache &InfoCache;

-  /// Helper to update an underlying call graph.
-  CallGraphUpdater &CGUpdater;
-
   /// Abstract Attribute dependency graph
   AADepGraph DG;

   /// Set of functions for which we modified the content such that it might
   /// impact the call graph.
-  SmallPtrSet<Function *, 8> CGModifiedFunctions;
+  SmallSetVector<Function *, 8> CGModifiedFunctions;

   /// Information about a dependence. If FromAA is changed ToAA needs to be
   /// updated as well.
@@ -2039,34 +2149,22 @@ private:
   using DependenceVector = SmallVector<DepInfo, 8>;
   SmallVector<DependenceVector *, 16> DependenceStack;

-  /// If not null, a set limiting the attribute opportunities.
-  const DenseSet<const char *> *Allowed;
-
-  /// Whether to delete functions.
-  const bool DeleteFns;
-
-  /// Whether to rewrite signatures.
-  const bool RewriteSignatures;
-
-  /// Maximum number of fixedpoint iterations.
-  Optional<unsigned> MaxFixpointIterations;
-
   /// A set to remember the functions we already assume to be live and visited.
   DenseSet<const Function *> VisitedFunctions;

   /// Uses we replace with a new value after manifest is done. We will remove
   /// then trivially dead instructions as well.
-  DenseMap<Use *, Value *> ToBeChangedUses;
+  SmallMapVector<Use *, Value *, 32> ToBeChangedUses;

   /// Values we replace with a new value after manifest is done. We will remove
   /// then trivially dead instructions as well.
-  DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;
+  SmallMapVector<Value *, std::pair<Value *, bool>, 32> ToBeChangedValues;

   /// Instructions we replace with `unreachable` insts after manifest is done.
-  SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;
+  SmallSetVector<WeakVH, 16> ToBeChangedToUnreachableInsts;

   /// Invoke instructions with at least a single dead successor block.
-  SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
+  SmallSetVector<WeakVH, 16> InvokeWithDeadSuccessor;

   /// A flag that indicates which stage of the process we are in. Initially, the
   /// phase is SEEDING. Phase is changed in `Attributor::run()`
@@ -2083,21 +2181,18 @@ private:
   /// Functions, blocks, and instructions we delete after manifest is done.
   ///
   ///{
-  SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
-  SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
   SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
-  SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
+  SmallSetVector<Function *, 8> ToBeDeletedFunctions;
+  SmallSetVector<BasicBlock *, 8> ToBeDeletedBlocks;
+  SmallSetVector<WeakVH, 8> ToBeDeletedInsts;
   ///}

-  /// Callback to get an OptimizationRemarkEmitter from a Function *.
-  Optional<OptimizationRemarkGetter> OREGetter;
-
   /// Container with all the query AAs that requested an update via
   /// registerForUpdate.
   SmallSetVector<AbstractAttribute *, 16> QueryAAsAwaitingUpdate;

-  /// The name of the pass to emit remarks for.
-  const char *PassName = "";
+  /// User provided configuration for this Attributor instance.
+  const AttributorConfig Configuration;

   friend AADepGraph;
   friend AttributorCallGraph;
@@ -2515,16 +2610,6 @@ struct IntegerRangeState : public AbstractState {
     unionAssumed(R.getAssumed());
   }

-  /// Unite known range with the passed state.
-  void unionKnown(const ConstantRange &R) {
-    // Don't loose a known range.
-    Known = Known.unionWith(R);
-    Assumed = Assumed.unionWith(Known);
-  }
-
-  /// See IntegerRangeState::unionKnown(..).
-  void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }
-
   /// Intersect known range with the passed state.
   void intersectKnown(const ConstantRange &R) {
     Assumed = Assumed.intersectWith(R);
@@ -2554,8 +2639,8 @@ struct IntegerRangeState : public AbstractState {
   IntegerRangeState operator&=(const IntegerRangeState &R) {
     // NOTE: `&=` operator seems like `intersect` but in this case, we need to
     // take `union`.
-    unionKnown(R);
-    unionAssumed(R);
+    Known = Known.unionWith(R.getKnown());
+    Assumed = Assumed.unionWith(R.getAssumed());
     return *this;
   }
 };
@@ -3363,6 +3448,12 @@ protected:
   /// Returns true if \p I is known dead.
   virtual bool isKnownDead(const Instruction *I) const = 0;

+  /// Return true if the underlying value is a store that is known to be
+  /// removable. This is different from dead stores as the removable store
+  /// can have an effect on live values, especially loads, but that effect
+  /// is propagated which allows us to remove the store in turn.
+  virtual bool isRemovableStore() const { return false; }
+
   /// This method is used to check if at least one instruction in a collection
   /// of instructions is live.
   template <typename T> bool isLiveInstSet(T begin, T end) const {
@@ -3618,10 +3709,10 @@ struct AAAlign : public IRAttribute<
   AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

   /// Return assumed alignment.
-  uint64_t getAssumedAlign() const { return getAssumed(); }
+  Align getAssumedAlign() const { return Align(getAssumed()); }

   /// Return known alignment.
-  uint64_t getKnownAlign() const { return getKnown(); }
+  Align getKnownAlign() const { return Align(getKnown()); }

   /// See AbstractAttribute::getName()
   const std::string getName() const override { return "AAAlign"; }
@@ -3641,6 +3732,46 @@ struct AAAlign : public IRAttribute<
   static const char ID;
 };

+/// An abstract interface to track if a value leaves its defining function
+/// instance.
+/// TODO: We should make it a ternary AA tracking uniqueness, and uniqueness
+/// wrt. the Attributor analysis separately.
+struct AAInstanceInfo : public StateWrapper<BooleanState, AbstractAttribute> {
+  AAInstanceInfo(const IRPosition &IRP, Attributor &A)
+      : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
+
+  /// Return true if we know that the underlying value is unique in its scope
+  /// wrt. the Attributor analysis. That means it might not be unique but we can
+  /// still use pointer equality without risking to represent two instances with
+  /// one `llvm::Value`.
+  bool isKnownUniqueForAnalysis() const { return isKnown(); }
+
+  /// Return true if we assume that the underlying value is unique in its scope
+  /// wrt. the Attributor analysis. That means it might not be unique but we can
+  /// still use pointer equality without risking to represent two instances with
+  /// one `llvm::Value`.
+  bool isAssumedUniqueForAnalysis() const { return isAssumed(); }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAInstanceInfo &createForPosition(const IRPosition &IRP,
+                                           Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAInstanceInfo"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAInstanceInfo
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// An abstract interface for all nocapture attributes.
 struct AANoCapture
     : public IRAttribute<
@@ -4150,13 +4281,14 @@ struct AAValueConstantRange

   /// Return an assumed constant for the associated value a program point \p
   /// CtxI.
-  Optional<ConstantInt *>
-  getAssumedConstantInt(Attributor &A,
-                        const Instruction *CtxI = nullptr) const {
+  Optional<Constant *>
+  getAssumedConstant(Attributor &A, const Instruction *CtxI = nullptr) const {
     ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
-    if (auto *C = RangeV.getSingleElement())
-      return cast<ConstantInt>(
-          ConstantInt::get(getAssociatedValue().getType(), *C));
+    if (auto *C = RangeV.getSingleElement()) {
+      Type *Ty = getAssociatedValue().getType();
+      return cast_or_null<Constant>(
+          AA::getWithType(*ConstantInt::get(Ty->getContext(), *C), *Ty));
+    }
     if (RangeV.isEmptySet())
       return llvm::None;
     return nullptr;
@@ -4185,10 +4317,9 @@ struct AAValueConstantRange
 /// contains every possible value (i.e. we cannot in any way limit the value
 /// that the target position can take). That never happens naturally, we only
 /// force it. As for the conditions under which we force it, see
-/// AAPotentialValues.
-template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
-struct PotentialValuesState : AbstractState {
-  using SetTy = DenseSet<MemberTy, KeyInfo>;
+/// AAPotentialConstantValues.
+template <typename MemberTy> struct PotentialValuesState : AbstractState {
+  using SetTy = SmallSetVector<MemberTy, 8>;

   PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}

@@ -4247,7 +4378,7 @@ struct PotentialValuesState : AbstractState {
     return PotentialValuesState(true);
   }

-  static PotentialValuesState getBestState(PotentialValuesState &PVS) {
+  static PotentialValuesState getBestState(const PotentialValuesState &PVS) {
     return getBestState();
   }

@@ -4278,6 +4409,12 @@ struct PotentialValuesState : AbstractState {
     return *this;
   }

+protected:
+  SetTy &getAssumedSet() {
+    assert(isValidState() && "This set should not be used when it is invalid!");
+    return Set;
+  }
+
 private:
   /// Check the size of this set, and invalidate when the size is no
   /// less than \p MaxPotentialValues threshold.
@@ -4372,10 +4509,10 @@ raw_ostream &operator<<(raw_ostream &OS,
 /// operator we do not currently handle).
 ///
 /// TODO: Support values other than constant integers.
-struct AAPotentialValues
+struct AAPotentialConstantValues
     : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
   using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
-  AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+  AAPotentialConstantValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

   /// See AbstractAttribute::getState(...).
   PotentialConstantIntValuesState &getState() override { return *this; }
@@ -4384,22 +4521,23 @@ struct AAPotentialConstantValues
   }

   /// Create an abstract attribute view for the position \p IRP.
-  static AAPotentialValues &createForPosition(const IRPosition &IRP,
-                                              Attributor &A);
+  static AAPotentialConstantValues &createForPosition(const IRPosition &IRP,
+                                                      Attributor &A);

   /// Return assumed constant for the associated value
-  Optional<ConstantInt *>
-  getAssumedConstantInt(Attributor &A,
-                        const Instruction *CtxI = nullptr) const {
+  Optional<Constant *>
+  getAssumedConstant(Attributor &A, const Instruction *CtxI = nullptr) const {
     if (!isValidState())
       return nullptr;
-    if (getAssumedSet().size() == 1)
-      return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
-                                                *(getAssumedSet().begin())));
+    if (getAssumedSet().size() == 1) {
+      Type *Ty = getAssociatedValue().getType();
+      return cast_or_null<Constant>(AA::getWithType(
+          *ConstantInt::get(Ty->getContext(), *(getAssumedSet().begin())),
+          *Ty));
+    }
     if (getAssumedSet().size() == 0) {
       if (undefIsContained())
-        return cast<ConstantInt>(
-            ConstantInt::get(getAssociatedValue().getType(), 0));
+        return UndefValue::get(getAssociatedValue().getType());
       return llvm::None;
     }

@@ -4407,13 +4545,15 @@ struct AAPotentialConstantValues
   }

   /// See AbstractAttribute::getName()
-  const std::string getName() const override { return "AAPotentialValues"; }
+  const std::string getName() const override {
+    return "AAPotentialConstantValues";
+  }

   /// See AbstractAttribute::getIdAddr()
   const char *getIdAddr() const override { return &ID; }

   /// This function should return true if the type of the \p AA is
-  /// AAPotentialValues
+  /// AAPotentialConstantValues
   static bool classof(const AbstractAttribute *AA) {
     return (AA->getIdAddr() == &ID);
   }
@@ -4744,12 +4884,10 @@ struct AAPointerInfo : public AbstractAttribute {
     Instruction *getRemoteInst() const { return RemoteI; }

     /// Return true if the value written is not known yet.
-    bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }
+    bool isWrittenValueYetUndetermined() const { return !Content; }

     /// Return true if the value written cannot be determined at all.
-    bool isWrittenValueUnknown() const {
-      return Content.hasValue() && !*Content;
-    }
+    bool isWrittenValueUnknown() const { return Content && !*Content; }

     /// Return the type associated with the access, if known.
     Type *getType() const { return Ty; }
@@ -4792,21 +4930,55 @@ struct AAPointerInfo : public AbstractAttribute {
   /// See AbstractAttribute::getIdAddr()
   const char *getIdAddr() const override { return &ID; }

-  /// Call \p CB on all accesses that might interfere with \p LI and return true
-  /// if all such accesses were known and the callback returned true for all of
-  /// them, false otherwise.
-  virtual bool forallInterferingAccesses(
-      LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
+  /// Helper to represent an access offset and size, with logic to deal with
+  /// uncertainty and check for overlapping accesses.
+  struct OffsetAndSize : public std::pair<int64_t, int64_t> {
+    using BaseTy = std::pair<int64_t, int64_t>;
+    OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
+    OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
+    int64_t getOffset() const { return first; }
+    int64_t getSize() const { return second; }
+    static OffsetAndSize getUnknown() {
+      return OffsetAndSize(Unknown, Unknown);
+    }
+
+    /// Return true if offset or size are unknown.
+    bool offsetOrSizeAreUnknown() const {
+      return getOffset() == OffsetAndSize::Unknown ||
+             getSize() == OffsetAndSize::Unknown;
+    }
+
+    /// Return true if this offset and size pair might describe an address that
+    /// overlaps with \p OAS.
+    bool mayOverlap(const OffsetAndSize &OAS) const {
+      // Any unknown value and we are giving up -> overlap.
+      if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
+        return true;
+
+      // Check if one offset point is in the other interval [offset,
+      // offset+size].
+      return OAS.getOffset() + OAS.getSize() > getOffset() &&
+             OAS.getOffset() < getOffset() + getSize();
+    }
+
+    /// Constant used to represent unknown offset or sizes.
+    static constexpr int64_t Unknown = 1 << 31;
+  };
+
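// Hedged worked example (not part of the patch): accesses [8, 8+8) and
// [12, 12+4) overlap because 12 < 8 + 8 and 12 + 4 > 8, while an unknown
// offset or size conservatively overlaps everything:
//
//   AAPointerInfo::OffsetAndSize A(/*Offset=*/8, /*Size=*/8);
//   AAPointerInfo::OffsetAndSize B(/*Offset=*/12, /*Size=*/4);
//   assert(A.mayOverlap(B) && B.mayOverlap(A));
//   assert(AAPointerInfo::OffsetAndSize::getUnknown().mayOverlap(A));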
+  /// Call \p CB on all accesses that might interfere with \p OAS and return
+  /// true if all such accesses were known and the callback returned true for
+  /// all of them, false otherwise. An access interferes with an offset-size
+  /// pair if it might read or write that memory region.
   virtual bool forallInterferingAccesses(
-      StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
+      OffsetAndSize OAS, function_ref<bool(const Access &, bool)> CB) const = 0;

-  /// Call \p CB on all write accesses that might interfere with \p LI and
+  /// Call \p CB on all accesses that might interfere with \p I and
   /// return true if all such accesses were known and the callback returned true
   /// for all of them, false otherwise. In contrast to forallInterferingAccesses
   /// this function will perform reasoning to exclude write accesses that cannot
   /// affect the load even if they on the surface look as if they would.
-  virtual bool forallInterferingWrites(
-      Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+  virtual bool forallInterferingAccesses(
+      Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
       function_ref<bool(const Access &, bool)> CB) const = 0;

   /// This function should return true if the type of the \p AA is AAPointerInfo
diff --git a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
index 496ceea12bc9..a71fa3bf404d 100644
--- a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
+++ b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
@@ -66,25 +66,24 @@ public:
     }
   };

-  /// Liveness enum - During our initial pass over the program, we determine
-  /// that things are either alive or maybe alive. We don't mark anything
-  /// explicitly dead (even if we know they are), since anything not alive
-  /// with no registered uses (in Uses) will never be marked alive and will
-  /// thus become dead in the end.
+  /// During our initial pass over the program, we determine that things are
+  /// either alive or maybe alive. We don't mark anything explicitly dead (even
+  /// if we know they are), since anything not alive with no registered uses
+  /// (in Uses) will never be marked alive and will thus become dead in the end.
   enum Liveness { Live, MaybeLive };

-  DeadArgumentEliminationPass(bool ShouldHackArguments_ = false)
-      : ShouldHackArguments(ShouldHackArguments_) {}
+  DeadArgumentEliminationPass(bool ShouldHackArguments = false)
+      : ShouldHackArguments(ShouldHackArguments) {}

   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

   /// Convenience wrapper
-  RetOrArg CreateRet(const Function *F, unsigned Idx) {
+  RetOrArg createRet(const Function *F, unsigned Idx) {
     return RetOrArg(F, Idx, false);
   }

   /// Convenience wrapper
-  RetOrArg CreateArg(const Function *F, unsigned Idx) {
+  RetOrArg createArg(const Function *F, unsigned Idx) {
     return RetOrArg(F, Idx, true);
   }

@@ -122,21 +121,21 @@ public:
   bool ShouldHackArguments = false;

 private:
-  Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
-  Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+  Liveness markIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+  Liveness surveyUse(const Use *U, UseVector &MaybeLiveUses,
                      unsigned RetValNum = -1U);
-  Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+  Liveness surveyUses(const Value *V, UseVector &MaybeLiveUses);

-  void SurveyFunction(const Function &F);
-  bool IsLive(const RetOrArg &RA);
-  void MarkValue(const RetOrArg &RA, Liveness L,
+  void surveyFunction(const Function &F);
+  bool isLive(const RetOrArg &RA);
+  void markValue(const RetOrArg &RA, Liveness L,
                  const UseVector &MaybeLiveUses);
-  void MarkLive(const RetOrArg &RA);
-  void MarkLive(const Function &F);
-  void PropagateLiveness(const RetOrArg &RA);
-  bool RemoveDeadStuffFromFunction(Function *F);
-  bool DeleteDeadVarargs(Function &Fn);
-  bool RemoveDeadArgumentsFromCallers(Function &Fn);
+  void markLive(const RetOrArg &RA);
+  void markLive(const Function &F);
+  void propagateLiveness(const RetOrArg &RA);
+  bool removeDeadStuffFromFunction(Function *F);
+  bool deleteDeadVarargs(Function &F);
+  bool removeDeadArgumentsFromCallers(Function &F);
 };

 } // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index a2b93f8aa30d..07c7cac77354 100644
--- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -14,9 +14,10 @@
 #define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H

 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"

 namespace llvm {
+class Module;
+class Pass;

 /// Pass which forces specific function attributes into the IR, primarily as
 /// a debugging tool.
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index 0b6734a3929d..bcb75025f8e5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -15,29 +15,22 @@
 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
 #define LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H

+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/PassManager.h"

 namespace llvm {

-class AAResults;
+class GlobalValueSummary;
+class ModuleSummaryIndex;
 class Function;
 class Module;
 class Pass;

-/// The three kinds of memory access relevant to 'readonly' and
-/// 'readnone' attributes.
-enum MemoryAccessKind {
-  MAK_ReadNone = 0,
-  MAK_ReadOnly = 1,
-  MAK_MayWrite = 2,
-  MAK_WriteOnly = 3
-};
-
 /// Returns the memory access properties of this copy of the function.
-MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR);
+FunctionModRefBehavior computeFunctionBodyMemoryAccess(Function &F,
+                                                       AAResults &AAR);
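// Hedged illustration (not part of the patch): consuming the new return type;
// FMRB_DoesNotAccessMemory and AliasAnalysis::onlyReadsMemory are the
// pre-existing FunctionModRefBehavior helpers, and F/AAR are assumed locals:
//
//   FunctionModRefBehavior MRB = computeFunctionBodyMemoryAccess(F, AAR);
//   if (MRB == FMRB_DoesNotAccessMemory)
//     F.setDoesNotAccessMemory();                  // readnone
//   else if (AliasAnalysis::onlyReadsMemory(MRB))
//     F.setOnlyReadsMemory();                      // readonly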

 /// Propagate function attributes for function summaries along the index's
 /// callgraph during thinlink
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
index 0a6851849e7e..a24196efb83b 100644
--- a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
+++ b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -19,11 +19,18 @@

 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/PassManager.h"
 #include <unordered_map>

 namespace llvm {
+class Comdat;
+class Constant;
+class Function;
+class GlobalVariable;
+class Metadata;
+class Module;
+class Value;

 /// Pass to remove unused function declarations.
 class GlobalDCEPass : public PassInfoMixin<GlobalDCEPass> {
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index e4807a1c9c65..315587e0f922 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -43,14 +43,13 @@

 #include "llvm/Analysis/IRSimilarityIdentifier.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/IR/ValueMap.h"
 #include "llvm/Support/InstructionCost.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
-#include

 struct OutlinableGroup;

 namespace llvm {
+using namespace CallingConv;
 using namespace IRSimilarity;

 class Module;
@@ -86,6 +85,13 @@ struct OutlinableRegion {
   DenseMap<unsigned, unsigned> ExtractedArgToAgg;
   DenseMap<unsigned, unsigned> AggArgToExtracted;

+  /// Values in the outlined functions will often be replaced by arguments. When
+  /// finding corresponding values from one region to another, the found value
+  /// will be the value the argument previously replaced. This structure maps
+  /// any replaced values for the region to the aggregate argument
+  /// in the overall function.
+  DenseMap<Value *, Value *> RemappedArguments;
+
   /// Marks whether we need to change the order of the arguments when mapping
   /// the old extracted function call to the new aggregate outlined function
   /// call.
@@ -168,6 +174,15 @@ struct OutlinableRegion {
   /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
   Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);

+  /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
+  ///
+  /// \param Other [in] - The OutlinableRegion to find the corresponding
+  /// BasicBlock in.
+  /// \param BB [in] - The BasicBlock to look for in the other region.
+  /// \return The corresponding BasicBlock to \p BB if it exists, otherwise nullptr.
+  BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other,
+                                       BasicBlock *BB);
+
   /// Get the size of the code removed from the region.
   ///
   /// \param [in] TTI - The TargetTransformInfo for the parent function.
@@ -372,6 +387,25 @@ private:
       // the call in outlined functions.
       if (CI.canReturnTwice())
        return false;
+      // TODO: Update the outliner to capture whether the outlined function
+      // needs these extra attributes.
+
+      // Functions marked with the swifttailcc and tailcc calling conventions
+      // require special handling when outlining musttail functions. The
+      // calling convention must be passed down to the outlined function as
+      // well. Further, there is special handling for musttail calls as well,
+      // requiring a return call directly after. For now, the outliner does not
+      // support this.
+      bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
+                      CI.getCallingConv() == CallingConv::Tail;
+      if (IsTailCC && !EnableMustTailCalls)
+        return false;
+      if (CI.isMustTailCall() && !EnableMustTailCalls)
+        return false;
+      // The outliner can only handle musttail items if it is also accompanied
+      // by the tailcc or swifttailcc calling convention.
+      if (CI.isMustTailCall() && !IsTailCC)
+        return false;
       return true;
     }
     // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
@@ -397,6 +431,9 @@ private:
   // The flag variable that marks whether we should allow intrinsics
   // instructions to be outlined.
   bool EnableIntrinsics = false;
+
+  // The flag variable that marks whether we should allow musttail calls.
+  bool EnableMustTailCalls = false;
 };

 /// An InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index 302695d96355..880af2b46d7f 100644
--- a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -15,11 +15,11 @@
 #ifndef LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
 #define LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H

-#include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"

 namespace llvm {
+class Module;
+class Pass;

 /// A pass which infers function attributes from the names and signatures of
 /// function declarations in a module.
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
index a7060943c4c0..1e154eb8f5da 100644
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -16,7 +16,6 @@
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
 #include "llvm/IR/PassManager.h"
-#include

 namespace llvm {

@@ -96,7 +95,9 @@ protected:
 /// passes be composed to achieve the same end result.
 class InlinerPass : public PassInfoMixin<InlinerPass> {
 public:
-  InlinerPass(bool OnlyMandatory = false) : OnlyMandatory(OnlyMandatory) {}
+  InlinerPass(bool OnlyMandatory = false,
+              ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+      : OnlyMandatory(OnlyMandatory), LTOPhase(LTOPhase) {}
   InlinerPass(InlinerPass &&Arg) = default;

   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
@@ -110,6 +111,7 @@ private:
                                    FunctionAnalysisManager &FAM, Module &M);
   std::unique_ptr<InlineAdvisor> OwnedAdvisor;
   const bool OnlyMandatory;
+  const ThinOrFullLTOPhase LTOPhase;
 };

 /// Module pass, wrapping the inliner pass. This works in conjunction with the
@@ -122,6 +124,7 @@ class ModuleInlinerWrapperPass
 public:
   ModuleInlinerWrapperPass(
       InlineParams Params = getInlineParams(), bool MandatoryFirst = true,
+      InlineContext IC = {},
       InliningAdvisorMode Mode = InliningAdvisorMode::Default,
       unsigned MaxDevirtIterations = 0);
   ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default;
@@ -147,6 +150,7 @@ public:

 private:
   const InlineParams Params;
+  const InlineContext IC;
   const InliningAdvisorMode Mode;
   const unsigned MaxDevirtIterations;
   // TODO: Clean this up so we only have one ModulePassManager.
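// Hedged illustration (not part of the patch): threading the LTO phase
// through the new InlinerPass constructor when building a CGSCC pipeline; the
// phase value here is illustrative:
//
//   CGSCCPassManager CGPM;
//   CGPM.addPass(InlinerPass(/*OnlyMandatory=*/false,
//                            ThinOrFullLTOPhase::ThinLTOPreLink));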
diff --git a/llvm/include/llvm/Transforms/IPO/Internalize.h b/llvm/include/llvm/Transforms/IPO/Internalize.h
index 41816df93360..adcf5a932be0 100644
--- a/llvm/include/llvm/Transforms/IPO/Internalize.h
+++ b/llvm/include/llvm/Transforms/IPO/Internalize.h
@@ -23,7 +23,6 @@

 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/PassManager.h"
 #include <functional>
diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
index 7474e48aafaf..24cfff6083ff 100644
--- a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
+++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
@@ -11,10 +11,7 @@

 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ReplayInlineAdvisor.h"
-#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
 #include "llvm/IR/PassManager.h"
-#include

 namespace llvm {

@@ -30,8 +27,9 @@ namespace llvm {
 class ModuleInlinerPass : public PassInfoMixin<ModuleInlinerPass> {
 public:
   ModuleInlinerPass(InlineParams Params = getInlineParams(),
-                    InliningAdvisorMode Mode = InliningAdvisorMode::Default)
-      : Params(Params), Mode(Mode){};
+                    InliningAdvisorMode Mode = InliningAdvisorMode::Default,
+                    ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+      : Params(Params), Mode(Mode), LTOPhase(LTOPhase){};
   ModuleInlinerPass(ModuleInlinerPass &&Arg) = default;

   PreservedAnalyses run(Module &, ModuleAnalysisManager &);
@@ -42,6 +40,7 @@ private:
   std::unique_ptr<InlineAdvisor> OwnedAdvisor;
   const InlineParams Params;
   const InliningAdvisorMode Mode;
+  const ThinOrFullLTOPhase LTOPhase;
 };

 } // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 3b944878a810..2676f2705424 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -16,7 +16,6 @@

 #include "llvm-c/Transforms/PassManagerBuilder.h"
 #include <functional>
-#include <memory>
 #include <string>
 #include <vector>

@@ -214,7 +213,6 @@ private:
   void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const;
   void addLTOOptimizationPasses(legacy::PassManagerBase &PM);
   void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
-  void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS);
   void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
   void addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO);

@@ -226,8 +224,6 @@ public:
   /// populateModulePassManager - This sets up the primary pass manager.
   void populateModulePassManager(legacy::PassManagerBase &MPM);
-  void populateLTOPassManager(legacy::PassManagerBase &PM);
-  void populateThinLTOPassManager(legacy::PassManagerBase &PM);
 };

 /// Registers a function for adding a standard set of passes. This should be
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 893654650caa..fff06da22cf3 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -18,9 +18,6 @@
 #include <queue>
 #include <set>

-using namespace llvm;
-using namespace sampleprof;
-
 namespace llvm {
 namespace sampleprof {

@@ -51,10 +48,10 @@ struct ProfiledCallGraphNode {
     }
   };

-  using iterator = std::set<ProfiledCallGraphEdge>::iterator;
-  using const_iterator = std::set<ProfiledCallGraphEdge>::const_iterator;
   using edge = ProfiledCallGraphEdge;
-  using edges = std::set<edge>;
+  using edges = std::set<edge, ProfiledCallGraphEdgeComparer>;
+  using iterator = edges::iterator;
+  using const_iterator = edges::const_iterator;

   ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {}

@@ -64,11 +61,11 @@ struct ProfiledCallGraphNode {

 class ProfiledCallGraph {
 public:
-  using iterator = std::set<ProfiledCallGraphEdge>::iterator;
+  using iterator = ProfiledCallGraphNode::iterator;

   // Constructor for non-CS profile.
   ProfiledCallGraph(SampleProfileMap &ProfileMap) {
-    assert(!FunctionSamples::ProfileIsCSFlat &&
+    assert(!FunctionSamples::ProfileIsCS &&
            "CS flat profile is not handled here");
     for (const auto &Samples : ProfileMap) {
       addProfiledCalls(Samples.second);
diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index cf87d028600f..a97d5ee3d710 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -15,20 +15,18 @@
 #ifndef LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
 #define LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H

-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/ProfileData/SampleProf.h"
-#include <list>
 #include <map>
+#include <queue>
 #include <vector>

-using namespace llvm;
-using namespace sampleprof;
-
 namespace llvm {
+class CallBase;
+class DILocation;
+class Function;
+class Instruction;

 // Internal trie tree representation used for tracking context tree and sample
 // profiles. The path from root node to a given node represents the context of
@@ -47,11 +45,6 @@ public:
   ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
                                            StringRef ChildName,
                                            bool AllowCreate = true);
-
-  ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
-                                      ContextTrieNode &&NodeToMove,
-                                      uint32_t ContextFramesToRemove,
-                                      bool DeleteNode = true);
   void removeChildContext(const LineLocation &CallSite, StringRef ChildName);
   std::map<uint64_t, ContextTrieNode> &getAllChildContext();
   StringRef getFuncName() const;
@@ -62,6 +55,7 @@ public:
   LineLocation getCallSiteLoc() const;
   ContextTrieNode *getParentContext() const;
   void setParentContext(ContextTrieNode *Parent);
+  void setCallSiteLoc(const LineLocation &Loc);
   void dumpNode();
   void dumpTree();

@@ -94,22 +88,13 @@ private:
 // calling context and the context is identified by path from root to the node.
 class SampleContextTracker {
 public:
-  struct ProfileComparer {
-    bool operator()(FunctionSamples *A, FunctionSamples *B) const {
-      // Sort function profiles by the number of total samples and their
-      // contexts.
-      if (A->getTotalSamples() == B->getTotalSamples())
-        return A->getContext() < B->getContext();
-      return A->getTotalSamples() > B->getTotalSamples();
-    }
-  };
-
-  // Keep profiles of a function sorted so that they will be processed/promoted
-  // deterministically.
- using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>; + using ContextSamplesTy = std::vector<FunctionSamples *>; + SampleContextTracker() = default; SampleContextTracker(SampleProfileMap &Profiles, const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap); + // Populate the FuncToCtxtProfiles map after the trie is built. + void populateFuncToCtxtMap(); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -125,6 +110,8 @@ public: // Get all context profile for given function. ContextSamplesTy &getAllContextSamplesFor(const Function &Func); ContextSamplesTy &getAllContextSamplesFor(StringRef Name); + ContextTrieNode *getOrCreateContextPath(const SampleContext &Context, + bool AllowCreate); // Query base profile for a given function. A base profile is a merged view // of all context profiles for contexts that are not inlined. FunctionSamples *getBaseSamplesFor(const Function &Func, @@ -142,6 +129,64 @@ public: ContextTrieNode &getRootContext(); void promoteMergeContextSamplesTree(const Instruction &Inst, StringRef CalleeName); + + // Create a merged context-less profile map. + void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles); + ContextTrieNode * + getContextNodeForProfile(const FunctionSamples *FSamples) const { + auto I = ProfileToNodeMap.find(FSamples); + if (I == ProfileToNodeMap.end()) + return nullptr; + return I->second; + } + StringMap<ContextSamplesTy> &getFuncToCtxtProfiles() { + return FuncToCtxtProfiles; + } + + class Iterator : public std::iterator<std::forward_iterator_tag, ContextTrieNode *> { + std::queue<ContextTrieNode *> NodeQueue; + + public: + explicit Iterator() = default; + explicit Iterator(ContextTrieNode *Node) { NodeQueue.push(Node); } + Iterator &operator++() { + assert(!NodeQueue.empty() && "Iterator already at the end"); + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); + return *this; + } + + Iterator operator++(int) { + assert(!NodeQueue.empty() && "Iterator already at the end"); + Iterator Ret = *this; + ++(*this); + return Ret; + } + bool operator==(const Iterator &Other) const { + if (NodeQueue.empty() && Other.NodeQueue.empty()) + return true; + if (NodeQueue.empty() || Other.NodeQueue.empty()) + return false; + return NodeQueue.front() == Other.NodeQueue.front(); + } + bool operator!=(const Iterator &Other) const { return !(*this == Other); } + ContextTrieNode *operator*() const { + assert(!NodeQueue.empty() && "Invalid access to end iterator"); + return NodeQueue.front(); + } + }; + + Iterator begin() { return Iterator(&RootContext); } + Iterator end() { return Iterator(); } + +#ifndef NDEBUG + // Get a context string from root to current node. + std::string getContextString(const FunctionSamples &FSamples) const; + std::string getContextString(ContextTrieNode *Node) const; +#endif // Dump the internal context profile trie.
void dump(); @@ -149,21 +194,26 @@ private: ContextTrieNode *getContextFor(const DILocation *DIL); ContextTrieNode *getCalleeContextFor(const DILocation *DIL, StringRef CalleeName); - ContextTrieNode *getOrCreateContextPath(const SampleContext &Context, - bool AllowCreate); ContextTrieNode *getTopLevelContextNode(StringRef FName); ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); - void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove); + void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode); ContextTrieNode & promoteMergeContextSamplesTree(ContextTrieNode &FromNode, - ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove); - + ContextTrieNode &ToNodeParent); + ContextTrieNode &moveContextSamples(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove); + void setContextNode(const FunctionSamples *FSample, ContextTrieNode *Node) { + ProfileToNodeMap[FSample] = Node; + } // Map from function name to context profiles (excluding base profile) StringMap<ContextSamplesTy> FuncToCtxtProfiles; + // Map from a FunctionSamples object to its node in the context trie. + std::unordered_map<const FunctionSamples *, ContextTrieNode *> + ProfileToNodeMap; + // Map from function guid to real function names. Only used in md5 mode. const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap; diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h index 704b793ab3ea..d838c8b8a83e 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h @@ -36,7 +36,7 @@ public: private: std::string ProfileFileName; std::string ProfileRemappingFileName; - ThinOrFullLTOPhase LTOPhase; + const ThinOrFullLTOPhase LTOPhase; }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index e73c36043cb2..ed296d2dd080 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -16,17 +16,19 @@ #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H #include "llvm/ADT/DenseMap.h" -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" -#include "llvm/Target/TargetMachine.h" #include namespace llvm { +class Any; +class BasicBlock; +class Function; +class Instruction; +class Loop; +class PassInstrumentationCallbacks; +class TargetMachine; class Module; diff --git a/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h index f4a15c36afc9..4a2eaad63113 100644 --- a/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h +++ b/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h @@ -16,11 +16,12 @@ #ifndef LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H #define LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; + /// Pass to remove unused function declarations.
struct StripDeadPrototypesPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h b/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h index 7acb922b37e1..469cf2bc5011 100644 --- a/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h +++ b/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h @@ -17,9 +17,10 @@ #define LLVM_TRANSFORMS_IPO_THINLTOBITCODEWRITER_H #include -#include namespace llvm { +class Module; +class raw_ostream; class ThinLTOBitcodeWriterPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 2e9744cfd524..47c137e70a7f 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -14,16 +14,17 @@ #ifndef LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H #define LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H -#include "llvm/IR/Module.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/IPO/FunctionImport.h" #include #include +#include #include #include #include namespace llvm { +class Module; template class ArrayRef; template class MutableArrayRef; diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h index 6dee38c83b36..35a3a8c3218b 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h @@ -18,6 +18,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Utils/InstructionWorklist.h" diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index a288a3972c3d..9ff45fc29b06 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -15,6 +15,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include #include #include @@ -75,21 +79,6 @@ struct GCOVOptions { std::string Exclude; }; -ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = - GCOVOptions::getDefault()); - -// PGO Instrumention. Parameter IsCS indicates if this is the context sensitive -// instrumentation. -ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); -ModulePass * -createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), - bool IsCS = false); -ModulePass *createPGOInstrumentationGenCreateVarLegacyPass( - StringRef CSInstrName = StringRef("")); -ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, - bool SamplePGO = false); -FunctionPass *createPGOMemOPSizeOptLegacyPass(); - ModulePass *createCGProfileLegacyPass(); // The pgo-specific indirect call promotion function declared below is used by @@ -194,6 +183,26 @@ static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) { assert(Scaled <= std::numeric_limits::max() && "overflow 32-bits"); return Scaled; } + +// Use to ensure the inserted instrumentation has a DebugLocation; if none is +// attached to the source instruction, try to use a DILocation with offset 0 +// scoped to surrounding function (if it has a DebugLocation). 
+// +// Some non-call instructions may be missing debug info, but when inserting +// instrumentation calls, some builds (e.g. LTO) want calls to have debug info +// if the enclosing function does. +struct InstrumentationIRBuilder : IRBuilder<> { + static void ensureDebugInfo(IRBuilder<> &IRB, const Function &F) { + if (IRB.getCurrentDebugLocation()) + return; + if (DISubprogram *SP = F.getSubprogram()) + IRB.SetCurrentDebugLocation(DILocation::get(SP->getContext(), 0, 0, SP)); + } + + explicit InstrumentationIRBuilder(Instruction *IP) : IRBuilder<>(IP) { + ensureDebugInfo(*this, *IP->getFunction()); + } +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_INSTRUMENTATION_H diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index a0d8118c23f7..d12b2cf45825 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -13,82 +13,17 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZER_H #define LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZER_H -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" namespace llvm { - -/// Frontend-provided metadata for source location. -struct LocationMetadata { - StringRef Filename; - int LineNo = 0; - int ColumnNo = 0; - - LocationMetadata() = default; - - bool empty() const { return Filename.empty(); } - void parse(MDNode *MDN); -}; - -/// Frontend-provided metadata for global variables. -class GlobalsMetadata { -public: - struct Entry { - LocationMetadata SourceLoc; - StringRef Name; - bool IsDynInit = false; - bool IsExcluded = false; - - Entry() = default; - }; - - /// Create a default uninitialized GlobalsMetadata instance. - GlobalsMetadata() = default; - - /// Create an initialized GlobalsMetadata instance. - GlobalsMetadata(Module &M); - - /// Returns metadata entry for a given global. - Entry get(GlobalVariable *G) const { - auto Pos = Entries.find(G); - return (Pos != Entries.end()) ? Pos->second : Entry(); - } - - /// Handle invalidation from the pass manager. - /// These results are never invalidated. - bool invalidate(Module &, const PreservedAnalyses &, - ModuleAnalysisManager::Invalidator &) { - return false; - } - bool invalidate(Function &, const PreservedAnalyses &, - FunctionAnalysisManager::Invalidator &) { - return false; - } - -private: - DenseMap Entries; -}; - -/// The ASanGlobalsMetadataAnalysis initializes and returns a GlobalsMetadata -/// object. More specifically, ASan requires looking at all globals registered -/// in 'llvm.asan.globals' before running, which only depends on reading module -/// level metadata. This analysis is required to run before running the -/// AddressSanitizerPass since it collects that metadata. -/// The legacy pass manager equivalent of this is ASanGlobalsMetadataLegacyPass. 
-class ASanGlobalsMetadataAnalysis - : public AnalysisInfoMixin { -public: - using Result = GlobalsMetadata; - - Result run(Module &, ModuleAnalysisManager &); - -private: - friend AnalysisInfoMixin; - static AnalysisKey Key; -}; +class Function; +class FunctionPass; +class GlobalVariable; +class MDNode; +class Module; +class ModulePass; +class raw_ostream; struct AddressSanitizerOptions { bool CompileKernel = false; @@ -98,26 +33,6 @@ struct AddressSanitizerOptions { AsanDetectStackUseAfterReturnMode::Runtime; }; -/// Public interface to the address sanitizer pass for instrumenting code to -/// check for various memory errors at runtime. -/// -/// The sanitizer itself is a function pass that works by inserting various -/// calls to the ASan runtime library functions. The runtime library essentially -/// replaces malloc() and free() with custom implementations that allow regions -/// surrounding requested memory to be checked for invalid accesses. -class AddressSanitizerPass : public PassInfoMixin { -public: - AddressSanitizerPass(const AddressSanitizerOptions &Options) - : Options(Options){}; - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - void printPipeline(raw_ostream &OS, - function_ref MapClassName2PassName); - static bool isRequired() { return true; } - -private: - AddressSanitizerOptions Options; -}; - /// Public interface to the address sanitizer module pass for instrumenting code /// to check for various memory errors. /// @@ -142,17 +57,6 @@ private: AsanDtorKind DestructorKind; }; -// Insert AddressSanitizer (address basic correctness checking) instrumentation -FunctionPass *createAddressSanitizerFunctionPass( - bool CompileKernel = false, bool Recover = false, - bool UseAfterScope = false, - AsanDetectStackUseAfterReturnMode UseAfterReturn = - AsanDetectStackUseAfterReturnMode::Runtime); -ModulePass *createModuleAddressSanitizerLegacyPassPass( - bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, - bool UseOdrIndicator = true, - AsanDtorKind DestructorKind = AsanDtorKind::Global); - struct ASanAccessInfo { const int32_t Packed; const uint8_t AccessSizeIndex; diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h index 0a5456c5956f..7858a1c4b2fd 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h @@ -47,51 +47,6 @@ public: Value *getPtr() { return PtrUse->get(); } }; -// For an alloca valid between lifetime markers Start and Ends, call the -// Callback for all possible exits out of the lifetime in the containing -// function, which can return from the instructions in RetVec. -// -// Returns whether Ends covered all possible exits. If they did not, -// the caller should remove Ends to ensure that work done at the other -// exits does not happen outside of the lifetime. 
-template <typename F> -bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, - const Instruction *Start, - const SmallVectorImpl &Ends, - const SmallVectorImpl &RetVec, - F Callback) { - if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) { - Callback(Ends[0]); - return true; - } - SmallVector ReachableRetVec; - unsigned NumCoveredExits = 0; - for (auto *RI : RetVec) { - if (!isPotentiallyReachable(Start, RI, nullptr, &DT)) - continue; - ReachableRetVec.push_back(RI); - // TODO(fmayer): We don't support diamond shapes, where multiple lifetime - // ends together dominate the RI, but none of them does by itself. - // Check how often this happens and decide whether to support this here. - if (std::any_of(Ends.begin(), Ends.end(), - [&](Instruction *End) { return DT.dominates(End, RI); })) - ++NumCoveredExits; - } - // If there's a mix of covered and non-covered exits, just put the untag - // on exits, so we avoid the redundancy of untagging twice. - if (NumCoveredExits == ReachableRetVec.size()) { - for (auto *End : Ends) - Callback(End); - } else { - for (auto *RI : ReachableRetVec) - Callback(RI); - // We may have inserted untag outside of the lifetime interval. - // Signal the caller to remove the lifetime end call for this alloca. - return false; - } - return true; -} - // Get AddressSanitizer parameters. void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, bool IsKasan, uint64_t *ShadowBase, diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h index f019d1c00a35..187aaedb6000 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h @@ -17,14 +17,13 @@ enum class AsanDtorKind { None, ///< Do not emit any destructors for ASan Global, ///< Append to llvm.global_dtors Invalid, ///< Not a valid destructor Kind. - // TODO(dliew): Add more more kinds. }; /// Mode of ASan detect stack use after return enum class AsanDetectStackUseAfterReturnMode { Never, ///< Never detect stack use after return. - Runtime, ///< Detect stack use after return if runtime flag is enabled - ///< (ASAN_OPTIONS=detect_stack_use_after_return=1) + Runtime, ///< Detect stack use after return if not disabled at runtime with + ///< (ASAN_OPTIONS=detect_stack_use_after_return=0). Always, ///< Always detect stack use after return. Invalid, ///< Not a valid detect mode. }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h index 76d586252743..5e68141e3399 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h +++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h @@ -10,9 +10,10 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_BOUNDSCHECKING_H #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { +class Function; +class FunctionPass; /// A pass to instrument code and perform run-time bounds checking on loads, /// stores, and other memory intrinsics.
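One note on the Instrumentation.h hunk further up: the new InstrumentationIRBuilder guarantees that inserted instrumentation calls carry a DebugLocation, falling back to a line-0 DILocation scoped to the enclosing DISubprogram when the instrumented instruction has none. A minimal sketch of the intended usage follows; insertTraceCall and TraceFn are hypothetical, and only InstrumentationIRBuilder itself comes from the patch.

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Instrumentation.h"

using namespace llvm;

// Hypothetical helper: insert a call to a runtime hook before Inst. The
// builder reuses Inst's debug location, or synthesizes a line-0 location
// from the enclosing function's DISubprogram, so the inserted call keeps
// debug info whenever the surrounding function has it.
static void insertTraceCall(Instruction *Inst, FunctionCallee TraceFn) {
  InstrumentationIRBuilder IRB(Inst); // sets insert point and debug location
  IRB.CreateCall(TraceFn);
}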
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h index c56e4c78cad5..9f9ce42277a0 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h @@ -12,10 +12,10 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CGPROFILE_H #define LLVM_TRANSFORMS_INSTRUMENTATION_CGPROFILE_H -#include "llvm/ADT/MapVector.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; class CGProfilePass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); diff --git a/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h b/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h index 18b428582046..0bace514c361 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h +++ b/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h @@ -14,7 +14,6 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H #define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H -#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h index 9b57b1f9a9ea..41ba05cd67f0 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h @@ -8,12 +8,12 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H #define LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include #include namespace llvm { +class Module; class DataFlowSanitizerPass : public PassInfoMixin { private: diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h index 70949026a892..d3b5b5ca5c25 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h @@ -13,11 +13,14 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZER_H #define LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZER_H -#include "llvm/IR/Function.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { +class FunctionPass; +class Module; +class StringRef; +class raw_ostream; struct HWAddressSanitizerOptions { HWAddressSanitizerOptions() @@ -47,11 +50,6 @@ private: HWAddressSanitizerOptions Options; }; -FunctionPass * -createHWAddressSanitizerLegacyPassPass(bool CompileKernel = false, - bool Recover = false, - bool DisableOptimization = false); - namespace HWASanAccessInfo { // Bit field positions for the accessinfo parameter to diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 5873db22a5d1..90fc0670448b 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -19,7 +19,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Transforms/Instrumentation.h" -#include #include #include #include @@ -57,6 +56,9 @@ private: } }; DenseMap ProfileDataMap; + /// If runtime relocation is enabled, this maps functions to the load + /// instruction that 
produces the profile relocation bias. + DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap; std::vector CompilerUsedVars; std::vector UsedVars; std::vector ReferencedNames; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index b9ad56ba7509..b584b9984492 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -12,12 +12,13 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H #define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { +class Function; +class FunctionPass; +class Module; +class ModulePass; /// Public interface to the memory profiler pass for instrumenting code to /// profile memory accesses. diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index e5779dc775ba..e4654a0fc7ef 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -13,10 +13,15 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H #define LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { +class Function; +class FunctionPass; +class Module; +class StringRef; +class raw_ostream; struct MemorySanitizerOptions { MemorySanitizerOptions() : MemorySanitizerOptions(0, false, false, false){}; @@ -30,10 +35,6 @@ struct MemorySanitizerOptions { bool EagerChecks; }; -// Insert MemorySanitizer instrumentation (detection of uninitialized reads) -FunctionPass * -createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options = {}); - /// A function pass for msan instrumentation. /// /// Instruments functions to detect uninitialized reads. This function pass diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h index e3d268cb0781..9bacb7eb38a5 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -16,13 +16,14 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H #define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/SpecialCaseList.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Transforms/Instrumentation.h" namespace llvm { +class Module; +class ModulePass; /// This is the ModuleSanitizerCoverage pass used in the new pass manager.
The /// pass instruments functions for coverage, adds initialization calls to the diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index e795043630d5..b3a067ba59c2 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -14,11 +14,11 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { -// Insert ThreadSanitizer (race detection) instrumentation -FunctionPass *createThreadSanitizerLegacyPassPass(); +class Function; +class FunctionPass; +class Module; /// A function pass for tsan instrumentation. /// diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index d6228700aa9a..edd492b0343d 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -133,7 +133,8 @@ Pass *createIndVarSimplifyPass(); // Pass *createLICMPass(); Pass *createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool AllowSpeculation); //===----------------------------------------------------------------------===// // @@ -168,13 +169,6 @@ FunctionPass *createLoopFlattenPass(); // Pass *createLoopStrengthReducePass(); -//===----------------------------------------------------------------------===// -// -// LoopUnswitch - This pass is a simple loop unswitching pass. -// -Pass *createLoopUnswitchPass(bool OptimizeForSize = false, - bool hasBranchDivergence = false); - //===----------------------------------------------------------------------===// // // LoopInstSimplify - This pass simplifies instructions in a loop's body. @@ -246,12 +240,10 @@ FunctionPass *createReassociatePass(); //===----------------------------------------------------------------------===// // // JumpThreading - Thread control through multi-pred/multi-succ blocks where some -// preds always go to some succ. If FreezeSelectCond is true, unfold the -// condition of a select that unfolds to branch. Thresholds other than minus one +// preds always go to some succ. Thresholds other than minus one // override the internal BB duplication default threshold. // -FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false, - int Threshold = -1); +FunctionPass *createJumpThreadingPass(int Threshold = -1); //===----------------------------------------------------------------------===// // @@ -426,6 +418,12 @@ extern char &InferAddressSpacesID; // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); +//===----------------------------------------------------------------------===// +// +// TLSVariableHoist - This pass reduces duplicated TLS address calls. +// +FunctionPass *createTLSVariableHoistPass(); + //===----------------------------------------------------------------------===// // // LowerConstantIntrinsics - Expand any remaining llvm.objectsize and diff --git a/llvm/include/llvm/Transforms/Scalar/BDCE.h b/llvm/include/llvm/Transforms/Scalar/BDCE.h index 996622bccdba..0763f31dfad4 100644 --- a/llvm/include/llvm/Transforms/Scalar/BDCE.h +++ b/llvm/include/llvm/Transforms/Scalar/BDCE.h @@ -16,11 +16,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_BDCE_H #define LLVM_TRANSFORMS_SCALAR_BDCE_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + // The Bit-Tracking Dead Code Elimination pass.
struct BDCEPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h index ee2b6f264086..661340f4598f 100644 --- a/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h +++ b/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h @@ -9,11 +9,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING_H #define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + struct CallSiteSplittingPass : PassInfoMixin { /// Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h index 11379e59467f..e59734b92244 100644 --- a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -40,7 +40,6 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include diff --git a/llvm/include/llvm/Transforms/Scalar/DCE.h b/llvm/include/llvm/Transforms/Scalar/DCE.h index 4d83296b1d86..8d1616a7b75d 100644 --- a/llvm/include/llvm/Transforms/Scalar/DCE.h +++ b/llvm/include/llvm/Transforms/Scalar/DCE.h @@ -13,11 +13,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_DCE_H #define LLVM_TRANSFORMS_SCALAR_DCE_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// Basic Dead Code Elimination pass. class DCEPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h b/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h index afebd9bbc122..4e9fbf65e163 100644 --- a/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h @@ -13,11 +13,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H #define LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + struct DFAJumpThreadingPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h index 5fb47af6f795..f4bec228ea96 100644 --- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h +++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h @@ -18,11 +18,17 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/ConstantRange.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class DominatorTree; +class Function; +class Instruction; +class LLVMContext; +template class Optional; +class Type; +class Value; + class Float2IntPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); @@ -36,6 +42,7 @@ private: ConstantRange badRange(); ConstantRange unknownRange(); ConstantRange validateRange(ConstantRange R); + Optional calcRange(Instruction *I); void walkBackwards(); void walkForwards(); bool validateAndTransform(); diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 9e660c92124e..16ab1a490162 100644 --- 
a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -17,10 +17,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" @@ -42,6 +40,8 @@ class CallInst; class ExtractValueInst; class Function; class FunctionPass; +class GetElementPtrInst; +class ImplicitControlFlowTracking; class LoadInst; class LoopInfo; class MemDepResult; @@ -178,6 +178,7 @@ public: Expression createCmpExpr(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS); Expression createExtractvalueExpr(ExtractValueInst *EI); + Expression createGEPExpr(GetElementPtrInst *GEP); uint32_t lookupOrAddCall(CallInst *C); uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, uint32_t Num, GVNPass &Gvn); diff --git a/llvm/include/llvm/Transforms/Scalar/GuardWidening.h b/llvm/include/llvm/Transforms/Scalar/GuardWidening.h index d08d042ab055..fa03d5f678fd 100644 --- a/llvm/include/llvm/Transforms/Scalar/GuardWidening.h +++ b/llvm/include/llvm/Transforms/Scalar/GuardWidening.h @@ -15,12 +15,13 @@ #ifndef LLVM_TRANSFORMS_SCALAR_GUARDWIDENING_H #define LLVM_TRANSFORMS_SCALAR_GUARDWIDENING_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; class Function; struct GuardWideningPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h index a1f20d9ca983..4136c45e1905 100644 --- a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h @@ -9,11 +9,13 @@ #ifndef LLVM_TRANSFORMS_SCALAR_IVUSERSPRINTER_H #define LLVM_TRANSFORMS_SCALAR_IVUSERSPRINTER_H -#include "llvm/Analysis/IVUsers.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/PassManager.h" namespace llvm { +class LPMUpdater; +class Loop; +class raw_ostream; /// Printer pass for the \c IVUsers for a loop. class IVUsersPrinterPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 0ac7d7c62b7a..09d08bf423a6 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -16,14 +16,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/IR/ValueHandle.h" -#include #include namespace llvm { @@ -95,10 +92,9 @@ class JumpThreadingPass : public PassInfoMixin { unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; - bool InsertFreezeWhenUnfoldingSelect; public: - JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); + JumpThreadingPass(int T = -1); // Glue for old PM. 
bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h index 751f75c0ccb2..f7dd40be47e5 100644 --- a/llvm/include/llvm/Transforms/Scalar/LICM.h +++ b/llvm/include/llvm/Transforms/Scalar/LICM.h @@ -32,46 +32,70 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LICM_H #define LLVM_TRANSFORMS_SCALAR_LICM_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; +class LoopNest; + extern cl::opt SetLicmMssaOptCap; extern cl::opt SetLicmMssaNoAccForPromotionCap; +struct LICMOptions { + unsigned MssaOptCap; + unsigned MssaNoAccForPromotionCap; + bool AllowSpeculation; + + LICMOptions() + : MssaOptCap(SetLicmMssaOptCap), + MssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap), + AllowSpeculation(true) {} + + LICMOptions(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap, + bool AllowSpeculation) + : MssaOptCap(MssaOptCap), + MssaNoAccForPromotionCap(MssaNoAccForPromotionCap), + AllowSpeculation(AllowSpeculation) {} +}; + /// Performs Loop Invariant Code Motion Pass. class LICMPass : public PassInfoMixin { - unsigned LicmMssaOptCap; - unsigned LicmMssaNoAccForPromotionCap; + LICMOptions Opts; public: - LICMPass() - : LicmMssaOptCap(SetLicmMssaOptCap), - LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} - LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) - : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LICMPass(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap, + bool AllowSpeculation) + : LICMPass(LICMOptions(MssaOptCap, MssaNoAccForPromotionCap, + AllowSpeculation)) {} + LICMPass(LICMOptions Opts) : Opts(Opts) {} + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); }; /// Performs LoopNest Invariant Code Motion Pass. 
class LNICMPass : public PassInfoMixin { - unsigned LicmMssaOptCap; - unsigned LicmMssaNoAccForPromotionCap; + LICMOptions Opts; public: - LNICMPass() - : LicmMssaOptCap(SetLicmMssaOptCap), - LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} - LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) - : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LNICMPass(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap, + bool AllowSpeculation) + : LNICMPass(LICMOptions(MssaOptCap, MssaNoAccForPromotionCap, + AllowSpeculation)) {} + LNICMPass(LICMOptions Opts) : Opts(Opts) {} + PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h index 3f250fc1ce8c..50a837acf4e3 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h @@ -8,12 +8,14 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPACCESSANALYSISPRINTER_H #define LLVM_TRANSFORMS_SCALAR_LOOPACCESSANALYSISPRINTER_H - -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/PassManager.h" namespace llvm { +class LPMUpdater; +class Loop; +class raw_ostream; /// Printer pass for the \c LoopAccessInfo results. class LoopAccessInfoPrinterPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h b/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h index 306b6fa046df..0c597bf295b2 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h @@ -10,11 +10,11 @@ #define LLVM_TRANSFORMS_SCALAR_LOOPBOUNDSPLIT_H #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; /// This pass transforms loops that contain a conditional branch with induction /// variable. For example, it transforms left code to right code: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h b/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h index 9ebd5984cea9..d5e15ffff075 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h @@ -13,11 +13,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H #define LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// An optimization pass inserting data prefetches in loops. 
class LoopDataPrefetchPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h b/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h index 557616e2e6ba..459a5cd3ece4 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h @@ -14,13 +14,13 @@ #define LLVM_TRANSFORMS_SCALAR_LOOPDELETION_H #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + class LoopDeletionPass : public PassInfoMixin { public: LoopDeletionPass() = default; diff --git a/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h index 3d259bdbe986..311b843e83b5 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h @@ -14,11 +14,11 @@ #define LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class LoopNest; class LoopFlattenPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h index c67a30293d2f..8fa14d747f5c 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h @@ -9,11 +9,14 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H #define LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class LoopNest; + struct LoopInterchangePass : public PassInfoMixin { PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index e83cc2b9bef0..1df510474ca7 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -40,8 +40,6 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/LoopSimplify.h" @@ -52,6 +50,7 @@ namespace llvm { // Forward declarations of an update tracking API used in the pass manager. 
class LPMUpdater; +class PassInstrumentation; namespace { diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPredication.h b/llvm/include/llvm/Transforms/Scalar/LoopPredication.h index 252daafab7a3..83f533603419 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPredication.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPredication.h @@ -14,12 +14,13 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H #define LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; /// Performs Loop Predication Pass. class LoopPredicationPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h index f68ac70da324..c0e6f105a412 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h @@ -13,11 +13,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPROTATION_H #define LLVM_TRANSFORMS_SCALAR_LOOPROTATION_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; /// A simple loop rotation transformation. class LoopRotatePass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h index 2d718592aef5..82c8a4406d00 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h @@ -16,12 +16,14 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPSIMPLIFYCFG_H #define LLVM_TRANSFORMS_SCALAR_LOOPSIMPLIFYCFG_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; + /// Performs basic CFG simplifications to assist other loop passes. class LoopSimplifyCFGPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopSink.h b/llvm/include/llvm/Transforms/Scalar/LoopSink.h index 234c48cbebc5..26e50590a625 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopSink.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopSink.h @@ -13,12 +13,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPSINK_H #define LLVM_TRANSFORMS_SCALAR_LOOPSINK_H -#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Function; + /// A pass that does profile-guided sinking of instructions into loops. /// /// This is a function pass as it shouldn't be composed into any kind of diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h index 72663d3d62a8..54f70d7ed4b3 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h @@ -9,10 +9,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H #define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class LoopNest; /// A simple loop rotation transformation. 
class LoopUnrollAndJamPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h b/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h index 87d6d6759db2..04e0012330da 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h @@ -9,10 +9,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H #define LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; class LoopVersioningLICMPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h deleted file mode 100644 index 87d945d06901..000000000000 --- a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h +++ /dev/null @@ -1,35 +0,0 @@ -//===- LowerAtomic.cpp - Lower atomic intrinsics ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -// This pass lowers atomic intrinsics to non-atomic form for use in a known -// non-preemptible environment. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H -#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -/// A pass that lowers atomic intrinsic into non-atomic intrinsics. -class LowerAtomicPass : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &); - static bool isRequired() { return true; } -}; - -class AtomicRMWInst; -/// Convert the given RMWI into primitive load and stores, -/// assuming that doing so is legal. Return true if the lowering -/// succeeds. -bool lowerAtomicRMWInst(AtomicRMWInst *RMWI); -} - -#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H diff --git a/llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h b/llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h new file mode 100644 index 000000000000..60bbf916fced --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h @@ -0,0 +1,30 @@ +//===- LowerAtomicPass.h - Lower atomic intrinsics --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMICPASS_H +#define LLVM_TRANSFORMS_SCALAR_LOWERATOMICPASS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A pass that lowers atomic intrinsic into non-atomic intrinsics. 
+class LowerAtomicPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); + static bool isRequired() { return true; } +}; + +} + +#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMICPASS_H diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h index 61c7bf0454e1..e8e404bb93d6 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h @@ -15,11 +15,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H #define LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + struct LowerConstantIntrinsicsPass : PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h index 4e47ff70d557..95ef0f73e8af 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h @@ -15,11 +15,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOWEREXPECTINTRINSIC_H #define LLVM_TRANSFORMS_SCALAR_LOWEREXPECTINTRINSIC_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + struct LowerExpectIntrinsicPass : PassInfoMixin { /// Run the pass over the function. /// diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 3a4db13d670a..8103b0a92489 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -16,8 +16,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/PassManager.h" -#include -#include namespace llvm { @@ -63,7 +61,7 @@ private: bool processMemMove(MemMoveInst *M); bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, Value *cpyDst, Value *cpySrc, TypeSize cpyLen, - Align cpyAlign, CallInst *C); + Align cpyAlign, std::function GetC); bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet); diff --git a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h index 256d03675a07..71e11e59a471 100644 --- a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h +++ b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h @@ -23,10 +23,11 @@ #ifndef LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H #define LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H -#include "llvm/IR/Module.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; struct MergedLoadStoreMotionOptions { bool SplitFooterBB; MergedLoadStoreMotionOptions(bool SplitFooterBB = false) diff --git a/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h b/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h index fd5a06c5051d..b8a8fcc71e57 100644 --- a/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h +++ b/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h @@ -15,10 +15,10 @@ #ifndef LLVM_TRANSFORMS_SCALAR_PARTIALLYINLINELIBCALLS_H #define LLVM_TRANSFORMS_SCALAR_PARTIALLYINLINELIBCALLS_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { 
+class Function; class PartiallyInlineLibCallsPass : public PassInfoMixin<PartiallyInlineLibCallsPass> { public: diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h index cd4100447880..032a9b15fc46 100644 --- a/llvm/include/llvm/Transforms/Scalar/SCCP.h +++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h @@ -20,17 +20,19 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SCCP_H #define LLVM_TRANSFORMS_SCALAR_SCCP_H -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Utils/PredicateInfo.h" -#include "llvm/Transforms/Utils/SCCPSolver.h" + +#include namespace llvm { +class AssumptionCache; +class DataLayout; +class Function; +class Module; +class TargetLibraryInfo; +class TargetTransformInfo; +struct AnalysisResultsForFn; /// This pass performs function-level constant propagation and merging. class SCCPPass : public PassInfoMixin<SCCPPass> { diff --git a/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h index e4002159edbd..5e876fc82ac1 100644 --- a/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h +++ b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h @@ -1,5 +1,5 @@ //===- ScalarizeMaskedMemIntrin.h - Scalarize unsupported masked mem ----===// -// instrinsics +// intrinsics // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h index f4472e699295..5cc67f78e5a2 100644 --- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h +++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h @@ -17,14 +17,33 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZER_H #define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H +#include "llvm/ADT/Optional.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { +class Function; +class FunctionPass; + +struct ScalarizerPassOptions { + // These optional booleans correspond 1:1 to cl::opt<bool> options defined in + // Scalarizer.cpp. When the cl::opt are specified, they take precedence. + // When the cl::opt are not specified, the present optional booleans allow + // overriding the cl::opt's default values.
+ llvm::Optional<bool> ScalarizeVariableInsertExtract; + llvm::Optional<bool> ScalarizeLoadStore; +}; + class ScalarizerPass : public PassInfoMixin<ScalarizerPass> { + ScalarizerPassOptions Options; + public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + void setScalarizeVariableInsertExtract(bool Value) { + Options.ScalarizeVariableInsertExtract = Value; + } + void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; } }; /// Create a legacy pass manager instance of the Scalarizer pass diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h index dfb1619c7f2a..68c121560b13 100644 --- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h +++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -9,13 +9,18 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H #define LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class LPMUpdater; +class Loop; +class Pass; +class StringRef; +class raw_ostream; + /// This pass transforms loops that contain branches or switches on loop- /// invariant conditions to have multiple loops. For example, it turns the left /// into the right code: diff --git a/llvm/include/llvm/Transforms/Scalar/Sink.h b/llvm/include/llvm/Transforms/Scalar/Sink.h index 6cbe964d1580..759153f22853 100644 --- a/llvm/include/llvm/Transforms/Scalar/Sink.h +++ b/llvm/include/llvm/Transforms/Scalar/Sink.h @@ -14,11 +14,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SINK_H #define LLVM_TRANSFORMS_SCALAR_SINK_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// Move instructions into successor blocks when possible. class SinkingPass : public PassInfoMixin<SinkingPass> { public: diff --git a/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h index 41de544e7c9c..0ec2a395f875 100644 --- a/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h +++ b/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h @@ -62,10 +62,10 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H #define LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/PassManager.h" namespace llvm { +class TargetTransformInfo; class SpeculativeExecutionPass : public PassInfoMixin<SpeculativeExecutionPass> { public: diff --git a/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h b/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h new file mode 100644 index 000000000000..2a1b02b40eeb --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h @@ -0,0 +1,131 @@ +//==- TLSVariableHoist.h ------ Remove Redundant TLS Loads -------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass identifies/eliminates redundant TLS loads if the related option is set.
+// For example:
+//   static __thread int x;
+//   int g();
+//   int f(int c) {
+//     int *px = &x;
+//     while (c--)
+//       *px += g();
+//     return *px;
+//   }
+//
+// will generate redundant TLS loads when compiled with
+// clang++ -fPIC -ftls-model=global-dynamic -O2 -S
+//
+// .LBB0_2:                               # %while.body
+//                                        # =>This Inner Loop Header: Depth=1
+//   callq   _Z1gv@PLT
+//   movl    %eax, %ebp
+//   leaq    _ZL1x@TLSLD(%rip), %rdi
+//   callq   __tls_get_addr@PLT
+//   addl    _ZL1x@DTPOFF(%rax), %ebp
+//   movl    %ebp, _ZL1x@DTPOFF(%rax)
+//   addl    $-1, %ebx
+//   jne     .LBB0_2
+//   jmp     .LBB0_3
+// .LBB0_4:                               # %entry.while.end_crit_edge
+//   leaq    _ZL1x@TLSLD(%rip), %rdi
+//   callq   __tls_get_addr@PLT
+//   movl    _ZL1x@DTPOFF(%rax), %ebp
+//
+// The redundant TLS loads hurt performance, especially in loops.
+// So we try to eliminate/move them if required by customers, e.g.:
+//
+// # %bb.0:                               # %entry
+//   ...
+//   movl    %edi, %ebx
+//   leaq    _ZL1x@TLSLD(%rip), %rdi
+//   callq   __tls_get_addr@PLT
+//   leaq    _ZL1x@DTPOFF(%rax), %r14
+//   testl   %ebx, %ebx
+//   je      .LBB0_1
+// .LBB0_2:                               # %while.body
+//                                        # =>This Inner Loop Header: Depth=1
+//   callq   _Z1gv@PLT
+//   addl    (%r14), %eax
+//   movl    %eax, (%r14)
+//   addl    $-1, %ebx
+//   jne     .LBB0_2
+//   jmp     .LBB0_3
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
+#define LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class BasicBlock;
+class DominatorTree;
+class Function;
+class GlobalVariable;
+class Instruction;
+
+/// A private "module" namespace for types and utilities used by
+/// TLSVariableHoist. These are implementation details and should
+/// not be used by clients.
+namespace tlshoist {
+
+/// Keeps track of the user of a TLS variable and the operand index
+/// where the variable is used.
+struct TLSUser {
+  Instruction *Inst;
+  unsigned OpndIdx;
+
+  TLSUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) {}
+};
+
+/// Keeps track of a TLS variable candidate and its users.
+struct TLSCandidate {
+  SmallVector<TLSUser, 8> Users;
+
+  /// Add the user to the use list and update the cost.
+  void addUser(Instruction *Inst, unsigned Idx) {
+    Users.push_back(TLSUser(Inst, Idx));
+  }
+};
+
+} // end namespace tlshoist
+
+class TLSVariableHoistPass : public PassInfoMixin<TLSVariableHoistPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  // Glue for old PM.
+  bool runImpl(Function &F, DominatorTree &DT, LoopInfo &LI);
+
+private:
+  DominatorTree *DT;
+  LoopInfo *LI;
+
+  /// Keeps track of TLS variable candidates found in the function.
+  using TLSCandMapType = MapVector<GlobalVariable *, tlshoist::TLSCandidate>;
+  TLSCandMapType TLSCandMap;
+
+  void collectTLSCandidates(Function &Fn);
+  void collectTLSCandidate(Instruction *Inst);
+  Instruction *getNearestLoopDomInst(BasicBlock *BB, Loop *L);
+  Instruction *getDomInst(Instruction *I1, Instruction *I2);
+  BasicBlock::iterator findInsertPos(Function &Fn, GlobalVariable *GV,
+                                     BasicBlock *&PosBB);
+  Instruction *genBitCastInst(Function &Fn, GlobalVariable *GV);
+  bool tryReplaceTLSCandidates(Function &Fn);
+  bool tryReplaceTLSCandidate(Function &Fn, GlobalVariable *GV);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
diff --git a/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h b/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h
index 906867644504..57b1ed9bf4fe 100644
--- a/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h
+++ b/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h
@@ -52,11 +52,12 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_TAILRECURSIONELIMINATION_H
 #define LLVM_TRANSFORMS_SCALAR_TAILRECURSIONELIMINATION_H
 
-#include "llvm/IR/Function.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
 
+class Function;
+
 struct TailCallElimPass : PassInfoMixin<TailCallElimPass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 64691d68b1c4..80d098a1ea52 100644
--- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -14,10 +14,11 @@
 #define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
 
 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
 
 namespace llvm {
 class Function;
+class Pass;
+class PassRegistry;
 
 // New pass manager boilerplate.
 class WarnMissedTransformationsPass
diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h
index 1e9c0a040ad2..ebd4bd318573 100644
--- a/llvm/include/llvm/Transforms/Utils.h
+++ b/llvm/include/llvm/Transforms/Utils.h
@@ -155,6 +155,12 @@ FunctionPass *createAssumeSimplifyPass();
 // don't block SCEV.
 //
 Pass *createCanonicalizeFreezeInLoopsPass();
+
+//===----------------------------------------------------------------------===//
+// LowerGlobalDtorsLegacy - Lower @llvm.global_dtors by creating wrapper
+// functions that are registered in @llvm.global_ctors and which contain a call
+// to `__cxa_atexit` to register their destructor functions.
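// A sketch of the lowering's effect in C-like pseudocode (names are
// illustrative only; the actual IR is produced by the pass declared below):
//
//   /* given a @llvm.global_dtors entry { i32 65535, @dtor, null } */
//   static void call_dtor(void *unused) { dtor(); }
//   static void register_call_dtor(void) { /* appended to @llvm.global_ctors */
//     __cxa_atexit(call_dtor, /*arg=*/0, /*dso=*/&__dso_handle);
//   }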
+ModulePass *createLowerGlobalDtorsLegacyPass();
 } // namespace llvm
 
 #endif
diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
index d679bca69510..991ecb8efbd0 100644
--- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
+++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
@@ -17,12 +17,13 @@
 #define LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEBUILDER_H
 
 #include "llvm/Analysis/AssumeBundleQueries.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Instruction.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
 
 namespace llvm {
+class AssumeInst;
+class Function;
+class FunctionPass;
+class Instruction;
 class AssumptionCache;
 class DominatorTree;
 
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index d99b2a56559d..fcdd2aa0e060 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -18,21 +18,20 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Dominators.h"
 #include <cassert>
 
 namespace llvm {
-
+class BranchInst;
+class LandingPadInst;
+class Loop;
+class PHINode;
+template <typename PtrType> class SmallPtrSetImpl;
 class BlockFrequencyInfo;
 class BranchProbabilityInfo;
-class DominatorTree;
 class DomTreeUpdater;
 class Function;
-class Instruction;
 class LoopInfo;
 class MDNode;
 class MemoryDependenceResults;
@@ -500,7 +499,9 @@ BranchInst *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
 // create the following structure:
 // A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
 // If BPI and BFI are non-null, BPI/BFI will be updated accordingly.
-bool SplitIndirectBrCriticalEdges(Function &F,
+// When `IgnoreBlocksWithoutPHI` is set to `true` critical edges leading to a
+// block without phi-instructions will not be split.
+bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
                                   BranchProbabilityInfo *BPI = nullptr,
                                   BlockFrequencyInfo *BFI = nullptr);
 
diff --git a/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h b/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
index 3644f1ed7a13..6de080ce3128 100644
--- a/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
+++ b/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
@@ -17,10 +17,11 @@
 #ifndef LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
 #define LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
 
-#include "llvm/IR/Function.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
+
+class Function;
 struct BreakCriticalEdgesPass : public PassInfoMixin<BreakCriticalEdgesPass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 87d33b9b11b7..6ea195ce31ac 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -22,23 +22,63 @@
 namespace llvm {
   class IRBuilderBase;
 
   /// Analyze the name and prototype of the given function and set any
-  /// applicable attributes.
+  /// applicable attributes. Note that this merely helps optimizations on an
+  /// already existing function but does not consider mandatory attributes.
+  ///
   /// If the library function is unavailable, this doesn't modify it.
  ///
  /// Returns true if any attributes were set and false otherwise.
-  bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI);
-  bool inferLibFuncAttributes(Module *M, StringRef Name,
-                              const TargetLibraryInfo &TLI);
+  bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name,
+                                     const TargetLibraryInfo &TLI);
+  bool inferNonMandatoryLibFuncAttrs(Function &F, const TargetLibraryInfo &TLI);
+
+  /// Calls getOrInsertFunction() and then makes sure to add mandatory
+  /// argument attributes.
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                    LibFunc TheLibFunc, FunctionType *T,
+                                    AttributeList AttributeList);
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                    LibFunc TheLibFunc, FunctionType *T);
+  template <typename... ArgsTy>
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                    LibFunc TheLibFunc,
+                                    AttributeList AttributeList,
+                                    Type *RetTy, ArgsTy... Args) {
+    SmallVector<Type *> ArgTys{Args...};
+    return getOrInsertLibFunc(M, TLI, TheLibFunc,
+                              FunctionType::get(RetTy, ArgTys, false),
+                              AttributeList);
+  }
+  /// Same as above, but without the attributes.
+  template <typename... ArgsTy>
+  FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                                    LibFunc TheLibFunc, Type *RetTy,
+                                    ArgsTy... Args) {
+    return getOrInsertLibFunc(M, TLI, TheLibFunc, AttributeList{}, RetTy,
+                              Args...);
+  }
+  // Avoid an incorrect ordering that'd otherwise compile incorrectly.
+  template <typename... ArgsTy>
+  FunctionCallee
+  getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+                     LibFunc TheLibFunc, AttributeList AttributeList,
+                     FunctionType *Invalid, ArgsTy... Args) = delete;
+
+  /// Check whether the library function is available on target and also that
+  /// it is declared in the current Module as a Function with the right type.
+  bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+                          LibFunc TheLibFunc);
+  bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+                          StringRef Name);
 
   /// Check whether the overloaded floating point function
   /// corresponding to \a Ty is available.
-  bool hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+  bool hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
                   LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn);
 
   /// Get the name of the overloaded floating point function
-  /// corresponding to \a Ty.
-  StringRef getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
-                           LibFunc DoubleFn, LibFunc FloatFn,
-                           LibFunc LongDoubleFn);
+  /// corresponding to \a Ty. Return the LibFunc in \a TheLibFunc.
+  StringRef getFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
+                       LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn,
+                       LibFunc &TheLibFunc);
 
   /// Return V if it is an i8*, otherwise cast it to i8*.
   Value *castToCStr(Value *V, IRBuilderBase &B);
@@ -99,6 +139,10 @@ namespace llvm {
   Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
                     const DataLayout &DL, const TargetLibraryInfo *TLI);
 
+  /// Emit a call to the memrchr function, analogously to emitMemChr.
+  Value *emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+                     const DataLayout &DL, const TargetLibraryInfo *TLI);
+
   /// Emit a call to the memcmp function.
   Value *emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
                     const DataLayout &DL, const TargetLibraryInfo *TLI);
@@ -148,7 +192,8 @@ namespace llvm {
  /// function is known to take a single argument of type matching 'Op' and returns one
  /// value with the same type.
If 'Op' is a long double, 'l' is added as the /// suffix of name, if 'Op' is a float, we add a 'f' suffix. - Value *emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B, + Value *emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, + StringRef Name, IRBuilderBase &B, const AttributeList &Attrs); /// Emit a call to the unary function DoubleFn, FloatFn or LongDoubleFn, @@ -162,8 +207,10 @@ namespace llvm { /// function is known to take type matching 'Op1' and 'Op2' and return one /// value with the same type. If 'Op1/Op2' are long double, 'l' is added as /// the suffix of name, if 'Op1/Op2' are float, we add a 'f' suffix. - Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, - IRBuilderBase &B, const AttributeList &Attrs); + Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, + StringRef Name, IRBuilderBase &B, + const AttributeList &Attrs); /// Emit a call to the binary function DoubleFn, FloatFn or LongDoubleFn, /// depending of the type of Op1. diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h index e12d7e09aad6..7e6683fd0c8a 100644 --- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h @@ -16,12 +16,13 @@ #define LLVM_TRANSFORMS_UTILS_CALLGRAPHUPDATER_H #include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" namespace llvm { +class CallGraph; +class CallGraphSCC; + /// Wrapper to unify "old style" CallGraph and "new style" LazyCallGraph. This /// simplifies the interface and the call sites, e.g., new and old pass manager /// passes can share the same code. diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h index daa88981d3bf..fcb384ec3613 100644 --- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h +++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h @@ -19,6 +19,7 @@ class CallBase; class CastInst; class Function; class MDNode; +class Value; /// Return true if the given indirect call site can be made to call \p Callee. /// @@ -73,6 +74,15 @@ CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee, /// bool tryPromoteCall(CallBase &CB); +/// Predicate and clone the given call site. +/// +/// This function creates an if-then-else structure at the location of the call +/// site. The "if" condition compares the call site's called value to the given +/// callee. The original call site is moved into the "else" block, and a clone +/// of the call site is placed in the "then" block. The cloned instruction is +/// returned. +CallBase &versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H diff --git a/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h b/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h index fdb390db3aff..0bdc1a12d1fb 100644 --- a/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h +++ b/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h @@ -13,11 +13,12 @@ #ifndef LLVM_TRANSFORMS_UTILS_CANONICALIZEALIASES_H #define LLVM_TRANSFORMS_UTILS_CANONICALIZEALIASES_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; + /// Simple pass that canonicalizes aliases. 
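/// For instance (a sketch of the transformation), given
///   @a1 = alias void (), void ()* @f
///   @a2 = alias void (), void ()* @a1
/// canonicalization rewrites @a2 to refer to @f directly, so that no alias
/// points at another alias.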
 class CanonicalizeAliasesPass
     : public PassInfoMixin<CanonicalizeAliasesPass> {
 public:
diff --git a/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h b/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h
index 9de032935f88..924b6cdf7ca0 100644
--- a/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h
+++ b/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h
@@ -14,10 +14,10 @@
 #define LLVM_TRANSFORMS_UTILS_CANONICALIZEFREEZEINLOOPS_H
 
 #include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
+class Loop;
 class LPMUpdater;
 
 /// A pass that canonicalizes freeze instructions in a loop.
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 8aed3d0e40d9..bb23cf4a9a3c 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -17,11 +17,11 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include <limits>
 
 namespace llvm {
 
+template <typename PtrType> class SmallPtrSetImpl;
 class AllocaInst;
 class BasicBlock;
 class BlockFrequency;
@@ -92,6 +92,11 @@ public:
   BranchProbabilityInfo *BPI;
   AssumptionCache *AC;
 
+  // A block outside of the extraction set where any intermediate
+  // allocations will be placed. If this is null, allocations
+  // will be placed in the entry block of the function.
+  BasicBlock *AllocationBlock;
+
   // If true, varargs functions can be extracted.
   bool AllowVarArgs;
 
@@ -120,11 +125,15 @@ public:
   /// code is extracted, including vastart. If AllowAlloca is true, then
   /// extraction of blocks containing alloca instructions would be possible,
   /// however code extractor won't validate whether extraction is legal.
+  /// Any new allocations will be placed in the AllocationBlock, unless
+  /// it is null, in which case they will be placed in the entry block of
+  /// the function from which the code is being extracted.
   CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
                 bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
                 BranchProbabilityInfo *BPI = nullptr,
-                AssumptionCache *AC = nullptr,
-                bool AllowVarArgs = false, bool AllowAlloca = false,
+                AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
+                bool AllowAlloca = false,
+                BasicBlock *AllocationBlock = nullptr,
                 std::string Suffix = "");
 
   /// Create a code extractor for a loop body.
diff --git a/llvm/include/llvm/Transforms/Utils/CtorUtils.h b/llvm/include/llvm/Transforms/Utils/CtorUtils.h
index 3ef3ba244b43..40b290a5a6f4 100644
--- a/llvm/include/llvm/Transforms/Utils/CtorUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CtorUtils.h
@@ -13,7 +13,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CTORUTILS_H
 #define LLVM_TRANSFORMS_UTILS_CTORUTILS_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 
 namespace llvm {
 
@@ -22,9 +22,9 @@ class Module;
 
 /// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
 /// entries for which it returns true. Return true if anything changed.
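// Caller sketch (assuming the callback receives each entry's priority and
// constructor function, per the new signature below; the predicate is
// hypothetical):
//
//   bool Changed = optimizeGlobalCtorsList(M, [](uint32_t Prio, Function *F) {
//     return F && F->empty(); // e.g. drop ctors that are mere declarations
//   });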
-bool optimizeGlobalCtorsList(Module &M, - function_ref ShouldRemove); +bool optimizeGlobalCtorsList( + Module &M, function_ref ShouldRemove); -} // End llvm namespace +} // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h index 892e354cd9ed..405bbb8e0be8 100644 --- a/llvm/include/llvm/Transforms/Utils/Debugify.h +++ b/llvm/include/llvm/Transforms/Utils/Debugify.h @@ -23,7 +23,8 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" -using DebugFnMap = llvm::MapVector; +using DebugFnMap = + llvm::MapVector; using DebugInstMap = llvm::MapVector; using DebugVarMap = llvm::MapVector; using WeakInstValueMap = @@ -42,9 +43,6 @@ struct DebugInfoPerPass { DebugVarMap DIVariables; }; -/// Map pass names to a per-pass DebugInfoPerPass instance. -using DebugInfoPerPassMap = llvm::MapVector; - namespace llvm { class DIBuilder; @@ -69,24 +67,24 @@ bool stripDebugifyMetadata(Module &M); /// /// \param M The module to collect debug information from. /// \param Functions A range of functions to collect debug information from. -/// \param DIPreservationMap A map to collect the DI metadata. +/// \param DebugInfoBeforePass DI metadata before a pass. /// \param Banner A prefix string to add to debug/error messages. /// \param NameOfWrappedPass A name of a pass to add to debug/error messages. bool collectDebugInfoMetadata(Module &M, iterator_range Functions, - DebugInfoPerPassMap &DIPreservationMap, + DebugInfoPerPass &DebugInfoBeforePass, StringRef Banner, StringRef NameOfWrappedPass); /// Check original debug information after a pass. /// /// \param M The module to collect debug information from. /// \param Functions A range of functions to collect debug information from. -/// \param DIPreservationMap A map used to check collected the DI metadata. +/// \param DebugInfoBeforePass DI metadata before a pass. /// \param Banner A prefix string to add to debug/error messages. /// \param NameOfWrappedPass A name of a pass to add to debug/error messages. 
bool checkDebugInfoMetadata(Module &M, iterator_range Functions, - DebugInfoPerPassMap &DIPreservationMap, + DebugInfoPerPass &DebugInfoBeforePass, StringRef Banner, StringRef NameOfWrappedPass, StringRef OrigDIVerifyBugsReportFilePath); } // namespace llvm @@ -97,11 +95,11 @@ enum class DebugifyMode { NoDebugify, SyntheticDebugInfo, OriginalDebugInfo }; llvm::ModulePass *createDebugifyModulePass( enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, llvm::StringRef NameOfWrappedPass = "", - DebugInfoPerPassMap *DIPreservationMap = nullptr); + DebugInfoPerPass *DebugInfoBeforePass = nullptr); llvm::FunctionPass *createDebugifyFunctionPass( enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, llvm::StringRef NameOfWrappedPass = "", - DebugInfoPerPassMap *DIPreservationMap = nullptr); + DebugInfoPerPass *DebugInfoBeforePass = nullptr); struct NewPMDebugifyPass : public llvm::PassInfoMixin { llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM); @@ -140,14 +138,14 @@ llvm::ModulePass *createCheckDebugifyModulePass( bool Strip = false, llvm::StringRef NameOfWrappedPass = "", DebugifyStatsMap *StatsMap = nullptr, enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, - DebugInfoPerPassMap *DIPreservationMap = nullptr, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, llvm::StringRef OrigDIVerifyBugsReportFilePath = ""); llvm::FunctionPass *createCheckDebugifyFunctionPass( bool Strip = false, llvm::StringRef NameOfWrappedPass = "", DebugifyStatsMap *StatsMap = nullptr, enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, - DebugInfoPerPassMap *DIPreservationMap = nullptr, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, llvm::StringRef OrigDIVerifyBugsReportFilePath = ""); struct NewPMCheckDebugifyPass @@ -171,7 +169,7 @@ struct DebugifyEachInstrumentation { class DebugifyCustomPassManager : public legacy::PassManager { StringRef OrigDIVerifyBugsReportFilePath; DebugifyStatsMap *DIStatsMap = nullptr; - DebugInfoPerPassMap *DIPreservationMap = nullptr; + DebugInfoPerPass *DebugInfoBeforePass = nullptr; enum DebugifyMode Mode = DebugifyMode::NoDebugify; public: @@ -197,17 +195,17 @@ public: // TODO: Implement Debugify for LoopPass. switch (Kind) { case PT_Function: - super::add(createDebugifyFunctionPass(Mode, Name, DIPreservationMap)); + super::add(createDebugifyFunctionPass(Mode, Name, DebugInfoBeforePass)); super::add(P); super::add(createCheckDebugifyFunctionPass( - isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DIPreservationMap, + isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DebugInfoBeforePass, OrigDIVerifyBugsReportFilePath)); break; case PT_Module: - super::add(createDebugifyModulePass(Mode, Name, DIPreservationMap)); + super::add(createDebugifyModulePass(Mode, Name, DebugInfoBeforePass)); super::add(P); super::add(createCheckDebugifyModulePass( - isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DIPreservationMap, + isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DebugInfoBeforePass, OrigDIVerifyBugsReportFilePath)); break; default: @@ -219,8 +217,8 @@ public: // Used within DebugifyMode::SyntheticDebugInfo mode. void setDIStatsMap(DebugifyStatsMap &StatMap) { DIStatsMap = &StatMap; } // Used within DebugifyMode::OriginalDebugInfo mode. 
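// Wiring sketch for OriginalDebugInfo mode (assumes a
// DebugifyCustomPassManager `DPM` and the renamed setter below):
//
//   DebugInfoPerPass DIBeforePass;
//   DPM.setDebugifyMode(DebugifyMode::OriginalDebugInfo);
//   DPM.setDebugInfoBeforePass(DIBeforePass);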
- void setDIPreservationMap(DebugInfoPerPassMap &PerPassMap) { - DIPreservationMap = &PerPassMap; + void setDebugInfoBeforePass(DebugInfoPerPass &PerPassDI) { + DebugInfoBeforePass = &PerPassDI; } void setOrigDIVerifyBugsReportFilePath(StringRef BugsReportFilePath) { OrigDIVerifyBugsReportFilePath = BugsReportFilePath; @@ -239,7 +237,7 @@ public: } const DebugifyStatsMap &getDebugifyStatsMap() const { return *DIStatsMap; } - DebugInfoPerPassMap &getDebugInfoPerPassMap() { return *DIPreservationMap; } + DebugInfoPerPass &getDebugInfoPerPass() { return *DebugInfoBeforePass; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h b/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h index bb5c6f04dd0c..3d8447e9bf23 100644 --- a/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h +++ b/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h @@ -32,7 +32,7 @@ class EscapeEnumerator { Function::iterator StateBB, StateE; IRBuilder<> Builder; - bool Done; + bool Done = false; bool HandleExceptions; DomTreeUpdater *DTU; @@ -41,8 +41,7 @@ public: EscapeEnumerator(Function &F, const char *N = "cleanup", bool HandleExceptions = true, DomTreeUpdater *DTU = nullptr) : F(F), CleanupBBName(N), StateBB(F.begin()), StateE(F.end()), - Builder(F.getContext()), Done(false), - HandleExceptions(HandleExceptions), DTU(DTU) {} + Builder(F.getContext()), HandleExceptions(HandleExceptions), DTU(DTU) {} IRBuilder<> *Next(); }; diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h index 99e826bf855f..2b8384897c6b 100644 --- a/llvm/include/llvm/Transforms/Utils/Evaluator.h +++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -18,8 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include #include @@ -27,6 +25,7 @@ namespace llvm { +class CallBase; class DataLayout; class Function; class TargetLibraryInfo; @@ -139,6 +138,8 @@ private: SmallVectorImpl &Formals); Constant *ComputeLoadResult(Constant *P, Type *Ty); + Constant *ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset); /// As we compute SSA register values, we store their contents here. The back /// of the deque contains the current function and the stack contains the diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h index 964fdce45744..b6b53d0f10cb 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueMap.h" @@ -28,6 +27,7 @@ namespace llvm { class APFloat; +class AttributeList; class APInt; class BasicBlock; class Constant; diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h index 775dd23d8f23..60c91fc30174 100644 --- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h +++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h @@ -35,6 +35,9 @@ struct GlobalStatus { /// can be deleted. bool IsLoaded = false; + /// Number of stores to the global. + unsigned NumStores = 0; + /// Keep track of what stores to the global look like. enum StoredType { /// There is no store to this global. 
It can thus be marked constant.
diff --git a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
index af9cdb9fd619..d2ce0c5d3988 100644
--- a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
+++ b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
@@ -18,6 +18,7 @@
 #include "llvm/Pass.h"
 
 namespace llvm {
+class Function;
 class InjectTLIMappings : public PassInfoMixin<InjectTLIMappings> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 873127554b47..946fc84b9a2c 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -15,26 +15,18 @@
 #define LLVM_TRANSFORMS_UTILS_LOCAL_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/Utils/Local.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
 #include <cstdint>
-#include <limits>
 
 namespace llvm {
+class DataLayout;
+class Value;
+class WeakTrackingVH;
+class WeakVH;
+template <typename T> class SmallVectorImpl;
 class AAResults;
 class AllocaInst;
 class AssumptionCache;
@@ -343,7 +335,7 @@ bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint,
 
 /// Remove all instructions from a basic block other than its terminator
 /// and any present EH pad instructions. Returns a pair where the first element
-/// is the number of instructions (excluding debug info instrinsics) that have
+/// is the number of instructions (excluding debug info intrinsics) that have
 /// been removed, and the second element is the number of debug info intrinsics
 /// that have been removed.
 std::pair<unsigned, unsigned>
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3a712d78df67..676c0c1487db 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -13,18 +13,18 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
 #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
 namespace llvm {
 
 template <class N> class DomTreeNodeBase;
 using DomTreeNode = DomTreeNodeBase<BasicBlock>;
+class StringRef;
+class AnalysisUsage;
+class TargetTransformInfo;
 class AAResults;
-class AliasSet;
-class AliasSetTracker;
 class BasicBlock;
 class BlockFrequencyInfo;
 class ICFLoopSafetyInfo;
@@ -49,8 +49,6 @@ typedef std::pair<const RuntimeCheckingPtrGroup *,
 template <typename T> class Optional;
 template <typename T, unsigned N> class SmallSetVector;
-template <typename T, unsigned N> class SmallVector;
-template <typename T> class SmallVectorImpl;
 template <typename T, unsigned N> class SmallPriorityWorklist;
 
 BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
@@ -150,7 +148,7 @@ protected:
 /// this function is called by \p sinkRegionForLoopNest.
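// Call-site sketch for sinkRegion below; the MemorySSAUpdater is now taken by
// reference, so callers must have MemorySSA available (surrounding variables
// assumed in scope):
//
//   MemorySSAUpdater MSSAU(MSSA);
//   Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
//                         TTI, L, MSSAU, &SafetyInfo, Flags, ORE);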
 bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                 BlockFrequencyInfo *, TargetLibraryInfo *,
-                TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *,
+                TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater &,
                 ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
                 OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr);
@@ -159,7 +157,7 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
 bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
                            DominatorTree *, BlockFrequencyInfo *,
                            TargetLibraryInfo *, TargetTransformInfo *, Loop *,
-                           MemorySSAUpdater *, ICFLoopSafetyInfo *,
+                           MemorySSAUpdater &, ICFLoopSafetyInfo *,
                            SinkAndHoistLICMFlags &,
                            OptimizationRemarkEmitter *);
@@ -171,10 +169,13 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
 /// instructions of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                  BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
-                 MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
-                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
+                 MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *,
+                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
+                 bool AllowSpeculation);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is in fact dead.
@@ -204,12 +205,14 @@ void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
 /// of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool promoteLoopAccessesToScalars(
     const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
     SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
     PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
-    Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *);
+    Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
+    OptimizationRemarkEmitter *, bool AllowSpeculation);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
@@ -342,9 +345,9 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
 /// true when moving out of loop and not true when moving into loops.
 /// If \p ORE is set use it to emit optimization remarks.
 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
-                        Loop *CurLoop, AliasSetTracker *CurAST,
-                        MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
-                        SinkAndHoistLICMFlags *LICMFlags = nullptr,
+                        Loop *CurLoop, MemorySSAUpdater &MSSAU,
+                        bool TargetExecutesOncePerLoop,
+                        SinkAndHoistLICMFlags &LICMFlags,
                         OptimizationRemarkEmitter *ORE = nullptr);
 
 /// Returns the comparison predicate used when expanding a min/max reduction.
@@ -410,8 +413,10 @@ Value *createOrderedReduction(IRBuilderBase &B,
 /// of each scalar operation (VL) that will be converted into a vector (I).
 /// If OpValue is non-null, we only consider operations similar to OpValue
 /// when intersecting.
-/// Flag set: NSW, NUW, exact, and all of fast-math.
-void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
+/// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of
+/// fast-math.
+void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr,
+                      bool IncludeWrapFlags = true);
 
 /// Returns true if we can prove that \p S is defined and always negative in
 /// loop \p L.
@@ -497,6 +502,12 @@
 addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
                  const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
                  SCEVExpander &Expander);
 
+Value *
+addDiffRuntimeChecks(Instruction *Loc, Loop *TheLoop,
+                     ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
+                     function_ref<Value *(IRBuilderBase &, unsigned)> GetVF,
+                     unsigned IC);
+
 /// Struct to hold information about a partially invariant condition.
 struct IVConditionInfo {
   /// Instructions that need to be duplicated and checked for the unswitching
diff --git a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
index 4a8831ed45b2..eeab98c56b66 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -15,7 +15,6 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
 #define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
 
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -23,6 +22,8 @@
 namespace llvm {
 
 class Loop;
+class SCEVPredicate;
+class ScalarEvolution;
 class LoopAccessInfo;
 class LoopInfo;
 struct RuntimeCheckingPtrGroup;
@@ -113,7 +114,7 @@ private:
   Loop *VersionedLoop;
   /// The fall-back loop. I.e. control flows here if pointers in the
   /// loop may alias (memchecks failed).
-  Loop *NonVersionedLoop;
+  Loop *NonVersionedLoop = nullptr;
 
   /// This maps the instructions from VersionedLoop to their counterpart
   /// in NonVersionedLoop.
@@ -123,7 +124,7 @@
   SmallVector<RuntimePointerCheck, 4> AliasChecks;
 
   /// The set of SCEV checks that we are versioning for.
-  const SCEVUnionPredicate &Preds;
+  const SCEVPredicate &Preds;
 
   /// Maps a pointer to the pointer checking group that the pointer
   /// belongs to.
diff --git a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
new file mode 100644
index 000000000000..c85f8e3a5646
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
@@ -0,0 +1,37 @@
+//===- LowerAtomic.h - Lower atomic intrinsics ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass lowers atomic intrinsics to non-atomic form for use in a known
+/// non-preemptible environment.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+class IRBuilderBase;
+
+/// Convert the given Cmpxchg into primitive load and compare.
+bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
+
+/// Convert the given RMWI into primitive load and stores,
+/// assuming that doing so is legal. Return true if the lowering
+/// succeeds.
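/// For example (sketch), "%old = atomicrmw add i32* %p, i32 %v seq_cst"
/// lowers to the non-atomic sequence
///   %old = load i32, i32* %p
///   %new = add i32 %old, %v
///   store i32 %new, i32* %p
/// which is only sound in a known non-preemptible environment.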
+bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder,
+                           Value *Loaded, Value *Inc);
+}
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
diff --git a/llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h b/llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h
new file mode 100644
index 000000000000..993a6f57361c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h
@@ -0,0 +1,28 @@
+//===- LowerGlobalDtors.h - Lower @llvm.global_dtors ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers @llvm.global_dtors by creating wrapper functions that are
+// registered in @llvm.global_ctors and which contain a call to `__cxa_atexit`
+// to register their destructor functions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERGLOBALDTORS_H
+#define LLVM_TRANSFORMS_UTILS_LOWERGLOBALDTORS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LowerGlobalDtorsPass : public PassInfoMixin<LowerGlobalDtorsPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOWERGLOBALDTORS_H
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index 8d0956033d9f..acf59ff580a4 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -14,13 +14,17 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
 #define LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
 
+#include "llvm/ADT/Optional.h"
+
 namespace llvm {
 
+class AtomicMemCpyInst;
 class ConstantInt;
 class Instruction;
 class MemCpyInst;
 class MemMoveInst;
 class MemSetInst;
+class ScalarEvolution;
 class TargetTransformInfo;
 class Value;
 struct Align;
@@ -28,10 +32,11 @@ struct Align;
 /// Emit a loop implementing the semantics of llvm.memcpy where the size is not
 /// a compile-time constant. Loop will be inserted at \p InsertBefore.
 void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
-                                 Value *DstAddr, Value *CopyLen,
-                                 Align SrcAlign, Align DestAlign,
-                                 bool SrcIsVolatile, bool DstIsVolatile,
-                                 const TargetTransformInfo &TTI);
+                                 Value *DstAddr, Value *CopyLen, Align SrcAlign,
+                                 Align DestAlign, bool SrcIsVolatile,
+                                 bool DstIsVolatile, bool CanOverlap,
+                                 const TargetTransformInfo &TTI,
+                                 Optional<uint32_t> AtomicSize = None);
 
 /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
 /// compile time constant. Loop is inserted at \p InsertBefore.
@@ -39,10 +44,12 @@ void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                                Value *DstAddr, ConstantInt *CopyLen,
                                Align SrcAlign, Align DestAlign,
                                bool SrcIsVolatile, bool DstIsVolatile,
-                               const TargetTransformInfo &TTI);
+                               bool CanOverlap, const TargetTransformInfo &TTI,
+                               Optional<uint32_t> AtomicCpySize = None);
 
 /// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
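/// Conceptually (sketch), the emitted loop is
///   for (i = 0; i != len; ++i)
///     dst[i] = src[i];
/// possibly widened to larger element types when size and alignment permit.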
-void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI); +void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, + ScalarEvolution *SE = nullptr); /// Expand \p MemMove as a loop. \p MemMove is not deleted. void expandMemMoveAsLoop(MemMoveInst *MemMove); @@ -50,6 +57,11 @@ void expandMemMoveAsLoop(MemMoveInst *MemMove); /// Expand \p MemSet as a loop. \p MemSet is not deleted. void expandMemSetAsLoop(MemSetInst *MemSet); +/// Expand \p AtomicMemCpy as a loop. \p AtomicMemCpy is not deleted. +void expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemCpy, + const TargetTransformInfo &TTI, + ScalarEvolution *SE); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h new file mode 100644 index 000000000000..a2b85e03897b --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h @@ -0,0 +1,82 @@ +//===- MemoryTaggingSupport.h - helpers for memory tagging implementations ===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares common infrastructure for HWAddressSanitizer and +// Aarch64StackTagging. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_MEMORYTAGGINGSUPPORT_H +#define LLVM_TRANSFORMS_UTILS_MEMORYTAGGINGSUPPORT_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/Alignment.h" + +namespace llvm { +class DominatorTree; +class DbgVariableIntrinsic; +class IntrinsicInst; +class PostDominatorTree; +class AllocaInst; +class Instruction; +namespace memtag { +// For an alloca valid between lifetime markers Start and Ends, call the +// Callback for all possible exits out of the lifetime in the containing +// function, which can return from the instructions in RetVec. +// +// Returns whether Ends covered all possible exits. If they did not, +// the caller should remove Ends to ensure that work done at the other +// exits does not happen outside of the lifetime. 
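// Caller sketch for forAllReachableExits below (untagMemory is a hypothetical
// helper of the caller):
//
//   bool Covered = forAllReachableExits(DT, PDT, LI, Start, Ends, RetVec,
//       [&](Instruction *Exit) { untagMemory(AI, Exit); });
//   if (!Covered)
//     Ends.clear(); // work at other exits may outlive Ends; drop the markers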
+bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
+                          const LoopInfo &LI, const Instruction *Start,
+                          const SmallVectorImpl<IntrinsicInst *> &Ends,
+                          const SmallVectorImpl<Instruction *> &RetVec,
+                          llvm::function_ref<void(Instruction *)> Callback);
+
+bool isStandardLifetime(const SmallVectorImpl<IntrinsicInst *> &LifetimeStart,
+                        const SmallVectorImpl<IntrinsicInst *> &LifetimeEnd,
+                        const DominatorTree *DT, const LoopInfo *LI,
+                        size_t MaxLifetimes);
+
+Instruction *getUntagLocationIfFunctionExit(Instruction &Inst);
+
+struct AllocaInfo {
+  AllocaInst *AI;
+  SmallVector<IntrinsicInst *, 2> LifetimeStart;
+  SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+  SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
+};
+
+struct StackInfo {
+  MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
+  SmallVector<Instruction *, 4> UnrecognizedLifetimes;
+  SmallVector<Instruction *, 8> RetVec;
+  bool CallsReturnTwice = false;
+};
+
+class StackInfoBuilder {
+public:
+  StackInfoBuilder(std::function<bool(const AllocaInst &)> IsInterestingAlloca)
+      : IsInterestingAlloca(IsInterestingAlloca) {}
+
+  void visit(Instruction &Inst);
+  StackInfo &get() { return Info; };
+
+private:
+  StackInfo Info;
+  std::function<bool(const AllocaInst &)> IsInterestingAlloca;
+};
+
+uint64_t getAllocaSizeInBytes(const AllocaInst &AI);
+void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Align);
+
+} // namespace memtag
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Transforms/Utils/MisExpect.h b/llvm/include/llvm/Transforms/Utils/MisExpect.h
new file mode 100644
index 000000000000..064eeac4c669
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/MisExpect.h
@@ -0,0 +1,77 @@
+//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit diagnostic messages for potentially incorrect
+// usage of the llvm.expect intrinsic. This utility extracts the threshold
+// values from metadata associated with the instrumented Branch or Switch
+// instruction. The threshold values are then used to determine if a diagnostic
+// should be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+namespace misexpect {
+
+/// checkBackendInstrumentation - compares PGO counters to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range. It extracts the expected weights from the MD_prof weights attached
+/// to the instruction, which are assumed to come from lowered llvm.expect
+/// intrinsics. The RealWeights parameter and the extracted expected weights
+/// are then passed to verifyMisexpect() for verification.
+///
+/// \param I The Instruction being checked
+/// \param RealWeights A vector of profile weights for each target block
+void checkBackendInstrumentation(Instruction &I,
+                                 const llvm::ArrayRef<uint32_t> RealWeights);
+
+/// checkFrontendInstrumentation - compares PGO counters to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range. It extracts the expected weights from the MD_prof weights attached
+/// to the instruction, which are assumed to come from profiling data
+/// attached by the frontend prior to llvm.expect intrinsic lowering.
+/// The ExpectedWeights parameter and the extracted real weights are then
+/// passed to verifyMisexpect() for verification.
+///
+/// \param I The Instruction being checked
+/// \param ExpectedWeights A vector of the expected weights for each target
+/// block; this determines the threshold values used when emitting diagnostics
+void checkFrontendInstrumentation(Instruction &I,
+                                  const ArrayRef<uint32_t> ExpectedWeights);
+
+/// verifyMisExpect - compares RealWeights to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range.
+///
+/// \param I The Instruction being checked
+/// \param RealWeights A vector of profile weights from the profile data
+/// \param ExpectedWeights A vector of the weights attached by llvm.expect
+void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
+                     const ArrayRef<uint32_t> ExpectedWeights);
+
+/// checkExpectAnnotations - compares PGO counters to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range. It extracts the expected weights from the MD_prof weights attached
+/// to the instruction, which are assumed to come from lowered llvm.expect
+/// intrinsics. The RealWeights parameter and the extracted expected weights
+/// are then passed to verifyMisexpect() for verification. It is a thin wrapper
+/// around the checkFrontendInstrumentation and checkBackendInstrumentation APIs
+///
+/// \param I The Instruction being checked
+/// \param ExistingWeights A vector of profile weights for each target block
+/// \param IsFrontend A boolean describing whether this is frontend
+/// instrumentation
+void checkExpectAnnotations(Instruction &I,
+                            const ArrayRef<uint32_t> ExistingWeights,
+                            bool IsFrontend);
+
+} // namespace misexpect
+} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 8d459972336b..85263fc00bc3 100644
--- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -13,12 +13,13 @@
 #ifndef LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
 #define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include <utility> // for std::pair
 
 namespace llvm {
+template <typename T> class SmallVectorImpl;
 
 template <typename T> class ArrayRef;
 class Module;
@@ -109,14 +110,14 @@ std::string getUniqueModuleId(Module *M);
 
 /// Embed the memory buffer \p Buf into the module \p M as a global using the
 /// specified section name.
-void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName);
+void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName,
+                         Align Alignment = Align(1));
 
 class CallInst;
 
 namespace VFABI {
 /// Overwrite the Vector Function ABI variants attribute with the names provided
 /// in \p VariantMappings.
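// Mapping sketch in vector-function-ABI mangling (names hypothetical): an
// entry such as "_ZGV_LLVM_N4v_foo(foo_vec)" declares foo_vec as a 4-lane
// variant of foo taking one vector parameter.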
-void setVectorVariantNames(CallInst *CI,
-                           const SmallVector<std::string, 8> &VariantMappings);
+void setVectorVariantNames(CallInst *CI, ArrayRef<std::string> VariantMappings);
 } // End VFABI namespace
 } // End llvm namespace
diff --git a/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h b/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
index 03d8840a22d2..a59f9bc3ebfb 100644
--- a/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
+++ b/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALS_H
 #define LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALS_H
 
-#include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c922476ac79d..e57e598b6918 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -56,7 +56,6 @@
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 
@@ -65,6 +64,7 @@ namespace llvm {
 class AssumptionCache;
 class DominatorTree;
 class Function;
+class Value;
 class IntrinsicInst;
 class raw_ostream;
 
diff --git a/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h b/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h
index 54c257383fb5..0992a4456c9d 100644
--- a/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h
+++ b/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h
@@ -51,11 +51,12 @@
 #ifndef LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H
 #define LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H
 
-#include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
 
+class Module;
+
 // Pass that converts lookup tables to relative lookup tables.
 class RelLookupTableConverterPass
     : public PassInfoMixin<RelLookupTableConverterPass> {
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index bf418e659a04..17bd072598ee 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -16,16 +16,25 @@
 
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueLattice.h"
-#include "llvm/Analysis/ValueLatticeUtils.h"
-#include "llvm/IR/InstVisitor.h"
 #include "llvm/Transforms/Utils/PredicateInfo.h"
-#include <cassert>
-#include <utility>
 #include <vector>
 
 namespace llvm {
+class Argument;
+class BasicBlock;
+class CallInst;
+class Constant;
+class DataLayout;
+class DominatorTree;
+class Function;
+class GlobalVariable;
+class Instruction;
+class LLVMContext;
+class PostDominatorTree;
+class StructType;
+class TargetLibraryInfo;
+class Value;
+class ValueLatticeElement;
 
 /// Helper struct for bundling up the analysis results per function for IPSCCP.
 struct AnalysisResultsForFn {
@@ -34,6 +43,14 @@ struct AnalysisResultsForFn {
   PostDominatorTree *PDT;
 };
 
+/// Helper struct shared between Function Specialization and SCCP Solver.
+struct ArgInfo {
+  Argument *Formal; // The Formal argument being analysed.
+  Constant *Actual; // A corresponding actual constant argument.
+
+  ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A){};
+};
+
 class SCCPInstVisitor;
 
 //===----------------------------------------------------------------------===//
@@ -134,11 +151,14 @@ public:
   /// Return a reference to the set of argument tracked functions.
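// Specialization flow sketch (hypothetical names; see the declarations below):
// a clone of the original function is seeded with its known-constant arguments
// as {formal, actual} pairs:
//
//   SmallVector<ArgInfo, 4> Args;
//   Args.push_back(ArgInfo(/*Formal=*/FormalArg, /*Actual=*/ConstActual));
//   Solver.markArgInFuncSpecialization(Clone, Args);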
  SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
 
-  /// Mark argument \p A constant with value \p C in a new function
-  /// specialization. The argument's parent function is a specialization of the
-  /// original function \p F. All other arguments of the specialization inherit
-  /// the lattice state of their corresponding values in the original function.
-  void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C);
+  /// Mark the constant arguments of a new function specialization. \p F points
+  /// to the cloned function and \p Args contains a list of constant arguments
+  /// represented as pairs of {formal,actual} values (the formal argument is
+  /// associated with the original function definition). All other arguments of
+  /// the specialization inherit the lattice state of their corresponding values
+  /// in the original function.
+  void markArgInFuncSpecialization(Function *F,
+                                   const SmallVectorImpl<ArgInfo> &Args);
 
   /// Mark all of the blocks in function \p F non-executable. Clients can use
  /// this method to erase a function from the module (e.g., if it has been
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index ee06893ca660..a3e5ac3ac19d 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -323,6 +323,28 @@ public:
     } while (Changed);
   }
 
+  /// Check all predecessors and if all of them have the same AvailableVal use
+  /// it as value for block represented by Info. Return true if a singular
+  /// value is found.
+  bool FindSingularVal(BBInfo *Info) {
+    if (!Info->NumPreds)
+      return false;
+    ValT Singular = Info->Preds[0]->DefBB->AvailableVal;
+    if (!Singular)
+      return false;
+    for (unsigned Idx = 1; Idx < Info->NumPreds; ++Idx) {
+      ValT PredVal = Info->Preds[Idx]->DefBB->AvailableVal;
+      if (!PredVal || Singular != PredVal)
+        return false;
+    }
+    // Record Singular value.
+    (*AvailableVals)[Info->BB] = Singular;
+    assert(BBMap[Info->BB] == Info && "Info missed in BBMap?");
+    Info->AvailableVal = Singular;
+    Info->DefBB = Info->Preds[0]->DefBB;
+    return true;
+  }
+
   /// FindAvailableVal - If this block requires a PHI, first check if an
   /// existing PHI matches the PHI placement and reaching definitions computed
   /// earlier, and if not, create a new PHI. Visit all the block's
@@ -339,6 +361,10 @@ public:
       if (Info->DefBB != Info)
         continue;
 
+      // Look for a singular value.
+      if (FindSingularVal(Info))
+        continue;
+
       // Look for an existing PHI.
FindExistingPHI(Info->BB, BlockList); if (Info->AvailableVal) diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h index e1f681bbd367..5a4c28063a1d 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h @@ -24,7 +24,6 @@ namespace llvm { -class BasicBlock; class Function; class MachineBasicBlock; class MachineFunction; diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 175bdde7fd05..2250e928d1e6 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -76,6 +76,7 @@ template <> struct IRTraits { } // end namespace afdo_detail extern cl::opt SampleProfileUseProfi; +extern cl::opt SampleProfileInferEntryCount; template class SampleProfileLoaderBaseImpl { public: @@ -920,7 +921,9 @@ void SampleProfileLoaderBaseImpl::finalizeWeightPropagation( // Samples->getHeadSamples() + 1 to avoid functions with zero count. if (SampleProfileUseProfi) { const BasicBlockT *EntryBB = getEntryBB(&F); - if (BlockWeights[EntryBB] > 0) { + ErrorOr EntryWeight = getBlockWeight(EntryBB); + if (BlockWeights[EntryBB] > 0 && + (SampleProfileInferEntryCount || !EntryWeight)) { getFunction(F).setEntryCount( ProfileCount(BlockWeights[EntryBB], Function::PCT_Real), &InlinedGUIDs); diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h index a621cb3078c5..bd7175aa96ff 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h @@ -16,20 +16,14 @@ #define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEUTIL_H #include "llvm/ADT/DenseMap.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/Function.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" namespace llvm { using namespace sampleprof; class ProfileSummaryInfo; +class Module; extern cl::opt SampleProfileMaxPropagateIterations; extern cl::opt SampleProfileRecordCoverage; diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 277eb7acf238..260ed1a97831 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -15,13 +15,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" -#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/ValueHandle.h" @@ -293,8 +290,9 @@ public: Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc); /// A specialized variant of expandCodeForPredicate, handling the case when - /// we are expanding code for a SCEVEqualPredicate. 
- Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *Loc); + /// we are expanding code for a SCEVComparePredicate. + Value *expandComparePredicate(const SCEVComparePredicate *Pred, + Instruction *Loc); /// Generates code that evaluates if the \p AR expression will overflow. Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, @@ -384,8 +382,8 @@ public: /// Note that this function does not perform an exhaustive search. I.e if it /// didn't find any value it does not mean that there is no such value. /// - Optional - getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); + Value *getRelatedExistingExpansion(const SCEV *S, const Instruction *At, + Loop *L); /// Returns a suitable insert point after \p I, that dominates \p /// MustDominate. Skips instructions inserted by the expander. @@ -443,21 +441,15 @@ private: Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V); /// Find a previous Value in ExprValueMap for expand. - ScalarEvolution::ValueOffsetPair - FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); Value *expand(const SCEV *S); /// Determine the most "relevant" loop for the given SCEV. const Loop *getRelevantLoop(const SCEV *); - Value *expandSMaxExpr(const SCEVNAryExpr *S); - - Value *expandUMaxExpr(const SCEVNAryExpr *S); - - Value *expandSMinExpr(const SCEVNAryExpr *S); - - Value *expandUMinExpr(const SCEVNAryExpr *S); + Value *expandMinMaxExpr(const SCEVNAryExpr *S, Intrinsic::ID IntrinID, + Twine Name, bool IsSequential = false); Value *visitConstant(const SCEVConstant *S) { return S->getValue(); } diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h index fb3a7490346f..7af879638a4d 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -23,6 +23,7 @@ class AssumptionCache; struct SimplifyCFGOptions { int BonusInstThreshold = 1; bool ForwardSwitchCondToPhi = false; + bool ConvertSwitchRangeToICmp = false; bool ConvertSwitchToLookupTable = false; bool NeedCanonicalLoop = true; bool HoistCommonInsts = false; @@ -41,6 +42,10 @@ struct SimplifyCFGOptions { ForwardSwitchCondToPhi = B; return *this; } + SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) { + ConvertSwitchRangeToICmp = B; + return *this; + } SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { ConvertSwitchToLookupTable = B; return *this; diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h index 4ba56fb45afa..ff60811b6168 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -15,12 +15,11 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/ConstantRange.h" -#include "llvm/IR/ValueHandle.h" - namespace llvm { +class Type; +class WeakTrackingVH; +template class SmallVectorImpl; class CastInst; class DominatorTree; class Loop; diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index a88e72fc9ba8..79a44b667445 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -14,7 +14,7 @@ #ifndef 
LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { @@ -105,7 +105,7 @@ private: OptimizationRemarkEmitter &ORE; BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; - bool UnsafeFPShrink; + bool UnsafeFPShrink = false; function_ref Replacer; function_ref Eraser; @@ -163,6 +163,7 @@ private: Value *optimizeStpCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStrNCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStrLen(CallInst *CI, IRBuilderBase &B); + Value *optimizeStrNLen(CallInst *CI, IRBuilderBase &B); Value *optimizeStrPBrk(CallInst *CI, IRBuilderBase &B); Value *optimizeStrTo(CallInst *CI, IRBuilderBase &B); Value *optimizeStrSpn(CallInst *CI, IRBuilderBase &B); @@ -234,10 +235,11 @@ private: /// hasFloatVersion - Checks if there is a float version of the specified /// function by checking for an existing function with name FuncName + f - bool hasFloatVersion(StringRef FuncName); + bool hasFloatVersion(const Module *M, StringRef FuncName); - /// Shared code to optimize strlen+wcslen. - Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize); + /// Shared code to optimize strlen+wcslen and strnlen+wcsnlen. + Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize, + Value *Bound = nullptr); }; } // End llvm namespace diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h index 11bf5501598f..aa9e9bd6c69b 100644 --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -13,7 +13,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H #define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/include/llvm/Transforms/Utils/SplitModule.h b/llvm/include/llvm/Transforms/Utils/SplitModule.h index 42b3784db417..a5450738060a 100644 --- a/llvm/include/llvm/Transforms/Utils/SplitModule.h +++ b/llvm/include/llvm/Transforms/Utils/SplitModule.h @@ -15,7 +15,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H #define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include namespace llvm { diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 320c36b36924..65fe8eff6442 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/InstructionCost.h" namespace llvm { @@ -123,11 +124,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Optional UserAllowPartial, Optional UserRuntime, Optional UserUpperBound, Optional UserFullUnrollMaxCount); -unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, bool &Convergent, - const TargetTransformInfo &TTI, - const SmallPtrSetImpl &EphValues, - unsigned BEInsns); +InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls, + bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, + const SmallPtrSetImpl &EphValues, unsigned BEInsns); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h 
b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h index 3636285e38f5..15a46baa190d 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h @@ -10,9 +10,10 @@ #define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" namespace llvm { +class Pass; +class Function; class LoadStoreVectorizerPass : public PassInfoMixin<LoadStoreVectorizerPass> { public: diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 32d295a2dd16..b01bd222b252 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -28,12 +28,26 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { +class AAResults; +class AssumptionCache; +class BasicBlock; +class BlockFrequencyInfo; +class DemandedBits; +class DominatorTree; +class Function; +class Loop; +class LoopInfo; +class Metadata; +class OptimizationRemarkEmitter; +class PredicatedScalarEvolution; +class ProfileSummaryInfo; +class TargetLibraryInfo; +class TargetTransformInfo; +class Type; /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. @@ -207,7 +221,6 @@ public: void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } - Instruction *getExactFPInst() { return ExactFPMathInst; } unsigned getNumRuntimePointerChecks() const { @@ -294,6 +307,14 @@ public: /// Returns the widest induction type. Type *getWidestInductionType() { return WidestIndTy; } + /// Returns True if the given store is a final invariant store of one of the + /// reductions found in the loop. + bool isInvariantStoreOfReduction(StoreInst *SI); + + /// Returns True if the given address is invariant and is used to store a + /// recurrent expression. + bool isInvariantAddressOfReduction(Value *V); + /// Returns True if V is a Phi node of an induction variable in this loop. bool isInductionPhi(const Value *V) const; @@ -301,6 +322,10 @@ public: /// floating point induction. const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const; + /// Returns a pointer to the induction descriptor, if \p Phi is a pointer + /// induction. + const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const; + /// Returns True if V is a cast that is part of an induction def-use chain, /// and had been proven to be redundant under a runtime guard (in other /// words, the cast has the same SCEV expression as the induction phi).
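The "invariant store of a reduction" that the new LoopVectorizationLegality hooks above describe is a store of the running value of a reduction to a loop-invariant address; when that address does not alias anything else read or written in the loop, only the last store is observable, so the vectorizer may sink it out of the loop as a single final store. A minimal C++ sketch of the pattern (a hypothetical illustration, not code from this patch; 'out' is assumed not to alias 'a'):

    // 'sum' is a reduction; '*out' is a loop-invariant address storing the
    // recurrent expression. Only the final value of '*out' is observable
    // after the loop, so the reduction can stay in a register (or a vector
    // of registers) and a single store can be emitted after the loop.
    void reduce(const int *a, int n, int *out) {
      int sum = 0;
      for (int i = 0; i < n; ++i) {
        sum += a[i];
        *out = sum; // invariant address of a reduction
      }
    }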
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h index cd605aacb52d..b41f3efc5b55 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -20,7 +20,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" @@ -30,7 +29,6 @@ class AAResults; class AssumptionCache; class BasicBlock; class CmpInst; -class DataLayout; class DemandedBits; class DominatorTree; class Function; @@ -135,7 +133,7 @@ private: bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R); bool vectorizeStoreChain(ArrayRef Chain, slpvectorizer::BoUpSLP &R, - unsigned Idx); + unsigned Idx, unsigned MinVF); bool vectorizeStores(ArrayRef Stores, slpvectorizer::BoUpSLP &R); diff --git a/llvm/include/llvm/WindowsDriver/MSVCPaths.h b/llvm/include/llvm/WindowsDriver/MSVCPaths.h new file mode 100644 index 000000000000..7256a4f66eaa --- /dev/null +++ b/llvm/include/llvm/WindowsDriver/MSVCPaths.h @@ -0,0 +1,107 @@ +//===-- MSVCPaths.h - MSVC path-parsing helpers -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_MSVCPATHS_H +#define LLVM_SUPPORT_MSVCPATHS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include + +namespace llvm { + +namespace vfs { +class FileSystem; +} + +enum class SubDirectoryType { + Bin, + Include, + Lib, +}; + +enum class ToolsetLayout { + OlderVS, + VS2017OrNewer, + DevDivInternal, +}; + +// Windows SDKs and VC Toolchains group their contents into subdirectories based +// on the target architecture. This function converts an llvm::Triple::ArchType +// to the corresponding subdirectory name. +const char *archToWindowsSDKArch(llvm::Triple::ArchType Arch); + +// Similar to the above function, but for Visual Studios before VS2017. +const char *archToLegacyVCArch(llvm::Triple::ArchType Arch); + +// Similar to the above function, but for DevDiv internal builds. +const char *archToDevDivInternalArch(llvm::Triple::ArchType Arch); + +bool appendArchToWindowsSDKLibPath(int SDKMajor, llvm::SmallString<128> LibPath, + llvm::Triple::ArchType Arch, + std::string &path); + +// Get the path to a specific subdirectory in the current toolchain for +// a given target architecture. +// VS2017 changed the VC toolchain layout, so this should be used instead +// of hardcoding paths. +std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout, + const std::string &VCToolChainPath, + llvm::Triple::ArchType TargetArch, + llvm::StringRef SubdirParent = ""); + +// Check if the Include path of a specified version of Visual Studio contains +// specific header files. If not, they are probably shipped with Universal CRT. +bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath, + llvm::Triple::ArchType TargetArch, + llvm::vfs::FileSystem &VFS); + +/// Get Windows SDK installation directory. 
+bool getWindowsSDKDir(vfs::FileSystem &VFS, + llvm::Optional WinSdkDir, + llvm::Optional WinSdkVersion, + llvm::Optional WinSysRoot, + std::string &Path, int &Major, + std::string &WindowsSDKIncludeVersion, + std::string &WindowsSDKLibVersion); + +bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, + llvm::Optional WinSdkDir, + llvm::Optional WinSdkVersion, + llvm::Optional WinSysRoot, + std::string &Path, + std::string &UCRTVersion); + +// Check command line arguments to try and find a toolchain. +bool findVCToolChainViaCommandLine( + vfs::FileSystem &VFS, llvm::Optional VCToolsDir, + llvm::Optional VCToolsVersion, + llvm::Optional WinSysRoot, std::string &Path, + ToolsetLayout &VSLayout); + +// Check various environment variables to try and find a toolchain. +bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path, + ToolsetLayout &VSLayout); + +// Query the Setup Config server for installs, then pick the newest version +// and find its default VC toolchain. +// This is the preferred way to discover new Visual Studios, as they're no +// longer listed in the registry. +bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path, + ToolsetLayout &VSLayout); + +// Look in the registry for Visual Studio installs, and use that to get +// a toolchain path. VS2017 and newer don't get added to the registry. +// So if we find something here, we know that it's an older version. +bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/WindowsDriver/MSVCSetupApi.h b/llvm/include/llvm/WindowsDriver/MSVCSetupApi.h new file mode 100644 index 000000000000..28e6e3e08e37 --- /dev/null +++ b/llvm/include/llvm/WindowsDriver/MSVCSetupApi.h @@ -0,0 +1,523 @@ +// +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +// +// +// The MIT License (MIT) +// +// Copyright (C) Microsoft Corporation. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#pragma once + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#endif + +// Constants +// +#ifndef E_NOTFOUND +#define E_NOTFOUND HRESULT_FROM_WIN32(ERROR_NOT_FOUND) +#endif + +#ifndef E_FILENOTFOUND +#define E_FILENOTFOUND HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND) +#endif + +// Enumerations +// +/// +/// The state of an instance. 
+/// +enum InstanceState : unsigned { + /// + /// The instance state has not been determined. + /// + eNone = 0, + + /// + /// The instance installation path exists. + /// + eLocal = 1, + + /// + /// A product is registered to the instance. + /// + eRegistered = 2, + + /// + /// No reboot is required for the instance. + /// + eNoRebootRequired = 4, + + /// + /// The instance represents a complete install. + /// + eComplete = MAXUINT, +}; + +// Forward interface declarations +// +#ifndef __ISetupInstance_FWD_DEFINED__ +#define __ISetupInstance_FWD_DEFINED__ +typedef struct ISetupInstance ISetupInstance; +#endif + +#ifndef __ISetupInstance2_FWD_DEFINED__ +#define __ISetupInstance2_FWD_DEFINED__ +typedef struct ISetupInstance2 ISetupInstance2; +#endif + +#ifndef __IEnumSetupInstances_FWD_DEFINED__ +#define __IEnumSetupInstances_FWD_DEFINED__ +typedef struct IEnumSetupInstances IEnumSetupInstances; +#endif + +#ifndef __ISetupConfiguration_FWD_DEFINED__ +#define __ISetupConfiguration_FWD_DEFINED__ +typedef struct ISetupConfiguration ISetupConfiguration; +#endif + +#ifndef __ISetupConfiguration2_FWD_DEFINED__ +#define __ISetupConfiguration2_FWD_DEFINED__ +typedef struct ISetupConfiguration2 ISetupConfiguration2; +#endif + +#ifndef __ISetupPackageReference_FWD_DEFINED__ +#define __ISetupPackageReference_FWD_DEFINED__ +typedef struct ISetupPackageReference ISetupPackageReference; +#endif + +#ifndef __ISetupHelper_FWD_DEFINED__ +#define __ISetupHelper_FWD_DEFINED__ +typedef struct ISetupHelper ISetupHelper; +#endif + +// Forward class declarations +// +#ifndef __SetupConfiguration_FWD_DEFINED__ +#define __SetupConfiguration_FWD_DEFINED__ + +#ifdef __cplusplus +typedef class SetupConfiguration SetupConfiguration; +#endif + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Interface definitions +// +EXTERN_C const IID IID_ISetupInstance; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// Information about an instance of a product. +/// +struct DECLSPEC_UUID("B41463C3-8866-43B5-BC33-2B0676F7F42E") + DECLSPEC_NOVTABLE ISetupInstance : public IUnknown { + /// + /// Gets the instance identifier (should match the name of the parent instance + /// directory). + /// + /// The instance identifier. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist. + STDMETHOD(GetInstanceId)(_Out_ BSTR *pbstrInstanceId) = 0; + + /// + /// Gets the local date and time when the installation was originally + /// installed. + /// + /// The local date and time when the installation + /// was originally installed. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(GetInstallDate)(_Out_ LPFILETIME pInstallDate) = 0; + + /// + /// Gets the unique name of the installation, often indicating the branch and + /// other information used for telemetry. + /// + /// The unique name of the installation, + /// often indicating the branch and other information used for + /// telemetry. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(GetInstallationName)(_Out_ BSTR *pbstrInstallationName) = 0; + + /// + /// Gets the path to the installation root of the product. + /// + /// The path to the installation root of + /// the product. 
+ /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(GetInstallationPath)(_Out_ BSTR *pbstrInstallationPath) = 0; + + /// + /// Gets the version of the product installed in this instance. + /// + /// The version of the product + /// installed in this instance. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(GetInstallationVersion)(_Out_ BSTR *pbstrInstallationVersion) = 0; + + /// + /// Gets the display name (title) of the product installed in this instance. + /// + /// The LCID for the display name. + /// The display name (title) of the product + /// installed in this instance. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(GetDisplayName)(_In_ LCID lcid, _Out_ BSTR *pbstrDisplayName) = 0; + + /// + /// Gets the description of the product installed in this instance. + /// + /// The LCID for the description. + /// The description of the product installed in + /// this instance. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(GetDescription)(_In_ LCID lcid, _Out_ BSTR *pbstrDescription) = 0; + + /// + /// Resolves the optional relative path to the root path of the instance. + /// + /// A relative path within the instance to + /// resolve, or NULL to get the root path. + /// The full path to the optional relative + /// path within the instance. If the relative path is NULL, the root path will + /// always terminate in a backslash. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// property is not defined. + STDMETHOD(ResolvePath) + (_In_opt_z_ LPCOLESTR pwszRelativePath, _Out_ BSTR *pbstrAbsolutePath) = 0; +}; +#endif + +EXTERN_C const IID IID_ISetupInstance2; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// Information about an instance of a product. +/// +struct DECLSPEC_UUID("89143C9A-05AF-49B0-B717-72E218A2185C") + DECLSPEC_NOVTABLE ISetupInstance2 : public ISetupInstance { + /// + /// Gets the state of the instance. + /// + /// The state of the instance. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist. + STDMETHOD(GetState)(_Out_ InstanceState *pState) = 0; + + /// + /// Gets an array of package references registered to the instance. + /// + /// Pointer to an array of . + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// packages property is not defined. + STDMETHOD(GetPackages)(_Out_ LPSAFEARRAY *ppsaPackages) = 0; + + /// + /// Gets a pointer to the that represents + /// the registered product. + /// + /// Pointer to an instance of . This may be NULL if does not return . + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the + /// packages property is not defined. 
+ STDMETHOD(GetProduct) + (_Outptr_result_maybenull_ ISetupPackageReference **ppPackage) = 0; + + /// + /// Gets the relative path to the product application, if available. + /// + /// The relative path to the product + /// application, if available. + /// Standard HRESULT indicating success or failure, including + /// E_FILENOTFOUND if the instance state does not exist. + STDMETHOD(GetProductPath) + (_Outptr_result_maybenull_ BSTR *pbstrProductPath) = 0; +}; +#endif + +EXTERN_C const IID IID_IEnumSetupInstances; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// An enumerator of installed objects. +/// +struct DECLSPEC_UUID("6380BCFF-41D3-4B2E-8B2E-BF8A6810C848") + DECLSPEC_NOVTABLE IEnumSetupInstances : public IUnknown { + /// + /// Retrieves the next set of product instances in the enumeration sequence. + /// + /// The number of product instances to retrieve. + /// A pointer to an array of ISetupInstance. + /// A pointer to the number of product instances + /// retrieved. If celt is 1 this parameter may be NULL. + /// S_OK if the number of elements was fetched, S_FALSE if nothing + /// was fetched (at end of enumeration), E_INVALIDARG if celt is greater than + /// 1 and pceltFetched is NULL, or E_OUTOFMEMORY if an ISetupInstance could not be allocated. + STDMETHOD(Next) + (_In_ ULONG celt, _Out_writes_to_(celt, *pceltFetched) ISetupInstance **rgelt, + _Out_opt_ _Deref_out_range_(0, celt) ULONG *pceltFetched) = 0; + + /// + /// Skips the next set of product instances in the enumeration sequence. + /// + /// The number of product instances to skip. + /// S_OK if the number of elements could be skipped; otherwise, + /// S_FALSE; + STDMETHOD(Skip)(_In_ ULONG celt) = 0; + + /// + /// Resets the enumeration sequence to the beginning. + /// + /// Always returns S_OK; + STDMETHOD(Reset)(void) = 0; + + /// + /// Creates a new enumeration object in the same state as the current + /// enumeration object: the new object points to the same place in the + /// enumeration sequence. + /// + /// A pointer to a pointer to a new interface. If the method fails, this + /// parameter is undefined. + /// S_OK if a clone was returned; otherwise, E_OUTOFMEMORY. + STDMETHOD(Clone)(_Deref_out_opt_ IEnumSetupInstances **ppenum) = 0; +}; +#endif + +EXTERN_C const IID IID_ISetupConfiguration; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// Gets information about product instances set up on the machine. +/// +struct DECLSPEC_UUID("42843719-DB4C-46C2-8E7C-64F1816EFD5B") + DECLSPEC_NOVTABLE ISetupConfiguration : public IUnknown { + /// + /// Enumerates all completed product instances installed. + /// + /// An enumeration of completed, installed + /// product instances. + /// Standard HRESULT indicating success or failure. + STDMETHOD(EnumInstances)(_Out_ IEnumSetupInstances **ppEnumInstances) = 0; + + /// + /// Gets the instance for the current process path. + /// + /// The instance for the current process + /// path. + /// The instance for the current process path, or E_NOTFOUND if not + /// found. + STDMETHOD(GetInstanceForCurrentProcess) + (_Out_ ISetupInstance **ppInstance) = 0; + + /// + /// Gets the instance for the given path. + /// + /// The instance for the given path. + /// The instance for the given path, or E_NOTFOUND if not + /// found. + STDMETHOD(GetInstanceForPath) + (_In_z_ LPCWSTR wzPath, _Out_ ISetupInstance **ppInstance) = 0; +}; +#endif + +EXTERN_C const IID IID_ISetupConfiguration2; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// Gets information about product instances.
+/// +struct DECLSPEC_UUID("26AAB78C-4A60-49D6-AF3B-3C35BC93365D") + DECLSPEC_NOVTABLE ISetupConfiguration2 : public ISetupConfiguration { + /// + /// Enumerates all product instances. + /// + /// An enumeration of all product + /// instances. + /// Standard HRESULT indicating success or failure. + STDMETHOD(EnumAllInstances)(_Out_ IEnumSetupInstances **ppEnumInstances) = 0; +}; +#endif + +EXTERN_C const IID IID_ISetupPackageReference; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// A reference to a package. +/// +struct DECLSPEC_UUID("da8d8a16-b2b6-4487-a2f1-594ccccd6bf5") + DECLSPEC_NOVTABLE ISetupPackageReference : public IUnknown { + /// + /// Gets the general package identifier. + /// + /// The general package identifier. + /// Standard HRESULT indicating success or failure. + STDMETHOD(GetId)(_Out_ BSTR *pbstrId) = 0; + + /// + /// Gets the version of the package. + /// + /// The version of the package. + /// Standard HRESULT indicating success or failure. + STDMETHOD(GetVersion)(_Out_ BSTR *pbstrVersion) = 0; + + /// + /// Gets the target process architecture of the package. + /// + /// The target process architecture of the + /// package. + /// Standard HRESULT indicating success or failure. + STDMETHOD(GetChip)(_Out_ BSTR *pbstrChip) = 0; + + /// + /// Gets the language and optional region identifier. + /// + /// The language and optional region + /// identifier. + /// Standard HRESULT indicating success or failure. + STDMETHOD(GetLanguage)(_Out_ BSTR *pbstrLanguage) = 0; + + /// + /// Gets the build branch of the package. + /// + /// The build branch of the package. + /// Standard HRESULT indicating success or failure. + STDMETHOD(GetBranch)(_Out_ BSTR *pbstrBranch) = 0; + + /// + /// Gets the type of the package. + /// + /// The type of the package. + /// Standard HRESULT indicating success or failure. + STDMETHOD(GetType)(_Out_ BSTR *pbstrType) = 0; + + /// + /// Gets the unique identifier consisting of all defined tokens. + /// + /// The unique identifier consisting of all + /// defined tokens. + /// Standard HRESULT indicating success or failure, including + /// E_UNEXPECTED if no Id was defined (required). + STDMETHOD(GetUniqueId)(_Out_ BSTR *pbstrUniqueId) = 0; +}; +#endif + +EXTERN_C const IID IID_ISetupHelper; + +#if defined(__cplusplus) && !defined(CINTERFACE) +/// +/// Helper functions. +/// +/// +/// You can query for this interface from the +/// class. +/// +struct DECLSPEC_UUID("42b21b78-6192-463e-87bf-d577838f1d5c") + DECLSPEC_NOVTABLE ISetupHelper : public IUnknown { + /// + /// Parses a dotted quad version string into a 64-bit unsigned integer. + /// + /// The dotted quad version string to parse, e.g. + /// 1.2.3.4. + /// A 64-bit unsigned integer representing the + /// version. You can compare this to other versions. + /// Standard HRESULT indicating success or failure. + STDMETHOD(ParseVersion) + (_In_ LPCOLESTR pwszVersion, _Out_ PULONGLONG pullVersion) = 0; + + /// + /// Parses a dotted quad version string into a 64-bit unsigned integer. + /// + /// The string containing 1 or 2 dotted quad + /// version strings to parse, e.g. [1.0,) that means 1.0.0.0 or newer. + /// A 64-bit unsigned integer representing the + /// minimum version, which may be 0. You can compare this to other + /// versions. + /// A 64-bit unsigned integer representing the + /// maximum version, which may be MAXULONGLONG. You can compare this to other + /// versions. + /// Standard HRESULT indicating success or failure. 
+ STDMETHOD(ParseVersionRange) + (_In_ LPCOLESTR pwszVersionRange, _Out_ PULONGLONG pullMinVersion, + _Out_ PULONGLONG pullMaxVersion) = 0; +}; +#endif + +// Class declarations +// +EXTERN_C const CLSID CLSID_SetupConfiguration; + +#ifdef __cplusplus +/// +/// This class implements , , and . +/// +class DECLSPEC_UUID("177F0C4A-1CD3-4DE7-A32C-71DBBB9FA36D") SetupConfiguration; +#endif + +// Function declarations +// +/// +/// Gets an that provides information about +/// product instances installed on the machine. +/// +/// The that +/// provides information about product instances installed on the +/// machine. +/// Reserved for future use. +/// Standard HRESULT indicating success or failure. +STDMETHODIMP GetSetupConfiguration(_Out_ ISetupConfiguration **ppConfiguration, + _Reserved_ LPVOID pReserved); + +#ifdef __cplusplus +} +#endif + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index d0693ccfd8f6..76b10621541c 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -4,6 +4,7 @@ module LLVM_Analysis { module * { export * } // This is intended for (repeated) textual inclusion. + textual header "Analysis/ScalarFuncs.def" textual header "Analysis/TargetLibraryInfo.def" textual header "Analysis/VecFuncs.def" } @@ -71,6 +72,7 @@ module LLVM_BinaryFormat { textual header "BinaryFormat/ELFRelocs/Hexagon.def" textual header "BinaryFormat/ELFRelocs/i386.def" textual header "BinaryFormat/ELFRelocs/Lanai.def" + textual header "BinaryFormat/ELFRelocs/LoongArch.def" textual header "BinaryFormat/ELFRelocs/M68k.def" textual header "BinaryFormat/ELFRelocs/Mips.def" textual header "BinaryFormat/ELFRelocs/MSP430.def" @@ -242,6 +244,7 @@ module LLVM_intrinsic_gen { export * } module IR_AbstractCallSite { header "IR/AbstractCallSite.h" export * } + module IR_ConstantFold { header "IR/ConstantFold.h" export * } module IR_ConstantFolder { header "IR/ConstantFolder.h" export * } module IR_GlobalVariable { header "IR/GlobalVariable.h" export * } module IR_NoFolder { header "IR/NoFolder.h" export * } @@ -253,6 +256,7 @@ module LLVM_intrinsic_gen { module IR_InstrTypes { header "IR/InstrTypes.h" export * } module IR_Instructions { header "IR/Instructions.h" export * } module IR_TypeFinder { header "IR/TypeFinder.h" export * } + module IR_VectorBuilder { header "IR/VectorBuilder.h" export * } // Intrinsics.h @@ -331,7 +335,6 @@ module LLVM_MC { module LLVM_MC_TableGen { requires cplusplus module MC_LaneBitmask { header "MC/LaneBitmask.h" export * } - module MC_FixedLenDisassembler { header "MC/MCFixedLenDisassembler.h" export * } module MC_InstrItineraries { header "MC/MCInstrItineraries.h" export * } module MC_Schedule { header "MC/MCSchedule.h" export * } module MC_SubtargetFeature { header "MC/SubtargetFeature.h" export * } @@ -357,6 +360,7 @@ module LLVM_ProfileData { textual header "ProfileData/InstrProfData.inc" textual header "ProfileData/MemProfData.inc" + textual header "ProfileData/MIBEntryDef.inc" } // FIXME: Mislayered? @@ -410,6 +414,7 @@ module LLVM_Utils { // These are intended for textual inclusion. 
textual header "Support/AArch64TargetParser.def" textual header "Support/ARMTargetParser.def" + textual header "Support/CSKYTargetParser.def" textual header "Support/RISCVTargetParser.def" textual header "Support/TargetOpcodes.def" textual header "Support/X86TargetParser.def" diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index a8132e5abf54..e249c38ecd34 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -42,7 +42,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -680,7 +679,7 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, } } - const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); + const MemoryLocation &Loc = OptLoc.value_or(MemoryLocation()); switch (I->getOpcode()) { case Instruction::VAArg: @@ -988,6 +987,28 @@ bool llvm::isIdentifiedFunctionLocal(const Value *V) { return isa(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V); } +bool llvm::isEscapeSource(const Value *V) { + if (auto *CB = dyn_cast(V)) + return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CB, + true); + + // The load case works because isNonEscapingLocalObject considers all + // stores to be escapes (it passes true for the StoreCaptures argument + // to PointerMayBeCaptured). + if (isa(V)) + return true; + + // The inttoptr case works because isNonEscapingLocalObject considers all + // means of converting or equating a pointer to an int (ptrtoint, ptr store + // which could be followed by an integer load, ptr<->int compare) as + // escaping, and objects located at well-known addresses via platform-specific + // means cannot be considered non-escaping local objects. 
+ if (isa(V)) + return true; + + return false; +} + bool llvm::isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind) { RequiresNoCaptureBeforeUnwind = false; diff --git a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 1577f1eb70b1..e3446a1f3130 100644 --- a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -9,9 +9,7 @@ #include "llvm/Analysis/AliasAnalysisEvaluator.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -19,7 +17,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -41,30 +38,48 @@ static cl::opt PrintMustModRef("print-mustmodref", cl::ReallyHidden); static cl::opt EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden); -static void PrintResults(AliasResult AR, bool P, const Value *V1, - const Value *V2, const Module *M) { +static void PrintResults(AliasResult AR, bool P, + std::pair Loc1, + std::pair Loc2, + const Module *M) { if (PrintAll || P) { + Type *Ty1 = Loc1.second, *Ty2 = Loc2.second; + unsigned AS1 = Loc1.first->getType()->getPointerAddressSpace(); + unsigned AS2 = Loc2.first->getType()->getPointerAddressSpace(); std::string o1, o2; { raw_string_ostream os1(o1), os2(o2); - V1->printAsOperand(os1, true, M); - V2->printAsOperand(os2, true, M); + Loc1.first->printAsOperand(os1, false, M); + Loc2.first->printAsOperand(os2, false, M); } if (o2 < o1) { std::swap(o1, o2); + std::swap(Ty1, Ty2); + std::swap(AS1, AS2); // Change offset sign for the local AR, for printing only. AR.swap(); } - errs() << " " << AR << ":\t" << o1 << ", " << o2 << "\n"; + errs() << " " << AR << ":\t"; + Ty1->print(errs(), false, /* NoDetails */ true); + if (AS1 != 0) + errs() << " addrspace(" << AS1 << ")"; + errs() << "* " << o1 << ", "; + Ty2->print(errs(), false, /* NoDetails */ true); + if (AS2 != 0) + errs() << " addrspace(" << AS2 << ")"; + errs() << "* " << o2 << "\n"; } } -static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I, - Value *Ptr, Module *M) { +static inline void PrintModRefResults( + const char *Msg, bool P, Instruction *I, + std::pair Loc, Module *M) { if (PrintAll || P) { errs() << " " << Msg << ": Ptr: "; - Ptr->printAsOperand(errs(), true, M); + Loc.second->print(errs(), false, /* NoDetails */ true); + errs() << "* "; + Loc.first->printAsOperand(errs(), false, M); errs() << "\t<->" << *I << '\n'; } } @@ -84,11 +99,6 @@ static inline void PrintLoadStoreResults(AliasResult AR, bool P, } } -static inline bool isInterestingPointer(Value *V) { - return V->getType()->isPointerTy() - && !isa(V); -} - PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) { runInternal(F, AM.getResult(F)); return PreservedAnalyses::all(); @@ -99,38 +109,21 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { ++FunctionCount; - SetVector Pointers; + SetVector> Pointers; SmallSetVector Calls; SetVector Loads; SetVector Stores; - for (auto &I : F.args()) - if (I.getType()->isPointerTy()) // Add all pointer arguments. - Pointers.insert(&I); - for (Instruction &Inst : instructions(F)) { - if (Inst.getType()->isPointerTy()) // Add all pointer instructions. 
- Pointers.insert(&Inst); - if (EvalAAMD && isa(&Inst)) - Loads.insert(&Inst); - if (EvalAAMD && isa(&Inst)) - Stores.insert(&Inst); - if (auto *Call = dyn_cast(&Inst)) { - Value *Callee = Call->getCalledOperand(); - // Skip actual functions for direct function calls. - if (!isa(Callee) && isInterestingPointer(Callee)) - Pointers.insert(Callee); - // Consider formals. - for (Use &DataOp : Call->data_ops()) - if (isInterestingPointer(DataOp)) - Pointers.insert(DataOp); - Calls.insert(Call); - } else { - // Consider all operands. - for (Use &Op : Inst.operands()) - if (isInterestingPointer(Op)) - Pointers.insert(Op); - } + if (auto *LI = dyn_cast(&Inst)) { + Pointers.insert({LI->getPointerOperand(), LI->getType()}); + Loads.insert(LI); + } else if (auto *SI = dyn_cast(&Inst)) { + Pointers.insert({SI->getPointerOperand(), + SI->getValueOperand()->getType()}); + Stores.insert(SI); + } else if (auto *CB = dyn_cast(&Inst)) + Calls.insert(CB); } if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias || @@ -139,20 +132,12 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { << " pointers, " << Calls.size() << " call sites\n"; // iterate over the worklist, and run the full (n^2)/2 disambiguations - for (SetVector::iterator I1 = Pointers.begin(), E = Pointers.end(); - I1 != E; ++I1) { - auto I1Size = LocationSize::afterPointer(); - Type *I1ElTy = (*I1)->getType()->getPointerElementType(); - if (I1ElTy->isSized()) - I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy)); - - for (SetVector::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { - auto I2Size = LocationSize::afterPointer(); - Type *I2ElTy = (*I2)->getType()->getPointerElementType(); - if (I2ElTy->isSized()) - I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy)); - - AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size); + for (auto I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { + LocationSize Size1 = LocationSize::precise(DL.getTypeStoreSize(I1->second)); + for (auto I2 = Pointers.begin(); I2 != I1; ++I2) { + LocationSize Size2 = + LocationSize::precise(DL.getTypeStoreSize(I2->second)); + AliasResult AR = AA.alias(I1->first, Size1, I2->first, Size2); switch (AR) { case AliasResult::NoAlias: PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent()); @@ -231,13 +216,10 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { // Mod/ref alias analysis: compare all pairs of calls and values for (CallBase *Call : Calls) { - for (auto Pointer : Pointers) { - auto Size = LocationSize::afterPointer(); - Type *ElTy = Pointer->getType()->getPointerElementType(); - if (ElTy->isSized()) - Size = LocationSize::precise(DL.getTypeStoreSize(ElTy)); - - switch (AA.getModRefInfo(Call, Pointer, Size)) { + for (const auto &Pointer : Pointers) { + LocationSize Size = + LocationSize::precise(DL.getTypeStoreSize(Pointer.second)); + switch (AA.getModRefInfo(Call, Pointer.first, Size)) { case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer, F.getParent()); diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 5dc6c7780a0c..234a73bff6a8 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -13,16 +13,12 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GuardUtils.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Constants.h" -#include 
"llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" @@ -237,8 +233,8 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, if (AliasAny) return true; - assert(Inst->mayReadOrWriteMemory() && - "Instruction must either read or write memory."); + if (!Inst->mayReadOrWriteMemory()) + return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (auto *UnknownInst = getUnknownInst(i)) { @@ -258,31 +254,6 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, return false; } -Instruction* AliasSet::getUniqueInstruction() { - if (AliasAny) - // May have collapses alias set - return nullptr; - if (begin() != end()) { - if (!UnknownInsts.empty()) - // Another instruction found - return nullptr; - if (std::next(begin()) != end()) - // Another instruction found - return nullptr; - Value *Addr = begin()->getValue(); - assert(!Addr->user_empty() && - "where's the instruction which added this pointer?"); - if (std::next(Addr->user_begin()) != Addr->user_end()) - // Another instruction found -- this is really restrictive - // TODO: generalize! - return nullptr; - return cast(*(Addr->user_begin())); - } - if (1 != UnknownInsts.size()) - return nullptr; - return cast(UnknownInsts[0]); -} - void AliasSetTracker::clear() { // Delete all the PointerRec entries. for (auto &I : PointerMap) diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 177f38af13d8..460dddceaf17 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -40,14 +40,14 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeDelinearizationPass(Registry); initializeDemandedBitsWrapperPassPass(Registry); initializeDominanceFrontierWrapperPassPass(Registry); - initializeDomViewerPass(Registry); - initializeDomPrinterPass(Registry); - initializeDomOnlyViewerPass(Registry); - initializePostDomViewerPass(Registry); - initializeDomOnlyPrinterPass(Registry); - initializePostDomPrinterPass(Registry); - initializePostDomOnlyViewerPass(Registry); - initializePostDomOnlyPrinterPass(Registry); + initializeDomViewerWrapperPassPass(Registry); + initializeDomPrinterWrapperPassPass(Registry); + initializeDomOnlyViewerWrapperPassPass(Registry); + initializePostDomViewerWrapperPassPass(Registry); + initializeDomOnlyPrinterWrapperPassPass(Registry); + initializePostDomPrinterWrapperPassPass(Registry); + initializePostDomOnlyViewerWrapperPassPass(Registry); + initializePostDomOnlyPrinterWrapperPassPass(Registry); initializeAAResultsWrapperPassPass(Registry); initializeGlobalsAAWrapperPassPass(Registry); initializeIVUsersWrapperPassPass(Registry); diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp index 9d4fe1225b33..7440dbd29ccf 100644 --- a/llvm/lib/Analysis/AssumeBundleQueries.cpp +++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp @@ -10,8 +10,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/DebugCounter.h" diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp 
index 3e0214e21ecd..e7e476dfb572 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -11,18 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" @@ -31,7 +30,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include #include #include diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 0a0b53796add..c78f822b8bcf 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PhiValues.h" @@ -45,7 +44,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -105,29 +103,6 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, // Useful predicates //===----------------------------------------------------------------------===// -/// Returns true if the pointer is one which would have been considered an -/// escape by isNonEscapingLocalObject. -static bool isEscapeSource(const Value *V) { - if (isa(V)) - return true; - - // The load case works because isNonEscapingLocalObject considers all - // stores to be escapes (it passes true for the StoreCaptures argument - // to PointerMayBeCaptured). - if (isa(V)) - return true; - - // The inttoptr case works because isNonEscapingLocalObject considers all - // means of converting or equating a pointer to an int (ptrtoint, ptr store - // which could be followed by an integer load, ptr<->int compare) as - // escaping, and objects located at well-known addresses via platform-specific - // means cannot be considered non-escaping local objects. - if (isa(V)) - return true; - - return false; -} - /// Returns the size of the object specified by V or UnknownSize if unknown. 
static uint64_t getObjectSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, @@ -234,7 +209,7 @@ bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object, if (Iter.second) { Instruction *EarliestCapture = FindEarliestCapture( Object, *const_cast(I->getFunction()), - /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT); + /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT, EphValues); if (EarliestCapture) { auto Ins = Inst2Obj.insert({EarliestCapture, {}}); Ins.first->second.push_back(Object); @@ -661,8 +636,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, unsigned TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize(); LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds()); - Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize); - APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize); + Decomposed.Offset += LE.Offset.sext(MaxIndexSize); + APInt Scale = LE.Scale.sext(MaxIndexSize); // If we already had an occurrence of this index variable, merge this // scale into it. For example, we want to handle: @@ -1299,8 +1274,31 @@ AliasResult BasicAAResult::aliasGEP( const VariableGEPIndex &Var = DecompGEP1.VarIndices[0]; if (Var.Val.TruncBits == 0 && isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) { - // If V != 0 then abs(VarIndex) >= abs(Scale). - MinAbsVarIndex = Var.Scale.abs(); + // If V != 0, then abs(VarIndex) > 0. + MinAbsVarIndex = APInt(Var.Scale.getBitWidth(), 1); + + // Check if abs(V*Scale) >= abs(Scale) holds in the presence of + // potentially wrapping math. + auto MultiplyByScaleNoWrap = [](const VariableGEPIndex &Var) { + if (Var.IsNSW) + return true; + + int ValOrigBW = Var.Val.V->getType()->getPrimitiveSizeInBits(); + // If Scale is small enough so that abs(V*Scale) >= abs(Scale) holds. + // The max value of abs(V) is 2^ValOrigBW - 1. Multiplying with a + // constant smaller than 2^(bitwidth(Val) - ValOrigBW) won't wrap. + int MaxScaleValueBW = Var.Val.getBitWidth() - ValOrigBW; + if (MaxScaleValueBW <= 0) + return false; + return Var.Scale.ule( + APInt::getMaxValue(MaxScaleValueBW).zext(Var.Scale.getBitWidth())); + }; + // Refine MinAbsVarIndex, if abs(Scale*V) >= abs(Scale) holds in the + // presence of potentially wrapping math. + if (MultiplyByScaleNoWrap(Var)) { + // If V != 0 then abs(VarIndex) >= abs(Scale). + MinAbsVarIndex = Var.Scale.abs(); + } } } else if (DecompGEP1.VarIndices.size() == 2) { // VarIndex = Scale*V0 + (-Scale)*V1. @@ -1370,15 +1368,15 @@ BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize, // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. - AliasResult Alias = getBestAAResults().alias( - MemoryLocation(V2, V2Size), - MemoryLocation(SI->getTrueValue(), SISize), AAQI); + AliasResult Alias = + getBestAAResults().alias(MemoryLocation(SI->getTrueValue(), SISize), + MemoryLocation(V2, V2Size), AAQI); if (Alias == AliasResult::MayAlias) return AliasResult::MayAlias; - AliasResult ThisAlias = getBestAAResults().alias( - MemoryLocation(V2, V2Size), - MemoryLocation(SI->getFalseValue(), SISize), AAQI); + AliasResult ThisAlias = + getBestAAResults().alias(MemoryLocation(SI->getFalseValue(), SISize), + MemoryLocation(V2, V2Size), AAQI); return MergeAliasResults(ThisAlias, Alias); } @@ -1500,8 +1498,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, AAQueryInfo *UseAAQI = BlockInserted ? 
&NewAAQI : &AAQI; AliasResult Alias = getBestAAResults().alias( - MemoryLocation(V2, V2Size), - MemoryLocation(V1Srcs[0], PNSize), *UseAAQI); + MemoryLocation(V1Srcs[0], PNSize), MemoryLocation(V2, V2Size), *UseAAQI); // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. @@ -1518,7 +1515,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, Value *V = V1Srcs[i]; AliasResult ThisAlias = getBestAAResults().alias( - MemoryLocation(V2, V2Size), MemoryLocation(V, PNSize), *UseAAQI); + MemoryLocation(V, PNSize), MemoryLocation(V2, V2Size), *UseAAQI); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == AliasResult::MayAlias) break; diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp index b464071a33e6..436b01764033 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include #include #include diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 2a5e1f65d731..ec8d318b675b 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -13,7 +13,6 @@ #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Config/llvm-config.h" @@ -22,8 +21,8 @@ #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ScaledNumber.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -48,7 +47,7 @@ cl::opt CheckBFIUnknownBlockQueries( "for debugging missed BFI updates")); cl::opt UseIterativeBFIInference( - "use-iterative-bfi-inference", cl::init(false), cl::Hidden, cl::ZeroOrMore, + "use-iterative-bfi-inference", cl::Hidden, cl::desc("Apply an iterative post-processing to infer correct BFI counts")); cl::opt IterativeBFIMaxIterationsPerBlock( diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index ffb80134749a..1d880424e55c 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -414,8 +414,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1)); auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); if (EstimatedWeight && - EstimatedWeight.getValue() <= - static_cast(BlockExecWeight::UNREACHABLE)) + *EstimatedWeight <= static_cast(BlockExecWeight::UNREACHABLE)) UnreachableIdxs.push_back(I - 1); else ReachableIdxs.push_back(I - 1); @@ -688,7 +687,7 @@ Optional BranchProbabilityInfo::getMaxEstimatedEdgeWeight( if (!Weight) return None; - if (!MaxWeight || MaxWeight.getValue() < Weight.getValue()) + if (!MaxWeight || *MaxWeight < *Weight) MaxWeight = Weight; } @@ -852,8 +851,7 @@ void BranchProbabilityInfo::computeEestimateBlockWeight( if (LoopWeight <= static_cast(BlockExecWeight::UNREACHABLE)) LoopWeight = static_cast(BlockExecWeight::LOWEST_NON_ZERO); - EstimatedLoopWeight.insert( - {LoopBB.getLoopData(), LoopWeight.getValue()}); + 
EstimatedLoopWeight.insert({LoopBB.getLoopData(), *LoopWeight}); // Add all blocks entering the loop into working list. getLoopEnterBlocks(LoopBB, BlockWorkList); } @@ -875,7 +873,7 @@ void BranchProbabilityInfo::computeEestimateBlockWeight( auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB)); if (MaxWeight) - propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(), + propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight, BlockWorkList, LoopWorkList); } } while (!BlockWorkList.empty() || !LoopWorkList.empty()); @@ -913,7 +911,7 @@ bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { // Scale down loop exiting weight by trip count. Weight = std::max( static_cast(BlockExecWeight::LOWEST_NON_ZERO), - Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)) / + Weight.value_or(static_cast(BlockExecWeight::DEFAULT)) / TC); } bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB); @@ -923,15 +921,14 @@ bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { // 'Unlikely' blocks have twice lower weight. Weight = std::max( static_cast(BlockExecWeight::LOWEST_NON_ZERO), - Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)) / - 2); + Weight.value_or(static_cast(BlockExecWeight::DEFAULT)) / 2); } if (Weight) FoundEstimatedWeight = true; auto WeightVal = - Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)); + Weight.value_or(static_cast(BlockExecWeight::DEFAULT)); TotalWeight += WeightVal; SuccWeights.push_back(WeightVal); } diff --git a/llvm/lib/Analysis/CFG.cpp b/llvm/lib/Analysis/CFG.cpp index ec25ee161e2c..1902d72f2f89 100644 --- a/llvm/lib/Analysis/CFG.cpp +++ b/llvm/lib/Analysis/CFG.cpp @@ -127,11 +127,7 @@ bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest, // the outermost loop in the loop nest that contains BB. static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) { const Loop *L = LI->getLoopFor(BB); - if (L) { - while (const Loop *Parent = L->getParentLoop()) - L = Parent; - } - return L; + return L ? 
L->getOutermostLoop() : nullptr; } bool llvm::isPotentiallyReachableFromMany( diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index 04ccdc590845..f8eba1a00f28 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -23,7 +23,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" -#include +#include "llvm/Support/GraphWriter.h" using namespace llvm; diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 1216d03e448b..602a01867f3b 100644 --- a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -831,14 +831,14 @@ CFLAndersAAResult::ensureCached(const Function &Fn) { scan(Fn); Iter = Cache.find(&Fn); assert(Iter != Cache.end()); - assert(Iter->second.hasValue()); + assert(Iter->second); } return Iter->second; } const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) { auto &FunInfo = ensureCached(Fn); - if (FunInfo.hasValue()) + if (FunInfo) return &FunInfo->getAliasSummary(); else return nullptr; diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h index 02a13d673f40..60fc8d18678c 100644 --- a/llvm/lib/Analysis/CFLGraph.h +++ b/llvm/lib/Analysis/CFLGraph.h @@ -403,7 +403,7 @@ template class CFLGraphBuilder { auto &RetParamRelations = Summary->RetParamRelations; for (auto &Relation : RetParamRelations) { auto IRelation = instantiateExternalRelation(Relation, Call); - if (IRelation.hasValue()) { + if (IRelation) { Graph.addNode(IRelation->From); Graph.addNode(IRelation->To); Graph.addEdge(IRelation->From, IRelation->To); @@ -413,7 +413,7 @@ template class CFLGraphBuilder { auto &RetParamAttributes = Summary->RetParamAttributes; for (auto &Attribute : RetParamAttributes) { auto IAttr = instantiateExternalAttribute(Attribute, Call); - if (IAttr.hasValue()) + if (IAttr) Graph.addNode(IAttr->IValue, IAttr->Attr); } } diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index 090dccc53b6e..f92869c2ec63 100644 --- a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -165,7 +165,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( assert(RetVal != nullptr); assert(RetVal->getType()->isPointerTy()); auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); - if (RetInfo.hasValue()) + if (RetInfo) AddToRetParamRelations(0, RetInfo->Index); } @@ -174,7 +174,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( for (auto &Param : Fn.args()) { if (Param.getType()->isPointerTy()) { auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); - if (ParamInfo.hasValue()) + if (ParamInfo) AddToRetParamRelations(I + 1, ParamInfo->Index); } ++I; @@ -250,14 +250,14 @@ CFLSteensAAResult::ensureCached(Function *Fn) { scan(Fn); Iter = Cache.find(Fn); assert(Iter != Cache.end()); - assert(Iter->second.hasValue()); + assert(Iter->second); } return Iter->second; } const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { auto &FunInfo = ensureCached(&Fn); - if (FunInfo.hasValue()) + if (FunInfo) return &FunInfo->getAliasSummary(); else return nullptr; @@ -293,15 +293,15 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, assert(Fn != nullptr); auto &MaybeInfo = ensureCached(Fn); - assert(MaybeInfo.hasValue()); + assert(MaybeInfo); auto &Sets = MaybeInfo->getStratifiedSets(); auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); - if 
(!MaybeA.hasValue()) + if (!MaybeA) return AliasResult::MayAlias; auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); - if (!MaybeB.hasValue()) + if (!MaybeB) return AliasResult::MayAlias; auto SetA = *MaybeA; diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp index c60b70ae5b69..b2e7422bbf8b 100644 --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -27,7 +28,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" -#include #include #include @@ -164,9 +164,9 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { InlinedInternalEdges; CGSCCUpdateResult UR = { - RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, - nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges, - {}}; + RCWorklist, CWorklist, InvalidRefSCCSet, + InvalidSCCSet, nullptr, PreservedAnalyses::all(), + InlinedInternalEdges, {}}; // Request PassInstrumentation from analysis manager, will use it to run // instrumenting callbacks for the passes later. @@ -174,9 +174,8 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { PreservedAnalyses PA = PreservedAnalyses::all(); CG.buildRefSCCs(); - for (auto RCI = CG.postorder_ref_scc_begin(), - RCE = CG.postorder_ref_scc_end(); - RCI != RCE;) { + for (LazyCallGraph::RefSCC &RC : + llvm::make_early_inc_range(CG.postorder_ref_sccs())) { assert(RCWorklist.empty() && "Should always start with an empty RefSCC worklist"); // The postorder_ref_sccs range we are walking is lazily constructed, so @@ -190,7 +189,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { // // We also eagerly increment the iterator to the next position because // the CGSCC passes below may delete the current RefSCC. - RCWorklist.insert(&*RCI++); + RCWorklist.insert(&RC); do { LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); @@ -230,11 +229,15 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n"); continue; } - if (&C->getOuterRefSCC() != RC) { - LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other " - "RefSCC...\n"); - continue; - } + // We used to also check if the current SCC is part of the current + // RefSCC and bail if it wasn't, since it should be in RCWorklist. + // However, this can cause compile time explosions in some cases on + // modules with a huge RefSCC. If a non-trivial amount of SCCs in the + // huge RefSCC can become their own child RefSCC, we create one child + // RefSCC, bail on the current RefSCC, visit the child RefSCC, revisit + // the huge RefSCC, and repeat. By visiting all SCCs in the original + // RefSCC we create all the child RefSCCs in one pass of the RefSCC, + // rather one pass of the RefSCC creating one child RefSCC at a time. // Ensure we can proxy analysis updates from the CGSCC analysis manager // into the the Function analysis manager by getting a proxy here. @@ -264,11 +267,8 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { // Check that we didn't miss any update scenario. 
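The rewritten loop header above uses llvm::make_early_inc_range, the standard LLVM idiom for walking a range whose current element may be deleted by the loop body: the iterator is advanced before the body runs. A minimal sketch of the same idiom, assuming a hypothetical helper (not part of this patch) that strips trivially dead instructions from a block:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

// Erasing the current instruction is safe: the range iterator has already
// moved past it by the time the loop body executes.
static void removeDeadInsts(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (llvm::isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}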
assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - assert(&C->getOuterRefSCC() == RC && - "Processing an SCC in a different RefSCC!"); LastUpdatedC = UR.UpdatedC; - UR.UpdatedRC = nullptr; UR.UpdatedC = nullptr; // Check the PassInstrumentation's BeforePass callbacks before @@ -290,7 +290,6 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { // Update the SCC and RefSCC if necessary. C = UR.UpdatedC ? UR.UpdatedC : C; - RC = UR.UpdatedRC ? UR.UpdatedRC : RC; if (UR.UpdatedC) { // If we're updating the SCC, also update the FAM inside the proxy's @@ -1213,10 +1212,8 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!"); assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!"); - // Record the current RefSCC and SCC for higher layers of the CGSCC pass - // manager now that all the updates have been applied. - if (RC != &InitialRC) - UR.UpdatedRC = RC; + // Record the current SCC for higher layers of the CGSCC pass manager now that + // all the updates have been applied. if (C != &InitialC) UR.UpdatedC = C; diff --git a/llvm/lib/Analysis/CallGraph.cpp b/llvm/lib/Analysis/CallGraph.cpp index dfbd29b7d636..f85527122b2a 100644 --- a/llvm/lib/Analysis/CallGraph.cpp +++ b/llvm/lib/Analysis/CallGraph.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #include using namespace llvm; @@ -70,8 +69,7 @@ bool CallGraph::invalidate(Module &, const PreservedAnalyses &PA, // Check whether the analysis, all analyses on functions, or the function's // CFG have been preserved. auto PAC = PA.getChecker(); - return !(PAC.preserved() || PAC.preservedSet>() || - PAC.preservedSet()); + return !(PAC.preserved() || PAC.preservedSet>()); } void CallGraph::addToCallGraph(Function *F) { diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp index 930cb13c0cb3..8438f33f4712 100644 --- a/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -28,7 +28,6 @@ #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/PrintPasses.h" -#include "llvm/IR/StructuralHash.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -271,7 +270,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, Calls.count(Call) || // If the call edge is not from a call or invoke, or it is a - // instrinsic call, then the function pass RAUW'd a call with + // intrinsic call, then the function pass RAUW'd a call with // another value. This can happen when constant folding happens // of well known functions etc. (Call->getCalledFunction() && @@ -470,7 +469,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, initializeAnalysisImpl(P); #ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(CG.getModule()); + uint64_t RefHash = P->structuralHash(CG.getModule()); #endif // Actually run this pass on the current SCC. 
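The EXPENSIVE_CHECKS hunks above and below implement one invariant: a pass that reports "no change" must leave the module structurally identical. A minimal sketch of that pattern, assuming a hypothetical driver around a legacy ModulePass (the real code applies it per contained SCC pass):

#include <cstdint>
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"

// Snapshot a structural hash before the pass runs, then verify that an
// unchanged report really means unchanged IR.
static bool runWithHashCheck(llvm::ModulePass &P, llvm::Module &M) {
  uint64_t RefHash = P.structuralHash(M); // hash before the pass
  bool Changed = P.runOnModule(M);        // pass reports its own change bit
  if (!Changed && RefHash != P.structuralHash(M))
    llvm::report_fatal_error("Pass modifies its input and doesn't report it");
  return Changed;
}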
@@ -480,7 +479,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, Changed |= LocalChanged; #ifdef EXPENSIVE_CHECKS - if (!LocalChanged && (RefHash != StructuralHash(CG.getModule()))) { + if (!LocalChanged && (RefHash != P->structuralHash(CG.getModule()))) { llvm::errs() << "Pass modifies its input and doesn't report it: " << P->getPassName() << "\n"; llvm_unreachable("Pass modifies its input and doesn't report it"); diff --git a/llvm/lib/Analysis/CallPrinter.cpp b/llvm/lib/Analysis/CallPrinter.cpp index 829532a0fa10..65e3184fad91 100644 --- a/llvm/lib/Analysis/CallPrinter.cpp +++ b/llvm/lib/Analysis/CallPrinter.cpp @@ -14,18 +14,23 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallPrinter.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/DOTGraphTraitsPass.h" #include "llvm/Analysis/HeatUtils.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/GraphWriter.h" using namespace llvm; +namespace llvm { +template struct GraphTraits; +} + // This option shows static (relative) call counts. // FIXME: // Need to show real counts when profile data is available @@ -212,6 +217,71 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } // end llvm namespace +namespace { +void doCallGraphDOTPrinting( + Module &M, function_ref LookupBFI) { + std::string Filename; + if (!CallGraphDotFilenamePrefix.empty()) + Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot"); + else + Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot"); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); + + CallGraph CG(M); + CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI); + + if (!EC) + WriteGraph(File, &CFGInfo); + else + errs() << " error opening file for writing!"; + errs() << "\n"; +} + +void viewCallGraph(Module &M, + function_ref LookupBFI) { + CallGraph CG(M); + CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI); + + std::string Title = + DOTGraphTraits::getGraphName(&CFGInfo); + ViewGraph(&CFGInfo, "callgraph", true, Title); +} +} // namespace + +namespace llvm { +PreservedAnalyses CallGraphDOTPrinterPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + + auto LookupBFI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + + doCallGraphDOTPrinting(M, LookupBFI); + + return PreservedAnalyses::all(); +} + +PreservedAnalyses CallGraphViewerPass::run(Module &M, + ModuleAnalysisManager &AM) { + + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + + auto LookupBFI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + + viewCallGraph(M, LookupBFI); + + return PreservedAnalyses::all(); +} +} // namespace llvm + namespace { // Viewer class CallGraphViewer : public ModulePass { @@ -234,12 +304,7 @@ bool CallGraphViewer::runOnModule(Module &M) { return &this->getAnalysis(F).getBFI(); }; - CallGraph CG(M); - CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI); - - std::string Title = - DOTGraphTraits::getGraphName(&CFGInfo); - ViewGraph(&CFGInfo, "callgraph", true, Title); + 
viewCallGraph(M, LookupBFI); return false; } @@ -266,24 +331,7 @@ bool CallGraphDOTPrinter::runOnModule(Module &M) { return &this->getAnalysis(F).getBFI(); }; - std::string Filename; - if (!CallGraphDotFilenamePrefix.empty()) - Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot"); - else - Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot"); - errs() << "Writing '" << Filename << "'..."; - - std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); - - CallGraph CG(M); - CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI); - - if (!EC) - WriteGraph(File, &CFGInfo); - else - errs() << " error opening file for writing!"; - errs() << "\n"; + doCallGraphDOTPrinting(M, LookupBFI); return false; } diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index ba8462e659d5..f4fd660ac7e0 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -44,15 +45,15 @@ STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before"); /// use it where possible. The caching version can use much higher limit or /// don't have this cap at all. static cl::opt -DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden, - cl::desc("Maximal number of uses to explore."), - cl::init(20)); + DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden, + cl::desc("Maximal number of uses to explore."), + cl::init(100)); unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() { return DefaultMaxUsesToExplore; } -CaptureTracker::~CaptureTracker() {} +CaptureTracker::~CaptureTracker() = default; bool CaptureTracker::shouldExplore(const Use *U) { return true; } @@ -74,8 +75,10 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { namespace { struct SimpleCaptureTracker : public CaptureTracker { - explicit SimpleCaptureTracker(bool ReturnCaptures) - : ReturnCaptures(ReturnCaptures) {} + explicit SimpleCaptureTracker( + + const SmallPtrSetImpl &EphValues, bool ReturnCaptures) + : EphValues(EphValues), ReturnCaptures(ReturnCaptures) {} void tooManyUses() override { Captured = true; } @@ -83,10 +86,15 @@ namespace { if (isa(U->getUser()) && !ReturnCaptures) return false; + if (EphValues.contains(U->getUser())) + return false; + Captured = true; return true; } + const SmallPtrSetImpl &EphValues; + bool ReturnCaptures; bool Captured = false; @@ -154,8 +162,9 @@ namespace { // escape are not in a cycle. 
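For orientation, the tracker structs in this file all follow the CaptureTracker callback protocol: PointerMayBeCaptured walks the uses of a pointer, captured() decides whether a given use escapes (returning true aborts the walk), and tooManyUses() fires when the exploration budget is exhausted. A minimal client-side sketch, with a hypothetical tracker that merely counts capturing uses:

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Value.h"

namespace {
// Counts capturing uses instead of stopping at the first one.
struct CountingTracker : public llvm::CaptureTracker {
  unsigned CapturingUses = 0;
  void tooManyUses() override { CapturingUses = ~0u; } // budget blown: give up
  bool captured(const llvm::Use *U) override {
    ++CapturingUses; // record this capturing use...
    return false;    // ...and keep scanning for more
  }
};
} // namespace

static unsigned countCapturingUses(const llvm::Value *V) {
  CountingTracker T;
  llvm::PointerMayBeCaptured(V, &T, /*MaxUsesToExplore=*/100);
  return T.CapturingUses;
}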
struct EarliestCaptures : public CaptureTracker { - EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT) - : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {} + EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT, + const SmallPtrSetImpl &EphValues) + : EphValues(EphValues), DT(DT), ReturnCaptures(ReturnCaptures), F(F) {} void tooManyUses() override { Captured = true; @@ -167,6 +176,9 @@ namespace { if (isa(I) && !ReturnCaptures) return false; + if (EphValues.contains(I)) + return false; + if (!EarliestCapture) { EarliestCapture = I; } else if (EarliestCapture->getParent() == I->getParent()) { @@ -193,6 +205,8 @@ namespace { return false; } + const SmallPtrSetImpl &EphValues; + Instruction *EarliestCapture = nullptr; const DominatorTree &DT; @@ -212,8 +226,18 @@ namespace { /// counts as capturing it or not. The boolean StoreCaptures specified whether /// storing the value (or part of it) into memory anywhere automatically /// counts as capturing it or not. -bool llvm::PointerMayBeCaptured(const Value *V, - bool ReturnCaptures, bool StoreCaptures, +bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, + bool StoreCaptures, unsigned MaxUsesToExplore) { + SmallPtrSet Empty; + return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, Empty, + MaxUsesToExplore); +} + +/// Variant of the above function which accepts a set of Values that are +/// ephemeral and cannot cause pointers to escape. +bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, + bool StoreCaptures, + const SmallPtrSetImpl &EphValues, unsigned MaxUsesToExplore) { assert(!isa(V) && "It doesn't make sense to ask whether a global is captured."); @@ -224,7 +248,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, // take advantage of this. (void)StoreCaptures; - SimpleCaptureTracker SCT(ReturnCaptures); + SimpleCaptureTracker SCT(EphValues, ReturnCaptures); PointerMayBeCaptured(V, &SCT, MaxUsesToExplore); if (SCT.Captured) ++NumCaptured; @@ -266,14 +290,16 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, return CB.Captured; } -Instruction *llvm::FindEarliestCapture(const Value *V, Function &F, - bool ReturnCaptures, bool StoreCaptures, - const DominatorTree &DT, - unsigned MaxUsesToExplore) { +Instruction * +llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures, + bool StoreCaptures, const DominatorTree &DT, + + const SmallPtrSetImpl &EphValues, + unsigned MaxUsesToExplore) { assert(!isa(V) && "It doesn't make sense to ask whether a global is captured."); - EarliestCaptures CB(ReturnCaptures, F, DT); + EarliestCaptures CB(ReturnCaptures, F, DT, EphValues); PointerMayBeCaptured(V, &CB, MaxUsesToExplore); if (CB.Captured) ++NumCapturedBefore; @@ -282,6 +308,132 @@ Instruction *llvm::FindEarliestCapture(const Value *V, Function &F, return CB.EarliestCapture; } +UseCaptureKind llvm::DetermineUseCaptureKind( + const Use &U, + function_ref IsDereferenceableOrNull) { + Instruction *I = cast(U.getUser()); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + auto *Call = cast(I); + // Not captured if the callee is readonly, doesn't return a copy through + // its return value and doesn't unwind (a readonly function can leak bits + // by throwing an exception or not depending on the input value). 
+ if (Call->onlyReadsMemory() && Call->doesNotThrow() && + Call->getType()->isVoidTy()) + return UseCaptureKind::NO_CAPTURE; + + // The pointer is not captured if returned pointer is not captured. + // NOTE: CaptureTracking users should not assume that only functions + // marked with nocapture do not capture. This means that places like + // getUnderlyingObject in ValueTracking or DecomposeGEPExpression + // in BasicAA also need to know about this property. + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true)) + return UseCaptureKind::PASSTHROUGH; + + // Volatile operations effectively capture the memory location that they + // load and store to. + if (auto *MI = dyn_cast(Call)) + if (MI->isVolatile()) + return UseCaptureKind::MAY_CAPTURE; + + // Calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). + if (Call->isCallee(&U)) + return UseCaptureKind::NO_CAPTURE; + + // Not captured if only passed via 'nocapture' arguments. + if (Call->isDataOperand(&U) && + !Call->doesNotCapture(Call->getDataOperandNo(&U))) { + // The parameter is not marked 'nocapture' - captured. + return UseCaptureKind::MAY_CAPTURE; + } + return UseCaptureKind::NO_CAPTURE; + } + case Instruction::Load: + // Volatile loads make the address observable. + if (cast(I)->isVolatile()) + return UseCaptureKind::MAY_CAPTURE; + return UseCaptureKind::NO_CAPTURE; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + return UseCaptureKind::NO_CAPTURE; + case Instruction::Store: + // Stored the pointer - conservatively assume it may be captured. + // Volatile stores make the address observable. + if (U.getOperandNo() == 0 || cast(I)->isVolatile()) + return UseCaptureKind::MAY_CAPTURE; + return UseCaptureKind::NO_CAPTURE; + case Instruction::AtomicRMW: { + // atomicrmw conceptually includes both a load and store from + // the same location. + // As with a store, the location being accessed is not captured, + // but the value being stored is. + // Volatile stores make the address observable. + auto *ARMWI = cast(I); + if (U.getOperandNo() == 1 || ARMWI->isVolatile()) + return UseCaptureKind::MAY_CAPTURE; + return UseCaptureKind::NO_CAPTURE; + } + case Instruction::AtomicCmpXchg: { + // cmpxchg conceptually includes both a load and store from + // the same location. + // As with a store, the location being accessed is not captured, + // but the value being stored is. + // Volatile stores make the address observable. + auto *ACXI = cast(I); + if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile()) + return UseCaptureKind::MAY_CAPTURE; + return UseCaptureKind::NO_CAPTURE; + } + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + // The original value is not captured via this if the new value isn't. + return UseCaptureKind::PASSTHROUGH; + case Instruction::ICmp: { + unsigned Idx = U.getOperandNo(); + unsigned OtherIdx = 1 - Idx; + if (auto *CPN = dyn_cast(I->getOperand(OtherIdx))) { + // Don't count comparisons of a no-alias return value against null as + // captures. 
This allows us to ignore comparisons of malloc results + // with null, for example. + if (CPN->getType()->getAddressSpace() == 0) + if (isNoAliasCall(U.get()->stripPointerCasts())) + return UseCaptureKind::NO_CAPTURE; + if (!I->getFunction()->nullPointerIsDefined()) { + auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); + // Comparing a dereferenceable_or_null pointer against null cannot + // lead to pointer escapes, because if it is not null it must be a + // valid (in-bounds) pointer. + const DataLayout &DL = I->getModule()->getDataLayout(); + if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL)) + return UseCaptureKind::NO_CAPTURE; + } + } + // Comparison against value stored in global variable. Given the pointer + // does not escape, its value cannot be guessed and stored separately in a + // global variable. + auto *LI = dyn_cast(I->getOperand(OtherIdx)); + if (LI && isa(LI->getPointerOperand())) + return UseCaptureKind::NO_CAPTURE; + // Otherwise, be conservative. There are crazy ways to capture pointers + // using comparisons. + return UseCaptureKind::MAY_CAPTURE; + } + default: + // Something else - be conservative and say it is captured. + return UseCaptureKind::MAY_CAPTURE; + } +} + void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, unsigned MaxUsesToExplore) { assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); @@ -293,11 +445,10 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, SmallSet Visited; auto AddUses = [&](const Value *V) { - unsigned Count = 0; for (const Use &U : V->uses()) { // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. - if (Count++ >= MaxUsesToExplore) { + if (Visited.size() >= MaxUsesToExplore) { Tracker->tooManyUses(); return false; } @@ -312,144 +463,22 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, if (!AddUses(V)) return; + auto IsDereferenceableOrNull = [Tracker](Value *V, const DataLayout &DL) { + return Tracker->isDereferenceableOrNull(V, DL); + }; while (!Worklist.empty()) { const Use *U = Worklist.pop_back_val(); - Instruction *I = cast(U->getUser()); - - switch (I->getOpcode()) { - case Instruction::Call: - case Instruction::Invoke: { - auto *Call = cast(I); - // Not captured if the callee is readonly, doesn't return a copy through - // its return value and doesn't unwind (a readonly function can leak bits - // by throwing an exception or not depending on the input value). - if (Call->onlyReadsMemory() && Call->doesNotThrow() && - Call->getType()->isVoidTy()) - break; - - // The pointer is not captured if returned pointer is not captured. - // NOTE: CaptureTracking users should not assume that only functions - // marked with nocapture do not capture. This means that places like - // getUnderlyingObject in ValueTracking or DecomposeGEPExpression - // in BasicAA also need to know about this property. - if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, - true)) { - if (!AddUses(Call)) - return; - break; - } - - // Volatile operations effectively capture the memory location that they - // load and store to. - if (auto *MI = dyn_cast(Call)) - if (MI->isVolatile()) - if (Tracker->captured(U)) - return; - - // Calling a function pointer does not in itself cause the pointer to - // be captured. This is a subtle point considering that (for example) - // the callee might return its own address. 
It is analogous to saying - // that loading a value from a pointer does not cause the pointer to be - // captured, even though the loaded value might be the pointer itself - // (think of self-referential objects). - if (Call->isCallee(U)) - break; - - // Not captured if only passed via 'nocapture' arguments. - if (Call->isDataOperand(U) && - !Call->doesNotCapture(Call->getDataOperandNo(U))) { - // The parameter is not marked 'nocapture' - captured. - if (Tracker->captured(U)) - return; - } - break; - } - case Instruction::Load: - // Volatile loads make the address observable. - if (cast(I)->isVolatile()) - if (Tracker->captured(U)) - return; - break; - case Instruction::VAArg: - // "va-arg" from a pointer does not cause it to be captured. - break; - case Instruction::Store: - // Stored the pointer - conservatively assume it may be captured. - // Volatile stores make the address observable. - if (U->getOperandNo() == 0 || cast(I)->isVolatile()) - if (Tracker->captured(U)) - return; - break; - case Instruction::AtomicRMW: { - // atomicrmw conceptually includes both a load and store from - // the same location. - // As with a store, the location being accessed is not captured, - // but the value being stored is. - // Volatile stores make the address observable. - auto *ARMWI = cast(I); - if (U->getOperandNo() == 1 || ARMWI->isVolatile()) - if (Tracker->captured(U)) - return; - break; - } - case Instruction::AtomicCmpXchg: { - // cmpxchg conceptually includes both a load and store from - // the same location. - // As with a store, the location being accessed is not captured, - // but the value being stored is. - // Volatile stores make the address observable. - auto *ACXI = cast(I); - if (U->getOperandNo() == 1 || U->getOperandNo() == 2 || - ACXI->isVolatile()) - if (Tracker->captured(U)) - return; - break; - } - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::PHI: - case Instruction::Select: - case Instruction::AddrSpaceCast: - // The original value is not captured via this if the new value isn't. - if (!AddUses(I)) - return; - break; - case Instruction::ICmp: { - unsigned Idx = U->getOperandNo(); - unsigned OtherIdx = 1 - Idx; - if (auto *CPN = dyn_cast(I->getOperand(OtherIdx))) { - // Don't count comparisons of a no-alias return value against null as - // captures. This allows us to ignore comparisons of malloc results - // with null, for example. - if (CPN->getType()->getAddressSpace() == 0) - if (isNoAliasCall(U->get()->stripPointerCasts())) - break; - if (!I->getFunction()->nullPointerIsDefined()) { - auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); - // Comparing a dereferenceable_or_null pointer against null cannot - // lead to pointer escapes, because if it is not null it must be a - // valid (in-bounds) pointer. - if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout())) - break; - } - } - // Comparison against value stored in global variable. Given the pointer - // does not escape, its value cannot be guessed and stored separately in a - // global variable. - auto *LI = dyn_cast(I->getOperand(OtherIdx)); - if (LI && isa(LI->getPointerOperand())) - break; - // Otherwise, be conservative. There are crazy ways to capture pointers - // using comparisons. + switch (DetermineUseCaptureKind(*U, IsDereferenceableOrNull)) { + case UseCaptureKind::NO_CAPTURE: + continue; + case UseCaptureKind::MAY_CAPTURE: if (Tracker->captured(U)) return; - break; - } - default: - // Something else - be conservative and say it is captured. 
- if (Tracker->captured(U)) + continue; + case UseCaptureKind::PASSTHROUGH: + if (!AddUses(U->getUser())) return; - break; + continue; } } diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp index 5b951980a0aa..20b1df6e1495 100644 --- a/llvm/lib/Analysis/CmpInstAnalysis.cpp +++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp @@ -18,9 +18,7 @@ using namespace llvm; -unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) { - ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate() - : ICI->getPredicate(); +unsigned llvm::getICmpCode(CmpInst::Predicate Pred) { switch (Pred) { // False -> 0 case ICmpInst::ICMP_UGT: return 1; // 001 @@ -63,6 +61,18 @@ bool llvm::predicatesFoldable(ICmpInst::Predicate P1, ICmpInst::Predicate P2) { (CmpInst::isSigned(P2) && ICmpInst::isEquality(P1)); } +Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy, + CmpInst::Predicate &Pred) { + Pred = static_cast(Code); + assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE && + "Unexpected FCmp predicate!"); + if (Pred == FCmpInst::FCMP_FALSE) + return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 0); + if (Pred == FCmpInst::FCMP_TRUE) + return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 1); + return nullptr; +} + bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, APInt &Mask, bool LookThruTrunc) { diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp index 27c52506352f..6d9084215dee 100644 --- a/llvm/lib/Analysis/CodeMetrics.cpp +++ b/llvm/lib/Analysis/CodeMetrics.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstructionCost.h" @@ -118,13 +117,6 @@ void CodeMetrics::analyzeBasicBlock( const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl &EphValues, bool PrepareForLTO) { ++NumBlocks; - // Use a proxy variable for NumInsts of type InstructionCost, so that it can - // use InstructionCost's arithmetic properties such as saturation when this - // feature is added to InstructionCost. - // When storing the value back to NumInsts, we can assume all costs are Valid - // because the IR should not contain any nodes that cannot be costed. If that - // happens the cost-model is broken. - InstructionCost NumInstsProxy = NumInsts; InstructionCost NumInstsBeforeThisBB = NumInsts; for (const Instruction &I : *BB) { // Skip ephemeral values. @@ -184,8 +176,7 @@ void CodeMetrics::analyzeBasicBlock( if (InvI->cannotDuplicate()) notDuplicatable = true; - NumInstsProxy += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); - NumInsts = *NumInstsProxy.getValue(); + NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); } if (isa(BB->getTerminator())) @@ -205,6 +196,6 @@ void CodeMetrics::analyzeBasicBlock( notDuplicatable |= isa(BB->getTerminator()); // Remember NumInsts for this BB. 
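Stepping back to the CmpInstAnalysis change above: getICmpCode now takes the predicate directly, and the 3-bit code it returns (bit 2 = "less", bit 1 = "equal", bit 0 = "greater") is what lets two comparisons of the same operands be merged with plain bitwise operators. A small sketch of that property:

#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/IR/InstrTypes.h"

// (X u< Y) | (X == Y) is (X u<= Y): the codes compose by bitwise OR.
static bool icmpCodesComposeByOr() {
  using namespace llvm;
  unsigned ULT = getICmpCode(CmpInst::ICMP_ULT); // 100
  unsigned EQ = getICmpCode(CmpInst::ICMP_EQ);   // 010
  unsigned ULE = getICmpCode(CmpInst::ICMP_ULE); // 110
  return (ULT | EQ) == ULE; // true by construction of the encoding
}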
- InstructionCost NumInstsThisBB = NumInstsProxy - NumInstsBeforeThisBB; - NumBBInsts[BB] = *NumInstsThisBB.getValue(); + InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB; + NumBBInsts[BB] = NumInstsThisBB; } diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 7cf69f613c66..a81041845052 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -57,7 +57,6 @@ #include #include #include -#include #include using namespace llvm; @@ -92,7 +91,7 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, return ConstantExpr::getBitCast(C, DestTy); Result <<= BitShift; - Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth()); + Result |= ElementCI->getValue().zext(Result.getBitWidth()); } return nullptr; @@ -589,14 +588,17 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, if (BytesLoaded > 32 || BytesLoaded == 0) return nullptr; - int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize(); - // If we're not accessing anything in this constant, the result is undefined. if (Offset <= -1 * static_cast(BytesLoaded)) return UndefValue::get(IntType); + // TODO: We should be able to support scalable types. + TypeSize InitializerSize = DL.getTypeAllocSize(C->getType()); + if (InitializerSize.isScalable()) + return nullptr; + // If we're not accessing anything in this constant, the result is undefined. - if (Offset >= InitializerSize) + if (Offset >= (int64_t)InitializerSize.getFixedValue()) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; @@ -631,6 +633,39 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, return ConstantInt::get(IntType->getContext(), ResultVal); } +} // anonymous namespace + +// If GV is a constant with an initializer read its representation starting +// at Offset and return it as a constant array of unsigned char. Otherwise +// return null. +Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV, + uint64_t Offset) { + if (!GV->isConstant() || !GV->hasDefinitiveInitializer()) + return nullptr; + + const DataLayout &DL = GV->getParent()->getDataLayout(); + Constant *Init = const_cast(GV->getInitializer()); + TypeSize InitSize = DL.getTypeAllocSize(Init->getType()); + if (InitSize < Offset) + return nullptr; + + uint64_t NBytes = InitSize - Offset; + if (NBytes > UINT16_MAX) + // Bail for large initializers in excess of 64K to avoid allocating + // too much memory. + // Offset is assumed to be less than or equal than InitSize (this + // is enforced in ReadDataFromGlobal). + return nullptr; + + SmallVector RawBytes(static_cast(NBytes)); + unsigned char *CurPtr = RawBytes.data(); + + if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL)) + return nullptr; + + return ConstantDataArray::get(GV->getContext(), RawBytes); +} + /// If this Offset points exactly to the start of an aggregate element, return /// that element, otherwise return nullptr. 
Constant *getConstantAtOffset(Constant *Base, APInt Offset, @@ -659,8 +694,6 @@ Constant *getConstantAtOffset(Constant *Base, APInt Offset, return C; } -} // end anonymous namespace - Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL) { @@ -864,21 +897,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, Type *IntIdxTy = DL.getIndexType(Ptr->getType()); - // If this is "gep i8* Ptr, (sub 0, V)", fold this as: - // "inttoptr (sub (ptrtoint Ptr), V)" - if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { - auto *CE = dyn_cast(Ops[1]); - assert((!CE || CE->getType() == IntIdxTy) && - "CastGEPIndices didn't canonicalize index types!"); - if (CE && CE->getOpcode() == Instruction::Sub && - CE->getOperand(0)->isNullValue()) { - Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); - Res = ConstantExpr::getSub(Res, CE->getOperand(1)); - Res = ConstantExpr::getIntToPtr(Res, ResTy); - return ConstantFoldConstant(Res, DL, TLI); - } - } - for (unsigned i = 1, e = Ops.size(); i != e; ++i) if (!isa(Ops[i])) return nullptr; @@ -1012,8 +1030,24 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, if (Instruction::isUnaryOp(Opcode)) return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL); - if (Instruction::isBinaryOp(Opcode)) + if (Instruction::isBinaryOp(Opcode)) { + switch (Opcode) { + default: + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // Handle floating point instructions separately to account for denormals + // TODO: If a constant expression is being folded rather than an + // instruction, denormals will not be flushed/treated as zero + if (const auto *I = dyn_cast(InstOrCE)) { + return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I); + } + } return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); + } if (Instruction::isCast(Opcode)) return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL); @@ -1027,13 +1061,21 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, GEP->getInRangeIndex()); } - if (auto *CE = dyn_cast(InstOrCE)) + if (auto *CE = dyn_cast(InstOrCE)) { + if (CE->isCompare()) + return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], + DL, TLI); return CE->getWithOperands(Ops); + } switch (Opcode) { default: return nullptr; case Instruction::ICmp: - case Instruction::FCmp: llvm_unreachable("Invalid for compares"); + case Instruction::FCmp: { + auto *C = cast(InstOrCE); + return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1], + DL, TLI, C); + } case Instruction::Freeze: return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? 
Ops[0] : nullptr; case Instruction::Call: @@ -1048,13 +1090,22 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, case Instruction::ExtractElement: return ConstantExpr::getExtractElement(Ops[0], Ops[1]); case Instruction::ExtractValue: - return ConstantExpr::getExtractValue( + return ConstantFoldExtractValueInstruction( Ops[0], cast(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); + case Instruction::InsertValue: + return ConstantFoldInsertValueInstruction( + Ops[0], Ops[1], cast(InstOrCE)->getIndices()); case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector( Ops[0], Ops[1], cast(InstOrCE)->getShuffleMask()); + case Instruction::Load: { + const auto *LI = dyn_cast(InstOrCE); + if (LI->isVolatile()) + return nullptr; + return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL); + } } } @@ -1091,13 +1142,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, Ops.push_back(NewC); } - if (auto *CE = dyn_cast(C)) { - if (CE->isCompare()) - return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], - DL, TLI); - + if (auto *CE = dyn_cast(C)) return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI); - } assert(isa(C)); return ConstantVector::get(Ops); @@ -1150,22 +1196,6 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, Ops.push_back(Op); } - if (const auto *CI = dyn_cast(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], - DL, TLI); - - if (const auto *LI = dyn_cast(I)) { - if (LI->isVolatile()) - return nullptr; - return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL); - } - - if (auto *IVI = dyn_cast(I)) - return ConstantExpr::getInsertValue(Ops[0], Ops[1], IVI->getIndices()); - - if (auto *EVI = dyn_cast(I)) - return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices()); - return ConstantFoldInstOperands(I, Ops, DL, TLI); } @@ -1182,10 +1212,9 @@ Constant *llvm::ConstantFoldInstOperands(Instruction *I, return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); } -Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate, - Constant *Ops0, Constant *Ops1, - const DataLayout &DL, - const TargetLibraryInfo *TLI) { +Constant *llvm::ConstantFoldCompareInstOperands( + unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL, + const TargetLibraryInfo *TLI, const Instruction *I) { CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate; // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp null, (inttoptr x) -> icmp 0, x @@ -1287,6 +1316,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate, return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } + // Flush any denormal constant float input according to denormal handling + // mode. 
+ Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false); + Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false); + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); } @@ -1308,6 +1342,63 @@ Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, return ConstantExpr::get(Opcode, LHS, RHS); } +Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I, + bool IsOutput) { + if (!I || !I->getParent() || !I->getFunction()) + return Operand; + + ConstantFP *CFP = dyn_cast(Operand); + if (!CFP) + return Operand; + + const APFloat &APF = CFP->getValueAPF(); + Type *Ty = CFP->getType(); + DenormalMode DenormMode = + I->getFunction()->getDenormalMode(Ty->getFltSemantics()); + DenormalMode::DenormalModeKind Mode = + IsOutput ? DenormMode.Output : DenormMode.Input; + switch (Mode) { + default: + llvm_unreachable("unknown denormal mode"); + return Operand; + case DenormalMode::IEEE: + return Operand; + case DenormalMode::PreserveSign: + if (APF.isDenormal()) { + return ConstantFP::get( + Ty->getContext(), + APFloat::getZero(Ty->getFltSemantics(), APF.isNegative())); + } + return Operand; + case DenormalMode::PositiveZero: + if (APF.isDenormal()) { + return ConstantFP::get(Ty->getContext(), + APFloat::getZero(Ty->getFltSemantics(), false)); + } + return Operand; + } + return Operand; +} + +Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, + Constant *RHS, const DataLayout &DL, + const Instruction *I) { + if (Instruction::isBinaryOp(Opcode)) { + // Flush denormal inputs if needed. + Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false); + Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false); + + // Calculate constant result. + Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL); + + // Flush denormal output if needed. + return FlushFPConstant(C, I, /* IsOutput */ true); + } + // If instruction lacks a parent/function and the denormal mode cannot be + // determined, use the default (IEEE). + return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL); +} + Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL) { assert(Instruction::isCast(Opcode)); @@ -1334,6 +1425,19 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, DL, BaseOffset, /*AllowNonInbounds=*/true)); if (Base->isNullValue()) { FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset); + } else { + // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V + if (GEP->getNumIndices() == 1 && + GEP->getSourceElementType()->isIntegerTy(8)) { + auto *Ptr = cast(GEP->getPointerOperand()); + auto *Sub = dyn_cast(GEP->getOperand(1)); + Type *IntIdxTy = DL.getIndexType(Ptr->getType()); + if (Sub && Sub->getType() == IntIdxTy && + Sub->getOpcode() == Instruction::Sub && + Sub->getOperand(0)->isNullValue()) + FoldedValue = ConstantExpr::getSub( + ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1)); + } } } if (FoldedValue) { @@ -1386,6 +1490,8 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { if (Call->isNoBuiltin()) return false; + if (Call->getFunctionType() != F->getFunctionType()) + return false; switch (F->getIntrinsicID()) { // Operations that do not operate floating-point numbers and do not depend on // FP environment can be folded even in strictfp functions. 
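The FlushFPConstant/ConstantFoldFPInstOperands additions above mean constant folding now respects the function's denormal-fp-math mode. What "preserve-sign" flushing does to a single value, as a minimal APFloat sketch:

#include "llvm/ADT/APFloat.h"

// A denormal input is replaced by a zero carrying the original sign;
// normal values, zeros, infinities and NaNs pass through untouched.
static llvm::APFloat flushPreserveSign(const llvm::APFloat &V) {
  if (V.isDenormal())
    return llvm::APFloat::getZero(V.getSemantics(), V.isNegative());
  return V;
}

Under "positive-zero" mode the sign is dropped instead, which is the other branch FlushFPConstant handles.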
@@ -1527,6 +1633,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::experimental_constrained_trunc: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: return true; default: return false; @@ -1798,12 +1906,12 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, // If evaluation raised FP exception, the result can depend on rounding // mode. If the latter is unknown, folding is not possible. - if (!ORM || *ORM == RoundingMode::Dynamic) + if (ORM && *ORM == RoundingMode::Dynamic) return false; // If FP exceptions are ignored, fold the call, even if such exception is // raised. - if (!EB || *EB != fp::ExceptionBehavior::ebStrict) + if (EB && *EB != fp::ExceptionBehavior::ebStrict) return true; // Leave the calculation for runtime so that exception flags be correctly set @@ -1979,7 +2087,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, case Intrinsic::experimental_constrained_rint: { auto CI = cast(Call); RM = CI->getRoundingMode(); - if (!RM || RM.getValue() == RoundingMode::Dynamic) + if (!RM || *RM == RoundingMode::Dynamic) return nullptr; break; } @@ -2301,6 +2409,24 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return nullptr; } +static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2, + const ConstrainedFPIntrinsic *Call) { + APFloat::opStatus St = APFloat::opOK; + auto *FCmp = cast(Call); + FCmpInst::Predicate Cond = FCmp->getPredicate(); + if (FCmp->isSignaling()) { + if (Op1.isNaN() || Op2.isNaN()) + St = APFloat::opInvalidOp; + } else { + if (Op1.isSignaling() || Op2.isSignaling()) + St = APFloat::opInvalidOp; + } + bool Result = FCmpInst::compare(Op1, Op2, Cond); + if (mayFoldConstrained(const_cast(FCmp), St)) + return ConstantInt::get(Call->getType()->getScalarType(), Result); + return nullptr; +} + static Constant *ConstantFoldScalarCall2(StringRef Name, Intrinsic::ID IntrinsicID, Type *Ty, @@ -2329,8 +2455,6 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, } if (const auto *Op1 = dyn_cast(Operands[0])) { - if (!Ty->isFloatingPointTy()) - return nullptr; const APFloat &Op1V = Op1->getValueAPF(); if (const auto *Op2 = dyn_cast(Operands[1])) { @@ -2360,6 +2484,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, case Intrinsic::experimental_constrained_frem: St = Res.mod(Op2V); break; + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: + return evaluateCompare(Op1V, Op2V, ConstrIntr); } if (mayFoldConstrained(const_cast(ConstrIntr), St)) @@ -2484,6 +2611,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, case Intrinsic::smin: case Intrinsic::umax: case Intrinsic::umin: + // This is the same as for binary ops - poison propagates. + // TODO: Poison handling should be consolidated. + if (isa(Operands[0]) || isa(Operands[1])) + return PoisonValue::get(Ty); + if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) @@ -2550,6 +2682,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, } case Intrinsic::uadd_sat: case Intrinsic::sadd_sat: + // This is the same as for binary ops - poison propagates. + // TODO: Poison handling should be consolidated. 
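The poison checks added in this hunk and the two below encode one rule: for these intrinsics, as for ordinary binary operators, a poison operand makes the whole result poison, and this is tested before the undef handling. A sketch of the guard in isolation, assuming constant operands C0/C1 of type Ty:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"

// Returns the folded poison result, or null to fall through to real folding.
static llvm::Constant *foldIfPoison(llvm::Constant *C0, llvm::Constant *C1,
                                    llvm::Type *Ty) {
  if (llvm::isa<llvm::PoisonValue>(C0) || llvm::isa<llvm::PoisonValue>(C1))
    return llvm::PoisonValue::get(Ty); // poison propagates
  return nullptr;
}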
+ if (isa(Operands[0]) || isa(Operands[1])) + return PoisonValue::get(Ty); + if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) @@ -2560,6 +2697,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantInt::get(Ty, C0->sadd_sat(*C1)); case Intrinsic::usub_sat: case Intrinsic::ssub_sat: + // This is the same as for binary ops - poison propagates. + // TODO: Poison handling should be consolidated. + if (isa(Operands[0]) || isa(Operands[1])) + return PoisonValue::get(Ty); + if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) @@ -2840,11 +2982,11 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, unsigned Width = C0->getBitWidth(); assert(Scale < Width && "Illegal scale."); unsigned ExtendedWidth = Width * 2; - APInt Product = (C0->sextOrSelf(ExtendedWidth) * - C1->sextOrSelf(ExtendedWidth)).ashr(Scale); + APInt Product = + (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale); if (IntrinsicID == Intrinsic::smul_fix_sat) { - APInt Max = APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth); - APInt Min = APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth); + APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth); + APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth); Product = APIntOps::smin(Product, Max); Product = APIntOps::smax(Product, Min); } @@ -2998,7 +3140,7 @@ static Constant *ConstantFoldFixedVectorCall( // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { // Some intrinsics use a scalar type for certain arguments. - if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) { + if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) { Lane[J] = Operands[J]; continue; } diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index 773f71ada0ee..dc774728ab3d 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -12,7 +12,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" -#include #include using namespace llvm; diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index 326bacad01fe..52e424ae324b 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CostModel.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" @@ -25,7 +24,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -119,7 +117,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { PreservedAnalyses CostModelPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); - OS << "Cost Model for function '" << F.getName() << "'\n"; + OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n"; for (BasicBlock &B : F) { for (Instruction &Inst : B) { // TODO: Use a pass parameter instead of cl::opt CostKind to determine diff --git a/llvm/lib/Analysis/CycleAnalysis.cpp b/llvm/lib/Analysis/CycleAnalysis.cpp index 09c7ee67e05c..17998123fce7 100644 --- a/llvm/lib/Analysis/CycleAnalysis.cpp +++ b/llvm/lib/Analysis/CycleAnalysis.cpp @@ -8,11 +8,15 @@ #include "llvm/Analysis/CycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" 
-#include "llvm/IR/CFG.h" +#include "llvm/IR/CFG.h" // for successors found by ADL in GenericCycleImpl.h #include "llvm/InitializePasses.h" using namespace llvm; +namespace llvm { +class Module; +} + template class llvm::GenericCycleInfo; template class llvm::GenericCycle; diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index 7e1357959a3f..998c888dd2d9 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -17,13 +17,12 @@ using namespace llvm; static cl::opt SimplifyDDG( - "ddg-simplify", cl::init(true), cl::Hidden, cl::ZeroOrMore, + "ddg-simplify", cl::init(true), cl::Hidden, cl::desc( "Simplify DDG by merging nodes that have less interesting edges.")); -static cl::opt - CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore, - cl::desc("Create pi-block nodes.")); +static cl::opt CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, + cl::desc("Create pi-block nodes.")); #define DEBUG_TYPE "ddg" @@ -34,7 +33,7 @@ template class llvm::DirectedGraph; //===--------------------------------------------------------------------===// // DDGNode implementation //===--------------------------------------------------------------------===// -DDGNode::~DDGNode() {} +DDGNode::~DDGNode() = default; bool DDGNode::collectInstructions( llvm::function_ref const &Pred, diff --git a/llvm/lib/Analysis/DDGPrinter.cpp b/llvm/lib/Analysis/DDGPrinter.cpp index 0d5a936723ce..6b5acd204ec7 100644 --- a/llvm/lib/Analysis/DDGPrinter.cpp +++ b/llvm/lib/Analysis/DDGPrinter.cpp @@ -18,8 +18,8 @@ using namespace llvm; -static cl::opt DotOnly("dot-ddg-only", cl::init(false), cl::Hidden, - cl::ZeroOrMore, cl::desc("simple ddg dot graph")); +static cl::opt DotOnly("dot-ddg-only", cl::Hidden, + cl::desc("simple ddg dot graph")); static cl::opt DDGDotFilenamePrefix( "dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden, cl::desc("The prefix used for the DDG dot file names.")); diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp index 670532c6d9a8..c36e1d922915 100644 --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -24,9 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -523,6 +521,44 @@ bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE, return !Subscripts.empty(); } +bool llvm::tryDelinearizeFixedSizeImpl( + ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, + SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes) { + Value *SrcPtr = getLoadStorePointerOperand(Inst); + + // Check the simple case where the array dimensions are fixed size. + auto *SrcGEP = dyn_cast(SrcPtr); + if (!SrcGEP) + return false; + + getIndexExpressionsFromGEP(*SE, SrcGEP, Subscripts, Sizes); + + // Check that the two size arrays are non-empty and equal in length and + // value. + // TODO: it would be better to let the caller to clear Subscripts, similar + // to how we handle Sizes. + if (Sizes.empty() || Subscripts.size() <= 1) { + Subscripts.clear(); + return false; + } + + // Check that for identical base pointers we do not miss index offsets + // that have been added before this GEP is applied. 
+ Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts(); + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(AccessFn)); + if (!SrcBase || SrcBasePtr != SrcBase->getValue()) { + Subscripts.clear(); + return false; + } + + assert(Subscripts.size() == Sizes.size() + 1 && + "Expected equal number of entries in the list of size and " + "subscript."); + + return true; +} + namespace { class Delinearization : public FunctionPass { diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp index 117b12fc0701..e01ed48be376 100644 --- a/llvm/lib/Analysis/DemandedBits.cpp +++ b/llvm/lib/Analysis/DemandedBits.cpp @@ -21,19 +21,13 @@ #include "llvm/Analysis/DemandedBits.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index f827f74d5367..3d2d84ecadb4 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -50,7 +50,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Delinearization.h" @@ -58,10 +57,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Config/llvm-config.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -109,11 +106,10 @@ STATISTIC(BanerjeeIndependence, "Banerjee independence"); STATISTIC(BanerjeeSuccesses, "Banerjee successes"); static cl::opt - Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, + Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::desc("Try to delinearize array references.")); static cl::opt DisableDelinearizationChecks( - "da-disable-delinearization-checks", cl::init(false), cl::Hidden, - cl::ZeroOrMore, + "da-disable-delinearization-checks", cl::Hidden, cl::desc( "Disable checks that try to statically verify validity of " "delinearized subscripts. 
Enabling this option may result in incorrect " @@ -121,7 +117,7 @@ static cl::opt<bool> DisableDelinearizationChecks( "dimension to underflow or overflow into another dimension.")); static cl::opt<unsigned> MIVMaxLevelThreshold( - "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore, + "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::desc("Maximum depth allowed for the recursive algorithm used to " "explore MIV direction vectors.")); @@ -787,6 +783,8 @@ unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { unsigned D = DstLoop->getLoopDepth(); if (D > CommonLevels) + // This tries to make sure that we assign unique numbers to src and dst when + // the memory accesses reside in different loops that have the same depth. return D - CommonLevels + SrcLevels; else return D; } @@ -796,10 +794,16 @@ unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { // Returns true if Expression is loop invariant in LoopNest. bool DependenceInfo::isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const { + // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of + // any loop as invariant, because we only consider expression evaluation at a + // specific position (where the array access takes place), and not across the + // entire function. if (!LoopNest) return true; - return SE->isLoopInvariant(Expression, LoopNest) && - isLoopInvariant(Expression, LoopNest->getParentLoop()); + + // If the expression is invariant in the outermost loop of the loop nest, it + // is invariant anywhere in the loop nest. + return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop()); } @@ -890,13 +894,25 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { } } -// Examine the scev and return true iff it's linear. +// Examine the scev and return true iff it's affine. // Collect any loops mentioned in the set of "Loops". bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, SmallBitVector &Loops, bool IsSrc) { const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); if (!AddRec) return isLoopInvariant(Expr, LoopNest); + + // The AddRec must depend on one of the containing loops. Otherwise, + // mapSrcLoop and mapDstLoop return indices outside the intended range. This + // can happen when a subscript in one loop references an IV from a sibling + // loop that could not be replaced with a concrete exit value by + // getSCEVAtScope. + const Loop *L = LoopNest; + while (L && AddRec->getLoop() != L) + L = L->getParentLoop(); + if (!L) + return false; + const SCEV *Start = AddRec->getStart(); const SCEV *Step = AddRec->getStepRecurrence(*SE); const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); @@ -3318,59 +3334,45 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, return true; } +/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying +/// arrays accessed are fixed-size arrays. Return true if delinearization was +/// successful.
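+/// Illustrative sketch (ours, not from this patch): for two accesses into a
+/// C array declared as `int A[100][50]`, say A[i][j] and A[i][j+1], a
+/// successful delinearization yields SrcSubscripts = {i, j} and
+/// DstSubscripts = {i, j+1}, with SrcSizes == DstSizes == {50}; each
+/// subscript list carries one more entry than its size list because the
+/// outermost dimension is unbounded.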
bool DependenceInfo::tryDelinearizeFixedSize( Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, SmallVectorImpl &DstSubscripts) { - - Value *SrcPtr = getLoadStorePointerOperand(Src); - Value *DstPtr = getLoadStorePointerOperand(Dst); - const SCEVUnknown *SrcBase = - dyn_cast(SE->getPointerBase(SrcAccessFn)); - const SCEVUnknown *DstBase = - dyn_cast(SE->getPointerBase(DstAccessFn)); - assert(SrcBase && DstBase && SrcBase == DstBase && - "expected src and dst scev unknowns to be equal"); - - // Check the simple case where the array dimensions are fixed size. - auto *SrcGEP = dyn_cast(SrcPtr); - auto *DstGEP = dyn_cast(DstPtr); - if (!SrcGEP || !DstGEP) + LLVM_DEBUG({ + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); + }); + + SmallVector SrcSizes; + SmallVector DstSizes; + if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts, + SrcSizes) || + !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts, + DstSizes)) return false; - SmallVector SrcSizes, DstSizes; - getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes); - getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes); - // Check that the two size arrays are non-empty and equal in length and // value. - if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || - SrcSizes.size() != DstSizes.size() || + if (SrcSizes.size() != DstSizes.size() || !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { SrcSubscripts.clear(); DstSubscripts.clear(); return false; } - Value *SrcBasePtr = SrcGEP->getOperand(0); - Value *DstBasePtr = DstGEP->getOperand(0); - while (auto *PCast = dyn_cast(SrcBasePtr)) - SrcBasePtr = PCast->getOperand(0); - while (auto *PCast = dyn_cast(DstBasePtr)) - DstBasePtr = PCast->getOperand(0); - - // Check that for identical base pointers we do not miss index offsets - // that have been added before this GEP is applied. 
- if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) { - SrcSubscripts.clear(); - DstSubscripts.clear(); - return false; - } - assert(SrcSubscripts.size() == DstSubscripts.size() && - SrcSubscripts.size() == SrcSizes.size() + 1 && - "Expected equal number of entries in the list of sizes and " - "subscripts."); + "Expected equal number of entries in the list of SrcSubscripts and " + "DstSubscripts."); + + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); // In general we cannot safely assume that the subscripts recovered from GEPs // are in the range of values defined for their corresponding array @@ -3406,8 +3408,8 @@ bool DependenceInfo::tryDelinearizeFixedSize( } LLVM_DEBUG({ dbgs() << "Delinearized subscripts of fixed-size array\n" - << "SrcGEP:" << *SrcGEP << "\n" - << "DstGEP:" << *DstGEP << "\n"; + << "SrcGEP:" << *SrcPtr << "\n" + << "DstGEP:" << *DstPtr << "\n"; }); return true; } diff --git a/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/llvm/lib/Analysis/DependenceGraphBuilder.cpp index 6b90db4bafe1..7ee2adf49ebb 100644 --- a/llvm/lib/Analysis/DependenceGraphBuilder.cpp +++ b/llvm/lib/Analysis/DependenceGraphBuilder.cpp @@ -12,6 +12,7 @@ #include "llvm/Analysis/DependenceGraphBuilder.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DDG.h" diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 4a792fce51d1..79ea160afc22 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" -#include "llvm/Support/Casting.h" #if defined(LLVM_HAVE_TF_API) #include "llvm/ADT/BitVector.h" @@ -273,8 +272,8 @@ static const std::vector TrainingOnlyFeatures{ static const std::vector getInputFeatures() { std::vector InputSpecs; for (size_t I = 0; I < NumberOfFeatures; ++I) - InputSpecs.push_back( - TensorSpec::createSpec(TFFeedPrefix + FeatureNameMap[I], {1})); + InputSpecs.push_back(TensorSpec::createSpec( + TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape())); append_range(InputSpecs, TrainingOnlyFeatures); return InputSpecs; } @@ -290,8 +289,7 @@ TrainingLogger::TrainingLogger(StringRef LogFileName, std::vector FT; for (size_t I = 0; I < NumberOfFeatures; ++I) - FT.push_back( - {TensorSpec::createSpec(FeatureNameMap.at(I), {1}), None}); + FT.push_back({FeatureMap.at(I), None}); if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1) append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs())); diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 39e80c2ad51c..1a4b09e0cac2 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -73,15 +73,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Analysis/DomPrinter.cpp b/llvm/lib/Analysis/DomPrinter.cpp index 6088de53028d..e9f5103e1276 100644 --- a/llvm/lib/Analysis/DomPrinter.cpp +++ b/llvm/lib/Analysis/DomPrinter.cpp @@ -24,74 +24,6 @@ using namespace llvm; -namespace llvm { -template<> -struct DOTGraphTraits : public DefaultDOTGraphTraits { - - DOTGraphTraits (bool isSimple=false) - : DefaultDOTGraphTraits(isSimple) {} - - std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) { - - BasicBlock *BB = Node->getBlock(); - - if (!BB) - return "Post dominance root node"; - - - if (isSimple()) - return DOTGraphTraits - ::getSimpleNodeLabel(BB, nullptr); - else - return DOTGraphTraits - ::getCompleteNodeLabel(BB, nullptr); - } -}; - -template<> -struct DOTGraphTraits : public DOTGraphTraits { - - DOTGraphTraits (bool isSimple=false) - : DOTGraphTraits(isSimple) {} - - static std::string getGraphName(DominatorTree *DT) { - return "Dominator tree"; - } - - std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) { - return DOTGraphTraits::getNodeLabel(Node, G->getRootNode()); - } -}; - -template<> -struct DOTGraphTraits - : public DOTGraphTraits { - - DOTGraphTraits (bool isSimple=false) - : DOTGraphTraits(isSimple) {} - - static std::string getGraphName(PostDominatorTree *DT) { - return "Post dominator tree"; - } - - std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) { - return DOTGraphTraits::getNodeLabel(Node, G->getRootNode()); - } -}; -} - -PreservedAnalyses DomTreePrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - WriteDOTGraphToFile(F, &AM.getResult(F), "dom", false); - return PreservedAnalyses::all(); -} - -PreservedAnalyses DomTreeOnlyPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - WriteDOTGraphToFile(F, &AM.getResult(F), "domonly", - true); - return PreservedAnalyses::all(); -} void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) { #ifndef NDEBUG @@ -110,166 +42,167 @@ void DominatorTree::viewGraph() { } namespace { -struct DominatorTreeWrapperPassAnalysisGraphTraits { +struct LegacyDominatorTreeWrapperPassAnalysisGraphTraits { static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) { return &DTWP->getDomTree(); } }; -struct DomViewer : public DOTGraphTraitsViewer< - DominatorTreeWrapperPass, false, DominatorTree *, - DominatorTreeWrapperPassAnalysisGraphTraits> { +struct DomViewerWrapperPass + : public DOTGraphTraitsViewerWrapperPass< + DominatorTreeWrapperPass, false, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomViewer() - : DOTGraphTraitsViewer( - "dom", ID) { - initializeDomViewerPass(*PassRegistry::getPassRegistry()); + DomViewerWrapperPass() + : DOTGraphTraitsViewerWrapperPass< + DominatorTreeWrapperPass, false, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("dom", ID) { + initializeDomViewerWrapperPassPass(*PassRegistry::getPassRegistry()); } }; -struct DomOnlyViewer : public DOTGraphTraitsViewer< - DominatorTreeWrapperPass, true, DominatorTree *, - DominatorTreeWrapperPassAnalysisGraphTraits> { +struct DomOnlyViewerWrapperPass + : public DOTGraphTraitsViewerWrapperPass< + DominatorTreeWrapperPass, true, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomOnlyViewer() - : DOTGraphTraitsViewer( - "domonly", ID) { - initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry()); 
+ DomOnlyViewerWrapperPass() + : DOTGraphTraitsViewerWrapperPass< + DominatorTreeWrapperPass, true, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("domonly", ID) { + initializeDomOnlyViewerWrapperPassPass(*PassRegistry::getPassRegistry()); } }; -struct PostDominatorTreeWrapperPassAnalysisGraphTraits { +struct LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits { static PostDominatorTree *getGraph(PostDominatorTreeWrapperPass *PDTWP) { return &PDTWP->getPostDomTree(); } }; -struct PostDomViewer : public DOTGraphTraitsViewer< - PostDominatorTreeWrapperPass, false, - PostDominatorTree *, - PostDominatorTreeWrapperPassAnalysisGraphTraits> { +struct PostDomViewerWrapperPass + : public DOTGraphTraitsViewerWrapperPass< + PostDominatorTreeWrapperPass, false, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - PostDomViewer() : - DOTGraphTraitsViewer( - "postdom", ID){ - initializePostDomViewerPass(*PassRegistry::getPassRegistry()); - } + PostDomViewerWrapperPass() + : DOTGraphTraitsViewerWrapperPass< + PostDominatorTreeWrapperPass, false, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>("postdom", + ID) { + initializePostDomViewerWrapperPassPass(*PassRegistry::getPassRegistry()); + } }; -struct PostDomOnlyViewer : public DOTGraphTraitsViewer< - PostDominatorTreeWrapperPass, true, - PostDominatorTree *, - PostDominatorTreeWrapperPassAnalysisGraphTraits> { +struct PostDomOnlyViewerWrapperPass + : public DOTGraphTraitsViewerWrapperPass< + PostDominatorTreeWrapperPass, true, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - PostDomOnlyViewer() : - DOTGraphTraitsViewer( - "postdomonly", ID){ - initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry()); - } + PostDomOnlyViewerWrapperPass() + : DOTGraphTraitsViewerWrapperPass< + PostDominatorTreeWrapperPass, true, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>( + "postdomonly", ID) { + initializePostDomOnlyViewerWrapperPassPass( + *PassRegistry::getPassRegistry()); + } }; } // end anonymous namespace -char DomViewer::ID = 0; -INITIALIZE_PASS(DomViewer, "view-dom", +char DomViewerWrapperPass::ID = 0; +INITIALIZE_PASS(DomViewerWrapperPass, "view-dom", "View dominance tree of function", false, false) -char DomOnlyViewer::ID = 0; -INITIALIZE_PASS(DomOnlyViewer, "view-dom-only", +char DomOnlyViewerWrapperPass::ID = 0; +INITIALIZE_PASS(DomOnlyViewerWrapperPass, "view-dom-only", "View dominance tree of function (with no function bodies)", false, false) -char PostDomViewer::ID = 0; -INITIALIZE_PASS(PostDomViewer, "view-postdom", +char PostDomViewerWrapperPass::ID = 0; +INITIALIZE_PASS(PostDomViewerWrapperPass, "view-postdom", "View postdominance tree of function", false, false) -char PostDomOnlyViewer::ID = 0; -INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only", +char PostDomOnlyViewerWrapperPass::ID = 0; +INITIALIZE_PASS(PostDomOnlyViewerWrapperPass, "view-postdom-only", "View postdominance tree of function " "(with no function bodies)", false, false) namespace { -struct DomPrinter : public DOTGraphTraitsPrinter< - DominatorTreeWrapperPass, false, DominatorTree *, - DominatorTreeWrapperPassAnalysisGraphTraits> { +struct DomPrinterWrapperPass + : public DOTGraphTraitsPrinterWrapperPass< + DominatorTreeWrapperPass, false, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomPrinter() - : DOTGraphTraitsPrinter( - "dom", ID) { - 
initializeDomPrinterPass(*PassRegistry::getPassRegistry()); + DomPrinterWrapperPass() + : DOTGraphTraitsPrinterWrapperPass< + DominatorTreeWrapperPass, false, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("dom", ID) { + initializeDomPrinterWrapperPassPass(*PassRegistry::getPassRegistry()); } }; -struct DomOnlyPrinter : public DOTGraphTraitsPrinter< - DominatorTreeWrapperPass, true, DominatorTree *, - DominatorTreeWrapperPassAnalysisGraphTraits> { +struct DomOnlyPrinterWrapperPass + : public DOTGraphTraitsPrinterWrapperPass< + DominatorTreeWrapperPass, true, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomOnlyPrinter() - : DOTGraphTraitsPrinter( - "domonly", ID) { - initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); + DomOnlyPrinterWrapperPass() + : DOTGraphTraitsPrinterWrapperPass< + DominatorTreeWrapperPass, true, DominatorTree *, + LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("domonly", ID) { + initializeDomOnlyPrinterWrapperPassPass(*PassRegistry::getPassRegistry()); } }; -struct PostDomPrinter - : public DOTGraphTraitsPrinter< - PostDominatorTreeWrapperPass, false, - PostDominatorTree *, - PostDominatorTreeWrapperPassAnalysisGraphTraits> { +struct PostDomPrinterWrapperPass + : public DOTGraphTraitsPrinterWrapperPass< + PostDominatorTreeWrapperPass, false, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - PostDomPrinter() : - DOTGraphTraitsPrinter( - "postdom", ID) { - initializePostDomPrinterPass(*PassRegistry::getPassRegistry()); - } + PostDomPrinterWrapperPass() + : DOTGraphTraitsPrinterWrapperPass< + PostDominatorTreeWrapperPass, false, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>("postdom", + ID) { + initializePostDomPrinterWrapperPassPass(*PassRegistry::getPassRegistry()); + } }; -struct PostDomOnlyPrinter - : public DOTGraphTraitsPrinter< - PostDominatorTreeWrapperPass, true, - PostDominatorTree *, - PostDominatorTreeWrapperPassAnalysisGraphTraits> { +struct PostDomOnlyPrinterWrapperPass + : public DOTGraphTraitsPrinterWrapperPass< + PostDominatorTreeWrapperPass, true, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - PostDomOnlyPrinter() : - DOTGraphTraitsPrinter( - "postdomonly", ID) { - initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); - } + PostDomOnlyPrinterWrapperPass() + : DOTGraphTraitsPrinterWrapperPass< + PostDominatorTreeWrapperPass, true, PostDominatorTree *, + LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>( + "postdomonly", ID) { + initializePostDomOnlyPrinterWrapperPassPass( + *PassRegistry::getPassRegistry()); + } }; } // end anonymous namespace +char DomPrinterWrapperPass::ID = 0; +INITIALIZE_PASS(DomPrinterWrapperPass, "dot-dom", + "Print dominance tree of function to 'dot' file", false, false) - -char DomPrinter::ID = 0; -INITIALIZE_PASS(DomPrinter, "dot-dom", - "Print dominance tree of function to 'dot' file", - false, false) - -char DomOnlyPrinter::ID = 0; -INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only", +char DomOnlyPrinterWrapperPass::ID = 0; +INITIALIZE_PASS(DomOnlyPrinterWrapperPass, "dot-dom-only", "Print dominance tree of function to 'dot' file " "(with no function bodies)", false, false) -char PostDomPrinter::ID = 0; -INITIALIZE_PASS(PostDomPrinter, "dot-postdom", - "Print postdominance tree of function to 'dot' file", - false, false) +char PostDomPrinterWrapperPass::ID = 0; 
+INITIALIZE_PASS(PostDomPrinterWrapperPass, "dot-postdom", + "Print postdominance tree of function to 'dot' file", false, + false) -char PostDomOnlyPrinter::ID = 0; -INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only", +char PostDomOnlyPrinterWrapperPass::ID = 0; +INITIALIZE_PASS(PostDomOnlyPrinterWrapperPass, "dot-postdom-only", "Print postdominance tree of function to 'dot' file " "(with no function bodies)", false, false) @@ -278,34 +211,34 @@ INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only", // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by // the link time optimization. -FunctionPass *llvm::createDomPrinterPass() { - return new DomPrinter(); +FunctionPass *llvm::createDomPrinterWrapperPassPass() { + return new DomPrinterWrapperPass(); } -FunctionPass *llvm::createDomOnlyPrinterPass() { - return new DomOnlyPrinter(); +FunctionPass *llvm::createDomOnlyPrinterWrapperPassPass() { + return new DomOnlyPrinterWrapperPass(); } -FunctionPass *llvm::createDomViewerPass() { - return new DomViewer(); +FunctionPass *llvm::createDomViewerWrapperPassPass() { + return new DomViewerWrapperPass(); } -FunctionPass *llvm::createDomOnlyViewerPass() { - return new DomOnlyViewer(); +FunctionPass *llvm::createDomOnlyViewerWrapperPassPass() { + return new DomOnlyViewerWrapperPass(); } -FunctionPass *llvm::createPostDomPrinterPass() { - return new PostDomPrinter(); +FunctionPass *llvm::createPostDomPrinterWrapperPassPass() { + return new PostDomPrinterWrapperPass(); } -FunctionPass *llvm::createPostDomOnlyPrinterPass() { - return new PostDomOnlyPrinter(); +FunctionPass *llvm::createPostDomOnlyPrinterWrapperPassPass() { + return new PostDomOnlyPrinterWrapperPass(); } -FunctionPass *llvm::createPostDomViewerPass() { - return new PostDomViewer(); +FunctionPass *llvm::createPostDomViewerWrapperPassPass() { + return new PostDomViewerWrapperPass(); } -FunctionPass *llvm::createPostDomOnlyViewerPass() { - return new PostDomOnlyViewer(); +FunctionPass *llvm::createPostDomOnlyViewerWrapperPassPass() { + return new PostDomOnlyViewerWrapperPass(); } diff --git a/llvm/lib/Analysis/DomTreeUpdater.cpp b/llvm/lib/Analysis/DomTreeUpdater.cpp index 6e299263e66d..888c16723208 100644 --- a/llvm/lib/Analysis/DomTreeUpdater.cpp +++ b/llvm/lib/Analysis/DomTreeUpdater.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/GenericDomTree.h" #include @@ -314,98 +315,6 @@ PostDominatorTree &DomTreeUpdater::getPostDomTree() { return *PDT; } -void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) { - -#ifndef NDEBUG - assert(isUpdateValid({DominatorTree::Insert, From, To}) && - "Inserted edge does not appear in the CFG"); -#endif - - if (!DT && !PDT) - return; - - // Won't affect DomTree and PostDomTree; discard update. 
- if (From == To) - return; - - if (Strategy == UpdateStrategy::Eager) { - if (DT) - DT->insertEdge(From, To); - if (PDT) - PDT->insertEdge(From, To); - return; - } - - PendUpdates.push_back({DominatorTree::Insert, From, To}); -} - -void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) { - if (From == To) - return; - - if (!DT && !PDT) - return; - - if (!isUpdateValid({DominatorTree::Insert, From, To})) - return; - - if (Strategy == UpdateStrategy::Eager) { - if (DT) - DT->insertEdge(From, To); - if (PDT) - PDT->insertEdge(From, To); - return; - } - - PendUpdates.push_back({DominatorTree::Insert, From, To}); -} - -void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) { - -#ifndef NDEBUG - assert(isUpdateValid({DominatorTree::Delete, From, To}) && - "Deleted edge still exists in the CFG!"); -#endif - - if (!DT && !PDT) - return; - - // Won't affect DomTree and PostDomTree; discard update. - if (From == To) - return; - - if (Strategy == UpdateStrategy::Eager) { - if (DT) - DT->deleteEdge(From, To); - if (PDT) - PDT->deleteEdge(From, To); - return; - } - - PendUpdates.push_back({DominatorTree::Delete, From, To}); -} - -void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) { - if (From == To) - return; - - if (!DT && !PDT) - return; - - if (!isUpdateValid({DominatorTree::Delete, From, To})) - return; - - if (Strategy == UpdateStrategy::Eager) { - if (DT) - DT->deleteEdge(From, To); - if (PDT) - PDT->deleteEdge(From, To); - return; - } - - PendUpdates.push_back({DominatorTree::Delete, From, To}); -} - void DomTreeUpdater::dropOutOfDateUpdates() { if (Strategy == DomTreeUpdater::UpdateStrategy::Eager) return; diff --git a/llvm/lib/Analysis/DominanceFrontier.cpp b/llvm/lib/Analysis/DominanceFrontier.cpp index a8806fe5a480..ccba913ccfe5 100644 --- a/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/llvm/lib/Analysis/DominanceFrontier.cpp @@ -15,7 +15,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Analysis/EHPersonalities.cpp b/llvm/lib/Analysis/EHPersonalities.cpp index df8b7e12e8d7..277ff6ba735f 100644 --- a/llvm/lib/Analysis/EHPersonalities.cpp +++ b/llvm/lib/Analysis/EHPersonalities.cpp @@ -8,6 +8,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -67,7 +68,10 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) { } EHPersonality llvm::getDefaultEHPersonality(const Triple &T) { - return EHPersonality::GNU_C; + if (T.isPS5()) + return EHPersonality::GNU_CXX; + else + return EHPersonality::GNU_C; } bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp index 33519038e225..782c11937507 100644 --- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp +++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp @@ -12,48 +12,87 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/FunctionPropertiesAnalysis.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include using namespace llvm; -FunctionPropertiesInfo 
-FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F, - const LoopInfo &LI) { - - FunctionPropertiesInfo FPI; +namespace { +int64_t getNrBlocksFromCond(const BasicBlock &BB) { + int64_t Ret = 0; + if (const auto *BI = dyn_cast(BB.getTerminator())) { + if (BI->isConditional()) + Ret += BI->getNumSuccessors(); + } else if (const auto *SI = dyn_cast(BB.getTerminator())) { + Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest())); + } + return Ret; +} - FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); +int64_t getUses(const Function &F) { + return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); +} +} // namespace - for (const auto &BB : F) { - ++FPI.BasicBlockCount; +void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) { + updateForBB(BB, +1); +} - if (const auto *BI = dyn_cast(BB.getTerminator())) { - if (BI->isConditional()) - FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors(); - } else if (const auto *SI = dyn_cast(BB.getTerminator())) { - FPI.BlocksReachedFromConditionalInstruction += - (SI->getNumCases() + (nullptr != SI->getDefaultDest())); +void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB, + int64_t Direction) { + assert(Direction == 1 || Direction == -1); + BasicBlockCount += Direction; + BlocksReachedFromConditionalInstruction += + (Direction * getNrBlocksFromCond(BB)); + for (const auto &I : BB) { + if (auto *CS = dyn_cast(&I)) { + const auto *Callee = CS->getCalledFunction(); + if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) + DirectCallsToDefinedFunctions += Direction; } - - for (const auto &I : BB) { - if (auto *CS = dyn_cast(&I)) { - const auto *Callee = CS->getCalledFunction(); - if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) - ++FPI.DirectCallsToDefinedFunctions; - } - if (I.getOpcode() == Instruction::Load) { - ++FPI.LoadInstCount; - } else if (I.getOpcode() == Instruction::Store) { - ++FPI.StoreInstCount; - } + if (I.getOpcode() == Instruction::Load) { + LoadInstCount += Direction; + } else if (I.getOpcode() == Instruction::Store) { + StoreInstCount += Direction; } - // Loop Depth of the Basic Block - int64_t LoopDepth; - LoopDepth = LI.getLoopDepth(&BB); - if (FPI.MaxLoopDepth < LoopDepth) - FPI.MaxLoopDepth = LoopDepth; } - FPI.TopLevelLoopCount += llvm::size(LI); + TotalInstructionCount += Direction * BB.sizeWithoutDebug(); +} + +void FunctionPropertiesInfo::updateAggregateStats(const Function &F, + const LoopInfo &LI) { + + Uses = getUses(F); + TopLevelLoopCount = llvm::size(LI); + MaxLoopDepth = 0; + std::deque Worklist; + llvm::append_range(Worklist, LI); + while (!Worklist.empty()) { + const auto *L = Worklist.front(); + MaxLoopDepth = + std::max(MaxLoopDepth, static_cast(L->getLoopDepth())); + Worklist.pop_front(); + llvm::append_range(Worklist, L->getSubLoops()); + } +} + +FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( + const Function &F, FunctionAnalysisManager &FAM) { + + FunctionPropertiesInfo FPI; + // The const casts are due to the getResult API - there's no mutation of F. 
+ const auto &LI = FAM.getResult(const_cast(F)); + const auto &DT = + FAM.getResult(const_cast(F)); + for (const auto &BB : F) + if (DT.isReachableFromEntry(&BB)) + FPI.reIncludeBB(BB); + FPI.updateAggregateStats(F, LI); return FPI; } @@ -67,15 +106,15 @@ void FunctionPropertiesInfo::print(raw_ostream &OS) const { << "LoadInstCount: " << LoadInstCount << "\n" << "StoreInstCount: " << StoreInstCount << "\n" << "MaxLoopDepth: " << MaxLoopDepth << "\n" - << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n"; + << "TopLevelLoopCount: " << TopLevelLoopCount << "\n" + << "TotalInstructionCount: " << TotalInstructionCount << "\n\n"; } AnalysisKey FunctionPropertiesAnalysis::Key; FunctionPropertiesInfo FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { - return FunctionPropertiesInfo::getFunctionPropertiesInfo( - F, FAM.getResult(F)); + return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM); } PreservedAnalyses @@ -86,3 +125,127 @@ FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult(F).print(OS); return PreservedAnalyses::all(); } + +FunctionPropertiesUpdater::FunctionPropertiesUpdater( + FunctionPropertiesInfo &FPI, const CallBase &CB) + : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) { + assert(isa(CB) || isa(CB)); + // For BBs that are likely to change, we subtract from feature totals their + // contribution. Some features, like max loop counts or depths, are left + // invalid, as they will be updated post-inlining. + SmallPtrSet LikelyToChangeBBs; + // The CB BB will change - it'll either be split or the callee's body (single + // BB) will be pasted in. + LikelyToChangeBBs.insert(&CallSiteBB); + + // The caller's entry BB may change due to new alloca instructions. + LikelyToChangeBBs.insert(&*Caller.begin()); + + // The successors may become unreachable in the case of `invoke` inlining. + // We track successors separately, too, because they form a boundary, together + // with the CB BB ('Entry') between which the inlined callee will be pasted. + Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB)); + + // Inlining only handles invoke and calls. If this is an invoke, and inlining + // it pulls another invoke, the original landing pad may get split, so as to + // share its content with other potential users. So the edge up to which we + // need to invalidate and then re-account BB data is the successors of the + // current landing pad. We can leave the current lp, too - if it doesn't get + // split, then it will be the place traversal stops. Either way, the + // discounted BBs will be checked if reachable and re-added. + if (const auto *II = dyn_cast(&CB)) { + const auto *UnwindDest = II->getUnwindDest(); + Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest)); + } + + // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). + // We are only interested in BBs the graph moves past the callsite BB to + // define the frontier past which we don't want to re-process BBs. Including + // the callsite BB in this case would prematurely stop the traversal in + // finish(). + Successors.erase(&CallSiteBB); + + for (const auto *BB : Successors) + LikelyToChangeBBs.insert(BB); + + // Commit the change. While some of the BBs accounted for above may play dual + // role - e.g. caller's entry BB may be the same as the callsite BB - set + // insertion semantics make sure we account them once. This needs to be + // followed in `finish`, too. 
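+  // Hypothetical usage sketch (ours, not part of this patch): a caller is
+  // expected to bracket inlining with this updater,
+  //
+  //   FunctionPropertiesUpdater FPU(FPI, CB);
+  //   // ... inline the call site CB ...
+  //   FPU.finish(FAM);
+  //
+  // so that the per-BB discounts taken below are re-added in finish() from
+  // the post-inlining CFG.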
+ for (const auto *BB : LikelyToChangeBBs) + FPI.updateForBB(*BB, -1); +} + +void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { + // Update feature values from the BBs that were copied from the callee, or + // might have been modified because of inlining. The latter have been + // subtracted in the FunctionPropertiesUpdater ctor. + // There could be successors that were reached before but now are only + // reachable from elsewhere in the CFG. + // One example is the following diamond CFG (lines are arrows pointing down): + // A + // / \ + // B C + // | | + // | D + // | | + // | E + // \ / + // F + // There's a call site in C that is inlined. Upon doing that, it turns out + // it expands to + // call void @llvm.trap() + // unreachable + // F isn't reachable from C anymore, but we did discount it when we set up + // FunctionPropertiesUpdater, so we need to re-include it here. + // At the same time, D and E were reachable before, but now are not anymore, + // so we need to leave D out (we discounted it at setup), and explicitly + // remove E. + SetVector Reinclude; + SetVector Unreachable; + const auto &DT = + FAM.getResult(const_cast(Caller)); + + if (&CallSiteBB != &*Caller.begin()) + Reinclude.insert(&*Caller.begin()); + + // Distribute the successors to the 2 buckets. + for (const auto *Succ : Successors) + if (DT.isReachableFromEntry(Succ)) + Reinclude.insert(Succ); + else + Unreachable.insert(Succ); + + // For reinclusion, we want to stop at the reachable successors, who are at + // the beginning of the worklist; but, starting from the callsite bb and + // ending at those successors, we also want to perform a traversal. + // IncludeSuccessorsMark is the index after which we include successors. + const auto IncludeSuccessorsMark = Reinclude.size(); + bool CSInsertion = Reinclude.insert(&CallSiteBB); + (void)CSInsertion; + assert(CSInsertion); + for (size_t I = 0; I < Reinclude.size(); ++I) { + const auto *BB = Reinclude[I]; + FPI.reIncludeBB(*BB); + if (I >= IncludeSuccessorsMark) + Reinclude.insert(succ_begin(BB), succ_end(BB)); + } + + // For exclusion, we don't need to exclude the set of BBs that were successors + // before and are now unreachable, because we already did that at setup. For + // the rest, as long as a successor is unreachable, we want to explicitly + // exclude it. 
+ const auto AlreadyExcludedMark = Unreachable.size(); + for (size_t I = 0; I < Unreachable.size(); ++I) { + const auto *U = Unreachable[I]; + if (I >= AlreadyExcludedMark) + FPI.updateForBB(*U, -1); + for (const auto *Succ : successors(U)) + if (!DT.isReachableFromEntry(Succ)) + Unreachable.insert(Succ); + } + + const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(Caller)); + FPI.updateAggregateStats(Caller, LI); + assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, FAM)); +} diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index 6869530148c5..e82d2fae9356 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -21,11 +21,11 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -68,8 +68,8 @@ class GlobalsAAResult::FunctionInfo { /// should provide this much alignment at least, but this makes it clear we /// specifically rely on this amount of alignment. struct alignas(8) AlignedMap { - AlignedMap() {} - AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {} + AlignedMap() = default; + AlignedMap(const AlignedMap &Arg) = default; GlobalInfoMapType Map; }; @@ -102,7 +102,7 @@ class GlobalsAAResult::FunctionInfo { "Insufficient low bits to store our flag and ModRef info."); public: - FunctionInfo() {} + FunctionInfo() = default; ~FunctionInfo() { delete Info.getPointer(); } @@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { Handles.front().I = Handles.begin(); bool KnowNothing = false; + // Intrinsics, like any other synchronizing function, can make the effects + // of other threads visible. Without nosync we know nothing, really. + // Similarly, if `nocallback` is missing, the function (or intrinsic) can + // call into the module arbitrarily. If both are set, the function has an + // effect but will not interact with accesses of internal globals inside + // the module. We are conservative here for optnone functions; this might + // not be necessary. + auto MaySyncOrCallIntoModule = [](const Function &F) { + return !F.isDeclaration() || !F.hasNoSync() || + !F.hasFnAttribute(Attribute::NoCallback); + }; + // Collect the mod/ref properties due to called functions. We only compute // one mod-ref set. for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { @@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // Can't do better than that! } else if (F->onlyReadsMemory()) { FI.addModRefInfo(ModRefInfo::Ref); - if (!F->isIntrinsic() && !F->onlyAccessesArgMemory()) + if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F)) // This function might call back into the module and read a global - // consider every global as possibly being read by this function.
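 // For example (our illustration, not part of this patch): an external
 // declaration such as `declare void @host_hook()` carries neither `nosync`
 // nor `nocallback`, so MaySyncOrCallIntoModule(*F) returns true and every
 // internal global must be treated as potentially read through the call.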
FI.setMayReadAnyGlobal(); @@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { FI.addModRefInfo(ModRefInfo::ModRef); if (!F->onlyAccessesArgMemory()) FI.setMayReadAnyGlobal(); - if (!F->isIntrinsic()) { + if (MaySyncOrCallIntoModule(*F)) { KnowNothing = true; break; } @@ -585,12 +597,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We handle calls specially because the graph-relevant aspects are // handled above. if (auto *Call = dyn_cast(&I)) { - auto &TLI = GetTLI(*Node->getFunction()); - if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) { - // FIXME: It is completely unclear why this is necessary and not - // handled by the above graph code. - FI.addModRefInfo(ModRefInfo::ModRef); - } else if (Function *Callee = Call->getCalledFunction()) { + if (Function *Callee = Call->getCalledFunction()) { // The callgraph doesn't include intrinsic calls. if (Callee->isIntrinsic()) { if (isa(Call)) @@ -979,7 +986,7 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) } } -GlobalsAAResult::~GlobalsAAResult() {} +GlobalsAAResult::~GlobalsAAResult() = default; /*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule( Module &M, std::function GetTLI, @@ -1010,6 +1017,24 @@ GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) { AM.getResult(M)); } +PreservedAnalyses RecomputeGlobalsAAPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (auto *G = AM.getCachedResult(M)) { + auto &CG = AM.getResult(M); + G->NonAddressTakenGlobals.clear(); + G->UnknownFunctionsWithLocalLinkage = false; + G->IndirectGlobals.clear(); + G->AllocsForIndirectGlobals.clear(); + G->FunctionInfos.clear(); + G->FunctionToSCCMap.clear(); + G->Handles.clear(); + G->CollectSCCMembership(CG); + G->AnalyzeGlobals(M); + G->AnalyzeCallGraph(CG, M); + } + return PreservedAnalyses::all(); +} + char GlobalsAAWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa", "Globals Alias Analysis", false, true) diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 01681c47418a..3d51042f4da8 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -64,7 +64,7 @@ void IRInstructionData::initializeInstruction() { // Here we collect the operands and their types for determining whether // the structure of the operand use matches between two different candidates. for (Use &OI : Inst->operands()) { - if (isa(Inst) && RevisedPredicate.hasValue()) { + if (isa(Inst) && RevisedPredicate) { // If we have a CmpInst where the predicate is reversed, it means the // operands must be reversed as well. 
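 // Illustration (ours): `icmp sgt i32 %a, %b` may carry the revised
 // predicate `slt` so that it can match `icmp slt i32 %b, %a`; once the
 // predicate has been swapped, %a and %b must trade places as well, which
 // is why the operand is inserted at the front below.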
OperVals.insert(OperVals.begin(), OI.get()); @@ -183,7 +183,7 @@ CmpInst::Predicate IRInstructionData::getPredicate() const { assert(isa(Inst) && "Can only get a predicate from a compare instruction"); - if (RevisedPredicate.hasValue()) + if (RevisedPredicate) return RevisedPredicate.getValue(); return cast(Inst)->getPredicate(); @@ -193,7 +193,7 @@ StringRef IRInstructionData::getCalleeName() const { assert(isa(Inst) && "Can only get a name from a call instruction"); - assert(CalleeName.hasValue() && "CalleeName has not been set"); + assert(CalleeName && "CalleeName has not been set"); return *CalleeName; } @@ -289,14 +289,12 @@ void IRInstructionMapper::convertToUnsignedVec( } } - if (HaveLegalRange) { - if (AddedIllegalLastTime) - mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); - for (IRInstructionData *ID : InstrListForBB) - this->IDL->push_back(*ID); - llvm::append_range(InstrList, InstrListForBB); - llvm::append_range(IntegerMapping, IntegerMappingForBB); - } + if (AddedIllegalLastTime) + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + for (IRInstructionData *ID : InstrListForBB) + this->IDL->push_back(*ID); + llvm::append_range(InstrList, InstrListForBB); + llvm::append_range(IntegerMapping, IntegerMappingForBB); } // TODO: This is the same as the MachineOutliner, and should be consolidated @@ -461,6 +459,18 @@ IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, // that both of these instructions are not nullptrs. FirstInst = FirstInstIt; LastInst = LastInstIt; + + // Add the basic blocks contained in the set into the global value numbering. + DenseSet BBSet; + getBasicBlocks(BBSet); + for (BasicBlock *BB : BBSet) { + if (ValueToNumber.find(BB) != ValueToNumber.end()) + continue; + + ValueToNumber.try_emplace(BB, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, BB); + LocalValNumber++; + } } bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, @@ -516,19 +526,13 @@ static bool checkNumberingAndReplaceCommutative( for (Value *V : SourceOperands) { ArgVal = SourceValueToNumberMapping.find(V)->second; + // Instead of finding a current mapping, we attempt to insert a set. std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( std::make_pair(ArgVal, TargetValueNumbers)); - // Instead of finding a current mapping, we inserted a set. This means a - // mapping did not exist for the source Instruction operand, it has no - // current constraints we need to check. - if (WasInserted) - continue; - - // If a mapping already exists for the source operand to the values in the - // other IRSimilarityCandidate we need to iterate over the items in other - // IRSimilarityCandidate's Instruction to determine whether there is a valid - // mapping of Value to Value. + // We need to iterate over the items in other IRSimilarityCandidate's + // Instruction to determine whether there is a valid mapping of + // Value to Value. DenseSet NewSet; for (unsigned &Curr : ValueMappingIt->second) // If we can find the value in the mapping, we add it to the new set. @@ -548,7 +552,6 @@ static bool checkNumberingAndReplaceCommutative( if (ValueMappingIt->second.size() != 1) continue; - unsigned ValToRemove = *ValueMappingIt->second.begin(); // When there is only one item left in the mapping for and operand, remove // the value from the other operands. 
If it results in there being no @@ -791,7 +794,8 @@ bool IRSimilarityCandidate::compareStructure( // We have different paths for commutative instructions and non-commutative // instructions since commutative instructions could allow multiple mappings // to certain values. - if (IA->isCommutative() && !isa(IA)) { + if (IA->isCommutative() && !isa(IA) && + !isa(IA)) { if (!compareCommutativeOperandMapping( {A, OperValsA, ValueNumberMappingA}, {B, OperValsB, ValueNumberMappingB})) @@ -1008,6 +1012,40 @@ void IRSimilarityCandidate::createCanonicalRelationFrom( CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); } + + DenseSet BBSet; + getBasicBlocks(BBSet); + // Find canonical numbers for the BasicBlocks in the current candidate. + // This is done by finding the corresponding value for the first instruction + // in the block in the current candidate, finding the matching value in the + // source candidate. Then by finding the parent of this value, use the + // canonical number of the block in the source candidate for the canonical + // number in the current candidate. + for (BasicBlock *BB : BBSet) { + unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second; + + // We can skip the BasicBlock if the canonical numbering has already been + // found in a separate instruction. + if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end()) + continue; + + // If the basic block is the starting block, then the shared instruction may + // not be the first instruction in the block, it will be the first + // instruction in the similarity region. + Value *FirstOutlineInst = BB == getStartBB() + ? frontInstruction() + : &*BB->instructionsWithoutDebug().begin(); + + unsigned FirstInstGVN = *getGVN(FirstOutlineInst); + unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN); + unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum); + Value *SourceV = *SourceCand.fromGVN(SourceGVN); + BasicBlock *SourceBB = cast(SourceV)->getParent(); + unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB); + unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN); + CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand)); + NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN)); + } } void IRSimilarityCandidate::createCanonicalMappingFor( @@ -1162,11 +1200,12 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; + Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); - return SimilarityCandidates.getValue(); + return *SimilarityCandidates; } SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { @@ -1175,6 +1214,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; + Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; std::vector InstrList; std::vector IntegerMapping; @@ -1182,7 +1222,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { populateMapper(M, InstrList, IntegerMapping); findCandidates(InstrList, 
IntegerMapping); - return SimilarityCandidates.getValue(); + return *SimilarityCandidates; } INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", @@ -1196,7 +1236,8 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName, !DisableIntrinsics)); + MatchCallsByName, !DisableIntrinsics, + false)); return false; } @@ -1214,7 +1255,8 @@ AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, ModuleAnalysisManager &) { auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName, !DisableIntrinsics); + MatchCallsByName, !DisableIntrinsics, + false); IRSI.findSimilarity(M); return IRSI; } diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 44b1d94ebdc8..e4d706ab045c 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -11,26 +11,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/IVDescriptors.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DemandedBits.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" @@ -237,12 +227,10 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, return true; } -bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, - Loop *TheLoop, FastMathFlags FuncFMF, - RecurrenceDescriptor &RedDes, - DemandedBits *DB, - AssumptionCache *AC, - DominatorTree *DT) { +bool RecurrenceDescriptor::AddReductionVar( + PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, + RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE) { if (Phi->getNumIncomingValues() != 2) return false; @@ -259,6 +247,12 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // This includes users of the reduction, variables (which form a cycle // which ends in the phi node). Instruction *ExitInstruction = nullptr; + + // Variable to keep the last visited store instruction. By the end of the + // algorithm this variable is either null or holds the store of an + // intermediate reduction value to a loop invariant address. + StoreInst *IntermediateStore = nullptr; + // Indicates that we found a reduction operation in our scan. bool FoundReduxOp = false; @@ -324,6 +318,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // - By instructions outside of the loop (safe). // * One value may have several outside users, but all outside // uses must be of the same value.
+ // - By store instructions with a loop invariant address (safe with + // the following restrictions): + // * If there are several stores, all must have the same address. + // * Final value should be stored in that loop invariant address. // - By an instruction that is not part of the reduction (not safe). // This is either: // * An instruction type other than PHI or the reduction operation. // * A PHI in the header other than the initial PHI. @@ -331,6 +329,43 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, while (!Worklist.empty()) { Instruction *Cur = Worklist.pop_back_val(); + // Store instructions are allowed iff they store the reduction value to + // the same loop invariant memory location. + if (auto *SI = dyn_cast<StoreInst>(Cur)) { + if (!SE) { + LLVM_DEBUG(dbgs() << "Store instructions are not processed without " + << "Scalar Evolution Analysis\n"); + return false; + } + + const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand()); + // Check that it is the same address as previous stores. + if (IntermediateStore) { + const SCEV *OtherScev = + SE->getSCEV(IntermediateStore->getPointerOperand()); + + if (OtherScev != PtrScev) { + LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses " + << "inside the loop: " << *SI->getPointerOperand() + << " and " + << *IntermediateStore->getPointerOperand() << '\n'); + return false; + } + } + + // Check that the pointer is loop invariant. + if (!SE->isLoopInvariant(PtrScev, TheLoop)) { + LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address " + << "inside the loop: " << *SI->getPointerOperand() + << '\n'); + return false; + } + + // IntermediateStore is always the last store in the loop. + IntermediateStore = SI; + continue; + } + // No Users. // If the instruction has no users then this is a broken chain and can't be // a reduction variable. @@ -453,10 +488,17 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // reductions which are represented as a cmp followed by a select. InstDesc IgnoredVal(false, nullptr); if (VisitedInsts.insert(UI).second) { - if (isa<PHINode>(UI)) + if (isa<PHINode>(UI)) { PHIs.push_back(UI); - else + } else { + StoreInst *SI = dyn_cast<StoreInst>(UI); + if (SI && SI->getPointerOperand() == Cur) { + // The reduction variable chain can be stored somewhere, but it + // can't be used as an address. + return false; + } NonPHIs.push_back(UI); + } } else if (!isa<PHINode>(UI) && ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && !isa<SelectInst>(UI)) || @@ -476,7 +518,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // This means we have seen one but not the other instruction of the // pattern or more than just a select and cmp. Zero implies that we saw a - // llvm.min/max instrinsic, which is always OK. + // llvm.min/max intrinsic, which is always OK. if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 && NumCmpSelectPatternInst != 0) return false; @@ -484,6 +526,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1) return false; + + if (IntermediateStore) { + // Check that the stored value goes to the phi node again. This way we make + // sure that the value stored in IntermediateStore is indeed the final + // reduction value.
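+  // Illustrative source pattern (ours, not from this patch):
+  //
+  //   for (int i = 0; i < n; ++i) {
+  //     sum += a[i];
+  //     *p = sum; // p is loop invariant; the final value ends up in *p
+  //   }
+  //
+  // The store to *p is the IntermediateStore, and the checks below make
+  // sure that the value it stores feeds the reduction phi again.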
+ if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) { + LLVM_DEBUG(dbgs() << "Not a final reduction value stored: " + << *IntermediateStore << '\n'); + return false; + } + + // If there is an exit instruction, its value should be stored in + // IntermediateStore. + if (ExitInstruction && + IntermediateStore->getValueOperand() != ExitInstruction) { + LLVM_DEBUG(dbgs() << "Last store instruction of reduction value does not " + "store the last calculated value of the reduction: " + << *IntermediateStore << '\n'); + return false; + } + + // If all uses are inside the loop (intermediate stores), then the + // reduction value after the loop will be the one used in the last store. + if (!ExitInstruction) + ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand()); + } + if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) return false; @@ -545,9 +613,9 @@ // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. - RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst, - RecurrenceType, IsSigned, IsOrdered, CastInsts, - MinWidthCastToRecurrenceType); + RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind, + FMF, ExactFPMathInst, RecurrenceType, IsSigned, + IsOrdered, CastInsts, MinWidthCastToRecurrenceType); RedDes = RD; return true; @@ -771,7 +839,8 @@ bool RecurrenceDescriptor::hasMultipleUsesOf( bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC, - DominatorTree *DT) { + DominatorTree *DT, + ScalarEvolution *SE) { BasicBlock *Header = TheLoop->getHeader(); Function &F = *Header->getParent(); FastMathFlags FMF; @@ -780,72 +849,85 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, FMF.setNoSignedZeros( F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool()); - if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI."
<< *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC, - DT)) { + DT, SE)) { LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) { + if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC, - DT)) { + DT, SE)) { LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI." << " PHI." << *Phi << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, - DT)) { + if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT, + SE)) { LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n"); return true; } @@ -917,12 +999,37 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator()) return false; - // Do not try to sink an instruction multiple times (if multiple operands - // are first order recurrences). - // TODO: We can support this case, by sinking the instruction after the - // 'deepest' previous instruction. - if (SinkAfter.find(SinkCandidate) != SinkAfter.end()) - return false; + // Avoid sinking an instruction multiple times (if multiple operands are + // first order recurrences) by sinking once - after the latest 'previous' + // instruction. + auto It = SinkAfter.find(SinkCandidate); + if (It != SinkAfter.end()) { + auto *OtherPrev = It->second; + // Find the earliest entry in the 'sink-after' chain. The last entry in + // the chain is the original 'Previous' for a recurrence handled earlier. 
+      auto EarlierIt = SinkAfter.find(OtherPrev);
+      while (EarlierIt != SinkAfter.end()) {
+        Instruction *EarlierInst = EarlierIt->second;
+        EarlierIt = SinkAfter.find(EarlierInst);
+        // Bail out if order has not been preserved.
+        if (EarlierIt != SinkAfter.end() &&
+            !DT->dominates(EarlierInst, OtherPrev))
+          return false;
+        OtherPrev = EarlierInst;
+      }
+      // Bail out if order has not been preserved.
+      if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
+        return false;
+
+      // SinkCandidate is already being sunk after an instruction after
+      // Previous. Nothing left to do.
+      if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
+        return true;
+      // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to
+      // be re-sunk to Previous, instead of sinking to OtherPrev. Remove
+      // SinkCandidate from SinkAfter to ensure its insertion position is
+      // updated.
+      SinkAfter.erase(SinkCandidate);
+    }
 
     // If we reach a PHI node that is not dominated by Previous, we reached a
     // header PHI. No need for sinking.
@@ -1052,7 +1159,7 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
   // to check for a pair of icmp/select, for which we use getNextInstruction
   // and isCorrectOpcode functions to step the right number of instructions,
   // and check the icmp/select pair.
-  // FIXME: We also do not attempt to look through Phi/Select's yet, which might
+  // FIXME: We also do not attempt to look through Select's yet, which might
   // be part of the reduction chain, or attempt to look through And's to find a
   // smaller bitwidth. Subs are also currently not allowed (which are usually
   // treated as part of an add reduction) as they are expected to generally be
@@ -1062,16 +1169,21 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
   if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
     ExpectedUses = 2;
 
-  auto getNextInstruction = [&](Instruction *Cur) {
-    if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
-      // We are expecting an icmp/select pair, where we go to the next select
-      // instruction if we can. We already know that Cur has 2 uses.
-      if (isa<SelectInst>(*Cur->user_begin()))
-        return cast<Instruction>(*Cur->user_begin());
-      else
-        return cast<Instruction>(*std::next(Cur->user_begin()));
+  auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
+    for (auto User : Cur->users()) {
+      Instruction *UI = cast<Instruction>(User);
+      if (isa<PHINode>(UI))
+        continue;
+      if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
+        // We are expecting an icmp/select pair, where we go to the next
+        // select instruction if we can. We already know that Cur has 2 uses.
+        if (isa<SelectInst>(UI))
+          return UI;
+        continue;
+      }
+      return UI;
     }
-    return cast<Instruction>(*Cur->user_begin());
+    return nullptr;
   };
   auto isCorrectOpcode = [&](Instruction *Cur) {
     if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
@@ -1086,22 +1198,46 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
     return Cur->getOpcode() == RedOp;
   };
 
+  // Attempt to look through Phis which are part of the reduction chain.
+  unsigned ExtraPhiUses = 0;
+  Instruction *RdxInstr = LoopExitInstr;
+  if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
+    if (ExitPhi->getNumIncomingValues() != 2)
+      return {};
+
+    Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
+    Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
+
+    Instruction *Chain = nullptr;
+    if (Inc0 == Phi)
+      Chain = Inc1;
+    else if (Inc1 == Phi)
+      Chain = Inc0;
+    else
+      return {};
+
+    RdxInstr = Chain;
+    ExtraPhiUses = 1;
+  }
+
   // We check the loop exit instruction first (as a quick test) but add it
   // last. We check that the opcode is correct (and don't allow it to be a
   // Sub) and that it has the expected number of uses. It will have one use
   // from the phi and one from an LCSSA value, no matter the type.
-  if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
+  if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
     return {};
 
-  // Check that the Phi has one (or two for min/max) uses.
-  if (!Phi->hasNUses(ExpectedUses))
+  // Check that the Phi has one (or two for min/max) uses, plus an extra use
+  // for conditional reductions.
+  if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
     return {};
 
+  Instruction *Cur = getNextInstruction(Phi);
   // Each other instruction in the chain should have the expected number of uses
   // and be the correct opcode.
-  while (Cur != LoopExitInstr) {
-    if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
+  while (Cur != RdxInstr) {
+    if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
       return {};
 
     ReductionOperations.push_back(Cur);
@@ -1428,10 +1564,14 @@ bool InductionDescriptor::isInductionPHI(
   ConstantInt *CV = ConstStep->getValue();
 
   const DataLayout &DL = Phi->getModule()->getDataLayout();
-  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(ElementType));
-  if (!Size)
+  TypeSize TySize = DL.getTypeAllocSize(ElementType);
+  // TODO: We could potentially support this for scalable vectors if we can
+  // prove at compile time that the constant step is always a multiple of
+  // the scalable type.
+  if (TySize.isZero() || TySize.isScalable())
     return false;
 
+  int64_t Size = static_cast<int64_t>(TySize.getFixedSize());
   int64_t CVSize = CV->getSExtValue();
   if (CVSize % Size)
     return false;
diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp
index 0f3929f45506..5bde947bd851 100644
--- a/llvm/lib/Analysis/IVUsers.cpp
+++ b/llvm/lib/Analysis/IVUsers.cpp
@@ -12,25 +12,21 @@ //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/IVUsers.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "iv-users"
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index b112ed2e4439..ebfa1c8fc08e 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -13,12 +13,7 @@ //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/IndirectCallVisitor.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -31,7 +26,7 @@ using namespace llvm;
 // The percent threshold for the direct-call target (this call site vs the
 // remaining call count) for it to be considered as the promotion target.
 static cl::opt<unsigned> ICPRemainingPercentThreshold(
-    "icp-remaining-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore,
+    "icp-remaining-percent-threshold", cl::init(30), cl::Hidden,
     cl::desc("The percentage threshold against remaining unpromoted indirect "
              "call count for the promotion"));
 
@@ -39,14 +34,14 @@ static cl::opt<unsigned> ICPRemainingPercentThreshold(
 // total call count) for it to be considered as the promotion target.
 static cl::opt<unsigned>
     ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5),
-                             cl::Hidden, cl::ZeroOrMore,
+                             cl::Hidden,
                              cl::desc("The percentage threshold against total "
                                       "count for the promotion"));
 
 // Set the maximum number of targets to promote for a single indirect-call
 // callsite.
static cl::opt - MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, + MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite")); diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index f6e3dd354ff8..cf8592c41eda 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -13,14 +13,15 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -55,6 +56,11 @@ static cl::opt cl::desc("Scale to limit the cost of inline deferral"), cl::init(2), cl::Hidden); +static cl::opt AnnotateInlinePhase( + "annotate-inline-phase", cl::Hidden, cl::init(false), + cl::desc("If true, annotate inline advisor remarks " + "with LTO and pass information.")); + extern cl::opt InlinerFunctionImportStats; namespace { @@ -80,7 +86,8 @@ private: void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { if (IsInliningRecommended) ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + return OptimizationRemarkMissed(Advisor->getAnnotatedInlinePassName(), + "NotInlined", DLoc, Block) << "'" << NV("Callee", Callee) << "' is not AlwaysInline into '" << NV("Caller", Caller) << "': " << NV("Reason", Result.getFailureReason()); @@ -99,7 +106,8 @@ void DefaultInlineAdvice::recordUnsuccessfulInliningImpl( llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + "; " + inlineCostStr(*OIC)); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + return OptimizationRemarkMissed(Advisor->getAnnotatedInlinePassName(), + "NotInlined", DLoc, Block) << "'" << NV("Callee", Callee) << "' is not inlined into '" << NV("Caller", Caller) << "': " << NV("Reason", Result.getFailureReason()); @@ -108,12 +116,16 @@ void DefaultInlineAdvice::recordUnsuccessfulInliningImpl( void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() { if (EmitRemarks) - emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC); + emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC, + /* ForProfileContext= */ false, + Advisor->getAnnotatedInlinePassName()); } void DefaultInlineAdvice::recordInliningImpl() { if (EmitRemarks) - emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC); + emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC, + /* ForProfileContext= */ false, + Advisor->getAnnotatedInlinePassName()); } llvm::Optional static getDefaultInlineAdvice( @@ -146,7 +158,7 @@ llvm::Optional static getDefaultInlineAdvice( }; return llvm::shouldInline( CB, GetInlineCost, ORE, - Params.EnableDeferral.getValueOr(EnableInlineDeferral)); + Params.EnableDeferral.value_or(EnableInlineDeferral)); } std::unique_ptr @@ -185,18 +197,18 @@ AnalysisKey InlineAdvisorAnalysis::Key; bool InlineAdvisorAnalysis::Result::tryCreate( InlineParams Params, 
InliningAdvisorMode Mode, - const ReplayInlinerSettings &ReplaySettings) { + const ReplayInlinerSettings &ReplaySettings, InlineContext IC) { auto &FAM = MAM.getResult(M).getManager(); switch (Mode) { case InliningAdvisorMode::Default: LLVM_DEBUG(dbgs() << "Using default inliner heuristic.\n"); - Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params)); + Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params, IC)); // Restrict replay to default advisor, ML advisors are stateful so // replay will need augmentations to interleave with them correctly. if (!ReplaySettings.ReplayFile.empty()) { Advisor = llvm::getReplayInlineAdvisor(M, FAM, M.getContext(), std::move(Advisor), ReplaySettings, - /* EmitRemarks =*/true); + /* EmitRemarks =*/true, IC); } break; case InliningAdvisorMode::Development: @@ -442,7 +454,7 @@ std::string llvm::formatCallSiteLocation(DebugLoc DLoc, } void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { - if (!DLoc.get()) { + if (!DLoc) { return; } @@ -499,8 +511,11 @@ void llvm::emitInlinedIntoBasedOnCost( PassName); } -InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM) - : M(M), FAM(FAM) { +InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM, + Optional IC) + : M(M), FAM(FAM), IC(IC), + AnnotatedInlinePassName((IC && AnnotateInlinePhase) ? llvm::AnnotateInlinePassName(*IC) + : DEBUG_TYPE) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = std::make_unique(); @@ -522,6 +537,48 @@ std::unique_ptr InlineAdvisor::getMandatoryAdvice(CallBase &CB, Advice); } +static inline const char *getLTOPhase(ThinOrFullLTOPhase LTOPhase) { + switch (LTOPhase) { + case (ThinOrFullLTOPhase::None): + return "main"; + case (ThinOrFullLTOPhase::ThinLTOPreLink): + case (ThinOrFullLTOPhase::FullLTOPreLink): + return "prelink"; + case (ThinOrFullLTOPhase::ThinLTOPostLink): + case (ThinOrFullLTOPhase::FullLTOPostLink): + return "postlink"; + } + llvm_unreachable("unreachable"); +} + +static inline const char *getInlineAdvisorContext(InlinePass IP) { + switch (IP) { + case (InlinePass::AlwaysInliner): + return "always-inline"; + case (InlinePass::CGSCCInliner): + return "cgscc-inline"; + case (InlinePass::EarlyInliner): + return "early-inline"; + case (InlinePass::MLInliner): + return "ml-inline"; + case (InlinePass::ModuleInliner): + return "module-inline"; + case (InlinePass::ReplayCGSCCInliner): + return "replay-cgscc-inline"; + case (InlinePass::ReplaySampleProfileInliner): + return "replay-sample-profile-inline"; + case (InlinePass::SampleProfileInliner): + return "sample-profile-inline"; + } + + llvm_unreachable("unreachable"); +} + +std::string llvm::AnnotateInlinePassName(InlineContext IC) { + return std::string(getLTOPhase(IC.LTOPhase)) + "-" + + std::string(getInlineAdvisorContext(IC.Pass)); +} + InlineAdvisor::MandatoryInliningKind InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM, OptimizationRemarkEmitter &ORE) { @@ -536,7 +593,7 @@ InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM, auto TrivialDecision = llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI); - if (TrivialDecision.hasValue()) { + if (TrivialDecision) { if (TrivialDecision->isSuccess()) return MandatoryInliningKind::Always; else @@ -568,3 +625,22 @@ InlineAdvisorAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { IA->getAdvisor()->print(OS); return PreservedAnalyses::all(); } + +PreservedAnalyses InlineAdvisorAnalysisPrinterPass::run( + 
LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + const auto &MAMProxy = + AM.getResult(InitialC, CG); + + if (InitialC.size() == 0) { + OS << "SCC is empty!\n"; + return PreservedAnalyses::all(); + } + Module &M = *InitialC.begin()->getFunction().getParent(); + const auto *IA = MAMProxy.getCachedResult(M); + if (!IA) + OS << "No Inline Advisor\n"; + else + IA->getAdvisor()->print(OS); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index d5411d916c77..e63497260e6e 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -18,11 +18,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -51,24 +52,33 @@ STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); static cl::opt DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), - cl::ZeroOrMore, cl::desc("Default amount of inlining to perform")); +// We introduce this option since there is a minor compile-time win by avoiding +// addition of TTI attributes (target-features in particular) to inline +// candidates when they are guaranteed to be the same as top level methods in +// some use cases. If we avoid adding the attribute, we need an option to avoid +// checking these attributes. 
+static cl::opt<bool> IgnoreTTIInlineCompatible(
+    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
+    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
+             "during inline cost calculation"));
+
 static cl::opt<bool> PrintInstructionComments(
     "print-instruction-comments", cl::Hidden, cl::init(false),
     cl::desc("Prints comments for instruction based on inline cost analysis"));
 
 static cl::opt<int> InlineThreshold(
-    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+    "inline-threshold", cl::Hidden, cl::init(225),
     cl::desc("Control the amount of inlining to perform (default = 225)"));
 
 static cl::opt<int> HintThreshold(
-    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
+    "inlinehint-threshold", cl::Hidden, cl::init(325),
     cl::desc("Threshold for inlining functions with inline hint"));
 
 static cl::opt<int>
     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
-                          cl::init(45), cl::ZeroOrMore,
+                          cl::init(45),
                           cl::desc("Threshold for inlining cold callsites"));
 
 static cl::opt<bool> InlineEnableCostBenefitAnalysis(
@@ -76,12 +86,11 @@ static cl::opt<bool> InlineEnableCostBenefitAnalysis(
     cl::desc("Enable the cost-benefit analysis for the inliner"));
 
 static cl::opt<int> InlineSavingsMultiplier(
-    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
+    "inline-savings-multiplier", cl::Hidden, cl::init(8),
     cl::desc("Multiplier to multiply cycle savings by during inlining"));
 
 static cl::opt<int>
     InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
-                        cl::ZeroOrMore,
                         cl::desc("The maximum size of a callee that gets "
                                  "inlined without sufficient cycle savings"));
 
@@ -89,26 +98,25 @@ static cl::opt<int>
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
 static cl::opt<int> ColdThreshold(
-    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
+    "inlinecold-threshold", cl::Hidden, cl::init(45),
     cl::desc("Threshold for inlining functions with cold attribute"));
 
 static cl::opt<int>
     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
-                         cl::ZeroOrMore,
                          cl::desc("Threshold for hot callsites "));
 
 static cl::opt<int> LocallyHotCallSiteThreshold(
-    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
+    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
    cl::desc("Threshold for locally hot callsites "));
 
 static cl::opt<int> ColdCallSiteRelFreq(
-    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+    "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
     cl::desc("Maximum block frequency, expressed as a percentage of caller's "
              "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));
 
 static cl::opt<uint64_t> HotCallSiteRelFreq(
-    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
+    "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
     cl::desc("Minimum block frequency, expressed as a multiple of caller's "
              "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));
 
@@ -117,14 +125,19 @@ static cl::opt<int> CallPenalty(
     "inline-call-penalty", cl::Hidden, cl::init(25),
     cl::desc("Call penalty that is applied per callsite when inlining"));
 
+static cl::opt<size_t>
+    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
+                       cl::init(std::numeric_limits<size_t>::max()),
+                       cl::desc("Do not inline functions with a stack size "
+                                "that exceeds the specified limit"));
+
 static cl::opt<bool> OptComputeFullInlineCost(
-    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+    "inline-cost-full", cl::Hidden,
     cl::desc("Compute the full
inline cost of a call site even when the cost "
             "exceeds the threshold."));
 
 static cl::opt<bool> InlineCallerSupersetNoBuiltin(
     "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
-    cl::ZeroOrMore,
     cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
              "attributes."));
 
@@ -132,33 +145,18 @@ static cl::opt<bool> DisableGEPConstOperand(
     "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
     cl::desc("Disables evaluation of GetElementPtr with constant operands"));
 
-namespace {
-class InlineCostCallAnalyzer;
-
-/// This function behaves more like CallBase::hasFnAttr: when it looks for the
-/// requested attribute, it check both the call instruction and the called
-/// function (if it's available and operand bundles don't prohibit that).
-Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
-  Attribute CallAttr = CB.getFnAttr(AttrKind);
-  if (CallAttr.isValid())
-    return CallAttr;
-
-  // Operand bundles override attributes on the called function, but don't
-  // override attributes directly present on the call instruction.
-  if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
-    if (const Function *F = CB.getCalledFunction())
-      return F->getFnAttribute(AttrKind);
-
-  return {};
-}
-
+namespace llvm {
 Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
-  Attribute Attr = getFnAttr(CB, AttrKind);
+  Attribute Attr = CB.getFnAttr(AttrKind);
   int AttrValue;
   if (Attr.getValueAsString().getAsInteger(10, AttrValue))
     return None;
   return AttrValue;
 }
+} // namespace llvm
+
+namespace {
+class InlineCostCallAnalyzer;
 
 // This struct is used to store information about inline cost of a
 // particular instruction
@@ -198,7 +196,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   friend class InstVisitor<CallAnalyzer, bool>;
 
 protected:
-  virtual ~CallAnalyzer() {}
+  virtual ~CallAnalyzer() = default;
 
   /// The TargetTransformInfo available for this compilation.
   const TargetTransformInfo &TTI;
@@ -352,7 +350,7 @@ protected:
   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
 
   /// Keep track of dead blocks due to the constant arguments.
-  SetVector<BasicBlock *> DeadBlocks;
+  SmallPtrSet<BasicBlock *, 16> DeadBlocks;
 
   /// The mapping of the blocks to their known unique successors due to the
   /// constant arguments.
@@ -385,8 +383,7 @@ protected:
   bool canFoldInboundsGEP(GetElementPtrInst &I);
   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
   bool simplifyCallSite(Function *F, CallBase &Call);
-  template <typename Callable>
-  bool simplifyInstruction(Instruction &I, Callable Evaluate);
+  bool simplifyInstruction(Instruction &I);
   bool simplifyIntrinsicCallIsConstant(CallBase &CB);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
@@ -704,7 +701,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       BlockFrequencyInfo *BFI = &(GetBFI(F));
       assert(BFI && "BFI must be available");
       auto ProfileCount = BFI->getBlockProfileCount(BB);
-      assert(ProfileCount.hasValue());
+      assert(ProfileCount);
       if (ProfileCount.getValue() == 0)
         ColdSize += Cost - CostAtBBStart;
     }
@@ -829,14 +826,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       }
 
       auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
-      assert(ProfileCount.hasValue());
+      assert(ProfileCount);
       CurrentSavings *= ProfileCount.getValue();
       CycleSavings += CurrentSavings;
     }
 
     // Compute the cycle savings per call.
auto EntryProfileCount = F.getEntryCount(); - assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount()); + assert(EntryProfileCount && EntryProfileCount->getCount()); auto EntryCount = EntryProfileCount->getCount(); CycleSavings += EntryCount / 2; CycleSavings = CycleSavings.udiv(EntryCount); @@ -845,7 +842,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { auto *CallerBB = CandidateCall.getParent(); BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); CycleSavings += getCallsiteCost(this->CandidateCall, DL); - CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue(); + CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB); // Remove the cost of the cold basic blocks. int Size = Cost - ColdSize; @@ -904,13 +901,18 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { getStringFnAttrAsInt(CandidateCall, "function-inline-cost")) Cost = *AttrCost; + if (Optional AttrCostMult = getStringFnAttrAsInt( + CandidateCall, + InlineConstants::FunctionInlineCostMultiplierAttributeName)) + Cost *= *AttrCostMult; + if (Optional AttrThreshold = getStringFnAttrAsInt(CandidateCall, "function-inline-threshold")) Threshold = *AttrThreshold; if (auto Result = costBenefitAnalysis()) { DecidedByCostBenefit = true; - if (Result.getValue()) + if (*Result) return InlineResult::success(); else return InlineResult::failure("Cost over threshold."); @@ -978,6 +980,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { if (F.getCallingConv() == CallingConv::Cold) Cost += InlineConstants::ColdccPenalty; + LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n"); + // Check if we're done. This can happen due to bonuses and penalties. if (Cost >= Threshold && !ComputeFullInlineCost) return InlineResult::failure("high cost"); @@ -1002,7 +1006,7 @@ public: BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), Writer(this) { - AllowRecursiveCall = Params.AllowRecursiveCall.getValue(); + AllowRecursiveCall = *Params.AllowRecursiveCall; } /// Annotation Writer for instruction details @@ -1020,7 +1024,7 @@ public: return None; } - virtual ~InlineCostCallAnalyzer() {} + virtual ~InlineCostCallAnalyzer() = default; int getThreshold() const { return Threshold; } int getCost() const { return Cost; } Optional getCostBenefitPair() { return CostBenefit; } @@ -1203,6 +1207,10 @@ private: set(InlineCostFeatureIndex::ColdCcPenalty, (F.getCallingConv() == CallingConv::Cold)); + set(InlineCostFeatureIndex::LastCallToStaticBonus, + (F.hasLocalLinkage() && F.hasOneLiveUse() && + &F == CandidateCall.getCalledFunction())); + // FIXME: we shouldn't repeat this logic in both the Features and Cost // analyzer - instead, we should abstract it to a common method in the // CallAnalyzer @@ -1262,7 +1270,7 @@ void InlineCostAnnotationWriter::emitInstructionAnnot( auto C = ICCA->getSimplifiedValue(const_cast(I)); if (C) { OS << ", simplified to "; - C.getValue()->print(OS, true); + (*C)->print(OS, true); } OS << "\n"; } @@ -1501,13 +1509,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { }; if (!DisableGEPConstOperand) - if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - SmallVector Indices; - for (unsigned int Index = 1; Index < COps.size(); ++Index) - Indices.push_back(COps[Index]); - return ConstantExpr::getGetElementPtr( - I.getSourceElementType(), COps[0], Indices, I.isInBounds()); - })) + if (simplifyInstruction(I)) return true; if ((I.isInBounds() && canFoldInboundsGEP(I)) || 
IsGEPOffsetConstant(I)) {
@@ -1525,11 +1527,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
 }
 
 /// Simplify \p I if its operands are constants and update SimplifiedValues.
-/// \p Evaluate is a callable specific to instruction type that evaluates the
-/// instruction when all the operands are constants.
-template <typename Callable>
-bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
-  SmallVector<Constant *, 2> COps;
+bool CallAnalyzer::simplifyInstruction(Instruction &I) {
+  SmallVector<Constant *> COps;
   for (Value *Op : I.operands()) {
     Constant *COp = dyn_cast<Constant>(Op);
     if (!COp)
@@ -1538,7 +1537,7 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
       return false;
     COps.push_back(COp);
   }
-  auto *C = Evaluate(COps);
+  auto *C = ConstantFoldInstOperands(&I, COps, DL);
   if (!C)
     return false;
   SimplifiedValues[&I] = C;
@@ -1568,9 +1567,7 @@ bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
 
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getBitCast(COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Track base/offsets through casts
@@ -1590,9 +1587,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
 
 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   // Propagate constants through ptrtoint.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getPtrToInt(COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Track base/offset pairs when converted to a plain integer provided the
@@ -1622,9 +1617,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   // Propagate constants through inttoptr.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getIntToPtr(COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Track base/offset pairs when round-tripped through a pointer without
@@ -1647,9 +1640,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Propagate constants through casts.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Disable SROA in the face of arbitrary casts we don't explicitly list
@@ -1855,7 +1846,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
       // current threshold, but AutoFDO + ThinLTO currently relies on this
       // behavior to prevent inlining of hot callsites during ThinLTO
       // compile phase.
-      Threshold = HotCallSiteThreshold.getValue();
+      Threshold = *HotCallSiteThreshold;
     } else if (isColdCallSite(Call, CallerBFI)) {
       LLVM_DEBUG(dbgs() << "Cold callsite.\n");
       // Do not apply bonuses for a cold callsite including the
@@ -1906,9 +1897,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
 
 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   // First try to handle simplified comparisons.
- if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]); - })) + if (simplifyInstruction(I)) return true; if (I.getOpcode() == Instruction::FCmp) @@ -1984,11 +1973,11 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *SimpleV = nullptr; if (auto FI = dyn_cast(&I)) - SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, + SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); else SimpleV = - SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL); + simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL); if (Constant *C = dyn_cast_or_null(SimpleV)) SimplifiedValues[&I] = C; @@ -2018,7 +2007,7 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) { if (!COp) COp = SimplifiedValues.lookup(Op); - Value *SimpleV = SimplifyFNegInst( + Value *SimpleV = simplifyFNegInst( COp ? COp : Op, cast(I).getFastMathFlags(), DL); if (Constant *C = dyn_cast_or_null(SimpleV)) @@ -2067,9 +2056,7 @@ bool CallAnalyzer::visitStore(StoreInst &I) { bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { // Constant folding for extract value is trivial. - if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - return ConstantExpr::getExtractValue(COps[0], I.getIndices()); - })) + if (simplifyInstruction(I)) return true; // SROA can't look through these, but they may be free. @@ -2078,11 +2065,7 @@ bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { // Constant folding for insert value is trivial. - if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0], - /*InsertedValueOperand*/ COps[1], - I.getIndices()); - })) + if (simplifyInstruction(I)) return true; // SROA can't look through these, but they may be free. @@ -2136,14 +2119,14 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { if (isa(Call) && cast(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; - Value *Callee = Call.getCalledOperand(); - Function *F = dyn_cast_or_null(Callee); + Function *F = Call.getCalledFunction(); bool IsIndirectCall = !F; if (IsIndirectCall) { // Check if this happens to be an indirect function call to a known function // in this inline context. If not, we've done all we can. + Value *Callee = Call.getCalledOperand(); F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) { + if (!F || F->getFunctionType() != Call.getFunctionType()) { onCallArgumentSetup(Call); if (!Call.onlyReadsMemory()) @@ -2552,7 +2535,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { NewDead.push_back(Succ); while (!NewDead.empty()) { BasicBlock *Dead = NewDead.pop_back_val(); - if (DeadBlocks.insert(Dead)) + if (DeadBlocks.insert(Dead).second) // Continue growing the dead block lists. for (BasicBlock *S : successors(Dead)) if (IsNewlyDead(S)) @@ -2707,6 +2690,11 @@ InlineResult CallAnalyzer::analyze() { if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return InlineResult::failure("noduplicate"); + // If the callee's stack size exceeds the user-specified threshold, + // do not let it be inlined. + if (AllocatedSize > StackSizeThreshold) + return InlineResult::failure("stacksize"); + return finalizeAnalysis(); } @@ -2745,7 +2733,8 @@ static bool functionsHaveCompatibleAttributes( // object, and always returns the same object (which is overwritten on each // GetTLI call). 
Therefore we copy the first result. auto CalleeTLI = GetTLI(*Callee); - return TTI.areInlineCompatible(Caller, Callee) && + return (IgnoreTTIInlineCompatible || + TTI.areInlineCompatible(Caller, Callee)) && GetTLI(*Caller).areInlineCompatible(CalleeTLI, InlineCallerSupersetNoBuiltin) && AttributeFuncs::areInlineCompatible(*Caller, *Callee); @@ -2864,6 +2853,9 @@ Optional llvm::getAttributeBasedInliningDecision( // Calls to functions with always-inline attributes should be inlined // whenever possible. if (Call.hasFnAttr(Attribute::AlwaysInline)) { + if (Call.getAttributes().hasFnAttr(Attribute::NoInline)) + return InlineResult::failure("noinline call site attribute"); + auto IsViable = isInlineViable(*Callee); if (IsViable.isSuccess()) return InlineResult::success(); @@ -2911,7 +2903,7 @@ InlineCost llvm::getInlineCost( auto UserDecision = llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); - if (UserDecision.hasValue()) { + if (UserDecision) { if (UserDecision->isSuccess()) return llvm::InlineCost::getAlways("always inline attribute"); return llvm::InlineCost::getNever(UserDecision->getFailureReason()); diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp index a2e231e2d0f4..2371ecbba615 100644 --- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -15,33 +15,32 @@ #ifdef LLVM_HAVE_TF_API #include "llvm/Analysis/Utils/TFUtils.h" #endif +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +AnalysisKey InlineSizeEstimatorAnalysis::Key; + +#ifdef LLVM_HAVE_TF_API #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/PassManager.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - #include #include -using namespace llvm; - -AnalysisKey InlineSizeEstimatorAnalysis::Key; - -#define DEBUG_TYPE "inline-size-estimator" - -#ifdef LLVM_HAVE_TF_API cl::opt TFIR2NativeModelPath( "ml-inliner-ir2native-model", cl::Hidden, cl::desc("Path to saved model evaluating native size from IR.")); +#define DEBUG_TYPE "inline-size-estimator" namespace { unsigned getMaxInstructionID() { #define LAST_OTHER_INST(NR) return NR; @@ -261,10 +260,10 @@ InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( namespace llvm { class TFModelEvaluator {}; } // namespace llvm -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default; InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( InlineSizeEstimatorAnalysis &&) {} -InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default; InlineSizeEstimatorAnalysis::Result InlineSizeEstimatorAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4775340b3438..013e4d6489fa 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" 
#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" @@ -36,13 +35,10 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Support/KnownBits.h" #include using namespace llvm; @@ -52,28 +48,30 @@ using namespace llvm::PatternMatch; enum { RecursionLimit = 3 }; -STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); -static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *simplifyAndInst(Value *, Value *, const SimplifyQuery &, + unsigned); static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned); static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, +static Value *simplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, +static Value *simplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, +static Value *simplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse); -static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyCastInst(unsigned, Value *, Type *, - const SimplifyQuery &, unsigned); -static Value *SimplifyGEPInst(Type *, Value *, ArrayRef, bool, +static Value *simplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *simplifyXorInst(Value *, Value *, const SimplifyQuery &, + unsigned); +static Value *simplifyCastInst(unsigned, Value *, Type *, const SimplifyQuery &, + unsigned); +static Value *simplifyGEPInst(Type *, Value *, ArrayRef, bool, const SimplifyQuery &, unsigned); -static Value *SimplifySelectInst(Value *, Value *, Value *, +static Value *simplifySelectInst(Value *, Value *, Value *, const SimplifyQuery &, unsigned); static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal, @@ -120,15 +118,11 @@ static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal, /// For a boolean type or a vector of boolean type, return false or a vector /// with every element false. -static Constant *getFalse(Type *Ty) { - return ConstantInt::getFalse(Ty); -} +static Constant *getFalse(Type *Ty) { return ConstantInt::getFalse(Ty); } /// For a boolean type or a vector of boolean type, return true or a vector /// with every element true. -static Constant *getTrue(Type *Ty) { - return ConstantInt::getTrue(Ty); -} +static Constant *getTrue(Type *Ty) { return ConstantInt::getTrue(Ty); } /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? 
static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, @@ -141,7 +135,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, if (CPred == Pred && CLHS == LHS && CRHS == RHS) return true; return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS && - CRHS == LHS; + CRHS == LHS; } /// Simplify comparison with true or false branch of select: @@ -153,7 +147,7 @@ static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS, Value *RHS, Value *Cond, const SimplifyQuery &Q, unsigned MaxRecurse, Constant *TrueOrFalse) { - Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse); + Value *SimplifiedCmp = simplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse); if (SimplifiedCmp == Cond) { // %cmp simplified to the select condition (%cond). return TrueOrFalse; @@ -196,17 +190,17 @@ static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp, // checks whether folding it does not convert a well-defined value into // poison. if (match(FCmp, m_Zero()) && impliesPoison(TCmp, Cond)) - if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) + if (Value *V = simplifyAndInst(Cond, TCmp, Q, MaxRecurse)) return V; // If the true value simplified to true, then the result of the compare // is equal to "Cond || FCmp". if (match(TCmp, m_One()) && impliesPoison(FCmp, Cond)) - if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) + if (Value *V = simplifyOrInst(Cond, FCmp, Q, MaxRecurse)) return V; // Finally, if the false value simplified to true and the true value to // false, then the result of the compare is equal to "!Cond". if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = SimplifyXorInst( + if (Value *V = simplifyXorInst( Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse)) return V; return nullptr; @@ -248,12 +242,12 @@ static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V, if (!B || B->getOpcode() != OpcodeToExpand) return nullptr; Value *B0 = B->getOperand(0), *B1 = B->getOperand(1); - Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(), - MaxRecurse); + Value *L = + simplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(), MaxRecurse); if (!L) return nullptr; - Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(), - MaxRecurse); + Value *R = + simplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(), MaxRecurse); if (!R) return nullptr; @@ -265,7 +259,7 @@ static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V, } // Otherwise, return "L op' R" if it simplifies. - Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse); + Value *S = simplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse); if (!S) return nullptr; @@ -275,8 +269,8 @@ static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V, /// Try to simplify binops of form "A op (B op' C)" or the commuted variant by /// distributing op over op'. -static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, - Value *L, Value *R, +static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, Value *L, + Value *R, Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -293,7 +287,7 @@ static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. 
-static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, +static Value *simplifyAssociativeBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -313,12 +307,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, Value *C = RHS; // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { + if (Value *V = simplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // It does! Return "A op V" if it simplifies or is already available. // If V equals B then "A op V" is just the LHS. - if (V == B) return LHS; + if (V == B) + return LHS; // Otherwise return "A op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { + if (Value *W = simplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -332,12 +327,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { + if (Value *V = simplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { // It does! Return "V op C" if it simplifies or is already available. // If V equals B then "V op C" is just the RHS. - if (V == B) return RHS; + if (V == B) + return RHS; // Otherwise return "V op C" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { + if (Value *W = simplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -355,12 +351,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, Value *C = RHS; // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { + if (Value *V = simplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "V op B" if it simplifies or is already available. // If V equals A then "V op B" is just the LHS. - if (V == A) return LHS; + if (V == A) + return LHS; // Otherwise return "V op B" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { + if (Value *W = simplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -374,12 +371,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { + if (Value *V = simplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "B op V" if it simplifies or is already available. // If V equals C then "B op V" is just the RHS. - if (V == C) return RHS; + if (V == C) + return RHS; // Otherwise return "B op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { + if (Value *W = simplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -393,7 +391,7 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, /// try to simplify the binop by seeing whether evaluating it on both branches /// of the select results in the same value. Returns the common value if so, /// otherwise returns null. -static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, +static Value *threadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. 
@@ -412,11 +410,11 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, Value *TV; Value *FV; if (SI == LHS) { - TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); - FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); + TV = simplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); + FV = simplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); } else { - TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); - FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); + TV = simplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); + FV = simplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); } // If they simplified to the same value, then return the common value. @@ -471,7 +469,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// We can simplify %cmp1 to true, because both branches of select are /// less than 3. We compose new comparison by substituting %tmp with both /// branches of select and see if it can be simplified. -static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, +static Value *threadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -517,7 +515,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// try to simplify the binop by seeing whether evaluating it on the incoming /// phi values yields the same result for every value. If so returns the common /// value, otherwise returns null. -static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, +static Value *threadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -542,10 +540,10 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, Value *CommonValue = nullptr; for (Value *Incoming : PI->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. - if (Incoming == PI) continue; - Value *V = PI == LHS ? - SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : - SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); + if (Incoming == PI) + continue; + Value *V = PI == LHS ? simplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) + : simplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) @@ -560,7 +558,7 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, /// comparison by seeing whether comparing with all of the incoming phi values /// yields the same result every time. If so returns the common result, /// otherwise returns null. -static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, +static Value *threadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -584,11 +582,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, Value *Incoming = PI->getIncomingValue(u); Instruction *InTI = PI->getIncomingBlock(u)->getTerminator(); // If the incoming value is the phi node itself, it can safely be skipped. 
- if (Incoming == PI) continue; + if (Incoming == PI) + continue; // Change the context instruction to the "edge" that flows into the phi. // This is important because that is where incoming is actually "evaluated" // even though it is used later somewhere else. - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI), + Value *V = simplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI), MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. @@ -604,8 +603,20 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, Value *&Op0, Value *&Op1, const SimplifyQuery &Q) { if (auto *CLHS = dyn_cast(Op0)) { - if (auto *CRHS = dyn_cast(Op1)) + if (auto *CRHS = dyn_cast(Op1)) { + switch (Opcode) { + default: + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + if (Q.CxtI != nullptr) + return ConstantFoldFPInstOperands(Opcode, CLHS, CRHS, Q.DL, Q.CxtI); + } return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); + } // Canonicalize the constant to the RHS if this is a commutative operation. if (Instruction::isCommutative(Opcode)) @@ -616,7 +627,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, +static Value *simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; @@ -647,8 +658,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, // X + ~X -> -1 since ~X = -X-1 Type *Ty = Op0->getType(); - if (match(Op0, m_Not(m_Specific(Op1))) || - match(Op1, m_Not(m_Specific(Op0)))) + if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Ty); // add nsw/nuw (xor Y, signmask), signmask --> Y @@ -664,12 +674,12 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, /// i1 add -> xor. if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) + if (Value *V = simplifyXorInst(Op0, Op1, Q, MaxRecurse - 1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + simplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse)) return V; // Threading Add over selects and phi nodes is pointless, so don't bother. @@ -684,45 +694,37 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, return nullptr; } -Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, +Value *llvm::simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, const SimplifyQuery &Query) { - return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit); + return ::simplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit); } /// Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and -/// accumulates the total constant offset applied in the returned constant. It -/// returns 0 if V is not a pointer, and returns the constant '0' if there are -/// no constant offsets applied. 
+/// accumulates the total constant offset applied in the returned constant. +/// It returns zero if there are no constant offsets applied. /// -/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't -/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. -/// folding. -static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, - bool AllowNonInbounds = false) { +/// This is very similar to stripAndAccumulateConstantOffsets(), except it +/// normalizes the offset bitwidth to the stripped pointer type, not the +/// original pointer type. +static APInt stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, + bool AllowNonInbounds = false) { assert(V->getType()->isPtrOrPtrVectorTy()); APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType())); - V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds); // As that strip may trace through `addrspacecast`, need to sext or trunc // the offset calculated. - Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType(); - Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth()); - - Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset); - if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) - return ConstantVector::getSplat(VecTy->getElementCount(), OffsetIntPtr); - return OffsetIntPtr; + return Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(V->getType())); } /// Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, Value *RHS) { - Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); - Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); + APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS); + APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS); // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. @@ -733,12 +735,15 @@ static Value *computePointerDifference(const DataLayout &DL, Value *LHS, // LHS - RHS // = (LHSOffset + Base) - (RHSOffset + Base) // = LHSOffset - RHSOffset - return ConstantExpr::getSub(LHSOffset, RHSOffset); + Constant *Res = ConstantInt::get(LHS->getContext(), LHSOffset - RHSOffset); + if (auto *VecTy = dyn_cast<VectorType>(LHS->getType())) + Res = ConstantVector::getSplat(VecTy->getElementCount(), Res); + return Res; } /// Given operands for a Sub, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +static Value *simplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) return C; @@ -784,17 +789,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *X = nullptr, *Y = nullptr, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) + if (Value *V = simplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse - 1)) // It does! Now see if "X + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { + if (Value *W = simplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse - 1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) + if (Value *V = simplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse - 1)) // It does! Now see if "Y + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { + if (Value *W = simplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse - 1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -806,17 +811,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, X = Op0; if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) // See if "V === X - Y" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + if (Value *V = simplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse - 1)) // It does! Now see if "V - Z" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { + if (Value *W = simplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse - 1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) + if (Value *V = simplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse - 1)) // It does! Now see if "V - Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { + if (Value *W = simplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse - 1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -828,9 +833,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Z = Op0; if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) // See if "V === Z - X" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) + if (Value *V = simplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse - 1)) // It does! Now see if "V + Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { + if (Value *W = simplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse - 1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -841,22 +846,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, match(Op1, m_Trunc(m_Value(Y)))) if (X->getType() == Y->getType()) // See if "V === X - Y" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + if (Value *V = simplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse - 1)) // It does! Now see if "trunc V" simplifies. - if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(), + if (Value *W = simplifyCastInst(Instruction::Trunc, V, Op0->getType(), Q, MaxRecurse - 1)) // It does, return the simplified "trunc V". return W; // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). - if (match(Op0, m_PtrToInt(m_Value(X))) && - match(Op1, m_PtrToInt(m_Value(Y)))) + if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) if (Constant *Result = computePointerDifference(Q.DL, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // i1 sub -> xor. if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) + if (Value *V = simplifyXorInst(Op0, Op1, Q, MaxRecurse - 1)) return V; // Threading Sub over selects and phi nodes is pointless, so don't bother. 
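The computePointerDifference rewrite above hinges on the identity spelled out in its comment: once both pointers strip to the same base, (Base + LHSOffset) - (Base + RHSOffset) reduces to LHSOffset - RHSOffset. A standalone arithmetic check (plain integers standing in for APInt offsets; not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t Base : {0x1000LL, 0x2000LL})
    for (int64_t LHSOff : {0LL, 8LL, 24LL})
      for (int64_t RHSOff : {0LL, 16LL})
        // The base cancels, so only the constant offsets matter.
        assert((Base + LHSOff) - (Base + RHSOff) == LHSOff - RHSOff);
}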
@@ -871,14 +875,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return nullptr; } -Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +Value *llvm::simplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); + return ::simplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) return C; @@ -906,12 +910,12 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // i1 mul -> and. if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) - if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) + if (Value *V = simplifyAndInst(Op0, Op1, Q, MaxRecurse - 1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + simplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. @@ -922,22 +926,22 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + threadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + threadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; return nullptr; } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifyMulInst(Op0, Op1, Q, RecursionLimit); } /// Check for common or similar folds of integer division or integer remainder. @@ -1026,7 +1030,7 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, /// when we can prove a relationship between the operands. static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { - Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); + Value *V = simplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); Constant *C = dyn_cast_or_null<Constant>(V); return (C && C->isAllOnesValue()); } @@ -1122,13 +1126,13 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned)) @@ -1164,13 +1168,13 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If X / Y == 0, then X % Y == X. @@ -1182,7 +1186,7 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { // If two operands are negated and no signed overflow, return -1. if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true)) @@ -1191,24 +1195,24 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse); } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifySDivInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a UDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse); } -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifyUDivInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for an SRem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { // If the divisor is 0, the result is undefined, so assume the divisor is -1.
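The SRem reasoning in the surrounding comments relies on two facts: sext of an i1 is always 0 or -1, and since srem by 0 is undefined the simplifier may assume the divisor is -1, where X srem -1 is 0 for every X. A quick standalone check of the -1 case (avoiding INT_MIN, where the C++ expression itself would overflow; not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {-100, -1, 0, 1, 7, 100})
    assert(X % -1 == 0); // remainder of division by -1 is always zero
}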
// srem Op0, (sext i1 X) --> srem Op0, -1 --> 0 @@ -1223,19 +1227,19 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse); } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifySRemInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a URem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse); } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifyURemInst(Op0, Op1, Q, RecursionLimit); } /// Returns true if a shift by \c Amount always yields poison. @@ -1268,7 +1272,7 @@ static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) { /// Given operands for an Shl, LShr or AShr, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, +static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, bool IsNSW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) @@ -1297,13 +1301,13 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If any bits in the shift amount make that value greater than or equal to @@ -1338,11 +1342,11 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, /// Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, bool isExact, const SimplifyQuery &Q, - unsigned MaxRecurse) { +static Value *simplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, + Value *Op1, bool isExact, + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = - SimplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse)) + simplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse)) return V; // X >> X -> 0 @@ -1356,7 +1360,8 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // The low bit cannot be shifted out of an exact shift if it is set.
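The exact-shift observation above can be checked in isolation: "exact" on a right shift asserts that no one-bits are shifted out, i.e. shifting back left reproduces the value, and if the low bit is set that can only hold for a zero shift amount. A standalone sketch of the underlying claim (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 1; X < 64; X += 2)   // values with the low bit set
    for (uint32_t Y = 0; Y < 6; ++Y)
      if (((X >> Y) << Y) == X)          // the "exact" guarantee
        assert(Y == 0 && (X >> Y) == X); // ...forces a zero shift amount
}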
if (isExact) { - KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + KnownBits Op0Known = + computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); if (Op0Known.One[0]) return Op0; } @@ -1366,10 +1371,10 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, /// Given operands for an Shl, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +static Value *simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = - SimplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse)) + simplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse)) return V; // undef << X -> 0 @@ -1392,18 +1397,18 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return nullptr; } -Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +Value *llvm::simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); + return ::simplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } /// Given operands for an LShr, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, +static Value *simplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, + if (Value *V = simplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, MaxRecurse)) - return V; + return V; // (X << A) >> A -> X Value *X; @@ -1429,16 +1434,16 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, return nullptr; } -Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, +Value *llvm::simplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); + return ::simplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); } /// Given operands for an AShr, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, +static Value *simplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, + if (Value *V = simplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1462,9 +1467,9 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, return nullptr; } -Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, +Value *llvm::simplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); + return ::simplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); } /// Commuted variants are assumed to be handled by calling this function again @@ -1581,7 +1586,7 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, /// with the parameters swapped. 
static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; - Value *A ,*B; + Value *A, *B; if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) return nullptr; @@ -1606,7 +1611,7 @@ static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { /// with the parameters swapped. static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; - Value *A ,*B; + Value *A, *B; if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) return nullptr; @@ -1812,6 +1817,27 @@ static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; } +/// Try to simplify and/or of icmp with ctpop intrinsic. +static Value *simplifyAndOrOfICmpsWithCtpop(ICmpInst *Cmp0, ICmpInst *Cmp1, + bool IsAnd) { + ICmpInst::Predicate Pred0, Pred1; + Value *X; + const APInt *C; + if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)), + m_APInt(C))) || + !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())) || C->isZero()) + return nullptr; + + // (ctpop(X) == C) || (X != 0) --> X != 0 where C > 0 + if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_NE) + return Cmp1; + // (ctpop(X) != C) && (X == 0) --> X == 0 where C > 0 + if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_EQ) + return Cmp1; + + return nullptr; +} + static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, const SimplifyQuery &Q) { if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q)) @@ -1833,6 +1859,11 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) return X; + if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, true)) + return X; + if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, true)) + return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ)) @@ -1909,6 +1940,11 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) return X; + if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, false)) + return X; + if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, false)) + return X; + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; @@ -1917,8 +1953,8 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, return nullptr; } -static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, - FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) { +static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, FCmpInst *LHS, + FCmpInst *RHS, bool IsAnd) { Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); if (LHS0->getType() != RHS0->getType()) return nullptr; @@ -1955,8 +1991,8 @@ static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, return nullptr; } -static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, - Value *Op0, Value *Op1, bool IsAnd) { +static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0, + Value *Op1, bool IsAnd) { // Look through casts of the 'and' operands to find compares.
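The new ctpop fold above rests on a simple implication: ctpop(X) == C with C != 0 forces X != 0, so OR-ing with X != 0 adds nothing, and dually for the AND form. A standalone check over all 8-bit values, using std::popcount (C++20) in place of the intrinsic (not part of the patch):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  const int C = 3;                          // any non-zero constant
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t X = (uint8_t)v;
    bool OrForm = (std::popcount(X) == C) || (X != 0);
    bool AndForm = (std::popcount(X) != C) && (X == 0);
    assert(OrForm == (X != 0));  // (ctpop(X)==C) | (X!=0) --> X!=0
    assert(AndForm == (X == 0)); // (ctpop(X)!=C) & (X==0) --> X==0
  }
}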
auto *Cast0 = dyn_cast<CastInst>(Op0); auto *Cast1 = dyn_cast<CastInst>(Op1); @@ -2017,7 +2053,7 @@ static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1, /// Given operands for an And, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q)) return C; @@ -2043,8 +2079,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; // A & ~A = ~A & A = 0 - if (match(Op0, m_Not(m_Specific(Op1))) || - match(Op1, m_Not(m_Specific(Op0)))) + if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0)))) return Constant::getNullValue(Op0->getType()); // (A | ?) & A = A @@ -2117,8 +2152,8 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + simplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. @@ -2142,16 +2177,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // If the operation is with the result of a select instruction, check // whether operating on either branch of the select always yields the same // value. - if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + threadBinOpOverSelect(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; } // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + threadBinOpOverPHI(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; // Assuming the effective width of Y is not larger than A, i.e.
all bits @@ -2174,8 +2209,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); const unsigned EffWidthY = YKnown.countMaxActiveBits(); if (EffWidthY <= ShftCnt) { - const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, - Q.DT); + const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); const unsigned EffWidthX = XKnown.countMaxActiveBits(); const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY); const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt; @@ -2197,11 +2231,20 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y)))) return Constant::getNullValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { + // Op0&Op1 -> Op0 where Op0 implies Op1 + if (isImpliedCondition(Op0, Op1, Q.DL).value_or(false)) + return Op0; + // Op0&Op1 -> Op1 where Op1 implies Op0 + if (isImpliedCondition(Op1, Op0, Q.DL).value_or(false)) + return Op1; + } + return nullptr; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifyAndInst(Op0, Op1, Q, RecursionLimit); } static Value *simplifyOrLogic(Value *X, Value *Y) { @@ -2289,7 +2332,7 @@ static Value *simplifyOrLogic(Value *X, Value *Y) { /// Given operands for an Or, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q)) return C; @@ -2334,6 +2377,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, } } + // A funnel shift (rotate) can be decomposed into simpler shifts. See if we + // are mixing in another shift that is redundant with the funnel shift. + + // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y + // (shl X, Y) | (fshl X, ?, Y) --> fshl X, ?, Y + if (match(Op0, + m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(Y))) && + match(Op1, m_Shl(m_Specific(X), m_Specific(Y)))) + return Op0; + if (match(Op1, + m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(Y))) && + match(Op0, m_Shl(m_Specific(X), m_Specific(Y)))) + return Op1; + + // (fshr ?, X, Y) | (lshr X, Y) --> fshr ?, X, Y + // (lshr X, Y) | (fshr ?, X, Y) --> fshr ?, X, Y + if (match(Op0, + m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(Y))) && + match(Op1, m_LShr(m_Specific(X), m_Specific(Y)))) + return Op0; + if (match(Op1, + m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(Y))) && + match(Op0, m_LShr(m_Specific(X), m_Specific(Y)))) + return Op1; + if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; @@ -2346,8 +2414,8 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + simplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this.
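The funnel-shift fold above follows from the intrinsic's definition, fshl(X, W, Y) = (X << Y) | (W >> (BitWidth - Y)) with Y taken modulo the bit width: the (X << Y) bits are already present in the funnel-shift result, so OR-ing them in again is a no-op. A standalone 8-bit check (fshl8 is an invented stand-in for the intrinsic; not part of the patch):

#include <cassert>
#include <cstdint>

static uint8_t fshl8(uint8_t X, uint8_t W, unsigned Y) {
  Y &= 7; // the intrinsic interprets the shift amount modulo the bit width
  return Y ? (uint8_t)((X << Y) | (W >> (8 - Y))) : X;
}

int main() {
  for (unsigned y = 0; y < 8; ++y)
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned w = 0; w < 256; w += 15) {
        uint8_t F = fshl8((uint8_t)x, (uint8_t)w, y);
        assert((uint8_t)(F | (uint8_t)(x << y)) == F); // shl adds no new bits
      }
}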
@@ -2366,8 +2434,8 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // If the operation is with the result of a select instruction, check // whether operating on either branch of the select always yields the same // value. - if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + threadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; } @@ -2389,8 +2457,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return A; } // Or commutes, try both ways. - if (C1->isMask() && - match(B, m_c_Add(m_Specific(A), m_Value(N)))) { + if (C1->isMask() && match(B, m_c_Add(m_Specific(A), m_Value(N)))) { // Add commutes, try both ways. if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; @@ -2401,19 +2468,28 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) + if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; + if (Op0->getType()->isIntOrIntVectorTy(1)) { + // Op0|Op1 -> Op1 where Op0 implies Op1 + if (isImpliedCondition(Op0, Op1, Q.DL).value_or(false)) + return Op1; + // Op0|Op1 -> Op0 where Op1 implies Op0 + if (isImpliedCondition(Op1, Op0, Q.DL).value_or(false)) + return Op0; + } + return nullptr; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifyOrInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a Xor, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, +static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) return C; @@ -2435,8 +2511,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Constant::getNullValue(Op0->getType()); // A ^ ~A = ~A ^ A = -1 - if (match(Op0, m_Not(m_Specific(Op1))) || - match(Op1, m_Not(m_Specific(Op0)))) + if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Op0->getType()); auto foldAndOrNot = [](Value *X, Value *Y) -> Value * { @@ -2467,8 +2542,8 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, - MaxRecurse)) + if (Value *V = + simplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse)) return V; // Threading Xor over selects and phi nodes is pointless, so don't bother.
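The new i1 implication folds above (here for Or, with a matching pair added for And) follow directly from truth tables: when Op0 implies Op1, Op0 & Op1 collapses to Op0 and Op0 | Op1 collapses to Op1. A standalone check (not part of the patch):

#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true})
      if (!A || B) {            // "A implies B"
        assert((A && B) == A);  // A & B --> A
        assert((A || B) == B);  // A | B --> B
      }
}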
@@ -2483,19 +2558,18 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); +Value *llvm::simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::simplifyXorInst(Op0, Op1, Q, RecursionLimit); } - -static Type *GetCompareTy(Value *Op) { +static Type *getCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } /// Rummage around inside V looking for something equivalent to the comparison /// "LHS Pred RHS". Return such a value if found, otherwise return null. /// Helper function for analyzing max/min idioms. -static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, +static Value *extractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); if (!SI) return nullptr; @@ -2512,6 +2586,70 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return nullptr; } +/// Return true if the underlying object (storage) must be disjoint from +/// storage returned by any noalias return call. +static bool isAllocDisjoint(const Value *V) { + // For allocas, we consider only static ones (dynamic + // allocas might be transformed into calls to malloc not simultaneously + // live with the compared-to allocation). For globals, we exclude symbols + // that might be resolved lazily to symbols in another dynamically-loaded + // library (and, thus, could be malloc'ed by the implementation). + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() || + GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) && + !GV->isThreadLocal(); + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + return false; +} + +/// Return true if V1 and V2 are each the base of some distinct storage region +/// [V, object_size(V)] which do not overlap. Note that zero sized regions +/// *are* possible, and that zero sized regions do not overlap with any other. +static bool haveNonOverlappingStorage(const Value *V1, const Value *V2) { + // Global variables always exist, so they always exist during the lifetime + // of each other and all allocas. Global variables themselves usually have + // non-overlapping storage, but since their addresses are constants, the + // case involving two globals does not reach here and is instead handled in + // constant folding. + // + // Two different allocas usually have different addresses... + // + // However, if there's an @llvm.stackrestore dynamically in between two + // allocas, they may have the same address. It's tempting to reduce the + // scope of the problem by only looking at *static* allocas here. That would + // cover the majority of allocas while significantly reducing the likelihood + // of having an @llvm.stackrestore pop up in the middle. However, it's not + // actually impossible for an @llvm.stackrestore to pop up in the middle of + // an entry block. Also, if we have a block that's not attached to a + // function, we can't tell if it's "static" under the current definition. + // Theoretically, this problem could be fixed by creating a new kind of + // instruction kind specifically for static allocas.
Such a new instruction + // could be required to be at the top of the entry block, thus preventing it + // from being subject to a @llvm.stackrestore. Instcombine could even + // convert regular allocas into these special allocas. It'd be nifty. + // However, until then, this problem remains open. + // + // So, we'll assume that two non-empty allocas have different addresses + // for now. + auto isByValArg = [](const Value *V) { + const Argument *A = dyn_cast<Argument>(V); + return A && A->hasByValAttr(); + }; + + // Byval args are backed by storage which does not overlap with each other, + // allocas, or globals. + if (isByValArg(V1)) + return isa<AllocaInst>(V2) || isa<GlobalVariable>(V2) || isByValArg(V2); + if (isByValArg(V2)) + return isa<AllocaInst>(V1) || isa<GlobalVariable>(V1) || isByValArg(V1); + + return isa<AllocaInst>(V1) && + (isa<AllocaInst>(V2) || isa<GlobalVariable>(V2)); +} + // A significant optimization not implemented here is assuming that alloca // addresses are not equal to incoming argument values. They don't *alias*, // as we say, but that doesn't mean they aren't equal, so we take a @@ -2540,9 +2678,8 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, // If the C and C++ standards are ever made sufficiently restrictive in this // area, it may be possible to update LLVM's semantics accordingly and reinstate // this optimization. -static Constant * -computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const SimplifyQuery &Q) { +static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const SimplifyQuery &Q) { const DataLayout &DL = Q.DL; const TargetLibraryInfo *TLI = Q.TLI; const DominatorTree *DT = Q.DT; @@ -2557,8 +2694,7 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) && llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr, IIQ.UseInstrInfo)) - return ConstantInt::get(GetCompareTy(LHS), - !CmpInst::isTrueWhenEqual(Pred)); + return ConstantInt::get(getCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); // We can only fold certain predicates on pointer comparisons. switch (Pred) { @@ -2588,88 +2724,47 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // numerous hazards. AliasAnalysis and its utilities rely on special rules // governing loads and stores which don't apply to icmps. Also, AliasAnalysis // doesn't need to guarantee pointer inequality when it says NoAlias. - Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); - Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); + + // Even if a non-inbounds GEP occurs along the path we can still optimize + // equality comparisons concerning the result. + bool AllowNonInbounds = ICmpInst::isEquality(Pred); + APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS, AllowNonInbounds); + APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS, AllowNonInbounds); // If LHS and RHS are related via constant offsets to the same base // value, we can replace it with an icmp which just compares the offsets. if (LHS == RHS) - return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + return ConstantInt::get(getCompareTy(LHS), + ICmpInst::compare(LHSOffset, RHSOffset, Pred)); // Various optimizations for (in)equality comparisons. if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { // Different non-empty allocations that exist at the same time have - // different addresses (if the program can tell). Global variables always - // exist, so they always exist during the lifetime of each other and all - // allocas.
Two different allocas usually have different addresses... - // - // However, if there's an @llvm.stackrestore dynamically in between two - // allocas, they may have the same address. It's tempting to reduce the - // scope of the problem by only looking at *static* allocas here. That would - // cover the majority of allocas while significantly reducing the likelihood - // of having an @llvm.stackrestore pop up in the middle. However, it's not - // actually impossible for an @llvm.stackrestore to pop up in the middle of - // an entry block. Also, if we have a block that's not attached to a - // function, we can't tell if it's "static" under the current definition. - // Theoretically, this problem could be fixed by creating a new kind of - // instruction kind specifically for static allocas. Such a new instruction - // could be required to be at the top of the entry block, thus preventing it - // from being subject to a @llvm.stackrestore. Instcombine could even - // convert regular allocas into these special allocas. It'd be nifty. - // However, until then, this problem remains open. - // - // So, we'll assume that two non-empty allocas have different addresses - // for now. - // - // With all that, if the offsets are within the bounds of their allocations - // (and not one-past-the-end! so we can't use inbounds!), and their - // allocations aren't the same, the pointers are not equal. - // - // Note that it's not necessary to check for LHS being a global variable - // address, due to canonicalization and constant folding. - if (isa<AllocaInst>(LHS) && - (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) { - ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset); - ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset); + // different addresses (if the program can tell). If the offsets are + // within the bounds of their allocations (and not one-past-the-end! + // so we can't use inbounds!), and their allocations aren't the same, + // the pointers are not equal. if (haveNonOverlappingStorage(LHS, RHS)) { uint64_t LHSSize, RHSSize; ObjectSizeOpts Opts; - Opts.NullIsUnknownSize = - NullPointerIsDefined(cast<AllocaInst>(LHS)->getFunction()); - if (LHSOffsetCI && RHSOffsetCI && - getObjectSize(LHS, LHSSize, DL, TLI, Opts) && - getObjectSize(RHS, RHSSize, DL, TLI, Opts)) { - const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); - const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); - if (!LHSOffsetValue.isNegative() && - !RHSOffsetValue.isNegative() && - LHSOffsetValue.ult(LHSSize) && - RHSOffsetValue.ult(RHSSize)) { - return ConstantInt::get(GetCompareTy(LHS), - !CmpInst::isTrueWhenEqual(Pred)); - } - } - - // Repeat the above check but this time without depending on DataLayout - // or being able to compute a precise size. - if (!cast<PointerType>(LHS->getType())->isEmptyTy() && - !cast<PointerType>(RHS->getType())->isEmptyTy() && - LHSOffset->isNullValue() && - RHSOffset->isNullValue()) - return ConstantInt::get(GetCompareTy(LHS), + Opts.EvalMode = ObjectSizeOpts::Mode::Min; + auto *F = [](Value *V) -> Function * { + if (auto *I = dyn_cast<Instruction>(V)) + return I->getFunction(); + if (auto *A = dyn_cast<Argument>(V)) + return A->getParent(); + return nullptr; + }(LHS); + Opts.NullIsUnknownSize = F ?
NullPointerIsDefined(F) : true; + if (getObjectSize(LHS, LHSSize, DL, TLI, Opts) && + getObjectSize(RHS, RHSSize, DL, TLI, Opts) && + !LHSOffset.isNegative() && !RHSOffset.isNegative() && + LHSOffset.ult(LHSSize) && RHSOffset.ult(RHSSize)) { + return ConstantInt::get(getCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); + } } - // Even if an non-inbounds GEP occurs along the path we can still optimize - // equality comparisons concerning the result. We avoid walking the whole - // chain again by starting where the last calls to - // stripAndComputeConstantOffsets left off and accumulate the offsets. - Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true); - Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true); - if (LHS == RHS) - return ConstantExpr::getICmp(Pred, - ConstantExpr::getAdd(LHSOffset, LHSNoBound), - ConstantExpr::getAdd(RHSOffset, RHSNoBound)); - // If one side of the equality comparison must come from a noalias call // (meaning a system memory allocation function), and the other side must // come from a pointer that cannot overlap with dynamically-allocated @@ -2685,29 +2780,16 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, }; // Is the set of underlying objects all things which must be disjoint from - // noalias calls. For allocas, we consider only static ones (dynamic - // allocas might be transformed into calls to malloc not simultaneously - // live with the compared-to allocation). For globals, we exclude symbols - // that might be resolve lazily to symbols in another dynamically-loaded - // library (and, thus, could be malloc'ed by the implementation). + // noalias calls. We assume that indexing from such disjoint storage + // into the heap is undefined, and thus offsets can be safely ignored. auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) { - return all_of(Objects, [](const Value *V) { - if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) - return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() || - GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) && - !GV->isThreadLocal(); - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - return false; - }); + return all_of(Objects, ::isAllocDisjoint); }; if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) || (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs))) - return ConstantInt::get(GetCompareTy(LHS), - !CmpInst::isTrueWhenEqual(Pred)); + return ConstantInt::get(getCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); // Fold comparisons for non-escaping pointer even if the allocation call // cannot be elided. We cannot fold malloc comparison to null. Also, the @@ -2724,7 +2806,7 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // FIXME: We should also fold the compare when the pointer escapes, but the // compare dominates the pointer escape if (MI && !PointerMayBeCaptured(MI, true, true)) - return ConstantInt::get(GetCompareTy(LHS), + return ConstantInt::get(getCompareTy(LHS), CmpInst::isFalseWhenEqual(Pred)); } @@ -2735,7 +2817,7 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, /// Fold an icmp when its operands have i1 scalar type. static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q) { - Type *ITy = GetCompareTy(LHS); // The return type. + Type *ITy = getCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type.
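simplifyICmpOfBools above leans on the signed reading of i1, where true sign-extends to -1; under that reading, LHS >=s RHS coincides with "LHS implies RHS", exactly as the truth table in the code comments spells out. A standalone check with int modeling the sign-extended values (not part of the patch):

#include <cassert>

int main() {
  for (bool L : {false, true})
    for (bool R : {false, true}) {
      int LS = L ? -1 : 0, RS = R ? -1 : 0; // sext i1 -> int
      assert((LS >= RS) == (!L || R));      // sge on i1 == implication
    }
}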
if (!OpTy->isIntOrIntVectorTy(1)) return nullptr; @@ -2773,7 +2855,8 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, case CmpInst::ICMP_SLE: // X <=s 0 -> true return getTrue(ITy); - default: break; + default: + break; } } else if (match(RHS, m_One())) { switch (Pred) { @@ -2797,7 +2880,8 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, case CmpInst::ICMP_SGE: // X >=s -1 -> true return getTrue(ITy); - default: break; + default: + break; } } @@ -2805,7 +2889,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, default: break; case ICmpInst::ICMP_UGE: - if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false)) + if (isImpliedCondition(RHS, LHS, Q.DL).value_or(false)) return getTrue(ITy); break; case ICmpInst::ICMP_SGE: @@ -2816,11 +2900,11 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, /// 0 | 1 | 1 (0 >= -1) | 1 /// 1 | 0 | 0 (-1 >= 0) | 0 /// 1 | 1 | 1 (-1 >= -1) | 1 - if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) + if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false)) return getTrue(ITy); break; case ICmpInst::ICMP_ULE: - if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) + if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false)) return getTrue(ITy); break; } @@ -2834,7 +2918,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, if (!match(RHS, m_Zero())) return nullptr; - Type *ITy = GetCompareTy(LHS); // The return type. + Type *ITy = getCompareTy(LHS); // The return type. switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); @@ -2893,7 +2977,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const InstrInfoQuery &IIQ) { - Type *ITy = GetCompareTy(RHS); // The return type. + Type *ITy = getCompareTy(RHS); // The return type. Value *X; // Sign-bit checks can be optimized to true/false after unsigned @@ -2940,10 +3024,11 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -static Value *simplifyICmpWithBinOpOnLHS( - CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS, - const SimplifyQuery &Q, unsigned MaxRecurse) { - Type *ITy = GetCompareTy(RHS); // The return type. +static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred, + BinaryOperator *LBO, Value *RHS, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + Type *ITy = getCompareTy(RHS); // The return type. Value *Y = nullptr; // icmp pred (or X, Y), X @@ -3078,7 +3163,6 @@ static Value *simplifyICmpWithBinOpOnLHS( return nullptr; } - // If only one of the icmp's operands has NSW flags, try to prove that: // // icmp slt (x + C1), (x +nsw C2) @@ -3113,7 +3197,6 @@ static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS, (C2->slt(*C1) && C1->isNonPositive()); } - /// TODO: A large part of this logic is duplicated in InstCombine's /// foldICmpBinOp(). We should be able to share that and avoid the code /// duplication. @@ -3150,7 +3233,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. if ((A == RHS || B == RHS) && NoLHSWrapProblem) - if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, + if (Value *V = simplifyICmpInst(Pred, A == RHS ? 
B : A, Constant::getNullValue(RHS->getType()), Q, MaxRecurse - 1)) return V; @@ -3158,7 +3241,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) if (Value *V = - SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), + simplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), C == LHS ? D : C, Q, MaxRecurse - 1)) return V; @@ -3186,7 +3269,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, Y = A; Z = C; } - if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1)) + if (Value *V = simplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1)) return V; } } @@ -3206,15 +3289,15 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, if (match(RHS, m_APInt(C))) { if (C->isStrictlyPositive()) { if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(getCompareTy(RHS)); if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(getCompareTy(RHS)); } if (C->isNonNegative()) { if (Pred == ICmpInst::ICMP_SLE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(getCompareTy(RHS)); if (Pred == ICmpInst::ICMP_SGT) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(getCompareTy(RHS)); } } } @@ -3237,9 +3320,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) || match(LHS, m_Shl(m_One(), m_Value())) || !C->isZero()) { if (Pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(getCompareTy(RHS)); if (Pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(getCompareTy(RHS)); } } @@ -3248,9 +3331,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // (1 << X) <=u 0x8000 --> true if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) { if (Pred == ICmpInst::ICMP_UGT) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(getCompareTy(RHS)); if (Pred == ICmpInst::ICMP_ULE) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(getCompareTy(RHS)); } if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && @@ -3263,22 +3346,22 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO)) break; - if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) - return V; + return V; break; case Instruction::SDiv: if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO)) break; - if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; case Instruction::AShr: if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO)) break; - if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; @@ -3289,7 +3372,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, break; if (!NSW &&
ICmpInst::isSigned(Pred)) break; - if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; @@ -3299,12 +3382,12 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -/// Simplify integer comparisons where at least one operand of the compare +/// simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { - Type *ITy = GetCompareTy(LHS); // The return type. + Type *ITy = getCompareTy(LHS); // The return type. Value *A, *B; CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". @@ -3349,13 +3432,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, case CmpInst::ICMP_SLE: // Equivalent to "A EqP B". This may be the same as the condition tested // in the max/min; if so, we can just return that. - if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + if (Value *V = extractEquivalentCondition(LHS, EqP, A, B)) return V; - if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + if (Value *V = extractEquivalentCondition(RHS, EqP, A, B)) return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) + if (Value *V = simplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) return V; break; case CmpInst::ICMP_NE: @@ -3363,13 +3446,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); // Equivalent to "A InvEqP B". This may be the same as the condition // tested in the max/min; if so, we can just return that. - if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + if (Value *V = extractEquivalentCondition(LHS, InvEqP, A, B)) return V; - if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + if (Value *V = extractEquivalentCondition(RHS, InvEqP, A, B)) return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) + if (Value *V = simplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) return V; break; } @@ -3423,13 +3506,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, case CmpInst::ICMP_ULE: // Equivalent to "A EqP B". This may be the same as the condition tested // in the max/min; if so, we can just return that. - if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + if (Value *V = extractEquivalentCondition(LHS, EqP, A, B)) return V; - if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + if (Value *V = extractEquivalentCondition(RHS, EqP, A, B)) return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) + if (Value *V = simplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) return V; break; case CmpInst::ICMP_NE: @@ -3437,13 +3520,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); // Equivalent to "A InvEqP B". This may be the same as the condition // tested in the max/min; if so, we can just return that. 
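The min/max reasoning above reduces an equality like "A == smax(A, B)" to the relational fact "A >=s B" (and dually for min), which is why the helper can hand back the equivalent condition when one already exists. A standalone check over a small range (not part of the patch):

#include <algorithm>
#include <cassert>

int main() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B) {
      assert((A == std::max(A, B)) == (A >= B)); // A == smax(A,B) iff A >= B
      assert((A == std::min(A, B)) == (A <= B)); // A == smin(A,B) iff A <= B
    }
}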
- if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + if (Value *V = extractEquivalentCondition(LHS, InvEqP, A, B)) return V; - if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + if (Value *V = extractEquivalentCondition(RHS, InvEqP, A, B)) return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) + if (Value *V = simplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) return V; break; } @@ -3499,11 +3582,10 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, continue; CallInst *Assume = cast<CallInst>(AssumeVH); - if (Optional<bool> Imp = - isImpliedCondition(Assume->getArgOperand(0), Predicate, LHS, RHS, - Q.DL)) + if (Optional<bool> Imp = isImpliedCondition(Assume->getArgOperand(0), + Predicate, LHS, RHS, Q.DL)) if (isValidAssumeForContext(Assume, Q.CxtI, Q.DT)) - return ConstantInt::get(GetCompareTy(LHS), *Imp); + return ConstantInt::get(getCompareTy(LHS), *Imp); } } @@ -3512,7 +3594,7 @@ static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate, /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); @@ -3527,7 +3609,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X"); - Type *ITy = GetCompareTy(LHS); // The return type. + Type *ITy = getCompareTy(LHS); // The return type. // icmp poison, X -> poison if (isa<PoisonValue>(RHS)) return PoisonValue::get(ITy); @@ -3589,15 +3671,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. - if (Value *V = SimplifyICmpInst(Pred, SrcOp, + if (Value *V = simplifyICmpInst(Pred, SrcOp, ConstantExpr::getIntToPtr(RHSC, SrcTy), - Q, MaxRecurse-1)) + Q, MaxRecurse - 1)) return V; } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { if (RI->getOperand(0)->getType() == SrcTy) // Compare without the cast. - if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - Q, MaxRecurse-1)) + if (Value *V = simplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q, + MaxRecurse - 1)) return V; } } @@ -3608,9 +3690,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that signed predicates become unsigned. - if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, RI->getOperand(0), Q, - MaxRecurse-1)) + if (Value *V = + simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), SrcOp, + RI->getOperand(0), Q, MaxRecurse - 1)) return V; } // Fold (zext X) ule (sext X), (zext X) sge (sext X) to true. @@ -3633,15 +3715,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the re-extended constant didn't change then this is effectively // also a case of comparing two zero-extended values.
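The zext handling above turns a compare of two zero-extended values into an unsigned compare of the narrow sources; signed predicates are safe to remap because zero-extended values are never negative. A standalone 8-to-32-bit check (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; y += 7) {
      int32_t ZX = (int32_t)(uint8_t)x, ZY = (int32_t)(uint8_t)y; // zext
      assert((ZX < ZY) == ((uint8_t)x < (uint8_t)y)); // slt becomes ult
    }
}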
if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, Trunc, Q, MaxRecurse-1)) + if (Value *V = simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, Trunc, Q, MaxRecurse - 1)) return V; // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit // there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { - default: llvm_unreachable("Unknown ICmp predicate!"); + default: + llvm_unreachable("Unknown ICmp predicate!"); // LHS getValue().isNegative() ? - ConstantInt::getTrue(CI->getContext()) : - ConstantInt::getFalse(CI->getContext()); + return CI->getValue().isNegative() + ? ConstantInt::getTrue(CI->getContext()) + : ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - return CI->getValue().isNegative() ? - ConstantInt::getFalse(CI->getContext()) : - ConstantInt::getTrue(CI->getContext()); + return CI->getValue().isNegative() + ? ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); } } } @@ -3677,8 +3760,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (SExtInst *RI = dyn_cast(RHS)) { if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that the predicate does not change. - if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - Q, MaxRecurse-1)) + if (Value *V = simplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q, + MaxRecurse - 1)) return V; } // Fold (sext X) uge (zext X), (sext X) sle (zext X) to true. @@ -3701,14 +3784,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the re-extended constant didn't change then this is effectively // also a case of comparing two sign-extended values. if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) + if (Value *V = + simplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse - 1)) return V; // Otherwise the upper bits of LHS are all equal, while RHS has varying // bits there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { - default: llvm_unreachable("Unknown ICmp predicate!"); + default: + llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_EQ: return ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_NE: @@ -3718,14 +3803,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // LHS >s RHS. case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - return CI->getValue().isNegative() ? - ConstantInt::getTrue(CI->getContext()) : - ConstantInt::getFalse(CI->getContext()); + return CI->getValue().isNegative() + ? ConstantInt::getTrue(CI->getContext()) + : ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - return CI->getValue().isNegative() ? - ConstantInt::getFalse(CI->getContext()) : - ConstantInt::getTrue(CI->getContext()); + return CI->getValue().isNegative() + ? ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); // If LHS is non-negative then LHS u RHS. @@ -3733,18 +3818,18 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_UGE: // Comparison is true iff the LHS =s 0. 
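// [Editor's note: illustrative sketch, not part of the patch.] The
// "re-extended constant didn't change" trick above: if a constant C survives
// trunc-then-zext unchanged, comparing (zext x) against C is the same as
// comparing x against trunc(C); if it does not survive, equality is decidable
// outright. Standalone check:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 200; // round-trips through uint8_t unchanged
  const uint8_t TruncC = static_cast<uint8_t>(C);
  for (int i = 0; i < 256; ++i) {
    uint8_t x = static_cast<uint8_t>(i);
    assert((static_cast<uint32_t>(x) == C) == (x == TruncC));
    assert(static_cast<uint32_t>(x) != 300u); // 300 does not round-trip:
                                              // equality is always false
  }
  return 0;
}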
if (MaxRecurse) - if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, - Constant::getNullValue(SrcTy), - Q, MaxRecurse-1)) + if (Value *V = simplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, + Constant::getNullValue(SrcTy), Q, + MaxRecurse - 1)) return V; break; } @@ -3788,26 +3873,26 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) + if (Value *V = threadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) + if (Value *V = threadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return nullptr; } -Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *llvm::simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); + return ::simplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } /// Given operands for an FCmpInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; @@ -3815,7 +3900,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Constant *CLHS = dyn_cast(LHS)) { if (Constant *CRHS = dyn_cast(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI, + Q.CxtI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -3823,7 +3909,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // Fold trivial predicates. - Type *RetTy = GetCompareTy(LHS); + Type *RetTy = getCompareTy(LHS); if (Pred == FCmpInst::FCMP_FALSE) return getFalse(RetTy); if (Pred == FCmpInst::FCMP_TRUE) @@ -3943,23 +4029,29 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // The ordered relationship and minnum/maxnum guarantee that we do not // have NaN constants, so ordered/unordered preds are handled the same. 
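// [Editor's note: illustrative sketch, not part of the patch.] The
// threadCmpOverSelect rename above refers to the rewrite
// cmp (select c, x, y), z == select c, (cmp x, z), (cmp y, z); InstSimplify
// uses it only when comparing with either branch yields the same value, so
// the select disappears. Exhaustive check of the distribution law on a tiny
// domain:
#include <cassert>

int main() {
  for (int c = 0; c <= 1; ++c)
    for (int x = -2; x <= 2; ++x)
      for (int y = -2; y <= 2; ++y)
        for (int z = -2; z <= 2; ++z)
          assert(((c ? x : y) < z) == (c ? (x < z) : (y < z)));
  return 0;
}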
switch (Pred) { - case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: // minnum(X, LesserC) == C --> false // maxnum(X, GreaterC) == C --> false return getFalse(RetTy); - case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: // minnum(X, LesserC) != C --> true // maxnum(X, GreaterC) != C --> true return getTrue(RetTy); - case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE: - case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_OGE: + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_UGT: // minnum(X, LesserC) >= C --> false // minnum(X, LesserC) > C --> false // maxnum(X, GreaterC) >= C --> true // maxnum(X, GreaterC) > C --> true return ConstantInt::get(RetTy, IsMaxNum); - case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE: - case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_ULT: // minnum(X, LesserC) <= C --> true // minnum(X, LesserC) < C --> true // maxnum(X, GreaterC) <= C --> false @@ -3997,21 +4089,21 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) + if (Value *V = threadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) + if (Value *V = threadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return nullptr; } -Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *llvm::simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); + return ::simplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, @@ -4078,22 +4170,21 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, }; if (auto *B = dyn_cast(I)) - return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), NewOps[0], + return PreventSelfSimplify(simplifyBinOp(B->getOpcode(), NewOps[0], NewOps[1], Q, MaxRecurse - 1)); if (CmpInst *C = dyn_cast(I)) - return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), NewOps[0], + return PreventSelfSimplify(simplifyCmpInst(C->getPredicate(), NewOps[0], NewOps[1], Q, MaxRecurse - 1)); if (auto *GEP = dyn_cast(I)) - return PreventSelfSimplify(SimplifyGEPInst( + return PreventSelfSimplify(simplifyGEPInst( GEP->getSourceElementType(), NewOps[0], makeArrayRef(NewOps).slice(1), GEP->isInBounds(), Q, MaxRecurse - 1)); if (isa(I)) - return PreventSelfSimplify( - SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q, - MaxRecurse - 1)); + return PreventSelfSimplify(simplifySelectInst( + NewOps[0], NewOps[1], NewOps[2], Q, MaxRecurse - 1)); // TODO: We could hand off more cases to instsimplify here. 
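// [Editor's note: illustrative sketch, not part of the patch.] The
// minnum/maxnum-vs-constant table in the switch above, checked standalone
// with std::fmin (minnum semantics for non-NaN inputs) and LesserC < C:
#include <cassert>
#include <cmath>

int main() {
  const double C = 1.0, LesserC = 0.5;
  for (double x = -2.0; x <= 2.0; x += 0.25) { // 0.25 is exact in binary
    double m = std::fmin(x, LesserC);          // minnum(X, LesserC)
    assert(!(m == C));             // minnum(X, LesserC) == C --> false
    assert(m != C);                // minnum(X, LesserC) != C --> true
    assert(!(m >= C) && !(m > C)); // >=, > --> false
    assert(m <= C && m < C);       // <=, < --> true
  }
  return 0;
}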
} @@ -4119,14 +4210,6 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (!AllowRefinement && canCreatePoison(cast(I))) return nullptr; - if (CmpInst *C = dyn_cast(I)) - return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], - ConstOps[1], Q.DL, Q.TLI); - - if (LoadInst *LI = dyn_cast(I)) - if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(ConstOps[0], LI->getType(), Q.DL); - return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI); } @@ -4189,7 +4272,8 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, /// Try to simplify a select instruction when its condition operand is an /// integer comparison. static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, - Value *FalseVal, const SimplifyQuery &Q, + Value *FalseVal, + const SimplifyQuery &Q, unsigned MaxRecurse) { ICmpInst::Predicate Pred; Value *CmpLHS, *CmpRHS; @@ -4209,7 +4293,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, Value *X, *Y; SelectPatternFlavor SPF = matchDecomposedSelectPattern(cast(CondVal), TrueVal, FalseVal, - X, Y).Flavor; + X, Y) + .Flavor; if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) { APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF), X->getType()->getScalarSizeInBits()); @@ -4261,8 +4346,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, } // Check for other compares that behave like bit test. - if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred, - TrueVal, FalseVal)) + if (Value *V = + simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred, TrueVal, FalseVal)) return V; // If we have a scalar equality comparison, then we know the value in one of @@ -4272,18 +4357,18 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // because each element of a vector select is chosen independently. if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) { if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, - /* AllowRefinement */ false, MaxRecurse) == - TrueVal || + /* AllowRefinement */ false, + MaxRecurse) == TrueVal || simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, - /* AllowRefinement */ false, MaxRecurse) == - TrueVal) + /* AllowRefinement */ false, + MaxRecurse) == TrueVal) return FalseVal; if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, - /* AllowRefinement */ true, MaxRecurse) == - FalseVal || + /* AllowRefinement */ true, + MaxRecurse) == FalseVal || simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, - /* AllowRefinement */ true, MaxRecurse) == - FalseVal) + /* AllowRefinement */ true, + MaxRecurse) == FalseVal) return FalseVal; } @@ -4302,11 +4387,11 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, // This transform is safe if we do not have (do not care about) -0.0 or if // at least one operand is known to not be -0.0. Otherwise, the select can // change the sign of a zero operand. - bool HasNoSignedZeros = Q.CxtI && isa(Q.CxtI) && - Q.CxtI->hasNoSignedZeros(); + bool HasNoSignedZeros = + Q.CxtI && isa(Q.CxtI) && Q.CxtI->hasNoSignedZeros(); const APFloat *C; if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) || - (match(F, m_APFloat(C)) && C->isNonZero())) { + (match(F, m_APFloat(C)) && C->isNonZero())) { // (T == F) ? T : F --> F // (F == T) ? 
T : F --> F if (Pred == FCmpInst::FCMP_OEQ) @@ -4323,7 +4408,7 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, +static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { if (auto *CondC = dyn_cast(Cond)) { if (auto *TrueC = dyn_cast(TrueVal)) @@ -4439,14 +4524,14 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, return nullptr; } -Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, +Value *llvm::simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); + return ::simplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); } /// Given operands for an GetElementPtrInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr, +static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef Indices, bool InBounds, const SimplifyQuery &Q, unsigned) { // The type of the GEP pointer operand. @@ -4473,6 +4558,13 @@ static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr, } } + // For opaque pointers an all-zero GEP is a no-op. For typed pointers, + // it may be equivalent to a bitcast. + if (Ptr->getType()->getScalarType()->isOpaquePointerTy() && + Ptr->getType() == GEPTy && + all_of(Indices, [](const auto *V) { return match(V, m_Zero()); })) + return Ptr; + // getelementptr poison, idx -> poison // getelementptr baseptr, poison -> poison if (isa(Ptr) || @@ -4577,16 +4669,16 @@ static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr, return ConstantFoldConstant(CE, Q.DL); } -Value *llvm::SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef Indices, +Value *llvm::simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef Indices, bool InBounds, const SimplifyQuery &Q) { - return ::SimplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit); + return ::simplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, const SimplifyQuery &Q, - unsigned) { +static Value *simplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef Idxs, + const SimplifyQuery &Q, unsigned) { if (Constant *CAgg = dyn_cast(Agg)) if (Constant *CVal = dyn_cast(Val)) return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); @@ -4611,13 +4703,13 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return nullptr; } -Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, +Value *llvm::simplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const SimplifyQuery &Q) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); + return ::simplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); } -Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, +Value *llvm::simplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, const SimplifyQuery &Q) { // Try to constant fold. auto *VecC = dyn_cast(Vec); @@ -4654,7 +4746,7 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. 
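// [Editor's note: illustrative sketch, not part of the patch.] The new
// opaque-pointer fold above: a GEP whose indices are all zero adds offset 0,
// and with opaque pointers there is no pointee type left to reinterpret, so
// the GEP is just its base pointer. The address-level fact it relies on:
#include <cassert>

int main() {
  int arr[4][5] = {};
  // All-zero indexing computes offset 0 from the base address. With typed
  // pointers the same GEP could still change the pointee type (a bitcast),
  // which is why the fold also requires Ptr->getType() == GEPTy.
  assert(static_cast<void *>(&arr[0][0]) == static_cast<void *>(arr));
  return 0;
}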
-static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, +static Value *simplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const SimplifyQuery &, unsigned) { if (auto *CAgg = dyn_cast(Agg)) return ConstantFoldExtractValueInstruction(CAgg, Idxs); @@ -4677,14 +4769,14 @@ static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, return nullptr; } -Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, +Value *llvm::simplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const SimplifyQuery &Q) { - return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); + return ::simplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); } /// Given operands for an ExtractElementInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, +static Value *simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q, unsigned) { auto *VecVTy = cast(Vec->getType()); if (auto *CVec = dyn_cast(Vec)) { @@ -4721,13 +4813,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, return nullptr; } -Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, +Value *llvm::simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q) { - return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); + return ::simplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); } /// See if we can fold the given phi. If not, returns null. -static Value *SimplifyPHINode(PHINode *PN, ArrayRef IncomingValues, +static Value *simplifyPHINode(PHINode *PN, ArrayRef IncomingValues, const SimplifyQuery &Q) { // WARNING: no matter how worthwhile it may seem, we can not perform PHI CSE // here, because the PHI we may succeed simplifying to was not @@ -4739,14 +4831,15 @@ static Value *SimplifyPHINode(PHINode *PN, ArrayRef IncomingValues, bool HasUndefInput = false; for (Value *Incoming : IncomingValues) { // If the incoming value is the phi node itself, it can safely be skipped. - if (Incoming == PN) continue; + if (Incoming == PN) + continue; if (Q.isUndefValue(Incoming)) { // Remember that we saw an undef value, but otherwise ignore them. HasUndefInput = true; continue; } if (CommonValue && Incoming != CommonValue) - return nullptr; // Not the same, bail out. + return nullptr; // Not the same, bail out. CommonValue = Incoming; } @@ -4755,17 +4848,24 @@ static Value *SimplifyPHINode(PHINode *PN, ArrayRef IncomingValues, if (!CommonValue) return UndefValue::get(PN->getType()); - // If we have a PHI node like phi(X, undef, X), where X is defined by some - // instruction, we cannot return X as the result of the PHI node unless it - // dominates the PHI block. - if (HasUndefInput) + if (HasUndefInput) { + // We cannot start executing a trapping constant expression on more control + // flow paths. + auto *C = dyn_cast(CommonValue); + if (C && C->canTrap()) + return nullptr; + + // If we have a PHI node like phi(X, undef, X), where X is defined by some + // instruction, we cannot return X as the result of the PHI node unless it + // dominates the PHI block. return valueDominatesPHI(CommonValue, PN, Q.DT) ? 
CommonValue : nullptr; + } return CommonValue; } -static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, - Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) { +static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q, unsigned MaxRecurse) { if (auto *C = dyn_cast(Op)) return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); @@ -4798,9 +4898,9 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, return nullptr; } -Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, +Value *llvm::simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q) { - return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); + return ::simplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); } /// For the given destination element of a shuffle, peek through shuffles to @@ -4854,7 +4954,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, return RootVec; } -static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, +static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef Mask, Type *RetTy, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -4970,14 +5070,14 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, } /// Given operands for a ShuffleVectorInst, fold the result or return null. -Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, +Value *llvm::simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef Mask, Type *RetTy, const SimplifyQuery &Q) { - return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); + return ::simplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } -static Constant *foldConstant(Instruction::UnaryOps Opcode, - Value *&Op, const SimplifyQuery &Q) { +static Constant *foldConstant(Instruction::UnaryOps Opcode, Value *&Op, + const SimplifyQuery &Q) { if (auto *C = dyn_cast(Op)) return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL); return nullptr; @@ -4998,7 +5098,7 @@ static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF, return nullptr; } -Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF, +Value *llvm::simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q) { return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit); } @@ -5049,15 +5149,10 @@ static Constant *simplifyFPOp(ArrayRef Ops, FastMathFlags FMF, return nullptr; } -// TODO: Move this out to a header file: -static inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) { - return (EB == fp::ebIgnore || FMF.noNaNs()); -} - /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. static Value * -SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, +simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven) { @@ -5119,7 +5214,7 @@ SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for an FSub, see if we can fold the result. If not, this /// returns null. 
static Value * -SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, +simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven) { @@ -5130,24 +5225,28 @@ SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding)) return C; - if (!isDefaultFPEnvironment(ExBehavior, Rounding)) - return nullptr; - // fsub X, +0 ==> X - if (match(Op1, m_PosZeroFP())) - return Op0; + if (canIgnoreSNaN(ExBehavior, FMF) && + (!canRoundingModeBe(Rounding, RoundingMode::TowardNegative) || + FMF.noSignedZeros())) + if (match(Op1, m_PosZeroFP())) + return Op0; // fsub X, -0 ==> X, when we know X is not -0 - if (match(Op1, m_NegZeroFP()) && - (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) - return Op0; + if (canIgnoreSNaN(ExBehavior, FMF)) + if (match(Op1, m_NegZeroFP()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) + return Op0; // fsub -0.0, (fsub -0.0, X) ==> X // fsub -0.0, (fneg X) ==> X Value *X; - if (match(Op0, m_NegZeroFP()) && - match(Op1, m_FNeg(m_Value(X)))) - return X; + if (canIgnoreSNaN(ExBehavior, FMF)) + if (match(Op0, m_NegZeroFP()) && match(Op1, m_FNeg(m_Value(X)))) + return X; + + if (!isDefaultFPEnvironment(ExBehavior, Rounding)) + return nullptr; // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored. // fsub 0.0, (fneg X) ==> X if signed zeros are ignored. @@ -5170,7 +5269,7 @@ SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, +static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { @@ -5201,8 +5300,8 @@ static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, // 2. Ignore non-zero negative numbers because sqrt would produce NAN. // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0. Value *X; - if (Op0 == Op1 && match(Op0, m_Intrinsic(m_Value(X))) && - FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros()) + if (Op0 == Op1 && match(Op0, m_Sqrt(m_Value(X))) && FMF.allowReassoc() && + FMF.noNaNs() && FMF.noSignedZeros()) return X; return nullptr; @@ -5210,7 +5309,7 @@ static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given the operands for an FMul, see if we can fold the result static Value * -SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, +simplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven) { @@ -5219,43 +5318,43 @@ SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return C; // Now apply simplifications that do not require rounding. 
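// [Editor's note: illustrative sketch, not part of the patch.] Why the fsub
// hunk above now gates "fsub X, +0 -> X" on the rounding mode: an exactly
// zero difference rounds to +0.0 in every mode except round-toward-negative,
// where it is -0.0, so the fold is wrong for X == +0.0 under FE_DOWNWARD
// unless signed zeros may be ignored. Standalone demo (may need
// -frounding-math so the compiler respects the runtime rounding mode):
#include <cassert>
#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  volatile double x = 0.0, zero = 0.0; // volatile blocks constant folding
  std::fesetround(FE_DOWNWARD);
  double r = x - zero;                 // IEEE 754: exact zero rounds to -0.0
  std::fesetround(FE_TONEAREST);
  std::printf("0.0 - 0.0 under FE_DOWNWARD = %g, signbit = %d\n", r,
              (int)std::signbit(r));
  assert(std::signbit(r)); // r is -0.0, not the original +0.0 operand
  return 0;
}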
- return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding); + return simplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding); } -Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, +Value *llvm::simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, + return ::simplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, Rounding); } -Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, +Value *llvm::simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, + return ::simplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, Rounding); } -Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, +Value *llvm::simplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, + return ::simplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, Rounding); } -Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, +Value *llvm::simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { - return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, + return ::simplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, Rounding); } static Value * -SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, +simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven) { @@ -5301,16 +5400,16 @@ SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, +Value *llvm::simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { - return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, + return ::simplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, Rounding); } static Value * -SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, +simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned, fp::ExceptionBehavior ExBehavior = fp::ebIgnore, RoundingMode Rounding = RoundingMode::NearestTiesToEven) { @@ -5339,11 +5438,11 @@ SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, +Value *llvm::simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior, RoundingMode Rounding) { - return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, + return ::simplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior, Rounding); } @@ -5365,8 +5464,8 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q, /// If not, this returns null. /// Try to use FastMathFlags when folding the result. 
static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, - const FastMathFlags &FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { + const FastMathFlags &FMF, const SimplifyQuery &Q, + unsigned MaxRecurse) { switch (Opcode) { case Instruction::FNeg: return simplifyFNegInst(Op, FMF, Q, MaxRecurse); @@ -5375,56 +5474,56 @@ static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, } } -Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { +Value *llvm::simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit); } -Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, +Value *llvm::simplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, const SimplifyQuery &Q) { return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit); } /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +static Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: - return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); + return simplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::Sub: - return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse); + return simplifySubInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::Mul: - return SimplifyMulInst(LHS, RHS, Q, MaxRecurse); + return simplifyMulInst(LHS, RHS, Q, MaxRecurse); case Instruction::SDiv: - return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); + return simplifySDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::UDiv: - return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); + return simplifyUDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::SRem: - return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); + return simplifySRemInst(LHS, RHS, Q, MaxRecurse); case Instruction::URem: - return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); + return simplifyURemInst(LHS, RHS, Q, MaxRecurse); case Instruction::Shl: - return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse); + return simplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::LShr: - return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse); + return simplifyLShrInst(LHS, RHS, false, Q, MaxRecurse); case Instruction::AShr: - return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse); + return simplifyAShrInst(LHS, RHS, false, Q, MaxRecurse); case Instruction::And: - return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); + return simplifyAndInst(LHS, RHS, Q, MaxRecurse); case Instruction::Or: - return SimplifyOrInst(LHS, RHS, Q, MaxRecurse); + return simplifyOrInst(LHS, RHS, Q, MaxRecurse); case Instruction::Xor: - return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); + return simplifyXorInst(LHS, RHS, Q, MaxRecurse); case Instruction::FAdd: - return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return simplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::FSub: - return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return simplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::FMul: - return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return simplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::FDiv: - return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return simplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case 
Instruction::FRem: - return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return simplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); default: llvm_unreachable("Unexpected opcode"); } @@ -5433,49 +5532,50 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. /// Try to use FastMathFlags when folding the result. -static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +static Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: - return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); + return simplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FSub: - return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); + return simplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FMul: - return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); + return simplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FDiv: - return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse); + return simplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse); default: - return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); + return simplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); } } -Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); + return ::simplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } -Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { - return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); + return ::simplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. 
-static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) - return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return simplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return simplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } -Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *llvm::simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); + return ::simplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } -static bool IsIdempotent(Intrinsic::ID ID) { +static bool isIdempotent(Intrinsic::ID ID) { switch (ID) { - default: return false; + default: + return false; // Unary idempotent: f(f(x)) = f(x) case Intrinsic::fabs: @@ -5491,7 +5591,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { } } -static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset, +static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset, const DataLayout &DL) { GlobalValue *PtrSym; APInt PtrOffset; @@ -5551,7 +5651,7 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, const SimplifyQuery &Q) { // Idempotent functions return the same result when called repeatedly. Intrinsic::ID IID = F->getIntrinsicID(); - if (IsIdempotent(IID)) + if (isIdempotent(IID)) if (auto *II = dyn_cast(Op0)) if (II->getIntrinsicID() == IID) return II; @@ -5559,15 +5659,18 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, Value *X; switch (IID) { case Intrinsic::fabs: - if (SignBitMustBeZero(Op0, Q.TLI)) return Op0; + if (SignBitMustBeZero(Op0, Q.TLI)) + return Op0; break; case Intrinsic::bswap: // bswap(bswap(x)) -> x - if (match(Op0, m_BSwap(m_Value(X)))) return X; + if (match(Op0, m_BSwap(m_Value(X)))) + return X; break; case Intrinsic::bitreverse: // bitreverse(bitreverse(x)) -> x - if (match(Op0, m_BitReverse(m_Value(X)))) return X; + if (match(Op0, m_BitReverse(m_Value(X)))) + return X; break; case Intrinsic::ctpop: { // If everything but the lowest bit is zero, that bit is the pop-count. 
Ex: @@ -5581,30 +5684,34 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, case Intrinsic::exp: // exp(log(x)) -> x if (Q.CxtI->hasAllowReassoc() && - match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X; + match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) + return X; break; case Intrinsic::exp2: // exp2(log2(x)) -> x if (Q.CxtI->hasAllowReassoc() && - match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X; + match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) + return X; break; case Intrinsic::log: // log(exp(x)) -> x if (Q.CxtI->hasAllowReassoc() && - match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X; + match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) + return X; break; case Intrinsic::log2: // log2(exp2(x)) -> x if (Q.CxtI->hasAllowReassoc() && (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) || - match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0), - m_Value(X))))) return X; + match(Op0, + m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0), m_Value(X))))) + return X; break; case Intrinsic::log10: // log10(pow(10.0, x)) -> x if (Q.CxtI->hasAllowReassoc() && - match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), - m_Value(X)))) return X; + match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), m_Value(X)))) + return X; break; case Intrinsic::floor: case Intrinsic::trunc: @@ -5826,7 +5933,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, case Intrinsic::load_relative: if (auto *C0 = dyn_cast<Constant>(Op0)) if (auto *C1 = dyn_cast<Constant>(Op1)) - return SimplifyRelativeLoad(C0, C1, Q.DL); + return simplifyRelativeLoad(C0, C1, Q.DL); break; case Intrinsic::powi: if (auto *Power = dyn_cast<ConstantInt>(Op1)) { @@ -5853,7 +5960,8 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, case Intrinsic::maximum: case Intrinsic::minimum: { // If the arguments are the same, this is a no-op. - if (Op0 == Op1) return Op0; + if (Op0 == Op1) + return Op0; // Canonicalize constant operand as Op1.
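// [Editor's note: illustrative sketch, not part of the patch.] The exp/log
// cancellations earlier in this hunk (exp(log(x)) -> x and friends) fire only
// under the 'reassoc' fast-math flag, because the round trip is not exact in
// strict IEEE arithmetic. Standalone illustration:
#include <cmath>
#include <cstdio>

int main() {
  double x = 10.0;
  double roundTrip = std::exp(std::log(x));
  // roundTrip is extremely close to x but need not be bit-identical, which
  // is why strict FP code may not rewrite exp(log(x)) to x.
  std::printf("x           = %.17g\nexp(log(x)) = %.17g\n", x, roundTrip);
  return 0;
}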
if (isa(Op0)) @@ -5906,14 +6014,14 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, break; } - case Intrinsic::experimental_vector_extract: { + case Intrinsic::vector_extract: { Type *ReturnType = F->getReturnType(); // (extract_vector (insert_vector _, X, 0), 0) -> X unsigned IdxN = cast(Op1)->getZExtValue(); Value *X = nullptr; - if (match(Op0, m_Intrinsic( - m_Value(), m_Value(X), m_Zero())) && + if (match(Op0, m_Intrinsic(m_Value(), m_Value(X), + m_Zero())) && IdxN == 0 && X->getType() == ReturnType) return X; @@ -6054,7 +6162,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { return nullptr; } - case Intrinsic::experimental_vector_insert: { + case Intrinsic::vector_insert: { Value *Vec = Call->getArgOperand(0); Value *SubVec = Call->getArgOperand(1); Value *Idx = Call->getArgOperand(2); @@ -6064,8 +6172,8 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { // where: Y is X, or Y is undef unsigned IdxN = cast(Idx)->getZExtValue(); Value *X = nullptr; - if (match(SubVec, m_Intrinsic( - m_Value(X), m_Zero())) && + if (match(SubVec, + m_Intrinsic(m_Value(X), m_Zero())) && (Q.isUndefValue(Vec) || Vec == X) && IdxN == 0 && X->getType() == ReturnType) return X; @@ -6074,43 +6182,38 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } case Intrinsic::experimental_constrained_fadd: { auto *FPI = cast(Call); - return SimplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1), + return simplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_fsub: { auto *FPI = cast(Call); - return SimplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1), + return simplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_fmul: { auto *FPI = cast(Call); - return SimplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1), + return simplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_fdiv: { auto *FPI = cast(Call); - return SimplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1), + return simplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_frem: { auto *FPI = cast(Call); - return SimplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1), + return simplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } default: return nullptr; @@ -6138,7 +6241,7 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } -Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { +Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { // musttail calls can only be simplified if they are also DCEd. // As we can't guarantee this here, don't simplify them. 
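// [Editor's note: illustrative sketch, not part of the patch.] The renamed
// vector_extract/vector_insert folds above: extracting the length-N prefix
// immediately after inserting subvector X at index 0 yields X back,
// regardless of what the destination vector previously held. A scalar model
// of that prefix semantics using arrays:
#include <array>
#include <cassert>
#include <cstddef>
#include <cstring>

template <std::size_t N, std::size_t M>
std::array<int, N> extractPrefix(const std::array<int, M> &V) {
  static_assert(N <= M, "subvector must fit");
  std::array<int, N> R{};
  std::memcpy(R.data(), V.data(), N * sizeof(int));
  return R;
}

int main() {
  std::array<int, 8> Vec{9, 9, 9, 9, 9, 9, 9, 9};
  std::array<int, 4> X{1, 2, 3, 4};
  std::memcpy(Vec.data(), X.data(), sizeof(X)); // insert X at index 0
  assert((extractPrefix<4>(Vec) == X));         // extract at index 0 gives X
  return 0;
}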
if (Call->isMustTailCall()) @@ -6161,8 +6264,17 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { return nullptr; } +Value *llvm::simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q) { + assert(isa(Call)); + if (Value *V = tryConstantFoldCall(Call, Q)) + return V; + if (Value *Ret = simplifyIntrinsic(Call, Q)) + return Ret; + return nullptr; +} + /// Given operands for a Freeze, see if we can fold the result. -static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { +static Value *simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { // Use a utility function defined in ValueTracking. if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT)) return Op0; @@ -6170,11 +6282,11 @@ static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { return nullptr; } -Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { - return ::SimplifyFreezeInst(Op0, Q); +Value *llvm::simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { + return ::simplifyFreezeInst(Op0, Q); } -static Value *SimplifyLoadInst(LoadInst *LI, Value *PtrOp, +static Value *simplifyLoadInst(LoadInst *LI, Value *PtrOp, const SimplifyQuery &Q) { if (LI->isVolatile()) return nullptr; @@ -6218,134 +6330,134 @@ static Value *simplifyInstructionWithOperands(Instruction *I, } break; case Instruction::FNeg: - Result = SimplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q); + Result = simplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q); break; case Instruction::FAdd: - Result = SimplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); + Result = simplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); break; case Instruction::Add: - Result = SimplifyAddInst( + Result = simplifyAddInst( NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast(I)), Q.IIQ.hasNoUnsignedWrap(cast(I)), Q); break; case Instruction::FSub: - Result = SimplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); + Result = simplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); break; case Instruction::Sub: - Result = SimplifySubInst( + Result = simplifySubInst( NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast(I)), Q.IIQ.hasNoUnsignedWrap(cast(I)), Q); break; case Instruction::FMul: - Result = SimplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); + Result = simplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); break; case Instruction::Mul: - Result = SimplifyMulInst(NewOps[0], NewOps[1], Q); + Result = simplifyMulInst(NewOps[0], NewOps[1], Q); break; case Instruction::SDiv: - Result = SimplifySDivInst(NewOps[0], NewOps[1], Q); + Result = simplifySDivInst(NewOps[0], NewOps[1], Q); break; case Instruction::UDiv: - Result = SimplifyUDivInst(NewOps[0], NewOps[1], Q); + Result = simplifyUDivInst(NewOps[0], NewOps[1], Q); break; case Instruction::FDiv: - Result = SimplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); + Result = simplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); break; case Instruction::SRem: - Result = SimplifySRemInst(NewOps[0], NewOps[1], Q); + Result = simplifySRemInst(NewOps[0], NewOps[1], Q); break; case Instruction::URem: - Result = SimplifyURemInst(NewOps[0], NewOps[1], Q); + Result = simplifyURemInst(NewOps[0], NewOps[1], Q); break; case Instruction::FRem: - Result = SimplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); + Result = simplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q); break; case Instruction::Shl: - Result = SimplifyShlInst( + Result = simplifyShlInst( 
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast(I)), Q.IIQ.hasNoUnsignedWrap(cast(I)), Q); break; case Instruction::LShr: - Result = SimplifyLShrInst(NewOps[0], NewOps[1], + Result = simplifyLShrInst(NewOps[0], NewOps[1], Q.IIQ.isExact(cast(I)), Q); break; case Instruction::AShr: - Result = SimplifyAShrInst(NewOps[0], NewOps[1], + Result = simplifyAShrInst(NewOps[0], NewOps[1], Q.IIQ.isExact(cast(I)), Q); break; case Instruction::And: - Result = SimplifyAndInst(NewOps[0], NewOps[1], Q); + Result = simplifyAndInst(NewOps[0], NewOps[1], Q); break; case Instruction::Or: - Result = SimplifyOrInst(NewOps[0], NewOps[1], Q); + Result = simplifyOrInst(NewOps[0], NewOps[1], Q); break; case Instruction::Xor: - Result = SimplifyXorInst(NewOps[0], NewOps[1], Q); + Result = simplifyXorInst(NewOps[0], NewOps[1], Q); break; case Instruction::ICmp: - Result = SimplifyICmpInst(cast(I)->getPredicate(), NewOps[0], + Result = simplifyICmpInst(cast(I)->getPredicate(), NewOps[0], NewOps[1], Q); break; case Instruction::FCmp: - Result = SimplifyFCmpInst(cast(I)->getPredicate(), NewOps[0], + Result = simplifyFCmpInst(cast(I)->getPredicate(), NewOps[0], NewOps[1], I->getFastMathFlags(), Q); break; case Instruction::Select: - Result = SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q); + Result = simplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q); break; case Instruction::GetElementPtr: { auto *GEPI = cast(I); Result = - SimplifyGEPInst(GEPI->getSourceElementType(), NewOps[0], + simplifyGEPInst(GEPI->getSourceElementType(), NewOps[0], makeArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); - Result = SimplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q); + Result = simplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q); break; } case Instruction::InsertElement: { - Result = SimplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q); + Result = simplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q); break; } case Instruction::ExtractValue: { auto *EVI = cast(I); - Result = SimplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q); + Result = simplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q); break; } case Instruction::ExtractElement: { - Result = SimplifyExtractElementInst(NewOps[0], NewOps[1], Q); + Result = simplifyExtractElementInst(NewOps[0], NewOps[1], Q); break; } case Instruction::ShuffleVector: { auto *SVI = cast(I); - Result = SimplifyShuffleVectorInst( + Result = simplifyShuffleVectorInst( NewOps[0], NewOps[1], SVI->getShuffleMask(), SVI->getType(), Q); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast(I), NewOps, Q); + Result = simplifyPHINode(cast(I), NewOps, Q); break; case Instruction::Call: { // TODO: Use NewOps - Result = SimplifyCall(cast(I), Q); + Result = simplifyCall(cast(I), Q); break; } case Instruction::Freeze: - Result = llvm::SimplifyFreezeInst(NewOps[0], Q); + Result = llvm::simplifyFreezeInst(NewOps[0], Q); break; #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST - Result = SimplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q); + Result = simplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q); break; case Instruction::Alloca: // No simplifications for Alloca and it can't be constant folded. 
Result = nullptr; break; case Instruction::Load: - Result = SimplifyLoadInst(cast(I), NewOps[0], Q); + Result = simplifyLoadInst(cast(I), NewOps[0], Q); break; } @@ -6355,7 +6467,7 @@ static Value *simplifyInstructionWithOperands(Instruction *I, return Result == I ? UndefValue::get(I->getType()) : Result; } -Value *llvm::SimplifyInstructionWithOperands(Instruction *I, +Value *llvm::simplifyInstructionWithOperands(Instruction *I, ArrayRef NewOps, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { @@ -6364,7 +6476,7 @@ Value *llvm::SimplifyInstructionWithOperands(Instruction *I, return ::simplifyInstructionWithOperands(I, NewOps, SQ, ORE); } -Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, +Value *llvm::simplifyInstruction(Instruction *I, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { SmallVector Ops(I->operands()); return ::simplifyInstructionWithOperands(I, Ops, SQ, ORE); @@ -6415,7 +6527,7 @@ static bool replaceAndRecursivelySimplifyImpl( I = Worklist[Idx]; // See if this instruction simplifies. - SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); + SimpleV = simplifyInstruction(I, {DL, TLI, DT, AC}); if (!SimpleV) { if (UnsimplifiedUsers) UnsimplifiedUsers->insert(I); @@ -6478,6 +6590,6 @@ const SimplifyQuery getBestSimplifyQuery(AnalysisManager &AM, } template const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, Function &); -} +} // namespace llvm void InstSimplifyFolder::anchor() {} diff --git a/llvm/lib/Analysis/Interval.cpp b/llvm/lib/Analysis/Interval.cpp index e228ec4f2126..f7fffcb3d5e6 100644 --- a/llvm/lib/Analysis/Interval.cpp +++ b/llvm/lib/Analysis/Interval.cpp @@ -13,7 +13,6 @@ #include "llvm/Analysis/Interval.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp index e8e9593d7030..20a905e04a9d 100644 --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -9,14 +9,13 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" @@ -30,12 +29,15 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include #include #include +#ifdef EXPENSIVE_CHECKS +#include "llvm/ADT/ScopeExit.h" +#endif + using namespace llvm; #define DEBUG_TYPE "lcg" diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index e311b40ab25c..8a8e9e923b7c 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -38,7 +38,6 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; using namespace PatternMatch; @@ -919,7 +918,7 @@ Optional LazyValueInfoImpl::solveBlockValueCast( // transfer rule on the full set since we may be able to locally infer // interesting facts. Optional LHSRes = getRangeFor(CI->getOperand(0), CI, BB); - if (!LHSRes.hasValue()) + if (!LHSRes) // More work to do before applying this transfer rule. 
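// [Editor's note: illustrative sketch, not part of the patch.] The Optional
// cleanups in these LazyValueInfo hunks (the hasValue() -> contextual-bool
// change just above, and the value_or() switch a little further down) track
// the std::optional interface. The same idioms, standalone:
#include <cassert>
#include <optional>

int main() {
  std::optional<int> Res;           // no value yet: "more work to do"
  assert(!Res == !Res.has_value()); // contextual bool mirrors has_value()
  assert(Res.value_or(42) == 42);   // default when empty, like value_or above
  Res = 7;
  assert(Res && *Res == 7);
  return 0;
}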
return None; const ConstantRange &LHSRange = LHSRes.getValue(); @@ -943,7 +942,7 @@ Optional LazyValueInfoImpl::solveBlockValueBinaryOpImpl( // @foo()), 32" Optional LHSRes = getRangeFor(I->getOperand(0), I, BB); Optional RHSRes = getRangeFor(I->getOperand(1), I, BB); - if (!LHSRes.hasValue() || !RHSRes.hasValue()) + if (!LHSRes || !RHSRes) // More work to do before applying this transfer rule. return None; @@ -956,13 +955,6 @@ Optional LazyValueInfoImpl::solveBlockValueBinaryOp( BinaryOperator *BO, BasicBlock *BB) { assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); - if (BO->getOpcode() == Instruction::Xor) { - // Xor is the only operation not supported by ConstantRange::binaryOp(). - LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined (unknown binary operator).\n"); - return ValueLatticeElement::getOverdefined(); - } - if (auto *OBO = dyn_cast(BO)) { unsigned NoWrapKind = 0; if (OBO->hasNoUnsignedWrap()) @@ -1020,7 +1012,7 @@ Optional LazyValueInfoImpl::solveBlockValueExtractValue( // Handle extractvalue of insertvalue to allow further simplification // based on replaced with.overflow intrinsics. - if (Value *V = SimplifyExtractValueInst( + if (Value *V = simplifyExtractValueInst( EVI->getAggregateOperand(), EVI->getIndices(), EVI->getModule()->getDataLayout())) return getBlockValue(V, BB, EVI); @@ -1141,7 +1133,7 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C); if (!CR.isEmptySet()) return ValueLatticeElement::getRange(ConstantRange::getNonEmpty( - CR.getUnsignedMin().zextOrSelf(BitWidth), APInt(BitWidth, 0))); + CR.getUnsignedMin().zext(BitWidth), APInt(BitWidth, 0))); } return ValueLatticeElement::getOverdefined(); @@ -1278,7 +1270,7 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, if (auto *CI = dyn_cast(Usr)) { assert(CI->getOperand(0) == Op && "Operand 0 isn't Op"); if (auto *C = dyn_cast_or_null( - SimplifyCastInst(CI->getOpcode(), OpConst, + simplifyCastInst(CI->getOpcode(), OpConst, CI->getDestTy(), DL))) { return ValueLatticeElement::getRange(ConstantRange(C->getValue())); } @@ -1290,7 +1282,7 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, Value *LHS = Op0Match ? OpConst : BO->getOperand(0); Value *RHS = Op1Match ? 
OpConst : BO->getOperand(1); if (auto *C = dyn_cast_or_null( - SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) { + simplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) { return ValueLatticeElement::getRange(ConstantRange(C->getValue())); } } else if (isa(Usr)) { @@ -1361,7 +1353,7 @@ static Optional getEdgeValueLocal(Value *Val, ValueLatticeElement OpLatticeVal = getValueFromCondition(Op, Condition, isTrueDest); if (Optional OpConst = OpLatticeVal.asConstantInteger()) { - Result = constantFoldUser(Usr, Op, OpConst.getValue(), DL); + Result = constantFoldUser(Usr, Op, *OpConst, DL); break; } } @@ -1432,8 +1424,9 @@ Optional LazyValueInfoImpl::getEdgeValue( if (Constant *VC = dyn_cast(Val)) return ValueLatticeElement::get(VC); - ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo) - .getValueOr(ValueLatticeElement::getOverdefined()); + ValueLatticeElement LocalResult = + getEdgeValueLocal(Val, BBFrom, BBTo) + .value_or(ValueLatticeElement::getOverdefined()); if (hasSingleValue(LocalResult)) // Can't get any more precise here return LocalResult; @@ -1886,6 +1879,11 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) { } } +void LazyValueInfo::clear(const Module *M) { + if (PImpl) { + getImpl(PImpl, AC, M).clear(); + } +} void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { if (PImpl) { diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 031bf3bae51d..491d44335f22 100644 --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -68,6 +68,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index f9a7a5bdf434..9cfb91a22b7d 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -44,7 +44,6 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -69,9 +68,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -169,8 +166,8 @@ public: }; } // end anonymous namespace -// Assert - We know that cond should be true, if not print an error message. -#define Assert(C, ...) \ +// Check - We know that cond should be true, if not print an error message. +#define Check(C, ...) \ do { \ if (!(C)) { \ CheckFailed(__VA_ARGS__); \ @@ -181,8 +178,8 @@ public: void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a // fairly common mistake to neglect to name a function. - Assert(F.hasName() || F.hasLocalLinkage(), - "Unusual: Unnamed function with non-local linkage", &F); + Check(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); // TODO: Check for irreducible control flow. 
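// [Editor's note: illustrative sketch, not part of the patch.] The Assert ->
// Check rename above keeps Lint's do { ... } while (false) macro shape:
// evaluate the condition once, report on failure, and remain usable as a
// single statement (safe inside an unbraced if/else). Simplified standalone
// version with a stub CheckFailed; the real macro body is elided above and
// also returns early from the visitor:
#include <cstdio>

static void CheckFailed(const char *Msg) {
  std::fprintf(stderr, "lint: %s\n", Msg);
}

#define Check(C, Msg)                                                          \
  do {                                                                         \
    if (!(C))                                                                  \
      CheckFailed(Msg);                                                        \
  } while (false)

int main() {
  int CallerCC = 0, CalleeCC = 1; // hypothetical calling-convention ids
  Check(CallerCC == CalleeCC,
        "Undefined behavior: Caller and callee calling convention differ");
  return 0;
}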
} @@ -195,23 +192,23 @@ void Lint::visitCallBase(CallBase &I) { if (Function *F = dyn_cast(findValue(Callee, /*OffsetOk=*/false))) { - Assert(I.getCallingConv() == F->getCallingConv(), - "Undefined behavior: Caller and callee calling convention differ", - &I); + Check(I.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = I.arg_size(); - Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs - : FT->getNumParams() == NumActualArgs, - "Undefined behavior: Call argument count mismatches callee " - "argument count", - &I); + Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs + : FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", + &I); - Assert(FT->getReturnType() == I.getType(), - "Undefined behavior: Call return type mismatches " - "callee return type", - &I); + Check(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", + &I); // Check argument types (in case the callee was casted) and attributes. // TODO: Verify that caller and callee attributes are compatible. @@ -221,10 +218,10 @@ void Lint::visitCallBase(CallBase &I) { Value *Actual = *AI; if (PI != PE) { Argument *Formal = &*PI++; - Assert(Formal->getType() == Actual->getType(), - "Undefined behavior: Call argument type mismatches " - "callee parameter type", - &I); + Check(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", + &I); // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced @@ -242,9 +239,9 @@ void Lint::visitCallBase(CallBase &I) { continue; if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); - Assert(Result != AliasResult::MustAlias && - Result != AliasResult::PartialAlias, - "Unusual: noalias argument aliases another argument", &I); + Check(Result != AliasResult::MustAlias && + Result != AliasResult::PartialAlias, + "Unusual: noalias argument aliases another argument", &I); } } } @@ -271,10 +268,10 @@ void Lint::visitCallBase(CallBase &I) { if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal)) continue; Value *Obj = findValue(Arg, /*OffsetOk=*/true); - Assert(!isa(Obj), - "Undefined behavior: Call with \"tail\" keyword references " - "alloca", - &I); + Check(!isa(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", + &I); } } } @@ -302,9 +299,9 @@ void Lint::visitCallBase(CallBase &I) { /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = LocationSize::precise(Len->getValue().getZExtValue()); - Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != - AliasResult::MustAlias, - "Undefined behavior: memcpy source and destination overlap", &I); + Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasResult::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); break; } case Intrinsic::memcpy_inline: { @@ -319,9 +316,9 @@ void Lint::visitCallBase(CallBase &I) { // isn't expressive enough for what we really want to do. Known partial // overlap is not distinguished from the case where nothing is known. 
const LocationSize LS = LocationSize::precise(Size); - Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != - AliasResult::MustAlias, - "Undefined behavior: memcpy source and destination overlap", &I); + Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != + AliasResult::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); break; } case Intrinsic::memmove: { @@ -338,11 +335,17 @@ void Lint::visitCallBase(CallBase &I) { MSI->getDestAlign(), nullptr, MemRef::Write); break; } + case Intrinsic::memset_inline: { + MemSetInlineInst *MSII = cast(&I); + visitMemoryReference(I, MemoryLocation::getForDest(MSII), + MSII->getDestAlign(), nullptr, MemRef::Write); + break; + } case Intrinsic::vastart: - Assert(I.getParent()->getParent()->isVarArg(), - "Undefined behavior: va_start called in a non-varargs function", - &I); + Check(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, nullptr, MemRef::Read | MemRef::Write); @@ -367,20 +370,22 @@ void Lint::visitCallBase(CallBase &I) { break; case Intrinsic::get_active_lane_mask: if (auto *TripCount = dyn_cast(I.getArgOperand(1))) - Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 " - "must be greater than 0", &I); + Check(!TripCount->isZero(), + "get_active_lane_mask: operand #2 " + "must be greater than 0", + &I); break; } } void Lint::visitReturnInst(ReturnInst &I) { Function *F = I.getParent()->getParent(); - Assert(!F->doesNotReturn(), - "Unusual: Return statement in function with noreturn attribute", &I); + Check(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", &I); if (Value *V = I.getReturnValue()) { Value *Obj = findValue(V, /*OffsetOk=*/true); - Assert(!isa(Obj), "Unusual: Returning alloca value", &I); + Check(!isa(Obj), "Unusual: Returning alloca value", &I); } } @@ -395,39 +400,39 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, Value *Ptr = const_cast(Loc.Ptr); Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); - Assert(!isa(UnderlyingObject), - "Undefined behavior: Null pointer dereference", &I); - Assert(!isa(UnderlyingObject), - "Undefined behavior: Undef pointer dereference", &I); - Assert(!isa(UnderlyingObject) || - !cast(UnderlyingObject)->isMinusOne(), - "Unusual: All-ones pointer dereference", &I); - Assert(!isa(UnderlyingObject) || - !cast(UnderlyingObject)->isOne(), - "Unusual: Address one pointer dereference", &I); + Check(!isa(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Check(!isa(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + Check(!isa(UnderlyingObject) || + !cast(UnderlyingObject)->isMinusOne(), + "Unusual: All-ones pointer dereference", &I); + Check(!isa(UnderlyingObject) || + !cast(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); if (Flags & MemRef::Write) { if (const GlobalVariable *GV = dyn_cast(UnderlyingObject)) - Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory", - &I); - Assert(!isa(UnderlyingObject) && - !isa(UnderlyingObject), - "Undefined behavior: Write to text section", &I); + Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory", + &I); + Check(!isa(UnderlyingObject) && + !isa(UnderlyingObject), + "Undefined behavior: Write to text section", &I); } if (Flags & MemRef::Read) { - Assert(!isa(UnderlyingObject), 
"Unusual: Load from function body", - &I); - Assert(!isa(UnderlyingObject), - "Undefined behavior: Load from block address", &I); + Check(!isa(UnderlyingObject), "Unusual: Load from function body", + &I); + Check(!isa(UnderlyingObject), + "Undefined behavior: Load from block address", &I); } if (Flags & MemRef::Callee) { - Assert(!isa(UnderlyingObject), - "Undefined behavior: Call to block address", &I); + Check(!isa(UnderlyingObject), + "Undefined behavior: Call to block address", &I); } if (Flags & MemRef::Branchee) { - Assert(!isa(UnderlyingObject) || - isa(UnderlyingObject), - "Undefined behavior: Branch to non-blockaddress", &I); + Check(!isa(UnderlyingObject) || + isa(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); } // Check for buffer overflows and misalignment. @@ -461,17 +466,17 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, // Accesses from before the start or after the end of the object are not // defined. - Assert(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize || - (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), - "Undefined behavior: Buffer overflow", &I); + Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize || + (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), + "Undefined behavior: Buffer overflow", &I); // Accesses that say that the memory is more aligned than it is are not // defined. if (!Align && Ty && Ty->isSized()) Align = DL->getABITypeAlign(Ty); if (BaseAlign && Align) - Assert(*Align <= commonAlignment(*BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); + Check(*Align <= commonAlignment(*BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } @@ -486,34 +491,34 @@ void Lint::visitStoreInst(StoreInst &I) { } void Lint::visitXor(BinaryOperator &I) { - Assert(!isa(I.getOperand(0)) || !isa(I.getOperand(1)), - "Undefined result: xor(undef, undef)", &I); + Check(!isa(I.getOperand(0)) || !isa(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); } void Lint::visitSub(BinaryOperator &I) { - Assert(!isa(I.getOperand(0)) || !isa(I.getOperand(1)), - "Undefined result: sub(undef, undef)", &I); + Check(!isa(I.getOperand(0)) || !isa(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); } void Lint::visitLShr(BinaryOperator &I) { if (ConstantInt *CI = dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + Check(CI->getValue().ult(cast(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { if (ConstantInt *CI = dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + Check(CI->getValue().ult(cast(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { if (ConstantInt *CI = dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + Check(CI->getValue().ult(cast(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, @@ -554,30 +559,30 @@ static bool isZero(Value *V, const DataLayout &DL, 
DominatorTree *DT, } void Lint::visitSDiv(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), - "Undefined behavior: Division by zero", &I); + Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), - "Undefined behavior: Division by zero", &I); + Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), - "Undefined behavior: Division by zero", &I); + Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), - "Undefined behavior: Division by zero", &I); + Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitAllocaInst(AllocaInst &I) { if (isa(I.getArraySize())) // This isn't undefined behavior, it's just an obvious pessimization. - Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), - "Pessimization: Static alloca outside of entry block", &I); + Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); // TODO: Check for an unusual size (MSB set?) } @@ -591,14 +596,14 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) { visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None, nullptr, MemRef::Branchee); - Assert(I.getNumDestinations() != 0, - "Undefined behavior: indirectbr with no destinations", &I); + Check(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); } void Lint::visitExtractElementInst(ExtractElementInst &I) { if (ConstantInt *CI = dyn_cast(findValue(I.getIndexOperand(), /*OffsetOk=*/false))) - Assert( + Check( CI->getValue().ult( cast(I.getVectorOperandType())->getNumElements()), "Undefined result: extractelement index out of range", &I); @@ -607,18 +612,18 @@ void Lint::visitExtractElementInst(ExtractElementInst &I) { void Lint::visitInsertElementInst(InsertElementInst &I) { if (ConstantInt *CI = dyn_cast(findValue(I.getOperand(2), /*OffsetOk=*/false))) - Assert(CI->getValue().ult( - cast(I.getType())->getNumElements()), - "Undefined result: insertelement index out of range", &I); + Check(CI->getValue().ult( + cast(I.getType())->getNumElements()), + "Undefined result: insertelement index out of range", &I); } void Lint::visitUnreachableInst(UnreachableInst &I) { // This isn't undefined behavior, it's merely suspicious. 
- Assert(&I == &I.getParent()->front() || - std::prev(I.getIterator())->mayHaveSideEffects(), - "Unusual: unreachable immediately preceded by instruction without " - "side effects", - &I); + Check(&I == &I.getParent()->front() || + std::prev(I.getIterator())->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", + &I); } /// findValue - Look through bitcasts and simple memory reference patterns @@ -681,17 +686,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, CE->getOperand(0)->getType(), CE->getType(), *DL)) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); - } else if (CE->getOpcode() == Instruction::ExtractValue) { - ArrayRef Indices = CE->getIndices(); - if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) - if (W != V) - return findValueImpl(W, OffsetOk, Visited); } } // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast(V)) { - if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC})) + if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC})) return findValueImpl(W, OffsetOk, Visited); } else if (auto *C = dyn_cast(V)) { Value *W = ConstantFoldConstant(C, *DL, TLI); diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index cd0d4d6b9ca8..bc1d82cf1480 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -13,19 +13,14 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -509,8 +504,8 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL)) return Val; - TypeSize StoreSize = DL.getTypeStoreSize(Val->getType()); - TypeSize LoadSize = DL.getTypeStoreSize(AccessTy); + TypeSize StoreSize = DL.getTypeSizeInBits(Val->getType()); + TypeSize LoadSize = DL.getTypeSizeInBits(AccessTy); if (TypeSize::isKnownLE(LoadSize, StoreSize)) if (auto *C = dyn_cast(Val)) return ConstantFoldLoadFromConst(C, AccessTy, DL); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 2ab78d2b7ee2..79161db9b5e4 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" @@ -60,12 +61,12 @@ #include #include #include -#include #include #include #include using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-accesses" @@ -172,7 +173,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start), AddressSpace(RtCheck.Pointers[Index] .PointerValue->getType() - ->getPointerAddressSpace()) { + 
->getPointerAddressSpace()), + NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) { Members.push_back(Index); } @@ -189,21 +191,20 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( /// /// There is no conflict when the intervals are disjoint: /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) -void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, +void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, + Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, - const ValueToValueMap &Strides, - PredicatedScalarEvolution &PSE) { - // Get the stride replaced scev. - const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); + PredicatedScalarEvolution &PSE, + bool NeedsFreeze) { ScalarEvolution *SE = PSE.getSE(); const SCEV *ScStart; const SCEV *ScEnd; - if (SE->isLoopInvariant(Sc, Lp)) { - ScStart = ScEnd = Sc; + if (SE->isLoopInvariant(PtrExpr, Lp)) { + ScStart = ScEnd = PtrExpr; } else { - const SCEVAddRecExpr *AR = dyn_cast(Sc); + const SCEVAddRecExpr *AR = dyn_cast(PtrExpr); assert(AR && "Invalid addrec expression"); const SCEV *Ex = PSE.getBackedgeTakenCount(); @@ -227,15 +228,100 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, // Add the size of the pointed element to ScEnd. auto &DL = Lp->getHeader()->getModule()->getDataLayout(); Type *IdxTy = DL.getIndexType(Ptr->getType()); - const SCEV *EltSizeSCEV = - SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType()); + const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy); ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); - Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); + Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr, + NeedsFreeze); } -SmallVector -RuntimePointerChecking::generateChecks() const { +void RuntimePointerChecking::tryToCreateDiffCheck( + const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) { + if (!CanUseDiffCheck) + return; + + // If either group contains multiple different pointers, bail out. + // TODO: Support multiple pointers by using the minimum or maximum pointer, + // depending on src & sink. + if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) { + CanUseDiffCheck = false; + return; + } + + PointerInfo *Src = &Pointers[CGI.Members[0]]; + PointerInfo *Sink = &Pointers[CGJ.Members[0]]; + + // If either pointer is read and written, multiple checks may be needed. Bail + // out. + if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() || + !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) { + CanUseDiffCheck = false; + return; + } + + ArrayRef AccSrc = + DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr); + ArrayRef AccSink = + DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr); + // If either pointer is accessed multiple times, there may not be a clear + // src/sink relation. Bail out for now. + if (AccSrc.size() != 1 || AccSink.size() != 1) { + CanUseDiffCheck = false; + return; + } + // If the sink is accessed before src, swap src/sink. 
+ if (AccSink[0] < AccSrc[0]) + std::swap(Src, Sink); + + auto *SrcAR = dyn_cast(Src->Expr); + auto *SinkAR = dyn_cast(Sink->Expr); + if (!SrcAR || !SinkAR) { + CanUseDiffCheck = false; + return; + } + + const DataLayout &DL = + SinkAR->getLoop()->getHeader()->getModule()->getDataLayout(); + SmallVector SrcInsts = + DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr); + SmallVector SinkInsts = + DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr); + Type *SrcTy = getLoadStoreType(SrcInsts[0]); + Type *DstTy = getLoadStoreType(SinkInsts[0]); + if (isa(SrcTy) || isa(DstTy)) + return; + unsigned AllocSize = + std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy)); + IntegerType *IntTy = + IntegerType::get(Src->PointerValue->getContext(), + DL.getPointerSizeInBits(CGI.AddressSpace)); + + // Only matching constant steps matching the AllocSize are supported at the + // moment. This simplifies the difference computation. Can be extended in the + // future. + auto *Step = dyn_cast(SinkAR->getStepRecurrence(*SE)); + if (!Step || Step != SrcAR->getStepRecurrence(*SE) || + Step->getAPInt().abs() != AllocSize) { + CanUseDiffCheck = false; + return; + } + + // When counting down, the dependence distance needs to be swapped. + if (Step->getValue()->isNegative()) + std::swap(SinkAR, SrcAR); + + const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy); + const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy); + if (isa(SinkStartInt) || + isa(SrcStartInt)) { + CanUseDiffCheck = false; + return; + } + DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize, + Src->NeedsFreeze || Sink->NeedsFreeze); +} + +SmallVector RuntimePointerChecking::generateChecks() { SmallVector Checks; for (unsigned I = 0; I < CheckingGroups.size(); ++I) { @@ -243,8 +329,10 @@ RuntimePointerChecking::generateChecks() const { const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; - if (needsChecking(CGI, CGJ)) + if (needsChecking(CGI, CGJ)) { + tryToCreateDiffCheck(CGI, CGJ); Checks.push_back(std::make_pair(&CGI, &CGJ)); + } } } return Checks; @@ -285,11 +373,12 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, return addPointer( Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End, RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(), - *RtCheck.SE); + RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE); } bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start, const SCEV *End, unsigned AS, + bool NeedsFreeze, ScalarEvolution &SE) { assert(AddressSpace == AS && "all pointers in a checking group must be in the same address space"); @@ -314,6 +403,7 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start, High = End; Members.push_back(Index); + this->NeedsFreeze |= NeedsFreeze; return true; } @@ -371,9 +461,11 @@ void RuntimePointerChecking::groupChecks( unsigned TotalComparisons = 0; - DenseMap PositionMap; - for (unsigned Index = 0; Index < Pointers.size(); ++Index) - PositionMap[Pointers[Index].PointerValue] = Index; + DenseMap> PositionMap; + for (unsigned Index = 0; Index < Pointers.size(); ++Index) { + auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}}); + Iter.first->second.push_back(Index); + } // We need to keep track of what pointers we've already seen so we // don't process them twice. 
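The tryToCreateDiffCheck path above trades the usual pair of interval-overlap comparisons for a single pointer-difference test whenever both checking groups contain exactly one pointer, each pointer is accessed exactly once, and both accesses are add-recurrences with the same constant step equal to the access size. A minimal standalone sketch of the two predicate shapes, assuming the emitted check tests that the sink trails the source by at least one vector's worth of bytes (the VF-based threshold and the exact comparison are assumptions for illustration; the emission itself is not part of this hunk):

    #include <cstdint>

    // Classic interval form: two comparisons per pointer pair.
    bool noConflictIntervals(uintptr_t Start1, uintptr_t End1,
                             uintptr_t Start2, uintptr_t End2) {
      return Start1 >= End2 || Start2 >= End1;
    }

    // Difference form recorded by tryToCreateDiffCheck: with matching
    // constant steps of AllocSize bytes, subtracting the ptrtoint'd starts
    // suffices (hypothetical VF threshold, not LLVM's actual emitted IR).
    bool noConflictDiff(uintptr_t SrcStart, uintptr_t SinkStart,
                        uint64_t AllocSize, uint64_t VF) {
      return SinkStart - SrcStart >= VF * AllocSize;
    }

The NeedsFreeze bit carried alongside each recorded DiffCheck flags starts derived from select operands, which may be undef or poison and therefore must be frozen before feeding a comparison like the one sketched here.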
@@ -404,34 +496,35 @@ void RuntimePointerChecking::groupChecks( auto PointerI = PositionMap.find(MI->getPointer()); assert(PointerI != PositionMap.end() && "pointer in equivalence class not found in PositionMap"); - unsigned Pointer = PointerI->second; - bool Merged = false; - // Mark this pointer as seen. - Seen.insert(Pointer); - - // Go through all the existing sets and see if we can find one - // which can include this pointer. - for (RuntimeCheckingPtrGroup &Group : Groups) { - // Don't perform more than a certain amount of comparisons. - // This should limit the cost of grouping the pointers to something - // reasonable. If we do end up hitting this threshold, the algorithm - // will create separate groups for all remaining pointers. - if (TotalComparisons > MemoryCheckMergeThreshold) - break; - - TotalComparisons++; - - if (Group.addPointer(Pointer, *this)) { - Merged = true; - break; + for (unsigned Pointer : PointerI->second) { + bool Merged = false; + // Mark this pointer as seen. + Seen.insert(Pointer); + + // Go through all the existing sets and see if we can find one + // which can include this pointer. + for (RuntimeCheckingPtrGroup &Group : Groups) { + // Don't perform more than a certain amount of comparisons. + // This should limit the cost of grouping the pointers to something + // reasonable. If we do end up hitting this threshold, the algorithm + // will create separate groups for all remaining pointers. + if (TotalComparisons > MemoryCheckMergeThreshold) + break; + + TotalComparisons++; + + if (Group.addPointer(Pointer, *this)) { + Merged = true; + break; + } } - } - if (!Merged) - // We couldn't add this pointer to any existing set or the threshold - // for the number of comparisons has been reached. Create a new group - // to hold the current pointer. - Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this)); + if (!Merged) + // We couldn't add this pointer to any existing set or the threshold + // for the number of comparisons has been reached. Create a new group + // to hold the current pointer. + Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this)); + } } // We've computed the grouped checks for this partition. @@ -522,19 +615,19 @@ public: : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {} /// Register a load and whether it is only read from. - void addLoad(MemoryLocation &Loc, bool IsReadOnly) { + void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) { Value *Ptr = const_cast(Loc.Ptr); AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); - Accesses.insert(MemAccessInfo(Ptr, false)); + Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); } /// Register a store. - void addStore(MemoryLocation &Loc) { + void addStore(MemoryLocation &Loc, Type *AccessTy) { Value *Ptr = const_cast(Loc.Ptr); AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); - Accesses.insert(MemAccessInfo(Ptr, true)); + Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy); } /// Check if we can emit a run-time no-alias check for \p Access. @@ -545,12 +638,11 @@ public: /// we will attempt to use additional run-time checks in order to get /// the bounds of the pointer. 
bool createCheckForAccess(RuntimePointerChecking &RtCheck, - MemAccessInfo Access, + MemAccessInfo Access, Type *AccessTy, const ValueToValueMap &Strides, DenseMap &DepSetId, Loop *TheLoop, unsigned &RunningDepId, - unsigned ASId, bool ShouldCheckStride, - bool Assume); + unsigned ASId, bool ShouldCheckStride, bool Assume); /// Check whether we can check the pointers at runtime for /// non-intersection. @@ -559,7 +651,7 @@ public: /// (i.e. the pointers have computable bounds). bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &Strides, - bool ShouldCheckWrap = false); + Value *&UncomputablePtr, bool ShouldCheckWrap = false); /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. @@ -583,14 +675,15 @@ public: MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } private: - typedef SetVector PtrAccessSet; + typedef MapVector> PtrAccessMap; /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); - /// Set of all accesses. - PtrAccessSet Accesses; + /// Map of all accesses. Values are the types used to access memory pointed to + /// by the pointer. + PtrAccessMap Accesses; /// The loop being checked. const Loop *TheLoop; @@ -630,11 +723,8 @@ private: /// Check whether a pointer can participate in a runtime bounds check. /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr /// by adding run-time checks (overflow checks) if necessary. -static bool hasComputableBounds(PredicatedScalarEvolution &PSE, - const ValueToValueMap &Strides, Value *Ptr, - Loop *L, bool Assume) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr, + const SCEV *PtrScev, Loop *L, bool Assume) { // The bounds for loop-invariant pointer is trivial. if (PSE.getSE()->isLoopInvariant(PtrScev, L)) return true; @@ -652,12 +742,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, /// Check whether a pointer address cannot wrap. static bool isNoWrap(PredicatedScalarEvolution &PSE, - const ValueToValueMap &Strides, Value *Ptr, Loop *L) { + const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy, + Loop *L) { const SCEV *PtrScev = PSE.getSCEV(Ptr); if (PSE.getSE()->isLoopInvariant(PtrScev, L)) return true; - Type *AccessTy = Ptr->getType()->getPointerElementType(); int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides); if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) return true; @@ -689,7 +779,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, } bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, - MemAccessInfo Access, + MemAccessInfo Access, Type *AccessTy, const ValueToValueMap &StridesMap, DenseMap &DepSetId, Loop *TheLoop, unsigned &RunningDepId, @@ -697,42 +787,75 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, bool Assume) { Value *Ptr = Access.getPointer(); - if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) - return false; + ScalarEvolution &SE = *PSE.getSE(); + SmallVector> TranslatedPtrs; + auto *SI = dyn_cast(Ptr); + // Look through selects in the current loop. 
+ if (SI && !TheLoop->isLoopInvariant(SI)) { + TranslatedPtrs = { + std::make_pair(SE.getSCEV(SI->getOperand(1)), + !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))), + std::make_pair(SE.getSCEV(SI->getOperand(2)), + !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))}; + } else + TranslatedPtrs = { + std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)}; - // When we run after a failing dependency check we have to make sure - // we don't have wrapping pointers. - if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { - auto *Expr = PSE.getSCEV(Ptr); - if (!Assume || !isa(Expr)) + for (auto &P : TranslatedPtrs) { + const SCEV *PtrExpr = P.first; + if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume)) return false; - PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + + // When we run after a failing dependency check we have to make sure + // we don't have wrapping pointers. + if (ShouldCheckWrap) { + // Skip wrap checking when translating pointers. + if (TranslatedPtrs.size() > 1) + return false; + + if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) { + auto *Expr = PSE.getSCEV(Ptr); + if (!Assume || !isa(Expr)) + return false; + PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + } + } + // If there's only one option for Ptr, look it up after bounds and wrap + // checking, because assumptions might have been added to PSE. + if (TranslatedPtrs.size() == 1) + TranslatedPtrs[0] = std::make_pair( + replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false); } - // The id of the dependence set. - unsigned DepId; + for (auto &P : TranslatedPtrs) { + const SCEV *PtrExpr = P.first; - if (isDependencyCheckNeeded()) { - Value *Leader = DepCands.getLeaderValue(Access).getPointer(); - unsigned &LeaderId = DepSetId[Leader]; - if (!LeaderId) - LeaderId = RunningDepId++; - DepId = LeaderId; - } else - // Each access has its own dependence set. - DepId = RunningDepId++; + // The id of the dependence set. + unsigned DepId; - bool IsWrite = Access.getInt(); - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); - LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + if (isDependencyCheckNeeded()) { + Value *Leader = DepCands.getLeaderValue(Access).getPointer(); + unsigned &LeaderId = DepSetId[Leader]; + if (!LeaderId) + LeaderId = RunningDepId++; + DepId = LeaderId; + } else + // Each access has its own dependence set. + DepId = RunningDepId++; + + bool IsWrite = Access.getInt(); + RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE, + P.second); + LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + } return true; - } +} bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, - bool ShouldCheckWrap) { + Value *&UncomputablePtr, bool ShouldCheckWrap) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
bool CanDoRT = true; @@ -788,12 +911,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, } for (auto &Access : AccessInfos) { - if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, - RunningDepId, ASId, ShouldCheckWrap, false)) { - LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" - << *Access.getPointer() << '\n'); - Retries.push_back(Access); - CanDoAliasSetRT = false; + for (auto &AccessTy : Accesses[Access]) { + if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, + DepSetId, TheLoop, RunningDepId, ASId, + ShouldCheckWrap, false)) { + LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" + << *Access.getPointer() << '\n'); + Retries.push_back(Access); + CanDoAliasSetRT = false; + } } } @@ -815,13 +941,17 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // We know that we need these checks, so we can now be more aggressive // and add further checks if required (overflow checks). CanDoAliasSetRT = true; - for (auto Access : Retries) - if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, - TheLoop, RunningDepId, ASId, - ShouldCheckWrap, /*Assume=*/true)) { - CanDoAliasSetRT = false; - break; + for (auto Access : Retries) { + for (auto &AccessTy : Accesses[Access]) { + if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, + DepSetId, TheLoop, RunningDepId, ASId, + ShouldCheckWrap, /*Assume=*/true)) { + CanDoAliasSetRT = false; + UncomputablePtr = Access.getPointer(); + break; + } } + } } CanDoRT &= CanDoAliasSetRT; @@ -886,9 +1016,12 @@ void AccessAnalysis::processMemAccesses() { LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); LLVM_DEBUG({ for (auto A : Accesses) - dbgs() << "\t" << *A.getPointer() << " (" << - (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? - "read-only" : "read")) << ")\n"; + dbgs() << "\t" << *A.first.getPointer() << " (" + << (A.first.getInt() + ? "write" + : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only" + : "read")) + << ")\n"; }); // The AliasSetTracker has nicely partitioned our pointers by metadata @@ -907,13 +1040,13 @@ void AccessAnalysis::processMemAccesses() { UnderlyingObjToAccessMap ObjToLastAccess; // Set of access to check after all writes have been processed. - PtrAccessSet DeferredAccesses; + PtrAccessMap DeferredAccesses; // Iterate over each alias set twice, once to process read/write pointers, // and then to process read-only pointers. for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { bool UseDeferred = SetIteration > 0; - PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; + PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses; for (const auto &AV : AS) { Value *Ptr = AV.getValue(); @@ -921,10 +1054,10 @@ void AccessAnalysis::processMemAccesses() { // For a single memory access in AliasSetTracker, Accesses may contain // both read and write, and they both need to be handled for CheckDeps. for (const auto &AC : S) { - if (AC.getPointer() != Ptr) + if (AC.first.getPointer() != Ptr) continue; - bool IsWrite = AC.getInt(); + bool IsWrite = AC.first.getInt(); // If we're using the deferred access set, then it contains only // reads. @@ -946,7 +1079,9 @@ void AccessAnalysis::processMemAccesses() { // consecutive as "read-only" pointers (so that we check // "a[b[i]] +="). Hence, we need the second check for "!IsWrite". if (!UseDeferred && IsReadOnlyPtr) { - DeferredAccesses.insert(Access); + // We only use the pointer keys, the types vector values don't + // matter. 
+ DeferredAccesses.insert({Access, {}}); continue; } @@ -1445,13 +1580,13 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV *CastedDist = &Dist; const SCEV *CastedProduct = Product; - uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); - uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); + uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType()); + uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType()); // The dependence distance can be positive/negative, so we sign extend Dist; // The multiplication of the absolute stride in bytes and the // backedgeTakenCount is non-negative, so we zero extend Product. - if (DistTypeSize > ProductTypeSize) + if (DistTypeSizeBits > ProductTypeSizeBits) CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); else CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType()); @@ -1518,8 +1653,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, Value *BPtr = B.getPointer(); bool AIsWrite = A.getInt(); bool BIsWrite = B.getInt(); - Type *ATy = APtr->getType()->getPointerElementType(); - Type *BTy = BPtr->getType()->getPointerElementType(); + Type *ATy = getLoadStoreType(InstMap[AIdx]); + Type *BTy = getLoadStoreType(InstMap[BIdx]); // Two reads are independent. if (!AIsWrite && !BIsWrite) @@ -1842,8 +1977,6 @@ bool LoopAccessInfo::canAnalyzeLoop() { void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, DominatorTree *DT) { - typedef SmallPtrSet ValueSet; - // Holds the Load and Store instructions. SmallVector Loads; SmallVector Stores; @@ -1975,22 +2108,26 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // for read and once for write, it will only appear once (on the write // list). This is okay, since we are going to check for conflicts between // writes and between reads and writes, but not between reads and reads. - ValueSet Seen; + SmallSet, 16> Seen; // Record uniform store addresses to identify if we have multiple stores // to the same address. - ValueSet UniformStores; + SmallPtrSet UniformStores; for (StoreInst *ST : Stores) { Value *Ptr = ST->getPointerOperand(); - if (isUniform(Ptr)) + if (isUniform(Ptr)) { + // Record store instructions to loop invariant addresses + StoresToInvariantAddresses.push_back(ST); HasDependenceInvolvingLoopInvariantAddress |= !UniformStores.insert(Ptr).second; + } // If we did *not* see this pointer before, insert it to the read-write // list. At this phase it is only a 'write' list. - if (Seen.insert(Ptr).second) { + Type *AccessTy = getLoadStoreType(ST); + if (Seen.insert({Ptr, AccessTy}).second) { ++NumReadWrites; MemoryLocation Loc = MemoryLocation::get(ST); @@ -2001,9 +2138,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, Loc.AATags.TBAA = nullptr; visitPointers(const_cast(Loc.Ptr), *TheLoop, - [&Accesses, Loc](Value *Ptr) { + [&Accesses, AccessTy, Loc](Value *Ptr) { MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr); - Accesses.addStore(NewLoc); + Accesses.addStore(NewLoc, AccessTy); }); } } @@ -2027,7 +2164,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // read a few words, modify, and write a few words, and some of the // words may be written to the same address. 
bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || + Type *AccessTy = getLoadStoreType(LD); + if (Seen.insert({Ptr, AccessTy}).second || !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) { ++NumReads; IsReadOnlyPtr = true; @@ -2049,9 +2187,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, Loc.AATags.TBAA = nullptr; visitPointers(const_cast(Loc.Ptr), *TheLoop, - [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) { + [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) { MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr); - Accesses.addLoad(NewLoc, IsReadOnlyPtr); + Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr); }); } @@ -2069,10 +2207,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), - TheLoop, SymbolicStrides); + Value *UncomputablePtr = nullptr; + bool CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop, + SymbolicStrides, UncomputablePtr, false); if (!CanDoRTIfNeeded) { - recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; + auto *I = dyn_cast_or_null(UncomputablePtr); + recordAnalysis("CantIdentifyArrayBounds", I) + << "cannot identify array bounds"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); CanVecMem = false; @@ -2099,12 +2241,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, PtrRtChecking->Need = true; auto *SE = PSE->getSE(); - CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop, - SymbolicStrides, true); + UncomputablePtr = nullptr; + CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( + *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true); // Check that we found the bounds for the pointer. if (!CanDoRTIfNeeded) { - recordAnalysis("CantCheckMemDepsAtRunTime") + auto *I = dyn_cast_or_null(UncomputablePtr); + recordAnalysis("CantCheckMemDepsAtRunTime", I) << "cannot check memory dependencies at runtime"; LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); CanVecMem = false; @@ -2129,13 +2273,61 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); - else { - recordAnalysis("UnsafeMemDep") - << "unsafe dependent memory operations in loop. Use " - "#pragma loop distribute(enable) to allow loop distribution " - "to attempt to isolate the offending operations into a separate " - "loop"; - LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + else + emitUnsafeDependenceRemark(); +} + +void LoopAccessInfo::emitUnsafeDependenceRemark() { + auto Deps = getDepChecker().getDependences(); + if (!Deps) + return; + auto Found = std::find_if( + Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) { + return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) != + MemoryDepChecker::VectorizationSafetyStatus::Safe; + }); + if (Found == Deps->end()) + return; + MemoryDepChecker::Dependence Dep = *Found; + + LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + + // Emit remark for first unsafe dependence + OptimizationRemarkAnalysis &R = + recordAnalysis("UnsafeDep", Dep.getDestination(*this)) + << "unsafe dependent memory operations in loop. 
Use " + "#pragma loop distribute(enable) to allow loop distribution " + "to attempt to isolate the offending operations into a separate " + "loop"; + + switch (Dep.Type) { + case MemoryDepChecker::Dependence::NoDep: + case MemoryDepChecker::Dependence::Forward: + case MemoryDepChecker::Dependence::BackwardVectorizable: + llvm_unreachable("Unexpected dependence"); + case MemoryDepChecker::Dependence::Backward: + R << "\nBackward loop carried data dependence."; + break; + case MemoryDepChecker::Dependence::ForwardButPreventsForwarding: + R << "\nForward loop carried data dependence that prevents " + "store-to-load forwarding."; + break; + case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding: + R << "\nBackward loop carried data dependence that prevents " + "store-to-load forwarding."; + break; + case MemoryDepChecker::Dependence::Unknown: + R << "\nUnknown data dependence."; + break; + } + + if (Instruction *I = Dep.getSource(*this)) { + DebugLoc SourceLoc = I->getDebugLoc(); + if (auto *DD = dyn_cast_or_null(getPointerOperand(I))) + SourceLoc = DD->getDebugLoc(); + if (SourceLoc) + R << " Memory location is the same as accessed at " + << ore::NV("Location", SourceLoc); } } @@ -2212,12 +2404,12 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { // The Stride can be positive/negative, so we sign extend Stride; // The backedgeTakenCount is non-negative, so we zero extend BETakenCount. const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); - uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); - uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); + uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType()); + uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType()); const SCEV *CastedStride = StrideExpr; const SCEV *CastedBECount = BETakenCount; ScalarEvolution *SE = PSE->getSE(); - if (BETypeSize >= StrideTypeSize) + if (BETypeSizeBits >= StrideTypeSizeBits) CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); else CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); @@ -2232,7 +2424,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { "at most once.\n"); return; } - LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version."); + LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n"); SymbolicStrides[Ptr] = Stride; StrideSet.insert(Stride); @@ -2242,10 +2434,12 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI) : PSE(std::make_unique(*SE, *L)), - PtrRtChecking(std::make_unique(SE)), + PtrRtChecking(nullptr), DepChecker(std::make_unique(*PSE, L)), TheLoop(L) { - if (canAnalyzeLoop()) + PtrRtChecking = std::make_unique(*DepChecker, SE); + if (canAnalyzeLoop()) { analyzeLoop(AA, LI, TLI, DT); + } } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { @@ -2283,7 +2477,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { << "found in loop.\n"; OS.indent(Depth) << "SCEV assumptions:\n"; - PSE->getUnionPredicate().print(OS, Depth); + PSE->getPredicate().print(OS, Depth); OS << "\n"; @@ -2301,7 +2495,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { if (!LAI) LAI = std::make_unique(L, SE, TLI, AA, DT, LI); - return *LAI.get(); + return *LAI; } void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { diff --git 
a/llvm/lib/Analysis/LoopAnalysisManager.cpp b/llvm/lib/Analysis/LoopAnalysisManager.cpp index 4d6f8a64329a..8d71b31ca393 100644 --- a/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -8,12 +8,9 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManagerImpl.h" diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp index ba014bd08c98..2cbf1f7f2d28 100644 --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -103,14 +103,24 @@ static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, return StepRec == &ElemSize; } -/// Compute the trip count for the given loop \p L. Return the SCEV expression -/// for the trip count or nullptr if it cannot be computed. -static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { +/// Compute the trip count for the given loop \p L or assume a default value if +/// it is not a compile time constant. Return the SCEV expression for the trip +/// count. +static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize, + ScalarEvolution &SE) { const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); - if (isa(BackedgeTakenCount) || - !isa(BackedgeTakenCount)) - return nullptr; - return SE.getTripCountFromExitCount(BackedgeTakenCount); + const SCEV *TripCount = (!isa(BackedgeTakenCount) && + isa(BackedgeTakenCount)) + ? SE.getTripCountFromExitCount(BackedgeTakenCount) + : nullptr; + + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() + << " could not be computed, using DefaultTripCount\n"); + TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount); + } + + return TripCount; } //===----------------------------------------------------------------------===// @@ -274,22 +284,18 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L, return 1; } - const SCEV *TripCount = computeTripCount(L, SE); - if (!TripCount) { - LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() - << " could not be computed, using DefaultTripCount\n"); - const SCEV *ElemSize = Sizes.back(); - TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); - } + const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE); + assert(TripCount && "Expecting valid TripCount"); LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); - // If the indexed reference is 'consecutive' the cost is - // (TripCount*Stride)/CLS, otherwise the cost is TripCount. - const SCEV *RefCost = TripCount; - + const SCEV *RefCost = nullptr; if (isConsecutive(L, CLS)) { + // If the indexed reference is 'consecutive' the cost is + // (TripCount*Stride)/CLS. 
const SCEV *Coeff = getLastCoefficient(); const SCEV *ElemSize = Sizes.back(); + assert(Coeff->getType() == ElemSize->getType() && + "Expecting the same type"); const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS); @@ -303,10 +309,33 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L, LLVM_DEBUG(dbgs().indent(4) << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" << *RefCost << "\n"); - } else + } else { + // If the indexed reference is not 'consecutive' the cost is proportional to + // the trip count and the depth of the dimension which the subject loop + // subscript is accessing. We try to estimate this by multiplying the cost + // by the trip counts of loops corresponding to the inner dimensions. For + // example, given the indexed reference 'A[i][j][k]', and assuming the + // i-loop is in the innermost position, the cost would be equal to the + // iterations of the i-loop multiplied by iterations of the j-loop. + RefCost = TripCount; + + int Index = getSubscriptIndex(L); + assert(Index >= 0 && "Cound not locate a valid Index"); + + for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) { + const SCEVAddRecExpr *AR = dyn_cast(getSubscript(I)); + assert(AR && AR->getLoop() && "Expecting valid loop"); + const SCEV *TripCount = + computeTripCount(*AR->getLoop(), *Sizes.back(), SE); + Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType()); + RefCost = SE.getMulExpr(SE.getNoopOrAnyExtend(RefCost, WiderType), + SE.getNoopOrAnyExtend(TripCount, WiderType)); + } + LLVM_DEBUG(dbgs().indent(4) - << "Access is not consecutive: RefCost=TripCount=" << *RefCost - << "\n"); + << "Access is not consecutive: RefCost=" << *RefCost << "\n"); + } + assert(RefCost && "Expecting a valid RefCost"); // Attempt to fold RefCost into a constant. if (auto ConstantCost = dyn_cast(RefCost)) @@ -319,6 +348,26 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L, return CacheCost::InvalidCost; } +bool IndexedReference::tryDelinearizeFixedSize( + const SCEV *AccessFn, SmallVectorImpl &Subscripts) { + SmallVector ArraySizes; + if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts, + ArraySizes)) + return false; + + // Populate Sizes with scev expressions to be used in calculations later. + for (auto Idx : seq(1, Subscripts.size())) + Sizes.push_back( + SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1])); + + LLVM_DEBUG({ + dbgs() << "Delinearized subscripts of fixed-size array\n" + << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst) + << "\n"; + }); + return true; +} + bool IndexedReference::delinearize(const LoopInfo &LI) { assert(Subscripts.empty() && "Subscripts should be empty"); assert(Sizes.empty() && "Sizes should be empty"); @@ -340,13 +389,25 @@ bool IndexedReference::delinearize(const LoopInfo &LI) { return false; } - AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); + bool IsFixedSize = false; + // Try to delinearize fixed-size arrays. + if (tryDelinearizeFixedSize(AccessFn, Subscripts)) { + IsFixedSize = true; + // The last element of Sizes is the element size. 
+ Sizes.push_back(ElemSize); + LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() + << "', AccessFn: " << *AccessFn << "\n"); + } - LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() - << "', AccessFn: " << *AccessFn << "\n"); + AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); - llvm::delinearize(SE, AccessFn, Subscripts, Sizes, - SE.getElementSize(&StoreOrLoadInst)); + // Try to delinearize parametric-size arrays. + if (!IsFixedSize) { + LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() + << "', AccessFn: " << *AccessFn << "\n"); + llvm::delinearize(SE, AccessFn, Subscripts, Sizes, + SE.getElementSize(&StoreOrLoadInst)); + } if (Subscripts.empty() || Sizes.empty() || Subscripts.size() != Sizes.size()) { @@ -424,6 +485,16 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); } +int IndexedReference::getSubscriptIndex(const Loop &L) const { + for (auto Idx : seq(0, getNumSubscripts())) { + const SCEVAddRecExpr *AR = dyn_cast(getSubscript(Idx)); + if (AR && AR->getLoop() == &L) { + return Idx; + } + } + return -1; +} + const SCEV *IndexedReference::getLastCoefficient() const { const SCEV *LastSubscript = getLastSubscript(); auto *AR = cast(LastSubscript); @@ -550,7 +621,7 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { bool Added = false; for (ReferenceGroupTy &RefGroup : RefGroups) { - const IndexedReference &Representative = *RefGroup.front().get(); + const IndexedReference &Representative = *RefGroup.front(); LLVM_DEBUG({ dbgs() << "References:\n"; dbgs().indent(2) << *R << "\n"; @@ -574,8 +645,8 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { Optional HasSpacialReuse = R->hasSpacialReuse(Representative, CLS, AA); - if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) || - (HasSpacialReuse.hasValue() && *HasSpacialReuse)) { + if ((HasTemporalReuse && *HasTemporalReuse) || + (HasSpacialReuse && *HasSpacialReuse)) { RefGroup.push_back(std::move(R)); Added = true; break; diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index b161c490a6bc..29c2437ff5ea 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/IVDescriptors.h" @@ -30,7 +29,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -38,9 +36,7 @@ #include "llvm/IR/PrintPasses.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops. @@ -740,6 +736,7 @@ void UnloopUpdater::updateBlockParents() { bool Changed = FoundIB; for (unsigned NIters = 0; Changed; ++NIters) { assert(NIters < Unloop.getNumBlocks() && "runaway iterative algorithm"); + (void) NIters; // Iterate over the postorder list of blocks, propagating the nearest loop // from successors to predecessors as before. 
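Alongside the functional changes, this import converts llvm::Optional call sites from hasValue()/getValue()/getValueOr() to the std::optional-style spellings, as in the getEdgeValue hunk earlier and the loop-attribute hunk that follows. A minimal sketch of the mapping, using a hypothetical helper (names here are illustrative only):

    #include "llvm/ADT/Optional.h"
    using llvm::Optional;

    static int pick(Optional<int> A, Optional<int> B) {
      if (!A)                 // was: !A.hasValue()
        return B.value_or(0); // was: B.getValueOr(0)
      return *A;              // was: A.getValue()
    }

The contextual-bool, operator*, and value_or forms all exist on llvm::Optional in this tree, as the converted call sites in this patch show, which keeps the code source-compatible with a later switch to std::optional.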
@@ -1085,13 +1082,13 @@ Optional llvm::getOptionalBoolLoopAttribute(const Loop *TheLoop, } bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { - return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); + return getOptionalBoolLoopAttribute(TheLoop, Name).value_or(false); } llvm::Optional llvm::getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name) { const MDOperand *AttrMD = - findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr); + findStringMetadataForLoop(TheLoop, Name).value_or(nullptr); if (!AttrMD) return None; @@ -1104,7 +1101,7 @@ llvm::Optional llvm::getOptionalIntLoopAttribute(const Loop *TheLoop, int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default) { - return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default); + return getOptionalIntLoopAttribute(TheLoop, Name).value_or(Default); } bool llvm::isFinite(const Loop *L) { diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp index 675bb7a7749c..bff796f339ab 100644 --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -13,8 +13,7 @@ #include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/ADT/BreadthFirstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/PostDominators.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/ValueTracking.h" using namespace llvm; diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp index b720bab454e9..5d824aece488 100644 --- a/llvm/lib/Analysis/LoopPass.cpp +++ b/llvm/lib/Analysis/LoopPass.cpp @@ -13,14 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/OptBisect.h" -#include "llvm/IR/PassManager.h" #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/PrintPasses.h" -#include "llvm/IR/StructuralHash.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" @@ -192,12 +190,12 @@ bool LPPassManager::runOnFunction(Function &F) { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); #ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(F); + uint64_t RefHash = P->structuralHash(F); #endif LocalChanged = P->runOnLoop(CurrentLoop, *this); #ifdef EXPENSIVE_CHECKS - if (!LocalChanged && (RefHash != StructuralHash(F))) { + if (!LocalChanged && (RefHash != P->structuralHash(F))) { llvm::errs() << "Pass modifies its input and doesn't report it: " << P->getPassName() << "\n"; llvm_unreachable("Pass modifies its input and doesn't report it"); diff --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 15095d67d385..84f1eff9a732 100644 --- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -13,7 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopUnrollAnalyzer.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Operator.h" using namespace llvm; @@ -84,9 +87,9 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI 
= dyn_cast(&I)) SimpleV = - SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + simplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); else - SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + SimpleV = simplifyBinOp(I.getOpcode(), LHS, RHS, DL); if (SimpleV) { SimplifiedValues[&I] = SimpleV; @@ -155,7 +158,7 @@ bool UnrolledInstAnalyzer::visitCastInst(CastInst &I) { // i32 0). if (CastInst::castIsValid(I.getOpcode(), Op, I.getType())) { const DataLayout &DL = I.getModule()->getDataLayout(); - if (Value *V = SimplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) { + if (Value *V = simplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) { SimplifiedValues[&I] = V; return true; } @@ -192,7 +195,7 @@ bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) { } const DataLayout &DL = I.getModule()->getDataLayout(); - if (Value *V = SimplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) { + if (Value *V = simplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) { SimplifiedValues[&I] = V; return true; } diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 0480c1cd2842..f55de71ea98a 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -13,30 +13,25 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ReleaseModeModelRunner.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Config/config.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Path.h" - -#include -#include -#include using namespace llvm; #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) +#include "llvm/Analysis/ReleaseModeModelRunner.h" // codegen-ed file #include "InlinerSizeModel.h" // NOLINT @@ -44,7 +39,7 @@ std::unique_ptr llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { auto AOTRunner = std::make_unique>( - M.getContext(), FeatureNameMap, DecisionName); + M.getContext(), FeatureMap, DecisionName); return std::make_unique(M, MAM, std::move(AOTRunner)); } #endif @@ -57,15 +52,21 @@ static cl::opt SizeIncreaseThreshold( "blocking any further inlining."), cl::init(2.0)); +static cl::opt KeepFPICache( + "ml-advisor-keep-fpi-cache", cl::Hidden, + cl::desc( + "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"), + cl::init(false)); + // clang-format off -const std::array llvm::FeatureNameMap{ +const std::array llvm::FeatureMap{ +#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec(NAME, {1} ), // InlineCost features - these must come first -#define POPULATE_NAMES(INDEX_NAME, NAME) NAME, INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES) #undef POPULATE_NAMES // Non-cost features -#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, +#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec(NAME, {1} ), INLINE_FEATURE_ITERATOR(POPULATE_NAMES) #undef POPULATE_NAMES }; @@ -138,7 
+139,10 @@ unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const { return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0; } -void MLInlineAdvisor::onPassEntry() { +void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) { + if (!LastSCC || ForceStop) + return; + FPICache.clear(); // Function passes executed between InlinerPass runs may have changed the // module-wide features. // The cgscc pass manager rules are such that: @@ -154,8 +158,8 @@ void MLInlineAdvisor::onPassEntry() { // care about the nature of the Edge (call or ref). NodeCount -= static_cast(NodesInLastSCC.size()); while (!NodesInLastSCC.empty()) { - const auto *N = NodesInLastSCC.front(); - NodesInLastSCC.pop_front(); + const auto *N = *NodesInLastSCC.begin(); + NodesInLastSCC.erase(N); // The Function wrapped by N could have been deleted since we last saw it. if (N->isDead()) { assert(!N->getFunction().isDeclaration()); @@ -168,34 +172,52 @@ void MLInlineAdvisor::onPassEntry() { assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration()); auto I = AllNodes.insert(AdjNode); if (I.second) - NodesInLastSCC.push_back(AdjNode); + NodesInLastSCC.insert(AdjNode); } } EdgeCount -= EdgesOfLastSeenNodes; EdgesOfLastSeenNodes = 0; + + // (Re)use NodesInLastSCC to remember the nodes in the SCC right now, + // in case the SCC is split before onPassExit and some nodes are split out + assert(NodesInLastSCC.empty()); + for (const auto &N : *LastSCC) + NodesInLastSCC.insert(&N); } void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) { - if (!LastSCC) + // No need to keep this around - function passes will invalidate it. + if (!KeepFPICache) + FPICache.clear(); + if (!LastSCC || ForceStop) return; // Keep track of the nodes and edges we last saw. Then, in onPassEntry, // we update the node count and edge count from the subset of these nodes that // survived. - assert(NodesInLastSCC.empty()); - assert(NodeCount >= LastSCC->size()); EdgesOfLastSeenNodes = 0; + + // Check on nodes that were in SCC onPassEntry + for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) { + if ((*I)->isDead()) + NodesInLastSCC.erase(*I++); + else + EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction()); + } + + // Check on nodes that may have got added to SCC for (const auto &N : *LastSCC) { assert(!N.isDead()); - EdgesOfLastSeenNodes += getLocalCalls(N.getFunction()); - NodesInLastSCC.push_back(&N); + auto I = NodesInLastSCC.insert(&N); + if (I.second) + EdgesOfLastSeenNodes += getLocalCalls(N.getFunction()); } + assert(NodeCount >= NodesInLastSCC.size()); assert(EdgeCount >= EdgesOfLastSeenNodes); } int64_t MLInlineAdvisor::getLocalCalls(Function &F) { - return FAM.getResult(F) - .DirectCallsToDefinedFunctions; + return getCachedFPI(F).DirectCallsToDefinedFunctions; } // Update the internal state of the advisor, and force invalidate feature @@ -208,13 +230,15 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, assert(!ForceStop); Function *Caller = Advice.getCaller(); Function *Callee = Advice.getCallee(); - // The caller features aren't valid anymore. { PreservedAnalyses PA = PreservedAnalyses::all(); PA.abandon(); + PA.abandon(); + PA.abandon(); FAM.invalidate(*Caller, PA); } + Advice.updateCachedCallerFPI(FAM); int64_t IRSizeAfter = getIRSize(*Caller) + (CalleeWasDeleted ? 
0 : Advice.CalleeIRSize); CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); @@ -227,15 +251,13 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, // For edges, we 'forget' the edges that the caller and callee used to have // before inlining, and add back what they currently have together. int64_t NewCallerAndCalleeEdges = - FAM.getResult(*Caller) - .DirectCallsToDefinedFunctions; + getCachedFPI(*Caller).DirectCallsToDefinedFunctions; if (CalleeWasDeleted) --NodeCount; else NewCallerAndCalleeEdges += - FAM.getResult(*Callee) - .DirectCallsToDefinedFunctions; + getCachedFPI(*Callee).DirectCallsToDefinedFunctions; EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); } @@ -248,7 +270,19 @@ int64_t MLInlineAdvisor::getModuleIRSize() const { return Ret; } +FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const { + auto InsertPair = + FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo())); + if (!InsertPair.second) + return InsertPair.first->second; + InsertPair.first->second = FAM.getResult(F); + return InsertPair.first->second; +} + std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { + if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB)) + return Skip; + auto &Caller = *CB.getCaller(); auto &Callee = *CB.getCalledFunction(); @@ -307,8 +341,8 @@ std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { NrCtantParams += (isa(*I)); } - auto &CallerBefore = FAM.getResult(Caller); - auto &CalleeBefore = FAM.getResult(Callee); + auto &CallerBefore = getCachedFPI(Caller); + auto &CalleeBefore = getCachedFPI(Callee); *ModelRunner->getTensor(FeatureIndex::CalleeBasicBlockCount) = CalleeBefore.BasicBlockCount; @@ -348,9 +382,19 @@ MLInlineAdvisor::getAdviceFromModel(CallBase &CB, this, CB, ORE, static_cast(ModelRunner->evaluate())); } +std::unique_ptr +MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) { + if (!FAM.getResult(*CB.getCaller()) + .isReachableFromEntry(CB.getParent())) + return std::make_unique(this, CB, getCallerORE(CB), false); + return nullptr; +} + std::unique_ptr MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, bool Advice) { // Make sure we track inlinings in all cases - mandatory or not. + if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB)) + return Skip; if (Advice && !ForceStop) return getMandatoryAdviceImpl(CB); @@ -366,16 +410,47 @@ MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { return std::make_unique(this, CB, getCallerORE(CB), true); } +void MLInlineAdvisor::print(raw_ostream &OS) const { + OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount + << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n"; + OS << "[MLInlineAdvisor] FPI:\n"; + for (auto I : FPICache) { + OS << I.getFirst()->getName() << ":\n"; + I.getSecond().print(OS); + OS << "\n"; + } + OS << "\n"; +} + +MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB, + OptimizationRemarkEmitter &ORE, + bool Recommendation) + : InlineAdvice(Advisor, CB, ORE, Recommendation), + CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)), + CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)), + CallerAndCalleeEdges(Advisor->isForcedToStop() + ? 
0 + : (Advisor->getLocalCalls(*Caller) + + Advisor->getLocalCalls(*Callee))), + PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) { + if (Recommendation) + FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB); +} + void MLInlineAdvice::reportContextForRemark( DiagnosticInfoOptimizationBase &OR) { using namespace ore; OR << NV("Callee", Callee->getName()); for (size_t I = 0; I < NumberOfFeatures; ++I) - OR << NV(FeatureNameMap[I], + OR << NV(FeatureMap[I].name(), *getAdvisor()->getModelRunner().getTensor(I)); OR << NV("ShouldInline", isInliningRecommended()); } +void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const { + FPU->finish(FAM); +} + void MLInlineAdvice::recordInliningImpl() { ORE.emit([&]() { OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); @@ -397,6 +472,7 @@ void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { void MLInlineAdvice::recordUnsuccessfulInliningImpl( const InlineResult &Result) { + getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI; ORE.emit([&]() { OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", DLoc, Block); @@ -405,6 +481,7 @@ void MLInlineAdvice::recordUnsuccessfulInliningImpl( }); } void MLInlineAdvice::recordUnattemptedInliningImpl() { + assert(!FPU); ORE.emit([&]() { OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); reportContextForRemark(R); diff --git a/llvm/lib/Analysis/MemDepPrinter.cpp b/llvm/lib/Analysis/MemDepPrinter.cpp index 00642347102a..305ae3e2a992 100644 --- a/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/llvm/lib/Analysis/MemDepPrinter.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp index 82617c7256a5..4dd5c76cc604 100644 --- a/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -9,14 +9,11 @@ #include "llvm/Analysis/MemDerefPrinter.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Passes.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 208f93aa1ac6..91501b04448e 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -17,7 +17,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/Utils/Local.h" @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include using namespace llvm; @@ -62,6 +64,42 @@ enum AllocType : uint8_t { AnyAlloc = AllocLike | ReallocLike }; +enum class MallocFamily { + Malloc, + CPPNew, // new(unsigned int) + CPPNewAligned, // new(unsigned int, align_val_t) + CPPNewArray, // new[](unsigned int) + CPPNewArrayAligned, // new[](unsigned long, align_val_t) + MSVCNew, // new(unsigned int) + MSVCArrayNew, // new[](unsigned int) + 
VecMalloc, + KmpcAllocShared, +}; + +StringRef mangledNameForMallocFamily(const MallocFamily &Family) { + switch (Family) { + case MallocFamily::Malloc: + return "malloc"; + case MallocFamily::CPPNew: + return "_Znwm"; + case MallocFamily::CPPNewAligned: + return "_ZnwmSt11align_val_t"; + case MallocFamily::CPPNewArray: + return "_Znam"; + case MallocFamily::CPPNewArrayAligned: + return "_ZnamSt11align_val_t"; + case MallocFamily::MSVCNew: + return "??2@YAPAXI@Z"; + case MallocFamily::MSVCArrayNew: + return "??_U@YAPAXI@Z"; + case MallocFamily::VecMalloc: + return "vec_malloc"; + case MallocFamily::KmpcAllocShared: + return "__kmpc_alloc_shared"; + } + llvm_unreachable("missing an alloc family"); +} + struct AllocFnsTy { AllocType AllocTy; unsigned NumParams; @@ -69,50 +107,55 @@ struct AllocFnsTy { int FstParam, SndParam; // Alignment parameter for aligned_alloc and aligned new int AlignParam; + // Name of default allocator function to group malloc/free calls by family + MallocFamily Family; }; +// clang-format off // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. static const std::pair AllocationFnData[] = { - {LibFunc_malloc, {MallocLike, 1, 0, -1, -1}}, - {LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1}}, - {LibFunc_valloc, {MallocLike, 1, 0, -1, -1}}, - {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int) - {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow) - {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned int, align_val_t) - {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned int, align_val_t, nothrow) - {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long) - {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long, nothrow) - {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned long, align_val_t) - {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned long, align_val_t, nothrow) - {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int) - {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow) - {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned int, align_val_t) - {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned int, align_val_t, nothrow) - {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long) - {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long, nothrow) - {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned long, align_val_t) - {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned long, align_val_t, nothrow) - {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int) - {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow) - {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long long) - {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long long, nothrow) - {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int) - {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow) - {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long long) - {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 
2, 0, -1, -1}}, // new[](unsigned long long, nothrow) - {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0}}, - {LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0}}, - {LibFunc_calloc, {CallocLike, 2, 0, 1, -1}}, - {LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1}}, - {LibFunc_realloc, {ReallocLike, 2, 1, -1, -1}}, - {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1}}, - {LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1}}, - {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1}}, - {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1}}, - {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1}}, - // TODO: Handle "int posix_memalign(void **, size_t, size_t)" + {LibFunc_malloc, {MallocLike, 1, 0, -1, -1, MallocFamily::Malloc}}, + {LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1, MallocFamily::VecMalloc}}, + {LibFunc_valloc, {MallocLike, 1, 0, -1, -1, MallocFamily::Malloc}}, + {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int) + {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int, nothrow) + {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t) + {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t, nothrow) + {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long) + {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow) + {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t) + {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow) + {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int) + {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int, nothrow) + {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t) + {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t, nothrow) + {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long) + {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long, nothrow) + {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t) + {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t, nothrow) + {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int) + {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int, nothrow) + {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long) + {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long, nothrow) + {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int) + {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int, nothrow) + {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1, 
MallocFamily::MSVCArrayNew}}, // new[](unsigned long long) + {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long, nothrow) + {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0, MallocFamily::Malloc}}, + {LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0, MallocFamily::Malloc}}, + {LibFunc_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::Malloc}}, + {LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::VecMalloc}}, + {LibFunc_realloc, {ReallocLike, 2, 1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1, MallocFamily::VecMalloc}}, + {LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_dunder_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_dunder_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, + {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1, MallocFamily::KmpcAllocShared}}, }; +// clang-format on static const Function *getCalledFunction(const Value *V, bool &IsNoBuiltin) { @@ -217,7 +260,7 @@ static Optional getAllocationSize(const Value *V, Result.AllocTy = MallocLike; Result.NumParams = Callee->getNumOperands(); Result.FstParam = Args.first; - Result.SndParam = Args.second.getValueOr(-1); + Result.SndParam = Args.second.value_or(-1); // Allocsize has no way to specify an alignment argument Result.AlignParam = -1; return Result; @@ -227,54 +270,53 @@ static Optional getAllocationSize(const Value *V, /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AnyAlloc, TLI).hasValue(); + return getAllocationData(V, AnyAlloc, TLI).has_value(); } bool llvm::isAllocationFn( const Value *V, function_ref GetTLI) { - return getAllocationData(V, AnyAlloc, GetTLI).hasValue(); + return getAllocationData(V, AnyAlloc, GetTLI).has_value(); } /// Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, MallocOrOpNewLike, TLI).hasValue(); + return getAllocationData(V, MallocOrOpNewLike, TLI).has_value(); } /// Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory with alignment (such as aligned_alloc). static bool isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AlignedAllocLike, TLI) - .hasValue(); + return getAllocationData(V, AlignedAllocLike, TLI).has_value(); } /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). static bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, CallocLike, TLI).hasValue(); + return getAllocationData(V, CallocLike, TLI).has_value(); } /// Tests if a value is a call or invoke to a library function that /// allocates memory similar to malloc or calloc. 
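The rewritten AllocationFnData table above pairs each LibFunc with a record that now also carries its MallocFamily, so allocation and deallocation calls can be grouped by family; lookups stay a linear find_if over the array. A self-contained sketch of that table-driven pattern, with simplified, invented types (not the actual AllocFnsTy layout), shown here before the predicate the next doc comment introduces:

    #include <algorithm>
    #include <iterator>
    #include <optional>
    #include <utility>

    enum class LibFn { Malloc, Calloc, OpNew };  // stand-in for LibFunc
    enum class Family { Malloc, CPPNew };        // stand-in for MallocFamily

    struct AllocRecord {
      unsigned NumParams;
      Family Fam;
    };

    // Keyed array mirrors the shape of AllocationFnData: a linear scan is
    // fine for a table this small and keeps all metadata in one place.
    static const std::pair<LibFn, AllocRecord> AllocTable[] = {
        {LibFn::Malloc, {1, Family::Malloc}},
        {LibFn::Calloc, {2, Family::Malloc}},
        {LibFn::OpNew, {1, Family::CPPNew}},
    };

    static std::optional<AllocRecord> lookupAlloc(LibFn F) {
      const auto *It =
          std::find_if(std::begin(AllocTable), std::end(AllocTable),
                       [F](const auto &P) { return P.first == F; });
      if (It == std::end(AllocTable))
        return std::nullopt;
      return It->second;
    }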
bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, MallocOrCallocLike, TLI).hasValue(); + return getAllocationData(V, MallocOrCallocLike, TLI).has_value(); } /// Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AllocLike, TLI).hasValue(); + return getAllocationData(V, AllocLike, TLI).has_value(); } /// Tests if a value is a call or invoke to a library function that /// reallocates memory (e.g., realloc). bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, ReallocLike, TLI).hasValue(); + return getAllocationData(V, ReallocLike, TLI).has_value(); } /// Tests if a functions is a call or invoke to a library function that /// reallocates memory (e.g., realloc). bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { - return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); + return getAllocationDataForFunction(F, ReallocLike, TLI).has_value(); } bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) { @@ -291,13 +333,11 @@ bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) { Value *llvm::getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI) { - assert(isAllocationFn(V, TLI)); - const Optional FnData = getAllocationData(V, AnyAlloc, TLI); - if (!FnData.hasValue() || FnData->AlignParam < 0) { - return nullptr; + if (FnData && FnData->AlignParam >= 0) { + return V->getOperand(FnData->AlignParam); } - return V->getOperand(FnData->AlignParam); + return V->getArgOperandWithAttribute(Attribute::AllocAlign); } /// When we're compiling N-bit code, and the user uses parameters that are @@ -344,7 +384,7 @@ llvm::getAllocSize(const CallBase *CB, if (!Arg) return None; - APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); + APInt MaxSize = Arg->getValue().zext(IntTyBits); if (Size.ugt(MaxSize)) Size = MaxSize + 1; } @@ -379,10 +419,12 @@ llvm::getAllocSize(const CallBase *CB, return Size; } -Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc, +Constant *llvm::getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty) { - assert(isAllocationFn(Alloc, TLI)); + auto *Alloc = dyn_cast(V); + if (!Alloc) + return nullptr; // malloc and aligned_alloc are uninitialized (undef) if (isMallocLikeFn(Alloc, TLI) || isAlignedAllocLikeFn(Alloc, TLI)) @@ -395,43 +437,81 @@ Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc, return nullptr; } +struct FreeFnsTy { + unsigned NumParams; + // Name of default allocator function to group malloc/free calls by family + MallocFamily Family; +}; + +// clang-format off +static const std::pair FreeFnData[] = { + {LibFunc_free, {1, MallocFamily::Malloc}}, + {LibFunc_vec_free, {1, MallocFamily::VecMalloc}}, + {LibFunc_ZdlPv, {1, MallocFamily::CPPNew}}, // operator delete(void*) + {LibFunc_ZdaPv, {1, MallocFamily::CPPNewArray}}, // operator delete[](void*) + {LibFunc_msvc_delete_ptr32, {1, MallocFamily::MSVCNew}}, // operator delete(void*) + {LibFunc_msvc_delete_ptr64, {1, MallocFamily::MSVCNew}}, // operator delete(void*) + {LibFunc_msvc_delete_array_ptr32, {1, MallocFamily::MSVCArrayNew}}, // operator delete[](void*) + {LibFunc_msvc_delete_array_ptr64, {1, MallocFamily::MSVCArrayNew}}, // operator delete[](void*) + {LibFunc_ZdlPvj, {2, 
MallocFamily::CPPNew}}, // delete(void*, uint) + {LibFunc_ZdlPvm, {2, MallocFamily::CPPNew}}, // delete(void*, ulong) + {LibFunc_ZdlPvRKSt9nothrow_t, {2, MallocFamily::CPPNew}}, // delete(void*, nothrow) + {LibFunc_ZdlPvSt11align_val_t, {2, MallocFamily::CPPNewAligned}}, // delete(void*, align_val_t) + {LibFunc_ZdaPvj, {2, MallocFamily::CPPNewArray}}, // delete[](void*, uint) + {LibFunc_ZdaPvm, {2, MallocFamily::CPPNewArray}}, // delete[](void*, ulong) + {LibFunc_ZdaPvRKSt9nothrow_t, {2, MallocFamily::CPPNewArray}}, // delete[](void*, nothrow) + {LibFunc_ZdaPvSt11align_val_t, {2, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t) + {LibFunc_msvc_delete_ptr32_int, {2, MallocFamily::MSVCNew}}, // delete(void*, uint) + {LibFunc_msvc_delete_ptr64_longlong, {2, MallocFamily::MSVCNew}}, // delete(void*, ulonglong) + {LibFunc_msvc_delete_ptr32_nothrow, {2, MallocFamily::MSVCNew}}, // delete(void*, nothrow) + {LibFunc_msvc_delete_ptr64_nothrow, {2, MallocFamily::MSVCNew}}, // delete(void*, nothrow) + {LibFunc_msvc_delete_array_ptr32_int, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, uint) + {LibFunc_msvc_delete_array_ptr64_longlong, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, ulonglong) + {LibFunc_msvc_delete_array_ptr32_nothrow, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, nothrow) + {LibFunc_msvc_delete_array_ptr64_nothrow, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, nothrow) + {LibFunc___kmpc_free_shared, {2, MallocFamily::KmpcAllocShared}}, // OpenMP Offloading RTL free + {LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, align_val_t, nothrow) + {LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t, nothrow) + {LibFunc_ZdlPvjSt11align_val_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, unsigned int, align_val_t) + {LibFunc_ZdlPvmSt11align_val_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, unsigned long, align_val_t) + {LibFunc_ZdaPvjSt11align_val_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, unsigned int, align_val_t) + {LibFunc_ZdaPvmSt11align_val_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, unsigned long, align_val_t) +}; +// clang-format on + +Optional getFreeFunctionDataForFunction(const Function *Callee, + const LibFunc TLIFn) { + const auto *Iter = + find_if(FreeFnData, [TLIFn](const std::pair &P) { + return P.first == TLIFn; + }); + if (Iter == std::end(FreeFnData)) + return None; + return Iter->second; +} + +Optional llvm::getAllocationFamily(const Value *I, + const TargetLibraryInfo *TLI) { + bool IsNoBuiltin; + const Function *Callee = getCalledFunction(I, IsNoBuiltin); + if (Callee == nullptr || IsNoBuiltin) + return None; + LibFunc TLIFn; + if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) + return None; + const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI); + if (AllocData) + return mangledNameForMallocFamily(AllocData.getValue().Family); + const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn); + if (FreeData) + return mangledNameForMallocFamily(FreeData.getValue().Family); + return None; +} + /// isLibFreeFunction - Returns true if the function is a builtin free() bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { - unsigned ExpectedNumParams; - if (TLIFn == LibFunc_free || - TLIFn == LibFunc_ZdlPv || // operator delete(void*) - TLIFn == LibFunc_ZdaPv || // operator delete[](void*) - TLIFn == 
LibFunc_msvc_delete_ptr32 || // operator delete(void*) - TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*) - TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*) - TLIFn == LibFunc_msvc_delete_array_ptr64) // operator delete[](void*) - ExpectedNumParams = 1; - else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint) - TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong) - TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) - TLIFn == LibFunc_ZdlPvSt11align_val_t || // delete(void*, align_val_t) - TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint) - TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) - TLIFn == LibFunc_ZdaPvSt11align_val_t || // delete[](void*, align_val_t) - TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint) - TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong) - TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow) - TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow) - TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint) - TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) - TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) - TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow || // delete[](void*, nothrow) - TLIFn == LibFunc___kmpc_free_shared) // OpenMP Offloading RTL free - ExpectedNumParams = 2; - else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow) - TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow) - TLIFn == LibFunc_ZdlPvjSt11align_val_t || // delete(void*, unsigned long, align_val_t) - TLIFn == LibFunc_ZdlPvmSt11align_val_t || // delete(void*, unsigned long, align_val_t) - TLIFn == LibFunc_ZdaPvjSt11align_val_t || // delete[](void*, unsigned int, align_val_t) - TLIFn == LibFunc_ZdaPvmSt11align_val_t) // delete[](void*, unsigned long, align_val_t) - ExpectedNumParams = 3; - else + Optional FnData = getFreeFunctionDataForFunction(F, TLIFn); + if (!FnData) return false; // Check free prototype. @@ -440,7 +520,7 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { FunctionType *FTy = F->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) return false; - if (FTy->getNumParams() != ExpectedNumParams) + if (FTy->getNumParams() != FnData->NumParams) return false; if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext())) return false; @@ -491,11 +571,21 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed) { + return lowerObjectSizeCall(ObjectSize, DL, TLI, /*AAResults=*/nullptr, + MustSucceed); +} + +Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, + const DataLayout &DL, + const TargetLibraryInfo *TLI, AAResults *AA, + bool MustSucceed) { assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize && "ObjectSize must be a call to llvm.objectsize!"); bool MaxVal = cast(ObjectSize->getArgOperand(1))->isZero(); ObjectSizeOpts EvalOptions; + EvalOptions.AA = AA; + // Unless we have to fold this to something, try to be as accurate as // possible. 
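As the hunk above shows, lowerObjectSizeCall now accepts an AAResults and threads it into ObjectSizeOpts, while the original signature survives as a one-line forwarder. A generic sketch of that API-evolution pattern, with hypothetical names rather than the LLVM signatures:

    // Stand-in for AAResults; in the patch an AAResults* rides along inside
    // ObjectSizeOpts so visitLoadInst can scan stores when it is present.
    struct AliasInfo {};

    struct Options {
      const AliasInfo *AA = nullptr; // optional capability, off by default
    };

    // New, more capable entry point: behavior degrades gracefully when the
    // extra analysis is absent.
    inline int computeSize(const void *Obj, const AliasInfo *AA) {
      Options Opts;
      Opts.AA = AA;
      return (Obj && Opts.AA) ? 2 : 1; // placeholder result
    }

    // Old signature kept as a forwarder so existing callers compile
    // unchanged while new callers opt in to alias analysis.
    inline int computeSize(const void *Obj) {
      return computeSize(Obj, /*AA=*/nullptr);
    }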
if (MustSucceed) @@ -559,7 +649,7 @@ STATISTIC(ObjectVisitorLoad, APInt ObjectSizeOffsetVisitor::align(APInt Size, MaybeAlign Alignment) { if (Options.RoundToAlign && Alignment) - return APInt(IntTyBits, alignTo(Size.getZExtValue(), Alignment)); + return APInt(IntTyBits, alignTo(Size.getZExtValue(), *Alignment)); return Size; } @@ -573,18 +663,48 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, } SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { + unsigned InitialIntTyBits = DL.getIndexTypeSizeInBits(V->getType()); + + // Stripping pointer casts can strip address space casts which can change the + // index type size. The invariant is that we use the value type to determine + // the index type size and if we stripped address space casts we have to + // readjust the APInt as we pass it upwards in order for the APInt to match + // the type the caller passed in. + APInt Offset(InitialIntTyBits, 0); + V = V->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true, /* AllowInvariantGroup */ true); + + // Later we use the index type size and zero but it will match the type of the + // value that is passed to computeImpl. IntTyBits = DL.getIndexTypeSizeInBits(V->getType()); Zero = APInt::getZero(IntTyBits); - V = V->stripPointerCasts(); + bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits; + if (!IndexTypeSizeChanged && Offset.isZero()) + return computeImpl(V); + + // We stripped an address space cast that changed the index type size or we + // accumulated some constant offset (or both). Readjust the bit width to match + // the argument index type size and apply the offset, as required. + SizeOffsetType SOT = computeImpl(V); + if (IndexTypeSizeChanged) { + if (knownSize(SOT) && !::CheckedZextOrTrunc(SOT.first, InitialIntTyBits)) + SOT.first = APInt(); + if (knownOffset(SOT) && !::CheckedZextOrTrunc(SOT.second, InitialIntTyBits)) + SOT.second = APInt(); + } + // If the computed offset is "unknown" we cannot add the stripped offset. + return {SOT.first, + SOT.second.getBitWidth() > 1 ? SOT.second + Offset : SOT.second}; +} + +SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) { if (Instruction *I = dyn_cast(V)) { // If we have already seen this instruction, bail out. Cycles can happen in // unreachable code after constant propagation. 
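The width readjustment in the new compute() above leans on a checked zext-or-trunc over APInt: widening is always safe, narrowing only when no set bits are dropped. A minimal sketch of such a helper (the file's actual ::CheckedZextOrTrunc may differ in detail):

    #include "llvm/ADT/APInt.h"

    using llvm::APInt;

    // Adjust I to NewWidth in place. Returns false when truncation would
    // lose set bits, which callers treat as "size/offset unknown".
    static bool checkedZextOrTrunc(APInt &I, unsigned NewWidth) {
      if (NewWidth > I.getBitWidth()) {
        I = I.zext(NewWidth);
      } else if (NewWidth < I.getBitWidth()) {
        if (!I.isIntN(NewWidth)) // value would not survive the trunc
          return false;
        I = I.trunc(NewWidth);
      }
      return true;
    }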
if (!SeenInsts.insert(I).second) return unknown(); - if (GEPOperator *GEP = dyn_cast(V)) - return visitGEPOperator(*GEP); return visit(*I); } if (Argument *A = dyn_cast(V)) @@ -597,12 +717,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast(V)) return visitUndefValue(*UV); - if (ConstantExpr *CE = dyn_cast(V)) { - if (CE->getOpcode() == Instruction::IntToPtr) - return unknown(); // clueless - if (CE->getOpcode() == Instruction::GetElementPtr) - return visitGEPOperator(cast(*CE)); - } LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V << '\n'); @@ -617,10 +731,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); - if (isa(I.getAllocatedType())) + TypeSize ElemSize = DL.getTypeAllocSize(I.getAllocatedType()); + if (ElemSize.isScalable() && Options.EvalMode != ObjectSizeOpts::Mode::Min) return unknown(); - - APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType())); + APInt Size(IntTyBits, ElemSize.getKnownMinSize()); if (!I.isArrayAllocation()) return std::make_pair(align(Size, I.getAlign()), Zero); @@ -682,15 +796,6 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { - SizeOffsetType PtrData = compute(GEP.getPointerOperand()); - APInt Offset(DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()), 0); - if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset)) - return unknown(); - - return std::make_pair(PtrData.first, PtrData.second + Offset); -} - SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { if (GA.isInterposable()) return unknown(); @@ -710,42 +815,161 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { - ++ObjectVisitorLoad; - return unknown(); -} +SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset( + LoadInst &Load, BasicBlock &BB, BasicBlock::iterator From, + SmallDenseMap &VisitedBlocks, + unsigned &ScannedInstCount) { + constexpr unsigned MaxInstsToScan = 128; + + auto Where = VisitedBlocks.find(&BB); + if (Where != VisitedBlocks.end()) + return Where->second; + + auto Unknown = [this, &BB, &VisitedBlocks]() { + return VisitedBlocks[&BB] = unknown(); + }; + auto Known = [&BB, &VisitedBlocks](SizeOffsetType SO) { + return VisitedBlocks[&BB] = SO; + }; + + do { + Instruction &I = *From; + + if (I.isDebugOrPseudoInst()) + continue; + + if (++ScannedInstCount > MaxInstsToScan) + return Unknown(); + + if (!I.mayWriteToMemory()) + continue; + + if (auto *SI = dyn_cast(&I)) { + AliasResult AR = + Options.AA->alias(SI->getPointerOperand(), Load.getPointerOperand()); + switch ((AliasResult::Kind)AR) { + case AliasResult::NoAlias: + continue; + case AliasResult::MustAlias: + if (SI->getValueOperand()->getType()->isPointerTy()) + return Known(compute(SI->getValueOperand())); + else + return Unknown(); // No handling of non-pointer values by `compute`. + default: + return Unknown(); + } + } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { - // too complex to analyze statically. - return unknown(); + if (auto *CB = dyn_cast(&I)) { + Function *Callee = CB->getCalledFunction(); + // Bail out on indirect call. 
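+      // Without a resolvable callee there is no LibFunc to query, so the
+      // backward scan must conservatively treat this write as unknown.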
+ if (!Callee) + return Unknown(); + + LibFunc TLIFn; + if (!TLI || !TLI->getLibFunc(*CB->getCalledFunction(), TLIFn) || + !TLI->has(TLIFn)) + return Unknown(); + + // TODO: There's probably more interesting case to support here. + if (TLIFn != LibFunc_posix_memalign) + return Unknown(); + + AliasResult AR = + Options.AA->alias(CB->getOperand(0), Load.getPointerOperand()); + switch ((AliasResult::Kind)AR) { + case AliasResult::NoAlias: + continue; + case AliasResult::MustAlias: + break; + default: + return Unknown(); + } + + // Is the error status of posix_memalign correctly checked? If not it + // would be incorrect to assume it succeeds and load doesn't see the + // previous value. + Optional Checked = isImpliedByDomCondition( + ICmpInst::ICMP_EQ, CB, ConstantInt::get(CB->getType(), 0), &Load, DL); + if (!Checked || !*Checked) + return Unknown(); + + Value *Size = CB->getOperand(2); + auto *C = dyn_cast(Size); + if (!C) + return Unknown(); + + return Known({C->getValue(), APInt(C->getValue().getBitWidth(), 0)}); + } + + return Unknown(); + } while (From-- != BB.begin()); + + SmallVector PredecessorSizeOffsets; + for (auto *PredBB : predecessors(&BB)) { + PredecessorSizeOffsets.push_back(findLoadSizeOffset( + Load, *PredBB, BasicBlock::iterator(PredBB->getTerminator()), + VisitedBlocks, ScannedInstCount)); + if (!bothKnown(PredecessorSizeOffsets.back())) + return Unknown(); + } + + if (PredecessorSizeOffsets.empty()) + return Unknown(); + + return Known(std::accumulate(PredecessorSizeOffsets.begin() + 1, + PredecessorSizeOffsets.end(), + PredecessorSizeOffsets.front(), + [this](SizeOffsetType LHS, SizeOffsetType RHS) { + return combineSizeOffset(LHS, RHS); + })); } -SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { - SizeOffsetType TrueSide = compute(I.getTrueValue()); - SizeOffsetType FalseSide = compute(I.getFalseValue()); - if (bothKnown(TrueSide) && bothKnown(FalseSide)) { - if (TrueSide == FalseSide) { - return TrueSide; - } +SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst &LI) { + if (!Options.AA) { + ++ObjectVisitorLoad; + return unknown(); + } - APInt TrueResult = getSizeWithOverflow(TrueSide); - APInt FalseResult = getSizeWithOverflow(FalseSide); + SmallDenseMap VisitedBlocks; + unsigned ScannedInstCount = 0; + SizeOffsetType SO = + findLoadSizeOffset(LI, *LI.getParent(), BasicBlock::iterator(LI), + VisitedBlocks, ScannedInstCount); + if (!bothKnown(SO)) + ++ObjectVisitorLoad; + return SO; +} - if (TrueResult == FalseResult) { - return TrueSide; - } - if (Options.EvalMode == ObjectSizeOpts::Mode::Min) { - if (TrueResult.slt(FalseResult)) - return TrueSide; - return FalseSide; - } - if (Options.EvalMode == ObjectSizeOpts::Mode::Max) { - if (TrueResult.sgt(FalseResult)) - return TrueSide; - return FalseSide; - } +SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS, + SizeOffsetType RHS) { + if (!bothKnown(LHS) || !bothKnown(RHS)) + return unknown(); + + switch (Options.EvalMode) { + case ObjectSizeOpts::Mode::Min: + return (getSizeWithOverflow(LHS).slt(getSizeWithOverflow(RHS))) ? LHS : RHS; + case ObjectSizeOpts::Mode::Max: + return (getSizeWithOverflow(LHS).sgt(getSizeWithOverflow(RHS))) ? LHS : RHS; + case ObjectSizeOpts::Mode::Exact: + return (getSizeWithOverflow(LHS).eq(getSizeWithOverflow(RHS))) ? 
LHS + : unknown(); } - return unknown(); + llvm_unreachable("missing an eval mode"); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) { + auto IncomingValues = PN.incoming_values(); + return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(), + compute(*IncomingValues.begin()), + [this](SizeOffsetType LHS, Value *VRHS) { + return combineSizeOffset(LHS, compute(VRHS)); + }); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { + return combineSizeOffset(compute(I.getTrueValue()), + compute(I.getFalseValue())); } SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { @@ -790,7 +1014,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { // Erase any instructions we inserted as part of the traversal. for (Instruction *I : InsertedInstructions) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); I->eraseFromParent(); } } @@ -919,7 +1143,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) { return unknown(); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) { +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst &LI) { return unknown(); } @@ -937,10 +1161,10 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); if (!bothKnown(EdgeData)) { - OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy)); + OffsetPHI->replaceAllUsesWith(PoisonValue::get(IntTy)); OffsetPHI->eraseFromParent(); InsertedInstructions.erase(OffsetPHI); - SizePHI->replaceAllUsesWith(UndefValue::get(IntTy)); + SizePHI->replaceAllUsesWith(PoisonValue::get(IntTy)); SizePHI->eraseFromParent(); InsertedInstructions.erase(SizePHI); return unknown(); diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 36df462c7a66..690d575ef979 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -27,11 +27,7 @@ #include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -44,7 +40,6 @@ #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" -#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -53,10 +48,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include #include -#include #include #include @@ -414,20 +407,17 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( isInvariantLoad = true; } - // Return "true" if and only if the instruction I is either a non-simple - // load or a non-simple store. - auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool { + // True for volatile instruction. + // For Load/Store return true if atomic ordering is stronger than AO, + // for other instruction just true if it can read or write to memory. 
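The comment above introduces a single ordering-aware predicate (the lambda follows below) that replaces the old isNonSimpleLoadOrStore / isOtherMemAccess pair. The same logic as a standalone free function, a sketch assuming only the IR and AtomicOrdering headers:

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/AtomicOrdering.h"

    using namespace llvm;

    // True when I cannot be freely reordered relative to an access with
    // ordering AO: volatile always blocks; loads and stores block when their
    // ordering is stronger than AO; any other instruction blocks only if it
    // may touch memory at all.
    static bool isComplexForReordering(const Instruction *I,
                                       AtomicOrdering AO) {
      if (I->isVolatile())
        return true;
      if (const auto *LI = dyn_cast<LoadInst>(I))
        return isStrongerThan(LI->getOrdering(), AO);
      if (const auto *SI = dyn_cast<StoreInst>(I))
        return isStrongerThan(SI->getOrdering(), AO);
      return I->mayReadOrWriteMemory();
    }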
+ auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool { + if (I->isVolatile()) + return true; if (auto *LI = dyn_cast(I)) - return !LI->isSimple(); + return isStrongerThan(LI->getOrdering(), AO); if (auto *SI = dyn_cast(I)) - return !SI->isSimple(); - return false; - }; - - // Return "true" if I is not a load and not a store, but it does access - // memory. - auto isOtherMemAccess = [](Instruction *I) -> bool { - return !isa(I) && !isa(I) && I->mayReadOrWriteMemory(); + return isStrongerThan(SI->getOrdering(), AO); + return I->mayReadOrWriteMemory(); }; // Walk backwards through the basic block, looking for dependencies. @@ -500,8 +490,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // atomic. // FIXME: This is overly conservative. if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) { - if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) || - isOtherMemAccess(QueryInst)) + if (!QueryInst || + isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic)) return MemDepResult::getClobber(LI); if (LI->getOrdering() != AtomicOrdering::Monotonic) return MemDepResult::getClobber(LI); @@ -512,10 +502,10 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // If we found a pointer, check if it could be the same as our pointer. AliasResult R = BatchAA.alias(LoadLoc, MemLoc); - if (isLoad) { - if (R == AliasResult::NoAlias) - continue; + if (R == AliasResult::NoAlias) + continue; + if (isLoad) { // Must aliased loads are defs of each other. if (R == AliasResult::MustAlias) return MemDepResult::getDef(Inst); @@ -532,10 +522,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( continue; } - // Stores don't depend on other no-aliased accesses. - if (R == AliasResult::NoAlias) - continue; - // Stores don't alias loads from read-only memory. if (BatchAA.pointsToConstantMemory(LoadLoc)) continue; @@ -549,20 +535,25 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // A Monotonic store is OK if the query inst is itself not atomic. // FIXME: This is overly conservative. if (!SI->isUnordered() && SI->isAtomic()) { - if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) || - isOtherMemAccess(QueryInst)) - return MemDepResult::getClobber(SI); - if (SI->getOrdering() != AtomicOrdering::Monotonic) + if (!QueryInst || + isComplexForReordering(QueryInst, AtomicOrdering::Unordered)) return MemDepResult::getClobber(SI); + // Ok, if we are here the guard above guarantee us that + // QueryInst is a non-atomic or unordered load/store. + // SI is atomic with monotonic or release semantic (seq_cst for store + // is actually a release semantic plus total order over other seq_cst + // instructions, as soon as QueryInst is not seq_cst we can consider it + // as simple release semantic). + // Monotonic and Release semantic allows re-ordering before store + // so we are safe to go further and check the aliasing. It will prohibit + // re-ordering in case locations are may or must alias. } - // FIXME: this is overly conservative. // While volatile access cannot be eliminated, they do not have to clobber // non-aliasing locations, as normal accesses can for example be reordered // with volatile accesses. 
if (SI->isVolatile()) - if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) || - isOtherMemAccess(QueryInst)) + if (!QueryInst || QueryInst->isVolatile()) return MemDepResult::getClobber(SI); // If alias analysis can tell that this store is guaranteed to not modify @@ -743,8 +734,6 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) { llvm::sort(Cache); ++NumCacheDirtyNonLocal; - // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " - // << Cache.size() << " cached: " << *QueryInst; } else { // Seed DirtyBlocks with each of the preds of QueryInst's block. BasicBlock *QueryBB = QueryCall->getParent(); @@ -1204,7 +1193,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // If we do process a large number of blocks it becomes very expensive and // likely it isn't worth worrying about if (Result.size() > NumResultsLimit) { - Worklist.clear(); // Sort it now (if needed) so that recursive invocations of // getNonLocalPointerDepFromBB and other routines that could reuse the // cache value will only see properly sorted cache arrays. diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index a877b19df866..2ed32227bd9e 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -8,12 +8,10 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsARM.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" using namespace llvm; diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 57f431ec21f5..76371b88812e 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -36,8 +36,8 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Use.h" #include "llvm/InitializePasses.h" @@ -49,10 +49,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include #include @@ -130,6 +130,12 @@ public: MemorySSAWalkerAnnotatedWriter(MemorySSA *M) : MSSA(M), Walker(M->getWalker()) {} + void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) override { + if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) + OS << "; " << *MA << "\n"; + } + void emitInstructionAnnot(const Instruction *I, formatted_raw_ostream &OS) override { if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { @@ -732,7 +738,7 @@ template class ClobberWalker { struct generic_def_path_iterator : public iterator_facade_base, std::forward_iterator_tag, T *> { - generic_def_path_iterator() {} + generic_def_path_iterator() = default; generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} T &operator*() const { return curNode(); } @@ -743,9 +749,9 @@ template class ClobberWalker { } bool operator==(const generic_def_path_iterator &O) const { - if (N.hasValue() != O.N.hasValue()) + if (N.has_value() != O.N.has_value()) return false; - return !N.hasValue() || *N == *O.N; + return !N || *N == *O.N; } private: @@ -1397,6 +1403,9 @@ void 
MemorySSA::OptimizeUses::optimizeUsesInBlock( continue; } + if (MU->isOptimized()) + continue; + if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None); continue; @@ -1585,10 +1594,6 @@ void MemorySSA::buildMemorySSA(BatchAAResults &BAA) { SmallPtrSet Visited; renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); - ClobberWalkerBase WalkerBase(this, &BAA, DT); - CachingWalker WalkerLocal(this, &WalkerBase); - OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses(); - // Mark the uses in unreachable blocks as live on entry, so that they go // somewhere. for (auto &BB : F) @@ -2178,6 +2183,17 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator, return dominates(Dominator, cast(Dominatee.getUser())); } +void MemorySSA::ensureOptimizedUses() { + if (IsOptimized) + return; + + BatchAAResults BatchAA(*AA); + ClobberWalkerBase WalkerBase(this, &BatchAA, DT); + CachingWalker WalkerLocal(this, &WalkerBase); + OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses(); + IsOptimized = true; +} + void MemoryAccess::print(raw_ostream &OS) const { switch (getValueID()) { case MemoryPhiVal: return static_cast(this)->print(OS); @@ -2350,6 +2366,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { auto &MSSA = getAnalysis().getMSSA(); + MSSA.ensureOptimizedUses(); if (DotCFGMSSA != "") { DOTFuncMSSAInfo CFGInfo(F, MSSA); WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); @@ -2382,6 +2399,7 @@ bool MemorySSAAnalysis::Result::invalidate( PreservedAnalyses MemorySSAPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { auto &MSSA = AM.getResult(F).getMSSA(); + MSSA.ensureOptimizedUses(); if (DotCFGMSSA != "") { DOTFuncMSSAInfo CFGInfo(F, MSSA); WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 9c841883de6d..eb75118210b9 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -10,22 +10,15 @@ // //===----------------------------------------------------------------===// #include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/LoopIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FormattedStream.h" #include #define DEBUG_TYPE "memoryssa" @@ -243,6 +236,7 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, } void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { + VisitedBlocks.clear(); InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); @@ -311,6 +305,13 @@ static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, // point to the correct new defs, to ensure we only have one variable, and no // disconnected stores. void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { + // Don't bother updating dead code. 
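+  // An unreachable block carries no useful dominance information; wiring
+  // the def to the live-on-entry access keeps MemorySSA well formed
+  // without running any renaming.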
+ if (!MSSA->DT->isReachableFromEntry(MD->getBlock())) { + MD->setDefiningAccess(MSSA->getLiveOnEntryDef()); + return; + } + + VisitedBlocks.clear(); InsertedPHIs.clear(); // See if we had a local def, and if not, go hunting. @@ -427,10 +428,10 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { if (NewPhiSize) tryRemoveTrivialPhis(ArrayRef(&InsertedPHIs[NewPhiIndex], NewPhiSize)); - // Now that all fixups are done, rename all uses if we are asked. Skip - // renaming for defs in unreachable blocks. + // Now that all fixups are done, rename all uses if we are asked. The defs are + // guaranteed to be in reachable code due to the check at the method entry. BasicBlock *StartBlock = MD->getBlock(); - if (RenameUses && MSSA->getDomTree().getNode(StartBlock)) { + if (RenameUses) { SmallPtrSet Visited; // We are guaranteed there is a def in the block, because we just got it // handed to us in this function. diff --git a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp index fab51d6a7aaf..dc149f326271 100644 --- a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp +++ b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp @@ -22,7 +22,7 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner( LLVMContext &Ctx, const std::string &ModelPath, const std::vector &InputSpecs, const std::vector &OutputSpecs) - : MLModelRunner(Ctx, MLModelRunner::Kind::Development), + : MLModelRunner(Ctx, MLModelRunner::Kind::Development, InputSpecs.size()), OutputSpecs(OutputSpecs) { Evaluator = std::make_unique( ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; }, @@ -32,6 +32,10 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner( Evaluator.reset(); return; } + + for (size_t I = 0, E = InputSpecs.size(); I < E; ++I) { + setUpBufferForTensor(I, InputSpecs[I], Evaluator->getUntypedInput(I)); + } } void *ModelUnderTrainingRunner::evaluateUntyped() { @@ -43,24 +47,31 @@ void *ModelUnderTrainingRunner::evaluateUntyped() { return LastEvaluationResult->getUntypedTensorValue(0); } -void *ModelUnderTrainingRunner::getTensorUntyped(size_t Index) { - return Evaluator->getUntypedInput(Index); -} - std::unique_ptr ModelUnderTrainingRunner::createAndEnsureValid( LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName, const std::vector &InputSpecs, StringRef OutputSpecsPathOverride) { - std::unique_ptr MUTR; if (auto MaybeOutputSpecs = loadOutputSpecs(Ctx, DecisionName, ModelPath, OutputSpecsPathOverride)) - MUTR.reset(new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs, - *MaybeOutputSpecs)); + return createAndEnsureValid(Ctx, ModelPath, DecisionName, InputSpecs, + *MaybeOutputSpecs); + Ctx.emitError("Could not load the policy model from the provided path"); + return nullptr; +} + +std::unique_ptr +ModelUnderTrainingRunner::createAndEnsureValid( + LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName, + const std::vector &InputSpecs, + const std::vector &OutputSpecs) { + std::unique_ptr MUTR; + MUTR.reset( + new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs, OutputSpecs)); if (MUTR && MUTR->isValid()) return MUTR; - Ctx.emitError("Could not load the policy model from the provided path"); + Ctx.emitError("Could not load or create model evaluator."); return nullptr; } diff --git a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index 64fd5eb1acd4..373aaa48b1d1 100644 --- a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -15,8 
+15,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ModuleDebugInfoPrinter.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 2880ca62a7f8..2b98634ef7bf 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -38,7 +38,6 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" @@ -368,7 +367,7 @@ static void computeFunctionSummary( // We should have named any anonymous globals assert(CalledFunction->hasName()); auto ScaledCount = PSI->getProfileCount(*CB, BFI); - auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) + auto Hotness = ScaledCount ? getHotness(*ScaledCount, PSI) : CalleeInfo::HotnessType::Unknown; if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) Hotness = CalleeInfo::HotnessType::Cold; @@ -490,8 +489,7 @@ static void computeFunctionSummary( HasIndirBranchToBlockAddress; GlobalValueSummary::GVFlags Flags( F.getLinkage(), F.getVisibility(), NotEligibleForImport, - /* Live = */ false, F.isDSOLocal(), - F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr()); + /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable()); FunctionSummary::FFlags FunFlags{ F.hasFnAttribute(Attribute::ReadNone), F.hasFnAttribute(Attribute::ReadOnly), @@ -612,8 +610,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags( V.getLinkage(), V.getVisibility(), NonRenamableLocal, - /* Live = */ false, V.isDSOLocal(), - V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr()); + /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable()); VTableFuncList VTableFuncs; // If splitting is not enabled, then we compute the summary information @@ -655,8 +652,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags( A.getLinkage(), A.getVisibility(), NonRenamableLocal, - /* Live = */ false, A.isDSOLocal(), - A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr()); + /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable()); auto AS = std::make_unique(Flags); auto *Aliasee = A.getAliaseeObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); @@ -733,8 +729,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility, /* NotEligibleToImport = */ true, /* Live = */ true, - /* Local */ GV->isDSOLocal(), - GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr()); + /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable()); CantBePromoted.insert(GV->getGUID()); // Create the appropriate summary type. 
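
Note on the MemorySSA hunks above: buildMemorySSA() no longer optimizes uses eagerly; that work moves into the new ensureOptimizedUses(), which the printer passes now call explicitly, and an IsOptimized flag makes repeated calls free. A minimal standalone sketch of that compute-on-first-request pattern (Analysis and runExpensiveOptimization are hypothetical names, not LLVM API):

  #include <iostream>
  #include <vector>

  // An analysis whose expensive refinement step is deferred until a client
  // actually asks for it, mirroring MemorySSA::ensureOptimizedUses.
  class Analysis {
    std::vector<int> Data;
    bool IsOptimized = false; // set once the refinement has run

    void runExpensiveOptimization() {
      // Stand-in for OptimizeUses(...).optimizeUses().
      for (int &V : Data)
        V *= 2;
    }

  public:
    explicit Analysis(std::vector<int> D) : Data(std::move(D)) {}

    // Cheap to call repeatedly; the work happens at most once.
    void ensureOptimized() {
      if (IsOptimized)
        return;
      runExpensiveOptimization();
      IsOptimized = true;
    }

    const std::vector<int> &get() const { return Data; }
  };

  int main() {
    Analysis A({1, 2, 3});
    A.ensureOptimized(); // first call does the work
    A.ensureOptimized(); // no-op
    for (int V : A.get())
      std::cout << V << ' ';
    std::cout << '\n'; // prints: 2 4 6
  }

The payoff is that clients which never query optimized uses no longer pay for the clobber walks at construction time.
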
if (Function *F = dyn_cast(GV)) { diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp index 5ca72f5f3623..5cff986245b9 100644 --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -16,14 +16,11 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -143,7 +140,7 @@ static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock, return false; auto DL = ExitBlock->getModule()->getDataLayout(); auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader()); - auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(), + auto *SimpleValOrNull = simplifyCmpInst(Cond->getPredicate(), IVStart, RHS, {DL, /*TLI*/ nullptr, DT, /*AC*/ nullptr, BI}); @@ -494,7 +491,7 @@ template static V getOrCreateCachedOptional(K Key, DenseMap> &Map, FnTy &&Fn, ArgsTy&&... args) { Optional &OptVal = Map[Key]; - if (!OptVal.hasValue()) + if (!OptVal) OptVal = Fn(std::forward(args)...); return OptVal.getValue(); } diff --git a/llvm/lib/Analysis/NoInferenceModelRunner.cpp b/llvm/lib/Analysis/NoInferenceModelRunner.cpp index 7178120ebe4f..1914b22f5d71 100644 --- a/llvm/lib/Analysis/NoInferenceModelRunner.cpp +++ b/llvm/lib/Analysis/NoInferenceModelRunner.cpp @@ -10,24 +10,14 @@ // logs for the default policy, in 'development' mode, but never ask it to // 'run'. //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_API) - #include "llvm/Analysis/NoInferenceModelRunner.h" -#include "llvm/Analysis/Utils/TFUtils.h" using namespace llvm; NoInferenceModelRunner::NoInferenceModelRunner( LLVMContext &Ctx, const std::vector &Inputs) - : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp) { - ValuesBuffer.reserve(Inputs.size()); + : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp, Inputs.size()) { + size_t Index = 0; for (const auto &TS : Inputs) - ValuesBuffer.push_back(std::make_unique(TS.getElementCount() * - TS.getElementByteSize())); -} - -void *NoInferenceModelRunner::getTensorUntyped(size_t Index) { - return ValuesBuffer[Index].get(); + setUpBufferForTensor(Index++, TS, nullptr); } -#endif // defined(LLVM_HAVE_TF_API) diff --git a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp index 0826b3078672..6fe056d36668 100644 --- a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -26,8 +26,6 @@ #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index 6f3d4d536c40..17b40f03a5a5 100644 --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -47,7 +47,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) bool OptimizationRemarkEmitter::invalidate( Function &F, const PreservedAnalyses &PA, 
FunctionAnalysisManager::Invalidator &Inv) { - if (OwnedBFI.get()) { + if (OwnedBFI) { OwnedBFI.reset(); BFI = nullptr; } @@ -80,7 +80,7 @@ void OptimizationRemarkEmitter::emit( computeHotness(OptDiag); // Only emit it if its hotness meets the threshold. - if (OptDiag.getHotness().getValueOr(0) < + if (OptDiag.getHotness().value_or(0) < F->getContext().getDiagnosticsHotnessThreshold()) { return; } diff --git a/llvm/lib/Analysis/OverflowInstAnalysis.cpp b/llvm/lib/Analysis/OverflowInstAnalysis.cpp index 87a85e6a7364..8bfd6642f760 100644 --- a/llvm/lib/Analysis/OverflowInstAnalysis.cpp +++ b/llvm/lib/Analysis/OverflowInstAnalysis.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/OverflowInstAnalysis.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp index 02d084937ccb..7571bd0059cc 100644 --- a/llvm/lib/Analysis/PHITransAddr.cpp +++ b/llvm/lib/Analysis/PHITransAddr.cpp @@ -17,7 +17,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -35,9 +34,6 @@ static bool CanPHITrans(Instruction *Inst) { isa(Inst->getOperand(1))) return true; - // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; - // if (isa(PtrInst) || isa(PtrInst)) - // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); return false; } @@ -226,7 +222,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps[0], + if (Value *V = simplifyGEPInst(GEP->getSourceElementType(), GEPOps[0], ArrayRef(GEPOps).slice(1), GEP->isInBounds(), {DL, TLI, DT, AC})) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) @@ -240,6 +236,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, for (User *U : APHIOp->users()) { if (GetElementPtrInst *GEPI = dyn_cast(U)) if (GEPI->getType() == GEP->getType() && + GEPI->getSourceElementType() == GEP->getSourceElementType() && GEPI->getNumOperands() == GEPOps.size() && GEPI->getParent()->getParent() == CurBB->getParent() && (!DT || DT->dominates(GEPI->getParent(), PredBB))) { @@ -277,7 +274,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) { + if (Value *Res = simplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) { // If we simplified the operands, the LHS is no longer an input, but Res // is. 
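
The PHITransAddr hunk above now also requires GEPI->getSourceElementType() to equal GEP->getSourceElementType() before reusing an existing GEP. With opaque pointers the result type alone no longer identifies the computation, because the byte offset is scaled by the source element type. A standalone sketch of that scaling, using plain pointer arithmetic as a stand-in for GEP:

  #include <cstdint>
  #include <iostream>

  int main() {
    unsigned char Buf[64] = {};
    unsigned char *Base = Buf;

    // "gep i32, ptr %base, i64 3" -> offset 3 * sizeof(int32_t) = 12 bytes.
    unsigned char *AsI32 = Base + 3 * sizeof(int32_t);
    // "gep i64, ptr %base, i64 3" -> offset 3 * sizeof(int64_t) = 24 bytes.
    unsigned char *AsI64 = Base + 3 * sizeof(int64_t);

    // Same result type (a bare pointer), same operands, different address:
    std::cout << (AsI64 - Base) - (AsI32 - Base) << " byte difference\n"; // 12
  }
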
RemoveInstInputs(LHS, InstInputs); diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 268ed9d04741..9d5fa6d0a41b 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/InitializePasses.h" @@ -125,7 +124,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph( for (const auto &I : BB) if (isa(I) || isa(I)) if (auto CallCount = getProfileCount(cast(I), nullptr)) - TotalCallCount += CallCount.getValue(); + TotalCallCount += *CallCount; if (isHotCount(TotalCallCount)) return true; } @@ -154,7 +153,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph( for (const auto &I : BB) if (isa(I) || isa(I)) if (auto CallCount = getProfileCount(cast(I), nullptr)) - TotalCallCount += CallCount.getValue(); + TotalCallCount += *CallCount; if (!isColdCount(TotalCallCount)) return false; } @@ -166,7 +165,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph( bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const { assert(hasPartialSampleProfile() && "Expect partial sample profile"); - return !F.getEntryCount().hasValue(); + return !F.getEntryCount(); } template @@ -188,7 +187,7 @@ bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( for (const auto &I : BB) if (isa(I) || isa(I)) if (auto CallCount = getProfileCount(cast(I), nullptr)) - TotalCallCount += CallCount.getValue(); + TotalCallCount += *CallCount; if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) return true; if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount)) @@ -316,11 +315,11 @@ bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, } uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const { - return HotCountThreshold.getValueOr(UINT64_MAX); + return HotCountThreshold.value_or(UINT64_MAX); } uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const { - return ColdCountThreshold.getValueOr(0); + return ColdCountThreshold.value_or(0); } bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, diff --git a/llvm/lib/Analysis/PtrUseVisitor.cpp b/llvm/lib/Analysis/PtrUseVisitor.cpp index 9a834ba4866a..49304818d7ef 100644 --- a/llvm/lib/Analysis/PtrUseVisitor.cpp +++ b/llvm/lib/Analysis/PtrUseVisitor.cpp @@ -14,7 +14,6 @@ #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include using namespace llvm; diff --git a/llvm/lib/Analysis/RegionInfo.cpp b/llvm/lib/Analysis/RegionInfo.cpp index 3ba0bb9eaf2c..9be23a374eca 100644 --- a/llvm/lib/Analysis/RegionInfo.cpp +++ b/llvm/lib/Analysis/RegionInfo.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominanceFrontier.h" #include "llvm/InitializePasses.h" #ifndef NDEBUG #include "llvm/Analysis/RegionPrinter.h" diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp index 10c8569096c6..ddef3be8df37 100644 --- a/llvm/lib/Analysis/RegionPass.cpp +++ b/llvm/lib/Analysis/RegionPass.cpp @@ -12,14 +12,16 @@ // Most of this code has been COPIED from LoopPass.cpp // //===----------------------------------------------------------------------===// + #include "llvm/Analysis/RegionPass.h" +#include "llvm/Analysis/RegionInfo.h" 
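
Many hunks in this import (ProfileSummaryInfo, OptimizationRemarkEmitter, MustExecute, ScalarEvolution, ...) migrate llvm::Optional call sites from hasValue()/getValue()/getValueOr() to the std::optional-compatible has_value(), operator*, and value_or(). The same idioms on plain std::optional, shaped like the call-count accumulation above (getCount is a hypothetical stand-in for getProfileCount):

  #include <cstdint>
  #include <iostream>
  #include <optional>

  // Hypothetical profile source: odd call sites have no recorded count.
  static std::optional<uint64_t> getCount(unsigned Site) {
    if (Site % 2)
      return std::nullopt;
    return Site * 10;
  }

  int main() {
    uint64_t TotalCallCount = 0;
    for (unsigned Site = 0; Site < 6; ++Site)
      if (auto CallCount = getCount(Site)) // operator bool == has_value()
        TotalCallCount += *CallCount;      // operator* instead of getValue()
    std::cout << TotalCallCount << '\n';   // 0 + 20 + 40 = 60

    std::optional<uint64_t> Threshold;     // unset, like HotCountThreshold
    std::cout << Threshold.value_or(UINT64_MAX) << '\n'; // getValueOr analogue
  }
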
#include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/PrintPasses.h" -#include "llvm/IR/StructuralHash.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; #define DEBUG_TYPE "regionpassmgr" @@ -93,12 +95,12 @@ bool RGPassManager::runOnFunction(Function &F) { TimeRegion PassTimer(getPassTimer(P)); #ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(F); + uint64_t RefHash = P->structuralHash(F); #endif LocalChanged = P->runOnRegion(CurrentRegion, *this); #ifdef EXPENSIVE_CHECKS - if (!LocalChanged && (RefHash != StructuralHash(F))) { + if (!LocalChanged && (RefHash != P->structuralHash(F))) { llvm::errs() << "Pass modifies its input and doesn't report it: " << P->getPassName() << "\n"; llvm_unreachable("Pass modifies its input and doesn't report it"); diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index 1fb5faaa6a71..fbd3d17febff 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -10,15 +10,11 @@ #include "llvm/Analysis/RegionPrinter.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #ifndef NDEBUG #include "llvm/IR/LegacyPassManager.h" @@ -35,28 +31,20 @@ onlySimpleRegions("only-simple-regions", cl::init(false)); namespace llvm { -template<> -struct DOTGraphTraits : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) - : DefaultDOTGraphTraits(isSimple) {} +std::string DOTGraphTraits::getNodeLabel(RegionNode *Node, + RegionNode *Graph) { + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs(); - std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) { - - if (!Node->isSubRegion()) { - BasicBlock *BB = Node->getNodeAs(); - - if (isSimple()) - return DOTGraphTraits - ::getSimpleNodeLabel(BB, nullptr); - else - return DOTGraphTraits - ::getCompleteNodeLabel(BB, nullptr); - } - - return "Not implemented"; + if (isSimple()) + return DOTGraphTraits::getSimpleNodeLabel(BB, nullptr); + else + return DOTGraphTraits::getCompleteNodeLabel(BB, nullptr); } -}; + + return "Not implemented"; +} template <> struct DOTGraphTraits : public DOTGraphTraits { @@ -138,7 +126,7 @@ struct DOTGraphTraits : public DOTGraphTraits { printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; -} //end namespace llvm +} // end namespace llvm namespace { @@ -149,48 +137,49 @@ struct RegionInfoPassGraphTraits { }; struct RegionPrinter - : public DOTGraphTraitsPrinter { + : public DOTGraphTraitsPrinterWrapperPass< + RegionInfoPass, false, RegionInfo *, RegionInfoPassGraphTraits> { static char ID; RegionPrinter() - : DOTGraphTraitsPrinter("reg", ID) { + : DOTGraphTraitsPrinterWrapperPass("reg", ID) { initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); } }; char RegionPrinter::ID = 0; struct RegionOnlyPrinter - : public DOTGraphTraitsPrinter { + : public DOTGraphTraitsPrinterWrapperPass< + RegionInfoPass, true, RegionInfo *, RegionInfoPassGraphTraits> { static char ID; RegionOnlyPrinter() - : DOTGraphTraitsPrinter("reg", ID) { + : DOTGraphTraitsPrinterWrapperPass("reg", ID) { 
initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); } }; char RegionOnlyPrinter::ID = 0; struct RegionViewer - : public DOTGraphTraitsViewer { + : public DOTGraphTraitsViewerWrapperPass< + RegionInfoPass, false, RegionInfo *, RegionInfoPassGraphTraits> { static char ID; RegionViewer() - : DOTGraphTraitsViewer("reg", ID) { + : DOTGraphTraitsViewerWrapperPass("reg", ID) { initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionViewer::ID = 0; struct RegionOnlyViewer - : public DOTGraphTraitsViewer { + : public DOTGraphTraitsViewerWrapperPass { static char ID; RegionOnlyViewer() - : DOTGraphTraitsViewer("regonly", ID) { + : DOTGraphTraitsViewerWrapperPass("regonly", + ID) { initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp index 294bc38c17ad..afc3d7fc4c35 100644 --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ReplayInlineAdvisor.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include using namespace llvm; @@ -26,8 +26,9 @@ using namespace llvm; ReplayInlineAdvisor::ReplayInlineAdvisor( Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr OriginalAdvisor, - const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) - : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)), + const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, + InlineContext IC) + : InlineAdvisor(M, FAM, IC), OriginalAdvisor(std::move(OriginalAdvisor)), ReplaySettings(ReplaySettings), EmitRemarks(EmitRemarks) { auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile); @@ -75,12 +76,15 @@ ReplayInlineAdvisor::ReplayInlineAdvisor( HasReplayRemarks = true; } -std::unique_ptr llvm::getReplayInlineAdvisor( - Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, - std::unique_ptr OriginalAdvisor, - const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) { +std::unique_ptr +llvm::getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, + LLVMContext &Context, + std::unique_ptr OriginalAdvisor, + const ReplayInlinerSettings &ReplaySettings, + bool EmitRemarks, InlineContext IC) { auto Advisor = std::make_unique( - M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks); + M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks, + IC); if (!Advisor->areReplayRemarksLoaded()) Advisor.reset(); return Advisor; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 977fc0911355..207f4df79e45 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -79,7 +79,6 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolutionDivision.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -96,7 +95,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" #include 
"llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -104,7 +102,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -125,7 +122,6 @@ #include #include #include -#include #include #include #include @@ -146,17 +142,21 @@ STATISTIC(NumTripCountsNotComputed, STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); +#ifdef EXPENSIVE_CHECKS +bool llvm::VerifySCEV = true; +#else +bool llvm::VerifySCEV = false; +#endif + static cl::opt -MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, - cl::ZeroOrMore, - cl::desc("Maximum number of iterations SCEV will " - "symbolically execute a constant " - "derived loop"), - cl::init(100)); - -// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. -static cl::opt VerifySCEV( - "verify-scev", cl::Hidden, + MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::desc("Maximum number of iterations SCEV will " + "symbolically execute a constant " + "derived loop"), + cl::init(100)); + +static cl::opt VerifySCEVOpt( + "verify-scev", cl::Hidden, cl::location(VerifySCEV), cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt VerifySCEVStrict( "verify-scev-strict", cl::Hidden, @@ -231,6 +231,17 @@ static cl::opt UseExpensiveRangeSharpening( cl::desc("Use more powerful methods of sharpening expression ranges. May " "be costly in terms of compile time")); +static cl::opt MaxPhiSCCAnalysisSize( + "scalar-evolution-max-scc-analysis-depth", cl::Hidden, + cl::desc("Maximum amount of nodes to process while searching SCEVUnknown " + "Phi strongly connected components"), + cl::init(8)); + +static cl::opt + EnableFiniteLoopControl("scalar-evolution-finite-loop", cl::Hidden, + cl::desc("Handle <= and >= in finite loops"), + cl::init(true)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -519,12 +530,13 @@ void SCEVUnknown::deleted() { } void SCEVUnknown::allUsesReplacedWith(Value *New) { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); - // Update this SCEVUnknown to point to the new value. This is needed - // because there may still be outstanding SCEVs which still point to - // this SCEVUnknown. + // Replace the value pointer in case someone is still using this SCEVUnknown. setValPtr(New); } @@ -1643,10 +1655,12 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. - if (AR->hasNoUnsignedWrap()) - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Depth + 1), - getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); + if (AR->hasNoUnsignedWrap()) { + Start = + getExtendAddRecStart(AR, Ty, this, Depth + 1); + Step = getZeroExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); + } // Check whether the backedge-taken count is SCEVCouldNotCompute. 
// Note that this serves two purposes: It filters out loops that are @@ -1688,11 +1702,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Cache knowledge of AR NUW, which is propagated to this AddRec. setNoWrapFlags(const_cast(AR), SCEV::FlagNUW); // Return the expression with the addrec on the outside. - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, - Depth + 1), - getZeroExtendExpr(Step, Ty, Depth + 1), L, - AR->getNoWrapFlags()); + Start = getExtendAddRecStart(AR, Ty, this, + Depth + 1); + Step = getZeroExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. @@ -1707,11 +1720,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Negative step causes unsigned wrap, but it still can't self-wrap. setNoWrapFlags(const_cast(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, - Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, - AR->getNoWrapFlags()); + Start = getExtendAddRecStart(AR, Ty, this, + Depth + 1); + Step = getSignExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } } @@ -1733,11 +1745,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // issue. It's not clear that the order of checks does matter, but // it's one of two issue possible causes for a change which was // reverted. Be conservative for the moment. - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, - Depth + 1), - getZeroExtendExpr(Step, Ty, Depth + 1), L, - AR->getNoWrapFlags()); + Start = + getExtendAddRecStart(AR, Ty, this, Depth + 1); + Step = getZeroExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // For a negative step, we can extend the operands iff doing so only @@ -1752,11 +1763,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // still can't self-wrap. setNoWrapFlags(const_cast(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, - Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, - AR->getNoWrapFlags()); + Start = getExtendAddRecStart(AR, Ty, this, + Depth + 1); + Step = getSignExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } } @@ -1780,9 +1790,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { if (proveNoWrapByVaryingStart(Start, Step, L)) { setNoWrapFlags(const_cast(AR), SCEV::FlagNUW); - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Depth + 1), - getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); + Start = + getExtendAddRecStart(AR, Ty, this, Depth + 1); + Step = getZeroExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } @@ -1984,10 +1995,12 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. 
- if (AR->hasNoSignedWrap()) - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW); + if (AR->hasNoSignedWrap()) { + Start = + getExtendAddRecStart(AR, Ty, this, Depth + 1); + Step = getSignExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, SCEV::FlagNSW); + } // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -2030,11 +2043,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Cache knowledge of AR NSW, which is propagated to this AddRec. setNoWrapFlags(const_cast(AR), SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, - Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, - AR->getNoWrapFlags()); + Start = getExtendAddRecStart(AR, Ty, this, + Depth + 1); + Step = getSignExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. @@ -2056,11 +2068,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { setNoWrapFlags(const_cast(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, - Depth + 1), - getZeroExtendExpr(Step, Ty, Depth + 1), L, - AR->getNoWrapFlags()); + Start = getExtendAddRecStart(AR, Ty, this, + Depth + 1); + Step = getZeroExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } } @@ -2072,9 +2083,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // issue. It's not clear that the order of checks does matter, but // it's one of two issue possible causes for a change which was // reverted. Be conservative for the moment. 
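
A recurring mechanical change in the ScalarEvolution hunks here, and again in createSCEV further down, hoists nested getExtendAddRecStart/getZeroExtendExpr/getSCEV calls into named locals (Start, Step, LHS, RHS) before passing them on. One plausible motivation besides readability: sibling function arguments are indeterminately sequenced in C++, so stateful recursive calls written inline as arguments run in an unspecified order. A small standalone illustration (Trace, makeStart, makeStep are hypothetical):

  #include <iostream>
  #include <vector>

  static std::vector<int> Trace;

  static int makeStart() { Trace.push_back(1); return 10; }
  static int makeStep()  { Trace.push_back(2); return 3; }

  static int combine(int Start, int Step) { return Start + Step; }

  int main() {
    // These two calls are indeterminately sequenced relative to each other;
    // a compiler may run makeStep() first. If the callees mutate shared
    // state (as SCEV construction does with its caches), the intermediate
    // state depends on that order.
    int R1 = combine(makeStart(), makeStep());

    Trace.clear();
    // Hoisting into locals pins the order explicitly:
    int Start = makeStart();
    int Step = makeStep();
    int R2 = combine(Start, Step);

    std::cout << R1 << ' ' << R2 << '\n'; // values agree; only the second
                                          // form guarantees the side-effect order
  }
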
- return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); + Start = + getExtendAddRecStart(AR, Ty, this, Depth + 1); + Step = getSignExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step})) @@ -2096,9 +2108,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { if (proveNoWrapByVaryingStart(Start, Step, L)) { setNoWrapFlags(const_cast(AR), SCEV::FlagNSW); - return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this, Depth + 1), - getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); + Start = + getExtendAddRecStart(AR, Ty, this, Depth + 1); + Step = getSignExtendExpr(Step, Ty, Depth + 1); + return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } @@ -2300,9 +2313,9 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *A = (this->*Extension)( (this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0); - const SCEV *B = (this->*Operation)((this->*Extension)(LHS, WideTy, 0), - (this->*Extension)(RHS, WideTy, 0), - SCEV::FlagAnyWrap, 0); + const SCEV *LHSB = (this->*Extension)(LHS, WideTy, 0); + const SCEV *RHSB = (this->*Extension)(RHS, WideTy, 0); + const SCEV *B = (this->*Operation)(LHSB, RHSB, SCEV::FlagAnyWrap, 0); return A == B; } @@ -3106,12 +3119,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // TODO: There are some cases where this transformation is not // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of // this transformation should be narrowed down. - if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) - return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), - SCEV::FlagAnyWrap, Depth + 1), - getMulExpr(LHSC, Add->getOperand(1), - SCEV::FlagAnyWrap, Depth + 1), - SCEV::FlagAnyWrap, Depth + 1); + if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) { + const SCEV *LHS = getMulExpr(LHSC, Add->getOperand(0), + SCEV::FlagAnyWrap, Depth + 1); + const SCEV *RHS = getMulExpr(LHSC, Add->getOperand(1), + SCEV::FlagAnyWrap, Depth + 1); + return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1); + } if (Ops[0]->isAllOnesValue()) { // If we have a mul by -1 of an add, try distributing the -1 among the @@ -3466,12 +3480,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } // Fold if both operands are constant. - if (const SCEVConstant *LHSC = dyn_cast(LHS)) { - Constant *LHSCV = LHSC->getValue(); - Constant *RHSCV = RHSC->getValue(); - return getConstant(cast(ConstantExpr::getUDiv(LHSCV, - RHSCV))); - } + if (const SCEVConstant *LHSC = dyn_cast(LHS)) + return getConstant(LHSC->getAPInt().udiv(RHSC->getAPInt())); } } @@ -4002,6 +4012,59 @@ public: } // namespace +/// Return true if V is poison given that AssumedPoison is already poison. +static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) { + // The only way poison may be introduced in a SCEV expression is from a + // poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown, + // not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not* + // introduce poison -- they encode guaranteed, non-speculated knowledge. + // + // Additionally, all SCEV nodes propagate poison from inputs to outputs, + // with the notable exception of umin_seq, where only poison from the first + // operand is (unconditionally) propagated. 
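
The SCEVPoisonCollector in the next hunk implements this comment in two passes: collect every source of possible poison in AssumedPoison (looking through umin_seq), collect everything that must propagate poison into S (not looking through umin_seq), and require the first set to be contained in the second. A standalone sketch of that containment check over a toy expression tree, omitting the umin_seq special case (Expr and collectLeaves are hypothetical):

  #include <algorithm>
  #include <iostream>
  #include <set>
  #include <vector>

  // Toy expression: a node is either a named leaf or an interior node.
  struct Expr {
    const char *Leaf = nullptr;          // non-null for leaves
    std::vector<const Expr *> Operands;  // for interior nodes
  };

  // Gather the leaves that could introduce poison into E.
  static void collectLeaves(const Expr *E, std::set<const char *> &Out) {
    if (E->Leaf) {
      Out.insert(E->Leaf);
      return;
    }
    for (const Expr *Op : E->Operands)
      collectLeaves(Op, Out);
  }

  // "If AssumedPoison is poison, is S necessarily poison?" holds when every
  // possibly-poison source of AssumedPoison also feeds S.
  static bool impliesPoison(const Expr *AssumedPoison, const Expr *S) {
    std::set<const char *> A, B;
    collectLeaves(AssumedPoison, A);
    collectLeaves(S, B);
    return std::includes(B.begin(), B.end(), A.begin(), A.end());
  }

  int main() {
    Expr X{"x"}, Y{"y"};
    Expr XY{nullptr, {&X, &Y}}; // some expression over x and y
    std::cout << impliesPoison(&X, &XY) << '\n'; // 1: x poison -> x+y poison
    std::cout << impliesPoison(&Y, &X) << '\n';  // 0: y does not feed x
  }
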
+ struct SCEVPoisonCollector { + bool LookThroughSeq; + SmallPtrSet MaybePoison; + SCEVPoisonCollector(bool LookThroughSeq) : LookThroughSeq(LookThroughSeq) {} + + bool follow(const SCEV *S) { + // TODO: We can always follow the first operand, but the SCEVTraversal + // API doesn't support this. + if (!LookThroughSeq && isa(S)) + return false; + + if (auto *SU = dyn_cast(S)) { + if (!isGuaranteedNotToBePoison(SU->getValue())) + MaybePoison.insert(S); + } + return true; + } + bool isDone() const { return false; } + }; + + // First collect all SCEVs that might result in AssumedPoison to be poison. + // We need to look through umin_seq here, because we want to find all SCEVs + // that *might* result in poison, not only those that are *required* to. + SCEVPoisonCollector PC1(/* LookThroughSeq */ true); + visitAll(AssumedPoison, PC1); + + // AssumedPoison is never poison. As the assumption is false, the implication + // is true. Don't bother walking the other SCEV in this case. + if (PC1.MaybePoison.empty()) + return true; + + // Collect all SCEVs in S that, if poison, *will* result in S being poison + // as well. We cannot look through umin_seq here, as its argument only *may* + // make the result poison. + SCEVPoisonCollector PC2(/* LookThroughSeq */ false); + visitAll(S, PC2); + + // Make sure that no matter which SCEV in PC1.MaybePoison is actually poison, + // it will also make S poison by being part of PC2.MaybePoison. + return all_of(PC1.MaybePoison, + [&](const SCEV *S) { return PC2.MaybePoison.contains(S); }); +} + const SCEV * ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Ops) { @@ -4010,11 +4073,6 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; - if (Ops.size() == 2 && - any_of(Ops, [](const SCEV *Op) { return isa(Op); })) - return getMinMaxExpr( - SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind), - Ops); #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) { @@ -4063,6 +4121,39 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, return getSequentialMinMaxExpr(Kind, Ops); } + const SCEV *SaturationPoint; + ICmpInst::Predicate Pred; + switch (Kind) { + case scSequentialUMinExpr: + SaturationPoint = getZero(Ops[0]->getType()); + Pred = ICmpInst::ICMP_ULE; + break; + default: + llvm_unreachable("Not a sequential min/max type."); + } + + for (unsigned i = 1, e = Ops.size(); i != e; ++i) { + // We can replace %x umin_seq %y with %x umin %y if either: + // * %y being poison implies %x is also poison. + // * %x cannot be the saturating value (e.g. zero for umin). + if (::impliesPoison(Ops[i], Ops[i - 1]) || + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, Ops[i - 1], + SaturationPoint)) { + SmallVector SeqOps = {Ops[i - 1], Ops[i]}; + Ops[i - 1] = getMinMaxExpr( + SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind), + SeqOps); + Ops.erase(Ops.begin() + i); + return getSequentialMinMaxExpr(Kind, Ops); + } + // Fold %x umin_seq %y to %x if %x ule %y. + // TODO: We might be able to prove the predicate for a later operand. + if (isKnownViaNonRecursiveReasoning(Pred, Ops[i - 1], Ops[i])) { + Ops.erase(Ops.begin() + i); + return getSequentialMinMaxExpr(Kind, Ops); + } + } + // Okay, it looks like we really DO need an expr. Check to see if we // already have one, otherwise create a new one. 
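
The fold above replaces %x umin_seq %y with a plain umin when %y being poison would already make %x poison, or when %x is provably non-zero, zero being the saturation point at which umin_seq stops evaluating %y. A brute-force check of the second condition over small unsigned values, modeling poison as an empty std::optional:

  #include <algorithm>
  #include <iostream>
  #include <optional>

  using Val = std::optional<unsigned>; // nullopt models poison

  static Val uminSeq(Val X, Val Y) {
    if (!X)
      return std::nullopt;      // poison first operand always propagates
    if (*X == 0)
      return 0u;                // saturates without looking at Y
    if (!Y)
      return std::nullopt;      // otherwise Y's poison propagates too
    return std::min(*X, *Y);
  }

  static Val umin(Val X, Val Y) {
    if (!X || !Y)
      return std::nullopt;      // poison from either side propagates
    return std::min(*X, *Y);
  }

  int main() {
    // The two operations differ only when X == 0 and Y is poison...
    std::cout << uminSeq(0u, std::nullopt).has_value() << ' '
              << umin(0u, std::nullopt).has_value() << '\n'; // 1 0

    // ...so for provably non-zero X they agree on every Y, poison included.
    bool Agree = true;
    for (unsigned X = 1; X < 8; ++X) {
      for (unsigned Y = 0; Y < 8; ++Y)
        Agree &= uminSeq(X, Y) == umin(X, Y);
      Agree &= uminSeq(X, std::nullopt) == umin(X, std::nullopt);
    }
    std::cout << Agree << '\n'; // 1
  }

The first condition, poison implication, is exactly what impliesPoison in the hunk above decides.
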
FoldingSetNodeID ID; @@ -4265,39 +4356,20 @@ bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { return FoundAddRec; } -/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. -/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an -/// offset I, then return {S', I}, else return {\p S, nullptr}. -static std::pair splitAddExpr(const SCEV *S) { - const auto *Add = dyn_cast(S); - if (!Add) - return {S, nullptr}; - - if (Add->getNumOperands() != 2) - return {S, nullptr}; - - auto *ConstOp = dyn_cast(Add->getOperand(0)); - if (!ConstOp) - return {S, nullptr}; - - return {Add->getOperand(1), ConstOp->getValue()}; -} - /// Return the ValueOffsetPair set for \p S. \p S can be represented /// by the value and offset from any ValueOffsetPair in the set. -ScalarEvolution::ValueOffsetPairSetVector * -ScalarEvolution::getSCEVValues(const SCEV *S) { +ArrayRef ScalarEvolution::getSCEVValues(const SCEV *S) { ExprValueMapType::iterator SI = ExprValueMap.find_as(S); if (SI == ExprValueMap.end()) - return nullptr; + return None; #ifndef NDEBUG if (VerifySCEVMap) { // Check there is no dangling Value in the set returned. - for (const auto &VE : SI->second) - assert(ValueExprMap.count(VE.first)); + for (Value *V : SI->second) + assert(ValueExprMap.count(V)); } #endif - return &SI->second; + return SI->second.getArrayRef(); } /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) @@ -4306,20 +4378,11 @@ ScalarEvolution::getSCEVValues(const SCEV *S) { void ScalarEvolution::eraseValueFromMap(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { - const SCEV *S = I->second; - // Remove {V, 0} from the set of ExprValueMap[S] - if (auto *SV = getSCEVValues(S)) - SV->remove({V, nullptr}); - - // Remove {V, Offset} from the set of ExprValueMap[Stripped] - const SCEV *Stripped; - ConstantInt *Offset; - std::tie(Stripped, Offset) = splitAddExpr(S); - if (Offset != nullptr) { - if (auto *SV = getSCEVValues(Stripped)) - SV->remove({V, Offset}); - } - ValueExprMap.erase(V); + auto EVIt = ExprValueMap.find(I->second); + bool Removed = EVIt->second.remove(V); + (void) Removed; + assert(Removed && "Value not in ExprValueMap?"); + ValueExprMap.erase(I); } } @@ -4330,7 +4393,7 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) { auto It = ValueExprMap.find_as(V); if (It == ValueExprMap.end()) { ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - ExprValueMap[S].insert({V, nullptr}); + ExprValueMap[S].insert(V); } } @@ -4339,33 +4402,9 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - const SCEV *S = getExistingSCEV(V); - if (S == nullptr) { - S = createSCEV(V); - // During PHI resolution, it is possible to create two SCEVs for the same - // V, so it is needed to double check whether V->S is inserted into - // ValueExprMap before insert S->{V, 0} into ExprValueMap. - std::pair Pair = - ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) { - ExprValueMap[S].insert({V, nullptr}); - - // If S == Stripped + Offset, add Stripped -> {V, Offset} into - // ExprValueMap. - const SCEV *Stripped = S; - ConstantInt *Offset = nullptr; - std::tie(Stripped, Offset) = splitAddExpr(S); - // If stripped is SCEVUnknown, don't bother to save - // Stripped -> {V, offset}. It doesn't simplify and sometimes even - // increase the complexity of the expansion code. 
- // If V is GetElementPtrInst, don't save Stripped -> {V, offset} - // because it may generate add/sub instead of GEP in SCEV expansion. - if (Offset != nullptr && !isa(Stripped) && - !isa(V)) - ExprValueMap[Stripped].insert({V, Offset}); - } - } - return S; + if (const SCEV *S = getExistingSCEV(V)) + return S; + return createSCEVIter(V); } const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { @@ -4795,7 +4834,7 @@ public: SelectInst *SI = cast(I); Optional Res = compareWithBackedgeCondition(SI->getCondition()); - if (Res.hasValue()) { + if (Res) { bool IsOne = cast(Res.getValue())->getValue()->isOne(); Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue()); } @@ -4803,7 +4842,7 @@ public: } default: { Optional Res = compareWithBackedgeCondition(I); - if (Res.hasValue()) + if (Res) Result = Res.getValue(); break; } @@ -5067,6 +5106,9 @@ static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { // Instcombine turns add of signmask into xor as a strength reduction step. if (RHSC->getValue().isSignMask()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); + // Binary `xor` is a bit-wise `add`. + if (V->getType()->isIntegerTy(1)) + return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); return BinaryOp(Op); case Instruction::LShr: @@ -5489,8 +5531,8 @@ bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( return true; auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool { - if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) && - !Preds.implies(SE.getEqualPredicate(Expr2, Expr1))) + if (Expr1 != Expr2 && !Preds->implies(SE.getEqualPredicate(Expr1, Expr2)) && + !Preds->implies(SE.getEqualPredicate(Expr2, Expr1))) return false; return true; }; @@ -5872,31 +5914,53 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (const SCEV *S = createNodeFromSelectLikePHI(PN)) return S; - // If the PHI has a single incoming value, follow that value, unless the - // PHI's incoming blocks are in a different loop, in which case doing so - // risks breaking LCSSA form. Instcombine would normally zap these, but - // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC})) - if (LI.replacementPreservesLCSSAForm(PN, V)) - return getSCEV(V); + if (Value *V = simplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC})) + return getSCEV(V); // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } -const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, - Value *Cond, - Value *TrueVal, - Value *FalseVal) { - // Handle "constant" branch or select. This can occur for instance when a - // loop pass transforms an inner loop and moves on to process the outer loop. - if (auto *CI = dyn_cast(Cond)) - return getSCEV(CI->isOne() ? TrueVal : FalseVal); +bool SCEVMinMaxExprContains(const SCEV *Root, const SCEV *OperandToFind, + SCEVTypes RootKind) { + struct FindClosure { + const SCEV *OperandToFind; + const SCEVTypes RootKind; // Must be a sequential min/max expression. + const SCEVTypes NonSequentialRootKind; // Non-seq variant of RootKind. + + bool Found = false; + + bool canRecurseInto(SCEVTypes Kind) const { + // We can only recurse into the SCEV expression of the same effective type + // as the type of our root SCEV expression, and into zero-extensions. 
+ return RootKind == Kind || NonSequentialRootKind == Kind || + scZeroExtend == Kind; + }; + + FindClosure(const SCEV *OperandToFind, SCEVTypes RootKind) + : OperandToFind(OperandToFind), RootKind(RootKind), + NonSequentialRootKind( + SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType( + RootKind)) {} + bool follow(const SCEV *S) { + Found = S == OperandToFind; + + return !isDone() && canRecurseInto(S->getSCEVType()); + } + + bool isDone() const { return Found; } + }; + + FindClosure FC(OperandToFind, RootKind); + visitAll(Root, FC); + return FC.Found; +} + +const SCEV *ScalarEvolution::createNodeForSelectOrPHIInstWithICmpInstCond( + Instruction *I, ICmpInst *Cond, Value *TrueVal, Value *FalseVal) { // Try to match some simple smax or umax patterns. - auto *ICI = dyn_cast(Cond); - if (!ICI) - return getUnknown(I); + auto *ICI = Cond; Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -5958,31 +6022,36 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, } break; case ICmpInst::ICMP_NE: - // n != 0 ? n+x : 1+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && - isa(RHS) && cast(RHS)->isZero()) { - const SCEV *One = getOne(I->getType()); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); - const SCEV *LA = getSCEV(TrueVal); - const SCEV *RA = getSCEV(FalseVal); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, One); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; + // x != 0 ? x+y : C+y -> x == 0 ? C+y : x+y + std::swap(TrueVal, FalseVal); + LLVM_FALLTHROUGH; case ICmpInst::ICMP_EQ: - // n == 0 ? 1+x : n+x -> umax(n, 1)+x + // x == 0 ? C+y : x+y -> umax(x, C)+y iff C u<= 1 if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && isa(RHS) && cast(RHS)->isZero()) { - const SCEV *One = getOne(I->getType()); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); - const SCEV *LA = getSCEV(TrueVal); - const SCEV *RA = getSCEV(FalseVal); - const SCEV *LDiff = getMinusSCEV(LA, One); - const SCEV *RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); + const SCEV *X = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *TrueValExpr = getSCEV(TrueVal); // C+y + const SCEV *FalseValExpr = getSCEV(FalseVal); // x+y + const SCEV *Y = getMinusSCEV(FalseValExpr, X); // y = (x+y)-x + const SCEV *C = getMinusSCEV(TrueValExpr, Y); // C = (C+y)-y + if (isa(C) && cast(C)->getAPInt().ule(1)) + return getAddExpr(getUMaxExpr(X, C), Y); + } + // x == 0 ? 0 : umin (..., x, ...) -> umin_seq(x, umin (...)) + // x == 0 ? 0 : umin_seq(..., x, ...) -> umin_seq(x, umin_seq(...)) + // x == 0 ? 0 : umin (..., umin_seq(..., x, ...), ...) 
+ // -> umin_seq(x, umin (..., umin_seq(...), ...)) + if (isa(RHS) && cast(RHS)->isZero() && + isa(TrueVal) && cast(TrueVal)->isZero()) { + const SCEV *X = getSCEV(LHS); + while (auto *ZExt = dyn_cast(X)) + X = ZExt->getOperand(); + if (getTypeSizeInBits(X->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *FalseValExpr = getSCEV(FalseVal); + if (SCEVMinMaxExprContains(FalseValExpr, X, scSequentialUMinExpr)) + return getUMinExpr(getNoopOrZeroExtend(X, I->getType()), FalseValExpr, + /*Sequential=*/true); + } } break; default: @@ -5992,12 +6061,95 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, return getUnknown(I); } +static Optional +createNodeForSelectViaUMinSeq(ScalarEvolution *SE, const SCEV *CondExpr, + const SCEV *TrueExpr, const SCEV *FalseExpr) { + assert(CondExpr->getType()->isIntegerTy(1) && + TrueExpr->getType() == FalseExpr->getType() && + TrueExpr->getType()->isIntegerTy(1) && + "Unexpected operands of a select."); + + // i1 cond ? i1 x : i1 C --> C + (i1 cond ? (i1 x - i1 C) : i1 0) + // --> C + (umin_seq cond, x - C) + // + // i1 cond ? i1 C : i1 x --> C + (i1 cond ? i1 0 : (i1 x - i1 C)) + // --> C + (i1 ~cond ? (i1 x - i1 C) : i1 0) + // --> C + (umin_seq ~cond, x - C) + + // FIXME: while we can't legally model the case where both of the hands + // are fully variable, we only require that the *difference* is constant. + if (!isa(TrueExpr) && !isa(FalseExpr)) + return None; + + const SCEV *X, *C; + if (isa(TrueExpr)) { + CondExpr = SE->getNotSCEV(CondExpr); + X = FalseExpr; + C = TrueExpr; + } else { + X = TrueExpr; + C = FalseExpr; + } + return SE->getAddExpr(C, SE->getUMinExpr(CondExpr, SE->getMinusSCEV(X, C), + /*Sequential=*/true)); +} + +static Optional createNodeForSelectViaUMinSeq(ScalarEvolution *SE, + Value *Cond, + Value *TrueVal, + Value *FalseVal) { + if (!isa(TrueVal) && !isa(FalseVal)) + return None; + + const auto *SECond = SE->getSCEV(Cond); + const auto *SETrue = SE->getSCEV(TrueVal); + const auto *SEFalse = SE->getSCEV(FalseVal); + return createNodeForSelectViaUMinSeq(SE, SECond, SETrue, SEFalse); +} + +const SCEV *ScalarEvolution::createNodeForSelectOrPHIViaUMinSeq( + Value *V, Value *Cond, Value *TrueVal, Value *FalseVal) { + assert(Cond->getType()->isIntegerTy(1) && "Select condition is not an i1?"); + assert(TrueVal->getType() == FalseVal->getType() && + V->getType() == TrueVal->getType() && + "Types of select hands and of the result must match."); + + // For now, only deal with i1-typed `select`s. + if (!V->getType()->isIntegerTy(1)) + return getUnknown(V); + + if (Optional S = + createNodeForSelectViaUMinSeq(this, Cond, TrueVal, FalseVal)) + return *S; + + return getUnknown(V); +} + +const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Value *V, Value *Cond, + Value *TrueVal, + Value *FalseVal) { + // Handle "constant" branch or select. This can occur for instance when a + // loop pass transforms an inner loop and moves on to process the outer loop. + if (auto *CI = dyn_cast(Cond)) + return getSCEV(CI->isOne() ? TrueVal : FalseVal); + + if (auto *I = dyn_cast(V)) { + if (auto *ICI = dyn_cast(Cond)) { + const SCEV *S = createNodeForSelectOrPHIInstWithICmpInstCond( + I, ICI, TrueVal, FalseVal); + if (!isa(S)) + return S; + } + } + + return createNodeForSelectOrPHIViaUMinSeq(V, Cond, TrueVal, FalseVal); +} + /// Expand GEP instructions into add and multiply operations. This allows them /// to be analyzed by regular SCEV code. 
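
createNodeForSelectViaUMinSeq above encodes an i1 select arithmetically: i1 cond ? x : C becomes C + umin_seq(cond, x - C), where i1 addition and subtraction are both XOR and umin_seq keeps x's poison from leaking when cond is false. The identity itself, checked exhaustively over well-defined i1 inputs:

  #include <iostream>

  int main() {
    bool OK = true;
    // i1 arithmetic is mod 2: '+' and '-' are both XOR, umin is AND, and
    // umin_seq(c, z) evaluates to 0 when c == 0 without consulting z.
    for (unsigned Cond = 0; Cond <= 1; ++Cond)
      for (unsigned X = 0; X <= 1; ++X)
        for (unsigned C = 0; C <= 1; ++C) {
          unsigned Select = Cond ? X : C;
          unsigned Diff = X ^ C;                            // x - C (mod 2)
          unsigned UMinSeq = Cond == 0 ? 0 : (Cond & Diff); // umin_seq(cond, diff)
          unsigned Rewritten = C ^ UMinSeq;                 // C + umin_seq (mod 2)
          OK &= (Select == Rewritten);
        }
    std::cout << (OK ? "identity holds\n" : "mismatch\n"); // identity holds
  }
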
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { - // Don't attempt to analyze GEPs over unsized objects. - if (!GEP->getSourceElementType()->isSized()) - return getUnknown(GEP); + assert(GEP->getSourceElementType()->isSized() && + "GEP source element type must be sized"); SmallVector IndexExprs; for (Value *Index : GEP->indices()) @@ -6430,7 +6582,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // Check if the IR explicitly contains !range metadata. Optional MDRange = GetRangeFromMetadata(U->getValue()); - if (MDRange.hasValue()) + if (MDRange) ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(), RangeType); @@ -6719,7 +6871,7 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, FalseValue = *FalseVal; // Re-apply the cast we peeled off earlier - if (CastOp.hasValue()) + if (CastOp) switch (*CastOp) { default: llvm_unreachable("Unknown SCEV cast type!"); @@ -7020,6 +7172,211 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) { return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L)); } +const SCEV *ScalarEvolution::createSCEVIter(Value *V) { + // Worklist item with a Value and a bool indicating whether all operands have + // been visited already. + using PointerTy = PointerIntPair; + SmallVector Stack; + + Stack.emplace_back(V, true); + Stack.emplace_back(V, false); + while (!Stack.empty()) { + auto E = Stack.pop_back_val(); + Value *CurV = E.getPointer(); + + if (getExistingSCEV(CurV)) + continue; + + SmallVector Ops; + const SCEV *CreatedSCEV = nullptr; + // If all operands have been visited already, create the SCEV. + if (E.getInt()) { + CreatedSCEV = createSCEV(CurV); + } else { + // Otherwise get the operands we need to create SCEV's for before creating + // the SCEV for CurV. If the SCEV for CurV can be constructed trivially, + // just use it. + CreatedSCEV = getOperandsToCreate(CurV, Ops); + } + + if (CreatedSCEV) { + insertValueToMap(CurV, CreatedSCEV); + } else { + // Queue CurV for SCEV creation, followed by its's operands which need to + // be constructed first. + Stack.emplace_back(CurV, true); + for (Value *Op : Ops) + Stack.emplace_back(Op, false); + } + } + + return getExistingSCEV(V); +} + +const SCEV * +ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl &Ops) { + if (!isSCEVable(V->getType())) + return getUnknown(V); + + if (Instruction *I = dyn_cast(V)) { + // Don't attempt to analyze instructions in blocks that aren't + // reachable. Such instructions don't matter, and they aren't required + // to obey basic rules for definitions dominating uses which this + // analysis depends on. + if (!DT.isReachableFromEntry(I->getParent())) + return getUnknown(PoisonValue::get(V->getType())); + } else if (ConstantInt *CI = dyn_cast(V)) + return getConstant(CI); + else if (GlobalAlias *GA = dyn_cast(V)) { + if (!GA->isInterposable()) { + Ops.push_back(GA->getAliasee()); + return nullptr; + } + return getUnknown(V); + } else if (!isa(V)) + return getUnknown(V); + + Operator *U = cast(V); + if (auto BO = MatchBinaryOp(U, DT)) { + bool IsConstArg = isa(BO->RHS); + switch (U->getOpcode()) { + case Instruction::Add: { + // For additions and multiplications, traverse add/mul chains for which we + // can potentially create a single SCEV, to reduce the number of + // get{Add,Mul}Expr calls. 
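
createSCEVIter above trades recursion for an explicit stack of (Value, all-operands-visited) pairs: a value is pushed once to discover its operands and once more to be built after they are available, with a map acting as the memoization cache. The same two-phase worklist in a standalone sketch that evaluates an expression DAG iteratively (Node and evaluate are hypothetical):

  #include <iostream>
  #include <map>
  #include <utility>
  #include <vector>

  struct Node {
    int Leaf = 0;                 // used when Operands is empty
    std::vector<Node *> Operands; // interior nodes sum their operands
  };

  static int evaluate(Node *Root) {
    std::map<Node *, int> Cache;  // plays the role of ValueExprMap
    // Pair of (node, all operands already visited?).
    std::vector<std::pair<Node *, bool>> Stack;
    Stack.emplace_back(Root, false);
    while (!Stack.empty()) {
      auto [N, OperandsDone] = Stack.back();
      Stack.pop_back();
      if (Cache.count(N))
        continue;
      if (!OperandsDone) {
        // Revisit N after its operands have been computed.
        Stack.emplace_back(N, true);
        for (Node *Op : N->Operands)
          Stack.emplace_back(Op, false);
        continue;
      }
      int V = N->Leaf;
      for (Node *Op : N->Operands)
        V += Cache[Op]; // guaranteed present: operands were visited first
      Cache[N] = V;
    }
    return Cache[Root];
  }

  int main() {
    Node A{2}, B{3};
    Node Sum{0, {&A, &B}};
    Node Root{0, {&Sum, &A}}; // shared subexpression, computed once
    std::cout << evaluate(&Root) << '\n'; // (2+3) + 2 = 7
  }

Bounding stack depth this way is the usual cure for stack overflow on pathologically deep expression chains.
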
+ do { + if (BO->Op) { + if (BO->Op != V && getExistingSCEV(BO->Op)) { + Ops.push_back(BO->Op); + break; + } + } + Ops.push_back(BO->RHS); + auto NewBO = MatchBinaryOp(BO->LHS, DT); + if (!NewBO || (NewBO->Opcode != Instruction::Add && + NewBO->Opcode != Instruction::Sub)) { + Ops.push_back(BO->LHS); + break; + } + BO = NewBO; + } while (true); + return nullptr; + } + + case Instruction::Mul: { + do { + if (BO->Op) { + if (BO->Op != V && getExistingSCEV(BO->Op)) { + Ops.push_back(BO->Op); + break; + } + } + Ops.push_back(BO->RHS); + auto NewBO = MatchBinaryOp(BO->LHS, DT); + if (!NewBO || NewBO->Opcode != Instruction::Mul) { + Ops.push_back(BO->LHS); + break; + } + BO = NewBO; + } while (true); + return nullptr; + } + + case Instruction::AShr: + case Instruction::Shl: + case Instruction::Xor: + if (!IsConstArg) + return nullptr; + break; + case Instruction::And: + case Instruction::Or: + if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1)) + return nullptr; + break; + default: + break; + } + + Ops.push_back(BO->LHS); + Ops.push_back(BO->RHS); + return nullptr; + } + + switch (U->getOpcode()) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::PtrToInt: + Ops.push_back(U->getOperand(0)); + return nullptr; + + case Instruction::BitCast: + if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) { + Ops.push_back(U->getOperand(0)); + return nullptr; + } + return getUnknown(V); + + case Instruction::SDiv: + case Instruction::SRem: + Ops.push_back(U->getOperand(0)); + Ops.push_back(U->getOperand(1)); + return nullptr; + + case Instruction::GetElementPtr: + assert(cast(U)->getSourceElementType()->isSized() && + "GEP source element type must be sized"); + for (Value *Index : U->operands()) + Ops.push_back(Index); + return nullptr; + + case Instruction::IntToPtr: + return getUnknown(V); + + case Instruction::PHI: + // Keep constructing SCEVs' for phis recursively for now. + return nullptr; + + case Instruction::Select: + for (Value *Inc : U->operands()) + Ops.push_back(Inc); + return nullptr; + break; + + case Instruction::Call: + case Instruction::Invoke: + if (Value *RV = cast(U)->getReturnedArgOperand()) { + Ops.push_back(RV); + return nullptr; + } + + if (auto *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::abs: + Ops.push_back(II->getArgOperand(0)); + return nullptr; + case Intrinsic::umax: + case Intrinsic::umin: + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::usub_sat: + case Intrinsic::uadd_sat: + Ops.push_back(II->getArgOperand(0)); + Ops.push_back(II->getArgOperand(1)); + return nullptr; + case Intrinsic::start_loop_iterations: + Ops.push_back(II->getArgOperand(0)); + return nullptr; + default: + break; + } + } + break; + } + + return nullptr; +} + const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); @@ -7030,7 +7387,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // to obey basic rules for definitions dominating uses which this // analysis depends on. 
if (!DT.isReachableFromEntry(I->getParent())) - return getUnknown(UndefValue::get(V->getType())); + return getUnknown(PoisonValue::get(V->getType())); } else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (GlobalAlias *GA = dyn_cast(V)) @@ -7038,6 +7395,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { else if (!isa(V)) return getUnknown(V); + const SCEV *LHS; + const SCEV *RHS; + Operator *U = cast(V); if (auto BO = MatchBinaryOp(U, DT)) { switch (BO->Opcode) { @@ -7103,8 +7463,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { - MulOps.push_back( - getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags)); + LHS = getSCEV(BO->LHS); + RHS = getSCEV(BO->RHS); + MulOps.push_back(getMulExpr(LHS, RHS, Flags)); break; } } @@ -7121,14 +7482,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getMulExpr(MulOps); } case Instruction::UDiv: - return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); + LHS = getSCEV(BO->LHS); + RHS = getSCEV(BO->RHS); + return getUDivExpr(LHS, RHS); case Instruction::URem: - return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); + LHS = getSCEV(BO->LHS); + RHS = getSCEV(BO->RHS); + return getURemExpr(LHS, RHS); case Instruction::Sub: { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BO->Op) Flags = getNoWrapFlagsFromUB(BO->Op); - return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); + LHS = getSCEV(BO->LHS); + RHS = getSCEV(BO->RHS); + return getMinusSCEV(LHS, RHS, Flags); } case Instruction::And: // For an expression like x&255 that merely masks off the high bits, @@ -7180,6 +7547,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { MulCount); } } + // Binary `and` is a bit-wise `umin`. + if (BO->LHS->getType()->isIntegerTy(1)) { + LHS = getSCEV(BO->LHS); + RHS = getSCEV(BO->RHS); + return getUMinExpr(LHS, RHS); + } break; case Instruction::Or: @@ -7199,6 +7572,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW)); } } + // Binary `or` is a bit-wise `umax`. + if (BO->LHS->getType()->isIntegerTy(1)) { + LHS = getSCEV(BO->LHS); + RHS = getSCEV(BO->RHS); + return getUMaxExpr(LHS, RHS); + } break; case Instruction::Xor: @@ -7266,9 +7645,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW); } - Constant *X = ConstantInt::get( + ConstantInt *X = ConstantInt::get( getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); - return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); + return getMulExpr(getSCEV(BO->LHS), getConstant(X), Flags); } break; @@ -7394,14 +7773,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return createNodeForPHI(cast(U)); case Instruction::Select: - // U can also be a select constant expr, which let fall through. Since - // createNodeForSelect only works for a condition that is an `ICmpInst`, and - // constant expressions cannot have instructions as operands, we'd have - // returned getUnknown for a select constant expressions anyway. 
- if (isa(U)) - return createNodeForSelectOrPHI(cast(U), U->getOperand(0), - U->getOperand(1), U->getOperand(2)); - break; + return createNodeForSelectOrPHI(U, U->getOperand(0), U->getOperand(1), + U->getOperand(2)); case Instruction::Call: case Instruction::Invoke: @@ -7415,17 +7788,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { getSCEV(II->getArgOperand(0)), /*IsNSW=*/cast(II->getArgOperand(1))->isOne()); case Intrinsic::umax: - return getUMaxExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); + LHS = getSCEV(II->getArgOperand(0)); + RHS = getSCEV(II->getArgOperand(1)); + return getUMaxExpr(LHS, RHS); case Intrinsic::umin: - return getUMinExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); + LHS = getSCEV(II->getArgOperand(0)); + RHS = getSCEV(II->getArgOperand(1)); + return getUMinExpr(LHS, RHS); case Intrinsic::smax: - return getSMaxExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); + LHS = getSCEV(II->getArgOperand(0)); + RHS = getSCEV(II->getArgOperand(1)); + return getSMaxExpr(LHS, RHS); case Intrinsic::smin: - return getSMinExpr(getSCEV(II->getArgOperand(0)), - getSCEV(II->getArgOperand(1))); + LHS = getSCEV(II->getArgOperand(0)); + RHS = getSCEV(II->getArgOperand(1)); + return getSMinExpr(LHS, RHS); case Intrinsic::usub_sat: { const SCEV *X = getSCEV(II->getArgOperand(0)); const SCEV *Y = getSCEV(II->getArgOperand(1)); @@ -7640,7 +8017,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { Res = Multiple; Res = (unsigned)GreatestCommonDivisor64(*Res, Multiple); } - return Res.getValueOr(1); + return Res.value_or(1); } unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, @@ -7708,7 +8085,7 @@ const SCEV *ScalarEvolution::getExitCount(const Loop *L, const SCEV * ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, - SCEVUnionPredicate &Preds) { + SmallVector &Preds) { return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); } @@ -7870,7 +8247,6 @@ void ScalarEvolution::forgetLoop(const Loop *L) { if (LoopUsersItr != LoopUsers.end()) { ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(), LoopUsersItr->second.end()); - LoopUsers.erase(LoopUsersItr); } // Drop information about expressions based on loop-header PHIs. @@ -7900,9 +8276,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) { } void ScalarEvolution::forgetTopmostLoop(const Loop *L) { - while (Loop *Parent = L->getParentLoop()) - L = Parent; - forgetLoop(L); + forgetLoop(L->getOutermostLoop()); } void ScalarEvolution::forgetValue(Value *V) { @@ -7944,7 +8318,7 @@ void ScalarEvolution::forgetLoopDispositions(const Loop *L) { /// the relevant loop exiting block using getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, - SCEVUnionPredicate *Preds) const { + SmallVector *Preds) const { // If any exits were not computable, the loop is not computable. 
if (!isComplete() || ExitNotTaken.empty()) return SE->getCouldNotCompute(); @@ -7966,14 +8340,18 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, Ops.push_back(BECount); - if (Preds && !ENT.hasAlwaysTruePredicate()) - Preds->add(ENT.Predicate.get()); + if (Preds) + for (auto *P : ENT.Predicates) + Preds->push_back(P); assert((Preds || ENT.hasAlwaysTruePredicate()) && "Predicate should be always true!"); } - return SE->getUMinFromMismatchedTypes(Ops); + // If an earlier exit exits on the first iteration (exit count zero), then + // a later poison exit count should not propagate into the result. These are + // exactly the semantics provided by umin_seq. + return SE->getUMinFromMismatchedTypes(Ops, /* Sequential */ true); } /// Get the exact not taken count for this loop exit. @@ -8082,16 +8460,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( [&](const EdgeExitInfo &EEI) { BasicBlock *ExitBB = EEI.first; const ExitLimit &EL = EEI.second; - if (EL.Predicates.empty()) return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, - nullptr); - - std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate); - for (auto *Pred : EL.Predicates) - Predicate->add(Pred); - return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, - std::move(Predicate)); + EL.Predicates); }); assert((isa<SCEVCouldNotCompute>(ConstantMax) || isa<SCEVConstant>(ConstantMax)) && @@ -8385,11 +8755,6 @@ ScalarEvolution::computeExitLimitFromCondFromBinOp( BECount = getUMinFromMismatchedTypes( EL0.ExactNotTaken, EL1.ExactNotTaken, /*Sequential=*/!isa<BinaryOperator>(ExitCond)); - - // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form, - // it should have been simplified to zero (see the condition (3) above) - assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() || - BECount->isZero()); } if (EL0.MaxNotTaken == getCouldNotCompute()) MaxBECount = EL1.MaxNotTaken; @@ -8470,7 +8835,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L); // Simplify the operands before analyzing them. (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0, - ControllingFiniteLoop); + (EnableFiniteLoopControl ? ControllingFiniteLoop + : false)); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. @@ -8683,7 +9049,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( // and the kind of shift should match the kind of shift we peeled // off, if any.
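The Sequential form requested in getExact above is what makes the early-exit case sound: a zero earlier in the operand list must hide a poison count from a later exit. A minimal standalone sketch of that umin_seq property, modelling a poison exit count as std::nullopt (Count and uminSeq are illustrative names, not LLVM API):

#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

using Count = std::optional<uint64_t>; // std::nullopt models a poison count

Count uminSeq(const std::vector<Count> &Ops) {
  uint64_t Min = UINT64_MAX;
  for (const Count &C : Ops) {
    if (Min == 0)
      return 0; // an earlier zero shields every later operand, even poison
    if (!C)
      return std::nullopt; // poison seen before a zero poisons the result
    Min = std::min(Min, *C);
  }
  return Min;
}

// uminSeq({0, std::nullopt}) == 0, whereas a plain (non-sequential) umin over
// the same operands would be poison.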
- (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); + (!PostShiftOpCode || *PostShiftOpCode == OpCodeOut); }; PHINode *PN; @@ -8871,13 +9237,6 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, Operands[i] = C; } - if (CmpInst *CI = dyn_cast(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], DL, TLI); - if (LoadInst *LI = dyn_cast(I)) { - if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL); - } return ConstantFoldInstOperands(I, Operands, DL, TLI); } @@ -9121,58 +9480,42 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } case scAddExpr: { const SCEVAddExpr *SA = cast(V); - if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { - if (PointerType *PTy = dyn_cast(C->getType())) { - unsigned AS = PTy->getAddressSpace(); - Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); - C = ConstantExpr::getBitCast(C, DestPtrTy); + Constant *C = nullptr; + for (const SCEV *Op : SA->operands()) { + Constant *OpC = BuildConstantFromSCEV(Op); + if (!OpC) + return nullptr; + if (!C) { + C = OpC; + continue; } - for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { - Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); - if (!C2) - return nullptr; - - // First pointer! - if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { - unsigned AS = C2->getType()->getPointerAddressSpace(); - std::swap(C, C2); - Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); - // The offsets have been converted to bytes. We can add bytes to an - // i8* by GEP with the byte count in the first index. - C = ConstantExpr::getBitCast(C, DestPtrTy); - } - - // Don't bother trying to sum two pointers. We probably can't - // statically compute a load that results from it anyway. - if (C2->getType()->isPointerTy()) - return nullptr; - - if (C->getType()->isPointerTy()) { - C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()), - C, C2); - } else { - C = ConstantExpr::getAdd(C, C2); - } + assert(!C->getType()->isPointerTy() && + "Can only have one pointer, and it must be last"); + if (auto *PT = dyn_cast(OpC->getType())) { + // The offsets have been converted to bytes. We can add bytes to an + // i8* by GEP with the byte count in the first index. + Type *DestPtrTy = + Type::getInt8PtrTy(PT->getContext(), PT->getAddressSpace()); + OpC = ConstantExpr::getBitCast(OpC, DestPtrTy); + C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()), + OpC, C); + } else { + C = ConstantExpr::getAdd(C, OpC); } - return C; } - return nullptr; + return C; } case scMulExpr: { const SCEVMulExpr *SM = cast(V); - if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { - // Don't bother with pointers at all. - if (C->getType()->isPointerTy()) + Constant *C = nullptr; + for (const SCEV *Op : SM->operands()) { + assert(!Op->getType()->isPointerTy() && "Can't multiply pointers"); + Constant *OpC = BuildConstantFromSCEV(Op); + if (!OpC) return nullptr; - for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { - Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); - if (!C2 || C2->getType()->isPointerTy()) - return nullptr; - C = ConstantExpr::getMul(C, C2); - } - return C; + C = C ? 
ConstantExpr::getMul(C, OpC) : OpC; } - return nullptr; + return C; } case scUDivExpr: { const SCEVUDivExpr *SU = cast(V); @@ -9297,15 +9640,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (MadeImprovement) { Constant *C = nullptr; const DataLayout &DL = getDataLayout(); - if (const CmpInst *CI = dyn_cast(I)) - C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], DL, &TLI); - else if (const LoadInst *Load = dyn_cast(I)) { - if (!Load->isVolatile()) - C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(), - DL); - } else - C = ConstantFoldInstOperands(I, Operands, DL, &TLI); + C = ConstantFoldInstOperands(I, Operands, DL, &TLI); if (!C) return V; return getSCEV(C); } @@ -9535,15 +9870,15 @@ GetQuadraticEquation(const SCEVAddRecExpr *AddRec) { /// (b) if neither X nor Y exist, return None, /// (c) if exactly one of X and Y exists, return that value. static Optional MinOptional(Optional X, Optional Y) { - if (X.hasValue() && Y.hasValue()) { + if (X && Y) { unsigned W = std::max(X->getBitWidth(), Y->getBitWidth()); - APInt XW = X->sextOrSelf(W); - APInt YW = Y->sextOrSelf(W); + APInt XW = X->sext(W); + APInt YW = Y->sext(W); return XW.slt(YW) ? *X : *Y; } - if (!X.hasValue() && !Y.hasValue()) + if (!X && !Y) return None; - return X.hasValue() ? *X : *Y; + return X ? *X : *Y; } /// Helper function to truncate an optional APInt to a given BitWidth. @@ -9558,7 +9893,7 @@ static Optional MinOptional(Optional X, Optional Y) { /// equation are BW+1 bits wide (to avoid truncation when converting from /// the addrec to the equation). static Optional TruncIfPossible(Optional X, unsigned BitWidth) { - if (!X.hasValue()) + if (!X) return None; unsigned W = X->getBitWidth(); if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth)) @@ -9585,13 +9920,13 @@ SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { APInt A, B, C, M; unsigned BitWidth; auto T = GetQuadraticEquation(AddRec); - if (!T.hasValue()) + if (!T) return None; std::tie(A, B, C, M, BitWidth) = *T; LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n"); Optional X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1); - if (!X.hasValue()) + if (!X) return None; ConstantInt *CX = ConstantInt::get(SE.getContext(), *X); @@ -9627,7 +9962,7 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, APInt A, B, C, M; unsigned BitWidth; auto T = GetQuadraticEquation(AddRec); - if (!T.hasValue()) + if (!T) return None; // Be careful about the return value: there can be two reasons for not @@ -9672,7 +10007,7 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, // If SolveQuadraticEquationWrap returns None, it means that there can // be a solution, but the function failed to find it. We cannot treat it // as "no solution". - if (!SO.hasValue() || !UO.hasValue()) + if (!SO || !UO) return { None, false }; // Check the smaller value first to see if it leaves the range. @@ -9690,8 +10025,8 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, std::tie(A, B, C, M, BitWidth) = *T; // Lower bound is inclusive, subtract 1 to represent the exiting value. 
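Much of the surrounding churn is a mechanical migration of llvm::Optional call sites (hasValue, getValueOr, sextOrSelf) to the std::optional-style surface (operator bool, value_or, sext). A standalone sketch of the target idioms, mirroring MinOptional's structure with std::optional<int> for brevity (minOrDefault is a hypothetical name, not part of the patch):

#include <optional>

int minOrDefault(std::optional<int> X, std::optional<int> Y, int Default) {
  if (X && Y)       // was: X.hasValue() && Y.hasValue()
    return *X < *Y ? *X : *Y;
  if (!X && !Y)     // was: !X.hasValue() && !Y.hasValue()
    return Default; // the None case; X.value_or(Default) likewise replaces
                    // X.getValueOr(Default) at single-optional call sites
  return X ? *X : *Y; // exactly MinOptional's fallback case
}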
- APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1; - APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth()); + APInt Lower = Range.getLower().sext(A.getBitWidth()) - 1; + APInt Upper = Range.getUpper().sext(A.getBitWidth()); auto SL = SolveForBoundary(Lower); auto SU = SolveForBoundary(Upper); // If any of the solutions was unknown, no meaningful conclusions can @@ -9776,7 +10111,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) { - const auto *R = cast<SCEVConstant>(getConstant(S.getValue())); + const auto *R = cast<SCEVConstant>(getConstant(*S)); return ExitLimit(R, R, false, Predicates); } return getCouldNotCompute(); @@ -10296,7 +10631,7 @@ ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS, auto ResultSwapped = getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred)); - assert(ResultSwapped.hasValue() && "should be able to analyze both!"); + assert(ResultSwapped && "should be able to analyze both!"); assert(ResultSwapped.getValue() != Result.getValue() && "monotonicity should flip as we flip the predicate"); } @@ -10479,17 +10814,27 @@ bool ScalarEvolution::isKnownPredicateViaConstantRanges( return false; if (Pred == CmpInst::ICMP_NE) { - if (CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) || - CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS))) + auto SL = getSignedRange(LHS); + auto SR = getSignedRange(RHS); + if (CheckRanges(SL, SR)) + return true; + auto UL = getUnsignedRange(LHS); + auto UR = getUnsignedRange(RHS); + if (CheckRanges(UL, UR)) return true; auto *Diff = getMinusSCEV(LHS, RHS); return !isa<SCEVCouldNotCompute>(Diff) && isKnownNonZero(Diff); } - if (CmpInst::isSigned(Pred)) - return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)); + if (CmpInst::isSigned(Pred)) { + auto SL = getSignedRange(LHS); + auto SR = getSignedRange(RHS); + return CheckRanges(SL, SR); + } - return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)); + auto UL = getUnsignedRange(LHS); + auto UR = getUnsignedRange(RHS); + return CheckRanges(UL, UR); } bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, @@ -12596,7 +12941,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, if (isQuadratic()) { if (auto S = SolveQuadraticAddRecRange(this, Range, SE)) - return SE.getConstant(S.getValue()); + return SE.getConstant(*S); } return SE.getCouldNotCompute(); @@ -12636,6 +12981,15 @@ bool ScalarEvolution::containsUndefs(const SCEV *S) const { }); } +// Return true when S contains a value that is a nullptr. +bool ScalarEvolution::containsErasedValue(const SCEV *S) const { + return SCEVExprContains(S, [](const SCEV *S) { + if (const auto *SU = dyn_cast<SCEVUnknown>(S)) + return SU->getValue() == nullptr; + return false; + }); +} + /// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { Type *Ty; @@ -12820,12 +13174,13 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - SCEVUnionPredicate Pred; - auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); + SmallVector Preds; + auto PBT = SE->getPredicatedBackedgeTakenCount(L, Preds); if (!isa(PBT)) { OS << "Predicated backedge-taken count is " << *PBT << "\n"; OS << " Predicates:\n"; - Pred.print(OS, 4); + for (auto *P : Preds) + P->print(OS, 4); } else { OS << "Unpredictable predicated backedge-taken count. "; } @@ -13202,12 +13557,10 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) { auto ExprIt = ExprValueMap.find(S); if (ExprIt != ExprValueMap.end()) { - for (auto &ValueAndOffset : ExprIt->second) { - if (ValueAndOffset.second == nullptr) { - auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first); - if (ValueIt != ValueExprMap.end()) - ValueExprMap.erase(ValueIt); - } + for (Value *V : ExprIt->second) { + auto ValueIt = ValueExprMap.find_as(V); + if (ValueIt != ValueExprMap.end()) + ValueExprMap.erase(ValueIt); } ExprValueMap.erase(ExprIt); } @@ -13258,6 +13611,43 @@ ScalarEvolution::getUsedLoops(const SCEV *S, SCEVTraversal(F).visitAll(S); } +void ScalarEvolution::getReachableBlocks( + SmallPtrSetImpl &Reachable, Function &F) { + SmallVector Worklist; + Worklist.push_back(&F.getEntryBlock()); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (!Reachable.insert(BB).second) + continue; + + Value *Cond; + BasicBlock *TrueBB, *FalseBB; + if (match(BB->getTerminator(), m_Br(m_Value(Cond), m_BasicBlock(TrueBB), + m_BasicBlock(FalseBB)))) { + if (auto *C = dyn_cast(Cond)) { + Worklist.push_back(C->isOne() ? TrueBB : FalseBB); + continue; + } + + if (auto *Cmp = dyn_cast(Cond)) { + const SCEV *L = getSCEV(Cmp->getOperand(0)); + const SCEV *R = getSCEV(Cmp->getOperand(1)); + if (isKnownPredicateViaConstantRanges(Cmp->getPredicate(), L, R)) { + Worklist.push_back(TrueBB); + continue; + } + if (isKnownPredicateViaConstantRanges(Cmp->getInversePredicate(), L, + R)) { + Worklist.push_back(FalseBB); + continue; + } + } + } + + append_range(Worklist, successors(BB)); + } +} + void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); @@ -13282,13 +13672,44 @@ void ScalarEvolution::verify() const { }; SCEVMapper SCM(SE2); + SmallPtrSet ReachableBlocks; + SE2.getReachableBlocks(ReachableBlocks, F); + + auto GetDelta = [&](const SCEV *Old, const SCEV *New) -> const SCEV * { + if (containsUndefs(Old) || containsUndefs(New)) { + // SCEV treats "undef" as an unknown but consistent value (i.e. it does + // not propagate undef aggressively). This means we can (and do) fail + // verification in cases where a transform makes a value go from "undef" + // to "undef+1" (say). The transform is fine, since in both cases the + // result is "undef", but SCEV thinks the value increased by 1. + return nullptr; + } + + // Unless VerifySCEVStrict is set, we only compare constant deltas. + const SCEV *Delta = SE2.getMinusSCEV(Old, New); + if (!VerifySCEVStrict && !isa(Delta)) + return nullptr; + + return Delta; + }; while (!LoopStack.empty()) { auto *L = LoopStack.pop_back_val(); llvm::append_range(LoopStack, *L); - auto *CurBECount = SCM.visit( - const_cast(this)->getBackedgeTakenCount(L)); + // Only verify BECounts in reachable loops. For an unreachable loop, + // any BECount is legal. 
+ if (!ReachableBlocks.contains(L->getHeader())) + continue; + + // Only verify cached BECounts. Computing new BECounts may change the + // results of subsequent SCEV uses. + auto It = BackedgeTakenCounts.find(L); + if (It == BackedgeTakenCounts.end()) + continue; + + auto *CurBECount = + SCM.visit(It->second.getExact(L, const_cast(this))); auto *NewBECount = SE2.getBackedgeTakenCount(L); if (CurBECount == SE2.getCouldNotCompute() || @@ -13301,16 +13722,6 @@ void ScalarEvolution::verify() const { continue; } - if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) { - // SCEV treats "undef" as an unknown but consistent value (i.e. it does - // not propagate undef aggressively). This means we can (and do) fail - // verification in cases where a transform makes the trip count of a loop - // go from "undef" to "undef+1" (say). The transform is fine, since in - // both cases the loop iterates "undef" times, but SCEV thinks we - // increased the trip count of the loop by 1 incorrectly. - continue; - } - if (SE.getTypeSizeInBits(CurBECount->getType()) > SE.getTypeSizeInBits(NewBECount->getType())) NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType()); @@ -13318,10 +13729,8 @@ void ScalarEvolution::verify() const { SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); - const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount); - - // Unless VerifySCEVStrict is set, we only compare constant deltas. - if ((VerifySCEVStrict || isa(Delta)) && !Delta->isZero()) { + const SCEV *Delta = GetDelta(CurBECount, NewBECount); + if (Delta && !Delta->isZero()) { dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; @@ -13335,10 +13744,8 @@ void ScalarEvolution::verify() const { SmallVector Worklist(LI.begin(), LI.end()); while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); - if (ValidLoops.contains(L)) - continue; - ValidLoops.insert(L); - Worklist.append(L->begin(), L->end()); + if (ValidLoops.insert(L).second) + Worklist.append(L->begin(), L->end()); } for (auto &KV : ValueExprMap) { #ifndef NDEBUG @@ -13351,27 +13758,38 @@ void ScalarEvolution::verify() const { // Check that the value is also part of the reverse map. 
auto It = ExprValueMap.find(KV.second); - if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) { + if (It == ExprValueMap.end() || !It->second.contains(KV.first)) { dbgs() << "Value " << *KV.first << " is in ValueExprMap but not in ExprValueMap\n"; std::abort(); } - } - for (const auto &KV : ExprValueMap) { - for (const auto &ValueAndOffset : KV.second) { - if (ValueAndOffset.second != nullptr) + if (auto *I = dyn_cast(&*KV.first)) { + if (!ReachableBlocks.contains(I->getParent())) continue; + const SCEV *OldSCEV = SCM.visit(KV.second); + const SCEV *NewSCEV = SE2.getSCEV(I); + const SCEV *Delta = GetDelta(OldSCEV, NewSCEV); + if (Delta && !Delta->isZero()) { + dbgs() << "SCEV for value " << *I << " changed!\n" + << "Old: " << *OldSCEV << "\n" + << "New: " << *NewSCEV << "\n" + << "Delta: " << *Delta << "\n"; + std::abort(); + } + } + } - auto It = ValueExprMap.find_as(ValueAndOffset.first); + for (const auto &KV : ExprValueMap) { + for (Value *V : KV.second) { + auto It = ValueExprMap.find_as(V); if (It == ValueExprMap.end()) { - dbgs() << "Value " << *ValueAndOffset.first + dbgs() << "Value " << *V << " is in ExprValueMap but not in ValueExprMap\n"; std::abort(); } if (It->second != KV.first) { - dbgs() << "Value " << *ValueAndOffset.first - << " mapped to " << *It->second + dbgs() << "Value " << *V << " mapped to " << *It->second << " rather than " << *KV.first << "\n"; std::abort(); } @@ -13537,18 +13955,25 @@ void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS, const SCEV *RHS) { + return getComparePredicate(ICmpInst::ICMP_EQ, LHS, RHS); +} + +const SCEVPredicate * +ScalarEvolution::getComparePredicate(const ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { FoldingSetNodeID ID; assert(LHS->getType() == RHS->getType() && "Type mismatch between LHS and RHS"); // Unique this node based on the arguments - ID.AddInteger(SCEVPredicate::P_Equal); + ID.AddInteger(SCEVPredicate::P_Compare); + ID.AddInteger(Pred); ID.AddPointer(LHS); ID.AddPointer(RHS); void *IP = nullptr; if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) return S; - SCEVEqualPredicate *Eq = new (SCEVAllocator) - SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS); + SCEVComparePredicate *Eq = new (SCEVAllocator) + SCEVComparePredicate(ID.Intern(SCEVAllocator), Pred, LHS, RHS); UniquePreds.InsertNode(Eq, IP); return Eq; } @@ -13585,18 +14010,24 @@ public: /// \p NewPreds such that the result will be an AddRecExpr. 
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, - SCEVUnionPredicate *Pred) { + const SCEVPredicate *Pred) { SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred); return Rewriter.visit(S); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (Pred) { - auto ExprPreds = Pred->getPredicatesForExpr(Expr); - for (auto *Pred : ExprPreds) - if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred)) - if (IPred->getLHS() == Expr) - return IPred->getRHS(); + if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) { + for (auto *Pred : U->getPredicates()) + if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) + if (IPred->getLHS() == Expr && + IPred->getPredicate() == ICmpInst::ICMP_EQ) + return IPred->getRHS(); + } else if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) { + if (IPred->getLHS() == Expr && + IPred->getPredicate() == ICmpInst::ICMP_EQ) + return IPred->getRHS(); + } } return convertToAddRecWithPreds(Expr); } @@ -13636,7 +14067,7 @@ public: private: explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, - SCEVUnionPredicate *Pred) + const SCEVPredicate *Pred) : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} bool addOverflowAssumption(const SCEVPredicate *P) { @@ -13670,8 +14101,7 @@ private: for (auto *P : PredicatedRewrite->second) { // Wrap predicates from outer loops are not supported. if (auto *WP = dyn_cast<SCEVWrapPredicate>(P)) { - auto *AR = cast<SCEVAddRecExpr>(WP->getExpr()); - if (L != AR->getLoop()) + if (L != WP->getExpr()->getLoop()) return Expr; } if (!addOverflowAssumption(P)) @@ -13681,14 +14111,15 @@ } SmallPtrSetImpl<const SCEVPredicate *> *NewPreds; - SCEVUnionPredicate *Pred; + const SCEVPredicate *Pred; const Loop *L; }; } // end anonymous namespace -const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, - SCEVUnionPredicate &Preds) { +const SCEV * +ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, + const SCEVPredicate &Preds) { return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds); } @@ -13715,28 +14146,36 @@ SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind) : FastID(ID), Kind(Kind) {} -SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, - const SCEV *LHS, const SCEV *RHS) - : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) { +SCEVComparePredicate::SCEVComparePredicate(const FoldingSetNodeIDRef ID, + const ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) + : SCEVPredicate(ID, P_Compare), Pred(Pred), LHS(LHS), RHS(RHS) { assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); assert(LHS != RHS && "LHS and RHS are the same SCEV"); } -bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { - const auto *Op = dyn_cast<SCEVEqualPredicate>(N); +bool SCEVComparePredicate::implies(const SCEVPredicate *N) const { + const auto *Op = dyn_cast<SCEVComparePredicate>(N); if (!Op) return false; + if (Pred != ICmpInst::ICMP_EQ) + return false; + return Op->LHS == LHS && Op->RHS == RHS; } -bool SCEVEqualPredicate::isAlwaysTrue() const { return false; } +bool SCEVComparePredicate::isAlwaysTrue() const { return false; } -const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; } +void SCEVComparePredicate::print(raw_ostream &OS, unsigned Depth) const { + if (Pred == ICmpInst::ICMP_EQ) + OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; + else + OS.indent(Depth) << "Compare predicate: " << *LHS + << " " << CmpInst::getPredicateName(Pred) << " " + << *RHS << "\n"; -void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const { -
OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; } SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID, @@ -13744,7 +14183,7 @@ SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID, IncrementWrapFlags Flags) : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {} -const SCEV *SCEVWrapPredicate::getExpr() const { return AR; } +const SCEVAddRecExpr *SCEVWrapPredicate::getExpr() const { return AR; } bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const { const auto *Op = dyn_cast(N); @@ -13793,38 +14232,26 @@ SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, } /// Union predicates don't get cached so create a dummy set ID for it. -SCEVUnionPredicate::SCEVUnionPredicate() - : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} +SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef Preds) + : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) { + for (auto *P : Preds) + add(P); +} bool SCEVUnionPredicate::isAlwaysTrue() const { return all_of(Preds, [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); } -ArrayRef -SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { - auto I = SCEVToPreds.find(Expr); - if (I == SCEVToPreds.end()) - return ArrayRef(); - return I->second; -} - bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { if (const auto *Set = dyn_cast(N)) return all_of(Set->Preds, [this](const SCEVPredicate *I) { return this->implies(I); }); - auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); - if (ScevPredsIt == SCEVToPreds.end()) - return false; - auto &SCEVPreds = ScevPredsIt->second; - - return any_of(SCEVPreds, + return any_of(Preds, [N](const SCEVPredicate *I) { return I->implies(N); }); } -const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } - void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { for (auto Pred : Preds) Pred->print(OS, Depth); @@ -13837,20 +14264,15 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) { return; } - if (implies(N)) - return; - - const SCEV *Key = N->getExpr(); - assert(Key && "Only SCEVUnionPredicate doesn't have an " - " associated expression!"); - - SCEVToPreds[Key].push_back(N); Preds.push_back(N); } PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) - : SE(SE), L(L) {} + : SE(SE), L(L) { + SmallVector Empty; + Preds = std::make_unique(Empty); +} void ScalarEvolution::registerUser(const SCEV *User, ArrayRef Ops) { @@ -13875,7 +14297,7 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { if (Entry.second) Expr = Entry.second; - const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds); + const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, *Preds); Entry = {Generation, NewSCEV}; return NewSCEV; @@ -13883,22 +14305,27 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { if (!BackedgeCount) { - SCEVUnionPredicate BackedgePred; - BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); - addPredicate(BackedgePred); + SmallVector Preds; + BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds); + for (auto *P : Preds) + addPredicate(*P); } return BackedgeCount; } void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { - if (Preds.implies(&Pred)) + if (Preds->implies(&Pred)) return; - Preds.add(&Pred); + + auto &OldPreds = Preds->getPredicates(); + SmallVector NewPreds(OldPreds.begin(), OldPreds.end()); + NewPreds.push_back(&Pred); + Preds = 
std::make_unique(NewPreds); updateGeneration(); } -const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { - return Preds; +const SCEVPredicate &PredicatedScalarEvolution::getPredicate() const { + return *Preds; } void PredicatedScalarEvolution::updateGeneration() { @@ -13906,7 +14333,7 @@ void PredicatedScalarEvolution::updateGeneration() { if (++Generation == 0) { for (auto &II : RewriteMap) { const SCEV *Rewritten = II.second.second; - II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)}; + II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, *Preds)}; } } } @@ -13952,17 +14379,17 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { return nullptr; for (auto *P : NewPreds) - Preds.add(P); + addPredicate(*P); - updateGeneration(); RewriteMap[SE.getSCEV(V)] = {Generation, New}; return New; } PredicatedScalarEvolution::PredicatedScalarEvolution( const PredicatedScalarEvolution &Init) - : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), - Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { + : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), + Preds(std::make_unique(Init.Preds->getPredicates())), + Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (auto I : Init.FlagsMap) FlagsMap.insert(I); } @@ -14243,12 +14670,23 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { ExprsToRewrite.push_back(LHS); } }; - // First, collect conditions from dominating branches. Starting at the loop + + SmallVector> Terms; + // First, collect information from assumptions dominating the loop. + for (auto &AssumeVH : AC.assumptions()) { + if (!AssumeVH) + continue; + auto *AssumeI = cast(AssumeVH); + if (!DT.dominates(AssumeI, L->getHeader())) + continue; + Terms.emplace_back(AssumeI->getOperand(0), true); + } + + // Second, collect conditions from dominating branches. Starting at the loop // predecessor, climb up the predecessor chain, as long as there are // predecessors that can be found that have unique successors leading to the // original header. // TODO: share this logic with isLoopEntryGuardedByCond. - SmallVector> Terms; for (std::pair Pair( L->getLoopPredecessor(), L->getHeader()); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { @@ -14280,8 +14718,9 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { if (auto *Cmp = dyn_cast(Cond)) { auto Predicate = EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate(); - CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)), - getSCEV(Cmp->getOperand(1)), RewriteMap); + const auto *LHS = getSCEV(Cmp->getOperand(0)); + const auto *RHS = getSCEV(Cmp->getOperand(1)); + CollectCondition(Predicate, LHS, RHS, RewriteMap); continue; } @@ -14294,18 +14733,6 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { } } - // Also collect information from assumptions dominating the loop. 
- for (auto &AssumeVH : AC.assumptions()) { - if (!AssumeVH) - continue; - auto *AssumeI = cast(AssumeVH); - auto *Cmp = dyn_cast(AssumeI->getOperand(0)); - if (!Cmp || !DT.dominates(AssumeI, L->getHeader())) - continue; - CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)), - getSCEV(Cmp->getOperand(1)), RewriteMap); - } - if (RewriteMap.empty()) return Expr; diff --git a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index f4fa159d1ec7..3d47dc6b30df 100644 --- a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp index 64e908bdf342..0619569bf816 100644 --- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp @@ -15,9 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/IR/Constants.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" #include #include diff --git a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index 209ae66ca53e..22dff5efec5c 100644 --- a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" using namespace llvm; diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp index e847bf8f0f6b..f510991b4463 100644 --- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -36,7 +36,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp index 9056cc01484d..52e8566aca3c 100644 --- a/llvm/lib/Analysis/StackLifetime.cpp +++ b/llvm/lib/Analysis/StackLifetime.cpp @@ -19,17 +19,12 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include -#include #include using namespace llvm; @@ -75,7 +70,7 @@ static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II, auto AllocaSizeInBits = AI->getAllocationSizeInBits(DL); if (!AllocaSizeInBits) return nullptr; - int64_t AllocaSize = AllocaSizeInBits.getValue() / 8; + int64_t AllocaSize = *AllocaSizeInBits / 8; auto *Size = dyn_cast(II.getArgOperand(0)); if (!Size) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 54f3605ee033..94b646ab7c06 100644 --- 
a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DerivedTypes.h" @@ -384,9 +383,9 @@ bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI, const SCEV *Max = SE.getMinusSCEV(ToDiffTy(SE.getConstant(Size.getUpper())), ToDiffTy(AccessSize)); return SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SGE, Diff, Min, I) - .getValueOr(false) && + .value_or(false) && SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SLE, Diff, Max, I) - .getValueOr(false); + .value_or(false); } /// The function analyzes all local uses of Ptr (alloca or argument) and diff --git a/llvm/lib/Analysis/StratifiedSets.h b/llvm/lib/Analysis/StratifiedSets.h index 60ea2451b0ef..883ebd24efdc 100644 --- a/llvm/lib/Analysis/StratifiedSets.h +++ b/llvm/lib/Analysis/StratifiedSets.h @@ -340,10 +340,10 @@ public: return StratifiedSets(std::move(Values), std::move(StratLinks)); } - bool has(const T &Elem) const { return get(Elem).hasValue(); } + bool has(const T &Elem) const { return get(Elem).has_value(); } bool add(const T &Main) { - if (get(Main).hasValue()) + if (get(Main)) return false; auto NewIndex = getNewUnlinkedIndex(); @@ -560,7 +560,7 @@ private: Optional indexOf(const T &Val) { auto MaybeVal = get(Val); - if (!MaybeVal.hasValue()) + if (!MaybeVal) return None; auto *Info = *MaybeVal; auto &Link = linksAt(Info->Index); diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index ff833b55bbce..3446e50a4344 100644 --- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -116,18 +116,16 @@ // around from the latch. 
// //===----------------------------------------------------------------------===// + #include "llvm/Analysis/SyncDependenceAnalysis.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include -#include -#include #define DEBUG_TYPE "sync-dependence" @@ -257,7 +255,7 @@ SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT, [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); }); } -SyncDependenceAnalysis::~SyncDependenceAnalysis() {} +SyncDependenceAnalysis::~SyncDependenceAnalysis() = default; // divergence propagator for reducible CFGs struct DivergencePropagator { diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp index a3edce76cd88..29c41fda5e28 100644 --- a/llvm/lib/Analysis/SyntheticCountsUtils.cpp +++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp @@ -14,9 +14,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/ModuleSummaryIndex.h" using namespace llvm; @@ -57,7 +54,7 @@ void SyntheticCountsUtils::propagateFromSCC( if (!OptProfCount) continue; auto Callee = CGT::edge_dest(E.second); - AdditionalCounts[Callee] += OptProfCount.getValue(); + AdditionalCounts[Callee] += *OptProfCount; } // Update the counts for the nodes in the SCC. @@ -70,7 +67,7 @@ void SyntheticCountsUtils::propagateFromSCC( if (!OptProfCount) continue; auto Callee = CGT::edge_dest(E.second); - AddCount(Callee, OptProfCount.getValue()); + AddCount(Callee, *OptProfCount); } } diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp index 26bc63983b4e..203858c1cf06 100644 --- a/llvm/lib/Analysis/TFUtils.cpp +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -82,6 +82,33 @@ void serialize(const Message &SE, std::string *OutStr) { *OutStr = SE.SerializeAsString(); } } + +int getTFTypeIndex(TensorType TType) { + switch (TType) { + case TensorType::Double: + return TF_DOUBLE; + case TensorType::Float: + return TF_FLOAT; + case TensorType::Int8: + return TF_INT8; + case TensorType::UInt8: + return TF_UINT8; + case TensorType::Int16: + return TF_INT16; + case TensorType::UInt16: + return TF_UINT16; + case TensorType::Int32: + return TF_INT32; + case TensorType::UInt32: + return TF_UINT32; + case TensorType::Int64: + return TF_INT64; + case TensorType::UInt64: + return TF_UINT64; + case TensorType::Invalid: + llvm_unreachable("Unknown tensor type"); + } +} } // namespace namespace llvm { @@ -105,116 +132,6 @@ private: std::vector Output; }; -size_t TensorSpec::getElementByteSize() const { - return TF_DataTypeSize(static_cast(TypeIndex)); -} - -TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex, - const std::vector &Shape) - : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape), - ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1, - std::multiplies())) {} - -Optional getTensorSpecFromJSON(LLVMContext &Ctx, - const json::Value &Value) { - auto EmitError = [&](const llvm::Twine &Message) -> Optional { - std::string S; - llvm::raw_string_ostream OS(S); - OS << Value; - Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S); - return None; - }; - // FIXME: accept a Path as a parameter, and use it for error 
reporting. - json::Path::Root Root("tensor_spec"); - json::ObjectMapper Mapper(Value, Root); - if (!Mapper) - return EmitError("Value is not a dict"); - - std::string TensorName; - int TensorPort = -1; - std::string TensorType; - std::vector TensorShape; - - if (!Mapper.map("name", TensorName)) - return EmitError("'name' property not present or not a string"); - if (!Mapper.map("type", TensorType)) - return EmitError("'type' property not present or not a string"); - if (!Mapper.map("port", TensorPort)) - return EmitError("'port' property not present or not an int"); - if (!Mapper.map>("shape", TensorShape)) - return EmitError("'shape' property not present or not an int array"); - -#define PARSE_TYPE(T, E) \ - if (TensorType == #T) \ - return TensorSpec::createSpec(TensorName, TensorShape, TensorPort); - TFUTILS_SUPPORTED_TYPES(PARSE_TYPE) -#undef PARSE_TYPE - return None; -} - -Optional> -loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, - StringRef ModelPath, StringRef SpecFileOverride) { - SmallVector OutputSpecsPath; - StringRef FileName = SpecFileOverride; - if (FileName.empty()) { - llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json"); - FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()}; - } - - auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName); - if (!BufferOrError) { - Ctx.emitError("Error opening output specs file: " + FileName + " : " + - BufferOrError.getError().message()); - return None; - } - auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer()); - if (!ParsedJSONValues) { - Ctx.emitError("Could not parse specs file: " + FileName); - return None; - } - auto ValuesArray = ParsedJSONValues->getAsArray(); - if (!ValuesArray) { - Ctx.emitError("Expected an array of {tensor_spec:, " - "logging_name:} dictionaries"); - return None; - } - std::vector Ret; - for (const auto &Value : *ValuesArray) - if (const auto *Obj = Value.getAsObject()) - if (const auto *SpecPart = Obj->get("tensor_spec")) - if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart)) - if (auto LoggingName = Obj->getString("logging_name")) { - if (!TensorSpec->isElementType() && - !TensorSpec->isElementType() && - !TensorSpec->isElementType()) { - Ctx.emitError( - "Only int64, int32, and float tensors are supported. " - "Found unsupported type for tensor named " + - TensorSpec->name()); - return None; - } - Ret.push_back({*TensorSpec, LoggingName->str()}); - } - - if (ValuesArray->size() != Ret.size()) { - Ctx.emitError( - "Unable to parse output spec. It should be a json file containing an " - "array of dictionaries. 
Each dictionary must have a 'tensor_spec' key, " - "with a json object describing a TensorSpec; and a 'logging_name' key, " - "which is a string to use as name when logging this tensor in the " - "training log."); - return None; - } - if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) { - Ctx.emitError("The first output spec must describe the decision tensor, " - "and must have the logging_name " + - StringRef(ExpectedDecisionName)); - return None; - } - return Ret; -} - class TFModelEvaluatorImpl { public: TFModelEvaluatorImpl(StringRef SavedModelPath, @@ -383,16 +300,29 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl( errs() << TF_Message(Status.get()); invalidate(); } + size_t NrSupported = 0; for (size_t I = 0; I < InputSpecs.size(); ++I) { auto &InputSpec = InputSpecs[I]; InputFeed[I] = { TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()), InputSpec.port()}; + if (!InputFeed[I].oper) { + continue; + } + if (NrSupported++ != I) { + errs() + << "Unsupported features must be placed at the end of the InputSpecs"; + invalidate(); + return; + } if (!checkReportAndInvalidate(InputFeed[I], InputSpec)) return; - initInput(I, static_cast(InputSpec.typeIndex()), + initInput(I, static_cast(getTFTypeIndex(InputSpec.type())), InputSpec.shape()); } + InputFeed.resize(NrSupported); + Input.resize(NrSupported); + for (size_t I = 0; I < OutputSpecsSize; ++I) { auto OutputSpec = GetOutputSpecs(I); OutputFeed[I] = { @@ -470,7 +400,9 @@ void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, } void *TFModelEvaluator::getUntypedInput(size_t Index) { - return TF_TensorData(Impl->getInput()[Index]); + if (Index < Impl->getInput().size()) + return TF_TensorData(Impl->getInput()[Index]); + return nullptr; } TFModelEvaluator::EvaluationResult::EvaluationResult( @@ -495,13 +427,6 @@ TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const { return TF_TensorData(Impl->getOutput()[Index]); } -#define TFUTILS_GETDATATYPE_IMPL(T, E) \ - template <> int TensorSpec::getDataType() { return E; } - -TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL) - -#undef TFUTILS_GETDATATYPE_IMPL - TFModelEvaluator::EvaluationResult::~EvaluationResult() {} TFModelEvaluator::~TFModelEvaluator() {} diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 02923c2c7eb1..8ebdb65e88dc 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -659,12 +659,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_stpncpy); } - if (T.isPS4()) { - // PS4 does have memalign. + if (T.isPS()) { + // PS4/PS5 do have memalign. TLI.setAvailable(LibFunc_memalign); - // PS4 does not have new/delete with "unsigned int" size parameter; - // it only has the "unsigned long" versions. + // PS4/PS5 do not have new/delete with "unsigned int" size parameter; + // they only have the "unsigned long" versions. 
TLI.setUnavailable(LibFunc_ZdaPvj); TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t); TLI.setUnavailable(LibFunc_ZdlPvj); @@ -1110,9 +1110,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_system: return (NumParams == 1 && FTy.getParamType(0)->isPointerTy()); case LibFunc___kmpc_alloc_shared: + return NumParams == 1 && FTy.getReturnType()->isPointerTy(); case LibFunc_malloc: case LibFunc_vec_malloc: - return (NumParams == 1 && FTy.getReturnType()->isPointerTy()); + return NumParams == 1 && FTy.getParamType(0)->isIntegerTy(SizeTBits) && + FTy.getReturnType()->isPointerTy(); case LibFunc_memcmp: return NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && FTy.getParamType(0)->isPointerTy() && diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 25e9dee98e13..66f61961d01b 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -11,7 +11,6 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -21,7 +20,6 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include using namespace llvm; @@ -33,6 +31,11 @@ static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); +static cl::opt CacheLineSize( + "cache-line-size", cl::init(0), cl::Hidden, + cl::desc("Use this to override the target cache line size when " + "specified by the user.")); + namespace { /// No-op implementation of the TTI interface using the utility base /// classes. 
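The new -cache-line-size flag added above must not clobber the target's value when it is merely defaulted, hence the getNumOccurrences() check in getCacheLineSize() in the next hunk. A standalone sketch of that pattern (Option is a hypothetical stand-in for cl::opt<unsigned>; it is not part of the imported patch):

struct Option {
  unsigned Value = 0;
  unsigned NumOccurrences = 0; // bumped once per command-line appearance
  unsigned getNumOccurrences() const { return NumOccurrences; }
  operator unsigned() const { return Value; }
};

static Option CacheLineSizeOpt;

// Mirrors TargetTransformInfo::getCacheLineSize after this change: an
// explicit user setting wins (even an explicit 0); otherwise defer to the
// target's own value.
unsigned getCacheLineSize(unsigned TargetDefault) {
  return CacheLineSizeOpt.getNumOccurrences() > 0 ? CacheLineSizeOpt
                                                  : TargetDefault;
}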
@@ -179,7 +182,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model(NoTTIImpl(DL))) {} -TargetTransformInfo::~TargetTransformInfo() {} +TargetTransformInfo::~TargetTransformInfo() = default; TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg) : TTIImpl(std::move(Arg.TTIImpl)) {} @@ -350,7 +353,8 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace, I); } -bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { +bool TargetTransformInfo::isLSRCostLess(const LSRCost &C1, + const LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } @@ -398,11 +402,22 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } +bool TargetTransformInfo::isLegalBroadcastLoad(Type *ElementTy, + ElementCount NumElements) const { + return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements); +} + bool TargetTransformInfo::isLegalMaskedGather(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedGather(DataType, Alignment); } +bool TargetTransformInfo::isLegalAltInstr( + VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask) const { + return TTIImpl->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask); +} + bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedScatter(DataType, Alignment); @@ -470,7 +485,7 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const { +unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const { return TTIImpl->getRegUsageForType(Ty); } @@ -507,6 +522,10 @@ bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { return TTIImpl->supportsEfficientVectorElementLoadStore(); } +bool TargetTransformInfo::supportsTailCalls() const { + return TTIImpl->supportsTailCalls(); +} + bool TargetTransformInfo::enableAggressiveInterleaving( bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); @@ -623,8 +642,9 @@ Optional TargetTransformInfo::getVScaleForTuning() const { return TTIImpl->getVScaleForTuning(); } -bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const { - return TTIImpl->shouldMaximizeVectorBandwidth(); +bool TargetTransformInfo::shouldMaximizeVectorBandwidth( + TargetTransformInfo::RegisterKind K) const { + return TTIImpl->shouldMaximizeVectorBandwidth(K); } ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth, @@ -637,6 +657,11 @@ unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth, return TTIImpl->getMaximumVF(ElemWidth, Opcode); } +unsigned TargetTransformInfo::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const { + return TTIImpl->getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); +} + bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( @@ -644,7 +669,8 @@ bool TargetTransformInfo::shouldConsiderAddressTypePromotion( } unsigned TargetTransformInfo::getCacheLineSize() const { - return TTIImpl->getCacheLineSize(); + return CacheLineSize.getNumOccurrences() > 0 ? 
CacheLineSize + : TTIImpl->getCacheLineSize(); } llvm::Optional @@ -742,12 +768,11 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost( return Cost; } -InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind, - VectorType *Ty, - ArrayRef Mask, - int Index, - VectorType *SubTp) const { - InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp); +InstructionCost TargetTransformInfo::getShuffleCost( + ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, int Index, + VectorType *SubTp, ArrayRef Args) const { + InstructionCost Cost = + TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -973,18 +998,21 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( Type *TargetTransformInfo::getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, - unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const { + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize) const { return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, - DestAddrSpace, SrcAlign, DestAlign); + DestAddrSpace, SrcAlign, DestAlign, + AtomicElementSize); } void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const { - TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, - SrcAddrSpace, DestAddrSpace, - SrcAlign, DestAlign); + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize) const { + TTIImpl->getMemcpyLoopResidualLoweringType( + OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign, + DestAlign, AtomicCpySize); } bool TargetTransformInfo::areInlineCompatible(const Function *Caller, @@ -1155,7 +1183,7 @@ TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { } } -TargetTransformInfo::Concept::~Concept() {} +TargetTransformInfo::Concept::~Concept() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Analysis/TensorSpec.cpp b/llvm/lib/Analysis/TensorSpec.cpp new file mode 100644 index 000000000000..f6a5882371a7 --- /dev/null +++ b/llvm/lib/Analysis/TensorSpec.cpp @@ -0,0 +1,144 @@ +//===- TensorSpec.cpp - tensor type abstraction ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation file for the abstraction of a tensor type, and JSON loading +// utils. 
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <numeric>
+
+using namespace llvm;
+
+namespace llvm {
+
+#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
+  template <> TensorType TensorSpec::getDataType<T>() { return TensorType::E; }
+
+SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_IMPL)
+
+#undef TFUTILS_GETDATATYPE_IMPL
+
+TensorSpec::TensorSpec(const std::string &Name, int Port, TensorType Type,
+                       size_t ElementSize, const std::vector<int64_t> &Shape)
+    : Name(Name), Port(Port), Type(Type), Shape(Shape),
+      ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
+                                   std::multiplies<int64_t>())),
+      ElementSize(ElementSize) {}
+
+Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
+                                           const json::Value &Value) {
+  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
+    std::string S;
+    llvm::raw_string_ostream OS(S);
+    OS << Value;
+    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
+    return None;
+  };
+  // FIXME: accept a Path as a parameter, and use it for error reporting.
+  json::Path::Root Root("tensor_spec");
+  json::ObjectMapper Mapper(Value, Root);
+  if (!Mapper)
+    return EmitError("Value is not a dict");
+
+  std::string TensorName;
+  int TensorPort = -1;
+  std::string TensorType;
+  std::vector<int64_t> TensorShape;
+
+  if (!Mapper.map("name", TensorName))
+    return EmitError("'name' property not present or not a string");
+  if (!Mapper.map("type", TensorType))
+    return EmitError("'type' property not present or not a string");
+  if (!Mapper.map("port", TensorPort))
+    return EmitError("'port' property not present or not an int");
+  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
+    return EmitError("'shape' property not present or not an int array");
+
+#define PARSE_TYPE(T, E)                                                       \
+  if (TensorType == #T)                                                        \
+    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
+  SUPPORTED_TENSOR_TYPES(PARSE_TYPE)
+#undef PARSE_TYPE
+  return None;
+}
+
+Optional<std::vector<LoggedFeatureSpec>>
+loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
+                StringRef ModelPath, StringRef SpecFileOverride) {
+  SmallVector<char, 128> OutputSpecsPath;
+  StringRef FileName = SpecFileOverride;
+  if (FileName.empty()) {
+    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
+    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
+  }
+
+  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
+  if (!BufferOrError) {
+    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
+                  BufferOrError.getError().message());
+    return None;
+  }
+  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
+  if (!ParsedJSONValues) {
+    Ctx.emitError("Could not parse specs file: " + FileName);
+    return None;
+  }
+  auto ValuesArray = ParsedJSONValues->getAsArray();
+  if (!ValuesArray) {
+    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
+                  "logging_name:<name>} dictionaries");
+    return None;
+  }
+  std::vector<LoggedFeatureSpec> Ret;
+  for (const auto &Value : *ValuesArray)
+    if (const auto *Obj = Value.getAsObject())
+      if (const auto *SpecPart = Obj->get("tensor_spec"))
+        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
+          if (auto LoggingName = Obj->getString("logging_name")) {
+            if (!TensorSpec->isElementType<int64_t>() &&
+                !TensorSpec->isElementType<int32_t>() &&
+                !TensorSpec->isElementType<float>()) {
+              Ctx.emitError(
+                  "Only int64, int32, and float tensors are supported. "
+                  "Found unsupported type for tensor named " +
+                  TensorSpec->name());
+              return None;
+            }
+            Ret.push_back({*TensorSpec, LoggingName->str()});
+          }
+
+  if (ValuesArray->size() != Ret.size()) {
+    Ctx.emitError(
+        "Unable to parse output spec. It should be a json file containing an "
+        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
+        "with a json object describing a TensorSpec; and a 'logging_name' key, "
+        "which is a string to use as name when logging this tensor in the "
+        "training log.");
+    return None;
+  }
+  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
+    Ctx.emitError("The first output spec must describe the decision tensor, "
+                  "and must have the logging_name " +
+                  StringRef(ExpectedDecisionName));
+    return None;
+  }
+  return Ret;
+}
+} // namespace llvm
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 627a78a2a2fd..9bcbe4a4cc1e 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -112,7 +112,6 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 80051fd5f7c1..201e64770766 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -16,7 +16,6 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 using namespace llvm;
diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp
index 7573975a3dd3..e6d297877b62 100644
--- a/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/VectorUtils.h"
 using namespace llvm;
diff --git a/llvm/lib/Analysis/ValueLatticeUtils.cpp b/llvm/lib/Analysis/ValueLatticeUtils.cpp
index 53638c351f72..2bcb4d5b0e6b 100644
--- a/llvm/lib/Analysis/ValueLatticeUtils.cpp
+++ b/llvm/lib/Analysis/ValueLatticeUtils.cpp
@@ -29,12 +29,13 @@ bool llvm::canTrackGlobalVariableInterprocedurally(GlobalVariable *GV) {
       !GV->hasDefinitiveInitializer())
     return false;
   return all_of(GV->users(), [&](User *U) {
-    // Currently all users of a global variable have to be none-volatile loads
-    // or stores and the global cannot be stored itself.
+    // Currently all users of a global variable have to be non-volatile loads
+    // or stores of the global type, and the global cannot be stored itself.
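For reference, the output_spec.json file that loadOutputSpecs() above consumes would look like the following sketch. The key names come straight from the parser ("tensor_spec", "logging_name", "name", "port", "type", "shape"); the decision name and tensor values here are illustrative, not part of this patch. The first array entry must describe the decision tensor, and its logging_name must match ExpectedDecisionName:

    [
      {
        "logging_name": "inlining_decision",
        "tensor_spec": {
          "name": "StatefulPartitionedCall",
          "port": 0,
          "type": "int64_t",
          "shape": [1]
        }
      }
    ]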
if (auto *Store = dyn_cast(U)) - return Store->getValueOperand() != GV && !Store->isVolatile(); + return Store->getValueOperand() != GV && !Store->isVolatile() && + Store->getValueOperand()->getType() == GV->getValueType(); if (auto *Load = dyn_cast(U)) - return !Load->isVolatile(); + return !Load->isVolatile() && Load->getType() == GV->getValueType(); return false; }); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index c14bdb8bc262..05d5e47bb8d7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -70,10 +71,8 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include -#include #include #include -#include #include using namespace llvm; @@ -86,13 +85,12 @@ static cl::opt DomConditionsMaxUses("dom-conditions-max-uses", // According to the LangRef, branching on a poison condition is absolutely // immediate full UB. However, historically we haven't implemented that -// consistently as we have an important transformation (non-trivial unswitch) -// which introduces instances of branch on poison/undef to otherwise well -// defined programs. This flag exists to let us test optimization benefit -// of exploiting the specified behavior (in combination with enabling the -// unswitch fix.) +// consistently as we had an important transformation (non-trivial unswitch) +// which introduced instances of branch on poison/undef to otherwise well +// defined programs. This issue has since been fixed, but the flag is +// temporarily retained to easily diagnose potential regressions. static cl::opt BranchOnPoisonAsUB("branch-on-poison-as-ub", - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); /// Returns the bitwidth of the given scalar or pointer type. For vector types, @@ -275,13 +273,39 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, assert(LHS->getType()->isIntOrIntVectorTy() && "LHS and RHS should be integers"); // Look for an inverted mask: (X & ~M) op (Y & M). - Value *M; - if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && - match(RHS, m_c_And(m_Specific(M), m_Value()))) + { + Value *M; + if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && + match(RHS, m_c_And(m_Specific(M), m_Value()))) + return true; + if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && + match(LHS, m_c_And(m_Specific(M), m_Value()))) + return true; + } + + // X op (Y & ~X) + if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) || + match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value()))) return true; - if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && - match(LHS, m_c_And(m_Specific(M), m_Value()))) + + // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern + // for constant Y. 
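These no-common-bits patterns are plain bit identities (the match code for the canonical Xor form follows just below, and the (A & B) versus ~(A | B) case is added right after it). As a sanity sketch independent of LLVM, all three can be checked exhaustively over 8-bit values:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned Y = 0; Y < 256; ++Y) {
          uint8_t A = X, B = Y;
          assert((A & (B & ~A)) == 0);       // X op (Y & ~X)
          assert((A & ((A & B) ^ B)) == 0);  // X op ((X & Y) ^ Y)
          assert(((A & B) & ~(A | B)) == 0); // (A & B) op ~(A | B)
        }
      return 0;
    }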
+ Value *Y; + if (match(RHS, + m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) || + match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y)))) return true; + + // Look for: (A & B) op ~(A | B) + { + Value *A, *B; + if (match(LHS, m_And(m_Value(A), m_Value(B))) && + match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return true; + if (match(RHS, m_And(m_Value(A), m_Value(B))) && + match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return true; + } IntegerType *IT = cast(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); @@ -451,7 +475,12 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, } } - Known = KnownBits::mul(Known, Known2); + bool SelfMultiply = Op0 == Op1; + // TODO: SelfMultiply can be poison, but not undef. + if (SelfMultiply) + SelfMultiply &= + isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1); + Known = KnownBits::mul(Known, Known2, SelfMultiply); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -656,7 +685,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (V->getType()->isPointerTy()) { if (RetainedKnowledge RK = getKnowledgeValidInContext( V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) { - Known.Zero.setLowBits(Log2_64(RK.ArgValue)); + if (isPowerOf2_64(RK.ArgValue)) + Known.Zero.setLowBits(Log2_64(RK.ArgValue)); } } @@ -1041,7 +1071,7 @@ static void computeKnownBitsFromShiftOperator( // bits. This check is sunk down as far as possible to avoid the expensive // call to isKnownNonZero if the cheaper checks above fail. if (ShiftAmt == 0) { - if (!ShifterOperandIsNonZero.hasValue()) + if (!ShifterOperandIsNonZero) ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q); if (*ShifterOperandIsNonZero) @@ -1726,8 +1756,7 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } - unsigned FirstZeroHighBit = - 32 - countLeadingZeros(VScaleMax.getValue()); + unsigned FirstZeroHighBit = 32 - countLeadingZeros(*VScaleMax); if (FirstZeroHighBit < BitWidth) Known.Zero.setBitsFrom(FirstZeroHighBit); @@ -2007,6 +2036,63 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } +/// Try to detect a recurrence that the value of the induction variable is +/// always a power of two (or zero). +static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, + unsigned Depth, Query &Q) { + BinaryOperator *BO = nullptr; + Value *Start = nullptr, *Step = nullptr; + if (!matchSimpleRecurrence(PN, BO, Start, Step)) + return false; + + // Initial value must be a power of two. + for (const Use &U : PN->operands()) { + if (U.get() == Start) { + // Initial value comes from a different BB, need to adjust context + // instruction for analysis. + Q.CxtI = PN->getIncomingBlock(U)->getTerminator(); + if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q)) + return false; + } + } + + // Except for Mul, the induction variable must be on the left side of the + // increment expression, otherwise its value can be arbitrary. + if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step) + return false; + + Q.CxtI = BO->getParent()->getTerminator(); + switch (BO->getOpcode()) { + case Instruction::Mul: + // Power of two is closed under multiplication. 
+ return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || + Q.IIQ.hasNoSignedWrap(BO)) && + isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q); + case Instruction::SDiv: + // Start value must not be signmask for signed division, so simply being a + // power of two is not sufficient, and it has to be a constant. + if (!match(Start, m_Power2()) || match(Start, m_SignMask())) + return false; + LLVM_FALLTHROUGH; + case Instruction::UDiv: + // Divisor must be a power of two. + // If OrZero is false, cannot guarantee induction variable is non-zero after + // division, same for Shr, unless it is exact division. + return (OrZero || Q.IIQ.isExact(BO)) && + isKnownToBeAPowerOfTwo(Step, false, Depth, Q); + case Instruction::Shl: + return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO); + case Instruction::AShr: + if (!match(Start, m_Power2()) || match(Start, m_SignMask())) + return false; + LLVM_FALLTHROUGH; + case Instruction::LShr: + return OrZero || Q.IIQ.isExact(BO); + default: + return false; + } +} + /// Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer @@ -2098,6 +2184,30 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, } } + // A PHI node is power of two if all incoming values are power of two, or if + // it is an induction variable where in each step its value is a power of two. + if (const PHINode *PN = dyn_cast(V)) { + Query RecQ = Q; + + // Check if it is an induction variable and always power of two. + if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) + return true; + + // Recursively check all incoming values. Limit recursion to 2 levels, so + // that search complexity is limited to number of operands^2. + unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); + return llvm::all_of(PN->operands(), [&](const Use &U) { + // Value is power of 2 if it is coming from PHI node itself by induction. + if (U.get() == PN) + return true; + + // Change the context instruction to the incoming block where it is + // evaluated. + RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); + return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); + }); + } + // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). 
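A concrete instance of the recurrence case handled by isPowerOfTwoRecurrence above (editor's sketch, not the LLVM API): an induction variable that starts at a power of two and is only scaled by non-wrapping shifts or multiplies by powers of two stays a power of two on every iteration, which is what lets isKnownToBeAPowerOfTwo answer for the PHI node itself.

    #include <cassert>
    #include <cstdint>

    static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

    int main() {
      uint64_t X = 8; // Start: a power of two
      for (int I = 0; I < 50; ++I) {
        assert(isPow2(X)); // holds on every iteration of the recurrence
        X <<= 1;           // Step: shl with no unsigned wrap
      }
      return 0;
    }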
@@ -2588,6 +2698,9 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
     if (isKnownNonZero(Op, Depth, Q) &&
         isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth))
       return true;
+  } else if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+    if (II->getIntrinsicID() == Intrinsic::vscale)
+      return true;
   }
 
   KnownBits Known(BitWidth);
@@ -2885,6 +2998,24 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
   return CLow->sle(*CHigh);
 }
 
+static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
+                                         const APInt *&CLow,
+                                         const APInt *&CHigh) {
+  assert((II->getIntrinsicID() == Intrinsic::smin ||
+          II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
+
+  Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+  if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
+      !match(II->getArgOperand(1), m_APInt(CLow)) ||
+      !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
+    return false;
+
+  if (II->getIntrinsicID() == Intrinsic::smin)
+    std::swap(CLow, CHigh);
+  return CLow->sle(*CHigh);
+}
+
 /// For vector constants, loop over the elements and find the constant with the
 /// minimum number of sign bits. Return 0 if the value is not a vector constant
 /// or if any element was not analyzed; otherwise, return the count for the
@@ -3225,6 +3356,12 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
         // Absolute value reduces number of sign bits by at most 1.
         return Tmp - 1;
+      case Intrinsic::smin:
+      case Intrinsic::smax: {
+        const APInt *CLow, *CHigh;
+        if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
+          return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+      }
       }
     }
   }
@@ -3358,9 +3495,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
 /// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee
 ///       that a value is not -0.0. It only guarantees that -0.0 may be treated
 ///       the same as +0.0 in floating-point ops.
-///
-/// NOTE: this function will need to be revisited when we support non-default
-/// rounding modes!
 bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
                                 unsigned Depth) {
   if (auto *CFP = dyn_cast<ConstantFP>(V))
@@ -3390,9 +3524,21 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
     case Intrinsic::sqrt:
     case Intrinsic::canonicalize:
       return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
+    case Intrinsic::experimental_constrained_sqrt: {
+      // NOTE: This rounding mode restriction may be too strict.
+      const auto *CI = cast<ConstrainedFPIntrinsic>(Call);
+      if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven)
+        return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
+      else
+        return false;
+    }
     // fabs(x) != -0.0
     case Intrinsic::fabs:
       return true;
+    // sitofp and uitofp turn into +0.0 for zero.
+    case Intrinsic::experimental_constrained_sitofp:
+    case Intrinsic::experimental_constrained_uitofp:
+      return true;
     }
   }
 
@@ -4032,69 +4178,83 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
   return true;
 }
 
+// If V refers to an initialized global constant, set Slice either to
+// its initializer if the size of its elements equals ElementSize, or,
+// for ElementSize == 8, to its representation as an array of unsigned
+// char. Return true on success.
 bool llvm::getConstantDataArrayInfo(const Value *V,
                                     ConstantDataArraySlice &Slice,
                                     unsigned ElementSize, uint64_t Offset) {
   assert(V);
 
-  // Look through bitcast instructions and geps.
- V = V->stripPointerCasts(); + // Drill down into the pointer expression V, ignoring any intervening + // casts, and determine the identity of the object it references along + // with the cumulative byte offset into it. + const GlobalVariable *GV = + dyn_cast(getUnderlyingObject(V)); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) + // Fail if V is not based on constant global object. + return false; - // If the value is a GEP instruction or constant expression, treat it as an - // offset. - if (const GEPOperator *GEP = dyn_cast(V)) { - // The GEP operator should be based on a pointer to string constant, and is - // indexing into the string constant. - if (!isGEPBasedOnPointerToString(GEP, ElementSize)) - return false; + const DataLayout &DL = GV->getParent()->getDataLayout(); + APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0); - // If the second index isn't a ConstantInt, then this is a variable index - // into the array. If this occurs, we can't say anything meaningful about - // the string. - uint64_t StartIdx = 0; - if (const ConstantInt *CI = dyn_cast(GEP->getOperand(2))) - StartIdx = CI->getZExtValue(); - else - return false; - return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize, - StartIdx + Offset); - } + if (GV != V->stripAndAccumulateConstantOffsets(DL, Off, + /*AllowNonInbounds*/ true)) + // Fail if a constant offset could not be determined. + return false; - // The GEP instruction, constant or instruction, must reference a global - // variable that is a constant and is initialized. The referenced constant - // initializer is the array that we'll use for optimization. - const GlobalVariable *GV = dyn_cast(V); - if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) + uint64_t StartIdx = Off.getLimitedValue(); + if (StartIdx == UINT64_MAX) + // Fail if the constant offset is excessive. return false; - const ConstantDataArray *Array; - ArrayType *ArrayTy; + Offset += StartIdx; + + ConstantDataArray *Array = nullptr; + ArrayType *ArrayTy = nullptr; + if (GV->getInitializer()->isNullValue()) { Type *GVTy = GV->getValueType(); - if ( (ArrayTy = dyn_cast(GVTy)) ) { - // A zeroinitializer for the array; there is no ConstantDataArray. - Array = nullptr; - } else { - const DataLayout &DL = GV->getParent()->getDataLayout(); - uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize(); - uint64_t Length = SizeInBytes / (ElementSize / 8); - if (Length <= Offset) - return false; + uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize(); + uint64_t Length = SizeInBytes / (ElementSize / 8); + + Slice.Array = nullptr; + Slice.Offset = 0; + // Return an empty Slice for undersized constants to let callers + // transform even undefined library calls into simpler, well-defined + // expressions. This is preferable to making the calls although it + // prevents sanitizers from detecting such calls. + Slice.Length = Length < Offset ? 0 : Length - Offset; + return true; + } - Slice.Array = nullptr; - Slice.Offset = 0; - Slice.Length = Length - Offset; - return true; + auto *Init = const_cast(GV->getInitializer()); + if (auto *ArrayInit = dyn_cast(Init)) { + Type *InitElTy = ArrayInit->getElementType(); + if (InitElTy->isIntegerTy(ElementSize)) { + // If Init is an initializer for an array of the expected type + // and size, use it as is. + Array = ArrayInit; + ArrayTy = ArrayInit->getType(); } - } else { - // This must be a ConstantDataArray. 
- Array = dyn_cast(GV->getInitializer()); - if (!Array) + } + + if (!Array) { + if (ElementSize != 8) + // TODO: Handle conversions to larger integral types. return false; - ArrayTy = Array->getType(); + + // Otherwise extract the portion of the initializer starting + // at Offset as an array of bytes, and reset Offset. + Init = ReadByteArrayFromGlobal(GV, Offset); + if (!Init) + return false; + + Offset = 0; + Array = dyn_cast(Init); + ArrayTy = dyn_cast(Init->getType()); } - if (!ArrayTy->getElementType()->isIntegerTy(ElementSize)) - return false; uint64_t NumElts = ArrayTy->getArrayNumElements(); if (Offset > NumElts) @@ -4117,6 +4277,12 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (Slice.Array == nullptr) { if (TrimAtNul) { + // Return a nul-terminated string even for an empty Slice. This is + // safe because all existing SimplifyLibcalls callers require string + // arguments and the behavior of the functions they fold is undefined + // otherwise. Folding the calls this way is preferable to making + // the undefined library calls, even though it prevents sanitizers + // from reporting such calls. Str = StringRef(); return true; } @@ -4196,9 +4362,13 @@ static uint64_t GetStringLengthH(const Value *V, return 0; if (Slice.Array == nullptr) + // Zeroinitializer (including an empty one). return 1; - // Search for nul characters + // Search for the first nul character. Return a conservative result even + // when there is no nul. This is safe since otherwise the string function + // being folded such as strlen is undefined, and can be preferable to + // making the undefined library call. unsigned NullIndex = 0; for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) @@ -4517,13 +4687,40 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Operator *Inst = dyn_cast(V); if (!Inst) return false; + return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI); +} + +bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, + const Operator *Inst, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { +#ifndef NDEBUG + if (Inst->getOpcode() != Opcode) { + // Check that the operands are actually compatible with the Opcode override. 
+    auto hasEqualReturnAndLeadingOperandTypes =
+        [](const Operator *Inst, unsigned NumLeadingOperands) {
+          if (Inst->getNumOperands() < NumLeadingOperands)
+            return false;
+          const Type *ExpectedType = Inst->getType();
+          for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
+            if (Inst->getOperand(ItOp)->getType() != ExpectedType)
+              return false;
+          return true;
+        };
+    assert(!Instruction::isBinaryOp(Opcode) ||
+           hasEqualReturnAndLeadingOperandTypes(Inst, 2));
+    assert(!Instruction::isUnaryOp(Opcode) ||
+           hasEqualReturnAndLeadingOperandTypes(Inst, 1));
+  }
+#endif
 
   for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
     if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
      if (C->canTrap())
        return false;
 
-  switch (Inst->getOpcode()) {
+  switch (Opcode) {
   default:
     return true;
   case Instruction::UDiv:
@@ -4554,7 +4751,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
     return false;
   }
   case Instruction::Load: {
-    const LoadInst *LI = cast<LoadInst>(Inst);
+    const LoadInst *LI = dyn_cast<LoadInst>(Inst);
+    if (!LI)
+      return false;
     if (mustSuppressSpeculation(*LI))
       return false;
     const DataLayout &DL = LI->getModule()->getDataLayout();
@@ -4563,7 +4762,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
                                               TLI);
   }
   case Instruction::Call: {
-    auto *CI = cast<CallInst>(Inst);
+    auto *CI = dyn_cast<CallInst>(Inst);
+    if (!CI)
+      return false;
     const Function *Callee = CI->getCalledFunction();
 
     // The called function could have undefined behavior or side-effects, even
@@ -4595,8 +4796,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
   }
 }
 
-bool llvm::mayBeMemoryDependent(const Instruction &I) {
-  return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
+bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
+  if (I.mayReadOrWriteMemory())
+    // Memory dependency possible
+    return true;
+  if (!isSafeToSpeculativelyExecute(&I))
+    // Can't move above a maythrow call or infinite loop. Or if an
+    // inalloca alloca, above a stacksave call.
+    return true;
+  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+    // 1) Can't reorder two inf-loop calls, even if readonly
+    // 2) Also can't reorder an inf-loop call below an instruction which isn't
+    // safe to speculatively execute. (Inverse of above)
+    return true;
+  return false;
 }
 
 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
@@ -4766,6 +4979,22 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
                                                    AssumptionCache *AC,
                                                    const Instruction *CxtI,
                                                    const DominatorTree *DT) {
+  // X - (X % ?)
+  // The remainder of a value can't have greater magnitude than itself,
+  // so the subtraction can't overflow.
+
+  // X - (X -nuw ?)
+  // In the minimal case, this would simplify to "?", so there's no subtract
+  // at all. But if this analysis is used to peek through casts, for example,
+  // then determining no-overflow may allow other transforms.
+
+  // TODO: There are other patterns like this.
+  // See simplifyICmpWithBinOpOnLHS() for candidates.
+  if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
+      match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
+    if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+      return OverflowResult::NeverOverflows;
+
   // Checking for conditions implied by dominating conditions may be expensive.
   // Limit it to usub_with_overflow calls for now.
   if (match(CxtI,
@@ -4789,6 +5018,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
                                                  AssumptionCache *AC,
                                                  const Instruction *CxtI,
                                                  const DominatorTree *DT) {
+  // X - (X % ?)
+  // The remainder of a value can't have greater magnitude than itself,
+  // so the subtraction can't overflow.
+
+  // X - (X -nsw ?)
+  // In the minimal case, this would simplify to "?", so there's no subtract
+  // at all. But if this analysis is used to peek through casts, for example,
+  // then determining no-overflow may allow other transforms.
+  if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
+      match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
+    if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+      return OverflowResult::NeverOverflows;
+
   // If LHS and RHS each have at least two sign bits, the subtraction
   // cannot overflow.
   if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
@@ -5100,7 +5342,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
   }
 
   if (auto *I = dyn_cast<Instruction>(V))
-    if (I->getMetadata(LLVMContext::MD_noundef))
+    if (I->hasMetadata(LLVMContext::MD_noundef) ||
+        I->hasMetadata(LLVMContext::MD_dereferenceable) ||
+        I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
       return true;
 
   if (programUndefinedIfUndefOrPoison(V, PoisonOnly))
@@ -5125,10 +5369,10 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
       auto *TI = Dominator->getBlock()->getTerminator();
 
       Value *Cond = nullptr;
-      if (auto BI = dyn_cast<BranchInst>(TI)) {
+      if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
         if (BI->isConditional())
           Cond = BI->getCondition();
-      } else if (auto SI = dyn_cast<SwitchInst>(TI)) {
+      } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
         Cond = SI->getCondition();
       }
 
@@ -5763,20 +6007,6 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
   if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
     return {SPF_UNKNOWN, SPNB_NA, false};
 
-  // Z = X -nsw Y
-  // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
-  // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
-  if (match(TrueVal, m_Zero()) &&
-      match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
-    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
-
-  // Z = X -nsw Y
-  // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
-  // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
-  if (match(FalseVal, m_Zero()) &&
-      match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
-    return {Pred == CmpInst::ICMP_SGT ?
SPF_SMAX : SPF_SMIN, SPNB_NA, false}; - const APInt *C1; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -6576,11 +6806,38 @@ Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, if (LHS == RHS) return LHSIsTrue; - const ICmpInst *RHSCmp = dyn_cast(RHS); - if (RHSCmp) + if (const ICmpInst *RHSCmp = dyn_cast(RHS)) return isImpliedCondition(LHS, RHSCmp->getPredicate(), RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL, LHSIsTrue, Depth); + + if (Depth == MaxAnalysisRecursionDepth) + return None; + + // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2 + // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2 + const Value *RHS1, *RHS2; + if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) { + if (Optional Imp = + isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) + if (*Imp == true) + return true; + if (Optional Imp = + isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) + if (*Imp == true) + return true; + } + if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) { + if (Optional Imp = + isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) + if (*Imp == false) + return false; + if (Optional Imp = + isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) + if (*Imp == false) + return false; + } + return None; } @@ -7072,66 +7329,25 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { Optional llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, const DataLayout &DL) { - Ptr1 = Ptr1->stripPointerCasts(); - Ptr2 = Ptr2->stripPointerCasts(); + APInt Offset1(DL.getIndexTypeSizeInBits(Ptr1->getType()), 0); + APInt Offset2(DL.getIndexTypeSizeInBits(Ptr2->getType()), 0); + Ptr1 = Ptr1->stripAndAccumulateConstantOffsets(DL, Offset1, true); + Ptr2 = Ptr2->stripAndAccumulateConstantOffsets(DL, Offset2, true); // Handle the trivial case first. - if (Ptr1 == Ptr2) { - return 0; - } + if (Ptr1 == Ptr2) + return Offset2.getSExtValue() - Offset1.getSExtValue(); const GEPOperator *GEP1 = dyn_cast(Ptr1); const GEPOperator *GEP2 = dyn_cast(Ptr2); - // If one pointer is a GEP see if the GEP is a constant offset from the base, - // as in "P" and "gep P, 1". - // Also do this iteratively to handle the the following case: - // Ptr_t1 = GEP Ptr1, c1 - // Ptr_t2 = GEP Ptr_t1, c2 - // Ptr2 = GEP Ptr_t2, c3 - // where we will return c1+c2+c3. - // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base - // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases - // are the same, and return the difference between offsets. - auto getOffsetFromBase = [&DL](const GEPOperator *GEP, - const Value *Ptr) -> Optional { - const GEPOperator *GEP_T = GEP; - int64_t OffsetVal = 0; - bool HasSameBase = false; - while (GEP_T) { - auto Offset = getOffsetFromIndex(GEP_T, 1, DL); - if (!Offset) - return None; - OffsetVal += *Offset; - auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); - if (Op0 == Ptr) { - HasSameBase = true; - break; - } - GEP_T = dyn_cast(Op0); - } - if (!HasSameBase) - return None; - return OffsetVal; - }; - - if (GEP1) { - auto Offset = getOffsetFromBase(GEP1, Ptr2); - if (Offset) - return -*Offset; - } - if (GEP2) { - auto Offset = getOffsetFromBase(GEP2, Ptr1); - if (Offset) - return Offset; - } - // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical // base. After that base, they may have some number of common (and // potentially variable) indices. 
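Returning to the isImpliedCondition() extension earlier in this hunk: the two new rules (LHS implies RHS1 || RHS2 whenever it implies either disjunct, and LHS implies the failure of RHS1 && RHS2 whenever it implies the failure of either conjunct) are ordinary propositional facts. A minimal truth-table check (editor's sketch):

    #include <cassert>

    int main() {
      for (int L = 0; L <= 1; ++L)
        for (int R1 = 0; R1 <= 1; ++R1)
          for (int R2 = 0; R2 <= 1; ++R2) {
            bool Lhs = L, Rhs1 = R1, Rhs2 = R2;
            if (!Lhs || Rhs1)                  // assume LHS ==> RHS1
              assert(!Lhs || (Rhs1 || Rhs2));  // then LHS ==> (RHS1 || RHS2)
            if (!Lhs || !Rhs1)                 // assume LHS ==> !RHS1
              assert(!Lhs || !(Rhs1 && Rhs2)); // then LHS ==> !(RHS1 && RHS2)
          }
      return 0;
    }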
After that they handle some constant // offset, which determines their offset from each other. At this point, we // handle no other case. - if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0) || + GEP1->getSourceElementType() != GEP2->getSourceElementType()) return None; // Skip any common indices and track the GEP types. @@ -7140,9 +7356,10 @@ Optional llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) break; - auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL); - auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL); - if (!Offset1 || !Offset2) + auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL); + auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL); + if (!IOffset1 || !IOffset2) return None; - return *Offset2 - *Offset1; + return *IOffset2 - *IOffset1 + Offset2.getSExtValue() - + Offset1.getSExtValue(); } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 655c248907f6..f863a1ffad3a 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -40,7 +40,7 @@ static cl::opt MaxInterleaveGroupFactor( /// Return true if all of the intrinsic's arguments and return type are scalars /// for the scalar form of the intrinsic, and vectors for the vector form of the /// intrinsic (except operands that are marked as always being scalar by -/// hasVectorInstrinsicScalarOpd). +/// isVectorIntrinsicWithScalarOpAtArg). bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { switch (ID) { case Intrinsic::abs: // Begin integer bit-manipulation. @@ -89,6 +89,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::fmuladd: case Intrinsic::powi: case Intrinsic::canonicalize: + case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: return true; default: return false; @@ -96,8 +98,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { } /// Identifies if the vector form of the intrinsic has a scalar operand. -bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, - unsigned ScalarOpdIdx) { +bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) { switch (ID) { case Intrinsic::abs: case Intrinsic::ctlz: @@ -114,11 +116,14 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, } } -bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, - unsigned ScalarOpdIdx) { +bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + unsigned OpdIdx) { switch (ID) { + case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: + return OpdIdx == 0; case Intrinsic::powi: - return (ScalarOpdIdx == 1); + return OpdIdx == 1; default: return false; } @@ -496,6 +501,116 @@ bool llvm::widenShuffleMaskElts(int Scale, ArrayRef Mask, return true; } +void llvm::processShuffleMasks( + ArrayRef Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, + unsigned NumOfUsedRegs, function_ref NoInputAction, + function_ref, unsigned, unsigned)> SingleInputAction, + function_ref, unsigned, unsigned)> ManyInputsAction) { + SmallVector>> Res(NumOfDestRegs); + // Try to perform better estimation of the permutation. + // 1. Split the source/destination vectors into real registers. + // 2. Do the mask analysis to identify which real registers are + // permuted. 
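Before the processShuffleMasks() implementation below, a worked example of the register split its first loop computes (editor's sketch using plain STL containers rather than the LLVM types): with two source and two destination registers and Mask = {0,1,4,5, 2,3,6,7}, destination register 0 reads lanes {0,1} of source 0 and lanes {0,1} of source 1, so its per-source sub-masks come out as {0,1,U,U} and {U,U,0,1}.

    #include <cstdio>
    #include <vector>

    int main() {
      const int U = -1; // undef lane marker
      std::vector<int> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
      const unsigned NumSrc = 2, NumDest = 2;
      const unsigned Sz = Mask.size(), SzDest = Sz / NumDest, SzSrc = Sz / NumSrc;
      for (unsigned I = 0; I < NumDest; ++I) {
        std::vector<std::vector<int>> RegMasks(NumSrc);
        for (unsigned K = 0; K < SzDest; ++K) {
          int Elt = Mask[I * SzDest + K];
          if (Elt < 0)
            continue; // undef lane in the original mask
          unsigned SrcReg = Elt / SzSrc; // which source register feeds this lane
          if (RegMasks[SrcReg].empty())
            RegMasks[SrcReg].assign(SzDest, U);
          RegMasks[SrcReg][K] = Elt % SzSrc; // lane within that register
        }
        for (unsigned S = 0; S < NumSrc; ++S) {
          std::printf("dest %u <- src %u:", I, S);
          if (RegMasks[S].empty())
            std::printf(" unused");
          for (int M : RegMasks[S])
            std::printf(" %d", M);
          std::printf("\n");
        }
      }
      return 0;
    }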
+ int Sz = Mask.size(); + unsigned SzDest = Sz / NumOfDestRegs; + unsigned SzSrc = Sz / NumOfSrcRegs; + for (unsigned I = 0; I < NumOfDestRegs; ++I) { + auto &RegMasks = Res[I]; + RegMasks.assign(NumOfSrcRegs, {}); + // Check that the values in dest registers are in the one src + // register. + for (unsigned K = 0; K < SzDest; ++K) { + int Idx = I * SzDest + K; + if (Idx == Sz) + break; + if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem) + continue; + int SrcRegIdx = Mask[Idx] / SzSrc; + // Add a cost of PermuteTwoSrc for each new source register permute, + // if we have more than one source registers. + if (RegMasks[SrcRegIdx].empty()) + RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem); + RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc; + } + } + // Process split mask. + for (unsigned I = 0; I < NumOfUsedRegs; ++I) { + auto &Dest = Res[I]; + int NumSrcRegs = + count_if(Dest, [](ArrayRef Mask) { return !Mask.empty(); }); + switch (NumSrcRegs) { + case 0: + // No input vectors were used! + NoInputAction(); + break; + case 1: { + // Find the only mask with at least single undef mask elem. + auto *It = + find_if(Dest, [](ArrayRef Mask) { return !Mask.empty(); }); + unsigned SrcReg = std::distance(Dest.begin(), It); + SingleInputAction(*It, SrcReg, I); + break; + } + default: { + // The first mask is a permutation of a single register. Since we have >2 + // input registers to shuffle, we merge the masks for 2 first registers + // and generate a shuffle of 2 registers rather than the reordering of the + // first register and then shuffle with the second register. Next, + // generate the shuffles of the resulting register + the remaining + // registers from the list. + auto &&CombineMasks = [](MutableArrayRef FirstMask, + ArrayRef SecondMask) { + for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) { + if (SecondMask[Idx] != UndefMaskElem) { + assert(FirstMask[Idx] == UndefMaskElem && + "Expected undefined mask element."); + FirstMask[Idx] = SecondMask[Idx] + VF; + } + } + }; + auto &&NormalizeMask = [](MutableArrayRef Mask) { + for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) { + if (Mask[Idx] != UndefMaskElem) + Mask[Idx] = Idx; + } + }; + int SecondIdx; + do { + int FirstIdx = -1; + SecondIdx = -1; + MutableArrayRef FirstMask, SecondMask; + for (unsigned I = 0; I < NumOfDestRegs; ++I) { + SmallVectorImpl &RegMask = Dest[I]; + if (RegMask.empty()) + continue; + + if (FirstIdx == SecondIdx) { + FirstIdx = I; + FirstMask = RegMask; + continue; + } + SecondIdx = I; + SecondMask = RegMask; + CombineMasks(FirstMask, SecondMask); + ManyInputsAction(FirstMask, FirstIdx, SecondIdx); + NormalizeMask(FirstMask); + RegMask.clear(); + SecondMask = FirstMask; + SecondIdx = FirstIdx; + } + if (FirstIdx != SecondIdx && SecondIdx >= 0) { + CombineMasks(SecondMask, FirstMask); + ManyInputsAction(SecondMask, SecondIdx, FirstIdx); + Dest[FirstIdx].clear(); + NormalizeMask(SecondMask); + } + } while (SecondIdx >= 0); + break; + } + } + } +} + MapVector llvm::computeMinimumValueSizes(ArrayRef Blocks, DemandedBits &DB, const TargetTransformInfo *TTI) { @@ -543,9 +658,8 @@ llvm::computeMinimumValueSizes(ArrayRef Blocks, DemandedBits &DB, Value *Val = Worklist.pop_back_val(); Value *Leader = ECs.getOrInsertLeaderValue(Val); - if (Visited.count(Val)) + if (!Visited.insert(Val).second) continue; - Visited.insert(Val); // Non-instructions terminate a chain successfully. 
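The Visited change just above is the standard set-insertion idiom: insert() already reports whether the element was new, so the separate count() lookup was redundant. A minimal illustration (editor's sketch):

    #include <cassert>
    #include <set>

    int main() {
      std::set<int> Visited;
      // insert() returns {iterator, bool}; .second is true only for new
      // elements, so one call both records the element and answers
      // "seen before?".
      assert(Visited.insert(42).second);  // first visit
      assert(!Visited.insert(42).second); // already visited
      return 0;
    }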
if (!isa(Val)) @@ -1387,7 +1501,7 @@ void VFABI::getVectorVariantNames( #ifndef NDEBUG LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); Optional Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); - assert(Info.hasValue() && "Invalid name for a VFABI variant."); + assert(Info && "Invalid name for a VFABI variant."); assert(CI.getModule()->getFunction(Info.getValue().VectorName) && "Vector function is missing."); #endif diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index e3bf41c9721b..30e6f8599208 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -567,7 +567,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(exact); KEYWORD(inbounds); KEYWORD(inrange); - KEYWORD(align); KEYWORD(addrspace); KEYWORD(section); KEYWORD(partition); @@ -576,12 +575,16 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(module); KEYWORD(asm); KEYWORD(sideeffect); - KEYWORD(alignstack); KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(prefix); KEYWORD(prologue); + KEYWORD(no_sanitize_address); + KEYWORD(no_sanitize_hwaddress); + KEYWORD(no_sanitize_memtag); + KEYWORD(sanitize_address_dyninit); + KEYWORD(ccc); KEYWORD(fastcc); KEYWORD(coldcc); @@ -632,82 +635,13 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(c); KEYWORD(attributes); + KEYWORD(sync); + KEYWORD(async); - KEYWORD(alwaysinline); - KEYWORD(allocsize); - KEYWORD(argmemonly); - KEYWORD(builtin); - KEYWORD(byval); - KEYWORD(inalloca); - KEYWORD(cold); - KEYWORD(convergent); - KEYWORD(dereferenceable); - KEYWORD(dereferenceable_or_null); - KEYWORD(disable_sanitizer_instrumentation); - KEYWORD(elementtype); - KEYWORD(inaccessiblememonly); - KEYWORD(inaccessiblemem_or_argmemonly); - KEYWORD(inlinehint); - KEYWORD(inreg); - KEYWORD(jumptable); - KEYWORD(minsize); - KEYWORD(naked); - KEYWORD(nest); - KEYWORD(noalias); - KEYWORD(nobuiltin); - KEYWORD(nocallback); - KEYWORD(nocapture); - KEYWORD(noduplicate); - KEYWORD(nofree); - KEYWORD(noimplicitfloat); - KEYWORD(noinline); - KEYWORD(norecurse); - KEYWORD(nonlazybind); - KEYWORD(nomerge); - KEYWORD(nonnull); - KEYWORD(noprofile); - KEYWORD(noredzone); - KEYWORD(noreturn); - KEYWORD(nosync); - KEYWORD(nocf_check); - KEYWORD(noundef); - KEYWORD(nounwind); - KEYWORD(nosanitize_coverage); - KEYWORD(null_pointer_is_valid); - KEYWORD(optforfuzzing); - KEYWORD(optnone); - KEYWORD(optsize); - KEYWORD(preallocated); - KEYWORD(readnone); - KEYWORD(readonly); - KEYWORD(returned); - KEYWORD(returns_twice); - KEYWORD(signext); - KEYWORD(speculatable); - KEYWORD(sret); - KEYWORD(ssp); - KEYWORD(sspreq); - KEYWORD(sspstrong); - KEYWORD(strictfp); - KEYWORD(safestack); - KEYWORD(shadowcallstack); - KEYWORD(sanitize_address); - KEYWORD(sanitize_hwaddress); - KEYWORD(sanitize_memtag); - KEYWORD(sanitize_thread); - KEYWORD(sanitize_memory); - KEYWORD(speculative_load_hardening); - KEYWORD(swifterror); - KEYWORD(swiftself); - KEYWORD(swiftasync); - KEYWORD(uwtable); - KEYWORD(vscale_range); - KEYWORD(willreturn); - KEYWORD(writeonly); - KEYWORD(zeroext); - KEYWORD(immarg); - KEYWORD(byref); - KEYWORD(mustprogress); +#define GET_ATTR_NAMES +#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \ + KEYWORD(DISPLAY_NAME); +#include "llvm/IR/Attributes.inc" KEYWORD(type); KEYWORD(opaque); @@ -781,7 +715,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(param); KEYWORD(hotness); KEYWORD(unknown); - KEYWORD(hot); KEYWORD(critical); KEYWORD(relbf); KEYWORD(variable); @@ -856,7 +789,10 @@ lltok::Kind LLLexer::LexIdentifier() { TYPEKEYWORD("token", 
Type::getTokenTy(Context)); if (Keyword == "ptr") { - if (Context.supportsTypedPointers()) { + // setOpaquePointers() must be called before creating any pointer types. + if (!Context.hasSetOpaquePointersValue()) { + Context.setOpaquePointers(true); + } else if (Context.supportsTypedPointers()) { Warning("ptr type is only supported in -opaque-pointers mode"); return lltok::Error; } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 432ec151cf8a..a1cdeac2b47f 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" @@ -47,7 +48,6 @@ #include #include #include -#include #include using namespace llvm; @@ -59,9 +59,31 @@ static std::string getTypeString(Type *T) { return Tmp.str(); } +static void setContextOpaquePointers(LLLexer &L, LLVMContext &C) { + while (true) { + lltok::Kind K = L.Lex(); + // LLLexer will set the opaque pointers option in LLVMContext if it sees an + // explicit "ptr". + if (K == lltok::star || K == lltok::Error || K == lltok::Eof || + isa_and_nonnull(L.getTyVal())) { + if (K == lltok::star) + C.setOpaquePointers(false); + return; + } + } +} + /// Run: module ::= toplevelentity* bool LLParser::Run(bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback) { + // If we haven't decided on whether or not we're using opaque pointers, do a + // quick lex over the tokens to see if we explicitly construct any typed or + // opaque pointer types. + // Don't bail out on an error so we do the same work in the parsing below + // regardless of if --opaque-pointers is set. + if (!Context.hasSetOpaquePointersValue()) + setContextOpaquePointers(OPLex, Context); + // Prime the lexer. Lex.Lex(); @@ -248,7 +270,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { // remangle intrinsics names as well. 
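Returning to the opaque-pointer auto-detection above: setContextOpaquePointers() pre-lexes the module and lets the first explicit pointer spelling pick the mode, with "ptr" selecting opaque pointers and "*" selecting typed ones. The same decide-before-parse pattern in a standalone sketch (illustrative only, not the LLVM API):

    #include <cassert>
    #include <sstream>
    #include <string>

    // Scan tokens ahead of parsing; the first "ptr" or "*" decides the mode,
    // mirroring the intent of LLParser's setContextOpaquePointers pre-pass.
    static bool useOpaquePointers(const std::string &IR) {
      std::istringstream Tokens(IR);
      std::string Tok;
      while (Tokens >> Tok) {
        if (Tok == "ptr")
          return true;  // explicit opaque pointer type seen
        if (Tok.find('*') != std::string::npos)
          return false; // explicit typed pointer seen
      }
      return true;      // no pointers at all; the sketch defaults to opaque
    }

    int main() {
      assert(useOpaquePointers("define void @f( ptr %p ) { ret void }"));
      assert(!useOpaquePointers("define void @f( i8 * %p ) { ret void }"));
      return 0;
    }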
for (Function &F : llvm::make_early_inc_range(*M)) { if (auto Remangled = Intrinsic::remangleIntrinsicFunction(&F)) { - F.replaceAllUsesWith(Remangled.getValue()); + F.replaceAllUsesWith(*Remangled); F.eraseFromParent(); } } @@ -1081,6 +1103,45 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc, return false; } +static bool isSanitizer(lltok::Kind Kind) { + switch (Kind) { + case lltok::kw_no_sanitize_address: + case lltok::kw_no_sanitize_hwaddress: + case lltok::kw_no_sanitize_memtag: + case lltok::kw_sanitize_address_dyninit: + return true; + default: + return false; + } +} + +bool LLParser::parseSanitizer(GlobalVariable *GV) { + using SanitizerMetadata = GlobalValue::SanitizerMetadata; + SanitizerMetadata Meta; + if (GV->hasSanitizerMetadata()) + Meta = GV->getSanitizerMetadata(); + + switch (Lex.getKind()) { + case lltok::kw_no_sanitize_address: + Meta.NoAddress = true; + break; + case lltok::kw_no_sanitize_hwaddress: + Meta.NoHWAddress = true; + break; + case lltok::kw_no_sanitize_memtag: + Meta.NoMemtag = true; + break; + case lltok::kw_sanitize_address_dyninit: + Meta.IsDynInit = true; + break; + default: + return tokError("non-sanitizer token passed to LLParser::parseSanitizer()"); + } + GV->setSanitizerMetadata(Meta); + Lex.Lex(); + return false; +} + /// parseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier /// OptionalVisibility OptionalDLLStorageClass @@ -1168,7 +1229,7 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, GV->setUnnamedAddr(UnnamedAddr); if (GVal) { - if (!GVal->getType()->isOpaque() && GVal->getValueType() != Ty) + if (GVal->getType() != Ty->getPointerTo(AddrSpace)) return error( TyLoc, "forward reference and definition of global have different types"); @@ -1199,6 +1260,9 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, } else if (Lex.getKind() == lltok::MetadataVar) { if (parseGlobalObjectMetadataAttachment(*GV)) return true; + } else if (isSanitizer(Lex.getKind())) { + if (parseSanitizer(GV)) + return true; } else { Comdat *C; if (parseOptionalComdat(Name, C)) @@ -1333,6 +1397,20 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B, B.addDereferenceableOrNullAttr(Bytes); return false; } + case Attribute::UWTable: { + UWTableKind Kind; + if (parseOptionalUWTableKind(Kind)) + return true; + B.addUWTableAttr(Kind); + return false; + } + case Attribute::AllocKind: { + AllocFnKind Kind = AllocFnKind::Unknown; + if (parseAllocKind(Kind)) + return true; + B.addAllocKindAttr(Kind); + return false; + } default: B.addAttribute(Attr); Lex.Lex(); @@ -1996,6 +2074,56 @@ bool LLParser::parseOptionalDerefAttrBytes(lltok::Kind AttrKind, return false; } +bool LLParser::parseOptionalUWTableKind(UWTableKind &Kind) { + Lex.Lex(); + Kind = UWTableKind::Default; + if (!EatIfPresent(lltok::lparen)) + return false; + LocTy KindLoc = Lex.getLoc(); + if (Lex.getKind() == lltok::kw_sync) + Kind = UWTableKind::Sync; + else if (Lex.getKind() == lltok::kw_async) + Kind = UWTableKind::Async; + else + return error(KindLoc, "expected unwind table kind"); + Lex.Lex(); + return parseToken(lltok::rparen, "expected ')'"); +} + +bool LLParser::parseAllocKind(AllocFnKind &Kind) { + Lex.Lex(); + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return error(ParenLoc, "expected '('"); + LocTy KindLoc = Lex.getLoc(); + std::string Arg; + if (parseStringConstant(Arg)) + return error(KindLoc, "expected allockind value"); + for (StringRef A : llvm::split(Arg, ",")) { + if (A 
== "alloc") { + Kind |= AllocFnKind::Alloc; + } else if (A == "realloc") { + Kind |= AllocFnKind::Realloc; + } else if (A == "free") { + Kind |= AllocFnKind::Free; + } else if (A == "uninitialized") { + Kind |= AllocFnKind::Uninitialized; + } else if (A == "zeroed") { + Kind |= AllocFnKind::Zeroed; + } else if (A == "aligned") { + Kind |= AllocFnKind::Aligned; + } else { + return error(KindLoc, Twine("unknown allockind ") + A); + } + } + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return error(ParenLoc, "expected ')'"); + if (Kind == AllocFnKind::Unknown) + return error(KindLoc, "expected allockind value"); + return false; +} + /// parseOptionalCommaAlign /// ::= /// ::= ',' align 4 @@ -3344,24 +3472,8 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { ID.Kind = ValID::t_Constant; return false; } - case lltok::kw_extractvalue: { - Lex.Lex(); - Constant *Val; - SmallVector Indices; - if (parseToken(lltok::lparen, - "expected '(' in extractvalue constantexpr") || - parseGlobalTypeAndValue(Val) || parseIndexList(Indices) || - parseToken(lltok::rparen, "expected ')' in extractvalue constantexpr")) - return true; - - if (!Val->getType()->isAggregateType()) - return error(ID.Loc, "extractvalue operand must be aggregate type"); - if (!ExtractValueInst::getIndexedType(Val->getType(), Indices)) - return error(ID.Loc, "invalid indices for extractvalue"); - ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices); - ID.Kind = ValID::t_Constant; - return false; - } + case lltok::kw_extractvalue: + return error(ID.Loc, "extractvalue constexprs are no longer supported"); case lltok::kw_insertvalue: { Lex.Lex(); Constant *Val0, *Val1; @@ -3881,11 +3993,11 @@ struct MDAPSIntField : public MDFieldImpl { }; struct MDSignedField : public MDFieldImpl { - int64_t Min; - int64_t Max; + int64_t Min = INT64_MIN; + int64_t Max = INT64_MAX; MDSignedField(int64_t Default = 0) - : ImplTy(Default), Min(INT64_MIN), Max(INT64_MAX) {} + : ImplTy(Default) {} MDSignedField(int64_t Default, int64_t Min, int64_t Max) : ImplTy(Default), Min(Min), Max(Max) {} }; @@ -4144,8 +4256,8 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) { Val = DINode::getFlag(Lex.getStrVal()); if (!Val) - return tokError(Twine("invalid debug info flag flag '") + - Lex.getStrVal() + "'"); + return tokError(Twine("invalid debug info flag '") + Lex.getStrVal() + + "'"); Lex.Lex(); return false; }; @@ -4779,7 +4891,8 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) { OPTIONAL(declaration, MDField, ); \ OPTIONAL(retainedNodes, MDField, ); \ OPTIONAL(thrownTypes, MDField, ); \ - OPTIONAL(annotations, MDField, ); + OPTIONAL(annotations, MDField, ); \ + OPTIONAL(targetFuncName, MDStringField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -4798,7 +4911,8 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) { (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val, type.Val, scopeLine.Val, containingType.Val, virtualIndex.Val, thisAdjustment.Val, flags.Val, SPFlags, unit.Val, templateParams.Val, - declaration.Val, retainedNodes.Val, thrownTypes.Val, annotations.Val)); + declaration.Val, retainedNodes.Val, thrownTypes.Val, annotations.Val, + targetFuncName.Val)); return false; } @@ -4965,7 +5079,7 @@ bool LLParser::parseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) { /// declaration: !4, align: 8) bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ 
- REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \ + OPTIONAL(name, MDStringField, (/* AllowEmpty */ false)); \ OPTIONAL(scope, MDField, ); \ OPTIONAL(linkageName, MDStringField, ); \ OPTIONAL(file, MDField, ); \ @@ -5603,20 +5717,19 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) { auto FRVI = ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { FwdFn = FRVI->second.first; - if (!FwdFn->getType()->isOpaque()) { - if (!FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy()) - return error(FRVI->second.second, "invalid forward reference to " - "function as global value!"); - if (FwdFn->getType() != PFT) - return error(FRVI->second.second, - "invalid forward reference to " - "function '" + - FunctionName + - "' with wrong type: " - "expected '" + - getTypeString(PFT) + "' but was '" + - getTypeString(FwdFn->getType()) + "'"); - } + if (!FwdFn->getType()->isOpaque() && + !FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy()) + return error(FRVI->second.second, "invalid forward reference to " + "function as global value!"); + if (FwdFn->getType() != PFT) + return error(FRVI->second.second, + "invalid forward reference to " + "function '" + + FunctionName + + "' with wrong type: " + "expected '" + + getTypeString(PFT) + "' but was '" + + getTypeString(FwdFn->getType()) + "'"); ForwardRefVals.erase(FRVI); } else if ((Fn = M->getFunction(FunctionName))) { // Reject redefinitions. @@ -5631,8 +5744,8 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) { // types agree. auto I = ForwardRefValIDs.find(NumberedVals.size()); if (I != ForwardRefValIDs.end()) { - FwdFn = cast(I->second.first); - if (!FwdFn->getType()->isOpaque() && FwdFn->getType() != PFT) + FwdFn = I->second.first; + if (FwdFn->getType() != PFT) return error(NameLoc, "type of definition and forward reference of '@" + Twine(NumberedVals.size()) + "' disagree: " @@ -7322,9 +7435,9 @@ int LLParser::parseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { PFS.getFunction().getParent()->getDataLayout().getTypeStoreSize( Cmp->getType())); - AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst( - Ptr, Cmp, New, Alignment.getValueOr(DefaultAlignment), SuccessOrdering, - FailureOrdering, SSID); + AtomicCmpXchgInst *CXI = + new AtomicCmpXchgInst(Ptr, Cmp, New, Alignment.value_or(DefaultAlignment), + SuccessOrdering, FailureOrdering, SSID); CXI->setVolatile(isVolatile); CXI->setWeak(isWeak); @@ -7390,10 +7503,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { if (Operation == AtomicRMWInst::Xchg) { if (!Val->getType()->isIntegerTy() && - !Val->getType()->isFloatingPointTy()) { - return error(ValLoc, - "atomicrmw " + AtomicRMWInst::getOperationName(Operation) + - " operand must be an integer or floating point type"); + !Val->getType()->isFloatingPointTy() && + !Val->getType()->isPointerTy()) { + return error( + ValLoc, + "atomicrmw " + AtomicRMWInst::getOperationName(Operation) + + " operand must be an integer, floating point, or pointer type"); } } else if (IsFP) { if (!Val->getType()->isFloatingPointTy()) { @@ -7409,7 +7524,9 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { } } - unsigned Size = Val->getType()->getPrimitiveSizeInBits(); + unsigned Size = + PFS.getFunction().getParent()->getDataLayout().getTypeStoreSizeInBits( + Val->getType()); if (Size < 8 || (Size & (Size - 1))) return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized" " integer"); @@ -7418,7 +7535,7 @@ int 
LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { Val->getType())); AtomicRMWInst *RMWI = new AtomicRMWInst(Operation, Ptr, Val, - Alignment.getValueOr(DefaultAlignment), Ordering, SSID); + Alignment.value_or(DefaultAlignment), Ordering, SSID); RMWI->setVolatile(isVolatile); Inst = RMWI; return AteExtraComma ? InstExtraComma : InstNormal; diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp index 156fbbe71adb..95b9079f0f9c 100644 --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -11,13 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/AsmParser/Parser.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/AsmParser/LLParser.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" -#include #include using namespace llvm; diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index 0d28d93c93c0..1613e7e42a0a 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -106,8 +106,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) { return false; if (!verifyIntegerEntry(ArgsMap, ".offset", true)) return false; - if (!verifyScalarEntry(ArgsMap, ".value_kind", true, - msgpack::Type::String, + if (!verifyScalarEntry(ArgsMap, ".value_kind", true, msgpack::Type::String, [](msgpack::DocNode &SNode) { return StringSwitch(SNode.getString()) .Case("by_value", true) @@ -133,6 +132,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) { .Case("hidden_none", true) .Case("hidden_printf_buffer", true) .Case("hidden_hostcall_buffer", true) + .Case("hidden_heap_v1", true) .Case("hidden_default_queue", true) .Case("hidden_completion_action", true) .Case("hidden_multigrid_sync_arg", true) diff --git a/llvm/lib/BinaryFormat/COFF.cpp b/llvm/lib/BinaryFormat/COFF.cpp new file mode 100644 index 000000000000..8fbee0218b79 --- /dev/null +++ b/llvm/lib/BinaryFormat/COFF.cpp @@ -0,0 +1,57 @@ +//===- llvm/BinaryFormat/COFF.cpp - The COFF format -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" + +// Maximum offsets for different string table entry encodings. +enum : unsigned { Max7DecimalOffset = 9999999U }; +enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6, including 0 + +// Encode a string table entry offset in base 64, padded to 6 chars, and +// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ... +// Buffer must be at least 8 bytes large. No terminating null appended. 
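// Editorial sketch (not part of the patch): the two string-table encodings
// implemented below behave as follows (the base64 digits were computed by
// hand with the A-Z a-z 0-9 + / alphabet, so treat them as illustrative):
//   encodeSectionName(Out, 1234)       writes "/1234"    (ASCII decimal)
//   encodeSectionName(Out, 9999999)    writes "/9999999" (largest decimal)
//   encodeSectionName(Out, 10000000)   writes "//AAmJaA" (first base64 value)
//   encodeSectionName(Out, 1ULL << 36) returns false     (offset too large)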
+static void encodeBase64StringEntry(char *Buffer, uint64_t Value) { + assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset && + "Illegal section name encoding for value"); + + static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + Buffer[0] = '/'; + Buffer[1] = '/'; + + char *Ptr = Buffer + 7; + for (unsigned i = 0; i < 6; ++i) { + unsigned Rem = Value % 64; + Value /= 64; + *(Ptr--) = Alphabet[Rem]; + } +} + +bool llvm::COFF::encodeSectionName(char *Out, uint64_t Offset) { + if (Offset <= Max7DecimalOffset) { + // Offsets of 7 digits or less are encoded in ASCII. + SmallVector Buffer; + Twine('/').concat(Twine(Offset)).toVector(Buffer); + assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2); + std::memcpy(Out, Buffer.data(), Buffer.size()); + return true; + } + + if (Offset <= MaxBase64Offset) { + // Starting with 10,000,000, offsets are encoded as base64. + encodeBase64StringEntry(Out, Offset); + return true; + } + + // The offset is too large to be encoded. + return false; +} diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp index 044e4840cb3b..d45195fb95c5 100644 --- a/llvm/lib/BinaryFormat/Magic.cpp +++ b/llvm/lib/BinaryFormat/Magic.cpp @@ -74,6 +74,11 @@ file_magic llvm::identify_magic(StringRef Magic) { return file_magic::goff_object; break; + case 0x10: + if (startswith(Magic, "\x10\xFF\x10\xAD")) + return file_magic::offload_binary; + break; + case 0xDE: // 0x0B17C0DE = BC wraper if (startswith(Magic, "\xDE\xC0\x17\x0B")) return file_magic::bitcode; @@ -185,6 +190,10 @@ file_magic llvm::identify_magic(StringRef Magic) { case 0x84: // Alpha 64-bit case 0x66: // MPS R4000 Windows case 0x50: // mc68K + if (startswith(Magic, "\x50\xed\x55\xba")) + return file_magic::cuda_fatbinary; + LLVM_FALLTHROUGH; + case 0x4c: // 80386 Windows case 0xc4: // ARMNT Windows if (Magic[1] == 0x01) @@ -221,6 +230,11 @@ file_magic llvm::identify_magic(StringRef Magic) { if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:")) return file_magic::tapi_file; break; + + case 'D': // DirectX container file - DXBC + if (startswith(Magic, "DXBC")) + return file_magic::dxcontainer_object; + break; default: break; diff --git a/llvm/lib/BinaryFormat/Wasm.cpp b/llvm/lib/BinaryFormat/Wasm.cpp index 55efe31f2669..babeb12e49ef 100644 --- a/llvm/lib/BinaryFormat/Wasm.cpp +++ b/llvm/lib/BinaryFormat/Wasm.cpp @@ -8,7 +8,7 @@ #include "llvm/BinaryFormat/Wasm.h" -std::string llvm::wasm::toString(wasm::WasmSymbolType Type) { +llvm::StringRef llvm::wasm::toString(wasm::WasmSymbolType Type) { switch (Type) { case wasm::WASM_SYMBOL_TYPE_FUNCTION: return "WASM_SYMBOL_TYPE_FUNCTION"; @@ -26,7 +26,7 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType Type) { llvm_unreachable("unknown symbol type"); } -std::string llvm::wasm::relocTypetoString(uint32_t Type) { +llvm::StringRef llvm::wasm::relocTypetoString(uint32_t Type) { switch (Type) { #define WASM_RELOC(NAME, VALUE) \ case VALUE: \ @@ -38,6 +38,31 @@ std::string llvm::wasm::relocTypetoString(uint32_t Type) { } } +llvm::StringRef llvm::wasm::sectionTypeToString(uint32_t Type) { +#define ECase(X) \ + case wasm::WASM_SEC_##X: \ + return #X; + switch (Type) { + ECase(CUSTOM); + ECase(TYPE); + ECase(IMPORT); + ECase(FUNCTION); + ECase(TABLE); + ECase(MEMORY); + ECase(GLOBAL); + ECase(EXPORT); + ECase(START); + ECase(ELEM); + ECase(CODE); + ECase(DATA); + ECase(DATACOUNT); + ECase(TAG); + default: + llvm_unreachable("unknown section type"); + } +#undef ECase 
+} + bool llvm::wasm::relocTypeHasAddend(uint32_t Type) { switch (Type) { case R_WASM_MEMORY_ADDR_LEB: diff --git a/llvm/lib/Bitcode/Reader/BitReader.cpp b/llvm/lib/Bitcode/Reader/BitReader.cpp index 5ac893aef14e..da2cf0770ec5 100644 --- a/llvm/lib/Bitcode/Reader/BitReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitReader.cpp @@ -12,7 +12,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include #include diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index ffef35299981..1d16211c65bf 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -267,6 +267,7 @@ static Optional GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) + STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS) } case bitc::VALUE_SYMTAB_BLOCK_ID: switch (CodeID) { @@ -735,7 +736,7 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, BlockStats.NumInstances++; // BLOCKINFO is a special part of the stream. - bool DumpRecords = O.hasValue(); + bool DumpRecords = O.has_value(); if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { if (O && !O->DumpBlockinfo) O->OS << Indent << "\n"; @@ -864,7 +865,10 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, O->OS << " codeid=" << Code; const BitCodeAbbrev *Abbv = nullptr; if (Entry.ID != bitc::UNABBREV_RECORD) { - Abbv = Stream.getAbbrev(Entry.ID); + Expected MaybeAbbv = Stream.getAbbrev(Entry.ID); + if (!MaybeAbbv) + return MaybeAbbv.takeError(); + Abbv = MaybeAbbv.get(); O->OS << " abbrevid=" << Entry.ID; } @@ -894,13 +898,13 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, // If we found a module hash, let's verify that it matches! 
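// Editorial sketch (not part of the patch): the hash check below assumes the
// MODULE_CODE_HASH record holds five 32-bit words. A hypothetical standalone
// helper that rebuilds the 20-byte digest the same way the write32be loop
// in the hunk does:
#include <array>
#include <cstdint>
static std::array<char, 20> recordToSha1Bytes(const uint64_t (&Record)[5]) {
  std::array<char, 20> Bytes;
  for (int I = 0; I != 5; ++I) {
    auto W = static_cast<uint32_t>(Record[I]); // high bits asserted clear
    Bytes[I * 4 + 0] = static_cast<char>(W >> 24); // big-endian order matches
    Bytes[I * 4 + 1] = static_cast<char>(W >> 16); // support::endian::write32be
    Bytes[I * 4 + 2] = static_cast<char>(W >> 8);
    Bytes[I * 4 + 3] = static_cast<char>(W);
  }
  return Bytes;
}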
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH && - CheckHash.hasValue()) { + CheckHash) { if (Record.size() != 5) O->OS << " (invalid)"; else { // Recompute the hash and compare it to the one in the bitcode SHA1 Hasher; - StringRef Hash; + std::array Hash; Hasher.update(*CheckHash); { int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos; @@ -908,14 +912,14 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, Hasher.update(ArrayRef(Ptr, BlockSize)); Hash = Hasher.result(); } - std::array RecordedHash; + std::array RecordedHash; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); support::endian::write32be(&RecordedHash[Pos], Val); Pos += 4; } - if (Hash == StringRef(RecordedHash.data(), RecordedHash.size())) + if (Hash == RecordedHash) O->OS << " (match)"; else O->OS << " (!mismatch!)"; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 720ab560f988..93b07fc0db30 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalObject.h" @@ -50,6 +51,8 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -91,6 +94,11 @@ static cl::opt PrintSummaryGUIDs( cl::desc( "Print the global id for each value when reading the module summary")); +static cl::opt ExpandConstantExprs( + "expand-constant-exprs", cl::Hidden, + cl::desc( + "Expand constant expressions to instructions for testing purposes")); + namespace { enum { @@ -282,7 +290,7 @@ static Expected hasObjCCategoryInModule(BitstreamCursor &Stream) { case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) - return error("Invalid record"); + return error("Invalid section name record"); // Check for the i386 and other (x86_64, ARM) conventions if (S.find("__DATA,__objc_catlist") != std::string::npos || S.find("__OBJC,__category") != std::string::npos) @@ -361,7 +369,7 @@ static Expected readModuleTriple(BitstreamCursor &Stream) { case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) - return error("Invalid record"); + return error("Invalid triple record"); Triple = S; break; } @@ -429,7 +437,7 @@ protected: std::pair> readNameFromStrtab(ArrayRef Record); - bool readBlockInfo(); + Error readBlockInfo(); // Contains an arbitrary and optional string identifying the bitcode producer std::string ProducerIdentification; @@ -450,7 +458,7 @@ Error BitcodeReaderBase::error(const Twine &Message) { Expected BitcodeReaderBase::parseVersionRecord(ArrayRef Record) { if (Record.empty()) - return error("Invalid record"); + return error("Invalid version record"); unsigned ModuleVersion = Record[0]; if (ModuleVersion > 2) return error("Invalid value"); @@ -470,6 +478,90 @@ BitcodeReaderBase::readNameFromStrtab(ArrayRef Record) { namespace { +/// This represents a constant expression or constant aggregate using a custom +/// structure internal to the bitcode reader. 
Later, this structure will be +/// expanded by materializeValue() either into a constant expression/aggregate, +/// or into an instruction sequence at the point of use. This allows us to +/// upgrade bitcode using constant expressions even if this kind of constant +/// expression is no longer supported. +class BitcodeConstant final : public Value, + TrailingObjects { + friend TrailingObjects; + + // Value subclass ID: Pick largest possible value to avoid any clashes. + static constexpr uint8_t SubclassID = 255; + +public: + // Opcodes used for non-expressions. This includes constant aggregates + // (struct, array, vector) that might need expansion, as well as non-leaf + // constants that don't need expansion (no_cfi, dso_local, blockaddress), + // but still go through BitcodeConstant to avoid different uselist orders + // between the two cases. + static constexpr uint8_t ConstantStructOpcode = 255; + static constexpr uint8_t ConstantArrayOpcode = 254; + static constexpr uint8_t ConstantVectorOpcode = 253; + static constexpr uint8_t NoCFIOpcode = 252; + static constexpr uint8_t DSOLocalEquivalentOpcode = 251; + static constexpr uint8_t BlockAddressOpcode = 250; + static constexpr uint8_t FirstSpecialOpcode = BlockAddressOpcode; + + // Separate struct to make passing different number of parameters to + // BitcodeConstant::create() more convenient. + struct ExtraInfo { + uint8_t Opcode; + uint8_t Flags; + unsigned Extra; + Type *SrcElemTy; + + ExtraInfo(uint8_t Opcode, uint8_t Flags = 0, unsigned Extra = 0, + Type *SrcElemTy = nullptr) + : Opcode(Opcode), Flags(Flags), Extra(Extra), SrcElemTy(SrcElemTy) {} + }; + + uint8_t Opcode; + uint8_t Flags; + unsigned NumOperands; + unsigned Extra; // GEP inrange index or blockaddress BB id. + Type *SrcElemTy; // GEP source element type. + +private: + BitcodeConstant(Type *Ty, const ExtraInfo &Info, ArrayRef OpIDs) + : Value(Ty, SubclassID), Opcode(Info.Opcode), Flags(Info.Flags), + NumOperands(OpIDs.size()), Extra(Info.Extra), + SrcElemTy(Info.SrcElemTy) { + std::uninitialized_copy(OpIDs.begin(), OpIDs.end(), + getTrailingObjects()); + } + + BitcodeConstant &operator=(const BitcodeConstant &) = delete; + +public: + static BitcodeConstant *create(BumpPtrAllocator &A, Type *Ty, + const ExtraInfo &Info, + ArrayRef OpIDs) { + void *Mem = A.Allocate(totalSizeToAlloc(OpIDs.size()), + alignof(BitcodeConstant)); + return new (Mem) BitcodeConstant(Ty, Info, OpIDs); + } + + static bool classof(const Value *V) { return V->getValueID() == SubclassID; } + + ArrayRef getOperandIDs() const { + return makeArrayRef(getTrailingObjects(), NumOperands); + } + + Optional getInRangeIndex() const { + assert(Opcode == Instruction::GetElementPtr); + if (Extra == (unsigned)-1) + return None; + return Extra; + } + + const char *getOpcodeName() const { + return Instruction::getOpcodeName(Opcode); + } +}; + class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; @@ -483,8 +575,23 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { std::vector SectionTable; std::vector GCTable; - std::vector TypeList; - DenseMap FunctionTypes; + std::vector TypeList; + /// Track type IDs of contained types. Order is the same as the contained + /// types of a Type*. This is used during upgrades of typed pointer IR in + /// opaque pointer mode. 
+ DenseMap> ContainedTypeIDs; + /// In some cases, we need to create a type ID for a type that was not + /// explicitly encoded in the bitcode, or we don't know about at the current + /// point. For example, a global may explicitly encode the value type ID, but + /// not have a type ID for the pointer to value type, for which we create a + /// virtual type ID instead. This map stores the new type ID that was created + /// for the given pair of Type and contained type ID. + DenseMap, unsigned> VirtualTypeIDs; + DenseMap FunctionTypeIDs; + /// Allocator for BitcodeConstants. This should come before ValueList, + /// because the ValueList might hold ValueHandles to these constants, so + /// ValueList must be destroyed before Alloc. + BumpPtrAllocator Alloc; BitcodeReaderValueList ValueList; Optional MDLoader; std::vector ComdatList; @@ -544,6 +651,13 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { DenseMap> BasicBlockFwdRefs; std::deque BasicBlockFwdRefQueue; + /// These are Functions that contain BlockAddresses which refer a different + /// Function. When parsing the different Function, queue Functions that refer + /// to the different Function. Those Functions must be materialized in order + /// to resolve their BlockAddress constants before the different Function + /// gets moved into another Module. + std::vector BackwardRefFunctions; + /// Indicates that we are using a new encoding for instruction operands where /// most operands in the current FUNCTION_BLOCK are encoded relative to the /// instruction number, for a more compact encoding. Some instruction @@ -575,8 +689,8 @@ public: /// Main interface to parsing a bitcode buffer. /// \returns true if an error occurred. Error parseBitcodeInto( - Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + Module *M, bool ShouldLazyLoadMetadata, bool IsImporting, + DataLayoutCallbackTy DataLayoutCallback); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -590,12 +704,21 @@ private: StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); StructType *createIdentifiedStructType(LLVMContext &Context); + static constexpr unsigned InvalidTypeID = ~0u; + Type *getTypeByID(unsigned ID); + Type *getPtrElementTypeByID(unsigned ID); + unsigned getContainedTypeID(unsigned ID, unsigned Idx = 0); + unsigned getVirtualTypeID(Type *Ty, ArrayRef ContainedTypeIDs = {}); + + Expected materializeValue(unsigned ValID, BasicBlock *InsertBB); + Expected getValueForInitializer(unsigned ID); - Value *getFnValueByID(unsigned ID, Type *Ty) { + Value *getFnValueByID(unsigned ID, Type *Ty, unsigned TyID, + BasicBlock *ConstExprInsertBB) { if (Ty && Ty->isMetadataTy()) return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID)); - return ValueList.getValueFwdRef(ID, Ty); + return ValueList.getValueFwdRef(ID, Ty, TyID, ConstExprInsertBB); } Metadata *getFnMetadataByID(unsigned ID) { @@ -617,7 +740,8 @@ private: /// Increment Slot past the number of slots used in the record. Return true on /// failure. bool getValueTypePair(const SmallVectorImpl &Record, unsigned &Slot, - unsigned InstNum, Value *&ResVal) { + unsigned InstNum, Value *&ResVal, unsigned &TypeID, + BasicBlock *ConstExprInsertBB) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; // Adjust the ValNo, if it was encoded relative to the InstNum. 
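// Editorial sketch (not part of the patch): with UseRelativeIDs an operand
// slot stores the distance back from the current instruction's value number:
//   decoded ValNo = InstNum - encoded ValNo
// so at InstNum 42 an encoded 1 denotes value #41, "the value defined just
// before me", which keeps most operands small for the VBR encoding.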
@@ -626,14 +750,18 @@ private: if (ValNo < InstNum) { // If this is not a forward reference, just return the value we already // have. - ResVal = getFnValueByID(ValNo, nullptr); + TypeID = ValueList.getTypeID(ValNo); + ResVal = getFnValueByID(ValNo, nullptr, TypeID, ConstExprInsertBB); + assert((!ResVal || ResVal->getType() == getTypeByID(TypeID)) && + "Incorrect type ID stored for value"); return ResVal == nullptr; } if (Slot == Record.size()) return true; - unsigned TypeNo = (unsigned)Record[Slot++]; - ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); + TypeID = (unsigned)Record[Slot++]; + ResVal = getFnValueByID(ValNo, getTypeByID(TypeID), TypeID, + ConstExprInsertBB); return ResVal == nullptr; } @@ -641,8 +769,9 @@ private: /// past the number of slots used by the value in the record. Return true if /// there is an error. bool popValue(const SmallVectorImpl &Record, unsigned &Slot, - unsigned InstNum, Type *Ty, Value *&ResVal) { - if (getValue(Record, Slot, InstNum, Ty, ResVal)) + unsigned InstNum, Type *Ty, unsigned TyID, Value *&ResVal, + BasicBlock *ConstExprInsertBB) { + if (getValue(Record, Slot, InstNum, Ty, TyID, ResVal, ConstExprInsertBB)) return true; // All values currently take a single record slot. ++Slot; @@ -651,38 +780,41 @@ private: /// Like popValue, but does not increment the Slot number. bool getValue(const SmallVectorImpl &Record, unsigned Slot, - unsigned InstNum, Type *Ty, Value *&ResVal) { - ResVal = getValue(Record, Slot, InstNum, Ty); + unsigned InstNum, Type *Ty, unsigned TyID, Value *&ResVal, + BasicBlock *ConstExprInsertBB) { + ResVal = getValue(Record, Slot, InstNum, Ty, TyID, ConstExprInsertBB); return ResVal == nullptr; } /// Version of getValue that returns ResVal directly, or 0 if there is an /// error. Value *getValue(const SmallVectorImpl &Record, unsigned Slot, - unsigned InstNum, Type *Ty) { + unsigned InstNum, Type *Ty, unsigned TyID, + BasicBlock *ConstExprInsertBB) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)Record[Slot]; // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; - return getFnValueByID(ValNo, Ty); + return getFnValueByID(ValNo, Ty, TyID, ConstExprInsertBB); } /// Like getValue, but decodes signed VBRs. Value *getValueSigned(const SmallVectorImpl &Record, unsigned Slot, - unsigned InstNum, Type *Ty) { + unsigned InstNum, Type *Ty, unsigned TyID, + BasicBlock *ConstExprInsertBB) { if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) ValNo = InstNum - ValNo; - return getFnValueByID(ValNo, Ty); + return getFnValueByID(ValNo, Ty, TyID, ConstExprInsertBB); } /// Upgrades old-style typeless byval/sret/inalloca attributes by adding the /// corresponding argument's pointee type. Also upgrades intrinsics that now /// require an elementtype attribute. - void propagateAttributeTypes(CallBase *CB, ArrayRef ArgsTys); + Error propagateAttributeTypes(CallBase *CB, ArrayRef ArgsTys); /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. 
If alignment is too large, returns @@ -827,7 +959,10 @@ BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), - ValueList(Context, Stream.SizeInBytes()) { + ValueList(this->Stream.SizeInBytes(), + [this](unsigned ValID, BasicBlock *InsertBB) { + return materializeValue(ValID, InsertBB); + }) { this->ProducerIdentification = std::string(ProducerIdentification); } @@ -859,6 +994,11 @@ Error BitcodeReader::materializeForwardReferencedFunctions() { } assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); + for (Function *F : BackwardRefFunctions) + if (Error Err = materialize(F)) + return Err; + BackwardRefFunctions.clear(); + // Reset state. WillMaterializeAllForwardRefs = false; return Error::success(); @@ -1176,6 +1316,324 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { return TypeList[ID] = createIdentifiedStructType(Context); } +unsigned BitcodeReader::getContainedTypeID(unsigned ID, unsigned Idx) { + auto It = ContainedTypeIDs.find(ID); + if (It == ContainedTypeIDs.end()) + return InvalidTypeID; + + if (Idx >= It->second.size()) + return InvalidTypeID; + + return It->second[Idx]; +} + +Type *BitcodeReader::getPtrElementTypeByID(unsigned ID) { + if (ID >= TypeList.size()) + return nullptr; + + Type *Ty = TypeList[ID]; + if (!Ty->isPointerTy()) + return nullptr; + + Type *ElemTy = getTypeByID(getContainedTypeID(ID, 0)); + if (!ElemTy) + return nullptr; + + assert(cast(Ty)->isOpaqueOrPointeeTypeMatches(ElemTy) && + "Incorrect element type"); + return ElemTy; +} + +unsigned BitcodeReader::getVirtualTypeID(Type *Ty, + ArrayRef ChildTypeIDs) { + unsigned ChildTypeID = ChildTypeIDs.empty() ? InvalidTypeID : ChildTypeIDs[0]; + auto CacheKey = std::make_pair(Ty, ChildTypeID); + auto It = VirtualTypeIDs.find(CacheKey); + if (It != VirtualTypeIDs.end()) { + // The cmpxchg return value is the only place we need more than one + // contained type ID, however the second one will always be the same (i1), + // so we don't need to include it in the cache key. This asserts that the + // contained types are indeed as expected and there are no collisions. + assert((ChildTypeIDs.empty() || + ContainedTypeIDs[It->second] == ChildTypeIDs) && + "Incorrect cached contained type IDs"); + return It->second; + } + +#ifndef NDEBUG + if (!Ty->isOpaquePointerTy()) { + assert(Ty->getNumContainedTypes() == ChildTypeIDs.size() && + "Wrong number of contained types"); + for (auto Pair : zip(Ty->subtypes(), ChildTypeIDs)) { + assert(std::get<0>(Pair) == getTypeByID(std::get<1>(Pair)) && + "Incorrect contained type ID"); + } + } +#endif + + unsigned TypeID = TypeList.size(); + TypeList.push_back(Ty); + if (!ChildTypeIDs.empty()) + append_range(ContainedTypeIDs[TypeID], ChildTypeIDs); + VirtualTypeIDs.insert({CacheKey, TypeID}); + return TypeID; +} + +static bool isConstExprSupported(uint8_t Opcode) { + // These are not real constant expressions, always consider them supported. + if (Opcode >= BitcodeConstant::FirstSpecialOpcode) + return true; + + return !ExpandConstantExprs; +} + +Expected BitcodeReader::materializeValue(unsigned StartValID, + BasicBlock *InsertBB) { + // Quickly handle the case where there is no BitcodeConstant to resolve. 
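// Editorial sketch (not part of the patch): the loop that follows is an
// iterative post-order DAG walk. The same shape on a toy dependency graph,
// with a placeholder computation standing in for "build the constant":
#include <map>
#include <vector>
static int evaluate(const std::vector<std::vector<int>> &Nodes, int Start) {
  std::map<int, int> Memo; // node id -> materialized result
  std::vector<int> Worklist{Start};
  while (!Worklist.empty()) {
    int ID = Worklist.back();
    if (Memo.count(ID)) { Worklist.pop_back(); continue; } // already built
    bool Ready = true;
    for (int Op : Nodes[ID])
      if (!Memo.count(Op)) { Worklist.push_back(Op); Ready = false; }
    if (!Ready) continue; // operands first; this node is revisited later
    int Result = 1; // stand-in for creating the constant or instruction
    for (int Op : Nodes[ID]) Result += Memo[Op];
    Memo[ID] = Result;
    Worklist.pop_back();
  }
  return Memo[Start]; // shared subexpressions were built exactly once
}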
+ if (StartValID < ValueList.size() && ValueList[StartValID] && + !isa(ValueList[StartValID])) + return ValueList[StartValID]; + + SmallDenseMap MaterializedValues; + SmallVector Worklist; + Worklist.push_back(StartValID); + while (!Worklist.empty()) { + unsigned ValID = Worklist.back(); + if (MaterializedValues.count(ValID)) { + // Duplicate expression that was already handled. + Worklist.pop_back(); + continue; + } + + if (ValID >= ValueList.size() || !ValueList[ValID]) + return error("Invalid value ID"); + + Value *V = ValueList[ValID]; + auto *BC = dyn_cast(V); + if (!BC) { + MaterializedValues.insert({ValID, V}); + Worklist.pop_back(); + continue; + } + + // Iterate in reverse, so values will get popped from the worklist in + // expected order. + SmallVector Ops; + for (unsigned OpID : reverse(BC->getOperandIDs())) { + auto It = MaterializedValues.find(OpID); + if (It != MaterializedValues.end()) + Ops.push_back(It->second); + else + Worklist.push_back(OpID); + } + + // Some expressions have not been resolved yet, handle them first and then + // revisit this one. + if (Ops.size() != BC->getOperandIDs().size()) + continue; + std::reverse(Ops.begin(), Ops.end()); + + SmallVector ConstOps; + for (Value *Op : Ops) + if (auto *C = dyn_cast(Op)) + ConstOps.push_back(C); + + // Materialize as constant expression if possible. + if (isConstExprSupported(BC->Opcode) && ConstOps.size() == Ops.size()) { + Constant *C; + if (Instruction::isCast(BC->Opcode)) { + C = UpgradeBitCastExpr(BC->Opcode, ConstOps[0], BC->getType()); + if (!C) + C = ConstantExpr::getCast(BC->Opcode, ConstOps[0], BC->getType()); + } else if (Instruction::isUnaryOp(BC->Opcode)) { + C = ConstantExpr::get(BC->Opcode, ConstOps[0], BC->Flags); + } else if (Instruction::isBinaryOp(BC->Opcode)) { + C = ConstantExpr::get(BC->Opcode, ConstOps[0], ConstOps[1], BC->Flags); + } else { + switch (BC->Opcode) { + case BitcodeConstant::NoCFIOpcode: { + auto *GV = dyn_cast(ConstOps[0]); + if (!GV) + return error("no_cfi operand must be GlobalValue"); + C = NoCFIValue::get(GV); + break; + } + case BitcodeConstant::DSOLocalEquivalentOpcode: { + auto *GV = dyn_cast(ConstOps[0]); + if (!GV) + return error("dso_local operand must be GlobalValue"); + C = DSOLocalEquivalent::get(GV); + break; + } + case BitcodeConstant::BlockAddressOpcode: { + Function *Fn = dyn_cast(ConstOps[0]); + if (!Fn) + return error("blockaddress operand must be a function"); + + // If the function is already parsed we can insert the block address + // right away. + BasicBlock *BB; + unsigned BBID = BC->Extra; + if (!BBID) + // Invalid reference to entry block. + return error("Invalid ID"); + if (!Fn->empty()) { + Function::iterator BBI = Fn->begin(), BBE = Fn->end(); + for (size_t I = 0, E = BBID; I != E; ++I) { + if (BBI == BBE) + return error("Invalid ID"); + ++BBI; + } + BB = &*BBI; + } else { + // Otherwise insert a placeholder and remember it so it can be + // inserted when the function is parsed. 
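// Editorial sketch (not part of the patch): for a blockaddress that is read
// before its function body, e.g.
//   @ba = global ptr blockaddress(@f, %bb)   ; @f defined further down
// the path below parks a detached placeholder BasicBlock in FwdBBs[BBID] so
// BlockAddress::get() has a target; when @f's body is parsed, the
// placeholders are adopted as the function's blocks at those indices.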
+ auto &FwdBBs = BasicBlockFwdRefs[Fn]; + if (FwdBBs.empty()) + BasicBlockFwdRefQueue.push_back(Fn); + if (FwdBBs.size() < BBID + 1) + FwdBBs.resize(BBID + 1); + if (!FwdBBs[BBID]) + FwdBBs[BBID] = BasicBlock::Create(Context); + BB = FwdBBs[BBID]; + } + C = BlockAddress::get(Fn, BB); + break; + } + case BitcodeConstant::ConstantStructOpcode: + C = ConstantStruct::get(cast(BC->getType()), ConstOps); + break; + case BitcodeConstant::ConstantArrayOpcode: + C = ConstantArray::get(cast(BC->getType()), ConstOps); + break; + case BitcodeConstant::ConstantVectorOpcode: + C = ConstantVector::get(ConstOps); + break; + case Instruction::ICmp: + case Instruction::FCmp: + C = ConstantExpr::getCompare(BC->Flags, ConstOps[0], ConstOps[1]); + break; + case Instruction::GetElementPtr: + C = ConstantExpr::getGetElementPtr( + BC->SrcElemTy, ConstOps[0], makeArrayRef(ConstOps).drop_front(), + BC->Flags, BC->getInRangeIndex()); + break; + case Instruction::Select: + C = ConstantExpr::getSelect(ConstOps[0], ConstOps[1], ConstOps[2]); + break; + case Instruction::ExtractElement: + C = ConstantExpr::getExtractElement(ConstOps[0], ConstOps[1]); + break; + case Instruction::InsertElement: + C = ConstantExpr::getInsertElement(ConstOps[0], ConstOps[1], + ConstOps[2]); + break; + case Instruction::ShuffleVector: { + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(ConstOps[2], Mask); + C = ConstantExpr::getShuffleVector(ConstOps[0], ConstOps[1], Mask); + break; + } + default: + llvm_unreachable("Unhandled bitcode constant"); + } + } + + // Cache resolved constant. + ValueList.replaceValueWithoutRAUW(ValID, C); + MaterializedValues.insert({ValID, C}); + Worklist.pop_back(); + continue; + } + + if (!InsertBB) + return error(Twine("Value referenced by initializer is an unsupported " + "constant expression of type ") + + BC->getOpcodeName()); + + // Materialize as instructions if necessary. 
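// Editorial sketch (not part of the patch): this is the instruction-expansion
// path taken when a constant expression is unsupported (or when the hidden
// -expand-constant-exprs flag forces it). Schematically, a GEP operand
//   store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @g, i64 0, i64 1)
// becomes an ordinary instruction named "constexpr" at the point of use:
//   %constexpr = getelementptr inbounds [4 x i32], ptr @g, i64 0, i64 1
//   store i32 0, ptr %constexpr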
+ Instruction *I; + if (Instruction::isCast(BC->Opcode)) { + I = CastInst::Create((Instruction::CastOps)BC->Opcode, Ops[0], + BC->getType(), "constexpr", InsertBB); + } else if (Instruction::isUnaryOp(BC->Opcode)) { + I = UnaryOperator::Create((Instruction::UnaryOps)BC->Opcode, Ops[0], + "constexpr", InsertBB); + } else if (Instruction::isBinaryOp(BC->Opcode)) { + I = BinaryOperator::Create((Instruction::BinaryOps)BC->Opcode, Ops[0], + Ops[1], "constexpr", InsertBB); + if (isa(I)) { + if (BC->Flags & OverflowingBinaryOperator::NoSignedWrap) + I->setHasNoSignedWrap(); + if (BC->Flags & OverflowingBinaryOperator::NoUnsignedWrap) + I->setHasNoUnsignedWrap(); + } + if (isa(I) && + (BC->Flags & PossiblyExactOperator::IsExact)) + I->setIsExact(); + } else { + switch (BC->Opcode) { + case BitcodeConstant::ConstantStructOpcode: + case BitcodeConstant::ConstantArrayOpcode: + case BitcodeConstant::ConstantVectorOpcode: { + Type *IdxTy = Type::getInt32Ty(BC->getContext()); + Value *V = PoisonValue::get(BC->getType()); + for (auto Pair : enumerate(Ops)) { + Value *Idx = ConstantInt::get(IdxTy, Pair.index()); + V = InsertElementInst::Create(V, Pair.value(), Idx, "constexpr.ins", + InsertBB); + } + I = cast(V); + break; + } + case Instruction::ICmp: + case Instruction::FCmp: + I = CmpInst::Create((Instruction::OtherOps)BC->Opcode, + (CmpInst::Predicate)BC->Flags, Ops[0], Ops[1], + "constexpr", InsertBB); + break; + case Instruction::GetElementPtr: + I = GetElementPtrInst::Create(BC->SrcElemTy, Ops[0], + makeArrayRef(Ops).drop_front(), + "constexpr", InsertBB); + if (BC->Flags) + cast(I)->setIsInBounds(); + break; + case Instruction::Select: + I = SelectInst::Create(Ops[0], Ops[1], Ops[2], "constexpr", InsertBB); + break; + case Instruction::ExtractElement: + I = ExtractElementInst::Create(Ops[0], Ops[1], "constexpr", InsertBB); + break; + case Instruction::InsertElement: + I = InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "constexpr", + InsertBB); + break; + case Instruction::ShuffleVector: + I = new ShuffleVectorInst(Ops[0], Ops[1], Ops[2], "constexpr", + InsertBB); + break; + default: + llvm_unreachable("Unhandled bitcode constant"); + } + } + + MaterializedValues.insert({ValID, I}); + Worklist.pop_back(); + } + + return MaterializedValues[StartValID]; +} + +Expected BitcodeReader::getValueForInitializer(unsigned ID) { + Expected MaybeV = materializeValue(ID, /* InsertBB */ nullptr); + if (!MaybeV) + return MaybeV.takeError(); + + // Result must be Constant if InsertBB is nullptr. + return cast(MaybeV.get()); +} + StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context, StringRef Name) { auto *Ret = StructType::create(Context, Name); @@ -1346,7 +1804,7 @@ Error BitcodeReader::parseAttributeBlock() { case bitc::PARAMATTR_CODE_ENTRY_OLD: // ENTRY: [paramidx0, attr0, ...] // Deprecated, but still needed to read old bitcode files. 
if (Record.size() & 1) - return error("Invalid record"); + return error("Invalid parameter attribute record"); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B(Context); @@ -1437,8 +1895,14 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Dereferenceable; case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL: return Attribute::DereferenceableOrNull; + case bitc::ATTR_KIND_ALLOC_ALIGN: + return Attribute::AllocAlign; + case bitc::ATTR_KIND_ALLOC_KIND: + return Attribute::AllocKind; case bitc::ATTR_KIND_ALLOC_SIZE: return Attribute::AllocSize; + case bitc::ATTR_KIND_ALLOCATED_POINTER: + return Attribute::AllocatedPointer; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: @@ -1451,6 +1915,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::NoProfile; case bitc::ATTR_KIND_NO_UNWIND: return Attribute::NoUnwind; + case bitc::ATTR_KIND_NO_SANITIZE_BOUNDS: + return Attribute::NoSanitizeBounds; case bitc::ATTR_KIND_NO_SANITIZE_COVERAGE: return Attribute::NoSanitizeCoverage; case bitc::ATTR_KIND_NULL_POINTER_IS_VALID: @@ -1529,6 +1995,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::MustProgress; case bitc::ATTR_KIND_HOT: return Attribute::Hot; + case bitc::ATTR_KIND_PRESPLIT_COROUTINE: + return Attribute::PresplitCoroutine; } } @@ -1586,7 +2054,7 @@ Error BitcodeReader::parseAttributeGroupBlock() { break; case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid grp record"); uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. @@ -1607,6 +2075,8 @@ Error BitcodeReader::parseAttributeGroupBlock() { B.addStructRetAttr(nullptr); else if (Kind == Attribute::InAlloca) B.addInAllocaAttr(nullptr); + else if (Kind == Attribute::UWTable) + B.addUWTableAttr(UWTableKind::Default); else if (Attribute::isEnumAttrKind(Kind)) B.addAttribute(Kind); else @@ -1629,6 +2099,10 @@ Error BitcodeReader::parseAttributeGroupBlock() { B.addAllocSizeAttrFromRawRepr(Record[++i]); else if (Kind == Attribute::VScaleRange) B.addVScaleRangeAttrFromRawRepr(Record[++i]); + else if (Kind == Attribute::UWTable) + B.addUWTableAttr(UWTableKind(Record[++i])); + else if (Kind == Attribute::AllocKind) + B.addAllocKindAttr(static_cast(Record[++i])); } else if (Record[i] == 3 || Record[i] == 4) { // String attribute bool HasValue = (Record[i++] == 4); SmallString<64> KindStr; @@ -1647,9 +2121,7 @@ Error BitcodeReader::parseAttributeGroupBlock() { } B.addAttribute(KindStr.str(), ValStr.str()); - } else { - assert((Record[i] == 5 || Record[i] == 6) && - "Invalid attribute group entry"); + } else if (Record[i] == 5 || Record[i] == 6) { bool HasType = Record[i] == 6; Attribute::AttrKind Kind; if (Error Err = parseAttrKind(Record[++i], &Kind)) @@ -1658,6 +2130,8 @@ Error BitcodeReader::parseAttributeGroupBlock() { return error("Not a type attribute"); B.addTypeAttr(Kind, HasType ? getTypeByID(Record[++i]) : nullptr); + } else { + return error("Invalid attribute group entry"); } } @@ -1708,6 +2182,7 @@ Error BitcodeReader::parseTypeTableBody() { // Read a record. 
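// Editorial sketch (not part of the patch): the attribute-group dispatch
// earlier in this hunk keys on the leading value of each entry:
//   0 -> enum attribute              [0, kind]
//   1 -> integer attribute           [1, kind, value]
//   3 -> string attribute            [3, kind chars..., 0]
//   4 -> string attribute with value [4, kind chars..., 0, value chars..., 0]
//   5 -> type attribute, no type     [5, kind]
//   6 -> type attribute with type    [6, kind, typeid]
// Any other tag is now a proper "Invalid attribute group entry" error rather
// than an assertion failure on malformed bitcode.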
Record.clear(); Type *ResultTy = nullptr; + SmallVector ContainedIDs; Expected MaybeRecord = Stream.readRecord(Entry.ID, Record); if (!MaybeRecord) return MaybeRecord.takeError(); @@ -1718,7 +2193,7 @@ Error BitcodeReader::parseTypeTableBody() { // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. if (Record.empty()) - return error("Invalid record"); + return error("Invalid numentry record"); TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID @@ -1762,7 +2237,7 @@ Error BitcodeReader::parseTypeTableBody() { break; case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width] if (Record.empty()) - return error("Invalid record"); + return error("Invalid integer record"); uint64_t NumBits = Record[0]; if (NumBits < IntegerType::MIN_INT_BITS || @@ -1774,7 +2249,7 @@ Error BitcodeReader::parseTypeTableBody() { case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or // [pointee type, address space] if (Record.empty()) - return error("Invalid record"); + return error("Invalid pointer record"); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; @@ -1782,13 +2257,18 @@ Error BitcodeReader::parseTypeTableBody() { if (!ResultTy || !PointerType::isValidElementType(ResultTy)) return error("Invalid type"); + if (LLVM_UNLIKELY(!Context.hasSetOpaquePointersValue())) + Context.setOpaquePointers(false); + ContainedIDs.push_back(Record[0]); ResultTy = PointerType::get(ResultTy, AddressSpace); break; } case bitc::TYPE_CODE_OPAQUE_POINTER: { // OPAQUE_POINTER: [addrspace] if (Record.size() != 1) - return error("Invalid record"); - if (Context.supportsTypedPointers()) + return error("Invalid opaque pointer record"); + if (LLVM_UNLIKELY(!Context.hasSetOpaquePointersValue())) { + Context.setOpaquePointers(true); + } else if (Context.supportsTypedPointers()) return error( "Opaque pointers are only supported in -opaque-pointers mode"); unsigned AddressSpace = Record[0]; @@ -1799,7 +2279,7 @@ Error BitcodeReader::parseTypeTableBody() { // Deprecated, but still needed to read old bitcode files. 
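// Editorial sketch (not part of the patch): the two pointer records above
// also pin the context's pointer mode on first use, schematically:
//   TYPE_CODE_POINTER        [pointee typeid, addrspace] -> typed mode (i8*)
//   TYPE_CODE_OPAQUE_POINTER [addrspace]                 -> opaque mode (ptr)
// An OPAQUE_POINTER record in a context already committed to typed pointers
// is the "Opaque pointers are only supported in -opaque-pointers mode" error;
// the reverse direction is upgraded silently, with the pointee type id kept
// in ContainedTypeIDs.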
// FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid function record"); SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -1812,13 +2292,14 @@ Error BitcodeReader::parseTypeTableBody() { if (!ResultTy || ArgTys.size() < Record.size()-3) return error("Invalid type"); + ContainedIDs.append(Record.begin() + 2, Record.end()); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_FUNCTION: { // FUNCTION: [vararg, retty, paramty x N] if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid function record"); SmallVector ArgTys; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) { @@ -1834,12 +2315,13 @@ Error BitcodeReader::parseTypeTableBody() { if (!ResultTy || ArgTys.size() < Record.size()-2) return error("Invalid type"); + ContainedIDs.append(Record.begin() + 1, Record.end()); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.empty()) - return error("Invalid record"); + return error("Invalid anon struct record"); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -1849,17 +2331,18 @@ Error BitcodeReader::parseTypeTableBody() { } if (EltTys.size() != Record.size()-1) return error("Invalid type"); + ContainedIDs.append(Record.begin() + 1, Record.end()); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] if (convertToString(Record, 0, TypeName)) - return error("Invalid record"); + return error("Invalid struct name record"); continue; case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.empty()) - return error("Invalid record"); + return error("Invalid named struct record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); @@ -1881,14 +2364,15 @@ Error BitcodeReader::parseTypeTableBody() { break; } if (EltTys.size() != Record.size()-1) - return error("Invalid record"); + return error("Invalid named struct record"); Res->setBody(EltTys, Record[0]); + ContainedIDs.append(Record.begin() + 1, Record.end()); ResultTy = Res; break; } case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] if (Record.size() != 1) - return error("Invalid record"); + return error("Invalid opaque type record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); @@ -1906,22 +2390,24 @@ Error BitcodeReader::parseTypeTableBody() { } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid array type record"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !ArrayType::isValidElementType(ResultTy)) return error("Invalid type"); + ContainedIDs.push_back(Record[1]); ResultTy = ArrayType::get(ResultTy, Record[0]); break; case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] or // [numelts, eltty, scalable] if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid vector type record"); if (Record[0] == 0) return error("Invalid vector length"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !VectorType::isValidElementType(ResultTy)) return error("Invalid type"); bool Scalable = Record.size() > 2 ? 
Record[2] : false; + ContainedIDs.push_back(Record[1]); ResultTy = VectorType::get(ResultTy, Record[0], Scalable); break; } @@ -1932,7 +2418,10 @@ Error BitcodeReader::parseTypeTableBody() { return error( "Invalid TYPE table: Only named structs can be forward referenced"); assert(ResultTy && "Didn't read a type?"); - TypeList[NumRecords++] = ResultTy; + TypeList[NumRecords] = ResultTy; + if (!ContainedIDs.empty()) + ContainedTypeIDs[NumRecords] = std::move(ContainedIDs); + ++NumRecords; } } @@ -1968,12 +2457,12 @@ Error BitcodeReader::parseOperandBundleTags() { if (!MaybeRecord) return MaybeRecord.takeError(); if (MaybeRecord.get() != bitc::OPERAND_BUNDLE_TAG) - return error("Invalid record"); + return error("Invalid operand bundle record"); // OPERAND_BUNDLE_TAG: [strchr x N] BundleTags.emplace_back(); if (convertToString(Record, 0, BundleTags.back())) - return error("Invalid record"); + return error("Invalid operand bundle record"); Record.clear(); } } @@ -2012,11 +2501,11 @@ Error BitcodeReader::parseSyncScopeNames() { if (!MaybeRecord) return MaybeRecord.takeError(); if (MaybeRecord.get() != bitc::SYNC_SCOPE_NAME) - return error("Invalid record"); + return error("Invalid sync scope record"); SmallString<16> SSN; if (convertToString(Record, 0, SSN)) - return error("Invalid record"); + return error("Invalid sync scope record"); SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN)); Record.clear(); @@ -2056,8 +2545,9 @@ static Expected jumpToValueSymbolTable(uint64_t Offset, Expected MaybeEntry = Stream.advance(); if (!MaybeEntry) return MaybeEntry.takeError(); - assert(MaybeEntry.get().Kind == BitstreamEntry::SubBlock); - assert(MaybeEntry.get().ID == bitc::VALUE_SYMTAB_BLOCK_ID); + if (MaybeEntry.get().Kind != BitstreamEntry::SubBlock || + MaybeEntry.get().ID != bitc::VALUE_SYMTAB_BLOCK_ID) + return error("Expected value symbol table subblock"); return CurrentBit; } @@ -2107,11 +2597,15 @@ Error BitcodeReader::parseGlobalValueSymbolTable() { if (!MaybeRecord) return MaybeRecord.takeError(); switch (MaybeRecord.get()) { - case bitc::VST_CODE_FNENTRY: // [valueid, offset] + case bitc::VST_CODE_FNENTRY: { // [valueid, offset] + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size() || !ValueList[ValueID]) + return error("Invalid value reference in symbol table"); setDeferredFunctionInfo(FuncBitcodeOffsetDelta, - cast(ValueList[Record[0]]), Record); + cast(ValueList[ValueID]), Record); break; } + } } } @@ -2213,10 +2707,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { } case bitc::VST_CODE_BBENTRY: { if (convertToString(Record, 1, ValueName)) - return error("Invalid record"); + return error("Invalid bbentry record"); BasicBlock *BB = getBasicBlock(Record[0]); if (!BB) - return error("Invalid record"); + return error("Invalid bbentry record"); BB->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); @@ -2253,10 +2747,10 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() { // Not ready to resolve this yet, it requires something later in the file. 
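// Editorial sketch (not part of the patch): in the type-table hunk above,
// ContainedTypeIDs mirrors Type::subtypes() per table slot so that pointee
// information survives the opaque-pointer upgrade. For a hypothetical slot N
// holding { i32, i8* }:
//   TypeList[N]         = the struct type
//   ContainedTypeIDs[N] = { id(i32), id(i8*) }
// getContainedTypeID(N, 1) recovers id(i8*), and getPtrElementTypeByID() on
// that id recovers i8 even after the in-memory type has become a bare ptr.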
GlobalInits.push_back(GlobalInitWorklist.back()); } else { - if (Constant *C = dyn_cast_or_null(ValueList[ValID])) - GlobalInitWorklist.back().first->setInitializer(C); - else - return error("Expected a constant"); + Expected MaybeC = getValueForInitializer(ValID); + if (!MaybeC) + return MaybeC.takeError(); + GlobalInitWorklist.back().first->setInitializer(MaybeC.get()); } GlobalInitWorklist.pop_back(); } @@ -2266,9 +2760,10 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() { if (ValID >= ValueList.size()) { IndirectSymbolInits.push_back(IndirectSymbolInitWorklist.back()); } else { - Constant *C = dyn_cast_or_null(ValueList[ValID]); - if (!C) - return error("Expected a constant"); + Expected MaybeC = getValueForInitializer(ValID); + if (!MaybeC) + return MaybeC.takeError(); + Constant *C = MaybeC.get(); GlobalValue *GV = IndirectSymbolInitWorklist.back().first; if (auto *GA = dyn_cast(GV)) { if (C->getType() != GV->getType()) @@ -2292,30 +2787,30 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() { if (Info.PersonalityFn) { unsigned ValID = Info.PersonalityFn - 1; if (ValID < ValueList.size()) { - if (Constant *C = dyn_cast_or_null(ValueList[ValID])) - Info.F->setPersonalityFn(C); - else - return error("Expected a constant"); + Expected MaybeC = getValueForInitializer(ValID); + if (!MaybeC) + return MaybeC.takeError(); + Info.F->setPersonalityFn(MaybeC.get()); Info.PersonalityFn = 0; } } if (Info.Prefix) { unsigned ValID = Info.Prefix - 1; if (ValID < ValueList.size()) { - if (Constant *C = dyn_cast_or_null(ValueList[ValID])) - Info.F->setPrefixData(C); - else - return error("Expected a constant"); + Expected MaybeC = getValueForInitializer(ValID); + if (!MaybeC) + return MaybeC.takeError(); + Info.F->setPrefixData(MaybeC.get()); Info.Prefix = 0; } } if (Info.Prologue) { unsigned ValID = Info.Prologue - 1; if (ValID < ValueList.size()) { - if (Constant *C = dyn_cast_or_null(ValueList[ValID])) - Info.F->setPrologueData(C); - else - return error("Expected a constant"); + Expected MaybeC = getValueForInitializer(ValID); + if (!MaybeC) + return MaybeC.takeError(); + Info.F->setPrologueData(MaybeC.get()); Info.Prologue = 0; } } @@ -2343,26 +2838,11 @@ Error BitcodeReader::parseConstants() { // Read all the records for this value table. Type *CurTy = Type::getInt32Ty(Context); + unsigned Int32TyID = getVirtualTypeID(CurTy); + unsigned CurTyID = Int32TyID; + Type *CurElemTy = nullptr; unsigned NextCstNo = ValueList.size(); - struct DelayedShufTy { - VectorType *OpTy; - VectorType *RTy; - uint64_t Op0Idx; - uint64_t Op1Idx; - uint64_t Op2Idx; - unsigned CstNo; - }; - std::vector DelayedShuffles; - struct DelayedSelTy { - Type *OpTy; - uint64_t Op0Idx; - uint64_t Op1Idx; - uint64_t Op2Idx; - unsigned CstNo; - }; - std::vector DelayedSelectors; - while (true) { Expected MaybeEntry = Stream.advanceSkippingSubblocks(); if (!MaybeEntry) @@ -2374,57 +2854,8 @@ Error BitcodeReader::parseConstants() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - // Once all the constants have been read, go through and resolve forward - // references. - // - // We have to treat shuffles specially because they don't have three - // operands anymore. We need to convert the shuffle mask into an array, - // and we can't convert a forward reference. 
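// Editorial note (not part of the patch): the DelayedShuffles and
// DelayedSelectors bookkeeping deleted below is subsumed by BitcodeConstant,
// which records every constant expression uniformly as opcode plus operand
// value IDs, e.g.
//   V = BitcodeConstant::create(Alloc, CurTy, Instruction::ShuffleVector,
//                               {Op0ID, Op1ID, MaskID});
// and leaves all resolution, including forward references, to
// materializeValue().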
- for (auto &DelayedShuffle : DelayedShuffles) { - VectorType *OpTy = DelayedShuffle.OpTy; - VectorType *RTy = DelayedShuffle.RTy; - uint64_t Op0Idx = DelayedShuffle.Op0Idx; - uint64_t Op1Idx = DelayedShuffle.Op1Idx; - uint64_t Op2Idx = DelayedShuffle.Op2Idx; - uint64_t CstNo = DelayedShuffle.CstNo; - Constant *Op0 = ValueList.getConstantFwdRef(Op0Idx, OpTy); - Constant *Op1 = ValueList.getConstantFwdRef(Op1Idx, OpTy); - Type *ShufTy = - VectorType::get(Type::getInt32Ty(Context), RTy->getElementCount()); - Constant *Op2 = ValueList.getConstantFwdRef(Op2Idx, ShufTy); - if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2)) - return error("Invalid shufflevector operands"); - SmallVector Mask; - ShuffleVectorInst::getShuffleMask(Op2, Mask); - Value *V = ConstantExpr::getShuffleVector(Op0, Op1, Mask); - ValueList.assignValue(V, CstNo); - } - for (auto &DelayedSelector : DelayedSelectors) { - Type *OpTy = DelayedSelector.OpTy; - Type *SelectorTy = Type::getInt1Ty(Context); - uint64_t Op0Idx = DelayedSelector.Op0Idx; - uint64_t Op1Idx = DelayedSelector.Op1Idx; - uint64_t Op2Idx = DelayedSelector.Op2Idx; - uint64_t CstNo = DelayedSelector.CstNo; - Constant *Op1 = ValueList.getConstantFwdRef(Op1Idx, OpTy); - Constant *Op2 = ValueList.getConstantFwdRef(Op2Idx, OpTy); - // The selector might be an i1 or an - // Get the type from the ValueList before getting a forward ref. - if (VectorType *VTy = dyn_cast(OpTy)) { - Value *V = ValueList[Op0Idx]; - assert(V); - if (SelectorTy != V->getType()) - SelectorTy = VectorType::get(SelectorTy, VTy->getElementCount()); - } - Constant *Op0 = ValueList.getConstantFwdRef(Op0Idx, SelectorTy); - Value *V = ConstantExpr::getSelect(Op0, Op1, Op2); - ValueList.assignValue(V, CstNo); - } - if (NextCstNo != ValueList.size()) return error("Invalid constant reference"); - - ValueList.resolveConstantForwardRefs(); return Error::success(); case BitstreamEntry::Record: // The interesting case. @@ -2448,12 +2879,14 @@ Error BitcodeReader::parseConstants() { break; case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] if (Record.empty()) - return error("Invalid record"); + return error("Invalid settype record"); if (Record[0] >= TypeList.size() || !TypeList[Record[0]]) - return error("Invalid record"); + return error("Invalid settype record"); if (TypeList[Record[0]] == VoidType) return error("Invalid constant type"); - CurTy = TypeList[Record[0]]; + CurTyID = Record[0]; + CurTy = TypeList[CurTyID]; + CurElemTy = getPtrElementTypeByID(CurTyID); continue; // Skip the ValueList manipulation. 
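// Editorial sketch (not part of the patch): CST_CODE_SETTYPE switches the
// implicit type for the records that follow it, so a constants block decodes
// roughly as
//   SETTYPE(i32) INTEGER(1) INTEGER(2) SETTYPE(float) FLOAT(0x3F800000)
// with CurTy/CurTyID (and CurElemTy for pointer types) threaded through
// rather than a type being re-encoded on every constant.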
case bitc::CST_CODE_NULL: // NULL if (CurTy->isVoidTy() || CurTy->isFunctionTy() || CurTy->isLabelTy()) @@ -2462,12 +2895,12 @@ Error BitcodeReader::parseConstants() { break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) - return error("Invalid record"); + return error("Invalid integer const record"); V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) - return error("Invalid record"); + return error("Invalid wide integer const record"); APInt VInt = readWideAPInt(Record, cast(CurTy)->getBitWidth()); @@ -2477,7 +2910,7 @@ Error BitcodeReader::parseConstants() { } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) - return error("Invalid record"); + return error("Invalid float const record"); if (CurTy->isHalfTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf(), APInt(16, (uint16_t)Record[0]))); @@ -2510,26 +2943,22 @@ Error BitcodeReader::parseConstants() { case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) - return error("Invalid record"); + return error("Invalid aggregate record"); unsigned Size = Record.size(); - SmallVector Elts; - - if (StructType *STy = dyn_cast(CurTy)) { - for (unsigned i = 0; i != Size; ++i) - Elts.push_back(ValueList.getConstantFwdRef(Record[i], - STy->getElementType(i))); - V = ConstantStruct::get(STy, Elts); - } else if (ArrayType *ATy = dyn_cast(CurTy)) { - Type *EltTy = ATy->getElementType(); - for (unsigned i = 0; i != Size; ++i) - Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); - V = ConstantArray::get(ATy, Elts); - } else if (VectorType *VTy = dyn_cast(CurTy)) { - Type *EltTy = VTy->getElementType(); - for (unsigned i = 0; i != Size; ++i) - Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); - V = ConstantVector::get(Elts); + SmallVector Elts; + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(Record[i]); + + if (isa(CurTy)) { + V = BitcodeConstant::create( + Alloc, CurTy, BitcodeConstant::ConstantStructOpcode, Elts); + } else if (isa(CurTy)) { + V = BitcodeConstant::create(Alloc, CurTy, + BitcodeConstant::ConstantArrayOpcode, Elts); + } else if (isa(CurTy)) { + V = BitcodeConstant::create( + Alloc, CurTy, BitcodeConstant::ConstantVectorOpcode, Elts); } else { V = UndefValue::get(CurTy); } @@ -2538,7 +2967,7 @@ Error BitcodeReader::parseConstants() { case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) - return error("Invalid record"); + return error("Invalid string record"); SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, @@ -2547,7 +2976,7 @@ Error BitcodeReader::parseConstants() { } case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) - return error("Invalid record"); + return error("Invalid data record"); Type *EltTy; if (auto *Array = dyn_cast(CurTy)) @@ -2609,27 +3038,23 @@ Error BitcodeReader::parseConstants() { } case bitc::CST_CODE_CE_UNOP: { // CE_UNOP: [opcode, opval] if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid unary op constexpr record"); int Opc = getDecodedUnaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown unop. 
} else { - Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); - unsigned Flags = 0; - V = ConstantExpr::get(Opc, LHS, Flags); + V = BitcodeConstant::create(Alloc, CurTy, Opc, (unsigned)Record[1]); } break; } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid binary op constexpr record"); int Opc = getDecodedBinaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown binop. } else { - Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); - Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy); - unsigned Flags = 0; + uint8_t Flags = 0; if (Record.size() >= 4) { if (Opc == Instruction::Add || Opc == Instruction::Sub || @@ -2647,23 +3072,23 @@ Error BitcodeReader::parseConstants() { Flags |= SDivOperator::IsExact; } } - V = ConstantExpr::get(Opc, LHS, RHS, Flags); + V = BitcodeConstant::create(Alloc, CurTy, {(uint8_t)Opc, Flags}, + {(unsigned)Record[1], (unsigned)Record[2]}); } break; } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid cast constexpr record"); int Opc = getDecodedCastOpcode(Record[0]); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. } else { - Type *OpTy = getTypeByID(Record[1]); + unsigned OpTyID = Record[1]; + Type *OpTy = getTypeByID(OpTyID); if (!OpTy) - return error("Invalid record"); - Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); - V = UpgradeBitCastExpr(Opc, Op, CurTy); - if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy); + return error("Invalid cast constexpr record"); + V = BitcodeConstant::create(Alloc, CurTy, Opc, (unsigned)Record[2]); } break; } @@ -2671,6 +3096,8 @@ Error BitcodeReader::parseConstants() { case bitc::CST_CODE_CE_GEP: // [ty, n x operands] case bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX: { // [ty, flags, n x // operands] + if (Record.size() < 2) + return error("Constant GEP record must have at least two elements"); unsigned OpNum = 0; Type *PointeeType = nullptr; if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX || @@ -2686,180 +3113,190 @@ Error BitcodeReader::parseConstants() { } else if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) InBounds = true; - SmallVector Elts; - Type *Elt0FullTy = nullptr; + SmallVector Elts; + unsigned BaseTypeID = Record[OpNum]; while (OpNum != Record.size()) { - if (!Elt0FullTy) - Elt0FullTy = getTypeByID(Record[OpNum]); - Type *ElTy = getTypeByID(Record[OpNum++]); + unsigned ElTyID = Record[OpNum++]; + Type *ElTy = getTypeByID(ElTyID); if (!ElTy) - return error("Invalid record"); - Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); + return error("Invalid getelementptr constexpr record"); + Elts.push_back(Record[OpNum++]); } if (Elts.size() < 1) return error("Invalid gep with no operands"); - PointerType *OrigPtrTy = cast(Elt0FullTy->getScalarType()); - if (!PointeeType) - PointeeType = OrigPtrTy->getPointerElementType(); - else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType)) + Type *BaseType = getTypeByID(BaseTypeID); + if (isa(BaseType)) { + BaseTypeID = getContainedTypeID(BaseTypeID, 0); + BaseType = getTypeByID(BaseTypeID); + } + + PointerType *OrigPtrTy = dyn_cast_or_null(BaseType); + if (!OrigPtrTy) + return error("GEP base operand must be pointer or vector of pointer"); + + if (!PointeeType) { + PointeeType = getPtrElementTypeByID(BaseTypeID); + if (!PointeeType) + return error("Missing element type for 
old-style constant GEP"); + } else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType)) return error("Explicit gep operator type does not match pointee type " "of pointer operand"); - ArrayRef Indices(Elts.begin() + 1, Elts.end()); - V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, - InBounds, InRangeIndex); + V = BitcodeConstant::create(Alloc, CurTy, + {Instruction::GetElementPtr, InBounds, + InRangeIndex.value_or(-1), PointeeType}, + Elts); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid select constexpr record"); - DelayedSelectors.push_back( - {CurTy, Record[0], Record[1], Record[2], NextCstNo}); - (void)ValueList.getConstantFwdRef(NextCstNo, CurTy); - ++NextCstNo; - continue; + V = BitcodeConstant::create( + Alloc, CurTy, Instruction::Select, + {(unsigned)Record[0], (unsigned)Record[1], (unsigned)Record[2]}); + break; } case bitc::CST_CODE_CE_EXTRACTELT : { // CE_EXTRACTELT: [opty, opval, opty, opval] if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid extractelement constexpr record"); + unsigned OpTyID = Record[0]; VectorType *OpTy = - dyn_cast_or_null(getTypeByID(Record[0])); + dyn_cast_or_null(getTypeByID(OpTyID)); if (!OpTy) - return error("Invalid record"); - Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); - Constant *Op1 = nullptr; + return error("Invalid extractelement constexpr record"); + unsigned IdxRecord; if (Record.size() == 4) { - Type *IdxTy = getTypeByID(Record[2]); + unsigned IdxTyID = Record[2]; + Type *IdxTy = getTypeByID(IdxTyID); if (!IdxTy) - return error("Invalid record"); - Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy); + return error("Invalid extractelement constexpr record"); + IdxRecord = Record[3]; } else { // Deprecated, but still needed to read old bitcode files. - Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + IdxRecord = Record[2]; } - if (!Op1) - return error("Invalid record"); - V = ConstantExpr::getExtractElement(Op0, Op1); + V = BitcodeConstant::create(Alloc, CurTy, Instruction::ExtractElement, + {(unsigned)Record[1], IdxRecord}); break; } case bitc::CST_CODE_CE_INSERTELT : { // CE_INSERTELT: [opval, opval, opty, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) - return error("Invalid record"); - Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); - Constant *Op1 = ValueList.getConstantFwdRef(Record[1], - OpTy->getElementType()); - Constant *Op2 = nullptr; + return error("Invalid insertelement constexpr record"); + unsigned IdxRecord; if (Record.size() == 4) { - Type *IdxTy = getTypeByID(Record[2]); + unsigned IdxTyID = Record[2]; + Type *IdxTy = getTypeByID(IdxTyID); if (!IdxTy) - return error("Invalid record"); - Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy); + return error("Invalid insertelement constexpr record"); + IdxRecord = Record[3]; } else { // Deprecated, but still needed to read old bitcode files. 
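// The BitcodeConstant::create calls introduced throughout this function are
// the core of the rewrite: instead of eagerly building ConstantExprs through
// getConstantFwdRef, each CE_* record now just captures an opcode plus operand
// value IDs, and the expression is only expanded once the constant is actually
// materialized. A deliberately simplified model of the placeholder (the real
// node is a Value subclass allocated from the BumpPtrAllocator passed in as
// Alloc, with the operand IDs stored inline):
#include <cstdint>
#include <vector>

struct LazyConstantSketch {
  uint8_t Opcode;              // Instruction opcode, or a custom marker such
                               // as ConstantStructOpcode or BlockAddressOpcode.
  uint8_t Flags;               // e.g. nuw/nsw/exact bits for binops.
  int ExtraInfo;               // e.g. the GEP in-range index recorded above.
  std::vector<unsigned> OpIDs; // Operands by value ID; may point forward.
};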
- Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + IdxRecord = Record[2]; } - if (!Op2) - return error("Invalid record"); - V = ConstantExpr::getInsertElement(Op0, Op1, Op2); + V = BitcodeConstant::create( + Alloc, CurTy, Instruction::InsertElement, + {(unsigned)Record[0], (unsigned)Record[1], IdxRecord}); break; } case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast<VectorType>(CurTy); if (Record.size() < 3 || !OpTy) - return error("Invalid record"); - DelayedShuffles.push_back( - {OpTy, OpTy, Record[0], Record[1], Record[2], NextCstNo}); - ++NextCstNo; - continue; + return error("Invalid shufflevector constexpr record"); + V = BitcodeConstant::create( + Alloc, CurTy, Instruction::ShuffleVector, + {(unsigned)Record[0], (unsigned)Record[1], (unsigned)Record[2]}); + break; } case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] VectorType *RTy = dyn_cast<VectorType>(CurTy); VectorType *OpTy = dyn_cast_or_null<VectorType>(getTypeByID(Record[0])); if (Record.size() < 4 || !RTy || !OpTy) - return error("Invalid record"); - DelayedShuffles.push_back( - {OpTy, RTy, Record[1], Record[2], Record[3], NextCstNo}); - ++NextCstNo; - continue; + return error("Invalid shufflevector constexpr record"); + V = BitcodeConstant::create( + Alloc, CurTy, Instruction::ShuffleVector, + {(unsigned)Record[1], (unsigned)Record[2], (unsigned)Record[3]}); + break; } case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] if (Record.size() < 4) - return error("Invalid record"); - Type *OpTy = getTypeByID(Record[0]); + return error("Invalid cmp constexpr record"); + unsigned OpTyID = Record[0]; + Type *OpTy = getTypeByID(OpTyID); if (!OpTy) - return error("Invalid record"); - Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); - Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - - if (OpTy->isFPOrFPVectorTy()) - V = ConstantExpr::getFCmp(Record[3], Op0, Op1); - else - V = ConstantExpr::getICmp(Record[3], Op0, Op1); + return error("Invalid cmp constexpr record"); + V = BitcodeConstant::create( + Alloc, CurTy, + {(uint8_t)(OpTy->isFPOrFPVectorTy() ? Instruction::FCmp + : Instruction::ICmp), + (uint8_t)Record[3]}, + {(unsigned)Record[1], (unsigned)Record[2]}); break; } // This maintains backward compatibility, pre-asm dialect keywords. // Deprecated, but still needed to read old bitcode files. case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid inlineasm record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; UpgradeInlineAsmString(&AsmStr); - // FIXME: support upgrading in opaque pointers mode.
- V = InlineAsm::get(cast(CurTy->getPointerElementType()), - AsmStr, ConstrStr, HasSideEffects, IsAlignStack); + if (!CurElemTy) + return error("Missing element type for old-style inlineasm"); + V = InlineAsm::get(cast(CurElemTy), AsmStr, ConstrStr, + HasSideEffects, IsAlignStack); break; } // This version adds support for the asm dialect keywords (e.g., // inteldialect). case bitc::CST_CODE_INLINEASM_OLD2: { if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid inlineasm record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = (Record[0] >> 1) & 1; unsigned AsmDialect = Record[0] >> 2; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; UpgradeInlineAsmString(&AsmStr); - // FIXME: support upgrading in opaque pointers mode. - V = InlineAsm::get(cast(CurTy->getPointerElementType()), - AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + if (!CurElemTy) + return error("Missing element type for old-style inlineasm"); + V = InlineAsm::get(cast(CurElemTy), AsmStr, ConstrStr, + HasSideEffects, IsAlignStack, InlineAsm::AsmDialect(AsmDialect)); break; } // This version adds support for the unwind keyword. case bitc::CST_CODE_INLINEASM_OLD3: { if (Record.size() < 2) - return error("Invalid record"); + return error("Invalid inlineasm record"); unsigned OpNum = 0; std::string AsmStr, ConstrStr; bool HasSideEffects = Record[OpNum] & 1; @@ -2870,10 +3307,10 @@ Error BitcodeReader::parseConstants() { unsigned AsmStrSize = Record[OpNum]; ++OpNum; if (OpNum + AsmStrSize >= Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); unsigned ConstStrSize = Record[OpNum + AsmStrSize]; if (OpNum + 1 + AsmStrSize + ConstStrSize > Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[OpNum + i]; @@ -2881,21 +3318,22 @@ Error BitcodeReader::parseConstants() { for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[OpNum + AsmStrSize + i]; UpgradeInlineAsmString(&AsmStr); - // FIXME: support upgrading in opaque pointers mode. - V = InlineAsm::get(cast(CurTy->getPointerElementType()), - AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + if (!CurElemTy) + return error("Missing element type for old-style inlineasm"); + V = InlineAsm::get(cast(CurElemTy), AsmStr, ConstrStr, + HasSideEffects, IsAlignStack, InlineAsm::AsmDialect(AsmDialect), CanThrow); break; } // This version adds explicit function type. 
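// All four INLINEASM record flavors unpack the same packed flags word, with
// each newer flavor defining one more bit. The layout, as decoded by the
// handlers above (struct and function names here are illustrative only):
#include <cstdint>

struct InlineAsmFlagsSketch {
  bool HasSideEffects;  // bit 0
  bool IsAlignStack;    // bit 1
  unsigned AsmDialect;  // bit 2: 0 = AT&T, 1 = Intel (OLD2 and later)
  bool CanThrow;        // bit 3: the unwind keyword (OLD3 and later)
};

static InlineAsmFlagsSketch unpackInlineAsmFlags(uint64_t V) {
  return {bool(V & 1), bool((V >> 1) & 1), unsigned((V >> 2) & 1),
          bool((V >> 3) & 1)};
}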
case bitc::CST_CODE_INLINEASM: { if (Record.size() < 3) - return error("Invalid record"); + return error("Invalid inlineasm record"); unsigned OpNum = 0; auto *FnTy = dyn_cast_or_null(getTypeByID(Record[OpNum])); ++OpNum; if (!FnTy) - return error("Invalid record"); + return error("Invalid inlineasm record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[OpNum] & 1; bool IsAlignStack = (Record[OpNum] >> 1) & 1; @@ -2905,10 +3343,10 @@ Error BitcodeReader::parseConstants() { unsigned AsmStrSize = Record[OpNum]; ++OpNum; if (OpNum + AsmStrSize >= Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); unsigned ConstStrSize = Record[OpNum + AsmStrSize]; if (OpNum + 1 + AsmStrSize + ConstStrSize > Record.size()) - return error("Invalid record"); + return error("Invalid inlineasm record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[OpNum + i]; @@ -2922,75 +3360,44 @@ Error BitcodeReader::parseConstants() { } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) - return error("Invalid record"); - Type *FnTy = getTypeByID(Record[0]); + return error("Invalid blockaddress record"); + unsigned FnTyID = Record[0]; + Type *FnTy = getTypeByID(FnTyID); if (!FnTy) - return error("Invalid record"); - Function *Fn = - dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); - if (!Fn) - return error("Invalid record"); - - // If the function is already parsed we can insert the block address right - // away. - BasicBlock *BB; - unsigned BBID = Record[2]; - if (!BBID) - // Invalid reference to entry block. - return error("Invalid ID"); - if (!Fn->empty()) { - Function::iterator BBI = Fn->begin(), BBE = Fn->end(); - for (size_t I = 0, E = BBID; I != E; ++I) { - if (BBI == BBE) - return error("Invalid ID"); - ++BBI; - } - BB = &*BBI; - } else { - // Otherwise insert a placeholder and remember it so it can be inserted - // when the function is parsed. 
- auto &FwdBBs = BasicBlockFwdRefs[Fn]; - if (FwdBBs.empty()) - BasicBlockFwdRefQueue.push_back(Fn); - if (FwdBBs.size() < BBID + 1) - FwdBBs.resize(BBID + 1); - if (!FwdBBs[BBID]) - FwdBBs[BBID] = BasicBlock::Create(Context); - BB = FwdBBs[BBID]; - } - V = BlockAddress::get(Fn, BB); + return error("Invalid blockaddress record"); + V = BitcodeConstant::create( + Alloc, CurTy, + {BitcodeConstant::BlockAddressOpcode, 0, (unsigned)Record[2]}, + Record[1]); break; } case bitc::CST_CODE_DSO_LOCAL_EQUIVALENT: { if (Record.size() < 2) - return error("Invalid record"); - Type *GVTy = getTypeByID(Record[0]); + return error("Invalid dso_local record"); + unsigned GVTyID = Record[0]; + Type *GVTy = getTypeByID(GVTyID); if (!GVTy) - return error("Invalid record"); - GlobalValue *GV = dyn_cast_or_null<GlobalValue>( - ValueList.getConstantFwdRef(Record[1], GVTy)); - if (!GV) - return error("Invalid record"); - - V = DSOLocalEquivalent::get(GV); + return error("Invalid dso_local record"); + V = BitcodeConstant::create( + Alloc, CurTy, BitcodeConstant::DSOLocalEquivalentOpcode, Record[1]); break; } case bitc::CST_CODE_NO_CFI_VALUE: { if (Record.size() < 2) - return error("Invalid no_cfi record"); - Type *GVTy = getTypeByID(Record[0]); + return error("Invalid no_cfi record"); + unsigned GVTyID = Record[0]; + Type *GVTy = getTypeByID(GVTyID); if (!GVTy) - return error("Invalid record"); - GlobalValue *GV = dyn_cast_or_null<GlobalValue>( - ValueList.getConstantFwdRef(Record[1], GVTy)); - if (!GV) - return error("Invalid record"); - V = NoCFIValue::get(GV); + return error("Invalid no_cfi record"); + V = BitcodeConstant::create(Alloc, CurTy, BitcodeConstant::NoCFIOpcode, + Record[1]); break; } } - ValueList.assignValue(V, NextCstNo); + assert(V->getType() == getTypeByID(CurTyID) && "Incorrect result type ID"); + if (Error Err = ValueList.assignValue(NextCstNo, V, CurTyID)) + return Err; ++NextCstNo; } } @@ -3146,7 +3553,7 @@ Error BitcodeReader::globalCleanup() { // Some types could be renamed during loading if several modules are // loaded in the same LLVMContext (LTO scenario). In this case we should // remangle intrinsic names as well. - RemangledIntrinsics[&F] = Remangled.getValue(); + RemangledIntrinsics[&F] = *Remangled; // Look for functions that rely on old function attribute behavior. UpgradeFunctionAttributes(F); } @@ -3211,17 +3618,17 @@ Error BitcodeReader::rememberAndSkipFunctionBodies() { } } -bool BitcodeReaderBase::readBlockInfo() { +Error BitcodeReaderBase::readBlockInfo() { Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo = Stream.ReadBlockInfoBlock(); if (!MaybeNewBlockInfo) - return true; // FIXME Handle the error.
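// The `return true` just above is replaced below by the llvm::Error idiom, so
// the real diagnostic from the bitstream is propagated instead of being
// flattened to a bool. The pattern in isolation (mayFail is an illustrative
// stand-in for Stream.ReadBlockInfoBlock):
#include "llvm/Support/Error.h"

llvm::Expected<int> mayFail();

llvm::Error useAndPropagate() {
  llvm::Expected<int> V = mayFail();
  if (!V)
    return V.takeError();      // Forward the underlying error to the caller.
  // ... use *V here ...
  return llvm::Error::success();
}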
+ return MaybeNewBlockInfo.takeError(); Optional<BitstreamBlockInfo> NewBlockInfo = std::move(MaybeNewBlockInfo.get()); if (!NewBlockInfo) - return true; + return error("Malformed block"); BlockInfo = std::move(*NewBlockInfo); - return false; + return Error::success(); } Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) { @@ -3238,6 +3645,8 @@ Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) { if (Record.size() < 2) return error("Invalid record"); unsigned ComdatNameSize = Record[1]; + if (ComdatNameSize > Record.size() - 2) + return error("Comdat name size too large"); OldFormatName.reserve(ComdatNameSize); for (unsigned i = 0; i != ComdatNameSize; ++i) OldFormatName += (char)Record[2 + i]; @@ -3256,6 +3665,19 @@ static void inferDSOLocal(GlobalValue *GV) { GV->setDSOLocal(true); } +GlobalValue::SanitizerMetadata deserializeSanitizerMetadata(unsigned V) { + GlobalValue::SanitizerMetadata Meta; + if (V & (1 << 0)) + Meta.NoAddress = true; + if (V & (1 << 1)) + Meta.NoHWAddress = true; + if (V & (1 << 2)) + Meta.NoMemtag = true; + if (V & (1 << 3)) + Meta.IsDynInit = true; + return Meta; +} + Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, @@ -3267,7 +3689,8 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { if (Record.size() < 6) return error("Invalid record"); - Type *Ty = getTypeByID(Record[0]); + unsigned TyID = Record[0]; + Type *Ty = getTypeByID(TyID); if (!Ty) return error("Invalid record"); bool isConstant = Record[1] & 1; @@ -3279,7 +3702,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { if (!Ty->isPointerTy()) return error("Invalid type for value"); AddressSpace = cast<PointerType>(Ty)->getAddressSpace(); - Ty = Ty->getPointerElementType(); + TyID = getContainedTypeID(TyID); + Ty = getTypeByID(TyID); + if (!Ty) + return error("Missing element type for old-style global"); } uint64_t RawLinkage = Record[3]; @@ -3325,7 +3751,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { else upgradeDLLImportExportLinkage(NewGV, RawLinkage); - ValueList.push_back(NewGV); + ValueList.push_back(NewGV, getVirtualTypeID(NewGV->getType(), TyID)); // Remember which value to use for the global initializer.
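// deserializeSanitizerMetadata above consumes one bit per flag from the new
// optional global-variable record field (Record[16] below). For reference, a
// writer-side packing would be the exact mirror image (sketch only; the
// helper name is illustrative):
#include "llvm/IR/GlobalValue.h"

static unsigned
packSanitizerMetadata(const llvm::GlobalValue::SanitizerMetadata &M) {
  return (M.NoAddress ? 1u << 0 : 0) | (M.NoHWAddress ? 1u << 1 : 0) |
         (M.NoMemtag ? 1u << 2 : 0) | (M.IsDynInit ? 1u << 3 : 0);
}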
if (unsigned InitID = Record[2]) @@ -3355,6 +3781,12 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { if (Record.size() > 15) NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15])); + if (Record.size() > 16 && Record[16]) { + llvm::GlobalValue::SanitizerMetadata Meta = + deserializeSanitizerMetadata(Record[16]); + NewGV->setSanitizerMetadata(Meta); + } + return Error::success(); } @@ -3368,11 +3800,16 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { if (Record.size() < 8) return error("Invalid record"); - Type *FTy = getTypeByID(Record[0]); + unsigned FTyID = Record[0]; + Type *FTy = getTypeByID(FTyID); if (!FTy) return error("Invalid record"); - if (auto *PTy = dyn_cast(FTy)) - FTy = PTy->getPointerElementType(); + if (isa(FTy)) { + FTyID = getContainedTypeID(FTyID, 0); + FTy = getTypeByID(FTyID); + if (!FTy) + return error("Missing element type for old-style function"); + } if (!isa(FTy)) return error("Invalid type for value"); @@ -3390,7 +3827,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { assert(Func->getFunctionType() == FTy && "Incorrect fully specified type provided for function"); - FunctionTypes[Func] = cast(FTy); + FunctionTypeIDs[Func] = FTyID; Func->setCallingConv(CC); bool isProto = Record[2]; @@ -3412,8 +3849,11 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { Func->removeParamAttr(i, Kind); - Type *PTy = cast(FTy)->getParamType(i); - Type *PtrEltTy = PTy->getPointerElementType(); + unsigned ParamTypeID = getContainedTypeID(FTyID, i + 1); + Type *PtrEltTy = getPtrElementTypeByID(ParamTypeID); + if (!PtrEltTy) + return error("Missing param element type for attribute upgrade"); + Attribute NewAttr; switch (Kind) { case Attribute::ByVal: @@ -3433,6 +3873,16 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { } } + if (Func->getCallingConv() == CallingConv::X86_INTR && + !Func->arg_empty() && !Func->hasParamAttribute(0, Attribute::ByVal)) { + unsigned ParamTypeID = getContainedTypeID(FTyID, 1); + Type *ByValTy = getPtrElementTypeByID(ParamTypeID); + if (!ByValTy) + return error("Missing param element type for x86_intrcc upgrade"); + Attribute NewAttr = Attribute::getWithByValType(Context, ByValTy); + Func->addParamAttr(0, NewAttr); + } + MaybeAlign Alignment; if (Error Err = parseAlignmentValue(Record[5], Alignment)) return Err; @@ -3495,7 +3945,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { Func->setPartition(StringRef(Strtab.data() + Record[17], Record[18])); } - ValueList.push_back(Func); + ValueList.push_back(Func, getVirtualTypeID(Func->getType(), FTyID)); if (OperandInfo.PersonalityFn || OperandInfo.Prefix || OperandInfo.Prologue) FunctionOperands.push_back(OperandInfo); @@ -3527,7 +3977,8 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); unsigned OpNum = 0; - Type *Ty = getTypeByID(Record[OpNum++]); + unsigned TypeID = Record[OpNum++]; + Type *Ty = getTypeByID(TypeID); if (!Ty) return error("Invalid record"); @@ -3536,8 +3987,11 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( auto *PTy = dyn_cast(Ty); if (!PTy) return error("Invalid type for value"); - Ty = PTy->getPointerElementType(); AddrSpace = PTy->getAddressSpace(); + TypeID = getContainedTypeID(TypeID); + Ty = getTypeByID(TypeID); + if (!Ty) + return error("Missing element type for old-style indirect symbol"); } else { AddrSpace = Record[OpNum++]; } @@ -3582,7 +4036,7 @@ Error 
BitcodeReader::parseGlobalIndirectSymbolRecord( OpNum += 2; } - ValueList.push_back(NewGA); + ValueList.push_back(NewGA, getVirtualTypeID(NewGA->getType(), TypeID)); IndirectSymbolInits.push_back(std::make_pair(NewGA, Val)); return Error::success(); } @@ -3639,8 +4093,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, return Err; break; case bitc::BLOCKINFO_BLOCK_ID: - if (readBlockInfo()) - return error("Malformed block"); + if (Error Err = readBlockInfo()) + return Err; break; case bitc::PARAMATTR_BLOCK_ID: if (Error Err = parseAttributeBlock()) @@ -3796,7 +4250,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); - TheModule->setDataLayout(S); + Expected MaybeDL = DataLayout::parse(S); + if (!MaybeDL) + return MaybeDL.takeError(); + TheModule->setDataLayout(MaybeDL.get()); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] @@ -3894,18 +4351,20 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { return Error::success(); } -void BitcodeReader::propagateAttributeTypes(CallBase *CB, - ArrayRef ArgsTys) { +Error BitcodeReader::propagateAttributeTypes(CallBase *CB, + ArrayRef ArgTyIDs) { + AttributeList Attrs = CB->getAttributes(); for (unsigned i = 0; i != CB->arg_size(); ++i) { for (Attribute::AttrKind Kind : {Attribute::ByVal, Attribute::StructRet, Attribute::InAlloca}) { - if (!CB->paramHasAttr(i, Kind) || - CB->getParamAttr(i, Kind).getValueAsType()) + if (!Attrs.hasParamAttr(i, Kind) || + Attrs.getParamAttr(i, Kind).getValueAsType()) continue; - CB->removeParamAttr(i, Kind); + Type *PtrEltTy = getPtrElementTypeByID(ArgTyIDs[i]); + if (!PtrEltTy) + return error("Missing element type for typed attribute upgrade"); - Type *PtrEltTy = ArgsTys[i]->getPointerElementType(); Attribute NewAttr; switch (Kind) { case Attribute::ByVal: @@ -3921,7 +4380,7 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB, llvm_unreachable("not an upgraded type attribute"); } - CB->addParamAttr(i, NewAttr); + Attrs = Attrs.addParamAttribute(Context, i, NewAttr); } } @@ -3932,10 +4391,13 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB, if (!CI.hasArg()) continue; - if (CI.isIndirect && !CB->getAttributes().getParamElementType(ArgNo)) { - Type *ElemTy = ArgsTys[ArgNo]->getPointerElementType(); - CB->addParamAttr( - ArgNo, Attribute::get(Context, Attribute::ElementType, ElemTy)); + if (CI.isIndirect && !Attrs.getParamElementType(ArgNo)) { + Type *ElemTy = getPtrElementTypeByID(ArgTyIDs[ArgNo]); + if (!ElemTy) + return error("Missing element type for inline asm upgrade"); + Attrs = Attrs.addParamAttribute( + Context, ArgNo, + Attribute::get(Context, Attribute::ElementType, ElemTy)); } ArgNo++; @@ -3945,15 +4407,41 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB, switch (CB->getIntrinsicID()) { case Intrinsic::preserve_array_access_index: case Intrinsic::preserve_struct_access_index: - if (!CB->getAttributes().getParamElementType(0)) { - Type *ElTy = ArgsTys[0]->getPointerElementType(); + case Intrinsic::aarch64_ldaxr: + case Intrinsic::aarch64_ldxr: + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: + case Intrinsic::arm_ldaex: + case Intrinsic::arm_ldrex: + case Intrinsic::arm_stlex: + case Intrinsic::arm_strex: { + unsigned ArgNo; + switch (CB->getIntrinsicID()) { + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: + case Intrinsic::arm_stlex: + case Intrinsic::arm_strex: + ArgNo = 1; + break; + default: + ArgNo = 0; + break; + } 
+ if (!Attrs.getParamElementType(ArgNo)) { + Type *ElTy = getPtrElementTypeByID(ArgTyIDs[ArgNo]); + if (!ElTy) + return error("Missing element type for elementtype upgrade"); Attribute NewAttr = Attribute::get(Context, Attribute::ElementType, ElTy); - CB->addParamAttr(0, NewAttr); + Attrs = Attrs.addParamAttribute(Context, ArgNo, NewAttr); } break; + } default: break; } + + CB->setAttributes(Attrs); + return Error::success(); } /// Lazily parse the specified function body block. @@ -3970,18 +4458,24 @@ Error BitcodeReader::parseFunctionBody(Function *F) { unsigned ModuleMDLoaderSize = MDLoader->size(); // Add all the function arguments to the value table. -#ifndef NDEBUG unsigned ArgNo = 0; - FunctionType *FTy = FunctionTypes[F]; -#endif + unsigned FTyID = FunctionTypeIDs[F]; for (Argument &I : F->args()) { - assert(I.getType() == FTy->getParamType(ArgNo++) && + unsigned ArgTyID = getContainedTypeID(FTyID, ArgNo + 1); + assert(I.getType() == getTypeByID(ArgTyID) && "Incorrect fully specified type for Function Argument"); - ValueList.push_back(&I); + ValueList.push_back(&I, ArgTyID); + ++ArgNo; } unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = nullptr; unsigned CurBBNo = 0; + // Block into which constant expressions from phi nodes are materialized. + BasicBlock *PhiConstExprBB = nullptr; + // Edge blocks for phi nodes into which constant expressions have been + // expanded. + SmallMapVector<std::pair<BasicBlock *, BasicBlock *>, BasicBlock *, 4> + ConstExprEdgeBBs; DebugLoc LastLoc; auto getLastInstruction = [&]() -> Instruction * { @@ -4050,6 +4544,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // Read a record. Record.clear(); Instruction *I = nullptr; + unsigned ResTypeID = InvalidTypeID; Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record); if (!MaybeBitCode) return MaybeBitCode.takeError(); @@ -4091,6 +4586,31 @@ Error BitcodeReader::parseFunctionBody(Function *F) { continue; } + case bitc::FUNC_CODE_BLOCKADDR_USERS: // BLOCKADDR_USERS: [vals...] + // The record should not be emitted if it's an empty list. + if (Record.empty()) + return error("Invalid record"); + // When we have the RARE case of a BlockAddress Constant that is not + // scoped to the Function it refers to, we need to conservatively + // materialize the referred to Function, regardless of whether or not + // that Function will ultimately be linked, otherwise users of + // BitcodeReader might start splicing out Function bodies such that we + // might no longer be able to materialize the BlockAddress since the + // BasicBlock (and entire body of the Function) the BlockAddress refers + // to may have been moved. In the case that the user of BitcodeReader + // decides ultimately not to link the Function body, materializing here + // could be considered wasteful, but it's better than a deserialization + // failure as described. This keeps BitcodeReader unaware of complex + // linkage policy decisions such as those used by LTO, leaving those + // decisions "one layer up." + for (uint64_t ValID : Record) + if (auto *F = dyn_cast<Function>(ValueList[ValID])) + BackwardRefFunctions.push_back(F); + else + return error("Invalid record"); + + continue; + case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location.
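// From here on, the function-body parser threads a "type ID" alongside every
// Type pointer (ResTypeID, FTyID, ArgTyID, ...): with opaque pointers a bare
// Type* can no longer answer questions like "what is the pointee or param
// type?", so the IDs index a side table that still records containment. A toy
// model of the query used throughout these hunks (sketch only; the real table
// is populated while parsing the type block, and getVirtualTypeID mints new
// IDs for types that table has not seen):
#include <vector>

struct TypeIDTableSketch {
  // Per type ID, the IDs of its contained types: for a function type,
  // [return, param0, param1, ...]; for an old-style typed pointer, [pointee].
  std::vector<std::vector<unsigned>> ContainedIDs;

  unsigned getContainedTypeID(unsigned ID, unsigned Idx = 0) const {
    return ContainedIDs[ID][Idx];
  }
};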
@@ -4133,7 +4653,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_UNOP: { // UNOP: [opval, ty, opcode] unsigned OpNum = 0; Value *LHS; - if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || + unsigned TypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS, TypeID, CurBB) || OpNum+1 > Record.size()) return error("Invalid record"); @@ -4141,6 +4662,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Opc == -1) return error("Invalid record"); I = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS); + ResTypeID = TypeID; InstructionList.push_back(I); if (OpNum < Record.size()) { if (isa(I)) { @@ -4154,8 +4676,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] unsigned OpNum = 0; Value *LHS, *RHS; - if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || + unsigned TypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS, TypeID, CurBB) || + popValue(Record, OpNum, NextValueNo, LHS->getType(), TypeID, RHS, + CurBB) || OpNum+1 > Record.size()) return error("Invalid record"); @@ -4163,6 +4687,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Opc == -1) return error("Invalid record"); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + ResTypeID = TypeID; InstructionList.push_back(I); if (OpNum < Record.size()) { if (Opc == Instruction::Add || @@ -4191,11 +4716,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] unsigned OpNum = 0; Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) || OpNum+2 != Record.size()) return error("Invalid record"); - Type *ResTy = getTypeByID(Record[OpNum]); + ResTypeID = Record[OpNum]; + Type *ResTy = getTypeByID(ResTypeID); int Opc = getDecodedCastOpcode(Record[OpNum + 1]); if (Opc == -1 || !ResTy) return error("Invalid record"); @@ -4220,23 +4747,31 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_GEP: { // GEP: type, [n x operands] unsigned OpNum = 0; + unsigned TyID; Type *Ty; bool InBounds; if (BitCode == bitc::FUNC_CODE_INST_GEP) { InBounds = Record[OpNum++]; - Ty = getTypeByID(Record[OpNum++]); + TyID = Record[OpNum++]; + Ty = getTypeByID(TyID); } else { InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD; + TyID = InvalidTypeID; Ty = nullptr; } Value *BasePtr; - if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) + unsigned BasePtrTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr, BasePtrTypeID, + CurBB)) return error("Invalid record"); if (!Ty) { - Ty = BasePtr->getType()->getScalarType()->getPointerElementType(); + TyID = getContainedTypeID(BasePtrTypeID); + if (BasePtr->getType()->isVectorTy()) + TyID = getContainedTypeID(TyID); + Ty = getTypeByID(TyID); } else if (!cast(BasePtr->getType()->getScalarType()) ->isOpaqueOrPointeeTypeMatches(Ty)) { return error( @@ -4246,13 +4781,37 @@ Error BitcodeReader::parseFunctionBody(Function *F) { SmallVector GEPIdx; while (OpNum != Record.size()) { Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); GEPIdx.push_back(Op); } I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx); + ResTypeID = 
TyID; + if (cast(I)->getNumIndices() != 0) { + auto GTI = std::next(gep_type_begin(I)); + for (Value *Idx : drop_begin(cast(I)->indices())) { + unsigned SubType = 0; + if (GTI.isStruct()) { + ConstantInt *IdxC = + Idx->getType()->isVectorTy() + ? cast(cast(Idx)->getSplatValue()) + : cast(Idx); + SubType = IdxC->getZExtValue(); + } + ResTypeID = getContainedTypeID(ResTypeID, SubType); + ++GTI; + } + } + + // At this point ResTypeID is the result element type. We need a pointer + // or vector of pointer to it. + ResTypeID = getVirtualTypeID(I->getType()->getScalarType(), ResTypeID); + if (I->getType()->isVectorTy()) + ResTypeID = getVirtualTypeID(I->getType(), ResTypeID); + InstructionList.push_back(I); if (InBounds) cast(I)->setIsInBounds(true); @@ -4263,7 +4822,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // EXTRACTVAL: [opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; - if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) + unsigned AggTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Agg, AggTypeID, CurBB)) return error("Invalid record"); Type *Ty = Agg->getType(); @@ -4272,6 +4832,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("EXTRACTVAL: Invalid instruction with 0 indices"); SmallVector EXTRACTVALIdx; + ResTypeID = AggTypeID; for (; OpNum != RecSize; ++OpNum) { bool IsArray = Ty->isArrayTy(); bool IsStruct = Ty->isStructTy(); @@ -4287,10 +4848,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("EXTRACTVAL: Invalid array index"); EXTRACTVALIdx.push_back((unsigned)Index); - if (IsStruct) + if (IsStruct) { Ty = Ty->getStructElementType(Index); - else + ResTypeID = getContainedTypeID(ResTypeID, Index); + } else { Ty = Ty->getArrayElementType(); + ResTypeID = getContainedTypeID(ResTypeID); + } } I = ExtractValueInst::Create(Agg, EXTRACTVALIdx); @@ -4302,10 +4866,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // INSERTVAL: [opty, opval, opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; - if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) + unsigned AggTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Agg, AggTypeID, CurBB)) return error("Invalid record"); Value *Val; - if (getValueTypePair(Record, OpNum, NextValueNo, Val)) + unsigned ValTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB)) return error("Invalid record"); unsigned RecSize = Record.size(); @@ -4339,6 +4905,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Inserted value type doesn't match aggregate type"); I = InsertValueInst::Create(Agg, Val, INSERTVALIdx); + ResTypeID = AggTypeID; InstructionList.push_back(I); break; } @@ -4348,12 +4915,18 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // handles select i1 ... 
in old bitcode unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; - if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || - popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || - popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) + unsigned TypeID; + Type *CondType = Type::getInt1Ty(Context); + if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, TypeID, + CurBB) || + popValue(Record, OpNum, NextValueNo, TrueVal->getType(), TypeID, + FalseVal, CurBB) || + popValue(Record, OpNum, NextValueNo, CondType, + getVirtualTypeID(CondType), Cond, CurBB)) return error("Invalid record"); I = SelectInst::Create(Cond, TrueVal, FalseVal); + ResTypeID = TypeID; InstructionList.push_back(I); break; } @@ -4363,9 +4936,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // handles select i1 or select [N x i1] unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; - if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || - popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || - getValueTypePair(Record, OpNum, NextValueNo, Cond)) + unsigned ValTypeID, CondTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, ValTypeID, + CurBB) || + popValue(Record, OpNum, NextValueNo, TrueVal->getType(), ValTypeID, + FalseVal, CurBB) || + getValueTypePair(Record, OpNum, NextValueNo, Cond, CondTypeID, CurBB)) return error("Invalid record"); // select condition can be either i1 or [N x i1] @@ -4381,6 +4957,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } I = SelectInst::Create(Cond, TrueVal, FalseVal); + ResTypeID = ValTypeID; InstructionList.push_back(I); if (OpNum < Record.size() && isa(I)) { FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]); @@ -4393,12 +4970,14 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] unsigned OpNum = 0; Value *Vec, *Idx; - if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValueTypePair(Record, OpNum, NextValueNo, Idx)) + unsigned VecTypeID, IdxTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec, VecTypeID, CurBB) || + getValueTypePair(Record, OpNum, NextValueNo, Idx, IdxTypeID, CurBB)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); I = ExtractElementInst::Create(Vec, Idx); + ResTypeID = getContainedTypeID(VecTypeID); InstructionList.push_back(I); break; } @@ -4406,15 +4985,18 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] unsigned OpNum = 0; Value *Vec, *Elt, *Idx; - if (getValueTypePair(Record, OpNum, NextValueNo, Vec)) + unsigned VecTypeID, IdxTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec, VecTypeID, CurBB)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); if (popValue(Record, OpNum, NextValueNo, - cast(Vec->getType())->getElementType(), Elt) || - getValueTypePair(Record, OpNum, NextValueNo, Idx)) + cast(Vec->getType())->getElementType(), + getContainedTypeID(VecTypeID), Elt, CurBB) || + getValueTypePair(Record, OpNum, NextValueNo, Idx, IdxTypeID, CurBB)) return error("Invalid record"); I = InsertElementInst::Create(Vec, Elt, Idx); + ResTypeID = VecTypeID; InstructionList.push_back(I); break; } @@ -4422,16 +5004,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] unsigned OpNum 
= 0; Value *Vec1, *Vec2, *Mask; - if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || - popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) + unsigned Vec1TypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec1, Vec1TypeID, + CurBB) || + popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec1TypeID, + Vec2, CurBB)) return error("Invalid record"); - if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) + unsigned MaskTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Mask, MaskTypeID, CurBB)) return error("Invalid record"); if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy()) return error("Invalid type for value"); I = new ShuffleVectorInst(Vec1, Vec2, Mask); + ResTypeID = + getVirtualTypeID(I->getType(), getContainedTypeID(Vec1TypeID)); InstructionList.push_back(I); break; } @@ -4445,8 +5033,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; - if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) + unsigned LHSTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS, LHSTypeID, CurBB) || + popValue(Record, OpNum, NextValueNo, LHS->getType(), LHSTypeID, RHS, + CurBB)) return error("Invalid record"); if (OpNum >= Record.size()) @@ -4467,6 +5057,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { else I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS); + ResTypeID = getVirtualTypeID(I->getType()->getScalarType()); + if (LHS->getType()->isVectorTy()) + ResTypeID = getVirtualTypeID(I->getType(), ResTypeID); + if (FMF.any()) I->setFastMathFlags(FMF); InstructionList.push_back(I); @@ -4484,7 +5078,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Op = nullptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); if (OpNum != Record.size()) return error("Invalid record"); @@ -4506,8 +5101,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); - Value *Cond = getValue(Record, 2, NextValueNo, - Type::getInt1Ty(Context)); + Type *CondType = Type::getInt1Ty(Context); + Value *Cond = getValue(Record, 2, NextValueNo, CondType, + getVirtualTypeID(CondType), CurBB); if (!FalseDest || !Cond) return error("Invalid record"); I = BranchInst::Create(TrueDest, FalseDest, Cond); @@ -4519,8 +5115,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Record.size() != 1 && Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; - Value *CleanupPad = - getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + Type *TokenTy = Type::getTokenTy(Context); + Value *CleanupPad = getValue(Record, Idx++, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); if (!CleanupPad) return error("Invalid record"); BasicBlock *UnwindDest = nullptr; @@ -4538,8 +5135,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; - Value *CatchPad = - getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + Type *TokenTy = Type::getTokenTy(Context); + Value *CatchPad = getValue(Record, Idx++, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); if (!CatchPad) return error("Invalid record"); BasicBlock *BB = getBasicBlock(Record[Idx++]); @@ -4557,8 +5155,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { 
unsigned Idx = 0; - Value *ParentPad = - getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + Type *TokenTy = Type::getTokenTy(Context); + Value *ParentPad = getValue(Record, Idx++, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); unsigned NumHandlers = Record[Idx++]; @@ -4585,6 +5184,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { for (BasicBlock *Handler : Handlers) CatchSwitch->addHandler(Handler); I = CatchSwitch; + ResTypeID = getVirtualTypeID(I->getType()); InstructionList.push_back(I); break; } @@ -4596,15 +5196,17 @@ Error BitcodeReader::parseFunctionBody(Function *F) { unsigned Idx = 0; - Value *ParentPad = - getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + Type *TokenTy = Type::getTokenTy(Context); + Value *ParentPad = getValue(Record, Idx++, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); unsigned NumArgOperands = Record[Idx++]; SmallVector Args; for (unsigned Op = 0; Op != NumArgOperands; ++Op) { Value *Val; - if (getValueTypePair(Record, Idx, NextValueNo, Val)) + unsigned ValTypeID; + if (getValueTypePair(Record, Idx, NextValueNo, Val, ValTypeID, nullptr)) return error("Invalid record"); Args.push_back(Val); } @@ -4616,6 +5218,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { I = CleanupPadInst::Create(ParentPad, Args); else I = CatchPadInst::Create(ParentPad, Args); + ResTypeID = getVirtualTypeID(I->getType()); InstructionList.push_back(I); break; } @@ -4627,10 +5230,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // Hopefully someday we will have support for case ranges and can use // this format again. - Type *OpTy = getTypeByID(Record[1]); + unsigned OpTyID = Record[1]; + Type *OpTy = getTypeByID(OpTyID); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); - Value *Cond = getValue(Record, 2, NextValueNo, OpTy); + Value *Cond = getValue(Record, 2, NextValueNo, OpTy, OpTyID, CurBB); BasicBlock *Default = getBasicBlock(Record[3]); if (!OpTy || !Cond || !Default) return error("Invalid record"); @@ -4684,8 +5288,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Record.size() < 3 || (Record.size() & 1) == 0) return error("Invalid record"); - Type *OpTy = getTypeByID(Record[0]); - Value *Cond = getValue(Record, 1, NextValueNo, OpTy); + unsigned OpTyID = Record[0]; + Type *OpTy = getTypeByID(OpTyID); + Value *Cond = getValue(Record, 1, NextValueNo, OpTy, OpTyID, CurBB); BasicBlock *Default = getBasicBlock(Record[2]); if (!OpTy || !Cond || !Default) return error("Invalid record"); @@ -4693,8 +5298,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); for (unsigned i = 0, e = NumCases; i != e; ++i) { - ConstantInt *CaseVal = - dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); + ConstantInt *CaseVal = dyn_cast_or_null( + getFnValueByID(Record[3+i*2], OpTy, OpTyID, nullptr)); BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (!CaseVal || !DestBB) { delete SI; @@ -4708,8 +5313,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] 
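// Operand references in these FUNC_CODE records are relative value IDs: the
// record stores NextValueNo minus the absolute ID, so references to recently
// defined values are tiny VBRs and forward references come out negative
// (which is why the phi reader below switches to getValueSigned when
// UseRelativeIDs is set). Schematically (a sketch of the conversion hidden
// inside getValue/popValue):
#include <cstdint>

static uint64_t toAbsoluteValueID(int64_t RelID, uint64_t NextValueNo) {
  // RelID > 0: an already-defined value; RelID <= 0: a forward reference.
  return NextValueNo - RelID;
}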
if (Record.size() < 2) return error("Invalid record"); - Type *OpTy = getTypeByID(Record[0]); - Value *Address = getValue(Record, 1, NextValueNo, OpTy); + unsigned OpTyID = Record[0]; + Type *OpTy = getTypeByID(OpTyID); + Value *Address = getValue(Record, 1, NextValueNo, OpTy, OpTyID, CurBB); if (!OpTy || !Address) return error("Invalid record"); unsigned NumDests = Record.size()-2; @@ -4737,23 +5343,27 @@ Error BitcodeReader::parseFunctionBody(Function *F) { BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]); BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]); + unsigned FTyID = InvalidTypeID; FunctionType *FTy = nullptr; if ((CCInfo >> 13) & 1) { - FTy = dyn_cast(getTypeByID(Record[OpNum++])); + FTyID = Record[OpNum++]; + FTy = dyn_cast(getTypeByID(FTyID)); if (!FTy) return error("Explicit invoke type is not a function type"); } Value *Callee; - if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + unsigned CalleeTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee, CalleeTypeID, + CurBB)) return error("Invalid record"); PointerType *CalleeTy = dyn_cast(Callee->getType()); if (!CalleeTy) return error("Callee is not a pointer"); if (!FTy) { - FTy = - dyn_cast(Callee->getType()->getPointerElementType()); + FTyID = getContainedTypeID(CalleeTypeID); + FTy = dyn_cast_or_null(getTypeByID(FTyID)); if (!FTy) return error("Callee is not of pointer to function type"); } else if (!CalleeTy->isOpaqueOrPointeeTypeMatches(FTy)) @@ -4763,11 +5373,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Insufficient operands to call"); SmallVector Ops; - SmallVector ArgsTys; + SmallVector ArgTyIDs; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { - Ops.push_back(getValue(Record, OpNum, NextValueNo, - FTy->getParamType(i))); - ArgsTys.push_back(FTy->getParamType(i)); + unsigned ArgTyID = getContainedTypeID(FTyID, i + 1); + Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i), + ArgTyID, CurBB)); + ArgTyIDs.push_back(ArgTyID); if (!Ops.back()) return error("Invalid record"); } @@ -4779,28 +5390,38 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // Read type/value pairs for varargs params. while (OpNum != Record.size()) { Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); Ops.push_back(Op); - ArgsTys.push_back(Op->getType()); + ArgTyIDs.push_back(OpTypeID); } } + // Upgrade the bundles if needed. 
+ if (!OperandBundles.empty()) + UpgradeOperandBundles(OperandBundles); + I = InvokeInst::Create(FTy, Callee, NormalBB, UnwindBB, Ops, OperandBundles); + ResTypeID = getContainedTypeID(FTyID); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast(CallingConv::MaxID & CCInfo)); cast(I)->setAttributes(PAL); - propagateAttributeTypes(cast(I), ArgsTys); + if (Error Err = propagateAttributeTypes(cast(I), ArgTyIDs)) { + I->deleteValue(); + return Err; + } break; } case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] unsigned Idx = 0; Value *Val = nullptr; - if (getValueTypePair(Record, Idx, NextValueNo, Val)) + unsigned ValTypeID; + if (getValueTypePair(Record, Idx, NextValueNo, Val, ValTypeID, CurBB)) return error("Invalid record"); I = ResumeInst::Create(Val); InstructionList.push_back(I); @@ -4818,23 +5439,27 @@ Error BitcodeReader::parseFunctionBody(Function *F) { for (unsigned i = 0, e = NumIndirectDests; i != e; ++i) IndirectDests.push_back(getBasicBlock(Record[OpNum++])); + unsigned FTyID = InvalidTypeID; FunctionType *FTy = nullptr; if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) { - FTy = dyn_cast(getTypeByID(Record[OpNum++])); + FTyID = Record[OpNum++]; + FTy = dyn_cast_or_null(getTypeByID(FTyID)); if (!FTy) return error("Explicit call type is not a function type"); } Value *Callee; - if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + unsigned CalleeTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee, CalleeTypeID, + CurBB)) return error("Invalid record"); PointerType *OpTy = dyn_cast(Callee->getType()); if (!OpTy) return error("Callee is not a pointer type"); if (!FTy) { - FTy = - dyn_cast(Callee->getType()->getPointerElementType()); + FTyID = getContainedTypeID(CalleeTypeID); + FTy = dyn_cast_or_null(getTypeByID(FTyID)); if (!FTy) return error("Callee is not of pointer to function type"); } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy)) @@ -4844,18 +5469,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Insufficient operands to call"); SmallVector Args; - SmallVector ArgsTys; + SmallVector ArgTyIDs; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Value *Arg; + unsigned ArgTyID = getContainedTypeID(FTyID, i + 1); if (FTy->getParamType(i)->isLabelTy()) Arg = getBasicBlock(Record[OpNum]); else - Arg = getValue(Record, OpNum, NextValueNo, FTy->getParamType(i)); + Arg = getValue(Record, OpNum, NextValueNo, FTy->getParamType(i), + ArgTyID, CurBB); if (!Arg) return error("Invalid record"); Args.push_back(Arg); - ArgsTys.push_back(Arg->getType()); + ArgTyIDs.push_back(ArgTyID); } // Read type/value pairs for varargs params. @@ -4865,21 +5492,30 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } else { while (OpNum != Record.size()) { Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); Args.push_back(Op); - ArgsTys.push_back(Op->getType()); + ArgTyIDs.push_back(OpTypeID); } } + // Upgrade the bundles if needed. 
+ if (!OperandBundles.empty()) + UpgradeOperandBundles(OperandBundles); + I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args, OperandBundles); + ResTypeID = getContainedTypeID(FTyID); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast((0x7ff & CCInfo) >> bitc::CALL_CCONV)); cast(I)->setAttributes(PAL); - propagateAttributeTypes(cast(I), ArgsTys); + if (Error Err = propagateAttributeTypes(cast(I), ArgTyIDs)) { + I->deleteValue(); + return Err; + } break; } case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE @@ -4888,36 +5524,76 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.empty()) - return error("Invalid record"); + return error("Invalid phi record"); // The first record specifies the type. - Type *Ty = getTypeByID(Record[0]); + unsigned TyID = Record[0]; + Type *Ty = getTypeByID(TyID); if (!Ty) - return error("Invalid record"); + return error("Invalid phi record"); // Phi arguments are pairs of records of [value, basic block]. // There is an optional final record for fast-math-flags if this phi has a // floating-point type. size_t NumArgs = (Record.size() - 1) / 2; PHINode *PN = PHINode::Create(Ty, NumArgs); - if ((Record.size() - 1) % 2 == 1 && !isa(PN)) - return error("Invalid record"); + if ((Record.size() - 1) % 2 == 1 && !isa(PN)) { + PN->deleteValue(); + return error("Invalid phi record"); + } InstructionList.push_back(PN); + SmallDenseMap Args; for (unsigned i = 0; i != NumArgs; i++) { - Value *V; + BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]); + if (!BB) { + PN->deleteValue(); + return error("Invalid phi BB"); + } + + // Phi nodes may contain the same predecessor multiple times, in which + // case the incoming value must be identical. Directly reuse the already + // seen value here, to avoid expanding a constant expression multiple + // times. + auto It = Args.find(BB); + if (It != Args.end()) { + PN->addIncoming(It->second, BB); + continue; + } + + // If there already is a block for this edge (from a different phi), + // use it. + BasicBlock *EdgeBB = ConstExprEdgeBBs.lookup({BB, CurBB}); + if (!EdgeBB) { + // Otherwise, use a temporary block (that we will discard if it + // turns out to be unnecessary). + if (!PhiConstExprBB) + PhiConstExprBB = BasicBlock::Create(Context, "phi.constexpr", F); + EdgeBB = PhiConstExprBB; + } + // With the new function encoding, it is possible that operands have // negative IDs (for forward references). Use a signed VBR // representation to keep the encoding small. + Value *V; if (UseRelativeIDs) - V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty); + V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty, TyID, EdgeBB); else - V = getValue(Record, i * 2 + 1, NextValueNo, Ty); - BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]); - if (!V || !BB) - return error("Invalid record"); + V = getValue(Record, i * 2 + 1, NextValueNo, Ty, TyID, EdgeBB); + if (!V) { + PN->deleteValue(); + PhiConstExprBB->eraseFromParent(); + return error("Invalid phi record"); + } + + if (EdgeBB == PhiConstExprBB && !EdgeBB->empty()) { + ConstExprEdgeBBs.insert({{BB, CurBB}, EdgeBB}); + PhiConstExprBB = nullptr; + } PN->addIncoming(V, BB); + Args.insert({BB, V}); } I = PN; + ResTypeID = TyID; // If there are an even number of records, the final record must be FMF. 
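// The PHI case above is where the lazily-recorded constants pay their cost:
// when such a constant has to be expanded into instructions, those
// instructions must dominate the incoming edge rather than the phi itself, so
// the reader materializes them in a synthetic "phi.constexpr" block and
// reuses one block per (predecessor, successor) edge via ConstExprEdgeBBs.
// In IR terms (an illustrative sketch):
//
//   %x = phi i64 [ ptrtoint (ptr @g to i64), %pred ]
//
// is read back roughly as
//
//   phi.constexpr:                 ; spliced into the %pred -> %cur edge
//     %0 = ptrtoint ptr @g to i64
//     br label %cur
//   ...
//   %x = phi i64 [ %0, %phi.constexpr ]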
if (Record.size() % 2 == 0) { @@ -4942,12 +5618,15 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Record.size() < 4) return error("Invalid record"); } - Type *Ty = getTypeByID(Record[Idx++]); + ResTypeID = Record[Idx++]; + Type *Ty = getTypeByID(ResTypeID); if (!Ty) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) { Value *PersFn = nullptr; - if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) + unsigned PersFnTypeID; + if (getValueTypePair(Record, Idx, NextValueNo, PersFn, PersFnTypeID, + nullptr)) return error("Invalid record"); if (!F->hasPersonalityFn()) @@ -4964,8 +5643,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { LandingPadInst::ClauseType CT = LandingPadInst::ClauseType(Record[Idx++]); (void)CT; Value *Val; + unsigned ValTypeID; - if (getValueTypePair(Record, Idx, NextValueNo, Val)) { + if (getValueTypePair(Record, Idx, NextValueNo, Val, ValTypeID, + nullptr)) { delete LP; return error("Invalid record"); } @@ -4985,21 +5666,23 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] - if (Record.size() != 4) + if (Record.size() != 4 && Record.size() != 5) return error("Invalid record"); using APV = AllocaPackedValues; const uint64_t Rec = Record[3]; const bool InAlloca = Bitfield::get(Rec); const bool SwiftError = Bitfield::get(Rec); - Type *Ty = getTypeByID(Record[0]); + unsigned TyID = Record[0]; + Type *Ty = getTypeByID(TyID); if (!Bitfield::get(Rec)) { - auto *PTy = dyn_cast_or_null(Ty); - if (!PTy) - return error("Old-style alloca with a non-pointer type"); - Ty = PTy->getPointerElementType(); + TyID = getContainedTypeID(TyID); + Ty = getTypeByID(TyID); + if (!Ty) + return error("Missing element type for old-style alloca"); } - Type *OpTy = getTypeByID(Record[1]); - Value *Size = getFnValueByID(Record[2], OpTy); + unsigned OpTyID = Record[1]; + Type *OpTy = getTypeByID(OpTyID); + Value *Size = getFnValueByID(Record[2], OpTy, OpTyID, CurBB); MaybeAlign Align; uint64_t AlignExp = Bitfield::get(Rec) | @@ -5010,9 +5693,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (!Ty || !Size) return error("Invalid record"); - // FIXME: Make this an optional field. const DataLayout &DL = TheModule->getDataLayout(); - unsigned AS = DL.getAllocaAddrSpace(); + unsigned AS = Record.size() == 5 ? 
Record[4] : DL.getAllocaAddrSpace(); SmallPtrSet Visited; if (!Align && !Ty->isSized(&Visited)) @@ -5024,13 +5706,15 @@ Error BitcodeReader::parseFunctionBody(Function *F) { AI->setUsedWithInAlloca(InAlloca); AI->setSwiftError(SwiftError); I = AI; + ResTypeID = getVirtualTypeID(AI->getType(), TyID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] unsigned OpNum = 0; Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) || (OpNum + 2 != Record.size() && OpNum + 3 != Record.size())) return error("Invalid record"); @@ -5039,9 +5723,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Type *Ty = nullptr; if (OpNum + 3 == Record.size()) { - Ty = getTypeByID(Record[OpNum++]); + ResTypeID = Record[OpNum++]; + Ty = getTypeByID(ResTypeID); } else { - Ty = Op->getType()->getPointerElementType(); + ResTypeID = getContainedTypeID(OpTypeID); + Ty = getTypeByID(ResTypeID); + if (!Ty) + return error("Missing element type for old-style load"); } if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) @@ -5063,7 +5751,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // LOADATOMIC: [opty, op, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) || (OpNum + 4 != Record.size() && OpNum + 5 != Record.size())) return error("Invalid record"); @@ -5072,9 +5761,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Type *Ty = nullptr; if (OpNum + 5 == Record.size()) { - Ty = getTypeByID(Record[OpNum++]); + ResTypeID = Record[OpNum++]; + Ty = getTypeByID(ResTypeID); } else { - Ty = Op->getType()->getPointerElementType(); + ResTypeID = getContainedTypeID(OpTypeID); + Ty = getTypeByID(ResTypeID); + if (!Ty) + return error("Missing element type for old style atomic load"); } if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) @@ -5102,12 +5795,21 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - (BitCode == bitc::FUNC_CODE_INST_STORE - ? getValueTypePair(Record, OpNum, NextValueNo, Val) - : popValue(Record, OpNum, NextValueNo, - Ptr->getType()->getPointerElementType(), Val)) || - OpNum + 2 != Record.size()) + unsigned PtrTypeID, ValTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB)) + return error("Invalid record"); + + if (BitCode == bitc::FUNC_CODE_INST_STORE) { + if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB)) + return error("Invalid record"); + } else { + ValTypeID = getContainedTypeID(PtrTypeID); + if (popValue(Record, OpNum, NextValueNo, getTypeByID(ValTypeID), + ValTypeID, Val, CurBB)) + return error("Invalid record"); + } + + if (OpNum + 2 != Record.size()) return error("Invalid record"); if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) @@ -5129,13 +5831,21 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Val, *Ptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - !isa(Ptr->getType()) || - (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC - ? 
getValueTypePair(Record, OpNum, NextValueNo, Val) - : popValue(Record, OpNum, NextValueNo, - Ptr->getType()->getPointerElementType(), Val)) || - OpNum + 4 != Record.size()) + unsigned PtrTypeID, ValTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB) || + !isa(Ptr->getType())) + return error("Invalid record"); + if (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC) { + if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB)) + return error("Invalid record"); + } else { + ValTypeID = getContainedTypeID(PtrTypeID); + if (popValue(Record, OpNum, NextValueNo, getTypeByID(ValTypeID), + ValTypeID, Val, CurBB)) + return error("Invalid record"); + } + + if (OpNum + 4 != Record.size()) return error("Invalid record"); if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) @@ -5164,20 +5874,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) { const size_t NumRecords = Record.size(); unsigned OpNum = 0; Value *Ptr = nullptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Ptr)) + unsigned PtrTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB)) return error("Invalid record"); if (!isa(Ptr->getType())) return error("Cmpxchg operand is not a pointer type"); Value *Cmp = nullptr; - if (popValue(Record, OpNum, NextValueNo, - cast(Ptr->getType())->getPointerElementType(), - Cmp)) + unsigned CmpTypeID = getContainedTypeID(PtrTypeID); + if (popValue(Record, OpNum, NextValueNo, getTypeByID(CmpTypeID), + CmpTypeID, Cmp, CurBB)) return error("Invalid record"); Value *New = nullptr; - if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) || + if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), CmpTypeID, + New, CurBB) || NumRecords < OpNum + 3 || NumRecords > OpNum + 5) return error("Invalid record"); @@ -5214,8 +5926,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // expecting the first component of a modern cmpxchg. 
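// [Editor's note, annotation added during cleanup, not part of the patch]
// A modern cmpxchg yields a {T, i1} pair (the loaded value plus a success
// flag), whereas the legacy FUNC_CODE_INST_CMPXCHG_OLD encoding produced
// only the loaded value. The upgrade below therefore pushes the full
// instruction and immediately extracts field 0; in IR terms, roughly:
//   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new seq_cst seq_cst ; {i32, i1}
//   %old  = extractvalue { i32, i1 } %pair, 0
// That is also why ResTypeID becomes the element type ID (CmpTypeID) on this
// path, but a synthesized {CmpTypeID, i1} struct type ID on the modern path.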
CurBB->getInstList().push_back(I); I = ExtractValueInst::Create(I, 0); + ResTypeID = CmpTypeID; } else { cast(I)->setWeak(Record[OpNum + 4]); + unsigned I1TypeID = getVirtualTypeID(Type::getInt1Ty(Context)); + ResTypeID = getVirtualTypeID(I->getType(), {CmpTypeID, I1TypeID}); } InstructionList.push_back(I); @@ -5227,18 +5942,21 @@ Error BitcodeReader::parseFunctionBody(Function *F) { const size_t NumRecords = Record.size(); unsigned OpNum = 0; Value *Ptr = nullptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Ptr)) + unsigned PtrTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB)) return error("Invalid record"); if (!isa(Ptr->getType())) return error("Cmpxchg operand is not a pointer type"); Value *Cmp = nullptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Cmp)) + unsigned CmpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, CmpTypeID, CurBB)) return error("Invalid record"); Value *Val = nullptr; - if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), Val)) + if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), CmpTypeID, Val, + CurBB)) return error("Invalid record"); if (NumRecords < OpNum + 3 || NumRecords > OpNum + 6) @@ -5278,6 +5996,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) { cast(I)->setVolatile(IsVol); cast(I)->setWeak(IsWeak); + unsigned I1TypeID = getVirtualTypeID(Type::getInt1Ty(Context)); + ResTypeID = getVirtualTypeID(I->getType(), {CmpTypeID, I1TypeID}); + InstructionList.push_back(I); break; } @@ -5289,20 +6010,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Ptr = nullptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Ptr)) + unsigned PtrTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB)) return error("Invalid record"); if (!isa(Ptr->getType())) return error("Invalid record"); Value *Val = nullptr; + unsigned ValTypeID = InvalidTypeID; if (BitCode == bitc::FUNC_CODE_INST_ATOMICRMW_OLD) { + ValTypeID = getContainedTypeID(PtrTypeID); if (popValue(Record, OpNum, NextValueNo, - cast(Ptr->getType())->getPointerElementType(), - Val)) + getTypeByID(ValTypeID), ValTypeID, Val, CurBB)) return error("Invalid record"); } else { - if (getValueTypePair(Record, OpNum, NextValueNo, Val)) + if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB)) return error("Invalid record"); } @@ -5336,6 +6059,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Align(TheModule->getDataLayout().getTypeStoreSize(Val->getType())); I = new AtomicRMWInst(Operation, Ptr, Val, *Alignment, Ordering, SSID); + ResTypeID = ValTypeID; cast(I)->setVolatile(IsVol); InstructionList.push_back(I); @@ -5370,23 +6094,27 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Fast math flags indicator set for call with no FMF"); } + unsigned FTyID = InvalidTypeID; FunctionType *FTy = nullptr; if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) { - FTy = dyn_cast(getTypeByID(Record[OpNum++])); + FTyID = Record[OpNum++]; + FTy = dyn_cast_or_null(getTypeByID(FTyID)); if (!FTy) return error("Explicit call type is not a function type"); } Value *Callee; - if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + unsigned CalleeTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee, CalleeTypeID, + CurBB)) return error("Invalid record"); PointerType *OpTy = dyn_cast(Callee->getType()); if (!OpTy) return error("Callee is not a pointer type"); if (!FTy) { - FTy = - dyn_cast(Callee->getType()->getPointerElementType()); + FTyID = 
getContainedTypeID(CalleeTypeID); + FTy = dyn_cast_or_null(getTypeByID(FTyID)); if (!FTy) return error("Callee is not of pointer to function type"); } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy)) @@ -5396,15 +6124,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Insufficient operands to call"); SmallVector Args; - SmallVector ArgsTys; + SmallVector ArgTyIDs; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { + unsigned ArgTyID = getContainedTypeID(FTyID, i + 1); if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else Args.push_back(getValue(Record, OpNum, NextValueNo, - FTy->getParamType(i))); - ArgsTys.push_back(FTy->getParamType(i)); + FTy->getParamType(i), ArgTyID, CurBB)); + ArgTyIDs.push_back(ArgTyID); if (!Args.back()) return error("Invalid record"); } @@ -5416,14 +6145,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } else { while (OpNum != Record.size()) { Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); Args.push_back(Op); - ArgsTys.push_back(Op->getType()); + ArgTyIDs.push_back(OpTypeID); } } + // Upgrade the bundles if needed. + if (!OperandBundles.empty()) + UpgradeOperandBundles(OperandBundles); + I = CallInst::Create(FTy, Callee, Args, OperandBundles); + ResTypeID = getContainedTypeID(FTyID); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( @@ -5437,7 +6172,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); - propagateAttributeTypes(cast(I), ArgsTys); + if (Error Err = propagateAttributeTypes(cast(I), ArgTyIDs)) { + I->deleteValue(); + return Err; + } if (FMF.any()) { if (!isa(I)) return error("Fast-math-flags specified for call without " @@ -5449,9 +6187,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) return error("Invalid record"); - Type *OpTy = getTypeByID(Record[0]); - Value *Op = getValue(Record, 1, NextValueNo, OpTy); - Type *ResTy = getTypeByID(Record[2]); + unsigned OpTyID = Record[0]; + Type *OpTy = getTypeByID(OpTyID); + Value *Op = getValue(Record, 1, NextValueNo, OpTy, OpTyID, CurBB); + ResTypeID = Record[2]; + Type *ResTy = getTypeByID(ResTypeID); if (!OpTy || !Op || !ResTy) return error("Invalid record"); I = new VAArgInst(Op, ResTy); @@ -5472,7 +6212,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { unsigned OpNum = 1; while (OpNum != Record.size()) { Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); Inputs.push_back(Op); } @@ -5484,12 +6225,14 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_FREEZE: { // FREEZE: [opty,opval] unsigned OpNum = 0; Value *Op = nullptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + unsigned OpTypeID; + if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB)) return error("Invalid record"); if (OpNum != Record.size()) return error("Invalid record"); I = new FreezeInst(Op); + ResTypeID = OpTypeID; InstructionList.push_back(I); break; } @@ -5514,8 +6257,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } // Non-void values 
get registered in the value table for future use. - if (!I->getType()->isVoidTy()) - ValueList.assignValue(I, NextValueNo++); + if (!I->getType()->isVoidTy()) { + assert(I->getType() == getTypeByID(ResTypeID) && + "Incorrect result type ID"); + if (Error Err = ValueList.assignValue(NextValueNo++, I, ResTypeID)) + return Err; + } } OutOfRecordLoop: @@ -5541,6 +6288,19 @@ OutOfRecordLoop: if (MDLoader->hasFwdRefs()) return error("Invalid function metadata: outgoing forward refs"); + if (PhiConstExprBB) + PhiConstExprBB->eraseFromParent(); + + for (const auto &Pair : ConstExprEdgeBBs) { + BasicBlock *From = Pair.first.first; + BasicBlock *To = Pair.first.second; + BasicBlock *EdgeBB = Pair.second; + BranchInst::Create(To, EdgeBB); + From->getTerminator()->replaceSuccessorWith(To, EdgeBB); + To->replacePhiUsesWith(From, EdgeBB); + EdgeBB->moveBefore(To); + } + // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); MDLoader->shrinkTo(ModuleMDLoaderSize); @@ -5913,8 +6673,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { break; case bitc::BLOCKINFO_BLOCK_ID: // Need to parse these to get abbrev ids (e.g. for VST) - if (readBlockInfo()) - return error("Malformed block"); + if (Error Err = readBlockInfo()) + return Err; break; case bitc::VALUE_SYMTAB_BLOCK_ID: // Should have been parsed earlier via VSTOffset, unless there diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 0f4111514057..0d57ae4ef9df 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -9,74 +9,60 @@ #include "MetadataLoader.h" #include "ValueList.h" -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitstream/BitstreamReader.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/Comdat.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/IR/OperandTraits.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" -#include 
"llvm/IR/ValueHandle.h" -#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/type_traits.h" + #include #include #include #include #include +#include #include -#include #include -#include #include +#include #include #include +namespace llvm { +class Argument; +} using namespace llvm; @@ -678,8 +664,8 @@ public: bool hasSeenOldLoopTags() const { return HasSeenOldLoopTags; } - Error parseMetadataAttachment( - Function &F, const SmallVectorImpl &InstructionList); + Error parseMetadataAttachment(Function &F, + ArrayRef InstructionList); Error parseMetadataKinds(); @@ -1233,14 +1219,16 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } - Type *Ty = getTypeByID(Record[0]); + unsigned TyID = Record[0]; + Type *Ty = getTypeByID(TyID); if (Ty->isMetadataTy() || Ty->isVoidTy()) { dropRecord(); break; } MetadataList.assignValue( - LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), + LocalAsMetadata::get(ValueList.getValueFwdRef( + Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)), NextMetadataNo); NextMetadataNo++; break; @@ -1253,14 +1241,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( unsigned Size = Record.size(); SmallVector Elts; for (unsigned i = 0; i != Size; i += 2) { - Type *Ty = getTypeByID(Record[i]); + unsigned TyID = Record[i]; + Type *Ty = getTypeByID(TyID); if (!Ty) return error("Invalid record"); if (Ty->isMetadataTy()) Elts.push_back(getMD(Record[i + 1])); else if (!Ty->isVoidTy()) { - auto *MD = - ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty)); + auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef( + Record[i + 1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)); assert(isa(MD) && "Expected non-function-local metadata"); Elts.push_back(MD); @@ -1275,12 +1264,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Record.size() != 2) return error("Invalid record"); - Type *Ty = getTypeByID(Record[0]); + unsigned TyID = Record[0]; + Type *Ty = getTypeByID(TyID); if (Ty->isMetadataTy() || Ty->isVoidTy()) return error("Invalid record"); MetadataList.assignValue( - ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), + ValueAsMetadata::get(ValueList.getValueFwdRef( + Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)), NextMetadataNo); NextMetadataNo++; break; @@ -1514,6 +1505,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)) { Flags = Flags | DINode::FlagFwdDecl; + if (Name) { + // This is a hack around preserving template parameters for simplified + // template names - it should probably be replaced with a + // DICompositeType flag specifying whether template parameters are + // required on declarations of this type. 
+ StringRef NameStr = Name->getString(); + if (!NameStr.contains('<') || NameStr.startswith("_STN|")) + TemplateParams = getMDOrNull(Record[14]); + } } else { BaseType = getDITypeRefOrNull(Record[6]); OffsetInBits = Record[9]; @@ -1700,6 +1700,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( bool HasThisAdj = true; bool HasThrownTypes = true; bool HasAnnotations = false; + bool HasTargetFuncName = false; unsigned OffsetA = 0; unsigned OffsetB = 0; if (!HasSPFlags) { @@ -1713,6 +1714,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( HasThrownTypes = Record.size() >= 21; } else { HasAnnotations = Record.size() >= 19; + HasTargetFuncName = Record.size() >= 20; } Metadata *CUorFn = getMDOrNull(Record[12 + OffsetB]); DISubprogram *SP = GET_OR_DISTINCT( @@ -1737,7 +1739,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( HasThrownTypes ? getMDOrNull(Record[17 + OffsetB]) : nullptr, // thrownTypes HasAnnotations ? getMDOrNull(Record[18 + OffsetB]) - : nullptr // annotations + : nullptr, // annotations + HasTargetFuncName ? getMDString(Record[19 + OffsetB]) + : nullptr // targetFuncName )); MetadataList.assignValue(SP, NextMetadataNo); NextMetadataNo++; @@ -2047,8 +2051,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_IMPORTED_ENTITY: { - if (Record.size() < 6 && Record.size() > 8) - return error("Invalid record"); + if (Record.size() < 6 || Record.size() > 8) + return error("Invalid DIImportedEntity record"); IsDistinct = Record[0]; bool HasFile = (Record.size() >= 7); @@ -2181,7 +2185,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment( /// Parse metadata attachments. Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment( - Function &F, const SmallVectorImpl &InstructionList) { + Function &F, ArrayRef InstructionList) { if (Error Err = Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) return Err; @@ -2357,7 +2361,7 @@ DISubprogram *MetadataLoader::lookupSubprogramForFunction(Function *F) { } Error MetadataLoader::parseMetadataAttachment( - Function &F, const SmallVectorImpl &InstructionList) { + Function &F, ArrayRef InstructionList) { return Pimpl->parseMetadataAttachment(F, InstructionList); } diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.h b/llvm/lib/Bitcode/Reader/MetadataLoader.h index 709800850f0d..653f1402bead 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.h +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.h @@ -13,7 +13,6 @@ #ifndef LLVM_LIB_BITCODE_READER_METADATALOADER_H #define LLVM_LIB_BITCODE_READER_METADATALOADER_H -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" #include @@ -28,6 +27,7 @@ class Instruction; class Metadata; class Module; class Type; +template class ArrayRef; /// Helper class that handles loading Metadatas and keeping them available. class MetadataLoader { @@ -66,8 +66,8 @@ public: DISubprogram *lookupSubprogramForFunction(Function *F); /// Parse a `METADATA_ATTACHMENT` block for a function. - Error parseMetadataAttachment( - Function &F, const SmallVectorImpl &InstructionList); + Error parseMetadataAttachment(Function &F, + ArrayRef InstructionList); /// Parse a `METADATA_KIND` block for the current module. 
Error parseMetadataKinds(); diff --git a/llvm/lib/Bitcode/Reader/ValueList.cpp b/llvm/lib/Bitcode/Reader/ValueList.cpp index 86ed664070f6..b9dbf904c89e 100644 --- a/llvm/lib/Bitcode/Reader/ValueList.cpp +++ b/llvm/lib/Bitcode/Reader/ValueList.cpp @@ -17,80 +17,44 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include #include -#include using namespace llvm; -namespace llvm { - -namespace { - -/// A class for maintaining the slot number definition -/// as a placeholder for the actual definition for forward constants defs. -class ConstantPlaceHolder : public ConstantExpr { -public: - explicit ConstantPlaceHolder(Type *Ty, LLVMContext &Context) - : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) { - Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); - } - - ConstantPlaceHolder &operator=(const ConstantPlaceHolder &) = delete; - - // allocate space for exactly one operand - void *operator new(size_t s) { return User::operator new(s, 1); } - - /// Methods to support type inquiry through isa, cast, and dyn_cast. - static bool classof(const Value *V) { - return isa(V) && - cast(V)->getOpcode() == Instruction::UserOp1; - } - - /// Provide fast operand accessors - DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); -}; - -} // end anonymous namespace - -// FIXME: can we inherit this from ConstantExpr? -template <> -struct OperandTraits - : public FixedNumOperandTraits {}; -DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value) - -} // end namespace llvm - -void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { +Error BitcodeReaderValueList::assignValue(unsigned Idx, Value *V, + unsigned TypeID) { if (Idx == size()) { - push_back(V); - return; + push_back(V, TypeID); + return Error::success(); } if (Idx >= size()) resize(Idx + 1); - WeakTrackingVH &OldV = ValuePtrs[Idx]; - if (!OldV) { - OldV = V; - return; + auto &Old = ValuePtrs[Idx]; + if (!Old.first) { + Old.first = V; + Old.second = TypeID; + return Error::success(); } - // Handle constants and non-constants (e.g. instrs) differently for - // efficiency. - if (Constant *PHC = dyn_cast(&*OldV)) { - ResolveConstants.push_back(std::make_pair(PHC, Idx)); - OldV = V; - } else { - // If there was a forward reference to this value, replace it. - Value *PrevVal = OldV; - OldV->replaceAllUsesWith(V); - PrevVal->deleteValue(); - } + assert(!isa(&*Old.first) && "Shouldn't update constant"); + // If there was a forward reference to this value, replace it. + Value *PrevVal = Old.first; + if (PrevVal->getType() != V->getType()) + return createStringError( + std::errc::illegal_byte_sequence, + "Assigned value does not match type of forward declaration"); + Old.first->replaceAllUsesWith(V); + PrevVal->deleteValue(); + return Error::success(); } -Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) { +Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty, + unsigned TyID, + BasicBlock *ConstExprInsertBB) { // Bail out for a clearly invalid value. if (Idx >= RefsUpperBound) return nullptr; @@ -98,31 +62,18 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) { if (Idx >= size()) resize(Idx + 1); - if (Value *V = ValuePtrs[Idx]) { - if (Ty != V->getType()) - report_fatal_error("Type mismatch in constant table!"); - return cast(V); - } - - // Create and return a placeholder, which will later be RAUW'd. 
- Constant *C = new ConstantPlaceHolder(Ty, Context); - ValuePtrs[Idx] = C; - return C; -} - -Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) { - // Bail out for a clearly invalid value. - if (Idx >= RefsUpperBound) - return nullptr; - - if (Idx >= size()) - resize(Idx + 1); - - if (Value *V = ValuePtrs[Idx]) { + if (Value *V = ValuePtrs[Idx].first) { // If the types don't match, it's invalid. if (Ty && Ty != V->getType()) return nullptr; - return V; + + Expected MaybeV = MaterializeValueFn(Idx, ConstExprInsertBB); + if (!MaybeV) { + // TODO: We might want to propagate the precise error message here. + consumeError(MaybeV.takeError()); + return nullptr; + } + return MaybeV.get(); } // No type specified, must be invalid reference. @@ -131,86 +82,6 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) { // Create and return a placeholder, which will later be RAUW'd. Value *V = new Argument(Ty); - ValuePtrs[Idx] = V; + ValuePtrs[Idx] = {V, TyID}; return V; } - -/// Once all constants are read, this method bulk resolves any forward -/// references. The idea behind this is that we sometimes get constants (such -/// as large arrays) which reference *many* forward ref constants. Replacing -/// each of these causes a lot of thrashing when building/reuniquing the -/// constant. Instead of doing this, we look at all the uses and rewrite all -/// the place holders at once for any constant that uses a placeholder. -void BitcodeReaderValueList::resolveConstantForwardRefs() { - // Sort the values by-pointer so that they are efficient to look up with a - // binary search. - llvm::sort(ResolveConstants); - - SmallVector NewOps; - - while (!ResolveConstants.empty()) { - Value *RealVal = operator[](ResolveConstants.back().second); - Constant *Placeholder = ResolveConstants.back().first; - ResolveConstants.pop_back(); - - // Loop over all users of the placeholder, updating them to reference the - // new value. If they reference more than one placeholder, update them all - // at once. - while (!Placeholder->use_empty()) { - auto UI = Placeholder->user_begin(); - User *U = *UI; - - // If the using object isn't uniqued, just update the operands. This - // handles instructions and initializers for global variables. - if (!isa(U) || isa(U)) { - UI.getUse().set(RealVal); - continue; - } - - // Otherwise, we have a constant that uses the placeholder. Replace that - // constant with a new constant that has *all* placeholder uses updated. - Constant *UserC = cast(U); - for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); I != E; - ++I) { - Value *NewOp; - if (!isa(*I)) { - // Not a placeholder reference. - NewOp = *I; - } else if (*I == Placeholder) { - // Common case is that it just references this one placeholder. - NewOp = RealVal; - } else { - // Otherwise, look up the placeholder in ResolveConstants. - ResolveConstantsTy::iterator It = llvm::lower_bound( - ResolveConstants, - std::pair(cast(*I), 0)); - assert(It != ResolveConstants.end() && It->first == *I); - NewOp = operator[](It->second); - } - - NewOps.push_back(cast(NewOp)); - } - - // Make the new constant. 
-      Constant *NewC;
-      if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
-        NewC = ConstantArray::get(UserCA->getType(), NewOps);
-      } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
-        NewC = ConstantStruct::get(UserCS->getType(), NewOps);
-      } else if (isa<ConstantVector>(UserC)) {
-        NewC = ConstantVector::get(NewOps);
-      } else {
-        assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
-        NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
-      }
-
-      UserC->replaceAllUsesWith(NewC);
-      UserC->destroyConstant();
-      NewOps.clear();
-    }
-
-    // Update all ValueHandles, they should be the only users at this point.
-    Placeholder->replaceAllUsesWith(RealVal);
-    delete cast<ConstantPlaceHolder>(Placeholder);
-  }
-}
diff --git a/llvm/lib/Bitcode/Reader/ValueList.h b/llvm/lib/Bitcode/Reader/ValueList.h
index a39617018f42..995d46f01f75 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.h
+++ b/llvm/lib/Bitcode/Reader/ValueList.h
@@ -14,6 +14,7 @@
 #define LLVM_LIB_BITCODE_READER_VALUELIST_H
 
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Error.h"
 #include <cassert>
 #include <cstddef>
 #include <vector>
@@ -21,56 +22,53 @@ namespace llvm {
 
 class Constant;
-class LLVMContext;
+class Error;
 class Type;
 class Value;
 
 class BitcodeReaderValueList {
-  std::vector<WeakTrackingVH> ValuePtrs;
-
-  /// As we resolve forward-referenced constants, we add information about them
-  /// to this vector. This allows us to resolve them in bulk instead of
-  /// resolving each reference at a time. See the code in
-  /// ResolveConstantForwardRefs for more information about this.
-  ///
-  /// The key of this vector is the placeholder constant, the value is the slot
-  /// number that holds the resolved value.
-  using ResolveConstantsTy = std::vector<std::pair<Constant *, unsigned>>;
-  ResolveConstantsTy ResolveConstants;
-  LLVMContext &Context;
+  /// Maps Value ID to pair of Value* and Type ID.
+  std::vector<std::pair<WeakTrackingVH, unsigned>> ValuePtrs;
 
   /// Maximum number of valid references. Forward references exceeding the
   /// maximum must be invalid.
   unsigned RefsUpperBound;
 
-public:
-  BitcodeReaderValueList(LLVMContext &C, size_t RefsUpperBound)
-      : Context(C),
-        RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
-                                RefsUpperBound)) {}
+  using MaterializeValueFnTy =
+      std::function<Expected<Value *>(unsigned, BasicBlock *)>;
+  MaterializeValueFnTy MaterializeValueFn;
 
-  ~BitcodeReaderValueList() {
-    assert(ResolveConstants.empty() && "Constants not resolved?");
-  }
+public:
+  BitcodeReaderValueList(size_t RefsUpperBound,
+                         MaterializeValueFnTy MaterializeValueFn)
+      : RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+                                RefsUpperBound)),
+        MaterializeValueFn(MaterializeValueFn) {}
 
   // vector compatibility methods
   unsigned size() const { return ValuePtrs.size(); }
   void resize(unsigned N) { ValuePtrs.resize(N); }
-  void push_back(Value *V) { ValuePtrs.emplace_back(V); }
+  void push_back(Value *V, unsigned TypeID) {
+    ValuePtrs.emplace_back(V, TypeID);
+  }
 
   void clear() {
-    assert(ResolveConstants.empty() && "Constants not resolved?");
     ValuePtrs.clear();
   }
 
   Value *operator[](unsigned i) const {
     assert(i < ValuePtrs.size());
-    return ValuePtrs[i];
+    return ValuePtrs[i].first;
   }
 
-  Value *back() const { return ValuePtrs.back(); }
+  unsigned getTypeID(unsigned ValNo) const {
+    assert(ValNo < ValuePtrs.size());
+    return ValuePtrs[ValNo].second;
+  }
+
+  Value *back() const { return ValuePtrs.back().first; }
   void pop_back() { ValuePtrs.pop_back(); }
 
@@ -81,14 +79,15 @@ public:
     ValuePtrs.resize(N);
   }
 
-  Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
-  Value *getValueFwdRef(unsigned Idx, Type *Ty);
+  void replaceValueWithoutRAUW(unsigned ValNo, Value *NewV) {
+    assert(ValNo < ValuePtrs.size());
+    ValuePtrs[ValNo].first = NewV;
+  }
 
-  void assignValue(Value *V, unsigned Idx);
+  Value *getValueFwdRef(unsigned Idx, Type *Ty, unsigned TyID,
+                        BasicBlock *ConstExprInsertBB);
 
-  /// Once all constants are read, this method bulk resolves any forward
-  /// references.
- void resolveConstantForwardRefs(); + Error assignValue(unsigned Idx, Value *V, unsigned TypeID); }; } // end namespace llvm diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 4bba0b356675..941ed808bab1 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -610,6 +612,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { switch (Kind) { case Attribute::Alignment: return bitc::ATTR_KIND_ALIGNMENT; + case Attribute::AllocAlign: + return bitc::ATTR_KIND_ALLOC_ALIGN; case Attribute::AllocSize: return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: @@ -644,6 +648,10 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_JUMP_TABLE; case Attribute::MinSize: return bitc::ATTR_KIND_MIN_SIZE; + case Attribute::AllocatedPointer: + return bitc::ATTR_KIND_ALLOCATED_POINTER; + case Attribute::AllocKind: + return bitc::ATTR_KIND_ALLOC_KIND; case Attribute::Naked: return bitc::ATTR_KIND_NAKED; case Attribute::Nest: @@ -688,6 +696,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NO_PROFILE; case Attribute::NoUnwind: return bitc::ATTR_KIND_NO_UNWIND; + case Attribute::NoSanitizeBounds: + return bitc::ATTR_KIND_NO_SANITIZE_BOUNDS; case Attribute::NoSanitizeCoverage: return bitc::ATTR_KIND_NO_SANITIZE_COVERAGE; case Attribute::NullPointerIsValid: @@ -764,6 +774,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_BYREF; case Attribute::MustProgress: return bitc::ATTR_KIND_MUSTPROGRESS; + case Attribute::PresplitCoroutine: + return bitc::ATTR_KIND_PRESPLIT_COROUTINE; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: @@ -1013,6 +1025,8 @@ void ModuleBitcodeWriter::writeTypeTable() { TypeVals.push_back(true); break; } + case Type::DXILPointerTyID: + llvm_unreachable("DXIL pointers cannot be added to IR modules"); } // Emit the finished record. @@ -1211,6 +1225,14 @@ static StringEncoding getStringEncoding(StringRef Str) { return SE_Fixed7; } +static_assert(sizeof(GlobalValue::SanitizerMetadata) <= sizeof(unsigned), + "Sanitizer Metadata is too large for naive serialization."); +static unsigned +serializeSanitizerMetadata(const GlobalValue::SanitizerMetadata &Meta) { + return Meta.NoAddress | (Meta.NoHWAddress << 1) | + (Meta.NoMemtag << 2) | (Meta.IsDynInit << 3); +} + /// Emit top-level description of module, including target triple, inline asm, /// descriptors for global variables, and function prototype info. /// Returns the bit offset to backpatch with the location of the real VST. 
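[Editor's note, annotation added during cleanup, not part of the patch] Below
is a minimal, self-contained sketch of the bit layout that
serializeSanitizerMetadata() above commits to, together with the inverse
mapping a reader needs. SanitizerMetaSketch is a stand-in for
GlobalValue::SanitizerMetadata; only the field order is taken from the code
above.

  struct SanitizerMetaSketch {
    bool NoAddress, NoHWAddress, NoMemtag, IsDynInit;
  };
  // Pack one flag per bit, in declaration order (bit 0 = NoAddress).
  static unsigned packSanitizerMeta(const SanitizerMetaSketch &M) {
    return unsigned(M.NoAddress) | (unsigned(M.NoHWAddress) << 1) |
           (unsigned(M.NoMemtag) << 2) | (unsigned(M.IsDynInit) << 3);
  }
  // Recover the flags from the serialized word.
  static SanitizerMetaSketch unpackSanitizerMeta(unsigned V) {
    return {(V & 1) != 0, (V & 2) != 0, (V & 4) != 0, (V & 8) != 0};
  }
  // Example: NoAddress + NoMemtag set packs to 0b0101 = 5; unpack(5)
  // restores exactly those two flags.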
@@ -1334,7 +1356,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, - // comdat, attributes, DSO_Local] + // comdat, attributes, DSO_Local, GlobalSanitizer] Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); @@ -1350,10 +1372,8 @@ void ModuleBitcodeWriter::writeModuleInfo() { GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None || GV.isExternallyInitialized() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || - GV.hasComdat() || - GV.hasAttributes() || - GV.isDSOLocal() || - GV.hasPartition()) { + GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() || + GV.hasPartition() || GV.hasSanitizerMetadata()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(getEncodedUnnamedAddr(GV)); @@ -1367,6 +1387,10 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.push_back(GV.isDSOLocal()); Vals.push_back(addToStrtab(GV.getPartition())); Vals.push_back(GV.getPartition().size()); + + Vals.push_back((GV.hasSanitizerMetadata() ? serializeSanitizerMetadata( + GV.getSanitizerMetadata()) + : 0)); } else { AbbrevToUse = SimpleGVarAbbrev; } @@ -1817,6 +1841,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N, Record.push_back(N->getThisAdjustment()); Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get())); Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get())); + Record.push_back(VE.getMetadataOrNullID(N->getRawTargetFuncName())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); @@ -2649,6 +2674,9 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(CE->getPredicate()); break; + case Instruction::InsertValue: + report_fatal_error("insertvalue constexprs not supported"); + break; } } else if (const BlockAddress *BA = dyn_cast(C)) { Code = bitc::CST_CODE_BLOCKADDRESS; @@ -3068,6 +3096,10 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Bitfield::set(Record, true); Bitfield::set(Record, AI.isSwiftError()); Vals.push_back(Record); + + unsigned AS = AI.getAddressSpace(); + if (AS != M.getDataLayout().getAllocaAddrSpace()) + Vals.push_back(AS); break; } @@ -3347,8 +3379,10 @@ void ModuleBitcodeWriter::writeFunction( bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + SmallSetVector BlockAddressUsers; + // Finally, emit all the instructions, in order. 
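// [Editor's note, annotation added during cleanup, not part of the patch]
// The rewritten loop below does double duty: while emitting each
// instruction, it also chases users of any blockaddress taken of this
// function's blocks and collects the *other* functions referring to them.
// For example, given
//   define void @g() { call void @h(ptr blockaddress(@f, %bb)) ... }
// @g is collected while @f is being written, and the collected value IDs
// are emitted afterwards as a FUNC_CODE_BLOCKADDR_USERS record, so the
// reader knows up front which functions reference @f's blocks.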
- for (const BasicBlock &BB : F) + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { writeInstruction(I, InstID, Vals); @@ -3380,6 +3414,32 @@ void ModuleBitcodeWriter::writeFunction( LastDL = DL; } + if (BlockAddress *BA = BlockAddress::lookup(&BB)) { + SmallVector Worklist{BA}; + SmallPtrSet Visited{BA}; + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + for (User *U : V->users()) { + if (auto *I = dyn_cast(U)) { + Function *P = I->getFunction(); + if (P != &F) + BlockAddressUsers.insert(P); + } else if (isa(U) && !isa(U) && + Visited.insert(U).second) + Worklist.push_back(U); + } + } + } + } + + if (!BlockAddressUsers.empty()) { + Vals.resize(BlockAddressUsers.size()); + for (auto I : llvm::enumerate(BlockAddressUsers)) + Vals[I.index()] = VE.getValueID(I.value()); + Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Vals); + Vals.clear(); + } + // Emit names for all the instructions etc. if (auto *Symtab = F.getValueSymbolTable()) writeFunctionLevelValueSymbolTable(*Symtab); @@ -4375,7 +4435,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { uint32_t Vals[5]; Hasher.update(ArrayRef((const uint8_t *)&(Buffer)[BlockStartPos], Buffer.size() - BlockStartPos)); - StringRef Hash = Hasher.result(); + std::array Hash = Hasher.result(); for (int Pos = 0; Pos < 20; Pos += 4) { Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos); } @@ -4855,9 +4915,15 @@ static const char *getSectionNameForBitcode(const Triple &T) { case Triple::GOFF: llvm_unreachable("GOFF is not yet implemented"); break; + case Triple::SPIRV: + llvm_unreachable("SPIRV is not yet implemented"); + break; case Triple::XCOFF: llvm_unreachable("XCOFF is not yet implemented"); break; + case Triple::DXContainer: + llvm_unreachable("DXContainer is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } @@ -4874,9 +4940,15 @@ static const char *getSectionNameForCommandline(const Triple &T) { case Triple::GOFF: llvm_unreachable("GOFF is not yet implemented"); break; + case Triple::SPIRV: + llvm_unreachable("SPIRV is not yet implemented"); + break; case Triple::XCOFF: llvm_unreachable("XCOFF is not yet implemented"); break; + case Triple::DXContainer: + llvm_unreachable("DXC is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } @@ -4931,7 +5003,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.embedded.module", true)) { - assert(Old->hasOneUse() && + assert(Old->hasZeroLiveUses() && "llvm.embedded.module can only be used once in llvm.compiler.used"); GV->takeName(Old); Old->eraseFromParent(); @@ -4954,7 +5026,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, UsedArray.push_back( ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) { - assert(Old->hasOneUse() && + assert(Old->hasZeroLiveUses() && "llvm.cmdline can only be used once in llvm.compiler.used"); GV->takeName(Old); Old->eraseFromParent(); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp index d884415aafd5..536d04f2fe26 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -13,7 +13,6 @@ #include "llvm/Bitcode/BitcodeWriterPass.h" #include 
"llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 01f7e85bd60e..727ec2e02cc2 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -50,17 +50,12 @@ namespace { struct OrderMap { DenseMap> IDs; - unsigned LastGlobalConstantID = 0; unsigned LastGlobalValueID = 0; OrderMap() = default; - bool isGlobalConstant(unsigned ID) const { - return ID <= LastGlobalConstantID; - } - bool isGlobalValue(unsigned ID) const { - return ID <= LastGlobalValueID && !isGlobalConstant(ID); + return ID <= LastGlobalValueID; } unsigned size() const { return IDs.size(); } @@ -84,7 +79,7 @@ static void orderValue(const Value *V, OrderMap &OM) { return; if (const Constant *C = dyn_cast(V)) { - if (C->getNumOperands() && !isa(C)) { + if (C->getNumOperands()) { for (const Value *Op : C->operands()) if (!isa(Op) && !isa(Op)) orderValue(Op, OM); @@ -104,39 +99,40 @@ static OrderMap orderModule(const Module &M) { // and ValueEnumerator::incorporateFunction(). OrderMap OM; - // In the reader, initializers of GlobalValues are set *after* all the - // globals have been read. Rather than awkwardly modeling this behaviour - // directly in predictValueUseListOrderImpl(), just assign IDs to - // initializers of GlobalValues before GlobalValues themselves to model this - // implicitly. - for (const GlobalVariable &G : M.globals()) - if (G.hasInitializer()) - if (!isa(G.getInitializer())) - orderValue(G.getInitializer(), OM); - for (const GlobalAlias &A : M.aliases()) - if (!isa(A.getAliasee())) - orderValue(A.getAliasee(), OM); - for (const GlobalIFunc &I : M.ifuncs()) - if (!isa(I.getResolver())) - orderValue(I.getResolver(), OM); - for (const Function &F : M) { - for (const Use &U : F.operands()) - if (!isa(U.get())) - orderValue(U.get(), OM); - } + // Initializers of GlobalValues are processed in + // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather + // than ValueEnumerator, and match the code in predictValueUseListOrderImpl() + // by giving IDs in reverse order. + // + // Since GlobalValues never reference each other directly (just through + // initializers), their relative IDs only matter for determining order of + // uses in their initializers. + for (const GlobalVariable &G : reverse(M.globals())) + orderValue(&G, OM); + for (const GlobalAlias &A : reverse(M.aliases())) + orderValue(&A, OM); + for (const GlobalIFunc &I : reverse(M.ifuncs())) + orderValue(&I, OM); + for (const Function &F : reverse(M)) + orderValue(&F, OM); + OM.LastGlobalValueID = OM.size(); - // As constants used in metadata operands are emitted as module-level - // constants, we must order them before other operands. Also, we must order - // these before global values, as these will be read before setting the - // global values' initializers. The latter matters for constants which have - // uses towards other constants that are used as initializers. auto orderConstantValue = [&OM](const Value *V) { - if ((isa(V) && !isa(V)) || isa(V)) + if (isa(V) || isa(V)) orderValue(V, OM); }; + for (const Function &F : M) { if (F.isDeclaration()) continue; + // Here we need to match the union of ValueEnumerator::incorporateFunction() + // and WriteFunction(). 
Basic blocks are implicitly declared before + // anything else (by declaring their size). + for (const BasicBlock &BB : F) + orderValue(&BB, OM); + + // Metadata used by instructions is decoded before the actual instructions, + // so visit any constants used by it beforehand. for (const BasicBlock &BB : F) for (const Instruction &I : BB) for (const Value *V : I.operands()) { @@ -151,49 +147,17 @@ static OrderMap orderModule(const Module &M) { } } } - } - OM.LastGlobalConstantID = OM.size(); - - // Initializers of GlobalValues are processed in - // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather - // than ValueEnumerator, and match the code in predictValueUseListOrderImpl() - // by giving IDs in reverse order. - // - // Since GlobalValues never reference each other directly (just through - // initializers), their relative IDs only matter for determining order of - // uses in their initializers. - for (const Function &F : M) - orderValue(&F, OM); - for (const GlobalAlias &A : M.aliases()) - orderValue(&A, OM); - for (const GlobalIFunc &I : M.ifuncs()) - orderValue(&I, OM); - for (const GlobalVariable &G : M.globals()) - orderValue(&G, OM); - OM.LastGlobalValueID = OM.size(); - for (const Function &F : M) { - if (F.isDeclaration()) - continue; - // Here we need to match the union of ValueEnumerator::incorporateFunction() - // and WriteFunction(). Basic blocks are implicitly declared before - // anything else (by declaring their size). - for (const BasicBlock &BB : F) - orderValue(&BB, OM); for (const Argument &A : F.args()) orderValue(&A, OM); for (const BasicBlock &BB : F) for (const Instruction &I : BB) { for (const Value *Op : I.operands()) - if ((isa(*Op) && !isa(*Op)) || - isa(*Op)) - orderValue(Op, OM); + orderConstantValue(Op); if (auto *SVI = dyn_cast(&I)) orderValue(SVI->getShuffleMaskForBitcode(), OM); - } - for (const BasicBlock &BB : F) - for (const Instruction &I : BB) orderValue(&I, OM); + } } return OM; } @@ -223,18 +187,6 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F, auto LID = OM.lookup(LU->getUser()).first; auto RID = OM.lookup(RU->getUser()).first; - // Global values are processed in reverse order. - // - // Moreover, initializers of GlobalValues are set *after* all the globals - // have been read (despite having earlier IDs). Rather than awkwardly - // modeling this behaviour here, orderModule() has assigned IDs to - // initializers of GlobalValues before GlobalValues themselves. - if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) { - if (LID == RID) - return LU->getOperandNo() > RU->getOperandNo(); - return LID < RID; - } - // If ID is 4, then expect: 7 6 5 1 2 3. if (LID < RID) { if (RID <= ID) @@ -257,9 +209,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F, return LU->getOperandNo() > RU->getOperandNo(); }); - if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) { - return L.second < R.second; - })) + if (llvm::is_sorted(List, llvm::less_second())) // Order is already correct. return; @@ -319,16 +269,25 @@ static UseListOrderStack predictUseListOrder(const Module &M) { predictValueUseListOrder(&A, &F, OM, Stack); for (const BasicBlock &BB : F) for (const Instruction &I : BB) { - for (const Value *Op : I.operands()) + for (const Value *Op : I.operands()) { if (isa(*Op) || isa(*Op)) // Visit GlobalValues. 
predictValueUseListOrder(Op, &F, OM, Stack); + if (const auto *MAV = dyn_cast(Op)) { + if (const auto *VAM = + dyn_cast(MAV->getMetadata())) { + predictValueUseListOrder(VAM->getValue(), &F, OM, Stack); + } else if (const auto *AL = + dyn_cast(MAV->getMetadata())) { + for (const auto *VAM : AL->getArgs()) + predictValueUseListOrder(VAM->getValue(), &F, OM, Stack); + } + } + } if (auto *SVI = dyn_cast(&I)) predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM, Stack); - } - for (const BasicBlock &BB : F) - for (const Instruction &I : BB) predictValueUseListOrder(&I, &F, OM, Stack); + } } // Visit globals last, since the module-level use-list block will be seen @@ -939,9 +898,12 @@ void ValueEnumerator::EnumerateValue(const Value *V) { I != E; ++I) if (!isa(*I)) // Don't enumerate BB operand to BlockAddress. EnumerateValue(*I); - if (auto *CE = dyn_cast(C)) + if (auto *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::ShuffleVector) EnumerateValue(CE->getShuffleMaskForBitcode()); + if (auto *GEP = dyn_cast(CE)) + EnumerateType(GEP->getSourceElementType()); + } // Finally, add the value. Doing this could make the ValueID reference be // dangling, don't reuse it. diff --git a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp index 28adfe6268f9..c297e16bdfdf 100644 --- a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp +++ b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp @@ -16,6 +16,10 @@ using namespace llvm; //===----------------------------------------------------------------------===// // BitstreamCursor implementation //===----------------------------------------------------------------------===// +// +static Error error(const char *Message) { + return createStringError(std::errc::illegal_byte_sequence, Message); +} /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { @@ -97,7 +101,7 @@ Expected BitstreamCursor::skipRecord(unsigned AbbrevID) { unsigned Code = MaybeCode.get(); Expected MaybeVBR = ReadVBR(6); if (!MaybeVBR) - return MaybeVBR.get(); + return MaybeVBR.takeError(); unsigned NumElts = MaybeVBR.get(); for (unsigned i = 0; i != NumElts; ++i) if (Expected Res = ReadVBR64(6)) @@ -107,7 +111,11 @@ Expected BitstreamCursor::skipRecord(unsigned AbbrevID) { return Code; } - const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + Expected MaybeAbbv = getAbbrev(AbbrevID); + if (!MaybeAbbv) + return MaybeAbbv.takeError(); + + const BitCodeAbbrev *Abbv = MaybeAbbv.get(); const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); unsigned Code; if (CodeOp.isLiteral()) @@ -152,7 +160,7 @@ Expected BitstreamCursor::skipRecord(unsigned AbbrevID) { // Decode the value as we are commanded. 
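// [Editor's note, annotation added during cleanup, not part of the patch]
// For reference when reading the cases below: a Fixed(n) element is n raw
// bits, while a VBR(n) element is a chain of n-bit chunks whose low n-1 bits
// carry payload (accumulated low-to-high) and whose top bit says whether
// another chunk follows. Worked VBR6 example: 1000 = 0b1111101000 is emitted
// as the chunks 40 (= 8 | 32, continue) then 31 (stop), and decodes back as
// 8 + 31 * 32 = 1000. Array and Blob are not permitted as the element
// encoding here, hence the error on the default path.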
switch (EltEnc.getEncoding()) { default: - report_fatal_error("Array element type can't be an Array or a Blob"); + return error("Array element type can't be an Array or a Blob"); case BitCodeAbbrevOp::Fixed: assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); if (Error Err = @@ -212,8 +220,12 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, uint32_t Code = MaybeCode.get(); Expected MaybeNumElts = ReadVBR(6); if (!MaybeNumElts) - return MaybeNumElts.takeError(); + return error( + ("Failed to read size: " + toString(MaybeNumElts.takeError())) + .c_str()); uint32_t NumElts = MaybeNumElts.get(); + if (!isSizePlausible(NumElts)) + return error("Size is not plausible"); Vals.reserve(Vals.size() + NumElts); for (unsigned i = 0; i != NumElts; ++i) @@ -224,7 +236,10 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, return Code; } - const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + Expected MaybeAbbv = getAbbrev(AbbrevID); + if (!MaybeAbbv) + return MaybeAbbv.takeError(); + const BitCodeAbbrev *Abbv = MaybeAbbv.get(); // Read the record code first. assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); @@ -235,7 +250,7 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, else { if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) - report_fatal_error("Abbreviation starts with an Array or a Blob"); + return error("Abbreviation starts with an Array or a Blob"); if (Expected MaybeCode = readAbbreviatedField(*this, CodeOp)) Code = MaybeCode.get(); else @@ -262,22 +277,26 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, // Array case. Read the number of elements as a vbr6. Expected MaybeNumElts = ReadVBR(6); if (!MaybeNumElts) - return MaybeNumElts.takeError(); + return error( + ("Failed to read size: " + toString(MaybeNumElts.takeError())) + .c_str()); uint32_t NumElts = MaybeNumElts.get(); + if (!isSizePlausible(NumElts)) + return error("Size is not plausible"); Vals.reserve(Vals.size() + NumElts); // Get the element encoding. if (i + 2 != e) - report_fatal_error("Array op not second to last"); + return error("Array op not second to last"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); if (!EltEnc.isEncoding()) - report_fatal_error( + return error( "Array element type has to be an encoding of a type"); // Read all the elements. switch (EltEnc.getEncoding()) { default: - report_fatal_error("Array element type can't be an Array or a Blob"); + return error("Array element type can't be an Array or a Blob"); case BitCodeAbbrevOp::Fixed: for (; NumElts; --NumElts) if (Expected MaybeVal = @@ -316,13 +335,9 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, size_t CurBitPos = GetCurrentBitNo(); const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; - // If this would read off the end of the bitcode file, just set the - // record to empty and return. - if (!canSkipToPos(NewEnd/8)) { - Vals.append(NumElts, 0); - skipToEnd(); - break; - } + // Make sure the bitstream is large enough to contain the blob. + if (!canSkipToPos(NewEnd/8)) + return error("Blob ends too soon"); // Otherwise, inform the streamer that we need these bytes in memory. 
Skip // over tail padding first, in case jumping to NewEnd invalidates the Blob @@ -366,6 +381,9 @@ Error BitstreamCursor::ReadAbbrevRecord() { Expected MaybeEncoding = Read(3); if (!MaybeEncoding) return MaybeEncoding.takeError(); + if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get())) + return error("Invalid encoding"); + BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); if (BitCodeAbbrevOp::hasEncodingData(E)) { @@ -385,8 +403,7 @@ Error BitstreamCursor::ReadAbbrevRecord() { if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && Data > MaxChunkSize) - report_fatal_error( - "Fixed or VBR abbrev record with size > MaxChunkData"); + return error("Fixed or VBR abbrev record with size > MaxChunkData"); Abbv->Add(BitCodeAbbrevOp(E, Data)); } else @@ -394,7 +411,7 @@ Error BitstreamCursor::ReadAbbrevRecord() { } if (Abbv->getNumOperandInfos() == 0) - report_fatal_error("Abbrev record with no operands"); + return error("Abbrev record with no operands"); CurAbbrevs.push_back(std::move(Abbv)); return Error::success(); diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index cdf5586766da..f5dbaccfcad5 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -21,12 +21,9 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 03e63321e3c4..1940f46232d3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -38,8 +38,19 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, // unsigned long personality; /* Pointer to the personality routine */ // } - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getCompactUnwindSection()); + auto *EHInfo = + cast(Asm->getObjFileLowering().getCompactUnwindSection()); + if (Asm->TM.getFunctionSections()) { + // If option -ffunction-sections is on, append the function name to the + // name of EH Info Table csect so that each function has its own EH Info + // Table csect. This helps the linker to garbage-collect EH info of unused + // functions. + SmallString<128> NameStr = EHInfo->getName(); + raw_svector_ostream(NameStr) << '.' 
<< Asm->MF->getFunction().getName(); + EHInfo = Asm->OutContext.getXCOFFSection(NameStr, EHInfo->getKind(), + EHInfo->getCsectProp()); + } + Asm->OutStreamer->switchSection(EHInfo); MCSymbol *EHInfoLabel = TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF); Asm->OutStreamer->emitLabel(EHInfoLabel); @@ -74,8 +85,8 @@ void AIXException::endFunction(const MachineFunction *MF) { const Function &F = MF->getFunction(); assert(F.hasPersonalityFn() && "Landingpads are presented, but no personality routine is found."); - const GlobalValue *Per = - dyn_cast(F.getPersonalityFn()->stripPointerCasts()); + const auto *Per = + cast(F.getPersonalityFn()->stripPointerCasts()); const MCSymbol *PerSym = Asm->TM.getSymbol(Per); emitExceptionInfoTable(LSDALabel, PerSym); diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 223840c21d8b..e04a29fbb42b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -14,21 +14,14 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} -ARMException::~ARMException() {} +ARMException::~ARMException() = default; ARMTargetStreamer &ARMException::getTargetStreamer() { MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer(); @@ -101,7 +94,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding, // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = TypeInfos.size(); } @@ -116,7 +109,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding, // Emit the Exception Specifications. 
if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = 0; } for (std::vector::const_iterator diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 65c45f73e965..b10d79f4b5a6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -18,7 +18,6 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" @@ -563,7 +562,7 @@ void llvm::emitDWARF5AccelTable( if (CompUnits.empty()) return; - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( Asm->getObjFileLowering().getDwarfDebugNamesSection()); Contents.finalize(Asm, "names"); diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 21da9d50efba..32d8dc793510 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -17,7 +17,7 @@ using namespace llvm; unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { - HasBeenUsed = true; + resetUsedFlag(true); auto IterBool = Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS))); return IterBool.first->second.Number; @@ -44,7 +44,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { return; // Start the dwarf addr section. - Asm.OutStreamer->SwitchSection(AddrSection); + Asm.OutStreamer->switchSection(AddrSection); MCSymbol *EndLabel = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3e8e190eecc3..4a31bf85446b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" @@ -48,7 +49,6 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" @@ -82,33 +82,26 @@ #include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" -#include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" -#include "llvm/Remarks/Remark.h" -#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStreamer.h" -#include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -125,7 +118,6 @@ #include #include #include -#include #include #include #include @@ -135,11 +127,6 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -// FIXME: this option currently only applies to DWARF, and not CodeView, tables -static cl::opt - DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, - cl::desc("Disable debug info printing")); - const char DWARFGroupName[] = "dwarf"; const char DWARFGroupDescription[] = "DWARF Emission"; const char DbgTimerName[] = "emit"; @@ -167,6 +154,178 @@ static gcp_map_type &getGCMap(void *&P) { return *(gcp_map_type*)P; } +namespace { +class AddrLabelMapCallbackPtr final : CallbackVH { + AddrLabelMap *Map = nullptr; + +public: + AddrLabelMapCallbackPtr() = default; + AddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} + + void setPtr(BasicBlock *BB) { + ValueHandleBase::operator=(BB); + } + + void setMap(AddrLabelMap *map) { Map = map; } + + void deleted() override; + void allUsesReplacedWith(Value *V2) override; +}; +} // namespace + +class llvm::AddrLabelMap { + MCContext &Context; + struct AddrLabelSymEntry { + /// The symbols for the label. + TinyPtrVector Symbols; + + Function *Fn; // The containing function of the BasicBlock. + unsigned Index; // The index in BBCallbacks for the BasicBlock. + }; + + DenseMap, AddrLabelSymEntry> AddrLabelSymbols; + + /// Callbacks for the BasicBlock's that we have entries for. We use this so + /// we get notified if a block is deleted or RAUWd. + std::vector BBCallbacks; + + /// This is a per-function list of symbols whose corresponding BasicBlock got + /// deleted. These symbols need to be emitted at some point in the file, so + /// AsmPrinter emits them after the function body. + DenseMap, std::vector> + DeletedAddrLabelsNeedingEmission; + +public: + AddrLabelMap(MCContext &context) : Context(context) {} + + ~AddrLabelMap() { + assert(DeletedAddrLabelsNeedingEmission.empty() && + "Some labels for deleted blocks never got emitted"); + } + + ArrayRef getAddrLabelSymbolToEmit(BasicBlock *BB); + + void takeDeletedSymbolsForFunction(Function *F, + std::vector &Result); + + void UpdateForDeletedBlock(BasicBlock *BB); + void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); +}; + +ArrayRef AddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + // If we already had an entry for this block, just return it. + if (!Entry.Symbols.empty()) { + assert(BB->getParent() == Entry.Fn && "Parent changed"); + return Entry.Symbols; + } + + // Otherwise, this is a new entry, create a new symbol for it and add an + // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. + BBCallbacks.emplace_back(BB); + BBCallbacks.back().setMap(this); + Entry.Index = BBCallbacks.size() - 1; + Entry.Fn = BB->getParent(); + MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol() + : Context.createTempSymbol(); + Entry.Symbols.push_back(Sym); + return Entry.Symbols; +} + +/// If we have any deleted symbols for F, return them. +void AddrLabelMap::takeDeletedSymbolsForFunction( + Function *F, std::vector &Result) { + DenseMap, std::vector>::iterator I = + DeletedAddrLabelsNeedingEmission.find(F); + + // If there are no entries for the function, just return. 
+ if (I == DeletedAddrLabelsNeedingEmission.end()) + return; + + // Otherwise, take the list. + std::swap(Result, I->second); + DeletedAddrLabelsNeedingEmission.erase(I); +} + +//===- Address of Block Management ----------------------------------------===// + +ArrayRef<MCSymbol *> +AsmPrinter::getAddrLabelSymbolToEmit(const BasicBlock *BB) { + // Lazily create AddrLabelSymbols. + if (!AddrLabelSymbols) + AddrLabelSymbols = std::make_unique<AddrLabelMap>(OutContext); + return AddrLabelSymbols->getAddrLabelSymbolToEmit( + const_cast<BasicBlock *>(BB)); +} + +void AsmPrinter::takeDeletedSymbolsForFunction( + const Function *F, std::vector<MCSymbol *> &Result) { + // If no blocks have had their addresses taken, we're done. + if (!AddrLabelSymbols) + return; + return AddrLabelSymbols->takeDeletedSymbolsForFunction( + const_cast<Function *>(F), Result); +} + +void AddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { + // If the block got deleted, there is no need for the symbol. If the symbol + // was already emitted, we can just forget about it, otherwise we need to + // queue it up for later emission when the function is output. + AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]); + AddrLabelSymbols.erase(BB); + assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?"); + BBCallbacks[Entry.Index] = nullptr; // Clear the callback. + +#if !LLVM_MEMORY_SANITIZER_BUILD + // BasicBlock is destroyed already, so this access is UB detectable by msan. + assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && + "Block/parent mismatch"); +#endif + + for (MCSymbol *Sym : Entry.Symbols) { + if (Sym->isDefined()) + return; + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } +} + +void AddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { + // Get the entry for the RAUW'd block and remove it from our map. + AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]); + AddrLabelSymbols.erase(Old); + assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?"); + + AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; + + // If New is not address taken, just move our symbol over to it. + if (NewEntry.Symbols.empty()) { + BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. + NewEntry = std::move(OldEntry); // Set New's entry. + return; + } + + BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. + + // Otherwise, we need to add the old symbols to the new block's set. + llvm::append_range(NewEntry.Symbols, OldEntry.Symbols); + } + +void AddrLabelMapCallbackPtr::deleted() { + Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); +} + +void AddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { + Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); +} + /// getGVAlignment - Return the alignment to use for the specified global /// value. This rounds up to the preferred alignment if possible and legal. Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL, @@ -271,6 +430,10 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { bool AsmPrinter::doInitialization(Module &M) { auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); MMI = MMIWP ?
&MMIWP->getMMI() : nullptr; + HasSplitStack = false; + HasNoSplitStack = false; + + AddrLabelSymbols = nullptr; // Initialize TargetLoweringObjectFile. const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) @@ -281,9 +444,6 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer->initSections(false, *TM.getMCSubtargetInfo()); - if (DisableDebugInfoPrinting) - MMI->setDebugInfoAvailability(false); - // Emit the version-min deployment target directive if needed. // // FIXME: If we end up with a collection of these sorts of Darwin-specific @@ -335,11 +495,11 @@ bool AsmPrinter::doInitialization(Module &M) { // Emit module-level inline asm if it exists. if (!M.getModuleInlineAsm().empty()) { OutStreamer->AddComment("Start of file scope inline assembly"); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(), TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } if (MAI->doesSupportDebugInformation()) { @@ -351,7 +511,7 @@ bool AsmPrinter::doInitialization(Module &M) { CodeViewLineTablesGroupDescription); } if (!EmitCodeView || M.getDwarfVersion()) { - if (!DisableDebugInfoPrinting) { + if (MMI->hasDebugInfo()) { DD = new DwarfDebug(this); Handlers.emplace_back(std::unique_ptr<AsmPrinterHandler>(DD), DbgTimerName, DbgTimerDescription, DWARFGroupName, @@ -536,9 +696,9 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (isVerbose()) { // When printing the control variable __emutls_v.*, // we don't need to print the original TLS variable name. - GV->printAsOperand(OutStreamer->GetCommentOS(), - /*PrintType=*/false, GV->getParent()); - OutStreamer->GetCommentOS() << '\n'; + GV->printAsOperand(OutStreamer->getCommentOS(), - /*PrintType=*/false, GV->getParent()); + OutStreamer->getCommentOS() << '\n'; } } @@ -652,7 +812,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { TheSection = getObjFileLowering().getTLSBSSSection(); OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); } else if (GVKind.isThreadData()) { - OutStreamer->SwitchSection(TheSection); + OutStreamer->switchSection(TheSection); emitAlignment(Alignment, GV); OutStreamer->emitLabel(MangSym); @@ -661,12 +821,12 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { GV->getInitializer()); } - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); // Emit the variable struct for the runtime. MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - OutStreamer->SwitchSection(TLVSect); + OutStreamer->switchSection(TLVSect); // Emit the linkage here.
emitLinkage(GV, GVSym); OutStreamer->emitLabel(GVSym); @@ -681,13 +841,13 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { OutStreamer->emitIntValue(0, PtrSize); OutStreamer->emitSymbolValue(MangSym, PtrSize); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); return; } MCSymbol *EmittedInitSym = GVSym; - OutStreamer->SwitchSection(TheSection); + OutStreamer->switchSection(TheSection); emitLinkage(GV, EmittedInitSym); emitAlignment(Alignment, GV); @@ -704,7 +864,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { OutStreamer->emitELFSize(EmittedInitSym, MCConstantExpr::create(Size, OutContext)); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } /// Emit the directive and value for debug thread local expression @@ -723,7 +883,7 @@ void AsmPrinter::emitFunctionHeader() { const Function &F = MF->getFunction(); if (isVerbose()) - OutStreamer->GetCommentOS() + OutStreamer->getCommentOS() << "-- Begin function " << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n'; @@ -737,7 +897,7 @@ void AsmPrinter::emitFunctionHeader() { MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM)); else MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); - OutStreamer->SwitchSection(MF->getSection()); + OutStreamer->switchSection(MF->getSection()); if (!MAI->hasVisibilityOnlyWithLinkage()) emitVisibility(CurrentFnSym, F.getVisibility()); @@ -756,10 +916,10 @@ void AsmPrinter::emitFunctionHeader() { OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold); if (isVerbose()) { - F.printAsOperand(OutStreamer->GetCommentOS(), - /*PrintType=*/false, F.getParent()); + F.printAsOperand(OutStreamer->getCommentOS(), + /*PrintType=*/false, F.getParent()); emitFunctionHeaderComment(); - OutStreamer->GetCommentOS() << '\n'; + OutStreamer->getCommentOS() << '\n'; } // Emit the prefix data. @@ -817,7 +977,7 @@ void AsmPrinter::emitFunctionHeader() { // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. std::vector<MCSymbol *> DeadBlockSyms; - MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); + takeDeletedSymbolsForFunction(&F, DeadBlockSyms); for (MCSymbol *DeadBlockSym : DeadBlockSyms) { OutStreamer->AddComment("Address taken block that was later removed"); OutStreamer->emitLabel(DeadBlockSym); @@ -844,6 +1004,24 @@ void AsmPrinter::emitFunctionHeader() { // Emit the prologue data. if (F.hasPrologueData()) emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); + + // Emit the function prologue data for the indirect call sanitizer. + if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) { + assert(TM.getTargetTriple().getArch() == Triple::x86 || + TM.getTargetTriple().getArch() == Triple::x86_64); + assert(MD->getNumOperands() == 2); + + auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0)); + auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1)); + assert(PrologueSig && FTRTTIProxy); + emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig); + + const MCExpr *Proxy = lowerConstant(FTRTTIProxy); + const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext); + const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext); + // Use 32 bit since only small code model is supported.
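The new MD_func_sanitize block above lays down a signature constant followed by a 4-byte PC-relative offset to an RTTI proxy ahead of the function's entry label; the emitValue call just below writes that offset. A sketch of how a checker could turn the stored delta back into the proxy's address, assuming the 4 bytes sit immediately before the entry point with no padding (the reader function and its placement assumptions are illustrative, not a defined runtime contract):

#include <cstdint>
#include <cstring>

// Sketch: the emitted value is Proxy - FnEntry, so the proxy address is
// recovered by adding the stored int32 delta back to the entry address.
const void *loadRTTIProxy(const void *FnEntry) {
  int32_t Delta;
  std::memcpy(&Delta, static_cast<const char *>(FnEntry) - 4, sizeof(Delta));
  return static_cast<const char *>(FnEntry) + Delta;
}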
+ OutStreamer->emitValue(PCRel, 4u); + } } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -912,7 +1090,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { << printReg(RegNo, MF->getSubtarget().getRegisterInfo()); OutStreamer->AddComment(OS.str()); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -925,7 +1103,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); } AP.OutStreamer->AddComment(OS.str()); - AP.OutStreamer->AddBlankLine(); + AP.OutStreamer->addBlankLine(); } /// emitDebugValueComment - This method handles the target-independent form @@ -1147,32 +1325,42 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { const MCSymbol *FunctionSymbol = getFunctionBegin(); - OutStreamer->PushSection(); - OutStreamer->SwitchSection(BBAddrMapSection); + OutStreamer->pushSection(); + OutStreamer->switchSection(BBAddrMapSection); + OutStreamer->AddComment("version"); + OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion()); + OutStreamer->AddComment("feature"); + OutStreamer->emitInt8(0); + OutStreamer->AddComment("function address"); OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); - // Emit the total number of basic blocks in this function. + OutStreamer->AddComment("number of basic blocks"); OutStreamer->emitULEB128IntValue(MF.size()); + const MCSymbol *PrevMBBEndSymbol = FunctionSymbol; // Emit BB Information for each basic block in the funciton. for (const MachineBasicBlock &MBB : MF) { const MCSymbol *MBBSymbol = MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol(); - // Emit the basic block offset. - emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol); + // Emit the basic block offset relative to the end of the previous block. + // This is zero unless the block is padded due to alignment. + emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol); // Emit the basic block size. When BBs have alignments, their size cannot // always be computed from their offsets. 
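Encoding each block's offset relative to the previous block's end keeps the ULEB128s tiny (zero unless alignment padding intervenes), at the cost of making the table sequential: a consumer must walk the blocks in order, carrying an address accumulator. A sketch of that recurrence over the fields emitted above; readULEB128 and recordBlock are hypothetical stand-ins for the consumer's decoder and sink:

// Sketch: rebuild [Start, End) per block from the entries emitted above:
// function address, block count, then (gap, size, metadata) triples.
uint64_t Addr = FuncAddress;
for (uint64_t I = 0; I != NumBlocks; ++I) {
  Addr += readULEB128();         // gap from the previous block's end
  uint64_t Size = readULEB128(); // block size, emitted next in the loop above
  uint64_t Meta = readULEB128(); // getBBAddrMapMetadata() flags
  recordBlock(Addr, Addr + Size, Meta);
  Addr += Size;                  // the next gap is relative to this end
}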
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + PrevMBBEndSymbol = MBB.getEndSymbol(); } - OutStreamer->PopSection(); + OutStreamer->popSection(); } void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) { - auto GUID = MI.getOperand(0).getImm(); - auto Index = MI.getOperand(1).getImm(); - auto Type = MI.getOperand(2).getImm(); - auto Attr = MI.getOperand(3).getImm(); - DILocation *DebugLoc = MI.getDebugLoc(); - PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc); + if (PP) { + auto GUID = MI.getOperand(0).getImm(); + auto Index = MI.getOperand(1).getImm(); + auto Type = MI.getOperand(2).getImm(); + auto Attr = MI.getOperand(3).getImm(); + DILocation *DebugLoc = MI.getDebugLoc(); + PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc); + } } void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { @@ -1189,15 +1377,16 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { if (FrameInfo.hasVarSizedObjects()) return; - OutStreamer->PushSection(); - OutStreamer->SwitchSection(StackSizeSection); + OutStreamer->pushSection(); + OutStreamer->switchSection(StackSizeSection); const MCSymbol *FunctionSymbol = getFunctionBegin(); - uint64_t StackSize = FrameInfo.getStackSize(); + uint64_t StackSize = + FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize(); OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); OutStreamer->emitULEB128IntValue(StackSize); - OutStreamer->PopSection(); + OutStreamer->popSection(); } void AsmPrinter::emitStackUsage(const MachineFunction &MF) { @@ -1208,7 +1397,8 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) { return; const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - uint64_t StackSize = FrameInfo.getStackSize(); + uint64_t StackSize = + FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize(); if (StackUsageStream == nullptr) { std::error_code EC; @@ -1298,7 +1488,7 @@ void AsmPrinter::emitFunctionBody() { } if (isVerbose()) - emitComments(MI, OutStreamer->GetCommentOS()); + emitComments(MI, OutStreamer->getCommentOS()); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -1460,7 +1650,7 @@ void AsmPrinter::emitFunctionBody() { } // Switch to the original section in case basic block sections was used. - OutStreamer->SwitchSection(MF->getSection()); + OutStreamer->switchSection(MF->getSection()); const Function &F = MF->getFunction(); for (const auto &BB : F) { @@ -1527,9 +1717,9 @@ void AsmPrinter::emitFunctionBody() { emitPatchableFunctionEntries(); if (isVerbose()) - OutStreamer->GetCommentOS() << "-- End function\n"; + OutStreamer->getCommentOS() << "-- End function\n"; - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } /// Compute the number of Global Variables that uses a Constant. @@ -1617,10 +1807,7 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { // Treat bitcasts of functions as functions also. This is important at least // on WebAssembly where object and function addresses can't alias each other. if (!IsFunction) - if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee())) - if (CE->getOpcode() == Instruction::BitCast) - IsFunction = - CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy(); + IsFunction = isa<Function>(GA.getAliasee()->stripPointerCasts()); // AIX's assembly directive `.set` is not usable for aliasing purpose, // so AIX has to use the extra-label-at-definition strategy.
At this @@ -1650,13 +1837,13 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { if (IsFunction) { OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction); if (TM.getTargetTriple().isOSBinFormatCOFF()) { - OutStreamer->BeginCOFFSymbolDef(Name); - OutStreamer->EmitCOFFSymbolStorageClass( + OutStreamer->beginCOFFSymbolDef(Name); + OutStreamer->emitCOFFSymbolStorageClass( GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->endCOFFSymbolDef(); } } @@ -1734,7 +1921,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { // Switch to the remarks section. MCSection *RemarksSection = OutContext.getObjectFileInfo()->getRemarksSection(); - OutStreamer->SwitchSection(RemarksSection); + OutStreamer->switchSection(RemarksSection); OutStreamer->emitBinaryData(OS.str()); } @@ -1805,7 +1992,7 @@ bool AsmPrinter::doFinalization(Module &M) { // Output stubs for external and common global variables. MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { - OutStreamer->SwitchSection(TLOF.getDataSection()); + OutStreamer->switchSection(TLOF.getDataSection()); const DataLayout &DL = M.getDataLayout(); emitAlignment(Align(DL.getPointerSize())); @@ -1829,7 +2016,7 @@ bool AsmPrinter::doFinalization(Module &M) { for (const auto &Stub : Stubs) { SmallString<256> SectionName = StringRef(".rdata$"); SectionName += Stub.first->getName(); - OutStreamer->SwitchSection(OutContext.getCOFFSection( + OutStreamer->switchSection(OutContext.getCOFFSection( SectionName, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_LNK_COMDAT, @@ -1920,31 +2107,14 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit bytes for llvm.commandline metadata. emitModuleCommandLines(M); - // Emit __morestack address if needed for indirect calls. - if (MMI->usesMorestackAddr()) { - Align Alignment(1); - MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( - getDataLayout(), SectionKind::getReadOnly(), - /*C=*/nullptr, Alignment); - OutStreamer->SwitchSection(ReadOnlySection); - - MCSymbol *AddrSymbol = - OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); - OutStreamer->emitLabel(AddrSymbol); - - unsigned PtrSize = MAI->getCodePointerSize(); - OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"), - PtrSize); - } - // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if // split-stack is used. 
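The MMI queries go away because the printer now tracks split-stack state itself: SetupMachineFunction (further down in this file) accumulates two booleans while functions are emitted, and doFinalization only has to test them. The accumulation logic, restated as a sketch over a hypothetical list of machine functions:

// Sketch of the per-function flag accumulation that replaces
// MMI->hasSplitStack()/hasNosplitStack(); AllFunctions is hypothetical.
bool HasSplitStack = false, HasNoSplitStack = false;
for (const llvm::MachineFunction *MF : AllFunctions) {
  if (MF->shouldSplitStack()) {
    HasSplitStack = true;
    // A split-stack function that needs no prolog behaves like ordinary
    // code, so it also marks the module for the no-split-stack note.
    if (!MF->getFrameInfo().needsSplitStackProlog())
      HasNoSplitStack = true;
  } else {
    HasNoSplitStack = true;
  }
}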
- if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) { - OutStreamer->SwitchSection( - OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0)); - if (MMI->hasNosplitStack()) - OutStreamer->SwitchSection( - OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0)); + if (TM.getTargetTriple().isOSBinFormatELF() && HasSplitStack) { + OutStreamer->switchSection(OutContext.getELFSection(".note.GNU-split-stack", + ELF::SHT_PROGBITS, 0)); + if (HasNoSplitStack) + OutStreamer->switchSection(OutContext.getELFSection( + ".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0)); } // If we don't have any trampolines, then we don't require stack memory @@ -1952,7 +2122,7 @@ bool AsmPrinter::doFinalization(Module &M) { Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) - OutStreamer->SwitchSection(S); + OutStreamer->switchSection(S); if (TM.Options.EmitAddrsig) { // Emit address-significance attributes for all globals. @@ -1973,7 +2143,7 @@ bool AsmPrinter::doFinalization(Module &M) { GV.getVisibility() != GlobalValue::DefaultVisibility) continue; - OutStreamer->SwitchSection( + OutStreamer->switchSection( OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", false, ++UniqueID, nullptr)); OutStreamer->emitBytes(GV.getPartition()); @@ -1989,8 +2159,9 @@ bool AsmPrinter::doFinalization(Module &M) { emitEndOfAsmFile(M); MMI = nullptr; + AddrLabelSymbols = nullptr; - OutStreamer->Finish(); + OutStreamer->finish(); OutStreamer->reset(); OwnedMLI.reset(); OwnedMDT.reset(); @@ -2009,6 +2180,16 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; const Function &F = MF.getFunction(); + // Record that there are split-stack functions, so we will emit a special + // section to tell the linker. + if (MF.shouldSplitStack()) { + HasSplitStack = true; + + if (!MF.getFrameInfo().needsSplitStackProlog()) + HasNoSplitStack = true; + } else + HasNoSplitStack = true; + // Get the function symbol. if (!MAI->needsFunctionDescriptors()) { CurrentFnSym = getSymbol(&MF.getFunction()); @@ -2113,7 +2294,7 @@ void AsmPrinter::emitConstantPool() { continue; if (CurSection != CPSections[i].S) { - OutStreamer->SwitchSection(CPSections[i].S); + OutStreamer->switchSection(CPSections[i].S); emitAlignment(Align(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; @@ -2156,7 +2337,7 @@ void AsmPrinter::emitJumpTableInfo() { if (JTInDiffSection) { // Drop it in the readonly section. MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); - OutStreamer->SwitchSection(ReadOnlySection); + OutStreamer->switchSection(ReadOnlySection); } emitAlignment(Align(MJTI->getEntryAlignment(DL))); @@ -2392,7 +2573,7 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, MCSection *OutputSection = (IsCtor ? 
Obj.getStaticCtorSection(S.Priority, KeySym) : Obj.getStaticDtorSection(S.Priority, KeySym)); - OutStreamer->SwitchSection(OutputSection); + OutStreamer->switchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) emitAlignment(Align); emitXXStructor(DL, S.Func); @@ -2423,8 +2604,8 @@ void AsmPrinter::emitModuleCommandLines(Module &M) { if (!NMD || !NMD->getNumOperands()) return; - OutStreamer->PushSection(); - OutStreamer->SwitchSection(CommandLine); + OutStreamer->pushSection(); + OutStreamer->switchSection(CommandLine); OutStreamer->emitZeros(1); for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { const MDNode *N = NMD->getOperand(i); @@ -2434,7 +2615,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) { OutStreamer->emitBytes(S->getString()); OutStreamer->emitZeros(1); } - OutStreamer->PopSection(); + OutStreamer->popSection(); } //===--------------------------------------------------------------------===// @@ -2471,7 +2652,7 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer->EmitCOFFSecRel32(Label, Offset); + OutStreamer->emitCOFFSecRel32(Label, Offset); if (Size > 4) OutStreamer->emitZeros(Size - 4); return; @@ -2541,6 +2722,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { llvm_unreachable("Unknown constant value to lower!"); } + // The constant expression opcodes are limited to those that are necessary + // to represent relocations on supported targets. Expressions involving only + // constant addresses are constant folded instead. switch (CE->getOpcode()) { case Instruction::AddrSpaceCast: { const Constant *Op = CE->getOperand(0); @@ -2658,34 +2842,17 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return RelocExpr; } } + + const MCExpr *LHS = lowerConstant(CE->getOperand(0)); + const MCExpr *RHS = lowerConstant(CE->getOperand(1)); + return MCBinaryExpr::createSub(LHS, RHS, Ctx); + break; } - // else fallthrough - LLVM_FALLTHROUGH; - - // The MC library also has a right-shift operator, but it isn't consistently - // signed or unsigned between different targets. 
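After this hunk, lowerConstant builds MC binary expressions only for Add and Sub: relocatable object formats can generally express just symbol+addend and symbol-difference forms, so anything else (the Mul/SDiv/Shl/And/... cases removed below) is expected to have been constant-folded before reaching the asm printer. In miniature, the surviving lowering is:

#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"

// Sketch of the only binary ConstantExpr forms still lowered here, e.g.
// a label difference such as `.long .Lb - .La` comes from a Sub.
const llvm::MCExpr *lowerAddSub(unsigned Opcode, const llvm::MCExpr *LHS,
                                const llvm::MCExpr *RHS,
                                llvm::MCContext &Ctx) {
  using namespace llvm;
  switch (Opcode) {
  case Instruction::Add:
    return MCBinaryExpr::createAdd(LHS, RHS, Ctx); // symbol + addend
  case Instruction::Sub:
    return MCBinaryExpr::createSub(LHS, RHS, Ctx); // symbol - symbol
  default:
    llvm_unreachable("folded away before lowering");
  }
}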
- case Instruction::Add: - case Instruction::Mul: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::Shl: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { + + case Instruction::Add: { const MCExpr *LHS = lowerConstant(CE->getOperand(0)); const MCExpr *RHS = lowerConstant(CE->getOperand(1)); - switch (CE->getOpcode()) { - default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx); - } + return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } } @@ -2719,7 +2886,7 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { assert(Size % 8 == 0); // Extend the element to take zero padding into account. - APInt Value = CI->getValue().zextOrSelf(Size); + APInt Value = CI->getValue().zext(Size); if (!Value.isSplat(8)) return -1; @@ -2768,8 +2935,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, if (isa(CDS->getElementType())) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", - CDS->getElementAsInteger(i)); + AP.OutStreamer->getCommentOS() + << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } @@ -2855,8 +3022,8 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { if (AP.isVerbose()) { SmallString<8> StrVal; APF.toString(StrVal); - ET->print(AP.OutStreamer->GetCommentOS()); - AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; + ET->print(AP.OutStreamer->getCommentOS()); + AP.OutStreamer->getCommentOS() << ' ' << StrVal << '\n'; } // Now iterate through the APInt chunks, emitting them in endian-correct @@ -3061,8 +3228,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, if (StoreSize <= 8) { if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", - CI->getZExtValue()); + AP.OutStreamer->getCommentOS() + << format("0x%" PRIx64 "\n", CI->getZExtValue()); AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize); } else { emitGlobalConstantLargeInt(CI, AP); @@ -3163,11 +3330,12 @@ MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { - return MMI->getAddrLabelSymbol(BA->getBasicBlock()); + return const_cast(this)->getAddrLabelSymbol( + BA->getBasicBlock()); } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { - return MMI->getAddrLabelSymbol(BB); + return const_cast(this)->getAddrLabelSymbol(BB); } /// GetCPISymbol - Return the symbol for the specified constant pool entry. @@ -3272,7 +3440,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, // Otherwise, it is a loop header. Print out information about child and // parent loops. 
- raw_ostream &OS = AP.OutStreamer->GetCommentOS(); + raw_ostream &OS = AP.OutStreamer->getCommentOS(); PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); @@ -3308,7 +3476,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // entry block is always placed in the function section and is handled // separately. if (MBB.isBeginSection() && !MBB.isEntryBlock()) { - OutStreamer->SwitchSection( + OutStreamer->switchSection( getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), MBB, TM)); CurrentSectionBeginSym = MBB.getSymbol(); @@ -3326,7 +3494,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // MBBs can have their address taken as part of CodeGen without having // their corresponding BB's address taken in IR if (BB && BB->hasAddressTaken()) - for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB)) OutStreamer->emitLabel(Sym); } @@ -3334,9 +3502,9 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { if (isVerbose()) { if (BB) { if (BB->hasName()) { - BB->printAsOperand(OutStreamer->GetCommentOS(), + BB->printAsOperand(OutStreamer->getCommentOS(), /*PrintType=*/false, BB->getModule()); - OutStreamer->GetCommentOS() << '\n'; + OutStreamer->getCommentOS() << '\n'; } } @@ -3563,7 +3731,7 @@ void AsmPrinter::emitXRayTable() { // range of sleds associated with a function. auto &Ctx = OutContext; MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); - OutStreamer->SwitchSection(InstMap); + OutStreamer->switchSection(InstMap); OutStreamer->emitLabel(SledsStart); for (const auto &Sled : Sleds) { MCSymbol *Dot = Ctx.createTempSymbol(); @@ -3590,11 +3758,11 @@ void AsmPrinter::emitXRayTable() { // Each entry here will be 2 * word size aligned, as we're writing down two // pointers. This should work for both 32-bit and 64-bit platforms. 
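Each function-index record is just the two word-sized values emitted below, [SledsStart, SledsEnd), so the table costs two pointers per instrumented function and lets a runtime find every sled of a function without scanning the whole instrumentation map. As a sketch, the in-memory shape a reader might overlay on the section (field names hypothetical):

#include <cstddef>

// Sketch: one xray_fn_idx entry as laid out by the directives below,
// aligned to twice the pointer size.
struct alignas(2 * sizeof(void *)) XRayFnIdxEntry {
  const void *SledsStart; // first sled belonging to this function
  const void *SledsEnd;   // one past this function's last sled
};
static_assert(sizeof(XRayFnIdxEntry) == 2 * sizeof(void *),
              "entries are exactly two machine words");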
if (FnSledIndex) { - OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->switchSection(FnSledIndex); OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo()); OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); - OutStreamer->SwitchSection(PrevSection); + OutStreamer->switchSection(PrevSection); } Sleds.clear(); } @@ -3639,7 +3807,7 @@ void AsmPrinter::emitPatchableFunctionEntries() { } LinkedToSym = cast(CurrentFnSym); } - OutStreamer->SwitchSection(OutContext.getELFSection( + OutStreamer->switchSection(OutContext.getELFSection( "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym)); emitAlignment(Align(PointerSize)); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index fc127f4cf9da..719fec06aa33 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -19,14 +18,11 @@ #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include using namespace llvm; @@ -162,7 +158,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (MAI->needsDwarfSectionOffsetDirective()) { assert(!isDwarf64() && "emitting DWARF64 is not implemented for COFF targets"); - OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); + OutStreamer->emitCOFFSecRel32(Label, /*Offset=*/0); return; } @@ -277,6 +273,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpUndefined: OutStreamer->emitCFIUndefined(Inst.getRegister()); break; + case MCCFIInstruction::OpRememberState: + OutStreamer->emitCFIRememberState(); + break; + case MCCFIInstruction::OpRestoreState: + OutStreamer->emitCFIRestoreState(); + break; } } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 5d0cadefdbf7..88c82cbc958b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -26,9 +26,10 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" @@ -115,7 +116,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, 
const MCSubtargetInfo &STI, report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); - Parser->setTargetParser(*TAP.get()); + Parser->setTargetParser(*TAP); // Enable lexing Masm binary and hex integer literals in intel inline // assembly. if (Dialect == InlineAsm::AD_Intel) @@ -398,9 +399,9 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { if (!RestrRegs.empty()) { std::string Msg = "inline asm clobber list contains reserved registers: "; ListSeparator LS; - for (const Register &RR : RestrRegs) { + for (const Register RR : RestrRegs) { Msg += LS; - Msg += TRI->getName(RR); + Msg += TRI->getRegAsmName(RR); } const char *Note = "Reserved registers on the clobber list may not be " diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 52c74713551c..701c0affdfa6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "CodeViewDebug.h" -#include "DwarfExpression.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -29,7 +28,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -41,7 +39,6 @@ #include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableCollection.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" @@ -58,11 +55,8 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -230,7 +224,7 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { break; } } - bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, + bool Success = OS.emitCVFileDirective(NextId, FullPath, ChecksumAsBytes, static_cast(CSKind)); (void)Success; assert(Success && ".cv_file directive failed"); @@ -251,7 +245,7 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt, .SiteFuncId; Site->SiteFuncId = NextFuncId++; - OS.EmitCVInlineSiteIdDirective( + OS.emitCVInlineSiteIdDirective( Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()), InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc()); Site->Inlinee = Inlinee; @@ -515,7 +509,7 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, if (!DL || DL == PrevInstLoc) return; - const DIScope *Scope = DL.get()->getScope(); + const DIScope *Scope = DL->getScope(); if (!Scope) return; @@ -614,18 +608,16 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { void CodeViewDebug::beginModule(Module *M) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip 
any debug info related stuff. - NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu"); - if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { + if (!MMI->hasDebugInfo() || + !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { Asm = nullptr; return; } - // Tell MMI that we have and need debug info. - MMI->setDebugInfoAvailability(true); TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch()); // Get the current source language. - const MDNode *Node = *CUs->operands().begin(); + const MDNode *Node = *M->debug_compile_units_begin(); const auto *CU = cast<DICompileUnit>(Node); CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage()); @@ -727,7 +719,7 @@ void CodeViewDebug::emitTypeInformation() { return; // Start the .debug$T or .debug$P section with 0x4. - OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); + OS.switchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); emitCodeViewMagicVersion(); TypeTableCollection Table(TypeTable.records()); @@ -760,7 +752,7 @@ void CodeViewDebug::emitTypeGlobalHashes() { // Start the .debug$H section with the version and hash algorithm, currently // hardcoded to version 0, SHA1. - OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); + OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); OS.emitValueToAlignment(4); OS.AddComment("Magic"); @@ -826,6 +818,8 @@ static Version parseVersion(StringRef Name) { if (isdigit(C)) { V.Part[N] *= 10; V.Part[N] += C - '0'; + V.Part[N] = + std::min<int>(V.Part[N], std::numeric_limits<uint16_t>::max()); } else if (C == '.') { ++N; if (N >= 4) @@ -867,7 +861,6 @@ void CodeViewDebug::emitCompilerInformation() { Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); for (int N : FrontVer.Part) { - N = std::min<int>(N, std::numeric_limits<uint16_t>::max()); OS.emitInt16(N); } @@ -985,11 +978,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() { assert(TypeIndices.count({SP, nullptr})); TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}]; - OS.AddBlankLine(); + OS.addBlankLine(); unsigned FileId = maybeRecordFile(SP->getFile()); OS.AddComment("Inlined function " + SP->getName() + " starts at " + SP->getFilename() + Twine(':') + Twine(SP->getLine())); - OS.AddBlankLine(); + OS.addBlankLine(); OS.AddComment("Type index of inlined function"); OS.emitInt32(InlineeIdx.getIndex()); OS.AddComment("Offset into filechecksum table"); @@ -1051,7 +1044,7 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym); - OS.SwitchSection(DebugSec); + OS.switchSection(DebugSec); // Emit the magic version number if this is the first time we've switched to // this section. @@ -1080,9 +1073,9 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, OS.AddComment("PtrNext"); OS.emitInt32(0); OS.AddComment("Thunk section relative address"); - OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.emitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Thunk section index"); - OS.EmitCOFFSectionIndex(Fn); + OS.emitCOFFSectionIndex(Fn); OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); OS.AddComment("Ordinal"); @@ -1132,7 +1125,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // Emit FPO data, but only on 32-bit x86. No other platforms use it.
if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) - OS.EmitCVFPOData(Fn); + OS.emitCVFPOData(Fn); // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); @@ -1160,9 +1153,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Function type index"); OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex()); OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.emitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Function section index"); - OS.EmitCOFFSectionIndex(Fn); + OS.emitCOFFSectionIndex(Fn); OS.AddComment("Flags"); OS.emitInt8(0); // Emit the function display name as a null-terminated string. @@ -1207,9 +1200,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, MCSymbol *Label = Annot.first; MDTuple *Strs = cast(Annot.second); MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION); - OS.EmitCOFFSecRel32(Label, /*Offset=*/0); + OS.emitCOFFSecRel32(Label, /*Offset=*/0); // FIXME: Make sure we don't overflow the max record size. - OS.EmitCOFFSectionIndex(Label); + OS.emitCOFFSectionIndex(Label); OS.emitInt16(Strs->getNumOperands()); for (Metadata *MD : Strs->operands()) { // MDStrings are null terminated, so we can do EmitBytes and get the @@ -1227,9 +1220,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); - OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); + OS.emitCOFFSecRel32(BeginLabel, /*Offset=*/0); OS.AddComment("Call site section index"); - OS.EmitCOFFSectionIndex(BeginLabel); + OS.emitCOFFSectionIndex(BeginLabel); OS.AddComment("Call instruction length"); OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); OS.AddComment("Type index"); @@ -1249,9 +1242,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End); } -CodeViewDebug::LocalVarDefRange +CodeViewDebug::LocalVarDef CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { - LocalVarDefRange DR; + LocalVarDef DR; DR.InMemory = -1; DR.DataOffset = Offset; assert(DR.DataOffset == Offset && "truncation"); @@ -1303,19 +1296,19 @@ void CodeViewDebug::collectVariableInfoFromMFTable( "Frame offsets with a scalable component are not supported"); // Calculate the label ranges. - LocalVarDefRange DefRange = + LocalVarDef DefRange = createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset); + LocalVariable Var; + Var.DIVar = VI.Var; + for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); End = End ? End : Asm->getFunctionEnd(); - DefRange.Ranges.emplace_back(Begin, End); + Var.DefRanges[DefRange].emplace_back(Begin, End); } - LocalVariable Var; - Var.DIVar = VI.Var; - Var.DefRanges.emplace_back(std::move(DefRange)); if (Deref) Var.UseReferenceType = true; @@ -1374,24 +1367,18 @@ void CodeViewDebug::calculateRanges( // We can only handle a register or an offseted load of a register. if (Location->Register == 0 || Location->LoadChain.size() > 1) continue; - { - LocalVarDefRange DR; - DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); - DR.InMemory = !Location->LoadChain.empty(); - DR.DataOffset = - !Location->LoadChain.empty() ? 
Location->LoadChain.back() : 0; + if (Location->FragmentInfo) { + DR.IsSubfield = true; + DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; + } else { + DR.IsSubfield = false; + DR.StructOffset = 0; } // Compute the label range. @@ -1408,7 +1395,7 @@ void CodeViewDebug::calculateRanges( // If the last range end is our begin, just extend the last range. // Otherwise make a new range. SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R = - Var.DefRanges.back().Ranges; + Var.DefRanges[DR]; if (!R.empty() && R.back().second == Begin) R.back().second = End; else @@ -1525,7 +1512,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { // FIXME: Set GuardCfg when it is implemented. CurFn->FrameProcOpts = FPO; - OS.EmitCVFuncIdDirective(CurFn->FuncId); + OS.emitCVFuncIdDirective(CurFn->FuncId); // Find the end of the function prolog. First known non-DBG_VALUE and // non-frame setup location marks the beginning of the function body. @@ -1825,6 +1812,7 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { break; case dwarf::DW_ATE_UTF: switch (ByteSize) { + case 1: STK = SimpleTypeKind::Character8; break; case 2: STK = SimpleTypeKind::Character16; break; case 4: STK = SimpleTypeKind::Character32; break; } @@ -2820,7 +2808,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, // records and on disk formats are described in SymbolRecords.h. BytePrefix // should be big enough to hold all forms without memory allocation.
SmallString<20> BytePrefix; - for (const LocalVarDefRange &DefRange : Var.DefRanges) { + for (const auto &Pair : Var.DefRanges) { + LocalVarDef DefRange = Pair.first; + const auto &Ranges = Pair.second; BytePrefix.clear(); if (DefRange.InMemory) { int Offset = DefRange.DataOffset; @@ -2844,7 +2834,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, : (EncFP == FI.EncodedLocalFramePtrReg))) { DefRangeFramePointerRelHeader DRHdr; DRHdr.Offset = Offset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } else { uint16_t RegRelFlags = 0; if (DefRange.IsSubfield) { @@ -2856,7 +2846,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); @@ -2865,12 +2855,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } else { DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } } } @@ -2894,9 +2884,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset + OS.emitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset OS.AddComment("Function section index"); - OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol + OS.emitCOFFSectionIndex(FI.Begin); // Func Symbol OS.AddComment("Lexical block name"); emitNullTerminatedSymbolName(OS, Block.Name); // Name endSymbolRecord(RecordEnd); @@ -3181,6 +3171,11 @@ void CodeViewDebug::collectGlobalVariableInfo() { for (const auto *GVE : CU->getGlobalVariables()) { const DIGlobalVariable *DIGV = GVE->getVariable(); const DIExpression *DIE = GVE->getExpression(); + // Don't emit string literals in CodeView, as the only useful parts are + // generally the filename and line number, which isn't possible to output + // in CodeView. String literals should be the only unnamed GlobalVariable + // with debug info. + if (DIGV->getName().empty()) continue; if ((DIE->getNumElements() == 2) && (DIE->getElement(0) == dwarf::DW_OP_plus_uconst)) @@ -3380,10 +3375,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end()) // Use the offset seen while collecting info on globals. 
Offset = CVGlobalVariableOffsets[DIGV]; - OS.EmitCOFFSecRel32(GVSym, Offset); + OS.emitCOFFSecRel32(GVSym, Offset); OS.AddComment("Segment"); - OS.EmitCOFFSectionIndex(GVSym); + OS.emitCOFFSectionIndex(GVSym); OS.AddComment("Name"); const unsigned LengthOfDataRecord = 12; emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index d1fc3cdccb20..16f0082723ed 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -50,18 +50,8 @@ class MachineFunction; /// Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { - MCStreamer &OS; - BumpPtrAllocator Allocator; - codeview::GlobalTypeTableBuilder TypeTable; - - /// Whether to emit type record hashes into .debug$H. - bool EmitDebugGlobalHashes = false; - - /// The codeview CPU type used by the translation unit. - codeview::CPUType TheCPU; - - /// Represents the most general definition range. - struct LocalVarDefRange { +public: + struct LocalVarDef { /// Indicates that variable data is stored in memory relative to the /// specified register. int InMemory : 1; @@ -79,23 +69,40 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// location containing the data. uint16_t CVRegister; - /// Compares all location fields. This includes all fields except the label - /// ranges. - bool isDifferentLocation(LocalVarDefRange &O) { - return InMemory != O.InMemory || DataOffset != O.DataOffset || - IsSubfield != O.IsSubfield || StructOffset != O.StructOffset || - CVRegister != O.CVRegister; + uint64_t static toOpaqueValue(const LocalVarDef DR) { + uint64_t Val = 0; + std::memcpy(&Val, &DR, sizeof(Val)); + return Val; } - SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges; + LocalVarDef static createFromOpaqueValue(uint64_t Val) { + LocalVarDef DR; + std::memcpy(&DR, &Val, sizeof(Val)); + return DR; + } }; - static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); + static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), ""); + +private: + MCStreamer &OS; + BumpPtrAllocator Allocator; + codeview::GlobalTypeTableBuilder TypeTable; + + /// Whether to emit type record hashes into .debug$H. + bool EmitDebugGlobalHashes = false; + + /// The codeview CPU type used by the translation unit. + codeview::CPUType TheCPU; + + static LocalVarDef createDefRangeMem(uint16_t CVRegister, int Offset); /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable { const DILocalVariable *DIVar = nullptr; - SmallVector<LocalVarDefRange, 1> DefRanges; + MapVector<LocalVarDef, SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>> + DefRanges; bool UseReferenceType = false; }; @@ -493,6 +500,27 @@ public: void beginInstruction(const MachineInstr *MI) override; }; +template <> struct DenseMapInfo<CodeViewDebug::LocalVarDef> { + + static inline CodeViewDebug::LocalVarDef getEmptyKey() { + return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL); + } + + static inline CodeViewDebug::LocalVarDef getTombstoneKey() { + return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL - 1ULL); + } + + static unsigned getHashValue(const CodeViewDebug::LocalVarDef &DR) { + return CodeViewDebug::LocalVarDef::toOpaqueValue(DR) * 37ULL; + } + + static bool isEqual(const CodeViewDebug::LocalVarDef &LHS, + const CodeViewDebug::LocalVarDef &RHS) { + return CodeViewDebug::LocalVarDef::toOpaqueValue(LHS) == + CodeViewDebug::LocalVarDef::toOpaqueValue(RHS); + } +}; + } // end namespace llvm #endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 396322c4979d..617ddbd66e4e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -13,21 +13,15 @@ #include "llvm/CodeGen/DIE.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" -#include "DwarfUnit.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -170,7 +164,7 @@ DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) { void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const { if (!Abbreviations.empty()) { // Start the debug abbrev section. - AP->OutStreamer->SwitchSection(Section); + AP->OutStreamer->switchSection(Section); AP->emitDwarfAbbrevs(Abbreviations); } } @@ -204,6 +198,7 @@ const DIE *DIE::getUnitDie() const { const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit || + p->getTag() == dwarf::DW_TAG_skeleton_unit || p->getTag() == dwarf::DW_TAG_type_unit) return p; p = p->getParent(); @@ -378,7 +373,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this?
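The DenseMapInfo<CodeViewDebug::LocalVarDef> specialization above leans on LocalVarDef being a trivially copyable struct whose bitfields pack to exactly 64 bits: memcpy'ing it through a uint64_t yields a well-defined key, and two bit patterns no real def range produces (~0 and ~0 - 1) serve as the empty and tombstone keys. The same pattern in miniature, on a hypothetical 8-byte POD key:

#include "llvm/ADT/DenseMap.h"
#include <cstdint>
#include <cstring>

struct TinyKey { // hypothetical key: 8 bytes, no padding
  int32_t Offset;
  uint16_t Reg;
  uint16_t Flags;
  uint64_t toOpaque() const {
    uint64_t V = 0;
    std::memcpy(&V, this, sizeof(V)); // bit-identical, padding-safe copy
    return V;
  }
  static TinyKey fromOpaque(uint64_t V) {
    TinyKey K;
    std::memcpy(&K, &V, sizeof(K));
    return K;
  }
};
static_assert(sizeof(TinyKey) == sizeof(uint64_t), "key must fill the word");

namespace llvm {
template <> struct DenseMapInfo<TinyKey> {
  static TinyKey getEmptyKey() { return TinyKey::fromOpaque(~0ULL); }
  static TinyKey getTombstoneKey() { return TinyKey::fromOpaque(~0ULL - 1ULL); }
  static unsigned getHashValue(const TinyKey &K) {
    return unsigned(K.toOpaque() * 37ULL); // same cheap mix as above
  }
  static bool isEqual(const TinyKey &A, const TinyKey &B) {
    return A.toOpaque() == B.toOpaque();
  }
};
} // end namespace llvm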
- Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); return; case dwarf::DW_FORM_flag: case dwarf::DW_FORM_ref1: diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index e175854f7b93..5da50d7aab9f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -19,7 +19,6 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index dd795079ac1a..1358f4d25990 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -204,7 +203,7 @@ void DbgValueHistoryMap::trimLocationRanges( if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) { // Adjust ScopeRanges to exclude ranges which subsequent location ranges // cannot possibly intersect. - ScopeRanges = ArrayRef(R.getValue(), ScopeRanges.end()); + ScopeRanges = ArrayRef(*R, ScopeRanges.end()); } else { // If the location range does not intersect any scope range then the // DBG_VALUE which opened this location range is usless, mark it for diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 18fc46c74eb4..660a064687d3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 63343d2519f9..5f187acf13dc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -11,23 +11,13 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -53,7 +43,7 @@ void DwarfCFIExceptionBase::endFragment() { DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} -DwarfCFIException::~DwarfCFIException() {} +DwarfCFIException::~DwarfCFIException() = default; /// endModule - Emit all exception information that 
should come after the /// content. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 5913c687db48..b3f99d346faa 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -67,13 +66,13 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, /// DW_FORM_addr or DW_FORM_GNU_addr_index. void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { + if ((Skeleton || !DD->useSplitDwarf()) && Label) + DD->addArangeLabel(SymbolCU(this, Label)); + // Don't use the address pool in non-fission or in the skeleton unit itself. if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5) return addLocalLabelAddress(Die, Attribute, Label); - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - bool UseAddrOffsetFormOrExpressions = DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions(); @@ -108,9 +107,6 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - if (Label) addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label)); else @@ -169,7 +165,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( } else { DeclContext = GV->getScope(); // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); + StringRef DisplayName = GV->getDisplayName(); + if (!DisplayName.empty()) + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); if (GTy) addType(*VariableDIE, GTy); @@ -303,8 +301,11 @@ void DwarfCompileUnit::addLocationAttribute( DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address : dwarf::DW_OP_form_tls_address); } - } else if (Asm->TM.getRelocationModel() == Reloc::RWPI || - Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) { + } else if ((Asm->TM.getRelocationModel() == Reloc::RWPI || + Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) && + !Asm->getObjFileLowering() + .getKindForGlobal(Global, Asm->TM) + .isReadOnly()) { auto FormAndOp = GetPointerSizedFormAndOp(); // Constant addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op); @@ -505,7 +506,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // FIXME: when writing dwo, we need to avoid relocations. Probably // the "right" solution is to treat globals the way func and data // symbols are (with entries in .debug_addr). - // For now, since we only ever use index 0, this should work as-is. + // For now, since we only ever use index 0, this should work as-is. 
addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index); } addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index f2e1f6346803..61412cde34c8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -25,7 +25,6 @@ #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Casting.h" -#include #include #include #include diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 609b568f28be..866338a949f3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -45,14 +45,11 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MachineLocation.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -360,7 +357,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A) DebuggerTuning = Asm->TM.Options.DebuggerTuning; else if (IsDarwin) DebuggerTuning = DebuggerKind::LLDB; - else if (TT.isPS4CPU()) + else if (TT.isPS()) DebuggerTuning = DebuggerKind::SCE; else if (TT.isOSAIX()) DebuggerTuning = DebuggerKind::DBX; @@ -2315,7 +2312,7 @@ void DwarfDebug::emitStringOffsetsTableHeader() { template void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName) { - Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->switchSection(Section); // Emit the full data. emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol()); @@ -2434,12 +2431,12 @@ void DwarfDebug::emitDebugPubSections() { bool GnuStyle = TheU->getCUNode()->getNameTableKind() == DICompileUnit::DebugNameTableKind::GNU; - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection()); emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames()); - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( GnuStyle ? 
Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection()); emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes()); @@ -2849,7 +2846,7 @@ void DwarfDebug::emitDebugLocImpl(MCSection *Sec) { if (DebugLocs.getLists().empty()) return; - Asm->OutStreamer->SwitchSection(Sec); + Asm->OutStreamer->switchSection(Sec); MCSymbol *TableEnd = nullptr; if (getDwarfVersion() >= 5) @@ -2880,7 +2877,7 @@ void DwarfDebug::emitDebugLocDWO() { } for (const auto &List : DebugLocs.getLists()) { - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); Asm->OutStreamer->emitLabel(List.Label); @@ -2953,8 +2950,8 @@ void DwarfDebug::emitDebugARanges() { // Sort the symbols by offset within the section. llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; + unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0; // Symbols with no order assigned should be placed at the end. // (e.g. section end labels) @@ -2987,7 +2984,7 @@ void DwarfDebug::emitDebugARanges() { } // Start the dwarf aranges section. - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( Asm->getObjFileLowering().getDwarfARangesSection()); unsigned PtrSize = Asm->MAI->getCodePointerSize(); @@ -3045,15 +3042,22 @@ void DwarfDebug::emitDebugARanges() { for (const ArangeSpan &Span : List) { Asm->emitLabelReference(Span.Start, PtrSize); - // Calculate the size as being from the span start to it's end. - if (Span.End) { + // Calculate the size as being from the span start to its end. + // + // If the size is zero, then round it up to one byte. The DWARF + // specification requires that entries in this table have nonzero + // lengths. + auto SizeRef = SymSize.find(Span.Start); + if ((SizeRef == SymSize.end() || SizeRef->second != 0) && Span.End) { Asm->emitLabelDifference(Span.End, Span.Start, PtrSize); } else { // For symbols without an end marker (e.g. common), we // write a single arange entry containing just that one symbol. 
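// Aside: the span-size selection below reduces to "emit the label difference
// when the symbol has a known nonzero size, otherwise clamp to one byte",
// because DWARF forbids zero-length .debug_aranges entries. The same decision
// restated as a standalone helper (plain C++ sketch; `KnownSizes` stands in
// for the SymSize map used by this code):
#include <cstdint>
#include <map>
#include <string>

// Returns the length in bytes to emit for one arange entry.
uint64_t arangeEntrySize(const std::map<std::string, uint64_t> &KnownSizes,
                         const std::string &Sym) {
  auto It = KnownSizes.find(Sym);
  if (It == KnownSizes.end() || It->second == 0)
    return 1; // unknown or zero-sized: round up so the entry stays legal
  return It->second;
}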
- uint64_t Size = SymSize[Span.Start]; - if (Size == 0) + uint64_t Size; + if (SizeRef == SymSize.end() || SizeRef->second == 0) Size = 1; + else + Size = SizeRef->second; Asm->OutStreamer->emitIntValue(Size, PtrSize); } @@ -3087,7 +3091,7 @@ void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section return !Pair.second->getCUNode()->isDebugDirectivesOnly(); })); - Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->switchSection(Section); MCSymbol *TableEnd = nullptr; if (getDwarfVersion() >= 5) @@ -3239,7 +3243,7 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { DIMacroNodeArray Macros = CUNode->getMacros(); if (Macros.empty()) continue; - Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->switchSection(Section); Asm->OutStreamer->emitLabel(U.getMacroLabelBegin()); if (UseDebugMacroSection) emitMacroHeader(Asm, *this, U, getDwarfVersion()); @@ -3447,22 +3451,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, CU.addDIETypeSignature(RefDie, Signature); } -DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) - : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { - DD->TypeUnitsUnderConstruction.clear(); - DD->AddrPool.resetUsedFlag(); -} - -DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { - DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); - DD->AddrPool.resetUsedFlag(AddrPoolUsed); -} - -DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { - return NonTypeUnitContext(this); -} - // Add the Name along with its companion DIE to the appropriate accelerator // table (for AccelTableKind::Dwarf it's always AccelDebugNames, for // AccelTableKind::Apple, we use the table we got as an argument). If @@ -3555,6 +3543,6 @@ Optional DwarfDebug::getMD5AsBytes(const DIFile *File) const { // An MD5 checksum is 16 bytes. std::string ChecksumString = fromHex(Checksum->Value); MD5::MD5Result CKMem; - std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data()); + std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data()); return CKMem; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4e1a1b1e068d..31e4081b7141 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,14 +14,13 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #include "AddressPool.h" -#include "DebugLocStream.h" #include "DebugLocEntry.h" +#include "DebugLocStream.h" #include "DwarfFile.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -31,7 +30,6 @@ #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" @@ -80,7 +78,7 @@ private: public: DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID) : Entity(N), InlinedAt(IA), SubclassID(ID) {} - virtual ~DbgEntity() {} + virtual ~DbgEntity() = default; /// Accessors. 
/// @{ @@ -667,19 +665,6 @@ public: void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - class NonTypeUnitContext { - DwarfDebug *DD; - decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; - bool AddrPoolUsed; - friend class DwarfDebug; - NonTypeUnitContext(DwarfDebug *DD); - public: - NonTypeUnitContext(NonTypeUnitContext&&) = default; - ~NonTypeUnitContext(); - }; - - NonTypeUnitContext enterNonTypeUnitContext(); - /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index fe438102ee98..1c21d5ee8bb1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -329,7 +329,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return false; } - assert(DwarfRegs.size() == 1); + // TODO: We should not give up here but the following code needs to be changed + // to deal with multiple (sub)registers first. + if (DwarfRegs.size() > 1) { + LLVM_DEBUG(dbgs() << "TODO: giving up on debug information due to " + "multi-register usage.\n"); + DwarfRegs.clear(); + LocationKind = Unknown; + return false; + } + auto Reg = DwarfRegs[0]; bool FBReg = isFrameRegister(TRI, MachineReg); int SignedOffset = 0; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index a67d0f032cf6..a497aa07284e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -12,9 +12,7 @@ #include "DwarfUnit.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Metadata.h" #include "llvm/MC/MCStreamer.h" -#include #include using namespace llvm; @@ -47,7 +45,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { if (llvm::empty(TheU->getUnitDie().values())) return; - Asm->OutStreamer->SwitchSection(S); + Asm->OutStreamer->switchSection(S); TheU->emitHeader(UseOffsets); Asm->emitDwarfDIE(TheU->getUnitDie()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index a876f8ccace9..67b72f0b455d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -39,7 +39,7 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, StringRef Str) { auto &MapEntry = getEntryImpl(Asm, Str); - return EntryRef(MapEntry, false); + return EntryRef(MapEntry); } DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, @@ -47,7 +47,7 @@ DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, auto &MapEntry = getEntryImpl(Asm, Str); if (!MapEntry.getValue().isIndexed()) MapEntry.getValue().Index = NumIndexedStrings++; - return EntryRef(MapEntry, true); + return EntryRef(MapEntry); } void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, @@ -55,7 +55,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSymbol *StartSym) { if (getNumIndexedStrings() == 0) return; - Asm.OutStreamer->SwitchSection(Section); + Asm.OutStreamer->switchSection(Section); unsigned EntrySize = Asm.getDwarfOffsetByteSize(); // We are emitting the header for a contribution to the string offsets // table. 
The header consists of an entry with the contribution's @@ -78,7 +78,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, return; // Start the dwarf str section. - Asm.OutStreamer->SwitchSection(StrSection); + Asm.OutStreamer->switchSection(StrSection); // Get all of the string pool entries and sort them by their offset. SmallVector *, 64> Entries; @@ -117,7 +117,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, Entries[Entry.getValue().Index] = &Entry; } - Asm.OutStreamer->SwitchSection(OffsetSection); + Asm.OutStreamer->switchSection(OffsetSection); unsigned size = Asm.getDwarfOffsetByteSize(); for (const auto &Entry : Entries) if (UseRelativeOffsets) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 5a2bd479f277..81238b0fe0d2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -17,12 +17,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -32,9 +28,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include #include @@ -380,6 +374,8 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, CU = getUnitDie().getUnit(); if (!EntryCU) EntryCU = getUnitDie().getUnit(); + assert(EntryCU == CU || !DD->useSplitDwarf() || DD->shareAcrossDWOCUs() || + !static_cast(CU)->isDwoUnit()); addAttribute(Die, Attribute, EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, Entry); @@ -596,10 +592,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, // Skip updating the accelerator tables since this is not the full type. if (MDString *TypeId = CTy->getRawIdentifier()) DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - else { - auto X = DD->enterNonTypeUnitContext(); + else finishNonUnitTypeDIE(TyDIE, CTy); - } return &TyDIE; } constructTypeDIE(TyDIE, CTy); @@ -805,7 +799,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // or reference types. 
if (DTy->getDWARFAddressSpace()) addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4, - DTy->getDWARFAddressSpace().getValue()); + *DTy->getDWARFAddressSpace()); } void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { @@ -1350,6 +1344,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isRecursive()) addFlag(SPDie, dwarf::DW_AT_recursive); + if (!SP->getTargetFuncName().empty()) + addString(SPDie, dwarf::DW_AT_trampoline, SP->getTargetFuncName()); + if (DD->getDwarfVersion() >= 5 && SP->isDeleted()) addFlag(SPDie, dwarf::DW_AT_deleted); } @@ -1442,7 +1439,8 @@ DIE *DwarfUnit::getIndexTyDie() { addString(*IndexTyDie, dwarf::DW_AT_name, Name); addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_unsigned); + dwarf::getArrayIndexTypeEncoding( + (dwarf::SourceLanguage)getLanguage())); DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0); return IndexTyDie; } @@ -1847,11 +1845,5 @@ void DwarfUnit::addRnglistsBase() { } void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { - addFlag(D, dwarf::DW_AT_declaration); - StringRef Name = CTy->getName(); - if (!Name.empty()) - addString(D, dwarf::DW_AT_name, Name); - if (Name.startswith("_STN") || !Name.contains('<')) - addTemplateParams(D, CTy->getTemplateParams()); - getCU().createTypeDIE(CTy); + DD->getAddressPool().resetUsedFlag(true); } diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 39f40b172c1b..31644959bdca 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -458,7 +457,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Sometimes we want not to emit the data into separate section (e.g. ARM // EHABI). In this case LSDASection will be NULL. if (LSDASection) - Asm->OutStreamer->SwitchSection(LSDASection); + Asm->OutStreamer->switchSection(LSDASection); Asm->emitAlignment(Align(4)); // Emit the LSDA. @@ -806,7 +805,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = TypeInfos.size(); } @@ -821,7 +820,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { // Emit the Exception Specifications. 
if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = 0; } for (std::vector::const_iterator diff --git a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 70777f07fc6c..62fd15d89512 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -23,7 +23,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -46,9 +45,8 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, unsigned IntPtrSize = M.getDataLayout().getPointerSize(); // Put this in a custom .note section. - OS.SwitchSection( - AP.getObjFileLowering().getContext().getELFSection(".note.gc", - ELF::SHT_PROGBITS, 0)); + OS.switchSection(AP.getObjFileLowering().getContext().getELFSection( + ".note.gc", ELF::SHT_PROGBITS, 0)); // For each function... for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3ade262d9af2..74fa30ab321b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -72,10 +72,10 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_begin"); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_begin"); } @@ -99,16 +99,16 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { unsigned IntPtrSize = M.getDataLayout().getPointerSize(); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_end"); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? 
AP.OutStreamer->emitIntValue(0, IntPtrSize); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; @@ -147,7 +147,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.OutStreamer->AddComment("live roots for " + Twine(FI->getFunction().getName())); - AP.OutStreamer->AddBlankLine(); + AP.OutStreamer->addBlankLine(); for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE; ++J) { diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index bab187f46535..135eabc34838 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -13,7 +13,7 @@ #include "PseudoProbePrinter.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/MC/MCStreamer.h" diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index a17a2ca2790e..a514ff161cee 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "WasmException.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/llvm/lib/CodeGen/AsmPrinter/WasmException.h index f06de786bd76..2abbe37cb6d9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.h +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.h @@ -15,9 +15,12 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H #include "EHStreamer.h" -#include "llvm/CodeGen/AsmPrinter.h" namespace llvm { +class AsmPrinter; +class MachineFunction; +struct LandingPadInfo; +template class SmallVectorImpl; class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer { public: diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index ad8432343a60..5d813b72c0b7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -15,11 +15,8 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -29,7 +26,7 @@ using namespace llvm; WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {} -WinCFGuard::~WinCFGuard() {} +WinCFGuard::~WinCFGuard() = default; void WinCFGuard::endFunction(const MachineFunction *MF) { @@ -110,19 +107,19 @@ void WinCFGuard::endModule() { // Emit the symbol index of each GFIDs entry to form the .gfids section. 
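// Aside: the three Control Flow Guard tables emitted below (.gfids, .giats,
// .gljmp) all follow one recipe: switch to the target section, then emit a
// COFF symbol-table index per entry. A sketch of that recipe factored into a
// helper, using the lowercase MCStreamer API this patch migrates to
// (`emitGuardTable` itself is illustrative, not part of the patch):
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include <vector>

static void emitGuardTable(llvm::MCStreamer &OS, llvm::MCSection *Sec,
                           const std::vector<const llvm::MCSymbol *> &Syms) {
  OS.switchSection(Sec);
  // Each entry is a symbol index that the linker resolves when it builds
  // the final guard tables.
  for (const llvm::MCSymbol *S : Syms)
    OS.emitCOFFSymbolIndex(S);
}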
auto &OS = *Asm->OutStreamer; - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); for (const MCSymbol *S : GFIDsEntries) - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); // Emit the symbol index of each GIATs entry to form the .giats section. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); for (const MCSymbol *S : GIATsEntries) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } // Emit the symbol index of each longjmp target to form the .gljmp section. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index ef57031c7294..c3ca9c92bf71 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -23,19 +23,13 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; WinException::WinException(AsmPrinter *A) : EHStreamer(A) { @@ -46,7 +40,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) { isThumb = Asm->TM.getTargetTriple().isThumb(); } -WinException::~WinException() {} +WinException::~WinException() = default; /// endModule - Emit all exception information that should come after the /// content. @@ -55,13 +49,13 @@ void WinException::endModule() { const Module *M = MMI->getModule(); for (const Function &F : *M) if (F.hasFnAttribute("safeseh")) - OS.EmitCOFFSafeSEH(Asm->getSymbol(&F)); + OS.emitCOFFSafeSEH(Asm->getSymbol(&F)); if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) { // Emit the symbol index of each ehcont target. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection()); for (const MCSymbol *S : EHContTargets) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } } } @@ -122,7 +116,7 @@ void WinException::beginFunction(const MachineFunction *MF) { void WinException::markFunctionEnd() { if (isAArch64 && CurrentFuncletEntry && (shouldEmitMoves || shouldEmitPersonality)) - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd(); } /// endFunction - Gather and emit post-function exception information. @@ -151,12 +145,12 @@ void WinException::endFunction(const MachineFunction *MF) { return; if (shouldEmitPersonality || shouldEmitLSDA) { - Asm->OutStreamer->PushSection(); + Asm->OutStreamer->pushSection(); // Just switch sections to the right xdata section. 
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( Asm->OutStreamer->getCurrentSectionOnly()); - Asm->OutStreamer->SwitchSection(XData); + Asm->OutStreamer->switchSection(XData); // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. @@ -171,7 +165,7 @@ void WinException::endFunction(const MachineFunction *MF) { else emitExceptionTable(); - Asm->OutStreamer->PopSection(); + Asm->OutStreamer->popSection(); } if (!MF->getCatchretTargets().empty()) { @@ -211,11 +205,11 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, Sym = getMCSymbolForMBB(Asm, &MBB); // Describe our funclet symbol as a function with internal linkage. - Asm->OutStreamer->BeginCOFFSymbolDef(Sym); - Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + Asm->OutStreamer->beginCOFFSymbolDef(Sym); + Asm->OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + Asm->OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); - Asm->OutStreamer->EndCOFFSymbolDef(); + Asm->OutStreamer->endCOFFSymbolDef(); // We want our funclet's entry point to be aligned such that no nops will be // present after the label. @@ -229,7 +223,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // Mark 'Sym' as starting our funclet. if (shouldEmitMoves || shouldEmitPersonality) { CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly(); - Asm->OutStreamer->EmitWinCFIStartProc(Sym); + Asm->OutStreamer->emitWinCFIStartProc(Sym); } if (shouldEmitPersonality) { @@ -248,15 +242,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // inliner doesn't allow inlining them, this isn't a major problem in // practice. if (!CurrentFuncletEntry->isCleanupFuncletEntry()) - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + Asm->OutStreamer->emitWinEHHandler(PersHandlerSym, true, true); } } void WinException::endFunclet() { if (isAArch64 && CurrentFuncletEntry && (shouldEmitMoves || shouldEmitPersonality)) { - Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + Asm->OutStreamer->switchSection(CurrentFuncletTextSection); + Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd(); } endFuncletImpl(); } @@ -276,7 +270,7 @@ void WinException::endFuncletImpl() { if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && !CurrentFuncletEntry->isCleanupFuncletEntry()) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. @@ -287,14 +281,14 @@ void WinException::endFuncletImpl() { } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // If this is the parent function in Win64 SEH, emit the LSDA immediately // following .seh_handlerdata. emitCSpecificHandlerTable(MF); } else if (shouldEmitPersonality || shouldEmitLSDA) { // Emit an UNWIND_INFO struct describing the prologue. 
- Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // In these cases, no further info is written to the .xdata section // right here, but is written by e.g. emitExceptionTable in endFunction() // above. @@ -307,8 +301,8 @@ void WinException::endFuncletImpl() { // Switch back to the funclet start .text section now that we are done // writing to .xdata, and emit an .seh_endproc directive to mark the end of // the function. - Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIEndProc(); + Asm->OutStreamer->switchSection(CurrentFuncletTextSection); + Asm->OutStreamer->emitWinCFIEndProc(); } // Let's make sure we don't try to end the same funclet twice. @@ -699,7 +693,12 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { } int UnwindHelpOffset = 0; - if (Asm->MAI->usesWindowsCFI()) + // TODO: The check for UnwindHelpFrameIdx against max() below (and the + // second check further below) can be removed if MS C++ unwinding is + // implemented for ARM, when test/CodeGen/ARM/Windows/wineh-basic.ll + // passes without the check. + if (Asm->MAI->usesWindowsCFI() && + FuncInfo.UnwindHelpFrameIdx != std::numeric_limits::max()) UnwindHelpOffset = getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); @@ -761,7 +760,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { AddComment("IPToStateXData"); OS.emitValue(create32bitRef(IPToStateXData), 4); - if (Asm->MAI->usesWindowsCFI()) { + if (Asm->MAI->usesWindowsCFI() && + FuncInfo.UnwindHelpFrameIdx != std::numeric_limits::max()) { AddComment("UnwindHelp"); OS.emitInt32(UnwindHelpOffset); } diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4838f6da750d..5ce6fbb5f647 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/AtomicExpandUtils.h" @@ -47,6 +47,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/LowerAtomic.h" #include #include #include @@ -57,71 +58,72 @@ using namespace llvm; namespace { - class AtomicExpand: public FunctionPass { - const TargetLowering *TLI = nullptr; +class AtomicExpand : public FunctionPass { + const TargetLowering *TLI = nullptr; - public: - static char ID; // Pass identification, replacement for typeid +public: + static char ID; // Pass identification, replacement for typeid - AtomicExpand() : FunctionPass(ID) { - initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); - } + AtomicExpand() : FunctionPass(ID) { + initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; - - private: - bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); - IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); - LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); - bool tryExpandAtomicLoad(LoadInst *LI); - bool expandAtomicLoadToLL(LoadInst *LI); - bool expandAtomicLoadToCmpXchg(LoadInst *LI); - StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); - bool expandAtomicStore(StoreInst *SI); - bool 
tryExpandAtomicRMW(AtomicRMWInst *AI); - AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); - Value * - insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, - Align AddrAlign, AtomicOrdering MemOpOrder, - function_ref &, Value *)> PerformOp); - void expandAtomicOpToLLSC( - Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, - AtomicOrdering MemOpOrder, - function_ref &, Value *)> PerformOp); - void expandPartwordAtomicRMW( - AtomicRMWInst *I, - TargetLoweringBase::AtomicExpansionKind ExpansionKind); - AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); - bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); - void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); - void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); - - AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); - static Value *insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign, - AtomicOrdering MemOpOrder, SyncScope::ID SSID, - function_ref &, Value *)> PerformOp, - CreateCmpXchgInstFun CreateCmpXchg); - bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - bool isIdempotentRMW(AtomicRMWInst *RMWI); - bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); - - bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, - Value *PointerOperand, Value *ValueOperand, - Value *CASExpected, AtomicOrdering Ordering, - AtomicOrdering Ordering2, - ArrayRef Libcalls); - void expandAtomicLoadToLibcall(LoadInst *LI); - void expandAtomicStoreToLibcall(StoreInst *LI); - void expandAtomicRMWToLibcall(AtomicRMWInst *I); - void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); - - friend bool - llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg); - }; + bool runOnFunction(Function &F) override; + +private: + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); + IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); + LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); + bool tryExpandAtomicLoad(LoadInst *LI); + bool expandAtomicLoadToLL(LoadInst *LI); + bool expandAtomicLoadToCmpXchg(LoadInst *LI); + StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); + bool tryExpandAtomicStore(StoreInst *SI); + void expandAtomicStore(StoreInst *SI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); + AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); + Value * + insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + Align AddrAlign, AtomicOrdering MemOpOrder, + function_ref &, Value *)> PerformOp); + void + expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr, + Align AddrAlign, AtomicOrdering MemOpOrder, + function_ref &, Value *)> PerformOp); + void expandPartwordAtomicRMW( + AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); + AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); + bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); + void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); + void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); + + AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); + static Value * + insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr, + Align AddrAlign, AtomicOrdering MemOpOrder, + SyncScope::ID SSID, + function_ref &, Value *)> PerformOp, + CreateCmpXchgInstFun CreateCmpXchg); + bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); + + bool 
expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + bool isIdempotentRMW(AtomicRMWInst *RMWI); + bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); + + bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, + Value *PointerOperand, Value *ValueOperand, + Value *CASExpected, AtomicOrdering Ordering, + AtomicOrdering Ordering2, + ArrayRef Libcalls); + void expandAtomicLoadToLibcall(LoadInst *LI); + void expandAtomicStoreToLibcall(StoreInst *LI); + void expandAtomicRMWToLibcall(AtomicRMWInst *I); + void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); + + friend bool + llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg); +}; } // end anonymous namespace @@ -129,8 +131,8 @@ char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", - false, false) +INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, + false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } @@ -252,7 +254,8 @@ bool AtomicExpand::runOnFunction(Function &F) { } if (LI) { - if (LI->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicLoadInIR(LI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. LI = convertAtomicLoadToIntegerType(LI); @@ -262,7 +265,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange |= tryExpandAtomicLoad(LI); } else if (SI) { - if (SI->getValueOperand()->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicStoreInIR(SI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. SI = convertAtomicStoreToIntegerType(SI); @@ -271,8 +275,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } - if (TLI->shouldExpandAtomicStoreInIR(SI)) - MadeChange |= expandAtomicStore(SI); + if (tryExpandAtomicStore(SI)) + MadeChange = true; } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent @@ -283,8 +287,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } else { AtomicRMWInst::BinOp Op = RMWI->getOperation(); - if (Op == AtomicRMWInst::Xchg && - RMWI->getValOperand()->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicRMWIInIR(RMWI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. RMWI = convertAtomicXchgToIntegerType(RMWI); @@ -308,7 +312,7 @@ bool AtomicExpand::runOnFunction(Function &F) { // extend convertCmpXchgToInteger for floating point too. assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && "unimplemented - floating point not legal at IR level"); - if (CASI->getCompareOperand()->getType()->isPointerTy() ) { + if (CASI->getCompareOperand()->getType()->isPointerTy()) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. CASI = convertCmpXchgToIntegerType(CASI); @@ -351,14 +355,12 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, /// convertAtomicStoreToIntegerType for background. 
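// Aside: a sketch of what the CastToInteger expansion used above produces at
// the IR level; names and exact instruction order are illustrative only, the
// real output comes from convertAtomicLoadToIntegerType below:
//
//   %v = load atomic float, float* %p seq_cst, align 4
//
// is rewritten to
//
//   %p.int = bitcast float* %p to i32*
//   %v.int = load atomic i32, i32* %p.int seq_cst, align 4
//   %v     = bitcast i32 %v.int to float
//
// so backends only ever have to select integer-typed atomic loads.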
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); - Type *NewTy = getCorrespondingIntegerType(LI->getType(), - M->getDataLayout()); + Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); IRBuilder<> Builder(LI); Value *Addr = LI->getPointerOperand(); - Type *PT = PointerType::get(NewTy, - Addr->getType()->getPointerAddressSpace()); + Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); @@ -385,7 +387,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { Value *Val = RMWI->getValOperand(); Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); - Value *NewVal = Builder.CreateBitCast(Val, NewTy); + Value *NewVal = Val->getType()->isPointerTy() + ? Builder.CreatePtrToInt(Val, NewTy) + : Builder.CreateBitCast(Val, NewTy); auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, @@ -393,7 +397,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { NewRMWI->setVolatile(RMWI->isVolatile()); LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); - Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); + Value *NewRVal = RMWI->getType()->isPointerTy() + ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType()) + : Builder.CreateBitCast(NewRMWI, RMWI->getType()); RMWI->replaceAllUsesWith(NewRVal); RMWI->eraseFromParent(); return NewRMWI; @@ -413,11 +419,29 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { return expandAtomicLoadToLL(LI); case TargetLoweringBase::AtomicExpansionKind::CmpXChg: return expandAtomicLoadToCmpXchg(LI); + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + LI->setAtomic(AtomicOrdering::NotAtomic); + return true; default: llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); } } +bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { + switch (TLI->shouldExpandAtomicStoreInIR(SI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::Expand: + expandAtomicStore(SI); + return true; + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + SI->setAtomic(AtomicOrdering::NotAtomic); + return true; + default: + llvm_unreachable("Unhandled case in tryExpandAtomicStore"); + } +} + bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { IRBuilder<> Builder(LI); @@ -471,8 +495,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); Value *Addr = SI->getPointerOperand(); - Type *PT = PointerType::get(NewTy, - Addr->getType()->getPointerAddressSpace()); + Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); @@ -484,7 +507,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { return NewSI; } -bool AtomicExpand::expandAtomicStore(StoreInst *SI) { +void AtomicExpand::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. 
So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM @@ -498,7 +521,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. - return tryExpandAtomicRMW(AI); + tryExpandAtomicRMW(AI); } static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, @@ -508,6 +531,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Type *OrigTy = NewVal->getType(); // This code can go away when cmpxchg supports FP types. + assert(!OrigTy->isPointerTy()); bool NeedBitcast = OrigTy->isFloatingPointTy(); if (NeedBitcast) { IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); @@ -527,47 +551,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); } -/// Emit IR to implement the given atomicrmw operation on values in registers, -/// returning the new value. -static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, - Value *Loaded, Value *Inc) { - Value *NewVal; - switch (Op) { - case AtomicRMWInst::Xchg: - return Inc; - case AtomicRMWInst::Add: - return Builder.CreateAdd(Loaded, Inc, "new"); - case AtomicRMWInst::Sub: - return Builder.CreateSub(Loaded, Inc, "new"); - case AtomicRMWInst::And: - return Builder.CreateAnd(Loaded, Inc, "new"); - case AtomicRMWInst::Nand: - return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); - case AtomicRMWInst::Or: - return Builder.CreateOr(Loaded, Inc, "new"); - case AtomicRMWInst::Xor: - return Builder.CreateXor(Loaded, Inc, "new"); - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::FAdd: - return Builder.CreateFAdd(Loaded, Inc, "new"); - case AtomicRMWInst::FSub: - return Builder.CreateFSub(Loaded, Inc, "new"); - default: - llvm_unreachable("Unknown atomic op"); - } -} - bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { LLVMContext &Ctx = AI->getModule()->getContext(); TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); @@ -582,8 +565,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { TargetLoweringBase::AtomicExpansionKind::LLSC); } else { auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { - return performAtomicOp(AI->getOperation(), Builder, Loaded, - AI->getValOperand()); + return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, + AI->getValOperand()); }; expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), AI->getAlign(), AI->getOrdering(), PerformOp); @@ -621,6 +604,12 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { expandAtomicRMWToMaskedIntrinsic(AI); return true; } + case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: { + TLI->emitBitTestAtomicRMWIntrinsic(AI); + return true; + } + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + return lowerAtomicRMWInst(AI); default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } @@ -703,7 +692,7 @@ static PartwordMaskValues 
createMaskInstrs(IRBuilder<> &Builder, Instruction *I, PMV.AlignedAddr = Addr; PMV.AlignedAddrAlignment = AddrAlign; PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); - PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); + PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true); return PMV; } @@ -787,7 +776,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, case AtomicRMWInst::Sub: case AtomicRMWInst::Nand: { // The other arithmetic ops need to be masked into place. - Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); + Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc); Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); @@ -801,7 +790,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, // truncate down to the original size, and expand out again after // doing the operation. Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); - Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); + Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc); Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); return FinalVal; } @@ -840,9 +829,8 @@ void AtomicExpand::expandPartwordAtomicRMW( Value *OldResult; if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, - PMV.AlignedAddrAlignment, MemOpOrder, - SSID, PerformPartwordOp, - createCmpXchgInstFun); + PMV.AlignedAddrAlignment, MemOpOrder, SSID, + PerformPartwordOp, createCmpXchgInstFun); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, @@ -1106,7 +1094,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( // [...] BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place). @@ -1135,7 +1123,8 @@ Value *AtomicExpand::insertRMWLLSCLoop( /// IR. As a migration step, we convert back to what use to be the standard /// way to represent a pointer cmpxchg so that we can update backends one by /// one. 
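// Aside: the partword expansions earlier in this file (createMaskInstrs and
// performMaskedAtomicOp) all rest on the same mask arithmetic: align the
// address down to a word, compute the value's bit offset, and build the mask
// pair used to splice the narrow value into its containing word. A standalone
// sketch of that arithmetic for a little-endian target with a 4-byte word
// (plain C++; the field names mirror the pass but the helper is illustrative):
#include <cstdint>

struct PartwordMask {
  uintptr_t AlignedAddr; // address of the containing 32-bit word
  unsigned ShiftAmt;     // bit offset of the narrow value within the word
  uint32_t Mask;         // bits covered by the narrow value
  uint32_t InvMask;      // every other bit of the word
};

PartwordMask computeMask(uintptr_t Addr, unsigned ValueBytes) {
  PartwordMask PMV;
  PMV.AlignedAddr = Addr & ~uintptr_t(3); // round down to the word
  PMV.ShiftAmt = unsigned(Addr & 3) * 8;  // little-endian bit offset
  uint32_t Narrow = ValueBytes == 4 ? ~0u : ((1u << (ValueBytes * 8)) - 1);
  PMV.Mask = Narrow << PMV.ShiftAmt;
  PMV.InvMask = ~PMV.Mask;
  return PMV;
}
// E.g. a 2-byte value at Addr % 4 == 2 gives ShiftAmt == 16 and
// Mask == 0xFFFF0000, so NewWord = (Old & InvMask) | ((V << 16) & Mask).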
-AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { +AtomicCmpXchgInst * +AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { auto *M = CI->getModule(); Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), M->getDataLayout()); @@ -1143,8 +1132,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * IRBuilder<> Builder(CI); Value *Addr = CI->getPointerOperand(); - Type *PT = PointerType::get(NewTy, - Addr->getType()->getPointerAddressSpace()); + Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); @@ -1305,9 +1293,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); Value *NewValueInsert = insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); - Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, - MemOpOrder); + Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert, + PMV.AlignedAddr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; @@ -1418,27 +1405,27 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { return true; } -bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { +bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) { auto C = dyn_cast(RMWI->getValOperand()); - if(!C) + if (!C) return false; AtomicRMWInst::BinOp Op = RMWI->getOperation(); - switch(Op) { - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - case AtomicRMWInst::Or: - case AtomicRMWInst::Xor: - return C->isZero(); - case AtomicRMWInst::And: - return C->isMinusOne(); - // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... - default: - return false; + switch (Op) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + return C->isZero(); + case AtomicRMWInst::And: + return C->isMinusOne(); + // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... 
+ default: + return false; } } -bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { +bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { tryExpandAtomicLoad(ResultingLoad); return true; @@ -1524,6 +1511,8 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: expandAtomicCmpXchgToMaskedIntrinsic(CI); return true; + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + return lowerAtomicCmpXchgInst(CI); } } @@ -1535,8 +1524,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), AI->getOrdering(), AI->getSyncScopeID(), [&](IRBuilder<> &Builder, Value *Loaded) { - return performAtomicOp(AI->getOperation(), Builder, Loaded, - AI->getValOperand()); + return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, + AI->getValOperand()); }, CreateCmpXchg); @@ -1738,11 +1727,21 @@ bool AtomicExpand::expandAtomicOpToLibcall( RTLIB::Libcall RTLibType; if (UseSizedLibcall) { switch (Size) { - case 1: RTLibType = Libcalls[1]; break; - case 2: RTLibType = Libcalls[2]; break; - case 4: RTLibType = Libcalls[3]; break; - case 8: RTLibType = Libcalls[4]; break; - case 16: RTLibType = Libcalls[5]; break; + case 1: + RTLibType = Libcalls[1]; + break; + case 2: + RTLibType = Libcalls[2]; + break; + case 4: + RTLibType = Libcalls[3]; + break; + case 8: + RTLibType = Libcalls[4]; + break; + case 16: + RTLibType = Libcalls[5]; + break; } } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { RTLibType = Libcalls[0]; @@ -1806,8 +1805,8 @@ bool AtomicExpand::expandAtomicOpToLibcall( // that property, we'd need to extend this mechanism to support AS-specific // families of atomic intrinsics. 
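// Aside: the sized-libcall table above indexes Libcalls[1..5] for sizes 1, 2,
// 4, 8 and 16 bytes, with Libcalls[0] holding the generic, size-parameterized
// fallback. The same selection over the __atomic_load family of libatomic
// entry points (a sketch; the real names come from RuntimeLibcalls):
#include <cstdint>

const char *atomicLoadLibcallName(uint64_t Size) {
  switch (Size) {
  case 1:  return "__atomic_load_1";
  case 2:  return "__atomic_load_2";
  case 4:  return "__atomic_load_4";
  case 8:  return "__atomic_load_8";
  case 16: return "__atomic_load_16";
  default: // generic form: __atomic_load(size, ptr, ret, order)
    return "__atomic_load";
  }
}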
auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); - Value *PtrVal = Builder.CreateBitCast(PointerOperand, - Type::getInt8PtrTy(Ctx, PtrTypeAS)); + Value *PtrVal = + Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS)); PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); Args.push_back(PtrVal); @@ -1815,11 +1814,10 @@ bool AtomicExpand::expandAtomicOpToLibcall( if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); AllocaCASExpected->setAlignment(AllocaAlignment); - unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); + unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); - AllocaCASExpected_i8 = - Builder.CreateBitCast(AllocaCASExpected, - Type::getInt8PtrTy(Ctx, AllocaAS)); + AllocaCASExpected_i8 = Builder.CreateBitCast( + AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); Args.push_back(AllocaCASExpected_i8); @@ -1846,9 +1844,9 @@ bool AtomicExpand::expandAtomicOpToLibcall( if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); AllocaResult->setAlignment(AllocaAlignment); - unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); + unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = - Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); + Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); Args.push_back(AllocaResult_i8); } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index c1901bc46d72..f05f5b9f9947 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -60,7 +60,7 @@ // Basic Block Labels // ================== // -// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of +// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of // every function into the .llvm_bb_addr_map section. Along with the function // symbols, this allows for mapping of virtual addresses in PMU profiles back to // the corresponding basic blocks. This logic is implemented in AsmPrinter. This @@ -69,26 +69,17 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetMachine.h" -using llvm::SmallSet; -using llvm::SmallVector; -using llvm::StringMap; -using llvm::StringRef; using namespace llvm; // Placing the cold clusters in a separate section mitigates against poor @@ -108,41 +99,11 @@ cl::opt BBSectionsDetectSourceDrift( namespace { -// This struct represents the cluster information for a machine basic block. 
-struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; - // Cluster ID this basic block belongs to. - unsigned ClusterID; - // Position of basic block within the cluster. - unsigned PositionInCluster; -}; - -using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo>>; - class BasicBlockSections : public MachineFunctionPass { public: static char ID; - // This contains the basic-block-sections profile. - const MemoryBuffer *MBuf = nullptr; - - // This encapsulates the BB cluster information for the whole program. - // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; - - // Some functions have alias names. We use this map to find the main alias - // name for which we have mapping in ProgramBBClusterInfo. - StringMap<StringRef> FuncAliasMap; - - BasicBlockSections(const MemoryBuffer *Buf) - : MachineFunctionPass(ID), MBuf(Buf) { - initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); - }; + BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -154,9 +115,6 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; - /// Read profiles of basic blocks if available here. - bool doInitialization(Module &M) override; - /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. bool runOnMachineFunction(MachineFunction &MF) override; @@ -206,21 +164,18 @@ static void updateBranches( // This function provides the BBCluster information associated with a function. // Returns true if a valid association exists and false otherwise. -static bool getBBClusterInfoForFunction( - const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap, - const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, +bool getBBClusterInfoForFunction( + const MachineFunction &MF, + BasicBlockSectionsProfileReader *BBSectionsProfileReader, std::vector<Optional<BBClusterInfo>> &V) { - // Get the main alias name for the function. - auto FuncName = MF.getName(); - auto R = FuncAliasMap.find(FuncName); - StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second; // Find the associated cluster information. - auto P = ProgramBBClusterInfo.find(AliasName); - if (P == ProgramBBClusterInfo.end()) + std::pair<bool, SmallVector<BBClusterInfo>> P = + BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName()); + if (!P.first) return false; - if (P->second.empty()) { + if (P.second.empty()) { // This indicates that sections are desired for all basic blocks of this // function. We clear the BBClusterInfo vector to denote this. V.clear(); @@ -228,7 +183,7 @@ static bool getBBClusterInfoForFunction( V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P->second) { + for (auto bbClusterInfo : P.second) { // Bail out if the cluster information contains invalid MBB numbers. if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) return false; @@ -266,7 +221,7 @@ assignSections(MachineFunction &MF, // set every basic block's section ID equal to its number (basic block // id). This further ensures that basic blocks are ordered canonically.
MBB.setSectionID({static_cast(MBB.getNumber())}); - } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue()) + } else if (FuncBBClusterInfo[MBB.getNumber()]) MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID); else { // BB goes into the special cold section if it is not specified in the @@ -279,9 +234,8 @@ assignSections(MachineFunction &MF, // If we already have one cluster containing eh_pads, this must be updated // to ExceptionSectionID. Otherwise, we set it equal to the current // section ID. - EHPadsSectionID = EHPadsSectionID.hasValue() - ? MBBSectionID::ExceptionSectionID - : MBB.getSectionID(); + EHPadsSectionID = EHPadsSectionID ? MBBSectionID::ExceptionSectionID + : MBB.getSectionID(); } } @@ -290,7 +244,7 @@ assignSections(MachineFunction &MF, if (EHPadsSectionID == MBBSectionID::ExceptionSectionID) for (auto &MBB : MF) if (MBB.isEHPad()) - MBB.setSectionID(EHPadsSectionID.getValue()); + MBB.setSectionID(*EHPadsSectionID); } void llvm::sortBasicBlocksAndUpdateBranches( @@ -377,9 +331,11 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } + BBSectionsProfileReader = &getAnalysis(); + std::vector> FuncBBClusterInfo; if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); @@ -427,107 +383,12 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } -// Basic Block Sections can be enabled for a subset of machine basic blocks. -// This is done by passing a file containing names of functions for which basic -// block sections are desired. Additionally, machine basic block ids of the -// functions can also be specified for a finer granularity. Moreover, a cluster -// of basic blocks could be assigned to the same section. -// A file with basic block sections for all of function main and three blocks -// for function foo (of which 1 and 2 are placed in a cluster) looks like this: -// ---------------------------- -// list.txt: -// !main -// !foo -// !!1 2 -// !!4 -static Error getBBClusterInfo(const MemoryBuffer *MBuf, - ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, - StringMap &FuncAliasMap) { - assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); - - auto invalidProfileError = [&](auto Message) { - return make_error( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; - - auto FI = ProgramBBClusterInfo.end(); - - // Current cluster ID corresponding to this function. - unsigned CurrentCluster = 0; - // Current position in the current cluster. - unsigned CurrentPosition = 0; - - // Temporary set to ensure every basic block ID appears once in the clusters - // of a function. - SmallSet FuncBBIDs; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef S(*LineIt); - if (S[0] == '@') - continue; - // Check for the leading "!" - if (!S.consume_front("!") || S.empty()) - break; - // Check for second "!" which indicates a cluster of basic blocks. - if (S.consume_front("!")) { - if (FI == ProgramBBClusterInfo.end()) - return invalidProfileError( - "Cluster list does not follow a function name specifier."); - SmallVector BBIndexes; - S.split(BBIndexes, ' '); - // Reset current cluster position. 
- CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); - - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); - } - CurrentCluster++; - } else { // This is a function name specifier. - // Function aliases are separated using '/'. We use the first function - // name for the cluster info mapping and delegate all other aliases to - // this one. - SmallVector Aliases; - S.split(Aliases, '/'); - for (size_t i = 1; i < Aliases.size(); ++i) - FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); - - // Prepare for parsing clusters of this function name. - // Start a new cluster map for this function name. - FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; - CurrentCluster = 0; - FuncBBIDs.clear(); - } - } - return Error::success(); -} - -bool BasicBlockSections::doInitialization(Module &M) { - if (!MBuf) - return false; - if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) - report_fatal_error(std::move(Err)); - return false; -} - void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } -MachineFunctionPass * -llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) { - return new BasicBlockSections(Buf); +MachineFunctionPass *llvm::createBasicBlockSectionsPass() { + return new BasicBlockSections(); } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp new file mode 100644 index 000000000000..c2acf115998b --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -0,0 +1,144 @@ +//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the basic block sections profile reader pass. It parses +// and stores the basic block sections profile file (which is specified via the +// `-basic-block-sections` flag). 
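// Editor's usage note (flag spellings per the clang and llc documentation,
// not restated in this patch): the profile is typically supplied as
//   clang -O2 -fbasic-block-sections=list=profile.txt foo.cc
// or, when driving the backend directly,
//   llc -basic-block-sections=profile.txt foo.ll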
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +char BasicBlockSectionsProfileReader::ID = 0; +INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", + "Reads and parses a basic block sections profile.", false, + false) + +bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { + return getBBClusterInfoForFunction(FuncName).first; +} + +std::pair> +BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( + StringRef FuncName) const { + std::pair> cluster_info(false, {}); + auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); + if (R != ProgramBBClusterInfo.end()) { + cluster_info.second = R->second; + cluster_info.first = true; + } + return cluster_info; +} + +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. +// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// ---------------------------- +// list.txt: +// !main +// !foo +// !!1 2 +// !!4 +static Error getBBClusterInfo(const MemoryBuffer *MBuf, + ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + StringMap &FuncAliasMap) { + assert(MBuf); + line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + + auto invalidProfileError = [&](auto Message) { + return make_error( + Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. + SmallSet FuncBBIDs; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + if (S[0] == '@') + continue; + // Check for the leading "!" + if (!S.consume_front("!") || S.empty()) + break; + // Check for second "!" which indicates a cluster of basic blocks. + if (S.consume_front("!")) { + if (FI == ProgramBBClusterInfo.end()) + return invalidProfileError( + "Cluster list does not follow a function name specifier."); + SmallVector BBIndexes; + S.split(BBIndexes, ' '); + // Reset current cluster position. 
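// Editor's worked example (derived from the parse loop below): the list.txt
// shown above maps "foo" to the {MBBNumber, ClusterID, PositionInCluster}
// triples {1, 0, 0} and {2, 0, 1} from "!!1 2", and {4, 1, 0} from "!!4".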
+ CurrentPosition = 0; + for (auto BBIndexStr : BBIndexes) { + unsigned long long BBIndex; + if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + return invalidProfileError(Twine("Unsigned integer expected: '") + + BBIndexStr + "'."); + if (!FuncBBIDs.insert(BBIndex).second) + return invalidProfileError(Twine("Duplicate basic block id found '") + + BBIndexStr + "'."); + if (!BBIndex && CurrentPosition) + return invalidProfileError("Entry BB (0) does not begin a cluster."); + + FI->second.emplace_back(BBClusterInfo{ + ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + } else { // This is a function name specifier. + // Function aliases are separated using '/'. We use the first function + // name for the cluster info mapping and delegate all other aliases to + // this one. + SmallVector Aliases; + S.split(Aliases, '/'); + for (size_t i = 1; i < Aliases.size(); ++i) + FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. + FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; + CurrentCluster = 0; + FuncBBIDs.clear(); + } + } + return Error::success(); +} + +void BasicBlockSectionsProfileReader::initializePass() { + if (!MBuf) + return; + if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) + report_fatal_error(std::move(Err)); +} + +ImmutablePass * +llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { + return new BasicBlockSectionsProfileReader(Buf); +} diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 0ff67f7ca00a..07be03d2dab9 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -32,11 +33,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSizeOpts.h" -#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -105,6 +104,11 @@ namespace { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } }; } // end anonymous namespace diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index 95d5dcfbbd0f..d0b6ed5ebe05 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -14,7 +14,6 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/Compiler.h" -#include #include namespace llvm { diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index eda0f37fdeb7..29508f8f35a6 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/Compiler.h" #include
"llvm/Support/Debug.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 558700bd9b3b..57170c58db14 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -19,11 +19,13 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp new file mode 100644 index 000000000000..837dbd77d073 --- /dev/null +++ b/llvm/lib/CodeGen/CFIFixup.cpp @@ -0,0 +1,225 @@ +//===------ CFIFixup.cpp - Insert CFI remember/restore instructions -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// This pass inserts the necessary instructions to adjust for the inconsistency +// of the call-frame information caused by final machine basic block layout. +// The pass relies on constraints LLVM imposes on the placement of +// save/restore points (cf. ShrinkWrap): +// * there is a single basic block, containing the function prologue +// * possibly multiple epilogue blocks, where each epilogue block is +// complete and self-contained, i.e. CSR restore instructions (and the +// corresponding CFI instructions) are not split across two or more blocks. +// * prologue and epilogue blocks are outside of any loops +// Thus, during execution, at the beginning and at the end of each basic block +// the function can be in one of two states: +// - "has a call frame", if the function has executed the prologue, and +// has not executed any epilogue +// - "does not have a call frame", if the function has not executed the +// prologue, or has executed an epilogue +// which can be computed by a single RPO traversal. + +// In order to accommodate backends which do not generate unwind info in +// epilogues we compute an additional property "strong no call frame on entry", +// which is set for the entry point of the function and for every block +// reachable from the entry along a path that does not execute the prologue. If +// this property holds, it takes precedence over the "has a call frame" +// property. + +// From the point of view of the unwind tables, the "has/does not have call +// frame" state at the beginning of each block is determined by the state at the end +// of the previous block, in layout order. Where these states differ, we insert +// compensating CFI instructions, which come in two flavours: + +// - CFI instructions, which reset the unwind table state to the initial one. +// This is done by a target specific hook and is expected to be trivial +// to implement, for example it could be: +// .cfi_def_cfa <sp>, 0 +// .cfi_same_value <r1> +// .cfi_same_value <r2> +// ... +// where <r1>, <r2>, ... are the callee-saved registers. +// - CFI instructions, which reset the unwind table state to the one +// created by the function prologue.
These are +// .cfi_restore_state +// .cfi_remember_state +// In this case we also insert a `.cfi_remember_state` after the last CFI +// instruction in the function prologue. +// +// Known limitations: +// * the pass cannot handle an epilogue preceding the prologue in the basic +// block layout +// * the pass does not handle functions where SP is used as a frame pointer and +// SP adjustments up and down are done in different basic blocks (TODO) +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CFIFixup.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "cfi-fixup" + +char CFIFixup::ID = 0; + +INITIALIZE_PASS(CFIFixup, "cfi-fixup", + "Insert CFI remember/restore state instructions", false, false) +FunctionPass *llvm::createCFIFixup() { return new CFIFixup(); } + +static bool isPrologueCFIInstruction(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && + MI.getFlag(MachineInstr::FrameSetup); +} + +static bool containsPrologue(const MachineBasicBlock &MBB) { + return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction); +} + +static bool containsEpilogue(const MachineBasicBlock &MBB) { + return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) { + return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && + MI.getFlag(MachineInstr::FrameDestroy); + }); +} + +bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { + const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering(); + if (!TFL.enableCFIFixup(MF)) + return false; + + const unsigned NumBlocks = MF.getNumBlockIDs(); + if (NumBlocks < 2) + return false; + + struct BlockFlags { + bool Reachable : 1; + bool StrongNoFrameOnEntry : 1; + bool HasFrameOnEntry : 1; + bool HasFrameOnExit : 1; + }; + SmallVector<BlockFlags> BlockInfo(NumBlocks, {false, false, false, false}); + BlockInfo[0].Reachable = true; + BlockInfo[0].StrongNoFrameOnEntry = true; + + // Compute the presence/absence of frame at each basic block. + MachineBasicBlock *PrologueBlock = nullptr; + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + for (MachineBasicBlock *MBB : RPOT) { + BlockFlags &Info = BlockInfo[MBB->getNumber()]; + + // Set to true if the current block contains the prologue or the epilogue, + // respectively. + bool HasPrologue = false; + bool HasEpilogue = false; + + if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) { + PrologueBlock = MBB; + HasPrologue = true; + } + + if (Info.HasFrameOnEntry || HasPrologue) + HasEpilogue = containsEpilogue(*MBB); + + // If the function has a call frame at the entry of the current block or the + // current block contains the prologue, then the function has a call frame + // at the exit of the block, unless the block contains the epilogue. + Info.HasFrameOnExit = (Info.HasFrameOnEntry || HasPrologue) && !HasEpilogue; + + // Set the successors' state on entry.
+ for (MachineBasicBlock *Succ : MBB->successors()) { + BlockFlags &SuccInfo = BlockInfo[Succ->getNumber()]; + SuccInfo.Reachable = true; + SuccInfo.StrongNoFrameOnEntry |= + Info.StrongNoFrameOnEntry && !HasPrologue; + SuccInfo.HasFrameOnEntry = Info.HasFrameOnExit; + } + } + + if (!PrologueBlock) + return false; + + // Walk the blocks of the function in "physical" order. + // Every block inherits the frame state (as recorded in the unwind tables) + // of the previous block. If the intended frame state is different, insert + // compensating CFI instructions. + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + bool Change = false; + // `InsertPt` always points to the point in a preceding block where we have to + // insert a `.cfi_remember_state`, in the case that the current block needs a + // `.cfi_restore_state`. + MachineBasicBlock *InsertMBB = PrologueBlock; + MachineBasicBlock::iterator InsertPt = PrologueBlock->begin(); + for (MachineInstr &MI : *PrologueBlock) + if (isPrologueCFIInstruction(MI)) + InsertPt = std::next(MI.getIterator()); + + assert(InsertPt != PrologueBlock->begin() && + "Inconsistent notion of \"prologue block\""); + + // No point starting before the prologue block. + // TODO: the unwind tables will still be incorrect if an epilogue physically + // precedes the prologue. + MachineFunction::iterator CurrBB = std::next(PrologueBlock->getIterator()); + bool HasFrame = BlockInfo[PrologueBlock->getNumber()].HasFrameOnExit; + while (CurrBB != MF.end()) { + const BlockFlags &Info = BlockInfo[CurrBB->getNumber()]; + if (!Info.Reachable) { + ++CurrBB; + continue; + } + +#ifndef NDEBUG + if (!Info.StrongNoFrameOnEntry) { + for (auto *Pred : CurrBB->predecessors()) { + BlockFlags &PredInfo = BlockInfo[Pred->getNumber()]; + assert((!PredInfo.Reachable || + Info.HasFrameOnEntry == PredInfo.HasFrameOnExit) && + "Inconsistent call frame state"); + } + } +#endif + if (!Info.StrongNoFrameOnEntry && Info.HasFrameOnEntry && !HasFrame) { + // Reset to the "after prologue" state. + + // Insert a `.cfi_remember_state` into the last block known to have a + // stack frame. + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + BuildMI(*InsertMBB, InsertPt, DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + // Insert a `.cfi_restore_state` at the beginning of the current block. + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + InsertPt = BuildMI(*CurrBB, CurrBB->begin(), DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + ++InsertPt; + InsertMBB = &*CurrBB; + Change = true; + } else if ((Info.StrongNoFrameOnEntry || !Info.HasFrameOnEntry) && + HasFrame) { + // Reset to the state upon function entry.
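// Editor's illustration (not part of the patch): given the layout
//   [P: prologue][A][E: epilogue + return][B: frame expected on entry]
// the branch above plants `.cfi_remember_state` after P's last FrameSetup
// CFI and `.cfi_restore_state` at the start of B, because E exits with no
// frame; the branch below instead handles a block that must not have a
// frame (e.g. strong-no-frame on entry) laid out after a frame-carrying
// block, by re-emitting the function-entry CFI state: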
+ TFL.resetCFIToInitialState(*CurrBB); + Change = true; + } + + HasFrame = Info.HasFrameOnExit; + ++CurrBB; + } + + return Change; +} diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index de173a9dfd62..42523c47a671 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -19,16 +19,14 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCDwarf.h" using namespace llvm; static cl::opt VerifyCFI("verify-cfiinstrs", diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 84a0e4142bb6..689e49978d43 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -145,11 +145,6 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) { LI.setWeight(Weight); } -float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start, - SlotIndex End) { - return weightCalcHelper(LI, &Start, &End); -} - float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, SlotIndex *End) { MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index c9246f6e8754..f74ff30ab2e1 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -14,16 +14,14 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -72,15 +70,9 @@ bool CCState::IsShadowAllocatedReg(MCRegister Reg) const { if (!isAllocated(Reg)) return false; - for (auto const &ValAssign : Locs) { - if (ValAssign.isRegLoc()) { - for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true); - AI.isValid(); ++AI) { - if (*AI == Reg) - return false; - } - } - } + for (auto const &ValAssign : Locs) + if (ValAssign.isRegLoc() && TRI.regsOverlap(ValAssign.getLocReg(), Reg)) + return false; return true; } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 7c236a9785d8..5050395fbc0f 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCFGuardLongjmpPass(Registry); + initializeCFIFixupPass(Registry); initializeCFIInstrInserterPass(Registry); initializeCheckDebugMachineModulePass(Registry); initializeCodeGenPreparePass(Registry); @@ -50,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIndirectBrExpandPassPass(Registry); initializeInterleavedLoadCombinePass(Registry); 
initializeInterleavedAccessPass(Registry); + initializeJMCInstrumenterPass(Registry); initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); @@ -57,6 +59,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); + initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMIRAddFSDiscriminatorsPass(Registry); initializeMIRCanonicalizerPass(Registry); @@ -104,6 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRemoveRedundantDebugValuesPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); + initializeSelectOptimizePass(Registry); initializeShadowStackGCLoweringPass(Registry); initializeShrinkWrapPass(Registry); initializeSjLjEHPreparePass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 877aa69c3e58..8f185a161bd0 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -129,7 +129,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, MachineBasicBlock::iterator Start = BB->begin(); MachineBasicBlock::iterator Previous = SplitPoint; - --Previous; + do { + --Previous; + } while (Previous != Start && Previous->isDebugInstr()); if (TII.isTailCall(*SplitPoint) && Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { @@ -142,7 +144,7 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, // ADJCALLSTACKUP ... // TAILJMP somewhere // On the other hand, it could be an unrelated call in which case this tail - // call has to register moves of its own and should be the split point. For + // call has no register moves of its own and should be the split point. 
For // example: // ADJCALLSTACKDOWN // CALL something_else @@ -167,3 +169,31 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, return SplitPoint; } + +unsigned llvm::getInvertedFPClassTest(unsigned Test) { + unsigned InvertedTest = ~Test & fcAllFlags; + switch (InvertedTest) { + default: + break; + case fcNan: + case fcSNan: + case fcQNan: + case fcInf: + case fcPosInf: + case fcNegInf: + case fcNormal: + case fcPosNormal: + case fcNegNormal: + case fcSubnormal: + case fcPosSubnormal: + case fcNegSubnormal: + case fcZero: + case fcPosZero: + case fcNegZero: + case fcFinite: + case fcPosFinite: + case fcNegFinite: + return InvertedTest; + } + return 0; +} diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index c888adeafca5..6778af22f532 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -23,16 +23,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -174,12 +173,11 @@ static cl::opt DisablePreheaderProtect( cl::desc("Disable protection against removing loop preheaders")); static cl::opt ProfileGuidedSectionPrefix( - "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, + "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions")); static cl::opt ProfileUnknownInSpecialSection( - "profile-unknown-in-special-section", cl::Hidden, cl::init(false), - cl::ZeroOrMore, + "profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " @@ -188,6 +186,15 @@ static cl::opt ProfileUnknownInSpecialSection( "to handle it in a different way than .text section, to save " "RAM for example. ")); +static cl::opt BBSectionsGuidedSectionPrefix( + "bbsections-guided-section-prefix", cl::Hidden, cl::init(true), + cl::desc("Use the basic-block-sections profile to determine the text " + "section prefix for hot functions. Functions with " + "basic-block-sections profile will be placed in `.text.hot` " + "regardless of their FDO profile info. 
Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles.")); + static cl::opt FreqRatioToSkipMerge( "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " @@ -274,6 +281,7 @@ class TypePromotionTransaction; const TargetLowering *TLI = nullptr; const TargetRegisterInfo *TRI; const TargetTransformInfo *TTI = nullptr; + const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; std::unique_ptr<BlockFrequencyInfo> BFI; @@ -349,6 +357,7 @@ class TypePromotionTransaction; AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>(); } private: @@ -401,6 +410,8 @@ class TypePromotionTransaction; bool optimizeFunnelShift(IntrinsicInst *Fsh); bool optimizeSelectInst(SelectInst *SI); bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); + bool optimizeSwitchType(SwitchInst *SI); + bool optimizeSwitchPhiConstants(SwitchInst *SI); bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); @@ -442,6 +453,7 @@ char CodeGenPrepare::ID = 0; INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -473,8 +485,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + BBSectionsProfileReader = + getAnalysisIfAvailable<BasicBlockSectionsProfileReader>(); OptSize = F.hasOptSize(); - if (ProfileGuidedSectionPrefix) { + // Use the basic-block-sections profile to promote hot functions to .text.hot if requested. + if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && + BBSectionsProfileReader->isFunctionHot(F.getName())) { + F.setSectionPrefix("hot"); + } else if (ProfileGuidedSectionPrefix) { // The hot attribute overwrites profile count based hotness while profile // counts based hotness overwrites the cold attribute. // This is a conservative behavior. @@ -524,7 +542,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Split some critical edges where one of the sources is an indirect branch, // to help generate sane code for PHIs involving such edges. - EverMadeChange |= SplitIndirectBrCriticalEdges(F); + EverMadeChange |= + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); bool MadeChange = true; while (MadeChange) { @@ -2037,7 +2056,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return false; // Bail if the value is never zero. - if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL)) + Use &Op = CountZeros->getOperandUse(0); + if (isKnownNonZero(Op, *DL)) return false; // The intrinsic will be sunk behind a compare against zero and branch. @@ -2058,7 +2078,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // Replace the unconditional branch that was created by the first split with // a compare against zero and a conditional branch. Value *Zero = Constant::getNullValue(Ty); - Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + // Avoid introducing branch on poison. This also replaces the ctz operand.
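// Editor's note: per the LangRef, `freeze` pins an undef/poison input to
// some arbitrary but fixed value, so the icmp and conditional branch
// created below are well-defined even if the original operand was poison.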
+ if (!isGuaranteedNotToBeUndefOrPoison(Op)) + Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); + Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz"); Builder.CreateCondBr(Cmp, EndBlock, CallBlock); StartBlock->getTerminator()->eraseFromParent(); @@ -2101,7 +2124,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // Align the pointer arguments to this call if the target thinks it's a good // idea - unsigned MinSize, PrefAlign; + unsigned MinSize; + Align PrefAlign; if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { for (auto &Arg : CI->args()) { // We want to align both objects whose address is used directly and @@ -2115,12 +2139,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); uint64_t Offset2 = Offset.getLimitedValue(); - if ((Offset2 & (PrefAlign-1)) != 0) + if (!isAligned(PrefAlign, Offset2)) continue; AllocaInst *AI; - if ((AI = dyn_cast(Val)) && AI->getAlignment() < PrefAlign && + if ((AI = dyn_cast(Val)) && AI->getAlign() < PrefAlign && DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) - AI->setAlignment(Align(PrefAlign)); + AI->setAlignment(PrefAlign); // Global variables can only be aligned if they are defined in this // object (i.e. they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is @@ -2130,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { GV->getPointerAlignment(*DL) < PrefAlign && DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) - GV->setAlignment(MaybeAlign(PrefAlign)); + GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -3371,7 +3395,7 @@ public: if (!Visited.insert(P).second) continue; if (auto *PI = dyn_cast(P)) - if (Value *V = SimplifyInstruction(cast(PI), SQ)) { + if (Value *V = simplifyInstruction(cast(PI), SQ)) { for (auto *U : PI->users()) WorkList.push_back(cast(U)); Put(PI, V); @@ -3416,7 +3440,7 @@ public: void destroyNewNodes(Type *CommonType) { // For safe erasing, replace the uses with dummy value first. - auto *Dummy = UndefValue::get(CommonType); + auto *Dummy = PoisonValue::get(CommonType); for (auto *I : AllPhiNodes) { I->replaceAllUsesWith(Dummy); I->eraseFromParent(); @@ -3785,7 +3809,7 @@ private: SmallVector Worklist; assert((isa(Original) || isa(Original)) && "Address must be a Phi or Select node"); - auto *Dummy = UndefValue::get(CommonType); + auto *Dummy = PoisonValue::get(CommonType); Worklist.push_back(Original); while (!Worklist.empty()) { Value *Current = Worklist.pop_back_val(); @@ -4550,9 +4574,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); if (!RHS || RHS->getBitWidth() > 64) return false; - int64_t Scale = RHS->getSExtValue(); - if (Opcode == Instruction::Shl) - Scale = 1LL << Scale; + int64_t Scale = Opcode == Instruction::Shl + ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1) + : RHS->getSExtValue(); return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); } @@ -4783,7 +4807,6 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { } // It isn't profitable to do this, roll back. 
- //cerr << "NOT FOLDING: " << *I; AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); TPT.rollback(LastKnownGood); @@ -4836,7 +4859,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, TLI.ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory - // operand, we can't fold it! + // operand, we can't fold it! TODO: Also handle C_Address? if (OpInfo.CallOperandVal == OpVal && (OpInfo.ConstraintType != TargetLowering::C_Memory || !OpInfo.isIndirect)) @@ -5158,8 +5181,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // GEP, collect the GEP. Skip the GEPs that are the new bases of // previously split data structures. LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); - if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end()) - LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size(); + LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size())); } NewAddrMode.OriginalValue = V; @@ -5323,11 +5345,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); - ResultPtr = - AddrMode.InBounds - ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, - "sunkaddr") - : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, + "sunkaddr", AddrMode.InBounds); } ResultIndex = V; @@ -5338,11 +5357,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); - SunkAddr = - AddrMode.InBounds - ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, - "sunkaddr") - : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr", + AddrMode.InBounds); } if (SunkAddr->getType() != Addr->getType()) @@ -5619,6 +5635,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { // Compute the constraint code and ConstraintType to use. TLI->ComputeConstraintToUse(OpInfo, SDValue()); + // TODO: Also handle C_Address? 
if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { Value *OpVal = CS->getArgOperand(ArgNo++); @@ -6002,31 +6019,25 @@ bool CodeGenPrepare::optimizePhiType( for (Value *V : Phi->incoming_values()) { if (auto *OpPhi = dyn_cast(V)) { if (!PhiNodes.count(OpPhi)) { - if (Visited.count(OpPhi)) + if (!Visited.insert(OpPhi).second) return false; PhiNodes.insert(OpPhi); - Visited.insert(OpPhi); Worklist.push_back(OpPhi); } } else if (auto *OpLoad = dyn_cast(V)) { if (!OpLoad->isSimple()) return false; - if (!Defs.count(OpLoad)) { - Defs.insert(OpLoad); + if (Defs.insert(OpLoad).second) Worklist.push_back(OpLoad); - } } else if (auto *OpEx = dyn_cast(V)) { - if (!Defs.count(OpEx)) { - Defs.insert(OpEx); + if (Defs.insert(OpEx).second) Worklist.push_back(OpEx); - } } else if (auto *OpBC = dyn_cast(V)) { if (!ConvertTy) ConvertTy = OpBC->getOperand(0)->getType(); if (OpBC->getOperand(0)->getType() != ConvertTy) return false; - if (!Defs.count(OpBC)) { - Defs.insert(OpBC); + if (Defs.insert(OpBC).second) { Worklist.push_back(OpBC); AnyAnchored |= !isa(OpBC->getOperand(0)) && !isa(OpBC->getOperand(0)); @@ -6127,7 +6138,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) { // Remove any old phi's that have been converted. for (auto *I : DeletedInstrs) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); I->eraseFromParent(); } @@ -6979,12 +6990,12 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { return Changed; } -bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { +bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) { Value *Cond = SI->getCondition(); Type *OldType = Cond->getType(); LLVMContext &Context = Cond->getContext(); EVT OldVT = TLI->getValueType(*DL, OldType); - MVT RegType = TLI->getRegisterType(Context, OldVT); + MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT); unsigned RegWidth = RegType.getSizeInBits(); if (RegWidth <= cast(OldType)->getBitWidth()) @@ -7019,7 +7030,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { ExtInst->setDebugLoc(SI->getDebugLoc()); SI->setCondition(ExtInst); for (auto Case : SI->cases()) { - APInt NarrowConst = Case.getCaseValue()->getValue(); + const APInt &NarrowConst = Case.getCaseValue()->getValue(); APInt WideConst = (ExtType == Instruction::ZExt) ? NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); Case.setValue(ConstantInt::get(Context, WideConst)); @@ -7028,6 +7039,89 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { return true; } +bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) { + // The SCCP optimization tends to produce code like this: + // switch(x) { case 42: phi(42, ...) } + // Materializing the constant for the phi-argument needs instructions; So we + // change the code to: + // switch(x) { case 42: phi(x, ...) } + + Value *Condition = SI->getCondition(); + // Avoid endless loop in degenerate case. + if (isa(*Condition)) + return false; + + bool Changed = false; + BasicBlock *SwitchBB = SI->getParent(); + Type *ConditionType = Condition->getType(); + + for (const SwitchInst::CaseHandle &Case : SI->cases()) { + ConstantInt *CaseValue = Case.getCaseValue(); + BasicBlock *CaseBB = Case.getCaseSuccessor(); + // Set to true if we previously checked that `CaseBB` is only reached by + // a single case from this switch. 
+ bool CheckedForSinglePred = false; + for (PHINode &PHI : CaseBB->phis()) { + Type *PHIType = PHI.getType(); + // If ZExt is free then we can also catch patterns like this: + // switch((i32)x) { case 42: phi((i64)42, ...); } + // and replace `(i64)42` with `zext i32 %x to i64`. + bool TryZExt = + PHIType->isIntegerTy() && + PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() && + TLI->isZExtFree(ConditionType, PHIType); + if (PHIType == ConditionType || TryZExt) { + // Set to true to skip this case because of multiple preds. + bool SkipCase = false; + Value *Replacement = nullptr; + for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) { + Value *PHIValue = PHI.getIncomingValue(I); + if (PHIValue != CaseValue) { + if (!TryZExt) + continue; + ConstantInt *PHIValueInt = dyn_cast(PHIValue); + if (!PHIValueInt || + PHIValueInt->getValue() != + CaseValue->getValue().zext(PHIType->getIntegerBitWidth())) + continue; + } + if (PHI.getIncomingBlock(I) != SwitchBB) + continue; + // We cannot optimize if there are multiple case labels jumping to + // this block. This check may get expensive when there are many + // case labels so we test for it last. + if (!CheckedForSinglePred) { + CheckedForSinglePred = true; + if (SI->findCaseDest(CaseBB) == nullptr) { + SkipCase = true; + break; + } + } + + if (Replacement == nullptr) { + if (PHIValue == CaseValue) { + Replacement = Condition; + } else { + IRBuilder<> Builder(SI); + Replacement = Builder.CreateZExt(Condition, PHIType); + } + } + PHI.setIncomingValue(I, Replacement); + Changed = true; + } + if (SkipCase) + break; + } + } + } + return Changed; +} + +bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { + bool Changed = optimizeSwitchType(SI); + Changed |= optimizeSwitchPhiConstants(SI); + return Changed; +} namespace { @@ -7777,7 +7871,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. 
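// Editor's example: a trivial PHI here is one that folds to an existing
// value, e.g. %p = phi i32 [ %x, %bb1 ], [ %x, %bb2 ], which
// simplifyInstruction replaces with %x.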
-  if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+  if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
     LargeOffsetGEPMap.erase(P);
     P->replaceAllUsesWith(V);
     P->eraseFromParent();
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 1d50e1d22b95..fd52191882cb 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -13,7 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
@@ -58,6 +63,7 @@ CGOPT(bool, EnableUnsafeFPMath)
 CGOPT(bool, EnableNoInfsFPMath)
 CGOPT(bool, EnableNoNaNsFPMath)
 CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableApproxFuncFPMath)
 CGOPT(bool, EnableNoTrappingFPMath)
 CGOPT(bool, EnableAIXExtendedAltivecABI)
 CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
@@ -73,6 +79,7 @@ CGOPT(bool, StackSymbolOrdering)
 CGOPT(bool, StackRealign)
 CGOPT(std::string, TrapFuncName)
 CGOPT(bool, UseCtors)
+CGOPT(bool, LowerGlobalDtorsViaCxaAtExit)
 CGOPT(bool, RelaxELFRelocations)
 CGOPT_EXP(bool, DataSections)
 CGOPT_EXP(bool, FunctionSections)
@@ -94,6 +101,7 @@ CGOPT(bool, ForceDwarfFrameSection)
 CGOPT(bool, XRayOmitFunctionIndex)
 CGOPT(bool, DebugStrictDwarf)
 CGOPT(unsigned, AlignLoops)
+CGOPT(bool, JMCInstrument)
 
 codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
 #define CGBINDOPT(NAME) \
@@ -218,6 +226,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(EnableNoSignedZerosFPMath);
 
+  static cl::opt<bool> EnableApproxFuncFPMath(
+      "enable-approx-func-fp-math",
+      cl::desc("Enable FP math optimizations that assume approx func"),
+      cl::init(false));
+  CGBINDOPT(EnableApproxFuncFPMath);
+
   static cl::opt<bool> EnableNoTrappingFPMath(
       "enable-no-trapping-fp-math",
       cl::desc("Enable setting the FP exceptions build "
@@ -333,6 +347,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(UseCtors);
 
+  static cl::opt<bool> LowerGlobalDtorsViaCxaAtExit(
+      "lower-global-dtors-via-cxa-atexit",
+      cl::desc("Lower llvm.global_dtors (global destructors) via __cxa_atexit"),
+      cl::init(true));
+  CGBINDOPT(LowerGlobalDtorsViaCxaAtExit);
+
   static cl::opt<bool> RelaxELFRelocations(
       "relax-elf-relocations",
       cl::desc(
@@ -457,6 +477,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::desc("Default alignment for loops"));
   CGBINDOPT(AlignLoops);
 
+  static cl::opt<bool> JMCInstrument(
+      "enable-jmc-instrument",
+      cl::desc("Instrument functions with a call to __CheckForDebuggerJustMyCode"),
+      cl::init(false));
+  CGBINDOPT(JMCInstrument);
+
 #undef CGBINDOPT
 
   mc::RegisterMCTargetOptionsFlags();
@@ -493,6 +519,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.NoInfsFPMath = getEnableNoInfsFPMath();
   Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
   Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+  Options.ApproxFuncFPMath = getEnableApproxFuncFPMath();
   Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
 
   DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
@@ -509,9 +536,10 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
   Options.StackSymbolOrdering = getStackSymbolOrdering();
   Options.UseInitArray = !getUseCtors();
+  Options.LowerGlobalDtorsViaCxaAtExit = getLowerGlobalDtorsViaCxaAtExit();
   Options.RelaxELFRelocations = getRelaxELFRelocations();
   Options.DataSections =
-      getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections());
+      getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
   Options.FunctionSections = getFunctionSections();
   Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
   Options.XCOFFTracebackTable = getXCOFFTracebackTable();
@@ -531,6 +559,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
   Options.DebugStrictDwarf = getDebugStrictDwarf();
   Options.LoopAlignment = getAlignLoops();
+  Options.JMCInstrument = getJMCInstrument();
 
   Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
 
@@ -643,6 +672,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
   HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
   HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
   HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+  HANDLE_BOOL_ATTR(EnableApproxFuncFPMathView, "approx-func-fp-math");
 
   if (DenormalFPMathView->getNumOccurrences() > 0 &&
       !F.hasFnAttribute("denormal-fp-math")) {
@@ -684,4 +714,3 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
   for (Function &F : M)
     setFunctionAttributes(CPU, Features, F);
 }
-
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index d38bacdb1aa7..42192f41dbda 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -30,10 +30,10 @@
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 5579152f1ce0..ce00be634e9a 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -14,7 +14,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 1337e57f360b..565c8b405f82 100644
--- a/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -28,12 +28,9 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include
@@ -93,7 +90,7 @@ private:
   LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
                                 const MachineOperand &MO) const;
 
-  bool runOnce(MachineFunction &MF);
+  std::pair<bool, bool> runOnce(MachineFunction &MF);
 
   LaneBitmask determineInitialDefinedLanes(unsigned Reg);
   LaneBitmask determineInitialUsedLanes(unsigned Reg);
@@ -487,7 +484,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
   return true;
 }
 
-bool DetectDeadLanes::runOnce(MachineFunction &MF) {
+std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
   // First pass: Populate defs/uses of vregs with initial values
   unsigned NumVirtRegs = MRI->getNumVirtRegs();
   for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
@@ -528,6 +525,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
     dbgs() << "\n";
   });
 
+  bool Changed = false;
   bool Again = false;
   // Mark operands as dead/unused.
   for (MachineBasicBlock &MBB : MF) {
@@ -544,6 +542,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
           LLVM_DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
           MO.setIsDead();
+          Changed = true;
         }
         if (MO.readsReg()) {
           bool CrossCopy = false;
@@ -551,10 +550,12 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
             LLVM_DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI);
             MO.setIsUndef();
+            Changed = true;
           } else if (isUndefInput(MO, &CrossCopy)) {
             LLVM_DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI);
             MO.setIsUndef();
+            Changed = true;
             if (CrossCopy)
               Again = true;
           }
@@ -563,7 +564,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
     }
   }
 
-  return Again;
+  return std::make_pair(Changed, Again);
 }
 
 bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
@@ -585,13 +586,16 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
   WorklistMembers.resize(NumVirtRegs);
   DefinedByCopy.resize(NumVirtRegs);
 
+  bool Changed = false;
   bool Again;
   do {
-    Again = runOnce(MF);
+    bool LocalChanged;
+    std::tie(LocalChanged, Again) = runOnce(MF);
+    Changed |= LocalChanged;
   } while(Again);
 
   DefinedByCopy.clear();
   WorklistMembers.clear();
   delete[] VRegInfos;
-  return true;
+  return Changed;
 }
diff --git a/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index c18532946bf9..b26aa792bb93 100644
--- a/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -17,9 +17,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/InitializePasses.h"
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 6a0da4dad3c1..32858d043383 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,10 +17,10 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +30,6 @@
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -664,8 +663,8 @@ void SSAIfConv::rewritePHIOperands() {
       PI.PHI->getOperand(i-1).setMBB(Head);
       PI.PHI->getOperand(i-2).setReg(DstReg);
     } else if (MBB == getFPred()) {
-      PI.PHI->RemoveOperand(i-1);
-      PI.PHI->RemoveOperand(i-2);
+      PI.PHI->removeOperand(i-1);
+      PI.PHI->removeOperand(i-2);
     }
   }
   LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 60ee1812ee2c..b2639636dda7 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -32,6 +31,10 @@
 
 using namespace llvm;
 
+namespace llvm {
+class TargetLowering;
+}
+
 #define DEBUG_TYPE "expandmemcmp"
 
 STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
@@ -737,7 +740,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                          const TargetLowering *TLI, const DataLayout *DL,
                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
-                         DomTreeUpdater *DTU) {
+                         DomTreeUpdater *DTU, const bool IsBCmp) {
   NumMemCmpCalls++;
 
   // Early exit from expansion if -Oz.
@@ -757,7 +760,8 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   }
 
   // TTI call to check if target would like to expand memcmp. Also, get the
   // available load sizes.
-  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  const bool IsUsedForZeroCmp =
+      IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
   bool OptForSize = CI->getFunction()->hasOptSize() ||
                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
   auto Options = TTI->enableMemCmpExpansion(OptForSize,
@@ -861,7 +865,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
     LibFunc Func;
     if (TLI->getLibFunc(*CI, Func) &&
         (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
-        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
+        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
       return true;
     }
   }
@@ -881,7 +885,7 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
   bool MadeChanges = false;
   for (auto BBIt = F.begin(); BBIt != F.end();) {
     if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
-                   DTU.hasValue() ? DTU.getPointer() : nullptr)) {
+                   DTU ? DTU.getPointer() : nullptr)) {
       MadeChanges = true;
       // If changes were made, restart the function from the beginning, since
       // the structure of the function was changed.
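A note on the IsBCmp parameter threaded through expandMemCmp above: bcmp only has to report whether the two buffers differ, never an ordering, so every bcmp call can take the cheaper zero-equality expansion path even when its result is not solely compared against zero. A minimal standalone C++ sketch of the zero-equality shape for one fixed 8-byte block (differs8 is an illustrative name, not an LLVM API):

#include <cstdint>
#include <cstring>

// Zero-equality form: one wide load per buffer, XOR to collect the
// differing bits, then a single compare against zero. No byte-order or
// signedness handling is needed because only "equal or not" is observable.
static bool differs8(const void *P, const void *Q) {
  uint64_t A, B;
  std::memcpy(&A, P, sizeof(A)); // memcpy keeps the loads alignment-safe
  std::memcpy(&B, Q, sizeof(B));
  return (A ^ B) != 0;
}

For larger sizes the expansion ORs the per-block XOR results together before the final compare; the ordered form that memcmp otherwise needs is costlier because it must locate the first differing block and compare it in byte order.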
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d9caa8ad42d0..086b4a4dcc47 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -13,8 +13,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -104,8 +102,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
 
   if (MI->allDefsAreDead()) {
     MI->setDesc(TII->get(TargetOpcode::KILL));
-    MI->RemoveOperand(3); // SubIdx
-    MI->RemoveOperand(1); // Imm
+    MI->removeOperand(3); // SubIdx
+    MI->removeOperand(1); // Imm
     LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
     return true;
   }
@@ -117,8 +115,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
   // We must leave %rax live.
   if (DstReg != InsReg) {
     MI->setDesc(TII->get(TargetOpcode::KILL));
-    MI->RemoveOperand(3); // SubIdx
-    MI->RemoveOperand(1); // Imm
+    MI->removeOperand(3); // SubIdx
+    MI->removeOperand(1); // Imm
     LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);
     return true;
   }
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 2bcaf750911b..f08c47d220ea 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -14,12 +14,10 @@
 #include "llvm/CodeGen/ExpandReductions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index bb8d2b3e9a78..7883a48d121c 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -23,13 +23,11 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
 
 using namespace llvm;
 
@@ -115,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
   OldOp.eraseFromParent();
 }
 
+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+  // The result of VP reductions depends on the mask and evl.
+  if (isa<VPReductionIntrinsic>(VPI))
+    return false;
+  // Fallback to whether the intrinsic is speculatable.
+  Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
+  unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
+  return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
+                                                cast<Operator>(&VPI));
+}
+
 //// } Helpers
 
 namespace {
@@ -218,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
 Value *
 CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
+  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
          "Implicitly dropping %evl in non-speculatable operator!");
 
   auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
@@ -298,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
 Value *
 CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                 VPReductionIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
+  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
          "Implicitly dropping %evl in non-speculatable operator!");
 
   Value *Mask = VPI.getMaskParam();
@@ -473,9 +480,9 @@ struct TransformJob {
   bool isDone() const { return Strategy.shouldDoNothing(); }
 };
 
-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
-  // Speculatable instructions do not strictly need predication.
-  if (isSafeToSpeculativelyExecute(&I)) {
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
+  // Operations with speculatable lanes do not strictly need predication.
+  if (maySpeculateLanes(VPI)) {
     // Converting a speculatable VP intrinsic means dropping %mask and %evl.
     // No need to expand %evl into the %mask only to ignore that code.
     if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
@@ -520,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() {
       if (!VPI)
         continue;
       auto VPStrat = getVPLegalizationStrategy(*VPI);
-      sanitizeStrategy(I, VPStrat);
+      sanitizeStrategy(*VPI, VPStrat);
       if (!VPStrat.shouldDoNothing())
         Worklist.emplace_back(VPI, VPStrat);
     }
diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp b/llvm/lib/CodeGen/FEntryInserter.cpp
index c2194929e2e7..68304dd41db0 100644
--- a/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -13,12 +13,9 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp
index 1d35b194f218..3ec666227651 100644
--- a/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/llvm/lib/CodeGen/FaultMaps.cpp
@@ -52,7 +52,7 @@ void FaultMaps::serializeToFaultMapSection() {
   // Create the section.
   MCSection *FaultMapSection =
       OutContext.getObjectFileInfo()->getFaultMapSection();
-  OS.SwitchSection(FaultMapSection);
+  OS.switchSection(FaultMapSection);
 
   // Emit a dummy symbol to force section inclusion.
   OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp
index 00040e92a829..329c9587e321 100644
--- a/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -16,11 +16,9 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "finalize-isel"
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index ec6bf18b2769..252910fd9462 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -24,10 +24,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/InitializePasses.h"
@@ -156,12 +153,17 @@ static Register performCopyPropagation(Register Reg,
   RI = ++MachineBasicBlock::iterator(Def);
   IsKill = DestSrc->Source->isKill();
 
-  // There are no uses of original register between COPY and STATEPOINT.
-  // There can't be any after STATEPOINT, so we can eliminate Def.
   if (!Use) {
+    // There are no uses of original register between COPY and STATEPOINT.
+    // There can't be any after STATEPOINT, so we can eliminate Def.
     LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
     Def->eraseFromParent();
+  } else if (IsKill) {
+    // COPY will remain in place, spill will be inserted *after* it, so it is
+    // not a kill of source anymore.
+    const_cast<MachineOperand *>(DestSrc->Source)->setIsKill(false);
   }
+
   return SrcReg;
 }
diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp
index af5515cc6bfd..4d27143c5298 100644
--- a/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/llvm/lib/CodeGen/GCMetadata.cpp
@@ -11,16 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include
 #include
 #include
 #include
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 637a877810a1..80feb0045406 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -14,7 +14,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -24,9 +23,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCContext.h"
 
 using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index f9bfe8518083..ac140e745600 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -67,7 +67,8 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
 }
 
 bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
-  return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_IMPLICIT_DEF;
+  return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT ||
+         Opc == TargetOpcode::G_IMPLICIT_DEF;
 }
 
 std::unique_ptr<CSEConfigBase>
@@ -88,7 +89,7 @@ void GISelCSEInfo::setMF(MachineFunction &MF) {
   this->MRI = &MF.getRegInfo();
 }
 
-GISelCSEInfo::~GISelCSEInfo() {}
+GISelCSEInfo::~GISelCSEInfo() = default;
 
 bool GISelCSEInfo::isUniqueMachineInstValid(
     const UniqueMachineInstr &UMI) const {
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 1a642e233a6a..a432e4ed7fb7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -12,6 +12,7 @@
 //
 
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -174,6 +175,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
   default:
     break;
   case TargetOpcode::G_ADD:
+  case TargetOpcode::G_PTR_ADD:
   case TargetOpcode::G_AND:
   case TargetOpcode::G_ASHR:
   case TargetOpcode::G_LSHR:
@@ -185,23 +187,54 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_UREM:
-  case TargetOpcode::G_SREM: {
+  case TargetOpcode::G_SREM:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: {
     // Try to constant fold these.
     assert(SrcOps.size() == 2 && "Invalid sources");
     assert(DstOps.size() == 1 && "Invalid dsts");
-    if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+    LLT SrcTy = SrcOps[0].getLLTTy(*getMRI());
+
+    if (Opc == TargetOpcode::G_PTR_ADD &&
+        getDataLayout().isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+      break;
+
+    if (SrcTy.isVector()) {
       // Try to constant fold vector constants.
-      Register VecCst = ConstantFoldVectorBinop(
-          Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
-      if (VecCst)
-        return buildCopy(DstOps[0], VecCst);
+      SmallVector<APInt> VecCst = ConstantFoldVectorBinop(
+          Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI());
+      if (!VecCst.empty())
+        return buildBuildVectorConstant(DstOps[0], VecCst);
       break;
     }
+
     if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
                                                 SrcOps[1].getReg(), *getMRI()))
       return buildConstant(DstOps[0], *Cst);
     break;
   }
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FDIV:
+  case TargetOpcode::G_FREM:
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMINNUM_IEEE:
+  case TargetOpcode::G_FMAXNUM_IEEE:
+  case TargetOpcode::G_FMINIMUM:
+  case TargetOpcode::G_FMAXIMUM:
+  case TargetOpcode::G_FCOPYSIGN: {
+    // Try to constant fold these.
+    assert(SrcOps.size() == 2 && "Invalid sources");
+    assert(DstOps.size() == 1 && "Invalid dsts");
+    if (Optional<APFloat> Cst = ConstantFoldFPBinOp(
+            Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
+      return buildFConstant(DstOps[0], *Cst);
+    break;
+  }
   case TargetOpcode::G_SEXT_INREG: {
     assert(DstOps.size() == 1 && "Invalid dst ops");
     assert(SrcOps.size() == 2 && "Invalid src ops");
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1ec7868f2234..081c8b125f17 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -11,16 +11,16 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
@@ -698,10 +698,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
           ValTy, extendOpFromFlags(Args[i].Flags[0]));
     }
 
+    bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
     for (unsigned Part = 0; Part < NumParts; ++Part) {
       Register ArgReg = Args[i].Regs[Part];
       // There should be Regs.size() ArgLocs per argument.
-      VA = ArgLocs[j + Part];
+      unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
+      CCValAssign &VA = ArgLocs[j + Idx];
       const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
 
       if (VA.isMemLoc() && !Flags.isByVal()) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 30f8838805b5..1a5fe3e84c17 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,14 +13,13 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
-#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "gi-combiner"
@@ -57,8 +56,7 @@ class WorkListMaintainer : public GISelChangeObserver {
 
 public:
   WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
-  virtual ~WorkListMaintainer() {
-  }
+  virtual ~WorkListMaintainer() = default;
 
   void erasingInstr(MachineInstr &MI) override {
     LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
@@ -115,7 +113,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
 
   bool MFChanged = false;
   bool Changed;
-  MachineIRBuilder &B = *Builder.get();
+  MachineIRBuilder &B = *Builder;
 
   do {
     // Collect all instructions. Do a post order traversal for basic blocks and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d6a009744161..2c94f87804ac 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8,7 +8,6 @@
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -16,23 +15,22 @@
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DivisionByConstantInfo.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
 #include
 
 #define DEBUG_TYPE "gi-combiner"
@@ -131,9 +129,27 @@ isBigEndian(const SmallDenseMap<int64_t, int64_t> &MemOffset2Idx,
   return BigEndian;
 }
 
+bool CombinerHelper::isPreLegalize() const { return !LI; }
+
+bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
+  assert(LI && "Must have LegalizerInfo to query isLegal!");
+  return LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
 bool CombinerHelper::isLegalOrBeforeLegalizer(
     const LegalityQuery &Query) const {
-  return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+  return isPreLegalize() || isLegal(Query);
+}
+
+bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
+  if (!Ty.isVector())
+    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
+  // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
+  if (isPreLegalize())
+    return true;
+  LLT EltTy = Ty.getElementType();
+  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
+         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
 }
 
 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
@@ -1275,12 +1291,12 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
   Register SrcReg = MI.getOperand(1).getReg();
   LLT DstTy = MRI.getType(DstReg);
   Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
-  return Cst.hasValue();
+  return Cst.has_value();
 }
 
 void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                      Optional<APFloat> &Cst) {
-  assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+  assert(Cst && "Optional is unexpectedly empty!");
   Builder.setInstrAndDebugLoc(MI);
   MachineFunction &MF = Builder.getMF();
   auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
@@ -2350,6 +2366,19 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
     return false;
 
+  // If both instructions are loads or stores, they are equal only if both
+  // are dereferenceable invariant loads with the same number of bits.
+  if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
+    GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
+    GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
+    if (!LS1 || !LS2)
+      return false;
+
+    if (!I2->isDereferenceableInvariantLoad(nullptr) ||
+        (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
+      return false;
+  }
+
   // Check for physical registers on the instructions first to avoid cases
   // like this:
   //
@@ -2397,7 +2426,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
     return false;
   auto *MI = MRI.getVRegDef(MOP.getReg());
   auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
-  return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+  return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
          MaybeCst->getSExtValue() == C;
 }
 
@@ -2916,7 +2945,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI,
   int64_t Cst;
   if (Ty.isVector()) {
     MachineInstr *CstDef = MRI.getVRegDef(CstReg);
-    auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+    auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
     if (!MaybeCst)
       return false;
     if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
@@ -3049,6 +3078,102 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
+                                              unsigned &SelectOpNo) {
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  Register OtherOperandReg = RHS;
+  SelectOpNo = 1;
+  MachineInstr *Select = MRI.getVRegDef(LHS);
+
+  // Don't do this unless the old select is going away. We want to eliminate the
+  // binary operator, not replace a binop with a select.
+  if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+      !MRI.hasOneNonDBGUse(LHS)) {
+    OtherOperandReg = LHS;
+    SelectOpNo = 2;
+    Select = MRI.getVRegDef(RHS);
+    if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+        !MRI.hasOneNonDBGUse(RHS))
+      return false;
+  }
+
+  MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
+  MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
+
+  if (!isConstantOrConstantVector(*SelectLHS, MRI,
+                                  /*AllowFP*/ true,
+                                  /*AllowOpaqueConstants*/ false))
+    return false;
+  if (!isConstantOrConstantVector(*SelectRHS, MRI,
+                                  /*AllowFP*/ true,
+                                  /*AllowOpaqueConstants*/ false))
+    return false;
+
+  unsigned BinOpcode = MI.getOpcode();
+
+  // We now know one of the operands is a select of constants. Now verify that
+  // the other binary operator operand is either a constant, or we can handle a
+  // variable.
+  bool CanFoldNonConst =
+      (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
+      (isNullOrNullSplat(*SelectLHS, MRI) ||
+       isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
+      (isNullOrNullSplat(*SelectRHS, MRI) ||
+       isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
+  if (CanFoldNonConst)
+    return true;
+
+  return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
+                                    /*AllowFP*/ true,
+                                    /*AllowOpaqueConstants*/ false);
+}
+
+/// \p SelectOperand is the operand in binary operator \p MI that is the select
+/// to fold.
+bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+                                              const unsigned &SelectOperand) {
+  Builder.setInstrAndDebugLoc(MI);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
+
+  Register SelectCond = Select->getOperand(1).getReg();
+  Register SelectTrue = Select->getOperand(2).getReg();
+  Register SelectFalse = Select->getOperand(3).getReg();
+
+  LLT Ty = MRI.getType(Dst);
+  unsigned BinOpcode = MI.getOpcode();
+
+  Register FoldTrue, FoldFalse;
+
+  // We have a select-of-constants followed by a binary operator with a
+  // constant. Eliminate the binop by pulling the constant math into the select.
+  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+  if (SelectOperand == 1) {
+    // TODO: SelectionDAG verifies this actually constant folds before
+    // committing to the combine.
+
+    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
+    FoldFalse =
+        Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
+  } else {
+    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
+    FoldFalse =
+        Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
+  }
+
+  Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
+  Observer.erasingInstr(*Select);
+  Select->eraseFromParent();
+  MI.eraseFromParent();
+
+  return true;
+}
+
 Optional<SmallDenseMap<int64_t, int64_t, 8>>
 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
   assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
@@ -3340,7 +3465,7 @@ bool CombinerHelper::matchLoadOrCombine(
   // BSWAP.
   bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
   Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
-  if (!IsBigEndian.hasValue())
+  if (!IsBigEndian)
     return false;
   bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
   if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
@@ -3848,7 +3973,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
     auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
     if (!Cst)
       return false;
-    unsigned Idx = Cst.getValue().getZExtValue();
+    unsigned Idx = Cst->getZExtValue();
     if (Idx >= NumElts)
       return false; // Out of range.
     ExtractedElts.set(Idx);
@@ -3904,10 +4029,9 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
 
   // Given constants C0 and C1 such that C0 + C1 is bit-width:
   // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
-  // TODO: Match constant splat.
   int64_t CstShlAmt, CstLShrAmt;
-  if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
-      mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
+  if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
+      mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
       CstShlAmt + CstLShrAmt == BitWidth) {
     FshOpc = TargetOpcode::G_FSHR;
     Amt = LShrAmt;
@@ -3958,7 +4082,7 @@ void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
   Observer.changingInstr(MI);
   MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
                                          : TargetOpcode::G_ROTR));
-  MI.RemoveOperand(2);
+  MI.removeOperand(2);
   Observer.changedInstr(MI);
 }
 
@@ -4100,18 +4224,23 @@ bool CombinerHelper::matchAndOrDisjointMask(
     return false;
 
   Register Src;
-  int64_t MaskAnd;
-  int64_t MaskOr;
+  Register AndMaskReg;
+  int64_t AndMaskBits;
+  int64_t OrMaskBits;
   if (!mi_match(MI, MRI,
-                m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+                m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
+                       m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
    return false;

-  // Check if MaskOr could turn on any bits in Src.
-  if (MaskAnd & MaskOr)
+  // Check if OrMask could turn on any bits in Src.
+  if (AndMaskBits & OrMaskBits)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
+    // Canonicalize the result to have the constant on the RHS.
+    if (MI.getOperand(1).getReg() == AndMaskReg)
+      MI.getOperand(2).setReg(AndMaskReg);
    MI.getOperand(1).setReg(Src);
    Observer.changedInstr(MI);
  };
@@ -4259,6 +4388,14 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
   if (ShrAmt < 0 || ShrAmt >= Size)
     return false;
 
+  // If the shift subsumes the mask, emit the 0 directly.
+  if (0 == (SMask >> ShrAmt)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildConstant(Dst, 0);
+    };
+    return true;
+  }
+
   // Check that ubfx can do the extraction, with no holes in the mask.
   uint64_t UMask = SMask;
   UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
@@ -4585,6 +4722,42 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
   return true;
 }
 
+bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // (G_*MULO x, 0) -> 0 + no carry out
+  assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
+         MI.getOpcode() == TargetOpcode::G_SMULO);
+  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  Register Carry = MI.getOperand(1).getReg();
+  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
+      !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+    return false;
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildConstant(Dst, 0);
+    B.buildConstant(Carry, 0);
+  };
+  return true;
+}
+
+bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // (G_*ADDO x, 0) -> x + no carry out
+  assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
+         MI.getOpcode() == TargetOpcode::G_SADDO);
+  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+    return false;
+  Register Carry = MI.getOperand(1).getReg();
+  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(2).getReg();
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildCopy(Dst, LHS);
+    B.buildConstant(Carry, 0);
+  };
+  return true;
+}
+
 MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UDIV);
   auto &UDiv = cast<GenericMachineInstr>(MI);
@@ -5376,6 +5549,106 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
   return false;
 }
 
+bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo) {
+  GSelect &Sel = cast<GSelect>(MI);
+  Register DstReg = Sel.getReg(0);
+  Register Cond = Sel.getCondReg();
+  Register TrueReg = Sel.getTrueReg();
+  Register FalseReg = Sel.getFalseReg();
+
+  auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
+  auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
+
+  const LLT CondTy = MRI.getType(Cond);
+  const LLT OpTy = MRI.getType(TrueReg);
+  if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
+    return false;
+
+  // We have a boolean select.
+
+  // select Cond, Cond, F --> or Cond, F
+  // select Cond, 1, F --> or Cond, F
+  auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
+  if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildOr(DstReg, Cond, FalseReg);
+    };
+    return true;
+  }
+
+  // select Cond, T, Cond --> and Cond, T
+  // select Cond, T, 0 --> and Cond, T
+  auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
+  if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildAnd(DstReg, Cond, TrueReg);
+    };
+    return true;
+  }
+
+  // select Cond, T, 1 --> or (not Cond), T
+  if (MaybeCstFalse && MaybeCstFalse->isOne()) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
+    };
+    return true;
+  }
+
+  // select Cond, 0, F --> and (not Cond), F
+  if (MaybeCstTrue && MaybeCstTrue->isZero()) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
+    };
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
+                                            unsigned &IdxToPropagate) {
+  bool PropagateNaN;
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXNUM:
+    PropagateNaN = false;
+    break;
+  case TargetOpcode::G_FMINIMUM:
+  case TargetOpcode::G_FMAXIMUM:
+    PropagateNaN = true;
+    break;
+  }
+
+  auto MatchNaN = [&](unsigned Idx) {
+    Register MaybeNaNReg = MI.getOperand(Idx).getReg();
+    const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
+    if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
+      return false;
+    IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
+    return true;
+  };
+
+  return MatchNaN(1) || MatchNaN(2);
+}
+
+bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
+  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // Helper lambda to check for opportunities for
+  // A + (B - A) -> B
+  // (B - A) + A -> B
+  auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
+    Register Reg;
+    return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
+           Reg == MaybeSameReg;
+  };
+  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 64c2f0d5f8e4..4f03af0fce82 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -567,6 +567,26 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
     Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
     break;
   }
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_USUBO:
+  case TargetOpcode::G_USUBE:
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_UMULO:
+  case TargetOpcode::G_SMULO: {
+    if (MI.getOperand(1).getReg() == R) {
+      // If we know the result of a compare has the top bits zero, use this
+      // info.
+      if (TL.getBooleanContents(DstTy.isVector(), false) ==
+              TargetLowering::ZeroOrOneBooleanContent &&
+          BitWidth > 1)
+        Known.Zero.setBitsFrom(1);
+    }
+    break;
+  }
   }
 
   assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -673,6 +693,27 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
                                       MI.getOperand(3).getReg(), DemandedElts,
                                       Depth + 1);
   }
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_USUBO:
+  case TargetOpcode::G_USUBE:
+  case TargetOpcode::G_SMULO:
+  case TargetOpcode::G_UMULO: {
+    // If compares returns 0/-1, all bits are sign bits.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integer.
+    if (MI.getOperand(1).getReg() == R) {
+      if (TL.getBooleanContents(DstTy.isVector(), false) ==
+          TargetLowering::ZeroOrNegativeOneBooleanContent)
+        return TyBits;
+    }
+
+    break;
+  }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
   default: {
diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 252b931602c6..efcc40641ea8 100644
--- a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
 
 using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6d415c9c7f90..a2af66d28f4a 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -16,10 +16,11 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
@@ -47,7 +48,6 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
@@ -78,7 +78,6 @@
 #include "llvm/Transforms/Utils/MemoryOpRemark.h"
 #include
 #include
-#include
 #include
 #include
 #include
@@ -1818,7 +1817,7 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
 bool IRTranslator::translateConstrainedFPIntrinsic(
     const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
-  fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+  fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
 
   unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
   if (!Opcode)
@@ -2252,6 +2251,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
     return CLI->lowerCall(MIRBuilder, Info);
   }
+  case Intrinsic::fptrunc_round: {
+    unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+    // Convert the metadata argument to a constant integer
+    Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+    // Add the Rounding mode as an integer
+    MIRBuilder
+        .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
+                    {getOrCreateVReg(CI)},
+                    {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
+        .addImm((int)*RoundMode);
+
+    return true;
+  }
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
@@ -2409,7 +2425,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   TargetLowering::IntrinsicInfo Info;
   // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
   if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
-    Align Alignment = Info.align.getValueOr(
+    Align Alignment = Info.align.value_or(
         DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
     LLT MemTy = Info.memVT.isSimple()
                     ? getLLTForMVT(Info.memVT.getSimpleVT())
@@ -2934,15 +2950,6 @@ void IRTranslator::finishPendingPhis() {
   }
 }
 
-bool IRTranslator::valueIsSplit(const Value &V,
-                                SmallVectorImpl<uint64_t> *Offsets) {
-  SmallVector<LLT, 4> SplitTys;
-  if (Offsets && !Offsets->empty())
-    Offsets->clear();
-  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
-  return SplitTys.size() > 1;
-}
-
 bool IRTranslator::translate(const Instruction &Inst) {
   CurBuilder->setDebugLoc(Inst.getDebugLoc());
 
@@ -2984,7 +2991,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
     // Return the scalar if it is a <1 x Ty> vector.
     unsigned NumElts = CAZ->getElementCount().getFixedValue();
     if (NumElts == 1)
-      return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
+      return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder);
     SmallVector<Register> Ops;
     for (unsigned I = 0; I < NumElts; ++I) {
       Constant &Elt = *CAZ->getElementValue(I);
@@ -2994,8 +3001,7 @@
   } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
     // Return the scalar if it is a <1 x Ty> vector.
     if (CV->getNumElements() == 1)
-      return translateCopy(C, *CV->getElementAsConstant(0),
-                           *EntryBuilder.get());
+      return translateCopy(C, *CV->getElementAsConstant(0), *EntryBuilder);
     SmallVector<Register> Ops;
     for (unsigned i = 0; i < CV->getNumElements(); ++i) {
       Constant &Elt = *CV->getElementAsConstant(i);
@@ -3013,7 +3019,7 @@
   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
     if (CV->getNumOperands() == 1)
-      return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
+      return translateCopy(C, *CV->getOperand(0), *EntryBuilder);
     SmallVector<Register> Ops;
     for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
       Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
@@ -3255,14 +3261,13 @@ bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
     return false;
   }
 
-  // On PS4, the "return address" must still be within the calling function,
-  // even if it's at the very end, so emit an explicit TRAP here.
-  // Passing 'true' for doesNotReturn above won't generate the trap for us.
+  // On PS4/PS5, the "return address" must still be within the calling
+  // function, even if it's at the very end, so emit an explicit TRAP here.
   // WebAssembly needs an unreachable instruction after a non-returning call,
   // because the function return type can be different from __stack_chk_fail's
   // return type (void).
   const TargetMachine &TM = MF->getTarget();
-  if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) {
+  if (TM.getTargetTriple().isPS() || TM.getTargetTriple().isWasm()) {
     LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
     return false;
   }
@@ -3413,7 +3418,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
     }
   }
 
-  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
+  if (!CLI->lowerFormalArguments(*EntryBuilder, F, VRegArgs, FuncInfo)) {
     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                F.getSubprogram(), &F.getEntryBlock());
     R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
@@ -3469,8 +3474,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
       return false;
     }
 
-    if (!finalizeBasicBlock(*BB, MBB))
+    if (!finalizeBasicBlock(*BB, MBB)) {
+      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+                                 BB->getTerminator()->getDebugLoc(), BB);
+      R << "unable to translate basic block";
+      reportTranslationError(*MF, *TPC, *ORE, R);
       return false;
+    }
   }
 #ifndef NDEBUG
   WrapperObserver.removeObserver(&Verifier);
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e5f95ca5aa73..95ae8383b6fa 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -12,15 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
-#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 
 #define DEBUG_TYPE "inline-asm-lowering"
@@ -150,6 +145,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
   case TargetLowering::C_RegisterClass:
     return 2;
   case TargetLowering::C_Memory:
+  case TargetLowering::C_Address:
     return 3;
   }
   llvm_unreachable("Invalid constraint type");
@@ -310,7 +306,7 @@ bool InlineAsmLowering::lowerInlineAsm(
     // If this is an indirect operand, the operand is a pointer to the
    // accessed type.
    if (OpInfo.isIndirect) {
-      OpTy = Call.getAttributes().getParamElementType(ArgNo);
+      OpTy = Call.getParamElementType(ArgNo);
      assert(OpTy && "Indirect operand must have elementtype attribute");
    }
@@ -649,6 +645,8 @@ bool InlineAsmLowering::lowerInlineAsm(
       return false;
     case TargetLowering::C_Memory:
       break; // Already handled.
+    case TargetLowering::C_Address:
+      break; // Silence warning.
     case TargetLowering::C_Unknown:
       LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n");
       return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 2bb5addefe48..28f3b425c67d 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,8 +12,6 @@
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -23,14 +21,13 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Config/config.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGenCoverage.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 1d0c106fd5db..8959d215ecd1 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -13,16 +13,9 @@
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include
 
 #define DEBUG_TYPE "instructionselector"
 
@@ -66,6 +59,10 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
       std::next(MI.getIterator()) == IntoMI.getIterator())
     return true;
 
+  // Convergent instructions cannot be moved in the CFG.
+  if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+    return false;
+
   return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
          !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 1f0738a8d9d2..54a82cac95d5 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -188,6 +188,13 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   };
 }
 
+LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
+    return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes());
+  };
+}
+
 LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
     const LLT QueryTy = Query.Types[TypeIdx];
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 75b7fcb5663a..25c1db91b05d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -43,6 +43,27 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
   };
 }
 
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+                                                         unsigned FromTypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT OldTy = Query.Types[TypeIdx];
+    const LLT NewTy = Query.Types[FromTypeIdx];
+    ElementCount NewEltCount =
+        NewTy.isVector() ? NewTy.getElementCount() : ElementCount::getFixed(1);
+    return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+  };
+}
+
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+                                                         LLT NewEltTy) {
+  return [=](const LegalityQuery &Query) {
+    const LLT OldTy = Query.Types[TypeIdx];
+    ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
NewEltTy.getElementCount() + : ElementCount::getFixed(1); + return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount)); + }; +} + LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 0ab4a7f64840..f09e5b7ce783 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" @@ -24,15 +24,11 @@ #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" -#include "llvm/Target/TargetMachine.h" - -#include #define DEBUG_TYPE "legalizer" diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 37bc8a65dc7c..fb046d519ac8 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -15,10 +15,13 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -1611,40 +1614,6 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } -Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { - Register WideReg = MRI.createGenericVirtualRegister(WideTy); - LLT OrigTy = MRI.getType(OrigReg); - LLT LCMTy = getLCMType(WideTy, OrigTy); - - const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); - const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); - - Register UnmergeSrc = WideReg; - - // Create a merge to the LCM type, padding with undef - // %0:_(<3 x s32>) = G_FOO => <4 x s32> - // => - // %1:_(<4 x s32>) = G_FOO - // %2:_(<4 x s32>) = G_IMPLICIT_DEF - // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 - // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 - if (NumMergeParts > 1) { - Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); - SmallVector MergeParts(NumMergeParts, Undef); - MergeParts[0] = WideReg; - UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); - } - - // Unmerge to the original register and pad with dead defs. 
- SmallVector UnmergeResults(NumUnmergeParts); - UnmergeResults[0] = OrigReg; - for (int I = 1; I != NumUnmergeParts; ++I) - UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); - - MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); - return WideReg; -} - LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1867,9 +1836,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; // TODO - unsigned Opcode; unsigned ExtOpcode; Optional CarryIn = None; @@ -1914,6 +1880,18 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, break; } + if (TypeIdx == 1) { + unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false); + + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 1); + if (CarryIn) + widenScalarSrc(MI, WideTy, 4, BoolExtOp); + + Observer.changedInstr(MI); + return Legalized; + } + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); // Do the arithmetic in the larger type. @@ -1985,8 +1963,12 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; + if (TypeIdx == 1) { + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 1); + Observer.changedInstr(MI); + return Legalized; + } bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO; Register Result = MI.getOperand(0).getReg(); @@ -2992,7 +2974,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { if (isa(LoadMI)) { auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits); - } else if (isa(LoadMI) || WideMemTy == DstTy) { + } else if (isa(LoadMI) || WideMemTy == LoadTy) { auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); // The extra bits are guaranteed to be zero, since we stored them that // way. A zext load from Wide thus automatically gives zext from MemVT. @@ -3314,7 +3296,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { Observer.changingInstr(MI); const auto &TII = MIRBuilder.getTII(); MI.setDesc(TII.get(TargetOpcode::G_MUL)); - MI.RemoveOperand(1); + MI.removeOperand(1); Observer.changedInstr(MI); auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS}); @@ -4096,13 +4078,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, // is a load, return the new registers in ValRegs. For a store, each elements // of ValRegs should be PartTy. Returns the next offset that needs to be // handled. 
+ bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian(); auto MMO = LdStMI.getMMO(); auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl &ValRegs, - unsigned Offset) -> unsigned { + unsigned NumParts, unsigned Offset) -> unsigned { MachineFunction &MF = MIRBuilder.getMF(); unsigned PartSize = PartTy.getSizeInBits(); for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; - Offset += PartSize, ++Idx) { + ++Idx) { unsigned ByteOffset = Offset / 8; Register NewAddrReg; @@ -4118,16 +4101,19 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, } else { MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); } + Offset = isBigEndian ? Offset - PartSize : Offset + PartSize; } return Offset; }; - unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); + unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0; + unsigned HandledOffset = + splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset); // Handle the rest of the register if this isn't an even type breakdown. if (LeftoverTy.isValid()) - splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); + splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset); if (IsLoad) { insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, @@ -4236,6 +4222,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTTOPTR: case G_PTRTOINT: case G_ADDRSPACE_CAST: + case G_UADDO: + case G_USUBO: + case G_UADDE: + case G_USUBE: + case G_SADDO: + case G_SSUBO: + case G_SADDE: + case G_SSUBE: return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: @@ -4882,10 +4876,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_SELECT: - if (TypeIdx != 0) - return UnableToLegalize; - if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + case TargetOpcode::G_SELECT: { + Register DstReg = MI.getOperand(0).getReg(); + Register CondReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT CondTy = MRI.getType(CondReg); + if (TypeIdx == 1) { + if (!CondTy.isScalar() || + DstTy.getElementCount() != MoreTy.getElementCount()) + return UnableToLegalize; + + // This is turning a scalar select of vectors into a vector + // select. Broadcast the select condition. + auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ShufSplat.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + + if (CondTy.isVector()) return UnableToLegalize; Observer.changingInstr(MI); @@ -4894,6 +4904,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + } case TargetOpcode::G_UNMERGE_VALUES: return UnableToLegalize; case TargetOpcode::G_PHI: @@ -7229,25 +7240,32 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { Register Op2Reg = MI.getOperand(3).getReg(); LLT DstTy = MRI.getType(DstReg); LLT MaskTy = MRI.getType(MaskReg); - LLT Op1Ty = MRI.getType(Op1Reg); if (!DstTy.isVector()) return UnableToLegalize; - // Vector selects can have a scalar predicate. If so, splat into a vector and - // finish for later legalization attempts to try again. if (MaskTy.isScalar()) { + // Turn the scalar condition into a vector condition mask. 
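The G_SELECT changes around here splat a scalar condition across the destination's lanes. The sign extension matters because a vector select wants an all-ones/all-zeros mask per lane; a self-contained model on 8-bit lanes (values are arbitrary):

#include <cstdint>
#include <cstdio>

int main() {
  // A boolean condition: only bit 0 is meaningful, as with an s1 in MIR.
  uint8_t Cond = 1;
  // Sign-extend bit 0 across the lane: 1 -> 0xFF, 0 -> 0x00. This is the
  // effect of the G_SEXT_INREG-from-width-1 that precedes the splat.
  uint8_t Mask = static_cast<uint8_t>(-(Cond & 1));
  uint8_t A = 0xAB, B = 0xCD;
  // With an all-ones/all-zeros mask, the per-lane select is pure bitwise math.
  uint8_t Sel = static_cast<uint8_t>((Mask & A) | (~Mask & B));
  std::printf("mask=0x%02X sel=0x%02X\n", Mask, Sel); // mask=0xFF sel=0xAB
}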
+ Register MaskElt = MaskReg; - if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) - MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); - // Generate a vector splat idiom to be pattern matched later. + + // The condition was potentially zero extended before, but we want a sign + // extended boolean. + if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() && + MaskTy != LLT::scalar(1)) { + MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0); + } + + // Continue the sign extension (or truncate) to match the data type. + MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(), + MaskElt).getReg(0); + + // Generate a vector splat idiom. auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(ShufSplat.getReg(0)); - Observer.changedInstr(MI); - return Legalized; + MaskReg = ShufSplat.getReg(0); + MaskTy = DstTy; } - if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { + if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) { return UnableToLegalize; } @@ -7414,7 +7432,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { unsigned NumBits = Ty.getScalarSizeInBits(); auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI); if (!Ty.isVector() && ValVRegAndVal) { - APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); + APInt Scalar = ValVRegAndVal->Value.trunc(8); APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } @@ -7569,7 +7587,7 @@ LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) { // See if this is a constant length copy auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI); // FIXME: support dynamically sized G_MEMCPY_INLINE - assert(LenVRegAndVal.hasValue() && + assert(LenVRegAndVal && "inline memcpy with dynamic size is not yet supported"); uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { @@ -7609,7 +7627,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) @@ -7644,7 +7662,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 2; + NewAlign = NewAlign.previous(); if (NewAlign > Alignment) { Alignment = NewAlign; @@ -7717,7 +7735,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) @@ -7752,7 +7770,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 
2; + NewAlign = NewAlign.previous(); if (NewAlign > Alignment) { Alignment = NewAlign; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 30697913a6a4..6adb7ddb5b66 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,9 +22,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MathExtras.h" #include -#include using namespace llvm; using namespace LegalizeActions; @@ -132,15 +129,16 @@ static bool mutationIsSane(const LegalizeRule &Rule, LLVM_FALLTHROUGH; case MoreElements: { // MoreElements can go from scalar to vector. - const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1; + const ElementCount OldElts = OldTy.isVector() ? + OldTy.getElementCount() : ElementCount::getFixed(1); if (NewTy.isVector()) { if (Rule.getAction() == FewerElements) { // Make sure the element count really decreased. - if (NewTy.getNumElements() >= OldElts) + if (ElementCount::isKnownGE(NewTy.getElementCount(), OldElts)) return false; } else { // Make sure the element count really increased. - if (NewTy.getNumElements() <= OldElts) + if (ElementCount::isKnownLE(NewTy.getElementCount(), OldElts)) return false; } } else if (Rule.getAction() == MoreElements) diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index de8dbd456901..d4fbf7d15089 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -73,6 +73,7 @@ void LoadStoreOpt::init(MachineFunction &MF) { void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); + AU.setPreservesAll(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -508,6 +509,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI, if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits()) return false; + // Avoid adding volatile or ordered stores to the candidate. We already have a + // check for this in instMayAlias(), but that only gets called later, between + // potential aliasing hazards. + if (!StoreMI.isSimple()) + return false; + Register StoreAddr = StoreMI.getPointerReg(); auto BIO = getPointerInfo(StoreAddr, *MRI); Register StoreBase = BIO.BaseReg; diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 328a278f3d68..c1287693e74d 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index c6720568b362..19ebf46191a9 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -9,8 +9,6 @@ /// This file implements the MachineIRBuilder class.
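The mutationIsSane hunk above replaces raw getNumElements() comparisons with ElementCount queries, which stay sound when scalable vectors are involved. A short sketch of those semantics, assuming nothing beyond llvm/Support/TypeSize.h:

#include "llvm/Support/TypeSize.h"
#include <cassert>

using llvm::ElementCount;

void elementCountDemo() {
  ElementCount Fixed4 = ElementCount::getFixed(4);
  ElementCount Scal4 = ElementCount::getScalable(4); // <vscale x 4 x ...>
  // Fixed counts compare like plain integers.
  assert(ElementCount::isKnownGE(ElementCount::getFixed(8), Fixed4));
  // Mixed comparisons are conservative: a fixed 4 is known <= vscale x 4
  // (vscale is at least 1), but it is not known >= it.
  assert(ElementCount::isKnownLE(Fixed4, Scal4));
  assert(!ElementCount::isKnownGE(Fixed4, Scal4));
}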
//===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -19,7 +17,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; @@ -568,47 +566,6 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst, return Extract; } -void MachineIRBuilder::buildSequence(Register Res, ArrayRef Ops, - ArrayRef Indices) { -#ifndef NDEBUG - assert(Ops.size() == Indices.size() && "incompatible args"); - assert(!Ops.empty() && "invalid trivial sequence"); - assert(llvm::is_sorted(Indices) && - "sequence offsets must be in ascending order"); - - assert(getMRI()->getType(Res).isValid() && "invalid operand type"); - for (auto Op : Ops) - assert(getMRI()->getType(Op).isValid() && "invalid operand type"); -#endif - - LLT ResTy = getMRI()->getType(Res); - LLT OpTy = getMRI()->getType(Ops[0]); - unsigned OpSize = OpTy.getSizeInBits(); - bool MaybeMerge = true; - for (unsigned i = 0; i < Ops.size(); ++i) { - if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { - MaybeMerge = false; - break; - } - } - - if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) { - buildMerge(Res, Ops); - return; - } - - Register ResIn = getMRI()->createGenericVirtualRegister(ResTy); - buildUndef(ResIn); - - for (unsigned i = 0; i < Ops.size(); ++i) { - Register ResOut = i + 1 == Ops.size() - ? 
Res - : getMRI()->createGenericVirtualRegister(ResTy); - buildInsert(ResOut, ResIn, Ops[i], Indices[i]); - ResIn = ResOut; - } -} - MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) { return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {}); } @@ -666,6 +623,17 @@ MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } +MachineInstrBuilder +MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, + ArrayRef Ops) { + SmallVector TmpVec; + TmpVec.reserve(Ops.size()); + LLT EltTy = Res.getLLTTy(*getMRI()).getElementType(); + for (auto &Op : Ops) + TmpVec.push_back(buildConstant(EltTy, Op)); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); +} + MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, const SrcOp &Src) { SmallVector TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 01af6bb51bb7..bce850ee212c 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -14,8 +14,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -25,12 +23,13 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -631,7 +630,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { "Unexpected hint opcode!"); // The only correct mapping for these is to always use the source register // bank. - const RegisterBank *RB = MRI->getRegBankOrNull(MI.getOperand(1).getReg()); + const RegisterBank *RB = + RBI->getRegBank(MI.getOperand(1).getReg(), *MRI, *TRI); // We can assume every instruction above this one has a selected register // bank. assert(RB && "Expected source register to have a register bank?"); diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp deleted file mode 100644 index 5c4d18ad79c5..000000000000 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ /dev/null @@ -1,110 +0,0 @@ -//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the RegisterBank class. 
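The buildBuildVectorConstant helper added above emits one G_CONSTANT per element and a single G_BUILD_VECTOR over them; the extracted text dropped the template argument, but the operand list is an ArrayRef of APInt. A hedged usage sketch, assuming a MachineIRBuilder B with a valid insertion point:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Sketch: build the constant vector <4 x s32> <1, 2, 3, 4>.
MachineInstrBuilder buildConstVec(MachineIRBuilder &B) {
  LLT V4S32 = LLT::fixed_vector(4, 32);
  SmallVector<APInt, 4> Elts;
  for (uint64_t V : {1, 2, 3, 4})
    Elts.push_back(APInt(32, V));
  // Emits four G_CONSTANTs plus one G_BUILD_VECTOR.
  return B.buildBuildVectorConstant(V4S32, Elts);
}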
-//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "registerbank" - -using namespace llvm; - -const unsigned RegisterBank::InvalidID = UINT_MAX; - -RegisterBank::RegisterBank( - unsigned ID, const char *Name, unsigned Size, - const uint32_t *CoveredClasses, unsigned NumRegClasses) - : ID(ID), Name(Name), Size(Size) { - ContainedRegClasses.resize(NumRegClasses); - ContainedRegClasses.setBitsInMask(CoveredClasses); -} - -bool RegisterBank::verify(const TargetRegisterInfo &TRI) const { - assert(isValid() && "Invalid register bank"); - for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) { - const TargetRegisterClass &RC = *TRI.getRegClass(RCId); - - if (!covers(RC)) - continue; - // Verify that the register bank covers all the sub classes of the - // classes it covers. - - // Use a different (slow in that case) method than - // RegisterBankInfo to find the subclasses of RC, to make sure - // both agree on the covers. - for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) { - const TargetRegisterClass &SubRC = *TRI.getRegClass(RCId); - - if (!RC.hasSubClassEq(&SubRC)) - continue; - - // Verify that the Size of the register bank is big enough to cover - // all the register classes it covers. - assert(getSize() >= TRI.getRegSizeInBits(SubRC) && - "Size is not big enough for all the subclasses!"); - assert(covers(SubRC) && "Not all subclasses are covered"); - } - } - return true; -} - -bool RegisterBank::covers(const TargetRegisterClass &RC) const { - assert(isValid() && "RB hasn't been initialized yet"); - return ContainedRegClasses.test(RC.getID()); -} - -bool RegisterBank::isValid() const { - return ID != InvalidID && Name != nullptr && Size != 0 && - // A register bank that does not cover anything is useless. - !ContainedRegClasses.empty(); -} - -bool RegisterBank::operator==(const RegisterBank &OtherRB) const { - // There must be only one instance of a given register bank alive - // for the whole compilation. - // The RegisterBankInfo is supposed to enforce that. - assert((OtherRB.getID() != getID() || &OtherRB == this) && - "ID does not uniquely identify a RegisterBank"); - return &OtherRB == this; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const { - print(dbgs(), /* IsForDebug */ true, TRI); -} -#endif - -void RegisterBank::print(raw_ostream &OS, bool IsForDebug, - const TargetRegisterInfo *TRI) const { - OS << getName(); - if (!IsForDebug) - return; - OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n" - << "isValid:" << isValid() << '\n' - << "Number of Covered register classes: " << ContainedRegClasses.count() - << '\n'; - // Print all the subclasses if we can. - // This register classes may not be properly initialized yet. 
- if (!TRI || ContainedRegClasses.empty()) - return; - assert(ContainedRegClasses.size() == TRI->getNumRegClasses() && - "TRI does not match the initialization process?"); - OS << "Covered register classes:\n"; - ListSeparator LS; - for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) { - const TargetRegisterClass &RC = *TRI->getRegClass(RCId); - - if (covers(RC)) - OS << LS << TRI->getRegClassName(&RC); - } -} diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp deleted file mode 100644 index 650500c7eb31..000000000000 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ /dev/null @@ -1,805 +0,0 @@ -//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the RegisterBankInfo class. -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#include // For std::max. - -#define DEBUG_TYPE "registerbankinfo" - -using namespace llvm; - -STATISTIC(NumPartialMappingsCreated, - "Number of partial mappings dynamically created"); -STATISTIC(NumPartialMappingsAccessed, - "Number of partial mappings dynamically accessed"); -STATISTIC(NumValueMappingsCreated, - "Number of value mappings dynamically created"); -STATISTIC(NumValueMappingsAccessed, - "Number of value mappings dynamically accessed"); -STATISTIC(NumOperandsMappingsCreated, - "Number of operands mappings dynamically created"); -STATISTIC(NumOperandsMappingsAccessed, - "Number of operands mappings dynamically accessed"); -STATISTIC(NumInstructionMappingsCreated, - "Number of instruction mappings dynamically created"); -STATISTIC(NumInstructionMappingsAccessed, - "Number of instruction mappings dynamically accessed"); - -const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; -const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; - -//------------------------------------------------------------------------------ -// RegisterBankInfo implementation. 
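The deleted RegisterBankInfo::getRegBank below resolves a register through MRI's class-or-bank sum type: an already-assigned bank wins, an assigned class is translated via getRegBankFromRegClass, and an unassigned register yields nullptr. A standalone model of that three-way resolution (all types here are stand-ins):

#include <cstdio>
#include <variant>

struct RegisterBankTag { int ID; };
struct RegClassTag { int ID; };
using ClassOrBank = std::variant<std::monostate, RegClassTag, RegisterBankTag>;

// Mirrors the lookup order in the deleted RegisterBankInfo::getRegBank.
const char *resolve(const ClassOrBank &COB) {
  if (std::holds_alternative<RegisterBankTag>(COB))
    return "bank already assigned";
  if (std::holds_alternative<RegClassTag>(COB))
    return "derive bank from register class";
  return "unknown (nullptr)";
}

int main() {
  std::printf("%s\n", resolve(ClassOrBank{RegisterBankTag{0}}));
  std::printf("%s\n", resolve(ClassOrBank{RegClassTag{1}}));
  std::printf("%s\n", resolve(ClassOrBank{}));
}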
-//------------------------------------------------------------------------------ -RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks, - unsigned NumRegBanks) - : RegBanks(RegBanks), NumRegBanks(NumRegBanks) { -#ifndef NDEBUG - for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { - assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank"); - assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid"); - } -#endif // NDEBUG -} - -bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { -#ifndef NDEBUG - for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { - const RegisterBank &RegBank = getRegBank(Idx); - assert(Idx == RegBank.getID() && - "ID does not match the index in the array"); - LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n'); - assert(RegBank.verify(TRI) && "RegBank is invalid"); - } -#endif // NDEBUG - return true; -} - -const RegisterBank * -RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const { - if (Register::isPhysicalRegister(Reg)) { - // FIXME: This was probably a copy to a virtual register that does have a - // type we could use. - return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT()); - } - - assert(Reg && "NoRegister does not have a register bank"); - const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); - if (auto *RB = RegClassOrBank.dyn_cast()) - return RB; - if (auto *RC = RegClassOrBank.dyn_cast()) - return &getRegBankFromRegClass(*RC, MRI.getType(Reg)); - return nullptr; -} - -const TargetRegisterClass & -RegisterBankInfo::getMinimalPhysRegClass(Register Reg, - const TargetRegisterInfo &TRI) const { - assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); - const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); - if (RegRCIt != PhysRegMinimalRCs.end()) - return *RegRCIt->second; - const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg); - PhysRegMinimalRCs[Reg] = PhysRC; - return *PhysRC; -} - -const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( - const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, - const MachineRegisterInfo &MRI) const { - const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); - - // The mapping of the registers may be available via the - // register class constraints. - const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI); - - if (!RC) - return nullptr; - - Register Reg = MI.getOperand(OpIdx).getReg(); - const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); - // Check that the target properly implemented getRegBankFromRegClass. - assert(RegBank.covers(*RC) && - "The mapping of the register bank does not make sense"); - return &RegBank; -} - -const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister( - Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) { - - // If the register already has a class, fallback to MRI::constrainRegClass. - auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); - if (RegClassOrBank.is()) - return MRI.constrainRegClass(Reg, &RC); - - const RegisterBank *RB = RegClassOrBank.get(); - // Otherwise, all we can do is ensure the bank covers the class, and set it. - if (RB && !RB->covers(RC)) - return nullptr; - - // If nothing was set or the class is simply compatible, set it. - MRI.setRegClass(Reg, &RC); - return &RC; -} - -/// Check whether or not \p MI should be treated like a copy -/// for the mappings. 
-/// Copy like instruction are special for mapping because -/// they don't have actual register constraints. Moreover, -/// they sometimes have register classes assigned and we can -/// just use that instead of failing to provide a generic mapping. -static bool isCopyLike(const MachineInstr &MI) { - return MI.isCopy() || MI.isPHI() || - MI.getOpcode() == TargetOpcode::REG_SEQUENCE; -} - -const RegisterBankInfo::InstructionMapping & -RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { - // For copies we want to walk over the operands and try to find one - // that has a register bank since the instruction itself will not get - // us any constraint. - bool IsCopyLike = isCopyLike(MI); - // For copy like instruction, only the mapping of the definition - // is important. The rest is not constrained. - unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands(); - - const MachineFunction &MF = *MI.getMF(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - // We may need to query the instruction encoding to guess the mapping. - const TargetInstrInfo &TII = *STI.getInstrInfo(); - - // Before doing anything complicated check if the mapping is not - // directly available. - bool CompleteMapping = true; - - SmallVector OperandsMapping(NumOperandsForMapping); - for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; - ++OpIdx) { - const MachineOperand &MO = MI.getOperand(OpIdx); - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (!Reg) - continue; - // The register bank of Reg is just a side effect of the current - // excution and in particular, there is no reason to believe this - // is the best default mapping for the current instruction. Keep - // it as an alternative register bank if we cannot figure out - // something. - const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI); - // For copy-like instruction, we want to reuse the register bank - // that is already set on Reg, if any, since those instructions do - // not have any constraints. - const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr; - if (!CurRegBank) { - // If this is a target specific instruction, we can deduce - // the register bank from the encoding constraints. - CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI); - if (!CurRegBank) { - // All our attempts failed, give up. - CompleteMapping = false; - - if (!IsCopyLike) - // MI does not carry enough information to guess the mapping. - return getInvalidInstructionMapping(); - continue; - } - } - - unsigned Size = getSizeInBits(Reg, MRI, TRI); - const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank); - if (IsCopyLike) { - if (!OperandsMapping[0]) { - if (MI.isRegSequence()) { - // For reg_sequence, the result size does not match the input. - unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(), - MRI, TRI); - OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank); - } else { - OperandsMapping[0] = ValMapping; - } - } - - // The default handling assumes any register bank can be copied to any - // other. If this isn't the case, the target should specially deal with - // reg_sequence/phi. There may also be unsatisfiable copies. 
- for (; OpIdx != EndIdx; ++OpIdx) { - const MachineOperand &MO = MI.getOperand(OpIdx); - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (!Reg) - continue; - - const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI); - if (AltRegBank && - cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI))) - return getInvalidInstructionMapping(); - } - - CompleteMapping = true; - break; - } - - OperandsMapping[OpIdx] = ValMapping; - } - - if (IsCopyLike && !CompleteMapping) { - // No way to deduce the type from what we have. - return getInvalidInstructionMapping(); - } - - assert(CompleteMapping && "Setting an uncomplete mapping"); - return getInstructionMapping( - DefaultMappingID, /*Cost*/ 1, - /*OperandsMapping*/ getOperandsMapping(OperandsMapping), - NumOperandsForMapping); -} - -/// Hashing function for PartialMapping. -static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length, - const RegisterBank *RegBank) { - return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0); -} - -/// Overloaded version of hash_value for a PartialMapping. -hash_code -llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) { - return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length, - PartMapping.RegBank); -} - -const RegisterBankInfo::PartialMapping & -RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, - const RegisterBank &RegBank) const { - ++NumPartialMappingsAccessed; - - hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank); - const auto &It = MapOfPartialMappings.find(Hash); - if (It != MapOfPartialMappings.end()) - return *It->second; - - ++NumPartialMappingsCreated; - - auto &PartMapping = MapOfPartialMappings[Hash]; - PartMapping = std::make_unique(StartIdx, Length, RegBank); - return *PartMapping; -} - -const RegisterBankInfo::ValueMapping & -RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length, - const RegisterBank &RegBank) const { - return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1); -} - -static hash_code -hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown, - unsigned NumBreakDowns) { - if (LLVM_LIKELY(NumBreakDowns == 1)) - return hash_value(*BreakDown); - SmallVector Hashes(NumBreakDowns); - for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx) - Hashes.push_back(hash_value(BreakDown[Idx])); - return hash_combine_range(Hashes.begin(), Hashes.end()); -} - -const RegisterBankInfo::ValueMapping & -RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, - unsigned NumBreakDowns) const { - ++NumValueMappingsAccessed; - - hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns); - const auto &It = MapOfValueMappings.find(Hash); - if (It != MapOfValueMappings.end()) - return *It->second; - - ++NumValueMappingsCreated; - - auto &ValMapping = MapOfValueMappings[Hash]; - ValMapping = std::make_unique(BreakDown, NumBreakDowns); - return *ValMapping; -} - -template -const RegisterBankInfo::ValueMapping * -RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { - - ++NumOperandsMappingsAccessed; - - // The addresses of the value mapping are unique. - // Therefore, we can use them directly to hash the operand mapping. - hash_code Hash = hash_combine_range(Begin, End); - auto &Res = MapOfOperandsMappings[Hash]; - if (Res) - return Res.get(); - - ++NumOperandsMappingsCreated; - - // Create the array of ValueMapping. 
- // Note: this array will not hash to this instance of operands - // mapping, because we use the pointer of the ValueMapping - // to hash and we expect them to uniquely identify an instance - // of value mapping. - Res = std::make_unique(std::distance(Begin, End)); - unsigned Idx = 0; - for (Iterator It = Begin; It != End; ++It, ++Idx) { - const ValueMapping *ValMap = *It; - if (!ValMap) - continue; - Res[Idx] = *ValMap; - } - return Res.get(); -} - -const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( - const SmallVectorImpl &OpdsMapping) - const { - return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); -} - -const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( - std::initializer_list OpdsMapping) - const { - return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); -} - -static hash_code -hashInstructionMapping(unsigned ID, unsigned Cost, - const RegisterBankInfo::ValueMapping *OperandsMapping, - unsigned NumOperands) { - return hash_combine(ID, Cost, OperandsMapping, NumOperands); -} - -const RegisterBankInfo::InstructionMapping & -RegisterBankInfo::getInstructionMappingImpl( - bool IsInvalid, unsigned ID, unsigned Cost, - const RegisterBankInfo::ValueMapping *OperandsMapping, - unsigned NumOperands) const { - assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 && - OperandsMapping == nullptr && NumOperands == 0) || - !IsInvalid) && - "Mismatch argument for invalid input"); - ++NumInstructionMappingsAccessed; - - hash_code Hash = - hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands); - const auto &It = MapOfInstructionMappings.find(Hash); - if (It != MapOfInstructionMappings.end()) - return *It->second; - - ++NumInstructionMappingsCreated; - - auto &InstrMapping = MapOfInstructionMappings[Hash]; - InstrMapping = std::make_unique( - ID, Cost, OperandsMapping, NumOperands); - return *InstrMapping; -} - -const RegisterBankInfo::InstructionMapping & -RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); - if (Mapping.isValid()) - return Mapping; - llvm_unreachable("The target must implement this"); -} - -RegisterBankInfo::InstructionMappings -RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { - InstructionMappings PossibleMappings; - const auto &Mapping = getInstrMapping(MI); - if (Mapping.isValid()) { - // Put the default mapping first. - PossibleMappings.push_back(&Mapping); - } - - // Then the alternative mapping, if any. - InstructionMappings AltMappings = getInstrAlternativeMappings(MI); - append_range(PossibleMappings, AltMappings); -#ifndef NDEBUG - for (const InstructionMapping *Mapping : PossibleMappings) - assert(Mapping->verify(MI) && "Mapping is invalid"); -#endif - return PossibleMappings; -} - -RegisterBankInfo::InstructionMappings -RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { - // No alternative for MI. 
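The mapping accessors above all follow one hash-consing pattern: hash the key, return the cached object if present, otherwise create and memoize it so equal mappings share a single immutable instance. A generic runnable model of that pattern:

#include <cstdio>
#include <memory>
#include <unordered_map>

struct Mapping {
  unsigned Start, Length;
};

// Model of MapOfPartialMappings et al.: one object per distinct key, owned
// by the cache and handed out by reference.
const Mapping &getMapping(unsigned Start, unsigned Length) {
  static std::unordered_map<unsigned long long, std::unique_ptr<Mapping>> Cache;
  unsigned long long Key =
      (static_cast<unsigned long long>(Start) << 32) | Length;
  auto &Slot = Cache[Key];
  if (!Slot)
    Slot = std::make_unique<Mapping>(Mapping{Start, Length}); // first use
  return *Slot;
}

int main() {
  const Mapping &A = getMapping(0, 32);
  const Mapping &B = getMapping(0, 32);
  std::printf("same object: %d\n", &A == &B); // 1
}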
- return InstructionMappings(); -} - -void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { - MachineInstr &MI = OpdMapper.getMI(); - MachineRegisterInfo &MRI = OpdMapper.getMRI(); - LLVM_DEBUG(dbgs() << "Applying default-like mapping\n"); - for (unsigned OpIdx = 0, - EndIdx = OpdMapper.getInstrMapping().getNumOperands(); - OpIdx != EndIdx; ++OpIdx) { - LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx); - MachineOperand &MO = MI.getOperand(OpIdx); - if (!MO.isReg()) { - LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n"); - continue; - } - if (!MO.getReg()) { - LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n"); - continue; - } - assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != - 0 && - "Invalid mapping"); - assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns == - 1 && - "This mapping is too complex for this function"); - iterator_range::const_iterator> NewRegs = - OpdMapper.getVRegs(OpIdx); - if (NewRegs.empty()) { - LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); - continue; - } - Register OrigReg = MO.getReg(); - Register NewReg = *NewRegs.begin(); - LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); - MO.setReg(NewReg); - LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); - - // The OperandsMapper creates plain scalar, we may have to fix that. - // Check if the types match and if not, fix that. - LLT OrigTy = MRI.getType(OrigReg); - LLT NewTy = MRI.getType(NewReg); - if (OrigTy != NewTy) { - // The default mapping is not supposed to change the size of - // the storage. However, right now we don't necessarily bump all - // the types to storage size. For instance, we can consider - // s16 G_AND legal whereas the storage size is going to be 32. - assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() && - "Types with difference size cannot be handled by the default " - "mapping"); - LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " - << OrigTy); - MRI.setType(NewReg, OrigTy); - } - LLVM_DEBUG(dbgs() << '\n'); - } -} - -unsigned RegisterBankInfo::getSizeInBits(Register Reg, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const { - if (Register::isPhysicalRegister(Reg)) { - // The size is not directly available for physical registers. - // Instead, we need to access a register class that contains Reg and - // get the size of that register class. - // Because this is expensive, we'll cache the register class by calling - auto *RC = &getMinimalPhysRegClass(Reg, TRI); - assert(RC && "Expecting Register class"); - return TRI.getRegSizeInBits(*RC); - } - return TRI.getRegSizeInBits(Reg, MRI); -} - -//------------------------------------------------------------------------------ -// Helper classes implementation. -//------------------------------------------------------------------------------ -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const { - print(dbgs()); - dbgs() << '\n'; -} -#endif - -bool RegisterBankInfo::PartialMapping::verify() const { - assert(RegBank && "Register bank not set"); - assert(Length && "Empty mapping"); - assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?"); - // Check if the minimum width fits into RegBank. 
- assert(RegBank->getSize() >= Length && "Register bank too small for Mask"); - return true; -} - -void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { - OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = "; - if (RegBank) - OS << *RegBank; - else - OS << "nullptr"; -} - -bool RegisterBankInfo::ValueMapping::partsAllUniform() const { - if (NumBreakDowns < 2) - return true; - - const PartialMapping *First = begin(); - for (const PartialMapping *Part = First + 1; Part != end(); ++Part) { - if (Part->Length != First->Length || Part->RegBank != First->RegBank) - return false; - } - - return true; -} - -bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { - assert(NumBreakDowns && "Value mapped nowhere?!"); - unsigned OrigValueBitWidth = 0; - for (const RegisterBankInfo::PartialMapping &PartMap : *this) { - // Check that each register bank is big enough to hold the partial value: - // this check is done by PartialMapping::verify - assert(PartMap.verify() && "Partial mapping is invalid"); - // The original value should completely be mapped. - // Thus the maximum accessed index + 1 is the size of the original value. - OrigValueBitWidth = - std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1); - } - assert(OrigValueBitWidth >= MeaningfulBitWidth && - "Meaningful bits not covered by the mapping"); - APInt ValueMask(OrigValueBitWidth, 0); - for (const RegisterBankInfo::PartialMapping &PartMap : *this) { - // Check that the union of the partial mappings covers the whole value, - // without overlaps. - // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1. - APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx, - PartMap.getHighBitIdx() + 1); - ValueMask ^= PartMapMask; - assert((ValueMask & PartMapMask) == PartMapMask && - "Some partial mappings overlap"); - } - assert(ValueMask.isAllOnes() && "Value is not fully mapped"); - return true; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const { - print(dbgs()); - dbgs() << '\n'; -} -#endif - -void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { - OS << "#BreakDown: " << NumBreakDowns << " "; - bool IsFirst = true; - for (const PartialMapping &PartMap : *this) { - if (!IsFirst) - OS << ", "; - OS << '[' << PartMap << ']'; - IsFirst = false; - } -} - -bool RegisterBankInfo::InstructionMapping::verify( - const MachineInstr &MI) const { - // Check that all the register operands are properly mapped. - // Check the constructor invariant. - // For PHI, we only care about mapping the definition. - assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) && - "NumOperands must match, see constructor"); - assert(MI.getParent() && MI.getMF() && - "MI must be connected to a MachineFunction"); - const MachineFunction &MF = *MI.getMF(); - const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); - (void)RBI; - - for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { - const MachineOperand &MO = MI.getOperand(Idx); - if (!MO.isReg()) { - assert(!getOperandMapping(Idx).isValid() && - "We should not care about non-reg mapping"); - continue; - } - Register Reg = MO.getReg(); - if (!Reg) - continue; - assert(getOperandMapping(Idx).isValid() && - "We must have a mapping for reg operands"); - const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); - (void)MOMapping; - // Register size in bits. - // This size must match what the mapping expects. 
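ValueMapping::verify above proves the partial mappings tile the value exactly by XOR-accumulating per-part bit masks: an overlap trips the mask check, and a hole leaves the accumulated mask short of all-ones. The same trick on plain 64-bit masks:

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// True if the (start, length) pieces cover [0, Width) exactly, with no
// overlap and no holes: the invariant ValueMapping::verify asserts.
bool tilesExactly(const std::vector<std::pair<unsigned, unsigned>> &Pieces,
                  unsigned Width) {
  uint64_t Mask = 0;
  for (auto [Start, Len] : Pieces) {
    uint64_t PieceMask = (Len == 64 ? ~0ull : (1ull << Len) - 1) << Start;
    if (Mask & PieceMask)
      return false; // overlapping partial mappings
    Mask ^= PieceMask;
  }
  return Mask == (Width == 64 ? ~0ull : (1ull << Width) - 1); // no holes
}

int main() {
  assert(tilesExactly({{0, 32}, {32, 32}}, 64));  // clean two-way split
  assert(!tilesExactly({{0, 16}, {32, 32}}, 64)); // bits [16, 32) unmapped
}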
- assert(MOMapping.verify(RBI->getSizeInBits( - Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) && - "Value mapping is invalid"); - } - return true; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const { - print(dbgs()); - dbgs() << '\n'; -} -#endif - -void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const { - OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: "; - - for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { - const ValueMapping &ValMapping = getOperandMapping(OpIdx); - if (OpIdx) - OS << ", "; - OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}'; - } -} - -const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1; - -RegisterBankInfo::OperandsMapper::OperandsMapper( - MachineInstr &MI, const InstructionMapping &InstrMapping, - MachineRegisterInfo &MRI) - : MRI(MRI), MI(MI), InstrMapping(InstrMapping) { - unsigned NumOpds = InstrMapping.getNumOperands(); - OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx); - assert(InstrMapping.verify(MI) && "Invalid mapping for MI"); -} - -iterator_range::iterator> -RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { - assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); - unsigned NumPartialVal = - getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; - int StartIdx = OpToNewVRegIdx[OpIdx]; - - if (StartIdx == OperandsMapper::DontKnowIdx) { - // This is the first time we try to access OpIdx. - // Create the cells that will hold all the partial values at the - // end of the list of NewVReg. - StartIdx = NewVRegs.size(); - OpToNewVRegIdx[OpIdx] = StartIdx; - for (unsigned i = 0; i < NumPartialVal; ++i) - NewVRegs.push_back(0); - } - SmallVectorImpl::iterator End = - getNewVRegsEnd(StartIdx, NumPartialVal); - - return make_range(&NewVRegs[StartIdx], End); -} - -SmallVectorImpl::const_iterator -RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, - unsigned NumVal) const { - return const_cast(this)->getNewVRegsEnd(StartIdx, NumVal); -} -SmallVectorImpl::iterator -RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, - unsigned NumVal) { - assert((NewVRegs.size() == StartIdx + NumVal || - NewVRegs.size() > StartIdx + NumVal) && - "NewVRegs too small to contain all the partial mapping"); - return NewVRegs.size() <= StartIdx + NumVal ? NewVRegs.end() - : &NewVRegs[StartIdx + NumVal]; -} - -void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { - assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); - iterator_range::iterator> NewVRegsForOpIdx = - getVRegsMem(OpIdx); - const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx); - const PartialMapping *PartMap = ValMapping.begin(); - for (Register &NewVReg : NewVRegsForOpIdx) { - assert(PartMap != ValMapping.end() && "Out-of-bound access"); - assert(NewVReg == 0 && "Register has already been created"); - // The new registers are always bound to scalar with the right size. - // The actual type has to be set when the target does the mapping - // of the instruction. - // The rationale is that this generic code cannot guess how the - // target plans to split the input type. 
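OperandsMapper above keeps every operand's partial registers in one flat NewVRegs array, with OpToNewVRegIdx recording each operand's start cell lazily, on first touch. A compact model of that flat layout:

#include <cassert>
#include <vector>

struct FlatVRegStore {
  static constexpr int DontKnowIdx = -1;
  std::vector<int> OpToStart;   // one slot per operand
  std::vector<unsigned> Regs;   // all partial registers, back to back

  explicit FlatVRegStore(unsigned NumOperands)
      : OpToStart(NumOperands, DontKnowIdx) {}

  // Lazily reserve NumParts contiguous cells for operand OpIdx and return
  // the start index, mirroring OperandsMapper::getVRegsMem.
  unsigned getStart(unsigned OpIdx, unsigned NumParts) {
    if (OpToStart[OpIdx] == DontKnowIdx) {
      OpToStart[OpIdx] = static_cast<int>(Regs.size());
      Regs.resize(Regs.size() + NumParts, 0); // 0 == not created yet
    }
    return static_cast<unsigned>(OpToStart[OpIdx]);
  }
};

int main() {
  FlatVRegStore S(/*NumOperands=*/3);
  unsigned A = S.getStart(2, 2); // operand 2 touched first: cells 0..1
  unsigned B = S.getStart(0, 1); // operand 0 next: cell 2
  assert(A == 0 && B == 2 && S.getStart(2, 2) == 0);
}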
- NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length)); - MRI.setRegBank(NewVReg, *PartMap->RegBank); - ++PartMap; - } -} - -void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, - unsigned PartialMapIdx, - Register NewVReg) { - assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); - assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns > - PartialMapIdx && - "Out-of-bound access for partial mapping"); - // Make sure the memory is initialized for that operand. - (void)getVRegsMem(OpIdx); - assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 && - "This value is already set"); - NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg; -} - -iterator_range::const_iterator> -RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, - bool ForDebug) const { - (void)ForDebug; - assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); - int StartIdx = OpToNewVRegIdx[OpIdx]; - - if (StartIdx == OperandsMapper::DontKnowIdx) - return make_range(NewVRegs.end(), NewVRegs.end()); - - unsigned PartMapSize = - getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; - SmallVectorImpl::const_iterator End = - getNewVRegsEnd(StartIdx, PartMapSize); - iterator_range::const_iterator> Res = - make_range(&NewVRegs[StartIdx], End); -#ifndef NDEBUG - for (Register VReg : Res) - assert((VReg || ForDebug) && "Some registers are uninitialized"); -#endif - return Res; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const { - print(dbgs(), true); - dbgs() << '\n'; -} -#endif - -void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, - bool ForDebug) const { - unsigned NumOpds = getInstrMapping().getNumOperands(); - if (ForDebug) { - OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n'; - // Print out the internal state of the index table. - OS << "Populated indices (CellNumber, IndexInNewVRegs): "; - bool IsFirst = true; - for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { - if (OpToNewVRegIdx[Idx] != DontKnowIdx) { - if (!IsFirst) - OS << ", "; - OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')'; - IsFirst = false; - } - } - OS << '\n'; - } else - OS << "Mapping ID: " << getInstrMapping().getID() << ' '; - - OS << "Operand Mapping: "; - // If we have a function, we can pretty print the name of the registers. - // Otherwise we will print the raw numbers. - const TargetRegisterInfo *TRI = - getMI().getParent() && getMI().getMF() - ? 
getMI().getMF()->getSubtarget().getRegisterInfo() - : nullptr; - bool IsFirst = true; - for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { - if (OpToNewVRegIdx[Idx] == DontKnowIdx) - continue; - if (!IsFirst) - OS << ", "; - IsFirst = false; - OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", ["; - bool IsFirstNewVReg = true; - for (Register VReg : getVRegs(Idx)) { - if (!IsFirstNewVReg) - OS << ", "; - IsFirstNewVReg = false; - OS << printReg(VReg, TRI); - } - OS << "])"; - } -} diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 544af9a2954f..7781761bc131 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -16,14 +16,14 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #define DEBUG_TYPE "globalisel-utils" @@ -56,6 +57,11 @@ Register llvm::constrainOperandRegClass( // Assume physical registers are properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); + // Save the old register class to check whether + // the change notifications will be required. + // TODO: A better approach would be to pass + // the observers to constrainRegToClass(). + auto *OldRegClass = MRI.getRegClassOrNull(Reg); Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible // then create a copy between the new and the old register. 
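The constrainOperandRegClass change above snapshots the operand's register class before constraining it so that observers are notified only when the class actually changed. A minimal model of that notify-on-change guard (the observer callback and the int-valued "class" are stand-ins):

#include <cstdio>
#include <functional>

// Stand-in for the GISelChangeObserver notification in
// llvm::constrainOperandRegClass: notify only on a real change.
template <typename T>
void setWithNotify(T &Slot, const T &NewVal,
                   const std::function<void()> &NotifyChanged) {
  T Old = Slot;      // snapshot, like OldRegClass in the patch
  Slot = NewVal;     // the constraining step may or may not change it
  if (Old != Slot)
    NotifyChanged(); // skip spurious notifications
}

int main() {
  int RegClass = 1;
  auto Notify = [] { std::puts("observer: changingInstr/changedInstr"); };
  setWithNotify(RegClass, 2, Notify); // fires
  setWithNotify(RegClass, 2, Notify); // silent: nothing changed
}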
@@ -81,7 +87,7 @@ Register llvm::constrainOperandRegClass( if (GISelChangeObserver *Observer = MF.getObserver()) { Observer->changedInstr(*RegMO.getParent()); } - } else { + } else if (OldRegClass != MRI.getRegClassOrNull(Reg)) { if (GISelChangeObserver *Observer = MF.getObserver()) { if (!RegMO.isDef()) { MachineInstr *RegDef = MRI.getVRegDef(Reg); @@ -500,6 +506,7 @@ Optional llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, default: break; case TargetOpcode::G_ADD: + case TargetOpcode::G_PTR_ADD: return C1 + C2; case TargetOpcode::G_AND: return C1 & C2; @@ -533,6 +540,14 @@ Optional llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, if (!C2.getBoolValue()) break; return C1.srem(C2); + case TargetOpcode::G_SMIN: + return APIntOps::smin(C1, C2); + case TargetOpcode::G_SMAX: + return APIntOps::smax(C1, C2); + case TargetOpcode::G_UMIN: + return APIntOps::umin(C1, C2); + case TargetOpcode::G_UMAX: + return APIntOps::umax(C1, C2); } return None; @@ -592,33 +607,27 @@ Optional llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, return None; } -Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB) { +SmallVector +llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI) { auto *SrcVec2 = getOpcodeDef(Op2, MRI); if (!SrcVec2) - return Register(); + return SmallVector(); auto *SrcVec1 = getOpcodeDef(Op1, MRI); if (!SrcVec1) - return Register(); + return SmallVector(); - const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); - - SmallVector FoldedElements; + SmallVector FoldedElements; for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), SrcVec2->getSourceReg(Idx), MRI); if (!MaybeCst) - return Register(); - auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); - FoldedElements.emplace_back(FoldedCstReg); + return SmallVector(); + FoldedElements.push_back(*MaybeCst); } - // Create the new vector constant. 
- auto CstVec = - MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); - return CstVec.getReg(0); + return FoldedElements; } bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -1061,15 +1070,38 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, AllowUndef); } +Optional<APInt> llvm::getIConstantSplatVal(const Register Reg, + const MachineRegisterInfo &MRI) { + if (auto SplatValAndReg = + getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) { + Optional<ValueAndVReg> ValAndVReg = + getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); + return ValAndVReg->Value; + } + + return None; +} + +Optional<APInt> getIConstantSplatVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI); +} + Optional<int64_t> -llvm::getBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { +llvm::getIConstantSplatSExtVal(const Register Reg, + const MachineRegisterInfo &MRI) { if (auto SplatValAndReg = - getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false)) + getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI); return None; } +Optional<int64_t> +llvm::getIConstantSplatSExtVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI); +} + Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef) { @@ -1095,7 +1127,7 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, unsigned Opc = MI.getOpcode(); if (!isBuildVectorOp(Opc)) return None; - if (auto Splat = getBuildVectorConstantSplat(MI, MRI)) + if (auto Splat = getIConstantSplatSExtVal(MI, MRI)) return RegOrConstant(*Splat); auto Reg = MI.getOperand(1).getReg(); if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()), @@ -1104,6 +1136,26 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, return RegOrConstant(Reg); } +static bool isConstantScalar(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowFP = true, + bool AllowOpaqueConstants = true) { + switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_IMPLICIT_DEF: + return true; + case TargetOpcode::G_FCONSTANT: + return AllowFP; + case TargetOpcode::G_GLOBAL_VALUE: + case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_BLOCK_ADDR: + case TargetOpcode::G_JUMP_TABLE: + return AllowOpaqueConstants; + default: + return false; + } +} + bool llvm::isConstantOrConstantVector(MachineInstr &MI, const MachineRegisterInfo &MRI) { Register Def = MI.getOperand(0).getReg(); @@ -1121,19 +1173,71 @@ bool llvm::isConstantOrConstantVector(MachineInstr &MI, return true; } +bool llvm::isConstantOrConstantVector(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowFP, bool AllowOpaqueConstants) { + if (isConstantScalar(MI, MRI, AllowFP, AllowOpaqueConstants)) + return true; + + if (!isBuildVectorOp(MI.getOpcode())) + return false; + + const unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I != NumOps; ++I) { + const MachineInstr *ElementDef = MRI.getVRegDef(MI.getOperand(I).getReg()); + if (!isConstantScalar(*ElementDef, MRI, AllowFP, AllowOpaqueConstants)) + return false; + } + + return true; +} + Optional<APInt> llvm::isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI) { Register Def = MI.getOperand(0).getReg(); if (auto C = getIConstantVRegValWithLookThrough(Def, MRI)) return C->Value; - auto MaybeCst = 
getBuildVectorConstantSplat(MI, MRI); + auto MaybeCst = getIConstantSplatSExtVal(MI, MRI); if (!MaybeCst) return None; const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits(); return APInt(ScalarSize, *MaybeCst, true); } +bool llvm::isNullOrNullSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, bool AllowUndefs) { + switch (MI.getOpcode()) { + case TargetOpcode::G_IMPLICIT_DEF: + return AllowUndefs; + case TargetOpcode::G_CONSTANT: + return MI.getOperand(1).getCImm()->isNullValue(); + case TargetOpcode::G_FCONSTANT: { + const ConstantFP *FPImm = MI.getOperand(1).getFPImm(); + return FPImm->isZero() && !FPImm->isNegative(); + } + default: + if (!AllowUndefs) // TODO: isBuildVectorAllZeros assumes undef is OK already + return false; + return isBuildVectorAllZeros(MI, MRI); + } +} + +bool llvm::isAllOnesOrAllOnesSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowUndefs) { + switch (MI.getOpcode()) { + case TargetOpcode::G_IMPLICIT_DEF: + return AllowUndefs; + case TargetOpcode::G_CONSTANT: + return MI.getOperand(1).getCImm()->isAllOnesValue(); + default: + if (!AllowUndefs) // TODO: isBuildVectorAllOnes assumes undef is OK already + return false; + return isBuildVectorAllOnes(MI, MRI); + } +} + bool llvm::matchUnaryPredicate( const MachineRegisterInfo &MRI, Register Reg, std::function Match, bool AllowUndefs) { diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index bbd9006a5d8c..f5833d3b9086 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -592,6 +592,13 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) { if (const GlobalVariable *GV = dyn_cast(U->stripPointerCasts())) MustKeepGlobalVariables.insert(GV); + else if (const ConstantArray *CA = dyn_cast(U->stripPointerCasts())) { + for (const Use &Elt : CA->operands()) { + if (const GlobalVariable *GV = + dyn_cast(Elt->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } } } } @@ -609,6 +616,13 @@ bool GlobalMerge::doInitialization(Module &M) { bool Changed = false; setMustKeepGlobalVariables(M); + LLVM_DEBUG({ + dbgs() << "Number of GV that must be kept: " << + MustKeepGlobalVariables.size() << "\n"; + for (auto KeptGV = MustKeepGlobalVariables.begin(); + KeptGV != MustKeepGlobalVariables.end(); KeptGV++) + dbgs() << "Kept: " << **KeptGV << "\n"; + }); // Grab all non-const globals. 
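The new isNullOrNullSplat and isAllOnesOrAllOnesSplat predicates above accept +0.0 but not -0.0 as null, and treat all-ones as the value -1 at any width. A plain-integer model of the scalar cases:

#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  // All-ones means every bit set, i.e. the two's-complement value -1,
  // whatever the bit width.
  int8_t NarrowAllOnes = -1;
  int64_t WideAllOnes = -1;
  assert(static_cast<uint8_t>(NarrowAllOnes) == 0xFF);
  assert(static_cast<uint64_t>(WideAllOnes) == ~0ull);

  // For floats, the predicate requires positive zero: -0.0 compares equal
  // to 0.0 numerically but is rejected via the sign bit, as in the FPImm
  // check above.
  double PosZero = 0.0, NegZero = -0.0;
  assert(PosZero == NegZero);                              // numeric equality
  assert(std::signbit(NegZero) && !std::signbit(PosZero)); // distinguished
}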
for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 83b8c2d0eacb..67d6a3df7807 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -23,10 +23,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -37,7 +35,6 @@ #include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 1b20d1da20ad..105ab908d3fa 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -28,16 +29,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index 2d38a44d5a33..5be98e114673 100644 --- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -32,17 +32,13 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index c975013db8c8..06c660807c5c 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -23,7 +23,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStacks.h" @@ -686,7 +685,7 @@ void 
InlineSpiller::reMaterializeAll() { // Remove any values that were completely rematted. for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); - for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) { + for (VNInfo *VNI : LI.vnis()) { if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) continue; MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); @@ -839,6 +838,13 @@ foldMemoryOperand(ArrayRef> Ops, unsigned Idx = OpPair.second; assert(MI == OpPair.first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); + + // No point restoring an undef read, and we'll produce an invalid live + // interval. + // TODO: Is this really the correct way to handle undef tied uses? + if (MO.isUse() && !MO.readsReg() && !MO.isTied()) + continue; + if (MO.isImplicit()) { ImpReg = MO.getReg(); continue; @@ -964,7 +970,7 @@ foldMemoryOperand(ArrayRef> Ops, if (!MO.isReg() || !MO.isImplicit()) break; if (MO.getReg() == ImpReg) - FoldMI->RemoveOperand(i - 1); + FoldMI->removeOperand(i - 1); } LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, @@ -1608,7 +1614,7 @@ void HoistSpillHelper::hoistAllSpills() { for (unsigned i = RMEnt->getNumOperands(); i; --i) { MachineOperand &MO = RMEnt->getOperand(i - 1); if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead()) - RMEnt->RemoveOperand(i - 1); + RMEnt->removeOperand(i - 1); } } Edit.eliminateDeadDefs(SpillsToRm, None, AA); diff --git a/llvm/lib/CodeGen/InterferenceCache.h b/llvm/lib/CodeGen/InterferenceCache.h index ace1691c1363..97464da9f17b 100644 --- a/llvm/lib/CodeGen/InterferenceCache.h +++ b/llvm/lib/CodeGen/InterferenceCache.h @@ -37,7 +37,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { SlotIndex First; SlotIndex Last; - BlockInterference() {} + BlockInterference() = default; }; /// Entry - A cache entry containing interference information for all aliases diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 5a20580e5479..b3f38a3b53f3 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -46,6 +46,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -57,7 +58,6 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 230c6846dde2..43858071025a 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -19,7 +19,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -31,9 +30,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include 
"llvm/Pass.h" @@ -173,10 +171,10 @@ class Polynomial { }; /// Number of Error Bits e - unsigned ErrorMSBs; + unsigned ErrorMSBs = (unsigned)-1; /// Value - Value *V; + Value *V = nullptr; /// Coefficient B SmallVector, 4> B; @@ -185,7 +183,7 @@ class Polynomial { APInt A; public: - Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) { + Polynomial(Value *V) : V(V) { IntegerType *Ty = dyn_cast(V->getType()); if (Ty) { ErrorMSBs = 0; @@ -195,12 +193,12 @@ public: } Polynomial(const APInt &A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {} + : ErrorMSBs(ErrorMSBs), A(A) {} Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {} + : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {} - Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {} + Polynomial() = default; /// Increment and clamp the number of undefined bits. void incErrorMSBs(unsigned amt) { @@ -1206,9 +1204,7 @@ bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad, ->getNumElements(); FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI); - SmallVector Indices; - for (unsigned i = 0; i < Factor; i++) - Indices.push_back(i); + auto Indices = llvm::to_vector<4>(llvm::seq(0, Factor)); InterleavedCost = TTI.getInterleavedMemoryOpCost( Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(), InsertionPoint->getPointerAddressSpace(), CostKind); @@ -1228,7 +1224,7 @@ bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad, auto MSSAU = MemorySSAUpdater(&MSSA); MemoryUse *MSSALoad = cast(MSSAU.createMemoryAccessBefore( LI, nullptr, MSSA.getMemoryAccess(InsertionPoint))); - MSSAU.insertUse(MSSALoad); + MSSAU.insertUse(MSSALoad, /*RenameUses=*/ true); // Create the final SVIs and replace all uses. int i = 0; diff --git a/llvm/lib/CodeGen/JMCInstrumenter.cpp b/llvm/lib/CodeGen/JMCInstrumenter.cpp new file mode 100644 index 000000000000..23220872b532 --- /dev/null +++ b/llvm/lib/CodeGen/JMCInstrumenter.cpp @@ -0,0 +1,233 @@ +//===- JMCInstrumenter.cpp - JMC Instrumentation --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// JMCInstrumenter pass: +// - instrument each function with a call to __CheckForDebuggerJustMyCode. The +// sole argument should be defined in .msvcjmc. Each flag is 1 byte initilized +// to 1. +// - create the dummy COMDAT function __JustMyCode_Default to prevent linking +// error if __CheckForDebuggerJustMyCode is not available. +// - For MSVC: +// add "/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default" to +// "llvm.linker.options" +// For ELF: +// Rename __JustMyCode_Default to __CheckForDebuggerJustMyCode and mark it as +// weak symbol. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/DJB.h" +#include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "jmc-instrument" + +namespace { +struct JMCInstrumenter : public ModulePass { + static char ID; + JMCInstrumenter() : ModulePass(ID) { + initializeJMCInstrumenterPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; +char JMCInstrumenter::ID = 0; +} // namespace + +INITIALIZE_PASS( + JMCInstrumenter, DEBUG_TYPE, + "Instrument function entry with call to __CheckForDebuggerJustMyCode", + false, false) + +ModulePass *llvm::createJMCInstrumenterPass() { return new JMCInstrumenter(); } + +namespace { +const char CheckFunctionName[] = "__CheckForDebuggerJustMyCode"; + +std::string getFlagName(DISubprogram &SP, bool UseX86FastCall) { + // absolute windows path: windows_backslash + // relative windows backslash path: windows_backslash + // relative windows slash path: posix + // absolute posix path: posix + // relative posix path: posix + sys::path::Style PathStyle = + has_root_name(SP.getDirectory(), sys::path::Style::windows_backslash) || + SP.getDirectory().contains("\\") || + SP.getFilename().contains("\\") + ? sys::path::Style::windows_backslash + : sys::path::Style::posix; + // Best effort path normalization. This is to guarantee a unique flag symbol + // is produced for the same directory. Some builds may want to use relative + // paths, or paths with a specific prefix (see the -fdebug-compilation-dir + // flag), so only hash paths in debuginfo. Don't expand them to absolute + // paths. + SmallString<256> FilePath(SP.getDirectory()); + sys::path::append(FilePath, PathStyle, SP.getFilename()); + sys::path::native(FilePath, PathStyle); + sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, PathStyle); + + // The naming convention for the flag name is __<hash>_<filename> with '.' in + // <filename> replaced with '@'. For example C:\file.any.c would have a flag + // __D032E919_file@any@c. The naming convention matches MSVC's format; + // however, the match is not required to make JMC work. The hashing function + // used here is different from MSVC's. + + std::string Suffix; + for (auto C : sys::path::filename(FilePath, PathStyle)) + Suffix.push_back(C == '.' ? '@' : C); + + sys::path::remove_filename(FilePath, PathStyle); + return (UseX86FastCall ?
"_" : "__") + + utohexstr(djbHash(FilePath), /*LowerCase=*/false, + /*Width=*/8) + + "_" + Suffix; +} + +void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) { + Module &M = *GV.getParent(); + DICompileUnit *CU = SP.getUnit(); + assert(CU); + DIBuilder DB(M, false, CU); + + auto *DType = + DB.createBasicType("unsigned char", 8, dwarf::DW_ATE_unsigned_char, + llvm::DINode::FlagArtificial); + + auto *DGVE = DB.createGlobalVariableExpression( + CU, GV.getName(), /*LinkageName=*/StringRef(), SP.getFile(), + /*LineNo=*/0, DType, /*IsLocalToUnit=*/true, /*IsDefined=*/true); + GV.addMetadata(LLVMContext::MD_dbg, *DGVE); + DB.finalize(); +} + +FunctionType *getCheckFunctionType(LLVMContext &Ctx) { + Type *VoidTy = Type::getVoidTy(Ctx); + PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + return FunctionType::get(VoidTy, VoidPtrTy, false); +} + +Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) { + LLVMContext &Ctx = M.getContext(); + const char *DefaultCheckFunctionName = + UseX86FastCall ? "_JustMyCode_Default" : "__JustMyCode_Default"; + // Create the function. + Function *DefaultCheckFunc = + Function::Create(getCheckFunctionType(Ctx), GlobalValue::ExternalLinkage, + DefaultCheckFunctionName, &M); + DefaultCheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + DefaultCheckFunc->addParamAttr(0, Attribute::NoUndef); + if (UseX86FastCall) + DefaultCheckFunc->addParamAttr(0, Attribute::InReg); + + BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", DefaultCheckFunc); + ReturnInst::Create(Ctx, EntryBB); + return DefaultCheckFunc; +} +} // namespace + +bool JMCInstrumenter::runOnModule(Module &M) { + bool Changed = false; + LLVMContext &Ctx = M.getContext(); + Triple ModuleTriple(M.getTargetTriple()); + bool IsMSVC = ModuleTriple.isKnownWindowsMSVCEnvironment(); + bool IsELF = ModuleTriple.isOSBinFormatELF(); + assert((IsELF || IsMSVC) && "Unsupported triple for JMC"); + bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86; + const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc"; + + GlobalValue *CheckFunction = nullptr; + DenseMap SavedFlags(8); + for (auto &F : M) { + if (F.isDeclaration()) + continue; + auto *SP = F.getSubprogram(); + if (!SP) + continue; + + Constant *&Flag = SavedFlags[SP]; + if (!Flag) { + std::string FlagName = getFlagName(*SP, UseX86FastCall); + IntegerType *FlagTy = Type::getInt8Ty(Ctx); + Flag = M.getOrInsertGlobal(FlagName, FlagTy, [&] { + // FIXME: Put the GV in comdat and have linkonce_odr linkage to save + // .msvcjmc section space? maybe not worth it. 
+ GlobalVariable *GV = new GlobalVariable( + M, FlagTy, /*isConstant=*/false, GlobalValue::InternalLinkage, + ConstantInt::get(FlagTy, 1), FlagName); + GV->setSection(FlagSymbolSection); + GV->setAlignment(Align(1)); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + attachDebugInfo(*GV, *SP); + return GV; + }); + } + + if (!CheckFunction) { + Function *DefaultCheckFunc = + createDefaultCheckFunction(M, UseX86FastCall); + if (IsELF) { + DefaultCheckFunc->setName(CheckFunctionName); + DefaultCheckFunc->setLinkage(GlobalValue::WeakAnyLinkage); + CheckFunction = DefaultCheckFunc; + } else { + assert(!M.getFunction(CheckFunctionName) && + "JMC instrument more than once?"); + auto *CheckFunc = cast( + M.getOrInsertFunction(CheckFunctionName, getCheckFunctionType(Ctx)) + .getCallee()); + CheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + CheckFunc->addParamAttr(0, Attribute::NoUndef); + if (UseX86FastCall) { + CheckFunc->setCallingConv(CallingConv::X86_FastCall); + CheckFunc->addParamAttr(0, Attribute::InReg); + } + CheckFunction = CheckFunc; + + StringRef DefaultCheckFunctionName = DefaultCheckFunc->getName(); + appendToUsed(M, {DefaultCheckFunc}); + Comdat *C = M.getOrInsertComdat(DefaultCheckFunctionName); + C->setSelectionKind(Comdat::Any); + DefaultCheckFunc->setComdat(C); + // Add a linker option /alternatename to set the default implementation + // for the check function. + // https://devblogs.microsoft.com/oldnewthing/20200731-00/?p=104024 + std::string AltOption = std::string("/alternatename:") + + CheckFunctionName + "=" + + DefaultCheckFunctionName.str(); + llvm::Metadata *Ops[] = {llvm::MDString::get(Ctx, AltOption)}; + MDTuple *N = MDNode::get(Ctx, Ops); + M.getOrInsertNamedMetadata("llvm.linker.options")->addOperand(N); + } + } + // FIXME: it would be nice to make CI scheduling boundary, although in + // practice it does not matter much. 
+ auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction, + {Flag}, "", &*F.begin()->getFirstInsertionPt()); + CI->addParamAttr(0, Attribute::NoUndef); + if (UseX86FastCall) { + CI->setCallingConv(CallingConv::X86_FastCall); + CI->addParamAttr(0, Attribute::InReg); + } + + Changed = true; + } + return Changed; +} diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 0d3685d4141c..3192dcadb5f5 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -23,20 +23,19 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt EnableTrapUnreachable("trap-unreachable", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable generating trap for unreachable")); +static cl::opt + EnableTrapUnreachable("trap-unreachable", cl::Hidden, + cl::desc("Enable generating trap for unreachable")); void LLVMTargetMachine::initAsmInfo() { MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str())); @@ -99,7 +98,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, } TargetTransformInfo -LLVMTargetMachine::getTargetTransformInfo(const Function &F) { +LLVMTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(BasicTTIImpl(this, F)); } @@ -164,22 +163,35 @@ Expected> LLVMTargetMachine::createMCStreamer( // Create a code emitter if asked to show the encoding. std::unique_ptr MCE; if (Options.MCOptions.ShowMCEncoding) - MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context)); + MCE.reset(getTarget().createMCCodeEmitter(MII, Context)); + + bool UseDwarfDirectory = false; + switch (Options.MCOptions.MCUseDwarfDirectory) { + case MCTargetOptions::DisableDwarfDirectory: + UseDwarfDirectory = false; + break; + case MCTargetOptions::EnableDwarfDirectory: + UseDwarfDirectory = true; + break; + case MCTargetOptions::DefaultDwarfDirectory: + UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault(); + break; + } std::unique_ptr MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); auto FOut = std::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, - Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), - std::move(MAB), Options.MCOptions.ShowMCInst); + UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB), + Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; } case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, Context); if (!MCE) return make_error("createMCCodeEmitter failed", inconvertibleErrorCode()); @@ -252,6 +264,9 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, "Cannot emit MC with limited codegen pipeline"); Ctx = &MMIWP->getMMI().getContext(); + // libunwind is unable to load compact unwind dynamically, so we must generate + // DWARF unwind info for the JIT. + Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always; if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); @@ -259,8 +274,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, // emission fails. const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCRegisterInfo &MRI = *getMCRegisterInfo(); - MCCodeEmitter *MCE = - getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); if (!MCE || !MAB) diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 63a0d0c1c43e..39b44b917d9e 100644 --- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -14,6 +14,7 @@ ///===---------------------------------------------------------------------===// #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -87,7 +88,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { OwnedMBFI = std::make_unique(); OwnedMBFI->calculate(*MF, MBPI, *MLI); - return *OwnedMBFI.get(); + return *OwnedMBFI; } bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction( diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 0eb6100230bd..30ca8bd871e8 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -84,21 +84,18 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -106,27 +103,23 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" #include 
"llvm/MC/MCRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GenericIteratedDominanceFrontier.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include #include +#include #include #include -#include -#include #include #include #include @@ -148,6 +141,20 @@ static cl::opt EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden, cl::desc("Act like old LiveDebugValues did"), cl::init(false)); +// Limit for the maximum number of stack slots we should track, past which we +// will ignore any spills. InstrRefBasedLDV gathers detailed information on all +// stack slots which leads to high memory consumption, and in some scenarios +// (such as asan with very many locals) the working set of the function can be +// very large, causing many spills. In these scenarios, it is very unlikely that +// the developer has hundreds of variables live at the same time that they're +// carefully thinking about -- instead, they probably autogenerated the code. +// When this happens, gracefully stop tracking excess spill slots, rather than +// consuming all the developer's memory. +static cl::opt + StackWorkingSetLimit("livedebugvalues-max-stack-slots", cl::Hidden, + cl::desc("livedebugvalues-stack-ws-limit"), + cl::init(250)); + /// Tracker for converting machine value locations and variable values into /// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs /// specifying block live-in locations and transfers within blocks. @@ -252,7 +259,7 @@ public: /// object fields to track variable locations as we step through the block. /// FIXME: could just examine mloctracker instead of passing in \p mlocs? void - loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs, + loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, const SmallVectorImpl> &VLocs, unsigned NumLocs) { ActiveMLocs.clear(); @@ -715,6 +722,20 @@ MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII, StackSlotIdxes.insert({{Size, Offs}, Idx}); } + // There may also be strange register class sizes (think x86 fp80s). + for (const TargetRegisterClass *RC : TRI.regclasses()) { + unsigned Size = TRI.getRegSizeInBits(*RC); + + // We might see special reserved values as sizes, and classes for other + // stuff the machine tries to model. If it's more than 512 bits, then it + // is very unlikely to be a register than can be spilt. + if (Size > 512) + continue; + + unsigned Idx = StackSlotIdxes.size(); + StackSlotIdxes.insert({{Size, 0}, Idx}); + } + for (auto &Idx : StackSlotIdxes) StackIdxesToPos[Idx.second] = Idx.first; @@ -757,9 +778,15 @@ void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB, Masks.push_back(std::make_pair(MO, InstID)); } -SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) { +Optional MLocTracker::getOrTrackSpillLoc(SpillLoc L) { SpillLocationNo SpillID(SpillLocs.idFor(L)); + if (SpillID.id() == 0) { + // If there is no location, and we have reached the limit of how many stack + // slots to track, then don't track this one. + if (SpillLocs.size() >= StackWorkingSetLimit) + return None; + // Spill location is untracked: create record for this one, and all // subregister slots too. SpillID = SpillLocationNo(SpillLocs.insert(L)); @@ -843,19 +870,72 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional MLoc, // the variable is. 
if (Offset == 0) { const SpillLoc &Spill = SpillLocs[SpillID.id()]; - Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset, - Spill.SpillOffset); unsigned Base = Spill.SpillBase; MIB.addReg(Base); - MIB.addImm(0); - // Being on the stack makes this location indirect; if it was _already_ - // indirect though, we need to add extra indirection. See this test for - // a scenario where this happens: - // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll + // There are several ways we can dereference things, and several inputs + // to consider: + // * NRVO variables will appear with IsIndirect set, but should have + // nothing else in their DIExpressions, + // * Variables with DW_OP_stack_value in their expr already need an + // explicit dereference of the stack location, + // * Values that don't match the variable size need DW_OP_deref_size, + // * Everything else can just become a simple location expression. + + // We need to use deref_size whenever there's a mismatch between the + // size of value and the size of variable portion being read. + // Additionally, we should use it whenever dealing with stack_value + // fragments, to avoid the consumer having to determine the deref size + // from DW_OP_piece. + bool UseDerefSize = false; + unsigned ValueSizeInBits = getLocSizeInBits(*MLoc); + unsigned DerefSizeInBytes = ValueSizeInBits / 8; + if (auto Fragment = Var.getFragment()) { + unsigned VariableSizeInBits = Fragment->SizeInBits; + if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex()) + UseDerefSize = true; + } else if (auto Size = Var.getVariable()->getSizeInBits()) { + if (*Size != ValueSizeInBits) { + UseDerefSize = true; + } + } + if (Properties.Indirect) { - std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; - Expr = DIExpression::append(Expr, Elts); + // This is something like an NRVO variable, where the pointer has been + // spilt to the stack, or a dbg.addr pointing at a coroutine frame + // field. It should end up being a memory location, with the pointer + // to the variable loaded off the stack with a deref. It can't be a + // DW_OP_stack_value expression. + assert(!Expr->isImplicit()); + Expr = TRI.prependOffsetExpression( + Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter, + Spill.SpillOffset); + MIB.addImm(0); + } else if (UseDerefSize) { + // We're loading a value off the stack that's not the same size as the + // variable. Add / subtract stack offset, explicitly deref with a size, + // and add DW_OP_stack_value if not already present. + SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, + DerefSizeInBytes}; + Expr = DIExpression::prependOpcodes(Expr, Ops, true); + unsigned Flags = DIExpression::StackValue | DIExpression::ApplyOffset; + Expr = TRI.prependOffsetExpression(Expr, Flags, Spill.SpillOffset); + MIB.addReg(0); + } else if (Expr->isComplex()) { + // A variable with no size ambiguity, but with extra elements in its + // expression. Manually dereference the stack location. + assert(Expr->isComplex()); + Expr = TRI.prependOffsetExpression( + Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter, + Spill.SpillOffset); + MIB.addReg(0); + } else { + // A plain value that has been spilt to the stack, with no further + // context. Request a location expression, marking the DBG_VALUE as + // IsIndirect.
+ Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset, + Spill.SpillOffset); + MIB.addImm(0); } } else { // This is a stack location with a weird subregister offset: emit an undef @@ -879,7 +959,7 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, } /// Default construct and initialize the pass. -InstrRefBasedLDV::InstrRefBasedLDV() {} +InstrRefBasedLDV::InstrRefBasedLDV() = default; bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const { unsigned Reg = MTracker->LocIdxToLocID[L]; @@ -898,7 +978,7 @@ bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const { // void InstrRefBasedLDV::printVarLocInMBB(..) #endif -SpillLocationNo +Optional<SpillLocationNo> InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { assert(MI.hasOneMemOperand() && "Spill instruction does not have exactly one memory operand?"); @@ -913,8 +993,11 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return MTracker->getOrTrackSpillLoc({Reg, Offset}); } -Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { - SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI); +Optional<LocIdx> +InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { + Optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI); + if (!SpillLoc) + return None; // Where in the stack slot is this value defined -- i.e., what size of value // is this? An important question, because it could be loaded into a register // occur, but the safe action is to indicate the variable is optimised out. return None; - unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second); + unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second); return MTracker->getSpillMLoc(SpillID); } @@ -999,14 +1082,14 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { } bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns) { + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns) { if (!MI.isDebugRef()) return false; // Only handle this instruction when we are building the variable value // transfer function. - if (!VTracker) + if (!VTracker && !TTracker) return false; unsigned InstNo = MI.getOperand(0).getImm(); @@ -1068,15 +1151,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, if (L) NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L); } else if (OpNo != MachineFunction::DebugOperandMemNumber) { - assert(OpNo < TargetInstr.getNumOperands()); - const MachineOperand &MO = TargetInstr.getOperand(OpNo); - - // Today, this can only be a register. - assert(MO.isReg() && MO.isDef()); + // Permit the debug-info to be completely wrong: identifying a nonexistent + // operand, or one that is not a register definition, means something + // unexpected happened during optimisation. Broken debug-info, however, + // shouldn't crash the compiler -- instead leave the variable value as + // None, which will make it appear "optimised out".
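+ // (Hypothetical example of such breakage: an optimisation rewrites the + // target instruction with fewer operands than it had when the DBG_INSTR_REF + // was created, so the reference names an operand index that no longer + // exists.)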
+ if (OpNo < TargetInstr.getNumOperands()) { + const MachineOperand &MO = TargetInstr.getOperand(OpNo); + + if (MO.isReg() && MO.isDef() && MO.getReg()) { + unsigned LocID = MTracker->getLocID(MO.getReg()); + LocIdx L = MTracker->LocIDToLocIdx[LocID]; + NewID = ValueIDNum(BlockNo, InstrIt->second.second, L); + } + } - unsigned LocID = MTracker->getLocID(MO.getReg()); - LocIdx L = MTracker->LocIDToLocIdx[LocID]; - NewID = ValueIDNum(BlockNo, InstrIt->second.second, L); + if (!NewID) { + LLVM_DEBUG( + { dbgs() << "Seen instruction reference to illegal operand\n"; }); + } } // else: NewID is left as None. } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) { @@ -1162,7 +1255,8 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that // aren't immediately available). DbgValueProperties Properties(Expr, false); - VTracker->defVar(MI, Properties, NewID); + if (VTracker) + VTracker->defVar(MI, Properties, NewID); // If we're on the final pass through the function, decompose this INSTR_REF // into a plain DBG_VALUE. @@ -1225,7 +1319,16 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(0); unsigned InstrNum = MI.getOperand(1).getImm(); - if (MO.isReg()) { + auto EmitBadPHI = [this, &MI, InstrNum](void) -> bool { + // Helper lambda to do any accounting when we fail to find a location for + // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a + // dead stack slot, for example. + // Record a DebugPHIRecord with an empty value + location. + DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None}); + return true; + }; + + if (MO.isReg() && MO.getReg()) { // The value is whatever's currently in the register. Read and record it, // to be analysed later. Register Reg = MO.getReg(); @@ -1237,57 +1340,45 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { // Ensure this register is tracked. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) MTracker->lookupOrTrackRegister(*RAI); - } else { + } else if (MO.isFI()) { // The value is whatever's in this stack slot. - assert(MO.isFI()); unsigned FI = MO.getIndex(); // If the stack slot is dead, then this was optimized away. // FIXME: stack slot colouring should account for slots that get merged. if (MFI->isDeadObjectIndex(FI)) - return true; + return EmitBadPHI(); // Identify this spill slot, ensure it's tracked. Register Base; StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base); SpillLoc SL = {Base, Offs}; - SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL); - - // Problem: what value should we extract from the stack? LLVM does not - // record what size the last store to the slot was, and it would become - // sketchy after stack slot colouring anyway. Take a look at what values - // are stored on the stack, and pick the largest one that wasn't def'd - // by a spill (i.e., the value most likely to have been def'd in a register - // and then spilt. - std::array CandidateSizes = {64, 32, 16, 8}; - Optional Result = None; - Optional SpillLoc = None; - for (unsigned CS : CandidateSizes) { - unsigned SpillID = MTracker->getLocID(SpillNo, {CS, 0}); - SpillLoc = MTracker->getSpillMLoc(SpillID); - ValueIDNum Val = MTracker->readMLoc(*SpillLoc); - // If this value was defined in it's own position, then it was probably - // an aliasing index of a small value that was spilt. 
- if (Val.getLoc() != SpillLoc->asU64()) { - Result = Val; - break; - } - } + Optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL); - // If we didn't find anything, we're probably looking at a PHI, or a memory - // store folded into an instruction. FIXME: Take a guess that's it's 64 - // bits. This isn't ideal, but tracking the size that the spill is - // "supposed" to be is more complex, and benefits a small number of - // locations. - if (!Result) { - unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0}); - SpillLoc = MTracker->getSpillMLoc(SpillID); - Result = MTracker->readMLoc(*SpillLoc); - } + // We might be able to find a value, but have chosen not to, to avoid + // tracking too much stack information. + if (!SpillNo) + return EmitBadPHI(); + + // Any stack location DBG_PHI should have an associated bit-size. + assert(MI.getNumOperands() == 3 && "Stack DBG_PHI with no size?"); + unsigned slotBitSize = MI.getOperand(2).getImm(); + + unsigned SpillID = MTracker->getLocID(*SpillNo, {slotBitSize, 0}); + LocIdx SpillLoc = MTracker->getSpillMLoc(SpillID); + ValueIDNum Result = MTracker->readMLoc(SpillLoc); // Record this DBG_PHI for later analysis. - auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc}); + auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), Result, SpillLoc}); DebugPHINumToValue.push_back(DbgPHI); + } else { + // Else: if the operand is neither a legal register nor a stack slot, then + // we're being fed illegal debug-info. Record an empty PHI, so that any + // debug users trying to read this number will be put off trying to + // interpret the value. + LLVM_DEBUG( + { dbgs() << "Seen DBG_PHI with unrecognised operand format\n"; }); + return EmitBadPHI(); } return true; @@ -1357,11 +1448,12 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // If this instruction writes to a spill slot, def that slot. if (hasFoldedStackStore(MI)) { - SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI); - for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { - unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I); - LocIdx L = MTracker->getSpillMLoc(SpillID); - MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L)); + if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) { + for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { + unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I); + LocIdx L = MTracker->getSpillMLoc(SpillID); + MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L)); + } } } @@ -1398,11 +1490,12 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // Tell TTracker about any folded stack store.
if (hasFoldedStackStore(MI)) { - SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI); - for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { - unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I); - LocIdx L = MTracker->getSpillMLoc(SpillID); - TTracker->clobberMloc(L, MI.getIterator(), true); + if (Optional SpillNo = extractSpillBaseRegAndOffset(MI)) { + for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { + unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I); + LocIdx L = MTracker->getSpillMLoc(SpillID); + TTracker->clobberMloc(L, MI.getIterator(), true); + } } } } @@ -1438,23 +1531,24 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { } } -bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI, - MachineFunction *MF) { +Optional +InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI, + MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) - return false; + return None; // Reject any memory operand that's aliased -- we can't guarantee its value. auto MMOI = MI.memoperands_begin(); const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); if (PVal->isAliased(MFI)) - return false; + return None; if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII)) - return false; // This is not a spill instruction, since no valid size was - // returned from either function. + return None; // This is not a spill instruction, since no valid size was + // returned from either function. - return true; + return extractSpillBaseRegAndOffset(MI); } bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI, @@ -1511,13 +1605,11 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { // First, if there are any DBG_VALUEs pointing at a spill slot that is // written to, terminate that variable location. The value in memory // will have changed. DbgEntityHistoryCalculator doesn't try to detect this. - if (isSpillInstruction(MI, MF)) { - SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI); - + if (Optional Loc = isSpillInstruction(MI, MF)) { // Un-set this location and clobber, so that earlier locations don't // continue past this store. for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) { - unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx); + unsigned SpillID = MTracker->getSpillIDWithIdx(*Loc, SlotIdx); Optional MLoc = MTracker->getSpillMLoc(SpillID); if (!MLoc) continue; @@ -1535,7 +1627,9 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { // Try to recognise spill and restore instructions that may transfer a value. if (isLocationSpill(MI, MF, Reg)) { - SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI); + // isLocationSpill returning true should guarantee we can extract a + // location. + SpillLocationNo Loc = *extractSpillBaseRegAndOffset(MI); auto DoTransfer = [&](Register SrcReg, unsigned SpillID) { auto ReadValue = MTracker->readReg(SrcReg); @@ -1562,10 +1656,9 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { unsigned SpillID = MTracker->getLocID(Loc, {Size, 0}); DoTransfer(Reg, SpillID); } else { - Optional OptLoc = isRestoreInstruction(MI, MF, Reg); - if (!OptLoc) + Optional Loc = isRestoreInstruction(MI, MF, Reg); + if (!Loc) return false; - SpillLocationNo Loc = *OptLoc; // Assumption: we're reading from the base of the stack slot, not some // offset into it. 
It seems very unlikely LLVM would ever generate @@ -1583,22 +1676,17 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID); auto ReadValue = MTracker->readMLoc(SrcIdx); MTracker->setReg(DestReg, ReadValue); - - if (TTracker) { - LocIdx DstLoc = MTracker->getRegMLoc(DestReg); - TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator()); - } }; for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) { unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI); - unsigned SpillID = MTracker->getLocID(Loc, Subreg); + unsigned SpillID = MTracker->getLocID(*Loc, Subreg); DoTransfer(*SRI, SpillID); } // Directly look up this registers slot idx by size, and transfer. unsigned Size = TRI->getRegSizeInBits(Reg, *MRI); - unsigned SpillID = MTracker->getLocID(Loc, {Size, 0}); + unsigned SpillID = MTracker->getLocID(*Loc, {Size, 0}); DoTransfer(Reg, SpillID); } return true; @@ -1724,8 +1812,8 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { AllSeenFragments.insert(ThisFragment); } -void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns) { +void InstrRefBasedLDV::process(MachineInstr &MI, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns) { // Try to interpret an MI as a debug or transfer instruction. Only if it's // none of these should we interpret it's register defs as new value // definitions. @@ -1775,7 +1863,10 @@ void InstrRefBasedLDV::produceMLocTransferFunction( // Step through each instruction in this block. for (auto &MI : MBB) { - process(MI); + // Pass in an empty unique_ptr for the value tables when accumulating the + // machine transfer function. + process(MI, nullptr, nullptr); + // Also accumulate fragment map. if (MI.isDebugValue() || MI.isDebugRef()) accumulateFragmentMap(MI); @@ -1864,7 +1955,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction( bool InstrRefBasedLDV::mlocJoin( MachineBasicBlock &MBB, SmallPtrSet &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs) { + FuncValueTable &OutLocs, ValueTable &InLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -1965,7 +2056,7 @@ void InstrRefBasedLDV::findStackIndexInterference( void InstrRefBasedLDV::placeMLocPHIs( MachineFunction &MF, SmallPtrSetImpl &AllBlocks, - ValueIDNum **MInLocs, SmallVectorImpl &MLocTransfer) { + FuncValueTable &MInLocs, SmallVectorImpl &MLocTransfer) { SmallVector StackUnits; findStackIndexInterference(StackUnits); @@ -2094,7 +2185,7 @@ void InstrRefBasedLDV::placeMLocPHIs( } void InstrRefBasedLDV::buildMLocValueMap( - MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs, + MachineFunction &MF, FuncValueTable &MInLocs, FuncValueTable &MOutLocs, SmallVectorImpl &MLocTransfer) { std::priority_queue, std::greater> @@ -2236,7 +2327,7 @@ void InstrRefBasedLDV::BlockPHIPlacement( Optional InstrRefBasedLDV::pickVPHILoc( const MachineBasicBlock &MBB, const DebugVariable &Var, - const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, + const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs, const SmallVectorImpl &BlockOrders) { // Collect a set of locations from predecessor where its live-out value can // be found. 
@@ -2504,7 +2595,7 @@ void InstrRefBasedLDV::getBlocksForScope( void InstrRefBasedLDV::buildVLocValueMap( const DILocation *DILoc, const SmallSet &VarsWeCareAbout, SmallPtrSetImpl &AssignBlocks, LiveInsT &Output, - ValueIDNum **MOutLocs, ValueIDNum **MInLocs, + FuncValueTable &MOutLocs, FuncValueTable &MInLocs, SmallVectorImpl &AllTheVLocs) { // This method is much like buildMLocValueMap: but focuses on a single // LexicalScope at a time. Pick out a set of blocks and variables that are @@ -2765,6 +2856,11 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition( auto ValueIt = VLocs.Vars.find(Var); const DbgValue &Value = ValueIt->second; + // If it's an explicit assignment of "undef", that means there is no location + // anyway, anywhere. + if (Value.Kind == DbgValue::Undef) + return; + // Assign the variable value to entry to each dominated block that's in scope. // Skip the definition block -- it's assigned the variable value in the middle // of the block somewhere. @@ -2790,35 +2886,6 @@ void InstrRefBasedLDV::dump_mloc_transfer( } #endif -void InstrRefBasedLDV::emitLocations( - MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MOutLocs, - ValueIDNum **MInLocs, DenseMap &AllVarsNumbering, - const TargetPassConfig &TPC) { - TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC); - unsigned NumLocs = MTracker->getNumLocs(); - - // For each block, load in the machine value locations and variable value - // live-ins, then step through each instruction in the block. New DBG_VALUEs - // to be inserted will be created along the way. - for (MachineBasicBlock &MBB : MF) { - unsigned bbnum = MBB.getNumber(); - MTracker->reset(); - MTracker->loadFromArray(MInLocs[bbnum], bbnum); - TTracker->loadInlocs(MBB, MInLocs[bbnum], SavedLiveIns[MBB.getNumber()], - NumLocs); - - CurBB = bbnum; - CurInst = 1; - for (auto &MI : MBB) { - process(MI, MOutLocs, MInLocs); - TTracker->checkInstForNewValues(CurInst, MI.getIterator()); - ++CurInst; - } - } - - emitTransfers(AllVarsNumbering); -} - void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { // Build some useful data structures. @@ -2861,8 +2928,172 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { #endif } +// Produce an "ejection map" for blocks, i.e., what's the highest-numbered +// lexical scope it's used in. When exploring in DFS order and we pass that +// scope, the block can be processed and any tracking information freed. +void InstrRefBasedLDV::makeDepthFirstEjectionMap( + SmallVectorImpl &EjectionMap, + const ScopeToDILocT &ScopeToDILocation, + ScopeToAssignBlocksT &ScopeToAssignBlocks) { + SmallPtrSet BlocksToExplore; + SmallVector, 4> WorkStack; + auto *TopScope = LS.getCurrentFunctionScope(); + + // Unlike lexical scope explorers, we explore in reverse order, to find the + // "last" lexical scope used for each block early. + WorkStack.push_back({TopScope, TopScope->getChildren().size() - 1}); + + while (!WorkStack.empty()) { + auto &ScopePosition = WorkStack.back(); + LexicalScope *WS = ScopePosition.first; + ssize_t ChildNum = ScopePosition.second--; + + const SmallVectorImpl &Children = WS->getChildren(); + if (ChildNum >= 0) { + // If ChildNum is positive, there are remaining children to explore. + // Push the child and its children-count onto the stack. 
+ auto &ChildScope = Children[ChildNum]; + WorkStack.push_back( + std::make_pair(ChildScope, ChildScope->getChildren().size() - 1)); + } else { + WorkStack.pop_back(); + + // We've explored all children and any later blocks: examine all blocks + // in our scope. If they haven't yet had an ejection number set, then + // this scope will be the last to use that block. + auto DILocationIt = ScopeToDILocation.find(WS); + if (DILocationIt != ScopeToDILocation.end()) { + getBlocksForScope(DILocationIt->second, BlocksToExplore, + ScopeToAssignBlocks.find(WS)->second); + for (auto *MBB : BlocksToExplore) { + unsigned BBNum = MBB->getNumber(); + if (EjectionMap[BBNum] == 0) + EjectionMap[BBNum] = WS->getDFSOut(); + } + + BlocksToExplore.clear(); + } + } + } +} + +bool InstrRefBasedLDV::depthFirstVLocAndEmit( + unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation, + const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks, + LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, + SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF, + DenseMap<DebugVariable, unsigned> &AllVarsNumbering, + const TargetPassConfig &TPC) { + TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC); + unsigned NumLocs = MTracker->getNumLocs(); + VTracker = nullptr; + + // No scopes? No variable locations. + if (!LS.getCurrentFunctionScope()) + return false; + + // Build map from block number to the last scope that uses the block. + SmallVector<unsigned> EjectionMap; + EjectionMap.resize(MaxNumBlocks, 0); + makeDepthFirstEjectionMap(EjectionMap, ScopeToDILocation, + ScopeToAssignBlocks); + + // Helper lambda for ejecting a block -- if nothing is going to use the block, + // we can translate the variable location information into DBG_VALUEs and then + // free all of InstrRefBasedLDV's data structures. + auto EjectBlock = [&](MachineBasicBlock &MBB) -> void { + unsigned BBNum = MBB.getNumber(); + AllTheVLocs[BBNum].clear(); + + // Prime the transfer-tracker, and then step through all the block + // instructions, installing transfers. + MTracker->reset(); + MTracker->loadFromArray(MInLocs[BBNum], BBNum); + TTracker->loadInlocs(MBB, MInLocs[BBNum], Output[BBNum], NumLocs); + + CurBB = BBNum; + CurInst = 1; + for (auto &MI : MBB) { + process(MI, MOutLocs.get(), MInLocs.get()); + TTracker->checkInstForNewValues(CurInst, MI.getIterator()); + ++CurInst; + } + + // Free machine-location tables for this block. + MInLocs[BBNum].reset(); + MOutLocs[BBNum].reset(); + // We don't need live-in variable values for this block either. + Output[BBNum].clear(); + AllTheVLocs[BBNum].clear(); + }; + + SmallPtrSet BlocksToExplore; + SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack; + WorkStack.push_back({LS.getCurrentFunctionScope(), 0}); + unsigned HighestDFSIn = 0; + + // Proceed to explore in depth first order. + while (!WorkStack.empty()) { + auto &ScopePosition = WorkStack.back(); + LexicalScope *WS = ScopePosition.first; + ssize_t ChildNum = ScopePosition.second++; + + // We observe scopes with children twice here, once descending in, once + // ascending out of the scope nest. Use HighestDFSIn as a ratchet to ensure + // we don't process a scope twice. Additionally, ignore scopes that don't + // have a DILocation -- by proxy, this means we never tracked any variable + // assignments in that scope.
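+ // (How the ratchet works: DFSIn numbers grow as scopes are first entered, + // so after visiting a child scope, HighestDFSIn is at least the child's + // DFSIn; when we later pop back out to the parent, its smaller DFSIn fails + // the test below and the scope is not processed a second time.)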
+ auto DILocIt = ScopeToDILocation.find(WS); + if (HighestDFSIn <= WS->getDFSIn() && DILocIt != ScopeToDILocation.end()) { + const DILocation *DILoc = DILocIt->second; + auto &VarsWeCareAbout = ScopeToVars.find(WS)->second; + auto &BlocksInScope = ScopeToAssignBlocks.find(WS)->second; + + buildVLocValueMap(DILoc, VarsWeCareAbout, BlocksInScope, Output, MOutLocs, + MInLocs, AllTheVLocs); + } + + HighestDFSIn = std::max(HighestDFSIn, WS->getDFSIn()); + + // Descend into any scope nests. + const SmallVectorImpl &Children = WS->getChildren(); + if (ChildNum < (ssize_t)Children.size()) { + // There are children to explore -- push onto stack and continue. + auto &ChildScope = Children[ChildNum]; + WorkStack.push_back(std::make_pair(ChildScope, 0)); + } else { + WorkStack.pop_back(); + + // We've explored a leaf, or have explored all the children of a scope. + // Try to eject any blocks where this is the last scope it's relevant to. + auto DILocationIt = ScopeToDILocation.find(WS); + if (DILocationIt == ScopeToDILocation.end()) + continue; + + getBlocksForScope(DILocationIt->second, BlocksToExplore, + ScopeToAssignBlocks.find(WS)->second); + for (auto *MBB : BlocksToExplore) + if (WS->getDFSOut() == EjectionMap[MBB->getNumber()]) + EjectBlock(const_cast(*MBB)); + + BlocksToExplore.clear(); + } + } + + // Some artificial blocks may not have been ejected, meaning they're not + // connected to an actual legitimate scope. This can technically happen + // with things like the entry block. In theory, we shouldn't need to do + // anything for such out-of-scope blocks, but for the sake of being similar + // to VarLocBasedLDV, eject these too. + for (auto *MBB : ArtificialBlocks) + if (MOutLocs[MBB->getNumber()]) + EjectBlock(*MBB); + + return emitTransfers(AllVarsNumbering); +} + bool InstrRefBasedLDV::emitTransfers( - DenseMap &AllVarsNumbering) { + DenseMap &AllVarsNumbering) { // Go through all the transfers recorded in the TransferTracker -- this is // both the live-ins to a block, and any movements of values that happen // in the middle. @@ -2944,24 +3175,24 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, assert(MaxNumBlocks >= 0); ++MaxNumBlocks; + initialSetup(MF); + MLocTransfer.resize(MaxNumBlocks); vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr)); SavedLiveIns.resize(MaxNumBlocks); - initialSetup(MF); - produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks); // Allocate and initialize two array-of-arrays for the live-in and live-out // machine values. The outer dimension is the block number; while the inner // dimension is a LocIdx from MLocTracker. - ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks]; - ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks]; + FuncValueTable MOutLocs = std::make_unique(MaxNumBlocks); + FuncValueTable MInLocs = std::make_unique(MaxNumBlocks); unsigned NumLocs = MTracker->getNumLocs(); for (int i = 0; i < MaxNumBlocks; ++i) { // These all auto-initialize to ValueIDNum::EmptyValue - MOutLocs[i] = new ValueIDNum[NumLocs]; - MInLocs[i] = new ValueIDNum[NumLocs]; + MOutLocs[i] = std::make_unique(NumLocs); + MInLocs[i] = std::make_unique(NumLocs); } // Solve the machine value dataflow problem using the MLocTransfer function, @@ -2974,7 +3205,10 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // either live-through machine values, or PHIs. for (auto &DBG_PHI : DebugPHINumToValue) { // Identify unresolved block-live-ins. 
- ValueIDNum &Num = DBG_PHI.ValueRead; + if (!DBG_PHI.ValueRead) + continue; + + ValueIDNum &Num = *DBG_PHI.ValueRead; if (!Num.isPHI()) continue; @@ -2995,7 +3229,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, MTracker->loadFromArray(MInLocs[CurBB], CurBB); CurInst = 1; for (auto &MI : MBB) { - process(MI, MOutLocs, MInLocs); + process(MI, MOutLocs.get(), MInLocs.get()); ++CurInst; } MTracker->reset(); @@ -3051,32 +3285,13 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, << VarAssignCount << " variable assignments, exceeding limits.\n"); } else { - // Compute the extended ranges, iterating over scopes. There might be - // something to be said for ordering them by size/locality, but that's for - // the future. For each scope, solve the variable value problem, producing - // a map of variables to values in SavedLiveIns. - for (auto &P : ScopeToVars) { - buildVLocValueMap(ScopeToDILocation[P.first], P.second, - ScopeToAssignBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, - vlocs); - } - - // Using the computed value locations and variable values for each block, - // create the DBG_VALUE instructions representing the extended variable - // locations. - emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); - - // Did we actually make any changes? If we created any DBG_VALUEs, then yes. - Changed = TTracker->Transfers.size() != 0; - } - - // Common clean-up of memory. - for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { - delete[] MOutLocs[Idx]; - delete[] MInLocs[Idx]; + // Optionally, solve the variable value problem and emit to blocks by using + // a lexical-scope-depth search. It should be functionally identical to + // the "else" block of this condition. + Changed = depthFirstVLocAndEmit( + MaxNumBlocks, ScopeToDILocation, ScopeToVars, ScopeToAssignBlocks, + SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC); } - delete[] MOutLocs; - delete[] MInLocs; delete MTracker; delete TTracker; @@ -3092,6 +3307,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, DebugPHINumToValue.clear(); OverlapFragments.clear(); SeenFragments.clear(); + SeenDbgPHIs.clear(); return Changed; } @@ -3193,9 +3409,10 @@ public: /// Machine location where any PHI must occur. LocIdx Loc; /// Table of live-in machine value numbers for blocks / locations. - ValueIDNum **MLiveIns; + const ValueTable *MLiveIns; - LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {} + LDVSSAUpdater(LocIdx L, const ValueTable *MLiveIns) + : Loc(L), MLiveIns(MLiveIns) {} void reset() { for (auto &Block : BlockMap) @@ -3352,11 +3569,28 @@ public: } // end namespace llvm -Optional InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns, - MachineInstr &Here, - uint64_t InstrNum) { +Optional InstrRefBasedLDV::resolveDbgPHIs( + MachineFunction &MF, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) { + assert(MLiveOuts && MLiveIns && + "Tried to resolve DBG_PHI before location " + "tables allocated?"); + + // This function will be called twice per DBG_INSTR_REF, and might end up + // computing lots of SSA information: memoize it. 
+ auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here); + if (SeenDbgPHIIt != SeenDbgPHIs.end()) + return SeenDbgPHIIt->second; + + Optional Result = + resolveDbgPHIsImpl(MF, MLiveOuts, MLiveIns, Here, InstrNum); + SeenDbgPHIs.insert({&Here, Result}); + return Result; +} + +Optional InstrRefBasedLDV::resolveDbgPHIsImpl( + MachineFunction &MF, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) { // Pick out records of DBG_PHI instructions that have been observed. If there // are none, then we cannot compute a value number. auto RangePair = std::equal_range(DebugPHINumToValue.begin(), @@ -3368,17 +3602,24 @@ Optional InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, if (LowerIt == UpperIt) return None; + // If any DBG_PHIs referred to a location we didn't understand, don't try to + // compute a value. There might be scenarios where we could recover a value + // for some range of DBG_INSTR_REFs, but at this point we can have high + // confidence that we've seen a bug. + auto DBGPHIRange = make_range(LowerIt, UpperIt); + for (const DebugPHIRecord &DBG_PHI : DBGPHIRange) + if (!DBG_PHI.ValueRead) + return None; + // If there's only one DBG_PHI, then that is our value number. if (std::distance(LowerIt, UpperIt) == 1) - return LowerIt->ValueRead; - - auto DBGPHIRange = make_range(LowerIt, UpperIt); + return *LowerIt->ValueRead; // Pick out the location (physreg, slot) where any PHIs must occur. It's // technically possible for us to merge values in different registers in each // block, but highly unlikely that LLVM will generate such code after register // allocation. - LocIdx Loc = LowerIt->ReadLoc; + LocIdx Loc = *LowerIt->ReadLoc; // We have several DBG_PHIs, and a use position (the Here inst). All each // DBG_PHI does is identify a value at a program position. We can treat each @@ -3397,7 +3638,7 @@ Optional InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, // for the SSAUpdater. for (const auto &DBG_PHI : DBGPHIRange) { LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); - const ValueIDNum &Num = DBG_PHI.ValueRead; + const ValueIDNum &Num = *DBG_PHI.ValueRead; AvailableValues.insert(std::make_pair(Block, Num.asU64())); } @@ -3431,7 +3672,7 @@ Optional InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, // Define all the input DBG_PHI values in ValidatedValues. 
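// The memoization idiom above, in isolation: cache Optional results keyed by
// instruction, so both successes and failures (None) are computed only once.
// A minimal sketch with standard types; expensiveResolve and the int key are
// stand-ins, not InstrRefBasedLDV's real types.
#include <optional>
#include <unordered_map>

inline std::optional<int> expensiveResolve(int Key) {
  // Placeholder for the real SSA computation.
  return (Key % 2) ? std::optional<int>(Key * Key) : std::nullopt;
}

class CachingResolver {
  std::unordered_map<int, std::optional<int>> Seen;

public:
  std::optional<int> resolve(int Key) {
    auto It = Seen.find(Key);
    if (It != Seen.end())
      return It->second; // Cache hit, even when the cached answer is nullopt.
    std::optional<int> Result = expensiveResolve(Key);
    Seen.insert({Key, Result});
    return Result;
  }
};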
for (const auto &DBG_PHI : DBGPHIRange) { LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); - const ValueIDNum &Num = DBG_PHI.ValueRead; + const ValueIDNum &Num = *DBG_PHI.ValueRead; ValidatedValues.insert(std::make_pair(Block, Num)); } @@ -3456,7 +3697,7 @@ Optional InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, return None; ValueIDNum ValueToCheck; - ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; + const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; auto VVal = ValidatedValues.find(PHIIt.first); if (VVal == ValidatedValues.end()) { diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index e7383209c027..70aae47c8bdc 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -10,17 +10,14 @@ #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "LiveDebugValues.h" @@ -171,6 +168,13 @@ public: static ValueIDNum TombstoneValue; }; +/// Type for a table of values in a block. +using ValueTable = std::unique_ptr; + +/// Type for a table-of-table-of-values, i.e., the collection of either +/// live-in or live-out values for each block in the function. +using FuncValueTable = std::unique_ptr; + /// Thin wrapper around an integer -- designed to give more type safety to /// spill location numbers. class SpillLocationNo { @@ -192,7 +196,7 @@ public: }; /// Meta qualifiers for a value. Pair of whatever expression is used to qualify -/// the the value, and Boolean of whether or not it's indirect. +/// the value, and Boolean of whether or not it's indirect. class DbgValueProperties { public: DbgValueProperties(const DIExpression *DIExpr, bool Indirect) @@ -507,7 +511,7 @@ public: /// Load values for each location from array of ValueIDNums. Take current /// bbnum just in case we read a value from a hitherto untouched register. - void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) { + void loadFromArray(ValueTable &Locs, unsigned NewCurBB) { CurBB = NewCurBB; // Iterate over all tracked locations, and load each locations live-in // value into our local index. @@ -616,7 +620,9 @@ public: void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID); /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked. - SpillLocationNo getOrTrackSpillLoc(SpillLoc L); + /// Returns None when in scenarios where a spill slot could be tracked, but + /// we would likely run into resource limitations. + Optional getOrTrackSpillLoc(SpillLoc L); // Get LocIdx of a spill ID. LocIdx getSpillMLoc(unsigned SpillID) { @@ -627,6 +633,19 @@ public: /// Return true if Idx is a spill machine location. bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; } + /// How large is this location (aka, how wide is a value defined there?). 
+ unsigned getLocSizeInBits(LocIdx L) const { + unsigned ID = LocIdxToLocID[L]; + if (!isSpill(L)) { + return TRI.getRegSizeInBits(Register(ID), MF.getRegInfo()); + } else { + // The slot location on the stack is uninteresting, we care about the + // position of the value within the slot (which comes with a size). + StackSlotPos Pos = locIDToSpillIdx(ID); + return Pos.first; + } + } + MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); } MLocIterator end() { @@ -678,7 +697,7 @@ public: /// movement of values between locations inside of a block is handled at a /// much later stage, in the TransferTracker class. MapVector Vars; - DenseMap Scopes; + SmallDenseMap Scopes; MachineBasicBlock *MBB = nullptr; const OverlapMap &OverlappingFragments; DbgValueProperties EmptyProperties; @@ -747,6 +766,11 @@ public: Scopes[Overlapped] = Loc; } } + + void clear() { + Vars.clear(); + Scopes.clear(); + } }; // XXX XXX docs @@ -844,10 +868,16 @@ private: /// Record of where we observed a DBG_PHI instruction. class DebugPHIRecord { public: - uint64_t InstrNum; ///< Instruction number of this DBG_PHI. - MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred. - ValueIDNum ValueRead; ///< The value number read by the DBG_PHI. - LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads. + /// Instruction number of this DBG_PHI. + uint64_t InstrNum; + /// Block where DBG_PHI occurred. + MachineBasicBlock *MBB; + /// The value number read by the DBG_PHI -- or None if it didn't refer to + /// a value. + Optional ValueRead; + /// Register/Stack location the DBG_PHI reads -- or None if it referred to + /// something unexpected. + Optional ReadLoc; operator unsigned() const { return InstrNum; } }; @@ -862,6 +892,12 @@ private: OverlapMap OverlapFragments; VarToFragments SeenFragments; + /// Mapping of DBG_INSTR_REF instructions to their values, for those + /// DBG_INSTR_REFs that call resolveDbgPHIs. These variable references solve + /// a mini SSA problem caused by DBG_PHIs being cloned, this collection caches + /// the result. + DenseMap> SeenDbgPHIs; + /// True if we need to examine call instructions for stack clobbers. We /// normally assume that they don't clobber SP, but stack probes on Windows /// do. @@ -873,7 +909,8 @@ private: StringRef StackProbeSymbolName; /// Tests whether this instruction is a spill to a stack slot. - bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); + Optional isSpillInstruction(const MachineInstr &MI, + MachineFunction *MF); /// Decide if @MI is a spill instruction and return true if it is. We use 2 /// criteria to make this decision: @@ -891,11 +928,12 @@ private: /// Given a spill instruction, extract the spill slot information, ensure it's /// tracked, and return the spill number. - SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI); + Optional + extractSpillBaseRegAndOffset(const MachineInstr &MI); /// Observe a single instruction while stepping through a block. - void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr, - ValueIDNum **MLiveIns = nullptr); + void process(MachineInstr &MI, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns); /// Examines whether \p MI is a DBG_VALUE and notifies trackers. /// \returns true if MI was recognized and processed. @@ -903,8 +941,8 @@ private: /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers. /// \returns true if MI was recognized and processed. 
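// On the ValueTable / FuncValueTable typedefs above: the patch swaps manually
// new[]'d and delete[]'d tables for unique_ptr<T[]> arrays-of-arrays, which
// are value-initialized on allocation and freed automatically. A sketch of
// the ownership pattern with a stand-in Value type:
#include <memory>

struct Value { unsigned V = 0; }; // stand-in for ValueIDNum

using Table = std::unique_ptr<Value[]>;
using FuncTable = std::unique_ptr<Table[]>;

inline FuncTable makeTables(unsigned NumBlocks, unsigned NumLocs) {
  FuncTable Tables = std::make_unique<Table[]>(NumBlocks);
  for (unsigned I = 0; I < NumBlocks; ++I)
    Tables[I] = std::make_unique<Value[]>(NumLocs); // default-initialized
  return Tables; // no matching delete[] loops needed at any call site
}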
- bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns); + bool transferDebugInstrRef(MachineInstr &MI, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns); /// Stores value-information about where this PHI occurred, and what /// instruction number is associated with it. @@ -936,9 +974,15 @@ private: /// \p InstrNum Debug instruction number defined by DBG_PHI instructions. /// \returns The machine value number at position Here, or None. Optional resolveDbgPHIs(MachineFunction &MF, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns, MachineInstr &Here, - uint64_t InstrNum); + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, + MachineInstr &Here, uint64_t InstrNum); + + Optional resolveDbgPHIsImpl(MachineFunction &MF, + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, + MachineInstr &Here, + uint64_t InstrNum); /// Step through the function, recording register definitions and movements /// in an MLocTracker. Convert the observations into a per-block transfer @@ -954,8 +998,8 @@ private: /// live-out arrays to the (initialized to zero) multidimensional arrays in /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block /// number, the inner by LocIdx. - void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs, - ValueIDNum **MOutLocs, + void buildMLocValueMap(MachineFunction &MF, FuncValueTable &MInLocs, + FuncValueTable &MOutLocs, SmallVectorImpl &MLocTransfer); /// Examine the stack indexes (i.e. offsets within the stack) to find the @@ -966,7 +1010,7 @@ private: /// the IDF of each register. void placeMLocPHIs(MachineFunction &MF, SmallPtrSetImpl &AllBlocks, - ValueIDNum **MInLocs, + FuncValueTable &MInLocs, SmallVectorImpl &MLocTransfer); /// Propagate variable values to blocks in the common case where there's @@ -997,7 +1041,7 @@ private: /// is true, revisiting this block is necessary. bool mlocJoin(MachineBasicBlock &MBB, SmallPtrSet &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs); + FuncValueTable &OutLocs, ValueTable &InLocs); /// Produce a set of blocks that are in the current lexical scope. This means /// those blocks that contain instructions "in" the scope, blocks where @@ -1025,11 +1069,11 @@ private: /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks /// locations through. void buildVLocValueMap(const DILocation *DILoc, - const SmallSet &VarsWeCareAbout, - SmallPtrSetImpl &AssignBlocks, - LiveInsT &Output, ValueIDNum **MOutLocs, - ValueIDNum **MInLocs, - SmallVectorImpl &AllTheVLocs); + const SmallSet &VarsWeCareAbout, + SmallPtrSetImpl &AssignBlocks, + LiveInsT &Output, FuncValueTable &MOutLocs, + FuncValueTable &MInLocs, + SmallVectorImpl &AllTheVLocs); /// Attempt to eliminate un-necessary PHIs on entry to a block. Examines the /// live-in values coming from predecessors live-outs, and replaces any PHIs @@ -1047,21 +1091,9 @@ private: /// \returns Value ID of a machine PHI if an appropriate one is available. Optional pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var, - const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, + const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs, const SmallVectorImpl &BlockOrders); - /// Given the solutions to the two dataflow problems, machine value locations - /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the - /// TransferTracker class over the function to produce live-in and transfer - /// DBG_VALUEs, then inserts them. 
Groups of DBG_VALUEs are inserted in the
-  /// order given by AllVarsNumbering -- this could be any stable order, but
-  /// right now "order of appearence in function, when explored in RPO", so
-  /// that we can compare explictly against VarLocBasedImpl.
-  void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
-                     ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
-                     DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
-                     const TargetPassConfig &TPC);
-
   /// Take collections of DBG_VALUE instructions stored in TTracker, and
   /// install them into their output blocks. Preserves a stable order of
   /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
   /// RPOT block ordering.
   void initialSetup(MachineFunction &MF);
 
+  /// Produce a map of the last lexical scope that uses a block, using the
+  /// scope's DFSOut number. Mapping is block-number to DFSOut.
+  /// \p EjectionMap Pre-allocated vector in which to install the built map.
+  /// \p ScopeToDILocation Mapping of LexicalScopes to their DILocations.
+  /// \p AssignBlocks Map of blocks where assignments happen for a scope.
+  void makeDepthFirstEjectionMap(SmallVectorImpl<unsigned> &EjectionMap,
+                                 const ScopeToDILocT &ScopeToDILocation,
+                                 ScopeToAssignBlocksT &AssignBlocks);
+
+  /// When determining per-block variable values and emitting to DBG_VALUEs,
+  /// this function explores by lexical scope depth. Doing so means that
+  /// per-block information can be fully computed before exploration finishes,
+  /// allowing us to emit it and free data structures earlier than otherwise.
+  /// It's also good for locality.
+  bool depthFirstVLocAndEmit(
+      unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
+      const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks,
+      LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
+      SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
+      DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+      const TargetPassConfig &TPC);
+
   bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
                     TargetPassConfig *TPC, unsigned InputBBLimit,
                     unsigned InputDbgValLimit) override;
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 8f697611a82c..141008ac2296 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -8,14 +8,16 @@
 
 #include "LiveDebugValues.h"
 
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
 
 /// \file LiveDebugValues.cpp
 ///
@@ -65,7 +67,7 @@ public:
   static char ID;
 
   LiveDebugValues();
-  ~LiveDebugValues() {}
+  ~LiveDebugValues() = default;
 
   /// Calculate the liveness information for the given machine function.
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -123,6 +125,11 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
 }
 
 bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
+  // Enable by default on x86_64, disable if explicitly turned off on cmdline.
+ if (T.getArch() == llvm::Triple::x86_64 && + ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE) + return true; + // Enable if explicitly requested on command line. return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h index 8f0b2ec3e1fc..6cc1685c0022 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -9,12 +9,11 @@ #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/ADT/Triple.h" - namespace llvm { +class MachineDominatorTree; +class MachineFunction; +class TargetPassConfig; +class Triple; // Inline namespace for types / symbols shared between different // LiveDebugValues implementations. @@ -28,7 +27,7 @@ public: virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, TargetPassConfig *TPC, unsigned InputBBLimit, unsigned InputDbgValLimit) = 0; - virtual ~LDVImpl() {} + virtual ~LDVImpl() = default; }; } // namespace SharedLiveDebugValues diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 42a0967bce3f..24c00b8a10ec 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -118,18 +118,15 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/UniqueVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -137,16 +134,11 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" @@ -922,14 +914,14 @@ private: std::unique_ptr &VLS = Locs[MBB]; if (!VLS) VLS = std::make_unique(Alloc); - return *VLS.get(); + return *VLS; } const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, const VarLocInMBB &Locs) const { auto It = Locs.find(MBB); assert(It != Locs.end() && "MBB not in map"); - return *It->second.get(); + return *It->second; } /// Tests whether this instruction is a spill to a stack location. 
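// The tri-state decision in debuginfoShouldUseDebugInstrRef above, reduced to
// its logic: unset behaves as "on" for x86_64 and "off" elsewhere, while an
// explicit command-line value always wins. A plain enum stands in for
// cl::boolOrDefault here:
enum class Tristate { Unset, True, False };

inline bool shouldUseInstrRef(bool IsX86_64, Tristate Flag) {
  if (IsX86_64 && Flag != Tristate::False)
    return true;                 // default-on unless explicitly disabled
  return Flag == Tristate::True; // other targets remain opt-in
}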
@@ -1035,9 +1027,9 @@ public: // Implementation //===----------------------------------------------------------------------===// -VarLocBasedLDV::VarLocBasedLDV() { } +VarLocBasedLDV::VarLocBasedLDV() = default; -VarLocBasedLDV::~VarLocBasedLDV() { } +VarLocBasedLDV::~VarLocBasedLDV() = default; /// Erase a variable from the set of open ranges, and additionally erase any /// fragments that may overlap it. If the VarLoc is a backup location, erase @@ -1948,7 +1940,7 @@ bool VarLocBasedLDV::join( // Just copy over the Out locs to incoming locs for the first visited // predecessor, and for all other predecessors join the Out locs. - VarLocSet &OutLocVLS = *OL->second.get(); + VarLocSet &OutLocVLS = *OL->second; if (!NumVisited) InLocsT = OutLocVLS; else @@ -2007,7 +1999,7 @@ void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs, for (auto &Iter : PendingInLocs) { // Map is keyed on a constant pointer, unwrap it so we can insert insts. auto &MBB = const_cast(*Iter.first); - VarLocSet &Pending = *Iter.second.get(); + VarLocSet &Pending = *Iter.second; SmallVector VarLocs; collectAllVarLocs(VarLocs, Pending, VarLocIDs); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 6d806135240e..35cf25330186 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -38,11 +39,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -50,15 +49,12 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include #include #include @@ -976,7 +972,7 @@ void UserValue::extendDef( if (Segment->end < Stop) { Stop = Segment->end; Kills = {Stop, {LII.first}}; - } else if (Segment->end == Stop && Kills.hasValue()) { + } else if (Segment->end == Stop && Kills) { // If multiple locations end at the same place, track all of them in // Kills. Kills->second.push_back(LII.first); @@ -1854,16 +1850,33 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { const TargetRegisterClass *TRC = MRI.getRegClass(Reg); unsigned SpillSize, SpillOffset; - // Test whether this location is legal with the given subreg. + unsigned regSizeInBits = TRI->getRegSizeInBits(*TRC); + if (SubReg) + regSizeInBits = TRI->getSubRegIdxSize(SubReg); + + // Test whether this location is legal with the given subreg. If the + // subregister has a nonzero offset, drop this location, it's too complex + // to describe. (TODO: future work). 
bool Success = TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset, *MF); - if (Success) { + if (Success && SpillOffset == 0) { auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(), TII->get(TargetOpcode::DBG_PHI)); Builder.addFrameIndex(VRM->getStackSlot(Reg)); Builder.addImm(InstNum); + // Record how large the original value is. The stack slot might be + // merged and altered during optimisation, but we will want to know how + // large the value is, at this DBG_PHI. + Builder.addImm(regSizeInBits); + } + + LLVM_DEBUG( + if (SpillOffset != 0) { + dbgs() << "DBG_PHI for Vreg " << Reg << " subreg " << SubReg << + " has nonzero offset\n"; } + ); } // If there was no mapping for a value ID, it's optimized out. Create no // DBG_PHI, and any variables using this value will become optimized out. diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 9ded0fb6ae0a..9378aaeb181c 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -348,23 +348,8 @@ private: //===----------------------------------------------------------------------===// LiveRange::iterator LiveRange::find(SlotIndex Pos) { - // This algorithm is basically std::upper_bound. - // Unfortunately, std::upper_bound cannot be used with mixed types until we - // adopt C++0x. Many libraries can do it, but not all. - if (empty() || Pos >= endIndex()) - return end(); - iterator I = begin(); - size_t Len = size(); - do { - size_t Mid = Len >> 1; - if (Pos < I[Mid].end) { - Len = Mid; - } else { - I += Mid + 1; - Len -= Mid + 1; - } - } while (Len); - return I; + return llvm::partition_point(*this, + [&](const Segment &X) { return X.end <= Pos; }); } VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) { diff --git a/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/llvm/lib/CodeGen/LiveIntervalCalc.cpp index 2756086cb8b1..3176d73b35f6 100644 --- a/llvm/lib/CodeGen/LiveIntervalCalc.cpp +++ b/llvm/lib/CodeGen/LiveIntervalCalc.cpp @@ -11,13 +11,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveIntervalCalc.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -25,12 +21,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include #include -#include -#include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 50b31e1eb247..11a4ecf0bef9 100644 --- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -26,7 +26,8 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Merge a LiveInterval's segments. Guarantee no overlaps. -void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { +void LiveIntervalUnion::unify(const LiveInterval &VirtReg, + const LiveRange &Range) { if (Range.empty()) return; ++Tag; @@ -53,7 +54,8 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { } // Remove a live virtual register's segments from this union. 
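// LiveRange::find above collapses a hand-rolled binary search into
// llvm::partition_point. The same refactor with standard facilities, on a
// stand-in Segment type: find the first segment whose end lies after Pos,
// i.e. the partition point of the predicate "ends at or before Pos".
#include <algorithm>
#include <vector>

struct Segment { unsigned Start, End; }; // sorted, non-overlapping ranges

inline std::vector<Segment>::iterator findSegment(std::vector<Segment> &Segs,
                                                  unsigned Pos) {
  return std::partition_point(Segs.begin(), Segs.end(),
                              [&](const Segment &S) { return S.End <= Pos; });
}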
-void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) { +void LiveIntervalUnion::extract(const LiveInterval &VirtReg, + const LiveRange &Range) { if (Range.empty()) return; ++Tag; @@ -99,7 +101,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { } #endif //!NDEBUG -LiveInterval *LiveIntervalUnion::getOneVReg() const { +const LiveInterval *LiveIntervalUnion::getOneVReg() const { if (empty()) return nullptr; for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { @@ -111,7 +113,8 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. -bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { +bool LiveIntervalUnion::Query::isSeenInterference( + const LiveInterval *VirtReg) const { return is_contained(InterferingVRegs, VirtReg); } @@ -147,14 +150,14 @@ LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) { } LiveRange::const_iterator LREnd = LR->end(); - LiveInterval *RecentReg = nullptr; + const LiveInterval *RecentReg = nullptr; while (LiveUnionI.valid()) { assert(LRI != LREnd && "Reached end of LR"); // Check for overlapping interference. while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) { // This is an overlap, record the interfering register. - LiveInterval *VReg = LiveUnionI.value(); + const LiveInterval *VReg = LiveUnionI.value(); if (VReg != RecentReg && !isSeenInterference(VReg)) { RecentReg = VReg; InterferingVRegs.push_back(VReg); diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 9571afa434c1..7d825a8bf853 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -33,22 +33,20 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Statepoint.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/StackMaps.h" #include #include #include @@ -149,7 +147,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { getRegUnit(i); } LLVM_DEBUG(dump()); - return true; + return false; } void LiveIntervals::print(raw_ostream &OS, const Module* ) const { @@ -500,7 +498,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Create new live ranges with only minimal live segments per def. LiveRange NewLR; - createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end())); + createSegmentsForValues(NewLR, li->vnis()); extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone()); // Move the trimmed segments back. @@ -604,7 +602,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) { // Create a new live ranges with only minimal live segments per def. 
LiveRange NewLR; - createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end())); + createSegmentsForValues(NewLR, SR.vnis()); extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask); // Move the trimmed ranges back. @@ -913,11 +911,11 @@ static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) { return false; } -bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, +bool LiveIntervals::checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs) { if (LI.empty()) return false; - LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end(); + LiveInterval::const_iterator LiveI = LI.begin(), LiveE = LI.end(); // Use a smaller arrays for local live ranges. ArrayRef Slots; diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index 3ef28042acb0..26f6e1ede1ad 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -20,11 +20,9 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 05768140cbdf..58eb4110f153 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -371,7 +371,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, const MachineOperand &MO = MI->getOperand(i-1); if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) continue; - MI->RemoveOperand(i-1); + MI->removeOperand(i-1); } LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI); } else { diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index 054f4370b609..8e56985246db 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 4c0172a930b5..6ca7f00a7885 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -78,13 +78,13 @@ void LiveRegMatrix::releaseMemory() { template static bool foreachUnit(const TargetRegisterInfo *TRI, - LiveInterval &VRegInterval, MCRegister PhysReg, + const LiveInterval &VRegInterval, MCRegister PhysReg, Callable Func) { if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = (*Units).first; LaneBitmask Mask = (*Units).second; - for (LiveInterval::SubRange &S : VRegInterval.subranges()) { + for (const LiveInterval::SubRange &S : VRegInterval.subranges()) { if ((S.LaneMask & Mask).any()) { if (Func(Unit, S)) return true; @@ -101,7 +101,7 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, return false; } -void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) { +void LiveRegMatrix::assign(const LiveInterval &VirtReg, MCRegister PhysReg) { LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to " << 
printReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment"); @@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) { LLVM_DEBUG(dbgs() << '\n'); } -void LiveRegMatrix::unassign(LiveInterval &VirtReg) { +void LiveRegMatrix::unassign(const LiveInterval &VirtReg) { Register PhysReg = VRM->getPhys(VirtReg.reg()); LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI) << " from " << printReg(PhysReg, TRI) << ':'); @@ -143,7 +143,7 @@ bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const { return false; } -bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, +bool LiveRegMatrix::checkRegMaskInterference(const LiveInterval &VirtReg, MCRegister PhysReg) { // Check if the cached information is valid. // The same BitVector can be reused for all PhysRegs. @@ -161,7 +161,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg)); } -bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, +bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg, MCRegister PhysReg) { if (VirtReg.empty()) return false; @@ -183,7 +183,8 @@ LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR, } LiveRegMatrix::InterferenceKind -LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) { +LiveRegMatrix::checkInterference(const LiveInterval &VirtReg, + MCRegister PhysReg) { if (VirtReg.empty()) return IK_Free; @@ -237,7 +238,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, } Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const { - LiveInterval *VRegInterval = nullptr; + const LiveInterval *VRegInterval = nullptr; for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { if ((VRegInterval = Matrix[*Unit].getOneVReg())) return VRegInterval->reg(); diff --git a/llvm/lib/CodeGen/LiveStacks.cpp b/llvm/lib/CodeGen/LiveStacks.cpp index 8df84ebf4f06..8fc5a929d77b 100644 --- a/llvm/lib/CodeGen/LiveStacks.cpp +++ b/llvm/lib/CodeGen/LiveStacks.cpp @@ -13,12 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveStacks.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "livestacks" diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 37fd3e4853ac..5f54d7cc8472 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -118,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { // If the target doesn't want/need this pass, or if there are no locals // to consider, early exit. if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF)) - return true; + return false; // Make sure we have enough space to store the local offsets. 
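// On the "unsigned BaseReg = 0" to "Register BaseReg" change above: LLVM's
// Register converts to bool, with zero meaning "no register", so validity
// checks read as assert(BaseReg && ...). A toy equivalent of that wrapper;
// the real llvm::Register carries much more:
#include <cassert>

class Reg {
  unsigned Id = 0; // zero is the conventional "no register" sentinel
public:
  Reg() = default;
  explicit Reg(unsigned Id) : Id(Id) {}
  explicit operator bool() const { return Id != 0; }
  unsigned id() const { return Id; }
};

inline unsigned useBase(Reg Base) {
  assert(Base && "Unable to allocate virtual base register!");
  return Base.id();
}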
LocalOffsets.resize(MFI.getObjectIndexEnd()); @@ -344,7 +343,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineBasicBlock *Entry = &Fn.front(); - unsigned BaseReg = 0; + Register BaseReg; int64_t BaseOffset = 0; // Loop through the frame references and allocate for them as necessary. @@ -414,20 +413,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { continue; } - const MachineFunction *MF = MI.getMF(); - const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); - BaseReg = Fn.getRegInfo().createVirtualRegister(RC); - - LLVM_DEBUG(dbgs() << " Materializing base register" - << " at frame local offset " - << LocalOffset + InstrOffset); - // Tell the target to insert the instruction to initialize // the base register. // MachineBasicBlock::iterator InsertionPt = Entry->begin(); BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset); - LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n'); + LLVM_DEBUG(dbgs() << " Materialized base register at frame local offset " + << LocalOffset + InstrOffset + << " into " << printReg(BaseReg, TRI) << '\n'); // The base register already includes any offset specified // by the instruction, so account for that so it doesn't get @@ -437,7 +430,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { ++NumBaseRegisters; UsedBaseReg = true; } - assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + assert(BaseReg && "Unable to allocate virtual base register!"); // Modify the instruction to use the new base register rather // than the frame index operand. diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index dce64ab9f5ca..b47c96e50831 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index a06d1d6255c7..984dc452fbfd 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 3ec8c627f131..eea24d8e9353 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -27,15 +27,12 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include - using namespace llvm; #define DEBUG_TYPE "mir-canonicalizer" @@ -106,10 +103,7 @@ rescheduleLexographically(std::vector instructions, StringInstrMap.push_back({(i == std::string::npos) ? 
S : S.substr(i), II}); } - llvm::sort(StringInstrMap, - [](const StringInstrPair &a, const StringInstrPair &b) -> bool { - return (a.first < b.first); - }); + llvm::sort(StringInstrMap, llvm::less_first()); for (auto &II : StringInstrMap) { diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp index bf78594e9b23..3152102410d7 100644 --- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -15,12 +15,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" -#include using namespace llvm; using namespace sampleprof; @@ -68,6 +70,8 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB, bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) { if (!EnableFSDiscriminator) return false; + if (!MF.getFunction().isDebugInfoForProfiling()) + return false; bool Changed = false; using LocationDiscriminator = std::tuple; @@ -131,6 +135,7 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) { if (Changed) { createFSDiscriminatorVariable(MF.getFunction().getParent()); LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n"); + (void) NumNewD; } return Changed; diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp index 9f61dd9ef243..bc65700aba06 100644 --- a/llvm/lib/CodeGen/MIRNamerPass.cpp +++ b/llvm/lib/CodeGen/MIRNamerPass.cpp @@ -18,11 +18,7 @@ #include "MIRVRegNamerUtils.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 0ca820f160aa..b0daa20913f5 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" -#include #include #include #include @@ -250,7 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("dereferenceable", MIToken::kw_dereferenceable) .Case("invariant", MIToken::kw_invariant) .Case("align", MIToken::kw_align) - .Case("basealign", MIToken::kw_align) + .Case("basealign", MIToken::kw_basealign) .Case("addrspace", MIToken::kw_addrspace) .Case("stack", MIToken::kw_stack) .Case("got", MIToken::kw_got) diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6477965bdc21..40ae7053ea09 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -26,8 +26,6 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -38,6 +36,8 @@ #include 
"llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -60,7 +60,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" @@ -69,10 +68,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include #include #include #include @@ -744,7 +741,7 @@ bool MIParser::parseBasicBlockDefinition( MBB->setIsEHPad(IsLandingPad); MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget); MBB->setIsEHFuncletEntry(IsEHFuncletEntry); - if (SectionID.hasValue()) { + if (SectionID) { MBB->setSectionID(SectionID.getValue()); MF.setBBSectionsType(BasicBlockSection::List); } @@ -1094,11 +1091,23 @@ bool MIParser::parse(MachineInstr *&MI) { return true; } - // TODO: Check for extraneous machine operands. MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); MI->setFlags(Flags); - for (const auto &Operand : Operands) + + unsigned NumExplicitOps = 0; + for (const auto &Operand : Operands) { + bool IsImplicitOp = Operand.Operand.isReg() && Operand.Operand.isImplicit(); + if (!IsImplicitOp) { + if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() && + !Operand.Operand.isValidExcessOperand()) + return error(Operand.Begin, "too many operands for instruction"); + + ++NumExplicitOps; + } + MI->addOperand(MF, Operand.Operand); + } + if (assignRegisterTies(*MI, Operands)) return true; if (PreInstrSymbol) @@ -1609,7 +1618,7 @@ bool MIParser::assignRegisterTies(MachineInstr &MI, continue; // The parser ensures that this operand is a register use, so we just have // to check the tied-def operand. 
- unsigned DefIdx = Operands[I].TiedDefIdx.getValue(); + unsigned DefIdx = *Operands[I].TiedDefIdx; if (DefIdx >= E) return error(Operands[I].Begin, Twine("use of invalid tied-def operand index '" + @@ -1714,6 +1723,15 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, RegInfo->Kind == VRegInfo::REGBANK) return error("generic virtual registers must have a type"); } + + if (Flags & RegState::Define) { + if (Flags & RegState::Kill) + return error("cannot have a killed def operand"); + } else { + if (Flags & RegState::Dead) + return error("cannot have a dead use operand"); + } + Dest = MachineOperand::CreateReg( Reg, Flags & RegState::Define, Flags & RegState::Implicit, Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, @@ -2689,19 +2707,19 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) { return true; uint32_t *Mask = MF.allocateRegMask(); - while (true) { - if (Token.isNot(MIToken::NamedRegister)) - return error("expected a named register"); - Register Reg; - if (parseNamedRegister(Reg)) - return true; - lex(); - Mask[Reg / 32] |= 1U << (Reg % 32); + do { + if (Token.isNot(MIToken::rparen)) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + Register Reg; + if (parseNamedRegister(Reg)) + return true; + lex(); + Mask[Reg / 32] |= 1U << (Reg % 32); + } + // TODO: Report an error if the same register is used more than once. - if (Token.isNot(MIToken::comma)) - break; - lex(); - } + } while (consumeIfPresent(MIToken::comma)); if (expectAndConsume(MIToken::rparen)) return true; @@ -3269,11 +3287,21 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { MDNode *Range = nullptr; while (consumeIfPresent(MIToken::comma)) { switch (Token.kind()) { - case MIToken::kw_align: + case MIToken::kw_align: { // align is printed if it is different than size. - if (parseAlignment(BaseAlignment)) + uint64_t Alignment; + if (parseAlignment(Alignment)) return true; + if (Ptr.Offset & (Alignment - 1)) { + // MachineMemOperand::getAlign never returns a value greater than the + // alignment of offset, so this just guards against hand-written MIR + // that specifies a large "align" value when it should probably use + // "basealign" instead. + return error("specified alignment is more aligned than offset"); + } + BaseAlignment = Alignment; break; + } case MIToken::kw_basealign: // basealign is printed if it is different than align. 
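// The new parser check above rejects an "align" value stronger than the
// memory operand's offset permits, via the usual power-of-two mask test.
// That test in isolation:
#include <cassert>
#include <cstdint>

inline bool offsetFitsAlignment(int64_t Offset, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of 2");
  return (Offset & int64_t(Align - 1)) == 0;
}
// e.g. offsetFitsAlignment(24, 8) is true; offsetFitsAlignment(24, 16) is not.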
if (parseAlignment(BaseAlignment)) diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index f144639770bc..4944cb46c5b5 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -13,13 +13,10 @@ #include "llvm/CodeGen/MIRParser/MIRParser.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -29,7 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" @@ -46,6 +43,8 @@ using namespace llvm; namespace llvm { +class MDNode; +class RegisterBank; /// This class implements the parsing of LLVM IR that's embedded inside a MIR /// file. @@ -459,6 +458,12 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); + MF.setCallsEHReturn(YamlMF.CallsEHReturn); + MF.setCallsUnwindInit(YamlMF.CallsUnwindInit); + MF.setHasEHCatchret(YamlMF.HasEHCatchret); + MF.setHasEHScopes(YamlMF.HasEHScopes); + MF.setHasEHFunclets(YamlMF.HasEHFunclets); + if (YamlMF.Legalized) MF.getProperties().set(MachineFunctionProperties::Property::Legalized); if (YamlMF.RegBankSelected) @@ -638,7 +643,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, // be saved for the caller). 
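// The new CallsEHReturn / HasEHFunclets style flags round-trip through MIR's
// YAML layer. The general llvm::yaml pattern for such optional boolean keys,
// shown with a hypothetical struct and key names (MIRYamlMapping.h declares
// the real ones):
#include "llvm/Support/YAMLTraits.h"

struct EHFlags {
  bool CallsEHReturn = false;
  bool HasEHFunclets = false;
};

template <> struct llvm::yaml::MappingTraits<EHFlags> {
  static void mapping(llvm::yaml::IO &YamlIO, EHFlags &F) {
    // mapOptional omits a key on output when the value equals its default.
    YamlIO.mapOptional("callsEHReturn", F.CallsEHReturn, false);
    YamlIO.mapOptional("hasEHFunclets", F.HasEHFunclets, false);
  }
};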
if (YamlMF.CalleeSavedRegisters) { SmallVector CalleeSavedRegisters; - for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { + for (const auto &RegSource : *YamlMF.CalleeSavedRegisters) { Register Reg; if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error)) return error(Error, RegSource.SourceRange); @@ -809,7 +814,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, Object.CalleeSavedRestored, ObjectIdx)) return true; if (Object.LocalOffset) - MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); + MFI.mapLocalFrameObject(ObjectIdx, *Object.LocalOffset); if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx)) return true; } @@ -826,6 +831,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, return error(Error, YamlMFI.StackProtector.SourceRange); MFI.setStackProtectorIndex(FI); } + + if (!YamlMFI.FunctionContext.Value.empty()) { + SMDiagnostic Error; + int FI; + if (parseStackObjectReference(PFS, FI, YamlMFI.FunctionContext.Value, Error)) + return error(Error, YamlMFI.FunctionContext.SourceRange); + MFI.setFunctionContextIndex(FI); + } + return false; } @@ -909,7 +923,7 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, return error(Error, YamlConstant.Value.SourceRange); const Align PrefTypeAlign = M.getDataLayout().getPrefTypeAlign(Value->getType()); - const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign); + const Align Alignment = YamlConstant.Alignment.value_or(PrefTypeAlign); unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) .second) @@ -1023,7 +1037,7 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, MIRParser::MIRParser(std::unique_ptr Impl) : Impl(std::move(Impl)) {} -MIRParser::~MIRParser() {} +MIRParser::~MIRParser() = default; std::unique_ptr MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) { diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index dc72f83ad0e4..25823b1567f7 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -13,14 +13,11 @@ #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -32,29 +29,19 @@ #include "llvm/CodeGen/MachineModuleSlotTracker.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" 
#include "llvm/MC/LaneBitmask.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -63,7 +50,6 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include #include @@ -209,6 +195,12 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); + YamlMF.CallsEHReturn = MF.callsEHReturn(); + YamlMF.CallsUnwindInit = MF.callsUnwindInit(); + YamlMF.HasEHCatchret = MF.hasEHCatchret(); + YamlMF.HasEHScopes = MF.hasEHScopes(); + YamlMF.HasEHFunclets = MF.hasEHFunclets(); + YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); YamlMF.RegBankSelected = MF.getProperties().hasProperty( @@ -489,6 +481,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, .printStackObjectReference(MFI.getStackProtectorIndex()); } + if (MFI.hasFunctionContextIndex()) { + raw_string_ostream StrOS(YMF.FrameInfo.FunctionContext.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printStackObjectReference(MFI.getFunctionContextIndex()); + } + // Print the debug variable information. for (const MachineFunction::VariableDbgInfo &DebugVar : MF.getVariableDbgInfo()) { @@ -693,11 +691,11 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { // Print the live in registers. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - if (MRI.tracksLiveness() && !MBB.livein_empty()) { + if (!MBB.livein_empty()) { const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); OS.indent(2) << "liveins: "; bool First = true; - for (const auto &LI : MBB.liveins()) { + for (const auto &LI : MBB.liveins_dbg()) { if (!First) OS << ", "; First = false; diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index b742ad9823c9..a8996a586909 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -15,7 +15,15 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 5862504109f0..a2abe71a6bd7 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -10,7 +10,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineStableHash.h" #include "llvm/IR/Constants.h" -#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index 33782c755eb0..7daf9025d303 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ 
b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -10,17 +10,19 @@ // //===----------------------------------------------------------------------===// +#include "AllocationOrder.h" #include "RegAllocEvictionAdvisor.h" #include "RegAllocGreedy.h" -#include "RegAllocScore.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/TensorSpec.h" +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" +#endif #include "llvm/Analysis/ReleaseModeModelRunner.h" -#include "llvm/Analysis/Utils/TFUtils.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -28,13 +30,11 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Config/config.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" #include #include @@ -46,10 +46,16 @@ using namespace llvm; // Generated header in release (AOT) mode #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) #include "RegallocEvictModel.h" +using CompiledModelType = RegallocEvictModel; +#else +using CompiledModelType = NoopSavedModelImpl; #endif // Options that only make sense in development mode #ifdef LLVM_HAVE_TF_API +#include "RegAllocScore.h" +#include "llvm/Analysis/Utils/TFUtils.h" + static cl::opt TrainingLog( "regalloc-training-log", cl::Hidden, cl::desc("Training log for the register allocator eviction model")); @@ -60,6 +66,8 @@ static cl::opt ModelUnderTraining( #endif // #ifdef LLVM_HAVE_TF_API +extern cl::opt EvictInterferenceCutoff; + /// The score injection pass. /// This pass calculates the score for a function and inserts it in the log, but /// this happens only in development mode. It's a no-op otherwise. @@ -240,8 +248,8 @@ using FeaturesListNormalizer = std::array; /// The ML evictor (commonalities between release and development mode) class MLEvictAdvisor : public RegAllocEvictionAdvisor { public: - MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner, - const MachineBlockFrequencyInfo &MBFI, + MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, + MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI, const MachineLoopInfo &Loops); protected: @@ -257,14 +265,16 @@ protected: /// if we're just capturing the log of the default advisor, it needs to call /// the latter instead, so we need to pass all the necessary parameters for /// it. In the development case, it will also log. - virtual int64_t tryFindEvictionCandidatePosition( - LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit, - uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const; + virtual int64_t + tryFindEvictionCandidatePosition(const LiveInterval &VirtReg, + const AllocationOrder &Order, + unsigned OrderLimit, uint8_t CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) const; /// Load the features of the given VirtReg (allocated or not) at column Pos, /// but if that can't be evicted, return false instead. 
bool - loadInterferenceFeatures(LiveInterval &VirtReg, MCRegister PhysReg, + loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, std::array &Largest, size_t Pos) const; @@ -273,24 +283,24 @@ private: static float getInitialQueueSize(const MachineFunction &MF); MCRegister tryFindEvictionCandidate( - LiveInterval &VirtReg, const AllocationOrder &Order, + const LiveInterval &VirtReg, const AllocationOrder &Order, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const override; - void extractFeatures(const SmallVectorImpl &Intervals, + void extractFeatures(const SmallVectorImpl &Intervals, std::array &Largest, size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const; // Point-in-time: we didn't learn this, so we always delegate to the default. bool canEvictHintInterference( - LiveInterval &VirtReg, MCRegister PhysReg, + const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const override { return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg, FixedRegisters); } - const LIFeatureComponents + const LIFeatureComponents & getLIFeatureComponents(const LiveInterval &LI) const; // Hold on to a default advisor for: @@ -306,17 +316,21 @@ private: // This could be static and shared, but its initialization is non-trivial. std::bitset DoNotNormalize; const float InitialQSize; + + using RegID = unsigned; + mutable DenseMap CachedFeatures; }; +#define _DECL_FEATURES(type, name, shape, _) \ + TensorSpec::createSpec(#name, shape), + +static const std::vector InputFeatures{ + {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}, +}; +#undef _DECL_FEATURES // =================================== // Release (AOT) - specifics // =================================== -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) -const std::array FeatureNames{ -#define _GETNAME(_, NAME, __, ___) #NAME, - RA_EVICT_FEATURES_LIST(_GETNAME) -#undef _GETNAME -}; class ReleaseModeEvictionAdvisorAnalysis final : public RegAllocEvictionAdvisorAnalysis { public: @@ -335,17 +349,16 @@ private: } std::unique_ptr - getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { if (!Runner) - Runner = std::make_unique>( - MF.getFunction().getContext(), FeatureNames, DecisionName); + Runner = std::make_unique>( + MF.getFunction().getContext(), InputFeatures, DecisionName); return std::make_unique( MF, RA, Runner.get(), getAnalysis(), getAnalysis()); } - std::unique_ptr> Runner; + std::unique_ptr> Runner; }; -#endif // =================================== // Development mode-specifics @@ -353,13 +366,6 @@ private: // // Features we log #ifdef LLVM_HAVE_TF_API -#define _DECL_FEATURES(type, name, shape, _) \ - TensorSpec::createSpec(#name, shape), - -static const std::vector InputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}, -}; -#undef _DECL_FEATURES static const TensorSpec Output = TensorSpec::createSpec(DecisionName, {1}); static const TensorSpec Reward = TensorSpec::createSpec("reward", {1}); @@ -380,7 +386,7 @@ static const std::vector TrainingInputFeatures{ class DevelopmentModeEvictAdvisor : public MLEvictAdvisor { public: - DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, + DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI, const MachineLoopInfo &Loops, Logger *Log) @@ -388,8 +394,8 @@ public: private: int64_t 
tryFindEvictionCandidatePosition( - LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit, - uint8_t CostPerUseLimit, + const LiveInterval &VirtReg, const AllocationOrder &Order, + unsigned OrderLimit, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const override; Logger *const Log; @@ -436,7 +442,7 @@ private: } std::unique_ptr - getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { LLVMContext &Ctx = MF.getFunction().getContext(); if (ModelUnderTraining.empty() && TrainingLog.empty()) { Ctx.emitError("Regalloc development mode should be requested with at " @@ -496,7 +502,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { return Ret; } -MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, +MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI, const MachineLoopInfo &Loops) @@ -514,7 +520,7 @@ MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, } int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition( - LiveInterval &, const AllocationOrder &, unsigned, uint8_t, + const LiveInterval &, const AllocationOrder &, unsigned, uint8_t, const SmallVirtRegSet &) const { int64_t Ret = Runner->evaluate(); assert(Ret >= 0); @@ -523,7 +529,7 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition( } bool MLEvictAdvisor::loadInterferenceFeatures( - LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest, size_t Pos) const { // It is only possible to evict virtual register interference. @@ -539,16 +545,18 @@ bool MLEvictAdvisor::loadInterferenceFeatures( // The cascade tracking is the same as in the default advisor unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg()); - SmallVector InterferingIntervals; + SmallVector InterferingIntervals; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // Different from the default heuristic, we don't make any assumptions about // what having more than 10 results in the query may mean. - const auto &IFIntervals = Q.interferingVRegs(); + const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff); if (IFIntervals.empty() && InterferingIntervals.empty()) continue; + if (IFIntervals.size() >= EvictInterferenceCutoff) + return false; InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end()); - for (LiveInterval *Intf : reverse(IFIntervals)) { + for (const LiveInterval *Intf : reverse(IFIntervals)) { assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); // This is the same set of legality checks as in the default case: don't @@ -587,7 +595,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures( } MCRegister MLEvictAdvisor::tryFindEvictionCandidate( - LiveInterval &VirtReg, const AllocationOrder &Order, + const LiveInterval &VirtReg, const AllocationOrder &Order, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit); if (!MaybeOrderLimit) @@ -652,7 +660,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( // decision making process. 
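// The signature changes above thread const through the ML eviction advisor
// (LiveInterval & becomes const LiveInterval &). Because these are virtual
// methods, base and overrides must change in lockstep: a stale non-const
// override would silently become an unrelated overload. The "override"
// keyword turns that mistake into a compile error, as this simplified
// sketch shows (LiveInterval reduced to a stub):
struct LiveIntervalStub {};

struct AdvisorBase {
  virtual ~AdvisorBase() = default;
  virtual long tryFindEvictionCandidatePosition(const LiveIntervalStub &) const {
    return -1;
  }
};

struct MLAdvisorSketch : AdvisorBase {
  // Dropping either const here would fail to compile thanks to "override",
  // instead of quietly hiding the base method.
  long tryFindEvictionCandidatePosition(const LiveIntervalStub &) const override {
    return 0;
  }
};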
Regs[CandidateVirtRegPos].second = !MustFindEviction; if (!MustFindEviction) - extractFeatures(SmallVector(1, &VirtReg), Largest, + extractFeatures(SmallVector(1, &VirtReg), Largest, CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, /*NrUrgent*/ 0.0); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " @@ -686,9 +694,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( return Regs[CandidatePos].first; } -const LIFeatureComponents +const LIFeatureComponents & MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const { - LIFeatureComponents Ret; + RegID ID = LI.reg().id(); + LIFeatureComponents Empty; + auto I = CachedFeatures.insert(std::make_pair(ID, Empty)); + LIFeatureComponents &Ret = I.first->getSecond(); + if (!I.second) + return Ret; + SmallPtrSet Visited; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -733,7 +747,7 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const { // Overall, this currently mimics what we do for weight calculation, but instead // of accummulating the various features, we keep them separate. void MLEvictAdvisor::extractFeatures( - const SmallVectorImpl &Intervals, + const SmallVectorImpl &Intervals, std::array &Largest, size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { int64_t NrDefsAndUses = 0; @@ -769,7 +783,7 @@ void MLEvictAdvisor::extractFeatures( if (LI.endIndex() > EndSI) EndSI = LI.endIndex(); - const LIFeatureComponents LIFC = getLIFeatureComponents(LI); + const LIFeatureComponents &LIFC = getLIFeatureComponents(LI); NrBrokenHints += VRM->hasPreferredPhys(LI.reg()); NrDefsAndUses += LIFC.NrDefsAndUses; @@ -831,8 +845,9 @@ RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { } int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition( - LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit, - uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { + const LiveInterval &VirtReg, const AllocationOrder &Order, + unsigned OrderLimit, uint8_t CostPerUseLimit, + const SmallVirtRegSet &FixedRegisters) const { int64_t Ret = 0; if (isa(getRunner())) { Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition( @@ -885,11 +900,9 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { } #endif // #ifdef LLVM_HAVE_TF_API -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() { return new ReleaseModeEvictionAdvisorAnalysis(); } -#endif // In all cases except development mode, we don't need scoring. 
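// getLIFeatureComponents() above now returns a reference into a per-function
// cache keyed by virtual register: DenseMap::insert() hands back
// {iterator, inserted}, so a hit returns the previously computed entry and a
// miss fills the freshly inserted slot in place. The same pattern, with
// std::unordered_map standing in for llvm::DenseMap:
#include <unordered_map>

struct LIFeatureComponentsSketch {
  double ReadsWrites = 0.0;
  double Weight = 0.0;
};

class FeatureCache {
  using RegID = unsigned;
  mutable std::unordered_map<RegID, LIFeatureComponentsSketch> Cached;

public:
  const LIFeatureComponentsSketch &get(RegID ID) const {
    auto [It, Inserted] = Cached.insert({ID, LIFeatureComponentsSketch{}});
    LIFeatureComponentsSketch &Ret = It->second;
    if (!Inserted)
      return Ret; // cache hit: reuse the earlier computation
    // Cache miss: compute into the slot we just created (placeholder math).
    Ret.ReadsWrites = ID * 0.5;
    Ret.Weight = ID * 2.0;
    return Ret;
  }
};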
#if !defined(LLVM_HAVE_TF_API) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 8c9d00d08c6a..c186d0ba9969 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,12 +26,10 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -53,8 +51,7 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight(); } -MachineBasicBlock::~MachineBasicBlock() { -} +MachineBasicBlock::~MachineBasicBlock() = default; /// Return the MCSymbol for this basic block. MCSymbol *MachineBasicBlock::getSymbol() const { @@ -135,7 +132,7 @@ void ilist_callback_traits::addNodeToList( // Make sure the instructions have their operands in the reginfo lists. MachineRegisterInfo &RegInfo = MF.getRegInfo(); for (MachineInstr &MI : N->instrs()) - MI.AddRegOperandsToUseLists(RegInfo); + MI.addRegOperandsToUseLists(RegInfo); } void ilist_callback_traits::removeNodeFromList( @@ -153,7 +150,7 @@ void ilist_traits::addNodeToList(MachineInstr *N) { // Add the instruction's register operands to their corresponding // use/def lists. MachineFunction *MF = Parent->getParent(); - N->AddRegOperandsToUseLists(MF->getRegInfo()); + N->addRegOperandsToUseLists(MF->getRegInfo()); MF->handleInsertion(*N); } @@ -165,7 +162,7 @@ void ilist_traits::removeNodeFromList(MachineInstr *N) { // Remove from the use/def lists. if (MachineFunction *MF = N->getMF()) { MF->handleRemoval(*N); - N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); + N->removeRegOperandsFromUseLists(MF->getRegInfo()); } N->setParent(nullptr); @@ -918,6 +915,10 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { return std::next(I) == MachineFunction::const_iterator(MBB); } +const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const { + return Successors.size() == 1 ? 
Successors[0] : nullptr; +} + MachineBasicBlock *MachineBasicBlock::getFallThrough() { MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; @@ -1620,6 +1621,16 @@ MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const { return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false); } +bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const { + unsigned Cntr = 0; + auto R = instructionsWithoutDebug(begin(), end()); + for (auto I = R.begin(), E = R.end(); I != E; ++I) { + if (++Cntr > Limit) + return true; + } + return false; +} + const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold); const MBBSectionID MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception); diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index c93ffaabf74c..4cc84f22bdde 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -34,13 +34,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TailDuplicator.h" @@ -50,6 +50,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/IR/PrintPasses.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" @@ -200,10 +201,8 @@ static cl::opt TriangleChainCount( cl::init(2), cl::Hidden); -static cl::opt EnableExtTspBlockPlacement( - "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false), - cl::desc("Enable machine block placement based on the ext-tsp model, " - "optimizing I-cache utilization.")); +extern cl::opt EnableExtTspBlockPlacement; +extern cl::opt ApplyExtTspWithoutProfile; namespace llvm { extern cl::opt StaticLikelyProb; @@ -3422,7 +3421,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } // Apply a post-processing optimizing block placement. - if (MF.size() >= 3 && EnableExtTspBlockPlacement) { + if (MF.size() >= 3 && EnableExtTspBlockPlacement && + (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) { // Find a new placement and modify the layout of the blocks in the function. 
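// The local cl::opt definition of EnableExtTspBlockPlacement is replaced by
// an extern declaration above, so this file and the ext-tsp implementation
// share one flag, and the guard now also requires real profile data unless
// ApplyExtTspWithoutProfile overrides that. The new gate, distilled into a
// standalone predicate (parameter names are illustrative):
bool shouldApplyExtTsp(unsigned NumBlocks, bool HasProfileData,
                       bool EnableExtTsp, bool ApplyWithoutProfile) {
  // At least three blocks, the feature enabled, and either a profile or an
  // explicit opt-in to run without one.
  return NumBlocks >= 3 && EnableExtTsp &&
         (ApplyWithoutProfile || HasProfileData);
}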
applyExtTsp(); @@ -3660,6 +3660,9 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { if (std::next(F.begin()) == F.end()) return false; + if (!isFunctionInPrintList(F.getName())) + return false; + MBPI = &getAnalysis(); MBFI = &getAnalysis(); diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index c9f762f9a6e7..a84377d70855 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -12,10 +12,8 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 0fcb07252d0e..e60fd9f7883a 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -34,7 +34,6 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -91,6 +90,11 @@ namespace { AU.addPreserved(); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA); + } + void releaseMemory() override { ScopeMap.clear(); PREMap.clear(); diff --git a/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/llvm/lib/CodeGen/MachineCheckDebugify.cpp index bd7f0f862947..1e5b8dd0bbb0 100644 --- a/llvm/lib/CodeGen/MachineCheckDebugify.cpp +++ b/llvm/lib/CodeGen/MachineCheckDebugify.cpp @@ -11,13 +11,14 @@ /// DILocalVariable which mir-debugifiy generated before. //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Transforms/Utils/Debugify.h" +#include "llvm/Pass.h" #define DEBUG_TYPE "mir-check-debugify" @@ -27,9 +28,6 @@ namespace { struct CheckDebugMachineModule : public ModulePass { bool runOnModule(Module &M) override { - MachineModuleInfo &MMI = - getAnalysis().getMMI(); - NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify"); if (!NMD) { errs() << "WARNING: Please run mir-debugify to generate " @@ -37,6 +35,9 @@ struct CheckDebugMachineModule : public ModulePass { return false; } + MachineModuleInfo &MMI = + getAnalysis().getMMI(); + auto getDebugifyOperand = [&](unsigned Idx) -> unsigned { return mdconst::extract(NMD->getOperand(Idx)->getOperand(0)) ->getZExtValue(); @@ -106,8 +107,7 @@ struct CheckDebugMachineModule : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); + AU.setPreservesAll(); } static char ID; // Pass identification. 
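// Among the hunks above, MachineCSE now advertises through
// getRequiredProperties() that it must run on SSA-form machine IR; the pass
// manager checks a function's property bitset against each pass's
// requirements before running it. A generic sketch of that contract, with
// the property set reduced to a std::bitset and names invented for
// illustration:
#include <bitset>
#include <cassert>
#include <cstddef>

enum class MFProperty { IsSSA, NoVRegs, Count };

struct MFProperties {
  std::bitset<static_cast<std::size_t>(MFProperty::Count)> Bits;
  MFProperties &set(MFProperty P) {
    Bits.set(static_cast<std::size_t>(P));
    return *this;
  }
  bool covers(const MFProperties &Required) const {
    return (Required.Bits & ~Bits).none(); // every required bit is present
  }
};

struct MFPassSketch {
  virtual ~MFPassSketch() = default;
  virtual MFProperties requiredProperties() const { return {}; }
};

struct CSEPassSketch : MFPassSketch {
  MFProperties requiredProperties() const override {
    return MFProperties().set(MFProperty::IsSSA);
  }
};

void runOnFunction(const MFPassSketch &P, const MFProperties &FuncProps) {
  assert(FuncProps.covers(P.requiredProperties()) &&
         "function does not satisfy the pass's required properties");
  // ... run the pass ...
}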
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 72ab9ee4f388..722a709af240 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -278,6 +277,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { case MachineCombinerPattern::REASSOC_XA_YB: case MachineCombinerPattern::REASSOC_XY_AMM_BMM: case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: + case MachineCombinerPattern::SUBADD_OP1: + case MachineCombinerPattern::SUBADD_OP2: return CombinerObjective::MustReduceDepth; case MachineCombinerPattern::REASSOC_XY_BCA: case MachineCombinerPattern::REASSOC_XY_BAC: diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 57fbe4112e47..66f0eb83e57c 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -83,8 +83,24 @@ STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated"); DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", "Controls which register COPYs are forwarded"); +static cl::opt MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false), + cl::Hidden); + namespace { +static Optional isCopyInstr(const MachineInstr &MI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { + if (UseCopyInstr) + return TII.isCopyInstr(MI); + + if (MI.isCopy()) + return Optional( + DestSourcePair{MI.getOperand(0), MI.getOperand(1)}); + + return None; +} + class CopyTracker { struct CopyInfo { MachineInstr *MI; @@ -110,7 +126,8 @@ public: } /// Remove register from copy maps. - void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) { + void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { // Since Reg might be a subreg of some registers, only invalidate Reg is not // enough. We have to find the COPY defines Reg or registers defined by Reg // and invalidate all of them. @@ -120,8 +137,13 @@ public: auto I = Copies.find(*RUI); if (I != Copies.end()) { if (MachineInstr *MI = I->second.MI) { - RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg()); - RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg()); + Optional CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); + assert(CopyOperands && "Expect copy"); + + RegsToInvalidate.insert( + CopyOperands->Destination->getReg().asMCReg()); + RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg()); } RegsToInvalidate.insert(I->second.DefRegs.begin(), I->second.DefRegs.end()); @@ -133,7 +155,8 @@ public: } /// Clobber a single register, removing it from the tracker's copy maps. - void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) { + void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { auto I = Copies.find(*RUI); if (I != Copies.end()) { @@ -142,8 +165,12 @@ public: markRegsUnavailable(I->second.DefRegs, TRI); // When we clobber the destination of a copy, we need to clobber the // whole register it defined. 
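// The new file-local isCopyInstr() above is the pivot of this whole
// MachineCopyPropagation change: behind the mcp-use-is-copy-instr flag it
// consults the target's TII.isCopyInstr() hook, so target-specific
// copy-like instructions (for example an AArch64 "ORR Xd, XZR, Xs" move)
// are tracked just like plain COPYs, and callers receive an optional
// {Destination, Source} operand pair instead of poking at operand indices.
// A simplified model, with std::optional in place of llvm::Optional and
// stub operand/instruction types:
#include <optional>

struct OperandSketch { unsigned Reg = 0; };

struct InstrSketch {
  bool IsCopy = false; // a plain COPY: operand 0 = dest, operand 1 = source
  OperandSketch Ops[2];
};

struct DestSourcePairSketch {
  const OperandSketch *Destination;
  const OperandSketch *Source;
};

using TargetCopyHook =
    std::optional<DestSourcePairSketch> (*)(const InstrSketch &);

std::optional<DestSourcePairSketch>
isCopyLikeInstr(const InstrSketch &MI, bool UseCopyInstr,
                TargetCopyHook TargetHook) {
  if (UseCopyInstr)
    return TargetHook(MI); // the target decides what counts as a copy
  if (MI.IsCopy)
    return DestSourcePairSketch{&MI.Ops[0], &MI.Ops[1]};
  return std::nullopt;
}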
- if (MachineInstr *MI = I->second.MI) - markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI); + if (MachineInstr *MI = I->second.MI) { + Optional CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); + markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()}, + TRI); + } // Now we can erase the copy. Copies.erase(I); } @@ -151,11 +178,13 @@ public: } /// Add this copy's registers into the tracker's copy maps. - void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { - assert(MI->isCopy() && "Tracking non-copy?"); + void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { + Optional CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); + assert(CopyOperands && "Tracking non-copy?"); - MCRegister Def = MI->getOperand(0).getReg().asMCReg(); - MCRegister Src = MI->getOperand(1).getReg().asMCReg(); + MCRegister Src = CopyOperands->Source->getReg().asMCReg(); + MCRegister Def = CopyOperands->Destination->getReg().asMCReg(); // Remember Def is defined by the copy. for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) @@ -198,15 +227,22 @@ public: } MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { MCRegUnitIterator RUI(Reg, &TRI); MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); - if (!AvailCopy || - !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) + + if (!AvailCopy) + return nullptr; + + Optional CopyOperands = + isCopyInstr(*AvailCopy, TII, UseCopyInstr); + Register AvailSrc = CopyOperands->Source->getReg(); + Register AvailDef = CopyOperands->Destination->getReg(); + if (!TRI.isSubRegisterEq(AvailSrc, Reg)) return nullptr; - Register AvailSrc = AvailCopy->getOperand(1).getReg(); - Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getReverseIterator(), I.getReverseIterator())) for (const MachineOperand &MO : MI.operands()) @@ -219,20 +255,26 @@ public: } MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { // We check the first RegUnit here, since we'll only be interested in the // copy if it copies the entire register anyway. MCRegUnitIterator RUI(Reg, &TRI); MachineInstr *AvailCopy = findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true); - if (!AvailCopy || - !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg)) + + if (!AvailCopy) + return nullptr; + + Optional CopyOperands = + isCopyInstr(*AvailCopy, TII, UseCopyInstr); + Register AvailSrc = CopyOperands->Source->getReg(); + Register AvailDef = CopyOperands->Destination->getReg(); + if (!TRI.isSubRegisterEq(AvailDef, Reg)) return nullptr; // Check that the available copy isn't clobbered by any regmasks between // itself and the destination. - Register AvailSrc = AvailCopy->getOperand(1).getReg(); - Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getIterator(), DestCopy.getIterator())) for (const MachineOperand &MO : MI.operands()) @@ -253,10 +295,14 @@ class MachineCopyPropagation : public MachineFunctionPass { const TargetInstrInfo *TII; const MachineRegisterInfo *MRI; + // Return true if this is a copy instruction and false otherwise. 
+ bool UseCopyInstr; + public: static char ID; // Pass identification, replacement for typeid - MachineCopyPropagation() : MachineFunctionPass(ID) { + MachineCopyPropagation(bool CopyInstr = false) + : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) { initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); } @@ -334,9 +380,13 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, /// isNopCopy("ecx = COPY eax", AX, CX) == true /// isNopCopy("ecx = COPY eax", AH, CL) == false static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src, - MCRegister Def, const TargetRegisterInfo *TRI) { - MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg(); - MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg(); + MCRegister Def, const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, bool UseCopyInstr) { + + Optional CopyOperands = + isCopyInstr(PreviousCopy, *TII, UseCopyInstr); + MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg(); + MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg(); if (Src == PreviousSrc && Def == PreviousDef) return true; if (!TRI->isSubRegister(PreviousSrc, Src)) @@ -356,22 +406,26 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, return false; // Search for an existing copy. - MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI); + MachineInstr *PrevCopy = + Tracker.findAvailCopy(Copy, Def, *TRI, *TII, UseCopyInstr); if (!PrevCopy) return false; + auto PrevCopyOperands = isCopyInstr(*PrevCopy, *TII, UseCopyInstr); // Check that the existing copy uses the correct sub registers. - if (PrevCopy->getOperand(0).isDead()) + if (PrevCopyOperands->Destination->isDead()) return false; - if (!isNopCopy(*PrevCopy, Src, Def, TRI)) + if (!isNopCopy(*PrevCopy, Src, Def, TRI, TII, UseCopyInstr)) return false; LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. - assert(Copy.isCopy()); - Register CopyDef = Copy.getOperand(0).getReg(); + Optional CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + assert(CopyOperands); + + Register CopyDef = CopyOperands->Destination->getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : make_range(PrevCopy->getIterator(), Copy.getIterator())) @@ -385,7 +439,9 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - Register Def = Copy.getOperand(0).getReg(); + + Optional CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + Register Def = CopyOperands->Destination->getReg(); if (const TargetRegisterClass *URC = UseI.getRegClassConstraint(UseIdx, TII, TRI)) @@ -403,7 +459,8 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - Register CopySrcReg = Copy.getOperand(1).getReg(); + Optional CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + Register CopySrcReg = CopyOperands->Source->getReg(); // If the new register meets the opcode register constraints, then allow // forwarding. 
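// forwardUses(), updated further below, substitutes the source of a still
// "available" copy for uses of its destination:
//   %ecx = COPY %eax
//   ...            ; nothing clobbers eax or ecx in between
//   use %ecx  ==>  use %eax
// A toy model of that availability tracking over a linear block, with one
// tracked copy and registers as plain unsigneds (the real pass tracks every
// copy per register unit):
#include <optional>
#include <utility>
#include <vector>

struct ToyInstr {
  std::optional<std::pair<unsigned, unsigned>> Copy; // {dest, src} if a copy
  std::vector<unsigned> Defs;
  std::vector<unsigned> Uses;
};

void forwardUsesToy(std::vector<ToyInstr> &Block) {
  std::optional<std::pair<unsigned, unsigned>> Avail; // last live copy
  for (ToyInstr &MI : Block) {
    // Uses read pre-def values, so forward before handling defs.
    if (Avail)
      for (unsigned &U : MI.Uses)
        if (U == Avail->first)
          U = Avail->second;
    // Any def of either register invalidates the tracked copy.
    for (unsigned D : MI.Defs)
      if (Avail && (D == Avail->first || D == Avail->second))
        Avail.reset();
    if (MI.Copy)
      Avail = MI.Copy; // start tracking the new copy
  }
}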
@@ -411,34 +468,10 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, UseI.getRegClassConstraint(UseIdx, TII, TRI)) return URC->contains(CopySrcReg); - if (!UseI.isCopy()) + auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr); + if (!UseICopyOperands) return false; - const TargetRegisterClass *CopySrcRC = - TRI->getMinimalPhysRegClass(CopySrcReg); - const TargetRegisterClass *UseDstRC = - TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); - const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC); - - // If cross copy register class is not the same as copy source register class - // then it is not possible to copy the register directly and requires a cross - // register class copy. Fowarding this copy without checking register class of - // UseDst may create additional cross register copies when expanding the copy - // instruction in later passes. - if (CopySrcRC != CrossCopyRC) { - const TargetRegisterClass *CopyDstRC = - TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg()); - - // Check if UseDstRC matches the necessary register class to copy from - // CopySrc's register class. If so then forwarding the copy will not - // introduce any cross-class copys. Else if CopyDstRC matches then keep the - // copy and do not forward. If neither UseDstRC or CopyDstRC matches then - // we may need a cross register copy later but we do not worry about it - // here. - if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC) - return false; - } - /// COPYs don't have register class constraints, so if the user instruction /// is a COPY, we just try to avoid introducing additional cross-class /// COPYs. For example: @@ -455,12 +488,34 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, /// /// so we have reduced the number of cross-class COPYs and potentially /// introduced a nop COPY that can be removed. - const TargetRegisterClass *SuperRC = UseDstRC; - for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses(); - SuperRC; SuperRC = *SuperRCI++) - if (SuperRC->contains(CopySrcReg)) - return true; + // Allow forwarding if src and dst belong to any common class, so long as they + // don't belong to any (possibly smaller) common class that requires copies to + // go via a different class. + Register UseDstReg = UseICopyOperands->Destination->getReg(); + bool Found = false; + bool IsCrossClass = false; + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if (RC->contains(CopySrcReg) && RC->contains(UseDstReg)) { + Found = true; + if (TRI->getCrossCopyRegClass(RC) != RC) { + IsCrossClass = true; + break; + } + } + } + if (!Found) + return false; + if (!IsCrossClass) + return true; + // The forwarded copy would be cross-class. Only do this if the original copy + // was also cross-class. 
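// The rewritten register-class check above (continued below) replaces the
// old minimal-class/super-class walk: scan every register class, and allow
// forwarding CopySrc into the user COPY's destination if they share a class,
// unless some shared class is "cross-copy restricted"
// (TRI->getCrossCopyRegClass(RC) != RC); a cross-class forward is then
// permitted only when the original copy was itself cross-class. A set-based
// model of that decision, with classes reduced to register lists plus a
// flag:
#include <algorithm>
#include <vector>

struct RegClassSketch {
  std::vector<unsigned> Regs;
  bool CrossRestricted = false; // models getCrossCopyRegClass(RC) != RC
  bool contains(unsigned R) const {
    return std::find(Regs.begin(), Regs.end(), R) != Regs.end();
  }
};

bool isForwardableSketch(const std::vector<RegClassSketch> &RCs,
                         unsigned CopySrc, unsigned CopyDst, unsigned UseDst) {
  bool Found = false, IsCrossClass = false;
  for (const RegClassSketch &RC : RCs) {
    if (RC.contains(CopySrc) && RC.contains(UseDst)) {
      Found = true;
      if (RC.CrossRestricted) {
        IsCrossClass = true;
        break;
      }
    }
  }
  if (!Found)
    return false; // no common class at all: cannot forward
  if (!IsCrossClass)
    return true; // a plain same-class forward is always fine
  // The forwarded copy would be cross-class; only allow it if the original
  // CopySrc -> CopyDst copy already was.
  for (const RegClassSketch &RC : RCs)
    if (RC.contains(CopySrc) && RC.contains(CopyDst) && RC.CrossRestricted)
      return true;
  return false;
}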
+ Register CopyDstReg = CopyOperands->Destination->getReg(); + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) && + TRI->getCrossCopyRegClass(RC) != RC) + return true; + } return false; } @@ -527,13 +582,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!MOUse.isRenamable()) continue; - MachineInstr *Copy = - Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI); + MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), + *TRI, *TII, UseCopyInstr); if (!Copy) continue; - Register CopyDstReg = Copy->getOperand(0).getReg(); - const MachineOperand &CopySrc = Copy->getOperand(1); + Optional CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register CopyDstReg = CopyOperands->Destination->getReg(); + const MachineOperand &CopySrc = *CopyOperands->Source; Register CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. @@ -557,7 +614,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { // Check that the instruction is not a copy that partially overwrites the // original copy source that we are about to use. The tracker mechanism // cannot cope with that. - if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) && + if (isCopyInstr(MI, *TII, UseCopyInstr) && + MI.modifiesRegister(CopySrcReg, TRI) && !MI.definesRegister(CopySrcReg)) { LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); continue; @@ -596,76 +654,82 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // Analyze copies (which don't overlap themselves). - if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(), - MI.getOperand(1).getReg())) { - assert(MI.getOperand(0).getReg().isPhysical() && - MI.getOperand(1).getReg().isPhysical() && - "MachineCopyPropagation should be run after register allocation!"); - - MCRegister Def = MI.getOperand(0).getReg().asMCReg(); - MCRegister Src = MI.getOperand(1).getReg().asMCReg(); - - // The two copies cancel out and the source of the first copy - // hasn't been overridden, eliminate the second one. e.g. - // %ecx = COPY %eax - // ... nothing clobbered eax. - // %eax = COPY %ecx - // => - // %ecx = COPY %eax - // - // or - // - // %ecx = COPY %eax - // ... nothing clobbered eax. - // %ecx = COPY %eax - // => - // %ecx = COPY %eax - if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def)) - continue; + Optional CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + if (CopyOperands) { + + Register RegSrc = CopyOperands->Source->getReg(); + Register RegDef = CopyOperands->Destination->getReg(); + + if (!TRI->regsOverlap(RegDef, RegSrc)) { + assert(RegDef.isPhysical() && RegSrc.isPhysical() && + "MachineCopyPropagation should be run after register allocation!"); + + MCRegister Def = RegDef.asMCReg(); + MCRegister Src = RegSrc.asMCReg(); + + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ecx = COPY %eax + // ... nothing clobbered eax. + // %eax = COPY %ecx + // => + // %ecx = COPY %eax + // + // or + // + // %ecx = COPY %eax + // ... nothing clobbered eax. 
+ // %ecx = COPY %eax + // => + // %ecx = COPY %eax + if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def)) + continue; - forwardUses(MI); + forwardUses(MI); + + // Src may have been changed by forwardUses() + CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + Src = CopyOperands->Source->getReg().asMCReg(); + + // If Src is defined by a previous copy, the previous copy cannot be + // eliminated. + ReadRegister(Src, MI, RegularUse); + for (const MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + MCRegister Reg = MO.getReg().asMCReg(); + if (!Reg) + continue; + ReadRegister(Reg, MI, RegularUse); + } - // Src may have been changed by forwardUses() - Src = MI.getOperand(1).getReg().asMCReg(); + LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump()); + + // Copy is now a candidate for deletion. + if (!MRI->isReserved(Def)) + MaybeDeadCopies.insert(&MI); + + // If 'Def' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9 = copy %xmm2 + // ... + // %xmm2 = copy %xmm0 + // ... + // %xmm2 = copy %xmm9 + Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr); + for (const MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + MCRegister Reg = MO.getReg().asMCReg(); + if (!Reg) + continue; + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); + } - // If Src is defined by a previous copy, the previous copy cannot be - // eliminated. - ReadRegister(Src, MI, RegularUse); - for (const MachineOperand &MO : MI.implicit_operands()) { - if (!MO.isReg() || !MO.readsReg()) - continue; - MCRegister Reg = MO.getReg().asMCReg(); - if (!Reg) - continue; - ReadRegister(Reg, MI, RegularUse); - } + Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr); - LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump()); - - // Copy is now a candidate for deletion. - if (!MRI->isReserved(Def)) - MaybeDeadCopies.insert(&MI); - - // If 'Def' is previously source of another copy, then this earlier copy's - // source is no longer available. e.g. - // %xmm9 = copy %xmm2 - // ... - // %xmm2 = copy %xmm0 - // ... - // %xmm2 = copy %xmm9 - Tracker.clobberRegister(Def, *TRI); - for (const MachineOperand &MO : MI.implicit_operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - MCRegister Reg = MO.getReg().asMCReg(); - if (!Reg) - continue; - Tracker.clobberRegister(Reg, *TRI); + continue; } - - Tracker.trackCopy(&MI, *TRI); - - continue; } // Clobber any earlyclobber regs first. @@ -677,7 +741,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // later. if (MO.isTied()) ReadRegister(Reg, MI, RegularUse); - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } forwardUses(MI); @@ -713,7 +777,9 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg(); + Optional CopyOperands = + isCopyInstr(*MaybeDead, *TII, UseCopyInstr); + MCRegister Reg = CopyOperands->Destination->getReg().asMCReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -726,7 +792,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Make sure we invalidate any entries in the copy maps before erasing // the instruction. 
- Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); // erase() will return the next valid iterator pointing to the next // element after the erased one. @@ -739,7 +805,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Any previous copy definition or reading the Defs is no longer available. for (MCRegister Reg : Defs) - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } // If MBB doesn't have successors, delete the copies whose defs are not used. @@ -749,12 +815,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); - assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); + + Optional CopyOperands = + isCopyInstr(*MaybeDead, *TII, UseCopyInstr); + assert(CopyOperands); + + Register SrcReg = CopyOperands->Source->getReg(); + Register DestReg = CopyOperands->Destination->getReg(); + assert(!MRI->isReserved(DestReg)); // Update matching debug values, if any. - assert(MaybeDead->isCopy()); - Register SrcReg = MaybeDead->getOperand(1).getReg(); - Register DestReg = MaybeDead->getOperand(0).getReg(); SmallVector MaybeDeadDbgUsers( CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end()); MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(), @@ -772,10 +842,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { } static bool isBackwardPropagatableCopy(MachineInstr &MI, - const MachineRegisterInfo &MRI) { - assert(MI.isCopy() && "MI is expected to be a COPY"); - Register Def = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + const MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { + Optional CopyOperands = isCopyInstr(MI, TII, UseCopyInstr); + assert(CopyOperands && "MI is expected to be a COPY"); + + Register Def = CopyOperands->Destination->getReg(); + Register Src = CopyOperands->Source->getReg(); if (!Def || !Src) return false; @@ -783,7 +857,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI, if (MRI.isReserved(Def) || MRI.isReserved(Src)) return false; - return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); + return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill(); } void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { @@ -808,13 +882,15 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { if (!MODef.isRenamable()) continue; - MachineInstr *Copy = - Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI); + MachineInstr *Copy = Tracker.findAvailBackwardCopy( + MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr); if (!Copy) continue; - Register Def = Copy->getOperand(0).getReg(); - Register Src = Copy->getOperand(1).getReg(); + Optional CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register Def = CopyOperands->Destination->getReg(); + Register Src = CopyOperands->Source->getReg(); if (MODef.getReg() != Src) continue; @@ -833,7 +909,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { << MI << " from " << *Copy); MODef.setReg(Def); - MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + MODef.setIsRenamable(CopyOperands->Destination->isRenamable()); LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); MaybeDeadCopies.insert(Copy); @@ -849,20 +925,23 @@ void 
MachineCopyPropagation::BackwardCopyPropagateBlock( for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { // Ignore non-trivial COPYs. - if (MI.isCopy() && MI.getNumOperands() == 2 && - !TRI->regsOverlap(MI.getOperand(0).getReg(), - MI.getOperand(1).getReg())) { - - MCRegister Def = MI.getOperand(0).getReg().asMCReg(); - MCRegister Src = MI.getOperand(1).getReg().asMCReg(); - - // Unlike forward cp, we don't invoke propagateDefs here, - // just let forward cp do COPY-to-COPY propagation. - if (isBackwardPropagatableCopy(MI, *MRI)) { - Tracker.invalidateRegister(Src, *TRI); - Tracker.invalidateRegister(Def, *TRI); - Tracker.trackCopy(&MI, *TRI); - continue; + Optional CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + if (CopyOperands && MI.getNumOperands() == 2) { + Register DefReg = CopyOperands->Destination->getReg(); + Register SrcReg = CopyOperands->Source->getReg(); + + if (!TRI->regsOverlap(DefReg, SrcReg)) { + MCRegister Def = DefReg.asMCReg(); + MCRegister Src = SrcReg.asMCReg(); + + // Unlike forward cp, we don't invoke propagateDefs here, + // just let forward cp do COPY-to-COPY propagation. + if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) { + Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr); + Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr); + Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr); + continue; + } } } @@ -872,7 +951,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; - Tracker.invalidateRegister(Reg, *TRI); + Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr); } propagateDefs(MI); @@ -884,7 +963,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( continue; if (MO.isDef()) - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII, + UseCopyInstr); if (MO.readsReg()) { if (MO.isDebug()) { @@ -898,7 +978,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( } } } else { - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII, + UseCopyInstr); } } } @@ -906,8 +987,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (auto *Copy : MaybeDeadCopies) { - Register Src = Copy->getOperand(1).getReg(); - Register Def = Copy->getOperand(0).getReg(); + Optional CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register Src = CopyOperands->Source->getReg(); + Register Def = CopyOperands->Destination->getReg(); SmallVector MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(), CopyDbgUsers[Copy].end()); @@ -938,3 +1021,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +MachineFunctionPass * +llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) { + return new MachineCopyPropagation(UseCopyInstr); +} diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 42a5e2b7af01..6871ac35b300 100644 --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,50 +8,15 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineSSAContext.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; template class 
llvm::GenericCycleInfo; template class llvm::GenericCycle; -namespace { - -/// Legacy analysis pass which computes a \ref MachineCycleInfo. -class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -} // namespace - char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -87,6 +52,16 @@ void MachineCycleInfoWrapperPass::releaseMemory() { F = nullptr; } +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + char MachineCycleInfoPrinterPass::ID = 0; MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass() @@ -111,3 +86,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) { CI.print(errs()); return false; } + +bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetInstrInfo *TII = ST.getInstrInfo(); + + // The instruction is cycle invariant if all of its operands are. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && + !TII->isIgnorableUse(MO)) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (any_of(Cycle->getEntries(), + [&](const MachineBasicBlock *Block) { + return Block->isLiveIn(Reg); + })) { + // If the reg is live into any header of the cycle we can't hoist an + // instruction which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); + + // If the cycle contains the definition of an operand, then the instruction + // isn't cycle invariant. 
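// isCycleInvariant(), newly housed in MachineCycleAnalysis above, applies
// the usual LICM-style legality ladder to physical-register operands: a
// physreg use is movable only if the register is constant, caller-preserved
// here, or an "ignorable" use; a physreg def must be dead and must not be
// live into any cycle header block. Just that ladder, distilled with the
// underlying MRI/TRI/TII queries stubbed as booleans:
struct PhysRegOperandSketch {
  bool IsUse = false;               // use vs. def of the operand
  bool IsDeadDef = false;           // defs only
  bool IsConstantPhysReg = false;   // e.g. an always-constant reserved reg
  bool IsCallerPreserved = false;   // saved/restored around calls here
  bool IsIgnorableUse = false;      // target says this use doesn't pin us
  bool LiveIntoCycleHeader = false; // live into some entry of the cycle
};

bool physRegOperandIsCycleInvariant(const PhysRegOperandSketch &Op) {
  if (Op.IsUse)
    return Op.IsConstantPhysReg || Op.IsCallerPreserved || Op.IsIgnorableUse;
  if (!Op.IsDeadDef)
    return false; // a live physreg def can never be hoisted
  // A dead def is still illegal if it would clobber a cycle live-in.
  return !Op.LiveIntoCycleHeader;
}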
+ if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is cycle invariant! + return true; +} diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp index 599a81847592..b726a032ca18 100644 --- a/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/llvm/lib/CodeGen/MachineDebugify.cpp @@ -16,14 +16,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/Debugify.h" diff --git a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index a39dc79baaa8..346cfedde390 100644 --- a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -7,10 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominanceFrontier.h" -#include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp index 28cff2a4f3f3..0632cde9c6f4 100644 --- a/llvm/lib/CodeGen/MachineDominators.cpp +++ b/llvm/lib/CodeGen/MachineDominators.cpp @@ -15,6 +15,8 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index fd5ea5cad072..f58996ea90c6 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -44,7 +44,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -61,7 +60,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DOTGraphTraits.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" @@ -109,6 +107,27 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) { llvm_unreachable("Invalid machine function property"); } +void setUnsafeStackSize(const Function &F, MachineFrameInfo &FrameInfo) { + if (!F.hasFnAttribute(Attribute::SafeStack)) + return; + + auto *Existing = + dyn_cast_or_null(F.getMetadata(LLVMContext::MD_annotation)); + + if (!Existing || Existing->getNumOperands() != 2) + return; + + auto *MetadataName = "unsafe-stack-size"; + if (auto &N = Existing->getOperand(0)) { + if (cast(N.get())->getString() == MetadataName) { + if (auto &Op = Existing->getOperand(1)) { + auto Val = mdconst::extract(Op)->getZExtValue(); + FrameInfo.setUnsafeStackSize(Val); + } + } + } +} + // Pin the vtable to this file. 
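// setUnsafeStackSize() above forwards a precomputed "unsafe-stack-size"
// value from IR !annotation metadata into MachineFrameInfo, and only for
// functions carrying the safestack attribute; the node is expected to look
// like !{!"unsafe-stack-size", i64 N}. That shape check, modelled with the
// metadata reduced to a name/value pair:
#include <cstdint>
#include <optional>
#include <string>

struct AnnotationSketch {
  std::string Name;   // stands in for the leading MDString operand
  uint64_t Value = 0; // stands in for the ConstantInt operand
};

std::optional<uint64_t>
getUnsafeStackSize(bool HasSafeStackAttr,
                   const std::optional<AnnotationSketch> &Annotation) {
  if (!HasSafeStackAttr || !Annotation)
    return std::nullopt; // only safestack functions carry the annotation
  if (Annotation->Name != "unsafe-stack-size")
    return std::nullopt; // a different annotation: ignore it
  return Annotation->Value;
}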
void MachineFunction::Delegate::anchor() {} @@ -133,11 +152,11 @@ void ilist_alloc_traits::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->deleteMachineBasicBlock(MBB); } -static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, +static inline Align getFnStackAlignment(const TargetSubtargetInfo *STI, const Function &F) { if (auto MA = F.getFnStackAlign()) - return MA->value(); - return STI->getFrameLowering()->getStackAlign().value(); + return *MA; + return STI->getFrameLowering()->getStackAlign(); } MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target, @@ -177,6 +196,8 @@ void MachineFunction::init() { /*ForcedRealign=*/CanRealignSP && F.hasFnAttribute(Attribute::StackAlignment)); + setUnsafeStackSize(F, *FrameInfo); + if (F.hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(*F.getFnStackAlign()); @@ -208,9 +229,7 @@ void MachineFunction::init() { "Can't create a MachineFunction using a Module with a " "Target-incompatible DataLayout attached\n"); - PSVManager = - std::make_unique(*(getSubtarget(). - getInstrInfo())); + PSVManager = std::make_unique(getTarget()); } MachineFunction::~MachineFunction() { @@ -837,25 +856,6 @@ void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } -void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad, - const Function *Filter, - const BlockAddress *RecoverBA) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Filter; - Handler.RecoverBA = RecoverBA; - LP.SEHHandlers.push_back(Handler); -} - -void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad, - const Function *Cleanup) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Cleanup; - Handler.RecoverBA = nullptr; - LP.SEHHandlers.push_back(Handler); -} - void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym, ArrayRef Sites) { LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); @@ -1012,7 +1012,32 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, } } -auto MachineFunction::salvageCopySSA(MachineInstr &MI) +auto MachineFunction::salvageCopySSA( + MachineInstr &MI, DenseMap &DbgPHICache) + -> DebugInstrOperandPair { + const TargetInstrInfo &TII = *getSubtarget().getInstrInfo(); + + // Check whether this copy-like instruction has already been salvaged into + // an operand pair. + Register Dest; + if (auto CopyDstSrc = TII.isCopyInstr(MI)) { + Dest = CopyDstSrc->Destination->getReg(); + } else { + assert(MI.isSubregToReg()); + Dest = MI.getOperand(0).getReg(); + } + + auto CacheIt = DbgPHICache.find(Dest); + if (CacheIt != DbgPHICache.end()) + return CacheIt->second; + + // Calculate the instruction number to use, or install a DBG_PHI. + auto OperandPair = salvageCopySSAImpl(MI); + DbgPHICache.insert({Dest, OperandPair}); + return OperandPair; +} + +auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI) -> DebugInstrOperandPair { MachineRegisterInfo &MRI = getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); @@ -1141,26 +1166,13 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI) MachineBasicBlock &InsertBB = *CurInst->getParent(); // We reached the start of the block before finding a defining instruction. - // It could be from a constant register, otherwise it must be an argument. 
- if (TRI.isConstantPhysReg(State.first)) { - // We can produce a DBG_PHI that identifies the constant physreg. Doesn't - // matter where we put it, as it's constant valued. - assert(CurInst->isCopy()); - } else if (State.first == TRI.getFrameRegister(*this)) { - // LLVM IR is allowed to read the framepointer by calling a - // llvm.frameaddress.* intrinsic. We can support this by emitting a - // DBG_PHI $fp. This isn't ideal, because it extends the behaviours / - // position that DBG_PHIs appear at, limiting what can be done later. - // TODO: see if there's a better way of expressing these variable - // locations. - ; - } else { - // Assert that this is the entry block, or an EH pad. If it isn't, then - // there is some code construct we don't recognise that deals with physregs - // across blocks. - assert(!State.first.isVirtual()); - assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad()); - } + // There are numerous scenarios where this can happen: + // * Constant physical registers, + // * Several intrinsics that allow LLVM-IR to read arbitrary registers, + // * Arguments in the entry block, + // * Exception handling landing pads. + // Validating all of them is too difficult, so just insert a DBG_PHI reading + // the variable value at this position, rather than checking it makes sense. // Create DBG_PHI for specified physreg. auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(), @@ -1181,9 +1193,7 @@ void MachineFunction::finalizeDebugInstrRefs() { MI.getOperand(1).ChangeToRegister(0, false); }; - if (!useDebugInstrRef()) - return; - + DenseMap ArgDbgPHIs; for (auto &MBB : *this) { for (auto &MI : MBB) { if (!MI.isDebugRef() || !MI.getOperand(0).isReg()) @@ -1206,7 +1216,7 @@ void MachineFunction::finalizeDebugInstrRefs() { // instruction that defines the source value, see salvageCopySSA docs // for why this is important.
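The hunks above also thread a per-function cache (ArgDbgPHIs) through salvageCopySSA so that each copy-like instruction is salvaged at most once per destination register, rather than once per debug use. A minimal sketch of that memoization shape, assuming DebugInstrOperandPair is the instruction-number/operand pair used by instruction referencing (the helper name here is hypothetical):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/Register.h"
    #include <utility>
    using namespace llvm;

    using DebugInstrOperandPair = std::pair<unsigned, unsigned>;

    // Salvage each copy destination once; later queries for the same register
    // reuse the cached pair instead of re-walking the copy chain.
    static DebugInstrOperandPair
    salvageOnce(Register Dest, DenseMap<Register, DebugInstrOperandPair> &Cache,
                function_ref<DebugInstrOperandPair()> SalvageImpl) {
      auto It = Cache.find(Dest);
      if (It != Cache.end())
        return It->second;
      DebugInstrOperandPair Result = SalvageImpl(); // The expensive walk.
      Cache.insert({Dest, Result});
      return Result;
    }

The dispatch that consumes the cache continues in the hunk below.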
if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) { - auto Result = salvageCopySSA(DefMI); + auto Result = salvageCopySSA(DefMI, ArgDbgPHIs); MI.getOperand(0).ChangeToImmediate(Result.first); MI.getOperand(1).setImm(Result.second); } else { diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp index 16cde1f601f9..99494122d608 100644 --- a/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 0e0eb8b8e00f..81c97ba6a086 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -24,7 +24,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -34,7 +33,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -82,7 +80,7 @@ static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI) { Optional Count = MBFI->getBlockProfileCount(&MBB); - if (!Count.hasValue()) + if (!Count) return true; if (PercentileCutoff > 0) { @@ -108,9 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // We don't want to proceed further for cold functions // or functions of unknown hotness. Lukewarm functions have no prefix. 
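Several of the surrounding hunks are mechanical llvm::Optional cleanups: explicit hasValue() tests give way to the contextual bool conversion, and getValueOr() becomes value_or(), as in the isColdBlock and SectionPrefix changes here. A small illustration of the idiom (illustrative values only):

    #include "llvm/ADT/Optional.h"
    #include <cstdint>
    using namespace llvm;

    uint64_t blockCount(Optional<uint64_t> Count) {
      // Before: if (!Count.hasValue()) return 0; return Count.getValue();
      // After: the Optional itself converts to bool when tested, and
      // value_or folds in the default for the empty case.
      return Count.value_or(0);
    }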
Optional SectionPrefix = MF.getFunction().getSectionPrefix(); - if (SectionPrefix.hasValue() && - (SectionPrefix.getValue().equals("unlikely") || - SectionPrefix.getValue().equals("unknown"))) { + if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") || + SectionPrefix.getValue().equals("unknown"))) { return false; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 85b266afceef..31f45e194a97 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -11,19 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -38,42 +33,30 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include #include -#include #include #include -#include #include using namespace llvm; @@ -163,19 +146,13 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { return nullptr; } -/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in -/// this instruction from their respective use lists. This requires that the -/// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { +void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) { for (MachineOperand &MO : operands()) if (MO.isReg()) MRI.removeRegOperandFromUseList(&MO); } -/// AddRegOperandsToUseLists - Add all of the register operands in -/// this instruction from their respective use lists. This requires that the -/// operands not be on their use lists yet. 
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { +void MachineInstr::addRegOperandsToUseLists(MachineRegisterInfo &MRI) { for (MachineOperand &MO : operands()) if (MO.isReg()) MRI.addRegOperandToUseList(&MO); @@ -232,16 +209,12 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } -#ifndef NDEBUG - bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata || - Op.getType() == MachineOperand::MO_MCSymbol; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. - assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands() || isDebugOp) && + assert((MCID->isVariadic() || OpNo < MCID->getNumOperands() || + Op.isValidExcessOperand()) && "Trying to add an operand to a machine instr that is already done!"); -#endif MachineRegisterInfo *MRI = getRegInfo(); @@ -300,10 +273,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } -/// RemoveOperand - Erase an operand from an instruction, leaving it with one -/// fewer operand than it started with. -/// -void MachineInstr::RemoveOperand(unsigned OpNo) { +void MachineInstr::removeOperand(unsigned OpNo) { assert(OpNo < getNumOperands() && "Invalid operand number"); untieRegOperand(OpNo); @@ -1401,11 +1371,10 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { continue; // A load from a constant PseudoSourceValue is invariant. - if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) + if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { if (PSV->isConstant(&MFI)) continue; - - if (const Value *V = MMO->getValue()) { + } else if (const Value *V = MMO->getValue()) { // If we have an AliasAnalysis, ask it whether the memory is constant. 
if (AA && AA->pointsToConstantMemory( @@ -1904,7 +1873,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg, unsigned OpIdx = DeadOps.back(); if (getOperand(OpIdx).isImplicit() && (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0)) - RemoveOperand(OpIdx); + removeOperand(OpIdx); else getOperand(OpIdx).setIsKill(false); DeadOps.pop_back(); @@ -1969,7 +1938,7 @@ bool MachineInstr::addRegisterDead(Register Reg, unsigned OpIdx = DeadOps.back(); if (getOperand(OpIdx).isImplicit() && (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0)) - RemoveOperand(OpIdx); + removeOperand(OpIdx); else getOperand(OpIdx).setIsDead(false); DeadOps.pop_back(); diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index 759cff179790..2f1d7b976264 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -16,7 +16,8 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include using namespace llvm; @@ -109,7 +110,7 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI) { for (auto MII = FirstMI; MII != LastMI; ++MII) - if (MII->getDebugLoc().get()) + if (MII->getDebugLoc()) return MII->getDebugLoc(); return DebugLoc(); } diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 500cf8e0b79b..00d75f8231c7 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -240,7 +240,7 @@ namespace { void ExitScopeIfDone( MachineDomTreeNode *Node, DenseMap &OpenChildren, - DenseMap &ParentMap); + const DenseMap &ParentMap); void HoistOutOfLoop(MachineDomTreeNode *HeaderN); @@ -696,19 +696,16 @@ void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) { /// destroy ancestors which are now done. void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap &OpenChildren, - DenseMap &ParentMap) { + const DenseMap &ParentMap) { if (OpenChildren[Node]) return; - // Pop scope. - ExitScope(Node->getBlock()); - - // Now traverse upwards to pop ancestors whose offsprings are all done. - while (MachineDomTreeNode *Parent = ParentMap[Node]) { - unsigned Left = --OpenChildren[Parent]; - if (Left != 0) + for(;;) { + ExitScope(Node->getBlock()); + // Now traverse upwards to pop ancestors whose offspring are all done.
+ MachineDomTreeNode *Parent = ParentMap.lookup(Node); + if (!Parent || --OpenChildren[Parent] != 0) break; - ExitScope(Parent->getBlock()); Node = Parent; } } @@ -999,6 +996,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { if (I.isConvergent()) return false; + if (!TII->shouldHoist(I, CurLoop)) + return false; + return true; } diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 9b96bc5e5e7f..5cbded4b9264 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -17,13 +17,12 @@ #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp index fdcc8472f1c2..0e8335d4974d 100644 --- a/llvm/lib/CodeGen/MachineLoopUtils.cpp +++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -64,7 +63,11 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, if (Use.getParent()->getParent() != Loop) Uses.push_back(&Use); for (auto *Use : Uses) { - MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + assert(ConstrainRegClass && + "Expected a valid constrained register class!"); + (void)ConstrainRegClass; Use->setReg(R); } } @@ -90,25 +93,24 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, if (Remaps.count(R)) R = Remaps[R]; OrigPhi.getOperand(InitRegIdx).setReg(R); - MI.RemoveOperand(LoopRegIdx + 1); - MI.RemoveOperand(LoopRegIdx + 0); + MI.removeOperand(LoopRegIdx + 1); + MI.removeOperand(LoopRegIdx + 0); } else { // When peeling back, the initial value is the loop-carried value from // the original loop. 
Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg(); MI.getOperand(LoopRegIdx).setReg(LoopReg); - MI.RemoveOperand(InitRegIdx + 1); - MI.RemoveOperand(InitRegIdx + 0); + MI.removeOperand(InitRegIdx + 1); + MI.removeOperand(InitRegIdx + 0); } } DebugLoc DL; if (Direction == LPD_Front) { - Preheader->replaceSuccessor(Loop, NewBB); + Preheader->ReplaceUsesOfBlockWith(Loop, NewBB); NewBB->addSuccessor(Loop); Loop->replacePhiUsesWith(Preheader, NewBB); - if (TII->removeBranch(*Preheader) > 0) - TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL); + Preheader->updateTerminator(Loop); TII->removeBranch(*NewBB); TII->insertBranch(*NewBB, Loop, nullptr, {}, DL); } else { diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 31d4fc7d02bf..23d55a5df9f5 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -7,27 +7,18 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -40,174 +31,24 @@ using namespace llvm; using namespace llvm::dwarf; +static cl::opt + DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + // Out of line virtual method. MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; -namespace llvm { - -class MMIAddrLabelMapCallbackPtr final : CallbackVH { - MMIAddrLabelMap *Map = nullptr; - -public: - MMIAddrLabelMapCallbackPtr() = default; - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} - - void setPtr(BasicBlock *BB) { - ValueHandleBase::operator=(BB); - } - - void setMap(MMIAddrLabelMap *map) { Map = map; } - - void deleted() override; - void allUsesReplacedWith(Value *V2) override; -}; - -class MMIAddrLabelMap { - MCContext &Context; - struct AddrLabelSymEntry { - /// The symbols for the label. - TinyPtrVector Symbols; - - Function *Fn; // The containing function of the BasicBlock. - unsigned Index; // The index in BBCallbacks for the BasicBlock. - }; - - DenseMap, AddrLabelSymEntry> AddrLabelSymbols; - - /// Callbacks for the BasicBlock's that we have entries for. We use this so - /// we get notified if a block is deleted or RAUWd. - std::vector BBCallbacks; - - /// This is a per-function list of symbols whose corresponding BasicBlock got - /// deleted. These symbols need to be emitted at some point in the file, so - /// AsmPrinter emits them after the function body. 
- DenseMap, std::vector> - DeletedAddrLabelsNeedingEmission; - -public: - MMIAddrLabelMap(MCContext &context) : Context(context) {} - - ~MMIAddrLabelMap() { - assert(DeletedAddrLabelsNeedingEmission.empty() && - "Some labels for deleted blocks never got emitted"); - } - - ArrayRef getAddrLabelSymbolToEmit(BasicBlock *BB); - - void takeDeletedSymbolsForFunction(Function *F, - std::vector &Result); - - void UpdateForDeletedBlock(BasicBlock *BB); - void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); -}; - -} // end namespace llvm - -ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { - assert(BB->hasAddressTaken() && - "Shouldn't get label for block without address taken"); - AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; - - // If we already had an entry for this block, just return it. - if (!Entry.Symbols.empty()) { - assert(BB->getParent() == Entry.Fn && "Parent changed"); - return Entry.Symbols; - } - - // Otherwise, this is a new entry, create a new symbol for it and add an - // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. - BBCallbacks.emplace_back(BB); - BBCallbacks.back().setMap(this); - Entry.Index = BBCallbacks.size() - 1; - Entry.Fn = BB->getParent(); - MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol() - : Context.createTempSymbol(); - Entry.Symbols.push_back(Sym); - return Entry.Symbols; -} - -/// If we have any deleted symbols for F, return them. -void MMIAddrLabelMap:: -takeDeletedSymbolsForFunction(Function *F, std::vector &Result) { - DenseMap, std::vector>::iterator I = - DeletedAddrLabelsNeedingEmission.find(F); - - // If there are no entries for the function, just return. - if (I == DeletedAddrLabelsNeedingEmission.end()) return; - - // Otherwise, take the list. - std::swap(Result, I->second); - DeletedAddrLabelsNeedingEmission.erase(I); -} - -void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { - // If the block got deleted, there is no need for the symbol. If the symbol - // was already emitted, we can just forget about it, otherwise we need to - // queue it up for later emission when the function is output. - AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]); - AddrLabelSymbols.erase(BB); - assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?"); - BBCallbacks[Entry.Index] = nullptr; // Clear the callback. - - assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && - "Block/parent mismatch"); - - for (MCSymbol *Sym : Entry.Symbols) { - if (Sym->isDefined()) - return; - - // If the block is not yet defined, we need to emit it at the end of the - // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list - // for the containing Function. Since the block is being deleted, its - // parent may already be removed, we have to get the function from 'Entry'. - DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); - } -} - -void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { - // Get the entry for the RAUW'd block and remove it from our map. - AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]); - AddrLabelSymbols.erase(Old); - assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?"); - - AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; - - // If New is not address taken, just move our symbol over to it. - if (NewEntry.Symbols.empty()) { - BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. - NewEntry = std::move(OldEntry); // Set New's entry. 
- return; - } - - BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. - - // Otherwise, we need to add the old symbols to the new block's set. - llvm::append_range(NewEntry.Symbols, OldEntry.Symbols); -} - -void MMIAddrLabelMapCallbackPtr::deleted() { - Map->UpdateForDeletedBlock(cast(getValPtr())); -} - -void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { - Map->UpdateForRAUWBlock(cast(getValPtr()), cast(V2)); -} - void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; NextFnNum = 0; - UsesMSVCFloatingPoint = UsesMorestackAddr = false; - HasSplitStack = HasNosplitStack = false; - AddrLabelSymbols = nullptr; + UsesMSVCFloatingPoint = false; + DbgInfoAvailable = false; } void MachineModuleInfo::finalize() { Personalities.clear(); - delete AddrLabelSymbols; - AddrLabelSymbols = nullptr; - Context.reset(); // We don't clear the ExternalContext. @@ -219,16 +60,11 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) : TM(std::move(MMI.TM)), Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr, - nullptr, false), + &MMI.TM.Options.MCOptions, false), MachineFunctions(std::move(MMI.MachineFunctions)) { Context.setObjectFileInfo(MMI.TM.getObjFileLowering()); ObjFileMMI = MMI.ObjFileMMI; CurCallSite = MMI.CurCallSite; - UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; - UsesMorestackAddr = MMI.UsesMorestackAddr; - HasSplitStack = MMI.HasSplitStack; - HasNosplitStack = MMI.HasNosplitStack; - AddrLabelSymbols = MMI.AddrLabelSymbols; ExternalContext = MMI.ExternalContext; TheModule = MMI.TheModule; } @@ -236,7 +72,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(), TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(), - nullptr, nullptr, false) { + nullptr, &TM->Options.MCOptions, false) { Context.setObjectFileInfo(TM->getObjFileLowering()); initialize(); } @@ -245,7 +81,7 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, MCContext *ExtContext) : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(), TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(), - nullptr, nullptr, false), + nullptr, &TM->Options.MCOptions, false), ExternalContext(ExtContext) { Context.setObjectFileInfo(TM->getObjFileLowering()); initialize(); @@ -253,25 +89,6 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, MachineModuleInfo::~MachineModuleInfo() { finalize(); } -//===- Address of Block Management ----------------------------------------===// - -ArrayRef -MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { - // Lazily create AddrLabelSymbols. - if (!AddrLabelSymbols) - AddrLabelSymbols = new MMIAddrLabelMap(getContext()); - return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast(BB)); -} - -void MachineModuleInfo:: -takeDeletedSymbolsForFunction(const Function *F, - std::vector &Result) { - // If no blocks have had their addresses taken, we're done. 
- if (!AddrLabelSymbols) return; - return AddrLabelSymbols-> - takeDeletedSymbolsForFunction(const_cast(F), Result); -} - /// \name Exception Handling /// \{ @@ -318,6 +135,13 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { LastResult = nullptr; } +void MachineModuleInfo::insertFunction(const Function &F, + std::unique_ptr &&MF) { + auto I = MachineFunctions.insert(std::make_pair(&F, std::move(MF))); + assert(I.second && "machine function already mapped"); + (void)I; +} + namespace { /// This pass frees the MachineFunction object associated with a Function. @@ -409,7 +233,8 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { Ctx.diagnose( DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie)); }); - MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + MMI.DbgInfoAvailable = !DisableDebugInfoPrinting && + !M.debug_compile_units().empty(); return false; } @@ -424,6 +249,7 @@ MachineModuleInfo MachineModuleAnalysis::run(Module &M, ModuleAnalysisManager &) { MachineModuleInfo MMI(TM); MMI.TheModule = &M; - MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + MMI.DbgInfoAvailable = !DisableDebugInfoPrinting && + !M.debug_compile_units().empty(); return MMI; } diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 680dbe54ffaf..46ad1de78c46 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -14,9 +14,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/MIRFormatter.h" -#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 5347a7b0d890..631768ec986c 100644 --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -53,10 +53,8 @@ void MachineOptimizationRemarkEmitter::emit( LLVMContext &Ctx = MF.getFunction().getContext(); // Only emit it if its hotness meets the threshold. - if (OptDiag.getHotness().getValueOr(0) < - Ctx.getDiagnosticsHotnessThreshold()) { + if (OptDiag.getHotness().value_or(0) < Ctx.getDiagnosticsHotnessThreshold()) return; - } Ctx.diagnose(OptDiag); } diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 7783b5e0d3cc..5da68abc8f6a 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -59,6 +59,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" @@ -82,9 +84,17 @@ using namespace llvm; using namespace ore; using namespace outliner; +// Statistics for outlined functions. STATISTIC(NumOutlined, "Number of candidates outlined"); STATISTIC(FunctionsCreated, "Number of functions created"); +// Statistics for instruction mapping. 
+STATISTIC(NumLegalInUnsignedVec, "Number of legal instrs in unsigned vector"); +STATISTIC(NumIllegalInUnsignedVec, + "Number of illegal instrs in unsigned vector"); +STATISTIC(NumInvisible, "Number of invisible instrs in unsigned vector"); +STATISTIC(UnsignedVecSize, "Size of unsigned vector"); + // Set to true if the user wants the outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. Since the outliner is confined to a single module (modulo LTO), @@ -188,6 +198,8 @@ struct InstructionMapper { assert(LegalInstrNumber != DenseMapInfo::getTombstoneKey() && "Tried to assign DenseMap tombstone or empty key to instruction."); + // Statistics. + ++NumLegalInUnsignedVec; return MINumber; } @@ -215,6 +227,8 @@ struct InstructionMapper { InstrListForMBB.push_back(It); UnsignedVecForMBB.push_back(IllegalInstrNumber); IllegalInstrNumber--; + // Statistics. + ++NumIllegalInUnsignedVec; assert(LegalInstrNumber < IllegalInstrNumber && "Instruction mapping overflow!"); @@ -293,6 +307,7 @@ struct InstructionMapper { case InstrType::Invisible: // Normally this is set by mapTo(Blah)Unsigned, but we just want to // skip this instruction. So, unset the flag here. + ++NumInvisible; AddedIllegalLastTime = false; break; } @@ -623,6 +638,15 @@ MachineFunction *MachineOutliner::createOutlinedFunction( TII.mergeOutliningCandidateAttributes(*F, OF.Candidates); + // Set uwtable, so we generate eh_frame. + UWTableKind UW = std::accumulate( + OF.Candidates.cbegin(), OF.Candidates.cend(), UWTableKind::None, + [](UWTableKind K, const outliner::Candidate &C) { + return std::max(K, C.getMF()->getFunction().getUWTableKind()); + }); + if (UW != UWTableKind::None) + F->setUWTableKind(UW); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); @@ -641,17 +665,20 @@ MachineFunction *MachineOutliner::createOutlinedFunction( ++I) { if (I->isDebugInstr()) continue; - MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + + // Don't keep debug information for outlined instructions. + auto DL = DebugLoc(); if (I->isCFIInstruction()) { - unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex(); + unsigned CFIIndex = I->getOperand(0).getCFIIndex(); MCCFIInstruction CFI = Instrs[CFIIndex]; - (void)MF.addFrameInst(CFI); + BuildMI(MBB, MBB.end(), DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MF.addFrameInst(CFI)); + } else { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + NewMI->dropMemRefs(MF); + NewMI->setDebugLoc(DL); + MBB.insert(MBB.end(), NewMI); } - NewMI->dropMemRefs(MF); - - // Don't keep debug information for outlined instructions. - NewMI->setDebugLoc(DebugLoc()); - MBB.insert(MBB.end(), NewMI); } // Set normal properties for a late MachineFunction. @@ -831,9 +858,10 @@ bool MachineOutliner::outline(Module &M, MBB.erase(std::next(StartIt), std::next(EndIt)); // Keep track of what we removed by marking them all as -1. - std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(), - Mapper.UnsignedVec.begin() + C.getEndIdx() + 1, - [](unsigned &I) { I = static_cast(-1); }); + for (unsigned &I : + llvm::make_range(Mapper.UnsignedVec.begin() + C.getStartIdx(), + Mapper.UnsignedVec.begin() + C.getEndIdx() + 1)) + I = static_cast(-1); OutlinedSomething = true; // Statistics. @@ -896,6 +924,9 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, // MBB is suitable for outlining. Map it to a list of unsigneds. 
Mapper.convertToUnsignedVec(MBB, *TII); } + + // Statistics. + UnsignedVecSize = Mapper.UnsignedVec.size(); } } diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 762395542b40..8d500398f55e 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -29,6 +29,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -43,6 +44,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -55,7 +57,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/CodeGen/RegisterPressure.h" @@ -66,7 +67,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" @@ -109,7 +109,6 @@ STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages"); /// A command line option to turn software pipelining on or off. static cl::opt EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), - cl::ZeroOrMore, cl::desc("Enable Software Pipelining")); /// A command line option to enable SWP at -Os. @@ -147,8 +146,8 @@ static cl::opt SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1)); #endif static cl::opt SwpIgnoreRecMII("pipeliner-ignore-recmii", - cl::ReallyHidden, cl::init(false), - cl::ZeroOrMore, cl::desc("Ignore RecMII")); + cl::ReallyHidden, + cl::desc("Ignore RecMII")); static cl::opt SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false)); @@ -169,10 +168,9 @@ static cl::opt ExperimentalCodeGen( namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. 
-cl::opt - SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, - cl::init(true), cl::ZeroOrMore, - cl::desc("Enable CopyToPhi DAG Mutation")); +cl::opt SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, + cl::init(true), + cl::desc("Enable CopyToPhi DAG Mutation")); } // end namespace llvm @@ -255,6 +253,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { << "Failed to pipeline loop"; }); + LI.LoopPipelinerInfo.reset(); return Changed; } @@ -262,6 +261,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { Changed = swingModuloScheduler(L); + LI.LoopPipelinerInfo.reset(); return Changed; } @@ -354,7 +354,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { + LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock()); + if (!LI.LoopPipelinerInfo) { LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n"); NumFailLoop++; ORE->emit([&]() { @@ -419,7 +420,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); SwingSchedulerDAG SMS(*this, L, getAnalysis(), RegClassInfo, - II_setByPragma); + II_setByPragma, LI.LoopPipelinerInfo.get()); MachineBasicBlock *MBB = L.getHeader(); // The kernel should not include any terminator instructions. These @@ -513,7 +514,7 @@ void SwingSchedulerDAG::schedule() { // Don't pipeline large loops. if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) { LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii - << ", we don't pipleline large loops\n"); + << ", we don't pipeline large loops\n"); NumFailLargeMaxMII++; Pass.ORE->emit([&]() { return MachineOptimizationRemarkAnalysis( @@ -1297,8 +1298,7 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets, for (auto W : AdjK[V]) { if (W < S) continue; - if (B[W].count(SV) == 0) - B[W].insert(SV); + B[W].insert(SV); } } Stack.pop_back(); @@ -1422,7 +1422,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { /// We ignore the back-edge recurrence in order to avoid unbounded recursion /// in the calculation of the ASAP, ALAP, etc functions. 
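The pipeliner hunks below repeatedly pair the existing isArtificial() test with a new D.getSUnit()->isBoundaryNode() check: edges that reach the DAG's boundary node (for example the exit SUnit) are bookkeeping, not loop-body dependences, so every traversal has to skip them consistently. A compact restatement of the filter, mirroring the ignoreDependence predicate that follows:

    #include "llvm/CodeGen/ScheduleDAG.h"
    using namespace llvm;

    // An edge is ignored when it is artificial, when it reaches a boundary
    // node, or when it is an anti-dependence viewed from the predecessor side.
    static bool shouldIgnore(const SDep &D, bool IsPred) {
      if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
        return true;
      return D.getKind() == SDep::Anti && IsPred;
    }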
static bool ignoreDependence(const SDep &D, bool isPred) { - if (D.isArtificial()) + if (D.isArtificial() || D.getSUnit()->isBoundaryNode()) return true; return D.getKind() == SDep::Anti && isPred; } @@ -1471,6 +1471,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { SUnit *SU = &SUnits[I]; for (const SDep &S : SU->Succs) { SUnit *succ = S.getSUnit(); + if (succ->isBoundaryNode()) + continue; if (S.getLatency() == 0) zeroLatencyHeight = std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); @@ -1575,7 +1577,9 @@ static bool computePath(SUnit *Cur, SetVector &Path, return Path.contains(Cur); bool FoundPath = false; for (auto &SI : Cur->Succs) - FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); + if (!ignoreDependence(SI, false)) + FoundPath |= + computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); for (auto &PI : Cur->Preds) if (PI.getKind() == SDep::Anti) FoundPath |= @@ -1663,7 +1667,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { LLVM_DEBUG( dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) - << ":" << RPDelta.Excess.getUnitInc()); + << ":" << RPDelta.Excess.getUnitInc() << "\n"); NS.setExceedPressure(SU); break; } @@ -1718,7 +1722,7 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { } /// Add the nodes that do not belong to a recurrence set into groups -/// based upon connected componenets. +/// based upon connected components. void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { SetVector NodesAdded; SmallPtrSet Visited; @@ -1788,7 +1792,8 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, NodesAdded.insert(SU); for (auto &SI : SU->Succs) { SUnit *Successor = SI.getSUnit(); - if (!SI.isArtificial() && NodesAdded.count(Successor) == 0) + if (!SI.isArtificial() && !Successor->isBoundaryNode() && + NodesAdded.count(Successor) == 0) addConnectedNodes(Successor, NewSet, NodesAdded); } for (auto &PI : SU->Preds) { @@ -1803,8 +1808,7 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, static bool isIntersect(SmallSetVector &Set1, const NodeSet &Set2, SmallSetVector &Result) { Result.clear(); - for (unsigned i = 0, e = Set1.size(); i != e; ++i) { - SUnit *SU = Set1[i]; + for (SUnit *SU : Set1) { if (Set2.count(SU) != 0) Result.insert(SU); } @@ -2080,6 +2084,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { }); } while (++NI != NE && scheduleFound); + // If a schedule is found, ensure non-pipelined instructions are in stage 0 + if (scheduleFound) + scheduleFound = + Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo); + // If a schedule is found, check if it is a valid schedule too. 
if (scheduleFound) scheduleFound = Schedule.isValidSchedule(this); @@ -2263,7 +2272,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc) { if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) || - Dep.isArtificial()) + Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode()) return false; if (!SwpPruneLoopCarried) @@ -2430,7 +2439,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { while (!Worklist.empty()) { const SDep &Cur = Worklist.pop_back_val(); SUnit *SuccSU = Cur.getSUnit(); - if (Visited.count(SuccSU)) + if (Visited.count(SuccSU) || SuccSU->isBoundaryNode()) continue; std::map::const_iterator it = InstrToCycle.find(SuccSU); if (it == InstrToCycle.end()) @@ -2697,21 +2706,91 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, return false; } +/// Determine transitive dependences of unpipelineable instructions +SmallSet SMSchedule::computeUnpipelineableNodes( + SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { + SmallSet DoNotPipeline; + SmallVector Worklist; + + for (auto &SU : SSD->SUnits) + if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr())) + Worklist.push_back(&SU); + + while (!Worklist.empty()) { + auto SU = Worklist.pop_back_val(); + if (DoNotPipeline.count(SU)) + continue; + LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n"); + DoNotPipeline.insert(SU); + for (auto &Dep : SU->Preds) + Worklist.push_back(Dep.getSUnit()); + if (SU->getInstr()->isPHI()) + for (auto &Dep : SU->Succs) + if (Dep.getKind() == SDep::Anti) + Worklist.push_back(Dep.getSUnit()); + } + return DoNotPipeline; +} + +// Determine all instructions upon which any unpipelineable instruction depends +// and ensure that they are in stage 0. If unable to do so, return false. +bool SMSchedule::normalizeNonPipelinedInstructions( + SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { + SmallSet DNP = computeUnpipelineableNodes(SSD, PLI); + + int NewLastCycle = INT_MIN; + for (SUnit &SU : SSD->SUnits) { + if (!SU.isInstr()) + continue; + if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) { + NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]); + continue; + } + + // Put the non-pipelined instruction as early as possible in the schedule + int NewCycle = getFirstCycle(); + for (auto &Dep : SU.Preds) + NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle); + + int OldCycle = InstrToCycle[&SU]; + if (OldCycle != NewCycle) { + InstrToCycle[&SU] = NewCycle; + auto &OldS = getInstructions(OldCycle); + llvm::erase_value(OldS, &SU); + getInstructions(NewCycle).emplace_back(&SU); + LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum + << ") is not pipelined; moving from cycle " << OldCycle + << " to " << NewCycle << " Instr:" << *SU.getInstr()); + } + NewLastCycle = std::max(NewLastCycle, NewCycle); + } + LastCycle = NewLastCycle; + return true; +} + // Check if the generated schedule is valid. This function checks if // an instruction that uses a physical register is scheduled in a // different stage than the definition. The pipeliner does not handle // physical register values that may cross a basic block boundary. +// Furthermore, if a physical def/use pair is assigned to the same +// cycle, orderDependence does not guarantee def/use ordering, so that +// case should be considered invalid. (The test checks for both +// earlier and same-cycle use to be more robust.) 
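In essence, the stricter physical-register validation described in the comment above requires that, for every assigned data edge inside the kernel, the use stay in the def's stage and issue in a strictly later cycle, since orderDependence gives no ordering guarantee between instructions placed in the same cycle. A compact restatement of that predicate (simplified; stage and cycle values as computed by SMSchedule):

    // A physical-register def/use pair survives modulo scheduling only when
    // the use is in the same stage as the def and strictly after its cycle.
    static bool isPhysRegPairSchedulable(int StageDef, int CycleDef,
                                         int StageUse, int CycleUse) {
      return StageUse == StageDef && CycleUse > CycleDef;
    }

The actual check over SU.Succs follows.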
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { for (SUnit &SU : SSD->SUnits) { if (!SU.hasPhysRegDefs) continue; int StageDef = stageScheduled(&SU); + int CycleDef = InstrToCycle[&SU]; assert(StageDef != -1 && "Instruction should have been scheduled."); for (auto &SI : SU.Succs) - if (SI.isAssignedRegDep()) - if (Register::isPhysicalRegister(SI.getReg())) + if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode()) + if (Register::isPhysicalRegister(SI.getReg())) { if (stageScheduled(SI.getSUnit()) != StageDef) return false; + if (InstrToCycle[SI.getSUnit()] <= CycleDef) + return false; + } } return true; } @@ -2998,7 +3077,7 @@ bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const { if (!SCDesc->isValid()) { LLVM_DEBUG({ dbgs() << "No valid Schedule Class Desc for schedClass!\n"; - dbgs() << "isPseduo:" << MID->isPseudo() << "\n"; + dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; }); return true; } @@ -3038,7 +3117,7 @@ void ResourceManager::reserveResources(const MCInstrDesc *MID) { if (!SCDesc->isValid()) { LLVM_DEBUG({ dbgs() << "No valid Schedule Class Desc for schedClass!\n"; - dbgs() << "isPseduo:" << MID->isPseudo() << "\n"; + dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; }); return; } diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 1a4ad53ddf81..511bb80052c2 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -651,3 +650,18 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const { } return false; } + +bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isArgumentRegister(MF, Reg); +} + +bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isFixedRegister(MF, Reg); +} + +bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg); +} diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index 8db893535daf..01cea85ecc7c 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -14,7 +14,9 @@ #include "llvm/CodeGen/MachineSSAContext.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index b043d4c1b0c1..4e00a211713e 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -752,7 +751,7 @@ void ScheduleDAGMI::moveInstruction( } bool ScheduleDAGMI::checkSchedLimit() { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { CurrentTop = CurrentBottom; return false; @@ -920,12 +919,10 @@ void ScheduleDAGMI::placeDebugValues() { MachineBasicBlock::iterator OrigPrevMI = P.second; if (&*RegionBegin == DbgValue) ++RegionBegin; - BB->splice(++OrigPrevMI, BB, DbgValue); - if (OrigPrevMI == std::prev(RegionEnd)) + BB->splice(std::next(OrigPrevMI), BB, DbgValue); + if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd) RegionEnd = DbgValue; } - DbgValues.clear(); - FirstDbgValue = nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2008,7 +2005,7 @@ void SchedBoundary::reset() { ReservedCycles.clear(); ReservedCyclesIndex.clear(); ResourceGroupSubUnitMasks.clear(); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is // reserved (SchedBoundary::ReservedCycles). @@ -2196,7 +2193,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) { unsigned NRCycle, InstanceIdx; std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles); if (NRCycle > CurrCycle) { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " @@ -2263,7 +2260,7 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx) { assert(SU->getInstr() && "Scheduled SUnit must have instr"); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // ReadyCycle has been bumped up to the CurrCycle when this node was // scheduled, but CurrCycle may have been eagerly advanced immediately after // scheduling, so may now be greater than ReadyCycle.
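The MachineScheduler hunks above switch several guards from #ifndef NDEBUG to #if LLVM_ENABLE_ABI_BREAKING_CHECKS. The two conditions need not coincide: members such as SchedBoundary::MaxObservedStall are declared under the ABI-breaking-checks macro (in headers not shown in this diff), so their uses must be guarded by the same condition or some build configurations fail to compile. A minimal sketch of the pattern, using a hypothetical struct:

    #include "llvm/Config/abi-breaking.h"
    #include <algorithm>

    struct BoundaryLike {
    #if LLVM_ENABLE_ABI_BREAKING_CHECKS
      unsigned MaxObservedStall = 0; // Exists only with ABI-breaking checks.
    #endif
      void noteStall(unsigned Cycles) {
    #if LLVM_ENABLE_ABI_BREAKING_CHECKS
        // Guard the use with the same macro as the declaration above, not with
        // NDEBUG, so every build configuration stays self-consistent.
        MaxObservedStall = std::max(Cycles, MaxObservedStall);
    #endif
      }
    };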
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 0dbbc218e946..006ba9273dfb 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -16,17 +16,20 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -93,18 +96,18 @@ static cl::opt SinkLoadBlocksThreshold( cl::init(20), cl::Hidden); static cl::opt -SinkInstsIntoLoop("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into loops to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt SinkIntoLoopLimit( - "machine-sink-loop-limit", - cl::desc("The maximum number of instructions considered for loop sinking."), + SinkInstsIntoCycle("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkIntoCycleLimit( + "machine-sink-cycle-limit", + cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); +STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -117,7 +120,7 @@ namespace { MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineLoopInfo *LI; + MachineCycleInfo *CI; MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -178,8 +181,9 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -230,9 +234,9 @@ namespace { MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -259,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -376,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindCycleSinkCandidates( + MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + if (!isCycleInvariant(Cycle, MI)) { + LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Don't sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) continue; @@ -407,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -423,7 +428,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - LI = &getAnalysis(); + CI = &getAnalysis().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); @@ -461,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } - if (SinkInstsIntoLoop) { - SmallVector Loops(LI->begin(), LI->end()); - for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (SinkInstsIntoCycle) { + SmallVector Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); if (!Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); continue; } SmallVector Candidates; - FindLoopSinkCandidates(L, Preheader, Candidates); + FindCycleSinkCandidates(Cycle, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model.
unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoLoopLimit) { - LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoLoop(L, *I)) + if (!SinkIntoCycle(Cycle, *I)) break; EverMadeChange = true; - ++NumLoopSunk; + ++NumCycleSunk; } } } @@ -508,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable loop there may be nowhere to stop. + // unreachable cycle there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and loop depth. + // Cache all successors, sorted by frequency info and cycle depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. @@ -632,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB loop. + // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; - // Check for backedges of more "complex" loops. - if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + MachineCycle *FromCycle = CI->getCycle(FromBB); + MachineCycle *ToCycle = CI->getCycle(ToBB); + + // Check for backedges of more "complex" cycles. + if (FromCycle == ToCycle && FromCycle && + (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -741,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper loop to a shallower - // loop, even if the latter post-dominates the former (PR21115). - if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + // It is profitable to sink an instruction from a deeper cycle to a shallower + // cycle, even if the latter post-dominates the former (PR21115). + if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. @@ -764,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineLoop *ML = LI->getLoopFor(MBB); + MachineCycle *MCycle = CI->getCycle(MBB); - // If the instruction is not inside a loop, it is not profitable to sink MI to + // If the instruction is not inside a cycle, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. - if (!ML) + if (!MCycle) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { @@ -786,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; }; - // If this instruction is inside a loop and sinking this instruction can make + // If this instruction is inside a cycle and sinking this instruction can make // more registers live range shorten, it is still profitable.
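The per-operand walk below leans on the isRegisterPressureSetExceedLimit lambda defined just above: sinking is rejected when it would push any register pressure set past its target-defined limit. A hedged sketch of that style of test, assuming the current per-set pressure for the destination block has already been computed:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    using namespace llvm;

    // Walk the (-1)-terminated list of pressure sets this register class
    // feeds and compare the measured pressure against each set's limit.
    static bool exceedsPressureLimit(const TargetRegisterInfo &TRI,
                                     const MachineFunction &MF,
                                     const TargetRegisterClass &RC,
                                     ArrayRef<unsigned> CurrentSetPressure) {
      for (const int *PS = TRI.getRegClassPressureSets(&RC); *PS != -1; ++PS)
        if (CurrentSetPressure[*PS] >= TRI.getRegPressureSetLimit(MF, *PS))
          return true;
      return false;
    }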
   for (const MachineOperand &MO : MI.operands()) {
     // Ignore non-register operands.
@@ -814,14 +823,17 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
       return false;
     } else {
       MachineInstr *DefMI = MRI->getVRegDef(Reg);
-      // DefMI is defined outside of loop. There should be no live range
-      // impact for this operand. Defination outside of loop means:
-      // 1: defination is outside of loop.
-      // 2: defination is in this loop, but it is a PHI in the loop header.
-      if (LI->getLoopFor(DefMI->getParent()) != ML ||
-          (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+      if (!DefMI)
+        continue;
+      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
+      // DefMI is defined outside of the cycle. There should be no live range
+      // impact for this operand. Definition outside of the cycle means:
+      // 1: the definition is outside of the cycle.
+      // 2: the definition is in this cycle, but it is a PHI in the cycle header.
+      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
+                              Cycle->getHeader() == DefMI->getParent()))
         continue;
-      // The DefMI is defined inside the loop.
+      // The DefMI is defined inside the cycle.
       // If sinking this operand makes some register pressure set exceed limit,
       // it is not profitable.
       if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
@@ -831,8 +843,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
     }
   }

-  // If MI is in loop and all its operands are alive across the whole loop or if
-  // no operand sinking make register pressure set exceed limit, it is
+  // If MI is in a cycle and all its operands are alive across the whole cycle
+  // or if no operand sinking makes a register pressure set exceed its limit, it is
   // profitable to sink MI.
   return true;
 }
@@ -864,14 +876,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
       AllSuccs.push_back(DTChild->getBlock());
   }

-  // Sort Successors according to their loop depth or block frequency info.
+  // Sort Successors according to their cycle depth or block frequency info.
   llvm::stable_sort(
       AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
         bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
         return HasBlockFreq ? LHSFreq < RHSFreq
-                            : LI->getLoopDepth(L) < LI->getLoopDepth(R);
+                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
       });

   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
@@ -886,7 +898,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                  AllSuccsCache &AllSuccessors) {
   assert (MBB && "Invalid MachineBasicBlock!");

-  // Loop over all the operands of the specified instruction. If there is
+  // Visit all the operands of the specified instruction. If there is
   // anything we can't handle, bail out.

   // SuccToSinkTo - This is the successor to sink this instruction to, once we
@@ -933,7 +945,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
   // Otherwise, we should look at all the successors and decide which one
   // we should sink to. If we have reliable block frequency information
   // (frequency != 0) available, give successors with smaller frequencies
-  // higher priority, otherwise prioritize smaller loop depths.
+  // higher priority, otherwise prioritize smaller cycle depths.
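// ---- [Editorial sketch: begin; not part of the patch] ----
// The ordering installed by llvm::stable_sort above, reduced to a standalone
// example with stand-in types: successors where both sides have reliable
// (non-zero) block frequency sort by frequency; everything else falls back
// to cycle depth, exactly as in GetAllSortedSuccessors.

#include <algorithm>
#include <cstdint>
#include <vector>

struct Succ { uint64_t Freq; unsigned CycleDepth; };

void sortSuccessors(std::vector<Succ> &Succs) {
  std::stable_sort(Succs.begin(), Succs.end(),
                   [](const Succ &L, const Succ &R) {
                     bool HasFreq = L.Freq != 0 && R.Freq != 0;
                     return HasFreq ? L.Freq < R.Freq
                                    : L.CycleDepth < R.CycleDepth;
                   });
}

// (The loop that follows walks the successors in exactly this order.)
// ---- [Editorial sketch: end] ----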
for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -956,7 +968,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, } // It is not possible to sink an instruction into its own block. This can - // happen with loops. + // happen with cycles. if (MBB == SuccToSinkTo) return nullptr; @@ -1081,8 +1093,7 @@ using MIRegs = std::pair>; /// Sink an instruction and its associated debug instructions. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, - SmallVectorImpl &DbgValuesToSink) { - + ArrayRef DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting // wrong location information. @@ -1101,7 +1112,7 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (auto DbgValueToSink : DbgValuesToSink) { + for (const auto &DbgValueToSink : DbgValuesToSink) { MachineInstr *DbgMI = DbgValueToSink.first; MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); @@ -1166,7 +1177,7 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, // If this BB is too big or the block number in straight line between From // and To is too big, stop searching to save compiling time. - if (BB->size() > SinkLoadInstsPerBlockThreshold || + if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) || HandledDomBlocks.size() > SinkLoadBlocksThreshold) { for (auto *DomBB : HandledDomBlocks) { if (DomBB != BB && DT->dominates(DomBB, BB)) @@ -1211,69 +1222,78 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. -bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); - assert(Preheader && "Loop sink needs a preheader block"); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. 
+bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop + // the instruction (and thus possibly executing it on every cycle // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); return false; } - if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) { + LLVM_DEBUG( + dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); - SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, + I); + + // Conservatively clear any kill flags on uses of sunk instruction + for (MachineOperand &MO : I.operands()) { + if (MO.isReg() && MO.readsReg()) + RegsToClearKillFlags.insert(MO.getReg()); + } // The instruction is moved from its basic block, so do not retain the // debug information. 
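// ---- [Editorial sketch: begin; not part of the patch] ----
// SinkIntoCycle above folds every use block into a single sink block via
// nearest-common-dominator queries. A self-contained version over a toy
// dominator tree (immediate-dominator pointer plus depth); none of it is
// LLVM API:

#include <vector>

struct Block { Block *IDom = nullptr; unsigned Depth = 0; };

// Classic NCD walk: lift the deeper node until the two meet.
Block *nearestCommonDominator(Block *A, Block *B) {
  while (A && B && A != B) {
    if (A->Depth >= B->Depth)
      A = A->IDom;
    else
      B = B->IDom;
  }
  return A == B ? A : nullptr;
}

Block *chooseSinkBlock(const std::vector<Block *> &UseBlocks) {
  Block *Sink = nullptr;
  for (Block *B : UseBlocks) {
    Sink = Sink ? nearestCommonDominator(Sink, B) : B;
    if (!Sink)
      return nullptr;   // no common dominator: cannot sink
  }
  return Sink;          // the caller still rejects the preheader itself
}
// ---- [Editorial sketch: end] ----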
@@ -1282,6 +1302,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { return true; } +/// Return true if a target defined block prologue instruction interferes +/// with a sink candidate. +static bool blockPrologueInterferes(MachineBasicBlock *BB, + MachineBasicBlock::iterator End, + MachineInstr &MI, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + const MachineRegisterInfo *MRI) { + if (BB->begin() == End) + return false; // no prologue + for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) { + // Only check target defined prologue instructions + if (!TII->isBasicBlockPrologue(*PI)) + continue; + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + if (Register::isPhysicalRegister(Reg) && + (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg)))) + continue; + if (PI->modifiesRegister(Reg, TRI)) + return true; + } else { + if (PI->readsRegister(Reg, TRI)) + return true; + // Check for interference with non-dead defs + auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI); + if (DefOp && !DefOp->isDead()) + return true; + } + } + } + return false; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -1356,9 +1415,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, TryBreak = true; } - // Don't sink instructions into a loop. - if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + // Don't sink instructions into a cycle. + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); TryBreak = true; } @@ -1393,9 +1454,12 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, } // Determine where to insert into. Skip phi nodes. - MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); - while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) - ++InsertPos; + MachineBasicBlock::iterator InsertPos = + SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin()); + if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) { + LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n"); + return false; + } // Collect debug users of any vreg that this inst defines. SmallVector DbgUsersToSink; @@ -1684,14 +1748,6 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } -static SmallSet getRegUnits(MCRegister Reg, - const TargetRegisterInfo *TRI) { - SmallSet RegUnits; - for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) - RegUnits.insert(*RI); - return RegUnits; -} - bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1737,14 +1793,15 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, } // Record debug use of each reg unit. 
- SmallSet RegUnits = getRegUnits(MO.getReg(), TRI); - for (MCRegister Reg : RegUnits) - MIUnits[Reg].push_back(MO.getReg()); + for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); + ++RI) + MIUnits[*RI].push_back(MO.getReg()); } } if (IsValid) { - for (auto RegOps : MIUnits) - SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second}); + for (auto &RegOps : MIUnits) + SeenDbgInstrs[RegOps.first].emplace_back(&MI, + std::move(RegOps.second)); } continue; } @@ -1791,22 +1848,29 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, if (!MO.isReg() || !MO.isDef()) continue; - SmallSet Units = getRegUnits(MO.getReg(), TRI); - for (MCRegister Reg : Units) { - for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) { + for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) { + for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) { auto &Regs = DbgValsToSinkMap[MIRegs.first]; for (unsigned Reg : MIRegs.second) Regs.push_back(Reg); } } } - SmallVector DbgValsToSink(DbgValsToSinkMap.begin(), - DbgValsToSinkMap.end()); + auto DbgValsToSink = DbgValsToSinkMap.takeVector(); + + LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); + + MachineBasicBlock::iterator InsertPos = + SuccBB->SkipPHIsAndLabels(SuccBB->begin()); + if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) { + LLVM_DEBUG( + dbgs() << " *** Not sinking: prologue interference\n"); + continue; + } // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); - MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index 0803c2b8b85a..a85dbf1de1ee 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -12,29 +12,30 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineStableHash.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/MIRFormatter.h" -#include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/StableHashing.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/ModuleSlotTracker.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/Target/TargetIntrinsicInfo.h" -#include 
"llvm/Target/TargetMachine.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/ErrorHandling.h" #define DEBUG_TYPE "machine-stable-hash" @@ -64,7 +65,10 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { case MachineOperand::MO_Register: if (Register::isVirtualRegister(MO.getReg())) { const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo(); - return MRI.getVRegDef(MO.getReg())->getOpcode(); + SmallVector DefOpcodes; + for (auto &Def : MRI.def_instructions(MO.getReg())) + DefOpcodes.push_back(Def.getOpcode()); + return hash_combine_range(DefOpcodes.begin(), DefOpcodes.end()); } // Register operands don't have target flags. @@ -192,3 +196,21 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); } + +stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) { + SmallVector HashComponents; + // TODO: Hash more stuff like block alignment and branch probabilities. + for (auto &MI : MBB) + HashComponents.push_back(stableHashValue(MI)); + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} + +stable_hash llvm::stableHashValue(const MachineFunction &MF) { + SmallVector HashComponents; + // TODO: Hash lots more stuff like function alignment and stack objects. + for (auto &MBB : MF) + HashComponents.push_back(stableHashValue(MBB)); + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp b/llvm/lib/CodeGen/MachineStripDebug.cpp index 86cf4999d4b0..6128248a028e 100644 --- a/llvm/lib/CodeGen/MachineStripDebug.cpp +++ b/llvm/lib/CodeGen/MachineStripDebug.cpp @@ -10,10 +10,10 @@ /// tests can be debugified without affecting the output MIR. 
//===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Debugify.h" diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index c9d3e473062b..db04f2bcc095 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -32,10 +32,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -48,6 +48,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -55,12 +57,14 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCTargetOptions.h" @@ -95,6 +99,7 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; + const RegisterBankInfo *RBI; unsigned foundErrors; @@ -370,6 +375,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { TM = &MF.getTarget(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); + RBI = MF.getSubtarget().getRegBankInfo(); MRI = &MF.getRegInfo(); const bool isFunctionFailedISel = MF.getProperties().hasProperty( @@ -442,7 +448,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &Op = MI.getOperand(I); if (Op.getParent() != &MI) { - // Make sure to use correct addOperand / RemoveOperand / ChangeTo + // Make sure to use correct addOperand / removeOperand / ChangeTo // functions when replacing operands of a MachineInstr. report("Instruction has operand with wrong parent set", &MI); } @@ -1000,17 +1006,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - if (MRI->getRegBankOrNull(Src) != MRI->getRegBankOrNull(Dst)) { - report( - Twine(OpcName, " source and destination register banks must match"), - MI); + const RegisterBank *SrcRB = RBI->getRegBank(Src, *MRI, *TRI); + const RegisterBank *DstRB = RBI->getRegBank(Dst, *MRI, *TRI); + + // Allow only the source bank to be set. 
+ if ((SrcRB && DstRB && SrcRB != DstRB) || (DstRB && !SrcRB)) { + report(Twine(OpcName, " cannot change register bank"), MI); break; } - if (MRI->getRegClassOrNull(Src) != MRI->getRegClassOrNull(Dst)) + // Don't allow a class change. Do allow member class->regbank. + const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Dst); + if (DstRC && DstRC != MRI->getRegClassOrNull(Src)) { report( Twine(OpcName, " source and destination register classes must match"), MI); + break; + } break; } @@ -1072,6 +1084,18 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (ValTy.getSizeInBytes() < MMO.getSize()) report("store memory size cannot exceed value size", MI); } + + const AtomicOrdering Order = MMO.getSuccessOrdering(); + if (Opc == TargetOpcode::G_STORE) { + if (Order == AtomicOrdering::Acquire || + Order == AtomicOrdering::AcquireRelease) + report("atomic store cannot use acquire ordering", MI); + + } else { + if (Order == AtomicOrdering::Release || + Order == AtomicOrdering::AcquireRelease) + report("atomic load cannot use release ordering", MI); + } } break; @@ -1628,6 +1652,43 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { verifyAllRegOpsScalar(*MI, *MRI); break; } + case TargetOpcode::G_IS_FPCLASS: { + LLT DestTy = MRI->getType(MI->getOperand(0).getReg()); + LLT DestEltTy = DestTy.getScalarType(); + if (!DestEltTy.isScalar()) { + report("Destination must be a scalar or vector of scalars", MI); + break; + } + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT SrcEltTy = SrcTy.getScalarType(); + if (!SrcEltTy.isScalar()) { + report("Source must be a scalar or vector of scalars", MI); + break; + } + if (!verifyVectorElementMatch(DestTy, SrcTy, MI)) + break; + const MachineOperand &TestMO = MI->getOperand(2); + if (!TestMO.isImm()) { + report("floating-point class set (operand 2) must be an immediate", MI); + break; + } + int64_t Test = TestMO.getImm(); + if (Test < 0 || Test > fcAllFlags) { + report("Incorrect floating-point class set (operand 2)", MI); + break; + } + const MachineOperand &SemanticsMO = MI->getOperand(3); + if (!SemanticsMO.isImm()) { + report("floating-point semantics (operand 3) must be an immediate", MI); + break; + } + int64_t Semantics = SemanticsMO.getImm(); + if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) { + report("Incorrect floating-point semantics (operand 3)", MI); + break; + } + break; + } default: break; } @@ -1912,6 +1973,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugInstr()) checkLiveness(MO, MONum); + if (MO->isDef() && MO->isUndef() && !MO->getSubReg() && + MO->getReg().isVirtual()) // TODO: Apply to physregs too + report("Undef virtual register def operands require a subregister", MO, MONum); + // Verify the consistency of tied operands. 
if (MO->isTied()) { unsigned OtherIdx = MI->findTiedOperandIdx(MONum); @@ -2148,6 +2213,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } break; + case MachineOperand::MO_CFIIndex: + if (MO->getCFIIndex() >= MF->getFrameInstructions().size()) + report("CFI instruction has invalid index", MO, MONum); + break; + default: break; } diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp index b0760322064c..fa5df68b8abc 100644 --- a/llvm/lib/CodeGen/MacroFusion.cpp +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -12,11 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MacroFusion.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index f91a9d2c3a32..3245d9649be1 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" @@ -157,7 +158,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() { SmallVector EpilogBBs; // Generate the epilog instructions to complete the pipeline. - generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); + generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs); // We need this step because the register allocation doesn't handle some // situations well, so we insert copies to help out. @@ -239,11 +240,9 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage, /// Generate the pipeline epilog code. The epilog code finishes the iterations /// that were started in either the prolog or the kernel. We create a basic /// block for each stage that needs to complete. -void ModuloScheduleExpander::generateEpilog(unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, - MBBVectorTy &PrologBBs) { +void ModuloScheduleExpander::generateEpilog( + unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, + ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) { // We need to change the branch from the kernel to the first epilog block, so // this call to analyze branch uses the kernel rather than the original BB. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -313,7 +312,12 @@ void ModuloScheduleExpander::generateEpilog(unsigned LastStage, // Create a branch to the new epilog from the kernel. // Remove the original branch and add a new branch to the epilog. TII->removeBranch(*KernelBB); - TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + assert((OrigBB == TBB || OrigBB == FBB) && + "Unable to determine looping branch direction"); + if (OrigBB != TBB) + TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc()); + else + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); // Add a branch to the loop exit. 
if (EpilogBBs.size() > 0) { MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); @@ -813,8 +817,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) if (MI.getOperand(i + 1).getMBB() == Incoming) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); + MI.removeOperand(i + 1); + MI.removeOperand(i); break; } } @@ -846,7 +850,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, Optional StaticallyGreater = LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); unsigned numAdded = 0; - if (!StaticallyGreater.hasValue()) { + if (!StaticallyGreater) { Prolog->addSuccessor(Epilog); numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); } else if (*StaticallyGreater == false) { @@ -999,7 +1003,7 @@ MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr( } /// Update the machine instruction with new virtual registers. This -/// function may change the defintions and/or uses. +/// function may change the definitions and/or uses. void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, bool LastDef, unsigned CurStageNum, @@ -1159,8 +1163,17 @@ void ModuloScheduleExpander::rewriteScheduledInstr( if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) ReplaceReg = NewReg; if (ReplaceReg) { - MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); - UseOp.setReg(ReplaceReg); + const TargetRegisterClass *NRC = + MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); + if (NRC) + UseOp.setReg(ReplaceReg); + else { + Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), + SplitReg) + .addReg(ReplaceReg); + UseOp.setReg(SplitReg); + } } } } @@ -1205,8 +1218,12 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, MI.eraseFromParent(); Changed = true; } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) { - MRI.constrainRegClass(MI.getOperand(1).getReg(), - MRI.getRegClass(MI.getOperand(0).getReg())); + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(MI.getOperand(1).getReg(), + MRI.getRegClass(MI.getOperand(0).getReg())); + assert(ConstrainRegClass && + "Expected a valid constrained register class!"); + (void)ConstrainRegClass; MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); if (LIS) @@ -1404,7 +1421,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { while (DefaultI != Defaults.rend()) LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg)); - if (IllegalPhiDefault.hasValue()) { + if (IllegalPhiDefault) { // The consumer optionally consumes LoopProducer in the same iteration // (because the producer is scheduled at an earlier cycle than the consumer) // or the initial value. To facilitate this we create an illegal block here @@ -1414,7 +1431,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { Register R = MRI.createVirtualRegister(RC); MachineInstr *IllegalPhi = BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R) - .addReg(IllegalPhiDefault.getValue()) + .addReg(*IllegalPhiDefault) .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect. .addReg(LoopReg) .addMBB(BB); // Block choice is arbitrary and has no effect. 
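// ---- [Editorial sketch: begin; not part of the patch] ----
// Several ModuloSchedule hunks above adopt the same fallback: try to
// constrain the replacement register to the class the use requires, and if
// the classes are incompatible, route the value through a fresh register of
// the required class via a COPY. A toy model (register classes as bitmasks,
// nothing here is LLVM API):

#include <deque>
#include <utility>
#include <vector>

using ClassMask = unsigned;                 // one bit per register class
struct VReg { ClassMask Classes; };

// Pool owns registers (deque keeps pointers stable); Copies records every
// (Src, Dst) COPY that had to be materialized, mirroring the
// BuildMI(..., COPY, SplitReg) fallback in rewriteScheduledInstr.
VReg *constrainOrCopy(VReg *Repl, ClassMask Required, std::deque<VReg> &Pool,
                      std::vector<std::pair<VReg *, VReg *>> &Copies) {
  if (Repl->Classes & Required) {           // ~ constrainRegClass succeeds
    Repl->Classes &= Required;
    return Repl;
  }
  Pool.push_back(VReg{Required});           // ~ createVirtualRegister(OldRC)
  Copies.push_back({Repl, &Pool.back()});
  return &Pool.back();                      // rewrite the use to the split reg
}
// ---- [Editorial sketch: end] ----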
@@ -1430,7 +1447,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { Register KernelRewriter::phi(Register LoopReg, Optional InitReg, const TargetRegisterClass *RC) { // If the init register is not undef, try and find an existing phi. - if (InitReg.hasValue()) { + if (InitReg) { auto I = Phis.find({LoopReg, InitReg.getValue()}); if (I != Phis.end()) return I->second; @@ -1446,7 +1463,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional InitReg, auto I = UndefPhis.find(LoopReg); if (I != UndefPhis.end()) { Register R = I->second; - if (!InitReg.hasValue()) + if (!InitReg) // Found a phi taking undef as input, and this input is undef so return // without any more changes. return R; @@ -1454,7 +1471,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional InitReg, MachineInstr *MI = MRI.getVRegDef(R); MI->getOperand(1).setReg(InitReg.getValue()); Phis.insert({{LoopReg, InitReg.getValue()}, R}); - MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + assert(ConstrainRegClass && "Expected a valid constrained register class!"); + (void)ConstrainRegClass; UndefPhis.erase(I); return R; } @@ -1463,14 +1483,18 @@ Register KernelRewriter::phi(Register LoopReg, Optional InitReg, if (!RC) RC = MRI.getRegClass(LoopReg); Register R = MRI.createVirtualRegister(RC); - if (InitReg.hasValue()) - MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); + if (InitReg) { + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); + assert(ConstrainRegClass && "Expected a valid constrained register class!"); + (void)ConstrainRegClass; + } BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R) - .addReg(InitReg.hasValue() ? *InitReg : undef(RC)) + .addReg(InitReg ? *InitReg : undef(RC)) .addMBB(PreheaderBB) .addReg(LoopReg) .addMBB(BB); - if (!InitReg.hasValue()) + if (!InitReg) UndefPhis[LoopReg] = R; else Phis[{LoopReg, *InitReg}] = R; @@ -1793,10 +1817,10 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // Iterate in reverse order over all instructions, remapping as we go. for (MachineBasicBlock *B : reverse(Blocks)) { - for (auto I = B->getFirstInstrTerminator()->getReverseIterator(); + for (auto I = B->instr_rbegin(); I != std::next(B->getFirstNonPHI()->getReverseIterator());) { - MachineInstr *MI = &*I++; - rewriteUsesOf(MI); + MachineBasicBlock::reverse_instr_iterator MI = I++; + rewriteUsesOf(&*MI); } } for (auto *MI : IllegalPhisToDelete) { @@ -1919,7 +1943,7 @@ void PeelingModuloScheduleExpander::fixupBranches() { TII->removeBranch(*Prolog); Optional StaticallyGreater = LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond); - if (!StaticallyGreater.hasValue()) { + if (!StaticallyGreater) { LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); // Dynamically branch based on Cond. TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc()); @@ -1929,8 +1953,8 @@ void PeelingModuloScheduleExpander::fixupBranches() { // blocks. Leave it to unreachable-block-elim to clean up. 
Prolog->removeSuccessor(Fallthrough); for (MachineInstr &P : Fallthrough->phis()) { - P.RemoveOperand(2); - P.RemoveOperand(1); + P.removeOperand(2); + P.removeOperand(1); } TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc()); KernelDisposed = true; @@ -1939,8 +1963,8 @@ void PeelingModuloScheduleExpander::fixupBranches() { // Prolog always falls through; remove incoming values in epilog. Prolog->removeSuccessor(Epilog); for (MachineInstr &P : Epilog->phis()) { - P.RemoveOperand(4); - P.RemoveOperand(3); + P.removeOperand(4); + P.removeOperand(3); } } } diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index db5217469fba..7304bfef55cb 100644 --- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -25,7 +25,7 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { Entry.Symbol = nullptr; CurrentEndOffset += S.size() + 1; } - return DwarfStringPoolEntryRef(*I.first, true); + return DwarfStringPoolEntryRef(*I.first); } StringRef NonRelocatableStringpool::internString(StringRef S) { @@ -44,7 +44,7 @@ NonRelocatableStringpool::getEntriesForEmission() const { Result.reserve(Strings.size()); for (const auto &E : Strings) if (E.getValue().isIndexed()) - Result.emplace_back(E, true); + Result.emplace_back(E); llvm::sort(Result, [](const DwarfStringPoolEntryRef A, const DwarfStringPoolEntryRef B) { return A.getIndex() < B.getIndex(); diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp index 8a6cf47c0d89..d5d262e4047a 100644 --- a/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 7693ab417de9..7709095cd683 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -31,9 +31,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp index 3e32afaafa6e..43b23368ead2 100644 --- a/llvm/lib/CodeGen/ParallelCG.cpp +++ b/llvm/lib/CodeGen/ParallelCG.cpp @@ -16,8 +16,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SplitModule.h" diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp index ca44b7a53982..0f9da0637ced 100644 --- a/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/llvm/lib/CodeGen/PatchableFunction.cpp @@ -14,11 +14,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/Passes.h" -#include 
"llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index f9b16d2630d6..31e37c4cd7e3 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -90,7 +90,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -214,8 +213,9 @@ namespace { const SmallSet &TargetReg, RecurrenceCycle &RC); - /// If copy instruction \p MI is a virtual register copy, track it in - /// the set \p CopyMIs. If this virtual register was previously seen as a + /// If copy instruction \p MI is a virtual register copy or a copy of a + /// constant physical register to a virtual register, track it in the + /// set \p CopyMIs. If this virtual register was previously seen as a /// copy, replace the uses of this copy with the previously seen copy's /// destination register. bool foldRedundantCopy(MachineInstr &MI, @@ -810,7 +810,7 @@ protected: unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. public: Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} - virtual ~Rewriter() {} + virtual ~Rewriter() = default; /// Get the next rewritable source (SrcReg, SrcSubReg) and /// the related value that it affects (DstReg, DstSubReg). @@ -1022,7 +1022,7 @@ public: CurrentSrcIdx = -1; // Rewrite the operation as a COPY. // Get rid of the sub-register index. - CopyLike.RemoveOperand(2); + CopyLike.removeOperand(2); // Morph the operation into a COPY. CopyLike.setDesc(TII.get(TargetOpcode::COPY)); return true; @@ -1412,7 +1412,7 @@ bool PeepholeOptimizer::foldRedundantCopy( Register SrcReg = MI.getOperand(1).getReg(); unsigned SrcSubReg = MI.getOperand(1).getSubReg(); - if (!SrcReg.isVirtual()) + if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg)) return false; Register DstReg = MI.getOperand(0).getReg(); @@ -1643,8 +1643,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // without any intervening re-definition of $physreg. DenseMap NAPhysToVirtMIs; - // Set of pairs of virtual registers and their subregs that are copied - // from. + // Set of copies to virtual registers keyed by source register. Never + // holds any physreg which requires def tracking. 
DenseMap CopySrcMIs; bool IsLoopHeader = MLI->isLoopHeader(&MBB); diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp index 82ed386db827..97b1532300b1 100644 --- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -28,14 +28,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" using namespace llvm; #define DEBUG_TYPE "post-RA-hazard-rec" @@ -72,10 +69,11 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { TII->CreateTargetPostRAHazardRecognizer(Fn)); // Return if the target has not implemented a hazard recognizer. - if (!HazardRec.get()) + if (!HazardRec) return false; // Loop over all of the basic blocks + bool Changed = false; for (auto &MBB : Fn) { // We do not call HazardRec->reset() here to make sure we are handling noop // hazards at the start of basic blocks. @@ -85,6 +83,8 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { HazardRec->EmitNoops(NumPreNoops); TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops); NumNoops += NumPreNoops; + if (NumPreNoops) + Changed = true; HazardRec->EmitInstruction(&MI); if (HazardRec->atIssueLimit()) { @@ -92,5 +92,5 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { } } } - return true; + return Changed; } diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index aac46cb22084..98fc7e07a1b4 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -25,18 +25,16 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -72,7 +70,7 @@ DebugMod("postra-sched-debugmod", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden); -AntiDepBreaker::~AntiDepBreaker() { } +AntiDepBreaker::~AntiDepBreaker() = default; namespace { class PostRAScheduler : public MachineFunctionPass { diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 74b903f99284..1115c2a27956 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -18,10 +18,8 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include 
"llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index d232ca3a69c3..7327f9e52efc 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -11,10 +11,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -45,6 +46,11 @@ public: void getAnalysisUsage(AnalysisUsage &au) const override; bool runOnMachineFunction(MachineFunction &MF) override; + + virtual MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } }; } // end anonymous namespace @@ -124,7 +130,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // Using instr wasn't found, it could be in another block. // Leave the physreg IMPLICIT_DEF, but trim any extra operands. for (unsigned i = MI->getNumOperands() - 1; i; --i) - MI->RemoveOperand(i); + MI->removeOperand(i); LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI); } @@ -140,7 +146,6 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); assert(WorkList.empty() && "Inconsistent worklist state"); for (MachineBasicBlock &MBB : MF) { diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 8d8a6126dad0..1a0f296d5fdc 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -55,10 +55,8 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -130,6 +128,7 @@ private: void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj); void insertPrologEpilogCode(MachineFunction &MF); + void insertZeroCallUsedRegs(MachineFunction &MF); }; } // end anonymous namespace @@ -284,6 +283,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { assert(!Failed && "Invalid warn-stack-size fn attr value"); (void)Failed; } + if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) { + StackSize += MFI.getUnsafeStackSize(); + } if (StackSize > Threshold) { DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning); F.getContext().diagnose(DiagStackSize); @@ -837,8 +839,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // Adjust 'Offset' to point to the end of last fixed sized preallocated // object. for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. 
+ if (MFI.getStackID(i) != TargetStackID::Default) continue; int64_t FixedOff; @@ -855,47 +857,34 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { if (FixedOff > Offset) Offset = FixedOff; } + Align MaxAlign = MFI.getMaxAlign(); // First assign frame offsets to stack objects that are used to spill // callee saved registers. - if (StackGrowsDown && MaxCSFrameIndex >= MinCSFrameIndex) { - for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. - continue; + if (MaxCSFrameIndex >= MinCSFrameIndex) { + for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) { + unsigned FrameIndex = + StackGrowsDown ? MinCSFrameIndex + i : MaxCSFrameIndex - i; - // If the stack grows down, we need to add the size to find the lowest - // address of the object. - Offset += MFI.getObjectSize(i); - - // Adjust to alignment boundary - Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew); - - LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); - MFI.setObjectOffset(i, -Offset); // Set the computed offset - } - } else if (MaxCSFrameIndex >= MinCSFrameIndex) { - // Be careful about underflow in comparisons agains MinCSFrameIndex. - for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. + if (MFI.getStackID(FrameIndex) != TargetStackID::Default) continue; - if (MFI.isDeadObjectIndex(i)) + // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex)) + if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex)) continue; - // Adjust to alignment boundary - Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew); - - LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); - MFI.setObjectOffset(i, Offset); - Offset += MFI.getObjectSize(i); + AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign, + Skew); } } + assert(MaxAlign == MFI.getMaxAlign() && + "MFI.getMaxAlign should already account for all callee-saved " + "registers without a fixed stack slot"); + // FixedCSEnd is the stack offset to the end of the fixed and callee-save // stack area. int64_t FixedCSEnd = Offset; - Align MaxAlign = MFI.getMaxAlign(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer @@ -982,8 +971,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i) continue; - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. + if (MFI.getStackID(i) != TargetStackID::Default) continue; switch (MFI.getObjectSSPLayout(i)) { @@ -1036,8 +1025,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (ProtectedObjs.count(i)) continue; - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. + if (MFI.getStackID(i) != TargetStackID::Default) continue; // Add the objects that we need to allocate to our working set. 
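// ---- [Editorial sketch: begin; not part of the patch] ----
// The offset arithmetic the removed code above spelled out inline (and which
// AdjustStackOffset now centralizes), ignoring the stack-skew parameter:

#include <cassert>
#include <cstdint>

uint64_t alignTo(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-2 alignment");
  return (Value + Align - 1) & ~(Align - 1);
}

// Downward-growing stack: bump past the object, round to its alignment, and
// the object's SP-relative offset is the negated running total.
int64_t allocateDownward(uint64_t &Offset, uint64_t Size, uint64_t Align) {
  Offset += Size;                  // reach the lowest address of the object
  Offset = alignTo(Offset, Align); // adjust to alignment boundary
  return -static_cast<int64_t>(Offset);
}
// ---- [Editorial sketch: end] ----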
@@ -1145,6 +1134,9 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(MF, *RestoreBlock); + // Zero call used registers before restoring callee-saved registers. + insertZeroCallUsedRegs(MF); + for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.inlineStackProbe(MF, *SaveBlock); @@ -1155,11 +1147,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { if (MF.shouldSplitStack()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.adjustForSegmentedStacks(MF, *SaveBlock); - // Record that there are split-stack functions, so we will emit a - // special section to tell the linker. - MF.getMMI().setHasSplitStack(true); - } else - MF.getMMI().setHasNosplitStack(true); + } // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -1171,6 +1159,120 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { TFI.adjustForHiPEPrologue(MF, *SaveBlock); } +/// insertZeroCallUsedRegs - Zero out call used registers. +void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { + const Function &F = MF.getFunction(); + + if (!F.hasFnAttribute("zero-call-used-regs")) + return; + + using namespace ZeroCallUsedRegs; + + ZeroCallUsedRegsKind ZeroRegsKind = + StringSwitch( + F.getFnAttribute("zero-call-used-regs").getValueAsString()) + .Case("skip", ZeroCallUsedRegsKind::Skip) + .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg) + .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR) + .Case("used-arg", ZeroCallUsedRegsKind::UsedArg) + .Case("used", ZeroCallUsedRegsKind::Used) + .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg) + .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR) + .Case("all-arg", ZeroCallUsedRegsKind::AllArg) + .Case("all", ZeroCallUsedRegsKind::All); + + if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip) + return; + + const bool OnlyGPR = static_cast(ZeroRegsKind) & ONLY_GPR; + const bool OnlyUsed = static_cast(ZeroRegsKind) & ONLY_USED; + const bool OnlyArg = static_cast(ZeroRegsKind) & ONLY_ARG; + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const BitVector AllocatableSet(TRI.getAllocatableSet(MF)); + + // Mark all used registers. + BitVector UsedRegs(TRI.getNumRegs()); + if (OnlyUsed) + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + MCRegister Reg = MO.getReg(); + if (AllocatableSet[Reg] && !MO.isImplicit() && + (MO.isDef() || MO.isUse())) + UsedRegs.set(Reg); + } + + BitVector RegsToZero(TRI.getNumRegs()); + for (MCRegister Reg : AllocatableSet.set_bits()) { + // Skip over fixed registers. + if (TRI.isFixedRegister(MF, Reg)) + continue; + + // Want only general purpose registers. + if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg)) + continue; + + // Want only used registers. + if (OnlyUsed && !UsedRegs[Reg]) + continue; + + // Want only registers used for arguments. + if (OnlyArg && !TRI.isArgumentRegister(MF, Reg)) + continue; + + RegsToZero.set(Reg); + } + + // Don't clear registers that are live when leaving the function. 
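// ---- [Editorial sketch: begin; not part of the patch] ----
// The StringSwitch above decodes "zero-call-used-regs" into a kind whose
// bits can be tested independently. A standalone model with assumed bit
// assignments (the real values live in the ZeroCallUsedRegs definitions):

#include <cstdint>

namespace sketch {
enum : uint64_t { ONLY_GPR = 1u << 0, ONLY_USED = 1u << 1, ONLY_ARG = 1u << 2 };

enum class Kind : uint64_t {
  Skip       = 1u << 3,                     // distinct non-bitmask value
  Used       = ONLY_USED,
  UsedGPR    = ONLY_USED | ONLY_GPR,
  UsedArg    = ONLY_USED | ONLY_ARG,
  UsedGPRArg = ONLY_USED | ONLY_GPR | ONLY_ARG,
  All        = 0,                           // no restriction bits set
  AllGPR     = ONLY_GPR,
  AllArg     = ONLY_ARG,
  AllGPRArg  = ONLY_GPR | ONLY_ARG,
};

// Mirrors the OnlyGPR/OnlyUsed/OnlyArg tests in insertZeroCallUsedRegs.
inline bool onlyGPR(Kind K)  { return uint64_t(K) & ONLY_GPR; }
inline bool onlyUsed(Kind K) { return uint64_t(K) & ONLY_USED; }
inline bool onlyArg(Kind K)  { return uint64_t(K) & ONLY_ARG; }
} // namespace sketch

// (The loops that follow implement the "don't clear live-out registers"
// carve-out announced in the comment above.)
// ---- [Editorial sketch: end] ----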
+ for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB.terminators()) { + if (!MI.isReturn()) + continue; + + for (const auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg())) + RegsToZero.reset(SReg); + } + } + + // Don't need to clear registers that are used/clobbered by terminating + // instructions. + for (const MachineBasicBlock &MBB : MF) { + if (!MBB.isReturnBlock()) + continue; + + MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E; + ++I) { + for (const MachineOperand &MO : I->operands()) { + if (!MO.isReg()) + continue; + + for (const MCPhysReg &Reg : + TRI.sub_and_superregs_inclusive(MO.getReg())) + RegsToZero.reset(Reg); + } + } + } + + // Don't clear registers that are reset before exiting. + for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) + for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSI.getReg())) + RegsToZero.reset(Reg); + + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + for (MachineBasicBlock &MBB : MF) + if (MBB.isReturnBlock()) + TFI.emitZeroCallUsedRegs(RegsToZero, MBB); +} + /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. void PEI::replaceFrameIndices(MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp index 5f69f9194125..86ea3ec67178 100644 --- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -18,11 +18,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCPseudoProbe.h" -#include "llvm/Target/TargetMachine.h" -#include #define DEBUG_TYPE "pseudo-probe-inserter" diff --git a/llvm/lib/CodeGen/PseudoSourceValue.cpp b/llvm/lib/CodeGen/PseudoSourceValue.cpp index 74e721dbd138..40c52b9d9707 100644 --- a/llvm/lib/CodeGen/PseudoSourceValue.cpp +++ b/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -11,26 +11,23 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + using namespace llvm; static const char *const PSVNames[] = { "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; -PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII) +PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetMachine &TM) : Kind(Kind) { - AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind); + AddressSpace = TM.getAddressSpaceForPseudoSourceKind(Kind); } - -PseudoSourceValue::~PseudoSourceValue() {} +PseudoSourceValue::~PseudoSourceValue() = default; void PseudoSourceValue::printCustom(raw_ostream &O) const { if (Kind < TargetCustom) @@ -79,9 +76,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } -CallEntryPseudoSourceValue::CallEntryPseudoSourceValue( - unsigned Kind, 
const TargetInstrInfo &TII) - : PseudoSourceValue(Kind, TII) {} +CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(unsigned Kind, + const TargetMachine &TM) + : PseudoSourceValue(Kind, TM) {} bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const { return false; @@ -96,20 +93,17 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const { } GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue( - const GlobalValue *GV, - const TargetInstrInfo &TII) - : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {} + const GlobalValue *GV, const TargetMachine &TM) + : CallEntryPseudoSourceValue(GlobalValueCallEntry, TM), GV(GV) {} ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue( - const char *ES, const TargetInstrInfo &TII) - : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {} + const char *ES, const TargetMachine &TM) + : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TM), ES(ES) {} -PseudoSourceValueManager::PseudoSourceValueManager( - const TargetInstrInfo &TIInfo) - : TII(TIInfo), - StackPSV(PseudoSourceValue::Stack, TII), - GOTPSV(PseudoSourceValue::GOT, TII), - JumpTablePSV(PseudoSourceValue::JumpTable, TII), - ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {} +PseudoSourceValueManager::PseudoSourceValueManager(const TargetMachine &TMInfo) + : TM(TMInfo), StackPSV(PseudoSourceValue::Stack, TM), + GOTPSV(PseudoSourceValue::GOT, TM), + JumpTablePSV(PseudoSourceValue::JumpTable, TM), + ConstantPoolPSV(PseudoSourceValue::ConstantPool, TM) {} const PseudoSourceValue *PseudoSourceValueManager::getStack() { return &StackPSV; @@ -129,7 +123,7 @@ const PseudoSourceValue * PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; if (!V) - V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII); + V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM); return V.get(); } @@ -138,7 +132,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { std::unique_ptr<const GlobalValuePseudoSourceValue> &E = GlobalCallEntries[GV]; if (!E) - E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII); + E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TM); return E.get(); } @@ -147,6 +141,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E = ExternalCallEntries[ES]; if (!E) - E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII); + E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TM); return E.get(); } diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index 882f8e91bf1d..ec383b9b1c65 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -8,6 +8,7 @@ // // Target-independent, SSA-based data flow graph for register data flow (RDF).
// +#include "llvm/CodeGen/RDFGraph.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -18,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RDFGraph.h" #include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -27,8 +27,6 @@ #include "llvm/IR/Function.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -979,18 +977,6 @@ RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const { return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll()); } -RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const { - if (AR.Reg == BR.Reg) { - LaneBitmask M = AR.Mask & BR.Mask; - return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef(); - } - // This isn't strictly correct, because the overlap may happen in the - // part masked out. - if (PRI.alias(AR, BR)) - return AR; - return RegisterRef(); -} - // For each stack in the map DefM, push the delimiter for block B on it. void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) { // Push block delimiters. diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index d704cf7b3213..2fd947086b4d 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -22,6 +22,7 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. // +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -32,14 +33,12 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/CodeGen/RDFGraph.h" #include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -341,9 +340,8 @@ Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA, if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef)) continue; NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG); - if (Visited.count(PA.Id)) + if (!Visited.insert(PA.Id).second) continue; - Visited.insert(PA.Id); // Go over all phi uses and get the reaching defs for each use.
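[Editor's note] The RDFLiveness change above merges a Visited.count() test and the following insert() into a single call, branching on the bool of the pair that insert() returns. A minimal standalone sketch of the idiom, using std::unordered_set in place of LLVM's set types (the node IDs and successor graph are invented for illustration):

#include <cstdint>
#include <unordered_set>
#include <vector>

using NodeId = uint32_t;

// Visit each node at most once. insert() returns {iterator, inserted}, so one
// hash lookup both tests membership and records the visit, instead of the two
// lookups that count() followed by insert() would perform.
void visitAll(const std::vector<std::vector<NodeId>> &Succs, NodeId Root) {
  std::unordered_set<NodeId> Visited;
  std::vector<NodeId> Work{Root};
  while (!Work.empty()) {
    NodeId N = Work.back();
    Work.pop_back();
    if (!Visited.insert(N).second)
      continue; // Already seen; skip without a second lookup.
    for (NodeId S : Succs[N])
      Work.push_back(S);
  }
}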
for (auto U : PA.Addr->members_if(DFG.IsRef, DFG)) { const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs, diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 1264e6021b6e..69db8bad54f9 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -34,12 +34,7 @@ static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg, const TargetRegisterInfo *TRI) { if (!isValidRegUse(MO)) return false; - if (MO.getReg() == PhysReg) - return true; - for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) - if (MO.getReg() == *R) - return true; - return false; + return TRI->regsOverlap(MO.getReg(), PhysReg); } static bool isValidRegDef(const MachineOperand &MO) { @@ -50,12 +45,7 @@ static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg, const TargetRegisterInfo *TRI) { if (!isValidRegDef(MO)) return false; - if (MO.getReg() == PhysReg) - return true; - for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) - if (MO.getReg() == *R) - return true; - return false; + return TRI->regsOverlap(MO.getReg(), PhysReg); } void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) { diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index d891d4c2ffbb..0c18814189eb 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -85,7 +85,7 @@ void RegAllocBase::allocatePhysRegs() { seedLiveRegs(); // Continue assigning vregs one at a time to available physical registers. - while (LiveInterval *VirtReg = dequeue()) { + while (const LiveInterval *VirtReg = dequeue()) { assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned"); // Unused registers can appear when the spiller coalesces snippets. @@ -140,10 +140,7 @@ void RegAllocBase::allocatePhysRegs() { // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front()); - continue; - } - - if (AvailablePhysReg) + } else if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); for (Register Reg : SplitVRegs) { @@ -176,7 +173,7 @@ void RegAllocBase::postOptimization() { DeadRemats.clear(); } -void RegAllocBase::enqueue(LiveInterval *LI) { +void RegAllocBase::enqueue(const LiveInterval *LI) { const Register Reg = LI->reg(); assert(Reg.isVirtual() && "Can only enqueue virtual registers"); diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 1fb56dbaebb7..a8bf305a50c9 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -96,19 +96,19 @@ protected: virtual Spiller &spiller() = 0; /// enqueue - Add VirtReg to the priority queue of unassigned registers. - virtual void enqueueImpl(LiveInterval *LI) = 0; + virtual void enqueueImpl(const LiveInterval *LI) = 0; /// enqueue - Add VirtReg to the priority queue of unassigned registers. - void enqueue(LiveInterval *LI); + void enqueue(const LiveInterval *LI); /// dequeue - Return the next unassigned register, or NULL. - virtual LiveInterval *dequeue() = 0; + virtual const LiveInterval *dequeue() = 0; // A RegAlloc pass should override this to provide the allocation heuristics. // Each call must guarantee forward progess by returning an available PhysReg // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. 
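[Editor's note] The ReachingDefAnalysis hunks above fold a manual walk over MCRegAliasIterator into one TRI->regsOverlap(MO.getReg(), PhysReg) query. A toy model of why a single overlap test is equivalent, with registers represented as invented bitmasks of "register units" (real TargetRegisterInfo derives the unit sets from tablegen data):

#include <cstdint>

// Hypothetical register-unit masks: two registers alias iff they share a unit,
// e.g. a 16-bit register shares its units with the 32-bit register holding it.
using RegUnitMask = uint64_t;

constexpr RegUnitMask EAX = 0b0011; // invented layout: AX's units plus more
constexpr RegUnitMask AX  = 0b0001;
constexpr RegUnitMask EBX = 0b1100;

// Stand-in for TargetRegisterInfo::regsOverlap: one intersection test replaces
// iterating every alias of one register and comparing it against the other.
constexpr bool regsOverlap(RegUnitMask A, RegUnitMask B) { return (A & B) != 0; }

static_assert(regsOverlap(EAX, AX), "sub-register aliases its super-register");
static_assert(!regsOverlap(AX, EBX), "disjoint registers do not alias");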
- virtual MCRegister selectOrSplit(LiveInterval &VirtReg, + virtual MCRegister selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. @@ -116,7 +116,7 @@ protected: static const char TimerGroupDescription[]; /// Method called when the allocator is about to remove a LiveInterval. - virtual void aboutToRemoveInterval(LiveInterval &LI) {} + virtual void aboutToRemoveInterval(const LiveInterval &LI) {} public: /// VerifyEnabled - True when -verify-regalloc is given. diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index a9816b13e798..7defdf04aec8 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -22,9 +22,7 @@ #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/Spiller.h" @@ -33,7 +31,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #include using namespace llvm; @@ -45,7 +42,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", namespace { struct CompSpillWeight { - bool operator()(LiveInterval *A, LiveInterval *B) const { + bool operator()(const LiveInterval *A, const LiveInterval *B) const { return A->weight() < B->weight(); } }; @@ -65,8 +62,9 @@ class RABasic : public MachineFunctionPass, // state std::unique_ptr<Spiller> SpillerInstance; - std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, - CompSpillWeight> Queue; + std::priority_queue<const LiveInterval *, std::vector<const LiveInterval *>, + CompSpillWeight> + Queue; // Scratch space. Allocated here to avoid repeated malloc calls in // selectOrSplit(). @@ -88,19 +86,17 @@ public: Spiller &spiller() override { return *SpillerInstance; } - void enqueueImpl(LiveInterval *LI) override { - Queue.push(LI); - } + void enqueueImpl(const LiveInterval *LI) override { Queue.push(LI); } - LiveInterval *dequeue() override { + const LiveInterval *dequeue() override { if (Queue.empty()) return nullptr; - LiveInterval *LI = Queue.top(); + const LiveInterval *LI = Queue.top(); Queue.pop(); return LI; } - MCRegister selectOrSplit(LiveInterval &VirtReg, + MCRegister selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &SplitVRegs) override; /// Perform register allocation. @@ -119,7 +115,7 @@ public: // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. - bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, + bool spillInterferences(const LiveInterval &VirtReg, MCRegister PhysReg, SmallVectorImpl<Register> &SplitVRegs); static char ID; @@ -208,16 +204,17 @@ void RABasic::releaseMemory() { // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. -bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, +bool RABasic::spillInterferences(const LiveInterval &VirtReg, + MCRegister PhysReg, SmallVectorImpl<Register> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals.
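[Editor's note] RABasic's Queue, declared in the hunk above, hands out the unassigned live interval with the largest spill weight first: std::priority_queue is a max-heap, and CompSpillWeight is its "less" comparator. A self-contained sketch with an invented Interval type:

#include <cstdio>
#include <queue>
#include <vector>

struct Interval { unsigned Reg; float Weight; };

struct CompSpillWeight {
  bool operator()(const Interval *A, const Interval *B) const {
    return A->Weight < B->Weight; // "less" comparator => max-heap on weight
  }
};

int main() {
  Interval V0{0, 1.5f}, V1{1, 8.0f}, V2{2, 0.25f};
  std::priority_queue<const Interval *, std::vector<const Interval *>,
                      CompSpillWeight>
      Queue;
  for (const Interval *I : {&V0, &V1, &V2})
    Queue.push(I);
  // Pops in decreasing weight: hard-to-spill ranges get registers first.
  while (!Queue.empty()) {
    std::printf("vreg%u weight %g\n", Queue.top()->Reg, Queue.top()->Weight);
    Queue.pop();
  }
}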
- SmallVector<LiveInterval*, 8> Intfs; + SmallVector<const LiveInterval *, 8> Intfs; // Collect interferences assigned to any alias of the physical register. for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - for (auto *Intf : reverse(Q.interferingVRegs())) { + for (const auto *Intf : reverse(Q.interferingVRegs())) { if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight()) return false; Intfs.push_back(Intf); @@ -229,7 +226,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, // Spill each interfering vreg allocated to PhysReg or an alias. for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { - LiveInterval &Spill = *Intfs[i]; + const LiveInterval &Spill = *Intfs[i]; // Skip duplicates. if (!VRM->hasPhys(Spill.reg())) @@ -258,7 +255,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, // |vregs| * |machineregs|. And since the number of interference tests is // minimal, there is no value in caching them outside the scope of // selectOrSplit(). -MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg, +MCRegister RABasic::selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &SplitVRegs) { // Populate a list of physical register spill candidates. SmallVector<MCRegister, 8> PhysRegSpillCands; diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index fc5d1104a999..ee03feda796f 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -11,13 +11,14 @@ //===----------------------------------------------------------------------===// #include "RegAllocEvictionAdvisor.h" +#include "AllocationOrder.h" #include "RegAllocGreedy.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" @@ -25,7 +26,7 @@ using namespace llvm; static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode( - "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore, + "regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values( @@ -42,6 +43,14 @@ static cl::opt<bool> EnableLocalReassignment( "may be compile time intensive"), cl::init(false)); +cl::opt<unsigned> EvictInterferenceCutoff( + "regalloc-eviction-max-interference-cutoff", cl::Hidden, + cl::desc("Number of interferences after which we declare " + "an interference unevictable and bail out. This " + "is a compilation cost-saving consideration. To " + "disable, pass a very large number."), + cl::init(10)); + #define DEBUG_TYPE "regalloc" #ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL #define LLVM_HAVE_TF_AOT @@ -66,7 +75,7 @@ public: private: std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { return std::make_unique<DefaultEvictionAdvisor>(MF, RA); } bool doInitialization(Module &M) override { @@ -113,7 +122,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const { llvm_unreachable("Unknown advisor kind"); } -RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF, +RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA) : MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()), LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()), @@ -136,8 +145,8 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF, /// register. /// @param B The live range to be evicted. /// @param BreaksHint True when B is already assigned to its preferred register. -bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, - LiveInterval &B, +bool DefaultEvictionAdvisor::shouldEvict(const LiveInterval &A, bool IsHint, + const LiveInterval &B, bool BreaksHint) const { bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill; @@ -156,7 +165,7 @@ bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, /// canEvictHintInterference - return true if the interference for VirtReg /// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. bool DefaultEvictionAdvisor::canEvictHintInterference( - LiveInterval &VirtReg, MCRegister PhysReg, + const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const { EvictionCost MaxCost; MaxCost.setBrokenHints(1); @@ -174,7 +183,7 @@ bool DefaultEvictionAdvisor::canEvictHintInterference( /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( - LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) @@ -195,12 +204,12 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. - const auto &Interferences = Q.interferingVRegs(10); - if (Interferences.size() >= 10) + const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff); + if (Interferences.size() >= EvictInterferenceCutoff) return false; // Check if any interfering live range is heavier than MaxWeight. - for (LiveInterval *Intf : reverse(Interferences)) { + for (const LiveInterval *Intf : reverse(Interferences)) { assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); @@ -227,7 +236,10 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade.
unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); - if (Cascade <= IntfCascade) { + if (Cascade == IntfCascade) + return false; + + if (Cascade < IntfCascade) { if (!Urgent) return false; // We permit breaking cascades for urgent evictions. It should be the @@ -261,7 +273,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( } MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( - LiveInterval &VirtReg, const AllocationOrder &Order, + const LiveInterval &VirtReg, const AllocationOrder &Order, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { // Keep track of the cheapest interference seen so far. EvictionCost BestCost; diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index 1f40386db8da..d57b0ca6d53d 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -9,19 +9,25 @@ #ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H #define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H -#include "AllocationOrder.h" -#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/MC/MCRegister.h" #include "llvm/Pass.h" namespace llvm { +class AllocationOrder; +class LiveInterval; +class LiveIntervals; +class LiveRegMatrix; +class MachineFunction; +class MachineRegisterInfo; +class RegisterClassInfo; +class TargetRegisterInfo; +class VirtRegMap; using SmallVirtRegSet = SmallSet; @@ -99,15 +105,14 @@ public: /// Find a physical register that can be freed by evicting the FixedRegisters, /// or return NoRegister. The eviction decision is assumed to be correct (i.e. /// no fixed live ranges are evicted) and profitable. - virtual MCRegister - tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order, - uint8_t CostPerUseLimit, - const SmallVirtRegSet &FixedRegisters) const = 0; + virtual MCRegister tryFindEvictionCandidate( + const LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const = 0; /// Find out if we can evict the live ranges occupying the given PhysReg, /// which is a hint (preferred register) for VirtReg. virtual bool - canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg, + canEvictHintInterference(const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const = 0; /// Returns true if the given \p PhysReg is a callee saved register and has @@ -115,9 +120,9 @@ public: bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; protected: - RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA); + RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA); - Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; + Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const; // Get the upper limit of elements in the given Order we need to analize. // TODO: is this heuristic, we could consider learning it. @@ -173,7 +178,7 @@ public: /// Get an advisor for the given context (i.e. 
machine function, etc) virtual std::unique_ptr - getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0; + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } protected: @@ -200,19 +205,20 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor(); // out of RegAllocGreedy.cpp class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor { public: - DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA) + DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA) : RegAllocEvictionAdvisor(MF, RA) {} private: - MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, - uint8_t, + MCRegister tryFindEvictionCandidate(const LiveInterval &, + const AllocationOrder &, uint8_t, const SmallVirtRegSet &) const override; - bool canEvictHintInterference(LiveInterval &, MCRegister, + bool canEvictHintInterference(const LiveInterval &, MCRegister, const SmallVirtRegSet &) const override; - bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool, + bool canEvictInterferenceBasedOnCost(const LiveInterval &, MCRegister, bool, EvictionCost &, const SmallVirtRegSet &) const; - bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; + bool shouldEvict(const LiveInterval &A, bool, const LiveInterval &B, + bool) const; }; } // namespace llvm diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 6653145d3d2a..72ceaa768803 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -35,14 +35,9 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -364,7 +359,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { // If this block loops back to itself, it is necessary to check whether the // use comes after the def. if (MBB->isSuccessor(MBB)) { - SelfLoopDef = MRI->getUniqueVRegDef(VirtReg); + // Find the first def in the self loop MBB. + for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) { + if (DefInst.getParent() != MBB) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } else { + if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef)) + SelfLoopDef = &DefInst; + } + } if (!SelfLoopDef) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; @@ -1117,6 +1121,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { RegMasks.clear(); BundleVirtRegsMap.clear(); + auto TiedOpIsUndef = [&](const MachineOperand &MO, unsigned Idx) { + assert(MO.isTied()); + unsigned TiedIdx = MI.findTiedOperandIdx(Idx); + const MachineOperand &TiedMO = MI.getOperand(TiedIdx); + return TiedMO.isUndef(); + }; // Scan for special cases; Apply pre-assigned register defs to state. 
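[Editor's note] The TiedOpIsUndef lambda introduced above looks up the operand tied to a def (via MachineInstr::findTiedOperandIdx) and treats the def as not live-through when that tied use is undef. A standalone model of the lookup with an invented operand record:

#include <cassert>
#include <vector>

struct Operand {
  bool IsTied = false;
  unsigned TiedTo = 0; // Index of the partner operand when IsTied.
  bool IsUndef = false;
};

// Model of the lambda: a tied def whose tied *use* is undef carries no live
// value into the instruction, so it need not be allocated live-through.
bool tiedOpIsUndef(const std::vector<Operand> &Ops, unsigned Idx) {
  assert(Ops[Idx].IsTied && "query only makes sense for tied operands");
  return Ops[Ops[Idx].TiedTo].IsUndef;
}

int main() {
  // Operand 0 (def) tied to operand 1 (use); the use is undef.
  std::vector<Operand> Ops{{true, 1, false}, {true, 0, true}};
  return tiedOpIsUndef(Ops, 0) ? 0 : 1;
}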
bool HasPhysRegUse = false; bool HasRegMask = false; @@ -1124,7 +1134,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { bool HasDef = false; bool HasEarlyClobber = false; bool NeedToAssignLiveThroughs = false; - for (MachineOperand &MO : MI.operands()) { + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { @@ -1135,7 +1146,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { HasEarlyClobber = true; NeedToAssignLiveThroughs = true; } - if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || + (MO.getSubReg() != 0 && !MO.isUndef())) NeedToAssignLiveThroughs = true; } } else if (Reg.isPhysical()) { @@ -1235,7 +1247,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { MachineOperand &MO = MI.getOperand(OpIdx); LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); unsigned Reg = MO.getReg(); - if (MO.isEarlyClobber() || MO.isTied() || + if (MO.isEarlyClobber() || + (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) || (MO.getSubReg() && !MO.isUndef())) { defineLiveThroughVirtReg(MI, OpIdx, Reg); } else { @@ -1258,7 +1271,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of ). - for (MachineOperand &MO : llvm::reverse(MI.operands())) { + for (signed I = MI.getNumOperands() - 1; I >= 0; --I) { + MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef()) continue; @@ -1273,7 +1287,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { "tied def assigned to clobbered register"); // Do not free tied operands and early clobbers. 
- if (MO.isTied() || MO.isEarlyClobber()) + if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber()) continue; Register Reg = MO.getReg(); if (!Reg) diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 7870574df5b2..2efb98ae200d 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -21,9 +21,7 @@ #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -62,6 +60,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" @@ -71,13 +70,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include #include #include -#include -#include -#include #include using namespace llvm; @@ -127,11 +122,18 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden); -static cl::opt ConsiderLocalIntervalCost( - "consider-local-interval-cost", cl::Hidden, - cl::desc("Consider the cost of local intervals created by a split " - "candidate when choosing the best split candidate."), - cl::init(false)); +static cl::opt GrowRegionComplexityBudget( + "grow-region-complexity-budget", + cl::desc("growRegion() does not scale with the number of BB edges, so " + "limit its budget and bail out once we reach the limit."), + cl::init(10000), cl::Hidden); + +static cl::opt GreedyRegClassPriorityTrumpsGlobalness( + "greedy-regclass-priority-trumps-globalness", + cl::desc("Change the greedy register allocator's live range priority " + "calculation to make the AllocationPriority of the register class " + "more important then whether the range is global"), + cl::Hidden); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -277,9 +279,9 @@ void RAGreedy::releaseMemory() { GlobalCand.clear(); } -void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); } +void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); } -void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { +void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); @@ -308,8 +310,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); - bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC)); + bool ForceGlobal = + !ReverseLocal && (Size / SlotIndex::InstrDist) > + (2 * RegClassInfo.getNumAllocatableRegs(&RC)); + unsigned GlobalBit = 0; if (Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -324,15 +328,18 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // large blocks on targets with many physical registers. 
Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - Prio |= RC.AllocationPriority << 24; } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. - Prio = (1u << 29) + Size; - - Prio |= RC.AllocationPriority << 24; + Prio = Size; + GlobalBit = 1; } + if (RegClassPriorityTrumpsGlobalness) + Prio |= RC.AllocationPriority << 25 | GlobalBit << 24; + else + Prio |= GlobalBit << 29 | RC.AllocationPriority << 24; + // Mark a higher bit to prioritize global and local above RS_Split. Prio |= (1u << 31); @@ -345,9 +352,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { CurQueue.push(std::make_pair(Prio, ~Reg)); } -LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } +const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } -LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { +const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { if (CurQueue.empty()) return nullptr; LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); @@ -360,10 +367,10 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { //===----------------------------------------------------------------------===// /// tryAssign - Try to assign VirtReg to an available register. -MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl &NewVRegs, - const SmallVirtRegSet &FixedRegisters) { +MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs, + const SmallVirtRegSet &FixedRegisters) { MCRegister PhysReg; for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { assert(*I); @@ -413,7 +420,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, +Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg, Register PrevReg) const { auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); @@ -440,94 +447,11 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, return PhysReg; } -/// Return true if all interferences between VirtReg and PhysReg between -/// Start and End can be evicted. -/// -/// \param VirtReg Live range that is about to be assigned. -/// \param PhysReg Desired register for assignment. -/// \param Start Start of range to look for interferences. -/// \param End End of range to look for interferences. -/// \param MaxCost Only look for cheaper candidates and update with new cost -/// when returning true. -/// \return True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, - MCRegister PhysReg, SlotIndex Start, - SlotIndex End, - EvictionCost &MaxCost) const { - EvictionCost Cost; - - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - - // Check if any interfering live range is heavier than MaxWeight. - for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { - // Check if interference overlast the segment in interest. - if (!Intf->overlaps(Start, End)) - continue; - - // Cannot evict non virtual reg interference. 
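[Editor's note] The enqueue() change above makes the layout of the priority word explicit: a GlobalBit marks global ranges, and the new greedy-regclass-priority-trumps-globalness flag decides whether that bit or the register class's AllocationPriority occupies the more significant position. A compile-time sketch of the two layouts; the shift amounts follow the hunk, while the field widths are otherwise invented:

#include <cstdint>

// Pack a priority word as in the diff: bit 31 marks allocatable stages, and
// the flag chooses which field dominates the unsigned comparison.
constexpr uint32_t packPrio(uint32_t SizePrio, uint32_t RCPrio,
                            bool GlobalBit, bool ClassTrumpsGlobalness) {
  uint32_t Prio = SizePrio;
  if (ClassTrumpsGlobalness)
    Prio |= RCPrio << 25 | uint32_t(GlobalBit) << 24; // class outranks global
  else
    Prio |= uint32_t(GlobalBit) << 29 | RCPrio << 24; // global outranks class
  return Prio | (1u << 31);
}

// With the flag set, a high-priority class beats a merely-global range;
// with it clear, globalness wins, matching the previous behavior.
static_assert(packPrio(0, 3, false, true) > packPrio(0, 1, true, true),
              "class priority dominates when the flag is set");
static_assert(packPrio(0, 1, true, false) > packPrio(0, 3, false, false),
              "globalness dominates when the flag is clear");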
- if (!Register::isVirtualRegister(Intf->reg())) - return false; - // Never evict spill products. They cannot split or spill. - if (ExtraInfo->getStage(*Intf) == RS_Done) - return false; - - // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); - // Update eviction cost. - Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); - // Abort if this would be too expensive. - if (!(Cost < MaxCost)) - return false; - } - } - - if (Cost.MaxWeight == 0) - return false; - - MaxCost = Cost; - return true; -} - -/// Return the physical register that will be best -/// candidate for eviction by a local split interval that will be created -/// between Start and End. -/// -/// \param Order The allocation order -/// \param VirtReg Live range that is about to be assigned. -/// \param Start Start of range to look for interferences -/// \param End End of range to look for interferences -/// \param BestEvictweight The eviction cost of that eviction -/// \return The PhysReg which is the best candidate for eviction and the -/// eviction cost in BestEvictweight -MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, - const LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictweight) const { - EvictionCost BestEvictCost; - BestEvictCost.setMax(); - BestEvictCost.MaxWeight = VirtReg.weight(); - MCRegister BestEvicteePhys; - - // Go over all physical registers and find the best candidate for eviction - for (MCRegister PhysReg : Order.getOrder()) { - - if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, - BestEvictCost)) - continue; - - // Best so far. - BestEvicteePhys = PhysReg; - } - *BestEvictweight = BestEvictCost.MaxWeight; - return BestEvicteePhys; -} - /// evictInterference - Evict any interferring registers that prevent VirtReg /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. -void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, +void RAGreedy::evictInterference(const LiveInterval &VirtReg, + MCRegister PhysReg, SmallVectorImpl &NewVRegs) { // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be @@ -538,25 +462,23 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, << " interference: Cascade " << Cascade << '\n'); // Collect all interfering virtregs first. - SmallVector Intfs; + SmallVector Intfs; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // We usually have the interfering VRegs cached so collectInterferingVRegs() // should be fast, we may need to recalculate if when different physregs // overlap the same register unit so we had different SubRanges queried // against it. - ArrayRef IVR = Q.interferingVRegs(); + ArrayRef IVR = Q.interferingVRegs(); Intfs.append(IVR.begin(), IVR.end()); } // Evict them second. This will invalidate the queries. - for (LiveInterval *Intf : Intfs) { + for (const LiveInterval *Intf : Intfs) { // The same VirtReg may be present in multiple RegUnits. Skip duplicates. 
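[Editor's note] The eviction queries in these hunks accumulate an EvictionCost (broken hints, then maximum evictee weight) and abort a candidate as soon as !(Cost < MaxCost). A reduced model of that ordering; the field names come from the calls shown here, but the lexicographic operator< and the setMax() sentinel values are assumptions about code defined elsewhere:

#include <tuple>

struct EvictionCost {
  unsigned BrokenHints = 0; // Broken hints are always the more serious cost.
  float MaxWeight = 0;      // Maximum spill weight among the evictees.

  void setMax() { BrokenHints = ~0u; MaxWeight = 1e30f; } // "infinite" cost
  bool operator<(const EvictionCost &O) const {
    // Lexicographic: prefer evictions that break fewer hints, then lighter ones.
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(O.BrokenHints, O.MaxWeight);
  }
};

// Usage mirroring the removed canEvictInterferenceInRange: keep a candidate
// only while its accumulated cost stays strictly cheaper than the best so far.
inline bool cheaper(const EvictionCost &Cost, const EvictionCost &Best) {
  return Cost < Best;
}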
if (!VRM->hasPhys(Intf->reg())) continue; - LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); - Matrix->unassign(*Intf); assert((ExtraInfo->getCascade(Intf->reg()) < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && @@ -624,7 +546,8 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit, /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. -MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, +MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg, + AllocationOrder &Order, SmallVectorImpl &NewVRegs, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) { @@ -782,12 +705,17 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { unsigned Visited = 0; #endif + unsigned long Budget = GrowRegionComplexityBudget; while (true) { ArrayRef NewBundles = SpillPlacer->getRecentPositive(); // Find new through blocks in the periphery of PrefRegBundles. for (unsigned Bundle : NewBundles) { // Look at all blocks connected to Bundle in the full graph. ArrayRef Blocks = Bundles->getBlocks(Bundle); + // Limit compilation time by bailing out after we use all our budget. + if (Blocks.size() >= Budget) + return false; + Budget -= Blocks.size(); for (unsigned Block : Blocks) { if (!Todo.test(Block)) continue; @@ -887,147 +815,14 @@ BlockFrequency RAGreedy::calcSpillCost() { return Cost; } -/// Check if splitting Evictee will create a local split interval in -/// basic block number BBNumber that may cause a bad eviction chain. This is -/// intended to prevent bad eviction sequences like: -/// movl %ebp, 8(%esp) # 4-byte Spill -/// movl %ecx, %ebp -/// movl %ebx, %ecx -/// movl %edi, %ebx -/// movl %edx, %edi -/// cltd -/// idivl %esi -/// movl %edi, %edx -/// movl %ebx, %edi -/// movl %ecx, %ebx -/// movl %ebp, %ecx -/// movl 16(%esp), %ebp # 4 - byte Reload -/// -/// Such sequences are created in 2 scenarios: -/// -/// Scenario #1: -/// %0 is evicted from physreg0 by %1. -/// Evictee %0 is intended for region splitting with split candidate -/// physreg0 (the reg %0 was evicted from). -/// Region splitting creates a local interval because of interference with the -/// evictor %1 (normally region splitting creates 2 interval, the "by reg" -/// and "by stack" intervals and local interval created when interference -/// occurs). -/// One of the split intervals ends up evicting %2 from physreg1. -/// Evictee %2 is intended for region splitting with split candidate -/// physreg1. -/// One of the split intervals ends up evicting %3 from physreg2, etc. -/// -/// Scenario #2 -/// %0 is evicted from physreg0 by %1. -/// %2 is evicted from physreg2 by %3 etc. -/// Evictee %0 is intended for region splitting with split candidate -/// physreg1. -/// Region splitting creates a local interval because of interference with the -/// evictor %1. -/// One of the split intervals ends up evicting back original evictor %1 -/// from physreg0 (the reg %0 was evicted from). -/// Another evictee %2 is intended for region splitting with split candidate -/// physreg1. -/// One of the split intervals ends up evicting %3 from physreg2, etc. -/// -/// \param Evictee The register considered to be split. -/// \param Cand The split candidate that determines the physical register -/// we are splitting for and the interferences. -/// \param BBNumber The number of a BB for which the region split process will -/// create a local split interval. 
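[Editor's note] growRegion(), patched above, now spends a GrowRegionComplexityBudget (default 10000) as it visits blocks and returns false once the budget is exhausted, since the loop does not otherwise scale with the number of basic-block edges. The shape of that guard, on an invented worklist:

#include <cstddef>
#include <vector>

// Expand a region from a worklist of block groups, but bail out (returning
// false, "no region found") once the configured budget of visited blocks is
// spent, bounding compile time instead of blowing up on dense CFGs.
bool growRegionBounded(const std::vector<std::vector<unsigned>> &Work,
                       unsigned long Budget /* e.g. 10000, as in the diff */) {
  for (const std::vector<unsigned> &Blocks : Work) {
    if (Blocks.size() >= Budget)
      return false; // Out of budget: give up on this candidate region.
    Budget -= Blocks.size();
    // ... process Blocks here ...
  }
  return true;
}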
-/// \param Order The physical registers that may get evicted by a split -/// artifact of Evictee. -/// \return True if splitting Evictee may cause a bad eviction chain, false -/// otherwise. -bool RAGreedy::splitCanCauseEvictionChain(Register Evictee, - GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order) { - EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee); - unsigned Evictor = VregEvictorInfo.first; - MCRegister PhysReg = VregEvictorInfo.second; - - // No actual evictor. - if (!Evictor || !PhysReg) - return false; - - float MaxWeight = 0; - MCRegister FutureEvictedPhysReg = - getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), - Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); - - // The bad eviction chain occurs when either the split candidate is the - // evicting reg or one of the split artifact will evict the evicting reg. - if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) - return false; - - Cand.Intf.moveToBlock(BBNumber); - - // Check to see if the Evictor contains interference (with Evictee) in the - // given BB. If so, this interference caused the eviction of Evictee from - // PhysReg. This suggest that we will create a local interval during the - // region split to avoid this interference This local interval may cause a bad - // eviction chain. - if (!LIS->hasInterval(Evictor)) - return false; - LiveInterval &EvictorLI = LIS->getInterval(Evictor); - if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end()) - return false; - - // Now, check to see if the local interval we will create is going to be - // expensive enough to evict somebody If so, this may cause a bad eviction - // chain. - float splitArtifactWeight = - VRAI->futureWeight(LIS->getInterval(Evictee), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); - if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) - return false; - - return true; -} - -/// Check if splitting VirtRegToSplit will create a local split interval -/// in basic block number BBNumber that may cause a spill. -/// -/// \param VirtRegToSplit The register considered to be split. -/// \param Cand The split candidate that determines the physical -/// register we are splitting for and the interferences. -/// \param BBNumber The number of a BB for which the region split process -/// will create a local split interval. -/// \param Order The physical registers that may get evicted by a -/// split artifact of VirtRegToSplit. -/// \return True if splitting VirtRegToSplit may cause a spill, false -/// otherwise. -bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, - GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order) { - Cand.Intf.moveToBlock(BBNumber); - - // Check if the local interval will find a non interfereing assignment. - for (auto PhysReg : Order.getOrder()) { - if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(), - Cand.Intf.last(), PhysReg)) - return false; - } - - // The local interval is not able to find non interferencing assignment - // and not able to evict a less worthy interval, therfore, it can cause a - // spill. - return true; -} - /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. 
/// BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, - const AllocationOrder &Order, - bool *CanCauseEvictionChain) { + const AllocationOrder &Order) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; - Register VirtRegToSplit = SA->getParent().reg(); ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned I = 0; I != UseBlocks.size(); ++I) { const SplitAnalysis::BlockInfo &BI = UseBlocks[I]; @@ -1037,29 +832,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, unsigned Ins = 0; Cand.Intf.moveToBlock(BC.Number); - // Check wheather a local interval is going to be created during the region - // split. Calculate adavanced spilt cost (cost of local intervals) if option - // is enabled. - if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn && - BI.LiveOut && RegIn && RegOut) { - - if (CanCauseEvictionChain && - splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { - // This interference causes our eviction from this assignment, we might - // evict somebody else and eventually someone will spill, add that cost. - // See splitCanCauseEvictionChain for detailed description of scenarios. - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - - *CanCauseEvictionChain = true; - - } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number, - Order)) { - // This interference causes local interval to spill, add that cost. - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - } - } if (BI.LiveIn) Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); @@ -1080,20 +852,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, if (Cand.Intf.hasInterference()) { GlobalCost += SpillPlacer->getBlockFrequency(Number); GlobalCost += SpillPlacer->getBlockFrequency(Number); - - // Check wheather a local interval is going to be created during the - // region split. - if (EnableAdvancedRASplitCost && CanCauseEvictionChain && - splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { - // This interference cause our eviction from this assignment, we might - // evict somebody else, add that cost. - // See splitCanCauseEvictionChain for detailed description of - // scenarios. - GlobalCost += SpillPlacer->getBlockFrequency(Number); - GlobalCost += SpillPlacer->getBlockFrequency(Number); - - *CanCauseEvictionChain = true; - } } continue; } @@ -1253,7 +1011,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, MF->verify(this, "After splitting live range around region"); } -MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, +MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg)) @@ -1276,19 +1034,8 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } - bool CanCauseEvictionChain = false; - unsigned BestCand = - calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, - false /*IgnoreCSR*/, &CanCauseEvictionChain); - - // Split candidates with compact regions can cause a bad eviction sequence. - // See splitCanCauseEvictionChain for detailed description of scenarios. - // To avoid it, we need to comapre the cost with the spill cost and not the - // current max frequency. 
- if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) && - CanCauseEvictionChain) { - return MCRegister::NoRegister; - } + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, false /*IgnoreCSR*/); // No solutions found, fall back to single block splitting. if (!HasCompact && BestCand == NoCand) @@ -1297,11 +1044,11 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); } -unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, +unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR, - bool *CanCauseEvictionChain) { + unsigned &NumCands, + bool IgnoreCSR) { unsigned BestCand = NoCand; for (MCPhysReg PhysReg : Order) { assert(PhysReg); @@ -1364,8 +1111,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, continue; } - bool HasEvictionChain = false; - Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); + Cost += calcGlobalSplitCost(Cand, Order); LLVM_DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; @@ -1376,28 +1122,14 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (Cost < BestCost) { BestCand = NumCands; BestCost = Cost; - // See splitCanCauseEvictionChain for detailed description of bad - // eviction chain scenarios. - if (CanCauseEvictionChain) - *CanCauseEvictionChain = HasEvictionChain; } ++NumCands; } - if (CanCauseEvictionChain && BestCand != NoCand) { - // See splitCanCauseEvictionChain for detailed description of bad - // eviction chain scenarios. - LLVM_DEBUG(dbgs() << "Best split candidate of vreg " - << printReg(VirtReg.reg(), TRI) << " may "); - if (!(*CanCauseEvictionChain)) - LLVM_DEBUG(dbgs() << "not "); - LLVM_DEBUG(dbgs() << "cause bad eviction chain\n"); - } - return BestCand; } -unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, +unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, SmallVectorImpl &NewVRegs) { SmallVector UsedCands; @@ -1444,7 +1176,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, /// tryBlockSplit - Split a global live range around every block with uses. This /// creates a lot of local live ranges, that will be split by tryLocalSplit if /// they don't allocate. -unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, +unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg, + AllocationOrder &Order, SmallVectorImpl &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); Register Reg = VirtReg.reg(); @@ -1507,9 +1240,9 @@ static unsigned getNumAllocatableRegsForConstraints( /// be moved to a larger register class. /// /// This is similar to spilling to a larger register class. -unsigned -RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { +unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); // There is no point to this if there are no larger sub-classes. 
if (!RegClassInfo.isProperSubClass(CurRC)) @@ -1529,7 +1262,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC, *MF); - unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + unsigned SuperRCNumAllocatableRegs = + RegClassInfo.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. // Otherwise, splitting just inserts uncoalescable copies that do not help @@ -1539,7 +1273,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (MI->isFullCopy() || SuperRCNumAllocatableRegs == getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, - TII, TRI, RCI)) { + TII, TRI, RegClassInfo)) { LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } @@ -1649,7 +1383,8 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg, /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only /// basic block. /// -unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, +unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg, + AllocationOrder &Order, SmallVectorImpl &NewVRegs) { // TODO: the function currently only handles a single UseBlock; it should be // possible to generalize. @@ -1879,7 +1614,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// trySplit - Try to split VirtReg or one of its interferences, making it /// assignable. /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. -unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, +unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs, const SmallVirtRegSet &FixedRegisters) { // Ranges must be Split2 or less. @@ -1928,6 +1663,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { return false; } +/// Return true if the existing assignment of \p Intf overlaps, but is not the +/// same, as \p PhysReg. +static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, + const VirtRegMap &VRM, + MCRegister PhysReg, + const LiveInterval &Intf) { + MCRegister AssignedReg = VRM.getPhys(Intf.reg()); + if (PhysReg == AssignedReg) + return false; + return TRI.regsOverlap(PhysReg, AssignedReg); +} + /// mayRecolorAllInterferences - Check if the virtual registers that /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be /// recolored to free \p PhysReg. @@ -1937,8 +1684,8 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { /// \p FixedRegisters contains all the virtual registers that cannot be /// recolored. bool RAGreedy::mayRecolorAllInterferences( - MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, - const SmallVirtRegSet &FixedRegisters) { + MCRegister PhysReg, const LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -1952,13 +1699,21 @@ bool RAGreedy::mayRecolorAllInterferences( CutOffInfo |= CO_Interf; return false; } - for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { - // If Intf is done and sit on the same register class as VirtReg, - // it would not be recolorable as it is in the same state as VirtReg. 
- // However, if VirtReg has tied defs and Intf doesn't, then + for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { + // If Intf is done and sits on the same register class as VirtReg, it + // would not be recolorable as it is in the same state as + // VirtReg. However there are at least two exceptions. + // + // If VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. + // + // Additionally, if the register class has overlapping tuple members, it + // may still be recolorable using a different tuple. This is more likely + // if the existing assignment aliases with the candidate. + // if (((ExtraInfo->getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg()) == CurRC) && + MRI->getRegClass(Intf->reg()) == CurRC && + !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) && !(hasTiedDef(MRI, VirtReg.reg()) && !hasTiedDef(MRI, Intf->reg()))) || FixedRegisters.count(Intf->reg())) { @@ -2008,18 +1763,26 @@ bool RAGreedy::mayRecolorAllInterferences( /// (split, spill) during the process and that must be assigned. /// \p FixedRegisters contains all the virtual registers that cannot be /// recolored. +/// +/// \p RecolorStack tracks the original assignments of successfully recolored +/// registers. +/// /// \p Depth gives the current depth of the last chance recoloring. /// \return a physical register that can be used for VirtReg or ~0u if none /// exists. -unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, +unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs, SmallVirtRegSet &FixedRegisters, + RecoloringStack &RecolorStack, unsigned Depth) { if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg)) return ~0u; LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + + const ssize_t EntryStackSize = RecolorStack.size(); + // Ranges must be Done. assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && "Last chance recoloring should really be last chance"); @@ -2035,9 +1798,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // Set of Live intervals that will need to be recolored. SmallLISet RecoloringCandidates; - // Record the original mapping virtual register to physical register in case - // the recoloring fails. - DenseMap VirtRegToPhysReg; + // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". assert(!FixedRegisters.count(VirtReg.reg())); @@ -2049,7 +1810,6 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); - VirtRegToPhysReg.clear(); CurrentNewVRegs.clear(); // It is only possible to recolor virtual register interference. @@ -2069,18 +1829,19 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, continue; } - // RecoloringCandidates contains all the virtual registers that interfer - // with VirtReg on PhysReg (or one of its aliases). - // Enqueue them for recoloring and perform the actual recoloring. + // RecoloringCandidates contains all the virtual registers that interfere + // with VirtReg on PhysReg (or one of its aliases). Enqueue them for + // recoloring and perform the actual recoloring. 
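[Editor's note] The RecolorStack introduced in this hunk records (interval, physreg) pairs so a failed recoloring can unwind nested attempts: everything past the entry mark is unassigned in reverse, and only then are the recorded assignments restored (the two rollback loops appear just below). A generic sketch of that two-phase rollback over an invented assignment table:

#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

using VReg = unsigned;
using PhysReg = unsigned;

struct Allocation {
  std::unordered_map<VReg, PhysReg> Assigned;
  std::vector<std::pair<VReg, PhysReg>> Stack; // saved (vreg, old preg) pairs

  void recolor(VReg V, PhysReg New) {
    Stack.emplace_back(V, Assigned.at(V)); // record before overwriting
    Assigned[V] = New;
  }

  // Roll back to a previously captured stack depth. Unassign everything
  // first, then reassign: nested recolorings may hold registers that the
  // entries being restored need to reclaim.
  void rollBackTo(std::size_t Mark) {
    for (std::size_t I = Stack.size(); I-- > Mark;)
      Assigned.erase(Stack[I].first);
    for (std::size_t I = Mark; I != Stack.size(); ++I)
      Assigned[Stack[I].first] = Stack[I].second;
    Stack.resize(Mark);
  }
};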
PQueue RecoloringQueue; - for (LiveInterval *RC : RecoloringCandidates) { + for (const LiveInterval *RC : RecoloringCandidates) { Register ItVirtReg = RC->reg(); enqueue(RecoloringQueue, RC); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); // Record the current allocation. - VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); + RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg))); + // unset the related struct. Matrix->unassign(*RC); } @@ -2095,7 +1856,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // at this point for the next physical register. SmallVirtRegSet SaveFixedRegisters(FixedRegisters); if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, - FixedRegisters, Depth)) { + FixedRegisters, RecolorStack, Depth)) { // Push the queued vregs into the main queue. for (Register NewVReg : CurrentNewVRegs) NewVRegs.push_back(NewVReg); @@ -2122,13 +1883,31 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, NewVRegs.push_back(R); } - for (LiveInterval *RC : RecoloringCandidates) { - Register ItVirtReg = RC->reg(); - if (VRM->hasPhys(ItVirtReg)) - Matrix->unassign(*RC); - MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; - Matrix->assign(*RC, ItPhysReg); + // Roll back our unsuccessful recoloring. Also roll back any successful + // recolorings in any recursive recoloring attempts, since it's possible + // they would have introduced conflicts with assignments we will be + // restoring further up the stack. Perform all unassignments prior to + // reassigning, since sub-recolorings may have conflicted with the registers + // we are going to restore to their original assignments. + for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) { + const LiveInterval *LI; + MCRegister PhysReg; + std::tie(LI, PhysReg) = RecolorStack[I]; + + if (VRM->hasPhys(LI->reg())) + Matrix->unassign(*LI); } + + for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) { + const LiveInterval *LI; + MCRegister PhysReg; + std::tie(LI, PhysReg) = RecolorStack[I]; + if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg())) + Matrix->assign(*LI, PhysReg); + } + + // Pop the stack of recoloring attempts. + RecolorStack.resize(EntryStackSize); } // Last chance recoloring did not worked either, give up. @@ -2146,12 +1925,13 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, SmallVectorImpl &NewVRegs, SmallVirtRegSet &FixedRegisters, + RecoloringStack &RecolorStack, unsigned Depth) { while (!RecoloringQueue.empty()) { - LiveInterval *LI = dequeue(RecoloringQueue); + const LiveInterval *LI = dequeue(RecoloringQueue); LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); - MCRegister PhysReg = - selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, + RecolorStack, Depth + 1); // When splitting happens, the live-range may actually be empty. // In that case, this is okay to continue the recoloring even // if we did not find an alternative color for it. 
Indeed, @@ -2178,12 +1958,14 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, // Main Entry Point //===----------------------------------------------------------------------===// -MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, +MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { CutOffInfo = CO_None; LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; - MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + RecoloringStack RecolorStack; + MCRegister Reg = + selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack); if (Reg == ~0U && (CutOffInfo != CO_None)) { uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); if (CutOffEncountered == CO_Depth) @@ -2208,10 +1990,9 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// Spilling a live range in the cold path can have lower cost than using /// the CSR for the first time. Returns the physical register if we decide /// to use the CSR; otherwise return 0. -MCRegister -RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - MCRegister PhysReg, uint8_t &CostPerUseLimit, - SmallVectorImpl &NewVRegs) { +MCRegister RAGreedy::tryAssignCSRFirstTime( + const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, + uint8_t &CostPerUseLimit, SmallVectorImpl &NewVRegs) { if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost // is lower than CSRCost. @@ -2243,7 +2024,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, return PhysReg; } -void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { +void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) { // Do not keep invalid information around. SetOfBrokenHints.remove(&LI); } @@ -2317,7 +2098,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, /// For a given live range, profitability is determined by the sum of the /// frequencies of the non-identity copies it would introduce with the old /// and new register. -void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { +void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { // We have a broken hint, check if it is possible to fix it by // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted // some register and PhysReg may be available for the other live-ranges. @@ -2431,7 +2212,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// This is likely that we can assign the same register for b, c, and d, /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { - for (LiveInterval *LI : SetOfBrokenHints) { + for (const LiveInterval *LI : SetOfBrokenHints) { assert(Register::isVirtualRegister(LI->reg()) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). @@ -2442,9 +2223,10 @@ void RAGreedy::tryHintsRecoloring() { } } -MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, +MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, SmallVectorImpl &NewVRegs, SmallVirtRegSet &FixedRegisters, + RecoloringStack &RecolorStack, unsigned Depth) { uint8_t CostPerUseLimit = uint8_t(~0u); // First try assigning a free register. 
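The rollback in tryLastChanceRecoloring above is deliberately two-phase: every assignment recorded since entry is first cleared, and only then are the original assignments restored, so an interim assignment made by a nested recoloring can never overlap a register that is about to be restored. A minimal stand-alone model of that ordering, with toy types rather than the patch's LLVM classes:

// Illustrative sketch only -- models the two rollback loops above.
#include <cstddef>
#include <utility>
#include <vector>

using Interval = int;
using PhysReg = unsigned;
using RecoloringStack = std::vector<std::pair<Interval, PhysReg>>;

void rollBack(RecoloringStack &Stack, size_t EntryStackSize,
              std::vector<PhysReg> &Assignment) {
  // Phase 1: clear every assignment recorded since entry, newest first.
  for (size_t I = Stack.size(); I-- > EntryStackSize;)
    Assignment[Stack[I].first] = 0; // unassign
  // Phase 2: restore the recorded assignments. Running this only after all
  // unassignments mirrors the patch's "unassign before reassigning" rule.
  for (size_t I = EntryStackSize; I != Stack.size(); ++I)
    Assignment[Stack[I].first] = Stack[I].second;
  // Pop this attempt's records.
  Stack.resize(EntryStackSize);
}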
@@ -2452,8 +2234,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); if (MCRegister PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { - // If VirtReg got an assignment, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg()); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. @@ -2488,9 +2268,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // copy-related live-ranges. if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); - // If VirtReg eviction someone, the eviction info for it as an evictee is - // no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } @@ -2510,18 +2287,16 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); - if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { - // If VirtReg got split, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg()); + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) return PhysReg; - } } // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class will report it. - if (Stage >= RS_Done || !VirtReg.isSpillable()) + if (Stage >= RS_Done || !VirtReg.isSpillable()) { return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, - Depth); + RecolorStack, Depth); + } // Finally spill VirtReg itself. if ((EnableDeferredSpilling || @@ -2713,19 +2488,27 @@ void RAGreedy::reportStats() { } } +bool RAGreedy::hasVirtRegAlloc() { + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); + if (MRI->reg_nodbg_empty(Reg)) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!RC) + continue; + if (ShouldAllocateClass(*TRI, *RC)) + return true; + } + + return false; +} + bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" << "********** Function: " << mf.getName() << '\n'); MF = &mf; - TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - RCI.runOnMachineFunction(mf); - - EnableAdvancedRASplitCost = - ConsiderLocalIntervalCost.getNumOccurrences() - ? ConsiderLocalIntervalCost - : MF->getSubtarget().enableAdvancedRASplitCost(); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2733,6 +2516,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis(), getAnalysis(), getAnalysis()); + + // Early return if there is no virtual register to be allocated to a + // physical register. + if (!hasVirtRegAlloc()) + return false; + Indexes = &getAnalysis(); MBFI = &getAnalysis(); DomTree = &getAnalysis(); @@ -2746,6 +2535,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); RegCosts = TRI->getRegisterCosts(*MF); + RegClassPriorityTrumpsGlobalness = + GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences() + ? 
GreedyRegClassPriorityTrumpsGlobalness
+          : TRI->regClassPriorityTrumpsGlobalness(*MF);
  ExtraInfo.emplace();
  EvictAdvisor =
@@ -2764,7 +2557,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32); // This will grow as needed.
  SetOfBrokenHints.clear();
- LastEvicted.clear();
  allocatePhysRegs();
  tryHintsRecoloring();
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index e9a5fe635f26..358e74541a54 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -12,9 +12,7 @@
 #ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
 #define LLVM_CODEGEN_REGALLOCGREEDY_H_
-#include "AllocationOrder.h"
 #include "InterferenceCache.h"
-#include "LiveDebugVariables.h"
 #include "RegAllocBase.h"
 #include "RegAllocEvictionAdvisor.h"
 #include "SpillPlacement.h"
@@ -23,52 +21,44 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
-#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/LiveStacks.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/Spiller.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Target/TargetMachine.h"
 #include
-#include
 #include
 #include
 #include
-#include
 #include
 namespace llvm {
+class AllocationOrder;
+class AnalysisUsage;
+class EdgeBundles;
+class LiveDebugVariables;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineDominatorTree;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineOptimizationRemarkEmitter;
+class MachineOptimizationRemarkMissed;
+class SlotIndex;
+class SlotIndexes;
+class TargetInstrInfo;
+class VirtRegMap;
+
 class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
                                          public RegAllocBase,
                                          private LiveRangeEdit::Delegate {
@@ -162,15 +152,18 @@ public:
 private:
  // Convenient shortcuts.
  using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
- using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
+ using SmallLISet = SmallPtrSet<const LiveInterval *, 4>;
+
+ // We need to track all tentative recolorings so we can roll back any
+ // successful and unsuccessful recoloring attempts.
+ using RecoloringStack =
+     SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
  // context
  MachineFunction *MF;
  // Shortcuts to some useful interface.
const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RCI; // analyses SlotIndexes *Indexes; @@ -210,57 +203,6 @@ private: static const char *const StageName[]; #endif - /// EvictionTrack - Keeps track of past evictions in order to optimize region - /// split decision. - class EvictionTrack { - - public: - using EvictorInfo = - std::pair; - using EvicteeInfo = llvm::DenseMap; - - private: - /// Each Vreg that has been evicted in the last stage of selectOrSplit will - /// be mapped to the evictor Vreg and the PhysReg it was evicted from. - EvicteeInfo Evictees; - - public: - /// Clear all eviction information. - void clear() { Evictees.clear(); } - - /// Clear eviction information for the given evictee Vreg. - /// E.g. when Vreg get's a new allocation, the old eviction info is no - /// longer relevant. - /// \param Evictee The evictee Vreg for whom we want to clear collected - /// eviction info. - void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } - - /// Track new eviction. - /// The Evictor vreg has evicted the Evictee vreg from Physreg. - /// \param PhysReg The physical register Evictee was evicted from. - /// \param Evictor The evictor Vreg that evicted Evictee. - /// \param Evictee The evictee Vreg. - void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { - Evictees[Evictee].first = Evictor; - Evictees[Evictee].second = PhysReg; - } - - /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. - /// \param Evictee The evictee vreg. - /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if - /// nobody has evicted Evictee from PhysReg. - EvictorInfo getEvictor(Register Evictee) { - if (Evictees.count(Evictee)) { - return Evictees[Evictee]; - } - - return EvictorInfo(0, 0); - } - }; - - // Keeps track of past evictions in order to optimize region split decision. - EvictionTrack LastEvicted; - // splitting state. std::unique_ptr SA; std::unique_ptr SE; @@ -320,17 +262,17 @@ private: /// Callee-save register cost, calculated once per machine function. BlockFrequency CSRCost; - /// Enable or not the consideration of the cost of local intervals created - /// by a split candidate when choosing the best split candidate. - bool EnableAdvancedRASplitCost; - /// Set of broken hints that may be reconciled later because of eviction. - SmallSetVector SetOfBrokenHints; + SmallSetVector SetOfBrokenHints; /// The register cost values. This list will be recreated for each Machine /// Function ArrayRef RegCosts; + /// Flags for the live range priority calculation, determined once per + /// machine function. + bool RegClassPriorityTrumpsGlobalness; + public: RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); @@ -341,11 +283,11 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; Spiller &spiller() override { return *SpillerInstance; } - void enqueueImpl(LiveInterval *LI) override; - LiveInterval *dequeue() override; - MCRegister selectOrSplit(LiveInterval &, + void enqueueImpl(const LiveInterval *LI) override; + const LiveInterval *dequeue() override; + MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl &) override; - void aboutToRemoveInterval(LiveInterval &) override; + void aboutToRemoveInterval(const LiveInterval &) override; /// Perform register allocation. 
bool runOnMachineFunction(MachineFunction &mf) override; @@ -363,81 +305,70 @@ public: static char ID; private: - MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned = 0); + MCRegister selectOrSplitImpl(const LiveInterval &, + SmallVectorImpl &, SmallVirtRegSet &, + RecoloringStack &, unsigned = 0); bool LRE_CanEraseVirtReg(Register) override; void LRE_WillShrinkVirtReg(Register) override; void LRE_DidCloneVirtReg(Register, Register) override; - void enqueue(PQueue &CurQueue, LiveInterval *LI); - LiveInterval *dequeue(PQueue &CurQueue); + void enqueue(PQueue &CurQueue, const LiveInterval *LI); + const LiveInterval *dequeue(PQueue &CurQueue); + bool hasVirtRegAlloc(); BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &); bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef); bool growRegion(GlobalSplitCandidate &Cand); - bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order); - bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, - GlobalSplitCandidate &Cand, unsigned BBNumber, - const AllocationOrder &Order); BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, - const AllocationOrder &Order, - bool *CanCauseEvictionChain); + const AllocationOrder &Order); bool calcCompactRegion(GlobalSplitCandidate &); void splitAroundRegion(LiveRangeEdit &, ArrayRef); void calcGapWeights(MCRegister, SmallVectorImpl &); - bool canEvictInterferenceInRange(const LiveInterval &VirtReg, - MCRegister PhysReg, SlotIndex Start, - SlotIndex End, EvictionCost &MaxCost) const; - MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, - const LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictWeight) const; - void evictInterference(LiveInterval &, MCRegister, + void evictInterference(const LiveInterval &, MCRegister, SmallVectorImpl &); - bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, + bool mayRecolorAllInterferences(MCRegister PhysReg, + const LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); - MCRegister tryAssign(LiveInterval &, AllocationOrder &, + MCRegister tryAssign(const LiveInterval &, AllocationOrder &, SmallVectorImpl &, const SmallVirtRegSet &); - MCRegister tryEvict(LiveInterval &, AllocationOrder &, + MCRegister tryEvict(const LiveInterval &, AllocationOrder &, SmallVectorImpl &, uint8_t, const SmallVirtRegSet &); - MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, + MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl &); /// Calculate cost of region splitting. - unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + unsigned calculateRegionSplitCost(const LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR, - bool *CanCauseEvictionChain = nullptr); + unsigned &NumCands, bool IgnoreCSR); /// Perform region splitting. - unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, SmallVectorImpl &NewVRegs); /// Check other options before using a callee-saved register for the first /// time. 
- MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, + MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, uint8_t &CostPerUseLimit, SmallVectorImpl &NewVRegs); void initializeCSRCost(); - unsigned tryBlockSplit(LiveInterval &, AllocationOrder &, + unsigned tryBlockSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl &); - unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &, + unsigned tryInstructionSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl &); - unsigned tryLocalSplit(LiveInterval &, AllocationOrder &, + unsigned tryLocalSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl &); - unsigned trySplit(LiveInterval &, AllocationOrder &, + unsigned trySplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl &, const SmallVirtRegSet &); - unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + unsigned tryLastChanceRecoloring(const LiveInterval &, AllocationOrder &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned); + SmallVirtRegSet &, RecoloringStack &, + unsigned); bool tryRecoloringCandidates(PQueue &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned); - void tryHintRecoloring(LiveInterval &); + SmallVirtRegSet &, RecoloringStack &, unsigned); + void tryHintRecoloring(const LiveInterval &); void tryHintsRecoloring(); /// Model the information carried by one end of a copy. diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 93be8f689d57..8c262130fb70 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -847,6 +847,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!PBQPAllocComplete) { LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); + (void) Round; PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); initializeGraph(G, VRM, *VRegSpiller); diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp index 740890831617..32fa5e07dd16 100644 --- a/llvm/lib/CodeGen/RegAllocScore.cpp +++ b/llvm/lib/CodeGen/RegAllocScore.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #include "RegAllocScore.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include -#include -#include -#include +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; cl::opt CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden); diff --git a/llvm/lib/CodeGen/RegAllocScore.h b/llvm/lib/CodeGen/RegAllocScore.h index 3c28bb61189d..2bcd0b5895bf 100644 --- a/llvm/lib/CodeGen/RegAllocScore.h +++ b/llvm/lib/CodeGen/RegAllocScore.h @@ -15,21 +15,16 @@ #ifndef LLVM_CODEGEN_REGALLOCSCORE_H_ #define LLVM_CODEGEN_REGALLOCSCORE_H_ -#include "llvm/ADT/DenseMap.h" -#include 
"llvm/ADT/SetVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/Utils/TFUtils.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/IR/Module.h" -#include -#include -#include +#include "llvm/ADT/STLFunctionalExtras.h" namespace llvm { +class AAResults; +class MachineBasicBlock; +class MachineBlockFrequencyInfo; +class MachineFunction; +class MachineInstr; + /// Regalloc score. class RegAllocScore final { double CopyCounts = 0.0; diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 5a79ac44dcf4..16afd15e29e4 100644 --- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -17,16 +17,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/TargetFrameLowering.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 800d952469a5..d356962e0d78 100644 --- a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -19,8 +19,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -29,7 +29,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/RegisterBank.cpp b/llvm/lib/CodeGen/RegisterBank.cpp new file mode 100644 index 000000000000..512b21aeacaf --- /dev/null +++ b/llvm/lib/CodeGen/RegisterBank.cpp @@ -0,0 +1,110 @@ +//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the RegisterBank class. 
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "registerbank"
+
+using namespace llvm;
+
+const unsigned RegisterBank::InvalidID = UINT_MAX;
+
+RegisterBank::RegisterBank(
+    unsigned ID, const char *Name, unsigned Size,
+    const uint32_t *CoveredClasses, unsigned NumRegClasses)
+    : ID(ID), Name(Name), Size(Size) {
+  ContainedRegClasses.resize(NumRegClasses);
+  ContainedRegClasses.setBitsInMask(CoveredClasses);
+}
+
+bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+  assert(isValid() && "Invalid register bank");
+  for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
+    const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
+
+    if (!covers(RC))
+      continue;
+    // Verify that the register bank covers all the sub classes of the
+    // classes it covers.
+
+    // Use a different (slow in that case) method than
+    // RegisterBankInfo to find the subclasses of RC, to make sure
+    // both agree on the covers.
+    for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) {
+      const TargetRegisterClass &SubRC = *TRI.getRegClass(SubRCId);
+
+      if (!RC.hasSubClassEq(&SubRC))
+        continue;
+
+      // Verify that the Size of the register bank is big enough to cover
+      // all the register classes it covers.
+      assert(getSize() >= TRI.getRegSizeInBits(SubRC) &&
+             "Size is not big enough for all the subclasses!");
+      assert(covers(SubRC) && "Not all subclasses are covered");
+    }
+  }
+  return true;
+}
+
+bool RegisterBank::covers(const TargetRegisterClass &RC) const {
+  assert(isValid() && "RB hasn't been initialized yet");
+  return ContainedRegClasses.test(RC.getID());
+}
+
+bool RegisterBank::isValid() const {
+  return ID != InvalidID && Name != nullptr && Size != 0 &&
+         // A register bank that does not cover anything is useless.
+         !ContainedRegClasses.empty();
+}
+
+bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
+  // There must be only one instance of a given register bank alive
+  // for the whole compilation.
+  // The RegisterBankInfo is supposed to enforce that.
+  assert((OtherRB.getID() != getID() || &OtherRB == this) &&
+         "ID does not uniquely identify a RegisterBank");
+  return &OtherRB == this;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+  print(dbgs(), /* IsForDebug */ true, TRI);
+}
+#endif
+
+void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
+                         const TargetRegisterInfo *TRI) const {
+  OS << getName();
+  if (!IsForDebug)
+    return;
+  OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n"
+     << "isValid:" << isValid() << '\n'
+     << "Number of Covered register classes: " << ContainedRegClasses.count()
+     << '\n';
+  // Print all the subclasses if we can.
+  // These register classes may not be properly initialized yet.
+ if (!TRI || ContainedRegClasses.empty()) + return; + assert(ContainedRegClasses.size() == TRI->getNumRegClasses() && + "TRI does not match the initialization process?"); + OS << "Covered register classes:\n"; + ListSeparator LS; + for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) { + const TargetRegisterClass &RC = *TRI->getRegClass(RCId); + + if (covers(RC)) + OS << LS << TRI->getRegClassName(&RC); + } +} diff --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp new file mode 100644 index 000000000000..de851ffc7fdc --- /dev/null +++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp @@ -0,0 +1,802 @@ +//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the RegisterBankInfo class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterBankInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include // For std::max. + +#define DEBUG_TYPE "registerbankinfo" + +using namespace llvm; + +STATISTIC(NumPartialMappingsCreated, + "Number of partial mappings dynamically created"); +STATISTIC(NumPartialMappingsAccessed, + "Number of partial mappings dynamically accessed"); +STATISTIC(NumValueMappingsCreated, + "Number of value mappings dynamically created"); +STATISTIC(NumValueMappingsAccessed, + "Number of value mappings dynamically accessed"); +STATISTIC(NumOperandsMappingsCreated, + "Number of operands mappings dynamically created"); +STATISTIC(NumOperandsMappingsAccessed, + "Number of operands mappings dynamically accessed"); +STATISTIC(NumInstructionMappingsCreated, + "Number of instruction mappings dynamically created"); +STATISTIC(NumInstructionMappingsAccessed, + "Number of instruction mappings dynamically accessed"); + +const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; +const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; + +//------------------------------------------------------------------------------ +// RegisterBankInfo implementation. 
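The created/accessed STATISTIC pairs above count a hash-consing scheme: each mapping is built once per distinct key, cached behind a stable pointer, and every later query returns the same immutable object. A minimal stand-alone sketch of the pattern, with a toy hash and a made-up PartialMappingModel in place of the real hash_combine-keyed PartialMapping cache:

// Illustrative sketch only -- the cache pattern behind the statistics above.
#include <cstdint>
#include <memory>
#include <unordered_map>

struct PartialMappingModel { // stand-in for the real PartialMapping
  unsigned StartIdx, Length;
};

static unsigned NumCreated = 0, NumAccessed = 0;

const PartialMappingModel &getPartialMappingModel(
    std::unordered_map<uint64_t, std::unique_ptr<PartialMappingModel>> &Map,
    unsigned StartIdx, unsigned Length) {
  ++NumAccessed;
  uint64_t Hash = (uint64_t(StartIdx) << 32) | Length; // toy hash
  auto &Slot = Map[Hash];
  if (!Slot) { // first time this key is seen: materialize and cache
    ++NumCreated;
    Slot.reset(new PartialMappingModel{StartIdx, Length});
  }
  // The unique_ptr indirection keeps the returned reference stable even
  // when the map rehashes.
  return *Slot;
}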
+//------------------------------------------------------------------------------ +RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks, + unsigned NumRegBanks) + : RegBanks(RegBanks), NumRegBanks(NumRegBanks) { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { + assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank"); + assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid"); + } +#endif // NDEBUG +} + +bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { + const RegisterBank &RegBank = getRegBank(Idx); + assert(Idx == RegBank.getID() && + "ID does not match the index in the array"); + LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n'); + assert(RegBank.verify(TRI) && "RegBank is invalid"); + } +#endif // NDEBUG + return true; +} + +const RegisterBank * +RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + if (Register::isPhysicalRegister(Reg)) { + // FIXME: This was probably a copy to a virtual register that does have a + // type we could use. + return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT()); + } + + assert(Reg && "NoRegister does not have a register bank"); + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + if (auto *RB = RegClassOrBank.dyn_cast()) + return RB; + if (auto *RC = RegClassOrBank.dyn_cast()) + return &getRegBankFromRegClass(*RC, MRI.getType(Reg)); + return nullptr; +} + +const TargetRegisterClass & +RegisterBankInfo::getMinimalPhysRegClass(Register Reg, + const TargetRegisterInfo &TRI) const { + assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); + const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); + if (RegRCIt != PhysRegMinimalRCs.end()) + return *RegRCIt->second; + const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg); + PhysRegMinimalRCs[Reg] = PhysRC; + return *PhysRC; +} + +const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( + const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, + const MachineRegisterInfo &MRI) const { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + + // The mapping of the registers may be available via the + // register class constraints. + const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI); + + if (!RC) + return nullptr; + + Register Reg = MI.getOperand(OpIdx).getReg(); + const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); + // Check that the target properly implemented getRegBankFromRegClass. + assert(RegBank.covers(*RC) && + "The mapping of the register bank does not make sense"); + return &RegBank; +} + +const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister( + Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) { + + // If the register already has a class, fallback to MRI::constrainRegClass. + auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + if (RegClassOrBank.is()) + return MRI.constrainRegClass(Reg, &RC); + + const RegisterBank *RB = RegClassOrBank.get(); + // Otherwise, all we can do is ensure the bank covers the class, and set it. + if (RB && !RB->covers(RC)) + return nullptr; + + // If nothing was set or the class is simply compatible, set it. + MRI.setRegClass(Reg, &RC); + return &RC; +} + +/// Check whether or not \p MI should be treated like a copy +/// for the mappings. 
+/// Copy like instruction are special for mapping because +/// they don't have actual register constraints. Moreover, +/// they sometimes have register classes assigned and we can +/// just use that instead of failing to provide a generic mapping. +static bool isCopyLike(const MachineInstr &MI) { + return MI.isCopy() || MI.isPHI() || + MI.getOpcode() == TargetOpcode::REG_SEQUENCE; +} + +const RegisterBankInfo::InstructionMapping & +RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { + // For copies we want to walk over the operands and try to find one + // that has a register bank since the instruction itself will not get + // us any constraint. + bool IsCopyLike = isCopyLike(MI); + // For copy like instruction, only the mapping of the definition + // is important. The rest is not constrained. + unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands(); + + const MachineFunction &MF = *MI.getMF(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + // We may need to query the instruction encoding to guess the mapping. + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + // Before doing anything complicated check if the mapping is not + // directly available. + bool CompleteMapping = true; + + SmallVector OperandsMapping(NumOperandsForMapping); + for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; + ++OpIdx) { + const MachineOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + // The register bank of Reg is just a side effect of the current + // excution and in particular, there is no reason to believe this + // is the best default mapping for the current instruction. Keep + // it as an alternative register bank if we cannot figure out + // something. + const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI); + // For copy-like instruction, we want to reuse the register bank + // that is already set on Reg, if any, since those instructions do + // not have any constraints. + const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr; + if (!CurRegBank) { + // If this is a target specific instruction, we can deduce + // the register bank from the encoding constraints. + CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI); + if (!CurRegBank) { + // All our attempts failed, give up. + CompleteMapping = false; + + if (!IsCopyLike) + // MI does not carry enough information to guess the mapping. + return getInvalidInstructionMapping(); + continue; + } + } + + unsigned Size = getSizeInBits(Reg, MRI, TRI); + const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank); + if (IsCopyLike) { + if (!OperandsMapping[0]) { + if (MI.isRegSequence()) { + // For reg_sequence, the result size does not match the input. + unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(), + MRI, TRI); + OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank); + } else { + OperandsMapping[0] = ValMapping; + } + } + + // The default handling assumes any register bank can be copied to any + // other. If this isn't the case, the target should specially deal with + // reg_sequence/phi. There may also be unsatisfiable copies. 
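Since a copy-like instruction carries no constraints of its own, the mapping code here simply reuses whichever register bank one of its operands already has, preferring the definition. A stand-alone sketch of that deduction rule, where Bank and CopyModel are invented stand-ins rather than LLVM types:

// Illustrative sketch only -- the bank-reuse rule for copy-like instructions.
#include <optional>

enum class Bank { GPR, FPR };

struct CopyModel {
  std::optional<Bank> DstBank, SrcBank; // empty = no bank assigned yet
};

// Returns the bank to use for both sides of the copy, if any is known.
std::optional<Bank> deduceCopyBank(const CopyModel &C) {
  if (C.DstBank)
    return C.DstBank; // the definition wins when already constrained
  return C.SrcBank;   // otherwise reuse whatever the source carries
}

If neither operand carries a bank, the result is empty, which corresponds to the invalid-mapping bail-out in the real code.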
+ for (; OpIdx != EndIdx; ++OpIdx) { + const MachineOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + + const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI); + if (AltRegBank && + cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI))) + return getInvalidInstructionMapping(); + } + + CompleteMapping = true; + break; + } + + OperandsMapping[OpIdx] = ValMapping; + } + + if (IsCopyLike && !CompleteMapping) { + // No way to deduce the type from what we have. + return getInvalidInstructionMapping(); + } + + assert(CompleteMapping && "Setting an uncomplete mapping"); + return getInstructionMapping( + DefaultMappingID, /*Cost*/ 1, + /*OperandsMapping*/ getOperandsMapping(OperandsMapping), + NumOperandsForMapping); +} + +/// Hashing function for PartialMapping. +static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank *RegBank) { + return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0); +} + +/// Overloaded version of hash_value for a PartialMapping. +hash_code +llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) { + return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length, + PartMapping.RegBank); +} + +const RegisterBankInfo::PartialMapping & +RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const { + ++NumPartialMappingsAccessed; + + hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank); + const auto &It = MapOfPartialMappings.find(Hash); + if (It != MapOfPartialMappings.end()) + return *It->second; + + ++NumPartialMappingsCreated; + + auto &PartMapping = MapOfPartialMappings[Hash]; + PartMapping = std::make_unique(StartIdx, Length, RegBank); + return *PartMapping; +} + +const RegisterBankInfo::ValueMapping & +RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const { + return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1); +} + +static hash_code +hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown, + unsigned NumBreakDowns) { + if (LLVM_LIKELY(NumBreakDowns == 1)) + return hash_value(*BreakDown); + SmallVector Hashes(NumBreakDowns); + for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx) + Hashes.push_back(hash_value(BreakDown[Idx])); + return hash_combine_range(Hashes.begin(), Hashes.end()); +} + +const RegisterBankInfo::ValueMapping & +RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, + unsigned NumBreakDowns) const { + ++NumValueMappingsAccessed; + + hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns); + const auto &It = MapOfValueMappings.find(Hash); + if (It != MapOfValueMappings.end()) + return *It->second; + + ++NumValueMappingsCreated; + + auto &ValMapping = MapOfValueMappings[Hash]; + ValMapping = std::make_unique(BreakDown, NumBreakDowns); + return *ValMapping; +} + +template +const RegisterBankInfo::ValueMapping * +RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { + + ++NumOperandsMappingsAccessed; + + // The addresses of the value mapping are unique. + // Therefore, we can use them directly to hash the operand mapping. + hash_code Hash = hash_combine_range(Begin, End); + auto &Res = MapOfOperandsMappings[Hash]; + if (Res) + return Res.get(); + + ++NumOperandsMappingsCreated; + + // Create the array of ValueMapping. 
+ // Note: this array will not hash to this instance of operands + // mapping, because we use the pointer of the ValueMapping + // to hash and we expect them to uniquely identify an instance + // of value mapping. + Res = std::make_unique(std::distance(Begin, End)); + unsigned Idx = 0; + for (Iterator It = Begin; It != End; ++It, ++Idx) { + const ValueMapping *ValMap = *It; + if (!ValMap) + continue; + Res[Idx] = *ValMap; + } + return Res.get(); +} + +const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( + const SmallVectorImpl &OpdsMapping) + const { + return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); +} + +const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( + std::initializer_list OpdsMapping) + const { + return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); +} + +static hash_code +hashInstructionMapping(unsigned ID, unsigned Cost, + const RegisterBankInfo::ValueMapping *OperandsMapping, + unsigned NumOperands) { + return hash_combine(ID, Cost, OperandsMapping, NumOperands); +} + +const RegisterBankInfo::InstructionMapping & +RegisterBankInfo::getInstructionMappingImpl( + bool IsInvalid, unsigned ID, unsigned Cost, + const RegisterBankInfo::ValueMapping *OperandsMapping, + unsigned NumOperands) const { + assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 && + OperandsMapping == nullptr && NumOperands == 0) || + !IsInvalid) && + "Mismatch argument for invalid input"); + ++NumInstructionMappingsAccessed; + + hash_code Hash = + hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands); + const auto &It = MapOfInstructionMappings.find(Hash); + if (It != MapOfInstructionMappings.end()) + return *It->second; + + ++NumInstructionMappingsCreated; + + auto &InstrMapping = MapOfInstructionMappings[Hash]; + InstrMapping = std::make_unique( + ID, Cost, OperandsMapping, NumOperands); + return *InstrMapping; +} + +const RegisterBankInfo::InstructionMapping & +RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { + const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); + if (Mapping.isValid()) + return Mapping; + llvm_unreachable("The target must implement this"); +} + +RegisterBankInfo::InstructionMappings +RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { + InstructionMappings PossibleMappings; + const auto &Mapping = getInstrMapping(MI); + if (Mapping.isValid()) { + // Put the default mapping first. + PossibleMappings.push_back(&Mapping); + } + + // Then the alternative mapping, if any. + InstructionMappings AltMappings = getInstrAlternativeMappings(MI); + append_range(PossibleMappings, AltMappings); +#ifndef NDEBUG + for (const InstructionMapping *Mapping : PossibleMappings) + assert(Mapping->verify(MI) && "Mapping is invalid"); +#endif + return PossibleMappings; +} + +RegisterBankInfo::InstructionMappings +RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { + // No alternative for MI. 
+ return InstructionMappings(); +} + +void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { + MachineInstr &MI = OpdMapper.getMI(); + MachineRegisterInfo &MRI = OpdMapper.getMRI(); + LLVM_DEBUG(dbgs() << "Applying default-like mapping\n"); + for (unsigned OpIdx = 0, + EndIdx = OpdMapper.getInstrMapping().getNumOperands(); + OpIdx != EndIdx; ++OpIdx) { + LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx); + MachineOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) { + LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n"); + continue; + } + if (!MO.getReg()) { + LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n"); + continue; + } + assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != + 0 && + "Invalid mapping"); + assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns == + 1 && + "This mapping is too complex for this function"); + iterator_range::const_iterator> NewRegs = + OpdMapper.getVRegs(OpIdx); + if (NewRegs.empty()) { + LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); + continue; + } + Register OrigReg = MO.getReg(); + Register NewReg = *NewRegs.begin(); + LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); + MO.setReg(NewReg); + LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); + + // The OperandsMapper creates plain scalar, we may have to fix that. + // Check if the types match and if not, fix that. + LLT OrigTy = MRI.getType(OrigReg); + LLT NewTy = MRI.getType(NewReg); + if (OrigTy != NewTy) { + // The default mapping is not supposed to change the size of + // the storage. However, right now we don't necessarily bump all + // the types to storage size. For instance, we can consider + // s16 G_AND legal whereas the storage size is going to be 32. + assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() && + "Types with difference size cannot be handled by the default " + "mapping"); + LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " + << OrigTy); + MRI.setType(NewReg, OrigTy); + } + LLVM_DEBUG(dbgs() << '\n'); + } +} + +unsigned RegisterBankInfo::getSizeInBits(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + if (Register::isPhysicalRegister(Reg)) { + // The size is not directly available for physical registers. + // Instead, we need to access a register class that contains Reg and + // get the size of that register class. + // Because this is expensive, we'll cache the register class by calling + auto *RC = &getMinimalPhysRegClass(Reg, TRI); + assert(RC && "Expecting Register class"); + return TRI.getRegSizeInBits(*RC); + } + return TRI.getRegSizeInBits(Reg, MRI); +} + +//------------------------------------------------------------------------------ +// Helper classes implementation. +//------------------------------------------------------------------------------ +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +bool RegisterBankInfo::PartialMapping::verify() const { + assert(RegBank && "Register bank not set"); + assert(Length && "Empty mapping"); + assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?"); + // Check if the minimum width fits into RegBank. 
+ assert(RegBank->getSize() >= Length && "Register bank too small for Mask"); + return true; +} + +void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { + OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = "; + if (RegBank) + OS << *RegBank; + else + OS << "nullptr"; +} + +bool RegisterBankInfo::ValueMapping::partsAllUniform() const { + if (NumBreakDowns < 2) + return true; + + const PartialMapping *First = begin(); + for (const PartialMapping *Part = First + 1; Part != end(); ++Part) { + if (Part->Length != First->Length || Part->RegBank != First->RegBank) + return false; + } + + return true; +} + +bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { + assert(NumBreakDowns && "Value mapped nowhere?!"); + unsigned OrigValueBitWidth = 0; + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { + // Check that each register bank is big enough to hold the partial value: + // this check is done by PartialMapping::verify + assert(PartMap.verify() && "Partial mapping is invalid"); + // The original value should completely be mapped. + // Thus the maximum accessed index + 1 is the size of the original value. + OrigValueBitWidth = + std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1); + } + assert(OrigValueBitWidth >= MeaningfulBitWidth && + "Meaningful bits not covered by the mapping"); + APInt ValueMask(OrigValueBitWidth, 0); + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { + // Check that the union of the partial mappings covers the whole value, + // without overlaps. + // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1. + APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx, + PartMap.getHighBitIdx() + 1); + ValueMask ^= PartMapMask; + assert((ValueMask & PartMapMask) == PartMapMask && + "Some partial mappings overlap"); + } + assert(ValueMask.isAllOnes() && "Value is not fully mapped"); + return true; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { + OS << "#BreakDown: " << NumBreakDowns << " "; + bool IsFirst = true; + for (const PartialMapping &PartMap : *this) { + if (!IsFirst) + OS << ", "; + OS << '[' << PartMap << ']'; + IsFirst = false; + } +} + +bool RegisterBankInfo::InstructionMapping::verify( + const MachineInstr &MI) const { + // Check that all the register operands are properly mapped. + // Check the constructor invariant. + // For PHI, we only care about mapping the definition. + assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) && + "NumOperands must match, see constructor"); + assert(MI.getParent() && MI.getMF() && + "MI must be connected to a MachineFunction"); + const MachineFunction &MF = *MI.getMF(); + const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); + (void)RBI; + + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + const MachineOperand &MO = MI.getOperand(Idx); + if (!MO.isReg()) { + assert(!getOperandMapping(Idx).isValid() && + "We should not care about non-reg mapping"); + continue; + } + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(getOperandMapping(Idx).isValid() && + "We must have a mapping for reg operands"); + const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); + (void)MOMapping; + // Register size in bits. + // This size must match what the mapping expects. 
+ assert(MOMapping.verify(RBI->getSizeInBits( + Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) && + "Value mapping is invalid"); + } + return true; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const { + OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: "; + + for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { + const ValueMapping &ValMapping = getOperandMapping(OpIdx); + if (OpIdx) + OS << ", "; + OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}'; + } +} + +const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1; + +RegisterBankInfo::OperandsMapper::OperandsMapper( + MachineInstr &MI, const InstructionMapping &InstrMapping, + MachineRegisterInfo &MRI) + : MRI(MRI), MI(MI), InstrMapping(InstrMapping) { + unsigned NumOpds = InstrMapping.getNumOperands(); + OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx); + assert(InstrMapping.verify(MI) && "Invalid mapping for MI"); +} + +iterator_range::iterator> +RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + unsigned NumPartialVal = + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; + int StartIdx = OpToNewVRegIdx[OpIdx]; + + if (StartIdx == OperandsMapper::DontKnowIdx) { + // This is the first time we try to access OpIdx. + // Create the cells that will hold all the partial values at the + // end of the list of NewVReg. + StartIdx = NewVRegs.size(); + OpToNewVRegIdx[OpIdx] = StartIdx; + for (unsigned i = 0; i < NumPartialVal; ++i) + NewVRegs.push_back(0); + } + SmallVectorImpl::iterator End = + getNewVRegsEnd(StartIdx, NumPartialVal); + + return make_range(&NewVRegs[StartIdx], End); +} + +SmallVectorImpl::const_iterator +RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, + unsigned NumVal) const { + return const_cast(this)->getNewVRegsEnd(StartIdx, NumVal); +} +SmallVectorImpl::iterator +RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, + unsigned NumVal) { + assert((NewVRegs.size() == StartIdx + NumVal || + NewVRegs.size() > StartIdx + NumVal) && + "NewVRegs too small to contain all the partial mapping"); + return NewVRegs.size() <= StartIdx + NumVal ? NewVRegs.end() + : &NewVRegs[StartIdx + NumVal]; +} + +void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + iterator_range::iterator> NewVRegsForOpIdx = + getVRegsMem(OpIdx); + const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx); + const PartialMapping *PartMap = ValMapping.begin(); + for (Register &NewVReg : NewVRegsForOpIdx) { + assert(PartMap != ValMapping.end() && "Out-of-bound access"); + assert(NewVReg == 0 && "Register has already been created"); + // The new registers are always bound to scalar with the right size. + // The actual type has to be set when the target does the mapping + // of the instruction. + // The rationale is that this generic code cannot guess how the + // target plans to split the input type. 
+ NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length)); + MRI.setRegBank(NewVReg, *PartMap->RegBank); + ++PartMap; + } +} + +void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, + unsigned PartialMapIdx, + Register NewVReg) { + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns > + PartialMapIdx && + "Out-of-bound access for partial mapping"); + // Make sure the memory is initialized for that operand. + (void)getVRegsMem(OpIdx); + assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 && + "This value is already set"); + NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg; +} + +iterator_range::const_iterator> +RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, + bool ForDebug) const { + (void)ForDebug; + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + int StartIdx = OpToNewVRegIdx[OpIdx]; + + if (StartIdx == OperandsMapper::DontKnowIdx) + return make_range(NewVRegs.end(), NewVRegs.end()); + + unsigned PartMapSize = + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; + SmallVectorImpl::const_iterator End = + getNewVRegsEnd(StartIdx, PartMapSize); + iterator_range::const_iterator> Res = + make_range(&NewVRegs[StartIdx], End); +#ifndef NDEBUG + for (Register VReg : Res) + assert((VReg || ForDebug) && "Some registers are uninitialized"); +#endif + return Res; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const { + print(dbgs(), true); + dbgs() << '\n'; +} +#endif + +void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, + bool ForDebug) const { + unsigned NumOpds = getInstrMapping().getNumOperands(); + if (ForDebug) { + OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n'; + // Print out the internal state of the index table. + OS << "Populated indices (CellNumber, IndexInNewVRegs): "; + bool IsFirst = true; + for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { + if (OpToNewVRegIdx[Idx] != DontKnowIdx) { + if (!IsFirst) + OS << ", "; + OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')'; + IsFirst = false; + } + } + OS << '\n'; + } else + OS << "Mapping ID: " << getInstrMapping().getID() << ' '; + + OS << "Operand Mapping: "; + // If we have a function, we can pretty print the name of the registers. + // Otherwise we will print the raw numbers. + const TargetRegisterInfo *TRI = + getMI().getParent() && getMI().getMF() + ? 
getMI().getMF()->getSubtarget().getRegisterInfo() + : nullptr; + bool IsFirst = true; + for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { + if (OpToNewVRegIdx[Idx] == DontKnowIdx) + continue; + if (!IsFirst) + OS << ", "; + IsFirst = false; + OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", ["; + bool IsFirstNewVReg = true; + for (Register VReg : getVRegs(Idx)) { + if (!IsFirstNewVReg) + OS << ", "; + IsFirstNewVReg = false; + OS << printReg(VReg, TRI); + } + OS << "])"; + } +} diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp index 65a65b9cae95..374fcc9a6014 100644 --- a/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -44,9 +43,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { bool Update = false; MF = &mf; + auto &STI = MF->getSubtarget(); + // Allocate new array the first time we see a new target. - if (MF->getSubtarget().getRegisterInfo() != TRI) { - TRI = MF->getSubtarget().getRegisterInfo(); + if (STI.getRegisterInfo() != TRI) { + TRI = STI.getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); Update = true; } @@ -68,6 +69,18 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } CalleeSavedRegs = CSR; + // Even if CSR list is same, we could have had a different allocation order + // if ignoreCSRForAllocationOrder is evaluated differently. + BitVector CSRHintsForAllocOrder(TRI->getNumRegs()); + for (const MCPhysReg *I = CSR; *I; ++I) + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) + CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI); + if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() || + IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) { + Update = true; + IgnoreCSRForAllocOrder = CSRHintsForAllocOrder; + } + RegCosts = TRI->getRegisterCosts(*MF); // Different reserved registers? 
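The RegisterClassInfo hunk above invalidates the cached allocation orders
whenever the subtarget's ignoreCSRForAllocationOrder answer changes for any
callee-saved register alias. A minimal sketch of how a target might implement
that hook; the class and register names are hypothetical, only the hook's
shape follows the call site in the hunk:

    bool MyTargetSubtargetInfo::ignoreCSRForAllocationOrder(
        const MachineFunction &MF, unsigned PhysReg) const {
      // Order this callee-saved register as if it were a scratch register;
      // RegisterClassInfo recomputes allocation orders when this changes.
      return PhysReg == MyTarget::R9;
    }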
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index a917b0d27d4a..930d05324440 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1647,7 +1647,7 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { MachineOperand &MO = CopyMI->getOperand(i-1); if (MO.isReg() && MO.isUse()) - CopyMI->RemoveOperand(i-1); + CopyMI->removeOperand(i-1); } LLVM_DEBUG(dbgs() << "\tReplaced copy of value with an " "implicit def\n"); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index 424ad7419165..289d31be2d2d 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -37,11 +37,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 6858d7233bc5..9d9cdf9edbb3 100644 --- a/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -22,8 +22,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include -#include #include #include #include diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp index 49859aeec78b..01886e40a4a3 100644 --- a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp +++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -12,13 +12,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" /// \file RemoveRedundantDebugValues.cpp /// diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 0872ec303460..466022ae0ac1 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -33,9 +33,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 0ff045fa787e..87b8ac59bdba 100644 --- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -1,4 +1,4 @@ -//=== ReplaceWithVeclib.cpp - Replace vector instrinsics with veclib calls ===// +//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -23,7 +23,6 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 using namespace llvm;
@@ -110,7 +109,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
     auto *ArgType = Arg.value()->getType();
     // Vector calls to intrinsics can still have
     // scalar operands for specific arguments.
-    if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+    if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
       ScalarTypes.push_back(ArgType);
     } else {
       // The argument in this place should be a vector if
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 3d8a7eecce18..e7116ec3ea28 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -17,7 +17,6 @@
 #include "SafeStackLayout.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -49,10 +48,10 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -97,31 +96,12 @@ static cl::opt<bool>
     SafeStackUsePointerAddress("safestack-use-pointer-address",
                                cl::init(false), cl::Hidden);
 
-// Disabled by default due to PR32143.
 static cl::opt<bool> ClColoring("safe-stack-coloring",
                                 cl::desc("enable safe stack coloring"),
-                                cl::Hidden, cl::init(false));
+                                cl::Hidden, cl::init(true));
 
 namespace {
 
-/// Rewrite an SCEV expression for a memory access address to an expression that
-/// represents offset from the given alloca.
-///
-/// The implementation simply replaces all mentions of the alloca with zero.
-class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
-  const Value *AllocaPtr;
-
-public:
-  AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
-      : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
-
-  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-    if (Expr->getValue() == AllocaPtr)
-      return SE.getZero(Expr->getType());
-    return Expr;
-  }
-};
-
 /// The SafeStack pass splits the stack of each function into the safe
 /// stack, which is only accessed through memory safe dereferences (as
 /// determined statically), and the unsafe stack, which contains all
@@ -147,7 +127,7 @@ class SafeStack {
   ///
   /// 16 seems like a reasonable upper bound on the alignment of objects that we
   /// might expect to appear on the stack on most common targets.
-  static constexpr uint64_t StackAlignment = 16;
+  static constexpr Align StackAlignment = Align::Constant<16>();
 
   /// Return the value of the stack canary.
 Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -221,7 +201,7 @@ public:
   bool run();
 };
 
-constexpr uint64_t SafeStack::StackAlignment;
+constexpr Align SafeStack::StackAlignment;
 
 uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
   uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
@@ -236,9 +216,18 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
 
 bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
                              const Value *AllocaPtr, uint64_t AllocaSize) {
-  AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
-  const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+  const SCEV *AddrExpr = SE.getSCEV(Addr);
+  const auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(AddrExpr));
+  if (!Base || Base->getValue() != AllocaPtr) {
+    LLVM_DEBUG(
+        dbgs() << "[SafeStack] "
+               << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+               << *AllocaPtr << "\n"
+               << "SCEV " << *AddrExpr << " not directly based on alloca\n");
+    return false;
+  }
 
+  const SCEV *Expr = SE.removePointerBase(AddrExpr);
   uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType());
   ConstantRange AccessStartRange = SE.getUnsignedRange(Expr);
   ConstantRange SizeRange =
@@ -645,6 +634,13 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
   // FIXME: no need to update BasePointer in leaf functions.
   unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
 
+  MDBuilder MDB(F.getContext());
+  SmallVector<Metadata *, 2> Data;
+  Data.push_back(MDB.createString("unsafe-stack-size"));
+  Data.push_back(MDB.createConstant(ConstantInt::get(Int32Ty, FrameSize)));
+  MDNode *MD = MDTuple::get(F.getContext(), Data);
+  F.setMetadata(LLVMContext::MD_annotation, MD);
+
   // Update shadow stack pointer in the function epilogue.
   IRB.SetInsertPoint(BasePointer->getNextNode());
 
@@ -677,13 +673,12 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
     SP = IRB.CreateSub(SP, Size);
 
     // Align the SP value to satisfy the AllocaInst, type and stack alignments.
-    uint64_t Align =
-        std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
-                 StackAlignment);
+    auto Align = std::max(std::max(DL.getPrefTypeAlign(Ty), AI->getAlign()),
+                          StackAlignment);
 
-    assert(isPowerOf2_32(Align));
     Value *NewTop = IRB.CreateIntToPtr(
-        IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+        IRB.CreateAnd(SP,
+                      ConstantInt::get(IntPtrTy, ~uint64_t(Align.value() - 1))),
         StackPtrTy);
 
     // Save the stack pointer.
diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp
index 602afcfa9001..f821145f4b63 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -11,7 +11,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include
 #include
diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h
index 4ac7af2059f5..6126c7a67854 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/llvm/lib/CodeGen/SafeStackLayout.h
@@ -52,7 +52,7 @@ class StackLayout {
   void layoutObject(StackObject &Obj);
 
 public:
-  StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}
+  StackLayout(Align StackAlignment) : MaxAlignment(StackAlignment) {}
 
   /// Add an object to the stack frame. Value pointer is opaque and used as a
   /// handle to retrieve the object's offset in the frame later.
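The rewritten IsAccessSafe above asks SCEV for the pointer base and the
remaining offset range instead of rewriting the address expression; the access
is safe only when every feasible offset keeps the whole access inside the
alloca. A minimal self-contained sketch of that containment test, with plain
integers standing in for the pass's ConstantRange logic (illustrative only):

    #include <cassert>
    #include <cstdint>

    // An access [Start, Start + AccessSize) is safe for every feasible
    // unsigned Start in [MinStart, MaxStart] iff even the largest end
    // offset stays inside the alloca [0, AllocaSize).
    static bool accessIsSafe(uint64_t MinStart, uint64_t MaxStart,
                             uint64_t AccessSize, uint64_t AllocaSize) {
      return MinStart <= MaxStart && MaxStart + AccessSize <= AllocaSize;
    }

    int main() {
      assert(accessIsSafe(0, 8, 4, 16));   // worst case [8,12): fits
      assert(!accessIsSafe(0, 16, 4, 16)); // [16,20) runs past the end
    }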
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0e8e8338b46d..07dcc34fbf15 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,7 +14,6 @@
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/ADT/IntEqClasses.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/iterator_range.h"
@@ -40,9 +39,6 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/LaneBitmask.h"
@@ -65,9 +61,9 @@ using namespace llvm;
 
 #define DEBUG_TYPE "machine-scheduler"
 
-static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
-                                     cl::ZeroOrMore, cl::init(false),
-                                     cl::desc("Enable use of AA during MI DAG construction"));
+static cl::opt<bool>
+    EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+                    cl::desc("Enable use of AA during MI DAG construction"));
 
 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true),
                              cl::desc("Enable use of TBAA during MI DAG construction"));
diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 05b2a3764cca..e7b14944acfe 100644
--- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -10,13 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
new file mode 100644
index 000000000000..c199b6a6cca8
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -0,0 +1,989 @@
+//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts selects to conditional jumps when profitable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ScaledNumber.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <memory>
+#include <queue>
+#include <stack>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "select-optimize"
+
+STATISTIC(NumSelectOptAnalyzed,
+          "Number of select groups considered for conversion to branch");
+STATISTIC(NumSelectConvertedExpColdOperand,
+          "Number of select groups converted due to expensive cold operand");
+STATISTIC(NumSelectConvertedHighPred,
+          "Number of select groups converted due to high-predictability");
+STATISTIC(NumSelectUnPred,
+          "Number of select groups not converted due to unpredictability");
+STATISTIC(NumSelectColdBB,
+          "Number of select groups not converted due to cold basic block");
+STATISTIC(NumSelectConvertedLoop,
+          "Number of select groups converted due to loop-level analysis");
+STATISTIC(NumSelectsConverted, "Number of selects converted");
+
+static cl::opt<unsigned> ColdOperandThreshold(
+    "cold-operand-threshold",
+    cl::desc("Maximum frequency of path for an operand to be considered cold."),
+    cl::init(20), cl::Hidden);
+
+static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
+    "cold-operand-max-cost-multiplier",
+    cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
+             "slice of a cold operand to be considered inexpensive."),
+    cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned>
+    GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
+                          cl::desc("Gradient gain threshold (%)."),
+                          cl::init(25), cl::Hidden);
+
+static cl::opt<unsigned>
+    GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
+                       cl::desc("Minimum gain per loop (in cycles) threshold."),
+                       cl::init(4), cl::Hidden);
+
+static cl::opt<unsigned> GainRelativeThreshold(
+    "select-opti-loop-relative-gain-threshold",
+    cl::desc(
+        "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
+    cl::init(8), cl::Hidden);
+
+static cl::opt<unsigned> MispredictDefaultRate(
+    "mispredict-default-rate", cl::Hidden, cl::init(25),
+    cl::desc("Default mispredict rate (initialized to 25%)."));
+
+static cl::opt<bool>
+    DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
+                               cl::init(false),
+                               cl::desc("Disable loop-level heuristics."));
+
+namespace {
+
+class SelectOptimize : public FunctionPass {
+  const TargetMachine *TM = nullptr;
+  const TargetSubtargetInfo *TSI;
+  const TargetLowering *TLI = nullptr;
+  const TargetTransformInfo *TTI = nullptr;
+  const LoopInfo *LI;
+  DominatorTree *DT;
+  std::unique_ptr<BlockFrequencyInfo> BFI;
+  std::unique_ptr<BranchProbabilityInfo> BPI;
+  ProfileSummaryInfo *PSI;
+  OptimizationRemarkEmitter *ORE;
+  TargetSchedModel TSchedModel;
+
+public:
+  static char ID;
+
+  SelectOptimize() : FunctionPass(ID) {
+    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+  }
+
+private:
+  // Select groups consist of consecutive select instructions with the same
+  // condition.
+  using SelectGroup = SmallVector<SelectInst *, 2>;
+  using SelectGroups = SmallVector<SelectGroup, 2>;
+
+  using Scaled64 = ScaledNumber<uint64_t>;
+
+  struct CostInfo {
+    /// Predicated cost (with selects as conditional moves).
+    Scaled64 PredCost;
+    /// Non-predicated cost (with selects converted to branches).
+    Scaled64 NonPredCost;
+  };
+
+  // Converts select instructions of a function to conditional jumps when deemed
+  // profitable. Returns true if at least one select was converted.
+  bool optimizeSelects(Function &F);
+
+  // Heuristics for determining which select instructions can be profitably
+  // converted to branches. Separate heuristics for selects in inner-most loops
+  // and the rest of code regions (base heuristics for non-inner-most loop
+  // regions).
+  void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
+  void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
+
+  // Converts to branches the select groups that were deemed
+  // profitable-to-convert.
+  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
+
+  // Splits selects of a given basic block into select groups.
+  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
+
+  // Determines for which select groups it is profitable converting to branches
+  // (base and inner-most-loop heuristics).
+  void findProfitableSIGroupsBase(SelectGroups &SIGroups,
+                                  SelectGroups &ProfSIGroups);
+  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
+                                        SelectGroups &ProfSIGroups);
+
+  // Determines if a select group should be converted to a branch (base
+  // heuristics).
+  bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
+
+  // Returns true if there are expensive instructions in the cold value
+  // operand's (if any) dependence slice of any of the selects of the given
+  // group.
+  bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
+
+  // For a given source instruction, collect its backwards dependence slice
+  // consisting of instructions exclusively computed for producing the operands
+  // of the source instruction.
+  void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
+                             bool ForSinking = false);
+
+  // Returns true if the condition of the select is highly predictable.
+  bool isSelectHighlyPredictable(const SelectInst *SI);
+
+  // Loop-level checks to determine if a non-predicated version (with branches)
+  // of the given loop is more profitable than its predicated version.
+  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
+
+  // Computes instruction and loop-critical-path costs for both the predicated
+  // and non-predicated version of the given loop.
+  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
+                        DenseMap<const Instruction *, CostInfo> &InstCostMap,
+                        CostInfo *LoopCost);
+
+  // Returns a set of all the select instructions in the given select groups.
+  SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
+
+  // Returns the latency cost of a given instruction.
+  Optional<uint64_t> computeInstCost(const Instruction *I);
+
+  // Returns the misprediction cost of a given select when converted to branch.
+  Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
+
+  // Returns the cost of a branch when the prediction is correct.
+  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+                                const SelectInst *SI);
+
+  // Returns true if the target architecture supports lowering a given select.
+  bool isSelectKindSupported(SelectInst *SI);
+};
+} // namespace
+
+char SelectOptimize::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+                    false)
+
+FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
+
+bool SelectOptimize::runOnFunction(Function &F) {
+  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+  TSI = TM->getSubtargetImpl(F);
+  TLI = TSI->getTargetLowering();
+
+  // If none of the select types is supported then skip this pass.
+  // This is an optimization pass. Legality issues will be handled by
+  // instruction selection.
+  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+    return false;
+
+  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  BPI.reset(new BranchProbabilityInfo(F, *LI));
+  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  TSchedModel.init(TSI);
+
+  // When optimizing for size, selects are preferable over branches.
+  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+    return false;
+
+  return optimizeSelects(F);
+}
+
+bool SelectOptimize::optimizeSelects(Function &F) {
+  // Determine for which select groups it is profitable converting to branches.
+  SelectGroups ProfSIGroups;
+  // Base heuristics apply only to non-loops and outer loops.
+  optimizeSelectsBase(F, ProfSIGroups);
+  // Separate heuristics for inner-most loops.
+  optimizeSelectsInnerLoops(F, ProfSIGroups);
+
+  // Convert to branches the select groups that were deemed
+  // profitable-to-convert.
+  convertProfitableSIGroups(ProfSIGroups);
+
+  // Code modified if at least one select group was converted.
+  return !ProfSIGroups.empty();
+}
+
+void SelectOptimize::optimizeSelectsBase(Function &F,
+                                         SelectGroups &ProfSIGroups) {
+  // Collect all the select groups.
+  SelectGroups SIGroups;
+  for (BasicBlock &BB : F) {
+    // Base heuristics apply only to non-loops and outer loops.
+    Loop *L = LI->getLoopFor(&BB);
+    if (L && L->isInnermost())
+      continue;
+    collectSelectGroups(BB, SIGroups);
+  }
+
+  // Determine for which select groups it is profitable converting to branches.
+  findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
+}
+
+void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
+                                               SelectGroups &ProfSIGroups) {
+  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
+  // Need to check size on each iteration as we accumulate child loops.
+  for (unsigned long i = 0; i < Loops.size(); ++i)
+    for (Loop *ChildL : Loops[i]->getSubLoops())
+      Loops.push_back(ChildL);
+
+  for (Loop *L : Loops) {
+    if (!L->isInnermost())
+      continue;
+
+    SelectGroups SIGroups;
+    for (BasicBlock *BB : L->getBlocks())
+      collectSelectGroups(*BB, SIGroups);
+
+    findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
+  }
+}
+
+/// If \p isTrue is true, return the true value of \p SI, otherwise return
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+                    const SmallPtrSet<const Instruction *, 2> &Selects) {
+  Value *V = nullptr;
+  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+       DefSI = dyn_cast<SelectInst>(V)) {
+    assert(DefSI->getCondition() == SI->getCondition() &&
+           "The condition of DefSI does not match with SI");
+    V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+  }
+  assert(V && "Failed to get select true/false value");
+  return V;
+}
+
+void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+  for (SelectGroup &ASI : ProfSIGroups) {
+    // The code transformation here is a modified version of the sinking
+    // transformation in CodeGenPrepare::optimizeSelectInst with a more
+    // aggressive strategy of which instructions to sink.
+    //
+    // TODO: eliminate the redundancy of logic transforming selects to branches
+    // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
+    // selects for all cases (with and without profile information).
+
+    // Transform a sequence like this:
+    //    start:
+    //       %cmp = cmp uge i32 %a, %b
+    //       %sel = select i1 %cmp, i32 %c, i32 %d
+    //
+    // Into:
+    //    start:
+    //       %cmp = cmp uge i32 %a, %b
+    //       %cmp.frozen = freeze %cmp
+    //       br i1 %cmp.frozen, label %select.true, label %select.false
+    //    select.true:
+    //       br label %select.end
+    //    select.false:
+    //       br label %select.end
+    //    select.end:
+    //       %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+    //
+    // %cmp should be frozen, otherwise it may introduce undefined behavior.
+    // In addition, we may sink instructions that produce %c or %d into the
+    // destination(s) of the new branch.
+    // If the true or false blocks do not contain a sunken instruction, that
+    // block and its branch may be optimized away. In that case, one side of the
+    // first branch will point directly to select.end, and the corresponding PHI
+    // predecessor block will be the start block.
+
+    // Find all the instructions that can be soundly sunk to the true/false
+    // blocks. These are instructions that are computed solely for producing the
+    // operands of the select instructions in the group and can be sunk without
+    // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
+    // with side effects).
+    SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
+    typedef std::stack<Instruction *>::size_type StackSizeType;
+    StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
+    for (SelectInst *SI : ASI) {
+      // For each select, compute the sinkable dependence chains of the true and
+      // false operands.
+      if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
+        std::stack<Instruction *> TrueSlice;
+        getExclBackwardsSlice(TI, TrueSlice, true);
+        maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
+        TrueSlices.push_back(TrueSlice);
+      }
+      if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
+        std::stack<Instruction *> FalseSlice;
+        getExclBackwardsSlice(FI, FalseSlice, true);
+        maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
+        FalseSlices.push_back(FalseSlice);
+      }
+    }
+    // In the case of multiple select instructions in the same group, the order
+    // of non-dependent instructions (instructions of different dependence
+    // slices) in the true/false blocks appears to affect performance.
+    // Interleaving the slices seems to experimentally be the optimal approach.
+    // This interleaving scheduling allows for more ILP (with a natural downside
+    // of increasing a bit register pressure) compared to a simple ordering of
+    // one whole chain after another. One would expect that this ordering would
+    // not matter since the scheduling in the backend of the compiler would
+    // take care of it, but apparently the scheduler fails to deliver optimal
+    // ILP with a naive ordering here.
+    SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
+    for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
+      for (auto &S : TrueSlices) {
+        if (!S.empty()) {
+          TrueSlicesInterleaved.push_back(S.top());
+          S.pop();
+        }
+      }
+    }
+    for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
+      for (auto &S : FalseSlices) {
+        if (!S.empty()) {
+          FalseSlicesInterleaved.push_back(S.top());
+          S.pop();
+        }
+      }
+    }
+
+    // We split the block containing the select(s) into two blocks.
+    SelectInst *SI = ASI.front();
+    SelectInst *LastSI = ASI.back();
+    BasicBlock *StartBlock = SI->getParent();
+    BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+    BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+    // Delete the unconditional branch that was just created by the split.
+    StartBlock->getTerminator()->eraseFromParent();
+
+    // Move any debug/pseudo instructions that were in-between the select
+    // group to the newly-created end block.
+    SmallVector<Instruction *, 2> DebugPseudoINS;
+    auto DIt = SI->getIterator();
+    while (&*DIt != LastSI) {
+      if (DIt->isDebugOrPseudoInst())
+        DebugPseudoINS.push_back(&*DIt);
+      DIt++;
+    }
+    for (auto DI : DebugPseudoINS) {
+      DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+    }
+
+    // These are the new basic blocks for the conditional branch.
+    // At least one will become an actual new basic block.
+    BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
+    BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
+    if (!TrueSlicesInterleaved.empty()) {
+      TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
+                                     EndBlock->getParent(), EndBlock);
+      TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+      TrueBranch->setDebugLoc(LastSI->getDebugLoc());
+      for (Instruction *TrueInst : TrueSlicesInterleaved)
+        TrueInst->moveBefore(TrueBranch);
+    }
+    if (!FalseSlicesInterleaved.empty()) {
+      FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
+                                      EndBlock->getParent(), EndBlock);
+      FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+      FalseBranch->setDebugLoc(LastSI->getDebugLoc());
+      for (Instruction *FalseInst : FalseSlicesInterleaved)
+        FalseInst->moveBefore(FalseBranch);
+    }
+    // If there was nothing to sink, then arbitrarily choose the 'false' side
+    // for a new input value to the PHI.
+    if (TrueBlock == FalseBlock) {
+      assert(TrueBlock == nullptr &&
+             "Unexpected basic block transform while optimizing select");
+
+      FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+                                      EndBlock->getParent(), EndBlock);
+      auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+      FalseBranch->setDebugLoc(SI->getDebugLoc());
+    }
+
+    // Insert the real conditional branch based on the original condition.
+    // If we did not create a new block for one of the 'true' or 'false' paths
+    // of the condition, it means that side of the branch goes to the end block
+    // directly and the path originates from the start block from the point of
+    // view of the new PHI.
+    BasicBlock *TT, *FT;
+    if (TrueBlock == nullptr) {
+      TT = EndBlock;
+      FT = FalseBlock;
+      TrueBlock = StartBlock;
+    } else if (FalseBlock == nullptr) {
+      TT = TrueBlock;
+      FT = EndBlock;
+      FalseBlock = StartBlock;
+    } else {
+      TT = TrueBlock;
+      FT = FalseBlock;
+    }
+    IRBuilder<> IB(SI);
+    auto *CondFr =
+        IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+    IB.CreateCondBr(CondFr, TT, FT, SI);
+
+    SmallPtrSet<const Instruction *, 2> INS;
+    INS.insert(ASI.begin(), ASI.end());
+    // Use reverse iterator because later select may use the value of the
+    // earlier select, and we need to propagate value through earlier select
+    // to get the PHI operand.
+    for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+      SelectInst *SI = *It;
+      // The select itself is replaced with a PHI Node.
+      PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+      PN->takeName(SI);
+      PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+      PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+      PN->setDebugLoc(SI->getDebugLoc());
+
+      SI->replaceAllUsesWith(PN);
+      SI->eraseFromParent();
+      INS.erase(SI);
+      ++NumSelectsConverted;
+    }
+  }
+}
+
+void SelectOptimize::collectSelectGroups(BasicBlock &BB,
+                                         SelectGroups &SIGroups) {
+  BasicBlock::iterator BBIt = BB.begin();
+  while (BBIt != BB.end()) {
+    Instruction *I = &*BBIt++;
+    if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+      SelectGroup SIGroup;
+      SIGroup.push_back(SI);
+      while (BBIt != BB.end()) {
+        Instruction *NI = &*BBIt;
+        SelectInst *NSI = dyn_cast<SelectInst>(NI);
+        if (NSI && SI->getCondition() == NSI->getCondition()) {
+          SIGroup.push_back(NSI);
+        } else if (!NI->isDebugOrPseudoInst()) {
+          // Debug/pseudo instructions should be skipped and not prevent the
+          // formation of a select group.
+          break;
+        }
+        ++BBIt;
+      }
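+
+      // Illustrative example (not in the original file): consecutive selects
+      // that share a condition, e.g.
+      //   %x = select i1 %cond, i32 %a, i32 %b
+      //   %y = select i1 %cond, i32 %c, i32 %d
+      // end up in the same SIGroup and are analyzed and converted together.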
+
+      // If the select type is not supported, no point optimizing it.
+      // Instruction selection will take care of it.
+      if (!isSelectKindSupported(SI))
+        continue;
+
+      SIGroups.push_back(SIGroup);
+    }
+  }
+}
+
+void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
+                                                SelectGroups &ProfSIGroups) {
+  for (SelectGroup &ASI : SIGroups) {
+    ++NumSelectOptAnalyzed;
+    if (isConvertToBranchProfitableBase(ASI))
+      ProfSIGroups.push_back(ASI);
+  }
+}
+
+void SelectOptimize::findProfitableSIGroupsInnerLoops(
+    const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
+  NumSelectOptAnalyzed += SIGroups.size();
+  // For each select group in an inner-most loop,
+  // a branch is more preferable than a select/conditional-move if:
+  // i) conversion to branches for all the select groups of the loop satisfies
+  //    loop-level heuristics including reducing the loop's critical path by
+  //    some threshold (see SelectOptimize::checkLoopHeuristics); and
+  // ii) the total cost of the select group is cheaper with a branch compared
+  //     to its predicated version. The cost is in terms of latency and the cost
+  //     of a select group is the cost of its most expensive select instruction
+  //     (assuming infinite resources and thus fully leveraging available ILP).
+
+  DenseMap<const Instruction *, CostInfo> InstCostMap;
+  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
+                          {Scaled64::getZero(), Scaled64::getZero()}};
+  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
+      !checkLoopHeuristics(L, LoopCost)) {
+    return;
+  }
+
+  for (SelectGroup &ASI : SIGroups) {
+    // Assuming infinite resources, the cost of a group of instructions is the
+    // cost of the most expensive instruction of the group.
+    Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
+    for (SelectInst *SI : ASI) {
+      SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
+      BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
+    }
+    if (BranchCost < SelectCost) {
+      OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
+      OR << "Profitable to convert to branch (loop analysis). BranchCost="
+         << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
+         << ". ";
+      ORE->emit(OR);
+      ++NumSelectConvertedLoop;
+      ProfSIGroups.push_back(ASI);
+    } else {
+      OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+      ORmiss << "Select is more profitable (loop analysis). BranchCost="
+             << BranchCost.toString()
+             << ", SelectCost=" << SelectCost.toString() << ". ";
+      ORE->emit(ORmiss);
+    }
+  }
+}
+
+bool SelectOptimize::isConvertToBranchProfitableBase(
+    const SmallVector<SelectInst *, 2> &ASI) {
+  SelectInst *SI = ASI.front();
+  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
+  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
+
+  // Skip cold basic blocks. Better to optimize for size for cold blocks.
+  if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+    ++NumSelectColdBB;
+    ORmiss << "Not converted to branch because of cold basic block. ";
+    ORE->emit(ORmiss);
+    return false;
+  }
+
+  // If unpredictable, branch form is less profitable.
+  if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
+    ++NumSelectUnPred;
+    ORmiss << "Not converted to branch because of unpredictable branch. ";
+    ORE->emit(ORmiss);
+    return false;
+  }
+
+  // If highly predictable, branch form is more profitable, unless a
+  // predictable select is inexpensive in the target architecture.
+  if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
+    ++NumSelectConvertedHighPred;
+    OR << "Converted to branch because of highly predictable branch. ";
"; + ORE->emit(OR); + return true; + } + + // Look for expensive instructions in the cold operand's (if any) dependence + // slice of any of the selects in the group. + if (hasExpensiveColdOperand(ASI)) { + ++NumSelectConvertedExpColdOperand; + OR << "Converted to branch because of expensive cold operand."; + ORE->emit(OR); + return true; + } + + ORmiss << "Not profitable to convert to branch (base heuristic)."; + ORE->emit(ORmiss); + return false; +} + +static InstructionCost divideNearest(InstructionCost Numerator, + uint64_t Denominator) { + return (Numerator + (Denominator / 2)) / Denominator; +} + +bool SelectOptimize::hasExpensiveColdOperand( + const SmallVector &ASI) { + bool ColdOperand = false; + uint64_t TrueWeight, FalseWeight, TotalWeight; + if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t MinWeight = std::min(TrueWeight, FalseWeight); + TotalWeight = TrueWeight + FalseWeight; + // Is there a path with frequency 100 * MinWeight; + } else if (PSI->hasProfileSummary()) { + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); + ORmiss << "Profile data available but missing branch-weights metadata for " + "select instruction. "; + ORE->emit(ORmiss); + } + if (!ColdOperand) + return false; + // Check if the cold path's dependence slice is expensive for any of the + // selects of the group. + for (SelectInst *SI : ASI) { + Instruction *ColdI = nullptr; + uint64_t HotWeight; + if (TrueWeight < FalseWeight) { + ColdI = dyn_cast(SI->getTrueValue()); + HotWeight = FalseWeight; + } else { + ColdI = dyn_cast(SI->getFalseValue()); + HotWeight = TrueWeight; + } + if (ColdI) { + std::stack ColdSlice; + getExclBackwardsSlice(ColdI, ColdSlice); + InstructionCost SliceCost = 0; + while (!ColdSlice.empty()) { + SliceCost += TTI->getInstructionCost(ColdSlice.top(), + TargetTransformInfo::TCK_Latency); + ColdSlice.pop(); + } + // The colder the cold value operand of the select is the more expensive + // the cmov becomes for computing the cold value operand every time. Thus, + // the colder the cold operand is the more its cost counts. + // Get nearest integer cost adjusted for coldness. + InstructionCost AdjSliceCost = + divideNearest(SliceCost * HotWeight, TotalWeight); + if (AdjSliceCost >= + ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) + return true; + } + } + return false; +} + +// For a given source instruction, collect its backwards dependence slice +// consisting of instructions exclusively computed for the purpose of producing +// the operands of the source instruction. As an approximation +// (sufficiently-accurate in practice), we populate this set with the +// instructions of the backwards dependence slice that only have one-use and +// form an one-use chain that leads to the source instruction. +void SelectOptimize::getExclBackwardsSlice(Instruction *I, + std::stack &Slice, + bool ForSinking) { + SmallPtrSet Visited; + std::queue Worklist; + Worklist.push(I); + while (!Worklist.empty()) { + Instruction *II = Worklist.front(); + Worklist.pop(); + + // Avoid cycles. + if (!Visited.insert(II).second) + continue; + + if (!II->hasOneUse()) + continue; + + // Cannot soundly sink instructions with side-effects. + // Terminator or phi instructions cannot be sunk. + // Avoid sinking other select instructions (should be handled separetely). 
+    if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
+                       isa<SelectInst>(II) || isa<PHINode>(II)))
+      continue;
+
+    // Avoid considering instructions with less frequency than the source
+    // instruction (i.e., avoid colder code regions of the dependence slice).
+    if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
+      continue;
+
+    // Eligible one-use instruction added to the dependence slice.
+    Slice.push(II);
+
+    // Explore all the operands of the current instruction to expand the slice.
+    for (unsigned k = 0; k < II->getNumOperands(); ++k)
+      if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
+        Worklist.push(OpI);
+  }
+}
+
+bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+  uint64_t TrueWeight, FalseWeight;
+  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+    uint64_t Max = std::max(TrueWeight, FalseWeight);
+    uint64_t Sum = TrueWeight + FalseWeight;
+    if (Sum != 0) {
+      auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+      if (Probability > TTI->getPredictableBranchThreshold())
+        return true;
+    }
+  }
+  return false;
+}
+
+bool SelectOptimize::checkLoopHeuristics(const Loop *L,
+                                         const CostInfo LoopCost[2]) {
+  // Loop-level checks to determine if a non-predicated version (with branches)
+  // of the loop is more profitable than its predicated version.
+
+  if (DisableLoopLevelHeuristics)
+    return true;
+
+  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
+                                   L->getHeader()->getFirstNonPHI());
+
+  if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
+      LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
+    ORmissL << "No select conversion in the loop due to no reduction of loop's "
+               "critical path. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+
+  Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
+                      LoopCost[1].PredCost - LoopCost[1].NonPredCost};
+
+  // Profitably converting to branches needs to reduce the loop's critical path
+  // by at least some threshold (absolute gain of GainCycleThreshold cycles and
+  // relative gain of 12.5%).
+  if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
+      Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
+    Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
+    ORmissL << "No select conversion in the loop due to small reduction of "
+               "loop's critical path. Gain="
+            << Gain[1].toString()
+            << ", RelativeGain=" << RelativeGain.toString() << "%. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+
+  // If the loop's critical path involves loop-carried dependences, the gradient
+  // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
+  // This check ensures that the latency reduction for the loop's critical path
+  // keeps decreasing with sufficient rate beyond the two analyzed loop
+  // iterations.
+  if (Gain[1] > Gain[0]) {
+    Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
+                            (LoopCost[1].PredCost - LoopCost[0].PredCost);
+    if (GradientGain < Scaled64::get(GainGradientThreshold)) {
+      ORmissL << "No select conversion in the loop due to small gradient gain. "
+                 "GradientGain="
+              << GradientGain.toString() << "%. ";
+      ORE->emit(ORmissL);
+      return false;
+    }
+  }
+  // If the gain decreases it is not profitable to convert.
+  else if (Gain[1] < Gain[0]) {
+    ORmissL
+        << "No select conversion in the loop due to negative gradient gain. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+
+  // Non-predicated version of the loop is more profitable than its
+  // predicated version.
+  return true;
+}
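+
+// Worked example for the thresholds above (illustrative numbers, not from the
+// source): with PredCost = {20, 40} and NonPredCost = {14, 30} across the two
+// analyzed iterations, Gain = {6, 10}. The absolute gain (10 >= 4 cycles) and
+// the relative gain (10/40 = 25% >= 12.5%) both pass, but the gradient
+// 100 * (10 - 6) / (40 - 20) = 20% misses the 25% threshold, so conversion is
+// rejected.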
+
+// Computes instruction and loop-critical-path costs for both the predicated
+// and non-predicated version of the given loop.
+// Returns false if unable to compute these costs due to invalid cost of loop
+// instruction(s).
+bool SelectOptimize::computeLoopCosts(
+    const Loop *L, const SelectGroups &SIGroups,
+    DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+  const auto &SIset = getSIset(SIGroups);
+  // Compute instruction and loop-critical-path costs across two iterations for
+  // both predicated and non-predicated version.
+  const unsigned Iterations = 2;
+  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
+    // Cost of the loop's critical path.
+    CostInfo &MaxCost = LoopCost[Iter];
+    for (BasicBlock *BB : L->getBlocks()) {
+      for (const Instruction &I : *BB) {
+        if (I.isDebugOrPseudoInst())
+          continue;
+        // Compute the predicated and non-predicated cost of the instruction.
+        Scaled64 IPredCost = Scaled64::getZero(),
+                 INonPredCost = Scaled64::getZero();
+
+        // Assume infinite resources that allow to fully exploit the available
+        // instruction-level parallelism.
+        // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
+        for (const Use &U : I.operands()) {
+          auto UI = dyn_cast<Instruction>(U.get());
+          if (!UI)
+            continue;
+          if (InstCostMap.count(UI)) {
+            IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
+            INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
+          }
+        }
+        auto ILatency = computeInstCost(&I);
+        if (!ILatency) {
+          OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
+          ORmissL << "Invalid instruction cost preventing analysis and "
+                     "optimization of the inner-most loop containing this "
+                     "instruction. ";
+          ORE->emit(ORmissL);
+          return false;
+        }
+        IPredCost += Scaled64::get(ILatency.getValue());
+        INonPredCost += Scaled64::get(ILatency.getValue());
+
+        // For a select that can be converted to branch,
+        // compute its cost as a branch (non-predicated cost).
+        //
+        // BranchCost = PredictedPathCost + MispredictCost
+        // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
+        // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
+        if (SIset.contains(&I)) {
+          auto SI = dyn_cast<SelectInst>(&I);
+
+          Scaled64 TrueOpCost = Scaled64::getZero(),
+                   FalseOpCost = Scaled64::getZero();
+          if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
+            if (InstCostMap.count(TI))
+              TrueOpCost = InstCostMap[TI].NonPredCost;
+          if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
+            if (InstCostMap.count(FI))
+              FalseOpCost = InstCostMap[FI].NonPredCost;
+          Scaled64 PredictedPathCost =
+              getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
+
+          Scaled64 CondCost = Scaled64::getZero();
+          if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
+            if (InstCostMap.count(CI))
+              CondCost = InstCostMap[CI].NonPredCost;
+          Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
+
+          INonPredCost = PredictedPathCost + MispredictCost;
+        }
+
+        InstCostMap[&I] = {IPredCost, INonPredCost};
+        MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
+        MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
+      }
+    }
+  }
+  return true;
+}
+
+SmallPtrSet<const Instruction *, 2>
+SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+  SmallPtrSet<const Instruction *, 2> SIset;
+  for (const SelectGroup &ASI : SIGroups)
+    for (const SelectInst *SI : ASI)
+      SIset.insert(SI);
+  return SIset;
+}
+
+Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+  InstructionCost ICost =
+      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+  if (auto OC = ICost.getValue())
+    return Optional<uint64_t>(*OC);
+  return Optional<uint64_t>(None);
+}
+
+ScaledNumber<uint64_t>
+SelectOptimize::getMispredictionCost(const SelectInst *SI,
+                                     const Scaled64 CondCost) {
+  uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
+
+  // Account for the default misprediction rate when using a branch
+  // (conservatively set to 25% by default).
+  uint64_t MispredictRate = MispredictDefaultRate;
+  // If the select condition is obviously predictable, then the misprediction
+  // rate is zero.
+  if (isSelectHighlyPredictable(SI))
+    MispredictRate = 0;
+
+  // CondCost is included to account for cases where the computation of the
+  // condition is part of a long dependence chain (potentially loop-carried)
+  // that would delay detection of a misprediction and increase its cost.
+  Scaled64 MispredictCost =
+      std::max(Scaled64::get(MispredictPenalty), CondCost) *
+      Scaled64::get(MispredictRate);
+  MispredictCost /= Scaled64::get(100);
+
+  return MispredictCost;
+}
+
+// Returns the cost of a branch when the prediction is correct.
+// TrueCost * TrueProbability + FalseCost * FalseProbability.
+ScaledNumber<uint64_t>
+SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+                                     const SelectInst *SI) {
+  Scaled64 PredPathCost;
+  uint64_t TrueWeight, FalseWeight;
+  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+    uint64_t SumWeight = TrueWeight + FalseWeight;
+    if (SumWeight != 0) {
+      PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
+                     FalseCost * Scaled64::get(FalseWeight);
+      PredPathCost /= Scaled64::get(SumWeight);
+      return PredPathCost;
+    }
+  }
+  // Without branch weight metadata, we assume 75% for the one path and 25% for
+  // the other, and pick the result with the biggest cost.
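+  // Illustrative arithmetic (not in the original file): TrueCost = 2 and
+  // FalseCost = 6 give max(3*2 + 6, 3*6 + 2) / 4 = max(12, 20) / 4 = 5.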
+ PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, + FalseCost * Scaled64::get(3) + TrueCost); + PredPathCost /= Scaled64::get(4); + return PredPathCost; +} + +bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + if (VectorCond) + return false; + TargetLowering::SelectSupportKind SelectKind; + if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + return TLI->isSelectSupported(SelectKind); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 041d7e5b4a4a..aa688d9dda3c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,7 +35,6 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -52,7 +51,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -426,6 +424,7 @@ namespace { SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); + SDValue visitAVG(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitMULO(SDNode *N); @@ -511,6 +510,7 @@ namespace { SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); + SDValue visitFP_TO_BF16(SDNode *N); SDValue visitVECREDUCE(SDNode *N); SDValue visitVPOp(SDNode *N); @@ -520,7 +520,9 @@ namespace { SDValue XformToShuffleWithZero(SDNode *N); bool reassociationCanBreakAddressingModePattern(unsigned Opc, - const SDLoc &DL, SDValue N0, + const SDLoc &DL, + SDNode *N, + SDValue N0, SDValue N1); SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1); @@ -570,6 +572,8 @@ namespace { SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildSREMPow2(SDNode *N); + SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); @@ -583,11 +587,11 @@ namespace { bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, - SDValue InnerPos, SDValue InnerNeg, + SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, - SDValue InnerPos, SDValue InnerNeg, + SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); @@ -665,9 +669,8 @@ namespace { /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). /// MulNode is the original multiply, AddNode is (add x, c1), /// and ConstNode is c2. 
-  bool isMulAddWithConstProfitable(SDNode *MulNode,
-                                   SDValue &AddNode,
-                                   SDValue &ConstNode);
+  bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+                                   SDValue ConstNode);
 
   /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
   /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
@@ -880,8 +883,8 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
 // We provide an Offset so that we can create bitwidths that won't overflow.
 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
-  LHS = LHS.zextOrSelf(Bits);
-  RHS = RHS.zextOrSelf(Bits);
+  LHS = LHS.zext(Bits);
+  RHS = RHS.zext(Bits);
 }
 
 // Return true if this node is a setcc, or is a select_cc
@@ -926,7 +929,7 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 /// it is profitable to do so.
 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
   SDValue N0, N1, N2;
-  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
     return true;
   return false;
 }
@@ -996,6 +999,7 @@ static bool canSplitIdx(LoadSDNode *LD) {
 
 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                              const SDLoc &DL,
+                                                             SDNode *N,
                                                              SDValue N0,
                                                              SDValue N1) {
   // Currently this only tries to ensure we don't undo the GEP splits done by
@@ -1004,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   // (load/store (add, (add, x, offset1), offset2)) ->
   // (load/store (add, x, offset1+offset2)).
 
-  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
-    return false;
+  // (load/store (add, (add, x, y), offset2)) ->
+  // (load/store (add, (add, x, offset2), y)).
 
-  if (N0.hasOneUse())
+  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
     return false;
 
-  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   auto *C2 = dyn_cast<ConstantSDNode>(N1);
-  if (!C1 || !C2)
+  if (!C2)
     return false;
 
-  const APInt &C1APIntVal = C1->getAPIntValue();
   const APInt &C2APIntVal = C2->getAPIntValue();
-  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+  if (C2APIntVal.getSignificantBits() > 64)
     return false;
 
-  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
-  if (CombinedValueIntVal.getBitWidth() > 64)
-    return false;
-  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
-
-  for (SDNode *Node : N0->uses()) {
-    auto LoadStore = dyn_cast<MemSDNode>(Node);
-    if (LoadStore) {
-      // Is x[offset2] already not a legal addressing mode? If so then
-      // reassociating the constants breaks nothing (we test offset2 because
-      // that's the one we hope to fold into the load or store).
+  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+    if (N0.hasOneUse())
+      return false;
+
+    const APInt &C1APIntVal = C1->getAPIntValue();
+    const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+    if (CombinedValueIntVal.getSignificantBits() > 64)
+      return false;
+    const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+    for (SDNode *Node : N->uses()) {
+      if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+        // Is x[offset2] already not a legal addressing mode? If so then
+        // reassociating the constants breaks nothing (we test offset2 because
+        // that's the one we hope to fold into the load or store).
+        TargetLoweringBase::AddrMode AM;
+        AM.HasBaseReg = true;
+        AM.BaseOffs = C2APIntVal.getSExtValue();
+        EVT VT = LoadStore->getMemoryVT();
+        unsigned AS = LoadStore->getAddressSpace();
+        Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+          continue;
+
+        // Would x[offset1+offset2] still be a legal addressing mode?
+        AM.BaseOffs = CombinedValue;
+        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+          return true;
+      }
+    }
+  } else {
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
+      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+        return false;
+
+    for (SDNode *Node : N->uses()) {
+      auto *LoadStore = dyn_cast<MemSDNode>(Node);
+      if (!LoadStore)
+        return false;
+
+      // Is x[offset2] a legal addressing mode? If so then
+      // reassociating the constants breaks address pattern
       TargetLoweringBase::AddrMode AM;
       AM.HasBaseReg = true;
       AM.BaseOffs = C2APIntVal.getSExtValue();
@@ -1038,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
       unsigned AS = LoadStore->getAddressSpace();
       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
-        continue;
-
-      // Would x[offset1+offset2] still be a legal addressing mode?
-      AM.BaseOffs = CombinedValue;
-      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
-        return true;
+        return false;
     }
+    return true;
   }
 
   return false;
@@ -1072,11 +1101,51 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
     if (TLI.isReassocProfitable(DAG, N0, N1)) {
       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
       //              iff (op x, c1) has one use
-      if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
-        return DAG.getNode(Opc, DL, VT, OpNode, N01);
-      return SDValue();
+      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
+      return DAG.getNode(Opc, DL, VT, OpNode, N01);
+    }
+  }
+
+  // Check for repeated operand logic simplifications.
+ if (Opc == ISD::AND || Opc == ISD::OR) { + // (N00 & N01) & N00 --> N00 & N01 + // (N00 & N01) & N01 --> N00 & N01 + // (N00 | N01) | N00 --> N00 | N01 + // (N00 | N01) | N01 --> N00 | N01 + if (N1 == N00 || N1 == N01) + return N0; + } + if (Opc == ISD::XOR) { + // (N00 ^ N01) ^ N00 --> N01 + if (N1 == N00) + return N01; + // (N00 ^ N01) ^ N01 --> N00 + if (N1 == N01) + return N00; + } + + if (TLI.isReassocProfitable(DAG, N0, N1)) { + if (N1 != N01) { + // Reassociate if (op N00, N1) already exists + if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) { + // if Op (Op N00, N1), N01 already exists + // we need to stop reassociating to avoid an infinite loop + if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01})) + return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01); + } + } + + if (N1 != N00) { + // Reassociate if (op N01, N1) already exists + if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) { + // if Op (Op N01, N1), N00 already exists + // we need to stop reassociating to avoid an infinite loop + if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00})) + return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00); + } } } + return SDValue(); } @@ -1103,7 +1172,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; - To[0].getNode()->dump(&DAG); + To[0].dump(&DAG); dbgs() << " and " << NumTo - 1 << " other values\n"); for (unsigned i = 0, e = NumTo; i != e; ++i) assert((!To[i].getNode() || @@ -1115,10 +1184,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { - if (To[i].getNode()) { - AddToWorklist(To[i].getNode()); - AddUsersToWorklist(To[i].getNode()); - } + if (To[i].getNode()) + AddToWorklistWithUsers(To[i].getNode()); } } @@ -1134,9 +1201,8 @@ void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace the old value with the new one. ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); - dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG); + dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n'); // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. @@ -1149,7 +1215,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node.
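The doesNodeExist checks above guard against a rewrite cycle: reassociating toward a node that is already in the DAG is profitable for CSE, but if the rebuilt top-level node also already exists, the mirror rewrite would fire on the next worklist visit and undo this one forever. A toy model of the guard, with expression strings standing in for CSE'd nodes:

#include <cassert>
#include <set>
#include <string>

int main() {
  // Both association orders of the same expression are already present.
  std::set<std::string> DAG = {"((x+c)+y)", "((x+y)+c)"};
  // Proposed rewrite of ((x+c)+y); re-creating a form that already exists
  // would just ping-pong, so the combine must be skipped.
  std::string Candidate = "((x+y)+c)";
  bool ShouldRewrite = DAG.count(Candidate) == 0;
  assert(!ShouldRewrite);
}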
- if (TLO.Old.getNode()->use_empty()) + if (TLO.Old->use_empty()) deleteAndRecombine(TLO.Old.getNode()); } @@ -1196,7 +1262,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; - Trunc.getNode()->dump(&DAG); dbgs() << '\n'); + Trunc.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); @@ -1295,7 +1361,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); bool Replace0 = false; SDValue N0 = Op.getOperand(0); @@ -1322,7 +1388,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { // If operands have a use ordering, make sure we deal with // predecessor first. - if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { + if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) { std::swap(N0, N1); std::swap(NN0, NN1); } @@ -1363,11 +1429,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); bool Replace = false; SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); if (Opc == ISD::SRA) N0 = SExtPromoteOperand(N0, PVT); else if (Opc == ISD::SRL) @@ -1379,6 +1444,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { return SDValue(); SDLoc DL(Op); + SDValue N1 = Op.getOperand(1); SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); @@ -1414,7 +1480,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) - LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); @@ -1455,7 +1521,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; - Result.getNode()->dump(&DAG); dbgs() << '\n'); + Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); @@ -1569,9 +1635,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... 
into: "; RV.dump(&DAG)); - if (N->getNumValues() == RV.getNode()->getNumValues()) + if (N->getNumValues() == RV->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { assert(N->getValueType(0) == RV.getValueType() && @@ -1635,6 +1701,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: return visitAVG(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: @@ -1724,6 +1794,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); case ISD::FREEZE: return visitFREEZE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: @@ -2072,8 +2143,9 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, return false; VT = ST->getMemoryVT(); AS = ST->getAddressSpace(); - } else + } else { return false; + } TargetLowering::AddrMode AM; if (N->getOpcode() == ISD::ADD) { @@ -2094,17 +2166,100 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, else // [reg +/- reg] AM.Scale = 1; - } else + } else { return false; + } return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, VT.getTypeForEVT(*DAG.getContext()), AS); } +/// This inverts a canonicalization in IR that replaces a variable select arm +/// with an identity constant. Codegen improves if we re-use the variable +/// operand rather than load a constant. This can also be converted into a +/// masked vector operation if the target supports it. +static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, + bool ShouldCommuteOperands) { + // Match a select as operand 1. The identity constant that we are looking for + // is only valid as operand 1 of a non-commutative binop. + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (ShouldCommuteOperands) + std::swap(N0, N1); + + // TODO: Should this apply to scalar select too? + if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT) + return SDValue(); + + unsigned Opcode = N->getOpcode(); + EVT VT = N->getValueType(0); + SDValue Cond = N1.getOperand(0); + SDValue TVal = N1.getOperand(1); + SDValue FVal = N1.getOperand(2); + + // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity(). + // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()". + // TODO: With fast-math (NSZ), allow the opposite-sign form of zero? + auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) { + if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) { + switch (Opcode) { + case ISD::FADD: // X + -0.0 --> X + return C->isZero() && C->isNegative(); + case ISD::FSUB: // X - 0.0 --> X + return C->isZero() && !C->isNegative(); + case ISD::FMUL: // X * 1.0 --> X + case ISD::FDIV: // X / 1.0 --> X + return C->isExactlyValue(1.0); + } + } + if (ConstantSDNode *C = isConstOrConstSplat(V)) { + switch (Opcode) { + case ISD::ADD: // X + 0 --> X + case ISD::SUB: // X - 0 --> X + case ISD::SHL: // X << 0 --> X + case ISD::SRA: // X s>> 0 --> X + case ISD::SRL: // X u>> 0 --> X + return C->isZero(); + case ISD::MUL: // X * 1 --> X + return C->isOne(); + } + } + return false; + }; + + // This transform increases uses of N0, so freeze it to be safe. 
+ // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal) + if (isIdentityConstantForOpcode(Opcode, TVal)) { + SDValue F0 = DAG.getFreeze(N0); + SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags()); + return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); + } + // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 + if (isIdentityConstantForOpcode(Opcode, FVal)) { + SDValue F0 = DAG.getFreeze(N0); + SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags()); + return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0); + } + + return SDValue(); +} + SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && "Unexpected binary operator"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto BinOpcode = BO->getOpcode(); + EVT VT = BO->getValueType(0); + if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) { + if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false)) + return Sel; + + if (TLI.isCommutativeBinOp(BO->getOpcode())) + if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true)) + return Sel; + } + // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. // TODO: Handle ISD::SELECT_CC. @@ -2133,7 +2288,6 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // propagate non constant operands into select. I.e.: // and (select Cond, 0, -1), X --> select Cond, 0, X // or X, (select Cond, -1, 0) --> select Cond, -1, X - auto BinOpcode = BO->getOpcode(); bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && @@ -2145,8 +2299,6 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !DAG.isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); - EVT VT = BO->getValueType(0); - // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO @@ -2249,6 +2401,15 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static bool isADDLike(SDValue V, const SelectionDAG &DAG) { + unsigned Opcode = V.getOpcode(); + if (Opcode == ISD::OR) + return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)); + if (Opcode == ISD::XOR) + return isMinSignedConstant(V.getOperand(1)); + return false; +} + /// Try to fold a node that behaves like an ADD (note that N isn't necessarily /// an ISD::ADD here, it could for example be an ISD::OR if we know that there /// are no common bits set in the operands). 
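isADDLike captures two "this is really an addition" cases: an OR whose operands share no set bits, and an XOR against the minimum signed value, which can only flip the sign bit. Both are easy to verify exhaustively at 8 bits, a scaled-down model of the DAG-level reasoning:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned i = 0; i < 256; ++i) {
    uint8_t v = (uint8_t)i;
    // Disjoint-bits OR is ADD: a low-nibble constant into a high-nibble value.
    assert((uint8_t)((v & 0xF0) | 0x05) == (uint8_t)((v & 0xF0) + 0x05));
    // XOR with the min signed value (0x80) flips only the sign bit, which is
    // exactly what adding 0x80 does in two's complement (mod 256).
    assert((uint8_t)(v ^ 0x80) == (uint8_t)(v + 0x80));
  }
}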
@@ -2287,66 +2448,60 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { if (isNullConstant(N1)) return N0; - if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { + if (N0.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + // fold ((A-c1)+c2) -> (A+(c2-c1)) - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { - SDValue Sub = - DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)}); - assert(Sub && "Constant folding failed"); + if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01})) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); - } // fold ((c1-A)+c2) -> (c1+c2)-A - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - SDValue Add = - DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)}); - assert(Add && "Constant folding failed"); + if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00})) return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); - } + } - // add (sext i1 X), 1 -> zext (not i1 X) - // We don't transform this pattern: - // add (zext i1 X), -1 -> sext (not i1 X) - // because most (?) targets generate better code for the zext form. - if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && - isOneOrOneSplat(N1)) { - SDValue X = N0.getOperand(0); - if ((!LegalOperations || - (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && - TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && - X.getScalarValueSizeInBits() == 1) { - SDValue Not = DAG.getNOT(DL, X, X.getValueType()); - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); - } + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. + if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneOrOneSplat(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } + } - // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is - // equivalent to (add x, c0). - if (N0.getOpcode() == ISD::OR && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) && - DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { - if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, - {N1, N0.getOperand(1)})) - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); - } + // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) + // iff (or x, c0) is equivalent to (add x, c0). + // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1)) + // iff (xor x, c0) is equivalent to (add x, c0). 
+ if (isADDLike(N0, DAG)) { + SDValue N01 = N0.getOperand(1); + if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01})) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add); } if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // reassociate add - if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) { + if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) { if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) return RADD; // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is // equivalent to (add x, c). + // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is + // equivalent to (add x, c). auto ReassociateAddOr = [&](SDValue N0, SDValue N1) { - if (N0.getOpcode() == ISD::OR && N0.hasOneUse() && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) && - DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + if (isADDLike(N0, DAG) && N0.hasOneUse() && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { return DAG.getNode(ISD::ADD, DL, VT, DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); @@ -2406,7 +2561,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant - if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0->hasOneUse() && N1->hasOneUse()) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDValue N10 = N1.getOperand(0); @@ -2459,8 +2615,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // add (add x, y), 1 // And if the target does not like this form then turn into: // sub y, (xor x, -1) - if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && - N0.getOpcode() == ISD::ADD) { + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD && + N0.hasOneUse()) { SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); @@ -2468,7 +2624,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { } // (x - y) + -1 -> add (xor y, -1), x - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isAllOnesOrAllOnesSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); @@ -2565,7 +2721,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { // fold vector ops if (VT.isVector()) { - // TODO SimplifyVBinOp + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; // fold (add_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) @@ -2611,7 +2768,7 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) return SDValue(); - EVT VT = V.getNode()->getValueType(0); + EVT VT = V->getValueType(0); if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT)) return SDValue(); @@ -2664,27 +2821,27 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, // add (add x, 1), y // And if the target does not like this form then turn into: // sub y, (xor x, -1) - if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && - N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) { + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD && + N0.hasOneUse() && 
isOneOrOneSplat(N0.getOperand(1))) { SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N1, Not); } - // Hoist one-use subtraction by non-opaque constant: - // (x - C) + y -> (x + y) - C - // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); - return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); - } - // Hoist one-use subtraction from non-opaque constant: - // (C - x) + y -> (y - x) + C - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) { + // Hoist one-use subtraction by non-opaque constant: + // (x - C) + y -> (x + y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); + } + // Hoist one-use subtraction from non-opaque constant: + // (C - x) + y -> (y - x) + C + if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); + } } // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1' @@ -3060,21 +3217,26 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with // a single path for carry/borrow out propagation: static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, - SDValue Carry0, SDValue Carry1, SDNode *N) { - if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1) + SDValue N0, SDValue N1, SDNode *N) { + SDValue Carry0 = getAsCarry(TLI, N0); + if (!Carry0) return SDValue(); + SDValue Carry1 = getAsCarry(TLI, N1); + if (!Carry1) + return SDValue(); + unsigned Opcode = Carry0.getOpcode(); if (Opcode != Carry1.getOpcode()) return SDValue(); if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) return SDValue(); - // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the - // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in - // the above ASCII art.) - if (Carry1.getOperand(0) != Carry0.getValue(0) && - Carry1.getOperand(1) != Carry0.getValue(0)) + // Canonicalize the add/sub of A and B (the top node in the above ASCII art) + // as Carry0 and the add/sub of the carry in as Carry1 (the middle node). + if (Carry1.getNode()->isOperandOf(Carry0.getNode())) std::swap(Carry0, Carry1); + + // Check if nodes are connected in expected way. if (Carry1.getOperand(0) != Carry0.getValue(0) && Carry1.getOperand(1) != Carry0.getValue(0)) return SDValue(); @@ -3254,9 +3416,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + auto PeekThroughFreeze = [](SDValue N) { + if (N->getOpcode() == ISD::FREEZE && N.hasOneUse()) + return N->getOperand(0); + return N; + }; + // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. 
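Several of the constant reassociations visitSUB performs below are pure modular-arithmetic identities, so they can be sanity-checked with wrapping unsigned math:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEADBEEFu, C1 = 123456u, C2 = 987654u;
  assert((A + C1) - C2 == A + (C1 - C2)); // (A+C1)-C2 -> A+(C1-C2)
  assert(C2 - (A + C1) == (C2 - C1) - A); // C2-(A+C1) -> (C2-C1)-A
  assert((A - C1) - C2 == A - (C1 + C2)); // (A-C1)-C2 -> A-(C1+C2)
  assert((C1 - A) - C2 == (C1 - C2) - A); // (c1-A)-c2 -> (c1-c2)-A
}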
- if (N0 == N1) + if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1)) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c3 @@ -3314,7 +3482,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // Convert 0 - abs(x). - if (N1->getOpcode() == ISD::ABS && + if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; @@ -3352,44 +3520,31 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0.getOperand(0); // fold (A+C1)-C2 -> A+(C1-C2) - if (N0.getOpcode() == ISD::ADD && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = - DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1}); - assert(NewC && "Constant folding failed"); - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); + if (N0.getOpcode() == ISD::ADD) { + SDValue N01 = N0.getOperand(1); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1})) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); } // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD) { SDValue N11 = N1.getOperand(1); - if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && - isConstantOrConstantVector(N11, /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}); - assert(NewC && "Constant folding failed"); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11})) return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); - } } // fold (A-C1)-C2 -> A-(C1+C2) - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = - DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1}); - assert(NewC && "Constant folding failed"); - return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); + if (N0.getOpcode() == ISD::SUB) { + SDValue N01 = N0.getOperand(1); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1})) + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); } // fold (c1-A)-c2 -> (c1-c2)-A - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) { - SDValue NewC = - DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1}); - assert(NewC && "Constant folding failed"); - return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); + if (N0.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1})) + return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -3584,6 +3739,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // As with the previous fold, prefer add for more folding potential. 
+ // Subtracting SMIN/0 is the same as adding SMIN/0: + // N0 - (X << BW-1) --> N0 + (X << BW-1) + if (N1.getOpcode() == ISD::SHL) { + ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1)); + if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); + } + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) if (SDValue Carry = getAsCarry(TLI, N0)) { @@ -3619,7 +3783,8 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { // fold vector ops if (VT.isVector()) { - // TODO SimplifyVBinOp + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; // fold (sub_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) @@ -3770,19 +3935,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold (mul x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // fold (mul c1, c2) -> c1*c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::MUL, DL, VT, N1, N0); bool N1IsConst = false; bool N1IsOpaqueConst = false; @@ -3790,7 +3956,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3817,17 +3983,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return NewSel; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnes()) { - SDLoc DL(N); + if (N1IsConst && ConstValue1.isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); - } // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { - SDLoc DL(N); SDValue LogBase2 = BuildLogBase2(N1, DL); EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); @@ -3837,7 +4000,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); - SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
return DAG.getNode(ISD::SUB, DL, VT, @@ -3882,7 +4044,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { ShAmt += TZeros; assert(ShAmt < VT.getScalarSizeInBits() && "multiply-by-constant generated out of bounds shift"); - SDLoc DL(N); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); SDValue R = @@ -3897,12 +4058,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N0.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - if (isConstantOrConstantVector(C3)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + if (N0.getOpcode() == ISD::SHL) { + SDValue N01 = N0.getOperand(1); + if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01})) + return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one @@ -3912,18 +4071,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N0.getOperand(1)) && - N0.getNode()->hasOneUse()) { + isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && isConstantOrConstantVector(N1.getOperand(1)) && - N1.getNode()->hasOneUse()) { + N1->hasOneUse()) { Sh = N1; Y = N0; } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1)); } } @@ -3932,18 +4090,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOpcode() == ISD::ADD && DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, - N0.getOperand(1), N1)); + return DAG.getNode( + ISD::ADD, DL, VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). if (N0.getOpcode() == ISD::VSCALE) if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { const APInt &C0 = N0.getConstantOperandAPInt(0); const APInt &C1 = NC1->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, C0 * C1); + return DAG.getVScale(DL, VT, C0 * C1); } // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). 
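visitMUL's strength reductions are likewise modular identities; on a wrapping 32-bit integer:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xCAFEF00Du;
  assert(x * 0xFFFFFFFFu == 0u - x);        // mul x, -1 -> 0-x
  assert(x * 8u == x << 3);                 // mul x, (1 << c) -> x << c
  assert(x * (0u - 8u) == 0u - (x << 3));   // mul x, -(1 << c) -> -(x << c)
  assert((x << 2) * 10u == x * (10u << 2)); // mul (shl X, c1), c2 -> mul X, c2 << c1
}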
@@ -3952,7 +4109,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); APInt NewStep = C0 * MulVal; - return DAG.getStepVector(SDLoc(N), VT, NewStep); + return DAG.getStepVector(DL, VT, NewStep); } // Fold ((mul x, 0/undef) -> 0, @@ -3974,7 +4131,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) && ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) { assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector"); - SDLoc DL(N); EVT LegalSVT = N1.getOperand(0).getValueType(); SDValue Zero = DAG.getConstant(0, DL, LegalSVT); SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT); @@ -3987,7 +4143,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // reassociate mul - if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) return RMUL; return SDValue(); @@ -4050,7 +4206,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; - for (SDNode *User : Op0.getNode()->uses()) { + for (SDNode *User : Op0->uses()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; @@ -4190,12 +4346,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { - SDLoc DL(N); - EVT VT = N->getValueType(0); - EVT CCVT = getSetCCResultType(VT); - unsigned BitWidth = VT.getScalarSizeInBits(); - +static bool isDivisorPowerOfTwo(SDValue Divisor) { // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. auto IsPowerOfTwo = [](ConstantSDNode *C) { @@ -4208,11 +4359,20 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { return false; }; + return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo); +} + +SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); + unsigned BitWidth = VT.getScalarSizeInBits(); + // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. - if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) { + if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; @@ -4368,6 +4528,16 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { return SDValue(); } +SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) { + if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) && + !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) { + // Target-specific implementation of srem x, pow2. 
+ if (SDValue Res = BuildSREMPow2(N)) + return Res; + } + return SDValue(); +} + // handles ISD::SREM and ISD::UREM SDValue DAGCombiner::visitREM(SDNode *N) { unsigned Opcode = N->getOpcode(); @@ -4384,10 +4554,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; - // fold (urem X, -1) -> select(X == -1, 0, x) - if (!isSigned && N1C && N1C->isAllOnes()) - return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), - DAG.getConstant(0, DL, VT), N0); + // fold (urem X, -1) -> select(FX == -1, 0, FX) + // Freeze the numerator to avoid a miscompile with an undefined value. + if (!isSigned && N1C && N1C->isAllOnes()) { + SDValue F0 = DAG.getFreeze(N0); + SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ); + return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0); + } if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -4428,6 +4601,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) { // combine will not return a DIVREM. Regardless, checking cheapness here // makes sense since the simplification results in fatter code. if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { + if (isSigned) { + // check if we can build faster implementation for srem + if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N)) + return OptimizedRem; + } + SDValue OptimizedDiv = isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) { @@ -4587,6 +4766,46 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitAVG(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // fold (avg c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0); + + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + + // fold (avgfloor x, 0) -> x >> 1 + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { + if (Opcode == ISD::AVGFLOORS) + return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT)); + if (Opcode == ISD::AVGFLOORU) + return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT)); + } + } + + // fold (avg x, undef) -> x + if (N0.isUndef()) + return N1; + if (N1.isUndef()) + return N0; + + // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1 + + return SDValue(); +} + /// Perform optimizations common to nodes that compute two values. LoOp and HiOp /// give the opcodes for the two computations that are being performed. Return /// true if a simplification was made. @@ -4745,7 +4964,9 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); // (mulo x, 2) -> (addo x, x) - if (N1C && N1C->getAPIntValue() == 2) + // FIXME: This needs a freeze. + if (N1C && N1C->getAPIntValue() == 2 && + (!IsSigned || VT.getScalarSizeInBits() > 2)) return DAG.getNode(IsSigned ? 
ISD::SADDO : ISD::UADDO, DL, N->getVTList(), N0, N0); @@ -4802,8 +5023,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, return 0; const APInt &C1 = N1C->getAPIntValue(); const APInt &C2 = N3C->getAPIntValue(); - if (C1.getBitWidth() < C2.getBitWidth() || - C1 != C2.sextOrSelf(C1.getBitWidth())) + if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth())) return 0; return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0); }; @@ -4910,7 +5130,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, const APInt &C1 = N1C->getAPIntValue(); const APInt &C3 = N3C->getAPIntValue(); if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() || - C1 != C3.zextOrSelf(C1.getBitWidth())) + C1 != C3.zext(C1.getBitWidth())) return SDValue(); unsigned BW = (C1 + 1).exactLogBase2(); @@ -4940,6 +5160,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; + // If the operands are the same, this is a no-op. + if (N0 == N1) + return N0; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) @@ -5245,29 +5469,27 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, } // Turn compare of constants whose difference is 1 bit into add+and+setcc. - // TODO - support non-uniform vector amounts. if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) { // Match a shared variable operand and 2 non-opaque constant operands. - ConstantSDNode *C0 = isConstOrConstSplat(LR); - ConstantSDNode *C1 = isConstOrConstSplat(RR); - if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) { + auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) { + // The difference of the constants must be a single bit. const APInt &CMax = APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue()); const APInt &CMin = APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue()); - // The difference of the constants must be a single bit. 
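The power-of-two-difference fold in foldLogicOfSetCCs works because subtracting CMin maps the two matched constants to 0 and CMax-CMin, exactly the values annihilated by masking with ~(CMax-CMin). An exhaustive 8-bit check of the setne/setne form (constants chosen arbitrarily):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t CMin = 20, CMax = 28; // CMax - CMin == 8, a power of two
  const uint8_t NotDiff = (uint8_t)~(CMax - CMin);
  for (unsigned i = 0; i < 256; ++i) {
    uint8_t X = (uint8_t)i;
    bool BothNe = X != CMax && X != CMin;
    bool Folded = ((uint8_t)(X - CMin) & NotDiff) != 0;
    assert(BothNe == Folded);
  }
}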
- if ((CMax - CMin).isPowerOf2()) { - // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> - // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq - SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); - SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); - SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); - SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); - SDValue Mask = DAG.getNOT(DL, Diff, OpVT); - SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); - SDValue Zero = DAG.getConstant(0, DL, OpVT); - return DAG.getSetCC(DL, VT, And, Zero, CC0); - } + return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2(); + }; + if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) { + // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> + // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq + SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); + SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); + SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); + SDValue Mask = DAG.getNOT(DL, Diff, OpVT); + SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, And, Zero, CC0); } } } @@ -5769,6 +5991,9 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { if (ShiftAmt.uge(VTBitWidth)) return SDValue(); + if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1))) + return SDValue(); + // Turn this into a bit-test pattern using mask op + setcc: // and (not (srl X, C)), 1 --> (and X, 1< ((X0 | X1) << Y) | Z +static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, + SelectionDAG &DAG) { + unsigned LogicOpcode = N->getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR) + && "Expected bitwise logic operation"); + + if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse()) + return SDValue(); + + // Match another bitwise logic op and a shift. + unsigned ShiftOpcode = ShiftOp.getOpcode(); + if (LogicOp.getOpcode() != LogicOpcode || + !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL || + ShiftOpcode == ISD::SRA)) + return SDValue(); + + // Match another shift op inside the first logic operand. Handle both commuted + // possibilities. 
+ // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z + // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z + SDValue X1 = ShiftOp.getOperand(0); + SDValue Y = ShiftOp.getOperand(1); + SDValue X0, Z; + if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode && + LogicOp.getOperand(0).getOperand(1) == Y) { + X0 = LogicOp.getOperand(0).getOperand(0); + Z = LogicOp.getOperand(1); + } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode && + LogicOp.getOperand(1).getOperand(1) == Y) { + X0 = LogicOp.getOperand(1).getOperand(0); + Z = LogicOp.getOperand(0); + } else { + return SDValue(); + } + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1); + SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y); + return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5848,27 +6120,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) return N0; - // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load + // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0); - auto *BVec = dyn_cast<BuildVectorSDNode>(N1); - if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD && - N0.hasOneUse() && N1.hasOneUse()) { + ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true); + if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() && + Splat && N1.hasOneUse()) { EVT LoadVT = MLoad->getMemoryVT(); EVT ExtVT = VT; if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { // For this AND to be a zero extension of the masked load the elements // of the BuildVec must mask the bottom bits of the extended element // type - if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) { - uint64_t ElementSize = - LoadVT.getVectorElementType().getScalarSizeInBits(); - if (Splat->getAPIntValue().isMask(ElementSize)) { - return DAG.getMaskedLoad( - ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), - MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), - LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), - ISD::ZEXTLOAD, MLoad->isExpandingLoad()); - } + uint64_t ElementSize = + LoadVT.getVectorElementType().getScalarSizeInBits(); + if (Splat->getAPIntValue().isMask(ElementSize)) { + return DAG.getMaskedLoad( + ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), + MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), + LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), + ISD::ZEXTLOAD, MLoad->isExpandingLoad()); } } } @@ -5944,7 +6214,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value.
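foldLogicOfShifts is justified by shifts distributing over bitwise logic when the shift amounts agree; for instance, checked over all 32-bit shift amounts:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X0 = 0x1234u, X1 = 0xABCDu, Z = 0xF0F0F0F0u;
  for (unsigned Y = 0; Y < 32; ++Y) {
    // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) == LOGIC (SH (LOGIC X0, X1), Y), Z
    assert((((X0 << Y) | Z) | (X1 << Y)) == (((X0 | X1) << Y) | Z));
    assert((((X0 >> Y) ^ Z) ^ (X1 >> Y)) == (((X0 ^ X1) >> Y) ^ Z));
    assert((((X0 << Y) & Z) & (X1 << Y)) == (((X0 & X1) << Y) & Z));
  }
}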
APInt Constant = APInt::getZero(1); - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + if (const ConstantSDNode *C = isConstOrConstSplat(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { APInt SplatValue, SplatUndef; @@ -6084,6 +6354,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) + return R; + // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) @@ -6142,9 +6417,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; - if (TLI.hasBitTest(N0, N1)) - if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) - return V; + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; // Recognize the following pattern: // @@ -6194,11 +6468,11 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool LookPassAnd0 = false; bool LookPassAnd1 = false; if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) - std::swap(N0, N1); + std::swap(N0, N1); if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) - std::swap(N0, N1); + std::swap(N0, N1); if (N0.getOpcode() == ISD::AND) { - if (!N0.getNode()->hasOneUse()) + if (!N0->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); // Also handle 0xffff since the LHS is guaranteed to have zeros there. @@ -6211,7 +6485,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } if (N1.getOpcode() == ISD::AND) { - if (!N1.getNode()->hasOneUse()) + if (!N1->hasOneUse()) return SDValue(); ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); if (!N11C || N11C->getZExtValue() != 0xFF) @@ -6224,7 +6498,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); - if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) + if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -6237,7 +6511,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) SDValue N00 = N0->getOperand(0); if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { - if (!N00.getNode()->hasOneUse()) + if (!N00->hasOneUse()) return SDValue(); ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); if (!N001C || N001C->getZExtValue() != 0xFF) @@ -6248,7 +6522,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, SDValue N10 = N1->getOperand(0); if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { - if (!N10.getNode()->hasOneUse()) + if (!N10->hasOneUse()) return SDValue(); ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); // Also allow 0xFFFF since the bits will be shifted out. This is needed @@ -6266,19 +6540,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, // Make sure everything beyond the low halfword gets set to zero since the SRL // 16 will clear the top bits. unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16) { + if (OpSizeInBits > 16) { // If the left-shift isn't masked out then the only way this is a bswap is // if all bits beyond the low 8 are 0.
In that case the entire pattern // reduces to a left shift anyway: leave it for other parts of the combiner. - if (!LookPassAnd0) + if (DemandHighBits && !LookPassAnd0) return SDValue(); // However, if the right shift isn't masked out then it might be because - // it's not needed. See if we can spot that too. - if (!LookPassAnd1 && - !DAG.MaskedValueIsZero( - N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) - return SDValue(); + // it's not needed. See if we can spot that too. If the high bits aren't + // demanded, we only need bits 23:16 to be zero. Otherwise, we need all + // upper bits to be zero. + if (!LookPassAnd1) { + unsigned HighBit = DemandHighBits ? OpSizeInBits : 24; + if (!DAG.MaskedValueIsZero(N10, + APInt::getBitsSet(OpSizeInBits, 16, HighBit))) + return SDValue(); + } } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); @@ -6298,7 +6576,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { - if (!N.getNode()->hasOneUse()) + if (!N->hasOneUse()) return false; unsigned Opc = N.getOpcode(); @@ -6485,8 +6763,9 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } else + } else { return SDValue(); + } // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -6524,7 +6803,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + (N0->hasOneUse() || N1->hasOneUse())) { // We can only do this xform if we know that bits from X that are set in C2 // but not in C1 are already zero. Likewise for Y. if (const ConstantSDNode *N0O1C = @@ -6552,7 +6831,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { N1.getOpcode() == ISD::AND && N0.getOperand(0) == N1.getOperand(0) && // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + (N0->hasOneUse() || N1->hasOneUse())) { SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(1), N1.getOperand(1)); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X); @@ -6567,14 +6846,38 @@ static SDValue visitORCommutative( EVT VT = N0.getValueType(); if (N0.getOpcode() == ISD::AND) { // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) - if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1) + // TODO: Set AllowUndefs = true. 
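The fold implemented next, (or (and X, ~Y), Y) -> (or X, Y), is a standard absorption identity; exhaustively at 8 bits:

#include <cassert>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      assert(((x & ~y) | y) == (x | y));
}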
+ if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0), + /* AllowUndefs */ false) == N1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) - if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1) + if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1), + /* AllowUndefs */ false) == N1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); } + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + + auto peekThroughZext = [](SDValue V) { + if (V->getOpcode() == ISD::ZERO_EXTEND) + return V->getOperand(0); + return V; + }; + + // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y + if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL && + N0.getOperand(0) == N1.getOperand(0) && + peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) + return N0; + + // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y + if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL && + N0.getOperand(1) == N1.getOperand(0) && + peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) + return N0; + return SDValue(); } @@ -6611,11 +6914,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) - // Do this only if the resulting shuffle is legal. - if (isa<ShuffleVectorSDNode>(N0) && - isa<ShuffleVectorSDNode>(N1) && - // Avoid folding a node with illegal type. - TLI.isTypeLegal(VT)) { + // Do this only if the resulting type / shuffle is legal. + auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0); + auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1); + if (SV0 && SV1 && TLI.isTypeLegal(VT)) { bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); @@ -6624,11 +6926,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) { assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); - const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); - const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); bool CanFold = true; int NumElts = VT.getVectorNumElements(); - SmallVector<int> Mask(NumElts); + SmallVector<int> Mask(NumElts, -1); for (int i = 0; i != NumElts; ++i) { int M0 = SV0->getMaskElt(i); @@ -6640,10 +6940,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // If one element is zero and the otherside is undef, keep undef. // This also handles the case that both are undef. - if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { - Mask[i] = -1; + if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) continue; - } // Make sure only one of the elements is zero. if (M0Zero == M1Zero) { @@ -6711,7 +7009,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) { return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue()); }; - if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + if (N0.getOpcode() == ISD::AND && N0->hasOneUse() && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, {N1, N0.getOperand(1)})) { @@ -7031,8 +7329,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // Neg with outer conversions stripped away.
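MatchRotatePosNeg below recognizes (or (shl x, y), (srl x, bits-y)) as a rotate; the underlying identity, checked against an independent 64-bit formulation of rotl:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x80000001u;
  for (unsigned y = 1; y < 32; ++y) {
    uint32_t Pattern = (x << y) | (x >> (32 - y));
    // Independent rotl: duplicate x into 64 bits, slide a 32-bit window.
    uint64_t Doubled = ((uint64_t)x << 32) | x;
    assert(Pattern == (uint32_t)(Doubled >> (32 - y)));
  }
}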
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, - SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, const SDLoc &DL) { + SDValue InnerNeg, bool HasPos, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) @@ -7043,7 +7342,6 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, EVT VT = Shifted.getValueType(); if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, /*IsRotate*/ true)) { - bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); } @@ -7059,8 +7357,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // TODO: Merge with MatchRotatePosNeg. SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, - SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, const SDLoc &DL) { + SDValue InnerNeg, bool HasPos, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL) { EVT VT = N0.getValueType(); unsigned EltBits = VT.getScalarSizeInBits(); @@ -7072,7 +7371,6 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { - bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); } @@ -7134,6 +7432,16 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { bool HasROTR = hasOperation(ISD::ROTR, VT); bool HasFSHL = hasOperation(ISD::FSHL, VT); bool HasFSHR = hasOperation(ISD::FSHR, VT); + + // If the type is going to be promoted and the target has enabled custom + // lowering for rotate, allow matching rotate by non-constants. Only allow + // this for scalar types. + if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) == + TargetLowering::TypePromoteInteger) { + HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom; + HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom; + } + if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); @@ -7187,11 +7495,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. - // TODO: Support pre-legalization funnel-shift by constant. - bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); - if (!IsRotate && !(HasFSHL || HasFSHR)) - return SDValue(); // Requires funnel shift support. - // Canonicalize shl to left side in a shl/srl pair. 
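The disguised-rotate case handled just below (where the common operand X hides inside an inner 'or') follows from distributing the shift over the OR and regrouping; with C1 + C2 equal to the bit width:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEFu, Y = 0x00F000F0u;
  const unsigned C1 = 5, C2 = 27; // C1 + C2 == 32
  // (shl (X | Y), C1) | (srl X, C2) == (rotl X, C1) | (shl Y, C1)
  uint32_t RotX = (X << C1) | (X >> C2);
  assert((((X | Y) << C1) | (X >> C2)) == (RotX | (Y << C1)));
}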
if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); @@ -7205,27 +7508,12 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); - // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) - // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) - // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) - // iff C1+C2 == EltSizeInBits auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { - return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; - }; - if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { - SDValue Res; - if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { - bool UseROTL = !LegalOperations || HasROTL; - Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - UseROTL ? LHSShiftAmt : RHSShiftAmt); - } else { - bool UseFSHL = !LegalOperations || HasFSHL; - Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, - RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); - } + return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; + }; + auto ApplyMasks = [&](SDValue Res) { // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); @@ -7246,6 +7534,71 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } return Res; + }; + + // TODO: Support pre-legalization funnel-shift by constant. + bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); + if (!IsRotate && !(HasFSHL || HasFSHR)) { + if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() && + ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + // Look for a disguised rotate by constant. + // The common shifted operand X may be hidden inside another 'or'. + SDValue X, Y; + auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) { + if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR) + return false; + if (CommonOp == Or.getOperand(0)) { + X = CommonOp; + Y = Or.getOperand(1); + return true; + } + if (CommonOp == Or.getOperand(1)) { + X = CommonOp; + Y = Or.getOperand(0); + return true; + } + return false; + }; + + SDValue Res; + if (matchOr(LHSShiftArg, RHSShiftArg)) { + // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1) + SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt); + SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt); + Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY); + } else if (matchOr(RHSShiftArg, LHSShiftArg)) { + // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2) + SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt); + SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt); + Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY); + } else { + return SDValue(); + } + + return ApplyMasks(Res); + } + + return SDValue(); // Requires funnel shift support. 
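The disguised-rotate matching added above distributes the shift over the inner 'or'. A small standalone check of the rewrite it performs, assuming 32-bit operands (plain C++, not part of the patch; rotl32 is an illustrative helper):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned S) {
  return S ? (V << S) | (V >> (32 - S)) : V;
}

int main() {
  // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
  // when C1 + C2 == 32 and the common operand X hides inside an 'or'.
  uint32_t X = 0xCAFEF00D, Y = 0x0F0F0F0F;
  for (unsigned C1 = 1; C1 < 32; ++C1) {
    unsigned C2 = 32 - C1;
    assert((((X | Y) << C1) | (X >> C2)) == (rotl32(X, C1) | (Y << C1)));
  }
  return 0;
}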
+ } + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) + // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) + // iff C1+C2 == EltSizeInBits + if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + SDValue Res; + if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { + bool UseROTL = !LegalOperations || HasROTL; + Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + UseROTL ? LHSShiftAmt : RHSShiftAmt); + } else { + bool UseFSHL = !LegalOperations || HasFSHL; + Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); + } + + return ApplyMasks(Res); } // Even pre-legalization, we can't easily rotate/funnel-shift by a variable @@ -7276,26 +7629,26 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (IsRotate && (HasROTL || HasROTR)) { SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, - RExtOp0, ISD::ROTL, ISD::ROTR, DL); + RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, - LExtOp0, ISD::ROTR, ISD::ROTL, DL); + LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; } SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt, - LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL); + LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL); if (TryL) return TryL; SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt, - RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL); + RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL); if (TryR) return TryR; @@ -7810,7 +8163,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // little endian value load Optional<bool> IsBigEndian = isBigEndian( makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); - if (!IsBigEndian.hasValue()) + if (!IsBigEndian) return SDValue(); assert(FirstByteProvider && "must be set"); @@ -8017,6 +8370,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; + // look for 'add-like' folds: + // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) + if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && + isMinSignedConstant(N1)) + if (SDValue Combined = visitADDLike(N)) + return Combined; + // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; @@ -8182,6 +8542,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) + return R; + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) return MM; @@ -8412,7 +8777,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { } unsigned NextOp = N0.getOpcode(); - // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + + // fold (rot* (rot* x, c2), c1) + // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize) if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); @@ -8420,14 +8787,19 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { EVT ShiftVT =
C1->getValueType(0); bool SameSide = (N->getOpcode() == NextOp); unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; - if (SDValue CombinedShift = DAG.FoldConstantArithmetic( - CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) { - SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); - SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( - ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC}); - return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), - CombinedShiftNorm); - } + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); + SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, + {N1, BitsizeC}); + SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, + {N0.getOperand(1), BitsizeC}); + if (Norm1 && Norm2) + if (SDValue CombinedShift = DAG.FoldConstantArithmetic( + CombineOp, dl, ShiftVT, {Norm1, Norm2})) { + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC}); + return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), + CombinedShiftNorm); + } } } return SDValue(); @@ -8587,52 +8959,63 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 - // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 - // TODO - support non-uniform vector shift amounts. - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && - N0->getFlags().hasExact()) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t C1 = N0C1->getZExtValue(); - uint64_t C2 = N1C->getZExtValue(); - SDLoc DL(N); - if (C1 <= C2) - return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(C2 - C1, DL, ShiftVT)); - return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), - DAG.getConstant(C1 - C2, DL, ShiftVT)); + if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) { + auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + const APInt &LHSC = LHS->getAPIntValue(); + const APInt &RHSC = RHS->getAPIntValue(); + return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && + LHSC.getZExtValue() <= RHSC.getZExtValue(); + }; + + SDLoc DL(N); + + // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 + // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2 + if (N0->getFlags().hasExact()) { + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + } + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff); + } } - } - // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or - // (and (srl x, (sub c1, c2), MASK) - // Only fold this if the inner shift has no other uses -- if it does, folding - // this will increase the total number of instructions. - // TODO - drop hasOneUse requirement if c1 == c2? - // TODO - support non-uniform vector shift amounts. 
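The rewritten rotate-of-rotate fold above now reduces each amount modulo the bit width before combining them, so oversized constants cannot misbehave in the shift-amount type. A standalone sketch of the identity being used (C++, outside the patch; rotl32 is an illustrative helper):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned S) {
  S %= 32;
  return S ? (V << S) | (V >> (32 - S)) : V;
}

int main() {
  // (rotl (rotl x, c2), c1) == (rotl x, ((c1 % 32) + (c2 % 32)) % 32),
  // including amounts that are already >= the bit width.
  uint32_t X = 0x87654321;
  for (unsigned C1 = 0; C1 < 64; ++C1)
    for (unsigned C2 = 0; C2 < 64; ++C2)
      assert(rotl32(rotl32(X, C2), C1) ==
             rotl32(X, ((C1 % 32) + (C2 % 32)) % 32));
  return 0;
}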
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - if (N0C1->getAPIntValue().ult(OpSizeInBits)) { - uint64_t c1 = N0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); - SDValue Shift; - if (c2 > c1) { - Mask <<= c2 - c1; - SDLoc DL(N); - Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, DL, ShiftVT)); - } else { - Mask.lshrInPlace(c1 - c2); - SDLoc DL(N); - Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, DL, ShiftVT)); - } - SDLoc DL(N0); - return DAG.getNode(ISD::AND, DL, VT, Shift, - DAG.getConstant(Mask, DL, VT)); + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or + // (and (srl x, (sub c1, c2), MASK) + // Only fold this if the inner shift has no other uses -- if it does, + // folding this will increase the total number of instructions. + if (N0.getOpcode() == ISD::SRL && + (N0.getOperand(1) == N1 || N0.hasOneUse()) && + TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } } } @@ -8651,7 +9034,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. 
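The generalized shl-of-srl fold above replaces the shift pair with a single shift by the amount difference plus an all-ones mask shifted into place. The underlying scalar identity, checked standalone (C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // (shl (srl x, c1), c2) is a single shift by |c2 - c1| plus a mask:
  //   c2 >= c1: ((x >> c1) << c2) == ((x << (c2 - c1)) & (~0u << c2))
  //   c1 >= c2: ((x >> c1) << c2) == ((x >> (c1 - c2)) & (~0u << c2))
  uint32_t X = 0xA5A5F00F;
  for (unsigned C1 = 0; C1 < 32; ++C1)
    for (unsigned C2 = 0; C2 < 32; ++C2) {
      uint32_t Got = C2 >= C1 ? (X << (C2 - C1)) & (~0u << C2)
                              : (X >> (C1 - C2)) & (~0u << C2);
      assert(Got == ((X >> C1) << C2));
    }
  return 0;
}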
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && - N0.getNode()->hasOneUse() && + N0->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) && TLI.isDesirableToCommuteWithShift(N, Level)) { @@ -8663,14 +9046,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) - if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() && - isConstantOrConstantVector(N1, /* No Opaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); - if (isConstantOrConstantVector(Shl)) + if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) { + SDValue N01 = N0.getOperand(1); + if (SDValue Shl = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && !N1C->isOpaque()) if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; @@ -8956,8 +9339,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; if (LargeShift->getAPIntValue() == TruncBits) { SDLoc DL(N); - SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL, - getShiftAmountTy(LargeVT)); + EVT LargeShiftVT = getShiftAmountTy(LargeVT); + SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT); + Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt, + DAG.getConstant(TruncBits, DL, LargeShiftVT)); SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); @@ -8996,6 +9381,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return V; EVT VT = N0.getValueType(); + EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 @@ -9037,7 +9423,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); - EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); } @@ -9081,15 +9466,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - // fold (srl (shl x, c), c) -> (and x, cst2) - // TODO - (srl (shl x, c1), c2). 
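The shl-of-mul rewrite above moves the whole shift into the multiplier, which is valid in the modular arithmetic the DAG uses. A quick standalone check (C++, outside the patch):

#include <cassert>
#include <cstdint>

int main() {
  // (shl (mul x, c1), c2) == (mul x, (shl c1, c2)) modulo 2^32, so the
  // shift can be constant-folded into the multiplier.
  uint32_t X = 0x01234567;
  for (uint32_t C1 = 1; C1 < 100; C1 += 7)
    for (unsigned C2 = 0; C2 < 32; ++C2)
      assert(((X * C1) << C2) == X * (C1 << C2));
  return 0;
}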
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && - isConstantOrConstantVector(N1, /* NoOpaques */ true)) { - SDLoc DL(N); - SDValue Mask = - DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1); - AddToWorklist(Mask.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); + // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or + // (and (srl x, (sub c2, c1), MASK) + if (N0.getOpcode() == ISD::SHL && + (N0.getOperand(1) == N1 || N0->hasOneUse()) && + TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + const APInt &LHSC = LHS->getAPIntValue(); + const APInt &RHSC = RHS->getAPIntValue(); + return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && + LHSC.getZExtValue() <= RHSC.getZExtValue(); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) @@ -9345,6 +9756,21 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1})) return C; + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) { + // fold (sshlsat x, c) -> (shl x, c) + if (N->getOpcode() == ISD::SSHLSAT && N1C && + N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0))) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); + + // fold (ushlsat x, c) -> (shl x, c) + if (N->getOpcode() == ISD::USHLSAT && N1C && + N1C->getAPIntValue().ule( + DAG.computeKnownBits(N0).countMinLeadingZeros())) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); + } + return SDValue(); } @@ -9368,18 +9794,27 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) return SDValue(); + EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); - // Check if the operands are of same type and valid size. unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; - if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) - return SDValue(); - Op0 = Op0.getOperand(0); - Op1 = Op1.getOperand(0); - SDValue ABD = - DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); + // fold abs(sext(x) - sext(y)) -> zext(abds(x, y)) + // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y)) + // NOTE: Extensions must be equivalent. 
+ if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) { + Op0 = Op0.getOperand(0); + Op1 = Op1.getOperand(0); + SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD); + } + + // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y)) + // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y)) + if (TLI.isOperationLegalOrCustom(ABDOpcode, VT)) + return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1); + + return SDValue(); } SDValue DAGCombiner::visitABS(SDNode *N) { @@ -9405,24 +9840,60 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (bswap c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + return DAG.getNode(ISD::BSWAP, DL, VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) - return N0->getOperand(0); + return N0.getOperand(0); // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse // isn't supported, it will be expanded to bswap followed by a manual reversal // of bits in each byte. By placing bswaps before bitreverse, we can remove // the two bswaps if the bitreverse gets expanded. if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) { - SDLoc DL(N); SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap); } + // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2)))))) + // iff x >= bw/2 (i.e. lower half is known zero) + unsigned BW = VT.getScalarSizeInBits(); + if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) { + auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2); + if (ShAmt && ShAmt->getAPIntValue().ult(BW) && + ShAmt->getZExtValue() >= (BW / 2) && + (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) && + TLI.isTruncateFree(VT, HalfVT) && + (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) { + SDValue Res = N0.getOperand(0); + if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2))) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT))); + Res = DAG.getZExtOrTrunc(Res, DL, HalfVT); + Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res); + return DAG.getZExtOrTrunc(Res, DL, VT); + } + } + + // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as + // inverse-shift-of-bswap: + // bswap (X u<< C) --> (bswap X) u>> C + // bswap (X u>> C) --> (bswap X) u<< C + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + N0.hasOneUse()) { + auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (ShAmt && ShAmt->getAPIntValue().ult(BW) && + ShAmt->getZExtValue() % 8 == 0) { + SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); + unsigned InverseShift = N0.getOpcode() == ISD::SHL ?
ISD::SRL : ISD::SHL; + return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1)); + } + } + return SDValue(); } @@ -9673,7 +10144,8 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { if (C1Val.isPowerOf2() && C2Val.isZero()) { if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); } @@ -9956,7 +10428,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { // Any flags available in a select/setcc fold will be on the setcc as they // migrated from fcmp - Flags = N0.getNode()->getFlags(); + Flags = N0->getFlags(); SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2, N0.getOperand(2)); SelectNode->setFlags(Flags); @@ -10029,14 +10501,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1)); } -bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { +bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, + SelectionDAG &DAG) { if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD) return false; + // Only perform the transformation when existing operands can be reused. + if (IndexIsScaled) + return false; + // For now we check only the LHS of the add. SDValue LHS = Index.getOperand(0); SDValue SplatVal = DAG.getSplatValue(LHS); - if (!SplatVal) + if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType()) return false; BasePtr = SplatVal; @@ -10045,23 +10522,29 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { } // Fold sext/zext of index into index type. -bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index, - bool Scaled, SelectionDAG &DAG) { +bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, + SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // It's always safe to look through zero extends. if (Index.getOpcode() == ISD::ZERO_EXTEND) { SDValue Op = Index.getOperand(0); - MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { + IndexType = ISD::UNSIGNED_SCALED; Index = Op; return true; } + if (ISD::isIndexTypeSigned(IndexType)) { + IndexType = ISD::UNSIGNED_SCALED; + return true; + } } - if (Index.getOpcode() == ISD::SIGN_EXTEND) { + // It's only safe to look through sign extends when Index is signed. + if (Index.getOpcode() == ISD::SIGN_EXTEND && + ISD::isIndexTypeSigned(IndexType)) { SDValue Op = Index.getOperand(0); - MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { Index = Op; return true; } @@ -10078,24 +10561,25 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { SDValue Scale = MSC->getScale(); SDValue StoreVal = MSC->getValue(); SDValue BasePtr = MSC->getBasePtr(); + ISD::MemIndexType IndexType = MSC->getIndexType(); SDLoc DL(N); // Zap scatters with a zero mask. 
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; - if (refineUniformBase(BasePtr, Index, DAG)) { + if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedScatter( - DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, - MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType, + MSC->isTruncatingStore()); } - if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) { + if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedScatter( - DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, - MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType, + MSC->isTruncatingStore()); } return SDValue(); @@ -10150,7 +10634,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // If this is a TRUNC followed by a masked store, fold this into a masked // truncating store. We can do this even if this is already a masked // truncstore. - if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() && + if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() && MST->isUnindexed() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), MST->getMemoryVT(), LegalOperations)) { @@ -10173,26 +10657,25 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { SDValue Scale = MGT->getScale(); SDValue PassThru = MGT->getPassThru(); SDValue BasePtr = MGT->getBasePtr(); + ISD::MemIndexType IndexType = MGT->getIndexType(); SDLoc DL(N); // Zap gathers with a zero mask. if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return CombineTo(N, PassThru, MGT->getChain()); - if (refineUniformBase(BasePtr, Index, DAG)) { + if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - MGT->getMemoryVT(), DL, Ops, - MGT->getMemOperand(), MGT->getIndexType(), - MGT->getExtensionType()); + return DAG.getMaskedGather( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } - if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) { + if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - MGT->getMemoryVT(), DL, Ops, - MGT->getMemOperand(), MGT->getIndexType(), - MGT->getExtensionType()); + return DAG.getMaskedGather( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } return SDValue(); @@ -10446,23 +10929,25 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { Other = N1; } + // zext(x) >= y ? trunc(zext(x) - y) : 0 + // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit))) + // zext(x) > y ? 
trunc(zext(x) - y) : 0 + // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit))) + if (Other && Other.getOpcode() == ISD::TRUNCATE && + Other.getOperand(0).getOpcode() == ISD::SUB && + (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) { + SDValue OpLHS = Other.getOperand(0).getOperand(0); + SDValue OpRHS = Other.getOperand(0).getOperand(1); + if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND) + if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, + DAG, DL)) + return R; + } + if (Other && Other.getNumOperands() == 2) { SDValue CondRHS = RHS; SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); - if (Other.getOpcode() == ISD::SUB && - LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS && - OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) { - // Look for a general sub with unsigned saturation first. - // zext(x) >= y ? x - trunc(y) : 0 - // --> usubsat(x,trunc(umin(y,SatLimit))) - // zext(x) > y ? x - trunc(y) : 0 - // --> usubsat(x,trunc(umin(y,SatLimit))) - if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) - return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG, - DL); - } - if (OpLHS == LHS) { // Look for a general sub with unsigned saturation first. // x >= y ? x-y : 0 --> usubsat x, y @@ -10493,8 +10978,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // Another special case: If C was a sign bit, the sub has been // canonicalized into a xor. - // FIXME: Would it be better to use computeKnownBits to determine - // whether it's safe to decanonicalize the xor? + // FIXME: Would it be better to use computeKnownBits to + // determine whether it's safe to decanonicalize the xor? // x s< 0 ? x^C : 0 --> usubsat x, C APInt SplatValue; if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR && @@ -10560,17 +11045,18 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isZero()) - return N2; // cond always true -> true val - else - return N3; // cond always false -> false val - } else if (SCC->isUndef()) { - // When the condition is UNDEF, just return the first operand. This is - // coherent the DAG creation, no setcc node is created in this case + // cond always true -> true val + // cond always false -> false val + if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) + return SCCC->isZero() ? N3 : N2; + + // When the condition is UNDEF, just return the first operand.
This is + // coherent the DAG creation, no setcc node is created in this case + if (SCC->isUndef()) return N2; - } else if (SCC.getOpcode() == ISD::SETCC) { - // Fold to a simpler select_cc + + // Fold to a simpler select_cc + if (SCC.getOpcode() == ISD::SETCC) { SDValue SelectOp = DAG.getNode( ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); @@ -10853,9 +11339,8 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); - for (SDNode::use_iterator UI = N0.getNode()->use_begin(), - UE = N0.getNode()->use_end(); - UI != UE; ++UI) { + for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE; + ++UI) { SDNode *User = *UI; if (User == N) continue; @@ -11187,9 +11672,12 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { + // TODO: isFixedLengthVector() should be removed and any negative effects on + // code generation being the result of that target's implementation of + // isVectorLoadExtDesirable(). if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isFixedLengthVector() || !cast<LoadSDNode>(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -11413,6 +11901,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // sext(undef) = 0 because the top bits will all be the same. + if (N0.isUndef()) + return DAG.getConstant(0, DL, VT); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -11582,10 +12074,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Return SDValue here as the xor should have already been replaced in // this sext. return SDValue(); - } else { - // Return a new sext with the new xor. - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); } + + // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); } SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); @@ -11658,6 +12150,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // zext(undef) = 0 + if (N0.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -11917,6 +12413,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // aext(undef) = undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -11954,11 +12454,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { SDLoc DL(N); - SDValue X = N0.getOperand(0).getOperand(0); - X = DAG.getAnyExtOrTrunc(X, DL, VT); - APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, DL, VT, - X, DAG.getConstant(Mask, DL, VT)); + SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); + SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1)); + assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!"); + return DAG.getNode(ISD::AND, DL, VT, X, Y); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -12086,13 +12585,9 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { // This eliminates the later assert: // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN + SDLoc DL(N); SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); - assert(BigA_AssertVT.bitsLE(N0.getValueType()) && - "Asserting zero/sign-extended bits to a type larger than the " - "truncated destination does not provide information"); - - SDLoc DL(N); EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), @@ -12108,10 +12603,6 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { Opcode == ISD::AssertZext) { SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); - assert(BigA_AssertVT.bitsLE(N0.getValueType()) && - "Asserting zero/sign-extended bits to a type larger than the " - "truncated destination does not provide information"); - if (AssertVT.bitsLT(BigA_AssertVT)) { SDLoc DL(N); SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), @@ -12229,13 +12720,11 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { unsigned ActiveBits = 0; if (Mask.isMask()) { ActiveBits = Mask.countTrailingOnes(); - } else if (Mask.isShiftedMask()) { - ShAmt = Mask.countTrailingZeros(); - APInt ShiftedMask = Mask.lshr(ShAmt); - ActiveBits = ShiftedMask.countTrailingOnes(); + } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) { HasShiftedOffset = true; - } else + } else { return SDValue(); + } ExtType = ISD::ZEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -12852,21 +13341,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) - // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) - // When the adde's carry is not used.
- if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && - N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && - // We only do for addcarry before legalize operation - ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || - TLI.isOperationLegal(N0.getOpcode(), VT))) { - SDLoc SL(N); - auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); - auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); - auto VTs = DAG.getVTList(VT, N0->getValueType(1)); - return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); - } - // fold (truncate (extract_subvector(ext x))) -> // (extract_subvector x) // TODO: This can be generalized to cover cases where the truncate and extract @@ -12911,6 +13385,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } break; + case ISD::ADDE: + case ISD::ADDCARRY: + // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) + // When the adde's carry is not used. + // We only do for addcarry before legalize operation + if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || + TLI.isOperationLegal(N0.getOpcode(), VT)) && + N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) { + SDLoc DL(N); + SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); + SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); + SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1)); + return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2)); + } + break; case ISD::USUBSAT: // Truncate the USUBSAT only if LHS is a known zero-extension, its not // enough to know that the upper bits are zero we must ensure that we don't @@ -13044,7 +13534,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (!LegalTypes || (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() && TLI.isTypeLegal(VT.getVectorElementType()))) && - N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() && cast<BuildVectorSDNode>(N0)->isConstant()) return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), VT.getVectorElementType()); @@ -13112,8 +13602,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // This often reduces constant pool loads.
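The adde/addcarry rule relocated into the switch above keeps the same narrowing logic: when the carry-out is unused, truncation commutes with the add. The scalar fact behind it, checked standalone (C++, outside the patch):

#include <cassert>
#include <cstdint>

int main() {
  // (trunc (addcarry X, Y, C)) == (addcarry (trunc X), (trunc Y), C) when
  // the carry-out is dead: the low 16 bits of a 32-bit add depend only on
  // the low 16 bits of its inputs.
  uint32_t X = 0x1234ABCD, Y = 0xFFFF0101;
  for (uint32_t C = 0; C <= 1; ++C)
    assert((uint16_t)(X + Y + C) ==
           (uint16_t)((uint16_t)X + (uint16_t)Y + C));
  return 0;
}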
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && - N0.getNode()->hasOneUse() && VT.isInteger() && - !VT.isVector() && !N0.getValueType().isVector()) { + N0->hasOneUse() && VT.isInteger() && !VT.isVector() && + !N0.getValueType().isVector()) { SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); @@ -13161,9 +13651,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (xor (bitcast cst), (bitcast x)), 0), // signbit) // (xor (bitcast cst) (build_pair flipbit, flipbit)) - if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && - isa<ConstantFPSDNode>(N0.getOperand(0)) && - VT.isInteger() && !VT.isVector()) { + if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && + !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { @@ -13245,8 +13735,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); - if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || - ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + if (Op.isUndef() || isAnyConstantBuildVector(Op)) return DAG.getBitcast(VT, Op); return SDValue(); }; @@ -13286,6 +13775,14 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; + // Fold freeze(bitcast(x)) -> bitcast(freeze(x)). + // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold. + if (N0.getOpcode() == ISD::BITCAST) + return DAG.getBitcast(N->getValueType(0), + DAG.getNode(ISD::FREEZE, SDLoc(N0), + N0.getOperand(0).getValueType(), + N0.getOperand(0))); + return SDValue(); } @@ -13377,7 +13874,7 @@ static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { // Returns true if `N` can assume no infinities involved in its computation. static bool hasNoInfs(const TargetOptions &Options, SDValue N) { - return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs(); + return Options.NoInfsFPMath || N->getFlags().hasNoInfs(); } /// Try to perform FMA combining on a given FADD node. @@ -13431,7 +13928,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) { - if (N0.getNode()->use_size() > N1.getNode()->use_size()) + if (N0->use_size() > N1->use_size()) std::swap(N0, N1); } @@ -13661,7 +14158,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses.
if (isContractableFMUL(N0) && isContractableFMUL(N1) && - (N0.getNode()->use_size() > N1.getNode()->use_size())) { + (N0->use_size() > N1->use_size())) { // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b)) if (SDValue V = tryToFoldXSubYZ(N0, N1)) return V; @@ -14784,7 +15281,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1})) return C; - + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -15107,7 +15604,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { // This means this is also safe for a signed input and unsigned output, since // a negative input would lead to undefined behavior. unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; - unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits(); unsigned ActualSize = std::min(InputSize, OutputSize); const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); @@ -15198,7 +15695,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) - if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { + if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Tmp.getNode()); @@ -15642,7 +16139,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || - Ptr.getNode()->hasOneUse()) + Ptr->hasOneUse()) return false; // Ask the target to do addressing mode selection. @@ -15702,8 +16199,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), - UE = BasePtr.getNode()->use_end(); + for (SDNode::use_iterator UI = BasePtr->use_begin(), + UE = BasePtr->use_end(); UI != UE; ++UI) { SDUse &Use = UI.getUse(); // Skip the use that is Ptr and uses of other results from BasePtr's @@ -15741,7 +16238,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4.
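The FoldIntToFPToInt change above compares against the full output width because the transform is only an identity when every value survives the FP round-trip. A standalone illustration of why the significand width is the limit (C++, outside the patch; assumes IEEE-754 binary32 'float'):

#include <cassert>
#include <cstdint>

int main() {
  // sitofp followed by fptosi is the identity for i16 -> float -> i16,
  // since float's 24-bit significand represents all of i16 exactly...
  for (int32_t I = INT16_MIN; I <= INT16_MAX; ++I)
    assert((int16_t)(float)(int16_t)I == (int16_t)I);

  // ...but not for i32: this value needs 25 significand bits.
  int32_t Big = (1 << 24) + 1;
  assert((int32_t)(float)Big != Big);
  return 0;
}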
bool RealUse = false; - for (SDNode *Use : Ptr.getNode()->uses()) { + for (SDNode *Use : Ptr->uses()) { if (Use == N) continue; if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) @@ -15774,7 +16271,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; - Result.getNode()->dump(&DAG); dbgs() << '\n'); + Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -15864,7 +16361,7 @@ static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, return false; SmallPtrSet<const SDNode *, 32> Visited; - for (SDNode *Use : BasePtr.getNode()->uses()) { + for (SDNode *Use : BasePtr->uses()) { if (Use == Ptr.getNode()) continue; @@ -15901,7 +16398,7 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, const TargetLowering &TLI) { if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, Ptr, TLI) || - Ptr.getNode()->hasOneUse()) + Ptr->hasOneUse()) return nullptr; // Try turning it into a post-indexed load / store except when @@ -15961,9 +16458,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); - dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; + Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -16204,7 +16700,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); - dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); + dbgs() << "\nWith chain: "; Chain.dump(&DAG); dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); @@ -16235,7 +16731,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else Index = DAG.getUNDEF(N->getValueType(1)); LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); - dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); + dbgs() << "\nWith: "; Undef.dump(&DAG); dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); @@ -16947,11 +17443,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization (and the target doesn't explicitly think this is a bad idea). + // legalization. If the source type is legal, but the store type isn't, see + // if we can use a truncating store. MVT VT = MVT::getIntegerVT(NumBytes * 8); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!DC->isTypeLegal(VT)) + bool UseTruncStore; + if (DC->isTypeLegal(VT)) + UseTruncStore = false; + else if (TLI.isTypeLegal(IVal.getValueType()) && + TLI.isTruncStoreLegal(IVal.getValueType(), VT)) + UseTruncStore = true; + else return SDValue(); + // Check that the target doesn't think this is a bad idea.
if (St->getMemOperand() && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, *St->getMemOperand())) return SDValue(); @@ -16979,10 +17483,15 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL); } + ++OpsNarrowed; + if (UseTruncStore) + return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), + VT, St->getOriginalAlign()); + // Truncate down to the new size. IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); - ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, St->getPointerInfo().getWithOffset(StOffset), @@ -17003,11 +17512,15 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue Ptr = ST->getBasePtr(); EVT VT = Value.getValueType(); - if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + if (ST->isTruncatingStore() || VT.isVector()) return SDValue(); unsigned Opc = Value.getOpcode(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + !Value.hasOneUse()) + return SDValue(); + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the @@ -17032,8 +17545,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (!EnableReduceLoadOpStoreWidth) return SDValue(); - if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || - Value.getOperand(1).getOpcode() != ISD::Constant) + if (Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); SDValue N0 = Value.getOperand(0); @@ -17189,14 +17701,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { // (A + c1) * c3 // (A + c2) * c3 // We're checking for cases where we have common "c3 * A" expressions. -bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, - SDValue &AddNode, - SDValue &ConstNode) { +bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, + SDValue ConstNode) { APInt Val; // If the add only has one use, and the target thinks the folding is // profitable or does not lead to worse code, this would be OK to do. - if (AddNode.getNode()->hasOneUse() && + if (AddNode->hasOneUse() && TLI.isMulAddWithConstProfitable(AddNode, ConstNode)) return true; @@ -17330,7 +17841,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( if (isa<ConstantFPSDNode>(Val)) { // Not clear how to truncate FP values. return false; - } else if (auto *C = dyn_cast<ConstantSDNode>(Val)) + } + + if (auto *C = dyn_cast<ConstantSDNode>(Val)) Val = DAG.getConstant(C->getAPIntValue() .zextOrTrunc(Val.getValueSizeInBits()) .zextOrTrunc(ElementSizeBits), @@ -17424,7 +17937,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( if (!UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), - FirstInChain->getAlign(), Flags.getValue(), AAInfo); + FirstInChain->getAlign(), *Flags, AAInfo); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17436,7 +17949,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlign(), Flags.getValue(), AAInfo); + FirstInChain->getAlign(), *Flags, AAInfo); } // Replace all merged stores with the new store.
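ShrinkLoadReplaceStoreWithStore, extended above with a truncating-store fallback, narrows a full-width 'load; or/and; store' into a store of just the changed bytes. What that means at the memory level, sketched standalone (C++, outside the patch; assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 'store (or (and (load p), ~0xFF00), 0x42 << 8), p' only changes byte 1,
  // so it can be narrowed to a one-byte store at p + 1 on little endian.
  uint32_t Mem = 0xAABBCCDD;
  uint32_t Wide = (Mem & ~0xFF00u) | (0x42u << 8); // full-width result

  uint8_t Byte = 0x42;
  std::memcpy(reinterpret_cast<uint8_t *>(&Mem) + 1, &Byte, 1); // narrowed
  assert(Mem == Wide);
  return 0;
}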
@@ -17604,11 +18117,9 @@ void DAGCombiner::getStoreMergeCandidates( } } -// We need to check that merging these stores does not cause a loop in -// the DAG. Any store candidate may depend on another candidate -// indirectly through its operand (we already consider dependencies -// through the chain). Check in parallel by searching up from -// non-chain operands of candidates. +// We need to check that merging these stores does not cause a loop in the +// DAG. Any store candidate may depend on another candidate indirectly through +// its operands. Check in parallel by searching up from operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, SDNode *RootNode) { @@ -17642,8 +18153,13 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SDNode *N = StoreNodes[i].MemNode; // Of the 4 Store Operands: // * Chain (Op 0) -> We have already considered these - // in candidate selection and can be - // safely ignored + // in candidate selection, but only by following the + // chain dependencies. We could still have a chain + // dependency to a load, that has a non-chain dep to + // another load, that depends on a store, etc. So it is + // possible to have dependencies that consist of a mix + // of chain and non-chain deps, and we need to include + // chain operands in the analysis here.. // * Value (Op 1) -> Cycles may happen (e.g. through load chains) // * Address (Op 2) -> Merged addresses may only vary by a fixed constant, // but aren't necessarily fromt the same base node, so // * (Op 3) -> Represents the pre or post-indexing offset (or undef for // non-indexed stores). Not constant on all targets (e.g. ARM) // and so can participate in a cycle. - for (unsigned j = 1; j < N->getNumOperands(); ++j) + for (unsigned j = 0; j < N->getNumOperands(); ++j) Worklist.push_back(N->getOperand(j).getNode()); } // Search through DAG. We can stop early if we find a store node. @@ -17726,7 +18242,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( while (NumConsecutiveStores >= 2) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); + Align FirstStoreAlign = FirstInChain->getAlign(); unsigned LastLegalType = 1; unsigned LastLegalVectorType = 1; bool LastIntegerTrunc = false; @@ -17814,7 +18330,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( unsigned NumSkip = 1; while ((NumSkip < NumConsecutiveStores) && (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); @@ -17853,7 +18369,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts( while (NumConsecutiveStores >= 2) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); + Align FirstStoreAlign = FirstInChain->getAlign(); unsigned NumStoresToMerge = 1; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { // Find a legal type for the vector store. @@ -17884,7 +18400,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts( // improved. Drop as many candidates as we can here.
unsigned NumSkip = 1; while ((NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); @@ -18181,7 +18697,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, for (unsigned i = 0; i < NumElem; ++i) { SDValue Val = StoreNodes[i].MemNode->getOperand(1); CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) + if (Val->use_empty()) recursivelyDeleteUnusedNodes(Val.getNode()); } @@ -18331,6 +18847,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. + case MVT::bf16: case MVT::f80: case MVT::f128: case MVT::ppcf128: @@ -18338,7 +18855,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), SDLoc(CFP), MVT::i32); @@ -18350,7 +18866,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { - ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, DL, Tmp, @@ -18544,7 +19059,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // truncating store. We can do this even if this is already a truncstore. if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) && - Value.getNode()->hasOneUse() && ST->isUnindexed() && + Value->hasOneUse() && ST->isUnindexed() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), ST->getMemoryVT(), LegalOperations)) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), @@ -18807,6 +19322,14 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { } } + // If we failed to find a match, see if we can replace an UNDEF shuffle + // operand. + if (ElementOffset == -1 && Y.isUndef() && + InsertVal0.getValueType() == Y.getValueType()) { + ElementOffset = Mask.size(); + Y = InsertVal0; + } + if (ElementOffset != -1) { SmallVector<int> NewMask(Mask.begin(), Mask.end()); @@ -18905,10 +19428,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { if (VT.isScalableVector()) return DAG.getSplatVector(VT, DL, InVal); - else { - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); - return DAG.getBuildVector(VT, DL, Ops); - } + + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); + return DAG.getBuildVector(VT, DL, Ops); } return SDValue(); } @@ -18920,9 +19442,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) return Shuf; + // Handle <1 x ???> vector insertion special cases. + if (VT.getVectorNumElements() == 1) { + // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + InVal.getOperand(0).getValueType() == VT && + isNullConstant(InVal.getOperand(1))) + return InVal.getOperand(0); + } + // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
@@ -18943,36 +19475,84 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
     }
   }
 
-  // If we can't generate a legal BUILD_VECTOR, exit
-  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
-    return SDValue();
+  // Attempt to fold the insertion into a legal BUILD_VECTOR.
+  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+    auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
+      assert(Ops.size() == NumElts && "Unexpected vector size");
 
-  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
-  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
-  // vector elements.
-  SmallVector<SDValue, 8> Ops;
-  // Do not combine these two vectors if the output vector will not replace
-  // the input vector.
-  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
-    Ops.append(InVec.getNode()->op_begin(),
-               InVec.getNode()->op_end());
-  } else if (InVec.isUndef()) {
-    Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
-  } else {
-    return SDValue();
-  }
-  assert(Ops.size() == NumElts && "Unexpected vector size");
+      // Insert the element
+      if (Elt < Ops.size()) {
+        // All the operands of BUILD_VECTOR must have the same type;
+        // we enforce that here.
+        EVT OpVT = Ops[0].getValueType();
+        Ops[Elt] =
+            OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+      }
+
+      // Return the new vector
+      return DAG.getBuildVector(VT, DL, Ops);
+    };
+
+    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+    // vector elements.
+    SmallVector<SDValue, 8> Ops;
+
+    // Do not combine these two vectors if the output vector will not replace
+    // the input vector.
+    if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
+      Ops.append(InVec->op_begin(), InVec->op_end());
+      return UpdateBuildVector(Ops);
+    }
+
+    if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
+      Ops.push_back(InVec.getOperand(0));
+      Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
+      return UpdateBuildVector(Ops);
+    }
+
+    if (InVec.isUndef()) {
+      Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
+      return UpdateBuildVector(Ops);
+    }
+
+    // If we're inserting into the end of a vector as part of a sequence, see
+    // if we can create a BUILD_VECTOR by following the sequence back up the
+    // chain.
+    if (Elt == (NumElts - 1)) {
+      SmallVector<SDValue, 8> ReverseInsertions;
+      ReverseInsertions.push_back(InVal);
+
+      EVT MaxEltVT = InVal.getValueType();
+      SDValue CurVec = InVec;
+      for (unsigned I = 1; I != NumElts; ++I) {
+        if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
+          break;
+
+        auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
+        if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
+          break;
+        SDValue CurVal = CurVec.getOperand(1);
+        ReverseInsertions.push_back(CurVal);
+        if (VT.isInteger()) {
+          EVT CurValVT = CurVal.getValueType();
+          MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
+        }
+        CurVec = CurVec.getOperand(0);
+      }
 
-  // Insert the element
-  if (Elt < Ops.size()) {
-    // All the operands of BUILD_VECTOR must have the same type;
-    // we enforce that here.
-    EVT OpVT = Ops[0].getValueType();
-    Ops[Elt] = OpVT.isInteger() ?
DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; + if (ReverseInsertions.size() == NumElts) { + for (unsigned I = 0; I != NumElts; ++I) { + SDValue Val = ReverseInsertions[(NumElts - 1) - I]; + Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val; + Ops.push_back(Val); + } + return DAG.getBuildVector(VT, DL, Ops); + } + } } - // Return the new vector - return DAG.getBuildVector(VT, DL, Ops); + return SDValue(); } SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, @@ -19021,47 +19601,33 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo); - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. - // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. + // We are replacing a vector load with a scalar load. The new load must have + // identical memory op ordering to the original. SDValue Load; - SDValue Chain; if (ResultVT.bitsGT(VecEltVT)) { // If the result type of vextract is wider than the load, then issue an // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, - VecEltVT) - ? ISD::ZEXTLOAD - : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, - OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, - Alignment, OriginalLoad->getMemOperand()->getFlags(), + ISD::LoadExtType ExtType = + TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(), + NewPtr, MPI, VecEltVT, Alignment, + OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); - Chain = Load.getValue(1); + DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load); } else { - Load = DAG.getLoad( - VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment, - OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); - Chain = Load.getValue(1); + // The result type is narrower or the same width as the vector element + Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI, + Alignment, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); + DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load); if (ResultVT.bitsLT(VecEltVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load); else Load = DAG.getBitcast(ResultVT, Load); } - WorklistRemover DeadNodes(*this); - SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); - // Since we're explicitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. 
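
The new code in visitINSERT_VECTOR_ELT above walks a one-use chain of insert_vector_elt nodes from the last lane back toward lane 0 and, if the chain covers every lane, emits a single BUILD_VECTOR. Here is that walk over plain data, with a hypothetical InsertElt record standing in for the DAG nodes; the real code additionally widens integer elements to the largest element type seen (MaxEltVT):

#include <vector>

struct InsertElt {
  unsigned Index;        // lane being written
  int Value;             // value inserted (int for simplicity)
  const InsertElt *Prev; // preceding insert in the chain, or null
};

// Rebuild the whole vector if the chain inserts lanes NumElts-1 .. 0 in order.
bool rebuildVector(const InsertElt &Last, unsigned NumElts,
                   std::vector<int> &Out) {
  Out.assign(NumElts, 0);
  const InsertElt *Cur = &Last;
  for (unsigned I = 0; I != NumElts; ++I) {
    if (!Cur || Cur->Index != (NumElts - 1) - I)
      return false; // chain broken or out of order: give up
    Out[Cur->Index] = Cur->Value;
    Cur = Cur->Prev;
  }
  return true;
}
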
- AddToWorklistWithUsers(Load.getNode()); ++OpsNarrowed; - return SDValue(EVE, 0); + return Load; } /// Transform a vector binary operation into a scalar binary operation by moving @@ -19073,7 +19639,7 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, SDValue Index = ExtElt->getOperand(1); auto *IndexC = dyn_cast(Index); if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() || - Vec.getNode()->getNumValues() != 1) + Vec->getNumValues() != 1) return SDValue(); // Targets may want to avoid this to prevent an expensive register transfer. @@ -19129,8 +19695,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = VecOp.getOperand(0); if (InOp.getValueType() != ScalarVT) { - assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, DL, ScalarVT); + assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() && + InOp.getValueType().bitsGT(ScalarVT)); + return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp); } return InOp; } @@ -19588,7 +20155,7 @@ SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { if (!isa(ShiftAmtVal)) return SDValue(); - uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1); + uint64_t ShiftAmt = In.getConstantOperandVal(1); // The extracted value is not extracted at the right position if (ShiftAmt != i * ScalarTypeBitsize) @@ -20029,18 +20596,39 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { int Left = 2 * In; int Right = 2 * In + 1; SmallVector Mask(NumElems, -1); - for (unsigned i = 0; i != NumElems; ++i) { - if (VectorMask[i] == Left) { - Mask[i] = i; - VectorMask[i] = In; - } else if (VectorMask[i] == Right) { - Mask[i] = i + NumElems; - VectorMask[i] = In; + SDValue L = Shuffles[Left]; + ArrayRef LMask; + bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE && + L.use_empty() && L.getOperand(1).isUndef() && + L.getOperand(0).getValueType() == L.getValueType(); + if (IsLeftShuffle) { + LMask = cast(L.getNode())->getMask(); + L = L.getOperand(0); + } + SDValue R = Shuffles[Right]; + ArrayRef RMask; + bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE && + R.use_empty() && R.getOperand(1).isUndef() && + R.getOperand(0).getValueType() == R.getValueType(); + if (IsRightShuffle) { + RMask = cast(R.getNode())->getMask(); + R = R.getOperand(0); + } + for (unsigned I = 0; I != NumElems; ++I) { + if (VectorMask[I] == Left) { + Mask[I] = I; + if (IsLeftShuffle) + Mask[I] = LMask[I]; + VectorMask[I] = In; + } else if (VectorMask[I] == Right) { + Mask[I] = I + NumElems; + if (IsRightShuffle) + Mask[I] = RMask[I] + NumElems; + VectorMask[I] = In; } } - Shuffles[In] = - DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); + Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask); } } return Shuffles[0]; @@ -20628,7 +21216,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = Extract->getOperand(0); unsigned BinOpcode = BinOp.getOpcode(); - if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) + if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1) return SDValue(); EVT VecVT = BinOp.getValueType(); @@ -20677,7 +21265,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); unsigned BOpcode = BinOp.getOpcode(); - if 
(!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1) + if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1) return SDValue(); // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be @@ -20736,8 +21324,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, BinOp.getOperand(0), NewExtIndex); SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(1), NewExtIndex); - SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, - BinOp.getNode()->getFlags()); + SDValue NarrowBinOp = + DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags()); return DAG.getBitcast(VT, NarrowBinOp); } @@ -21018,6 +21606,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { } } + // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V) + if (V.getOpcode() == ISD::SPLAT_VECTOR) + if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) + if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) + return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0)); + // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') if (V.getOpcode() == ISD::BITCAST && @@ -21383,9 +21977,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); if (SVT != VT.getScalarType()) for (SDValue &Op : Ops) - Op = TLI.isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) - : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); + Op = Op.isUndef() ? DAG.getUNDEF(SVT) + : (TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT)); return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } @@ -21515,6 +22110,13 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { if (!Shuf->getOperand(1).isUndef()) return SDValue(); + + // If the inner operand is a known splat with no undefs, just return that directly. + // TODO: Create DemandedElts mask from Shuf's mask. + // TODO: Allow undef elements and merge with the shuffle code below. + if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false)) + return Shuf->getOperand(0); + auto *Splat = dyn_cast(Shuf->getOperand(0)); if (!Splat || !Splat->isSplat()) return SDValue(); @@ -21561,6 +22163,53 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, NewMask); } +// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing +// the mask can be treated as a larger type. 
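
The widening step this new combine leans on (widenShuffleMaskElts) has to prove that each group of narrow-lane mask entries describes exactly one lane of the wider type. A stricter standalone variant of that check, using -1 for undef as shuffle masks do (the in-tree helper is more permissive about partially-undef groups):

#include <vector>

bool widenMask(int Factor, const std::vector<int> &Mask,
               std::vector<int> &Wide) {
  Wide.clear();
  if (Factor <= 0 || Mask.size() % Factor != 0)
    return false;
  for (size_t I = 0; I < Mask.size(); I += Factor) {
    int First = Mask[I];
    if (First == -1) {
      for (int J = 1; J < Factor; ++J)
        if (Mask[I + J] != -1)
          return false; // reject partially-undef groups
      Wide.push_back(-1);
      continue;
    }
    if (First % Factor != 0)
      return false; // group must start on a wide-lane boundary
    for (int J = 1; J < Factor; ++J)
      if (Mask[I + J] != First + J)
        return false; // and cover consecutive narrow lanes
    Wide.push_back(First / Factor);
  }
  return true;
}
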
+static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + SDValue Op0 = SVN->getOperand(0); + SDValue Op1 = SVN->getOperand(1); + EVT VT = SVN->getValueType(0); + if (Op0.getOpcode() != ISD::BITCAST) + return SDValue(); + EVT InVT = Op0.getOperand(0).getValueType(); + if (!InVT.isVector() || + (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST || + Op1.getOperand(0).getValueType() != InVT))) + return SDValue(); + if (isAnyConstantBuildVector(Op0.getOperand(0)) && + (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0)))) + return SDValue(); + + int VTLanes = VT.getVectorNumElements(); + int InLanes = InVT.getVectorNumElements(); + if (VTLanes <= InLanes || VTLanes % InLanes != 0 || + (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT))) + return SDValue(); + int Factor = VTLanes / InLanes; + + // Check that each group of lanes in the mask are either undef or make a valid + // mask for the wider lane type. + ArrayRef Mask = SVN->getMask(); + SmallVector NewMask; + if (!widenShuffleMaskElts(Factor, Mask, NewMask)) + return SDValue(); + + if (!TLI.isShuffleMaskLegal(NewMask, InVT)) + return SDValue(); + + // Create the new shuffle with the new mask and bitcast it back to the + // original type. + SDLoc DL(SVN); + Op0 = Op0.getOperand(0); + Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0); + SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask); + return DAG.getBitcast(VT, NewShuf); +} + /// Combine shuffle of shuffle of the form: /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, @@ -21772,7 +22421,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && - TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { + TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) SDValue L = N0.getOperand(0), R = N0.getOperand(1); @@ -21781,13 +22430,26 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL); SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index); SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index); - SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, - N0.getNode()->getFlags()); + SDValue NewBO = + DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags()); SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO); SmallVector ZeroMask(VT.getVectorNumElements(), 0); return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask); } + // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x) + // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x) + if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) && + N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0) + return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0)); + + if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT) + if (auto *Idx = dyn_cast(N0.getOperand(2))) + if (Idx->getAPIntValue() == SplatIndex) + return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1)); + } + // If this is a bit convert that changes the element type of the 
vector but // not the number of vector elements, look through it. Be careful not to // look though conversions that change things like v4f32 to v2f64. @@ -22011,6 +22673,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // Match shuffles of bitcasts, so long as the mask can be treated as the + // larger type. + if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations)) + return V; + // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false @@ -22342,6 +23009,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) return N1.getOperand(0); + // Simplify scalar inserts into an undef vector: + // insert_subvector undef, (splat X), N2 -> splat X + if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR) + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); + // If we are inserting a bitcast value into an undef, with the same // number of elements, just use the bitcast input of the extract. // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> @@ -22489,6 +23161,16 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_bf16 (bf16_to_fp op)) -> op + if (N0->getOpcode() == ISD::BF16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType(); @@ -22516,6 +23198,19 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0); } + // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val) + // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && + TLI.isTypeLegal(N0.getOperand(1).getValueType())) { + SDValue Vec = N0.getOperand(0); + SDValue Subvec = N0.getOperand(1); + if ((Opcode == ISD::VECREDUCE_OR && + (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) || + (Opcode == ISD::VECREDUCE_AND && + (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec)))) + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec); + } + return SDValue(); } @@ -22819,7 +23514,7 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. if (SCC.getOpcode() == ISD::SELECT_CC) { - const SDNodeFlags Flags = N0.getNode()->getFlags(); + const SDNodeFlags Flags = N0->getFlags(); SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), @@ -23489,6 +24184,27 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return SDValue(); } +/// Given an ISD::SREM node expressing a remainder by constant power of 2, +/// return a DAG expression that will generate the same value. +SDValue DAGCombiner::BuildSREMPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. 
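
The VECREDUCE_OR/VECREDUCE_AND fold a little further up works because zero lanes are the identity for OR and all-ones lanes are the identity for AND, so known padding around an inserted subvector contributes nothing to the reduction. The same identity in scalar form:

#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

uint32_t reduceOr(const std::vector<uint32_t> &Lanes) {
  return std::accumulate(Lanes.begin(), Lanes.end(), uint32_t{0},
                         std::bit_or<uint32_t>());
}
// reduceOr({a, b, 0, 0}) == reduceOr({a, b}): zero padding is an identity,
// so reducing the padded vector equals reducing just the subvector.
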
+ if (C->isZero()) + return SDValue(); + + SmallVector Built; + if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + /// Determines the LogBase2 value for a non-null input value using the /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { @@ -23798,9 +24514,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { auto &Size0 = MUC0.NumBytes; auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 && - OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && - SrcValOffset1 % *Size1 == 0) { + Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 && + SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); @@ -23819,8 +24534,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { UseAA = false; #endif - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && - Size0.hasValue() && Size1.hasValue()) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 && + Size1) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; @@ -23853,7 +24568,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, unsigned Depth = 0; // Attempt to improve chain by a single step - std::function ImproveChain = [&](SDValue &C) -> bool { + auto ImproveChain = [&](SDValue &C) -> bool { switch (C.getOpcode()) { case ISD::EntryToken: // No need to mark EntryToken. diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index d8ef79fe9a7b..ff5779967e22 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -72,7 +72,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -94,7 +93,6 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1265,7 +1263,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { + if (UseInstrRefDebugInfo && Op->isReg()) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); auto *NewExpr = @@ -1324,7 +1322,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. 
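
BuildSREMPow2 above only validates the constant and forwards to the target hook TLI.BuildSREMPow2. What such an expansion computes, for a power-of-two divisor, is the masked remainder corrected so the result keeps the dividend's sign (matching C and LLVM srem semantics). A scalar model:

#include <cstdint>

// Signed remainder by a power of two P (P > 0), without a divide.
int32_t sremPow2(int32_t X, int32_t P) {
  int32_t R = X & (P - 1); // unsigned-style remainder in [0, P)
  if (X < 0 && R != 0)
    R -= P;                // e.g. sremPow2(-7, 4) == -3, like -7 % 4
  return R;
}
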
- if (FuncInfo.MF->useDebugInstrRef()) { + if (UseInstrRefDebugInfo) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); } @@ -1408,16 +1406,6 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { } bool FastISel::selectBitCast(const User *I) { - // If the bitcast doesn't change the type, just use the operand value. - if (I->getType() == I->getOperand(0)->getType()) { - Register Reg = getRegForValue(I->getOperand(0)); - if (!Reg) - return false; - updateValueMap(I, Reg); - return true; - } - - // Bitcasts of other values become reg-reg copies or BITCAST operators. EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType()); EVT DstEVT = TLI.getValueType(DL, I->getType()); if (SrcEVT == MVT::Other || DstEVT == MVT::Other || @@ -1431,23 +1419,14 @@ bool FastISel::selectBitCast(const User *I) { if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - // First, try to perform the bitcast by inserting a reg-reg copy. - Register ResultReg; + // If the bitcast doesn't change the type, just use the operand value. if (SrcVT == DstVT) { - const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); - const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); - // Don't attempt a cross-class copy. It will likely fail. - if (SrcClass == DstClass) { - ResultReg = createResultReg(DstClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); - } + updateValueMap(I, Op0); + return true; } - // If the reg-reg copy failed, select a BITCAST opcode. - if (!ResultReg) - ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0); - + // Otherwise, select a BITCAST opcode. + Register ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0); if (!ResultReg) return false; @@ -2251,6 +2230,11 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { if (!MRI.hasOneUse(LoadReg)) return false; + // If the register has fixups, there may be additional uses through a + // different alias of the register. + if (FuncInfo.RegsWithFixups.contains(LoadReg)) + return false; + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); MachineInstr *User = RI->getParent(); diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 85c6eca5775e..aa9c77f9cabf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -31,13 +31,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" #include using namespace llvm; @@ -57,7 +54,7 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -static ISD::NodeType getPreferredExtendForValue(const Value *V) { +static ISD::NodeType getPreferredExtendForValue(const Instruction *I) { // For the users of the source value being used for compare instruction, if // the number of signed predicate is greater than unsigned predicate, we // prefer to use SIGN_EXTEND. @@ -67,7 +64,7 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { // can be exposed. 
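
For context on the getPreferredExtendForValue change continued below: the heuristic is a simple vote over a value's users, preferring sign extension when more of them compare the value signed than unsigned. Sketched here with a hypothetical Cmp tag standing in for llvm::CmpInst:

#include <vector>

enum class Cmp { Signed, Unsigned, Other };

bool preferSignExtend(const std::vector<Cmp> &Users) {
  unsigned NumSigned = 0, NumUnsigned = 0;
  for (Cmp U : Users) {
    NumSigned += (U == Cmp::Signed);     // signed predicate user
    NumUnsigned += (U == Cmp::Unsigned); // unsigned predicate user
  }
  return NumSigned > NumUnsigned;
}
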
 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
   unsigned NumOfSigned = 0, NumOfUnsigned = 0;
-  for (const User *U : V->users()) {
+  for (const User *U : I->users()) {
     if (const auto *CI = dyn_cast<CmpInst>(U)) {
       NumOfSigned += CI->isSigned();
       NumOfUnsigned += CI->isUnsigned();
@@ -448,9 +445,14 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
   unsigned BitWidth = IntVT.getSizeInBits();
 
-  Register DestReg = ValueMap[PN];
-  if (!Register::isVirtualRegister(DestReg))
+  auto It = ValueMap.find(PN);
+  if (It == ValueMap.end())
     return;
+
+  Register DestReg = It->second;
+  if (DestReg == 0)
+    return;
+
+  assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg");
   LiveOutRegInfo.grow(DestReg);
   LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -462,7 +464,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   }
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+    APInt Val;
+    if (TLI->signExtendConstant(CI))
+      Val = CI->getValue().sext(BitWidth);
+    else
+      Val = CI->getValue().zext(BitWidth);
     DestLOI.NumSignBits = Val.getNumSignBits();
     DestLOI.Known = KnownBits::makeConstant(Val);
   } else {
@@ -494,7 +500,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   }
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+    APInt Val;
+    if (TLI->signExtendConstant(CI))
+      Val = CI->getValue().sext(BitWidth);
+    else
+      Val = CI->getValue().zext(BitWidth);
     DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
     DestLOI.Known.Zero &= ~Val;
     DestLOI.Known.One &= Val;
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 331e0325aea3..3d3b504c6abd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -14,22 +14,18 @@
 #include "InstrEmitter.h"
 #include "SDNodeDbgValue.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/PseudoProbe.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
@@ -321,8 +317,15 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
     OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
 
   if (OpRC) {
+    unsigned MinNumRegs = MinRCSize;
+    // Don't apply any RC size limit for IMPLICIT_DEF. Each use has a unique
+    // virtual register.
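
Stepping back to the signExtendConstant hunks above: the point of the hook is that a PHI's constant incoming value must be widened the same way the target will later materialize it, otherwise the cached known-bits and sign-bit counts are wrong. The choice itself is just this, shown for an 8-to-32-bit widening:

#include <cstdint>

// Widen an 8-bit immediate, honoring the target's preferred extension.
int32_t widenImm8(int8_t C, bool TargetSignExtends) {
  return TargetSignExtends ? int32_t(C)           // sext: -1 -> 0xffffffff
                           : int32_t(uint8_t(C)); // zext: -1 -> 0x000000ff
}
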
+ if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + MinNumRegs = 0; + const TargetRegisterClass *ConstrainedRC - = MRI->constrainRegClass(VReg, OpRC, MinRCSize); + = MRI->constrainRegClass(VReg, OpRC, MinNumRegs); if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); @@ -1341,11 +1344,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos) + MachineBasicBlock::iterator insertpos, + bool UseInstrRefDebugInfo) : MF(mbb->getParent()), MRI(&MF->getRegInfo()), TII(MF->getSubtarget().getInstrInfo()), TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) { - EmitDebugInstrRefs = MF->useDebugInstrRef(); + EmitDebugInstrRefs = UseInstrRefDebugInfo; } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index ac8a70156522..ced8f064b9be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -154,7 +154,8 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos); + MachineBasicBlock::iterator insertpos, + bool UseInstrRefDebugInfo); private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 54481b94fdd8..8bdc9410d131 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -45,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include #include #include #include @@ -142,12 +142,10 @@ private: RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128, SmallVectorImpl &Results); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, - RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128, + RTLIB::Libcall Call_IEXT); void ExpandArgFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, @@ -1000,6 +998,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1036,14 +1035,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: case ISD::SETCC: + case ISD::VP_SETCC: case ISD::BR_CC: { - unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 
4 : - Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : - Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : - Node->getOpcode() == ISD::STRICT_FSETCC ? 1 : - Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0; + unsigned Opc = Node->getOpcode(); + unsigned CCOperand = Opc == ISD::SELECT_CC ? 4 + : Opc == ISD::STRICT_FSETCC ? 3 + : Opc == ISD::STRICT_FSETCCS ? 3 + : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2 + : 1; + unsigned CompareOperand = Opc == ISD::BR_CC ? 2 + : Opc == ISD::STRICT_FSETCC ? 1 + : Opc == ISD::STRICT_FSETCCS ? 1 + : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); @@ -1174,6 +1177,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOpcode(), cast(Node)->getValue().getValueType()); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast(Node)->getValue().getValueType()); + break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -1187,6 +1195,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: + case ISD::IS_FPCLASS: Action = TLI.getOperationAction( Node->getOpcode(), Node->getOperand(0).getValueType()); break; @@ -1212,7 +1221,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { - Action = TargetLowering::Legal; + Action = TLI.getCustomOperationAction(*Node); } else { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); } @@ -1723,16 +1732,14 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue Chain) { - unsigned SrcSize = SrcOp.getValueSizeInBits(); - unsigned SlotSize = SlotVT.getSizeInBits(); - unsigned DestSize = DestVT.getSizeInBits(); + EVT SrcVT = SrcOp.getValueType(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType); // Don't convert with stack if the load/store is expensive. - if ((SrcSize > SlotSize && + if ((SrcVT.bitsGT(SlotVT) && !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) || - (SlotSize < DestSize && + (SlotVT.bitsLT(DestVT) && !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT))) return SDValue(); @@ -1750,20 +1757,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, // later than DestVT. SDValue Store; - if (SrcSize > SlotSize) + if (SrcVT.bitsGT(SlotVT)) Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { - assert(SrcSize == SlotSize && "Invalid store"); - Store = - DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign); + assert(SrcVT.bitsEq(SlotVT) && "Invalid store"); + Store = DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. 
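
EmitStackConvert above now compares EVTs instead of raw bit counts, but the mechanism is unchanged: push the value through a stack slot, truncating on the store side or extending on the load side. A little-endian scalar model of that round trip, assuming the value, slot, and result each fit in 8 bytes:

#include <cstdint>
#include <cstring>

uint64_t convertViaSlot(uint64_t Src, unsigned SrcBytes, unsigned SlotBytes,
                        unsigned DstBytes) {
  unsigned char Slot[8] = {0};
  // Truncating store when the slot is narrower than the source.
  std::memcpy(Slot, &Src, SlotBytes < SrcBytes ? SlotBytes : SrcBytes);
  uint64_t Dst = 0;
  // (Zero-)extending load when the destination is wider than the slot.
  std::memcpy(&Dst, Slot, DstBytes < SlotBytes ? DstBytes : SlotBytes);
  return Dst;
}
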
- if (SlotSize == DestSize) + if (SlotVT.bitsEq(DestVT)) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); - assert(SlotSize < DestSize && "Unknown extension!"); + assert(SlotVT.bitsLT(DestVT) && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); } @@ -2101,15 +2107,17 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, ExpandFPLibCall(Node, LC, Results); } -SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, - RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128) { +SDValue SelectionDAGLegalize::ExpandIntLibCall( + SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + + default: + LC = Call_IEXT; + break; + case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2144,7 +2152,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + + default: + LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT; + break; + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2893,6 +2905,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getValueType(0), dl))) Results.push_back(Tmp1); break; + case ISD::BF16_TO_FP: { + // Always expand bf16 to f32 casts, they lower to ext + shift. + SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op); + Op = DAG.getNode( + ISD::SHL, dl, MVT::i32, Op, + DAG.getConstant(16, dl, + TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op); + Results.push_back(Op); + break; + } case ISD::SIGN_EXTEND_INREG: { EVT ExtraVT = cast(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); @@ -2904,7 +2928,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero. // TODO: Do this for vectors too? 
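
The new ISD::BF16_TO_FP expansion above (bitcast to i16, extend, shift left by 16, bitcast to f32) works because bfloat16 is exactly the upper half of an IEEE-754 single. The same sequence in plain C++:

#include <cstdint>
#include <cstring>

float bf16ToFloat(uint16_t Bits) {
  uint32_t Wide = uint32_t(Bits) << 16; // any_extend + shl 16
  float F;
  std::memcpy(&F, &Wide, sizeof(F));    // bitcast i32 -> f32
  return F;
}
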
- if (ExtraVT.getSizeInBits() == 1) { + if (ExtraVT.isScalarInteger() && ExtraVT.getSizeInBits() == 1) { SDValue One = DAG.getConstant(1, dl, VT); SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One); SDValue Zero = DAG.getConstant(0, dl, VT); @@ -3135,6 +3159,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::FABS: Results.push_back(ExpandFABS(Node)); break; + case ISD::IS_FPCLASS: { + auto CNode = cast(Node->getOperand(1)); + auto Test = static_cast(CNode->getZExtValue()); + if (SDValue Expanded = + TLI.expandIS_FPCLASS(Node->getValueType(0), Node->getOperand(0), + Test, Node->getFlags(), SDLoc(Node), DAG)) + Results.push_back(Expanded); + break; + } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3577,18 +3610,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::SETCC: + case ISD::VP_SETCC: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { - bool IsStrict = Node->getOpcode() != ISD::SETCC; + bool IsVP = Node->getOpcode() == ISD::VP_SETCC; + bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS; bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); unsigned Offset = IsStrict ? 1 : 0; Tmp1 = Node->getOperand(0 + Offset); Tmp2 = Node->getOperand(1 + Offset); Tmp3 = Node->getOperand(2 + Offset); - bool Legalized = - TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, - NeedInvert, dl, Chain, IsSignaling); + SDValue Mask, EVL; + if (IsVP) { + Mask = Node->getOperand(3 + Offset); + EVL = Node->getOperand(4 + Offset); + } + bool Legalized = TLI.LegalizeSetCCCondCode( + DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Mask, EVL, NeedInvert, dl, + Chain, IsSignaling); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3598,6 +3639,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(), {Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags()); Chain = Tmp1.getValue(1); + } else if (IsVP) { + Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), + {Tmp1, Tmp2, Tmp3, Mask, EVL}, Node->getFlags()); } else { Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Node->getFlags()); @@ -3606,8 +3650,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. - if (NeedInvert) - Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + if (NeedInvert) { + if (!IsVP) + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + else + Tmp1 = + DAG.getVPLogicalNOT(dl, Tmp1, Mask, EVL, Tmp1->getValueType(0)); + } Results.push_back(Tmp1); if (IsStrict) @@ -3622,21 +3671,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. + // FIXME: This drops the mask/evl for VP_SETCC. 
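
On the 1-bit SIGN_EXTEND_INREG special case above (now guarded to scalar integers): only bit 0 of the input matters, and the result is either 0 or all-ones, so the select between constants that the expansion builds reduces, in two's complement, to a negation:

#include <cstdint>

int32_t signExtendInreg1(uint32_t X) {
  return -int32_t(X & 1); // 0 -> 0, 1 -> 0xffffffff (i.e. -1)
}
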
EVT VT = Node->getValueType(0); - int TrueValue; - switch (TLI.getBooleanContents(Tmp1.getValueType())) { - case TargetLowering::ZeroOrOneBooleanContent: - case TargetLowering::UndefinedBooleanContent: - TrueValue = 1; - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = -1; - break; - } + EVT Tmp1VT = Tmp1.getValueType(); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(TrueValue, dl, VT), - DAG.getConstant(0, dl, VT), - Tmp3); + DAG.getBoolConstant(true, dl, VT, Tmp1VT), + DAG.getBoolConstant(false, dl, VT, Tmp1VT), Tmp3); Tmp1->setFlags(Node->getFlags()); Results.push_back(Tmp1); break; @@ -3692,7 +3732,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (!Legalized) { Legalized = TLI.LegalizeSetCCCondCode( DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, - NeedInvert, dl, Chain); + /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3725,9 +3765,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - bool Legalized = - TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()), - Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain); + bool Legalized = TLI.LegalizeSetCCCondCode( + DAG, getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, + /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); @@ -4068,12 +4108,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); if (!TLI.getLibcallName(LC)) { // Some targets don't have a powi libcall; use pow instead. - SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), - Node->getValueType(0), - Node->getOperand(1)); - Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), - Node->getValueType(0), Node->getOperand(0), - Exponent)); + if (Node->isStrictFPOpcode()) { + SDValue Exponent = + DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(Node), + {Node->getValueType(0), Node->getValueType(1)}, + {Node->getOperand(0), Node->getOperand(2)}); + SDValue FPOW = + DAG.getNode(ISD::STRICT_FPOW, SDLoc(Node), + {Node->getValueType(0), Node->getValueType(1)}, + {Exponent.getValue(1), Node->getOperand(1), Exponent}); + Results.push_back(FPOW); + Results.push_back(FPOW.getValue(1)); + } else { + SDValue Exponent = + DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), Node->getValueType(0), + Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), + Node->getValueType(0), + Node->getOperand(0), Exponent)); + } break; } unsigned Offset = Node->isStrictFPOpcode() ? 
1 : 0; @@ -4176,6 +4229,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(LC, Node, false)); break; } + case ISD::FP_TO_BF16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: @@ -4315,28 +4375,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: - Results.push_back(ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128)); + Results.push_back(ExpandIntLibCall( + Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT)); break; case ISD::UREM: - Results.push_back(ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128)); + Results.push_back(ExpandIntLibCall( + Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT)); break; case ISD::SDIV: - Results.push_back(ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + Results.push_back(ExpandIntLibCall( + Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT)); break; case ISD::UDIV: - Results.push_back(ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + Results.push_back(ExpandIntLibCall( + Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT)); break; case ISD::SDIVREM: case ISD::UDIVREM: @@ -4344,10 +4400,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandDivRemLibCall(Node, Results); break; case ISD::MUL: - Results.push_back(ExpandIntLibCall(Node, false, - RTLIB::MUL_I8, - RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128)); + Results.push_back(ExpandIntLibCall( + Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT)); break; case ISD::CTLZ_ZERO_UNDEF: switch (Node->getSimpleValueType(0).SimpleTy) { @@ -4700,6 +4755,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FMAXNUM: case ISD::STRICT_FREM: case ISD::STRICT_FPOW: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, @@ -4724,6 +4785,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FMA: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(3)}); + Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1), + Tmp2.getValue(1), 
Tmp3.getValue(1)); + Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp4, Tmp1, Tmp2, Tmp3}); + Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp4); + Results.push_back(Tmp4.getValue(1)); + break; case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); @@ -4740,6 +4817,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } + case ISD::STRICT_FPOWI: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + break; case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: @@ -4764,12 +4851,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: + case ISD::STRICT_FROUNDEVEN: + case ISD::STRICT_FTRUNC: + case ISD::STRICT_FSQRT: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, {Node->getOperand(0), Node->getOperand(1)}); Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6bf38d7296a8..f464208cd9dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -273,6 +273,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG)) + return SoftenFloatRes_SELECT_CC(SelCC.getNode()); return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, @@ -282,6 +284,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG)) + return SoftenFloatRes_SELECT_CC(SelCC.getNode()); return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, @@ -830,6 +834,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; case ISD::STRICT_FP_TO_FP16: case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::FP_TO_BF16: case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::STRICT_FP_TO_SINT: @@ -881,16 +886,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || N->getOpcode() == ISD::STRICT_FP_TO_FP16 || + N->getOpcode() == ISD::FP_TO_BF16 || N->getOpcode() == ISD::STRICT_FP_ROUND); bool IsStrict = N->isStrictFPOpcode(); SDValue Op = N->getOperand(IsStrict ? 
1 : 0); EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); - EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 || - N->getOpcode() == ISD::STRICT_FP_TO_FP16) - ? MVT::f16 - : RVT; + EVT FloatRVT = RVT; + if (N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_TO_FP16) + FloatRVT = MVT::f16; + else if (N->getOpcode() == ISD::FP_TO_BF16) + FloatRVT = MVT::bf16; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); @@ -2064,9 +2072,13 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f16) { - return ISD::FP16_TO_FP; + return ISD::FP16_TO_FP; } else if (RetVT == MVT::f16) { - return ISD::FP_TO_FP16; + return ISD::FP_TO_FP16; + } else if (OpVT == MVT::bf16) { + return ISD::BF16_TO_FP; + } else if (RetVT == MVT::bf16) { + return ISD::FP_TO_BF16; } report_fatal_error("Attempt at an invalid promotion-related conversion"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8c7b90b6cd33..69fd83bcd7b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -78,6 +78,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: case ISD::VSELECT: case ISD::VP_SELECT: + case ISD::VP_MERGE: Res = PromoteIntRes_Select(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; @@ -97,6 +98,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break; case ISD::SRL: case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; @@ -115,11 +117,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_VECTOR_ELT: Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; case ISD::BUILD_VECTOR: - Res = PromoteIntRes_BUILD_VECTOR(N); break; - case ISD::SCALAR_TO_VECTOR: - Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + Res = PromoteIntRes_BUILD_VECTOR(N); + break; case ISD::SPLAT_VECTOR: - Res = PromoteIntRes_SPLAT_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_ScalarOp(N); + break; case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -133,6 +136,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::VP_FPTOSI: + case ISD::VP_FPTOUI: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: @@ -262,6 +267,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res = PromoteIntRes_FunnelShift(N); break; + + case ISD::IS_FPCLASS: + Res = PromoteIntRes_IS_FPCLASS(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -435,10 +444,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // interesting bits will end up at the wrong place. 
if (DAG.getDataLayout().isBigEndian()) { unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout()); assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!"); Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res, - DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + DAG.getShiftAmountConstant(ShiftAmt, NOutVT, dl)); } return Res; } @@ -446,13 +454,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // as the widened input type would be a legal type, we can widen the bitcast // and handle the promotion after. if (NOutVT.isVector()) { - unsigned WidenInSize = NInVT.getSizeInBits(); - unsigned OutSize = OutVT.getSizeInBits(); - if (WidenInSize % OutSize == 0) { - unsigned Scale = WidenInSize / OutSize; - EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(), - OutVT.getVectorElementType(), - OutVT.getVectorNumElements() * Scale); + TypeSize WidenInSize = NInVT.getSizeInBits(); + TypeSize OutSize = OutVT.getSizeInBits(); + if (WidenInSize.hasKnownScalarFactor(OutSize)) { + unsigned Scale = WidenInSize.getKnownScalarFactor(OutSize); + EVT WideOutVT = + EVT::getVectorVT(*DAG.getContext(), OutVT.getVectorElementType(), + OutVT.getVectorElementCount() * Scale); if (isTypeLegal(WideOutVT)) { InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp)); InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp, @@ -490,9 +498,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, ShiftVT)); + DAG.getShiftAmountConstant(DiffBits, NVT, dl)); } SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { @@ -512,10 +519,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, ShiftVT)); + DAG.getShiftAmountConstant(DiffBits, NVT, dl)); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -666,6 +672,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) NewOpc = ISD::STRICT_FP_TO_SINT; + if (N->getOpcode() == ISD::VP_FPTOUI && + !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) && + TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT)) + NewOpc = ISD::VP_FPTOSI; + SDValue Res; if (N->isStrictFPOpcode()) { Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, @@ -673,8 +684,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else + } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) { + Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1), + N->getOperand(2)}); + } else { Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + } // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -684,8 +699,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // before legalization: fp-to-uint16, 65534. 
-> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || - N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? - ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + N->getOpcode() == ISD::STRICT_FP_TO_UINT || + N->getOpcode() == ISD::VP_FPTOUI) + ? ISD::AssertZext + : ISD::AssertSext, + dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -889,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { } unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + SDValue ShiftAmount = + DAG.getShiftAmountConstant(SHLAmount, PromotedType, dl); Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); if (!IsShift) @@ -939,14 +957,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { // which is extends the values that we clamp to on saturation. This could be // resolved by shifting one of the operands the same amount, which would // also shift the result we compare against, then shifting back. - EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, - DAG.getConstant(DiffSize, dl, ShiftTy)); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getShiftAmountConstant(DiffSize, PromotedType, dl)); SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL; return DAG.getNode(ShiftOp, dl, PromotedType, Result, - DAG.getConstant(DiffSize, dl, ShiftTy)); + DAG.getShiftAmountConstant(DiffSize, PromotedType, dl)); } return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); @@ -1043,17 +1061,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) { - EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); unsigned Diff = PromotedType.getScalarSizeInBits() - N->getValueType(0).getScalarSizeInBits(); if (Saturating) - Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, - DAG.getConstant(Diff, dl, ShiftTy)); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getShiftAmountConstant(Diff, PromotedType, dl)); SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); if (Saturating) Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res, - DAG.getConstant(Diff, dl, ShiftTy)); + DAG.getShiftAmountConstant(Diff, PromotedType, dl)); return Res; } } @@ -1110,11 +1128,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) { SDValue RHS = GetPromotedInteger(N->getOperand(2)); unsigned Opcode = N->getOpcode(); - return Opcode == ISD::VP_SELECT - ? 
DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS, - N->getOperand(3)) - : DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, - RHS); + if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE) + return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS, + N->getOperand(3)); + return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { @@ -1167,6 +1184,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { return DAG.getSExtOrTrunc(SetCC, dl, NVT); } +SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) { + SDLoc DL(N); + SDValue Arg = N->getOperand(0); + SDValue Test = N->getOperand(1); + EVT NResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); @@ -1265,7 +1290,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { SDValue Hi = GetPromotedInteger(N->getOperand(0)); SDValue Lo = GetPromotedInteger(N->getOperand(1)); - SDValue Amt = GetPromotedInteger(N->getOperand(2)); + SDValue Amt = N->getOperand(2); + if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger) + Amt = ZExtPromotedInteger(Amt); + EVT AmtVT = Amt.getValueType(); SDLoc DL(N); EVT OldVT = N->getOperand(0).getValueType(); @@ -1276,7 +1304,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { unsigned NewBits = VT.getScalarSizeInBits(); // Amount has to be interpreted modulo the old bit width. - Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT)); + Amt = DAG.getNode(ISD::UREM, DL, AmtVT, Amt, + DAG.getConstant(OldBits, DL, AmtVT)); // If the promoted type is twice the size (or more), then we use the // traditional funnel 'double' shift codegen. This isn't necessary if the @@ -1296,13 +1325,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { } // Shift Lo up to occupy the upper bits of the promoted type. - SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT); + SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT); Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset); // Increase Amount to shift the result into the lower bits of the promoted // type. 
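// The funnel-shift promotion above interprets the amount modulo the *old*
// bit width, moves Lo into the upper half of the promoted type, and (for
// FSHR) biases the amount by the width difference. A plain-C++ check of
// that transformation, assuming a 16-bit fshr emulated in 32 bits:
#include <cassert>
#include <cstdint>

// Reference 16-bit funnel shift right: low 16 bits of (Hi:Lo) >> (S mod 16).
static uint16_t fshr16(uint16_t Hi, uint16_t Lo, unsigned S) {
  uint32_t Cat = (uint32_t(Hi) << 16) | Lo;
  return uint16_t(Cat >> (S % 16));
}

// Emulation in the promoted (32-bit) type, mirroring the legalizer's steps.
static uint16_t fshr16_promoted(uint16_t Hi, uint16_t Lo, unsigned S) {
  unsigned Amt = S % 16;             // amount is modulo the old width
  uint32_t LoP = uint32_t(Lo) << 16; // shift Lo into the upper bits
  Amt += 16;                         // FSHR: bias by NewBits - OldBits
  uint64_t Cat = (uint64_t(Hi) << 32) | LoP; // 32-bit funnel shift right
  return uint16_t(uint32_t(Cat >> (Amt % 32)));
}

int main() {
  for (unsigned S = 0; S < 40; ++S)
    assert(fshr16_promoted(0xBEEF, 0x1234, S) == fshr16(0xBEEF, 0x1234, S));
}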
   if (IsFSHR)
-    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset);
+    Amt = DAG.getNode(ISD::ADD, DL, AmtVT, Amt, ShiftOffset);
 
   return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
 }
@@ -1336,11 +1365,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
       EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
                                      NumElts.divideCoefficientBy(2));
-      EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
-      EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
-
+      if (N->getOpcode() == ISD::TRUNCATE) {
+        EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+        EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+      } else {
+        assert(N->getOpcode() == ISD::VP_TRUNCATE &&
+               "Expected VP_TRUNCATE opcode");
+        SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+        std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+        std::tie(EVLLo, EVLHi) =
+            DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+        EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo);
+        EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi);
+      }
       return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
     }
+    // TODO: VP_TRUNCATE still needs to handle the TypeWidenVector case that
+    // arises on some targets.
     case TargetLowering::TypeWidenVector: {
       SDValue WideInOp = GetWidenedVector(InOp);
@@ -1362,6 +1403,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
   }
 
   // Truncate to NVT instead of VT
+  if (N->getOpcode() == ISD::VP_TRUNCATE)
+    return DAG.getNode(ISD::VP_TRUNCATE, dl, NVT, Res, N->getOperand(1),
+                       N->getOperand(2));
   return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
 }
 
@@ -1432,6 +1476,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+  EVT OVT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+  // If a larger ABS or SMAX isn't supported by the target, try to expand now.
+  // If we expand later, we'll end up sign-extending more than just the sra
+  // input in the sra+xor+sub expansion.
+  if (!OVT.isVector() &&
+      !TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) &&
+      !TLI.isOperationLegal(ISD::SMAX, NVT)) {
+    if (SDValue Res = TLI.expandABS(N, DAG))
+      return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res);
+  }
+
   SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
   return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
 }
@@ -1466,9 +1523,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   if (N->getOpcode() == ISD::UMULO) {
     // Unsigned overflow occurred if the high part is non-zero.
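// Elaborating the comment above: after widening an unsigned multiply,
// overflow in the original width occurred exactly when the high half of the
// wide product is non-zero. Plain-C++ illustration for a 16-bit UMULO
// performed in 32 bits:
#include <cassert>
#include <cstdint>

static bool umulo16(uint16_t A, uint16_t B, uint16_t &Res) {
  uint32_t Mul = uint32_t(A) * uint32_t(B); // promoted multiply
  Res = uint16_t(Mul);
  return (Mul >> 16) != 0;                  // high part non-zero => overflow
}

int main() {
  uint16_t R;
  assert(!umulo16(255, 255, R) && R == 65025);
  assert(umulo16(256, 256, R) && R == 0); // 65536 wraps to 0
}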
unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout()); - SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, - DAG.getConstant(Shift, DL, ShiftTy)); + SDValue Hi = + DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getShiftAmountConstant(Shift, Mul.getValueType(), DL)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, DAG.getConstant(0, DL, Hi.getValueType()), ISD::SETNE); @@ -1498,7 +1555,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) { EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); APInt MulImm = cast(N->getOperand(0))->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits())); + return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits())); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { @@ -1578,16 +1635,19 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; case ISD::INSERT_VECTOR_ELT: - Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; - case ISD::SCALAR_TO_VECTOR: - Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo); + break; case ISD::SPLAT_VECTOR: - Res = PromoteIntOp_SPLAT_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_ScalarOp(N); + break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::VP_SETCC: case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::VP_SITOFP: case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast(N), @@ -1600,8 +1660,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast(N), OpNo); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: + case ISD::VP_UITOFP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -1614,6 +1676,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + case ISD::FSHL: + case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break; + case ISD::SADDO_CARRY: case ISD::SSUBO_CARRY: case ISD::ADDCARRY: @@ -1848,20 +1913,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, N->getOperand(1), Idx), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { - // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote - // the operand in place. +SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) { + // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated, + // so just promote the operand in place. 
return SDValue(DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { - // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the - // operand in place. - return SDValue( - DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1900,7 +1958,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast(N->getOperand(2))->get()); // The CC (#2) is always legal. - return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); + if (N->getOpcode() == ISD::SETCC) + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); + + assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { @@ -1908,6 +1973,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { ZExtPromotedInteger(N->getOperand(1))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), + ZExtPromotedInteger(N->getOperand(2))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); SDLoc dl(N); @@ -1917,6 +1987,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + if (N->getOpcode() == ISD::VP_SITOFP) + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0)), + N->getOperand(1), N->getOperand(2)), + 0); return SDValue(DAG.UpdateNodeOperands(N, SExtPromotedInteger(N->getOperand(0))), 0); } @@ -1980,8 +2055,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo) { - SmallVector NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { // The Mask EVT DataVT = N->getValueType(0); @@ -2010,6 +2085,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo) { bool TruncateStore = N->isTruncatingStore(); SmallVector NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { // The Mask EVT DataVT = N->getValue().getValueType(); @@ -2021,9 +2097,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); else NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); - - N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(), - N->getMemoryVT(), NewOps[OpNo])); } else { NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); TruncateStore = true; @@ -2036,10 +2109,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); + if (N->getOpcode() == ISD::VP_TRUNCATE) + return DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op, + N->getOperand(1), N->getOperand(2)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + if (N->getOpcode() == ISD::VP_UITOFP) + return 
SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0)), + N->getOperand(1), N->getOperand(2)), + 0); return SDValue(DAG.UpdateNodeOperands(N, ZExtPromotedInteger(N->getOperand(0))), 0); } @@ -2468,7 +2549,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { - if (Amt.ugt(VTBits)) { + if (Amt.uge(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt.ugt(NVTBits)) { Lo = DAG.getConstant(0, DL, NVT); @@ -2489,7 +2570,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } if (N->getOpcode() == ISD::SRL) { - if (Amt.ugt(VTBits)) { + if (Amt.uge(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRL, DL, @@ -2510,7 +2591,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); - if (Amt.ugt(VTBits)) { + if (Amt.uge(VTBits)) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt.ugt(NVTBits)) { @@ -3132,24 +3213,23 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedInteger(N0, Lo, Hi); EVT NVT = Lo.getValueType(); - // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we - // use in LegalizeDAG. The ADD part of the expansion is based on - // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that - // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded + // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we + // use in LegalizeDAG. The SUB part of the expansion is based on + // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that + // SUBCARRY is LegalOrCustom. Each of the pieces here can be further expanded // if needed. Shift expansion has a special case for filling with sign bits // so that we will only end up with one SRA. 
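// A concrete rendering of the expansion described above: abs of a
// double-word integer using a single SRA to materialize the sign mask, XOR
// of both halves, then a borrow-propagating subtract (the USUBO/SUBCARRY
// pair). Sketch for an i128 split into two u64 halves:
#include <cassert>
#include <cstdint>

static void abs128(uint64_t &Lo, uint64_t &Hi) {
  uint64_t Sign = uint64_t(int64_t(Hi) >> 63); // SRA: all-ones if negative
  Lo ^= Sign;                                  // conditional bitwise NOT
  Hi ^= Sign;
  uint64_t Borrow = Lo < Sign ? 1 : 0;         // borrow out of the low USUBO
  Lo -= Sign;                                  // USUBO: subtract 0 or -1
  Hi = Hi - Sign - Borrow;                     // SUBCARRY on the high half
}

int main() {
  uint64_t Lo = ~uint64_t(4) + 1, Hi = ~uint64_t(0); // -5 as an i128
  abs128(Lo, Hi);
  assert(Lo == 5 && Hi == 0);
}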
- bool HasAddCarry = TLI.isOperationLegalOrCustom( - ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); - if (HasAddCarry) { - EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - SDValue Sign = - DAG.getNode(ISD::SRA, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); + bool HasSubCarry = TLI.isOperationLegalOrCustom( + ISD::SUBCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasSubCarry) { + SDValue Sign = DAG.getNode( + ISD::SRA, dl, NVT, Hi, + DAG.getShiftAmountConstant(NVT.getSizeInBits() - 1, NVT, dl)); SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign); Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign); + Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign); + Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); return; } @@ -3160,8 +3240,8 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue NegLo, NegHi; SplitInteger(Neg, NegLo, NegHi); - SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), - DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, + DAG.getConstant(0, dl, NVT), ISD::SETLT); Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); } @@ -3223,12 +3303,11 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); SDValue Chain = Lo.getValue(1); // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); + DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl)); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -3535,8 +3614,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); + SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl); SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); @@ -3667,7 +3745,6 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, unsigned NVTSize = NVT.getScalarSizeInBits(); assert((VTSize == NVTSize * 2) && "Expected the new value type to be half " "the size of the current value type"); - EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. @@ -3690,7 +3767,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, // shifting. uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. 
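// The fixed-point multiply above computes a double-width product and then
// shifts right by the scale to return to the operands' fixed-point format.
// The same idea in plain C++ for Q16.16 values (scale = 16):
#include <cassert>
#include <cstdint>

static int32_t mulfix_q16(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) * int64_t(B); // 32x32 -> 64-bit product
  return int32_t(Wide >> 16);             // rescale back to Q16.16
}

int main() {
  int32_t OneHalf = 1 << 15; // 0.5 in Q16.16
  int32_t Three = 3 << 16;   // 3.0 in Q16.16
  assert(mulfix_q16(Three, OneHalf) == (3 << 15)); // 1.5 in Q16.16
}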
   if (Scale % NVTSize) {
-    SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+    SDValue ShiftAmount = DAG.getShiftAmountConstant(Scale % NVTSize, NVT, dl);
     Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
                      ShiftAmount);
     Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
@@ -3731,8 +3808,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
   if (!Signed) {
     if (Scale < NVTSize) {
       // Overflow happened if ((HH | (HL >> Scale)) != 0).
-      SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
-                                       DAG.getConstant(Scale, dl, ShiftTy));
+      SDValue HLAdjusted =
+          DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+                      DAG.getShiftAmountConstant(Scale, NVT, dl));
       SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
       SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
     } else if (Scale == NVTSize) {
       // Overflow happened if (HH != 0).
       SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
     } else if (Scale < VTSize) {
       // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
-      SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
-                                       DAG.getConstant(Scale - NVTSize, dl,
-                                                       ShiftTy));
+      SDValue HLAdjusted =
+          DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+                      DAG.getShiftAmountConstant(Scale - NVTSize, NVT, dl));
       SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
     } else
       llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
@@ -3901,6 +3979,70 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
   ReplaceValueWith(SDValue(Node, 1), Ovf);
 }
 
+// Emit a call to __udivei4 and friends, which require the arguments to be
+// passed on the stack, plus an extra argument that carries the number of
+// bits of the operands. Returns the result of the call operation.
+static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI, + const RTLIB::Libcall &LC, + SelectionDAG &DAG, SDNode *N, + const SDLoc &DL, const EVT &VT) { + + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // The signature of __udivei4 is + // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b, + // unsigned int bits) + EVT ArgVT = N->op_begin()->getValueType(); + assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 && + "Unexpected argument type for lowering"); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + SDValue Output = DAG.CreateStackTemporary(ArgVT); + Entry.Node = Output; + Entry.Ty = ArgTy->getPointerTo(); + Entry.IsSExt = false; + Entry.IsZExt = false; + Args.push_back(Entry); + + for (const llvm::SDUse &Op : N->ops()) { + SDValue StackPtr = DAG.CreateStackTemporary(ArgVT); + InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo()); + Entry.Node = StackPtr; + Entry.Ty = ArgTy->getPointerTo(); + Entry.IsSExt = false; + Entry.IsZExt = false; + Args.push_back(Entry); + } + + int Bits = N->getOperand(0) + .getValueType() + .getTypeForEVT(*DAG.getContext()) + ->getIntegerBitWidth(); + Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout())); + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.IsSExt = false; + Entry.IsZExt = true; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(InChain) + .setLibCallee(TLI.getLibcallCallingConv(LC), + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)) + .setDiscardResult(); + + SDValue Chain = TLI.LowerCallTo(CLI).second; + + return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo()); +} + void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -3922,6 +4064,14 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I64; else if (VT == MVT::i128) LC = RTLIB::SDIV_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4113,6 +4263,14 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I64; else if (VT == MVT::i128) LC = RTLIB::SREM_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4288,6 +4446,14 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I64; else if (VT == MVT::i128) LC = RTLIB::UDIV_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4315,6 +4481,14 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I64; else if (VT == MVT::i128) LC = RTLIB::UREM_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); 
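// A usage sketch for the stack-based large-division libcalls routed to
// above (RTLIB::{S,U}{DIV,REM}_IEXT). The declaration matches the signature
// quoted in ExpandExtIntRes_DIVREM's comment; the little-endian 32-bit limb
// layout and the wrapper name below are illustrative assumptions, since the
// real in-memory layout is whatever the target stores for the wide integer.
#include <cstdint>

extern "C" void __udivei4(uint32_t *quo, uint32_t *a, uint32_t *b,
                          uint32_t bits); // provided by compiler-rt

// Hypothetical helper: divide two i256 values held as 8 x 32-bit words.
void udiv256(uint32_t Quo[8], uint32_t A[8], uint32_t B[8]) {
  __udivei4(Quo, A, B, /*bits=*/256);
}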
TargetLowering::MakeLibCallOptions CallOptions; @@ -5060,7 +5234,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { return DAG.getBuildVector(NOutVT, dl, Ops); } -SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) { SDLoc dl(N); @@ -5070,35 +5244,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - EVT NOutVTElem = NOutVT.getVectorElementType(); - - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); - - return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); -} - -SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { - SDLoc dl(N); - - SDValue SplatVal = N->getOperand(0); - - assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); - - EVT OutVT = N->getValueType(0); - EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - assert(NOutVT.isVector() && "Type must be promoted to a vector type"); EVT NOutElemVT = NOutVT.getVectorElementType(); - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0)); - return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); + return DAG.getNode(N->getOpcode(), dl, NOutVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) { SDLoc dl(N); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + assert(NOutVT.isScalableVector() && + "Type must be promoted to a scalable vector type"); APInt StepVal = cast(N->getOperand(0))->getAPIntValue(); return DAG.getStepVector(dl, NOutVT, StepVal.sext(NOutVT.getScalarSizeInBits())); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 03dcd0f6d2c9..8fe9a83b9c3d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -13,10 +13,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "SDNodeDbgValue.h" #include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -86,46 +83,49 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { auto ResId = ValueToIdMap.lookup(Res); unsigned Mapped = 0; - if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { - Mapped |= 1; - // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); - UI != UE; ++UI) - if (UI.getUse().getResNo() == i) - assert(UI->getNodeId() == NewNode && - "Remapped value has non-trivial use!"); - - // Check that the final result of applying ReplacedValues is not - // marked NewNode. - auto NewValId = ReplacedValues[ResId]; - auto I = ReplacedValues.find(NewValId); - while (I != ReplacedValues.end()) { - NewValId = I->second; + if (ResId) { + auto I = ReplacedValues.find(ResId); + if (I != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. 
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. + auto NewValId = I->second; I = ReplacedValues.find(NewValId); + while (I != ReplacedValues.end()) { + NewValId = I->second; + I = ReplacedValues.find(NewValId); + } + SDValue NewVal = getSDValue(NewValId); + (void)NewVal; + assert(NewVal.getNode()->getNodeId() != NewNode && + "ReplacedValues maps to a new node!"); } - SDValue NewVal = getSDValue(NewValId); - (void)NewVal; - assert(NewVal.getNode()->getNodeId() != NewNode && - "ReplacedValues maps to a new node!"); + if (PromotedIntegers.count(ResId)) + Mapped |= 2; + if (SoftenedFloats.count(ResId)) + Mapped |= 4; + if (ScalarizedVectors.count(ResId)) + Mapped |= 8; + if (ExpandedIntegers.count(ResId)) + Mapped |= 16; + if (ExpandedFloats.count(ResId)) + Mapped |= 32; + if (SplitVectors.count(ResId)) + Mapped |= 64; + if (WidenedVectors.count(ResId)) + Mapped |= 128; + if (PromotedFloats.count(ResId)) + Mapped |= 256; + if (SoftPromotedHalfs.count(ResId)) + Mapped |= 512; } - if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end()) - Mapped |= 2; - if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end()) - Mapped |= 4; - if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end()) - Mapped |= 8; - if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end()) - Mapped |= 16; - if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end()) - Mapped |= 32; - if (ResId && SplitVectors.find(ResId) != SplitVectors.end()) - Mapped |= 64; - if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end()) - Mapped |= 128; - if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) - Mapped |= 256; - if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end()) - Mapped |= 512; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -143,8 +143,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { } } else { if (Mapped == 0) { - dbgs() << "Processed value not in any map!"; - Failed = true; + SDValue NodeById = IdToValueMap.lookup(ResId); + // It is possible the node has been remapped to another node and had + // its Id updated in the Value to Id table. The node it remapped to + // may not have been processed yet. Look up the Id in the Id to Value + // table and re-check the Processed state. If the node hasn't been + // remapped we'll get the same state as we got earlier. 
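// The "Value in multiple maps!" check below relies on the classic
// power-of-two test: Mapped has more than one bit set exactly when
// (Mapped & (Mapped - 1)) != 0, i.e. the result id was recorded in two or
// more legalization maps at once. Standalone illustration:
#include <cassert>

static bool inMultipleMaps(unsigned Mapped) {
  return (Mapped & (Mapped - 1)) != 0; // true iff at least two bits are set
}

int main() {
  assert(!inMultipleMaps(0));      // in no map at all
  assert(!inMultipleMaps(16));     // exactly one map (ExpandedIntegers)
  assert(inMultipleMaps(2 | 256)); // PromotedIntegers and PromotedFloats
}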
+ if (NodeById->getNodeId() == Processed) { + dbgs() << "Processed value not in any map!"; + Failed = true; + } } else if (Mapped & (Mapped - 1)) { dbgs() << "Value in multiple maps!"; Failed = true; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4d8daa82d8c0..de320290bda9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -19,7 +19,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" namespace llvm { @@ -309,8 +308,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); - SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); - SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntRes_ScalarOp(SDNode *N); SDValue PromoteIntRes_STEP_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); @@ -362,6 +360,7 @@ private: SDValue PromoteIntRes_ABS(SDNode *N); SDValue PromoteIntRes_Rotate(SDNode *N); SDValue PromoteIntRes_FunnelShift(SDNode *N); + SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -377,12 +376,12 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); - SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); - SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntOp_ScalarOp(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_FunnelShift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N); @@ -784,6 +783,7 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N); + SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N); SDValue ScalarizeVecRes_FIX(SDNode *N); @@ -850,6 +850,7 @@ private: void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); @@ -960,6 +961,7 @@ private: SDValue WidenVecRes_Convert_StrictFP(SDNode *N); SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); + SDValue WidenVecRes_IS_FPCLASS(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); @@ -985,6 +987,7 @@ private: SDValue WidenVecOp_Convert(SDNode *N); SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecOp_FCOPYSIGN(SDNode *N); + SDValue WidenVecOp_IS_FPCLASS(SDNode *N); SDValue WidenVecOp_VECREDUCE(SDNode *N); SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N); SDValue WidenVecOp_VP_REDUCE(SDNode *N); diff --git 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index abf6a3ac6916..842ffa2aa23e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -26,11 +26,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -41,7 +39,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" -#include "llvm/Support/MathExtras.h" #include #include #include @@ -464,6 +461,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VPID: { \ EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ : Node->getOperand(LEGALPOS).getValueType(); \ + if (ISD::VPID == ISD::VP_SETCC) { \ + ISD::CondCode CCCode = cast(Node->getOperand(2))->get(); \ + Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ + if (Action != TargetLowering::Legal) \ + break; \ + } \ Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ } break; #include "llvm/IR/VPIntrinsics.def" @@ -747,6 +750,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { ExpandFSUB(Node, Results); return; case ISD::SETCC: + case ISD::VP_SETCC: ExpandSETCC(Node, Results); return; case ISD::ABS: @@ -1050,10 +1054,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { // Shuffle the incoming lanes into the correct position, and pull all other // lanes from the zero vector. - SmallVector ShuffleMask; - ShuffleMask.reserve(NumSrcElements); - for (int i = 0; i < NumSrcElements; ++i) - ShuffleMask.push_back(i); + auto ShuffleMask = llvm::to_vector<16>(llvm::seq(0, NumSrcElements)); int ExtLaneScale = NumSrcElements / NumElements; int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; @@ -1423,6 +1424,7 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node, void VectorLegalizer::ExpandSETCC(SDNode *Node, SmallVectorImpl &Results) { bool NeedInvert = false; + bool IsVP = Node->getOpcode() == ISD::VP_SETCC; SDLoc dl(Node); MVT OpVT = Node->getOperand(0).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(2))->get(); @@ -1436,20 +1438,36 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node, SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue CC = Node->getOperand(2); - bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, - RHS, CC, NeedInvert, dl, Chain); + SDValue Mask, EVL; + if (IsVP) { + Mask = Node->getOperand(3); + EVL = Node->getOperand(4); + } + + bool Legalized = + TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask, + EVL, NeedInvert, dl, Chain); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SETCC node. 
- if (CC.getNode()) - LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, - Node->getFlags()); + if (CC.getNode()) { + if (!IsVP) + LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, + Node->getFlags()); + else + LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0), + {LHS, RHS, CC, Mask, EVL}, Node->getFlags()); + } // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. - if (NeedInvert) - LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); + if (NeedInvert) { + if (!IsVP) + LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); + else + LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0)); + } } else { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0bd44ce4c872..fa555be00ded 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -20,7 +20,9 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TypeSize.h" @@ -64,6 +66,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -231,9 +234,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { // Now process the remaining operands. for (unsigned i = 1; i < NumOpers; ++i) { SDValue Oper = N->getOperand(i); + EVT OperVT = Oper.getValueType(); - if (Oper.getValueType().isVector()) - Oper = GetScalarizedVector(Oper); + if (OperVT.isVector()) { + if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector) + Oper = GetScalarizedVector(Oper); + else + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperVT.getVectorElementType(), Oper, + DAG.getVectorIdxConstant(0, dl)); + } Opers[i] = Oper; } @@ -582,6 +592,29 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { return DAG.getNode(ExtendCode, DL, NVT, Res); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) { + SDLoc DL(N); + SDValue Arg = N->getOperand(0); + SDValue Test = N->getOperand(1); + EVT ArgVT = Arg.getValueType(); + EVT ResultVT = N->getValueType(0).getVectorElementType(); + + if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) { + Arg = GetScalarizedVector(Arg); + } else { + EVT VT = ArgVT.getVectorElementType(); + Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Arg, + DAG.getVectorIdxConstant(0, DL)); + } + + SDValue Res = + DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags()); + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. 
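// Context for the promotion just below: a target declares how it represents
// a "true" boolean, and getExtendForContent picks ANY/ZERO/SIGN_EXTEND to
// widen an i1 result to match. The enum and function here are illustrative
// stand-ins for TargetLowering's BooleanContent machinery, not the real API.
#include <cassert>
#include <cstdint>

enum class BooleanContent { UndefinedHigh, ZeroOrOne, ZeroOrNegativeOne };

// Extend a 1-bit boolean to 32 bits per the target's convention.
static uint32_t extendBool(bool B, BooleanContent BC) {
  switch (BC) {
  case BooleanContent::UndefinedHigh: // ANY_EXTEND: upper bits unspecified
  case BooleanContent::ZeroOrOne:     // ZERO_EXTEND
    return B ? 1u : 0u;
  case BooleanContent::ZeroOrNegativeOne: // SIGN_EXTEND
    return B ? ~0u : 0u;
  }
  return 0;
}

int main() {
  assert(extendBool(true, BooleanContent::ZeroOrNegativeOne) == 0xFFFFFFFFu);
}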
+ ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT)); + return DAG.getNode(ExtendCode, DL, ResultVT, Res); +} //===----------------------------------------------------------------------===// // Operand Vector Scalarization <1 x ty> -> ty. @@ -926,6 +959,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: @@ -949,6 +983,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_Gather(cast(N), Lo, Hi, /*SplitSETCC*/ true); break; case ISD::SETCC: + case ISD::VP_SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; case ISD::VECTOR_REVERSE: @@ -988,13 +1023,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: - case ISD::FNEG: + case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FP_EXTEND: + case ISD::VP_FP_EXTEND: case ISD::FP_ROUND: + case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::VP_FPTOSI: case ISD::FP_TO_UINT: + case ISD::VP_FPTOUI: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: @@ -1002,8 +1041,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: case ISD::FTRUNC: case ISD::SINT_TO_FP: + case ISD::VP_SITOFP: case ISD::TRUNCATE: + case ISD::VP_TRUNCATE: case ISD::UINT_TO_FP: + case ISD::VP_UITOFP: case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -1011,6 +1053,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: + case ISD::VP_SIGN_EXTEND: + case ISD::VP_ZERO_EXTEND: SplitVecRes_ExtendOp(N, Lo, Hi); break; @@ -1053,7 +1097,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ROTR: SplitVecRes_BinOp(N, Lo, Hi); break; - case ISD::FMA: + case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: case ISD::FSHR: SplitVecRes_TernaryOp(N, Lo, Hi); @@ -1175,10 +1219,28 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, - Op2Lo, N->getFlags()); - Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, - Op2Hi, N->getFlags()); + const SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + if (N->getNumOperands() == 3) { + Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags); + Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags); + return; + } + + assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl); + + Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), + {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags); + Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), + {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags); } void 
DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { @@ -1398,6 +1460,19 @@ void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); } +void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + SDValue ArgLo, ArgHi; + SDValue Test = N->getOperand(1); + GetSplitVector(N->getOperand(0), ArgLo, ArgHi); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags()); + Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags()); +} + void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -2043,8 +2118,20 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { else std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + if (N->getOpcode() == ISD::SETCC) { + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + } else { + assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL); + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo, + EVLLo); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi, + EVLHi); + } } void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, @@ -2056,22 +2143,37 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. - unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; - EVT InVT = N->getOperand(OpNo).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) - GetSplitVector(N->getOperand(OpNo), Lo, Hi); + GetSplitVector(N->getOperand(0), Lo, Hi); else - std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); - if (N->getOpcode() == ISD::FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1), - N->getFlags()); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1), - N->getFlags()); - } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags()); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags()); + const SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + if (N->getNumOperands() <= 2) { + if (Opcode == ISD::FP_ROUND) { + Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags); + } else { + Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags); + } + return; } + + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + + Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags); } void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, @@ -2107,14 +2209,34 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; N->dump(&DAG); dbgs() << "\n"); + if (!N->isVPOpcode()) { + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + // Extend the source vector by one step. SDValue NewSrc = - DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Get the low and high halves of the new, extended one step, vector. std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); // Extend those vector halves the rest of the way. - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo}); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi}); return; } } @@ -2126,108 +2248,352 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. 
SDValue Inputs[4]; - SDLoc dl(N); + SDLoc DL(N); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); EVT NewVT = Inputs[0].getValueType(); unsigned NewElts = NewVT.getVectorNumElements(); + auto &&IsConstant = [](const SDValue &N) { + APInt SplatValue; + return N.getResNo() == 0 && + (ISD::isConstantSplatVector(N.getNode(), SplatValue) || + ISD::isBuildVectorOfConstantSDNodes(N.getNode())); + }; + auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1, + SDValue &Input2, + ArrayRef Mask) { + assert(Input1->getOpcode() == ISD::BUILD_VECTOR && + Input2->getOpcode() == ISD::BUILD_VECTOR && + "Expected build vector node."); + EVT EltVT = NewVT.getVectorElementType(); + SmallVector Ops(NewElts, DAG.getUNDEF(EltVT)); + for (unsigned I = 0; I < NewElts; ++I) { + if (Mask[I] == UndefMaskElem) + continue; + unsigned Idx = Mask[I]; + if (Idx >= NewElts) + Ops[I] = Input2.getOperand(Idx - NewElts); + else + Ops[I] = Input1.getOperand(Idx); + // Make the type of all elements the same as the element type. + if (Ops[I].getValueType().bitsGT(EltVT)) + Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]); + } + return DAG.getBuildVector(NewVT, DL, Ops); + }; + // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - SmallVector Ops; - for (unsigned High = 0; High < 2; ++High) { - SDValue &Output = High ? Hi : Lo; - - // Build a shuffle mask for the output, discovering on the fly which - // input vectors to use as shuffle operands (recorded in InputUsed). - // If building a suitable shuffle vector proves too hard, then bail - // out with useBuildVector set. - unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. - unsigned FirstMaskIdx = High * NewElts; - bool useBuildVector = false; - for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - // The mask element. This indexes into the input. - int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); - - // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; - - if (Input >= array_lengthof(Inputs)) { - // The mask element does not index into any input vector. - Ops.push_back(-1); + SmallVector OrigMask(N->getMask().begin(), N->getMask().end()); + // Try to pack incoming shuffles/inputs. + auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts, + &DL](SmallVectorImpl &Mask) { + // Check if all inputs are shuffles of the same operands or non-shuffles. + MapVector, SmallVector> ShufflesIdxs; + for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) { + SDValue Input = Inputs[Idx]; + auto *Shuffle = dyn_cast(Input.getNode()); + if (!Shuffle || + Input.getOperand(0).getValueType() != Input.getValueType()) + continue; + ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))] + .push_back(Idx); + ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))] + .push_back(Idx); + } + for (auto &P : ShufflesIdxs) { + if (P.second.size() < 2) continue; + // Use shuffles operands instead of shuffles themselves. + // 1. Adjust mask. 
+ for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (Inputs[SrcRegIdx].isUndef()) { + Idx = UndefMaskElem; + continue; + } + auto *Shuffle = + dyn_cast(Inputs[SrcRegIdx].getNode()); + if (!Shuffle || !is_contained(P.second, SrcRegIdx)) + continue; + int MaskElt = Shuffle->getMaskElt(Idx % NewElts); + if (MaskElt == UndefMaskElem) { + Idx = UndefMaskElem; + continue; + } + Idx = MaskElt % NewElts + + P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first + ? 0 + : 1] * + NewElts; } - - // Turn the index into an offset from the start of the input vector. - Idx -= Input * NewElts; - - // Find or create a shuffle vector operand to hold this input. - unsigned OpNo; - for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { - if (InputUsed[OpNo] == Input) { - // This input vector is already an operand. - break; - } else if (InputUsed[OpNo] == -1U) { - // Create a new operand for this input vector. - InputUsed[OpNo] = Input; - break; + // 2. Update inputs. + Inputs[P.second[0]] = P.first.first; + Inputs[P.second[1]] = P.first.second; + // Clear the pair data. + P.second.clear(); + ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear(); + } + // Check if any concat_vectors can be simplified. + SmallBitVector UsedSubVector(2 * array_lengthof(Inputs)); + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (Inputs[SrcRegIdx].isUndef()) { + Idx = UndefMaskElem; + continue; + } + TargetLowering::LegalizeTypeAction TypeAction = + getTypeAction(Inputs[SrcRegIdx].getValueType()); + if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS && + Inputs[SrcRegIdx].getNumOperands() == 2 && + !Inputs[SrcRegIdx].getOperand(1).isUndef() && + (TypeAction == TargetLowering::TypeLegal || + TypeAction == TargetLowering::TypeWidenVector)) + UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2)); + } + if (UsedSubVector.count() > 1) { + SmallVector, 2>> Pairs; + for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1)) + continue; + if (Pairs.empty() || Pairs.back().size() == 2) + Pairs.emplace_back(); + if (UsedSubVector.test(2 * I)) { + Pairs.back().emplace_back(I, 0); + } else { + assert(UsedSubVector.test(2 * I + 1) && + "Expected to be used one of the subvectors."); + Pairs.back().emplace_back(I, 1); } } - - if (OpNo >= array_lengthof(InputUsed)) { - // More than two input vectors used! Give up on trying to create a - // shuffle vector. Insert all elements into a BUILD_VECTOR instead. - useBuildVector = true; - break; + if (!Pairs.empty() && Pairs.front().size() > 1) { + // Adjust mask. + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + auto *It = find_if( + Pairs, [SrcRegIdx](ArrayRef> Idxs) { + return Idxs.front().first == SrcRegIdx || + Idxs.back().first == SrcRegIdx; + }); + if (It == Pairs.end()) + continue; + Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) + + (SrcRegIdx == It->front().first ? 0 : (NewElts / 2)); + } + // Adjust inputs. + for (ArrayRef> Idxs : Pairs) { + Inputs[Idxs.front().first] = DAG.getNode( + ISD::CONCAT_VECTORS, DL, + Inputs[Idxs.front().first].getValueType(), + Inputs[Idxs.front().first].getOperand(Idxs.front().second), + Inputs[Idxs.back().first].getOperand(Idxs.back().second)); + } } - - // Add the mask index for the new shuffle vector. 
- Ops.push_back(Idx + OpNo * NewElts); }
-
- if (useBuildVector) {
- EVT EltVT = NewVT.getVectorElementType();
- SmallVector<SDValue, 16> SVOps;
-
- // Extract the input elements by hand.
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element is "undef" or indexes off the end of the input.
- SVOps.push_back(DAG.getUNDEF(EltVT));
+ bool Changed;
+ do {
+ // Try to remove extra shuffles (except broadcasts) and shuffles with the
+ // reused operands.
+ Changed = false;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
+ if (!Shuffle)
continue;
+ if (Shuffle->getOperand(0).getValueType() != NewVT)
+ continue;
+ int Op = -1;
+ if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() &&
+ !Shuffle->isSplat()) {
+ Op = 0;
+ } else if (!Inputs[I].hasOneUse() &&
+ !Shuffle->getOperand(1).isUndef()) {
+ // Find the only used operand, if possible.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ int OpIdx = MaskElt / NewElts;
+ if (Op == -1) {
+ Op = OpIdx;
+ continue;
+ }
+ if (Op != OpIdx) {
+ Op = -1;
+ break;
+ }
+ }
+ }
+ if (Op < 0) {
+ // Try to check if one of the shuffle operands is used already.
+ for (int OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ if (Shuffle->getOperand(OpIdx).isUndef())
+ continue;
+ auto *It = find(Inputs, Shuffle->getOperand(OpIdx));
+ if (It == std::end(Inputs))
+ continue;
+ int FoundOp = std::distance(std::begin(Inputs), It);
+ // Found that operand is used already.
+ // 1. Fix the mask for the reused operand.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ int MaskIdx = MaskElt / NewElts;
+ if (OpIdx == MaskIdx)
+ Idx = MaskElt % NewElts + FoundOp * NewElts;
+ }
+ // 2. Set Op to the unused OpIdx.
+ Op = (OpIdx + 1) % 2;
+ break;
+ }
+ }
+ if (Op >= 0) {
+ Changed = true;
+ Inputs[I] = Shuffle->getOperand(Op);
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ int OpIdx = MaskElt / NewElts;
+ if (OpIdx != Op)
+ continue;
+ Idx = MaskElt % NewElts + SrcRegIdx * NewElts;
+ } }
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input],
- DAG.getVectorIdxConstant(Idx, dl))); }
-
- // Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getBuildVector(NewVT, dl, SVOps);
- } else if (InputUsed[0] == -1U) {
- // No input vectors were used! The result is undefined.
- Output = DAG.getUNDEF(NewVT);
- } else {
- SDValue Op0 = Inputs[InputUsed[0]];
- // If only one input was used, use an undefined vector for the other.
- SDValue Op1 = InputUsed[1] == -1U ?
- DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
- // At least one input vector was used. Create a new shuffle vector.
- Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
+ } while (Changed);
+ };
+ TryPeekThroughShufflesInputs(OrigMask);
+ // Process unique inputs.
+ auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
+ NewElts](SmallVectorImpl<int> &Mask) {
+ SetVector<SDValue> UniqueInputs;
+ SetVector<SDValue> UniqueConstantInputs;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ if (IsConstant(Inputs[I]))
+ UniqueConstantInputs.insert(Inputs[I]);
+ else if (!Inputs[I].isUndef())
+ UniqueInputs.insert(Inputs[I]);
+ }
+ // Adjust mask in case of reused inputs. Also, need to insert the constant
+ // inputs first, otherwise it affects the final outcome.
+ if (UniqueInputs.size() != array_lengthof(Inputs)) {
+ auto &&UniqueVec = UniqueInputs.takeVector();
+ auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
+ unsigned ConstNum = UniqueConstantVec.size();
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
+ if (It != UniqueConstantVec.end()) {
+ Idx = (Idx % NewElts) +
+ NewElts * std::distance(UniqueConstantVec.begin(), It);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ continue;
+ }
+ const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
+ assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
+ Idx = (Idx % NewElts) +
+ NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ }
+ copy(UniqueConstantVec, std::begin(Inputs));
+ copy(UniqueVec, std::next(std::begin(Inputs), ConstNum)); }
+ };
+ MakeUniqueInputs(OrigMask);
+ SDValue OrigInputs[4];
+ copy(Inputs, std::begin(OrigInputs));
for (unsigned High = 0; High < 2; ++High) {
SDValue &Output = High ? Hi : Lo;
- Ops.clear();
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands.
+ unsigned FirstMaskIdx = High * NewElts;
+ SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem);
+ copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ assert(!Output && "Expected default initialized initial value.");
+ TryPeekThroughShufflesInputs(Mask);
+ MakeUniqueInputs(Mask);
+ SDValue TmpInputs[4];
+ copy(Inputs, std::begin(TmpInputs));
+ // Track changes in the output registers.
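A note on this hunk: the rewritten SplitVecRes_VECTOR_SHUFFLE carries one wide mask (OrigMask) across the four split inputs, and every mask entry jointly encodes a source register (Idx / NewElts) and a lane within it (Idx % NewElts); processShuffleMasks then consumes per-half slices of that mask. The standalone sketch below illustrates only the decoding step; the values and names are illustrative, not part of the patch, which resumes right after with the output-tracking lambda.

    // Decode a wide shuffle mask into (register, lane) pairs, the way the
    // legalizer slices OrigMask above. -1 plays the role of UndefMaskElem.
    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NewElts = 4; // elements per split register
      // Mask for a <8 x T> shuffle of two <8 x T> operands, i.e. four split
      // registers of four elements each.
      std::vector<int> OrigMask = {0, 9, 2, 11, 4, 13, -1, 15};
      for (unsigned High = 0; High < 2; ++High) {
        std::printf("%s half:\n", High ? "Hi" : "Lo");
        for (unsigned I = 0; I < NewElts; ++I) {
          int Idx = OrigMask[High * NewElts + I];
          if (Idx < 0) {
            std::printf("  lane %u: undef\n", I);
            continue;
          }
          unsigned SrcReg = Idx / NewElts; // which of Inputs[0..3]
          unsigned SrcElt = Idx % NewElts; // lane inside that register
          std::printf("  lane %u: Inputs[%u][%u]\n", I, SrcReg, SrcElt);
        }
      }
      return 0;
    }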
+ int UsedIdx = -1;
+ bool SecondIteration = false;
+ auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) {
+ if (UsedIdx < 0) {
+ UsedIdx = Idx;
+ return false;
+ }
+ if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx)
+ SecondIteration = true;
+ return SecondIteration;
+ };
+ processShuffleMasks(
+ Mask, array_lengthof(Inputs), array_lengthof(Inputs),
+ /*NumOfUsedRegs=*/1,
+ [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
+ [&Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
+ if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx],
+ DAG.getUNDEF(NewVT), Mask);
+ Inputs[Idx] = Output;
+ },
+ [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &TmpInputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx1, unsigned Idx2) {
+ if (AccumulateResults(Idx1)) {
+ if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1],
+ Inputs[Idx2], Mask);
+ } else {
+ if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1],
+ TmpInputs[Idx2], Mask);
+ }
+ Inputs[Idx1] = Output;
+ });
+ copy(OrigInputs, std::begin(Inputs)); } }
@@ -2268,6 +2634,32 @@ void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1)); }
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue InLo, InHi;
+ GetSplitVector(N->getOperand(0), InLo, InHi);
+ SDLoc DL(N);
+
+ Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+ Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Expanded = TLI.expandVectorSplice(N, DAG);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
+
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
@@ -2294,16 +2686,19 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { report_fatal_error("Do not know how to split this operator's " "operand!\n");
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break;
case ISD::STRICT_FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_ROUND: Res =
SplitVecOp_FP_ROUND(N); break; case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: @@ -2543,6 +2938,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Ch); + } else if (N->getNumOperands() == 3) { + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi); } else { Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); @@ -3128,8 +3531,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt); EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2); - LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); - HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + if (N->getOpcode() == ISD::SETCC) { + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + } else { + assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL); + LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1, + N->getOperand(2), MaskLo, EVLLo); + HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1, + N->getOperand(2), MaskHi, EVLHi); + } SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); EVT OpVT = N->getOperand(0).getValueType(); @@ -3160,6 +3575,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), Hi.getValue(1)); ReplaceValueWith(SDValue(N, 1), NewChain); + } else if (N->getOpcode() == ISD::VP_FP_ROUND) { + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL); + Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo); + Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi); } else { Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); @@ -3204,6 +3626,22 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { return; SDValue Res = SDValue(); + + auto unrollExpandedOp = [&]() { + // We're going to widen this vector op to a legal type by padding with undef + // elements. If the wide vector op is eventually going to be expanded to + // scalar libcalls, then unroll into scalar ops now to avoid unnecessary + // libcalls on the undef elements. 
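The VP paths added in the hunks above split a vector-predicated node by splitting its mask like any other vector operand and dividing the explicit vector length between the halves with DAG.SplitEVL. A plausible scalar model of that division is the clamped split sketched below; this illustrates the intended semantics under that assumption and is not SelectionDAG's implementation. The patch resumes afterwards with the body of the unrollExpandedOp lambda.

    // Model: divide an explicit vector length across two halves of
    // NumElts / 2 lanes each; Lo is clamped, Hi takes the remainder
    // (effectively a saturating subtract).
    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <utility>

    static std::pair<unsigned, unsigned> splitEVL(unsigned EVL,
                                                  unsigned NumElts) {
      unsigned Half = NumElts / 2;
      unsigned Lo = std::min(EVL, Half);
      unsigned Hi = EVL - Lo; // 0 when all active lanes fit in Lo
      return {Lo, Hi};
    }

    int main() {
      auto [Lo, Hi] = splitEVL(5, 8);
      assert(Lo == 4 && Hi == 1);
      std::printf("EVLLo=%u EVLHi=%u\n", Lo, Hi);
      return 0;
    }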
+ EVT VT = N->getValueType(0); + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { + Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + return true; + } + return false; + }; + switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -3223,6 +3661,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_ScalarOp(N); @@ -3235,6 +3674,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Select(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::VP_SETCC: case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: @@ -3280,6 +3720,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: // Vector-predicated binary op widening. Note that -- unlike the // unpredicated versions -- we don't have to worry about trapping on // operations like UDIV, FADD, etc., as we pass on the original vector @@ -3297,12 +3741,19 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOW: + case ISD::FREM: + if (unrollExpandedOp()) + break; + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those + // like any other binary ops. + LLVM_FALLTHROUGH; + case ISD::FADD: case ISD::FMUL: - case ISD::FPOW: case ISD::FSUB: case ISD::FDIV: - case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -3338,6 +3789,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_FCOPYSIGN(N); break; + case ISD::IS_FPCLASS: + Res = WidenVecRes_IS_FPCLASS(N); + break; + case ISD::FPOWI: Res = WidenVecRes_POWI(N); break; @@ -3350,14 +3805,23 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::FP_EXTEND: + case ISD::VP_FP_EXTEND: case ISD::FP_ROUND: + case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::VP_FPTOSI: case ISD::FP_TO_UINT: + case ISD::VP_FPTOUI: case ISD::SIGN_EXTEND: + case ISD::VP_SIGN_EXTEND: case ISD::SINT_TO_FP: + case ISD::VP_SITOFP: + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: case ISD::UINT_TO_FP: + case ISD::VP_UITOFP: case ISD::ZERO_EXTEND: + case ISD::VP_ZERO_EXTEND: Res = WidenVecRes_Convert(N); break; @@ -3381,23 +3845,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: - case ISD::FTRUNC: { - // We're going to widen this vector op to a legal type by padding with undef - // elements. If the wide vector op is eventually going to be expanded to - // scalar libcalls, then unroll into scalar ops now to avoid unnecessary - // libcalls on the undef elements. 
- EVT VT = N->getValueType(0); - EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && - TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { - Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + case ISD::FTRUNC: + if (unrollExpandedOp()) break; - } - } - // If the target has custom/legal support for the scalar FP intrinsic ops - // (they are probably not destined to become libcalls), then widen those like - // any other unary ops. - LLVM_FALLTHROUGH; + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those + // like any other unary ops. + LLVM_FALLTHROUGH; case ISD::ABS: case ISD::BITREVERSE: @@ -3407,13 +3861,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - case ISD::FNEG: + case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; - case ISD::FMA: + case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: case ISD::FSHR: Res = WidenVecRes_Ternary(N); @@ -3432,7 +3886,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); SDValue InOp3 = GetWidenedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); + if (N->getNumOperands() == 3) + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); + + assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue Mask = + GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); + return DAG.getNode(N->getOpcode(), dl, WidenVT, + {InOp1, InOp2, InOp3, Mask, N->getOperand(4)}); } SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { @@ -3552,7 +4015,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; - unsigned NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorMinNumElements(); const SDNodeFlags Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; @@ -3566,6 +4029,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } + // FIXME: Improve support for scalable vectors. + assert(!VT.isScalableVector() && "Scalable vectors not handled yet."); + // No legal vector version so unroll the vector operation and then widen. 
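The unrollExpandedOp hook above, now shared by FPOW/FREM and the FP unary cases, exists so that widening never manufactures libcalls for padding lanes: if the wide op would be expanded to scalar calls anyway, unrolling first makes one call per real element and leaves the padding undef. Below is a standalone model of that accounting; expensiveOp stands in for a scalar libcall and every name here is hypothetical. The patch continues after the sketch.

    // Widening a 3-element op to 4 lanes: unroll first, so the expensive
    // scalar call runs only on real elements, never on the padding lane.
    #include <cmath>
    #include <cstdio>
    #include <optional>
    #include <vector>

    static double expensiveOp(double A, double B) {
      return std::fmod(A, B); // stand-in for a scalar libcall such as fmod
    }

    int main() {
      std::vector<double> A = {7.0, 8.5, 9.0}, B = {2.0, 3.0, 4.0};
      const unsigned WideElts = 4; // the legal, widened vector length
      std::vector<std::optional<double>> Wide(WideElts); // empty == undef
      for (unsigned I = 0; I < A.size(); ++I)
        Wide[I] = expensiveOp(A[I], B[I]); // three calls, not four
      for (unsigned I = 0; I < WideElts; ++I)
        std::printf("lane %u: %s\n", I, Wide[I] ? "computed" : "undef");
      return 0;
    }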
if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); @@ -3826,6 +4292,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTEC == WidenEC) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); + if (N->getNumOperands() == 3) { + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue Mask = + GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); + return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2)); + } return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { @@ -4007,6 +4479,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } +SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Arg = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, + N->getFlags()); +} + SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -4018,7 +4497,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); + if (N->getNumOperands() == 1) + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); + + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue Mask = + GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, + {InOp, Mask, N->getOperand(2)}); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { @@ -4243,11 +4731,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - SDValue InOp = N->getOperand(0); - SDValue Idx = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); SDLoc dl(N); auto InOpTypeAction = getTypeAction(InOp.getValueType()); @@ -4264,6 +4752,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // Check if we can extract from the vector. 
unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
unsigned InNumElts = InVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ assert(IdxVal % VTNumElts == 0 &&
+ "Expected Idx to be a multiple of subvector minimum vector length");
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
@@ -4277,8 +4768,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// nxv2i64 extract_subvector(nxv16i64, 8)
// nxv2i64 extract_subvector(nxv16i64, 10)
// undef)
- unsigned VTNElts = VT.getVectorMinNumElements();
- unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts);
assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " "down type's element count");
EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
@@ -4287,7 +4777,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
SmallVector<SDValue> Parts;
unsigned I = 0;
- for (; I < VTNElts / GCD; ++I)
+ for (; I < VTNumElts / GCD; ++I)
Parts.push_back( DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp, DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
@@ -4304,9 +4794,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- for (i = 0; i < NumElts; ++i)
+ for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getVectorIdxConstant(IdxVal + i, dl));
@@ -4783,10 +5272,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
- ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
- N->getOperand(3))
- : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
@@ -4832,13 +5321,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
N->getOperand(0).getValueType().isVector() && "Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "can not widen non-vector type");
- EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), WidenNumElts);
+ EVT WidenInVT =
+ EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC);
// The input and output types often differ here, and it could be that while
// we'd prefer to widen the result type, the input operands have been split.
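The GCD decomposition used by WidenVecRes_EXTRACT_SUBVECTOR above can be checked with plain arithmetic: pieces of greatestCommonDivisor(VTNumElts, WidenNumElts) elements tile both the extracted type and the widened type exactly. A small sketch with assumed example counts (a 6-element scalable extract widened to 8 elements, starting index 8); the patch resumes after it.

    #include <cstdio>
    #include <numeric>

    int main() {
      unsigned VTNumElts = 6, WidenNumElts = 8, IdxVal = 8;
      unsigned GCD = std::gcd(VTNumElts, WidenNumElts); // 2
      for (unsigned I = 0; I < VTNumElts / GCD; ++I)    // real pieces
        std::printf("extract %u elts at index %u\n", GCD, IdxVal + I * GCD);
      for (unsigned I = VTNumElts / GCD; I < WidenNumElts / GCD; ++I)
        std::printf("piece %u: undef padding\n", I);    // pad to widened VT
      return 0;
    }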
@@ -4865,8 +5354,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { InOp2.getValueType() == WidenInVT && "Input not widened to expected type!"); (void)WidenInVT; - return DAG.getNode(ISD::SETCC, SDLoc(N), - WidenVT, InOp1, InOp2, N->getOperand(2)); + if (N->getOpcode() == ISD::VP_SETCC) { + SDValue Mask = + GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); + return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2, + N->getOperand(2), Mask, N->getOperand(4)); + } + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, + N->getOperand(2)); } SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { @@ -4946,6 +5441,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; + case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -5098,6 +5594,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N); } +SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) { + SDLoc DL(N); + EVT ResultVT = N->getValueType(0); + SDValue Test = N->getOperand(1); + SDValue WideArg = GetWidenedVector(N->getOperand(0)); + + // Process this node similarly to SETCC. + EVT WideResultVT = getSetCCResultType(WideArg.getValueType()); + if (ResultVT.getScalarType() == MVT::i1) + WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WideResultVT.getVectorNumElements()); + + SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT, + {WideArg, Test}, N->getFlags()); + + // Extract the needed results from the result vector. + EVT ResVT = + EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(), + ResultVT.getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideNode, + DAG.getVectorIdxConstant(0, DL)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, ResultVT, CC); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal. EVT VT = N->getValueType(0); @@ -5192,11 +5716,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { SDLoc dl(N); // Check if we can convert between two legal vector types and extract. - unsigned InWidenSize = InWidenVT.getSizeInBits(); - unsigned Size = VT.getSizeInBits(); + TypeSize InWidenSize = InWidenVT.getSizeInBits(); + TypeSize Size = VT.getSizeInBits(); // x86mmx is not an acceptable vector element type, so don't try. - if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { - unsigned NewNumElts = InWidenSize / Size; + if (!VT.isVector() && VT != MVT::x86mmx && + InWidenSize.hasKnownScalarFactor(Size)) { + unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size); EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); @@ -5211,9 +5736,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { // having to copy via memory. 
if (VT.isVector()) { EVT EltVT = VT.getVectorElementType(); - unsigned EltSize = EltVT.getSizeInBits(); - if (InWidenSize % EltSize == 0) { - unsigned NewNumElts = InWidenSize / EltSize; + unsigned EltSize = EltVT.getFixedSizeInBits(); + if (InWidenSize.isKnownMultipleOf(EltSize)) { + ElementCount NewNumElts = + (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits()) + .divideCoefficientBy(EltSize); EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); @@ -5266,18 +5793,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue SubVec = N->getOperand(1); SDValue InVec = N->getOperand(0); - if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector) - InVec = GetWidenedVector(InVec); - if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) SubVec = GetWidenedVector(SubVec); - if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() && + if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() && N->getConstantOperandVal(2) == 0) - return SubVec; + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec, + N->getOperand(2)); report_fatal_error("Don't know how to widen the operands for " "INSERT_SUBVECTOR"); @@ -5500,11 +6026,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) { Mask = GetWidenedMask(Mask, WideEC); WideMemVT = EVT::getVectorVT(*DAG.getContext(), VPSC->getMemoryVT().getScalarType(), WideEC); - } else if (OpNo == 4) { + } else if (OpNo == 3) { // Just widen the index. It's allowed to have extra elements. Index = GetWidenedVector(Index); } else - llvm_unreachable("Can't widen this operand of mscatter"); + llvm_unreachable("Can't widen this operand of VP_SCATTER"); SDValue Ops[] = { VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask, @@ -5597,8 +6123,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { assert(NeutralElem && "Neutral element must exist"); // Pad the vector with the neutral element. - unsigned OrigElts = OrigVT.getVectorNumElements(); - unsigned WideElts = WideVT.getVectorNumElements(); + unsigned OrigElts = OrigVT.getVectorMinNumElements(); + unsigned WideElts = WideVT.getVectorMinNumElements(); + + if (WideVT.isScalableVector()) { + unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, + ElementCount::getScalable(GCD)); + SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); + for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD) + Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral, + DAG.getVectorIdxConstant(Idx, dl)); + return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags); + } + for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, DAG.getVectorIdxConstant(Idx, dl)); @@ -5622,8 +6160,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags); // Pad the vector with the neutral element. 
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, DAG.getVectorIdxConstant(Idx, dl));
@@ -5795,7 +6345,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign =
- (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
// Find the vector type that can load from.
Optional<EVT> FirstVT =
@@ -6103,7 +6653,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match");
- assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ assert(InVT.isScalableVector() == NVT.isScalableVector() &&
"cannot modify scalable vectors in this way");
SDLoc dl(InOp);
@@ -6111,10 +6661,10 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
if (InVT == NVT) return InOp;
- unsigned InNumElts = InVT.getVectorNumElements();
- unsigned WidenNumElts = NVT.getVectorNumElements();
- if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
- unsigned NumConcat = WidenNumElts / InNumElts;
+ ElementCount InEC = InVT.getVectorElementCount();
+ ElementCount WidenEC = NVT.getVectorElementCount();
+ if (WidenEC.hasKnownScalarFactor(InEC)) {
+ unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : DAG.getUNDEF(InVT);
@@ -6125,10 +6675,16 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
- if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ if (InEC.hasKnownScalarFactor(WidenEC))
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, DAG.getVectorIdxConstant(0, dl));
+ assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ "Scalable vectors should have been handled already.");
+
+ unsigned InNumElts = InEC.getFixedValue();
+ unsigned WidenNumElts = WidenEC.getFixedValue();
+
// Fall back to extract and build.
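Both VECREDUCE widenings above pad the input with the reduction's neutral element, which is exactly what leaves the reduced value unchanged (0 for add, 1 for mul, the identity element in general). A quick scalar check of that invariant; the extract-and-build code follows the sketch.

    #include <cassert>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<int> Orig = {3, 1, 4};
      std::vector<int> Widened = Orig;
      Widened.resize(8, /*NeutralElem=*/0); // pad to the widened length
      int A = std::accumulate(Orig.begin(), Orig.end(), 0);
      int B = std::accumulate(Widened.begin(), Widened.end(), 0);
      assert(A == B); // padding with the identity preserves the reduction
      return 0;
    }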
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
@@ -6144,29 +6700,3 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, Ops[Idx] = FillVal; return DAG.getBuildVector(NVT, dl, Ops); }
-
-void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDValue InLo, InHi;
- GetSplitVector(N->getOperand(0), InLo, InHi);
- SDLoc DL(N);
-
- Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
- Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
-}
-
-void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
- SDValue Expanded = TLI.expandVectorSplice(N, DAG);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
- DAG.getVectorIdxConstant(0, DL));
- Hi =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
- DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
-}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 3d5c4c5b1cae..e0e8d503ca92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -28,21 +27,18 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "scheduler"
-static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable use of DFA during scheduling"));
+static cl::opt<bool>
+ DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::desc("Disable use of DFA during scheduling"));
static cl::opt<int> RegPressureThreshold(
- "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
- cl::desc("Track reg pressure and switch priority to in-depth"));
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index f64b332a7fef..9fcf692babdc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 1b89864116cb..78fc407e9573 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,16 +11,14 @@
//===----------------------------------------------------------------------===//
#include "InstrEmitter.h"
-#include "ScheduleDAGSDNodes.h"
#include "SDNodeDbgValue.h"
-#include "llvm/ADT/STLExtras.h" +#include "ScheduleDAGSDNodes.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -442,17 +440,29 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, - std::vector &LiveRegDefs, + std::vector &LiveRegDefs, SmallSet &RegAdded, SmallVectorImpl &LRegs, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const SDNode *Node = nullptr) { bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { - if (RegAdded.insert(*AI).second) { - LRegs.push_back(*AI); - Added = true; - } + // Check if Ref is live. + if (!LiveRegDefs[*AI]) + continue; + + // Allow multiple uses of the same def. + if (LiveRegDefs[*AI] == SU) + continue; + + // Allow multiple uses of same def + if (Node && LiveRegDefs[*AI]->getNode() == Node) + continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(*AI).second) { + LRegs.push_back(*AI); + Added = true; } } return Added; @@ -504,6 +514,15 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } continue; } + + if (Node->getOpcode() == ISD::CopyToReg) { + Register Reg = cast(Node->getOperand(1))->getReg(); + if (Reg.isPhysical()) { + SDNode *SrcNode = Node->getOperand(2).getNode(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode); + } + } + if (!Node->isMachineOpcode()) continue; const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); @@ -758,7 +777,8 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, + DAG->getUseInstrRefDebugInfo()); DenseMap VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 7a5e8ac6075e..8a04ce7535a1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1294,11 +1294,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. -static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, - SUnit **LiveRegDefs, +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet &RegAdded, SmallVectorImpl &LRegs, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const SDNode *Node = nullptr) { for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { // Check if Ref is live. @@ -1307,6 +1307,10 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, // Allow multiple uses of the same def. 
if (LiveRegDefs[*AliasI] == SU) continue;
+ // Allow multiple uses of same def
+ if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+ continue;
+
// Add Reg to the set of interfering live regs.
if (RegAdded.insert(*AliasI).second) { LRegs.push_back(*AliasI);
@@ -1387,6 +1391,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { continue; }
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+ SrcNode);
+ }
+ }
+
if (!Node->isMachineOpcode()) continue;
// If we're in the middle of scheduling a call, don't begin scheduling
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 55f6f288f3e3..2a10157b404e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -843,7 +843,8 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
+ DAG->getUseInstrRefDebugInfo());
DenseMap<SDValue, Register> VRBaseMap;
DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
@@ -883,7 +884,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (MI->isCandidateForCallSiteEntry() && DAG->getTarget().Options.EmitCallSiteInfo)
- MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+ MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node));
if (DAG->getNoMergeSiteInfo(Node)) { MI->setFlag(MachineInstr::MIFlag::NoMerge);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 10940478010e..1ba1fd65b8c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -19,19 +19,15 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <climits>
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d5998d166d25..b3b8756ae9ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,9 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -55,7 +53,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
#include
"llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -144,11 +141,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (auto *Op0 = dyn_cast(N->getOperand(0))) { - SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize); + SplatVal = Op0->getAPIntValue().trunc(EltSize); return true; } if (auto *Op0 = dyn_cast(N->getOperand(0))) { - SplatVal = Op0->getValueAPF().bitcastToAPInt().truncOrSelf(EltSize); + SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize); return true; } } @@ -714,6 +711,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + ID.AddInteger(LD->getMemOperand()->getFlags()); break; } case ISD::STORE: { @@ -721,6 +719,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + ID.AddInteger(ST->getMemOperand()->getFlags()); break; } case ISD::VP_LOAD: { @@ -728,6 +727,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ELD->getMemoryVT().getRawBits()); ID.AddInteger(ELD->getRawSubclassData()); ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); + ID.AddInteger(ELD->getMemOperand()->getFlags()); break; } case ISD::VP_STORE: { @@ -735,6 +735,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(EST->getMemoryVT().getRawBits()); ID.AddInteger(EST->getRawSubclassData()); ID.AddInteger(EST->getPointerInfo().getAddrSpace()); + ID.AddInteger(EST->getMemOperand()->getFlags()); + break; + } + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: { + const VPStridedLoadSDNode *SLD = cast(N); + ID.AddInteger(SLD->getMemoryVT().getRawBits()); + ID.AddInteger(SLD->getRawSubclassData()); + ID.AddInteger(SLD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: { + const VPStridedStoreSDNode *SST = cast(N); + ID.AddInteger(SST->getMemoryVT().getRawBits()); + ID.AddInteger(SST->getRawSubclassData()); + ID.AddInteger(SST->getPointerInfo().getAddrSpace()); break; } case ISD::VP_GATHER: { @@ -742,6 +757,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(EG->getMemoryVT().getRawBits()); ID.AddInteger(EG->getRawSubclassData()); ID.AddInteger(EG->getPointerInfo().getAddrSpace()); + ID.AddInteger(EG->getMemOperand()->getFlags()); break; } case ISD::VP_SCATTER: { @@ -749,6 +765,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ES->getMemoryVT().getRawBits()); ID.AddInteger(ES->getRawSubclassData()); ID.AddInteger(ES->getPointerInfo().getAddrSpace()); + ID.AddInteger(ES->getMemOperand()->getFlags()); break; } case ISD::MLOAD: { @@ -756,6 +773,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(MLD->getMemoryVT().getRawBits()); ID.AddInteger(MLD->getRawSubclassData()); ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); + ID.AddInteger(MLD->getMemOperand()->getFlags()); break; } case ISD::MSTORE: { @@ -763,6 +781,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(MST->getMemoryVT().getRawBits()); ID.AddInteger(MST->getRawSubclassData()); ID.AddInteger(MST->getPointerInfo().getAddrSpace()); + 
ID.AddInteger(MST->getMemOperand()->getFlags());
break; }
case ISD::MGATHER: {
ID.AddInteger(MG->getMemoryVT().getRawBits());
ID.AddInteger(MG->getRawSubclassData());
ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MG->getMemOperand()->getFlags());
break; }
case ISD::MSCATTER: {
ID.AddInteger(MS->getMemoryVT().getRawBits());
ID.AddInteger(MS->getRawSubclassData());
ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MS->getMemOperand()->getFlags());
break; }
case ISD::ATOMIC_CMP_SWAP:
@@ -799,11 +820,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ ID.AddInteger(AT->getMemOperand()->getFlags());
break; }
case ISD::PREFETCH: {
const MemSDNode *PF = cast<MemSDNode>(N);
ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ ID.AddInteger(PF->getMemOperand()->getFlags());
break; }
case ISD::VECTOR_SHUFFLE: {
@@ -821,11 +844,18 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(BA->getTargetFlags());
break; }
+ case ISD::AssertAlign:
+ ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
+ break;
} // end switch (N->getOpcode())
- // Target specific memory nodes could also have address spaces to check.
- if (N->isTargetMemoryOpcode())
- ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+ // Target specific memory nodes could also have address spaces and flags
+ // to check.
+ if (N->isTargetMemoryOpcode()) {
+ const MemSDNode *MN = cast<MemSDNode>(N);
+ ID.AddInteger(MN->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MN->getMemOperand()->getFlags());
+ }
}
/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
@@ -1395,6 +1425,12 @@ SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, TrueValue); }
+SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
+ SDValue Mask, SDValue EVL, EVT VT) {
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
+ return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
+}
+
SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT) { if (!V)
@@ -2433,23 +2469,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { if (VT.isScalableVector()) return SDValue();
- APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnes(VT.getVectorNumElements())
- : APInt(1, 1);
- return GetDemandedBits(V, DemandedBits, DemandedElts);
-}
-
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by DemandedBits are used in the elements specified by
-/// DemandedElts.
-/// TODO: really we should be making this into the DAG equivalent of
-/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
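The AddNodeIDCustom additions in the hunk above fold the MachineMemOperand flags into each memory node's FoldingSet profile, so two nodes that differ only in those flags (one volatile, say) no longer CSE to a single node. Below is a loose standalone model of that keying; the fields and values are made up for illustration and these are not SelectionDAG's data structures. The patch resumes after the sketch.

    // Model: include the memory-operand flags in the CSE key, so loads that
    // differ only in flags stay distinct.
    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <tuple>

    using NodeKey = std::tuple<unsigned /*MemVT*/, unsigned /*AddrSpace*/,
                               uint64_t /*MMOFlags*/>;

    int main() {
      std::set<NodeKey> Nodes;
      Nodes.insert({1, 0, 0});      // plain load
      Nodes.insert({1, 0, 1 << 2}); // same load, hypothetical volatile flag
      std::printf("distinct nodes: %zu\n", Nodes.size()); // 2, not merged
      return 0;
    }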
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
- const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this);
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this);
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -2469,8 +2491,8 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, if (Amt >= DemandedBits.getBitWidth()) break;
APInt SrcDemandedBits = DemandedBits << Amt;
- if (SDValue SimplifyLHS =
- GetDemandedBits(V.getOperand(0), SrcDemandedBits))
+ if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits(
+ V.getOperand(0), SrcDemandedBits, *this))
return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); }
@@ -2503,6 +2525,14 @@ bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); }
+/// MaskedVectorIsZero - Return true if 'Op' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts,
+ unsigned Depth /* = 0 */) const {
+ APInt Mask = APInt::getAllOnes(V.getScalarValueSizeInBits());
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, unsigned Depth) const {
@@ -2587,9 +2617,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return true; }
case ISD::VECTOR_SHUFFLE: {
- // Check if this is a shuffle node doing a splat.
- // TODO: Do we need to handle shuffle(splat, undef, mask)?
- int SplatIndex = -1;
+ // Check if this is a shuffle node doing a splat or a shuffle of a splat.
+ APInt DemandedLHS = APInt::getNullValue(NumElts);
+ APInt DemandedRHS = APInt::getNullValue(NumElts);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
@@ -2599,11 +2629,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, }
if (!DemandedElts[i]) continue;
- if (0 <= SplatIndex && SplatIndex != M)
- return false;
- SplatIndex = M;
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
}
- return true;
+
+ // If we aren't demanding either op, assume there's no splat.
+ // If we are demanding both ops, assume there's no splat.
+ if ((DemandedLHS.isZero() && DemandedRHS.isZero()) ||
+ (!DemandedLHS.isZero() && !DemandedRHS.isZero()))
+ return false;
+
+ // See if the demanded elts of the source op is a splat or we only demand
+ // one element, which should always be a splat.
+ // TODO: Handle source ops splats with undefs.
+ auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
+ APInt SrcUndefs;
+ return (SrcElts.countPopulation() == 1) ||
+ (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
+ (SrcElts & SrcUndefs).isZero());
+ };
+ if (!DemandedLHS.isZero())
+ return CheckSplatSrc(V.getOperand(0), DemandedLHS);
+ return CheckSplatSrc(V.getOperand(1), DemandedRHS);
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
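The isSplatValue change above classifies each demanded shuffle lane by the operand it reads; a splat remains plausible only when exactly one operand ends up demanded. A simplified standalone sketch of that classification (undef handling reduced to skipping the lane); the EXTRACT_SUBVECTOR hunk continues after it.

    #include <cstdio>
    #include <vector>

    int main() {
      const int NumElts = 4;
      std::vector<int> Mask = {5, 5, -1, 5}; // demanded lanes all read RHS[1]
      std::vector<bool> Demanded = {true, true, false, true};
      std::vector<bool> DemandedLHS(NumElts), DemandedRHS(NumElts);
      for (int I = 0; I < NumElts; ++I) {
        if (!Demanded[I] || Mask[I] < 0)
          continue;
        if (Mask[I] < NumElts)
          DemandedLHS[Mask[I]] = true;
        else
          DemandedRHS[Mask[I] - NumElts] = true;
      }
      auto Any = [](const std::vector<bool> &V) {
        for (bool B : V)
          if (B)
            return true;
        return false;
      };
      // Exactly one side is demanded here, so a splat is still possible.
      std::printf("LHS demanded: %d, RHS demanded: %d\n", Any(DemandedLHS),
                  Any(DemandedRHS));
      return 0;
    }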
@@ -2614,7 +2663,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, uint64_t Idx = V.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt UndefSrcElts; - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { UndefElts = UndefSrcElts.extractBits(NumElts, Idx); return true; @@ -2631,9 +2680,49 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return false; unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt UndefSrcElts; - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { - UndefElts = UndefSrcElts.truncOrSelf(NumElts); + UndefElts = UndefSrcElts.trunc(NumElts); + return true; + } + break; + } + case ISD::BITCAST: { + SDValue Src = V.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned SrcBitWidth = SrcVT.getScalarSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); + + // Ignore bitcasts from unsupported types. + // TODO: Add fp support? + if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger()) + break; + + // Bitcast 'small element' vector to 'large element' vector. + if ((BitWidth % SrcBitWidth) == 0) { + // See if each sub element is a splat. + unsigned Scale = BitWidth / SrcBitWidth; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt ScaledDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); + for (unsigned I = 0; I != Scale; ++I) { + APInt SubUndefElts; + APInt SubDemandedElt = APInt::getOneBitSet(Scale, I); + APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt); + SubDemandedElts &= ScaledDemandedElts; + if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) + return false; + + // Here we can't do "MatchAnyBits" operation merge for undef bits. + // Because some operation only use part value of the source. + // Take llvm.fshl.* for example: + // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32 + // t2: v2i64 = bitcast t1 + // t5: v2i64 = fshl t3, t4, t2 + // We can not convert t2 to {i64 undef, i64 undef} + UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts, + /*MatchAllBits=*/true); + } return true; } break; @@ -2978,7 +3067,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1); break; } @@ -3083,9 +3172,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); // TODO: SelfMultiply can be poison, but not undef. - SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( - Op.getOperand(0), DemandedElts, false, Depth + 1); + if (SelfMultiply) + SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( + Op.getOperand(0), DemandedElts, false, Depth + 1); Known = KnownBits::mul(Known, Known2, SelfMultiply); + + // If the multiplication is known not to overflow, the product of a number + // with itself is non-negative. 
Only do this if we didn't already compute + // the opposite value for the sign bit. + if (Op->getFlags().hasNoSignedWrap() && + Op.getOperand(0) == Op.getOperand(1) && + !Known.isNegative()) + Known.makeNonNegative(); break; } case ISD::MULHU: { @@ -3128,6 +3226,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::udiv(Known, Known2); break; } + case ISD::AVGCEILU: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = Known.zext(BitWidth + 1); + Known2 = Known2.zext(BitWidth + 1); + KnownBits One = KnownBits::makeConstant(APInt(1, 1)); + Known = KnownBits::computeForAddCarry(Known, Known2, One); + Known = Known.extractBits(BitWidth, 1); + break; + } case ISD::SELECT: case ISD::VSELECT: Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); @@ -3330,7 +3438,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); Known = Known.zext(BitWidth); break; @@ -3342,7 +3450,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::SIGN_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. @@ -3358,7 +3466,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ANY_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); Known = Known.anyext(BitWidth); break; @@ -3605,6 +3713,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::smax(Known, Known2); else Known = KnownBits::smin(Known, Known2); + + // For SMAX, if CstLow is non-negative we know the result will be + // non-negative and thus all sign bits are 0. + // TODO: There's an equivalent of this for smin with negative constant for + // known ones. 
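// A standalone check, not part of the patch: the ISD::AVGCEILU known-bits
// case above models the unsigned rounding-up average exactly as defined,
// (a + b + 1) >> 1 computed in BitWidth + 1 bits so the intermediate sum
// cannot wrap. The identity on 8-bit values:
#include <cassert>
#include <cstdint>

int main() {
  uint8_t A = 250, B = 251;
  uint16_t Wide = uint16_t(A) + uint16_t(B) + 1; // 9-bit math, no wraparound
  uint8_t Avg = uint8_t(Wide >> 1);              // extract bits [1, 8]
  assert(Avg == 251);
  // Doing the same in 8 bits would wrap: (250 + 251 + 1) mod 256 = 246,
  // and 246 >> 1 = 123 is the wrong answer.
  return 0;
}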
+ if (IsMax && CstLow) { + const APInt &ValueLow = CstLow->getAPIntValue(); + if (ValueLow.isNonNegative()) { + unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits())); + } + } + break; } case ISD::FP_TO_UINT_SAT: { @@ -3905,7 +4026,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SIGN_EXTEND_VECTOR_INREG: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements()); + APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); Tmp = VTBits - SrcVT.getScalarSizeInBits(); return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; } @@ -4192,7 +4313,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); } case ISD::CONCAT_VECTORS: { @@ -4585,26 +4706,54 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +// Only bits set in Mask must be negated, other bits may be arbitrary. +SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) { + if (isBitwiseNot(V, AllowUndefs)) + return V.getOperand(0); + + // Handle any_extend (not (truncate X)) pattern, where Mask only sets + // bits in the non-extended part. + ConstantSDNode *MaskC = isConstOrConstSplat(Mask); + if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + SDValue ExtArg = V.getOperand(0); + if (ExtArg.getScalarValueSizeInBits() >= + MaskC->getAPIntValue().getActiveBits() && + isBitwiseNot(ExtArg, AllowUndefs) && + ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE && + ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType()) + return ExtArg.getOperand(0).getOperand(0); + return SDValue(); +} + +static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) { + // Match masked merge pattern (X & ~M) op (Y & M) + // Including degenerate case (X & ~M) op M + auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask, + SDValue Other) { + if (SDValue NotOperand = + getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) { + if (Other == NotOperand) + return true; + if (Other->getOpcode() == ISD::AND) + return NotOperand == Other->getOperand(0) || + NotOperand == Other->getOperand(1); + } + return false; + }; + if (A->getOpcode() == ISD::AND) + return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) || + MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B); + return false; +} + // FIXME: unify with llvm::haveNoCommonBitsSet. 
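// A standalone check, not part of the patch: the masked-merge pattern matched
// by haveNoCommonBitsSetCommutative above is disjoint by construction, since
// (X & ~M) and (Y & M) can never share a set bit, so an ADD of the two halves
// is equivalent to an OR:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678, M = 0x00FF00FF;
  uint32_t Lo = X & ~M, Hi = Y & M;
  assert((Lo & Hi) == 0);       // no common bits, for any X, Y, M
  assert((Lo | Hi) == Lo + Hi); // hence OR and ADD agree
  return 0;
}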
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - // Match masked merge pattern (X & ~M) op (Y & M) - if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) { - auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) { - if (isBitwiseNot(NotM, true)) { - SDValue NotOperand = NotM->getOperand(0); - return NotOperand == And->getOperand(0) || - NotOperand == And->getOperand(1); - } - return false; - }; - if (MatchNoCommonBitsPattern(A->getOperand(0), B) || - MatchNoCommonBitsPattern(A->getOperand(1), B) || - MatchNoCommonBitsPattern(B->getOperand(0), A) || - MatchNoCommonBitsPattern(B->getOperand(1), A)) - return true; - } + if (haveNoCommonBitsSetCommutative(A, B) || + haveNoCommonBitsSetCommutative(B, A)) + return true; return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), computeKnownBits(B)); } @@ -4833,9 +4982,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); - case ISD::FP16_TO_FP: { + case ISD::FP16_TO_FP: + case ISD::BF16_TO_FP: { bool Ignored; - APFloat FPV(APFloat::IEEEhalf(), + APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf() + : APFloat::BFloat(), (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); // This can return overflow, underflow, or inexact; we don't care. @@ -4909,11 +5060,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: { bool Ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(APFloat::IEEEhalf(), + (void)V.convert(Opcode == ISD::FP_TO_FP16 ? 
APFloat::IEEEhalf() + : APFloat::BFloat(), APFloat::rmNearestTiesToEven, &Ignored); return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); } @@ -4965,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FREEZE: assert(VT == Operand.getValueType() && "Unexpected VT!"); + if (isGuaranteedNotToBeUndefOrPoison(Operand)) + return Operand; break; case ISD::TokenFactor: case ISD::MERGE_VALUES: @@ -5114,7 +5269,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid ABS!"); if (OpOpcode == ISD::UNDEF) - return getUNDEF(VT); + return getConstant(0, DL, VT); break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && @@ -5182,6 +5337,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (Operand.getValueType().getScalarType() == MVT::i1) return getNOT(DL, Operand, Operand.getValueType()); break; + case ISD::VECREDUCE_ADD: + if (Operand.getValueType().getScalarType() == MVT::i1) + return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand); + break; case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: if (Operand.getValueType().getScalarType() == MVT::i1) @@ -5273,6 +5432,30 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, APInt C2Ext = C2.zext(FullWidth); return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); } + case ISD::AVGFLOORS: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.sext(FullWidth); + APInt C2Ext = C2.sext(FullWidth); + return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1); + } + case ISD::AVGFLOORU: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.zext(FullWidth); + APInt C2Ext = C2.zext(FullWidth); + return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1); + } + case ISD::AVGCEILS: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.sext(FullWidth); + APInt C2Ext = C2.sext(FullWidth); + return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); + } + case ISD::AVGCEILU: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.zext(FullWidth); + APInt C2Ext = C2.zext(FullWidth); + return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); + } } return llvm::None; } @@ -5355,7 +5538,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (!FoldAttempt) return SDValue(); - SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); + SDValue Folded = getConstant(*FoldAttempt, DL, VT); assert((!Folded || !VT.isVector()) && "Can't fold vectors ops with scalar operands"); return Folded; @@ -5400,7 +5583,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); if (!Fold) break; - RawBits.push_back(Fold.getValue()); + RawBits.push_back(*Fold); } if (RawBits.size() == NumElts.getFixedValue()) { // We have constant folded, but we need to cast this again back to @@ -5416,7 +5599,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { if (DstUndefs[I]) continue; - Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT); + Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT); } return getBitcast(VT, getBuildVector(BVVT, DL, Ops)); } @@ -5455,9 +5638,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, !llvm::all_of(Ops, IsScalarOrSameVectorSize) return SDValue(); 
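// A standalone check, not part of the patch: all four AVG cases added to
// FoldValue above share one recipe, extend both constants by one bit (zero-
// or sign-extend to match the unsigned/signed flavor), add (plus one for the
// CEIL forms), then extract BitWidth bits starting at bit 1, which drops the
// low bit, i.e. divides by two. The same arithmetic on plain integers:
#include <cassert>
#include <cstdint>

int main() {
  auto AvgFloorU8 = [](uint8_t A, uint8_t B) {
    return uint8_t((uint16_t(A) + uint16_t(B)) >> 1);
  };
  auto AvgCeilU8 = [](uint8_t A, uint8_t B) {
    return uint8_t((uint16_t(A) + uint16_t(B) + 1) >> 1);
  };
  assert(AvgFloorU8(200, 101) == 150); // floor(301 / 2)
  assert(AvgCeilU8(200, 101) == 151);  // ceil(301 / 2)
  return 0;
}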
- // If we are comparing vectors, then the result needs to be a i1 boolean - // that is then sign-extended back to the legal result type. + // If we are comparing vectors, then the result needs to be a i1 boolean that + // is then extended back to the legal result type depending on how booleans + // are represented. EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + ISD::NodeType ExtendCode = + (Opcode == ISD::SETCC && SVT != VT.getScalarType()) + ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT)) + : ISD::SIGN_EXTEND; // Find legal integer scalar type for constant promotion and // ensure that its scalar size is at least as large as source. @@ -5494,8 +5682,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // Build vector (integer) scalar operands may need implicit // truncation - do this before constant folding. - if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) { + // Don't create illegally-typed nodes unless they're constants or undef + // - if we fail to constant fold we can't guarantee the (dead) nodes + // we're creating will be cleaned up before being visited for + // legalization. + if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() && + !isa(ScalarOp) && + TLI->getTypeAction(*getContext(), InSVT) != + TargetLowering::TypeLegal) + return SDValue(); ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + } ScalarOps.push_back(ScalarOp); } @@ -5505,7 +5703,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // Legalize the (integer) scalar constant if necessary. if (LegalSVT != SVT) - ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && @@ -5629,20 +5827,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(Opcode, DL, VT, N1, N2, Flags); } +void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, + SDValue &N2) const { + if (!TLI->isCommutativeBinOp(Opcode)) + return; + + // Canonicalize: + // binop(const, nonconst) -> binop(nonconst, const) + bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); + bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); + bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) + std::swap(N1, N2); + + // Canonicalize: + // binop(splat(x), step_vector) -> binop(step_vector, splat(x)) + else if (N1.getOpcode() == ISD::SPLAT_VECTOR && + N2.getOpcode() == ISD::STEP_VECTOR) + std::swap(N1, N2); +} + SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { assert(N1.getOpcode() != ISD::DELETED_NODE && N2.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); - // Canonicalize constant to RHS if commutative. 
- if (TLI->isCommutativeBinOp(Opcode)) { - bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); - bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); - bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); - bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); - if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) - std::swap(N1, N2); - } + + canonicalizeCommutativeBinop(Opcode, N1, N2); auto *N1C = dyn_cast<ConstantSDNode>(N1); auto *N2C = dyn_cast<ConstantSDNode>(N2); @@ -5946,6 +6158,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { if (VT == N1.getOperand(1).getValueType()) return N1.getOperand(1); + if (VT.isFloatingPoint()) { + assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits()); + return getFPExtendOrRound(N1.getOperand(1), DL, VT); + } return getSExtOrTrunc(N1.getOperand(1), DL, VT); } return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); @@ -6043,9 +6259,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef + case ISD::SIGN_EXTEND_INREG: case ISD::UDIV: case ISD::SDIV: case ISD::UREM: @@ -6534,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 2; + NewAlign = NewAlign.previous(); if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. @@ -6782,17 +6998,18 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// \param Size Number of bytes to write. /// \param Alignment Alignment of the destination in bytes. /// \param isVol True if destination is volatile. +/// \param AlwaysInline Makes sure no function call is generated. /// \param DstPtrInfo IR information on the memory pointer. /// \returns New head in the control flow, if lowering was successful, empty /// SDValue otherwise. /// /// The function tries to replace 'llvm.memset' intrinsic with several store /// operations and value calculation code. This is usually profitable for small -/// memory size. +/// memory size or when the semantics require inlining. static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, Align Alignment, bool isVol, - MachinePointerInfo DstPtrInfo, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even if Src is undef. @@ -6812,8 +7029,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, DstAlignCanChange = true; bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); + unsigned Limit = AlwaysInline ? 
~0 : TLI.getMaxStoresPerMemset(OptSize); + if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOps, Limit, MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); @@ -6964,10 +7183,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, } SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, - SDValue Dst, unsigned DstAlign, - SDValue Src, unsigned SrcAlign, - SDValue Size, Type *SizeTy, - unsigned ElemSz, bool isTailCall, + SDValue Dst, SDValue Src, SDValue Size, + Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Emit a library call. @@ -7067,10 +7285,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, } SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, - SDValue Dst, unsigned DstAlign, - SDValue Src, unsigned SrcAlign, - SDValue Size, Type *SizeTy, - unsigned ElemSz, bool isTailCall, + SDValue Dst, SDValue Src, SDValue Size, + Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Emit a library call. @@ -7109,7 +7326,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool isTailCall, + bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Check to see if we should lower the memset to stores first. @@ -7122,7 +7339,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, DstPtrInfo, AAInfo); + isVol, false, DstPtrInfo, AAInfo); if (Result.getNode()) return Result; @@ -7132,45 +7349,75 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemset( - *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo); + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo); if (Result.getNode()) return Result; } + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, AAInfo); + assert(Result && + "getMemsetStores must return a valid sequence when AlwaysInline"); + return Result; + } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); // Emit a library call. 
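// A standalone sketch, not part of the patch: with AlwaysInline set, the
// store-count cap passed to findOptimalMemOpLowering becomes effectively
// unlimited (~0), so a constant-size memset can always be expanded to
// stores, which the mandatory-inline path above relies on. A reference for
// the kind of code that expansion produces (simplified; the real lowering
// picks store types from the target and handles misaligned edges):
#include <cstddef>
#include <cstdint>
#include <cstring>

void inlineMemsetRef(uint8_t *Dst, uint8_t Val, size_t Size) {
  uint64_t Splat = 0x0101010101010101ULL * Val; // splat the byte 8 times
  size_t I = 0;
  for (; I + 8 <= Size; I += 8)
    std::memcpy(Dst + I, &Splat, 8); // one 64-bit store per iteration
  for (; I < Size; ++I)
    Dst[I] = Val; // byte-sized tail
}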
- TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); - Args.push_back(Entry); - Entry.Node = Src; - Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); - Args.push_back(Entry); - Entry.Node = Size; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Args.push_back(Entry); + auto &Ctx = *getContext(); + const auto &DL = getDataLayout(); - // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl) - .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) - .setDiscardResult() - .setTailCall(isTailCall); + // FIXME: pass in SDLoc + CLI.setDebugLoc(dl).setChain(Chain); + + ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src); + const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero(); + const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); + + // Helper function to create an Entry from Node and Type. + const auto CreateEntry = [](SDValue Node, Type *Ty) { + TargetLowering::ArgListEntry Entry; + Entry.Node = Node; + Entry.Ty = Ty; + return Entry; + }; - std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + // If zeroing out and bzero is present, use it. + if (SrcIsZero && BzeroName) { + TargetLowering::ArgListTy Args; + Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + CLI.setLibCallee( + TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), + getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); + } else { + TargetLowering::ArgListTy Args; + Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); + Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Dst.getValueType().getTypeForEVT(Ctx), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(DL)), + std::move(Args)); + } + + CLI.setDiscardResult().setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, - SDValue Dst, unsigned DstAlign, - SDValue Value, SDValue Size, Type *SizeTy, - unsigned ElemSz, bool isTailCall, + SDValue Dst, SDValue Value, SDValue Size, + Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo) { // Emit a library call. 
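// A standalone check, not part of the patch: the rewritten libcall emission
// above picks bzero over memset when the stored value is a constant zero and
// the target advertises RTLIB::BZERO. The two calls are equivalent for that
// case; bzero is POSIX and assumed available only for this demonstration:
#include <cassert>
#include <cstring>
#include <strings.h>

int main() {
  char A[16], B[16];
  memset(A, 0, sizeof(A));
  bzero(B, sizeof(B));
  assert(memcmp(A, B, sizeof(A)) == 0);
  return 0;
}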
TargetLowering::ArgListTy Args; @@ -7214,6 +7461,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void* IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7326,6 +7574,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( Opcode, dl.getIROrder(), VTList, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7498,6 +7747,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7599,6 +7849,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7665,6 +7916,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7693,6 +7945,7 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + ID.AddInteger(ST->getMemOperand()->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); @@ -7750,6 +8003,7 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7842,6 +8096,7 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7912,6 +8167,7 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -7942,6 +8198,7 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue 
OrigStore, const SDLoc &dl, ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + ID.AddInteger(ST->getMemOperand()->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); @@ -7958,6 +8215,259 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, return V; } +SDValue SelectionDAG::getStridedLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges, bool IsExpanding) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + + uint64_t Size = MemoryLocation::UnknownSize; + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); + return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask, + EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getStridedLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); + + SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL}; + SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = + newSDNode(DL.getIROrder(), DL.getDebugLoc(), VTs, AM, + ExtType, IsExpanding, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getStridedLoadVP( + EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, + Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment, + MMOFlags, AAInfo, Ranges, IsExpanding); +} + +SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, + SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, + bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, + Undef, Stride, Mask, EVL, VT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getExtStridedLoadVP( + ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, + Stride, Mask, EVL, PtrInfo, MemVT, Alignment, + MMOFlags, AAInfo, nullptr, IsExpanding); +} + +SDValue SelectionDAG::getExtStridedLoadVP( + ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, + Stride, Mask, EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *SLD = cast(OrigLoad); + assert(SLD->getOffset().isUndef() && + "Strided load is already a indexed load!"); + // Don't propagate the invariant or dereferenceable flags. 
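// A standalone sketch, not part of the patch: scalar reference semantics for
// the strided VP load nodes built above (experimental.vp.strided.load). Lane
// I reads sizeof(element) bytes from Base + I * Stride, where Stride is a
// byte stride, but only when I < EVL and the mask lane is set. Hypothetical
// helper with i32 elements:
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<int32_t> stridedLoadRef(const uint8_t *Base, int64_t Stride,
                                    const std::vector<bool> &Mask,
                                    unsigned EVL) {
  std::vector<int32_t> Out(Mask.size(), 0); // disabled lanes are undefined in
                                            // the real node; zeroed here
  for (unsigned I = 0; I < EVL && I < Mask.size(); ++I)
    if (Mask[I])
      std::memcpy(&Out[I], Base + int64_t(I) * Stride, sizeof(int32_t));
  return Out;
}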
+ auto MMOFlags = + SLD->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); + return getStridedLoadVP( + AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(), + Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(), + SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags, + SLD->getAAInfo(), nullptr, SLD->isExpandingLoad()); +} + +SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL, + SDValue Val, SDValue Ptr, + SDValue Offset, SDValue Stride, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, + bool IsTruncating, bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); + SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode(DL.getIROrder(), DL.getDebugLoc(), + VTs, AM, IsTruncating, + IsCompressing, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getTruncStridedStoreVP( + SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo); + return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT, + MMO, IsCompressing); +} + +SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, + SDValue Val, SDValue Ptr, + SDValue Stride, SDValue Mask, + SDValue EVL, EVT SVT, + MachineMemOperand *MMO, + bool IsCompressing) { + EVT VT = Val.getValueType(); + + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + if (VT == SVT) + return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()), + Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED, + /*IsTruncating*/ false, IsCompressing); + + assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); + assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == SVT.getVectorElementCount()) && + "Cannot use trunc store to change the number of vector elements!"); + + 
SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode(DL.getIROrder(), DL.getDebugLoc(), + VTs, ISD::UNINDEXED, true, + IsCompressing, SVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore, + const SDLoc &DL, SDValue Base, + SDValue Offset, + ISD::MemIndexedMode AM) { + auto *SST = cast(OrigStore); + assert(SST->getOffset().isUndef() && + "Strided store is already an indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = { + SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(), + SST->getMask(), SST->getVectorLength()}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); + ID.AddInteger(SST->getMemoryVT().getRawBits()); + ID.AddInteger(SST->getRawSubclassData()); + ID.AddInteger(SST->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + auto *N = newSDNode( + DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(), + SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand()); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType) { @@ -7969,6 +8479,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -8012,6 +8523,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -8061,6 +8573,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -8108,6 +8621,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); 
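// A standalone sketch, not part of the patch: the ID.AddInteger(MMO->getFlags())
// lines threaded through the hunks above make the memory-operand flags part of
// each node's CSE profile, so two operations that differ only in flags
// (volatile vs. non-volatile, say) can no longer be unified by FoldingSet
// lookup. A toy stand-in for that profile (not the LLVM FoldingSetNodeID API;
// flag values illustrative):
#include <cassert>
#include <cstdint>

struct NodeProfile {
  uint32_t AddrSpace;
  uint32_t MMOFlags; // the newly added discriminator
  bool operator==(const NodeProfile &O) const {
    return AddrSpace == O.AddrSpace && MMOFlags == O.MMOFlags;
  }
};

int main() {
  NodeProfile Plain{0, /*load*/ 1};
  NodeProfile Volatile{0, /*load|volatile*/ 1 | 4};
  assert(!(Plain == Volatile)); // must not be CSE-equivalent
  return 0;
}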
void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); @@ -8149,13 +8663,13 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, MMO, IndexType, ExtTy); createOperands(N, Ops); @@ -8196,13 +8710,13 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData( dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, MMO, IndexType, IsTrunc); createOperands(N, Ops); @@ -8400,6 +8914,41 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Ops[2].getValueType() == Ops[3].getValueType() && "LHS/RHS of comparison should match types!"); break; + case ISD::VP_ADD: + case ISD::VP_SUB: + // If it is VP_ADD/VP_SUB mask operation then turn it to VP_XOR + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + Opcode = ISD::VP_XOR; + break; + case ISD::VP_MUL: + // If it is VP_MUL mask operation then turn it to VP_AND + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + Opcode = ISD::VP_AND; + break; + case ISD::VP_REDUCE_MUL: + // If it is VP_REDUCE_MUL mask operation then turn it to VP_REDUCE_AND + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_AND; + break; + case ISD::VP_REDUCE_ADD: + // If it is VP_REDUCE_ADD mask operation then turn it to VP_REDUCE_XOR + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_XOR; + break; + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_UMIN: + // If it is VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation then turn it to + // VP_REDUCE_AND. + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_AND; + break; + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + // If it is VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation then turn it to + // VP_REDUCE_OR. + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_OR; + break; } // Memoize nodes. @@ -8446,7 +8995,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) - return getNode(Opcode, DL, VTList.VTs[0], Ops); + return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); #ifndef NDEBUG for (auto &Op : Ops) @@ -9659,19 +10208,36 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ namespace { - /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith - /// to record information about a use. - struct UseMemo { - SDNode *User; - unsigned Index; - SDUse *Use; - }; +/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith +/// to record information about a use. 
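// A standalone check, not part of the patch: the VP hunks above exploit i1
// arithmetic identities, since addition and subtraction mod 2 are XOR and
// multiplication is AND; with booleans encoded as {0, -1}, the signed-max /
// unsigned-min reductions behave like AND while signed-min / unsigned-max
// behave like OR. Verifying the scalar identities:
#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert(((A + B) & 1) == (A ^ B)); // VP_ADD on i1 == VP_XOR
      assert(((A - B) & 1) == (A ^ B)); // VP_SUB on i1 == VP_XOR
      assert((A * B) == (A & B));       // VP_MUL on i1 == VP_AND
    }
  return 0;
}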
+struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; +}; - /// operator< - Sort Memos by User. - bool operator<(const UseMemo &L, const UseMemo &R) { - return (intptr_t)L.User < (intptr_t)R.User; +/// operator< - Sort Memos by User. +bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; +} + +/// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node +/// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that +/// the node already has been taken care of recursively. +class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { + SmallVector<UseMemo, 4> &Uses; + + void NodeDeleted(SDNode *N, SDNode *E) override { + for (UseMemo &Memo : Uses) + if (Memo.User == N) + Memo.User = nullptr; } +public: + RAUOVWUpdateListener(SelectionDAG &d, SmallVector<UseMemo, 4> &uses) + : SelectionDAG::DAGUpdateListener(d), Uses(uses) {} +}; + } // end anonymous namespace bool SelectionDAG::calculateDivergence(SDNode *N) { @@ -9763,12 +10329,19 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, // Sort the uses, so that all the uses from a given User are together. llvm::sort(Uses); + RAUOVWUpdateListener Listener(*this, Uses); for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); UseIndex != UseIndexEnd; ) { // We know that this user uses some value of From. If it is the right // value, update it. SDNode *User = Uses[UseIndex].User; + // If the node has been deleted by recursive CSE updates when updating + // another node, then just skip this entry. + if (User == nullptr) { + ++UseIndex; + continue; + } // This node is about to morph, remove its old self from the CSE maps. RemoveNodeFromCSEMaps(User); @@ -9965,6 +10538,11 @@ bool llvm::isOneConstant(SDValue V) { return Const != nullptr && Const->isOne(); } +bool llvm::isMinSignedConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isMinSignedValue(); +} + SDValue llvm::peekThroughBitcasts(SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); @@ -10095,10 +10673,9 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { } bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { - // TODO: may want to use peekThroughBitcast() here. - unsigned BitWidth = N.getScalarValueSizeInBits(); - ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); - return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth; + ConstantSDNode *C = + isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true); + return C && C->isOne(); } bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { @@ -10947,9 +11524,8 @@ bool BuildVectorSDNode::getConstantRawBits( auto *CInt = dyn_cast<ConstantSDNode>(Op); auto *CFP = dyn_cast<ConstantFPSDNode>(Op); assert((CInt || CFP) && "Unknown constant"); - SrcBitElements[I] = - CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits) - : CFP->getValueAPF().bitcastToAPInt(); + SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits) + : CFP->getValueAPF().bitcastToAPInt(); } // Recast to dst width. 
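// A standalone sketch, not part of the patch, of the invalidation scheme
// behind RAUOVWUpdateListener above: a deletion callback nulls out pending
// work-list entries, and the main replacement loop skips null users instead
// of touching freed nodes. Hypothetical minimal types:
#include <cassert>
#include <vector>

struct Node {};
struct PendingUse { Node *User; };

void onNodeDeleted(std::vector<PendingUse> &Uses, Node *Dead) {
  for (PendingUse &U : Uses)
    if (U.User == Dead)
      U.User = nullptr; // entry is skipped later, not dereferenced
}

int main() {
  Node A;
  std::vector<PendingUse> Uses{{&A}};
  onNodeDeleted(Uses, &A); // simulate recursive CSE deleting a pending user
  assert(Uses[0].User == nullptr);
  return 0;
}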
@@ -11068,6 +11644,10 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) return N.getNode(); + if ((N.getOpcode() == ISD::SPLAT_VECTOR) && + isa(N.getOperand(0))) + return N.getNode(); + return nullptr; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 6d8252046501..d236433f6fb4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -96,7 +96,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode())) return false; int64_t PtrDiff; - if (NumBytes0.hasValue() && NumBytes1.hasValue() && + if (NumBytes0 && NumBytes1 && BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { // If the size of memory access is unknown, do not use it to analysis. // One example of unknown size memory access is to load/store scalable diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 01230a36e744..37d05cdba76d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -24,25 +24,21 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" @@ -89,7 +85,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -102,10 +97,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/Local.h" #include -#include #include #include -#include #include using namespace llvm; @@ -224,10 +217,10 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); - Hi = - DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueSizeInBits(), DL, - TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueSizeInBits(), DL, + TLI.getShiftAmountTy( + TotalVT, DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } 
@@ -276,7 +269,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. - if (AssertOp.hasValue()) + if (AssertOp) Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); @@ -330,7 +323,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, Optional CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); - const bool IsABIRegCopy = CallConv.hasValue(); + const bool IsABIRegCopy = CallConv.has_value(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; @@ -344,7 +337,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, + *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } else { NumRegs = @@ -566,7 +559,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false)); + DAG.getShiftAmountConstant(RoundBits, ValueVT, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V, CallConv); @@ -654,7 +647,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const bool IsABIRegCopy = CallConv.hasValue(); + const bool IsABIRegCopy = CallConv.has_value(); if (NumParts == 1) { EVT PartEVT = PartVT; @@ -733,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, DestEltCnt = ElementCount::getFixed(NumIntermediates); EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue()); + *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt); if (ValueVT == BuiltVectorTy) { // Nothing to do. @@ -1236,7 +1229,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, // in the first place we should not be more successful here). Unless we // have some test case that prove this to be correct we should avoid // calling EmitFuncArgumentDbgValue here. - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, + FuncArgumentDbgValueKind::Value, Val)) { LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" << DbgSDNodeOrder << "] for:\n " << *DI << "\n"); LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); @@ -1367,7 +1361,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef Values, N = UnusedArgNodeMap[V]; if (N.getNode()) { // Only emit func arg dbg value for non-variadic dbg.values for now. 
- if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N)) + if (!IsVariadic && + EmitFuncArgumentDbgValue(V, Var, Expr, dl, + FuncArgumentDbgValueKind::Value, N)) return true; if (auto *FISDN = dyn_cast(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can @@ -1639,7 +1635,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); - } else if (isa(C)) { + } + + if (isa(C)) { EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); @@ -1651,12 +1649,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(VecTy)) return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); - else { - SmallVector Ops; - Ops.assign(cast(VecTy)->getNumElements(), Op); - return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); - } + + SmallVector Ops; + Ops.assign(cast(VecTy)->getNumElements(), Op); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } + llvm_unreachable("Unknown vector constant"); } @@ -1680,11 +1678,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } - if (const MetadataAsValue *MD = dyn_cast(V)) { + if (const MetadataAsValue *MD = dyn_cast(V)) return DAG.getMDNode(cast(MD->getMetadata())); - } + if (const auto *BB = dyn_cast(V)) return DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + llvm_unreachable("Can't get register for value!"); } @@ -2748,10 +2747,10 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, None, CallOptions, getCurSDLoc()).second; - // On PS4, the "return address" must still be within the calling function, - // even if it's at the very end, so emit an explicit TRAP here. + // On PS4/PS5, the "return address" must still be within the calling + // function, even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. - if (TM.getTargetTriple().isPS4CPU()) + if (TM.getTargetTriple().isPS()) Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); // WebAssembly needs an unreachable instruction after a non-returning call, // because the function return type can be different from __stack_chk_fail's @@ -3150,26 +3149,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( Op1.getValueType(), DAG.getDataLayout()); - // Coerce the shift amount to the right type if we can. + // Coerce the shift amount to the right type if we can. This exposes the + // truncate or zext to optimization early. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { - unsigned ShiftSize = ShiftTy.getSizeInBits(); - unsigned Op2Size = Op2.getValueSizeInBits(); - SDLoc DL = getCurSDLoc(); - - // If the operand is smaller than the shift count type, promote it. - if (ShiftSize > Op2Size) - Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); - - // If the operand is larger than the shift count type but the shift - // count type has enough bits to represent any shift value, truncate - // it now. This is a common case and it exposes the truncate to - // optimization early. 
- else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits())) - Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); - // Otherwise we'll need to temporarily settle for some other convenient - // type. Type legalization will make adjustments once the shiftee is split. - else - Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); + assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) && + "Unexpected shift type"); + Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy); } bool nuw = false; @@ -3816,13 +3801,8 @@ void SelectionDAGBuilder::visitInsertValue(const User &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const User &I) { - ArrayRef Indices; - if (const ExtractValueInst *EV = dyn_cast(&I)) - Indices = EV->getIndices(); - else - Indices = cast(&I)->getIndices(); - +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { + ArrayRef Indices = I.getIndices(); const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); @@ -4376,7 +4356,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // In all other cases the function returns 'false'. static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ISD::MemIndexType &IndexType, SDValue &Scale, - SelectionDAGBuilder *SDB, const BasicBlock *CurBB) { + SelectionDAGBuilder *SDB, const BasicBlock *CurBB, + uint64_t ElemSize) { SelectionDAG& DAG = SDB->DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); @@ -4416,9 +4397,16 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, Base = SDB->getValue(BasePtr); Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - Scale = DAG.getTargetConstant( - DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + + // MGATHER/MSCATTER are only required to support scaling by one or by the + // element size. Other scales may be produced using target-specific DAG + // combines. 
+ uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); + if (ScaleVal != ElemSize && ScaleVal != 1) + return false; + + Scale = + DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } @@ -4432,7 +4420,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { EVT VT = Src0.getValueType(); Align Alignment = cast(I.getArgOperand(2)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT.getScalarType())); + .value_or(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Base; @@ -4440,7 +4428,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { ISD::MemIndexType IndexType; SDValue Scale; bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, - I.getParent()); + I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( @@ -4451,7 +4439,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } @@ -4538,7 +4526,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Align Alignment = cast(I.getArgOperand(1)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT.getScalarType())); + .value_or(DAG.getEVTAlign(VT.getScalarType())); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); @@ -4548,7 +4536,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { ISD::MemIndexType IndexType; SDValue Scale; bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, - I.getParent()); + I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, @@ -4559,7 +4547,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } @@ -4678,7 +4666,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); if (!TLI.supportsUnalignedAtomics() && - I.getAlignment() < MemVT.getSizeInBits() / 8) + I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); @@ -4730,7 +4718,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < MemVT.getSizeInBits() / 8) + if (I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4781,7 +4769,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } - // Info is set by getTgtMemInstrinsic + // Info is set by getTgtMemIntrinsic 
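// Standalone model of the natural-alignment rule enforced by the atomic
// load/store hunks above: an access of S bits needs at least S/8 bytes of
// alignment; only the load path consults target support for unaligned
// atomics, the store path applies the rule unconditionally. Illustrative
// only.
#include <cstdint>
#include <stdexcept>
static void checkAtomicAlignModel(uint64_t AlignBytes, uint64_t SizeBits,
                                  bool SupportsUnalignedAtomics) {
  if (!SupportsUnalignedAtomics && AlignBytes < SizeBits / 8)
    throw std::runtime_error("cannot generate unaligned atomic access");
}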
TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, @@ -4895,7 +4883,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode( ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getConstant(23, dl, + TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); @@ -4920,10 +4909,11 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode( - ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( - DAG.getDataLayout()))); + IntegerPartOfX = + DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, dl, + DAG.getTargetLoweringInfo().getShiftAmountTy( + MVT::i32, DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -5351,38 +5341,36 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, /// ExpandPowI - Expand a llvm.powi intrinsic. static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { - // If RHS is a constant, we can expand this out to a multiplication tree, - // otherwise we end up lowering to a call to __powidf2 (for example). When - // optimizing for size, we only want to do this if the expansion would produce - // a small number of multiplies, otherwise we do the full expansion. + // If RHS is a constant, we can expand this out to a multiplication tree if + // it's beneficial on the target, otherwise we end up lowering to a call to + // __powidf2 (for example). if (ConstantSDNode *RHSC = dyn_cast(RHS)) { - // Get the exponent as a positive value. unsigned Val = RHSC->getSExtValue(); - if ((int)Val < 0) Val = -Val; // powi(x, 0) -> 1.0 if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - bool OptForSize = DAG.shouldOptForSize(); - if (!OptForSize || - // If optimizing for size, don't insert too many multiplies. - // This inserts up to 5 multiplies. - countPopulation(Val) + Log2_32(Val) < 7) { + if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI( + Val, DAG.shouldOptForSize())) { + // Get the exponent as a positive value. + if ((int)Val < 0) + Val = -Val; // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has // the benefit of being both really simple and much better than a libcall. - SDValue Res; // Logically starts equal to 1.0 + SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; // TODO: Intrinsics should have fast-math-flags that propagate to these // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) - Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); + Res = + DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare); else - Res = CurSquare; // 1.0*CurSquare. + Res = CurSquare; // 1.0*CurSquare. } CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), @@ -5503,7 +5491,7 @@ getUnderlyingArgRegs(SmallVectorImpl> &Regs, /// appear for function arguments or in the prologue. 
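// Runnable model of the binary decomposition used by ExpandPowI above: scan
// the exponent bits LSB-first, squaring as we go and folding the set bits
// into the result. The DAG expansion is the same loop but starts Res
// "empty", so the first set bit is an assignment rather than a multiply by
// 1.0; as the original comment notes, powi(x, 15) still emits one more
// multiply (6) than an optimal addition chain (5).
static double powiModel(double X, unsigned Val) {
  double Res = 1.0;      // logically starts equal to 1.0
  double CurSquare = X;  // X, X^2, X^4, X^8, ...
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;  // fold in this power of two of the exponent
    CurSquare *= CurSquare;
    Val >>= 1;
  }
  return Res;
}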
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, bool IsDbgDeclare, const SDValue &N) { + DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -5537,7 +5525,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } }; - if (!IsDbgDeclare) { + if (Kind == FuncArgumentDbgValueKind::Value) { // ArgDbgValues are hoisted to the beginning of the entry block. So we // should only emit as ArgDbgValue if the dbg.value intrinsic is found in // the entry block. @@ -5624,7 +5612,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); - IsIndirect = IsDbgDeclare; + IsIndirect = Kind != FuncArgumentDbgValueKind::Value; } } @@ -5672,7 +5660,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( continue; } MachineInstr *NewMI = - MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare); + MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, + Kind != FuncArgumentDbgValueKind::Value); FuncInfo.ArgDbgValues.push_back(NewMI); } }; @@ -5690,7 +5679,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); - IsIndirect = IsDbgDeclare; + IsIndirect = Kind != FuncArgumentDbgValueKind::Value; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. @@ -5712,6 +5701,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op, Variable, Expr); + // Otherwise, use ArgDbgValues. FuncInfo.ArgDbgValues.push_back(NewMI); return true; } @@ -5817,16 +5807,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::vacopy: visitVACopy(I); return; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getValueType(DAG.getDataLayout(), I.getType()), getValue(I.getArgOperand(0)))); return; case Intrinsic::addressofreturnaddress: - setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, + DAG.getNode(ISD::ADDROFRETURNADDR, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::sponentry: - setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getFrameIndexTy(DAG.getDataLayout()))); + setValue(&I, + DAG.getNode(ISD::SPONENTRY, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, @@ -5864,7 +5856,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memcpy defines 0 and 1 to both mean no alignment. Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG @@ -5887,7 +5879,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
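// The commonAlignment() -> std::min() rewrites in the surrounding memcpy /
// memmove hunks are equivalence-preserving: Align values are powers of two,
// and for powers of two the strongest alignment implied by both operands
// (their gcd) is simply the smaller one. A runnable check of that claim:
#include <algorithm>
#include <cassert>
#include <numeric>
static void checkMinIsCommonAlign() {
  for (unsigned A = 1; A <= 4096; A <<= 1)
    for (unsigned B = 1; B <= 4096; B <<= 1)
      assert(std::gcd(A, B) == std::min(A, B)); // gcd of pow2s is the min
}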
Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG @@ -5910,10 +5902,28 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, + SDValue MS = DAG.getMemset( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, + isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); + updateDAGForMaybeTailCall(MS); + return; + } + case Intrinsic::memset_inline: { + const auto &MSII = cast(I); + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Value = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + assert(isa(Size) && "memset_inline needs constant size"); + // @llvm.memset defines 0 and 1 to both mean no alignment. + Align DstAlign = MSII.getDestAlign().valueOrOne(); + bool isVol = MSII.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); - updateDAGForMaybeTailCall(MS); + updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memmove: { @@ -5924,7 +5934,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memmove defines 0 and 1 to both mean no alignment. 
Align DstAlign = MMI.getDestAlign().valueOrOne(); Align SrcAlign = MMI.getSourceAlign().valueOrOne(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG @@ -5943,15 +5953,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - unsigned DstAlign = MI.getDestAlignment(); - unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, - SrcAlign, Length, LengthTy, ElemSz, isTC, - MachinePointerInfo(MI.getRawDest()), - MachinePointerInfo(MI.getRawSource())); + SDValue MC = + DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz, + isTC, MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); return; } @@ -5961,15 +5969,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - unsigned DstAlign = MI.getDestAlignment(); - unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, - SrcAlign, Length, LengthTy, ElemSz, isTC, - MachinePointerInfo(MI.getRawDest()), - MachinePointerInfo(MI.getRawSource())); + SDValue MC = + DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz, + isTC, MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); return; } @@ -5979,13 +5985,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); - unsigned DstAlign = MI.getDestAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, - LengthTy, ElemSz, isTC, - MachinePointerInfo(MI.getRawDest())); + SDValue MC = + DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz, + isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); return; } @@ -6085,7 +6090,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else if (isa(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, + FuncArgumentDbgValueKind::Declare, N); return; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), @@ -6095,8 +6101,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
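// Minimal sketch of the three-way kind that replaces the old IsDbgDeclare
// flag in this patch (enumerator names match the declaration added to
// SelectionDAGBuilder.h further below; the helper is illustrative):
enum class FuncArgumentDbgValueKindModel { Value, Addr, Declare };
static bool wantsIndirectDbgValue(FuncArgumentDbgValueKindModel Kind) {
  // dbg.addr and dbg.declare describe the variable's *address*, so the
  // resulting DBG_VALUE is emitted as indirect; dbg.value carries the value
  // itself.
  return Kind != FuncArgumentDbgValueKindModel::Value;
}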
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, - N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, + FuncArgumentDbgValueKind::Declare, N)) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << " (could not emit func-arg dbg_value)\n"); } @@ -6162,8 +6168,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); - assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0)); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); @@ -6343,6 +6348,29 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, #include "llvm/IR/VPIntrinsics.def" visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I)); return; + case Intrinsic::fptrunc_round: { + // Get the last argument, the metadata, and convert it to an integer in + // the call. + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata(); + Optional<RoundingMode> RoundMode = + convertStrToRoundingMode(cast<MDString>(MD)->getString()); + + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + // Propagate fast-math-flags from IR to node(s). + SDNodeFlags Flags; + Flags.copyFMF(*cast<FPMathOperator>(&I)); + SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); + + SDValue Result; + Result = DAG.getNode( + ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)), + DAG.getTargetConstant((int)*RoundMode, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, Result); + + return; + } case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -6397,6 +6425,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Res); DAG.setRoot(Res.getValue(0)); return; + case Intrinsic::is_fpclass: { + const DataLayout DLayout = DAG.getDataLayout(); + EVT DestVT = TLI.getValueType(DLayout, I.getType()); + EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType()); + unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + MachineFunction &MF = DAG.getMachineFunction(); + const Function &F = MF.getFunction(); + SDValue Op = getValue(I.getArgOperand(0)); + SDNodeFlags Flags; + Flags.setNoFPExcept( + !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP)); + // If ISD::IS_FPCLASS should be expanded, do it right now, because the + // expansion can use illegal types. Expanding early allows these types + // to be legalized prior to selection. + if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) { + SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG); + setValue(&I, Result); + return; + } + + SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32); + SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags); + setValue(&I, V); + return; + } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); @@ -6843,7 +6896,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, + DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::invariant_end: // Discard region information.
@@ -7147,7 +7201,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, SetCC); return; } - case Intrinsic::experimental_vector_insert: { + case Intrinsic::vector_insert: { SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); @@ -7164,7 +7218,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Index)); return; } - case Intrinsic::experimental_vector_extract: { + case Intrinsic::vector_extract: { SDValue Vec = getValue(I.getOperand(0)); SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -7242,7 +7296,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( }; SDVTList VTs = DAG.getVTList(ValueVTs); - fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; if (EB == fp::ExceptionBehavior::ebIgnore) @@ -7307,13 +7361,14 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { Optional ResOPC; switch (VPIntrin.getIntrinsicID()) { -#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: -#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD; -#define END_REGISTER_VP_INTRINSIC(VPID) break; +#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ + case Intrinsic::VPID: \ + ResOPC = ISD::VPSD; \ + break; #include "llvm/IR/VPIntrinsics.def" } - if (!ResOPC.hasValue()) + if (!ResOPC) llvm_unreachable( "Inconsistency: no SDNode available for this VPIntrinsic!"); @@ -7324,7 +7379,7 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { : ISD::VP_REDUCE_FMUL; } - return ResOPC.getValue(); + return *ResOPC; } void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, @@ -7362,11 +7417,12 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent()); + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } @@ -7418,11 +7474,12 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent()); + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } @@ -7441,18 +7498,104 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, setValue(&VPIntrin, ST); } +void SelectionDAGBuilder::visitVPStridedLoad( + const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl &OpValues) { + SDLoc DL = getCurSDLoc(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + 
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); + bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + + SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], + OpValues[2], OpValues[3], MMO, + false /*IsExpanding*/); + + if (AddToChain) + PendingLoads.push_back(LD.getValue(1)); + setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStridedStore( + const VPIntrinsic &VPIntrin, SmallVectorImpl &OpValues) { + SDLoc DL = getCurSDLoc(); + Value *PtrOperand = VPIntrin.getArgOperand(1); + EVT VT = OpValues[0].getValueType(); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + + SDValue ST = DAG.getStridedStoreVP( + getMemoryRoot(), DL, OpValues[0], OpValues[1], + DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3], + OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false, + /*IsCompressing*/ false); + + DAG.setRoot(ST); + setValue(&VPIntrin, ST); +} + +void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL = getCurSDLoc(); + + ISD::CondCode Condition; + CmpInst::Predicate CondCode = VPIntrin.getPredicate(); + bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy(); + if (IsFP) { + // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan) + // flags, but calls that don't return floating-point types can't be + // FPMathOperators, like vp.fcmp. This affects constrained fcmp too. 
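// Why getFCmpCodeWithoutNaN (used just below) is sound under
// -no-nans-fp-math: ordered and unordered predicates differ only when an
// input is NaN. Runnable illustration for "<":
#include <cmath>
static bool oltModel(double A, double B) {
  return !std::isnan(A) && !std::isnan(B) && A < B; // SETOLT
}
static bool ultModel(double A, double B) {
  return std::isnan(A) || std::isnan(B) || A < B;   // SETULT
}
// For NaN-free inputs both reduce to (A < B), so either form may be chosen.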
+ Condition = getFCmpCondCode(CondCode); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + } else { + Condition = getICmpCondCode(CondCode); + } + + SDValue Op1 = getValue(VPIntrin.getOperand(0)); + SDValue Op2 = getValue(VPIntrin.getOperand(1)); + // #2 is the condition code + SDValue MaskOp = getValue(VPIntrin.getOperand(3)); + SDValue EVL = getValue(VPIntrin.getOperand(4)); + MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); + assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && + "Unexpected target EVL type"); + EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL); + + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + VPIntrin.getType()); + setValue(&VPIntrin, + DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL)); +} + void SelectionDAGBuilder::visitVectorPredicationIntrinsic( const VPIntrinsic &VPIntrin) { SDLoc DL = getCurSDLoc(); unsigned Opcode = getISDForVPIntrinsic(VPIntrin); + auto IID = VPIntrin.getIntrinsicID(); + + if (const auto *CmpI = dyn_cast(&VPIntrin)) + return visitVPCmp(*CmpI); + SmallVector ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); - auto EVLParamPos = - VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID()); + auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID); MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && @@ -7469,7 +7612,10 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( switch (Opcode) { default: { - SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast(&VPIntrin)) + SDFlags.copyFMF(*FPMO); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags); setValue(&VPIntrin, Result); break; } @@ -7478,10 +7624,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, Opcode == ISD::VP_GATHER); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues); + break; case ISD::VP_STORE: case ISD::VP_SCATTER: visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + visitVPStridedStore(VPIntrin, OpValues); + break; } } @@ -7756,7 +7908,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); const Value *Size = I.getArgOperand(2); - const ConstantInt *CSize = dyn_cast(Size); + const ConstantSDNode *CSize = dyn_cast(getValue(Size)); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); @@ -8277,7 +8429,7 @@ public: // accessed type. if (isIndirect) { OpTy = ParamElemType; - assert(OpTy && "Indirect opernad must have elementtype attribute"); + assert(OpTy && "Indirect operand must have elementtype attribute"); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. @@ -8398,8 +8550,9 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SmallVector Regs; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - // No work to do for memory operations. - if (OpInfo.ConstraintType == TargetLowering::C_Memory) + // No work to do for memory/address operands. 
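// Sketch of how the new C_Address constraint kind threads through the
// register-assignment logic below: like C_Memory, an address operand never
// consumes registers. Kind names mirror TargetLowering::ConstraintType; the
// helper itself is illustrative.
enum class ConstraintKindModel { Register, RegisterClass, Memory, Address, Other };
static bool consumesRegisters(ConstraintKindModel K) {
  return K != ConstraintKindModel::Memory && K != ConstraintKindModel::Address;
}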
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Address) return None; // If this is a constraint for a single physreg, or a constraint for a @@ -8579,7 +8732,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (OpInfo.hasArg()) { OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo); + Type *ParamElemTy = Call.getParamElementType(ArgNo); EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout(), ParamElemTy); OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; @@ -8657,8 +8810,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); - if (OpInfo.ConstraintType == TargetLowering::C_Memory && - OpInfo.Type == InlineAsm::isClobber) + if ((OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) || + OpInfo.ConstraintType == TargetLowering::C_Address) continue; // If this is a memory input, and if the operand is not indirect, do what we @@ -8708,7 +8862,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, : OpInfo; const auto RegError = getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); - if (RegError.hasValue()) { + if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const char *RegName = TRI.getName(RegError.getValue()); @@ -8733,6 +8887,10 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } return false; }; + assert((OpInfo.ConstraintType != TargetLowering::C_Address || + (OpInfo.Type == InlineAsm::isInput && + !OpInfo.isMatchingInputConstraint())) && + "Only address as input operand is allowed."); switch (OpInfo.Type) { case InlineAsm::isOutput: @@ -8865,8 +9023,11 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } - if (OpInfo.ConstraintType == TargetLowering::C_Memory) { - assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Address) { + assert((OpInfo.isIndirect || + OpInfo.ConstraintType != TargetLowering::C_Memory) && + "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); @@ -9004,6 +9165,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; case TargetLowering::C_Memory: break; // Already handled. + case TargetLowering::C_Address: + break; // Silence warning. case TargetLowering::C_Unknown: assert(false && "Unexpected unknown constraint"); } @@ -9950,8 +10113,9 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); } -void -SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { +void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, + unsigned Reg, + ISD::NodeType ExtendType) { SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && @@ -9966,10 +10130,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { None); // This is not an ABI copy. 
SDValue Chain = DAG.getEntryNode(); - ISD::NodeType ExtendType = ISD::ANY_EXTEND; - auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); - if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) - ExtendType = PreferredExtendIt->second; + if (ExtendType == ISD::ANY_EXTEND) { + auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); + if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) + ExtendType = PreferredExtendIt->second; + } RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -10542,6 +10707,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { /// the end. void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Instruction *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; @@ -10579,7 +10745,13 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C); - CopyValueToVirtualRegister(C, RegOut); + // We need to zero/sign extend ConstantInt phi operands to match + // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo. + ISD::NodeType ExtendType = ISD::ANY_EXTEND; + if (auto *CI = dyn_cast(C)) + ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + CopyValueToVirtualRegister(C, RegOut, ExtendType); } Reg = RegOut; } else { @@ -10599,7 +10771,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector ValueVTs; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ea48042a5dcf..72cca3d9b001 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -284,7 +284,8 @@ public: return CurInst ? CurInst->getDebugLoc() : DebugLoc(); } - void CopyValueToVirtualRegister(const Value *V, unsigned Reg); + void CopyValueToVirtualRegister(const Value *V, unsigned Reg, + ISD::NodeType ExtendType = ISD::ANY_EXTEND); void visit(const Instruction &I); @@ -527,7 +528,7 @@ private: void visitInsertElement(const User &I); void visitShuffleVector(const User &I); - void visitExtractValue(const User &I); + void visitExtractValue(const ExtractValueInst &I); void visitInsertValue(const User &I); void visitLandingPad(const LandingPadInst &LP); @@ -570,6 +571,11 @@ private: SmallVector &OpValues, bool IsGather); void visitVPStoreScatter(const VPIntrinsic &VPIntrin, SmallVector &OpValues, bool IsScatter); + void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT, + SmallVectorImpl &OpValues); + void visitVPStridedStore(const VPIntrinsic &VPIntrin, + SmallVectorImpl &OpValues); + void visitVPCmp(const VPCmpIntrinsic &VPIntrin); void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin); void visitVAStart(const CallInst &I); @@ -602,12 +608,22 @@ private: void emitInlineAsmError(const CallBase &Call, const Twine &Message); + /// An enum that states to emit func argument dbg value the kind of intrinsic + /// it originally had. 
This controls the internal behavior of + /// EmitFuncArgumentDbgValue. + enum class FuncArgumentDbgValueKind { + Value, // This was originally a llvm.dbg.value. + Addr, // This was originally a llvm.dbg.addr. + Declare, // This was originally a llvm.dbg.declare. + }; + /// If V is a function argument then create corresponding DBG_VALUE machine /// instruction for it now. At the end of instruction selection, they will be /// inserted into the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, - bool IsDbgDeclare, const SDValue &N); + FuncArgumentDbgValueKind Kind, + const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); @@ -673,9 +689,7 @@ struct RegsForValue { const DataLayout &DL, unsigned Reg, Type *Ty, Optional<CallingConv::ID> CC); - bool isABIMangled() const { - return CallConv.hasValue(); - } + bool isABIMangled() const { return CallConv.has_value(); } /// Add the specified values to this one. void append(const RegsForValue &RHS) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 77e9e53668f9..bbfc6e5ef64f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -45,7 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include "SDNodeDbgValue.h" #include #include @@ -231,6 +230,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::MUL: return "mul"; case ISD::MULHU: return "mulhu"; case ISD::MULHS: return "mulhs"; + case ISD::AVGFLOORU: return "avgflooru"; + case ISD::AVGFLOORS: return "avgfloors"; + case ISD::AVGCEILU: return "avgceilu"; + case ISD::AVGCEILS: return "avgceils"; case ISD::ABDS: return "abds"; case ISD::ABDU: return "abdu"; case ISD::SDIV: return "sdiv"; @@ -267,6 +270,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FCANONICALIZE: return "fcanonicalize"; + case ISD::IS_FPCLASS: return "is_fpclass"; case ISD::FPOW: return "fpow"; case ISD::STRICT_FPOW: return "strict_fpow"; case ISD::SMIN: return "smin"; @@ -361,6 +365,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16"; + case ISD::BF16_TO_FP: return "bf16_to_fp"; + case ISD::FP_TO_BF16: return "fp_to_bf16"; case ISD::LROUND: return "lround"; case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; @@ -814,6 +820,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) { if (LN->hasOffset()) OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">"; + } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) { + OS << '<' << AA->getAlign().value() << '>'; } if (VerboseDAGDumping) { diff --git
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3c786904620a..2b63359c2b1b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -15,11 +15,9 @@ #include "SelectionDAGBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -29,6 +27,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -69,7 +68,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -82,7 +80,6 @@ #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" @@ -370,8 +367,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, // PHI. for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantExpr *CE = dyn_cast(PN->getIncomingValue(i)); - if (!CE || !CE->canTrap()) continue; + Constant *C = dyn_cast(PN->getIncomingValue(i)); + if (!C || !C->canTrap()) continue; // The only case we have to worry about is when the edge is critical. // Since this block has a PHI Node, we assume it has multiple input @@ -425,6 +422,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = mf.getFunction(); MF = &mf; + // Decide what flavour of variable location debug-info will be used, before + // we change the optimisation level. + UseInstrRefDebugInfo = mf.useDebugInstrRef(); + CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo); + // Reset the target options before resetting the optimization // level below. // FIXME: This is a horrible hack and should be processed via @@ -654,7 +656,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // For debug-info, in instruction referencing mode, we need to perform some // post-isel maintenence. - MF->finalizeDebugInstrRefs(); + if (UseInstrRefDebugInfo) + MF->finalizeDebugInstrRefs(); // Determine if there are any calls in this machine function. 
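// Shape of the instruction-referencing handshake added in this file: the
// flavour of variable-location debug info is decided once per function,
// before optimisation levels are adjusted, and the later phases (the DAG,
// FastISel, finalization) all consult the same cached flag. Illustrative
// sketch only; member names are assumptions.
struct ISelDebugFlavourModel {
  bool UseInstrRef = false;
  void decideOnce(bool FunctionUsesInstrRef) {
    UseInstrRef = FunctionUsesInstrRef; // decided exactly once, up front
  }
  void finish() {
    if (UseInstrRef) {
      // finalizeDebugInstrRefs() runs only when the mode is actually in use.
    }
  }
};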
MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -703,6 +706,7 @@ static void reportFastISelFailure(MachineFunction &MF, report_fatal_error(Twine(R.getMsg())); ORE.emit(R); + LLVM_DEBUG(dbgs() << R.getMsg() << "\n"); } void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, @@ -1380,6 +1384,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (TM.Options.EnableFastISel) { LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + if (FastIS) + FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo); } ReversePostOrderTraversal RPOT(&Fn); @@ -1519,6 +1525,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. + LLVM_DEBUG(dbgs() + << "FastISel folded load: " << *BeforeInst << "\n"); BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; @@ -3264,6 +3272,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // If the chained node is not the root, we can't fold it if it has + // multiple uses. // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && @@ -3301,6 +3311,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // If the chained node is not the root, we can't fold it if it has + // multiple uses. // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && @@ -3439,12 +3451,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // such nodes must have a chain, it suffices to check ChainNodesMatched. // We need to perform this check before potentially modifying one of the // nodes via MorphNode. - bool MayRaiseFPException = false; - for (auto *N : ChainNodesMatched) - if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) { - MayRaiseFPException = true; - break; - } + bool MayRaiseFPException = + llvm::any_of(ChainNodesMatched, [this](SDNode *N) { + return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept(); + }); // Create the node. MachineSDNode *Res = nullptr; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index d022e2a23ea0..b66eeb6d2bb1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -13,15 +13,11 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "dag-printer" @@ -181,11 +177,11 @@ LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName, /// clearGraphAttrs - Clear all previously defined node graph attributes. 
/// Intended to be used from a debugging tool (e.g. gdb). void SelectionDAG::clearGraphAttrs() { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS NodeGraphAttrs.clear(); #else - errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::clearGraphAttrs is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; #endif } @@ -193,11 +189,11 @@ /// setGraphAttrs - Set graph attributes for a node. (e.g. "color=red".) /// void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS NodeGraphAttrs[N] = Attrs; #else - errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphAttrs is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; #endif } @@ -205,7 +201,7 @@ /// getGraphAttrs - Get graph attributes for a node. (e.g. "color=red".) /// Used from getNodeAttributes. std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS std::map<const SDNode *, std::string>::const_iterator I = NodeGraphAttrs.find(N); @@ -214,8 +210,8 @@ if (I != NodeGraphAttrs.end()) return I->second; else return ""; #else - errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::getGraphAttrs is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; return std::string(); #endif } @@ -223,11 +219,11 @@ /// setGraphColor - Convenience for setting node color attribute.
/// void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS NodeGraphAttrs[N] = std::string("color=") + Color; #else - errs() << "SelectionDAG::setGraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphColor is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; #endif } diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index dfda7d8b9f81..19a52fde44c1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,7 +17,10 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -27,6 +30,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -168,7 +172,7 @@ static Optional findPreviousSpillSlot(const Value *Val, const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()]; - auto It = RelocationMap.find(Relocate->getDerivedPtr()); + auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) return None; @@ -192,10 +196,10 @@ static Optional findPreviousSpillSlot(const Value *Val, for (auto &IncomingValue : Phi->incoming_values()) { Optional SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); - if (!SpillSlot.hasValue()) + if (!SpillSlot) return None; - if (MergedResult.hasValue() && *MergedResult != *SpillSlot) + if (MergedResult && *MergedResult != *SpillSlot) return None; MergedResult = SpillSlot; @@ -276,7 +280,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, const int LookUpDepth = 6; Optional Index = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); - if (!Index.hasValue()) + if (!Index) return; const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots; @@ -526,14 +530,14 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, GCStrategy &S = GFI->getStrategy(); for (const Value *V : SI.Bases) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { + if (Opt) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : SI.Ptrs) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { + if (Opt) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } @@ -880,8 +884,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); - // For values lowered to tied-defs, create the virtual registers. Note that - // for simplicity, we *always* create a vreg even within a single block. + // For values lowered to tied-defs, create the virtual registers if used + // in other blocks. For local gc.relocate record appropriate statepoint + // result in StatepointLoweringState. 
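// Decision model for the relocate-lowering split described above (sketch;
// record kinds mirror the patch, the helper is illustrative): a gc.relocate
// in the statepoint's own block reuses the statepoint's SDValue result
// directly, while cross-block relocates still round-trip through a virtual
// register.
enum class RelocRecordModel { SDValueNode, VReg, SpillOrOther };
static RelocRecordModel classifyRelocate(bool LoweredAsVReg, bool SameBlock) {
  if (!LoweredAsVReg)
    return RelocRecordModel::SpillOrOther; // spill/no-relocate paths unchanged
  return SameBlock ? RelocRecordModel::SDValueNode : RelocRecordModel::VReg;
}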
DenseMap<SDValue, Register> VirtRegs; for (const auto *Relocate : SI.GCRelocates) { Value *Derived = Relocate->getDerivedPtr(); SDValue SD = getValue(Derived); if (!LowerAsVReg.count(SD)) continue; + SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]); + + // Handle local relocate. Note that different relocates might + // map to the same SDValue. + if (SI.StatepointInstr->getParent() == Relocate->getParent()) { + SDValue Res = StatepointLowering.getLocation(SD); + if (Res) + assert(Res == Relocated); + else + StatepointLowering.setLocation(SD, Relocated); + continue; + } + // Handle multiple gc.relocates of the same input efficiently. if (VirtRegs.count(SD)) continue; - SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]); - auto *RetTy = Relocate->getType(); Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), @@ -915,8 +931,13 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( SDValue SDV = getValue(V); SDValue Loc = StatepointLowering.getLocation(SDV); + bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent()); + RecordType Record; - if (LowerAsVReg.count(SDV)) { + if (IsLocal && LowerAsVReg.count(SDV)) { + // Result is already stored in StatepointLowering. + Record.type = RecordType::SDValueNode; + } else if (LowerAsVReg.count(SDV)) { Record.type = RecordType::VReg; assert(VirtRegs.count(SDV)); Record.payload.Reg = VirtRegs[SDV]; @@ -932,7 +953,7 @@ if (Relocate->getParent() != StatepointInstr->getParent()) ExportFromCurrentBlock(V); } - RelocationMap[V] = Record; + RelocationMap[Relocate] = Record; } @@ -1148,8 +1169,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID; auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes()); - SI.ID = SD.StatepointID.getValueOr(DefaultID); - SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0); + SI.ID = SD.StatepointID.value_or(DefaultID); + SI.NumPatchBytes = SD.NumPatchBytes.value_or(0); SI.DeoptState = ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end()); @@ -1210,11 +1231,19 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; - auto SlotIt = RelocationMap.find(DerivedPtr); + auto SlotIt = RelocationMap.find(&Relocate); assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value"); const RecordType &Record = SlotIt->second; // If relocation was done via virtual register...
+ if (Record.type == RecordType::SDValueNode) { + assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + "Nonlocal gc.relocate mapped via SDValue"); + SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr)); + assert(SDV.getNode() && "empty SDValue"); + setValue(&Relocate, SDV); + return; + } if (Record.type == RecordType::VReg) { Register InReg = Record.payload.Reg; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f6d1fa87676f..a6b471ea22b7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -13,13 +13,13 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -30,7 +30,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include using namespace llvm; @@ -94,6 +93,8 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, // (We look for a CopyFromReg reading a virtual register that is used // for the function live-in value of register Reg) SDValue Value = OutVals[I]; + if (Value->getOpcode() == ISD::AssertZext) + Value = Value.getOperand(0); if (Value->getOpcode() != ISD::CopyFromReg) return false; Register ArgReg = cast(Value->getOperand(1))->getReg(); @@ -121,7 +122,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); Alignment = Call->getParamStackAlign(ArgIdx); IndirectType = nullptr; - assert(IsByVal + IsPreallocated + IsInAlloca <= 1 && + assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 && "multiple ABI attributes?"); if (IsByVal) { IndirectType = Call->getParamByValType(ArgIdx); @@ -132,6 +133,8 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IndirectType = Call->getParamPreallocatedType(ArgIdx); if (IsInAlloca) IndirectType = Call->getParamInAllocaType(ArgIdx); + if (IsSRet) + IndirectType = Call->getParamStructRetType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -193,7 +196,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, bool TargetLowering::findOptimalMemOpLowering( std::vector &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const { - if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) + if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() && + Op.getSrcAlign() < Op.getDstAlign()) return false; EVT VT = getOptimalMemOpType(Op, FuncAttributes); @@ -905,6 +909,132 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( Depth); } +// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1). 
+// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1). +static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI, + const APInt &DemandedBits, + const APInt &DemandedElts, + unsigned Depth) { + assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) && + "SRL or SRA node is required here!"); + // Is the right shift using an immediate value of 1? + ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); + if (!N1C || !N1C->isOne()) + return SDValue(); + + // We are looking for an avgfloor + // add(ext, ext) + // or one of these as an avgceil + // add(add(ext, ext), 1) + // add(add(ext, 1), ext) + // add(ext, add(ext, 1)) + SDValue Add = Op.getOperand(0); + if (Add.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue ExtOpA = Add.getOperand(0); + SDValue ExtOpB = Add.getOperand(1); + auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) { + ConstantSDNode *ConstOp; + if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op2; + ExtOpB = Op3; + return true; + } + if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op1; + ExtOpB = Op3; + return true; + } + if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op1; + ExtOpB = Op2; + return true; + } + return false; + }; + bool IsCeil = + (ExtOpA.getOpcode() == ISD::ADD && + MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) || + (ExtOpB.getOpcode() == ISD::ADD && + MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA)); + + // If the shift is signed (sra): + // - Needs >= 2 sign bits for both operands. + // - Needs >= 2 zero bits. + // If the shift is unsigned (srl): + // - Needs >= 1 zero bit for both operands. + // - Needs 1 demanded bit zero and >= 2 sign bits. + unsigned ShiftOpc = Op.getOpcode(); + bool IsSigned = false; + unsigned KnownBits; + unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth); + unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth); + unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1; + unsigned NumZeroA = + DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros(); + unsigned NumZeroB = + DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros(); + unsigned NumZero = std::min(NumZeroA, NumZeroB); + + switch (ShiftOpc) { + default: + llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG"); + case ISD::SRA: { + if (NumZero >= 2 && NumSigned < NumZero) { + IsSigned = false; + KnownBits = NumZero; + break; + } + if (NumSigned >= 1) { + IsSigned = true; + KnownBits = NumSigned; + break; + } + return SDValue(); + } + case ISD::SRL: { + if (NumZero >= 1 && NumSigned < NumZero) { + IsSigned = false; + KnownBits = NumZero; + break; + } + if (NumSigned >= 1 && DemandedBits.isSignBitClear()) { + IsSigned = true; + KnownBits = NumSigned; + break; + } + return SDValue(); + } + } + + unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU) + : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU); + + // Find the smallest power-of-2 type that is legal for this vector size and + // operation, given the original type size and the number of known sign/zero + // bits.
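As an aside, the identity this matcher relies on is easy to check in standalone C++ for unsigned 8-bit lanes: truncating the widened add-plus-shift gives exactly the narrow fixed-point average (a minimal sketch, assuming AVGFLOORU/AVGCEILU semantics of (A+B)/2 and (A+B+1)/2):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      uint16_t Wide = uint16_t(A + B);          // add(ext(A), ext(B))
      uint8_t Floor = uint8_t(Wide >> 1);       // shr(..., 1), then truncate
      uint8_t Ceil = uint8_t((Wide + 1u) >> 1); // shr(add(..., 1), 1)
      assert(Floor == uint8_t((A + B) / 2));    // AVGFLOORU semantics
      assert(Ceil == uint8_t((A + B + 1) / 2)); // AVGCEILU semantics
    }
  }
  return 0;
}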
+ EVT VT = Op.getValueType(); + unsigned MinWidth = + std::max(VT.getScalarSizeInBits() - KnownBits, 8); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth)); + if (VT.isVector()) + NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount()); + if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) + return SDValue(); + + SDLoc DL(Op); + SDValue ResultAVG = + DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA), + DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB)); + return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, + ResultAVG); +} + /// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -989,7 +1119,7 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits SrcKnown; SDValue Src = Op.getOperand(0); unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); - APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); + APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth); if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) return true; @@ -1105,7 +1235,7 @@ bool TargetLowering::SimplifyDemandedBits( break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO, Depth + 1)) @@ -1406,6 +1536,19 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); + break; + case ISD::VSELECT: + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Only known if known in both the LHS and RHS. Known = KnownBits::commonBits(Known, Known2); break; @@ -1542,6 +1685,16 @@ bool TargetLowering::SimplifyDemandedBits( // low bits known zero. Known.Zero.setLowBits(ShAmt); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // Try shrinking the operation as long as the shift amount will still be // in range. if ((ShAmt < DemandedBits.getActiveBits()) && @@ -1567,6 +1720,11 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); + // Try to match AVG patterns. 
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + if (const APInt *SA = TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); @@ -1633,6 +1791,11 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + // Try to match AVG patterns. + if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + if (const APInt *SA = TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); @@ -1727,6 +1890,22 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); Known.One |= Known2.One; Known.Zero |= Known2.Zero; + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() || + !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0; + DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0, + DemandedOp1, Op2); + return TLO.CombineTo(Op, NewOp); + } + } } // For pow-2 bitwidths we only demand the bottom modulo amt bits. @@ -1899,7 +2078,8 @@ bool TargetLowering::SimplifyDemandedBits( // bit is demanded. InputDemandedBits.setBit(ExVTBits - 1); - if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -1965,7 +2145,7 @@ bool TargetLowering::SimplifyDemandedBits( } APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -2002,7 +2182,7 @@ bool TargetLowering::SimplifyDemandedBits( } APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -2046,7 +2226,7 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -2265,9 +2445,27 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::MUL: - // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1] - if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1)) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); + if (DemandedBits.isPowerOf2()) { + // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. + // If we demand exactly one bit N and we have "X * (C' << N)" where C' is + // odd (has LSB set), then the left-shifted low bit of X is the answer. 
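A brute-force check of that claim, with hypothetical constants C' = 5 and N = 3:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned N = 3;
  const uint32_t MulC = 5u << N;        // C' = 5 is odd, shifted left by N
  const uint32_t DemandedBit = 1u << N; // only bit N is demanded
  for (uint32_t X = 0; X < (1u << 16); ++X)
    // Bit N of X * MulC equals bit N of X << N, so the multiply can be
    // replaced by the cheaper shift when only that bit matters.
    assert(((X * MulC) & DemandedBit) == ((X << N) & DemandedBit));
  return 0;
}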
+ unsigned CTZ = DemandedBits.countTrailingZeros(); + ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); + if (C && C->getAPIntValue().countTrailingZeros() == CTZ) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC); + return TLO.CombineTo(Op, Shl); + } + } + // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: + // X * X is odd iff X is odd. + // 'Quadratic Reciprocity': X * X -> 0 for bit[1] + if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) { + SDValue One = TLO.DAG.getConstant(1, dl, VT); + SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One); + return TLO.CombineTo(Op, And1); + } LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { @@ -2330,6 +2528,49 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, NewOp); } + // Match a multiply with a disguised negated-power-of-2 and convert to + // an equivalent shift-left amount. + // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned { + if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse()) + return 0; + + // Don't touch opaque constants. Also, ignore zero and power-of-2 + // multiplies. Those will get folded later. + ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1)); + if (MulC && !MulC->isOpaque() && !MulC->isZero() && + !MulC->getAPIntValue().isPowerOf2()) { + APInt UnmaskedC = MulC->getAPIntValue() | HighMask; + if (UnmaskedC.isNegatedPowerOf2()) + return (-UnmaskedC).logBase2(); + } + return 0; + }; + + auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC); + SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl); + return TLO.CombineTo(Op, Res); + }; + + if (isOperationLegalOrCustom(ISD::SHL, VT)) { + if (Op.getOpcode() == ISD::ADD) { + // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op0)) + return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt); + // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt); + } + if (Op.getOpcode() == ISD::SUB) { + // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt); + } + } + LLVM_FALLTHROUGH; } default: @@ -2347,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we know the value of all of the demanded bits, return this as a // constant. - if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { + if (!isTargetCanonicalConstantNode(Op) && + DemandedBits.isSubsetOf(Known.Zero | Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved.
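Returning to the multiply rewrite above: it is a plain two's-complement identity, checkable in isolation (a sketch with an assumed shift amount K = 4):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned K = 4;
  const uint32_t MulC = uint32_t(0) - (1u << K); // disguised negated power of 2
  for (uint32_t X = 0; X < (1u << 12); ++X)
    for (uint32_t Y = 0; Y < (1u << 12); Y += 131) {
      assert(X * MulC + Y == Y - (X << K)); // (X * MulC) + Op1 --> Op1 - (X << K)
      assert(Y - X * MulC == Y + (X << K)); // Op0 - (X * MulC) --> Op0 + (X << K)
    }
  return 0;
}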
const SDNode *N = Op.getNode(); for (SDNode *Op : @@ -2370,13 +2612,12 @@ bool TargetLowering::SimplifyDemandedBits( bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, - APInt &KnownUndef, - APInt &KnownZero, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); + APInt KnownUndef, KnownZero; bool Simplified = SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); if (Simplified) { @@ -2447,6 +2688,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef = KnownZero = APInt::getZero(NumElts); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO)) + return false; + // TODO: For now we assume we know nothing about scalable vectors. if (VT.isScalableVector()) return false; @@ -2565,6 +2810,21 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known, TLO, Depth + 1)) return true; + + // The bitcast has split each wide element into a number of + // narrow subelements. We have just computed the Known bits + // for wide elements. See if element splitting results in + // some subelements being zero. Only for demanded elements! + for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) { + if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits) + .isAllOnes()) + continue; + for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) { + unsigned Elt = Scale * SrcElt + SubElt; + if (DemandedElts[Elt]) + KnownZero.setBit(Elt); + } + } } // If the src element is zero/undef then all the output elements will be - @@ -2646,6 +2906,25 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef.insertBits(SubUndef, i * NumSubElts); KnownZero.insertBits(SubZero, i * NumSubElts); } + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedElts.isAllOnes()) { + bool FoundNewSub = false; + SmallVector DemandedSubOps; + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts( + SubOp, SubElts, TLO.DAG, Depth + 1); + DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp); + FoundNewSub = NewSubOp ? 
true : FoundNewSub; + } + if (FoundNewSub) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::INSERT_SUBVECTOR: { @@ -2699,7 +2978,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, @@ -2858,7 +3137,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt SrcUndef, SrcZero; SDValue Src = Op.getOperand(0); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -3618,6 +3897,115 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, return SDValue(); } +static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !(C1->isZero() || C1->isAllOnes())) + return SDValue(); + + auto getRotateSource = [](SDValue X) { + if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR) + return X.getOperand(0); + return SDValue(); + }; + + // Peek through a rotated value compared against 0 or -1: + // (rot X, Y) == 0/-1 --> X == 0/-1 + // (rot X, Y) != 0/-1 --> X != 0/-1 + if (SDValue R = getRotateSource(N0)) + return DAG.getSetCC(dl, VT, R, N1, Cond); + + // Peek through an 'or' of a rotated value compared against 0: + // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0 + // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0 + // + // TODO: Add the 'and' with -1 sibling. + // TODO: Recurse through a series of 'or' ops to find the rotate. + EVT OpVT = N0.getValueType(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) { + if (SDValue R = getRotateSource(N0.getOperand(0))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (SDValue R = getRotateSource(N0.getOperand(1))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + } + + return SDValue(); +} + +static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + // If we are testing for all-bits-clear, we might be able to do that with + // less shifting since bit-order does not matter. + if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !C1->isZero()) + return SDValue(); + + if (!N0.hasOneUse() || + (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR)) + return SDValue(); + + unsigned BitWidth = N0.getScalarValueSizeInBits(); + auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2)); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) + return SDValue(); + + // Canonicalize fshr as fshl to reduce pattern-matching. 
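The rotate peek-through in foldSetCCWithRotate above holds because a rotate merely permutes bits, so it preserves all-zeros and all-ones inputs exactly; a small standalone check for 32-bit values:

#include <cassert>
#include <cstdint>

static uint32_t Rotl(uint32_t X, unsigned S) {
  S &= 31;
  return S == 0 ? X : (X << S) | (X >> (32 - S));
}

int main() {
  for (unsigned S = 0; S < 32; ++S) {
    assert(Rotl(0u, S) == 0u);          // (rot 0, S) == 0
    assert(Rotl(~0u, S) == ~0u);        // (rot -1, S) == -1
    assert(Rotl(0x12345678u, S) != 0u); // a nonzero input stays nonzero
  }
  return 0;
}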
+ unsigned ShAmt = ShAmtC->getZExtValue(); + if (N0.getOpcode() == ISD::FSHR) + ShAmt = BitWidth - ShAmt; + + // Match an 'or' with a specific operand 'Other' in either commuted variant. + SDValue X, Y; + auto matchOr = [&X, &Y](SDValue Or, SDValue Other) { + if (Or.getOpcode() != ISD::OR || !Or.hasOneUse()) + return false; + if (Or.getOperand(0) == Other) { + X = Or.getOperand(0); + Y = Or.getOperand(1); + return true; + } + if (Or.getOperand(1) == Other) { + X = Or.getOperand(1); + Y = Or.getOperand(0); + return true; + } + return false; + }; + + EVT OpVT = N0.getValueType(); + EVT ShAmtVT = N0.getOperand(2).getValueType(); + SDValue F0 = N0.getOperand(0); + SDValue F1 = N0.getOperand(1); + if (matchOr(F0, F1)) { + // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (matchOr(F1, F0)) { + // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -3632,13 +4020,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) return Fold; + bool N0ConstOrSplat = + isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true); + bool N1ConstOrSplat = + isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true); + // Ensure that the constant occurs on the RHS and fold constant comparisons. // TODO: Handle non-splat vector constants. All undef causes trouble. // FIXME: We can't yet fold constant scalable vector splats, so avoid an // infinite loop here when we encounter one. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isConstOrConstSplat(N0) && - (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) && + if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -3647,13 +4039,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // -- but in reverse order -- then try to commute the operands of this setcc // to match. A matching pair of setcc (cmp) and sub may be combined into 1 // instruction on some targets. 
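The constant-RHS folds a little further down rest on modular-arithmetic bijections: adding, xoring, or subtracting a constant is invertible, so the comparison can be moved onto X. A quick sanity check with hypothetical constants:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 0xDEADBEEFu, C2 = 17u;
  for (uint32_t X = 0; X < (1u << 16); ++X) {
    assert(((X + C1) == C2) == (X == C2 - C1));   // (X+C1) == C2 --> X == C2-C1
    assert(((X ^ C1) == C2) == (X == (C1 ^ C2))); // (X^C1) == C2 --> X == C1^C2
    assert(((C1 - X) == C2) == (X == C1 - C2));   // (C1-X) == C2 --> X == C1-C2
  }
  return 0;
}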
- if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && + if (!N0ConstOrSplat && !N1ConstOrSplat && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG)) + return V; + + if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG)) + return V; + if (auto *N1C = isConstOrConstSplat(N1)) { const APInt &C1 = N1C->getAPIntValue(); @@ -4399,37 +4797,30 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (auto *RHSC = dyn_cast(N1)) { if (auto *LHSR = dyn_cast(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 - if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(RHSC->getAPIntValue()- - LHSR->getAPIntValue(), - dl, N0.getValueType()), Cond); - } - - // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. - if (N0.getOpcode() == ISD::XOR) - // If we know that all of the inverted bits are zero, don't bother - // performing the inversion. - if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) - return - DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(LHSR->getAPIntValue() ^ - RHSC->getAPIntValue(), - dl, N0.getValueType()), - Cond); + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(), + dl, N0.getValueType()), + Cond); + + // Turn (X^C1) == C2 --> X == C1^C2 + if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); } // Turn (C1-X) == C2 --> X == C1-C2 - if (auto *SUBC = dyn_cast(N0.getOperand(0))) { - if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { - return - DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(SUBC->getAPIntValue() - - RHSC->getAPIntValue(), - dl, N0.getValueType()), - Cond); - } - } + if (auto *SUBC = dyn_cast(N0.getOperand(0))) + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); // Could RHSC fold directly into a compare? if (RHSC->getValueType(0).getSizeInBits() <= 64) @@ -4582,13 +4973,14 @@ TargetLowering::getConstraintType(StringRef Constraint) const { case 'o': // offsetable case 'V': // not offsetable return C_Memory; + case 'p': // Address. + return C_Address; case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant return C_Immediate; case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant - case 'p': // Address. case 'X': // Allow ANY value. case 'I': // Target registers. case 'J': @@ -4826,8 +5218,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, if (OpInfo.CallOperandVal) { llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); if (OpInfo.isIndirect) { - OpTy = Call.getAttributes().getParamElementType(ArgNo); - assert(OpTy && "Indirect opernad must have elementtype attribute"); + OpTy = Call.getParamElementType(ArgNo); + assert(OpTy && "Indirect operand must have elementtype attribute"); } // Look for vector wrapped in a struct. 
e.g. { <16 x i8> }. @@ -4962,6 +5354,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_RegisterClass: return 2; case TargetLowering::C_Memory: + case TargetLowering::C_Address: return 3; } llvm_unreachable("Invalid constraint type"); @@ -5232,6 +5625,17 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return SDValue(); } +SDValue +TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N, 0); // Lower SREM as SREM + return SDValue(); +} + /// Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -7016,6 +7420,30 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return true; } +SDValue +TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node, + SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM || + Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) && + "Wrong opcode"); + + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + + return SDValue(); +} + SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); @@ -7058,29 +7486,234 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } - // If none of the above worked, but there are no NaNs, then expand to - // a compare/select sequence. This is required for correctness since - // InstCombine might have canonicalized a fcmp+select sequence to a - // FMINNUM/FMAXNUM node. If we were to fall through to the default - // expansion to libcall, we might introduce a link-time dependency - // on libm into a file that originally did not have one. - if (Node->getFlags().hasNoNaNs()) { - ISD::CondCode Pred = - Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; - SDValue Op1 = Node->getOperand(0); - SDValue Op2 = Node->getOperand(1); - SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); - // Copy FMF flags, but always set the no-signed-zeros flag - // as this is implied by the FMINNUM/FMAXNUM semantics. - SDNodeFlags Flags = Node->getFlags(); - Flags.setNoSignedZeros(true); - SelCC->setFlags(Flags); + if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG)) return SelCC; - } return SDValue(); } +SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, + unsigned Test, SDNodeFlags Flags, + const SDLoc &DL, + SelectionDAG &DAG) const { + EVT OperandVT = Op.getValueType(); + assert(OperandVT.isFloatingPoint()); + + // Degenerated cases. 
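Stepping back to createSelectForFMINNUM_FMAXNUM above: with no NaNs the node reduces to a compare plus select, which can be checked against libm (signed zeros set aside, as the no-signed-zeros flag implies):

#include <cassert>
#include <cmath>

int main() {
  const float Cases[] = {-3.5f, -1.0f, 0.0f, 2.0f, 8.25f};
  for (float A : Cases)
    for (float B : Cases) {
      assert((A < B ? A : B) == std::fmin(A, B)); // FMINNUM with nnan
      assert((A > B ? A : B) == std::fmax(A, B)); // FMAXNUM with nnan
    }
  return 0;
}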
+ if (Test == 0) + return DAG.getBoolConstant(false, DL, ResultVT, OperandVT); + if ((Test & fcAllFlags) == fcAllFlags) + return DAG.getBoolConstant(true, DL, ResultVT, OperandVT); + + // PPC double double is a pair of doubles, of which the higher part determines + // the value class. + if (OperandVT == MVT::ppcf128) { + Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op, + DAG.getConstant(1, DL, MVT::i32)); + OperandVT = MVT::f64; + } + + // Some checks may be represented as inversion of simpler check, for example + // "inf|normal|subnormal|zero" => !"nan". + bool IsInverted = false; + if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) { + IsInverted = true; + Test = InvertedCheck; + } + + // Floating-point type properties. + EVT ScalarFloatVT = OperandVT.getScalarType(); + const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext()); + const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics(); + bool IsF80 = (ScalarFloatVT == MVT::f80); + + // Some checks can be implemented using float comparisons, if floating point + // exceptions are ignored. + if (Flags.hasNoFPExcept() && + isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) { + if (Test == fcZero) + return DAG.getSetCC(DL, ResultVT, Op, + DAG.getConstantFP(0.0, DL, OperandVT), + IsInverted ? ISD::SETUNE : ISD::SETOEQ); + if (Test == fcNan) + return DAG.getSetCC(DL, ResultVT, Op, Op, + IsInverted ? ISD::SETO : ISD::SETUO); + } + + // In the general case use integer operations. + unsigned BitSize = OperandVT.getScalarSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize); + if (OperandVT.isVector()) + IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT, + OperandVT.getVectorElementCount()); + SDValue OpAsInt = DAG.getBitcast(IntVT, Op); + + // Various masks. + APInt SignBit = APInt::getSignMask(BitSize); + APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign. + APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit. + const unsigned ExplicitIntBitInF80 = 63; + APInt ExpMask = Inf; + if (IsF80) + ExpMask.clearBit(ExplicitIntBitInF80); + APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; + APInt QNaNBitMask = + APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); + APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits()); + + SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT); + SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT); + SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT); + SDValue ZeroV = DAG.getConstant(0, DL, IntVT); + SDValue InfV = DAG.getConstant(Inf, DL, IntVT); + SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT); + + SDValue Res; + const auto appendResult = [&](SDValue PartialRes) { + if (PartialRes) { + if (Res) + Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes); + else + Res = PartialRes; + } + }; + + SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set. + const auto getIntBitIsSet = [&]() -> SDValue { + if (!IntBitIsSetV) { + APInt IntBitMask(BitSize, 0); + IntBitMask.setBit(ExplicitIntBitInF80); + SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT); + SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV); + IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE); + } + return IntBitIsSetV; + }; + + // Split the value into sign bit and absolute value. 
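The integer recipes used below can be verified in plain C++ on f32 bit patterns (a sketch, assuming IEEE-754 single precision):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t ValueMask = 0x7fffffffu; // all bits but sign
  const uint32_t Inf = 0x7f800000u;       // exponent all ones, mantissa zero
  const float Cases[] = {0.0f,     -0.0f,     1.5f, -2.0f, 1e-40f,
                         INFINITY, -INFINITY, NAN};
  for (float F : Cases) {
    uint32_t U;
    std::memcpy(&U, &F, sizeof(U));
    uint32_t Abs = U & ValueMask;
    assert((Abs > Inf) == (std::isnan(F) != 0));  // isnan(V) <=> abs(V) > int(inf)
    assert((Abs == Inf) == (std::isinf(F) != 0)); // isinf(V) <=> abs(V) == int(inf)
    assert((Abs == 0) == (F == 0.0f));            // iszero, either sign
  }
  return 0;
}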
+ SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV); + SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt, + DAG.getConstant(0.0, DL, IntVT), ISD::SETLT); + + // Tests that involve more than one class should be processed first. + SDValue PartialRes; + + if (IsF80) + ; // Detect finite numbers of f80 by checking individual classes because + // they have different settings of the explicit integer bit. + else if ((Test & fcFinite) == fcFinite) { + // finite(V) ==> abs(V) < exp_mask + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT); + Test &= ~fcFinite; + } else if ((Test & fcFinite) == fcPosFinite) { + // finite(V) && V > 0 ==> V < exp_mask + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT); + Test &= ~fcPosFinite; + } else if ((Test & fcFinite) == fcNegFinite) { + // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1 + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + Test &= ~fcNegFinite; + } + appendResult(PartialRes); + + // Check for individual classes. + + if (unsigned PartialCheck = Test & fcZero) { + if (PartialCheck == fcPosZero) + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ); + else if (PartialCheck == fcZero) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ); + else // ISD::fcNegZero + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ); + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcInf) { + if (PartialCheck == fcPosInf) + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ); + else if (PartialCheck == fcInf) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ); + else { // ISD::fcNegInf + APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt(); + SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT); + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ); + } + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcNan) { + APInt InfWithQnanBit = Inf | QNaNBitMask; + SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT); + if (PartialCheck == fcNan) { + // isnan(V) ==> abs(V) > int(inf) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT); + if (IsF80) { + // Recognize unsupported values as NaNs for compatibility with glibc. + // In them (exp(V)==0) == int_bit. 
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV); + SDValue ExpIsZero = + DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); + SDValue IsPseudo = + DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ); + PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo); + } + } else if (PartialCheck == fcQNan) { + // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit) + PartialRes = + DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE); + } else { // ISD::fcSNan + // issignaling(V) ==> abs(V) > unsigned(Inf) && + // abs(V) < (unsigned(Inf) | quiet_bit) + SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT); + SDValue IsNotQnan = + DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan); + } + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcSubnormal) { + // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) + SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV; + SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT); + SDValue VMinusOneV = + DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT)); + PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT); + if (PartialCheck == fcNegSubnormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcNormal) { + // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1)) + APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); + SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT); + SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV); + APInt ExpLimit = ExpMask - ExpLSB; + SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT); + PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT); + if (PartialCheck == fcNegNormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + else if (PartialCheck == fcPosNormal) { + SDValue PosSignV = + DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV); + } + if (IsF80) + PartialRes = + DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet()); + appendResult(PartialRes); + } + + if (!Res) + return DAG.getConstant(IsInverted, DL, ResultVT); + if (IsInverted) + Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask); + return Res; +} + // Only expand vector types if we have the appropriate vector bit operations. static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { assert(VT.isVector() && "Expected vector type"); @@ -7116,8 +7749,6 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); - SDValue Mask01 = - DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, @@ -7137,13 +7768,28 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT))), Mask0F); - // v = (v * 0x01010101...) 
>> (Len - 8) - if (Len > 8) - Op = - DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, dl, ShVT)); - return Op; + if (Len <= 8) + return Op; + + // Avoid the multiply if we only have 2 bytes to add. + // TODO: Only doing this for scalars because vectors weren't as obviously + // improved. + if (Len == 16 && !VT.isVector()) { + // v = (v + (v >> 8)) & 0x00FF; + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(8, dl, ShVT))), + DAG.getConstant(0xFF, dl, VT)); + } + + // v = (v * 0x01010101...) >> (Len - 8) + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + return DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); } SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { @@ -7265,6 +7911,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::UMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -7272,6 +7919,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { + Op = DAG.getFreeze(Op); SDValue Zero = DAG.getConstant(0, dl, VT); return DAG.getNode(ISD::SMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); @@ -7285,16 +7933,17 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + Op = DAG.getFreeze(Op); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); - if (!IsNegative) { - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + + // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) + if (!IsNegative) + return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift); // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } @@ -8041,23 +8690,6 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, return SDValue(); } -// Convert redundant addressing modes (e.g. scaling is redundant -// when accessing bytes). -ISD::MemIndexType -TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, - SDValue Offsets) const { - bool IsScaledIndex = - (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED); - bool IsSignedIndex = - (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); - - // Scaling is unimportant for bytes, canonicalize to unscaled. - if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) - return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; - - return IndexType; -} - SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); @@ -8473,8 +9105,20 @@ void TargetLowering::expandUADDSUBO( EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType( DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); - ISD::CondCode CC = IsAdd ? 
ISD::SETULT : ISD::SETUGT; - SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + SDValue SetCC; + if (IsAdd && isOneConstant(RHS)) { + // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces + // the live range of X. We assume comparing with 0 is cheap. + // The general case (X + C) < C is not necessarily beneficial. Although we + // reduce the live range of X, we may introduce the materialization of + // constant C. + SetCC = + DAG.getSetCC(dl, SetCCType, Result, + DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ); + } else { + ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; + SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + } Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); } @@ -8773,11 +9417,11 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, // floating-point values. APInt MinInt, MaxInt; if (IsSigned) { - MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); - MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth); } else { - MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); - MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + MinInt = APInt::getMinValue(SatWidth).zext(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth); } // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as @@ -8931,13 +9575,16 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, - SDValue &CC, bool &NeedInvert, + SDValue &CC, SDValue Mask, + SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; + assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset"); + bool IsNonVP = !EVL; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); @@ -9044,17 +9691,34 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unordered operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
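For instance, an ordered not-equal can be realized as two simpler ordered compares joined by OR; a float sanity check of one such decomposition (not necessarily the exact predicate pair chosen here):

#include <cassert>
#include <cmath>

int main() {
  const float Cases[] = {-1.0f, 0.0f, 2.5f, INFINITY, NAN};
  for (float X : Cases)
    for (float Y : Cases) {
      // SETONE(X, Y) behaves like SETOLT(X, Y) OR SETOGT(X, Y):
      // both compares are false whenever either input is NaN.
      bool One = !std::isnan(X) && !std::isnan(Y) && X != Y;
      assert(One == ((X < Y) || (X > Y)));
    }
  return 0;
}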
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + if (IsNonVP) { + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + } else { + SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL); + SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL); + } } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + if (IsNonVP) { + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + } else { + SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL); + SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL); + } } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), SetCC2.getValue(1)); - LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + if (IsNonVP) + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + else { + // Transform the binary opcode to the VP equivalent. + assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode"); + Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND; + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL); + } RHS = SDValue(); CC = SDValue(); return true; diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 43a54ce33bf0..5f9ade18f15c 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include -#include #include #include #include @@ -362,7 +361,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, - DTU.hasValue() ? DTU.getPointer() : nullptr); + DTU ? DTU.getPointer() : nullptr); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 8211e3d6a9dd..1fcee02184a9 100644 --- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -413,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); - // Call the setup_dispatch instrinsic. It fills in the rest of the jmpbuf. + // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf. Builder.CreateCall(BuiltinSetupDispatchFn, {}); // Store a pointer to the function context so that the back-end will know diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 7f9518e4c075..140a91ae342b 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -389,17 +389,34 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const { } #endif -LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM, - LiveInterval &LI) { - for (LiveInterval::SubRange &S : LI.subranges()) +/// Find a subrange corresponding to the exact lane mask @p LM in the live +/// interval @p LI. The interval @p LI is assumed to contain such a subrange. 
+/// This function is used to find corresponding subranges between the +/// original interval and the new intervals. +template auto &getSubrangeImpl(LaneBitmask LM, T &LI) { + for (auto &S : LI.subranges()) if (S.LaneMask == LM) return S; llvm_unreachable("SubRange for this mask not found"); } -LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, - LiveInterval &LI) { - for (LiveInterval::SubRange &S : LI.subranges()) +LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, + LiveInterval &LI) { + return getSubrangeImpl(LM, LI); +} + +const LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, + const LiveInterval &LI) { + return getSubrangeImpl(LM, LI); +} + +/// Find a subrange corresponding to the lane mask @p LM, or a superset of it, +/// in the live interval @p LI. The interval @p LI is assumed to contain such +/// a subrange. This function is used to find corresponding subranges between +/// the original interval and the new intervals. +const LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, + const LiveInterval &LI) { + for (const LiveInterval::SubRange &S : LI.subranges()) if ((S.LaneMask & LM) == LM) return S; llvm_unreachable("SubRange for this mask not found"); @@ -566,10 +583,8 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, return Def; } -VNInfo *SplitEditor::defFromParent(unsigned RegIdx, - VNInfo *ParentVNI, - SlotIndex UseIdx, - MachineBasicBlock &MBB, +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI, + SlotIndex UseIdx, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { SlotIndex Def; LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); @@ -937,7 +952,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, void SplitEditor::computeRedundantBackCopies( DenseSet &NotToHoistSet, SmallVectorImpl &BackCopies) { LiveInterval *LI = &LIS.getInterval(Edit->get(0)); - LiveInterval *Parent = &Edit->getParent(); + const LiveInterval *Parent = &Edit->getParent(); SmallVector, 8> EqualVNs(Parent->getNumValNums()); SmallPtrSet DominatedVNIs; @@ -952,7 +967,7 @@ void SplitEditor::computeRedundantBackCopies( // For VNI aggregation of each ParentVNI, collect dominated, i.e., // redundant VNIs to BackCopies. for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) { - VNInfo *ParentVNI = Parent->getValNumInfo(i); + const VNInfo *ParentVNI = Parent->getValNumInfo(i); if (!NotToHoistSet.count(ParentVNI->id)) continue; SmallPtrSetIterator It1 = EqualVNs[ParentVNI->id].begin(); @@ -990,7 +1005,7 @@ void SplitEditor::computeRedundantBackCopies( void SplitEditor::hoistCopies() { // Get the complement interval, always RegIdx 0. LiveInterval *LI = &LIS.getInterval(Edit->get(0)); - LiveInterval *Parent = &Edit->getParent(); + const LiveInterval *Parent = &Edit->getParent(); // Track the nearest common dominator for all back-copies for each ParentVNI, // indexed by ParentVNI->id. @@ -1067,7 +1082,7 @@ void SplitEditor::hoistCopies() { if (!Dom.first || Dom.second.isValid()) continue; // This value needs a hoisted copy inserted at the end of Dom.first. - VNInfo *ParentVNI = Parent->getValNumInfo(i); + const VNInfo *ParentVNI = Parent->getValNumInfo(i); MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def); // Get a less loopy dominator than Dom.first. 
Dom.first = findShallowDominator(Dom.first, DefMBB); @@ -1237,11 +1252,11 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC, SlotIndex LastUse = End.getPrevSlot(); // The predecessor may not have a live-out value. That is OK, like an // undef PHI operand. - LiveInterval &PLI = Edit->getParent(); + const LiveInterval &PLI = Edit->getParent(); // Need the cast because the inputs to ?: would otherwise be deemed // "incompatible": SubRange vs LiveInterval. - LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI) - : static_cast(PLI); + const LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI) + : static_cast(PLI); if (PSR.liveAt(LastUse)) LIC.extend(LR, End, /*PhysReg=*/0, Undefs); } @@ -1254,7 +1269,7 @@ void SplitEditor::extendPHIKillRanges() { // remove it. Otherwise, extend the live interval to reach the end indexes // of all predecessor blocks. - LiveInterval &ParentLI = Edit->getParent(); + const LiveInterval &ParentLI = Edit->getParent(); for (const VNInfo *V : ParentLI.valnos) { if (V->isUnused() || !V->isPHIDef()) continue; @@ -1270,7 +1285,7 @@ void SplitEditor::extendPHIKillRanges() { SmallVector Undefs; LiveIntervalCalc SubLIC; - for (LiveInterval::SubRange &PS : ParentLI.subranges()) { + for (const LiveInterval::SubRange &PS : ParentLI.subranges()) { for (const VNInfo *V : PS.valnos) { if (V->isUnused() || !V->isPHIDef()) continue; @@ -1337,13 +1352,34 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { continue; // We may want to extend a live range for a partial redef, or for a use // tied to an early clobber. - Idx = Idx.getPrevSlot(); - if (!Edit->getParent().liveAt(Idx)) + if (!Edit->getParent().liveAt(Idx.getPrevSlot())) continue; - } else - Idx = Idx.getRegSlot(true); + } else { + assert(MO.isUse()); + bool IsEarlyClobber = false; + if (MO.isTied()) { + // We want to extend a live range into `e` slot rather than `r` slot if + // tied-def is early clobber, because the `e` slot already contained + // in the live range of early-clobber tied-def operand, give an example + // here: + // 0 %0 = ... + // 16 early-clobber %0 = Op %0 (tied-def 0), ... + // 32 ... = Op %0 + // Before extend: + // %0 = [0r, 0d) [16e, 32d) + // The point we want to extend is 0d to 16e not 16r in this case, but if + // we use 16r here we will extend nothing because that already contained + // in [16e, 32d). + unsigned OpIdx = MI->getOperandNo(&MO); + unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx); + const MachineOperand &DefOp = MI->getOperand(DefOpIdx); + IsEarlyClobber = DefOp.isEarlyClobber(); + } + + Idx = Idx.getRegSlot(IsEarlyClobber); + } - SlotIndex Next = Idx.getNextSlot(); + SlotIndex Next = Idx; if (LI.hasSubRanges()) { // We have to delay extending subranges until we have seen all operands // defining the register. This is because a operand @@ -1510,9 +1546,8 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { // Provide a reverse mapping from original indices to Edit ranges. if (LRMap) { - LRMap->clear(); - for (unsigned i = 0, e = Edit->size(); i != e; ++i) - LRMap->push_back(i); + auto Seq = llvm::seq(0, Edit->size()); + LRMap->assign(Seq.begin(), Seq.end()); } // Now check if any registers were separated into multiple components. 
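The reverse-mapping change in finish() above swaps a manual push_back loop for a bulk assign from an increasing sequence; the same shape in standard C++, with Size standing in for Edit->size():

#include <cassert>
#include <numeric>
#include <vector>

int main() {
  const unsigned Size = 8; // stands in for Edit->size()
  std::vector<unsigned> LRMap(Size);
  std::iota(LRMap.begin(), LRMap.end(), 0u); // same effect as assigning llvm::seq(0, Size)
  for (unsigned I = 0; I != Size; ++I)
    assert(LRMap[I] == I); // identity mapping: original index -> Edit range
  return 0;
}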
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index 902546fe16d8..4400a797d38e 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -22,19 +22,19 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Compiler.h" #include namespace llvm { class AAResults; +class LiveInterval; +class LiveRange; class LiveIntervals; class LiveRangeEdit; class MachineBlockFrequencyInfo; @@ -346,19 +346,6 @@ private: return LICalc[SpillMode != SM_Partition && RegIdx != 0]; } - /// Find a subrange corresponding to the exact lane mask @p LM in the live - /// interval @p LI. The interval @p LI is assumed to contain such a subrange. - /// This function is used to find corresponding subranges between the - /// original interval and the new intervals. - LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, - LiveInterval &LI); - - /// Find a subrange corresponding to the lane mask @p LM, or a superset of it, - /// in the live interval @p LI. The interval @p LI is assumed to contain such - /// a subrange. This function is used to find corresponding subranges between - /// the original interval and the new intervals. - LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI); - /// Add a segment to the interval LI for the value number VNI. If LI has /// subranges, corresponding segments will be added to them as well, but /// with newly created value numbers. If Original is true, dead def will @@ -390,10 +377,8 @@ private: /// defFromParent - Define Reg from ParentVNI at UseIdx using either /// rematerialization or a COPY from parent. Return the new value. 
- VNInfo *defFromParent(unsigned RegIdx, - VNInfo *ParentVNI, - SlotIndex UseIdx, - MachineBasicBlock &MBB, + VNInfo *defFromParent(unsigned RegIdx, const VNInfo *ParentVNI, + SlotIndex UseIdx, MachineBasicBlock &MBB, MachineBasicBlock::iterator I); /// removeBackCopies - Remove the copy instructions that defines the values diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 623d5da9831e..11c6bdc69956 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -36,14 +36,12 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Use.h" @@ -1145,6 +1143,9 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n"); LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n"); LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n"); + (void) FixedMemOp; + (void) FixedDbg; + (void) FixedInstr; } void StackColoring::removeInvalidSlotRanges() { @@ -1319,6 +1320,11 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int FirstSlot = SortedSlots[I]; int SecondSlot = SortedSlots[J]; + + // Objects with different stack IDs cannot be merged. + if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot)) + continue; + LiveInterval *First = &*Intervals[FirstSlot]; LiveInterval *Second = &*Intervals[SecondSlot]; auto &FirstS = LiveStarts[FirstSlot]; diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 3640296adbca..b83c56903133 100644 --- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 36e8f129ea15..6757d6ca4f88 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -721,7 +721,7 @@ void StackMaps::serializeToStackMapSection() { // Create the section. MCSection *StackMapSection = OutContext.getObjectFileInfo()->getStackMapSection(); - OS.SwitchSection(StackMapSection); + OS.switchSection(StackMapSection); // Emit a dummy symbol to force section inclusion. OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); @@ -732,7 +732,7 @@ void StackMaps::serializeToStackMapSection() { emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); emitCallsiteEntries(OS); - OS.AddBlankLine(); + OS.addBlankLine(); // Clean up. 
CSInfos.clear(); diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 6765fd274686..510a8e3e4ba2 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -28,8 +28,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -169,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. Optional MemLoc = MemoryLocation::getOrNone(I); - if (MemLoc.hasValue() && MemLoc->Size.hasValue() && + if (MemLoc && MemLoc->Size.hasValue() && !TypeSize::isKnownGE(AllocSize, TypeSize::getFixed(MemLoc->Size.getValue()))) return true; diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 17e6f51d0899..b8c750688914 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index 20892a79d35f..bf3d2088e196 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -14,14 +14,14 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 68a7b80d6146..ba533a491b9c 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -19,17 +19,15 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -370,8 +368,8 @@ void TailDuplicator::processPHI( return; // Remove PredBB from the PHI node. 
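The StackProtector hunk above replaces MemLoc.hasValue() with Optional's contextual bool conversion; the inner Size.hasValue() is untouched because LocationSize is its own class, not an Optional. The pattern in isolation (accessesKnownBytes is an illustrative wrapper, not from the patch):

  #include "llvm/Analysis/MemoryLocation.h"
  using namespace llvm;

  bool accessesKnownBytes(const Instruction *I) {
    Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
    // The Optional itself converts to bool; LocationSize keeps hasValue().
    return MemLoc && MemLoc->Size.hasValue();
  }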
- MI->RemoveOperand(SrcOpIdx + 1); - MI->RemoveOperand(SrcOpIdx); + MI->removeOperand(SrcOpIdx + 1); + MI->removeOperand(SrcOpIdx); if (MI->getNumOperands() == 1) MI->eraseFromParent(); } @@ -385,8 +383,9 @@ void TailDuplicator::duplicateInstruction( // Allow duplication of CFI instructions. if (MI->isCFIInstruction()) { BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), - TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( - MI->getOperand(0).getCFIIndex()); + TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MI->getOperand(0).getCFIIndex()) + .setMIFlags(MI->getFlags()); return; } MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); @@ -496,15 +495,15 @@ void TailDuplicator::updateSuccessorsPHIs( for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { MachineOperand &MO = MI.getOperand(i + 1); if (MO.getMBB() == FromBB) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); + MI.removeOperand(i + 1); + MI.removeOperand(i); } } } else Idx = 0; // If Idx is set, the operands at Idx and Idx+1 must be removed. - // We reuse the location to avoid expensive RemoveOperand calls. + // We reuse the location to avoid expensive removeOperand calls. DenseMap::iterator LI = SSAUpdateVals.find(Reg); @@ -541,8 +540,8 @@ void TailDuplicator::updateSuccessorsPHIs( } } if (Idx != 0) { - MI.RemoveOperand(Idx + 1); - MI.RemoveOperand(Idx); + MI.removeOperand(Idx + 1); + MI.removeOperand(Idx); } } } diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index fbf190a52585..9430e86fe44d 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -10,17 +10,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetMachine.h" @@ -37,6 +37,11 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const return false; } +bool TargetFrameLowering::enableCFIFixup(MachineFunction &MF) const { + return MF.needsFrameMoves() && + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); +} + /// Returns the displacement from the frame register to the stack /// frame of the specified index, along with the frame register used /// (in output arg FrameReg). 
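The new TargetFrameLowering::enableCFIFixup default above enables the fixup only when the function needs frame moves and the target does not use Windows CFI. Targets can refine that policy by overriding the hook; MyTargetFrameLowering and its naked-function exclusion are hypothetical:

  bool MyTargetFrameLowering::enableCFIFixup(MachineFunction &MF) const {
    // Hypothetical refinement: nothing to fix up without a prologue.
    if (MF.getFunction().hasFnAttribute(Attribute::Naked))
      return false;
    return TargetFrameLowering::enableCFIFixup(MF); // keep the base heuristic
  }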
This is the default implementation which diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 3f22cc4289f2..2a987ee3eedf 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -31,8 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include using namespace llvm; @@ -40,8 +39,7 @@ static cl::opt DisableHazardRecognizer( "disable-sched-hazard", cl::Hidden, cl::init(false), cl::desc("Disable hazard detection during preRA scheduling")); -TargetInstrInfo::~TargetInstrInfo() { -} +TargetInstrInfo::~TargetInstrInfo() = default; const TargetRegisterClass* TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, @@ -873,11 +871,13 @@ void TargetInstrInfo::reassociateOps( MachineInstrBuilder MIB1 = BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); + .addReg(RegY, getKillRegState(KillY)) + .setMIFlags(Prev.getFlags()); MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); + .addReg(NewVR, getKillRegState(true)) + .setMIFlags(Root.getFlags()); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); @@ -1399,7 +1399,7 @@ std::string TargetInstrInfo::createMIROperandComment( return OS.str(); } -TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} +TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default; void TargetInstrInfo::mergeOutliningCandidateAttributes( Function &F, std::vector &Candidates) const { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index ab574232e367..6a595a4c748b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -56,7 +56,6 @@ #include "llvm/Transforms/Utils/SizeOpts.h" #include #include -#include #include #include #include @@ -202,7 +201,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); } - if (TT.isPS4CPU()) { + if (TT.isPS()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); } @@ -275,6 +274,11 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { return FPROUND_F128_F16; if (OpVT == MVT::ppcf128) return FPROUND_PPCF128_F16; + } else if (RetVT == MVT::bf16) { + if (OpVT == MVT::f32) + return FPROUND_F32_BF16; + if (OpVT == MVT::f64) + return FPROUND_F64_BF16; } else if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; @@ -740,6 +744,30 @@ void TargetLoweringBase::initActions() { std::fill(std::begin(TargetDAGCombineArray), std::end(TargetDAGCombineArray), 0); + // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to + // remove this and targets should individually set these types if not legal. 
+ for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END, + force_iteration_on_noniterable_enum)) { + for (MVT VT : {MVT::i2, MVT::i4}) + OpActions[(unsigned)VT.SimpleTy][NT] = Expand; + } + for (MVT AVT : MVT::all_valuetypes()) { + for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) { + setTruncStoreAction(AVT, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand); + } + } + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + for (MVT VT : {MVT::i2, MVT::i4}) { + setIndexedLoadAction(IM, VT, Expand); + setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); + } + } + for (MVT VT : MVT::fp_valuetypes()) { MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits()); if (IntVT.isValid()) { @@ -763,85 +791,63 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); // These operations default to expand. - setOperationAction(ISD::FGETSIGN, VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); - setOperationAction(ISD::FMINNUM, VT, Expand); - setOperationAction(ISD::FMAXNUM, VT, Expand); - setOperationAction(ISD::FMINNUM_IEEE, VT, Expand); - setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand); - setOperationAction(ISD::FMINIMUM, VT, Expand); - setOperationAction(ISD::FMAXIMUM, VT, Expand); - setOperationAction(ISD::FMAD, VT, Expand); - setOperationAction(ISD::SMIN, VT, Expand); - setOperationAction(ISD::SMAX, VT, Expand); - setOperationAction(ISD::UMIN, VT, Expand); - setOperationAction(ISD::UMAX, VT, Expand); - setOperationAction(ISD::ABS, VT, Expand); - setOperationAction(ISD::FSHL, VT, Expand); - setOperationAction(ISD::FSHR, VT, Expand); - setOperationAction(ISD::SADDSAT, VT, Expand); - setOperationAction(ISD::UADDSAT, VT, Expand); - setOperationAction(ISD::SSUBSAT, VT, Expand); - setOperationAction(ISD::USUBSAT, VT, Expand); - setOperationAction(ISD::SSHLSAT, VT, Expand); - setOperationAction(ISD::USHLSAT, VT, Expand); - setOperationAction(ISD::SMULFIX, VT, Expand); - setOperationAction(ISD::SMULFIXSAT, VT, Expand); - setOperationAction(ISD::UMULFIX, VT, Expand); - setOperationAction(ISD::UMULFIXSAT, VT, Expand); - setOperationAction(ISD::SDIVFIX, VT, Expand); - setOperationAction(ISD::SDIVFIXSAT, VT, Expand); - setOperationAction(ISD::UDIVFIX, VT, Expand); - setOperationAction(ISD::UDIVFIXSAT, VT, Expand); - setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand); - setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand); + setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS, + ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, + ISD::FMINIMUM, ISD::FMAXIMUM, + ISD::FMAD, ISD::SMIN, + ISD::SMAX, ISD::UMIN, + ISD::UMAX, ISD::ABS, + ISD::FSHL, ISD::FSHR, + ISD::SADDSAT, ISD::UADDSAT, + ISD::SSUBSAT, ISD::USUBSAT, + ISD::SSHLSAT, ISD::USHLSAT, + ISD::SMULFIX, ISD::SMULFIXSAT, + ISD::UMULFIX, ISD::UMULFIXSAT, + ISD::SDIVFIX, ISD::SDIVFIXSAT, + ISD::UDIVFIX, ISD::UDIVFIXSAT, + ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, + ISD::IS_FPCLASS}, + VT, Expand); // Overflow operations default to expand - setOperationAction(ISD::SADDO, VT, Expand); - setOperationAction(ISD::SSUBO, VT, Expand); - setOperationAction(ISD::UADDO, VT, Expand); - setOperationAction(ISD::USUBO, VT, Expand); - setOperationAction(ISD::SMULO, VT, Expand); - setOperationAction(ISD::UMULO, VT, Expand); + setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, 
ISD::USUBO, + ISD::SMULO, ISD::UMULO}, + VT, Expand); // ADDCARRY operations default to expand - setOperationAction(ISD::ADDCARRY, VT, Expand); - setOperationAction(ISD::SUBCARRY, VT, Expand); - setOperationAction(ISD::SETCCCARRY, VT, Expand); - setOperationAction(ISD::SADDO_CARRY, VT, Expand); - setOperationAction(ISD::SSUBO_CARRY, VT, Expand); + setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY, ISD::SETCCCARRY, + ISD::SADDO_CARRY, ISD::SSUBO_CARRY}, + VT, Expand); // ADDC/ADDE/SUBC/SUBE default to expand. - setOperationAction(ISD::ADDC, VT, Expand); - setOperationAction(ISD::ADDE, VT, Expand); - setOperationAction(ISD::SUBC, VT, Expand); - setOperationAction(ISD::SUBE, VT, Expand); + setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT, + Expand); + + // Halving adds + setOperationAction( + {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT, + Expand); // Absolute difference - setOperationAction(ISD::ABDS, VT, Expand); - setOperationAction(ISD::ABDU, VT, Expand); + setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand); // These default to Expand so they will be expanded to CTLZ/CTTZ by default. - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, + Expand); - setOperationAction(ISD::BITREVERSE, VT, Expand); - setOperationAction(ISD::PARITY, VT, Expand); + setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction(ISD::FROUND, VT, Expand); - setOperationAction(ISD::FROUNDEVEN, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand); // These operations default to expand for vector types. - if (VT.isVector()) { - setOperationAction(ISD::FCOPYSIGN, VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); - setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); - setOperationAction(ISD::SPLAT_VECTOR, VT, Expand); - } + if (VT.isVector()) + setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, + ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, + ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR}, + VT, Expand); // Constrained floating-point operations default to expand. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ @@ -852,21 +858,13 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); // Vector reduction default to expand. 
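The rewrite above leans on setOperationAction overloads that take an ArrayRef of opcodes and/or an ArrayRef of MVTs, so a long run of per-opcode calls collapses into one (the vector-reduction batch continues right after this sketch). How a backend would use the same overloads; MyTargetLowering is a hypothetical target:

  MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
      : TargetLowering(TM) {
    // One call covers several opcodes for a single type...
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX},
                       MVT::i32, Legal);
    // ...and one call can also cover several value types.
    setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal);
  }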
- setOperationAction(ISD::VECREDUCE_FADD, VT, Expand); - setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand); - setOperationAction(ISD::VECREDUCE_ADD, VT, Expand); - setOperationAction(ISD::VECREDUCE_MUL, VT, Expand); - setOperationAction(ISD::VECREDUCE_AND, VT, Expand); - setOperationAction(ISD::VECREDUCE_OR, VT, Expand); - setOperationAction(ISD::VECREDUCE_XOR, VT, Expand); - setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand); - setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand); - setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand); - setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand); - setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand); - setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand); - setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand); + setOperationAction( + {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD, + ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, + ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX, + ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL}, + VT, Expand); // Named vector shuffles default to expand. setOperationAction(ISD::VECTOR_SPLICE, VT, Expand); @@ -881,30 +879,16 @@ void TargetLoweringBase::initActions() { // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. - setOperationAction(ISD::ConstantFP, MVT::f16, Expand); - setOperationAction(ISD::ConstantFP, MVT::f32, Expand); - setOperationAction(ISD::ConstantFP, MVT::f64, Expand); - setOperationAction(ISD::ConstantFP, MVT::f80, Expand); - setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + setOperationAction(ISD::ConstantFP, + {MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, + Expand); // These library functions default to expand. - for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { - setOperationAction(ISD::FCBRT, VT, Expand); - setOperationAction(ISD::FLOG , VT, Expand); - setOperationAction(ISD::FLOG2, VT, Expand); - setOperationAction(ISD::FLOG10, VT, Expand); - setOperationAction(ISD::FEXP , VT, Expand); - setOperationAction(ISD::FEXP2, VT, Expand); - setOperationAction(ISD::FFLOOR, VT, Expand); - setOperationAction(ISD::FNEARBYINT, VT, Expand); - setOperationAction(ISD::FCEIL, VT, Expand); - setOperationAction(ISD::FRINT, VT, Expand); - setOperationAction(ISD::FTRUNC, VT, Expand); - setOperationAction(ISD::LROUND, VT, Expand); - setOperationAction(ISD::LLROUND, VT, Expand); - setOperationAction(ISD::LRINT, VT, Expand); - setOperationAction(ISD::LLRINT, VT, Expand); - } + setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, + ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, + ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND, + ISD::LRINT, ISD::LLRINT}, + {MVT::f32, MVT::f64, MVT::f128}, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -1394,6 +1378,16 @@ void TargetLoweringBase::computeRegisterProperties( } } + // Decide how to handle bf16. If the target does not have native bf16 support, + // promote it to f32, because there are no bf16 library calls (except for + // converting from f32 to bf16). 
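The comment above motivates the fallback that the next few lines implement: with no native bf16 support, bf16 values are carried in f32 registers. The representation makes this cheap, since bf16 is exactly the top half of an IEEE f32. A standalone illustration (not code from the patch):

  #include <cstdint>
  #include <cstring>

  // Widen a raw bf16 bit pattern to float: shift it into the high
  // 16 bits of the f32 encoding and bit-cast.
  float bf16ToFloat(uint16_t Bits) {
    uint32_t Wide = uint32_t(Bits) << 16;
    float F;
    std::memcpy(&F, &Wide, sizeof(F)); // bit copy, no value conversion
    return F;
  }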
+ if (!isTypeLegal(MVT::bf16)) { + NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::bf16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::bf16, TypePromoteFloat); + } + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1647,6 +1641,11 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, (NumCases * 100 >= Range * MinDensity); } +MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const { + return getRegisterType(Context, ConditionVT); +} + /// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. @@ -2066,9 +2065,11 @@ static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { Name += IsSqrt ? "sqrt" : "div"; - // TODO: Handle "half" or other float types? + // TODO: Handle other float types? if (VT.getScalarType() == MVT::f64) { Name += "d"; + } else if (VT.getScalarType() == MVT::f16) { + Name += "h"; } else { assert(VT.getScalarType() == MVT::f32 && "Unexpected FP type for reciprocal estimate"); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index ce350034d073..f3d68bd9c92d 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -310,7 +310,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS, ELF::SHF_EXCLUDE); - Streamer.SwitchSection(S); + Streamer.switchSection(S); for (const auto *Operand : LinkerOptions->operands()) { if (cast(Operand)->getNumOperands() != 2) @@ -326,7 +326,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1); - Streamer.SwitchSection(S); + Streamer.switchSection(S); for (const auto *Operand : DependentLibraries->operands()) { Streamer.emitBytes( @@ -350,7 +350,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection( TM->getFunctionSections() ? 
Name->getString() : StringRef()); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitInt64(GUID->getZExtValue()); Streamer.emitInt64(Hash->getZExtValue()); Streamer.emitULEB128IntValue(Name->getString().size()); @@ -365,11 +365,11 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, GetObjCImageInfo(M, Version, Flags, Section); if (!Section.empty()) { auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.emitInt32(Version); Streamer.emitInt32(Flags); - Streamer.AddBlankLine(); + Streamer.addBlankLine(); } emitCGProfileMetadata(Streamer, M); @@ -399,7 +399,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(), ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); - Streamer.SwitchSection(Sec); + Streamer.switchSection(Sec); Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value()); Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); @@ -449,6 +449,9 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { Name == ".llvmbc" || Name == ".llvmcmd") return SectionKind::getMetadata(); + if (Name == ".llvm.offloading") + return SectionKind::getExclude(); + if (Name.empty() || Name[0] != '.') return K; // Default implementation based on some magic section names. @@ -507,9 +510,12 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { static unsigned getELFSectionFlags(SectionKind K) { unsigned Flags = 0; - if (!K.isMetadata()) + if (!K.isMetadata() && !K.isExclude()) Flags |= ELF::SHF_ALLOC; + if (K.isExclude()) + Flags |= ELF::SHF_EXCLUDE; + if (K.isText()) Flags |= ELF::SHF_EXECINSTR; @@ -681,9 +687,10 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, } if (Retain) { - if ((Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && - !TM.getTargetTriple().isOSSolaris()) + if (TM.getTargetTriple().isOSSolaris()) + Flags |= ELF::SHF_SUNW_NODISCARD; + else if (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) Flags |= ELF::SHF_GNU_RETAIN; return NextUniqueID++; } @@ -860,12 +867,15 @@ static MCSection *selectELFSectionForGlobal( EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } - if (Retain && - (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && - !TM.getTargetTriple().isOSSolaris()) { - EmitUniqueSection = true; - Flags |= ELF::SHF_GNU_RETAIN; + if (Retain) { + if (TM.getTargetTriple().isOSSolaris()) { + EmitUniqueSection = true; + Flags |= ELF::SHF_SUNW_NODISCARD; + } else if (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) { + EmitUniqueSection = true; + Flags |= ELF::SHF_GNU_RETAIN; + } } MCSectionELF *Section = selectELFSectionForGlobal( @@ -1171,6 +1181,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } +MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + // TODO(yln): Remove -lower-global-dtors-via-cxa-atexit fallback flag + // (LowerGlobalDtorsViaCxaAtExit) and always issue a fatal error here. 
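The ELF hunks above add SHF_SUNW_NODISCARD as the Solaris counterpart of SHF_GNU_RETAIN and route exclude-kind sections such as .llvm.offloading to SHF_EXCLUDE. The retain decision, condensed into a standalone helper (the helper is illustrative; the patch inlines this logic at two call sites):

  static unsigned retainFlagFor(const Triple &TT, const MCAsmInfo &MAI) {
    if (TT.isOSSolaris())
      return ELF::SHF_SUNW_NODISCARD;  // Solaris ld honors this flag
    if (MAI.useIntegratedAssembler() || MAI.binutilsIsAtLeast(2, 36))
      return ELF::SHF_GNU_RETAIN;      // needs GAS from binutils >= 2.36
    return 0;                          // older GAS: no retain flag available
  }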
+ if (TM->Options.LowerGlobalDtorsViaCxaAtExit) + report_fatal_error("@llvm.global_dtors should have been lowered already"); + return StaticDtorSection; +} + void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { // Emit the linker options if present. @@ -1207,12 +1226,12 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, // Get the section. MCSectionMachO *S = getContext().getMachOSection( Segment, Section, TAA, StubSize, SectionKind::getData()); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.emitInt32(VersionVal); Streamer.emitInt32(ImageInfoFlags); - Streamer.AddBlankLine(); + Streamer.addBlankLine(); } static void checkMachOComdat(const GlobalValue *GV) { @@ -1520,6 +1539,9 @@ getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) { if (K.isMetadata()) Flags |= COFF::IMAGE_SCN_MEM_DISCARDABLE; + else if (K.isExclude()) + Flags |= + COFF::IMAGE_SCN_LNK_REMOVE | COFF::IMAGE_SCN_MEM_DISCARDABLE; else if (K.isText()) Flags |= COFF::IMAGE_SCN_MEM_EXECUTE | @@ -1755,11 +1777,11 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.emitInt32(Version); Streamer.emitInt32(Flags); - Streamer.AddBlankLine(); + Streamer.addBlankLine(); } emitCGProfileMetadata(Streamer, M); @@ -1772,7 +1794,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( // spec, this section is a space-separated string containing flags for // linker. MCSection *Sec = getDrectveSection(); - Streamer.SwitchSection(Sec); + Streamer.switchSection(Sec); for (const auto *Option : LinkerOptions->operands()) { for (const auto &Piece : cast(Option)->operands()) { // Lead with a space for consistency with our dllexport implementation. @@ -1791,7 +1813,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( getMangler()); OS.flush(); if (!Flags.empty()) { - Streamer.SwitchSection(getDrectveSection()); + Streamer.switchSection(getDrectveSection()); Streamer.emitBytes(Flags); } Flags.clear(); @@ -1817,7 +1839,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( OS.flush(); if (!Flags.empty()) { - Streamer.SwitchSection(getDrectveSection()); + Streamer.switchSection(getDrectveSection()); Streamer.emitBytes(Flags); } Flags.clear(); @@ -2170,8 +2192,7 @@ MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection( MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { - llvm_unreachable("@llvm.global_dtors should have been lowered already"); - return nullptr; + report_fatal_error("@llvm.global_dtors should have been lowered already"); } //===----------------------------------------------------------------------===// @@ -2544,10 +2565,24 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( XCOFF::XTY_SD)); } +MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA( + const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { + auto *LSDA = cast(LSDASection); + if (TM.getFunctionSections()) { + // If option -ffunction-sections is on, append the function name to the + // name of the LSDA csect so that each function has its own LSDA csect. 
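Both the MachO hook above and the Wasm hook in this same hunk now fail fast if @llvm.global_dtors survives to section selection: the new lowering pass is expected to have turned every destructor entry into a constructor-time __cxa_atexit registration, avoiding the deprecated __mod_term_func section. A freestanding C++ analogy of that rewrite (all names illustrative):

  extern "C" int __cxa_atexit(void (*Fn)(void *), void *Arg, void *Dso);
  extern void *__dso_handle;

  static void moduleDtor(void *) { /* former global_dtors body */ }

  // Runs as a static constructor and registers the destructor instead.
  __attribute__((constructor)) static void registerModuleDtor() {
    __cxa_atexit(moduleDtor, nullptr, &__dso_handle);
  }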
+ // This helps the linker to garbage-collect EH info of unused functions. + SmallString<128> NameStr = LSDA->getName(); + raw_svector_ostream(NameStr) << '.' << F.getName(); + LSDA = getContext().getXCOFFSection(NameStr, LSDA->getKind(), + LSDA->getCsectProp()); + } + return LSDA; +} //===----------------------------------------------------------------------===// // GOFF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {} +TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() = default; MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { @@ -2558,8 +2593,8 @@ MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { auto *Symbol = TM.getSymbol(GO); if (Kind.isBSS()) - return getContext().getGOFFSection(Symbol->getName(), - SectionKind::getBSS()); + return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(), + nullptr, nullptr); return getContext().getObjectFileInfo()->getTextSection(); } diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 0731cf9b28f4..af5d10103f78 100644 --- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -15,7 +15,6 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 05004fb935df..0bd229f4fc68 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePassRegistry.h" @@ -47,7 +48,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/SymbolRewriter.h" #include #include @@ -115,20 +115,18 @@ static cl::opt PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, - cl::desc("Verify generated machine code"), - cl::ZeroOrMore); -static cl::opt DebugifyAndStripAll( - "debugify-and-strip-all-safe", cl::Hidden, - cl::desc( - "Debugify MIR before and Strip debug after " - "each pass except those known to be unsafe when debug info is present"), - cl::ZeroOrMore); + cl::desc("Verify generated machine code")); +static cl::opt + DebugifyAndStripAll("debugify-and-strip-all-safe", cl::Hidden, + cl::desc("Debugify MIR before and Strip debug after " + "each pass except those known to be unsafe " + "when debug info is present")); static cl::opt DebugifyCheckAndStripAll( "debugify-check-and-strip-all-safe", cl::Hidden, cl::desc( "Debugify MIR before, by checking and stripping the debug info after, " - "each pass except those known to be unsafe when debug info is present"), - cl::ZeroOrMore); + "each pass except those known to be unsafe when debug info is " + "present")); // Enable or disable the MachineOutliner. 
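The option-declaration churn above mostly drops cl::ZeroOrMore: repeated occurrences of a cl::opt are now accepted by default, so the flag no longer carries information. An equivalent declaration after the cleanup (MyFlag is illustrative):

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // No cl::ZeroOrMore needed: passing -my-flag twice is not an error.
  static cl::opt<bool> MyFlag("my-flag", cl::Hidden, cl::init(false),
                              cl::desc("Illustrative boolean flag"));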
static cl::opt EnableMachineOutliner( "enable-machine-outliner", cl::desc("Enable the machine outliner"), @@ -139,6 +137,11 @@ static cl::opt EnableMachineOutliner( "Disable all outlining"), // Sentinel value for unspecified option. clEnumValN(RunOutliner::AlwaysOutline, "", ""))); +// Disable the pass to fix unwind information. Whether the pass is included in +// the pipeline is controlled via the target options; this option serves as a +// manual override. +static cl::opt DisableCFIFixup("disable-cfi-fixup", cl::Hidden, + cl::desc("Disable the CFI fixup pass")); // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -175,12 +178,12 @@ static cl::opt // Disable MIRProfileLoader before RegAlloc. This is for debugging and // tuning purposes. static cl::opt DisableRAFSProfileLoader( - "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden, + "disable-ra-fsprofile-loader", cl::init(false), cl::Hidden, cl::desc("Disable MIRProfileLoader before RegAlloc")); // Disable MIRProfileLoader before BlockPlacement. This is for debugging // and tuning purposes. static cl::opt DisableLayoutFSProfileLoader( - "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden, + "disable-layout-fsprofile-loader", cl::init(false), cl::Hidden, cl::desc("Disable MIRProfileLoader before BlockPlacement")); // Specify FSProfile file name. static cl::opt @@ -256,6 +259,11 @@ static cl::opt DisableExpandReductions( "disable-expand-reductions", cl::init(false), cl::Hidden, cl::desc("Disable the expand reduction intrinsics pass from running")); +/// Disable the select optimization pass. +static cl::opt DisableSelectOptimize( + "disable-select-optimize", cl::init(true), cl::Hidden, + cl::desc("Disable the select-optimization pass from running")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -490,6 +498,7 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(DisableConstantHoisting) SET_BOOLEAN_OPTION(DisableCGP) SET_BOOLEAN_OPTION(DisablePartialLibcallInlining) + SET_BOOLEAN_OPTION(DisableSelectOptimize) SET_BOOLEAN_OPTION(PrintLSR) SET_BOOLEAN_OPTION(PrintISelInput) SET_BOOLEAN_OPTION(PrintGCInfo) @@ -736,21 +745,21 @@ void TargetPassConfig::addPass(Pass *P) { if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum) Stopped = true; if (Started && !Stopped) { - if (AddingMachinePasses) + if (AddingMachinePasses) { + // Construct banner message before PM->add() as that may delete the pass. + std::string Banner = + std::string("After ") + std::string(P->getPassName()); addMachinePrePasses(); - std::string Banner; - // Construct banner message before PM->add() as that may delete the pass. - if (AddingMachinePasses) - Banner = std::string("After ") + std::string(P->getPassName()); - PM->add(P); - if (AddingMachinePasses) + PM->add(P); addMachinePostPasses(Banner); + } else { + PM->add(P); + } // Add the passes after the pass P if there are any.
- for (const auto &IP : Impl->InsertedPasses) { + for (const auto &IP : Impl->InsertedPasses) if (IP.TargetPassID == PassID) addPass(IP.getInsertedPass()); - } } else { delete P; } @@ -895,6 +904,12 @@ void TargetPassConfig::addIRPasses() { addPass(&ShadowStackGCLoweringID); addPass(createLowerConstantIntrinsicsPass()); + // For MachO, lower @llvm.global_dtors into @llvm_global_ctors with + // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func. + if (TM->getTargetTriple().isOSBinFormatMachO() && + TM->Options.LowerGlobalDtorsViaCxaAtExit) + addPass(createLowerGlobalDtorsLegacyPass()); + // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -922,6 +937,13 @@ void TargetPassConfig::addIRPasses() { // Allow disabling it for testing purposes. if (!DisableExpandReductions) addPass(createExpandReductionsPass()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createTLSVariableHoistPass()); + + // Convert conditional moves to conditional jumps when profitable. + if (getOptLevel() != CodeGenOpt::None && !DisableSelectOptimize) + addPass(createSelectOptimizePass()); } /// Turn exception handling constructs into something the code generators can @@ -1261,12 +1283,19 @@ void TargetPassConfig::addMachinePasses() { // FIXME: In principle, BasicBlockSection::Labels and splitting can used // together. Update this check once we have addressed any issues. if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { + addPass(llvm::createBasicBlockSectionsProfileReaderPass( + TM->getBBSectionsFuncListBuf())); + } + addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { addPass(createMachineFunctionSplitterPass()); } + if (!DisableCFIFixup && TM->Options.EnableCFIFixup) + addPass(createCFIFixup()); + // Add passes that directly emit MI after all other MI passes. 
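The addPass restructuring above fixes the lifetime hazard its new comment names: the legacy pass manager may take ownership of and delete P inside add(), so the banner must be captured first. Reduced to its essentials:

  // P may be freed inside PM->add() (ownership transfer), so read the
  // pass name before handing P over; the saved string stays valid.
  std::string Banner = std::string("After ") + std::string(P->getPassName());
  addMachinePrePasses();
  PM->add(P);                   // may delete P
  addMachinePostPasses(Banner); // uses the copy, never touches P again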
addPreEmitPass2(); @@ -1376,6 +1405,11 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { return createTargetRegisterAllocator(Optimized); } +bool TargetPassConfig::isCustomizedRegAlloc() { + return RegAlloc != + (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator; +} + bool TargetPassConfig::addRegAssignAndRewriteFast() { if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator && RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator) diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 6bcf79547056..ac346585b0f8 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -16,10 +16,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp index ce59452fd1b8..ac07c86cab85 100644 --- a/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/llvm/lib/CodeGen/TargetSchedule.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index e4520d8ccb1e..ba2c8dda7de5 100644 --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -45,10 +45,6 @@ bool TargetSubtargetInfo::enableRALocalReassignment( return true; } -bool TargetSubtargetInfo::enableAdvancedRASplitCost() const { - return false; -} - bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index dfd962be2882..c44fd9f97383 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -28,7 +28,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" @@ -50,7 +49,6 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -163,6 +161,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); void eliminateRegSequence(MachineBasicBlock::iterator&); + bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands); public: static char ID; // Pass identification, replacement for typeid @@ -1629,6 +1628,61 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } +// For every tied operand 
pair this function transforms statepoint from +// RegA = STATEPOINT ... RegB(tied-def N) +// to +// RegB = STATEPOINT ... RegB(tied-def N) +// and replaces all uses of RegA with RegB. +// No extra COPY instruction is necessary because tied use is killed at +// STATEPOINT. +bool TwoAddressInstructionPass::processStatepoint( + MachineInstr *MI, TiedOperandMap &TiedOperands) { + + bool NeedCopy = false; + for (auto &TO : TiedOperands) { + Register RegB = TO.first; + if (TO.second.size() != 1) { + NeedCopy = true; + continue; + } + + unsigned SrcIdx = TO.second[0].first; + unsigned DstIdx = TO.second[0].second; + + MachineOperand &DstMO = MI->getOperand(DstIdx); + Register RegA = DstMO.getReg(); + + assert(RegB == MI->getOperand(SrcIdx).getReg()); + + if (RegA == RegB) + continue; + + MRI->replaceRegWith(RegA, RegB); + + if (LIS) { + VNInfo::Allocator &A = LIS->getVNInfoAllocator(); + LiveInterval &LI = LIS->getInterval(RegB); + for (auto &S : LIS->getInterval(RegA)) { + VNInfo *VNI = LI.getNextValue(S.start, A); + LiveRange::Segment NewSeg(S.start, S.end, VNI); + LI.addSegment(NewSeg); + } + LIS->removeInterval(RegA); + } + + if (LV) { + if (MI->getOperand(SrcIdx).isKill()) + LV->removeVirtualRegisterKilled(RegB, *MI); + LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB); + LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA); + SrcInfo.AliveBlocks |= DstInfo.AliveBlocks; + for (auto *KillMI : DstInfo.Kills) + LV->addVirtualRegisterKilled(RegB, *KillMI, false); + } + } + return !NeedCopy; +} + /// Reduce two-address instructions to two operands. bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; @@ -1722,6 +1776,14 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } } + if (mi->getOpcode() == TargetOpcode::STATEPOINT && + processStatepoint(&*mi, TiedOperands)) { + TiedOperands.clear(); + LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + mi = nmi; + continue; + } + // Now iterate over the information collected above. 
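The new processStatepoint above sidesteps the COPY that processTiedPairs would otherwise insert for tied statepoint operands. The core rewrite in isolation, with the MIR before/after as comments:

  // Before: %a = STATEPOINT ..., %b(tied-def k)
  // After:  %b = STATEPOINT ..., %b(tied-def k)   and every use of %a -> %b
  // No COPY is needed because the tied use of %b is killed at the STATEPOINT.
  MRI->replaceRegWith(RegA, RegB);
  // LiveIntervals (or LiveVariables) bookkeeping is then migrated from
  // %a to %b, as the hunk above does segment by segment.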
for (auto &TO : TiedOperands) { processTiedPairs(&*mi, TO.second, Dist); @@ -1733,11 +1795,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // From %reg = INSERT_SUBREG %reg, %subreg, subidx // To %reg:subidx = COPY %subreg unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); + mi->removeOperand(3); assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); mi->getOperand(0).setSubReg(SubIdx); mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); - mi->RemoveOperand(1); + mi->removeOperand(1); mi->setDesc(TII->get(TargetOpcode::COPY)); LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); @@ -1858,7 +1920,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF"); MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j) - MI.RemoveOperand(j); + MI.removeOperand(j); } else { if (LIS) LIS->RemoveMachineInstrFromMaps(MI); diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 01ea171e5ea2..166a3c413f6a 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -24,15 +24,13 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -44,9 +42,9 @@ using namespace llvm; -static cl::opt -DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), - cl::desc("Disable type promotion pass")); +static cl::opt DisablePromotion("disable-type-promotion", cl::Hidden, + cl::init(false), + cl::desc("Disable type promotion pass")); // The goal of this pass is to enable more efficient code generation for // operations on narrow types (i.e. 
types with < 32-bits) and this is a @@ -103,17 +101,16 @@ DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), namespace { class IRPromoter { LLVMContext &Ctx; - IntegerType *OrigTy = nullptr; unsigned PromotedWidth = 0; - SetVector &Visited; - SetVector &Sources; - SetVector &Sinks; + SetVector &Visited; + SetVector &Sources; + SetVector &Sinks; SmallPtrSetImpl &SafeWrap; IntegerType *ExtTy = nullptr; - SmallPtrSet NewInsts; - SmallPtrSet InstsToRemove; - DenseMap> TruncTysMap; - SmallPtrSet Promoted; + SmallPtrSet NewInsts; + SmallPtrSet InstsToRemove; + DenseMap> TruncTysMap; + SmallPtrSet Promoted; void ReplaceAllUsersOfWith(Value *From, Value *To); void ExtendSources(); @@ -123,16 +120,13 @@ class IRPromoter { void Cleanup(); public: - IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, + IRPromoter(LLVMContext &C, unsigned Width, SetVector &visited, SetVector &sources, SetVector &sinks, SmallPtrSetImpl &wrap) - : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); - assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < - ExtTy->getPrimitiveSizeInBits().getFixedSize() && - "Original type not smaller than extended type"); } void Mutate(); @@ -142,8 +136,8 @@ class TypePromotion : public FunctionPass { unsigned TypeSize = 0; LLVMContext *Ctx = nullptr; unsigned RegisterBitWidth = 0; - SmallPtrSet AllVisited; - SmallPtrSet SafeToPromote; + SmallPtrSet AllVisited; + SmallPtrSet SafeToPromote; SmallPtrSet SafeWrap; // Does V have the same size result type as TypeSize. @@ -190,7 +184,7 @@ public: bool runOnFunction(Function &F) override; }; -} +} // namespace static bool GenerateSignBits(Instruction *I) { unsigned Opc = I->getOpcode(); @@ -245,7 +239,7 @@ bool TypePromotion::isSource(Value *V) { bool TypePromotion::isSink(Value *V) { // TODO The truncate also isn't actually necessary because we would already // proved that the data value is kept within the range of the original data - // type. + // type. We currently remove any truncs inserted for handling zext sinks. // Sinks are: // - points where the value in the register is being observed, such as an @@ -269,7 +263,7 @@ bool TypePromotion::isSink(Value *V) { /// Return whether this instruction can safely wrap. bool TypePromotion::isSafeWrap(Instruction *I) { - // We can support a, potentially, wrapping instruction (I) if: + // We can support a potentially wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. // - The wrapping value (I) is decreasing, i.e would underflow - wrapping @@ -356,7 +350,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) { if (!OverflowConst.isNonPositive()) return false; - // Using C1 = OverflowConst and C2 = ICmpConst, we can use either prove that: + // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that: // zext(x) + sext(C1) s C2 // zext(x) + sext(C1) Users; + SmallVector Users; Instruction *InstTo = dyn_cast(To); bool ReplacedAll = true; @@ -485,12 +479,18 @@ void IRPromoter::PromoteTree() { continue; if (auto *Const = dyn_cast(Op)) { - Constant *NewConst = SafeWrap.contains(I) + // For subtract, we don't need to sext the constant. We only put it in + // SafeWrap because SafeWrap.size() is used elsewhere. + // For cmp, we need to sign extend a constant appearing in either + // operand. For add, we should only sign extend the RHS. 
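The extension rule spelled out above (sign-extend a wrapping constant for an icmp or for the right-hand side of an add, zero-extend otherwise, and never sign-extend for a sub) is what keeps wrapped arithmetic correct after widening; the NewConst selection just below applies it. A concrete instance of the underflow case as plain C++ (widths illustrative):

  #include <cstdint>

  bool narrow(uint8_t X) { return uint8_t(X - 1) < 20; }

  bool widened(uint8_t X) {
    // zext(X) + sext(-1): X == 0 yields 0xFFFFFFFF instead of 255, but
    // both values fail the unsigned < 20 test, so every X agrees with
    // narrow(). That is exactly the property isSafeWrap checks for.
    uint32_t Wide = uint32_t(X) + uint32_t(int32_t(-1));
    return Wide < 20u;
  }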
+ Constant *NewConst = (SafeWrap.contains(I) && + (I->getOpcode() == Instruction::ICmp || i == 1) && + I->getOpcode() != Instruction::Sub) ? ConstantExpr::getSExt(Const, ExtTy) : ConstantExpr::getZExt(Const, ExtTy); I->setOperand(i, NewConst); } else if (isa(Op)) - I->setOperand(i, UndefValue::get(ExtTy)); + I->setOperand(i, ConstantInt::get(ExtTy, 0)); } // Mutate the result type, unless this is an icmp or switch. @@ -506,7 +506,7 @@ void IRPromoter::TruncateSinks() { IRBuilder<> Builder{Ctx}; - auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* { + auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * { if (!isa(V) || !isa(V->getType())) return nullptr; @@ -514,7 +514,7 @@ void IRPromoter::TruncateSinks() { return nullptr; LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for " - << *V << "\n"); + << *V << "\n"); Builder.SetInsertPoint(cast(V)); auto *Trunc = dyn_cast(Builder.CreateTrunc(V, TruncTy)); if (Trunc) @@ -550,6 +550,11 @@ void IRPromoter::TruncateSinks() { continue; } + // Don't insert a trunc for a zext which can still legally promote. + if (auto ZExt = dyn_cast(I)) + if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth) + continue; + // Now handle the others. for (unsigned i = 0; i < I->getNumOperands(); ++i) { Type *Ty = TruncTysMap[I][i]; @@ -576,16 +581,14 @@ void IRPromoter::Cleanup() { Value *Src = ZExt->getOperand(0); if (ZExt->getSrcTy() == ZExt->getDestTy()) { LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt - << "\n"); + << "\n"); ReplaceAllUsersOfWith(ZExt, Src); continue; } - // Unless they produce a value that is narrower than ExtTy, we can - // replace the result of the zext with the input of a newly inserted - // trunc. - if (NewInsts.count(Src) && isa(Src) && - Src->getType() == OrigTy) { + // We've inserted a trunc for a zext sink, but we already know that the + // input is in range, negating the need for the trunc. + if (NewInsts.count(Src) && isa(Src)) { auto *Trunc = cast(Src); assert(Trunc->getOperand(0)->getType() == ExtTy && "expected inserted trunc to be operating on i32"); @@ -615,7 +618,7 @@ void IRPromoter::ConvertTruncs() { unsigned NumBits = DestTy->getScalarSizeInBits(); ConstantInt *Mask = - ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); + ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); if (auto *I = dyn_cast(Masked)) @@ -626,8 +629,8 @@ void IRPromoter::ConvertTruncs() { } void IRPromoter::Mutate() { - LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from " - << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to " + << PromotedWidth << "-bits\n"); // Cache original types of the values that will likely need truncating for (auto *I : Sinks) { @@ -677,8 +680,7 @@ bool TypePromotion::isSupportedType(Value *V) { if (Ty->isVoidTy() || Ty->isPointerTy()) return true; - if (!isa(Ty) || - cast(Ty)->getBitWidth() == 1 || + if (!isa(Ty) || cast(Ty)->getBitWidth() == 1 || cast(Ty)->getBitWidth() > RegisterBitWidth) return false; @@ -738,13 +740,12 @@ bool TypePromotion::isSupportedValue(Value *V) { /// smaller than the targeted promoted type. Check that we're not trying to /// promote something larger than our base 'TypeSize' type. 
bool TypePromotion::isLegalToPromote(Value *V) { - auto *I = dyn_cast(V); if (!I) return true; if (SafeToPromote.count(I)) - return true; + return true; if (isPromotedResultSafe(I) || isSafeWrap(I)) { SafeToPromote.insert(I); @@ -765,10 +766,10 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " << TypeSize << " bits to " << PromotedWidth << "\n"); - SetVector WorkList; - SetVector Sources; - SetVector Sinks; - SetVector CurrentVisited; + SetVector WorkList; + SetVector Sources; + SetVector Sinks; + SetVector CurrentVisited; WorkList.insert(V); // Return true if V was added to the worklist as a supported instruction, @@ -839,14 +840,15 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { } } - LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n"; - for (auto *I : CurrentVisited) - I->dump(); - ); + LLVM_DEBUG({ + dbgs() << "IR Promotion: Visited nodes:\n"; + for (auto *I : CurrentVisited) + I->dump(); + }); unsigned ToPromote = 0; unsigned NonFreeArgs = 0; - SmallPtrSet Blocks; + SmallPtrSet Blocks; for (auto *V : CurrentVisited) { if (auto *I = dyn_cast(V)) Blocks.insert(I->getParent()); @@ -860,16 +862,16 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { if (Sinks.count(cast(V))) continue; - ++ToPromote; - } + ++ToPromote; + } // DAG optimizations should be able to handle these cases better, especially // for function arguments. if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) return false; - IRPromoter Promoter(*Ctx, cast(OrigTy), PromotedWidth, - CurrentVisited, Sources, Sinks, SafeWrap); + IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks, + SafeWrap); Promoter.Mutate(); return true; } @@ -893,14 +895,14 @@ bool TypePromotion::runOnFunction(Function &F) { const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); const TargetTransformInfo &TII = - getAnalysis().getTTI(F); + getAnalysis().getTTI(F); RegisterBitWidth = TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize(); Ctx = &F.getParent()->getContext(); // Search up from icmps to try to promote their operands. 
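TryToPromote above only commits when at least two instructions would benefit, then hands the visited set to IRPromoter; the search starts from unsigned icmps over illegally narrow types, per the comment above. A source-level analogy of the net effect (widths illustrative; the pass itself rewrites IR):

  #include <cstdint>

  // Narrow form: the backend must renarrow A + B before comparing.
  bool before(uint8_t A, uint8_t B) { return uint8_t(A + B) > 42; }

  // Promoted form: the chain is 32-bit; one mask at the observation
  // point preserves the 8-bit wrap-around semantics.
  bool after(uint8_t A, uint8_t B) {
    return ((uint32_t(A) + uint32_t(B)) & 0xFFu) > 42u;
  }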
for (BasicBlock &BB : F) { - for (auto &I : BB) { + for (Instruction &I : BB) { if (AllVisited.count(&I)) continue; @@ -909,8 +911,7 @@ bool TypePromotion::runOnFunction(Function &F) { auto *ICmp = cast(&I); // Skip signed or pointer compares - if (ICmp->isSigned() || - !isa(ICmp->getOperand(0)->getType())) + if (ICmp->isSigned() || !isa(ICmp->getOperand(0)->getType())) continue; LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); @@ -921,13 +922,13 @@ bool TypePromotion::runOnFunction(Function &F) { if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) break; - if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != + if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger) break; - EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); + EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " - << "for promoted type\n"); + << "for promoted type\n"); break; } @@ -936,13 +937,7 @@ bool TypePromotion::runOnFunction(Function &F) { } } } - LLVM_DEBUG(if (verifyFunction(F, &dbgs())) { - dbgs() << F; - report_fatal_error("Broken function after type promotion"); - }); } - if (MadeChange) - LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); AllVisited.clear(); SafeToPromote.clear(); @@ -956,6 +951,4 @@ INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) char TypePromotion::ID = 0; -FunctionPass *llvm::createTypePromotionPass() { - return new TypePromotion(); -} +FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); } diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 3426a03b6083..5e8514f525e9 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,16 +26,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -131,8 +125,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) if (start->getOperand(i).isMBB() && start->getOperand(i).getMBB() == &BB) { - start->RemoveOperand(i); - start->RemoveOperand(i-1); + start->removeOperand(i); + start->removeOperand(i-1); } start++; @@ -162,8 +156,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { while (phi != BB.end() && phi->isPHI()) { for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) if (!preds.count(phi->getOperand(i).getMBB())) { - phi->RemoveOperand(i); - phi->RemoveOperand(i-1); + phi->removeOperand(i); + phi->removeOperand(i-1); ModifiedPHI = true; } diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 5f59cb4643f2..8b5b585090f5 100644 --- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" 
#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -43,19 +42,18 @@ using namespace llvm; #define DEBUG_TYPE "machine-scheduler" static cl::opt IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, - cl::ZeroOrMore, cl::init(false)); + cl::init(false)); static cl::opt UseNewerCandidate("use-newer-candidate", cl::Hidden, - cl::ZeroOrMore, cl::init(true)); + cl::init(true)); static cl::opt SchedDebugVerboseLevel("misched-verbose-level", - cl::Hidden, cl::ZeroOrMore, - cl::init(1)); + cl::Hidden, cl::init(1)); // Check if the scheduler should penalize instructions that are available to // early due to a zero-latency dependence. static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, - cl::ZeroOrMore, cl::init(true)); + cl::init(true)); // This value is used to determine if a register class is a high pressure set. // We compute the maximum number of registers needed and divided by the total diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 0c42bef82005..f577aff39ea7 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TypeSize.h" +#include "llvm/Support/WithColor.h" using namespace llvm; EVT EVT::changeExtendedTypeToInteger() const { @@ -179,19 +180,22 @@ std::string EVT::getEVTString() const { /// specified EVT. For integer types, this returns an unsigned type. Note /// that this will abort for types that cannot be represented. Type *EVT::getTypeForEVT(LLVMContext &Context) const { + // clang-format off switch (V.SimpleTy) { default: assert(isExtended() && "Type is not extended!"); return LLVMTy; case MVT::isVoid: return Type::getVoidTy(Context); case MVT::i1: return Type::getInt1Ty(Context); + case MVT::i2: return Type::getIntNTy(Context, 2); + case MVT::i4: return Type::getIntNTy(Context, 4); case MVT::i8: return Type::getInt8Ty(Context); case MVT::i16: return Type::getInt16Ty(Context); case MVT::i32: return Type::getInt32Ty(Context); case MVT::i64: return Type::getInt64Ty(Context); case MVT::i128: return IntegerType::get(Context, 128); case MVT::f16: return Type::getHalfTy(Context); - case MVT::bf16: return Type::getBFloatTy(Context); + case MVT::bf16: return Type::getBFloatTy(Context); case MVT::f32: return Type::getFloatTy(Context); case MVT::f64: return Type::getDoubleTy(Context); case MVT::f80: return Type::getX86_FP80Ty(Context); @@ -228,6 +232,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1024); + case MVT::v128i2: + return FixedVectorType::get(Type::getIntNTy(Context, 2), 128); + case MVT::v64i4: + return FixedVectorType::get(Type::getIntNTy(Context, 4), 64); case MVT::v1i8: return FixedVectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: @@ -500,6 +508,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return ScalableVectorType::get(Type::getBFloatTy(Context), 4); case MVT::nxv8bf16: return ScalableVectorType::get(Type::getBFloatTy(Context), 8); + case MVT::nxv16bf16: + return ScalableVectorType::get(Type::getBFloatTy(Context), 16); + case MVT::nxv32bf16: + return ScalableVectorType::get(Type::getBFloatTy(Context), 32); case MVT::nxv1f32: return 
ScalableVectorType::get(Type::getFloatTy(Context), 1); case MVT::nxv2f32: @@ -520,6 +532,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return ScalableVectorType::get(Type::getDoubleTy(Context), 8); case MVT::Metadata: return Type::getMetadataTy(Context); } + // clang-format on } /// Return the value type corresponding to the specified type. This returns all diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index c04a7b28eff9..aa6645227edb 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -77,8 +77,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsWebAssembly.h" @@ -212,9 +212,15 @@ bool WasmEHPrepare::prepareEHPads(Function &F) { assert(F.hasPersonalityFn() && "Personality function not found"); - // __wasm_lpad_context global variable + // __wasm_lpad_context global variable. + // This variable should be thread local. If the target does not support TLS, + // we depend on CoalesceFeaturesAndStripAtomics to downgrade it to + // non-thread-local ones, in which case we don't allow this object to be + // linked with other objects using shared memory. LPadContextGV = cast( M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); + LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); + LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, "lpad_index_gep"); LSDAField = diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp index d31183e46d65..b835503ee9ed 100644 --- a/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -19,14 +19,14 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -1256,4 +1256,4 @@ void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II, LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd); } -WinEHFuncInfo::WinEHFuncInfo() {} +WinEHFuncInfo::WinEHFuncInfo() = default; diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index b56095ca9a96..50c52190c1f6 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -10,7 +10,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" @@ -19,9 +18,11 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include 
"llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -132,9 +133,9 @@ static bool isTypeTag(uint16_t Tag) { return false; } -AddressesMap::~AddressesMap() {} +AddressesMap::~AddressesMap() = default; -DwarfEmitter::~DwarfEmitter() {} +DwarfEmitter::~DwarfEmitter() = default; static Optional StripTemplateParameters(StringRef Name) { // We are looking for template parameters to strip from Name. e.g. @@ -360,16 +361,16 @@ static bool analyzeContextInfo( } Info.ParentIdx = Current.ParentIdx; - bool InClangModule = CU.isClangModule() || Current.InImportedModule; - if (CU.hasODR() || InClangModule) { + Info.InModuleScope = CU.isClangModule() || Current.InImportedModule; + if (CU.hasODR() || Info.InModuleScope) { if (Current.Context) { auto PtrInvalidPair = Contexts.getChildDeclContext( - *Current.Context, Current.Die, CU, InClangModule); + *Current.Context, Current.Die, CU, Info.InModuleScope); Current.Context = PtrInvalidPair.getPointer(); Info.Ctxt = PtrInvalidPair.getInt() ? nullptr : PtrInvalidPair.getPointer(); if (Info.Ctxt) - Info.Ctxt->setDefinedInClangModule(InClangModule); + Info.Ctxt->setDefinedInClangModule(Info.InModuleScope); } else Info.Ctxt = Current.Context = nullptr; } @@ -439,8 +440,7 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr, // if the variable has a valid relocation, so that the DIEInfo is filled. // However, we don't want a static variable in a function to force us to keep // the enclosing function, unless requested explicitly. - const bool HasLiveMemoryLocation = - RelocMgr.hasLiveMemoryLocation(DIE, MyInfo); + const bool HasLiveMemoryLocation = RelocMgr.isLiveVariable(DIE, MyInfo); if (!HasLiveMemoryLocation || ((Flags & TF_InFunctionScope) && !LLVM_UNLIKELY(Options.KeepFunctionForStatic))) return Flags; @@ -468,8 +468,8 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE( if (!LowPc) return Flags; - assert(LowPc.hasValue() && "low_pc attribute is not an address."); - if (!RelocMgr.hasLiveAddressRange(DIE, MyInfo)) + assert(LowPc && "low_pc attribute is not an address."); + if (!RelocMgr.isLiveSubprogram(DIE, MyInfo)) return Flags; if (Options.Verbose) { @@ -490,7 +490,7 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE( // generation bugs aside, this is really wrong in the case of labels, where // a label marking the end of a function will have a PC == CU's high_pc. if (dwarf::toAddress(OrigUnit.getUnitDIE().find(dwarf::DW_AT_high_pc)) - .getValueOr(UINT64_MAX) <= LowPc) + .value_or(UINT64_MAX) <= LowPc) return Flags; Unit.addLabelLowPc(*LowPc, MyInfo.AddrAdjust); return Flags | TF_Keep; @@ -616,6 +616,27 @@ void DWARFLinker::lookForChildDIEsToKeep( } } +static bool isODRCanonicalCandidate(const DWARFDie &Die, CompileUnit &CU) { + CompileUnit::DIEInfo &Info = CU.getInfo(Die); + + if (!Info.Ctxt || (Die.getTag() == dwarf::DW_TAG_namespace)) + return false; + + if (!CU.hasODR() && !Info.InModuleScope) + return false; + + return !Info.Incomplete && Info.Ctxt != CU.getInfo(Info.ParentIdx).Ctxt; +} + +void DWARFLinker::markODRCanonicalDie(const DWARFDie &Die, CompileUnit &CU) { + CompileUnit::DIEInfo &Info = CU.getInfo(Die); + + Info.ODRMarkingDone = true; + if (Info.Keep && isODRCanonicalCandidate(Die, CU) && + !Info.Ctxt->hasCanonicalDIE()) + Info.Ctxt->setHasCanonicalDIE(); +} + /// Look at DIEs referenced by the given DIE and decide whether they should be /// kept. 
All DIEs referenced though attributes should be kept. void DWARFLinker::lookForRefDIEsToKeep( @@ -645,8 +666,6 @@ void DWARFLinker::lookForRefDIEsToKeep( if (auto RefDie = resolveDIEReference(File, Units, Val, Die, ReferencedCU)) { CompileUnit::DIEInfo &Info = ReferencedCU->getInfo(RefDie); - bool IsModuleRef = Info.Ctxt && Info.Ctxt->getCanonicalDIEOffset() && - Info.Ctxt->isDefinedInClangModule(); // If the referenced DIE has a DeclContext that has already been // emitted, then do not keep the one in this CU. We'll link to // the canonical DIE in cloneDieReferenceAttribute. @@ -657,15 +676,14 @@ void DWARFLinker::lookForRefDIEsToKeep( // // FIXME: compatibility with dsymutil-classic. There is no // reason not to unique ref_addr references. - if (AttrSpec.Form != dwarf::DW_FORM_ref_addr && (UseOdr || IsModuleRef) && - Info.Ctxt && - Info.Ctxt != ReferencedCU->getInfo(Info.ParentIdx).Ctxt && - Info.Ctxt->getCanonicalDIEOffset() && isODRAttribute(AttrSpec.Attr)) + if (AttrSpec.Form != dwarf::DW_FORM_ref_addr && + isODRAttribute(AttrSpec.Attr) && Info.Ctxt && + Info.Ctxt->hasCanonicalDIE()) continue; // Keep a module forward declaration if there is no definition. if (!(isODRAttribute(AttrSpec.Attr) && Info.Ctxt && - Info.Ctxt->getCanonicalDIEOffset())) + Info.Ctxt->hasCanonicalDIE())) Info.Prune = false; ReferencedDIEs.emplace_back(RefDie, *ReferencedCU); } @@ -756,6 +774,9 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap, lookForParentDIEsToKeep(Current.AncestorIdx, Current.CU, Current.Flags, Worklist); continue; + case WorklistItemType::MarkODRCanonicalDie: + markODRCanonicalDie(Current.Die, Current.CU); + continue; case WorklistItemType::LookForDIEsToKeep: break; } @@ -778,6 +799,16 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap, Current.Flags = shouldKeepDIE(AddressesMap, Ranges, Current.Die, File, Current.CU, MyInfo, Current.Flags); + // We need to mark context for the canonical die in the end of normal + // traversing(not TF_DependencyWalk) or after normal traversing if die + // was not marked as kept. + if (!(Current.Flags & TF_DependencyWalk) || + (MyInfo.ODRMarkingDone && !MyInfo.Keep)) { + if (Current.CU.hasODR() || MyInfo.InModuleScope) + Worklist.emplace_back(Current.Die, Current.CU, + WorklistItemType::MarkODRCanonicalDie); + } + // Finish by looking for child DIEs. Because of the LIFO worklist we need // to schedule that work before any subsequent items are added to the // worklist. @@ -845,7 +876,7 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) { unsigned DWARFLinker::DIECloner::cloneStringAttribute( DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val, - const DWARFUnit &U, OffsetsStringPool &StringPool, AttributesInfo &Info) { + const DWARFUnit &, OffsetsStringPool &StringPool, AttributesInfo &Info) { Optional String = dwarf::toString(Val); if (!String) return 0; @@ -875,7 +906,6 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute( DIE *NewRefDie = nullptr; CompileUnit *RefUnit = nullptr; - DeclContext *Ctxt = nullptr; DWARFDie RefDie = Linker.resolveDIEReference(File, CompileUnits, Val, InputDIE, RefUnit); @@ -888,14 +918,14 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute( // If we already have emitted an equivalent DeclContext, just point // at it. 
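// [editorial note, not part of the vendored patch] lookForDIEsToKeep above is
// driven by an explicit LIFO worklist rather than recursion, and the new
// MarkODRCanonicalDie item depends on that ordering: it is pushed before a
// DIE's children, so it only runs once all of them have been processed. A
// simplified sketch of the scheme; the item kinds and the childrenOf() helper
// are invented for illustration.

#include "llvm/ADT/SmallVector.h"
#include <vector>

enum class ItemKind { Visit, MarkDone };
struct Item { ItemKind Kind; int Node; };
std::vector<int> childrenOf(int Node); // assumed helper for the sketch

void drain(llvm::SmallVector<Item> &Worklist) {
  while (!Worklist.empty()) {
    Item Cur = Worklist.pop_back_val();
    if (Cur.Kind == ItemKind::MarkDone) {
      // Post-order step: every child pushed after this item has already been
      // popped and handled by the time it comes off the stack.
      continue;
    }
    // Schedule the post-order step first...
    Worklist.push_back({ItemKind::MarkDone, Cur.Node});
    // ...then the children; LIFO order pops them before the MarkDone item.
    for (int Child : childrenOf(Cur.Node))
      Worklist.push_back({ItemKind::Visit, Child});
  }
}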
- if (isODRAttribute(AttrSpec.Attr)) { - Ctxt = RefInfo.Ctxt; - if (Ctxt && Ctxt->getCanonicalDIEOffset()) { - DIEInteger Attr(Ctxt->getCanonicalDIEOffset()); - Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), - dwarf::DW_FORM_ref_addr, Attr); - return U.getRefAddrByteSize(); - } + if (isODRAttribute(AttrSpec.Attr) && RefInfo.Ctxt && + RefInfo.Ctxt->getCanonicalDIEOffset()) { + assert(RefInfo.Ctxt->hasCanonicalDIE() && + "Offset to canonical die is set, but context is not marked"); + DIEInteger Attr(RefInfo.Ctxt->getCanonicalDIEOffset()); + Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), + dwarf::DW_FORM_ref_addr, Attr); + return U.getRefAddrByteSize(); } if (!RefInfo.Clone) { @@ -925,7 +955,7 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute( // A forward reference. Note and fixup later. Attr = 0xBADDEF; Unit.noteForwardReference( - NewRefDie, RefUnit, Ctxt, + NewRefDie, RefUnit, RefInfo.Ctxt, Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_ref_addr, DIEInteger(Attr))); } @@ -1356,10 +1386,10 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, assert(Die->getTag() == InputDIE.getTag()); Die->setOffset(OutOffset); - if ((Unit.hasODR() || Unit.isClangModule()) && !Info.Incomplete && - Die->getTag() != dwarf::DW_TAG_namespace && Info.Ctxt && - Info.Ctxt != Unit.getInfo(Info.ParentIdx).Ctxt && - !Info.Ctxt->getCanonicalDIEOffset()) { + if (isODRCanonicalCandidate(InputDIE, Unit) && Info.Ctxt && + (Info.Ctxt->getCanonicalDIEOffset() == 0)) { + if (!Info.Ctxt->hasCanonicalDIE()) + Info.Ctxt->setHasCanonicalDIE(); // We are about to emit a DIE that is the root of its own valid // DeclContext tree. Make the current offset the canonical offset // for this context. @@ -1384,8 +1414,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, DWARFDataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize()); // Modify the copy with relocated addresses. - if (ObjFile.Addresses->areRelocationsResolved() && - ObjFile.Addresses->applyValidRelocs(DIECopy, Offset, + if (ObjFile.Addresses->applyValidRelocs(DIECopy, Offset, Data.isLittleEndian())) { // If we applied relocations, we store the value of high_pc that was // potentially stored in the input DIE. If high_pc is an address @@ -1481,12 +1510,12 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, uint32_t Hash = hashFullyQualifiedName(InputDIE, Unit, File); uint64_t RuntimeLang = dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_runtime_class)) - .getValueOr(0); + .value_or(0); bool ObjCClassIsImplementation = (RuntimeLang == dwarf::DW_LANG_ObjC || RuntimeLang == dwarf::DW_LANG_ObjC_plus_plus) && dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_objc_complete_type)) - .getValueOr(0); + .value_or(0); Unit.addTypeAccelerator(Die, AttrInfo.Name, ObjCClassIsImplementation, Hash); } @@ -1788,16 +1817,19 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, void DWARFLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) { switch (Options.TheAccelTableKind) { - case AccelTableKind::Apple: + case DwarfLinkerAccelTableKind::None: + // Nothing to do. 
+ break; + case DwarfLinkerAccelTableKind::Apple: emitAppleAcceleratorEntriesForUnit(Unit); break; - case AccelTableKind::Dwarf: + case DwarfLinkerAccelTableKind::Dwarf: emitDwarfAcceleratorEntriesForUnit(Unit); break; - case AccelTableKind::Pub: + case DwarfLinkerAccelTableKind::Pub: emitPubAcceleratorEntriesForUnit(Unit); break; - case AccelTableKind::Default: + case DwarfLinkerAccelTableKind::Default: llvm_unreachable("The default must be updated to a concrete value."); break; } @@ -2216,7 +2248,7 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits( } void DWARFLinker::updateAccelKind(DWARFContext &Dwarf) { - if (Options.TheAccelTableKind != AccelTableKind::Default) + if (Options.TheAccelTableKind != DwarfLinkerAccelTableKind::Default) return; auto &DwarfObj = Dwarf.getDWARFObj(); @@ -2342,11 +2374,11 @@ bool DWARFLinker::link() { // would affect the decision. However, as they're built with the same // compiler and flags, it is safe to assume that they will follow the // decision made here. - if (Options.TheAccelTableKind == AccelTableKind::Default) { + if (Options.TheAccelTableKind == DwarfLinkerAccelTableKind::Default) { if (AtLeastOneDwarfAccelTable && !AtLeastOneAppleAccelTable) - Options.TheAccelTableKind = AccelTableKind::Dwarf; + Options.TheAccelTableKind = DwarfLinkerAccelTableKind::Dwarf; else - Options.TheAccelTableKind = AccelTableKind::Apple; + Options.TheAccelTableKind = DwarfLinkerAccelTableKind::Apple; } for (LinkContext &OptContext : ObjectContexts) { @@ -2362,6 +2394,10 @@ bool DWARFLinker::link() { if (!OptContext.File.Dwarf) continue; + + if (Options.VerifyInputDWARF) + verify(OptContext.File); + // Look for relocations that correspond to address map entries. // there was findvalidrelocations previously ... probably we need to gather @@ -2521,19 +2557,22 @@ bool DWARFLinker::link() { TheDwarfEmitter->emitAbbrevs(Abbreviations, MaxDwarfVersion); TheDwarfEmitter->emitStrings(OffsetsStringPool); switch (Options.TheAccelTableKind) { - case AccelTableKind::Apple: + case DwarfLinkerAccelTableKind::None: + // Nothing to do. + break; + case DwarfLinkerAccelTableKind::Apple: TheDwarfEmitter->emitAppleNames(AppleNames); TheDwarfEmitter->emitAppleNamespaces(AppleNamespaces); TheDwarfEmitter->emitAppleTypes(AppleTypes); TheDwarfEmitter->emitAppleObjc(AppleObjc); break; - case AccelTableKind::Dwarf: + case DwarfLinkerAccelTableKind::Dwarf: TheDwarfEmitter->emitDebugNames(DebugNames); break; - case AccelTableKind::Pub: + case DwarfLinkerAccelTableKind::Pub: // Already emitted by emitPubAcceleratorEntriesForUnit. 
break; - case AccelTableKind::Default: + case DwarfLinkerAccelTableKind::Default: llvm_unreachable("Default should have already been resolved."); break; } @@ -2631,4 +2670,15 @@ bool DWARFLinker::link() { return true; } +bool DWARFLinker::verify(const DWARFFile &File) { + assert(File.Dwarf); + + DIDumpOptions DumpOpts; + if (!File.Dwarf->verify(llvm::outs(), DumpOpts.noImplicitRecursion())) { + reportWarning("input verification failed", File); + return false; + } + return true; +} + } // namespace llvm diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp index acecb1788d10..e9e8be7fd008 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp @@ -90,9 +90,11 @@ void CompileUnit::fixupForwardReferences() { PatchLocation Attr; DeclContext *Ctxt; std::tie(RefDie, RefUnit, Ctxt, Attr) = Ref; - if (Ctxt && Ctxt->getCanonicalDIEOffset()) + if (Ctxt && Ctxt->hasCanonicalDIE()) { + assert(Ctxt->getCanonicalDIEOffset() && + "Canonical die offset is not set"); Attr.set(Ctxt->getCanonicalDIEOffset()); - else + } else Attr.set(RefDie->getOffset() + RefUnit->getStartOffset()); } } diff --git a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp index 5ab2ad0780a2..dfdfc5857569 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" +#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp index 99e12fce6513..55ff6b14f945 100644 --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -18,7 +18,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/MC/TargetRegistry.h" @@ -68,7 +67,7 @@ bool DwarfStreamer::init(Triple TheTriple, if (!MII) return error("no instr info info for target " + TripleName, Context), false; - MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *MC); + MCE = TheTarget->createMCCodeEmitter(*MII, *MC); if (!MCE) return error("no code emitter for target " + TripleName, Context), false; @@ -114,10 +113,10 @@ bool DwarfStreamer::init(Triple TheTriple, return true; } -void DwarfStreamer::finish() { MS->Finish(); } +void DwarfStreamer::finish() { MS->finish(); } void DwarfStreamer::switchToDebugInfoSection(unsigned DwarfVersion) { - MS->SwitchSection(MOFI->getDwarfInfoSection()); + MS->switchSection(MOFI->getDwarfInfoSection()); MC->setDwarfVersion(DwarfVersion); } @@ -175,14 +174,14 @@ void DwarfStreamer::emitCompileUnitHeader(CompileUnit &Unit, void DwarfStreamer::emitAbbrevs( const std::vector> &Abbrevs, unsigned DwarfVersion) { - MS->SwitchSection(MOFI->getDwarfAbbrevSection()); + MS->switchSection(MOFI->getDwarfAbbrevSection()); MC->setDwarfVersion(DwarfVersion); Asm->emitDwarfAbbrevs(Abbrevs); } /// Recursively emit the DIE tree rooted at \p Die. 
void DwarfStreamer::emitDIE(DIE &Die) { - MS->SwitchSection(MOFI->getDwarfInfoSection()); + MS->switchSection(MOFI->getDwarfInfoSection()); Asm->emitDwarfDIE(Die); DebugInfoSectionSize += Die.getSize(); } @@ -201,7 +200,7 @@ void DwarfStreamer::emitSectionContents(StringRef SecData, StringRef SecName) { .Default(nullptr); if (Section) { - MS->SwitchSection(Section); + MS->switchSection(Section); MS->emitBytes(SecData); } @@ -221,7 +220,7 @@ void DwarfStreamer::emitPaperTrailWarningsDie(DIE &Die) { /// Emit the debug_str section stored in \p Pool. void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) { - Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfStrSection()); std::vector Entries = Pool.getEntriesForEmission(); for (auto Entry : Entries) { // Emit the string itself. @@ -233,7 +232,7 @@ void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) { #if 0 if (DwarfVersion >= 5) { // Emit an empty string offset section. - Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrOffSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfStrOffSection()); Asm->emitDwarfUnitLength(4, "Length of String Offsets Set"); Asm->emitInt16(DwarfVersion); Asm->emitInt16(0); @@ -256,7 +255,7 @@ void DwarfStreamer::emitDebugNames( UniqueIdToCuMap[CU.ID] = Id++; } - Asm->OutStreamer->SwitchSection(MOFI->getDwarfDebugNamesSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection()); emitDWARF5AccelTable( Asm.get(), Table, CompUnits, [&UniqueIdToCuMap](const DWARF5AccelTableStaticData &Entry) { @@ -266,7 +265,7 @@ void DwarfStreamer::emitDebugNames( void DwarfStreamer::emitAppleNamespaces( AccelTable &Table) { - Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelNamespaceSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfAccelNamespaceSection()); auto *SectionBegin = Asm->createTempSymbol("namespac_begin"); Asm->OutStreamer->emitLabel(SectionBegin); emitAppleAccelTable(Asm.get(), Table, "namespac", SectionBegin); @@ -274,7 +273,7 @@ void DwarfStreamer::emitAppleNamespaces( void DwarfStreamer::emitAppleNames( AccelTable &Table) { - Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelNamesSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfAccelNamesSection()); auto *SectionBegin = Asm->createTempSymbol("names_begin"); Asm->OutStreamer->emitLabel(SectionBegin); emitAppleAccelTable(Asm.get(), Table, "names", SectionBegin); @@ -282,7 +281,7 @@ void DwarfStreamer::emitAppleNames( void DwarfStreamer::emitAppleObjc( AccelTable &Table) { - Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelObjCSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfAccelObjCSection()); auto *SectionBegin = Asm->createTempSymbol("objc_begin"); Asm->OutStreamer->emitLabel(SectionBegin); emitAppleAccelTable(Asm.get(), Table, "objc", SectionBegin); @@ -290,7 +289,7 @@ void DwarfStreamer::emitAppleObjc( void DwarfStreamer::emitAppleTypes( AccelTable &Table) { - Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelTypesSection()); + Asm->OutStreamer->switchSection(MOFI->getDwarfAccelTypesSection()); auto *SectionBegin = Asm->createTempSymbol("types_begin"); Asm->OutStreamer->emitLabel(SectionBegin); emitAppleAccelTable(Asm.get(), Table, "types", SectionBegin); @@ -300,7 +299,7 @@ void DwarfStreamer::emitAppleTypes( void DwarfStreamer::emitSwiftAST(StringRef Buffer) { MCSection *SwiftASTSection = MOFI->getDwarfSwiftASTSection(); SwiftASTSection->setAlignment(Align(32)); - MS->SwitchSection(SwiftASTSection); + 
MS->switchSection(SwiftASTSection); MS->emitBytes(Buffer); } @@ -312,7 +311,7 @@ void DwarfStreamer::emitSwiftReflectionSection( if (ReflectionSection == nullptr) return; ReflectionSection->setAlignment(Align(Alignment)); - MS->SwitchSection(ReflectionSection); + MS->switchSection(ReflectionSection); MS->emitBytes(Buffer); } @@ -325,7 +324,7 @@ void DwarfStreamer::emitRangesEntries( const FunctionIntervals::const_iterator &FuncRange, const std::vector &Entries, unsigned AddressSize) { - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); // Offset each range by the right amount. int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset; @@ -377,7 +376,7 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit, llvm::sort(Ranges); if (!Ranges.empty()) { - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfARangesSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfARangesSection()); MCSymbol *BeginLabel = Asm->createTempSymbol("Barange"); MCSymbol *EndLabel = Asm->createTempSymbol("Earange"); @@ -419,7 +418,7 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit, if (!DoDebugRanges) return; - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); // Offset each range by the right amount. int64_t PcOffset = -Unit.getLowPc(); // Emit coalesced ranges. @@ -447,7 +446,7 @@ void DwarfStreamer::emitLocationsForUnit( if (Attributes.empty()) return; - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLocSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfLocSection()); unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize(); uint64_t BaseAddressMarker = (AddressSize == 8) @@ -509,7 +508,7 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params, std::vector &Rows, unsigned PointerSize) { // Switch to the section where the table will be emitted into. - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection()); MCSymbol *LineStartSym = MC->createTempSymbol(); MCSymbol *LineEndSym = MC->createTempSymbol(); @@ -650,7 +649,7 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params, /// Copy the debug_line over to the updated binary while unobfuscating the file /// names and directories. void DwarfStreamer::translateLineTable(DataExtractor Data, uint64_t Offset) { - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection()); StringRef Contents = Data.getData(); // We have to deconstruct the line table header, because it contains to @@ -738,7 +737,7 @@ void DwarfStreamer::emitPubSectionForUnit( return; // Start the dwarf pubnames section. - Asm->OutStreamer->SwitchSection(Sec); + Asm->OutStreamer->switchSection(Sec); MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + SecName + "_begin"); MCSymbol *EndLabel = Asm->createTempSymbol("pub" + SecName + "_end"); @@ -785,7 +784,7 @@ void DwarfStreamer::emitPubTypesForUnit(const CompileUnit &Unit) { /// Emit a CIE into the debug_frame section. 
void DwarfStreamer::emitCIE(StringRef CIEBytes) { - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfFrameSection()); MS->emitBytes(CIEBytes); FrameSectionSize += CIEBytes.size(); @@ -796,7 +795,7 @@ void DwarfStreamer::emitCIE(StringRef CIEBytes) { /// which will be replaced with the parameter values. void DwarfStreamer::emitFDE(uint32_t CIEOffset, uint32_t AddrSize, uint32_t Address, StringRef FDEBytes) { - MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection()); + MS->switchSection(MC->getObjectFileInfo()->getDwarfFrameSection()); MS->emitIntValue(FDEBytes.size() + 4 + AddrSize, 4); MS->emitIntValue(CIEOffset, 4); diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index f6538c0549d0..34615a73e328 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/Object/Decompressor.h" +#include "llvm/Support/MemoryBuffer.h" using namespace llvm; using namespace llvm::object; @@ -181,7 +182,7 @@ addAllTypesFromDWP(MCStreamer &Out, const DWARFUnitIndex &TUIndex, MCSection *OutputTypes, StringRef Types, const UnitIndexEntry &TUEntry, uint32_t &TypesOffset, unsigned TypesContributionIndex) { - Out.SwitchSection(OutputTypes); + Out.switchSection(OutputTypes); for (const DWARFUnitIndex::Entry &E : TUIndex.getRows()) { auto *I = E.getContributions(); if (!I) @@ -215,7 +216,7 @@ static void addAllTypesFromTypesSection( MCSection *OutputTypes, const std::vector &TypesSections, const UnitIndexEntry &CUEntry, uint32_t &TypesOffset) { for (StringRef Types : TypesSections) { - Out.SwitchSection(OutputTypes); + Out.switchSection(OutputTypes); uint64_t Offset = 0; DataExtractor Data(Types, true, 0); while (Data.isValidOffset(Offset)) { @@ -373,7 +374,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, Data = DataExtractor(CurStrOffsetSection, true, 0); - Out.SwitchSection(StrOffsetSection); + Out.switchSection(StrOffsetSection); uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); uint64_t Offset = 0; @@ -427,7 +428,7 @@ void writeIndex(MCStreamer &Out, MCSection *Section, ++I; } - Out.SwitchSection(Section); + Out.switchSection(Section); Out.emitIntValue(IndexVersion, 4); // Version Out.emitIntValue(Columns, 4); // Columns Out.emitIntValue(IndexEntries.size(), 4); // Num Units @@ -526,7 +527,7 @@ Error handleSection( else if (OutSection == InfoSection) CurInfoSection.push_back(Contents); else { - Out.SwitchSection(OutSection); + Out.switchSection(OutSection); Out.emitBytes(Contents); } return Error::success(); @@ -633,7 +634,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs) { ContributionOffsets[getContributionIndex(DW_SECT_INFO, IndexVersion)]; if (CurCUIndexSection.empty()) { bool FoundCUUnit = false; - Out.SwitchSection(InfoSection); + Out.switchSection(InfoSection); for (StringRef Info : CurInfoSection) { uint64_t UnitOffset = 0; while (Info.size() > UnitOffset) { @@ -668,7 +669,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs) { FoundCUUnit = true; } else if (Header.UnitType == dwarf::DW_UT_split_type) { auto P = TypeIndexEntries.insert( - std::make_pair(Header.Signature.getValue(), Entry)); + std::make_pair(*Header.Signature, Entry)); if (!P.second) continue; } @@ -703,7 +704,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs) { utostr(CUIndex.getVersion()) + " and expecting " + utostr(IndexVersion)); - Out.SwitchSection(InfoSection); + 
Out.switchSection(InfoSection); for (const DWARFUnitIndex::Entry &E : CUIndex.getRows()) { auto *I = E.getContributions(); if (!I) diff --git a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp index 4d8b15530b9e..3ab7f722eaee 100644 --- a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp @@ -8,18 +8,11 @@ #include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" -#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include +#include "llvm/Support/ErrorHandling.h" #include #include #include diff --git a/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp index 48b9b0496ffe..2154aa2b8d00 100644 --- a/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp +++ b/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp @@ -8,8 +8,12 @@ #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" -#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h" #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::codeview; @@ -80,3 +84,72 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols, } return Error::success(); } + +Error CVSymbolVisitor::visitSymbolStreamFiltered(const CVSymbolArray &Symbols, + const FilterOptions &Filter) { + if (!Filter.SymbolOffset) + return visitSymbolStream(Symbols); + uint32_t SymbolOffset = *Filter.SymbolOffset; + uint32_t ParentRecurseDepth = Filter.ParentRecursiveDepth.value_or(0); + uint32_t ChildrenRecurseDepth = Filter.ChildRecursiveDepth.value_or(0); + if (!Symbols.isOffsetValid(SymbolOffset)) + return createStringError(inconvertibleErrorCode(), "Invalid symbol offset"); + CVSymbol Sym = *Symbols.at(SymbolOffset); + uint32_t SymEndOffset = + symbolOpensScope(Sym.kind()) ? getScopeEndOffset(Sym) : 0; + + std::vector ParentOffsets; + std::vector ParentEndOffsets; + uint32_t ChildrenDepth = 0; + for (auto Begin = Symbols.begin(), End = Symbols.end(); Begin != End; + ++Begin) { + uint32_t BeginOffset = Begin.offset(); + CVSymbol BeginSym = *Begin; + if (BeginOffset < SymbolOffset) { + if (symbolOpensScope(Begin->kind())) { + uint32_t EndOffset = getScopeEndOffset(BeginSym); + if (SymbolOffset < EndOffset) { + ParentOffsets.push_back(BeginOffset); + ParentEndOffsets.push_back(EndOffset); + } + } + } else if (BeginOffset == SymbolOffset) { + // Found symbol at offset. Visit its parent up to ParentRecurseDepth. 
+ if (ParentRecurseDepth >= ParentOffsets.size()) + ParentRecurseDepth = ParentOffsets.size(); + uint32_t StartIndex = ParentOffsets.size() - ParentRecurseDepth; + while (StartIndex < ParentOffsets.size()) { + if (!Symbols.isOffsetValid(ParentOffsets[StartIndex])) + break; + CVSymbol Parent = *Symbols.at(ParentOffsets[StartIndex]); + if (auto EC = visitSymbolRecord(Parent, ParentOffsets[StartIndex])) + return EC; + ++StartIndex; + } + if (auto EC = visitSymbolRecord(Sym, SymbolOffset)) + return EC; + } else if (BeginOffset <= SymEndOffset) { + if (ChildrenRecurseDepth) { + // Visit children. + if (symbolEndsScope(Begin->kind())) + --ChildrenDepth; + if (ChildrenDepth < ChildrenRecurseDepth || + BeginOffset == SymEndOffset) { + if (auto EC = visitSymbolRecord(BeginSym, BeginOffset)) + return EC; + } + if (symbolOpensScope(Begin->kind())) + ++ChildrenDepth; + } + } else { + // Visit parents' ends. + if (ParentRecurseDepth && BeginOffset == ParentEndOffsets.back()) { + if (auto EC = visitSymbolRecord(BeginSym, BeginOffset)) + return EC; + ParentEndOffsets.pop_back(); + --ParentRecurseDepth; + } + } + } + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index dd6f75f97a4a..5da300f710d5 100644 --- a/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -8,11 +8,12 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" -#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 1af59ff679dd..a66f9af98835 100644 --- a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -8,7 +8,9 @@ #include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" @@ -68,10 +70,10 @@ uint32_t CodeViewRecordIO::maxFieldLength() const { Optional Min = Limits.front().bytesRemaining(Offset); for (auto X : makeArrayRef(Limits).drop_front()) { Optional ThisMin = X.bytesRemaining(Offset); - if (ThisMin.hasValue()) - Min = (Min.hasValue()) ? std::min(*Min, *ThisMin) : *ThisMin; + if (ThisMin) + Min = Min ? std::min(*Min, *ThisMin) : *ThisMin; } - assert(Min.hasValue() && "Every field must have a maximum length!"); + assert(Min && "Every field must have a maximum length!"); return *Min; } @@ -279,17 +281,24 @@ void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value, // FIXME: There are no test cases covering this function. // This may be because we always consider enumerators to be unsigned. // See FIXME at CodeViewDebug.cpp : CodeViewDebug::lowerTypeEnum. 
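// [editorial note, not part of the vendored patch] The CodeViewRecordIO hunks
// that follow extend the CodeView "numeric leaf" encoders: a signed value in
// [0, LF_NUMERIC) is stored directly in the two-byte leaf field, anything else
// gets an LF_CHAR/LF_SHORT/LF_LONG/LF_QUADWORD marker followed by a payload of
// the matching width, and each branch now also checks the upper bound of its
// range. A rough standalone sketch of the wire format, assuming the usual
// cvinfo.h constant values; this is not the patch's own code.

#include "llvm/ADT/SmallVector.h"
#include <cstdint>

void encodeSignedLeaf(int64_t V, llvm::SmallVectorImpl<uint8_t> &Out) {
  auto put = [&Out](uint64_t X, unsigned Bytes) {
    for (unsigned I = 0; I < Bytes; ++I) // little-endian payload
      Out.push_back(uint8_t(X >> (8 * I)));
  };
  if (V >= 0 && V < 0x8000 /* LF_NUMERIC */) {
    put(uint64_t(V), 2); // small non-negative values are stored inline
  } else if (V >= INT8_MIN && V <= INT8_MAX) {
    put(0x8000 /* LF_CHAR */, 2);
    put(uint64_t(V), 1);
  } else if (V >= INT16_MIN && V <= INT16_MAX) {
    put(0x8001 /* LF_SHORT */, 2);
    put(uint64_t(V), 2);
  } else if (V >= INT32_MIN && V <= INT32_MAX) {
    put(0x8003 /* LF_LONG */, 2);
    put(uint64_t(V), 4);
  } else {
    put(0x8009 /* LF_QUADWORD */, 2);
    put(uint64_t(V), 8);
  }
}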
- if (Value >= std::numeric_limits::min()) { + if (Value < LF_NUMERIC && Value >= 0) { + emitComment(Comment); + Streamer->emitIntValue(Value, 2); + incrStreamedLen(2); + } else if (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()) { Streamer->emitIntValue(LF_CHAR, 2); emitComment(Comment); Streamer->emitIntValue(Value, 1); incrStreamedLen(3); - } else if (Value >= std::numeric_limits::min()) { + } else if (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()) { Streamer->emitIntValue(LF_SHORT, 2); emitComment(Comment); Streamer->emitIntValue(Value, 2); incrStreamedLen(4); - } else if (Value >= std::numeric_limits::min()) { + } else if (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()) { Streamer->emitIntValue(LF_LONG, 2); emitComment(Comment); Streamer->emitIntValue(Value, 4); @@ -328,17 +337,23 @@ void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value, } Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) { - if (Value >= std::numeric_limits::min()) { + if (Value < LF_NUMERIC && Value >= 0) { + if (auto EC = Writer->writeInteger(Value)) + return EC; + } else if (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()) { if (auto EC = Writer->writeInteger(LF_CHAR)) return EC; if (auto EC = Writer->writeInteger(Value)) return EC; - } else if (Value >= std::numeric_limits::min()) { + } else if (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()) { if (auto EC = Writer->writeInteger(LF_SHORT)) return EC; if (auto EC = Writer->writeInteger(Value)) return EC; - } else if (Value >= std::numeric_limits::min()) { + } else if (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()) { if (auto EC = Writer->writeInteger(LF_LONG)) return EC; if (auto EC = Writer->writeInteger(Value)) diff --git a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp index c7b1c65f2f9a..a3dbb3954d5c 100644 --- a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp @@ -46,10 +46,10 @@ static inline TypeLeafKind getTypeLeafKind(ContinuationRecordKind CK) { ContinuationRecordBuilder::ContinuationRecordBuilder() : SegmentWriter(Buffer), Mapping(SegmentWriter) {} -ContinuationRecordBuilder::~ContinuationRecordBuilder() {} +ContinuationRecordBuilder::~ContinuationRecordBuilder() = default; void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) { - assert(!Kind.hasValue()); + assert(!Kind); Kind = RecordKind; Buffer.clear(); SegmentWriter.setOffset(0); @@ -76,7 +76,7 @@ void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) { template void ContinuationRecordBuilder::writeMemberType(RecordType &Record) { - assert(Kind.hasValue()); + assert(Kind); uint32_t OriginalOffset = SegmentWriter.getOffset(); CVMemberRecord CVMR; @@ -158,7 +158,7 @@ CVType ContinuationRecordBuilder::createSegmentRecord( RecordPrefix *Prefix = reinterpret_cast(Data.data()); Prefix->RecordLen = Data.size() - sizeof(RecordPrefix::RecordLen); - if (RefersTo.hasValue()) { + if (RefersTo) { auto Continuation = Data.take_back(ContinuationLength); ContinuationRecord *CR = reinterpret_cast(Continuation.data()); diff --git a/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp index b23410409f88..b48f57955db1 100644 --- 
a/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Error.h" #include diff --git a/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp index 9bc69abea102..c083c61d1595 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp @@ -8,6 +8,8 @@ #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp index 48ec7e4ecdd6..665511c592f9 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp index 3f93463fe6d6..01581181dfe0 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp @@ -10,6 +10,6 @@ using namespace llvm::codeview; -DebugSubsectionRef::~DebugSubsectionRef() {} +DebugSubsectionRef::~DebugSubsectionRef() = default; -DebugSubsection::~DebugSubsection() {} +DebugSubsection::~DebugSubsection() = default; diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index 3c8a30101450..adc6cabd7da1 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" -#include #include #include diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp index 7968b6a2d757..50f6fb93dec1 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" #include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" #include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" @@ -20,7 +21,7 @@ #include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" #include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/SwapByteOrder.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp 
b/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp index c833103663e4..2b20b3e95db6 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" +#include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/CodeView/Formatters.cpp b/llvm/lib/DebugInfo/CodeView/Formatters.cpp index f1f51bcb39cc..73a589212227 100644 --- a/llvm/lib/DebugInfo/CodeView/Formatters.cpp +++ b/llvm/lib/DebugInfo/CodeView/Formatters.cpp @@ -9,8 +9,10 @@ #include "llvm/DebugInfo/CodeView/Formatters.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/GUID.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include #include using namespace llvm; diff --git a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp index 7cd9ca7498f5..142af382efba 100644 --- a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp @@ -8,18 +8,12 @@ #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/None.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" -#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include +#include "llvm/Support/ErrorHandling.h" #include #include #include diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index c0fc3e0ef65a..1d49a1ed4712 100644 --- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -9,11 +9,12 @@ #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordName.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp index 13ce3ae82c26..62d228599eae 100644 --- a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -8,18 +8,13 @@ #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/None.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" -#include 
"llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include +#include "llvm/Support/ErrorHandling.h" #include #include #include diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp index 1ca899789bef..5fbbc4a5d497 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -10,9 +10,13 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/FormatVariadic.h" diff --git a/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp b/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp index 63ce302a4e09..d76905df8681 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp @@ -13,9 +13,9 @@ #include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/BinaryByteStream.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp index d963e34628db..cf0c877fdbf8 100644 --- a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" #include "llvm/Support/BinaryStreamWriter.h" @@ -29,7 +30,7 @@ static void addPadding(BinaryStreamWriter &Writer) { SimpleTypeSerializer::SimpleTypeSerializer() : ScratchBuffer(MaxRecordLength) {} -SimpleTypeSerializer::~SimpleTypeSerializer() {} +SimpleTypeSerializer::~SimpleTypeSerializer() = default; template ArrayRef SimpleTypeSerializer::serialize(T &Record) { diff --git a/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp index 9e204eec8604..81aa44fb2086 100644 --- a/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp +++ b/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" #include 
"llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" diff --git a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp index 45b63983beb4..cfb12dbae845 100644 --- a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/SymbolDumper.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" @@ -20,8 +20,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ScopedPrinter.h" -#include - using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp index 2562c633bb99..d8b350bf26ba 100644 --- a/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp +++ b/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp @@ -8,7 +8,7 @@ #include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp index de9bb42b1798..5fb8d497b957 100644 --- a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -8,9 +8,9 @@ #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -24,7 +24,7 @@ SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator, Mapping(Writer, Container) {} Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { - assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!"); + assert(!CurrentSymbol && "Already in a symbol mapping!"); Writer.setOffset(0); @@ -39,7 +39,7 @@ Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { } Error SymbolSerializer::visitSymbolEnd(CVSymbol &Record) { - assert(CurrentSymbol.hasValue() && "Not in a symbol mapping!"); + assert(CurrentSymbol && "Not in a symbol mapping!"); if (auto EC = Mapping.visitSymbolEnd(Record)) return EC; diff --git a/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp index d5fea5ee5e29..5d27c9f29984 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp @@ -8,14 +8,15 @@ #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/Formatters.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp b/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp index 2dbc11a84f0b..fc85d8186eaa 100644 --- 
a/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -76,5 +76,6 @@ GloballyHashedType::hashType(ArrayRef RecordData, auto TrailingBytes = RecordData.drop_front(Off); S.update(TrailingBytes); - return {S.final().take_back(8)}; + std::array Hash = S.final(); + return {ArrayRef(Hash).take_back(8)}; } diff --git a/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp b/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp index 604d342448d3..3aead9d50041 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp @@ -33,6 +33,7 @@ static const SimpleTypeEntry SimpleTypeNames[] = { {"wchar_t*", SimpleTypeKind::WideCharacter}, {"char16_t*", SimpleTypeKind::Character16}, {"char32_t*", SimpleTypeKind::Character32}, + {"char8_t*", SimpleTypeKind::Character8}, {"__int8*", SimpleTypeKind::SByte}, {"unsigned __int8*", SimpleTypeKind::Byte}, {"short*", SimpleTypeKind::Int16Short}, diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index d272999bdab8..27f63b9edcd0 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -7,10 +7,28 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/ScopedPrinter.h" + +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::codeview; @@ -210,8 +228,8 @@ static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name, } Error TypeRecordMapping::visitTypeBegin(CVType &CVR) { - assert(!TypeKind.hasValue() && "Already in a type mapping!"); - assert(!MemberKind.hasValue() && "Already in a member mapping!"); + assert(!TypeKind && "Already in a type mapping!"); + assert(!MemberKind && "Already in a member mapping!"); // FieldList and MethodList records can be any length because they can be // split with continuation records. 
All other record types cannot be @@ -242,8 +260,8 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR, TypeIndex Index) { } Error TypeRecordMapping::visitTypeEnd(CVType &Record) { - assert(TypeKind.hasValue() && "Not in a type mapping!"); - assert(!MemberKind.hasValue() && "Still in a member mapping!"); + assert(TypeKind && "Not in a type mapping!"); + assert(!MemberKind && "Still in a member mapping!"); error(IO.endRecord()); @@ -252,8 +270,8 @@ Error TypeRecordMapping::visitTypeEnd(CVType &Record) { } Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) { - assert(TypeKind.hasValue() && "Not in a type mapping!"); - assert(!MemberKind.hasValue() && "Already in a member mapping!"); + assert(TypeKind && "Not in a type mapping!"); + assert(!MemberKind && "Already in a member mapping!"); // The largest possible subrecord is one in which there is a record prefix, // followed by the subrecord, followed by a continuation, and that entire @@ -278,8 +296,8 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) { } Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) { - assert(TypeKind.hasValue() && "Not in a type mapping!"); - assert(MemberKind.hasValue() && "Not in a member mapping!"); + assert(TypeKind && "Not in a type mapping!"); + assert(MemberKind && "Not in a member mapping!"); if (IO.isReading()) { if (auto EC = IO.skipPadding()) diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index 587a68142a4a..7ddfb7ab2f8d 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" @@ -487,7 +487,7 @@ Expected TypeStreamMerger::shouldRemapType(const CVType &Type) { if (auto EC = TypeDeserializer::deserializeAs(const_cast(Type), EP)) return joinErrors(std::move(EC), errorCorruptRecord()); - if (PCHSignature.hasValue()) + if (PCHSignature) return errorCorruptRecord(); PCHSignature.emplace(EP.getSignature()); return false; diff --git a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp index e517e8846d69..910a32730e39 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -8,9 +8,10 @@ #include "llvm/DebugInfo/CodeView/TypeTableCollection.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/RecordName.h" -#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index 1be5a752453a..e2ea5910932d 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -11,10 +11,10 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include 
"llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index c77d4d4d989c..5727b3bdb05c 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -10,7 +10,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DJB.h" #include "llvm/Support/Errc.h" diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp index 25d2e852a7fe..2d6c145f9237 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index d68ecd4f8a42..6461f2ac031d 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -7,8 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" + #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index ef50ad53650a..c785026f8461 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -15,6 +16,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" @@ -29,7 +31,11 @@ #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFListTable.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/DebugInfo/DWARF/DWARFVerifier.h" #include "llvm/MC/MCRegisterInfo.h" @@ -115,7 +121,7 @@ collectContributionData(DWARFContext::unit_iterator_range Units) { const 
Optional &R) { if (L && R) return L->Base < R->Base; - return R.hasValue(); + return R.has_value(); }); // Uniquify contributions, as it is possible that units (specifically @@ -383,7 +389,7 @@ void DWARFContext::dump( OS << '\n' << Name << " contents:\n"; if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugInfo]) for (const auto &U : Units) - U->getDIEForOffset(DumpOffset.getValue()) + U->getDIEForOffset(*DumpOffset) .dump(OS, 0, DumpOpts.noImplicitRecursion()); else for (const auto &U : Units) @@ -763,6 +769,10 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) { DWARFVerifier verifier(OS, *this, DumpOpts); Success &= verifier.handleDebugAbbrev(); + if (DumpOpts.DumpType & DIDT_DebugCUIndex) + Success &= verifier.handleDebugCUIndex(); + if (DumpOpts.DumpType & DIDT_DebugTUIndex) + Success &= verifier.handleDebugTUIndex(); if (DumpOpts.DumpType & DIDT_DebugInfo) Success &= verifier.handleDebugInfo(); if (DumpOpts.DumpType & DIDT_DebugLine) @@ -993,6 +1003,22 @@ Expected DWARFContext::getLineTableForUnit( RecoverableErrorHandler); } +void DWARFContext::clearLineTableForUnit(DWARFUnit *U) { + if (!Line) + return; + + auto UnitDIE = U->getUnitDIE(); + if (!UnitDIE) + return; + + auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list)); + if (!Offset) + return; + + uint64_t stmtOffset = *Offset + U->getLineTableOffset(); + Line->clearLineTable(stmtOffset); +} + void DWARFContext::parseNormalUnits() { if (!NormalUnits.empty()) return; @@ -1027,7 +1053,25 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile unit. uint64_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. - return getCompileUnitForOffset(CUOffset); + if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) + return OffsetCU; + + // Global variables are often not found by the above search, for one of two + // reasons: + // 1. .debug_aranges may not include global variables. On clang, it seems we + // put the globals in the aranges, but this isn't true for gcc. + // 2. Even if the global variable is in a .debug_arange, global variables + // may not be captured in the [start, end) addresses described by the + // parent compile unit. + // + // So, we walk the CU's and their child DI's manually, looking for the + // specific global variable. 
+ for (std::unique_ptr &CU : compile_units()) { + if (DWARFDie Die = CU->getVariableForAddress(Address)) { + return static_cast(CU.get()); + } + } + return nullptr; } DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { @@ -1097,64 +1141,6 @@ static bool getFunctionNameAndStartLineForAddress( return FoundResult; } -static Optional getTypeSize(DWARFDie Type, uint64_t PointerSize) { - if (auto SizeAttr = Type.find(DW_AT_byte_size)) - if (Optional Size = SizeAttr->getAsUnsignedConstant()) - return Size; - - switch (Type.getTag()) { - case DW_TAG_pointer_type: - case DW_TAG_reference_type: - case DW_TAG_rvalue_reference_type: - return PointerSize; - case DW_TAG_ptr_to_member_type: { - if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) - if (BaseType.getTag() == DW_TAG_subroutine_type) - return 2 * PointerSize; - return PointerSize; - } - case DW_TAG_const_type: - case DW_TAG_immutable_type: - case DW_TAG_volatile_type: - case DW_TAG_restrict_type: - case DW_TAG_typedef: { - if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) - return getTypeSize(BaseType, PointerSize); - break; - } - case DW_TAG_array_type: { - DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type); - if (!BaseType) - return Optional(); - Optional BaseSize = getTypeSize(BaseType, PointerSize); - if (!BaseSize) - return Optional(); - uint64_t Size = *BaseSize; - for (DWARFDie Child : Type) { - if (Child.getTag() != DW_TAG_subrange_type) - continue; - - if (auto ElemCountAttr = Child.find(DW_AT_count)) - if (Optional ElemCount = - ElemCountAttr->getAsUnsignedConstant()) - Size *= *ElemCount; - if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) - if (Optional UpperBound = - UpperBoundAttr->getAsSignedConstant()) { - int64_t LowerBound = 0; - if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) - LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0); - Size *= *UpperBound - LowerBound + 1; - } - } - return Size; - } - default: - break; - } - return Optional(); -} - static Optional getExpressionFrameOffset(ArrayRef Expr, Optional FrameBaseReg) { @@ -1215,7 +1201,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, if (Optional Name = dwarf::toString(*NameAttr)) Local.Name = *Name; if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type)) - Local.Size = getTypeSize(Type, getCUAddrSize()); + Local.Size = Type.getTypeSize(getCUAddrSize()); if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { if (const auto *LT = CU->getContext().getLineTableForUnit(CU)) LT->getFileNameByIndex( @@ -1256,7 +1242,6 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, DILineInfoSpecifier Spec) { DILineInfo Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); if (!CU) return Result; @@ -1271,6 +1256,22 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, Spec.FLIKind, Result); } } + + return Result; +} + +DILineInfo +DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + DILineInfo Result; + DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; + + if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) { + Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath); + Result.Line = Die.getDeclLine(); + } + return Result; } diff --git 
a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp index da6f6ad903f4..b18b64382b41 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFObject.h" +#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" +#include "llvm/Support/Errc.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp index 5b1c62e6a259..81fac4763ec1 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/Errc.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp index 1a1b8ea0976f..49ee27db6d54 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp @@ -7,10 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" -#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" -#include "llvm/Support/DataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFObject.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include #include #include @@ -20,15 +22,15 @@ using namespace llvm; void DWARFDebugAranges::extract( DWARFDataExtractor DebugArangesData, - function_ref RecoverableErrorHandler) { + function_ref RecoverableErrorHandler, + function_ref WarningHandler) { if (!DebugArangesData.isValidOffset(0)) return; uint64_t Offset = 0; DWARFDebugArangeSet Set; while (DebugArangesData.isValidOffset(Offset)) { - if (Error E = - Set.extract(DebugArangesData, &Offset, RecoverableErrorHandler)) { + if (Error E = Set.extract(DebugArangesData, &Offset, WarningHandler)) { RecoverableErrorHandler(std::move(E)); return; } @@ -50,7 +52,8 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { // Extract aranges from .debug_aranges section. 
DWARFDataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(), CTX->isLittleEndian(), 0); - extract(ArangesData, CTX->getRecoverableErrorHandler()); + extract(ArangesData, CTX->getRecoverableErrorHandler(), + CTX->getWarningHandler()); // Generate aranges from DIEs: even if .debug_aranges section is present, // it may describe only a small subset of compilation units, so we need to diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 92a461dbd941..cf9057c99dbd 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -12,8 +12,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" @@ -1100,8 +1101,8 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { default: return createStringError( errc::invalid_argument, - "unknown augmentation character in entry at 0x%" PRIx64, - StartOffset); + "unknown augmentation character %c in entry at 0x%" PRIx64, + AugmentationString[i], StartOffset); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; @@ -1137,10 +1138,14 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { // B-Key is used for signing functions associated with this // augmentation string break; + // This stack frame contains MTE tagged data, so needs to be + // untagged on unwind. + case 'G': + break; } } - if (AugmentationLength.hasValue()) { + if (AugmentationLength) { if (Offset != EndAugmentationOffset) return createStringError(errc::invalid_argument, "parsing augmentation data at 0x%" PRIx64 diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 385bde51e2e7..7dbeebc2770f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -9,10 +9,11 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include #include diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index f36d3f87257a..2e0780e249aa 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -12,12 +12,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -29,6 +29,10 @@ using namespace llvm; using namespace dwarf; +namespace llvm { +class DwarfContext; +} + using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; namespace { @@ -337,7 +341,7 @@ 
parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, errc::invalid_argument, "failed to parse file entry because the MD5 hash is invalid"); std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16, - FileEntry.Checksum.Bytes.begin()); + FileEntry.Checksum.begin()); break; default: break; @@ -597,6 +601,10 @@ Expected DWARFDebugLine::getOrParseLineTable( return LT; } +void DWARFDebugLine::clearLineTable(uint64_t Offset) { + LineTableMap.erase(Offset); +} + static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) { assert(Opcode != 0); if (Opcode < OpcodeBase) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index f39c7871d603..b68af4cfafef 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -9,13 +9,13 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" -#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -24,6 +24,10 @@ using namespace llvm; using object::SectionedAddress; +namespace llvm { +class DWARFObject; +} + namespace { class DWARFLocationInterpreter { Optional Base; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index 7a81d7ff064b..80daea64814a 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -112,7 +115,7 @@ Error DWARFDebugMacro::parseImpl( if (IsMacro && Data.isValidOffset(Offset)) { // Keep a mapping from Macro contribution to CUs, this will // be needed while retrieving macro from DW_MACRO_define_strx form. - for (const auto &U : Units.getValue()) + for (const auto &U : *Units) if (auto CUDIE = U->getUnitDIE()) // Skip units which does not contibutes to macro section. 
if (auto MacroOffset = toSectionOffset(CUDIE.find(DW_AT_macros))) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index ec7889a3728a..96c546250974 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -14,19 +14,20 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h" +#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" -#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include @@ -106,586 +107,10 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, .print(OS, DumpOpts, MRI, U); } -static DWARFDie resolveReferencedType(DWARFDie D, - dwarf::Attribute Attr = DW_AT_type) { - return D.getAttributeValueAsReferencedDie(Attr).resolveTypeUnitReference(); -} static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) { return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference(); } -namespace { - -// FIXME: We should have pretty printers per language. Currently we print -// everything as if it was C++ and fall back to the TAG type name. -struct DWARFTypePrinter { - raw_ostream &OS; - bool Word = true; - bool EndedWithTemplate = false; - - DWARFTypePrinter(raw_ostream &OS) : OS(OS) {} - - /// Dump the name encoded in the type tag. - void appendTypeTagName(dwarf::Tag T) { - StringRef TagStr = TagString(T); - static constexpr StringRef Prefix = "DW_TAG_"; - static constexpr StringRef Suffix = "_type"; - if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix)) - return; - OS << TagStr.substr(Prefix.size(), - TagStr.size() - (Prefix.size() + Suffix.size())) - << " "; - } - - void appendArrayType(const DWARFDie &D) { - for (const DWARFDie &C : D.children()) { - if (C.getTag() != DW_TAG_subrange_type) - continue; - Optional LB; - Optional Count; - Optional UB; - Optional DefaultLB; - if (Optional L = C.find(DW_AT_lower_bound)) - LB = L->getAsUnsignedConstant(); - if (Optional CountV = C.find(DW_AT_count)) - Count = CountV->getAsUnsignedConstant(); - if (Optional UpperV = C.find(DW_AT_upper_bound)) - UB = UpperV->getAsUnsignedConstant(); - if (Optional LV = - D.getDwarfUnit()->getUnitDIE().find(DW_AT_language)) - if (Optional LC = LV->getAsUnsignedConstant()) - if ((DefaultLB = - LanguageLowerBound(static_cast(*LC)))) - if (LB && *LB == *DefaultLB) - LB = None; - if (!LB && !Count && !UB) - OS << "[]"; - else if (!LB && (Count || UB) && DefaultLB) - OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']'; - else { - OS << "[["; - if (LB) - OS << *LB; - else - OS << '?'; - OS << ", "; - if (Count) - if (LB) - OS << *LB + *Count; - else - OS << "? 
+ " << *Count; - else if (UB) - OS << *UB + 1; - else - OS << '?'; - OS << ")]"; - } - } - EndedWithTemplate = false; - } - - DWARFDie skipQualifiers(DWARFDie D) { - while (D && (D.getTag() == DW_TAG_const_type || - D.getTag() == DW_TAG_volatile_type)) - D = resolveReferencedType(D); - return D; - } - - bool needsParens(DWARFDie D) { - D = skipQualifiers(D); - return D && (D.getTag() == DW_TAG_subroutine_type || D.getTag() == DW_TAG_array_type); - } - - void appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, StringRef Ptr) { - appendQualifiedNameBefore(Inner); - if (Word) - OS << ' '; - if (needsParens(Inner)) - OS << '('; - OS << Ptr; - Word = false; - EndedWithTemplate = false; - } - - DWARFDie - appendUnqualifiedNameBefore(DWARFDie D, - std::string *OriginalFullName = nullptr) { - Word = true; - if (!D) { - OS << "void"; - return DWARFDie(); - } - DWARFDie InnerDIE; - auto Inner = [&] { return InnerDIE = resolveReferencedType(D); }; - const dwarf::Tag T = D.getTag(); - switch (T) { - case DW_TAG_pointer_type: { - appendPointerLikeTypeBefore(D, Inner(), "*"); - break; - } - case DW_TAG_subroutine_type: { - appendQualifiedNameBefore(Inner()); - if (Word) { - OS << ' '; - } - Word = false; - break; - } - case DW_TAG_array_type: { - appendQualifiedNameBefore(Inner()); - break; - } - case DW_TAG_reference_type: - appendPointerLikeTypeBefore(D, Inner(), "&"); - break; - case DW_TAG_rvalue_reference_type: - appendPointerLikeTypeBefore(D, Inner(), "&&"); - break; - case DW_TAG_ptr_to_member_type: { - appendQualifiedNameBefore(Inner()); - if (needsParens(InnerDIE)) - OS << '('; - else if (Word) - OS << ' '; - if (DWARFDie Cont = resolveReferencedType(D, DW_AT_containing_type)) { - appendQualifiedName(Cont); - OS << "::"; - } - OS << "*"; - Word = false; - break; - } - case DW_TAG_const_type: - case DW_TAG_volatile_type: - appendConstVolatileQualifierBefore(D); - break; - case DW_TAG_namespace: { - if (const char *Name = dwarf::toString(D.find(DW_AT_name), nullptr)) - OS << Name; - else - OS << "(anonymous namespace)"; - break; - } - case DW_TAG_unspecified_type: { - StringRef TypeName = D.getShortName(); - if (TypeName == "decltype(nullptr)") - TypeName = "std::nullptr_t"; - Word = true; - OS << TypeName; - EndedWithTemplate = false; - break; - } - /* - case DW_TAG_structure_type: - case DW_TAG_class_type: - case DW_TAG_enumeration_type: - case DW_TAG_base_type: - */ - default: { - const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr); - if (!NamePtr) { - appendTypeTagName(D.getTag()); - return DWARFDie(); - } - Word = true; - StringRef Name = NamePtr; - static constexpr StringRef MangledPrefix = "_STN"; - if (Name.startswith(MangledPrefix)) { - Name = Name.drop_front(MangledPrefix.size()); - auto Separator = Name.find('|'); - assert(Separator != StringRef::npos); - StringRef BaseName = Name.substr(0, Separator); - StringRef TemplateArgs = Name.substr(Separator + 1); - if (OriginalFullName) - *OriginalFullName = (BaseName + TemplateArgs).str(); - Name = BaseName; - } else - EndedWithTemplate = Name.endswith(">"); - OS << Name; - // This check would be insufficient for operator overloads like - // "operator>>" - but for now Clang doesn't try to simplify them, so this - // is OK. Add more nuanced operator overload handling here if/when needed. 
- if (Name.endswith(">")) - break; - if (!appendTemplateParameters(D)) - break; - - if (EndedWithTemplate) - OS << ' '; - OS << '>'; - EndedWithTemplate = true; - Word = true; - break; - } - } - return InnerDIE; - } - - void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner, - bool SkipFirstParamIfArtificial = false) { - if (!D) - return; - switch (D.getTag()) { - case DW_TAG_subroutine_type: { - appendSubroutineNameAfter(D, Inner, SkipFirstParamIfArtificial, false, - false); - break; - } - case DW_TAG_array_type: { - appendArrayType(D); - break; - } - case DW_TAG_const_type: - case DW_TAG_volatile_type: - appendConstVolatileQualifierAfter(D); - break; - case DW_TAG_ptr_to_member_type: - case DW_TAG_reference_type: - case DW_TAG_rvalue_reference_type: - case DW_TAG_pointer_type: { - if (needsParens(Inner)) - OS << ')'; - appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner), - /*SkipFirstParamIfArtificial=*/D.getTag() == - DW_TAG_ptr_to_member_type); - break; - } - /* - case DW_TAG_structure_type: - case DW_TAG_class_type: - case DW_TAG_enumeration_type: - case DW_TAG_base_type: - case DW_TAG_namespace: - */ - default: - break; - } - } - - void appendQualifiedName(DWARFDie D) { - if (D) - appendScopes(D.getParent()); - appendUnqualifiedName(D); - } - DWARFDie appendQualifiedNameBefore(DWARFDie D) { - if (D) - appendScopes(D.getParent()); - return appendUnqualifiedNameBefore(D); - } - bool appendTemplateParameters(DWARFDie D, bool *FirstParameter = nullptr) { - bool FirstParameterValue = true; - bool IsTemplate = false; - if (!FirstParameter) - FirstParameter = &FirstParameterValue; - for (const DWARFDie &C : D) { - auto Sep = [&] { - if (*FirstParameter) - OS << '<'; - else - OS << ", "; - IsTemplate = true; - EndedWithTemplate = false; - *FirstParameter = false; - }; - if (C.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { - IsTemplate = true; - appendTemplateParameters(C, FirstParameter); - } - if (C.getTag() == dwarf::DW_TAG_template_value_parameter) { - DWARFDie T = resolveReferencedType(C); - Sep(); - if (T.getTag() == DW_TAG_enumeration_type) { - auto V = C.find(DW_AT_const_value); - bool FoundEnumerator = false; - for (const DWARFDie &Enumerator : T) { - auto EV = Enumerator.find(DW_AT_const_value); - if (V && EV && - V->getAsSignedConstant() == EV->getAsSignedConstant()) { - if (T.find(DW_AT_enum_class)) { - appendQualifiedName(T); - OS << "::"; - } else - appendScopes(T.getParent()); - OS << Enumerator.getShortName(); - FoundEnumerator = true; - break; - } - } - if (FoundEnumerator) - continue; - OS << '('; - appendQualifiedName(T); - OS << ')'; - OS << to_string(*V->getAsSignedConstant()); - continue; - } - // /Maybe/ we could do pointer type parameters, looking for the - // symbol in the ELF symbol table to get back to the variable... - // but probably not worth it. - if (T.getTag() == DW_TAG_pointer_type) - continue; - const char *RawName = dwarf::toString(T.find(DW_AT_name), nullptr); - assert(RawName); - StringRef Name = RawName; - auto V = C.find(DW_AT_const_value); - bool IsQualifiedChar = false; - if (Name == "bool") { - OS << (*V->getAsUnsignedConstant() ? 
"true" : "false"); - } else if (Name == "short") { - OS << "(short)"; - OS << to_string(*V->getAsSignedConstant()); - } else if (Name == "unsigned short") { - OS << "(unsigned short)"; - OS << to_string(*V->getAsSignedConstant()); - } else if (Name == "int") - OS << to_string(*V->getAsSignedConstant()); - else if (Name == "long") { - OS << to_string(*V->getAsSignedConstant()); - OS << "L"; - } else if (Name == "long long") { - OS << to_string(*V->getAsSignedConstant()); - OS << "LL"; - } else if (Name == "unsigned int") { - OS << to_string(*V->getAsUnsignedConstant()); - OS << "U"; - } else if (Name == "unsigned long") { - OS << to_string(*V->getAsUnsignedConstant()); - OS << "UL"; - } else if (Name == "unsigned long long") { - OS << to_string(*V->getAsUnsignedConstant()); - OS << "ULL"; - } else if (Name == "char" || - (IsQualifiedChar = - (Name == "unsigned char" || Name == "signed char"))) { - // FIXME: check T's DW_AT_type to see if it's signed or not (since - // char signedness is implementation defined). - auto Val = *V->getAsSignedConstant(); - // Copied/hacked up from Clang's CharacterLiteral::print - incomplete - // (doesn't actually support different character types/widths, sign - // handling's not done, and doesn't correctly test if a character is - // printable or needs to use a numeric escape sequence instead) - if (IsQualifiedChar) { - OS << '('; - OS << Name; - OS << ')'; - } - switch (Val) { - case '\\': - OS << "'\\\\'"; - break; - case '\'': - OS << "'\\''"; - break; - case '\a': - // TODO: K&R: the meaning of '\\a' is different in traditional C - OS << "'\\a'"; - break; - case '\b': - OS << "'\\b'"; - break; - case '\f': - OS << "'\\f'"; - break; - case '\n': - OS << "'\\n'"; - break; - case '\r': - OS << "'\\r'"; - break; - case '\t': - OS << "'\\t'"; - break; - case '\v': - OS << "'\\v'"; - break; - default: - if ((Val & ~0xFFu) == ~0xFFu) - Val &= 0xFFu; - if (Val < 127 && Val >= 32) { - OS << "'"; - OS << (char)Val; - OS << "'"; - } else if (Val < 256) - OS << to_string(llvm::format("'\\x%02x'", Val)); - else if (Val <= 0xFFFF) - OS << to_string(llvm::format("'\\u%04x'", Val)); - else - OS << to_string(llvm::format("'\\U%08x'", Val)); - } - } - continue; - } - if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) { - const char *RawName = - dwarf::toString(C.find(DW_AT_GNU_template_name), nullptr); - assert(RawName); - StringRef Name = RawName; - Sep(); - OS << Name; - continue; - } - if (C.getTag() != dwarf::DW_TAG_template_type_parameter) - continue; - auto TypeAttr = C.find(DW_AT_type); - Sep(); - appendQualifiedName(TypeAttr ? resolveReferencedType(C, *TypeAttr) - : DWARFDie()); - } - if (IsTemplate && *FirstParameter && FirstParameter == &FirstParameterValue) - OS << '<'; - return IsTemplate; - } - void decomposeConstVolatile(DWARFDie &N, DWARFDie &T, DWARFDie &C, - DWARFDie &V) { - (N.getTag() == DW_TAG_const_type ? 
C : V) = N; - T = resolveReferencedType(N); - if (T) { - auto Tag = T.getTag(); - if (Tag == DW_TAG_const_type) { - C = T; - T = resolveReferencedType(T); - } else if (Tag == DW_TAG_volatile_type) { - V = T; - T = resolveReferencedType(T); - } - } - } - void appendConstVolatileQualifierAfter(DWARFDie N) { - DWARFDie C; - DWARFDie V; - DWARFDie T; - decomposeConstVolatile(N, T, C, V); - if (T && T.getTag() == DW_TAG_subroutine_type) - appendSubroutineNameAfter(T, resolveReferencedType(T), false, C.isValid(), - V.isValid()); - else - appendUnqualifiedNameAfter(T, resolveReferencedType(T)); - } - void appendConstVolatileQualifierBefore(DWARFDie N) { - DWARFDie C; - DWARFDie V; - DWARFDie T; - decomposeConstVolatile(N, T, C, V); - bool Subroutine = T && T.getTag() == DW_TAG_subroutine_type; - DWARFDie A = T; - while (A && A.getTag() == DW_TAG_array_type) - A = resolveReferencedType(A); - bool Leading = - (!A || (A.getTag() != DW_TAG_pointer_type && - A.getTag() != llvm::dwarf::DW_TAG_ptr_to_member_type)) && - !Subroutine; - if (Leading) { - if (C) - OS << "const "; - if (V) - OS << "volatile "; - } - appendQualifiedNameBefore(T); - if (!Leading && !Subroutine) { - Word = true; - if (C) - OS << "const"; - if (V) { - if (C) - OS << ' '; - OS << "volatile"; - } - } - } - - /// Recursively append the DIE type name when applicable. - void appendUnqualifiedName(DWARFDie D, - std::string *OriginalFullName = nullptr) { - // FIXME: We should have pretty printers per language. Currently we print - // everything as if it was C++ and fall back to the TAG type name. - DWARFDie Inner = appendUnqualifiedNameBefore(D, OriginalFullName); - appendUnqualifiedNameAfter(D, Inner); - } - - void appendSubroutineNameAfter(DWARFDie D, DWARFDie Inner, - bool SkipFirstParamIfArtificial, bool Const, - bool Volatile) { - DWARFDie FirstParamIfArtificial; - OS << '('; - EndedWithTemplate = false; - bool First = true; - bool RealFirst = true; - for (DWARFDie P : D) { - if (P.getTag() != DW_TAG_formal_parameter && - P.getTag() != DW_TAG_unspecified_parameters) - return; - DWARFDie T = resolveReferencedType(P); - if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) { - FirstParamIfArtificial = T; - RealFirst = false; - continue; - } - if (!First) { - OS << ", "; - } - First = false; - if (P.getTag() == DW_TAG_unspecified_parameters) - OS << "..."; - else - appendQualifiedName(T); - } - EndedWithTemplate = false; - OS << ')'; - if (FirstParamIfArtificial) { - if (DWARFDie P = FirstParamIfArtificial) { - if (P.getTag() == DW_TAG_pointer_type) { - DWARFDie C; - DWARFDie V; - auto CVStep = [&](DWARFDie CV) { - if (DWARFDie U = resolveReferencedType(CV)) { - if (U.getTag() == DW_TAG_const_type) - return C = U; - if (U.getTag() == DW_TAG_volatile_type) - return V = U; - } - return DWARFDie(); - }; - if (DWARFDie CV = CVStep(P)) { - CVStep(CV); - } - if (C) - OS << " const"; - if (V) - OS << " volatile"; - } - } - } else { - if (Const) - OS << " const"; - if (Volatile) - OS << " volatile"; - } - if (D.find(DW_AT_reference)) - OS << " &"; - if (D.find(DW_AT_rvalue_reference)) - OS << " &&"; - appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner)); - } - void appendScopes(DWARFDie D) { - if (D.getTag() == DW_TAG_compile_unit) - return; - if (D.getTag() == DW_TAG_type_unit) - return; - if (D.getTag() == DW_TAG_skeleton_unit) - return; - if (D.getTag() == DW_TAG_subprogram) - return; - if (D.getTag() == DW_TAG_lexical_block) - return; - D = D.resolveTypeUnitReference(); - if (DWARFDie P = D.getParent()) - 
appendScopes(P); - appendUnqualifiedName(D); - OS << "::"; - } -}; -} // anonymous namespace - static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, const DWARFAttribute &AttrValue, unsigned Indent, DIDumpOptions DumpOpts) { @@ -713,8 +138,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, Color = HighlightColor::String; if (const auto *LT = U->getContext().getLineTableForUnit(U)) if (LT->getFileNameByIndex( - FormValue.getAsUnsignedConstant().getValue(), - U->getCompilationDir(), + *FormValue.getAsUnsignedConstant(), U->getCompilationDir(), DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) { File = '"' + File + '"'; Name = File; @@ -768,7 +192,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, Die.getAttributeValueAsReferencedDie(FormValue).getName( DINameKind::LinkageName)) OS << Space << "\"" << Name << '\"'; - } else if (Attr == DW_AT_type) { + } else if (Attr == DW_AT_type || Attr == DW_AT_containing_type) { DWARFDie D = resolveReferencedType(Die, FormValue); if (D && !D.isNULL()) { OS << Space << "\""; @@ -1061,6 +485,66 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } +Optional DWARFDie::getTypeSize(uint64_t PointerSize) { + if (auto SizeAttr = find(DW_AT_byte_size)) + if (Optional Size = SizeAttr->getAsUnsignedConstant()) + return Size; + + switch (getTag()) { + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + return PointerSize; + case DW_TAG_ptr_to_member_type: { + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + if (BaseType.getTag() == DW_TAG_subroutine_type) + return 2 * PointerSize; + return PointerSize; + } + case DW_TAG_const_type: + case DW_TAG_immutable_type: + case DW_TAG_volatile_type: + case DW_TAG_restrict_type: + case DW_TAG_typedef: { + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + return BaseType.getTypeSize(PointerSize); + break; + } + case DW_TAG_array_type: { + DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); + if (!BaseType) + return None; + Optional BaseSize = BaseType.getTypeSize(PointerSize); + if (!BaseSize) + return None; + uint64_t Size = *BaseSize; + for (DWARFDie Child : *this) { + if (Child.getTag() != DW_TAG_subrange_type) + continue; + + if (auto ElemCountAttr = Child.find(DW_AT_count)) + if (Optional ElemCount = + ElemCountAttr->getAsUnsignedConstant()) + Size *= *ElemCount; + if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) + if (Optional UpperBound = + UpperBoundAttr->getAsSignedConstant()) { + int64_t LowerBound = 0; + if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) + LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); + Size *= *UpperBound - LowerBound + 1; + } + } + return Size; + } + default: + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + return BaseType.getTypeSize(PointerSize); + break; + } + return None; +} + /// Helper to dump a DIE with all of its parents, but no siblings. 
static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, DIDumpOptions DumpOpts, unsigned Depth = 0) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 86991a3949dd..1fecd5ee6902 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -13,7 +13,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFObject.h" +#include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" diff --git a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index ace7000f07b2..3f140d21c53c 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -9,10 +9,10 @@ #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp new file mode 100644 index 000000000000..86cc07b0d0f2 --- /dev/null +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp @@ -0,0 +1,608 @@ +#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" +#include "llvm/Support/ScopedPrinter.h" +namespace llvm { +using namespace dwarf; +void DWARFTypePrinter::appendTypeTagName(dwarf::Tag T) { + StringRef TagStr = TagString(T); + static constexpr StringRef Prefix = "DW_TAG_"; + static constexpr StringRef Suffix = "_type"; + if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix)) + return; + OS << TagStr.substr(Prefix.size(), + TagStr.size() - (Prefix.size() + Suffix.size())) + << " "; +} + +void DWARFTypePrinter::appendArrayType(const DWARFDie &D) { + for (const DWARFDie &C : D.children()) { + if (C.getTag() != DW_TAG_subrange_type) + continue; + Optional LB; + Optional Count; + Optional UB; + Optional DefaultLB; + if (Optional L = C.find(DW_AT_lower_bound)) + LB = L->getAsUnsignedConstant(); + if (Optional CountV = C.find(DW_AT_count)) + Count = CountV->getAsUnsignedConstant(); + if (Optional UpperV = C.find(DW_AT_upper_bound)) + UB = UpperV->getAsUnsignedConstant(); + if (Optional LV = + D.getDwarfUnit()->getUnitDIE().find(DW_AT_language)) + if (Optional LC = LV->getAsUnsignedConstant()) + if ((DefaultLB = + LanguageLowerBound(static_cast(*LC)))) + if (LB && *LB == *DefaultLB) + LB = None; + if (!LB && !Count && !UB) + OS << "[]"; + else if (!LB && (Count || UB) && DefaultLB) + OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']'; + else { + OS << "[["; + if (LB) + OS << *LB; + else + OS << '?'; + OS << ", "; + if (Count) + if (LB) + OS << *LB + *Count; + else + OS << "? 
+ " << *Count; + else if (UB) + OS << *UB + 1; + else + OS << '?'; + OS << ")]"; + } + } + EndedWithTemplate = false; +} + +static DWARFDie resolveReferencedType(DWARFDie D, + dwarf::Attribute Attr = DW_AT_type) { + return D.getAttributeValueAsReferencedDie(Attr).resolveTypeUnitReference(); +} +static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) { + return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference(); +} +DWARFDie DWARFTypePrinter::skipQualifiers(DWARFDie D) { + while (D && (D.getTag() == DW_TAG_const_type || + D.getTag() == DW_TAG_volatile_type)) + D = resolveReferencedType(D); + return D; +} + +bool DWARFTypePrinter::needsParens(DWARFDie D) { + D = skipQualifiers(D); + return D && (D.getTag() == DW_TAG_subroutine_type || + D.getTag() == DW_TAG_array_type); +} + +void DWARFTypePrinter::appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, + StringRef Ptr) { + appendQualifiedNameBefore(Inner); + if (Word) + OS << ' '; + if (needsParens(Inner)) + OS << '('; + OS << Ptr; + Word = false; + EndedWithTemplate = false; +} + +DWARFDie +DWARFTypePrinter::appendUnqualifiedNameBefore(DWARFDie D, + std::string *OriginalFullName) { + Word = true; + if (!D) { + OS << "void"; + return DWARFDie(); + } + DWARFDie InnerDIE; + auto Inner = [&] { return InnerDIE = resolveReferencedType(D); }; + const dwarf::Tag T = D.getTag(); + switch (T) { + case DW_TAG_pointer_type: { + appendPointerLikeTypeBefore(D, Inner(), "*"); + break; + } + case DW_TAG_subroutine_type: { + appendQualifiedNameBefore(Inner()); + if (Word) { + OS << ' '; + } + Word = false; + break; + } + case DW_TAG_array_type: { + appendQualifiedNameBefore(Inner()); + break; + } + case DW_TAG_reference_type: + appendPointerLikeTypeBefore(D, Inner(), "&"); + break; + case DW_TAG_rvalue_reference_type: + appendPointerLikeTypeBefore(D, Inner(), "&&"); + break; + case DW_TAG_ptr_to_member_type: { + appendQualifiedNameBefore(Inner()); + if (needsParens(InnerDIE)) + OS << '('; + else if (Word) + OS << ' '; + if (DWARFDie Cont = resolveReferencedType(D, DW_AT_containing_type)) { + appendQualifiedName(Cont); + EndedWithTemplate = false; + OS << "::"; + } + OS << "*"; + Word = false; + break; + } + case DW_TAG_const_type: + case DW_TAG_volatile_type: + appendConstVolatileQualifierBefore(D); + break; + case DW_TAG_namespace: { + if (const char *Name = dwarf::toString(D.find(DW_AT_name), nullptr)) + OS << Name; + else + OS << "(anonymous namespace)"; + break; + } + case DW_TAG_unspecified_type: { + StringRef TypeName = D.getShortName(); + if (TypeName == "decltype(nullptr)") + TypeName = "std::nullptr_t"; + Word = true; + OS << TypeName; + EndedWithTemplate = false; + break; + } + /* + case DW_TAG_structure_type: + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_base_type: + */ + default: { + const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr); + if (!NamePtr) { + appendTypeTagName(D.getTag()); + return DWARFDie(); + } + Word = true; + StringRef Name = NamePtr; + static constexpr StringRef MangledPrefix = "_STN|"; + if (Name.startswith(MangledPrefix)) { + Name = Name.drop_front(MangledPrefix.size()); + auto Separator = Name.find('|'); + assert(Separator != StringRef::npos); + StringRef BaseName = Name.substr(0, Separator); + StringRef TemplateArgs = Name.substr(Separator + 1); + if (OriginalFullName) + *OriginalFullName = (BaseName + TemplateArgs).str(); + Name = BaseName; + } else + EndedWithTemplate = Name.endswith(">"); + OS << Name; + // This check would be insufficient for operator 
overloads like + // "operator>>" - but for now Clang doesn't try to simplify them, so this + // is OK. Add more nuanced operator overload handling here if/when needed. + if (Name.endswith(">")) + break; + if (!appendTemplateParameters(D)) + break; + + if (EndedWithTemplate) + OS << ' '; + OS << '>'; + EndedWithTemplate = true; + Word = true; + break; + } + } + return InnerDIE; +} + +void DWARFTypePrinter::appendUnqualifiedNameAfter( + DWARFDie D, DWARFDie Inner, bool SkipFirstParamIfArtificial) { + if (!D) + return; + switch (D.getTag()) { + case DW_TAG_subroutine_type: { + appendSubroutineNameAfter(D, Inner, SkipFirstParamIfArtificial, false, + false); + break; + } + case DW_TAG_array_type: { + appendArrayType(D); + break; + } + case DW_TAG_const_type: + case DW_TAG_volatile_type: + appendConstVolatileQualifierAfter(D); + break; + case DW_TAG_ptr_to_member_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + case DW_TAG_pointer_type: { + if (needsParens(Inner)) + OS << ')'; + appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner), + /*SkipFirstParamIfArtificial=*/D.getTag() == + DW_TAG_ptr_to_member_type); + break; + } + /* + case DW_TAG_structure_type: + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_base_type: + case DW_TAG_namespace: + */ + default: + break; + } +} + +void DWARFTypePrinter::appendQualifiedName(DWARFDie D) { + if (D) + appendScopes(D.getParent()); + appendUnqualifiedName(D); +} +DWARFDie DWARFTypePrinter::appendQualifiedNameBefore(DWARFDie D) { + if (D) + appendScopes(D.getParent()); + return appendUnqualifiedNameBefore(D); +} +bool DWARFTypePrinter::appendTemplateParameters(DWARFDie D, + bool *FirstParameter) { + bool FirstParameterValue = true; + bool IsTemplate = false; + if (!FirstParameter) + FirstParameter = &FirstParameterValue; + for (const DWARFDie &C : D) { + auto Sep = [&] { + if (*FirstParameter) + OS << '<'; + else + OS << ", "; + IsTemplate = true; + EndedWithTemplate = false; + *FirstParameter = false; + }; + if (C.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + IsTemplate = true; + appendTemplateParameters(C, FirstParameter); + } + if (C.getTag() == dwarf::DW_TAG_template_value_parameter) { + DWARFDie T = resolveReferencedType(C); + Sep(); + if (T.getTag() == DW_TAG_enumeration_type) { + OS << '('; + appendQualifiedName(T); + OS << ')'; + auto V = C.find(DW_AT_const_value); + OS << std::to_string(*V->getAsSignedConstant()); + continue; + } + // /Maybe/ we could do pointer type parameters, looking for the + // symbol in the ELF symbol table to get back to the variable... + // but probably not worth it. + if (T.getTag() == DW_TAG_pointer_type) + continue; + const char *RawName = dwarf::toString(T.find(DW_AT_name), nullptr); + assert(RawName); + StringRef Name = RawName; + auto V = C.find(DW_AT_const_value); + bool IsQualifiedChar = false; + if (Name == "bool") { + OS << (*V->getAsUnsignedConstant() ? 
"true" : "false"); + } else if (Name == "short") { + OS << "(short)"; + OS << std::to_string(*V->getAsSignedConstant()); + } else if (Name == "unsigned short") { + OS << "(unsigned short)"; + OS << std::to_string(*V->getAsSignedConstant()); + } else if (Name == "int") + OS << std::to_string(*V->getAsSignedConstant()); + else if (Name == "long") { + OS << std::to_string(*V->getAsSignedConstant()); + OS << "L"; + } else if (Name == "long long") { + OS << std::to_string(*V->getAsSignedConstant()); + OS << "LL"; + } else if (Name == "unsigned int") { + OS << std::to_string(*V->getAsUnsignedConstant()); + OS << "U"; + } else if (Name == "unsigned long") { + OS << std::to_string(*V->getAsUnsignedConstant()); + OS << "UL"; + } else if (Name == "unsigned long long") { + OS << std::to_string(*V->getAsUnsignedConstant()); + OS << "ULL"; + } else if (Name == "char" || + (IsQualifiedChar = + (Name == "unsigned char" || Name == "signed char"))) { + // FIXME: check T's DW_AT_type to see if it's signed or not (since + // char signedness is implementation defined). + auto Val = *V->getAsSignedConstant(); + // Copied/hacked up from Clang's CharacterLiteral::print - incomplete + // (doesn't actually support different character types/widths, sign + // handling's not done, and doesn't correctly test if a character is + // printable or needs to use a numeric escape sequence instead) + if (IsQualifiedChar) { + OS << '('; + OS << Name; + OS << ')'; + } + switch (Val) { + case '\\': + OS << "'\\\\'"; + break; + case '\'': + OS << "'\\''"; + break; + case '\a': + // TODO: K&R: the meaning of '\\a' is different in traditional C + OS << "'\\a'"; + break; + case '\b': + OS << "'\\b'"; + break; + case '\f': + OS << "'\\f'"; + break; + case '\n': + OS << "'\\n'"; + break; + case '\r': + OS << "'\\r'"; + break; + case '\t': + OS << "'\\t'"; + break; + case '\v': + OS << "'\\v'"; + break; + default: + if ((Val & ~0xFFu) == ~0xFFu) + Val &= 0xFFu; + if (Val < 127 && Val >= 32) { + OS << "'"; + OS << (char)Val; + OS << "'"; + } else if (Val < 256) + OS << to_string(llvm::format("'\\x%02x'", Val)); + else if (Val <= 0xFFFF) + OS << to_string(llvm::format("'\\u%04x'", Val)); + else + OS << to_string(llvm::format("'\\U%08x'", Val)); + } + } + continue; + } + if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + const char *RawName = + dwarf::toString(C.find(DW_AT_GNU_template_name), nullptr); + assert(RawName); + StringRef Name = RawName; + Sep(); + OS << Name; + continue; + } + if (C.getTag() != dwarf::DW_TAG_template_type_parameter) + continue; + auto TypeAttr = C.find(DW_AT_type); + Sep(); + appendQualifiedName(TypeAttr ? resolveReferencedType(C, *TypeAttr) + : DWARFDie()); + } + if (IsTemplate && *FirstParameter && FirstParameter == &FirstParameterValue) { + OS << '<'; + EndedWithTemplate = false; + } + return IsTemplate; +} +void DWARFTypePrinter::decomposeConstVolatile(DWARFDie &N, DWARFDie &T, + DWARFDie &C, DWARFDie &V) { + (N.getTag() == DW_TAG_const_type ? 
C : V) = N; + T = resolveReferencedType(N); + if (T) { + auto Tag = T.getTag(); + if (Tag == DW_TAG_const_type) { + C = T; + T = resolveReferencedType(T); + } else if (Tag == DW_TAG_volatile_type) { + V = T; + T = resolveReferencedType(T); + } + } +} +void DWARFTypePrinter::appendConstVolatileQualifierAfter(DWARFDie N) { + DWARFDie C; + DWARFDie V; + DWARFDie T; + decomposeConstVolatile(N, T, C, V); + if (T && T.getTag() == DW_TAG_subroutine_type) + appendSubroutineNameAfter(T, resolveReferencedType(T), false, C.isValid(), + V.isValid()); + else + appendUnqualifiedNameAfter(T, resolveReferencedType(T)); +} +void DWARFTypePrinter::appendConstVolatileQualifierBefore(DWARFDie N) { + DWARFDie C; + DWARFDie V; + DWARFDie T; + decomposeConstVolatile(N, T, C, V); + bool Subroutine = T && T.getTag() == DW_TAG_subroutine_type; + DWARFDie A = T; + while (A && A.getTag() == DW_TAG_array_type) + A = resolveReferencedType(A); + bool Leading = + (!A || (A.getTag() != DW_TAG_pointer_type && + A.getTag() != llvm::dwarf::DW_TAG_ptr_to_member_type)) && + !Subroutine; + if (Leading) { + if (C) + OS << "const "; + if (V) + OS << "volatile "; + } + appendQualifiedNameBefore(T); + if (!Leading && !Subroutine) { + Word = true; + if (C) + OS << "const"; + if (V) { + if (C) + OS << ' '; + OS << "volatile"; + } + } +} +void DWARFTypePrinter::appendUnqualifiedName(DWARFDie D, + std::string *OriginalFullName) { + // FIXME: We should have pretty printers per language. Currently we print + // everything as if it was C++ and fall back to the TAG type name. + DWARFDie Inner = appendUnqualifiedNameBefore(D, OriginalFullName); + appendUnqualifiedNameAfter(D, Inner); +} +void DWARFTypePrinter::appendSubroutineNameAfter( + DWARFDie D, DWARFDie Inner, bool SkipFirstParamIfArtificial, bool Const, + bool Volatile) { + DWARFDie FirstParamIfArtificial; + OS << '('; + EndedWithTemplate = false; + bool First = true; + bool RealFirst = true; + for (DWARFDie P : D) { + if (P.getTag() != DW_TAG_formal_parameter && + P.getTag() != DW_TAG_unspecified_parameters) + return; + DWARFDie T = resolveReferencedType(P); + if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) { + FirstParamIfArtificial = T; + RealFirst = false; + continue; + } + if (!First) { + OS << ", "; + } + First = false; + if (P.getTag() == DW_TAG_unspecified_parameters) + OS << "..."; + else + appendQualifiedName(T); + } + EndedWithTemplate = false; + OS << ')'; + if (FirstParamIfArtificial) { + if (DWARFDie P = FirstParamIfArtificial) { + if (P.getTag() == DW_TAG_pointer_type) { + auto CVStep = [&](DWARFDie CV) { + if (DWARFDie U = resolveReferencedType(CV)) { + Const |= U.getTag() == DW_TAG_const_type; + Volatile |= U.getTag() == DW_TAG_volatile_type; + return U; + } + return DWARFDie(); + }; + if (DWARFDie CV = CVStep(P)) { + CVStep(CV); + } + } + } + } + + if (auto CC = D.find(DW_AT_calling_convention)) { + switch (*CC->getAsUnsignedConstant()) { + case CallingConvention::DW_CC_BORLAND_stdcall: + OS << " __attribute__((stdcall))"; + break; + case CallingConvention::DW_CC_BORLAND_msfastcall: + OS << " __attribute__((fastcall))"; + break; + case CallingConvention::DW_CC_BORLAND_thiscall: + OS << " __attribute__((thiscall))"; + break; + case CallingConvention::DW_CC_LLVM_vectorcall: + OS << " __attribute__((vectorcall))"; + break; + case CallingConvention::DW_CC_BORLAND_pascal: + OS << " __attribute__((pascal))"; + break; + case CallingConvention::DW_CC_LLVM_Win64: + OS << " __attribute__((ms_abi))"; + break; + case 
CallingConvention::DW_CC_LLVM_X86_64SysV: + OS << " __attribute__((sysv_abi))"; + break; + case CallingConvention::DW_CC_LLVM_AAPCS: + // AArch64VectorCall missing? + OS << " __attribute__((pcs(\"aapcs\")))"; + break; + case CallingConvention::DW_CC_LLVM_AAPCS_VFP: + OS << " __attribute__((pcs(\"aapcs-vfp\")))"; + break; + case CallingConvention::DW_CC_LLVM_IntelOclBicc: + OS << " __attribute__((intel_ocl_bicc))"; + break; + case CallingConvention::DW_CC_LLVM_SpirFunction: + case CallingConvention::DW_CC_LLVM_OpenCLKernel: + // These aren't available as attributes, but maybe we should still + // render them somehow? (Clang doesn't render them, but that's an issue + // for template names too - since then the DWARF names of templates + // instantiated with function types with these calling conventions won't + // have distinct names - so we'd need to fix that too) + break; + case CallingConvention::DW_CC_LLVM_Swift: + // SwiftAsync missing + OS << " __attribute__((swiftcall))"; + break; + case CallingConvention::DW_CC_LLVM_PreserveMost: + OS << " __attribute__((preserve_most))"; + break; + case CallingConvention::DW_CC_LLVM_PreserveAll: + OS << " __attribute__((preserve_all))"; + break; + case CallingConvention::DW_CC_LLVM_X86RegCall: + OS << " __attribute__((regcall))"; + break; + } + } + + if (Const) + OS << " const"; + if (Volatile) + OS << " volatile"; + if (D.find(DW_AT_reference)) + OS << " &"; + if (D.find(DW_AT_rvalue_reference)) + OS << " &&"; + + appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner)); +} +void DWARFTypePrinter::appendScopes(DWARFDie D) { + if (D.getTag() == DW_TAG_compile_unit) + return; + if (D.getTag() == DW_TAG_type_unit) + return; + if (D.getTag() == DW_TAG_skeleton_unit) + return; + if (D.getTag() == DW_TAG_subprogram) + return; + if (D.getTag() == DW_TAG_lexical_block) + return; + D = D.resolveTypeUnitReference(); + if (DWARFDie P = D.getParent()) + appendScopes(P); + appendUnqualifiedName(D); + OS << "::"; +} +} // namespace llvm diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index a301b65dd444..fe16ca06132b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -8,9 +8,7 @@ #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" -#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index eed0a60ec75e..74667fcb92bc 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -9,15 +9,23 @@ #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include 
"llvm/DebugInfo/DWARF/DWARFListTable.h" +#include "llvm/DebugInfo/DWARF/DWARFObject.h" +#include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" @@ -25,7 +33,6 @@ #include #include #include -#include #include #include @@ -79,7 +86,14 @@ void DWARFUnitVector::addUnitsImpl( if (!IndexEntry && IsDWO) { const DWARFUnitIndex &Index = getDWARFUnitIndex( Context, Header.isTypeUnit() ? DW_SECT_EXT_TYPES : DW_SECT_INFO); - IndexEntry = Index.getFromOffset(Header.getOffset()); + if (Index) { + if (Header.isTypeUnit()) + IndexEntry = Index.getFromHash(Header.getTypeHash()); + else if (auto DWOId = Header.getDWOId()) + IndexEntry = Index.getFromHash(*DWOId); + } + if (!IndexEntry) + IndexEntry = Index.getFromOffset(Header.getOffset()); } if (IndexEntry && !Header.applyIndexEntry(IndexEntry)) return nullptr; @@ -366,6 +380,9 @@ void DWARFUnit::clear() { AddrOffsetSectionBase = None; SU = nullptr; clearDIEs(false); + AddrDieMap.clear(); + if (DWO) + DWO->clear(); DWO.reset(); } @@ -407,7 +424,7 @@ void DWARFUnit::extractDIEsToVector( assert((Parents.back() == UINT32_MAX || Parents.back() <= Dies.size()) && "Wrong parent index"); - // Extract die. Stop if any error occured. + // Extract die. Stop if any error occurred. if (!DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset, Parents.back())) break; @@ -607,7 +624,7 @@ bool DWARFUnit::parseDWO() { DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase); if (getVersion() == 4) { auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWO->setRangesSection(RangeSection, DWORangesBase.getValueOr(0)); + DWO->setRangesSection(RangeSection, DWORangesBase.value_or(0)); } return true; @@ -735,6 +752,100 @@ DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { return R->second.second; } +void DWARFUnit::updateVariableDieMap(DWARFDie Die) { + for (DWARFDie Child : Die) { + if (isType(Child.getTag())) + continue; + updateVariableDieMap(Child); + } + + if (Die.getTag() != DW_TAG_variable) + return; + + Expected Locations = + Die.getLocations(DW_AT_location); + if (!Locations) { + // Missing DW_AT_location is fine here. + consumeError(Locations.takeError()); + return; + } + + uint64_t Address = UINT64_MAX; + + for (const DWARFLocationExpression &Location : *Locations) { + uint8_t AddressSize = getAddressByteSize(); + DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize); + DWARFExpression Expr(Data, AddressSize); + auto It = Expr.begin(); + if (It == Expr.end()) + continue; + + // Match exactly the main sequence used to describe global variables: + // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence + // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in + // future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()` is + // a good starting point) is extended to use further expressions, this code + // needs to be updated. + uint64_t LocationAddr; + if (It->getCode() == dwarf::DW_OP_addr) { + LocationAddr = It->getRawOperand(0); + } else if (It->getCode() == dwarf::DW_OP_addrx) { + uint64_t DebugAddrOffset = It->getRawOperand(0); + if (auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset)) { + LocationAddr = Pointer->Address; + } + } else { + continue; + } + + // Read the optional 2nd operand, a DW_OP_plus_uconst. 
+ if (++It != Expr.end()) { + if (It->getCode() != dwarf::DW_OP_plus_uconst) + continue; + + LocationAddr += It->getRawOperand(0); + + // Probe for a 3rd operand; if one exists, bail. + if (++It != Expr.end()) + continue; + } + + Address = LocationAddr; + break; + } + + // Get the size of the global variable. If all else fails (i.e. the global has + // no type), then we use a size of one to still allow symbolization of the + // exact address. + uint64_t GVSize = 1; + if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type)) + if (Optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize())) + GVSize = *Size; + + if (Address != UINT64_MAX) + VariableDieMap[Address] = {Address + GVSize, Die}; +} + +DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) { + extractDIEsIfNeeded(false); + + auto RootDie = getUnitDIE(); + + auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset()); + if (RootLookup.second) + updateVariableDieMap(RootDie); + + auto R = VariableDieMap.upper_bound(Address); + if (R == VariableDieMap.begin()) + return DWARFDie(); + + // upper_bound's previous item contains Address. + --R; + if (Address >= R->second.first) + return DWARFDie(); + return R->second.second; +} + void DWARFUnit::getInlinedChainForAddress(uint64_t Address, SmallVectorImpl<DWARFDie> &InlinedChain) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index d27fd08db14e..d161beef2202 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataExtractor.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index ca7ac785b550..c704f8f583af 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -6,17 +6,28 @@ // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/DebugInfo/DWARF/DWARFAttribute.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFObject.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" -#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" +#include "llvm/Object/Error.h" #include "llvm/Support/DJB.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -28,6 +39,10 @@ using namespace llvm; using namespace dwarf; using namespace object; +namespace llvm { +class DWARFDebugInfoEntry; +} + Optional<DWARFAddressRange> 
DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { auto Begin = Ranges.begin(); @@ -381,6 +396,59 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S) { return NumDebugInfoErrors; } +unsigned DWARFVerifier::verifyIndex(StringRef Name, + DWARFSectionKind InfoColumnKind, + StringRef IndexStr) { + if (IndexStr.empty()) + return 0; + OS << "Verifying " << Name << "...\n"; + DWARFUnitIndex Index(InfoColumnKind); + DataExtractor D(IndexStr, DCtx.isLittleEndian(), 0); + if (!Index.parse(D)) + return 1; + using MapType = IntervalMap; + MapType::Allocator Alloc; + std::vector> Sections(Index.getColumnKinds().size()); + for (const DWARFUnitIndex::Entry &E : Index.getRows()) { + uint64_t Sig = E.getSignature(); + if (!E.getContributions()) + continue; + for (auto E : enumerate(InfoColumnKind == DW_SECT_INFO + ? makeArrayRef(E.getContributions(), + Index.getColumnKinds().size()) + : makeArrayRef(E.getContribution(), 1))) { + const DWARFUnitIndex::Entry::SectionContribution &SC = E.value(); + int Col = E.index(); + if (SC.Length == 0) + continue; + if (!Sections[Col]) + Sections[Col] = std::make_unique(Alloc); + auto &M = *Sections[Col]; + auto I = M.find(SC.Offset); + if (I != M.end() && I.start() < (SC.Offset + SC.Length)) { + error() << llvm::formatv( + "overlapping index entries for entries {0:x16} " + "and {1:x16} for column {2}\n", + *I, Sig, toString(Index.getColumnKinds()[Col])); + return 1; + } + M.insert(SC.Offset, SC.Offset + SC.Length - 1, Sig); + } + } + + return 0; +} + +bool DWARFVerifier::handleDebugCUIndex() { + return verifyIndex(".debug_cu_index", DWARFSectionKind::DW_SECT_INFO, + DCtx.getDWARFObj().getCUIndexSection()) == 0; +} + +bool DWARFVerifier::handleDebugTUIndex() { + return verifyIndex(".debug_tu_index", DWARFSectionKind::DW_SECT_EXT_TYPES, + DCtx.getDWARFObj().getTUIndexSection()) == 0; +} + bool DWARFVerifier::handleDebugInfo() { const DWARFObject &DObj = DCtx.getDWARFObj(); unsigned NumErrors = 0; diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 6eef6f84ab40..473a69b34ac3 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -10,6 +10,7 @@ #include #include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Support/Error.h" #include "llvm/Support/ThreadPool.h" @@ -287,12 +288,12 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, // linker problems or LTO or other DWARF re-linking so it is worth emitting // an error, but not worth stopping the creation of the GSYM. 
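// For example (hypothetical addresses): with FI.Range = [0x1000, 0x1100), a
// row at 0x0ff0 is clamped up to 0x1000 after the error above is logged,
// while a row at 0x1200 is silently dropped by the `continue` below.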
if (!FI.Range.contains(RowAddress)) { - if (RowAddress < FI.Range.Start) { + if (RowAddress < FI.Range.start()) { Log << "error: DIE has a start address whose LowPC is between the " "line table Row[" << RowIndex << "] with address " << HEX64(RowAddress) << " and the next one.\n"; Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); - RowAddress = FI.Range.Start; + RowAddress = FI.Range.start(); } else { continue; } @@ -403,8 +404,7 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { } FunctionInfo FI; - FI.setStartAddress(Range.LowPC); - FI.setEndAddress(Range.HighPC); + FI.Range = {Range.LowPC, Range.HighPC}; FI.Name = *NameIndex; if (CUI.LineTable) { convertFunctionLineTable(OS, CUI, Die, Gsym, FI); @@ -427,11 +427,28 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { Error DwarfTransformer::convert(uint32_t NumThreads) { size_t NumBefore = Gsym.getNumFunctionInfos(); + auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { + DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); + if (llvm::Optional DWOId = DwarfUnit.getDWOId()) { + DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); + if (!DWOCU->isDWOUnit()) { + std::string DWOName = dwarf::toString( + DwarfUnit.getUnitDIE().find( + {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), + ""); + Log << "warning: Unable to retrieve DWO .debug_info section for " + << DWOName << "\n"; + } else { + ReturnDie = DWOCU->getUnitDIE(false); + } + } + return ReturnDie; + }; if (NumThreads == 1) { // Parse all DWARF data from this thread, use the same string/file table // for everything for (const auto &CU : DICtx.compile_units()) { - DWARFDie Die = CU->getUnitDIE(false); + DWARFDie Die = getDie(*CU); CUInfo CUI(DICtx, dyn_cast(CU.get())); handleDie(Log, CUI, Die); } @@ -456,7 +473,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads) { // Now convert all DWARF to GSYM in a thread pool. std::mutex LogMutex; for (const auto &CU : DICtx.compile_units()) { - DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); + DWARFDie Die = getDie(*CU); if (Die) { CUInfo CUI(DICtx, dyn_cast(CU.get())); pool.async([this, CUI, &LogMutex, Die]() mutable { diff --git a/llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp b/llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp new file mode 100644 index 000000000000..4a42100c86da --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp @@ -0,0 +1,79 @@ +//===- ExtractRanges.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" +#include +#include + +namespace llvm { +namespace gsym { + +void encodeRange(const AddressRange &Range, FileWriter &O, uint64_t BaseAddr) { + assert(Range.start() >= BaseAddr); + O.writeULEB(Range.start() - BaseAddr); + O.writeULEB(Range.size()); +} + +AddressRange decodeRange(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + const uint64_t AddrOffset = Data.getULEB128(&Offset); + const uint64_t Size = Data.getULEB128(&Offset); + const uint64_t StartAddr = BaseAddr + AddrOffset; + + return {StartAddr, StartAddr + Size}; +} + +void encodeRanges(const AddressRanges &Ranges, FileWriter &O, + uint64_t BaseAddr) { + O.writeULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) + encodeRange(Range, O, BaseAddr); +} + +void decodeRanges(AddressRanges &Ranges, DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + Ranges.clear(); + uint64_t NumRanges = Data.getULEB128(&Offset); + Ranges.reserve(NumRanges); + for (uint64_t RangeIdx = 0; RangeIdx < NumRanges; RangeIdx++) + Ranges.insert(decodeRange(Data, BaseAddr, Offset)); +} + +void skipRange(DataExtractor &Data, uint64_t &Offset) { + Data.getULEB128(&Offset); + Data.getULEB128(&Offset); +} + +uint64_t skipRanges(DataExtractor &Data, uint64_t &Offset) { + uint64_t NumRanges = Data.getULEB128(&Offset); + for (uint64_t I = 0; I < NumRanges; ++I) + skipRange(Data, Offset); + return NumRanges; +} + +} // namespace gsym + +raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R) { + return OS << '[' << HEX64(R.start()) << " - " << HEX64(R.end()) << ")"; +} + +raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR) { + size_t Size = AR.size(); + for (size_t I = 0; I < Size; ++I) { + if (I) + OS << ' '; + OS << AR[I]; + } + return OS; +} + +} // namespace llvm diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index cef1b9498c5c..4f5d240cdf72 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -36,12 +36,11 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { llvm::Expected FunctionInfo::decode(DataExtractor &Data, uint64_t BaseAddr) { FunctionInfo FI; - FI.Range.Start = BaseAddr; uint64_t Offset = 0; if (!Data.isValidOffsetForDataOfSize(Offset, 4)) return createStringError(std::errc::io_error, "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset); - FI.Range.End = FI.Range.Start + Data.getU32(&Offset); + FI.Range = {BaseAddr, BaseAddr + Data.getU32(&Offset)}; if (!Data.isValidOffsetForDataOfSize(Offset, 4)) return createStringError(std::errc::io_error, "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset); @@ -109,13 +108,13 @@ llvm::Expected FunctionInfo::encode(FileWriter &O) const { // Write the name of this function as a uint32_t string table offset. O.writeU32(Name); - if (OptLineTable.hasValue()) { + if (OptLineTable) { O.writeU32(InfoType::LineTableInfo); // Write a uint32_t length as zero for now, we will fix this up after // writing the LineTable out with the number of bytes that were written. 
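// A minimal sketch of this write-zero-then-fix-up pattern, assuming a
// FileWriter fixup32(Value, Offset) helper alongside the tell()/writeU32()
// calls used here:
//   const uint64_t SizeFieldOffset = O.tell();
//   O.writeU32(0);                  // placeholder length
//   const uint64_t Start = O.tell();
//   /* ...emit the chunk payload... */
//   O.fixup32(uint32_t(O.tell() - Start), SizeFieldOffset);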
O.writeU32(0); const auto StartOffset = O.tell(); - llvm::Error err = OptLineTable->encode(O, Range.Start); + llvm::Error err = OptLineTable->encode(O, Range.start()); if (err) return std::move(err); const auto Length = O.tell() - StartOffset; @@ -127,13 +126,13 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const { } // Write out the inline function info if we have any and if it is valid. - if (Inline.hasValue()) { + if (Inline) { O.writeU32(InfoType::InlineInfo); // Write a uint32_t length as zero for now, we will fix this up after // writing the LineTable out with the number of bytes that were written. O.writeU32(0); const auto StartOffset = O.tell(); - llvm::Error err = Inline->encode(O, Range.Start); + llvm::Error err = Inline->encode(O, Range.start()); if (err) return std::move(err); const auto Length = O.tell() - StartOffset; @@ -157,9 +156,8 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data, uint64_t Addr) { LookupResult LR; LR.LookupAddr = Addr; - LR.FuncRange.Start = FuncAddr; uint64_t Offset = 0; - LR.FuncRange.End = FuncAddr + Data.getU32(&Offset); + LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(&Offset)}; uint32_t NameOffset = Data.getU32(&Offset); // The "lookup" function doesn't report errors as accurately as the "decode" // function as it is meant to be fast. For more accurate errors we could call diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 1c20a59469dc..8281938770cf 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -271,7 +271,7 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { } } } else if (Prev.Range.size() == 0 && - Curr.Range.contains(Prev.Range.Start)) { + Curr.Range.contains(Prev.Range.start())) { if (!Quiet) { OS << "warning: removing symbol:\n" << Prev << "\nKeeping:\n" @@ -291,8 +291,8 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { // has no size when doing lookups. 
if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { if (auto Range = - ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) { - Funcs.back().Range.End = Range->End; + ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) { + Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; } } OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index 2ad18bf63d5d..0c585cc8d306 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -48,7 +48,7 @@ llvm::Expected GsymReader::copyBuffer(StringRef Bytes) { llvm::Expected GsymReader::create(std::unique_ptr &MemBuffer) { - if (!MemBuffer.get()) + if (!MemBuffer) return createStringError(std::errc::invalid_argument, "invalid memory buffer"); GsymReader GR(std::move(MemBuffer)); diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp index 21679b1b78aa..f7c4637a8a5b 100644 --- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -75,7 +75,7 @@ llvm::Optional InlineInfo::getInlineStack(uint64_t Addr static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) { if (!SkippedRanges) { - if (AddressRanges::skip(Data, Offset) == 0) + if (skipRanges(Data, Offset) == 0) return false; } bool HasChildren = Data.getU8(&Offset) != 0; @@ -109,7 +109,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err) { InlineInfo Inline; - Inline.Ranges.decode(Data, BaseAddr, Offset); + decodeRanges(Inline.Ranges, Data, BaseAddr, Offset); if (Inline.Ranges.empty()) return true; // Check if the address is contained within the inline information, and if @@ -128,7 +128,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, if (HasChildren) { // Child address ranges are encoded relative to the first address in the // parent InlineInfo object. - const auto ChildBaseAddr = Inline.Ranges[0].Start; + const auto ChildBaseAddr = Inline.Ranges[0].start(); bool Done = false; while (!Done) Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err); @@ -150,7 +150,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, SrcLoc.Base = GR.getString(CallFile->Base); SrcLoc.Line = Inline.CallLine; SrcLocs.back().Name = GR.getString(Inline.Name); - SrcLocs.back().Offset = Addr - Inline.Ranges[0].Start; + SrcLocs.back().Offset = Addr - Inline.Ranges[0].start(); SrcLocs.push_back(SrcLoc); } return true; @@ -182,7 +182,7 @@ static llvm::Expected decode(DataExtractor &Data, uint64_t &Offset, if (!Data.isValidOffset(Offset)) return createStringError(std::errc::io_error, "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset); - Inline.Ranges.decode(Data, BaseAddr, Offset); + decodeRanges(Inline.Ranges, Data, BaseAddr, Offset); if (Inline.Ranges.empty()) return Inline; if (!Data.isValidOffsetForDataOfSize(Offset, 1)) @@ -205,7 +205,7 @@ static llvm::Expected decode(DataExtractor &Data, uint64_t &Offset, if (HasChildren) { // Child address ranges are encoded relative to the first address in the // parent InlineInfo object. 
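// (Sketch with hypothetical numbers: if the parent's first range starts at
// 0x4000, a child range [0x4010, 0x4020) is encoded as ULEB offset 0x10
// from that base followed by ULEB size 0x10.)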
- const auto ChildBaseAddr = Inline.Ranges[0].Start; + const auto ChildBaseAddr = Inline.Ranges[0].start(); while (true) { llvm::Expected Child = decode(Data, Offset, ChildBaseAddr); if (!Child) @@ -232,7 +232,7 @@ llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const { if (!isValid()) return createStringError(std::errc::invalid_argument, "attempted to encode invalid InlineInfo object"); - Ranges.encode(O, BaseAddr); + encodeRanges(Ranges, O, BaseAddr); bool HasChildren = !Children.empty(); O.writeU8(HasChildren); O.writeU32(Name); @@ -242,7 +242,7 @@ llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const { // Child address ranges are encoded as relative to the first // address in the Ranges for this object. This keeps the offsets // small and allows for efficient encoding using ULEB offsets. - const uint64_t ChildBaseAddr = Ranges[0].Start; + const uint64_t ChildBaseAddr = Ranges[0].start(); for (const auto &Child : Children) { // Make sure all child address ranges are contained in the parent address // ranges. diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp index 8a624226b1d3..00a5b1bbfaa5 100644 --- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp +++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/GSYM/LookupResult.h" #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -42,7 +43,7 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) { OS << " @ "; if (!SL.Dir.empty()) { OS << SL.Dir; - if (SL.Dir.contains('\\') and not SL.Dir.contains('/')) + if (SL.Dir.contains('\\') && !SL.Dir.contains('/')) OS << '\\'; else OS << '/'; diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp deleted file mode 100644 index c1e8eccd0daa..000000000000 --- a/llvm/lib/DebugInfo/GSYM/Range.cpp +++ /dev/null @@ -1,123 +0,0 @@ -//===- Range.cpp ------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/GSYM/Range.h" -#include "llvm/DebugInfo/GSYM/FileWriter.h" -#include "llvm/Support/DataExtractor.h" -#include -#include - -using namespace llvm; -using namespace gsym; - - -void AddressRanges::insert(AddressRange Range) { - if (Range.size() == 0) - return; - - auto It = llvm::upper_bound(Ranges, Range); - auto It2 = It; - while (It2 != Ranges.end() && It2->Start < Range.End) - ++It2; - if (It != It2) { - Range.End = std::max(Range.End, It2[-1].End); - It = Ranges.erase(It, It2); - } - if (It != Ranges.begin() && Range.Start < It[-1].End) - It[-1].End = std::max(It[-1].End, Range.End); - else - Ranges.insert(It, Range); -} - -bool AddressRanges::contains(uint64_t Addr) const { - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.Start <= Addr; }); - return It != Ranges.begin() && Addr < It[-1].End; -} - -bool AddressRanges::contains(AddressRange Range) const { - if (Range.size() == 0) - return false; - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.Start <= Range.Start; }); - if (It == Ranges.begin()) - return false; - return Range.End <= It[-1].End; -} - -Optional<AddressRange> -AddressRanges::getRangeThatContains(uint64_t Addr) const { - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.Start <= Addr; }); - if (It != Ranges.begin() && Addr < It[-1].End) - return It[-1]; - return llvm::None; -} - -raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { - return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")"; -} - -raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) { - size_t Size = AR.size(); - for (size_t I = 0; I < Size; ++I) { - if (I) - OS << ' '; - OS << AR[I]; - } - return OS; -} - -void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const { - assert(Start >= BaseAddr); - O.writeULEB(Start - BaseAddr); - O.writeULEB(size()); -} - -void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr, - uint64_t &Offset) { - const uint64_t AddrOffset = Data.getULEB128(&Offset); - const uint64_t Size = Data.getULEB128(&Offset); - const uint64_t StartAddr = BaseAddr + AddrOffset; - Start = StartAddr; - End = StartAddr + Size; -} - -void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const { - O.writeULEB(Ranges.size()); - if (Ranges.empty()) - return; - for (auto Range : Ranges) - Range.encode(O, BaseAddr); -} - -void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr, - uint64_t &Offset) { - clear(); - uint64_t NumRanges = Data.getULEB128(&Offset); - if (NumRanges == 0) - return; - Ranges.resize(NumRanges); - for (auto &Range : Ranges) - Range.decode(Data, BaseAddr, Offset); -} - -void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) { - Data.getULEB128(&Offset); - Data.getULEB128(&Offset); -} - -uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) { - uint64_t NumRanges = Data.getULEB128(&Offset); - for (uint64_t I=0; I<NumRanges; ++I) - AddressRange::skip(Data, Offset); - return NumRanges; -} diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp index 4eb16804171d..1a2267334049 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -9,7 +9,6 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/ADT/StringRef.h" 
#include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" @@ -20,7 +19,6 @@ #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Error.h" -#include #include #include diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index 0584966a98c5..3a719bd07c8a 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" -#include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Object/COFF.h" #include "llvm/Support/BinaryStreamWriter.h" @@ -30,7 +29,7 @@ DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf) PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86), Header(nullptr) {} -DbiStreamBuilder::~DbiStreamBuilder() {} +DbiStreamBuilder::~DbiStreamBuilder() = default; void DbiStreamBuilder::setVersionHeader(PdbRaw_DbiVer V) { VerHeader = V; } @@ -72,7 +71,7 @@ void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) { } void DbiStreamBuilder::addNewFpoData(const codeview::FrameData &FD) { - if (!NewFpoData.hasValue()) + if (!NewFpoData) NewFpoData.emplace(false); NewFpoData->addFrameData(FD); @@ -286,7 +285,7 @@ Error DbiStreamBuilder::finalize() { } Error DbiStreamBuilder::finalizeMsfLayout() { - if (NewFpoData.hasValue()) { + if (NewFpoData) { DbgStreams[(int)DbgHeaderType::NewFPO].emplace(); DbgStreams[(int)DbgHeaderType::NewFPO]->Size = NewFpoData->calculateSerializedSize(); @@ -307,7 +306,7 @@ Error DbiStreamBuilder::finalizeMsfLayout() { } for (auto &S : DbgStreams) { - if (!S.hasValue()) + if (!S) continue; auto ExpectedIndex = Msf.addStream(S->Size); if (!ExpectedIndex) @@ -428,14 +427,14 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, for (auto &Stream : DbgStreams) { uint16_t StreamNumber = kInvalidStreamIndex; - if (Stream.hasValue()) + if (Stream) StreamNumber = Stream->StreamNumber; if (auto EC = Writer.writeInteger(StreamNumber)) return EC; } for (auto &Stream : DbgStreams) { - if (!Stream.hasValue()) + if (!Stream) continue; assert(Stream->StreamNumber != kInvalidStreamIndex); diff --git a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp index 37192ba36a04..32bad9cea7ce 100644 --- a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/PDB/Native/EnumTables.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/Support/ScopedPrinter.h" using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp b/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp new file mode 100644 index 000000000000..a167d45982a9 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp @@ -0,0 +1,207 @@ +//===- FormatUtil.cpp ----------------------------------------- *- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +std::string llvm::pdb::typesetItemList(ArrayRef Opts, + uint32_t IndentLevel, uint32_t GroupSize, + StringRef Sep) { + std::string Result; + while (!Opts.empty()) { + ArrayRef ThisGroup; + ThisGroup = Opts.take_front(GroupSize); + Opts = Opts.drop_front(ThisGroup.size()); + Result += join(ThisGroup, Sep); + if (!Opts.empty()) { + Result += Sep; + Result += "\n"; + Result += std::string(formatv("{0}", fmt_repeat(' ', IndentLevel))); + } + } + return Result; +} + +std::string llvm::pdb::typesetStringList(uint32_t IndentLevel, + ArrayRef Strings) { + std::string Result = "["; + for (const auto &S : Strings) { + Result += std::string(formatv("\n{0}{1}", fmt_repeat(' ', IndentLevel), S)); + } + Result += "]"; + return Result; +} + +std::string llvm::pdb::formatChunkKind(DebugSubsectionKind Kind, + bool Friendly) { + if (Friendly) { + switch (Kind) { + RETURN_CASE(DebugSubsectionKind, None, "none"); + RETURN_CASE(DebugSubsectionKind, Symbols, "symbols"); + RETURN_CASE(DebugSubsectionKind, Lines, "lines"); + RETURN_CASE(DebugSubsectionKind, StringTable, "strings"); + RETURN_CASE(DebugSubsectionKind, FileChecksums, "checksums"); + RETURN_CASE(DebugSubsectionKind, FrameData, "frames"); + RETURN_CASE(DebugSubsectionKind, InlineeLines, "inlinee lines"); + RETURN_CASE(DebugSubsectionKind, CrossScopeImports, "xmi"); + RETURN_CASE(DebugSubsectionKind, CrossScopeExports, "xme"); + RETURN_CASE(DebugSubsectionKind, ILLines, "il lines"); + RETURN_CASE(DebugSubsectionKind, FuncMDTokenMap, "func md token map"); + RETURN_CASE(DebugSubsectionKind, TypeMDTokenMap, "type md token map"); + RETURN_CASE(DebugSubsectionKind, MergedAssemblyInput, + "merged assembly input"); + RETURN_CASE(DebugSubsectionKind, CoffSymbolRVA, "coff symbol rva"); + } + } else { + switch (Kind) { + RETURN_CASE(DebugSubsectionKind, None, "none"); + RETURN_CASE(DebugSubsectionKind, Symbols, "DEBUG_S_SYMBOLS"); + RETURN_CASE(DebugSubsectionKind, Lines, "DEBUG_S_LINES"); + RETURN_CASE(DebugSubsectionKind, StringTable, "DEBUG_S_STRINGTABLE"); + RETURN_CASE(DebugSubsectionKind, FileChecksums, "DEBUG_S_FILECHKSMS"); + RETURN_CASE(DebugSubsectionKind, FrameData, "DEBUG_S_FRAMEDATA"); + RETURN_CASE(DebugSubsectionKind, InlineeLines, "DEBUG_S_INLINEELINES"); + RETURN_CASE(DebugSubsectionKind, CrossScopeImports, + "DEBUG_S_CROSSSCOPEIMPORTS"); + RETURN_CASE(DebugSubsectionKind, CrossScopeExports, + "DEBUG_S_CROSSSCOPEEXPORTS"); + RETURN_CASE(DebugSubsectionKind, ILLines, "DEBUG_S_IL_LINES"); + RETURN_CASE(DebugSubsectionKind, FuncMDTokenMap, + "DEBUG_S_FUNC_MDTOKEN_MAP"); + RETURN_CASE(DebugSubsectionKind, TypeMDTokenMap, + "DEBUG_S_TYPE_MDTOKEN_MAP"); + RETURN_CASE(DebugSubsectionKind, MergedAssemblyInput, + "DEBUG_S_MERGED_ASSEMBLYINPUT"); + RETURN_CASE(DebugSubsectionKind, CoffSymbolRVA, + "DEBUG_S_COFF_SYMBOL_RVA"); + } + } + return formatUnknownEnum(Kind); +} + +std::string llvm::pdb::formatSymbolKind(SymbolKind K) { + switch (uint32_t(K)) { +#define SYMBOL_RECORD(EnumName, value, name) \ + case EnumName: \ + return #EnumName; 
+#define CV_SYMBOL(EnumName, value) SYMBOL_RECORD(EnumName, value, EnumName) +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + } + return formatUnknownEnum(K); +} + +std::string llvm::pdb::formatTypeLeafKind(TypeLeafKind K) { + switch (K) { +#define TYPE_RECORD(EnumName, value, name) \ + case EnumName: \ + return #EnumName; +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: + return formatv("UNKNOWN RECORD ({0:X})", + static_cast>(K)) + .str(); + } +} + +std::string llvm::pdb::formatSegmentOffset(uint16_t Segment, uint32_t Offset) { + return std::string(formatv("{0:4}:{1:4}", Segment, Offset)); +} + +#define PUSH_CHARACTERISTIC_FLAG(Enum, TheOpt, Value, Style, Descriptive) \ + PUSH_FLAG(Enum, TheOpt, Value, \ + ((Style == CharacteristicStyle::HeaderDefinition) ? #TheOpt \ + : Descriptive)) + +#define PUSH_MASKED_CHARACTERISTIC_FLAG(Enum, Mask, TheOpt, Value, Style, \ + Descriptive) \ + PUSH_MASKED_FLAG(Enum, Mask, TheOpt, Value, \ + ((Style == CharacteristicStyle::HeaderDefinition) \ + ? #TheOpt \ + : Descriptive)) + +std::string llvm::pdb::formatSectionCharacteristics(uint32_t IndentLevel, + uint32_t C, + uint32_t FlagsPerLine, + StringRef Separator, + CharacteristicStyle Style) { + using SC = COFF::SectionCharacteristics; + std::vector Opts; + if (C == COFF::SC_Invalid) + return "invalid"; + if (C == 0) + return "none"; + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_TYPE_NOLOAD, C, Style, "noload"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_TYPE_NO_PAD, C, Style, "no padding"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_CNT_CODE, C, Style, "code"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_CNT_INITIALIZED_DATA, C, Style, + "initialized data"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_CNT_UNINITIALIZED_DATA, C, Style, + "uninitialized data"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_OTHER, C, Style, "other"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_INFO, C, Style, "info"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_REMOVE, C, Style, "remove"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_COMDAT, C, Style, "comdat"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_GPREL, C, Style, "gp rel"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_PURGEABLE, C, Style, "purgeable"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_16BIT, C, Style, "16-bit"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_LOCKED, C, Style, "locked"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_PRELOAD, C, Style, "preload"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_1BYTES, C, + Style, "1 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_2BYTES, C, + Style, "2 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_4BYTES, C, + Style, "4 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_8BYTES, C, + Style, "8 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_16BYTES, C, + Style, "16 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_32BYTES, C, + Style, "32 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_64BYTES, C, + Style, "64 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_128BYTES, C, + Style, "128 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_256BYTES, C, + Style, "256 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_512BYTES, C, + Style, "512 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_1024BYTES, C, 
+ Style, "1024 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_2048BYTES, C, + Style, "2048 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_4096BYTES, C, + Style, "4096 byte align"); + PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_8192BYTES, C, + Style, "8192 byte align"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_NRELOC_OVFL, C, Style, + "noreloc overflow"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_DISCARDABLE, C, Style, + "discardable"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_NOT_CACHED, C, Style, + "not cached"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_NOT_PAGED, C, Style, "not paged"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_SHARED, C, Style, "shared"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_EXECUTE, C, Style, + "execute permissions"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_READ, C, Style, + "read permissions"); + PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_WRITE, C, Style, + "write permissions"); + return typesetItemList(Opts, IndentLevel, FlagsPerLine, Separator); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 9084e689d165..262873c6e6ab 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -14,7 +14,7 @@ #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" #include "llvm/DebugInfo/CodeView/RecordName.h" -#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" @@ -22,6 +22,7 @@ #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Parallel.h" @@ -196,7 +197,7 @@ void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) { void GSIHashStreamBuilder::finalizeBuckets( uint32_t RecordZeroOffset, MutableArrayRef Records) { // Hash every name in parallel. - parallelForEachN(0, Records.size(), [&](size_t I) { + parallelFor(0, Records.size(), [&](size_t I) { Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH); }); @@ -231,7 +232,7 @@ void GSIHashStreamBuilder::finalizeBuckets( // bucket can properly early-out when it detects the record won't be found. // The algorithm used here corresponds to the function // caseInsensitiveComparePchPchCchCch in the reference implementation. 
- parallelForEachN(0, IPHR_HASH, [&](size_t I) { + parallelFor(0, IPHR_HASH, [&](size_t I) { auto B = HashRecords.begin() + BucketStarts[I]; auto E = HashRecords.begin() + BucketCursors[I]; if (B == E) @@ -286,7 +287,7 @@ GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf), PSH(std::make_unique()), GSH(std::make_unique()) {} -GSIStreamBuilder::~GSIStreamBuilder() {} +GSIStreamBuilder::~GSIStreamBuilder() = default; uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { uint32_t Size = 0; diff --git a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp index f27d60f46815..7217fe38be55 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp @@ -21,6 +21,7 @@ #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/CodeView/RecordName.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" @@ -141,14 +142,12 @@ readGSIHashBuckets(FixedStreamArray &HashBuckets, return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, "Could not read a bitmap.")); - uint32_t NumBuckets1 = 0; uint32_t CompressedBucketIdx = 0; for (uint32_t I = 0; I <= IPHR_HASH; ++I) { uint8_t WordIdx = I / 32; uint8_t BitIdx = I % 32; bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx); if (IsSet) { - ++NumBuckets1; BucketMap[I] = CompressedBucketIdx++; } else { BucketMap[I] = -1; diff --git a/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp b/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp index dfdcdf1f4eaf..030a59821914 100644 --- a/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp @@ -7,14 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/HashTable.h" -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" -#include -#include #include #include diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp index f41bb32d69af..927a0ffee28c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/InfoStream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -16,7 +14,7 @@ using namespace llvm; using namespace llvm::codeview; -using namespace llvm::msf; +// using namespace llvm::msf; using namespace llvm::pdb; InfoStream::InfoStream(std::unique_ptr Stream) diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp index 42daa7cae799..e8f5a451b08e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -10,11 +10,9 @@ #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" 
-#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp index 3f4101db7b93..f1e8adeb1b21 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp @@ -9,7 +9,7 @@ #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp new file mode 100644 index 000000000000..495b25077737 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp @@ -0,0 +1,587 @@ +//===- InputFile.cpp ------------------------------------------ *- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/InputFile.h" + +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::object; +using namespace llvm::pdb; + +InputFile::InputFile() = default; +InputFile::~InputFile() = default; + +Expected +llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName, + uint32_t Index) { + Expected DbiOrErr = File.getPDBDbiStream(); + if (!DbiOrErr) + return DbiOrErr.takeError(); + DbiStream &Dbi = *DbiOrErr; + const auto &Modules = Dbi.modules(); + if (Index >= Modules.getModuleCount()) + return make_error(raw_error_code::index_out_of_bounds, + "Invalid module index"); + + auto Modi = Modules.getModuleDescriptor(Index); + + ModuleName = Modi.getModuleName(); + + uint16_t ModiStream = Modi.getModuleStreamIndex(); + if (ModiStream == kInvalidStreamIndex) + return make_error(raw_error_code::no_stream, + "Module stream not present"); + + auto ModStreamData = File.createIndexedStream(ModiStream); + + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); + if (auto EC = ModS.reload()) + return make_error(raw_error_code::corrupt_file, + "Invalid module stream"); + + return std::move(ModS); +} + +Expected 
llvm::pdb::getModuleDebugStream(PDBFile &File, + uint32_t Index) { + Expected DbiOrErr = File.getPDBDbiStream(); + if (!DbiOrErr) + return DbiOrErr.takeError(); + DbiStream &Dbi = *DbiOrErr; + const auto &Modules = Dbi.modules(); + auto Modi = Modules.getModuleDescriptor(Index); + + uint16_t ModiStream = Modi.getModuleStreamIndex(); + if (ModiStream == kInvalidStreamIndex) + return make_error(raw_error_code::no_stream, + "Module stream not present"); + + auto ModStreamData = File.createIndexedStream(ModiStream); + + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); + if (Error Err = ModS.reload()) + return make_error(raw_error_code::corrupt_file, + "Invalid module stream"); + + return std::move(ModS); +} + +static inline bool isCodeViewDebugSubsection(object::SectionRef Section, + StringRef Name, + BinaryStreamReader &Reader) { + if (Expected NameOrErr = Section.getName()) { + if (*NameOrErr != Name) + return false; + } else { + consumeError(NameOrErr.takeError()); + return false; + } + + Expected ContentsOrErr = Section.getContents(); + if (!ContentsOrErr) { + consumeError(ContentsOrErr.takeError()); + return false; + } + + Reader = BinaryStreamReader(*ContentsOrErr, support::little); + uint32_t Magic; + if (Reader.bytesRemaining() < sizeof(uint32_t)) + return false; + cantFail(Reader.readInteger(Magic)); + if (Magic != COFF::DEBUG_SECTION_MAGIC) + return false; + return true; +} + +static inline bool isDebugSSection(object::SectionRef Section, + DebugSubsectionArray &Subsections) { + BinaryStreamReader Reader; + if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) + return false; + + cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); + return true; +} + +static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { + BinaryStreamReader Reader; + if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && + !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) + return false; + cantFail(Reader.readArray(Types, Reader.bytesRemaining())); + return true; +} + +static std::string formatChecksumKind(FileChecksumKind Kind) { + switch (Kind) { + RETURN_CASE(FileChecksumKind, None, "None"); + RETURN_CASE(FileChecksumKind, MD5, "MD5"); + RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); + RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); + } + return formatUnknownEnum(Kind); +} + +template +static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) { + if (Append) + Printer.format(std::forward(args)...); + else + Printer.formatLine(std::forward(args)...); +} + +SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { + if (!File) + return; + + if (File->isPdb()) + initializeForPdb(GroupIndex); + else { + Name = ".debug$S"; + uint32_t I = 0; + for (const auto &S : File->obj().sections()) { + DebugSubsectionArray SS; + if (!isDebugSSection(S, SS)) + continue; + + if (!SC.hasChecksums() || !SC.hasStrings()) + SC.initialize(SS); + + if (I == GroupIndex) + Subsections = SS; + + if (SC.hasChecksums() && SC.hasStrings()) + break; + } + rebuildChecksumMap(); + } +} + +StringRef SymbolGroup::name() const { return Name; } + +void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { + Subsections = SS; +} + +void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } + +void SymbolGroup::initializeForPdb(uint32_t Modi) { + assert(File && File->isPdb()); + + // PDB always uses the same string table, but each module has its own + // checksums. So we only set the strings if they're not already set. 
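+ // A minimal usage sketch of getModuleDebugStream() as defined above,
+ // assuming `File` is a loaded PDBFile whose module 0 has a debug stream:
+ //   StringRef ModuleName;
+ //   if (Expected<ModuleDebugStreamRef> ModS =
+ //           getModuleDebugStream(File, ModuleName, /*Index=*/0))
+ //     /* use ModS->getSubsectionsArray() */;
+ //   else
+ //     consumeError(ModS.takeError());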
+ if (!SC.hasStrings()) { + auto StringTable = File->pdb().getStringTable(); + if (StringTable) + SC.setStrings(StringTable->getStringTable()); + else + consumeError(StringTable.takeError()); + } + + SC.resetChecksums(); + auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); + if (!MDS) { + consumeError(MDS.takeError()); + return; + } + + DebugStream = std::make_shared(std::move(*MDS)); + Subsections = DebugStream->getSubsectionsArray(); + SC.initialize(Subsections); + rebuildChecksumMap(); +} + +void SymbolGroup::rebuildChecksumMap() { + if (!SC.hasChecksums()) + return; + + for (const auto &Entry : SC.checksums()) { + auto S = SC.strings().getString(Entry.FileNameOffset); + if (!S) + continue; + ChecksumsByFile[*S] = Entry; + } +} + +const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { + assert(File && File->isPdb() && DebugStream); + return *DebugStream; +} + +Expected SymbolGroup::getNameFromStringTable(uint32_t Offset) const { + return SC.strings().getString(Offset); +} + +Expected SymbolGroup::getNameFromChecksums(uint32_t Offset) const { + StringRef Name; + if (!SC.hasChecksums()) { + return std::move(Name); + } + + auto Iter = SC.checksums().getArray().at(Offset); + if (Iter == SC.checksums().getArray().end()) { + return std::move(Name); + } + + uint32_t FO = Iter->FileNameOffset; + auto ExpectedFile = getNameFromStringTable(FO); + if (!ExpectedFile) { + return std::move(Name); + } + + return *ExpectedFile; +} + +void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, + bool Append) const { + auto FC = ChecksumsByFile.find(File); + if (FC == ChecksumsByFile.end()) { + formatInternal(Printer, Append, "- (no checksum) {0}", File); + return; + } + + formatInternal(Printer, Append, "- ({0}: {1}) {2}", + formatChecksumKind(FC->getValue().Kind), + toHex(FC->getValue().Checksum), File); +} + +void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, + uint32_t Offset, + bool Append) const { + if (!SC.hasChecksums()) { + formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); + return; + } + + auto Iter = SC.checksums().getArray().at(Offset); + if (Iter == SC.checksums().getArray().end()) { + formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); + return; + } + + uint32_t FO = Iter->FileNameOffset; + auto ExpectedFile = getNameFromStringTable(FO); + if (!ExpectedFile) { + formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); + consumeError(ExpectedFile.takeError()); + return; + } + if (Iter->Kind == FileChecksumKind::None) { + formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); + } else { + formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, + formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); + } +} + +Expected InputFile::open(StringRef Path, bool AllowUnknownFile) { + InputFile IF; + if (!llvm::sys::fs::exists(Path)) + return make_error(formatv("File {0} not found", Path), + inconvertibleErrorCode()); + + file_magic Magic; + if (auto EC = identify_magic(Path, Magic)) + return make_error( + formatv("Unable to identify file type for file {0}", Path), EC); + + if (Magic == file_magic::coff_object) { + Expected> BinaryOrErr = createBinary(Path); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + + IF.CoffObject = std::move(*BinaryOrErr); + IF.PdbOrObj = llvm::cast(IF.CoffObject.getBinary()); + return std::move(IF); + } + + if (Magic == file_magic::pdb) { + std::unique_ptr Session; + if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, 
Session))
+      return std::move(Err);
+
+    IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
+    IF.PdbOrObj = &IF.PdbSession->getPDBFile();
+
+    return std::move(IF);
+  }
+
+  if (!AllowUnknownFile)
+    return make_error<StringError>(
+        formatv("File {0} is not a supported file type", Path),
+        inconvertibleErrorCode());
+
+  auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
+                                      /*RequiresNullTerminator=*/false);
+  if (!Result)
+    return make_error<StringError>(
+        formatv("File {0} could not be opened", Path), Result.getError());
+
+  IF.UnknownFile = std::move(*Result);
+  IF.PdbOrObj = IF.UnknownFile.get();
+  return std::move(IF);
+}
+
+PDBFile &InputFile::pdb() {
+  assert(isPdb());
+  return *PdbOrObj.get<PDBFile *>();
+}
+
+const PDBFile &InputFile::pdb() const {
+  assert(isPdb());
+  return *PdbOrObj.get<PDBFile *>();
+}
+
+object::COFFObjectFile &InputFile::obj() {
+  assert(isObj());
+  return *PdbOrObj.get<object::COFFObjectFile *>();
+}
+
+const object::COFFObjectFile &InputFile::obj() const {
+  assert(isObj());
+  return *PdbOrObj.get<object::COFFObjectFile *>();
+}
+
+MemoryBuffer &InputFile::unknown() {
+  assert(isUnknown());
+  return *PdbOrObj.get<MemoryBuffer *>();
+}
+
+const MemoryBuffer &InputFile::unknown() const {
+  assert(isUnknown());
+  return *PdbOrObj.get<MemoryBuffer *>();
+}
+
+StringRef InputFile::getFilePath() const {
+  if (isPdb())
+    return pdb().getFilePath();
+  if (isObj())
+    return obj().getFileName();
+  assert(isUnknown());
+  return unknown().getBufferIdentifier();
+}
+
+bool InputFile::hasTypes() const {
+  if (isPdb())
+    return pdb().hasPDBTpiStream();
+
+  for (const auto &Section : obj().sections()) {
+    CVTypeArray Types;
+    if (isDebugTSection(Section, Types))
+      return true;
+  }
+  return false;
+}
+
+bool InputFile::hasIds() const {
+  if (isObj())
+    return false;
+  return pdb().hasPDBIpiStream();
+}
+
+bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
+
+bool InputFile::isObj() const {
+  return PdbOrObj.is<object::COFFObjectFile *>();
+}
+
+bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
+
+codeview::LazyRandomTypeCollection &
+InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
+  if (Types && Kind == kTypes)
+    return *Types;
+  if (Ids && Kind == kIds)
+    return *Ids;
+
+  if (Kind == kIds) {
+    assert(isPdb() && pdb().hasPDBIpiStream());
+  }
+
+  // If the collection was already initialized, we should have just returned it
+  // in step 1.
+  if (isPdb()) {
+    TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
+    auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
+                                           : pdb().getPDBTpiStream());
+
+    auto &Array = Stream.typeArray();
+    uint32_t Count = Stream.getNumTypeRecords();
+    auto Offsets = Stream.getTypeIndexOffsets();
+    Collection =
+        std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
+    return *Collection;
+  }
+
+  assert(isObj());
+  assert(Kind == kTypes);
+  assert(!Types);
+
+  for (const auto &Section : obj().sections()) {
+    CVTypeArray Records;
+    if (!isDebugTSection(Section, Records))
+      continue;
+
+    Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
+    return *Types;
+  }
+
+  Types = std::make_unique<LazyRandomTypeCollection>(100);
+  return *Types;
+}
+
+codeview::LazyRandomTypeCollection &InputFile::types() {
+  return getOrCreateTypeCollection(kTypes);
+}
+
+codeview::LazyRandomTypeCollection &InputFile::ids() {
+  // Object files have only one type stream that contains both types and ids.
+  // Similarly, some PDBs don't contain an IPI stream, and for those both types
+  // and IDs are in the same stream.
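+  // [Editor's illustrative aside, not part of the original patch] A caller
+  // can therefore request both collections without special-casing the two
+  // layouts; a minimal hypothetical usage sketch:
+  //   codeview::LazyRandomTypeCollection &TpiTypes = File.types();
+  //   codeview::LazyRandomTypeCollection &IpiIds = File.ids();
+  // For an object file, or for a PDB with no IPI stream, IpiIds simply
+  // aliases TpiTypes.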
+  if (isObj() || !pdb().hasPDBIpiStream())
+    return types();
+
+  return getOrCreateTypeCollection(kIds);
+}
+
+iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
+  return make_range<SymbolGroupIterator>(symbol_groups_begin(),
+                                         symbol_groups_end());
+}
+
+SymbolGroupIterator InputFile::symbol_groups_begin() {
+  return SymbolGroupIterator(*this);
+}
+
+SymbolGroupIterator InputFile::symbol_groups_end() {
+  return SymbolGroupIterator();
+}
+
+SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
+
+SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
+  if (File.isObj()) {
+    SectionIter = File.obj().section_begin();
+    scanToNextDebugS();
+  }
+}
+
+bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
+  bool E = isEnd();
+  bool RE = R.isEnd();
+  if (E || RE)
+    return E == RE;
+
+  if (Value.File != R.Value.File)
+    return false;
+  return Index == R.Index;
+}
+
+const SymbolGroup &SymbolGroupIterator::operator*() const {
+  assert(!isEnd());
+  return Value;
+}
+SymbolGroup &SymbolGroupIterator::operator*() {
+  assert(!isEnd());
+  return Value;
+}
+
+SymbolGroupIterator &SymbolGroupIterator::operator++() {
+  assert(Value.File && !isEnd());
+  ++Index;
+  if (isEnd())
+    return *this;
+
+  if (Value.File->isPdb()) {
+    Value.updatePdbModi(Index);
+    return *this;
+  }
+
+  scanToNextDebugS();
+  return *this;
+}
+
+void SymbolGroupIterator::scanToNextDebugS() {
+  assert(SectionIter);
+  auto End = Value.File->obj().section_end();
+  auto &Iter = *SectionIter;
+  assert(!isEnd());
+
+  while (++Iter != End) {
+    DebugSubsectionArray SS;
+    SectionRef SR = *Iter;
+    if (!isDebugSSection(SR, SS))
+      continue;
+
+    Value.updateDebugS(SS);
+    return;
+  }
+}
+
+bool SymbolGroupIterator::isEnd() const {
+  if (!Value.File)
+    return true;
+  if (Value.File->isPdb()) {
+    DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
+    uint32_t Count = Dbi.modules().getModuleCount();
+    assert(Index <= Count);
+    return Index == Count;
+  }
+
+  assert(SectionIter);
+  return *SectionIter == Value.File->obj().section_end();
+}
+
+static bool isMyCode(const SymbolGroup &Group) {
+  if (Group.getFile().isObj())
+    return true;
+
+  StringRef Name = Group.name();
+  if (Name.startswith("Import:"))
+    return false;
+  if (Name.endswith_insensitive(".dll"))
+    return false;
+  if (Name.equals_insensitive("* linker *"))
+    return false;
+  if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
+    return false;
+  if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
+    return false;
+  return true;
+}
+
+bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
+                                      const FilterOptions &Filters) {
+  if (Filters.JustMyCode && !isMyCode(Group))
+    return false;
+
+  // If the arg was not specified on the command line, always dump all modules.
+  if (!Filters.DumpModi)
+    return true;
+
+  // Otherwise, only dump if this is the same module specified.
+  return (Filters.DumpModi == Idx);
+}
diff --git a/llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp b/llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp
new file mode 100644
index 000000000000..c12fedc23833
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp
@@ -0,0 +1,340 @@
+//===- LinePrinter.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/UDTLayout.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Regex.h"
+
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+namespace {
+bool IsItemExcluded(llvm::StringRef Item,
+                    std::list<llvm::Regex> &IncludeFilters,
+                    std::list<llvm::Regex> &ExcludeFilters) {
+  if (Item.empty())
+    return false;
+
+  auto match_pred = [Item](llvm::Regex &R) { return R.match(Item); };
+
+  // Include takes priority over exclude. If the user specified include
+  // filters, and none of them include this item, the item is gone.
+  if (!IncludeFilters.empty() && !any_of(IncludeFilters, match_pred))
+    return true;
+
+  if (any_of(ExcludeFilters, match_pred))
+    return true;
+
+  return false;
+}
+} // namespace
+
+using namespace llvm;
+
+LinePrinter::LinePrinter(int Indent, bool UseColor, llvm::raw_ostream &Stream,
+                         const FilterOptions &Filters)
+    : OS(Stream), IndentSpaces(Indent), CurrentIndent(0), UseColor(UseColor),
+      Filters(Filters) {
+  SetFilters(ExcludeTypeFilters, Filters.ExcludeTypes.begin(),
+             Filters.ExcludeTypes.end());
+  SetFilters(ExcludeSymbolFilters, Filters.ExcludeSymbols.begin(),
+             Filters.ExcludeSymbols.end());
+  SetFilters(ExcludeCompilandFilters, Filters.ExcludeCompilands.begin(),
+             Filters.ExcludeCompilands.end());
+
+  SetFilters(IncludeTypeFilters, Filters.IncludeTypes.begin(),
+             Filters.IncludeTypes.end());
+  SetFilters(IncludeSymbolFilters, Filters.IncludeSymbols.begin(),
+             Filters.IncludeSymbols.end());
+  SetFilters(IncludeCompilandFilters, Filters.IncludeCompilands.begin(),
+             Filters.IncludeCompilands.end());
+}
+
+void LinePrinter::Indent(uint32_t Amount) {
+  if (Amount == 0)
+    Amount = IndentSpaces;
+  CurrentIndent += Amount;
+}
+
+void LinePrinter::Unindent(uint32_t Amount) {
+  if (Amount == 0)
+    Amount = IndentSpaces;
+  CurrentIndent = std::max<int>(0, CurrentIndent - Amount);
+}
+
+void LinePrinter::NewLine() {
+  OS << "\n";
+  OS.indent(CurrentIndent);
+}
+
+void LinePrinter::print(const Twine &T) { OS << T; }
+
+void LinePrinter::printLine(const Twine &T) {
+  NewLine();
+  OS << T;
+}
+
+bool LinePrinter::IsClassExcluded(const ClassLayout &Class) {
+  if (IsTypeExcluded(Class.getName(), Class.getSize()))
+    return true;
+  if (Class.deepPaddingSize() < Filters.PaddingThreshold)
+    return true;
+  return false;
+}
+
+void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
+                               uint64_t StartOffset) {
+  NewLine();
+  OS << Label << " (";
+  if (!Data.empty()) {
+    OS << "\n";
+    OS << format_bytes_with_ascii(Data, StartOffset, 32, 4,
+                                  CurrentIndent + IndentSpaces, true);
+    NewLine();
+  }
+  OS << ")";
+}
+
+void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
+                               uint64_t Base, uint64_t StartOffset) {
+  NewLine();
+  OS << Label << " (";
+  if (!Data.empty()) {
+    OS << "\n";
+    Base +=
StartOffset;
+    OS << format_bytes_with_ascii(Data, Base, 32, 4,
+                                  CurrentIndent + IndentSpaces, true);
+    NewLine();
+  }
+  OS << ")";
+}
+
+namespace {
+struct Run {
+  Run() = default;
+  explicit Run(uint32_t Block) : Block(Block) {}
+  uint32_t Block = 0;
+  uint64_t ByteLen = 0;
+};
+} // namespace
+
+static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
+                                         const msf::MSFStreamLayout &Layout) {
+  std::vector<Run> Runs;
+  if (Layout.Length == 0)
+    return Runs;
+
+  ArrayRef<support::ulittle32_t> Blocks = Layout.Blocks;
+  assert(!Blocks.empty());
+  uint64_t StreamBytesRemaining = Layout.Length;
+  uint32_t CurrentBlock = Blocks[0];
+  Runs.emplace_back(CurrentBlock);
+  while (!Blocks.empty()) {
+    Run *CurrentRun = &Runs.back();
+    uint32_t NextBlock = Blocks.front();
+    if (NextBlock < CurrentBlock || (NextBlock - CurrentBlock > 1)) {
+      Runs.emplace_back(NextBlock);
+      CurrentRun = &Runs.back();
+    }
+    uint64_t Used =
+        std::min(static_cast<uint64_t>(BlockSize), StreamBytesRemaining);
+    CurrentRun->ByteLen += Used;
+    StreamBytesRemaining -= Used;
+    CurrentBlock = NextBlock;
+    Blocks = Blocks.drop_front();
+  }
+  return Runs;
+}
+
+static std::pair<Run, uint64_t> findRun(uint64_t Offset, ArrayRef<Run> Runs) {
+  for (const auto &R : Runs) {
+    if (Offset < R.ByteLen)
+      return std::make_pair(R, Offset);
+    Offset -= R.ByteLen;
+  }
+  llvm_unreachable("Invalid offset!");
+}
+
+void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
+                                      uint32_t StreamIdx,
+                                      StringRef StreamPurpose, uint64_t Offset,
+                                      uint64_t Size) {
+  if (StreamIdx >= File.getNumStreams()) {
+    formatLine("Stream {0}: Not present", StreamIdx);
+    return;
+  }
+  if (Size + Offset > File.getStreamByteSize(StreamIdx)) {
+    formatLine(
+        "Stream {0}: Invalid offset and size, range out of stream bounds",
+        StreamIdx);
+    return;
+  }
+
+  auto S = File.createIndexedStream(StreamIdx);
+  if (!S) {
+    NewLine();
+    formatLine("Stream {0}: Not present", StreamIdx);
+    return;
+  }
+
+  uint64_t End =
+      (Size == 0) ?
S->getLength() : std::min(Offset + Size, S->getLength());
+  Size = End - Offset;
+
+  formatLine("Stream {0}: {1} (dumping {2:N} / {3:N} bytes)", StreamIdx,
+             StreamPurpose, Size, S->getLength());
+  AutoIndent Indent(*this);
+  BinaryStreamRef Slice(*S);
+  BinarySubstreamRef Substream;
+  Substream.Offset = Offset;
+  Substream.StreamData = Slice.drop_front(Offset).keep_front(Size);
+
+  auto Layout = File.getStreamLayout(StreamIdx);
+  formatMsfStreamData(Label, File, Layout, Substream);
+}
+
+void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
+                                      const msf::MSFStreamLayout &Stream,
+                                      BinarySubstreamRef Substream) {
+  BinaryStreamReader Reader(Substream.StreamData);
+
+  auto Runs = computeBlockRuns(File.getBlockSize(), Stream);
+
+  NewLine();
+  OS << Label << " (";
+  while (Reader.bytesRemaining() > 0) {
+    OS << "\n";
+
+    Run FoundRun;
+    uint64_t RunOffset;
+    std::tie(FoundRun, RunOffset) = findRun(Substream.Offset, Runs);
+    assert(FoundRun.ByteLen >= RunOffset);
+    uint64_t Len = FoundRun.ByteLen - RunOffset;
+    Len = std::min(Len, Reader.bytesRemaining());
+    uint64_t Base = FoundRun.Block * File.getBlockSize() + RunOffset;
+    ArrayRef<uint8_t> Data;
+    consumeError(Reader.readBytes(Data, Len));
+    OS << format_bytes_with_ascii(Data, Base, 32, 4,
+                                  CurrentIndent + IndentSpaces, true);
+    if (Reader.bytesRemaining() > 0) {
+      NewLine();
+      OS << formatv(" {0}",
+                    fmt_align("<discontinuity>", AlignStyle::Center, 114, '-'));
+    }
+    Substream.Offset += Len;
+  }
+  NewLine();
+  OS << ")";
+}
+
+void LinePrinter::formatMsfStreamBlocks(
+    PDBFile &File, const msf::MSFStreamLayout &StreamLayout) {
+  auto Blocks = makeArrayRef(StreamLayout.Blocks);
+  uint64_t L = StreamLayout.Length;
+
+  while (L > 0) {
+    NewLine();
+    assert(!Blocks.empty());
+    OS << formatv("Block {0} (\n", uint32_t(Blocks.front()));
+    uint64_t UsedBytes =
+        std::min(L, static_cast<uint64_t>(File.getBlockSize()));
+    ArrayRef<uint8_t> BlockData =
+        cantFail(File.getBlockData(Blocks.front(), File.getBlockSize()));
+    uint64_t BaseOffset = Blocks.front();
+    BaseOffset *= File.getBlockSize();
+    OS << format_bytes_with_ascii(BlockData, BaseOffset, 32, 4,
+                                  CurrentIndent + IndentSpaces, true);
+    NewLine();
+    OS << ")";
+    NewLine();
+    L -= UsedBytes;
+    Blocks = Blocks.drop_front();
+  }
+}
+
+bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size) {
+  if (IsItemExcluded(TypeName, IncludeTypeFilters, ExcludeTypeFilters))
+    return true;
+  if (Size < Filters.SizeThreshold)
+    return true;
+  return false;
+}
+
+bool LinePrinter::IsSymbolExcluded(llvm::StringRef SymbolName) {
+  return IsItemExcluded(SymbolName, IncludeSymbolFilters, ExcludeSymbolFilters);
+}
+
+bool LinePrinter::IsCompilandExcluded(llvm::StringRef CompilandName) {
+  return IsItemExcluded(CompilandName, IncludeCompilandFilters,
+                        ExcludeCompilandFilters);
+}
+
+WithColor::WithColor(LinePrinter &P, PDB_ColorItem C)
+    : OS(P.OS), UseColor(P.hasColor()) {
+  if (UseColor)
+    applyColor(C);
+}
+
+WithColor::~WithColor() {
+  if (UseColor)
+    OS.resetColor();
+}
+
+void WithColor::applyColor(PDB_ColorItem C) {
+  switch (C) {
+  case PDB_ColorItem::None:
+    OS.resetColor();
+    return;
+  case PDB_ColorItem::Comment:
+    OS.changeColor(raw_ostream::GREEN, false);
+    return;
+  case PDB_ColorItem::Address:
+    OS.changeColor(raw_ostream::YELLOW, /*bold=*/true);
+    return;
+  case PDB_ColorItem::Keyword:
+    OS.changeColor(raw_ostream::MAGENTA, true);
+    return;
+  case PDB_ColorItem::Register:
+  case PDB_ColorItem::Offset:
+    OS.changeColor(raw_ostream::YELLOW, false);
+    return;
+  case PDB_ColorItem::Type:
OS.changeColor(raw_ostream::CYAN, true);
+    return;
+  case PDB_ColorItem::Identifier:
+    OS.changeColor(raw_ostream::CYAN, false);
+    return;
+  case PDB_ColorItem::Path:
+    OS.changeColor(raw_ostream::CYAN, false);
+    return;
+  case PDB_ColorItem::Padding:
+  case PDB_ColorItem::SectionHeader:
+    OS.changeColor(raw_ostream::RED, true);
+    return;
+  case PDB_ColorItem::LiteralValue:
+    OS.changeColor(raw_ostream::GREEN, true);
+    return;
+  }
+}
diff --git a/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 1445f0bd9e1b..f0e96a7cd659 100644
--- a/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -10,16 +10,17 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Error.h"
-#include <algorithm>
 #include <cstdint>
 
 using namespace llvm;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 1d873b87b347..500923e57fbb 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -7,21 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
+#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/PDB/Native/Hash.h"
 #include "llvm/DebugInfo/PDB/Native/HashTable.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <tuple>
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 7717f062eac1..d24364312b31 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -9,8 +9,6 @@
 #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 
-#include "llvm/ADT/STLExtras.h"
-
 namespace llvm {
 namespace pdb {
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
index 54646867bc5f..b861fc2435b8 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
@@ -8,13 +8,15 @@
 
 #include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
 
-#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include 
"llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp index 5e6412275063..65e253ed115f 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -8,9 +8,11 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" -#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" namespace llvm { namespace pdb { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp index 1e4b07646335..b912bf77e579 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp @@ -8,13 +8,11 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" -#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" + +#include using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp index c6621924b516..7108b8efff83 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp @@ -8,13 +8,10 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h" -#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" -#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" -#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" namespace llvm { namespace pdb { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp index feede1dbc958..24fe2244cfc5 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp @@ -8,11 +8,11 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" using namespace llvm; using namespace llvm::codeview; diff --git 
a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp index 2524e10cb6c5..6912b8dc838e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp @@ -8,13 +8,16 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index 895f8943157a..ae0f66c31fde 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -8,14 +8,14 @@ #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" -#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/SymbolCache.h" -#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp index 7f3b35c297b4..b1caa5add5b3 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp @@ -8,11 +8,15 @@ #include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/PDB/PDBExtras.h" using namespace llvm; using namespace llvm::codeview; @@ -25,7 +29,7 @@ NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session, : NativeRawSymbol(Session, PDB_SymType::Function, Id), Sym(Sym), RecordOffset(Offset) {} -NativeFunctionSymbol::~NativeFunctionSymbol() {} +NativeFunctionSymbol::~NativeFunctionSymbol() = default; void NativeFunctionSymbol::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp index 8314353c3890..99ec627fcd26 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp 
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
@@ -12,8 +12,14 @@
 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
+#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -25,7 +31,7 @@ NativeInlineSiteSymbol::NativeInlineSiteSymbol(
     : NativeRawSymbol(Session, PDB_SymType::InlineSite, Id), Sym(Sym),
       ParentAddr(ParentAddr) {}
 
-NativeInlineSiteSymbol::~NativeInlineSiteSymbol() {}
+NativeInlineSiteSymbol::~NativeInlineSiteSymbol() = default;
 
 void NativeInlineSiteSymbol::dump(raw_ostream &OS, int Indent,
                                   PdbSymbolIdField ShowIdFields,
@@ -98,29 +104,81 @@ void NativeInlineSiteSymbol::getLineOffset(uint32_t OffsetInFunc,
   LineOffset = 0;
   FileOffset = 0;
   uint32_t CodeOffset = 0;
+  Optional<uint32_t> CodeOffsetBase;
+  Optional<uint32_t> CodeOffsetEnd;
+  Optional<int32_t> CurLineOffset;
+  Optional<int32_t> NextLineOffset;
+  Optional<uint32_t> NextFileOffset;
+  auto UpdateCodeOffset = [&](uint32_t Delta) {
+    if (!CodeOffsetBase)
+      CodeOffsetBase = CodeOffset;
+    else if (!CodeOffsetEnd)
+      CodeOffsetEnd = *CodeOffsetBase + Delta;
+  };
+  auto UpdateLineOffset = [&](int32_t Delta) {
+    LineOffset += Delta;
+    if (!CodeOffsetBase || !CurLineOffset)
+      CurLineOffset = LineOffset;
+    else
+      NextLineOffset = LineOffset;
+  };
+  auto UpdateFileOffset = [&](uint32_t Offset) {
+    if (!CodeOffsetBase)
+      FileOffset = Offset;
+    else
+      NextFileOffset = Offset;
+  };
+  auto ValidateAndReset = [&]() {
+    // Current range is finished. Check if OffsetInFunc is in the range.
+    if (CodeOffsetBase && CodeOffsetEnd && CurLineOffset) {
+      if (CodeOffsetBase <= OffsetInFunc && OffsetInFunc < CodeOffsetEnd) {
+        LineOffset = *CurLineOffset;
+        return true;
+      }
+      // Set base, end, file offset and line offset for next range.
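+      //
+      // [Editor's illustrative aside, not part of the original patch] For
+      // example, given the annotations ChangeLineOffset(+1),
+      // ChangeCodeOffset(0x10), ChangeCodeLength(0x8), the first range is
+      // [0x10, 0x18) with line offset 1, so an OffsetInFunc of 0x14 resolves
+      // to LineOffset = 1. Otherwise the bookkeeping below rolls the "next"
+      // values over, and the old CodeOffsetEnd becomes the new range's base.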
+      if (NextFileOffset)
+        FileOffset = *NextFileOffset;
+      if (NextLineOffset) {
+        CurLineOffset = NextLineOffset;
+        NextLineOffset = None;
+      }
+      CodeOffsetBase = CodeOffsetEnd;
+      CodeOffsetEnd = NextFileOffset = None;
+    }
+    return false;
+  };
   for (const auto &Annot : Sym.annotations()) {
     switch (Annot.OpCode) {
     case BinaryAnnotationsOpCode::CodeOffset:
     case BinaryAnnotationsOpCode::ChangeCodeOffset:
-    case BinaryAnnotationsOpCode::ChangeCodeLength:
+    case BinaryAnnotationsOpCode::ChangeCodeOffsetBase:
       CodeOffset += Annot.U1;
+      UpdateCodeOffset(Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeLength:
+      UpdateCodeOffset(Annot.U1);
       break;
     case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
       CodeOffset += Annot.U2;
+      UpdateCodeOffset(Annot.U2);
+      UpdateCodeOffset(Annot.U1);
       break;
     case BinaryAnnotationsOpCode::ChangeLineOffset:
+      UpdateLineOffset(Annot.S1);
+      break;
    case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
       CodeOffset += Annot.U1;
-      LineOffset += Annot.S1;
+      UpdateCodeOffset(Annot.U1);
+      UpdateLineOffset(Annot.S1);
       break;
     case BinaryAnnotationsOpCode::ChangeFile:
-      FileOffset = Annot.U1;
+      UpdateFileOffset(Annot.U1);
       break;
     default:
       break;
     }
-    if (CodeOffset >= OffsetInFunc)
+    if (ValidateAndReset())
       return;
   }
 }
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
index 155ed0cdb828..aa7d6ac6f29d 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
index 1265e688b867..339af6108009 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
@@ -9,8 +9,7 @@
 
 #include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -20,7 +19,7 @@ NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id,
                                        const codeview::PublicSym32 &Sym)
     : NativeRawSymbol(Session, PDB_SymType::PublicSymbol, Id), Sym(Sym) {}
 
-NativePublicSymbol::~NativePublicSymbol() {}
+NativePublicSymbol::~NativePublicSymbol() = default;
 
 void NativePublicSymbol::dump(raw_ostream &OS, int Indent,
                               PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 2ad552470b61..89f9f9836fec 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -10,7 +10,6 @@
 #include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-#include "llvm/Support/FormatVariadic.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 7212a0e65035..cf314c3bede3 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ 
b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -8,31 +8,33 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" -#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" +#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" -#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/Native/SymbolCache.h" -#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -45,6 +47,12 @@ using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +namespace llvm { +namespace codeview { +union DebugInfo; +} +} // namespace llvm + static DbiStream *getDbiStreamPtr(PDBFile &File) { Expected DbiS = File.getPDBDbiStream(); if (DbiS) diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp index fd813dee6b9f..8d6f8ebebf4c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp @@ -8,6 +8,8 @@ #include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp index e5f1dcaf801e..a6e8cbf71548 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp @@ -8,7 +8,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" @@ -22,7 +22,7 @@ NativeSymbolEnumerator::NativeSymbolEnumerator( : NativeRawSymbol(Session, PDB_SymType::Data, Id), Parent(Parent), Record(std::move(Record)) {} -NativeSymbolEnumerator::~NativeSymbolEnumerator() {} 
+NativeSymbolEnumerator::~NativeSymbolEnumerator() = default; void NativeSymbolEnumerator::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp index 63ac9fae0e87..e98f357ac485 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp @@ -8,9 +8,10 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" -#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/PDB/PDBExtras.h" using namespace llvm; using namespace llvm::codeview; @@ -21,7 +22,7 @@ NativeTypeArray::NativeTypeArray(NativeSession &Session, SymIndexId Id, codeview::ArrayRecord Record) : NativeRawSymbol(Session, PDB_SymType::ArrayType, Id), Record(Record), Index(TI) {} -NativeTypeArray::~NativeTypeArray() {} +NativeTypeArray::~NativeTypeArray() = default; void NativeTypeArray::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp index a08663aa91ba..80f892c7b118 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" -#include "llvm/Support/FormatVariadic.h" using namespace llvm; using namespace llvm::codeview; @@ -19,7 +18,7 @@ NativeTypeBuiltin::NativeTypeBuiltin(NativeSession &PDBSession, SymIndexId Id, : NativeRawSymbol(PDBSession, PDB_SymType::BuiltinType, Id), Session(PDBSession), Mods(Mods), Type(T), Length(L) {} -NativeTypeBuiltin::~NativeTypeBuiltin() {} +NativeTypeBuiltin::~NativeTypeBuiltin() = default; void NativeTypeBuiltin::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp index aaec3a5e7c60..ec37d276e66b 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp @@ -9,8 +9,9 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" @@ -18,8 +19,6 @@ #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" -#include "llvm/Support/FormatVariadic.h" - #include using namespace llvm; @@ -68,10 +67,13 @@ NativeEnumEnumEnumerators::NativeEnumEnumEnumerators( ContinuationIndex = ClassParent.getEnumRecord().FieldList; while (ContinuationIndex) { - CVType FieldList = Types.getType(*ContinuationIndex); - assert(FieldList.kind() == LF_FIELDLIST); + CVType FieldListCVT = Types.getType(*ContinuationIndex); + assert(FieldListCVT.kind() == LF_FIELDLIST); 
   ContinuationIndex.reset();
-    cantFail(visitMemberRecordStream(FieldList.data(), *this));
+    FieldListRecord FieldList;
+    cantFail(TypeDeserializer::deserializeAs<FieldListRecord>(FieldListCVT,
+                                                              FieldList));
+    cantFail(visitMemberRecordStream(FieldList.Data, *this));
   }
 }
 
@@ -123,7 +125,7 @@ NativeTypeEnum::NativeTypeEnum(NativeSession &Session, SymIndexId Id,
     : NativeRawSymbol(Session, PDB_SymType::Enum, Id),
       UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
 
-NativeTypeEnum::~NativeTypeEnum() {}
+NativeTypeEnum::~NativeTypeEnum() = default;
 
 void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
                           PdbSymbolIdField ShowIdFields,
@@ -138,7 +140,7 @@ void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
   dumpSymbolField(OS, "name", getName(), Indent);
   dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
                     PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
-  if (Modifiers.hasValue())
+  if (Modifiers)
     dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
                       Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
                       RecurseIdFields);
@@ -206,6 +208,8 @@ PDB_BuiltinType NativeTypeEnum::getBuiltinType() const {
     return PDB_BuiltinType::Char16;
   case SimpleTypeKind::Character32:
     return PDB_BuiltinType::Char32;
+  case SimpleTypeKind::Character8:
+    return PDB_BuiltinType::Char8;
   case SimpleTypeKind::Int128:
   case SimpleTypeKind::Int128Oct:
   case SimpleTypeKind::Int16:
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
index f98a4c3043eb..7db3f1c63128 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -10,9 +10,10 @@
 
 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
-#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -96,7 +97,7 @@ void NativeTypeFunctionSig::initialize() {
   }
 }
 
-NativeTypeFunctionSig::~NativeTypeFunctionSig() {}
+NativeTypeFunctionSig::~NativeTypeFunctionSig() = default;
 
 void NativeTypeFunctionSig::initializeArgList(codeview::TypeIndex ArgListTI) {
   TpiStream &Tpi = cantFail(Session.getPDBFile().getPDBTpiStream());
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
index 32dcfc235954..14b903ccef5a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
@@ -7,8 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 
 #include 
 
@@ -29,7 +30,7 @@ NativeTypePointer::NativeTypePointer(NativeSession &Session, SymIndexId Id,
     : NativeRawSymbol(Session, PDB_SymType::PointerType, Id), TI(TI),
       Record(std::move(Record)) {}
 
-NativeTypePointer::~NativeTypePointer() {}
+NativeTypePointer::~NativeTypePointer() = default;
 
 void NativeTypePointer::dump(raw_ostream &OS, int Indent,
                              PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
index
72964a9e0d4d..11cd349b72ca 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp @@ -1,4 +1,6 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/PDBExtras.h" using namespace llvm; using namespace llvm::codeview; @@ -9,7 +11,7 @@ NativeTypeTypedef::NativeTypeTypedef(NativeSession &Session, SymIndexId Id, : NativeRawSymbol(Session, PDB_SymType::Typedef, Id), Record(std::move(Typedef)) {} -NativeTypeTypedef::~NativeTypeTypedef() {} +NativeTypeTypedef::~NativeTypeTypedef() = default; void NativeTypeTypedef::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp index 917ec14e58d6..b708fb644e7a 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp @@ -7,10 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h" - -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" - -#include +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/PDB/PDBExtras.h" using namespace llvm; using namespace llvm::codeview; @@ -32,7 +33,7 @@ NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id, : NativeRawSymbol(Session, PDB_SymType::UDT, Id), UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {} -NativeTypeUDT::~NativeTypeUDT() {} +NativeTypeUDT::~NativeTypeUDT() = default; void NativeTypeUDT::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, @@ -44,7 +45,7 @@ void NativeTypeUDT::dump(raw_ostream &OS, int Indent, dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session, PdbSymbolIdField::LexicalParent, ShowIdFields, RecurseIdFields); - if (Modifiers.hasValue()) + if (Modifiers) dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent, Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields, RecurseIdFields); diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp index 837fe19ec88c..63bb3f046e23 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp @@ -1,4 +1,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/PDBExtras.h" using namespace llvm; using namespace llvm::pdb; @@ -10,7 +13,7 @@ NativeTypeVTShape::NativeTypeVTShape(NativeSession &Session, SymIndexId Id, : NativeRawSymbol(Session, PDB_SymType::VTableShape, Id), TI(TI), Record(std::move(SR)) {} -NativeTypeVTShape::~NativeTypeVTShape() {} +NativeTypeVTShape::~NativeTypeVTShape() = default; void NativeTypeVTShape::dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp index 5c61530c470d..471d183a5f53 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" 
#include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index f33125474e3a..641043a8e186 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -7,34 +7,41 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" -#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" -#include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" -#include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/CRC.h" -#include "llvm/Support/Chrono.h" #include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" +#include + using namespace llvm; using namespace llvm::codeview; using namespace llvm::msf; using namespace llvm::pdb; using namespace llvm::support; +namespace llvm { +class WritableBinaryStream; +} + PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator) : Allocator(Allocator), InjectedSourceHashTraits(Strings), InjectedSourceTable(2) {} -PDBFileBuilder::~PDBFileBuilder() {} +PDBFileBuilder::~PDBFileBuilder() = default; Error PDBFileBuilder::initialize(uint32_t BlockSize) { auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); @@ -348,7 +355,7 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) { H->Age = Info->getAge(); H->Guid = Info->getGuid(); Optional Sig = Info->getSignature(); - H->Signature = Sig.hasValue() ? *Sig : time(nullptr); + H->Signature = Sig ? *Sig : time(nullptr); } return Buffer.commit(); diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index 2be1656e06bb..5bd12f50f1d7 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp index f7f36901e4d4..45a5bdb48f01 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -71,7 +71,7 @@ static uint32_t computeBucketCount(uint32_t NumStrings) { // This list contains all StringCount, BucketCount pairs where BucketCount was // just incremented. It ends before the first BucketCount entry where // BucketCount * 3 would overflow a 32-bit unsigned int. 
-  static std::map<uint32_t, uint32_t> StringsToBuckets = {
+  static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
       {0, 1},
       {1, 2},
       {2, 4},
@@ -124,8 +124,9 @@ static uint32_t computeBucketCount(uint32_t NumStrings) {
       {517197275, 1034394550},
       {775795913, 1551591826},
       {1163693870, 2327387740}};
-  auto Entry = StringsToBuckets.lower_bound(NumStrings);
-  assert(Entry != StringsToBuckets.end());
+  const auto *Entry = llvm::lower_bound(
+      StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
+  assert(Entry != std::end(StringsToBuckets));
   return Entry->second;
 }
 
diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index a33bf03bf8fb..c7b9f443da5e 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -22,14 +22,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
-#include 
 #include 
 
 using namespace llvm;
diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
index f9e67014477e..f89f09aa3399 100644
--- a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
@@ -1,20 +1,25 @@
 #include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
 
-#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
-#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
 #include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
 #include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
@@ -32,7 +37,6 @@
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -60,6 +64,7 @@ static const struct BuiltinTypeEntry {
{codeview::SimpleTypeKind::WideCharacter, PDB_BuiltinType::WCharT, 2}, {codeview::SimpleTypeKind::Character16, PDB_BuiltinType::Char16, 2}, {codeview::SimpleTypeKind::Character32, PDB_BuiltinType::Char32, 4}, + {codeview::SimpleTypeKind::Character8, PDB_BuiltinType::Char8, 1}, {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1}, {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1}, {codeview::SimpleTypeKind::Float32, PDB_BuiltinType::Float, 4}, diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp index 003840b6e67e..5802d1c77527 100644 --- a/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp @@ -8,10 +8,7 @@ #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" using namespace llvm; @@ -22,7 +19,7 @@ using namespace llvm::pdb; SymbolStream::SymbolStream(std::unique_ptr Stream) : Stream(std::move(Stream)) {} -SymbolStream::~SymbolStream() {} +SymbolStream::~SymbolStream() = default; Error SymbolStream::reload() { BinaryStreamReader Reader(*Stream); diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 5f4f497690b6..986e45e050c7 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -9,17 +9,13 @@ #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/llvm/lib/DebugInfo/PDB/PDB.cpp b/llvm/lib/DebugInfo/PDB/PDB.cpp index e5b7731f6f4a..d106ba8fefc1 100644 --- a/llvm/lib/DebugInfo/PDB/PDB.cpp +++ b/llvm/lib/DebugInfo/PDB/PDB.cpp @@ -15,7 +15,6 @@ #endif #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp index 0ebb70e010d5..e600fb7385f1 100644 --- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp @@ -14,6 +14,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Object/COFF.h" using namespace llvm; @@ -62,6 +64,13 @@ DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address, return Result; } +DILineInfo +PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + // 
Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global + // variables) aren't capable of carrying line information. + return DILineInfo(); +} + DILineInfoTable PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address, uint64_t Size, diff --git a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp index a6d7ca0da7a9..571510e6bad9 100644 --- a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/PDBExtras.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -64,6 +63,7 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, HResult, OS) CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char16, OS) CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char32, OS) + CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char8, OS) } return OS; } diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp index d6bc7ee9c951..4eb5af9bd292 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" @@ -43,7 +44,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h" #include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" -#include #include using namespace llvm; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp index 0fa83efb7ae0..089f4de0f422 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp index 9452282a8817..49ee4937521b 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolBlock.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp index 529100b23ba5..bd60489b6bed 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp @@ -9,10 +9,11 @@ #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" #include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h" #include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Path.h" diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp index 0d86dfe1e632..f775ac949cd8 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp +++ 
b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp index 61f119405fd9..2c2ed59c1726 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp @@ -10,9 +10,7 @@ #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include +#include "llvm/DebugInfo/PDB/PDBTypes.h" using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp index 6c9a4aa76c3d..405b07c2b689 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp @@ -10,9 +10,6 @@ #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp index d2b82111ccd5..c604b5cd3a6a 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp @@ -7,12 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSectionContrib.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp index c85756c43e47..3887c23b18ef 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp @@ -8,10 +8,10 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" +#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" - -#include +#include "llvm/DebugInfo/PDB/PDBTypes.h" using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index cb0329bc0ed7..59d57e83fc10 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -10,7 +10,9 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp index 66433dc17b49..5c72e3f62121 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace 
llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp index fe32c93c0121..fd537a9eeea4 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp @@ -8,10 +8,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp index 1fffe69a0c83..896719a6a8e2 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp index 08697683f641..a00b1be40e18 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp @@ -8,10 +8,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp index 6483858183e5..42502a55ef76 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp index a0d521abe43f..bb4eb43f22e5 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp index 08467059b5e1..539c3547a4b0 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp @@ -8,10 +8,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp index a0dd9ef601c0..eca2a09c1f77 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp index 6723894c90ea..a616b4e26cb1 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using 
namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp index 4a25a391f278..2828ce4df3f8 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp @@ -10,9 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp index b9fdf6aec811..db8ca327da1e 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp @@ -8,11 +8,10 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp index 4ffea42cbb0a..d4bd9996d786 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp index 683e93548fb1..acda57f44e33 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp index e80e6c716572..fa6e630e3c45 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp index 462fc315359b..9e238c7caa37 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp @@ -8,11 +8,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" -#include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp index 70749d9bf5f5..c2ce21c6ca69 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp index d302c29a3bec..122111d32027 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp @@ -8,16 +8,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" 
-#include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbolData.h" -#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp index 4e2a45116d51..a4d81888e457 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp @@ -10,8 +10,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp index 78957620e083..835a86e165af 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp index 650d01183171..85294a4cded2 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp index 74afbdb18086..98aaaa9b10b9 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp @@ -9,9 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" - -#include using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/lib/DebugInfo/PDB/UDTLayout.cpp b/llvm/lib/DebugInfo/PDB/UDTLayout.cpp index 55854bb49888..6e388834f199 100644 --- a/llvm/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/llvm/lib/DebugInfo/PDB/UDTLayout.cpp @@ -10,6 +10,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" @@ -17,6 +19,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" diff --git a/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp b/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp new file mode 100644 index 000000000000..119830de595a --- /dev/null +++ b/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp @@ -0,0 +1,57 @@ +//===-- lib/DebugInfo/Symbolize/DIFetcher.cpp 
-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation of the local debug info fetcher, which +/// searches cache directories. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/Symbolize/DIFetcher.h" + +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +namespace llvm { +namespace symbolize { + +Optional<std::string> +LocalDIFetcher::fetchBuildID(ArrayRef<uint8_t> BuildID) const { + auto GetDebugPath = [&](StringRef Directory) { + SmallString<128> Path{Directory}; + sys::path::append(Path, ".build-id", + llvm::toHex(BuildID[0], /*LowerCase=*/true), + llvm::toHex(BuildID.slice(1), /*LowerCase=*/true)); + Path += ".debug"; + return Path; + }; + if (DebugFileDirectory.empty()) { + SmallString<128> Path = GetDebugPath( +#if defined(__NetBSD__) + // Try /usr/libdata/debug/.build-id/../... + "/usr/libdata/debug" +#else + // Try /usr/lib/debug/.build-id/../... + "/usr/lib/debug" +#endif + ); + if (llvm::sys::fs::exists(Path)) + return std::string(Path); + } else { + for (const auto &Directory : DebugFileDirectory) { + // Try /.build-id/../... + SmallString<128> Path = GetDebugPath(Directory); + if (llvm::sys::fs::exists(Path)) + return std::string(Path); + } + } + return None; +} + +} // namespace symbolize +} // namespace llvm diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index e29968d113bd..877380213f21 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -16,9 +16,7 @@ #include "llvm/DebugInfo/DIContext.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/Format.h" -#include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -208,6 +206,10 @@ void PlainPrinterBase::print(const Request &Request, const DIGlobal &Global) { Name = DILineInfo::Addr2LineBadString; OS << Name << "\n"; OS << Global.Start << " " << Global.Size << "\n"; + if (Global.DeclFile.empty()) + OS << "??:?\n"; + else + OS << Global.DeclFile << ":" << Global.DeclLine << "\n"; printFooter(); } diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp new file mode 100644 index 000000000000..9bc65e763287 --- /dev/null +++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp @@ -0,0 +1,202 @@ +//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the log symbolizer markup data model and parser.
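LocalDIFetcher above probes the conventional .build-id directory layout: the first byte of the build ID names a two-hex-digit subdirectory, the remaining bytes name the file, and a .debug suffix is appended. A sketch of just the path construction (the helper name is mine, not part of the patch):

// Build the candidate path <Dir>/.build-id/xx/yyyy....debug for a build ID.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Path.h"

llvm::SmallString<128> buildIDPath(llvm::StringRef Dir,
                                   llvm::ArrayRef<uint8_t> ID) {
  llvm::SmallString<128> Path{Dir};
  // The first byte becomes the subdirectory and the rest the file name;
  // toHex accepts ID[0] via ArrayRef's implicit single-element constructor.
  llvm::sys::path::append(Path, ".build-id",
                          llvm::toHex(ID[0], /*LowerCase=*/true),
                          llvm::toHex(ID.slice(1), /*LowerCase=*/true));
  Path += ".debug";
  return Path; // e.g. /usr/lib/debug/.build-id/ab/cdef01....debug
}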
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/Symbolize/Markup.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" + +namespace llvm { +namespace symbolize { + +// Matches the following: +// "\033[0m" +// "\033[1m" +// "\033[30m" -- "\033[37m" +static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; + +MarkupParser::MarkupParser(StringSet<> MultilineTags) + : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} + +static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { + return Str.take_front(Pos - Str.begin()); +} +static void advanceTo(StringRef &Str, StringRef::iterator Pos) { + Str = Str.drop_front(Pos - Str.begin()); +} + +void MarkupParser::parseLine(StringRef Line) { + Buffer.clear(); + NextIdx = 0; + FinishedMultiline.clear(); + this->Line = Line; +} + +Optional<MarkupNode> MarkupParser::nextNode() { + // Pull something out of the buffer if possible. + if (!Buffer.empty()) { + if (NextIdx < Buffer.size()) + return std::move(Buffer[NextIdx++]); + NextIdx = 0; + Buffer.clear(); + } + + // The buffer is empty, so parse the next bit of the line. + + if (Line.empty()) + return None; + + if (!InProgressMultiline.empty()) { + if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { + llvm::append_range(InProgressMultiline, *MultilineEnd); + assert(FinishedMultiline.empty() && + "At most one multi-line element can be finished at a time."); + FinishedMultiline.swap(InProgressMultiline); + // Parse the multi-line element as if it were contiguous. + advanceTo(Line, MultilineEnd->end()); + return *parseElement(FinishedMultiline); + } + + // The whole line is part of the multi-line element. + llvm::append_range(InProgressMultiline, Line); + Line = Line.drop_front(Line.size()); + return None; + } + + // Find the first valid markup element, if any. + if (Optional<MarkupNode> Element = parseElement(Line)) { + parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); + Buffer.push_back(std::move(*Element)); + advanceTo(Line, Element->Text.end()); + return nextNode(); + } + + // Since there were no valid elements remaining, see if the line opens a + // multi-line element. + if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { + // Emit any text before the element. + parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); + + // Begin recording the multi-line element. + llvm::append_range(InProgressMultiline, *MultilineBegin); + Line = Line.drop_front(Line.size()); + return nextNode(); + } + + // The line doesn't contain any more markup elements, so emit it as text. + parseTextOutsideMarkup(Line); + Line = Line.drop_front(Line.size()); + return nextNode(); +} + +void MarkupParser::flush() { + if (InProgressMultiline.empty()) + return; + FinishedMultiline.swap(InProgressMultiline); + parseTextOutsideMarkup(FinishedMultiline); +} + +// Finds and returns the next valid markup element in the given line. Returns +// None if the line contains no valid elements. +Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { + while (true) { + // Find next element using begin and end markers. + size_t BeginPos = Line.find("{{{"); + if (BeginPos == StringRef::npos) + return None; + size_t EndPos = Line.find("}}}", BeginPos + 3); + if (EndPos == StringRef::npos) + return None; + EndPos += 3; + MarkupNode Element; + Element.Text = Line.slice(BeginPos, EndPos); + Line = Line.substr(EndPos); + + // Parse tag.
+ StringRef Content = Element.Text.drop_front(3).drop_back(3); + StringRef FieldsContent; + std::tie(Element.Tag, FieldsContent) = Content.split(':'); + if (Element.Tag.empty()) + continue; + + // Parse fields. + if (!FieldsContent.empty()) + FieldsContent.split(Element.Fields, ":"); + else if (Content.back() == ':') + Element.Fields.push_back(FieldsContent); + + return Element; + } +} + +static MarkupNode textNode(StringRef Text) { + MarkupNode Node; + Node.Text = Text; + return Node; +} + +// Parses a region of text known to be outside any markup elements. Such text +// may still contain SGR control codes, so the region is further subdivided into +// control codes and true text regions. +void MarkupParser::parseTextOutsideMarkup(StringRef Text) { + if (Text.empty()) + return; + SmallVector<StringRef> Matches; + while (SGRSyntax.match(Text, &Matches)) { + // Emit any text before the SGR element. + if (Matches.begin()->begin() != Text.begin()) + Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); + + Buffer.push_back(textNode(*Matches.begin())); + advanceTo(Text, Matches.begin()->end()); + } + if (!Text.empty()) + Buffer.push_back(textNode(Text)); +} + +// Given that a line doesn't contain any valid markup, see if it ends with the +// start of a multi-line element. If so, returns the beginning. +Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { + // A multi-line begin marker must be the last one on the line. + size_t BeginPos = Line.rfind("{{{"); + if (BeginPos == StringRef::npos) + return None; + size_t BeginTagPos = BeginPos + 3; + + // If there are any end markers afterwards, the begin marker cannot belong to + // a multi-line element. + size_t EndPos = Line.find("}}}", BeginTagPos); + if (EndPos != StringRef::npos) + return None; + + // Check whether the tag is registered multi-line. + size_t EndTagPos = Line.find(':', BeginTagPos); + if (EndTagPos == StringRef::npos) + return None; + StringRef Tag = Line.slice(BeginTagPos, EndTagPos); + if (!MultilineTags.contains(Tag)) + return None; + return Line.substr(BeginPos); +} + +// See if the line begins with the ending of an in-progress multi-line element. +// If so, return the ending. +Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { + size_t EndPos = Line.find("}}}"); + if (EndPos == StringRef::npos) + return None; + return Line.take_front(EndPos + 3); +} + +} // end namespace symbolize +} // end namespace llvm diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp new file mode 100644 index 000000000000..3363fe5e531f --- /dev/null +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -0,0 +1,143 @@ +//===-- lib/DebugInfo/Symbolize/MarkupFilter.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation of a filter that replaces symbolizer +/// markup with human-readable expressions.
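With the parser complete, the element syntax is {{{tag}}} or {{{tag:field1:field2:...}}}, interleaved with SGR color codes and plain text. A usage sketch against the Markup.h interface shown above (an empty StringSet means no multi-line tags are registered):

// Drain one line of symbolizer markup into (tag, text) pairs.
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::symbolize;

void dumpMarkup(StringRef Line) {
  MarkupParser Parser{StringSet<>()}; // no multi-line tags
  Parser.parseLine(Line);
  // nextNode() interleaves markup elements with the text between them;
  // plain-text and SGR nodes come back with an empty Tag.
  while (Optional<MarkupNode> Node = Parser.nextNode())
    outs() << "tag=\"" << Node->Tag << "\" text=\"" << Node->Text << "\"\n";
}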
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/Symbolize/MarkupFilter.h" + +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::symbolize; + +MarkupFilter::MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled) + : OS(OS), ColorsEnabled(ColorsEnabled.value_or( WithColor::defaultAutoDetectFunction()(OS))) {} + +void MarkupFilter::beginLine(StringRef Line) { + this->Line = Line; + resetColor(); +} + +void MarkupFilter::filter(const MarkupNode &Node) { + if (!checkTag(Node)) + return; + + if (trySGR(Node)) + return; + + if (Node.Tag == "symbol") { + if (!checkNumFields(Node, 1)) + return; + highlight(); + OS << llvm::demangle(Node.Fields.front().str()); + restoreColor(); + return; + } + + OS << Node.Text; +} + +bool MarkupFilter::trySGR(const MarkupNode &Node) { + if (Node.Text == "\033[0m") { + resetColor(); + return true; + } + if (Node.Text == "\033[1m") { + Bold = true; + if (ColorsEnabled) + OS.changeColor(raw_ostream::Colors::SAVEDCOLOR, Bold); + return true; + } + auto SGRColor = StringSwitch<Optional<raw_ostream::Colors>>(Node.Text) + .Case("\033[30m", raw_ostream::Colors::BLACK) + .Case("\033[31m", raw_ostream::Colors::RED) + .Case("\033[32m", raw_ostream::Colors::GREEN) + .Case("\033[33m", raw_ostream::Colors::YELLOW) + .Case("\033[34m", raw_ostream::Colors::BLUE) + .Case("\033[35m", raw_ostream::Colors::MAGENTA) + .Case("\033[36m", raw_ostream::Colors::CYAN) + .Case("\033[37m", raw_ostream::Colors::WHITE) + .Default(llvm::None); + if (SGRColor) { + Color = *SGRColor; + if (ColorsEnabled) + OS.changeColor(*Color); + return true; + } + + return false; +} + +// Begin highlighting text by picking a different color than the current color +// state. +void MarkupFilter::highlight() { + if (!ColorsEnabled) + return; + OS.changeColor(Color == raw_ostream::Colors::BLUE ? raw_ostream::Colors::CYAN + : raw_ostream::Colors::BLUE, + Bold); +} + +// Set the output stream's color to the current color and bold state of the SGR +// abstract machine. +void MarkupFilter::restoreColor() { + if (!ColorsEnabled) + return; + if (Color) { + OS.changeColor(*Color, Bold); + } else { + OS.resetColor(); + if (Bold) + OS.changeColor(raw_ostream::Colors::SAVEDCOLOR, Bold); + } +} + +// Set the SGR and output stream's color and bold states back to the default.
+void MarkupFilter::resetColor() { + if (!Color && !Bold) + return; + Color.reset(); + Bold = false; + if (ColorsEnabled) + OS.resetColor(); +} + +bool MarkupFilter::checkTag(const MarkupNode &Node) const { + if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) { + WithColor::error(errs()) << "tags must be all lowercase characters\n"; + reportLocation(Node.Tag.begin()); + return false; + } + return true; +} + +bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const { + if (Node.Fields.size() != Size) { + WithColor::error(errs()) << "expected " << Size << " fields; found " + << Node.Fields.size() << "\n"; + reportLocation(Node.Tag.end()); + return false; + } + return true; +} + +void MarkupFilter::reportLocation(StringRef::iterator Loc) const { + errs() << Line; + WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^'; + errs() << '\n'; +} diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index a9c78830fa59..d8ee9264b64f 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "SymbolizableObjectFile.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/COFF.h" @@ -327,6 +327,14 @@ DIGlobal SymbolizableObjectFile::symbolizeData( std::string FileName; getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, FileName); + Res.DeclFile = FileName; + + // Try and get a better filename:lineno pair from the debuginfo, if present. + DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset); + if (DL.Line != 0) { + Res.DeclFile = DL.FileName; + Res.DeclLine = DL.Line; + } return Res; } diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h deleted file mode 100644 index 8fb003fff0ae..000000000000 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ /dev/null @@ -1,103 +0,0 @@ -//===- SymbolizableObjectFile.h ---------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares the SymbolizableObjectFile class. 
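A usage sketch tying MarkupFilter to the parser (assuming the MarkupFilter.h declarations from this patch; passing llvm::None lets the filter auto-detect color support, matching the value_or() default above):

// Render one line of markup: demangle {{{symbol:...}}}, pass SGR through.
#include "llvm/ADT/None.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::symbolize;

void renderMarkupLine(StringRef Line) {
  MarkupParser Parser{StringSet<>()};
  MarkupFilter Filter(outs(), /*ColorsEnabled=*/None);
  Filter.beginLine(Line); // also resets the SGR color state
  Parser.parseLine(Line);
  while (Optional<MarkupNode> Node = Parser.nextNode())
    Filter.filter(*Node); // unhandled nodes are echoed verbatim
}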
-// -//===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H -#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DIContext.h" -#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" -#include "llvm/Support/Error.h" -#include -#include -#include -#include -#include - -namespace llvm { - -class DataExtractor; - -namespace symbolize { - -class SymbolizableObjectFile : public SymbolizableModule { -public: - static Expected<std::unique_ptr<SymbolizableObjectFile>> - create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx, - bool UntagAddresses); - - DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset, - DILineInfoSpecifier LineInfoSpecifier, - bool UseSymbolTable) const override; - DIInliningInfo symbolizeInlinedCode(object::SectionedAddress ModuleOffset, - DILineInfoSpecifier LineInfoSpecifier, - bool UseSymbolTable) const override; - DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override; - std::vector<DILocal> - symbolizeFrame(object::SectionedAddress ModuleOffset) const override; - - // Return true if this is a 32-bit x86 PE COFF module. - bool isWin32Module() const override; - - // Returns the preferred base of the module, i.e. where the loader would place - // it in memory assuming there were no conflicts. - uint64_t getModulePreferredBase() const override; - -private: - bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind, - bool UseSymbolTable) const; - - bool getNameFromSymbolTable(uint64_t Address, std::string &Name, - uint64_t &Addr, uint64_t &Size, - std::string &FileName) const; - // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd - // (function descriptor) section and OpdExtractor refers to its contents. - Error addSymbol(const object::SymbolRef &Symbol, uint64_t SymbolSize, - DataExtractor *OpdExtractor = nullptr, - uint64_t OpdAddress = 0); - Error addCoffExportSymbols(const object::COFFObjectFile *CoffObj); - - /// Search for the first occurence of specified Address in ObjectFile. - uint64_t getModuleSectionIndexForAddress(uint64_t Address) const; - - const object::ObjectFile *Module; - std::unique_ptr<DIContext> DebugInfoContext; - bool UntagAddresses; - - struct SymbolDesc { - uint64_t Addr; - // If size is 0, assume that symbol occupies the whole memory range up to - // the following symbol. - uint64_t Size; - - StringRef Name; - // Non-zero if this is an ELF local symbol. See the comment in - // getNameFromSymbolTable. - uint32_t ELFLocalSymIdx; - - bool operator<(const SymbolDesc &RHS) const { - return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size; - } - }; - std::vector<SymbolDesc> Symbols; - // (index, filename) pairs of ELF STT_FILE symbols.
- std::vector<std::pair<uint32_t, StringRef>> FileSymbols; - - SymbolizableObjectFile(const object::ObjectFile *Obj, - std::unique_ptr<DIContext> DICtx, - bool UntagAddresses); -}; - -} // end namespace symbolize - -} // end namespace llvm - -#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 5ec79df17fed..d2ff8aa7c995 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -12,22 +12,19 @@ #include "llvm/DebugInfo/Symbolize/Symbolize.h" -#include "SymbolizableObjectFile.h" - #include "llvm/ADT/STLExtras.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/DebugInfo/PDB/PDBContext.h" -#include "llvm/Debuginfod/Debuginfod.h" +#include "llvm/DebugInfo/Symbolize/DIFetcher.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" @@ -38,8 +35,20 @@ #include namespace llvm { +namespace codeview { +union DebugInfo; +} +namespace object { +template <typename T> class ELFFile; +} namespace symbolize { +LLVMSymbolizer::LLVMSymbolizer() = default; + +LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) : Opts(Opts) {} + +LLVMSymbolizer::~LLVMSymbolizer() = default; + template <typename T> Expected<DILineInfo> LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, @@ -81,6 +90,12 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, return symbolizeCodeCommon(ModuleName, ModuleOffset); } +Expected<DILineInfo> +LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeCodeCommon(BuildID, ModuleOffset); +} + template <typename T> Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { @@ -124,6 +139,12 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); } +Expected<DIInliningInfo> +LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); +} + template <typename T> Expected<DIGlobal> LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, @@ -163,6 +184,12 @@ LLVMSymbolizer::symbolizeData(const std::string &ModuleName, return symbolizeDataCommon(ModuleName, ModuleOffset); } +Expected<DIGlobal> +LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeDataCommon(BuildID, ModuleOffset); +} + template <typename T> Expected<std::vector<DILocal>> LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, @@ -198,11 +225,20 @@ LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, return symbolizeFrameCommon(ModuleName, ModuleOffset); } +Expected<std::vector<DILocal>> +LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeFrameCommon(BuildID, ModuleOffset); +} + void LLVMSymbolizer::flush() { ObjectForUBPathAndArch.clear(); + LRUBinaries.clear(); + CacheSize = 0; BinaryForPath.clear(); ObjectPairForPathArch.clear(); Modules.clear(); + BuildIDPaths.clear(); } namespace { @@ -230,51 +266,6 @@ bool
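The new ArrayRef<uint8_t> overloads let clients symbolize through a GNU build ID instead of a module path, resolved via getOrFindDebugBinary() further below. A hedged usage sketch (the build-ID bytes would come from the binary's .note.gnu.build-id section):

// Symbolize an address in a module identified only by its build ID.
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::symbolize;

void printLineInfo(LLVMSymbolizer &Symbolizer, ArrayRef<uint8_t> BuildID,
                   uint64_t Addr) {
  object::SectionedAddress Offset;
  Offset.Address = Addr; // SectionIndex keeps its UndefSection default
  Expected<DILineInfo> InfoOrErr = Symbolizer.symbolizeCode(BuildID, Offset);
  if (!InfoOrErr) {
    logAllUnhandledErrors(InfoOrErr.takeError(), errs());
    return;
  }
  outs() << InfoOrErr->FileName << ':' << InfoOrErr->Line << '\n';
}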
checkFileCRC(StringRef Path, uint32_t CRCHash) { return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); } -bool findDebugBinary(const std::string &OrigPath, - const std::string &DebuglinkName, uint32_t CRCHash, - const std::string &FallbackDebugPath, - std::string &Result) { - SmallString<16> OrigDir(OrigPath); - llvm::sys::path::remove_filename(OrigDir); - SmallString<16> DebugPath = OrigDir; - // Try relative/path/to/original_binary/debuglink_name - llvm::sys::path::append(DebugPath, DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = std::string(DebugPath.str()); - return true; - } - // Try relative/path/to/original_binary/.debug/debuglink_name - DebugPath = OrigDir; - llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = std::string(DebugPath.str()); - return true; - } - // Make the path absolute so that lookups will go to - // "/usr/lib/debug/full/path/to/debug", not - // "/usr/lib/debug/to/debug" - llvm::sys::fs::make_absolute(OrigDir); - if (!FallbackDebugPath.empty()) { - // Try /absolute/path/to/original_binary/debuglink_name - DebugPath = FallbackDebugPath; - } else { -#if defined(__NetBSD__) - // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name - DebugPath = "/usr/libdata/debug"; -#else - // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name - DebugPath = "/usr/lib/debug"; -#endif - } - llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), - DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = std::string(DebugPath.str()); - return true; - } - return false; -} - bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, uint32_t &CRCHash) { if (!Obj) @@ -351,50 +342,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) { return BuildID; } -bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory, - const ArrayRef<uint8_t> BuildID, std::string &Result) { - auto getDebugPath = [&](StringRef Directory) { - SmallString<128> Path{Directory}; - sys::path::append(Path, ".build-id", - llvm::toHex(BuildID[0], /*LowerCase=*/true), - llvm::toHex(BuildID.slice(1), /*LowerCase=*/true)); - Path += ".debug"; - return Path; - }; - if (DebugFileDirectory.empty()) { - SmallString<128> Path = getDebugPath( -#if defined(__NetBSD__) - // Try /usr/libdata/debug/.build-id/../... - "/usr/libdata/debug" -#else - // Try /usr/lib/debug/.build-id/../... - "/usr/lib/debug" -#endif - ); - if (llvm::sys::fs::exists(Path)) { - Result = std::string(Path.str()); - return true; - } - } else { - for (const auto &Directory : DebugFileDirectory) { - // Try /.build-id/../... - SmallString<128> Path = getDebugPath(Directory); - if (llvm::sys::fs::exists(Path)) { - Result = std::string(Path.str()); - return true; - } - } - } - // Try debuginfod client cache and known servers.
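Both the deleted free function and the member version that replaces it (below) validate every candidate debug file by checksum, since .gnu_debuglink stores a CRC32 of the entire separate debug file. The check itself, restated as a standalone sketch:

// Accept a debuglink candidate only if its whole-file CRC32 matches.
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstdint>
#include <memory>

static bool fileMatchesCRC(llvm::StringRef Path, uint32_t ExpectedCRC) {
  using namespace llvm;
  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
      MemoryBuffer::getFileOrSTDIN(Path);
  if (!MB)
    return false; // unreadable candidates are simply skipped
  return ExpectedCRC == crc32(arrayRefFromStringRef((*MB)->getBuffer()));
}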
- Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID); - if (!PathOrErr) { - consumeError(PathOrErr.takeError()); - return false; - } - Result = *PathOrErr; - return true; -} - } // end anonymous namespace ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, @@ -437,8 +384,7 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, std::string DebugBinaryPath; if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) return nullptr; - if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath, - DebugBinaryPath)) + if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -458,7 +404,7 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, if (BuildID->size() < 2) return nullptr; std::string DebugBinaryPath; - if (!findDebugBinary(Opts.DebugFileDirectory, *BuildID, DebugBinaryPath)) + if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -468,12 +414,97 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, return DbgObjOrErr.get(); } +bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, + const std::string &DebuglinkName, + uint32_t CRCHash, std::string &Result) { + SmallString<16> OrigDir(OrigPath); + llvm::sys::path::remove_filename(OrigDir); + SmallString<16> DebugPath = OrigDir; + // Try relative/path/to/original_binary/debuglink_name + llvm::sys::path::append(DebugPath, DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = std::string(DebugPath.str()); + return true; + } + // Try relative/path/to/original_binary/.debug/debuglink_name + DebugPath = OrigDir; + llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = std::string(DebugPath.str()); + return true; + } + // Make the path absolute so that lookups will go to + // "/usr/lib/debug/full/path/to/debug", not + // "/usr/lib/debug/to/debug" + llvm::sys::fs::make_absolute(OrigDir); + if (!Opts.FallbackDebugPath.empty()) { + // Try /absolute/path/to/original_binary/debuglink_name + DebugPath = Opts.FallbackDebugPath; + } else { +#if defined(__NetBSD__) + // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name + DebugPath = "/usr/libdata/debug"; +#else + // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name + DebugPath = "/usr/lib/debug"; +#endif + } + llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), + DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = std::string(DebugPath.str()); + return true; + } + return false; +} + +static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { + return StringRef(reinterpret_cast<const char *>(BuildID.data()), + BuildID.size()); +} + +bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, + std::string &Result) { + StringRef BuildIDStr = getBuildIDStr(BuildID); + auto I = BuildIDPaths.find(BuildIDStr); + if (I != BuildIDPaths.end()) { + Result = I->second; + return true; + } + auto recordPath = [&](StringRef Path) { + Result = Path.str(); + auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result}); + assert(InsertResult.second); + (void)InsertResult; + }; + + Optional<std::string> Path; + Path = LocalDIFetcher(Opts.DebugFileDirectory).fetchBuildID(BuildID); + if (Path) { + recordPath(*Path); + return true; + } + + // Try caller-provided debug
info fetchers. + for (const std::unique_ptr<DIFetcher> &Fetcher : DIFetchers) { + Path = Fetcher->fetchBuildID(BuildID); + if (Path) { + recordPath(*Path); + return true; + } + } + + return false; +} + Expected<LLVMSymbolizer::ObjectPair> LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, const std::string &ArchName) { auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); - if (I != ObjectPairForPathArch.end()) + if (I != ObjectPairForPathArch.end()) { + recordAccess(BinaryForPath.find(Path)->second); return I->second; + } auto ObjOrErr = getOrCreateObject(Path, ArchName); if (!ObjOrErr) { @@ -495,7 +526,12 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, if (!DbgObj) DbgObj = Obj; ObjectPair Res = std::make_pair(Obj, DbgObj); - ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); + std::string DbgObjPath = DbgObj->getFileName().str(); + auto Pair = + ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); + BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() { + ObjectPairForPathArch.erase(I); + }); return Res; } @@ -505,13 +541,19 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path, Binary *Bin; auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); if (!Pair.second) { - Bin = Pair.first->second.getBinary(); + Bin = Pair.first->second->getBinary(); + recordAccess(Pair.first->second); } else { Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); if (!BinOrErr) return BinOrErr.takeError(); - Pair.first->second = std::move(BinOrErr.get()); - Bin = Pair.first->second.getBinary(); + + CachedBinary &CachedBin = Pair.first->second; + CachedBin = std::move(BinOrErr.get()); + CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); }); + LRUBinaries.push_back(CachedBin); + CacheSize += CachedBin.size(); + Bin = CachedBin->getBinary(); } if (!Bin) @@ -530,8 +572,10 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path, return ObjOrErr.takeError(); } ObjectFile *Res = ObjOrErr->get(); - ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), - std::move(ObjOrErr.get())); + auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), + std::move(ObjOrErr.get())); + BinaryForPath.find(Path)->second.pushEvictor( + [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); }); return Res; } if (Bin->isObject()) { @@ -559,10 +603,6 @@ LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, Expected<SymbolizableModule *> LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { - auto I = Modules.find(ModuleName); - if (I != Modules.end()) - return I->second.get(); - std::string BinaryName = ModuleName; std::string ArchName = Opts.DefaultArch; size_t ColonPos = ModuleName.find_last_of(':'); @@ -574,6 +614,13 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { ArchName = ArchStr; } } + + auto I = Modules.find(ModuleName); + if (I != Modules.end()) { + recordAccess(BinaryForPath.find(BinaryName)->second); + return I->second.get(); + } + auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); if (!ObjectsOrErr) { // Failed to find valid object file.
@@ -608,7 +655,15 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { Context = DWARFContext::create( *Objects.second, DWARFContext::ProcessDebugRelocations::Process, nullptr, Opts.DWPName); - return createModuleInfo(Objects.first, std::move(Context), ModuleName); + auto ModuleOrErr = + createModuleInfo(Objects.first, std::move(Context), ModuleName); + if (ModuleOrErr) { + auto I = Modules.find(ModuleName); + BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() { + Modules.erase(I); + }); + } + return ModuleOrErr; } Expected<SymbolizableModule *> @@ -623,6 +678,17 @@ LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { return createModuleInfo(&Obj, std::move(Context), ObjName); } +Expected<SymbolizableModule *> +LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { + std::string Path; + if (!getOrFindDebugBinary(BuildID, Path)) { + return createStringError(errc::no_such_file_or_directory, + Twine("could not find build ID '") + + toHex(BuildID) + "'"); + } + return getOrCreateModuleInfo(Path); +} + namespace { // Undo these various manglings for Win32 extern "C" functions: @@ -680,5 +746,35 @@ LLVMSymbolizer::DemangleName(const std::string &Name, return Name; } +void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { + if (Bin->getBinary()) + LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator()); +} + +void LLVMSymbolizer::pruneCache() { + // Evict the LRU binary until the max cache size is reached or there's <= 1 + // item in the cache. The MRU binary is always kept to avoid thrashing if it's + // larger than the cache size. + while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && + std::next(LRUBinaries.begin()) != LRUBinaries.end()) { + CachedBinary &Bin = LRUBinaries.front(); + CacheSize -= Bin.size(); + LRUBinaries.pop_front(); + Bin.evict(); + } +} + +void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { + if (Evictor) { + this->Evictor = [OldEvictor = std::move(this->Evictor), + NewEvictor = std::move(NewEvictor)]() { + NewEvictor(); + OldEvictor(); + }; + } else { + this->Evictor = std::move(NewEvictor); + } +} + } // namespace symbolize } // namespace llvm diff --git a/llvm/lib/Debuginfod/DIFetcher.cpp b/llvm/lib/Debuginfod/DIFetcher.cpp new file mode 100644 index 000000000000..f0c134654534 --- /dev/null +++ b/llvm/lib/Debuginfod/DIFetcher.cpp @@ -0,0 +1,28 @@ +//===- llvm/DebugInfod/DIFetcher.cpp - Debug info fetcher -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines a DIFetcher implementation for obtaining debug info +/// from debuginfod.
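pushEvictor() composes cleanup callbacks so that evicting a cached binary also erases every cache entry derived from it, with the most recently pushed evictor running first. The composition pattern in isolation (a sketch, not the patch's actual class):

// Composing eviction callbacks, newest-first, as CachedBinary does.
#include <functional>
#include <iostream>
#include <utility>

class Cached {
  std::function<void()> Evictor;

public:
  void pushEvictor(std::function<void()> New) {
    if (Evictor)
      Evictor = [Old = std::move(Evictor), New = std::move(New)] {
        New(); // drop entries derived from this binary first
        Old(); // then the binary's own cache slot
      };
    else
      Evictor = std::move(New);
  }
  void evict() {
    if (Evictor)
      Evictor();
  }
};

int main() {
  Cached C;
  C.pushEvictor([] { std::cout << "erase BinaryForPath entry\n"; });
  C.pushEvictor([] { std::cout << "erase ObjectPairForPathArch entry\n"; });
  C.evict(); // prints the object-pair eviction first, then the binary's
}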
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Debuginfod/DIFetcher.h" + +#include "llvm/Debuginfod/Debuginfod.h" + +using namespace llvm; + +Optional<std::string> +DebuginfodDIFetcher::fetchBuildID(ArrayRef<uint8_t> BuildID) const { + Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID); + if (PathOrErr) + return *PathOrErr; + consumeError(PathOrErr.takeError()); + return None; +} diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp index 27614572766d..7b1c36fdbe09 100644 --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -115,6 +115,41 @@ Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey, getDefaultDebuginfodTimeout()); } +namespace { + +/// A simple handler which streams the returned data to a cache file. The cache +/// file is only created if a 200 OK status is observed. +class StreamedHTTPResponseHandler : public HTTPResponseHandler { + using CreateStreamFn = + std::function<Expected<std::unique_ptr<CachedFileStream>>()>; + CreateStreamFn CreateStream; + HTTPClient &Client; + std::unique_ptr<CachedFileStream> FileStream; + +public: + StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client) + : CreateStream(CreateStream), Client(Client) {} + virtual ~StreamedHTTPResponseHandler() = default; + + Error handleBodyChunk(StringRef BodyChunk) override; +}; + +} // namespace + +Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { + if (!FileStream) { + if (Client.responseCode() != 200) + return Error::success(); + Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError = + CreateStream(); + if (!FileStreamOrError) + return FileStreamOrError.takeError(); + FileStream = std::move(*FileStreamOrError); + } + *FileStream->OS << BodyChunk; + return Error::success(); +} + Expected<std::string> getCachedOrDownloadArtifact( StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) { @@ -155,28 +190,18 @@ Expected<std::string> getCachedOrDownloadArtifact( SmallString<64> ArtifactUrl; sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath); - Expected<HTTPResponseBuffer> ResponseOrErr = Client.get(ArtifactUrl); - if (!ResponseOrErr) - return ResponseOrErr.takeError(); + // Perform the HTTP request and if successful, write the response body to + // the cache. + StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); }, + Client); + HTTPRequest Request(ArtifactUrl); + Error Err = Client.perform(Request, Handler); + if (Err) + return std::move(Err); - HTTPResponseBuffer &Response = *ResponseOrErr; - if (Response.Code != 200) + if (Client.responseCode() != 200) continue; - // We have retrieved the artifact from this server, and now add it to the - // file cache. - Expected<std::unique_ptr<CachedFileStream>> FileStreamOrErr = - CacheAddStream(Task); - if (!FileStreamOrErr) - return FileStreamOrErr.takeError(); - std::unique_ptr<CachedFileStream> &FileStream = *FileStreamOrErr; - if (!Response.Body) - return createStringError( - errc::io_error, "Unallocated MemoryBuffer in HTTPResponseBuffer."); - - *FileStream->OS << StringRef(Response.Body->getBufferStart(), - Response.Body->getBufferSize()); - // Return the path to the artifact on disk.
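StreamedHTTPResponseHandler writes body chunks straight to the cache file and defers creating that file until a 200 status is seen, replacing the old buffer-the-whole-body approach. A sketch of another handler under the same (assumed) interface, accumulating into a string instead of a file:

// An HTTPResponseHandler that ignores non-200 bodies and buffers 200 bodies.
#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/Support/Error.h"
#include <string>

using namespace llvm;

class StringResponseHandler : public HTTPResponseHandler {
  HTTPClient &Client;
  std::string Body;

public:
  explicit StringResponseHandler(HTTPClient &Client) : Client(Client) {}

  Error handleBodyChunk(StringRef Chunk) override {
    // Mirrors StreamedHTTPResponseHandler: drop bodies of error responses.
    if (Client.responseCode() != 200)
      return Error::success();
    Body.append(Chunk.begin(), Chunk.end());
    return Error::success();
  }

  StringRef body() const { return Body; }
};

Client.perform(Request, Handler) drives the handler; once it returns, Client.responseCode() is available for the final status check, as the loop above does before caching.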
return std::string(AbsCachedArtifactPath); } diff --git a/llvm/lib/Debuginfod/HTTPClient.cpp b/llvm/lib/Debuginfod/HTTPClient.cpp index 65f457933b92..3376eaa7cd0d 100644 --- a/llvm/lib/Debuginfod/HTTPClient.cpp +++ b/llvm/lib/Debuginfod/HTTPClient.cpp @@ -7,9 +7,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// -/// This file defines the methods of the HTTPRequest, HTTPClient, and -/// BufferedHTTPResponseHandler classes. +/// This file defines the implementation of the HTTPClient library for issuing +/// HTTP requests and handling the responses. /// //===----------------------------------------------------------------------===// @@ -34,44 +33,6 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B) { HTTPResponseHandler::~HTTPResponseHandler() = default; -static inline bool parseContentLengthHeader(StringRef LineRef, - size_t &ContentLength) { - // Content-Length is a mandatory header, and the only one we handle. - return LineRef.consume_front("Content-Length: ") && - to_integer(LineRef.trim(), ContentLength, 10); -} - -Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) { - if (ResponseBuffer.Body) - return Error::success(); - - size_t ContentLength; - if (parseContentLengthHeader(HeaderLine, ContentLength)) - ResponseBuffer.Body = - WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength); - - return Error::success(); -} - -Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { - if (!ResponseBuffer.Body) - return createStringError(errc::io_error, - "Unallocated response buffer. HTTP Body data " - "received before Content-Length header."); - if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize()) - return createStringError(errc::io_error, - "Content size exceeds buffer size."); - memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(), - BodyChunk.size()); - Offset += BodyChunk.size(); - return Error::success(); -} - -Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) { - ResponseBuffer.Code = Code; - return Error::success(); -} - bool HTTPClient::IsInitialized = false; class HTTPClientCleanup { @@ -80,18 +41,6 @@ public: }; static const HTTPClientCleanup Cleanup; -Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) { - BufferedHTTPResponseHandler Handler; - if (Error Err = perform(Request, Handler)) - return std::move(Err); - return std::move(Handler.ResponseBuffer); -} - -Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) { - HTTPRequest Request(Url); - return perform(Request); -} - #ifdef LLVM_ENABLE_CURL bool HTTPClient::isAvailable() { return true; } @@ -128,18 +77,6 @@ struct CurlHTTPRequest { llvm::Error ErrorState = Error::success(); }; -static size_t curlHeaderFunction(char *Contents, size_t Size, size_t NMemb, - CurlHTTPRequest *CurlRequest) { - assert(Size == 1 && "The Size passed by libCURL to CURLOPT_HEADERFUNCTION " - "should always be 1."); - if (Error Err = - CurlRequest->Handler.handleHeaderLine(StringRef(Contents, NMemb))) { - CurlRequest->storeError(std::move(Err)); - return 0; - } - return NMemb; -} - static size_t curlWriteFunction(char *Contents, size_t Size, size_t NMemb, CurlHTTPRequest *CurlRequest) { Size *= NMemb; @@ -156,10 +93,10 @@ HTTPClient::HTTPClient() { "Must call HTTPClient::initialize() at the beginning of main()."); if (Curl) return; - assert((Curl = curl_easy_init()) && "Curl could not be initialized."); + Curl = curl_easy_init(); + assert(Curl && "Curl could not be initialized"); // Set the callback
hooks. curl_easy_setopt(Curl, CURLOPT_WRITEFUNCTION, curlWriteFunction); - curl_easy_setopt(Curl, CURLOPT_HEADERFUNCTION, curlHeaderFunction); } HTTPClient::~HTTPClient() { curl_easy_cleanup(Curl); } @@ -176,22 +113,19 @@ Error HTTPClient::perform(const HTTPRequest &Request, CurlHTTPRequest CurlRequest(Handler); curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest); - curl_easy_setopt(Curl, CURLOPT_HEADERDATA, &CurlRequest); CURLcode CurlRes = curl_easy_perform(Curl); if (CurlRes != CURLE_OK) return joinErrors(std::move(CurlRequest.ErrorState), createStringError(errc::io_error, "curl_easy_perform() failed: %s\n", curl_easy_strerror(CurlRes))); - if (CurlRequest.ErrorState) - return std::move(CurlRequest.ErrorState); + return std::move(CurlRequest.ErrorState); +} - unsigned Code; +unsigned HTTPClient::responseCode() { + long Code = 0; curl_easy_getinfo(Curl, CURLINFO_RESPONSE_CODE, &Code); - if (Error Err = Handler.handleStatusCode(Code)) - return joinErrors(std::move(CurlRequest.ErrorState), std::move(Err)); - - return std::move(CurlRequest.ErrorState); + return Code; } #else @@ -213,4 +147,8 @@ Error HTTPClient::perform(const HTTPRequest &Request, llvm_unreachable("No HTTP Client implementation available."); } +unsigned HTTPClient::responseCode() { + llvm_unreachable("No HTTP Client implementation available."); +} + #endif diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp index 13aa2864c183..9d128424cabf 100644 --- a/llvm/lib/Demangle/Demangle.cpp +++ b/llvm/lib/Demangle/Demangle.cpp @@ -51,7 +51,7 @@ bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) { if (isItaniumEncoding(MangledName)) Demangled = itaniumDemangle(MangledName, nullptr, nullptr, nullptr); else if (isRustEncoding(MangledName)) - Demangled = rustDemangle(MangledName, nullptr, nullptr, nullptr); + Demangled = rustDemangle(MangledName); else if (isDLangEncoding(MangledName)) Demangled = dlangDemangle(MangledName); diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp index 1a5db755e37b..1c9209d8f369 100644 --- a/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -172,6 +172,50 @@ struct DumpVisitor { return printStr("TemplateParamKind::Template"); } } + void print(Node::Prec P) { + switch (P) { + case Node::Prec::Primary: + return printStr("Node::Prec::Primary"); + case Node::Prec::Postfix: + return printStr("Node::Prec::Postfix"); + case Node::Prec::Unary: + return printStr("Node::Prec::Unary"); + case Node::Prec::Cast: + return printStr("Node::Prec::Cast"); + case Node::Prec::PtrMem: + return printStr("Node::Prec::PtrMem"); + case Node::Prec::Multiplicative: + return printStr("Node::Prec::Multiplicative"); + case Node::Prec::Additive: + return printStr("Node::Prec::Additive"); + case Node::Prec::Shift: + return printStr("Node::Prec::Shift"); + case Node::Prec::Spaceship: + return printStr("Node::Prec::Spaceship"); + case Node::Prec::Relational: + return printStr("Node::Prec::Relational"); + case Node::Prec::Equality: + return printStr("Node::Prec::Equality"); + case Node::Prec::And: + return printStr("Node::Prec::And"); + case Node::Prec::Xor: + return printStr("Node::Prec::Xor"); + case Node::Prec::Ior: + return printStr("Node::Prec::Ior"); + case Node::Prec::AndIf: + return printStr("Node::Prec::AndIf"); + case Node::Prec::OrIf: + return printStr("Node::Prec::OrIf"); + case Node::Prec::Conditional: + return printStr("Node::Prec::Conditional"); + case Node::Prec::Assign: + return 
printStr("Node::Prec::Assign"); + case Node::Prec::Comma: + return printStr("Node::Prec::Comma"); + case Node::Prec::Default: + return printStr("Node::Prec::Default"); + } + } void newLine() { printStr("\n"); @@ -404,8 +448,8 @@ char *ItaniumPartialDemangler::getFunctionBaseName(char *Buf, size_t *N) const { case Node::KAbiTagAttr: Name = static_cast(Name)->Base; continue; - case Node::KStdQualifiedName: - Name = static_cast(Name)->Child; + case Node::KModuleEntity: + Name = static_cast(Name)->Name; continue; case Node::KNestedName: Name = static_cast(Name)->Name; @@ -445,10 +489,10 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf, break; } + if (Name->getKind() == Node::KModuleEntity) + Name = static_cast(Name)->Name; + switch (Name->getKind()) { - case Node::KStdQualifiedName: - OB += "std"; - break; case Node::KNestedName: static_cast(Name)->Qual->print(OB); break; @@ -550,8 +594,8 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const { case Node::KNestedName: N = static_cast(N)->Name; break; - case Node::KStdQualifiedName: - N = static_cast(N)->Child; + case Node::KModuleEntity: + N = static_cast(N)->Name; break; } } diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index d8da3b48e25b..b4e98a20f389 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -245,8 +245,8 @@ demanglePointerCVQualifiers(StringView &MangledName) { } StringView Demangler::copyString(StringView Borrowed) { - char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); - std::strcpy(Stable, Borrowed.begin()); + char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); + std::memcpy(Stable, Borrowed.begin(), Borrowed.size()); return {Stable, Borrowed.size()}; } @@ -823,11 +823,15 @@ SymbolNode *Demangler::parse(StringView &MangledName) { } TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) { - if (!MangledName.consumeFront(".?A")) + if (!MangledName.consumeFront(".?A")) { + Error = true; return nullptr; + } MangledName.consumeFront(".?A"); - if (MangledName.empty()) + if (MangledName.empty()) { + Error = true; return nullptr; + } return demangleClassType(MangledName); } @@ -970,12 +974,9 @@ void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { // FIXME: Propagate out-of-memory as an error? 
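
The Microsoft demangler changes here replace the '\0'-terminate-then-strcpy pattern with explicit (pointer, size) copies: copyString now memcpys exactly size() bytes and never relies on a terminator. A standalone illustration of why sized copies are preferable, using std::string_view as a stand-in for the demangler's StringView (illustration only, not the patch's code):

#include <cassert>
#include <cstring>
#include <string_view>

// Copy exactly Borrowed.size() bytes into arena storage. Embedded NULs
// survive, and there is no over-read hunting for a terminator.
static std::string_view copyString(char *Arena, std::string_view Borrowed) {
  std::memcpy(Arena, Borrowed.data(), Borrowed.size());
  return {Arena, Borrowed.size()};
}

int main() {
  char Arena[8];
  std::string_view In("a\0b", 3); // strcpy would stop at the embedded NUL.
  assert(copyString(Arena, In) == In);
}
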
std::terminate(); Identifier->output(OB, OF_Default); - OB << '\0'; - char *Name = OB.getBuffer(); - - StringView Owned = copyString(Name); + StringView Owned = copyString(OB); memorizeString(Owned); - std::free(Name); + std::free(OB.getBuffer()); } IdentifierNode * @@ -1279,7 +1280,6 @@ Demangler::demangleStringLiteral(StringView &MangledName) { bool IsWcharT = false; bool IsNegative = false; size_t CrcEndPos = 0; - char *ResultBuffer = nullptr; EncodedStringLiteralNode *Result = Arena.alloc(); @@ -1375,10 +1375,8 @@ Demangler::demangleStringLiteral(StringView &MangledName) { } } - OB << '\0'; - ResultBuffer = OB.getBuffer(); - Result->DecodedString = copyString(ResultBuffer); - std::free(ResultBuffer); + Result->DecodedString = copyString(OB); + std::free(OB.getBuffer()); return Result; StringLiteralError: @@ -1455,10 +1453,9 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { Scope->output(OB, OF_Default); OB << '\''; OB << "::`" << Number << "'"; - OB << '\0'; - char *Result = OB.getBuffer(); - Identifier->Name = copyString(Result); - std::free(Result); + + Identifier->Name = copyString(OB); + std::free(OB.getBuffer()); return Identifier; } @@ -2322,8 +2319,8 @@ void Demangler::dumpBackReferences() { TypeNode *T = Backrefs.FunctionParams[I]; T->output(OB, OF_Default); - std::printf(" [%d] - %.*s\n", (int)I, (int)OB.getCurrentPosition(), - OB.getBuffer()); + StringView B = OB; + std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin()); } std::free(OB.getBuffer()); diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index d07d05a08c55..494cdabad41f 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -121,8 +121,8 @@ std::string Node::toString(OutputFlags Flags) const { OutputBuffer OB; initializeOutputBuffer(nullptr, nullptr, OB, 1024); this->output(OB, Flags); - OB << '\0'; - std::string Owned(OB.getBuffer()); + StringView SV = OB; + std::string Owned(SV.begin(), SV.end()); std::free(OB.getBuffer()); return Owned; } diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp index dcac0bd63859..32b10db2a968 100644 --- a/llvm/lib/Demangle/RustDemangle.cpp +++ b/llvm/lib/Demangle/RustDemangle.cpp @@ -24,8 +24,8 @@ using namespace llvm; using llvm::itanium_demangle::OutputBuffer; +using llvm::itanium_demangle::ScopedOverride; using llvm::itanium_demangle::StringView; -using llvm::itanium_demangle::SwapAndRestore; namespace { @@ -119,7 +119,7 @@ private: if (!Print) return; - SwapAndRestore SavePosition(Position, Position); + ScopedOverride SavePosition(Position, Position); Position = Backref; Demangler(); } @@ -147,57 +147,27 @@ private: } // namespace -char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, - int *Status) { - if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { - if (Status != nullptr) - *Status = demangle_invalid_args; +char *llvm::rustDemangle(const char *MangledName) { + if (MangledName == nullptr) return nullptr; - } // Return early if mangled name doesn't look like a Rust symbol. 
StringView Mangled(MangledName); - if (!Mangled.startsWith("_R")) { - if (Status != nullptr) - *Status = demangle_invalid_mangled_name; + if (!Mangled.startsWith("_R")) return nullptr; - } Demangler D; - if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) { - if (Status != nullptr) - *Status = demangle_memory_alloc_failure; + if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) return nullptr; - } if (!D.demangle(Mangled)) { - if (Status != nullptr) - *Status = demangle_invalid_mangled_name; std::free(D.Output.getBuffer()); return nullptr; } D.Output += '\0'; - char *Demangled = D.Output.getBuffer(); - size_t DemangledLen = D.Output.getCurrentPosition(); - - if (Buf != nullptr) { - if (DemangledLen <= *N) { - std::memcpy(Buf, Demangled, DemangledLen); - std::free(Demangled); - Demangled = Buf; - } else { - std::free(Buf); - } - } - - if (N != nullptr) - *N = DemangledLen; - - if (Status != nullptr) - *Status = demangle_success; - return Demangled; + return D.Output.getBuffer(); } Demangler::Demangler(size_t MaxRecursionLevel) @@ -241,7 +211,7 @@ bool Demangler::demangle(StringView Mangled) { demanglePath(IsInType::No); if (Position != Input.size()) { - SwapAndRestore SavePrint(Print, false); + ScopedOverride SavePrint(Print, false); demanglePath(IsInType::No); } @@ -279,7 +249,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { Error = true; return false; } - SwapAndRestore SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); + ScopedOverride SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); switch (consume()) { case 'C': { @@ -380,7 +350,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { // = [] // = "s" void Demangler::demangleImplPath(IsInType InType) { - SwapAndRestore SavePrint(Print, false); + ScopedOverride SavePrint(Print, false); parseOptionalBase62Number('s'); demanglePath(InType); } @@ -574,7 +544,7 @@ void Demangler::demangleType() { Error = true; return; } - SwapAndRestore SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); + ScopedOverride SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); size_t Start = Position; char C = consume(); @@ -657,7 +627,7 @@ void Demangler::demangleType() { // = "C" // | void Demangler::demangleFnSig() { - SwapAndRestore SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); + ScopedOverride SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); demangleOptionalBinder(); if (consumeIf('U')) @@ -699,7 +669,7 @@ void Demangler::demangleFnSig() { // = [] {} "E" void Demangler::demangleDynBounds() { - SwapAndRestore SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); + ScopedOverride SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); print("dyn "); demangleOptionalBinder(); for (size_t I = 0; !Error && !consumeIf('E'); ++I) { @@ -763,7 +733,7 @@ void Demangler::demangleConst() { Error = true; return; } - SwapAndRestore SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); + ScopedOverride SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); char C = consume(); BasicType Type; diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp index 1fb37ce7c57c..29a623ebe449 100644 --- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" 
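
With the (Buf, N, Status) out-parameters gone, rustDemangle callers now simply receive a malloc'd string or nullptr. A minimal usage sketch; the mangled name is hand-assembled from the v0 grammar (crate root "mycrate", value item "example") and assumed valid:

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  // v0 mangling: _R prefix, Nv (value-namespace path), C (crate root),
  // then <len><identifier> components.
  if (char *Demangled = llvm::rustDemangle("_RNvC7mycrate7example")) {
    std::puts(Demangled); // "mycrate::example"
    std::free(Demangled); // The caller owns the returned buffer.
  }
  return 0;
}
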
 #include <mutex>
 
@@ -70,7 +71,7 @@ LLVM_ATTRIBUTE_USED void requiredSymbolDefinitionsFromOrcTargetProcess() {
 }
 
 struct RegisteredObjectInfo {
-  RegisteredObjectInfo() {}
+  RegisteredObjectInfo() = default;
 
   RegisteredObjectInfo(std::size_t Size, jit_code_entry *Entry,
                        OwningBinary<ObjectFile> Obj)
@@ -96,7 +97,7 @@ class GDBJITRegistrationListener : public JITEventListener {
 public:
   /// Instantiates the JIT service.
-  GDBJITRegistrationListener() {}
+  GDBJITRegistrationListener() = default;
 
   /// Unregisters each object that was previously registered and releases all
   /// internal resources.
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index fd7fa21df196..3dfe736dc5be 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -37,7 +37,7 @@ class AllocaHolder {
   std::vector<void *> Allocations;
 
 public:
-  AllocaHolder() {}
+  AllocaHolder() = default;
 
   // Make this type move-only.
   AllocaHolder(AllocaHolder &&) = default;
diff --git a/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
new file mode 100644
index 000000000000..0fc366bf505f
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
@@ -0,0 +1,117 @@
+//===-------- JITLink_DWARFRecordSectionSplitter.cpp - JITLink-------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
+#include "llvm/Support/BinaryStreamReader.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+DWARFRecordSectionSplitter::DWARFRecordSectionSplitter(StringRef SectionName)
+    : SectionName(SectionName) {}
+
+Error DWARFRecordSectionSplitter::operator()(LinkGraph &G) {
+  auto *Section = G.findSectionByName(SectionName);
+
+  if (!Section) {
+    LLVM_DEBUG({
+      dbgs() << "DWARFRecordSectionSplitter: No " << SectionName
+             << " section. Nothing to do\n";
+    });
+    return Error::success();
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "DWARFRecordSectionSplitter: Processing " << SectionName
+           << "...\n";
+  });
+
+  DenseMap<Block *, LinkGraph::SplitBlockCache> Caches;
+
+  {
+    // Pre-build the split caches.
+    for (auto *B : Section->blocks())
+      Caches[B] = LinkGraph::SplitBlockCache::value_type();
+    for (auto *Sym : Section->symbols())
+      Caches[&Sym->getBlock()]->push_back(Sym);
+    for (auto *B : Section->blocks())
+      llvm::sort(*Caches[B], [](const Symbol *LHS, const Symbol *RHS) {
+        return LHS->getOffset() > RHS->getOffset();
+      });
+  }
+
+  // Iterate over blocks (we do this by iterating over Caches entries rather
+  // than Section->blocks() as we will be inserting new blocks along the way,
+  // which would invalidate iterators in the latter sequence).
+  for (auto &KV : Caches) {
+    auto &B = *KV.first;
+    auto &BCache = KV.second;
+    if (auto Err = processBlock(G, B, BCache))
+      return Err;
+  }
+
+  return Error::success();
+}
+
+Error DWARFRecordSectionSplitter::processBlock(
+    LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache) {
+  LLVM_DEBUG(dbgs() << "  Processing block at " << B.getAddress() << "\n");
+
+  // Section should not contain zero-fill blocks.
+ if (B.isZeroFill()) + return make_error("Unexpected zero-fill block in " + + SectionName + " section"); + + if (B.getSize() == 0) { + LLVM_DEBUG(dbgs() << " Block is empty. Skipping.\n"); + return Error::success(); + } + + BinaryStreamReader BlockReader( + StringRef(B.getContent().data(), B.getContent().size()), + G.getEndianness()); + + while (true) { + uint64_t RecordStartOffset = BlockReader.getOffset(); + + LLVM_DEBUG({ + dbgs() << " Processing CFI record at " + << formatv("{0:x16}", B.getAddress()) << "\n"; + }); + + uint32_t Length; + if (auto Err = BlockReader.readInteger(Length)) + return Err; + if (Length != 0xffffffff) { + if (auto Err = BlockReader.skip(Length)) + return Err; + } else { + uint64_t ExtendedLength; + if (auto Err = BlockReader.readInteger(ExtendedLength)) + return Err; + if (auto Err = BlockReader.skip(ExtendedLength)) + return Err; + } + + // If this was the last block then there's nothing to split + if (BlockReader.empty()) { + LLVM_DEBUG(dbgs() << " Extracted " << B << "\n"); + return Error::success(); + } + + uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset; + auto &NewBlock = G.splitBlock(B, BlockSize); + (void)NewBlock; + LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n"); + } +} + +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 2ae193595fc0..b1492cd74508 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -10,6 +10,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" #include "llvm/Support/DynamicLibrary.h" @@ -18,109 +19,13 @@ namespace llvm { namespace jitlink { -EHFrameSplitter::EHFrameSplitter(StringRef EHFrameSectionName) - : EHFrameSectionName(EHFrameSectionName) {} - -Error EHFrameSplitter::operator()(LinkGraph &G) { - auto *EHFrame = G.findSectionByName(EHFrameSectionName); - - if (!EHFrame) { - LLVM_DEBUG({ - dbgs() << "EHFrameSplitter: No " << EHFrameSectionName - << " section. Nothing to do\n"; - }); - return Error::success(); - } - - LLVM_DEBUG({ - dbgs() << "EHFrameSplitter: Processing " << EHFrameSectionName << "...\n"; - }); - - DenseMap Caches; - - { - // Pre-build the split caches. - for (auto *B : EHFrame->blocks()) - Caches[B] = LinkGraph::SplitBlockCache::value_type(); - for (auto *Sym : EHFrame->symbols()) - Caches[&Sym->getBlock()]->push_back(Sym); - for (auto *B : EHFrame->blocks()) - llvm::sort(*Caches[B], [](const Symbol *LHS, const Symbol *RHS) { - return LHS->getOffset() > RHS->getOffset(); - }); - } - - // Iterate over blocks (we do this by iterating over Caches entries rather - // than EHFrame->blocks() as we will be inserting new blocks along the way, - // which would invalidate iterators in the latter sequence. - for (auto &KV : Caches) { - auto &B = *KV.first; - auto &BCache = KV.second; - if (auto Err = processBlock(G, B, BCache)) - return Err; - } - - return Error::success(); -} - -Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B, - LinkGraph::SplitBlockCache &Cache) { - LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n"); - - // eh-frame should not contain zero-fill blocks. 
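
The generalized splitter above (extracted from the old EHFrameSplitter, which is deleted below) walks length-prefixed CFI records: a 4-byte initial length, with 0xffffffff escaping to an 8-byte extended length. A standalone sketch of that framing rule, assuming the record bytes are little-endian (illustration only):

#include <cstdint>
#include <cstring>

// Returns the total size in bytes of the CFI record starting at Data,
// or 0 if fewer than Avail bytes remain for the length fields.
static uint64_t cfiRecordSize(const uint8_t *Data, uint64_t Avail) {
  if (Avail < 4)
    return 0;
  uint32_t Length;
  std::memcpy(&Length, Data, 4);
  if (Length != 0xffffffff)
    return 4 + Length; // Initial length field plus payload.
  if (Avail < 12)
    return 0;
  uint64_t Extended;
  std::memcpy(&Extended, Data + 4, 8);
  return 12 + Extended; // Escape marker, extended length, payload.
}
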
- if (B.isZeroFill()) - return make_error("Unexpected zero-fill block in " + - EHFrameSectionName + " section"); - - if (B.getSize() == 0) { - LLVM_DEBUG(dbgs() << " Block is empty. Skipping.\n"); - return Error::success(); - } - - BinaryStreamReader BlockReader( - StringRef(B.getContent().data(), B.getContent().size()), - G.getEndianness()); - - while (true) { - uint64_t RecordStartOffset = BlockReader.getOffset(); - - LLVM_DEBUG({ - dbgs() << " Processing CFI record at " - << formatv("{0:x16}", B.getAddress()) << "\n"; - }); - - uint32_t Length; - if (auto Err = BlockReader.readInteger(Length)) - return Err; - if (Length != 0xffffffff) { - if (auto Err = BlockReader.skip(Length)) - return Err; - } else { - uint64_t ExtendedLength; - if (auto Err = BlockReader.readInteger(ExtendedLength)) - return Err; - if (auto Err = BlockReader.skip(ExtendedLength)) - return Err; - } - - // If this was the last block then there's nothing to split - if (BlockReader.empty()) { - LLVM_DEBUG(dbgs() << " Extracted " << B << "\n"); - return Error::success(); - } - - uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset; - auto &NewBlock = G.splitBlock(B, BlockSize); - (void)NewBlock; - LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n"); - } -} - EHFrameEdgeFixer::EHFrameEdgeFixer(StringRef EHFrameSectionName, - unsigned PointerSize, Edge::Kind Delta64, - Edge::Kind Delta32, Edge::Kind NegDelta32) + unsigned PointerSize, Edge::Kind Pointer32, + Edge::Kind Pointer64, Edge::Kind Delta32, + Edge::Kind Delta64, Edge::Kind NegDelta32) : EHFrameSectionName(EHFrameSectionName), PointerSize(PointerSize), - Delta64(Delta64), Delta32(Delta32), NegDelta32(NegDelta32) {} + Pointer32(Pointer32), Pointer64(Pointer64), Delta32(Delta32), + Delta64(Delta64), NegDelta32(NegDelta32) {} Error EHFrameEdgeFixer::operator()(LinkGraph &G) { auto *EHFrame = G.findSectionByName(EHFrameSectionName); @@ -147,7 +52,16 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) { // Build a map of all blocks and symbols in the text sections. We will use // these for finding / building edge targets when processing FDEs. for (auto &Sec : G.sections()) { - PC.AddrToSyms.addSymbols(Sec.symbols()); + // Just record the most-canonical symbol (for eh-frame purposes) at each + // address. + for (auto *Sym : Sec.symbols()) { + auto &CurSym = PC.AddrToSym[Sym->getAddress()]; + if (!CurSym || (std::make_tuple(Sym->getLinkage(), Sym->getScope(), + !Sym->hasName(), Sym->getName()) < + std::make_tuple(CurSym->getLinkage(), CurSym->getScope(), + !CurSym->hasName(), CurSym->getName()))) + CurSym = Sym; + } if (auto Err = PC.AddrToBlock.addBlocks(Sec.blocks(), BlockAddressMap::includeNonNull)) return Err; @@ -172,10 +86,7 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) { Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) { - LLVM_DEBUG({ - dbgs() << " Processing block at " << formatv("{0:x16}", B.getAddress()) - << "\n"; - }); + LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n"); // eh-frame should not contain zero-fill blocks. if (B.isZeroFill()) @@ -209,7 +120,7 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) { LLVM_DEBUG({ dbgs() << " Processing CFI record at " - << formatv("{0:x16}", B.getAddress() + RecordStartOffset) << "\n"; + << (B.getAddress() + RecordStartOffset) << "\n"; }); // Get the record length. 
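
EHFrameEdgeFixer now pre-computes a single canonical symbol per address, ordering candidates by a (linkage, scope, unnamed?, name) tuple instead of scanning all symbols per lookup. A standalone sketch of that selection rule, with a simplified stand-in for jitlink::Symbol (illustration only):

#include <string>
#include <tuple>
#include <vector>

struct Sym { // Stand-in for jitlink::Symbol.
  int Linkage;      // Lower value = stronger linkage.
  int Scope;        // Lower value = more visible scope.
  std::string Name; // Empty for anonymous symbols.
};

// Smallest tuple wins: strong linkage first, then most-visible scope,
// then named-before-unnamed (false < true), then lexically least name.
static const Sym *pickCanonical(const std::vector<Sym> &AtAddr) {
  auto Key = [](const Sym &X) {
    return std::make_tuple(X.Linkage, X.Scope, X.Name.empty(), X.Name);
  };
  const Sym *Best = nullptr;
  for (const Sym &S : AtAddr)
    if (!Best || Key(S) < Key(*Best))
      Best = &S;
  return Best;
}
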
@@ -244,7 +155,7 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) { if (CIEDelta == 0) { if (auto Err = processCIE(PC, B, RecordStartOffset, CIEDeltaFieldOffset + RecordRemaining, - CIEDeltaFieldOffset)) + CIEDeltaFieldOffset, BlockEdges)) return Err; } else { if (auto Err = processFDE(PC, B, RecordStartOffset, @@ -263,7 +174,8 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) { Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B, size_t RecordOffset, size_t RecordLength, - size_t CIEDeltaFieldOffset) { + size_t CIEDeltaFieldOffset, + const BlockEdgeMap &BlockEdges) { LLVM_DEBUG(dbgs() << " Record is CIE\n"); @@ -301,10 +213,6 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B, uint64_t CodeAlignmentFactor = 0; if (auto Err = RecordReader.readULEB128(CodeAlignmentFactor)) return Err; - if (CodeAlignmentFactor != 1) - return make_error("Unsupported CIE code alignment factor " + - Twine(CodeAlignmentFactor) + - " (expected 1)"); } // Read and validate the data alignment factor. @@ -312,76 +220,65 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B, int64_t DataAlignmentFactor = 0; if (auto Err = RecordReader.readSLEB128(DataAlignmentFactor)) return Err; - if (DataAlignmentFactor != -8) - return make_error("Unsupported CIE data alignment factor " + - Twine(DataAlignmentFactor) + - " (expected -8)"); } // Skip the return address register field. if (auto Err = RecordReader.skip(1)) return Err; - uint64_t AugmentationDataLength = 0; - if (auto Err = RecordReader.readULEB128(AugmentationDataLength)) - return Err; + if (AugInfo->AugmentationDataPresent) { - uint32_t AugmentationDataStartOffset = RecordReader.getOffset(); + CIEInfo.AugmentationDataPresent = true; - uint8_t *NextField = &AugInfo->Fields[0]; - while (uint8_t Field = *NextField++) { - switch (Field) { - case 'L': { - CIEInfo.FDEsHaveLSDAField = true; - uint8_t LSDAPointerEncoding; - if (auto Err = RecordReader.readInteger(LSDAPointerEncoding)) - return Err; - if (!isSupportedPointerEncoding(LSDAPointerEncoding)) - return make_error( - "Unsupported LSDA pointer encoding " + - formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CIESymbol.getAddress())); - CIEInfo.LSDAPointerEncoding = LSDAPointerEncoding; - break; - } - case 'P': { - uint8_t PersonalityPointerEncoding = 0; - if (auto Err = RecordReader.readInteger(PersonalityPointerEncoding)) - return Err; - if (PersonalityPointerEncoding != - (dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata4)) - return make_error( - "Unspported personality pointer " - "encoding " + - formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CIESymbol.getAddress())); - uint32_t PersonalityPointerAddress; - if (auto Err = RecordReader.readInteger(PersonalityPointerAddress)) - return Err; - break; - } - case 'R': { - uint8_t FDEPointerEncoding; - if (auto Err = RecordReader.readInteger(FDEPointerEncoding)) - return Err; - if (!isSupportedPointerEncoding(FDEPointerEncoding)) - return make_error( - "Unsupported FDE pointer encoding " + - formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CIESymbol.getAddress())); - CIEInfo.FDEPointerEncoding = FDEPointerEncoding; - break; - } - default: - llvm_unreachable("Invalid augmentation string field"); + uint64_t AugmentationDataLength = 0; + if (auto Err = RecordReader.readULEB128(AugmentationDataLength)) + return Err; + + uint32_t AugmentationDataStartOffset = RecordReader.getOffset(); + 
+ uint8_t *NextField = &AugInfo->Fields[0]; + while (uint8_t Field = *NextField++) { + switch (Field) { + case 'L': + CIEInfo.LSDAPresent = true; + if (auto PE = readPointerEncoding(RecordReader, B, "LSDA")) + CIEInfo.LSDAEncoding = *PE; + else + return PE.takeError(); + break; + case 'P': { + auto PersonalityPointerEncoding = + readPointerEncoding(RecordReader, B, "personality"); + if (!PersonalityPointerEncoding) + return PersonalityPointerEncoding.takeError(); + if (auto Err = + getOrCreateEncodedPointerEdge( + PC, BlockEdges, *PersonalityPointerEncoding, RecordReader, + B, RecordOffset + RecordReader.getOffset(), "personality") + .takeError()) + return Err; + break; + } + case 'R': + if (auto PE = readPointerEncoding(RecordReader, B, "address")) { + CIEInfo.AddressEncoding = *PE; + if (CIEInfo.AddressEncoding == dwarf::DW_EH_PE_omit) + return make_error( + "Invalid address encoding DW_EH_PE_omit in CIE at " + + formatv("{0:x}", (B.getAddress() + RecordOffset).getValue())); + } else + return PE.takeError(); + break; + default: + llvm_unreachable("Invalid augmentation string field"); + } } - } - if (RecordReader.getOffset() - AugmentationDataStartOffset > - AugmentationDataLength) - return make_error("Read past the end of the augmentation " - "data while parsing fields"); + if (RecordReader.getOffset() - AugmentationDataStartOffset > + AugmentationDataLength) + return make_error("Read past the end of the augmentation " + "data while parsing fields"); + } assert(!PC.CIEInfos.count(CIESymbol.getAddress()) && "Multiple CIEs recorded at the same address?"); @@ -394,7 +291,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, size_t RecordOffset, size_t RecordLength, size_t CIEDeltaFieldOffset, uint32_t CIEDelta, - BlockEdgeMap &BlockEdges) { + const BlockEdgeMap &BlockEdges) { LLVM_DEBUG(dbgs() << " Record is FDE\n"); orc::ExecutorAddr RecordAddress = B.getAddress() + RecordOffset; @@ -422,8 +319,8 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, LLVM_DEBUG({ dbgs() << " Adding edge at " - << formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset) - << " to CIE at: " << formatv("{0:x16}", CIEAddress) << "\n"; + << (RecordAddress + CIEDeltaFieldOffset) + << " to CIE at: " << CIEAddress << "\n"; }); if (auto CIEInfoOrErr = PC.findCIEInfo(CIEAddress)) CIEInfo = *CIEInfoOrErr; @@ -435,8 +332,8 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, } else { LLVM_DEBUG({ dbgs() << " Already has edge at " - << formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset) - << " to CIE at " << formatv("{0:x16}", CIEAddress) << "\n"; + << (RecordAddress + CIEDeltaFieldOffset) << " to CIE at " + << CIEAddress << "\n"; }); auto &EI = CIEEdgeItr->second; if (EI.Addend) @@ -451,107 +348,41 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, } } - { - // Process the PC-Begin field. 
- Block *PCBeginBlock = nullptr; - orc::ExecutorAddrDiff PCBeginFieldOffset = RecordReader.getOffset(); - auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset); - if (PCEdgeItr == BlockEdges.end()) { - auto PCBeginPtrInfo = - readEncodedPointer(CIEInfo->FDEPointerEncoding, - RecordAddress + PCBeginFieldOffset, RecordReader); - if (!PCBeginPtrInfo) - return PCBeginPtrInfo.takeError(); - orc::ExecutorAddr PCBegin = PCBeginPtrInfo->first; - Edge::Kind PCBeginEdgeKind = PCBeginPtrInfo->second; - LLVM_DEBUG({ - dbgs() << " Adding edge at " - << (RecordAddress + PCBeginFieldOffset) << " to PC at " - << formatv("{0:x16}", PCBegin) << "\n"; - }); - auto PCBeginSym = getOrCreateSymbol(PC, PCBegin); - if (!PCBeginSym) - return PCBeginSym.takeError(); - B.addEdge(PCBeginEdgeKind, RecordOffset + PCBeginFieldOffset, *PCBeginSym, - 0); - PCBeginBlock = &PCBeginSym->getBlock(); - } else { - auto &EI = PCEdgeItr->second; - LLVM_DEBUG({ - dbgs() << " Already has edge at " - << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset) - << " to PC at " << formatv("{0:x16}", EI.Target->getAddress()); - if (EI.Addend) - dbgs() << " + " << formatv("{0:x16}", EI.Addend); - dbgs() << "\n"; - }); - - // Make sure the existing edge points at a defined block. - if (!EI.Target->isDefined()) { - auto EdgeAddr = RecordAddress + PCBeginFieldOffset; - return make_error("FDE edge at " + - formatv("{0:x16}", EdgeAddr) + - " points at external block"); - } - PCBeginBlock = &EI.Target->getBlock(); - if (auto Err = RecordReader.skip( - getPointerEncodingDataSize(CIEInfo->FDEPointerEncoding))) - return Err; - } - + // Process the PC-Begin field. + LLVM_DEBUG({ + dbgs() << " Processing PC-begin at " + << (RecordAddress + RecordReader.getOffset()) << "\n"; + }); + if (auto PCBegin = getOrCreateEncodedPointerEdge( + PC, BlockEdges, CIEInfo->AddressEncoding, RecordReader, B, + RecordReader.getOffset(), "PC begin")) { + assert(*PCBegin && "PC-begin symbol not set"); // Add a keep-alive edge from the FDE target to the FDE to ensure that the // FDE is kept alive if its target is. - assert(PCBeginBlock && "PC-begin block not recorded"); LLVM_DEBUG({ dbgs() << " Adding keep-alive edge from target at " - << formatv("{0:x16}", PCBeginBlock->getAddress()) << " to FDE at " - << formatv("{0:x16}", RecordAddress) << "\n"; + << (*PCBegin)->getBlock().getAddress() << " to FDE at " + << RecordAddress << "\n"; }); - PCBeginBlock->addEdge(Edge::KeepAlive, 0, FDESymbol, 0); - } + (*PCBegin)->getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0); + } else + return PCBegin.takeError(); // Skip over the PC range size field. 
- if (auto Err = RecordReader.skip( - getPointerEncodingDataSize(CIEInfo->FDEPointerEncoding))) + if (auto Err = skipEncodedPointer(CIEInfo->AddressEncoding, RecordReader)) return Err; - if (CIEInfo->FDEsHaveLSDAField) { + if (CIEInfo->AugmentationDataPresent) { uint64_t AugmentationDataSize; if (auto Err = RecordReader.readULEB128(AugmentationDataSize)) return Err; - orc::ExecutorAddrDiff LSDAFieldOffset = RecordReader.getOffset(); - auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset); - if (LSDAEdgeItr == BlockEdges.end()) { - auto LSDAPointerInfo = - readEncodedPointer(CIEInfo->LSDAPointerEncoding, - RecordAddress + LSDAFieldOffset, RecordReader); - if (!LSDAPointerInfo) - return LSDAPointerInfo.takeError(); - orc::ExecutorAddr LSDA = LSDAPointerInfo->first; - Edge::Kind LSDAEdgeKind = LSDAPointerInfo->second; - auto LSDASym = getOrCreateSymbol(PC, LSDA); - if (!LSDASym) - return LSDASym.takeError(); - LLVM_DEBUG({ - dbgs() << " Adding edge at " - << formatv("{0:x16}", RecordAddress + LSDAFieldOffset) - << " to LSDA at " << formatv("{0:x16}", LSDA) << "\n"; - }); - B.addEdge(LSDAEdgeKind, RecordOffset + LSDAFieldOffset, *LSDASym, 0); - } else { - LLVM_DEBUG({ - auto &EI = LSDAEdgeItr->second; - dbgs() << " Already has edge at " - << formatv("{0:x16}", RecordAddress + LSDAFieldOffset) - << " to LSDA at " << formatv("{0:x16}", EI.Target->getAddress()); - if (EI.Addend) - dbgs() << " + " << formatv("{0:x16}", EI.Addend); - dbgs() << "\n"; - }); - if (auto Err = RecordReader.skip(AugmentationDataSize)) + if (CIEInfo->LSDAPresent) + if (auto Err = getOrCreateEncodedPointerEdge( + PC, BlockEdges, CIEInfo->LSDAEncoding, RecordReader, B, + RecordReader.getOffset(), "LSDA") + .takeError()) return Err; - } } else { LLVM_DEBUG(dbgs() << " Record does not have LSDA field.\n"); } @@ -600,129 +431,163 @@ EHFrameEdgeFixer::parseAugmentationString(BinaryStreamReader &RecordReader) { return std::move(AugInfo); } -bool EHFrameEdgeFixer::isSupportedPointerEncoding(uint8_t PointerEncoding) { +Expected EHFrameEdgeFixer::readPointerEncoding(BinaryStreamReader &R, + Block &InBlock, + const char *FieldName) { using namespace dwarf; - // We only support PC-rel for now. - if ((PointerEncoding & 0x70) != DW_EH_PE_pcrel) - return false; - - // readEncodedPointer does not handle indirect. - if (PointerEncoding & DW_EH_PE_indirect) - return false; + uint8_t PointerEncoding; + if (auto Err = R.readInteger(PointerEncoding)) + return std::move(Err); - // Supported datatypes. 
+ bool Supported = true; switch (PointerEncoding & 0xf) { - case DW_EH_PE_absptr: - case DW_EH_PE_udata4: - case DW_EH_PE_udata8: - case DW_EH_PE_sdata4: - case DW_EH_PE_sdata8: - return true; + case DW_EH_PE_uleb128: + case DW_EH_PE_udata2: + case DW_EH_PE_sleb128: + case DW_EH_PE_sdata2: + Supported = false; + break; + } + if (Supported) { + switch (PointerEncoding & 0x70) { + case DW_EH_PE_textrel: + case DW_EH_PE_datarel: + case DW_EH_PE_funcrel: + case DW_EH_PE_aligned: + Supported = false; + break; + } } - return false; + if (Supported) + return PointerEncoding; + + return make_error("Unsupported pointer encoding " + + formatv("{0:x2}", PointerEncoding) + " for " + + FieldName + "in CFI record at " + + formatv("{0:x16}", InBlock.getAddress())); } -unsigned EHFrameEdgeFixer::getPointerEncodingDataSize(uint8_t PointerEncoding) { +Error EHFrameEdgeFixer::skipEncodedPointer(uint8_t PointerEncoding, + BinaryStreamReader &RecordReader) { using namespace dwarf; - assert(isSupportedPointerEncoding(PointerEncoding) && - "Unsupported pointer encoding"); + // Switch absptr to corresponding udata encoding. + if ((PointerEncoding & 0xf) == DW_EH_PE_absptr) + PointerEncoding |= (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4; + switch (PointerEncoding & 0xf) { - case DW_EH_PE_absptr: - return PointerSize; case DW_EH_PE_udata4: case DW_EH_PE_sdata4: - return 4; + if (auto Err = RecordReader.skip(4)) + return Err; + break; case DW_EH_PE_udata8: case DW_EH_PE_sdata8: - return 8; + if (auto Err = RecordReader.skip(8)) + return Err; + break; default: - llvm_unreachable("Unsupported encoding"); + llvm_unreachable("Unrecognized encoding"); } + return Error::success(); } -Expected> -EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, - orc::ExecutorAddr PointerFieldAddress, - BinaryStreamReader &RecordReader) { - assert(isSupportedPointerEncoding(PointerEncoding) && - "Unsupported pointer encoding"); - +Expected EHFrameEdgeFixer::getOrCreateEncodedPointerEdge( + ParseContext &PC, const BlockEdgeMap &BlockEdges, uint8_t PointerEncoding, + BinaryStreamReader &RecordReader, Block &BlockToFix, + size_t PointerFieldOffset, const char *FieldName) { using namespace dwarf; - // Isolate data type, remap absptr to udata4 or udata8. This relies on us - // having verified that the graph uses 32-bit or 64-bit pointers only at the - // start of this pass. - uint8_t EffectiveType = PointerEncoding & 0xf; - if (EffectiveType == DW_EH_PE_absptr) - EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4; + if (PointerEncoding == DW_EH_PE_omit) + return nullptr; + + // If there's already an edge here then just skip the encoded pointer and + // return the edge's target. + { + auto EdgeI = BlockEdges.find(PointerFieldOffset); + if (EdgeI != BlockEdges.end()) { + LLVM_DEBUG({ + dbgs() << " Existing edge at " + << (BlockToFix.getAddress() + PointerFieldOffset) << " to " + << FieldName << " at " << EdgeI->second.Target->getAddress(); + if (EdgeI->second.Target->hasName()) + dbgs() << " (" << EdgeI->second.Target->getName() << ")"; + dbgs() << "\n"; + }); + if (auto Err = skipEncodedPointer(PointerEncoding, RecordReader)) + return std::move(Err); + return EdgeI->second.Target; + } + } + + // Switch absptr to corresponding udata encoding. + if ((PointerEncoding & 0xf) == DW_EH_PE_absptr) + PointerEncoding |= (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4; - orc::ExecutorAddr Addr; - Edge::Kind PointerEdgeKind = Edge::Invalid; - switch (EffectiveType) { + // We need to create an edge. 
Start by reading the field value. + uint64_t FieldValue; + bool Is64Bit = false; + switch (PointerEncoding & 0xf) { case DW_EH_PE_udata4: { uint32_t Val; if (auto Err = RecordReader.readInteger(Val)) return std::move(Err); - Addr = PointerFieldAddress + Val; - PointerEdgeKind = Delta32; - break; - } - case DW_EH_PE_udata8: { - uint64_t Val; - if (auto Err = RecordReader.readInteger(Val)) - return std::move(Err); - Addr = PointerFieldAddress + Val; - PointerEdgeKind = Delta64; + FieldValue = Val; break; } case DW_EH_PE_sdata4: { - int32_t Val; + uint32_t Val; if (auto Err = RecordReader.readInteger(Val)) return std::move(Err); - Addr = PointerFieldAddress + Val; - PointerEdgeKind = Delta32; + FieldValue = Val; break; } - case DW_EH_PE_sdata8: { - int64_t Val; - if (auto Err = RecordReader.readInteger(Val)) + case DW_EH_PE_udata8: + case DW_EH_PE_sdata8: + Is64Bit = true; + if (auto Err = RecordReader.readInteger(FieldValue)) return std::move(Err); - Addr = PointerFieldAddress + Val; - PointerEdgeKind = Delta64; break; - } + default: + llvm_unreachable("Unsupported encoding"); } - if (PointerEdgeKind == Edge::Invalid) - return make_error( - "Unspported edge kind for encoded pointer at " + - formatv("{0:x}", PointerFieldAddress)); + // Find the edge target and edge kind to use. + orc::ExecutorAddr Target; + Edge::Kind PtrEdgeKind = Edge::Invalid; + if ((PointerEncoding & 0x70) == DW_EH_PE_pcrel) { + Target = BlockToFix.getAddress() + PointerFieldOffset; + PtrEdgeKind = Is64Bit ? Delta64 : Delta32; + } else + PtrEdgeKind = Is64Bit ? Pointer64 : Pointer32; + Target += FieldValue; + + // Find or create a symbol to point the edge at. + auto TargetSym = getOrCreateSymbol(PC, Target); + if (!TargetSym) + return TargetSym.takeError(); + BlockToFix.addEdge(PtrEdgeKind, PointerFieldOffset, *TargetSym, 0); - return std::make_pair(Addr, Delta64); + LLVM_DEBUG({ + dbgs() << " Adding edge at " + << (BlockToFix.getAddress() + PointerFieldOffset) << " to " + << FieldName << " at " << TargetSym->getAddress(); + if (TargetSym->hasName()) + dbgs() << " (" << TargetSym->getName() << ")"; + dbgs() << "\n"; + }); + + return &*TargetSym; } Expected EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC, orc::ExecutorAddr Addr) { - Symbol *CanonicalSym = nullptr; - - auto UpdateCanonicalSym = [&](Symbol *Sym) { - if (!CanonicalSym || Sym->getLinkage() < CanonicalSym->getLinkage() || - Sym->getScope() < CanonicalSym->getScope() || - (Sym->hasName() && !CanonicalSym->hasName()) || - Sym->getName() < CanonicalSym->getName()) - CanonicalSym = Sym; - }; - - if (auto *SymbolsAtAddr = PC.AddrToSyms.getSymbolsAt(Addr)) - for (auto *Sym : *SymbolsAtAddr) - UpdateCanonicalSym(Sym); - - // If we found an existing symbol at the given address then use it. - if (CanonicalSym) - return *CanonicalSym; + // See whether we have a canonical symbol for the given address already. + auto CanonicalSymI = PC.AddrToSym.find(Addr); + if (CanonicalSymI != PC.AddrToSym.end()) + return *CanonicalSymI->second; // Otherwise search for a block covering the address and create a new symbol. 
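
The getOrCreateEncodedPointerEdge path above accepts any DW_EH_PE combination it can express as an edge, replacing the old pcrel-only whitelist. A sketch of how the encoding byte decomposes under the forms this patch supports (the low nibble selects the value form, bits 0x70 the application, 0x80 the indirect flag):

#include "llvm/BinaryFormat/Dwarf.h"
#include <cstdint>

using namespace llvm::dwarf;

// Bytes occupied by the encoded value, after remapping absptr to the
// pointer-sized udata form exactly as the fixer does.
static unsigned encodedSize(uint8_t PE, unsigned PointerSize) {
  if ((PE & 0xf) == DW_EH_PE_absptr)
    PE |= (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
  switch (PE & 0xf) {
  case DW_EH_PE_udata4:
  case DW_EH_PE_sdata4:
    return 4;
  case DW_EH_PE_udata8:
  case DW_EH_PE_sdata8:
    return 8;
  default:
    return 0; // Form rejected by readPointerEncoding.
  }
}

// pcrel values become Delta32/Delta64 edges anchored at the field's own
// address; everything else becomes an absolute Pointer32/Pointer64 edge.
static bool isPCRel(uint8_t PE) { return (PE & 0x70) == DW_EH_PE_pcrel; }
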
auto *B = PC.AddrToBlock.getBlockCovering(Addr); @@ -730,7 +595,10 @@ Expected EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC, return make_error("No symbol or block covering address " + formatv("{0:x16}", Addr)); - return PC.G.addAnonymousSymbol(*B, Addr - B->getAddress(), 0, false, false); + auto &S = + PC.G.addAnonymousSymbol(*B, Addr - B->getAddress(), 0, false, false); + PC.AddrToSym[S.getAddress()] = &S; + return S; } char EHFrameNullTerminator::NullTerminatorBlockContent[4] = {0, 0, 0, 0}; @@ -756,7 +624,7 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { return Error::success(); } -EHFrameRegistrar::~EHFrameRegistrar() {} +EHFrameRegistrar::~EHFrameRegistrar() = default; Error InProcessEHFrameRegistrar::registerEHFrames( orc::ExecutorAddrRange EHFrameSection) { diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h index ef4b47b9aa28..55cf7fc63ee7 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h @@ -21,27 +21,16 @@ namespace llvm { namespace jitlink { -/// A LinkGraph pass that splits blocks in an eh-frame section into sub-blocks -/// representing individual eh-frames. -/// EHFrameSplitter should not be run without EHFrameEdgeFixer, which is -/// responsible for adding FDE-to-CIE edges. -class EHFrameSplitter { -public: - EHFrameSplitter(StringRef EHFrameSectionName); - Error operator()(LinkGraph &G); - -private: - Error processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache); - - StringRef EHFrameSectionName; -}; - /// A LinkGraph pass that adds missing FDE-to-CIE, FDE-to-PC and FDE-to-LSDA /// edges. class EHFrameEdgeFixer { public: + /// Create an eh-frame edge fixer. + /// If a given edge-kind is not supported on the target architecture then + /// Edge::Invalid should be used. 
EHFrameEdgeFixer(StringRef EHFrameSectionName, unsigned PointerSize, - Edge::Kind Delta64, Edge::Kind Delta32, + Edge::Kind Pointer32, Edge::Kind Pointer64, + Edge::Kind Delta32, Edge::Kind Delta64, Edge::Kind NegDelta32); Error operator()(LinkGraph &G); @@ -57,9 +46,10 @@ private: CIEInformation() = default; CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {} Symbol *CIESymbol = nullptr; - bool FDEsHaveLSDAField = false; - uint8_t FDEPointerEncoding = 0; - uint8_t LSDAPointerEncoding = 0; + bool AugmentationDataPresent = false; + bool LSDAPresent = false; + uint8_t LSDAEncoding = 0; + uint8_t AddressEncoding = 0; }; struct EdgeTarget { @@ -87,33 +77,38 @@ private: LinkGraph &G; CIEInfosMap CIEInfos; BlockAddressMap AddrToBlock; - SymbolAddressMap AddrToSyms; + DenseMap AddrToSym; }; Error processBlock(ParseContext &PC, Block &B); Error processCIE(ParseContext &PC, Block &B, size_t RecordOffset, - size_t RecordLength, size_t CIEDeltaFieldOffset); + size_t RecordLength, size_t CIEDeltaFieldOffset, + const BlockEdgeMap &BlockEdges); Error processFDE(ParseContext &PC, Block &B, size_t RecordOffset, size_t RecordLength, size_t CIEDeltaFieldOffset, - uint32_t CIEDelta, BlockEdgeMap &BlockEdges); + uint32_t CIEDelta, const BlockEdgeMap &BlockEdges); Expected parseAugmentationString(BinaryStreamReader &RecordReader); - static bool isSupportedPointerEncoding(uint8_t PointerEncoding); - unsigned getPointerEncodingDataSize(uint8_t PointerEncoding); - Expected> - readEncodedPointer(uint8_t PointerEncoding, - orc::ExecutorAddr PointerFieldAddress, - BinaryStreamReader &RecordReader); + Expected readPointerEncoding(BinaryStreamReader &RecordReader, + Block &InBlock, const char *FieldName); + Error skipEncodedPointer(uint8_t PointerEncoding, + BinaryStreamReader &RecordReader); + Expected getOrCreateEncodedPointerEdge( + ParseContext &PC, const BlockEdgeMap &BlockEdges, uint8_t PointerEncoding, + BinaryStreamReader &RecordReader, Block &BlockToFix, + size_t PointerFieldOffset, const char *FieldName); Expected getOrCreateSymbol(ParseContext &PC, orc::ExecutorAddr Addr); StringRef EHFrameSectionName; unsigned PointerSize; - Edge::Kind Delta64; + Edge::Kind Pointer32; + Edge::Kind Pointer64; Edge::Kind Delta32; + Edge::Kind Delta64; Edge::Kind NegDelta32; }; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp index 2194a4fbf1f4..5a983c219627 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp @@ -27,7 +27,7 @@ namespace jitlink { StringRef ELFLinkGraphBuilderBase::CommonSectionName(".common"); ArrayRef ELFLinkGraphBuilderBase::DwarfSectionNames = DWSecNames; -ELFLinkGraphBuilderBase::~ELFLinkGraphBuilderBase() {} +ELFLinkGraphBuilderBase::~ELFLinkGraphBuilderBase() = default; } // end namespace jitlink } // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp index dd3eb97c21a0..98da3f155c3e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -11,20 +11,21 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h" +#include "EHFrameSupportImpl.h" #include "ELFLinkGraphBuilder.h" #include "JITLinkGeneric.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" #include 
"llvm/ExecutionEngine/JITLink/aarch64.h" #include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Endian.h" #define DEBUG_TYPE "jitlink" using namespace llvm; using namespace llvm::jitlink; -namespace llvm { -namespace jitlink { +namespace { class ELFJITLinker_aarch64 : public JITLinker { friend class JITLinker; @@ -37,50 +38,77 @@ public: private: Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { - using namespace aarch64; - using namespace llvm::support; - - char *BlockWorkingMem = B.getAlreadyMutableContent().data(); - char *FixupPtr = BlockWorkingMem + E.getOffset(); - auto FixupAddress = B.getAddress() + E.getOffset(); - switch (E.getKind()) { - case aarch64::R_AARCH64_CALL26: { - assert((FixupAddress.getValue() & 0x3) == 0 && - "Call-inst is not 32-bit aligned"); - int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); - - if (static_cast(Value) & 0x3) - return make_error("Call target is not 32-bit aligned"); - - if (!isInt<28>(Value)) - return makeTargetOutOfRangeError(G, B, E); - - uint32_t RawInstr = *(little32_t *)FixupPtr; - assert((RawInstr & 0x7fffffff) == 0x14000000 && - "RawInstr isn't a B or BR immediate instruction"); - uint32_t Imm = (static_cast(Value) & ((1 << 28) - 1)) >> 2; - uint32_t FixedInstr = RawInstr | Imm; - *(little32_t *)FixupPtr = FixedInstr; - break; - } - } - return Error::success(); + return aarch64::applyFixup(G, B, E); } }; template class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { private: - static Expected + enum ELFAArch64RelocationKind : Edge::Kind { + ELFCall26 = Edge::FirstRelocation, + ELFAdrPage21, + ELFAddAbs12, + ELFLdSt8Abs12, + ELFLdSt16Abs12, + ELFLdSt32Abs12, + ELFLdSt64Abs12, + ELFLdSt128Abs12, + ELFMovwAbsG0, + ELFMovwAbsG1, + ELFMovwAbsG2, + ELFMovwAbsG3, + ELFAbs64, + ELFPrel32, + ELFPrel64, + ELFAdrGOTPage21, + ELFLd64GOTLo12, + }; + + static Expected getRelocationKind(const uint32_t Type) { using namespace aarch64; switch (Type) { case ELF::R_AARCH64_CALL26: - return EdgeKind_aarch64::R_AARCH64_CALL26; + case ELF::R_AARCH64_JUMP26: + return ELFCall26; + case ELF::R_AARCH64_ADR_PREL_PG_HI21: + return ELFAdrPage21; + case ELF::R_AARCH64_ADD_ABS_LO12_NC: + return ELFAddAbs12; + case ELF::R_AARCH64_LDST8_ABS_LO12_NC: + return ELFLdSt8Abs12; + case ELF::R_AARCH64_LDST16_ABS_LO12_NC: + return ELFLdSt16Abs12; + case ELF::R_AARCH64_LDST32_ABS_LO12_NC: + return ELFLdSt32Abs12; + case ELF::R_AARCH64_LDST64_ABS_LO12_NC: + return ELFLdSt64Abs12; + case ELF::R_AARCH64_LDST128_ABS_LO12_NC: + return ELFLdSt128Abs12; + case ELF::R_AARCH64_MOVW_UABS_G0_NC: + return ELFMovwAbsG0; + case ELF::R_AARCH64_MOVW_UABS_G1_NC: + return ELFMovwAbsG1; + case ELF::R_AARCH64_MOVW_UABS_G2_NC: + return ELFMovwAbsG2; + case ELF::R_AARCH64_MOVW_UABS_G3: + return ELFMovwAbsG3; + case ELF::R_AARCH64_ABS64: + return ELFAbs64; + case ELF::R_AARCH64_PREL32: + return ELFPrel32; + case ELF::R_AARCH64_PREL64: + return ELFPrel64; + case ELF::R_AARCH64_ADR_GOT_PAGE: + return ELFAdrGOTPage21; + case ELF::R_AARCH64_LD64_GOT_LO12_NC: + return ELFLd64GOTLo12; } - return make_error("Unsupported aarch64 relocation:" + - formatv("{0:d}", Type)); + return make_error( + "Unsupported aarch64 relocation:" + formatv("{0:d}: ", Type) + + object::getELFRelocationTypeName(ELF::EM_AARCH64, Type)); } Error addRelocations() override { @@ -99,6 +127,7 @@ private: Error addSingleRelocation(const typename ELFT::Rela &Rel, const typename ELFT::Shdr &FixupSect, Block &BlockToFix) { + using 
support::ulittle32_t; using Base = ELFLinkGraphBuilder; uint32_t SymbolIndex = Rel.getSymbol(false); @@ -116,18 +145,159 @@ private: inconvertibleErrorCode()); uint32_t Type = Rel.getType(false); - Expected Kind = getRelocationKind(Type); - if (!Kind) - return Kind.takeError(); + Expected RelocKind = getRelocationKind(Type); + if (!RelocKind) + return RelocKind.takeError(); int64_t Addend = Rel.r_addend; orc::ExecutorAddr FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress(); - Edge GE(*Kind, Offset, *GraphSymbol, Addend); + + // Get a pointer to the fixup content. + const void *FixupContent = BlockToFix.getContent().data() + + (FixupAddress - BlockToFix.getAddress()); + + Edge::Kind Kind = Edge::Invalid; + + switch (*RelocKind) { + case ELFCall26: { + Kind = aarch64::Branch26; + break; + } + case ELFAdrPage21: { + Kind = aarch64::Page21; + break; + } + case ELFAddAbs12: { + Kind = aarch64::PageOffset12; + break; + } + case ELFLdSt8Abs12: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isLoadStoreImm12(Instr) || + aarch64::getPageOffset12Shift(Instr) != 0) + return make_error( + "R_AARCH64_LDST8_ABS_LO12_NC target is not a " + "LDRB/STRB (imm12) instruction"); + + Kind = aarch64::PageOffset12; + break; + } + case ELFLdSt16Abs12: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isLoadStoreImm12(Instr) || + aarch64::getPageOffset12Shift(Instr) != 1) + return make_error( + "R_AARCH64_LDST16_ABS_LO12_NC target is not a " + "LDRH/STRH (imm12) instruction"); + + Kind = aarch64::PageOffset12; + break; + } + case ELFLdSt32Abs12: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isLoadStoreImm12(Instr) || + aarch64::getPageOffset12Shift(Instr) != 2) + return make_error( + "R_AARCH64_LDST32_ABS_LO12_NC target is not a " + "LDR/STR (imm12, 32 bit) instruction"); + + Kind = aarch64::PageOffset12; + break; + } + case ELFLdSt64Abs12: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isLoadStoreImm12(Instr) || + aarch64::getPageOffset12Shift(Instr) != 3) + return make_error( + "R_AARCH64_LDST64_ABS_LO12_NC target is not a " + "LDR/STR (imm12, 64 bit) instruction"); + + Kind = aarch64::PageOffset12; + break; + } + case ELFLdSt128Abs12: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isLoadStoreImm12(Instr) || + aarch64::getPageOffset12Shift(Instr) != 4) + return make_error( + "R_AARCH64_LDST128_ABS_LO12_NC target is not a " + "LDR/STR (imm12, 128 bit) instruction"); + + Kind = aarch64::PageOffset12; + break; + } + case ELFMovwAbsG0: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isMoveWideImm16(Instr) || + aarch64::getMoveWide16Shift(Instr) != 0) + return make_error( + "R_AARCH64_MOVW_UABS_G0_NC target is not a " + "MOVK/MOVZ (imm16, LSL #0) instruction"); + + Kind = aarch64::MoveWide16; + break; + } + case ELFMovwAbsG1: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isMoveWideImm16(Instr) || + aarch64::getMoveWide16Shift(Instr) != 16) + return make_error( + "R_AARCH64_MOVW_UABS_G1_NC target is not a " + "MOVK/MOVZ (imm16, LSL #16) instruction"); + + Kind = aarch64::MoveWide16; + break; + } + case ELFMovwAbsG2: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isMoveWideImm16(Instr) || + aarch64::getMoveWide16Shift(Instr) != 32) + return make_error( + "R_AARCH64_MOVW_UABS_G2_NC target is not a " + "MOVK/MOVZ (imm16, LSL #32) instruction"); 
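
Each R_AARCH64_MOVW_UABS_G<n> case in this switch checks that the target MOVZ/MOVK instruction's shift matches the relocation's 16-bit chunk. A sketch of the hw-field extraction involved (hw sits at bits 21-22 per the Arm ARM; this patch's aarch64::getMoveWide16Shift is assumed to compute the equivalent):

#include <cassert>
#include <cstdint>

// MOVZ/MOVK (wide immediate): hw selects which 16-bit chunk imm16
// lands in, i.e. a left shift of hw * 16.
static unsigned moveWide16Shift(uint32_t Instr) {
  return ((Instr >> 21) & 0x3) * 16;
}

int main() {
  // "movz x0, #0x1234, lsl #16" encodes as 0xd2a24680.
  assert(moveWide16Shift(0xd2a24680) == 16);
}
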
+ + Kind = aarch64::MoveWide16; + break; + } + case ELFMovwAbsG3: { + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if (!aarch64::isMoveWideImm16(Instr) || + aarch64::getMoveWide16Shift(Instr) != 48) + return make_error( + "R_AARCH64_MOVW_UABS_G3 target is not a " + "MOVK/MOVZ (imm16, LSL #48) instruction"); + + Kind = aarch64::MoveWide16; + break; + } + case ELFAbs64: { + Kind = aarch64::Pointer64; + break; + } + case ELFPrel32: { + Kind = aarch64::Delta32; + break; + } + case ELFPrel64: { + Kind = aarch64::Delta64; + break; + } + case ELFAdrGOTPage21: { + Kind = aarch64::GOTPage21; + break; + } + case ELFLd64GOTLo12: { + Kind = aarch64::GOTPageOffset12; + break; + } + }; + + Edge GE(Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ dbgs() << " "; - printEdge(dbgs(), BlockToFix, GE, aarch64::getEdgeKindName(*Kind)); + printEdge(dbgs(), BlockToFix, GE, aarch64::getEdgeKindName(Kind)); dbgs() << "\n"; }); @@ -135,6 +305,48 @@ private: return Error::success(); } + /// Return the string name of the given ELF aarch64 edge kind. + const char *getELFAArch64RelocationKindName(Edge::Kind R) { + switch (R) { + case ELFCall26: + return "ELFCall26"; + case ELFAdrPage21: + return "ELFAdrPage21"; + case ELFAddAbs12: + return "ELFAddAbs12"; + case ELFLdSt8Abs12: + return "ELFLdSt8Abs12"; + case ELFLdSt16Abs12: + return "ELFLdSt16Abs12"; + case ELFLdSt32Abs12: + return "ELFLdSt32Abs12"; + case ELFLdSt64Abs12: + return "ELFLdSt64Abs12"; + case ELFLdSt128Abs12: + return "ELFLdSt128Abs12"; + case ELFMovwAbsG0: + return "ELFMovwAbsG0"; + case ELFMovwAbsG1: + return "ELFMovwAbsG1"; + case ELFMovwAbsG2: + return "ELFMovwAbsG2"; + case ELFMovwAbsG3: + return "ELFMovwAbsG3"; + case ELFAbs64: + return "ELFAbs64"; + case ELFPrel32: + return "ELFPrel32"; + case ELFPrel64: + return "ELFPrel64"; + case ELFAdrGOTPage21: + return "ELFAdrGOTPage21"; + case ELFLd64GOTLo12: + return "ELFLd64GOTLo12"; + default: + return getGenericEdgeKindName(static_cast(R)); + } + } + public: ELFLinkGraphBuilder_aarch64(StringRef FileName, const object::ELFFile &Obj, const Triple T) @@ -142,6 +354,20 @@ public: aarch64::getEdgeKindName) {} }; +Error buildTables_ELF_aarch64(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); + + aarch64::GOTTableManager GOT; + aarch64::PLTTableManager PLT(GOT); + visitExistingEdges(G, GOT, PLT); + return Error::success(); +} + +} // namespace + +namespace llvm { +namespace jitlink { + Expected> createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer) { LLVM_DEBUG({ @@ -168,11 +394,22 @@ void link_ELF_aarch64(std::unique_ptr G, PassConfiguration Config; const Triple &TT = G->getTargetTriple(); if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add eh-frame passses. + Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame")); + Config.PrePrunePasses.push_back(EHFrameEdgeFixer( + ".eh_frame", 8, aarch64::Pointer32, aarch64::Pointer64, + aarch64::Delta32, aarch64::Delta64, aarch64::NegDelta32)); + + // Add a mark-live pass. if (auto MarkLive = Ctx->getMarkLivePass(TT)) Config.PrePrunePasses.push_back(std::move(MarkLive)); else Config.PrePrunePasses.push_back(markAllSymbolsLive); + + // Add an in-place GOT/Stubs build pass. 
+ Config.PostPrunePasses.push_back(buildTables_ELF_aarch64); } + if (auto Err = Ctx->modifyPassConfig(*G, Config)) return Ctx->notifyFailed(std::move(Err)); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index f83001417e94..197ab71f5274 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -160,23 +160,16 @@ static Expected getRISCVPCRelHi20(const Edge &E) { } static uint32_t extractBits(uint32_t Num, unsigned Low, unsigned Size) { - return (Num & (((1ULL << (Size + 1)) - 1) << Low)) >> Low; + return (Num & (((1ULL << Size) - 1) << Low)) >> Low; } -inline Error checkAlignment(llvm::orc::ExecutorAddr loc, uint64_t v, int n, - const Edge &E) { - if (v & (n - 1)) - return make_error("0x" + llvm::utohexstr(loc.getValue()) + - " improper alignment for relocation " + - formatv("{0:d}", E.getKind()) + ": 0x" + - llvm::utohexstr(v) + " is not aligned to " + - Twine(n) + " bytes"); - return Error::success(); +static inline bool isAlignmentCorrect(uint64_t Value, int N) { + return (Value & (N - 1)) ? false : true; } -static inline bool isInRangeForImmS32(int64_t Value) { - return (Value >= std::numeric_limits::min() && - Value <= std::numeric_limits::max()); +// Requires 0 < N <= 64. +static inline bool isInRangeForImm(int64_t Value, int N) { + return Value == llvm::SignExtend64(Value, N); } class ELFJITLinker_riscv : public JITLinker { @@ -208,23 +201,36 @@ private: } case R_RISCV_BRANCH: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; - Error AlignmentIssue = checkAlignment(FixupAddress, Value, 2, E); - if (AlignmentIssue) { - return AlignmentIssue; - } - int64_t Lo = Value & 0xFFF; - uint32_t Imm31_25 = extractBits(Lo, 5, 6) << 25 | extractBits(Lo, 12, 1) - << 31; - uint32_t Imm11_7 = extractBits(Lo, 1, 4) << 8 | extractBits(Lo, 11, 1) - << 7; + if (LLVM_UNLIKELY(!isInRangeForImm(Value >> 1, 12))) + return makeTargetOutOfRangeError(G, B, E); + if (LLVM_UNLIKELY(!isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + uint32_t Imm31_25 = + extractBits(Value, 5, 6) << 25 | extractBits(Value, 12, 1) << 31; + uint32_t Imm11_7 = + extractBits(Value, 1, 4) << 8 | extractBits(Value, 11, 1) << 7; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0x1FFF07F) | Imm31_25 | Imm11_7; break; } + case R_RISCV_JAL: { + int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; + if (LLVM_UNLIKELY(!isInRangeForImm(Value >> 1, 20))) + return makeTargetOutOfRangeError(G, B, E); + if (LLVM_UNLIKELY(!isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + uint32_t Imm20 = extractBits(Value, 20, 1) << 31; + uint32_t Imm10_1 = extractBits(Value, 1, 10) << 21; + uint32_t Imm11 = extractBits(Value, 11, 1) << 20; + uint32_t Imm19_12 = extractBits(Value, 12, 8) << 12; + uint32_t RawInstr = *(little32_t *)FixupPtr; + *(little32_t *)FixupPtr = RawInstr | Imm20 | Imm10_1 | Imm11 | Imm19_12; + break; + } case R_RISCV_HI20: { int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); int64_t Hi = Value + 0x800; - if (LLVM_UNLIKELY(!isInRangeForImmS32(Hi))) + if (LLVM_UNLIKELY(!isInRangeForImm(Hi, 32))) return makeTargetOutOfRangeError(G, B, E); uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -244,7 +250,7 @@ private: case R_RISCV_CALL: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; int64_t Hi = 
Value + 0x800; - if (LLVM_UNLIKELY(!isInRangeForImmS32(Hi))) + if (LLVM_UNLIKELY(!isInRangeForImm(Hi, 32))) return makeTargetOutOfRangeError(G, B, E); int32_t Lo = Value & 0xFFF; uint32_t RawInstrAuipc = *(little32_t *)FixupPtr; @@ -258,7 +264,7 @@ private: case R_RISCV_PCREL_HI20: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; int64_t Hi = Value + 0x800; - if (LLVM_UNLIKELY(!isInRangeForImmS32(Hi))) + if (LLVM_UNLIKELY(!isInRangeForImm(Hi, 32))) return makeTargetOutOfRangeError(G, B, E); uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -359,6 +365,13 @@ private: *FixupPtr = static_cast(Value); break; } + case R_RISCV_SUB6: { + int64_t Value = + *(reinterpret_cast(FixupAddress.getValue())) & 0x3f; + Value -= E.getTarget().getAddress().getValue() - E.getAddend(); + *FixupPtr = (*FixupPtr & 0xc0) | (static_cast(Value) & 0x3f); + break; + } case R_RISCV_SET6: { int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); uint32_t RawData = *(little32_t *)FixupPtr; @@ -410,6 +423,8 @@ private: return EdgeKind_riscv::R_RISCV_64; case ELF::R_RISCV_BRANCH: return EdgeKind_riscv::R_RISCV_BRANCH; + case ELF::R_RISCV_JAL: + return EdgeKind_riscv::R_RISCV_JAL; case ELF::R_RISCV_HI20: return EdgeKind_riscv::R_RISCV_HI20; case ELF::R_RISCV_LO12_I: @@ -442,6 +457,8 @@ private: return EdgeKind_riscv::R_RISCV_SUB16; case ELF::R_RISCV_SUB8: return EdgeKind_riscv::R_RISCV_SUB8; + case ELF::R_RISCV_SUB6: + return EdgeKind_riscv::R_RISCV_SUB6; case ELF::R_RISCV_SET6: return EdgeKind_riscv::R_RISCV_SET6; case ELF::R_RISCV_SET8: @@ -454,8 +471,9 @@ private: return EdgeKind_riscv::R_RISCV_32_PCREL; } - return make_error("Unsupported riscv relocation:" + - formatv("{0:d}", Type)); + return make_error( + "Unsupported riscv relocation:" + formatv("{0:d}: ", Type) + + object::getELFRelocationTypeName(ELF::EM_RISCV, Type)); } Error addRelocations() override { diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 79d2cdbb30f1..8f21274bd1a3 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h" +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/ExecutionEngine/JITLink/TableManager.h" #include "llvm/ExecutionEngine/JITLink/x86_64.h" @@ -96,17 +97,6 @@ Error buildTables_ELF_x86_64(LinkGraph &G) { } } // namespace -static const char *getELFX86_64RelocName(uint32_t Type) { - switch (Type) { -#define ELF_RELOC(Name, Number) \ - case Number: \ - return #Name; -#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" -#undef ELF_RELOC - } - return "Unrecognized ELF/x86-64 relocation type"; -} - namespace llvm { namespace jitlink { @@ -145,9 +135,9 @@ private: case ELF::R_X86_64_TLSGD: return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32TLV; } - return make_error("Unsupported x86-64 relocation type " + - formatv("{0:d}: ", Type) + - getELFX86_64RelocName(Type)); + return make_error( + "Unsupported x86-64 relocation type " + formatv("{0:d}: ", Type) + + object::getELFRelocationTypeName(ELF::EM_X86_64, Type)); } Error addRelocations() override { @@ -379,10 +369,10 @@ void link_ELF_x86_64(std::unique_ptr G, if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) { - 
Config.PrePrunePasses.push_back(EHFrameSplitter(".eh_frame")); - Config.PrePrunePasses.push_back( - EHFrameEdgeFixer(".eh_frame", x86_64::PointerSize, x86_64::Delta64, - x86_64::Delta32, x86_64::NegDelta32)); + Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame")); + Config.PrePrunePasses.push_back(EHFrameEdgeFixer( + ".eh_frame", x86_64::PointerSize, x86_64::Pointer32, x86_64::Pointer64, + x86_64::Delta32, x86_64::Delta64, x86_64::NegDelta32)); Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame")); // Construct a JITLinker and run the link function. diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 78a603cfed17..43efe0725cfe 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -336,7 +336,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF) { void JITLinkAsyncLookupContinuation::anchor() {} -JITLinkContext::~JITLinkContext() {} +JITLinkContext::~JITLinkContext() = default; bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const { return true; @@ -393,6 +393,15 @@ Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B, return make_error(std::move(ErrMsg)); } +Error makeAlignmentError(llvm::orc::ExecutorAddr Loc, uint64_t Value, int N, + const Edge &E) { + return make_error("0x" + llvm::utohexstr(Loc.getValue()) + + " improper alignment for relocation " + + formatv("{0:d}", E.getKind()) + ": 0x" + + llvm::utohexstr(Value) + + " is not aligned to " + Twine(N) + " bytes"); +} + Expected> createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) { auto Magic = identify_magic(ObjectBuffer.getBuffer()); diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 35ee050c8566..6d321a080829 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -20,7 +20,7 @@ namespace llvm { namespace jitlink { -JITLinkerBase::~JITLinkerBase() {} +JITLinkerBase::~JITLinkerBase() = default; void JITLinkerBase::linkPhase1(std::unique_ptr Self) { diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 9315ac4f6120..acb759d6ce79 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -211,7 +211,7 @@ SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD, SimpleSegmentAlloc::SimpleSegmentAlloc(SimpleSegmentAlloc &&) = default; SimpleSegmentAlloc & SimpleSegmentAlloc::operator=(SimpleSegmentAlloc &&) = default; -SimpleSegmentAlloc::~SimpleSegmentAlloc() {} +SimpleSegmentAlloc::~SimpleSegmentAlloc() = default; SimpleSegmentAlloc::SegmentInfo SimpleSegmentAlloc::getSegInfo(AllocGroup AG) { auto I = ContentBlocks.find(AG); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index 62574604458c..1bf12f438be0 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -19,7 +19,7 @@ static const char *CommonSectionName = "__common"; namespace llvm { namespace jitlink { -MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {} +MachOLinkGraphBuilder::~MachOLinkGraphBuilder() = default; Expected> MachOLinkGraphBuilder::buildGraph() { @@ -368,7 +368,7 @@ 
Error MachOLinkGraphBuilder::graphifyRegularSymbols() { Twine(KV.first)); NSym.GraphSymbol = &G->addAbsoluteSymbol( *NSym.Name, orc::ExecutorAddr(NSym.Value), 0, Linkage::Strong, - Scope::Default, NSym.Desc & MachO::N_NO_DEAD_STRIP); + getScope(*NSym.Name, NSym.Type), NSym.Desc & MachO::N_NO_DEAD_STRIP); break; case MachO::N_SECT: SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); @@ -644,17 +644,27 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( // Scan section for null characters. for (size_t I = 0; I != NSec.Size; ++I) if (NSec.Data[I] == '\0') { - orc::ExecutorAddrDiff BlockEnd = I + 1; - size_t BlockSize = BlockEnd - BlockStart; + size_t BlockSize = I + 1 - BlockStart; // Create a block for this null terminated string. auto &B = G->createContentBlock(*NSec.GraphSection, {NSec.Data + BlockStart, BlockSize}, - NSec.Address + BlockStart, 1, 0); + NSec.Address + BlockStart, NSec.Alignment, + BlockStart % NSec.Alignment); LLVM_DEBUG({ - dbgs() << " Created block " << formatv("{0:x}", B.getAddress()) - << " -- " << formatv("{0:x}", B.getAddress() + B.getSize()) - << " for \"" << StringRef(B.getContent().data()) << "\"\n"; + dbgs() << " Created block " << B.getRange() + << ", align = " << B.getAlignment() + << ", align-ofs = " << B.getAlignmentOffset() << " for \""; + for (size_t J = 0; J != std::min(B.getSize(), size_t(16)); ++J) + switch (B.getContent()[J]) { + case '\0': break; + case '\n': dbgs() << "\\n"; break; + case '\t': dbgs() << "\\t"; break; + default: dbgs() << B.getContent()[J]; break; + } + if (B.getSize() > 16) + dbgs() << "..."; + dbgs() << "\"\n"; }); // If there's no symbol at the start of this block then create one. @@ -663,15 +673,13 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false); setCanonicalSymbol(NSec, S); LLVM_DEBUG({ - dbgs() << " Adding anonymous symbol for c-string block " - << formatv("{0:x16} -- {1:x16}", S.getAddress(), - S.getAddress() + BlockSize) - << "\n"; + dbgs() << " Adding symbol for c-string block " << B.getRange() + << ": at offset 0\n"; }); } // Process any remaining symbols that point into this block. - auto LastCanonicalAddr = B.getAddress() + BlockEnd; + auto LastCanonicalAddr = B.getAddress() + BlockSize; while (!NSyms.empty() && orc::ExecutorAddr(NSyms.back()->Value) < B.getAddress() + BlockSize) { auto &NSym = *NSyms.back(); @@ -686,8 +694,15 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( LastCanonicalAddr = orc::ExecutorAddr(NSym.Value); } - createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, SymLive, - IsCanonical); + auto &Sym = createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, + SymLive, IsCanonical); + (void)Sym; + LLVM_DEBUG({ + dbgs() << " Adding symbol for c-string block " << B.getRange() + << ": " + << (Sym.hasName() ? 
Sym.getName() : "<anonymous symbol>") + << " at offset " << formatv("{0:x}", Sym.getOffset()) << "\n"; + }); NSyms.pop_back(); } diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index 3ca2e40c7263..dd50314d3ed7 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -11,15 +11,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/JITLink/MachO_arm64.h" +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" +#include "llvm/ExecutionEngine/JITLink/aarch64.h" #include "MachOLinkGraphBuilder.h" -#include "PerGraphGOTAndPLTStubsBuilder.h" #define DEBUG_TYPE "jitlink" using namespace llvm; using namespace llvm::jitlink; -using namespace llvm::jitlink::MachO_arm64_Edges; namespace { class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { public: MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj) : MachOLinkGraphBuilder(Obj, Triple("arm64-apple-darwin"), - getMachOARM64RelocationKindName), + aarch64::getEdgeKindName), NumSymbols(Obj.getSymtabLoadCommand().nsyms) {} private: + enum MachOARM64RelocationKind : Edge::Kind { + MachOBranch26 = Edge::FirstRelocation, + MachOPointer32, + MachOPointer64, + MachOPointer64Anon, + MachOPage21, + MachOPageOffset12, + MachOGOTPage21, + MachOGOTPageOffset12, + MachOTLVPage21, + MachOTLVPageOffset12, + MachOPointerToGOT, + MachOPairedAddend, + MachOLDRLiteral19, + MachODelta32, + MachODelta64, + MachONegDelta32, + MachONegDelta64, + }; + static Expected<MachOARM64RelocationKind> getRelocationKind(const MachO::relocation_info &RI) { switch (RI.r_type) { case MachO::ARM64_RELOC_UNSIGNED: if (!RI.r_pcrel) { if (RI.r_length == 3) - return RI.r_extern ? Pointer64 : Pointer64Anon; + return RI.r_extern ? MachOPointer64 : MachOPointer64Anon; else if (RI.r_length == 2) - return Pointer32; + return MachOPointer32; } break; case MachO::ARM64_RELOC_SUBTRACTOR: @@ -48,46 +68,46 @@ private: // They may be turned into NegDelta by parsePairRelocation.
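// A SUBTRACTOR/UNSIGNED pair encodes the difference between two symbols.
// Whether the edge ends up as a Delta or a NegDelta depends on which
// symbol's block contains the fixup; a sketch of the selection that
// parsePairRelocation performs below (FixupInFromBlock is a hypothetical
// name for the &BlockToFix == &FromSymbol->getAddressable() test):
//
//   Edge::Kind pickDeltaKind(bool FixupInFromBlock, unsigned RLength) {
//     if (FixupInFromBlock) // fixup lives in the minuend's block
//       return RLength == 3 ? aarch64::Delta64 : aarch64::Delta32;
//     return RLength == 3 ? aarch64::NegDelta64 : aarch64::NegDelta32;
//   }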
if (!RI.r_pcrel && RI.r_extern) { if (RI.r_length == 2) - return Delta32; + return MachODelta32; else if (RI.r_length == 3) - return Delta64; + return MachODelta64; } break; case MachO::ARM64_RELOC_BRANCH26: if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return Branch26; + return MachOBranch26; break; case MachO::ARM64_RELOC_PAGE21: if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return Page21; + return MachOPage21; break; case MachO::ARM64_RELOC_PAGEOFF12: if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return PageOffset12; + return MachOPageOffset12; break; case MachO::ARM64_RELOC_GOT_LOAD_PAGE21: if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return GOTPage21; + return MachOGOTPage21; break; case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return GOTPageOffset12; + return MachOGOTPageOffset12; break; case MachO::ARM64_RELOC_POINTER_TO_GOT: if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return PointerToGOT; + return MachOPointerToGOT; break; case MachO::ARM64_RELOC_ADDEND: if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2) - return PairedAddend; + return MachOPairedAddend; break; case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21: if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return TLVPage21; + return MachOTLVPage21; break; case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12: if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) - return TLVPageOffset12; + return MachOTLVPageOffset12; break; } @@ -101,8 +121,7 @@ private: ", length=" + formatv("{0:d}", RI.r_length)); } - using PairRelocInfo = - std::tuple; + using PairRelocInfo = std::tuple; // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, // returns the edge kind and addend to be used. @@ -114,8 +133,8 @@ private: object::relocation_iterator &RelEnd) { using namespace support; - assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) || - (SubtractorKind == Delta64 && SubRI.r_length == 3)) && + assert(((SubtractorKind == MachODelta32 && SubRI.r_length == 2) || + (SubtractorKind == MachODelta64 && SubRI.r_length == 3)) && "Subtractor kind should match length"); assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern"); assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel"); @@ -165,17 +184,18 @@ private: FixupValue -= ToSymbol->getAddress().getValue(); } - MachOARM64RelocationKind DeltaKind; + Edge::Kind DeltaKind; Symbol *TargetSymbol; uint64_t Addend; if (&BlockToFix == &FromSymbol->getAddressable()) { TargetSymbol = ToSymbol; - DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32; + DeltaKind = (SubRI.r_length == 3) ? aarch64::Delta64 : aarch64::Delta32; Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); // FIXME: handle extern 'from'. } else if (&BlockToFix == &ToSymbol->getAddressable()) { TargetSymbol = &*FromSymbol; - DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32; + DeltaKind = + (SubRI.r_length == 3) ? aarch64::NegDelta64 : aarch64::NegDelta32; Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); } else { // BlockToFix was neither FromSymbol nor ToSymbol. @@ -229,9 +249,9 @@ private: MachO::relocation_info RI = getRelocationInfo(RelItr); // Validate the relocation kind. - auto Kind = getRelocationKind(RI); - if (!Kind) - return Kind.takeError(); + auto MachORelocKind = getRelocationKind(RI); + if (!MachORelocKind) + return MachORelocKind.takeError(); // Find the address of the value to fix up. 
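// Note: r_length encodes the fixup width as log2(bytes), so the
// r_length == 2 / == 3 checks above select 32- and 64-bit fixups. A small
// helper sketch (hypothetical, not part of this patch):
//
//   static unsigned fixupSizeInBytes(const MachO::relocation_info &RI) {
//     return 1u << RI.r_length; // 0, 1, 2, 3 -> 1, 2, 4, 8 bytes
//   }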
orc::ExecutorAddr FixupAddress = @@ -255,6 +275,8 @@ private: return make_error( "Relocation content extends past end of fixup block"); + Edge::Kind Kind = Edge::Invalid; + // Get a pointer to the fixup content. const char *FixupContent = BlockToFix->getContent().data() + (FixupAddress - BlockToFix->getAddress()); @@ -263,7 +285,7 @@ private: Symbol *TargetSymbol = nullptr; uint64_t Addend = 0; - if (*Kind == PairedAddend) { + if (*MachORelocKind == MachOPairedAddend) { // If this is an Addend relocation then process it and move to the // paired reloc. @@ -275,19 +297,21 @@ private: ++RelItr; RI = getRelocationInfo(RelItr); - Kind = getRelocationKind(RI); - if (!Kind) - return Kind.takeError(); + MachORelocKind = getRelocationKind(RI); + if (!MachORelocKind) + return MachORelocKind.takeError(); - if (*Kind != Branch26 && *Kind != Page21 && *Kind != PageOffset12) + if (*MachORelocKind != MachOBranch26 && + *MachORelocKind != MachOPage21 && + *MachORelocKind != MachOPageOffset12) return make_error( "Invalid relocation pair: Addend + " + - StringRef(getMachOARM64RelocationKindName(*Kind))); + StringRef(getMachOARM64RelocationKindName(*MachORelocKind))); LLVM_DEBUG({ dbgs() << " Addend: value = " << formatv("{0:x6}", Addend) - << ", pair is " << getMachOARM64RelocationKindName(*Kind) - << "\n"; + << ", pair is " + << getMachOARM64RelocationKindName(*MachORelocKind) << "\n"; }); // Find the address of the value to fix up. @@ -298,8 +322,8 @@ private: "different target"); } - switch (*Kind) { - case Branch26: { + switch (*MachORelocKind) { + case MachOBranch26: { if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else @@ -308,23 +332,26 @@ private: if ((Instr & 0x7fffffff) != 0x14000000) return make_error("BRANCH26 target is not a B or BL " "instruction with a zero addend"); + Kind = aarch64::Branch26; break; } - case Pointer32: + case MachOPointer32: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); Addend = *(const ulittle32_t *)FixupContent; + Kind = aarch64::Pointer32; break; - case Pointer64: + case MachOPointer64: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); Addend = *(const ulittle64_t *)FixupContent; + Kind = aarch64::Pointer64; break; - case Pointer64Anon: { + case MachOPointer64Anon: { orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) @@ -335,11 +362,12 @@ private: else return TargetSymbolOrErr.takeError(); Addend = TargetAddress - TargetSymbol->getAddress(); + Kind = aarch64::Pointer64Anon; break; } - case Page21: - case TLVPage21: - case GOTPage21: { + case MachOPage21: + case MachOTLVPage21: + case MachOGOTPage21: { if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else @@ -349,9 +377,17 @@ private: return make_error("PAGE21/GOTPAGE21 target is not an " "ADRP instruction with a zero " "addend"); + + if (*MachORelocKind == MachOPage21) { + Kind = aarch64::Page21; + } else if (*MachORelocKind == MachOTLVPage21) { + Kind = aarch64::TLVPage21; + } else if (*MachORelocKind == MachOGOTPage21) { + Kind = aarch64::GOTPage21; + } break; } - case PageOffset12: { + case MachOPageOffset12: { if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = 
TargetSymbolOrErr->GraphSymbol; else @@ -361,10 +397,11 @@ private: if (EncodedAddend != 0) return make_error("GOTPAGEOFF12 target has non-zero " "encoded addend"); + Kind = aarch64::PageOffset12; break; } - case TLVPageOffset12: - case GOTPageOffset12: { + case MachOTLVPageOffset12: + case MachOGOTPageOffset12: { if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else @@ -374,27 +411,35 @@ private: return make_error("GOTPAGEOFF12 target is not an LDR " "immediate instruction with a zero " "addend"); + + if (*MachORelocKind == MachOTLVPageOffset12) { + Kind = aarch64::TLVPageOffset12; + } else if (*MachORelocKind == MachOGOTPageOffset12) { + Kind = aarch64::GOTPageOffset12; + } break; } - case PointerToGOT: + case MachOPointerToGOT: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); + + Kind = aarch64::PointerToGOT; break; - case Delta32: - case Delta64: { + case MachODelta32: + case MachODelta64: { // We use Delta32/Delta64 to represent SUBTRACTOR relocations. // parsePairRelocation handles the paired reloc, and returns the // edge kind to be used (either Delta32/Delta64, or // NegDelta32/NegDelta64, depending on the direction of the // subtraction) along with the addend. auto PairInfo = - parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress, - FixupContent, ++RelItr, RelEnd); + parsePairRelocation(*BlockToFix, *MachORelocKind, RI, + FixupAddress, FixupContent, ++RelItr, RelEnd); if (!PairInfo) return PairInfo.takeError(); - std::tie(*Kind, TargetSymbol, Addend) = *PairInfo; + std::tie(Kind, TargetSymbol, Addend) = *PairInfo; assert(TargetSymbol && "No target symbol from parsePairRelocation?"); break; } @@ -405,108 +450,59 @@ private: LLVM_DEBUG({ dbgs() << " "; - Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, + Edge GE(Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, Addend); - printEdge(dbgs(), *BlockToFix, GE, - getMachOARM64RelocationKindName(*Kind)); + printEdge(dbgs(), *BlockToFix, GE, aarch64::getEdgeKindName(Kind)); dbgs() << "\n"; }); - BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(), + BlockToFix->addEdge(Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, Addend); } } return Error::success(); } - unsigned NumSymbols = 0; -}; - -class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 - : public PerGraphGOTAndPLTStubsBuilder< - PerGraphGOTAndPLTStubsBuilder_MachO_arm64> { -public: - using PerGraphGOTAndPLTStubsBuilder< - PerGraphGOTAndPLTStubsBuilder_MachO_arm64>::PerGraphGOTAndPLTStubsBuilder; - - bool isGOTEdgeToFix(Edge &E) const { - return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 || - E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12 || - E.getKind() == PointerToGOT; - } - - Symbol &createGOTEntry(Symbol &Target) { - auto &GOTEntryBlock = G.createContentBlock( - getGOTSection(), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0); - GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); - return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); - } - - void fixGOTEdge(Edge &E, Symbol &GOTEntry) { - if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 || - E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12) { - // Update the target, but leave the edge addend as-is. 
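// (In the removed code below, PointerToGOT is the one GOT edge that changes
// kind as well as target: it is rewritten as a 32-bit delta from the fixup
// location to the GOT entry, so the value written becomes
// GOTEntry - FixupAddress; the Page21/PageOffset12-style edges only
// retarget and keep their kind.)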
- E.setTarget(GOTEntry); - } else if (E.getKind() == PointerToGOT) { - E.setTarget(GOTEntry); - E.setKind(Delta32); - } else - llvm_unreachable("Not a GOT edge?"); - } - - bool isExternalBranchEdge(Edge &E) { - return E.getKind() == Branch26 && !E.getTarget().isDefined(); - } - - Symbol &createPLTStub(Symbol &Target) { - auto &StubContentBlock = G.createContentBlock( - getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 1, 0); - // Re-use GOT entries for stub targets. - auto &GOTEntrySymbol = getGOTEntry(Target); - StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0); - return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false); - } - - void fixPLTEdge(Edge &E, Symbol &Stub) { - assert(E.getKind() == Branch26 && "Not a Branch32 edge?"); - assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?"); - E.setTarget(Stub); - } - -private: - Section &getGOTSection() { - if (!GOTSection) - GOTSection = &G.createSection("$__GOT", MemProt::Read | MemProt::Exec); - return *GOTSection; - } - - Section &getStubsSection() { - if (!StubsSection) - StubsSection = - &G.createSection("$__STUBS", MemProt::Read | MemProt::Exec); - return *StubsSection; - } - - ArrayRef<char> getGOTEntryBlockContent() { - return {reinterpret_cast<const char *>(NullGOTEntryContent), - sizeof(NullGOTEntryContent)}; - } - - ArrayRef<char> getStubBlockContent() { - return {reinterpret_cast<const char *>(StubContent), sizeof(StubContent)}; + /// Return the string name of the given MachO arm64 edge kind. + const char *getMachOARM64RelocationKindName(Edge::Kind R) { + switch (R) { + case MachOBranch26: + return "MachOBranch26"; + case MachOPointer64: + return "MachOPointer64"; + case MachOPointer64Anon: + return "MachOPointer64Anon"; + case MachOPage21: + return "MachOPage21"; + case MachOPageOffset12: + return "MachOPageOffset12"; + case MachOGOTPage21: + return "MachOGOTPage21"; + case MachOGOTPageOffset12: + return "MachOGOTPageOffset12"; + case MachOTLVPage21: + return "MachOTLVPage21"; + case MachOTLVPageOffset12: + return "MachOTLVPageOffset12"; + case MachOPointerToGOT: + return "MachOPointerToGOT"; + case MachOPairedAddend: + return "MachOPairedAddend"; + case MachOLDRLiteral19: + return "MachOLDRLiteral19"; + case MachODelta32: + return "MachODelta32"; + case MachODelta64: + return "MachODelta64"; + case MachONegDelta32: + return "MachONegDelta32"; + case MachONegDelta64: + return "MachONegDelta64"; + default: + return getGenericEdgeKindName(static_cast<Edge::Kind>(R)); + } } - static const uint8_t NullGOTEntryContent[8]; - static const uint8_t StubContent[8]; - Section *GOTSection = nullptr; - Section *StubsSection = nullptr; -}; - -const uint8_t - PerGraphGOTAndPLTStubsBuilder_MachO_arm64::NullGOTEntryContent[8] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -const uint8_t PerGraphGOTAndPLTStubsBuilder_MachO_arm64::StubContent[8] = { - 0x10, 0x00, 0x00, 0x58, // LDR x16, <literal> - 0x00, 0x02, 0x1f, 0xd6 // BR x16 + unsigned NumSymbols = 0; }; } // namespace @@ -514,6 +510,15 @@ const uint8_t PerGraphGOTAndPLTStubsBuilder_MachO_arm64::StubContent[8] = { namespace llvm { namespace jitlink { +Error buildTables_MachO_arm64(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); + + aarch64::GOTTableManager GOT; + aarch64::PLTTableManager PLT(GOT); + visitExistingEdges(G, GOT, PLT); + return Error::success(); +} + class MachOJITLinker_arm64 : public JITLinker<MachOJITLinker_arm64> { friend class JITLinker<MachOJITLinker_arm64>; @@ -524,162 +529,8 @@ public: : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {} private: - - static unsigned
getPageOffset12Shift(uint32_t Instr) { - constexpr uint32_t LoadStoreImm12Mask = 0x3b000000; - constexpr uint32_t Vec128Mask = 0x04800000; - - if ((Instr & LoadStoreImm12Mask) == 0x39000000) { - uint32_t ImplicitShift = Instr >> 30; - if (ImplicitShift == 0) - if ((Instr & Vec128Mask) == Vec128Mask) - ImplicitShift = 4; - - return ImplicitShift; - } - - return 0; - } - Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { - using namespace support; - - char *BlockWorkingMem = B.getAlreadyMutableContent().data(); - char *FixupPtr = BlockWorkingMem + E.getOffset(); - orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); - - switch (E.getKind()) { - case Branch26: { - assert((FixupAddress.getValue() & 0x3) == 0 && - "Branch-inst is not 32-bit aligned"); - - int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); - - if (static_cast(Value) & 0x3) - return make_error("Branch26 target is not 32-bit " - "aligned"); - - if (Value < -(1 << 27) || Value > ((1 << 27) - 1)) - return makeTargetOutOfRangeError(G, B, E); - - uint32_t RawInstr = *(little32_t *)FixupPtr; - assert((RawInstr & 0x7fffffff) == 0x14000000 && - "RawInstr isn't a B or BR immediate instruction"); - uint32_t Imm = (static_cast(Value) & ((1 << 28) - 1)) >> 2; - uint32_t FixedInstr = RawInstr | Imm; - *(little32_t *)FixupPtr = FixedInstr; - break; - } - case Pointer32: { - uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); - if (Value > std::numeric_limits::max()) - return makeTargetOutOfRangeError(G, B, E); - *(ulittle32_t *)FixupPtr = Value; - break; - } - case Pointer64: - case Pointer64Anon: { - uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); - *(ulittle64_t *)FixupPtr = Value; - break; - } - case Page21: - case TLVPage21: - case GOTPage21: { - assert((E.getKind() != GOTPage21 || E.getAddend() == 0) && - "GOTPAGE21 with non-zero addend"); - uint64_t TargetPage = - (E.getTarget().getAddress().getValue() + E.getAddend()) & - ~static_cast(4096 - 1); - uint64_t PCPage = - FixupAddress.getValue() & ~static_cast(4096 - 1); - - int64_t PageDelta = TargetPage - PCPage; - if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1)) - return makeTargetOutOfRangeError(G, B, E); - - uint32_t RawInstr = *(ulittle32_t *)FixupPtr; - assert((RawInstr & 0xffffffe0) == 0x90000000 && - "RawInstr isn't an ADRP instruction"); - uint32_t ImmLo = (static_cast(PageDelta) >> 12) & 0x3; - uint32_t ImmHi = (static_cast(PageDelta) >> 14) & 0x7ffff; - uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5); - *(ulittle32_t *)FixupPtr = FixedInstr; - break; - } - case PageOffset12: { - uint64_t TargetOffset = - (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff; - - uint32_t RawInstr = *(ulittle32_t *)FixupPtr; - unsigned ImmShift = getPageOffset12Shift(RawInstr); - - if (TargetOffset & ((1 << ImmShift) - 1)) - return make_error("PAGEOFF12 target is not aligned"); - - uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10; - uint32_t FixedInstr = RawInstr | EncodedImm; - *(ulittle32_t *)FixupPtr = FixedInstr; - break; - } - case TLVPageOffset12: - case GOTPageOffset12: { - assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend"); - - uint32_t RawInstr = *(ulittle32_t *)FixupPtr; - assert((RawInstr & 0xfffffc00) == 0xf9400000 && - "RawInstr isn't a 64-bit LDR immediate"); - - uint32_t TargetOffset = E.getTarget().getAddress().getValue() & 0xfff; - assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned"); - uint32_t EncodedImm = 
(TargetOffset >> 3) << 10; - uint32_t FixedInstr = RawInstr | EncodedImm; - *(ulittle32_t *)FixupPtr = FixedInstr; - break; - } - case LDRLiteral19: { - assert((FixupAddress.getValue() & 0x3) == 0 && - "LDR is not 32-bit aligned"); - assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); - uint32_t RawInstr = *(ulittle32_t *)FixupPtr; - assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); - int64_t Delta = E.getTarget().getAddress() - FixupAddress; - if (Delta & 0x3) - return make_error("LDR literal target is not 32-bit " - "aligned"); - if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1)) - return makeTargetOutOfRangeError(G, B, E); - - uint32_t EncodedImm = - ((static_cast(Delta) >> 2) & 0x7ffff) << 5; - uint32_t FixedInstr = RawInstr | EncodedImm; - *(ulittle32_t *)FixupPtr = FixedInstr; - break; - } - case Delta32: - case Delta64: - case NegDelta32: - case NegDelta64: { - int64_t Value; - if (E.getKind() == Delta32 || E.getKind() == Delta64) - Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); - else - Value = FixupAddress - E.getTarget().getAddress() + E.getAddend(); - - if (E.getKind() == Delta32 || E.getKind() == NegDelta32) { - if (Value < std::numeric_limits::min() || - Value > std::numeric_limits::max()) - return makeTargetOutOfRangeError(G, B, E); - *(little32_t *)FixupPtr = Value; - } else - *(little64_t *)FixupPtr = Value; - break; - } - default: - llvm_unreachable("Unrecognized edge kind"); - } - - return Error::success(); + return aarch64::applyFixup(G, B, E); } uint64_t NullValue = 0; @@ -712,13 +563,14 @@ void link_MachO_arm64(std::unique_ptr G, // Add eh-frame passses. // FIXME: Prune eh-frames for which compact-unwind is available once // we support compact-unwind registration with libunwind. - Config.PrePrunePasses.push_back(EHFrameSplitter("__TEXT,__eh_frame")); Config.PrePrunePasses.push_back( - EHFrameEdgeFixer("__TEXT,__eh_frame", 8, Delta64, Delta32, NegDelta32)); + DWARFRecordSectionSplitter("__TEXT,__eh_frame")); + Config.PrePrunePasses.push_back(EHFrameEdgeFixer( + "__TEXT,__eh_frame", 8, aarch64::Pointer32, aarch64::Pointer64, + aarch64::Delta32, aarch64::Delta64, aarch64::NegDelta32)); // Add an in-place GOT/Stubs pass. 
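// (Building GOT entries and stubs post-prune means table entries are only
// created for edges that survive dead-stripping; a pre-prune pass could
// materialize entries for blocks that are about to be removed.)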
- Config.PostPrunePasses.push_back( - PerGraphGOTAndPLTStubsBuilder_MachO_arm64::asPass); + Config.PostPrunePasses.push_back(buildTables_MachO_arm64); } if (auto Err = Ctx->modifyPassConfig(*G, Config)) @@ -728,44 +580,5 @@ void link_MachO_arm64(std::unique_ptr G, MachOJITLinker_arm64::link(std::move(Ctx), std::move(G), std::move(Config)); } -const char *getMachOARM64RelocationKindName(Edge::Kind R) { - switch (R) { - case Branch26: - return "Branch26"; - case Pointer64: - return "Pointer64"; - case Pointer64Anon: - return "Pointer64Anon"; - case Page21: - return "Page21"; - case PageOffset12: - return "PageOffset12"; - case GOTPage21: - return "GOTPage21"; - case GOTPageOffset12: - return "GOTPageOffset12"; - case TLVPage21: - return "TLVPage21"; - case TLVPageOffset12: - return "TLVPageOffset12"; - case PointerToGOT: - return "PointerToGOT"; - case PairedAddend: - return "PairedAddend"; - case LDRLiteral19: - return "LDRLiteral19"; - case Delta32: - return "Delta32"; - case Delta64: - return "Delta64"; - case NegDelta32: - return "NegDelta32"; - case NegDelta64: - return "NegDelta64"; - default: - return getGenericEdgeKindName(static_cast(R)); - } -} - } // end namespace jitlink } // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 82afaa3aa3c5..6dfd5548fcfd 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -11,10 +11,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" #include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "MachOLinkGraphBuilder.h" -#include "PerGraphGOTAndPLTStubsBuilder.h" #define DEBUG_TYPE "jitlink" @@ -504,12 +504,13 @@ void link_MachO_x86_64(std::unique_ptr G, } LinkGraphPassFunction createEHFrameSplitterPass_MachO_x86_64() { - return EHFrameSplitter("__TEXT,__eh_frame"); + return DWARFRecordSectionSplitter("__TEXT,__eh_frame"); } LinkGraphPassFunction createEHFrameEdgeFixerPass_MachO_x86_64() { return EHFrameEdgeFixer("__TEXT,__eh_frame", x86_64::PointerSize, - x86_64::Delta64, x86_64::Delta32, x86_64::NegDelta32); + x86_64::Pointer32, x86_64::Pointer64, x86_64::Delta32, + x86_64::Delta64, x86_64::NegDelta32); } } // end namespace jitlink diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp index 6dccc4811885..28a6f9ce90d9 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp @@ -18,13 +18,55 @@ namespace llvm { namespace jitlink { namespace aarch64 { -const char *getEdgeKindName(Edge::Kind K) { - switch (K) { - case R_AARCH64_CALL26: - return "R_AARCH64_CALL26"; +const uint8_t NullGOTEntryContent[8] = {0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00}; + +const uint8_t StubContent[8] = { + 0x10, 0x00, 0x00, 0x58, // LDR x16, + 0x00, 0x02, 0x1f, 0xd6 // BR x16 +}; + +const char *getEdgeKindName(Edge::Kind R) { + switch (R) { + case Branch26: + return "Branch26"; + case Pointer64: + return "Pointer64"; + case Pointer64Anon: + return "Pointer64Anon"; + case Page21: + return "Page21"; + case PageOffset12: + return "PageOffset12"; + case MoveWide16: + return "MoveWide16"; + case GOTPage21: + return "GOTPage21"; + case GOTPageOffset12: + return "GOTPageOffset12"; + case TLVPage21: + return "TLVPage21"; + case TLVPageOffset12: + return 
"TLVPageOffset12"; + case PointerToGOT: + return "PointerToGOT"; + case PairedAddend: + return "PairedAddend"; + case LDRLiteral19: + return "LDRLiteral19"; + case Delta32: + return "Delta32"; + case Delta64: + return "Delta64"; + case NegDelta32: + return "NegDelta32"; + case NegDelta64: + return "NegDelta64"; + default: + return getGenericEdgeKindName(static_cast(R)); } - return getGenericEdgeKindName(K); } + } // namespace aarch64 } // namespace jitlink } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp index 3ce2cf10a24c..3848cc6b5f01 100644 --- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp @@ -26,6 +26,8 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_64"; case R_RISCV_BRANCH: return "R_RISCV_BRANCH"; + case R_RISCV_JAL: + return "R_RISCV_JAL"; case R_RISCV_HI20: return "R_RISCV_HI20"; case R_RISCV_LO12_I: @@ -56,6 +58,8 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_SUB16"; case R_RISCV_SUB8: return "R_RISCV_SUB8"; + case R_RISCV_SUB6: + return "R_RISCV_SUB6"; case R_RISCV_SET6: return "R_RISCV_SET6"; case R_RISCV_SET8: diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index ed912280ac82..4ac901daa5c8 100644 --- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DynamicLibrary.h" diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h index a5dd420c9132..f6c4cdbb8c91 100644 --- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -72,8 +72,7 @@ class MCJIT : public ExecutionEngine { class OwningModuleContainer { public: - OwningModuleContainer() { - } + OwningModuleContainer() = default; ~OwningModuleContainer() { freeModulePtrSet(AddedModules); freeModulePtrSet(LoadedModules); diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp index f34247005258..fad7428e1f90 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp @@ -12,6 +12,7 @@ #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index e5cb8103919a..dd80630a33c1 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -62,7 +62,7 @@ void ResourceTracker::makeDefunct() { JDAndFlag.store(Val); } -ResourceManager::~ResourceManager() {} +ResourceManager::~ResourceManager() = default; ResourceTrackerDefunct::ResourceTrackerDefunct(ResourceTrackerSP RT) : RT(std::move(RT)) {} @@ -76,9 +76,21 @@ void ResourceTrackerDefunct::log(raw_ostream &OS) const { } FailedToMaterialize::FailedToMaterialize( + std::shared_ptr SSP, std::shared_ptr Symbols) - : Symbols(std::move(Symbols)) { + : SSP(std::move(SSP)), Symbols(std::move(Symbols)) { + assert(this->SSP && "String pool cannot be null"); assert(!this->Symbols->empty() && "Can not fail to resolve an empty set"); + + // 
FIXME: Use a new dep-map type for FailedToMaterialize errors so that we + // don't have to manually retain/release. + for (auto &KV : *this->Symbols) + KV.first->Retain(); +} + +FailedToMaterialize::~FailedToMaterialize() { + for (auto &KV : *Symbols) + KV.first->Release(); } std::error_code FailedToMaterialize::convertToErrorCode() const { @@ -251,9 +263,21 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const { void AbsoluteSymbolsMaterializationUnit::materialize( std::unique_ptr<MaterializationResponsibility> R) { - // No dependencies, so these calls can't fail. - cantFail(R->notifyResolved(Symbols)); - cantFail(R->notifyEmitted()); + // Even though these are just absolute symbols we need to check for failure + // to resolve/emit: the tracker for these symbols may have been removed while + // the materialization was in flight (e.g. due to a failure in some action + // triggered by the queries attached to the resolution/emission of these + // symbols). + if (auto Err = R->notifyResolved(Symbols)) { + R->getExecutionSession().reportError(std::move(Err)); + R->failMaterialization(); + return; + } + if (auto Err = R->notifyEmitted()) { + R->getExecutionSession().reportError(std::move(Err)); + R->failMaterialization(); + return; + } } void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD, @@ -485,13 +509,16 @@ Expected<SymbolAliasMap> buildSimpleReexportsAliasMap(JITDylib &SourceJD, class InProgressLookupState { public: + // FIXME: Reduce the number of SymbolStringPtrs here. See + // https://github.com/llvm/llvm-project/issues/55576. + InProgressLookupState(LookupKind K, JITDylibSearchOrder SearchOrder, SymbolLookupSet LookupSet, SymbolState RequiredState) : K(K), SearchOrder(std::move(SearchOrder)), LookupSet(std::move(LookupSet)), RequiredState(RequiredState) { DefGeneratorCandidates = this->LookupSet; } - virtual ~InProgressLookupState() {} + virtual ~InProgressLookupState() = default; virtual void complete(std::unique_ptr<InProgressLookupState> IPLS) = 0; virtual void fail(Error Err) = 0; @@ -609,7 +636,7 @@ void LookupState::continueLookup(Error Err) { ES.OL_applyQueryPhase1(std::move(IPLS), std::move(Err)); } -DefinitionGenerator::~DefinitionGenerator() {} +DefinitionGenerator::~DefinitionGenerator() = default; JITDylib::~JITDylib() { LLVM_DEBUG(dbgs() << "Destroying JITDylib " << getName() << "\n"); @@ -959,6 +986,7 @@ Error JITDylib::resolve(MaterializationResponsibility &MR, auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>(); (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); return make_error<FailedToMaterialize>( + getExecutionSession().getSymbolStringPool(), std::move(FailedSymbolsDepMap)); } @@ -1036,6 +1064,7 @@ Error JITDylib::emit(MaterializationResponsibility &MR, auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>(); (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); return make_error<FailedToMaterialize>( + getExecutionSession().getSymbolStringPool(), std::move(FailedSymbolsDepMap)); } @@ -1411,12 +1440,11 @@ void JITDylib::dump(raw_ostream &OS) { for (auto &KV : Symbols) { OS << " \"" << *KV.first << "\": "; if (auto Addr = KV.second.getAddress()) - OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags() - << " "; + OS << format("0x%016" PRIx64, Addr); else OS << "<not resolved> "; - OS << KV.second.getFlags() << " " << KV.second.getState(); + OS << " " << KV.second.getFlags() << " " << KV.second.getState(); if (KV.second.hasMaterializerAttached()) { OS << " (Materializer "; @@ -1751,7 +1779,7 @@ void JITDylib::transferEmittedNodeDependencies( } } -Platform::~Platform() {} +Platform::~Platform() = default; Expected<DenseMap<JITDylib *, SymbolMap>> Platform::lookupInitSymbols(
ExecutionSession &ES, @@ -1858,6 +1886,12 @@ ExecutionSession::ExecutionSession(std::unique_ptr EPC) this->EPC->ES = this; } +ExecutionSession::~ExecutionSession() { + // You must call endSession prior to destroying the session. + assert(!SessionOpen && + "Session still open. Did you forget to call endSession?"); +} + Error ExecutionSession::endSession() { LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n"); @@ -1869,7 +1903,7 @@ Error ExecutionSession::endSession() { // TODO: notifiy platform? run static deinits? Error Err = Error::success(); - for (auto &JD : JITDylibsToClose) + for (auto &JD : reverse(JITDylibsToClose)) Err = joinErrors(std::move(Err), JD->clear()); Err = joinErrors(std::move(Err), EPC->disconnect()); @@ -1987,9 +2021,8 @@ JITDylib::getDFSLinkOrder(ArrayRef JDs) { for (auto &KV : llvm::reverse(Result.back()->LinkOrder)) { auto &JD = *KV.first; - if (Visited.count(&JD)) + if (!Visited.insert(&JD).second) continue; - Visited.insert(&JD); WorkStack.push_back(&JD); } } @@ -2071,7 +2104,7 @@ void ExecutionSession::lookup( Expected ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, - const SymbolLookupSet &Symbols, LookupKind K, + SymbolLookupSet Symbols, LookupKind K, SymbolState RequiredState, RegisterDependenciesFunction RegisterDependencies) { #if LLVM_ENABLE_THREADS @@ -2103,7 +2136,7 @@ ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, #endif // Perform the asynchronous lookup. - lookup(K, SearchOrder, Symbols, RequiredState, NotifyComplete, + lookup(K, SearchOrder, std::move(Symbols), RequiredState, NotifyComplete, RegisterDependencies); #if LLVM_ENABLE_THREADS @@ -2257,7 +2290,8 @@ Error ExecutionSession::removeResourceTracker(ResourceTracker &RT) { joinErrors(std::move(Err), L->handleRemoveResources(RT.getKeyUnsafe())); for (auto &Q : QueriesToFail) - Q->handleFailed(make_error(FailedSymbols)); + Q->handleFailed( + make_error(getSymbolStringPool(), FailedSymbols)); return Err; } @@ -2337,7 +2371,8 @@ Error ExecutionSession::IL_updateCandidatesFor( if (SymI->second.getFlags().hasError()) { auto FailedSymbolsMap = std::make_shared(); (*FailedSymbolsMap)[&JD] = {Name}; - return make_error(std::move(FailedSymbolsMap)); + return make_error(getSymbolStringPool(), + std::move(FailedSymbolsMap)); } // Otherwise this is a match. Remove it from the candidate set. @@ -2611,7 +2646,7 @@ void ExecutionSession::OL_completeLookup( auto FailedSymbolsMap = std::make_shared(); (*FailedSymbolsMap)[&JD] = {Name}; return make_error( - std::move(FailedSymbolsMap)); + getSymbolStringPool(), std::move(FailedSymbolsMap)); } // Otherwise this is a match. 
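// (The SymbolStringPool argument threaded through the
// make_error<FailedToMaterialize> call sites in this file keeps the pool
// alive for the lifetime of the error: the new constructor retains the
// failed symbol names and the destructor releases them, so the names stay
// valid even if the ExecutionSession is torn down first.)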
@@ -2947,7 +2982,8 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) { }); for (auto &Q : FailedQueries) - Q->handleFailed(make_error(FailedSymbols)); + Q->handleFailed( + make_error(getSymbolStringPool(), FailedSymbols)); } Error ExecutionSession::OL_replace(MaterializationResponsibility &MR, diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp index 4ff6b7fd54df..1e68ea1225e6 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp @@ -42,7 +42,7 @@ class DebugObjectSection { public: virtual void setTargetMemoryRange(SectionRange Range) = 0; virtual void dump(raw_ostream &OS, StringRef Name) {} - virtual ~DebugObjectSection() {} + virtual ~DebugObjectSection() = default; }; template diff --git a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp index 5b386a458f1f..028bd245fb55 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp @@ -297,6 +297,13 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) { llvm_unreachable("Invalid state"); } +raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP) { + std::lock_guard Lock(SSP.PoolMutex); + for (auto &KV : SSP.Pool) + OS << KV.first() << ": " << KV.second << "\n"; + return OS; +} + DumpObjects::DumpObjects(std::string DumpDir, std::string IdentifierOverride) : DumpDir(std::move(DumpDir)), IdentifierOverride(std::move(IdentifierOverride)) { diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp index 6916ee4a827f..3c44fe81b4a9 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp @@ -48,7 +48,7 @@ public: MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr) : G(G), RegisterActionAddr(RegisterActionAddr) {} - virtual ~MachODebugObjectSynthesizerBase() {} + virtual ~MachODebugObjectSynthesizerBase() = default; Error preserveDebugSections() { if (G.findSectionByName(SynthDebugSectionName)) { @@ -349,10 +349,11 @@ public: } SectionRange R(MachOContainerBlock->getSection()); - G.allocActions().push_back({cantFail(shared::WrapperFunctionCall::Create< - SPSArgList>( - RegisterActionAddr, R.getRange())), - {}}); + G.allocActions().push_back( + {cantFail(shared::WrapperFunctionCall::Create< + shared::SPSArgList>( + RegisterActionAddr, R.getRange())), + {}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index d02760703f06..e476c549412a 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -10,6 +10,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h" +#include "llvm/ExecutionEngine/JITLink/aarch64.h" #include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/ExecutionEngine/Orc/DebugUtils.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" @@ -47,6 +48,11 @@ public: Endianness = support::endianness::little; EdgeKind = jitlink::x86_64::Pointer64; break; + case Triple::aarch64: + PointerSize = 8; + Endianness = support::endianness::little; + EdgeKind = jitlink::aarch64::Pointer64; + break; default: llvm_unreachable("Unrecognized architecture"); } @@ -95,8 +101,6 @@ StringRef 
InitArrayFuncSectionName = ".init_array"; StringRef ThreadBSSSectionName = ".tbss"; StringRef ThreadDataSectionName = ".tdata"; -StringRef InitSectionNames[] = {InitArrayFuncSectionName}; - } // end anonymous namespace namespace llvm { @@ -117,8 +121,12 @@ ELFNixPlatform::Create(ExecutionSession &ES, inconvertibleErrorCode()); // Create default aliases if the caller didn't supply any. - if (!RuntimeAliases) - RuntimeAliases = standardPlatformAliases(ES); + if (!RuntimeAliases) { + auto StandardRuntimeAliases = standardPlatformAliases(ES, PlatformJD); + if (!StandardRuntimeAliases) + return StandardRuntimeAliases.takeError(); + RuntimeAliases = std::move(*StandardRuntimeAliases); + } // Define the aliases. if (auto Err = PlatformJD.define(symbolAliases(std::move(*RuntimeAliases)))) @@ -189,10 +197,53 @@ static void addAliases(ExecutionSession &ES, SymbolAliasMap &Aliases, } } -SymbolAliasMap ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES) { +Expected +ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES, + JITDylib &PlatformJD) { SymbolAliasMap Aliases; addAliases(ES, Aliases, requiredCXXAliases()); addAliases(ES, Aliases, standardRuntimeUtilityAliases()); + + // Determine whether or not the libunwind extended-API function for + // dynamically registering an entire .eh_frame section is available. + // If it is not, we assume that libgcc_s is being used, and alias to + // its __register_frame with the same functionality. + auto RTRegisterFrame = ES.intern("__orc_rt_register_eh_frame_section"); + auto LibUnwindRegisterFrame = ES.intern("__unw_add_dynamic_eh_frame_section"); + auto RTDeregisterFrame = ES.intern("__orc_rt_deregister_eh_frame_section"); + auto LibUnwindDeregisterFrame = + ES.intern("__unw_remove_dynamic_eh_frame_section"); + auto SM = ES.lookup(makeJITDylibSearchOrder(&PlatformJD), + SymbolLookupSet() + .add(LibUnwindRegisterFrame, + SymbolLookupFlags::WeaklyReferencedSymbol) + .add(LibUnwindDeregisterFrame, + SymbolLookupFlags::WeaklyReferencedSymbol)); + if (!SM) { // Weak-ref means no "missing symbol" errors, so this must be + // something more serious that we should report. 
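// (Looking the __unw_* symbols up with WeaklyReferencedSymbol means a
// missing pair produces an empty result instead of a missing-symbol error,
// so the SM->size() == 2 check below cleanly distinguishes "LLVM libunwind
// present" from "fall back to libgcc's __register_frame".)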
+ return SM.takeError(); + } else if (SM->size() == 2) { + LLVM_DEBUG({ + dbgs() << "Using libunwind " << LibUnwindRegisterFrame + << " for unwind info registration\n"; + }); + Aliases[std::move(RTRegisterFrame)] = {LibUnwindRegisterFrame, + JITSymbolFlags::Exported}; + Aliases[std::move(RTDeregisterFrame)] = {LibUnwindDeregisterFrame, + JITSymbolFlags::Exported}; + } else { + // Since LLVM libunwind is not present, we assume that unwinding + // is provided by libgcc + LLVM_DEBUG({ + dbgs() << "Using libgcc __register_frame" + << " for unwind info registration\n"; + }); + Aliases[std::move(RTRegisterFrame)] = {ES.intern("__register_frame"), + JITSymbolFlags::Exported}; + Aliases[std::move(RTDeregisterFrame)] = {ES.intern("__deregister_frame"), + JITSymbolFlags::Exported}; + } + return Aliases; } @@ -210,6 +261,10 @@ ELFNixPlatform::standardRuntimeUtilityAliases() { static const std::pair StandardRuntimeUtilityAliases[] = { {"__orc_rt_run_program", "__orc_rt_elfnix_run_program"}, + {"__orc_rt_jit_dlerror", "__orc_rt_elfnix_jit_dlerror"}, + {"__orc_rt_jit_dlopen", "__orc_rt_elfnix_jit_dlopen"}, + {"__orc_rt_jit_dlclose", "__orc_rt_elfnix_jit_dlclose"}, + {"__orc_rt_jit_dlsym", "__orc_rt_elfnix_jit_dlsym"}, {"__orc_rt_log_error", "__orc_rt_log_error_to_stderr"}}; return ArrayRef>( @@ -217,16 +272,16 @@ ELFNixPlatform::standardRuntimeUtilityAliases() { } bool ELFNixPlatform::isInitializerSection(StringRef SecName) { - for (auto &Name : InitSectionNames) { - if (Name.equals(SecName)) - return true; - } + if (SecName.consume_front(InitArrayFuncSectionName) && + (SecName.empty() || SecName[0] == '.')) + return true; return false; } bool ELFNixPlatform::supportedTarget(const Triple &TT) { switch (TT.getArch()) { case Triple::x86_64: + case Triple::aarch64: return true; default: return false; @@ -723,16 +778,15 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections( jitlink::LinkGraph &G, MaterializationResponsibility &MR) { JITLinkSymbolSet InitSectionSymbols; - for (auto &InitSectionName : InitSectionNames) { + for (auto &InitSection : G.sections()) { // Skip non-init sections. - auto *InitSection = G.findSectionByName(InitSectionName); - if (!InitSection) + if (!isInitializerSection(InitSection.getName())) continue; // Make a pass over live symbols in the section: those blocks are already // preserved. DenseSet AlreadyLiveBlocks; - for (auto &Sym : InitSection->symbols()) { + for (auto &Sym : InitSection.symbols()) { auto &B = Sym->getBlock(); if (Sym->isLive() && Sym->getOffset() == 0 && Sym->getSize() == B.getSize() && !AlreadyLiveBlocks.count(&B)) { @@ -742,7 +796,7 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections( } // Add anonymous symbols to preserve any not-already-preserved blocks. 
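// The anonymous symbol added below is created live purely to anchor blocks
// that have no live symbol of their own, so the pruner keeps them. Call
// shape (arguments as used in this function):
//
//   G.addAnonymousSymbol(*B, /*Offset=*/0, B->getSize(),
//                        /*IsCallable=*/false, /*IsLive=*/true);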
- for (auto *B : InitSection->blocks()) + for (auto *B : InitSection.blocks()) if (!AlreadyLiveBlocks.count(B)) InitSectionSymbols.insert( &G.addAnonymousSymbol(*B, 0, B->getSize(), false, true)); @@ -763,9 +817,9 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections( LLVM_DEBUG({ dbgs() << "ELFNixPlatform::registerInitSections\n"; }); - for (auto InitSectionName : InitSectionNames) { - if (auto *Sec = G.findSectionByName(InitSectionName)) { - InitSections.push_back(Sec); + for (auto &Sec : G.sections()) { + if (isInitializerSection(Sec.getName())) { + InitSections.push_back(&Sec); } } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp index f3fe0555fa75..c591acdd646b 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp @@ -45,7 +45,8 @@ createJITLoaderGDBRegistrar(ExecutionSession &ES) { Error EPCDebugObjectRegistrar::registerDebugObject( ExecutorAddrRange TargetMem) { - return ES.callSPSWrapper(RegisterFn, TargetMem); + return ES.callSPSWrapper(RegisterFn, + TargetMem); } } // namespace orc diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index b901a2d2da23..48aaab96e71f 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -88,7 +88,6 @@ EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU) } Error EPCTrampolinePool::deallocatePool() { - Error Err = Error::success(); std::promise DeallocResultP; auto DeallocResultF = DeallocResultP.get_future(); @@ -234,7 +233,7 @@ Error EPCIndirectStubsManager::updatePointer(StringRef Name, namespace llvm { namespace orc { -EPCIndirectionUtils::ABISupport::~ABISupport() {} +EPCIndirectionUtils::ABISupport::~ABISupport() = default; Expected> EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { @@ -261,6 +260,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { case Triple::mips64el: return CreateWithABI(EPC); + case Triple::riscv64: + return CreateWithABI(EPC); + case Triple::x86_64: if (TT.getOS() == Triple::OSType::Win32) return CreateWithABI(EPC); @@ -302,7 +304,8 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, return Alloc.takeError(); auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr.getValue(), + ResolverBlockAddr = SegInfo.Addr.getValue(); + ABI->writeResolverCode(SegInfo.WorkingMem.data(), ResolverBlockAddr, ReentryFnAddr, ReentryCtxAddr); auto FA = Alloc->finalize(); @@ -310,7 +313,7 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, return FA.takeError(); ResolverBlock = std::move(*FA); - return SegInfo.Addr.getValue(); + return ResolverBlockAddr; } std::unique_ptr diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index ae2d47fb8c5e..95cf89ec3f8b 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -62,7 +62,7 @@ CtorDtorIterator::Element CtorDtorIterator::operator*() const { break; } else if (ConstantExpr *CE = dyn_cast_or_null(FuncC)) { if (CE->isCast()) - FuncC = dyn_cast_or_null(CE->getOperand(0)); + FuncC = CE->getOperand(0); else break; } else { @@ -273,10 +273,10 @@ Expected> StaticLibraryDefinitionGenerator::Load( ObjectLayer &L, const char 
*FileName, GetObjectFileInterface GetObjFileInterface) { - auto ArchiveBuffer = errorOrToExpected(MemoryBuffer::getFile(FileName)); + auto ArchiveBuffer = MemoryBuffer::getFile(FileName); if (!ArchiveBuffer) - return ArchiveBuffer.takeError(); + return createFileError(FileName, ArchiveBuffer.getError()); return Create(L, std::move(*ArchiveBuffer), std::move(GetObjFileInterface)); } @@ -288,7 +288,7 @@ StaticLibraryDefinitionGenerator::Load( auto B = object::createBinary(FileName); if (!B) - return B.takeError(); + return createFileError(FileName, B.takeError()); // If this is a regular archive then create an instance from it. if (isa(B->getBinary())) diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp index 2eb835551adb..412b9f95ea62 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp @@ -19,9 +19,9 @@ namespace llvm { namespace orc { -ExecutorProcessControl::MemoryAccess::~MemoryAccess() {} +ExecutorProcessControl::MemoryAccess::~MemoryAccess() = default; -ExecutorProcessControl::~ExecutorProcessControl() {} +ExecutorProcessControl::~ExecutorProcessControl() = default; SelfExecutorProcessControl::SelfExecutorProcessControl( std::shared_ptr SSP, std::unique_ptr D, diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp index aadc437c80c4..69aba1fff59a 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp @@ -11,7 +11,7 @@ namespace llvm { namespace orc { -IRCompileLayer::IRCompiler::~IRCompiler() {} +IRCompileLayer::IRCompiler::~IRCompiler() = default; IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, std::unique_ptr Compile) diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 7a71d2f781d7..38cab526704f 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -59,7 +59,7 @@ private: namespace llvm { namespace orc { -TrampolinePool::~TrampolinePool() {} +TrampolinePool::~TrampolinePool() = default; void IndirectStubsManager::anchor() {} Expected @@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return CCMgrT::Create(ES, ErrorHandlerAddress); } + case Triple::riscv64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + case Triple::x86_64: { if (T.getOS() == Triple::OSType::Win32) { typedef orc::LocalJITCompileCallbackManager CCMgrT; @@ -206,6 +211,12 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { orc::LocalIndirectStubsManager>(); }; + case Triple::riscv64: + return []() { + return std::make_unique< + orc::LocalIndirectStubsManager>(); + }; + case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) { return [](){ @@ -431,8 +442,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, auto RelocOffInInstr = MIA.getMemoryOperandRelocationOffset(Instr, InstrSize); - if (!RelocOffInInstr.hasValue() || - InstrSize - RelocOffInInstr.getValue() != 4) { + if (!RelocOffInInstr || InstrSize - *RelocOffInInstr != 4) { LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at " << InstrStart); continue; diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index 
0fbf79b8a56d..c60f4b3b263c 100644 --- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -19,6 +19,7 @@ JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT) : TT(std::move(TT)) { Options.EmulatedTLS = true; Options.ExplicitEmulatedTLS = true; + Options.UseInitArray = true; } Expected JITTargetMachineBuilder::detectHost() { diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 91949c9d7eeb..6d67e6d87b56 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -143,7 +143,7 @@ public: JITEvaluatedSymbol(pointerToJITTargetAddress(this), JITSymbolFlags::Exported); StdInterposes[J.mangleAndIntern("__lljit.cxa_atexit_helper")] = - JITEvaluatedSymbol(pointerToJITTargetAddress(registerAtExitHelper), + JITEvaluatedSymbol(pointerToJITTargetAddress(registerCxaAtExitHelper), JITSymbolFlags()); cantFail( @@ -162,6 +162,9 @@ public: PerJDInterposes[J.mangleAndIntern("__lljit.run_atexits_helper")] = JITEvaluatedSymbol(pointerToJITTargetAddress(runAtExitsHelper), JITSymbolFlags()); + PerJDInterposes[J.mangleAndIntern("__lljit.atexit_helper")] = + JITEvaluatedSymbol(pointerToJITTargetAddress(registerAtExitHelper), + JITSymbolFlags()); cantFail(JD.define(absoluteSymbols(std::move(PerJDInterposes)))); auto Ctx = std::make_unique(); @@ -190,6 +193,14 @@ public: GlobalValue::HiddenVisibility, "__lljit.run_atexits_helper", {PlatformInstanceDecl, DSOHandle}); + auto *IntTy = Type::getIntNTy(*Ctx, sizeof(int) * CHAR_BIT); + auto *AtExitCallbackTy = FunctionType::get(VoidTy, {}, false); + auto *AtExitCallbackPtrTy = PointerType::getUnqual(AtExitCallbackTy); + addHelperAndWrapper(*M, "atexit", + FunctionType::get(IntTy, {AtExitCallbackPtrTy}, false), + GlobalValue::HiddenVisibility, "__lljit.atexit_helper", + {PlatformInstanceDecl, DSOHandle}); + return J.addIRModule(JD, ThreadSafeModule(std::move(M), std::move(Ctx))); } @@ -413,16 +424,25 @@ private: .takeError(); } - static void registerAtExitHelper(void *Self, void (*F)(void *), void *Ctx, - void *DSOHandle) { + static void registerCxaAtExitHelper(void *Self, void (*F)(void *), void *Ctx, + void *DSOHandle) { LLVM_DEBUG({ - dbgs() << "Registering atexit function " << (void *)F << " for JD " + dbgs() << "Registering cxa atexit function " << (void *)F << " for JD " << (*static_cast(DSOHandle))->getName() << "\n"; }); static_cast(Self)->AtExitMgr.registerAtExit( F, Ctx, DSOHandle); } + static void registerAtExitHelper(void *Self, void *DSOHandle, void (*F)()) { + LLVM_DEBUG({ + dbgs() << "Registering atexit function " << (void *)F << " for JD " + << (*static_cast(DSOHandle))->getName() << "\n"; + }); + static_cast(Self)->AtExitMgr.registerAtExit( + reinterpret_cast(F), nullptr, DSOHandle); + } + static void runAtExitsHelper(void *Self, void *DSOHandle) { LLVM_DEBUG({ dbgs() << "Running atexit functions for JD " @@ -450,12 +470,12 @@ private: auto *IntTy = Type::getIntNTy(*Ctx, sizeof(int) * CHAR_BIT); auto *VoidTy = Type::getVoidTy(*Ctx); auto *BytePtrTy = PointerType::getUnqual(Int8Ty); - auto *AtExitCallbackTy = FunctionType::get(VoidTy, {BytePtrTy}, false); - auto *AtExitCallbackPtrTy = PointerType::getUnqual(AtExitCallbackTy); + auto *CxaAtExitCallbackTy = FunctionType::get(VoidTy, {BytePtrTy}, false); + auto *CxaAtExitCallbackPtrTy = PointerType::getUnqual(CxaAtExitCallbackTy); addHelperAndWrapper( *M, "__cxa_atexit", - FunctionType::get(IntTy, {AtExitCallbackPtrTy, BytePtrTy, BytePtrTy}, + 
FunctionType::get(IntTy, {CxaAtExitCallbackPtrTy, BytePtrTy, BytePtrTy}, false), GlobalValue::DefaultVisibility, "__lljit.cxa_atexit_helper", {PlatformInstanceDecl}); @@ -521,11 +541,7 @@ GlobalCtorDtorScraper::operator()(ThreadSafeModule TSM, for (auto E : COrDtors) InitsOrDeInits.push_back(std::make_pair(E.Func, E.Priority)); - llvm::sort(InitsOrDeInits, - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }); + llvm::sort(InitsOrDeInits, llvm::less_second()); auto *InitOrDeInitFuncEntryBlock = BasicBlock::Create(Ctx, "entry", InitOrDeInitFunc); @@ -589,7 +605,7 @@ void LLJIT::PlatformSupport::setInitTransform( J.InitHelperTransformLayer->setTransform(std::move(T)); } -LLJIT::PlatformSupport::~PlatformSupport() {} +LLJIT::PlatformSupport::~PlatformSupport() = default; Error LLJITBuilderState::prepareForConstruction() { @@ -701,10 +717,14 @@ Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr Obj) { return addObjectFile(JD.getDefaultResourceTracker(), std::move(Obj)); } -Expected LLJIT::lookupLinkerMangled(JITDylib &JD, - SymbolStringPtr Name) { - return ES->lookup( - makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), Name); +Expected LLJIT::lookupLinkerMangled(JITDylib &JD, + SymbolStringPtr Name) { + if (auto Sym = ES->lookup( + makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), + Name)) + return ExecutorAddr(Sym->getAddress()); + else + return Sym.takeError(); } Expected> @@ -897,7 +917,7 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) { LCTMgr = std::move(S.LCTMgr); else { if (auto LCTMgrOrErr = createLocalLazyCallThroughManager( - S.TT, *ES, S.LazyCompileFailureAddr)) + S.TT, *ES, S.LazyCompileFailureAddr.getValue())) LCTMgr = std::move(*LCTMgrOrErr); else { Err = LCTMgrOrErr.takeError(); diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp index adb8861793b1..4a50f2d7a153 100644 --- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp @@ -19,7 +19,7 @@ namespace llvm { namespace orc { -IRLayer::~IRLayer() {} +IRLayer::~IRLayer() = default; Error IRLayer::add(ResourceTrackerSP RT, ThreadSafeModule TSM) { assert(RT && "RT can not be null"); @@ -158,7 +158,7 @@ char ObjectLayer::ID; ObjectLayer::ObjectLayer(ExecutionSession &ES) : ES(ES) {} -ObjectLayer::~ObjectLayer() {} +ObjectLayer::~ObjectLayer() = default; Error ObjectLayer::add(ResourceTrackerSP RT, std::unique_ptr O, MaterializationUnit::Interface I) { diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 66453e6a632f..20b655bdf4b1 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, case Triple::mips64el: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + case Triple::riscv64: + return LocalLazyCallThroughManager::Create(ES, + ErrorHandlerAddr); + case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) return LocalLazyCallThroughManager::Create( diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp index 44cb78c773c9..3452267e4df4 100644 --- a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp @@ -24,7 +24,7 @@ void lookupAndRecordAddrs( Symbols.add(KV.first, LookupFlags); ES.lookup( - K, 
SearchOrder, Symbols, SymbolState::Ready, + K, SearchOrder, std::move(Symbols), SymbolState::Ready, [Pairs = std::move(Pairs), OnRec = std::move(OnRecorded)](Expected Result) mutable { if (!Result) @@ -47,7 +47,7 @@ Error lookupAndRecordAddrs( std::promise ResultP; auto ResultF = ResultP.get_future(); lookupAndRecordAddrs([&](Error Err) { ResultP.set_value(std::move(Err)); }, - ES, K, SearchOrder, Pairs, LookupFlags); + ES, K, SearchOrder, std::move(Pairs), LookupFlags); return ResultF.get(); } diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index a364719855b4..d5274b06a76f 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -22,6 +22,39 @@ using namespace llvm; using namespace llvm::orc; using namespace llvm::orc::shared; +namespace llvm { +namespace orc { +namespace shared { + +using SPSMachOJITDylibDepInfo = SPSTuple>; +using SPSMachOJITDylibDepInfoMap = + SPSSequence>; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const MachOPlatform::MachOJITDylibDepInfo &DDI) { + return SPSMachOJITDylibDepInfo::AsArgList::size(DDI.Sealed, DDI.DepHeaders); + } + + static bool serialize(SPSOutputBuffer &OB, + const MachOPlatform::MachOJITDylibDepInfo &DDI) { + return SPSMachOJITDylibDepInfo::AsArgList::serialize(OB, DDI.Sealed, + DDI.DepHeaders); + } + + static bool deserialize(SPSInputBuffer &IB, + MachOPlatform::MachOJITDylibDepInfo &DDI) { + return SPSMachOJITDylibDepInfo::AsArgList::deserialize(IB, DDI.Sealed, + DDI.DepHeaders); + } +}; + +} // namespace shared +} // namespace orc +} // namespace llvm + namespace { class MachOHeaderMaterializationUnit : public MaterializationUnit { @@ -199,11 +232,25 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, } Error MachOPlatform::setupJITDylib(JITDylib &JD) { - return JD.define(std::make_unique( - *this, MachOHeaderStartSymbol)); + if (auto Err = JD.define(std::make_unique( + *this, MachOHeaderStartSymbol))) + return Err; + + return ES.lookup({&JD}, MachOHeaderStartSymbol).takeError(); } -Error MachOPlatform::teardownJITDylib(JITDylib &JD) { return Error::success(); } +Error MachOPlatform::teardownJITDylib(JITDylib &JD) { + std::lock_guard Lock(PlatformMutex); + auto I = JITDylibToHeaderAddr.find(&JD); + if (I != JITDylibToHeaderAddr.end()) { + assert(HeaderAddrToJITDylib.count(I->second) && + "HeaderAddrToJITDylib missing entry"); + HeaderAddrToJITDylib.erase(I->second); + JITDylibToHeaderAddr.erase(I); + } + JITDylibToPThreadKey.erase(&JD); + return Error::success(); +} Error MachOPlatform::notifyAdding(ResourceTracker &RT, const MaterializationUnit &MU) { @@ -255,6 +302,10 @@ MachOPlatform::standardRuntimeUtilityAliases() { static const std::pair StandardRuntimeUtilityAliases[] = { {"___orc_rt_run_program", "___orc_rt_macho_run_program"}, + {"___orc_rt_jit_dlerror", "___orc_rt_macho_jit_dlerror"}, + {"___orc_rt_jit_dlopen", "___orc_rt_macho_jit_dlopen"}, + {"___orc_rt_jit_dlclose", "___orc_rt_macho_jit_dlclose"}, + {"___orc_rt_jit_dlsym", "___orc_rt_macho_jit_dlsym"}, {"___orc_rt_log_error", "___orc_rt_log_error_to_stderr"}}; return ArrayRef>( @@ -305,16 +356,6 @@ MachOPlatform::MachOPlatform( State = BootstrapPhase2; - // PlatformJD hasn't been 'set-up' by the platform yet (since we're creating - // the platform now), so set it up. 
- if (auto E2 = setupJITDylib(PlatformJD)) { - Err = std::move(E2); - return; - } - - RegisteredInitSymbols[&PlatformJD].add( - MachOHeaderStartSymbol, SymbolLookupFlags::WeaklyReferencedSymbol); - // Associate wrapper function tags with JIT-side function implementations. if (auto E2 = associateRuntimeSupportFunctions(PlatformJD)) { Err = std::move(E2); @@ -329,23 +370,24 @@ MachOPlatform::MachOPlatform( return; } + // PlatformJD hasn't been set up by the platform yet (since we're creating + // the platform now), so set it up. + if (auto E2 = setupJITDylib(PlatformJD)) { + Err = std::move(E2); + return; + } + State = Initialized; } Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) { ExecutionSession::JITDispatchHandlerAssociationMap WFs; - using GetInitializersSPSSig = - SPSExpected(SPSString); - WFs[ES.intern("___orc_rt_macho_get_initializers_tag")] = - ES.wrapAsyncWithSPS( - this, &MachOPlatform::rt_getInitializers); - - using GetDeinitializersSPSSig = - SPSExpected(SPSExecutorAddr); - WFs[ES.intern("___orc_rt_macho_get_deinitializers_tag")] = - ES.wrapAsyncWithSPS( - this, &MachOPlatform::rt_getDeinitializers); + using PushInitializersSPSSig = + SPSExpected(SPSExecutorAddr); + WFs[ES.intern("___orc_rt_macho_push_initializers_tag")] = + ES.wrapAsyncWithSPS( + this, &MachOPlatform::rt_pushInitializers); using LookupSymbolSPSSig = SPSExpected(SPSExecutorAddr, SPSString); @@ -356,53 +398,83 @@ Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) { return ES.registerJITDispatchHandlers(PlatformJD, std::move(WFs)); } -void MachOPlatform::getInitializersBuildSequencePhase( - SendInitializerSequenceFn SendResult, JITDylib &JD, - std::vector DFSLinkOrder) { - MachOJITDylibInitializerSequence FullInitSeq; - { - std::lock_guard Lock(PlatformMutex); - for (auto &InitJD : reverse(DFSLinkOrder)) { - LLVM_DEBUG({ - dbgs() << "MachOPlatform: Appending inits for \"" << InitJD->getName() - << "\" to sequence\n"; - }); - auto ISItr = InitSeqs.find(InitJD.get()); - if (ISItr != InitSeqs.end()) { - FullInitSeq.emplace_back(std::move(ISItr->second)); - InitSeqs.erase(ISItr); - } - } - } - - SendResult(std::move(FullInitSeq)); -} - -void MachOPlatform::getInitializersLookupPhase( - SendInitializerSequenceFn SendResult, JITDylib &JD) { - - auto DFSLinkOrder = JD.getDFSLinkOrder(); - if (!DFSLinkOrder) { - SendResult(DFSLinkOrder.takeError()); - return; - } - +void MachOPlatform::pushInitializersLoop( + PushInitializersSendResultFn SendResult, JITDylibSP JD) { DenseMap NewInitSymbols; + DenseMap> JDDepMap; + SmallVector Worklist({JD.get()}); + ES.runSessionLocked([&]() { - for (auto &InitJD : *DFSLinkOrder) { - auto RISItr = RegisteredInitSymbols.find(InitJD.get()); + while (!Worklist.empty()) { + // FIXME: Check for defunct dylibs. + + auto DepJD = Worklist.back(); + Worklist.pop_back(); + + // If we've already visited this JITDylib on this iteration then continue. + if (JDDepMap.count(DepJD)) + continue; + + // Add dep info. + auto &DM = JDDepMap[DepJD]; + DepJD->withLinkOrderDo([&](const JITDylibSearchOrder &O) { + for (auto &KV : O) { + if (KV.first == DepJD) + continue; + DM.push_back(KV.first); + Worklist.push_back(KV.first); + } + }); + + // Add any registered init symbols. 
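+ // (The stored set is moved out and its entry erased, so each JITDylib's
+ // init symbols are claimed by at most one pushInitializers round.)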
+ auto RISItr = RegisteredInitSymbols.find(DepJD); if (RISItr != RegisteredInitSymbols.end()) { - NewInitSymbols[InitJD.get()] = std::move(RISItr->second); + NewInitSymbols[DepJD] = std::move(RISItr->second); RegisteredInitSymbols.erase(RISItr); } } }); - // If there are no further init symbols to look up then move on to the next - // phase. + // If there are no further init symbols to look up then send the link order + // (as a list of header addresses) to the caller. if (NewInitSymbols.empty()) { - getInitializersBuildSequencePhase(std::move(SendResult), JD, - std::move(*DFSLinkOrder)); + + // To make the list intelligible to the runtime we need to convert all + // JITDylib pointers to their header addresses. + DenseMap<JITDylib *, ExecutorAddr> HeaderAddrs; + HeaderAddrs.reserve(JDDepMap.size()); + { + std::lock_guard<std::mutex> Lock(PlatformMutex); + for (auto &KV : JDDepMap) { + auto I = JITDylibToHeaderAddr.find(KV.first); + if (I == JITDylibToHeaderAddr.end()) { + // The header address should have been materialized by the previous + // round, but we need to handle the pathological case where someone + // removes the symbol on another thread while we're running. + SendResult( + make_error<StringError>("JITDylib " + KV.first->getName() + + " has no registered header address", + inconvertibleErrorCode())); + return; + } + HeaderAddrs[KV.first] = I->second; + } + } + + // Build the dep info map to return. + MachOJITDylibDepInfoMap DIM; + DIM.reserve(JDDepMap.size()); + for (auto &KV : JDDepMap) { + assert(HeaderAddrs.count(KV.first) && "Missing header addr"); + auto H = HeaderAddrs[KV.first]; + MachOJITDylibDepInfo DepInfo; + for (auto &Dep : KV.second) { + assert(HeaderAddrs.count(Dep) && "Missing header addr"); + DepInfo.DepHeaders.push_back(HeaderAddrs[Dep]); + } + DIM.push_back(std::make_pair(H, std::move(DepInfo))); + } + SendResult(DIM); return; } @@ -412,58 +484,38 @@ void MachOPlatform::getInitializersLookupPhase( if (Err) SendResult(std::move(Err)); else - getInitializersLookupPhase(std::move(SendResult), JD); + pushInitializersLoop(std::move(SendResult), JD); }, ES, std::move(NewInitSymbols)); } -void MachOPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult, - StringRef JDName) { - LLVM_DEBUG({ - dbgs() << "MachOPlatform::rt_getInitializers(\"" << JDName << "\")\n"; - }); - - JITDylib *JD = ES.getJITDylibByName(JDName); - if (!JD) { - LLVM_DEBUG({ - dbgs() << " No such JITDylib \"" << JDName << "\". 
Sending error.\n"; - }); - SendResult(make_error("No JITDylib named " + JDName, - inconvertibleErrorCode())); - return; - } - - getInitializersLookupPhase(std::move(SendResult), *JD); -} - -void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, - ExecutorAddr Handle) { - LLVM_DEBUG({ - dbgs() << "MachOPlatform::rt_getDeinitializers(\"" - << formatv("{0:x}", Handle.getValue()) << "\")\n"; - }); - - JITDylib *JD = nullptr; - +void MachOPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult, + ExecutorAddr JDHeaderAddr) { + JITDylibSP JD; { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle); + auto I = HeaderAddrToJITDylib.find(JDHeaderAddr); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } + LLVM_DEBUG({ + dbgs() << "MachOPlatform::rt_pushInitializers(" << JDHeaderAddr << ") "; + if (JD) + dbgs() << "pushing initializers for " << JD->getName() << "\n"; + else + dbgs() << "No JITDylib for header address.\n"; + }); + if (!JD) { - LLVM_DEBUG({ - dbgs() << " No JITDylib for handle " - << formatv("{0:x}", Handle.getValue()) << "\n"; - }); - SendResult(make_error("No JITDylib associated with handle " + - formatv("{0:x}", Handle.getValue()), - inconvertibleErrorCode())); + SendResult( + make_error("No JITDylib with header addr " + + formatv("{0:x}", JDHeaderAddr.getValue()), + inconvertibleErrorCode())); return; } - SendResult(MachOJITDylibDeinitializerSequence()); + pushInitializersLoop(std::move(SendResult), JD); } void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, @@ -526,10 +578,14 @@ Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) { &orc_rt_macho_platform_bootstrap}, {ES.intern("___orc_rt_macho_platform_shutdown"), &orc_rt_macho_platform_shutdown}, - {ES.intern("___orc_rt_macho_register_thread_data_section"), - &orc_rt_macho_register_thread_data_section}, - {ES.intern("___orc_rt_macho_deregister_thread_data_section"), - &orc_rt_macho_deregister_thread_data_section}, + {ES.intern("___orc_rt_macho_register_jitdylib"), + &orc_rt_macho_register_jitdylib}, + {ES.intern("___orc_rt_macho_deregister_jitdylib"), + &orc_rt_macho_deregister_jitdylib}, + {ES.intern("___orc_rt_macho_register_object_platform_sections"), + &orc_rt_macho_register_object_platform_sections}, + {ES.intern("___orc_rt_macho_deregister_object_platform_sections"), + &orc_rt_macho_deregister_object_platform_sections}, {ES.intern("___orc_rt_macho_create_pthread_key"), &orc_rt_macho_create_pthread_key}})) return Err; @@ -537,45 +593,6 @@ Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) { return ES.callSPSWrapper(orc_rt_macho_platform_bootstrap); } -Error MachOPlatform::registerInitInfo( - JITDylib &JD, ExecutorAddr ObjCImageInfoAddr, - ArrayRef InitSections) { - - std::unique_lock Lock(PlatformMutex); - - MachOJITDylibInitializers *InitSeq = nullptr; - { - auto I = InitSeqs.find(&JD); - if (I == InitSeqs.end()) { - // If there's no init sequence entry yet then we need to look up the - // header symbol to force creation of one. - Lock.unlock(); - - auto SearchOrder = - JD.withLinkOrderDo([](const JITDylibSearchOrder &SO) { return SO; }); - if (auto Err = ES.lookup(SearchOrder, MachOHeaderStartSymbol).takeError()) - return Err; - - Lock.lock(); - I = InitSeqs.find(&JD); - assert(I != InitSeqs.end() && - "Entry missing after header symbol lookup?"); - } - InitSeq = &I->second; - } - - InitSeq->ObjCImageInfoAddress = ObjCImageInfoAddr; - - for (auto *Sec : InitSections) { - // FIXME: Avoid copy here. 
- jitlink::SectionRange R(*Sec); - InitSeq->InitSections[Sec->getName()].push_back( - {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())}); - } - - return Error::success(); -} - Expected MachOPlatform::createPThreadKey() { if (!orc_rt_macho_create_pthread_key) return make_error( @@ -617,11 +634,6 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig( return Err; return processObjCImageInfo(G, MR); }); - - Config.PostFixupPasses.push_back( - [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) { - return registerInitSections(G, JD); - }); } // --- Add passes for eh-frame and TLV support --- @@ -639,10 +651,12 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig( return fixTLVSectionsAndEdges(G, JD); }); - // Add a pass to register the final addresses of the eh-frame and TLV sections - // with the runtime. - Config.PostFixupPasses.push_back( - [this](jitlink::LinkGraph &G) { return registerEHAndTLVSections(G); }); + // Add a pass to register the final addresses of any special sections in the + // object with the runtime. + Config.PostAllocationPasses.push_back( + [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) { + return registerObjectPlatformSections(G, JD); + }); } ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap @@ -661,7 +675,6 @@ MachOPlatform::MachOPlatformPlugin::getSyntheticSymbolDependencies( Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol( jitlink::LinkGraph &G, MaterializationResponsibility &MR) { - auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) { return Sym->getName() == *MP.MachOHeaderStartSymbol; }); @@ -670,10 +683,14 @@ Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol( auto &JD = MR.getTargetJITDylib(); std::lock_guard Lock(MP.PlatformMutex); auto HeaderAddr = (*I)->getAddress(); + MP.JITDylibToHeaderAddr[&JD] = HeaderAddr; MP.HeaderAddrToJITDylib[HeaderAddr] = &JD; - assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); - MP.InitSeqs.insert( - std::make_pair(&JD, MachOJITDylibInitializers(JD.getName(), HeaderAddr))); + G.allocActions().push_back( + {cantFail( + WrapperFunctionCall::Create>( + MP.orc_rt_macho_register_jitdylib, JD.getName(), HeaderAddr)), + cantFail(WrapperFunctionCall::Create>( + MP.orc_rt_macho_deregister_jitdylib, HeaderAddr))}); return Error::success(); } @@ -792,37 +809,6 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo( return Error::success(); } -Error MachOPlatform::MachOPlatformPlugin::registerInitSections( - jitlink::LinkGraph &G, JITDylib &JD) { - - ExecutorAddr ObjCImageInfoAddr; - SmallVector InitSections; - - if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) { - if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart()) - ObjCImageInfoAddr = Addr; - } - - for (auto InitSectionName : InitSectionNames) - if (auto *Sec = G.findSectionByName(InitSectionName)) - InitSections.push_back(Sec); - - // Dump the scraped inits. 
- LLVM_DEBUG({ - dbgs() << "MachOPlatform: Scraped " << G.getName() << " init sections:\n"; - if (ObjCImageInfoAddr) - dbgs() << " " << ObjCImageInfoSectionName << ": " - << formatv("{0:x}", ObjCImageInfoAddr.getValue()) << "\n"; - for (auto *Sec : InitSections) { - jitlink::SectionRange R(*Sec); - dbgs() << " " << Sec->getName() << ": " - << formatv("[ {0:x} -- {1:x} ]", R.getStart(), R.getEnd()) << "\n"; - } - }); - - return MP.registerInitInfo(JD, ObjCImageInfoAddr, InitSections); -} - Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges( jitlink::LinkGraph &G, JITDylib &JD) { @@ -879,11 +865,10 @@ Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges( return Error::success(); } -Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( - jitlink::LinkGraph &G) { +Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections( + jitlink::LinkGraph &G, JITDylib &JD) { - // Add a pass to register the final addresses of the eh-frame and TLV sections - // with the runtime. + // Add an action to register the eh-frame. if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) { jitlink::SectionRange R(*EHFrameSection); if (!R.empty()) @@ -912,6 +897,8 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( ThreadDataSection = ThreadBSSSection; } + SmallVector, 8> MachOPlatformSecs; + // Having merged thread BSS (if present) and thread data (if present), // record the resulting section range. if (ThreadDataSection) { @@ -922,16 +909,64 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( "MachOPlatform has not finished booting", inconvertibleErrorCode()); - G.allocActions().push_back( - {cantFail( - WrapperFunctionCall::Create>( - MP.orc_rt_macho_register_thread_data_section, R.getRange())), - cantFail( - WrapperFunctionCall::Create>( - MP.orc_rt_macho_deregister_thread_data_section, - R.getRange()))}); + MachOPlatformSecs.push_back({ThreadDataSectionName, R.getRange()}); + } + } + + // If any platform sections were found then add an allocation action to call + // the registration function. + StringRef PlatformSections[] = { + ModInitFuncSectionName, ObjCClassListSectionName, + ObjCImageInfoSectionName, ObjCSelRefsSectionName, + Swift5ProtoSectionName, Swift5ProtosSectionName, + Swift5TypesSectionName, + }; + + for (auto &SecName : PlatformSections) { + auto *Sec = G.findSectionByName(SecName); + if (!Sec) + continue; + jitlink::SectionRange R(*Sec); + if (R.empty()) + continue; + + MachOPlatformSecs.push_back({SecName, R.getRange()}); + } + + if (!MachOPlatformSecs.empty()) { + Optional HeaderAddr; + { + std::lock_guard Lock(MP.PlatformMutex); + auto I = MP.JITDylibToHeaderAddr.find(&JD); + if (I != MP.JITDylibToHeaderAddr.end()) + HeaderAddr = I->second; } + + if (!HeaderAddr) + return make_error("Missing header for " + JD.getName(), + inconvertibleErrorCode()); + + // Dump the scraped inits. 
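+ // (One line per platform section: its name and the final address range
+ // allocated for it in the executor.)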
+ LLVM_DEBUG({ + dbgs() << "MachOPlatform: Scraped " << G.getName() << " init sections:\n"; + for (auto &KV : MachOPlatformSecs) + dbgs() << " " << KV.first << ": " << KV.second << "\n"; + }); + + using SPSRegisterObjectPlatformSectionsArgs = + SPSArgList>>; + G.allocActions().push_back( + {cantFail( + WrapperFunctionCall::Create( + MP.orc_rt_macho_register_object_platform_sections, *HeaderAddr, + MachOPlatformSecs)), + cantFail( + WrapperFunctionCall::Create( + MP.orc_rt_macho_deregister_object_platform_sections, + *HeaderAddr, MachOPlatformSecs))}); } + return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp new file mode 100644 index 000000000000..8b3fbd7117e2 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -0,0 +1,152 @@ +//===- MemoryMapper.cpp - Cross-process memory mapper ------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/MemoryMapper.h" + +namespace llvm { +namespace orc { + +MemoryMapper::~MemoryMapper() {} + +void InProcessMemoryMapper::reserve(size_t NumBytes, + OnReservedFunction OnReserved) { + std::error_code EC; + auto MB = sys::Memory::allocateMappedMemory( + NumBytes, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); + + if (EC) + return OnReserved(errorCodeToError(EC)); + + { + std::lock_guard Lock(Mutex); + Reservations[MB.base()].Size = MB.allocatedSize(); + } + + OnReserved( + ExecutorAddrRange(ExecutorAddr::fromPtr(MB.base()), MB.allocatedSize())); +} + +char *InProcessMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { + return Addr.toPtr(); +} + +void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, + OnInitializedFunction OnInitialized) { + ExecutorAddr MinAddr(~0ULL); + + for (auto &Segment : AI.Segments) { + auto Base = AI.MappingBase + Segment.Offset; + auto Size = Segment.ContentSize + Segment.ZeroFillSize; + + if (Base < MinAddr) + MinAddr = Base; + + std::memset((Base + Segment.ContentSize).toPtr(), 0, + Segment.ZeroFillSize); + + if (auto EC = sys::Memory::protectMappedMemory({Base.toPtr(), Size}, + Segment.Prot)) { + return OnInitialized(errorCodeToError(EC)); + } + if (Segment.Prot & sys::Memory::MF_EXEC) + sys::Memory::InvalidateInstructionCache(Base.toPtr(), Size); + } + + auto DeinitializeActions = shared::runFinalizeActions(AI.Actions); + if (!DeinitializeActions) + return OnInitialized(DeinitializeActions.takeError()); + + { + std::lock_guard Lock(Mutex); + Allocations[MinAddr].DeinitializationActions = + std::move(*DeinitializeActions); + Reservations[AI.MappingBase.toPtr()].Allocations.push_back(MinAddr); + } + + OnInitialized(MinAddr); +} + +void InProcessMemoryMapper::deinitialize( + ArrayRef Bases, + MemoryMapper::OnDeinitializedFunction OnDeinitialized) { + Error AllErr = Error::success(); + + { + std::lock_guard Lock(Mutex); + + for (auto Base : Bases) { + + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } + + Allocations.erase(Base); + } + } + + OnDeinitialized(std::move(AllErr)); +} + +void InProcessMemoryMapper::release(ArrayRef Bases, + OnReleasedFunction OnReleased) { + Error Err = Error::success(); + + 
for (auto Base : Bases) { + std::vector AllocAddrs; + size_t Size; + { + std::lock_guard Lock(Mutex); + auto &R = Reservations[Base.toPtr()]; + Size = R.Size; + AllocAddrs.swap(R.Allocations); + } + + // deinitialize sub allocations + std::promise P; + auto F = P.get_future(); + deinitialize(AllocAddrs, [&](Error Err) { P.set_value(std::move(Err)); }); + if (Error E = F.get()) { + Err = joinErrors(std::move(Err), std::move(E)); + } + + // free the memory + auto MB = sys::MemoryBlock(Base.toPtr(), Size); + + auto EC = sys::Memory::releaseMappedMemory(MB); + if (EC) { + Err = joinErrors(std::move(Err), errorCodeToError(EC)); + } + + std::lock_guard Lock(Mutex); + Reservations.erase(Base.toPtr()); + } + + OnReleased(std::move(Err)); +} + +InProcessMemoryMapper::~InProcessMemoryMapper() { + std::vector ReservationAddrs; + { + std::lock_guard Lock(Mutex); + + ReservationAddrs.reserve(Reservations.size()); + for (const auto &R : Reservations) { + ReservationAddrs.push_back(ExecutorAddr::fromPtr(R.getFirst())); + } + } + + std::promise P; + auto F = P.get_future(); + release(ReservationAddrs, [&](Error Err) { P.set_value(std::move(Err)); }); + cantFail(F.get()); +} + +} // namespace orc + +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp index c1ad569dd65d..394a555e453b 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp @@ -63,7 +63,6 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES, auto Name = Sym.getName(); if (!Name) return Name.takeError(); - auto InternedName = ES.intern(*Name); auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); if (!SymFlags) return SymFlags.takeError(); @@ -72,7 +71,7 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES, if (Name->startswith("l")) *SymFlags &= ~JITSymbolFlags::Exported; - I.SymbolFlags[InternedName] = std::move(*SymFlags); + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); } for (auto &Sec : Obj.sections()) { @@ -121,7 +120,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES, auto Name = Sym.getName(); if (!Name) return Name.takeError(); - auto InternedName = ES.intern(*Name); + auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); if (!SymFlags) return SymFlags.takeError(); @@ -130,7 +129,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES, if (Sym.getBinding() == ELF::STB_GNU_UNIQUE) *SymFlags |= JITSymbolFlags::Weak; - I.SymbolFlags[InternedName] = std::move(*SymFlags); + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); } SymbolStringPtr InitSymbol; @@ -175,12 +174,12 @@ getGenericObjectFileSymbolInfo(ExecutionSession &ES, auto Name = Sym.getName(); if (!Name) return Name.takeError(); - auto InternedName = ES.intern(*Name); + auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); if (!SymFlags) return SymFlags.takeError(); - I.SymbolFlags[InternedName] = std::move(*SymFlags); + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); } return I; diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 32c5998a789b..5ddb35cbafd5 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -78,9 +78,12 @@ private: } static bool hasELFInitSection(LinkGraph &G) { - for (auto &Sec : G.sections()) - if (Sec.getName() == ".init_array") + for (auto &Sec : G.sections()) { + auto SecName = Sec.getName(); + if 
(SecName.consume_front(".init_array") && + (SecName.empty() || SecName[0] == '.')) return true; + } return false; } @@ -226,12 +229,13 @@ public: } for (auto *Sym : G.absolute_symbols()) - if (Sym->hasName()) { + if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); JITSymbolFlags Flags; - Flags |= JITSymbolFlags::Absolute; if (Sym->isCallable()) Flags |= JITSymbolFlags::Callable; + if (Sym->getScope() == Scope::Default) + Flags |= JITSymbolFlags::Exported; if (Sym->getLinkage() == Linkage::Weak) Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = @@ -607,7 +611,7 @@ private: DenseMap InternalNamedSymbolDeps; }; -ObjectLinkingLayer::Plugin::~Plugin() {} +ObjectLinkingLayer::Plugin::~Plugin() = default; char ObjectLinkingLayer::ID; diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index 18b3c5e12b1c..ef764a3f0d7f 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -906,5 +906,176 @@ void OrcMips64::writeIndirectStubsBlock( Stub[8 * I + 7] = 0x00000000; // nop } } + +void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + const uint32_t ResolverCode[] = { + 0xef810113, // 0x00: addi sp,sp,-264 + 0x00813023, // 0x04: sd s0,0(sp) + 0x00913423, // 0x08: sd s1,8(sp) + 0x01213823, // 0x0c: sd s2,16(sp) + 0x01313c23, // 0x10: sd s3,24(sp) + 0x03413023, // 0x14: sd s4,32(sp) + 0x03513423, // 0x18: sd s5,40(sp) + 0x03613823, // 0x1c: sd s6,48(sp) + 0x03713c23, // 0x20: sd s7,56(sp) + 0x05813023, // 0x24: sd s8,64(sp) + 0x05913423, // 0x28: sd s9,72(sp) + 0x05a13823, // 0x2c: sd s10,80(sp) + 0x05b13c23, // 0x30: sd s11,88(sp) + 0x06113023, // 0x34: sd ra,96(sp) + 0x06a13423, // 0x38: sd a0,104(sp) + 0x06b13823, // 0x3c: sd a1,112(sp) + 0x06c13c23, // 0x40: sd a2,120(sp) + 0x08d13023, // 0x44: sd a3,128(sp) + 0x08e13423, // 0x48: sd a4,136(sp) + 0x08f13823, // 0x4c: sd a5,144(sp) + 0x09013c23, // 0x50: sd a6,152(sp) + 0x0b113023, // 0x54: sd a7,160(sp) + 0x0a813427, // 0x58: fsd fs0,168(sp) + 0x0a913827, // 0x5c: fsd fs1,176(sp) + 0x0b213c27, // 0x60: fsd fs2,184(sp) + 0x0d313027, // 0x64: fsd fs3,192(sp) + 0x0d413427, // 0x68: fsd fs4,200(sp) + 0x0d513827, // 0x6c: fsd fs5,208(sp) + 0x0d613c27, // 0x70: fsd fs6,216(sp) + 0x0f713027, // 0x74: fsd fs7,224(sp) + 0x0f813427, // 0x78: fsd fs8,232(sp) + 0x0f913827, // 0x7c: fsd fs9,240(sp) + 0x0fa13c27, // 0x80: fsd fs10,248(sp) + 0x11b13027, // 0x84: fsd fs11,256(sp) + 0x00000517, // 0x88: auipc a0,0x0 + 0x0b053503, // 0x8c: ld a0,176(a0) # 0x138 + 0x00030593, // 0x90: mv a1,t1 + 0xff458593, // 0x94: addi a1,a1,-12 + 0x00000617, // 0x98: auipc a2,0x0 + 0x0a863603, // 0x9c: ld a2,168(a2) # 0x140 + 0x000600e7, // 0xa0: jalr a2 + 0x00050293, // 0xa4: mv t0,a0 + 0x00013403, // 0xa8: ld s0,0(sp) + 0x00813483, // 0xac: ld s1,8(sp) + 0x01013903, // 0xb0: ld s2,16(sp) + 0x01813983, // 0xb4: ld s3,24(sp) + 0x02013a03, // 0xb8: ld s4,32(sp) + 0x02813a83, // 0xbc: ld s5,40(sp) + 0x03013b03, // 0xc0: ld s6,48(sp) + 0x03813b83, // 0xc4: ld s7,56(sp) + 0x04013c03, // 0xc8: ld s8,64(sp) + 0x04813c83, // 0xcc: ld s9,72(sp) + 0x05013d03, // 0xd0: ld s10,80(sp) + 0x05813d83, // 0xd4: ld s11,88(sp) + 0x06013083, // 0xd8: ld ra,96(sp) + 0x06813503, // 0xdc: ld a0,104(sp) + 0x07013583, // 0xe0: ld a1,112(sp) + 0x07813603, // 0xe4: ld a2,120(sp) + 0x08013683, // 0xe8: ld 
a3,128(sp) + 0x08813703, // 0xec: ld a4,136(sp) + 0x09013783, // 0xf0: ld a5,144(sp) + 0x09813803, // 0xf4: ld a6,152(sp) + 0x0a013883, // 0xf8: ld a7,160(sp) + 0x0a813407, // 0xfc: fld fs0,168(sp) + 0x0b013487, // 0x100: fld fs1,176(sp) + 0x0b813907, // 0x104: fld fs2,184(sp) + 0x0c013987, // 0x108: fld fs3,192(sp) + 0x0c813a07, // 0x10c: fld fs4,200(sp) + 0x0d013a87, // 0x110: fld fs5,208(sp) + 0x0d813b07, // 0x114: fld fs6,216(sp) + 0x0e013b87, // 0x118: fld fs7,224(sp) + 0x0e813c07, // 0x11c: fld fs8,232(sp) + 0x0f013c87, // 0x120: fld fs9,240(sp) + 0x0f813d07, // 0x124: fld fs10,248(sp) + 0x10013d87, // 0x128: fld fs11,256(sp) + 0x10810113, // 0x12c: addi sp,sp,264 + 0x00028067, // 0x130: jr t0 + 0x12345678, // 0x134: padding to align at 8 byte + 0x12345678, // 0x138: Lreentry_ctx_ptr: + 0xdeadbeef, // 0x13c: .quad 0 + 0x98765432, // 0x140: Lreentry_fn_ptr: + 0xcafef00d // 0x144: .quad 0 + }; + + const unsigned ReentryCtxAddrOffset = 0x138; + const unsigned ReentryFnAddrOffset = 0x140; + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); +} + +void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines) { + + unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); + + memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { + uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000; + uint32_t Lo12 = OffsetToPtr - Hi20; + Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) + Trampolines[4 * I + 1] = + 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) + Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0 + Trampolines[4 * I + 3] = 0xdeadface; // padding + } +} + +void OrcRiscv64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 + // ld t0, %lo(t0) + // jr t0 ; Jump to resolver + // .quad 0 ; Pad to 16 bytes + // stub2: + // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 + // ld t0, %lo(t0) + // jr t0 ; Jump to resolver + // .quad 0 + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .quad 0x0 + // ptr2: + // .quad 0x0 + // + // ... + + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + + for (unsigned I = 0; I < NumStubs; ++I) { + uint64_t PtrDisplacement = + PointersBlockTargetAddress - StubsBlockTargetAddress; + uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000; + uint32_t Lo12 = PtrDisplacement - Hi20; + Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) + Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) + Stub[4 * I + 2] = 0x00028067; // jr t0 + Stub[4 * I + 3] = 0xfeedbeef; // padding + PointersBlockTargetAddress += PointerSize; + StubsBlockTargetAddress += StubSize; + } +} + } // End namespace orc. } // End namespace llvm. 
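Aside (illustrative sketch, not part of the patch): the RISC-V resolver, trampoline, and stub writers above all split a pc-relative displacement with Hi20 = (Disp + 0x800) & 0xFFFFF000 and Lo12 = Disp - Hi20. auipc consumes the pre-positioned upper bits and the following ld adds a sign-extended 12-bit offset, so the pair reaches exactly Disp; adding 0x800 before masking rounds to the nearest 4 KiB page so that Lo12 always fits in a signed 12-bit field. A self-contained check of that invariant, assuming nothing beyond the C++ standard library:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Split a pc-relative displacement the way the RISC-V writers above do:
// Hi20 carries bits 31:12 (already positioned for OR-ing into auipc),
// Lo12 is the signed 12-bit remainder consumed by the following ld.
static void splitHi20Lo12(int32_t Disp, int32_t &Hi20, int32_t &Lo12) {
  Hi20 = static_cast<int32_t>((Disp + 0x800) & 0xFFFFF000);
  Lo12 = Disp - Hi20; // always in [-2048, 2047]
}

int main() {
  for (int32_t Disp : {0, 0x7FF, 0x800, 0x1234, -0x1234}) {
    int32_t Hi20 = 0, Lo12 = 0;
    splitHi20Lo12(Disp, Hi20, Lo12);
    assert(Lo12 >= -2048 && Lo12 <= 2047);
    assert(Hi20 + Lo12 == Disp); // auipc + ld lands exactly on Disp
  }
  return 0;
}

Because Lo12 is signed, the pair covers any displacement within roughly +/-2 GiB, which is why a single auipc/ld sequence suffices for both the trampoline-to-resolver and stub-to-pointer references.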
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index 71be8dfdc004..b7eab6b85ecf 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -106,82 +106,6 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJITBuilder, LLVMOrcLLJITBuilderRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJIT, LLVMOrcLLJITRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) -namespace llvm { -namespace orc { - -class CAPIDefinitionGenerator final : public DefinitionGenerator { -public: - CAPIDefinitionGenerator( - void *Ctx, - LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate) - : Ctx(Ctx), TryToGenerate(TryToGenerate) {} - - Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, - JITDylibLookupFlags JDLookupFlags, - const SymbolLookupSet &LookupSet) override { - - // Take the lookup state. - LLVMOrcLookupStateRef LSR = ::wrap(OrcV2CAPIHelper::extractLookupState(LS)); - - // Translate the lookup kind. - LLVMOrcLookupKind CLookupKind; - switch (K) { - case LookupKind::Static: - CLookupKind = LLVMOrcLookupKindStatic; - break; - case LookupKind::DLSym: - CLookupKind = LLVMOrcLookupKindDLSym; - break; - } - - // Translate the JITDylibSearchFlags. - LLVMOrcJITDylibLookupFlags CJDLookupFlags; - switch (JDLookupFlags) { - case JITDylibLookupFlags::MatchExportedSymbolsOnly: - CJDLookupFlags = LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly; - break; - case JITDylibLookupFlags::MatchAllSymbols: - CJDLookupFlags = LLVMOrcJITDylibLookupFlagsMatchAllSymbols; - break; - } - - // Translate the lookup set. - std::vector CLookupSet; - CLookupSet.reserve(LookupSet.size()); - for (auto &KV : LookupSet) { - LLVMOrcSymbolLookupFlags SLF; - LLVMOrcSymbolStringPoolEntryRef Name = - ::wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first)); - switch (KV.second) { - case SymbolLookupFlags::RequiredSymbol: - SLF = LLVMOrcSymbolLookupFlagsRequiredSymbol; - break; - case SymbolLookupFlags::WeaklyReferencedSymbol: - SLF = LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol; - break; - } - CLookupSet.push_back({Name, SLF}); - } - - // Run the C TryToGenerate function. - auto Err = unwrap(TryToGenerate(::wrap(this), Ctx, &LSR, CLookupKind, - ::wrap(&JD), CJDLookupFlags, - CLookupSet.data(), CLookupSet.size())); - - // Restore the lookup state. 
- OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(LSR)); - - return Err; - } - -private: - void *Ctx; - LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate; -}; - -} // end namespace orc -} // end namespace llvm - namespace { class OrcCAPIMaterializationUnit : public llvm::orc::MaterializationUnit { @@ -282,8 +206,134 @@ toSymbolDependenceMap(LLVMOrcCDependenceMapPairs Pairs, size_t NumPairs) { return SDM; } +static LookupKind toLookupKind(LLVMOrcLookupKind K) { + switch (K) { + case LLVMOrcLookupKindStatic: + return LookupKind::Static; + case LLVMOrcLookupKindDLSym: + return LookupKind::DLSym; + } + llvm_unreachable("unrecognized LLVMOrcLookupKind value"); +} + +static LLVMOrcLookupKind fromLookupKind(LookupKind K) { + switch (K) { + case LookupKind::Static: + return LLVMOrcLookupKindStatic; + case LookupKind::DLSym: + return LLVMOrcLookupKindDLSym; + } + llvm_unreachable("unrecognized LookupKind value"); +} + +static JITDylibLookupFlags +toJITDylibLookupFlags(LLVMOrcJITDylibLookupFlags LF) { + switch (LF) { + case LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly: + return JITDylibLookupFlags::MatchExportedSymbolsOnly; + case LLVMOrcJITDylibLookupFlagsMatchAllSymbols: + return JITDylibLookupFlags::MatchAllSymbols; + } + llvm_unreachable("unrecognized LLVMOrcJITDylibLookupFlags value"); +} + +static LLVMOrcJITDylibLookupFlags +fromJITDylibLookupFlags(JITDylibLookupFlags LF) { + switch (LF) { + case JITDylibLookupFlags::MatchExportedSymbolsOnly: + return LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly; + case JITDylibLookupFlags::MatchAllSymbols: + return LLVMOrcJITDylibLookupFlagsMatchAllSymbols; + } + llvm_unreachable("unrecognized JITDylibLookupFlags value"); +} + +static SymbolLookupFlags toSymbolLookupFlags(LLVMOrcSymbolLookupFlags SLF) { + switch (SLF) { + case LLVMOrcSymbolLookupFlagsRequiredSymbol: + return SymbolLookupFlags::RequiredSymbol; + case LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol: + return SymbolLookupFlags::WeaklyReferencedSymbol; + } + llvm_unreachable("unrecognized LLVMOrcSymbolLookupFlags value"); +} + +static LLVMOrcSymbolLookupFlags fromSymbolLookupFlags(SymbolLookupFlags SLF) { + switch (SLF) { + case SymbolLookupFlags::RequiredSymbol: + return LLVMOrcSymbolLookupFlagsRequiredSymbol; + case SymbolLookupFlags::WeaklyReferencedSymbol: + return LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol; + } + llvm_unreachable("unrecognized SymbolLookupFlags value"); +} + +static LLVMJITEvaluatedSymbol +fromJITEvaluatedSymbol(const JITEvaluatedSymbol &S) { + return {S.getAddress(), fromJITSymbolFlags(S.getFlags())}; +} + } // end anonymous namespace +namespace llvm { +namespace orc { + +class CAPIDefinitionGenerator final : public DefinitionGenerator { +public: + CAPIDefinitionGenerator( + LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose, void *Ctx, + LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate) + : Dispose(Dispose), Ctx(Ctx), TryToGenerate(TryToGenerate) {} + + ~CAPIDefinitionGenerator() { + if (Dispose) + Dispose(Ctx); + } + + Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) override { + + // Take the lookup state. + LLVMOrcLookupStateRef LSR = ::wrap(OrcV2CAPIHelper::extractLookupState(LS)); + + // Translate the lookup kind. + LLVMOrcLookupKind CLookupKind = fromLookupKind(K); + + // Translate the JITDylibLookupFlags. + LLVMOrcJITDylibLookupFlags CJDLookupFlags = + fromJITDylibLookupFlags(JDLookupFlags); + + // Translate the lookup set. 
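+ // (The set is rebuilt as a contiguous vector so it can be handed to the
+ // C callback below as a data pointer plus length.)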
+ std::vector CLookupSet; + CLookupSet.reserve(LookupSet.size()); + for (auto &KV : LookupSet) { + LLVMOrcSymbolStringPoolEntryRef Name = + ::wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first)); + LLVMOrcSymbolLookupFlags SLF = fromSymbolLookupFlags(KV.second); + CLookupSet.push_back({Name, SLF}); + } + + // Run the C TryToGenerate function. + auto Err = unwrap(TryToGenerate(::wrap(this), Ctx, &LSR, CLookupKind, + ::wrap(&JD), CJDLookupFlags, + CLookupSet.data(), CLookupSet.size())); + + // Restore the lookup state. + OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(LSR)); + + return Err; + } + +private: + LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose; + void *Ctx; + LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate; +}; + +} // end namespace orc +} // end namespace llvm + void LLVMOrcExecutionSessionSetErrorReporter( LLVMOrcExecutionSessionRef ES, LLVMOrcErrorReporterFunction ReportError, void *Ctx) { @@ -307,6 +357,42 @@ LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name) { OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(ES)->intern(Name))); } +void LLVMOrcExecutionSessionLookup( + LLVMOrcExecutionSessionRef ES, LLVMOrcLookupKind K, + LLVMOrcCJITDylibSearchOrder SearchOrder, size_t SearchOrderSize, + LLVMOrcCLookupSet Symbols, size_t SymbolsSize, + LLVMOrcExecutionSessionLookupHandleResultFunction HandleResult, void *Ctx) { + assert(ES && "ES cannot be null"); + assert(SearchOrder && "SearchOrder cannot be null"); + assert(Symbols && "Symbols cannot be null"); + assert(HandleResult && "HandleResult cannot be null"); + + JITDylibSearchOrder SO; + for (size_t I = 0; I != SearchOrderSize; ++I) + SO.push_back({unwrap(SearchOrder[I].JD), + toJITDylibLookupFlags(SearchOrder[I].JDLookupFlags)}); + + SymbolLookupSet SLS; + for (size_t I = 0; I != SymbolsSize; ++I) + SLS.add(OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Symbols[I].Name)), + toSymbolLookupFlags(Symbols[I].LookupFlags)); + + unwrap(ES)->lookup( + toLookupKind(K), SO, std::move(SLS), SymbolState::Ready, + [HandleResult, Ctx](Expected Result) { + if (Result) { + SmallVector CResult; + for (auto &KV : *Result) + CResult.push_back(LLVMOrcCSymbolMapPair{ + wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first)), + fromJITEvaluatedSymbol(KV.second)}); + HandleResult(LLVMErrorSuccess, CResult.data(), CResult.size(), Ctx); + } else + HandleResult(wrap(Result.takeError()), nullptr, 0, Ctx); + }, + NoDependenciesToRegister); +} + void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) { OrcV2CAPIHelper::retainPoolEntry(unwrap(S)); } @@ -589,11 +675,19 @@ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD, } LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator( - LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx) { - auto DG = std::make_unique(Ctx, F); + LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx, + LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose) { + auto DG = std::make_unique(Dispose, Ctx, F); return wrap(DG.release()); } +void LLVMOrcLookupStateContinueLookup(LLVMOrcLookupStateRef S, + LLVMErrorRef Err) { + LookupState LS; + OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(S)); + LS.continueLookup(unwrap(Err)); +} + LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess( LLVMOrcDefinitionGeneratorRef *Result, char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx) { @@ -951,7 +1045,7 @@ LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J, return wrap(Sym.takeError()); } - *Result = 
Sym->getAddress(); + *Result = Sym->getValue(); return LLVMErrorSuccess; } diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp index 64fc717b7b56..2bb204e688fc 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp @@ -43,8 +43,8 @@ const char *DispatchFnName = "__llvm_orc_SimpleRemoteEPC_dispatch_fn"; } // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames -SimpleRemoteEPCTransportClient::~SimpleRemoteEPCTransportClient() {} -SimpleRemoteEPCTransport::~SimpleRemoteEPCTransport() {} +SimpleRemoteEPCTransportClient::~SimpleRemoteEPCTransportClient() = default; +SimpleRemoteEPCTransport::~SimpleRemoteEPCTransport() = default; Expected> FDSimpleRemoteEPCTransport::Create(SimpleRemoteEPCTransportClient &C, int InFD, diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp index 0b4755fe23cf..b52d01318c0d 100644 --- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -85,7 +85,7 @@ void IRSpeculationLayer::emit(std::unique_ptr R, auto IRNames = QueryAnalysis(Fn); // Instrument and register if Query has result - if (IRNames.hasValue()) { + if (IRNames) { // Emit globals for each function. auto LoadValueTy = Type::getInt8Ty(MContext); @@ -126,7 +126,7 @@ void IRSpeculationLayer::emit(std::unique_ptr R, assert(Mutator.GetInsertBlock()->getParent() == &Fn && "IR builder association mismatch?"); - S.registerSymbols(internToJITSymbols(IRNames.getValue()), + S.registerSymbols(internToJITSymbols(*IRNames), &R->getTargetJITDylib()); } } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp index b6b21bde1182..8ab0af3eab6e 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp @@ -22,9 +22,9 @@ using namespace llvm::orc::shared; namespace llvm { namespace orc { -ExecutorBootstrapService::~ExecutorBootstrapService() {} +ExecutorBootstrapService::~ExecutorBootstrapService() = default; -SimpleRemoteEPCServer::Dispatcher::~Dispatcher() {} +SimpleRemoteEPCServer::Dispatcher::~Dispatcher() = default; #if LLVM_ENABLE_THREADS void SimpleRemoteEPCServer::ThreadDispatcher::dispatch( diff --git a/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp index 111c84ec87ed..11a99986f2ee 100644 --- a/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp @@ -16,7 +16,7 @@ char GenericNamedTask::ID = 0; const char *GenericNamedTask::DefaultDescription = "Generic Task"; void Task::anchor() {} -TaskDispatcher::~TaskDispatcher() {} +TaskDispatcher::~TaskDispatcher() = default; void InPlaceTaskDispatcher::dispatch(std::unique_ptr T) { T->run(); } diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp index 9c8d402364c6..bc42eebf3fec 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp @@ -29,7 +29,7 @@ namespace llvm { -RTDyldMemoryManager::~RTDyldMemoryManager() {} +RTDyldMemoryManager::~RTDyldMemoryManager() = default; #if defined(HAVE_REGISTER_FRAME) && defined(HAVE_DEREGISTER_FRAME) && \ !defined(__SEH__) && 
!defined(__USING_SJLJ_EXCEPTIONS__) @@ -95,18 +95,16 @@ void RTDyldMemoryManager::registerEHFramesInProcess(uint8_t *Addr, // and projects/libunwind/src/UnwindLevel1-gcc-ext.c. const char *P = (const char *)Addr; const char *End = P + Size; - do { + while (P != End) P = processFDE(P, false); - } while(P != End); } void RTDyldMemoryManager::deregisterEHFramesInProcess(uint8_t *Addr, size_t Size) { const char *P = (const char *)Addr; const char *End = P + Size; - do { + while (P != End) P = processFDE(P, true); - } while(P != End); } #else diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 3f38d26869d4..2e0cba849165 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -66,7 +66,7 @@ std::error_code RuntimeDyldError::convertToErrorCode() const { } // Empty out-of-line virtual destructor as the key function. -RuntimeDyldImpl::~RuntimeDyldImpl() {} +RuntimeDyldImpl::~RuntimeDyldImpl() = default; // Pin LoadedObjectInfo's vtables to this file. void RuntimeDyld::LoadedObjectInfo::anchor() {} @@ -1311,7 +1311,7 @@ RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr, ProcessAllSections = false; } -RuntimeDyld::~RuntimeDyld() {} +RuntimeDyld::~RuntimeDyld() = default; static std::unique_ptr createRuntimeDyldCOFF( diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 33db23408cf2..ae1bb5a1da4b 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MSVCErrorWorkarounds.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include #include @@ -892,7 +893,7 @@ RuntimeDyldChecker::RuntimeDyldChecker( std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter, ErrStream)) {} -RuntimeDyldChecker::~RuntimeDyldChecker() {} +RuntimeDyldChecker::~RuntimeDyldChecker() = default; bool RuntimeDyldChecker::check(StringRef CheckExpr) const { return Impl->check(CheckExpr); diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f92618afdff6..da1102fc9f07 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -216,7 +216,7 @@ namespace llvm { RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver) : RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {} -RuntimeDyldELF::~RuntimeDyldELF() {} +RuntimeDyldELF::~RuntimeDyldELF() = default; void RuntimeDyldELF::registerEHFrames() { for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { @@ -446,6 +446,13 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, write(isBE, TargetPtr, static_cast(Result)); break; } + case ELF::R_AARCH64_PREL16: { + uint64_t Result = Value + Addend - FinalAddress; + assert(static_cast(Result) >= INT16_MIN && + static_cast(Result) <= UINT16_MAX); + write(isBE, TargetPtr, static_cast(Result & 0xffffU)); + break; + } case ELF::R_AARCH64_PREL32: { uint64_t Result = Value + Addend - FinalAddress; assert(static_cast(Result) >= INT32_MIN && diff --git a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp 
index 56b232b9dbcd..b23e33039c35 100644 --- a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -238,7 +238,7 @@ SectionMemoryManager::~SectionMemoryManager() { } } -SectionMemoryManager::MemoryMapper::~MemoryMapper() {} +SectionMemoryManager::MemoryMapper::~MemoryMapper() = default; void SectionMemoryManager::anchor() {} diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp index 6186af444e73..bf13b6c325ec 100644 --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -1651,6 +1651,8 @@ std::string Check::FileCheckType::getDescription(StringRef Prefix) const { switch (Kind) { case Check::CheckNone: return "invalid"; + case Check::CheckMisspelled: + return "misspelled"; case Check::CheckPlain: if (Count > 1) return WithModifiers("-COUNT"); @@ -1680,7 +1682,8 @@ std::string Check::FileCheckType::getDescription(StringRef Prefix) const { } static std::pair<Check::FileCheckType, StringRef> -FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { +FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix, + bool &Misspelled) { if (Buffer.size() <= Prefix.size()) return {Check::CheckNone, StringRef()}; @@ -1722,7 +1725,9 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { if (Rest.front() == '{') return ConsumeModifiers(Check::CheckPlain); - if (!Rest.consume_front("-")) + if (Rest.consume_front("_")) + Misspelled = true; + else if (!Rest.consume_front("-")) return {Check::CheckNone, StringRef()}; if (Rest.consume_front("COUNT-")) { @@ -1766,6 +1771,15 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { return {Check::CheckNone, Rest}; } +static std::pair<Check::FileCheckType, StringRef> +FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { + bool Misspelled = false; + auto Res = FindCheckType(Req, Buffer, Prefix, Misspelled); + if (Res.first != Check::CheckNone && Misspelled) + return {Check::CheckMisspelled, Res.second}; + return Res; +} + // From the given position, find the next character after the word. static size_t SkipWord(StringRef Str, size_t Loc) { while (Loc < Str.size() && IsPartOfWord(Str[Loc])) @@ -1939,6 +1953,16 @@ bool FileCheck::readCheckFile( Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) : AfterSuffix; + // Complain about misspelled directives. + if (CheckTy == Check::CheckMisspelled) { + StringRef UsedDirective(UsedPrefix.data(), + AfterSuffix.data() - UsedPrefix.data()); + SM.PrintMessage(SMLoc::getFromPointer(UsedDirective.data()), + SourceMgr::DK_Error, + "misspelled directive '" + UsedDirective + "'"); + return true; + } + // Complain about useful-looking but unsupported suffixes.
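The misspelling support is split in two: the inner FindCheckType records whether the prefix was followed by '_' rather than '-', and the new wrapper turns any otherwise-valid directive into CheckMisspelled so readCheckFile can reject, say, CHECK_NEXT: with a real error instead of silently ignoring it. A reduced sketch of the separator handling, with std::string_view standing in for StringRef:

#include <string_view>

// Consume the prefix/directive separator. '-' is valid; '_' is accepted
// but flagged so "CHECK_NEXT:" becomes an error instead of a no-op line.
bool consumeSeparator(std::string_view &Rest, bool &Misspelled) {
  if (Rest.empty())
    return false;
  if (Rest.front() == '_')
    Misspelled = true;
  else if (Rest.front() != '-')
    return false; // not a directive at all
  Rest.remove_prefix(1);
  return true;
}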
if (CheckTy == Check::CheckBadNot) { SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp index 11d8da097c6c..6e8856f481af 100644 --- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/OpenMP/OMPContext.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -214,7 +213,7 @@ static int isVariantApplicableInContextHelper( }); Optional Result = HandleTrait(Property, IsActiveTrait); - if (Result.hasValue()) + if (Result) return Result.getValue(); } @@ -235,7 +234,7 @@ static int isVariantApplicableInContextHelper( ConstructMatches->push_back(ConstructIdx - 1); Optional Result = HandleTrait(Property, FoundInOrder); - if (Result.hasValue()) + if (Result) return Result.getValue(); if (!FoundInOrder) { diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 99001269e1f8..9b08a24e14d4 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -15,15 +15,15 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" @@ -31,17 +31,14 @@ #include "llvm/IR/Value.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Transforms/Utils/LoopPeel.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include -#include #define DEBUG_TYPE "openmp-ir-builder" @@ -72,8 +69,263 @@ static bool isConflictIP(IRBuilder<>::InsertPoint IP1, return false; return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint(); } + +static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) { + // Valid ordered/unordered and base algorithm combinations. 
+ switch (SchedType & ~OMPScheduleType::MonotonicityMask) { + case OMPScheduleType::UnorderedStaticChunked: + case OMPScheduleType::UnorderedStatic: + case OMPScheduleType::UnorderedDynamicChunked: + case OMPScheduleType::UnorderedGuidedChunked: + case OMPScheduleType::UnorderedRuntime: + case OMPScheduleType::UnorderedAuto: + case OMPScheduleType::UnorderedTrapezoidal: + case OMPScheduleType::UnorderedGreedy: + case OMPScheduleType::UnorderedBalanced: + case OMPScheduleType::UnorderedGuidedIterativeChunked: + case OMPScheduleType::UnorderedGuidedAnalyticalChunked: + case OMPScheduleType::UnorderedSteal: + case OMPScheduleType::UnorderedStaticBalancedChunked: + case OMPScheduleType::UnorderedGuidedSimd: + case OMPScheduleType::UnorderedRuntimeSimd: + case OMPScheduleType::OrderedStaticChunked: + case OMPScheduleType::OrderedStatic: + case OMPScheduleType::OrderedDynamicChunked: + case OMPScheduleType::OrderedGuidedChunked: + case OMPScheduleType::OrderedRuntime: + case OMPScheduleType::OrderedAuto: + case OMPScheduleType::OrderdTrapezoidal: + case OMPScheduleType::NomergeUnorderedStaticChunked: + case OMPScheduleType::NomergeUnorderedStatic: + case OMPScheduleType::NomergeUnorderedDynamicChunked: + case OMPScheduleType::NomergeUnorderedGuidedChunked: + case OMPScheduleType::NomergeUnorderedRuntime: + case OMPScheduleType::NomergeUnorderedAuto: + case OMPScheduleType::NomergeUnorderedTrapezoidal: + case OMPScheduleType::NomergeUnorderedGreedy: + case OMPScheduleType::NomergeUnorderedBalanced: + case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked: + case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked: + case OMPScheduleType::NomergeUnorderedSteal: + case OMPScheduleType::NomergeOrderedStaticChunked: + case OMPScheduleType::NomergeOrderedStatic: + case OMPScheduleType::NomergeOrderedDynamicChunked: + case OMPScheduleType::NomergeOrderedGuidedChunked: + case OMPScheduleType::NomergeOrderedRuntime: + case OMPScheduleType::NomergeOrderedAuto: + case OMPScheduleType::NomergeOrderedTrapezoidal: + break; + default: + return false; + } + + // Must not set both monotonicity modifiers at the same time. + OMPScheduleType MonotonicityFlags = + SchedType & OMPScheduleType::MonotonicityMask; + if (MonotonicityFlags == OMPScheduleType::MonotonicityMask) + return false; + + return true; +} #endif +/// Determine which scheduling algorithm to use, based on the schedule clause +/// arguments. +static OMPScheduleType +getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, + bool HasSimdModifier) { + // Currently, the default schedule is static. + switch (ClauseKind) { + case OMP_SCHEDULE_Default: + case OMP_SCHEDULE_Static: + return HasChunks ? OMPScheduleType::BaseStaticChunked + : OMPScheduleType::BaseStatic; + case OMP_SCHEDULE_Dynamic: + return OMPScheduleType::BaseDynamicChunked; + case OMP_SCHEDULE_Guided: + return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd + : OMPScheduleType::BaseGuidedChunked; + case OMP_SCHEDULE_Auto: + return llvm::omp::OMPScheduleType::BaseAuto; + case OMP_SCHEDULE_Runtime: + return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd + : OMPScheduleType::BaseRuntime; + } + llvm_unreachable("unhandled schedule clause argument"); +} + +/// Adds ordering modifier flags to schedule type.
+static OMPScheduleType +getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, + bool HasOrderedClause) { + assert((BaseScheduleType & OMPScheduleType::ModifierMask) == + OMPScheduleType::None && + "Must not have ordering nor monotonicity flags already set"); + + OMPScheduleType OrderingModifier = HasOrderedClause + ? OMPScheduleType::ModifierOrdered + : OMPScheduleType::ModifierUnordered; + OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier; + + // Unsupported combinations + if (OrderingScheduleType == + (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered)) + return OMPScheduleType::OrderedGuidedChunked; + else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd | + OMPScheduleType::ModifierOrdered)) + return OMPScheduleType::OrderedRuntime; + + return OrderingScheduleType; +} + +/// Adds monotonicity modifier flags to schedule type. +static OMPScheduleType +getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, + bool HasSimdModifier, bool HasMonotonic, + bool HasNonmonotonic, bool HasOrderedClause) { + assert((ScheduleType & OMPScheduleType::MonotonicityMask) == + OMPScheduleType::None && + "Must not have monotonicity flags already set"); + assert((!HasMonotonic || !HasNonmonotonic) && + "Monotonic and Nonmonotonic are contradicting each other"); + + if (HasMonotonic) { + return ScheduleType | OMPScheduleType::ModifierMonotonic; + } else if (HasNonmonotonic) { + return ScheduleType | OMPScheduleType::ModifierNonmonotonic; + } else { + // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description. + // If the static schedule kind is specified or if the ordered clause is + // specified, and if the nonmonotonic modifier is not specified, the + // effect is as if the monotonic modifier is specified. Otherwise, unless + // the monotonic modifier is specified, the effect is as if the + // nonmonotonic modifier is specified. + OMPScheduleType BaseScheduleType = + ScheduleType & ~OMPScheduleType::ModifierMask; + if ((BaseScheduleType == OMPScheduleType::BaseStatic) || + (BaseScheduleType == OMPScheduleType::BaseStaticChunked) || + HasOrderedClause) { + // The monotonic is used by default in openmp runtime library, so no need + // to set it. + return ScheduleType; + } else { + return ScheduleType | OMPScheduleType::ModifierNonmonotonic; + } + } +} + +/// Determine the schedule type using schedule and ordering clause arguments. +static OMPScheduleType +computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, + bool HasSimdModifier, bool HasMonotonicModifier, + bool HasNonmonotonicModifier, bool HasOrderedClause) { + OMPScheduleType BaseSchedule = + getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier); + OMPScheduleType OrderedSchedule = + getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause); + OMPScheduleType Result = getOpenMPMonotonicityScheduleType( + OrderedSchedule, HasSimdModifier, HasMonotonicModifier, + HasNonmonotonicModifier, HasOrderedClause); + + assert(isValidWorkshareLoopScheduleType(Result)); + return Result; +} + +/// Make \p Source branch to \p Target. +/// +/// Handles two situations: +/// * \p Source already has an unconditional branch. +/// * \p Source is a degenerate block (no terminator because the BB is +/// the current head of the IR construction). 
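computeOpenMPScheduleType layers three independent decisions: a base algorithm from the schedule clause, the ordered/unordered modifier, and the OpenMP 5.1 monotonicity defaults (static or ordered schedules count as monotonic, which the runtime assumes without a flag; everything else gets the nonmonotonic modifier). A self-contained model of that layering; the flag values here are invented for illustration and do not match the runtime's actual encoding:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Toy flag encoding; the real OMPScheduleType values follow the OpenMP
// runtime ABI and are different.
enum Sched : uint32_t {
  BaseStatic = 1,
  BaseDynamicChunked = 2,
  ModifierUnordered = 1u << 5,
  ModifierOrdered = 1u << 6,
  ModifierMonotonic = 1u << 7,
  ModifierNonmonotonic = 1u << 8,
  MonotonicityMask = ModifierMonotonic | ModifierNonmonotonic,
};

uint32_t computeSchedule(bool IsStatic, bool HasOrderedClause) {
  uint32_t S = IsStatic ? BaseStatic : BaseDynamicChunked;
  S |= HasOrderedClause ? ModifierOrdered : ModifierUnordered;
  // OpenMP 5.1, 2.11.4: static or ordered implies monotonic, which the
  // runtime assumes implicitly; everything else gets nonmonotonic.
  if (!IsStatic && !HasOrderedClause)
    S |= ModifierNonmonotonic;
  assert((S & MonotonicityMask) != MonotonicityMask && "contradictory flags");
  return S;
}

int main() {
  std::printf("schedule(dynamic): %#x\n", computeSchedule(false, false));
  std::printf("schedule(static) ordered: %#x\n", computeSchedule(true, true));
}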
+static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) { + if (Instruction *Term = Source->getTerminator()) { + auto *Br = cast(Term); + assert(!Br->isConditional() && + "BB's terminator must be an unconditional branch (or degenerate)"); + BasicBlock *Succ = Br->getSuccessor(0); + Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true); + Br->setSuccessor(0, Target); + return; + } + + auto *NewBr = BranchInst::Create(Target, Source); + NewBr->setDebugLoc(DL); +} + +void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, + bool CreateBranch) { + assert(New->getFirstInsertionPt() == New->begin() && + "Target BB must not have PHI nodes"); + + // Move instructions to new block. + BasicBlock *Old = IP.getBlock(); + New->getInstList().splice(New->begin(), Old->getInstList(), IP.getPoint(), + Old->end()); + + if (CreateBranch) + BranchInst::Create(New, Old); +} + +void llvm::spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) { + DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); + BasicBlock *Old = Builder.GetInsertBlock(); + + spliceBB(Builder.saveIP(), New, CreateBranch); + if (CreateBranch) + Builder.SetInsertPoint(Old->getTerminator()); + else + Builder.SetInsertPoint(Old); + + // SetInsertPoint also updates the Builder's debug location, but we want to + // keep the one the Builder was configured to use. + Builder.SetCurrentDebugLocation(DebugLoc); +} + +BasicBlock *llvm::splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, + llvm::Twine Name) { + BasicBlock *Old = IP.getBlock(); + BasicBlock *New = BasicBlock::Create( + Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name, + Old->getParent(), Old->getNextNode()); + spliceBB(IP, New, CreateBranch); + New->replaceSuccessorsPhiUsesWith(Old, New); + return New; +} + +BasicBlock *llvm::splitBB(IRBuilderBase &Builder, bool CreateBranch, + llvm::Twine Name) { + DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); + BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name); + if (CreateBranch) + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + else + Builder.SetInsertPoint(Builder.GetInsertBlock()); + // SetInsertPoint also updates the Builder's debug location, but we want to + // keep the one the Builder was configured to use. + Builder.SetCurrentDebugLocation(DebugLoc); + return New; +} + +BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch, + llvm::Twine Name) { + DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); + BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name); + if (CreateBranch) + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + else + Builder.SetInsertPoint(Builder.GetInsertBlock()); + // SetInsertPoint also updates the Builder's debug location, but we want to + // keep the one the Builder was configured to use. 
+ Builder.SetCurrentDebugLocation(DebugLoc); + return New; +} + +BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, + llvm::Twine Suffix) { + BasicBlock *Old = Builder.GetInsertBlock(); + return splitBB(Builder, CreateBranch, Old->getName() + Suffix); +} + void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); @@ -199,6 +451,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) { /* AssumptionCache */ nullptr, /* AllowVarArgs */ true, /* AllowAlloca */ true, + /* AllocaBlock*/ OI.OuterAllocaBB, /* Suffix */ ".omp_par"); LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); @@ -500,6 +753,44 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, return Builder.saveIP(); } +void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name, + uint64_t Size, int32_t Flags, + StringRef SectionName) { + Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext()); + + Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name); + + // Create the constant string used to look up the symbol in the device. + auto *Str = + new llvm::GlobalVariable(M, AddrName->getType(), /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, AddrName, + ".omp_offloading.entry_name"); + Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + // Construct the offloading entry. + Constant *EntryData[] = { + ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, Int8PtrTy), + ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, Int8PtrTy), + ConstantInt::get(SizeTy, Size), + ConstantInt::get(Int32Ty, Flags), + ConstantInt::get(Int32Ty, 0), + }; + Constant *EntryInitializer = + ConstantStruct::get(OpenMPIRBuilder::OffloadEntry, EntryData); + + auto *Entry = new GlobalVariable( + M, OpenMPIRBuilder::OffloadEntry, + /* isConstant = */ true, GlobalValue::WeakAnyLinkage, EntryInitializer, + ".omp_offloading.entry." + Name, nullptr, GlobalValue::NotThreadLocal, + M.getDataLayout().getDefaultGlobalsAddressSpace()); + + // The entry has to be created in the section the linker expects it to be. + Entry->setSection(SectionName); + Entry->setAlignment(Align(1)); +} + void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB) { @@ -670,7 +961,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // Let the caller create the body. assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); - BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB); + BodyGenCB(InnerAllocaIP, CodeGenIP); LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); @@ -777,6 +1068,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); + OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; OI.ExitBB = PRegExitBB; @@ -800,6 +1092,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( /* AssumptionCache */ nullptr, /* AllowVarArgs */ true, /* AllowAlloca */ true, + /* AllocationBlock */ OuterAllocaBlock, /* Suffix */ ".omp_par"); // Find inputs to, outputs from the code region. 
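emitOffloadingEntry materializes one record of the offload entry table: the entry's address, a pointer to the generated name string, a size, a flags word, and a reserved field, emitted with alignment 1 into a named section. Since every translation unit appends to that same section, the linker concatenates the records into one contiguous array the runtime can walk. An illustrative mirror of the record; the field names are mine and the authoritative layout belongs to the offloading runtime:

#include <cstdint>

struct OffloadEntry {
  void *Addr;       // address of the offloaded function or global
  char *Name;       // name used to look the symbol up on the device
  uint64_t Size;    // size in bytes; 0 for functions
  int32_t Flags;    // entry kind and properties
  int32_t Reserved; // currently always 0
};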
@@ -960,10 +1253,185 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { emitTaskyieldImpl(Loc); } +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createTask(const LocationDescription &Loc, + InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, + bool Tied, Value *Final) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + + // The current basic block is split into four basic blocks. After outlining, + // they will be mapped as follows: + // ``` + // def current_fn() { + // current_basic_block: + // br label %task.exit + // task.exit: + // ; instructions after task + // } + // def outlined_fn() { + // task.alloca: + // br label %task.body + // task.body: + // ret void + // } + // ``` + BasicBlock *TaskExitBB = splitBB(Builder, /*CreateBranch=*/true, "task.exit"); + BasicBlock *TaskBodyBB = splitBB(Builder, /*CreateBranch=*/true, "task.body"); + BasicBlock *TaskAllocaBB = + splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); + + OutlineInfo OI; + OI.EntryBB = TaskAllocaBB; + OI.OuterAllocaBB = AllocaIP.getBlock(); + OI.ExitBB = TaskExitBB; + OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) { + // The input IR here looks like the following- + // ``` + // func @current_fn() { + // outlined_fn(%args) + // } + // func @outlined_fn(%args) { ... } + // ``` + // + // This is changed to the following- + // + // ``` + // func @current_fn() { + // runtime_call(..., wrapper_fn, ...) + // } + // func @wrapper_fn(..., %args) { + // outlined_fn(%args) + // } + // func @outlined_fn(%args) { ... } + // ``` + + // The stale call instruction will be replaced with a new call instruction + // for runtime call with a wrapper function. + assert(OutlinedFn.getNumUses() == 1 && + "there must be a single user for the outlined function"); + CallInst *StaleCI = cast(OutlinedFn.user_back()); + + // HasTaskData is true if any variables are captured in the outlined region, + // false otherwise. + bool HasTaskData = StaleCI->arg_size() > 0; + Builder.SetInsertPoint(StaleCI); + + // Gather the arguments for emitting the runtime call for + // @__kmpc_omp_task_alloc + Function *TaskAllocFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); + + // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) + // call. + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadID = getOrCreateThreadID(Ident); + + // Argument - `flags` + // Task is tied iff (Flags & 1) == 1. + // Task is untied iff (Flags & 1) == 0. + // Task is final iff (Flags & 2) == 2. + // Task is not final iff (Flags & 2) == 0. + // TODO: Handle the other flags. + Value *Flags = Builder.getInt32(Tied); + if (Final) { + Value *FinalFlag = + Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0)); + Flags = Builder.CreateOr(FinalFlag, Flags); + } + + // Argument - `sizeof_kmp_task_t` (TaskSize) + // Tasksize refers to the size in bytes of kmp_task_t data structure + // including private vars accessed in task. 
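The flags argument of __kmpc_omp_task_alloc packs task properties into bits, bit 0 for tied and bit 1 for final; because the final clause can be a runtime expression, the builder emits a select and an or instead of folding a constant. The scalar equivalent of that encoding:

#include <cstdint>

// Bit 0: tied, bit 1: final (other flag bits are still TODO above).
uint32_t encodeTaskFlags(bool Tied, bool Final) {
  return (Tied ? 1u : 0u) | (Final ? 2u : 0u);
}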
+ Value *TaskSize = Builder.getInt64(0); + if (HasTaskData) { + AllocaInst *ArgStructAlloca = + dyn_cast(StaleCI->getArgOperand(0)); + assert(ArgStructAlloca && + "Unable to find the alloca instruction corresponding to arguments " + "for extracted function"); + StructType *ArgStructType = + dyn_cast(ArgStructAlloca->getAllocatedType()); + assert(ArgStructType && "Unable to find struct type corresponding to " + "arguments for extracted function"); + TaskSize = + Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); + } + + // TODO: Argument - sizeof_shareds + + // Argument - task_entry (the wrapper function) + // If the outlined function has some captured variables (i.e. HasTaskData is + // true), then the wrapper function will have an additional argument (the + // struct containing captured variables). Otherwise, no such argument will + // be present. + SmallVector WrapperArgTys{Builder.getInt32Ty()}; + if (HasTaskData) + WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType()); + FunctionCallee WrapperFuncVal = M.getOrInsertFunction( + (Twine(OutlinedFn.getName()) + ".wrapper").str(), + FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false)); + Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); + PointerType *WrapperFuncBitcastType = + FunctionType::get(Builder.getInt32Ty(), + {Builder.getInt32Ty(), Builder.getInt8PtrTy()}, false) + ->getPointerTo(); + Value *WrapperFuncBitcast = + ConstantExpr::getBitCast(WrapperFunc, WrapperFuncBitcastType); + + // Emit the @__kmpc_omp_task_alloc runtime call + // The runtime call returns a pointer to an area where the task captured + // variables must be copied before the task is run (NewTaskData) + CallInst *NewTaskData = Builder.CreateCall( + TaskAllocFn, + {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags, + /*sizeof_task=*/TaskSize, /*sizeof_shared=*/Builder.getInt64(0), + /*task_func=*/WrapperFuncBitcast}); + + // Copy the arguments for outlined function + if (HasTaskData) { + Value *TaskData = StaleCI->getArgOperand(0); + Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); + Builder.CreateMemCpy(NewTaskData, Alignment, TaskData, Alignment, + TaskSize); + } + + // Emit the @__kmpc_omp_task runtime call to spawn the task + Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task); + Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData}); + + StaleCI->eraseFromParent(); + + // Emit the body for wrapper function + BasicBlock *WrapperEntryBB = + BasicBlock::Create(M.getContext(), "", WrapperFunc); + Builder.SetInsertPoint(WrapperEntryBB); + if (HasTaskData) + Builder.CreateCall(&OutlinedFn, {WrapperFunc->getArg(1)}); + else + Builder.CreateCall(&OutlinedFn); + Builder.CreateRet(Builder.getInt32(0)); + }; + + addOutlineInfo(std::move(OI)); + + InsertPointTy TaskAllocaIP = + InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); + InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); + BodyGenCB(TaskAllocaIP, TaskBodyIP); + Builder.SetInsertPoint(TaskExitBB); + + return Builder.saveIP(); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { + assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required"); + if (!updateToLocation(Loc)) return Loc.IP; @@ -1006,26 +1474,25 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( // section_loop.after: // ; auto 
LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) { - auto *CurFn = CodeGenIP.getBlock()->getParent(); - auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor(); - auto *ForExitBB = CodeGenIP.getBlock() - ->getSinglePredecessor() - ->getTerminator() - ->getSuccessor(1); - SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB); Builder.restoreIP(CodeGenIP); + BasicBlock *Continue = + splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after"); + Function *CurFn = Continue->getParent(); + SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, Continue); + unsigned CaseNumber = 0; for (auto SectionCB : SectionCBs) { - auto *CaseBB = BasicBlock::Create(M.getContext(), - "omp_section_loop.body.case", CurFn); + BasicBlock *CaseBB = BasicBlock::Create( + M.getContext(), "omp_section_loop.body.case", CurFn, Continue); SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); Builder.SetInsertPoint(CaseBB); - SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB); + BranchInst *CaseEndBr = Builder.CreateBr(Continue); + SectionCB(InsertPointTy(), + {CaseEndBr->getParent(), CaseEndBr->getIterator()}); CaseNumber++; } // remove the existing terminator from body BB since there can be no // terminators after switch/case - CodeGenIP.getBlock()->getTerminator()->eraseFromParent(); }; // Loop body ends here // LowerBound, UpperBound, and Stride for createCanonicalLoop @@ -1035,29 +1502,22 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Value *ST = ConstantInt::get(I32Ty, 1); llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); - Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); - AllocaIP = Builder.saveIP(); InsertPointTy AfterIP = applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); - BasicBlock *LoopAfterBB = AfterIP.getBlock(); - Instruction *SplitPos = LoopAfterBB->getTerminator(); - if (!isa_and_nonnull<BranchInst>(SplitPos)) - SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB); - // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB, - // which requires a BB with branch - BasicBlock *ExitBB = - LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end"); - SplitPos->eraseFromParent(); // Apply the finalization callback in LoopAfterBB auto FiniInfo = FinalizationStack.pop_back_val(); assert(FiniInfo.DK == OMPD_sections && "Unexpected finalization stack state!"); - Builder.SetInsertPoint(LoopAfterBB->getTerminator()); - FiniInfo.FiniCB(Builder.saveIP()); - Builder.SetInsertPoint(ExitBB); + if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) { + Builder.restoreIP(AfterIP); + BasicBlock *FiniBB = + splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini"); + CB(Builder.saveIP()); + AfterIP = {FiniBB, FiniBB->begin()}; + } - return Builder.saveIP(); + return AfterIP; } OpenMPIRBuilder::InsertPointTy @@ -1402,10 +1862,8 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, // Split the loop at the insertion point: Branch to the preheader and move // every following instruction to after the loop (the After BB). Also, the // new successor is the loop's after block. + spliceBB(Builder, After, /*CreateBranch=*/false); Builder.CreateBr(CL->getPreheader()); - After->getInstList().splice(After->begin(), BB->getInstList(), - Builder.GetInsertPoint(), BB->end()); - After->replaceSuccessorsPhiUsesWith(BB, After); } // Emit the body content.
We do it after connecting the loop to the CFG to @@ -1506,20 +1964,10 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } -// Sets the number of loop iterations to the given value. This value must be -// valid in the condition block (i.e., defined in the preheader) and is -// interpreted as an unsigned integer. -void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) { - Instruction *CmpI = &CLI->getCond()->front(); - assert(isa(CmpI) && "First inst must compare IV with TripCount"); - CmpI->setOperand(1, TripCount); - CLI->assertOK(); -} - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - bool NeedsBarrier, Value *Chunk) { + bool NeedsBarrier) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) && "Require dedicated allocate IP"); @@ -1559,38 +2007,31 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Builder.CreateStore(UpperBound, PUpperBound); Builder.CreateStore(One, PStride); - // FIXME: schedule(static) is NOT the same as schedule(static,1) - if (!Chunk) - Chunk = One; - Value *ThreadNum = getOrCreateThreadID(SrcLoc); - Constant *SchedulingType = - ConstantInt::get(I32Type, static_cast(OMPScheduleType::Static)); + Constant *SchedulingType = ConstantInt::get( + I32Type, static_cast(OMPScheduleType::UnorderedStatic)); // Call the "init" function and update the trip count of the loop with the // value it produced. Builder.CreateCall(StaticInit, {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, - PUpperBound, PStride, One, Chunk}); + PUpperBound, PStride, One, Zero}); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); - setCanonicalLoopTripCount(CLI, TripCount); + CLI->setTripCount(TripCount); // Update all uses of the induction variable except the one in the condition // block that compares it with the actual upper bound, and the increment in // the latch block. - // TODO: this can eventually move to CanonicalLoopInfo or to a new - // CanonicalLoopInfoUpdater interface. - Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt()); - Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound); - IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) { - auto *Instr = dyn_cast(U.getUser()); - return !Instr || - (Instr->getParent() != CLI->getCond() && - Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV); + + CLI->mapIndVar([&](Instruction *OldIV) -> Value * { + Builder.SetInsertPoint(CLI->getBody(), + CLI->getBody()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(DL); + return Builder.CreateAdd(OldIV, LowerBound); }); // In the "exit" block, call the "fini" function. @@ -1610,11 +2051,198 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, return AfterIP; } -OpenMPIRBuilder::InsertPointTy -OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, bool NeedsBarrier) { - // Currently only supports static schedules. 
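applyStaticWorkshareLoop reshapes the canonical loop so each thread runs only its slice: the __kmpc_for_static_init_* call rewrites the stored bounds to the thread's [LB, UB], the trip count becomes UB - LB + 1, and mapIndVar rebases every use of the induction variable by LB. A scalar model of the per-thread effect, with illustrative names:

#include <cstdint>

// What one thread executes after the runtime's "init" call has narrowed
// [LB, InclusiveUB] to that thread's slice of the iteration space.
void runThreadSlice(uint64_t LB, uint64_t InclusiveUB,
                    void (*Body)(uint64_t)) {
  uint64_t TripCount = InclusiveUB - LB + 1;
  for (uint64_t IV = 0; IV < TripCount; ++IV)
    Body(IV + LB); // the mapIndVar rebase
}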
- return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( + DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + bool NeedsBarrier, Value *ChunkSize) { + assert(CLI->isValid() && "Requires a valid canonical loop"); + assert(ChunkSize && "Chunk size is required"); + + LLVMContext &Ctx = CLI->getFunction()->getContext(); + Value *IV = CLI->getIndVar(); + Value *OrigTripCount = CLI->getTripCount(); + Type *IVTy = IV->getType(); + assert(IVTy->getIntegerBitWidth() <= 64 && + "Max supported tripcount bitwidth is 64 bits"); + Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32 ? Type::getInt32Ty(Ctx) + : Type::getInt64Ty(Ctx); + Type *I32Type = Type::getInt32Ty(M.getContext()); + Constant *Zero = ConstantInt::get(InternalIVTy, 0); + Constant *One = ConstantInt::get(InternalIVTy, 1); + + // Declare useful OpenMP runtime functions. + FunctionCallee StaticInit = + getKmpcForStaticInitForType(InternalIVTy, M, *this); + FunctionCallee StaticFini = + getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); + + // Allocate space for computed loop bounds as expected by the "init" function. + Builder.restoreIP(AllocaIP); + Builder.SetCurrentDebugLocation(DL); + Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); + Value *PLowerBound = + Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound"); + Value *PUpperBound = + Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound"); + Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride"); + + // Set up the source location value for the OpenMP runtime. + Builder.restoreIP(CLI->getPreheaderIP()); + Builder.SetCurrentDebugLocation(DL); + + // TODO: Detect overflow in ubsan or max-out with current tripcount. + Value *CastedChunkSize = + Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy, "chunksize"); + Value *CastedTripCount = + Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount"); + + Constant *SchedulingType = ConstantInt::get( + I32Type, static_cast(OMPScheduleType::UnorderedStaticChunked)); + Builder.CreateStore(Zero, PLowerBound); + Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One); + Builder.CreateStore(OrigUpperBound, PUpperBound); + Builder.CreateStore(One, PStride); + + // Call the "init" function and update the trip count of the loop with the + // value it produced. + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadNum = getOrCreateThreadID(SrcLoc); + Builder.CreateCall(StaticInit, + {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum, + /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter, + /*plower=*/PLowerBound, /*pupper=*/PUpperBound, + /*pstride=*/PStride, /*incr=*/One, + /*chunk=*/CastedChunkSize}); + + // Load values written by the "init" function. + Value *FirstChunkStart = + Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb"); + Value *FirstChunkStop = + Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub"); + Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One); + Value *ChunkRange = + Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range"); + Value *NextChunkStride = + Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride"); + + // Create outer "dispatch" loop for enumerating the chunks. 
+ BasicBlock *DispatchEnter = splitBB(Builder, true); + Value *DispatchCounter; + CanonicalLoopInfo *DispatchCLI = createCanonicalLoop( + {Builder.saveIP(), DL}, + [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; }, + FirstChunkStart, CastedTripCount, NextChunkStride, + /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{}, + "dispatch"); + + // Remember the BasicBlocks of the dispatch loop we need, then invalidate to + // not have to preserve the canonical invariant. + BasicBlock *DispatchBody = DispatchCLI->getBody(); + BasicBlock *DispatchLatch = DispatchCLI->getLatch(); + BasicBlock *DispatchExit = DispatchCLI->getExit(); + BasicBlock *DispatchAfter = DispatchCLI->getAfter(); + DispatchCLI->invalidate(); + + // Rewire the original loop to become the chunk loop inside the dispatch loop. + redirectTo(DispatchAfter, CLI->getAfter(), DL); + redirectTo(CLI->getExit(), DispatchLatch, DL); + redirectTo(DispatchBody, DispatchEnter, DL); + + // Prepare the prolog of the chunk loop. + Builder.restoreIP(CLI->getPreheaderIP()); + Builder.SetCurrentDebugLocation(DL); + + // Compute the number of iterations of the chunk loop. + Builder.SetInsertPoint(CLI->getPreheader()->getTerminator()); + Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange); + Value *IsLastChunk = + Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last"); + Value *CountUntilOrigTripCount = + Builder.CreateSub(CastedTripCount, DispatchCounter); + Value *ChunkTripCount = Builder.CreateSelect( + IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount"); + Value *BackcastedChunkTC = + Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc"); + CLI->setTripCount(BackcastedChunkTC); + + // Update all uses of the induction variable except the one in the condition + // block that compares it with the actual upper bound, and the increment in + // the latch block. + Value *BackcastedDispatchCounter = + Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc"); + CLI->mapIndVar([&](Instruction *) -> Value * { + Builder.restoreIP(CLI->getBodyIP()); + return Builder.CreateAdd(IV, BackcastedDispatchCounter); + }); + + // In the "exit" block, call the "fini" function. + Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt()); + Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); + + // Add the barrier if requested. + if (NeedsBarrier) + createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for, + /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false); + +#ifndef NDEBUG + // Even though we currently do not support applying additional methods to it, + // the chunk loop should remain a canonical loop. 
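The chunked lowering nests the original loop inside a dispatch loop whose counter advances by the stride the runtime returned; the final chunk's trip count is clamped to the overall trip count, which is exactly the select built above. A single-threaded scalar model, with illustrative names:

#include <cstdint>

// Dispatch/chunk nest: the dispatch counter enumerates chunk start points,
// and the inner loop runs one chunk rebased by that counter.
void chunkedLoop(uint64_t TripCount, uint64_t ChunkRange, uint64_t Stride,
                 void (*Body)(uint64_t)) {
  for (uint64_t Dispatch = 0; Dispatch < TripCount; Dispatch += Stride) {
    uint64_t ChunkEnd = Dispatch + ChunkRange;
    bool IsLastChunk = ChunkEnd >= TripCount;
    uint64_t ChunkTripCount = IsLastChunk ? TripCount - Dispatch : ChunkRange;
    for (uint64_t IV = 0; IV < ChunkTripCount; ++IV)
      Body(IV + Dispatch); // chunk IV rebased by the dispatch counter
  }
}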
+ CLI->assertOK(); +#endif + + return {DispatchAfter, DispatchAfter->getFirstInsertionPt()}; +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop( + DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind, + llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier, + bool HasNonmonotonicModifier, bool HasOrderedClause) { + OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType( + SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier, + HasNonmonotonicModifier, HasOrderedClause); + + bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) == + OMPScheduleType::ModifierOrdered; + switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) { + case OMPScheduleType::BaseStatic: + assert(!ChunkSize && "No chunk size with static-chunked schedule"); + if (IsOrdered) + return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType, + NeedsBarrier, ChunkSize); + // FIXME: Monotonicity ignored? + return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier); + + case OMPScheduleType::BaseStaticChunked: + if (IsOrdered) + return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType, + NeedsBarrier, ChunkSize); + // FIXME: Monotonicity ignored? + return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier, + ChunkSize); + + case OMPScheduleType::BaseRuntime: + case OMPScheduleType::BaseAuto: + case OMPScheduleType::BaseGreedy: + case OMPScheduleType::BaseBalanced: + case OMPScheduleType::BaseSteal: + case OMPScheduleType::BaseGuidedSimd: + case OMPScheduleType::BaseRuntimeSimd: + assert(!ChunkSize && + "schedule type does not support user-defined chunk sizes"); + LLVM_FALLTHROUGH; + case OMPScheduleType::BaseDynamicChunked: + case OMPScheduleType::BaseGuidedChunked: + case OMPScheduleType::BaseGuidedIterativeChunked: + case OMPScheduleType::BaseGuidedAnalyticalChunked: + case OMPScheduleType::BaseStaticBalancedChunked: + return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType, + NeedsBarrier, ChunkSize); + + default: + llvm_unreachable("Unknown/unimplemented schedule kind"); + } } /// Returns an LLVM function to call for initializing loop bounds using OpenMP @@ -1649,12 +2277,32 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } +/// Returns an LLVM function to call for finalizing the dynamic loop, +/// depending on `type`. Only i32 and i64 are supported by the runtime. Always +/// interpret integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee +getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { + unsigned Bitwidth = Ty->getIntegerBitWidth(); + if (Bitwidth == 32) + return OMPBuilder.getOrCreateRuntimeFunction( + M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u); + if (Bitwidth == 64) + return OMPBuilder.getOrCreateRuntimeFunction( + M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u); + llvm_unreachable("unknown OpenMP loop iterator bitwidth"); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) && "Require dedicated allocate IP"); + assert(isValidWorkshareLoopScheduleType(SchedType) && + "Require valid schedule type"); + + bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) == + OMPScheduleType::ModifierOrdered; // Set up the source location value for OpenMP runtime. Builder.SetCurrentDebugLocation(DL); @@ -1692,6 +2340,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( BasicBlock *Header = CLI->getHeader(); BasicBlock *Exit = CLI->getExit(); BasicBlock *Cond = CLI->getCond(); + BasicBlock *Latch = CLI->getLatch(); InsertPointTy AfterIP = CLI->getAfterIP(); // The CLI will be "broken" in the code below, as the loop is no longer @@ -1751,6 +2400,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( assert(BI->getSuccessor(1) == Exit); BI->setSuccessor(1, OuterCond); + // Call the "fini" function if "ordered" is present in wsloop directive. + if (Ordered) { + Builder.SetInsertPoint(&Latch->back()); + FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this); + Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum}); + } + // Add the barrier if requested. if (NeedsBarrier) { Builder.SetInsertPoint(&Exit->back()); @@ -1763,27 +2419,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( return AfterIP; } -/// Make \p Source branch to \p Target. -/// -/// Handles two situations: -/// * \p Source already has an unconditional branch. -/// * \p Source is a degenerate block (no terminator because the BB is -/// the current head of the IR construction). -static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) { - if (Instruction *Term = Source->getTerminator()) { - auto *Br = cast(Term); - assert(!Br->isConditional() && - "BB's terminator must be an unconditional branch (or degenerate)"); - BasicBlock *Succ = Br->getSuccessor(0); - Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true); - Br->setSuccessor(0, Target); - return; - } - - auto *NewBr = BranchInst::Create(Target, Source); - NewBr->setDebugLoc(DL); -} - /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is, /// after this \p OldTarget will be orphaned. 
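applyDynamicWorkshareLoop wraps the body in a loop over chunks handed out by __kmpc_dispatch_next_*, and with the ordered clause it now also calls the matching __kmpc_dispatch_fini_* once per chunk from the latch so the runtime can release the next chunk in order. A runnable single-threaded model with a trivial stand-in dispatcher (the real protocol also reports strides and a last-iteration flag):

#include <algorithm>
#include <atomic>
#include <cstdint>

// Toy stand-in for __kmpc_dispatch_next_8u: hand out the next chunk until
// the iteration space is exhausted. Bounds are inclusive, as in the runtime.
std::atomic<uint64_t> NextLB{0};

bool dispatchNext(uint64_t TripCount, uint64_t Chunk, uint64_t &LB,
                  uint64_t &UB) {
  uint64_t Start = NextLB.fetch_add(Chunk);
  if (Start >= TripCount)
    return false;
  LB = Start;
  UB = std::min(Start + Chunk, TripCount) - 1;
  return true;
}

void dynamicWorkshareLoop(uint64_t TripCount, uint64_t Chunk,
                          void (*Body)(uint64_t)) {
  uint64_t LB, UB;
  while (dispatchNext(TripCount, Chunk, LB, UB)) {
    for (uint64_t IV = LB; IV <= UB; ++IV)
      Body(IV);
    // With the ordered clause, __kmpc_dispatch_fini_* is called here (the
    // latch) so the runtime can release the next chunk in source order.
  }
}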
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, @@ -2385,16 +3020,17 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) { unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; - unsigned LoopSize = + InstructionCost LoopSizeIC = ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n"); + LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n"); // Loop is not unrollable if the loop contains certain instructions. - if (NotDuplicatable || Convergent) { + if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) { LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n"); return 1; } + unsigned LoopSize = *LoopSizeIC.getValue(); // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might // be able to use it. @@ -2506,10 +3142,9 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy -OpenMPIRBuilder::createSingle(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, llvm::Value *DidIt) { +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( + const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) { if (!updateToLocation(Loc)) return Loc.IP; @@ -2537,9 +3172,16 @@ OpenMPIRBuilder::createSingle(const LocationDescription &Loc, // .... single region ... // __kmpc_end_single // } - - return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, /*hasFinalize*/ true); + // __kmpc_barrier + + EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ true, + /*hasFinalize*/ true); + if (!IsNowait) + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + return Builder.saveIP(); } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( @@ -2674,48 +3316,28 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( // generate body BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveIP(), *FiniBB); - - // If we didn't emit a branch to FiniBB during body generation, it means - // FiniBB is unreachable (e.g. while(1);). stop generating all the - // unreachable blocks, and remove anything we are not going to use. - auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); - if (SkipEmittingRegion) { - FiniBB->eraseFromParent(); - ExitCall->eraseFromParent(); - // Discard finalization if we have it. - if (HasFinalize) { - assert(!FinalizationStack.empty() && - "Unexpected finalization stack state!"); - FinalizationStack.pop_back(); - } - } else { - // emit exit call and do any needed finalization. - auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); - assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && - FiniBB->getTerminator()->getSuccessor(0) == ExitBB && - "Unexpected control flow graph state!!"); - emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); - assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && - "Unexpected Control Flow State!"); - MergeBlockIntoPredecessor(FiniBB); - } + /* CodeGenIP */ Builder.saveIP()); + + // emit exit call and do any needed finalization. 
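The createSingle change above pairs the inlined region with the implicit barrier that OpenMP attaches to single without nowait. The emitted call shape, sketched with arguments elided (a model of the sequence, not literal builder output):

// Emitted when IsNowait is false:
//   if (__kmpc_single(loc, tid)) {   // only one thread wins
//     ...single region...
//     __kmpc_end_single(loc, tid);
//   }
//   __kmpc_barrier(loc, tid);        // every thread rendezvouses here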
+ auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); + assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && + FiniBB->getTerminator()->getSuccessor(0) == ExitBB && + "Unexpected control flow graph state!!"); + emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && + "Unexpected Control Flow State!"); + MergeBlockIntoPredecessor(FiniBB); // If we are skipping the region of a non conditional, remove the exit // block, and clear the builder's insertion point. assert(SplitPos->getParent() == ExitBB && "Unexpected Insertion point location!"); - if (!Conditional && SkipEmittingRegion) { - ExitBB->eraseFromParent(); - Builder.ClearInsertionPoint(); - } else { - auto merged = MergeBlockIntoPredecessor(ExitBB); - BasicBlock *ExitPredBB = SplitPos->getParent(); - auto InsertBB = merged ? ExitPredBB : ExitBB; - if (!isa_and_nonnull(SplitPos)) - SplitPos->eraseFromParent(); - Builder.SetInsertPoint(InsertBB); - } + auto merged = MergeBlockIntoPredecessor(ExitBB); + BasicBlock *ExitPredBB = SplitPos->getParent(); + auto InsertBB = merged ? ExitPredBB : ExitBB; + if (!isa_and_nonnull(SplitPos)) + SplitPos->eraseFromParent(); + Builder.SetInsertPoint(InsertBB); return Builder.saveIP(); } @@ -3171,6 +3793,7 @@ bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic( } break; case Write: + case Compare: case Update: if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease || AO == AtomicOrdering::SequentiallyConsistent) { @@ -3290,9 +3913,10 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( - const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, + const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) { + assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous"); if (!updateToLocation(Loc)) return Loc.IP; @@ -3309,7 +3933,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( "OpenMP atomic does not support LT or GT operations"); }); - emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, + emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, IsXBinopExpr); checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); return Builder.saveIP(); @@ -3344,20 +3968,39 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, } std::pair OpenMPIRBuilder::emitAtomicUpdate( - Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr, + InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) { - bool DoCmpExch = - ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) || - (RMWOp == AtomicRMWInst::FSub) || - (RMWOp == AtomicRMWInst::Sub && !IsXBinopExpr); + // TODO: handle the case where XElemTy is not byte-sized or not a power of 2 + // or a complex datatype. 
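emitAtomicUpdate now whitelists the integer operations that map directly onto atomicrmw and routes everything else, such as floating-point add/sub, reversed subtraction, and arbitrary update expressions, through a compare-exchange retry loop. The same shape in portable C++ for a float add (std::atomic<float>::fetch_add only arrives in C++20, which is why a CAS loop is the fallback):

#include <atomic>

// CAS retry loop used when no direct atomicrmw form applies. On each failed
// exchange, Old is refreshed with the current value and the update recomputed.
float atomicFAdd(std::atomic<float> &X, float Expr) {
  float Old = X.load();
  float Desired = Old + Expr; // the UpdateOp callback computes this in IR
  while (!X.compare_exchange_weak(Old, Desired))
    Desired = Old + Expr;
  return Desired; // the "new" value; Old holds the captured previous value
}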
+ bool emitRMWOp = false; + switch (RMWOp) { + case AtomicRMWInst::Add: + case AtomicRMWInst::And: + case AtomicRMWInst::Nand: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + case AtomicRMWInst::Xchg: + emitRMWOp = XElemTy; + break; + case AtomicRMWInst::Sub: + emitRMWOp = (IsXBinopExpr && XElemTy); + break; + default: + emitRMWOp = false; + } + emitRMWOp &= XElemTy->isIntegerTy(); std::pair<Value *, Value *> Res; - if (XElemTy->isIntegerTy() && !DoCmpExch) { + if (emitRMWOp) { Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); // not needed except in case of postfix captures. Generate anyway for // consistency with the else part. Will be removed with any DCE pass. - Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); + // AtomicRMWInst::Xchg does not have a corresponding instruction. + if (RMWOp == AtomicRMWInst::Xchg) + Res.second = Res.first; + else + Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); } else { unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); IntegerType *IntCastTy = @@ -3380,12 +4023,12 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), X->getName() + ".atomic.cont"); ContBB->getTerminator()->eraseFromParent(); + Builder.restoreIP(AllocaIP); + AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy); + NewAtomicAddr->setName(X->getName() + "x.new.val"); Builder.SetInsertPoint(ContBB); llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2); PHI->addIncoming(OldVal, CurBB); - AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy); - NewAtomicAddr->setName(X->getName() + "x.new.val"); - NewAtomicAddr->moveBefore(AllocIP); IntegerType *NewAtomicCastTy = IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); bool IsIntTy = XElemTy->isIntegerTy(); @@ -3407,7 +4050,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( Value *Upd = UpdateOp(OldExprVal, Builder); Builder.CreateStore(Upd, NewAtomicAddr); - LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr); + LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicIntAddr); Value *XAddr = (IsIntTy) ?
X @@ -3415,7 +4058,7 @@ std::pair OpenMPIRBuilder::emitAtomicUpdate( AtomicOrdering Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg( - XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure); + XAddr, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure); Result->setVolatile(VolatileX); Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0); Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1); @@ -3439,7 +4082,7 @@ std::pair OpenMPIRBuilder::emitAtomicUpdate( } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( - const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, + const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) { @@ -3450,7 +4093,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( Type *XTy = X.Var->getType(); assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); - Type *XElemTy = XTy->getPointerElementType(); + Type *XElemTy = X.ElemTy; assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || XElemTy->isPointerTy()) && "OMP atomic capture expected a scalar type"); @@ -3462,7 +4105,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( // 'x' is simply atomically rewritten with 'expr'. AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg); std::pair Result = - emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, + emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr); Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second); @@ -3472,6 +4115,155 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( return Builder.saveIP(); } +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( + const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, + AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, + omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, + bool IsFailOnly) { + + if (!updateToLocation(Loc)) + return Loc.IP; + + assert(X.Var->getType()->isPointerTy() && + "OMP atomic expects a pointer to target memory"); + assert((X.ElemTy->isIntegerTy() || X.ElemTy->isPointerTy()) && + "OMP atomic compare expected a integer scalar type"); + // compare capture + if (V.Var) { + assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type"); + assert(V.ElemTy == X.ElemTy && "x and v must be of same type"); + } + + if (Op == OMPAtomicCompareOp::EQ) { + AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO); + AtomicCmpXchgInst *Result = + Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure); + if (V.Var) { + Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0); + assert(OldValue->getType() == V.ElemTy && + "OldValue and V must be of same type"); + if (IsPostfixUpdate) { + Builder.CreateStore(OldValue, V.Var, V.IsVolatile); + } else { + Value *SuccessOrFail = Builder.CreateExtractValue(Result, /*Idxs=*/1); + if (IsFailOnly) { + // CurBB---- + // | | + // v | + // ContBB | + // | | + // v | + // ExitBB <- + // + // where ContBB only contains the store of old value to 'v'. 
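In the EQ case the whole construct is a single cmpxchg; the subtle part is what v observes: the prior value for postfix capture, otherwise the select emitted above (and under IsFailOnly, a store only on the failure path through the ContBB diagrammed in the comment). A std::atomic model of the two simple capture modes, illustrative only:

#include <atomic>

// Models the capture variants of "#pragma omp atomic compare capture"
// lowered above, for { if (x == e) x = d; } plus a capture of x into v.
int atomicCompareCapture(std::atomic<int> &X, int E, int D,
                         bool IsPostfixUpdate) {
  int Old = E; // 'expected' slot; refreshed with x's value if the CAS fails
  bool Success = X.compare_exchange_strong(Old, D);
  if (IsPostfixUpdate)
    return Old;             // v = value x held before the operation
  return Success ? E : Old; // the CreateSelect in the non-postfix path
}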
+ BasicBlock *CurBB = Builder.GetInsertBlock(); + Instruction *CurBBTI = CurBB->getTerminator(); + CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable(); + BasicBlock *ExitBB = CurBB->splitBasicBlock( + CurBBTI, X.Var->getName() + ".atomic.exit"); + BasicBlock *ContBB = CurBB->splitBasicBlock( + CurBB->getTerminator(), X.Var->getName() + ".atomic.cont"); + ContBB->getTerminator()->eraseFromParent(); + CurBB->getTerminator()->eraseFromParent(); + + Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB); + + Builder.SetInsertPoint(ContBB); + Builder.CreateStore(OldValue, V.Var); + Builder.CreateBr(ExitBB); + + if (UnreachableInst *ExitTI = + dyn_cast(ExitBB->getTerminator())) { + CurBBTI->eraseFromParent(); + Builder.SetInsertPoint(ExitBB); + } else { + Builder.SetInsertPoint(ExitTI); + } + } else { + Value *CapturedValue = + Builder.CreateSelect(SuccessOrFail, E, OldValue); + Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile); + } + } + } + // The comparison result has to be stored. + if (R.Var) { + assert(R.Var->getType()->isPointerTy() && + "r.var must be of pointer type"); + assert(R.ElemTy->isIntegerTy() && "r must be of integral type"); + + Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1); + Value *ResultCast = R.IsSigned + ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy) + : Builder.CreateZExt(SuccessFailureVal, R.ElemTy); + Builder.CreateStore(ResultCast, R.Var, R.IsVolatile); + } + } else { + assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) && + "Op should be either max or min at this point"); + assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is =="); + + // Reverse the ordop as the OpenMP forms are different from LLVM forms. + // Let's take max as example. + // OpenMP form: + // x = x > expr ? expr : x; + // LLVM form: + // *ptr = *ptr > val ? *ptr : val; + // We need to transform to LLVM form. + // x = x <= expr ? x : expr; + AtomicRMWInst::BinOp NewOp; + if (IsXBinopExpr) { + if (X.IsSigned) + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min + : AtomicRMWInst::Max; + else + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin + : AtomicRMWInst::UMax; + } else { + if (X.IsSigned) + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max + : AtomicRMWInst::Min; + else + NewOp = Op == OMPAtomicCompareOp::MAX ? 
AtomicRMWInst::UMax + : AtomicRMWInst::UMin; + } + + AtomicRMWInst *OldValue = + Builder.CreateAtomicRMW(NewOp, X.Var, E, MaybeAlign(), AO); + if (V.Var) { + Value *CapturedValue = nullptr; + if (IsPostfixUpdate) { + CapturedValue = OldValue; + } else { + CmpInst::Predicate Pred; + switch (NewOp) { + case AtomicRMWInst::Max: + Pred = CmpInst::ICMP_SGT; + break; + case AtomicRMWInst::UMax: + Pred = CmpInst::ICMP_UGT; + break; + case AtomicRMWInst::Min: + Pred = CmpInst::ICMP_SLT; + break; + case AtomicRMWInst::UMin: + Pred = CmpInst::ICMP_ULT; + break; + default: + llvm_unreachable("unexpected comparison op"); + } + Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E); + CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue); + } + Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile); + } + } + + checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare); + + return Builder.saveIP(); +} + GlobalVariable * OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl &Names, std::string VarName) { @@ -3543,6 +4335,51 @@ BasicBlock *CanonicalLoopInfo::getPreheader() const { llvm_unreachable("Missing preheader"); } +void CanonicalLoopInfo::setTripCount(Value *TripCount) { + assert(isValid() && "Requires a valid canonical loop"); + + Instruction *CmpI = &getCond()->front(); + assert(isa(CmpI) && "First inst must compare IV with TripCount"); + CmpI->setOperand(1, TripCount); + +#ifndef NDEBUG + assertOK(); +#endif +} + +void CanonicalLoopInfo::mapIndVar( + llvm::function_ref Updater) { + assert(isValid() && "Requires a valid canonical loop"); + + Instruction *OldIV = getIndVar(); + + // Record all uses excluding those introduced by the updater. Uses by the + // CanonicalLoopInfo itself to keep track of the number of iterations are + // excluded. + SmallVector ReplacableUses; + for (Use &U : OldIV->uses()) { + auto *User = dyn_cast(U.getUser()); + if (!User) + continue; + if (User->getParent() == getCond()) + continue; + if (User->getParent() == getLatch()) + continue; + ReplacableUses.push_back(&U); + } + + // Run the updater that may introduce new uses + Value *NewIV = Updater(OldIV); + + // Replace the old uses with the value returned by the updater. + for (Use *U : ReplacableUses) + U->set(NewIV); + +#ifndef NDEBUG + assertOK(); +#endif +} + void CanonicalLoopInfo::assertOK() const { #ifndef NDEBUG // No constraints if this object currently does not describe a loop. diff --git a/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/llvm/lib/FuzzMutate/FuzzerCLI.cpp index 879d5a10b37b..90a1a35e2e3e 100644 --- a/llvm/lib/FuzzMutate/FuzzerCLI.cpp +++ b/llvm/lib/FuzzMutate/FuzzerCLI.cpp @@ -9,16 +9,9 @@ #include "llvm/FuzzMutate/FuzzerCLI.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/IR/Verifier.h" using namespace llvm; @@ -166,44 +159,3 @@ int llvm::runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne, } return 0; } - -std::unique_ptr llvm::parseModule( - const uint8_t *Data, size_t Size, LLVMContext &Context) { - - if (Size <= 1) - // We get bogus data given an empty corpus - just create a new module. 
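A note for readers tracking the createAtomicCompare addition above: the == form lowers to a cmpxchg instruction whose result is a two-element aggregate, { previous value, i1 success }, while the min/max forms lower to a single atomicrmw. The following is a minimal sketch of the cmpxchg unpacking pattern, not part of the patch; the builder B and the Ptr/Expected/Desired values are hypothetical, and monotonic ordering stands in for whatever the directive actually requests:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Atomically: if (*Ptr == Expected) *Ptr = Desired; then unpack the
    // { old value, success flag } aggregate the same way the patch does,
    // and widen the i1 flag as the 'r' result store does with zext.
    Value *emitCompareFlag(IRBuilder<> &B, Value *Ptr, Value *Expected,
                           Value *Desired) {
      AtomicCmpXchgInst *CmpXchg = B.CreateAtomicCmpXchg(
          Ptr, Expected, Desired, MaybeAlign(), AtomicOrdering::Monotonic,
          AtomicOrdering::Monotonic);
      Value *Old = B.CreateExtractValue(CmpXchg, /*Idxs=*/0);
      Value *Success = B.CreateExtractValue(CmpXchg, /*Idxs=*/1);
      (void)Old; // a capture clause would store this to 'v'
      return B.CreateZExt(Success, B.getInt32Ty());
    }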
- return std::make_unique("M", Context); - - auto Buffer = MemoryBuffer::getMemBuffer( - StringRef(reinterpret_cast(Data), Size), "Fuzzer input", - /*RequiresNullTerminator=*/false); - - SMDiagnostic Err; - auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context); - if (Error E = M.takeError()) { - errs() << toString(std::move(E)) << "\n"; - return nullptr; - } - return std::move(M.get()); -} - -size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) { - std::string Buf; - { - raw_string_ostream OS(Buf); - WriteBitcodeToFile(M, OS); - } - if (Buf.size() > MaxSize) - return 0; - memcpy(Dest, Buf.data(), Buf.size()); - return Buf.size(); -} - -std::unique_ptr llvm::parseAndVerify(const uint8_t *Data, size_t Size, - LLVMContext &Context) { - auto M = parseModule(Data, Size, Context); - if (!M || verifyModule(*M, &errs())) - return nullptr; - - return M; -} diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp index 0cd0f538fdbc..b62a326a40cc 100644 --- a/llvm/lib/FuzzMutate/IRMutator.cpp +++ b/llvm/lib/FuzzMutate/IRMutator.cpp @@ -9,6 +9,8 @@ #include "llvm/FuzzMutate/IRMutator.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/FuzzMutate/Operations.h" #include "llvm/FuzzMutate/Random.h" #include "llvm/FuzzMutate/RandomIRBuilder.h" @@ -17,7 +19,9 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Transforms/Scalar/DCE.h" using namespace llvm; @@ -33,14 +37,15 @@ static void createEmptyFunction(Module &M) { } void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) { - if (M.empty()) - createEmptyFunction(M); - auto RS = makeSampler(IB.Rand); for (Function &F : M) if (!F.isDeclaration()) RS.sample(&F, /*Weight=*/1); - mutate(*RS.getSelection(), IB); + + if (RS.isEmpty()) + createEmptyFunction(M); + else + mutate(*RS.getSelection(), IB); } void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) { @@ -243,3 +248,44 @@ void InstModificationIRStrategy::mutate(Instruction &Inst, if (RS) RS.getSelection()(); } + +std::unique_ptr llvm::parseModule(const uint8_t *Data, size_t Size, + LLVMContext &Context) { + + if (Size <= 1) + // We get bogus data given an empty corpus - just create a new module. 
+ return std::make_unique("M", Context); + + auto Buffer = MemoryBuffer::getMemBuffer( + StringRef(reinterpret_cast(Data), Size), "Fuzzer input", + /*RequiresNullTerminator=*/false); + + SMDiagnostic Err; + auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context); + if (Error E = M.takeError()) { + errs() << toString(std::move(E)) << "\n"; + return nullptr; + } + return std::move(M.get()); +} + +size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) { + std::string Buf; + { + raw_string_ostream OS(Buf); + WriteBitcodeToFile(M, OS); + } + if (Buf.size() > MaxSize) + return 0; + memcpy(Dest, Buf.data(), Buf.size()); + return Buf.size(); +} + +std::unique_ptr llvm::parseAndVerify(const uint8_t *Data, size_t Size, + LLVMContext &Context) { + auto M = parseModule(Data, Size, Context); + if (!M || verifyModule(*M, &errs())) + return nullptr; + + return M; +} diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp index 221a3a84b49b..7443d49967c5 100644 --- a/llvm/lib/FuzzMutate/Operations.cpp +++ b/llvm/lib/FuzzMutate/Operations.cpp @@ -169,14 +169,21 @@ OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) { OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) { auto buildGEP = [](ArrayRef Srcs, Instruction *Inst) { - Type *Ty = Srcs[0]->getType()->getPointerElementType(); - auto Indices = makeArrayRef(Srcs).drop_front(1); + // TODO: It would be better to generate a random type here, rather than + // generating a random value and picking its type. + Type *Ty = Srcs[0]->getType()->isOpaquePointerTy() + ? Srcs[1]->getType() + : Srcs[0]->getType()->getNonOpaquePointerElementType(); + auto Indices = makeArrayRef(Srcs).drop_front(2); return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst); }; // TODO: Handle aggregates and vectors // TODO: Support multiple indices. // TODO: Try to avoid meaningless accesses. - return {Weight, {sizedPtrType(), anyIntType()}, buildGEP}; + SourcePred sizedType( + [](ArrayRef, const Value *V) { return V->getType()->isSized(); }, + None); + return {Weight, {sizedPtrType(), sizedType, anyIntType()}, buildGEP}; } static uint64_t getAggregateNumElements(Type *T) { @@ -302,12 +309,12 @@ static SourcePred validShuffleVectorIndex() { return ShuffleVectorInst::isValidOperands(Cur[0], Cur[1], V); }; auto Make = [](ArrayRef Cur, ArrayRef Ts) { - auto *FirstTy = cast(Cur[0]->getType()); + auto *FirstTy = cast(Cur[0]->getType()); auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); // TODO: It's straighforward to make up reasonable values, but listing them // exhaustively would be insane. Come up with a couple of sensible ones. 
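The parseModule/writeModule/parseAndVerify helpers deleted from FuzzerCLI.cpp above reappear here in IRMutator.cpp. A rough sketch of how a fuzzer driver uses the round trip, assuming the declarations moved into IRMutator.h along with the definitions (this harness is illustrative only, not part of LLVM):

    #include "llvm/FuzzMutate/IRMutator.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include <cstddef>
    #include <cstdint>

    extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
      llvm::LLVMContext Context;
      // Parse the input as bitcode, rejecting anything the verifier refuses.
      std::unique_ptr<llvm::Module> M =
          llvm::parseAndVerify(Data, Size, Context);
      if (!M)
        return 0;
      // ... mutate or exercise M here ...
      return 0;
    }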
return std::vector{UndefValue::get( - FixedVectorType::get(Int32Ty, FirstTy->getNumElements()))}; + VectorType::get(Int32Ty, FirstTy->getElementCount()))}; }; return {Pred, Make}; } diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp index 27c3bdfb22a8..9ac31ebccb99 100644 --- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp +++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp @@ -8,10 +8,10 @@ #include "llvm/FuzzMutate/RandomIRBuilder.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/FuzzMutate/OpDescriptor.h" #include "llvm/FuzzMutate/Random.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -53,8 +53,11 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef Insts, IP = ++I->getIterator(); assert(IP != BB.end() && "guaranteed by the findPointer"); } - auto *NewLoad = - new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L", &*IP); + // For opaque pointers, pick the type independently. + Type *AccessTy = Ptr->getType()->isOpaquePointerTy() + ? RS.getSelection()->getType() + : Ptr->getType()->getNonOpaquePointerElementType(); + auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP); // Only sample this load if it really matches the descriptor if (Pred.matches(Srcs, NewLoad)) @@ -139,9 +142,12 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB, if (Inst->isTerminator()) return false; - if (auto PtrTy = dyn_cast(Inst->getType())) { + if (auto *PtrTy = dyn_cast(Inst->getType())) { + if (PtrTy->isOpaque()) + return true; + // We can never generate loads from non first class or non sized types - Type *ElemTy = PtrTy->getPointerElementType(); + Type *ElemTy = PtrTy->getNonOpaquePointerElementType(); if (!ElemTy->isSized() || !ElemTy->isFirstClassType()) return false; diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp index 2e41799e13e9..b7a10846a0d3 100644 --- a/llvm/lib/IR/AbstractCallSite.cpp +++ b/llvm/lib/IR/AbstractCallSite.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/AbstractCallSite.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 179754e275b0..596348ddb462 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -223,9 +223,7 @@ predictValueUseListOrder(const Value *V, unsigned ID, const OrderMap &OM) { return LU->getOperandNo() > RU->getOperandNo(); }); - if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) { - return L.second < R.second; - })) + if (llvm::is_sorted(List, llvm::less_second())) // Order is already correct. return {}; @@ -612,6 +610,11 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { OS << '>'; return; } + case Type::DXILPointerTyID: + // DXIL pointer types are only handled by the DirectX backend. To avoid + // extra dependencies we just print the pointer's address here. + OS << "dxil-ptr (" << Ty << ")"; + return; } llvm_unreachable("Invalid TypeID"); } @@ -641,7 +644,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { OS << '>'; } -AbstractSlotTrackerStorage::~AbstractSlotTrackerStorage() {} +AbstractSlotTrackerStorage::~AbstractSlotTrackerStorage() = default; namespace llvm { @@ -1290,7 +1293,7 @@ struct AsmWriterContext { /// prints a Metadata as operand. 
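The predictValueUseListOrder hunk above swaps a hand-written pair comparator for llvm::less_second from STLExtras; its sibling less_first shows up in the Attributes.cpp asserts later in this patch. A tiny self-contained illustration with made-up data:

    #include "llvm/ADT/STLExtras.h"
    #include <utility>
    #include <vector>

    bool secondColumnSorted() {
      std::vector<std::pair<int, int>> List = {{0, 3}, {1, 2}, {2, 1}};
      // less_second compares only the second members; they descend here,
      // so this returns false.
      return llvm::is_sorted(List, llvm::less_second());
    }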
virtual void onWriteMetadataAsOperand(const Metadata *) {} - virtual ~AsmWriterContext() {} + virtual ~AsmWriterContext() = default; }; } // end anonymous namespace @@ -2072,7 +2075,7 @@ static void writeDIFile(raw_ostream &Out, const DIFile *N, AsmWriterContext &) { // Print all values for checksum together, or not at all. if (N->getChecksum()) Printer.printChecksum(*N->getChecksum()); - Printer.printString("source", N->getSource().getValueOr(StringRef()), + Printer.printString("source", N->getSource().value_or(StringRef()), /* ShouldSkipEmpty */ true); Out << ")"; } @@ -2131,6 +2134,7 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N, Printer.printMetadata("retainedNodes", N->getRawRetainedNodes()); Printer.printMetadata("thrownTypes", N->getRawThrownTypes()); Printer.printMetadata("annotations", N->getRawAnnotations()); + Printer.printString("targetFuncName", N->getTargetFuncName()); Out << ")"; } @@ -3531,6 +3535,19 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << '"'; } + using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata; + if (GV->hasSanitizerMetadata()) { + SanitizerMetadata MD = GV->getSanitizerMetadata(); + if (MD.NoAddress) + Out << ", no_sanitize_address"; + if (MD.NoHWAddress) + Out << ", no_sanitize_hwaddress"; + if (MD.NoMemtag) + Out << ", no_sanitize_memtag"; + if (MD.IsDynInit) + Out << ", sanitize_address_dyninit"; + } + maybePrintComdat(Out, *GV); if (MaybeAlign A = GV->getAlign()) Out << ", align " << A->value(); @@ -4708,9 +4725,8 @@ struct MDTreeAsmWriterContext : public AsmWriterContext { : AsmWriterContext(TP, ST, M), Level(0U), Visited({InitMD}), MainOS(OS) {} void onWriteMetadataAsOperand(const Metadata *MD) override { - if (Visited.count(MD)) + if (!Visited.insert(MD).second) return; - Visited.insert(MD); std::string Str; raw_string_ostream SS(Str); diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp index 3d24ae062841..27977d5d56b0 100644 --- a/llvm/lib/IR/Assumptions.cpp +++ b/llvm/lib/IR/Assumptions.cpp @@ -107,4 +107,5 @@ StringSet<> llvm::KnownAssumptionStrings({ "omp_no_openmp_routines", // OpenMP 5.1 "omp_no_parallelism", // OpenMP 5.1 "ompx_spmd_amenable", // OpenMPOpt extension + "ompx_no_call_asm", // OpenMPOpt extension }); diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h index 1153fb827b56..5eb958f5786a 100644 --- a/llvm/lib/IR/AttributeImpl.h +++ b/llvm/lib/IR/AttributeImpl.h @@ -255,6 +255,8 @@ public: std::pair> getAllocSizeArgs() const; unsigned getVScaleRangeMin() const; Optional getVScaleRangeMax() const; + UWTableKind getUWTableKind() const; + AllocFnKind getAllocKind() const; std::string getAsString(bool InAttrGrp) const; Type *getAttributeType(Attribute::AttrKind Kind) const; diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 43fde64c3734..6d9f94b5eefd 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -28,7 +28,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -56,12 +55,11 @@ static const unsigned AllocSizeNumElemsNotPresent = -1; static uint64_t packAllocSizeArgs(unsigned ElemSizeArg, const Optional &NumElemsArg) { - assert((!NumElemsArg.hasValue() || - *NumElemsArg != AllocSizeNumElemsNotPresent) && + assert((!NumElemsArg || *NumElemsArg != AllocSizeNumElemsNotPresent) && "Attempting to pack a reserved value"); return 
uint64_t(ElemSizeArg) << 32 | - NumElemsArg.getValueOr(AllocSizeNumElemsNotPresent); + NumElemsArg.value_or(AllocSizeNumElemsNotPresent); } static std::pair> @@ -77,7 +75,7 @@ unpackAllocSizeArgs(uint64_t Num) { static uint64_t packVScaleRangeArgs(unsigned MinValue, Optional MaxValue) { - return uint64_t(MinValue) << 32 | MaxValue.getValueOr(0); + return uint64_t(MinValue) << 32 | MaxValue.value_or(0); } static std::pair> @@ -205,6 +203,11 @@ Attribute Attribute::getWithInAllocaType(LLVMContext &Context, Type *Ty) { return get(Context, InAlloca, Ty); } +Attribute Attribute::getWithUWTableKind(LLVMContext &Context, + UWTableKind Kind) { + return get(Context, UWTable, uint64_t(Kind)); +} + Attribute Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg, const Optional &NumElemsArg) { @@ -366,6 +369,18 @@ Optional Attribute::getVScaleRangeMax() const { return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second; } +UWTableKind Attribute::getUWTableKind() const { + assert(hasAttribute(Attribute::UWTable) && + "Trying to get unwind table kind from non-uwtable attribute"); + return UWTableKind(pImpl->getValueAsInt()); +} + +AllocFnKind Attribute::getAllocKind() const { + assert(hasAttribute(Attribute::AllocKind) && + "Trying to get allockind value from non-allockind attribute"); + return AllocFnKind(pImpl->getValueAsInt()); +} + std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return {}; @@ -422,7 +437,38 @@ std::string Attribute::getAsString(bool InAttrGrp) const { unsigned MinValue = getVScaleRangeMin(); Optional MaxValue = getVScaleRangeMax(); return ("vscale_range(" + Twine(MinValue) + "," + - Twine(MaxValue.getValueOr(0)) + ")") + Twine(MaxValue.value_or(0)) + ")") + .str(); + } + + if (hasAttribute(Attribute::UWTable)) { + UWTableKind Kind = getUWTableKind(); + if (Kind != UWTableKind::None) { + return Kind == UWTableKind::Default + ? "uwtable" + : ("uwtable(" + + Twine(Kind == UWTableKind::Sync ? "sync" : "async") + ")") + .str(); + } + } + + if (hasAttribute(Attribute::AllocKind)) { + AllocFnKind Kind = getAllocKind(); + SmallVector parts; + if ((Kind & AllocFnKind::Alloc) != AllocFnKind::Unknown) + parts.push_back("alloc"); + if ((Kind & AllocFnKind::Realloc) != AllocFnKind::Unknown) + parts.push_back("realloc"); + if ((Kind & AllocFnKind::Free) != AllocFnKind::Unknown) + parts.push_back("free"); + if ((Kind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + parts.push_back("uninitialized"); + if ((Kind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + parts.push_back("zeroed"); + if ((Kind & AllocFnKind::Aligned) != AllocFnKind::Unknown) + parts.push_back("aligned"); + return ("allockind(\"" + + Twine(llvm::join(parts.begin(), parts.end(), ",")) + "\")") .str(); } @@ -710,6 +756,14 @@ Optional AttributeSet::getVScaleRangeMax() const { return SetNode ? SetNode->getVScaleRangeMax() : None; } +UWTableKind AttributeSet::getUWTableKind() const { + return SetNode ? SetNode->getUWTableKind() : UWTableKind::None; +} + +AllocFnKind AttributeSet::getAllocKind() const { + return SetNode ? SetNode->getAllocKind() : AllocFnKind::Unknown; +} + std::string AttributeSet::getAsString(bool InAttrGrp) const { return SetNode ? 
SetNode->getAsString(InAttrGrp) : ""; } @@ -876,6 +930,18 @@ Optional AttributeSetNode::getVScaleRangeMax() const { return None; } +UWTableKind AttributeSetNode::getUWTableKind() const { + if (auto A = findEnumAttribute(Attribute::UWTable)) + return A->getUWTableKind(); + return UWTableKind::None; +} + +AllocFnKind AttributeSetNode::getAllocKind() const { + if (auto A = findEnumAttribute(Attribute::AllocKind)) + return A->getAllocKind(); + return AllocFnKind::Unknown; +} + std::string AttributeSetNode::getAsString(bool InAttrGrp) const { std::string Str; for (iterator I = begin(), E = end(); I != E; ++I) { @@ -987,11 +1053,7 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return {}; - assert(llvm::is_sorted(Attrs, - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }) && + assert(llvm::is_sorted(Attrs, llvm::less_first()) && "Misordered Attributes list!"); assert(llvm::all_of(Attrs, [](const std::pair &Pair) { @@ -1024,11 +1086,7 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return {}; - assert(llvm::is_sorted(Attrs, - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }) && + assert(llvm::is_sorted(Attrs, llvm::less_first()) && "Misordered Attributes list!"); assert(llvm::none_of(Attrs, [](const std::pair &Pair) { @@ -1428,6 +1486,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const { return getParamAttrs(Index).getDereferenceableOrNullBytes(); } +UWTableKind AttributeList::getUWTableKind() const { + return getFnAttrs().getUWTableKind(); +} + +AllocFnKind AttributeList::getAllocKind() const { + return getFnAttrs().getAllocKind(); +} + std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const { return getAttributes(Index).getAsString(InAttrGrp); } @@ -1649,6 +1715,16 @@ AttrBuilder &AttrBuilder::addVScaleRangeAttrFromRawRepr(uint64_t RawArgs) { return addRawIntAttr(Attribute::VScaleRange, RawArgs); } +AttrBuilder &AttrBuilder::addUWTableAttr(UWTableKind Kind) { + if (Kind == UWTableKind::None) + return *this; + return addRawIntAttr(Attribute::UWTable, uint64_t(Kind)); +} + +AttrBuilder &AttrBuilder::addAllocKindAttr(AllocFnKind Kind) { + return addRawIntAttr(Attribute::AllocKind, static_cast(Kind)); +} + Type *AttrBuilder::getTypeAttr(Attribute::AttrKind Kind) const { assert(Attribute::isTypeAttrKind(Kind) && "Not a type attribute"); Attribute A = getAttribute(Kind); @@ -1732,39 +1808,51 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const { //===----------------------------------------------------------------------===// /// Which attributes cannot be applied to a type. -AttributeMask AttributeFuncs::typeIncompatible(Type *Ty) { +AttributeMask AttributeFuncs::typeIncompatible(Type *Ty, + AttributeSafetyKind ASK) { AttributeMask Incompatible; - if (!Ty->isIntegerTy()) + if (!Ty->isIntegerTy()) { // Attributes that only apply to integers. - Incompatible.addAttribute(Attribute::SExt) - .addAttribute(Attribute::ZExt); + if (ASK & ASK_SAFE_TO_DROP) + Incompatible.addAttribute(Attribute::AllocAlign); + if (ASK & ASK_UNSAFE_TO_DROP) + Incompatible.addAttribute(Attribute::SExt).addAttribute(Attribute::ZExt); + } - if (!Ty->isPointerTy()) + if (!Ty->isPointerTy()) { // Attributes that only apply to pointers. 
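On the typeIncompatible rework that begins here: the new AttributeSafetyKind parameter splits the type-incompatible set into attributes that are safe to drop silently and attributes whose loss would be unsound. A sketch of how a caller might request only the safe subset; the qualified enum spelling is assumed from the ASK_SAFE_TO_DROP uses visible in this hunk:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Type.h"

    // Attributes like noalias or nonnull are meaningless on an i32 but
    // harmless to remove; sext/zext, by contrast, affect the ABI.
    llvm::AttributeMask safeToDropOnInt(llvm::LLVMContext &Ctx) {
      return llvm::AttributeFuncs::typeIncompatible(
          llvm::Type::getInt32Ty(Ctx), llvm::AttributeFuncs::ASK_SAFE_TO_DROP);
    }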
- Incompatible.addAttribute(Attribute::Nest) - .addAttribute(Attribute::NoAlias) - .addAttribute(Attribute::NoCapture) - .addAttribute(Attribute::NonNull) - .addAttribute(Attribute::ReadNone) - .addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::SwiftError) - .addAttribute(Attribute::Dereferenceable) - .addAttribute(Attribute::DereferenceableOrNull) - .addAttribute(Attribute::Preallocated) - .addAttribute(Attribute::InAlloca) - .addAttribute(Attribute::ByVal) - .addAttribute(Attribute::StructRet) - .addAttribute(Attribute::ByRef) - .addAttribute(Attribute::ElementType); - - if (!Ty->isPtrOrPtrVectorTy()) + if (ASK & ASK_SAFE_TO_DROP) + Incompatible.addAttribute(Attribute::NoAlias) + .addAttribute(Attribute::NoCapture) + .addAttribute(Attribute::NonNull) + .addAttribute(Attribute::ReadNone) + .addAttribute(Attribute::ReadOnly) + .addAttribute(Attribute::Dereferenceable) + .addAttribute(Attribute::DereferenceableOrNull); + if (ASK & ASK_UNSAFE_TO_DROP) + Incompatible.addAttribute(Attribute::Nest) + .addAttribute(Attribute::SwiftError) + .addAttribute(Attribute::Preallocated) + .addAttribute(Attribute::InAlloca) + .addAttribute(Attribute::ByVal) + .addAttribute(Attribute::StructRet) + .addAttribute(Attribute::ByRef) + .addAttribute(Attribute::ElementType) + .addAttribute(Attribute::AllocatedPointer); + } + // Attributes that only apply to pointers or vectors of pointers. - Incompatible.addAttribute(Attribute::Alignment); + if (!Ty->isPtrOrPtrVectorTy()) { + if (ASK & ASK_SAFE_TO_DROP) + Incompatible.addAttribute(Attribute::Alignment); + } // Some attributes can apply to all "values" but there are no `void` values. - if (Ty->isVoidTy()) - Incompatible.addAttribute(Attribute::NoUndef); + if (Ty->isVoidTy()) { + if (ASK & ASK_SAFE_TO_DROP) + Incompatible.addAttribute(Attribute::NoUndef); + } return Incompatible; } @@ -1976,3 +2064,14 @@ void AttributeFuncs::mergeAttributesForOutlining(Function &Base, // that aspect in the merged function. mergeFnAttrs(Base, ToMerge); } + +void AttributeFuncs::updateMinLegalVectorWidthAttr(Function &Fn, + uint64_t Width) { + Attribute Attr = Fn.getFnAttribute("min-legal-vector-width"); + if (Attr.isValid()) { + uint64_t OldWidth; + Attr.getValueAsString().getAsInteger(0, OldWidth); + if (Width > OldWidth) + Fn.addFnAttr("min-legal-vector-width", llvm::utostr(Width)); + } +} diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 11839c7572e3..75594f90c926 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/AutoUpgrade.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" @@ -575,19 +576,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { F->arg_begin()->getType()); return true; } - static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); - if (vldRegex.match(Name)) { - auto fArgs = F->getFunctionType()->params(); - SmallVector Tys(fArgs.begin(), fArgs.end()); - // Can't use Intrinsic::getDeclaration here as the return types might - // then only be structurally equal. - FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); - StringRef Suffix = - F->getContext().supportsTypedPointers() ? "p0i8" : "p0"; - NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), - "llvm." + Name + "." 
+ Suffix, F->getParent()); - return true; - } static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vstRegex.match(Name)) { static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, @@ -760,6 +748,23 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { break; } case 'e': { + if (Name.startswith("experimental.vector.extract.")) { + rename(F); + Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::vector_extract, Tys); + return true; + } + + if (Name.startswith("experimental.vector.insert.")) { + rename(F); + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {Args[0], Args[1]}; + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::vector_insert, Tys); + return true; + } + SmallVector Groups; static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+"); if (R.match(Name, &Groups)) { @@ -1016,10 +1021,35 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) return true; } + + auto *ST = dyn_cast(F->getReturnType()); + if (ST && (!ST->isLiteral() || ST->isPacked())) { + // Replace return type with literal non-packed struct. Only do this for + // intrinsics declared to return a struct, not for intrinsics with + // overloaded return type, in which case the exact struct type will be + // mangled into the name. + SmallVector Desc; + Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc); + if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) { + auto *FT = F->getFunctionType(); + auto *NewST = StructType::get(ST->getContext(), ST->elements()); + auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg()); + std::string Name = F->getName().str(); + rename(F); + NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(), + Name, F->getParent()); + + // The new function may also need remangling. 
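The two startswith cases above retire the experimental.vector.extract/insert names in favor of the stable vector_extract/vector_insert intrinsic IDs. A sketch of requesting the upgraded declaration directly, mirroring the Tys arrays built in the hunk (the function and parameter names here are invented):

    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // The extract intrinsic is overloaded on the result type and the source
    // vector type, in that order, just like the Tys array above.
    llvm::Function *getVectorExtractDecl(llvm::Module &M, llvm::Type *ResTy,
                                         llvm::Type *SrcVecTy) {
      return llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::vector_extract, {ResTy, SrcVecTy});
    }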
+ if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F)) + NewFn = *Result; + return true; + } + } + // Remangle our intrinsic since we upgrade the mangling auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); if (Result != None) { - NewFn = Result.getValue(); + NewFn = *Result; return true; } @@ -1237,7 +1267,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, return EmitX86Select(Builder, Mask, Align, Passthru); } -static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, +static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm) { Type *Ty = CI.getType(); unsigned VecWidth = Ty->getPrimitiveSizeInBits(); @@ -1298,7 +1328,7 @@ static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); } -static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI, +static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID) { Type *Ty = CI.getType(); Value *Op0 = CI.getOperand(0); @@ -1314,7 +1344,7 @@ static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI, return Res; } -static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, +static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight) { Type *Ty = CI.getType(); Value *Src = CI.getArgOperand(0); @@ -1341,7 +1371,7 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, return Res; } -static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm, +static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned) { Type *Ty = CI.getType(); Value *LHS = CI.getArgOperand(0); @@ -1380,7 +1410,7 @@ static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm, return Ext; } -static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI, +static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask) { Type *Ty = CI.getType(); Value *Op0 = CI.getArgOperand(0); @@ -1459,7 +1489,7 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru); } -static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { +static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) { Type *Ty = CI.getType(); Value *Op0 = CI.getArgOperand(0); Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty); @@ -1469,7 +1499,7 @@ static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { return Res; } -static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) { +static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) { Type *Ty = CI.getType(); // Arguments have a vXi32 type so cast to vXi64. @@ -1521,7 +1551,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U))); } -static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, +static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed) { Value *Op0 = CI.getArgOperand(0); unsigned NumElts = cast(Op0->getType())->getNumElements(); @@ -1553,7 +1583,7 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, } // Replace a masked intrinsic with an older unmasked intrinsic. 
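The long run of CallInst-to-CallBase signature changes that follows is mechanical but has a point: CallBase is the common base of CallInst and InvokeInst, so the upgrade helpers now also apply to invokes of old intrinsics. The pattern, in miniature (a hypothetical helper, not from the patch):

    #include "llvm/IR/InstrTypes.h"

    // Accepting CallBase& means one implementation serves both plain calls
    // and invokes; no dyn_cast<CallInst> filtering is needed.
    unsigned upgradeableArgCount(llvm::CallBase &CB) {
      return CB.arg_size();
    }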
-static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, +static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID) { Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID); Value *Rep = Builder.CreateCall(Intrin, @@ -1561,7 +1591,7 @@ static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); } -static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { +static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) { Value* A = CI.getArgOperand(0); Value* B = CI.getArgOperand(1); Value* Src = CI.getArgOperand(2); @@ -1576,7 +1606,7 @@ static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { } -static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) { +static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) { Value* Op = CI.getArgOperand(0); Type* ReturnOp = CI.getType(); unsigned NumElts = cast(CI.getType())->getNumElements(); @@ -1586,7 +1616,7 @@ static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) { // Replace intrinsic with unmasked version and a select. static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, - CallInst &CI, Value *&Rep) { + CallBase &CI, Value *&Rep) { Name = Name.substr(12); // Remove avx512.mask. unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); @@ -1834,7 +1864,7 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) { } } -static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F, +static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder) { if (Name == "mve.vctp64.old") { // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the @@ -1921,12 +1951,12 @@ static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F, Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); return Builder.CreateCall(Fn, Ops, CI->getName()); } - llvm_unreachable("Unknown function for ARM CallInst upgrade."); + llvm_unreachable("Unknown function for ARM CallBase upgrade."); } /// Upgrade a call to an old intrinsic. All argument and return casting must be /// provided to seamlessly integrate with existing context. -void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { +void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Function *F = CI->getCalledFunction(); LLVMContext &C = CI->getContext(); IRBuilder<> Builder(C); @@ -3774,7 +3804,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } else if (IsARM) { Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder); } else { - llvm_unreachable("Unknown function for CallInst upgrade."); + llvm_unreachable("Unknown function for CallBase upgrade."); } if (Rep) @@ -3783,12 +3813,33 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } - const auto &DefaultCase = [&NewFn, &CI]() -> void { - // Handle generic mangling change, but nothing else - assert( - (CI->getCalledFunction()->getName() != NewFn->getName()) && - "Unknown function for CallInst upgrade and isn't just a name change"); - CI->setCalledFunction(NewFn); + const auto &DefaultCase = [&]() -> void { + if (CI->getFunctionType() == NewFn->getFunctionType()) { + // Handle generic mangling change. 
+ assert( + (CI->getCalledFunction()->getName() != NewFn->getName()) && + "Unknown function for CallBase upgrade and isn't just a name change"); + CI->setCalledFunction(NewFn); + return; + } + + // This must be an upgrade from a named to a literal struct. + auto *OldST = cast(CI->getType()); + assert(OldST != NewFn->getReturnType() && "Return type must have changed"); + assert(OldST->getNumElements() == + cast(NewFn->getReturnType())->getNumElements() && + "Must have same number of elements"); + + SmallVector Args(CI->args()); + Value *NewCI = Builder.CreateCall(NewFn, Args); + Value *Res = PoisonValue::get(OldST); + for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { + Value *Elem = Builder.CreateExtractValue(NewCI, Idx); + Res = Builder.CreateInsertValue(Res, Elem, Idx); + } + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + return; }; CallInst *NewCall = nullptr; switch (NewFn->getIntrinsicID()) { @@ -3796,13 +3847,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { DefaultCase(); return; } - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: @@ -3885,8 +3929,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::ptr_annotation: // Upgrade from versions that lacked the annotation attribute argument. - assert(CI->arg_size() == 4 && - "Before LLVM 12.0 this intrinsic took four arguments"); + if (CI->arg_size() != 4) { + DefaultCase(); + return; + } + // Create a new call with an added null annotation attribute argument. NewCall = Builder.CreateCall( NewFn, @@ -4047,6 +4094,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), CI->getArgOperand(4)}; NewCall = Builder.CreateCall(NewFn, Args); + AttributeList OldAttrs = CI->getAttributes(); + AttributeList NewAttrs = AttributeList::get( + C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(), + {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1), + OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)}); + NewCall->setAttributes(NewAttrs); auto *MemCI = cast(NewCall); // All mem intrinsics support dest alignment. const ConstantInt *Align = cast(CI->getArgOperand(3)); @@ -4074,8 +4127,8 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) { // Replace all users of the old function with the new function or new // instructions. This is not a range loop because the call is deleted. for (User *U : make_early_inc_range(F->users())) - if (CallInst *CI = dyn_cast(U)) - UpgradeIntrinsicCall(CI, NewFn); + if (CallBase *CB = dyn_cast(U)) + UpgradeIntrinsicCall(CB, NewFn); // Remove old function, no longer used, from the module. F->eraseFromParent(); @@ -4126,7 +4179,7 @@ Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, return nullptr; } -Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { +Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { if (Opc != Instruction::BitCast) return nullptr; @@ -4358,6 +4411,24 @@ bool llvm::UpgradeModuleFlags(Module &M) { } } } + + // Upgrade branch protection and return address signing module flags. The + // module flag behavior for these fields were Error and now they are Min. 
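The module-flag hunk starting here rewrites branch-target-enforcement and sign-return-address* flags from Error to Min behavior, so that modules built with and without the hardening can still be linked. Newly generated IR would attach the flag with Min directly; a minimal sketch:

    #include "llvm/IR/Module.h"

    void enableBTI(llvm::Module &M) {
      // Min behavior: linking takes the minimum of the two values instead
      // of erroring out on a mismatch as the old Error behavior did.
      M.addModuleFlag(llvm::Module::Min, "branch-target-enforcement", 1);
    }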
+ if (ID->getString() == "branch-target-enforcement" || + ID->getString().startswith("sign-return-address")) { + if (auto *Behavior = + mdconst::dyn_extract_or_null(Op->getOperand(0))) { + if (Behavior->getLimitedValue() == Module::Error) { + Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Metadata *Ops[3] = { + ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)), + Op->getOperand(1), Op->getOperand(2)}; + ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); + Changed = true; + } + } + } + // Upgrade Objective-C Image Info Section. Removed the whitespce in the // section name so that llvm-lto will not complain about mismatching // module flags that is functionally the same. @@ -4469,7 +4540,7 @@ namespace { // folding and other libcall simplification. The nobuiltin attribute on the // callsite has the same effect. struct StrictFPUpgradeVisitor : public InstVisitor { - StrictFPUpgradeVisitor() {} + StrictFPUpgradeVisitor() = default; void visitCallBase(CallBase &Call) { if (!Call.isStrictFP()) @@ -4492,13 +4563,6 @@ void llvm::UpgradeFunctionAttributes(Function &F) { SFPV.visit(F); } - if (F.getCallingConv() == CallingConv::X86_INTR && - !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) { - Type *ByValTy = F.getArg(0)->getType()->getPointerElementType(); - Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy); - F.addParamAttr(0, NewAttr); - } - // Remove all incompatibile attributes from function. F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType())); for (auto &Arg : F.args()) @@ -4628,3 +4692,15 @@ void llvm::UpgradeAttributes(AttrBuilder &B) { B.addAttribute(Attribute::NullPointerIsValid); } } + +void llvm::UpgradeOperandBundles(std::vector &Bundles) { + + // clang.arc.attachedcall bundles are now required to have an operand. + // If they don't, it's okay to drop them entirely: when there is an operand, + // the "attachedcall" is meaningful and required, but without an operand, + // it's just a marker NOP. Dropping it merely prevents an optimization. + erase_if(Bundles, [&](OperandBundleDef &OBD) { + return OBD.getTag() == "clang.arc.attachedcall" && + OBD.inputs().empty(); + }); +} diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 99e3afaa8ba8..f064ff503eba 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -148,12 +148,6 @@ const Module *BasicBlock::getModule() const { return getParent()->getParent(); } -const Instruction *BasicBlock::getTerminator() const { - if (InstList.empty() || !InstList.back().isTerminator()) - return nullptr; - return &InstList.back(); -} - const CallInst *BasicBlock::getTerminatingMustTailCall() const { if (InstList.empty()) return nullptr; diff --git a/llvm/lib/IR/BuiltinGCs.cpp b/llvm/lib/IR/BuiltinGCs.cpp index 31ee86383e78..e9ef034c488f 100644 --- a/llvm/lib/IR/BuiltinGCs.cpp +++ b/llvm/lib/IR/BuiltinGCs.cpp @@ -53,7 +53,7 @@ public: /// while introducing only minor runtime overhead. 
class ShadowStackGC : public GCStrategy { public: - ShadowStackGC() {} + ShadowStackGC() = default; }; /// A GCStrategy which serves as an example for the usage of a statepoint based diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 936b1fc2ff6f..41b4f2919221 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -16,7 +16,7 @@ // //===----------------------------------------------------------------------===// -#include "ConstantFold.h" +#include "llvm/IR/ConstantFold.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Constants.h" @@ -379,7 +379,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, opc != Instruction::AddrSpaceCast && // Do not fold bitcast (gep) with inrange index, as this loses // information. - !cast(CE)->getInRangeIndex().hasValue() && + !cast(CE)->getInRangeIndex() && // Do not fold if the gep type is a vector, as bitcasting // operand 0 of a vector gep will result in a bitcast between // different sizes. @@ -435,14 +435,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, if (ConstantFP *FPC = dyn_cast(V)) { bool ignored; APFloat Val = FPC->getValueAPF(); - Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf() : - DestTy->isFloatTy() ? APFloat::IEEEsingle() : - DestTy->isDoubleTy() ? APFloat::IEEEdouble() : - DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended() : - DestTy->isFP128Ty() ? APFloat::IEEEquad() : - DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble() : - APFloat::Bogus(), - APFloat::rmNearestTiesToEven, &ignored); + Val.convert(DestTy->getFltSemantics(), APFloat::rmNearestTiesToEven, + &ignored); return ConstantFP::get(V->getContext(), Val); } return nullptr; // Can't fold. @@ -683,6 +677,11 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, if (isa(Idx)) return PoisonValue::get(Val->getType()); + // Inserting null into all zeros is still all zeros. + // TODO: This is true for undef and poison splats too. + if (isa(Val) && Elt->isNullValue()) + return Val; + ConstantInt *CIdx = dyn_cast(Idx); if (!CIdx) return nullptr; @@ -724,7 +723,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, // Undefined shuffle mask -> undefined value. if (all_of(Mask, [](int Elt) { return Elt == UndefMaskElem; })) { - return UndefValue::get(FixedVectorType::get(EltTy, MaskNumElts)); + return UndefValue::get(VectorType::get(EltTy, MaskEltCount)); } // If the mask is all zeros this is a splat, no need to go through all @@ -2036,8 +2035,18 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, // If inbounds, we can choose an out-of-bounds pointer as a base pointer. return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy); - Constant *Idx0 = cast(Idxs[0]); - if (Idxs.size() == 1 && (Idx0->isNullValue() || isa(Idx0))) + auto IsNoOp = [&]() { + // For non-opaque pointers having multiple indices will change the result + // type of the GEP. + if (!C->getType()->getScalarType()->isOpaquePointerTy() && Idxs.size() != 1) + return false; + + return all_of(Idxs, [](Value *Idx) { + Constant *IdxC = cast(Idx); + return IdxC->isNullValue() || isa(IdxC); + }); + }; + if (IsNoOp()) return GEPTy->isVectorTy() && !C->getType()->isVectorTy() ? 
ConstantVector::getSplat( cast(GEPTy)->getElementCount(), C) @@ -2090,6 +2099,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, // i32* getelementptr ([3 x i32]* %X, i64 0, i64 0) // // Don't fold if the cast is changing address spaces. + Constant *Idx0 = cast(Idxs[0]); if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) { PointerType *SrcPtrTy = dyn_cast(CE->getOperand(0)->getType()); diff --git a/llvm/lib/IR/ConstantFold.h b/llvm/lib/IR/ConstantFold.h deleted file mode 100644 index 1aa44f4d21e5..000000000000 --- a/llvm/lib/IR/ConstantFold.h +++ /dev/null @@ -1,57 +0,0 @@ -//===-- ConstantFolding.h - Internal Constant Folding Interface -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the (internal) constant folding interfaces for LLVM. These -// interfaces are used by the ConstantExpr::get* methods to automatically fold -// constants when possible. -// -// These operators may return a null object if they don't know how to perform -// the specified operation on the specified constant types. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_IR_CONSTANTFOLD_H -#define LLVM_LIB_IR_CONSTANTFOLD_H - -#include "llvm/ADT/Optional.h" -#include "llvm/IR/InstrTypes.h" - -namespace llvm { -template class ArrayRef; - class Value; - class Constant; - class Type; - - // Constant fold various types of instruction... - Constant *ConstantFoldCastInstruction( - unsigned opcode, ///< The opcode of the cast - Constant *V, ///< The source constant - Type *DestTy ///< The destination type - ); - Constant *ConstantFoldSelectInstruction(Constant *Cond, - Constant *V1, Constant *V2); - Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); - Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, - Constant *Idx); - Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, - ArrayRef Mask); - Constant *ConstantFoldExtractValueInstruction(Constant *Agg, - ArrayRef Idxs); - Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, - ArrayRef Idxs); - Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V); - Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, - Constant *V2); - Constant *ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, - Constant *C1, Constant *C2); - Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds, - Optional InRangeIndex, - ArrayRef Idxs); -} // End llvm namespace - -#endif diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index a0f2179bddb4..9d239101d8fd 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -75,6 +75,24 @@ ConstantRange ConstantRange::fromKnownBits(const KnownBits &Known, return ConstantRange(Lower, Upper + 1); } +KnownBits ConstantRange::toKnownBits() const { + // TODO: We could return conflicting known bits here, but consumers are + // likely not prepared for that. + if (isEmptySet()) + return KnownBits(getBitWidth()); + + // We can only retain the top bits that are the same between min and max. 
+ APInt Min = getUnsignedMin(); + APInt Max = getUnsignedMax(); + KnownBits Known = KnownBits::makeConstant(Min); + if (Optional DifferentBit = + APIntOps::GetMostSignificantDifferentBit(Min, Max)) { + Known.Zero.clearLowBits(*DifferentBit + 1); + Known.One.clearLowBits(*DifferentBit + 1); + } + return Known; +} + ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &CR) { if (CR.isEmptySet()) @@ -721,15 +739,23 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, case Instruction::UIToFP: { // TODO: use input range if available auto BW = getBitWidth(); - APInt Min = APInt::getMinValue(BW).zextOrSelf(ResultBitWidth); - APInt Max = APInt::getMaxValue(BW).zextOrSelf(ResultBitWidth); + APInt Min = APInt::getMinValue(BW); + APInt Max = APInt::getMaxValue(BW); + if (ResultBitWidth > BW) { + Min = Min.zext(ResultBitWidth); + Max = Max.zext(ResultBitWidth); + } return ConstantRange(std::move(Min), std::move(Max)); } case Instruction::SIToFP: { // TODO: use input range if available auto BW = getBitWidth(); - APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(ResultBitWidth); - APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(ResultBitWidth); + APInt SMin = APInt::getSignedMinValue(BW); + APInt SMax = APInt::getSignedMaxValue(BW); + if (ResultBitWidth > BW) { + SMin = SMin.sext(ResultBitWidth); + SMax = SMax.sext(ResultBitWidth); + } return ConstantRange(std::move(SMin), std::move(SMax)); } case Instruction::FPTrunc: @@ -1212,7 +1238,10 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const { // separately by combining division results with the appropriate signs. APInt Zero = APInt::getZero(getBitWidth()); APInt SignedMin = APInt::getSignedMinValue(getBitWidth()); - ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin); + // There are no positive 1-bit values. The 1 would get interpreted as -1. + ConstantRange PosFilter = + getBitWidth() == 1 ? getEmpty() + : ConstantRange(APInt(getBitWidth(), 1), SignedMin); ConstantRange NegFilter(SignedMin, Zero); ConstantRange PosL = intersectWith(PosFilter); ConstantRange NegL = intersectWith(NegFilter); @@ -1368,34 +1397,29 @@ ConstantRange ConstantRange::binaryNot() const { return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this); } -ConstantRange -ConstantRange::binaryAnd(const ConstantRange &Other) const { +ConstantRange ConstantRange::binaryAnd(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); - // Use APInt's implementation of AND for single element ranges. - if (isSingleElement() && Other.isSingleElement()) - return {*getSingleElement() & *Other.getSingleElement()}; - - // TODO: replace this with something less conservative - - APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()); - return getNonEmpty(APInt::getZero(getBitWidth()), std::move(umin) + 1); + ConstantRange KnownBitsRange = + fromKnownBits(toKnownBits() & Other.toKnownBits(), false); + ConstantRange UMinUMaxRange = + getNonEmpty(APInt::getZero(getBitWidth()), + APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1); + return KnownBitsRange.intersectWith(UMinUMaxRange); } -ConstantRange -ConstantRange::binaryOr(const ConstantRange &Other) const { +ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); - // Use APInt's implementation of OR for single element ranges. 
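The ConstantRange changes in this stretch add toKnownBits and use it to tighten binaryAnd/binaryOr (and binaryXor below) beyond the old umin/umax-only bounds. A worked example of what toKnownBits yields; the numbers are checked by hand, not taken from the patch:

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/Support/KnownBits.h"

    llvm::KnownBits rangeBits() {
      // [32, 36): min is 32 = 0b00100000, max is 35 = 0b00100011. They
      // agree above bit 1, so only the low two bits remain unknown:
      // Known.One == 0b00100000 and Known.Zero == 0b11011100.
      llvm::ConstantRange CR(llvm::APInt(8, 32), llvm::APInt(8, 36));
      return CR.toKnownBits();
    }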
- if (isSingleElement() && Other.isSingleElement()) - return {*getSingleElement() | *Other.getSingleElement()}; - - // TODO: replace this with something less conservative - - APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); - return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth())); + ConstantRange KnownBitsRange = + fromKnownBits(toKnownBits() | Other.toKnownBits(), false); + // Upper wrapped range. + ConstantRange UMaxUMinRange = + getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), + APInt::getZero(getBitWidth())); + return KnownBitsRange.intersectWith(UMaxUMinRange); } ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const { @@ -1412,8 +1436,7 @@ ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const { if (isSingleElement() && getSingleElement()->isAllOnes()) return Other.binaryNot(); - // TODO: replace this with something less conservative - return getFull(); + return fromKnownBits(toKnownBits() ^ Other.toKnownBits(), /*IsSigned*/false); } ConstantRange diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index b862a159127f..0bf5e09d6647 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -11,12 +11,12 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Constants.h" -#include "ConstantFold.h" #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/ConstantFold.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -27,7 +27,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -353,26 +352,14 @@ Constant *Constant::getNullValue(Type *Ty) { case Type::IntegerTyID: return ConstantInt::get(Ty, 0); case Type::HalfTyID: - return ConstantFP::get(Ty->getContext(), - APFloat::getZero(APFloat::IEEEhalf())); case Type::BFloatTyID: - return ConstantFP::get(Ty->getContext(), - APFloat::getZero(APFloat::BFloat())); case Type::FloatTyID: - return ConstantFP::get(Ty->getContext(), - APFloat::getZero(APFloat::IEEEsingle())); case Type::DoubleTyID: - return ConstantFP::get(Ty->getContext(), - APFloat::getZero(APFloat::IEEEdouble())); case Type::X86_FP80TyID: - return ConstantFP::get(Ty->getContext(), - APFloat::getZero(APFloat::x87DoubleExtended())); case Type::FP128TyID: - return ConstantFP::get(Ty->getContext(), - APFloat::getZero(APFloat::IEEEquad())); case Type::PPC_FP128TyID: - return ConstantFP::get(Ty->getContext(), APFloat(APFloat::PPCDoubleDouble(), - APInt::getZero(128))); + return ConstantFP::get(Ty->getContext(), + APFloat::getZero(Ty->getFltSemantics())); case Type::PointerTyID: return ConstantPointerNull::get(cast(Ty)); case Type::StructTyID: @@ -560,8 +547,6 @@ void llvm::deleteConstant(Constant *C) { delete static_cast(C); else if (isa(C)) delete static_cast(C); - else if (isa(C)) - delete static_cast(C); else if (isa(C)) delete static_cast(C); else if (isa(C)) @@ -577,38 +562,47 @@ void llvm::deleteConstant(Constant *C) { } static bool canTrapImpl(const Constant *C, - SmallPtrSetImpl &NonTrappingOps) { - assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); - // The only thing that could possibly trap are constant 
exprs. + SmallPtrSetImpl &NonTrappingOps) { + assert(C->getType()->isFirstClassType() && + "Cannot evaluate non-first-class types!"); + // ConstantExpr or ConstantAggregate trap if any operands can trap. + if (isa(C) || isa(C)) { + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + const Constant *Op = cast(C->getOperand(i)); + if (isa(Op) || isa(Op)) { + if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps)) + return true; + } + } + } + + // The only leafs that can trap are constant expressions. const ConstantExpr *CE = dyn_cast(C); if (!CE) return false; - // ConstantExpr traps if any operands can trap. - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - if (ConstantExpr *Op = dyn_cast(CE->getOperand(i))) { - if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps)) - return true; - } - } - // Otherwise, only specific operations can trap. switch (CE->getOpcode()) { default: return false; - case Instruction::UDiv: case Instruction::SDiv: - case Instruction::URem: case Instruction::SRem: - // Div and rem can trap if the RHS is not known to be non-zero. - if (!isa(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue()) + // Signed div/rem can trap for SignedMin / -1. + if (!CE->getOperand(0)->isNotMinSignedValue() && + (!isa(CE->getOperand(1)) || + CE->getOperand(1)->isAllOnesValue())) return true; - return false; + LLVM_FALLTHROUGH; + case Instruction::UDiv: + case Instruction::URem: + // Div and rem can trap if the RHS is not known to be non-zero. + return !isa(CE->getOperand(1)) || + CE->getOperand(1)->isNullValue(); } } bool Constant::canTrap() const { - SmallPtrSet NonTrappingOps; + SmallPtrSet NonTrappingOps; return canTrapImpl(this, NonTrappingOps); } @@ -742,9 +736,13 @@ static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) { ++I; } - if (RemoveDeadUsers) + if (RemoveDeadUsers) { + // If C is only used by metadata, it should not be preserved but should + // have its uses replaced. 
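The reworked canTrapImpl above adds a case the old code missed: signed division traps not only on a zero divisor but also on overflow. The underlying arithmetic fact, as a plain C++ predicate (a sketch for illustration, not LLVM code):

    #include <cstdint>
    #include <limits>

    // INT32_MIN / -1 would be +2147483648, which is unrepresentable in
    // int32_t, so an sdiv/srem whose LHS may be the minimum signed value
    // and whose RHS may be -1 must be treated as potentially trapping.
    bool wouldSDivTrap(int32_t Num, int32_t Den) {
      return Den == 0 ||
             (Num == std::numeric_limits<int32_t>::min() && Den == -1);
    }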
+ ReplaceableMetadataImpl::SalvageDebugInfo(*C); const_cast(C)->destroyConstant(); - + } + return true; } @@ -1046,9 +1044,9 @@ Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) { return C; } -Constant *ConstantFP::getNegativeZero(Type *Ty) { +Constant *ConstantFP::getZero(Type *Ty, bool Negative) { const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics(); - APFloat NegZero = APFloat::getZero(Semantics, /*Negative=*/true); + APFloat NegZero = APFloat::getZero(Semantics, Negative); Constant *C = get(Ty->getContext(), NegZero); if (VectorType *VTy = dyn_cast(Ty)) @@ -1057,7 +1055,6 @@ Constant *ConstantFP::getNegativeZero(Type *Ty) { return C; } - Constant *ConstantFP::getZeroValueForNegation(Type *Ty) { if (Ty->isFPOrFPVectorTy()) return getNegativeZero(Ty); @@ -1492,15 +1489,10 @@ bool ConstantExpr::isCompare() const { } bool ConstantExpr::hasIndices() const { - return getOpcode() == Instruction::ExtractValue || - getOpcode() == Instruction::InsertValue; + return getOpcode() == Instruction::InsertValue; } ArrayRef ConstantExpr::getIndices() const { - if (const ExtractValueConstantExpr *EVCE = - dyn_cast(this)) - return EVCE->Indices; - return cast(this)->Indices; } @@ -1550,8 +1542,6 @@ Constant *ConstantExpr::getWithOperands(ArrayRef Ops, Type *Ty, case Instruction::InsertValue: return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices(), OnlyIfReducedTy); - case Instruction::ExtractValue: - return ConstantExpr::getExtractValue(Ops[0], getIndices(), OnlyIfReducedTy); case Instruction::FNeg: return ConstantExpr::getFNeg(Ops[0]); case Instruction::ShuffleVector: @@ -2065,6 +2055,17 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) { return getTrunc(C, Ty); } +Constant *ConstantExpr::getSExtOrTrunc(Constant *C, Type *Ty) { + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && + "Can only sign extend/truncate integers!"); + Type *CTy = C->getType(); + if (CTy->getScalarSizeInBits() < Ty->getScalarSizeInBits()) + return getSExt(C, Ty); + if (CTy->getScalarSizeInBits() > Ty->getScalarSizeInBits()) + return getTrunc(C, Ty); + return C; +} + Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) { assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) && @@ -2233,8 +2234,8 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, "PtrToInt destination must be integer or integer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) - assert(cast(C->getType())->getNumElements() == - cast(DstTy)->getNumElements() && + assert(cast(C->getType())->getElementCount() == + cast(DstTy)->getElementCount() && "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::PtrToInt, C, DstTy, OnlyIfReduced); } @@ -2667,30 +2668,6 @@ Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val, return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } -Constant *ConstantExpr::getExtractValue(Constant *Agg, ArrayRef Idxs, - Type *OnlyIfReducedTy) { - assert(Agg->getType()->isFirstClassType() && - "Tried to create extractelement operation on non-first-class type!"); - - Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs); - (void)ReqTy; - assert(ReqTy && "extractvalue indices invalid!"); - - assert(Agg->getType()->isFirstClassType() && - "Non-first-class type for constant extractvalue expression"); - if (Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs)) - return FC; - 
-  if (OnlyIfReducedTy == ReqTy)
-    return nullptr;
-
-  Constant *ArgVec[] = { Agg };
-  const ConstantExprKeyType Key(Instruction::ExtractValue, ArgVec, 0, 0, Idxs);
-
-  LLVMContextImpl *pImpl = Agg->getContext().pImpl;
-  return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
 Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
   assert(C->getType()->isIntOrIntVectorTy() &&
          "Cannot NEG a nonintegral value!");
@@ -2833,7 +2810,7 @@ Constant *ConstantExpr::getExactLogBase2(Constant *C) {
 }
 
 Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
-                                         bool AllowRHSConstant) {
+                                         bool AllowRHSConstant, bool NSZ) {
   assert(Instruction::isBinaryOp(Opcode) && "Only binops allowed");
 
   // Commutative opcodes: it does not matter if AllowRHSConstant is set.
@@ -2848,8 +2825,7 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
   case Instruction::And: // X & -1 = X
     return Constant::getAllOnesValue(Ty);
   case Instruction::FAdd: // X + -0.0 = X
-    // TODO: If the fadd has 'nsz', should we return +0.0?
-    return ConstantFP::getNegativeZero(Ty);
+    return ConstantFP::getZero(Ty, !NSZ);
   case Instruction::FMul: // X * 1.0 = X
     return ConstantFP::get(Ty, 1.0);
   default:
@@ -3544,8 +3520,6 @@ Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const {
   case Instruction::InsertValue:
     return InsertValueInst::Create(Ops[0], Ops[1], getIndices(), "",
                                    InsertBefore);
-  case Instruction::ExtractValue:
-    return ExtractValueInst::Create(Ops[0], getIndices(), "", InsertBefore);
   case Instruction::ShuffleVector:
     return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask(), "",
                                  InsertBefore);
diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h
index 4056c5748081..21ef1c0d9f64 100644
--- a/llvm/lib/IR/ConstantsContext.h
+++ b/llvm/lib/IR/ConstantsContext.h
@@ -209,36 +209,6 @@ public:
   }
 };
 
-/// ExtractValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// extractvalue constant exprs.
-class ExtractValueConstantExpr final : public ConstantExpr {
-public:
-  ExtractValueConstantExpr(Constant *Agg, ArrayRef<unsigned> IdxList,
-                           Type *DestTy)
-      : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
-        Indices(IdxList.begin(), IdxList.end()) {
-    Op<0>() = Agg;
-  }
-
-  // allocate space for exactly one operand
-  void *operator new(size_t S) { return User::operator new(S, 1); }
-  void operator delete(void *Ptr) { User::operator delete(Ptr); }
-
-  /// Indices - These identify which value to extract.
-  const SmallVector<unsigned, 4> Indices;
-
-  /// Transparently provide more efficient getOperand methods.
-  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
-  static bool classof(const ConstantExpr *CE) {
-    return CE->getOpcode() == Instruction::ExtractValue;
-  }
-  static bool classof(const Value *V) {
-    return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
-  }
-};
-
 /// InsertValueConstantExpr - This class is private to
 /// Constants.cpp, and is used behind the scenes to implement
 /// insertvalue constant exprs.
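// ---------------------------------------------------------------------------
// Usage sketch (illustrative aside, not part of the vendored diff): with
// ExtractValueConstantExpr removed above, ConstantExpr::getExtractValue no
// longer exists. Clients go through IRBuilder instead, whose constant folder
// still yields a plain Constant for constant aggregates. The helper names
// below are assumptions, not upstream API.
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *extractField0(IRBuilder<> &B, Value *Agg) {
  // Folds to a Constant when Agg is a constant aggregate; otherwise emits a
  // real extractvalue instruction at the current insertion point.
  return B.CreateExtractValue(Agg, /*Idxs=*/{0});
}

static Constant *signResizeToI32(Constant *C) {
  // The getSExtOrTrunc helper added in this patch: sign-extends narrower
  // constants, truncates wider ones, and returns C unchanged on a width match.
  return ConstantExpr::getSExtOrTrunc(C, Type::getInt32Ty(C->getContext()));
}
// ---------------------------------------------------------------------------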
@@ -362,11 +332,6 @@ struct OperandTraits : public FixedNumOperandTraits {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value) -template <> -struct OperandTraits - : public FixedNumOperandTraits {}; -DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value) - template <> struct OperandTraits : public FixedNumOperandTraits {}; @@ -620,8 +585,6 @@ public: return new ShuffleVectorConstantExpr(Ops[0], Ops[1], ShuffleMask); case Instruction::InsertValue: return new InsertValueConstantExpr(Ops[0], Ops[1], Indexes, Ty); - case Instruction::ExtractValue: - return new ExtractValueConstantExpr(Ops[0], Indexes, Ty); case Instruction::GetElementPtr: return GetElementPtrConstantExpr::Create(ExplicitTy, Ops[0], Ops.slice(1), Ty, SubclassOptionalData); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 7ed156d552b1..4b9189ca5baa 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -115,6 +115,10 @@ void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard) { unwrap(C)->setDiscardValueNames(Discard); } +void LLVMContextSetOpaquePointers(LLVMContextRef C, LLVMBool OpaquePointers) { + unwrap(C)->setOpaquePointers(OpaquePointers); +} + void LLVMContextDispose(LLVMContextRef C) { delete unwrap(C); } @@ -534,6 +538,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMTokenTypeKind; case Type::ScalableVectorTyID: return LLVMScalableVectorTypeKind; + case Type::DXILPointerTyID: + llvm_unreachable("DXIL pointers are unsupported via the C API"); } llvm_unreachable("Unhandled TypeID."); } @@ -786,6 +792,10 @@ LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) { return wrap(PointerType::get(unwrap(ElementType), AddressSpace)); } +LLVMBool LLVMPointerTypeIsOpaque(LLVMTypeRef Ty) { + return unwrap(Ty)->isOpaquePointerTy(); +} + LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(FixedVectorType::get(unwrap(ElementType), ElementCount)); } @@ -798,7 +808,7 @@ LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType, LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) { auto *Ty = unwrap(WrappedTy); if (auto *PTy = dyn_cast(Ty)) - return wrap(PTy->getPointerElementType()); + return wrap(PTy->getNonOpaquePointerElementType()); if (auto *ATy = dyn_cast(Ty)) return wrap(ATy->getElementType()); return wrap(cast(Ty)->getElementType()); @@ -822,6 +832,10 @@ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) { /*--.. 
Operations on other types ...........................................--*/ +LLVMTypeRef LLVMPointerTypeInContext(LLVMContextRef C, unsigned AddressSpace) { + return wrap(PointerType::get(*unwrap(C), AddressSpace)); +} + LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) { return wrap(Type::getVoidTy(*unwrap(C))); } @@ -1431,6 +1445,10 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length, DontNullTerminate); } +LLVMValueRef LLVMGetAggregateElement(LLVMValueRef C, unsigned Idx) { + return wrap(unwrap(C)->getAggregateElement(Idx)); +} + LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx) { return wrap(unwrap(C)->getElementAsConstant(idx)); } @@ -1857,12 +1875,6 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, IntMask)); } -LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, - unsigned NumIdx) { - return wrap(ConstantExpr::getExtractValue(unwrap(AggConstant), - makeArrayRef(IdxList, NumIdx))); -} - LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef ElementValueConstant, unsigned *IdxList, unsigned NumIdx) { @@ -2061,13 +2073,13 @@ LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global) { unsigned LLVMGetAlignment(LLVMValueRef V) { Value *P = unwrap(V); if (GlobalObject *GV = dyn_cast(P)) - return GV->getAlignment(); + return GV->getAlign() ? GV->getAlign()->value() : 0; if (AllocaInst *AI = dyn_cast(P)) - return AI->getAlignment(); + return AI->getAlign().value(); if (LoadInst *LI = dyn_cast(P)) - return LI->getAlignment(); + return LI->getAlign().value(); if (StoreInst *SI = dyn_cast(P)) - return SI->getAlignment(); + return SI->getAlign().value(); if (AtomicRMWInst *RMWI = dyn_cast(P)) return RMWI->getAlign().value(); if (AtomicCmpXchgInst *CXI = dyn_cast(P)) @@ -3919,6 +3931,12 @@ LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val, return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name)); } +LLVMOpcode LLVMGetCastOpcode(LLVMValueRef Src, LLVMBool SrcIsSigned, + LLVMTypeRef DestTy, LLVMBool DestIsSigned) { + return map_to_llvmopcode(CastInst::getCastOpcode( + unwrap(Src), SrcIsSigned, unwrap(DestTy), DestIsSigned)); +} + /*--.. 
Comparisons .........................................................--*/ LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op, diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index dc5768dd4f26..34ffc9425281 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" using namespace llvm; using namespace llvm::dwarf; @@ -293,6 +292,22 @@ DIStringType *DIBuilder::createStringType(StringRef Name, uint64_t SizeInBits) { SizeInBits, 0); } +DIStringType *DIBuilder::createStringType(StringRef Name, + DIVariable *StringLength, + DIExpression *StrLocationExp) { + assert(!Name.empty() && "Unable to create type without name"); + return DIStringType::get(VMContext, dwarf::DW_TAG_string_type, Name, + StringLength, nullptr, StrLocationExp, 0, 0, 0); +} + +DIStringType *DIBuilder::createStringType(StringRef Name, + DIExpression *StringLengthExp, + DIExpression *StrLocationExp) { + assert(!Name.empty() && "Unable to create type without name"); + return DIStringType::get(VMContext, dwarf::DW_TAG_string_type, Name, nullptr, + StringLengthExp, StrLocationExp, 0, 0, 0); +} + DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) { return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0, 0, 0, None, DINode::FlagZero); @@ -831,14 +846,15 @@ DISubprogram *DIBuilder::createFunction( unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine, DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags, DITemplateParameterArray TParams, DISubprogram *Decl, - DITypeArray ThrownTypes, DINodeArray Annotations) { + DITypeArray ThrownTypes, DINodeArray Annotations, + StringRef TargetFuncName) { bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition; auto *Node = getSubprogram( /*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags, SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl, MDTuple::getTemporary(VMContext, None).release(), ThrownTypes, - Annotations); + Annotations, TargetFuncName); if (IsDefinition) AllSubprograms.push_back(Node); diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index b9fc5261fefe..50799327c78a 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -15,6 +15,7 @@ #include "MetadataImpl.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -26,7 +27,7 @@ using namespace llvm; namespace llvm { // Use FS-AFDO discriminator. 
cl::opt EnableFSDiscriminator( - "enable-fs-discriminator", cl::Hidden, cl::init(false), + "enable-fs-discriminator", cl::Hidden, cl::desc("Enable adding flow sensitive discriminators")); } // namespace llvm @@ -77,8 +78,8 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line, Ops.push_back(Scope); if (InlinedAt) Ops.push_back(InlinedAt); - return storeImpl(new (Ops.size()) DILocation(Context, Storage, Line, Column, - Ops, ImplicitCode), + return storeImpl(new (Ops.size(), Storage) DILocation( + Context, Storage, Line, Column, Ops, ImplicitCode), Storage, Context.pImpl->DILocations); } @@ -180,6 +181,7 @@ void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF, CI = getUnsignedFromPrefixEncoding( getNextComponentInDiscriminator(getNextComponentInDiscriminator(D))); } +dwarf::Tag DINode::getTag() const { return (dwarf::Tag)SubclassData16; } DINode::DIFlags DINode::getFlag(StringRef Flag) { return StringSwitch(Flag) @@ -282,6 +284,7 @@ static bool isCanonical(const MDString *S) { } #endif +dwarf::Tag GenericDINode::getTag() const { return (dwarf::Tag)SubclassData16; } GenericDINode *GenericDINode::getImpl(LLVMContext &Context, unsigned Tag, MDString *Header, ArrayRef DwarfOps, @@ -301,7 +304,7 @@ GenericDINode *GenericDINode::getImpl(LLVMContext &Context, unsigned Tag, // Use a nullptr for empty headers. assert(isCanonical(Header) && "Expected canonical MDString"); Metadata *PreOps[] = {Header}; - return storeImpl(new (DwarfOps.size() + 1) GenericDINode( + return storeImpl(new (DwarfOps.size() + 1, Storage) GenericDINode( Context, Storage, Hash, Tag, PreOps, DwarfOps), Storage, Context.pImpl->GenericDINodes); } @@ -326,20 +329,25 @@ void GenericDINode::recalculateHash() { } \ } while (false) #define DEFINE_GETIMPL_STORE(CLASS, ARGS, OPS) \ - return storeImpl(new (array_lengthof(OPS)) \ + return storeImpl(new (array_lengthof(OPS), Storage) \ CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \ Storage, Context.pImpl->CLASS##s) #define DEFINE_GETIMPL_STORE_NO_OPS(CLASS, ARGS) \ - return storeImpl(new (0u) CLASS(Context, Storage, UNWRAP_ARGS(ARGS)), \ + return storeImpl(new (0u, Storage) \ + CLASS(Context, Storage, UNWRAP_ARGS(ARGS)), \ Storage, Context.pImpl->CLASS##s) #define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS) \ - return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS), \ + return storeImpl(new (array_lengthof(OPS), Storage) \ + CLASS(Context, Storage, OPS), \ Storage, Context.pImpl->CLASS##s) #define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS) \ - return storeImpl(new (NUM_OPS) \ + return storeImpl(new (NUM_OPS, Storage) \ CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \ Storage, Context.pImpl->CLASS##s) +DISubrange::DISubrange(LLVMContext &C, StorageType Storage, + ArrayRef Ops) + : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops) {} DISubrange *DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo, StorageType Storage, bool ShouldCreate) { auto *CountNode = ConstantAsMetadata::get( @@ -450,6 +458,10 @@ DISubrange::BoundType DISubrange::getStride() const { return BoundType(); } +DIGenericSubrange::DIGenericSubrange(LLVMContext &C, StorageType Storage, + ArrayRef Ops) + : DINode(C, DIGenericSubrangeKind, Storage, dwarf::DW_TAG_generic_subrange, + Ops) {} DIGenericSubrange *DIGenericSubrange::getImpl(LLVMContext &Context, Metadata *CountNode, Metadata *LB, @@ -529,6 +541,13 @@ DIGenericSubrange::BoundType DIGenericSubrange::getStride() const { return BoundType(); } 
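// ---------------------------------------------------------------------------
// Usage sketch (illustrative aside, not part of the vendored diff): the two
// DIBuilder::createStringType overloads added in the DIBuilder.cpp hunks
// above let a Fortran-style frontend describe strings whose length is only
// known at run time. `DIB`, `LenVar`, and the DWARF expression are assumed
// caller-provided; the expression shown is only one plausible encoding.
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DIBuilder.h"
using namespace llvm;

static void emitDynamicStringTypes(DIBuilder &DIB, DIVariable *LenVar) {
  // Length carried by a debug-info variable (deferred-length string).
  DIStringType *ByVar =
      DIB.createStringType("character(*)", LenVar, /*StrLocationExp=*/nullptr);
  (void)ByVar;

  // Length computed by a DWARF expression against the string descriptor.
  DIExpression *LenExpr = DIB.createExpression(ArrayRef<uint64_t>{
      dwarf::DW_OP_push_object_address, dwarf::DW_OP_deref});
  DIStringType *ByExpr =
      DIB.createStringType("character(:)", LenExpr, /*StrLocationExp=*/nullptr);
  (void)ByExpr;
}
// ---------------------------------------------------------------------------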
+DIEnumerator::DIEnumerator(LLVMContext &C, StorageType Storage, + const APInt &Value, bool IsUnsigned, + ArrayRef Ops) + : DINode(C, DIEnumeratorKind, Storage, dwarf::DW_TAG_enumerator, Ops), + Value(Value) { + SubclassData32 = IsUnsigned; +} DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, const APInt &Value, bool IsUnsigned, MDString *Name, StorageType Storage, bool ShouldCreate) { @@ -580,6 +599,36 @@ DIStringType *DIStringType::getImpl(LLVMContext &Context, unsigned Tag, DEFINE_GETIMPL_STORE(DIStringType, (Tag, SizeInBits, AlignInBits, Encoding), Ops); } +DIType *DIDerivedType::getClassType() const { + assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); + return cast_or_null(getExtraData()); +} +uint32_t DIDerivedType::getVBPtrOffset() const { + assert(getTag() == dwarf::DW_TAG_inheritance); + if (auto *CM = cast_or_null(getExtraData())) + if (auto *CI = dyn_cast_or_null(CM->getValue())) + return static_cast(CI->getZExtValue()); + return 0; +} +Constant *DIDerivedType::getStorageOffsetInBits() const { + assert(getTag() == dwarf::DW_TAG_member && isBitField()); + if (auto *C = cast_or_null(getExtraData())) + return C->getValue(); + return nullptr; +} + +Constant *DIDerivedType::getConstant() const { + assert(getTag() == dwarf::DW_TAG_member && isStaticMember()); + if (auto *C = cast_or_null(getExtraData())) + return C->getValue(); + return nullptr; +} +Constant *DIDerivedType::getDiscriminantValue() const { + assert(getTag() == dwarf::DW_TAG_member && !isStaticMember()); + if (auto *C = cast_or_null(getExtraData())) + return C->getValue(); + return nullptr; +} DIDerivedType *DIDerivedType::getImpl( LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, @@ -701,6 +750,12 @@ DICompositeType *DICompositeType::getODRTypeIfExists(LLVMContext &Context, return nullptr; return Context.pImpl->DITypeMap->lookup(&Identifier); } +DISubroutineType::DISubroutineType(LLVMContext &C, StorageType Storage, + DIFlags Flags, uint8_t CC, + ArrayRef Ops) + : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type, 0, + 0, 0, 0, Flags, Ops), + CC(CC) {} DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags, uint8_t CC, Metadata *TypeArray, @@ -711,6 +766,12 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags, DEFINE_GETIMPL_STORE(DISubroutineType, (Flags, CC), Ops); } +DIFile::DIFile(LLVMContext &C, StorageType Storage, + Optional> CS, Optional Src, + ArrayRef Ops) + : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops), + Checksum(CS), Source(Src) {} + // FIXME: Implement this string-enum correspondence with a .def file and macros, // so that the association is explicit rather than implied. static const char *ChecksumKindName[DIFile::CSK_Last] = { @@ -746,9 +807,23 @@ DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename, assert((!Source || isCanonical(*Source)) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(DIFile, (Filename, Directory, CS, Source)); Metadata *Ops[] = {Filename, Directory, CS ? 
CS->Value : nullptr, - Source.getValueOr(nullptr)}; + Source.value_or(nullptr)}; DEFINE_GETIMPL_STORE(DIFile, (CS, Source), Ops); } +DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage, + unsigned SourceLanguage, bool IsOptimized, + unsigned RuntimeVersion, unsigned EmissionKind, + uint64_t DWOId, bool SplitDebugInlining, + bool DebugInfoForProfiling, unsigned NameTableKind, + bool RangesBaseAddress, ArrayRef Ops) + : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops), + SourceLanguage(SourceLanguage), IsOptimized(IsOptimized), + RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId), + SplitDebugInlining(SplitDebugInlining), + DebugInfoForProfiling(DebugInfoForProfiling), + NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) { + assert(Storage != Uniqued); +} DICompileUnit *DICompileUnit::getImpl( LLVMContext &Context, unsigned SourceLanguage, Metadata *File, @@ -775,7 +850,7 @@ DICompileUnit *DICompileUnit::getImpl( Macros, SysRoot, SDK}; - return storeImpl(new (array_lengthof(Ops)) DICompileUnit( + return storeImpl(new (array_lengthof(Ops), Storage) DICompileUnit( Context, Storage, SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind, DWOId, SplitDebugInlining, DebugInfoForProfiling, NameTableKind, RangesBaseAddress, @@ -827,6 +902,30 @@ const char *DICompileUnit::nameTableKindString(DebugNameTableKind NTK) { } return nullptr; } +DISubprogram::DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line, + unsigned ScopeLine, unsigned VirtualIndex, + int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, + ArrayRef Ops) + : DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram, Ops), + Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex), + ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags) { + static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range"); +} +DISubprogram::DISPFlags +DISubprogram::toSPFlags(bool IsLocalToUnit, bool IsDefinition, bool IsOptimized, + unsigned Virtuality, bool IsMainSubprogram) { + // We're assuming virtuality is the low-order field. + static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) && + int(SPFlagPureVirtual) == + int(dwarf::DW_VIRTUALITY_pure_virtual), + "Virtuality constant mismatch"); + return static_cast( + (Virtuality & SPFlagVirtuality) | + (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) | + (IsDefinition ? SPFlagDefinition : SPFlagZero) | + (IsOptimized ? SPFlagOptimized : SPFlagZero) | + (IsMainSubprogram ? 
SPFlagMainSubprogram : SPFlagZero)); +} DISubprogram *DILocalScope::getSubprogram() const { if (auto *Block = dyn_cast(this)) @@ -881,27 +980,33 @@ DISubprogram *DISubprogram::getImpl( unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit, Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes, - Metadata *ThrownTypes, Metadata *Annotations, StorageType Storage, - bool ShouldCreate) { + Metadata *ThrownTypes, Metadata *Annotations, MDString *TargetFuncName, + StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); assert(isCanonical(LinkageName) && "Expected canonical MDString"); + assert(isCanonical(TargetFuncName) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(DISubprogram, (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams, Declaration, - RetainedNodes, ThrownTypes, Annotations)); - SmallVector Ops = { + RetainedNodes, ThrownTypes, Annotations, + TargetFuncName)); + SmallVector Ops = { File, Scope, Name, LinkageName, Type, Unit, Declaration, RetainedNodes, - ContainingType, TemplateParams, ThrownTypes, Annotations}; - if (!Annotations) { + ContainingType, TemplateParams, ThrownTypes, Annotations, + TargetFuncName}; + if (!TargetFuncName) { Ops.pop_back(); - if (!ThrownTypes) { + if (!Annotations) { Ops.pop_back(); - if (!TemplateParams) { + if (!ThrownTypes) { Ops.pop_back(); - if (!ContainingType) + if (!TemplateParams) { Ops.pop_back(); + if (!ContainingType) + Ops.pop_back(); + } } } } @@ -915,6 +1020,10 @@ bool DISubprogram::describes(const Function *F) const { assert(F && "Invalid function"); return F->getSubprogram() == this; } +DILexicalBlockBase::DILexicalBlockBase(LLVMContext &C, unsigned ID, + StorageType Storage, + ArrayRef Ops) + : DILocalScope(C, ID, Storage, dwarf::DW_TAG_lexical_block, Ops) {} DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *File, unsigned Line, @@ -940,6 +1049,10 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context, DEFINE_GETIMPL_STORE(DILexicalBlockFile, (Discriminator), Ops); } +DINamespace::DINamespace(LLVMContext &Context, StorageType Storage, + bool ExportSymbols, ArrayRef Ops) + : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops), + ExportSymbols(ExportSymbols) {} DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, bool ExportSymbols, StorageType Storage, bool ShouldCreate) { @@ -950,6 +1063,11 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope, DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops); } +DICommonBlock::DICommonBlock(LLVMContext &Context, StorageType Storage, + unsigned LineNo, ArrayRef Ops) + : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block, + Ops), + LineNo(LineNo) {} DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *Decl, MDString *Name, Metadata *File, unsigned LineNo, @@ -961,6 +1079,10 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope, DEFINE_GETIMPL_STORE(DICommonBlock, (LineNo), Ops); } +DIModule::DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo, + bool IsDecl, ArrayRef Ops) + : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops), + LineNo(LineNo), IsDecl(IsDecl) {} DIModule *DIModule::getImpl(LLVMContext &Context, 
Metadata *File, Metadata *Scope, MDString *Name, MDString *ConfigurationMacros, @@ -974,6 +1096,13 @@ DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File, IncludePath, APINotesFile}; DEFINE_GETIMPL_STORE(DIModule, (LineNo, IsDecl), Ops); } +DITemplateTypeParameter::DITemplateTypeParameter(LLVMContext &Context, + StorageType Storage, + bool IsDefault, + ArrayRef Ops) + : DITemplateParameter(Context, DITemplateTypeParameterKind, Storage, + dwarf::DW_TAG_template_type_parameter, IsDefault, + Ops) {} DITemplateTypeParameter * DITemplateTypeParameter::getImpl(LLVMContext &Context, MDString *Name, @@ -1039,6 +1168,11 @@ DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags, AlignInBits), Ops); } +DIVariable::DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, + signed Line, ArrayRef Ops, + uint32_t AlignInBits) + : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line), + AlignInBits(AlignInBits) {} Optional DIVariable::getSizeInBits() const { // This is used by the Verifier so be mindful of broken types. const Metadata *RawType = getRawType(); @@ -1062,6 +1196,9 @@ Optional DIVariable::getSizeInBits() const { return None; } +DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line, + ArrayRef Ops) + : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {} DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Metadata *File, unsigned Line, StorageType Storage, bool ShouldCreate) { @@ -1078,6 +1215,12 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context, DEFINE_GETIMPL_LOOKUP(DIExpression, (Elements)); DEFINE_GETIMPL_STORE_NO_OPS(DIExpression, (Elements)); } +bool DIExpression::isEntryValue() const { + return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_LLVM_entry_value; +} +bool DIExpression::startsWithDeref() const { + return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; +} unsigned DIExpression::ExprOperand::getSize() const { uint64_t Op = getOp(); @@ -1439,7 +1582,7 @@ DIExpression *DIExpression::appendToStack(const DIExpression *Expr, // // Match .* DW_OP_stack_value (DW_OP_LLVM_fragment A B)?. Optional FI = Expr->getFragmentInfo(); - unsigned DropUntilStackValue = FI.hasValue() ? 3 : 0; + unsigned DropUntilStackValue = FI ? 
3 : 0; ArrayRef ExprOpsBeforeFragment = Expr->getElements().drop_back(DropUntilStackValue); bool NeedsDeref = (Expr->getNumElements() > DropUntilStackValue) && @@ -1597,6 +1740,11 @@ DIGlobalVariableExpression::getImpl(LLVMContext &Context, Metadata *Variable, Metadata *Ops[] = {Variable, Expression}; DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIGlobalVariableExpression, Ops); } +DIObjCProperty::DIObjCProperty(LLVMContext &C, StorageType Storage, + unsigned Line, unsigned Attributes, + ArrayRef Ops) + : DINode(C, DIObjCPropertyKind, Storage, dwarf::DW_TAG_APPLE_property, Ops), + Line(Line), Attributes(Attributes) {} DIObjCProperty *DIObjCProperty::getImpl( LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line, diff --git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp index 7b40728a34e8..683eade50291 100644 --- a/llvm/lib/IR/DiagnosticHandler.cpp +++ b/llvm/lib/IR/DiagnosticHandler.cpp @@ -47,8 +47,7 @@ static cl::opt> PassRemarks( "pass-remarks", cl::value_desc("pattern"), cl::desc("Enable optimization remarks from passes whose name match " "the given regular expression"), - cl::Hidden, cl::location(PassRemarksPassedOptLoc), cl::ValueRequired, - cl::ZeroOrMore); + cl::Hidden, cl::location(PassRemarksPassedOptLoc), cl::ValueRequired); // -pass-remarks-missed // Command line flag to enable emitOptimizationRemarkMissed() @@ -56,8 +55,7 @@ static cl::opt> PassRemarksMissed( "pass-remarks-missed", cl::value_desc("pattern"), cl::desc("Enable missed optimization remarks from passes whose name match " "the given regular expression"), - cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired, - cl::ZeroOrMore); + cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired); // -pass-remarks-analysis // Command line flag to enable emitOptimizationRemarkAnalysis() @@ -67,8 +65,7 @@ static cl::opt> cl::desc( "Enable optimization analysis remarks from passes whose name match " "the given regular expression"), - cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired, - cl::ZeroOrMore); + cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired); } bool DiagnosticHandler::isAnalysisRemarkEnabled(StringRef PassName) const { diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index f46f0fdd947d..50fe6829ad86 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -393,6 +393,17 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const { return OS.str(); } +DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst, + Twine &Msg) + : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning, + *Inst->getParent()->getParent(), + Inst->getDebugLoc()), + Msg(Msg) {} + +void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const { + DP << getLocationStr() << ": " << getMsg(); +} + void OptimizationRemarkAnalysisFPCommute::anchor() {} void OptimizationRemarkAnalysisAliasing::anchor() {} diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp index aac8936c7bd6..09be2a8ef605 100644 --- a/llvm/lib/IR/Dominators.cpp +++ b/llvm/lib/IR/Dominators.cpp @@ -25,7 +25,6 @@ #include "llvm/PassRegistry.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp index c6e0938e71a6..48ee84080e98 100644 --- a/llvm/lib/IR/FPEnv.cpp +++ b/llvm/lib/IR/FPEnv.cpp @@ -14,6 +14,9 @@ #include "llvm/IR/FPEnv.h" 
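// ---------------------------------------------------------------------------
// Usage sketch (illustrative aside, not part of the vendored diff): the
// getConstrainedIntrinsicID helper defined in the FPEnv.cpp hunks just below
// maps an ordinary FP operation (or a supported intrinsic call) to its
// strictfp counterpart, e.g. fadd -> experimental_constrained_fadd, and
// returns Intrinsic::not_intrinsic when no constrained form exists. This
// assumes the matching declaration lives in llvm/IR/FPEnv.h.
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

static bool hasConstrainedEquivalent(const Instruction &I) {
  // True for operations a strictfp rewrite could replace one-for-one.
  return getConstrainedIntrinsicID(I) != Intrinsic::not_intrinsic;
}
// ---------------------------------------------------------------------------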
#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" namespace llvm { @@ -82,4 +85,46 @@ convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { } return ExceptStr; } + +Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr) { + Intrinsic::ID IID = Intrinsic::not_intrinsic; + switch (Instr.getOpcode()) { + case Instruction::FCmp: + // Unlike other instructions FCmp can be mapped to one of two intrinsic + // functions. We choose the non-signaling variant. + IID = Intrinsic::experimental_constrained_fcmp; + break; + + // Instructions +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ + case Instruction::NAME: \ + IID = Intrinsic::INTRINSIC; \ + break; +#define FUNCTION(NAME, NARG, ROUND_MODE, INTRINSIC) +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) +#include "llvm/IR/ConstrainedOps.def" + + // Intrinsic calls. + case Instruction::Call: + if (auto *IntrinCall = dyn_cast(&Instr)) { + switch (IntrinCall->getIntrinsicID()) { +#define FUNCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ + case Intrinsic::NAME: \ + IID = Intrinsic::INTRINSIC; \ + break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) +#include "llvm/IR/ConstrainedOps.def" + default: + break; + } + } + break; + default: + break; + } + + return IID; +} + } // namespace llvm diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 726ba80da41b..53df94366760 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" +#include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/IR/IntrinsicsMips.h" #include "llvm/IR/IntrinsicsNVPTX.h" @@ -339,8 +340,9 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty, Module *M) { auto *F = new Function(Ty, Linkage, AddrSpace, N, M); AttrBuilder B(F->getContext()); - if (M->getUwtable()) - B.addAttribute(Attribute::UWTable); + UWTableKind UWTable = M->getUwtable(); + if (UWTable != UWTableKind::None) + B.addUWTableAttr(UWTable); switch (M->getFramePointer()) { case FramePointerKind::None: // 0 ("none") is the default. @@ -926,25 +928,25 @@ std::string Intrinsic::getNameNoUnnamedTypes(ID Id, ArrayRef Tys) { enum IIT_Info { // Common values should be encoded with 0-15. IIT_Done = 0, - IIT_I1 = 1, - IIT_I8 = 2, - IIT_I16 = 3, - IIT_I32 = 4, - IIT_I64 = 5, - IIT_F16 = 6, - IIT_F32 = 7, - IIT_F64 = 8, - IIT_V2 = 9, - IIT_V4 = 10, - IIT_V8 = 11, - IIT_V16 = 12, - IIT_V32 = 13, - IIT_PTR = 14, - IIT_ARG = 15, + IIT_I1 = 1, + IIT_I8 = 2, + IIT_I16 = 3, + IIT_I32 = 4, + IIT_I64 = 5, + IIT_F16 = 6, + IIT_F32 = 7, + IIT_F64 = 8, + IIT_V2 = 9, + IIT_V4 = 10, + IIT_V8 = 11, + IIT_V16 = 12, + IIT_V32 = 13, + IIT_PTR = 14, + IIT_ARG = 15, // Values from 16+ are only encodable with the inefficient encoding. 
- IIT_V64 = 16, - IIT_MMX = 17, + IIT_V64 = 16, + IIT_MMX = 17, IIT_TOKEN = 18, IIT_METADATA = 19, IIT_EMPTYSTRUCT = 20, @@ -955,7 +957,7 @@ enum IIT_Info { IIT_EXTEND_ARG = 25, IIT_TRUNC_ARG = 26, IIT_ANYPTR = 27, - IIT_V1 = 28, + IIT_V1 = 28, IIT_VARARG = 29, IIT_HALF_VEC_ARG = 30, IIT_SAME_VEC_WIDTH_ARG = 31, @@ -978,11 +980,14 @@ enum IIT_Info { IIT_BF16 = 48, IIT_STRUCT9 = 49, IIT_V256 = 50, - IIT_AMX = 51, + IIT_AMX = 51, IIT_PPCF128 = 52, IIT_V3 = 53, IIT_EXTERNREF = 54, - IIT_FUNCREF = 55 + IIT_FUNCREF = 55, + IIT_ANYPTR_TO_ELT = 56, + IIT_I2 = 57, + IIT_I4 = 58, }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -1035,6 +1040,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_I1: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1)); return; + case IIT_I2: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 2)); + return; + case IIT_I4: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 4)); + return; case IIT_I8: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8)); return; @@ -1156,6 +1167,13 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo)); return; } + case IIT_ANYPTR_TO_ELT: { + unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back( + IITDescriptor::get(IITDescriptor::AnyPtrToElt, ArgNo, RefNo)); + return; + } case IIT_VEC_OF_ANYPTRS_TO_ELT: { unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); @@ -1347,6 +1365,9 @@ static Type *DecodeFixedType(ArrayRef &Infos, case IITDescriptor::VecOfAnyPtrsToElt: // Return the overloaded type (which determines the pointers address space) return Tys[D.getOverloadArgNumber()]; + case IITDescriptor::AnyPtrToElt: + // Return the overloaded type (which determines the pointers address space) + return Tys[D.getOverloadArgNumber()]; } llvm_unreachable("unhandled"); } @@ -1406,10 +1427,10 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef Tys) { .getCallee()); } -// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method. -#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +// This defines the "Intrinsic::getIntrinsicForClangBuiltin()" method. +#define GET_LLVM_INTRINSIC_FOR_CLANG_BUILTIN #include "llvm/IR/IntrinsicImpl.inc" -#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#undef GET_LLVM_INTRINSIC_FOR_CLANG_BUILTIN // This defines the "Intrinsic::getIntrinsicForMSBuiltin()" method. #define GET_LLVM_INTRINSIC_FOR_MS_BUILTIN @@ -1463,19 +1484,37 @@ static bool matchIntrinsicType( PointerType *PT = dyn_cast(Ty); if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace) return true; - if (!PT->isOpaque()) + if (!PT->isOpaque()) { + /* Manually consume a pointer to empty struct descriptor, which is + * used for externref. We don't want to enforce that the struct is + * anonymous in this case. (This renders externref intrinsics + * non-unique, but this will go away with opaque pointers anyway.) */ + if (Infos.front().Kind == IITDescriptor::Struct && + Infos.front().Struct_NumElements == 0) { + Infos = Infos.slice(1); + return false; + } return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos, ArgTys, DeferredChecks, IsDeferredCheck); + } // Consume IIT descriptors relating to the pointer element type. 
- while (Infos.front().Kind == IITDescriptor::Pointer) + // FIXME: Intrinsic type matching of nested single value types or even + // aggregates doesn't work properly with opaque pointers but hopefully + // doesn't happen in practice. + while (Infos.front().Kind == IITDescriptor::Pointer || + Infos.front().Kind == IITDescriptor::Vector) Infos = Infos.slice(1); + assert((Infos.front().Kind != IITDescriptor::Argument || + Infos.front().getArgumentKind() == IITDescriptor::AK_MatchType) && + "Unsupported polymorphic pointer type with opaque pointer"); Infos = Infos.slice(1); return false; } case IITDescriptor::Struct: { StructType *ST = dyn_cast(Ty); - if (!ST || ST->getNumElements() != D.Struct_NumElements) + if (!ST || !ST->isLiteral() || ST->isPacked() || + ST->getNumElements() != D.Struct_NumElements) return true; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) @@ -1587,6 +1626,30 @@ static bool matchIntrinsicType( return !ThisArgType->isOpaqueOrPointeeTypeMatches( ReferenceType->getElementType()); } + case IITDescriptor::AnyPtrToElt: { + unsigned RefArgNumber = D.getRefArgNumber(); + if (RefArgNumber >= ArgTys.size()) { + if (IsDeferredCheck) + return true; + // If forward referencing, already add the pointer type and + // defer the checks for later. + ArgTys.push_back(Ty); + return DeferCheck(Ty); + } + + if (!IsDeferredCheck) { + assert(D.getOverloadArgNumber() == ArgTys.size() && + "Table consistency error"); + ArgTys.push_back(Ty); + } + + auto *ReferenceType = dyn_cast(ArgTys[RefArgNumber]); + auto *ThisArgType = dyn_cast(Ty); + if (!ThisArgType || !ReferenceType) + return true; + return !ThisArgType->isOpaqueOrPointeeTypeMatches( + ReferenceType->getElementType()); + } case IITDescriptor::VecOfAnyPtrsToElt: { unsigned RefArgNumber = D.getRefArgNumber(); if (RefArgNumber >= ArgTys.size()) { @@ -1802,7 +1865,7 @@ bool Function::hasAddressTaken(const User **PutOffender, *PutOffender = FU; return true; } - if (!Call->isCallee(&U)) { + if (!Call->isCallee(&U) || Call->getFunctionType() != getFunctionType()) { if (IgnoreARCAttachedCall && Call->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall, U.getOperandNo())) @@ -1909,7 +1972,7 @@ void Function::setEntryCount(ProfileCount Count, const DenseSet *S) { #if !defined(NDEBUG) auto PrevCount = getEntryCount(); - assert(!PrevCount.hasValue() || PrevCount->getType() == Count.getType()); + assert(!PrevCount || PrevCount->getType() == Count.getType()); #endif auto ImportGUIDs = getImportGUIDs(); diff --git a/llvm/lib/IR/GVMaterializer.cpp b/llvm/lib/IR/GVMaterializer.cpp index 35397309a103..dc3b0e0fc236 100644 --- a/llvm/lib/IR/GVMaterializer.cpp +++ b/llvm/lib/IR/GVMaterializer.cpp @@ -14,4 +14,4 @@ #include "llvm/IR/GVMaterializer.h" using namespace llvm; -GVMaterializer::~GVMaterializer() {} +GVMaterializer::~GVMaterializer() = default; diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 47e8bc0a916d..3265050261c8 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -67,6 +67,10 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { setDLLStorageClass(Src->getDLLStorageClass()); setDSOLocal(Src->isDSOLocal()); setPartition(Src->getPartition()); + if (Src->hasSanitizerMetadata()) + setSanitizerMetadata(Src->getSanitizerMetadata()); + else + removeSanitizerMetadata(); } void GlobalValue::removeFromParent() { @@ -217,6 +221,25 @@ void GlobalValue::setPartition(StringRef S) { HasPartition = !S.empty(); } +using SanitizerMetadata = GlobalValue::SanitizerMetadata; +const 
SanitizerMetadata &GlobalValue::getSanitizerMetadata() const { + assert(hasSanitizerMetadata()); + assert(getContext().pImpl->GlobalValueSanitizerMetadata.count(this)); + return getContext().pImpl->GlobalValueSanitizerMetadata[this]; +} + +void GlobalValue::setSanitizerMetadata(SanitizerMetadata Meta) { + getContext().pImpl->GlobalValueSanitizerMetadata[this] = Meta; + HasSanitizerMetadata = true; +} + +void GlobalValue::removeSanitizerMetadata() { + DenseMap &MetadataMap = + getContext().pImpl->GlobalValueSanitizerMetadata; + MetadataMap.erase(this); + HasSanitizerMetadata = false; +} + StringRef GlobalObject::getSectionImpl() const { assert(hasSection()); return getContext().pImpl->GlobalObjectSections[this]; @@ -262,7 +285,7 @@ bool GlobalObject::canIncreaseAlignment() const { // alignment specified. (If it is assigned a section, the global // could be densely packed with other objects in the section, and // increasing the alignment could cause padding issues.) - if (hasSection() && getAlign().hasValue()) + if (hasSection() && getAlign()) return false; // On ELF platforms, we're further restricted in that we can't diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 4e8f1b506811..d0c622fe2389 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/None.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -68,6 +69,21 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) { return CreateBitCast(Ptr, getInt8PtrTy(PT->getAddressSpace())); } +DebugLoc IRBuilderBase::getCurrentDebugLocation() const { + for (auto &KV : MetadataToCopy) + if (KV.first == LLVMContext::MD_dbg) + return {cast(KV.second)}; + + return {}; +} +void IRBuilderBase::SetInstDebugLocation(Instruction *I) const { + for (const auto &KV : MetadataToCopy) + if (KV.first == LLVMContext::MD_dbg) { + I->setDebugLoc(DebugLoc(KV.second)); + return; + } +} + static CallInst *createCallHelper(Function *Callee, ArrayRef Ops, IRBuilderBase *Builder, const Twine &Name = "", @@ -133,7 +149,36 @@ CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size, CallInst *CI = createCallHelper(TheFn, Ops, this); if (Align) - cast(CI)->setDestAlignment(Align->value()); + cast(CI)->setDestAlignment(*Align); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + return CI; +} + +CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, + Value *Val, Value *Size, + bool IsVolatile, MDNode *TBAATag, + MDNode *ScopeTag, + MDNode *NoAliasTag) { + Dst = getCastedInt8PtrValue(Dst); + Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)}; + Type *Tys[] = {Dst->getType(), Size->getType()}; + Module *M = BB->getParent()->getParent(); + Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset_inline, Tys); + + CallInst *CI = createCallHelper(TheFn, Ops, this); + + if (DstAlign) + cast(CI)->setDestAlignment(*DstAlign); // Set the TBAA info if present. 
if (TBAATag) @@ -672,34 +717,29 @@ getStatepointBundles(Optional> TransitionArgs, template static CallInst *CreateGCStatepointCallCommon( IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, - Value *ActualCallee, uint32_t Flags, ArrayRef CallArgs, - Optional> TransitionArgs, - Optional> DeoptArgs, ArrayRef GCArgs, - const Twine &Name) { - // Extract out the type of the callee. - auto *FuncPtrType = cast(ActualCallee->getType()); - assert(isa(FuncPtrType->getPointerElementType()) && - "actual callee must be a callable value"); - + FunctionCallee ActualCallee, uint32_t Flags, ArrayRef CallArgs, + Optional> TransitionArgs, Optional> DeoptArgs, + ArrayRef GCArgs, const Twine &Name) { Module *M = Builder->GetInsertBlock()->getParent()->getParent(); // Fill in the one generic type'd argument (the function is also vararg) - Type *ArgTypes[] = { FuncPtrType }; Function *FnStatepoint = - Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint, - ArgTypes); - - std::vector Args = - getStatepointArgs(*Builder, ID, NumPatchBytes, ActualCallee, Flags, - CallArgs); - - return Builder->CreateCall(FnStatepoint, Args, - getStatepointBundles(TransitionArgs, DeoptArgs, - GCArgs), - Name); + Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint, + {ActualCallee.getCallee()->getType()}); + + std::vector Args = getStatepointArgs( + *Builder, ID, NumPatchBytes, ActualCallee.getCallee(), Flags, CallArgs); + + CallInst *CI = Builder->CreateCall( + FnStatepoint, Args, + getStatepointBundles(TransitionArgs, DeoptArgs, GCArgs), Name); + CI->addParamAttr(2, + Attribute::get(Builder->getContext(), Attribute::ElementType, + ActualCallee.getFunctionType())); + return CI; } CallInst *IRBuilderBase::CreateGCStatepointCall( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, ArrayRef CallArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointCallCommon( @@ -708,17 +748,17 @@ CallInst *IRBuilderBase::CreateGCStatepointCall( } CallInst *IRBuilderBase::CreateGCStatepointCall( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags, - ArrayRef CallArgs, Optional> TransitionArgs, - Optional> DeoptArgs, ArrayRef GCArgs, - const Twine &Name) { + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, + uint32_t Flags, ArrayRef CallArgs, + Optional> TransitionArgs, Optional> DeoptArgs, + ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointCallCommon( this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs, Name); } CallInst *IRBuilderBase::CreateGCStatepointCall( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, ArrayRef CallArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointCallCommon( @@ -729,32 +769,31 @@ CallInst *IRBuilderBase::CreateGCStatepointCall( template static InvokeInst *CreateGCStatepointInvokeCommon( IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, - Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, - uint32_t Flags, ArrayRef InvokeArgs, + FunctionCallee ActualInvokee, BasicBlock *NormalDest, + BasicBlock *UnwindDest, uint32_t Flags, ArrayRef InvokeArgs, Optional> TransitionArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - // Extract out the type of the callee. 
- auto *FuncPtrType = cast(ActualInvokee->getType()); - assert(isa(FuncPtrType->getPointerElementType()) && - "actual callee must be a callable value"); - Module *M = Builder->GetInsertBlock()->getParent()->getParent(); // Fill in the one generic type'd argument (the function is also vararg) - Function *FnStatepoint = Intrinsic::getDeclaration( - M, Intrinsic::experimental_gc_statepoint, {FuncPtrType}); + Function *FnStatepoint = + Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint, + {ActualInvokee.getCallee()->getType()}); std::vector Args = - getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee, Flags, - InvokeArgs); + getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee.getCallee(), + Flags, InvokeArgs); - return Builder->CreateInvoke(FnStatepoint, NormalDest, UnwindDest, Args, - getStatepointBundles(TransitionArgs, DeoptArgs, - GCArgs), - Name); + InvokeInst *II = Builder->CreateInvoke( + FnStatepoint, NormalDest, UnwindDest, Args, + getStatepointBundles(TransitionArgs, DeoptArgs, GCArgs), Name); + II->addParamAttr(2, + Attribute::get(Builder->getContext(), Attribute::ElementType, + ActualInvokee.getFunctionType())); + return II; } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { @@ -765,19 +804,21 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, ArrayRef InvokeArgs, Optional> TransitionArgs, - Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + Optional> DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { return CreateGCStatepointInvokeCommon( this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags, InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name); } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, - Optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + Optional> DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { return CreateGCStatepointInvokeCommon( this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, uint32_t(StatepointFlags::None), InvokeArgs, None, DeoptArgs, GCArgs, @@ -785,31 +826,26 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( } CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint, - Type *ResultType, - const Twine &Name) { - Intrinsic::ID ID = Intrinsic::experimental_gc_result; - Module *M = BB->getParent()->getParent(); - Type *Types[] = {ResultType}; - Function *FnGCResult = Intrinsic::getDeclaration(M, ID, Types); + Type *ResultType, const Twine &Name) { + Intrinsic::ID ID = Intrinsic::experimental_gc_result; + Module *M = BB->getParent()->getParent(); + Type *Types[] = {ResultType}; + Function *FnGCResult = Intrinsic::getDeclaration(M, ID, Types); - Value *Args[] = {Statepoint}; - return createCallHelper(FnGCResult, Args, this, Name); + Value *Args[] = {Statepoint}; + return createCallHelper(FnGCResult, Args, this, Name); } CallInst 
*IRBuilderBase::CreateGCRelocate(Instruction *Statepoint, - int BaseOffset, - int DerivedOffset, - Type *ResultType, - const Twine &Name) { - Module *M = BB->getParent()->getParent(); - Type *Types[] = {ResultType}; - Function *FnGCRelocate = - Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); + int BaseOffset, int DerivedOffset, + Type *ResultType, const Twine &Name) { + Module *M = BB->getParent()->getParent(); + Type *Types[] = {ResultType}; + Function *FnGCRelocate = + Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); - Value *Args[] = {Statepoint, - getInt32(BaseOffset), - getInt32(DerivedOffset)}; - return createCallHelper(FnGCRelocate, Args, this, Name); + Value *Args[] = {Statepoint, getInt32(BaseOffset), getInt32(DerivedOffset)}; + return createCallHelper(FnGCRelocate, Args, this, Name); } CallInst *IRBuilderBase::CreateGCGetPointerBase(Value *DerivedPtr, @@ -1262,8 +1298,8 @@ CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL, return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue); } -IRBuilderDefaultInserter::~IRBuilderDefaultInserter() {} -IRBuilderCallbackInserter::~IRBuilderCallbackInserter() {} -IRBuilderFolder::~IRBuilderFolder() {} +IRBuilderDefaultInserter::~IRBuilderDefaultInserter() = default; +IRBuilderCallbackInserter::~IRBuilderCallbackInserter() = default; +IRBuilderFolder::~IRBuilderFolder() = default; void ConstantFolder::anchor() {} void NoFolder::anchor() {} diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 36a20679863b..bf76c89f26ca 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -492,6 +492,9 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, if (const ShuffleVectorInst *SVI = dyn_cast(I1)) return SVI->getShuffleMask() == cast(I2)->getShuffleMask(); + if (const GetElementPtrInst *GEP = dyn_cast(I1)) + return GEP->getSourceElementType() == + cast(I2)->getSourceElementType(); return true; } @@ -695,7 +698,7 @@ bool Instruction::mayHaveSideEffects() const { bool Instruction::isSafeToRemove() const { return (!isa(this) || !this->mayHaveSideEffects()) && - !this->isTerminator(); + !this->isTerminator() && !this->isEHPad(); } bool Instruction::willReturn() const { diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 7798af3b19b9..6a91edb75dd2 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -128,7 +128,7 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) { // If the PHI node is dead, because it has zero entries, nuke it now. if (getNumOperands() == 0 && DeletePHIIfEmpty) { // If anyone is using this PHI, make them use a dummy value instead... 
- replaceAllUsesWith(UndefValue::get(getType())); + replaceAllUsesWith(PoisonValue::get(getType())); eraseFromParent(); } return Removed; @@ -325,13 +325,13 @@ bool CallBase::isReturnNonNull() const { return false; } -Value *CallBase::getReturnedArgOperand() const { +Value *CallBase::getArgOperandWithAttribute(Attribute::AttrKind Kind) const { unsigned Index; - if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index)) + if (Attrs.hasAttrSomewhere(Kind, &Index)) return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) - if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index)) + if (F->getAttributes().hasAttrSomewhere(Kind, &Index)) return getArgOperand(Index - AttributeList::FirstArgIndex); return nullptr; @@ -372,6 +372,27 @@ bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const { return false; } +template +Attribute CallBase::getFnAttrOnCalledFunction(AK Kind) const { + // Operand bundles override attributes on the called function, but don't + // override attributes directly present on the call instruction. + if (isFnAttrDisallowedByOpBundle(Kind)) + return Attribute(); + Value *V = getCalledOperand(); + if (auto *CE = dyn_cast(V)) + if (CE->getOpcode() == BitCast) + V = CE->getOperand(0); + + if (auto *F = dyn_cast(V)) + return F->getAttributes().getFnAttr(Kind); + + return Attribute(); +} + +template Attribute +CallBase::getFnAttrOnCalledFunction(Attribute::AttrKind Kind) const; +template Attribute CallBase::getFnAttrOnCalledFunction(StringRef Kind) const; + void CallBase::getOperandBundlesAsDefs( SmallVectorImpl &Defs) const { for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) @@ -482,9 +503,10 @@ CallBase *CallBase::removeOperandBundle(CallBase *CB, uint32_t ID, bool CallBase::hasReadingOperandBundles() const { // Implementation note: this is a conservative implementation of operand - // bundle semantics, where *any* non-assume operand bundle forces a callsite - // to be at least readonly. - return hasOperandBundles() && getIntrinsicID() != Intrinsic::assume; + // bundle semantics, where *any* non-assume operand bundle (other than + // ptrauth) forces a callsite to be at least readonly. + return hasOperandBundlesOtherThan(LLVMContext::OB_ptrauth) && + getIntrinsicID() != Intrinsic::assume; } //===----------------------------------------------------------------------===// @@ -2194,7 +2216,13 @@ bool ShuffleVectorInst::isIdentityMask(ArrayRef Mask) { bool ShuffleVectorInst::isReverseMask(ArrayRef Mask) { if (!isSingleSourceMask(Mask)) return false; - for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) { + + // The number of elements in the mask must be at least 2. 
+ int NumElts = Mask.size(); + if (NumElts < 2) + return false; + + for (int i = 0; i < NumElts; ++i) { if (Mask[i] == -1) continue; if (Mask[i] != (NumElts - 1 - i) && Mask[i] != (NumElts + NumElts - 1 - i)) @@ -3060,16 +3088,18 @@ unsigned CastInst::isEliminableCastPair( return 0; } case 8: { - // ext, trunc -> bitcast, if the SrcTy and DstTy are same size + // ext, trunc -> bitcast, if the SrcTy and DstTy are the same // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy) // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy) unsigned SrcSize = SrcTy->getScalarSizeInBits(); unsigned DstSize = DstTy->getScalarSizeInBits(); - if (SrcSize == DstSize) + if (SrcTy == DstTy) return Instruction::BitCast; - else if (SrcSize < DstSize) + if (SrcSize < DstSize) return firstOp; - return secondOp; + if (SrcSize > DstSize) + return secondOp; + return 0; } case 9: // zext, sext -> zext, because sext can't sign extend after zext @@ -4447,7 +4477,7 @@ void SwitchInstProfUpdateWrapper::addCase( Weights.getValue()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { Changed = true; - Weights.getValue().push_back(W.getValueOr(0)); + Weights.getValue().push_back(W.value_or(0)); } if (Weights) assert(SI.getNumSuccessors() == Weights->size() && @@ -4467,7 +4497,7 @@ SwitchInstProfUpdateWrapper::CaseWeightOpt SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) { if (!Weights) return None; - return Weights.getValue()[idx]; + return (*Weights)[idx]; } void SwitchInstProfUpdateWrapper::setSuccessorWeight( @@ -4479,7 +4509,7 @@ void SwitchInstProfUpdateWrapper::setSuccessorWeight( Weights = SmallVector(SI.getNumSuccessors(), 0); if (Weights) { - auto &OldW = Weights.getValue()[idx]; + auto &OldW = (*Weights)[idx]; if (*W != OldW) { Changed = true; OldW = *W; diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index e27758c5de02..b132a9dcb812 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -236,8 +236,8 @@ bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const { return true; } -FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const { - Metadata *MD = cast(getArgOperand(2))->getMetadata(); +static FCmpInst::Predicate getFPPredicateFromMD(const Value *Op) { + Metadata *MD = cast(Op)->getMetadata(); if (!MD || !isa(MD)) return FCmpInst::BAD_FCMP_PREDICATE; return StringSwitch(cast(MD)->getString()) @@ -258,6 +258,10 @@ FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const { .Default(FCmpInst::BAD_FCMP_PREDICATE); } +FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const { + return getFPPredicateFromMD(getArgOperand(2)); +} + bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: @@ -299,13 +303,18 @@ ElementCount VPIntrinsic::getStaticVectorLength() const { }; Value *VPMask = getMaskParam(); - assert(VPMask && "No mask param?"); + if (!VPMask) { + assert((getIntrinsicID() == Intrinsic::vp_merge || + getIntrinsicID() == Intrinsic::vp_select) && + "Unexpected VP intrinsic without mask operand"); + return GetVectorLengthOfType(getType()); + } return GetVectorLengthOfType(VPMask->getType()); } Value *VPIntrinsic::getMaskParam() const { if (auto MaskPos = getMaskParamPos(getIntrinsicID())) - return getArgOperand(MaskPos.getValue()); + return getArgOperand(*MaskPos); return nullptr; } @@ -316,7 +325,7 @@ void VPIntrinsic::setMaskParam(Value *NewMask) { Value *VPIntrinsic::getVectorLengthParam() const { if (auto EVLPos = getVectorLengthParamPos(getIntrinsicID())) - return 
getArgOperand(EVLPos.getValue()); + return getArgOperand(*EVLPos); return nullptr; } @@ -354,7 +363,7 @@ VPIntrinsic::getVectorLengthParamPos(Intrinsic::ID IntrinsicID) { /// scatter. MaybeAlign VPIntrinsic::getPointerAlignment() const { Optional PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID()); - assert(PtrParamOpt.hasValue() && "no pointer argument!"); + assert(PtrParamOpt && "no pointer argument!"); return getParamAlign(PtrParamOpt.getValue()); } @@ -380,7 +389,7 @@ Optional VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) { /// \return The data (payload) operand of this store or scatter. Value *VPIntrinsic::getMemoryDataParam() const { auto DataParamOpt = getMemoryDataParamPos(getIntrinsicID()); - if (!DataParamOpt.hasValue()) + if (!DataParamOpt) return nullptr; return getArgOperand(DataParamOpt.getValue()); } @@ -492,6 +501,20 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy); break; } + case Intrinsic::vp_trunc: + case Intrinsic::vp_sext: + case Intrinsic::vp_zext: + case Intrinsic::vp_fptoui: + case Intrinsic::vp_fptosi: + case Intrinsic::vp_uitofp: + case Intrinsic::vp_sitofp: + case Intrinsic::vp_fptrunc: + case Intrinsic::vp_fpext: + case Intrinsic::vp_ptrtoint: + case Intrinsic::vp_inttoptr: + VPFunc = + Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[0]->getType()}); + break; case Intrinsic::vp_merge: case Intrinsic::vp_select: VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()}); @@ -500,6 +523,10 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, VPFunc = Intrinsic::getDeclaration( M, VPID, {ReturnType, Params[0]->getType()}); break; + case Intrinsic::experimental_vp_strided_load: + VPFunc = Intrinsic::getDeclaration( + M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()}); + break; case Intrinsic::vp_gather: VPFunc = Intrinsic::getDeclaration( M, VPID, {ReturnType, Params[0]->getType()}); @@ -508,6 +535,11 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, VPFunc = Intrinsic::getDeclaration( M, VPID, {Params[0]->getType(), Params[1]->getType()}); break; + case Intrinsic::experimental_vp_strided_store: + VPFunc = Intrinsic::getDeclaration( + M, VPID, + {Params[0]->getType(), Params[1]->getType(), Params[2]->getType()}); + break; case Intrinsic::vp_scatter: VPFunc = Intrinsic::getDeclaration( M, VPID, {Params[0]->getType(), Params[1]->getType()}); @@ -529,6 +561,67 @@ bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) { return false; } +bool VPCastIntrinsic::isVPCast(Intrinsic::ID ID) { + switch (ID) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_CASTOP return true; +#define END_REGISTER_VP_INTRINSIC(VPID) break; +#include "llvm/IR/VPIntrinsics.def" + } + return false; +} + +bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) { + switch (ID) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_CMP(CCPOS, ...) 
return true; +#define END_REGISTER_VP_INTRINSIC(VPID) break; +#include "llvm/IR/VPIntrinsics.def" + } + return false; +} + +static ICmpInst::Predicate getIntPredicateFromMD(const Value *Op) { + Metadata *MD = cast(Op)->getMetadata(); + if (!MD || !isa(MD)) + return ICmpInst::BAD_ICMP_PREDICATE; + return StringSwitch(cast(MD)->getString()) + .Case("eq", ICmpInst::ICMP_EQ) + .Case("ne", ICmpInst::ICMP_NE) + .Case("ugt", ICmpInst::ICMP_UGT) + .Case("uge", ICmpInst::ICMP_UGE) + .Case("ult", ICmpInst::ICMP_ULT) + .Case("ule", ICmpInst::ICMP_ULE) + .Case("sgt", ICmpInst::ICMP_SGT) + .Case("sge", ICmpInst::ICMP_SGE) + .Case("slt", ICmpInst::ICMP_SLT) + .Case("sle", ICmpInst::ICMP_SLE) + .Default(ICmpInst::BAD_ICMP_PREDICATE); +} + +CmpInst::Predicate VPCmpIntrinsic::getPredicate() const { + bool IsFP = true; + Optional CCArgIdx; + switch (getIntrinsicID()) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_CMP(CCPOS, ISFP) \ + CCArgIdx = CCPOS; \ + IsFP = ISFP; \ + break; +#define END_REGISTER_VP_INTRINSIC(VPID) break; +#include "llvm/IR/VPIntrinsics.def" + } + assert(CCArgIdx && "Unexpected vector-predicated comparison"); + return IsFP ? getFPPredicateFromMD(getArgOperand(*CCArgIdx)) + : getIntPredicateFromMD(getArgOperand(*CCArgIdx)); +} + unsigned VPReductionIntrinsic::getVectorParamPos() const { return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID()); } diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index e19ead98a616..4a1d5d3dcdf6 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -82,6 +82,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { "clang.arc.attachedcall operand bundle id drifted!"); (void)ClangAttachedCall; + auto *PtrauthEntry = pImpl->getOrInsertBundleTag("ptrauth"); + assert(PtrauthEntry->second == LLVMContext::OB_ptrauth && + "ptrauth operand bundle id drifted!"); + (void)PtrauthEntry; + SyncScope::ID SingleThreadSSID = pImpl->getOrInsertSyncScopeID("singlethread"); assert(SingleThreadSSID == SyncScope::SingleThread && @@ -133,13 +138,25 @@ bool LLVMContext::getDiagnosticsHotnessRequested() const { void LLVMContext::setDiagnosticsHotnessThreshold(Optional Threshold) { pImpl->DiagnosticsHotnessThreshold = Threshold; } - +void LLVMContext::setMisExpectWarningRequested(bool Requested) { + pImpl->MisExpectWarningRequested = Requested; +} +bool LLVMContext::getMisExpectWarningRequested() const { + return pImpl->MisExpectWarningRequested; +} uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const { - return pImpl->DiagnosticsHotnessThreshold.getValueOr(UINT64_MAX); + return pImpl->DiagnosticsHotnessThreshold.value_or(UINT64_MAX); +} +void LLVMContext::setDiagnosticsMisExpectTolerance( + Optional Tolerance) { + pImpl->DiagnosticsMisExpectTolerance = Tolerance; +} +uint64_t LLVMContext::getDiagnosticsMisExpectTolerance() const { + return pImpl->DiagnosticsMisExpectTolerance.value_or(0); } bool LLVMContext::isDiagnosticsHotnessThresholdSetFromPSI() const { - return !pImpl->DiagnosticsHotnessThreshold.hasValue(); + return !pImpl->DiagnosticsHotnessThreshold.has_value(); } remarks::RemarkStreamer *LLVMContext::getMainRemarkStreamer() { @@ -346,12 +363,18 @@ std::unique_ptr LLVMContext::getDiagnosticHandler() { return std::move(pImpl->DiagHandler); } -void LLVMContext::enableOpaquePointers() const { - assert(pImpl->PointerTypes.empty() && pImpl->ASPointerTypes.empty() && - "Must be called before creating any pointer types"); - 
pImpl->setOpaquePointers(true); +bool LLVMContext::hasSetOpaquePointersValue() const { + return pImpl->hasOpaquePointersValue(); +} + +void LLVMContext::setOpaquePointers(bool Enable) const { + pImpl->setOpaquePointers(Enable); } bool LLVMContext::supportsTypedPointers() const { return !pImpl->getOpaquePointers(); } + +Any &LLVMContext::getTargetData() const { + return pImpl->TargetDataStorage; +} diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 8f9530290459..06b3a3afef9d 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -36,7 +36,7 @@ using namespace llvm; static cl::opt OpaquePointersCL("opaque-pointers", cl::desc("Use opaque pointers"), - cl::init(false)); + cl::init(true)); LLVMContextImpl::LLVMContextImpl(LLVMContext &C) : DiagHandler(std::make_unique()), @@ -47,7 +47,11 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID), X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8), - Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {} + Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) { + if (OpaquePointersCL.getNumOccurrences()) { + OpaquePointers = OpaquePointersCL; + } +} LLVMContextImpl::~LLVMContextImpl() { // NOTE: We need to delete the contents of OwnedModules, but Module's dtor @@ -245,10 +249,18 @@ void LLVMContextImpl::setOptPassGate(OptPassGate& OPG) { this->OPG = &OPG; } +bool LLVMContextImpl::hasOpaquePointersValue() { + return OpaquePointers.has_value(); +} + bool LLVMContextImpl::getOpaquePointers() { - if (LLVM_UNLIKELY(!(OpaquePointers.hasValue()))) + if (LLVM_UNLIKELY(!OpaquePointers)) OpaquePointers = OpaquePointersCL; return *OpaquePointers; } -void LLVMContextImpl::setOpaquePointers(bool OP) { OpaquePointers = OP; } +void LLVMContextImpl::setOpaquePointers(bool OP) { + assert((!OpaquePointers || OpaquePointers.getValue() == OP) && + "Cannot change opaque pointers mode once set"); + OpaquePointers = OP; +} diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 70242f4d8f20..47add940f603 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -17,6 +17,7 @@ #include "ConstantsContext.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/Any.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" @@ -686,7 +687,7 @@ template <> struct MDNodeKeyImpl { unsigned getHashValue() const { return hash_combine(Filename, Directory, Checksum ? Checksum->Kind : 0, Checksum ? 
Checksum->Value : nullptr, - Source.getValueOr(nullptr)); + Source.value_or(nullptr)); } }; @@ -709,6 +710,7 @@ template <> struct MDNodeKeyImpl { Metadata *RetainedNodes; Metadata *ThrownTypes; Metadata *Annotations; + MDString *TargetFuncName; MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, @@ -716,14 +718,15 @@ unsigned VirtualIndex, int ThisAdjustment, unsigned Flags, unsigned SPFlags, Metadata *Unit, Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes, - Metadata *ThrownTypes, Metadata *Annotations) + Metadata *ThrownTypes, Metadata *Annotations, + MDString *TargetFuncName) : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), Line(Line), Type(Type), ScopeLine(ScopeLine), ContainingType(ContainingType), VirtualIndex(VirtualIndex), ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags), Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration), RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes), - Annotations(Annotations) {} + Annotations(Annotations), TargetFuncName(TargetFuncName) {} MDNodeKeyImpl(const DISubprogram *N) : Scope(N->getRawScope()), Name(N->getRawName()), LinkageName(N->getRawLinkageName()), File(N->getRawFile()), @@ -736,7 +739,8 @@ Declaration(N->getRawDeclaration()), RetainedNodes(N->getRawRetainedNodes()), ThrownTypes(N->getRawThrownTypes()), - Annotations(N->getRawAnnotations()) {} + Annotations(N->getRawAnnotations()), + TargetFuncName(N->getRawTargetFuncName()) {} bool isKeyOf(const DISubprogram *RHS) const { return Scope == RHS->getRawScope() && Name == RHS->getRawName() && @@ -752,7 +756,8 @@ Declaration == RHS->getRawDeclaration() && RetainedNodes == RHS->getRawRetainedNodes() && ThrownTypes == RHS->getRawThrownTypes() && - Annotations == RHS->getRawAnnotations(); + Annotations == RHS->getRawAnnotations() && + TargetFuncName == RHS->getRawTargetFuncName(); } bool isDefinition() const { return SPFlags & DISubprogram::SPFlagDefinition; } @@ -1380,12 +1385,19 @@ public: /// If threshold option is not specified, it is disabled (0) by default. Optional DiagnosticsHotnessThreshold = 0; + /// The percentage of difference between profiling branch weights and + /// llvm.expect branch weights to tolerate when emitting MisExpect diagnostics + Optional DiagnosticsMisExpectTolerance = 0; + bool MisExpectWarningRequested = false; + /// The specialized remark streamer used by LLVM's OptimizationRemarkEmitter. std::unique_ptr LLVMRS; LLVMContext::YieldCallbackTy YieldCallback = nullptr; void *YieldOpaqueHandle = nullptr; + DenseMap ValueNames; + using IntMapTy = DenseMap, DenseMapAPIntKeyInfo>; IntMapTy IntConstants; @@ -1402,8 +1414,6 @@ public: DenseMap ValuesAsMetadata; DenseMap MetadataAsValues; - DenseMap ValueNames; - #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ DenseSet CLASS##s; #include "llvm/IR/Metadata.def" @@ -1450,14 +1460,14 @@ public: ConstantInt *TheTrueVal = nullptr; ConstantInt *TheFalseVal = nullptr; - std::unique_ptr TheNoneToken; - // Basic type instances.
Type VoidTy, LabelTy, HalfTy, BFloatTy, FloatTy, DoubleTy, MetadataTy, TokenTy; Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; + std::unique_ptr TheNoneToken; + BumpPtrAllocator Alloc; UniqueStringSaver Saver{Alloc}; @@ -1493,6 +1503,9 @@ public: /// Collection of per-GlobalValue partitions used in this context. DenseMap GlobalValuePartitions; + DenseMap + GlobalValueSanitizerMetadata; + /// DiscriminatorTable - This table maps file:line locations to an /// integer representing the next DWARF path discriminator to assign to /// instructions in different blocks at the same location. @@ -1555,8 +1568,11 @@ public: // TODO: clean up the following after we no longer support non-opaque pointer // types. bool getOpaquePointers(); + bool hasOpaquePointersValue(); void setOpaquePointers(bool OP); + llvm::Any TargetDataStorage; + private: Optional OpaquePointers; }; diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 08cf909a83f9..ef3465177647 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -29,10 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include -#ifdef EXPENSIVE_CHECKS -#include "llvm/IR/StructuralHash.h" -#endif - using namespace llvm; // See PassManagers.h for Pass Manager infrastructure overview. @@ -1429,12 +1425,12 @@ bool FPPassManager::runOnFunction(Function &F) { PassManagerPrettyStackEntry X(FP, F); TimeRegion PassTimer(getPassTimer(FP)); #ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(F); + uint64_t RefHash = FP->structuralHash(F); #endif LocalChanged |= FP->runOnFunction(F); #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) - if (!LocalChanged && (RefHash != StructuralHash(F))) { + if (!LocalChanged && (RefHash != FP->structuralHash(F))) { llvm::errs() << "Pass modifies its input and doesn't report it: " << FP->getPassName() << "\n"; llvm_unreachable("Pass modifies its input and doesn't report it"); @@ -1543,13 +1539,13 @@ MPPassManager::runOnModule(Module &M) { TimeRegion PassTimer(getPassTimer(MP)); #ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(M); + uint64_t RefHash = MP->structuralHash(M); #endif LocalChanged |= MP->runOnModule(M); #ifdef EXPENSIVE_CHECKS - assert((LocalChanged || (RefHash == StructuralHash(M))) && + assert((LocalChanged || (RefHash == MP->structuralHash(M))) && "Pass modifies its input and doesn't report it."); #endif @@ -1767,4 +1763,4 @@ void FunctionPass::assignPassManager(PMStack &PMS, PM->add(this); } -legacy::PassManagerBase::~PassManagerBase() {} +legacy::PassManagerBase::~PassManagerBase() = default; diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 35af8490287b..fc59fda9fe22 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -150,6 +150,14 @@ MDNode *MDBuilder::mergeCallbackEncodings(MDNode *ExistingCallbacks, return MDNode::get(Context, Ops); } +MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig, + Constant *RTTI) { + SmallVector Ops; + Ops.push_back(createConstant(PrologueSig)); + Ops.push_back(createConstant(RTTI)); + return MDNode::get(Context, Ops); +} + MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { SmallVector Args(1, nullptr); if (Extra) diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp index 2399ea27ee9d..b8e3e40e4c1d 100644 --- a/llvm/lib/IR/Mangler.cpp +++ b/llvm/lib/IR/Mangler.cpp @@ -144,7 +144,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const 
GlobalValue *GV, // Mangle functions with Microsoft calling conventions specially. Only do // this mangling for x86_64 vectorcall and 32-bit x86. - const Function *MSFunc = dyn_cast(GV); + const Function *MSFunc = dyn_cast_or_null(GV->getAliaseeObject()); // Don't add byte count suffixes when '\01' or '?' are in the first // character. diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 226718ecac28..ae2401026ebf 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -245,6 +245,36 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New, "Reference without owner must be direct"); } +void ReplaceableMetadataImpl::SalvageDebugInfo(const Constant &C) { + if (!C.isUsedByMetadata()) { + return; + } + + LLVMContext &Context = C.getType()->getContext(); + auto &Store = Context.pImpl->ValuesAsMetadata; + auto I = Store.find(&C); + ValueAsMetadata *MD = I->second; + using UseTy = + std::pair>; + // Copy out uses and update value of Constant used by debug info metadata with undef below + SmallVector Uses(MD->UseMap.begin(), MD->UseMap.end()); + + for (const auto &Pair : Uses) { + MetadataTracking::OwnerTy Owner = Pair.second.first; + if (!Owner) + continue; + if (!Owner.is()) + continue; + auto *OwnerMD = dyn_cast(Owner.get()); + if (!OwnerMD) + continue; + if (isa(OwnerMD)) { + OwnerMD->handleChangedOperand( + Pair.first, ValueAsMetadata::get(UndefValue::get(C.getType()))); + } + } +} + void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) { if (UseMap.empty()) return; @@ -252,9 +282,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) { // Copy out uses since UseMap will get touched below. using UseTy = std::pair>; SmallVector Uses(UseMap.begin(), UseMap.end()); - llvm::sort(Uses, [](const UseTy &L, const UseTy &R) { - return L.second.second < R.second.second; - }); + llvm::sort(Uses, llvm::less_second()); for (const auto &Pair : Uses) { // Check that this Ref hasn't disappeared after RAUW (when updating a // previous Ref). @@ -493,35 +521,26 @@ StringRef MDString::getString() const { "Alignment is insufficient after objects prepended to " #CLASS); #include "llvm/IR/Metadata.def" -void *MDNode::operator new(size_t Size, unsigned NumOps) { - size_t OpSize = NumOps * sizeof(MDOperand); +void *MDNode::operator new(size_t Size, size_t NumOps, StorageType Storage) { // uint64_t is the most aligned type we need support (ensured by static_assert // above) - OpSize = alignTo(OpSize, alignof(uint64_t)); - void *Ptr = reinterpret_cast(::operator new(OpSize + Size)) + OpSize; - MDOperand *O = static_cast(Ptr); - for (MDOperand *E = O - NumOps; O != E; --O) - (void)new (O - 1) MDOperand; - return Ptr; + size_t AllocSize = + alignTo(Header::getAllocSize(Storage, NumOps), alignof(uint64_t)); + char *Mem = reinterpret_cast(::operator new(AllocSize + Size)); + Header *H = new (Mem + AllocSize - sizeof(Header)) Header(NumOps, Storage); + return reinterpret_cast(H + 1); } -// Repress memory sanitization, due to use-after-destroy by operator -// delete. Bug report 24578 identifies this issue. -LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void MDNode::operator delete(void *Mem) { - MDNode *N = static_cast(Mem); - size_t OpSize = N->NumOperands * sizeof(MDOperand); - OpSize = alignTo(OpSize, alignof(uint64_t)); - - MDOperand *O = static_cast(Mem); - for (MDOperand *E = O - N->NumOperands; O != E; --O) - (O - 1)->~MDOperand(); - ::operator delete(reinterpret_cast(Mem) - OpSize); +void MDNode::operator delete(void *N) { + Header *H = reinterpret_cast
(N) - 1; + void *Mem = H->getAllocation(); + H->~Header(); + ::operator delete(Mem); } MDNode::MDNode(LLVMContext &Context, unsigned ID, StorageType Storage, ArrayRef Ops1, ArrayRef Ops2) - : Metadata(ID, Storage), NumOperands(Ops1.size() + Ops2.size()), - NumUnresolved(0), Context(Context) { + : Metadata(ID, Storage), Context(Context) { unsigned Op = 0; for (Metadata *MD : Ops1) setOperand(Op++, MD); @@ -547,6 +566,87 @@ TempMDNode MDNode::clone() const { } } +MDNode::Header::Header(size_t NumOps, StorageType Storage) { + IsLarge = isLarge(NumOps); + IsResizable = isResizable(Storage); + SmallSize = getSmallSize(NumOps, IsResizable, IsLarge); + if (IsLarge) { + SmallNumOps = 0; + new (getLargePtr()) LargeStorageVector(); + getLarge().resize(NumOps); + return; + } + SmallNumOps = NumOps; + MDOperand *O = reinterpret_cast(this) - SmallSize; + for (MDOperand *E = O + SmallSize; O != E;) + (void)new (O++) MDOperand(); +} + +MDNode::Header::~Header() { + if (IsLarge) { + getLarge().~LargeStorageVector(); + return; + } + MDOperand *O = reinterpret_cast(this); + for (MDOperand *E = O - SmallSize; O != E; --O) + (void)(O - 1)->~MDOperand(); +} + +void *MDNode::Header::getLargePtr() const { + static_assert(alignof(LargeStorageVector) <= alignof(Header), + "LargeStorageVector too strongly aligned"); + return reinterpret_cast(const_cast
(this)) - + sizeof(LargeStorageVector); +} + +void *MDNode::Header::getSmallPtr() { + static_assert(alignof(MDOperand) <= alignof(Header), + "MDOperand too strongly aligned"); + return reinterpret_cast(const_cast
(this)) - + sizeof(MDOperand) * SmallSize; +} + +void MDNode::Header::resize(size_t NumOps) { + assert(IsResizable && "Node is not resizable"); + if (operands().size() == NumOps) + return; + + if (IsLarge) + getLarge().resize(NumOps); + else if (NumOps <= SmallSize) + resizeSmall(NumOps); + else + resizeSmallToLarge(NumOps); +} + +void MDNode::Header::resizeSmall(size_t NumOps) { + assert(!IsLarge && "Expected a small MDNode"); + assert(NumOps <= SmallSize && "NumOps too large for small resize"); + + MutableArrayRef ExistingOps = operands(); + assert(NumOps != ExistingOps.size() && "Expected a different size"); + + int NumNew = (int)NumOps - (int)ExistingOps.size(); + MDOperand *O = ExistingOps.end(); + for (int I = 0, E = NumNew; I < E; ++I) + (O++)->reset(); + for (int I = 0, E = NumNew; I > E; --I) + (--O)->reset(); + SmallNumOps = NumOps; + assert(O == operands().end() && "Operands not (un)initialized until the end"); +} + +void MDNode::Header::resizeSmallToLarge(size_t NumOps) { + assert(!IsLarge && "Expected a small MDNode"); + assert(NumOps > SmallSize && "Expected NumOps to be larger than allocation"); + LargeStorageVector NewOps; + NewOps.resize(NumOps); + llvm::move(operands(), NewOps.begin()); + resizeSmall(0); + new (getLargePtr()) LargeStorageVector(std::move(NewOps)); + IsLarge = true; +} + static bool isOperandUnresolved(Metadata *Op) { if (auto *N = dyn_cast_or_null(Op)) return !N->isResolved(); @@ -554,9 +654,9 @@ static bool isOperandUnresolved(Metadata *Op) { } void MDNode::countUnresolvedOperands() { - assert(NumUnresolved == 0 && "Expected unresolved ops to be uncounted"); + assert(getNumUnresolved() == 0 && "Expected unresolved ops to be uncounted"); assert(isUniqued() && "Expected this to be uniqued"); - NumUnresolved = count_if(operands(), isOperandUnresolved); + setNumUnresolved(count_if(operands(), isOperandUnresolved)); } void MDNode::makeUniqued() { @@ -570,7 +670,7 @@ void MDNode::makeUniqued() { // Make this 'uniqued'. Storage = Uniqued; countUnresolvedOperands(); - if (!NumUnresolved) { + if (!getNumUnresolved()) { dropReplaceableUses(); assert(isResolved() && "Expected this to be resolved"); } @@ -594,14 +694,14 @@ void MDNode::resolve() { assert(isUniqued() && "Expected this to be uniqued"); assert(!isResolved() && "Expected this to be unresolved"); - NumUnresolved = 0; + setNumUnresolved(0); dropReplaceableUses(); assert(isResolved() && "Expected this to be resolved"); } void MDNode::dropReplaceableUses() { - assert(!NumUnresolved && "Unexpected unresolved operand"); + assert(!getNumUnresolved() && "Unexpected unresolved operand"); // Drop any RAUW support. if (Context.hasReplaceableUses()) @@ -610,13 +710,13 @@ void MDNode::dropReplaceableUses() { void MDNode::resolveAfterOperandChange(Metadata *Old, Metadata *New) { assert(isUniqued() && "Expected this to be uniqued"); - assert(NumUnresolved != 0 && "Expected unresolved operands"); + assert(getNumUnresolved() != 0 && "Expected unresolved operands"); // Check if an operand was resolved. if (!isOperandUnresolved(Old)) { if (isOperandUnresolved(New)) // An operand was un-resolved! - ++NumUnresolved; + setNumUnresolved(getNumUnresolved() + 1); } else if (!isOperandUnresolved(New)) decrementUnresolvedOperandCount(); } @@ -627,7 +727,8 @@ void MDNode::decrementUnresolvedOperandCount() { return; assert(isUniqued() && "Expected this to be uniqued"); - if (--NumUnresolved) + setNumUnresolved(getNumUnresolved() - 1); + if (getNumUnresolved()) return; // Last unresolved operand has just been resolved. 
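The operator new/delete rewrite above moves to a co-allocation scheme: the operand list is laid out in front of a per-node Header, which itself sits immediately before the MDNode. Below is a minimal standalone sketch of that prefix-allocation pattern (an editorial addition, not code from this patch: DemoHeader and DemoNode are invented names, alignment handling is omitted, and the real Header additionally supports a resizable "large" vector mode):

#include <cstddef>
#include <new>

// Layout: [ Op[0] ... Op[NumOps-1] ][ DemoHeader ][ DemoNode ]
struct DemoHeader {
  unsigned NumOps;
};

struct DemoNode {
  // Allocate operand slots plus a header in front of the node itself.
  static void *operator new(std::size_t Size, unsigned NumOps) {
    std::size_t Prefix = NumOps * sizeof(int) + sizeof(DemoHeader);
    char *Mem = static_cast<char *>(::operator new(Prefix + Size));
    auto *H = new (Mem + NumOps * sizeof(int)) DemoHeader{NumOps};
    return H + 1; // The node lives immediately after its header.
  }
  // Recover the start of the allocation from the node address.
  static void operator delete(void *Ptr) {
    auto *H = static_cast<DemoHeader *>(Ptr) - 1;
    ::operator delete(reinterpret_cast<char *>(H) - H->NumOps * sizeof(int));
  }
  DemoHeader &header() { return *(reinterpret_cast<DemoHeader *>(this) - 1); }
  int *op_begin() { return reinterpret_cast<int *>(&header()) - header().NumOps; }
};

// Usage: DemoNode *N = new (4u) DemoNode; N->op_begin()[0] = 42; delete N;

The payoff, as in the patch, is a single heap allocation per node, while the header can later migrate the operands to separate storage without relocating the node itself.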
@@ -702,7 +803,7 @@ void MDTuple::recalculateHash() { } void MDNode::dropAllReferences() { - for (unsigned I = 0, E = NumOperands; I != E; ++I) + for (unsigned I = 0, E = getNumOperands(); I != E; ++I) setOperand(I, nullptr); if (Context.hasReplaceableUses()) { Context.getReplaceableUses()->resolveAllUses(/* ResolveUsers */ false); @@ -838,7 +939,8 @@ MDTuple *MDTuple::getImpl(LLVMContext &Context, ArrayRef MDs, assert(ShouldCreate && "Expected non-uniqued nodes to always be created"); } - return storeImpl(new (MDs.size()) MDTuple(Context, Storage, Hash, MDs), + return storeImpl(new (MDs.size(), Storage) + MDTuple(Context, Storage, Hash, MDs), Storage, Context.pImpl->MDTuples); } @@ -850,7 +952,7 @@ void MDNode::deleteTemporary(MDNode *N) { void MDNode::storeDistinctInContext() { assert(!Context.hasReplaceableUses() && "Unexpected replaceable uses"); - assert(!NumUnresolved && "Unexpected unresolved nodes"); + assert(!getNumUnresolved() && "Unexpected unresolved nodes"); Storage = Distinct; assert(isResolved() && "Expected this to be resolved"); @@ -883,7 +985,7 @@ void MDNode::replaceOperandWith(unsigned I, Metadata *New) { } void MDNode::setOperand(unsigned I, Metadata *New) { - assert(I < NumOperands); + assert(I < getNumOperands()); mutable_begin()[I].reset(New, isUniqued() ? this : nullptr); } diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 4974b372db2a..5cd74d53da75 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -71,8 +71,7 @@ template class llvm::SymbolTableListTraits; Module::Module(StringRef MID, LLVMContext &C) : Context(C), ValSymTab(std::make_unique(-1)), - Materializer(), ModuleID(std::string(MID)), - SourceFileName(std::string(MID)), DL("") { + ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL("") { Context.addModule(this); } @@ -671,12 +670,15 @@ void Module::setRtLibUseGOT() { addModuleFlag(ModFlagBehavior::Max, "RtLibUseGOT", 1); } -bool Module::getUwtable() const { - auto *Val = cast_or_null(getModuleFlag("uwtable")); - return Val && (cast(Val->getValue())->getZExtValue() > 0); +UWTableKind Module::getUwtable() const { + if (auto *Val = cast_or_null(getModuleFlag("uwtable"))) + return UWTableKind(cast(Val->getValue())->getZExtValue()); + return UWTableKind::None; } -void Module::setUwtable() { addModuleFlag(ModFlagBehavior::Max, "uwtable", 1); } +void Module::setUwtable(UWTableKind Kind) { + addModuleFlag(ModFlagBehavior::Max, "uwtable", uint32_t(Kind)); +} FramePointerKind Module::getFramePointer() const { auto *Val = cast_or_null(getModuleFlag("frame-pointer")); @@ -734,7 +736,7 @@ void Module::setOverrideStackAlignment(unsigned Align) { addModuleFlag(ModFlagBehavior::Error, "override-stack-alignment", Align); } -void Module::setSDKVersion(const VersionTuple &V) { +static void addSDKVersionMD(const VersionTuple &V, Module &M, StringRef Name) { SmallVector Entries; Entries.push_back(V.getMajor()); if (auto Minor = V.getMinor()) { @@ -744,8 +746,12 @@ void Module::setSDKVersion(const VersionTuple &V) { // Ignore the 'build' component as it can't be represented in the object // file. 
} - addModuleFlag(ModFlagBehavior::Warning, "SDK Version", - ConstantDataArray::get(Context, Entries)); + M.addModuleFlag(Module::ModFlagBehavior::Warning, Name, + ConstantDataArray::get(M.getContext(), Entries)); +} + +void Module::setSDKVersion(const VersionTuple &V) { + addSDKVersionMD(V, *this, "SDK Version"); } static VersionTuple getSDKVersionMD(Metadata *MD) { @@ -818,6 +824,15 @@ StringRef Module::getDarwinTargetVariantTriple() const { return ""; } +void Module::setDarwinTargetVariantTriple(StringRef T) { + addModuleFlag(ModFlagBehavior::Override, "darwin.target_variant.triple", + MDString::get(getContext(), T)); +} + VersionTuple Module::getDarwinTargetVariantSDKVersion() const { return getSDKVersionMD(getModuleFlag("darwin.target_variant.SDK Version")); } + +void Module::setDarwinTargetVariantSDKVersion(VersionTuple Version) { + addSDKVersionMD(Version, *this, "darwin.target_variant.SDK Version"); +} diff --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp index 755ea57c63fd..fe0bfd81a81e 100644 --- a/llvm/lib/IR/Pass.cpp +++ b/llvm/lib/IR/Pass.cpp @@ -27,6 +27,10 @@ #include "llvm/Support/raw_ostream.h" #include +#ifdef EXPENSIVE_CHECKS +#include "llvm/IR/StructuralHash.h" +#endif + using namespace llvm; #define DEBUG_TYPE "ir" @@ -133,6 +137,12 @@ LLVM_DUMP_METHOD void Pass::dump() const { } #endif +#ifdef EXPENSIVE_CHECKS +uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); } + +uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); } +#endif + //===----------------------------------------------------------------------===// // ImmutablePass Implementation // diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index d2f676192e7f..069da26e63b1 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/ReplaceConstant.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ValueMap.h" diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp index d8634e0ac7dd..5d3fa28f7d0a 100644 --- a/llvm/lib/IR/SafepointIRVerifier.cpp +++ b/llvm/lib/IR/SafepointIRVerifier.cpp @@ -357,6 +357,17 @@ static enum BaseType getBaseType(const Value *Val) { Worklist.push_back(SI->getFalseValue()); continue; } + if (const auto *GCRelocate = dyn_cast(V)) { + // GCRelocates do not change null-ness or constant-ness of the value. + // So we can continue with derived pointer this instruction relocates. + Worklist.push_back(GCRelocate->getDerivedPtr()); + continue; + } + if (const auto *FI = dyn_cast(V)) { + // Freeze does not change null-ness or constant-ness of the value. + Worklist.push_back(FI->getOperand(0)); + continue; + } if (isa(V)) { // We found at least one base pointer which is non-null, so this derived // pointer is not exclusively derived from null. 
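One behavioral note on the Module changes above: the "uwtable" module flag is upgraded from a plain boolean to a UWTableKind carried through getUwtable()/setUwtable(). A hedged usage sketch follows (an editorial addition, not code from this patch; it assumes the UWTableKind enum with None/Sync/Async from llvm/Support/CodeGen.h in this release):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGen.h"

using namespace llvm;

void demoUwtable() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Request asynchronous (instruction-precise) unwind tables module-wide.
  M.setUwtable(UWTableKind::Async);
  // The flag is registered with Max merge semantics, so a later request
  // for the weaker UWTableKind::Sync cannot lower what is recorded here.
  if (M.getUwtable() != UWTableKind::None) {
    // For example, propagate the kind onto newly created functions.
  }
}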
diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp index 601a9df5279e..99a89386d75f 100644 --- a/llvm/lib/IR/Use.cpp +++ b/llvm/lib/IR/Use.cpp @@ -11,10 +11,6 @@ namespace llvm { -class User; -template struct simplify_type; -class Value; - void Use::swap(Use &RHS) { if (Val == RHS.Val) return; diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp index 68489075cd88..637af7aaa245 100644 --- a/llvm/lib/IR/User.cpp +++ b/llvm/lib/IR/User.cpp @@ -18,8 +18,9 @@ class BasicBlock; // User Class //===----------------------------------------------------------------------===// -void User::replaceUsesOfWith(Value *From, Value *To) { - if (From == To) return; // Duh what? +bool User::replaceUsesOfWith(Value *From, Value *To) { + bool Changed = false; + if (From == To) return Changed; // Duh what? assert((!isa(this) || isa(this)) && "Cannot call User::replaceUsesOfWith on a constant!"); @@ -30,11 +31,16 @@ void User::replaceUsesOfWith(Value *From, Value *To) { // "To", adding "this" to the uses list of To, and // most importantly, removing "this" from the use list of "From". setOperand(i, To); + Changed = true; } if (auto DVI = dyn_cast_or_null(this)) { - if (is_contained(DVI->location_ops(), From)) + if (is_contained(DVI->location_ops(), From)) { DVI->replaceVariableLocationOp(From, To); + Changed = true; + } } + + return Changed; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 18aef37e2023..3990536f3da5 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -28,7 +28,6 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -377,6 +376,7 @@ void Value::setName(const Twine &NewName) { } void Value::takeName(Value *V) { + assert(V != this && "Illegal call to this->takeName(this)!"); ValueSymbolTable *ST = nullptr; // If this value has a name, drop it. if (hasName()) { @@ -408,7 +408,7 @@ void Value::takeName(Value *V) { } } - // Get V's ST, this should always succed, because V has a name. + // Get V's ST, this should always succeed, because V has a name. ValueSymbolTable *VST; bool Failure = getSymTab(V, VST); assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure; @@ -963,6 +963,9 @@ Align Value::getPointerAlignment(const DataLayout &DL) const { return Align(CI->getLimitedValue()); } } else if (auto *CstPtr = dyn_cast(this)) { + // Strip pointer casts to avoid creating unnecessary ptrtoint expression + // if the only "reduction" is combining a bitcast + ptrtoint. + CstPtr = CstPtr->stripPointerCasts(); if (auto *CstInt = dyn_cast_or_null(ConstantExpr::getPtrToInt( const_cast(CstPtr), DL.getIntPtrType(getType()), /*OnlyIfReduced=*/true))) { @@ -1017,20 +1020,16 @@ bool Value::isSwiftError() const { } bool Value::isTransitiveUsedByMetadataOnly() const { - if (use_empty()) - return false; - llvm::SmallVector WorkList; - llvm::SmallPtrSet Visited; - WorkList.insert(WorkList.begin(), user_begin(), user_end()); + SmallVector WorkList(user_begin(), user_end()); + SmallPtrSet Visited(user_begin(), user_end()); while (!WorkList.empty()) { const User *U = WorkList.pop_back_val(); - Visited.insert(U); // If it is transitively used by a global value or a non-constant value, // it's obviously not only used by metadata. 
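// (Editor's note, not in the original patch: Visited is now seeded with
// the direct users and each child is inserted before being pushed, so a
// user is enqueued at most once; previously a node reachable from several
// parents could sit on the worklist multiple times before its first visit.)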
if (!isa(U) || isa(U)) return false; for (const User *UU : U->users()) - if (!Visited.count(UU)) + if (Visited.insert(UU).second) WorkList.push_back(UU); } return true; diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp new file mode 100644 index 000000000000..e7be7a98a593 --- /dev/null +++ b/llvm/lib/IR/VectorBuilder.cpp @@ -0,0 +1,103 @@ +//===- VectorBuilder.cpp - Builder for VP Intrinsics ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the VectorBuilder class, which is used as a convenient +// way to create VP intrinsics as if they were LLVM instructions with a +// consistent and simplified interface. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +namespace llvm { + +void VectorBuilder::handleError(const char *ErrorMsg) const { + if (ErrorHandling == Behavior::SilentlyReturnNone) + return; + report_fatal_error(ErrorMsg); +} + +Module &VectorBuilder::getModule() const { + return *Builder.GetInsertBlock()->getModule(); +} + +Value *VectorBuilder::getAllTrueMask() { + auto *BoolTy = Builder.getInt1Ty(); + auto *MaskTy = VectorType::get(BoolTy, StaticVectorLength); + return ConstantInt::getAllOnesValue(MaskTy); +} + +Value &VectorBuilder::requestMask() { + if (Mask) + return *Mask; + + return *getAllTrueMask(); +} + +Value &VectorBuilder::requestEVL() { + if (ExplicitVectorLength) + return *ExplicitVectorLength; + + assert(!StaticVectorLength.isScalable() && "TODO vscale lowering"); + auto *IntTy = Builder.getInt32Ty(); + return *ConstantInt::get(IntTy, StaticVectorLength.getFixedValue()); +} + +Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy, + ArrayRef InstOpArray, + const Twine &Name) { + auto VPID = VPIntrinsic::getForOpcode(Opcode); + if (VPID == Intrinsic::not_intrinsic) + return returnWithError("No VPIntrinsic for this opcode"); + + auto MaskPosOpt = VPIntrinsic::getMaskParamPos(VPID); + auto VLenPosOpt = VPIntrinsic::getVectorLengthParamPos(VPID); + size_t NumInstParams = InstOpArray.size(); + size_t NumVPParams = + NumInstParams + MaskPosOpt.has_value() + VLenPosOpt.has_value(); + + SmallVector IntrinParams; + + // Whether the mask and vlen parameter are at the end of the parameter list. + bool TrailingMaskAndVLen = + std::min(MaskPosOpt.value_or(NumInstParams), + VLenPosOpt.value_or(NumInstParams)) >= NumInstParams; + + if (TrailingMaskAndVLen) { + // Fast path for trailing mask, vector length. + IntrinParams.append(InstOpArray.begin(), InstOpArray.end()); + IntrinParams.resize(NumVPParams); + } else { + IntrinParams.resize(NumVPParams); + // Insert mask and evl operands in between the instruction operands. 
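+ // (Editorial example: with three instruction operands, a mask at VP
+ // position 1 and an EVL at position 4, NumVPParams is 5 and the loop
+ // below fills slots {0, 2, 3} from InstOpArray, leaving slots 1 and 4
+ // to be patched with the mask and EVL afterwards.)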
+ for (size_t VPParamIdx = 0, ParamIdx = 0; VPParamIdx < NumVPParams; + ++VPParamIdx) { + if ((MaskPosOpt && MaskPosOpt.value_or(NumVPParams) == VPParamIdx) || + (VLenPosOpt && VLenPosOpt.value_or(NumVPParams) == VPParamIdx)) + continue; + assert(ParamIdx < NumInstParams); + IntrinParams[VPParamIdx] = InstOpArray[ParamIdx++]; + } + } + + if (MaskPosOpt) + IntrinParams[*MaskPosOpt] = &requestMask(); + if (VLenPosOpt) + IntrinParams[*VLenPosOpt] = &requestEVL(); + + auto *VPDecl = VPIntrinsic::getDeclarationForParams(&getModule(), VPID, + ReturnTy, IntrinParams); + return Builder.CreateCall(VPDecl, IntrinParams, Name); +} + +} // namespace llvm diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 989d01e2e395..75d02f4c8c82 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -84,6 +84,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -100,7 +102,6 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -278,6 +279,12 @@ namespace { class Verifier : public InstVisitor, VerifierSupport { friend class InstVisitor; + // ISD::ArgFlagsTy::MemAlign only have 4 bits for alignment, so + // the alignment size should not exceed 2^15. Since encode(Align) + // would plus the shift value by 1, the alignment size should + // not exceed 2^14, otherwise it can NOT be properly lowered + // in backend. + static constexpr unsigned ParamMaxAlignment = 1 << 14; DominatorTree DT; /// When verifying a basic block, keep track of all of the @@ -465,6 +472,7 @@ private: void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); + void visitAccessGroupMetadata(const MDNode *MD); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); @@ -521,6 +529,7 @@ private: void visitUserOp2(Instruction &I) { visitUserOp1(I); } void visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call); void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI); + void visitVPIntrinsic(VPIntrinsic &VPI); void visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII); void visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI); void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI); @@ -587,17 +596,27 @@ private: } // end anonymous namespace /// We know that cond should be true, if not print an error message. -#define Assert(C, ...) \ - do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) +#define Check(C, ...) \ + do { \ + if (!(C)) { \ + CheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) /// We know that a debug info condition should be true, if not print /// an error message. -#define AssertDI(C, ...) \ - do { if (!(C)) { DebugInfoCheckFailed(__VA_ARGS__); return; } } while (false) +#define CheckDI(C, ...) 
\ + do { \ + if (!(C)) { \ + DebugInfoCheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) void Verifier::visit(Instruction &I) { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - Assert(I.getOperand(i) != nullptr, "Operand is null", &I); + Check(I.getOperand(i) != nullptr, "Operand is null", &I); InstVisitor::visit(I); } @@ -620,43 +639,43 @@ static void forEachUser(const Value *User, } void Verifier::visitGlobalValue(const GlobalValue &GV) { - Assert(!GV.isDeclaration() || GV.hasValidDeclarationLinkage(), - "Global is external, but doesn't have external or weak linkage!", &GV); + Check(!GV.isDeclaration() || GV.hasValidDeclarationLinkage(), + "Global is external, but doesn't have external or weak linkage!", &GV); if (const GlobalObject *GO = dyn_cast(&GV)) { if (MaybeAlign A = GO->getAlign()) { - Assert(A->value() <= Value::MaximumAlignment, - "huge alignment values are unsupported", GO); + Check(A->value() <= Value::MaximumAlignment, + "huge alignment values are unsupported", GO); } } - Assert(!GV.hasAppendingLinkage() || isa(GV), - "Only global variables can have appending linkage!", &GV); + Check(!GV.hasAppendingLinkage() || isa(GV), + "Only global variables can have appending linkage!", &GV); if (GV.hasAppendingLinkage()) { const GlobalVariable *GVar = dyn_cast(&GV); - Assert(GVar && GVar->getValueType()->isArrayTy(), - "Only global arrays can have appending linkage!", GVar); + Check(GVar && GVar->getValueType()->isArrayTy(), + "Only global arrays can have appending linkage!", GVar); } if (GV.isDeclarationForLinker()) - Assert(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV); + Check(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV); if (GV.hasDLLImportStorageClass()) { - Assert(!GV.isDSOLocal(), - "GlobalValue with DLLImport Storage is dso_local!", &GV); + Check(!GV.isDSOLocal(), "GlobalValue with DLLImport Storage is dso_local!", + &GV); - Assert((GV.isDeclaration() && - (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) || - GV.hasAvailableExternallyLinkage(), - "Global is marked as dllimport, but not external", &GV); + Check((GV.isDeclaration() && + (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) || + GV.hasAvailableExternallyLinkage(), + "Global is marked as dllimport, but not external", &GV); } if (GV.isImplicitDSOLocal()) - Assert(GV.isDSOLocal(), - "GlobalValue with local linkage or non-default " - "visibility must be dso_local!", - &GV); + Check(GV.isDSOLocal(), + "GlobalValue with local linkage or non-default " + "visibility must be dso_local!", + &GV); forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool { if (const Instruction *I = dyn_cast(V)) { @@ -680,25 +699,25 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { void Verifier::visitGlobalVariable(const GlobalVariable &GV) { if (GV.hasInitializer()) { - Assert(GV.getInitializer()->getType() == GV.getValueType(), - "Global variable initializer type does not match global " - "variable type!", - &GV); + Check(GV.getInitializer()->getType() == GV.getValueType(), + "Global variable initializer type does not match global " + "variable type!", + &GV); // If the global has common linkage, it must have a zero initializer and // cannot be constant. 
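// (Editorial example: "@g = common global i32 0" is well-formed, while
// "@g = common constant i32 7" trips both of the first two checks below.)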
if (GV.hasCommonLinkage()) { - Assert(GV.getInitializer()->isNullValue(), - "'common' global must have a zero initializer!", &GV); - Assert(!GV.isConstant(), "'common' global may not be marked constant!", - &GV); - Assert(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV); + Check(GV.getInitializer()->isNullValue(), + "'common' global must have a zero initializer!", &GV); + Check(!GV.isConstant(), "'common' global may not be marked constant!", + &GV); + Check(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV); } } if (GV.hasName() && (GV.getName() == "llvm.global_ctors" || GV.getName() == "llvm.global_dtors")) { - Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(), - "invalid linkage for intrinsic global variable", &GV); + Check(!GV.hasInitializer() || GV.hasAppendingLinkage(), + "invalid linkage for intrinsic global variable", &GV); // Don't worry about emitting an error for it not being an array, // visitGlobalValue will complain on appending non-array. if (ArrayType *ATy = dyn_cast(GV.getValueType())) { @@ -706,42 +725,41 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { PointerType *FuncPtrTy = FunctionType::get(Type::getVoidTy(Context), false)-> getPointerTo(DL.getProgramAddressSpace()); - Assert(STy && - (STy->getNumElements() == 2 || STy->getNumElements() == 3) && - STy->getTypeAtIndex(0u)->isIntegerTy(32) && - STy->getTypeAtIndex(1) == FuncPtrTy, - "wrong type for intrinsic global variable", &GV); - Assert(STy->getNumElements() == 3, - "the third field of the element type is mandatory, " - "specify i8* null to migrate from the obsoleted 2-field form"); + Check(STy && (STy->getNumElements() == 2 || STy->getNumElements() == 3) && + STy->getTypeAtIndex(0u)->isIntegerTy(32) && + STy->getTypeAtIndex(1) == FuncPtrTy, + "wrong type for intrinsic global variable", &GV); + Check(STy->getNumElements() == 3, + "the third field of the element type is mandatory, " + "specify i8* null to migrate from the obsoleted 2-field form"); Type *ETy = STy->getTypeAtIndex(2); Type *Int8Ty = Type::getInt8Ty(ETy->getContext()); - Assert(ETy->isPointerTy() && - cast(ETy)->isOpaqueOrPointeeTypeMatches(Int8Ty), - "wrong type for intrinsic global variable", &GV); + Check(ETy->isPointerTy() && + cast(ETy)->isOpaqueOrPointeeTypeMatches(Int8Ty), + "wrong type for intrinsic global variable", &GV); } } if (GV.hasName() && (GV.getName() == "llvm.used" || GV.getName() == "llvm.compiler.used")) { - Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(), - "invalid linkage for intrinsic global variable", &GV); + Check(!GV.hasInitializer() || GV.hasAppendingLinkage(), + "invalid linkage for intrinsic global variable", &GV); Type *GVType = GV.getValueType(); if (ArrayType *ATy = dyn_cast(GVType)) { PointerType *PTy = dyn_cast(ATy->getElementType()); - Assert(PTy, "wrong type for intrinsic global variable", &GV); + Check(PTy, "wrong type for intrinsic global variable", &GV); if (GV.hasInitializer()) { const Constant *Init = GV.getInitializer(); const ConstantArray *InitArray = dyn_cast(Init); - Assert(InitArray, "wrong initalizer for intrinsic global variable", - Init); + Check(InitArray, "wrong initializer for intrinsic global variable", + Init); for (Value *Op : InitArray->operands()) { Value *V = Op->stripPointerCasts(); - Assert(isa(V) || isa(V) || - isa(V), - Twine("invalid ") + GV.getName() + " member", V); - Assert(V->hasName(), - Twine("members of ") + GV.getName() + " must be named", V); + Check(isa(V) || isa(V) || + isa(V), + Twine("invalid ") + GV.getName() + "
member", V); + Check(V->hasName(), + Twine("members of ") + GV.getName() + " must be named", V); } } } @@ -754,20 +772,20 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { if (auto *GVE = dyn_cast(MD)) visitDIGlobalVariableExpression(*GVE); else - AssertDI(false, "!dbg attachment of global variable must be a " - "DIGlobalVariableExpression"); + CheckDI(false, "!dbg attachment of global variable must be a " + "DIGlobalVariableExpression"); } // Scalable vectors cannot be global variables, since we don't know // the runtime size. If the global is an array containing scalable vectors, // that will be caught by the isValidElementType methods in StructType or // ArrayType instead. - Assert(!isa(GV.getValueType()), - "Globals cannot contain scalable vectors", &GV); + Check(!isa(GV.getValueType()), + "Globals cannot contain scalable vectors", &GV); if (auto *STy = dyn_cast(GV.getValueType())) - Assert(!STy->containsScalableVectorType(), - "Globals cannot contain scalable vectors", &GV); + Check(!STy->containsScalableVectorType(), + "Globals cannot contain scalable vectors", &GV); if (!GV.hasInitializer()) { visitGlobalValue(GV); @@ -789,14 +807,14 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) { void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &GA, const Constant &C) { if (const auto *GV = dyn_cast(&C)) { - Assert(!GV->isDeclarationForLinker(), "Alias must point to a definition", - &GA); + Check(!GV->isDeclarationForLinker(), "Alias must point to a definition", + &GA); if (const auto *GA2 = dyn_cast(GV)) { - Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA); + Check(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA); - Assert(!GA2->isInterposable(), "Alias cannot point to an interposable alias", - &GA); + Check(!GA2->isInterposable(), + "Alias cannot point to an interposable alias", &GA); } else { // Only continue verifying subexpressions of GlobalAliases. // Do not recurse into global initializers. @@ -817,17 +835,17 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, } void Verifier::visitGlobalAlias(const GlobalAlias &GA) { - Assert(GlobalAlias::isValidLinkage(GA.getLinkage()), - "Alias should have private, internal, linkonce, weak, linkonce_odr, " - "weak_odr, or external linkage!", - &GA); + Check(GlobalAlias::isValidLinkage(GA.getLinkage()), + "Alias should have private, internal, linkonce, weak, linkonce_odr, " + "weak_odr, or external linkage!", + &GA); const Constant *Aliasee = GA.getAliasee(); - Assert(Aliasee, "Aliasee cannot be NULL!", &GA); - Assert(GA.getType() == Aliasee->getType(), - "Alias and aliasee types should match!", &GA); + Check(Aliasee, "Aliasee cannot be NULL!", &GA); + Check(GA.getType() == Aliasee->getType(), + "Alias and aliasee types should match!", &GA); - Assert(isa(Aliasee) || isa(Aliasee), - "Aliasee should be either GlobalValue or ConstantExpr", &GA); + Check(isa(Aliasee) || isa(Aliasee), + "Aliasee should be either GlobalValue or ConstantExpr", &GA); visitAliaseeSubExpr(GA, *Aliasee); @@ -835,30 +853,35 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { } void Verifier::visitGlobalIFunc(const GlobalIFunc &GI) { + Check(GlobalIFunc::isValidLinkage(GI.getLinkage()), + "IFunc should have private, internal, linkonce, weak, linkonce_odr, " + "weak_odr, or external linkage!", + &GI); // Pierce through ConstantExprs and GlobalAliases and check that the resolver - // has a Function + // is a Function definition. 
const Function *Resolver = GI.getResolverFunction(); - Assert(Resolver, "IFunc must have a Function resolver", &GI); + Check(Resolver, "IFunc must have a Function resolver", &GI); + Check(!Resolver->isDeclarationForLinker(), + "IFunc resolver must be a definition", &GI); // Check that the immediate resolver operand (prior to any bitcasts) has the - // correct type + // correct type. const Type *ResolverTy = GI.getResolver()->getType(); const Type *ResolverFuncTy = GlobalIFunc::getResolverFunctionType(GI.getValueType()); - Assert(ResolverTy == ResolverFuncTy->getPointerTo(), - "IFunc resolver has incorrect type", &GI); + Check(ResolverTy == ResolverFuncTy->getPointerTo(), + "IFunc resolver has incorrect type", &GI); } void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { // There used to be various other llvm.dbg.* nodes, but we don't support // upgrading them and we want to reserve the namespace for future uses. if (NMD.getName().startswith("llvm.dbg.")) - AssertDI(NMD.getName() == "llvm.dbg.cu", - "unrecognized named metadata node in the llvm.dbg namespace", - &NMD); + CheckDI(NMD.getName() == "llvm.dbg.cu", + "unrecognized named metadata node in the llvm.dbg namespace", &NMD); for (const MDNode *MD : NMD.operands()) { if (NMD.getName() == "llvm.dbg.cu") - AssertDI(MD && isa(MD), "invalid compile unit", &NMD, MD); + CheckDI(MD && isa(MD), "invalid compile unit", &NMD, MD); if (!MD) continue; @@ -873,8 +896,8 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { if (!MDNodes.insert(&MD).second) return; - Assert(&MD.getContext() == &Context, - "MDNode context does not match Module context!", &MD); + Check(&MD.getContext() == &Context, + "MDNode context does not match Module context!", &MD); switch (MD.getMetadataID()) { default: @@ -891,10 +914,10 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { for (const Metadata *Op : MD.operands()) { if (!Op) continue; - Assert(!isa(Op), "Invalid operand for global metadata!", - &MD, Op); - AssertDI(!isa(Op) || AllowLocs == AreDebugLocsAllowed::Yes, - "DILocation not allowed within this metadata node", &MD, Op); + Check(!isa(Op), "Invalid operand for global metadata!", + &MD, Op); + CheckDI(!isa(Op) || AllowLocs == AreDebugLocsAllowed::Yes, + "DILocation not allowed within this metadata node", &MD, Op); if (auto *N = dyn_cast(Op)) { visitMDNode(*N, AllowLocs); continue; @@ -906,26 +929,26 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { } // Check these last, so we diagnose problems in operands first. 
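// (Editor's note: a temporary MDNode is a placeholder for a forward
// reference created while IR is being built; by the time the verifier
// runs, all of them should have been replaced and every node resolved.)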
- Assert(!MD.isTemporary(), "Expected no forward declarations!", &MD); - Assert(MD.isResolved(), "All nodes should be resolved!", &MD); + Check(!MD.isTemporary(), "Expected no forward declarations!", &MD); + Check(MD.isResolved(), "All nodes should be resolved!", &MD); } void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) { - Assert(MD.getValue(), "Expected valid value", &MD); - Assert(!MD.getValue()->getType()->isMetadataTy(), - "Unexpected metadata round-trip through values", &MD, MD.getValue()); + Check(MD.getValue(), "Expected valid value", &MD); + Check(!MD.getValue()->getType()->isMetadataTy(), + "Unexpected metadata round-trip through values", &MD, MD.getValue()); auto *L = dyn_cast(&MD); if (!L) return; - Assert(F, "function-local metadata used outside a function", L); + Check(F, "function-local metadata used outside a function", L); // If this was an instruction, bb, or argument, verify that it is in the // function that we expect. Function *ActualF = nullptr; if (Instruction *I = dyn_cast(L->getValue())) { - Assert(I->getParent(), "function-local metadata not in basic block", L, I); + Check(I->getParent(), "function-local metadata not in basic block", L, I); ActualF = I->getParent()->getParent(); } else if (BasicBlock *BB = dyn_cast(L->getValue())) ActualF = BB->getParent(); @@ -933,7 +956,7 @@ void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) { ActualF = A->getParent(); assert(ActualF && "Unimplemented function local metadata case!"); - Assert(ActualF == F, "function-local metadata used in wrong function", L); + Check(ActualF == F, "function-local metadata used in wrong function", L); } void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { @@ -957,125 +980,125 @@ static bool isScope(const Metadata *MD) { return !MD || isa(MD); } static bool isDINode(const Metadata *MD) { return !MD || isa(MD); } void Verifier::visitDILocation(const DILocation &N) { - AssertDI(N.getRawScope() && isa(N.getRawScope()), - "location requires a valid scope", &N, N.getRawScope()); + CheckDI(N.getRawScope() && isa(N.getRawScope()), + "location requires a valid scope", &N, N.getRawScope()); if (auto *IA = N.getRawInlinedAt()) - AssertDI(isa(IA), "inlined-at should be a location", &N, IA); + CheckDI(isa(IA), "inlined-at should be a location", &N, IA); if (auto *SP = dyn_cast(N.getRawScope())) - AssertDI(SP->isDefinition(), "scope points into the type hierarchy", &N); + CheckDI(SP->isDefinition(), "scope points into the type hierarchy", &N); } void Verifier::visitGenericDINode(const GenericDINode &N) { - AssertDI(N.getTag(), "invalid tag", &N); + CheckDI(N.getTag(), "invalid tag", &N); } void Verifier::visitDIScope(const DIScope &N) { if (auto *F = N.getRawFile()) - AssertDI(isa(F), "invalid file", &N, F); + CheckDI(isa(F), "invalid file", &N, F); } void Verifier::visitDISubrange(const DISubrange &N) { - AssertDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); + CheckDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); bool HasAssumedSizedArraySupport = dwarf::isFortran(CurrentSourceLang); - AssertDI(HasAssumedSizedArraySupport || N.getRawCountNode() || - N.getRawUpperBound(), - "Subrange must contain count or upperBound", &N); - AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(), - "Subrange can have any one of count or upperBound", &N); + CheckDI(HasAssumedSizedArraySupport || N.getRawCountNode() || + N.getRawUpperBound(), + "Subrange must contain count or upperBound", &N); + 
   auto *CBound = N.getRawCountNode();
-  AssertDI(!CBound || isa<ConstantAsMetadata>(CBound) ||
-               isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
-           "Count must be signed constant or DIVariable or DIExpression", &N);
+  CheckDI(!CBound || isa<ConstantAsMetadata>(CBound) ||
+              isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
+          "Count must be signed constant or DIVariable or DIExpression", &N);
   auto Count = N.getCount();
-  AssertDI(!Count || !Count.is<ConstantInt *>() ||
-               Count.get<ConstantInt *>()->getSExtValue() >= -1,
-           "invalid subrange count", &N);
+  CheckDI(!Count || !Count.is<ConstantInt *>() ||
+              Count.get<ConstantInt *>()->getSExtValue() >= -1,
+          "invalid subrange count", &N);
   auto *LBound = N.getRawLowerBound();
-  AssertDI(!LBound || isa<ConstantAsMetadata>(LBound) ||
-               isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
-           "LowerBound must be signed constant or DIVariable or DIExpression",
-           &N);
+  CheckDI(!LBound || isa<ConstantAsMetadata>(LBound) ||
+              isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
+          "LowerBound must be signed constant or DIVariable or DIExpression",
+          &N);
   auto *UBound = N.getRawUpperBound();
-  AssertDI(!UBound || isa<ConstantAsMetadata>(UBound) ||
-               isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
-           "UpperBound must be signed constant or DIVariable or DIExpression",
-           &N);
+  CheckDI(!UBound || isa<ConstantAsMetadata>(UBound) ||
+              isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
+          "UpperBound must be signed constant or DIVariable or DIExpression",
+          &N);
   auto *Stride = N.getRawStride();
-  AssertDI(!Stride || isa<ConstantAsMetadata>(Stride) ||
-               isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
-           "Stride must be signed constant or DIVariable or DIExpression", &N);
+  CheckDI(!Stride || isa<ConstantAsMetadata>(Stride) ||
+              isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
+          "Stride must be signed constant or DIVariable or DIExpression", &N);
 }
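An illustrative sketch of the count/upperBound rule above (metadata numbering
hypothetical): a subrange may carry one of the two, never both.

    !5 = !DISubrange(count: 16, lowerBound: 0)      ; accepted
    !6 = !DISubrange(lowerBound: 1, upperBound: 8)  ; accepted
    ; !DISubrange(count: 16, upperBound: 8) fails "Subrange can have any
    ; one of count or upperBound".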
 
 void Verifier::visitDIGenericSubrange(const DIGenericSubrange &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_generic_subrange, "invalid tag", &N);
-  AssertDI(N.getRawCountNode() || N.getRawUpperBound(),
-           "GenericSubrange must contain count or upperBound", &N);
-  AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(),
-           "GenericSubrange can have any one of count or upperBound", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_generic_subrange, "invalid tag", &N);
+  CheckDI(N.getRawCountNode() || N.getRawUpperBound(),
+          "GenericSubrange must contain count or upperBound", &N);
+  CheckDI(!N.getRawCountNode() || !N.getRawUpperBound(),
+          "GenericSubrange can have any one of count or upperBound", &N);
   auto *CBound = N.getRawCountNode();
-  AssertDI(!CBound || isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
-           "Count must be signed constant or DIVariable or DIExpression", &N);
+  CheckDI(!CBound || isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
+          "Count must be signed constant or DIVariable or DIExpression", &N);
   auto *LBound = N.getRawLowerBound();
-  AssertDI(LBound, "GenericSubrange must contain lowerBound", &N);
-  AssertDI(isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
-           "LowerBound must be signed constant or DIVariable or DIExpression",
-           &N);
+  CheckDI(LBound, "GenericSubrange must contain lowerBound", &N);
+  CheckDI(isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
+          "LowerBound must be signed constant or DIVariable or DIExpression",
+          &N);
   auto *UBound = N.getRawUpperBound();
-  AssertDI(!UBound || isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
-           "UpperBound must be signed constant or DIVariable or DIExpression",
-           &N);
+  CheckDI(!UBound || isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
+          "UpperBound must be signed constant or DIVariable or DIExpression",
+          &N);
   auto *Stride = N.getRawStride();
-  AssertDI(Stride, "GenericSubrange must contain stride", &N);
-  AssertDI(isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
-           "Stride must be signed constant or DIVariable or DIExpression", &N);
+  CheckDI(Stride, "GenericSubrange must contain stride", &N);
+  CheckDI(isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
+          "Stride must be signed constant or DIVariable or DIExpression", &N);
 }
 
 void Verifier::visitDIEnumerator(const DIEnumerator &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
 }
 
 void Verifier::visitDIBasicType(const DIBasicType &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_base_type ||
-               N.getTag() == dwarf::DW_TAG_unspecified_type ||
-               N.getTag() == dwarf::DW_TAG_string_type,
-           "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_base_type ||
+              N.getTag() == dwarf::DW_TAG_unspecified_type ||
+              N.getTag() == dwarf::DW_TAG_string_type,
+          "invalid tag", &N);
 }
 
 void Verifier::visitDIStringType(const DIStringType &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N);
-  AssertDI(!(N.isBigEndian() && N.isLittleEndian()) ,
-           "has conflicting flags", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N);
+  CheckDI(!(N.isBigEndian() && N.isLittleEndian()), "has conflicting flags",
+          &N);
 }
 
 void Verifier::visitDIDerivedType(const DIDerivedType &N) {
   // Common scope checks.
   visitDIScope(N);
 
-  AssertDI(N.getTag() == dwarf::DW_TAG_typedef ||
-               N.getTag() == dwarf::DW_TAG_pointer_type ||
-               N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
-               N.getTag() == dwarf::DW_TAG_reference_type ||
-               N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
-               N.getTag() == dwarf::DW_TAG_const_type ||
-               N.getTag() == dwarf::DW_TAG_immutable_type ||
-               N.getTag() == dwarf::DW_TAG_volatile_type ||
-               N.getTag() == dwarf::DW_TAG_restrict_type ||
-               N.getTag() == dwarf::DW_TAG_atomic_type ||
-               N.getTag() == dwarf::DW_TAG_member ||
-               N.getTag() == dwarf::DW_TAG_inheritance ||
-               N.getTag() == dwarf::DW_TAG_friend ||
-               N.getTag() == dwarf::DW_TAG_set_type,
-           "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_typedef ||
+              N.getTag() == dwarf::DW_TAG_pointer_type ||
+              N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
+              N.getTag() == dwarf::DW_TAG_reference_type ||
+              N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
+              N.getTag() == dwarf::DW_TAG_const_type ||
+              N.getTag() == dwarf::DW_TAG_immutable_type ||
+              N.getTag() == dwarf::DW_TAG_volatile_type ||
+              N.getTag() == dwarf::DW_TAG_restrict_type ||
+              N.getTag() == dwarf::DW_TAG_atomic_type ||
+              N.getTag() == dwarf::DW_TAG_member ||
+              N.getTag() == dwarf::DW_TAG_inheritance ||
+              N.getTag() == dwarf::DW_TAG_friend ||
+              N.getTag() == dwarf::DW_TAG_set_type,
+          "invalid tag", &N);
   if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) {
-    AssertDI(isType(N.getRawExtraData()), "invalid pointer to member type", &N,
-             N.getRawExtraData());
+    CheckDI(isType(N.getRawExtraData()), "invalid pointer to member type", &N,
+            N.getRawExtraData());
   }
 
   if (N.getTag() == dwarf::DW_TAG_set_type) {
     if (auto *T = N.getRawBaseType()) {
       auto *Enum = dyn_cast_or_null<DICompositeType>(T);
       auto *Basic = dyn_cast_or_null<DIBasicType>(T);
-      AssertDI(
+      CheckDI(
           (Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type) ||
               (Basic && (Basic->getEncoding() == dwarf::DW_ATE_unsigned ||
                          Basic->getEncoding() == dwarf::DW_ATE_signed ||
@@ -1086,16 +1109,16 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
     }
   }
 
-  AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
-  AssertDI(isType(N.getRawBaseType()), "invalid base type", &N,
-           N.getRawBaseType());
+  CheckDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
+  CheckDI(isType(N.getRawBaseType()), "invalid base type", &N,
+          N.getRawBaseType());
 
   if (N.getDWARFAddressSpace()) {
-    AssertDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
-                 N.getTag() == dwarf::DW_TAG_reference_type ||
-                 N.getTag() == dwarf::DW_TAG_rvalue_reference_type,
-             "DWARF address space only applies to pointer or reference types",
-             &N);
+    CheckDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
+                N.getTag() == dwarf::DW_TAG_reference_type ||
+                N.getTag() == dwarf::DW_TAG_rvalue_reference_type,
+            "DWARF address space only applies to pointer or reference types",
+            &N);
   }
 }
 
@@ -1109,10 +1132,10 @@ static bool hasConflictingReferenceFlags(unsigned Flags) {
 void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) {
   auto *Params = dyn_cast<MDTuple>(&RawParams);
-  AssertDI(Params, "invalid template params", &N, &RawParams);
+  CheckDI(Params, "invalid template params", &N, &RawParams);
   for (Metadata *Op : Params->operands()) {
-    AssertDI(Op && isa<DITemplateParameter>(Op), "invalid template parameter",
-             &N, Params, Op);
+    CheckDI(Op && isa<DITemplateParameter>(Op), "invalid template parameter",
+            &N, Params, Op);
   }
 }
 
@@ -1120,83 +1143,83 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
   // Common scope checks.
   visitDIScope(N);
 
-  AssertDI(N.getTag() == dwarf::DW_TAG_array_type ||
-               N.getTag() == dwarf::DW_TAG_structure_type ||
-               N.getTag() == dwarf::DW_TAG_union_type ||
-               N.getTag() == dwarf::DW_TAG_enumeration_type ||
-               N.getTag() == dwarf::DW_TAG_class_type ||
-               N.getTag() == dwarf::DW_TAG_variant_part ||
-               N.getTag() == dwarf::DW_TAG_namelist,
-           "invalid tag", &N);
-
-  AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
-  AssertDI(isType(N.getRawBaseType()), "invalid base type", &N,
-           N.getRawBaseType());
-
-  AssertDI(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
-           "invalid composite elements", &N, N.getRawElements());
-  AssertDI(isType(N.getRawVTableHolder()), "invalid vtable holder", &N,
-           N.getRawVTableHolder());
-  AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
-           "invalid reference flags", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_array_type ||
+              N.getTag() == dwarf::DW_TAG_structure_type ||
+              N.getTag() == dwarf::DW_TAG_union_type ||
+              N.getTag() == dwarf::DW_TAG_enumeration_type ||
+              N.getTag() == dwarf::DW_TAG_class_type ||
+              N.getTag() == dwarf::DW_TAG_variant_part ||
+              N.getTag() == dwarf::DW_TAG_namelist,
+          "invalid tag", &N);
+
+  CheckDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
+  CheckDI(isType(N.getRawBaseType()), "invalid base type", &N,
+          N.getRawBaseType());
+
+  CheckDI(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
+          "invalid composite elements", &N, N.getRawElements());
+  CheckDI(isType(N.getRawVTableHolder()), "invalid vtable holder", &N,
+          N.getRawVTableHolder());
+  CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
+          "invalid reference flags", &N);
   unsigned DIBlockByRefStruct = 1 << 4;
-  AssertDI((N.getFlags() & DIBlockByRefStruct) == 0,
-           "DIBlockByRefStruct on DICompositeType is no longer supported", &N);
+  CheckDI((N.getFlags() & DIBlockByRefStruct) == 0,
+          "DIBlockByRefStruct on DICompositeType is no longer supported", &N);
 
   if (N.isVector()) {
     const DINodeArray Elements = N.getElements();
-    AssertDI(Elements.size() == 1 &&
-                 Elements[0]->getTag() == dwarf::DW_TAG_subrange_type,
-             "invalid vector, expected one element of type subrange", &N);
+    CheckDI(Elements.size() == 1 &&
+                Elements[0]->getTag() == dwarf::DW_TAG_subrange_type,
+            "invalid vector, expected one element of type subrange", &N);
   }
 
   if (auto *Params = N.getRawTemplateParams())
     visitTemplateParams(N, *Params);
 
   if (auto *D = N.getRawDiscriminator()) {
-    AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
-             "discriminator can only appear on variant part");
+    CheckDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
+            "discriminator can only appear on variant part");
   }
 
   if (N.getRawDataLocation()) {
-    AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
-             "dataLocation can only appear in array type");
+    CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+            "dataLocation can only appear in array type");
   }
 
   if (N.getRawAssociated()) {
-    AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
-             "associated can only appear in array type");
+    CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+            "associated can only appear in array type");
   }
 
   if (N.getRawAllocated()) {
-    AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
-             "allocated can only appear in array type");
+    CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+            "allocated can only appear in array type");
   }
 
   if (N.getRawRank()) {
-    AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
-             "rank can only appear in array type");
+    CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+            "rank can only appear in array type");
   }
 }
 
 void Verifier::visitDISubroutineType(const DISubroutineType &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
   if (auto *Types = N.getRawTypeArray()) {
-    AssertDI(isa<MDTuple>(Types), "invalid composite elements", &N, Types);
+    CheckDI(isa<MDTuple>(Types), "invalid composite elements", &N, Types);
     for (Metadata *Ty : N.getTypeArray()->operands()) {
-      AssertDI(isType(Ty), "invalid subroutine type ref", &N, Types, Ty);
+      CheckDI(isType(Ty), "invalid subroutine type ref", &N, Types, Ty);
     }
   }
-  AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
-           "invalid reference flags", &N);
+  CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
+          "invalid reference flags", &N);
 }
 
 void Verifier::visitDIFile(const DIFile &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
   Optional<DIFile::ChecksumInfo<StringRef>> Checksum = N.getChecksum();
   if (Checksum) {
-    AssertDI(Checksum->Kind <= DIFile::ChecksumKind::CSK_Last,
-             "invalid checksum kind", &N);
+    CheckDI(Checksum->Kind <= DIFile::ChecksumKind::CSK_Last,
+            "invalid checksum kind", &N);
     size_t Size;
     switch (Checksum->Kind) {
     case DIFile::CSK_MD5:
@@ -1209,137 +1232,137 @@ void Verifier::visitDIFile(const DIFile &N) {
       Size = 64;
       break;
     }
-    AssertDI(Checksum->Value.size() == Size, "invalid checksum length", &N);
-    AssertDI(Checksum->Value.find_if_not(llvm::isHexDigit) == StringRef::npos,
-             "invalid checksum", &N);
+    CheckDI(Checksum->Value.size() == Size, "invalid checksum length", &N);
+    CheckDI(Checksum->Value.find_if_not(llvm::isHexDigit) == StringRef::npos,
+            "invalid checksum", &N);
   }
 }
 
 void Verifier::visitDICompileUnit(const DICompileUnit &N) {
-  AssertDI(N.isDistinct(), "compile units must be distinct", &N);
-  AssertDI(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
+  CheckDI(N.isDistinct(), "compile units must be distinct", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
 
   // Don't bother verifying the compilation directory or producer string
   // as those could be empty.
-  AssertDI(N.getRawFile() && isa<DIFile>(N.getRawFile()), "invalid file", &N,
-           N.getRawFile());
-  AssertDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
-           N.getFile());
+  CheckDI(N.getRawFile() && isa<DIFile>(N.getRawFile()), "invalid file", &N,
+          N.getRawFile());
+  CheckDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
+          N.getFile());
   CurrentSourceLang = (dwarf::SourceLanguage)N.getSourceLanguage();
 
   verifySourceDebugInfo(N, *N.getFile());
 
-  AssertDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
-           "invalid emission kind", &N);
+  CheckDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
+          "invalid emission kind", &N);
 
   if (auto *Array = N.getRawEnumTypes()) {
-    AssertDI(isa<MDTuple>(Array), "invalid enum list", &N, Array);
+    CheckDI(isa<MDTuple>(Array), "invalid enum list", &N, Array);
     for (Metadata *Op : N.getEnumTypes()->operands()) {
       auto *Enum = dyn_cast_or_null<DICompositeType>(Op);
-      AssertDI(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type,
-               "invalid enum type", &N, N.getEnumTypes(), Op);
+      CheckDI(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type,
+              "invalid enum type", &N, N.getEnumTypes(), Op);
     }
   }
   if (auto *Array = N.getRawRetainedTypes()) {
-    AssertDI(isa<MDTuple>(Array), "invalid retained type list", &N, Array);
+    CheckDI(isa<MDTuple>(Array), "invalid retained type list", &N, Array);
     for (Metadata *Op : N.getRetainedTypes()->operands()) {
-      AssertDI(Op && (isa<DIType>(Op) ||
-                      (isa<DISubprogram>(Op) &&
-                       !cast<DISubprogram>(Op)->isDefinition())),
-               "invalid retained type", &N, Op);
+      CheckDI(
+          Op && (isa<DIType>(Op) || (isa<DISubprogram>(Op) &&
+                                     !cast<DISubprogram>(Op)->isDefinition())),
+          "invalid retained type", &N, Op);
     }
   }
   if (auto *Array = N.getRawGlobalVariables()) {
-    AssertDI(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
+    CheckDI(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
     for (Metadata *Op : N.getGlobalVariables()->operands()) {
-      AssertDI(Op && (isa<DIGlobalVariableExpression>(Op)),
-               "invalid global variable ref", &N, Op);
+      CheckDI(Op && (isa<DIGlobalVariableExpression>(Op)),
+              "invalid global variable ref", &N, Op);
    }
   }
   if (auto *Array = N.getRawImportedEntities()) {
-    AssertDI(isa<MDTuple>(Array), "invalid imported entity list", &N, Array);
+    CheckDI(isa<MDTuple>(Array), "invalid imported entity list", &N, Array);
     for (Metadata *Op : N.getImportedEntities()->operands()) {
-      AssertDI(Op && isa<DIImportedEntity>(Op), "invalid imported entity ref",
-               &N, Op);
+      CheckDI(Op && isa<DIImportedEntity>(Op), "invalid imported entity ref",
+              &N, Op);
    }
   }
   if (auto *Array = N.getRawMacros()) {
-    AssertDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
+    CheckDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
     for (Metadata *Op : N.getMacros()->operands()) {
-      AssertDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
+      CheckDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
     }
   }
   CUVisited.insert(&N);
 }
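A minimal sketch of compile-unit metadata these checks accept (node numbering
hypothetical); note the distinct node and the non-empty filename.

    !llvm.dbg.cu = !{!0}
    !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1,
                                 emissionKind: FullDebug)
    !1 = !DIFile(filename: "t.c", directory: "/tmp")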
 
 void Verifier::visitDISubprogram(const DISubprogram &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
-  AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
+  CheckDI(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
+  CheckDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
   if (auto *F = N.getRawFile())
-    AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+    CheckDI(isa<DIFile>(F), "invalid file", &N, F);
   else
-    AssertDI(N.getLine() == 0, "line specified with no file", &N, N.getLine());
+    CheckDI(N.getLine() == 0, "line specified with no file", &N, N.getLine());
   if (auto *T = N.getRawType())
-    AssertDI(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
-  AssertDI(isType(N.getRawContainingType()), "invalid containing type", &N,
-           N.getRawContainingType());
+    CheckDI(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
+  CheckDI(isType(N.getRawContainingType()), "invalid containing type", &N,
+          N.getRawContainingType());
   if (auto *Params = N.getRawTemplateParams())
     visitTemplateParams(N, *Params);
   if (auto *S = N.getRawDeclaration())
-    AssertDI(isa<DISubprogram>(S) && !cast<DISubprogram>(S)->isDefinition(),
-             "invalid subprogram declaration", &N, S);
+    CheckDI(isa<DISubprogram>(S) && !cast<DISubprogram>(S)->isDefinition(),
+            "invalid subprogram declaration", &N, S);
   if (auto *RawNode = N.getRawRetainedNodes()) {
     auto *Node = dyn_cast<MDTuple>(RawNode);
-    AssertDI(Node, "invalid retained nodes list", &N, RawNode);
+    CheckDI(Node, "invalid retained nodes list", &N, RawNode);
     for (Metadata *Op : Node->operands()) {
-      AssertDI(Op && (isa<DILocalVariable>(Op) || isa<DILabel>(Op)),
-               "invalid retained nodes, expected DILocalVariable or DILabel",
-               &N, Node, Op);
+      CheckDI(Op && (isa<DILocalVariable>(Op) || isa<DILabel>(Op)),
+              "invalid retained nodes, expected DILocalVariable or DILabel", &N,
+              Node, Op);
    }
   }
-  AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
-           "invalid reference flags", &N);
+  CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
+          "invalid reference flags", &N);
 
   auto *Unit = N.getRawUnit();
   if (N.isDefinition()) {
     // Subprogram definitions (not part of the type hierarchy).
-    AssertDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
-    AssertDI(Unit, "subprogram definitions must have a compile unit", &N);
-    AssertDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
+    CheckDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
+    CheckDI(Unit, "subprogram definitions must have a compile unit", &N);
+    CheckDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
     if (N.getFile())
       verifySourceDebugInfo(*N.getUnit(), *N.getFile());
   } else {
     // Subprogram declarations (part of the type hierarchy).
-    AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N);
+    CheckDI(!Unit, "subprogram declarations must not have a compile unit", &N);
   }
 
   if (auto *RawThrownTypes = N.getRawThrownTypes()) {
     auto *ThrownTypes = dyn_cast<MDTuple>(RawThrownTypes);
-    AssertDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes);
+    CheckDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes);
     for (Metadata *Op : ThrownTypes->operands())
-      AssertDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
-               Op);
+      CheckDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
+              Op);
   }
 
   if (N.areAllCallsDescribed())
-    AssertDI(N.isDefinition(),
-             "DIFlagAllCallsDescribed must be attached to a definition");
+    CheckDI(N.isDefinition(),
+            "DIFlagAllCallsDescribed must be attached to a definition");
 }
 
 void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
-  AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
-           "invalid local scope", &N, N.getRawScope());
+  CheckDI(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+  CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+          "invalid local scope", &N, N.getRawScope());
   if (auto *SP = dyn_cast<DISubprogram>(N.getRawScope()))
-    AssertDI(SP->isDefinition(), "scope points into the type hierarchy", &N);
+    CheckDI(SP->isDefinition(), "scope points into the type hierarchy", &N);
 }
 
 void Verifier::visitDILexicalBlock(const DILexicalBlock &N) {
   visitDILexicalBlockBase(N);
 
-  AssertDI(N.getLine() || !N.getColumn(),
-           "cannot have column info without line info", &N);
+  CheckDI(N.getLine() || !N.getColumn(),
+          "cannot have column info without line info", &N);
 }
 
 void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
@@ -1347,95 +1370,95 @@ void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
 }
 
 void Verifier::visitDICommonBlock(const DICommonBlock &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_common_block, "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_common_block, "invalid tag", &N);
   if (auto *S = N.getRawScope())
-    AssertDI(isa<DIScope>(S), "invalid scope ref", &N, S);
+    CheckDI(isa<DIScope>(S), "invalid scope ref", &N, S);
   if (auto *S = N.getRawDecl())
-    AssertDI(isa<DIGlobalVariable>(S), "invalid declaration", &N, S);
+    CheckDI(isa<DIGlobalVariable>(S), "invalid declaration", &N, S);
 }
 
 void Verifier::visitDINamespace(const DINamespace &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
   if (auto *S = N.getRawScope())
-    AssertDI(isa<DIScope>(S), "invalid scope ref", &N, S);
+    CheckDI(isa<DIScope>(S), "invalid scope ref", &N, S);
 }
 
 void Verifier::visitDIMacro(const DIMacro &N) {
-  AssertDI(N.getMacinfoType() == dwarf::DW_MACINFO_define ||
-               N.getMacinfoType() == dwarf::DW_MACINFO_undef,
-           "invalid macinfo type", &N);
-  AssertDI(!N.getName().empty(), "anonymous macro", &N);
+  CheckDI(N.getMacinfoType() == dwarf::DW_MACINFO_define ||
+              N.getMacinfoType() == dwarf::DW_MACINFO_undef,
+          "invalid macinfo type", &N);
+  CheckDI(!N.getName().empty(), "anonymous macro", &N);
   if (!N.getValue().empty()) {
     assert(N.getValue().data()[0] != ' ' && "Macro value has a space prefix");
   }
 }
 
 void Verifier::visitDIMacroFile(const DIMacroFile &N) {
-  AssertDI(N.getMacinfoType() == dwarf::DW_MACINFO_start_file,
-           "invalid macinfo type", &N);
+  CheckDI(N.getMacinfoType() == dwarf::DW_MACINFO_start_file,
+          "invalid macinfo type", &N);
   if (auto *F = N.getRawFile())
-    AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+    CheckDI(isa<DIFile>(F), "invalid file", &N, F);
 
   if (auto *Array = N.getRawElements()) {
-    AssertDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
+    CheckDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
     for (Metadata *Op : N.getElements()->operands()) {
-      AssertDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
+      CheckDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
    }
   }
 }
 
 void Verifier::visitDIArgList(const DIArgList &N) {
-  AssertDI(!N.getNumOperands(),
-           "DIArgList should have no operands other than a list of "
-           "ValueAsMetadata",
-           &N);
+  CheckDI(!N.getNumOperands(),
+          "DIArgList should have no operands other than a list of "
+          "ValueAsMetadata",
+          &N);
 }
 
 void Verifier::visitDIModule(const DIModule &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
-  AssertDI(!N.getName().empty(), "anonymous module", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
+  CheckDI(!N.getName().empty(), "anonymous module", &N);
 }
 
 void Verifier::visitDITemplateParameter(const DITemplateParameter &N) {
-  AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
+  CheckDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
 }
 
 void Verifier::visitDITemplateTypeParameter(const DITemplateTypeParameter &N) {
   visitDITemplateParameter(N);
-  AssertDI(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
-           &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
+          &N);
 }
 
 void Verifier::visitDITemplateValueParameter(
     const DITemplateValueParameter &N) {
   visitDITemplateParameter(N);
-  AssertDI(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
-               N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
-               N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
-           "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
+              N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
+              N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
+          "invalid tag", &N);
 }
 
 void Verifier::visitDIVariable(const DIVariable &N) {
   if (auto *S = N.getRawScope())
-    AssertDI(isa<DIScope>(S), "invalid scope", &N, S);
+    CheckDI(isa<DIScope>(S), "invalid scope", &N, S);
   if (auto *F = N.getRawFile())
-    AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+    CheckDI(isa<DIFile>(F), "invalid file", &N, F);
 }
 
 void Verifier::visitDIGlobalVariable(const DIGlobalVariable &N) {
   // Checks common to all variables.
   visitDIVariable(N);
 
-  AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
-  AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
-  // Assert only if the global variable is not an extern
+  CheckDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+  CheckDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
+  // Check only if the global variable is not an extern
   if (N.isDefinition())
-    AssertDI(N.getType(), "missing global variable type", &N);
+    CheckDI(N.getType(), "missing global variable type", &N);
   if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
-    AssertDI(isa<DIDerivedType>(Member),
-             "invalid static data member declaration", &N, Member);
+    CheckDI(isa<DIDerivedType>(Member),
+            "invalid static data member declaration", &N, Member);
   }
 }
 
@@ -1443,32 +1466,32 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) {
   // Checks common to all variables.
   visitDIVariable(N);
 
-  AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
-  AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
-  AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
-           "local variable requires a valid scope", &N, N.getRawScope());
+  CheckDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
+  CheckDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+  CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+          "local variable requires a valid scope", &N, N.getRawScope());
   if (auto Ty = N.getType())
-    AssertDI(!isa<DISubroutineType>(Ty), "invalid type", &N, N.getType());
+    CheckDI(!isa<DISubroutineType>(Ty), "invalid type", &N, N.getType());
 }
 
 void Verifier::visitDILabel(const DILabel &N) {
   if (auto *S = N.getRawScope())
-    AssertDI(isa<DIScope>(S), "invalid scope", &N, S);
+    CheckDI(isa<DIScope>(S), "invalid scope", &N, S);
   if (auto *F = N.getRawFile())
-    AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+    CheckDI(isa<DIFile>(F), "invalid file", &N, F);
 
-  AssertDI(N.getTag() == dwarf::DW_TAG_label, "invalid tag", &N);
-  AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
-           "label requires a valid scope", &N, N.getRawScope());
+  CheckDI(N.getTag() == dwarf::DW_TAG_label, "invalid tag", &N);
+  CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+          "label requires a valid scope", &N, N.getRawScope());
 }
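Illustrative metadata for the scope rules above (hypothetical numbering):
local variables and labels must live in a DILocalScope such as a subprogram.

    !7 = distinct !DISubprogram(name: "f", file: !1, line: 3, unit: !0)
    !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
    !9 = !DILocalVariable(name: "x", scope: !7, file: !1, line: 4, type: !8)
    !10 = !DILabel(scope: !7, name: "out", file: !1, line: 9)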
 
 void Verifier::visitDIExpression(const DIExpression &N) {
-  AssertDI(N.isValid(), "invalid expression", &N);
+  CheckDI(N.isValid(), "invalid expression", &N);
 }
 
 void Verifier::visitDIGlobalVariableExpression(
     const DIGlobalVariableExpression &GVE) {
-  AssertDI(GVE.getVariable(), "missing variable");
+  CheckDI(GVE.getVariable(), "missing variable");
   if (auto *Var = GVE.getVariable())
     visitDIGlobalVariable(*Var);
   if (auto *Expr = GVE.getExpression()) {
@@ -1479,21 +1502,21 @@ void Verifier::visitDIGlobalVariableExpression(
 }
 
 void Verifier::visitDIObjCProperty(const DIObjCProperty &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
   if (auto *T = N.getRawType())
-    AssertDI(isType(T), "invalid type ref", &N, T);
+    CheckDI(isType(T), "invalid type ref", &N, T);
   if (auto *F = N.getRawFile())
-    AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+    CheckDI(isa<DIFile>(F), "invalid file", &N, F);
 }
 
 void Verifier::visitDIImportedEntity(const DIImportedEntity &N) {
-  AssertDI(N.getTag() == dwarf::DW_TAG_imported_module ||
-               N.getTag() == dwarf::DW_TAG_imported_declaration,
-           "invalid tag", &N);
+  CheckDI(N.getTag() == dwarf::DW_TAG_imported_module ||
+              N.getTag() == dwarf::DW_TAG_imported_declaration,
+          "invalid tag", &N);
   if (auto *S = N.getRawScope())
-    AssertDI(isa<DIScope>(S), "invalid scope for imported entity", &N, S);
-  AssertDI(isDINode(N.getRawEntity()), "invalid imported entity", &N,
-           N.getRawEntity());
+    CheckDI(isa<DIScope>(S), "invalid scope for imported entity", &N, S);
+  CheckDI(isDINode(N.getRawEntity()), "invalid imported entity", &N,
+          N.getRawEntity());
 }
 
 void Verifier::visitComdat(const Comdat &C) {
@@ -1501,8 +1524,8 @@ void Verifier::visitComdat(const Comdat &C) {
   // Entities with private linkage don't have entries in the symbol table.
   if (TT.isOSBinFormatCOFF())
     if (const GlobalValue *GV = M.getNamedValue(C.getName()))
-      Assert(!GV->hasPrivateLinkage(),
-             "comdat global value has private linkage", GV);
+      Check(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
+            GV);
 }
 
 void Verifier::visitModuleIdents() {
@@ -1513,12 +1536,12 @@ void Verifier::visitModuleIdents() {
   // llvm.ident takes a list of metadata entry. Each entry has only one string.
   // Scan each llvm.ident entry and make sure that this requirement is met.
   for (const MDNode *N : Idents->operands()) {
-    Assert(N->getNumOperands() == 1,
-           "incorrect number of operands in llvm.ident metadata", N);
-    Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
-           ("invalid value for llvm.ident metadata entry operand"
-            "(the operand should be a string)"),
-           N->getOperand(0));
+    Check(N->getNumOperands() == 1,
+          "incorrect number of operands in llvm.ident metadata", N);
+    Check(dyn_cast_or_null<MDString>(N->getOperand(0)),
+          ("invalid value for llvm.ident metadata entry operand"
+           "(the operand should be a string)"),
+          N->getOperand(0));
   }
 }
 
@@ -1531,12 +1554,12 @@ void Verifier::visitModuleCommandLines() {
   // string. Scan each llvm.commandline entry and make sure that this
   // requirement is met.
   for (const MDNode *N : CommandLines->operands()) {
-    Assert(N->getNumOperands() == 1,
-           "incorrect number of operands in llvm.commandline metadata", N);
-    Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
-           ("invalid value for llvm.commandline metadata entry operand"
-            "(the operand should be a string)"),
-           N->getOperand(0));
+    Check(N->getNumOperands() == 1,
+          "incorrect number of operands in llvm.commandline metadata", N);
+    Check(dyn_cast_or_null<MDString>(N->getOperand(0)),
+          ("invalid value for llvm.commandline metadata entry operand"
+           "(the operand should be a string)"),
+          N->getOperand(0));
   }
 }
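A sketch of well-formed entries for the two walks above; each list element is
a node holding exactly one string (contents hypothetical).

    !llvm.ident = !{!20}
    !llvm.commandline = !{!21}
    !20 = !{!"clang version 15.0.0 (hypothetical)"}
    !21 = !{!"-cc1 -O2 t.c"}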
 
@@ -1577,21 +1600,20 @@ Verifier::visitModuleFlag(const MDNode *Op,
                           SmallVectorImpl<const MDNode *> &Requirements) {
   // Each module flag should have three arguments, the merge behavior (a
   // constant int), the flag ID (an MDString), and the value.
-  Assert(Op->getNumOperands() == 3,
-         "incorrect number of operands in module flag", Op);
+  Check(Op->getNumOperands() == 3,
+        "incorrect number of operands in module flag", Op);
   Module::ModFlagBehavior MFB;
   if (!Module::isValidModFlagBehavior(Op->getOperand(0), MFB)) {
-    Assert(
-        mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)),
-        "invalid behavior operand in module flag (expected constant integer)",
-        Op->getOperand(0));
-    Assert(false,
-           "invalid behavior operand in module flag (unexpected constant)",
-           Op->getOperand(0));
+    Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)),
+          "invalid behavior operand in module flag (expected constant integer)",
+          Op->getOperand(0));
+    Check(false,
+          "invalid behavior operand in module flag (unexpected constant)",
+          Op->getOperand(0));
   }
   MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
-  Assert(ID, "invalid ID operand in module flag (expected metadata string)",
-         Op->getOperand(1));
+  Check(ID, "invalid ID operand in module flag (expected metadata string)",
+        Op->getOperand(1));
 
   // Check the values for behaviors with additional requirements.
   switch (MFB) {
@@ -1601,10 +1623,17 @@ Verifier::visitModuleFlag(const MDNode *Op,
     // These behavior types accept any value.
     break;
 
+  case Module::Min: {
+    Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
+          "invalid value for 'min' module flag (expected constant integer)",
+          Op->getOperand(2));
+    break;
+  }
+
   case Module::Max: {
-    Assert(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
-           "invalid value for 'max' module flag (expected constant integer)",
-           Op->getOperand(2));
+    Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
+          "invalid value for 'max' module flag (expected constant integer)",
+          Op->getOperand(2));
     break;
   }
 
@@ -1612,13 +1641,13 @@ Verifier::visitModuleFlag(const MDNode *Op,
     // The value should itself be an MDNode with two operands, a flag ID (an
     // MDString), and a value.
     MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2));
-    Assert(Value && Value->getNumOperands() == 2,
-           "invalid value for 'require' module flag (expected metadata pair)",
-           Op->getOperand(2));
-    Assert(isa<MDString>(Value->getOperand(0)),
-           ("invalid value for 'require' module flag "
-            "(first value operand should be a string)"),
-           Value->getOperand(0));
+    Check(Value && Value->getNumOperands() == 2,
+          "invalid value for 'require' module flag (expected metadata pair)",
+          Op->getOperand(2));
+    Check(isa<MDString>(Value->getOperand(0)),
+          ("invalid value for 'require' module flag "
+           "(first value operand should be a string)"),
+          Value->getOperand(0));
 
     // Append it to the list of requirements, to check once all module flags are
     // scanned.
@@ -1629,10 +1658,10 @@ Verifier::visitModuleFlag(const MDNode *Op,
   case Module::Append:
   case Module::AppendUnique: {
     // These behavior types require the operand be an MDNode.
-    Assert(isa<MDNode>(Op->getOperand(2)),
-           "invalid value for 'append'-type module flag "
-           "(expected a metadata node)",
-           Op->getOperand(2));
+    Check(isa<MDNode>(Op->getOperand(2)),
+          "invalid value for 'append'-type module flag "
+          "(expected a metadata node)",
+          Op->getOperand(2));
     break;
   }
   }
 
@@ -1640,29 +1669,29 @@ Verifier::visitModuleFlag(const MDNode *Op,
   // Unless this is a "requires" flag, check the ID is unique.
   if (MFB != Module::Require) {
     bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
-    Assert(Inserted,
-           "module flag identifiers must be unique (or of 'require' type)", ID);
+    Check(Inserted,
+          "module flag identifiers must be unique (or of 'require' type)", ID);
   }
 
   if (ID->getString() == "wchar_size") {
     ConstantInt *Value =
        mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
-    Assert(Value, "wchar_size metadata requires constant integer argument");
+    Check(Value, "wchar_size metadata requires constant integer argument");
  }
 
   if (ID->getString() == "Linker Options") {
     // If the llvm.linker.options named metadata exists, we assume that the
     // bitcode reader has upgraded the module flag. Otherwise the flag might
     // have been created by a client directly.
-    Assert(M.getNamedMetadata("llvm.linker.options"),
-           "'Linker Options' named metadata no longer supported");
+    Check(M.getNamedMetadata("llvm.linker.options"),
+          "'Linker Options' named metadata no longer supported");
   }
 
   if (ID->getString() == "SemanticInterposition") {
     ConstantInt *Value =
         mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
-    Assert(Value,
-           "SemanticInterposition metadata requires constant integer argument");
+    Check(Value,
+          "SemanticInterposition metadata requires constant integer argument");
  }
 
   if (ID->getString() == "CG Profile") {
@@ -1676,16 +1705,16 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) {
     if (!FuncMDO)
       return;
     auto F = dyn_cast<ValueAsMetadata>(FuncMDO);
-    Assert(F && isa<Function>(F->getValue()->stripPointerCasts()),
-           "expected a Function or null", FuncMDO);
+    Check(F && isa<Function>(F->getValue()->stripPointerCasts()),
+          "expected a Function or null", FuncMDO);
   };
   auto Node = dyn_cast_or_null<MDNode>(MDO);
-  Assert(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO);
+  Check(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO);
   CheckFunction(Node->getOperand(0));
   CheckFunction(Node->getOperand(1));
   auto Count = dyn_cast_or_null<ConstantAsMetadata>(Node->getOperand(2));
-  Assert(Count && Count->getType()->isIntegerTy(),
-         "expected an integer constant", Node->getOperand(2));
+  Check(Count && Count->getType()->isIntegerTy(),
+        "expected an integer constant", Node->getOperand(2));
 }
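For illustration, module flags exercising the newly verified 'min' behavior
(8) next to the existing 'max' (7); the flag names here are hypothetical.

    !llvm.module.flags = !{!30, !31}
    !30 = !{i32 8, !"small-data-limit", i32 128}  ; behavior 8 = Min
    !31 = !{i32 7, !"PIC Level", i32 2}           ; behavior 7 = Max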
 
 void Verifier::verifyAttributeTypes(AttributeSet Attrs, const Value *V) {
@@ -1724,15 +1753,14 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
   verifyAttributeTypes(Attrs, V);
 
   for (Attribute Attr : Attrs)
-    Assert(Attr.isStringAttribute() ||
-           Attribute::canUseAsParamAttr(Attr.getKindAsEnum()),
-           "Attribute '" + Attr.getAsString() +
-               "' does not apply to parameters",
-           V);
+    Check(Attr.isStringAttribute() ||
+              Attribute::canUseAsParamAttr(Attr.getKindAsEnum()),
+          "Attribute '" + Attr.getAsString() + "' does not apply to parameters",
+          V);
 
   if (Attrs.hasAttribute(Attribute::ImmArg)) {
-    Assert(Attrs.getNumAttributes() == 1,
-           "Attribute 'immarg' is incompatible with other attributes", V);
+    Check(Attrs.getNumAttributes() == 1,
+          "Attribute 'immarg' is incompatible with other attributes", V);
   }
 
   // Check for mutually incompatible attributes.  Only inreg is compatible with
@@ -1745,52 +1773,52 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
                    Attrs.hasAttribute(Attribute::InReg);
   AttrCount += Attrs.hasAttribute(Attribute::Nest);
   AttrCount += Attrs.hasAttribute(Attribute::ByRef);
-  Assert(AttrCount <= 1,
-         "Attributes 'byval', 'inalloca', 'preallocated', 'inreg', 'nest', "
-         "'byref', and 'sret' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::InAlloca) &&
-           Attrs.hasAttribute(Attribute::ReadOnly)),
-         "Attributes "
-         "'inalloca and readonly' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::StructRet) &&
-           Attrs.hasAttribute(Attribute::Returned)),
-         "Attributes "
-         "'sret and returned' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::ZExt) &&
-           Attrs.hasAttribute(Attribute::SExt)),
-         "Attributes "
-         "'zeroext and signext' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::ReadNone) &&
-           Attrs.hasAttribute(Attribute::ReadOnly)),
-         "Attributes "
-         "'readnone and readonly' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::ReadNone) &&
-           Attrs.hasAttribute(Attribute::WriteOnly)),
-         "Attributes "
-         "'readnone and writeonly' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::ReadOnly) &&
-           Attrs.hasAttribute(Attribute::WriteOnly)),
-         "Attributes "
-         "'readonly and writeonly' are incompatible!",
-         V);
-
-  Assert(!(Attrs.hasAttribute(Attribute::NoInline) &&
-           Attrs.hasAttribute(Attribute::AlwaysInline)),
-         "Attributes "
-         "'noinline and alwaysinline' are incompatible!",
-         V);
+  Check(AttrCount <= 1,
+        "Attributes 'byval', 'inalloca', 'preallocated', 'inreg', 'nest', "
+        "'byref', and 'sret' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::InAlloca) &&
+          Attrs.hasAttribute(Attribute::ReadOnly)),
+        "Attributes "
+        "'inalloca and readonly' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::StructRet) &&
+          Attrs.hasAttribute(Attribute::Returned)),
+        "Attributes "
+        "'sret and returned' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::ZExt) &&
+          Attrs.hasAttribute(Attribute::SExt)),
+        "Attributes "
+        "'zeroext and signext' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::ReadNone) &&
+          Attrs.hasAttribute(Attribute::ReadOnly)),
+        "Attributes "
+        "'readnone and readonly' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::ReadNone) &&
+          Attrs.hasAttribute(Attribute::WriteOnly)),
+        "Attributes "
+        "'readnone and writeonly' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::ReadOnly) &&
+          Attrs.hasAttribute(Attribute::WriteOnly)),
+        "Attributes "
+        "'readonly and writeonly' are incompatible!",
+        V);
+
+  Check(!(Attrs.hasAttribute(Attribute::NoInline) &&
+          Attrs.hasAttribute(Attribute::AlwaysInline)),
+        "Attributes "
+        "'noinline and alwaysinline' are incompatible!",
+        V);
 
   AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
   for (Attribute Attr : Attrs) {
@@ -1804,55 +1832,61 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
 
   if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
     if (Attrs.hasAttribute(Attribute::ByVal)) {
+      if (Attrs.hasAttribute(Attribute::Alignment)) {
+        Align AttrAlign = Attrs.getAlignment().valueOrOne();
+        Align MaxAlign(ParamMaxAlignment);
+        Check(AttrAlign <= MaxAlign,
+              "Attribute 'align' exceed the max size 2^14", V);
+      }
       SmallPtrSet<Type *, 4> Visited;
-      Assert(Attrs.getByValType()->isSized(&Visited),
-             "Attribute 'byval' does not support unsized types!", V);
+      Check(Attrs.getByValType()->isSized(&Visited),
+            "Attribute 'byval' does not support unsized types!", V);
     }
     if (Attrs.hasAttribute(Attribute::ByRef)) {
      SmallPtrSet<Type *, 4> Visited;
-      Assert(Attrs.getByRefType()->isSized(&Visited),
-             "Attribute 'byref' does not support unsized types!", V);
+      Check(Attrs.getByRefType()->isSized(&Visited),
+            "Attribute 'byref' does not support unsized types!", V);
    }
     if (Attrs.hasAttribute(Attribute::InAlloca)) {
       SmallPtrSet<Type *, 4> Visited;
-      Assert(Attrs.getInAllocaType()->isSized(&Visited),
-             "Attribute 'inalloca' does not support unsized types!", V);
+      Check(Attrs.getInAllocaType()->isSized(&Visited),
+            "Attribute 'inalloca' does not support unsized types!", V);
    }
     if (Attrs.hasAttribute(Attribute::Preallocated)) {
       SmallPtrSet<Type *, 4> Visited;
-      Assert(Attrs.getPreallocatedType()->isSized(&Visited),
-             "Attribute 'preallocated' does not support unsized types!", V);
+      Check(Attrs.getPreallocatedType()->isSized(&Visited),
+            "Attribute 'preallocated' does not support unsized types!", V);
    }
     if (!PTy->isOpaque()) {
       if (!isa<PointerType>(PTy->getNonOpaquePointerElementType()))
-        Assert(!Attrs.hasAttribute(Attribute::SwiftError),
-               "Attribute 'swifterror' only applies to parameters "
-               "with pointer to pointer type!",
-               V);
+        Check(!Attrs.hasAttribute(Attribute::SwiftError),
+              "Attribute 'swifterror' only applies to parameters "
+              "with pointer to pointer type!",
+              V);
      if (Attrs.hasAttribute(Attribute::ByRef)) {
-        Assert(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
-               "Attribute 'byref' type does not match parameter!", V);
+        Check(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
+              "Attribute 'byref' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
-        Assert(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
-               "Attribute 'byval' type does not match parameter!", V);
+        Check(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
+              "Attribute 'byval' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::Preallocated)) {
-        Assert(Attrs.getPreallocatedType() ==
-                   PTy->getNonOpaquePointerElementType(),
-               "Attribute 'preallocated' type does not match parameter!", V);
+        Check(Attrs.getPreallocatedType() ==
+                  PTy->getNonOpaquePointerElementType(),
+              "Attribute 'preallocated' type does not match parameter!", V);
      }
 
       if (Attrs.hasAttribute(Attribute::InAlloca)) {
-        Assert(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
-               "Attribute 'inalloca' type does not match parameter!", V);
+        Check(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
+              "Attribute 'inalloca' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::ElementType)) {
-        Assert(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
-               "Attribute 'elementtype' type does not match parameter!", V);
+        Check(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
+              "Attribute 'elementtype' type does not match parameter!", V);
       }
     }
   }
 }
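An illustrative declaration (hypothetical signature) for the new
ParamMaxAlignment cap: align 16384 (2^14) on a byval parameter is the largest
value the verifier accepts, while align 32768 would now be rejected.

    declare void @consume(ptr byval(i64) align 16384 %p)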
 
@@ -1877,14 +1911,14 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
     return;
 
   if (AttributeListsVisited.insert(Attrs.getRawPointer()).second) {
-    Assert(Attrs.hasParentContext(Context),
-           "Attribute list does not match Module context!", &Attrs, V);
+    Check(Attrs.hasParentContext(Context),
+          "Attribute list does not match Module context!", &Attrs, V);
     for (const auto &AttrSet : Attrs) {
-      Assert(!AttrSet.hasAttributes() || AttrSet.hasParentContext(Context),
-             "Attribute set does not match Module context!", &AttrSet, V);
+      Check(!AttrSet.hasAttributes() || AttrSet.hasParentContext(Context),
+            "Attribute set does not match Module context!", &AttrSet, V);
       for (const auto &A : AttrSet) {
-        Assert(A.hasParentContext(Context),
-               "Attribute does not match Module context!", &A, V);
+        Check(A.hasParentContext(Context),
+              "Attribute does not match Module context!", &A, V);
       }
     }
   }
@@ -1899,11 +1933,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
   // Verify return value attributes.
   AttributeSet RetAttrs = Attrs.getRetAttrs();
   for (Attribute RetAttr : RetAttrs)
-    Assert(RetAttr.isStringAttribute() ||
-           Attribute::canUseAsRetAttr(RetAttr.getKindAsEnum()),
-           "Attribute '" + RetAttr.getAsString() +
-               "' does not apply to function return values",
-           V);
+    Check(RetAttr.isStringAttribute() ||
+              Attribute::canUseAsRetAttr(RetAttr.getKindAsEnum()),
+          "Attribute '" + RetAttr.getAsString() +
+              "' does not apply to function return values",
+          V);
 
   verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
 
@@ -1913,56 +1947,55 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
     AttributeSet ArgAttrs = Attrs.getParamAttrs(i);
 
     if (!IsIntrinsic) {
-      Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
-             "immarg attribute only applies to intrinsics",V);
+      Check(!ArgAttrs.hasAttribute(Attribute::ImmArg),
+            "immarg attribute only applies to intrinsics", V);
       if (!IsInlineAsm)
-        Assert(!ArgAttrs.hasAttribute(Attribute::ElementType),
-               "Attribute 'elementtype' can only be applied to intrinsics"
-               " and inline asm.", V);
+        Check(!ArgAttrs.hasAttribute(Attribute::ElementType),
+              "Attribute 'elementtype' can only be applied to intrinsics"
+              " and inline asm.",
+              V);
     }
 
     verifyParameterAttrs(ArgAttrs, Ty, V);
 
     if (ArgAttrs.hasAttribute(Attribute::Nest)) {
-      Assert(!SawNest, "More than one parameter has attribute nest!", V);
+      Check(!SawNest, "More than one parameter has attribute nest!", V);
      SawNest = true;
    }
 
     if (ArgAttrs.hasAttribute(Attribute::Returned)) {
-      Assert(!SawReturned, "More than one parameter has attribute returned!",
-             V);
-      Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
-             "Incompatible argument and return types for 'returned' attribute",
-             V);
+      Check(!SawReturned, "More than one parameter has attribute returned!", V);
+      Check(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
+            "Incompatible argument and return types for 'returned' attribute",
+            V);
       SawReturned = true;
     }
 
     if (ArgAttrs.hasAttribute(Attribute::StructRet)) {
-      Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
-      Assert(i == 0 || i == 1,
-             "Attribute 'sret' is not on first or second parameter!", V);
+      Check(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
+      Check(i == 0 || i == 1,
+            "Attribute 'sret' is not on first or second parameter!", V);
       SawSRet = true;
     }
 
     if (ArgAttrs.hasAttribute(Attribute::SwiftSelf)) {
-      Assert(!SawSwiftSelf, "Cannot have multiple 'swiftself' parameters!", V);
+      Check(!SawSwiftSelf, "Cannot have multiple 'swiftself' parameters!", V);
       SawSwiftSelf = true;
     }
 
     if (ArgAttrs.hasAttribute(Attribute::SwiftAsync)) {
-      Assert(!SawSwiftAsync, "Cannot have multiple 'swiftasync' parameters!", V);
+      Check(!SawSwiftAsync, "Cannot have multiple 'swiftasync' parameters!", V);
       SawSwiftAsync = true;
     }
 
     if (ArgAttrs.hasAttribute(Attribute::SwiftError)) {
-      Assert(!SawSwiftError, "Cannot have multiple 'swifterror' parameters!",
-             V);
+      Check(!SawSwiftError, "Cannot have multiple 'swifterror' parameters!", V);
       SawSwiftError = true;
     }
 
     if (ArgAttrs.hasAttribute(Attribute::InAlloca)) {
-      Assert(i == FT->getNumParams() - 1,
-             "inalloca isn't on the last parameter!", V);
+      Check(i == FT->getNumParams() - 1,
+            "inalloca isn't on the last parameter!", V);
     }
   }
 
@@ -1971,53 +2004,53 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
   verifyAttributeTypes(Attrs.getFnAttrs(), V);
   for (Attribute FnAttr : Attrs.getFnAttrs())
-    Assert(FnAttr.isStringAttribute() ||
-           Attribute::canUseAsFnAttr(FnAttr.getKindAsEnum()),
-           "Attribute '" + FnAttr.getAsString() +
-               "' does not apply to functions!",
-           V);
-
-  Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
-           Attrs.hasFnAttr(Attribute::ReadOnly)),
-         "Attributes 'readnone and readonly' are incompatible!", V);
-
-  Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
-           Attrs.hasFnAttr(Attribute::WriteOnly)),
-         "Attributes 'readnone and writeonly' are incompatible!", V);
-
-  Assert(!(Attrs.hasFnAttr(Attribute::ReadOnly) &&
-           Attrs.hasFnAttr(Attribute::WriteOnly)),
-         "Attributes 'readonly and writeonly' are incompatible!", V);
-
-  Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
-           Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)),
-         "Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
-         "incompatible!",
-         V);
-
-  Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
-           Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)),
-         "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
-
-  Assert(!(Attrs.hasFnAttr(Attribute::NoInline) &&
-           Attrs.hasFnAttr(Attribute::AlwaysInline)),
-         "Attributes 'noinline and alwaysinline' are incompatible!", V);
+  Check(FnAttr.isStringAttribute() ||
+            Attribute::canUseAsFnAttr(FnAttr.getKindAsEnum()),
+        "Attribute '" + FnAttr.getAsString() +
+            "' does not apply to functions!",
+        V);
+
+  Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+          Attrs.hasFnAttr(Attribute::ReadOnly)),
+        "Attributes 'readnone and readonly' are incompatible!", V);
+
+  Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+          Attrs.hasFnAttr(Attribute::WriteOnly)),
+        "Attributes 'readnone and writeonly' are incompatible!", V);
+
+  Check(!(Attrs.hasFnAttr(Attribute::ReadOnly) &&
+          Attrs.hasFnAttr(Attribute::WriteOnly)),
+        "Attributes 'readonly and writeonly' are incompatible!", V);
+
+  Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+          Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)),
+        "Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
+        "incompatible!",
+        V);
+
+  Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+          Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)),
+        "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
+
+  Check(!(Attrs.hasFnAttr(Attribute::NoInline) &&
+          Attrs.hasFnAttr(Attribute::AlwaysInline)),
+        "Attributes 'noinline and alwaysinline' are incompatible!", V);
 
   if (Attrs.hasFnAttr(Attribute::OptimizeNone)) {
-    Assert(Attrs.hasFnAttr(Attribute::NoInline),
-           "Attribute 'optnone' requires 'noinline'!", V);
+    Check(Attrs.hasFnAttr(Attribute::NoInline),
+          "Attribute 'optnone' requires 'noinline'!", V);
 
-    Assert(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
-           "Attributes 'optsize and optnone' are incompatible!", V);
+    Check(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
+          "Attributes 'optsize and optnone' are incompatible!", V);
 
-    Assert(!Attrs.hasFnAttr(Attribute::MinSize),
-           "Attributes 'minsize and optnone' are incompatible!", V);
+    Check(!Attrs.hasFnAttr(Attribute::MinSize),
+          "Attributes 'minsize and optnone' are incompatible!", V);
   }
 
   if (Attrs.hasFnAttr(Attribute::JumpTable)) {
     const GlobalValue *GV = cast<GlobalValue>(V);
-    Assert(GV->hasGlobalUnnamedAddr(),
-           "Attribute 'jumptable' requires 'unnamed_addr'", V);
+    Check(GV->hasGlobalUnnamedAddr(),
+          "Attribute 'jumptable' requires 'unnamed_addr'", V);
   }
 
   if (Attrs.hasFnAttr(Attribute::AllocSize)) {
@@ -2047,6 +2080,25 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
     return;
   }
 
+  if (Attrs.hasFnAttr(Attribute::AllocKind)) {
+    AllocFnKind K = Attrs.getAllocKind();
+    AllocFnKind Type =
+        K & (AllocFnKind::Alloc | AllocFnKind::Realloc | AllocFnKind::Free);
+    if (!is_contained(
+            {AllocFnKind::Alloc, AllocFnKind::Realloc, AllocFnKind::Free},
+            Type))
+      CheckFailed(
+          "'allockind()' requires exactly one of alloc, realloc, and free");
+    if ((Type == AllocFnKind::Free) &&
+        ((K & (AllocFnKind::Uninitialized | AllocFnKind::Zeroed |
+               AllocFnKind::Aligned)) != AllocFnKind::Unknown))
+      CheckFailed("'allockind(\"free\")' doesn't allow uninitialized, zeroed, "
+                  "or aligned modifiers.");
+    AllocFnKind ZeroedUninit = AllocFnKind::Uninitialized | AllocFnKind::Zeroed;
+    if ((K & ZeroedUninit) == ZeroedUninit)
+      CheckFailed("'allockind()' can't be both zeroed and uninitialized");
+  }
+
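A sketch of declarations the new allockind verification accepts (function
names hypothetical): exactly one of alloc, realloc, free; no modifiers on
free; zeroed and uninitialized never combined.

    declare ptr @my_malloc(i64) allockind("alloc,uninitialized")
    declare ptr @my_calloc(i64, i64) allockind("alloc,zeroed")
    declare ptr @my_realloc(ptr, i64) allockind("realloc")
    declare void @my_free(ptr) allockind("free")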
   if (Attrs.hasFnAttr(Attribute::VScaleRange)) {
     unsigned VScaleMin = Attrs.getFnAttrs().getVScaleRangeMin();
     if (VScaleMin == 0)
@@ -2073,27 +2125,27 @@ void Verifier::verifyFunctionMetadata(
   for (const auto &Pair : MDs) {
     if (Pair.first == LLVMContext::MD_prof) {
       MDNode *MD = Pair.second;
-      Assert(MD->getNumOperands() >= 2,
-             "!prof annotations should have no less than 2 operands", MD);
+      Check(MD->getNumOperands() >= 2,
+            "!prof annotations should have no less than 2 operands", MD);
 
       // Check first operand.
-      Assert(MD->getOperand(0) != nullptr, "first operand should not be null",
-             MD);
-      Assert(isa<MDString>(MD->getOperand(0)),
-             "expected string with name of the !prof annotation", MD);
+      Check(MD->getOperand(0) != nullptr, "first operand should not be null",
+            MD);
+      Check(isa<MDString>(MD->getOperand(0)),
+            "expected string with name of the !prof annotation", MD);
       MDString *MDS = cast<MDString>(MD->getOperand(0));
       StringRef ProfName = MDS->getString();
-      Assert(ProfName.equals("function_entry_count") ||
-                 ProfName.equals("synthetic_function_entry_count"),
-             "first operand should be 'function_entry_count'"
-             " or 'synthetic_function_entry_count'",
-             MD);
+      Check(ProfName.equals("function_entry_count") ||
+                ProfName.equals("synthetic_function_entry_count"),
+            "first operand should be 'function_entry_count'"
+            " or 'synthetic_function_entry_count'",
+            MD);
 
       // Check second operand.
-      Assert(MD->getOperand(1) != nullptr, "second operand should not be null",
-             MD);
-      Assert(isa<ConstantAsMetadata>(MD->getOperand(1)),
-             "expected integer argument to function_entry_count", MD);
+      Check(MD->getOperand(1) != nullptr, "second operand should not be null",
+            MD);
+      Check(isa<ConstantAsMetadata>(MD->getOperand(1)),
+            "expected integer argument to function_entry_count", MD);
     }
   }
 }
@@ -2115,8 +2167,8 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
     if (const auto *GV = dyn_cast<GlobalValue>(C)) {
       // Global Values get visited separately, but we do need to make sure
       // that the global value is in the correct module
-      Assert(GV->getParent() == &M, "Referencing global in another module!",
-             EntryC, &M, GV, GV->getParent());
+      Check(GV->getParent() == &M, "Referencing global in another module!",
+            EntryC, &M, GV, GV->getParent());
       continue;
     }
 
@@ -2134,9 +2186,9 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
 
 void Verifier::visitConstantExpr(const ConstantExpr *CE) {
   if (CE->getOpcode() == Instruction::BitCast)
-    Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
-                                 CE->getType()),
-           "Invalid bitcast", CE);
+    Check(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
+                                CE->getType()),
+          "Invalid bitcast", CE);
 }
 
 bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
@@ -2155,17 +2207,17 @@ void Verifier::verifyInlineAsmCall(const CallBase &Call) {
     if (CI.isIndirect) {
       const Value *Arg = Call.getArgOperand(ArgNo);
-      Assert(Arg->getType()->isPointerTy(),
-             "Operand for indirect constraint must have pointer type",
-             &Call);
+      Check(Arg->getType()->isPointerTy(),
+            "Operand for indirect constraint must have pointer type", &Call);
 
-      Assert(Call.getAttributes().getParamElementType(ArgNo),
-             "Operand for indirect constraint must have elementtype attribute",
-             &Call);
+      Check(Call.getParamElementType(ArgNo),
+            "Operand for indirect constraint must have elementtype attribute",
+            &Call);
     } else {
-      Assert(!Call.paramHasAttr(ArgNo, Attribute::ElementType),
-             "Elementtype attribute can only be applied for indirect "
-             "constraints", &Call);
+      Check(!Call.paramHasAttr(ArgNo, Attribute::ElementType),
+            "Elementtype attribute can only be applied for indirect "
+            "constraints",
+            &Call);
     }
 
     ArgNo++;
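An illustrative call showing the elementtype rule for inline asm (assembly
string hypothetical): indirect "*m" operands must carry the attribute, while
direct operands must not.

    call void asm "movl $$1, $0", "=*m"(ptr elementtype(i32) %slot)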
+  Type *TargetElemType = Call.getParamElementType(2);
+  Check(TargetElemType,
+        "gc.statepoint callee argument must have elementtype attribute", Call);
+  FunctionType *TargetFuncType = dyn_cast<FunctionType>(TargetElemType);
+  Check(TargetFuncType,
+        "gc.statepoint callee elementtype must be function type", Call);
 
   const int NumCallArgs = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
-  Assert(NumCallArgs >= 0,
-         "gc.statepoint number of arguments to underlying call "
-         "must be positive",
-         Call);
+  Check(NumCallArgs >= 0,
+        "gc.statepoint number of arguments to underlying call "
+        "must be positive",
+        Call);
   const int NumParams = (int)TargetFuncType->getNumParams();
   if (TargetFuncType->isVarArg()) {
-    Assert(NumCallArgs >= NumParams,
-           "gc.statepoint mismatch in number of vararg call args", Call);
+    Check(NumCallArgs >= NumParams,
+          "gc.statepoint mismatch in number of vararg call args", Call);
 
     // TODO: Remove this limitation
-    Assert(TargetFuncType->getReturnType()->isVoidTy(),
-           "gc.statepoint doesn't support wrapping non-void "
-           "vararg functions yet",
-           Call);
+    Check(TargetFuncType->getReturnType()->isVoidTy(),
+          "gc.statepoint doesn't support wrapping non-void "
+          "vararg functions yet",
+          Call);
   } else
-    Assert(NumCallArgs == NumParams,
-           "gc.statepoint mismatch in number of call args", Call);
+    Check(NumCallArgs == NumParams,
+          "gc.statepoint mismatch in number of call args", Call);
 
   const uint64_t Flags =
       cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
-  Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
-         "unknown flag used in gc.statepoint flags argument", Call);
+  Check((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
+        "unknown flag used in gc.statepoint flags argument", Call);
 
   // Verify that the types of the call parameter arguments match
   // the type of the wrapped callee.
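With opaque pointers the verifier can no longer derive the wrapped callee's signature from the pointer's pointee type, so the hunk above reads it from the callsite's elementtype attribute instead. A hedged sketch of the new acceptance rule, using a toy type model in place of llvm::Type:

    // Sketch: the statepoint callee signature now comes from elementtype.
    #include <cassert>

    enum class TyKind { Function, Integer, Pointer };

    struct CallSiteModel {
      bool HasElementType;    // elementtype(...) present on the callee arg?
      TyKind ElementTypeKind; // what kind of type it names
    };

    bool verifyStatepointCallee(const CallSiteModel &CS) {
      if (!CS.HasElementType)
        return false; // "callee argument must have elementtype attribute"
      // "callee elementtype must be function type"
      return CS.ElementTypeKind == TyKind::Function;
    }

    int main() {
      assert(verifyStatepointCallee({true, TyKind::Function}));
      assert(!verifyStatepointCallee({false, TyKind::Function}));
      assert(!verifyStatepointCallee({true, TyKind::Integer}));
    }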
@@ -2229,63 +2281,62 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
   for (int i = 0; i < NumParams; i++) {
     Type *ParamType = TargetFuncType->getParamType(i);
     Type *ArgType = Call.getArgOperand(5 + i)->getType();
-    Assert(ArgType == ParamType,
-           "gc.statepoint call argument does not match wrapped "
-           "function type",
-           Call);
+    Check(ArgType == ParamType,
+          "gc.statepoint call argument does not match wrapped "
+          "function type",
+          Call);
 
     if (TargetFuncType->isVarArg()) {
       AttributeSet ArgAttrs = Attrs.getParamAttrs(5 + i);
-      Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
-             "Attribute 'sret' cannot be used for vararg call arguments!",
-             Call);
+      Check(!ArgAttrs.hasAttribute(Attribute::StructRet),
+            "Attribute 'sret' cannot be used for vararg call arguments!", Call);
     }
   }
 
   const int EndCallArgsInx = 4 + NumCallArgs;
 
   const Value *NumTransitionArgsV = Call.getArgOperand(EndCallArgsInx + 1);
-  Assert(isa<ConstantInt>(NumTransitionArgsV),
-         "gc.statepoint number of transition arguments "
-         "must be constant integer",
-         Call);
+  Check(isa<ConstantInt>(NumTransitionArgsV),
+        "gc.statepoint number of transition arguments "
+        "must be constant integer",
+        Call);
   const int NumTransitionArgs =
       cast<ConstantInt>(NumTransitionArgsV)->getZExtValue();
-  Assert(NumTransitionArgs == 0,
-         "gc.statepoint w/inline transition bundle is deprecated", Call);
+  Check(NumTransitionArgs == 0,
+        "gc.statepoint w/inline transition bundle is deprecated", Call);
   const int EndTransitionArgsInx = EndCallArgsInx + 1 + NumTransitionArgs;
 
   const Value *NumDeoptArgsV = Call.getArgOperand(EndTransitionArgsInx + 1);
-  Assert(isa<ConstantInt>(NumDeoptArgsV),
-         "gc.statepoint number of deoptimization arguments "
-         "must be constant integer",
-         Call);
+  Check(isa<ConstantInt>(NumDeoptArgsV),
+        "gc.statepoint number of deoptimization arguments "
+        "must be constant integer",
+        Call);
   const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
-  Assert(NumDeoptArgs == 0,
-         "gc.statepoint w/inline deopt operands is deprecated", Call);
+  Check(NumDeoptArgs == 0,
+        "gc.statepoint w/inline deopt operands is deprecated", Call);
 
   const int ExpectedNumArgs = 7 + NumCallArgs;
-  Assert(ExpectedNumArgs == (int)Call.arg_size(),
-         "gc.statepoint too many arguments", Call);
+  Check(ExpectedNumArgs == (int)Call.arg_size(),
+        "gc.statepoint too many arguments", Call);
 
   // Check that the only uses of this gc.statepoint are gc.result or
   // gc.relocate calls which are tied to this statepoint and thus part
   // of the same statepoint sequence
   for (const User *U : Call.users()) {
     const CallInst *UserCall = dyn_cast<const CallInst>(U);
-    Assert(UserCall, "illegal use of statepoint token", Call, U);
+    Check(UserCall, "illegal use of statepoint token", Call, U);
     if (!UserCall)
       continue;
-    Assert(isa<GCResultInst>(UserCall) || isa<GCRelocateInst>(UserCall),
-           "gc.result or gc.relocate are the only value uses "
-           "of a gc.statepoint",
-           Call, U);
+    Check(isa<GCResultInst>(UserCall) || isa<GCRelocateInst>(UserCall),
+          "gc.result or gc.relocate are the only value uses "
+          "of a gc.statepoint",
+          Call, U);
     if (isa<GCResultInst>(UserCall)) {
-      Assert(UserCall->getArgOperand(0) == &Call,
-             "gc.result connected to wrong gc.statepoint", Call, UserCall);
+      Check(UserCall->getArgOperand(0) == &Call,
+            "gc.result connected to wrong gc.statepoint", Call, UserCall);
     } else if (isa<GCRelocateInst>(Call)) {
-      Assert(UserCall->getArgOperand(0) == &Call,
-             "gc.relocate connected to wrong gc.statepoint", Call, UserCall);
+      Check(UserCall->getArgOperand(0) == &Call,
+            "gc.relocate connected to wrong gc.statepoint", Call, UserCall);
     }
   }
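The flags operand test in the statepoint hunks above is a standard unknown-bit check: any bit outside StatepointFlags::MaskAll fails verification. A minimal sketch, with illustrative flag values rather than LLVM's real StatepointFlags:

    // Sketch of the "unknown flag used in gc.statepoint" test.
    #include <cassert>
    #include <cstdint>

    constexpr uint64_t GCTransition = 1 << 0; // illustrative flag bit
    constexpr uint64_t MaskAll = GCTransition; // union of all known flags

    bool knownFlagsOnly(uint64_t Flags) { return (Flags & ~MaskAll) == 0; }

    int main() {
      assert(knownFlagsOnly(0));
      assert(knownFlagsOnly(GCTransition));
      assert(!knownFlagsOnly(1 << 5)); // undefined bit -> verifier failure
    }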
@@ -2304,11 +2355,11 @@ void Verifier::verifyFrameRecoverIndices() {
     Function *F = Counts.first;
     unsigned EscapedObjectCount = Counts.second.first;
     unsigned MaxRecoveredIndex = Counts.second.second;
-    Assert(MaxRecoveredIndex <= EscapedObjectCount,
-           "all indices passed to llvm.localrecover must be less than the "
-           "number of arguments passed to llvm.localescape in the parent "
-           "function",
-           F);
+    Check(MaxRecoveredIndex <= EscapedObjectCount,
+          "all indices passed to llvm.localrecover must be less than the "
+          "number of arguments passed to llvm.localescape in the parent "
+          "function",
+          F);
   }
 }
 
@@ -2345,8 +2396,8 @@ void Verifier::verifySiblingFuncletUnwinds() {
         CycleNodes.push_back(CycleTerminator);
         CyclePad = getSuccPad(CycleTerminator);
       } while (CyclePad != SuccPad);
-      Assert(false, "EH pads can't handle each other's exceptions",
-             ArrayRef<Instruction *>(CycleNodes));
+      Check(false, "EH pads can't handle each other's exceptions",
+            ArrayRef<Instruction *>(CycleNodes));
     }
     // Don't re-walk a node we've already checked
     if (!Visited.insert(SuccPad).second)
@@ -2374,24 +2425,24 @@ void Verifier::visitFunction(const Function &F) {
   FunctionType *FT = F.getFunctionType();
   unsigned NumArgs = F.arg_size();
 
-  Assert(&Context == &F.getContext(),
-         "Function context does not match Module context!", &F);
+  Check(&Context == &F.getContext(),
+        "Function context does not match Module context!", &F);
 
-  Assert(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
-  Assert(FT->getNumParams() == NumArgs,
-         "# formal arguments must match # of arguments for function type!", &F,
-         FT);
-  Assert(F.getReturnType()->isFirstClassType() ||
-             F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(),
-         "Functions cannot return aggregate values!", &F);
+  Check(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+  Check(FT->getNumParams() == NumArgs,
+        "# formal arguments must match # of arguments for function type!", &F,
+        FT);
+  Check(F.getReturnType()->isFirstClassType() ||
+            F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(),
+        "Functions cannot return aggregate values!", &F);
 
-  Assert(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
-         "Invalid struct return type!", &F);
+  Check(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+        "Invalid struct return type!", &F);
 
   AttributeList Attrs = F.getAttributes();
 
-  Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
-         "Attribute after last parameter!", &F);
+  Check(verifyAttributeCount(Attrs, FT->getNumParams()),
+        "Attribute after last parameter!", &F);
 
   bool IsIntrinsic = F.isIntrinsic();
 
@@ -2401,11 +2452,11 @@ void Verifier::visitFunction(const Function &F) {
   // On function declarations/definitions, we do not support the builtin
   // attribute. We do not check this in VerifyFunctionAttrs since that is
   // checking for Attributes that can/can not ever be on functions.
-  Assert(!Attrs.hasFnAttr(Attribute::Builtin),
-         "Attribute 'builtin' can only be applied to a callsite.", &F);
+  Check(!Attrs.hasFnAttr(Attribute::Builtin),
+        "Attribute 'builtin' can only be applied to a callsite.", &F);
 
-  Assert(!Attrs.hasAttrSomewhere(Attribute::ElementType),
-         "Attribute 'elementtype' can only be applied to a callsite.", &F);
+  Check(!Attrs.hasAttrSomewhere(Attribute::ElementType),
+        "Attribute 'elementtype' can only be applied to a callsite.", &F);
 
   // Check that this function meets the restrictions on this calling convention.
  // Sometimes varargs is used for perfectly forwarding thunks, so some of these
@@ -2415,38 +2466,37 @@ void Verifier::visitFunction(const Function &F) {
   case CallingConv::C:
     break;
   case CallingConv::X86_INTR: {
-    Assert(F.arg_empty() || Attrs.hasParamAttr(0, Attribute::ByVal),
-           "Calling convention parameter requires byval", &F);
+    Check(F.arg_empty() || Attrs.hasParamAttr(0, Attribute::ByVal),
+          "Calling convention parameter requires byval", &F);
     break;
   }
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
-    Assert(F.getReturnType()->isVoidTy(),
-           "Calling convention requires void return type", &F);
+    Check(F.getReturnType()->isVoidTy(),
+          "Calling convention requires void return type", &F);
     LLVM_FALLTHROUGH;
   case CallingConv::AMDGPU_VS:
   case CallingConv::AMDGPU_HS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
-    Assert(!F.hasStructRetAttr(),
-           "Calling convention does not allow sret", &F);
+    Check(!F.hasStructRetAttr(), "Calling convention does not allow sret", &F);
     if (F.getCallingConv() != CallingConv::SPIR_KERNEL) {
       const unsigned StackAS = DL.getAllocaAddrSpace();
       unsigned i = 0;
       for (const Argument &Arg : F.args()) {
-        Assert(!Attrs.hasParamAttr(i, Attribute::ByVal),
-               "Calling convention disallows byval", &F);
-        Assert(!Attrs.hasParamAttr(i, Attribute::Preallocated),
-               "Calling convention disallows preallocated", &F);
-        Assert(!Attrs.hasParamAttr(i, Attribute::InAlloca),
-               "Calling convention disallows inalloca", &F);
+        Check(!Attrs.hasParamAttr(i, Attribute::ByVal),
+              "Calling convention disallows byval", &F);
+        Check(!Attrs.hasParamAttr(i, Attribute::Preallocated),
+              "Calling convention disallows preallocated", &F);
+        Check(!Attrs.hasParamAttr(i, Attribute::InAlloca),
+              "Calling convention disallows inalloca", &F);
 
         if (Attrs.hasParamAttr(i, Attribute::ByRef)) {
           // FIXME: Should also disallow LDS and GDS, but we don't have the enum
           // value here.
-          Assert(Arg.getType()->getPointerAddressSpace() != StackAS,
-                 "Calling convention disallows stack byref", &F);
+          Check(Arg.getType()->getPointerAddressSpace() != StackAS,
+                "Calling convention disallows stack byref", &F);
         }
 
         ++i;
@@ -2459,27 +2509,28 @@ void Verifier::visitFunction(const Function &F) {
   case CallingConv::Intel_OCL_BI:
   case CallingConv::PTX_Kernel:
   case CallingConv::PTX_Device:
-    Assert(!F.isVarArg(), "Calling convention does not support varargs or "
-                          "perfect forwarding!",
-           &F);
+    Check(!F.isVarArg(),
+          "Calling convention does not support varargs or "
+          "perfect forwarding!",
+          &F);
     break;
   }
 
   // Check that the argument values match the function type for this function...
   unsigned i = 0;
   for (const Argument &Arg : F.args()) {
-    Assert(Arg.getType() == FT->getParamType(i),
-           "Argument value does not match function argument type!", &Arg,
-           FT->getParamType(i));
-    Assert(Arg.getType()->isFirstClassType(),
-           "Function arguments must have first-class types!", &Arg);
+    Check(Arg.getType() == FT->getParamType(i),
+          "Argument value does not match function argument type!", &Arg,
+          FT->getParamType(i));
+    Check(Arg.getType()->isFirstClassType(),
+          "Function arguments must have first-class types!", &Arg);
     if (!IsIntrinsic) {
-      Assert(!Arg.getType()->isMetadataTy(),
-             "Function takes metadata but isn't an intrinsic", &Arg, &F);
-      Assert(!Arg.getType()->isTokenTy(),
-             "Function takes token but isn't an intrinsic", &Arg, &F);
-      Assert(!Arg.getType()->isX86_AMXTy(),
-             "Function takes x86_amx but isn't an intrinsic", &Arg, &F);
+      Check(!Arg.getType()->isMetadataTy(),
+            "Function takes metadata but isn't an intrinsic", &Arg, &F);
+      Check(!Arg.getType()->isTokenTy(),
+            "Function takes token but isn't an intrinsic", &Arg, &F);
+      Check(!Arg.getType()->isX86_AMXTy(),
+            "Function takes x86_amx but isn't an intrinsic", &Arg, &F);
     }
 
     // Check that swifterror argument is only used by loads and stores.
@@ -2490,10 +2541,10 @@ void Verifier::visitFunction(const Function &F) {
   }
 
   if (!IsIntrinsic) {
-    Assert(!F.getReturnType()->isTokenTy(),
-           "Function returns a token but isn't an intrinsic", &F);
-    Assert(!F.getReturnType()->isX86_AMXTy(),
-           "Function returns a x86_amx but isn't an intrinsic", &F);
+    Check(!F.getReturnType()->isTokenTy(),
+          "Function returns a token but isn't an intrinsic", &F);
+    Check(!F.getReturnType()->isX86_AMXTy(),
+          "Function returns a x86_amx but isn't an intrinsic", &F);
   }
 
   // Get the function metadata attachments.
@@ -2506,44 +2557,44 @@ void Verifier::visitFunction(const Function &F) {
   if (F.hasPersonalityFn()) {
     auto *Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
     if (Per)
-      Assert(Per->getParent() == F.getParent(),
-             "Referencing personality function in another module!",
-             &F, F.getParent(), Per, Per->getParent());
+      Check(Per->getParent() == F.getParent(),
+            "Referencing personality function in another module!", &F,
+            F.getParent(), Per, Per->getParent());
   }
 
   if (F.isMaterializable()) {
     // Function has a body somewhere we can't see.
-    Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F,
-           MDs.empty() ? nullptr : MDs.front().second);
+    Check(MDs.empty(), "unmaterialized function cannot have metadata", &F,
+          MDs.empty() ? nullptr : MDs.front().second);
   } else if (F.isDeclaration()) {
     for (const auto &I : MDs) {
       // This is used for call site debug information.
-      AssertDI(I.first != LLVMContext::MD_dbg ||
-                   !cast<DISubprogram>(I.second)->isDistinct(),
-               "function declaration may only have a unique !dbg attachment",
-               &F);
-      Assert(I.first != LLVMContext::MD_prof,
-             "function declaration may not have a !prof attachment", &F);
+      CheckDI(I.first != LLVMContext::MD_dbg ||
+                  !cast<DISubprogram>(I.second)->isDistinct(),
+              "function declaration may only have a unique !dbg attachment",
+              &F);
+      Check(I.first != LLVMContext::MD_prof,
+            "function declaration may not have a !prof attachment", &F);
 
       // Verify the metadata itself.
       visitMDNode(*I.second, AreDebugLocsAllowed::Yes);
     }
-    Assert(!F.hasPersonalityFn(),
-           "Function declaration shouldn't have a personality routine", &F);
+    Check(!F.hasPersonalityFn(),
+          "Function declaration shouldn't have a personality routine", &F);
   } else {
     // Verify that this function (which has a body) is not named "llvm.*".  It
     // is not legal to define intrinsics.
-    Assert(!IsIntrinsic, "llvm intrinsics cannot be defined!", &F);
+    Check(!IsIntrinsic, "llvm intrinsics cannot be defined!", &F);
 
     // Check the entry node
     const BasicBlock *Entry = &F.getEntryBlock();
-    Assert(pred_empty(Entry),
-           "Entry block to function must not have predecessors!", Entry);
+    Check(pred_empty(Entry),
+          "Entry block to function must not have predecessors!", Entry);
 
     // The address of the entry block cannot be taken, unless it is dead.
     if (Entry->hasAddressTaken()) {
-      Assert(!BlockAddress::lookup(Entry)->isConstantUsed(),
-             "blockaddress may not be used with the entry block!", Entry);
+      Check(!BlockAddress::lookup(Entry)->isConstantUsed(),
+            "blockaddress may not be used with the entry block!", Entry);
     }
 
     unsigned NumDebugAttachments = 0, NumProfAttachments = 0;
@@ -2556,26 +2607,26 @@ void Verifier::visitFunction(const Function &F) {
         break;
       case LLVMContext::MD_dbg: {
         ++NumDebugAttachments;
-        AssertDI(NumDebugAttachments == 1,
-                 "function must have a single !dbg attachment", &F, I.second);
-        AssertDI(isa<DISubprogram>(I.second),
-                 "function !dbg attachment must be a subprogram", &F, I.second);
-        AssertDI(cast<DISubprogram>(I.second)->isDistinct(),
-                 "function definition may only have a distinct !dbg attachment",
-                 &F);
+        CheckDI(NumDebugAttachments == 1,
+                "function must have a single !dbg attachment", &F, I.second);
+        CheckDI(isa<DISubprogram>(I.second),
+                "function !dbg attachment must be a subprogram", &F, I.second);
+        CheckDI(cast<DISubprogram>(I.second)->isDistinct(),
+                "function definition may only have a distinct !dbg attachment",
+                &F);
 
         auto *SP = cast<DISubprogram>(I.second);
         const Function *&AttachedTo = DISubprogramAttachments[SP];
-        AssertDI(!AttachedTo || AttachedTo == &F,
-                 "DISubprogram attached to more than one function", SP, &F);
+        CheckDI(!AttachedTo || AttachedTo == &F,
+                "DISubprogram attached to more than one function", SP, &F);
         AttachedTo = &F;
         AllowLocs = AreDebugLocsAllowed::Yes;
         break;
       }
       case LLVMContext::MD_prof:
         ++NumProfAttachments;
-        Assert(NumProfAttachments == 1,
-               "function must have a single !prof attachment", &F, I.second);
+        Check(NumProfAttachments == 1,
+              "function must have a single !prof attachment", &F, I.second);
         break;
       }
 
@@ -2592,28 +2643,27 @@ void Verifier::visitFunction(const Function &F) {
     const User *U;
     if (F.hasAddressTaken(&U, false, true, false,
                           /*IgnoreARCAttachedCall=*/true))
-      Assert(false, "Invalid user of intrinsic instruction!", U);
+      Check(false, "Invalid user of intrinsic instruction!", U);
   }
 
   // Check intrinsics' signatures.
   switch (F.getIntrinsicID()) {
   case Intrinsic::experimental_gc_get_pointer_base: {
     FunctionType *FT = F.getFunctionType();
-    Assert(FT->getNumParams() == 1, "wrong number of parameters", F);
-    Assert(isa<PointerType>(F.getReturnType()),
-           "gc.get.pointer.base must return a pointer", F);
-    Assert(FT->getParamType(0) == F.getReturnType(),
-           "gc.get.pointer.base operand and result must be of the same type",
-           F);
+    Check(FT->getNumParams() == 1, "wrong number of parameters", F);
+    Check(isa<PointerType>(F.getReturnType()),
+          "gc.get.pointer.base must return a pointer", F);
+    Check(FT->getParamType(0) == F.getReturnType(),
+          "gc.get.pointer.base operand and result must be of the same type", F);
     break;
   }
   case Intrinsic::experimental_gc_get_pointer_offset: {
     FunctionType *FT = F.getFunctionType();
-    Assert(FT->getNumParams() == 1, "wrong number of parameters", F);
-    Assert(isa<PointerType>(FT->getParamType(0)),
-           "gc.get.pointer.offset operand must be a pointer", F);
-    Assert(F.getReturnType()->isIntegerTy(),
-           "gc.get.pointer.offset must return integer", F);
+    Check(FT->getNumParams() == 1, "wrong number of parameters", F);
+    Check(isa<PointerType>(FT->getParamType(0)),
+          "gc.get.pointer.offset operand must be a pointer", F);
+    Check(F.getReturnType()->isIntegerTy(),
+          "gc.get.pointer.offset must return integer", F);
     break;
   }
   }
@@ -2638,12 +2688,11 @@ void Verifier::visitFunction(const Function &F) {
       return;
 
     Metadata *Parent = DL->getRawScope();
-    AssertDI(Parent && isa<DILocalScope>(Parent),
-             "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
-             Parent);
+    CheckDI(Parent && isa<DILocalScope>(Parent),
+            "DILocation's scope must be a DILocalScope", N, &F, &I, DL, Parent);
 
     DILocalScope *Scope = DL->getInlinedAtScope();
-    Assert(Scope, "Failed to find DILocalScope", DL);
+    Check(Scope, "Failed to find DILocalScope", DL);
 
     if (!Seen.insert(Scope).second)
       return;
@@ -2655,9 +2704,9 @@ void Verifier::visitFunction(const Function &F) {
     if (SP && ((Scope != SP) && !Seen.insert(SP).second))
       return;
 
-    AssertDI(SP->describes(&F),
-             "!dbg attachment points at wrong subprogram for function", N, &F,
-             &I, DL, Scope, SP);
+    CheckDI(SP->describes(&F),
+            "!dbg attachment points at wrong subprogram for function", N, &F,
+            &I, DL, Scope, SP);
   };
   for (auto &BB : F)
     for (auto &I : BB) {
@@ -2677,7 +2726,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
   InstsInThisBlock.clear();
 
   // Ensure that basic blocks have terminators!
-  Assert(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+  Check(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
 
   // Check constraints that this basic block imposes on all of the PHI nodes in
   // it.
@@ -2686,10 +2735,10 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
     SmallVector<std::pair<BasicBlock *, Value *>, 8> Values;
     llvm::sort(Preds);
     for (const PHINode &PN : BB.phis()) {
-      Assert(PN.getNumIncomingValues() == Preds.size(),
-             "PHINode should have one entry for each predecessor of its "
-             "parent basic block!",
-             &PN);
+      Check(PN.getNumIncomingValues() == Preds.size(),
+            "PHINode should have one entry for each predecessor of its "
+            "parent basic block!",
+            &PN);
 
       // Get and sort all incoming values in the PHI node...
       Values.clear();
@@ -2704,17 +2753,17 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
       // particular basic block in this PHI node, that the incoming values are
      // all identical.
       //
-      Assert(i == 0 || Values[i].first != Values[i - 1].first ||
-                 Values[i].second == Values[i - 1].second,
-             "PHI node has multiple entries for the same basic block with "
-             "different incoming values!",
-             &PN, Values[i].first, Values[i].second, Values[i - 1].second);
+      Check(i == 0 || Values[i].first != Values[i - 1].first ||
+                Values[i].second == Values[i - 1].second,
+            "PHI node has multiple entries for the same basic block with "
+            "different incoming values!",
+            &PN, Values[i].first, Values[i].second, Values[i - 1].second);
 
       // Check to make sure that the predecessors and PHI node entries are
       // matched up.
-      Assert(Values[i].first == Preds[i],
-             "PHI node entries do not match predecessors!", &PN,
-             Values[i].first, Preds[i]);
+      Check(Values[i].first == Preds[i],
+            "PHI node entries do not match predecessors!", &PN,
+            Values[i].first, Preds[i]);
     }
   }
 }
@@ -2722,21 +2771,21 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
 
   // Check that all instructions have their parent pointers set up correctly.
   for (auto &I : BB) {
-    Assert(I.getParent() == &BB, "Instruction has bogus parent pointer!");
+    Check(I.getParent() == &BB, "Instruction has bogus parent pointer!");
   }
 }
 
 void Verifier::visitTerminator(Instruction &I) {
   // Ensure that terminators only exist at the end of the basic block.
-  Assert(&I == I.getParent()->getTerminator(),
-         "Terminator found in the middle of a basic block!", I.getParent());
+  Check(&I == I.getParent()->getTerminator(),
+        "Terminator found in the middle of a basic block!", I.getParent());
   visitInstruction(I);
 }
 
 void Verifier::visitBranchInst(BranchInst &BI) {
   if (BI.isConditional()) {
-    Assert(BI.getCondition()->getType()->isIntegerTy(1),
-           "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+    Check(BI.getCondition()->getType()->isIntegerTy(1),
+          "Branch condition is not 'i1' type!", &BI, BI.getCondition());
   }
   visitTerminator(BI);
 }
 
@@ -2745,15 +2794,15 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
   Function *F = RI.getParent()->getParent();
   unsigned N = RI.getNumOperands();
   if (F->getReturnType()->isVoidTy())
-    Assert(N == 0,
-           "Found return instr that returns non-void in Function of void "
-           "return type!",
-           &RI, F->getReturnType());
+    Check(N == 0,
+          "Found return instr that returns non-void in Function of void "
+          "return type!",
+          &RI, F->getReturnType());
   else
-    Assert(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
-           "Function return type does not match operand "
-           "type of return inst!",
-           &RI, F->getReturnType());
+    Check(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+          "Function return type does not match operand "
+          "type of return inst!",
+          &RI, F->getReturnType());
 
   // Check to make sure that the return value has necessary properties for
   // terminators...
@@ -2761,46 +2810,45 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
 }
 
 void Verifier::visitSwitchInst(SwitchInst &SI) {
-  Assert(SI.getType()->isVoidTy(), "Switch must have void result type!", &SI);
+  Check(SI.getType()->isVoidTy(), "Switch must have void result type!", &SI);
   // Check to make sure that all of the constants in the switch instruction
   // have the same type as the switched-on value.
   Type *SwitchTy = SI.getCondition()->getType();
   SmallPtrSet<ConstantInt *, 32> Constants;
   for (auto &Case : SI.cases()) {
-    Assert(Case.getCaseValue()->getType() == SwitchTy,
-           "Switch constants must all be same type as switch value!", &SI);
-    Assert(Constants.insert(Case.getCaseValue()).second,
-           "Duplicate integer as switch case", &SI, Case.getCaseValue());
+    Check(Case.getCaseValue()->getType() == SwitchTy,
+          "Switch constants must all be same type as switch value!", &SI);
+    Check(Constants.insert(Case.getCaseValue()).second,
+          "Duplicate integer as switch case", &SI, Case.getCaseValue());
   }
 
   visitTerminator(SI);
 }
 
 void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
-  Assert(BI.getAddress()->getType()->isPointerTy(),
-         "Indirectbr operand must have pointer type!", &BI);
+  Check(BI.getAddress()->getType()->isPointerTy(),
+        "Indirectbr operand must have pointer type!", &BI);
   for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
-    Assert(BI.getDestination(i)->getType()->isLabelTy(),
-           "Indirectbr destinations must all have pointer type!", &BI);
+    Check(BI.getDestination(i)->getType()->isLabelTy(),
+          "Indirectbr destinations must all have pointer type!", &BI);
 
   visitTerminator(BI);
 }
 
 void Verifier::visitCallBrInst(CallBrInst &CBI) {
-  Assert(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!",
-         &CBI);
+  Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
   const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
-  Assert(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+  Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
   for (unsigned i = 0, e = CBI.getNumSuccessors(); i != e; ++i)
-    Assert(CBI.getSuccessor(i)->getType()->isLabelTy(),
-           "Callbr successors must all have pointer type!", &CBI);
+    Check(CBI.getSuccessor(i)->getType()->isLabelTy(),
+          "Callbr successors must all have pointer type!", &CBI);
   for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) {
-    Assert(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
-           "Using an unescaped label as a callbr argument!", &CBI);
+    Check(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
+          "Using an unescaped label as a callbr argument!", &CBI);
     if (isa<BasicBlock>(CBI.getOperand(i)))
       for (unsigned j = i + 1; j != e; ++j)
-        Assert(CBI.getOperand(i) != CBI.getOperand(j),
-               "Duplicate callbr destination!", &CBI);
+        Check(CBI.getOperand(i) != CBI.getOperand(j),
+              "Duplicate callbr destination!", &CBI);
   }
   {
     SmallPtrSet<BasicBlock *, 4> ArgBBs;
@@ -2808,7 +2856,7 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
       if (auto *BA = dyn_cast<BlockAddress>(V))
        ArgBBs.insert(BA->getBasicBlock());
    for (BasicBlock *BB : CBI.getIndirectDests())
-      Assert(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI);
+      Check(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI);
  }

  verifyInlineAsmCall(CBI);
@@ -2816,12 +2864,12 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
 }
 
 void Verifier::visitSelectInst(SelectInst &SI) {
-  Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
-                                         SI.getOperand(2)),
-         "Invalid operands for select instruction!", &SI);
+  Check(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+                                        SI.getOperand(2)),
+        "Invalid operands for select instruction!", &SI);
 
-  Assert(SI.getTrueValue()->getType() == SI.getType(),
-         "Select values must have same type as select instruction!", &SI);
+  Check(SI.getTrueValue()->getType() == SI.getType(),
+        "Select values must have same type as select instruction!", &SI);
   visitInstruction(SI);
 }
 
@@ -2829,7 +2877,7 @@ void Verifier::visitSelectInst(SelectInst &SI) {
 /// a pass, if any exist, it's an error.
 ///
 void Verifier::visitUserOp1(Instruction &I) {
-  Assert(false, "User-defined operators should not live outside of a pass!", &I);
+  Check(false, "User-defined operators should not live outside of a pass!", &I);
 }
 
 void Verifier::visitTruncInst(TruncInst &I) {
@@ -2841,11 +2889,11 @@ void Verifier::visitTruncInst(TruncInst &I) {
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
-  Assert(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
-  Assert(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
-  Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
-         "trunc source and destination must both be a vector or neither", &I);
-  Assert(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I);
+  Check(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+  Check(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+  Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+        "trunc source and destination must both be a vector or neither", &I);
+  Check(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I);
 
   visitInstruction(I);
 }
@@ -2856,14 +2904,14 @@ void Verifier::visitZExtInst(ZExtInst &I) {
   Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  Assert(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
-  Assert(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
-  Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
-         "zext source and destination must both be a vector or neither", &I);
+  Check(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+  Check(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+  Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+        "zext source and destination must both be a vector or neither", &I);
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
-  Assert(SrcBitSize < DestBitSize, "Type too small for ZExt", &I);
+  Check(SrcBitSize < DestBitSize, "Type too small for ZExt", &I);
 
   visitInstruction(I);
 }
@@ -2877,11 +2925,11 @@ void Verifier::visitSExtInst(SExtInst &I) {
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
-  Assert(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
-  Assert(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
-  Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
-         "sext source and destination must both be a vector or neither", &I);
-  Assert(SrcBitSize < DestBitSize, "Type too small for SExt", &I);
+  Check(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+  Check(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+  Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+        "sext source and destination must both be a vector or neither", &I);
+  Check(SrcBitSize < DestBitSize, "Type too small for SExt", &I);
 
   visitInstruction(I);
 }
@@ -2894,11 +2942,11 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
-  Assert(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I);
-  Assert(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I);
-  Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
-         "fptrunc source and destination must both be a vector or neither", &I);
-  Assert(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I);
+  Check(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I);
+  Check(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I);
+  Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+        "fptrunc source and destination must both be a vector or neither", &I);
+  Check(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I);
 
   visitInstruction(I);
 }
@@ -2912,11 +2960,11 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
-  Assert(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I);
-  Assert(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I);
-  Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
-         "fpext source and destination must both be a vector or neither", &I);
-  Assert(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I);
+  Check(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I);
+  Check(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I);
+  Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+        "fpext source and destination must both be a vector or neither", &I);
+  Check(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I);
 
   visitInstruction(I);
 }
@@ -2929,17 +2977,17 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
 
-  Assert(SrcVec == DstVec,
-         "UIToFP source and dest must both be vector or scalar", &I);
-  Assert(SrcTy->isIntOrIntVectorTy(),
-         "UIToFP source must be integer or integer vector", &I);
-  Assert(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector",
-         &I);
+  Check(SrcVec == DstVec,
+        "UIToFP source and dest must both be vector or scalar", &I);
+  Check(SrcTy->isIntOrIntVectorTy(),
+        "UIToFP source must be integer or integer vector", &I);
+  Check(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector",
+        &I);
 
   if (SrcVec && DstVec)
-    Assert(cast<VectorType>(SrcTy)->getElementCount() ==
-               cast<VectorType>(DestTy)->getElementCount(),
-           "UIToFP source and dest vector length mismatch", &I);
+    Check(cast<VectorType>(SrcTy)->getElementCount() ==
+              cast<VectorType>(DestTy)->getElementCount(),
+          "UIToFP source and dest vector length mismatch", &I);
 
   visitInstruction(I);
 }
@@ -2952,17 +3000,17 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
 
-  Assert(SrcVec == DstVec,
-         "SIToFP source and dest must both be vector or scalar", &I);
-  Assert(SrcTy->isIntOrIntVectorTy(),
-         "SIToFP source must be integer or integer vector", &I);
-  Assert(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector",
-         &I);
+  Check(SrcVec == DstVec,
+        "SIToFP source and dest must both be vector or scalar", &I);
+  Check(SrcTy->isIntOrIntVectorTy(),
+        "SIToFP source must be integer or integer vector", &I);
+  Check(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector",
+        &I);
 
   if (SrcVec && DstVec)
-    Assert(cast<VectorType>(SrcTy)->getElementCount() ==
-               cast<VectorType>(DestTy)->getElementCount(),
-           "SIToFP source and dest vector length mismatch", &I);
+    Check(cast<VectorType>(SrcTy)->getElementCount() ==
+              cast<VectorType>(DestTy)->getElementCount(),
+          "SIToFP source and dest vector length mismatch", &I);
 
   visitInstruction(I);
 }
@@ -2975,17 +3023,16 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
   bool SrcVec = SrcTy->isVectorTy();
   bool DstVec = DestTy->isVectorTy();
 
-  Assert(SrcVec == DstVec,
-         "FPToUI source and dest must both be vector or scalar", &I);
Assert(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", - &I); - Assert(DestTy->isIntOrIntVectorTy(), - "FPToUI result must be integer or integer vector", &I); + Check(SrcVec == DstVec, + "FPToUI source and dest must both be vector or scalar", &I); + Check(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", &I); + Check(DestTy->isIntOrIntVectorTy(), + "FPToUI result must be integer or integer vector", &I); if (SrcVec && DstVec) - Assert(cast(SrcTy)->getElementCount() == - cast(DestTy)->getElementCount(), - "FPToUI source and dest vector length mismatch", &I); + Check(cast(SrcTy)->getElementCount() == + cast(DestTy)->getElementCount(), + "FPToUI source and dest vector length mismatch", &I); visitInstruction(I); } @@ -2998,17 +3045,16 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) { bool SrcVec = SrcTy->isVectorTy(); bool DstVec = DestTy->isVectorTy(); - Assert(SrcVec == DstVec, - "FPToSI source and dest must both be vector or scalar", &I); - Assert(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", - &I); - Assert(DestTy->isIntOrIntVectorTy(), - "FPToSI result must be integer or integer vector", &I); + Check(SrcVec == DstVec, + "FPToSI source and dest must both be vector or scalar", &I); + Check(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", &I); + Check(DestTy->isIntOrIntVectorTy(), + "FPToSI result must be integer or integer vector", &I); if (SrcVec && DstVec) - Assert(cast(SrcTy)->getElementCount() == - cast(DestTy)->getElementCount(), - "FPToSI source and dest vector length mismatch", &I); + Check(cast(SrcTy)->getElementCount() == + cast(DestTy)->getElementCount(), + "FPToSI source and dest vector length mismatch", &I); visitInstruction(I); } @@ -3018,17 +3064,17 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I); + Check(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I); - Assert(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I); - Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch", - &I); + Check(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I); + Check(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch", + &I); if (SrcTy->isVectorTy()) { auto *VSrc = cast(SrcTy); auto *VDest = cast(DestTy); - Assert(VSrc->getElementCount() == VDest->getElementCount(), - "PtrToInt Vector width mismatch", &I); + Check(VSrc->getElementCount() == VDest->getElementCount(), + "PtrToInt Vector width mismatch", &I); } visitInstruction(I); @@ -3039,23 +3085,22 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->isIntOrIntVectorTy(), - "IntToPtr source must be an integral", &I); - Assert(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I); + Check(SrcTy->isIntOrIntVectorTy(), "IntToPtr source must be an integral", &I); + Check(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I); - Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch", - &I); + Check(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch", + &I); if (SrcTy->isVectorTy()) { auto *VSrc = cast(SrcTy); auto *VDest = cast(DestTy); - Assert(VSrc->getElementCount() == VDest->getElementCount(), - "IntToPtr Vector width mismatch", &I); + Check(VSrc->getElementCount() == 
+          "IntToPtr Vector width mismatch", &I);
   }
 
   visitInstruction(I);
 }
 
 void Verifier::visitBitCastInst(BitCastInst &I) {
-  Assert(
+  Check(
       CastInst::castIsValid(Instruction::BitCast, I.getOperand(0), I.getType()),
       "Invalid bitcast", &I);
   visitInstruction(I);
@@ -3065,16 +3110,16 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
   Type *SrcTy = I.getOperand(0)->getType();
   Type *DestTy = I.getType();
 
-  Assert(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer",
-         &I);
-  Assert(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer",
-         &I);
-  Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
-         "AddrSpaceCast must be between different address spaces", &I);
+  Check(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer",
+        &I);
+  Check(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer",
+        &I);
+  Check(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
+        "AddrSpaceCast must be between different address spaces", &I);
   if (auto *SrcVTy = dyn_cast<VectorType>(SrcTy))
-    Assert(SrcVTy->getElementCount() ==
-               cast<VectorType>(DestTy)->getElementCount(),
-           "AddrSpaceCast vector pointer number of elements mismatch", &I);
+    Check(SrcVTy->getElementCount() ==
+              cast<VectorType>(DestTy)->getElementCount(),
+          "AddrSpaceCast vector pointer number of elements mismatch", &I);
   visitInstruction(I);
 }
 
@@ -3085,18 +3130,18 @@ void Verifier::visitPHINode(PHINode &PN) {
   // This can be tested by checking whether the instruction before this is
   // either nonexistent (because this is begin()) or is a PHI node. If not,
   // then there is some other instruction before a PHI.
-  Assert(&PN == &PN.getParent()->front() ||
-             isa<PHINode>(--BasicBlock::iterator(&PN)),
-         "PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
+  Check(&PN == &PN.getParent()->front() ||
+            isa<PHINode>(--BasicBlock::iterator(&PN)),
+        "PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
 
   // Check that a PHI doesn't yield a Token.
-  Assert(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");
+  Check(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");
 
   // Check that all of the values of the PHI node have the same type as the
   // result, and that the incoming blocks are really basic blocks.
   for (Value *IncValue : PN.incoming_values()) {
-    Assert(PN.getType() == IncValue->getType(),
-           "PHI node operands are not the same type as the result!", &PN);
+    Check(PN.getType() == IncValue->getType(),
+          "PHI node operands are not the same type as the result!", &PN);
   }
 
   // All other PHI node constraints are checked in the visitBasicBlock method.
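The PHI rules enforced in the earlier visitBasicBlock hunks reduce to a sorted-pair comparison: one entry per predecessor, and a repeated predecessor may only carry an identical incoming value. A standalone sketch under that reading, with block names modeling BasicBlock* identity:

    // Sketch of the PHI/predecessor consistency rules.
    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    using Entry = std::pair<std::string, int>; // (pred block, incoming value)

    bool verifyPhiEntries(std::vector<Entry> Values,
                          std::vector<std::string> Preds) {
      if (Values.size() != Preds.size())
        return false; // one entry per predecessor
      std::sort(Values.begin(), Values.end());
      std::sort(Preds.begin(), Preds.end());
      for (size_t i = 0; i != Values.size(); ++i) {
        if (i != 0 && Values[i].first == Values[i - 1].first &&
            Values[i].second != Values[i - 1].second)
          return false; // same block, different incoming values
        if (Values[i].first != Preds[i])
          return false; // entries do not match predecessors
      }
      return true;
    }

    int main() {
      assert(verifyPhiEntries({{"a", 1}, {"b", 2}}, {"b", "a"}));
      assert(verifyPhiEntries({{"a", 1}, {"a", 1}}, {"a", "a"}));
      assert(!verifyPhiEntries({{"a", 1}, {"a", 2}}, {"a", "a"}));
    }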
@@ -3105,54 +3150,68 @@ void Verifier::visitPHINode(PHINode &PN) {
 }
 
 void Verifier::visitCallBase(CallBase &Call) {
-  Assert(Call.getCalledOperand()->getType()->isPointerTy(),
-         "Called function must be a pointer!", Call);
+  Check(Call.getCalledOperand()->getType()->isPointerTy(),
+        "Called function must be a pointer!", Call);
   PointerType *FPTy = cast<PointerType>(Call.getCalledOperand()->getType());
 
-  Assert(FPTy->isOpaqueOrPointeeTypeMatches(Call.getFunctionType()),
-         "Called function is not the same type as the call!", Call);
+  Check(FPTy->isOpaqueOrPointeeTypeMatches(Call.getFunctionType()),
+        "Called function is not the same type as the call!", Call);
 
   FunctionType *FTy = Call.getFunctionType();
 
   // Verify that the correct number of arguments are being passed
   if (FTy->isVarArg())
-    Assert(Call.arg_size() >= FTy->getNumParams(),
-           "Called function requires more parameters than were provided!",
-           Call);
+    Check(Call.arg_size() >= FTy->getNumParams(),
+          "Called function requires more parameters than were provided!", Call);
   else
-    Assert(Call.arg_size() == FTy->getNumParams(),
-           "Incorrect number of arguments passed to called function!", Call);
+    Check(Call.arg_size() == FTy->getNumParams(),
+          "Incorrect number of arguments passed to called function!", Call);
 
   // Verify that all arguments to the call match the function type.
   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    Assert(Call.getArgOperand(i)->getType() == FTy->getParamType(i),
-           "Call parameter type does not match function signature!",
-           Call.getArgOperand(i), FTy->getParamType(i), Call);
+    Check(Call.getArgOperand(i)->getType() == FTy->getParamType(i),
+          "Call parameter type does not match function signature!",
+          Call.getArgOperand(i), FTy->getParamType(i), Call);
 
   AttributeList Attrs = Call.getAttributes();
-  Assert(verifyAttributeCount(Attrs, Call.arg_size()),
-         "Attribute after last parameter!", Call);
+  Check(verifyAttributeCount(Attrs, Call.arg_size()),
+        "Attribute after last parameter!", Call);
+
+  auto VerifyTypeAlign = [&](Type *Ty, const Twine &Message) {
+    if (!Ty->isSized())
+      return;
+    Align ABIAlign = DL.getABITypeAlign(Ty);
+    Align MaxAlign(ParamMaxAlignment);
+    Check(ABIAlign <= MaxAlign,
+          "Incorrect alignment of " + Message + " to called function!", Call);
+  };
+
+  VerifyTypeAlign(FTy->getReturnType(), "return type");
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+    Type *Ty = FTy->getParamType(i);
+    VerifyTypeAlign(Ty, "argument passed");
+  }
 
   Function *Callee =
       dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
   bool IsIntrinsic = Callee && Callee->isIntrinsic();
   if (IsIntrinsic)
-    Assert(Callee->getValueType() == FTy,
-           "Intrinsic called with incompatible signature", Call);
+    Check(Callee->getValueType() == FTy,
+          "Intrinsic called with incompatible signature", Call);
 
   if (Attrs.hasFnAttr(Attribute::Speculatable)) {
     // Don't allow speculatable on call sites, unless the underlying function
     // declaration is also speculatable.
-    Assert(Callee && Callee->isSpeculatable(),
-           "speculatable attribute may not apply to call sites", Call);
+    Check(Callee && Callee->isSpeculatable(),
+          "speculatable attribute may not apply to call sites", Call);
   }
 
   if (Attrs.hasFnAttr(Attribute::Preallocated)) {
-    Assert(Call.getCalledFunction()->getIntrinsicID() ==
-               Intrinsic::call_preallocated_arg,
-           "preallocated as a call site attribute can only be on "
-           "llvm.call.preallocated.arg");
+    Check(Call.getCalledFunction()->getIntrinsicID() ==
+              Intrinsic::call_preallocated_arg,
+          "preallocated as a call site attribute can only be on "
+          "llvm.call.preallocated.arg");
   }
 
   // Verify call attributes.
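VerifyTypeAlign above is new in this hunk: it rejects calls whose sized argument or return types demand more ABI alignment than the verifier's ParamMaxAlignment cap. A hedged sketch of the rule; the cap value and alignment inputs here are illustrative stand-ins, not the constant and DataLayout query used in the real file:

    // Sketch of the per-call ABI alignment cap.
    #include <cassert>
    #include <cstdint>

    constexpr uint64_t ParamMaxAlignment = 1 << 14; // illustrative cap only

    bool checkTypeAlign(bool IsSized, uint64_t ABIAlign) {
      if (!IsSized)
        return true; // unsized types are skipped
      return ABIAlign <= ParamMaxAlignment;
    }

    int main() {
      assert(checkTypeAlign(true, 16));
      assert(checkTypeAlign(false, 0));
      assert(!checkTypeAlign(true, uint64_t(1) << 20)); // over-aligned
    }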
@@ -3164,8 +3223,8 @@ void Verifier::visitCallBase(CallBase &Call) {
   if (Call.hasInAllocaArgument()) {
     Value *InAllocaArg = Call.getArgOperand(FTy->getNumParams() - 1);
     if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
-      Assert(AI->isUsedWithInAlloca(),
-             "inalloca argument for call has mismatched alloca", AI, Call);
+      Check(AI->isUsedWithInAlloca(),
+            "inalloca argument for call has mismatched alloca", AI, Call);
   }
 
   // For each argument of the callsite, if it has the swifterror argument,
@@ -3175,31 +3234,30 @@ void Verifier::visitCallBase(CallBase &Call) {
     if (Call.paramHasAttr(i, Attribute::SwiftError)) {
       Value *SwiftErrorArg = Call.getArgOperand(i);
       if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
-        Assert(AI->isSwiftError(),
-               "swifterror argument for call has mismatched alloca", AI, Call);
+        Check(AI->isSwiftError(),
+              "swifterror argument for call has mismatched alloca", AI, Call);
         continue;
       }
       auto ArgI = dyn_cast<Argument>(SwiftErrorArg);
-      Assert(ArgI,
-             "swifterror argument should come from an alloca or parameter",
-             SwiftErrorArg, Call);
-      Assert(ArgI->hasSwiftErrorAttr(),
-             "swifterror argument for call has mismatched parameter", ArgI,
-             Call);
+      Check(ArgI, "swifterror argument should come from an alloca or parameter",
+            SwiftErrorArg, Call);
+      Check(ArgI->hasSwiftErrorAttr(),
+            "swifterror argument for call has mismatched parameter", ArgI,
+            Call);
     }
 
     if (Attrs.hasParamAttr(i, Attribute::ImmArg)) {
       // Don't allow immarg on call sites, unless the underlying declaration
       // also has the matching immarg.
-      Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
-             "immarg may not apply only to call sites",
-             Call.getArgOperand(i), Call);
+      Check(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
+            "immarg may not apply only to call sites", Call.getArgOperand(i),
+            Call);
     }
 
     if (Call.paramHasAttr(i, Attribute::ImmArg)) {
       Value *ArgVal = Call.getArgOperand(i);
-      Assert(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
-             "immarg operand has non-immediate parameter", ArgVal, Call);
+      Check(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
+            "immarg operand has non-immediate parameter", ArgVal, Call);
     }
 
     if (Call.paramHasAttr(i, Attribute::Preallocated)) {
@@ -3207,10 +3265,10 @@ void Verifier::visitCallBase(CallBase &Call) {
       bool hasOB =
           Call.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0;
       bool isMustTail = Call.isMustTailCall();
-      Assert(hasOB != isMustTail,
-             "preallocated operand either requires a preallocated bundle or "
-             "the call to be musttail (but not both)",
-             ArgVal, Call);
+      Check(hasOB != isMustTail,
+            "preallocated operand either requires a preallocated bundle or "
+            "the call to be musttail (but not both)",
+            ArgVal, Call);
     }
   }
 
@@ -3233,17 +3291,17 @@ void Verifier::visitCallBase(CallBase &Call) {
       verifyParameterAttrs(ArgAttrs, Ty, &Call);
 
       if (ArgAttrs.hasAttribute(Attribute::Nest)) {
-        Assert(!SawNest, "More than one parameter has attribute nest!", Call);
+        Check(!SawNest, "More than one parameter has attribute nest!", Call);
         SawNest = true;
       }
 
       if (ArgAttrs.hasAttribute(Attribute::Returned)) {
-        Assert(!SawReturned, "More than one parameter has attribute returned!",
-               Call);
-        Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
-               "Incompatible argument and return types for 'returned' "
-               "attribute",
-               Call);
+        Check(!SawReturned, "More than one parameter has attribute returned!",
+              Call);
+        Check(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
+              "Incompatible argument and return types for 'returned' "
+              "attribute",
+              Call);
         SawReturned = true;
       }
 
@@ -3252,32 +3310,32 @@ void Verifier::visitCallBase(CallBase &Call) {
       if (!Call.getCalledFunction() ||
           Call.getCalledFunction()->getIntrinsicID() !=
              Intrinsic::experimental_gc_statepoint)
-        Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
-               "Attribute 'sret' cannot be used for vararg call arguments!",
-               Call);
+        Check(!ArgAttrs.hasAttribute(Attribute::StructRet),
+              "Attribute 'sret' cannot be used for vararg call arguments!",
+              Call);
 
       if (ArgAttrs.hasAttribute(Attribute::InAlloca))
-        Assert(Idx == Call.arg_size() - 1,
-               "inalloca isn't on the last argument!", Call);
+        Check(Idx == Call.arg_size() - 1,
+              "inalloca isn't on the last argument!", Call);
     }
   }
 
   // Verify that there's no metadata unless it's a direct call to an intrinsic.
   if (!IsIntrinsic) {
     for (Type *ParamTy : FTy->params()) {
-      Assert(!ParamTy->isMetadataTy(),
-             "Function has metadata parameter but isn't an intrinsic", Call);
-      Assert(!ParamTy->isTokenTy(),
-             "Function has token parameter but isn't an intrinsic", Call);
+      Check(!ParamTy->isMetadataTy(),
+            "Function has metadata parameter but isn't an intrinsic", Call);
+      Check(!ParamTy->isTokenTy(),
+            "Function has token parameter but isn't an intrinsic", Call);
     }
   }
 
   // Verify that indirect calls don't return tokens.
   if (!Call.getCalledFunction()) {
-    Assert(!FTy->getReturnType()->isTokenTy(),
-           "Return type cannot be token for indirect call!");
-    Assert(!FTy->getReturnType()->isX86_AMXTy(),
-           "Return type cannot be x86_amx for indirect call!");
+    Check(!FTy->getReturnType()->isTokenTy(),
+          "Return type cannot be token for indirect call!");
+    Check(!FTy->getReturnType()->isX86_AMXTy(),
+          "Return type cannot be x86_amx for indirect call!");
   }
 
   if (Function *F = Call.getCalledFunction())
@@ -3285,69 +3343,83 @@ void Verifier::visitCallBase(CallBase &Call) {
       visitIntrinsicCall(ID, Call);
 
   // Verify that a callsite has at most one "deopt", at most one "funclet", at
-  // most one "gc-transition", at most one "cfguardtarget",
-  // and at most one "preallocated" operand bundle.
+  // most one "gc-transition", at most one "cfguardtarget", at most one
+  // "preallocated" operand bundle, and at most one "ptrauth" operand bundle.
   bool FoundDeoptBundle = false, FoundFuncletBundle = false,
        FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false,
       FoundPreallocatedBundle = false, FoundGCLiveBundle = false,
+       FoundPtrauthBundle = false,
       FoundAttachedCallBundle = false;
   for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) {
     OperandBundleUse BU = Call.getOperandBundleAt(i);
     uint32_t Tag = BU.getTagID();
     if (Tag == LLVMContext::OB_deopt) {
-      Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", Call);
+      Check(!FoundDeoptBundle, "Multiple deopt operand bundles", Call);
       FoundDeoptBundle = true;
     } else if (Tag == LLVMContext::OB_gc_transition) {
-      Assert(!FoundGCTransitionBundle, "Multiple gc-transition operand bundles",
-             Call);
+      Check(!FoundGCTransitionBundle, "Multiple gc-transition operand bundles",
+            Call);
      FoundGCTransitionBundle = true;
    } else if (Tag == LLVMContext::OB_funclet) {
-      Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", Call);
+      Check(!FoundFuncletBundle, "Multiple funclet operand bundles", Call);
      FoundFuncletBundle = true;
-      Assert(BU.Inputs.size() == 1,
-             "Expected exactly one funclet bundle operand", Call);
-      Assert(isa<FuncletPadInst>(BU.Inputs.front()),
-             "Funclet bundle operands should correspond to a FuncletPadInst",
-             Call);
+      Check(BU.Inputs.size() == 1,
+            "Expected exactly one funclet bundle operand", Call);
+      Check(isa<FuncletPadInst>(BU.Inputs.front()),
+            "Funclet bundle operands should correspond to a FuncletPadInst",
+            Call);
    } else if (Tag == LLVMContext::OB_cfguardtarget) {
-      Assert(!FoundCFGuardTargetBundle,
-             "Multiple CFGuardTarget operand bundles", Call);
+      Check(!FoundCFGuardTargetBundle, "Multiple CFGuardTarget operand bundles",
+            Call);
      FoundCFGuardTargetBundle = true;
-      Assert(BU.Inputs.size() == 1,
-             "Expected exactly one cfguardtarget bundle operand", Call);
+      Check(BU.Inputs.size() == 1,
+            "Expected exactly one cfguardtarget bundle operand", Call);
+    } else if (Tag == LLVMContext::OB_ptrauth) {
+      Check(!FoundPtrauthBundle, "Multiple ptrauth operand bundles", Call);
+      FoundPtrauthBundle = true;
+      Check(BU.Inputs.size() == 2,
+            "Expected exactly two ptrauth bundle operands", Call);
+      Check(isa<ConstantInt>(BU.Inputs[0]) &&
+                BU.Inputs[0]->getType()->isIntegerTy(32),
+            "Ptrauth bundle key operand must be an i32 constant", Call);
+      Check(BU.Inputs[1]->getType()->isIntegerTy(64),
+            "Ptrauth bundle discriminator operand must be an i64", Call);
    } else if (Tag == LLVMContext::OB_preallocated) {
-      Assert(!FoundPreallocatedBundle, "Multiple preallocated operand bundles",
-             Call);
+      Check(!FoundPreallocatedBundle, "Multiple preallocated operand bundles",
+            Call);
      FoundPreallocatedBundle = true;
-      Assert(BU.Inputs.size() == 1,
-             "Expected exactly one preallocated bundle operand", Call);
+      Check(BU.Inputs.size() == 1,
+            "Expected exactly one preallocated bundle operand", Call);
      auto Input = dyn_cast<IntrinsicInst>(BU.Inputs.front());
-      Assert(Input &&
-                 Input->getIntrinsicID() == Intrinsic::call_preallocated_setup,
-             "\"preallocated\" argument must be a token from "
-             "llvm.call.preallocated.setup",
-             Call);
+      Check(Input &&
+                Input->getIntrinsicID() == Intrinsic::call_preallocated_setup,
+            "\"preallocated\" argument must be a token from "
+            "llvm.call.preallocated.setup",
+            Call);
    } else if (Tag == LLVMContext::OB_gc_live) {
-      Assert(!FoundGCLiveBundle, "Multiple gc-live operand bundles",
-             Call);
+      Check(!FoundGCLiveBundle, "Multiple gc-live operand bundles", Call);
      FoundGCLiveBundle = true;
    } else if (Tag == LLVMContext::OB_clang_arc_attachedcall) {
-      Assert(!FoundAttachedCallBundle,
-             "Multiple \"clang.arc.attachedcall\" operand bundles", Call);
+      Check(!FoundAttachedCallBundle,
+            "Multiple \"clang.arc.attachedcall\" operand bundles", Call);
      FoundAttachedCallBundle = true;
      verifyAttachedCallBundle(Call, BU);
    }
  }
 
+  // Verify that callee and callsite agree on whether to use pointer auth.
+  Check(!(Call.getCalledFunction() && FoundPtrauthBundle),
+        "Direct call cannot have a ptrauth bundle", Call);
+
   // Verify that each inlinable callsite of a debug-info-bearing function in a
   // debug-info-bearing function has a debug location attached to it. Failure to
   // do so causes assertion failures when the inliner sets up inline scope info.
   if (Call.getFunction()->getSubprogram() && Call.getCalledFunction() &&
       Call.getCalledFunction()->getSubprogram())
-    AssertDI(Call.getDebugLoc(),
-             "inlinable function call in a function with "
-             "debug info must have a !dbg location",
-             Call);
+    CheckDI(Call.getDebugLoc(),
+            "inlinable function call in a function with "
+            "debug info must have a !dbg location",
+            Call);
 
   if (Call.isInlineAsm())
     verifyInlineAsmCall(Call);
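The new "ptrauth" bundle verification above pins down the bundle's shape: exactly two inputs, an i32 constant key and an i64 discriminator, and never on a direct call. A sketch with a simplified operand model standing in for llvm::Value:

    // Sketch of the ptrauth operand-bundle shape checks.
    #include <cassert>
    #include <vector>

    struct OperandModel {
      unsigned BitWidth; // integer width of the operand
      bool IsConstant;   // is it a constant integer?
    };

    bool verifyPtrauthBundle(const std::vector<OperandModel> &Inputs,
                             bool IsDirectCall) {
      if (IsDirectCall)
        return false; // direct calls cannot carry a ptrauth bundle
      if (Inputs.size() != 2)
        return false; // exactly two operands
      if (!Inputs[0].IsConstant || Inputs[0].BitWidth != 32)
        return false; // key: i32 constant
      return Inputs[1].BitWidth == 64; // discriminator: any i64 value
    }

    int main() {
      assert(verifyPtrauthBundle({{32, true}, {64, false}}, false));
      assert(!verifyPtrauthBundle({{32, false}, {64, false}}, false));
      assert(!verifyPtrauthBundle({{32, true}}, false));
      assert(!verifyPtrauthBundle({{32, true}, {64, false}}, true));
    }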
@@ -3357,16 +3429,16 @@ void Verifier::visitCallBase(CallBase &Call) {
 
 void Verifier::verifyTailCCMustTailAttrs(const AttrBuilder &Attrs,
                                          StringRef Context) {
-  Assert(!Attrs.contains(Attribute::InAlloca),
-         Twine("inalloca attribute not allowed in ") + Context);
-  Assert(!Attrs.contains(Attribute::InReg),
-         Twine("inreg attribute not allowed in ") + Context);
-  Assert(!Attrs.contains(Attribute::SwiftError),
-         Twine("swifterror attribute not allowed in ") + Context);
-  Assert(!Attrs.contains(Attribute::Preallocated),
-         Twine("preallocated attribute not allowed in ") + Context);
-  Assert(!Attrs.contains(Attribute::ByRef),
-         Twine("byref attribute not allowed in ") + Context);
+  Check(!Attrs.contains(Attribute::InAlloca),
+        Twine("inalloca attribute not allowed in ") + Context);
+  Check(!Attrs.contains(Attribute::InReg),
+        Twine("inreg attribute not allowed in ") + Context);
+  Check(!Attrs.contains(Attribute::SwiftError),
+        Twine("swifterror attribute not allowed in ") + Context);
+  Check(!Attrs.contains(Attribute::Preallocated),
+        Twine("preallocated attribute not allowed in ") + Context);
+  Check(!Attrs.contains(Attribute::ByRef),
+        Twine("byref attribute not allowed in ") + Context);
 }
 
 /// Two types are "congruent" if they are identical, or if they are both pointer
@@ -3403,19 +3475,19 @@ static AttrBuilder getParameterABIAttributes(LLVMContext& C, unsigned I, Attribu
 }
 
 void Verifier::verifyMustTailCall(CallInst &CI) {
-  Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
call with inline asm", &CI); Function *F = CI.getParent()->getParent(); FunctionType *CallerTy = F->getFunctionType(); FunctionType *CalleeTy = CI.getFunctionType(); - Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(), - "cannot guarantee tail call due to mismatched varargs", &CI); - Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()), - "cannot guarantee tail call due to mismatched return types", &CI); + Check(CallerTy->isVarArg() == CalleeTy->isVarArg(), + "cannot guarantee tail call due to mismatched varargs", &CI); + Check(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()), + "cannot guarantee tail call due to mismatched return types", &CI); // - The calling conventions of the caller and callee must match. - Assert(F->getCallingConv() == CI.getCallingConv(), - "cannot guarantee tail call due to mismatched calling conv", &CI); + Check(F->getCallingConv() == CI.getCallingConv(), + "cannot guarantee tail call due to mismatched calling conv", &CI); // - The call must immediately precede a :ref:`ret ` instruction, // or a pointer bitcast followed by a ret instruction. @@ -3426,19 +3498,18 @@ void Verifier::verifyMustTailCall(CallInst &CI) { // Handle the optional bitcast. if (BitCastInst *BI = dyn_cast_or_null(Next)) { - Assert(BI->getOperand(0) == RetVal, - "bitcast following musttail call must use the call", BI); + Check(BI->getOperand(0) == RetVal, + "bitcast following musttail call must use the call", BI); RetVal = BI; Next = BI->getNextNode(); } // Check the return. ReturnInst *Ret = dyn_cast_or_null(Next); - Assert(Ret, "musttail call must precede a ret with an optional bitcast", - &CI); - Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal || - isa(Ret->getReturnValue()), - "musttail call result must be returned", Ret); + Check(Ret, "musttail call must precede a ret with an optional bitcast", &CI); + Check(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal || + isa(Ret->getReturnValue()), + "musttail call result must be returned", Ret); AttributeList CallerAttrs = F->getAttributes(); AttributeList CalleeAttrs = CI.getAttributes(); @@ -3460,8 +3531,8 @@ void Verifier::verifyMustTailCall(CallInst &CI) { verifyTailCCMustTailAttrs(ABIAttrs, Context); } // - Varargs functions are not allowed - Assert(!CallerTy->isVarArg(), Twine("cannot guarantee ") + CCName + - " tail call for varargs function"); + Check(!CallerTy->isVarArg(), Twine("cannot guarantee ") + CCName + + " tail call for varargs function"); return; } @@ -3469,11 +3540,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) { // parameters or return types may differ in pointee type, but not // address space. 
if (!CI.getCalledFunction() || !CI.getCalledFunction()->isIntrinsic()) { - Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(), - "cannot guarantee tail call due to mismatched parameter counts", - &CI); + Check(CallerTy->getNumParams() == CalleeTy->getNumParams(), + "cannot guarantee tail call due to mismatched parameter counts", &CI); for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) { - Assert( + Check( isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)), "cannot guarantee tail call due to mismatched parameter types", &CI); } @@ -3484,10 +3554,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) { for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) { AttrBuilder CallerABIAttrs = getParameterABIAttributes(F->getContext(), I, CallerAttrs); AttrBuilder CalleeABIAttrs = getParameterABIAttributes(F->getContext(), I, CalleeAttrs); - Assert(CallerABIAttrs == CalleeABIAttrs, - "cannot guarantee tail call due to mismatched ABI impacting " - "function attributes", - &CI, CI.getOperand(I)); + Check(CallerABIAttrs == CalleeABIAttrs, + "cannot guarantee tail call due to mismatched ABI impacting " + "function attributes", + &CI, CI.getOperand(I)); } } @@ -3503,7 +3573,7 @@ void Verifier::visitInvokeInst(InvokeInst &II) { // Verify that the first non-PHI instruction of the unwind destination is an // exception handling instruction. - Assert( + Check( II.getUnwindDest()->isEHPad(), "The unwind destination does not have an exception handling instruction!", &II); @@ -3514,17 +3584,17 @@ void Verifier::visitInvokeInst(InvokeInst &II) { /// visitUnaryOperator - Check the argument to the unary operator. /// void Verifier::visitUnaryOperator(UnaryOperator &U) { - Assert(U.getType() == U.getOperand(0)->getType(), - "Unary operators must have same type for" - "operands and result!", - &U); + Check(U.getType() == U.getOperand(0)->getType(), + "Unary operators must have same type for" + "operands and result!", + &U); switch (U.getOpcode()) { // Check that floating-point arithmetic operators are only used with // floating-point operands. case Instruction::FNeg: - Assert(U.getType()->isFPOrFPVectorTy(), - "FNeg operator only works with float types!", &U); + Check(U.getType()->isFPOrFPVectorTy(), + "FNeg operator only works with float types!", &U); break; default: llvm_unreachable("Unknown UnaryOperator opcode!"); @@ -3537,8 +3607,8 @@ void Verifier::visitUnaryOperator(UnaryOperator &U) { /// of the same type! 
/// void Verifier::visitBinaryOperator(BinaryOperator &B) { - Assert(B.getOperand(0)->getType() == B.getOperand(1)->getType(), - "Both operands to a binary operator are not of the same type!", &B); + Check(B.getOperand(0)->getType() == B.getOperand(1)->getType(), + "Both operands to a binary operator are not of the same type!", &B); switch (B.getOpcode()) { // Check that integer arithmetic operators are only used with @@ -3550,12 +3620,12 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: - Assert(B.getType()->isIntOrIntVectorTy(), - "Integer arithmetic operators only work with integral types!", &B); - Assert(B.getType() == B.getOperand(0)->getType(), - "Integer arithmetic operators must have same type " - "for operands and result!", - &B); + Check(B.getType()->isIntOrIntVectorTy(), + "Integer arithmetic operators only work with integral types!", &B); + Check(B.getType() == B.getOperand(0)->getType(), + "Integer arithmetic operators must have same type " + "for operands and result!", + &B); break; // Check that floating-point arithmetic operators are only used with // floating-point operands. @@ -3564,32 +3634,31 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - Assert(B.getType()->isFPOrFPVectorTy(), - "Floating-point arithmetic operators only work with " - "floating-point types!", - &B); - Assert(B.getType() == B.getOperand(0)->getType(), - "Floating-point arithmetic operators must have same type " - "for operands and result!", - &B); + Check(B.getType()->isFPOrFPVectorTy(), + "Floating-point arithmetic operators only work with " + "floating-point types!", + &B); + Check(B.getType() == B.getOperand(0)->getType(), + "Floating-point arithmetic operators must have same type " + "for operands and result!", + &B); break; // Check that logical operators are only used with integral operands. 
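// A minimal standalone sketch, not the LLVM API, of the operand-type rules
// visitBinaryOperator enforces around here: both operands must match the
// result type, floating-point opcodes require FP types, and the integer,
// logical, and shift opcodes require integral types. The Opcode enum and
// the Ty flag pair are hypothetical stand-ins for LLVM's type queries.
#include <cassert>

enum class Opcode { Add, FAdd, And, Shl };
struct Ty { bool IsInt; bool IsFP; };

static bool sameTy(Ty A, Ty B) { return A.IsInt == B.IsInt && A.IsFP == B.IsFP; }

static bool binOpTypesOk(Opcode Op, Ty ResultTy, Ty Op0Ty, Ty Op1Ty) {
  // Operands must match each other and the result type.
  if (!sameTy(Op0Ty, Op1Ty) || !sameTy(Op0Ty, ResultTy))
    return false;
  // FP arithmetic needs FP types; everything else here needs integral types.
  return Op == Opcode::FAdd ? ResultTy.IsFP : ResultTy.IsInt;
}

int main() {
  Ty I32{true, false}, F32{false, true};
  assert(binOpTypesOk(Opcode::Add, I32, I32, I32));
  assert(binOpTypesOk(Opcode::FAdd, F32, F32, F32));
  assert(!binOpTypesOk(Opcode::FAdd, I32, I32, I32)); // FP op on integers
}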
case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert(B.getType()->isIntOrIntVectorTy(), - "Logical operators only work with integral types!", &B); - Assert(B.getType() == B.getOperand(0)->getType(), - "Logical operators must have same type for operands and result!", - &B); + Check(B.getType()->isIntOrIntVectorTy(), + "Logical operators only work with integral types!", &B); + Check(B.getType() == B.getOperand(0)->getType(), + "Logical operators must have same type for operands and result!", &B); break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert(B.getType()->isIntOrIntVectorTy(), - "Shifts only work with integral types!", &B); - Assert(B.getType() == B.getOperand(0)->getType(), - "Shift return type must be same as operands!", &B); + Check(B.getType()->isIntOrIntVectorTy(), + "Shifts only work with integral types!", &B); + Check(B.getType() == B.getOperand(0)->getType(), + "Shift return type must be same as operands!", &B); break; default: llvm_unreachable("Unknown BinaryOperator opcode!"); @@ -3602,14 +3671,13 @@ void Verifier::visitICmpInst(ICmpInst &IC) { // Check that the operands are the same type Type *Op0Ty = IC.getOperand(0)->getType(); Type *Op1Ty = IC.getOperand(1)->getType(); - Assert(Op0Ty == Op1Ty, - "Both operands to ICmp instruction are not of the same type!", &IC); + Check(Op0Ty == Op1Ty, + "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(), - "Invalid operand types for ICmp instruction", &IC); + Check(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(), + "Invalid operand types for ICmp instruction", &IC); // Check that the predicate is valid. - Assert(IC.isIntPredicate(), - "Invalid predicate in ICmp instruction!", &IC); + Check(IC.isIntPredicate(), "Invalid predicate in ICmp instruction!", &IC); visitInstruction(IC); } @@ -3618,63 +3686,61 @@ void Verifier::visitFCmpInst(FCmpInst &FC) { // Check that the operands are the same type Type *Op0Ty = FC.getOperand(0)->getType(); Type *Op1Ty = FC.getOperand(1)->getType(); - Assert(Op0Ty == Op1Ty, - "Both operands to FCmp instruction are not of the same type!", &FC); + Check(Op0Ty == Op1Ty, + "Both operands to FCmp instruction are not of the same type!", &FC); // Check that the operands are the right type - Assert(Op0Ty->isFPOrFPVectorTy(), - "Invalid operand types for FCmp instruction", &FC); + Check(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", + &FC); // Check that the predicate is valid. 
- Assert(FC.isFPPredicate(), - "Invalid predicate in FCmp instruction!", &FC); + Check(FC.isFPPredicate(), "Invalid predicate in FCmp instruction!", &FC); visitInstruction(FC); } void Verifier::visitExtractElementInst(ExtractElementInst &EI) { - Assert( - ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)), - "Invalid extractelement operands!", &EI); + Check(ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)), + "Invalid extractelement operands!", &EI); visitInstruction(EI); } void Verifier::visitInsertElementInst(InsertElementInst &IE) { - Assert(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1), - IE.getOperand(2)), - "Invalid insertelement operands!", &IE); + Check(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1), + IE.getOperand(2)), + "Invalid insertelement operands!", &IE); visitInstruction(IE); } void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) { - Assert(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1), - SV.getShuffleMask()), - "Invalid shufflevector operands!", &SV); + Check(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1), + SV.getShuffleMask()), + "Invalid shufflevector operands!", &SV); visitInstruction(SV); } void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Type *TargetTy = GEP.getPointerOperandType()->getScalarType(); - Assert(isa(TargetTy), - "GEP base pointer is not a vector or a vector of pointers", &GEP); - Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); + Check(isa(TargetTy), + "GEP base pointer is not a vector or a vector of pointers", &GEP); + Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); SmallVector Idxs(GEP.indices()); - Assert(all_of( - Idxs, [](Value* V) { return V->getType()->isIntOrIntVectorTy(); }), + Check( + all_of(Idxs, [](Value *V) { return V->getType()->isIntOrIntVectorTy(); }), "GEP indexes must be integers", &GEP); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); - Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); + Check(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Assert(GEP.getType()->isPtrOrPtrVectorTy() && - GEP.getResultElementType() == ElTy, - "GEP is not of right type for indices!", &GEP, ElTy); + Check(GEP.getType()->isPtrOrPtrVectorTy() && + GEP.getResultElementType() == ElTy, + "GEP is not of right type for indices!", &GEP, ElTy); if (auto *GEPVTy = dyn_cast(GEP.getType())) { // Additional checks for vector GEPs. 
ElementCount GEPWidth = GEPVTy->getElementCount(); if (GEP.getPointerOperandType()->isVectorTy()) - Assert( + Check( GEPWidth == cast(GEP.getPointerOperandType())->getElementCount(), "Vector GEP result width doesn't match operand's", &GEP); @@ -3682,16 +3748,16 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Type *IndexTy = Idx->getType(); if (auto *IndexVTy = dyn_cast(IndexTy)) { ElementCount IndexWidth = IndexVTy->getElementCount(); - Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); + Check(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); } - Assert(IndexTy->isIntOrIntVectorTy(), - "All GEP indices should be of integer type"); + Check(IndexTy->isIntOrIntVectorTy(), + "All GEP indices should be of integer type"); } } if (auto *PTy = dyn_cast(GEP.getType())) { - Assert(GEP.getAddressSpace() == PTy->getAddressSpace(), - "GEP address space doesn't match type", &GEP); + Check(GEP.getAddressSpace() == PTy->getAddressSpace(), + "GEP address space doesn't match type", &GEP); } visitInstruction(GEP); @@ -3706,33 +3772,33 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) { "precondition violation"); unsigned NumOperands = Range->getNumOperands(); - Assert(NumOperands % 2 == 0, "Unfinished range!", Range); + Check(NumOperands % 2 == 0, "Unfinished range!", Range); unsigned NumRanges = NumOperands / 2; - Assert(NumRanges >= 1, "It should have at least one range!", Range); + Check(NumRanges >= 1, "It should have at least one range!", Range); ConstantRange LastRange(1, true); // Dummy initial value for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Low = mdconst::dyn_extract(Range->getOperand(2 * i)); - Assert(Low, "The lower limit must be an integer!", Low); + Check(Low, "The lower limit must be an integer!", Low); ConstantInt *High = mdconst::dyn_extract(Range->getOperand(2 * i + 1)); - Assert(High, "The upper limit must be an integer!", High); - Assert(High->getType() == Low->getType() && High->getType() == Ty, - "Range types must match instruction type!", &I); + Check(High, "The upper limit must be an integer!", High); + Check(High->getType() == Low->getType() && High->getType() == Ty, + "Range types must match instruction type!", &I); APInt HighV = High->getValue(); APInt LowV = Low->getValue(); ConstantRange CurRange(LowV, HighV); - Assert(!CurRange.isEmptySet() && !CurRange.isFullSet(), - "Range must not be empty!", Range); + Check(!CurRange.isEmptySet() && !CurRange.isFullSet(), + "Range must not be empty!", Range); if (i != 0) { - Assert(CurRange.intersectWith(LastRange).isEmptySet(), - "Intervals are overlapping", Range); - Assert(LowV.sgt(LastRange.getLower()), "Intervals are not in order", - Range); - Assert(!isContiguous(CurRange, LastRange), "Intervals are contiguous", - Range); + Check(CurRange.intersectWith(LastRange).isEmptySet(), + "Intervals are overlapping", Range); + Check(LowV.sgt(LastRange.getLower()), "Intervals are not in order", + Range); + Check(!isContiguous(CurRange, LastRange), "Intervals are contiguous", + Range); } LastRange = ConstantRange(LowV, HighV); } @@ -3742,41 +3808,41 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) { APInt FirstHigh = mdconst::dyn_extract(Range->getOperand(1))->getValue(); ConstantRange FirstRange(FirstLow, FirstHigh); - Assert(FirstRange.intersectWith(LastRange).isEmptySet(), - "Intervals are overlapping", Range); - Assert(!isContiguous(FirstRange, LastRange), "Intervals are contiguous", - Range); + 
Check(FirstRange.intersectWith(LastRange).isEmptySet(), + "Intervals are overlapping", Range); + Check(!isContiguous(FirstRange, LastRange), "Intervals are contiguous", + Range); } } void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) { unsigned Size = DL.getTypeSizeInBits(Ty); - Assert(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I); - Assert(!(Size & (Size - 1)), - "atomic memory access' operand must have a power-of-two size", Ty, I); + Check(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I); + Check(!(Size & (Size - 1)), + "atomic memory access' operand must have a power-of-two size", Ty, I); } void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = dyn_cast(LI.getOperand(0)->getType()); - Assert(PTy, "Load operand must be a pointer.", &LI); + Check(PTy, "Load operand must be a pointer.", &LI); Type *ElTy = LI.getType(); if (MaybeAlign A = LI.getAlign()) { - Assert(A->value() <= Value::MaximumAlignment, - "huge alignment values are unsupported", &LI); + Check(A->value() <= Value::MaximumAlignment, + "huge alignment values are unsupported", &LI); } - Assert(ElTy->isSized(), "loading unsized types is not allowed", &LI); + Check(ElTy->isSized(), "loading unsized types is not allowed", &LI); if (LI.isAtomic()) { - Assert(LI.getOrdering() != AtomicOrdering::Release && - LI.getOrdering() != AtomicOrdering::AcquireRelease, - "Load cannot have Release ordering", &LI); - Assert(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic load operand must have integer, pointer, or floating point " - "type!", - ElTy, &LI); + Check(LI.getOrdering() != AtomicOrdering::Release && + LI.getOrdering() != AtomicOrdering::AcquireRelease, + "Load cannot have Release ordering", &LI); + Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), + "atomic load operand must have integer, pointer, or floating point " + "type!", + ElTy, &LI); checkAtomicMemAccessSize(ElTy, &LI); } else { - Assert(LI.getSyncScopeID() == SyncScope::System, - "Non-atomic load cannot have SynchronizationScope specified", &LI); + Check(LI.getSyncScopeID() == SyncScope::System, + "Non-atomic load cannot have SynchronizationScope specified", &LI); } visitInstruction(LI); @@ -3784,27 +3850,27 @@ void Verifier::visitLoadInst(LoadInst &LI) { void Verifier::visitStoreInst(StoreInst &SI) { PointerType *PTy = dyn_cast(SI.getOperand(1)->getType()); - Assert(PTy, "Store operand must be a pointer.", &SI); + Check(PTy, "Store operand must be a pointer.", &SI); Type *ElTy = SI.getOperand(0)->getType(); - Assert(PTy->isOpaqueOrPointeeTypeMatches(ElTy), - "Stored value type does not match pointer operand type!", &SI, ElTy); + Check(PTy->isOpaqueOrPointeeTypeMatches(ElTy), + "Stored value type does not match pointer operand type!", &SI, ElTy); if (MaybeAlign A = SI.getAlign()) { - Assert(A->value() <= Value::MaximumAlignment, - "huge alignment values are unsupported", &SI); + Check(A->value() <= Value::MaximumAlignment, + "huge alignment values are unsupported", &SI); } - Assert(ElTy->isSized(), "storing unsized types is not allowed", &SI); + Check(ElTy->isSized(), "storing unsized types is not allowed", &SI); if (SI.isAtomic()) { - Assert(SI.getOrdering() != AtomicOrdering::Acquire && - SI.getOrdering() != AtomicOrdering::AcquireRelease, - "Store cannot have Acquire ordering", &SI); - Assert(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic store operand must have integer, pointer, or floating point " - "type!", - ElTy, &SI); + Check(SI.getOrdering() != 
AtomicOrdering::Acquire && + SI.getOrdering() != AtomicOrdering::AcquireRelease, + "Store cannot have Acquire ordering", &SI); + Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), + "atomic store operand must have integer, pointer, or floating point " + "type!", + ElTy, &SI); checkAtomicMemAccessSize(ElTy, &SI); } else { - Assert(SI.getSyncScopeID() == SyncScope::System, - "Non-atomic store cannot have SynchronizationScope specified", &SI); + Check(SI.getSyncScopeID() == SyncScope::System, + "Non-atomic store cannot have SynchronizationScope specified", &SI); } visitInstruction(SI); } @@ -3814,10 +3880,10 @@ void Verifier::verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal) { for (const auto &I : llvm::enumerate(Call.args())) { if (I.value() == SwiftErrorVal) { - Assert(Call.paramHasAttr(I.index(), Attribute::SwiftError), - "swifterror value when used in a callsite should be marked " - "with swifterror attribute", - SwiftErrorVal, Call); + Check(Call.paramHasAttr(I.index(), Attribute::SwiftError), + "swifterror value when used in a callsite should be marked " + "with swifterror attribute", + SwiftErrorVal, Call); } } } @@ -3826,16 +3892,17 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) { // Check that swifterror value is only used by loads, stores, or as // a swifterror argument. for (const User *U : SwiftErrorVal->users()) { - Assert(isa(U) || isa(U) || isa(U) || - isa(U), - "swifterror value can only be loaded and stored from, or " - "as a swifterror argument!", - SwiftErrorVal, U); + Check(isa(U) || isa(U) || isa(U) || + isa(U), + "swifterror value can only be loaded and stored from, or " + "as a swifterror argument!", + SwiftErrorVal, U); // If it is used by a store, check it is the second operand. if (auto StoreI = dyn_cast(U)) - Assert(StoreI->getOperand(1) == SwiftErrorVal, - "swifterror value should be the second operand when used " - "by stores", SwiftErrorVal, U); + Check(StoreI->getOperand(1) == SwiftErrorVal, + "swifterror value should be the second operand when used " + "by stores", + SwiftErrorVal, U); if (auto *Call = dyn_cast(U)) verifySwiftErrorCall(*const_cast(Call), SwiftErrorVal); } @@ -3843,16 +3910,20 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) { void Verifier::visitAllocaInst(AllocaInst &AI) { SmallPtrSet Visited; - Assert(AI.getAllocatedType()->isSized(&Visited), - "Cannot allocate unsized type", &AI); - Assert(AI.getArraySize()->getType()->isIntegerTy(), - "Alloca array size must have integer type", &AI); + Check(AI.getAllocatedType()->isSized(&Visited), + "Cannot allocate unsized type", &AI); + Check(AI.getArraySize()->getType()->isIntegerTy(), + "Alloca array size must have integer type", &AI); if (MaybeAlign A = AI.getAlign()) { - Assert(A->value() <= Value::MaximumAlignment, - "huge alignment values are unsupported", &AI); + Check(A->value() <= Value::MaximumAlignment, + "huge alignment values are unsupported", &AI); } if (AI.isSwiftError()) { + Check(AI.getAllocatedType()->isPointerTy(), + "swifterror alloca must have pointer type", &AI); + Check(!AI.isArrayAllocation(), + "swifterror alloca must not be array allocation", &AI); verifySwiftErrorValue(&AI); } @@ -3861,64 +3932,65 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { Type *ElTy = CXI.getOperand(1)->getType(); - Assert(ElTy->isIntOrPtrTy(), - "cmpxchg operand must have integer or pointer type", ElTy, &CXI); + Check(ElTy->isIntOrPtrTy(), + "cmpxchg operand must have integer 
or pointer type", ElTy, &CXI); checkAtomicMemAccessSize(ElTy, &CXI); visitInstruction(CXI); } void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { - Assert(RMWI.getOrdering() != AtomicOrdering::Unordered, - "atomicrmw instructions cannot be unordered.", &RMWI); + Check(RMWI.getOrdering() != AtomicOrdering::Unordered, + "atomicrmw instructions cannot be unordered.", &RMWI); auto Op = RMWI.getOperation(); Type *ElTy = RMWI.getOperand(1)->getType(); if (Op == AtomicRMWInst::Xchg) { - Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " + - AtomicRMWInst::getOperationName(Op) + - " operand must have integer or floating point type!", - &RMWI, ElTy); + Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy() || + ElTy->isPointerTy(), + "atomicrmw " + AtomicRMWInst::getOperationName(Op) + + " operand must have integer or floating point type!", + &RMWI, ElTy); } else if (AtomicRMWInst::isFPOperation(Op)) { - Assert(ElTy->isFloatingPointTy(), "atomicrmw " + - AtomicRMWInst::getOperationName(Op) + - " operand must have floating point type!", - &RMWI, ElTy); + Check(ElTy->isFloatingPointTy(), + "atomicrmw " + AtomicRMWInst::getOperationName(Op) + + " operand must have floating point type!", + &RMWI, ElTy); } else { - Assert(ElTy->isIntegerTy(), "atomicrmw " + - AtomicRMWInst::getOperationName(Op) + - " operand must have integer type!", - &RMWI, ElTy); + Check(ElTy->isIntegerTy(), + "atomicrmw " + AtomicRMWInst::getOperationName(Op) + + " operand must have integer type!", + &RMWI, ElTy); } checkAtomicMemAccessSize(ElTy, &RMWI); - Assert(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP, - "Invalid binary operation!", &RMWI); + Check(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP, + "Invalid binary operation!", &RMWI); visitInstruction(RMWI); } void Verifier::visitFenceInst(FenceInst &FI) { const AtomicOrdering Ordering = FI.getOrdering(); - Assert(Ordering == AtomicOrdering::Acquire || - Ordering == AtomicOrdering::Release || - Ordering == AtomicOrdering::AcquireRelease || - Ordering == AtomicOrdering::SequentiallyConsistent, - "fence instructions may only have acquire, release, acq_rel, or " - "seq_cst ordering.", - &FI); + Check(Ordering == AtomicOrdering::Acquire || + Ordering == AtomicOrdering::Release || + Ordering == AtomicOrdering::AcquireRelease || + Ordering == AtomicOrdering::SequentiallyConsistent, + "fence instructions may only have acquire, release, acq_rel, or " + "seq_cst ordering.", + &FI); visitInstruction(FI); } void Verifier::visitExtractValueInst(ExtractValueInst &EVI) { - Assert(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(), - EVI.getIndices()) == EVI.getType(), - "Invalid ExtractValueInst operands!", &EVI); + Check(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(), + EVI.getIndices()) == EVI.getType(), + "Invalid ExtractValueInst operands!", &EVI); visitInstruction(EVI); } void Verifier::visitInsertValueInst(InsertValueInst &IVI) { - Assert(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(), - IVI.getIndices()) == - IVI.getOperand(1)->getType(), - "Invalid InsertValueInst operands!", &IVI); + Check(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(), + IVI.getIndices()) == + IVI.getOperand(1)->getType(), + "Invalid InsertValueInst operands!", &IVI); visitInstruction(IVI); } @@ -3936,7 +4008,7 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { BasicBlock *BB = I.getParent(); Function *F = BB->getParent(); - Assert(BB != 
&F->getEntryBlock(), "EH pad cannot be in entry block.", &I); + Check(BB != &F->getEntryBlock(), "EH pad cannot be in entry block.", &I); if (auto *LPI = dyn_cast(&I)) { // The landingpad instruction defines its parent as a landing pad block. The @@ -3944,22 +4016,22 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { // invoke. for (BasicBlock *PredBB : predecessors(BB)) { const auto *II = dyn_cast(PredBB->getTerminator()); - Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, - "Block containing LandingPadInst must be jumped to " - "only by the unwind edge of an invoke.", - LPI); + Check(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, + "Block containing LandingPadInst must be jumped to " + "only by the unwind edge of an invoke.", + LPI); } return; } if (auto *CPI = dyn_cast(&I)) { if (!pred_empty(BB)) - Assert(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(), - "Block containg CatchPadInst must be jumped to " - "only by its catchswitch.", - CPI); - Assert(BB != CPI->getCatchSwitch()->getUnwindDest(), - "Catchswitch cannot unwind to one of its catchpads", - CPI->getCatchSwitch(), CPI); + Check(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(), + "Block containg CatchPadInst must be jumped to " + "only by its catchswitch.", + CPI); + Check(BB != CPI->getCatchSwitch()->getUnwindDest(), + "Catchswitch cannot unwind to one of its catchpads", + CPI->getCatchSwitch(), CPI); return; } @@ -3971,39 +4043,39 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { Instruction *TI = PredBB->getTerminator(); Value *FromPad; if (auto *II = dyn_cast(TI)) { - Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB, - "EH pad must be jumped to via an unwind edge", ToPad, II); + Check(II->getUnwindDest() == BB && II->getNormalDest() != BB, + "EH pad must be jumped to via an unwind edge", ToPad, II); if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet)) FromPad = Bundle->Inputs[0]; else FromPad = ConstantTokenNone::get(II->getContext()); } else if (auto *CRI = dyn_cast(TI)) { FromPad = CRI->getOperand(0); - Assert(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI); + Check(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI); } else if (auto *CSI = dyn_cast(TI)) { FromPad = CSI; } else { - Assert(false, "EH pad must be jumped to via an unwind edge", ToPad, TI); + Check(false, "EH pad must be jumped to via an unwind edge", ToPad, TI); } // The edge may exit from zero or more nested pads. SmallSet Seen; for (;; FromPad = getParentPad(FromPad)) { - Assert(FromPad != ToPad, - "EH pad cannot handle exceptions raised within it", FromPad, TI); + Check(FromPad != ToPad, + "EH pad cannot handle exceptions raised within it", FromPad, TI); if (FromPad == ToPadParent) { // This is a legal unwind edge. break; } - Assert(!isa(FromPad), - "A single unwind edge may only enter one EH pad", TI); - Assert(Seen.insert(FromPad).second, - "EH pad jumps through a cycle of pads", FromPad); + Check(!isa(FromPad), + "A single unwind edge may only enter one EH pad", TI); + Check(Seen.insert(FromPad).second, "EH pad jumps through a cycle of pads", + FromPad); // This will be diagnosed on the corresponding instruction already. We // need the extra check here to make sure getParentPad() works. 
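// A minimal standalone sketch, not the LLVM API, of the parent-pad walk just
// shown: starting from the pad an unwind edge leaves, follow parent links
// until the target pad's parent is reached, rejecting repeats (a cycle of
// pads) along the way. Pad, its Parent pointer, and walkToParent are
// hypothetical simplifications of getParentPad() and the Seen set.
#include <cassert>
#include <set>

struct Pad { const Pad *Parent; };

static bool walkToParent(const Pad *From, const Pad *ToParent) {
  std::set<const Pad *> Seen;
  for (;; From = From->Parent) {
    if (From == ToParent)
      return true; // a legal unwind edge
    if (!From || !Seen.insert(From).second)
      return false; // ran off the chain, or jumped through a cycle of pads
  }
}

int main() {
  Pad Root{nullptr}, Mid{&Root}, Leaf{&Mid};
  assert(walkToParent(&Leaf, &Root));
  Pad A{nullptr}, B{&A};
  A.Parent = &B; // a cycle of pads
  assert(!walkToParent(&A, nullptr));
}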
- Assert(isa(FromPad) || isa(FromPad), - "Parent pad must be catchpad/cleanuppad/catchswitch", TI); + Check(isa(FromPad) || isa(FromPad), + "Parent pad must be catchpad/cleanuppad/catchswitch", TI); } } } @@ -4011,38 +4083,37 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // The landingpad instruction is ill-formed if it doesn't have any clauses and // isn't a cleanup. - Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(), - "LandingPadInst needs at least one clause or to be a cleanup.", &LPI); + Check(LPI.getNumClauses() > 0 || LPI.isCleanup(), + "LandingPadInst needs at least one clause or to be a cleanup.", &LPI); visitEHPadPredecessors(LPI); if (!LandingPadResultTy) LandingPadResultTy = LPI.getType(); else - Assert(LandingPadResultTy == LPI.getType(), - "The landingpad instruction should have a consistent result type " - "inside a function.", - &LPI); + Check(LandingPadResultTy == LPI.getType(), + "The landingpad instruction should have a consistent result type " + "inside a function.", + &LPI); Function *F = LPI.getParent()->getParent(); - Assert(F->hasPersonalityFn(), - "LandingPadInst needs to be in a function with a personality.", &LPI); + Check(F->hasPersonalityFn(), + "LandingPadInst needs to be in a function with a personality.", &LPI); // The landingpad instruction must be the first non-PHI instruction in the // block. - Assert(LPI.getParent()->getLandingPadInst() == &LPI, - "LandingPadInst not the first non-PHI instruction in the block.", - &LPI); + Check(LPI.getParent()->getLandingPadInst() == &LPI, + "LandingPadInst not the first non-PHI instruction in the block.", &LPI); for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) { Constant *Clause = LPI.getClause(i); if (LPI.isCatch(i)) { - Assert(isa(Clause->getType()), - "Catch operand does not have pointer type!", &LPI); + Check(isa(Clause->getType()), + "Catch operand does not have pointer type!", &LPI); } else { - Assert(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI); - Assert(isa(Clause) || isa(Clause), - "Filter operand is not an array of constants!", &LPI); + Check(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI); + Check(isa(Clause) || isa(Clause), + "Filter operand is not an array of constants!", &LPI); } } @@ -4050,16 +4121,16 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { } void Verifier::visitResumeInst(ResumeInst &RI) { - Assert(RI.getFunction()->hasPersonalityFn(), - "ResumeInst needs to be in a function with a personality.", &RI); + Check(RI.getFunction()->hasPersonalityFn(), + "ResumeInst needs to be in a function with a personality.", &RI); if (!LandingPadResultTy) LandingPadResultTy = RI.getValue()->getType(); else - Assert(LandingPadResultTy == RI.getValue()->getType(), - "The resume instruction should have a consistent result type " - "inside a function.", - &RI); + Check(LandingPadResultTy == RI.getValue()->getType(), + "The resume instruction should have a consistent result type " + "inside a function.", + &RI); visitTerminator(RI); } @@ -4068,26 +4139,26 @@ void Verifier::visitCatchPadInst(CatchPadInst &CPI) { BasicBlock *BB = CPI.getParent(); Function *F = BB->getParent(); - Assert(F->hasPersonalityFn(), - "CatchPadInst needs to be in a function with a personality.", &CPI); + Check(F->hasPersonalityFn(), + "CatchPadInst needs to be in a function with a personality.", &CPI); - Assert(isa(CPI.getParentPad()), - "CatchPadInst needs to be directly nested in a CatchSwitchInst.", - 
CPI.getParentPad()); + Check(isa(CPI.getParentPad()), + "CatchPadInst needs to be directly nested in a CatchSwitchInst.", + CPI.getParentPad()); // The catchpad instruction must be the first non-PHI instruction in the // block. - Assert(BB->getFirstNonPHI() == &CPI, - "CatchPadInst not the first non-PHI instruction in the block.", &CPI); + Check(BB->getFirstNonPHI() == &CPI, + "CatchPadInst not the first non-PHI instruction in the block.", &CPI); visitEHPadPredecessors(CPI); visitFuncletPadInst(CPI); } void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) { - Assert(isa(CatchReturn.getOperand(0)), - "CatchReturnInst needs to be provided a CatchPad", &CatchReturn, - CatchReturn.getOperand(0)); + Check(isa(CatchReturn.getOperand(0)), + "CatchReturnInst needs to be provided a CatchPad", &CatchReturn, + CatchReturn.getOperand(0)); visitTerminator(CatchReturn); } @@ -4096,18 +4167,17 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) { BasicBlock *BB = CPI.getParent(); Function *F = BB->getParent(); - Assert(F->hasPersonalityFn(), - "CleanupPadInst needs to be in a function with a personality.", &CPI); + Check(F->hasPersonalityFn(), + "CleanupPadInst needs to be in a function with a personality.", &CPI); // The cleanuppad instruction must be the first non-PHI instruction in the // block. - Assert(BB->getFirstNonPHI() == &CPI, - "CleanupPadInst not the first non-PHI instruction in the block.", - &CPI); + Check(BB->getFirstNonPHI() == &CPI, + "CleanupPadInst not the first non-PHI instruction in the block.", &CPI); auto *ParentPad = CPI.getParentPad(); - Assert(isa(ParentPad) || isa(ParentPad), - "CleanupPadInst has an invalid parent.", &CPI); + Check(isa(ParentPad) || isa(ParentPad), + "CleanupPadInst has an invalid parent.", &CPI); visitEHPadPredecessors(CPI); visitFuncletPadInst(CPI); @@ -4121,8 +4191,8 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) { while (!Worklist.empty()) { FuncletPadInst *CurrentPad = Worklist.pop_back_val(); - Assert(Seen.insert(CurrentPad).second, - "FuncletPadInst must not be nested within itself", CurrentPad); + Check(Seen.insert(CurrentPad).second, + "FuncletPadInst must not be nested within itself", CurrentPad); Value *UnresolvedAncestorPad = nullptr; for (User *U : CurrentPad->users()) { BasicBlock *UnwindDest; @@ -4150,7 +4220,7 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) { Worklist.push_back(CPI); continue; } else { - Assert(isa(U), "Bogus funclet pad use", U); + Check(isa(U), "Bogus funclet pad use", U); continue; } @@ -4200,10 +4270,11 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) { // This unwind edge exits FPI. Make sure it agrees with other // such edges. 
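// A minimal standalone sketch, not the LLVM API, of the consistency rule the
// comment above states: the first unwind edge that exits the funclet pad
// fixes the expected unwind destination, and every later exiting edge must
// match it. The vector of integer destination ids is a hypothetical stand-in
// for the pads' unwind destinations.
#include <cassert>
#include <vector>

static bool unwindDestsAgree(const std::vector<int> &ExitDests) {
  for (size_t I = 1; I < ExitDests.size(); ++I)
    if (ExitDests[I] != ExitDests[0]) // must match the first edge's dest
      return false;
  return true;
}

int main() {
  assert(unwindDestsAgree({7, 7, 7}));
  assert(!unwindDestsAgree({7, 3})); // disagreeing unwind dests are invalid
}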
if (FirstUser) { - Assert(UnwindPad == FirstUnwindPad, "Unwind edges out of a funclet " - "pad must have the same unwind " - "dest", - &FPI, U, FirstUser); + Check(UnwindPad == FirstUnwindPad, + "Unwind edges out of a funclet " + "pad must have the same unwind " + "dest", + &FPI, U, FirstUser); } else { FirstUser = U; FirstUnwindPad = UnwindPad; @@ -4262,10 +4333,10 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) { SwitchUnwindPad = SwitchUnwindDest->getFirstNonPHI(); else SwitchUnwindPad = ConstantTokenNone::get(FPI.getContext()); - Assert(SwitchUnwindPad == FirstUnwindPad, - "Unwind edges out of a catch must have the same unwind dest as " - "the parent catchswitch", - &FPI, FirstUser, CatchSwitch); + Check(SwitchUnwindPad == FirstUnwindPad, + "Unwind edges out of a catch must have the same unwind dest as " + "the parent catchswitch", + &FPI, FirstUser, CatchSwitch); } } @@ -4276,38 +4347,38 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { BasicBlock *BB = CatchSwitch.getParent(); Function *F = BB->getParent(); - Assert(F->hasPersonalityFn(), - "CatchSwitchInst needs to be in a function with a personality.", - &CatchSwitch); + Check(F->hasPersonalityFn(), + "CatchSwitchInst needs to be in a function with a personality.", + &CatchSwitch); // The catchswitch instruction must be the first non-PHI instruction in the // block. - Assert(BB->getFirstNonPHI() == &CatchSwitch, - "CatchSwitchInst not the first non-PHI instruction in the block.", - &CatchSwitch); + Check(BB->getFirstNonPHI() == &CatchSwitch, + "CatchSwitchInst not the first non-PHI instruction in the block.", + &CatchSwitch); auto *ParentPad = CatchSwitch.getParentPad(); - Assert(isa(ParentPad) || isa(ParentPad), - "CatchSwitchInst has an invalid parent.", ParentPad); + Check(isa(ParentPad) || isa(ParentPad), + "CatchSwitchInst has an invalid parent.", ParentPad); if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) { Instruction *I = UnwindDest->getFirstNonPHI(); - Assert(I->isEHPad() && !isa(I), - "CatchSwitchInst must unwind to an EH block which is not a " - "landingpad.", - &CatchSwitch); + Check(I->isEHPad() && !isa(I), + "CatchSwitchInst must unwind to an EH block which is not a " + "landingpad.", + &CatchSwitch); // Record catchswitch sibling unwinds for verifySiblingFuncletUnwinds if (getParentPad(I) == ParentPad) SiblingFuncletInfo[&CatchSwitch] = &CatchSwitch; } - Assert(CatchSwitch.getNumHandlers() != 0, - "CatchSwitchInst cannot have empty handler list", &CatchSwitch); + Check(CatchSwitch.getNumHandlers() != 0, + "CatchSwitchInst cannot have empty handler list", &CatchSwitch); for (BasicBlock *Handler : CatchSwitch.handlers()) { - Assert(isa(Handler->getFirstNonPHI()), - "CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler); + Check(isa(Handler->getFirstNonPHI()), + "CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler); } visitEHPadPredecessors(CatchSwitch); @@ -4315,16 +4386,16 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { } void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) { - Assert(isa(CRI.getOperand(0)), - "CleanupReturnInst needs to be provided a CleanupPad", &CRI, - CRI.getOperand(0)); + Check(isa(CRI.getOperand(0)), + "CleanupReturnInst needs to be provided a CleanupPad", &CRI, + CRI.getOperand(0)); if (BasicBlock *UnwindDest = CRI.getUnwindDest()) { Instruction *I = UnwindDest->getFirstNonPHI(); - Assert(I->isEHPad() && !isa(I), - "CleanupReturnInst must unwind to an EH block which is not a " - "landingpad.", 
- &CRI); + Check(I->isEHPad() && !isa(I), + "CleanupReturnInst must unwind to an EH block which is not a " + "landingpad.", + &CRI); } visitTerminator(CRI); @@ -4351,39 +4422,45 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { return; const Use &U = I.getOperandUse(i); - Assert(DT.dominates(Op, U), - "Instruction does not dominate all uses!", Op, &I); + Check(DT.dominates(Op, U), "Instruction does not dominate all uses!", Op, &I); } void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { - Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null " - "apply only to pointer types", &I); - Assert((isa(I) || isa(I)), - "dereferenceable, dereferenceable_or_null apply only to load" - " and inttoptr instructions, use attributes for calls or invokes", &I); - Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null " - "take one operand!", &I); + Check(I.getType()->isPointerTy(), + "dereferenceable, dereferenceable_or_null " + "apply only to pointer types", + &I); + Check((isa(I) || isa(I)), + "dereferenceable, dereferenceable_or_null apply only to load" + " and inttoptr instructions, use attributes for calls or invokes", + &I); + Check(MD->getNumOperands() == 1, + "dereferenceable, dereferenceable_or_null " + "take one operand!", + &I); ConstantInt *CI = mdconst::dyn_extract(MD->getOperand(0)); - Assert(CI && CI->getType()->isIntegerTy(64), "dereferenceable, " - "dereferenceable_or_null metadata value must be an i64!", &I); + Check(CI && CI->getType()->isIntegerTy(64), + "dereferenceable, " + "dereferenceable_or_null metadata value must be an i64!", + &I); } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { - Assert(MD->getNumOperands() >= 2, - "!prof annotations should have no less than 2 operands", MD); + Check(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); // Check first operand. - Assert(MD->getOperand(0) != nullptr, "first operand should not be null", MD); - Assert(isa(MD->getOperand(0)), - "expected string with name of the !prof annotation", MD); + Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD); + Check(isa(MD->getOperand(0)), + "expected string with name of the !prof annotation", MD); MDString *MDS = cast(MD->getOperand(0)); StringRef ProfName = MDS->getString(); // Check consistency of !prof branch_weights metadata. 
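// A minimal standalone sketch, not the LLVM API, of the branch_weights shape
// checked below: operand 0 names the annotation and each successor
// contributes one weight, so a well-formed node carries 1 + NumSuccessors
// operands. The vector-of-strings metadata model is a hypothetical
// simplification of MDString/ConstantInt operands.
#include <cassert>
#include <string>
#include <vector>

static bool branchWeightsOk(const std::vector<std::string> &Ops,
                            unsigned NumSuccessors) {
  return !Ops.empty() && Ops[0] == "branch_weights" &&
         Ops.size() == 1 + NumSuccessors; // one weight per successor
}

int main() {
  assert(branchWeightsOk({"branch_weights", "90", "10"}, 2)); // conditional br
  assert(!branchWeightsOk({"branch_weights", "90"}, 2));      // missing weight
}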
if (ProfName.equals("branch_weights")) { if (isa(&I)) { - Assert(MD->getNumOperands() == 2 || MD->getNumOperands() == 3, - "Wrong number of InvokeInst branch_weights operands", MD); + Check(MD->getNumOperands() == 2 || MD->getNumOperands() == 3, + "Wrong number of InvokeInst branch_weights operands", MD); } else { unsigned ExpectedNumOperands = 0; if (BranchInst *BI = dyn_cast(&I)) @@ -4400,94 +4477,112 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { CheckFailed("!prof branch_weights are not allowed for this instruction", MD); - Assert(MD->getNumOperands() == 1 + ExpectedNumOperands, - "Wrong number of operands", MD); + Check(MD->getNumOperands() == 1 + ExpectedNumOperands, + "Wrong number of operands", MD); } for (unsigned i = 1; i < MD->getNumOperands(); ++i) { auto &MDO = MD->getOperand(i); - Assert(MDO, "second operand should not be null", MD); - Assert(mdconst::dyn_extract(MDO), - "!prof brunch_weights operand is not a const int"); + Check(MDO, "second operand should not be null", MD); + Check(mdconst::dyn_extract(MDO), + "!prof brunch_weights operand is not a const int"); } } } void Verifier::visitAnnotationMetadata(MDNode *Annotation) { - Assert(isa(Annotation), "annotation must be a tuple"); - Assert(Annotation->getNumOperands() >= 1, - "annotation must have at least one operand"); + Check(isa(Annotation), "annotation must be a tuple"); + Check(Annotation->getNumOperands() >= 1, + "annotation must have at least one operand"); for (const MDOperand &Op : Annotation->operands()) - Assert(isa(Op.get()), "operands must be strings"); + Check(isa(Op.get()), "operands must be strings"); } void Verifier::visitAliasScopeMetadata(const MDNode *MD) { unsigned NumOps = MD->getNumOperands(); - Assert(NumOps >= 2 && NumOps <= 3, "scope must have two or three operands", - MD); - Assert(MD->getOperand(0).get() == MD || isa(MD->getOperand(0)), - "first scope operand must be self-referential or string", MD); + Check(NumOps >= 2 && NumOps <= 3, "scope must have two or three operands", + MD); + Check(MD->getOperand(0).get() == MD || isa(MD->getOperand(0)), + "first scope operand must be self-referential or string", MD); if (NumOps == 3) - Assert(isa(MD->getOperand(2)), - "third scope operand must be string (if used)", MD); + Check(isa(MD->getOperand(2)), + "third scope operand must be string (if used)", MD); MDNode *Domain = dyn_cast(MD->getOperand(1)); - Assert(Domain != nullptr, "second scope operand must be MDNode", MD); + Check(Domain != nullptr, "second scope operand must be MDNode", MD); unsigned NumDomainOps = Domain->getNumOperands(); - Assert(NumDomainOps >= 1 && NumDomainOps <= 2, - "domain must have one or two operands", Domain); - Assert(Domain->getOperand(0).get() == Domain || - isa(Domain->getOperand(0)), - "first domain operand must be self-referential or string", Domain); + Check(NumDomainOps >= 1 && NumDomainOps <= 2, + "domain must have one or two operands", Domain); + Check(Domain->getOperand(0).get() == Domain || + isa(Domain->getOperand(0)), + "first domain operand must be self-referential or string", Domain); if (NumDomainOps == 2) - Assert(isa(Domain->getOperand(1)), - "second domain operand must be string (if used)", Domain); + Check(isa(Domain->getOperand(1)), + "second domain operand must be string (if used)", Domain); } void Verifier::visitAliasScopeListMetadata(const MDNode *MD) { for (const MDOperand &Op : MD->operands()) { const MDNode *OpMD = dyn_cast(Op); - Assert(OpMD != nullptr, "scope list must consist of MDNodes", MD); + Check(OpMD != nullptr, 
"scope list must consist of MDNodes", MD); visitAliasScopeMetadata(OpMD); } } +void Verifier::visitAccessGroupMetadata(const MDNode *MD) { + auto IsValidAccessScope = [](const MDNode *MD) { + return MD->getNumOperands() == 0 && MD->isDistinct(); + }; + + // It must be either an access scope itself... + if (IsValidAccessScope(MD)) + return; + + // ...or a list of access scopes. + for (const MDOperand &Op : MD->operands()) { + const MDNode *OpMD = dyn_cast(Op); + Check(OpMD != nullptr, "Access scope list must consist of MDNodes", MD); + Check(IsValidAccessScope(OpMD), + "Access scope list contains invalid access scope", MD); + } +} + /// verifyInstruction - Verify that an instruction is well formed. /// void Verifier::visitInstruction(Instruction &I) { BasicBlock *BB = I.getParent(); - Assert(BB, "Instruction not embedded in basic block!", &I); + Check(BB, "Instruction not embedded in basic block!", &I); if (!isa(I)) { // Check that non-phi nodes are not self referential for (User *U : I.users()) { - Assert(U != (User *)&I || !DT.isReachableFromEntry(BB), - "Only PHI nodes may reference their own value!", &I); + Check(U != (User *)&I || !DT.isReachableFromEntry(BB), + "Only PHI nodes may reference their own value!", &I); } } // Check that void typed values don't have names - Assert(!I.getType()->isVoidTy() || !I.hasName(), - "Instruction has a name, but provides a void value!", &I); + Check(!I.getType()->isVoidTy() || !I.hasName(), + "Instruction has a name, but provides a void value!", &I); // Check that the return value of the instruction is either void or a legal // value type. - Assert(I.getType()->isVoidTy() || I.getType()->isFirstClassType(), - "Instruction returns a non-scalar type!", &I); + Check(I.getType()->isVoidTy() || I.getType()->isFirstClassType(), + "Instruction returns a non-scalar type!", &I); // Check that the instruction doesn't produce metadata. Calls are already // checked against the callee type. - Assert(!I.getType()->isMetadataTy() || isa(I) || isa(I), - "Invalid use of metadata!", &I); + Check(!I.getType()->isMetadataTy() || isa(I) || isa(I), + "Invalid use of metadata!", &I); // Check that all uses of the instruction, if they are instructions // themselves, actually have parent basic blocks. If the use is not an // instruction, it is an error! for (Use &U : I.uses()) { if (Instruction *Used = dyn_cast(U.getUser())) - Assert(Used->getParent() != nullptr, - "Instruction referencing" - " instruction not embedded in a basic block!", - &I, Used); + Check(Used->getParent() != nullptr, + "Instruction referencing" + " instruction not embedded in a basic block!", + &I, Used); else { CheckFailed("Use of instruction is not an instruction!", U); return; @@ -4499,12 +4594,12 @@ void Verifier::visitInstruction(Instruction &I) { const CallBase *CBI = dyn_cast(&I); for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I); + Check(I.getOperand(i) != nullptr, "Instruction has null operand!", &I); // Check to make sure that only first-class-values are operands to // instructions. if (!I.getOperand(i)->getType()->isFirstClassType()) { - Assert(false, "Instruction operands must be first-class values!", &I); + Check(false, "Instruction operands must be first-class values!", &I); } if (Function *F = dyn_cast(I.getOperand(i))) { @@ -4520,43 +4615,43 @@ void Verifier::visitInstruction(Instruction &I) { // taken. 
Ignore cases where the address of the intrinsic function is used // as the argument of operand bundle "clang.arc.attachedcall" as those // cases are handled in verifyAttachedCallBundle. - Assert((!F->isIntrinsic() || - (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) || - IsAttachedCallOperand(F, CBI, i)), - "Cannot take the address of an intrinsic!", &I); - Assert( - !F->isIntrinsic() || isa(I) || - F->getIntrinsicID() == Intrinsic::donothing || - F->getIntrinsicID() == Intrinsic::seh_try_begin || - F->getIntrinsicID() == Intrinsic::seh_try_end || - F->getIntrinsicID() == Intrinsic::seh_scope_begin || - F->getIntrinsicID() == Intrinsic::seh_scope_end || - F->getIntrinsicID() == Intrinsic::coro_resume || - F->getIntrinsicID() == Intrinsic::coro_destroy || - F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void || - F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 || - F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint || - F->getIntrinsicID() == Intrinsic::wasm_rethrow || - IsAttachedCallOperand(F, CBI, i), - "Cannot invoke an intrinsic other than donothing, patchpoint, " - "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall", - &I); - Assert(F->getParent() == &M, "Referencing function in another module!", - &I, &M, F, F->getParent()); + Check((!F->isIntrinsic() || + (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) || + IsAttachedCallOperand(F, CBI, i)), + "Cannot take the address of an intrinsic!", &I); + Check(!F->isIntrinsic() || isa(I) || + F->getIntrinsicID() == Intrinsic::donothing || + F->getIntrinsicID() == Intrinsic::seh_try_begin || + F->getIntrinsicID() == Intrinsic::seh_try_end || + F->getIntrinsicID() == Intrinsic::seh_scope_begin || + F->getIntrinsicID() == Intrinsic::seh_scope_end || + F->getIntrinsicID() == Intrinsic::coro_resume || + F->getIntrinsicID() == Intrinsic::coro_destroy || + F->getIntrinsicID() == + Intrinsic::experimental_patchpoint_void || + F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 || + F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint || + F->getIntrinsicID() == Intrinsic::wasm_rethrow || + IsAttachedCallOperand(F, CBI, i), + "Cannot invoke an intrinsic other than donothing, patchpoint, " + "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall", + &I); + Check(F->getParent() == &M, "Referencing function in another module!", &I, + &M, F, F->getParent()); } else if (BasicBlock *OpBB = dyn_cast(I.getOperand(i))) { - Assert(OpBB->getParent() == BB->getParent(), - "Referring to a basic block in another function!", &I); + Check(OpBB->getParent() == BB->getParent(), + "Referring to a basic block in another function!", &I); } else if (Argument *OpArg = dyn_cast(I.getOperand(i))) { - Assert(OpArg->getParent() == BB->getParent(), - "Referring to an argument in another function!", &I); + Check(OpArg->getParent() == BB->getParent(), + "Referring to an argument in another function!", &I); } else if (GlobalValue *GV = dyn_cast(I.getOperand(i))) { - Assert(GV->getParent() == &M, "Referencing global in another module!", &I, - &M, GV, GV->getParent()); + Check(GV->getParent() == &M, "Referencing global in another module!", &I, + &M, GV, GV->getParent()); } else if (isa(I.getOperand(i))) { verifyDominatesUse(I, i); } else if (isa(I.getOperand(i))) { - Assert(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i), - "Cannot take the address of an inline asm!", &I); + Check(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i), + "Cannot take the address of an 
inline asm!", &I); } else if (ConstantExpr *CE = dyn_cast(I.getOperand(i))) { if (CE->getType()->isPtrOrPtrVectorTy()) { // If we have a ConstantExpr pointer, we need to see if it came from an @@ -4567,39 +4662,39 @@ void Verifier::visitInstruction(Instruction &I) { } if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) { - Assert(I.getType()->isFPOrFPVectorTy(), - "fpmath requires a floating point result!", &I); - Assert(MD->getNumOperands() == 1, "fpmath takes one operand!", &I); + Check(I.getType()->isFPOrFPVectorTy(), + "fpmath requires a floating point result!", &I); + Check(MD->getNumOperands() == 1, "fpmath takes one operand!", &I); if (ConstantFP *CFP0 = mdconst::dyn_extract_or_null(MD->getOperand(0))) { const APFloat &Accuracy = CFP0->getValueAPF(); - Assert(&Accuracy.getSemantics() == &APFloat::IEEEsingle(), - "fpmath accuracy must have float type", &I); - Assert(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(), - "fpmath accuracy not a positive number!", &I); + Check(&Accuracy.getSemantics() == &APFloat::IEEEsingle(), + "fpmath accuracy must have float type", &I); + Check(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(), + "fpmath accuracy not a positive number!", &I); } else { - Assert(false, "invalid fpmath accuracy!", &I); + Check(false, "invalid fpmath accuracy!", &I); } } if (MDNode *Range = I.getMetadata(LLVMContext::MD_range)) { - Assert(isa(I) || isa(I) || isa(I), - "Ranges are only for loads, calls and invokes!", &I); + Check(isa(I) || isa(I) || isa(I), + "Ranges are only for loads, calls and invokes!", &I); visitRangeMetadata(I, Range, I.getType()); } if (I.hasMetadata(LLVMContext::MD_invariant_group)) { - Assert(isa(I) || isa(I), - "invariant.group metadata is only for loads and stores", &I); + Check(isa(I) || isa(I), + "invariant.group metadata is only for loads and stores", &I); } if (I.getMetadata(LLVMContext::MD_nonnull)) { - Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types", - &I); - Assert(isa(I), - "nonnull applies only to load instructions, use attributes" - " for calls or invokes", - &I); + Check(I.getType()->isPointerTy(), "nonnull applies only to pointer types", + &I); + Check(isa(I), + "nonnull applies only to load instructions, use attributes" + " for calls or invokes", + &I); } if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable)) @@ -4616,20 +4711,25 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope)) visitAliasScopeListMetadata(MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_access_group)) + visitAccessGroupMetadata(MD); + if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) { - Assert(I.getType()->isPointerTy(), "align applies only to pointer types", - &I); - Assert(isa(I), "align applies only to load instructions, " - "use attributes for calls or invokes", &I); - Assert(AlignMD->getNumOperands() == 1, "align takes one operand!", &I); + Check(I.getType()->isPointerTy(), "align applies only to pointer types", + &I); + Check(isa(I), + "align applies only to load instructions, " + "use attributes for calls or invokes", + &I); + Check(AlignMD->getNumOperands() == 1, "align takes one operand!", &I); ConstantInt *CI = mdconst::dyn_extract(AlignMD->getOperand(0)); - Assert(CI && CI->getType()->isIntegerTy(64), - "align metadata value must be an i64!", &I); + Check(CI && CI->getType()->isIntegerTy(64), + "align metadata value must be an i64!", &I); uint64_t Align = CI->getZExtValue(); - Assert(isPowerOf2_64(Align), - "align metadata value 
must be a power of 2!", &I); - Assert(Align <= Value::MaximumAlignment, - "alignment is larger that implementation defined limit", &I); + Check(isPowerOf2_64(Align), "align metadata value must be a power of 2!", + &I); + Check(Align <= Value::MaximumAlignment, + "alignment is larger that implementation defined limit", &I); } if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof)) @@ -4639,7 +4739,7 @@ void Verifier::visitInstruction(Instruction &I) { visitAnnotationMetadata(Annotation); if (MDNode *N = I.getDebugLoc().getAsMDNode()) { - AssertDI(isa(N), "invalid !dbg metadata attachment", &I, N); + CheckDI(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N, AreDebugLocsAllowed::Yes); } @@ -4665,8 +4765,8 @@ void Verifier::visitInstruction(Instruction &I) { /// Allow intrinsics to be verified in different ways. void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Function *IF = Call.getCalledFunction(); - Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!", - IF); + Check(IF->isDeclaration(), "Intrinsic functions should never be defined!", + IF); // Verify that the intrinsic prototype lines up with what the .td files // describe. @@ -4681,21 +4781,21 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { SmallVector ArgTys; Intrinsic::MatchIntrinsicTypesResult Res = Intrinsic::matchIntrinsicSignature(IFTy, TableRef, ArgTys); - Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchRet, - "Intrinsic has incorrect return type!", IF); - Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchArg, - "Intrinsic has incorrect argument type!", IF); + Check(Res != Intrinsic::MatchIntrinsicTypes_NoMatchRet, + "Intrinsic has incorrect return type!", IF); + Check(Res != Intrinsic::MatchIntrinsicTypes_NoMatchArg, + "Intrinsic has incorrect argument type!", IF); // Verify if the intrinsic call matches the vararg property. if (IsVarArg) - Assert(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef), - "Intrinsic was not defined with variable arguments!", IF); + Check(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef), + "Intrinsic was not defined with variable arguments!", IF); else - Assert(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef), - "Callsite was not defined with variable arguments!", IF); + Check(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef), + "Callsite was not defined with variable arguments!", IF); // All descriptors should be absorbed by now. - Assert(TableRef.empty(), "Intrinsic has too few arguments!", IF); + Check(TableRef.empty(), "Intrinsic has too few arguments!", IF); // Now that we have the intrinsic ID and the actual argument types (and we // know they are legal for the intrinsic!) get the intrinsic name through the @@ -4703,11 +4803,11 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // the name. const std::string ExpectedName = Intrinsic::getName(ID, ArgTys, IF->getParent(), IFTy); - Assert(ExpectedName == IF->getName(), - "Intrinsic name not mangled correctly for type arguments! " - "Should be: " + - ExpectedName, - IF); + Check(ExpectedName == IF->getName(), + "Intrinsic name not mangled correctly for type arguments! " + "Should be: " + + ExpectedName, + IF); // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. 
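// A minimal standalone sketch, not the LLVM API, of the name-mangling rule
// just checked: an overloaded intrinsic's name must encode its concrete type
// arguments, so the verifier rebuilds the expected name and compares it with
// the declared one. The "llvm.umax" base name and the '.'-joined suffix
// scheme here are illustrative, not LLVM's exact mangling.
#include <cassert>
#include <string>
#include <vector>

static std::string mangle(std::string Base,
                          const std::vector<std::string> &ArgTys) {
  for (const std::string &Ty : ArgTys)
    Base += "." + Ty; // append one suffix per overloaded type argument
  return Base;
}

int main() {
  assert(mangle("llvm.umax", {"i32"}) == "llvm.umax.i32");
  // A declaration whose name does not match the rebuilt name is rejected.
  assert(mangle("llvm.umax", {"i64"}) != "llvm.umax.i32");
}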
@@ -4715,8 +4815,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { if (auto *MD = dyn_cast(V)) visitMetadataAsValue(*MD, Call.getCaller()); if (auto *Const = dyn_cast(V)) - Assert(!Const->getType()->isX86_AMXTy(), - "const x86_amx is not allowed in argument!"); + Check(!Const->getType()->isX86_AMXTy(), + "const x86_amx is not allowed in argument!"); } switch (ID) { @@ -4724,36 +4824,35 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { break; case Intrinsic::assume: { for (auto &Elem : Call.bundle_op_infos()) { - Assert(Elem.Tag->getKey() == "ignore" || - Attribute::isExistingAttribute(Elem.Tag->getKey()), - "tags must be valid attribute names", Call); + Check(Elem.Tag->getKey() == "ignore" || + Attribute::isExistingAttribute(Elem.Tag->getKey()), + "tags must be valid attribute names", Call); Attribute::AttrKind Kind = Attribute::getAttrKindFromName(Elem.Tag->getKey()); unsigned ArgCount = Elem.End - Elem.Begin; if (Kind == Attribute::Alignment) { - Assert(ArgCount <= 3 && ArgCount >= 2, - "alignment assumptions should have 2 or 3 arguments", Call); - Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(), - "first argument should be a pointer", Call); - Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(), - "second argument should be an integer", Call); + Check(ArgCount <= 3 && ArgCount >= 2, + "alignment assumptions should have 2 or 3 arguments", Call); + Check(Call.getOperand(Elem.Begin)->getType()->isPointerTy(), + "first argument should be a pointer", Call); + Check(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(), + "second argument should be an integer", Call); if (ArgCount == 3) - Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(), - "third argument should be an integer if present", Call); + Check(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(), + "third argument should be an integer if present", Call); return; } - Assert(ArgCount <= 2, "too many arguments", Call); + Check(ArgCount <= 2, "too many arguments", Call); if (Kind == Attribute::None) break; if (Attribute::isIntAttrKind(Kind)) { - Assert(ArgCount == 2, "this attribute should have 2 arguments", Call); - Assert(isa(Call.getOperand(Elem.Begin + 1)), - "the second argument should be a constant integral value", Call); + Check(ArgCount == 2, "this attribute should have 2 arguments", Call); + Check(isa(Call.getOperand(Elem.Begin + 1)), + "the second argument should be a constant integral value", Call); } else if (Attribute::canUseAsParamAttr(Kind)) { - Assert((ArgCount) == 1, "this attribute should have one argument", - Call); + Check((ArgCount) == 1, "this attribute should have one argument", Call); } else if (Attribute::canUseAsFnAttr(Kind)) { - Assert((ArgCount) == 0, "this attribute has no argument", Call); + Check((ArgCount) == 0, "this attribute has no argument", Call); } } break; @@ -4763,23 +4862,47 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { if (isa(InfoArg)) break; auto *GV = dyn_cast(InfoArg); - Assert(GV && GV->isConstant() && GV->hasDefinitiveInitializer(), - "info argument of llvm.coro.id must refer to an initialized " - "constant"); + Check(GV && GV->isConstant() && GV->hasDefinitiveInitializer(), + "info argument of llvm.coro.id must refer to an initialized " + "constant"); Constant *Init = GV->getInitializer(); - Assert(isa(Init) || isa(Init), - "info argument of llvm.coro.id must refer to either a struct or " - "an array"); + Check(isa(Init) || isa(Init), + "info argument of 
llvm.coro.id must refer to either a struct or "
+          "an array");
     break;
   }
+  case Intrinsic::fptrunc_round: {
+    // Check the rounding mode
+    Metadata *MD = nullptr;
+    auto *MAV = dyn_cast<MetadataAsValue>(Call.getOperand(1));
+    if (MAV)
+      MD = MAV->getMetadata();
+
+    Check(MD != nullptr, "missing rounding mode argument", Call);
+
+    Check(isa<MDString>(MD),
+          ("invalid value for llvm.fptrunc.round metadata operand"
+           " (the operand should be a string)"),
+          MD);
+
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+    Check(RoundMode && *RoundMode != RoundingMode::Dynamic,
+          "unsupported rounding mode argument", Call);
+    break;
+  }
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#include "llvm/IR/VPIntrinsics.def"
+    visitVPIntrinsic(cast<VPIntrinsic>(Call));
+    break;
 #define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC)                        \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
     visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call));
     break;
   case Intrinsic::dbg_declare: // llvm.dbg.declare
-    Assert(isa<MetadataAsValue>(Call.getArgOperand(0)),
-           "invalid llvm.dbg.declare intrinsic call 1", Call);
+    Check(isa<MetadataAsValue>(Call.getArgOperand(0)),
+          "invalid llvm.dbg.declare intrinsic call 1", Call);
     visitDbgIntrinsic("declare", cast<DbgVariableIntrinsic>(Call));
     break;
   case Intrinsic::dbg_addr: // llvm.dbg.addr
@@ -4794,18 +4917,19 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   case Intrinsic::memcpy:
   case Intrinsic::memcpy_inline:
   case Intrinsic::memmove:
-  case Intrinsic::memset: {
+  case Intrinsic::memset:
+  case Intrinsic::memset_inline: {
     const auto *MI = cast<MemIntrinsic>(&Call);
     auto IsValidAlignment = [&](unsigned Alignment) -> bool {
       return Alignment == 0 || isPowerOf2_32(Alignment);
     };
-    Assert(IsValidAlignment(MI->getDestAlignment()),
-           "alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
-           Call);
+    Check(IsValidAlignment(MI->getDestAlignment()),
+          "alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
+          Call);
     if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
-      Assert(IsValidAlignment(MTI->getSourceAlignment()),
-             "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
-             Call);
+      Check(IsValidAlignment(MTI->getSourceAlignment()),
+            "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
+            Call);
     }
 
     break;
@@ -4818,50 +4942,50 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     ConstantInt *ElementSizeCI =
         cast<ConstantInt>(AMI->getRawElementSizeInBytes());
     const APInt &ElementSizeVal = ElementSizeCI->getValue();
-    Assert(ElementSizeVal.isPowerOf2(),
-           "element size of the element-wise atomic memory intrinsic "
-           "must be a power of 2",
-           Call);
+    Check(ElementSizeVal.isPowerOf2(),
+          "element size of the element-wise atomic memory intrinsic "
+          "must be a power of 2",
+          Call);
 
     auto IsValidAlignment = [&](uint64_t Alignment) {
       return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
     };
     uint64_t DstAlignment = AMI->getDestAlignment();
-    Assert(IsValidAlignment(DstAlignment),
-           "incorrect alignment of the destination argument", Call);
+    Check(IsValidAlignment(DstAlignment),
+          "incorrect alignment of the destination argument", Call);
     if (const auto *AMT = dyn_cast<AtomicMemTransferInst>(AMI)) {
       uint64_t SrcAlignment = AMT->getSourceAlignment();
-      Assert(IsValidAlignment(SrcAlignment),
-             "incorrect alignment of the source argument", Call);
+      Check(IsValidAlignment(SrcAlignment),
+            "incorrect alignment of the source argument", Call);
     }
     break;
   }
   case Intrinsic::call_preallocated_setup: {
     auto *NumArgs = dyn_cast<ConstantInt>(Call.getArgOperand(0));
-    Assert(NumArgs != nullptr,
-           "llvm.call.preallocated.setup argument must be a constant");
+    Check(NumArgs != nullptr,
+          "llvm.call.preallocated.setup argument must be a constant");
     bool FoundCall = false;
     for (User *U : Call.users()) {
       auto *UseCall = dyn_cast<CallBase>(U);
-      Assert(UseCall != nullptr,
-             "Uses of llvm.call.preallocated.setup must be calls");
+      Check(UseCall != nullptr,
+            "Uses of llvm.call.preallocated.setup must be calls");
       const Function *Fn = UseCall->getCalledFunction();
       if (Fn && Fn->getIntrinsicID() == Intrinsic::call_preallocated_arg) {
         auto *AllocArgIndex = dyn_cast<ConstantInt>(UseCall->getArgOperand(1));
-        Assert(AllocArgIndex != nullptr,
-               "llvm.call.preallocated.alloc arg index must be a constant");
+        Check(AllocArgIndex != nullptr,
+              "llvm.call.preallocated.alloc arg index must be a constant");
         auto AllocArgIndexInt = AllocArgIndex->getValue();
-        Assert(AllocArgIndexInt.sge(0) &&
-                   AllocArgIndexInt.slt(NumArgs->getValue()),
-               "llvm.call.preallocated.alloc arg index must be between 0 and "
-               "corresponding "
-               "llvm.call.preallocated.setup's argument count");
+        Check(AllocArgIndexInt.sge(0) &&
+                  AllocArgIndexInt.slt(NumArgs->getValue()),
+              "llvm.call.preallocated.alloc arg index must be between 0 and "
+              "corresponding "
+              "llvm.call.preallocated.setup's argument count");
       } else if (Fn && Fn->getIntrinsicID() ==
                            Intrinsic::call_preallocated_teardown) {
         // nothing to do
       } else {
-        Assert(!FoundCall, "Can have at most one call corresponding to a "
-                           "llvm.call.preallocated.setup");
+        Check(!FoundCall, "Can have at most one call corresponding to a "
+                          "llvm.call.preallocated.setup");
         FoundCall = true;
         size_t NumPreallocatedArgs = 0;
         for (unsigned i = 0; i < UseCall->arg_size(); i++) {
@@ -4869,14 +4993,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
             ++NumPreallocatedArgs;
           }
         }
-        Assert(NumPreallocatedArgs != 0,
-               "cannot use preallocated intrinsics on a call without "
-               "preallocated arguments");
-        Assert(NumArgs->equalsInt(NumPreallocatedArgs),
-               "llvm.call.preallocated.setup arg size must be equal to number "
-               "of preallocated arguments "
-               "at call site",
-               Call, *UseCall);
+        Check(NumPreallocatedArgs != 0,
+              "cannot use preallocated intrinsics on a call without "
+              "preallocated arguments");
+        Check(NumArgs->equalsInt(NumPreallocatedArgs),
+              "llvm.call.preallocated.setup arg size must be equal to number "
+              "of preallocated arguments "
+              "at call site",
+              Call, *UseCall);
         // getOperandBundle() cannot be called if more than one of the operand
         // bundle exists. There is already a check elsewhere for this, so skip
        // here if we see more than one.
@@ -4886,33 +5010,33 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { } auto PreallocatedBundle = UseCall->getOperandBundle(LLVMContext::OB_preallocated); - Assert(PreallocatedBundle, - "Use of llvm.call.preallocated.setup outside intrinsics " - "must be in \"preallocated\" operand bundle"); - Assert(PreallocatedBundle->Inputs.front().get() == &Call, - "preallocated bundle must have token from corresponding " - "llvm.call.preallocated.setup"); + Check(PreallocatedBundle, + "Use of llvm.call.preallocated.setup outside intrinsics " + "must be in \"preallocated\" operand bundle"); + Check(PreallocatedBundle->Inputs.front().get() == &Call, + "preallocated bundle must have token from corresponding " + "llvm.call.preallocated.setup"); } } break; } case Intrinsic::call_preallocated_arg: { auto *Token = dyn_cast(Call.getArgOperand(0)); - Assert(Token && Token->getCalledFunction()->getIntrinsicID() == - Intrinsic::call_preallocated_setup, - "llvm.call.preallocated.arg token argument must be a " - "llvm.call.preallocated.setup"); - Assert(Call.hasFnAttr(Attribute::Preallocated), - "llvm.call.preallocated.arg must be called with a \"preallocated\" " - "call site attribute"); + Check(Token && Token->getCalledFunction()->getIntrinsicID() == + Intrinsic::call_preallocated_setup, + "llvm.call.preallocated.arg token argument must be a " + "llvm.call.preallocated.setup"); + Check(Call.hasFnAttr(Attribute::Preallocated), + "llvm.call.preallocated.arg must be called with a \"preallocated\" " + "call site attribute"); break; } case Intrinsic::call_preallocated_teardown: { auto *Token = dyn_cast(Call.getArgOperand(0)); - Assert(Token && Token->getCalledFunction()->getIntrinsicID() == - Intrinsic::call_preallocated_setup, - "llvm.call.preallocated.teardown token argument must be a " - "llvm.call.preallocated.setup"); + Check(Token && Token->getCalledFunction()->getIntrinsicID() == + Intrinsic::call_preallocated_setup, + "llvm.call.preallocated.teardown token argument must be a " + "llvm.call.preallocated.setup"); break; } case Intrinsic::gcroot: @@ -4921,46 +5045,46 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { if (ID == Intrinsic::gcroot) { AllocaInst *AI = dyn_cast(Call.getArgOperand(0)->stripPointerCasts()); - Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", Call); - Assert(isa(Call.getArgOperand(1)), - "llvm.gcroot parameter #2 must be a constant.", Call); + Check(AI, "llvm.gcroot parameter #1 must be an alloca.", Call); + Check(isa(Call.getArgOperand(1)), + "llvm.gcroot parameter #2 must be a constant.", Call); if (!AI->getAllocatedType()->isPointerTy()) { - Assert(!isa(Call.getArgOperand(1)), - "llvm.gcroot parameter #1 must either be a pointer alloca, " - "or argument #2 must be a non-null constant.", - Call); + Check(!isa(Call.getArgOperand(1)), + "llvm.gcroot parameter #1 must either be a pointer alloca, " + "or argument #2 must be a non-null constant.", + Call); } } - Assert(Call.getParent()->getParent()->hasGC(), - "Enclosing function does not use GC.", Call); + Check(Call.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", Call); break; case Intrinsic::init_trampoline: - Assert(isa(Call.getArgOperand(1)->stripPointerCasts()), - "llvm.init_trampoline parameter #2 must resolve to a function.", - Call); + Check(isa(Call.getArgOperand(1)->stripPointerCasts()), + "llvm.init_trampoline parameter #2 must resolve to a function.", + Call); break; case Intrinsic::prefetch: - Assert(cast(Call.getArgOperand(1))->getZExtValue() < 
2 && - cast(Call.getArgOperand(2))->getZExtValue() < 4, - "invalid arguments to llvm.prefetch", Call); + Check(cast(Call.getArgOperand(1))->getZExtValue() < 2 && + cast(Call.getArgOperand(2))->getZExtValue() < 4, + "invalid arguments to llvm.prefetch", Call); break; case Intrinsic::stackprotector: - Assert(isa(Call.getArgOperand(1)->stripPointerCasts()), - "llvm.stackprotector parameter #2 must resolve to an alloca.", Call); + Check(isa(Call.getArgOperand(1)->stripPointerCasts()), + "llvm.stackprotector parameter #2 must resolve to an alloca.", Call); break; case Intrinsic::localescape: { BasicBlock *BB = Call.getParent(); - Assert(BB == &BB->getParent()->front(), - "llvm.localescape used outside of entry block", Call); - Assert(!SawFrameEscape, - "multiple calls to llvm.localescape in one function", Call); + Check(BB == &BB->getParent()->front(), + "llvm.localescape used outside of entry block", Call); + Check(!SawFrameEscape, "multiple calls to llvm.localescape in one function", + Call); for (Value *Arg : Call.args()) { if (isa(Arg)) continue; // Null values are allowed as placeholders. auto *AI = dyn_cast(Arg->stripPointerCasts()); - Assert(AI && AI->isStaticAlloca(), - "llvm.localescape only accepts static allocas", Call); + Check(AI && AI->isStaticAlloca(), + "llvm.localescape only accepts static allocas", Call); } FrameEscapeInfo[BB->getParent()].first = Call.arg_size(); SawFrameEscape = true; @@ -4969,10 +5093,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::localrecover: { Value *FnArg = Call.getArgOperand(0)->stripPointerCasts(); Function *Fn = dyn_cast(FnArg); - Assert(Fn && !Fn->isDeclaration(), - "llvm.localrecover first " - "argument must be function defined in this module", - Call); + Check(Fn && !Fn->isDeclaration(), + "llvm.localrecover first " + "argument must be function defined in this module", + Call); auto *IdxArg = cast(Call.getArgOperand(2)); auto &Entry = FrameEscapeInfo[Fn]; Entry.second = unsigned( @@ -4982,39 +5106,38 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_gc_statepoint: if (auto *CI = dyn_cast(&Call)) - Assert(!CI->isInlineAsm(), - "gc.statepoint support for inline assembly unimplemented", CI); - Assert(Call.getParent()->getParent()->hasGC(), - "Enclosing function does not use GC.", Call); + Check(!CI->isInlineAsm(), + "gc.statepoint support for inline assembly unimplemented", CI); + Check(Call.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", Call); verifyStatepoint(Call); break; case Intrinsic::experimental_gc_result: { - Assert(Call.getParent()->getParent()->hasGC(), - "Enclosing function does not use GC.", Call); + Check(Call.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", Call); // Are we tied to a statepoint properly? const auto *StatepointCall = dyn_cast(Call.getArgOperand(0)); const Function *StatepointFn = StatepointCall ? StatepointCall->getCalledFunction() : nullptr; - Assert(StatepointFn && StatepointFn->isDeclaration() && - StatepointFn->getIntrinsicID() == - Intrinsic::experimental_gc_statepoint, - "gc.result operand #1 must be from a statepoint", Call, - Call.getArgOperand(0)); - - // Assert that result type matches wrapped callee. 
- const Value *Target = StatepointCall->getArgOperand(2); - auto *PT = cast(Target->getType()); - auto *TargetFuncType = cast(PT->getPointerElementType()); - Assert(Call.getType() == TargetFuncType->getReturnType(), - "gc.result result type does not match wrapped callee", Call); + Check(StatepointFn && StatepointFn->isDeclaration() && + StatepointFn->getIntrinsicID() == + Intrinsic::experimental_gc_statepoint, + "gc.result operand #1 must be from a statepoint", Call, + Call.getArgOperand(0)); + + // Check that result type matches wrapped callee. + auto *TargetFuncType = + cast(StatepointCall->getParamElementType(2)); + Check(Call.getType() == TargetFuncType->getReturnType(), + "gc.result result type does not match wrapped callee", Call); break; } case Intrinsic::experimental_gc_relocate: { - Assert(Call.arg_size() == 3, "wrong number of arguments", Call); + Check(Call.arg_size() == 3, "wrong number of arguments", Call); - Assert(isa(Call.getType()->getScalarType()), - "gc.relocate must return a pointer or a vector of pointers", Call); + Check(isa(Call.getType()->getScalarType()), + "gc.relocate must return a pointer or a vector of pointers", Call); // Check that this relocate is correctly tied to the statepoint @@ -5027,19 +5150,19 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // Landingpad relocates should have only one predecessor with invoke // statepoint terminator - Assert(InvokeBB, "safepoints should have unique landingpads", - LandingPad->getParent()); - Assert(InvokeBB->getTerminator(), "safepoint block should be well formed", - InvokeBB); - Assert(isa(InvokeBB->getTerminator()), - "gc relocate should be linked to a statepoint", InvokeBB); + Check(InvokeBB, "safepoints should have unique landingpads", + LandingPad->getParent()); + Check(InvokeBB->getTerminator(), "safepoint block should be well formed", + InvokeBB); + Check(isa(InvokeBB->getTerminator()), + "gc relocate should be linked to a statepoint", InvokeBB); } else { // In all other cases relocate should be tied to the statepoint directly. // This covers relocates on a normal return path of invoke statepoint and // relocates of a call statepoint. auto Token = Call.getArgOperand(0); - Assert(isa(Token), - "gc relocate is incorrectly tied to the statepoint", Call, Token); + Check(isa(Token), + "gc relocate is incorrectly tied to the statepoint", Call, Token); } // Verify rest of the relocate arguments. @@ -5048,22 +5171,22 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // Both the base and derived must be piped through the safepoint. 
Value *Base = Call.getArgOperand(1); - Assert(isa(Base), - "gc.relocate operand #2 must be integer offset", Call); + Check(isa(Base), + "gc.relocate operand #2 must be integer offset", Call); Value *Derived = Call.getArgOperand(2); - Assert(isa(Derived), - "gc.relocate operand #3 must be integer offset", Call); + Check(isa(Derived), + "gc.relocate operand #3 must be integer offset", Call); const uint64_t BaseIndex = cast(Base)->getZExtValue(); const uint64_t DerivedIndex = cast(Derived)->getZExtValue(); // Check the bounds if (auto Opt = StatepointCall.getOperandBundle(LLVMContext::OB_gc_live)) { - Assert(BaseIndex < Opt->Inputs.size(), - "gc.relocate: statepoint base index out of bounds", Call); - Assert(DerivedIndex < Opt->Inputs.size(), - "gc.relocate: statepoint derived index out of bounds", Call); + Check(BaseIndex < Opt->Inputs.size(), + "gc.relocate: statepoint base index out of bounds", Call); + Check(DerivedIndex < Opt->Inputs.size(), + "gc.relocate: statepoint derived index out of bounds", Call); } // Relocated value must be either a pointer type or vector-of-pointer type, @@ -5071,15 +5194,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // relocated pointer. It can be casted to the correct type later if it's // desired. However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast(Call); - Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(), - "gc.relocate: relocated value must be a gc pointer", Call); + Check(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(), + "gc.relocate: relocated value must be a gc pointer", Call); auto ResultType = Call.getType(); auto DerivedType = Relocate.getDerivedPtr()->getType(); - Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(), - "gc.relocate: vector relocates to vector and pointer to pointer", - Call); - Assert( + Check(ResultType->isVectorTy() == DerivedType->isVectorTy(), + "gc.relocate: vector relocates to vector and pointer to pointer", + Call); + Check( ResultType->getPointerAddressSpace() == DerivedType->getPointerAddressSpace(), "gc.relocate: relocating a pointer shouldn't change its address space", @@ -5088,39 +5211,43 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { } case Intrinsic::eh_exceptioncode: case Intrinsic::eh_exceptionpointer: { - Assert(isa(Call.getArgOperand(0)), - "eh.exceptionpointer argument must be a catchpad", Call); + Check(isa(Call.getArgOperand(0)), + "eh.exceptionpointer argument must be a catchpad", Call); break; } case Intrinsic::get_active_lane_mask: { - Assert(Call.getType()->isVectorTy(), "get_active_lane_mask: must return a " - "vector", Call); + Check(Call.getType()->isVectorTy(), + "get_active_lane_mask: must return a " + "vector", + Call); auto *ElemTy = Call.getType()->getScalarType(); - Assert(ElemTy->isIntegerTy(1), "get_active_lane_mask: element type is not " - "i1", Call); + Check(ElemTy->isIntegerTy(1), + "get_active_lane_mask: element type is not " + "i1", + Call); break; } case Intrinsic::masked_load: { - Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector", - Call); + Check(Call.getType()->isVectorTy(), "masked_load: must return a vector", + Call); Value *Ptr = Call.getArgOperand(0); ConstantInt *Alignment = cast(Call.getArgOperand(1)); Value *Mask = Call.getArgOperand(2); Value *PassThru = Call.getArgOperand(3); - Assert(Mask->getType()->isVectorTy(), "masked_load: mask must be vector", - Call); - Assert(Alignment->getValue().isPowerOf2(), - "masked_load: 
alignment must be a power of 2", Call); + Check(Mask->getType()->isVectorTy(), "masked_load: mask must be vector", + Call); + Check(Alignment->getValue().isPowerOf2(), + "masked_load: alignment must be a power of 2", Call); PointerType *PtrTy = cast(Ptr->getType()); - Assert(PtrTy->isOpaqueOrPointeeTypeMatches(Call.getType()), - "masked_load: return must match pointer type", Call); - Assert(PassThru->getType() == Call.getType(), - "masked_load: pass through and return type must match", Call); - Assert(cast(Mask->getType())->getElementCount() == - cast(Call.getType())->getElementCount(), - "masked_load: vector mask must be same length as return", Call); + Check(PtrTy->isOpaqueOrPointeeTypeMatches(Call.getType()), + "masked_load: return must match pointer type", Call); + Check(PassThru->getType() == Call.getType(), + "masked_load: pass through and return type must match", Call); + Check(cast(Mask->getType())->getElementCount() == + cast(Call.getType())->getElementCount(), + "masked_load: vector mask must be same length as return", Call); break; } case Intrinsic::masked_store: { @@ -5128,61 +5255,61 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Value *Ptr = Call.getArgOperand(1); ConstantInt *Alignment = cast(Call.getArgOperand(2)); Value *Mask = Call.getArgOperand(3); - Assert(Mask->getType()->isVectorTy(), "masked_store: mask must be vector", - Call); - Assert(Alignment->getValue().isPowerOf2(), - "masked_store: alignment must be a power of 2", Call); + Check(Mask->getType()->isVectorTy(), "masked_store: mask must be vector", + Call); + Check(Alignment->getValue().isPowerOf2(), + "masked_store: alignment must be a power of 2", Call); PointerType *PtrTy = cast(Ptr->getType()); - Assert(PtrTy->isOpaqueOrPointeeTypeMatches(Val->getType()), - "masked_store: storee must match pointer type", Call); - Assert(cast(Mask->getType())->getElementCount() == - cast(Val->getType())->getElementCount(), - "masked_store: vector mask must be same length as value", Call); + Check(PtrTy->isOpaqueOrPointeeTypeMatches(Val->getType()), + "masked_store: storee must match pointer type", Call); + Check(cast(Mask->getType())->getElementCount() == + cast(Val->getType())->getElementCount(), + "masked_store: vector mask must be same length as value", Call); break; } case Intrinsic::masked_gather: { const APInt &Alignment = cast(Call.getArgOperand(1))->getValue(); - Assert(Alignment.isZero() || Alignment.isPowerOf2(), - "masked_gather: alignment must be 0 or a power of 2", Call); + Check(Alignment.isZero() || Alignment.isPowerOf2(), + "masked_gather: alignment must be 0 or a power of 2", Call); break; } case Intrinsic::masked_scatter: { const APInt &Alignment = cast(Call.getArgOperand(2))->getValue(); - Assert(Alignment.isZero() || Alignment.isPowerOf2(), - "masked_scatter: alignment must be 0 or a power of 2", Call); + Check(Alignment.isZero() || Alignment.isPowerOf2(), + "masked_scatter: alignment must be 0 or a power of 2", Call); break; } case Intrinsic::experimental_guard: { - Assert(isa(Call), "experimental_guard cannot be invoked", Call); - Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1, - "experimental_guard must have exactly one " - "\"deopt\" operand bundle"); + Check(isa(Call), "experimental_guard cannot be invoked", Call); + Check(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1, + "experimental_guard must have exactly one " + "\"deopt\" operand bundle"); break; } case Intrinsic::experimental_deoptimize: { - Assert(isa(Call), "experimental_deoptimize 
cannot be invoked", - Call); - Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1, - "experimental_deoptimize must have exactly one " - "\"deopt\" operand bundle"); - Assert(Call.getType() == Call.getFunction()->getReturnType(), - "experimental_deoptimize return type must match caller return type"); + Check(isa(Call), "experimental_deoptimize cannot be invoked", + Call); + Check(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1, + "experimental_deoptimize must have exactly one " + "\"deopt\" operand bundle"); + Check(Call.getType() == Call.getFunction()->getReturnType(), + "experimental_deoptimize return type must match caller return type"); if (isa(Call)) { auto *RI = dyn_cast(Call.getNextNode()); - Assert(RI, - "calls to experimental_deoptimize must be followed by a return"); + Check(RI, + "calls to experimental_deoptimize must be followed by a return"); if (!Call.getType()->isVoidTy() && RI) - Assert(RI->getReturnValue() == &Call, - "calls to experimental_deoptimize must be followed by a return " - "of the value computed by experimental_deoptimize"); + Check(RI->getReturnValue() == &Call, + "calls to experimental_deoptimize must be followed by a return " + "of the value computed by experimental_deoptimize"); } break; @@ -5197,15 +5324,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::vector_reduce_umax: case Intrinsic::vector_reduce_umin: { Type *ArgTy = Call.getArgOperand(0)->getType(); - Assert(ArgTy->isIntOrIntVectorTy() && ArgTy->isVectorTy(), - "Intrinsic has incorrect argument type!"); + Check(ArgTy->isIntOrIntVectorTy() && ArgTy->isVectorTy(), + "Intrinsic has incorrect argument type!"); break; } case Intrinsic::vector_reduce_fmax: case Intrinsic::vector_reduce_fmin: { Type *ArgTy = Call.getArgOperand(0)->getType(); - Assert(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(), - "Intrinsic has incorrect argument type!"); + Check(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(), + "Intrinsic has incorrect argument type!"); break; } case Intrinsic::vector_reduce_fadd: @@ -5213,8 +5340,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // Unlike the other reductions, the first argument is a start value. The // second argument is the vector to be reduced. 
Type *ArgTy = Call.getArgOperand(1)->getType(); - Assert(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(), - "Intrinsic has incorrect argument type!"); + Check(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(), + "Intrinsic has incorrect argument type!"); break; } case Intrinsic::smul_fix: @@ -5227,27 +5354,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::udiv_fix_sat: { Value *Op1 = Call.getArgOperand(0); Value *Op2 = Call.getArgOperand(1); - Assert(Op1->getType()->isIntOrIntVectorTy(), - "first operand of [us][mul|div]_fix[_sat] must be an int type or " - "vector of ints"); - Assert(Op2->getType()->isIntOrIntVectorTy(), - "second operand of [us][mul|div]_fix[_sat] must be an int type or " - "vector of ints"); + Check(Op1->getType()->isIntOrIntVectorTy(), + "first operand of [us][mul|div]_fix[_sat] must be an int type or " + "vector of ints"); + Check(Op2->getType()->isIntOrIntVectorTy(), + "second operand of [us][mul|div]_fix[_sat] must be an int type or " + "vector of ints"); auto *Op3 = cast(Call.getArgOperand(2)); - Assert(Op3->getType()->getBitWidth() <= 32, - "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits"); + Check(Op3->getType()->getBitWidth() <= 32, + "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits"); if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat || ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) { - Assert( - Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(), - "the scale of s[mul|div]_fix[_sat] must be less than the width of " - "the operands"); + Check(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(), + "the scale of s[mul|div]_fix[_sat] must be less than the width of " + "the operands"); } else { - Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(), - "the scale of u[mul|div]_fix[_sat] must be less than or equal " - "to the width of the operands"); + Check(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(), + "the scale of u[mul|div]_fix[_sat] must be less than or equal " + "to the width of the operands"); } break; } @@ -5257,22 +5383,22 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::llrint: { Type *ValTy = Call.getArgOperand(0)->getType(); Type *ResultTy = Call.getType(); - Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), - "Intrinsic does not support vectors", &Call); + Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &Call); break; } case Intrinsic::bswap: { Type *Ty = Call.getType(); unsigned Size = Ty->getScalarSizeInBits(); - Assert(Size % 16 == 0, "bswap must be an even number of bytes", &Call); + Check(Size % 16 == 0, "bswap must be an even number of bytes", &Call); break; } case Intrinsic::invariant_start: { ConstantInt *InvariantSize = dyn_cast(Call.getArgOperand(0)); - Assert(InvariantSize && - (!InvariantSize->isNegative() || InvariantSize->isMinusOne()), - "invariant_start parameter must be -1, 0 or a positive number", - &Call); + Check(InvariantSize && + (!InvariantSize->isNegative() || InvariantSize->isMinusOne()), + "invariant_start parameter must be -1, 0 or a positive number", + &Call); break; } case Intrinsic::matrix_multiply: @@ -5333,27 +5459,29 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { llvm_unreachable("unexpected intrinsic"); } - Assert(ResultTy->getElementType()->isIntegerTy() || - ResultTy->getElementType()->isFloatingPointTy(), - "Result type must be an integer or 
floating-point type!", IF); + Check(ResultTy->getElementType()->isIntegerTy() || + ResultTy->getElementType()->isFloatingPointTy(), + "Result type must be an integer or floating-point type!", IF); if (Op0ElemTy) - Assert(ResultTy->getElementType() == Op0ElemTy, - "Vector element type mismatch of the result and first operand " - "vector!", IF); + Check(ResultTy->getElementType() == Op0ElemTy, + "Vector element type mismatch of the result and first operand " + "vector!", + IF); if (Op1ElemTy) - Assert(ResultTy->getElementType() == Op1ElemTy, - "Vector element type mismatch of the result and second operand " - "vector!", IF); + Check(ResultTy->getElementType() == Op1ElemTy, + "Vector element type mismatch of the result and second operand " + "vector!", + IF); - Assert(cast(ResultTy)->getNumElements() == - NumRows->getZExtValue() * NumColumns->getZExtValue(), - "Result of a matrix operation does not fit in the returned vector!"); + Check(cast(ResultTy)->getNumElements() == + NumRows->getZExtValue() * NumColumns->getZExtValue(), + "Result of a matrix operation does not fit in the returned vector!"); if (Stride) - Assert(Stride->getZExtValue() >= NumRows->getZExtValue(), - "Stride must be greater or equal than the number of rows!", IF); + Check(Stride->getZExtValue() >= NumRows->getZExtValue(), + "Stride must be greater or equal than the number of rows!", IF); break; } @@ -5366,25 +5494,25 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { if (Attrs.hasFnAttr(Attribute::VScaleRange)) KnownMinNumElements *= Attrs.getFnAttrs().getVScaleRangeMin(); } - Assert((Idx < 0 && std::abs(Idx) <= KnownMinNumElements) || - (Idx >= 0 && Idx < KnownMinNumElements), - "The splice index exceeds the range [-VL, VL-1] where VL is the " - "known minimum number of elements in the vector. For scalable " - "vectors the minimum number of elements is determined from " - "vscale_range.", - &Call); + Check((Idx < 0 && std::abs(Idx) <= KnownMinNumElements) || + (Idx >= 0 && Idx < KnownMinNumElements), + "The splice index exceeds the range [-VL, VL-1] where VL is the " + "known minimum number of elements in the vector. 
For scalable " + "vectors the minimum number of elements is determined from " + "vscale_range.", + &Call); break; } case Intrinsic::experimental_stepvector: { VectorType *VecTy = dyn_cast(Call.getType()); - Assert(VecTy && VecTy->getScalarType()->isIntegerTy() && - VecTy->getScalarSizeInBits() >= 8, - "experimental_stepvector only supported for vectors of integers " - "with a bitwidth of at least 8.", - &Call); + Check(VecTy && VecTy->getScalarType()->isIntegerTy() && + VecTy->getScalarSizeInBits() >= 8, + "experimental_stepvector only supported for vectors of integers " + "with a bitwidth of at least 8.", + &Call); break; } - case Intrinsic::experimental_vector_insert: { + case Intrinsic::vector_insert: { Value *Vec = Call.getArgOperand(0); Value *SubVec = Call.getArgOperand(1); Value *Idx = Call.getArgOperand(2); @@ -5395,27 +5523,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { ElementCount VecEC = VecTy->getElementCount(); ElementCount SubVecEC = SubVecTy->getElementCount(); - Assert(VecTy->getElementType() == SubVecTy->getElementType(), - "experimental_vector_insert parameters must have the same element " - "type.", - &Call); - Assert(IdxN % SubVecEC.getKnownMinValue() == 0, - "experimental_vector_insert index must be a constant multiple of " - "the subvector's known minimum vector length."); + Check(VecTy->getElementType() == SubVecTy->getElementType(), + "vector_insert parameters must have the same element " + "type.", + &Call); + Check(IdxN % SubVecEC.getKnownMinValue() == 0, + "vector_insert index must be a constant multiple of " + "the subvector's known minimum vector length."); // If this insertion is not the 'mixed' case where a fixed vector is // inserted into a scalable vector, ensure that the insertion of the // subvector does not overrun the parent vector. if (VecEC.isScalable() == SubVecEC.isScalable()) { - Assert( - IdxN < VecEC.getKnownMinValue() && - IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(), - "subvector operand of experimental_vector_insert would overrun the " - "vector being inserted into."); + Check(IdxN < VecEC.getKnownMinValue() && + IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(), + "subvector operand of vector_insert would overrun the " + "vector being inserted into."); } break; } - case Intrinsic::experimental_vector_extract: { + case Intrinsic::vector_extract: { Value *Vec = Call.getArgOperand(0); Value *Idx = Call.getArgOperand(1); unsigned IdxN = cast(Idx)->getZExtValue(); @@ -5426,21 +5553,21 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { ElementCount VecEC = VecTy->getElementCount(); ElementCount ResultEC = ResultTy->getElementCount(); - Assert(ResultTy->getElementType() == VecTy->getElementType(), - "experimental_vector_extract result must have the same element " - "type as the input vector.", - &Call); - Assert(IdxN % ResultEC.getKnownMinValue() == 0, - "experimental_vector_extract index must be a constant multiple of " - "the result type's known minimum vector length."); + Check(ResultTy->getElementType() == VecTy->getElementType(), + "vector_extract result must have the same element " + "type as the input vector.", + &Call); + Check(IdxN % ResultEC.getKnownMinValue() == 0, + "vector_extract index must be a constant multiple of " + "the result type's known minimum vector length."); // If this extraction is not the 'mixed' case where a fixed vector is is // extracted from a scalable vector, ensure that the extraction does not // overrun the parent vector. 
     if (VecEC.isScalable() == ResultEC.isScalable()) {
-      Assert(IdxN < VecEC.getKnownMinValue() &&
-                 IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
-             "experimental_vector_extract would overrun.");
+      Check(IdxN < VecEC.getKnownMinValue() &&
+                IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+            "vector_extract would overrun.");
     }
     break;
   }
@@ -5449,11 +5576,24 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     break;
   }
   case Intrinsic::preserve_array_access_index:
-  case Intrinsic::preserve_struct_access_index: {
-    Type *ElemTy = Call.getAttributes().getParamElementType(0);
-    Assert(ElemTy,
-           "Intrinsic requires elementtype attribute on first argument.",
-           &Call);
+  case Intrinsic::preserve_struct_access_index:
+  case Intrinsic::aarch64_ldaxr:
+  case Intrinsic::aarch64_ldxr:
+  case Intrinsic::arm_ldaex:
+  case Intrinsic::arm_ldrex: {
+    Type *ElemTy = Call.getParamElementType(0);
+    Check(ElemTy, "Intrinsic requires elementtype attribute on first argument.",
+          &Call);
+    break;
+  }
+  case Intrinsic::aarch64_stlxr:
+  case Intrinsic::aarch64_stxr:
+  case Intrinsic::arm_stlex:
+  case Intrinsic::arm_strex: {
+    Type *ElemTy = Call.getAttributes().getParamElementType(1);
+    Check(ElemTy,
+          "Intrinsic requires elementtype attribute on second argument.",
+          &Call);
     break;
   }
   };
@@ -5478,6 +5618,101 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
   return nullptr;
 }
 
+void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
+  if (auto *VPCast = dyn_cast<VPCastIntrinsic>(&VPI)) {
+    auto *RetTy = cast<VectorType>(VPCast->getType());
+    auto *ValTy = cast<VectorType>(VPCast->getOperand(0)->getType());
+    Check(RetTy->getElementCount() == ValTy->getElementCount(),
+          "VP cast intrinsic first argument and result vector lengths must be "
+          "equal",
+          *VPCast);
+
+    switch (VPCast->getIntrinsicID()) {
+    default:
+      llvm_unreachable("Unknown VP cast intrinsic");
+    case Intrinsic::vp_trunc:
+      Check(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
+            "llvm.vp.trunc intrinsic first argument and result element type "
+            "must be integer",
+            *VPCast);
+      Check(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
+            "llvm.vp.trunc intrinsic the bit size of first argument must be "
+            "larger than the bit size of the return type",
+            *VPCast);
+      break;
+    case Intrinsic::vp_zext:
+    case Intrinsic::vp_sext:
+      Check(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
+            "llvm.vp.zext or llvm.vp.sext intrinsic first argument and result "
+            "element type must be integer",
+            *VPCast);
+      Check(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
+            "llvm.vp.zext or llvm.vp.sext intrinsic the bit size of first "
+            "argument must be smaller than the bit size of the return type",
+            *VPCast);
+      break;
+    case Intrinsic::vp_fptoui:
+    case Intrinsic::vp_fptosi:
+      Check(
+          RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
+          "llvm.vp.fptoui or llvm.vp.fptosi intrinsic first argument element "
+          "type must be floating-point and result element type must be integer",
+          *VPCast);
+      break;
+    case Intrinsic::vp_uitofp:
+    case Intrinsic::vp_sitofp:
+      Check(
+          RetTy->isFPOrFPVectorTy() && ValTy->isIntOrIntVectorTy(),
+          "llvm.vp.uitofp or llvm.vp.sitofp intrinsic first argument element "
+          "type must be integer and result element type must be floating-point",
+          *VPCast);
+      break;
+    case Intrinsic::vp_fptrunc:
+      Check(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
+            "llvm.vp.fptrunc intrinsic first argument and result element type "
+            "must be floating-point",
+            *VPCast);
+      Check(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
+            "llvm.vp.fptrunc intrinsic the bit size of first argument must be "
+            "larger than the bit size of the return type",
+            *VPCast);
+      break;
+    case Intrinsic::vp_fpext:
+      Check(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
+            "llvm.vp.fpext intrinsic first argument and result element type "
+            "must be floating-point",
+            *VPCast);
+      Check(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
+            "llvm.vp.fpext intrinsic the bit size of first argument must be "
+            "smaller than the bit size of the return type",
+            *VPCast);
+      break;
+    case Intrinsic::vp_ptrtoint:
+      Check(RetTy->isIntOrIntVectorTy() && ValTy->isPtrOrPtrVectorTy(),
+            "llvm.vp.ptrtoint intrinsic first argument element type must be "
+            "pointer and result element type must be integer",
+            *VPCast);
+      break;
+    case Intrinsic::vp_inttoptr:
+      Check(RetTy->isPtrOrPtrVectorTy() && ValTy->isIntOrIntVectorTy(),
+            "llvm.vp.inttoptr intrinsic first argument element type must be "
+            "integer and result element type must be pointer",
+            *VPCast);
+      break;
+    }
+  }
+  if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) {
+    auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
+    Check(CmpInst::isFPPredicate(Pred),
+          "invalid predicate for VP FP comparison intrinsic", &VPI);
+  }
+  if (VPI.getIntrinsicID() == Intrinsic::vp_icmp) {
+    auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
+    Check(CmpInst::isIntPredicate(Pred),
+          "invalid predicate for VP integer comparison intrinsic", &VPI);
+  }
+}
+
 void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
   unsigned NumOperands;
   bool HasRoundingMD;
@@ -5495,16 +5730,16 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
   // Compare intrinsics carry an extra predicate metadata operand.
if (isa(FPI)) NumOperands += 1; - Assert((FPI.arg_size() == NumOperands), - "invalid arguments for constrained FP intrinsic", &FPI); + Check((FPI.arg_size() == NumOperands), + "invalid arguments for constrained FP intrinsic", &FPI); switch (FPI.getIntrinsicID()) { case Intrinsic::experimental_constrained_lrint: case Intrinsic::experimental_constrained_llrint: { Type *ValTy = FPI.getArgOperand(0)->getType(); Type *ResultTy = FPI.getType(); - Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), - "Intrinsic does not support vectors", &FPI); + Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); } break; @@ -5512,16 +5747,16 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { case Intrinsic::experimental_constrained_llround: { Type *ValTy = FPI.getArgOperand(0)->getType(); Type *ResultTy = FPI.getType(); - Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), - "Intrinsic does not support vectors", &FPI); + Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); break; } case Intrinsic::experimental_constrained_fcmp: case Intrinsic::experimental_constrained_fcmps: { auto Pred = cast(&FPI)->getPredicate(); - Assert(CmpInst::isFPPredicate(Pred), - "invalid predicate for constrained FP comparison intrinsic", &FPI); + Check(CmpInst::isFPPredicate(Pred), + "invalid predicate for constrained FP comparison intrinsic", &FPI); break; } @@ -5529,21 +5764,21 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { case Intrinsic::experimental_constrained_fptoui: { Value *Operand = FPI.getArgOperand(0); uint64_t NumSrcElem = 0; - Assert(Operand->getType()->isFPOrFPVectorTy(), - "Intrinsic first argument must be floating point", &FPI); + Check(Operand->getType()->isFPOrFPVectorTy(), + "Intrinsic first argument must be floating point", &FPI); if (auto *OperandT = dyn_cast(Operand->getType())) { NumSrcElem = cast(OperandT)->getNumElements(); } Operand = &FPI; - Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), - "Intrinsic first argument and result disagree on vector use", &FPI); - Assert(Operand->getType()->isIntOrIntVectorTy(), - "Intrinsic result must be an integer", &FPI); + Check((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); + Check(Operand->getType()->isIntOrIntVectorTy(), + "Intrinsic result must be an integer", &FPI); if (auto *OperandT = dyn_cast(Operand->getType())) { - Assert(NumSrcElem == cast(OperandT)->getNumElements(), - "Intrinsic first argument and result vector lengths must be equal", - &FPI); + Check(NumSrcElem == cast(OperandT)->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); } } break; @@ -5552,21 +5787,21 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { case Intrinsic::experimental_constrained_uitofp: { Value *Operand = FPI.getArgOperand(0); uint64_t NumSrcElem = 0; - Assert(Operand->getType()->isIntOrIntVectorTy(), - "Intrinsic first argument must be integer", &FPI); + Check(Operand->getType()->isIntOrIntVectorTy(), + "Intrinsic first argument must be integer", &FPI); if (auto *OperandT = dyn_cast(Operand->getType())) { NumSrcElem = cast(OperandT)->getNumElements(); } Operand = &FPI; - Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), - "Intrinsic first argument and result disagree on vector use", &FPI); - Assert(Operand->getType()->isFPOrFPVectorTy(), - "Intrinsic 
result must be a floating point", &FPI); + Check((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); + Check(Operand->getType()->isFPOrFPVectorTy(), + "Intrinsic result must be a floating point", &FPI); if (auto *OperandT = dyn_cast(Operand->getType())) { - Assert(NumSrcElem == cast(OperandT)->getNumElements(), - "Intrinsic first argument and result vector lengths must be equal", - &FPI); + Check(NumSrcElem == cast(OperandT)->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); } } break; @@ -5576,26 +5811,26 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { Type *OperandTy = Operand->getType(); Value *Result = &FPI; Type *ResultTy = Result->getType(); - Assert(OperandTy->isFPOrFPVectorTy(), - "Intrinsic first argument must be FP or FP vector", &FPI); - Assert(ResultTy->isFPOrFPVectorTy(), - "Intrinsic result must be FP or FP vector", &FPI); - Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(), - "Intrinsic first argument and result disagree on vector use", &FPI); + Check(OperandTy->isFPOrFPVectorTy(), + "Intrinsic first argument must be FP or FP vector", &FPI); + Check(ResultTy->isFPOrFPVectorTy(), + "Intrinsic result must be FP or FP vector", &FPI); + Check(OperandTy->isVectorTy() == ResultTy->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); if (OperandTy->isVectorTy()) { - Assert(cast(OperandTy)->getNumElements() == - cast(ResultTy)->getNumElements(), - "Intrinsic first argument and result vector lengths must be equal", - &FPI); + Check(cast(OperandTy)->getNumElements() == + cast(ResultTy)->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); } if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) { - Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(), - "Intrinsic first argument's type must be larger than result type", - &FPI); + Check(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(), + "Intrinsic first argument's type must be larger than result type", + &FPI); } else { - Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(), - "Intrinsic first argument's type must be smaller than result type", - &FPI); + Check(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(), + "Intrinsic first argument's type must be smaller than result type", + &FPI); } } break; @@ -5609,25 +5844,25 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { // match the specification in the intrinsic call table. Thus, no // argument type check is needed here. - Assert(FPI.getExceptionBehavior().hasValue(), - "invalid exception behavior argument", &FPI); + Check(FPI.getExceptionBehavior().has_value(), + "invalid exception behavior argument", &FPI); if (HasRoundingMD) { - Assert(FPI.getRoundingMode().hasValue(), - "invalid rounding mode argument", &FPI); + Check(FPI.getRoundingMode().has_value(), "invalid rounding mode argument", + &FPI); } } void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { auto *MD = DII.getRawLocation(); - AssertDI(isa(MD) || isa(MD) || - (isa(MD) && !cast(MD)->getNumOperands()), - "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD); - AssertDI(isa(DII.getRawVariable()), - "invalid llvm.dbg." + Kind + " intrinsic variable", &DII, - DII.getRawVariable()); - AssertDI(isa(DII.getRawExpression()), - "invalid llvm.dbg." 
+ Kind + " intrinsic expression", &DII, - DII.getRawExpression()); + CheckDI(isa(MD) || isa(MD) || + (isa(MD) && !cast(MD)->getNumOperands()), + "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD); + CheckDI(isa(DII.getRawVariable()), + "invalid llvm.dbg." + Kind + " intrinsic variable", &DII, + DII.getRawVariable()); + CheckDI(isa(DII.getRawExpression()), + "invalid llvm.dbg." + Kind + " intrinsic expression", &DII, + DII.getRawExpression()); // Ignore broken !dbg attachments; they're checked elsewhere. if (MDNode *N = DII.getDebugLoc().getAsMDNode()) @@ -5640,29 +5875,30 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { // The scopes for variables and !dbg attachments must agree. DILocalVariable *Var = DII.getVariable(); DILocation *Loc = DII.getDebugLoc(); - AssertDI(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", - &DII, BB, F); + CheckDI(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", + &DII, BB, F); DISubprogram *VarSP = getSubprogram(Var->getRawScope()); DISubprogram *LocSP = getSubprogram(Loc->getRawScope()); if (!VarSP || !LocSP) return; // Broken scope chains are checked elsewhere. - AssertDI(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind + - " variable and !dbg attachment", - &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc, - Loc->getScope()->getSubprogram()); + CheckDI(VarSP == LocSP, + "mismatched subprogram between llvm.dbg." + Kind + + " variable and !dbg attachment", + &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc, + Loc->getScope()->getSubprogram()); // This check is redundant with one in visitLocalVariable(). - AssertDI(isType(Var->getRawType()), "invalid type ref", Var, - Var->getRawType()); + CheckDI(isType(Var->getRawType()), "invalid type ref", Var, + Var->getRawType()); verifyFnArgs(DII); } void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) { - AssertDI(isa(DLI.getRawLabel()), - "invalid llvm.dbg." + Kind + " intrinsic variable", &DLI, - DLI.getRawLabel()); + CheckDI(isa(DLI.getRawLabel()), + "invalid llvm.dbg." + Kind + " intrinsic variable", &DLI, + DLI.getRawLabel()); // Ignore broken !dbg attachments; they're checked elsewhere. if (MDNode *N = DLI.getDebugLoc().getAsMDNode()) @@ -5675,18 +5911,19 @@ void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) { // The scopes for variables and !dbg attachments must agree. DILabel *Label = DLI.getLabel(); DILocation *Loc = DLI.getDebugLoc(); - Assert(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", - &DLI, BB, F); + Check(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", &DLI, + BB, F); DISubprogram *LabelSP = getSubprogram(Label->getRawScope()); DISubprogram *LocSP = getSubprogram(Loc->getRawScope()); if (!LabelSP || !LocSP) return; - AssertDI(LabelSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind + - " label and !dbg attachment", - &DLI, BB, F, Label, Label->getScope()->getSubprogram(), Loc, - Loc->getScope()->getSubprogram()); + CheckDI(LabelSP == LocSP, + "mismatched subprogram between llvm.dbg." 
+ Kind + + " label and !dbg attachment", + &DLI, BB, F, Label, Label->getScope()->getSubprogram(), Loc, + Loc->getScope()->getSubprogram()); } void Verifier::verifyFragmentExpression(const DbgVariableIntrinsic &I) { @@ -5726,9 +5963,9 @@ void Verifier::verifyFragmentExpression(const DIVariable &V, unsigned FragSize = Fragment.SizeInBits; unsigned FragOffset = Fragment.OffsetInBits; - AssertDI(FragSize + FragOffset <= *VarSize, - "fragment is larger than or outside of variable", Desc, &V); - AssertDI(FragSize != *VarSize, "fragment covers entire variable", Desc, &V); + CheckDI(FragSize + FragOffset <= *VarSize, + "fragment is larger than or outside of variable", Desc, &V); + CheckDI(FragSize != *VarSize, "fragment covers entire variable", Desc, &V); } void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) { @@ -5743,7 +5980,7 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) { return; DILocalVariable *Var = I.getVariable(); - AssertDI(Var, "dbg intrinsic without variable"); + CheckDI(Var, "dbg intrinsic without variable"); unsigned ArgNo = Var->getArg(); if (!ArgNo) @@ -5756,8 +5993,8 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) { auto *Prev = DebugFnArgs[ArgNo - 1]; DebugFnArgs[ArgNo - 1] = Var; - AssertDI(!Prev || (Prev == Var), "conflicting debug info for argument", &I, - Prev, Var); + CheckDI(!Prev || (Prev == Var), "conflicting debug info for argument", &I, + Prev, Var); } void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) { @@ -5767,7 +6004,7 @@ void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) { if (!E || !E->isValid()) return; - AssertDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I); + CheckDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I); } void Verifier::verifyCompileUnits() { @@ -5781,7 +6018,7 @@ void Verifier::verifyCompileUnits() { if (CUs) Listed.insert(CUs->op_begin(), CUs->op_end()); for (auto *CU : CUVisited) - AssertDI(Listed.count(CU), "DICompileUnit not listed in llvm.dbg.cu", CU); + CheckDI(Listed.count(CU), "DICompileUnit not listed in llvm.dbg.cu", CU); CUVisited.clear(); } @@ -5791,10 +6028,10 @@ void Verifier::verifyDeoptimizeCallingConvs() { const Function *First = DeoptimizeDeclarations[0]; for (auto *F : makeArrayRef(DeoptimizeDeclarations).slice(1)) { - Assert(First->getCallingConv() == F->getCallingConv(), - "All llvm.experimental.deoptimize declarations must have the same " - "calling convention", - First, F); + Check(First->getCallingConv() == F->getCallingConv(), + "All llvm.experimental.deoptimize declarations must have the same " + "calling convention", + First, F); } } @@ -5802,39 +6039,39 @@ void Verifier::verifyAttachedCallBundle(const CallBase &Call, const OperandBundleUse &BU) { FunctionType *FTy = Call.getFunctionType(); - Assert((FTy->getReturnType()->isPointerTy() || - (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())), - "a call with operand bundle \"clang.arc.attachedcall\" must call a " - "function returning a pointer or a non-returning function that has a " - "void return type", - Call); + Check((FTy->getReturnType()->isPointerTy() || + (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())), + "a call with operand bundle \"clang.arc.attachedcall\" must call a " + "function returning a pointer or a non-returning function that has a " + "void return type", + Call); - Assert(BU.Inputs.size() == 1 && isa(BU.Inputs.front()), - "operand bundle \"clang.arc.attachedcall\" requires one function as " - "an argument", - Call); + 
Check(BU.Inputs.size() == 1 && isa(BU.Inputs.front()), + "operand bundle \"clang.arc.attachedcall\" requires one function as " + "an argument", + Call); auto *Fn = cast(BU.Inputs.front()); Intrinsic::ID IID = Fn->getIntrinsicID(); if (IID) { - Assert((IID == Intrinsic::objc_retainAutoreleasedReturnValue || - IID == Intrinsic::objc_unsafeClaimAutoreleasedReturnValue), - "invalid function argument", Call); + Check((IID == Intrinsic::objc_retainAutoreleasedReturnValue || + IID == Intrinsic::objc_unsafeClaimAutoreleasedReturnValue), + "invalid function argument", Call); } else { StringRef FnName = Fn->getName(); - Assert((FnName == "objc_retainAutoreleasedReturnValue" || - FnName == "objc_unsafeClaimAutoreleasedReturnValue"), - "invalid function argument", Call); + Check((FnName == "objc_retainAutoreleasedReturnValue" || + FnName == "objc_unsafeClaimAutoreleasedReturnValue"), + "invalid function argument", Call); } } void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) { - bool HasSource = F.getSource().hasValue(); + bool HasSource = F.getSource().has_value(); if (!HasSourceDebugInfo.count(&U)) HasSourceDebugInfo[&U] = HasSource; - AssertDI(HasSource == HasSourceDebugInfo[&U], - "inconsistent use of embedded source"); + CheckDI(HasSource == HasSourceDebugInfo[&U], + "inconsistent use of embedded source"); } void Verifier::verifyNoAliasScopeDecl() { @@ -5847,16 +6084,15 @@ void Verifier::verifyNoAliasScopeDecl() { "Not a llvm.experimental.noalias.scope.decl ?"); const auto *ScopeListMV = dyn_cast( II->getOperand(Intrinsic::NoAliasScopeDeclScopeArg)); - Assert(ScopeListMV != nullptr, - "llvm.experimental.noalias.scope.decl must have a MetadataAsValue " - "argument", - II); + Check(ScopeListMV != nullptr, + "llvm.experimental.noalias.scope.decl must have a MetadataAsValue " + "argument", + II); const auto *ScopeListMD = dyn_cast(ScopeListMV->getMetadata()); - Assert(ScopeListMD != nullptr, "!id.scope.list must point to an MDNode", - II); - Assert(ScopeListMD->getNumOperands() == 1, - "!id.scope.list must point to a list with a single scope", II); + Check(ScopeListMD != nullptr, "!id.scope.list must point to an MDNode", II); + Check(ScopeListMD->getNumOperands() == 1, + "!id.scope.list must point to a list with a single scope", II); visitAliasScopeListMetadata(ScopeListMD); } @@ -5899,10 +6135,10 @@ void Verifier::verifyNoAliasScopeDecl() { for (auto *I : llvm::make_range(ItCurrent, ItNext)) for (auto *J : llvm::make_range(ItCurrent, ItNext)) if (I != J) - Assert(!DT.dominates(I, J), - "llvm.experimental.noalias.scope.decl dominates another one " - "with the same scope", - I); + Check(!DT.dominates(I, J), + "llvm.experimental.noalias.scope.decl dominates another one " + "with the same scope", + I); ItCurrent = ItNext; } } @@ -5995,7 +6231,7 @@ template void TBAAVerifier::CheckFailed(Tys &&... Args) { return Diagnostic->CheckFailed(Args...); } -#define AssertTBAA(C, ...) \ +#define CheckTBAA(C, ...) \ do { \ if (!(C)) { \ CheckFailed(__VA_ARGS__); \ @@ -6185,7 +6421,7 @@ MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I, // Scalar nodes have only one possible "field" -- their parent in the access // hierarchy. Offset must be zero at this point, but our caller is supposed - // to Assert that. + // to check that. 
if (BaseNode->getNumOperands() == 2) return cast(BaseNode->getOperand(1)); @@ -6227,17 +6463,17 @@ static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) { } bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { - AssertTBAA(isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I), - "This instruction shall not have a TBAA access tag!", &I); + CheckTBAA(isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I), + "This instruction shall not have a TBAA access tag!", &I); bool IsStructPathTBAA = isa(MD->getOperand(0)) && MD->getNumOperands() >= 3; - AssertTBAA( - IsStructPathTBAA, - "Old-style TBAA is no longer allowed, use struct-path TBAA instead", &I); + CheckTBAA(IsStructPathTBAA, + "Old-style TBAA is no longer allowed, use struct-path TBAA instead", + &I); MDNode *BaseNode = dyn_cast_or_null(MD->getOperand(0)); MDNode *AccessType = dyn_cast_or_null(MD->getOperand(1)); @@ -6245,18 +6481,18 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { bool IsNewFormat = isNewFormatTBAATypeNode(AccessType); if (IsNewFormat) { - AssertTBAA(MD->getNumOperands() == 4 || MD->getNumOperands() == 5, - "Access tag metadata must have either 4 or 5 operands", &I, MD); + CheckTBAA(MD->getNumOperands() == 4 || MD->getNumOperands() == 5, + "Access tag metadata must have either 4 or 5 operands", &I, MD); } else { - AssertTBAA(MD->getNumOperands() < 5, - "Struct tag metadata must have either 3 or 4 operands", &I, MD); + CheckTBAA(MD->getNumOperands() < 5, + "Struct tag metadata must have either 3 or 4 operands", &I, MD); } // Check the access size field. if (IsNewFormat) { auto *AccessSizeNode = mdconst::dyn_extract_or_null( MD->getOperand(3)); - AssertTBAA(AccessSizeNode, "Access size field must be a constant", &I, MD); + CheckTBAA(AccessSizeNode, "Access size field must be a constant", &I, MD); } // Check the immutability flag. 
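The operand-count rules in the hunks above describe the struct-path access-tag layout: !{base, access-type, offset} in the old format, plus a size operand and an optional immutability flag in the new format. A hedged sketch of a decoder for the fields common to both formats, using the same Metadata accessors the verifier uses (TBAAAccessTag and decodeAccessTag are hypothetical names, not patch code):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include <cstdint>

    using namespace llvm;

    // Decodes the common prefix of a struct-path TBAA access tag:
    //   old format: !{base, access-type, offset[, immutable]}
    //   new format: !{base, access-type, offset, size[, immutable]}
    struct TBAAAccessTag {
      MDNode *BaseType;
      MDNode *AccessType;
      uint64_t Offset;
    };

    static bool decodeAccessTag(const MDNode *MD, TBAAAccessTag &Out) {
      if (MD->getNumOperands() < 3)
        return false;
      Out.BaseType = dyn_cast_or_null<MDNode>(MD->getOperand(0));
      Out.AccessType = dyn_cast_or_null<MDNode>(MD->getOperand(1));
      // mdconst::dyn_extract_or_null is the same helper the verifier
      // applies to the offset operand.
      auto *OffsetCI =
          mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
      if (!Out.BaseType || !Out.AccessType || !OffsetCI)
        return false;
      Out.Offset = OffsetCI->getZExtValue();
      return true;
    }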
@@ -6264,28 +6500,28 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { if (MD->getNumOperands() == ImmutabilityFlagOpNo + 1) { auto *IsImmutableCI = mdconst::dyn_extract_or_null<ConstantInt>( MD->getOperand(ImmutabilityFlagOpNo)); - AssertTBAA(IsImmutableCI, - "Immutability tag on struct tag metadata must be a constant", - &I, MD); - AssertTBAA( + CheckTBAA(IsImmutableCI, + "Immutability tag on struct tag metadata must be a constant", &I, + MD); + CheckTBAA( IsImmutableCI->isZero() || IsImmutableCI->isOne(), "Immutability part of the struct tag metadata must be either 0 or 1", &I, MD); } - AssertTBAA(BaseNode && AccessType, - "Malformed struct tag metadata: base and access-type " - "should be non-null and point to Metadata nodes", - &I, MD, BaseNode, AccessType); + CheckTBAA(BaseNode && AccessType, + "Malformed struct tag metadata: base and access-type " + "should be non-null and point to Metadata nodes", + &I, MD, BaseNode, AccessType); if (!IsNewFormat) { - AssertTBAA(isValidScalarTBAANode(AccessType), - "Access type node must be a valid scalar type", &I, MD, - AccessType); + CheckTBAA(isValidScalarTBAANode(AccessType), + "Access type node must be a valid scalar type", &I, MD, + AccessType); } auto *OffsetCI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2)); - AssertTBAA(OffsetCI, "Offset must be constant integer", &I, MD); + CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD); APInt Offset = OffsetCI->getValue(); bool SeenAccessTypeInPath = false; @@ -6313,21 +6549,21 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { SeenAccessTypeInPath |= BaseNode == AccessType; if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType) - AssertTBAA(Offset == 0, "Offset not zero at the point of scalar access", - &I, MD, &Offset); + CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", + &I, MD, &Offset); - AssertTBAA(BaseNodeBitWidth == Offset.getBitWidth() || - (BaseNodeBitWidth == 0 && Offset == 0) || - (IsNewFormat && BaseNodeBitWidth == ~0u), - "Access bit-width not the same as description bit-width", &I, MD, - BaseNodeBitWidth, Offset.getBitWidth()); + CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() || + (BaseNodeBitWidth == 0 && Offset == 0) || + (IsNewFormat && BaseNodeBitWidth == ~0u), + "Access bit-width not the same as description bit-width", &I, MD, + BaseNodeBitWidth, Offset.getBitWidth()); if (IsNewFormat && SeenAccessTypeInPath) break; } - AssertTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", - &I, MD); + CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I, + MD); return true; } diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp index cb72f57f7bde..13801cd2cbc0 100644 --- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp +++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp @@ -17,7 +17,6 @@ #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Process.h" using llvm::object::ELFObjectFile; @@ -195,7 +194,7 @@ public: for (const std::string &Lib : Stub.NeededLibs) DynStr.Content.add(Lib); if (Stub.SoName) - DynStr.Content.add(Stub.SoName.getValue()); + DynStr.Content.add(*Stub.SoName); std::vector<OutputSection<ELFT> *> Sections = {&DynSym, &DynStr, &DynTab, &ShStrTab}; @@ -218,7 +217,8 @@ public: // time as long as it is not SHN_UNDEF. Set shndx to 1, which // points to ".dynsym". uint16_t Shndx = Sym.Undefined ?
SHN_UNDEF : 1; - DynSym.Content.add(DynStr.Content.getOffset(Sym.Name), Sym.Size, Bind, + uint64_t Size = Sym.Size.value_or(0); + DynSym.Content.add(DynStr.Content.getOffset(Sym.Name), Size, Bind, convertIFSSymbolTypeToELF(Sym.Type), 0, Shndx); } DynSym.Size = DynSym.Content.getSize(); @@ -226,11 +226,12 @@ public: // Poplulate dynamic table. size_t DynSymIndex = DynTab.Content.addAddr(DT_SYMTAB, 0); size_t DynStrIndex = DynTab.Content.addAddr(DT_STRTAB, 0); + DynTab.Content.addValue(DT_STRSZ, DynSym.Size); for (const std::string &Lib : Stub.NeededLibs) DynTab.Content.addValue(DT_NEEDED, DynStr.Content.getOffset(Lib)); if (Stub.SoName) DynTab.Content.addValue(DT_SONAME, - DynStr.Content.getOffset(Stub.SoName.getValue())); + DynStr.Content.getOffset(*Stub.SoName)); DynTab.Size = DynTab.Content.getSize(); // Calculate sections' addresses and offsets. uint64_t CurrentOffset = sizeof(Elf_Ehdr); @@ -249,8 +250,7 @@ public: fillStrTabShdr(ShStrTab); // Finish initializing the ELF header. - initELFHeader<ELFT>(ElfHeader, - static_cast<uint16_t>(Stub.Target.Arch.getValue())); + initELFHeader<ELFT>(ElfHeader, static_cast<uint16_t>(*Stub.Target.Arch)); ElfHeader.e_shstrndx = ShStrTab.Index; ElfHeader.e_shnum = LastSection->Index + 1; ElfHeader.e_shoff = @@ -334,6 +334,89 @@ private: write(Data + shdrOffset(Sec), Sec.Shdr); } }; + +/// This function takes an error, and appends a string of text to the end of +/// that error. Since "appending" to an Error isn't supported behavior of an +/// Error, this function technically creates a new error with the combined +/// message and consumes the old error. +/// +/// @param Err Source error. +/// @param After Text to append at the end of Err's error message. +Error appendToError(Error Err, StringRef After) { + std::string Message; + raw_string_ostream Stream(Message); + Stream << Err; + Stream << " " << After; + consumeError(std::move(Err)); + return createError(Stream.str()); +} + +template <class ELFT> class DynSym { + using Elf_Shdr_Range = typename ELFT::ShdrRange; + using Elf_Shdr = typename ELFT::Shdr; + +public: + static Expected<DynSym> create(const ELFFile<ELFT> &ElfFile, + const DynamicEntries &DynEnt) { + Expected<Elf_Shdr_Range> Shdrs = ElfFile.sections(); + if (!Shdrs) + return Shdrs.takeError(); + return DynSym(ElfFile, DynEnt, *Shdrs); + } + + Expected<const uint8_t *> getDynSym() { + if (DynSymHdr) + return ElfFile.base() + DynSymHdr->sh_offset; + return getDynamicData(DynEnt.DynSymAddr, "dynamic symbol table"); + } + + Expected<StringRef> getDynStr() { + if (DynSymHdr) + return ElfFile.getStringTableForSymtab(*DynSymHdr, Shdrs); + Expected<const uint8_t *> DataOrErr = getDynamicData( + DynEnt.StrTabAddr, "dynamic string table", DynEnt.StrSize); + if (!DataOrErr) + return DataOrErr.takeError(); + return StringRef(reinterpret_cast<const char *>(*DataOrErr), + DynEnt.StrSize); + } + +private: + DynSym(const ELFFile<ELFT> &ElfFile, const DynamicEntries &DynEnt, + Elf_Shdr_Range Shdrs) + : ElfFile(ElfFile), DynEnt(DynEnt), Shdrs(Shdrs), + DynSymHdr(findDynSymHdr()) {} + + const Elf_Shdr *findDynSymHdr() { + for (const Elf_Shdr &Sec : Shdrs) + if (Sec.sh_type == SHT_DYNSYM) { + // If multiple .dynsym are present, use the first one.
+ // This behavior aligns with llvm::object::ELFFile<ELFT>::getDynSymtabSize() + return &Sec; } + return nullptr; + } + + Expected<const uint8_t *> getDynamicData(uint64_t EntAddr, StringRef Name, + uint64_t Size = 0) { + Expected<const uint8_t *> SecPtr = ElfFile.toMappedAddr(EntAddr); + if (!SecPtr) + return appendToError( + SecPtr.takeError(), + ("when locating " + Name + " section contents").str()); + Expected<const uint8_t *> SecEndPtr = ElfFile.toMappedAddr(EntAddr + Size); + if (!SecEndPtr) + return appendToError( + SecEndPtr.takeError(), + ("when locating " + Name + " section contents").str()); + return *SecPtr; + } + + const ELFFile<ELFT> &ElfFile; + const DynamicEntries &DynEnt; + Elf_Shdr_Range Shdrs; + const Elf_Shdr *DynSymHdr; +}; } // end anonymous namespace /// This function behaves similarly to StringRef::substr(), but attempts to @@ -353,22 +436,6 @@ static Expected<StringRef> terminatedSubstr(StringRef Str, size_t Offset) { return Str.substr(Offset, StrLen); } -/// This function takes an error, and appends a string of text to the end of -/// that error. Since "appending" to an Error isn't supported behavior of an -/// Error, this function technically creates a new error with the combined -/// message and consumes the old error. -/// -/// @param Err Source error. -/// @param After Text to append at the end of Err's error message. -Error appendToError(Error Err, StringRef After) { - std::string Message; - raw_string_ostream Stream(Message); - Stream << Err; - Stream << " " << After; - consumeError(std::move(Err)); - return createError(Stream.str()); -} - /// This function populates a DynamicEntries struct using an ELFT::DynRange. /// After populating the struct, the members are validated with /// some basic correctness checks. @@ -425,7 +492,7 @@ static Error populateDynamic(DynamicEntries &Dyn, return createError( "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)"); } - if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) { + if (Dyn.SONameOffset && *Dyn.SONameOffset >= Dyn.StrSize) { return createStringError(object_error::parse_failed, "DT_SONAME string offset (0x%016" PRIx64 ") outside of dynamic string table", @@ -507,7 +574,6 @@ template <class ELFT> static Expected<std::unique_ptr<IFSStub>> buildStub(const ELFObjectFile<ELFT> &ElfObj) { using Elf_Dyn_Range = typename ELFT::DynRange; - using Elf_Phdr_Range = typename ELFT::PhdrRange; using Elf_Sym_Range = typename ELFT::SymRange; using Elf_Sym = typename ELFT::Sym; std::unique_ptr<IFSStub> DestStub = std::make_unique<IFSStub>(); @@ -518,25 +584,19 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) { return DynTable.takeError(); } - // Fetch program headers. - Expected<Elf_Phdr_Range> PHdrs = ElfFile.program_headers(); - if (!PHdrs) { - return PHdrs.takeError(); - } - // Collect relevant .dynamic entries. DynamicEntries DynEnt; if (Error Err = populateDynamic(DynEnt, *DynTable)) return std::move(Err); + Expected<DynSym<ELFT>> EDynSym = DynSym<ELFT>::create(ElfFile, DynEnt); + if (!EDynSym) + return EDynSym.takeError(); - // Get pointer to in-memory location of .dynstr section. - Expected<const uint8_t *> DynStrPtr = ElfFile.toMappedAddr(DynEnt.StrTabAddr); - if (!DynStrPtr) - return appendToError(DynStrPtr.takeError(), - "when locating .dynstr section contents"); + Expected<StringRef> EDynStr = EDynSym->getDynStr(); + if (!EDynStr) + return EDynStr.takeError(); - StringRef DynStr(reinterpret_cast<const char *>(DynStrPtr.get()), - DynEnt.StrSize); + StringRef DynStr = *EDynStr; // Populate Arch from ELF header.
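Aside: the appendToError helper moved above exists because llvm::Error is move-only and consume-once, so context cannot be appended in place; the helper renders the old message, marks the original error handled, and wraps the combined text in a fresh one. A minimal standalone sketch of the same pattern, with createStringError standing in for the file-local createError helper (not shown in this hunk):

```cpp
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Consume Err and return a new Error carrying Err's message plus After.
static Error appendToError(Error Err, StringRef After) {
  std::string Message;
  raw_string_ostream Stream(Message);
  Stream << Err << " " << After; // render the original message, then the suffix
  consumeError(std::move(Err));  // the old error must still be marked handled
  return createStringError(inconvertibleErrorCode(), "%s",
                           Stream.str().c_str());
}

int main() {
  Error E = appendToError(
      createStringError(inconvertibleErrorCode(), "address not mapped"),
      "when locating dynamic symbol table section contents");
  logAllUnhandledErrors(std::move(E), errs()); // prints the combined message
  return 1;
}
```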
DestStub->Target.Arch = static_cast<IFSArch>(ElfFile.getHeader().e_machine); @@ -547,7 +607,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) { DestStub->Target.ObjectFormat = "ELF"; // Populate SoName from .dynamic entries and dynamic string table. - if (DynEnt.SONameOffset.hasValue()) { + if (DynEnt.SONameOffset) { Expected<StringRef> NameOrErr = terminatedSubstr(DynStr, *DynEnt.SONameOffset); if (!NameOrErr) { @@ -572,8 +632,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) { return SymCount.takeError(); if (*SymCount > 0) { // Get pointer to in-memory location of .dynsym section. - Expected<const uint8_t *> DynSymPtr = - ElfFile.toMappedAddr(DynEnt.DynSymAddr); + Expected<const uint8_t *> DynSymPtr = EDynSym->getDynSym(); if (!DynSymPtr) return appendToError(DynSymPtr.takeError(), "when locating .dynsym section contents"); diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp index 4ccbb18ca04a..71189e79360e 100644 --- a/llvm/lib/InterfaceStub/IFSHandler.cpp +++ b/llvm/lib/InterfaceStub/IFSHandler.cpp @@ -7,14 +7,17 @@ //===-----------------------------------------------------------------------===/ #include "llvm/InterfaceStub/IFSHandler.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/InterfaceStub/IFSStub.h" #include "llvm/Support/Error.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/YAMLTraits.h" +#include <functional> using namespace llvm; using namespace llvm::ifs; @@ -115,11 +118,12 @@ template <> struct MappingTraits<IFSSymbol> { IO.mapRequired("Type", Symbol.Type); // The need for symbol size depends on the symbol type. if (Symbol.Type == IFSSymbolType::NoType) { - IO.mapOptional("Size", Symbol.Size, (uint64_t)0); - } else if (Symbol.Type == IFSSymbolType::Func) { - Symbol.Size = 0; - } else { - IO.mapRequired("Size", Symbol.Size); + // Size is None, so we are reading it in, or it is non 0 so we + // should emit it.
+ if (!Symbol.Size || *Symbol.Size) + IO.mapOptional("Size", Symbol.Size); + } else if (Symbol.Type != IFSSymbolType::Func) { + IO.mapOptional("Size", Symbol.Size); } IO.mapOptional("Undefined", Symbol.Undefined, false); IO.mapOptional("Weak", Symbol.Weak, false); @@ -189,7 +193,7 @@ Expected<std::unique_ptr<IFSStub>> ifs::readIFSFromBuffer(StringRef Buf) { std::make_error_code(std::errc::invalid_argument)); if (Stub->Target.ArchString) { Stub->Target.Arch = - ELF::convertArchNameToEMachine(Stub->Target.ArchString.getValue()); + ELF::convertArchNameToEMachine(*Stub->Target.ArchString); } return std::move(Stub); } @@ -262,7 +266,7 @@ Error ifs::validateIFSTarget(IFSStub &Stub, bool ParseTriple) { ValidationEC); } if (ParseTriple) { - IFSTarget TargetFromTriple = parseTriple(Stub.Target.Triple.getValue()); + IFSTarget TargetFromTriple = parseTriple(*Stub.Target.Triple); Stub.Target.Arch = TargetFromTriple.Arch; Stub.Target.BitWidth = TargetFromTriple.BitWidth; Stub.Target.Endianness = TargetFromTriple.Endianness; @@ -328,12 +332,28 @@ void ifs::stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch, } } -void ifs::stripIFSUndefinedSymbols(IFSStub &Stub) { - for (auto Iter = Stub.Symbols.begin(); Iter != Stub.Symbols.end();) { - if (Iter->Undefined) { - Iter = Stub.Symbols.erase(Iter); - } else { - Iter++; - } +Error ifs::filterIFSSyms(IFSStub &Stub, bool StripUndefined, + const std::vector<std::string> &Exclude) { + std::function<bool(const IFSSymbol &)> Filter = [](const IFSSymbol &) { + return false; + }; + + if (StripUndefined) { + Filter = [Filter](const IFSSymbol &Sym) { + return Sym.Undefined || Filter(Sym); + }; + } + + for (StringRef Glob : Exclude) { + Expected<llvm::GlobPattern> PatternOrErr = llvm::GlobPattern::create(Glob); + if (!PatternOrErr) + return PatternOrErr.takeError(); + Filter = [Pattern = *PatternOrErr, Filter](const IFSSymbol &Sym) { + return Pattern.match(Sym.Name) || Filter(Sym); + }; } + + llvm::erase_if(Stub.Symbols, Filter); + + return Error::success(); } diff --git a/llvm/lib/InterfaceStub/IFSStub.cpp b/llvm/lib/InterfaceStub/IFSStub.cpp index 1ce7a66869b8..f043f7e9e383 100644 --- a/llvm/lib/InterfaceStub/IFSStub.cpp +++ b/llvm/lib/InterfaceStub/IFSStub.cpp @@ -8,7 +8,7 @@ #include "llvm/InterfaceStub/IFSStub.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::ifs; diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 418aad26fdd6..a9e04ba760ca 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -134,7 +134,6 @@ void llvm::computeLTOCacheKey( AddUnsigned(Conf.CGOptLevel); AddUnsigned(Conf.CGFileType); AddUnsigned(Conf.OptLevel); - AddUnsigned(Conf.UseNewPM); AddUnsigned(Conf.Freestanding); AddString(Conf.OptPipeline); AddString(Conf.AAPipeline); @@ -640,11 +639,11 @@ Error LTO::addModule(InputFile &Input, unsigned ModI, if (!LTOInfo) return LTOInfo.takeError(); - if (EnableSplitLTOUnit.hasValue()) { + if (EnableSplitLTOUnit) { // If only some modules were split, flag this in the index so that // we can skip or error on optimizations that need consistently split // modules (whole program devirt and lower type tests). - if (EnableSplitLTOUnit.getValue() != LTOInfo->EnableSplitLTOUnit) + if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit) ThinLTO.CombinedIndex.setPartiallySplitLTOUnits(); } else EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit; @@ -820,9 +819,10 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, // For now they aren't reported correctly by ModuleSymbolTable.
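The filterIFSSyms rewrite above builds one composite predicate by repeatedly wrapping the previous std::function in a lambda that captures it by value, then erases all matches in a single llvm::erase_if pass. A standalone sketch of that chaining pattern, using exact-name matches where the patch uses GlobPattern (the symbol names here are made up):

```cpp
#include <algorithm>
#include <functional>
#include <string>
#include <vector>

int main() {
  // Start from a predicate that excludes nothing.
  std::function<bool(const std::string &)> Filter =
      [](const std::string &) { return false; };

  // Wrap it once per exclusion rule; each layer captures the previous
  // layer by value, so the final Filter runs the whole chain of checks.
  for (std::string Banned : {"free", "malloc"})
    Filter = [Banned, Prev = Filter](const std::string &Name) {
      return Name == Banned || Prev(Name);
    };

  std::vector<std::string> Syms = {"malloc", "printf", "free"};
  Syms.erase(std::remove_if(Syms.begin(), Syms.end(), Filter), Syms.end());
  // Syms now holds only "printf", just as llvm::erase_if(Stub.Symbols, Filter)
  // keeps only the symbols no rule matched.
  return 0;
}
```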
auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); - MaybeAlign SymAlign(Sym.getCommonAlignment()); - if (SymAlign) - CommonRes.Align = max(*SymAlign, CommonRes.Align); + if (uint32_t SymAlignValue = Sym.getCommonAlignment()) { + const Align SymAlign(SymAlignValue); + CommonRes.Align = std::max(SymAlign, CommonRes.Align.valueOrOne()); + } CommonRes.Prevailing |= Res.Prevailing; } } @@ -885,8 +885,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, Keep.push_back(GV); } - return RegularLTO.Mover->move(std::move(Mod.M), Keep, - [](GlobalValue &, IRMover::ValueAdder) {}, + return RegularLTO.Mover->move(std::move(Mod.M), Keep, nullptr, /* IsPerformingImport */ false); } @@ -1162,14 +1161,18 @@ protected: const Config &Conf; ModuleSummaryIndex &CombinedIndex; const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries; + lto::IndexWriteCallback OnWrite; + bool ShouldEmitImportsFiles; public: ThinBackendProc(const Config &Conf, ModuleSummaryIndex &CombinedIndex, - const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles) : Conf(Conf), CombinedIndex(CombinedIndex), - ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {} + ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries), + OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {} - virtual ~ThinBackendProc() {} + virtual ~ThinBackendProc() = default; virtual Error start( unsigned Task, BitcodeModule BM, const FunctionImporter::ImportMapTy &ImportList, @@ -1178,6 +1181,30 @@ public: MapVector<StringRef, BitcodeModule> &ModuleMap) = 0; virtual Error wait() = 0; virtual unsigned getThreadCount() = 0; + + // Write sharded indices and (optionally) imports to disk + Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, + llvm::StringRef ModulePath, + const std::string &NewModulePath) { + std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex; + std::error_code EC; + gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, + ImportList, ModuleToSummariesForIndex); + + raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, + sys::fs::OpenFlags::OF_None); + if (EC) + return errorCodeToError(EC); + writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); + + if (ShouldEmitImportsFiles) { + EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports", + ModuleToSummariesForIndex); + if (EC) + return errorCodeToError(EC); + } + return Error::success(); + } }; namespace { @@ -1191,15 +1218,19 @@ class InProcessThinBackend : public ThinBackendProc { Optional<Error> Err; std::mutex ErrMu; + bool ShouldEmitIndexFiles; + public: InProcessThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, ThreadPoolStrategy ThinLTOParallelism, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache Cache) - : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), + AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) + : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, + OnWrite, ShouldEmitImportsFiles), BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)), - Cache(std::move(Cache)) { + Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) { for (auto &Name : CombinedIndex.cfiFunctionDefs()) CfiFunctionDefs.insert( GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); @@ -1228,6 +1259,11 @@ public: auto ModuleID =
BM.getModuleIdentifier(); + if (ShouldEmitIndexFiles) { + if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str())) + return E; + } + if (!Cache || !CombinedIndex.modulePaths().count(ModuleID) || all_of(CombinedIndex.getModuleHash(ModuleID), [](uint32_t V) { return V == 0; })) @@ -1286,6 +1322,9 @@ public: }, BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap)); + + if (OnWrite) + OnWrite(std::string(ModulePath)); return Error::success(); } @@ -1303,13 +1342,16 @@ public: }; } // end anonymous namespace -ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) { +ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism, + lto::IndexWriteCallback OnWrite, + bool ShouldEmitIndexFiles, + bool ShouldEmitImportsFiles) { return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, AddStreamFn AddStream, FileCache Cache) { return std::make_unique<InProcessThinBackend>( Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream, - Cache); + Cache, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles); }; } @@ -1336,9 +1378,7 @@ std::string lto::getThinLTOOutputFile(const std::string &Path, namespace { class WriteIndexesThinBackend : public ThinBackendProc { std::string OldPrefix, NewPrefix; - bool ShouldEmitImportsFiles; raw_fd_ostream *LinkedObjectsFile; - lto::IndexWriteCallback OnWrite; public: WriteIndexesThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite) - : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), + : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, + OnWrite, ShouldEmitImportsFiles), OldPrefix(OldPrefix), NewPrefix(NewPrefix), - ShouldEmitImportsFiles(ShouldEmitImportsFiles), - LinkedObjectsFile(LinkedObjectsFile), OnWrite(OnWrite) {} + LinkedObjectsFile(LinkedObjectsFile) {} Error start( unsigned Task, BitcodeModule BM, @@ -1364,23 +1404,8 @@ public: if (LinkedObjectsFile) *LinkedObjectsFile << NewModulePath << '\n'; - std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex; - gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, - ImportList, ModuleToSummariesForIndex); - - std::error_code EC; - raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, - sys::fs::OpenFlags::OF_None); - if (EC) - return errorCodeToError(EC); - writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); - - if (ShouldEmitImportsFiles) { - EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports", - ModuleToSummariesForIndex); - if (EC) - return errorCodeToError(EC); - } + if (auto E = emitFiles(ImportList, ModulePath, NewModulePath)) + return E; if (OnWrite) OnWrite(std::string(ModulePath)); @@ -1621,9 +1646,8 @@ lto::setupStatsFile(StringRef StatsFilename) { // is to sort them per size so that the largest module get schedule as soon as // possible. This is purely a compile-time optimization.
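The comment above carries the whole scheduling argument: with a fixed-size thread pool, dispatching the largest bitcode buffers first shrinks the idle tail at the end of the run. A toy illustration of the ordering that the next hunk implements with llvm::seq and llvm::sort (the sizes are invented; the real comparator uses getBuffer().size()):

```cpp
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<std::size_t> Sizes = {120, 4096, 30, 2048}; // pretend modules

  std::vector<int> Order(Sizes.size());
  std::iota(Order.begin(), Order.end(), 0); // 0, 1, 2, 3
  std::sort(Order.begin(), Order.end(),
            [&](int L, int R) { return Sizes[L] > Sizes[R]; });

  for (int I : Order) // prints 1, 3, 0, 2: biggest jobs dispatched first
    std::printf("module %d (%zu bytes)\n", I, Sizes[I]);
  return 0;
}
```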
std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) { - std::vector<int> ModulesOrdering; - ModulesOrdering.resize(R.size()); - std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); + auto Seq = llvm::seq<int>(0, R.size()); + std::vector<int> ModulesOrdering(Seq.begin(), Seq.end()); llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { auto LSize = R[LeftIndex]->getBuffer().size(); auto RSize = R[RightIndex]->getBuffer().size(); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 3877def53c3f..5d50e92ae377 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -18,7 +18,6 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/LLVMRemarkStreamer.h" @@ -41,8 +40,6 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" @@ -298,6 +295,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, report_fatal_error(Twine("unable to parse pass pipeline description '") + Conf.OptPipeline + "': " + toString(std::move(Err))); } + } else if (Conf.UseDefaultPipeline) { + MPM.addPass(PB.buildPerModuleDefaultPipeline(OL)); } else if (IsThinLTO) { MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary)); } else { @@ -310,39 +309,6 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, MPM.run(Mod, MAM); } -static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, - bool IsThinLTO, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - legacy::PassManager passes; - passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); - - PassManagerBuilder PMB; - PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())); - if (Conf.Freestanding) - PMB.LibraryInfo->disableAllFunctions(); - PMB.Inliner = createFunctionInliningPass(); - PMB.ExportSummary = ExportSummary; - PMB.ImportSummary = ImportSummary; - // Unconditionally verify input since it is not verified before this - // point and has unknown origin. - PMB.VerifyInput = true; - PMB.VerifyOutput = !Conf.DisableVerify; - PMB.LoopVectorize = true; - PMB.SLPVectorize = true; - PMB.OptLevel = Conf.OptLevel; - PMB.PGOSampleUse = Conf.SampleProfile; - PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr; - if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) { - PMB.EnablePGOCSInstrUse = true; - PMB.PGOInstrUse = Conf.CSIRProfile; - } - if (IsThinLTO) - PMB.populateThinLTOPassManager(passes); - else - PMB.populateLTOPassManager(passes); - passes.run(Mod); -} - bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary, @@ -365,12 +331,8 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, /*Cmdline*/ CmdArgs); } // FIXME: Plumb the combined index into the new pass manager.
- if (Conf.UseNewPM || !Conf.OptPipeline.empty()) { - runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary, - ImportSummary); - } else { - runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); - } + runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary, + ImportSummary); return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); } diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index fdc9896aca78..2abf249cbd62 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -66,11 +66,7 @@ using namespace llvm; const char* LTOCodeGenerator::getVersionString() { -#ifdef LLVM_VERSION_INFO - return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO; -#else return PACKAGE_NAME " version " PACKAGE_VERSION; -#endif } namespace llvm { @@ -132,7 +128,7 @@ LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) }; } -LTOCodeGenerator::~LTOCodeGenerator() {} +LTOCodeGenerator::~LTOCodeGenerator() = default; void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { for (const StringRef &Undef : Mod->getAsmUndefinedRefs()) diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp index 4cc1b307c553..5ad5e857296d 100644 --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -50,7 +50,7 @@ LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef, SymTab.addModule(Mod.get()); } -LTOModule::~LTOModule() {} +LTOModule::~LTOModule() = default; /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM /// bitcode. diff --git a/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/llvm/lib/LTO/SummaryBasedOptimizations.cpp index 9e9d5c84d50d..bd3565771c29 100644 --- a/llvm/lib/LTO/SummaryBasedOptimizations.cpp +++ b/llvm/lib/LTO/SummaryBasedOptimizations.cpp @@ -55,7 +55,7 @@ void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) { }; auto GetEntryCount = [](ValueInfo V) { if (V.getSummaryList().size()) { - auto S = V.getSummaryList().front().get()->getBaseObject(); + auto S = V.getSummaryList().front()->getBaseObject(); auto *F = cast<FunctionSummary>(S); return F->entryCount(); } else { diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 37e85b6af6ba..a1041b3c85f5 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -21,16 +21,15 @@ #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LLVMRemarkStreamer.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/Verifier.h" @@ -54,11 +53,9 @@ #include "llvm/Support/Threading.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include
"llvm/Transforms/Utils/FunctionImportUtils.h" @@ -239,38 +236,7 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, static void optimizeModule(Module &TheModule, TargetMachine &TM, unsigned OptLevel, bool Freestanding, - ModuleSummaryIndex *Index) { - // Populate the PassManager - PassManagerBuilder PMB; - PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); - if (Freestanding) - PMB.LibraryInfo->disableAllFunctions(); - PMB.Inliner = createFunctionInliningPass(); - // FIXME: should get it from the bitcode? - PMB.OptLevel = OptLevel; - PMB.LoopVectorize = true; - PMB.SLPVectorize = true; - // Already did this in verifyLoadedModule(). - PMB.VerifyInput = false; - PMB.VerifyOutput = false; - PMB.ImportSummary = Index; - - legacy::PassManager PM; - - // Add the TTI (required to inform the vectorizer about register size for - // instance) - PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); - - // Add optimizations - PMB.populateThinLTOPassManager(PM); - - PM.run(TheModule); -} - -static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM, - unsigned OptLevel, bool Freestanding, - bool DebugPassManager, - ModuleSummaryIndex *Index) { + bool DebugPassManager, ModuleSummaryIndex *Index) { Optional PGOOpt; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -485,7 +451,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, const ThinLTOCodeGenerator::CachingOptions &CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, bool Freestanding, unsigned OptLevel, unsigned count, - bool UseNewPM, bool DebugPassManager) { + bool DebugPassManager) { // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); @@ -525,11 +491,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } - if (UseNewPM) - optimizeModuleNewPM(TheModule, TM, OptLevel, Freestanding, DebugPassManager, - &Index); - else - optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index); + optimizeModule(TheModule, TM, OptLevel, Freestanding, DebugPassManager, + &Index); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); @@ -953,7 +916,7 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) { // Optimize now optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding, - nullptr); + DebugPassManager, nullptr); } /// Write out the generated object file, either from CacheEntryPath or from @@ -1216,7 +1179,7 @@ void ThinLTOCodeGenerator::run() { ExportList, GUIDPreservedSymbols, ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count, - UseNewPM, DebugPassManager); + DebugPassManager); // Commit to the cache (if enabled) CacheEntry.write(*OutputBuffer); diff --git a/llvm/lib/LineEditor/LineEditor.cpp b/llvm/lib/LineEditor/LineEditor.cpp index 37c4b79f8e29..09ec65a1d9c9 100644 --- a/llvm/lib/LineEditor/LineEditor.cpp +++ b/llvm/lib/LineEditor/LineEditor.cpp @@ -29,8 +29,8 @@ std::string LineEditor::getDefaultHistoryPath(StringRef ProgName) { return std::string(); } -LineEditor::CompleterConcept::~CompleterConcept() {} -LineEditor::ListCompleterConcept::~ListCompleterConcept() {} +LineEditor::CompleterConcept::~CompleterConcept() = default; +LineEditor::ListCompleterConcept::~ListCompleterConcept() = default; std::string LineEditor::ListCompleterConcept::getCommonPrefix( const std::vector &Comps) { diff --git a/llvm/lib/Linker/IRMover.cpp 
b/llvm/lib/Linker/IRMover.cpp index b475ea81d107..5a819e2d736c 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -9,19 +9,24 @@ #include "llvm/Linker/IRMover.h" #include "LinkDiagnosticInfo.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Support/Error.h" #include "llvm/Support/Path.h" -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <utility> using namespace llvm; @@ -381,7 +386,7 @@ class IRLinker { std::unique_ptr<Module> SrcM; /// See IRMover::move(). - std::function<void(GlobalValue &GV, IRMover::ValueAdder Add)> AddLazyFor; + IRMover::LazyCallback AddLazyFor; TypeMapTy TypeMap; GlobalValueMaterializer GValMaterializer; @@ -524,8 +529,7 @@ public: IRLinker(Module &DstM, MDMapT &SharedMDs, IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM, ArrayRef<GlobalValue *> ValuesToLink, - std::function<void(GlobalValue &GV, IRMover::ValueAdder Add)> AddLazyFor, - bool IsPerformingImport) + IRMover::LazyCallback AddLazyFor, bool IsPerformingImport) : DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)), TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this), SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport), @@ -987,10 +991,11 @@ bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) { // Callback to the client to give a chance to lazily add the Global to the // list of value to link. bool LazilyAdded = false; - AddLazyFor(SGV, [this, &LazilyAdded](GlobalValue &GV) { - maybeAdd(&GV); - LazilyAdded = true; - }); + if (AddLazyFor) + AddLazyFor(SGV, [this, &LazilyAdded](GlobalValue &GV) { + maybeAdd(&GV); + LazilyAdded = true; + }); return LazilyAdded; } @@ -1041,7 +1046,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (Function *F = dyn_cast<Function>(NewGV)) if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) { NewGV->eraseFromParent(); - NewGV = Remangled.getValue(); + NewGV = *Remangled; NeedsRenaming = false; } @@ -1229,8 +1234,15 @@ void IRLinker::linkNamedMDNodes() { continue; // Don't import pseudo probe descriptors here for thinLTO. They will be // emitted by the originating module. - if (IsPerformingImport && NMD.getName() == PseudoProbeDescMetadataName) + if (IsPerformingImport && NMD.getName() == PseudoProbeDescMetadataName) { + if (!DstM.getNamedMetadata(NMD.getName())) + emitWarning("Pseudo-probe ignored: source module '" + + SrcM->getModuleIdentifier() + + "' is compiled with -fpseudo-probe-for-profiling while " + "destination module '" + + DstM.getModuleIdentifier() + "' is not\n"); continue; + } NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName()); // Add Src elements into Dest node. for (const MDNode *Op : NMD.operands()) @@ -1245,6 +1257,9 @@ Error IRLinker::linkModuleFlagsMetadata() { if (!SrcModFlags) return Error::success(); + // Check for module flag for updates before do anything. + UpgradeModuleFlags(*SrcM); + // If the destination module doesn't have module flags yet, then just copy // over the source module's flags.
NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata(); @@ -1327,11 +1342,15 @@ Error IRLinker::linkModuleFlagsMetadata() { // Diagnose inconsistent merge behavior types. if (SrcBehaviorValue != DstBehaviorValue) { + bool MinAndWarn = (SrcBehaviorValue == Module::Min && + DstBehaviorValue == Module::Warning) || + (DstBehaviorValue == Module::Min && + SrcBehaviorValue == Module::Warning); bool MaxAndWarn = (SrcBehaviorValue == Module::Max && DstBehaviorValue == Module::Warning) || (DstBehaviorValue == Module::Max && SrcBehaviorValue == Module::Warning); - if (!MaxAndWarn) + if (!(MaxAndWarn || MinAndWarn)) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting behaviors in '" + SrcM->getModuleIdentifier() + "' and '" + @@ -1360,6 +1379,25 @@ Error IRLinker::linkModuleFlagsMetadata() { emitWarning(Str); } + // Choose the minimum if either source or destination request Min behavior. + if (DstBehaviorValue == Module::Min || SrcBehaviorValue == Module::Min) { + ConstantInt *DstValue = + mdconst::extract<ConstantInt>(DstOp->getOperand(2)); + ConstantInt *SrcValue = + mdconst::extract<ConstantInt>(SrcOp->getOperand(2)); + + // The resulting flag should have a Min behavior, and contain the minimum + // value from between the source and destination values. + Metadata *FlagOps[] = { + (DstBehaviorValue != Module::Min ? SrcOp : DstOp)->getOperand(0), ID, + (SrcValue->getZExtValue() < DstValue->getZExtValue() ? SrcOp : DstOp) + ->getOperand(2)}; + MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); + DstModFlags->setOperand(DstIndex, Flag); + Flags[ID].first = Flag; + continue; + } + // Choose the maximum if either source or destination request Max behavior. if (DstBehaviorValue == Module::Max || SrcBehaviorValue == Module::Max) { ConstantInt *DstValue = @@ -1673,10 +1711,9 @@ IRMover::IRMover(Module &M) : Composite(M) { } } -Error IRMover::move( - std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink, - std::function<void(GlobalValue &GV, IRMover::ValueAdder Add)> AddLazyFor, - bool IsPerformingImport) { +Error IRMover::move(std::unique_ptr<Module> Src, + ArrayRef<GlobalValue *> ValuesToLink, + LazyCallback AddLazyFor, bool IsPerformingImport) { IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes, std::move(Src), ValuesToLink, std::move(AddLazyFor), IsPerformingImport); diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp index f9f51bf17d95..17c3f09a23b7 100644 --- a/llvm/lib/Linker/LinkModules.cpp +++ b/llvm/lib/Linker/LinkModules.cpp @@ -14,7 +14,6 @@ #include "llvm-c/Linker.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Comdat.h" -#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -573,11 +572,13 @@ bool ModuleLinker::run() { // FIXME: Propagate Errors through to the caller instead of emitting // diagnostics.
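One idea threads through the IRMover changes above: AddLazyFor is now a nullable IRMover::LazyCallback, so regular LTO passes nullptr instead of an empty lambda and IRLinker::shouldLink invokes the callback only after a null check. A minimal sketch of that nullable-callback convention (toy names, not the LLVM API):

```cpp
#include <cstdio>
#include <functional>

using LazyCallback = std::function<void(int)>; // empty == nothing to add

static void runLink(LazyCallback AddLazyFor) {
  if (AddLazyFor) // guard mirrors the new check in IRLinker::shouldLink
    AddLazyFor(42);
  else
    std::puts("no lazy value additions requested");
}

int main() {
  runLink(nullptr);                                     // regular-LTO style
  runLink([](int GV) { std::printf("add %d\n", GV); }); // ModuleLinker style
  return 0;
}
```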
bool HasErrors = false; - if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(), - [this](GlobalValue &GV, IRMover::ValueAdder Add) { - addLazyFor(GV, Add); - }, - /* IsPerformingImport */ false)) { + if (Error E = + Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(), + IRMover::LazyCallback( + [this](GlobalValue &GV, IRMover::ValueAdder Add) { + addLazyFor(GV, Add); + }), + /* IsPerformingImport */ false)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message())); HasErrors = true; diff --git a/llvm/lib/MC/ConstantPools.cpp b/llvm/lib/MC/ConstantPools.cpp index d8a08a4bd439..c3ab88b94476 100644 --- a/llvm/lib/MC/ConstantPools.cpp +++ b/llvm/lib/MC/ConstantPools.cpp @@ -39,25 +39,38 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) { const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context, unsigned Size, SMLoc Loc) { const MCConstantExpr *C = dyn_cast<MCConstantExpr>(Value); + const MCSymbolRefExpr *S = dyn_cast<MCSymbolRefExpr>(Value); // Check if there is existing entry for the same constant. If so, reuse it. - auto Itr = C ? CachedEntries.find(C->getValue()) : CachedEntries.end(); - if (Itr != CachedEntries.end()) - return Itr->second; + if (C) { + auto CItr = CachedConstantEntries.find(C->getValue()); + if (CItr != CachedConstantEntries.end()) + return CItr->second; + } + + // Check if there is existing entry for the same symbol. If so, reuse it. + if (S) { + auto SItr = CachedSymbolEntries.find(&(S->getSymbol())); + if (SItr != CachedSymbolEntries.end()) + return SItr->second; + } MCSymbol *CPEntryLabel = Context.createTempSymbol(); Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc)); const auto SymRef = MCSymbolRefExpr::create(CPEntryLabel, Context); if (C) - CachedEntries[C->getValue()] = SymRef; + CachedConstantEntries[C->getValue()] = SymRef; + if (S) + CachedSymbolEntries[&(S->getSymbol())] = SymRef; return SymRef; } bool ConstantPool::empty() { return Entries.empty(); } void ConstantPool::clearCache() { - CachedEntries.clear(); + CachedConstantEntries.clear(); + CachedSymbolEntries.clear(); } // @@ -79,7 +92,7 @@ AssemblerConstantPools::getOrCreateConstantPool(MCSection *Section) { static void emitConstantPool(MCStreamer &Streamer, MCSection *Section, ConstantPool &CP) { if (!CP.empty()) { - Streamer.SwitchSection(Section); + Streamer.switchSection(Section); CP.emitEntries(Streamer); } } diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 883735fcc293..eda495693595 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -13,10 +13,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" @@ -28,18 +28,18 @@ #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Alignment.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -47,8 +47,6 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -223,8 +221,6 @@ class ELFObjectWriter : public MCObjectWriter { DenseMap Renames; bool SeenGnuAbi = false; - bool EmitAddrsigSection = false; - std::vector AddrsigSyms; bool hasRelocationAddend() const; @@ -264,10 +260,6 @@ public: void markGnuAbi() override { SeenGnuAbi = true; } bool seenGnuAbi() const { return SeenGnuAbi; } - void emitAddrsigSection() override { EmitAddrsigSection = true; } - void addAddrsigSymbol(const MCSymbol *Sym) override { - AddrsigSyms.push_back(Sym); - } friend struct ELFWriter; }; @@ -549,9 +541,27 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, uint64_t Size = 0; const MCExpr *ESize = MSD.Symbol->getSize(); - if (!ESize && Base) + if (!ESize && Base) { + // For expressions like .set y, x+1, if y's size is unset, inherit from x. ESize = Base->getSize(); + // For `.size x, 2; y = x; .size y, 1; z = y; z1 = z; .symver y, y@v1`, z, + // z1, and y@v1's st_size equals y's. However, `Base` is `x` which will give + // us 2. Follow the MCSymbolRefExpr assignment chain, which covers most + // needs. MCBinaryExpr is not handled. + const MCSymbolELF *Sym = &Symbol; + while (Sym->isVariable()) { + if (auto *Expr = + dyn_cast(Sym->getVariableValue(false))) { + Sym = cast(&Expr->getSymbol()); + if (!Sym->getSize()) + continue; + ESize = Sym->getSize(); + } + break; + } + } + if (ESize) { int64_t Res; if (!ESize->evaluateKnownAbsolute(Res, Layout)) @@ -850,13 +860,9 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, auto &MC = Asm.getContext(); const auto &MAI = MC.getAsmInfo(); - // Compressing debug_frame requires handling alignment fragments which is - // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow - // for writing to arbitrary buffers) for little benefit. bool CompressionEnabled = MAI->compressDebugSections() != DebugCompressionType::None; - if (!CompressionEnabled || !SectionName.startswith(".debug_") || - SectionName == ".debug_frame") { + if (!CompressionEnabled || !SectionName.startswith(".debug_")) { Asm.writeSectionData(W.OS, &Section, Layout); return; } @@ -870,13 +876,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, Asm.writeSectionData(VecOS, &Section, Layout); SmallVector CompressedContents; - if (Error E = zlib::compress( - StringRef(UncompressedData.data(), UncompressedData.size()), - CompressedContents)) { - consumeError(std::move(E)); - W.OS << UncompressedData; - return; - } + zlib::compress(StringRef(UncompressedData.data(), UncompressedData.size()), + CompressedContents); bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, @@ -1336,6 +1337,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, // can update it. return true; case ELF::STB_GLOBAL: + case ELF::STB_GNU_UNIQUE: // Global ELF symbols can be preempted by the dynamic linker. The relocation // has to point to the symbol for a reason analogous to the STB_WEAK case. 
return true; diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp index 7989dd57907c..4ed9d8593336 100644 --- a/llvm/lib/MC/MCAsmBackend.cpp +++ b/llvm/lib/MC/MCAsmBackend.cpp @@ -8,11 +8,13 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLArrayExtras.h" +#include "llvm/MC/MCDXContainerWriter.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSPIRVObjectWriter.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/MC/MCXCOFFObjectWriter.h" @@ -39,12 +41,18 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { case Triple::COFF: return createWinCOFFObjectWriter( cast<MCWinCOFFObjectTargetWriter>(std::move(TW)), OS); + case Triple::SPIRV: + return createSPIRVObjectWriter( + cast<MCSPIRVObjectTargetWriter>(std::move(TW)), OS); case Triple::Wasm: return createWasmObjectWriter(cast<MCWasmObjectTargetWriter>(std::move(TW)), OS); case Triple::XCOFF: return createXCOFFObjectWriter( cast<MCXCOFFObjectTargetWriter>(std::move(TW)), OS); + case Triple::DXContainer: + return createDXContainerObjectWriter( + cast<MCDXContainerTargetWriter>(std::move(TW)), OS); default: llvm_unreachable("unexpected object format"); } diff --git a/llvm/lib/MC/MCAsmInfo.cpp b/llvm/lib/MC/MCAsmInfo.cpp index f52503d7b160..b8d0021ed432 100644 --- a/llvm/lib/MC/MCAsmInfo.cpp +++ b/llvm/lib/MC/MCAsmInfo.cpp @@ -114,7 +114,10 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, } bool MCAsmInfo::isAcceptableChar(char C) const { - return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '@'; + if (C == '@') + return doesAllowAtInName(); + + return isAlnum(C) || C == '_' || C == '$' || C == '.'; } bool MCAsmInfo::isValidUnquotedName(StringRef Name) const { diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 61ec941f50b8..6f8934d66ef4 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -31,13 +30,13 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include <cassert> using namespace llvm; @@ -127,7 +126,7 @@ public: /// Return a raw_ostream that comments can be written to. /// Unlike AddComment, you are required to terminate comments with \n if you /// use this method. - raw_ostream &GetCommentOS() override { + raw_ostream &getCommentOS() override { if (!IsVerboseAsm) return nulls(); // Discard comments unless in verbose asm mode. return CommentStream; @@ -139,9 +138,7 @@ public: void emitExplicitComments() override; /// Emit a blank line to a .s file to pretty it up.
- void AddBlankLine() override { - EmitEOL(); - } + void addBlankLine() override { EmitEOL(); } /// @name MCStreamer Interface /// @{ @@ -180,15 +177,15 @@ public: bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; - void BeginCOFFSymbolDef(const MCSymbol *Symbol) override; - void EmitCOFFSymbolStorageClass(int StorageClass) override; - void EmitCOFFSymbolType(int Type) override; - void EndCOFFSymbolDef() override; - void EmitCOFFSafeSEH(MCSymbol const *Symbol) override; - void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override; - void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; - void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; - void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; + void beginCOFFSymbolDef(const MCSymbol *Symbol) override; + void emitCOFFSymbolStorageClass(int StorageClass) override; + void emitCOFFSymbolType(int Type) override; + void endCOFFSymbolDef() override; + void emitCOFFSafeSEH(MCSymbol const *Symbol) override; + void emitCOFFSymbolIndex(MCSymbol const *Symbol) override; + void emitCOFFSectionIndex(MCSymbol const *Symbol) override; + void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; + void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; void emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size, MCSymbol *CsectSym, unsigned ByteAlign) override; @@ -198,6 +195,8 @@ public: void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override; + void emitXCOFFRefDirective(StringRef Name) override; + void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; @@ -276,11 +275,11 @@ public: StringRef FileName) override; MCSymbol *getDwarfLineTableSymbol(unsigned CUID) override; - bool EmitCVFileDirective(unsigned FileNo, StringRef Filename, + bool emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef<uint8_t> Checksum, unsigned ChecksumKind) override; - bool EmitCVFuncIdDirective(unsigned FuncId) override; - bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, + bool emitCVFuncIdDirective(unsigned FuncId) override; + bool emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc) override; void emitCVLocDirective(unsigned FunctionId, unsigned FileNo, unsigned Line, @@ -316,10 +315,11 @@ public: void emitCVStringTableDirective() override; void emitCVFileChecksumsDirective() override; void emitCVFileChecksumOffsetDirective(unsigned FileNo) override; - void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override; + void emitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override; void emitIdent(StringRef IdentString) override; void emitCFIBKeyFrame() override; + void emitCFIMTETaggedFrame() override; void emitCFISections(bool EH, bool Debug) override; void emitCFIDefCfa(int64_t Register, int64_t Offset) override; void emitCFIDefCfaOffset(int64_t Offset) override; @@ -344,25 +344,25 @@ public: void emitCFINegateRAState() override; void emitCFIReturnColumn(int64_t Register) override; - void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override; - void EmitWinCFIEndProc(SMLoc Loc) override; - void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override; - void EmitWinCFIStartChained(SMLoc Loc) override; - void EmitWinCFIEndChained(SMLoc Loc) override; - void EmitWinCFIPushReg(MCRegister
Register, SMLoc Loc) override; - void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, + void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override; + void emitWinCFIEndProc(SMLoc Loc) override; + void emitWinCFIFuncletOrFuncEnd(SMLoc Loc) override; + void emitWinCFIStartChained(SMLoc Loc) override; + void emitWinCFIEndChained(SMLoc Loc) override; + void emitWinCFIPushReg(MCRegister Register, SMLoc Loc) override; + void emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) override; - void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; - void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, + void emitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; + void emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) override; - void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, + void emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) override; - void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override; - void EmitWinCFIEndProlog(SMLoc Loc) override; + void emitWinCFIPushFrame(bool Code, SMLoc Loc) override; + void emitWinCFIEndProlog(SMLoc Loc) override; - void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + void emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc) override; - void EmitWinEHHandlerData(SMLoc Loc) override; + void emitWinEHHandlerData(SMLoc Loc) override; void emitCGProfileEntry(const MCSymbolRefExpr *From, const MCSymbolRefExpr *To, uint64_t Count) override; @@ -502,7 +502,7 @@ void MCAsmStreamer::changeSection(MCSection *Section, if (MCTargetStreamer *TS = getTargetStreamer()) { TS->changeSection(getCurrentSectionOnly(), Section, Subsection, OS); } else { - Section->PrintSwitchToSection(*MAI, getContext().getTargetTriple(), OS, + Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS, Subsection); } } @@ -761,6 +761,8 @@ bool MCAsmStreamer::emitSymbolAttribute(MCSymbol *Symbol, case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; case MCSA_Cold: // Assemblers currently do not support a .cold directive. + case MCSA_Exported: + // Non-AIX assemblers currently do not support exported visibility. return false; } @@ -787,47 +789,47 @@ void MCAsmStreamer::emitSyntaxDirective() { // with may have a value of prefix or noprefix. 
} -void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { +void MCAsmStreamer::beginCOFFSymbolDef(const MCSymbol *Symbol) { OS << "\t.def\t"; Symbol->print(OS, MAI); OS << ';'; EmitEOL(); } -void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) { +void MCAsmStreamer::emitCOFFSymbolStorageClass(int StorageClass) { OS << "\t.scl\t" << StorageClass << ';'; EmitEOL(); } -void MCAsmStreamer::EmitCOFFSymbolType (int Type) { +void MCAsmStreamer::emitCOFFSymbolType(int Type) { OS << "\t.type\t" << Type << ';'; EmitEOL(); } -void MCAsmStreamer::EndCOFFSymbolDef() { +void MCAsmStreamer::endCOFFSymbolDef() { OS << "\t.endef"; EmitEOL(); } -void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { +void MCAsmStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) { OS << "\t.safeseh\t"; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { +void MCAsmStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) { OS << "\t.symidx\t"; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { +void MCAsmStreamer::emitCOFFSectionIndex(MCSymbol const *Symbol) { OS << "\t.secidx\t"; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) { +void MCAsmStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) { OS << "\t.secrel32\t"; Symbol->print(OS, MAI); if (Offset != 0) @@ -835,7 +837,7 @@ void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) { EmitEOL(); } -void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) { +void MCAsmStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) { OS << "\t.rva\t"; Symbol->print(OS, MAI); if (Offset > 0) @@ -903,6 +905,9 @@ void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility( case MCSA_Protected: OS << ",protected"; break; + case MCSA_Exported: + OS << ",exported"; + break; default: report_fatal_error("unexpected value for Visibility type"); } @@ -931,6 +936,11 @@ void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, EmitEOL(); } +void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) { + OS << "\t.ref " << Name; + EmitEOL(); +} + void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(MAI->hasDotTypeDotSizeDirective()); OS << "\t.size\t"; @@ -988,7 +998,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, SMLoc Loc) { if (Symbol) - AssignFragment(Symbol, &Section->getDummyFragment()); + assignFragment(Symbol, &Section->getDummyFragment()); // Note: a .zerofill directive does not switch sections. OS << ".zerofill "; @@ -1015,7 +1025,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, // e.g. _a. void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - AssignFragment(Symbol, &Section->getDummyFragment()); + assignFragment(Symbol, &Section->getDummyFragment()); assert(Symbol && "Symbol shouldn't be NULL!"); // Instead of using the Section we'll just use the shortcut. 
@@ -1643,7 +1653,7 @@ MCSymbol *MCAsmStreamer::getDwarfLineTableSymbol(unsigned CUID) { return MCStreamer::getDwarfLineTableSymbol(0); } -bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, +bool MCAsmStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef<uint8_t> Checksum, unsigned ChecksumKind) { if (!getContext().getCVContext().addFile(*this, FileNo, Filename, Checksum, @@ -1666,19 +1676,19 @@ bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, return true; } -bool MCAsmStreamer::EmitCVFuncIdDirective(unsigned FuncId) { +bool MCAsmStreamer::emitCVFuncIdDirective(unsigned FuncId) { OS << "\t.cv_func_id " << FuncId << '\n'; - return MCStreamer::EmitCVFuncIdDirective(FuncId); + return MCStreamer::emitCVFuncIdDirective(FuncId); } -bool MCAsmStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId, +bool MCAsmStreamer::emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc) { OS << "\t.cv_inline_site_id " << FunctionId << " within " << IAFunc << " inlined_at " << IAFile << ' ' << IALine << ' ' << IACol << '\n'; - return MCStreamer::EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, + return MCStreamer::emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, IALine, IACol, Loc); } @@ -1795,7 +1805,7 @@ void MCAsmStreamer::emitCVFileChecksumOffsetDirective(unsigned FileNo) { EmitEOL(); } -void MCAsmStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) { +void MCAsmStreamer::emitCVFPOData(const MCSymbol *ProcSym, SMLoc L) { OS << "\t.cv_fpo_data\t"; ProcSym->print(OS, MAI); EmitEOL(); @@ -2016,59 +2026,69 @@ void MCAsmStreamer::emitCFIBKeyFrame() { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { - MCStreamer::EmitWinCFIStartProc(Symbol, Loc); +void MCAsmStreamer::emitCFIMTETaggedFrame() { + MCStreamer::emitCFIMTETaggedFrame(); + OS << "\t.cfi_mte_tagged_frame"; + EmitEOL(); +} + +void MCAsmStreamer::emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { + MCStreamer::emitWinCFIStartProc(Symbol, Loc); OS << ".seh_proc "; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) { - MCStreamer::EmitWinCFIEndProc(Loc); +void MCAsmStreamer::emitWinCFIEndProc(SMLoc Loc) { + MCStreamer::emitWinCFIEndProc(Loc); OS << "\t.seh_endproc"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) { - MCStreamer::EmitWinCFIFuncletOrFuncEnd(Loc); +void MCAsmStreamer::emitWinCFIFuncletOrFuncEnd(SMLoc Loc) { + MCStreamer::emitWinCFIFuncletOrFuncEnd(Loc); OS << "\t.seh_endfunclet"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) { - MCStreamer::EmitWinCFIStartChained(Loc); +void MCAsmStreamer::emitWinCFIStartChained(SMLoc Loc) { + MCStreamer::emitWinCFIStartChained(Loc); OS << "\t.seh_startchained"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndChained(SMLoc Loc) { - MCStreamer::EmitWinCFIEndChained(Loc); +void MCAsmStreamer::emitWinCFIEndChained(SMLoc Loc) { + MCStreamer::emitWinCFIEndChained(Loc); OS << "\t.seh_endchained"; EmitEOL(); } -void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, +void MCAsmStreamer::emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc) { - MCStreamer::EmitWinEHHandler(Sym, Unwind, Except, Loc); + MCStreamer::emitWinEHHandler(Sym, Unwind, Except, Loc); OS << "\t.seh_handler "; Sym->print(OS, MAI); + char Marker = '@'; + const Triple &T = getContext().getTargetTriple(); + if (T.getArch() ==
Triple::arm || T.getArch() == Triple::thumb) + Marker = '%'; if (Unwind) - OS << ", @unwind"; + OS << ", " << Marker << "unwind"; if (Except) - OS << ", @except"; + OS << ", " << Marker << "except"; EmitEOL(); } -void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { - MCStreamer::EmitWinEHHandlerData(Loc); +void MCAsmStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); - // Switch sections. Don't call SwitchSection directly, because that will + // Switch sections. Don't call switchSection directly, because that will // cause the section switch to be visible in the emitted assembly. // We only do this so the section switch that terminates the handler // data block is visible. @@ -2081,23 +2101,23 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { MCSection *TextSec = &CurFrame->Function->getSection(); MCSection *XData = getAssociatedXDataSection(TextSec); - SwitchSectionNoChange(XData); + switchSectionNoChange(XData); OS << "\t.seh_handlerdata"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { - MCStreamer::EmitWinCFIPushReg(Register, Loc); +void MCAsmStreamer::emitWinCFIPushReg(MCRegister Register, SMLoc Loc) { + MCStreamer::emitWinCFIPushReg(Register, Loc); OS << "\t.seh_pushreg "; InstPrinter->printRegName(OS, Register); EmitEOL(); } -void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, +void MCAsmStreamer::emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { - MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc); + MCStreamer::emitWinCFISetFrame(Register, Offset, Loc); OS << "\t.seh_setframe "; InstPrinter->printRegName(OS, Register); @@ -2105,16 +2125,16 @@ void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, EmitEOL(); } -void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { - MCStreamer::EmitWinCFIAllocStack(Size, Loc); +void MCAsmStreamer::emitWinCFIAllocStack(unsigned Size, SMLoc Loc) { + MCStreamer::emitWinCFIAllocStack(Size, Loc); OS << "\t.seh_stackalloc " << Size; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, +void MCAsmStreamer::emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { - MCStreamer::EmitWinCFISaveReg(Register, Offset, Loc); + MCStreamer::emitWinCFISaveReg(Register, Offset, Loc); OS << "\t.seh_savereg "; InstPrinter->printRegName(OS, Register); @@ -2122,9 +2142,9 @@ void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, +void MCAsmStreamer::emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { - MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc); + MCStreamer::emitWinCFISaveXMM(Register, Offset, Loc); OS << "\t.seh_savexmm "; InstPrinter->printRegName(OS, Register); @@ -2132,8 +2152,8 @@ void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { - MCStreamer::EmitWinCFIPushFrame(Code, Loc); +void MCAsmStreamer::emitWinCFIPushFrame(bool Code, SMLoc Loc) { + MCStreamer::emitWinCFIPushFrame(Code, Loc); OS << "\t.seh_pushframe"; if (Code) @@ -2141,8 +2161,8 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProlog(SMLoc Loc) { - MCStreamer::EmitWinCFIEndProlog(Loc); +void MCAsmStreamer::emitWinCFIEndProlog(SMLoc Loc) { + MCStreamer::emitWinCFIEndProlog(Loc); OS << 
"\t.seh_endprologue"; EmitEOL(); @@ -2161,7 +2181,7 @@ void MCAsmStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From, void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &STI) { - raw_ostream &OS = GetCommentOS(); + raw_ostream &OS = getCommentOS(); SmallString<256> Code; SmallVector Fixups; raw_svector_ostream VecOS(Code); @@ -2245,8 +2265,10 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, MCFixup &F = Fixups[i]; const MCFixupKindInfo &Info = getAssembler().getBackend().getFixupKindInfo(F.getKind()); - OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() - << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; + OS << " fixup " << char('A' + i) << " - " + << "offset: " << F.getOffset() << ", value: "; + F.getValue()->print(OS, MAI); + OS << ", kind: " << Info.Name << "\n"; } } @@ -2265,8 +2287,8 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst, // Show the MCInst if enabled. if (ShowInst) { - Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n "); - GetCommentOS() << "\n"; + Inst.dump_pretty(getCommentOS(), InstPrinter.get(), "\n "); + getCommentOS() << "\n"; } if(getTargetStreamer()) @@ -2276,7 +2298,7 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst, StringRef Comments = CommentToEmit; if (Comments.size() && Comments.back() != '\n') - GetCommentOS() << "\n"; + getCommentOS() << "\n"; EmitEOL(); } @@ -2365,7 +2387,7 @@ void MCAsmStreamer::finishImpl() { if (!Tables.empty()) { assert(Tables.size() == 1 && "asm output only supports one line table"); if (auto *Label = Tables.begin()->second.getLabel()) { - SwitchSection(getContext().getObjectFileInfo()->getDwarfLineSection()); + switchSection(getContext().getObjectFileInfo()->getDwarfLineSection()); emitLabel(Label); } } @@ -2492,7 +2514,7 @@ void MCAsmStreamer::doFinalizationAtSectionEnd(MCSection *Section) { if (MAI->usesDwarfFileAndLocDirectives()) return; - SwitchSectionNoChange(Section); + switchSectionNoChange(Section); MCSymbol *Sym = getCurrentSectionOnly()->getEndSymbol(getContext()); diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index a8837bbf57c7..a33d7ea9ebfe 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -27,7 +27,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Alignment.h" @@ -36,16 +35,18 @@ #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include using namespace llvm; +namespace llvm { +class MCSubtargetInfo; +} + #define DEBUG_TYPE "assembler" namespace { @@ -330,11 +331,11 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Align: { const MCAlignFragment &AF = cast(F); unsigned Offset = Layout.getFragmentOffset(&AF); - unsigned Size = offsetToAlignment(Offset, Align(AF.getAlignment())); + unsigned Size = offsetToAlignment(Offset, AF.getAlignment()); // Insert extra Nops for code alignment if the target define // shouldInsertExtraNopBytesForCodeAlign target hook. 
- if (AF.getParent()->UseCodeAlign() && AF.hasEmitNops() && + if (AF.getParent()->useCodeAlign() && AF.hasEmitNops() && getBackend().shouldInsertExtraNopBytesForCodeAlign(AF, Size)) return Size; @@ -342,7 +343,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, // minimum nop size. if (Size > 0 && AF.hasEmitNops()) { while (Size % getBackend().getMinimumNopSize()) - Size += AF.getAlignment(); + Size += AF.getAlignment().value(); } if (Size > AF.getMaxBytesToEmit()) return 0; @@ -873,7 +874,7 @@ void MCAssembler::layout(MCAsmLayout &Layout) { MCAlignFragment &AF = cast<MCAlignFragment>(Frag); // Insert fixup type for code alignment if the target define // shouldInsertFixupForCodeAlign target hook. - if (Sec.UseCodeAlign() && AF.hasEmitNops()) + if (Sec.useCodeAlign() && AF.hasEmitNops()) getBackend().shouldInsertFixupForCodeAlign(*this, Layout, AF); continue; } diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp index 3da1a9c3e331..375d54696cb2 100644 --- a/llvm/lib/MC/MCCodeView.cpp +++ b/llvm/lib/MC/MCCodeView.cpp @@ -17,6 +17,7 @@ #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCValue.h" @@ -25,7 +26,7 @@ using namespace llvm; using namespace llvm::codeview; -CodeViewContext::CodeViewContext() {} +CodeViewContext::CodeViewContext() = default; CodeViewContext::~CodeViewContext() { // If someone inserted strings into the string table but never actually @@ -334,8 +335,8 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, OS.emitInt32(uint32_t(DebugSubsectionKind::Lines)); OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4); OS.emitLabel(LineBegin); - OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0); - OS.EmitCOFFSectionIndex(FuncBegin); + OS.emitCOFFSecRel32(FuncBegin, /*Offset=*/0); + OS.emitCOFFSectionIndex(FuncBegin); // Actual line info.
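// Note (editorial, not from the upstream commit): the emitCOFFSecRel32 +
// emitCOFFSectionIndex pair just above is how a CodeView lines subsection
// names its function: a section-relative offset plus a section index form
// the SECREL/SECTION relocation pair that debuggers use to relocate the
// line table against the final image.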
std::vector<MCCVLoc> Locs = getFunctionLineEntries(FuncId); diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index eafcee1e0607..4be84ca7feb5 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCContext.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -15,21 +16,25 @@ #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCLabel.h" -#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionDXContainer.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionGOFF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionSPIRV.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCSymbolELF.h" @@ -37,13 +42,14 @@ #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCSymbolXCOFF.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -103,6 +109,12 @@ MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai, case Triple::GOFF: Env = IsGOFF; break; + case Triple::DXContainer: + Env = IsDXContainer; + break; + case Triple::SPIRV: + Env = IsSPIRV; + break; case Triple::UnknownObjectFormat: report_fatal_error("Cannot initialize MC for unknown object file format."); break; @@ -134,11 +146,14 @@ void MCContext::reset() { // Call the destructors so the fragments are freed COFFAllocator.DestroyAll(); + DXCAllocator.DestroyAll(); ELFAllocator.DestroyAll(); GOFFAllocator.DestroyAll(); MachOAllocator.DestroyAll(); + WasmAllocator.DestroyAll(); XCOFFAllocator.DestroyAll(); MCInstAllocator.DestroyAll(); + SPIRVAllocator.DestroyAll(); MCSubtargetAllocator.DestroyAll(); InlineAsmUsedLabelNames.clear(); @@ -163,6 +178,7 @@ void MCContext::reset() { COFFUniquingMap.clear(); WasmUniquingMap.clear(); XCOFFUniquingMap.clear(); + DXCUniquingMap.clear(); ELFEntrySizeMap.clear(); ELFSeenGenericMergeableSections.clear(); @@ -243,6 +259,11 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name, return new (Name, *this) MCSymbolWasm(Name, IsTemporary); case MCContext::IsXCOFF: return createXCOFFSymbolImpl(Name, IsTemporary); + case MCContext::IsDXContainer: + break; + case MCContext::IsSPIRV: + return new (Name, *this) + MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary); } return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary); @@ -616,11 +637,14 @@ Optional<unsigned> MCContext::getELFUniqueIDForEntsize(StringRef SectionName, return (I != ELFEntrySizeMap.end()) ?
Optional<unsigned>(I->second) : None; } -MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind) { +MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind, + MCSection *Parent, + const MCExpr *SubsectionId) { // Do the lookup. If we don't have a hit, return a new section. auto &GOFFSection = GOFFUniquingMap[Section.str()]; if (!GOFFSection) - GOFFSection = new (GOFFAllocator.Allocate()) MCSectionGOFF(Section, Kind); + GOFFSection = new (GOFFAllocator.Allocate()) + MCSectionGOFF(Section, Kind, Parent, SubsectionId); return GOFFSection; } @@ -732,13 +756,19 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, return Result; } +bool MCContext::hasXCOFFSection(StringRef Section, + XCOFF::CsectProperties CsectProp) const { + return XCOFFUniquingMap.count( + XCOFFSectionKey(Section.str(), CsectProp.MappingClass)) != 0; +} + MCSectionXCOFF *MCContext::getXCOFFSection( StringRef Section, SectionKind Kind, Optional<XCOFF::CsectProperties> CsectProp, bool MultiSymbolsAllowed, const char *BeginSymName, Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSectionSubtypeFlags) { - bool IsDwarfSec = DwarfSectionSubtypeFlags.hasValue(); - assert((IsDwarfSec != CsectProp.hasValue()) && "Invalid XCOFF section!"); + bool IsDwarfSec = DwarfSectionSubtypeFlags.has_value(); + assert((IsDwarfSec != CsectProp.has_value()) && "Invalid XCOFF section!"); // Do the lookup. If we have a hit, return it. auto IterBool = XCOFFUniquingMap.insert(std::make_pair( @@ -796,6 +826,44 @@ MCSectionXCOFF *MCContext::getXCOFFSection( return Result; } +MCSectionSPIRV *MCContext::getSPIRVSection() { + MCSymbol *Begin = nullptr; + MCSectionSPIRV *Result = new (SPIRVAllocator.Allocate()) + MCSectionSPIRV(SectionKind::getText(), Begin); + + auto *F = new MCDataFragment(); + Result->getFragmentList().insert(Result->begin(), F); + F->setParent(Result); + + if (Begin) + Begin->setFragment(F); + + return Result; +} + +MCSectionDXContainer *MCContext::getDXContainerSection(StringRef Section, + SectionKind K) { + // Do the lookup, if we have a hit, return it. + auto ItInsertedPair = DXCUniquingMap.try_emplace(Section); + if (!ItInsertedPair.second) + return ItInsertedPair.first->second; + + auto MapIt = ItInsertedPair.first; + // Grab the name from the StringMap. Since the Section is going to keep a + // copy of this StringRef we need to make sure the underlying string stays + // alive as long as we need it. + StringRef Name = MapIt->first(); + MapIt->second = + new (DXCAllocator.Allocate()) MCSectionDXContainer(Name, K, nullptr); + + // The first fragment will store the header + auto *F = new MCDataFragment(); + MapIt->second->getFragmentList().insert(MapIt->second->begin(), F); + F->setParent(MapIt->second); + + return MapIt->second; +} + MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) { return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI); } @@ -835,6 +903,12 @@ void MCContext::RemapDebugPaths() { // Dwarf Management //===----------------------------------------------------------------------===// +EmitDwarfUnwindType MCContext::emitDwarfUnwindInfo() const { + if (!TargetOptions) + return EmitDwarfUnwindType::Default; + return TargetOptions->EmitDwarfUnwind; +} + void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) { // MCDwarf needs the root file as well as the compilation directory. // If we find a '.file 0' directive that will supersede these values.
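// Note (editorial, not from the upstream commit): in getDXContainerSection
// above, the section name is deliberately re-read from the map entry
// (MapIt->first()) rather than taken from the caller's Section argument. The
// StringMap entry owns the string, so a StringRef into it stays valid for
// the lifetime of the context, while the caller's buffer may not outlive
// the new MCSectionDXContainer.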
@@ -906,9 +980,9 @@ void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { } CodeViewContext &MCContext::getCVContext() { - if (!CVContext.get()) + if (!CVContext) CVContext.reset(new CodeViewContext); - return *CVContext.get(); + return *CVContext; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/MC/MCDXContainerStreamer.cpp b/llvm/lib/MC/MCDXContainerStreamer.cpp new file mode 100644 index 000000000000..3cb452f3dfa5 --- /dev/null +++ b/llvm/lib/MC/MCDXContainerStreamer.cpp @@ -0,0 +1,31 @@ +//===- lib/MC/MCDXContainerStreamer.cpp - DXContainer Impl ----*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the object streamer for DXContainer files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDXContainerStreamer.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/TargetRegistry.h" + +using namespace llvm; + +void MCDXContainerStreamer::emitInstToData(const MCInst &, + const MCSubtargetInfo &) {} + +MCStreamer *llvm::createDXContainerStreamer( + MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB, + std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&CE, + bool RelaxAll) { + auto *S = new MCDXContainerStreamer(Context, std::move(MAB), std::move(OW), + std::move(CE)); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp new file mode 100644 index 000000000000..f5dad702d6f6 --- /dev/null +++ b/llvm/lib/MC/MCDXContainerWriter.cpp @@ -0,0 +1,143 @@ +//===- llvm/MC/MCDXContainerWriter.cpp - DXContainer Writer -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDXContainerWriter.h" +#include "llvm/BinaryFormat/DXContainer.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/EndianStream.h" + +using namespace llvm; + +MCDXContainerTargetWriter::~MCDXContainerTargetWriter() {} + +namespace { +class DXContainerObjectWriter : public MCObjectWriter { + ::support::endian::Writer W; + + /// The target specific DXContainer writer instance.
+ std::unique_ptr<MCDXContainerTargetWriter> TargetObjectWriter; + +public: + DXContainerObjectWriter(std::unique_ptr<MCDXContainerTargetWriter> MOTW, + raw_pwrite_stream &OS) + : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {} + + ~DXContainerObjectWriter() override {} + +private: + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) override {} + + void executePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) override {} + + uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; +}; +} // namespace + +uint64_t DXContainerObjectWriter::writeObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { + // Start the file size as the header plus the size of the part offsets. + // Presently DXContainer files usually contain 7-10 parts. Reserving space for + // 16 part offsets gives us a little room for growth. + llvm::SmallVector<uint64_t, 16> PartOffsets; + uint64_t PartOffset = 0; + for (const MCSection &Sec : Asm) { + uint64_t SectionSize = Layout.getSectionAddressSize(&Sec); + // Skip empty sections. + if (SectionSize == 0) + continue; + + assert(SectionSize < std::numeric_limits<uint32_t>::max() && + "Section size too large for DXContainer"); + + PartOffsets.push_back(PartOffset); + PartOffset += sizeof(dxbc::PartHeader) + SectionSize; + PartOffset = alignTo(PartOffset, Align(4ul)); + } + assert(PartOffset < std::numeric_limits<uint32_t>::max() && + "Part data too large for DXContainer"); + + uint64_t PartStart = + sizeof(dxbc::Header) + (PartOffsets.size() * sizeof(uint32_t)); + uint64_t FileSize = PartStart + PartOffset; + assert(FileSize < std::numeric_limits<uint32_t>::max() && + "File size too large for DXContainer"); + + // Write the header. + W.write<char>({'D', 'X', 'B', 'C'}); + // Write 16-bytes of 0's for the hash. + W.OS.write_zeros(16); + // Write 1.0 for file format version. + W.write<uint16_t>(1u); + W.write<uint16_t>(0u); + // Write the file size. + W.write(static_cast<uint32_t>(FileSize)); + // Write the number of parts. + W.write(static_cast<uint32_t>(PartOffsets.size())); + // Write the offsets for the part headers for each part. + for (uint64_t Offset : PartOffsets) + W.write(static_cast<uint32_t>(PartStart + Offset)); + + for (const MCSection &Sec : Asm) { + uint64_t SectionSize = Layout.getSectionAddressSize(&Sec); + // Skip empty sections. + if (SectionSize == 0) + continue; + + unsigned Start = W.OS.tell(); + // Write section header. + W.write(ArrayRef<char>(Sec.getName().data(), 4)); + + uint64_t PartSize = SectionSize + sizeof(dxbc::PartHeader); + + if (Sec.getName() == "DXIL") + PartSize += sizeof(dxbc::ProgramHeader); + // DXContainer parts should be 4-byte aligned. + PartSize = alignTo(PartSize, Align(4)); + W.write(static_cast<uint32_t>(PartSize)); + if (Sec.getName() == "DXIL") { + dxbc::ProgramHeader Header; + memset(reinterpret_cast<void *>(&Header), 0, sizeof(dxbc::ProgramHeader)); + + const Triple &TT = Asm.getContext().getTargetTriple(); + VersionTuple Version = TT.getOSVersion(); + Header.MajorVersion = static_cast<uint8_t>(Version.getMajor()); + if (Version.getMinor()) + Header.MinorVersion = static_cast<uint8_t>(*Version.getMinor()); + if (TT.hasEnvironment()) + Header.ShaderKind = + static_cast<uint16_t>(TT.getEnvironment() - Triple::Pixel); + + // The program header's size field is in 32-bit words.
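// Note (editorial, not from the upstream commit): "(Bytes + 3) / 4" below is
// the usual round-up-to-words idiom. For example, a 10-byte payload occupies
// (10 + 3) / 4 == 3 32-bit words. The recorded size covers the program
// header itself plus the DXIL bitcode that follows it.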
+ Header.Size = (SectionSize + sizeof(dxbc::ProgramHeader) + 3) / 4; + memcpy(Header.Bitcode.Magic, "DXIL", 4); + Header.Bitcode.Offset = sizeof(dxbc::BitcodeHeader); + Header.Bitcode.Size = SectionSize; + if (sys::IsBigEndianHost) + Header.swapBytes(); + W.write(ArrayRef<char>(reinterpret_cast<const char *>(&Header), + sizeof(dxbc::ProgramHeader))); + } + Asm.writeSectionData(W.OS, &Sec, Layout); + unsigned Size = W.OS.tell() - Start; + W.OS.write_zeros(offsetToAlignment(Size, Align(4))); + } + return 0; +} + +std::unique_ptr<MCObjectWriter> llvm::createDXContainerObjectWriter( + std::unique_ptr<MCDXContainerTargetWriter> MOTW, raw_pwrite_stream &OS) { + return std::make_unique<DXContainerObjectWriter>(std::move(MOTW), OS); +} diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp index aaa3b747682c..f0c61840e413 100644 --- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -30,7 +30,6 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" #include <cassert> -#include <cstddef> #include <cstring> using namespace llvm; diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.h b/llvm/lib/MC/MCDisassembler/Disassembler.h index e5aab53a7613..3cb2479d388f 100644 --- a/llvm/lib/MC/MCDisassembler/Disassembler.h +++ b/llvm/lib/MC/MCDisassembler/Disassembler.h @@ -16,7 +16,7 @@ #ifndef LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H #define LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H -#include "llvm-c/Disassembler.h" +#include "llvm-c/DisassemblerTypes.h" #include "llvm/ADT/SmallString.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp index a58e8f6d9bcc..0c041186936d 100644 --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -8,9 +8,6 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -25,11 +22,12 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, + uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const { if (Symbolizer) - return Symbolizer->tryAddingSymbolicOperand( - Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize); + return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value, + Address, IsBranch, Offset, + OpSize, InstSize); return false; } @@ -85,10 +83,11 @@ bool XCOFFSymbolInfo::operator<(const XCOFFSymbolInfo &SymInfo) const { return SymInfo.IsLabel; // Symbols with a StorageMappingClass have higher priority than those without.
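// Note (editorial, not from the upstream commit): this operator< builds the
// priority order used when choosing which XCOFF symbol to report for an
// address: labels outrank non-labels, then symbols carrying a storage
// mapping class outrank those without, and finally getSMCPriority breaks
// ties between two mapped symbols.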
- if (StorageMappingClass.hasValue() != SymInfo.StorageMappingClass.hasValue()) - return SymInfo.StorageMappingClass.hasValue(); + if (StorageMappingClass.has_value() != + SymInfo.StorageMappingClass.has_value()) + return SymInfo.StorageMappingClass.has_value(); - if (StorageMappingClass.hasValue()) { + if (StorageMappingClass) { return getSMCPriority(StorageMappingClass.getValue()) < getSMCPriority(SymInfo.StorageMappingClass.getValue()); } diff --git a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp index 7befef86303c..e3f4cdd21557 100644 --- a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -31,19 +31,15 @@ class Triple; // is found an MCExpr is created with that, else an MCExpr with Value is // created. This function returns true if it adds an operand to the MCInst and // false otherwise. -bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, - raw_ostream &cStream, - int64_t Value, - uint64_t Address, - bool IsBranch, - uint64_t Offset, - uint64_t InstSize) { +bool MCExternalSymbolizer::tryAddingSymbolicOperand( + MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { struct LLVMOpInfo1 SymbolicOp; std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); SymbolicOp.Value = Value; if (!GetOpInfo || - !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { + !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) { // Clear SymbolicOp.Value from above and also all other fields. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); @@ -53,10 +49,10 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, // that always makes sense to guess. But in the case of an immediate it is // a bit more questionable if it is an address of a symbol or some other // reference. So if the immediate Value comes from a width of 1 byte, - // InstSize, we will not guess it is an address of a symbol. Because in + // OpSize, we will not guess it is an address of a symbol. Because in // object files assembled starting at address 0 this usually leads to // incorrect symbolication. - if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) + if (!SymbolLookUp || (OpSize == 1 && !IsBranch)) return false; uint64_t ReferenceType; diff --git a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp index 735be23206e4..137c44680080 100644 --- a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp +++ b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" -#include "llvm-c/Disassembler.h" +#include "llvm-c/DisassemblerTypes.h" #include "llvm/MC/TargetRegistry.h" using namespace llvm; diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 2cb5a000f88a..4cbb9981fde2 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -269,7 +269,7 @@ void MCDwarfLineTable::emit(MCStreamer *MCOS, MCDwarfLineTableParams Params) { LineStr = MCDwarfLineStr(context); // Switch to the section where the table will be emitted into. - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfLineSection()); // Handle the rest of the Compile Units. 
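// Note (editorial, not from the upstream commit): regarding the symbolizer
// change above, the new OpSize parameter separates the width of the operand
// field from the width of the whole instruction. On x86, "call rel32"
// (E8 xx xx xx xx) has InstSize == 5 but OpSize == 4, and a 1-byte immediate
// (OpSize == 1) is no longer guessed to be a symbol address, which the old
// code could only approximate with InstSize.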
for (const auto &CUIDTablePair : LineTables) { @@ -285,7 +285,7 @@ void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params, if (!HasSplitLineTable) return; Optional<MCDwarfLineStr> NoLineStr(None); - MCOS.SwitchSection(Section); + MCOS.switchSection(Section); MCOS.emitLabel(Header.Emit(&MCOS, Params, None, NoLineStr).second); } @@ -332,14 +332,20 @@ static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) { void MCDwarfLineStr::emitSection(MCStreamer *MCOS) { // Switch to the .debug_line_str section. - MCOS->SwitchSection( + MCOS->switchSection( MCOS->getContext().getObjectFileInfo()->getDwarfLineStrSection()); + SmallString<0> Data = getFinalizedData(); + MCOS->emitBinaryData(Data.str()); +} + +SmallString<0> MCDwarfLineStr::getFinalizedData() { // Emit the strings without perturbing the offsets we used. - LineStrings.finalizeInOrder(); + if (!LineStrings.isFinalized()) + LineStrings.finalizeInOrder(); SmallString<0> Data; Data.resize(LineStrings.getSize()); LineStrings.write((uint8_t *)Data.data()); - MCOS->emitBinaryData(Data.str()); + return Data; } void MCDwarfLineStr::emitRef(MCStreamer *MCOS, StringRef Path) { @@ -387,16 +393,14 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile, if (EmitMD5) { const MD5::MD5Result &Cksum = *DwarfFile.Checksum; MCOS->emitBinaryData( - StringRef(reinterpret_cast<const char *>(Cksum.Bytes.data()), - Cksum.Bytes.size())); + StringRef(reinterpret_cast<const char *>(Cksum.data()), Cksum.size())); } if (HasSource) { if (LineStr) - LineStr->emitRef(MCOS, DwarfFile.Source.getValueOr(StringRef())); + LineStr->emitRef(MCOS, DwarfFile.Source.value_or(StringRef())); else { - MCOS->emitBytes( - DwarfFile.Source.getValueOr(StringRef())); // Source and... - MCOS->emitBytes(StringRef("\0", 1)); // its null terminator. + MCOS->emitBytes(DwarfFile.Source.value_or(StringRef())); // Source and... + MCOS->emitBytes(StringRef("\0", 1)); // its null terminator. } } } @@ -583,7 +587,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, // Keep track of whether any or all files have an MD5 checksum. // If any files have embedded source, they all must. if (MCDwarfFiles.empty()) { - trackMD5Usage(Checksum.hasValue()); + trackMD5Usage(Checksum.has_value()); HasSource = (Source != None); } if (DwarfVersion >= 5 && isRootFile(RootFile, Directory, FileName, Checksum)) @@ -646,7 +650,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, File.Name = std::string(FileName); File.DirIndex = DirIndex; File.Checksum = Checksum; - trackMD5Usage(Checksum.hasValue()); + trackMD5Usage(Checksum.has_value()); File.Source = Source; if (Source) HasSource = true; @@ -764,7 +768,7 @@ static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) { // the data for .debug_abbrev section which contains three DIEs. static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCContext &context = MCOS->getContext(); - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); // DW_TAG_compile_unit DIE abbrev (1).
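// Note (editorial example, not from the upstream commit): the MCDwarfLineStr
// refactor above splits the finalize-and-copy step out of emitSection() so
// other emitters can obtain the raw .debug_line_str bytes, and the
// isFinalized() guard makes the call idempotent. A sketch, assuming an
// MCDwarfLineStr named LineStr:
//
//   SmallString<0> Bytes = LineStr.getFinalizedData(); // finalizes once
//   SmallString<0> Same = LineStr.getFinalizedData();  // safe second call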
MCOS->emitULEB128IntValue(1); @@ -817,7 +821,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, auto &Sections = context.getGenDwarfSectionSyms(); - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfARangesSection()); unsigned UnitLengthBytes = dwarf::getUnitLengthFieldByteSize(context.getDwarfFormat()); @@ -896,7 +900,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCSymbol *RangesSymbol) { MCContext &context = MCOS->getContext(); - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfInfoSection()); // Create a symbol at the start and end of this section used in here for the // expression to calculate the length in the header. @@ -1073,7 +1077,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) { MCSymbol *RangesSymbol; if (MCOS->getContext().getDwarfVersion() >= 5) { - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRnglistsSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfRnglistsSection()); MCSymbol *EndSymbol = mcdwarf::emitListsTableHeaderStart(*MCOS); MCOS->AddComment("Offset entry count"); MCOS->emitInt32(0); @@ -1093,7 +1097,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) { MCOS->emitInt8(dwarf::DW_RLE_end_of_list); MCOS->emitLabel(EndSymbol); } else { - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfRangesSection()); RangesSymbol = context.createTempSymbol("debug_ranges_start"); MCOS->emitLabel(RangesSymbol); for (MCSection *Sec : Sections) { @@ -1154,18 +1158,18 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { MCOS->getContext().getDwarfVersion() >= 3; CreateDwarfSectionSymbols |= UseRangesSection; - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfInfoSection()); if (CreateDwarfSectionSymbols) { InfoSectionSymbol = context.createTempSymbol(); MCOS->emitLabel(InfoSectionSymbol); } - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); if (CreateDwarfSectionSymbols) { AbbrevSectionSymbol = context.createTempSymbol(); MCOS->emitLabel(AbbrevSectionSymbol); } - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + MCOS->switchSection(context.getObjectFileInfo()->getDwarfARangesSection()); // Output the data for .debug_aranges section. EmitGenDwarfAranges(MCOS, InfoSectionSymbol); @@ -1599,6 +1603,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) { Augmentation += "S"; if (Frame.IsBKeyFrame) Augmentation += "B"; + if (Frame.IsMTETaggedFrame) + Augmentation += "G"; Streamer.emitBytes(Augmentation); } Streamer.emitInt8(0); @@ -1835,8 +1841,6 @@ template <> struct DenseMapInfo { void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, bool IsEH) { - Streamer.generateCompactUnwindEncodings(MAB); - MCContext &Context = Streamer.getContext(); const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); const MCAsmInfo *AsmInfo = Context.getAsmInfo(); @@ -1846,11 +1850,12 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, // Emit the compact unwind info if available. 
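// Note (editorial, not from the upstream commit): generateCompactUnwindEncodings
// used to run unconditionally at the top of MCDwarfFrameEmitter::Emit; as the
// next hunk shows, it is now invoked only when the object format actually has
// a compact-unwind section, so formats without one skip that pass entirely.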
bool NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame(); if (IsEH && MOFI->getCompactUnwindSection()) { + Streamer.generateCompactUnwindEncodings(MAB); bool SectionEmitted = false; for (const MCDwarfFrameInfo &Frame : FrameArray) { if (Frame.CompactUnwindEncoding == 0) continue; if (!SectionEmitted) { - Streamer.SwitchSection(MOFI->getCompactUnwindSection()); + Streamer.switchSection(MOFI->getCompactUnwindSection()); Streamer.emitValueToAlignment(AsmInfo->getCodePointerSize()); SectionEmitted = true; } @@ -1867,7 +1872,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, IsEH ? *const_cast(MOFI)->getEHFrameSection() : *MOFI->getDwarfFrameSection(); - Streamer.SwitchSection(&Section); + Streamer.switchSection(&Section); MCSymbol *SectionStart = Context.createTempSymbol(); Streamer.emitLabel(SectionStart); diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index fbf3c860368a..ca7f28e1386e 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -90,11 +90,11 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF, void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) { MCContext &Ctx = getContext(); - SwitchSection(Ctx.getObjectFileInfo()->getTextSection()); + switchSection(Ctx.getObjectFileInfo()->getTextSection()); emitCodeAlignment(Ctx.getObjectFileInfo()->getTextSectionAlignment(), &STI); if (NoExecStack) - SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx)); + switchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx)); } void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) { @@ -215,6 +215,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_WeakDefAutoPrivate: case MCSA_Invalid: case MCSA_IndirectSymbol: + case MCSA_Exported: return false; case MCSA_NoDeadStrip: @@ -317,13 +318,13 @@ void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size, MCSection &Section = *getAssembler().getContext().getELFSection( ".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); MCSectionSubPair P = getCurrentSection(); - SwitchSection(&Section); + switchSection(&Section); emitValueToAlignment(ByteAlignment, 0, 1, 0); emitLabel(Symbol); emitZeros(Size); - SwitchSection(P.first, P.second); + switchSection(P.first, P.second); } else { if(Symbol->declareCommon(Size, ByteAlignment)) report_fatal_error(Twine("Symbol: ") + Symbol->getName() + @@ -381,15 +382,15 @@ void MCELFStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From, void MCELFStreamer::emitIdent(StringRef IdentString) { MCSection *Comment = getAssembler().getContext().getELFSection( ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1); - PushSection(); - SwitchSection(Comment); + pushSection(); + switchSection(Comment); if (!SeenIdent) { emitInt8(0); SeenIdent = true; } emitBytes(IdentString); emitInt8(0); - PopSection(); + popSection(); } void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { @@ -511,8 +512,8 @@ void MCELFStreamer::finalizeCGProfile() { MCSection *CGProfile = getAssembler().getContext().getELFSection( ".llvm.call-graph-profile", ELF::SHT_LLVM_CALL_GRAPH_PROFILE, ELF::SHF_EXCLUDE, /*sizeof(Elf_CGProfile_Impl<>)=*/8); - PushSection(); - SwitchSection(CGProfile); + pushSection(); + switchSection(CGProfile); uint64_t Offset = 0; for (MCAssembler::CGProfileEntry &E : Asm.CGProfile) { finalizeCGProfileEntry(E.From, Offset); @@ -520,7 +521,7 @@ void MCELFStreamer::finalizeCGProfile() { emitIntValue(E.Count, 
sizeof(uint64_t)); Offset += sizeof(uint64_t); } - PopSection(); + popSection(); } void MCELFStreamer::emitInstToFragment(const MCInst &Inst, @@ -832,10 +833,10 @@ void MCELFStreamer::createAttributesSection( // Switch section to AttributeSection or get/create the section. if (AttributeSection) { - SwitchSection(AttributeSection); + switchSection(AttributeSection); } else { AttributeSection = getContext().getELFSection(Section, Type, 0); - SwitchSection(AttributeSection); + switchSection(AttributeSection); // Format version emitInt8(0x41); diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 10d494b5ac61..45a3d938257a 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -8,7 +8,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Config/llvm-config.h" #include "llvm/MC/MCAsmBackend.h" @@ -76,8 +75,9 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { const MCSymbol &Sym = SRE.getSymbol(); // Parenthesize names that start with $ so that they don't look like // absolute names. - bool UseParens = - !InParens && !Sym.getName().empty() && Sym.getName()[0] == '$'; + bool UseParens = MAI && MAI->useParensForDollarSignNames() && !InParens && + !Sym.getName().empty() && Sym.getName()[0] == '$'; + if (UseParens) { OS << '('; Sym.print(OS, MAI); diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index 4634de863b2f..4e6459c5d6e4 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -376,7 +376,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { if (AF->hasEmitNops()) OS << " (emit nops)"; OS << "\n "; - OS << " Alignment:" << AF->getAlignment() + OS << " Alignment:" << AF->getAlignment().value() << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize() << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; break; diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp index 7ce92b968f47..843afe359529 100644 --- a/llvm/lib/MC/MCInstPrinter.cpp +++ b/llvm/lib/MC/MCInstPrinter.cpp @@ -12,6 +12,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp index 4ed1c6286a72..85434b15bb5e 100644 --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -9,11 +9,12 @@ #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/ADT/APInt.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrInfo.h" #include <cstdint> +namespace llvm { +class MCSubtargetInfo; +} + using namespace llvm; bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, diff --git a/llvm/lib/MC/MCInstrDesc.cpp b/llvm/lib/MC/MCInstrDesc.cpp index b5c43f5edc0d..49a4a2cb546a 100644 --- a/llvm/lib/MC/MCInstrDesc.cpp +++ b/llvm/lib/MC/MCInstrDesc.cpp @@ -14,7 +14,6 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" using namespace llvm; diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index 88aeeb980738..9f22b9b0a866 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -10,7 +10,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -19,17 +18,16 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/MC/SectionKind.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -37,6 +35,13 @@ #include <cassert> #include <vector> +namespace llvm { +class MCInst; +class MCStreamer; +class MCSubtargetInfo; +class Triple; +} // namespace llvm + using namespace llvm; namespace { @@ -126,6 +131,7 @@ public: void finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE); void finalizeCGProfile(); + void createAddrSigSection(); }; } // end anonymous namespace. @@ -353,6 +359,7 @@ bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym, case MCSA_Weak: case MCSA_Local: case MCSA_LGlobal: + case MCSA_Exported: return false; case MCSA_Global: @@ -455,8 +462,8 @@ void MCMachOStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, // section. } - PushSection(); - SwitchSection(Section); + pushSection(); + switchSection(Section); // The symbol may not be present, which only creates the section. if (Symbol) { @@ -464,7 +471,7 @@ void MCMachOStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, emitLabel(Symbol); emitZeros(Size); } - PopSection(); + popSection(); } // This should always be called with the thread local bss section. Like the @@ -524,6 +531,7 @@ void MCMachOStreamer::finishImpl() { finalizeCGProfile(); + createAddrSigSection(); this->MCObjectStreamer::finishImpl(); } @@ -574,3 +582,16 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, S->getAssembler().setRelaxAll(true); return S; } + +// Create the AddrSig section and first data fragment here as its layout needs +// to be computed immediately after in order for it to be exported correctly.
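// Note (editorial, not from the upstream commit): the new
// __DATA,__llvm_addrsig section is registered with exactly one empty data
// fragment; per the comment above, it must already exist before
// MCObjectStreamer::finishImpl() runs so that layout can account for it and
// the object writer can fill in the address-significance table.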
+void MCMachOStreamer::createAddrSigSection() { + MCAssembler &Asm = getAssembler(); + MCObjectWriter &writer = Asm.getWriter(); + if (!writer.getEmitAddrsigSection()) + return; + MCSection *AddrSigSection = + Asm.getContext().getObjectFileInfo()->getAddrSigSection(); + Asm.registerSection(*AddrSigSection); + new MCDataFragment(AddrSigSection); +} diff --git a/llvm/lib/MC/MCNullStreamer.cpp b/llvm/lib/MC/MCNullStreamer.cpp index 40b7eba58b03..83e8962451d5 100644 --- a/llvm/lib/MC/MCNullStreamer.cpp +++ b/llvm/lib/MC/MCNullStreamer.cpp @@ -7,9 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" +namespace llvm { +class MCContext; +class MCExpr; +class MCSection; +class MCSymbol; +} // namespace llvm using namespace llvm; @@ -36,10 +42,10 @@ namespace { uint64_t Size = 0, unsigned ByteAlignment = 0, SMLoc Loc = SMLoc()) override {} void emitGPRel32Value(const MCExpr *Value) override {} - void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} - void EndCOFFSymbolDef() override {} + void beginCOFFSymbolDef(const MCSymbol *Symbol) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} + void endCOFFSymbolDef() override {} void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage, MCSymbolAttr Visibility) override {} diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index b7890e7f0937..d6fe952c0c1d 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -16,11 +16,14 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionDXContainer.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionGOFF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionSPIRV.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/Support/Casting.h" using namespace llvm; @@ -62,8 +65,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) SupportsCompactUnwindWithoutEHFrame = true; - if (T.isWatchABI()) + switch (Ctx->emitDwarfUnwindInfo()) { + case EmitDwarfUnwindType::Always: + OmitDwarfIfHaveCompactUnwind = false; + break; + case EmitDwarfUnwindType::NoCompactUnwind: OmitDwarfIfHaveCompactUnwind = true; + break; + case EmitDwarfUnwindType::Default: + OmitDwarfIfHaveCompactUnwind = + T.isWatchABI() || SupportsCompactUnwindWithoutEHFrame; + break; + } FDECFIEncoding = dwarf::DW_EH_PE_pcrel; @@ -180,6 +193,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { MachO::S_THREAD_LOCAL_VARIABLE_POINTERS, SectionKind::getMetadata()); + AddrSigSection = Ctx->getMachOSection("__DATA", "__llvm_addrsig", 0, + SectionKind::getData()); + // Exception Handling. 
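// Note (editorial, not from the upstream commit): the emitDwarfUnwindInfo()
// switch shown above gives Mach-O three policies. Always keeps DWARF CFI even
// when a compact-unwind encoding exists; NoCompactUnwind emits DWARF only for
// frames that compact unwind cannot describe; Default preserves the old
// heuristic (omit DWARF on watchOS and on targets that support compact unwind
// without an __eh_frame section). The value comes from
// MCContext::emitDwarfUnwindInfo(), i.e. from MCTargetOptions::EmitDwarfUnwind;
// how a driver surfaces that option is an assumption outside this patch.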
LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); @@ -518,8 +534,13 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { } void MCObjectFileInfo::initGOFFMCObjectFileInfo(const Triple &T) { - TextSection = Ctx->getGOFFSection(".text", SectionKind::getText()); - BSSSection = Ctx->getGOFFSection(".bss", SectionKind::getBSS()); + TextSection = + Ctx->getGOFFSection(".text", SectionKind::getText(), nullptr, nullptr); + BSSSection = + Ctx->getGOFFSection(".bss", SectionKind::getBSS(), nullptr, nullptr); + PPA1Section = + Ctx->getGOFFSection(".ppa1", SectionKind::getMetadata(), TextSection, + MCConstantExpr::create(GOFF::SK_PPA1, *Ctx)); } void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { @@ -554,8 +575,9 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { ".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); - if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::aarch64) { - // On Windows 64 with SEH, the LSDA is emitted into the .xdata section + if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::aarch64 || + T.getArch() == Triple::arm || T.getArch() == Triple::thumb) { + // On Windows with SEH, the LSDA is emitted into the .xdata section LSDASection = nullptr; } else { LSDASection = Ctx->getCOFFSection(".gcc_except_table", @@ -803,6 +825,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { SectionKind::getReadOnly()); } +void MCObjectFileInfo::initSPIRVMCObjectFileInfo(const Triple &T) { + // Put everything in a single binary section. + TextSection = Ctx->getSPIRVSection(); +} + void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) { TextSection = Ctx->getWasmSection(".text", SectionKind::getText()); DataSection = Ctx->getWasmSection(".data", SectionKind::getData()); @@ -993,7 +1020,12 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) { /* MultiSymbolsAllowed */ true, ".dwmac", XCOFF::SSUBTYP_DWMAC); } -MCObjectFileInfo::~MCObjectFileInfo() {} +void MCObjectFileInfo::initDXContainerObjectFileInfo(const Triple &T) { + // At the moment the DXBC section should end up empty. 
+ TextSection = Ctx->getDXContainerSection("DXBC", SectionKind::getText()); +} + +MCObjectFileInfo::~MCObjectFileInfo() = default; void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC, bool LargeCodeModel) { @@ -1031,12 +1063,18 @@ void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC, case MCContext::IsGOFF: initGOFFMCObjectFileInfo(TheTriple); break; + case MCContext::IsSPIRV: + initSPIRVMCObjectFileInfo(TheTriple); + break; case MCContext::IsWasm: initWasmMCObjectFileInfo(TheTriple); break; case MCContext::IsXCOFF: initXCOFFMCObjectFileInfo(TheTriple); break; + case MCContext::IsDXContainer: + initDXContainerObjectFileInfo(TheTriple); + break; } } @@ -1052,7 +1090,9 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name, case Triple::MachO: case Triple::COFF: case Triple::GOFF: + case Triple::SPIRV: case Triple::XCOFF: + case Triple::DXContainer: case Triple::UnknownObjectFormat: report_fatal_error("Cannot get DWARF comdat section for this object file " "format: not implemented."); diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index ebbbd6ad4e16..0c4ed201a0c5 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -37,7 +36,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, setAllowAutoPadding(Assembler->getBackend().allowAutoPadding()); } -MCObjectStreamer::~MCObjectStreamer() {} +MCObjectStreamer::~MCObjectStreamer() = default; // AssemblerPtr is used for evaluation of expressions and causes // difference between asm and object outputs. Return nullptr to in @@ -561,7 +560,7 @@ void MCObjectStreamer::emitDwarfLineEndEntry(MCSection *Section, // Switch back the dwarf line section, in case endSection had to switch the // section. MCContext &Ctx = getContext(); - SwitchSection(Ctx.getObjectFileInfo()->getDwarfLineSection()); + switchSection(Ctx.getObjectFileInfo()->getDwarfLineSection()); const MCAsmInfo *AsmInfo = Ctx.getAsmInfo(); emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, @@ -648,7 +647,8 @@ void MCObjectStreamer::emitValueToAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit)); + insert(new MCAlignFragment(Align(ByteAlignment), Value, ValueSize, + MaxBytesToEmit)); // Update the maximum alignment on the current section if necessary. 
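// Note (editorial example, not from the upstream commit): MCAlignFragment now
// stores an llvm::Align instead of a raw unsigned. Align's constructor
// asserts the value is a nonzero power of two, so a bogus byte alignment is
// caught when the fragment is built rather than surfacing later as a bad
// layout:
//
//   insert(new MCAlignFragment(Align(16), 0, 1, 0)); // OK
//   // Align(12) would assert: 12 is not a power of two.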
MCSection *CurSec = getCurrentSectionOnly(); @@ -796,7 +796,7 @@ MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, SMLoc Loc, const MCSubtargetInfo &STI) { Optional<MCFixupKind> MaybeKind = Assembler->getBackend().getFixupKind(Name); - if (!MaybeKind.hasValue()) + if (!MaybeKind) return std::make_pair(true, std::string("unknown relocation name")); MCFixupKind Kind = *MaybeKind; diff --git a/llvm/lib/MC/MCObjectWriter.cpp b/llvm/lib/MC/MCObjectWriter.cpp index a058bbe0ba0b..89ff5800da5b 100644 --- a/llvm/lib/MC/MCObjectWriter.cpp +++ b/llvm/lib/MC/MCObjectWriter.cpp @@ -7,10 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSymbol.h" +namespace llvm { +class MCSection; +} using namespace llvm; diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index bf9b9e916d6f..c3bc3bff6fa2 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -251,12 +251,12 @@ AsmToken AsmLexer::LexLineComment() { } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { - // Skip ULL, UL, U, L and LL suffices. - if (CurPtr[0] == 'U') + // Skip case-insensitive ULL, UL, U, L and LL suffixes. + if (CurPtr[0] == 'U' || CurPtr[0] == 'u') ++CurPtr; - if (CurPtr[0] == 'L') + if (CurPtr[0] == 'L' || CurPtr[0] == 'l') ++CurPtr; - if (CurPtr[0] == 'L') + if (CurPtr[0] == 'L' || CurPtr[0] == 'l') ++CurPtr; } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 0cea491f227d..ccc8e80e76ff 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -33,7 +33,6 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/AsmCond.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmLexer.h" @@ -541,6 +540,7 @@ private: DK_PSEUDO_PROBE, DK_LTO_DISCARD, DK_LTO_SET_CONDITIONAL, + DK_CFI_MTE_TAGGED_FRAME, DK_END }; @@ -793,12 +793,19 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, case MCContext::IsGOFF: PlatformParser.reset(createGOFFAsmParser()); break; + case MCContext::IsSPIRV: + report_fatal_error( + "Need to implement createSPIRVAsmParser for SPIRV format."); + break; case MCContext::IsWasm: PlatformParser.reset(createWasmAsmParser()); break; case MCContext::IsXCOFF: PlatformParser.reset(createXCOFFAsmParser()); break; + case MCContext::IsDXContainer: + llvm_unreachable("DXContainer is not supported yet"); + break; } PlatformParser->Initialize(*this); @@ -1067,7 +1074,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { if (auto *TS = Out.getTargetStreamer()) TS->emitConstantPools(); - Out.Finish(Lexer.getLoc()); + Out.finish(Lexer.getLoc()); } return HadError || getContext().hadError(); @@ -1780,7 +1787,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // if this is a line comment we can drop it safely if (getTok().getString().empty() || getTok().getString().front() == '\r' || getTok().getString().front() == '\n') - Out.AddBlankLine(); + Out.addBlankLine(); Lex(); return false; } @@ -1937,7 +1944,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, } // Consume any end of statement token, if present, to avoid spurious // addBlankLine calls().
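// Note (editorial, not from the upstream commit): with the case-insensitive
// suffix skipping shown earlier in AsmLexer.cpp, integer literals such as
// "123ull", "123ULL", or "0x1fUl" all lex like plain "123"/"0x1f": at most
// one u/U and then up to two l/L characters are consumed and ignored.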
   if (getTok().is(AsmToken::EndOfStatement)) {
     Lex();
   }
@@ -3445,10 +3452,14 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
     // up to one.
     if (Alignment == 0)
       Alignment = 1;
-    if (!isPowerOf2_64(Alignment))
+    else if (!isPowerOf2_64(Alignment)) {
       ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
-    if (!isUInt<32>(Alignment))
+      Alignment = PowerOf2Floor(Alignment);
+    }
+    if (!isUInt<32>(Alignment)) {
       ReturnVal |= Error(AlignmentLoc, "alignment must be smaller than 2**32");
+      Alignment = 1u << 31;
+    }
   }
 
   // Diagnose non-sensical max bytes to align.
@@ -3471,9 +3482,9 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
   // directive.
   const MCSection *Section = getStreamer().getCurrentSectionOnly();
   assert(Section && "must have section to emit alignment");
-  bool UseCodeAlign = Section->UseCodeAlign();
+  bool useCodeAlign = Section->useCodeAlign();
   if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
-      ValueSize == 1 && UseCodeAlign) {
+      ValueSize == 1 && useCodeAlign) {
     getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
                                     MaxBytesToFill);
   } else {
@@ -3571,8 +3582,8 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
   if (HasMD5) {
     MD5::MD5Result Sum;
     for (unsigned i = 0; i != 8; ++i) {
-      Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
-      Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+      Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+      Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
     }
     CKMem = Sum;
   }
@@ -3743,8 +3754,7 @@ bool AsmParser::parseDirectiveCVFile() {
         parseEscapedString(Checksum) ||
         parseIntToken(ChecksumKind,
                       "expected checksum kind in '.cv_file' directive") ||
-        parseToken(AsmToken::EndOfStatement,
-                   "unexpected token in '.cv_file' directive"))
+        parseEOL())
       return true;
   }
 
@@ -3754,7 +3764,7 @@ bool AsmParser::parseDirectiveCVFile() {
   ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
                                     Checksum.size());
 
-  if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
+  if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
                                          static_cast<uint8_t>(ChecksumKind)))
     return Error(FileNumberLoc, "file number already allocated");
 
@@ -3790,12 +3800,10 @@ bool AsmParser::parseDirectiveCVFuncId() {
   SMLoc FunctionIdLoc = getTok().getLoc();
   int64_t FunctionId;
 
-  if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
-      parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.cv_func_id' directive"))
+  if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
     return true;
 
-  if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
+  if (!getStreamer().emitCVFuncIdDirective(FunctionId))
     return Error(FunctionIdLoc, "function id already allocated");
 
   return false;
@@ -3851,11 +3859,10 @@ bool AsmParser::parseDirectiveCVInlineSiteId() {
     Lex();
   }
 
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.cv_inline_site_id' directive"))
+  if (parseEOL())
     return true;
 
-  if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+  if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
                                                  IALine, IACol, FunctionIdLoc))
     return Error(FunctionIdLoc, "function id already allocated");
 
@@ -3976,7 +3983,7 @@ bool AsmParser::parseDirectiveCVInlineLinetable() {
             "expected identifier in directive"))
     return true;
 
-  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+  if (parseEOL())
     return true;
 
   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
@@ -4137,7 +4144,7 @@ bool AsmParser::parseDirectiveCVFileChecksumOffset() {
   int64_t FileNo;
   if (parseIntToken(FileNo, "expected identifier in directive"))
     return true;
-  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+  if (parseEOL())
     return true;
   getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
   return false;
@@ -4153,7 +4160,7 @@ bool AsmParser::parseDirectiveCVFPOData() {
   if (parseEOL())
     return true;
   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
-  getStreamer().EmitCVFPOData(ProcSym, DirLoc);
+  getStreamer().emitCVFPOData(ProcSym, DirLoc);
   return false;
 }
 
@@ -5550,6 +5557,7 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
   DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
   DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
+  DirectiveKindMap[".cfi_mte_tagged_frame"] = DK_CFI_MTE_TAGGED_FRAME;
   DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
   DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
   DirectiveKindMap[".macro"] = DK_MACRO;
@@ -6022,22 +6030,25 @@ bool AsmParser::parseMSInlineAsm(
     }
 
     bool isOutput = (i == 1) && Desc.mayStore();
+    bool Restricted = Operand.isMemUseUpRegs();
     SMLoc Start = SMLoc::getFromPointer(SymName.data());
-    int64_t Size = Operand.isMemPlaceholder(Desc) ? 0 : SymName.size();
     if (isOutput) {
       ++InputIdx;
       OutputDecls.push_back(OpDecl);
       OutputDeclsAddressOf.push_back(Operand.needAddressOf());
       OutputConstraints.push_back(("=" + Constraint).str());
-      AsmStrRewrites.emplace_back(AOK_Output, Start, Size);
+      AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size(), 0,
+                                  Restricted);
     } else {
       InputDecls.push_back(OpDecl);
       InputDeclsAddressOf.push_back(Operand.needAddressOf());
       InputConstraints.push_back(Constraint.str());
       if (Desc.OpInfo[i - 1].isBranchTarget())
-        AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
+        AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size(), 0,
+                                    Restricted);
       else
-        AsmStrRewrites.emplace_back(AOK_Input, Start, Size);
+        AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size(), 0,
+                                    Restricted);
     }
   }
 
@@ -6152,17 +6163,19 @@ bool AsmParser::parseMSInlineAsm(
       OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
       break;
     case AOK_Input:
-      if (AR.Len)
-        OS << '$' << InputIdx;
-      ++InputIdx;
+      if (AR.IntelExpRestricted)
+        OS << "${" << InputIdx++ << ":P}";
+      else
+        OS << '$' << InputIdx++;
       break;
     case AOK_CallInput:
      OS << "${" << InputIdx++ << ":P}";
       break;
     case AOK_Output:
-      if (AR.Len)
-        OS << '$' << OutputIdx;
-      ++OutputIdx;
+      if (AR.IntelExpRestricted)
+        OS << "${" << OutputIdx++ << ":P}";
+      else
+        OS << '$' << OutputIdx++;
       break;
     case AOK_SizeDirective:
       switch (AR.Val) {
@@ -6299,7 +6312,7 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
     // if this is a line comment we can drop it safely
     if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
         getTok().getString().front() == '\n')
-      Out.AddBlankLine();
+      Out.addBlankLine();
     Lex();
     return false;
   }
@@ -6315,7 +6328,7 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
   if (Lexer.is(AsmToken::EndOfStatement)) {
     if (getTok().getString().front() == '\n' ||
         getTok().getString().front() == '\r') {
-      Out.AddBlankLine();
+      Out.addBlankLine();
       Lex();
       return false;
     }
diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 0077c91cfdbd..b78595f5bab4 100644
--- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -13,11 +13,8 @@
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/SectionKind.h"
@@ -322,7 +319,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
     return TokError("unexpected token in section switching directive");
   Lex();
 
-  getStreamer().SwitchSection(getContext().getCOFFSection(
+  getStreamer().switchSection(getContext().getCOFFSection(
       Section, Characteristics, Kind, COMDATSymName, Type));
 
   return false;
@@ -419,7 +416,7 @@ bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
 
   MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
 
-  getStreamer().BeginCOFFSymbolDef(Sym);
+  getStreamer().beginCOFFSymbolDef(Sym);
 
   Lex();
   return false;
@@ -434,7 +431,7 @@ bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
     return TokError("unexpected token in directive");
 
   Lex();
-  getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+  getStreamer().emitCOFFSymbolStorageClass(SymbolStorageClass);
   return false;
 }
 
@@ -447,13 +444,13 @@ bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
     return TokError("unexpected token in directive");
 
   Lex();
-  getStreamer().EmitCOFFSymbolType(Type);
+  getStreamer().emitCOFFSymbolType(Type);
   return false;
 }
 
 bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
   Lex();
-  getStreamer().EndCOFFSymbolDef();
+  getStreamer().endCOFFSymbolDef();
   return false;
 }
 
@@ -482,7 +479,7 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
   MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
 
   Lex();
-  getStreamer().EmitCOFFSecRel32(Symbol, Offset);
+  getStreamer().emitCOFFSecRel32(Symbol, Offset);
   return false;
 }
 
@@ -508,7 +505,7 @@ bool COFFAsmParser::ParseDirectiveRVA(StringRef, SMLoc) {
 
     MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
 
-    getStreamer().EmitCOFFImgRel32(Symbol, Offset);
+    getStreamer().emitCOFFImgRel32(Symbol, Offset);
     return false;
   };
 
@@ -528,7 +525,7 @@ bool COFFAsmParser::ParseDirectiveSafeSEH(StringRef, SMLoc) {
   MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
 
   Lex();
-  getStreamer().EmitCOFFSafeSEH(Symbol);
+  getStreamer().emitCOFFSafeSEH(Symbol);
   return false;
 }
 
@@ -543,7 +540,7 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) {
   MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
 
   Lex();
-  getStreamer().EmitCOFFSectionIndex(Symbol);
+  getStreamer().emitCOFFSectionIndex(Symbol);
   return false;
 }
 
@@ -558,7 +555,7 @@ bool COFFAsmParser::ParseDirectiveSymIdx(StringRef, SMLoc) {
   MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
 
   Lex();
-  getStreamer().EmitCOFFSymbolIndex(Symbol);
+  getStreamer().emitCOFFSymbolIndex(Symbol);
   return false;
 }
 
@@ -621,31 +618,31 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc Loc) {
   MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
 
   Lex();
-  getStreamer().EmitWinCFIStartProc(Symbol, Loc);
+  getStreamer().emitWinCFIStartProc(Symbol, Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc Loc) {
   Lex();
-  getStreamer().EmitWinCFIEndProc(Loc);
+  getStreamer().emitWinCFIEndProc(Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseSEHDirectiveEndFuncletOrFunc(StringRef, SMLoc Loc) {
   Lex();
-  getStreamer().EmitWinCFIFuncletOrFuncEnd(Loc);
+  getStreamer().emitWinCFIFuncletOrFuncEnd(Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc Loc) {
   Lex();
-  getStreamer().EmitWinCFIStartChained(Loc);
+  getStreamer().emitWinCFIStartChained(Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc Loc) {
   Lex();
-  getStreamer().EmitWinCFIEndChained(Loc);
+  getStreamer().emitWinCFIEndChained(Loc);
   return false;
 }
 
@@ -671,13 +668,13 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc Loc) {
   MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID);
 
   Lex();
-  getStreamer().EmitWinEHHandler(handler, unwind, except, Loc);
+  getStreamer().emitWinEHHandler(handler, unwind, except, Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) {
   Lex();
-  getStreamer().EmitWinEHHandlerData();
+  getStreamer().emitWinEHHandlerData();
   return false;
 }
 
@@ -690,20 +687,20 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) {
     return TokError("unexpected token in directive");
 
   Lex();
-  getStreamer().EmitWinCFIAllocStack(Size, Loc);
+  getStreamer().emitWinCFIAllocStack(Size, Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) {
   Lex();
-  getStreamer().EmitWinCFIEndProlog(Loc);
+  getStreamer().emitWinCFIEndProlog(Loc);
   return false;
 }
 
 bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
   StringRef identifier;
-  if (getLexer().isNot(AsmToken::At))
-    return TokError("a handler attribute must begin with '@'");
+  if (getLexer().isNot(AsmToken::At) && getLexer().isNot(AsmToken::Percent))
+    return TokError("a handler attribute must begin with '@' or '%'");
   SMLoc startLoc = getLexer().getLoc();
   Lex();
   if (getParser().parseIdentifier(identifier))
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 9da880f3b2ea..c5fedef40782 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -7,25 +7,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCParser/MCAsmParserUtils.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbolCOFF.h"
 #include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/SMLoc.h"
-#include
 #include
-#include
 #include
 
 using namespace llvm;
 
@@ -245,7 +238,7 @@ bool COFFMasmParser::ParseSectionSwitch(StringRef Section,
     return TokError("unexpected token in section switching directive");
   Lex();
 
-  getStreamer().SwitchSection(getContext().getCOFFSection(
+  getStreamer().switchSection(getContext().getCOFFSection(
      Section, Characteristics, Kind, COMDATSymName, Type));
 
   return false;
@@ -273,7 +266,7 @@ bool COFFMasmParser::ParseDirectiveSegment(StringRef Directive, SMLoc Loc) {
                COFF::IMAGE_SCN_MEM_READ;
   }
   SectionKind Kind = computeSectionKind(Flags);
-  getStreamer().SwitchSection(getContext().getCOFFSection(
+  getStreamer().switchSection(getContext().getCOFFSection(
       SectionName, Flags, Kind, "", (COFF::COMDATType)(0)));
   return false;
 }
@@ -300,13 +293,13 @@ bool COFFMasmParser::ParseDirectiveIncludelib(StringRef Directive, SMLoc Loc) {
   unsigned Flags = COFF::IMAGE_SCN_MEM_PRELOAD | COFF::IMAGE_SCN_MEM_16BIT;
   SectionKind Kind = computeSectionKind(Flags);
-  getStreamer().PushSection();
-  getStreamer().SwitchSection(getContext().getCOFFSection(
+  getStreamer().pushSection();
+  getStreamer().switchSection(getContext().getCOFFSection(
       ".drectve", Flags, Kind, "", (COFF::COMDATType)(0)));
   getStreamer().emitBytes("/DEFAULTLIB:");
   getStreamer().emitBytes(Lib);
   getStreamer().emitBytes(" ");
-  getStreamer().PopSection();
+  getStreamer().popSection();
   return false;
 }
 
@@ -343,7 +336,7 @@ bool COFFMasmParser::ParseDirectiveProc(StringRef Directive, SMLoc Loc) {
       getTok().getString().equals_insensitive("frame")) {
     Lex();
     Framed = true;
-    getStreamer().EmitWinCFIStartProc(Sym, Loc);
+    getStreamer().emitWinCFIStartProc(Sym, Loc);
   }
   getStreamer().emitLabel(Sym, Loc);
 
@@ -364,7 +357,7 @@ bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
                  CurrentProcedure + "'");
 
   if (CurrentProcedureFramed) {
-    getStreamer().EmitWinCFIEndProc(Loc);
+    getStreamer().emitWinCFIEndProc(Loc);
   }
   CurrentProcedure = "";
   CurrentProcedureFramed = false;
@@ -398,13 +391,13 @@ bool COFFMasmParser::ParseSEHDirectiveAllocStack(StringRef Directive,
     return Error(SizeLoc, "expected integer size");
   if (Size % 8 != 0)
     return Error(SizeLoc, "stack size must be a multiple of 8");
-  getStreamer().EmitWinCFIAllocStack(static_cast<unsigned>(Size), Loc);
+  getStreamer().emitWinCFIAllocStack(static_cast<unsigned>(Size), Loc);
   return false;
 }
 
 bool COFFMasmParser::ParseSEHDirectiveEndProlog(StringRef Directive,
                                                 SMLoc Loc) {
-  getStreamer().EmitWinCFIEndProlog(Loc);
+  getStreamer().emitWinCFIEndProlog(Loc);
   return false;
 }
 
diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 308b3842c61e..bc59531eecb8 100644
--- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -15,7 +14,6 @@
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
@@ -29,7 +27,6 @@
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include
 #include
 #include
 #include
@@ -483,7 +480,7 @@ bool DarwinAsmParser::parseSectionSwitch(StringRef Segment, StringRef Section,
   // FIXME: Arch specific.
   bool isText = TAA & MachO::S_ATTR_PURE_INSTRUCTIONS;
-  getStreamer().SwitchSection(getContext().getMachOSection(
+  getStreamer().switchSection(getContext().getMachOSection(
       Segment, Section, TAA, StubSize,
       isText ? SectionKind::getText() : SectionKind::getData()));
 
@@ -722,7 +719,7 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
   // FIXME: Arch specific.
   bool isText = Segment == "__TEXT"; // FIXME: Hack.
-  getStreamer().SwitchSection(getContext().getMachOSection(
+  getStreamer().switchSection(getContext().getMachOSection(
       Segment, Section, TAA, StubSize,
       isText ? SectionKind::getText() : SectionKind::getData()));
 
   return false;
@@ -731,10 +728,10 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
 
 /// ParseDirectivePushSection:
 ///   ::= .pushsection identifier (',' identifier)*
 bool DarwinAsmParser::parseDirectivePushSection(StringRef S, SMLoc Loc) {
-  getStreamer().PushSection();
+  getStreamer().pushSection();
 
   if (parseDirectiveSection(S, Loc)) {
-    getStreamer().PopSection();
+    getStreamer().popSection();
     return true;
   }
 
@@ -744,7 +741,7 @@ bool DarwinAsmParser::parseDirectivePushSection(StringRef S, SMLoc Loc) {
 
 /// ParseDirectivePopSection:
 ///   ::= .popsection
 bool DarwinAsmParser::parseDirectivePopSection(StringRef, SMLoc) {
-  if (!getStreamer().PopSection())
+  if (!getStreamer().popSection())
     return TokError(".popsection without corresponding .pushsection");
   return false;
 }
@@ -755,7 +752,7 @@ bool DarwinAsmParser::parseDirectivePrevious(StringRef DirName, SMLoc) {
   MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
   if (!PreviousSection.first)
     return TokError(".previous without corresponding .section");
-  getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
+  getStreamer().switchSection(PreviousSection.first, PreviousSection.second);
   return false;
 }
 
@@ -1152,11 +1149,12 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
   case MachO::PLATFORM_TVOS: return Triple::TvOS;
   case MachO::PLATFORM_WATCHOS: return Triple::WatchOS;
   case MachO::PLATFORM_BRIDGEOS: /* silence warning */ break;
+  case MachO::PLATFORM_DRIVERKIT:
+    return Triple::DriverKit;
   case MachO::PLATFORM_MACCATALYST: return Triple::IOS;
   case MachO::PLATFORM_IOSSIMULATOR: /* silence warning */ break;
   case MachO::PLATFORM_TVOSSIMULATOR: /* silence warning */ break;
   case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
-  case MachO::PLATFORM_DRIVERKIT: /* silence warning */ break;
   }
   llvm_unreachable("Invalid mach-o platform type");
 }
@@ -1175,6 +1173,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
     .Case("tvos", MachO::PLATFORM_TVOS)
     .Case("watchos", MachO::PLATFORM_WATCHOS)
     .Case("macCatalyst", MachO::PLATFORM_MACCATALYST)
+    .Case("driverkit", MachO::PLATFORM_DRIVERKIT)
     .Default(0);
   if (Platform == 0)
     return Error(PlatformLoc, "unknown platform name");
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e814cf003656..04a234be3b47 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -12,11 +12,9 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -214,7 +212,7 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
   }
   Lex();
 
-  getStreamer().SwitchSection(getContext().getELFSection(Section, Type, Flags),
+  getStreamer().switchSection(getContext().getELFSection(Section, Type, Flags),
                               Subsection);
 
   return false;
@@ -284,7 +282,8 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
   return false;
 }
 
-static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
+static unsigned parseSectionFlags(const Triple &TT, StringRef flagsStr,
+                                  bool *UseLastGroup) {
   unsigned flags = 0;
 
   // If a valid numerical value is set for the section flag, use it verbatim
@@ -333,7 +332,10 @@ static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
       flags |= ELF::SHF_GROUP;
       break;
     case 'R':
-      flags |= ELF::SHF_GNU_RETAIN;
+      if (TT.isOSSolaris())
+        flags |= ELF::SHF_SUNW_NODISCARD;
+      else
+        flags |= ELF::SHF_GNU_RETAIN;
       break;
     case '?':
       *UseLastGroup = true;
      break;
@@ -377,10 +379,10 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() {
 
 bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
-  getStreamer().PushSection();
+  getStreamer().pushSection();
 
   if (ParseSectionArguments(/*IsPush=*/true, loc)) {
-    getStreamer().PopSection();
+    getStreamer().popSection();
     return true;
   }
 
@@ -388,7 +390,7 @@ bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
 }
 
 bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
-  if (!getStreamer().PopSection())
+  if (!getStreamer().popSection())
     return TokError(".popsection without corresponding .pushsection");
   return false;
 }
@@ -571,7 +573,8 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
     } else {
       StringRef FlagsStr = getTok().getStringContents();
       Lex();
-      extraFlags = parseSectionFlags(FlagsStr, &UseLastGroup);
+      extraFlags = parseSectionFlags(getContext().getTargetTriple(), FlagsStr,
+                                     &UseLastGroup);
     }
 
     if (extraFlags == -1U)
@@ -675,7 +678,7 @@ EndStmt:
   MCSectionELF *Section =
       getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
                                  IsComdat, UniqueID, LinkedToSym);
-  getStreamer().SwitchSection(Section, Subsection);
+  getStreamer().switchSection(Section, Subsection);
   // Check that flags are used consistently. However, the GNU assembler permits
   // to leave out in subsequent uses of the same sections; for compatibility,
   // do likewise.
@@ -715,7 +718,7 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
   MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
   if (PreviousSection.first == nullptr)
     return TokError(".previous without corresponding .section");
-  getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
+  getStreamer().switchSection(PreviousSection.first, PreviousSection.second);
 
   return false;
 }
@@ -857,15 +860,15 @@ bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) {
   MCSection *Note = getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
 
-  getStreamer().PushSection();
-  getStreamer().SwitchSection(Note);
+  getStreamer().pushSection();
+  getStreamer().switchSection(Note);
   getStreamer().emitInt32(Data.size() + 1); // namesz
   getStreamer().emitInt32(0);               // descsz = 0 (no description).
   getStreamer().emitInt32(1);               // type = NT_VERSION
   getStreamer().emitBytes(Data);            // name
   getStreamer().emitInt8(0);                // NUL
   getStreamer().emitValueToAlignment(4);
-  getStreamer().PopSection();
+  getStreamer().popSection();
   return false;
 }
 
@@ -907,7 +910,7 @@ bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) {
 
   Lex();
 
-  getStreamer().SubSection(Subsection);
+  getStreamer().subSection(Subsection);
   return false;
 }
 
diff --git a/llvm/lib/MC/MCParser/GOFFAsmParser.cpp b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
index c2a7eaee8029..c3fc04607273 100644
--- a/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
@@ -6,16 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSectionGOFF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolGOFF.h"
 
 using namespace llvm;
 
@@ -31,7 +22,7 @@ class GOFFAsmParser : public MCAsmParserExtension {
   }
 
 public:
-  GOFFAsmParser() {}
+  GOFFAsmParser() = default;
 
   void Initialize(MCAsmParser &Parser) override {
     // Call the base implementation.
diff --git a/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 497055bc1760..632c52479d70 100644
--- a/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -9,7 +9,6 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SMLoc.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp
index d797c2d3f288..7fc1dbf56f98 100644
--- a/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -25,7 +25,7 @@ cl::opt<unsigned> AsmMacroMaxNestingDepth(
     "asm-macro-max-nesting-depth", cl::init(20), cl::Hidden,
     cl::desc("The maximum nesting depth allowed for assembly macros."));
 
-MCAsmParser::MCAsmParser() {}
+MCAsmParser::MCAsmParser() = default;
 
 MCAsmParser::~MCAsmParser() = default;
 
diff --git a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
index 0b5046cd8fad..f5a10ce9805b 100644
--- a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -8,6 +8,8 @@
 
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCStreamer.h"
 
 using namespace llvm;
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index f9433240743d..8c582d225e30 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -14,7 +14,6 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
@@ -36,21 +35,19 @@
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/AsmCond.h"
 #include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCParser/MCAsmParserUtils.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/MCValue.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -64,7 +61,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include
 #include
-#include
 #include
 #include
 #include
@@ -980,6 +976,8 @@ private:
   bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
   bool parseDirectiveNestedEnds();
 
+  bool parseDirectiveExtern();
+
   /// Parse a directive like ".globl" which accepts a single symbol (which
   /// should be a label or an external).
   bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
@@ -1192,7 +1190,7 @@ bool MasmParser::expandMacros() {
     }
   }
 
-  if (!ExpandedValue.hasValue())
+  if (!ExpandedValue)
    return true;
  std::unique_ptr<MemoryBuffer> Instantiation =
      MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
@@ -1431,7 +1429,7 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
   // Finalize the output stream if there are no errors and if the client wants
   // us to.
   if (!HadError && !NoFinalize)
-    Out.Finish(Lexer.getLoc());
+    Out.finish(Lexer.getLoc());
 
   return HadError || getContext().hadError();
 }
 
@@ -2094,7 +2092,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
     // If this is a line comment we can drop it safely.
     if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
         getTok().getString().front() == '\n')
-      Out.AddBlankLine();
+      Out.addBlankLine();
     Lex();
     return false;
   }
@@ -2283,7 +2281,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
   }
 
   // Consume any end of statement token, if present, to avoid spurious
-  // AddBlankLine calls().
+  // addBlankLine calls().
   if (getTok().is(AsmToken::EndOfStatement)) {
     Lex();
   }
@@ -2409,8 +2407,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
     case DK_ORG:
       return parseDirectiveOrg();
     case DK_EXTERN:
-      eatToEndOfStatement(); // .extern is the default, ignore it.
-      return false;
+      return parseDirectiveExtern();
     case DK_PUBLIC:
       return parseDirectiveSymbolAttribute(MCSA_Global);
     case DK_COMM:
@@ -2905,7 +2902,7 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
       if (Body[Pos] == '&')
         break;
       if (isMacroParameterChar(Body[Pos])) {
-        if (!CurrentQuote.hasValue())
+        if (!CurrentQuote)
           break;
         if (IdentifierPos == End)
           IdentifierPos = Pos;
@@ -2914,7 +2911,7 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
       }
 
       // Track quotation status
-      if (!CurrentQuote.hasValue()) {
+      if (!CurrentQuote) {
        if (Body[Pos] == '\'' || Body[Pos] == '"')
          CurrentQuote = Body[Pos];
      } else if (Body[Pos] == CurrentQuote) {
@@ -3333,7 +3330,7 @@ bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
     ParseStatementInfo Info(&AsmStrRewrites);
     bool Parsed = parseStatement(Info, nullptr);
 
-    if (!Parsed && Info.ExitValue.hasValue()) {
+    if (!Parsed && Info.ExitValue) {
       ExitValue = std::move(*Info.ExitValue);
       break;
     }
@@ -3628,7 +3625,7 @@ bool MasmParser::parseTextItem(std::string &Data) {
       if (BuiltinIt != BuiltinSymbolMap.end()) {
         llvm::Optional<std::string> BuiltinText =
             evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
-        if (!BuiltinText.hasValue()) {
+        if (!BuiltinText) {
           // Not a text macro; break without substituting
           break;
         }
@@ -4242,7 +4239,7 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
   auto &FieldInitializers = Initializer.FieldInitializers;
   size_t FieldIndex = 0;
-  if (EndToken.hasValue()) {
+  if (EndToken) {
     // Initialize all fields with given initializers.
     while (getTok().isNot(EndToken.getValue()) &&
            FieldIndex < Structure.Fields.size()) {
@@ -4275,7 +4272,7 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
       FieldInitializers.push_back(Field.Contents);
   }
 
-  if (EndToken.hasValue()) {
+  if (EndToken) {
     if (EndToken.getValue() == AsmToken::Greater)
       return parseAngleBracketClose();
 
@@ -4763,7 +4760,7 @@ bool MasmParser::emitAlignTo(int64_t Alignment) {
     // directive.
     const MCSection *Section = getStreamer().getCurrentSectionOnly();
     assert(Section && "must have section to emit alignment");
-    if (Section->UseCodeAlign()) {
+    if (Section->useCodeAlign()) {
       getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
                                       /*MaxBytesToEmit=*/0);
     } else {
@@ -4911,8 +4908,8 @@ bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
   if (HasMD5) {
     MD5::MD5Result Sum;
     for (unsigned i = 0; i != 8; ++i) {
-      Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
-      Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+      Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+      Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
     }
     CKMem = Sum;
   }
@@ -4952,8 +4949,7 @@ bool MasmParser::parseDirectiveLine() {
     (void)LineNumber;
     // FIXME: Do something with the .line.
   }
 
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.line' directive"))
+  if (parseEOL())
     return true;
 
   return false;
@@ -5086,8 +5082,7 @@ bool MasmParser::parseDirectiveCVFile() {
         parseEscapedString(Checksum) ||
         parseIntToken(ChecksumKind,
                       "expected checksum kind in '.cv_file' directive") ||
-        parseToken(AsmToken::EndOfStatement,
-                   "unexpected token in '.cv_file' directive"))
+        parseEOL())
       return true;
   }
 
@@ -5097,7 +5092,7 @@ bool MasmParser::parseDirectiveCVFile() {
   ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
                                     Checksum.size());
 
-  if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
+  if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
                                          static_cast<uint8_t>(ChecksumKind)))
     return Error(FileNumberLoc, "file number already allocated");
 
@@ -5133,12 +5128,10 @@ bool MasmParser::parseDirectiveCVFuncId() {
   SMLoc FunctionIdLoc = getTok().getLoc();
   int64_t FunctionId;
 
-  if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
-      parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.cv_func_id' directive"))
+  if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
     return true;
 
-  if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
+  if (!getStreamer().emitCVFuncIdDirective(FunctionId))
     return Error(FunctionIdLoc, "function id already allocated");
 
   return false;
@@ -5194,11 +5187,10 @@ bool MasmParser::parseDirectiveCVInlineSiteId() {
     Lex();
   }
 
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.cv_inline_site_id' directive"))
+  if (parseEOL())
     return true;
 
-  if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+  if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
                                                  IALine, IACol, FunctionIdLoc))
     return Error(FunctionIdLoc, "function id already allocated");
 
@@ -5321,7 +5313,7 @@ bool MasmParser::parseDirectiveCVInlineLinetable() {
             "expected identifier in directive"))
     return true;
 
-  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+  if (parseEOL())
     return true;
 
   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
@@ -5482,7 +5474,7 @@ bool MasmParser::parseDirectiveCVFileChecksumOffset() {
   int64_t FileNo;
   if (parseIntToken(FileNo, "expected identifier in directive"))
     return true;
-  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+  if (parseEOL())
     return true;
   getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
   return false;
@@ -5498,7 +5490,7 @@ bool MasmParser::parseDirectiveCVFPOData() {
   if (parseEOL("unexpected tokens"))
     return addErrorSuffix(" in '.cv_fpo_data' directive");
   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
-  getStreamer().EmitCVFPOData(ProcSym, DirLoc);
+  getStreamer().emitCVFPOData(ProcSym, DirLoc);
   return false;
 }
 
@@ -5791,8 +5783,7 @@ bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
 /// parseDirectiveCFISignalFrame
 /// ::= .cfi_signal_frame
 bool MasmParser::parseDirectiveCFISignalFrame() {
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.cfi_signal_frame'"))
+  if (parseEOL())
     return true;
 
   getStreamer().emitCFISignalFrame();
@@ -6023,6 +6014,39 @@ bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
   return false;
 }
 
+bool MasmParser::parseDirectiveExtern() {
+  // .extern is the default - but we still need to take any provided type info.
+  auto parseOp = [&]() -> bool {
+    StringRef Name;
+    SMLoc NameLoc = getTok().getLoc();
+    if (parseIdentifier(Name))
+      return Error(NameLoc, "expected name");
+    if (parseToken(AsmToken::Colon))
+      return true;
+
+    StringRef TypeName;
+    SMLoc TypeLoc = getTok().getLoc();
+    if (parseIdentifier(TypeName))
+      return Error(TypeLoc, "expected type");
+    if (!TypeName.equals_insensitive("proc")) {
+      AsmTypeInfo Type;
+      if (lookUpType(TypeName, Type))
+        return Error(TypeLoc, "unrecognized type");
+      KnownType[Name.lower()] = Type;
+    }
+
+    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+    Sym->setExternal(true);
+    getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
+
+    return false;
+  };
+
+  if (parseMany(parseOp))
+    return addErrorSuffix(" in directive 'extern'");
+  return false;
+}
+
 /// parseDirectiveSymbolAttribute
 ///  ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
@@ -6091,8 +6115,7 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) {
     }
   }
 
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.comm' or '.lcomm' directive"))
+  if (parseEOL())
     return true;
 
   // NOTE: a size of zero for a .comm should create a undefined symbol
@@ -6138,8 +6161,7 @@ bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
     Lex(); // eat end of statement
   } while (
       !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
-  return parseToken(AsmToken::EndOfStatement,
-                    "unexpected token in 'comment' directive");
+  return parseEOL();
 }
 
 /// parseDirectiveInclude
@@ -6173,9 +6195,7 @@ bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
     eatToEndOfStatement();
   } else {
     int64_t ExprValue;
-    if (parseAbsoluteExpression(ExprValue) ||
-        parseToken(AsmToken::EndOfStatement,
-                   "unexpected token in '.if' directive"))
+    if (parseAbsoluteExpression(ExprValue) || parseEOL())
      return true;
 
     switch (DirKind) {
@@ -6208,8 +6228,7 @@ bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
     if (parseTextItem(Str))
       return TokError("expected text item parameter for 'ifb' directive");
 
-    if (parseToken(AsmToken::EndOfStatement,
-                   "unexpected token in 'ifb' directive"))
+    if (parseEOL())
      return true;
 
     TheCondState.CondMet = ExpectBlank == Str.empty();
@@ -6275,7 +6294,7 @@ bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
   if (!is_defined) {
     StringRef Name;
     if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
-        parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'"))
+        parseEOL())
       return true;
 
     if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
@@ -6316,8 +6335,7 @@ bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
     if (parseAbsoluteExpression(ExprValue))
       return true;
 
-    if (parseToken(AsmToken::EndOfStatement,
-                   "unexpected token in '.elseif' directive"))
+    if (parseEOL())
      return true;
 
     switch (DirKind) {
@@ -6360,8 +6378,7 @@ bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
       return TokError("expected text item parameter for 'elseifnb' directive");
   }
 
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in 'elseifb' directive"))
+  if (parseEOL())
    return true;
 
   TheCondState.CondMet = ExpectBlank == Str.empty();
@@ -6398,8 +6415,7 @@ bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
     StringRef Name;
     if (check(parseIdentifier(Name),
               "expected identifier after 'elseifdef'") ||
-        parseToken(AsmToken::EndOfStatement,
-                   "unexpected token in 'elseifdef'"))
+        parseEOL())
       return true;
 
     if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
@@ -6475,8 +6491,7 @@ bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
 
 /// parseDirectiveElse
 /// ::= else
 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in 'else' directive"))
+  if (parseEOL())
     return true;
 
   if (TheCondState.TheCond != AsmCond::IfCond &&
@@ -6498,8 +6513,7 @@ bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
 /// parseDirectiveEnd
 /// ::= end
 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in 'end' directive"))
+  if (parseEOL())
     return true;
 
   while (Lexer.isNot(AsmToken::Eof))
@@ -6687,8 +6701,7 @@ bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
 /// parseDirectiveEndIf
 /// ::= .endif
 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
-  if (parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '.endif' directive"))
+  if (parseEOL())
     return true;
 
   if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
@@ -6982,9 +6995,7 @@ bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
       return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
   }
 
-  if (check(Count < 0, CountLoc, "Count is negative") ||
-      parseToken(AsmToken::EndOfStatement,
-                 "unexpected token in '" + Dir + "' directive"))
+  if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
     return true;
 
   // Lex the repeat definition.
@@ -7099,7 +7110,7 @@ bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
   if (parseToken(AsmToken::Greater,
                  "values in '" + Dir +
                      "' directive must be enclosed in angle brackets") ||
-      parseToken(AsmToken::EndOfStatement, "expected End of Statement"))
+      parseEOL())
     return true;
 
   // Lex the for definition.
@@ -7149,7 +7160,7 @@ bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
     }
     Argument.resize(End);
   }
-  if (parseToken(AsmToken::EndOfStatement, "expected end of statement"))
+  if (parseEOL())
     return true;
 
   // Lex the irpc definition.
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 833530bef3bf..a84d00d82b76 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -21,11 +21,11 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSectionWasm.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolWasm.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/Casting.h"
 
 using namespace llvm;
 
@@ -53,6 +53,7 @@ public:
     this->MCAsmParserExtension::Initialize(*Parser);
 
     addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
+    addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(".data");
     addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
     addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
     addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
@@ -90,6 +91,12 @@ public:
     return false;
   }
 
+  bool parseSectionDirectiveData(StringRef, SMLoc) {
+    auto *S = getContext().getObjectFileInfo()->getDataSection();
+    getStreamer().switchSection(S);
+    return false;
+  }
+
   uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
     uint32_t flags = 0;
     for (char C : FlagStr) {
@@ -181,7 +188,7 @@ public:
     // TODO: Parse UniqueID
     MCSectionWasm *WS = getContext().getWasmSection(
-        Name, Kind.getValue(), Flags, GroupName, MCContext::GenericSectionID);
+        Name, *Kind, Flags, GroupName, MCContext::GenericSectionID);
 
     if (WS->getSegmentFlags() != Flags)
       Parser->Error(loc, "changed section flags for " + Name +
@@ -194,7 +201,7 @@ public:
       WS->setPassive();
     }
 
-    getStreamer().SwitchSection(WS);
+    getStreamer().switchSection(WS);
     return false;
   }
 
diff --git a/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp b/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp
index 7494fe07734c..d20a65f6a476 100644
--- a/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp
@@ -8,15 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/BinaryFormat/XCOFF.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSectionXCOFF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSymbolXCOFF.h"
-#include "llvm/Support/MachineValueType.h"
 
 using namespace llvm;
 
@@ -35,7 +28,7 @@ class XCOFFAsmParser : public MCAsmParserExtension {
   }
 
 public:
-  XCOFFAsmParser() {}
+  XCOFFAsmParser() = default;
 
   void Initialize(MCAsmParser &P) override {
     Parser = &P;
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index ebf38327f4dc..5277ce87bee0 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -9,9 +9,10 @@
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFragment.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"
@@ -182,7 +183,7 @@ void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) {
     if (auto *S =
            Ctx.getObjectFileInfo()->getPseudoProbeSection(ProbeSec.first)) {
       // Switch to the .pseudoprobe section or a comdat group.
-      MCOS->SwitchSection(S);
+      MCOS->switchSection(S);
       // Emit probes grouped by GUID.
       ProbeSec.second.emit(MCOS, LastProbe);
     }
@@ -229,8 +230,7 @@ void MCDecodedPseudoProbe::getInlineContext(
   // It will add the string of each node's inline site during iteration.
   // Note that it won't include the probe's belonging function(leaf location)
   while (Cur->hasInlineSite()) {
-    StringRef FuncName =
-        getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite));
+    StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid);
     ContextStack.emplace_back(
         MCPseduoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite)));
     Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
@@ -357,8 +357,9 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
   return true;
 }
 
-bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
-                                                 std::size_t Size) {
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+    MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
+    std::unordered_set<uint64_t> &GuildFilter) {
   // The pseudo_probe section encodes an inline forest and each tree has a
   // format like:
   //  FUNCTION BODY (one for each uninlined function present in the text
   //  FUNCTION BODY
   //      A FUNCTION BODY entry describing the inlined function.
 
-  Data = Start;
-  End = Data + Size;
-
-  MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot;
-  MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot;
-  uint64_t LastAddr = 0;
   uint32_t Index = 0;
-  // A DFS-based decoding
-  while (Data < End) {
-    if (Root == Cur) {
-      // Use a sequential id for top level inliner.
-      Index = Root->getChildren().size();
-    } else {
-      // Read inline site for inlinees
-      auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
-      if (!ErrorOrIndex)
-        return false;
-      Index = std::move(*ErrorOrIndex);
-    }
-    // Switch/add to a new tree node(inlinee)
-    Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
-    // Read guid
-    auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
-    if (!ErrorOrCurGuid)
+  if (Cur == &DummyInlineRoot) {
+    // Use a sequential id for top level inliner.
+    Index = Cur->getChildren().size();
+  } else {
+    // Read inline site for inlinees
+    auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+    if (!ErrorOrIndex)
       return false;
-    Cur->Guid = std::move(*ErrorOrCurGuid);
-    // Read number of probes in the current node.
-    auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
-    if (!ErrorOrNodeCount)
+    Index = std::move(*ErrorOrIndex);
+  }
+
+  // Read guid
+  auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
+  if (!ErrorOrCurGuid)
+    return false;
+  uint64_t Guid = std::move(*ErrorOrCurGuid);
+
+  // Decide if top-level node should be discarded.
+  if (Cur == &DummyInlineRoot && !GuildFilter.empty() &&
+      !GuildFilter.count(Guid))
+    Cur = nullptr;
+
+  // If the incoming node is null, all its children nodes should be discarded.
+  if (Cur) {
+    // Switch/add to a new tree node(inlinee)
+    Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index));
+    Cur->Guid = Guid;
+  }
+
+  // Read number of probes in the current node.
+  auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
+  if (!ErrorOrNodeCount)
+    return false;
+  uint32_t NodeCount = std::move(*ErrorOrNodeCount);
+  // Read number of direct inlinees
+  auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
+  if (!ErrorOrCurChildrenToProcess)
+    return false;
+  // Read all probes in this node
+  for (std::size_t I = 0; I < NodeCount; I++) {
+    // Read index
+    auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+    if (!ErrorOrIndex)
       return false;
-    uint32_t NodeCount = std::move(*ErrorOrNodeCount);
-    // Read number of direct inlinees
-    auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
-    if (!ErrorOrCurChildrenToProcess)
+    uint32_t Index = std::move(*ErrorOrIndex);
+    // Read type | flag.
+    auto ErrorOrValue = readUnencodedNumber<uint8_t>();
+    if (!ErrorOrValue)
       return false;
-    Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
-    // Read all probes in this node
-    for (std::size_t I = 0; I < NodeCount; I++) {
-      // Read index
-      auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
-      if (!ErrorOrIndex)
+    uint8_t Value = std::move(*ErrorOrValue);
+    uint8_t Kind = Value & 0xf;
+    uint8_t Attr = (Value & 0x70) >> 4;
+    // Read address
+    uint64_t Addr = 0;
+    if (Value & 0x80) {
+      auto ErrorOrOffset = readSignedNumber<int64_t>();
+      if (!ErrorOrOffset)
         return false;
-      uint32_t Index = std::move(*ErrorOrIndex);
-      // Read type | flag.
-      auto ErrorOrValue = readUnencodedNumber<uint8_t>();
-      if (!ErrorOrValue)
+      int64_t Offset = std::move(*ErrorOrOffset);
+      Addr = LastAddr + Offset;
+    } else {
+      auto ErrorOrAddr = readUnencodedNumber<uint64_t>();
+      if (!ErrorOrAddr)
         return false;
-      uint8_t Value = std::move(*ErrorOrValue);
-      uint8_t Kind = Value & 0xf;
-      uint8_t Attr = (Value & 0x70) >> 4;
-      // Read address
-      uint64_t Addr = 0;
-      if (Value & 0x80) {
-        auto ErrorOrOffset = readSignedNumber<int64_t>();
-        if (!ErrorOrOffset)
-          return false;
-        int64_t Offset = std::move(*ErrorOrOffset);
-        Addr = LastAddr + Offset;
-      } else {
-        auto ErrorOrAddr = readUnencodedNumber<uint64_t>();
-        if (!ErrorOrAddr)
-          return false;
-        Addr = std::move(*ErrorOrAddr);
-      }
+      Addr = std::move(*ErrorOrAddr);
+    }
+
+    if (Cur) {
       // Populate Address2ProbesMap
       auto &Probes = Address2ProbesMap[Addr];
       Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
                           Cur);
       Cur->addProbes(&Probes.back());
-      LastAddr = Addr;
     }
+    LastAddr = Addr;
+  }
 
-    // Look for the parent for the next node by subtracting the current
-    // node count from tree counts along the parent chain. The first node
-    // in the chain that has a non-zero tree count is the target.
-    while (Cur != Root) {
-      if (Cur->ChildrenToProcess == 0) {
-        Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
-        if (Cur != Root) {
-          assert(Cur->ChildrenToProcess > 0 &&
-                 "Should have some unprocessed nodes");
-          Cur->ChildrenToProcess -= 1;
-        }
-      } else {
-        break;
-      }
-    }
+  uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
+  for (uint32_t I = 0; I < ChildrenToProcess; I++) {
+    buildAddress2ProbeMap(Cur, LastAddr, GuildFilter);
   }
+  return true;
+}
+
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+    const uint8_t *Start, std::size_t Size,
+    std::unordered_set<uint64_t> &GuildFilter) {
+  Data = Start;
+  End = Data + Size;
+  uint64_t LastAddr = 0;
+  while (Data < End)
+    buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter);
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
-  assert(Cur == Root &&
-         " Cur should point to root when the forest is fully built up");
   return true;
 }
 
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
+                                                 std::size_t Size) {
+  std::unordered_set<uint64_t> GuildFilter;
+  return buildAddress2ProbeMap(Start, Size, GuildFilter);
+}
+
 void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
   OS << "Pseudo Probe Desc:\n";
   // Make the output deterministic
@@ -563,5 +573,5 @@ const MCPseudoProbeFuncDesc *MCPseudoProbeDecoder::getInlinerDescForProbe(
   MCDecodedPseudoProbeInlineTree *InlinerNode = Probe->getInlineTreeNode();
   if (!InlinerNode->hasInlineSite())
     return nullptr;
-  return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
+  return getFuncDescForGUID(InlinerNode->Parent->Guid);
 }
diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp
index d491c0eb7e06..d6c4fe10fc98 100644
--- a/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/llvm/lib/MC/MCRegisterInfo.cpp
@@ -122,3 +122,14 @@ int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const {
                        : Twine(RegNum)));
   return I->second;
 }
+
+bool MCRegisterInfo::regsOverlap(MCRegister RegA, MCRegister RegB) const {
+  // Regunits are numerically ordered. Find a common unit.
+  MCRegUnitIterator RUA(RegA, this);
+  MCRegUnitIterator RUB(RegB, this);
+  do {
+    if (*RUA == *RUB)
+      return true;
+  } while (*RUA < *RUB ? (++RUA).isValid() : (++RUB).isValid());
+  return false;
+}
diff --git a/llvm/lib/MC/MCSPIRVStreamer.cpp b/llvm/lib/MC/MCSPIRVStreamer.cpp
new file mode 100644
index 000000000000..863db7f36f29
--- /dev/null
+++ b/llvm/lib/MC/MCSPIRVStreamer.cpp
@@ -0,0 +1,45 @@
+//===- lib/MC/MCSPIRVStreamer.cpp - SPIR-V Object Output ------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits SPIR-V .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSPIRVStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+void MCSPIRVStreamer::emitInstToData(const MCInst &Inst,
+                                     const MCSubtargetInfo &STI) {
+  MCAssembler &Assembler = getAssembler();
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+
+  // Append the encoded instruction to the current data fragment (or create a
+  // new such fragment if the current fragment is not a data fragment).
+  MCDataFragment *DF = getOrCreateDataFragment();
+
+  DF->setHasInstructions(STI);
+  DF->getContents().append(Code.begin(), Code.end());
+}
+
+MCStreamer *llvm::createSPIRVStreamer(MCContext &Context,
+                                      std::unique_ptr<MCAsmBackend> &&MAB,
+                                      std::unique_ptr<MCObjectWriter> &&OW,
+                                      std::unique_ptr<MCCodeEmitter> &&CE,
+                                      bool RelaxAll) {
+  MCSPIRVStreamer *S = new MCSPIRVStreamer(Context, std::move(MAB),
+                                           std::move(OW), std::move(CE));
+  if (RelaxAll)
+    S->getAssembler().setRelaxAll(true);
+  return S;
+}
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index db08e2044113..98eb7eada064 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -98,7 +98,7 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
     double Temp = NumUnits * 1.0 / I->Cycles;
     Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
   }
-  if (Throughput.hasValue())
+  if (Throughput)
     return 1.0 / Throughput.getValue();
 
   // If no throughput value was calculated, assume that we can execute at the
@@ -142,7 +142,7 @@ MCSchedModel::getReciprocalThroughput(unsigned SchedClass,
     double Temp = countPopulation(I->getUnits()) * 1.0 / I->getCycles();
     Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
   }
-  if (Throughput.hasValue())
+  if (Throughput)
     return 1.0 / Throughput.getValue();
 
   // If there are no execution resources specified for this class, then assume
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 8342abacec09..7547558fe6e2 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCSection.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/MC/MCContext.h"
@@ -15,7 +16,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include
 #include
 
 using namespace llvm;
diff --git a/llvm/lib/MC/MCSectionCOFF.cpp b/llvm/lib/MC/MCSectionCOFF.cpp
index 387bf2c884e5..f7ca0375544a 100644
--- a/llvm/lib/MC/MCSectionCOFF.cpp
+++ b/llvm/lib/MC/MCSectionCOFF.cpp
@@ -14,9 +14,9 @@
 
 using namespace llvm;
 
-// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// shouldOmitSectionDirective - Decides whether a '.section' directive
 // should be printed before the section name
-bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name,
                                                const MCAsmInfo &MAI) const {
   if (COMDATSymbol)
     return false;
@@ -34,11 +34,11 @@ void MCSectionCOFF::setSelection(int Selection) const {
   Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 }
 
-void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+void MCSectionCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
                                          raw_ostream &OS,
                                          const MCExpr *Subsection) const {
   // standard sections don't require the '.section'
-  if (ShouldOmitSectionDirective(getName(), MAI)) {
+  if (shouldOmitSectionDirective(getName(), MAI)) {
     OS << '\t' << getName() << '\n';
     return;
   }
@@ -104,9 +104,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
   OS << '\n';
 }
 
-bool MCSectionCOFF::UseCodeAlign() const {
-  return getKind().isText();
-}
+bool MCSectionCOFF::useCodeAlign() const { return getKind().isText(); }
 
 bool MCSectionCOFF::isVirtualSection() const {
   return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
diff --git a/llvm/lib/MC/MCSectionDXContainer.cpp b/llvm/lib/MC/MCSectionDXContainer.cpp
a/llvm/lib/MC/MCSectionDXContainer.cpp b/llvm/lib/MC/MCSectionDXContainer.cpp new file mode 100644 index 000000000000..065b506c21ce --- /dev/null +++ b/llvm/lib/MC/MCSectionDXContainer.cpp @@ -0,0 +1,15 @@ +//===- lib/MC/MCSectionDXContainer.cpp - DXContainer Section --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionDXContainer.h" + +using namespace llvm; + +void MCSectionDXContainer::printSwitchToSection(const MCAsmInfo &, + const Triple &, raw_ostream &, + const MCExpr *) const {} diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index d18876507cd7..27dc1826819b 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -19,7 +19,7 @@ using namespace llvm; // Decides whether a '.section' directive // should be printed before the section name. -bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, +bool MCSectionELF::shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const { if (isUnique()) return false; @@ -50,10 +50,10 @@ static void printName(raw_ostream &OS, StringRef Name) { OS << '"'; } -void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, +void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const { - if (ShouldOmitSectionDirective(getName(), MAI)) { + if (shouldOmitSectionDirective(getName(), MAI)) { OS << '\t' << getName(); if (Subsection) { OS << '\t'; @@ -105,6 +105,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, if (Flags & ELF::SHF_GNU_RETAIN) OS << 'R'; + // If there are os-specific flags, print them. + if (T.isOSSolaris()) + if (Flags & ELF::SHF_SUNW_NODISCARD) + OS << 'R'; + // If there are target-specific flags, print them. 
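// [Illustration] printSwitchToSection above appends one letter per section
// flag ('a', 'w', 'x', ..., and now 'R' both for SHF_GNU_RETAIN and, on
// Solaris, SHF_SUNW_NODISCARD). A simplified sketch of that flag-to-letter
// mapping using the standard ELF bit values; this is a stand-in, not the
// LLVM implementation:
#include <string>
static std::string sectionFlagLetters(unsigned Flags) {
  const unsigned SHF_WRITE = 0x1, SHF_ALLOC = 0x2, SHF_EXECINSTR = 0x4;
  std::string S;
  if (Flags & SHF_ALLOC)
    S += 'a'; // Occupies memory at run time.
  if (Flags & SHF_WRITE)
    S += 'w'; // Writable data.
  if (Flags & SHF_EXECINSTR)
    S += 'x'; // Contains executable code.
  return S;
}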
Triple::ArchType Arch = T.getArch(); if (Arch == Triple::xcore) { @@ -160,6 +165,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << "llvm_sympart"; else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP) OS << "llvm_bb_addr_map"; + else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP_V0) + OS << "llvm_bb_addr_map_v0"; else report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) + " for section " + getName()); @@ -196,7 +203,7 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, } } -bool MCSectionELF::UseCodeAlign() const { +bool MCSectionELF::useCodeAlign() const { return getFlags() & ELF::SHF_EXECINSTR; } diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp index d914e64ca23a..1c210fb0f4c8 100644 --- a/llvm/lib/MC/MCSectionMachO.cpp +++ b/llvm/lib/MC/MCSectionMachO.cpp @@ -7,9 +7,16 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCContext.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Support/raw_ostream.h" -#include + +namespace llvm { +class MCAsmInfo; +class MCExpr; +class MCSymbol; +class Triple; +} // namespace llvm + using namespace llvm; /// SectionTypeDescriptors - These are strings that describe the various section @@ -19,7 +26,7 @@ static constexpr struct { StringLiteral AssemblerName, EnumName; } SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE + 1] = { {StringLiteral("regular"), StringLiteral("S_REGULAR")}, // 0x00 - {StringLiteral(""), StringLiteral("S_ZEROFILL")}, // 0x01 + {StringLiteral("zerofill"), StringLiteral("S_ZEROFILL")}, // 0x01 {StringLiteral("cstring_literals"), StringLiteral("S_CSTRING_LITERALS")}, // 0x02 {StringLiteral("4byte_literals"), @@ -95,7 +102,7 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, } } -void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, +void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const { OS << "\t.section\t" << getSegmentName() << ',' << getName(); @@ -159,7 +166,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << '\n'; } -bool MCSectionMachO::UseCodeAlign() const { +bool MCSectionMachO::useCodeAlign() const { return hasAttribute(MachO::S_ATTR_PURE_INSTRUCTIONS); } diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp index 459913263268..e90f401b1efa 100644 --- a/llvm/lib/MC/MCSectionWasm.cpp +++ b/llvm/lib/MC/MCSectionWasm.cpp @@ -9,7 +9,6 @@ #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +44,7 @@ static void printName(raw_ostream &OS, StringRef Name) { OS << '"'; } -void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, +void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const { @@ -102,6 +101,6 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, } } -bool MCSectionWasm::UseCodeAlign() const { return false; } +bool MCSectionWasm::useCodeAlign() const { return false; } bool MCSectionWasm::isVirtualSection() const { return false; } diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp index 2ff4839d3706..ee8fa04c421f 100644 --- a/llvm/lib/MC/MCSectionXCOFF.cpp +++ 
b/llvm/lib/MC/MCSectionXCOFF.cpp @@ -8,10 +8,12 @@ #include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +namespace llvm { +class MCExpr; +class Triple; +} // namespace llvm using namespace llvm; @@ -22,7 +24,7 @@ void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const { << '\n'; } -void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, +void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const { if (getKind().isText()) { @@ -117,7 +119,7 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, report_fatal_error("Printing for this SectionKind is unimplemented."); } -bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); } +bool MCSectionXCOFF::useCodeAlign() const { return getKind().isText(); } bool MCSectionXCOFF::isVirtualSection() const { // DWARF sections are always not virtual. diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index a14f0de65a9d..a229d282dabe 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" @@ -59,7 +60,7 @@ void MCTargetStreamer::changeSection(const MCSection *CurSection, MCSection *Section, const MCExpr *Subsection, raw_ostream &OS) { - Section->PrintSwitchToSection(*Streamer.getContext().getAsmInfo(), + Section->printSwitchToSection(*Streamer.getContext().getAsmInfo(), Streamer.getContext().getTargetTriple(), OS, Subsection); } @@ -96,7 +97,7 @@ MCStreamer::MCStreamer(MCContext &Ctx) SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } -MCStreamer::~MCStreamer() {} +MCStreamer::~MCStreamer() = default; void MCStreamer::reset() { DwarfFrameInfos.clear(); @@ -107,7 +108,7 @@ void MCStreamer::reset() { SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } -raw_ostream &MCStreamer::GetCommentOS() { +raw_ostream &MCStreamer::getCommentOS() {
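// [Illustration] getCommentOS() falls back to a stream that swallows all
// output (LLVM's nulls() returns a raw_null_ostream). A minimal iostreams
// sketch of the same "null sink" idea, independent of the LLVM API:
#include <ostream>
namespace {
struct NullBuf : std::streambuf {
  int overflow(int C) override { return C; } // Claim success, write nothing.
};
inline std::ostream &nullStream() {
  static NullBuf Buf;
  static std::ostream OS(&Buf);
  return OS;
}
} // namespace
// By default, discard comments.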
return nulls(); } @@ -186,7 +187,7 @@ void MCStreamer::emitSymbolValue(const MCSymbol *Sym, unsigned Size, if (!IsSectionRelative) emitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size); else - EmitCOFFSecRel32(Sym, /*Offset=*/0); + emitCOFFSecRel32(Sym, /*Offset=*/0); } void MCStreamer::emitDTPRel64Value(const MCExpr *Value) { @@ -251,6 +252,13 @@ void MCStreamer::emitCFIBKeyFrame() { CurFrame->IsBKeyFrame = true; } +void MCStreamer::emitCFIMTETaggedFrame() { + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->IsMTETaggedFrame = true; +} + void MCStreamer::emitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, unsigned Discriminator, @@ -283,18 +291,18 @@ MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() { return &DwarfFrameInfos.back(); } -bool MCStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, +bool MCStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef Checksum, unsigned ChecksumKind) { return getContext().getCVContext().addFile(*this, FileNo, Filename, Checksum, ChecksumKind); } -bool MCStreamer::EmitCVFuncIdDirective(unsigned FunctionId) { +bool MCStreamer::emitCVFuncIdDirective(unsigned FunctionId) { return getContext().getCVContext().recordFunctionId(FunctionId); } -bool MCStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId, +bool MCStreamer::emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc) { @@ -400,10 +408,10 @@ void MCStreamer::emitEHSymAttributes(const MCSymbol *Symbol, } void MCStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) { - SwitchSection(getContext().getObjectFileInfo()->getTextSection()); + switchSection(getContext().getObjectFileInfo()->getTextSection()); } -void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) { +void MCStreamer::assignFragment(MCSymbol *Symbol, MCFragment *Fragment) { assert(Fragment); Symbol->setFragment(Fragment); @@ -698,7 +706,7 @@ WinEH::FrameInfo *MCStreamer::EnsureValidWinFrameInfo(SMLoc Loc) { return CurrentWinFrameInfo; } -void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { +void MCStreamer::emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { const MCAsmInfo *MAI = Context.getAsmInfo(); if (!MAI->usesWindowsCFI()) return getContext().reportError( @@ -716,7 +724,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } -void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) { +void MCStreamer::emitWinCFIEndProc(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -730,11 +738,11 @@ void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) { for (size_t I = CurrentProcWinFrameInfoStartIndex, E = WinFrameInfos.size(); I != E; ++I) - EmitWindowsUnwindTables(WinFrameInfos[I].get()); - SwitchSection(CurFrame->TextSection); + emitWindowsUnwindTables(WinFrameInfos[I].get()); + switchSection(CurFrame->TextSection); } -void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) { +void MCStreamer::emitWinCFIFuncletOrFuncEnd(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -745,7 +753,7 @@ void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) { CurFrame->FuncletOrFuncEnd = Label; } -void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { +void MCStreamer::emitWinCFIStartChained(SMLoc Loc) { WinEH::FrameInfo 
*CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -758,7 +766,7 @@ void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } -void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) { +void MCStreamer::emitWinCFIEndChained(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -772,7 +780,7 @@ void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) { CurrentWinFrameInfo = const_cast(CurFrame->ChainedParent); } -void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, +void MCStreamer::emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -789,7 +797,7 @@ void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, CurFrame->HandlesExceptions = true; } -void MCStreamer::EmitWinEHHandlerData(SMLoc Loc) { +void MCStreamer::emitWinEHHandlerData(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -853,7 +861,7 @@ static unsigned encodeSEHRegNum(MCContext &Ctx, MCRegister Reg) { return Ctx.getRegisterInfo()->getSEHRegNum(Reg); } -void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { +void MCStreamer::emitWinCFIPushReg(MCRegister Register, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -865,7 +873,7 @@ void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, +void MCStreamer::emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -887,7 +895,7 @@ void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { +void MCStreamer::emitWinCFIAllocStack(unsigned Size, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -904,7 +912,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, +void MCStreamer::emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -921,7 +929,7 @@ void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, +void MCStreamer::emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -936,7 +944,7 @@ void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { +void MCStreamer::emitWinCFIPushFrame(bool Code, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -950,7 +958,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) { +void MCStreamer::emitWinCFIEndProlog(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; 
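// [Illustration] Every emitWinCFI* handler above follows the same shape:
// bail out unless a frame is open, drop a label at the current position, and
// append a WinEH::Instruction record that is only lowered to unwind codes
// when the frame is emitted. A reduced sketch of that record-now/emit-later
// design with hypothetical types (not the LLVM API):
#include <cstdint>
#include <vector>
struct UnwindRecord {
  unsigned Operation; // e.g. a UOP_* opcode.
  unsigned Register;
  int64_t Offset;
};
struct OpenFrame {
  std::vector<UnwindRecord> Instructions;
};
static void recordPushReg(OpenFrame *Frame, unsigned Reg) {
  if (!Frame)
    return; // Mirrors the "if (!CurFrame) return;" validation above.
  Frame->Instructions.push_back({/*Operation=*/0, Reg, /*Offset=*/0});
}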
@@ -960,15 +968,15 @@ void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) { CurFrame->PrologEnd = Label; } -void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {} +void MCStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {} -void MCStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {} +void MCStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) {} -void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {} +void MCStreamer::emitCOFFSectionIndex(MCSymbol const *Symbol) {} -void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {} +void MCStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {} -void MCStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {} +void MCStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {} /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is @@ -987,13 +995,11 @@ void MCStreamer::emitRawText(const Twine &T) { emitRawTextImpl(T.toStringRef(Str)); } -void MCStreamer::EmitWindowsUnwindTables() { -} +void MCStreamer::emitWindowsUnwindTables() {} -void MCStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { -} +void MCStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {} -void MCStreamer::Finish(SMLoc EndLoc) { +void MCStreamer::finish(SMLoc EndLoc) { if ((!DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End) || (!WinFrameInfos.empty() && !WinFrameInfos.back()->End)) { getContext().reportError(EndLoc, "Unfinished frame!"); @@ -1145,20 +1151,20 @@ void MCStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi, void MCStreamer::emitAssemblerFlag(MCAssemblerFlag Flag) {} void MCStreamer::emitThumbFunc(MCSymbol *Func) {} void MCStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} -void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { +void MCStreamer::beginCOFFSymbolDef(const MCSymbol *Symbol) { llvm_unreachable("this directive only supported on COFF targets"); } -void MCStreamer::EndCOFFSymbolDef() { +void MCStreamer::endCOFFSymbolDef() { llvm_unreachable("this directive only supported on COFF targets"); } void MCStreamer::emitFileDirective(StringRef Filename) {} void MCStreamer::emitFileDirective(StringRef Filename, StringRef CompilerVerion, StringRef TimeStamp, StringRef Description) { } -void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +void MCStreamer::emitCOFFSymbolStorageClass(int StorageClass) { llvm_unreachable("this directive only supported on COFF targets"); } -void MCStreamer::EmitCOFFSymbolType(int Type) { +void MCStreamer::emitCOFFSymbolType(int Type) { llvm_unreachable("this directive only supported on COFF targets"); } void MCStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size, @@ -1180,6 +1186,10 @@ void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, "XCOFF targets"); } +void MCStreamer::emitXCOFFRefDirective(StringRef Name) { + llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets"); +} + void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void MCStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym, StringRef Name, bool KeepOriginalSym) {} @@ -1212,7 +1222,7 @@ void MCStreamer::emitBundleLock(bool AlignToEnd) {} void MCStreamer::finishImpl() {} void MCStreamer::emitBundleUnlock() {} -void MCStreamer::SwitchSection(MCSection *Section, const MCExpr *Subsection) { +void MCStreamer::switchSection(MCSection *Section, const MCExpr *Subsection) { assert(Section 
&& "Cannot switch to a null section!"); MCSectionSubPair curSection = SectionStack.back().first; SectionStack.back().second = curSection; @@ -1233,7 +1243,7 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) { if (Sym->isInSection()) return Sym; - SwitchSection(Section); + switchSection(Section); emitLabel(Sym); return Sym; } @@ -1281,6 +1291,9 @@ static VersionTuple getMachoBuildVersionSupportedOS(const Triple &Target) { return VersionTuple(12); case Triple::WatchOS: return VersionTuple(5); + case Triple::DriverKit: + // DriverKit always uses the build version load command. + return VersionTuple(); default: break; } @@ -1305,6 +1318,8 @@ getMachoBuildVersionPlatformType(const Triple &Target) { case Triple::WatchOS: return Target.isSimulatorEnvironment() ? MachO::PLATFORM_WATCHOSSIMULATOR : MachO::PLATFORM_WATCHOS; + case Triple::DriverKit: + return MachO::PLATFORM_DRIVERKIT; default: break; } @@ -1334,6 +1349,9 @@ void MCStreamer::emitVersionForTarget( case Triple::WatchOS: Version = Target.getWatchOSVersion(); break; + case Triple::DriverKit: + Version = Target.getDriverKitVersion(); + break; default: llvm_unreachable("unexpected OS type"); } @@ -1353,15 +1371,14 @@ void MCStreamer::emitVersionForTarget( emitDarwinTargetVariantBuildVersion( getMachoBuildVersionPlatformType(Target), LinkedTargetVersion.getMajor(), - LinkedTargetVersion.getMinor().getValueOr(0), - LinkedTargetVersion.getSubminor().getValueOr(0), SDKVersion); + LinkedTargetVersion.getMinor().value_or(0), + LinkedTargetVersion.getSubminor().value_or(0), SDKVersion); return; } emitBuildVersion(getMachoBuildVersionPlatformType(Target), LinkedTargetVersion.getMajor(), - LinkedTargetVersion.getMinor().getValueOr(0), - LinkedTargetVersion.getSubminor().getValueOr(0), - SDKVersion); + LinkedTargetVersion.getMinor().value_or(0), + LinkedTargetVersion.getSubminor().value_or(0), SDKVersion); ShouldEmitBuildVersion = true; } @@ -1372,8 +1389,8 @@ void MCStreamer::emitVersionForTarget( emitDarwinTargetVariantBuildVersion( getMachoBuildVersionPlatformType(*TVT), TVLinkedTargetVersion.getMajor(), - TVLinkedTargetVersion.getMinor().getValueOr(0), - TVLinkedTargetVersion.getSubminor().getValueOr(0), + TVLinkedTargetVersion.getMinor().value_or(0), + TVLinkedTargetVersion.getSubminor().value_or(0), DarwinTargetVariantSDKVersion); } } @@ -1383,6 +1400,6 @@ void MCStreamer::emitVersionForTarget( emitVersionMin(getMachoVersionMinLoadCommandType(Target), LinkedTargetVersion.getMajor(), - LinkedTargetVersion.getMinor().getValueOr(0), - LinkedTargetVersion.getSubminor().getValueOr(0), SDKVersion); + LinkedTargetVersion.getMinor().value_or(0), + LinkedTargetVersion.getSubminor().value_or(0), SDKVersion); } diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp index 67cab9a92722..4017225a81c4 100644 --- a/llvm/lib/MC/MCSymbol.cpp +++ b/llvm/lib/MC/MCSymbol.cpp @@ -11,7 +11,6 @@ #include "llvm/Config/llvm-config.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/MC/MCSymbolELF.cpp b/llvm/lib/MC/MCSymbolELF.cpp index 1830b87fd856..820a91f57c17 100644 --- a/llvm/lib/MC/MCSymbolELF.cpp +++ b/llvm/lib/MC/MCSymbolELF.cpp @@ -8,7 +8,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCFixupKindInfo.h" namespace llvm { diff --git a/llvm/lib/MC/MCTargetOptions.cpp b/llvm/lib/MC/MCTargetOptions.cpp index eb57917ee8fd..c2946da3ee66 100644 --- 
a/llvm/lib/MC/MCTargetOptions.cpp +++ b/llvm/lib/MC/MCTargetOptions.cpp @@ -13,11 +13,12 @@ using namespace llvm; MCTargetOptions::MCTargetOptions() : MCRelaxAll(false), MCNoExecStack(false), MCFatalWarnings(false), - MCNoWarn(false), MCNoDeprecatedWarn(false), - MCNoTypeCheck(false), MCSaveTempLabels(false), - MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false), + MCNoWarn(false), MCNoDeprecatedWarn(false), MCNoTypeCheck(false), + MCSaveTempLabels(false), MCIncrementalLinkerCompatible(false), ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), - PreserveAsmComments(true), Dwarf64(false) {} + PreserveAsmComments(true), Dwarf64(false), + EmitDwarfUnwind(EmitDwarfUnwindType::Default), + MCUseDwarfDirectory(DefaultDwarfDirectory) {} StringRef MCTargetOptions::getABIName() const { return ABIName; diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp index 762c8d43063c..a310dc894021 100644 --- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp +++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp @@ -1,5 +1,4 @@ -//===-- MCTargetOptionsCommandFlags.cpp --------------------------*- C++ -//-*-===// +//===-- MCTargetOptionsCommandFlags.cpp -----------------------*- C++ //-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -39,6 +38,7 @@ MCOPT_EXP(bool, RelaxAll) MCOPT(bool, IncrementalLinkerCompatible) MCOPT(int, DwarfVersion) MCOPT(bool, Dwarf64) +MCOPT(EmitDwarfUnwindType, EmitDwarfUnwind) MCOPT(bool, ShowMCInst) MCOPT(bool, FatalWarnings) MCOPT(bool, NoWarn) @@ -73,6 +73,19 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() { cl::desc("Generate debugging info in the 64-bit DWARF format")); MCBINDOPT(Dwarf64); + static cl::opt EmitDwarfUnwind( + "emit-dwarf-unwind", cl::desc("Whether to emit DWARF EH frame entries."), + cl::init(EmitDwarfUnwindType::Default), + cl::values(clEnumValN(EmitDwarfUnwindType::Always, "always", + "Always emit EH frame entries"), + clEnumValN(EmitDwarfUnwindType::NoCompactUnwind, + "no-compact-unwind", + "Only emit EH frame entries when compact unwind is " + "not available"), + clEnumValN(EmitDwarfUnwindType::Default, "default", + "Use target platform default"))); + MCBINDOPT(EmitDwarfUnwind); + static cl::opt ShowMCInst( "asm-show-inst", cl::desc("Emit internal instruction representation to assembly file")); @@ -116,5 +129,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() { Options.MCNoWarn = getNoWarn(); Options.MCNoDeprecatedWarn = getNoDeprecatedWarn(); Options.MCNoTypeCheck = getNoTypeCheck(); + Options.EmitDwarfUnwind = getEmitDwarfUnwind(); + return Options; } diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp index 90249fb7380a..ce948c7435f5 100644 --- a/llvm/lib/MC/MCWasmStreamer.cpp +++ b/llvm/lib/MC/MCWasmStreamer.cpp @@ -11,27 +11,30 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCWasmStreamer.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectStreamer.h" #include 
"llvm/MC/MCSection.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" -#include "llvm/MC/MCValue.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +namespace llvm { +class MCContext; +class MCStreamer; +class MCSubtargetInfo; +} // namespace llvm + using namespace llvm; MCWasmStreamer::~MCWasmStreamer() = default; // anchor. @@ -118,6 +121,7 @@ bool MCWasmStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_Invalid: case MCSA_IndirectSymbol: case MCSA_Protected: + case MCSA_Exported: return false; case MCSA_Hidden: diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index 2a93c352c68a..ffabe0fe8978 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -7,15 +7,17 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCWin64EH.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Win64EH.h" +namespace llvm { +class MCSection; +} using namespace llvm; @@ -226,14 +228,14 @@ void llvm::Win64EH::UnwindEmitter::Emit(MCStreamer &Streamer) const { // Emit the unwind info structs first. for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection); - Streamer.SwitchSection(XData); + Streamer.switchSection(XData); ::EmitUnwindInfo(Streamer, CFI.get()); } // Now emit RUNTIME_FUNCTION entries. for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection); - Streamer.SwitchSection(PData); + Streamer.switchSection(PData); EmitRuntimeFunction(Streamer, CFI.get()); } } @@ -244,13 +246,26 @@ void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer, // Switch sections (the static function above is meant to be called from // here and from Emit(). MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection); - Streamer.SwitchSection(XData); + Streamer.switchSection(XData); ::EmitUnwindInfo(Streamer, info); } -static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, - const MCSymbol *RHS) { +static const MCExpr *GetSubDivExpr(MCStreamer &Streamer, const MCSymbol *LHS, + const MCSymbol *RHS, int Div) { + MCContext &Context = Streamer.getContext(); + const MCExpr *Expr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context), + MCSymbolRefExpr::create(RHS, Context), Context); + if (Div != 1) + Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(Div, Context), + Context); + return Expr; +} + +static Optional GetOptionalAbsDifference(MCStreamer &Streamer, + const MCSymbol *LHS, + const MCSymbol *RHS) { MCContext &Context = Streamer.getContext(); const MCExpr *Diff = MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context), @@ -261,10 +276,18 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, // unusual constructs, like an inline asm with an alignment directive. 
int64_t value; if (!Diff->evaluateAsAbsolute(value, OS->getAssembler())) - report_fatal_error("Failed to evaluate function length in SEH unwind info"); + return None; return value; } +static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, + const MCSymbol *RHS) { + Optional<int64_t> MaybeDiff = GetOptionalAbsDifference(Streamer, LHS, RHS); + if (!MaybeDiff) + report_fatal_error("Failed to evaluate function length in SEH unwind info"); + return *MaybeDiff; +} + static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) { uint32_t Count = 0; for (const auto &I : Insns) { @@ -350,7 +373,7 @@ static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) { // Unwind opcode encodings and restrictions are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling -static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, +static void ARM64EmitUnwindCode(MCStreamer &streamer, const WinEH::Instruction &inst) { uint8_t b, reg; switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) { @@ -513,7 +536,7 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, } // Returns the epilog symbol of an epilog with the exact same unwind code -// sequence, if it exists. Otherwise, returns nulltpr. +// sequence, if it exists. Otherwise, returns nullptr. // EpilogInstrs - Unwind codes for the current epilog. // Epilogs - Epilogs that potentially match the current epilog. static MCSymbol* @@ -524,18 +547,16 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs, auto InstrsIter = info->EpilogMap.find(EpilogStart); assert(InstrsIter != info->EpilogMap.end() && "Epilog not found in EpilogMap"); - const auto &Instrs = InstrsIter->second; + const auto &Instrs = InstrsIter->second.Instructions; if (Instrs.size() != EpilogInstrs.size()) continue; bool Match = true; for (unsigned i = 0; i < Instrs.size(); ++i) - if (Instrs[i].Operation != EpilogInstrs[i].Operation || - Instrs[i].Offset != EpilogInstrs[i].Offset || - Instrs[i].Register != EpilogInstrs[i].Register) { - Match = false; - break; + if (Instrs[i] != EpilogInstrs[i]) { + Match = false; + break; } if (Match) @@ -544,8 +565,8 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs, return nullptr; } -static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions, - bool Reverse) { +static void simplifyARM64Opcodes(std::vector<WinEH::Instruction> &Instructions, + bool Reverse) { unsigned PrevOffset = -1; unsigned PrevRegister = -1; @@ -606,26 +627,37 @@ } } -static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info, - int PrologCodeBytes) { - // Can only pack if there's one single epilog - if (info->EpilogMap.size() != 1) - return -1; - - const std::vector<WinEH::Instruction> &Epilog = - info->EpilogMap.begin()->second; - - // Can pack if the epilog is a subset of the prolog but not vice versa - if (Epilog.size() > info->Instructions.size()) +// Check if an epilog exists as a subset of the end of a prolog (backwards). +static int +getARM64OffsetInProlog(const std::vector<WinEH::Instruction> &Prolog, + const std::vector<WinEH::Instruction> &Epilog) { + // Can't find an epilog as a subset if it is longer than the prolog. + if (Epilog.size() > Prolog.size()) return -1; // Check that the epilog actually is a perfect match for the end (backwards) // of the prolog. for (int I = Epilog.size() - 1; I >= 0; I--) { - if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I]) + if (Prolog[I] != Epilog[Epilog.size() - 1 - I]) return -1; }
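// [Illustration] Since prolog unwind codes are stored in reverse order,
// "the epilog replays the tail of the prolog" reduces to "the reversed
// epilog is a prefix of the prolog". A generic sketch of the test performed
// by the loop above; note the real code then converts the leftover prolog
// entries to encoded-byte counts via ARM64CountOfUnwindCodes, while this
// sketch simply counts opcodes:
#include <algorithm>
#include <vector>
template <typename T>
static int offsetInProlog(const std::vector<T> &Prolog,
                          const std::vector<T> &Epilog) {
  if (Epilog.size() > Prolog.size())
    return -1; // A longer epilog cannot be a subset.
  if (!std::equal(Epilog.rbegin(), Epilog.rend(), Prolog.begin()))
    return -1; // Not a mirrored match.
  return int(Prolog.size() - Epilog.size()); // Unshared prolog entries.
}
+ // If the epilog was a subset of the prolog, find its offset.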
+ if (Epilog.size() == Prolog.size()) + return 0; + return ARM64CountOfUnwindCodes(ArrayRef( + &Prolog[Epilog.size()], Prolog.size() - Epilog.size())); +} + +static int checkARM64PackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info, + int PrologCodeBytes) { + // Can only pack if there's one single epilog + if (info->EpilogMap.size() != 1) + return -1; + + const std::vector &Epilog = + info->EpilogMap.begin()->second.Instructions; + // Check that the epilog actually is at the very end of the function, // otherwise it can't be packed. uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference( @@ -633,24 +665,33 @@ static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info, if (DistanceFromEnd / 4 != Epilog.size()) return -1; - int Offset = Epilog.size() == info->Instructions.size() - ? 0 - : ARM64CountOfUnwindCodes(ArrayRef( - &info->Instructions[Epilog.size()], - info->Instructions.size() - Epilog.size())); + int RetVal = -1; + // Even if we don't end up sharing opcodes with the prolog, we can still + // write the offset as a packed offset, if the single epilog is located at + // the end of the function and the offset (pointing after the prolog) fits + // as a packed offset. + if (PrologCodeBytes <= 31 && + PrologCodeBytes + ARM64CountOfUnwindCodes(Epilog) <= 124) + RetVal = PrologCodeBytes; + + int Offset = getARM64OffsetInProlog(info->Instructions, Epilog); + if (Offset < 0) + return RetVal; // Check that the offset and prolog size fits in the first word; it's // unclear whether the epilog count in the extension word can be taken // as packed epilog offset. if (Offset > 31 || PrologCodeBytes > 124) - return -1; + return RetVal; + // As we choose to express the epilog as part of the prolog, remove the + // epilog from the map, so we don't try to emit its opcodes. info->EpilogMap.clear(); return Offset; } -static bool tryPackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength, - int PackedEpilogOffset) { +static bool tryARM64PackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength, + int PackedEpilogOffset) { if (PackedEpilogOffset == 0) { // Fully symmetric prolog and epilog, should be ok for packed format. // For CR=3, the corresponding synthesized epilog actually lacks the @@ -842,6 +883,16 @@ static bool tryPackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength, if (Nops != 0 && Nops != 4) return false; int H = Nops == 4; + // There's an inconsistency regarding packed unwind info with homed + // parameters; according to the documentation, the epilog shouldn't have + // the same corresponding nops (and thus, to set the H bit, we should + // require an epilog which isn't exactly symmetrical - we shouldn't accept + // an exact mirrored epilog for those cases), but in practice, + // RtlVirtualUnwind behaves as if it does expect the epilogue to contain + // the same nops. See https://github.com/llvm/llvm-project/issues/54879. + // To play it safe, don't produce packed unwind info with homed parameters. 
+ if (H) + return false; int IntSZ = 8 * RegI; if (StandaloneLR) IntSZ += 8; @@ -901,9 +952,9 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, return; } - simplifyOpcodes(info->Instructions, false); + simplifyARM64Opcodes(info->Instructions, false); for (auto &I : info->EpilogMap) - simplifyOpcodes(I.second, true); + simplifyARM64Opcodes(I.second.Instructions, true); MCContext &context = streamer.getContext(); MCSymbol *Label = context.createTempSymbol(); @@ -951,10 +1002,12 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions); uint32_t TotalCodeBytes = PrologCodeBytes; - int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes); + int PackedEpilogOffset = + checkARM64PackedEpilog(streamer, info, PrologCodeBytes); - if (PackedEpilogOffset >= 0 && !info->HandlesExceptions && - FuncLength <= 0x7ff && TryPacked) { + if (PackedEpilogOffset >= 0 && + uint32_t(PackedEpilogOffset) < PrologCodeBytes && + !info->HandlesExceptions && FuncLength <= 0x7ff && TryPacked) { // Matching prolog/epilog and no exception handlers; check if the // prolog matches the patterns that can be described by the packed // format. @@ -963,7 +1016,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, // unwind info there. Keep using that as indicator that this unwind // info has been generated already. - if (tryPackedUnwind(info, FuncLength, PackedEpilogOffset)) + if (tryARM64PackedUnwind(info, FuncLength, PackedEpilogOffset)) return; } @@ -974,11 +1027,12 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, for (auto &I : info->EpilogMap) { MCSymbol *EpilogStart = I.first; - auto &EpilogInstrs = I.second; + auto &EpilogInstrs = I.second.Instructions; uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs); MCSymbol* MatchingEpilog = FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info); + int PrologOffset; if (MatchingEpilog) { assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() && "Duplicate epilog not found"); @@ -986,6 +1040,12 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, // Clear the unwind codes in the EpilogMap, so that they don't get output // in the logic below. EpilogInstrs.clear(); + } else if ((PrologOffset = getARM64OffsetInProlog(info->Instructions, + EpilogInstrs)) >= 0) { + EpilogInfo[EpilogStart] = PrologOffset; + // Clear the unwind codes in the EpilogMap, so that they don't get output + // in the logic below. + EpilogInstrs.clear(); } else { EpilogInfo[EpilogStart] = TotalCodeBytes; TotalCodeBytes += CodeBytes; @@ -1016,8 +1076,6 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, // Extended Code Words, Extended Epilog Count if (ExtensionWord) { // FIXME: We should be able to split unwind info into multiple sections. - // FIXME: We should share epilog codes across epilogs, where possible, - // which would make this issue show up less frequently. 
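// [Illustration] The epilog loop above assigns each epilog a start index
// into the unwind-code array, reusing an existing index when the codes are
// already present (an identical earlier epilog, or a tail of the prolog).
// A compact sketch of that deduplication, with std::map/std::string standing
// in for the MCSymbol-keyed maps used by the real code:
#include <map>
#include <string>
#include <vector>
using Codes = std::vector<int>;
static std::map<std::string, unsigned>
layoutEpilogs(const std::map<std::string, Codes> &Epilogs,
              unsigned PrologBytes) {
  std::map<std::string, unsigned> StartIndex;
  std::map<Codes, unsigned> Seen; // Code sequence -> offset already emitted.
  unsigned Total = PrologBytes;
  for (const auto &[Name, C] : Epilogs) {
    auto [It, Inserted] = Seen.try_emplace(C, Total);
    if (Inserted)
      Total += unsigned(C.size()); // New sequence: append after existing codes.
    StartIndex[Name] = It->second; // Duplicates reuse the earlier offset.
  }
  return StartIndex;
}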
if (CodeWords > 0xFF || EpilogCount > 0xFFFF) report_fatal_error("SEH unwind data splitting not yet implemented"); uint32_t row2 = 0x0; @@ -1026,17 +1084,19 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, streamer.emitInt32(row2); } - // Epilog Start Index, Epilog Start Offset - for (auto &I : EpilogInfo) { - MCSymbol *EpilogStart = I.first; - uint32_t EpilogIndex = I.second; - uint32_t EpilogOffset = - (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin); - if (EpilogOffset) - EpilogOffset /= 4; - uint32_t row3 = EpilogOffset; - row3 |= (EpilogIndex & 0x3FF) << 22; - streamer.emitInt32(row3); + if (PackedEpilogOffset < 0) { + // Epilog Start Index, Epilog Start Offset + for (auto &I : EpilogInfo) { + MCSymbol *EpilogStart = I.first; + uint32_t EpilogIndex = I.second; + uint32_t EpilogOffset = + (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin); + if (EpilogOffset) + EpilogOffset /= 4; + uint32_t row3 = EpilogOffset; + row3 |= (EpilogIndex & 0x3FF) << 22; + streamer.emitInt32(row3); + } } // Emit prolog unwind instructions (in reverse order). @@ -1044,14 +1104,14 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, for (uint8_t c = 0; c < numInst; ++c) { WinEH::Instruction inst = info->Instructions.back(); info->Instructions.pop_back(); - ARM64EmitUnwindCode(streamer, info->Begin, inst); + ARM64EmitUnwindCode(streamer, inst); } // Emit epilog unwind instructions for (auto &I : info->EpilogMap) { - auto &EpilogInstrs = I.second; + auto &EpilogInstrs = I.second.Instructions; for (const WinEH::Instruction &inst : EpilogInstrs) - ARM64EmitUnwindCode(streamer, info->Begin, inst); + ARM64EmitUnwindCode(streamer, inst); } int32_t BytesMod = CodeWords * 4 - TotalCodeBytes; @@ -1066,8 +1126,1087 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, 4); } -static void ARM64EmitRuntimeFunction(MCStreamer &streamer, - const WinEH::FrameInfo *info) { +static uint32_t ARMCountOfUnwindCodes(ArrayRef Insns) { + uint32_t Count = 0; + for (const auto &I : Insns) { + switch (static_cast(I.Operation)) { + default: + llvm_unreachable("Unsupported ARM unwind code"); + case Win64EH::UOP_AllocSmall: + Count += 1; + break; + case Win64EH::UOP_AllocLarge: + Count += 3; + break; + case Win64EH::UOP_AllocHuge: + Count += 4; + break; + case Win64EH::UOP_WideAllocMedium: + Count += 2; + break; + case Win64EH::UOP_WideAllocLarge: + Count += 3; + break; + case Win64EH::UOP_WideAllocHuge: + Count += 4; + break; + case Win64EH::UOP_WideSaveRegMask: + Count += 2; + break; + case Win64EH::UOP_SaveSP: + Count += 1; + break; + case Win64EH::UOP_SaveRegsR4R7LR: + Count += 1; + break; + case Win64EH::UOP_WideSaveRegsR4R11LR: + Count += 1; + break; + case Win64EH::UOP_SaveFRegD8D15: + Count += 1; + break; + case Win64EH::UOP_SaveRegMask: + Count += 2; + break; + case Win64EH::UOP_SaveLR: + Count += 2; + break; + case Win64EH::UOP_SaveFRegD0D15: + Count += 2; + break; + case Win64EH::UOP_SaveFRegD16D31: + Count += 2; + break; + case Win64EH::UOP_Nop: + case Win64EH::UOP_WideNop: + case Win64EH::UOP_End: + case Win64EH::UOP_EndNop: + case Win64EH::UOP_WideEndNop: + Count += 1; + break; + case Win64EH::UOP_Custom: { + int J; + for (J = 3; J > 0; J--) + if (I.Offset & (0xffu << (8 * J))) + break; + Count += J + 1; + break; + } + } + } + return Count; +} + +static uint32_t ARMCountOfInstructionBytes(ArrayRef Insns, + bool *HasCustom = nullptr) { + uint32_t Count = 0; + for (const auto &I : Insns) { + switch 
(static_cast(I.Operation)) { + default: + llvm_unreachable("Unsupported ARM unwind code"); + case Win64EH::UOP_AllocSmall: + case Win64EH::UOP_AllocLarge: + case Win64EH::UOP_AllocHuge: + Count += 2; + break; + case Win64EH::UOP_WideAllocMedium: + case Win64EH::UOP_WideAllocLarge: + case Win64EH::UOP_WideAllocHuge: + Count += 4; + break; + case Win64EH::UOP_WideSaveRegMask: + case Win64EH::UOP_WideSaveRegsR4R11LR: + Count += 4; + break; + case Win64EH::UOP_SaveSP: + Count += 2; + break; + case Win64EH::UOP_SaveRegMask: + case Win64EH::UOP_SaveRegsR4R7LR: + Count += 2; + break; + case Win64EH::UOP_SaveFRegD8D15: + case Win64EH::UOP_SaveFRegD0D15: + case Win64EH::UOP_SaveFRegD16D31: + Count += 4; + break; + case Win64EH::UOP_SaveLR: + Count += 4; + break; + case Win64EH::UOP_Nop: + case Win64EH::UOP_EndNop: + Count += 2; + break; + case Win64EH::UOP_WideNop: + case Win64EH::UOP_WideEndNop: + Count += 4; + break; + case Win64EH::UOP_End: + // This doesn't map to any instruction + break; + case Win64EH::UOP_Custom: + // We can't reason about what instructions this maps to; return a + // phony number to make sure we don't accidentally do epilog packing. + Count += 1000; + if (HasCustom) + *HasCustom = true; + break; + } + } + return Count; +} + +static void checkARMInstructions(MCStreamer &Streamer, + ArrayRef Insns, + const MCSymbol *Begin, const MCSymbol *End, + StringRef Name, StringRef Type) { + if (!End) + return; + Optional MaybeDistance = + GetOptionalAbsDifference(Streamer, End, Begin); + if (!MaybeDistance) + return; + uint32_t Distance = (uint32_t)*MaybeDistance; + bool HasCustom = false; + uint32_t InstructionBytes = ARMCountOfInstructionBytes(Insns, &HasCustom); + if (HasCustom) + return; + if (Distance != InstructionBytes) { + Streamer.getContext().reportError( + SMLoc(), "Incorrect size for " + Name + " " + Type + ": " + + Twine(Distance) + + " bytes of instructions in range, but .seh directives " + "corresponding to " + + Twine(InstructionBytes) + " bytes\n"); + } +} + +static bool isARMTerminator(const WinEH::Instruction &inst) { + switch (static_cast(inst.Operation)) { + case Win64EH::UOP_End: + case Win64EH::UOP_EndNop: + case Win64EH::UOP_WideEndNop: + return true; + default: + return false; + } +} + +// Unwind opcode encodings and restrictions are documented at +// https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling +static void ARMEmitUnwindCode(MCStreamer &streamer, + const WinEH::Instruction &inst) { + uint32_t w, lr; + int i; + switch (static_cast(inst.Operation)) { + default: + llvm_unreachable("Unsupported ARM unwind code"); + case Win64EH::UOP_AllocSmall: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0x7f); + streamer.emitInt8(inst.Offset / 4); + break; + case Win64EH::UOP_WideSaveRegMask: + assert((inst.Register & ~0x5fff) == 0); + lr = (inst.Register >> 14) & 1; + w = 0x8000 | (inst.Register & 0x1fff) | (lr << 13); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_SaveSP: + assert(inst.Register <= 0x0f); + streamer.emitInt8(0xc0 | inst.Register); + break; + case Win64EH::UOP_SaveRegsR4R7LR: + assert(inst.Register >= 4 && inst.Register <= 7); + assert(inst.Offset <= 1); + streamer.emitInt8(0xd0 | (inst.Register - 4) | (inst.Offset << 2)); + break; + case Win64EH::UOP_WideSaveRegsR4R11LR: + assert(inst.Register >= 8 && inst.Register <= 11); + assert(inst.Offset <= 1); + streamer.emitInt8(0xd8 | (inst.Register - 8) | (inst.Offset << 2)); + break; + case Win64EH::UOP_SaveFRegD8D15: + 
assert(inst.Register >= 8 && inst.Register <= 15); + streamer.emitInt8(0xe0 | (inst.Register - 8)); + break; + case Win64EH::UOP_WideAllocMedium: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0x3ff); + w = 0xe800 | (inst.Offset / 4); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_SaveRegMask: + assert((inst.Register & ~0x40ff) == 0); + lr = (inst.Register >> 14) & 1; + w = 0xec00 | (inst.Register & 0x0ff) | (lr << 8); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_SaveLR: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0x0f); + streamer.emitInt8(0xef); + streamer.emitInt8(inst.Offset / 4); + break; + case Win64EH::UOP_SaveFRegD0D15: + assert(inst.Register <= 15); + assert(inst.Offset <= 15); + assert(inst.Register <= inst.Offset); + streamer.emitInt8(0xf5); + streamer.emitInt8((inst.Register << 4) | inst.Offset); + break; + case Win64EH::UOP_SaveFRegD16D31: + assert(inst.Register >= 16 && inst.Register <= 31); + assert(inst.Offset >= 16 && inst.Offset <= 31); + assert(inst.Register <= inst.Offset); + streamer.emitInt8(0xf6); + streamer.emitInt8(((inst.Register - 16) << 4) | (inst.Offset - 16)); + break; + case Win64EH::UOP_AllocLarge: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0xffff); + w = inst.Offset / 4; + streamer.emitInt8(0xf7); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_AllocHuge: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0xffffff); + w = inst.Offset / 4; + streamer.emitInt8(0xf8); + streamer.emitInt8((w >> 16) & 0xff); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_WideAllocLarge: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0xffff); + w = inst.Offset / 4; + streamer.emitInt8(0xf9); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_WideAllocHuge: + assert((inst.Offset & 3) == 0); + assert(inst.Offset / 4 <= 0xffffff); + w = inst.Offset / 4; + streamer.emitInt8(0xfa); + streamer.emitInt8((w >> 16) & 0xff); + streamer.emitInt8((w >> 8) & 0xff); + streamer.emitInt8((w >> 0) & 0xff); + break; + case Win64EH::UOP_Nop: + streamer.emitInt8(0xfb); + break; + case Win64EH::UOP_WideNop: + streamer.emitInt8(0xfc); + break; + case Win64EH::UOP_EndNop: + streamer.emitInt8(0xfd); + break; + case Win64EH::UOP_WideEndNop: + streamer.emitInt8(0xfe); + break; + case Win64EH::UOP_End: + streamer.emitInt8(0xff); + break; + case Win64EH::UOP_Custom: + for (i = 3; i > 0; i--) + if (inst.Offset & (0xffu << (8 * i))) + break; + for (; i >= 0; i--) + streamer.emitInt8((inst.Offset >> (8 * i)) & 0xff); + break; + } +} + +// Check if an epilog exists as a subset of the end of a prolog (backwards). +// An epilog may end with one out of three different end opcodes; if this +// is the first epilog that shares opcodes with the prolog, we can tolerate +// that this opcode differs (and the caller will update the prolog to use +// the same end opcode as the epilog). If another epilog already shares +// opcodes with the prolog, the ending opcode must be a strict match. +static int getARMOffsetInProlog(const std::vector &Prolog, + const std::vector &Epilog, + bool CanTweakProlog) { + // Can't find an epilog as a subset if it is longer than the prolog. 
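// [Illustration] Multi-byte opcodes above are written most significant byte
// first, and UOP_Custom trims leading zero bytes so only the significant
// bytes of the raw 32-bit value are emitted (at least one). A standalone
// sketch of that trim, matching the index loop in ARMEmitUnwindCode:
#include <cstdint>
#include <vector>
static void emitCustomOpcode(std::vector<uint8_t> &Out, uint32_t Value) {
  int I = 3;
  while (I > 0 && ((Value >> (8 * I)) & 0xffu) == 0)
    --I; // Find the highest non-zero byte; emit at least one byte.
  for (; I >= 0; --I)
    Out.push_back(uint8_t((Value >> (8 * I)) & 0xffu)); // MSB first.
}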
+ if (Epilog.size() > Prolog.size()) + return -1; + + // Check that the epilog actually is a perfect match for the end (backwards) + // of the prolog. + // If we can adjust the prolog afterwards, don't check that the end opcodes + // match. + int EndIdx = CanTweakProlog ? 1 : 0; + for (int I = Epilog.size() - 1; I >= EndIdx; I--) { + // TODO: Could also allow minor mismatches, e.g. "add sp, #16" vs + // "push {r0-r3}". + if (Prolog[I] != Epilog[Epilog.size() - 1 - I]) + return -1; + } + + if (CanTweakProlog) { + // Check that both prolog and epilog end with an expected end opcode. + if (Prolog.front().Operation != Win64EH::UOP_End) + return -1; + if (Epilog.back().Operation != Win64EH::UOP_End && + Epilog.back().Operation != Win64EH::UOP_EndNop && + Epilog.back().Operation != Win64EH::UOP_WideEndNop) + return -1; + } + + // If the epilog was a subset of the prolog, find its offset. + if (Epilog.size() == Prolog.size()) + return 0; + return ARMCountOfUnwindCodes(ArrayRef<WinEH::Instruction>( + &Prolog[Epilog.size()], Prolog.size() - Epilog.size())); +} + +static int checkARMPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info, + int PrologCodeBytes) { + // Can only pack if there's one single epilog + if (info->EpilogMap.size() != 1) + return -1; + + const WinEH::FrameInfo::Epilog &EpilogInfo = info->EpilogMap.begin()->second; + // Can only pack if the epilog is unconditional + if (EpilogInfo.Condition != 0xe) // ARMCC::AL + return -1; + + const std::vector<WinEH::Instruction> &Epilog = EpilogInfo.Instructions; + // Make sure we have at least the trailing end opcode + if (info->Instructions.empty() || Epilog.empty()) + return -1; + + // Check that the epilog actually is at the very end of the function, + // otherwise it can't be packed. + Optional<int64_t> MaybeDistance = GetOptionalAbsDifference( + streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first); + if (!MaybeDistance) + return -1; + uint32_t DistanceFromEnd = (uint32_t)*MaybeDistance; + uint32_t InstructionBytes = ARMCountOfInstructionBytes(Epilog); + if (DistanceFromEnd != InstructionBytes) + return -1; + + int RetVal = -1; + // Even if we don't end up sharing opcodes with the prolog, we can still + // write the offset as a packed offset, if the single epilog is located at + // the end of the function and the offset (pointing after the prolog) fits + // as a packed offset. + if (PrologCodeBytes <= 31 && + PrologCodeBytes + ARMCountOfUnwindCodes(Epilog) <= 63) + RetVal = PrologCodeBytes; + + int Offset = + getARMOffsetInProlog(info->Instructions, Epilog, /*CanTweakProlog=*/true); + if (Offset < 0) + return RetVal; + + // Check that the offset and prolog size fit in the first word; it's + // unclear whether the epilog count in the extension word can be taken + // as packed epilog offset. + if (Offset > 31 || PrologCodeBytes > 63) + return RetVal; + + // Replace the regular end opcode of the prolog with the one from the + // epilog. + info->Instructions.front() = Epilog.back(); + + // As we choose to express the epilog as part of the prolog, remove the + // epilog from the map, so we don't try to emit its opcodes.
+ info->EpilogMap.clear(); + return Offset; +} + +static bool parseRegMask(unsigned Mask, bool &HasLR, bool &HasR11, + unsigned &Folded, int &IntRegs) { + if (Mask & (1 << 14)) { + HasLR = true; + Mask &= ~(1 << 14); + } + if (Mask & (1 << 11)) { + HasR11 = true; + Mask &= ~(1 << 11); + } + Folded = 0; + IntRegs = -1; + if (!Mask) + return true; + int First = 0; + // Shift right until we have the bits at the bottom + while ((Mask & 1) == 0) { + First++; + Mask >>= 1; + } + if ((Mask & (Mask + 1)) != 0) + return false; // Not a consecutive series of bits? Can't be packed. + // Count the bits + int N = 0; + while (Mask & (1 << N)) + N++; + if (First < 4) { + if (First + N < 4) + return false; + Folded = 4 - First; + N -= Folded; + First = 4; + } + if (First > 4) + return false; // Can't be packed + if (N >= 1) + IntRegs = N - 1; + return true; +} + +static bool tryARMPackedUnwind(MCStreamer &streamer, WinEH::FrameInfo *info, + uint32_t FuncLength) { + int Step = 0; + bool Homing = false; + bool HasR11 = false; + bool HasChain = false; + bool HasLR = false; + int IntRegs = -1; // r4 - r(4+N) + int FloatRegs = -1; // d8 - d(8+N) + unsigned PF = 0; // Number of extra pushed registers + unsigned StackAdjust = 0; + // Iterate over the prolog and check that all opcodes exactly match + // the canonical order and form. + for (const WinEH::Instruction &Inst : info->Instructions) { + switch (Inst.Operation) { + default: + llvm_unreachable("Unsupported ARM unwind code"); + case Win64EH::UOP_Custom: + case Win64EH::UOP_AllocLarge: + case Win64EH::UOP_AllocHuge: + case Win64EH::UOP_WideAllocLarge: + case Win64EH::UOP_WideAllocHuge: + case Win64EH::UOP_SaveFRegD0D15: + case Win64EH::UOP_SaveFRegD16D31: + // Can't be packed + return false; + case Win64EH::UOP_SaveSP: + // Can't be packed; we can't rely on restoring sp from r11 when + // unwinding a packed prologue. 
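+ return false;
// [Illustration] parseRegMask above accepts only a contiguous run of
// registers. Once trailing zeros are shifted out, a value is a run of
// consecutive one-bits exactly when adding 1 carries through all of them,
// i.e. (Mask & (Mask + 1)) == 0. A standalone sketch of that contiguity
// test:
static bool isConsecutiveRun(unsigned Mask) {
  if (Mask == 0)
    return false;
  while ((Mask & 1) == 0)
    Mask >>= 1;                    // Drop trailing zeros, as the parser does.
  return (Mask & (Mask + 1)) == 0; // 0b0111... + 1 clears every set bit.
}
// e.g. 0b0111000 -> true (an r3-r5-style run), 0b0101000 -> false.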
+ case Win64EH::UOP_SaveLR: + // Can't be present in a packed prologue + return false; + + case Win64EH::UOP_End: + case Win64EH::UOP_EndNop: + case Win64EH::UOP_WideEndNop: + if (Step != 0) + return false; + Step = 1; + break; + + case Win64EH::UOP_SaveRegsR4R7LR: + case Win64EH::UOP_WideSaveRegsR4R11LR: + // push {r4-r11,lr} + if (Step != 1 && Step != 2) + return false; + assert(Inst.Register >= 4 && Inst.Register <= 11); // r4-rX + assert(Inst.Offset <= 1); // Lr + IntRegs = Inst.Register - 4; + if (Inst.Register == 11) { + HasR11 = true; + IntRegs--; + } + if (Inst.Offset) + HasLR = true; + Step = 3; + break; + + case Win64EH::UOP_SaveRegMask: + if (Step == 1 && Inst.Register == 0x0f) { + // push {r0-r3} + Homing = true; + Step = 2; + break; + } + LLVM_FALLTHROUGH; + case Win64EH::UOP_WideSaveRegMask: + if (Step != 1 && Step != 2) + return false; + // push {r4-r9,r11,lr} + // push {r11,lr} + // push {r1-r5} + if (!parseRegMask(Inst.Register, HasLR, HasR11, PF, IntRegs)) + return false; + Step = 3; + break; + + case Win64EH::UOP_Nop: + // mov r11, sp + if (Step != 3 || !HasR11 || IntRegs >= 0 || PF > 0) + return false; + HasChain = true; + Step = 4; + break; + case Win64EH::UOP_WideNop: + // add.w r11, sp, #xx + if (Step != 3 || !HasR11 || (IntRegs < 0 && PF == 0)) + return false; + HasChain = true; + Step = 4; + break; + + case Win64EH::UOP_SaveFRegD8D15: + if (Step != 1 && Step != 2 && Step != 3 && Step != 4) + return false; + assert(Inst.Register >= 8 && Inst.Register <= 15); + if (Inst.Register == 15) + return false; // Can't pack this case, R==7 means no IntRegs + if (IntRegs >= 0) + return false; + FloatRegs = Inst.Register - 8; + Step = 5; + break; + + case Win64EH::UOP_AllocSmall: + case Win64EH::UOP_WideAllocMedium: + if (Step != 1 && Step != 2 && Step != 3 && Step != 4 && Step != 5) + return false; + if (PF > 0) // Can't have both folded and explicit stack allocation + return false; + if (Inst.Offset / 4 >= 0x3f4) + return false; + StackAdjust = Inst.Offset / 4; + Step = 6; + break; + } + } + if (HasR11 && !HasChain) { + if (IntRegs + 4 == 10) { + // r11 stored, but not chaining; can be packed if already saving r4-r10 + // and we can fit r11 into this range. + IntRegs++; + HasR11 = false; + } else + return false; + } + if (HasChain && !HasLR) + return false; + + // Packed unwind info can't express multiple epilogues. + if (info->EpilogMap.size() > 1) + return false; + + unsigned EF = 0; + int Ret = 0; + if (info->EpilogMap.size() == 0) { + Ret = 3; // No epilogue + } else { + // As the prologue and epilogue aren't exact mirrors of each other, + // we have to check the epilogue too and see if it matches what we've + // concluded from the prologue.
+ const WinEH::FrameInfo::Epilog &EpilogInfo = + info->EpilogMap.begin()->second; + if (EpilogInfo.Condition != 0xe) // ARMCC::AL + return false; + const std::vector &Epilog = EpilogInfo.Instructions; + Optional MaybeDistance = GetOptionalAbsDifference( + streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first); + if (!MaybeDistance) + return false; + uint32_t DistanceFromEnd = (uint32_t)*MaybeDistance; + uint32_t InstructionBytes = ARMCountOfInstructionBytes(Epilog); + if (DistanceFromEnd != InstructionBytes) + return false; + + bool GotStackAdjust = false; + bool GotFloatRegs = false; + bool GotIntRegs = false; + bool GotHomingRestore = false; + bool GotLRRestore = false; + bool NeedsReturn = false; + bool GotReturn = false; + + Step = 6; + for (const WinEH::Instruction &Inst : Epilog) { + switch (Inst.Operation) { + default: + llvm_unreachable("Unsupported ARM unwind code"); + case Win64EH::UOP_Custom: + case Win64EH::UOP_AllocLarge: + case Win64EH::UOP_AllocHuge: + case Win64EH::UOP_WideAllocLarge: + case Win64EH::UOP_WideAllocHuge: + case Win64EH::UOP_SaveFRegD0D15: + case Win64EH::UOP_SaveFRegD16D31: + case Win64EH::UOP_SaveSP: + case Win64EH::UOP_Nop: + case Win64EH::UOP_WideNop: + // Can't be packed in an epilogue + return false; + + case Win64EH::UOP_AllocSmall: + case Win64EH::UOP_WideAllocMedium: + if (Inst.Offset / 4 >= 0x3f4) + return false; + if (Step == 6) { + if (Homing && FloatRegs < 0 && IntRegs < 0 && StackAdjust == 0 && + PF == 0 && Inst.Offset == 16) { + GotHomingRestore = true; + Step = 10; + } else { + if (StackAdjust > 0) { + // Got stack adjust in prologue too; must match. + if (StackAdjust != Inst.Offset / 4) + return false; + GotStackAdjust = true; + } else if (PF == Inst.Offset / 4) { + // Folded prologue, non-folded epilogue + StackAdjust = Inst.Offset / 4; + GotStackAdjust = true; + } else { + // StackAdjust == 0 in prologue, mismatch + return false; + } + Step = 7; + } + } else if (Step == 7 || Step == 8 || Step == 9) { + if (!Homing || Inst.Offset != 16) + return false; + GotHomingRestore = true; + Step = 10; + } else + return false; + break; + + case Win64EH::UOP_SaveFRegD8D15: + if (Step != 6 && Step != 7) + return false; + assert(Inst.Register >= 8 && Inst.Register <= 15); + if (FloatRegs != (int)(Inst.Register - 8)) + return false; + GotFloatRegs = true; + Step = 8; + break; + + case Win64EH::UOP_SaveRegsR4R7LR: + case Win64EH::UOP_WideSaveRegsR4R11LR: { + // push {r4-r11,lr} + if (Step != 6 && Step != 7 && Step != 8) + return false; + assert(Inst.Register >= 4 && Inst.Register <= 11); // r4-rX + assert(Inst.Offset <= 1); // Lr + if (Homing && HasLR) { + // If homing and LR is backed up, we can either restore LR here + // and return with Ret == 1 or 2, or return with SaveLR below + if (Inst.Offset) { + GotLRRestore = true; + NeedsReturn = true; + } else { + // Expecting a separate SaveLR below + } + } else { + if (HasLR != (Inst.Offset == 1)) + return false; + } + GotLRRestore = Inst.Offset == 1; + if (IntRegs < 0) // This opcode must include r4 + return false; + int Expected = IntRegs; + if (HasChain) { + // Can't express r11 here unless IntRegs describe r4-r10 + if (IntRegs != 6) + return false; + Expected++; + } + if (Expected != (int)(Inst.Register - 4)) + return false; + GotIntRegs = true; + Step = 9; + break; + } + + case Win64EH::UOP_SaveRegMask: + case Win64EH::UOP_WideSaveRegMask: { + if (Step != 6 && Step != 7 && Step != 8) + return false; + // push {r4-r9,r11,lr} + // push {r11,lr} + // push {r1-r5} + bool CurHasLR = false, CurHasR11 
= false; + int Regs; + if (!parseRegMask(Inst.Register, CurHasLR, CurHasR11, EF, Regs)) + return false; + if (EF > 0) { + if (EF != PF && EF != StackAdjust) + return false; + } + if (Homing && HasLR) { + // If homing and LR is backed up, we can either restore LR here + // and return with Ret == 1 or 2, or return with SaveLR below + if (CurHasLR) { + GotLRRestore = true; + NeedsReturn = true; + } else { + // Expecting a separate SaveLR below + } + } else { + if (CurHasLR != HasLR) + return false; + GotLRRestore = CurHasLR; + } + int Expected = IntRegs; + if (HasChain) { + // If we have chaining, the mask must have included r11. + if (!CurHasR11) + return false; + } else if (Expected == 7) { + // If we don't have chaining, the mask could still include r11, + // expressed as part of IntRegs Instead. + Expected--; + if (!CurHasR11) + return false; + } else { + // Neither HasChain nor r11 included in IntRegs, must not have r11 + // here either. + if (CurHasR11) + return false; + } + if (Expected != Regs) + return false; + GotIntRegs = true; + Step = 9; + break; + } + + case Win64EH::UOP_SaveLR: + if (Step != 6 && Step != 7 && Step != 8 && Step != 9) + return false; + if (!Homing || Inst.Offset != 20 || GotLRRestore) + return false; + GotLRRestore = true; + GotHomingRestore = true; + Step = 10; + break; + + case Win64EH::UOP_EndNop: + case Win64EH::UOP_WideEndNop: + GotReturn = true; + Ret = (Inst.Operation == Win64EH::UOP_EndNop) ? 1 : 2; + LLVM_FALLTHROUGH; + case Win64EH::UOP_End: + if (Step != 6 && Step != 7 && Step != 8 && Step != 9 && Step != 10) + return false; + Step = 11; + break; + } + } + + if (Step != 11) + return false; + if (StackAdjust > 0 && !GotStackAdjust && EF == 0) + return false; + if (FloatRegs >= 0 && !GotFloatRegs) + return false; + if (IntRegs >= 0 && !GotIntRegs) + return false; + if (Homing && !GotHomingRestore) + return false; + if (HasLR && !GotLRRestore) + return false; + if (NeedsReturn && !GotReturn) + return false; + } + + assert(PF == 0 || EF == 0 || + StackAdjust == 0); // Can't have adjust in all three + if (PF > 0 || EF > 0) { + StackAdjust = PF > 0 ? (PF - 1) : (EF - 1); + assert(StackAdjust <= 3); + StackAdjust |= 0x3f0; + if (PF > 0) + StackAdjust |= 1 << 2; + if (EF > 0) + StackAdjust |= 1 << 3; + } + + assert(FuncLength <= 0x7FF && "FuncLength should have been checked earlier"); + int Flag = info->Fragment ? 0x02 : 0x01; + int H = Homing ? 1 : 0; + int L = HasLR ? 1 : 0; + int C = HasChain ? 1 : 0; + assert(IntRegs < 0 || FloatRegs < 0); + unsigned Reg, R; + if (IntRegs >= 0) { + Reg = IntRegs; + assert(Reg <= 7); + R = 0; + } else if (FloatRegs >= 0) { + Reg = FloatRegs; + assert(Reg < 7); + R = 1; + } else { + // No int or float regs stored (except possibly R11,LR) + Reg = 7; + R = 1; + } + info->PackedInfo |= Flag << 0; + info->PackedInfo |= (FuncLength & 0x7FF) << 2; + info->PackedInfo |= (Ret & 0x3) << 13; + info->PackedInfo |= H << 15; + info->PackedInfo |= Reg << 16; + info->PackedInfo |= R << 19; + info->PackedInfo |= L << 20; + info->PackedInfo |= C << 21; + assert(StackAdjust <= 0x3ff); + info->PackedInfo |= StackAdjust << 22; + return true; +} + +// Populate the .xdata section. The format of .xdata on ARM is documented at +// https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling +static void ARMEmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info, + bool TryPacked = true) { + // If this UNWIND_INFO already has a symbol, it's already been emitted. 
+  if (info->Symbol)
+    return;
+  // If there's no unwind info here (not even a terminating UOP_End), the
+  // unwind info is considered bogus and skipped. If this was done in
+  // response to an explicit .seh_handlerdata, the associated trailing
+  // handler data is left orphaned in the xdata section.
+  if (info->empty()) {
+    info->EmitAttempted = true;
+    return;
+  }
+  if (info->EmitAttempted) {
+    // If we tried to emit unwind info before (due to an explicit
+    // .seh_handlerdata directive), but skipped it (because there was no
+    // valid information to emit at the time), and it later got valid unwind
+    // opcodes, we can't emit it here, because the trailing handler data
+    // was already emitted elsewhere in the xdata section.
+    streamer.getContext().reportError(
+        SMLoc(), "Earlier .seh_handlerdata for " + info->Function->getName() +
+                     " skipped due to no unwind info at the time "
+                     "(.seh_handlerdata too early?), but the function later "
+                     "did get unwind info that can't be emitted");
+    return;
+  }
+
+  MCContext &context = streamer.getContext();
+  MCSymbol *Label = context.createTempSymbol();
+
+  streamer.emitValueToAlignment(4);
+  streamer.emitLabel(Label);
+  info->Symbol = Label;
+
+  if (!info->PrologEnd)
+    streamer.getContext().reportError(SMLoc(), "Prologue in " +
+                                                   info->Function->getName() +
+                                                   " not correctly terminated");
+
+  if (info->PrologEnd && !info->Fragment)
+    checkARMInstructions(streamer, info->Instructions, info->Begin,
+                         info->PrologEnd, info->Function->getName(),
+                         "prologue");
+  for (auto &I : info->EpilogMap) {
+    MCSymbol *EpilogStart = I.first;
+    auto &Epilog = I.second;
+    checkARMInstructions(streamer, Epilog.Instructions, EpilogStart, Epilog.End,
+                         info->Function->getName(), "epilogue");
+    if (Epilog.Instructions.empty() ||
+        !isARMTerminator(Epilog.Instructions.back()))
+      streamer.getContext().reportError(
+          SMLoc(), "Epilogue in " + info->Function->getName() +
+                       " not correctly terminated");
+  }
+
+  Optional<int64_t> RawFuncLength;
+  const MCExpr *FuncLengthExpr = nullptr;
+  if (!info->FuncletOrFuncEnd) {
+    report_fatal_error("FuncletOrFuncEnd not set");
+  } else {
+    // As the size of many thumb2 instructions isn't known until later,
+    // we can't always rely on being able to calculate the absolute
+    // length of the function here. If we can't calculate it, defer it
+    // to a relocation.
+    //
+    // In such a case, we won't know if the function is too long so that
+    // the unwind info would need to be split (but this isn't implemented
+    // anyway).
+    RawFuncLength =
+        GetOptionalAbsDifference(streamer, info->FuncletOrFuncEnd, info->Begin);
+    if (!RawFuncLength)
+      FuncLengthExpr =
+          GetSubDivExpr(streamer, info->FuncletOrFuncEnd, info->Begin, 2);
+  }
+  uint32_t FuncLength = 0;
+  if (RawFuncLength)
+    FuncLength = (uint32_t)*RawFuncLength / 2;
+  if (FuncLength > 0x3FFFF)
+    report_fatal_error("SEH unwind data splitting not yet implemented");
+  uint32_t PrologCodeBytes = ARMCountOfUnwindCodes(info->Instructions);
+  uint32_t TotalCodeBytes = PrologCodeBytes;
+
+  if (!info->HandlesExceptions && RawFuncLength && FuncLength <= 0x7ff &&
+      TryPacked) {
+    // No exception handlers; check if the prolog and epilog matches the
+    // patterns that can be described by the packed format. If we don't
+    // know the exact function length yet, we can't do this.
+
+    // info->Symbol was already set even if we didn't actually write any
+    // unwind info there. Keep using that as indicator that this unwind
+    // info has been generated already.
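For orientation, the 32-bit packed .pdata word that tryARMPackedUnwind (defined above, invoked just below) assembles can be sketched on its own. This is a reading aid, not part of the patch; the helper name is ours, and the field meanings simply mirror the shift/mask sequence shown earlier:

#include <cstdint>

// Mirrors the info->PackedInfo |= ... sequence in tryARMPackedUnwind above.
static uint32_t packARMPdataWord(uint32_t Flag, uint32_t FuncLength,
                                 uint32_t Ret, bool Homing, uint32_t Reg,
                                 uint32_t R, bool HasLR, bool HasChain,
                                 uint32_t StackAdjust) {
  uint32_t Word = 0;
  Word |= Flag << 0;                  // 1 = packed unwind data, 2 = fragment
  Word |= (FuncLength & 0x7FF) << 2;  // function length in halfwords
  Word |= (Ret & 0x3) << 13;          // return style
  Word |= (Homing ? 1u : 0u) << 15;   // H: parameter homing
  Word |= Reg << 16;                  // last saved register number
  Word |= R << 19;                    // R: 0 = int regs, 1 = float regs
  Word |= (HasLR ? 1u : 0u) << 20;    // L: LR saved
  Word |= (HasChain ? 1u : 0u) << 21; // C: r11 frame chain
  Word |= StackAdjust << 22;          // 10-bit stack adjustment field
  return Word;
}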
+
+    if (tryARMPackedUnwind(streamer, info, FuncLength))
+      return;
+  }
+
+  int PackedEpilogOffset =
+      checkARMPackedEpilog(streamer, info, PrologCodeBytes);
+
+  // Process epilogs.
+  MapVector<MCSymbol *, uint32_t> EpilogInfo;
+  // Epilogs processed so far.
+  std::vector<MCSymbol *> AddedEpilogs;
+
+  bool CanTweakProlog = true;
+  for (auto &I : info->EpilogMap) {
+    MCSymbol *EpilogStart = I.first;
+    auto &EpilogInstrs = I.second.Instructions;
+    uint32_t CodeBytes = ARMCountOfUnwindCodes(EpilogInstrs);
+
+    MCSymbol *MatchingEpilog =
+        FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+    int PrologOffset;
+    if (MatchingEpilog) {
+      assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+             "Duplicate epilog not found");
+      EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
+      // Clear the unwind codes in the EpilogMap, so that they don't get output
+      // in the logic below.
+      EpilogInstrs.clear();
+    } else if ((PrologOffset = getARMOffsetInProlog(
+                    info->Instructions, EpilogInstrs, CanTweakProlog)) >= 0) {
+      if (CanTweakProlog) {
+        // Replace the regular end opcode of the prolog with the one from the
+        // epilog.
+        info->Instructions.front() = EpilogInstrs.back();
+        // Later epilogs need a strict match for the end opcode.
+        CanTweakProlog = false;
+      }
+      EpilogInfo[EpilogStart] = PrologOffset;
+      // Clear the unwind codes in the EpilogMap, so that they don't get output
+      // in the logic below.
+      EpilogInstrs.clear();
+    } else {
+      EpilogInfo[EpilogStart] = TotalCodeBytes;
+      TotalCodeBytes += CodeBytes;
+      AddedEpilogs.push_back(EpilogStart);
+    }
+  }
+
+  // Code Words, Epilog count, F, E, X, Vers, Function Length
+  uint32_t row1 = 0x0;
+  uint32_t CodeWords = TotalCodeBytes / 4;
+  uint32_t CodeWordsMod = TotalCodeBytes % 4;
+  if (CodeWordsMod)
+    CodeWords++;
+  uint32_t EpilogCount =
+      PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
+  bool ExtensionWord = EpilogCount > 31 || CodeWords > 15;
+  if (!ExtensionWord) {
+    row1 |= (EpilogCount & 0x1F) << 23;
+    row1 |= (CodeWords & 0x0F) << 28;
+  }
+  if (info->HandlesExceptions) // X
+    row1 |= 1 << 20;
+  if (PackedEpilogOffset >= 0) // E
+    row1 |= 1 << 21;
+  if (info->Fragment) // F
+    row1 |= 1 << 22;
+  row1 |= FuncLength & 0x3FFFF;
+  if (RawFuncLength)
+    streamer.emitInt32(row1);
+  else
+    streamer.emitValue(
+        MCBinaryExpr::createOr(FuncLengthExpr,
+                               MCConstantExpr::create(row1, context), context),
+        4);
+
+  // Extended Code Words, Extended Epilog Count
+  if (ExtensionWord) {
+    // FIXME: We should be able to split unwind info into multiple sections.
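// (Editorial note: when EpilogCount or CodeWords does not fit in the first
// header word -- bits 23-27 and 28-31 of row1, which are left zero above in
// that case -- both counts move into this extension word instead, with
// EpilogCount in bits 0-15 and CodeWords in bits 16-23, as the row2 packing
// just below shows.)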
+ if (CodeWords > 0xFF || EpilogCount > 0xFFFF) + report_fatal_error("SEH unwind data splitting not yet implemented"); + uint32_t row2 = 0x0; + row2 |= (CodeWords & 0xFF) << 16; + row2 |= (EpilogCount & 0xFFFF); + streamer.emitInt32(row2); + } + + if (PackedEpilogOffset < 0) { + // Epilog Start Index, Epilog Start Offset + for (auto &I : EpilogInfo) { + MCSymbol *EpilogStart = I.first; + uint32_t EpilogIndex = I.second; + + Optional MaybeEpilogOffset = + GetOptionalAbsDifference(streamer, EpilogStart, info->Begin); + const MCExpr *OffsetExpr = nullptr; + uint32_t EpilogOffset = 0; + if (MaybeEpilogOffset) + EpilogOffset = *MaybeEpilogOffset / 2; + else + OffsetExpr = GetSubDivExpr(streamer, EpilogStart, info->Begin, 2); + + assert(info->EpilogMap.find(EpilogStart) != info->EpilogMap.end()); + unsigned Condition = info->EpilogMap[EpilogStart].Condition; + assert(Condition <= 0xf); + + uint32_t row3 = EpilogOffset; + row3 |= Condition << 20; + row3 |= (EpilogIndex & 0x3FF) << 24; + if (MaybeEpilogOffset) + streamer.emitInt32(row3); + else + streamer.emitValue( + MCBinaryExpr::createOr( + OffsetExpr, MCConstantExpr::create(row3, context), context), + 4); + } + } + + // Emit prolog unwind instructions (in reverse order). + uint8_t numInst = info->Instructions.size(); + for (uint8_t c = 0; c < numInst; ++c) { + WinEH::Instruction inst = info->Instructions.back(); + info->Instructions.pop_back(); + ARMEmitUnwindCode(streamer, inst); + } + + // Emit epilog unwind instructions + for (auto &I : info->EpilogMap) { + auto &EpilogInstrs = I.second.Instructions; + for (uint32_t i = 0; i < EpilogInstrs.size(); i++) { + WinEH::Instruction inst = EpilogInstrs[i]; + ARMEmitUnwindCode(streamer, inst); + } + } + + int32_t BytesMod = CodeWords * 4 - TotalCodeBytes; + assert(BytesMod >= 0); + for (int i = 0; i < BytesMod; i++) + streamer.emitInt8(0xFB); + + if (info->HandlesExceptions) + streamer.emitValue( + MCSymbolRefExpr::create(info->ExceptionHandler, + MCSymbolRefExpr::VK_COFF_IMGREL32, context), + 4); +} + +static void ARMEmitRuntimeFunction(MCStreamer &streamer, + const WinEH::FrameInfo *info) { MCContext &context = streamer.getContext(); streamer.emitValueToAlignment(4); @@ -1088,7 +2227,7 @@ void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const { if (Info->empty()) continue; MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection); - Streamer.SwitchSection(XData); + Streamer.switchSection(XData); ARM64EmitUnwindInfo(Streamer, Info); } @@ -1101,8 +2240,8 @@ void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const { if (!Info->Symbol) continue; MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection); - Streamer.SwitchSection(PData); - ARM64EmitRuntimeFunction(Streamer, Info); + Streamer.switchSection(PData); + ARMEmitRuntimeFunction(Streamer, Info); } } @@ -1116,12 +2255,57 @@ void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer, // end hasn't been marked yet, the xdata function length won't cover the // whole function, only up to this point. if (!info->FuncletOrFuncEnd) { - Streamer.SwitchSection(info->TextSection); + Streamer.switchSection(info->TextSection); info->FuncletOrFuncEnd = Streamer.emitCFILabel(); } // Switch sections (the static function above is meant to be called from // here and from Emit(). 
MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection); - Streamer.SwitchSection(XData); + Streamer.switchSection(XData); ARM64EmitUnwindInfo(Streamer, info, /* TryPacked = */ !HandlerData); } + +void llvm::Win64EH::ARMUnwindEmitter::Emit(MCStreamer &Streamer) const { + // Emit the unwind info structs first. + for (const auto &CFI : Streamer.getWinFrameInfos()) { + WinEH::FrameInfo *Info = CFI.get(); + if (Info->empty()) + continue; + MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection); + Streamer.switchSection(XData); + ARMEmitUnwindInfo(Streamer, Info); + } + + // Now emit RUNTIME_FUNCTION entries. + for (const auto &CFI : Streamer.getWinFrameInfos()) { + WinEH::FrameInfo *Info = CFI.get(); + // ARMEmitUnwindInfo above clears the info struct, so we can't check + // empty here. But if a Symbol is set, we should create the corresponding + // pdata entry. + if (!Info->Symbol) + continue; + MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection); + Streamer.switchSection(PData); + ARMEmitRuntimeFunction(Streamer, Info); + } +} + +void llvm::Win64EH::ARMUnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer, + WinEH::FrameInfo *info, + bool HandlerData) const { + // Called if there's an .seh_handlerdata directive before the end of the + // function. This forces writing the xdata record already here - and + // in this case, the function isn't actually ended already, but the xdata + // record needs to know the function length. In these cases, if the funclet + // end hasn't been marked yet, the xdata function length won't cover the + // whole function, only up to this point. + if (!info->FuncletOrFuncEnd) { + Streamer.switchSection(info->TextSection); + info->FuncletOrFuncEnd = Streamer.emitCFILabel(); + } + // Switch sections (the static function above is meant to be called from + // here and from Emit(). + MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection); + Streamer.switchSection(XData); + ARMEmitUnwindInfo(Streamer, info, /* TryPacked = */ !HandlerData); +} diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index 0dfe5a5c2bdb..ad883131eae1 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" @@ -27,14 +28,12 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbolCOFF.h" -#include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/raw_ostream.h" #include -#include #include using namespace llvm; @@ -71,16 +70,16 @@ void MCWinCOFFStreamer::initSections(bool NoExecStack, // FIXME: this is identical to the ELF one. // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. 
- SwitchSection(getContext().getObjectFileInfo()->getTextSection()); + switchSection(getContext().getObjectFileInfo()->getTextSection()); emitCodeAlignment(4, &STI); - SwitchSection(getContext().getObjectFileInfo()->getDataSection()); + switchSection(getContext().getObjectFileInfo()->getDataSection()); emitCodeAlignment(4, &STI); - SwitchSection(getContext().getObjectFileInfo()->getBSSSection()); + switchSection(getContext().getObjectFileInfo()->getBSSSection()); emitCodeAlignment(4, &STI); - SwitchSection(getContext().getObjectFileInfo()->getTextSection()); + switchSection(getContext().getObjectFileInfo()->getTextSection()); } void MCWinCOFFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) { @@ -134,7 +133,7 @@ void MCWinCOFFStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { llvm_unreachable("not implemented"); } -void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *S) { +void MCWinCOFFStreamer::beginCOFFSymbolDef(MCSymbol const *S) { auto *Symbol = cast(S); if (CurSymbol) Error("starting a new symbol definition without completing the " @@ -142,7 +141,7 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *S) { CurSymbol = Symbol; } -void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +void MCWinCOFFStreamer::emitCOFFSymbolStorageClass(int StorageClass) { if (!CurSymbol) { Error("storage class specified outside of symbol definition"); return; @@ -158,7 +157,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { cast(CurSymbol)->setClass((uint16_t)StorageClass); } -void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { +void MCWinCOFFStreamer::emitCOFFSymbolType(int Type) { if (!CurSymbol) { Error("symbol type specified outside of a symbol definition"); return; @@ -173,13 +172,13 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { cast(CurSymbol)->setType((uint16_t)Type); } -void MCWinCOFFStreamer::EndCOFFSymbolDef() { +void MCWinCOFFStreamer::endCOFFSymbolDef() { if (!CurSymbol) Error("ending symbol definition without starting one"); CurSymbol = nullptr; } -void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { +void MCWinCOFFStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) { // SafeSEH is a feature specific to 32-bit x86. It does not exist (and is // unnecessary) on all platforms which use table-based exception dispatch. 
if (getContext().getTargetTriple().getArch() != Triple::x86) @@ -205,7 +204,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { << COFF::SCT_COMPLEX_TYPE_SHIFT); } -void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { +void MCWinCOFFStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) { MCSection *Sec = getCurrentSectionOnly(); getAssembler().registerSection(*Sec); if (Sec->getAlignment() < 4) @@ -216,7 +215,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { getAssembler().registerSymbol(*Symbol); } -void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { +void MCWinCOFFStreamer::emitCOFFSectionIndex(const MCSymbol *Symbol) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); @@ -225,7 +224,7 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { DF->getContents().resize(DF->getContents().size() + 2, 0); } -void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol, +void MCWinCOFFStreamer::emitCOFFSecRel32(const MCSymbol *Symbol, uint64_t Offset) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); @@ -243,7 +242,7 @@ void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol, DF->getContents().resize(DF->getContents().size() + 4, 0); } -void MCWinCOFFStreamer::EmitCOFFImgRel32(const MCSymbol *Symbol, +void MCWinCOFFStreamer::emitCOFFImgRel32(const MCSymbol *Symbol, int64_t Offset) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); @@ -287,10 +286,10 @@ void MCWinCOFFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size, OS << " -aligncomm:\"" << Symbol->getName() << "\"," << Log2_32_Ceil(ByteAlignment); - PushSection(); - SwitchSection(MFI->getDrectveSection()); + pushSection(); + switchSection(MFI->getDrectveSection()); emitBytes(Directive); - PopSection(); + popSection(); } } @@ -299,13 +298,13 @@ void MCWinCOFFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size, auto *Symbol = cast(S); MCSection *Section = getContext().getObjectFileInfo()->getBSSSection(); - PushSection(); - SwitchSection(Section); + pushSection(); + switchSection(Section); emitValueToAlignment(ByteAlignment, 0, 1, 0); emitLabel(Symbol); Symbol->setExternal(false); emitZeros(Size); - PopSection(); + popSection(); } void MCWinCOFFStreamer::emitWeakReference(MCSymbol *AliasS, @@ -334,7 +333,7 @@ void MCWinCOFFStreamer::emitIdent(StringRef IdentString) { llvm_unreachable("not implemented"); } -void MCWinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { +void MCWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { llvm_unreachable("not implemented"); } diff --git a/llvm/lib/MC/MCWinEH.cpp b/llvm/lib/MC/MCWinEH.cpp index e58a0b2cf654..1a6d5a3b562e 100644 --- a/llvm/lib/MC/MCWinEH.cpp +++ b/llvm/lib/MC/MCWinEH.cpp @@ -7,18 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCWinEH.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" namespace llvm { namespace WinEH { -UnwindEmitter::~UnwindEmitter() {} +UnwindEmitter::~UnwindEmitter() = default; } } diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 90604782de13..a4a42279d6e2 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ 
b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -13,12 +13,14 @@ #include "llvm/MC/MCXCOFFStreamer.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" using namespace llvm; @@ -54,6 +56,9 @@ bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym, case llvm::MCSA_Protected: Symbol->setVisibilityType(XCOFF::SYM_V_PROTECTED); break; + case llvm::MCSA_Exported: + Symbol->setVisibilityType(XCOFF::SYM_V_EXPORTED); + break; default: report_fatal_error("Not implemented yet."); } diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index 56bb03ad8d42..78d0d9cec556 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionMachO.h" @@ -29,6 +30,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -751,6 +753,24 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { llvm_unreachable("Invalid mc version min type"); } +// Encode addrsig data as symbol indexes in variable length encoding. +void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) { + MCSection *AddrSigSection = + Asm.getContext().getObjectFileInfo()->getAddrSigSection(); + MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList(); + if (!fragmentList.size()) + return; + + assert(fragmentList.size() == 1); + MCFragment *pFragment = &*fragmentList.begin(); + MCDataFragment *pDataFragment = dyn_cast_or_null(pFragment); + assert(pDataFragment); + + raw_svector_ostream OS(pDataFragment->getContents()); + for (const MCSymbol *sym : this->getAddrsigSyms()) + encodeULEB128(sym->getIndex(), OS); +} + uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { uint64_t StartOffset = W.OS.tell(); @@ -758,6 +778,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, // Compute symbol table information and bind symbol indices. 
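// Editorial aside, not part of the patch: the payload written by the new
// writeAddrsigSection above is nothing more than the ULEB128-encoded symbol
// table index of each address-significant symbol, which is why it must run
// only after computeSymbolTable has bound the indices (as the next lines
// show). A minimal standalone sketch, helper name ours:
//
//   static void writeAddrsigPayload(llvm::raw_ostream &OS,
//                                   llvm::ArrayRef<uint32_t> SymIndices) {
//     for (uint32_t Index : SymIndices)
//       llvm::encodeULEB128(Index, OS); // indices below 128 take one byte
//   }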
computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, UndefinedSymbolData); + writeAddrsigSection(Asm); if (!Asm.CGProfile.empty()) { MCSection *CGProfileSection = Asm.getContext().getMachOSection( @@ -894,8 +915,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, [&](const MCAssembler::VersionInfoType &VersionInfo) { auto EncodeVersion = [](VersionTuple V) -> uint32_t { assert(!V.empty() && "empty version"); - unsigned Update = V.getSubminor().getValueOr(0); - unsigned Minor = V.getMinor().getValueOr(0); + unsigned Update = V.getSubminor().value_or(0); + unsigned Minor = V.getMinor().value_or(0); assert(Update < 256 && "unencodable update target version"); assert(Minor < 256 && "unencodable minor target version"); assert(V.getMajor() < 65536 && "unencodable major target version"); diff --git a/llvm/lib/MC/SPIRVObjectWriter.cpp b/llvm/lib/MC/SPIRVObjectWriter.cpp new file mode 100644 index 000000000000..4a07740e8d14 --- /dev/null +++ b/llvm/lib/MC/SPIRVObjectWriter.cpp @@ -0,0 +1,76 @@ +//===- llvm/MC/MCSPIRVObjectWriter.cpp - SPIR-V Object Writer ----*- C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCSPIRVObjectWriter.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" + +using namespace llvm; + +class SPIRVObjectWriter : public MCObjectWriter { + ::support::endian::Writer W; + + /// The target specific SPIR-V writer instance. + std::unique_ptr TargetObjectWriter; + +public: + SPIRVObjectWriter(std::unique_ptr MOTW, + raw_pwrite_stream &OS) + : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {} + + ~SPIRVObjectWriter() override {} + +private: + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) override {} + + void executePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) override {} + + uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeHeader(const MCAssembler &Asm); +}; + +void SPIRVObjectWriter::writeHeader(const MCAssembler &Asm) { + constexpr uint32_t MagicNumber = 0x07230203; + + // TODO: set the version on a min-necessary basis (just like the translator + // does) requires some refactoring of MCAssembler::VersionInfoType. + constexpr uint32_t Major = 1; + constexpr uint32_t Minor = 0; + constexpr uint32_t VersionNumber = 0 | (Major << 16) | (Minor << 8); + // TODO: check if we could use anything other than 0 (spec allows). + constexpr uint32_t GeneratorMagicNumber = 0; + // TODO: do not hardcode this as well. 
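// Editorial note: the header emitted here is five 32-bit words in order --
// Magic, Version, Generator, Bound, Schema -- so with Major = 1 and
// Minor = 0 the version word above evaluates to
// 0 | (1 << 16) | (0 << 8) == 0x00010000.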
+ constexpr uint32_t Bound = 900; + constexpr uint32_t Schema = 0; + + W.write(MagicNumber); + W.write(VersionNumber); + W.write(GeneratorMagicNumber); + W.write(Bound); + W.write(Schema); +} + +uint64_t SPIRVObjectWriter::writeObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { + uint64_t StartOffset = W.OS.tell(); + writeHeader(Asm); + for (const MCSection &S : Asm) + Asm.writeSectionData(W.OS, &S, Layout); + return W.OS.tell() - StartOffset; +} + +std::unique_ptr +llvm::createSPIRVObjectWriter(std::unique_ptr MOTW, + raw_pwrite_stream &OS) { + return std::make_unique(std::move(MOTW), OS); +} diff --git a/llvm/lib/MC/SubtargetFeature.cpp b/llvm/lib/MC/SubtargetFeature.cpp index 3155adcf2674..d53cc2f7e37b 100644 --- a/llvm/lib/MC/SubtargetFeature.cpp +++ b/llvm/lib/MC/SubtargetFeature.cpp @@ -20,10 +20,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include -#include -#include -#include -#include #include #include diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp index 09684b1e5ad2..57444fd23784 100644 --- a/llvm/lib/MC/TargetRegistry.cpp +++ b/llvm/lib/MC/TargetRegistry.cpp @@ -33,7 +33,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName, [&](const Target &T) { return ArchName == T.getName(); }); if (I == targets().end()) { - Error = "error: invalid target '" + ArchName + "'.\n"; + Error = "invalid target '" + ArchName + "'.\n"; return nullptr; } @@ -49,7 +49,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName, std::string TempError; TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError); if (!TheTarget) { - Error = ": error: unable to get target for '" + Error = "unable to get target for '" + TheTriple.getTriple() + "', see --version and --triple.\n"; return nullptr; diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 636c1d238932..7cc11d24f286 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/BinaryFormat/WasmTraits.h" #include "llvm/Config/llvm-config.h" @@ -31,7 +30,6 @@ #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/StringSaver.h" #include using namespace llvm; @@ -125,12 +123,11 @@ struct WasmCustomSection { StringRef Name; MCSectionWasm *Section; - uint32_t OutputContentsOffset; - uint32_t OutputIndex; + uint32_t OutputContentsOffset = 0; + uint32_t OutputIndex = InvalidIndex; WasmCustomSection(StringRef Name, MCSectionWasm *Section) - : Name(Name), Section(Section), OutputContentsOffset(0), - OutputIndex(InvalidIndex) {} + : Name(Name), Section(Section) {} }; #if !defined(NDEBUG) @@ -140,36 +137,58 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) { } #endif -// Write X as an (unsigned) LEB value at offset Offset in Stream, padded +// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded // to allow patching. 
-template <int W>
-void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeULEB128(X, Buffer, W);
+  unsigned SizeLen = encodeULEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }
 
-// Write X as an signed LEB value at offset Offset in Stream, padded
+// Write Value as a signed LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeSLEB128(X, Buffer, W);
+  unsigned SizeLen = encodeSLEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }
 
-// Write X as a plain integer value at offset Offset in Stream.
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value,
+                              uint64_t Offset) {
+  writePatchableULEB<uint32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<uint64_t, 10>(Stream, Value, Offset);
+}
+
+static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int64_t, 10>(Stream, Value, Offset);
+}
+
+// Write Value as a plain integer value at offset Offset in Stream.
+static void patchI32(raw_pwrite_stream &Stream, uint32_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[4];
-  support::endian::write32le(Buffer, X);
+  support::endian::write32le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }
 
-static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[8];
-  support::endian::write64le(Buffer, X);
+  support::endian::write64le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }
 
@@ -423,8 +442,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
 
   // Write the final section size to the payload_len field, which follows
   // the section id byte.
-  writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size,
-                       Section.SizeOffset);
+  writePatchableU32(static_cast<raw_pwrite_stream &>(W->OS), Size,
+                    Section.SizeOffset);
 }
 
 // Emit the Wasm header.
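These fixed-width helpers exist because a relocation target must keep a stable byte size: the value is LEB128-encoded and padded to exactly 5 bytes (32-bit) or 10 bytes (64-bit) so it can later be patched in place without shifting the bytes that follow. A minimal standalone sketch, assuming only LLVM's LEB128 helpers (function name ours):

#include "llvm/Support/LEB128.h"
#include <cassert>
#include <cstdint>

// Encode Value into exactly five bytes -- the padded width used for 32-bit
// patchable ULEB fields -- so a later pwrite() can overwrite it in place.
static void encodePaddedULEB32(uint32_t Value, uint8_t (&Buffer)[5]) {
  unsigned SizeLen = llvm::encodeULEB128(Value, Buffer, /*PadTo=*/5);
  assert(SizeLen == 5 && "padded encoding always fills the buffer");
  (void)SizeLen;
}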
@@ -755,7 +774,7 @@ void WasmObjectWriter::applyRelocations(
                           RelEntry.Offset;
 
     LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
-    auto Value = getProvisionalValue(RelEntry, Layout);
+    uint64_t Value = getProvisionalValue(RelEntry, Layout);
 
     switch (RelEntry.Type) {
     case wasm::R_WASM_FUNCTION_INDEX_LEB:
@@ -764,10 +783,10 @@ void WasmObjectWriter::applyRelocations(
     case wasm::R_WASM_MEMORY_ADDR_LEB:
     case wasm::R_WASM_TAG_INDEX_LEB:
     case wasm::R_WASM_TABLE_NUMBER_LEB:
-      writePatchableLEB<5>(Stream, Value, Offset);
+      writePatchableU32(Stream, Value, Offset);
       break;
     case wasm::R_WASM_MEMORY_ADDR_LEB64:
-      writePatchableLEB<10>(Stream, Value, Offset);
+      writePatchableU64(Stream, Value, Offset);
       break;
     case wasm::R_WASM_TABLE_INDEX_I32:
     case wasm::R_WASM_MEMORY_ADDR_I32:
@@ -787,14 +806,14 @@ void WasmObjectWriter::applyRelocations(
     case wasm::R_WASM_MEMORY_ADDR_SLEB:
     case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
     case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:
-      writePatchableSLEB<5>(Stream, Value, Offset);
+      writePatchableS32(Stream, Value, Offset);
       break;
     case wasm::R_WASM_TABLE_INDEX_SLEB64:
     case wasm::R_WASM_TABLE_INDEX_REL_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
    case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64:
-      writePatchableSLEB<10>(Stream, Value, Offset);
+      writePatchableS64(Stream, Value, Offset);
       break;
     default:
      llvm_unreachable("invalid relocation type");
@@ -912,25 +931,29 @@ void WasmObjectWriter::writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals) {
   for (const wasm::WasmGlobal &Global : Globals) {
     encodeULEB128(Global.Type.Type, W->OS);
     W->OS << char(Global.Type.Mutable);
-    W->OS << char(Global.InitExpr.Opcode);
-    switch (Global.Type.Type) {
-    case wasm::WASM_TYPE_I32:
-      encodeSLEB128(0, W->OS);
-      break;
-    case wasm::WASM_TYPE_I64:
-      encodeSLEB128(0, W->OS);
-      break;
-    case wasm::WASM_TYPE_F32:
-      writeI32(0);
-      break;
-    case wasm::WASM_TYPE_F64:
-      writeI64(0);
-      break;
-    case wasm::WASM_TYPE_EXTERNREF:
-      writeValueType(wasm::ValType::EXTERNREF);
-      break;
-    default:
-      llvm_unreachable("unexpected type");
+    if (Global.InitExpr.Extended) {
+      llvm_unreachable("extended init expressions not supported");
+    } else {
+      W->OS << char(Global.InitExpr.Inst.Opcode);
+      switch (Global.Type.Type) {
+      case wasm::WASM_TYPE_I32:
+        encodeSLEB128(0, W->OS);
+        break;
+      case wasm::WASM_TYPE_I64:
+        encodeSLEB128(0, W->OS);
+        break;
+      case wasm::WASM_TYPE_F32:
+        writeI32(0);
+        break;
+      case wasm::WASM_TYPE_F64:
+        writeI64(0);
+        break;
+      case wasm::WASM_TYPE_EXTERNREF:
+        writeValueType(wasm::ValType::EXTERNREF);
+        break;
+      default:
+        llvm_unreachable("unexpected type");
+      }
     }
     W->OS << char(wasm::WASM_OPCODE_END);
   }
@@ -1547,9 +1570,9 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
       continue;
 
     const auto &WS = static_cast<const MCSymbolWasm &>(S);
-    LLVM_DEBUG(dbgs()
-               << "MCSymbol: "
-               << toString(WS.getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA))
+    LLVM_DEBUG(
+        dbgs() << "MCSymbol: "
               << toString(WS.getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA))
               << " '" << S << "'"
               << " isDefined=" << S.isDefined() << " isExternal="
               << S.isExternal() << " isTemporary=" << S.isTemporary()
@@ -1639,21 +1662,22 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
       wasm::WasmGlobal Global;
       Global.Type = WS.getGlobalType();
       Global.Index = NumGlobalImports + Globals.size();
+      Global.InitExpr.Extended = false;
       switch (Global.Type.Type) {
       case wasm::WASM_TYPE_I32:
-        Global.InitExpr.Opcode = wasm::WASM_OPCODE_I32_CONST;
+        Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
        break;
      case
wasm::WASM_TYPE_I64: - Global.InitExpr.Opcode = wasm::WASM_OPCODE_I64_CONST; + Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_I64_CONST; break; case wasm::WASM_TYPE_F32: - Global.InitExpr.Opcode = wasm::WASM_OPCODE_F32_CONST; + Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_F32_CONST; break; case wasm::WASM_TYPE_F64: - Global.InitExpr.Opcode = wasm::WASM_OPCODE_F64_CONST; + Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_F64_CONST; break; case wasm::WASM_TYPE_EXTERNREF: - Global.InitExpr.Opcode = wasm::WASM_OPCODE_REF_NULL; + Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_REF_NULL; break; default: llvm_unreachable("unexpected type"); @@ -1785,7 +1809,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, wasm::WasmSymbolInfo Info; Info.Name = WS.getName(); - Info.Kind = WS.getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA); + Info.Kind = WS.getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA); Info.Flags = Flags; if (!WS.isData()) { assert(WasmIndices.count(&WS) > 0); @@ -1852,7 +1876,8 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, const MCFragment &AlignFrag = *IT; if (AlignFrag.getKind() != MCFragment::FT_Align) report_fatal_error(".init_array section should be aligned"); - if (cast(AlignFrag).getAlignment() != (is64Bit() ? 8 : 4)) + if (cast(AlignFrag).getAlignment() != + Align(is64Bit() ? 8 : 4)) report_fatal_error(".init_array section should be aligned for pointers"); const MCFragment &Frag = *std::next(IT); diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 73c687331d30..33e496b7a864 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -41,7 +41,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include #include @@ -155,9 +154,7 @@ public: bool UseBigObj; bool UseOffsetLabels = false; - bool EmitAddrsigSection = false; MCSectionCOFF *AddrsigSection; - std::vector AddrsigSyms; MCSectionCOFF *CGProfileSection = nullptr; @@ -221,11 +218,6 @@ public: void assignSectionNumbers(); void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout); - void emitAddrsigSection() override { EmitAddrsigSection = true; } - void addAddrsigSymbol(const MCSymbol *Sym) override { - AddrsigSyms.push_back(Sym); - } - uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; @@ -452,32 +444,6 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym, Sym->MC = &MCSym; } -// Maximum offsets for different string table entry encodings. -enum : unsigned { Max7DecimalOffset = 9999999U }; -enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6, including 0 - -// Encode a string table entry offset in base 64, padded to 6 chars, and -// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ... -// Buffer must be at least 8 bytes large. No terminating null appended. 
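// Editorial note: the helper deleted below, together with the offset
// encoding removed from SetSectionName further down, moves behind the new
// COFF::encodeSectionName call. The scheme itself is unchanged: names of up
// to COFF::NameSize (8) bytes are stored inline, longer names become a
// "/<decimal string-table offset>" up to 9,999,999, and beyond that the
// "//AAAAAA"-style base64 form encodes offsets up to 64^6 - 1.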
-static void encodeBase64StringEntry(char *Buffer, uint64_t Value) { - assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset && - "Illegal section name encoding for value"); - - static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - - Buffer[0] = '/'; - Buffer[1] = '/'; - - char *Ptr = Buffer + 7; - for (unsigned i = 0; i < 6; ++i) { - unsigned Rem = Value % 64; - Value /= 64; - *(Ptr--) = Alphabet[Rem]; - } -} - void WinCOFFObjectWriter::SetSectionName(COFFSection &S) { if (S.Name.size() <= COFF::NameSize) { std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size()); @@ -485,19 +451,8 @@ void WinCOFFObjectWriter::SetSectionName(COFFSection &S) { } uint64_t StringTableEntry = Strings.getOffset(S.Name); - if (StringTableEntry <= Max7DecimalOffset) { - SmallVector Buffer; - Twine('/').concat(Twine(StringTableEntry)).toVector(Buffer); - assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2); - std::memcpy(S.Header.Name, Buffer.data(), Buffer.size()); - return; - } - if (StringTableEntry <= MaxBase64Offset) { - // Starting with 10,000,000, offsets are encoded as base64. - encodeBase64StringEntry(S.Header.Name, StringTableEntry); - return; - } - report_fatal_error("COFF string table is greater than 64 GB."); + if (!COFF::encodeSectionName(S.Header.Name, StringTableEntry)) + report_fatal_error("COFF string table is greater than 64 GB."); } void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) { @@ -1003,7 +958,7 @@ void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm, for (const auto &Section : Asm) { COFFSection *Sec = SectionMap[&Section]; - if (Sec->Number == -1) + if (!Sec || Sec->Number == -1) continue; Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section); diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index 177253d7a9d7..977e77bf67fd 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -22,8 +22,9 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/MCXCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" -#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -65,6 +66,10 @@ struct Symbol { const MCSymbolXCOFF *const MCSym; uint32_t SymbolTableIndex; + XCOFF::VisibilityType getVisibilityType() const { + return MCSym->getVisibilityType(); + } + XCOFF::StorageClass getStorageClass() const { return MCSym->getStorageClass(); } @@ -77,12 +82,15 @@ struct Symbol { struct XCOFFSection { const MCSectionXCOFF *const MCSec; uint32_t SymbolTableIndex; - uint32_t Address; - uint32_t Size; + uint64_t Address; + uint64_t Size; SmallVector Syms; SmallVector Relocations; StringRef getSymbolTableName() const { return MCSec->getSymbolTableName(); } + XCOFF::VisibilityType getVisibilityType() const { + return MCSec->getVisibilityType(); + } XCOFFSection(const MCSectionXCOFF *MCSec) : MCSec(MCSec), SymbolTableIndex(-1), Address(-1), Size(0) {} }; @@ -100,10 +108,10 @@ struct SectionEntry { char Name[XCOFF::NameSize]; // The physical/virtual address of the section. For an object file // these values are equivalent. 
-  uint32_t Address;
-  uint32_t Size;
-  uint32_t FileOffsetToData;
-  uint32_t FileOffsetToRelocations;
+  uint64_t Address;
+  uint64_t Size;
+  uint64_t FileOffsetToData;
+  uint64_t FileOffsetToRelocations;
   uint32_t RelocationCount;
   int32_t Flags;
@@ -136,7 +144,7 @@ struct SectionEntry {
     Index = UninitializedIndex;
   }
 
-  virtual ~SectionEntry() {}
+  virtual ~SectionEntry() = default;
 };
 
 // Represents the data related to a section excluding the csects that make up
@@ -165,16 +173,21 @@ struct CsectSectionEntry : public SectionEntry {
     Group->clear();
   }
 
-  virtual ~CsectSectionEntry() {}
+  virtual ~CsectSectionEntry() = default;
 };
 
 struct DwarfSectionEntry : public SectionEntry {
   // For DWARF section entry.
   std::unique_ptr<XCOFFSection> DwarfSect;
 
+  // For DWARF section, we must use real size in the section header. MemorySize
+  // is for the size the DWARF section occupies including paddings.
+  uint32_t MemorySize;
+
   DwarfSectionEntry(StringRef N, int32_t Flags,
                     std::unique_ptr<XCOFFSection> Sect)
-      : SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)) {
+      : SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)),
+        MemorySize(0) {
     assert(DwarfSect->MCSec->isDwarfSect() &&
           "This should be a DWARF section!");
    assert(N.size() <= XCOFF::NameSize && "section name too long");
@@ -183,20 +196,24 @@ struct DwarfSectionEntry : public SectionEntry {
 
   DwarfSectionEntry(DwarfSectionEntry &&s) = default;
 
-  virtual ~DwarfSectionEntry() {}
+  virtual ~DwarfSectionEntry() = default;
 };
 
 class XCOFFObjectWriter : public MCObjectWriter {
 
   uint32_t SymbolTableEntryCount = 0;
-  uint32_t SymbolTableOffset = 0;
+  uint64_t SymbolTableOffset = 0;
   uint16_t SectionCount = 0;
-  uint32_t RelocationEntryOffset = 0;
+  uint64_t RelocationEntryOffset = 0;
+  std::vector<std::pair<std::string, size_t>> FileNames;
 
   support::endian::Writer W;
   std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
   StringTableBuilder Strings;
 
+  const uint64_t MaxRawDataSize =
+      TargetObjectWriter->is64Bit() ? UINT64_MAX : UINT32_MAX;
+
   // Maps the MCSection representation to its corresponding XCOFFSection
   // wrapper. Needed for finding the XCOFFSection to insert an MCSymbol into
   // from its containing MCSectionXCOFF.
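A pattern worth noting in this file's 64-bit support: file offsets are now carried in uint64_t and validated against the MaxRawDataSize ceiling above rather than assumed to fit in 32 bits. A minimal sketch of that guard (helper name ours):

#include <cstdint>
#include <limits>

// XCOFF32 caps raw-data offsets at UINT32_MAX; XCOFF64 effectively removes
// the limit, so the check degenerates to always-true there.
static bool rawPointerFits(bool Is64Bit, uint64_t RawPointer) {
  const uint64_t MaxRawDataSize =
      Is64Bit ? std::numeric_limits<uint64_t>::max()
              : std::numeric_limits<uint32_t>::max();
  return RawPointer <= MaxRawDataSize;
}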
@@ -244,26 +261,39 @@ class XCOFFObjectWriter : public MCObjectWriter { uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override; - static bool nameShouldBeInStringTable(const StringRef &); + bool is64Bit() const { return TargetObjectWriter->is64Bit(); } + bool nameShouldBeInStringTable(const StringRef &); void writeSymbolName(const StringRef &); - void writeSymbolTableEntryForCsectMemberLabel(const Symbol &, - const XCOFFSection &, int16_t, - uint64_t); - void writeSymbolTableEntryForControlSection(const XCOFFSection &, int16_t, - XCOFF::StorageClass); - void writeSymbolTableEntryForDwarfSection(const XCOFFSection &, int16_t); + + void writeSymbolEntryForCsectMemberLabel(const Symbol &SymbolRef, + const XCOFFSection &CSectionRef, + int16_t SectionIndex, + uint64_t SymbolOffset); + void writeSymbolEntryForControlSection(const XCOFFSection &CSectionRef, + int16_t SectionIndex, + XCOFF::StorageClass StorageClass); + void writeSymbolEntryForDwarfSection(const XCOFFSection &DwarfSectionRef, + int16_t SectionIndex); void writeFileHeader(); void writeSectionHeaderTable(); void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout); void writeSectionForControlSectionEntry(const MCAssembler &Asm, const MCAsmLayout &Layout, const CsectSectionEntry &CsectEntry, - uint32_t &CurrentAddressLocation); + uint64_t &CurrentAddressLocation); void writeSectionForDwarfSectionEntry(const MCAssembler &Asm, const MCAsmLayout &Layout, const DwarfSectionEntry &DwarfEntry, - uint32_t &CurrentAddressLocation); + uint64_t &CurrentAddressLocation); void writeSymbolTable(const MCAsmLayout &Layout); + void writeSymbolAuxDwarfEntry(uint64_t LengthOfSectionPortion, + uint64_t NumberOfRelocEnt = 0); + void writeSymbolAuxCsectEntry(uint64_t SectionOrLength, + uint8_t SymbolAlignmentAndType, + uint8_t StorageMappingClass); + void writeSymbolEntry(StringRef SymbolName, uint64_t Value, + int16_t SectionNumber, uint16_t SymbolType, + uint8_t StorageClass, uint8_t NumberOfAuxEntries = 1); void writeRelocations(); void writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &Section); @@ -278,10 +308,8 @@ class XCOFFObjectWriter : public MCObjectWriter { void assignAddressesAndIndices(const MCAsmLayout &); void finalizeSectionInfo(); - bool - needsAuxiliaryHeader() const { /* TODO aux header support not implemented. */ - return false; - } + // TODO aux header support not implemented. + bool needsAuxiliaryHeader() const { return false; } // Returns the size of the auxiliary header to be written to the object file. size_t auxiliaryHeaderSize() const { @@ -293,6 +321,10 @@ class XCOFFObjectWriter : public MCObjectWriter { public: XCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS); + + void writeWord(uint64_t Word) { + is64Bit() ? 
W.write(Word) : W.write(Word); + } }; XCOFFObjectWriter::XCOFFObjectWriter( @@ -396,9 +428,6 @@ static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) { void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { - if (TargetObjectWriter->is64Bit()) - report_fatal_error("64-bit XCOFF object files are not supported yet."); - for (const auto &S : Asm) { const auto *MCSec = cast(&S); assert(SectionMap.find(MCSec) == SectionMap.end() && @@ -424,7 +453,7 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, SectionMap[MCSec] = DwarfSec.get(); DwarfSectionEntry SecEntry(MCSec->getName(), - MCSec->getDwarfSubtypeFlags().getValue(), + *MCSec->getDwarfSubtypeFlags(), std::move(DwarfSec)); DwarfSections.push_back(std::move(SecEntry)); } else @@ -470,6 +499,15 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, Strings.add(XSym->getSymbolTableName()); } + FileNames = Asm.getFileNames(); + // Emit ".file" as the source file name when there is no file name. + if (FileNames.empty()) + FileNames.emplace_back(".file", 0); + for (const std::pair &F : FileNames) { + if (nameShouldBeInStringTable(F.first)) + Strings.add(F.first); + } + Strings.finalize(); assignAddressesAndIndices(Layout); } @@ -547,10 +585,9 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, FixedValue = TOCEntryOffset; } - assert( - (TargetObjectWriter->is64Bit() || - Fixup.getOffset() <= UINT32_MAX - Layout.getFragmentOffset(Fragment)) && - "Fragment offset + fixup offset is overflowed in 32-bit mode."); + assert((Fixup.getOffset() <= + MaxRawDataSize - Layout.getFragmentOffset(Fragment)) && + "Fragment offset + fixup offset is overflowed."); uint32_t FixupOffsetInCsect = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -590,7 +627,7 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, void XCOFFObjectWriter::writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout) { - uint32_t CurrentAddressLocation = 0; + uint64_t CurrentAddressLocation = 0; for (const auto *Section : Sections) writeSectionForControlSectionEntry(Asm, Layout, *Section, CurrentAddressLocation); @@ -607,9 +644,6 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, if (Asm.isIncrementalLinkerCompatible()) report_fatal_error("Incremental linking not supported for XCOFF."); - if (TargetObjectWriter->is64Bit()) - report_fatal_error("64-bit XCOFF object files are not supported yet."); - finalizeSectionInfo(); uint64_t StartOffset = W.OS.tell(); @@ -617,7 +651,6 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, writeSectionHeaderTable(); writeSections(Asm, Layout); writeRelocations(); - writeSymbolTable(Layout); // Write the string table. Strings.write(W.OS); @@ -626,142 +659,130 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, } bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) { - return SymbolName.size() > XCOFF::NameSize; + return SymbolName.size() > XCOFF::NameSize || is64Bit(); } void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) { + // Magic, Offset or SymbolName. 
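// Editorial note: in XCOFF32 a symbol name of eight bytes or fewer is
// stored inline in the entry, while a longer name stores four zero bytes
// followed by a 32-bit string table offset. XCOFF64 drops the inline form
// entirely and always stores the offset, which is why
// nameShouldBeInStringTable above now also returns true when is64Bit().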
if (nameShouldBeInStringTable(SymbolName)) { W.write(0); W.write(Strings.getOffset(SymbolName)); } else { - char Name[XCOFF::NameSize+1]; + char Name[XCOFF::NameSize + 1]; std::strncpy(Name, SymbolName.data(), XCOFF::NameSize); ArrayRef NameRef(Name, XCOFF::NameSize); W.write(NameRef); } } -void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel( - const Symbol &SymbolRef, const XCOFFSection &CSectionRef, - int16_t SectionIndex, uint64_t SymbolOffset) { - // Name or Zeros and string table offset - writeSymbolName(SymbolRef.getSymbolTableName()); - assert(SymbolOffset <= UINT32_MAX - CSectionRef.Address && - "Symbol address overflows."); - W.write(CSectionRef.Address + SymbolOffset); - W.write(SectionIndex); +void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value, + int16_t SectionNumber, + uint16_t SymbolType, + uint8_t StorageClass, + uint8_t NumberOfAuxEntries) { + if (is64Bit()) { + W.write(Value); + W.write(Strings.getOffset(SymbolName)); + } else { + writeSymbolName(SymbolName); + W.write(Value); + } + W.write(SectionNumber); // Basic/Derived type. See the description of the n_type field for symbol // table entries for a detailed description. Since we don't yet support // visibility, and all other bits are either optionally set or reserved, this // is always zero. - // TODO FIXME How to assert a symbol's visibilty is default? + if (SymbolType != 0) + report_fatal_error("Emitting non-zero visibilities is not supported yet."); // TODO Set the function indicator (bit 10, 0x0020) for functions // when debugging is enabled. - W.write(0); - W.write(SymbolRef.getStorageClass()); - // Always 1 aux entry for now. - W.write(1); - - // Now output the auxiliary entry. - W.write(CSectionRef.SymbolTableIndex); - // Parameter typecheck hash. Not supported. - W.write(0); - // Typecheck section number. Not supported. - W.write(0); - // Symbol type: Label - W.write(XCOFF::XTY_LD); - // Storage mapping class. - W.write(CSectionRef.MCSec->getMappingClass()); - // Reserved (x_stab). - W.write(0); - // Reserved (x_snstab). - W.write(0); + W.write(SymbolType); + W.write(StorageClass); + W.write(NumberOfAuxEntries); } -void XCOFFObjectWriter::writeSymbolTableEntryForDwarfSection( +void XCOFFObjectWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength, + uint8_t SymbolAlignmentAndType, + uint8_t StorageMappingClass) { + W.write(is64Bit() ? 
Lo_32(SectionOrLength) : SectionOrLength); + W.write(0); // ParameterHashIndex + W.write(0); // TypeChkSectNum + W.write(SymbolAlignmentAndType); + W.write(StorageMappingClass); + if (is64Bit()) { + W.write(Hi_32(SectionOrLength)); + W.OS.write_zeros(1); // Reserved + W.write(XCOFF::AUX_CSECT); + } else { + W.write(0); // StabInfoIndex + W.write(0); // StabSectNum + } +} + +void XCOFFObjectWriter::writeSymbolAuxDwarfEntry( + uint64_t LengthOfSectionPortion, uint64_t NumberOfRelocEnt) { + writeWord(LengthOfSectionPortion); + if (!is64Bit()) + W.OS.write_zeros(4); // Reserved + writeWord(NumberOfRelocEnt); + if (is64Bit()) { + W.OS.write_zeros(1); // Reserved + W.write(XCOFF::AUX_SECT); + } else { + W.OS.write_zeros(6); // Reserved + } +} + +void XCOFFObjectWriter::writeSymbolEntryForCsectMemberLabel( + const Symbol &SymbolRef, const XCOFFSection &CSectionRef, + int16_t SectionIndex, uint64_t SymbolOffset) { + assert(SymbolOffset <= MaxRawDataSize - CSectionRef.Address && + "Symbol address overflowed."); + + writeSymbolEntry(SymbolRef.getSymbolTableName(), + CSectionRef.Address + SymbolOffset, SectionIndex, + SymbolRef.getVisibilityType(), SymbolRef.getStorageClass()); + + writeSymbolAuxCsectEntry(CSectionRef.SymbolTableIndex, XCOFF::XTY_LD, + CSectionRef.MCSec->getMappingClass()); +} + +void XCOFFObjectWriter::writeSymbolEntryForDwarfSection( const XCOFFSection &DwarfSectionRef, int16_t SectionIndex) { assert(DwarfSectionRef.MCSec->isDwarfSect() && "Not a DWARF section!"); - // n_name, n_zeros, n_offset - writeSymbolName(DwarfSectionRef.getSymbolTableName()); - // n_value - W.write(0); - // n_scnum - W.write(SectionIndex); - // n_type - W.write(0); - // n_sclass - W.write(XCOFF::C_DWARF); - // Always 1 aux entry for now. - W.write(1); - - // Now output the auxiliary entry. - // x_scnlen - W.write(DwarfSectionRef.Size); - // Reserved - W.write(0); - // x_nreloc. Set to 0 for now. - W.write(0); - // Reserved - W.write(0); - // Reserved - W.write(0); + writeSymbolEntry(DwarfSectionRef.getSymbolTableName(), /*Value=*/0, + SectionIndex, /*SymbolType=*/0, XCOFF::C_DWARF); + + writeSymbolAuxDwarfEntry(DwarfSectionRef.Size); } -void XCOFFObjectWriter::writeSymbolTableEntryForControlSection( +void XCOFFObjectWriter::writeSymbolEntryForControlSection( const XCOFFSection &CSectionRef, int16_t SectionIndex, XCOFF::StorageClass StorageClass) { - // n_name, n_zeros, n_offset - writeSymbolName(CSectionRef.getSymbolTableName()); - // n_value - W.write(CSectionRef.Address); - // n_scnum - W.write(SectionIndex); - // Basic/Derived type. See the description of the n_type field for symbol - // table entries for a detailed description. Since we don't yet support - // visibility, and all other bits are either optionally set or reserved, this - // is always zero. - // TODO FIXME How to assert a symbol's visibilty is default? - // TODO Set the function indicator (bit 10, 0x0020) for functions - // when debugging is enabled. - W.write(0); - // n_sclass - W.write(StorageClass); - // Always 1 aux entry for now. - W.write(1); - - // Now output the auxiliary entry. - W.write(CSectionRef.Size); - // Parameter typecheck hash. Not supported. - W.write(0); - // Typecheck section number. Not supported. - W.write(0); - // Symbol type. - W.write(getEncodedType(CSectionRef.MCSec)); - // Storage mapping class. - W.write(CSectionRef.MCSec->getMappingClass()); - // Reserved (x_stab). - W.write(0); - // Reserved (x_snstab). 
- W.write(0); + writeSymbolEntry(CSectionRef.getSymbolTableName(), CSectionRef.Address, + SectionIndex, CSectionRef.getVisibilityType(), StorageClass); + + writeSymbolAuxCsectEntry(CSectionRef.Size, getEncodedType(CSectionRef.MCSec), + CSectionRef.MCSec->getMappingClass()); } void XCOFFObjectWriter::writeFileHeader() { - // Magic. - W.write(0x01df); - // Number of sections. + W.write(is64Bit() ? XCOFF::XCOFF64 : XCOFF::XCOFF32); W.write(SectionCount); - // Timestamp field. For reproducible output we write a 0, which represents no - // timestamp. - W.write(0); - // Byte Offset to the start of the symbol table. - W.write(SymbolTableOffset); - // Number of entries in the symbol table. - W.write(SymbolTableEntryCount); - // Size of the optional header. - W.write(0); - // Flags. - W.write(0); + W.write(0); // TimeStamp + writeWord(SymbolTableOffset); + if (is64Bit()) { + W.write(0); // AuxHeaderSize. No optional header for an object + // file that is not to be loaded. + W.write(0); // Flags + W.write(SymbolTableEntryCount); + } else { + W.write(SymbolTableEntryCount); + W.write(0); // AuxHeaderSize. No optional header for an object + // file that is not to be loaded. + W.write(0); // Flags + } } void XCOFFObjectWriter::writeSectionHeaderTable() { @@ -777,28 +798,25 @@ void XCOFFObjectWriter::writeSectionHeaderTable() { // Write the Physical Address and Virtual Address. In an object file these // are the same. // We use 0 for DWARF sections' Physical and Virtual Addresses. - if (!IsDwarf) { - W.write(Sec->Address); - W.write(Sec->Address); + writeWord(IsDwarf ? 0 : Sec->Address); + writeWord(IsDwarf ? 0 : Sec->Address); + + writeWord(Sec->Size); + writeWord(Sec->FileOffsetToData); + writeWord(Sec->FileOffsetToRelocations); + writeWord(0); // FileOffsetToLineNumberInfo. Not supported yet. + + if (is64Bit()) { + W.write(Sec->RelocationCount); + W.write(0); // NumberOfLineNumbers. Not supported yet. + W.write(Sec->Flags); + W.OS.write_zeros(4); } else { - W.write(0); - W.write(0); + W.write(Sec->RelocationCount); + W.write(0); // NumberOfLineNumbers. Not supported yet. + W.write(Sec->Flags); } - W.write(Sec->Size); - W.write(Sec->FileOffsetToData); - W.write(Sec->FileOffsetToRelocations); - - // Line number pointer. Not supported yet. - W.write(0); - - W.write(Sec->RelocationCount); - - // Line number counts. Not supported yet. - W.write(0); - - W.write(Sec->Flags); - return true; }; @@ -811,11 +829,11 @@ void XCOFFObjectWriter::writeSectionHeaderTable() { void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &Section) { if (Section.MCSec->isCsect()) - W.write(Section.Address + Reloc.FixupOffsetInCsect); + writeWord(Section.Address + Reloc.FixupOffsetInCsect); else { // DWARF sections' address is set to 0. assert(Section.MCSec->isDwarfSect() && "unsupport section type!"); - W.write(Reloc.FixupOffsetInCsect); + writeWord(Reloc.FixupOffsetInCsect); } W.write(Reloc.SymbolTableIndex); W.write(Reloc.SignAndSize); @@ -845,34 +863,18 @@ void XCOFFObjectWriter::writeRelocations() { } void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) { - // Write symbol 0 as C_FILE. - // FIXME: support 64-bit C_FILE symbol. - // - // n_name. The n_name of a C_FILE symbol is the source filename when no - // auxiliary entries are present. The source filename is alternatively - // provided by an auxiliary entry, in which case the n_name of the C_FILE - // symbol is `.file`. - // FIXME: add the real source filename. - writeSymbolName(".file"); - // n_value. 
The n_value of a C_FILE symbol is its symbol table index. - W.write(0); - // n_scnum. N_DEBUG is a reserved section number for indicating a special - // symbolic debugging symbol. - W.write(XCOFF::ReservedSectionNum::N_DEBUG); - // n_type. The n_type field of a C_FILE symbol encodes the source language and - // CPU version info; zero indicates no info. - W.write(0); - // n_sclass. The C_FILE symbol provides source file-name information, - // source-language ID and CPU-version ID information and some other optional - // infos. - W.write(XCOFF::C_FILE); - // n_numaux. No aux entry for now. - W.write(0); + // Write C_FILE symbols. + // The n_name of a C_FILE symbol is the source file's name when no auxiliary + // entries are present. + for (const std::pair &F : FileNames) { + writeSymbolEntry(F.first, /*Value=*/0, XCOFF::ReservedSectionNum::N_DEBUG, + /*SymbolType=*/0, XCOFF::C_FILE, + /*NumberOfAuxEntries=*/0); + } for (const auto &Csect : UndefinedCsects) { - writeSymbolTableEntryForControlSection(Csect, - XCOFF::ReservedSectionNum::N_UNDEF, - Csect.MCSec->getStorageClass()); + writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF, + Csect.MCSec->getStorageClass()); } for (const auto *Section : Sections) { @@ -887,19 +889,19 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) { const int16_t SectionIndex = Section->Index; for (const auto &Csect : *Group) { // Write out the control section first and then each symbol in it. - writeSymbolTableEntryForControlSection(Csect, SectionIndex, - Csect.MCSec->getStorageClass()); + writeSymbolEntryForControlSection(Csect, SectionIndex, + Csect.MCSec->getStorageClass()); for (const auto &Sym : Csect.Syms) - writeSymbolTableEntryForCsectMemberLabel( + writeSymbolEntryForCsectMemberLabel( Sym, Csect, SectionIndex, Layout.getSymbolOffset(*(Sym.MCSym))); } } } for (const auto &DwarfSection : DwarfSections) - writeSymbolTableEntryForDwarfSection(*DwarfSection.DwarfSect, - DwarfSection.Index); + writeSymbolEntryForDwarfSection(*DwarfSection.DwarfSect, + DwarfSection.Index); } void XCOFFObjectWriter::finalizeSectionInfo() { @@ -914,8 +916,10 @@ void XCOFFObjectWriter::finalizeSectionInfo() { for (auto &Csect : *Group) { const size_t CsectRelocCount = Csect.Relocations.size(); - if (CsectRelocCount >= XCOFF::RelocOverflow || - Section->RelocationCount >= XCOFF::RelocOverflow - CsectRelocCount) + // An XCOFF64 file may not contain an overflow section header. + if (!is64Bit() && (CsectRelocCount >= XCOFF::RelocOverflow || + Section->RelocationCount >= + XCOFF::RelocOverflow - CsectRelocCount)) report_fatal_error( "relocation entries overflowed; overflow section is " "not implemented yet"); @@ -938,10 +942,12 @@ void XCOFFObjectWriter::finalizeSectionInfo() { return false; Sec->FileOffsetToRelocations = RawPointer; - const uint32_t RelocationSizeInSec = - Sec->RelocationCount * XCOFF::RelocationSerializationSize32; + const uint64_t RelocationSizeInSec = + Sec->RelocationCount * (is64Bit() + ? XCOFF::RelocationSerializationSize64 + : XCOFF::RelocationSerializationSize32); RawPointer += RelocationSizeInSec; - if (RawPointer > UINT32_MAX) + if (RawPointer > MaxRawDataSize) report_fatal_error("Relocation data overflowed this object file."); return true; @@ -960,8 +966,8 @@ void XCOFFObjectWriter::finalizeSectionInfo() { } void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { - // The first symbol table entry (at index 0) is for the file name. 
- uint32_t SymbolTableIndex = 1; + // The symbol table starts with all the C_FILE symbols. + uint32_t SymbolTableIndex = FileNames.size(); // Calculate indices for undefined symbols. for (auto &Csect : UndefinedCsects) { @@ -976,10 +982,11 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { // The address corresponds to the address of sections and symbols in the // object file. We place the shared address 0 immediately after the // section header table. - uint32_t Address = 0; + uint64_t Address = 0; // Section indices are 1-based in XCOFF. int32_t SectionIndex = 1; bool HasTDataSection = false; + uint32_t PaddingsBeforeDwarf = 0; for (auto *Section : Sections) { const bool IsEmpty = @@ -1039,6 +1046,19 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { Section->Size = Address - Section->Address; } + // Start generating DWARF sections. Sections other than DWARF sections use + // DefaultSectionAlign as the default alignment, while DWARF sections have + // their own alignments. If these two alignments are not the same, we need + // some padding here and record the padding bytes for the FileOffsetToData + // calculation. + if (!DwarfSections.empty()) + PaddingsBeforeDwarf = + alignTo(Address, + (*DwarfSections.begin()).DwarfSect->MCSec->getAlignment()) - + Address; + + DwarfSectionEntry *LastDwarfSection = nullptr; + for (auto &DwarfSection : DwarfSections) { assert((SectionIndex <= MaxSectionIndex) && "Section index overflow!"); @@ -1066,40 +1086,52 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { // For DWARF sections, we must use the real size, which may not be aligned. DwarfSection.Size = DwarfSect.Size = Layout.getSectionAddressSize(MCSec); - // Make the Address align to default alignment for follow section. - Address = alignTo(DwarfSect.Address + DwarfSect.Size, DefaultSectionAlign); + Address = DwarfSection.Address + DwarfSection.Size; + + if (LastDwarfSection) + LastDwarfSection->MemorySize = + DwarfSection.Address - LastDwarfSection->Address; + LastDwarfSection = &DwarfSection; + } + if (LastDwarfSection) { + // Make the final DWARF section address align to the default section + // alignment for the contents that follow. + Address = alignTo(LastDwarfSection->Address + LastDwarfSection->Size, + DefaultSectionAlign); + LastDwarfSection->MemorySize = Address - LastDwarfSection->Address; } SymbolTableEntryCount = SymbolTableIndex; // Calculate the RawPointer value for each section. - uint64_t RawPointer = XCOFF::FileHeaderSize32 + auxiliaryHeaderSize() + - SectionCount * XCOFF::SectionHeaderSize32; + uint64_t RawPointer = + (is64Bit() ? (XCOFF::FileHeaderSize64 + + SectionCount * XCOFF::SectionHeaderSize64) + : (XCOFF::FileHeaderSize32 + + SectionCount * XCOFF::SectionHeaderSize32)) + + auxiliaryHeaderSize(); + for (auto *Sec : Sections) { if (Sec->Index == SectionEntry::UninitializedIndex || Sec->IsVirtual) continue; Sec->FileOffsetToData = RawPointer; RawPointer += Sec->Size; - if (RawPointer > UINT32_MAX) + if (RawPointer > MaxRawDataSize) report_fatal_error("Section raw data overflowed this object file."); } - for (auto &DwarfSection : DwarfSections) { - // Address of csect sections are always aligned to DefaultSectionAlign, but - // address of DWARF section are aligned to Section alignment which may be - // bigger than DefaultSectionAlign, need to execlude the padding bits.
- RawPointer = - alignTo(RawPointer, DwarfSection.DwarfSect->MCSec->getAlignment()); + // Increase the raw pointer for the padding bytes between csect sections and + // DWARF sections. + if (!DwarfSections.empty()) + RawPointer += PaddingsBeforeDwarf; + for (auto &DwarfSection : DwarfSections) { DwarfSection.FileOffsetToData = RawPointer; - // Some section entries, like DWARF section size is not aligned, so - // RawPointer may be not aligned. - RawPointer += DwarfSection.Size; - // Make sure RawPointer is aligned. - RawPointer = alignTo(RawPointer, DefaultSectionAlign); - assert(RawPointer <= UINT32_MAX && + RawPointer += DwarfSection.MemorySize; + + assert(RawPointer <= MaxRawDataSize && "Section raw data overflowed this object file."); } @@ -1108,7 +1140,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { void XCOFFObjectWriter::writeSectionForControlSectionEntry( const MCAssembler &Asm, const MCAsmLayout &Layout, - const CsectSectionEntry &CsectEntry, uint32_t &CurrentAddressLocation) { + const CsectSectionEntry &CsectEntry, uint64_t &CurrentAddressLocation) { // Nothing to write for this Section. if (CsectEntry.Index == SectionEntry::UninitializedIndex) return; @@ -1146,7 +1178,7 @@ void XCOFFObjectWriter::writeSectionForControlSectionEntry( // The size of the tail padding in a section is the end virtual address of // the current section minus the end virtual address of the last csect // in that section. - if (uint32_t PaddingSize = + if (uint64_t PaddingSize = CsectEntry.Address + CsectEntry.Size - CurrentAddressLocation) { W.OS.write_zeros(PaddingSize); CurrentAddressLocation += PaddingSize; @@ -1155,7 +1187,7 @@ void XCOFFObjectWriter::writeSectionForDwarfSectionEntry( const MCAssembler &Asm, const MCAsmLayout &Layout, - const DwarfSectionEntry &DwarfEntry, uint32_t &CurrentAddressLocation) { + const DwarfSectionEntry &DwarfEntry, uint64_t &CurrentAddressLocation) { // There could be a gap (without corresponding zero padding) between // sections. For example, DWARF section alignment is bigger than // DefaultSectionAlign.
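// Illustrative sketch, not from the vendored patch: the padding logic above
// hinges on llvm::alignTo (llvm/Support/MathExtras.h), which rounds a value up
// to the next multiple of an alignment. A self-contained model of the
// PaddingsBeforeDwarf computation, with hypothetical numbers:
#include <cassert>
#include <cstdint>

// Same contract as llvm::alignTo for power-of-two and other alignments.
static uint64_t alignToSketch(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  // Hypothetical end address of the last csect section, and the alignment of
  // the first DWARF section.
  const uint64_t Address = 0x1234;
  const uint64_t DwarfAlign = 8;
  // Padding inserted before the first DWARF section; the same byte count is
  // later added to RawPointer so that FileOffsetToData matches the layout.
  const uint64_t PaddingsBeforeDwarf =
      alignToSketch(Address, DwarfAlign) - Address;
  assert(PaddingsBeforeDwarf == 4); // 0x1234 rounds up to 0x1238
  return 0;
}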
@@ -1163,7 +1195,7 @@ void XCOFFObjectWriter::writeSectionForDwarfSectionEntry( "CurrentAddressLocation should be less than or equal to section " "address."); - if (uint32_t PaddingSize = DwarfEntry.Address - CurrentAddressLocation) + if (uint64_t PaddingSize = DwarfEntry.Address - CurrentAddressLocation) W.OS.write_zeros(PaddingSize); if (DwarfEntry.Size) diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp index a9ea8edff059..a10a2f5c56f0 100644 --- a/llvm/lib/MCA/CustomBehaviour.cpp +++ b/llvm/lib/MCA/CustomBehaviour.cpp @@ -16,7 +16,7 @@ namespace llvm { namespace mca { -CustomBehaviour::~CustomBehaviour() {} +CustomBehaviour::~CustomBehaviour() = default; unsigned CustomBehaviour::checkCustomHazard(ArrayRef IssuedInst, const InstRef &IR) { diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp index 121d320f10e6..bdc8b3d0e390 100644 --- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -39,7 +39,7 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, } } -LSUnitBase::~LSUnitBase() {} +LSUnitBase::~LSUnitBase() = default; void LSUnitBase::cycleEvent() { for (const std::pair> &G : Groups) @@ -67,17 +67,17 @@ void LSUnitBase::dump() const { #endif unsigned LSUnit::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsStoreBarrier = IR.getInstruction()->isAStoreBarrier(); - bool IsLoadBarrier = IR.getInstruction()->isALoadBarrier(); - assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); + const Instruction &IS = *IR.getInstruction(); + bool IsStoreBarrier = IS.isAStoreBarrier(); + bool IsLoadBarrier = IS.isALoadBarrier(); + assert((IS.getMayLoad() || IS.getMayStore()) && "Not a memory operation!"); - if (Desc.MayLoad) + if (IS.getMayLoad()) acquireLQSlot(); - if (Desc.MayStore) + if (IS.getMayStore()) acquireSQSlot(); - if (Desc.MayStore) { + if (IS.getMayStore()) { unsigned NewGID = createMemoryGroup(); MemoryGroup &NewGroup = getGroup(NewGID); NewGroup.addInstruction(); @@ -115,7 +115,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) { if (IsStoreBarrier) CurrentStoreBarrierGroupID = NewGID; - if (Desc.MayLoad) { + if (IS.getMayLoad()) { CurrentLoadGroupID = NewGID; if (IsLoadBarrier) CurrentLoadBarrierGroupID = NewGID; @@ -124,7 +124,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) { return NewGID; } - assert(Desc.MayLoad && "Expected a load!"); + assert(IS.getMayLoad() && "Expected a load!"); unsigned ImmediateLoadDominator = std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID); @@ -194,10 +194,10 @@ unsigned LSUnit::dispatch(const InstRef &IR) { } LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.MayLoad && isLQFull()) + const Instruction &IS = *IR.getInstruction(); + if (IS.getMayLoad() && isLQFull()) return LSUnit::LSU_LQUEUE_FULL; - if (Desc.MayStore && isSQFull()) + if (IS.getMayStore() && isSQFull()) return LSUnit::LSU_SQUEUE_FULL; return LSUnit::LSU_AVAILABLE; } @@ -212,9 +212,9 @@ void LSUnitBase::onInstructionExecuted(const InstRef &IR) { } void LSUnitBase::onInstructionRetired(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; + const Instruction &IS = *IR.getInstruction(); + bool IsALoad = IS.getMayLoad(); + bool IsAStore = IS.getMayStore(); assert((IsALoad || IsAStore) && "Expected a memory operation!"); if (IsALoad) { 
diff --git a/llvm/lib/MCA/IncrementalSourceMgr.cpp b/llvm/lib/MCA/IncrementalSourceMgr.cpp new file mode 100644 index 000000000000..10b86b501a2e --- /dev/null +++ b/llvm/lib/MCA/IncrementalSourceMgr.cpp @@ -0,0 +1,51 @@ +//===-------------------- IncrementalSourceMgr.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines some implementations for IncrementalSourceMgr. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/IncrementalSourceMgr.h" +#ifndef NDEBUG +#include "llvm/Support/Format.h" +#endif + +using namespace llvm; +using namespace llvm::mca; + +void IncrementalSourceMgr::clear() { + Staging.clear(); + InstStorage.clear(); + TotalCounter = 0U; + EOS = false; +} + +void IncrementalSourceMgr::updateNext() { + ++TotalCounter; + Instruction *I = Staging.front(); + Staging.pop_front(); + I->reset(); + + if (InstFreedCB) + InstFreedCB(I); +} + +#ifndef NDEBUG +void IncrementalSourceMgr::printStatistic(raw_ostream &OS) { + unsigned MaxInstStorageSize = InstStorage.size(); + if (MaxInstStorageSize <= TotalCounter) { + auto Ratio = double(MaxInstStorageSize) / double(TotalCounter); + OS << "Cache ratio = " << MaxInstStorageSize << " / " << TotalCounter + << llvm::format(" (%.2f%%)", (1.0 - Ratio) * 100.0) << "\n"; + } else { + OS << "Error: Number of created instructions " + << "is larger than the number of issued instructions\n"; + } +} +#endif diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index d8283f8d2682..45acea253587 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -14,16 +14,19 @@ #include "llvm/MCA/InstrBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" -#define DEBUG_TYPE "llvm-mca" +#define DEBUG_TYPE "llvm-mca-instrbuilder" namespace llvm { namespace mca { +char RecycledInstErr::ID = 0; + InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, const llvm::MCRegisterInfo &mri, @@ -572,6 +575,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n'); LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); + LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n'); // Create a new empty descriptor. std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>(); @@ -593,13 +597,6 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { FirstReturnInst = false; } - ID->MayLoad = MCDesc.mayLoad(); - ID->MayStore = MCDesc.mayStore(); - ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); - ID->BeginGroup = SCDesc.BeginGroup; - ID->EndGroup = SCDesc.EndGroup; - ID->RetireOOO = SCDesc.RetireOOO; - initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); computeMaxLatency(*ID, MCDesc, SCDesc, STI); @@ -618,7 +615,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { // Now add the new descriptor.
bool IsVariadic = MCDesc.isVariadic(); - if (!IsVariadic && !IsVariant) { + if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) { Descriptors[MCI.getOpcode()] = std::move(ID); return *Descriptors[MCI.getOpcode()]; } @@ -638,14 +635,43 @@ InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { return createInstrDescImpl(MCI); } +STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc"); + Expected> InstrBuilder::createInstruction(const MCInst &MCI) { Expected DescOrErr = getOrCreateInstrDesc(MCI); if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = - std::make_unique(D, MCI.getOpcode()); + Instruction *NewIS = nullptr; + std::unique_ptr CreatedIS; + bool IsInstRecycled = false; + + if (!D.IsRecyclable) + ++NumVariantInst; + + if (D.IsRecyclable && InstRecycleCB) { + if (auto *I = InstRecycleCB(D)) { + NewIS = I; + NewIS->reset(); + IsInstRecycled = true; + } + } + if (!IsInstRecycled) { + CreatedIS = std::make_unique(D, MCI.getOpcode()); + NewIS = CreatedIS.get(); + } + + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + const MCSchedClassDesc &SCDesc = + *STI.getSchedModel().getSchedClassDesc(D.SchedClassID); + + NewIS->setMayLoad(MCDesc.mayLoad()); + NewIS->setMayStore(MCDesc.mayStore()); + NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects()); + NewIS->setBeginGroup(SCDesc.BeginGroup); + NewIS->setEndGroup(SCDesc.EndGroup); + NewIS->setRetireOOO(SCDesc.RetireOOO); // Check if this is a dependency breaking instruction. APInt Mask; @@ -663,6 +689,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) { // Initialize Reads first. MCPhysReg RegID = 0; + size_t Idx = 0U; for (const ReadDescriptor &RD : D.Reads) { if (!RD.isImplicitRead()) { // explicit read. @@ -681,15 +708,22 @@ InstrBuilder::createInstruction(const MCInst &MCI) { continue; // Okay, this is a register operand. Create a ReadState for it. - NewIS->getUses().emplace_back(RD, RegID); - ReadState &RS = NewIS->getUses().back(); + ReadState *RS = nullptr; + if (IsInstRecycled && Idx < NewIS->getUses().size()) { + NewIS->getUses()[Idx] = ReadState(RD, RegID); + RS = &NewIS->getUses()[Idx++]; + } else { + NewIS->getUses().emplace_back(RD, RegID); + RS = &NewIS->getUses().back(); + ++Idx; + } if (IsDepBreaking) { // A mask of all zeroes means: explicit input operands are not // independent. if (Mask.isZero()) { if (!RD.isImplicitRead()) - RS.setIndependentFromDef(); + RS->setIndependentFromDef(); } else { // Check if this register operand is independent according to `Mask`. // Note that Mask may not have enough bits to describe all explicit and @@ -699,15 +733,21 @@ InstrBuilder::createInstruction(const MCInst &MCI) { if (Mask.getBitWidth() > RD.UseIndex) { // Okay. This map describe register use `RD.UseIndex`. if (Mask[RD.UseIndex]) - RS.setIndependentFromDef(); + RS->setIndependentFromDef(); } } } } + if (IsInstRecycled && Idx < NewIS->getUses().size()) + NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx); // Early exit if there are no writes. - if (D.Writes.empty()) - return std::move(NewIS); + if (D.Writes.empty()) { + if (IsInstRecycled) + return llvm::make_error(NewIS); + else + return std::move(CreatedIS); + } // Track register writes that implicitly clear the upper portion of the // underlying super-registers using an APInt. @@ -720,6 +760,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) { // Initialize writes. unsigned WriteIndex = 0; + Idx = 0U; for (const WriteDescriptor &WD : D.Writes) { RegID = WD.isImplicitWrite() ? 
WD.RegisterID : MCI.getOperand(WD.OpIndex).getReg(); @@ -730,13 +771,26 @@ InstrBuilder::createInstruction(const MCInst &MCI) { } assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(WD, RegID, - /* ClearsSuperRegs */ WriteMask[WriteIndex], - /* WritesZero */ IsZeroIdiom); + if (IsInstRecycled && Idx < NewIS->getDefs().size()) { + NewIS->getDefs()[Idx++] = + WriteState(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + } else { + NewIS->getDefs().emplace_back(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + ++Idx; + } ++WriteIndex; } + if (IsInstRecycled && Idx < NewIS->getDefs().size()) + NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx); - return std::move(NewIS); + if (IsInstRecycled) + return llvm::make_error(NewIS); + else + return std::move(CreatedIS); } } // namespace mca } // namespace llvm diff --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp index e658b869a67e..d4adfce59713 100644 --- a/llvm/lib/MCA/Instruction.cpp +++ b/llvm/lib/MCA/Instruction.cpp @@ -148,6 +148,18 @@ const CriticalDependency &Instruction::computeCriticalRegDep() { return CriticalRegDep; } +void Instruction::reset() { + // Note that this won't clear read/write descriptors + // or other non-trivial fields + Stage = IS_INVALID; + CyclesLeft = UNKNOWN_CYCLES; + clearOptimizableMove(); + RCUTokenID = 0; + LSUTokenID = 0; + CriticalResourceMask = 0; + IsEliminated = false; +} + void Instruction::dispatch(unsigned RCUToken) { assert(Stage == IS_INVALID); Stage = IS_DISPATCHED; diff --git a/llvm/lib/MCA/Pipeline.cpp b/llvm/lib/MCA/Pipeline.cpp index 22b9d0799f77..c94fe1422a69 100644 --- a/llvm/lib/MCA/Pipeline.cpp +++ b/llvm/lib/MCA/Pipeline.cpp @@ -38,7 +38,8 @@ Expected Pipeline::run() { assert(!Stages.empty() && "Unexpected empty pipeline found!"); do { - notifyCycleBegin(); + if (!isPaused()) + notifyCycleBegin(); if (Error Err = runCycle()) return std::move(Err); notifyCycleEnd(); @@ -53,15 +54,25 @@ Error Pipeline::runCycle() { // Update stages before we start processing new instructions. for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { const std::unique_ptr &S = *I; - Err = S->cycleStart(); + if (isPaused()) + Err = S->cycleResume(); + else + Err = S->cycleStart(); } + CurrentState = State::Started; + // Now fetch and execute new instructions. InstRef IR; Stage &FirstStage = *Stages[0]; while (!Err && FirstStage.isAvailable(IR)) Err = FirstStage.execute(IR); + if (Err.isA()) { + CurrentState = State::Paused; + return Err; + } + // Update stages in preparation for a new cycle. for (const std::unique_ptr &S : Stages) { Err = S->cycleEnd(); diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp index 66228bd5a862..10e433bf1689 100644 --- a/llvm/lib/MCA/Stages/DispatchStage.cpp +++ b/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -78,7 +78,6 @@ bool DispatchStage::canDispatch(const InstRef &IR) const { Error DispatchStage::dispatch(InstRef IR) { assert(!CarryOver && "Cannot dispatch another instruction!"); Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); const unsigned NumMicroOps = IS.getNumMicroOps(); if (NumMicroOps > DispatchWidth) { assert(AvailableEntries == DispatchWidth); @@ -91,7 +90,7 @@ Error DispatchStage::dispatch(InstRef IR) { } // Check if this instructions ends the dispatch group. 
- if (Desc.EndGroup) + if (IS.getEndGroup()) AvailableEntries = 0; // Check if this is an optimizable reg-reg move or an XCHG-like instruction. @@ -159,12 +158,11 @@ bool DispatchStage::isAvailable(const InstRef &IR) const { const Instruction &Inst = *IR.getInstruction(); unsigned NumMicroOps = Inst.getNumMicroOps(); - const InstrDesc &Desc = Inst.getDesc(); unsigned Required = std::min(NumMicroOps, DispatchWidth); if (Required > AvailableEntries) return false; - if (Desc.BeginGroup && AvailableEntries != DispatchWidth) + if (Inst.getBeginGroup() && AvailableEntries != DispatchWidth) return false; // The dispatch logic doesn't internally buffer instructions. It only accepts diff --git a/llvm/lib/MCA/Stages/EntryStage.cpp b/llvm/lib/MCA/Stages/EntryStage.cpp index 66135790a4cd..6b3fbb8c6236 100644 --- a/llvm/lib/MCA/Stages/EntryStage.cpp +++ b/llvm/lib/MCA/Stages/EntryStage.cpp @@ -19,7 +19,7 @@ namespace llvm { namespace mca { bool EntryStage::hasWorkToComplete() const { - return static_cast(CurrentInstruction); + return static_cast(CurrentInstruction) || !SM.isEnd(); } bool EntryStage::isAvailable(const InstRef & /* unused */) const { @@ -28,15 +28,20 @@ bool EntryStage::isAvailable(const InstRef & /* unused */) const { return false; } -void EntryStage::getNextInstruction() { +Error EntryStage::getNextInstruction() { assert(!CurrentInstruction && "There is already an instruction to process!"); - if (!SM.hasNext()) - return; + if (!SM.hasNext()) { + if (!SM.isEnd()) + return llvm::make_error(); + else + return llvm::ErrorSuccess(); + } SourceRef SR = SM.peekNext(); std::unique_ptr Inst = std::make_unique(SR.second); CurrentInstruction = InstRef(SR.first, Inst.get()); Instructions.emplace_back(std::move(Inst)); SM.updateNext(); + return llvm::ErrorSuccess(); } llvm::Error EntryStage::execute(InstRef & /*unused */) { @@ -46,16 +51,20 @@ llvm::Error EntryStage::execute(InstRef & /*unused */) { // Move the program counter. CurrentInstruction.invalidate(); - getNextInstruction(); - return llvm::ErrorSuccess(); + return getNextInstruction(); } llvm::Error EntryStage::cycleStart() { if (!CurrentInstruction) - getNextInstruction(); + return getNextInstruction(); return llvm::ErrorSuccess(); } +llvm::Error EntryStage::cycleResume() { + assert(!CurrentInstruction); + return getNextInstruction(); +} + llvm::Error EntryStage::cycleEnd() { // Find the first instruction which hasn't been retired. auto Range = make_range(&Instructions[NumRetired], Instructions.end()); diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp index 2b11f73b19df..369e2f5a4ef1 100644 --- a/llvm/lib/MCA/Stages/ExecuteStage.cpp +++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp @@ -165,8 +165,8 @@ static void verifyInstructionEliminated(const InstRef &IR) { // Ensure that instructions eliminated at register renaming stage are in a // consistent state. 
- const InstrDesc &Desc = Inst.getDesc(); - assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!"); + assert(!Inst.getMayLoad() && !Inst.getMayStore() && + "Cannot eliminate a memory op!"); } #endif diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp index abfbc80f17c9..0f1737dc3cbc 100644 --- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp +++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp @@ -63,7 +63,6 @@ bool InOrderIssueStage::isAvailable(const InstRef &IR) const { const Instruction &Inst = *IR.getInstruction(); unsigned NumMicroOps = Inst.getNumMicroOps(); - const InstrDesc &Desc = Inst.getDesc(); bool ShouldCarryOver = NumMicroOps > getIssueWidth(); if (Bandwidth < NumMicroOps && !ShouldCarryOver) @@ -71,7 +70,7 @@ bool InOrderIssueStage::isAvailable(const InstRef &IR) const { // Instruction with BeginGroup must be the first instruction to be issued in a // cycle. - if (Desc.BeginGroup && NumIssued != 0) + if (Inst.getBeginGroup() && NumIssued != 0) return false; return true; @@ -140,7 +139,7 @@ bool InOrderIssueStage::canExecute(const InstRef &IR) { } if (LastWriteBackCycle) { - if (!IR.getInstruction()->getDesc().RetireOOO) { + if (!IR.getInstruction()->getRetireOOO()) { unsigned NextWriteBackCycle = findFirstWriteBackCycle(IR); // Delay the instruction to ensure that writes happen in program order. if (NextWriteBackCycle < LastWriteBackCycle) { @@ -254,7 +253,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) { LLVM_DEBUG(dbgs() << "[N] Carry over #" << IR << " \n"); } else { NumIssued += NumMicroOps; - Bandwidth = Desc.EndGroup ? 0 : Bandwidth - NumMicroOps; + Bandwidth = IS.getEndGroup() ? 0 : Bandwidth - NumMicroOps; } // If the instruction has a latency of 0, we need to handle @@ -272,7 +271,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) { IssuedInst.push_back(IR); - if (!IR.getInstruction()->getDesc().RetireOOO) + if (!IR.getInstruction()->getRetireOOO()) LastWriteBackCycle = IS.getCyclesLeft(); return llvm::ErrorSuccess(); @@ -325,7 +324,7 @@ void InOrderIssueStage::updateCarriedOver() { LLVM_DEBUG(dbgs() << "[N] Carry over (complete) #" << CarriedOver << " \n"); - if (CarriedOver.getInstruction()->getDesc().EndGroup) + if (CarriedOver.getInstruction()->getEndGroup()) Bandwidth = 0; else Bandwidth -= CarryOver; diff --git a/llvm/lib/MCA/Stages/Stage.cpp b/llvm/lib/MCA/Stages/Stage.cpp index ed512ac9711c..5613d4d6bd07 100644 --- a/llvm/lib/MCA/Stages/Stage.cpp +++ b/llvm/lib/MCA/Stages/Stage.cpp @@ -24,5 +24,6 @@ void Stage::addListener(HWEventListener *Listener) { Listeners.insert(Listener); } +char InstStreamPause::ID = 0; } // namespace mca } // namespace llvm diff --git a/llvm/lib/ObjCopy/Archive.cpp b/llvm/lib/ObjCopy/Archive.cpp new file mode 100644 index 000000000000..742ca0b890cf --- /dev/null +++ b/llvm/lib/ObjCopy/Archive.cpp @@ -0,0 +1,110 @@ +//===- Archive.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Archive.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/MultiFormatConfig.h" +#include "llvm/ObjCopy/ObjCopy.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" + +namespace llvm { +namespace objcopy { + +using namespace llvm::object; + +Expected<std::vector<NewArchiveMember>> +createNewArchiveMembers(const MultiFormatConfig &Config, const Archive &Ar) { + std::vector<NewArchiveMember> NewArchiveMembers; + Error Err = Error::success(); + for (const Archive::Child &Child : Ar.children(Err)) { + Expected<StringRef> ChildNameOrErr = Child.getName(); + if (!ChildNameOrErr) + return createFileError(Ar.getFileName(), ChildNameOrErr.takeError()); + + Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary(); + if (!ChildOrErr) + return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")", + ChildOrErr.takeError()); + + SmallVector<char, 0> Buffer; + raw_svector_ostream MemStream(Buffer); + + if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MemStream)) + return std::move(E); + + Expected<NewArchiveMember> Member = NewArchiveMember::getOldMember( + Child, Config.getCommonConfig().DeterministicArchives); + if (!Member) + return createFileError(Ar.getFileName(), Member.takeError()); + + Member->Buf = std::make_unique<SmallVectorMemoryBuffer>( + std::move(Buffer), ChildNameOrErr.get()); + Member->MemberName = Member->Buf->getBufferIdentifier(); + NewArchiveMembers.push_back(std::move(*Member)); + } + if (Err) + return createFileError(Config.getCommonConfig().InputFilename, + std::move(Err)); + return std::move(NewArchiveMembers); +} + +// For regular archives this function simply calls llvm::writeArchive; +// for thin archives it writes the archive file itself as well as its members. +static Error deepWriteArchive(StringRef ArcName, + ArrayRef<NewArchiveMember> NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin) { + if (Kind == object::Archive::K_BSD && !NewMembers.empty() && + NewMembers.front().detectKindFromObject() == object::Archive::K_DARWIN) + Kind = object::Archive::K_DARWIN; + + if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind, + Deterministic, Thin)) + return createFileError(ArcName, std::move(E)); + + if (!Thin) + return Error::success(); + + for (const NewArchiveMember &Member : NewMembers) { + // For regular files (as is the case for deepWriteArchive), + // FileOutputBuffer::create will return OnDiskBuffer. + // OnDiskBuffer uses a temporary file and then renames it. So in reality + // there is no inefficiency or duplicated in-memory buffers in this case. For + // now, in-memory buffers cannot be completely avoided since + // NewArchiveMember still requires them even though writeArchive does not + // write them on disk.
+ Expected> FB = + FileOutputBuffer::create(Member.MemberName, Member.Buf->getBufferSize(), + FileOutputBuffer::F_executable); + if (!FB) + return FB.takeError(); + std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(), + (*FB)->getBufferStart()); + if (Error E = (*FB)->commit()) + return E; + } + return Error::success(); +} + +Error executeObjcopyOnArchive(const MultiFormatConfig &Config, + const object::Archive &Ar) { + Expected> NewArchiveMembersOrErr = + createNewArchiveMembers(Config, Ar); + if (!NewArchiveMembersOrErr) + return NewArchiveMembersOrErr.takeError(); + const CommonConfig &CommonConfig = Config.getCommonConfig(); + return deepWriteArchive(CommonConfig.OutputFilename, *NewArchiveMembersOrErr, + Ar.hasSymbolTable(), Ar.kind(), + CommonConfig.DeterministicArchives, Ar.isThin()); +} + +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/Archive.h b/llvm/lib/ObjCopy/Archive.h new file mode 100644 index 000000000000..08aae563505c --- /dev/null +++ b/llvm/lib/ObjCopy/Archive.h @@ -0,0 +1,31 @@ +//===- Archive.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_ARCHIVE_H +#define LLVM_LIB_OBJCOPY_ARCHIVE_H + +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace objcopy { + +class MultiFormatConfig; + +/// Applies the transformations described by \p Config to +/// each member in archive \p Ar. +/// \returns Vector of transformed archive members. +Expected> +createNewArchiveMembers(const MultiFormatConfig &Config, + const object::Archive &Ar); + +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_ARCHIVE_H diff --git a/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp b/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp new file mode 100644 index 000000000000..cda93ce0fb3c --- /dev/null +++ b/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp @@ -0,0 +1,311 @@ +//===- COFFObjcopy.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/COFF/COFFObjcopy.h" +#include "COFFObject.h" +#include "COFFReader.h" +#include "COFFWriter.h" +#include "llvm/ObjCopy/COFF/COFFConfig.h" +#include "llvm/ObjCopy/CommonConfig.h" + +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Path.h" +#include + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; +using namespace COFF; + +static bool isDebugSection(const Section &Sec) { + return Sec.Name.startswith(".debug"); +} + +static uint64_t getNextRVA(const Object &Obj) { + if (Obj.getSections().empty()) + return 0; + const Section &Last = Obj.getSections().back(); + return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize, + Obj.IsPE ? 
Obj.PeHeader.SectionAlignment : 1); +} + +static Expected> +createGnuDebugLinkSectionContents(StringRef File) { + ErrorOr> LinkTargetOrErr = + MemoryBuffer::getFile(File); + if (!LinkTargetOrErr) + return createFileError(File, LinkTargetOrErr.getError()); + auto LinkTarget = std::move(*LinkTargetOrErr); + uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer())); + + StringRef FileName = sys::path::filename(File); + size_t CRCPos = alignTo(FileName.size() + 1, 4); + std::vector Data(CRCPos + 4); + memcpy(Data.data(), FileName.data(), FileName.size()); + support::endian::write32le(Data.data() + CRCPos, CRC32); + return Data; +} + +// Adds named section with given contents to the object. +static void addSection(Object &Obj, StringRef Name, ArrayRef Contents, + uint32_t Characteristics) { + bool NeedVA = Characteristics & (IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE); + + Section Sec; + Sec.setOwnedContents(Contents); + Sec.Name = Name; + Sec.Header.VirtualSize = NeedVA ? Sec.getContents().size() : 0u; + Sec.Header.VirtualAddress = NeedVA ? getNextRVA(Obj) : 0u; + Sec.Header.SizeOfRawData = + NeedVA ? alignTo(Sec.Header.VirtualSize, + Obj.IsPE ? Obj.PeHeader.FileAlignment : 1) + : Sec.getContents().size(); + // Sec.Header.PointerToRawData is filled in by the writer. + Sec.Header.PointerToRelocations = 0; + Sec.Header.PointerToLinenumbers = 0; + // Sec.Header.NumberOfRelocations is filled in by the writer. + Sec.Header.NumberOfLinenumbers = 0; + Sec.Header.Characteristics = Characteristics; + + Obj.addSections(Sec); +} + +static Error addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { + Expected> Contents = + createGnuDebugLinkSectionContents(DebugLinkFile); + if (!Contents) + return Contents.takeError(); + + addSection(Obj, ".gnu_debuglink", *Contents, + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_DISCARDABLE); + + return Error::success(); +} + +static uint32_t flagsToCharacteristics(SectionFlag AllFlags, uint32_t OldChar) { + // Need to preserve alignment flags. + const uint32_t PreserveMask = + IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_ALIGN_4BYTES | + IMAGE_SCN_ALIGN_8BYTES | IMAGE_SCN_ALIGN_16BYTES | + IMAGE_SCN_ALIGN_32BYTES | IMAGE_SCN_ALIGN_64BYTES | + IMAGE_SCN_ALIGN_128BYTES | IMAGE_SCN_ALIGN_256BYTES | + IMAGE_SCN_ALIGN_512BYTES | IMAGE_SCN_ALIGN_1024BYTES | + IMAGE_SCN_ALIGN_2048BYTES | IMAGE_SCN_ALIGN_4096BYTES | + IMAGE_SCN_ALIGN_8192BYTES; + + // Setup new section characteristics based on the flags provided in command + // line. 
+ uint32_t NewCharacteristics = (OldChar & PreserveMask) | IMAGE_SCN_MEM_READ; + + if ((AllFlags & SectionFlag::SecAlloc) && !(AllFlags & SectionFlag::SecLoad)) + NewCharacteristics |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; + if (AllFlags & SectionFlag::SecNoload) + NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; + if (!(AllFlags & SectionFlag::SecReadonly)) + NewCharacteristics |= IMAGE_SCN_MEM_WRITE; + if (AllFlags & SectionFlag::SecDebug) + NewCharacteristics |= + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_DISCARDABLE; + if (AllFlags & SectionFlag::SecCode) + NewCharacteristics |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; + if (AllFlags & SectionFlag::SecData) + NewCharacteristics |= IMAGE_SCN_CNT_INITIALIZED_DATA; + if (AllFlags & SectionFlag::SecShare) + NewCharacteristics |= IMAGE_SCN_MEM_SHARED; + if (AllFlags & SectionFlag::SecExclude) + NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; + + return NewCharacteristics; +} + +static Error handleArgs(const CommonConfig &Config, + const COFFConfig &COFFConfig, Object &Obj) { + // Perform the actual section removals. + Obj.removeSections([&Config](const Section &Sec) { + // Contrary to --only-keep-debug, --only-section fully removes sections that + // aren't mentioned. + if (!Config.OnlySection.empty() && !Config.OnlySection.matches(Sec.Name)) + return true; + + if (Config.StripDebug || Config.StripAll || Config.StripAllGNU || + Config.DiscardMode == DiscardType::All || Config.StripUnneeded) { + if (isDebugSection(Sec) && + (Sec.Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0) + return true; + } + + if (Config.ToRemove.matches(Sec.Name)) + return true; + + return false; + }); + + if (Config.OnlyKeepDebug) { + // For --only-keep-debug, we keep all other sections, but remove their + // content. The VirtualSize field in the section header is kept intact. + Obj.truncateSections([](const Section &Sec) { + return !isDebugSection(Sec) && Sec.Name != ".buildid" && + ((Sec.Header.Characteristics & + (IMAGE_SCN_CNT_CODE | IMAGE_SCN_CNT_INITIALIZED_DATA)) != 0); + }); + } + + // StripAll removes all symbols and thus also removes all relocations. + if (Config.StripAll || Config.StripAllGNU) + for (Section &Sec : Obj.getMutableSections()) + Sec.Relocs.clear(); + + // If we need to do per-symbol removals, initialize the Referenced field. + if (Config.StripUnneeded || Config.DiscardMode == DiscardType::All || + !Config.SymbolsToRemove.empty()) + if (Error E = Obj.markSymbols()) + return E; + + for (Symbol &Sym : Obj.getMutableSymbols()) { + auto I = Config.SymbolsToRename.find(Sym.Name); + if (I != Config.SymbolsToRename.end()) + Sym.Name = I->getValue(); + } + + auto ToRemove = [&](const Symbol &Sym) -> Expected { + // For StripAll, all relocations have been stripped and we remove all + // symbols. + if (Config.StripAll || Config.StripAllGNU) + return true; + + if (Config.SymbolsToRemove.matches(Sym.Name)) { + // Explicitly removing a referenced symbol is an error. + if (Sym.Referenced) + return createStringError( + llvm::errc::invalid_argument, + "'" + Config.OutputFilename + "': not stripping symbol '" + + Sym.Name.str() + "' because it is named in a relocation"); + return true; + } + + if (!Sym.Referenced) { + // With --strip-unneeded, GNU objcopy removes all unreferenced local + // symbols, and any unreferenced undefined external. + // With --strip-unneeded-symbol we strip only specific unreferenced + // local symbol instead of removing all of such. 
+ if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC || + Sym.Sym.SectionNumber == 0) + if (Config.StripUnneeded || + Config.UnneededSymbolsToRemove.matches(Sym.Name)) + return true; + + // GNU objcopy keeps referenced local symbols and external symbols + // if --discard-all is set, similar to what --strip-unneeded does, + // but undefined local symbols are kept when --discard-all is set. + if (Config.DiscardMode == DiscardType::All && + Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC && + Sym.Sym.SectionNumber != 0) + return true; + } + + return false; + }; + + // Actually do removals of symbols. + if (Error Err = Obj.removeSymbols(ToRemove)) + return Err; + + if (!Config.SetSectionFlags.empty()) + for (Section &Sec : Obj.getMutableSections()) { + const auto It = Config.SetSectionFlags.find(Sec.Name); + if (It != Config.SetSectionFlags.end()) + Sec.Header.Characteristics = flagsToCharacteristics( + It->second.NewFlags, Sec.Header.Characteristics); + } + + for (const NewSectionInfo &NewSection : Config.AddSection) { + uint32_t Characteristics; + const auto It = Config.SetSectionFlags.find(NewSection.SectionName); + if (It != Config.SetSectionFlags.end()) + Characteristics = flagsToCharacteristics(It->second.NewFlags, 0); + else + Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES; + + addSection(Obj, NewSection.SectionName, + makeArrayRef(reinterpret_cast( + NewSection.SectionData->getBufferStart()), + NewSection.SectionData->getBufferSize()), + Characteristics); + } + + for (const NewSectionInfo &NewSection : Config.UpdateSection) { + auto It = llvm::find_if(Obj.getMutableSections(), [&](auto &Sec) { + return Sec.Name == NewSection.SectionName; + }); + if (It == Obj.getMutableSections().end()) + return createStringError(errc::invalid_argument, + "could not find section with name '%s'", + NewSection.SectionName.str().c_str()); + size_t ContentSize = It->getContents().size(); + if (!ContentSize) + return createStringError( + errc::invalid_argument, + "section '%s' cannot be updated because it does not have contents", + NewSection.SectionName.str().c_str()); + if (ContentSize < NewSection.SectionData->getBufferSize()) + return createStringError( + errc::invalid_argument, + "new section cannot be larger than previous section"); + It->setOwnedContents({NewSection.SectionData->getBufferStart(), + NewSection.SectionData->getBufferEnd()}); + } + + if (!Config.AddGnuDebugLink.empty()) + if (Error E = addGnuDebugLink(Obj, Config.AddGnuDebugLink)) + return E; + + if (COFFConfig.Subsystem || COFFConfig.MajorSubsystemVersion || + COFFConfig.MinorSubsystemVersion) { + if (!Obj.IsPE) + return createStringError( + errc::invalid_argument, + "'" + Config.OutputFilename + + "': unable to set subsystem on a relocatable object file"); + if (COFFConfig.Subsystem) + Obj.PeHeader.Subsystem = *COFFConfig.Subsystem; + if (COFFConfig.MajorSubsystemVersion) + Obj.PeHeader.MajorSubsystemVersion = *COFFConfig.MajorSubsystemVersion; + if (COFFConfig.MinorSubsystemVersion) + Obj.PeHeader.MinorSubsystemVersion = *COFFConfig.MinorSubsystemVersion; + } + + return Error::success(); +} + +Error executeObjcopyOnBinary(const CommonConfig &Config, + const COFFConfig &COFFConfig, COFFObjectFile &In, + raw_ostream &Out) { + COFFReader Reader(In); + Expected> ObjOrErr = Reader.create(); + if (!ObjOrErr) + return createFileError(Config.InputFilename, ObjOrErr.takeError()); + Object *Obj = ObjOrErr->get(); + assert(Obj && "Unable to deserialize COFF object"); + if (Error E = handleArgs(Config, COFFConfig, 
*Obj)) + return createFileError(Config.InputFilename, std::move(E)); + COFFWriter Writer(*Obj, Out); + if (Error E = Writer.write()) + return createFileError(Config.OutputFilename, std::move(E)); + return Error::success(); +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp b/llvm/lib/ObjCopy/COFF/COFFObject.cpp new file mode 100644 index 000000000000..1d27b7eaa891 --- /dev/null +++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp @@ -0,0 +1,132 @@ +//===- COFFObject.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "COFFObject.h" +#include "llvm/ADT/DenseSet.h" +#include + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; + +void Object::addSymbols(ArrayRef NewSymbols) { + for (Symbol S : NewSymbols) { + S.UniqueId = NextSymbolUniqueId++; + Symbols.emplace_back(S); + } + updateSymbols(); +} + +void Object::updateSymbols() { + SymbolMap = DenseMap(Symbols.size()); + for (Symbol &Sym : Symbols) + SymbolMap[Sym.UniqueId] = &Sym; +} + +const Symbol *Object::findSymbol(size_t UniqueId) const { + return SymbolMap.lookup(UniqueId); +} + +Error Object::removeSymbols( + function_ref(const Symbol &)> ToRemove) { + Error Errs = Error::success(); + llvm::erase_if(Symbols, [ToRemove, &Errs](const Symbol &Sym) { + Expected ShouldRemove = ToRemove(Sym); + if (!ShouldRemove) { + Errs = joinErrors(std::move(Errs), ShouldRemove.takeError()); + return false; + } + return *ShouldRemove; + }); + + updateSymbols(); + return Errs; +} + +Error Object::markSymbols() { + for (Symbol &Sym : Symbols) + Sym.Referenced = false; + for (const Section &Sec : Sections) { + for (const Relocation &R : Sec.Relocs) { + auto It = SymbolMap.find(R.Target); + if (It == SymbolMap.end()) + return createStringError(object_error::invalid_symbol_index, + "relocation target %zu not found", R.Target); + It->second->Referenced = true; + } + } + return Error::success(); +} + +void Object::addSections(ArrayRef
NewSections) { + for (Section S : NewSections) { + S.UniqueId = NextSectionUniqueId++; + Sections.emplace_back(S); + } + updateSections(); +} + +void Object::updateSections() { + SectionMap = DenseMap(Sections.size()); + size_t Index = 1; + for (Section &S : Sections) { + SectionMap[S.UniqueId] = &S; + S.Index = Index++; + } +} + +const Section *Object::findSection(ssize_t UniqueId) const { + return SectionMap.lookup(UniqueId); +} + +void Object::removeSections(function_ref ToRemove) { + DenseSet AssociatedSections; + auto RemoveAssociated = [&AssociatedSections](const Section &Sec) { + return AssociatedSections.contains(Sec.UniqueId); + }; + do { + DenseSet RemovedSections; + llvm::erase_if(Sections, [ToRemove, &RemovedSections](const Section &Sec) { + bool Remove = ToRemove(Sec); + if (Remove) + RemovedSections.insert(Sec.UniqueId); + return Remove; + }); + // Remove all symbols referring to the removed sections. + AssociatedSections.clear(); + llvm::erase_if( + Symbols, [&RemovedSections, &AssociatedSections](const Symbol &Sym) { + // If there are sections that are associative to a removed + // section, + // remove those as well as nothing will include them (and we can't + // leave them dangling). + if (RemovedSections.contains(Sym.AssociativeComdatTargetSectionId)) + AssociatedSections.insert(Sym.TargetSectionId); + return RemovedSections.contains(Sym.TargetSectionId); + }); + ToRemove = RemoveAssociated; + } while (!AssociatedSections.empty()); + updateSections(); + updateSymbols(); +} + +void Object::truncateSections(function_ref ToTruncate) { + for (Section &Sec : Sections) { + if (ToTruncate(Sec)) { + Sec.clearContents(); + Sec.Relocs.clear(); + Sec.Header.SizeOfRawData = 0; + } + } +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h b/llvm/lib/ObjCopy/COFF/COFFObject.h new file mode 100644 index 000000000000..66c0a19429ce --- /dev/null +++ b/llvm/lib/ObjCopy/COFF/COFFObject.h @@ -0,0 +1,212 @@ +//===- COFFObject.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H +#define LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include +#include +#include + +namespace llvm { +namespace objcopy { +namespace coff { + +struct Relocation { + Relocation() = default; + Relocation(const object::coff_relocation &R) : Reloc(R) {} + + object::coff_relocation Reloc; + size_t Target = 0; + StringRef TargetName; // Used for diagnostics only +}; + +struct Section { + object::coff_section Header; + std::vector Relocs; + StringRef Name; + ssize_t UniqueId; + size_t Index; + + ArrayRef getContents() const { + if (!OwnedContents.empty()) + return OwnedContents; + return ContentsRef; + } + + void setContentsRef(ArrayRef Data) { + OwnedContents.clear(); + ContentsRef = Data; + } + + void setOwnedContents(std::vector &&Data) { + ContentsRef = ArrayRef(); + OwnedContents = std::move(Data); + Header.SizeOfRawData = OwnedContents.size(); + } + + void clearContents() { + ContentsRef = ArrayRef(); + OwnedContents.clear(); + } + +private: + ArrayRef ContentsRef; + std::vector OwnedContents; +}; + +struct AuxSymbol { + AuxSymbol(ArrayRef In) { + assert(In.size() == sizeof(Opaque)); + std::copy(In.begin(), In.end(), Opaque); + } + + ArrayRef getRef() const { + return ArrayRef(Opaque, sizeof(Opaque)); + } + + uint8_t Opaque[sizeof(object::coff_symbol16)]; +}; + +struct Symbol { + object::coff_symbol32 Sym; + StringRef Name; + std::vector AuxData; + StringRef AuxFile; + ssize_t TargetSectionId; + ssize_t AssociativeComdatTargetSectionId = 0; + Optional WeakTargetSymbolId; + size_t UniqueId; + size_t RawIndex; + bool Referenced; +}; + +struct Object { + bool IsPE = false; + + object::dos_header DosHeader; + ArrayRef DosStub; + + object::coff_file_header CoffFileHeader; + + bool Is64 = false; + object::pe32plus_header PeHeader; + uint32_t BaseOfData = 0; // pe32plus_header lacks this field. + + std::vector DataDirectories; + + ArrayRef getSymbols() const { return Symbols; } + // This allows mutating individual Symbols, but not mutating the list + // of symbols itself. + iterator_range::iterator> getMutableSymbols() { + return make_range(Symbols.begin(), Symbols.end()); + } + + const Symbol *findSymbol(size_t UniqueId) const; + + void addSymbols(ArrayRef NewSymbols); + Error removeSymbols(function_ref(const Symbol &)> ToRemove); + + // Set the Referenced field on all Symbols, based on relocations in + // all sections. + Error markSymbols(); + + ArrayRef
getSections() const { return Sections; } + // This allows mutating individual Sections, but not mutating the list + // of sections itself. + iterator_range::iterator> getMutableSections() { + return make_range(Sections.begin(), Sections.end()); + } + + const Section *findSection(ssize_t UniqueId) const; + + void addSections(ArrayRef
NewSections); + void removeSections(function_ref ToRemove); + void truncateSections(function_ref ToTruncate); + +private: + std::vector Symbols; + DenseMap SymbolMap; + + size_t NextSymbolUniqueId = 0; + + std::vector
Sections; + DenseMap SectionMap; + + ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined. + + // Update SymbolMap. + void updateSymbols(); + + // Update SectionMap and Index in each Section. + void updateSections(); +}; + +// Copy between coff_symbol16 and coff_symbol32. +// The source and destination files can use either coff_symbol16 or +// coff_symbol32, while we always store them as coff_symbol32 in the +// intermediate data structure. +template +void copySymbol(Symbol1Ty &Dest, const Symbol2Ty &Src) { + static_assert(sizeof(Dest.Name.ShortName) == sizeof(Src.Name.ShortName), + "Mismatched name sizes"); + memcpy(Dest.Name.ShortName, Src.Name.ShortName, sizeof(Dest.Name.ShortName)); + Dest.Value = Src.Value; + Dest.SectionNumber = Src.SectionNumber; + Dest.Type = Src.Type; + Dest.StorageClass = Src.StorageClass; + Dest.NumberOfAuxSymbols = Src.NumberOfAuxSymbols; +} + +// Copy between pe32_header and pe32plus_header. +// We store the intermediate state in a pe32plus_header. +template +void copyPeHeader(PeHeader1Ty &Dest, const PeHeader2Ty &Src) { + Dest.Magic = Src.Magic; + Dest.MajorLinkerVersion = Src.MajorLinkerVersion; + Dest.MinorLinkerVersion = Src.MinorLinkerVersion; + Dest.SizeOfCode = Src.SizeOfCode; + Dest.SizeOfInitializedData = Src.SizeOfInitializedData; + Dest.SizeOfUninitializedData = Src.SizeOfUninitializedData; + Dest.AddressOfEntryPoint = Src.AddressOfEntryPoint; + Dest.BaseOfCode = Src.BaseOfCode; + Dest.ImageBase = Src.ImageBase; + Dest.SectionAlignment = Src.SectionAlignment; + Dest.FileAlignment = Src.FileAlignment; + Dest.MajorOperatingSystemVersion = Src.MajorOperatingSystemVersion; + Dest.MinorOperatingSystemVersion = Src.MinorOperatingSystemVersion; + Dest.MajorImageVersion = Src.MajorImageVersion; + Dest.MinorImageVersion = Src.MinorImageVersion; + Dest.MajorSubsystemVersion = Src.MajorSubsystemVersion; + Dest.MinorSubsystemVersion = Src.MinorSubsystemVersion; + Dest.Win32VersionValue = Src.Win32VersionValue; + Dest.SizeOfImage = Src.SizeOfImage; + Dest.SizeOfHeaders = Src.SizeOfHeaders; + Dest.CheckSum = Src.CheckSum; + Dest.Subsystem = Src.Subsystem; + Dest.DLLCharacteristics = Src.DLLCharacteristics; + Dest.SizeOfStackReserve = Src.SizeOfStackReserve; + Dest.SizeOfStackCommit = Src.SizeOfStackCommit; + Dest.SizeOfHeapReserve = Src.SizeOfHeapReserve; + Dest.SizeOfHeapCommit = Src.SizeOfHeapCommit; + Dest.LoaderFlags = Src.LoaderFlags; + Dest.NumberOfRvaAndSize = Src.NumberOfRvaAndSize; +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.cpp b/llvm/lib/ObjCopy/COFF/COFFReader.cpp new file mode 100644 index 000000000000..44bf303078dd --- /dev/null +++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp @@ -0,0 +1,226 @@ +//===- COFFReader.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "COFFReader.h" +#include "COFFObject.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; +using namespace COFF; + +Error COFFReader::readExecutableHeaders(Object &Obj) const { + const dos_header *DH = COFFObj.getDOSHeader(); + Obj.Is64 = COFFObj.is64(); + if (!DH) + return Error::success(); + + Obj.IsPE = true; + Obj.DosHeader = *DH; + if (DH->AddressOfNewExeHeader > sizeof(*DH)) + Obj.DosStub = ArrayRef(reinterpret_cast(&DH[1]), + DH->AddressOfNewExeHeader - sizeof(*DH)); + + if (COFFObj.is64()) { + Obj.PeHeader = *COFFObj.getPE32PlusHeader(); + } else { + const pe32_header *PE32 = COFFObj.getPE32Header(); + copyPeHeader(Obj.PeHeader, *PE32); + // The pe32plus_header (stored in Object) lacks the BaseOfData field. + Obj.BaseOfData = PE32->BaseOfData; + } + + for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) { + const data_directory *Dir = COFFObj.getDataDirectory(I); + if (!Dir) + return errorCodeToError(object_error::parse_failed); + Obj.DataDirectories.emplace_back(*Dir); + } + return Error::success(); +} + +Error COFFReader::readSections(Object &Obj) const { + std::vector
+
+Error COFFReader::readSections(Object &Obj) const {
+  std::vector<Section> Sections;
+  // Section indexing starts from 1.
+  for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
+    Expected<const coff_section *> SecOrErr = COFFObj.getSection(I);
+    if (!SecOrErr)
+      return SecOrErr.takeError();
+    const coff_section *Sec = *SecOrErr;
+    Sections.push_back(Section());
+    Section &S = Sections.back();
+    S.Header = *Sec;
+    S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+    ArrayRef<uint8_t> Contents;
+    if (Error E = COFFObj.getSectionContents(Sec, Contents))
+      return E;
+    S.setContentsRef(Contents);
+    ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
+    for (const coff_relocation &R : Relocs)
+      S.Relocs.push_back(R);
+    if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec))
+      S.Name = *NameOrErr;
+    else
+      return NameOrErr.takeError();
+  }
+  Obj.addSections(Sections);
+  return Error::success();
+}
+
+Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
+  std::vector<Symbol> Symbols;
+  Symbols.reserve(COFFObj.getRawNumberOfSymbols());
+  ArrayRef<Section> Sections = Obj.getSections();
+  for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
+    Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
+    if (!SymOrErr)
+      return SymOrErr.takeError();
+    COFFSymbolRef SymRef = *SymOrErr;
+
+    Symbols.push_back(Symbol());
+    Symbol &Sym = Symbols.back();
+    // Copy symbols from the original form into an intermediate coff_symbol32.
+    if (IsBigObj)
+      copySymbol(Sym.Sym,
+                 *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr()));
+    else
+      copySymbol(Sym.Sym,
+                 *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
+    auto NameOrErr = COFFObj.getSymbolName(SymRef);
+    if (!NameOrErr)
+      return NameOrErr.takeError();
+    Sym.Name = *NameOrErr;
+
+    ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
+    size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
+    assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
+    // The auxiliary symbols are structs of sizeof(coff_symbol16) each.
+    // In the big object format (where symbols are coff_symbol32), each
+    // auxiliary symbol is padded with 2 bytes at the end. Copy each
+    // auxiliary symbol to the Sym.AuxData vector. For file symbols,
+    // the whole range of aux symbols is interpreted as one null-padded
+    // string instead.
+    if (SymRef.isFileRecord())
+      Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
+                              AuxData.size())
+                        .rtrim('\0');
+    else
+      for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
+        Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
+
+    // Find the unique id of the section
+    if (SymRef.getSectionNumber() <=
+        0) // Special symbol (undefined/absolute/debug)
+      Sym.TargetSectionId = SymRef.getSectionNumber();
+    else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) <
+             Sections.size())
+      Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId;
+    else
+      return createStringError(object_error::parse_failed,
+                               "section number out of range");
+    // For section definitions, check if it is comdat associative, and if
+    // it is, find the target section unique id.
+    const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
+    const coff_aux_weak_external *WE = SymRef.getWeakExternal();
+    if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+      int32_t Index = SD->getNumber(IsBigObj);
+      if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size())
+        return createStringError(object_error::parse_failed,
+                                 "unexpected associative section index");
+      Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId;
+    } else if (WE) {
+      // This is a raw symbol index for now, but store it in the Symbol
+      // until we've added them to the Object, which assigns the final
+      // unique ids.
+      Sym.WeakTargetSymbolId = WE->TagIndex;
+    }
+    I += 1 + SymRef.getNumberOfAuxSymbols();
+  }
+  Obj.addSymbols(Symbols);
+  return Error::success();
+}
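The slicing in the loop above relies on the fixed record sizes from the COFF spec; a standalone sketch of that arithmetic, with the sizes written out as plain numbers (18 for coff_symbol16, 20 for coff_symbol32):

#include <cstddef>

// In a big object each symbol-table slot is 20 bytes, but an auxiliary record
// only carries 18 bytes of payload, so the slice above drops 2 trailing
// padding bytes per record. For a symbol with two aux records:
//   record 0 payload: bytes [ 0, 18) of AuxData
//   record 1 payload: bytes [20, 38) of AuxData
constexpr size_t BigObjSlot = 20, AuxPayload = 18;
static_assert(BigObjSlot - AuxPayload == 2, "two padding bytes per record");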
+
+Error COFFReader::setSymbolTargets(Object &Obj) const {
+  std::vector<const Symbol *> RawSymbolTable;
+  for (const Symbol &Sym : Obj.getSymbols()) {
+    RawSymbolTable.push_back(&Sym);
+    for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
+      RawSymbolTable.push_back(nullptr);
+  }
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+    // Convert WeakTargetSymbolId from the original raw symbol index to
+    // a proper unique id.
+    if (Sym.WeakTargetSymbolId) {
+      if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size())
+        return createStringError(object_error::parse_failed,
+                                 "weak external reference out of range");
+      const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId];
+      if (Target == nullptr)
+        return createStringError(object_error::parse_failed,
+                                 "invalid SymbolTableIndex");
+      Sym.WeakTargetSymbolId = Target->UniqueId;
+    }
+  }
+  for (Section &Sec : Obj.getMutableSections()) {
+    for (Relocation &R : Sec.Relocs) {
+      if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
+        return createStringError(object_error::parse_failed,
+                                 "SymbolTableIndex out of range");
+      const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
+      if (Sym == nullptr)
+        return createStringError(object_error::parse_failed,
+                                 "invalid SymbolTableIndex");
+      R.Target = Sym->UniqueId;
+      R.TargetName = Sym->Name;
+    }
+  }
+  return Error::success();
+}
+
+Expected<std::unique_ptr<Object>> COFFReader::create() const {
+  auto Obj = std::make_unique<Object>();
+
+  bool IsBigObj = false;
+  if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) {
+    Obj->CoffFileHeader = *CFH;
+  } else {
+    const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader();
+    if (!CBFH)
+      return createStringError(object_error::parse_failed,
+                               "no COFF file header returned");
+    // Only copy the few fields from the bigobj header that we need
+    // and won't recreate in the end.
+    Obj->CoffFileHeader.Machine = CBFH->Machine;
+    Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp;
+    IsBigObj = true;
+  }
+
+  if (Error E = readExecutableHeaders(*Obj))
+    return std::move(E);
+  if (Error E = readSections(*Obj))
+    return std::move(E);
+  if (Error E = readSymbols(*Obj, IsBigObj))
+    return std::move(E);
+  if (Error E = setSymbolTargets(*Obj))
+    return std::move(E);
+
+  return std::move(Obj);
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
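A sketch of how a caller could drive this reader: COFFObjectFile::create is the generic parser from llvm/Object/COFF.h, and everything else below is visible in this patch (the function name parse is hypothetical).

#include "COFFReader.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::objcopy::coff;

// Hypothetical driver: parse an in-memory COFF file into the intermediate
// Object representation used by the reader/writer pair.
Expected<std::unique_ptr<Object>> parse(MemoryBufferRef Buf) {
  Expected<std::unique_ptr<COFFObjectFile>> COFFOrErr =
      COFFObjectFile::create(Buf);
  if (!COFFOrErr)
    return COFFOrErr.takeError();
  COFFReader Reader(**COFFOrErr);
  return Reader.create();
}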
diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.h b/llvm/lib/ObjCopy/COFF/COFFReader.h
new file mode 100644
index 000000000000..b4957f844392
--- /dev/null
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.h
@@ -0,0 +1,41 @@
+//===- COFFReader.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_COFF_COFFREADER_H
+#define LLVM_LIB_OBJCOPY_COFF_COFFREADER_H
+
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Object;
+
+using object::COFFObjectFile;
+
+class COFFReader {
+  const COFFObjectFile &COFFObj;
+
+  Error readExecutableHeaders(Object &Obj) const;
+  Error readSections(Object &Obj) const;
+  Error readSymbols(Object &Obj, bool IsBigObj) const;
+  Error setSymbolTargets(Object &Obj) const;
+
+public:
+  explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {}
+  Expected<std::unique_ptr<Object>> create() const;
+};
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_COFF_COFFREADER_H
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
new file mode 100644
index 000000000000..88eb4d14ba25
--- /dev/null
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -0,0 +1,466 @@
+//===- COFFWriter.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "COFFWriter.h"
+#include "COFFObject.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <cstring>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+using namespace COFF;
+
+Error COFFWriter::finalizeRelocTargets() {
+  for (Section &Sec : Obj.getMutableSections()) {
+    for (Relocation &R : Sec.Relocs) {
+      const Symbol *Sym = Obj.findSymbol(R.Target);
+      if (Sym == nullptr)
+        return createStringError(object_error::invalid_symbol_index,
+                                 "relocation target '%s' (%zu) not found",
+                                 R.TargetName.str().c_str(), R.Target);
+      R.Reloc.SymbolTableIndex = Sym->RawIndex;
+    }
+  }
+  return Error::success();
+}
+
+Error COFFWriter::finalizeSymbolContents() {
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+    if (Sym.TargetSectionId <= 0) {
+      // Undefined, or a special kind of symbol. These negative values
+      // are stored in the SectionNumber field which is unsigned.
+      Sym.Sym.SectionNumber = static_cast<uint32_t>(Sym.TargetSectionId);
+    } else {
+      const Section *Sec = Obj.findSection(Sym.TargetSectionId);
+      if (Sec == nullptr)
+        return createStringError(object_error::invalid_symbol_index,
+                                 "symbol '%s' points to a removed section",
+                                 Sym.Name.str().c_str());
+      Sym.Sym.SectionNumber = Sec->Index;
+
+      if (Sym.Sym.NumberOfAuxSymbols == 1 &&
+          Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
+        coff_aux_section_definition *SD =
+            reinterpret_cast<coff_aux_section_definition *>(
+                Sym.AuxData[0].Opaque);
+        uint32_t SDSectionNumber;
+        if (Sym.AssociativeComdatTargetSectionId == 0) {
+          // Not a comdat associative section; just set the Number field to
+          // the number of the section itself.
+          SDSectionNumber = Sec->Index;
+        } else {
+          Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId);
+          if (Sec == nullptr)
+            return createStringError(
+                object_error::invalid_symbol_index,
+                "symbol '%s' is associative to a removed section",
+                Sym.Name.str().c_str());
+          SDSectionNumber = Sec->Index;
+        }
+        // Update the section definition with the new section number.
+        SD->NumberLowPart = static_cast<uint16_t>(SDSectionNumber);
+        SD->NumberHighPart = static_cast<uint16_t>(SDSectionNumber >> 16);
+      }
+    }
+    // Check that we actually have AuxData to match the weak symbol target
+    // we want to set. Only >= 1 would be required, but only == 1 makes sense.
+    if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
+      coff_aux_weak_external *WE =
+          reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
+      const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
+      if (Target == nullptr)
+        return createStringError(object_error::invalid_symbol_index,
+                                 "symbol '%s' is missing its weak target",
+                                 Sym.Name.str().c_str());
+      WE->TagIndex = Target->RawIndex;
+    }
+  }
+  return Error::success();
+}
+
+void COFFWriter::layoutSections() {
+  for (auto &S : Obj.getMutableSections()) {
+    if (S.Header.SizeOfRawData > 0)
+      S.Header.PointerToRawData = FileSize;
+    FileSize += S.Header.SizeOfRawData; // For executables, this is already
+                                        // aligned to FileAlignment.
+    if (S.Relocs.size() >= 0xffff) {
+      S.Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+      S.Header.NumberOfRelocations = 0xffff;
+      S.Header.PointerToRelocations = FileSize;
+      FileSize += sizeof(coff_relocation);
+    } else {
+      S.Header.NumberOfRelocations = S.Relocs.size();
+      S.Header.PointerToRelocations = S.Relocs.size() ? FileSize : 0;
+    }
+
+    FileSize += S.Relocs.size() * sizeof(coff_relocation);
+    FileSize = alignTo(FileSize, FileAlignment);
+
+    if (S.Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
+      SizeOfInitializedData += S.Header.SizeOfRawData;
+  }
+}
+
+Expected<size_t> COFFWriter::finalizeStringTable() {
+  for (const auto &S : Obj.getSections())
+    if (S.Name.size() > COFF::NameSize)
+      StrTabBuilder.add(S.Name);
+
+  for (const auto &S : Obj.getSymbols())
+    if (S.Name.size() > COFF::NameSize)
+      StrTabBuilder.add(S.Name);
+
+  StrTabBuilder.finalize();
+
+  for (auto &S : Obj.getMutableSections()) {
+    memset(S.Header.Name, 0, sizeof(S.Header.Name));
+    if (S.Name.size() <= COFF::NameSize) {
+      // Short names can go in the field directly.
+      memcpy(S.Header.Name, S.Name.data(), S.Name.size());
+    } else {
+      // Offset of the section name in the string table.
+      size_t Offset = StrTabBuilder.getOffset(S.Name);
+      if (!COFF::encodeSectionName(S.Header.Name, Offset))
+        return createStringError(object_error::invalid_section_index,
+                                 "COFF string table is greater than 64GB, "
+                                 "unable to encode section name offset");
+    }
+  }
+  for (auto &S : Obj.getMutableSymbols()) {
+    if (S.Name.size() > COFF::NameSize) {
+      S.Sym.Name.Offset.Zeroes = 0;
+      S.Sym.Name.Offset.Offset = StrTabBuilder.getOffset(S.Name);
+    } else {
+      strncpy(S.Sym.Name.ShortName, S.Name.data(), COFF::NameSize);
+    }
+  }
+  return StrTabBuilder.getSize();
+}
+
+template <class SymbolTy>
+std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
+  size_t RawSymIndex = 0;
+  for (auto &S : Obj.getMutableSymbols()) {
+    // Symbols normally have NumberOfAuxSymbols set correctly all the time.
+    // For file symbols, we need to know the output file's symbol size to be
+    // able to calculate the number of slots it occupies.
+    if (!S.AuxFile.empty())
+      S.Sym.NumberOfAuxSymbols =
+          alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
+    S.RawIndex = RawSymIndex;
+    RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
+  }
+  return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
+}
+
+Error COFFWriter::finalize(bool IsBigObj) {
+  size_t SymTabSize, SymbolSize;
+  std::tie(SymTabSize, SymbolSize) = IsBigObj
+                                         ? finalizeSymbolTable<coff_symbol32>()
+                                         : finalizeSymbolTable<coff_symbol16>();
+
+  if (Error E = finalizeRelocTargets())
+    return E;
+  if (Error E = finalizeSymbolContents())
+    return E;
+
+  size_t SizeOfHeaders = 0;
+  FileAlignment = 1;
+  size_t PeHeaderSize = 0;
+  if (Obj.IsPE) {
+    Obj.DosHeader.AddressOfNewExeHeader =
+        sizeof(Obj.DosHeader) + Obj.DosStub.size();
+    SizeOfHeaders += Obj.DosHeader.AddressOfNewExeHeader + sizeof(PEMagic);
+
+    FileAlignment = Obj.PeHeader.FileAlignment;
+    Obj.PeHeader.NumberOfRvaAndSize = Obj.DataDirectories.size();
+
+    PeHeaderSize = Obj.Is64 ? sizeof(pe32plus_header) : sizeof(pe32_header);
+    SizeOfHeaders +=
+        PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
+  }
+  Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size();
+  SizeOfHeaders +=
+      IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
+  SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size();
+  SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
+
+  Obj.CoffFileHeader.SizeOfOptionalHeader =
+      PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
+
+  FileSize = SizeOfHeaders;
+  SizeOfInitializedData = 0;
+
+  layoutSections();
+
+  if (Obj.IsPE) {
+    Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
+    Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
+
+    if (!Obj.getSections().empty()) {
+      const Section &S = Obj.getSections().back();
+      Obj.PeHeader.SizeOfImage =
+          alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
+                  Obj.PeHeader.SectionAlignment);
+    }
+
+    // If the PE header had a checksum, clear it, since it isn't valid
+    // any longer. (We don't calculate a new one.)
+    Obj.PeHeader.CheckSum = 0;
+  }
+
+  Expected<size_t> StrTabSizeOrErr = finalizeStringTable();
+  if (!StrTabSizeOrErr)
+    return StrTabSizeOrErr.takeError();
+
+  size_t StrTabSize = *StrTabSizeOrErr;
+
+  size_t PointerToSymbolTable = FileSize;
+  // StrTabSize <= 4 is the size of an empty string table, only consisting
+  // of the length field.
+  if (SymTabSize == 0 && StrTabSize <= 4 && Obj.IsPE) {
+    // For executables, don't point to the symbol table and skip writing
+    // the length field, if both the symbol and string tables are empty.
+    PointerToSymbolTable = 0;
+    StrTabSize = 0;
+  }
+
+  size_t NumRawSymbols = SymTabSize / SymbolSize;
+  Obj.CoffFileHeader.PointerToSymbolTable = PointerToSymbolTable;
+  Obj.CoffFileHeader.NumberOfSymbols = NumRawSymbols;
+  FileSize += SymTabSize + StrTabSize;
+  FileSize = alignTo(FileSize, FileAlignment);
+
+  return Error::success();
+}
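The alignTo quotient in finalizeSymbolTable is plain ceiling division; a standalone sketch of the slot count for a long file-symbol name (18 and 20 are sizeof(coff_symbol16) and sizeof(coff_symbol32); the helper name is made up):

#include <cstddef>

// Hypothetical mirror of the computation above: how many whole symbol-table
// slots a NameLen-byte aux string occupies when each slot is SlotSize bytes.
static size_t auxFileSlots(size_t NameLen, size_t SlotSize) {
  return (NameLen + SlotSize - 1) / SlotSize; // alignTo(NameLen, Slot) / Slot
}
// e.g. a 25-byte name: auxFileSlots(25, 18) == 2 and auxFileSlots(25, 20) == 2,
// so the same file symbol occupies two aux slots in either symbol format.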
+
+void COFFWriter::writeHeaders(bool IsBigObj) {
+  uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+  if (Obj.IsPE) {
+    memcpy(Ptr, &Obj.DosHeader, sizeof(Obj.DosHeader));
+    Ptr += sizeof(Obj.DosHeader);
+    memcpy(Ptr, Obj.DosStub.data(), Obj.DosStub.size());
+    Ptr += Obj.DosStub.size();
+    memcpy(Ptr, PEMagic, sizeof(PEMagic));
+    Ptr += sizeof(PEMagic);
+  }
+  if (!IsBigObj) {
+    memcpy(Ptr, &Obj.CoffFileHeader, sizeof(Obj.CoffFileHeader));
+    Ptr += sizeof(Obj.CoffFileHeader);
+  } else {
+    // Generate a coff_bigobj_file_header, filling it in with the values
+    // from Obj.CoffFileHeader. All extra fields that don't exist in
+    // coff_file_header can be set to hardcoded values.
+    coff_bigobj_file_header BigObjHeader;
+    BigObjHeader.Sig1 = IMAGE_FILE_MACHINE_UNKNOWN;
+    BigObjHeader.Sig2 = 0xffff;
+    BigObjHeader.Version = BigObjHeader::MinBigObjectVersion;
+    BigObjHeader.Machine = Obj.CoffFileHeader.Machine;
+    BigObjHeader.TimeDateStamp = Obj.CoffFileHeader.TimeDateStamp;
+    memcpy(BigObjHeader.UUID, BigObjMagic, sizeof(BigObjMagic));
+    BigObjHeader.unused1 = 0;
+    BigObjHeader.unused2 = 0;
+    BigObjHeader.unused3 = 0;
+    BigObjHeader.unused4 = 0;
+    // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
+    // get the original one instead.
+    BigObjHeader.NumberOfSections = Obj.getSections().size();
+    BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
+    BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
+
+    memcpy(Ptr, &BigObjHeader, sizeof(BigObjHeader));
+    Ptr += sizeof(BigObjHeader);
+  }
+  if (Obj.IsPE) {
+    if (Obj.Is64) {
+      memcpy(Ptr, &Obj.PeHeader, sizeof(Obj.PeHeader));
+      Ptr += sizeof(Obj.PeHeader);
+    } else {
+      pe32_header PeHeader;
+      copyPeHeader(PeHeader, Obj.PeHeader);
+      // The pe32plus_header (stored in Object) lacks the BaseOfData field.
+      PeHeader.BaseOfData = Obj.BaseOfData;
+
+      memcpy(Ptr, &PeHeader, sizeof(PeHeader));
+      Ptr += sizeof(PeHeader);
+    }
+    for (const auto &DD : Obj.DataDirectories) {
+      memcpy(Ptr, &DD, sizeof(DD));
+      Ptr += sizeof(DD);
+    }
+  }
+  for (const auto &S : Obj.getSections()) {
+    memcpy(Ptr, &S.Header, sizeof(S.Header));
+    Ptr += sizeof(S.Header);
+  }
+}
+
+void COFFWriter::writeSections() {
+  for (const auto &S : Obj.getSections()) {
+    uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+                   S.Header.PointerToRawData;
+    ArrayRef<uint8_t> Contents = S.getContents();
+    std::copy(Contents.begin(), Contents.end(), Ptr);
+
+    // For executable sections, pad the remainder of the raw data size with
+    // 0xcc, which is int3 on x86.
+    if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) &&
+        S.Header.SizeOfRawData > Contents.size())
+      memset(Ptr + Contents.size(), 0xcc,
+             S.Header.SizeOfRawData - Contents.size());
+
+    Ptr += S.Header.SizeOfRawData;
+
+    if (S.Relocs.size() >= 0xffff) {
+      object::coff_relocation R;
+      R.VirtualAddress = S.Relocs.size() + 1;
+      R.SymbolTableIndex = 0;
+      R.Type = 0;
+      memcpy(Ptr, &R, sizeof(R));
+      Ptr += sizeof(R);
+    }
+    for (const auto &R : S.Relocs) {
+      memcpy(Ptr, &R.Reloc, sizeof(R.Reloc));
+      Ptr += sizeof(R.Reloc);
+    }
+  }
+}
+
+template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
+  uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+                 Obj.CoffFileHeader.PointerToSymbolTable;
+  for (const auto &S : Obj.getSymbols()) {
+    // Convert symbols back to the right size, from coff_symbol32.
+    copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
+                                        S.Sym);
+    Ptr += sizeof(SymbolTy);
+    if (!S.AuxFile.empty()) {
+      // For file symbols, just write the string into the aux symbol slots,
+      // assuming that the unwritten parts are initialized to zero in the
+      // memory mapped file.
+      std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
+      Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
+    } else {
+      // For other auxiliary symbols, write their opaque payload into one
+      // symbol table slot each. For big object files, the symbols are larger
+      // than the opaque auxiliary symbol struct and we leave padding at the
+      // end of each entry.
+      for (const AuxSymbol &AuxSym : S.AuxData) {
+        ArrayRef<uint8_t> Ref = AuxSym.getRef();
+        std::copy(Ref.begin(), Ref.end(), Ptr);
+        Ptr += sizeof(SymbolTy);
+      }
+    }
+  }
+  if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
+    // Always write a string table in object files, even an empty one.
+    StrTabBuilder.write(Ptr);
+    Ptr += StrTabBuilder.getSize();
+  }
+}
+
+Error COFFWriter::write(bool IsBigObj) {
+  if (Error E = finalize(IsBigObj))
+    return E;
+
+  Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize);
+  if (!Buf)
+    return createStringError(llvm::errc::not_enough_memory,
+                             "failed to allocate memory buffer of " +
+                                 Twine::utohexstr(FileSize) + " bytes.");
+
+  writeHeaders(IsBigObj);
+  writeSections();
+  if (IsBigObj)
+    writeSymbolStringTables<coff_symbol32>();
+  else
+    writeSymbolStringTables<coff_symbol16>();
+
+  if (Obj.IsPE)
+    if (Error E = patchDebugDirectory())
+      return E;
+
+  // TODO: Implement direct writing to the output stream (without intermediate
+  // memory buffer Buf).
+  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+  return Error::success();
+}
+
+Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) {
+  for (const auto &S : Obj.getSections()) {
+    if (RVA >= S.Header.VirtualAddress &&
+        RVA < S.Header.VirtualAddress + S.Header.SizeOfRawData)
+      return S.Header.PointerToRawData + RVA - S.Header.VirtualAddress;
+  }
+  return createStringError(object_error::parse_failed,
+                           "debug directory payload not found");
+}
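The mapping above is a single subtraction once the containing section is found; a self-contained sketch with the section fields flattened into parameters (all values hypothetical):

#include <cstdint>
#include <optional>

// Mirror of virtualAddressToFileAddress() for one section.
std::optional<uint32_t> rvaToFileOffset(uint32_t RVA, uint32_t VirtualAddress,
                                        uint32_t SizeOfRawData,
                                        uint32_t PointerToRawData) {
  if (RVA < VirtualAddress || RVA >= VirtualAddress + SizeOfRawData)
    return std::nullopt; // RVA not backed by this section's raw data
  return PointerToRawData + (RVA - VirtualAddress);
}
// e.g. rvaToFileOffset(0x2010, 0x2000, 0x1000, 0x800) == 0x810.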
+
+// Locate which sections contain the debug directories, iterate over all
+// the debug_directory structs in there, and set the PointerToRawData field
+// in all of them, according to their new physical location in the file.
+Error COFFWriter::patchDebugDirectory() {
+  if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY)
+    return Error::success();
+  const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
+  if (Dir->Size <= 0)
+    return Error::success();
+  for (const auto &S : Obj.getSections()) {
+    if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
+        Dir->RelativeVirtualAddress <
+            S.Header.VirtualAddress + S.Header.SizeOfRawData) {
+      if (Dir->RelativeVirtualAddress + Dir->Size >
+          S.Header.VirtualAddress + S.Header.SizeOfRawData)
+        return createStringError(object_error::parse_failed,
+                                 "debug directory extends past end of section");
+
+      size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
+      uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+                     S.Header.PointerToRawData + Offset;
+      uint8_t *End = Ptr + Dir->Size;
+      while (Ptr < End) {
+        debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
+        if (Debug->PointerToRawData) {
+          if (Expected<uint32_t> FilePosOrErr =
+                  virtualAddressToFileAddress(Debug->AddressOfRawData))
+            Debug->PointerToRawData = *FilePosOrErr;
+          else
+            return FilePosOrErr.takeError();
+        }
+        Ptr += sizeof(debug_directory);
+        Offset += sizeof(debug_directory);
+      }
+      // Debug directory found and patched, all done.
+      return Error::success();
+    }
+  }
+  return createStringError(object_error::parse_failed,
+                           "debug directory not found");
+}
+
+Error COFFWriter::write() {
+  bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16;
+  if (IsBigObj && Obj.IsPE)
+    return createStringError(object_error::parse_failed,
+                             "too many sections for executable");
+  return write(IsBigObj);
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h
new file mode 100644
index 000000000000..b7dca69e9a81
--- /dev/null
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h
@@ -0,0 +1,63 @@
+//===- COFFWriter.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H
+#define LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H
+
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstddef>
+#include <utility>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Object;
+
+class COFFWriter {
+  Object &Obj;
+  std::unique_ptr<WritableMemoryBuffer> Buf;
+  raw_ostream &Out;
+
+  size_t FileSize;
+  size_t FileAlignment;
+  size_t SizeOfInitializedData;
+  StringTableBuilder StrTabBuilder;
+
+  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
+  Error finalizeRelocTargets();
+  Error finalizeSymbolContents();
+  void layoutSections();
+  Expected<size_t> finalizeStringTable();
+
+  Error finalize(bool IsBigObj);
+
+  void writeHeaders(bool IsBigObj);
+  void writeSections();
+  template <class SymbolTy> void writeSymbolStringTables();
+
+  Error write(bool IsBigObj);
+
+  Error patchDebugDirectory();
+  Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);
+
+public:
+  virtual ~COFFWriter() {}
+  Error write();
+
+  COFFWriter(Object &Obj, raw_ostream &Out)
+      : Obj(Obj), Out(Out), StrTabBuilder(StringTableBuilder::WinCOFF) {}
+};
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H
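Putting the two halves together, a hedged sketch of the full COFF round trip (this mirrors what the COFF executeObjcopyOnBinary entry point, not shown in this hunk, presumably does; the function name copyCOFF and the elided edit step are assumptions):

#include "COFFObject.h"
#include "COFFReader.h"
#include "COFFWriter.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::objcopy::coff;

// Hypothetical end-to-end use of the reader/writer pair above.
Error copyCOFF(const object::COFFObjectFile &In, raw_ostream &Out) {
  COFFReader Reader(In);
  Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
  if (!ObjOrErr)
    return ObjOrErr.takeError();
  // ... apply section/symbol edits to **ObjOrErr here ...
  COFFWriter Writer(**ObjOrErr, Out);
  return Writer.write();
}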
diff --git a/llvm/lib/ObjCopy/CommonConfig.cpp b/llvm/lib/ObjCopy/CommonConfig.cpp
new file mode 100644
index 000000000000..e85715d0c44c
--- /dev/null
+++ b/llvm/lib/ObjCopy/CommonConfig.cpp
@@ -0,0 +1,50 @@
+//===- CommonConfig.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/CommonConfig.h"
+
+namespace llvm {
+namespace objcopy {
+
+Expected<NameOrPattern>
+NameOrPattern::create(StringRef Pattern, MatchStyle MS,
+                      function_ref<Error(Error)> ErrorCallback) {
+  switch (MS) {
+  case MatchStyle::Literal:
+    return NameOrPattern(Pattern);
+  case MatchStyle::Wildcard: {
+    SmallVector<char, 32> Data;
+    bool IsPositiveMatch = true;
+    if (Pattern[0] == '!') {
+      IsPositiveMatch = false;
+      Pattern = Pattern.drop_front();
+    }
+    Expected<GlobPattern> GlobOrErr = GlobPattern::create(Pattern);
+
+    // If we couldn't create it as a glob, report the error, but try again
+    // with a literal if the error reporting is non-fatal.
+    if (!GlobOrErr) {
+      if (Error E = ErrorCallback(GlobOrErr.takeError()))
+        return std::move(E);
+      return create(Pattern, MatchStyle::Literal, ErrorCallback);
+    }
+
+    return NameOrPattern(std::make_shared<GlobPattern>(*GlobOrErr),
+                         IsPositiveMatch);
+  }
+  case MatchStyle::Regex: {
+    SmallVector<char, 32> Data;
+    return NameOrPattern(std::make_shared<Regex>(
+        ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)));
+  }
+  }
+  llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum");
+}
+
+} // end namespace objcopy
+} // end namespace llvm
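A sketch of how a caller might exercise the wildcard path above: a leading '!' negates the match, and a malformed glob is retried as a literal when the callback swallows the error (the pattern string is made up):

#include "llvm/ObjCopy/CommonConfig.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::objcopy;

// Hypothetical caller: matches every name NOT starting with ".debug_".
Expected<NameOrPattern> Pat = NameOrPattern::create(
    "!.debug_*", MatchStyle::Wildcard,
    [](Error E) { consumeError(std::move(E)); return Error::success(); });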
diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp
new file mode 100644
index 000000000000..9d8883a15c0b
--- /dev/null
+++ b/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -0,0 +1,97 @@
+//===- ConfigManager.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/ConfigManager.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+
+Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
+  if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+      !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() ||
+      !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() ||
+      !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
+      !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
+      !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
+      Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
+      Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
+      Common.DecompressDebugSections ||
+      Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
+    return createStringError(llvm::errc::invalid_argument,
+                             "option is not supported for COFF");
+
+  return COFF;
+}
+
+Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
+  if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+      !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
+      !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
+      !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
+      !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
+      !Common.UnneededSymbolsToRemove.empty() ||
+      !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
+      Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU ||
+      Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
+      Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded ||
+      Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
+    return createStringError(llvm::errc::invalid_argument,
+                             "option is not supported for MachO");
+
+  return MachO;
+}
+
+Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
+  if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
+      !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+      !Common.AllocSectionsPrefix.empty() ||
+      Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() ||
+      !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() ||
+      !Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() ||
+      !Common.UnneededSymbolsToRemove.empty() ||
+      !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
+      !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
+      !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty())
+    return createStringError(llvm::errc::invalid_argument,
+                             "only flags for section dumping, removal, and "
+                             "addition are supported");
+
+  return Wasm;
+}
+
+Expected<const XCOFFConfig &> ConfigManager::getXCOFFConfig() const {
+  if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
+      !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+      !Common.AllocSectionsPrefix.empty() ||
+      Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() ||
+      !Common.DumpSection.empty() || !Common.SymbolsToAdd.empty() ||
+      !Common.KeepSection.empty() || !Common.OnlySection.empty() ||
+      !Common.ToRemove.empty() || !Common.SymbolsToGlobalize.empty() ||
+      !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
+      !Common.SymbolsToRemove.empty() ||
+      !Common.UnneededSymbolsToRemove.empty() ||
+      !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
+      !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
+      !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty() ||
+      Common.ExtractDWO || Common.ExtractMainPartition ||
+      Common.OnlyKeepDebug || Common.PreserveDates || Common.StripAllGNU ||
+      Common.StripDWO || Common.StripDebug || Common.StripNonAlloc ||
+      Common.StripSections || Common.Weaken || Common.StripUnneeded ||
+      Common.DecompressDebugSections) {
+    return createStringError(
+        llvm::errc::invalid_argument,
+        "no flags are supported yet, only basic copying is allowed");
+  }
+
+  return XCOFF;
+}
+
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
new file mode 100644
index 000000000000..2d388f8a867e
--- /dev/null
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -0,0 +1,821 @@
+//===- ELFObjcopy.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/ELF/ELFObjcopy.h"
+#include "ELFObject.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::objcopy;
+using namespace llvm::objcopy::elf;
+using namespace llvm::object;
+
+using SectionPred = std::function<bool(const SectionBase &Sec)>;
+
+static bool isDebugSection(const SectionBase &Sec) {
+  return StringRef(Sec.Name).startswith(".debug") || Sec.Name == ".gdb_index";
+}
+
+static bool isDWOSection(const SectionBase &Sec) {
+  return StringRef(Sec.Name).endswith(".dwo");
+}
+
+static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
+  // We can't remove the section header string table.
+  if (&Sec == Obj.SectionNames)
+    return false;
+  // Short of keeping the string table we want to keep everything that is a DWO
+  // section and remove everything else.
+  return !isDWOSection(Sec);
+}
+
+static uint64_t getNewShfFlags(SectionFlag AllFlags) {
+  uint64_t NewFlags = 0;
+  if (AllFlags & SectionFlag::SecAlloc)
+    NewFlags |= ELF::SHF_ALLOC;
+  if (!(AllFlags & SectionFlag::SecReadonly))
+    NewFlags |= ELF::SHF_WRITE;
+  if (AllFlags & SectionFlag::SecCode)
+    NewFlags |= ELF::SHF_EXECINSTR;
+  if (AllFlags & SectionFlag::SecMerge)
+    NewFlags |= ELF::SHF_MERGE;
+  if (AllFlags & SectionFlag::SecStrings)
+    NewFlags |= ELF::SHF_STRINGS;
+  if (AllFlags & SectionFlag::SecExclude)
+    NewFlags |= ELF::SHF_EXCLUDE;
+  return NewFlags;
+}
+
+static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
+                                            uint64_t NewFlags) {
+  // Preserve some flags which should not be dropped when setting flags.
+  // Also, preserve anything OS/processor dependent.
+  const uint64_t PreserveMask =
+      (ELF::SHF_COMPRESSED | ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
+       ELF::SHF_MASKOS | ELF::SHF_MASKPROC | ELF::SHF_TLS |
+       ELF::SHF_INFO_LINK) &
+      ~ELF::SHF_EXCLUDE;
+  return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
+}
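A worked trace of the two helpers above, written as if it lived in this same translation unit (the values follow directly from the code; assuming SectionFlag supports bitwise-or via LLVM's bitmask-enum machinery):

// Requesting "alloc,readonly,code": SHF_WRITE is omitted because SecReadonly
// is set, and SHF_TLS survives from the old flags via PreserveMask.
static uint64_t exampleSectionFlags() {
  uint64_t New = getNewShfFlags(SectionFlag::SecAlloc |
                                SectionFlag::SecReadonly |
                                SectionFlag::SecCode);
  // New == SHF_ALLOC | SHF_EXECINSTR
  return getSectionFlagsPreserveMask(SHF_ALLOC | SHF_WRITE | SHF_TLS, New);
  // == SHF_ALLOC | SHF_EXECINSTR | SHF_TLS (SHF_WRITE dropped, SHF_TLS kept)
}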
+
+static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
+  Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));
+
+  // In GNU objcopy, certain flags promote SHT_NOBITS to SHT_PROGBITS. This
+  // rule may promote more non-ALLOC sections than GNU objcopy, but it is fine
+  // as non-ALLOC SHT_NOBITS sections do not make much sense.
+  if (Sec.Type == SHT_NOBITS &&
+      (!(Sec.Flags & ELF::SHF_ALLOC) ||
+       Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
+    Sec.Type = SHT_PROGBITS;
+}
+
+static ElfType getOutputElfType(const Binary &Bin) {
+  // Infer output ELF type from the input ELF object
+  if (isa<ELFObjectFile<ELF32LE>>(Bin))
+    return ELFT_ELF32LE;
+  if (isa<ELFObjectFile<ELF64LE>>(Bin))
+    return ELFT_ELF64LE;
+  if (isa<ELFObjectFile<ELF32BE>>(Bin))
+    return ELFT_ELF32BE;
+  if (isa<ELFObjectFile<ELF64BE>>(Bin))
+    return ELFT_ELF64BE;
+  llvm_unreachable("Invalid ELFType");
+}
+
+static ElfType getOutputElfType(const MachineInfo &MI) {
+  // Infer output ELF type from the binary arch specified
+  if (MI.Is64Bit)
+    return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE;
+  else
+    return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
+}
+
+static std::unique_ptr<Writer> createELFWriter(const CommonConfig &Config,
+                                               Object &Obj, raw_ostream &Out,
+                                               ElfType OutputElfType) {
+  // Depending on the initial ELFT and OutputFormat we need a different Writer.
+  switch (OutputElfType) {
+  case ELFT_ELF32LE:
+    return std::make_unique<ELFWriter<ELF32LE>>(Obj, Out,
+                                                !Config.StripSections,
+                                                Config.OnlyKeepDebug);
+  case ELFT_ELF64LE:
+    return std::make_unique<ELFWriter<ELF64LE>>(Obj, Out,
+                                                !Config.StripSections,
+                                                Config.OnlyKeepDebug);
+  case ELFT_ELF32BE:
+    return std::make_unique<ELFWriter<ELF32BE>>(Obj, Out,
+                                                !Config.StripSections,
+                                                Config.OnlyKeepDebug);
+  case ELFT_ELF64BE:
+    return std::make_unique<ELFWriter<ELF64BE>>(Obj, Out,
+                                                !Config.StripSections,
+                                                Config.OnlyKeepDebug);
+  }
+  llvm_unreachable("Invalid output format");
+}
+
+static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
+                                            Object &Obj, raw_ostream &Out,
+                                            ElfType OutputElfType) {
+  switch (Config.OutputFormat) {
+  case FileFormat::Binary:
+    return std::make_unique<BinaryWriter>(Obj, Out);
+  case FileFormat::IHex:
+    return std::make_unique<IHexWriter>(Obj, Out);
+  default:
+    return createELFWriter(Config, Obj, Out, OutputElfType);
+  }
+}
+
+template <class... Ts>
+static Error makeStringError(std::error_code EC, const Twine &Msg,
+                             Ts &&...Args) {
+  std::string FullMsg = (EC.message() + ": " + Msg).str();
+  return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
+}
+
+static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
+                               Object &Obj) {
+  for (auto &Sec : Obj.sections()) {
+    if (Sec.Name == SecName) {
+      if (Sec.Type == SHT_NOBITS)
+        return createStringError(object_error::parse_failed,
+                                 "cannot dump section '%s': it has no contents",
+                                 SecName.str().c_str());
+      Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+          FileOutputBuffer::create(Filename, Sec.OriginalData.size());
+      if (!BufferOrErr)
+        return BufferOrErr.takeError();
+      std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+      std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(),
+                Buf->getBufferStart());
+      if (Error E = Buf->commit())
+        return E;
+      return Error::success();
+    }
+  }
+  return createStringError(object_error::parse_failed,
+                           "section '%s' not found", SecName.str().c_str());
+}
+
+static bool isCompressable(const SectionBase &Sec) {
+  return !(Sec.Flags & ELF::SHF_COMPRESSED) &&
+         StringRef(Sec.Name).startswith(".debug");
+}
+
+static Error replaceDebugSections(
+    Object &Obj, function_ref<bool(const SectionBase &)> ShouldReplace,
+    function_ref<Expected<SectionBase *>(const SectionBase *)> AddSection) {
+  // Build a list of the debug sections we are going to replace.
+  // We can't call `AddSection` while iterating over sections,
+  // because it would mutate the sections array.
+  SmallVector<SectionBase *, 13> ToReplace;
+  for (auto &Sec : Obj.sections())
+    if (ShouldReplace(Sec))
+      ToReplace.push_back(&Sec);
+
+  // Build a mapping from original section to a new one.
+  DenseMap<SectionBase *, SectionBase *> FromTo;
+  for (SectionBase *S : ToReplace) {
+    Expected<SectionBase *> NewSection = AddSection(S);
+    if (!NewSection)
+      return NewSection.takeError();
+
+    FromTo[S] = *NewSection;
+  }
+
+  return Obj.replaceSections(FromTo);
+}
+
+static bool isAArch64MappingSymbol(const Symbol &Sym) {
+  if (Sym.Binding != STB_LOCAL || Sym.Type != STT_NOTYPE ||
+      Sym.getShndx() == SHN_UNDEF)
+    return false;
+  StringRef Name = Sym.Name;
+  if (!Name.consume_front("$x") && !Name.consume_front("$d"))
+    return false;
+  return Name.empty() || Name.startswith(".");
+}
+
+static bool isArmMappingSymbol(const Symbol &Sym) {
+  if (Sym.Binding != STB_LOCAL || Sym.Type != STT_NOTYPE ||
+      Sym.getShndx() == SHN_UNDEF)
+    return false;
+  StringRef Name = Sym.Name;
+  if (!Name.consume_front("$a") && !Name.consume_front("$d") &&
+      !Name.consume_front("$t"))
+    return false;
+  return Name.empty() || Name.startswith(".");
+}
+
+// Check if the symbol should be preserved because it is required by ABI.
+static bool isRequiredByABISymbol(const Object &Obj, const Symbol &Sym) {
+  switch (Obj.Machine) {
+  case EM_AARCH64:
+    // Mapping symbols should be preserved for a relocatable object file.
+    return Obj.isRelocatable() && isAArch64MappingSymbol(Sym);
+  case EM_ARM:
+    // Mapping symbols should be preserved for a relocatable object file.
+    return Obj.isRelocatable() && isArmMappingSymbol(Sym);
+  default:
+    return false;
+  }
+}
+
+static bool isUnneededSymbol(const Symbol &Sym) {
+  return !Sym.Referenced &&
+         (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
+         Sym.Type != STT_SECTION;
+}
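The name test inside the two mapping-symbol predicates above, extracted for illustration (binding/type/shndx checks omitted; behavior follows directly from the code):

#include "llvm/ADT/StringRef.h"

// AArch64 variant: accepts "$x"/"$d", bare or with a "."-suffix.
static bool isAArch64MappingSymbolName(llvm::StringRef Name) {
  if (!Name.consume_front("$x") && !Name.consume_front("$d"))
    return false;
  return Name.empty() || Name.startswith(".");
}
// isAArch64MappingSymbolName("$x") and ("$d.relro") are true; ("$xyz") is
// false because the remainder "yz" is neither empty nor starts with ".".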
+
+static Error updateAndRemoveSymbols(const CommonConfig &Config,
+                                    const ELFConfig &ELFConfig, Object &Obj) {
+  // TODO: update or remove symbols only if there is an option that affects
+  // them.
+  if (!Obj.SymbolTable)
+    return Error::success();
+
+  Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
+    // Common and undefined symbols don't make sense as local symbols, and can
+    // even cause crashes if we localize those, so skip them.
+    if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
+        ((ELFConfig.LocalizeHidden &&
+          (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
+         Config.SymbolsToLocalize.matches(Sym.Name)))
+      Sym.Binding = STB_LOCAL;
+
+    // Note: these two globalize flags have very similar names but different
+    // meanings:
+    //
+    // --globalize-symbol: promote a symbol to global
+    // --keep-global-symbol: all symbols except for these should be made local
+    //
+    // If --globalize-symbol is specified for a given symbol, it will be
+    // global in the output file even if it is not included via
+    // --keep-global-symbol. Because of that, make sure to check
+    // --globalize-symbol second.
+    if (!Config.SymbolsToKeepGlobal.empty() &&
+        !Config.SymbolsToKeepGlobal.matches(Sym.Name) &&
+        Sym.getShndx() != SHN_UNDEF)
+      Sym.Binding = STB_LOCAL;
+
+    if (Config.SymbolsToGlobalize.matches(Sym.Name) &&
+        Sym.getShndx() != SHN_UNDEF)
+      Sym.Binding = STB_GLOBAL;
+
+    // SymbolsToWeaken applies to both STB_GLOBAL and STB_GNU_UNIQUE.
+    if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding != STB_LOCAL)
+      Sym.Binding = STB_WEAK;
+
+    if (Config.Weaken && Sym.Binding != STB_LOCAL &&
+        Sym.getShndx() != SHN_UNDEF)
+      Sym.Binding = STB_WEAK;
+
+    const auto I = Config.SymbolsToRename.find(Sym.Name);
+    if (I != Config.SymbolsToRename.end())
+      Sym.Name = std::string(I->getValue());
+
+    if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
+      Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
+  });
+
+  // The purpose of this loop is to mark symbols referenced by sections
+  // (like GroupSection or RelocationSection). This way, we know which
+  // symbols are still 'needed' and which are not.
+  if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() ||
+      !Config.OnlySection.empty()) {
+    for (SectionBase &Sec : Obj.sections())
+      Sec.markSymbols();
+  }
+
+  auto RemoveSymbolsPred = [&](const Symbol &Sym) {
+    if (Config.SymbolsToKeep.matches(Sym.Name) ||
+        (ELFConfig.KeepFileSymbols && Sym.Type == STT_FILE))
+      return false;
+
+    if (Config.SymbolsToRemove.matches(Sym.Name))
+      return true;
+
+    if (Config.StripAll || Config.StripAllGNU)
+      return true;
+
+    if (isRequiredByABISymbol(Obj, Sym))
+      return false;
+
+    if (Config.StripDebug && Sym.Type == STT_FILE)
+      return true;
+
+    if ((Config.DiscardMode == DiscardType::All ||
+         (Config.DiscardMode == DiscardType::Locals &&
+          StringRef(Sym.Name).startswith(".L"))) &&
+        Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF &&
+        Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
+      return true;
+
+    if ((Config.StripUnneeded ||
+         Config.UnneededSymbolsToRemove.matches(Sym.Name)) &&
+        (!Obj.isRelocatable() || isUnneededSymbol(Sym)))
+      return true;
+
+    // We want to remove undefined symbols if all references have been
+    // stripped.
+    if (!Config.OnlySection.empty() && !Sym.Referenced &&
+        Sym.getShndx() == SHN_UNDEF)
+      return true;
+
+    return false;
+  };
+
+  return Obj.removeSymbols(RemoveSymbolsPred);
+}
+
+static Error replaceAndRemoveSections(const CommonConfig &Config,
+                                      const ELFConfig &ELFConfig,
+                                      Object &Obj) {
+  SectionPred RemovePred = [](const SectionBase &) { return false; };
+
+  // Removes:
+  if (!Config.ToRemove.empty()) {
+    RemovePred = [&Config](const SectionBase &Sec) {
+      return Config.ToRemove.matches(Sec.Name);
+    };
+  }
+
+  if (Config.StripDWO)
+    RemovePred = [RemovePred](const SectionBase &Sec) {
+      return isDWOSection(Sec) || RemovePred(Sec);
+    };
+
+  if (Config.ExtractDWO)
+    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+      return onlyKeepDWOPred(Obj, Sec) || RemovePred(Sec);
+    };
+
+  if (Config.StripAllGNU)
+    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+      if (RemovePred(Sec))
+        return true;
+      if ((Sec.Flags & SHF_ALLOC) != 0)
+        return false;
+      if (&Sec == Obj.SectionNames)
+        return false;
+      switch (Sec.Type) {
+      case SHT_SYMTAB:
+      case SHT_REL:
+      case SHT_RELA:
+      case SHT_STRTAB:
+        return true;
+      }
+      return isDebugSection(Sec);
+    };
+
+  if (Config.StripSections) {
+    RemovePred = [RemovePred](const SectionBase &Sec) {
+      return RemovePred(Sec) || Sec.ParentSegment == nullptr;
+    };
+  }
+
+  if (Config.StripDebug || Config.StripUnneeded) {
+    RemovePred = [RemovePred](const SectionBase &Sec) {
+      return RemovePred(Sec) || isDebugSection(Sec);
+    };
+  }
+
+  if (Config.StripNonAlloc)
+    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+      if (RemovePred(Sec))
+        return true;
+      if (&Sec == Obj.SectionNames)
+        return false;
+      return (Sec.Flags & SHF_ALLOC) == 0 && Sec.ParentSegment == nullptr;
+    };
+
+  if (Config.StripAll)
+    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+      if (RemovePred(Sec))
+        return true;
+      if (&Sec == Obj.SectionNames)
+        return false;
+      if (StringRef(Sec.Name).startswith(".gnu.warning"))
+        return false;
+      // We keep the .ARM.attributes section to maintain compatibility
+      // with Debian derived distributions. This is a bug in their
+      // patchset as documented here:
+      // https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=943798
+      if (Sec.Type == SHT_ARM_ATTRIBUTES)
+        return false;
+      if (Sec.ParentSegment != nullptr)
+        return false;
+      return (Sec.Flags & SHF_ALLOC) == 0;
+    };
+
+  if (Config.ExtractPartition || Config.ExtractMainPartition) {
+    RemovePred = [RemovePred](const SectionBase &Sec) {
+      if (RemovePred(Sec))
+        return true;
+      if (Sec.Type == SHT_LLVM_PART_EHDR || Sec.Type == SHT_LLVM_PART_PHDR)
+        return true;
+      return (Sec.Flags & SHF_ALLOC) != 0 && !Sec.ParentSegment;
+    };
+  }
+
+  // Explicit copies:
+  if (!Config.OnlySection.empty()) {
+    RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
+      // Explicitly keep these sections regardless of previous removes.
+      if (Config.OnlySection.matches(Sec.Name))
+        return false;
+
+      // Allow all implicit removes.
+      if (RemovePred(Sec))
+        return true;
+
+      // Keep special sections.
+      if (Obj.SectionNames == &Sec)
+        return false;
+      if (Obj.SymbolTable == &Sec ||
+          (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec))
+        return false;
+
+      // Remove everything else.
+      return true;
+    };
+  }
+
+  if (!Config.KeepSection.empty()) {
+    RemovePred = [&Config, RemovePred](const SectionBase &Sec) {
+      // Explicitly keep these sections regardless of previous removes.
+      if (Config.KeepSection.matches(Sec.Name))
+        return false;
+      // Otherwise defer to RemovePred.
+      return RemovePred(Sec);
+    };
+  }
+
+  // This has to be the last predicate assignment.
+  // If the option --keep-symbol has been specified
+  // and at least one of those symbols is present
+  // (equivalently, the updated symbol table is not empty)
+  // the symbol table and the string table should not be removed.
+  if ((!Config.SymbolsToKeep.empty() || ELFConfig.KeepFileSymbols) &&
+      Obj.SymbolTable && !Obj.SymbolTable->empty()) {
+    RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
+      if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
+        return false;
+      return RemovePred(Sec);
+    };
+  }
+
+  if (Error E = Obj.removeSections(ELFConfig.AllowBrokenLinks, RemovePred))
+    return E;
+
+  if (Config.CompressionType != DebugCompressionType::None) {
+    if (Error Err = replaceDebugSections(
+            Obj, isCompressable,
+            [&Config, &Obj](const SectionBase *S) -> Expected<SectionBase *> {
+              return &Obj.addSection<CompressedSection>(
+                  CompressedSection(*S, Config.CompressionType));
+            }))
+      return Err;
+  } else if (Config.DecompressDebugSections) {
+    if (Error Err = replaceDebugSections(
+            Obj,
+            [](const SectionBase &S) { return isa<CompressedSection>(&S); },
+            [&Obj](const SectionBase *S) {
+              const CompressedSection *CS = cast<CompressedSection>(S);
+              return &Obj.addSection<DecompressedSection>(*CS);
+            }))
+      return Err;
+  }
+
+  return Error::success();
+}
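The composition pattern used throughout the function above, reduced to its core: each option wraps the previous predicate by value, so earlier removal decisions are respected and later "keep" layers can veto them. A sketch as if placed in this translation unit (the ".keep_me" name is made up):

SectionPred Base = [](const SectionBase &) { return false; };
SectionPred WithDebug = [Base](const SectionBase &Sec) {
  return Base(Sec) || isDebugSection(Sec); // add one removal rule
};
SectionPred KeepSome = [WithDebug](const SectionBase &Sec) {
  return Sec.Name == ".keep_me" ? false : WithDebug(Sec); // keep overrides
};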
+
+// Add symbol to the Object symbol table with the specified properties.
+static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
+                      uint8_t DefaultVisibility) {
+  SectionBase *Sec = Obj.findSection(SymInfo.SectionName);
+  uint64_t Value = Sec ? Sec->Addr + SymInfo.Value : SymInfo.Value;
+
+  uint8_t Bind = ELF::STB_GLOBAL;
+  uint8_t Type = ELF::STT_NOTYPE;
+  uint8_t Visibility = DefaultVisibility;
+
+  for (SymbolFlag FlagValue : SymInfo.Flags)
+    switch (FlagValue) {
+    case SymbolFlag::Global:
+      Bind = ELF::STB_GLOBAL;
+      break;
+    case SymbolFlag::Local:
+      Bind = ELF::STB_LOCAL;
+      break;
+    case SymbolFlag::Weak:
+      Bind = ELF::STB_WEAK;
+      break;
+    case SymbolFlag::Default:
+      Visibility = ELF::STV_DEFAULT;
+      break;
+    case SymbolFlag::Hidden:
+      Visibility = ELF::STV_HIDDEN;
+      break;
+    case SymbolFlag::Protected:
+      Visibility = ELF::STV_PROTECTED;
+      break;
+    case SymbolFlag::File:
+      Type = ELF::STT_FILE;
+      break;
+    case SymbolFlag::Section:
+      Type = ELF::STT_SECTION;
+      break;
+    case SymbolFlag::Object:
+      Type = ELF::STT_OBJECT;
+      break;
+    case SymbolFlag::Function:
+      Type = ELF::STT_FUNC;
+      break;
+    case SymbolFlag::IndirectFunction:
+      Type = ELF::STT_GNU_IFUNC;
+      break;
+    default: /* Other flag values are ignored for ELF. */
+      break;
+    };
+
+  Obj.SymbolTable->addSymbol(
+      SymInfo.SymbolName, Bind, Type, Sec, Value, Visibility,
+      Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
+}
+
+static Error
+handleUserSection(const NewSectionInfo &NewSection,
+                  function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
+  ArrayRef<uint8_t> Data(reinterpret_cast<const uint8_t *>(
+                             NewSection.SectionData->getBufferStart()),
+                         NewSection.SectionData->getBufferSize());
+  return F(NewSection.SectionName, Data);
+}
+
+// This function handles the high level operations of GNU objcopy including
+// handling command line options. It's important to outline certain properties
+// we expect to hold of the command line operations. Any operation that "keeps"
+// should keep regardless of a remove. Additionally, any removal should respect
+// any previous removals. Lastly, whether or not something is removed shouldn't
+// depend a) on the order the options occur in or b) on some opaque priority
+// system. The only priority is that keeps/copies overrule removes.
+static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
+                        Object &Obj) {
+  if (Config.OutputArch) {
+    Obj.Machine = Config.OutputArch.getValue().EMachine;
+    Obj.OSABI = Config.OutputArch.getValue().OSABI;
+  }
+
+  if (!Config.SplitDWO.empty() && Config.ExtractDWO) {
+    return Obj.removeSections(
+        ELFConfig.AllowBrokenLinks,
+        [&Obj](const SectionBase &Sec) { return onlyKeepDWOPred(Obj, Sec); });
+  }
+
+  // Dump sections before add/remove for compatibility with GNU objcopy.
+  for (StringRef Flag : Config.DumpSection) {
+    StringRef SectionName;
+    StringRef FileName;
+    std::tie(SectionName, FileName) = Flag.split('=');
+    if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
+      return E;
+  }
+
+  // It is important to remove the sections first. For example, we want to
+  // remove the relocation sections before removing the symbols. That allows
+  // us to avoid reporting the inappropriate errors about removing symbols
+  // named in relocations.
+  if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj))
+    return E;
+
+  if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj))
+    return E;
+
+  if (!Config.SectionsToRename.empty()) {
+    std::vector<RelocationSectionBase *> RelocSections;
+    DenseSet<SectionBase *> RenamedSections;
+    for (SectionBase &Sec : Obj.sections()) {
+      auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec);
+      const auto Iter = Config.SectionsToRename.find(Sec.Name);
+      if (Iter != Config.SectionsToRename.end()) {
+        const SectionRename &SR = Iter->second;
+        Sec.Name = std::string(SR.NewName);
+        if (SR.NewFlags)
+          setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
+        RenamedSections.insert(&Sec);
+      } else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
+        // Postpone processing relocation sections which are not specified in
+        // their explicit '--rename-section' commands until after their target
+        // sections are renamed.
+        // Dynamic relocation sections (i.e. ones with SHF_ALLOC) should be
+        // renamed only explicitly. Otherwise, renaming, for example,
+        // '.got.plt' would affect '.rela.plt', which is not desirable.
+        RelocSections.push_back(RelocSec);
+    }
+
+    // Rename relocation sections according to their target sections.
+    for (RelocationSectionBase *RelocSec : RelocSections) {
+      auto Iter = RenamedSections.find(RelocSec->getSection());
+      if (Iter != RenamedSections.end())
+        RelocSec->Name = (RelocSec->getNamePrefix() + (*Iter)->Name).str();
+    }
+  }
+
+  // Add a prefix to allocated sections and their relocation sections. This
+  // should be done after renaming the section by Config.SectionToRename to
+  // imitate the GNU objcopy behavior.
+  if (!Config.AllocSectionsPrefix.empty()) {
+    DenseSet<SectionBase *> PrefixedSections;
+    for (SectionBase &Sec : Obj.sections()) {
+      if (Sec.Flags & SHF_ALLOC) {
+        Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str();
+        PrefixedSections.insert(&Sec);
+      } else if (auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec)) {
+        // Rename relocation sections associated to the allocated sections.
+        // For example, if we rename .text to .prefix.text, we also rename
+        // .rel.text to .rel.prefix.text.
+        //
+        // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled
+        // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not
+        // .rela.prefix.plt since GNU objcopy does so.
+        const SectionBase *TargetSec = RelocSec->getSection();
+        if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
+          // If the relocation section comes *after* the target section, we
+          // don't add Config.AllocSectionsPrefix because we've already added
+          // the prefix to TargetSec->Name. Otherwise, if the relocation
+          // section comes *before* the target section, we add the prefix.
+          if (PrefixedSections.count(TargetSec))
+            Sec.Name = (RelocSec->getNamePrefix() + TargetSec->Name).str();
+          else
+            Sec.Name = (RelocSec->getNamePrefix() +
+                        Config.AllocSectionsPrefix + TargetSec->Name)
+                           .str();
+        }
+      }
+    }
+  }
+
+  if (!Config.SetSectionAlignment.empty()) {
+    for (SectionBase &Sec : Obj.sections()) {
+      auto I = Config.SetSectionAlignment.find(Sec.Name);
+      if (I != Config.SetSectionAlignment.end())
+        Sec.Align = I->second;
+    }
+  }
+
+  if (Config.OnlyKeepDebug)
+    for (auto &Sec : Obj.sections())
+      if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
+        Sec.Type = SHT_NOBITS;
+
+  for (const NewSectionInfo &AddedSection : Config.AddSection) {
+    auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+      OwnedDataSection &NewSection =
+          Obj.addSection<OwnedDataSection>(Name, Data);
+      if (Name.startswith(".note") && Name != ".note.GNU-stack")
+        NewSection.Type = SHT_NOTE;
+      return Error::success();
+    };
+    if (Error E = handleUserSection(AddedSection, AddSection))
+      return E;
+  }
+
+  for (const NewSectionInfo &NewSection : Config.UpdateSection) {
+    auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+      return Obj.updateSection(Name, Data);
+    };
+    if (Error E = handleUserSection(NewSection, UpdateSection))
+      return E;
+  }
+
+  if (!Config.AddGnuDebugLink.empty())
+    Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
+                                        Config.GnuDebugLinkCRC32);
+
+  // If the symbol table was previously removed, we need to create a new one
+  // before adding new symbols.
+  if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty())
+    if (Error E = Obj.addNewSymbolTable())
+      return E;
+
+  for (const NewSymbolInfo &SI : Config.SymbolsToAdd)
+    addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility);
+
+  // --set-section-flags works with sections added by --add-section.
+  if (!Config.SetSectionFlags.empty()) {
+    for (auto &Sec : Obj.sections()) {
+      const auto Iter = Config.SetSectionFlags.find(Sec.Name);
+      if (Iter != Config.SetSectionFlags.end()) {
+        const SectionFlagsUpdate &SFU = Iter->second;
+        setSectionFlagsAndType(Sec, SFU.NewFlags);
+      }
+    }
+  }
+
+  if (ELFConfig.EntryExpr)
+    Obj.Entry = ELFConfig.EntryExpr(Obj.Entry);
+  return Error::success();
+}
+
+static Error writeOutput(const CommonConfig &Config, Object &Obj,
+                         raw_ostream &Out, ElfType OutputElfType) {
+  std::unique_ptr<Writer> Writer =
+      createWriter(Config, Obj, Out, OutputElfType);
+  if (Error E = Writer->finalize())
+    return E;
+  return Writer->write();
+}
+
+Error objcopy::elf::executeObjcopyOnIHex(const CommonConfig &Config,
+                                         const ELFConfig &ELFConfig,
+                                         MemoryBuffer &In, raw_ostream &Out) {
+  IHexReader Reader(&In);
+  Expected<std::unique_ptr<Object>> Obj = Reader.create(true);
+  if (!Obj)
+    return Obj.takeError();
+
+  const ElfType OutputElfType =
+      getOutputElfType(Config.OutputArch.value_or(MachineInfo()));
+  if (Error E = handleArgs(Config, ELFConfig, **Obj))
+    return E;
+  return writeOutput(Config, **Obj, Out, OutputElfType);
+}
+  const ElfType OutputElfType =
+      getOutputElfType(Config.OutputArch.value_or(MachineInfo()));
+  if (Error E = handleArgs(Config, ELFConfig, **Obj))
+    return E;
+  return writeOutput(Config, **Obj, Out, OutputElfType);
+}
+
+Error objcopy::elf::executeObjcopyOnBinary(const CommonConfig &Config,
+                                           const ELFConfig &ELFConfig,
+                                           object::ELFObjectFileBase &In,
+                                           raw_ostream &Out) {
+  ELFReader Reader(&In, Config.ExtractPartition);
+  Expected<std::unique_ptr<Object>> Obj =
+      Reader.create(!Config.SymbolsToAdd.empty());
+  if (!Obj)
+    return Obj.takeError();
+  // Prefer OutputArch (-O) if set, otherwise infer it from the input.
+  const ElfType OutputElfType =
+      Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue())
+                        : getOutputElfType(In);
+
+  if (Error E = handleArgs(Config, ELFConfig, **Obj))
+    return createFileError(Config.InputFilename, std::move(E));
+
+  if (Error E = writeOutput(Config, **Obj, Out, OutputElfType))
+    return createFileError(Config.InputFilename, std::move(E));
+
+  return Error::success();
+}
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
new file mode 100644
index 000000000000..b241bd817ff5
--- /dev/null
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -0,0 +1,2795 @@
+//===- ELFObject.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFObject.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::objcopy::elf;
+using namespace llvm::object;
+
+template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
+  uint8_t *B = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+               Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
+  Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
+  Phdr.p_type = Seg.Type;
+  Phdr.p_flags = Seg.Flags;
+  Phdr.p_offset = Seg.Offset;
+  Phdr.p_vaddr = Seg.VAddr;
+  Phdr.p_paddr = Seg.PAddr;
+  Phdr.p_filesz = Seg.FileSize;
+  Phdr.p_memsz = Seg.MemSize;
+  Phdr.p_align = Seg.Align;
+}
+
+Error SectionBase::removeSectionReferences(
+    bool, function_ref<bool(const SectionBase *)>) {
+  return Error::success();
+}
+
+Error SectionBase::removeSymbols(function_ref<bool(const Symbol &)>) {
+  return Error::success();
+}
+
+Error SectionBase::initialize(SectionTableRef) { return Error::success(); }
+void SectionBase::finalize() {}
+void SectionBase::markSymbols() {}
+void SectionBase::replaceSectionReferences(
+    const DenseMap<SectionBase *, SectionBase *> &) {}
+void SectionBase::onRemove() {}
+
+template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
+  uint8_t *B =
+      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Sec.HeaderOffset;
+  Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
+  Shdr.sh_name = Sec.NameIndex;
+  Shdr.sh_type = Sec.Type;
+  Shdr.sh_flags = Sec.Flags;
+  Shdr.sh_addr = Sec.Addr;
+  Shdr.sh_offset = Sec.Offset;
+  Shdr.sh_size = Sec.Size;
+  Shdr.sh_link = Sec.Link;
+  Shdr.sh_info = Sec.Info;
+  Shdr.sh_addralign = Sec.Align;
+  Shdr.sh_entsize = Sec.EntrySize;
+}
+
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(Section &) {
+  return Error::success();
+}
+
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(OwnedDataSection &) {
+  return Error::success();
+}
+
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(StringTableSection &) {
+  return Error::success();
+}
+
+template <class ELFT>
+Error ELFSectionSizer<ELFT>::visit(DynamicRelocationSection &) {
+  return Error::success();
+}
+
+template <class ELFT>
+Error ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
+  Sec.EntrySize = sizeof(Elf_Sym);
+  Sec.Size = Sec.Symbols.size() * Sec.EntrySize;
+  // Align to the largest field in Elf_Sym.
+  Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+  return Error::success();
+}
+
+template <class ELFT>
+Error ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
+  Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
+  Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
+  // Align to the largest field in Elf_Rel(a).
+  Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+  return Error::success();
+}
+
+template <class ELFT>
+Error ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &) {
+  return Error::success();
+}
+
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {
+  Sec.Size = sizeof(Elf_Word) + Sec.GroupMembers.size() * sizeof(Elf_Word);
+  return Error::success();
+}
+
+template <class ELFT>
+Error ELFSectionSizer<ELFT>::visit(SectionIndexSection &) {
+  return Error::success();
+}
+
+template <class ELFT> Error ELFSectionSizer<ELFT>::visit(CompressedSection &) {
+  return Error::success();
+}
+
+template <class ELFT>
+Error ELFSectionSizer<ELFT>::visit(DecompressedSection &) {
+  return Error::success();
+}
+
+Error BinarySectionWriter::visit(const SectionIndexSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write symbol section index table '" +
+                               Sec.Name + "' ");
+}
+
+Error BinarySectionWriter::visit(const SymbolTableSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write symbol table '" + Sec.Name +
+                               "' out to binary");
+}
+
+Error BinarySectionWriter::visit(const RelocationSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write relocation section '" + Sec.Name +
+                               "' out to binary");
+}
+
+Error BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write '" + Sec.Name + "' out to binary");
+}
+
+Error BinarySectionWriter::visit(const GroupSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write '" + Sec.Name + "' out to binary");
+}
+
+Error SectionWriter::visit(const Section &Sec) {
+  if (Sec.Type != SHT_NOBITS)
+    llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
+
+  return Error::success();
+}
+
+static bool addressOverflows32bit(uint64_t Addr) {
+  // Sign extended 32 bit addresses (e.g 0xFFFFFFFF80000000) are ok
+  return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
+}
+
+template <class T> static T checkedGetHex(StringRef S) {
+  T Value;
+  bool Fail = S.getAsInteger(16, Value);
+  assert(!Fail);
+  (void)Fail;
+  return Value;
+}
+
+// Fills exactly Len bytes of buffer with hexadecimal characters
+// representing value 'X'
+template <class T, class Iterator>
+static Iterator toHexStr(T X, Iterator It, size_t Len) {
+  // Fill range with '0'
+  std::fill(It, It + Len, '0');
+
+  for (long I = Len - 1; I >= 0; --I) {
+    unsigned char Mod = static_cast<unsigned char>(X) & 15;
+    *(It + I) = hexdigit(Mod, false);
+    X >>= 4;
+  }
+  assert(X == 0);
+  return It + Len;
+}
+
+uint8_t IHexRecord::getChecksum(StringRef S) {
+  assert((S.size() & 1) == 0);
+  uint8_t Checksum = 0;
+  while (!S.empty()) {
+    Checksum += checkedGetHex<uint8_t>(S.take_front(2));
+    S = S.drop_front(2);
+  }
+  return -Checksum;
+}
+
+IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr,
+                                 ArrayRef<uint8_t> Data) {
+  IHexLineData Line(getLineLength(Data.size()));
+  assert(Line.size());
+  auto Iter = Line.begin();
+  *Iter++ = ':';
+  Iter = toHexStr(Data.size(), Iter, 2);
+  Iter = toHexStr(Addr, Iter, 4);
+  Iter = toHexStr(Type, Iter, 2);
+  for (uint8_t X : Data)
+    Iter = toHexStr(X, Iter, 2);
+  StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter));
+  Iter = toHexStr(getChecksum(S), Iter, 2);
+  *Iter++ = '\r';
+  *Iter++ = '\n';
+  assert(Iter == Line.end());
+  return Line;
+}
+
+static Error checkRecord(const IHexRecord &R) {
+  switch (R.Type) {
+  case IHexRecord::Data:
+    if (R.HexData.size() == 0)
+      return createStringError(
+          errc::invalid_argument,
+          "zero data length is not allowed for data records");
+    break;
+  case IHexRecord::EndOfFile:
+    break;
+  case IHexRecord::SegmentAddr:
+    // 20-bit segment address. Data length must be 2 bytes
+    // (4 bytes in hex)
+    if (R.HexData.size() != 4)
+      return createStringError(
+          errc::invalid_argument,
+          "segment address data should be 2 bytes in size");
+    break;
+  case IHexRecord::StartAddr80x86:
+  case IHexRecord::StartAddr:
+    if (R.HexData.size() != 8)
+      return createStringError(errc::invalid_argument,
+                               "start address data should be 4 bytes in size");
+    // According to Intel HEX specification '03' record
+    // only specifies the code address within the 20-bit
+    // segmented address space of the 8086/80186. This
+    // means 12 high order bits should be zeroes.
+    if (R.Type == IHexRecord::StartAddr80x86 &&
+        R.HexData.take_front(3) != "000")
+      return createStringError(errc::invalid_argument,
+                               "start address exceeds 20 bit for 80x86");
+    break;
+  case IHexRecord::ExtendedAddr:
+    // 16-31 bits of linear base address
+    if (R.HexData.size() != 4)
+      return createStringError(
+          errc::invalid_argument,
+          "extended address data should be 2 bytes in size");
+    break;
+  default:
+    // Unknown record type
+    return createStringError(errc::invalid_argument, "unknown record type: %u",
+                             static_cast<unsigned>(R.Type));
+  }
+  return Error::success();
+}
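+
+// Illustration added for clarity (not from the original source): in the
+// classic record ":0B0010006164647265737320676170A7", 0B is the byte count,
+// 0010 the address, 00 the type (Data) and A7 the checksum. Summing all
+// record bytes 0B+00+10+00+61+64+64+72+65+73+73+20+67+61+70 gives 0x459;
+// the two's complement of the low byte, 0x100 - 0x59 = 0xA7, matches, so
+// getChecksum() over everything after ':' returns 0 for a well-formed line.
+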
+// Checks that IHEX line contains valid characters.
+// This allows converting hexadecimal data to integers
+// without extra verification.
+static Error checkChars(StringRef Line) {
+  assert(!Line.empty());
+  if (Line[0] != ':')
+    return createStringError(errc::invalid_argument,
+                             "missing ':' in the beginning of line.");
+
+  for (size_t Pos = 1; Pos < Line.size(); ++Pos)
+    if (hexDigitValue(Line[Pos]) == -1U)
+      return createStringError(errc::invalid_argument,
+                               "invalid character at position %zu.", Pos + 1);
+  return Error::success();
+}
+
+Expected<IHexRecord> IHexRecord::parse(StringRef Line) {
+  assert(!Line.empty());
+
+  // ':' + Length + Address + Type + Checksum with empty data ':LLAAAATTCC'
+  if (Line.size() < 11)
+    return createStringError(errc::invalid_argument,
+                             "line is too short: %zu chars.", Line.size());
+
+  if (Error E = checkChars(Line))
+    return std::move(E);
+
+  IHexRecord Rec;
+  size_t DataLen = checkedGetHex<uint8_t>(Line.substr(1, 2));
+  if (Line.size() != getLength(DataLen))
+    return createStringError(errc::invalid_argument,
+                             "invalid line length %zu (should be %zu)",
+                             Line.size(), getLength(DataLen));
+
+  Rec.Addr = checkedGetHex<uint16_t>(Line.substr(3, 4));
+  Rec.Type = checkedGetHex<uint8_t>(Line.substr(7, 2));
+  Rec.HexData = Line.substr(9, DataLen * 2);
+
+  if (getChecksum(Line.drop_front(1)) != 0)
+    return createStringError(errc::invalid_argument, "incorrect checksum.");
+  if (Error E = checkRecord(Rec))
+    return std::move(E);
+  return Rec;
+}
+
+static uint64_t sectionPhysicalAddr(const SectionBase *Sec) {
+  Segment *Seg = Sec->ParentSegment;
+  if (Seg && Seg->Type != ELF::PT_LOAD)
+    Seg = nullptr;
+  return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset
+             : Sec->Addr;
+}
+
+void IHexSectionWriterBase::writeSection(const SectionBase *Sec,
+                                         ArrayRef<uint8_t> Data) {
+  assert(Data.size() == Sec->Size);
+  const uint32_t ChunkSize = 16;
+  uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU;
+  while (!Data.empty()) {
+    uint64_t DataSize = std::min<uint64_t>(Data.size(), ChunkSize);
+    if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) {
+      if (Addr > 0xFFFFFU) {
+        // Write extended address record, zeroing segment address
+        // if needed.
+        if (SegmentAddr != 0)
+          SegmentAddr = writeSegmentAddr(0U);
+        BaseAddr = writeBaseAddr(Addr);
+      } else {
+        // We can still remain 16-bit
+        SegmentAddr = writeSegmentAddr(Addr);
+      }
+    }
+    uint64_t SegOffset = Addr - BaseAddr - SegmentAddr;
+    assert(SegOffset <= 0xFFFFU);
+    DataSize = std::min(DataSize, 0x10000U - SegOffset);
+    writeData(0, SegOffset, Data.take_front(DataSize));
+    Addr += DataSize;
+    Data = Data.drop_front(DataSize);
+  }
+}
+
+uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFU);
+  uint8_t Data[] = {static_cast<uint8_t>((Addr & 0xF0000U) >> 12), 0};
+  writeData(2, 0, Data);
+  return Addr & 0xF0000U;
+}
+
+uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFFFFU);
+  uint64_t Base = Addr & 0xFFFF0000U;
+  uint8_t Data[] = {static_cast<uint8_t>(Base >> 24),
+                    static_cast<uint8_t>((Base >> 16) & 0xFF)};
+  writeData(4, 0, Data);
+  return Base;
+}
+
+void IHexSectionWriterBase::writeData(uint8_t, uint16_t,
+                                      ArrayRef<uint8_t> Data) {
+  Offset += IHexRecord::getLineLength(Data.size());
+}
+
+Error IHexSectionWriterBase::visit(const Section &Sec) {
+  writeSection(&Sec, Sec.Contents);
+  return Error::success();
+}
+
+Error IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
+  writeSection(&Sec, Sec.Data);
+  return Error::success();
+}
+
+Error IHexSectionWriterBase::visit(const StringTableSection &Sec) {
+  // Check that sizer has already done its work
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  // We are free to pass an invalid pointer to writeSection as long
+  // as we don't actually write any data. The real writer class has
+  // to override this method.
+  writeSection(&Sec, {nullptr, static_cast<size_t>(Sec.Size)});
+  return Error::success();
+}
+
+Error IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
+  writeSection(&Sec, Sec.Contents);
+  return Error::success();
+}
+
+void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
+                                  ArrayRef<uint8_t> Data) {
+  IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data);
+  memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size());
+  Offset += HexData.size();
+}
+
+Error IHexSectionWriter::visit(const StringTableSection &Sec) {
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  std::vector<uint8_t> Data(Sec.Size);
+  Sec.StrTabBuilder.write(Data.data());
+  writeSection(&Sec, Data);
+  return Error::success();
+}
+
+Error Section::accept(SectionVisitor &Visitor) const {
+  return Visitor.visit(*this);
+}
+
+Error Section::accept(MutableSectionVisitor &Visitor) {
+  return Visitor.visit(*this);
+}
+
+Error SectionWriter::visit(const OwnedDataSection &Sec) {
+  llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
+  return Error::success();
+}
+
+static constexpr std::array<char, 4> ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}};
+
+static bool isDataGnuCompressed(ArrayRef<uint8_t> Data) {
+  return Data.size() > ZlibGnuMagic.size() &&
+         std::equal(ZlibGnuMagic.begin(), ZlibGnuMagic.end(), Data.data());
+}
+
+template <class ELFT>
+static std::tuple<uint64_t, uint64_t>
+getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
+  const bool IsGnuDebug = isDataGnuCompressed(Data);
+  const uint64_t DecompressedSize =
+      IsGnuDebug
+          ? support::endian::read64be(Data.data() + ZlibGnuMagic.size())
+          : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())->ch_size;
+  const uint64_t DecompressedAlign =
+      IsGnuDebug ? 1
+                 : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())
+                       ->ch_addralign;
+
+  return std::make_tuple(DecompressedSize, DecompressedAlign);
+}
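+
+// Illustration added for clarity (not from the original source): a zlib-gnu
+// ".zdebug_*" section starts with the 4-byte magic "ZLIB" followed by a
+// 64-bit big-endian decompressed size and the deflate stream, while an
+// SHF_COMPRESSED section starts with an Elf_Chdr (ch_type =
+// ELFCOMPRESS_ZLIB, ch_size, ch_addralign) followed by the stream; the
+// helpers above dispatch on that magic.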
+
+template <class ELFT>
+Error ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
+  const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
+                                ? (ZlibGnuMagic.size() + sizeof(Sec.Size))
+                                : sizeof(Elf_Chdr_Impl<ELFT>);
+
+  StringRef CompressedContent(
+      reinterpret_cast<const char *>(Sec.OriginalData.data()) + DataOffset,
+      Sec.OriginalData.size() - DataOffset);
+
+  SmallVector<char, 128> DecompressedContent;
+  if (Error Err = zlib::uncompress(CompressedContent, DecompressedContent,
+                                   static_cast<size_t>(Sec.Size)))
+    return createStringError(errc::invalid_argument,
+                             "'" + Sec.Name + "': " + toString(std::move(Err)));
+
+  uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
+  std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf);
+
+  return Error::success();
+}
+
+Error BinarySectionWriter::visit(const DecompressedSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write compressed section '" + Sec.Name +
+                               "' ");
+}
+
+Error DecompressedSection::accept(SectionVisitor &Visitor) const {
+  return Visitor.visit(*this);
+}
+
+Error DecompressedSection::accept(MutableSectionVisitor &Visitor) {
+  return Visitor.visit(*this);
+}
+
+Error OwnedDataSection::accept(SectionVisitor &Visitor) const {
+  return Visitor.visit(*this);
+}
+
+Error OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
+  return Visitor.visit(*this);
+}
+
+void OwnedDataSection::appendHexData(StringRef HexData) {
+  assert((HexData.size() & 1) == 0);
+  while (!HexData.empty()) {
+    Data.push_back(checkedGetHex<uint8_t>(HexData.take_front(2)));
+    HexData = HexData.drop_front(2);
+  }
+  Size = Data.size();
+}
+
+Error BinarySectionWriter::visit(const CompressedSection &Sec) {
+  return createStringError(errc::operation_not_permitted,
+                           "cannot write compressed section '" + Sec.Name +
+                               "' ");
+}
+
+template <class ELFT>
+Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
+  uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
+  Elf_Chdr_Impl<ELFT> Chdr;
+  switch (Sec.CompressionType) {
+  case DebugCompressionType::None:
+    std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
+    return Error::success();
+  case DebugCompressionType::GNU:
+    llvm_unreachable("unexpected zlib-gnu");
+    break;
+  case DebugCompressionType::Z:
+    Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB;
+    break;
+  }
+  Chdr.ch_size = Sec.DecompressedSize;
+  Chdr.ch_addralign = Sec.DecompressedAlign;
+  memcpy(Buf, &Chdr, sizeof(Chdr));
+  Buf += sizeof(Chdr);
+
+  std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf);
+  return Error::success();
+}
+
+CompressedSection::CompressedSection(const SectionBase &Sec,
+                                     DebugCompressionType CompressionType)
+    : SectionBase(Sec), CompressionType(CompressionType),
+      DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
+  zlib::compress(StringRef(reinterpret_cast<const char *>(OriginalData.data()),
+                           OriginalData.size()),
+                 CompressedData);
+
+  assert(CompressionType != DebugCompressionType::None);
+  Flags |= ELF::SHF_COMPRESSED;
+  size_t ChdrSize =
+      std::max(std::max(sizeof(object::Elf_Chdr_Impl<object::ELF64LE>),
+                        sizeof(object::Elf_Chdr_Impl<object::ELF64BE>)),
+               std::max(sizeof(object::Elf_Chdr_Impl<object::ELF32LE>),
+                        sizeof(object::Elf_Chdr_Impl<object::ELF32BE>)));
+  Size = ChdrSize + CompressedData.size();
+  Align = 8;
+}
+
+CompressedSection::CompressedSection(ArrayRef<uint8_t> CompressedData,
+                                     uint64_t DecompressedSize,
+                                     uint64_t DecompressedAlign)
+    : CompressionType(DebugCompressionType::None),
DecompressedSize(DecompressedSize), DecompressedAlign(DecompressedAlign) { + OriginalData = CompressedData; +} + +Error CompressedSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error CompressedSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); } + +uint32_t StringTableSection::findIndex(StringRef Name) const { + return StrTabBuilder.getOffset(Name); +} + +void StringTableSection::prepareForLayout() { + StrTabBuilder.finalize(); + Size = StrTabBuilder.getSize(); +} + +Error SectionWriter::visit(const StringTableSection &Sec) { + Sec.StrTabBuilder.write(reinterpret_cast(Out.getBufferStart()) + + Sec.Offset); + return Error::success(); +} + +Error StringTableSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error StringTableSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +template +Error ELFSectionWriter::visit(const SectionIndexSection &Sec) { + uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; + llvm::copy(Sec.Indexes, reinterpret_cast(Buf)); + return Error::success(); +} + +Error SectionIndexSection::initialize(SectionTableRef SecTable) { + Size = 0; + Expected Sec = + SecTable.getSectionOfType( + Link, + "Link field value " + Twine(Link) + " in section " + Name + + " is invalid", + "Link field value " + Twine(Link) + " in section " + Name + + " is not a symbol table"); + if (!Sec) + return Sec.takeError(); + + setSymTab(*Sec); + Symbols->setShndxTable(this); + return Error::success(); +} + +void SectionIndexSection::finalize() { Link = Symbols->Index; } + +Error SectionIndexSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error SectionIndexSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { + switch (Index) { + case SHN_ABS: + case SHN_COMMON: + return true; + } + + if (Machine == EM_AMDGPU) { + return Index == SHN_AMDGPU_LDS; + } + + if (Machine == EM_MIPS) { + switch (Index) { + case SHN_MIPS_ACOMMON: + case SHN_MIPS_SCOMMON: + case SHN_MIPS_SUNDEFINED: + return true; + } + } + + if (Machine == EM_HEXAGON) { + switch (Index) { + case SHN_HEXAGON_SCOMMON: + case SHN_HEXAGON_SCOMMON_1: + case SHN_HEXAGON_SCOMMON_2: + case SHN_HEXAGON_SCOMMON_4: + case SHN_HEXAGON_SCOMMON_8: + return true; + } + } + return false; +} + +// Large indexes force us to clarify exactly what this function should do. This +// function should return the value that will appear in st_shndx when written +// out. +uint16_t Symbol::getShndx() const { + if (DefinedIn != nullptr) { + if (DefinedIn->Index >= SHN_LORESERVE) + return SHN_XINDEX; + return DefinedIn->Index; + } + + if (ShndxType == SYMBOL_SIMPLE_INDEX) { + // This means that we don't have a defined section but we do need to + // output a legitimate section index. 
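+    // (Illustration added for clarity: an undefined symbol keeps SHN_UNDEF
+    // here, whereas a symbol defined in a section whose index is, say,
+    // 0xff05 >= SHN_LORESERVE takes the branch above: st_shndx is written as
+    // SHN_XINDEX and the real index goes into the SHT_SYMTAB_SHNDX table via
+    // fillShndxTable().)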
+ return SHN_UNDEF; + } + + assert(ShndxType == SYMBOL_ABS || ShndxType == SYMBOL_COMMON || + (ShndxType >= SYMBOL_LOPROC && ShndxType <= SYMBOL_HIPROC) || + (ShndxType >= SYMBOL_LOOS && ShndxType <= SYMBOL_HIOS)); + return static_cast(ShndxType); +} + +bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; } + +void SymbolTableSection::assignIndices() { + uint32_t Index = 0; + for (auto &Sym : Symbols) + Sym->Index = Index++; +} + +void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, + SectionBase *DefinedIn, uint64_t Value, + uint8_t Visibility, uint16_t Shndx, + uint64_t SymbolSize) { + Symbol Sym; + Sym.Name = Name.str(); + Sym.Binding = Bind; + Sym.Type = Type; + Sym.DefinedIn = DefinedIn; + if (DefinedIn != nullptr) + DefinedIn->HasSymbol = true; + if (DefinedIn == nullptr) { + if (Shndx >= SHN_LORESERVE) + Sym.ShndxType = static_cast(Shndx); + else + Sym.ShndxType = SYMBOL_SIMPLE_INDEX; + } + Sym.Value = Value; + Sym.Visibility = Visibility; + Sym.Size = SymbolSize; + Sym.Index = Symbols.size(); + Symbols.emplace_back(std::make_unique(Sym)); + Size += this->EntrySize; +} + +Error SymbolTableSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) { + if (ToRemove(SectionIndexTable)) + SectionIndexTable = nullptr; + if (ToRemove(SymbolNames)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "string table '%s' cannot be removed because it is " + "referenced by the symbol table '%s'", + SymbolNames->Name.data(), this->Name.data()); + SymbolNames = nullptr; + } + return removeSymbols( + [ToRemove](const Symbol &Sym) { return ToRemove(Sym.DefinedIn); }); +} + +void SymbolTableSection::updateSymbols(function_ref Callable) { + for (SymPtr &Sym : llvm::drop_begin(Symbols)) + Callable(*Sym); + std::stable_partition( + std::begin(Symbols), std::end(Symbols), + [](const SymPtr &Sym) { return Sym->Binding == STB_LOCAL; }); + assignIndices(); +} + +Error SymbolTableSection::removeSymbols( + function_ref ToRemove) { + Symbols.erase( + std::remove_if(std::begin(Symbols) + 1, std::end(Symbols), + [ToRemove](const SymPtr &Sym) { return ToRemove(*Sym); }), + std::end(Symbols)); + Size = Symbols.size() * EntrySize; + assignIndices(); + return Error::success(); +} + +void SymbolTableSection::replaceSectionReferences( + const DenseMap &FromTo) { + for (std::unique_ptr &Sym : Symbols) + if (SectionBase *To = FromTo.lookup(Sym->DefinedIn)) + Sym->DefinedIn = To; +} + +Error SymbolTableSection::initialize(SectionTableRef SecTable) { + Size = 0; + Expected Sec = + SecTable.getSectionOfType( + Link, + "Symbol table has link index of " + Twine(Link) + + " which is not a valid index", + "Symbol table has link index of " + Twine(Link) + + " which is not a string table"); + if (!Sec) + return Sec.takeError(); + + setStrTab(*Sec); + return Error::success(); +} + +void SymbolTableSection::finalize() { + uint32_t MaxLocalIndex = 0; + for (std::unique_ptr &Sym : Symbols) { + Sym->NameIndex = + SymbolNames == nullptr ? 0 : SymbolNames->findIndex(Sym->Name); + if (Sym->Binding == STB_LOCAL) + MaxLocalIndex = std::max(MaxLocalIndex, Sym->Index); + } + // Now we need to set the Link and Info fields. + Link = SymbolNames == nullptr ? 0 : SymbolNames->Index; + Info = MaxLocalIndex + 1; +} + +void SymbolTableSection::prepareForLayout() { + // Reserve proper amount of space in section index table, so we can + // layout sections correctly. We will fill the table with correct + // indexes later in fillShdnxTable. 
+ if (SectionIndexTable) + SectionIndexTable->reserve(Symbols.size()); + + // Add all of our strings to SymbolNames so that SymbolNames has the right + // size before layout is decided. + // If the symbol names section has been removed, don't try to add strings to + // the table. + if (SymbolNames != nullptr) + for (std::unique_ptr &Sym : Symbols) + SymbolNames->addString(Sym->Name); +} + +void SymbolTableSection::fillShndxTable() { + if (SectionIndexTable == nullptr) + return; + // Fill section index table with real section indexes. This function must + // be called after assignOffsets. + for (const std::unique_ptr &Sym : Symbols) { + if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE) + SectionIndexTable->addIndex(Sym->DefinedIn->Index); + else + SectionIndexTable->addIndex(SHN_UNDEF); + } +} + +Expected +SymbolTableSection::getSymbolByIndex(uint32_t Index) const { + if (Symbols.size() <= Index) + return createStringError(errc::invalid_argument, + "invalid symbol index: " + Twine(Index)); + return Symbols[Index].get(); +} + +Expected SymbolTableSection::getSymbolByIndex(uint32_t Index) { + Expected Sym = + static_cast(this)->getSymbolByIndex(Index); + if (!Sym) + return Sym.takeError(); + + return const_cast(*Sym); +} + +template +Error ELFSectionWriter::visit(const SymbolTableSection &Sec) { + Elf_Sym *Sym = reinterpret_cast(Out.getBufferStart() + Sec.Offset); + // Loop though symbols setting each entry of the symbol table. + for (const std::unique_ptr &Symbol : Sec.Symbols) { + Sym->st_name = Symbol->NameIndex; + Sym->st_value = Symbol->Value; + Sym->st_size = Symbol->Size; + Sym->st_other = Symbol->Visibility; + Sym->setBinding(Symbol->Binding); + Sym->setType(Symbol->Type); + Sym->st_shndx = Symbol->getShndx(); + ++Sym; + } + return Error::success(); +} + +Error SymbolTableSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +StringRef RelocationSectionBase::getNamePrefix() const { + switch (Type) { + case SHT_REL: + return ".rel"; + case SHT_RELA: + return ".rela"; + default: + llvm_unreachable("not a relocation section"); + } +} + +Error RelocationSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) { + if (ToRemove(Symbols)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "symbol table '%s' cannot be removed because it is " + "referenced by the relocation section '%s'", + Symbols->Name.data(), this->Name.data()); + Symbols = nullptr; + } + + for (const Relocation &R : Relocations) { + if (!R.RelocSymbol || !R.RelocSymbol->DefinedIn || + !ToRemove(R.RelocSymbol->DefinedIn)) + continue; + return createStringError(llvm::errc::invalid_argument, + "section '%s' cannot be removed: (%s+0x%" PRIx64 + ") has relocation against symbol '%s'", + R.RelocSymbol->DefinedIn->Name.data(), + SecToApplyRel->Name.data(), R.Offset, + R.RelocSymbol->Name.c_str()); + } + + return Error::success(); +} + +template +Error RelocSectionWithSymtabBase::initialize( + SectionTableRef SecTable) { + if (Link != SHN_UNDEF) { + Expected Sec = SecTable.getSectionOfType( + Link, + "Link field value " + Twine(Link) + " in section " + Name + + " is invalid", + "Link field value " + Twine(Link) + " in section " + Name + + " is not a symbol table"); + if (!Sec) + return Sec.takeError(); + + setSymTab(*Sec); + } + + if (Info != SHN_UNDEF) { + Expected Sec = + SecTable.getSection(Info, "Info field 
value " + Twine(Info) + + " in section " + Name + " is invalid"); + if (!Sec) + return Sec.takeError(); + + setSection(*Sec); + } else + setSection(nullptr); + + return Error::success(); +} + +template +void RelocSectionWithSymtabBase::finalize() { + this->Link = Symbols ? Symbols->Index : 0; + + if (SecToApplyRel != nullptr) + this->Info = SecToApplyRel->Index; +} + +template +static void setAddend(Elf_Rel_Impl &, uint64_t) {} + +template +static void setAddend(Elf_Rel_Impl &Rela, uint64_t Addend) { + Rela.r_addend = Addend; +} + +template +static void writeRel(const RelRange &Relocations, T *Buf, bool IsMips64EL) { + for (const auto &Reloc : Relocations) { + Buf->r_offset = Reloc.Offset; + setAddend(*Buf, Reloc.Addend); + Buf->setSymbolAndType(Reloc.RelocSymbol ? Reloc.RelocSymbol->Index : 0, + Reloc.Type, IsMips64EL); + ++Buf; + } +} + +template +Error ELFSectionWriter::visit(const RelocationSection &Sec) { + uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; + if (Sec.Type == SHT_REL) + writeRel(Sec.Relocations, reinterpret_cast(Buf), + Sec.getObject().IsMips64EL); + else + writeRel(Sec.Relocations, reinterpret_cast(Buf), + Sec.getObject().IsMips64EL); + return Error::success(); +} + +Error RelocationSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error RelocationSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +Error RelocationSection::removeSymbols( + function_ref ToRemove) { + for (const Relocation &Reloc : Relocations) + if (Reloc.RelocSymbol && ToRemove(*Reloc.RelocSymbol)) + return createStringError( + llvm::errc::invalid_argument, + "not stripping symbol '%s' because it is named in a relocation", + Reloc.RelocSymbol->Name.data()); + return Error::success(); +} + +void RelocationSection::markSymbols() { + for (const Relocation &Reloc : Relocations) + if (Reloc.RelocSymbol) + Reloc.RelocSymbol->Referenced = true; +} + +void RelocationSection::replaceSectionReferences( + const DenseMap &FromTo) { + // Update the target section if it was replaced. + if (SectionBase *To = FromTo.lookup(SecToApplyRel)) + SecToApplyRel = To; +} + +Error SectionWriter::visit(const DynamicRelocationSection &Sec) { + llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset); + return Error::success(); +} + +Error DynamicRelocationSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +Error DynamicRelocationSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) { + if (ToRemove(Symbols)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "symbol table '%s' cannot be removed because it is " + "referenced by the relocation section '%s'", + Symbols->Name.data(), this->Name.data()); + Symbols = nullptr; + } + + // SecToApplyRel contains a section referenced by sh_info field. It keeps + // a section to which the relocation section applies. When we remove any + // sections we also remove their relocation sections. Since we do that much + // earlier, this assert should never be triggered. 
+ assert(!SecToApplyRel || !ToRemove(SecToApplyRel)); + return Error::success(); +} + +Error Section::removeSectionReferences( + bool AllowBrokenDependency, + function_ref ToRemove) { + if (ToRemove(LinkSection)) { + if (!AllowBrokenDependency) + return createStringError(llvm::errc::invalid_argument, + "section '%s' cannot be removed because it is " + "referenced by the section '%s'", + LinkSection->Name.data(), this->Name.data()); + LinkSection = nullptr; + } + return Error::success(); +} + +void GroupSection::finalize() { + this->Info = Sym ? Sym->Index : 0; + this->Link = SymTab ? SymTab->Index : 0; + // Linker deduplication for GRP_COMDAT is based on Sym->Name. The local/global + // status is not part of the equation. If Sym is localized, the intention is + // likely to make the group fully localized. Drop GRP_COMDAT to suppress + // deduplication. See https://groups.google.com/g/generic-abi/c/2X6mR-s2zoc + if ((FlagWord & GRP_COMDAT) && Sym && Sym->Binding == STB_LOCAL) + this->FlagWord &= ~GRP_COMDAT; +} + +Error GroupSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref ToRemove) { + if (ToRemove(SymTab)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "section '.symtab' cannot be removed because it is " + "referenced by the group section '%s'", + this->Name.data()); + SymTab = nullptr; + Sym = nullptr; + } + llvm::erase_if(GroupMembers, ToRemove); + return Error::success(); +} + +Error GroupSection::removeSymbols(function_ref ToRemove) { + if (ToRemove(*Sym)) + return createStringError(llvm::errc::invalid_argument, + "symbol '%s' cannot be removed because it is " + "referenced by the section '%s[%d]'", + Sym->Name.data(), this->Name.data(), this->Index); + return Error::success(); +} + +void GroupSection::markSymbols() { + if (Sym) + Sym->Referenced = true; +} + +void GroupSection::replaceSectionReferences( + const DenseMap &FromTo) { + for (SectionBase *&Sec : GroupMembers) + if (SectionBase *To = FromTo.lookup(Sec)) + Sec = To; +} + +void GroupSection::onRemove() { + // As the header section of the group is removed, drop the Group flag in its + // former members. + for (SectionBase *Sec : GroupMembers) + Sec->Flags &= ~SHF_GROUP; +} + +Error Section::initialize(SectionTableRef SecTable) { + if (Link == ELF::SHN_UNDEF) + return Error::success(); + + Expected Sec = + SecTable.getSection(Link, "Link field value " + Twine(Link) + + " in section " + Name + " is invalid"); + if (!Sec) + return Sec.takeError(); + + LinkSection = *Sec; + + if (LinkSection->Type == ELF::SHT_SYMTAB) + LinkSection = nullptr; + + return Error::success(); +} + +void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; } + +void GnuDebugLinkSection::init(StringRef File) { + FileName = sys::path::filename(File); + // The format for the .gnu_debuglink starts with the file name and is + // followed by a null terminator and then the CRC32 of the file. The CRC32 + // should be 4 byte aligned. So we add the FileName size, a 1 for the null + // byte, and then finally push the size to alignment and add 4. + Size = alignTo(FileName.size() + 1, 4) + 4; + // The CRC32 will only be aligned if we align the whole section. + Align = 4; + Type = OriginalType = ELF::SHT_PROGBITS; + Name = ".gnu_debuglink"; + // For sections not found in segments, OriginalOffset is only used to + // establish the order that sections should go in. By using the maximum + // possible offset we cause this section to wind up at the end. 
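+  // (Illustration added for clarity: the assignment below uses
+  // std::numeric_limits<uint64_t>::max(), i.e. UINT64_MAX, so when sections
+  // are later ordered by OriginalOffset the .gnu_debuglink section sorts
+  // after every section that came from the input file.)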
+ OriginalOffset = std::numeric_limits::max(); +} + +GnuDebugLinkSection::GnuDebugLinkSection(StringRef File, + uint32_t PrecomputedCRC) + : FileName(File), CRC32(PrecomputedCRC) { + init(File); +} + +template +Error ELFSectionWriter::visit(const GnuDebugLinkSection &Sec) { + unsigned char *Buf = + reinterpret_cast(Out.getBufferStart()) + Sec.Offset; + Elf_Word *CRC = + reinterpret_cast(Buf + Sec.Size - sizeof(Elf_Word)); + *CRC = Sec.CRC32; + llvm::copy(Sec.FileName, Buf); + return Error::success(); +} + +Error GnuDebugLinkSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +template +Error ELFSectionWriter::visit(const GroupSection &Sec) { + ELF::Elf32_Word *Buf = + reinterpret_cast(Out.getBufferStart() + Sec.Offset); + support::endian::write32(Buf++, Sec.FlagWord); + for (SectionBase *S : Sec.GroupMembers) + support::endian::write32(Buf++, S->Index); + return Error::success(); +} + +Error GroupSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error GroupSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +// Returns true IFF a section is wholly inside the range of a segment +static bool sectionWithinSegment(const SectionBase &Sec, const Segment &Seg) { + // If a section is empty it should be treated like it has a size of 1. This is + // to clarify the case when an empty section lies on a boundary between two + // segments and ensures that the section "belongs" to the second segment and + // not the first. + uint64_t SecSize = Sec.Size ? Sec.Size : 1; + + // Ignore just added sections. + if (Sec.OriginalOffset == std::numeric_limits::max()) + return false; + + if (Sec.Type == SHT_NOBITS) { + if (!(Sec.Flags & SHF_ALLOC)) + return false; + + bool SectionIsTLS = Sec.Flags & SHF_TLS; + bool SegmentIsTLS = Seg.Type == PT_TLS; + if (SectionIsTLS != SegmentIsTLS) + return false; + + return Seg.VAddr <= Sec.Addr && + Seg.VAddr + Seg.MemSize >= Sec.Addr + SecSize; + } + + return Seg.Offset <= Sec.OriginalOffset && + Seg.Offset + Seg.FileSize >= Sec.OriginalOffset + SecSize; +} + +// Returns true IFF a segment's original offset is inside of another segment's +// range. +static bool segmentOverlapsSegment(const Segment &Child, + const Segment &Parent) { + + return Parent.OriginalOffset <= Child.OriginalOffset && + Parent.OriginalOffset + Parent.FileSize > Child.OriginalOffset; +} + +static bool compareSegmentsByOffset(const Segment *A, const Segment *B) { + // Any segment without a parent segment should come before a segment + // that has a parent segment. 
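+  // (Illustration added for clarity: two PT_LOAD segments at file offsets
+  // 0x0 and 0x1000 keep that order; if two segments share an offset, the
+  // comparison below falls back to their original program header index so
+  // the ordering stays deterministic.)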
+  if (A->OriginalOffset < B->OriginalOffset)
+    return true;
+  if (A->OriginalOffset > B->OriginalOffset)
+    return false;
+  return A->Index < B->Index;
+}
+
+void BasicELFBuilder::initFileHeader() {
+  Obj->Flags = 0x0;
+  Obj->Type = ET_REL;
+  Obj->OSABI = ELFOSABI_NONE;
+  Obj->ABIVersion = 0;
+  Obj->Entry = 0x0;
+  Obj->Machine = EM_NONE;
+  Obj->Version = 1;
+}
+
+void BasicELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
+
+StringTableSection *BasicELFBuilder::addStrTab() {
+  auto &StrTab = Obj->addSection<StringTableSection>();
+  StrTab.Name = ".strtab";
+
+  Obj->SectionNames = &StrTab;
+  return &StrTab;
+}
+
+SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) {
+  auto &SymTab = Obj->addSection<SymbolTableSection>();
+
+  SymTab.Name = ".symtab";
+  SymTab.Link = StrTab->Index;
+
+  // The symbol table always needs a null symbol
+  SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
+
+  Obj->SymbolTable = &SymTab;
+  return &SymTab;
+}
+
+Error BasicELFBuilder::initSections() {
+  for (SectionBase &Sec : Obj->sections())
+    if (Error Err = Sec.initialize(Obj->sections()))
+      return Err;
+
+  return Error::success();
+}
+
+void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
+  auto Data = ArrayRef<uint8_t>(
+      reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()),
+      MemBuf->getBufferSize());
+  auto &DataSection = Obj->addSection<Section>(Data);
+  DataSection.Name = ".data";
+  DataSection.Type = ELF::SHT_PROGBITS;
+  DataSection.Size = Data.size();
+  DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+
+  std::string SanitizedFilename = MemBuf->getBufferIdentifier().str();
+  std::replace_if(
+      std::begin(SanitizedFilename), std::end(SanitizedFilename),
+      [](char C) { return !isAlnum(C); }, '_');
+  Twine Prefix = Twine("_binary_") + SanitizedFilename;
+
+  SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection,
+                    /*Value=*/0, NewSymbolVisibility, 0, 0);
+  SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection,
+                    /*Value=*/DataSection.Size, NewSymbolVisibility, 0, 0);
+  SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr,
+                    /*Value=*/DataSection.Size, NewSymbolVisibility, SHN_ABS,
+                    0);
+}
+
+Expected<std::unique_ptr<Object>> BinaryELFBuilder::build() {
+  initFileHeader();
+  initHeaderSegment();
+
+  SymbolTableSection *SymTab = addSymTab(addStrTab());
+  if (Error Err = initSections())
+    return std::move(Err);
+  addData(SymTab);
+
+  return std::move(Obj);
+}
+
+// Adds sections from IHEX data file. Data should have been
+// fully validated by this time.
+void IHexELFBuilder::addDataSections() {
+  OwnedDataSection *Section = nullptr;
+  uint64_t SegmentAddr = 0, BaseAddr = 0;
+  uint32_t SecNo = 1;
+
+  for (const IHexRecord &R : Records) {
+    uint64_t RecAddr;
+    switch (R.Type) {
+    case IHexRecord::Data:
+      // Ignore empty data records
+      if (R.HexData.empty())
+        continue;
+      RecAddr = R.Addr + SegmentAddr + BaseAddr;
+      if (!Section || Section->Addr + Section->Size != RecAddr) {
+        // OriginalOffset field is only used to sort sections before layout, so
+        // instead of keeping track of real offsets in IHEX file, and as
+        // layoutSections() and layoutSectionsForOnlyKeepDebug() use
+        // llvm::stable_sort(), we can just set it to a constant (zero).
+        Section = &Obj->addSection<OwnedDataSection>(
+            ".sec" + std::to_string(SecNo), RecAddr,
+            ELF::SHF_ALLOC | ELF::SHF_WRITE, 0);
+        SecNo++;
+      }
+      Section->appendHexData(R.HexData);
+      break;
+    case IHexRecord::EndOfFile:
+      break;
+    case IHexRecord::SegmentAddr:
+      // 20-bit segment address.
+      SegmentAddr = checkedGetHex<uint16_t>(R.HexData) << 4;
+      break;
+    case IHexRecord::StartAddr80x86:
+    case IHexRecord::StartAddr:
+      Obj->Entry = checkedGetHex<uint32_t>(R.HexData);
+      assert(Obj->Entry <= 0xFFFFFU);
+      break;
+    case IHexRecord::ExtendedAddr:
+      // 16-31 bits of linear base address
+      BaseAddr = checkedGetHex<uint16_t>(R.HexData) << 16;
+      break;
+    default:
+      llvm_unreachable("unknown record type");
+    }
+  }
+}
+
+Expected<std::unique_ptr<Object>> IHexELFBuilder::build() {
+  initFileHeader();
+  initHeaderSegment();
+  StringTableSection *StrTab = addStrTab();
+  addSymTab(StrTab);
+  if (Error Err = initSections())
+    return std::move(Err);
+  addDataSections();
+
+  return std::move(Obj);
+}
+
+template <class ELFT>
+ELFBuilder<ELFT>::ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
+                             Optional<StringRef> ExtractPartition)
+    : ElfFile(ElfObj.getELFFile()), Obj(Obj),
+      ExtractPartition(ExtractPartition) {
+  Obj.IsMips64EL = ElfFile.isMips64EL();
+}
+
+template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
+  for (Segment &Parent : Obj.segments()) {
+    // Every segment will overlap with itself but we don't want a segment to
+    // be its own parent so we avoid that situation.
+    if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) {
+      // We want a canonical "most parental" segment but this requires
+      // inspecting the ParentSegment.
+ if (compareSegmentsByOffset(&Parent, &Child)) + if (Child.ParentSegment == nullptr || + compareSegmentsByOffset(&Parent, Child.ParentSegment)) { + Child.ParentSegment = &Parent; + } + } + } +} + +template Error ELFBuilder::findEhdrOffset() { + if (!ExtractPartition) + return Error::success(); + + for (const SectionBase &Sec : Obj.sections()) { + if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) { + EhdrOffset = Sec.Offset; + return Error::success(); + } + } + return createStringError(errc::invalid_argument, + "could not find partition named '" + + *ExtractPartition + "'"); +} + +template +Error ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { + uint32_t Index = 0; + + Expected::Elf_Phdr_Range> Headers = + HeadersFile.program_headers(); + if (!Headers) + return Headers.takeError(); + + for (const typename ELFFile::Elf_Phdr &Phdr : *Headers) { + if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize()) + return createStringError( + errc::invalid_argument, + "program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) + + " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) + + " goes past the end of the file"); + + ArrayRef Data{HeadersFile.base() + Phdr.p_offset, + (size_t)Phdr.p_filesz}; + Segment &Seg = Obj.addSegment(Data); + Seg.Type = Phdr.p_type; + Seg.Flags = Phdr.p_flags; + Seg.OriginalOffset = Phdr.p_offset + EhdrOffset; + Seg.Offset = Phdr.p_offset + EhdrOffset; + Seg.VAddr = Phdr.p_vaddr; + Seg.PAddr = Phdr.p_paddr; + Seg.FileSize = Phdr.p_filesz; + Seg.MemSize = Phdr.p_memsz; + Seg.Align = Phdr.p_align; + Seg.Index = Index++; + for (SectionBase &Sec : Obj.sections()) + if (sectionWithinSegment(Sec, Seg)) { + Seg.addSection(&Sec); + if (!Sec.ParentSegment || Sec.ParentSegment->Offset > Seg.Offset) + Sec.ParentSegment = &Seg; + } + } + + auto &ElfHdr = Obj.ElfHdrSegment; + ElfHdr.Index = Index++; + ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset; + + const typename ELFT::Ehdr &Ehdr = HeadersFile.getHeader(); + auto &PrHdr = Obj.ProgramHdrSegment; + PrHdr.Type = PT_PHDR; + PrHdr.Flags = 0; + // The spec requires us to have p_vaddr % p_align == p_offset % p_align. + // Whereas this works automatically for ElfHdr, here OriginalOffset is + // always non-zero and to ensure the equation we assign the same value to + // VAddr as well. + PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = EhdrOffset + Ehdr.e_phoff; + PrHdr.PAddr = 0; + PrHdr.FileSize = PrHdr.MemSize = Ehdr.e_phentsize * Ehdr.e_phnum; + // The spec requires us to naturally align all the fields. + PrHdr.Align = sizeof(Elf_Addr); + PrHdr.Index = Index++; + + // Now we do an O(n^2) loop through the segments in order to match up + // segments. 
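+  // (Illustration added for clarity: a PT_DYNAMIC segment whose file range
+  // lies inside a PT_LOAD ends up with that PT_LOAD as its ParentSegment;
+  // the pseudo-segments for the ELF header and the program header table are
+  // matched by the same calls below.)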
+ for (Segment &Child : Obj.segments()) + setParentSegment(Child); + setParentSegment(ElfHdr); + setParentSegment(PrHdr); + + return Error::success(); +} + +template +Error ELFBuilder::initGroupSection(GroupSection *GroupSec) { + if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0) + return createStringError(errc::invalid_argument, + "invalid alignment " + Twine(GroupSec->Align) + + " of group section '" + GroupSec->Name + "'"); + SectionTableRef SecTable = Obj.sections(); + if (GroupSec->Link != SHN_UNDEF) { + auto SymTab = SecTable.template getSectionOfType( + GroupSec->Link, + "link field value '" + Twine(GroupSec->Link) + "' in section '" + + GroupSec->Name + "' is invalid", + "link field value '" + Twine(GroupSec->Link) + "' in section '" + + GroupSec->Name + "' is not a symbol table"); + if (!SymTab) + return SymTab.takeError(); + + Expected Sym = (*SymTab)->getSymbolByIndex(GroupSec->Info); + if (!Sym) + return createStringError(errc::invalid_argument, + "info field value '" + Twine(GroupSec->Info) + + "' in section '" + GroupSec->Name + + "' is not a valid symbol index"); + GroupSec->setSymTab(*SymTab); + GroupSec->setSymbol(*Sym); + } + if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) || + GroupSec->Contents.empty()) + return createStringError(errc::invalid_argument, + "the content of the section " + GroupSec->Name + + " is malformed"); + const ELF::Elf32_Word *Word = + reinterpret_cast(GroupSec->Contents.data()); + const ELF::Elf32_Word *End = + Word + GroupSec->Contents.size() / sizeof(ELF::Elf32_Word); + GroupSec->setFlagWord( + support::endian::read32(Word++)); + for (; Word != End; ++Word) { + uint32_t Index = support::endian::read32(Word); + Expected Sec = SecTable.getSection( + Index, "group member index " + Twine(Index) + " in section '" + + GroupSec->Name + "' is invalid"); + if (!Sec) + return Sec.takeError(); + + GroupSec->addMember(*Sec); + } + + return Error::success(); +} + +template +Error ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { + Expected Shdr = ElfFile.getSection(SymTab->Index); + if (!Shdr) + return Shdr.takeError(); + + Expected StrTabData = ElfFile.getStringTableForSymtab(**Shdr); + if (!StrTabData) + return StrTabData.takeError(); + + ArrayRef ShndxData; + + Expected::Elf_Sym_Range> Symbols = + ElfFile.symbols(*Shdr); + if (!Symbols) + return Symbols.takeError(); + + for (const typename ELFFile::Elf_Sym &Sym : *Symbols) { + SectionBase *DefSection = nullptr; + + Expected Name = Sym.getName(*StrTabData); + if (!Name) + return Name.takeError(); + + if (Sym.st_shndx == SHN_XINDEX) { + if (SymTab->getShndxTable() == nullptr) + return createStringError(errc::invalid_argument, + "symbol '" + *Name + + "' has index SHN_XINDEX but no " + "SHT_SYMTAB_SHNDX section exists"); + if (ShndxData.data() == nullptr) { + Expected ShndxSec = + ElfFile.getSection(SymTab->getShndxTable()->Index); + if (!ShndxSec) + return ShndxSec.takeError(); + + Expected> Data = + ElfFile.template getSectionContentsAsArray(**ShndxSec); + if (!Data) + return Data.takeError(); + + ShndxData = *Data; + if (ShndxData.size() != Symbols->size()) + return createStringError( + errc::invalid_argument, + "symbol section index table does not have the same number of " + "entries as the symbol table"); + } + Elf_Word Index = ShndxData[&Sym - Symbols->begin()]; + Expected Sec = Obj.sections().getSection( + Index, + "symbol '" + *Name + "' has invalid section index " + Twine(Index)); + if (!Sec) + return Sec.takeError(); + + DefSection = *Sec; + } else if (Sym.st_shndx >= SHN_LORESERVE) 
+{
+      if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) {
+        return createStringError(
+            errc::invalid_argument,
+            "symbol '" + *Name +
+                "' has unsupported value greater than or equal "
+                "to SHN_LORESERVE: " +
+                Twine(Sym.st_shndx));
+      }
+    } else if (Sym.st_shndx != SHN_UNDEF) {
+      Expected<SectionBase *> Sec = Obj.sections().getSection(
+          Sym.st_shndx, "symbol '" + *Name +
+                            "' is defined has invalid section index " +
+                            Twine(Sym.st_shndx));
+      if (!Sec)
+        return Sec.takeError();
+
+      DefSection = *Sec;
+    }
+
+    SymTab->addSymbol(*Name, Sym.getBinding(), Sym.getType(), DefSection,
+                      Sym.getValue(), Sym.st_other, Sym.st_shndx, Sym.st_size);
+  }
+
+  return Error::success();
+}
+
+template <class T>
+static void getAddend(uint64_t &, const Elf_Rel_Impl<T, false> &) {}
+
+template <class T>
+static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl<T, true> &Rela) {
+  ToSet = Rela.r_addend;
+}
+
+template <class T>
+static Error initRelocations(RelocationSection *Relocs, T RelRange) {
+  for (const auto &Rel : RelRange) {
+    Relocation ToAdd;
+    ToAdd.Offset = Rel.r_offset;
+    getAddend(ToAdd.Addend, Rel);
+    ToAdd.Type = Rel.getType(Relocs->getObject().IsMips64EL);
+
+    if (uint32_t Sym = Rel.getSymbol(Relocs->getObject().IsMips64EL)) {
+      if (!Relocs->getObject().SymbolTable)
+        return createStringError(
+            errc::invalid_argument,
+            "'" + Relocs->Name + "': relocation references symbol with index " +
+                Twine(Sym) + ", but there is no symbol table");
+      Expected<Symbol *> SymByIndex =
+          Relocs->getObject().SymbolTable->getSymbolByIndex(Sym);
+      if (!SymByIndex)
+        return SymByIndex.takeError();
+
+      ToAdd.RelocSymbol = *SymByIndex;
+    }
+
+    Relocs->addRelocation(ToAdd);
+  }
+
+  return Error::success();
+}
+
+Expected<SectionBase *> SectionTableRef::getSection(uint32_t Index,
+                                                    Twine ErrMsg) {
+  if (Index == SHN_UNDEF || Index > Sections.size())
+    return createStringError(errc::invalid_argument, ErrMsg);
+  return Sections[Index - 1].get();
+}
+
+template <class T>
+Expected<T *> SectionTableRef::getSectionOfType(uint32_t Index,
+                                                Twine IndexErrMsg,
+                                                Twine TypeErrMsg) {
+  Expected<SectionBase *> BaseSec = getSection(Index, IndexErrMsg);
+  if (!BaseSec)
+    return BaseSec.takeError();
+
+  if (T *Sec = dyn_cast<T>(*BaseSec))
+    return Sec;
+
+  return createStringError(errc::invalid_argument, TypeErrMsg);
+}
+
+template <class ELFT>
+Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
+  switch (Shdr.sh_type) {
+  case SHT_REL:
+  case SHT_RELA:
+    if (Shdr.sh_flags & SHF_ALLOC) {
+      if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+        return Obj.addSection<DynamicRelocationSection>(*Data);
+      else
+        return Data.takeError();
+    }
+    return Obj.addSection<RelocationSection>(Obj);
+  case SHT_STRTAB:
+    // If a string table is allocated we don't want to mess with it. That would
+    // mean altering the memory image. There are no special link types or
+    // anything so we can just use a Section.
+    if (Shdr.sh_flags & SHF_ALLOC) {
+      if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+        return Obj.addSection<Section>(*Data);
+      else
+        return Data.takeError();
+    }
+    return Obj.addSection<StringTableSection>();
+  case SHT_HASH:
+  case SHT_GNU_HASH:
+    // Hash tables should refer to SHT_DYNSYM which we're not going to change.
+    // Because of this we don't need to mess with the hash tables either.
+    if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+      return Obj.addSection<Section>(*Data);
+    else
+      return Data.takeError();
+  case SHT_GROUP:
+    if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+      return Obj.addSection<GroupSection>(*Data);
+    else
+      return Data.takeError();
+  case SHT_DYNSYM:
+    if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+      return Obj.addSection<DynamicSymbolTableSection>(*Data);
+    else
+      return Data.takeError();
+  case SHT_DYNAMIC:
+    if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
+      return Obj.addSection<DynamicSection>(*Data);
+    else
+      return Data.takeError();
+  case SHT_SYMTAB: {
+    auto &SymTab = Obj.addSection<SymbolTableSection>();
+    Obj.SymbolTable = &SymTab;
+    return SymTab;
+  }
+  case SHT_SYMTAB_SHNDX: {
+    auto &ShndxSection = Obj.addSection<SectionIndexSection>();
+    Obj.SectionIndexTable = &ShndxSection;
+    return ShndxSection;
+  }
+  case SHT_NOBITS:
+    return Obj.addSection<Section>(ArrayRef<uint8_t>());
+  default: {
+    Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr);
+    if (!Data)
+      return Data.takeError();
+
+    Expected<StringRef> Name = ElfFile.getSectionName(Shdr);
+    if (!Name)
+      return Name.takeError();
+
+    if (Name->startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
+      uint64_t DecompressedSize, DecompressedAlign;
+      std::tie(DecompressedSize, DecompressedAlign) =
+          getDecompressedSizeAndAlignment<ELFT>(*Data);
+      return Obj.addSection<CompressedSection>(
+          CompressedSection(*Data, DecompressedSize, DecompressedAlign));
+    }
+
+    return Obj.addSection<Section>(*Data);
+  }
+  }
+}
+
+template <class ELFT> Error ELFBuilder<ELFT>::readSectionHeaders() {
+  uint32_t Index = 0;
+  Expected<typename ELFFile<ELFT>::Elf_Shdr_Range> Sections =
+      ElfFile.sections();
+  if (!Sections)
+    return Sections.takeError();
+
+  for (const typename ELFFile<ELFT>::Elf_Shdr &Shdr : *Sections) {
+    if (Index == 0) {
+      ++Index;
+      continue;
+    }
+    Expected<SectionBase &> Sec = makeSection(Shdr);
+    if (!Sec)
+      return Sec.takeError();
+
+    Expected<StringRef> SecName = ElfFile.getSectionName(Shdr);
+    if (!SecName)
+      return SecName.takeError();
+    Sec->Name = SecName->str();
+    Sec->Type = Sec->OriginalType = Shdr.sh_type;
+    Sec->Flags = Sec->OriginalFlags = Shdr.sh_flags;
+    Sec->Addr = Shdr.sh_addr;
+    Sec->Offset = Shdr.sh_offset;
+    Sec->OriginalOffset = Shdr.sh_offset;
+    Sec->Size = Shdr.sh_size;
+    Sec->Link = Shdr.sh_link;
+    Sec->Info = Shdr.sh_info;
+    Sec->Align = Shdr.sh_addralign;
+    Sec->EntrySize = Shdr.sh_entsize;
+    Sec->Index = Index++;
+    Sec->OriginalIndex = Sec->Index;
+    Sec->OriginalData = ArrayRef<uint8_t>(
+        ElfFile.base() + Shdr.sh_offset,
+        (Shdr.sh_type == SHT_NOBITS) ? (size_t)0 : Shdr.sh_size);
+  }
+
+  return Error::success();
+}
+
+template <class ELFT> Error ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
+  uint32_t ShstrIndex = ElfFile.getHeader().e_shstrndx;
+  if (ShstrIndex == SHN_XINDEX) {
+    Expected<const Elf_Shdr *> Sec = ElfFile.getSection(0);
+    if (!Sec)
+      return Sec.takeError();
+
+    ShstrIndex = (*Sec)->sh_link;
+  }
+
+  if (ShstrIndex == SHN_UNDEF)
+    Obj.HadShdrs = false;
+  else {
+    Expected<StringTableSection *> Sec =
+        Obj.sections().template getSectionOfType<StringTableSection>(
+            ShstrIndex,
+            "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+                " is invalid",
+            "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+                " does not reference a string table");
+    if (!Sec)
+      return Sec.takeError();
+
+    Obj.SectionNames = *Sec;
+  }
+
+  // If a section index table exists we'll need to initialize it before we
+  // initialize the symbol table because the symbol table might need to
+  // reference it.
+  if (Obj.SectionIndexTable)
+    if (Error Err = Obj.SectionIndexTable->initialize(Obj.sections()))
+      return Err;
+
+  // Now that all of the sections have been added we can fill out some extra
+  // details about symbol tables. We need the symbol table filled out before
+  // any relocations.
+  if (Obj.SymbolTable) {
+    if (Error Err = Obj.SymbolTable->initialize(Obj.sections()))
+      return Err;
+    if (Error Err = initSymbolTable(Obj.SymbolTable))
+      return Err;
+  } else if (EnsureSymtab) {
+    if (Error Err = Obj.addNewSymbolTable())
+      return Err;
+  }
+
+  // Now that all sections and symbols have been added we can add
+  // relocations that reference symbols and set the link and info fields for
+  // relocation sections.
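+  // (Illustration added for clarity: for a ".rela.text" section, sh_link is
+  // set to the index of the symbol table it uses and sh_info to the index of
+  // ".text"; both fields are recomputed from the live section indexes during
+  // finalize().)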
+ for (SectionBase &Sec : Obj.sections()) { + if (&Sec == Obj.SymbolTable) + continue; + if (Error Err = Sec.initialize(Obj.sections())) + return Err; + if (auto RelSec = dyn_cast(&Sec)) { + Expected::Elf_Shdr_Range> Sections = + ElfFile.sections(); + if (!Sections) + return Sections.takeError(); + + const typename ELFFile::Elf_Shdr *Shdr = + Sections->begin() + RelSec->Index; + if (RelSec->Type == SHT_REL) { + Expected::Elf_Rel_Range> Rels = + ElfFile.rels(*Shdr); + if (!Rels) + return Rels.takeError(); + + if (Error Err = initRelocations(RelSec, *Rels)) + return Err; + } else { + Expected::Elf_Rela_Range> Relas = + ElfFile.relas(*Shdr); + if (!Relas) + return Relas.takeError(); + + if (Error Err = initRelocations(RelSec, *Relas)) + return Err; + } + } else if (auto GroupSec = dyn_cast(&Sec)) { + if (Error Err = initGroupSection(GroupSec)) + return Err; + } + } + + return Error::success(); +} + +template Error ELFBuilder::build(bool EnsureSymtab) { + if (Error E = readSectionHeaders()) + return E; + if (Error E = findEhdrOffset()) + return E; + + // The ELFFile whose ELF headers and program headers are copied into the + // output file. Normally the same as ElfFile, but if we're extracting a + // loadable partition it will point to the partition's headers. + Expected> HeadersFile = ELFFile::create(toStringRef( + {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})); + if (!HeadersFile) + return HeadersFile.takeError(); + + const typename ELFFile::Elf_Ehdr &Ehdr = HeadersFile->getHeader(); + Obj.OSABI = Ehdr.e_ident[EI_OSABI]; + Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION]; + Obj.Type = Ehdr.e_type; + Obj.Machine = Ehdr.e_machine; + Obj.Version = Ehdr.e_version; + Obj.Entry = Ehdr.e_entry; + Obj.Flags = Ehdr.e_flags; + + if (Error E = readSections(EnsureSymtab)) + return E; + return readProgramHeaders(*HeadersFile); +} + +Writer::~Writer() = default; + +Reader::~Reader() = default; + +Expected> +BinaryReader::create(bool /*EnsureSymtab*/) const { + return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build(); +} + +Expected> IHexReader::parse() const { + SmallVector Lines; + std::vector Records; + bool HasSections = false; + + MemBuf->getBuffer().split(Lines, '\n'); + Records.reserve(Lines.size()); + for (size_t LineNo = 1; LineNo <= Lines.size(); ++LineNo) { + StringRef Line = Lines[LineNo - 1].trim(); + if (Line.empty()) + continue; + + Expected R = IHexRecord::parse(Line); + if (!R) + return parseError(LineNo, R.takeError()); + if (R->Type == IHexRecord::EndOfFile) + break; + HasSections |= (R->Type == IHexRecord::Data); + Records.push_back(*R); + } + if (!HasSections) + return parseError(-1U, "no sections"); + + return std::move(Records); +} + +Expected> +IHexReader::create(bool /*EnsureSymtab*/) const { + Expected> Records = parse(); + if (!Records) + return Records.takeError(); + + return IHexELFBuilder(*Records).build(); +} + +Expected> ELFReader::create(bool EnsureSymtab) const { + auto Obj = std::make_unique(); + if (auto *O = dyn_cast>(Bin)) { + ELFBuilder Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } else if (auto *O = dyn_cast>(Bin)) { + ELFBuilder Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } else if (auto *O = dyn_cast>(Bin)) { + ELFBuilder Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } else if 
+  } else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
+    ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
+    if (Error Err = Builder.build(EnsureSymtab))
+      return std::move(Err);
+    return std::move(Obj);
+  }
+  return createStringError(errc::invalid_argument, "invalid file type");
+}
+
+template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
+  Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(Buf->getBufferStart());
+  std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
+  Ehdr.e_ident[EI_MAG0] = 0x7f;
+  Ehdr.e_ident[EI_MAG1] = 'E';
+  Ehdr.e_ident[EI_MAG2] = 'L';
+  Ehdr.e_ident[EI_MAG3] = 'F';
+  Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
+  Ehdr.e_ident[EI_DATA] =
+      ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB;
+  Ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+  Ehdr.e_ident[EI_OSABI] = Obj.OSABI;
+  Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion;
+
+  Ehdr.e_type = Obj.Type;
+  Ehdr.e_machine = Obj.Machine;
+  Ehdr.e_version = Obj.Version;
+  Ehdr.e_entry = Obj.Entry;
+  // We have to use the fully-qualified name llvm::size
+  // since some compilers complain on ambiguous resolution.
+  Ehdr.e_phnum = llvm::size(Obj.segments());
+  Ehdr.e_phoff = (Ehdr.e_phnum != 0) ? Obj.ProgramHdrSegment.Offset : 0;
+  Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? sizeof(Elf_Phdr) : 0;
+  Ehdr.e_flags = Obj.Flags;
+  Ehdr.e_ehsize = sizeof(Elf_Ehdr);
+  if (WriteSectionHeaders && Obj.sections().size() != 0) {
+    Ehdr.e_shentsize = sizeof(Elf_Shdr);
+    Ehdr.e_shoff = Obj.SHOff;
+    // """
+    // If the number of sections is greater than or equal to
+    // SHN_LORESERVE (0xff00), this member has the value zero and the actual
+    // number of section header table entries is contained in the sh_size field
+    // of the section header at index 0.
+    // """
+    auto Shnum = Obj.sections().size() + 1;
+    if (Shnum >= SHN_LORESERVE)
+      Ehdr.e_shnum = 0;
+    else
+      Ehdr.e_shnum = Shnum;
+    // """
+    // If the section name string table section index is greater than or equal
+    // to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX (0xffff)
+    // and the actual index of the section name string table section is
+    // contained in the sh_link field of the section header at index 0.
+    // """
+    if (Obj.SectionNames->Index >= SHN_LORESERVE)
+      Ehdr.e_shstrndx = SHN_XINDEX;
+    else
+      Ehdr.e_shstrndx = Obj.SectionNames->Index;
+  } else {
+    Ehdr.e_shentsize = 0;
+    Ehdr.e_shoff = 0;
+    Ehdr.e_shnum = 0;
+    Ehdr.e_shstrndx = 0;
+  }
+}
+
+template <class ELFT> void ELFWriter<ELFT>::writePhdrs() {
+  for (auto &Seg : Obj.segments())
+    writePhdr(Seg);
+}
+
+template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
+  // This reference serves to write the dummy section header at the beginning
+  // of the file. It is not used for anything else.
+  Elf_Shdr &Shdr =
+      *reinterpret_cast<Elf_Shdr *>(Buf->getBufferStart() + Obj.SHOff);
+  Shdr.sh_name = 0;
+  Shdr.sh_type = SHT_NULL;
+  Shdr.sh_flags = 0;
+  Shdr.sh_addr = 0;
+  Shdr.sh_offset = 0;
+  // See writeEhdr for why we do this.
+  uint64_t Shnum = Obj.sections().size() + 1;
+  if (Shnum >= SHN_LORESERVE)
+    Shdr.sh_size = Shnum;
+  else
+    Shdr.sh_size = 0;
+  // See writeEhdr for why we do this.
+  if (Obj.SectionNames != nullptr && Obj.SectionNames->Index >= SHN_LORESERVE)
+    Shdr.sh_link = Obj.SectionNames->Index;
+  else
+    Shdr.sh_link = 0;
+  Shdr.sh_info = 0;
+  Shdr.sh_addralign = 0;
+  Shdr.sh_entsize = 0;
+
+  for (SectionBase &Sec : Obj.sections())
+    writeShdr(Sec);
+}
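+// A worked example of the SHN_LORESERVE escape hatch used by writeEhdr and
+// writeShdrs above (illustrative numbers, not taken from the change itself):
+// for an object with 70000 sections, Shnum = 70001 >= SHN_LORESERVE (0xff00 =
+// 65280), so the count no longer fits in the 16-bit e_shnum field:
+//
+//   Ehdr.e_shnum    = 0;      // sentinel: consult the null section header
+//   Shdr0.sh_size   = 70001;  // real count, including the null header
+//   Ehdr.e_shstrndx = SHN_XINDEX (0xffff) if .shstrtab's index >= 0xff00,
+//   Shdr0.sh_link   = the real .shstrtab index in that case.
+//
+// Readers (e.g. llvm-readelf) are expected to fall back to the section
+// header at index 0 whenever they see these sentinel values.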
+template <class ELFT> Error ELFWriter<ELFT>::writeSectionData() {
+  for (SectionBase &Sec : Obj.sections())
+    // Segments are responsible for writing their contents, so only write the
+    // section data if the section is not in a segment. Note that this renders
+    // sections in segments effectively immutable.
+    if (Sec.ParentSegment == nullptr)
+      if (Error Err = Sec.accept(*SecWriter))
+        return Err;
+
+  return Error::success();
+}
+
+template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
+  for (Segment &Seg : Obj.segments()) {
+    size_t Size = std::min<size_t>(Seg.FileSize, Seg.getContents().size());
+    std::memcpy(Buf->getBufferStart() + Seg.Offset, Seg.getContents().data(),
+                Size);
+  }
+
+  for (auto it : Obj.getUpdatedSections()) {
+    SectionBase *Sec = it.first;
+    ArrayRef<uint8_t> Data = it.second;
+
+    auto *Parent = Sec->ParentSegment;
+    assert(Parent && "This section should've been part of a segment.");
+    uint64_t Offset =
+        Sec->OriginalOffset - Parent->OriginalOffset + Parent->Offset;
+    llvm::copy(Data, Buf->getBufferStart() + Offset);
+  }
+
+  // Iterate over removed sections and overwrite their old data with zeroes.
+  for (auto &Sec : Obj.removedSections()) {
+    Segment *Parent = Sec.ParentSegment;
+    if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0)
+      continue;
+    uint64_t Offset =
+        Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset;
+    std::memset(Buf->getBufferStart() + Offset, 0, Sec.Size);
+  }
+}
+
+template <class ELFT>
+ELFWriter<ELFT>::ELFWriter(Object &Obj, raw_ostream &Buf, bool WSH,
+                           bool OnlyKeepDebug)
+    : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs),
+      OnlyKeepDebug(OnlyKeepDebug) {}
+
+Error Object::updateSection(StringRef Name, ArrayRef<uint8_t> Data) {
+  auto It = llvm::find_if(Sections,
+                          [&](const SecPtr &Sec) { return Sec->Name == Name; });
+  if (It == Sections.end())
+    return createStringError(errc::invalid_argument, "section '%s' not found",
+                             Name.str().c_str());
+
+  auto *OldSec = It->get();
+  if (!OldSec->hasContents())
+    return createStringError(
+        errc::invalid_argument,
+        "section '%s' cannot be updated because it does not have contents",
+        Name.str().c_str());
+
+  if (Data.size() > OldSec->Size && OldSec->ParentSegment)
+    return createStringError(errc::invalid_argument,
+                             "cannot fit data of size %zu into section '%s' "
+                             "with size %zu that is part of a segment",
+                             Data.size(), Name.str().c_str(), OldSec->Size);
+
+  if (!OldSec->ParentSegment) {
+    *It = std::make_unique<OwnedDataSection>(*OldSec, Data);
+  } else {
+    // The segment writer will be in charge of updating these contents.
+    OldSec->Size = Data.size();
+    UpdatedSections[OldSec] = Data;
+  }
+
+  return Error::success();
+}
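+// A minimal usage sketch of Object::updateSection (hypothetical caller and
+// section name; the error handling follows the llvm::Error conventions used
+// throughout this file):
+//
+//   ArrayRef<uint8_t> NewData = ...; // replacement contents
+//   if (Error E = Obj.updateSection(".text.patch", NewData))
+//     return E; // section missing, has no contents, or won't fit in segment
+//
+// For a section inside a segment the new bytes are later written by
+// writeSegmentData() at OriginalOffset - Parent->OriginalOffset +
+// Parent->Offset, i.e. at the section's original position relative to its
+// containing segment.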
+Error Object::removeSections(
+    bool AllowBrokenLinks, std::function<bool(const SectionBase &)> ToRemove) {
+
+  auto Iter = std::stable_partition(
+      std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) {
+        if (ToRemove(*Sec))
+          return false;
+        if (auto RelSec = dyn_cast<RelocationSectionBase>(Sec.get())) {
+          if (auto ToRelSec = RelSec->getSection())
+            return !ToRemove(*ToRelSec);
+        }
+        return true;
+      });
+  if (SymbolTable != nullptr && ToRemove(*SymbolTable))
+    SymbolTable = nullptr;
+  if (SectionNames != nullptr && ToRemove(*SectionNames))
+    SectionNames = nullptr;
+  if (SectionIndexTable != nullptr && ToRemove(*SectionIndexTable))
+    SectionIndexTable = nullptr;
+  // Now make sure there are no remaining references to the sections that will
+  // be removed. Sometimes it is impossible to remove a reference, so we emit
+  // an error here instead.
+  std::unordered_set<const SectionBase *> RemoveSections;
+  RemoveSections.reserve(std::distance(Iter, std::end(Sections)));
+  for (auto &RemoveSec : make_range(Iter, std::end(Sections))) {
+    for (auto &Segment : Segments)
+      Segment->removeSection(RemoveSec.get());
+    RemoveSec->onRemove();
+    RemoveSections.insert(RemoveSec.get());
+  }
+
+  // For each section that remains alive, we want to remove the dead
+  // references. This either might update the content of the section (e.g.
+  // remove symbols from a symbol table that belong to a removed section) or
+  // trigger an error if a live section critically depends on a section being
+  // removed somehow (e.g. the removed section is referenced by a relocation).
+  for (auto &KeepSec : make_range(std::begin(Sections), Iter)) {
+    if (Error E = KeepSec->removeSectionReferences(
+            AllowBrokenLinks, [&RemoveSections](const SectionBase *Sec) {
+              return RemoveSections.find(Sec) != RemoveSections.end();
+            }))
+      return E;
+  }
+
+  // Transfer removed sections into the Object RemovedSections container for
+  // use later.
+  std::move(Iter, Sections.end(), std::back_inserter(RemovedSections));
+  // Now finally get rid of them all together.
+  Sections.erase(Iter, std::end(Sections));
+  return Error::success();
+}
+
+Error Object::replaceSections(
+    const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+  auto SectionIndexLess = [](const SecPtr &Lhs, const SecPtr &Rhs) {
+    return Lhs->Index < Rhs->Index;
+  };
+  assert(llvm::is_sorted(Sections, SectionIndexLess) &&
+         "Sections are expected to be sorted by Index");
+  // Set indices of new sections so that they can be later sorted into
+  // positions of removed ones.
+  for (auto &I : FromTo)
+    I.second->Index = I.first->Index;
+
+  // Notify all sections about the replacement.
+  for (auto &Sec : Sections)
+    Sec->replaceSectionReferences(FromTo);
+
+  if (Error E = removeSections(
+          /*AllowBrokenLinks=*/false,
+          [=](const SectionBase &Sec) { return FromTo.count(&Sec) > 0; }))
+    return E;
+  llvm::sort(Sections, SectionIndexLess);
+  return Error::success();
+}
+
+Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+  if (SymbolTable)
+    for (const SecPtr &Sec : Sections)
+      if (Error E = Sec->removeSymbols(ToRemove))
+        return E;
+  return Error::success();
+}
+
+Error Object::addNewSymbolTable() {
+  assert(!SymbolTable && "Object must not have a SymbolTable.");
+
+  // Reuse an existing SHT_STRTAB section if it exists.
+  StringTableSection *StrTab = nullptr;
+  for (SectionBase &Sec : sections()) {
+    if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) {
+      StrTab = static_cast<StringTableSection *>(&Sec);
+
+      // Prefer a string table that is not the section header string table, if
+      // such a table exists.
+      if (SectionNames != &Sec)
+        break;
+    }
+  }
+  if (!StrTab)
+    StrTab = &addSection<StringTableSection>();
+
+  SymbolTableSection &SymTab = addSection<SymbolTableSection>();
+  SymTab.Name = ".symtab";
+  SymTab.Link = StrTab->Index;
+  if (Error Err = SymTab.initialize(sections()))
+    return Err;
+  SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
+
+  SymbolTable = &SymTab;
+
+  return Error::success();
+}
+
+// Orders segments such that if x = y->ParentSegment then y comes before x.
+static void orderSegments(std::vector<Segment *> &Segments) {
+  llvm::stable_sort(Segments, compareSegmentsByOffset);
+}
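+// A minimal usage sketch of Object::removeSections (hypothetical predicate;
+// the real callers live in ELFObjcopy.cpp): drop every .note* section while
+// refusing to leave dangling references behind:
+//
+//   if (Error E = Obj.removeSections(
+//           /*AllowBrokenLinks=*/false, [](const SectionBase &Sec) {
+//             return StringRef(Sec.Name).startswith(".note");
+//           }))
+//     return E;
+//
+// The stable_partition above keeps the surviving sections in their original
+// relative order at the front of Sections, so the iterator Iter remains a
+// valid split point for the transfer into RemovedSections.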
+// This function finds a consistent layout for a list of segments starting
+// from an Offset. It assumes that Segments have been sorted by orderSegments
+// and returns an Offset one past the end of the last segment.
+static uint64_t layoutSegments(std::vector<Segment *> &Segments,
+                               uint64_t Offset) {
+  assert(llvm::is_sorted(Segments, compareSegmentsByOffset));
+  // The only way a segment should move is if a section was between two
+  // segments and that section was removed. If that section isn't in a segment
+  // then it's acceptable, but not ideal, to simply move it to after the
+  // segments. So we can simply layout segments one after the other accounting
+  // for alignment.
+  for (Segment *Seg : Segments) {
+    // We assume that segments have been ordered by OriginalOffset and Index
+    // such that a parent segment will always come before a child segment in
+    // OrderedSegments. This means that the Offset of the ParentSegment should
+    // already be set and we can set our offset relative to it.
+    if (Seg->ParentSegment != nullptr) {
+      Segment *Parent = Seg->ParentSegment;
+      Seg->Offset =
+          Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset;
+    } else {
+      Seg->Offset =
+          alignTo(Offset, std::max<uint64_t>(Seg->Align, 1), Seg->VAddr);
+    }
+    Offset = std::max(Offset, Seg->Offset + Seg->FileSize);
+  }
+  return Offset;
+}
+
+// This function finds a consistent layout for a list of sections. It assumes
+// that the ->ParentSegment of each section has already been laid out. The
+// supplied starting Offset is used for the starting offset of any section
+// that does not have a ParentSegment. It returns either the offset given if
+// all sections had a ParentSegment or an offset one past the last section if
+// there was a section that didn't have a ParentSegment.
+template <class Range>
+static uint64_t layoutSections(Range Sections, uint64_t Offset) {
+  // Now that the offset of every segment has been set we can assign the
+  // offsets of each section. For sections that are covered by a segment we
+  // should use the segment's original offset and the section's original
+  // offset to compute the offset from the start of the segment. Using the
+  // offset from the start of the segment we can assign a new offset to the
+  // section. For sections not covered by segments we can just bump Offset to
+  // the next valid location. While it is not necessary, lay out the sections
+  // in the order based on their original offsets to resemble the input file
+  // as closely as possible.
+  std::vector<SectionBase *> OutOfSegmentSections;
+  uint32_t Index = 1;
+  for (auto &Sec : Sections) {
+    Sec.Index = Index++;
+    if (Sec.ParentSegment != nullptr) {
+      auto Segment = *Sec.ParentSegment;
+      Sec.Offset =
+          Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset);
+    } else
+      OutOfSegmentSections.push_back(&Sec);
+  }
+
+  llvm::stable_sort(OutOfSegmentSections,
+                    [](const SectionBase *Lhs, const SectionBase *Rhs) {
+                      return Lhs->OriginalOffset < Rhs->OriginalOffset;
+                    });
+  for (auto *Sec : OutOfSegmentSections) {
+    Offset = alignTo(Offset, Sec->Align == 0 ? 1 : Sec->Align);
+    Sec->Offset = Offset;
+    if (Sec->Type != SHT_NOBITS)
+      Offset += Sec->Size;
+  }
+  return Offset;
+}
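+// A worked example of the three-argument alignTo used above (illustrative
+// numbers only): alignTo(Offset, Align, Addr) returns the smallest value
+// >= Offset that is congruent to Addr modulo Align. With Offset = 0x1234,
+// Seg->Align = 0x1000 and Seg->VAddr = 0x10400:
+//
+//   Addr % Align = 0x400
+//   result       = 0x1400  // smallest value >= 0x1234 whose low bits are 0x400
+//
+// This keeps p_offset and p_vaddr congruent modulo the page size, which is
+// what allows the loader to mmap the segment directly from the file.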
+// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus
+// occupy no space in the file.
+static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) {
+  // The layout algorithm requires the sections to be handled in the order of
+  // their offsets in the input file, at least inside segments.
+  std::vector<SectionBase *> Sections;
+  Sections.reserve(Obj.sections().size());
+  uint32_t Index = 1;
+  for (auto &Sec : Obj.sections()) {
+    Sec.Index = Index++;
+    Sections.push_back(&Sec);
+  }
+  llvm::stable_sort(Sections,
+                    [](const SectionBase *Lhs, const SectionBase *Rhs) {
+                      return Lhs->OriginalOffset < Rhs->OriginalOffset;
+                    });
+
+  for (auto *Sec : Sections) {
+    auto *FirstSec = Sec->ParentSegment && Sec->ParentSegment->Type == PT_LOAD
+                         ? Sec->ParentSegment->firstSection()
+                         : nullptr;
+
+    // The first section in a PT_LOAD has to have congruent offset and address
+    // modulo the alignment, which usually equals the maximum page size.
+    if (FirstSec && FirstSec == Sec)
+      Off = alignTo(Off, Sec->ParentSegment->Align, Sec->Addr);
+
+    // sh_offset is not significant for SHT_NOBITS sections, but the
+    // congruence rule must be followed if it is the first section in a
+    // PT_LOAD. Do not advance Off.
+    if (Sec->Type == SHT_NOBITS) {
+      Sec->Offset = Off;
+      continue;
+    }
+
+    if (!FirstSec) {
+      // FirstSec being nullptr generally means that Sec does not have the
+      // SHF_ALLOC flag.
+      Off = Sec->Align ? alignTo(Off, Sec->Align) : Off;
+    } else if (FirstSec != Sec) {
+      // The offset is relative to the first section in the PT_LOAD segment.
+      // Use sh_offset for non-SHF_ALLOC sections.
+      Off = Sec->OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset;
+    }
+    Sec->Offset = Off;
+    Off += Sec->Size;
+  }
+  return Off;
+}
+
+// Rewrite p_offset and p_filesz of non-PT_PHDR segments after sh_offset
+// values have been updated.
+static uint64_t layoutSegmentsForOnlyKeepDebug(std::vector<Segment *> &Segments,
+                                               uint64_t HdrEnd) {
+  uint64_t MaxOffset = 0;
+  for (Segment *Seg : Segments) {
+    if (Seg->Type == PT_PHDR)
+      continue;
+
+    // The segment offset is generally the offset of the first section.
+    //
+    // For a segment containing no section (see sectionWithinSegment), if it
+    // has a parent segment, copy the parent segment's offset field. This
+    // works for an empty PT_TLS. If there is no parent segment, use 0: the
+    // segment is not useful for debugging anyway.
+    const SectionBase *FirstSec = Seg->firstSection();
+    uint64_t Offset =
+        FirstSec ? FirstSec->Offset
+                 : (Seg->ParentSegment ? Seg->ParentSegment->Offset : 0);
+    uint64_t FileSize = 0;
+    for (const SectionBase *Sec : Seg->Sections) {
+      uint64_t Size = Sec->Type == SHT_NOBITS ? 0 : Sec->Size;
+      if (Sec->Offset + Size > Offset)
+        FileSize = std::max(FileSize, Sec->Offset + Size - Offset);
+    }
+
+    // If the segment includes EHDR and program headers, don't make it smaller
+    // than the headers.
+    if (Seg->Offset < HdrEnd && HdrEnd <= Seg->Offset + Seg->FileSize) {
+      FileSize += Offset - Seg->Offset;
+      Offset = Seg->Offset;
+      FileSize = std::max(FileSize, HdrEnd - Offset);
+    }
+
+    Seg->Offset = Offset;
+    Seg->FileSize = FileSize;
+    MaxOffset = std::max(MaxOffset, Offset + FileSize);
+  }
+  return MaxOffset;
+}
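+// A worked sketch of what the two --only-keep-debug passes above accomplish
+// (illustrative layout, not taken from the change itself): suppose a PT_LOAD
+// with Align = 0x1000 originally holds .text at sh_offset 0x1000 and sh_addr
+// 0x401000. After .text is converted to SHT_NOBITS, the section pass assigns
+// it an sh_offset without advancing Off, and the segment pass then shrinks
+// p_filesz to cover only sections that still occupy file space, while
+// preserving p_offset % 0x1000 == p_vaddr % 0x1000 so the stripped file
+// remains a valid, loadable-looking ELF for debuggers.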
+template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
+  Segment &ElfHdr = Obj.ElfHdrSegment;
+  ElfHdr.Type = PT_PHDR;
+  ElfHdr.Flags = 0;
+  ElfHdr.VAddr = 0;
+  ElfHdr.PAddr = 0;
+  ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
+  ElfHdr.Align = 0;
+}
+
+template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
+  // We need a temporary list of segments that has a special order to it
+  // so that we know that anytime ->ParentSegment is set that segment has
+  // already had its offset properly set.
+  std::vector<Segment *> OrderedSegments;
+  for (Segment &Segment : Obj.segments())
+    OrderedSegments.push_back(&Segment);
+  OrderedSegments.push_back(&Obj.ElfHdrSegment);
+  OrderedSegments.push_back(&Obj.ProgramHdrSegment);
+  orderSegments(OrderedSegments);
+
+  uint64_t Offset;
+  if (OnlyKeepDebug) {
+    // For --only-keep-debug, the sections that did not preserve contents were
+    // changed to SHT_NOBITS. We now rewrite sh_offset fields of sections, and
+    // then rewrite p_offset/p_filesz of program headers.
+    uint64_t HdrEnd =
+        sizeof(Elf_Ehdr) + llvm::size(Obj.segments()) * sizeof(Elf_Phdr);
+    Offset = layoutSectionsForOnlyKeepDebug(Obj, HdrEnd);
+    Offset = std::max(Offset,
+                      layoutSegmentsForOnlyKeepDebug(OrderedSegments, HdrEnd));
+  } else {
+    // Offset is used as the start offset of the first segment to be laid out.
+    // Since the ELF Header (ElfHdrSegment) must be at the start of the file,
+    // we start at offset 0.
+    Offset = layoutSegments(OrderedSegments, 0);
+    Offset = layoutSections(Obj.sections(), Offset);
+  }
+  // If we need to write the section header table out then we need to align
+  // the Offset so that SHOffset is valid.
+  if (WriteSectionHeaders)
+    Offset = alignTo(Offset, sizeof(Elf_Addr));
+  Obj.SHOff = Offset;
+}
+
+template <class ELFT> size_t ELFWriter<ELFT>::totalSize() const {
+  // We already have the section header offset so we can calculate the total
+  // size by just adding up the size of each section header.
+  if (!WriteSectionHeaders)
+    return Obj.SHOff;
+  size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr.
+  return Obj.SHOff + ShdrCount * sizeof(Elf_Shdr);
+}
+
+template <class ELFT> Error ELFWriter<ELFT>::write() {
+  // Segment data must be written first, so that the ELF header and program
+  // header tables can overwrite it, if covered by a segment.
+  writeSegmentData();
+  writeEhdr();
+  writePhdrs();
+  if (Error E = writeSectionData())
+    return E;
+  if (WriteSectionHeaders)
+    writeShdrs();
+
+  // TODO: Implement direct writing to the output stream (without intermediate
+  // memory buffer Buf).
+  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+  return Error::success();
+}
+
+static Error removeUnneededSections(Object &Obj) {
+  // We can remove an empty symbol table from non-relocatable objects.
+  // Relocatable objects typically have relocation sections whose
+  // sh_link field points to .symtab, so we can't remove .symtab
+  // even if it is empty.
+  if (Obj.isRelocatable() || Obj.SymbolTable == nullptr ||
+      !Obj.SymbolTable->empty())
+    return Error::success();
+
+  // .strtab can be used for section names. In such a case we shouldn't
+  // remove it.
+  auto *StrTab = Obj.SymbolTable->getStrTab() == Obj.SectionNames
+                     ? nullptr
+                     : Obj.SymbolTable->getStrTab();
+  return Obj.removeSections(false, [&](const SectionBase &Sec) {
+    return &Sec == Obj.SymbolTable || &Sec == StrTab;
+  });
+}
+template <class ELFT> Error ELFWriter<ELFT>::finalize() {
+  // It could happen that SectionNames has been removed and yet the user wants
+  // a section header table output. We need to throw an error if a user tries
+  // to do that.
+  if (Obj.SectionNames == nullptr && WriteSectionHeaders)
+    return createStringError(llvm::errc::invalid_argument,
+                             "cannot write section header table because "
+                             "section header string table was removed");
+
+  if (Error E = removeUnneededSections(Obj))
+    return E;
+
+  // We need to assign indexes before we perform layout because we need to
+  // know if we need large indexes or not. We can assign indexes first and
+  // check as we go to see if we will actually need large indexes.
+  bool NeedsLargeIndexes = false;
+  if (Obj.sections().size() >= SHN_LORESERVE) {
+    SectionTableRef Sections = Obj.sections();
+    // Sections doesn't include the null section header, so account for this
+    // when skipping the first N sections.
+    NeedsLargeIndexes =
+        any_of(drop_begin(Sections, SHN_LORESERVE - 1),
+               [](const SectionBase &Sec) { return Sec.HasSymbol; });
+    // TODO: handle case where only one section needs the large index table
+    // but only needs it because the large index table hasn't been removed
+    // yet.
+  }
+
+  if (NeedsLargeIndexes) {
+    // This means we definitely need to have a section index table, but if we
+    // already have one then we should use it instead of making a new one.
+    if (Obj.SymbolTable != nullptr && Obj.SectionIndexTable == nullptr) {
+      // Addition of a section to the end does not invalidate the indexes of
+      // other sections and assigns the correct index to the new section.
+      auto &Shndx = Obj.addSection<SectionIndexSection>();
+      Obj.SymbolTable->setShndxTable(&Shndx);
+      Shndx.setSymTab(Obj.SymbolTable);
+    }
+  } else {
+    // Since we don't need SectionIndexTable we should remove it and all
+    // references to it.
+    if (Obj.SectionIndexTable != nullptr) {
+      // We do not support sections referring to the section index table.
+      if (Error E = Obj.removeSections(false /*AllowBrokenLinks*/,
+                                       [this](const SectionBase &Sec) {
+                                         return &Sec == Obj.SectionIndexTable;
+                                       }))
+        return E;
+    }
+  }
+
+  // Make sure we add the names of all the sections. Importantly, this must be
+  // done after we decide to add or remove SectionIndexes.
+  if (Obj.SectionNames != nullptr)
+    for (const SectionBase &Sec : Obj.sections())
+      Obj.SectionNames->addString(Sec.Name);
+
+  initEhdrSegment();
+
+  // Before we can prepare for layout the indexes need to be finalized.
+  // Also, the output arch may not be the same as the input arch, so fix up
+  // size-related fields before doing layout calculations.
+  uint64_t Index = 0;
+  auto SecSizer = std::make_unique<ELFSectionSizer<ELFT>>();
+  for (SectionBase &Sec : Obj.sections()) {
+    Sec.Index = Index++;
+    if (Error Err = Sec.accept(*SecSizer))
+      return Err;
+  }
+
+  // The symbol table does not update all other sections on update. For
+  // instance, symbol names are not added as new symbols are added. This means
+  // that some sections, like .strtab, don't yet have their final size.
+  if (Obj.SymbolTable != nullptr)
+    Obj.SymbolTable->prepareForLayout();
+
+  // Now that all strings are added we want to finalize string table builders,
+  // because that affects section sizes which in turn affects section offsets.
+  for (SectionBase &Sec : Obj.sections())
+    if (auto StrTab = dyn_cast<StringTableSection>(&Sec))
+      StrTab->prepareForLayout();
+
+  assignOffsets();
+
+  // layoutSections could have modified section indexes, so we need
+  // to fill the index table after assignOffsets.
+  if (Obj.SymbolTable != nullptr)
+    Obj.SymbolTable->fillShndxTable();
+
+  // Finally, now that all offsets and indexes have been set, we can finalize
+  // any remaining issues.
+  uint64_t Offset = Obj.SHOff + sizeof(Elf_Shdr);
+  for (SectionBase &Sec : Obj.sections()) {
+    Sec.HeaderOffset = Offset;
+    Offset += sizeof(Elf_Shdr);
+    if (WriteSectionHeaders)
+      Sec.NameIndex = Obj.SectionNames->findIndex(Sec.Name);
+    Sec.finalize();
+  }
+
+  size_t TotalSize = totalSize();
+  Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize);
+  if (!Buf)
+    return createStringError(errc::not_enough_memory,
+                             "failed to allocate memory buffer of " +
+                                 Twine::utohexstr(TotalSize) + " bytes");
+
+  SecWriter = std::make_unique<ELFSectionWriter<ELFT>>(*Buf);
+  return Error::success();
+}
+
+Error BinaryWriter::write() {
+  for (const SectionBase &Sec : Obj.allocSections())
+    if (Error Err = Sec.accept(*SecWriter))
+      return Err;
+
+  // TODO: Implement direct writing to the output stream (without intermediate
+  // memory buffer Buf).
+  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+  return Error::success();
+}
+
+Error BinaryWriter::finalize() {
+  // Compute the section LMA based on its sh_offset and the containing
+  // segment's p_offset and p_paddr. Also compute the minimum LMA of all
+  // non-empty sections as MinAddr. In the output, the contents between
+  // address 0 and MinAddr will be skipped.
+  uint64_t MinAddr = UINT64_MAX;
+  for (SectionBase &Sec : Obj.allocSections()) {
+    if (Sec.ParentSegment != nullptr)
+      Sec.Addr =
+          Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr;
+    if (Sec.Type != SHT_NOBITS && Sec.Size > 0)
+      MinAddr = std::min(MinAddr, Sec.Addr);
+  }
+
+  // Now that every section has been laid out we just need to compute the
+  // total file size. This might not be the same as the offset returned by
+  // layoutSections, because we want to truncate the last segment to the end
+  // of its last non-empty section, to match GNU objcopy's behaviour.
+  TotalSize = 0;
+  for (SectionBase &Sec : Obj.allocSections())
+    if (Sec.Type != SHT_NOBITS && Sec.Size > 0) {
+      Sec.Offset = Sec.Addr - MinAddr;
+      TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size);
+    }
+
+  Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize);
+  if (!Buf)
+    return createStringError(errc::not_enough_memory,
+                             "failed to allocate memory buffer of " +
+                                 Twine::utohexstr(TotalSize) + " bytes");
+  SecWriter = std::make_unique<BinarySectionWriter>(*Buf);
+  return Error::success();
+}
+
+bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs,
+                                            const SectionBase *Rhs) const {
+  return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) <
+         (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU);
+}
+
+uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) {
+  IHexLineData HexData;
+  uint8_t Data[4] = {};
+  // We don't write an entry point record if the entry is zero.
+  if (Obj.Entry == 0)
+    return 0;
+
+  if (Obj.Entry <= 0xFFFFFU) {
+    Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF;
+    support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry),
+                           support::big);
+    HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data);
+  } else {
+    support::endian::write(Data, static_cast<uint32_t>(Obj.Entry),
+                           support::big);
+    HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data);
+  }
+  memcpy(Buf, HexData.data(), HexData.size());
+  return HexData.size();
+}
+
+uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) {
+  IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {});
+  memcpy(Buf, HexData.data(), HexData.size());
+  return HexData.size();
+}
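+// A worked example of the records emitted above (standard Intel HEX format,
+// illustrative numbers, not specific to this change): the end-of-file record
+// is always the 11-character line ":00000001FF"; its checksum is the two's
+// complement of the summed record bytes, 0x00 + 0x00 + 0x00 + 0x01 = 0x01,
+// and 0x100 - 0x01 = 0xFF. Likewise, a hypothetical entry point 0x00401000
+// (> 0xFFFFF) produces the type 05 record ":0400000500401000A7", since
+// 0x04 + 0x00 + 0x00 + 0x05 + 0x00 + 0x40 + 0x10 + 0x00 = 0x59 and
+// 0x100 - 0x59 = 0xA7. Both line lengths match IHexRecord::getLength:
+// 11 for zero data bytes, 11 + 2*4 = 19 for four data bytes.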
+Error IHexWriter::write() {
+  IHexSectionWriter Writer(*Buf);
+  // Write sections.
+  for (const SectionBase *Sec : Sections)
+    if (Error Err = Sec->accept(Writer))
+      return Err;
+
+  uint64_t Offset = Writer.getBufferOffset();
+  // Write entry point address.
+  Offset += writeEntryPointRecord(
+      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Offset);
+  // Write EOF.
+  Offset += writeEndOfFileRecord(
+      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Offset);
+  assert(Offset == TotalSize);
+
+  // TODO: Implement direct writing to the output stream (without intermediate
+  // memory buffer Buf).
+  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+  return Error::success();
+}
+
+Error IHexWriter::checkSection(const SectionBase &Sec) {
+  uint64_t Addr = sectionPhysicalAddr(&Sec);
+  if (addressOverflows32bit(Addr) ||
+      addressOverflows32bit(Addr + Sec.Size - 1))
+    return createStringError(
+        errc::invalid_argument,
+        "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit",
+        Sec.Name.c_str(), Addr, Addr + Sec.Size - 1);
+  return Error::success();
+}
+
+Error IHexWriter::finalize() {
+  // We can't write 64-bit addresses.
+  if (addressOverflows32bit(Obj.Entry))
+    return createStringError(errc::invalid_argument,
+                             "Entry point address 0x%llx overflows 32 bits",
+                             Obj.Entry);
+
+  for (const SectionBase &Sec : Obj.sections())
+    if ((Sec.Flags & ELF::SHF_ALLOC) && Sec.Type != ELF::SHT_NOBITS &&
+        Sec.Size > 0) {
+      if (Error E = checkSection(Sec))
+        return E;
+      Sections.insert(&Sec);
+    }
+
+  std::unique_ptr<WritableMemoryBuffer> EmptyBuffer =
+      WritableMemoryBuffer::getNewMemBuffer(0);
+  if (!EmptyBuffer)
+    return createStringError(errc::not_enough_memory,
+                             "failed to allocate memory buffer of 0 bytes");
+
+  IHexSectionWriterBase LengthCalc(*EmptyBuffer);
+  for (const SectionBase *Sec : Sections)
+    if (Error Err = Sec->accept(LengthCalc))
+      return Err;
+
+  // We need space to write section records + StartAddress record
+  // (if the start address is not zero) + EndOfFile record.
+  TotalSize = LengthCalc.getBufferOffset() +
+              (Obj.Entry ? IHexRecord::getLineLength(4) : 0) +
+              IHexRecord::getLineLength(0);
+
+  Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize);
+  if (!Buf)
+    return createStringError(errc::not_enough_memory,
+                             "failed to allocate memory buffer of " +
+                                 Twine::utohexstr(TotalSize) + " bytes");
+
+  return Error::success();
+}
+
+namespace llvm {
+namespace objcopy {
+namespace elf {
+
+template class ELFBuilder<ELF64LE>;
+template class ELFBuilder<ELF64BE>;
+template class ELFBuilder<ELF32LE>;
+template class ELFBuilder<ELF32BE>;
+
+template class ELFWriter<ELF64LE>;
+template class ELFWriter<ELF64BE>;
+template class ELFWriter<ELF32LE>;
+template class ELFWriter<ELF32BE>;
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
new file mode 100644
index 000000000000..f33bbb029c9b
--- /dev/null
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -0,0 +1,1108 @@
+//===- ELFObject.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
+#define LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <set>
+#include <vector>
+
+namespace llvm {
+enum class DebugCompressionType;
+namespace objcopy {
+namespace elf {
+
+class SectionBase;
+class Section;
+class OwnedDataSection;
+class StringTableSection;
+class SymbolTableSection;
+class RelocationSection;
+class DynamicRelocationSection;
+class GnuDebugLinkSection;
+class GroupSection;
+class SectionIndexSection;
+class CompressedSection;
+class DecompressedSection;
+class Segment;
+class Object;
+struct Symbol;
+
+class SectionTableRef {
+  ArrayRef<std::unique_ptr<SectionBase>> Sections;
+
+public:
+  using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>;
+
+  explicit SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs)
+      : Sections(Secs) {}
+  SectionTableRef(const SectionTableRef &) = default;
+
+  iterator begin() const { return iterator(Sections.data()); }
+  iterator end() const { return iterator(Sections.data() + Sections.size()); }
+  size_t size() const { return Sections.size(); }
+
+  Expected<SectionBase *> getSection(uint32_t Index, Twine ErrMsg);
+
+  template <class T>
+  Expected<T *> getSectionOfType(uint32_t Index, Twine IndexErrMsg,
+                                 Twine TypeErrMsg);
+};
+
+enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
+
+class SectionVisitor {
+public:
+  virtual ~SectionVisitor() = default;
+
+  virtual Error visit(const Section &Sec) = 0;
+  virtual Error visit(const OwnedDataSection &Sec) = 0;
+  virtual Error visit(const StringTableSection &Sec) = 0;
+  virtual Error visit(const SymbolTableSection &Sec) = 0;
+  virtual Error visit(const RelocationSection &Sec) = 0;
+  virtual Error visit(const DynamicRelocationSection &Sec) = 0;
+  virtual Error visit(const GnuDebugLinkSection &Sec) = 0;
+  virtual Error visit(const GroupSection &Sec) = 0;
+  virtual Error visit(const SectionIndexSection &Sec) = 0;
+  virtual Error visit(const CompressedSection &Sec) = 0;
+  virtual Error visit(const DecompressedSection &Sec) = 0;
+};
+
+class MutableSectionVisitor {
+public:
+  virtual ~MutableSectionVisitor() = default;
+
+  virtual Error visit(Section &Sec) = 0;
+  virtual Error visit(OwnedDataSection &Sec) = 0;
+  virtual Error visit(StringTableSection &Sec) = 0;
+  virtual Error visit(SymbolTableSection &Sec) = 0;
+  virtual Error visit(RelocationSection &Sec) = 0;
+  virtual Error visit(DynamicRelocationSection &Sec) = 0;
+  virtual Error visit(GnuDebugLinkSection &Sec) = 0;
+  virtual Error visit(GroupSection &Sec) = 0;
+  virtual Error visit(SectionIndexSection &Sec) = 0;
+  virtual Error visit(CompressedSection &Sec) = 0;
+  virtual Error visit(DecompressedSection &Sec) = 0;
+};
+
+class SectionWriter : public SectionVisitor {
+protected:
+  WritableMemoryBuffer &Out;
+
+public:
+  virtual ~SectionWriter() = default;
+
+  Error visit(const Section &Sec) override;
+  Error visit(const OwnedDataSection &Sec) override;
+  Error visit(const StringTableSection &Sec) override;
+  Error visit(const DynamicRelocationSection &Sec) override;
+  virtual Error visit(const SymbolTableSection &Sec) override = 0;
+  virtual Error visit(const RelocationSection &Sec) override = 0;
+  virtual Error visit(const GnuDebugLinkSection &Sec) override = 0;
+  virtual Error visit(const GroupSection &Sec) override = 0;
+  virtual Error visit(const SectionIndexSection &Sec) override = 0;
+  virtual Error visit(const CompressedSection &Sec) override = 0;
+  virtual Error visit(const DecompressedSection &Sec) override = 0;
+
+  explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {}
+};
+
+template <class ELFT> class ELFSectionWriter : public SectionWriter {
+private:
+  using Elf_Word = typename ELFT::Word;
+  using Elf_Rel = typename ELFT::Rel;
+  using Elf_Rela = typename ELFT::Rela;
+  using Elf_Sym = typename ELFT::Sym;
+
+public:
+  virtual ~ELFSectionWriter() {}
+  Error visit(const SymbolTableSection &Sec) override;
+  Error visit(const RelocationSection &Sec) override;
+  Error visit(const GnuDebugLinkSection &Sec) override;
+  Error visit(const GroupSection &Sec) override;
+  Error visit(const SectionIndexSection &Sec) override;
+  Error visit(const CompressedSection &Sec) override;
+  Error visit(const DecompressedSection &Sec) override;
+
+  explicit ELFSectionWriter(WritableMemoryBuffer &Buf) : SectionWriter(Buf) {}
+};
+
+template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
+private:
+  using Elf_Rel = typename ELFT::Rel;
+  using Elf_Rela = typename ELFT::Rela;
+  using Elf_Sym = typename ELFT::Sym;
+  using Elf_Word = typename ELFT::Word;
+  using Elf_Xword = typename ELFT::Xword;
+
+public:
+  Error visit(Section &Sec) override;
+  Error visit(OwnedDataSection &Sec) override;
+  Error visit(StringTableSection &Sec) override;
+  Error visit(DynamicRelocationSection &Sec) override;
+  Error visit(SymbolTableSection &Sec) override;
+  Error visit(RelocationSection &Sec) override;
+  Error visit(GnuDebugLinkSection &Sec) override;
+  Error visit(GroupSection &Sec) override;
+  Error visit(SectionIndexSection &Sec) override;
+  Error visit(CompressedSection &Sec) override;
+  Error visit(DecompressedSection &Sec) override;
+};
+
+#define MAKE_SEC_WRITER_FRIEND                                                 \
+  friend class SectionWriter;                                                  \
+  friend class IHexSectionWriterBase;                                          \
+  friend class IHexSectionWriter;                                              \
+  template <class ELFT> friend class ELFSectionWriter;                         \
+  template <class ELFT> friend class ELFSectionSizer;
+
+class BinarySectionWriter : public SectionWriter {
+public:
+  virtual ~BinarySectionWriter() {}
+
+  Error visit(const SymbolTableSection &Sec) override;
+  Error visit(const RelocationSection &Sec) override;
+  Error visit(const GnuDebugLinkSection &Sec) override;
+  Error visit(const GroupSection &Sec) override;
+  Error visit(const SectionIndexSection &Sec) override;
+  Error visit(const CompressedSection &Sec) override;
+  Error visit(const DecompressedSection &Sec) override;
+
+  explicit BinarySectionWriter(WritableMemoryBuffer &Buf)
+      : SectionWriter(Buf) {}
+};
+
+using IHexLineData = SmallVector<char, 64>;
+
+struct IHexRecord {
+  // Memory address of the record.
+  uint16_t Addr;
+  // Record type (see below).
+  uint16_t Type;
+  // Record data in hexadecimal form.
+  StringRef HexData;
+
+  // Helper method to get the file length of the record, excluding the line
+  // terminator (getLineLength adds the CRLF):
+  // ':' + LL + AAAA + TT + 2*DataSize + CC = 11 + 2*DataSize characters.
+  static size_t getLength(size_t DataSize) {
+    // :LLAAAATT[DD...DD]CC
+    return DataSize * 2 + 11;
+  }
+
+  // Gets length of line in a file (getLength + CRLF).
+  static size_t getLineLength(size_t DataSize) {
+    return getLength(DataSize) + 2;
+  }
+
+  // Given type, address and data, returns a line which can
+  // be written to the output file.
+  static IHexLineData getLine(uint8_t Type, uint16_t Addr,
+                              ArrayRef<uint8_t> Data);
+
+  // Parses the line and returns a record if possible.
+  // Line should be trimmed of whitespace characters.
+  static Expected<IHexRecord> parse(StringRef Line);
+
+  // Calculates the checksum of the stringified record representation.
+  // S must NOT contain the leading ':' or trailing whitespace
+  // characters.
+  static uint8_t getChecksum(StringRef S);
+
+  enum Type {
+    // Contains data and a 16-bit starting address for the data.
+    // The byte count specifies the number of data bytes in the record.
+    Data = 0,
+    // Must occur exactly once per file in the last line of the file.
+    // The data field is empty (thus the byte count is 00) and the address
+    // field is typically 0000.
+    EndOfFile = 1,
+    // The data field contains a 16-bit segment base address (thus the byte
+    // count is always 02) compatible with 80x86 real mode addressing.
+    // The address field (typically 0000) is ignored. The segment address
+    // from the most recent 02 record is multiplied by 16 and added to each
+    // subsequent data record address to form the physical starting address
+    // for the data. This allows addressing up to one megabyte of address
+    // space.
+    SegmentAddr = 2,
+    // For 80x86 processors, specifies the initial content of the CS:IP
+    // registers. The address field is 0000, the byte count is always 04,
+    // the first two data bytes are the CS value, the latter two are the
+    // IP value.
+    StartAddr80x86 = 3,
+    // Allows for 32 bit addressing (up to 4GiB). The record's address field
+    // is ignored (typically 0000) and its byte count is always 02. The two
+    // data bytes (big endian) specify the upper 16 bits of the 32 bit
+    // absolute address for all subsequent type 00 records.
+    ExtendedAddr = 4,
+    // The address field is 0000 (not used) and the byte count is always 04.
+    // The four data bytes represent a 32-bit address value. In the case of
+    // 80386 and higher CPUs, this address is loaded into the EIP register.
+    StartAddr = 5,
+    // We have no other valid types.
+    InvalidType = 6
+  };
+};
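+// A worked checksum example for the record format above (standard Intel HEX,
+// illustrative only): for the data record ":0300300002337A1E", getChecksum
+// runs over "0300300002337A" and computes the two's complement of the byte
+// sum: 0x03 + 0x00 + 0x30 + 0x00 + 0x02 + 0x33 + 0x7A = 0xE2, and
+// 0x100 - 0xE2 = 0x1E, matching the trailing CC field. A free-standing
+// equivalent (hypothetical helper, not part of this header):
+//
+//   uint8_t checksum(ArrayRef<uint8_t> Bytes) {
+//     uint8_t Sum = 0;
+//     for (uint8_t B : Bytes)
+//       Sum += B;    // mod-256 sum of all record bytes
+//     return -Sum;   // two's complement
+//   }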
+// Base class for IHexSectionWriter. This class implements the writing
+// algorithm, but doesn't actually write records. It is used for output buffer
+// size calculation in IHexWriter::finalize.
+class IHexSectionWriterBase : public BinarySectionWriter {
+  // 20-bit segment address
+  uint32_t SegmentAddr = 0;
+  // Extended linear address
+  uint32_t BaseAddr = 0;
+
+  // Write segment address corresponding to 'Addr'
+  uint64_t writeSegmentAddr(uint64_t Addr);
+  // Write extended linear (base) address corresponding to 'Addr'
+  uint64_t writeBaseAddr(uint64_t Addr);
+
+protected:
+  // Offset in the output buffer
+  uint64_t Offset = 0;
+
+  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
+  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
+
+public:
+  explicit IHexSectionWriterBase(WritableMemoryBuffer &Buf)
+      : BinarySectionWriter(Buf) {}
+
+  uint64_t getBufferOffset() const { return Offset; }
+  Error visit(const Section &Sec) final;
+  Error visit(const OwnedDataSection &Sec) final;
+  Error visit(const StringTableSection &Sec) override;
+  Error visit(const DynamicRelocationSection &Sec) final;
+  using BinarySectionWriter::visit;
+};
+
+// Real IHEX section writer
+class IHexSectionWriter : public IHexSectionWriterBase {
+public:
+  IHexSectionWriter(WritableMemoryBuffer &Buf) : IHexSectionWriterBase(Buf) {}
+
+  void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
+  Error visit(const StringTableSection &Sec) override;
+};
+
+class Writer {
+protected:
+  Object &Obj;
+  std::unique_ptr<WritableMemoryBuffer> Buf;
+  raw_ostream &Out;
+
+public:
+  virtual ~Writer();
+  virtual Error finalize() = 0;
+  virtual Error write() = 0;
+
+  Writer(Object &O, raw_ostream &Out) : Obj(O), Out(Out) {}
+};
+
+template <class ELFT> class ELFWriter : public Writer {
+private:
+  using Elf_Addr = typename ELFT::Addr;
+  using Elf_Shdr = typename ELFT::Shdr;
+  using Elf_Phdr = typename ELFT::Phdr;
+  using Elf_Ehdr = typename ELFT::Ehdr;
+
+  void initEhdrSegment();
+
+  void writeEhdr();
+  void writePhdr(const Segment &Seg);
+  void writeShdr(const SectionBase &Sec);
+
+  void writePhdrs();
+  void writeShdrs();
+  Error writeSectionData();
+  void writeSegmentData();
+
+  void assignOffsets();
+
+  std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;
+
+  size_t totalSize() const;
+
+public:
+  virtual ~ELFWriter() {}
+  bool WriteSectionHeaders;
+
+  // For --only-keep-debug, select an alternative section/segment layout
+  // algorithm.
+  bool OnlyKeepDebug;
+
+  Error finalize() override;
+  Error write() override;
+  ELFWriter(Object &Obj, raw_ostream &Out, bool WSH, bool OnlyKeepDebug);
+};
+
+class BinaryWriter : public Writer {
+private:
+  std::unique_ptr<BinarySectionWriter> SecWriter;
+
+  uint64_t TotalSize = 0;
+
+public:
+  ~BinaryWriter() {}
+  Error finalize() override;
+  Error write() override;
+  BinaryWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
+};
+
+class IHexWriter : public Writer {
+  struct SectionCompare {
+    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
+  };
+
+  std::set<const SectionBase *, SectionCompare> Sections;
+  size_t TotalSize = 0;
+
+  Error checkSection(const SectionBase &Sec);
+  uint64_t writeEntryPointRecord(uint8_t *Buf);
+  uint64_t writeEndOfFileRecord(uint8_t *Buf);
+
+public:
+  ~IHexWriter() {}
+  Error finalize() override;
+  Error write() override;
+  IHexWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
+};
+
+class SectionBase {
+public:
+  std::string Name;
+  Segment *ParentSegment = nullptr;
+  uint64_t HeaderOffset = 0;
+  uint32_t Index = 0;
+
+  uint32_t OriginalIndex = 0;
+  uint64_t OriginalFlags = 0;
+  uint64_t OriginalType = ELF::SHT_NULL;
+  uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
+
+  uint64_t Addr = 0;
+  uint64_t Align = 1;
+  uint32_t EntrySize = 0;
+  uint64_t Flags = 0;
+  uint64_t Info = 0;
+  uint64_t Link = ELF::SHN_UNDEF;
+  uint64_t NameIndex = 0;
+  uint64_t Offset = 0;
+  uint64_t Size = 0;
+  uint64_t Type = ELF::SHT_NULL;
+  ArrayRef<uint8_t> OriginalData;
+  bool HasSymbol = false;
+
+  SectionBase() = default;
+  SectionBase(const SectionBase &) = default;
+
+  virtual ~SectionBase() = default;
+
+  virtual Error initialize(SectionTableRef SecTable);
+  virtual void finalize();
+  // Remove references to these sections. The list of sections must be sorted.
+  virtual Error
+  removeSectionReferences(bool AllowBrokenLinks,
+                          function_ref<bool(const SectionBase *)> ToRemove);
+  virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+  virtual Error accept(SectionVisitor &Visitor) const = 0;
+  virtual Error accept(MutableSectionVisitor &Visitor) = 0;
+  virtual void markSymbols();
+  virtual void
+  replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
+  virtual bool hasContents() const { return false; }
+  // Notify the section that it is subject to removal.
+  virtual void onRemove();
+};
+
+class Segment {
+private:
+  struct SectionCompare {
+    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
+      // Some sections might have the same address if one of them is empty. To
+      // fix this we can use the lexicographic ordering on ->Addr and the
+      // original index.
+      if (Lhs->OriginalOffset == Rhs->OriginalOffset)
+        return Lhs->OriginalIndex < Rhs->OriginalIndex;
+      return Lhs->OriginalOffset < Rhs->OriginalOffset;
+    }
+  };
+
+public:
+  uint32_t Type = 0;
+  uint32_t Flags = 0;
+  uint64_t Offset = 0;
+  uint64_t VAddr = 0;
+  uint64_t PAddr = 0;
+  uint64_t FileSize = 0;
+  uint64_t MemSize = 0;
+  uint64_t Align = 0;
+
+  uint32_t Index = 0;
+  uint64_t OriginalOffset = 0;
+  Segment *ParentSegment = nullptr;
+  ArrayRef<uint8_t> Contents;
+  std::set<const SectionBase *, SectionCompare> Sections;
+
+  explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
+  Segment() = default;
+
+  const SectionBase *firstSection() const {
+    if (!Sections.empty())
+      return *Sections.begin();
+    return nullptr;
+  }
+
+  void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
+  void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
+
+  ArrayRef<uint8_t> getContents() const { return Contents; }
+};
+
+class Section : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+  ArrayRef<uint8_t> Contents;
+  SectionBase *LinkSection = nullptr;
+
+public:
+  explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
+
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+  Error removeSectionReferences(
+      bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
+  Error initialize(SectionTableRef SecTable) override;
+  void finalize() override;
+  bool hasContents() const override {
+    return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
+  }
+};
+
+class OwnedDataSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+  std::vector<uint8_t> Data;
+
+public:
+  OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
+      : Data(std::begin(Data), std::end(Data)) {
+    Name = SecName.str();
+    Type = OriginalType = ELF::SHT_PROGBITS;
+    Size = Data.size();
+    OriginalOffset = std::numeric_limits<uint64_t>::max();
+  }
+
+  OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
+                   uint64_t SecOff) {
+    Name = SecName.str();
+    Type = OriginalType = ELF::SHT_PROGBITS;
+    Addr = SecAddr;
+    Flags = OriginalFlags = SecFlags;
+    OriginalOffset = SecOff;
+  }
+
+  OwnedDataSection(SectionBase &S, ArrayRef<uint8_t> Data)
+      : SectionBase(S), Data(std::begin(Data), std::end(Data)) {
+    Size = Data.size();
+  }
+
+  void appendHexData(StringRef HexData);
+  Error accept(SectionVisitor &Sec) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+  bool hasContents() const override { return true; }
+};
+
+class CompressedSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+  DebugCompressionType CompressionType;
+  uint64_t DecompressedSize;
+  uint64_t DecompressedAlign;
+  SmallVector<char, 128> CompressedData;
+
+public:
+  CompressedSection(const SectionBase &Sec,
+                    DebugCompressionType CompressionType);
+  CompressedSection(ArrayRef<uint8_t> CompressedData,
+                    uint64_t DecompressedSize, uint64_t DecompressedAlign);
+
+  uint64_t getDecompressedSize() const { return DecompressedSize; }
+  uint64_t getDecompressedAlign() const { return DecompressedAlign; }
+
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+
+  static bool classof(const SectionBase *S) {
+    return S->OriginalFlags & ELF::SHF_COMPRESSED;
+  }
+};
+
+class DecompressedSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+public:
+  explicit DecompressedSection(const CompressedSection &Sec)
+      : SectionBase(Sec) {
+    Size = Sec.getDecompressedSize();
+    Align = Sec.getDecompressedAlign();
+    Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
+  }
+
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+};
+
+// There are two types of string tables that can exist, dynamic and not
+// dynamic. In the dynamic case the string table is allocated. Changing a
+// dynamic string table would mean altering virtual addresses and thus the
+// memory image. So dynamic string tables should not have an interface to
+// modify them or reconstruct them. This type lets us reconstruct a string
+// table. To avoid this class being used for dynamic string tables (which has
+// happened) the classof method checks that the particular instance is not
+// allocated. This then agrees with the makeSection method used to construct
+// most sections.
+class StringTableSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+  StringTableBuilder StrTabBuilder;
+
+public:
+  StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
+    Type = OriginalType = ELF::SHT_STRTAB;
+  }
+
+  void addString(StringRef Name);
+  uint32_t findIndex(StringRef Name) const;
+  void prepareForLayout();
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+
+  static bool classof(const SectionBase *S) {
+    if (S->OriginalFlags & ELF::SHF_ALLOC)
+      return false;
+    return S->OriginalType == ELF::SHT_STRTAB;
+  }
+};
+
+// Symbols have a st_shndx field that normally stores an index but
+// occasionally stores a different special value. This enum keeps track of
+// what the st_shndx field means. Most of the values are just copies of the
+// special SHN_* values. SYMBOL_SIMPLE_INDEX means that the st_shndx is just
+// an index of a section.
+enum SymbolShndxType {
+  SYMBOL_SIMPLE_INDEX = 0,
+  SYMBOL_ABS = ELF::SHN_ABS,
+  SYMBOL_COMMON = ELF::SHN_COMMON,
+  SYMBOL_LOPROC = ELF::SHN_LOPROC,
+  SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
+  SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
+  SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
+  SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
+  SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
+  SYMBOL_MIPS_ACOMMON = ELF::SHN_MIPS_ACOMMON,
+  SYMBOL_MIPS_TEXT = ELF::SHN_MIPS_TEXT,
+  SYMBOL_MIPS_DATA = ELF::SHN_MIPS_DATA,
+  SYMBOL_MIPS_SCOMMON = ELF::SHN_MIPS_SCOMMON,
+  SYMBOL_MIPS_SUNDEFINED = ELF::SHN_MIPS_SUNDEFINED,
+  SYMBOL_HIPROC = ELF::SHN_HIPROC,
+  SYMBOL_LOOS = ELF::SHN_LOOS,
+  SYMBOL_HIOS = ELF::SHN_HIOS,
+  SYMBOL_XINDEX = ELF::SHN_XINDEX,
+};
+
+struct Symbol {
+  uint8_t Binding;
+  SectionBase *DefinedIn = nullptr;
+  SymbolShndxType ShndxType;
+  uint32_t Index;
+  std::string Name;
+  uint32_t NameIndex;
+  uint64_t Size;
+  uint8_t Type;
+  uint64_t Value;
+  uint8_t Visibility;
+  bool Referenced = false;
+
+  uint16_t getShndx() const;
+  bool isCommon() const;
+};
+
+class SectionIndexSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+private:
+  std::vector<uint32_t> Indexes;
+  SymbolTableSection *Symbols = nullptr;
+
+public:
+  virtual ~SectionIndexSection() {}
+  void addIndex(uint32_t Index) {
+    assert(Size > 0);
+    Indexes.push_back(Index);
+  }
+
+  void reserve(size_t NumSymbols) {
+    Indexes.reserve(NumSymbols);
+    Size = NumSymbols * 4;
+  }
+  void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
+  Error initialize(SectionTableRef SecTable) override;
+  void finalize() override;
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+
+  SectionIndexSection() {
+    Name = ".symtab_shndx";
+    Align = 4;
+    EntrySize = 4;
+    Type = OriginalType = ELF::SHT_SYMTAB_SHNDX;
+  }
+};
+class SymbolTableSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+
+  void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; }
+  void assignIndices();
+
+protected:
+  std::vector<std::unique_ptr<Symbol>> Symbols;
+  StringTableSection *SymbolNames = nullptr;
+  SectionIndexSection *SectionIndexTable = nullptr;
+
+  using SymPtr = std::unique_ptr<Symbol>;
+
+public:
+  SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; }
+
+  void addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
+                 SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility,
+                 uint16_t Shndx, uint64_t SymbolSize);
+  void prepareForLayout();
+  // An 'empty' symbol table still contains a null symbol.
+  bool empty() const { return Symbols.size() == 1; }
+  void setShndxTable(SectionIndexSection *ShndxTable) {
+    SectionIndexTable = ShndxTable;
+  }
+  const SectionIndexSection *getShndxTable() const {
+    return SectionIndexTable;
+  }
+  void fillShndxTable();
+  const SectionBase *getStrTab() const { return SymbolNames; }
+  Expected<const Symbol *> getSymbolByIndex(uint32_t Index) const;
+  Expected<Symbol *> getSymbolByIndex(uint32_t Index);
+  void updateSymbols(function_ref<void(Symbol &)> Callable);
+
+  Error removeSectionReferences(
+      bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
+  Error initialize(SectionTableRef SecTable) override;
+  void finalize() override;
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  void replaceSectionReferences(
+      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
+
+  static bool classof(const SectionBase *S) {
+    return S->OriginalType == ELF::SHT_SYMTAB;
+  }
+};
+
+struct Relocation {
+  Symbol *RelocSymbol = nullptr;
+  uint64_t Offset;
+  uint64_t Addend;
+  uint32_t Type;
+};
+
+// All relocation sections denote relocations to apply to another section.
+// However, some relocation sections use a dynamic symbol table and others use
+// a regular symbol table. Because the types of the two symbol tables differ
+// in our system (because they should behave differently) we can't uniformly
+// represent all relocations with the same base class if we expose an
+// interface that mentions the symbol table type. So we split the two base
+// types into two different classes, one which handles the section the
+// relocation is applied to and another which handles the symbol table type.
+// The symbol table type is taken as a type parameter to the class (see
+// RelocSectionWithSymtabBase).
+class RelocationSectionBase : public SectionBase {
+protected:
+  SectionBase *SecToApplyRel = nullptr;
+
+public:
+  const SectionBase *getSection() const { return SecToApplyRel; }
+  void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
+
+  StringRef getNamePrefix() const;
+
+  static bool classof(const SectionBase *S) {
+    return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
+  }
+};
+
+// Takes the symbol table type to use as a parameter so that we can
+// deduplicate that code between the two symbol table types.
+template <class SymTabType>
+class RelocSectionWithSymtabBase : public RelocationSectionBase {
+  void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }
+
+protected:
+  RelocSectionWithSymtabBase() = default;
+
+  SymTabType *Symbols = nullptr;
+
+public:
+  Error initialize(SectionTableRef SecTable) override;
+  void finalize() override;
+};
+
+class RelocationSection
+    : public RelocSectionWithSymtabBase<SymbolTableSection> {
+  MAKE_SEC_WRITER_FRIEND
+
+  std::vector<Relocation> Relocations;
+  const Object &Obj;
+
+public:
+  RelocationSection(const Object &O) : Obj(O) {}
+  void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
+  Error accept(SectionVisitor &Visitor) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+  Error removeSectionReferences(
+      bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  void markSymbols() override;
+  void replaceSectionReferences(
+      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
+  const Object &getObject() const { return Obj; }
+
+  static bool classof(const SectionBase *S) {
+    if (S->OriginalFlags & ELF::SHF_ALLOC)
+      return false;
+    return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
+  }
+};
+
+// TODO: The way stripping and groups interact is complicated
+// and still needs to be worked on.
+
+class GroupSection : public SectionBase {
+  MAKE_SEC_WRITER_FRIEND
+  const SymbolTableSection *SymTab = nullptr;
+  Symbol *Sym = nullptr;
+  ELF::Elf32_Word FlagWord;
+  SmallVector<SectionBase *, 3> GroupMembers;
+
+public:
+  // TODO: Contents is present in several classes of the hierarchy.
+  // This needs to be refactored to avoid duplication.
+  ArrayRef<uint8_t> Contents;
+
+  explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
+
+  void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; }
+  void setSymbol(Symbol *S) { Sym = S; }
+  void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
+  void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
+
+  Error accept(SectionVisitor &) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+  void finalize() override;
+  Error removeSectionReferences(
+      bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  void markSymbols() override;
+  void replaceSectionReferences(
+      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
+  void onRemove() override;
+
+  static bool classof(const SectionBase *S) {
+    return S->OriginalType == ELF::SHT_GROUP;
+  }
+};
+
+class DynamicSymbolTableSection : public Section {
+public:
+  explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
+
+  static bool classof(const SectionBase *S) {
+    return S->OriginalType == ELF::SHT_DYNSYM;
+  }
+};
+
+class DynamicSection : public Section {
+public:
+  explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
+
+  static bool classof(const SectionBase *S) {
+    return S->OriginalType == ELF::SHT_DYNAMIC;
+  }
+};
+
+class DynamicRelocationSection
+    : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> {
+  MAKE_SEC_WRITER_FRIEND
+
+private:
+  ArrayRef<uint8_t> Contents;
+
+public:
+  explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
+
+  Error accept(SectionVisitor &) const override;
+  Error accept(MutableSectionVisitor &Visitor) override;
+  Error removeSectionReferences(
+      bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
+
+  static bool classof(const SectionBase *S) {
+    if (!(S->OriginalFlags & ELF::SHF_ALLOC))
+      return false;
+    return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
+  }
+};
GnuDebugLinkSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + +private: + StringRef FileName; + uint32_t CRC32; + + void init(StringRef File); + +public: + // If we add this section from an external source we can use this ctor. + explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC); + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; +}; + +class Reader { +public: + virtual ~Reader(); + virtual Expected> create(bool EnsureSymtab) const = 0; +}; + +using object::Binary; +using object::ELFFile; +using object::ELFObjectFile; +using object::OwningBinary; + +class BasicELFBuilder { +protected: + std::unique_ptr Obj; + + void initFileHeader(); + void initHeaderSegment(); + StringTableSection *addStrTab(); + SymbolTableSection *addSymTab(StringTableSection *StrTab); + Error initSections(); + +public: + BasicELFBuilder() : Obj(std::make_unique()) {} +}; + +class BinaryELFBuilder : public BasicELFBuilder { + MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; + void addData(SymbolTableSection *SymTab); + +public: + BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + + Expected> build(); +}; + +class IHexELFBuilder : public BasicELFBuilder { + const std::vector &Records; + + void addDataSections(); + +public: + IHexELFBuilder(const std::vector &Records) : Records(Records) {} + + Expected> build(); +}; + +template class ELFBuilder { +private: + using Elf_Addr = typename ELFT::Addr; + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Word = typename ELFT::Word; + + const ELFFile &ElfFile; + Object &Obj; + size_t EhdrOffset = 0; + Optional ExtractPartition; + + void setParentSegment(Segment &Child); + Error readProgramHeaders(const ELFFile &HeadersFile); + Error initGroupSection(GroupSection *GroupSec); + Error initSymbolTable(SymbolTableSection *SymTab); + Error readSectionHeaders(); + Error readSections(bool EnsureSymtab); + Error findEhdrOffset(); + Expected makeSection(const Elf_Shdr &Shdr); + +public: + ELFBuilder(const ELFObjectFile &ElfObj, Object &Obj, + Optional ExtractPartition); + + Error build(bool EnsureSymtab); +}; + +class BinaryReader : public Reader { + MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; + +public: + BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + Expected> create(bool EnsureSymtab) const override; +}; + +class IHexReader : public Reader { + MemoryBuffer *MemBuf; + + Expected> parse() const; + Error parseError(size_t LineNo, Error E) const { + return LineNo == -1U + ? 
createFileError(MemBuf->getBufferIdentifier(), std::move(E)) + : createFileError(MemBuf->getBufferIdentifier(), LineNo, + std::move(E)); + } + template + Error parseError(size_t LineNo, char const *Fmt, const Ts &...Vals) const { + Error E = createStringError(errc::invalid_argument, Fmt, Vals...); + return parseError(LineNo, std::move(E)); + } + +public: + IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} + + Expected> create(bool EnsureSymtab) const override; +}; + +class ELFReader : public Reader { + Binary *Bin; + Optional ExtractPartition; + +public: + Expected> create(bool EnsureSymtab) const override; + explicit ELFReader(Binary *B, Optional ExtractPartition) + : Bin(B), ExtractPartition(ExtractPartition) {} +}; + +class Object { +private: + using SecPtr = std::unique_ptr; + using SegPtr = std::unique_ptr; + + std::vector Sections; + std::vector Segments; + std::vector RemovedSections; + DenseMap> UpdatedSections; + + static bool sectionIsAlloc(const SectionBase &Sec) { + return Sec.Flags & ELF::SHF_ALLOC; + }; + +public: + template + using ConstRange = iterator_range>::const_iterator>>; + + // It is often the case that the ELF header and the program header table are + // not present in any segment. This could be a problem during file layout, + // because other segments may get assigned an offset where either of the + // two should reside, which will effectively corrupt the resulting binary. + // Other than that we use these segments to track program header offsets + // when they may not follow the ELF header. + Segment ElfHdrSegment; + Segment ProgramHdrSegment; + + uint8_t OSABI; + uint8_t ABIVersion; + uint64_t Entry; + uint64_t SHOff; + uint32_t Type; + uint32_t Machine; + uint32_t Version; + uint32_t Flags; + + bool HadShdrs = true; + bool MustBeRelocatable = false; + StringTableSection *SectionNames = nullptr; + SymbolTableSection *SymbolTable = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; + + bool IsMips64EL = false; + + SectionTableRef sections() const { return SectionTableRef(Sections); } + iterator_range< + filter_iterator::const_iterator>, + decltype(§ionIsAlloc)>> + allocSections() const { + return make_filter_range(make_pointee_range(Sections), sectionIsAlloc); + } + + const auto &getUpdatedSections() const { return UpdatedSections; } + Error updateSection(StringRef Name, ArrayRef Data); + + SectionBase *findSection(StringRef Name) { + auto SecIt = + find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); + return SecIt == Sections.end() ? 
nullptr : SecIt->get(); + } + SectionTableRef removedSections() { return SectionTableRef(RemovedSections); } + + ConstRange segments() const { return make_pointee_range(Segments); } + + Error removeSections(bool AllowBrokenLinks, + std::function ToRemove); + Error replaceSections(const DenseMap &FromTo); + Error removeSymbols(function_ref ToRemove); + template T &addSection(Ts &&...Args) { + auto Sec = std::make_unique(std::forward(Args)...); + auto Ptr = Sec.get(); + MustBeRelocatable |= isa(*Ptr); + Sections.emplace_back(std::move(Sec)); + Ptr->Index = Sections.size(); + return *Ptr; + } + Error addNewSymbolTable(); + Segment &addSegment(ArrayRef Data) { + Segments.emplace_back(std::make_unique(Data)); + return *Segments.back(); + } + bool isRelocatable() const { + return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable; + } +}; + +} // end namespace elf +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H diff --git a/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp new file mode 100644 index 000000000000..6b731abd9ed9 --- /dev/null +++ b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp @@ -0,0 +1,441 @@ +//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOLayoutBuilder.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; +using namespace llvm::objcopy::macho; + +StringTableBuilder::Kind +MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { + if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) + return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; + return Is64Bit ? StringTableBuilder::MachO64Linked + : StringTableBuilder::MachOLinked; +} + +uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { + uint32_t Size = 0; + for (const LoadCommand &LC : O.LoadCommands) { + const MachO::macho_load_command &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SEGMENT: + Size += sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + continue; + case MachO::LC_SEGMENT_64: + Size += sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + continue; + } + + switch (cmd) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ + break; +#include "llvm/BinaryFormat/MachO.def" +#undef HANDLE_LOAD_COMMAND + } + } + + return Size; +} + +void MachOLayoutBuilder::constructStringTable() { + for (std::unique_ptr &Sym : O.SymTable.Symbols) + StrTableBuilder.add(Sym->Name); + StrTableBuilder.finalize(); +} + +void MachOLayoutBuilder::updateSymbolIndexes() { + uint32_t Index = 0; + for (auto &Symbol : O.SymTable.Symbols) + Symbol->Index = Index++; +} + +// Updates the index and the number of local/external/undefined symbols. +void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { + assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); + // Make sure that nlist entries in the symbol table are sorted by the those + // types. 
The order is: local < defined external < undefined external. + assert(llvm::is_sorted(O.SymTable.Symbols, + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + bool AL = A->isLocalSymbol(), + BL = B->isLocalSymbol(); + if (AL != BL) + return AL; + return !AL && !A->isUndefinedSymbol() && + B->isUndefinedSymbol(); + }) && + "Symbols are not sorted by their types."); + + uint32_t NumLocalSymbols = 0; + auto Iter = O.SymTable.Symbols.begin(); + auto End = O.SymTable.Symbols.end(); + for (; Iter != End; ++Iter) { + if ((*Iter)->isExternalSymbol()) + break; + + ++NumLocalSymbols; + } + + uint32_t NumExtDefSymbols = 0; + for (; Iter != End; ++Iter) { + if ((*Iter)->isUndefinedSymbol()) + break; + + ++NumExtDefSymbols; + } + + MLC.dysymtab_command_data.ilocalsym = 0; + MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; + MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; + MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; + MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; + MLC.dysymtab_command_data.nundefsym = + O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); +} + +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. +uint64_t MachOLayoutBuilder::layoutSegments() { + auto HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + const bool IsObjectFile = + O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; + uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; + for (LoadCommand &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + StringRef Segname; + uint64_t SegmentVmAddr; + uint64_t SegmentVmSize; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + SegmentVmAddr = MLC.segment_command_data.vmaddr; + SegmentVmSize = MLC.segment_command_data.vmsize; + Segname = StringRef(MLC.segment_command_data.segname, + strnlen(MLC.segment_command_data.segname, + sizeof(MLC.segment_command_data.segname))); + break; + case MachO::LC_SEGMENT_64: + SegmentVmAddr = MLC.segment_command_64_data.vmaddr; + SegmentVmSize = MLC.segment_command_64_data.vmsize; + Segname = StringRef(MLC.segment_command_64_data.segname, + strnlen(MLC.segment_command_64_data.segname, + sizeof(MLC.segment_command_64_data.segname))); + break; + default: + continue; + } + + if (Segname == "__LINKEDIT") { + // We update the __LINKEDIT segment later (in layoutTail). + assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); + LinkEditLoadCommand = &MLC; + continue; + } + + // Update file offsets and sizes of sections. + uint64_t SegOffset = Offset; + uint64_t SegFileSize = 0; + uint64_t VMSize = 0; + for (std::unique_ptr
&Sec : LC.Sections) { + assert(SegmentVmAddr <= Sec->Addr && + "Section's address cannot be smaller than Segment's one"); + uint32_t SectOffset = Sec->Addr - SegmentVmAddr; + if (IsObjectFile) { + if (!Sec->hasValidOffset()) { + Sec->Offset = 0; + } else { + uint64_t PaddingSize = + offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); + Sec->Offset = SegOffset + SegFileSize + PaddingSize; + Sec->Size = Sec->Content.size(); + SegFileSize += PaddingSize + Sec->Size; + } + } else { + if (!Sec->hasValidOffset()) { + Sec->Offset = 0; + } else { + Sec->Offset = SegOffset + SectOffset; + Sec->Size = Sec->Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); + } + } + VMSize = std::max(VMSize, SectOffset + Sec->Size); + } + + if (IsObjectFile) { + Offset += SegFileSize; + } else { + Offset = alignTo(Offset + SegFileSize, PageSize); + SegFileSize = alignTo(SegFileSize, PageSize); + // Use the original vmsize if the segment is __PAGEZERO. + VMSize = + Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); + } + + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC.segment_command_data.cmdsize = + sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + MLC.segment_command_data.nsects = LC.Sections.size(); + MLC.segment_command_data.fileoff = SegOffset; + MLC.segment_command_data.vmsize = VMSize; + MLC.segment_command_data.filesize = SegFileSize; + break; + case MachO::LC_SEGMENT_64: + MLC.segment_command_64_data.cmdsize = + sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + MLC.segment_command_64_data.nsects = LC.Sections.size(); + MLC.segment_command_64_data.fileoff = SegOffset; + MLC.segment_command_64_data.vmsize = VMSize; + MLC.segment_command_64_data.filesize = SegFileSize; + break; + } + } + + return Offset; +} + +uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr
&Sec : LC.Sections) { + Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; + Sec->NReloc = Sec->Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; + } + + return Offset; +} + +Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // If we are building the layout of an executable or dynamic library + // which does not have any segments other than __LINKEDIT, + // the Offset can be equal to zero by this time. It happens because of the + // convention that in such cases the file offsets specified by LC_SEGMENT + // start with zero (unlike the case of a relocatable object file). + const uint64_t HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || + Offset >= HeaderSize + O.Header.SizeOfCmds) && + "Incorrect tail offset"); + Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); + + // The order of LINKEDIT elements is as follows: + // rebase info, binding info, weak binding info, lazy binding info, export + // trie, data-in-code, symbol table, indirect symbol table, symbol table + // strings, code signature. + uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + uint64_t StartOfLinkEdit = Offset; + uint64_t StartOfRebaseInfo = StartOfLinkEdit; + uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); + uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); + uint64_t StartOfLazyBindingInfo = + StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); + uint64_t StartOfExportTrie = + StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); + uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); + uint64_t StartOfDyldExportsTrie = + StartOfFunctionStarts + O.FunctionStarts.Data.size(); + uint64_t StartOfChainedFixups = + StartOfDyldExportsTrie + O.ExportsTrie.Data.size(); + uint64_t StartOfDataInCode = + StartOfChainedFixups + O.ChainedFixups.Data.size(); + uint64_t StartOfLinkerOptimizationHint = + StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfSymbols = + StartOfLinkerOptimizationHint + O.LinkerOptimizationHint.Data.size(); + uint64_t StartOfIndirectSymbols = + StartOfSymbols + NListSize * O.SymTable.Symbols.size(); + uint64_t StartOfSymbolStrings = + StartOfIndirectSymbols + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t StartOfCodeSignature = + StartOfSymbolStrings + StrTableBuilder.getSize(); + uint32_t CodeSignatureSize = 0; + if (O.CodeSignatureCommandIndex) { + StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); + + // Note: These calculations are to be kept in sync with the same + // calculations performed in LLD's CodeSignatureSection. + const uint32_t AllHeadersSize = + alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, + CodeSignature.Align); + const uint32_t BlockCount = + (StartOfCodeSignature + CodeSignature.BlockSize - 1) / + CodeSignature.BlockSize; + const uint32_t Size = + alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, + CodeSignature.Align); + + CodeSignature.StartOffset = StartOfCodeSignature; + CodeSignature.AllHeadersSize = AllHeadersSize; + CodeSignature.BlockCount = BlockCount; + CodeSignature.OutputFileName = OutputFileName; + CodeSignature.Size = Size; + CodeSignatureSize = Size; + } + uint64_t LinkEditSize = + StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; + + // Now we have determined the layout of the contents of the __LINKEDIT + // segment. 
Update its load command. + if (LinkEditLoadCommand) { + MachO::macho_load_command *MLC = LinkEditLoadCommand; + switch (LinkEditLoadCommand->load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); + MLC->segment_command_data.fileoff = StartOfLinkEdit; + MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_data.filesize = LinkEditSize; + break; + case MachO::LC_SEGMENT_64: + MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); + MLC->segment_command_64_data.fileoff = StartOfLinkEdit; + MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_64_data.filesize = LinkEditSize; + break; + } + } + + for (LoadCommand &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_CODE_SIGNATURE: + MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; + MLC.linkedit_data_command_data.datasize = CodeSignatureSize; + break; + case MachO::LC_SYMTAB: + MLC.symtab_command_data.symoff = StartOfSymbols; + MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); + MLC.symtab_command_data.stroff = StartOfSymbolStrings; + MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); + break; + case MachO::LC_DYSYMTAB: { + if (MLC.dysymtab_command_data.ntoc != 0 || + MLC.dysymtab_command_data.nmodtab != 0 || + MLC.dysymtab_command_data.nextrefsyms != 0 || + MLC.dysymtab_command_data.nlocrel != 0 || + MLC.dysymtab_command_data.nextrel != 0) + return createStringError(llvm::errc::not_supported, + "shared library is not yet supported"); + + if (!O.IndirectSymTable.Symbols.empty()) { + MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; + MLC.dysymtab_command_data.nindirectsyms = + O.IndirectSymTable.Symbols.size(); + } + + updateDySymTab(MLC); + break; + } + case MachO::LC_DATA_IN_CODE: + MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; + MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); + break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; + MLC.linkedit_data_command_data.datasize = + O.LinkerOptimizationHint.Data.size(); + break; + case MachO::LC_FUNCTION_STARTS: + MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; + MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); + break; + case MachO::LC_DYLD_CHAINED_FIXUPS: + MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; + MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); + break; + case MachO::LC_DYLD_EXPORTS_TRIE: + MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; + MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + MLC.dyld_info_command_data.rebase_off = + O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; + MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); + MLC.dyld_info_command_data.bind_off = + O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; + MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); + MLC.dyld_info_command_data.weak_bind_off = + O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; + MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); + MLC.dyld_info_command_data.lazy_bind_off = + O.LazyBinds.Opcodes.empty() ? 
0 : StartOfLazyBindingInfo; + MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); + MLC.dyld_info_command_data.export_off = + O.Exports.Trie.empty() ? 0 : StartOfExportTrie; + MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); + break; + // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in + // is not an offset in the binary file, instead, it is a + // relative virtual address. At the moment modification of the __TEXT + // segment of executables isn't supported anyway (e.g. data in code entries + // are not recalculated). Moreover, in general + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because + // without making additional assumptions (e.g. that the entire __TEXT + // segment should be encrypted) we do not know how to recalculate the + // boundaries of the encrypted part. For now just copy over these load + // commands until we encounter a real world usecase where + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. + case MachO::LC_ENCRYPTION_INFO: + case MachO::LC_ENCRYPTION_INFO_64: + case MachO::LC_LOAD_DYLINKER: + case MachO::LC_MAIN: + case MachO::LC_RPATH: + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + case MachO::LC_VERSION_MIN_MACOSX: + case MachO::LC_VERSION_MIN_IPHONEOS: + case MachO::LC_VERSION_MIN_TVOS: + case MachO::LC_VERSION_MIN_WATCHOS: + case MachO::LC_BUILD_VERSION: + case MachO::LC_ID_DYLIB: + case MachO::LC_LOAD_DYLIB: + case MachO::LC_LOAD_WEAK_DYLIB: + case MachO::LC_UUID: + case MachO::LC_SOURCE_VERSION: + case MachO::LC_THREAD: + case MachO::LC_UNIXTHREAD: + case MachO::LC_SUB_FRAMEWORK: + case MachO::LC_SUB_UMBRELLA: + case MachO::LC_SUB_CLIENT: + case MachO::LC_SUB_LIBRARY: + case MachO::LC_LINKER_OPTION: + // Nothing to update. + break; + default: + // Abort if it's unsupported in order to prevent corrupting the object. + return createStringError(llvm::errc::not_supported, + "unsupported load command (cmd=0x%x)", cmd); + } + } + + return Error::success(); +} + +Error MachOLayoutBuilder::layout() { + O.Header.NCmds = O.LoadCommands.size(); + O.Header.SizeOfCmds = computeSizeOfCmds(); + constructStringTable(); + updateSymbolIndexes(); + uint64_t Offset = layoutSegments(); + Offset = layoutRelocations(Offset); + return layoutTail(Offset); +} diff --git a/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h new file mode 100644 index 000000000000..8d8716df22bb --- /dev/null +++ b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h @@ -0,0 +1,97 @@ +//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H +#define LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H + +#include "MachOObject.h" +#include "llvm/ObjCopy/MachO/MachOObjcopy.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +/// When MachO binaries include a LC_CODE_SIGNATURE load command, +/// the __LINKEDIT data segment will include a section corresponding +/// to the LC_CODE_SIGNATURE load command. This section serves as a signature +/// for the binary. Included in the CodeSignature section is a header followed +/// by a hash of the binary. 
If present, the CodeSignature section is the +/// last component of the binary. +struct CodeSignatureInfo { + // NOTE: These values are to be kept in sync with those in + // LLD's CodeSignatureSection class. + + static constexpr uint32_t Align = 16; + static constexpr uint8_t BlockSizeShift = 12; + // The binary is read in blocks of the following size. + static constexpr size_t BlockSize = (1 << BlockSizeShift); // 4 KiB + // For each block, a SHA256 hash (256 bits, 32 bytes) is written to + // the CodeSignature section. + static constexpr size_t HashSize = 256 / 8; + static constexpr size_t BlobHeadersSize = llvm::alignTo<8>( + sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex)); + // The size of the entire header depends upon the filename the binary is being + // written to, but the rest of the header is fixed in size. + static constexpr uint32_t FixedHeadersSize = + BlobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory); + + // The offset relative to the start of the binary where + // the CodeSignature section should begin. + uint32_t StartOffset; + // The size of the entire header, output file name size included. + uint32_t AllHeadersSize; + // The number of blocks required to hash the binary. + uint32_t BlockCount; + StringRef OutputFileName; + // The size of the entire CodeSignature section, including both the header and + // hashes. + uint32_t Size; +}; + +class MachOLayoutBuilder { + Object &O; + bool Is64Bit; + StringRef OutputFileName; + uint64_t PageSize; + CodeSignatureInfo CodeSignature; + + // Points to the __LINKEDIT segment if it exists. + MachO::macho_load_command *LinkEditLoadCommand = nullptr; + StringTableBuilder StrTableBuilder; + + uint32_t computeSizeOfCmds() const; + void constructStringTable(); + void updateSymbolIndexes(); + void updateDySymTab(MachO::macho_load_command &MLC); + uint64_t layoutSegments(); + uint64_t layoutRelocations(uint64_t Offset); + Error layoutTail(uint64_t Offset); + + static StringTableBuilder::Kind getStringTableBuilderKind(const Object &O, + bool Is64Bit); + +public: + MachOLayoutBuilder(Object &O, bool Is64Bit, StringRef OutputFileName, + uint64_t PageSize) + : O(O), Is64Bit(Is64Bit), OutputFileName(OutputFileName), + PageSize(PageSize), + StrTableBuilder(getStringTableBuilderKind(O, Is64Bit)) {} + + // Recomputes and updates fields in the given object such as file offsets. + Error layout(); + + StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; } + + const CodeSignatureInfo &getCodeSignature() const { return CodeSignature; } +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H diff --git a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp new file mode 100644 index 000000000000..5db03a4e268e --- /dev/null +++ b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp @@ -0,0 +1,550 @@ +//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "Archive.h"
+#include "MachOReader.h"
+#include "MachOWriter.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/ObjCopy.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/MachOUniversalWriter.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+using namespace llvm;
+using namespace llvm::objcopy;
+using namespace llvm::objcopy::macho;
+using namespace llvm::object;
+
+using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
+using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
+
+#ifndef NDEBUG
+static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
+  // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
+  // LC_LAZY_LOAD_DYLIB
+  return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
+         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
+         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
+         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
+}
+#endif
+
+static StringRef getPayloadString(const LoadCommand &LC) {
+  assert(isLoadCommandWithPayloadString(LC) &&
+         "unsupported load command encountered");
+
+  return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
+                   LC.Payload.size())
+      .rtrim('\0');
+}
+
+static Error removeSections(const CommonConfig &Config, Object &Obj) {
+  SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
+    return false;
+  };
+
+  if (!Config.ToRemove.empty()) {
+    RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
+      return Config.ToRemove.matches(Sec->CanonicalName);
+    };
+  }
+
+  if (Config.StripAll || Config.StripDebug) {
+    // Remove all debug sections.
+    RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
+      if (Sec->Segname == "__DWARF")
+        return true;
+
+      return RemovePred(Sec);
+    };
+  }
+
+  if (!Config.OnlySection.empty()) {
+    // Overwrite RemovePred because --only-section takes priority.
+    RemovePred = [&Config](const std::unique_ptr<Section>
&Sec) { + return !Config.OnlySection.matches(Sec->CanonicalName); + }; + } + + return Obj.removeSections(RemovePred); +} + +static void markSymbols(const CommonConfig &, Object &Obj) { + // Symbols referenced from the indirect symbol table must not be removed. + for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols) + if (ISE.Symbol) + (*ISE.Symbol)->Referenced = true; +} + +static void updateAndRemoveSymbols(const CommonConfig &Config, + const MachOConfig &MachOConfig, + Object &Obj) { + for (SymbolEntry &Sym : Obj.SymTable) { + auto I = Config.SymbolsToRename.find(Sym.Name); + if (I != Config.SymbolsToRename.end()) + Sym.Name = std::string(I->getValue()); + } + + auto RemovePred = [&Config, &MachOConfig, + &Obj](const std::unique_ptr &N) { + if (N->Referenced) + return false; + if (MachOConfig.KeepUndefined && N->isUndefinedSymbol()) + return false; + if (N->n_desc & MachO::REFERENCED_DYNAMICALLY) + return false; + if (Config.StripAll) + return true; + if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) + return true; + // This behavior is consistent with cctools' strip. + if (MachOConfig.StripSwiftSymbols && + (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion && + *Obj.SwiftVersion && N->isSwiftSymbol()) + return true; + return false; + }; + + Obj.SymTable.removeSymbols(RemovePred); +} + +template +static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { + assert(isLoadCommandWithPayloadString(LC) && + "unsupported load command encountered"); + + uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); + + LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; + LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); + std::copy(S.begin(), S.end(), LC.Payload.begin()); +} + +static LoadCommand buildRPathLoadCommand(StringRef Path) { + LoadCommand LC; + MachO::rpath_command RPathLC; + RPathLC.cmd = MachO::LC_RPATH; + RPathLC.path = sizeof(MachO::rpath_command); + RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); + LC.MachOLoadCommand.rpath_command_data = RPathLC; + LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); + std::copy(Path.begin(), Path.end(), LC.Payload.begin()); + return LC; +} + +static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) { + // Remove RPaths. + DenseSet RPathsToRemove(MachOConfig.RPathsToRemove.begin(), + MachOConfig.RPathsToRemove.end()); + + LoadCommandPred RemovePred = [&RPathsToRemove, + &MachOConfig](const LoadCommand &LC) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { + // When removing all RPaths we don't need to care + // about what it contains + if (MachOConfig.RemoveAllRpaths) + return true; + + StringRef RPath = getPayloadString(LC); + if (RPathsToRemove.count(RPath)) { + RPathsToRemove.erase(RPath); + return true; + } + } + return false; + }; + + if (Error E = Obj.removeLoadCommands(RemovePred)) + return E; + + // Emit an error if the Mach-O binary does not contain an rpath path name + // specified in -delete_rpath. + for (StringRef RPath : MachOConfig.RPathsToRemove) { + if (RPathsToRemove.count(RPath)) + return createStringError(errc::invalid_argument, + "no LC_RPATH load command with path: %s", + RPath.str().c_str()); + } + + DenseSet RPaths; + + // Get all existing RPaths. + for (LoadCommand &LC : Obj.LoadCommands) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) + RPaths.insert(getPayloadString(LC)); + } + + // Throw errors for invalid RPaths. 
+ for (const auto &OldNew : MachOConfig.RPathsToUpdate) { + StringRef Old = OldNew.getFirst(); + StringRef New = OldNew.getSecond(); + if (!RPaths.contains(Old)) + return createStringError(errc::invalid_argument, + "no LC_RPATH load command with path: " + Old); + if (RPaths.contains(New)) + return createStringError(errc::invalid_argument, + "rpath '" + New + + "' would create a duplicate load command"); + } + + // Update load commands. + for (LoadCommand &LC : Obj.LoadCommands) { + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_ID_DYLIB: + if (MachOConfig.SharedLibId) + updateLoadCommandPayloadString( + LC, *MachOConfig.SharedLibId); + break; + + case MachO::LC_RPATH: { + StringRef RPath = getPayloadString(LC); + StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath); + if (!NewRPath.empty()) + updateLoadCommandPayloadString(LC, NewRPath); + break; + } + + // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB + // here once llvm-objcopy supports them. + case MachO::LC_LOAD_DYLIB: + case MachO::LC_LOAD_WEAK_DYLIB: + StringRef InstallName = getPayloadString(LC); + StringRef NewInstallName = + MachOConfig.InstallNamesToUpdate.lookup(InstallName); + if (!NewInstallName.empty()) + updateLoadCommandPayloadString(LC, + NewInstallName); + break; + } + } + + // Add new RPaths. + for (StringRef RPath : MachOConfig.RPathToAdd) { + if (RPaths.contains(RPath)) + return createStringError(errc::invalid_argument, + "rpath '" + RPath + + "' would create a duplicate load command"); + RPaths.insert(RPath); + Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath)); + } + + for (StringRef RPath : MachOConfig.RPathToPrepend) { + if (RPaths.contains(RPath)) + return createStringError(errc::invalid_argument, + "rpath '" + RPath + + "' would create a duplicate load command"); + + RPaths.insert(RPath); + Obj.LoadCommands.insert(Obj.LoadCommands.begin(), + buildRPathLoadCommand(RPath)); + } + + // Unlike appending rpaths, the indexes of subsequent load commands must + // be recalculated after prepending one. + if (!MachOConfig.RPathToPrepend.empty()) + Obj.updateLoadCommandIndexes(); + + // Remove any empty segments if required. + if (!MachOConfig.EmptySegmentsToRemove.empty()) { + auto RemovePred = [&MachOConfig](const LoadCommand &LC) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) { + return LC.Sections.empty() && + MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName()); + } + return false; + }; + if (Error E = Obj.removeLoadCommands(RemovePred)) + return E; + } + + return Error::success(); +} + +static Error dumpSectionToFile(StringRef SecName, StringRef Filename, + Object &Obj) { + for (LoadCommand &LC : Obj.LoadCommands) + for (const std::unique_ptr
&Sec : LC.Sections) {
+      if (Sec->CanonicalName == SecName) {
+        Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+            FileOutputBuffer::create(Filename, Sec->Content.size());
+        if (!BufferOrErr)
+          return BufferOrErr.takeError();
+        std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+        llvm::copy(Sec->Content, Buf->getBufferStart());
+
+        if (Error E = Buf->commit())
+          return E;
+        return Error::success();
+      }
+    }
+
+  return createStringError(object_error::parse_failed, "section '%s' not found",
+                           SecName.str().c_str());
+}
+
+static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
+  std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
+  StringRef TargetSegName = Pair.first;
+  Section Sec(TargetSegName, Pair.second);
+  Sec.Content =
+      Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
+  Sec.Size = Sec.Content.size();
+
+  // Add the section into an existing segment.
+  for (LoadCommand &LC : Obj.LoadCommands) {
+    Optional<StringRef> SegName = LC.getSegmentName();
+    if (SegName && SegName == TargetSegName) {
+      uint64_t Addr = *LC.getSegmentVMAddr();
+      for (const std::unique_ptr<Section> &S : LC.Sections)
+        Addr = std::max(Addr, S->Addr + S->Size);
+      LC.Sections.push_back(std::make_unique<Section>(Sec));
+      LC.Sections.back()->Addr = Addr;
+      return Error::success();
+    }
+  }
+
+  // There's no segment named TargetSegName. Create a new load command and
+  // insert a new section into it.
+  LoadCommand &NewSegment =
+      Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
+  NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
+  NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
+  return Error::success();
+}
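addSection expects the "<segment>,<section>" spelling validated further below and either appends to an existing segment or synthesizes a new one sized to a 16 KiB multiple. A hedged usage sketch (the NewSectionInfo field names are assumed from the surrounding code, not defined in this hunk):

    // Hypothetical driver-side call adding __CUSTOM,__data from a buffer.
    NewSectionInfo Info;
    Info.SectionName = "__CUSTOM,__data"; // "<segment>,<section>"
    Info.SectionData = MemoryBuffer::getMemBuffer("payload");
    if (Error E = addSection(Info, Obj))  // static helper defined above
      return E;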
+
+static Expected<Section &> findSection(StringRef SecName, Object &O) {
+  StringRef SegName;
+  std::tie(SegName, SecName) = SecName.split(",");
+  auto FoundSeg =
+      llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
+        return LC.getSegmentName() == SegName;
+      });
+  if (FoundSeg == O.LoadCommands.end())
+    return createStringError(errc::invalid_argument,
+                             "could not find segment with name '%s'",
+                             SegName.str().c_str());
+  auto FoundSec = llvm::find_if(FoundSeg->Sections,
+                                [SecName](const std::unique_ptr<Section> &Sec) {
+                                  return Sec->Sectname == SecName;
+                                });
+  if (FoundSec == FoundSeg->Sections.end())
+    return createStringError(errc::invalid_argument,
+                             "could not find section with name '%s'",
+                             SecName.str().c_str());
+
+  assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
+  return *FoundSec->get();
+}
+
+static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
+  Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
+
+  if (!SecToUpdateOrErr)
+    return SecToUpdateOrErr.takeError();
+  Section &Sec = *SecToUpdateOrErr;
+
+  if (NewSection.SectionData->getBufferSize() > Sec.Size)
+    return createStringError(
+        errc::invalid_argument,
+        "new section cannot be larger than previous section");
+  Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
+  Sec.Size = Sec.Content.size();
+  return Error::success();
+}
+
+// isValidMachOCannonicalName returns success if Name is a MachO canonical name
+// ("<segment>,<section>") and lengths of both segment and section names are
+// valid.
+static Error isValidMachOCannonicalName(StringRef Name) {
+  if (Name.count(',') != 1)
+    return createStringError(errc::invalid_argument,
+                             "invalid section name '%s' (should be formatted "
+                             "as '<segment>,<section>')",
+                             Name.str().c_str());
+
+  std::pair<StringRef, StringRef> Pair = Name.split(',');
+  if (Pair.first.size() > 16)
+    return createStringError(errc::invalid_argument,
+                             "too long segment name: '%s'",
+                             Pair.first.str().c_str());
+  if (Pair.second.size() > 16)
+    return createStringError(errc::invalid_argument,
+                             "too long section name: '%s'",
+                             Pair.second.str().c_str());
+  return Error::success();
+}
+
+static Error handleArgs(const CommonConfig &Config,
+                        const MachOConfig &MachOConfig, Object &Obj) {
+  // Dump sections before add/remove for compatibility with GNU objcopy.
+  for (StringRef Flag : Config.DumpSection) {
+    StringRef SectionName;
+    StringRef FileName;
+    std::tie(SectionName, FileName) = Flag.split('=');
+    if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
+      return E;
+  }
+
+  if (Error E = removeSections(Config, Obj))
+    return E;
+
+  // Mark symbols to determine which symbols are still needed.
+  if (Config.StripAll)
+    markSymbols(Config, Obj);
+
+  updateAndRemoveSymbols(Config, MachOConfig, Obj);
+
+  if (Config.StripAll)
+    for (LoadCommand &LC : Obj.LoadCommands)
+      for (std::unique_ptr<Section>
&Sec : LC.Sections) + Sec->Relocations.clear(); + + for (const NewSectionInfo &NewSection : Config.AddSection) { + if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) + return E; + if (Error E = addSection(NewSection, Obj)) + return E; + } + + for (const NewSectionInfo &NewSection : Config.UpdateSection) { + if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) + return E; + if (Error E = updateSection(NewSection, Obj)) + return E; + } + + if (Error E = processLoadCommands(MachOConfig, Obj)) + return E; + + return Error::success(); +} + +Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config, + const MachOConfig &MachOConfig, + object::MachOObjectFile &In, + raw_ostream &Out) { + MachOReader Reader(In); + Expected> O = Reader.create(); + if (!O) + return createFileError(Config.InputFilename, O.takeError()); + + if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD) + return createStringError(std::errc::not_supported, + "%s: MH_PRELOAD files are not supported", + Config.InputFilename.str().c_str()); + + if (Error E = handleArgs(Config, MachOConfig, **O)) + return createFileError(Config.InputFilename, std::move(E)); + + // Page size used for alignment of segment sizes in Mach-O executables and + // dynamic libraries. + uint64_t PageSize; + switch (In.getArch()) { + case Triple::ArchType::arm: + case Triple::ArchType::aarch64: + case Triple::ArchType::aarch64_32: + PageSize = 16384; + break; + default: + PageSize = 4096; + } + + MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), + sys::path::filename(Config.OutputFilename), PageSize, Out); + if (auto E = Writer.finalize()) + return E; + return Writer.write(); +} + +Error objcopy::macho::executeObjcopyOnMachOUniversalBinary( + const MultiFormatConfig &Config, const MachOUniversalBinary &In, + raw_ostream &Out) { + SmallVector, 2> Binaries; + SmallVector Slices; + for (const auto &O : In.objects()) { + Expected> ArOrErr = O.getAsArchive(); + if (ArOrErr) { + Expected> NewArchiveMembersOrErr = + createNewArchiveMembers(Config, **ArOrErr); + if (!NewArchiveMembersOrErr) + return NewArchiveMembersOrErr.takeError(); + auto Kind = (*ArOrErr)->kind(); + if (Kind == object::Archive::K_BSD) + Kind = object::Archive::K_DARWIN; + Expected> OutputBufferOrErr = + writeArchiveToBuffer(*NewArchiveMembersOrErr, + (*ArOrErr)->hasSymbolTable(), Kind, + Config.getCommonConfig().DeterministicArchives, + (*ArOrErr)->isThin()); + if (!OutputBufferOrErr) + return OutputBufferOrErr.takeError(); + Expected> BinaryOrErr = + object::createBinary(**OutputBufferOrErr); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + Binaries.emplace_back(std::move(*BinaryOrErr), + std::move(*OutputBufferOrErr)); + Slices.emplace_back(*cast(Binaries.back().getBinary()), + O.getCPUType(), O.getCPUSubType(), + O.getArchFlagName(), O.getAlign()); + continue; + } + // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class + // ObjectForArch return an Error in case of the type mismatch. We need to + // check each in turn to see what kind of slice this is, so ignore errors + // produced along the way. 
+ consumeError(ArOrErr.takeError()); + + Expected> ObjOrErr = O.getAsObjectFile(); + if (!ObjOrErr) { + consumeError(ObjOrErr.takeError()); + return createStringError( + std::errc::invalid_argument, + "slice for '%s' of the universal Mach-O binary " + "'%s' is not a Mach-O object or an archive", + O.getArchFlagName().c_str(), + Config.getCommonConfig().InputFilename.str().c_str()); + } + std::string ArchFlagName = O.getArchFlagName(); + + SmallVector Buffer; + raw_svector_ostream MemStream(Buffer); + + Expected MachO = Config.getMachOConfig(); + if (!MachO) + return MachO.takeError(); + + if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO, + **ObjOrErr, MemStream)) + return E; + + auto MB = std::make_unique( + std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false); + Expected> BinaryOrErr = object::createBinary(*MB); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB)); + Slices.emplace_back(*cast(Binaries.back().getBinary()), + O.getAlign()); + } + + if (Error Err = writeUniversalBinaryToStream(Slices, Out)) + return Err; + + return Error::success(); +} diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.cpp b/llvm/lib/ObjCopy/MachO/MachOObject.cpp new file mode 100644 index 000000000000..56f31e456198 --- /dev/null +++ b/llvm/lib/ObjCopy/MachO/MachOObject.cpp @@ -0,0 +1,214 @@ +//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOObject.h" +#include "llvm/ADT/SmallPtrSet.h" +#include + +using namespace llvm; +using namespace llvm::objcopy::macho; + +const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { + assert(Index < Symbols.size() && "invalid symbol index"); + return Symbols[Index].get(); +} + +SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { + return const_cast( + static_cast(this)->getSymbolByIndex(Index)); +} + +void SymbolTable::removeSymbols( + function_ref &)> ToRemove) { + llvm::erase_if(Symbols, ToRemove); +} + +void Object::updateLoadCommandIndexes() { + static constexpr char TextSegmentName[] = "__TEXT"; + // Update indices of special load commands + for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { + LoadCommand &LC = LoadCommands[Index]; + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_CODE_SIGNATURE: + CodeSignatureCommandIndex = Index; + break; + case MachO::LC_SEGMENT: + if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == + TextSegmentName) + TextSegmentCommandIndex = Index; + break; + case MachO::LC_SEGMENT_64: + if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == + TextSegmentName) + TextSegmentCommandIndex = Index; + break; + case MachO::LC_SYMTAB: + SymTabCommandIndex = Index; + break; + case MachO::LC_DYSYMTAB: + DySymTabCommandIndex = Index; + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + DyLdInfoCommandIndex = Index; + break; + case MachO::LC_DATA_IN_CODE: + DataInCodeCommandIndex = Index; + break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + LinkerOptimizationHintCommandIndex = Index; + break; + case MachO::LC_FUNCTION_STARTS: + FunctionStartsCommandIndex = Index; + break; + case 
MachO::LC_DYLD_CHAINED_FIXUPS:
+      ChainedFixupsCommandIndex = Index;
+      break;
+    case MachO::LC_DYLD_EXPORTS_TRIE:
+      ExportsTrieCommandIndex = Index;
+      break;
+    }
+  }
+}
+
+Error Object::removeLoadCommands(
+    function_ref<bool(const LoadCommand &)> ToRemove) {
+  auto It = std::stable_partition(
+      LoadCommands.begin(), LoadCommands.end(),
+      [&](const LoadCommand &LC) { return !ToRemove(LC); });
+  LoadCommands.erase(It, LoadCommands.end());
+
+  updateLoadCommandIndexes();
+  return Error::success();
+}
+
+Error Object::removeSections(
+    function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
+  DenseMap<uint32_t, const Section *> OldIndexToSection;
+  uint32_t NextSectionIndex = 1;
+  for (LoadCommand &LC : LoadCommands) {
+    auto It = std::stable_partition(
+        std::begin(LC.Sections), std::end(LC.Sections),
+        [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
+    for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
+      OldIndexToSection[(*I)->Index] = I->get();
+      (*I)->Index = NextSectionIndex++;
+    }
+    LC.Sections.erase(It, LC.Sections.end());
+  }
+
+  auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
+    Optional<uint32_t> Section = S->section();
+    return (Section && !OldIndexToSection.count(*Section));
+  };
+
+  SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
+  for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
+    if (IsDead(Sym))
+      DeadSymbols.insert(Sym.get());
+
+  for (const LoadCommand &LC : LoadCommands)
+    for (const std::unique_ptr<Section> &Sec : LC.Sections)
+      for (const RelocationInfo &R : Sec->Relocations)
+        if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
+          return createStringError(std::errc::invalid_argument,
+                                   "symbol '%s' defined in section with index "
+                                   "'%u' cannot be removed because it is "
+                                   "referenced by a relocation in section '%s'",
+                                   (*R.Symbol)->Name.c_str(),
+                                   *((*R.Symbol)->section()),
+                                   Sec->CanonicalName.c_str());
+  SymTable.removeSymbols(IsDead);
+  for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
+    if (S->section())
+      S->n_sect = OldIndexToSection[S->n_sect]->Index;
+  return Error::success();
+}
+
+uint64_t Object::nextAvailableSegmentAddress() const {
+  uint64_t HeaderSize =
+      is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+  uint64_t Addr = HeaderSize + Header.SizeOfCmds;
+  for (const LoadCommand &LC : LoadCommands) {
+    const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      Addr = std::max(Addr,
+                      static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
+                          MLC.segment_command_data.vmsize);
+      break;
+    case MachO::LC_SEGMENT_64:
+      Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
+                                MLC.segment_command_64_data.vmsize);
+      break;
+    default:
+      continue;
+    }
+  }
+  return Addr;
+}
+
+template <typename SegmentType>
+static void
+constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
+                 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
+  assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
+  memset(&Seg, 0, sizeof(SegmentType));
+  Seg.cmd = CmdType;
+  strncpy(Seg.segname, SegName.data(), SegName.size());
+  Seg.maxprot |=
+      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+  Seg.initprot |=
+      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+  Seg.vmaddr = SegVMAddr;
+  Seg.vmsize = SegVMSize;
+}
+
+LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
+  LoadCommand LC;
+  const uint64_t SegVMAddr = nextAvailableSegmentAddress();
+  if (is64Bit())
+    constructSegment(LC.MachOLoadCommand.segment_command_64_data,
+                     MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
+  else
+    constructSegment(LC.MachOLoadCommand.segment_command_data,
+                     MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
+
+  LoadCommands.push_back(std::move(LC));
+  return LoadCommands.back();
+}
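Because addSegment derives the new segment's vmaddr from nextAvailableSegmentAddress, repeated calls lay segments out one after another past every existing vmaddr+vmsize range. A small usage sketch using only the APIs defined above (the segment name and size are invented for illustration):

    // Create a 16 KiB __CUSTOM segment placed past all existing segments.
    LoadCommand &NewSeg = Obj.addSegment("__CUSTOM", 16384);
    assert(NewSeg.getSegmentName() && *NewSeg.getSegmentName() == "__CUSTOM");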
+/// Extracts a segment name from a string which is possibly non-null-terminated.
+static StringRef extractSegmentName(const char *SegName) {
+  return StringRef(SegName,
+                   strnlen(SegName, sizeof(MachO::segment_command::segname)));
+}
+
+Optional<StringRef> LoadCommand::getSegmentName() const {
+  const MachO::macho_load_command &MLC = MachOLoadCommand;
+  switch (MLC.load_command_data.cmd) {
+  case MachO::LC_SEGMENT:
+    return extractSegmentName(MLC.segment_command_data.segname);
+  case MachO::LC_SEGMENT_64:
+    return extractSegmentName(MLC.segment_command_64_data.segname);
+  default:
+    return None;
+  }
+}
+
+Optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
+  const MachO::macho_load_command &MLC = MachOLoadCommand;
+  switch (MLC.load_command_data.cmd) {
+  case MachO::LC_SEGMENT:
+    return MLC.segment_command_data.vmaddr;
+  case MachO::LC_SEGMENT_64:
+    return MLC.segment_command_64_data.vmaddr;
+  default:
+    return None;
+  }
+}
diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.h b/llvm/lib/ObjCopy/MachO/MachOObject.h
new file mode 100644
index 000000000000..df9261b76e4d
--- /dev/null
+++ b/llvm/lib/ObjCopy/MachO/MachOObject.h
@@ -0,0 +1,374 @@
+//===- MachOObject.h - Mach-O object file model -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+struct MachHeader {
+  uint32_t Magic;
+  uint32_t CPUType;
+  uint32_t CPUSubType;
+  uint32_t FileType;
+  uint32_t NCmds;
+  uint32_t SizeOfCmds;
+  uint32_t Flags;
+  uint32_t Reserved = 0;
+};
+
+struct RelocationInfo;
+struct Section {
+  uint32_t Index;
+  std::string Segname;
+  std::string Sectname;
+  // CanonicalName is a string formatted as "<Segname>,<Sectname>".
+  std::string CanonicalName;
+  uint64_t Addr = 0;
+  uint64_t Size = 0;
+  // Offset in the input file.
+  Optional<uint32_t> OriginalOffset;
+  uint32_t Offset = 0;
+  uint32_t Align = 0;
+  uint32_t RelOff = 0;
+  uint32_t NReloc = 0;
+  uint32_t Flags = 0;
+  uint32_t Reserved1 = 0;
+  uint32_t Reserved2 = 0;
+  uint32_t Reserved3 = 0;
+  StringRef Content;
+  std::vector<RelocationInfo> Relocations;
+
+  Section(StringRef SegName, StringRef SectName)
+      : Segname(std::string(SegName)), Sectname(std::string(SectName)),
+        CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
+
+  Section(StringRef SegName, StringRef SectName, StringRef Content)
+      : Segname(std::string(SegName)), Sectname(std::string(SectName)),
+        CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
+        Content(Content) {}
+
+  MachO::SectionType getType() const {
+    return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
+  }
+
+  bool isVirtualSection() const {
+    return (getType() == MachO::S_ZEROFILL ||
+            getType() == MachO::S_GB_ZEROFILL ||
+            getType() == MachO::S_THREAD_LOCAL_ZEROFILL);
+  }
+
+  bool hasValidOffset() const {
+    return !(isVirtualSection() || (OriginalOffset && *OriginalOffset == 0));
+  }
+};
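The constructors above derive CanonicalName by joining the two names with a comma, matching the "<segment>,<section>" spelling used throughout the objcopy options. A tiny sketch (names invented for illustration):

    Section Text("__TEXT", "__text");
    assert(Text.CanonicalName == "__TEXT,__text"); // joined in the ctor
    assert(!Text.isVirtualSection());              // Flags default to S_REGULAR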
+
+struct LoadCommand {
+  // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h
+  // and it is a union of all the structs corresponding to various load
+  // commands.
+  MachO::macho_load_command MachOLoadCommand;
+
+  // The raw content of the payload of the load command (located right after
+  // the corresponding struct). In some cases it is either empty or can be
+  // copied-over without digging into its structure.
+  std::vector<uint8_t> Payload;
+
+  // Some load commands can contain (inside the payload) an array of sections,
+  // though the contents of the sections are stored separately. The struct
+  // Section describes only sections' metadata and where to find the
+  // corresponding content inside the binary.
+  std::vector<std::unique_ptr<Section>> Sections;
+
+  // Returns the segment name if the load command is a segment command.
+  Optional<StringRef> getSegmentName() const;
+
+  // Returns the segment vm address if the load command is a segment command.
+  Optional<uint64_t> getSegmentVMAddr() const;
+};
+
+// Symbol information. Fields that start with "n_" mirror the nlist fields of
+// the same name.
+struct SymbolEntry {
+  std::string Name;
+  bool Referenced = false;
+  uint32_t Index;
+  uint8_t n_type;
+  uint8_t n_sect;
+  uint16_t n_desc;
+  uint64_t n_value;
+
+  bool isExternalSymbol() const { return n_type & MachO::N_EXT; }
+
+  bool isLocalSymbol() const { return !isExternalSymbol(); }
+
+  bool isUndefinedSymbol() const {
+    return (n_type & MachO::N_TYPE) == MachO::N_UNDF;
+  }
+
+  bool isSwiftSymbol() const {
+    return StringRef(Name).startswith("_$s") ||
+           StringRef(Name).startswith("_$S");
+  }
+
+  Optional<uint32_t> section() const {
+    return n_sect == MachO::NO_SECT ? None : Optional<uint32_t>(n_sect);
+  }
+};
+
+/// The location of the symbol table inside the binary is described by the
+/// LC_SYMTAB load command.
+struct SymbolTable {
+  std::vector<std::unique_ptr<SymbolEntry>> Symbols;
+
+  using iterator = pointee_iterator<
+      std::vector<std::unique_ptr<SymbolEntry>>::const_iterator>;
+
+  iterator begin() const { return iterator(Symbols.begin()); }
+  iterator end() const { return iterator(Symbols.end()); }
+
+  const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
+  SymbolEntry *getSymbolByIndex(uint32_t Index);
+  void removeSymbols(
+      function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove);
+};
+
+struct IndirectSymbolEntry {
+  // The original value in an indirect symbol table. Higher bits encode extra
+  // information (INDIRECT_SYMBOL_LOCAL and INDIRECT_SYMBOL_ABS).
+  uint32_t OriginalIndex;
+  /// The Symbol referenced by this entry.
It's None if the index is + /// INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS. + Optional Symbol; + + IndirectSymbolEntry(uint32_t OriginalIndex, Optional Symbol) + : OriginalIndex(OriginalIndex), Symbol(Symbol) {} +}; + +struct IndirectSymbolTable { + std::vector Symbols; +}; + +/// The location of the string table inside the binary is described by LC_SYMTAB +/// load command. +struct StringTable { + std::vector Strings; +}; + +struct RelocationInfo { + // The referenced symbol entry. Set if !Scattered && Extern. + Optional Symbol; + // The referenced section. Set if !Scattered && !Extern. + Optional Sec; + // True if Info is a scattered_relocation_info. + bool Scattered; + // True if the type is an ADDEND. r_symbolnum holds the addend instead of a + // symbol index. + bool IsAddend; + // True if the r_symbolnum points to a section number (i.e. r_extern=0). + bool Extern; + MachO::any_relocation_info Info; + + unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { + if (IsLittleEndian) + return Info.r_word1 & 0xffffff; + return Info.r_word1 >> 8; + } + + void setPlainRelocationSymbolNum(unsigned SymbolNum, bool IsLittleEndian) { + assert(SymbolNum < (1 << 24) && "SymbolNum out of range"); + if (IsLittleEndian) + Info.r_word1 = (Info.r_word1 & ~0x00ffffff) | SymbolNum; + else + Info.r_word1 = (Info.r_word1 & ~0xffffff00) | (SymbolNum << 8); + } +}; + +/// The location of the rebase info inside the binary is described by +/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at +/// an address different from its preferred address. The rebase information is +/// a stream of byte sized opcodes whose symbolic names start with +/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples: +/// +/// The opcodes are a compressed way to encode the table by only +/// encoding when a column changes. In addition simple patterns +/// like "every n'th offset for m times" can be encoded in a few +/// bytes. +struct RebaseInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef Opcodes; +}; + +/// The location of the bind info inside the binary is described by +/// LC_DYLD_INFO load command. Dyld binds an image during the loading process, +/// if the image requires any pointers to be initialized to symbols in other +/// images. The bind information is a stream of byte sized opcodes whose +/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is +/// a table of tuples: The opcodes are a compressed way to encode the table by +/// only encoding when a column changes. In addition simple patterns like for +/// runs of pointers initialized to the same value can be encoded in a few +/// bytes. +struct BindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef Opcodes; +}; + +/// The location of the weak bind info inside the binary is described by +/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols +/// so that all images in the process use the same copy of some code/data. This +/// step is done after binding. The content of the weak_bind info is an opcode +/// stream like the bind_info. But it is sorted alphabetically by symbol name. +/// This enable dyld to walk all images with weak binding information in order +/// and look for collisions. If there are no collisions, dyld does no updating. 
+/// The location of the rebase info inside the binary is described by the
+/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at
+/// an address different from its preferred address. The rebase information is
+/// a stream of byte sized opcodes whose symbolic names start with
+/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples:
+///    <seg-index, seg-offset, type>
+/// The opcodes are a compressed way to encode the table by only
+/// encoding when a column changes. In addition simple patterns
+/// like "every n'th offset for m times" can be encoded in a few
+/// bytes.
+struct RebaseInfo {
+  // At the moment we do not parse this info (and it is simply copied over),
+  // but the proper support will be added later.
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Dyld binds an image during the loading process,
+/// if the image requires any pointers to be initialized to symbols in other
+/// images. The bind information is a stream of byte sized opcodes whose
+/// symbolic names start with BIND_OPCODE_. Conceptually the bind information
+/// is a table of tuples:
+///    <seg-index, seg-offset, type, symbol-library-ordinal,
+///     symbol-name, addend>
+/// The opcodes are a compressed way to encode the table by only encoding when
+/// a column changes. In addition simple patterns like runs of pointers
+/// initialized to the same value can be encoded in a few bytes.
+struct BindInfo {
+  // At the moment we do not parse this info (and it is simply copied over),
+  // but the proper support will be added later.
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the weak bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols
+/// so that all images in the process use the same copy of some code/data. This
+/// step is done after binding. The content of the weak_bind info is an opcode
+/// stream like the bind_info. But it is sorted alphabetically by symbol name.
+/// This enables dyld to walk all images with weak binding information in order
+/// and look for collisions. If there are no collisions, dyld does no updating.
+/// That means that some fixups are also encoded in the bind_info. For
+/// instance, all calls to "operator new" are first bound to libstdc++.dylib
+/// using the information in bind_info. Then if some image overrides operator
+/// new, that is detected when the weak_bind information is processed and the
+/// call to operator new is then rebound.
+struct WeakBindInfo {
+  // At the moment we do not parse this info (and it is simply copied over),
+  // but the proper support will be added later.
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the lazy bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Some uses of external symbols do not need to be
+/// bound immediately. Instead they can be lazily bound on first use. The
+/// lazy_bind contains a stream of BIND opcodes to bind all lazy symbols. Normal
+/// use is that dyld ignores the lazy_bind section when loading an image.
+/// Instead the static linker arranged for the lazy pointer to initially point
+/// to a helper function which pushes the offset into the lazy_bind area for the
+/// symbol needing to be bound, then jumps to dyld which simply adds the offset
+/// to lazy_bind_off to get the information on what to bind.
+struct LazyBindInfo {
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the export info inside the binary is described by the
+/// LC_DYLD_INFO load command. The symbols exported by a dylib are encoded in a
+/// trie. This is a compact representation that factors out common prefixes. It
+/// also reduces LINKEDIT pages in RAM because it encodes all information (name,
+/// address, flags) in one small, contiguous range. The export area is a stream
+/// of nodes. The first node sequentially is the start node for the trie. Nodes
+/// for a symbol start with a uleb128 that is the length of the exported symbol
+/// information for the string so far. If there is no exported symbol, the node
+/// starts with a zero byte. If there is exported info, it follows the length.
+/// First is a uleb128 containing flags. Normally, it is followed by
+/// a uleb128 encoded offset which is the location of the content named
+/// by the symbol from the mach_header for the image. If the flags
+/// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
+/// a uleb128 encoded library ordinal, then a zero terminated
+/// UTF8 string. If the string is zero length, then the symbol
+/// is re-exported from the specified dylib with the same name.
+/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following
+/// the flags are two uleb128s: the stub offset and the resolver offset.
+/// The stub is used by non-lazy pointers. The resolver is used
+/// by lazy pointers and must be called to get the actual address to use.
+/// After the optional exported symbol information is a byte giving
+/// the number of edges (0-255) leaving this node, followed by each edge.
+/// Each edge is a zero terminated UTF8 string of the additional chars
+/// in the symbol, followed by a uleb128 offset for the node that
+/// edge points to.
+struct ExportInfo {
+  ArrayRef<uint8_t> Trie;
+};
+struct LinkData {
+  ArrayRef<uint8_t> Data;
+};
+
+struct Object {
+  MachHeader Header;
+  std::vector<LoadCommand> LoadCommands;
+
+  SymbolTable SymTable;
+  StringTable StrTable;
+
+  RebaseInfo Rebases;
+  BindInfo Binds;
+  WeakBindInfo WeakBinds;
+  LazyBindInfo LazyBinds;
+  ExportInfo Exports;
+  IndirectSymbolTable IndirectSymTable;
+  LinkData DataInCode;
+  LinkData LinkerOptimizationHint;
+  LinkData FunctionStarts;
+  LinkData ExportsTrie;
+  LinkData ChainedFixups;
+
+  Optional<uint32_t> SwiftVersion;
+
+  /// The index of the LC_CODE_SIGNATURE load command if present.
+  Optional<size_t> CodeSignatureCommandIndex;
+  /// The index of the LC_SYMTAB load command if present.
+  Optional<size_t> SymTabCommandIndex;
+  /// The index of the LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if
+  /// present.
+  Optional<size_t> DyLdInfoCommandIndex;
+  /// The index of the LC_DYSYMTAB load command if present.
+  Optional<size_t> DySymTabCommandIndex;
+  /// The index of the LC_DATA_IN_CODE load command if present.
+  Optional<size_t> DataInCodeCommandIndex;
+  /// The index of the LC_LINKER_OPTIMIZATION_HINT load command if present.
+  Optional<size_t> LinkerOptimizationHintCommandIndex;
+  /// The index of the LC_FUNCTION_STARTS load command if present.
+  Optional<size_t> FunctionStartsCommandIndex;
+  /// The index of the LC_DYLD_CHAINED_FIXUPS load command if present.
+  Optional<size_t> ChainedFixupsCommandIndex;
+  /// The index of the LC_DYLD_EXPORTS_TRIE load command if present.
+  Optional<size_t> ExportsTrieCommandIndex;
+  /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command
+  /// corresponding to the __TEXT segment.
+  Optional<size_t> TextSegmentCommandIndex;
+
+  BumpPtrAllocator Alloc;
+  StringSaver NewSectionsContents;
+
+  Object() : NewSectionsContents(Alloc) {}
+
+  Error removeSections(
+      function_ref<bool(const std::unique_ptr<Section> &)> ToRemove);
+
+  Error removeLoadCommands(function_ref<bool(const LoadCommand &)> ToRemove);
+
+  void updateLoadCommandIndexes();
+
+  /// Creates a new segment load command in the object and returns a reference
+  /// to the newly created load command. The caller should verify that SegName
+  /// is not too long (SegName.size() should be less than or equal to 16).
+  LoadCommand &addSegment(StringRef SegName, uint64_t SegVMSize);
+
+  bool is64Bit() const {
+    return Header.Magic == MachO::MH_MAGIC_64 ||
+           Header.Magic == MachO::MH_CIGAM_64;
+  }
+
+  uint64_t nextAvailableSegmentAddress() const;
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
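To illustrate how the mutation API of Object is meant to be combined, here is a hedged sketch assuming the declarations above are in scope (inside namespace llvm::objcopy::macho); the section and segment names are invented for the example:

    // Drop one DWARF section by its canonical "<segment>,<section>" name,
    // then append a fresh segment sized to one 16 KiB page.
    static Error exampleTransform(Object &O) {
      if (Error E = O.removeSections([](const std::unique_ptr<Section> &Sec) {
            return Sec->CanonicalName == "__DWARF,__debug_info";
          }))
        return E;
      // addSegment requires SegName.size() <= 16.
      LoadCommand &NewSeg = O.addSegment("__EXAMPLE", /*SegVMSize=*/0x4000);
      (void)NewSeg;
      return Error::success();
    }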
diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.cpp b/llvm/lib/ObjCopy/MachO/MachOReader.cpp
new file mode 100644
index 000000000000..94459a436094
--- /dev/null
+++ b/llvm/lib/ObjCopy/MachO/MachOReader.cpp
@@ -0,0 +1,374 @@
+//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOReader.h"
+#include "MachOObject.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include <memory>
+
+using namespace llvm;
+using namespace llvm::objcopy;
+using namespace llvm::objcopy::macho;
+
+void MachOReader::readHeader(Object &O) const {
+  O.Header.Magic = MachOObj.getHeader().magic;
+  O.Header.CPUType = MachOObj.getHeader().cputype;
+  O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
+  O.Header.FileType = MachOObj.getHeader().filetype;
+  O.Header.NCmds = MachOObj.getHeader().ncmds;
+  O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
+  O.Header.Flags = MachOObj.getHeader().flags;
+}
+
+template <typename SectionType>
+static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
+  StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
+  StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
+  Section S(SegName, SectName);
+  S.Index = Index;
+  S.Addr = Sec.addr;
+  S.Size = Sec.size;
+  S.OriginalOffset = Sec.offset;
+  S.Align = Sec.align;
+  S.RelOff = Sec.reloff;
+  S.NReloc = Sec.nreloc;
+  S.Flags = Sec.flags;
+  S.Reserved1 = Sec.reserved1;
+  S.Reserved2 = Sec.reserved2;
+  S.Reserved3 = 0;
+  return S;
+}
+
+Section constructSection(const MachO::section &Sec, uint32_t Index) {
+  return constructSectionCommon(Sec, Index);
+}
+
+Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
+  Section S = constructSectionCommon(Sec, Index);
+  S.Reserved3 = Sec.reserved3;
+  return S;
+}
+
+template <typename SectionType, typename SegmentType>
+Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
+    const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
+    const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
+  std::vector<std::unique_ptr<Section>> Sections;
+  for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
+                                                         sizeof(SegmentType)),
+            End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
+                                                        LoadCmd.C.cmdsize);
+       Curr < End; ++Curr) {
+    SectionType Sec;
+    memcpy((void *)&Sec, Curr, sizeof(SectionType));
+
+    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
+      MachO::swapStruct(Sec);
+
+    Sections.push_back(
+        std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
+
+    Section &S = *Sections.back();
+
+    Expected<object::SectionRef> SecRef =
+        MachOObj.getSection(NextSectionIndex++);
+    if (!SecRef)
+      return SecRef.takeError();
+
+    Expected<ArrayRef<uint8_t>> Data =
+        MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
+    if (!Data)
+      return Data.takeError();
+
+    S.Content =
+        StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
+
+    const uint32_t CPUType = MachOObj.getHeader().cputype;
+    S.Relocations.reserve(S.NReloc);
+    for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
+              RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
+         RI != RE; ++RI) {
+      RelocationInfo R;
+      R.Symbol = nullptr; // We'll fill this field later.
+      R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
+      R.Scattered = MachOObj.isRelocationScattered(R.Info);
+      unsigned Type = MachOObj.getAnyRelocationType(R.Info);
+      // TODO: Support CPU_TYPE_ARM.
+      R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
+                                    Type == MachO::ARM64_RELOC_ADDEND);
+      R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
+      S.Relocations.push_back(R);
+    }
+
+    assert(S.NReloc == S.Relocations.size() &&
+           "Incorrect number of relocations");
+  }
+  return std::move(Sections);
+}
+Error MachOReader::readLoadCommands(Object &O) const {
+  // For MachO, section indices start from 1.
+  uint32_t NextSectionIndex = 1;
+  static constexpr char TextSegmentName[] = "__TEXT";
+  for (auto LoadCmd : MachOObj.load_commands()) {
+    LoadCommand LC;
+    switch (LoadCmd.C.cmd) {
+    case MachO::LC_CODE_SIGNATURE:
+      O.CodeSignatureCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_SEGMENT:
+      // LoadCmd.Ptr might not be aligned temporarily as
+      // MachO::segment_command requires, but the segname char pointer does
+      // not have alignment restrictions.
+      if (StringRef(reinterpret_cast<const char *>(
+              LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
+          TextSegmentName)
+        O.TextSegmentCommandIndex = O.LoadCommands.size();
+
+      if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
+              extractSections<MachO::section, MachO::segment_command>(
+                  LoadCmd, MachOObj, NextSectionIndex))
+        LC.Sections = std::move(*Sections);
+      else
+        return Sections.takeError();
+      break;
+    case MachO::LC_SEGMENT_64:
+      // LoadCmd.Ptr might not be aligned temporarily as
+      // MachO::segment_command_64 requires, but the segname char pointer does
+      // not have alignment restrictions.
+      if (StringRef(reinterpret_cast<const char *>(
+              LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
+          TextSegmentName)
+        O.TextSegmentCommandIndex = O.LoadCommands.size();
+
+      if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
+              extractSections<MachO::section_64, MachO::segment_command_64>(
+                  LoadCmd, MachOObj, NextSectionIndex))
+        LC.Sections = std::move(*Sections);
+      else
+        return Sections.takeError();
+      break;
+    case MachO::LC_SYMTAB:
+      O.SymTabCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_DYSYMTAB:
+      O.DySymTabCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_DYLD_INFO:
+    case MachO::LC_DYLD_INFO_ONLY:
+      O.DyLdInfoCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_DATA_IN_CODE:
+      O.DataInCodeCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_LINKER_OPTIMIZATION_HINT:
+      O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_FUNCTION_STARTS:
+      O.FunctionStartsCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_DYLD_EXPORTS_TRIE:
+      O.ExportsTrieCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_DYLD_CHAINED_FIXUPS:
+      O.ChainedFixupsCommandIndex = O.LoadCommands.size();
+      break;
+    }
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
+           sizeof(MachO::LCStruct));                                           \
+    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
+      MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
+    if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
+      LC.Payload = ArrayRef<uint8_t>(                                          \
+          reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
+              sizeof(MachO::LCStruct),                                         \
+          LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
+    break;
+
+    switch (LoadCmd.C.cmd) {
+    default:
+      memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
+             sizeof(MachO::load_command));
+      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
+        MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
+      if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
+        LC.Payload = ArrayRef<uint8_t>(
+            reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
+                sizeof(MachO::load_command),
+            LoadCmd.C.cmdsize - sizeof(MachO::load_command));
+      break;
+#include "llvm/BinaryFormat/MachO.def"
+    }
+    O.LoadCommands.push_back(std::move(LC));
+  }
+  return Error::success();
+}
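For orientation, the HANDLE_LOAD_COMMAND stamp above, combined with the #include of MachO.def, generates one case per known load command. For LC_UUID, which MachO.def maps to MachO::uuid_command, the expansion is roughly the following (an illustrative reconstruction, not literal patch content):

    case MachO::LC_UUID:
      memcpy((void *)&(LC.MachOLoadCommand.uuid_command_data), LoadCmd.Ptr,
             sizeof(MachO::uuid_command));
      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
        MachO::swapStruct(LC.MachOLoadCommand.uuid_command_data);
      if (LoadCmd.C.cmdsize > sizeof(MachO::uuid_command))
        LC.Payload = ArrayRef<uint8_t>(
            reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
                sizeof(MachO::uuid_command),
            LoadCmd.C.cmdsize - sizeof(MachO::uuid_command));
      break;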
+template <typename nlist_t>
+SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
+  assert(nlist.n_strx < StrTable.size() &&
+         "n_strx exceeds the size of the string table");
+  SymbolEntry SE;
+  SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
+  SE.n_type = nlist.n_type;
+  SE.n_sect = nlist.n_sect;
+  SE.n_desc = nlist.n_desc;
+  SE.n_value = nlist.n_value;
+  return SE;
+}
+
+void MachOReader::readSymbolTable(Object &O) const {
+  StringRef StrTable = MachOObj.getStringTableData();
+  for (auto Symbol : MachOObj.symbols()) {
+    SymbolEntry SE =
+        (MachOObj.is64Bit()
+             ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
+                                                  Symbol.getRawDataRefImpl()))
+             : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
+                                                  Symbol.getRawDataRefImpl())));
+
+    O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
+  }
+}
+
+void MachOReader::setSymbolInRelocationInfo(Object &O) const {
+  std::vector<const Section *> Sections;
+  for (auto &LC : O.LoadCommands)
+    for (std::unique_ptr<Section> &Sec : LC.Sections)
+      Sections.push_back(Sec.get());
+
+  for (LoadCommand &LC : O.LoadCommands)
+    for (std::unique_ptr<Section> &Sec : LC.Sections)
+      for (auto &Reloc : Sec->Relocations)
+        if (!Reloc.Scattered && !Reloc.IsAddend) {
+          const uint32_t SymbolNum =
+              Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
+          if (Reloc.Extern) {
+            Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
+          } else {
+            // FIXME: Refactor error handling in MachOReader and report an
+            // error if we encounter an invalid relocation.
+            assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
+                   "Invalid section index.");
+            Reloc.Sec = Sections[SymbolNum - 1];
+          }
+        }
+}
+
+void MachOReader::readRebaseInfo(Object &O) const {
+  O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
+}
+
+void MachOReader::readBindInfo(Object &O) const {
+  O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
+}
+
+void MachOReader::readWeakBindInfo(Object &O) const {
+  O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
+}
+
+void MachOReader::readLazyBindInfo(Object &O) const {
+  O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
+}
+
+void MachOReader::readExportInfo(Object &O) const {
+  O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
+}
+
+void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex,
+                               LinkData &LD) const {
+  if (!LCIndex)
+    return;
+  const MachO::linkedit_data_command &LC =
+      O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
+  LD.Data =
+      arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
+}
+
+void MachOReader::readDataInCodeData(Object &O) const {
+  return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
+}
+
+void MachOReader::readLinkerOptimizationHint(Object &O) const {
+  return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
+                      O.LinkerOptimizationHint);
+}
+
+void MachOReader::readFunctionStartsData(Object &O) const {
+  return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
+}
+
+void MachOReader::readExportsTrie(Object &O) const {
+  return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
+}
+
+void MachOReader::readChainedFixups(Object &O) const {
+  return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
+}
+
+void MachOReader::readIndirectSymbolTable(Object &O) const {
+  MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
+  constexpr uint32_t AbsOrLocalMask =
+      MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
+  for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
+    uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
+    if ((Index & AbsOrLocalMask) != 0)
+      O.IndirectSymTable.Symbols.emplace_back(Index, None);
+    else
+      O.IndirectSymTable.Symbols.emplace_back(
+          Index, O.SymTable.getSymbolByIndex(Index));
+  }
+}
+
+void MachOReader::readSwiftVersion(Object &O) const {
+  struct ObjCImageInfo {
+    uint32_t Version;
+    uint32_t Flags;
+  } ImageInfo;
+
+  for (const LoadCommand &LC : O.LoadCommands)
+    for (const std::unique_ptr<Section> &Sec : LC.Sections)
+      if (Sec->Sectname == "__objc_imageinfo" &&
+          (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
+           Sec->Segname == "__DATA_DIRTY") &&
+          Sec->Content.size() >= sizeof(ObjCImageInfo)) {
+        memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
+        if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
+          sys::swapByteOrder(ImageInfo.Version);
+          sys::swapByteOrder(ImageInfo.Flags);
+        }
+        O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
+        return;
+      }
+}
+
+Expected<std::unique_ptr<Object>> MachOReader::create() const {
+  auto Obj = std::make_unique<Object>();
+  readHeader(*Obj);
+  if (Error E = readLoadCommands(*Obj))
+    return std::move(E);
+  readSymbolTable(*Obj);
+  setSymbolInRelocationInfo(*Obj);
+  readRebaseInfo(*Obj);
+  readBindInfo(*Obj);
+  readWeakBindInfo(*Obj);
+  readLazyBindInfo(*Obj);
+  readExportInfo(*Obj);
+  readDataInCodeData(*Obj);
+  readLinkerOptimizationHint(*Obj);
+  readFunctionStartsData(*Obj);
+  readExportsTrie(*Obj);
+  readChainedFixups(*Obj);
+  readIndirectSymbolTable(*Obj);
+  readSwiftVersion(*Obj);
+  return std::move(Obj);
+}
diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.h b/llvm/lib/ObjCopy/MachO/MachOReader.h
new file mode 100644
index 000000000000..ef374aa9efae
--- /dev/null
+++ b/llvm/lib/ObjCopy/MachO/MachOReader.h
@@ -0,0 +1,62 @@
+//===- MachOReader.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H
+
+#include "MachOObject.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "llvm/Object/MachO.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+// The hierarchy of readers is responsible for parsing different inputs:
+// raw binaries and regular MachO object files.
+class Reader {
+public:
+  virtual ~Reader() {}
+  virtual Expected<std::unique_ptr<Object>> create() const = 0;
+};
+
+class MachOReader : public Reader {
+  const object::MachOObjectFile &MachOObj;
+
+  void readHeader(Object &O) const;
+  Error readLoadCommands(Object &O) const;
+  void readSymbolTable(Object &O) const;
+  void setSymbolInRelocationInfo(Object &O) const;
+  void readRebaseInfo(Object &O) const;
+  void readBindInfo(Object &O) const;
+  void readWeakBindInfo(Object &O) const;
+  void readLazyBindInfo(Object &O) const;
+  void readExportInfo(Object &O) const;
+  void readLinkData(Object &O, Optional<size_t> LCIndex, LinkData &LD) const;
+  void readCodeSignature(Object &O) const;
+  void readDataInCodeData(Object &O) const;
+  void readLinkerOptimizationHint(Object &O) const;
+  void readFunctionStartsData(Object &O) const;
+  void readExportsTrie(Object &O) const;
+  void readChainedFixups(Object &O) const;
+  void readIndirectSymbolTable(Object &O) const;
+  void readSwiftVersion(Object &O) const;
+
+public:
+  explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}
+
+  Expected<std::unique_ptr<Object>> create() const override;
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H
diff --git a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
new file mode 100644
index 000000000000..bc633285e03c
--- /dev/null
+++ b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
@@ -0,0 +1,662 @@
+//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "MachOLayoutBuilder.h"
+#include "MachOObject.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SHA256.h"
+#include <memory>
+
+#if defined(__APPLE__)
+#include <sys/mman.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::objcopy::macho;
+using namespace llvm::support::endian;
+
+size_t MachOWriter::headerSize() const {
+  return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+}
+
+size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
+
+size_t MachOWriter::symTableSize() const {
+  return O.SymTable.Symbols.size() *
+         (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
+}
+size_t MachOWriter::totalSize() const {
+  // Going from tail to head and looking for an appropriate "anchor" to
+  // calculate the total size, assuming that all the offsets are either valid
+  // ("true") or 0 (0 indicates that the corresponding part is missing).
+
+  SmallVector<uint64_t, 7> Ends;
+  if (O.SymTabCommandIndex) {
+    const MachO::symtab_command &SymTabCommand =
+        O.LoadCommands[*O.SymTabCommandIndex]
+            .MachOLoadCommand.symtab_command_data;
+    if (SymTabCommand.symoff)
+      Ends.push_back(SymTabCommand.symoff + symTableSize());
+    if (SymTabCommand.stroff)
+      Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
+  }
+  if (O.DyLdInfoCommandIndex) {
+    const MachO::dyld_info_command &DyLdInfoCommand =
+        O.LoadCommands[*O.DyLdInfoCommandIndex]
+            .MachOLoadCommand.dyld_info_command_data;
+    if (DyLdInfoCommand.rebase_off) {
+      assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+             "Incorrect rebase opcodes size");
+      Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
+    }
+    if (DyLdInfoCommand.bind_off) {
+      assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+             "Incorrect bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
+    }
+    if (DyLdInfoCommand.weak_bind_off) {
+      assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+             "Incorrect weak bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.weak_bind_off +
+                     DyLdInfoCommand.weak_bind_size);
+    }
+    if (DyLdInfoCommand.lazy_bind_off) {
+      assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+             "Incorrect lazy bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.lazy_bind_off +
+                     DyLdInfoCommand.lazy_bind_size);
+    }
+    if (DyLdInfoCommand.export_off) {
+      assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+             "Incorrect trie size");
+      Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
+    }
+  }
+
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Ends.push_back(DySymTabCommand.indirectsymoff +
+                     sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+  }
+
+  for (Optional<size_t> LinkEditDataCommandIndex :
+       {O.CodeSignatureCommandIndex, O.DataInCodeCommandIndex,
+        O.LinkerOptimizationHintCommandIndex, O.FunctionStartsCommandIndex,
+        O.ChainedFixupsCommandIndex, O.ExportsTrieCommandIndex})
+    if (LinkEditDataCommandIndex) {
+      const MachO::linkedit_data_command &LinkEditDataCommand =
+          O.LoadCommands[*LinkEditDataCommandIndex]
+              .MachOLoadCommand.linkedit_data_command_data;
+      if (LinkEditDataCommand.dataoff)
+        Ends.push_back(LinkEditDataCommand.dataoff +
+                       LinkEditDataCommand.datasize);
+    }
+
+  // Otherwise, use the last section / relocation.
+  for (const LoadCommand &LC : O.LoadCommands)
+    for (const std::unique_ptr<Section> &S : LC.Sections) {
+      if (!S->hasValidOffset()) {
+        assert((S->Offset == 0) && "Skipped section's offset must be zero");
+        assert((S->isVirtualSection() || S->Size == 0) &&
+               "Non-zero-fill sections with zero offset must have zero size");
+        continue;
+      }
+      assert((S->Offset != 0) &&
+             "Non-zero-fill section's offset cannot be zero");
+      Ends.push_back(S->Offset + S->Size);
+      if (S->RelOff)
+        Ends.push_back(S->RelOff +
+                       S->NReloc * sizeof(MachO::any_relocation_info));
+    }
+
+  if (!Ends.empty())
+    return *std::max_element(Ends.begin(), Ends.end());
+
+  // Otherwise, we have only the Mach header and load commands.
+  return headerSize() + loadCommandsSize();
+}
+
+void MachOWriter::writeHeader() {
+  MachO::mach_header_64 Header;
+
+  Header.magic = O.Header.Magic;
+  Header.cputype = O.Header.CPUType;
+  Header.cpusubtype = O.Header.CPUSubType;
+  Header.filetype = O.Header.FileType;
+  Header.ncmds = O.Header.NCmds;
+  Header.sizeofcmds = O.Header.SizeOfCmds;
+  Header.flags = O.Header.Flags;
+  Header.reserved = O.Header.Reserved;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Header);
+
+  auto HeaderSize =
+      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+  memcpy(Buf->getBufferStart(), &Header, HeaderSize);
+}
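writeHeader above always fills in a mach_header_64 but copies only headerSize() bytes of it, which is sound because the 32-bit header is a strict prefix of the 64-bit one (only the trailing reserved field differs). A standalone restatement of that assumption (illustrative, not part of the patch):

    #include "llvm/BinaryFormat/MachO.h"
    #include <cstddef>

    static_assert(sizeof(llvm::MachO::mach_header) ==
                      offsetof(llvm::MachO::mach_header_64, reserved),
                  "mach_header must be a prefix of mach_header_64");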
+void MachOWriter::writeLoadCommands() {
+  uint8_t *Begin =
+      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + headerSize();
+  for (const LoadCommand &LC : O.LoadCommands) {
+    // Construct a load command.
+    MachO::macho_load_command MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_data);
+      memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
+      Begin += sizeof(MachO::segment_command);
+
+      for (const std::unique_ptr<Section> &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section>(*Sec, Begin);
+      continue;
+    case MachO::LC_SEGMENT_64:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_64_data);
+      memcpy(Begin, &MLC.segment_command_64_data,
+             sizeof(MachO::segment_command_64));
+      Begin += sizeof(MachO::segment_command_64);
+
+      for (const std::unique_ptr<Section> &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section_64>(*Sec, Begin);
+      continue;
+    }
+
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
+           MLC.load_command_data.cmdsize);                                     \
+    if (IsLittleEndian != sys::IsLittleEndianHost)                             \
+      MachO::swapStruct(MLC.LCStruct##_data);                                  \
+    memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
+    Begin += sizeof(MachO::LCStruct);                                          \
+    if (!LC.Payload.empty())                                                   \
+      memcpy(Begin, LC.Payload.data(), LC.Payload.size());                     \
+    Begin += LC.Payload.size();                                                \
+    break;
+
+    // Copy the load command as it is.
+    switch (MLC.load_command_data.cmd) {
+    default:
+      assert(sizeof(MachO::load_command) + LC.Payload.size() ==
+             MLC.load_command_data.cmdsize);
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.load_command_data);
+      memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
+      Begin += sizeof(MachO::load_command);
+      if (!LC.Payload.empty())
+        memcpy(Begin, LC.Payload.data(), LC.Payload.size());
+      Begin += LC.Payload.size();
+      break;
+#include "llvm/BinaryFormat/MachO.def"
+    }
+  }
+}
+
+template <typename StructType>
+void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
+  StructType Temp;
+  assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
+  assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
+         "too long section name");
+  memset(&Temp, 0, sizeof(StructType));
+  memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
+  memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
+  Temp.addr = Sec.Addr;
+  Temp.size = Sec.Size;
+  Temp.offset = Sec.Offset;
+  Temp.align = Sec.Align;
+  Temp.reloff = Sec.RelOff;
+  Temp.nreloc = Sec.NReloc;
+  Temp.flags = Sec.Flags;
+  Temp.reserved1 = Sec.Reserved1;
+  Temp.reserved2 = Sec.Reserved2;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Temp);
+  memcpy(Out, &Temp, sizeof(StructType));
+  Out += sizeof(StructType);
+}
+
+void MachOWriter::writeSections() {
+  for (const LoadCommand &LC : O.LoadCommands)
+    for (const std::unique_ptr<Section> &Sec : LC.Sections) {
+      if (!Sec->hasValidOffset()) {
+        assert((Sec->Offset == 0) && "Skipped section's offset must be zero");
+        assert((Sec->isVirtualSection() || Sec->Size == 0) &&
+               "Non-zero-fill sections with zero offset must have zero size");
+        continue;
+      }
+
+      assert(Sec->Offset && "Section offset can not be zero");
+      assert((Sec->Size == Sec->Content.size()) && "Incorrect section size");
+      memcpy(Buf->getBufferStart() + Sec->Offset, Sec->Content.data(),
+             Sec->Content.size());
+      for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) {
+        RelocationInfo RelocInfo = Sec->Relocations[Index];
+        if (!RelocInfo.Scattered && !RelocInfo.IsAddend) {
+          const uint32_t SymbolNum = RelocInfo.Extern
+                                         ? (*RelocInfo.Symbol)->Index
+                                         : (*RelocInfo.Sec)->Index;
+          RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian);
+        }
+        if (IsLittleEndian != sys::IsLittleEndianHost)
+          MachO::swapStruct(
+              reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
+        memcpy(Buf->getBufferStart() + Sec->RelOff +
+                   Index * sizeof(MachO::any_relocation_info),
+               &RelocInfo.Info, sizeof(RelocInfo.Info));
+      }
+    }
+}
+
+template <typename NListType>
+void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
+                     uint32_t Nstrx) {
+  NListType ListEntry;
+  ListEntry.n_strx = Nstrx;
+  ListEntry.n_type = SE.n_type;
+  ListEntry.n_sect = SE.n_sect;
+  ListEntry.n_desc = SE.n_desc;
+  ListEntry.n_value = SE.n_value;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(ListEntry);
+  memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
+  Out += sizeof(NListType);
+}
+
+void MachOWriter::writeStringTable() {
+  if (!O.SymTabCommandIndex)
+    return;
+  const MachO::symtab_command &SymTabCommand =
+      O.LoadCommands[*O.SymTabCommandIndex]
+          .MachOLoadCommand.symtab_command_data;
+
+  uint8_t *StrTable = (uint8_t *)Buf->getBufferStart() + SymTabCommand.stroff;
+  LayoutBuilder.getStringTableBuilder().write(StrTable);
+}
+
+void MachOWriter::writeSymbolTable() {
+  if (!O.SymTabCommandIndex)
+    return;
+  const MachO::symtab_command &SymTabCommand =
+      O.LoadCommands[*O.SymTabCommandIndex]
+          .MachOLoadCommand.symtab_command_data;
+
+  char *SymTable = (char *)Buf->getBufferStart() + SymTabCommand.symoff;
+  for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
+       Iter != End; Iter++) {
+    SymbolEntry *Sym = Iter->get();
+    uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
+
+    if (Is64Bit)
+      writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
+    else
+      writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
+  }
+}
+
+void MachOWriter::writeRebaseInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.rebase_off;
+  assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+         "Incorrect rebase opcodes size");
+  memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size());
+}
+
+void MachOWriter::writeBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.bind_off;
+  assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+         "Incorrect bind opcodes size");
+  memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size());
+}
+void MachOWriter::writeWeakBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off;
+  assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+         "Incorrect weak bind opcodes size");
+  memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size());
+}
+
+void MachOWriter::writeLazyBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off;
+  assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+         "Incorrect lazy bind opcodes size");
+  memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size());
+}
+
+void MachOWriter::writeExportInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.export_off;
+  assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+         "Incorrect export trie size");
+  memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
+}
+
+void MachOWriter::writeIndirectSymbolTable() {
+  if (!O.DySymTabCommandIndex)
+    return;
+
+  const MachO::dysymtab_command &DySymTabCommand =
+      O.LoadCommands[*O.DySymTabCommandIndex]
+          .MachOLoadCommand.dysymtab_command_data;
+
+  uint32_t *Out =
+      (uint32_t *)(Buf->getBufferStart() + DySymTabCommand.indirectsymoff);
+  for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) {
+    uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex;
+    if (IsLittleEndian != sys::IsLittleEndianHost)
+      sys::swapByteOrder(Entry);
+    *Out++ = Entry;
+  }
+}
+
+void MachOWriter::writeLinkData(Optional<size_t> LCIndex, const LinkData &LD) {
+  if (!LCIndex)
+    return;
+  const MachO::linkedit_data_command &LinkEditDataCommand =
+      O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
+  char *Out = (char *)Buf->getBufferStart() + LinkEditDataCommand.dataoff;
+  assert((LinkEditDataCommand.datasize == LD.Data.size()) &&
+         "Incorrect data size");
+  memcpy(Out, LD.Data.data(), LD.Data.size());
+}
+
+static uint64_t
+getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) {
+  const MachO::macho_load_command &MLC =
+      TextSegmentLoadCommand.MachOLoadCommand;
+  switch (MLC.load_command_data.cmd) {
+  case MachO::LC_SEGMENT:
+    return MLC.segment_command_data.fileoff;
+  case MachO::LC_SEGMENT_64:
+    return MLC.segment_command_64_data.fileoff;
+  default:
+    return 0;
+  }
+}
+
+static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) {
+  const MachO::macho_load_command &MLC =
+      TextSegmentLoadCommand.MachOLoadCommand;
+  switch (MLC.load_command_data.cmd) {
+  case MachO::LC_SEGMENT:
+    return MLC.segment_command_data.filesize;
+  case MachO::LC_SEGMENT_64:
+    return MLC.segment_command_64_data.filesize;
+  default:
+    return 0;
+  }
+}
+
+void MachOWriter::writeCodeSignatureData() {
+  // NOTE: This CodeSignature section behaviour must be kept in sync with that
+  // performed in LLD's CodeSignatureSection::write /
+  // CodeSignatureSection::writeHashes. Furthermore, this call must occur only
+  // after the rest of the binary has already been written to the buffer. This
+  // is because the buffer is read from to perform the necessary hashing.
+
+  // The CodeSignature section is the last section in the MachO binary and
+  // contains a hash of all content in the binary before it. Since llvm-objcopy
+  // has likely modified the target binary, the hash must be regenerated
+  // entirely. To generate this hash, we must read from the start of the binary
+  // (HashReadStart) to just before the start of the CodeSignature section
+  // (HashReadEnd).
+
+  const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature();
+
+  uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+  uint8_t *HashReadStart = BufferStart;
+  uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset;
+
+  // The CodeSignature section begins with a header, after which the hashes
+  // of each page of the binary are written.
+  uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize;
+
+  uint32_t TextSegmentFileOff = 0;
+  uint32_t TextSegmentFileSize = 0;
+  if (O.TextSegmentCommandIndex) {
+    const LoadCommand &TextSegmentLoadCommand =
+        O.LoadCommands[*O.TextSegmentCommandIndex];
+    assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd ==
+               MachO::LC_SEGMENT ||
+           TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd ==
+               MachO::LC_SEGMENT_64);
+    assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand
+                         .segment_command_data.segname) == "__TEXT");
+    TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand);
+    TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand);
+  }
+
+  const uint32_t FileNamePad = CodeSignature.AllHeadersSize -
+                               CodeSignature.FixedHeadersSize -
+                               CodeSignature.OutputFileName.size();
+
+  // Write code section header.
+  auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd);
+  write32be(&SuperBlob->magic, MachO::CSMAGIC_EMBEDDED_SIGNATURE);
+  write32be(&SuperBlob->length, CodeSignature.Size);
+  write32be(&SuperBlob->count, 1);
+  auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]);
+  write32be(&BlobIndex->type, MachO::CSSLOT_CODEDIRECTORY);
+  write32be(&BlobIndex->offset, CodeSignature.BlobHeadersSize);
+  auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>(
+      HashReadEnd + CodeSignature.BlobHeadersSize);
+  write32be(&CodeDirectory->magic, MachO::CSMAGIC_CODEDIRECTORY);
+  write32be(&CodeDirectory->length,
+            CodeSignature.Size - CodeSignature.BlobHeadersSize);
+  write32be(&CodeDirectory->version, MachO::CS_SUPPORTSEXECSEG);
+  write32be(&CodeDirectory->flags, MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED);
+  write32be(&CodeDirectory->hashOffset,
+            sizeof(MachO::CS_CodeDirectory) +
+                CodeSignature.OutputFileName.size() + FileNamePad);
+  write32be(&CodeDirectory->identOffset, sizeof(MachO::CS_CodeDirectory));
+  CodeDirectory->nSpecialSlots = 0;
+  write32be(&CodeDirectory->nCodeSlots, CodeSignature.BlockCount);
+  write32be(&CodeDirectory->codeLimit, CodeSignature.StartOffset);
+  CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize);
+  CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256;
+  CodeDirectory->platform = 0;
+  CodeDirectory->pageSize = CodeSignature.BlockSizeShift;
+  CodeDirectory->spare2 = 0;
+  CodeDirectory->scatterOffset = 0;
+  CodeDirectory->teamOffset = 0;
+  CodeDirectory->spare3 = 0;
+  CodeDirectory->codeLimit64 = 0;
+  write64be(&CodeDirectory->execSegBase, TextSegmentFileOff);
+  write64be(&CodeDirectory->execSegLimit, TextSegmentFileSize);
+  write64be(&CodeDirectory->execSegFlags, O.Header.FileType == MachO::MH_EXECUTE
+                                              ? MachO::CS_EXECSEG_MAIN_BINARY
+                                              : 0);
+
+  auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]);
+  memcpy(Id, CodeSignature.OutputFileName.begin(),
+         CodeSignature.OutputFileName.size());
+  memset(Id + CodeSignature.OutputFileName.size(), 0, FileNamePad);
+
+  // Write the hashes.
+  uint8_t *CurrHashReadPosition = HashReadStart;
+  uint8_t *CurrHashWritePosition = HashWriteStart;
+  while (CurrHashReadPosition < HashReadEnd) {
+    StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition),
+                    std::min(static_cast<size_t>(HashReadEnd -
+                                                 CurrHashReadPosition),
+                             static_cast<size_t>(CodeSignature.BlockSize)));
+    SHA256 Hasher;
+    Hasher.update(Block);
+    std::array<uint8_t, 32> Hash = Hasher.final();
+    assert(Hash.size() == CodeSignature.HashSize);
+    memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize);
+    CurrHashReadPosition += CodeSignature.BlockSize;
+    CurrHashWritePosition += CodeSignature.HashSize;
+  }
+#if defined(__APPLE__)
+  // This is a macOS-specific work-around and makes no sense for any
+  // other host OS. See https://openradar.appspot.com/FB8914231
+  //
+  // The macOS kernel maintains a signature-verification cache to
+  // quickly validate applications at time of execve(2). The trouble
+  // is that the kernel creates the cache entry at the time of the
+  // mmap(2) call, before we have a chance to write either the code to
+  // sign or the signature header+hashes. The fix is to invalidate
+  // all cached data associated with the output file, thus discarding
+  // the bogus prematurely-cached signature.
+  msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size,
+        MS_INVALIDATE);
+#endif
+}
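The hashing loop above emits one 32-byte SHA-256 digest per BlockSize-sized chunk of the binary, with a short final chunk. The page arithmetic it relies on, sketched with hypothetical numbers (a 4 KiB page, i.e. a BlockSizeShift of 12; not part of the patch):

    #include <cstdint>

    constexpr uint64_t CodeLimit = 0x21d0;            // bytes before the signature
    constexpr uint64_t BlockSize = uint64_t(1) << 12; // 4096
    constexpr uint64_t BlockCount =
        (CodeLimit + BlockSize - 1) / BlockSize;      // round up
    static_assert(BlockCount == 3, "three digests, 96 bytes of hashes");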
+void MachOWriter::writeDataInCodeData() {
+  return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode);
+}
+
+void MachOWriter::writeLinkerOptimizationHint() {
+  return writeLinkData(O.LinkerOptimizationHintCommandIndex,
+                       O.LinkerOptimizationHint);
+}
+
+void MachOWriter::writeFunctionStartsData() {
+  return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts);
+}
+
+void MachOWriter::writeChainedFixupsData() {
+  return writeLinkData(O.ChainedFixupsCommandIndex, O.ChainedFixups);
+}
+
+void MachOWriter::writeExportsTrieData() {
+  return writeLinkData(O.ExportsTrieCommandIndex, O.ExportsTrie);
+}
+
+void MachOWriter::writeTail() {
+  typedef void (MachOWriter::*WriteHandlerType)();
+  typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
+  SmallVector<WriteOperation, 7> Queue;
+
+  if (O.SymTabCommandIndex) {
+    const MachO::symtab_command &SymTabCommand =
+        O.LoadCommands[*O.SymTabCommandIndex]
+            .MachOLoadCommand.symtab_command_data;
+    if (SymTabCommand.symoff)
+      Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable});
+    if (SymTabCommand.stroff)
+      Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable});
+  }
+
+  if (O.DyLdInfoCommandIndex) {
+    const MachO::dyld_info_command &DyLdInfoCommand =
+        O.LoadCommands[*O.DyLdInfoCommandIndex]
+            .MachOLoadCommand.dyld_info_command_data;
+    if (DyLdInfoCommand.rebase_off)
+      Queue.push_back(
+          {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo});
+    if (DyLdInfoCommand.bind_off)
+      Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo});
+    if (DyLdInfoCommand.weak_bind_off)
+      Queue.push_back(
+          {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo});
+    if (DyLdInfoCommand.lazy_bind_off)
+      Queue.push_back(
+          {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo});
+    if (DyLdInfoCommand.export_off)
+      Queue.push_back(
+          {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
+  }
+
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Queue.emplace_back(DySymTabCommand.indirectsymoff,
+                         &MachOWriter::writeIndirectSymbolTable);
+  }
+
+  std::initializer_list<std::pair<Optional<size_t>, WriteHandlerType>>
+      LinkEditDataCommandWriters = {
+          {O.CodeSignatureCommandIndex, &MachOWriter::writeCodeSignatureData},
+          {O.DataInCodeCommandIndex, &MachOWriter::writeDataInCodeData},
+          {O.LinkerOptimizationHintCommandIndex,
+           &MachOWriter::writeLinkerOptimizationHint},
+          {O.FunctionStartsCommandIndex, &MachOWriter::writeFunctionStartsData},
+          {O.ChainedFixupsCommandIndex, &MachOWriter::writeChainedFixupsData},
+          {O.ExportsTrieCommandIndex, &MachOWriter::writeExportsTrieData}};
+  for (const auto &W : LinkEditDataCommandWriters) {
+    Optional<size_t> LinkEditDataCommandIndex;
+    WriteHandlerType WriteHandler;
+    std::tie(LinkEditDataCommandIndex, WriteHandler) = W;
+    if (LinkEditDataCommandIndex) {
+      const MachO::linkedit_data_command &LinkEditDataCommand =
+          O.LoadCommands[*LinkEditDataCommandIndex]
+              .MachOLoadCommand.linkedit_data_command_data;
+      if (LinkEditDataCommand.dataoff)
+        Queue.emplace_back(LinkEditDataCommand.dataoff, WriteHandler);
+    }
+  }
+
+  llvm::sort(Queue, llvm::less_first());
+
+  for (auto WriteOp : Queue)
+    (this->*WriteOp.second)();
+}
+
+Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
+
+Error MachOWriter::write() {
+  size_t TotalSize = totalSize();
+  Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize);
+  if (!Buf)
+    return createStringError(errc::not_enough_memory,
+                             "failed to allocate memory buffer of " +
+                                 Twine::utohexstr(TotalSize) + " bytes");
+  writeHeader();
+  writeLoadCommands();
+  writeSections();
+  writeTail();
+
+  // TODO: Implement direct writing to the output stream (without intermediate
+  // memory buffer Buf).
+  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+  return Error::success();
+}
diff --git a/llvm/lib/ObjCopy/MachO/MachOWriter.h b/llvm/lib/ObjCopy/MachO/MachOWriter.h
new file mode 100644
index 000000000000..a54c10294246
--- /dev/null
+++ b/llvm/lib/ObjCopy/MachO/MachOWriter.h
@@ -0,0 +1,76 @@
+//===- MachOWriter.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H
+
+#include "MachOLayoutBuilder.h"
+#include "MachOObject.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "llvm/Object/MachO.h"
+
+namespace llvm {
+class Error;
+
+namespace objcopy {
+namespace macho {
+
+class MachOWriter {
+  Object &O;
+  bool Is64Bit;
+  bool IsLittleEndian;
+  uint64_t PageSize;
+  std::unique_ptr<WritableMemoryBuffer> Buf;
+  raw_ostream &Out;
+  MachOLayoutBuilder LayoutBuilder;
+
+  size_t headerSize() const;
+  size_t loadCommandsSize() const;
+  size_t symTableSize() const;
+  size_t strTableSize() const;
+
+  void writeHeader();
+  void writeLoadCommands();
+  template <typename StructType>
+  void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out);
+  void writeSections();
+  void writeSymbolTable();
+  void writeStringTable();
+  void writeRebaseInfo();
+  void writeBindInfo();
+  void writeWeakBindInfo();
+  void writeLazyBindInfo();
+  void writeExportInfo();
+  void writeIndirectSymbolTable();
+  void writeLinkData(Optional<size_t> LCIndex, const LinkData &LD);
+  void writeCodeSignatureData();
+  void writeDataInCodeData();
+  void writeLinkerOptimizationHint();
+  void writeFunctionStartsData();
+  void writeChainedFixupsData();
+  void writeExportsTrieData();
+  void writeTail();
+
+public:
+  MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian,
+              StringRef OutputFileName, uint64_t PageSize, raw_ostream &Out)
+      : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
+        PageSize(PageSize), Out(Out),
+        LayoutBuilder(O, Is64Bit, OutputFileName, PageSize) {}
+
+  size_t totalSize() const;
+  Error finalize();
+  Error write();
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H
diff --git a/llvm/lib/ObjCopy/ObjCopy.cpp b/llvm/lib/ObjCopy/ObjCopy.cpp
new file mode 100644
index 000000000000..16968d202265
--- /dev/null
+++ b/llvm/lib/ObjCopy/ObjCopy.cpp
@@ -0,0 +1,90 @@
+//===- Objcopy.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/ObjCopy.h"
+#include "llvm/ObjCopy/COFF/COFFConfig.h"
+#include "llvm/ObjCopy/COFF/COFFObjcopy.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
+#include "llvm/ObjCopy/ELF/ELFObjcopy.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/wasm/WasmConfig.h"
+#include "llvm/ObjCopy/wasm/WasmObjcopy.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFObjcopy.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/Wasm.h"
+#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+namespace llvm {
+namespace objcopy {
+
+using namespace llvm::object;
+
+/// The function executeObjcopyOnBinary does the dispatch based on the format
+/// of the input binary (ELF, MachO, COFF, wasm, or XCOFF).
+Error executeObjcopyOnBinary(const MultiFormatConfig &Config,
+                             object::Binary &In, raw_ostream &Out) {
+  if (auto *ELFBinary = dyn_cast<ELFObjectFileBase>(&In)) {
+    Expected<const ELFConfig &> ELFConfig = Config.getELFConfig();
+    if (!ELFConfig)
+      return ELFConfig.takeError();
+
+    return elf::executeObjcopyOnBinary(Config.getCommonConfig(), *ELFConfig,
+                                       *ELFBinary, Out);
+  }
+  if (auto *COFFBinary = dyn_cast<COFFObjectFile>(&In)) {
+    Expected<const COFFConfig &> COFFConfig = Config.getCOFFConfig();
+    if (!COFFConfig)
+      return COFFConfig.takeError();
+
+    return coff::executeObjcopyOnBinary(Config.getCommonConfig(), *COFFConfig,
+                                        *COFFBinary, Out);
+  }
+  if (auto *MachOBinary = dyn_cast<MachOObjectFile>(&In)) {
+    Expected<const MachOConfig &> MachOConfig = Config.getMachOConfig();
+    if (!MachOConfig)
+      return MachOConfig.takeError();
+
+    return macho::executeObjcopyOnBinary(Config.getCommonConfig(), *MachOConfig,
+                                         *MachOBinary, Out);
+  }
+  if (auto *MachOUniversalBinary =
+          dyn_cast<MachOUniversalBinary>(&In)) {
+    return macho::executeObjcopyOnMachOUniversalBinary(
+        Config, *MachOUniversalBinary, Out);
+  }
+  if (auto *WasmBinary = dyn_cast<WasmObjectFile>(&In)) {
+    Expected<const WasmConfig &> WasmConfig = Config.getWasmConfig();
+    if (!WasmConfig)
+      return WasmConfig.takeError();
+
+    return objcopy::wasm::executeObjcopyOnBinary(Config.getCommonConfig(),
+                                                 *WasmConfig, *WasmBinary, Out);
+  }
+  if (auto *XCOFFBinary = dyn_cast<XCOFFObjectFile>(&In)) {
+    Expected<const XCOFFConfig &> XCOFFConfig = Config.getXCOFFConfig();
+    if (!XCOFFConfig)
+      return XCOFFConfig.takeError();
+
+    return xcoff::executeObjcopyOnBinary(Config.getCommonConfig(), *XCOFFConfig,
+                                         *XCOFFBinary, Out);
+  }
+  return createStringError(object_error::invalid_file_type,
+                           "unsupported object file format");
+}
+
+} // end namespace objcopy
+} // end namespace llvm
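A hedged sketch of how a client typically drives this dispatcher: open the input with object::createBinary, then hand the binary and an output stream to executeObjcopyOnBinary. The MultiFormatConfig implementation is assumed to exist on the caller's side:

    #include "llvm/ObjCopy/ObjCopy.h"
    #include "llvm/Object/Binary.h"
    #include "llvm/Support/raw_ostream.h"

    static llvm::Error runObjcopy(const llvm::objcopy::MultiFormatConfig &Config,
                                  llvm::StringRef InputPath,
                                  llvm::raw_ostream &OS) {
      llvm::Expected<llvm::object::OwningBinary<llvm::object::Binary>> BinOrErr =
          llvm::object::createBinary(InputPath);
      if (!BinOrErr)
        return BinOrErr.takeError();
      return llvm::objcopy::executeObjcopyOnBinary(Config,
                                                   *BinOrErr->getBinary(), OS);
    }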
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp
new file mode 100644
index 000000000000..f6e29bd315cb
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp
@@ -0,0 +1,45 @@
+//===- XCOFFObjcopy.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFObjcopy.h"
+#include "llvm/Support/Errc.h"
+#include "XCOFFObject.h"
+#include "XCOFFReader.h"
+#include "XCOFFWriter.h"
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+  return Error::success();
+}
+
+Error executeObjcopyOnBinary(const CommonConfig &Config, const XCOFFConfig &,
+                             XCOFFObjectFile &In, raw_ostream &Out) {
+  XCOFFReader Reader(In);
+  Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
+  if (!ObjOrErr)
+    return createFileError(Config.InputFilename, ObjOrErr.takeError());
+  Object *Obj = ObjOrErr->get();
+  assert(Obj && "Unable to deserialize XCOFF object");
+  if (Error E = handleArgs(Config, *Obj))
+    return createFileError(Config.InputFilename, std::move(E));
+  XCOFFWriter Writer(*Obj, Out);
+  if (Error E = Writer.write())
+    return createFileError(Config.OutputFilename, std::move(E));
+  return Error::success();
+}
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFObject.h b/llvm/lib/ObjCopy/XCOFF/XCOFFObject.h
new file mode 100644
index 000000000000..3c68b6d3878f
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFObject.h
@@ -0,0 +1,48 @@
+//===- XCOFFObject.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H
+#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/XCOFFObjectFile.h"
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+struct Section {
+  XCOFFSectionHeader32 SectionHeader;
+  ArrayRef<uint8_t> Contents;
+  std::vector<XCOFFRelocation32> Relocations;
+};
+
+struct Symbol {
+  XCOFFSymbolEntry32 Sym;
+  // For now, each auxiliary symbol is only an opaque binary blob with no
+  // further interpretation.
+  StringRef AuxSymbolEntries;
+};
+
+struct Object {
+  XCOFFFileHeader32 FileHeader;
+  XCOFFAuxiliaryHeader32 OptionalFileHeader;
+  std::vector<Section> Sections;
+  std::vector<Symbol> Symbols;
+  StringRef StringTable;
+};
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp
new file mode 100644
index 000000000000..8ad3021a0342
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp
@@ -0,0 +1,101 @@
+//===- XCOFFReader.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCOFFReader.h"
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+Error XCOFFReader::readSections(Object &Obj) const {
+  ArrayRef<XCOFFSectionHeader32> Sections = XCOFFObj.sections32();
+  for (const XCOFFSectionHeader32 &Sec : Sections) {
+    Section ReadSec;
+    // Section header.
+    ReadSec.SectionHeader = Sec;
+    DataRefImpl SectionDRI;
+    SectionDRI.p = reinterpret_cast<uintptr_t>(&Sec);
+
+    // Section data.
+    if (Sec.SectionSize) {
+      Expected<ArrayRef<uint8_t>> ContentsRef =
+          XCOFFObj.getSectionContents(SectionDRI);
+      if (!ContentsRef)
+        return ContentsRef.takeError();
+      ReadSec.Contents = ContentsRef.get();
+    }
+
+    // Relocations.
+    if (Sec.NumberOfRelocations) {
+      auto Relocations =
+          XCOFFObj.relocations<XCOFFSectionHeader32, XCOFFRelocation32>(Sec);
+      if (!Relocations)
+        return Relocations.takeError();
+      for (const XCOFFRelocation32 &Rel : Relocations.get())
+        ReadSec.Relocations.push_back(Rel);
+    }
+
+    Obj.Sections.push_back(std::move(ReadSec));
+  }
+  return Error::success();
+}
+
+Error XCOFFReader::readSymbols(Object &Obj) const {
+  std::vector<Symbol> Symbols;
+  Symbols.reserve(XCOFFObj.getNumberOfSymbolTableEntries());
+  for (SymbolRef Sym : XCOFFObj.symbols()) {
+    Symbol ReadSym;
+    DataRefImpl SymbolDRI = Sym.getRawDataRefImpl();
+    XCOFFSymbolRef SymbolEntRef = XCOFFObj.toSymbolRef(SymbolDRI);
+    ReadSym.Sym = *SymbolEntRef.getSymbol32();
+    // Auxiliary entries.
+    if (SymbolEntRef.getNumberOfAuxEntries()) {
+      const char *Start = reinterpret_cast<const char *>(
+          SymbolDRI.p + XCOFF::SymbolTableEntrySize);
+      Expected<StringRef> RawAuxEntriesOrError = XCOFFObj.getRawData(
+          Start,
+          XCOFF::SymbolTableEntrySize * SymbolEntRef.getNumberOfAuxEntries(),
+          StringRef("symbol"));
+      if (!RawAuxEntriesOrError)
+        return RawAuxEntriesOrError.takeError();
+      ReadSym.AuxSymbolEntries = RawAuxEntriesOrError.get();
+    }
+    Obj.Symbols.push_back(std::move(ReadSym));
+  }
+  return Error::success();
+}
+
+Expected<std::unique_ptr<Object>> XCOFFReader::create() const {
+  auto Obj = std::make_unique<Object>();
+  // Only 32-bit is supported now.
+  if (XCOFFObj.is64Bit())
+    return createStringError(object_error::invalid_file_type,
+                             "64-bit XCOFF is not supported yet");
+  // Read the file header.
+  Obj->FileHeader = *XCOFFObj.fileHeader32();
+  // Read the optional header.
+  if (XCOFFObj.getOptionalHeaderSize())
+    Obj->OptionalFileHeader = *XCOFFObj.auxiliaryHeader32();
+  // Read each section.
+  Obj->Sections.reserve(XCOFFObj.getNumberOfSections());
+  if (Error E = readSections(*Obj))
+    return std::move(E);
+  // Read each symbol.
+  Obj->Symbols.reserve(XCOFFObj.getRawNumberOfSymbolTableEntries32());
+  if (Error E = readSymbols(*Obj))
+    return std::move(E);
+  // String table.
+  Obj->StringTable = XCOFFObj.getStringTable();
+  return std::move(Obj);
+}
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
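readSymbols above sizes each auxiliary blob as NumberOfAuxEntries times XCOFF::SymbolTableEntrySize, relying on XCOFF's fixed-size symbol-table entries (main and auxiliary entries are both 18 bytes). A standalone restatement of that invariant (illustrative, not part of the patch):

    #include "llvm/BinaryFormat/XCOFF.h"

    static_assert(llvm::XCOFF::SymbolTableEntrySize == 18,
                  "main and auxiliary XCOFF symbol entries share one size");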
+ Obj->StringTable = XCOFFObj.getStringTable(); + return std::move(Obj); +} + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFReader.h b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.h new file mode 100644 index 000000000000..63a8d8579d37 --- /dev/null +++ b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.h @@ -0,0 +1,35 @@ +//===- XCOFFReader.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H +#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H + +#include "XCOFFObject.h" + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +class XCOFFReader { +public: + explicit XCOFFReader(const XCOFFObjectFile &O) : XCOFFObj(O) {} + Expected<std::unique_ptr<Object>> create() const; + +private: + const XCOFFObjectFile &XCOFFObj; + Error readSections(Object &Obj) const; + Error readSymbols(Object &Obj) const; +}; + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp new file mode 100644 index 000000000000..bae3128822e2 --- /dev/null +++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp @@ -0,0 +1,125 @@ +//===- XCOFFWriter.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Errc.h" +#include "XCOFFWriter.h" + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +void XCOFFWriter::finalizeHeaders() { + // File header. + FileSize += sizeof(XCOFFFileHeader32); + // Optional file header. + FileSize += Obj.FileHeader.AuxHeaderSize; + // Section headers. + FileSize += sizeof(XCOFFSectionHeader32) * Obj.Sections.size(); +} + +void XCOFFWriter::finalizeSections() { + for (const Section &Sec : Obj.Sections) { + // Section data. + FileSize += Sec.Contents.size(); + // Relocations. + FileSize += + Sec.SectionHeader.NumberOfRelocations * sizeof(XCOFFRelocation32); + } +} + +void XCOFFWriter::finalizeSymbolStringTable() { + assert(Obj.FileHeader.SymbolTableOffset >= FileSize); + FileSize = Obj.FileHeader.SymbolTableOffset; + // Symbols and auxiliary entries. + FileSize += + Obj.FileHeader.NumberOfSymTableEntries * XCOFF::SymbolTableEntrySize; + // String table. + FileSize += Obj.StringTable.size(); +} + +void XCOFFWriter::finalize() { + FileSize = 0; + finalizeHeaders(); + finalizeSections(); + finalizeSymbolStringTable(); +} + +void XCOFFWriter::writeHeaders() { + // Write the file header. + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); + memcpy(Ptr, &Obj.FileHeader, sizeof(XCOFFFileHeader32)); + Ptr += sizeof(XCOFFFileHeader32); + + // Write the optional header. + if (Obj.FileHeader.AuxHeaderSize) { + memcpy(Ptr, &Obj.OptionalFileHeader, Obj.FileHeader.AuxHeaderSize); + Ptr += Obj.FileHeader.AuxHeaderSize; + } + + // Write section headers.
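+  // The section headers emitted here immediately follow the optional header; + // the raw data, relocations, and symbol/string tables written by the other + // write* methods land at the file offsets already recorded in these headers.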
+ for (const Section &Sec : Obj.Sections) { + memcpy(Ptr, &Sec.SectionHeader, sizeof(XCOFFSectionHeader32)); + Ptr += sizeof(XCOFFSectionHeader32); + } +} + +void XCOFFWriter::writeSections() { + // Write section data. + for (const Section &Sec : Obj.Sections) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Sec.SectionHeader.FileOffsetToRawData; + Ptr = std::copy(Sec.Contents.begin(), Sec.Contents.end(), Ptr); + } + + // Write relocations. + for (const Section &Sec : Obj.Sections) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Sec.SectionHeader.FileOffsetToRelocationInfo; + for (const XCOFFRelocation32 &Rel : Sec.Relocations) { + memcpy(Ptr, &Rel, sizeof(XCOFFRelocation32)); + Ptr += sizeof(XCOFFRelocation32); + } + } +} + +void XCOFFWriter::writeSymbolStringTable() { + // Write symbols. + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Obj.FileHeader.SymbolTableOffset; + for (const Symbol &Sym : Obj.Symbols) { + memcpy(Ptr, &Sym.Sym, XCOFF::SymbolTableEntrySize); + Ptr += XCOFF::SymbolTableEntrySize; + // Auxiliary symbols. + memcpy(Ptr, Sym.AuxSymbolEntries.data(), Sym.AuxSymbolEntries.size()); + Ptr += Sym.AuxSymbolEntries.size(); + } + // Write the string table. + memcpy(Ptr, Obj.StringTable.data(), Obj.StringTable.size()); + Ptr += Obj.StringTable.size(); +} + +Error XCOFFWriter::write() { + finalize(); + Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize); + if (!Buf) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(FileSize) + " bytes"); + + writeHeaders(); + writeSections(); + writeSymbolStringTable(); + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h new file mode 100644 index 000000000000..54c7b5f3ccbe --- /dev/null +++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h @@ -0,0 +1,48 @@ +//===- XCOFFWriter.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H +#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H + +#include "llvm/Support/MemoryBuffer.h" +#include "XCOFFObject.h" + +#include <cstdint> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace xcoff { + +class XCOFFWriter { +public: + virtual ~XCOFFWriter() {} + XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {} + Error write(); + +private: + Object &Obj; + raw_ostream &Out; + std::unique_ptr<WritableMemoryBuffer> Buf; + size_t FileSize; + + void finalizeHeaders(); + void finalizeSections(); + void finalizeSymbolStringTable(); + void finalize(); + + void writeHeaders(); + void writeSections(); + void writeSymbolStringTable(); +}; + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H diff --git a/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp b/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp new file mode 100644 index 000000000000..6877cd68bee4 --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp @@ -0,0 +1,160 @@ +//===- WasmObjcopy.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/wasm/WasmObjcopy.h" +#include "WasmObject.h" +#include "WasmReader.h" +#include "WasmWriter.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileOutputBuffer.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using SectionPred = std::function<bool(const Section &Sec)>; + +static bool isDebugSection(const Section &Sec) { + return Sec.Name.startswith(".debug"); +} + +static bool isLinkerSection(const Section &Sec) { + return Sec.Name.startswith("reloc.") || Sec.Name == "linking"; +} + +static bool isNameSection(const Section &Sec) { return Sec.Name == "name"; } + +// Sections which are known to be "comments" or informational and do not affect +// program semantics. +static bool isCommentSection(const Section &Sec) { + return Sec.Name == "producers"; +} + +static Error dumpSectionToFile(StringRef SecName, StringRef Filename, + Object &Obj) { + for (const Section &Sec : Obj.Sections) { + if (Sec.Name == SecName) { + ArrayRef<uint8_t> Contents = Sec.Contents; + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Filename, Contents.size()); + if (!BufferOrErr) + return BufferOrErr.takeError(); + std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); + std::copy(Contents.begin(), Contents.end(), Buf->getBufferStart()); + if (Error E = Buf->commit()) + return E; + return Error::success(); + } + } + return createStringError(errc::invalid_argument, "section '%s' not found", + SecName.str().c_str()); +} + +static void removeSections(const CommonConfig &Config, Object &Obj) { + SectionPred RemovePred = [](const Section &) { return false; }; + + // Explicitly-requested sections.
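+  // Ordering note: the strip options below wrap the previous predicate, the + // "only" options replace it outright, and KeepSection is consulted last, so + // an explicit keep always wins over any removal rule.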
+ if (!Config.ToRemove.empty()) { + RemovePred = [&Config](const Section &Sec) { + return Config.ToRemove.matches(Sec.Name); + }; + } + + if (Config.StripDebug) { + RemovePred = [RemovePred](const Section &Sec) { + return RemovePred(Sec) || isDebugSection(Sec); + }; + } + + if (Config.StripAll) { + RemovePred = [RemovePred](const Section &Sec) { + return RemovePred(Sec) || isDebugSection(Sec) || isLinkerSection(Sec) || + isNameSection(Sec) || isCommentSection(Sec); + }; + } + + if (Config.OnlyKeepDebug) { + RemovePred = [&Config](const Section &Sec) { + // Keep debug sections, unless explicitly requested to remove. + // Remove everything else, including known sections. + return Config.ToRemove.matches(Sec.Name) || !isDebugSection(Sec); + }; + } + + if (!Config.OnlySection.empty()) { + RemovePred = [&Config](const Section &Sec) { + // Explicitly keep these sections regardless of previous removes. + // Remove everything else, including known sections. + return !Config.OnlySection.matches(Sec.Name); + }; + } + + if (!Config.KeepSection.empty()) { + RemovePred = [&Config, RemovePred](const Section &Sec) { + // Explicitly keep these sections regardless of previous removes. + if (Config.KeepSection.matches(Sec.Name)) + return false; + // Otherwise defer to RemovePred. + return RemovePred(Sec); + }; + } + + Obj.removeSections(RemovePred); +} + +static Error handleArgs(const CommonConfig &Config, Object &Obj) { + // Only support AddSection, DumpSection, RemoveSection for now. + for (StringRef Flag : Config.DumpSection) { + StringRef SecName; + StringRef FileName; + std::tie(SecName, FileName) = Flag.split("="); + if (Error E = dumpSectionToFile(SecName, FileName, Obj)) + return createFileError(FileName, std::move(E)); + } + + removeSections(Config, Obj); + + for (const NewSectionInfo &NewSection : Config.AddSection) { + Section Sec; + Sec.SectionType = llvm::wasm::WASM_SEC_CUSTOM; + Sec.Name = NewSection.SectionName; + + std::unique_ptr<MemoryBuffer> BufferCopy = MemoryBuffer::getMemBufferCopy( + NewSection.SectionData->getBufferStart(), + NewSection.SectionData->getBufferIdentifier()); + Sec.Contents = makeArrayRef( + reinterpret_cast<const uint8_t *>(BufferCopy->getBufferStart()), + BufferCopy->getBufferSize()); + + Obj.addSectionWithOwnedContents(Sec, std::move(BufferCopy)); + } + + return Error::success(); +} + +Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &, + object::WasmObjectFile &In, raw_ostream &Out) { + Reader TheReader(In); + Expected<std::unique_ptr<Object>> ObjOrErr = TheReader.create(); + if (!ObjOrErr) + return createFileError(Config.InputFilename, ObjOrErr.takeError()); + Object *Obj = ObjOrErr->get(); + assert(Obj && "Unable to deserialize Wasm object"); + if (Error E = handleArgs(Config, *Obj)) + return E; + Writer TheWriter(*Obj, Out); + if (Error E = TheWriter.write()) + return createFileError(Config.OutputFilename, std::move(E)); + return Error::success(); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/wasm/WasmObject.cpp b/llvm/lib/ObjCopy/wasm/WasmObject.cpp new file mode 100644 index 000000000000..28a2de6e6e4f --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmObject.cpp @@ -0,0 +1,34 @@ +//===- WasmObject.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "WasmObject.h" + +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +void Object::addSectionWithOwnedContents( + Section NewSection, std::unique_ptr<MemoryBuffer> &&Content) { + Sections.push_back(NewSection); + OwnedContents.emplace_back(std::move(Content)); +} + +void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { + // TODO: remove reloc sections for the removed section, handle symbols, etc. + llvm::erase_if(Sections, ToRemove); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/wasm/WasmObject.h b/llvm/lib/ObjCopy/wasm/WasmObject.h new file mode 100644 index 000000000000..9bc5831926c6 --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmObject.h @@ -0,0 +1,47 @@ +//===- WasmObject.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H +#define LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/MemoryBuffer.h" +#include <vector> + +namespace llvm { +namespace objcopy { +namespace wasm { + +struct Section { + // For now, each section is only an opaque binary blob with no distinction + // between custom and known sections. + uint8_t SectionType; + StringRef Name; + ArrayRef<uint8_t> Contents; +}; + +struct Object { + llvm::wasm::WasmObjectHeader Header; + // For now don't discriminate between kinds of sections. + std::vector<Section>
Sections; + + void addSectionWithOwnedContents(Section NewSection, + std::unique_ptr<MemoryBuffer> &&Content); + void removeSections(function_ref<bool(const Section &)> ToRemove); + +private: + std::vector<std::unique_ptr<MemoryBuffer>> OwnedContents; +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H diff --git a/llvm/lib/ObjCopy/wasm/WasmReader.cpp b/llvm/lib/ObjCopy/wasm/WasmReader.cpp new file mode 100644 index 000000000000..6e7d8b5591c9 --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmReader.cpp @@ -0,0 +1,39 @@ +//===- WasmReader.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "WasmReader.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +Expected<std::unique_ptr<Object>> Reader::create() const { + auto Obj = std::make_unique<Object>(); + Obj->Header = WasmObj.getHeader(); + std::vector<Section>
Sections; + Obj->Sections.reserve(WasmObj.getNumSections()); + for (const SectionRef &Sec : WasmObj.sections()) { + const WasmSection &WS = WasmObj.getWasmSection(Sec); + Obj->Sections.push_back( + {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content}); + // Give known sections standard names to allow them to be selected. (Custom + // sections already have their names filled in by the parser). + Section &ReaderSec = Obj->Sections.back(); + if (ReaderSec.SectionType > WASM_SEC_CUSTOM && + ReaderSec.SectionType <= WASM_SEC_LAST_KNOWN) + ReaderSec.Name = sectionTypeToString(ReaderSec.SectionType); + } + return std::move(Obj); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/wasm/WasmReader.h b/llvm/lib/ObjCopy/wasm/WasmReader.h new file mode 100644 index 000000000000..d71660fa2b65 --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmReader.h @@ -0,0 +1,31 @@ +//===- WasmReader.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_WASM_WASMREADER_H +#define LLVM_LIB_OBJCOPY_WASM_WASMREADER_H + +#include "WasmObject.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +class Reader { +public: + explicit Reader(const object::WasmObjectFile &O) : WasmObj(O) {} + Expected<std::unique_ptr<Object>> create() const; + +private: + const object::WasmObjectFile &WasmObj; +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_WASM_WASMREADER_H diff --git a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp new file mode 100644 index 000000000000..fdcd441cc798 --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp @@ -0,0 +1,79 @@ +//===- WasmWriter.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "WasmWriter.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +Writer::SectionHeader Writer::createSectionHeader(const Section &S, + size_t &SectionSize) { + SectionHeader Header; + raw_svector_ostream OS(Header); + OS << S.SectionType; + bool HasName = S.SectionType == WASM_SEC_CUSTOM; + SectionSize = S.Contents.size(); + if (HasName) + SectionSize += getULEB128Size(S.Name.size()) + S.Name.size(); + // Pad the LEB value out to 5 bytes to make it a predictable size, and + // match the behavior of clang. + encodeULEB128(SectionSize, OS, 5); + if (HasName) { + encodeULEB128(S.Name.size(), OS); + OS << S.Name; + } + // Total section size is the content size plus 1 for the section type and + // 5 for the LEB-encoded size.
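+  // Worked example: a custom section named "name" carrying 10 bytes of + // payload encodes SectionSize = 10 + 1 + 4 = 15 into the padded LEB and + // occupies 1 (id) + 5 (LEB) + 1 (name length) + 4 ("name") + 10 (payload) + // = 21 bytes on disk, matching the "+ 1 + 5" adjustment below.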
+ SectionSize = SectionSize + 1 + 5; + return Header; +} + +size_t Writer::finalize() { + size_t ObjectSize = sizeof(WasmMagic) + sizeof(WasmVersion); + SectionHeaders.reserve(Obj.Sections.size()); + // Finalize the headers of each section so we know the total size. + for (const Section &S : Obj.Sections) { + size_t SectionSize; + SectionHeaders.push_back(createSectionHeader(S, SectionSize)); + ObjectSize += SectionSize; + } + return ObjectSize; +} + +Error Writer::write() { + size_t TotalSize = finalize(); + Out.reserveExtraSpace(TotalSize); + + // Write the header. + Out.write(Obj.Header.Magic.data(), Obj.Header.Magic.size()); + uint32_t Version; + support::endian::write32le(&Version, Obj.Header.Version); + Out.write(reinterpret_cast<const char *>(&Version), sizeof(Version)); + + // Write each section. + for (size_t I = 0, S = SectionHeaders.size(); I < S; ++I) { + Out.write(SectionHeaders[I].data(), SectionHeaders[I].size()); + Out.write(reinterpret_cast<const char *>(Obj.Sections[I].Contents.data()), + Obj.Sections[I].Contents.size()); + } + + return Error::success(); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/lib/ObjCopy/wasm/WasmWriter.h b/llvm/lib/ObjCopy/wasm/WasmWriter.h new file mode 100644 index 000000000000..14bbcf88875e --- /dev/null +++ b/llvm/lib/ObjCopy/wasm/WasmWriter.h @@ -0,0 +1,49 @@ +//===- WasmWriter.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H +#define LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H + +#include "WasmObject.h" +#include <cstdint> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace wasm { + +class Writer { +public: + Writer(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {} + Error write(); + +private: + using SectionHeader = SmallVector<char, 8>; + Object &Obj; + raw_ostream &Out; + std::vector<SectionHeader> SectionHeaders; + + /// Generate a wasm section header for S. + /// The header consists of + /// * A one-byte section ID (aka the section type). + /// * The size of the section contents, encoded as ULEB128. + /// * If the section is a custom section (type 0) it also has a name, which is + /// encoded as a length-prefixed string. The encoded section size *includes* + /// this string. + /// See https://webassembly.github.io/spec/core/binary/modules.html#sections + /// Return the header and store the total size in SectionSize.
+ static SectionHeader createSectionHeader(const Section &S, + size_t &SectionSize); + size_t finalize(); +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp index 9a4ef055faa4..ad03f9cae9f8 100644 --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -30,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -257,6 +257,14 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { return Name; if (Name.size() == 2 && Name[1] == '/') // String table. return Name; + // System libraries from the Windows SDK for Windows 11 contain this symbol. + // It looks like a CFG guard: we just skip it for now. + if (Name.equals("/<XFGHASHMAP>/")) + return Name; + // Some libraries (e.g., arm64rt.lib) from the Windows WDK + // (version 10.0.22000.0) contain this undocumented special member. + if (Name.equals("/<ECSYMBOLS>/")) + return Name; // It's a long name. // Get the string table offset. std::size_t StringOffset; @@ -922,6 +930,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) Err = Error::success(); } +object::Archive::Kind Archive::getDefaultKindForHost() { + Triple HostTriple(sys::getProcessTriple()); + return HostTriple.isOSDarwin() + ? object::Archive::K_DARWIN + : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG + : object::Archive::K_GNU); +} + Archive::child_iterator Archive::child_begin(Error &Err, bool SkipInternal) const { if (isEmpty()) diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index 053b3dafed95..dbf5052cdac0 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -18,16 +18,19 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Error.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Object/XCOFFObjectFile.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" -#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" #include @@ -44,6 +47,40 @@ NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} +object::Archive::Kind NewArchiveMember::detectKindFromObject() const { + auto MemBufferRef = this->Buf->getMemBufferRef(); + Expected<std::unique_ptr<object::ObjectFile>> OptionalObject = + object::ObjectFile::createObjectFile(MemBufferRef); + + if (OptionalObject) + return isa<object::MachOObjectFile>(**OptionalObject) + ? object::Archive::K_DARWIN + : (isa<object::XCOFFObjectFile>(**OptionalObject) + ? object::Archive::K_AIXBIG + : object::Archive::K_GNU); + + // Squelch the error in case we had a non-object file. + consumeError(OptionalObject.takeError()); + + // If we're adding a bitcode file to the archive, detect the Archive kind + // based on the target triple.
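+  // A bitcode member has no object-file container to inspect, so the kind is + // inferred from the module's target triple instead; note that non-Darwin + // bitcode falls back to a GNU archive rather than the AIX big format.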
+ LLVMContext Context; + if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) { + if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile( + MemBufferRef, file_magic::bitcode, &Context)) { + auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr); + return Triple(IRObject.getTargetTriple()).isOSDarwin() + ? object::Archive::K_DARWIN + : object::Archive::K_GNU; + } else { + // Squelch the error in case this was not a SymbolicFile. + consumeError(ObjOrErr.takeError()); + } + } + + return object::Archive::getDefaultKindForHost(); +} + Expected<NewArchiveMember> NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, bool Deterministic) { @@ -128,16 +165,20 @@ static bool isDarwin(object::Archive::Kind Kind) { Kind == object::Archive::K_DARWIN64; } +static bool isAIXBigArchive(object::Archive::Kind Kind) { + return Kind == object::Archive::K_AIXBIG; +} + static bool isBSDLike(object::Archive::Kind Kind) { switch (Kind) { case object::Archive::K_GNU: case object::Archive::K_GNU64: + case object::Archive::K_AIXBIG: return false; case object::Archive::K_BSD: case object::Archive::K_DARWIN: case object::Archive::K_DARWIN64: return true; - case object::Archive::K_AIXBIG: case object::Archive::K_COFF: break; } @@ -190,6 +231,31 @@ printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, Out.write(uint8_t(0)); } +static void +printBigArchiveMemberHeader(raw_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + uint64_t Size, unsigned PrevOffset, + unsigned NextOffset) { + unsigned NameLen = Name.size(); + + printWithSpacePadding(Out, Size, 20); // File member size + printWithSpacePadding(Out, NextOffset, 20); // Next member header offset + printWithSpacePadding(Out, PrevOffset, 20); // Previous member header offset + printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // File member date + // The big archive format has 12 chars for uid and gid. + printWithSpacePadding(Out, UID % 1000000000000, 12); // UID + printWithSpacePadding(Out, GID % 1000000000000, 12); // GID + printWithSpacePadding(Out, format("%o", Perms), 12); // Permission + printWithSpacePadding(Out, NameLen, 4); // Name length + if (NameLen) { + printWithSpacePadding(Out, Name, NameLen); // Name + if (NameLen % 2) + Out.write(uint8_t(0)); // Null byte padding + } + Out << "`\n"; // Terminator +} + static bool useStringTable(bool Thin, StringRef Name) { return Thin || Name.size() >= 16 || Name.contains('/'); } @@ -200,8 +266,8 @@ static bool is64BitKind(object::Archive::Kind Kind) { case object::Archive::K_BSD: case object::Archive::K_DARWIN: case object::Archive::K_COFF: - case object::Archive::K_AIXBIG: return false; + case object::Archive::K_AIXBIG: case object::Archive::K_DARWIN64: case object::Archive::K_GNU64: return true; @@ -305,7 +371,11 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. // We do this for all bsd formats because it simplifies aligning members. - uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); + // For the big archive format, the symbol table is the last member, so there + // is no need to align. + uint32_t Pad = isAIXBigArchive(Kind) + ? 0 + : offsetToAlignment(Size, Align(isBSDLike(Kind) ?
8 : 2)); Size += Pad; if (Padding) *Padding = Pad; @@ -313,11 +383,15 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, - bool Deterministic, uint64_t Size) { + bool Deterministic, uint64_t Size, + uint64_t PrevMemberOffset = 0) { if (isBSDLike(Kind)) { const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, Size); + } else if (isAIXBigArchive(Kind)) { + printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0, + 0, Size, PrevMemberOffset, 0); } else { const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); @@ -326,7 +400,8 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, bool Deterministic, ArrayRef<MemberData> Members, - StringRef StringTable) { + StringRef StringTable, + uint64_t PrevMemberOffset = 0) { // We don't write a symbol table on an archive with no members -- except on // Darwin, where the linker will abort unless the archive has a symbol table. if (StringTable.empty() && !isDarwin(Kind)) @@ -339,9 +414,10 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; uint32_t Pad; uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); - writeSymbolTableHeader(Out, Kind, Deterministic, Size); + writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset); - uint64_t Pos = Out.tell() + Size; + uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) + : Out.tell() + Size; if (isBSDLike(Kind)) printNBits(Out, Kind, NumSyms * 2 * OffsetSize); @@ -410,9 +486,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; - // This ignores the symbol table, but we only need the value mod 8 and the - // symbol table is aligned to be a multiple of 8 bytes - uint64_t Pos = 0; + uint64_t Pos = + isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0; std::vector<MemberData> Ret; bool HasObject = false; @@ -472,6 +547,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, Entry.second = Entry.second > 1 ? 1 : 0; } + // The big archive format needs to know the offset of the previous member + // header.
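+  // Big archive member headers form a doubly linked list: each header stores + // both its predecessor's and successor's offsets, so PrevOffset carries the + // back link while NextOffset is derived from the running position.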
+ unsigned PrevOffset = 0; for (const NewArchiveMember &M : NewMembers) { std::string Header; raw_string_ostream Out(Header); @@ -504,8 +582,16 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, std::move(StringMsg), object::object_error::parse_failed); } - printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, - ModTime, Size); + if (isAIXBigArchive(Kind)) { + unsigned NextOffset = Pos + sizeof(object::BigArMemHdrType) + + alignTo(M.MemberName.size(), 2) + alignTo(Size, 2); + printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, + M.Perms, Size, PrevOffset, NextOffset); + PrevOffset = Pos; + } else { + printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, + ModTime, Size); + } Out.flush(); std::vector<unsigned> Symbols; @@ -589,22 +675,25 @@ static Error writeArchiveToStream(raw_ostream &Out, return E; std::vector<MemberData> &Data = *DataOrErr; - if (!StringTableBuf.empty()) + if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) Data.insert(Data.begin(), computeStringTable(StringTableBuf)); // We would like to detect if we need to switch to a 64-bit symbol table. - if (WriteSymtab) { - uint64_t MaxOffset = 8; // For the file signature. - uint64_t LastOffset = MaxOffset; - uint64_t NumSyms = 0; - for (const auto &M : Data) { - // Record the start of the member's offset - LastOffset = MaxOffset; - // Account for the size of each part associated with the member. - MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); - NumSyms += M.Symbols.size(); - } + uint64_t LastMemberEndOffset = + isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 8; + uint64_t LastMemberHeaderOffset = LastMemberEndOffset; + uint64_t NumSyms = 0; + for (const auto &M : Data) { + // Record the start of the member's offset + LastMemberHeaderOffset = LastMemberEndOffset; + // Account for the size of each part associated with the member. + LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size(); + NumSyms += M.Symbols.size(); + } + // The symbol table is put at the end of the big archive file. The symbol + // table is at the start of the archive file for other archive formats. + if (WriteSymtab && !isAIXBigArchive(Kind)) { // We assume 32-bit offsets to see if 32-bit symbols are possible or not. uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); auto computeSymbolTableHeaderSize = @@ -614,7 +703,7 @@ static Error writeArchiveToStream(raw_ostream &Out, writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); return TmpBuf.size(); }; - LastOffset += computeSymbolTableHeaderSize() + SymtabSize; + LastMemberHeaderOffset += computeSymbolTableHeaderSize() + SymtabSize; // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by @@ -628,10 +717,10 @@ static Error writeArchiveToStream(raw_ostream &Out, if (Sym64Env) StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); - // If LastOffset isn't going to fit in a 32-bit varible we need to switch - // to 64-bit. Note that the file can be larger than 4GB as long as the last - // member starts before the 4GB offset. - if (LastOffset >= Sym64Threshold) { + // If LastMemberHeaderOffset isn't going to fit in a 32-bit variable we need + // to switch to 64-bit. Note that the file can be larger than 4GB as long as + // the last member starts before the 4GB offset.
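+  // For instance, an archive whose last member header would begin at or past + // the threshold (presumably 2^32 bytes unless overridden above) switches + // GNU archives to the /SYM64 layout and Darwin archives to K_DARWIN64.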
+ if (LastMemberHeaderOffset >= Sym64Threshold) { if (Kind == object::Archive::K_DARWIN) Kind = object::Archive::K_DARWIN64; else @@ -641,15 +730,92 @@ static Error writeArchiveToStream(raw_ostream &Out, if (Thin) Out << "!<thin>\n"; + else if (isAIXBigArchive(Kind)) + Out << "<bigaf>\n"; else Out << "!<arch>\n"; - if (WriteSymtab) - writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); + if (!isAIXBigArchive(Kind)) { + if (WriteSymtab) + writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); + for (const MemberData &M : Data) + Out << M.Header << M.Data << M.Padding; + } else { + // For the big archive (AIX) format, compute a table of member names and + // offsets, used in the member table. + uint64_t MemberTableNameStrTblSize = 0; + std::vector<uint64_t> MemberOffsets; + std::vector<StringRef> MemberNames; + // Loop across object to find offset and names. + uint64_t MemberEndOffset = sizeof(object::BigArchive::FixLenHdr); + for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) { + const NewArchiveMember &Member = NewMembers[I]; + MemberTableNameStrTblSize += Member.MemberName.size() + 1; + MemberOffsets.push_back(MemberEndOffset); + MemberNames.push_back(Member.MemberName); + // File member name ended with "`\n". The length is included in + // BigArMemHdrType. + MemberEndOffset += sizeof(object::BigArMemHdrType) + + alignTo(Data[I].Data.size(), 2) + + alignTo(Member.MemberName.size(), 2); + } - for (const MemberData &M : Data) - Out << M.Header << M.Data << M.Padding; + // AIX member table size. + unsigned MemberTableSize = 20 + // Number of members field + 20 * MemberOffsets.size() + + MemberTableNameStrTblSize; + + unsigned GlobalSymbolOffset = + (WriteSymtab && NumSyms > 0) + ? LastMemberEndOffset + + alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2) + : 0; + + // Fixed Sized Header. + printWithSpacePadding(Out, NewMembers.size() ? LastMemberEndOffset : 0, + 20); // Offset to member table + // If there are no file members in the archive, there will be no global + // symbol table. + printWithSpacePadding(Out, NewMembers.size() ? GlobalSymbolOffset : 0, 20); + printWithSpacePadding( + Out, 0, + 20); // Offset to 64 bits global symbol table - Not supported yet + printWithSpacePadding( + Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0, + 20); // Offset to first archive member + printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0, + 20); // Offset to last archive member + printWithSpacePadding( + Out, 0, + 20); // Offset to first member of free list - Not supported yet + + for (const MemberData &M : Data) { + Out << M.Header << M.Data; + if (M.Data.size() % 2) + Out << '\0'; + } + if (NewMembers.size()) { + // Member table. + printBigArchiveMemberHeader(Out, "", sys::toTimePoint(0), 0, 0, 0, + MemberTableSize, LastMemberHeaderOffset, + GlobalSymbolOffset); + printWithSpacePadding(Out, MemberOffsets.size(), 20); // Number of members + for (uint64_t MemberOffset : MemberOffsets) + printWithSpacePadding(Out, MemberOffset, + 20); // Offset to member file header. + for (StringRef MemberName : MemberNames) + Out << MemberName << '\0'; // Member file name, null byte padding. + + if (MemberTableNameStrTblSize % 2) + Out << '\0'; // Name table must be tail padded to an even number of + // bytes.
+ + if (WriteSymtab && NumSyms > 0) + writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, + LastMemberEndOffset); + } + } Out.flush(); return Error::success(); } diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp index 143554344256..8065e3eb1d85 100644 --- a/llvm/lib/Object/Binary.cpp +++ b/llvm/lib/Object/Binary.cpp @@ -18,14 +18,13 @@ #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/Minidump.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" #include "llvm/Object/TapiUniversal.h" #include "llvm/Object/WindowsResource.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include #include #include @@ -84,9 +83,13 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, // PDB does not support the Binary interface. return errorCodeToError(object_error::invalid_file_type); case file_magic::unknown: + case file_magic::cuda_fatbinary: case file_magic::coff_cl_gl_object: + case file_magic::dxcontainer_object: // Unrecognized object file format. return errorCodeToError(object_error::invalid_file_type); + case file_magic::offload_binary: + return OffloadBinary::create(Buffer); case file_magic::minidump: return MinidumpFile::create(Buffer); case file_magic::tapi_file: diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp index 69bbf70b43a1..91ecea11511d 100644 --- a/llvm/lib/Object/COFFImportFile.cpp +++ b/llvm/lib/Object/COFFImportFile.cpp @@ -12,10 +12,14 @@ #include "llvm/Object/COFFImportFile.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFF.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include diff --git a/llvm/lib/Object/COFFModuleDefinition.cpp b/llvm/lib/Object/COFFModuleDefinition.cpp index 55ddd3baca2b..0666970d5c60 100644 --- a/llvm/lib/Object/COFFModuleDefinition.cpp +++ b/llvm/lib/Object/COFFModuleDefinition.cpp @@ -17,12 +17,10 @@ #include "llvm/Object/COFFModuleDefinition.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Object/COFF.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/Error.h" #include "llvm/Support/Error.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm::COFF; using namespace llvm; diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index 354b3c0d5577..1a4bb329201a 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -25,7 +25,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include #include #include @@ -447,7 +448,8 @@ Error COFFObjectFile::initSymbolTablePtr() { // Check that the string table is null terminated if it has any content in it.
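// The first four bytes of a COFF string table encode the table's own total // size, so a table that actually contains strings is always larger than four // bytes; that is what the size check below relies on.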
if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0) - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "string table missing null terminator"); return Error::success(); } @@ -469,23 +470,43 @@ Error COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { } // Returns the file offset for the given RVA. -Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { +Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res, + const char *ErrorContext) const { for (const SectionRef &S : sections()) { const coff_section *Section = getCOFFSection(S); uint32_t SectionStart = Section->VirtualAddress; uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize; if (SectionStart <= Addr && Addr < SectionEnd) { + // A table/directory entry can be pointing to somewhere in a stripped + // section, in an object that went through `objcopy --only-keep-debug`. + // In this case we don't want to cause the parsing of the object file to + // fail, otherwise it will be impossible to use this object as debug info + // in LLDB. Return SectionStrippedError here so that + // COFFObjectFile::initialize can ignore the error. + // Somewhat common binaries may have RVAs pointing outside of the + // provided raw data. Instead of rejecting the binaries, just + // treat the section as stripped for these purposes. + if (Section->SizeOfRawData < Section->VirtualSize && + Addr >= SectionStart + Section->SizeOfRawData) { + return make_error<SectionStrippedError>(); + } uint32_t Offset = Addr - SectionStart; Res = reinterpret_cast<uintptr_t>(base()) + Section->PointerToRawData + Offset; return Error::success(); } } - return errorCodeToError(object_error::parse_failed); + if (ErrorContext) + return createStringError(object_error::parse_failed, + "RVA 0x%" PRIx32 " for %s not found", Addr, + ErrorContext); + return createStringError(object_error::parse_failed, + "RVA 0x%" PRIx32 " not found", Addr); } Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, - ArrayRef<uint8_t> &Contents) const { + ArrayRef<uint8_t> &Contents, + const char *ErrorContext) const { for (const SectionRef &S : sections()) { const coff_section *Section = getCOFFSection(S); uint32_t SectionStart = Section->VirtualAddress; @@ -501,7 +522,12 @@ Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, return Error::success(); } } - return errorCodeToError(object_error::parse_failed); + if (ErrorContext) + return createStringError(object_error::parse_failed, + "RVA 0x%" PRIx32 " for %s not found", RVA, + ErrorContext); + return createStringError(object_error::parse_failed, + "RVA 0x%" PRIx32 " not found", RVA); } // Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name @@ -521,11 +547,12 @@ Error COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, const codeview::DebugInfo *&PDBInfo, StringRef &PDBFileName) const { ArrayRef<uint8_t> InfoBytes; - if (Error E = getRvaAndSizeAsBytes( - DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes)) + if (Error E = + getRvaAndSizeAsBytes(DebugDir->AddressOfRawData, DebugDir->SizeOfData, + InfoBytes, "PDB info")) return E; if (InfoBytes.size() < sizeof(*PDBInfo) + 1) - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, "PDB info too small"); PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data()); InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo)); PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()), @@ -563,7 +590,7 @@ Error
COFFObjectFile::initImportTablePtr() { // Find the section that contains the RVA. This is needed because the RVA is // the import table's memory address which is different from its file offset. uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(ImportTableRva, IntPtr)) + if (Error E = getRvaPtr(ImportTableRva, IntPtr, "import table")) return E; if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) return E; @@ -586,8 +613,11 @@ Error COFFObjectFile::initDelayImportTablePtr() { sizeof(delay_import_directory_table_entry) - 1; uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(RVA, IntPtr)) + if (Error E = getRvaPtr(RVA, IntPtr, "delay import table")) return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) + return E; + DelayImportDirectory = reinterpret_cast< const delay_import_directory_table_entry *>(IntPtr); return Error::success(); @@ -607,8 +637,11 @@ Error COFFObjectFile::initExportTablePtr() { uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress; uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(ExportTableRva, IntPtr)) + if (Error E = getRvaPtr(ExportTableRva, IntPtr, "export table")) return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) + return E; + ExportDirectory = reinterpret_cast<const export_directory_table_entry *>(IntPtr); return Error::success(); @@ -623,8 +656,12 @@ Error COFFObjectFile::initBaseRelocPtr() { return Error::success(); uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr, + "base reloc table")) + return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) return E; + BaseRelocHeader = reinterpret_cast<const coff_base_reloc_block_header *>( IntPtr); BaseRelocEnd = reinterpret_cast<const coff_base_reloc_block_header *>( @@ -646,11 +683,16 @@ Error COFFObjectFile::initDebugDirectoryPtr() { // Check that the size is a multiple of the entry size.
if (DataEntry->Size % sizeof(debug_directory) != 0) - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "debug directory has uneven size"); uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr, + "debug directory")) + return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) return E; + DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr); DebugDirectoryEnd = reinterpret_cast<const debug_directory *>( IntPtr + DataEntry->Size); @@ -680,7 +722,10 @@ Error COFFObjectFile::initTLSDirectoryPtr() { static_cast<uint32_t>(DataEntry->Size), DirSize); uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + if (Error E = + getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr, "TLS directory")) + return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) return E; if (is64()) @@ -701,7 +746,10 @@ Error COFFObjectFile::initLoadConfigPtr() { if (DataEntry->RelativeVirtualAddress == 0) return Error::success(); uintptr_t IntPtr = 0; - if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr, + "load config table")) + return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) return E; LoadConfig = (const void *)IntPtr; @@ -727,6 +775,14 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object) DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr), TLSDirectory32(nullptr), TLSDirectory64(nullptr) {} +static Error ignoreStrippedErrors(Error E) { + if (E.isA<SectionStrippedError>()) { + consumeError(std::move(E)); + return Error::success(); + } + return E; +} + Error COFFObjectFile::initialize() { // Check that we at least have enough room for a header. std::error_code EC; @@ -749,7 +805,8 @@ Error COFFObjectFile::initialize() { CurPtr = DH->AddressOfNewExeHeader; // Check the PE magic bytes. ("PE\0\0") if (memcmp(base() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != 0) { - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "incorrect PE magic"); } CurPtr += sizeof(COFF::PEMagic); // Skip the PE magic bytes. HasPEHeader = true; @@ -805,7 +862,8 @@ Error COFFObjectFile::initialize() { DataDirSize = sizeof(data_directory) * PE32PlusHeader->NumberOfRvaAndSize; } else { // It's neither PE32 nor PE32+. - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "incorrect PE magic"); } if (Error E = getObject(DataDirectory, Data, DataDirAddr, DataDirSize)) return E; @@ -834,33 +892,34 @@ Error COFFObjectFile::initialize() { } else { // We had better not have any symbols if we don't have a symbol table. if (getNumberOfSymbols() != 0) { - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "symbol table missing"); } } // Initialize the pointer to the beginning of the import table. - if (Error E = initImportTablePtr()) + if (Error E = ignoreStrippedErrors(initImportTablePtr())) return E; - if (Error E = initDelayImportTablePtr()) + if (Error E = ignoreStrippedErrors(initDelayImportTablePtr())) return E; // Initialize the pointer to the export table. - if (Error E = initExportTablePtr()) + if (Error E = ignoreStrippedErrors(initExportTablePtr())) return E; // Initialize the pointer to the base relocation table.
- if (Error E = initBaseRelocPtr()) + if (Error E = ignoreStrippedErrors(initBaseRelocPtr())) return E; // Initialize the pointer to the debug directory. - if (Error E = initDebugDirectoryPtr()) + if (Error E = ignoreStrippedErrors(initDebugDirectoryPtr())) return E; // Initialize the pointer to the TLS directory. - if (Error E = initTLSDirectoryPtr()) + if (Error E = ignoreStrippedErrors(initTLSDirectoryPtr())) return E; - if (Error E = initLoadConfigPtr()) + if (Error E = ignoreStrippedErrors(initLoadConfigPtr())) return E; return Error::success(); @@ -1021,13 +1080,14 @@ Expected<const coff_section *> COFFObjectFile::getSection(int32_t Index) const { // We already verified the section table data, so no need to check again. return SectionTable + (Index - 1); } - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "section index out of bounds"); } Expected<StringRef> COFFObjectFile::getString(uint32_t Offset) const { if (StringTableSize <= 4) // Tried to get a string from an empty string table. - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, "string table empty"); if (Offset >= StringTableSize) return errorCodeToError(object_error::unexpected_eof); return StringRef(StringTable + Offset); @@ -1086,13 +1146,7 @@ uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const { Expected<StringRef> COFFObjectFile::getSectionName(const coff_section *Sec) const { - StringRef Name; - if (Sec->Name[COFF::NameSize - 1] == 0) - // Null terminated, let ::strlen figure out the length. - Name = Sec->Name; - else - // Not null terminated, use all 8 bytes. - Name = StringRef(Sec->Name, COFF::NameSize); + StringRef Name = StringRef(Sec->Name, COFF::NameSize).split('\0').first; // Check for string table entry. First byte is '/'. if (Name.startswith("/")) { @@ -1414,7 +1468,8 @@ ImportDirectoryEntryRef::lookup_table_symbols() const { Error ImportDirectoryEntryRef::getName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (Error E = OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr)) + if (Error E = OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr, + "import directory name")) return E; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return Error::success(); @@ -1460,7 +1515,8 @@ DelayImportDirectoryEntryRef::imported_symbols() const { Error DelayImportDirectoryEntryRef::getName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (Error E = OwningObject->getRvaPtr(Table[Index].Name, IntPtr)) + if (Error E = OwningObject->getRvaPtr(Table[Index].Name, IntPtr, + "delay import directory name")) return E; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return Error::success(); @@ -1477,7 +1533,7 @@ Error DelayImportDirectoryEntryRef::getImportAddress(int AddrIndex, uint32_t RVA = Table[Index].DelayImportAddressTable + AddrIndex * (OwningObject->is64() ? 8 : 4); uintptr_t IntPtr = 0; - if (Error E = OwningObject->getRvaPtr(RVA, IntPtr)) + if (Error E = OwningObject->getRvaPtr(RVA, IntPtr, "import address")) return E; if (OwningObject->is64()) Result = *reinterpret_cast<const ulittle64_t *>(IntPtr); @@ -1499,7 +1555,8 @@ void ExportDirectoryEntryRef::moveNext() { // by ordinal, the empty string is set as a result.
Error ExportDirectoryEntryRef::getDllName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (Error E = OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) + if (Error E = + OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr, "dll name")) return E; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return Error::success(); @@ -1520,8 +1577,8 @@ Error ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { // Returns the address of the current export symbol. Error ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { uintptr_t IntPtr = 0; - if (Error EC = - OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr)) + if (Error EC = OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, + IntPtr, "export address")) return EC; const export_address_table_entry *entry = reinterpret_cast<const export_address_table_entry *>(IntPtr); @@ -1534,8 +1591,8 @@ Error ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { Error ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (Error EC = - OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr)) + if (Error EC = OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr, + "export ordinal table")) return EC; const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr); @@ -1545,11 +1602,12 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { I < E; ++I, ++Offset) { if (*I != Index) continue; - if (Error EC = - OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr)) + if (Error EC = OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr, + "export table entry")) return EC; const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr); - if (Error EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) + if (Error EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr, + "export symbol name")) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return Error::success(); @@ -1562,7 +1620,8 @@ Error ExportDirectoryEntryRef::isForwarder(bool &Result) const { const data_directory *DataEntry = OwningObject->getDataDirectory(COFF::EXPORT_TABLE); if (!DataEntry) - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "export table missing"); uint32_t RVA; if (auto EC = getExportRVA(RVA)) return EC; @@ -1577,7 +1636,7 @@ Error ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const { if (auto EC = getExportRVA(RVA)) return EC; uintptr_t IntPtr = 0; - if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr)) + if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr, "export forward target")) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return Error::success(); @@ -1606,7 +1665,7 @@ Error ImportedSymbolRef::getSymbolName(StringRef &Result) const { RVA = Entry64[Index].getHintNameRVA(); } uintptr_t IntPtr = 0; - if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr)) + if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr, "import symbol name")) return EC; // +2 because the first two bytes are the hint.
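// A Hint/Name table entry is a two-byte ordinal hint followed immediately by // a NUL-terminated import name, which is why the name is read at IntPtr + 2.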
Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2)); @@ -1645,7 +1704,7 @@ Error ImportedSymbolRef::getOrdinal(uint16_t &Result) const { RVA = Entry64[Index].getHintNameRVA(); } uintptr_t IntPtr = 0; - if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr)) + if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr, "import symbol ordinal")) return EC; Result = *reinterpret_cast<const ulittle16_t *>(IntPtr); return Error::success(); } diff --git a/llvm/lib/Object/DXContainer.cpp b/llvm/lib/Object/DXContainer.cpp new file mode 100644 index 000000000000..ca859c1f69ae --- /dev/null +++ b/llvm/lib/Object/DXContainer.cpp @@ -0,0 +1,111 @@ +//===- DXContainer.cpp - DXContainer object file implementation -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/DXContainer.h" +#include "llvm/BinaryFormat/DXContainer.h" +#include "llvm/Object/Error.h" + +using namespace llvm; +using namespace llvm::object; + +static Error parseFailed(const Twine &Msg) { + return make_error<GenericBinaryError>(Msg.str(), object_error::parse_failed); +} + +template <typename T> +static Error readStruct(StringRef Buffer, const char *Src, T &Struct) { + // Don't read before the beginning or past the end of the file + if (Src < Buffer.begin() || Src + sizeof(T) > Buffer.end()) + return parseFailed("Reading structure out of file bounds"); + + memcpy(&Struct, Src, sizeof(T)); + // DXContainer is always little endian + if (sys::IsBigEndianHost) + Struct.swapBytes(); + return Error::success(); +} + +template <typename T> +static Error readInteger(StringRef Buffer, const char *Src, T &Val) { + static_assert(std::is_integral<T>::value, + "Cannot call readInteger on non-integral type."); + assert(reinterpret_cast<uintptr_t>(Src) % alignof(T) == 0 && + "Unaligned read of value from buffer!"); + // Don't read before the beginning or past the end of the file + if (Src < Buffer.begin() || Src + sizeof(T) > Buffer.end()) + return parseFailed("Reading structure out of file bounds"); + + Val = *reinterpret_cast<const T *>(Src); + // DXContainer is always little endian + if (sys::IsBigEndianHost) + sys::swapByteOrder(Val); + return Error::success(); +} + +DXContainer::DXContainer(MemoryBufferRef O) : Data(O) {} + +Error DXContainer::parseHeader() { + return readStruct(Data.getBuffer(), Data.getBuffer().data(), Header); +} + +Error DXContainer::parseDXILHeader(uint32_t Offset) { + if (DXIL) + return parseFailed("More than one DXIL part is present in the file"); + const char *Current = Data.getBuffer().data() + Offset; + dxbc::ProgramHeader Header; + if (Error Err = readStruct(Data.getBuffer(), Current, Header)) + return Err; + Current += offsetof(dxbc::ProgramHeader, Bitcode) + Header.Bitcode.Offset; + DXIL.emplace(std::make_pair(Header, Current)); + return Error::success(); +} + +Error DXContainer::parsePartOffsets() { + const char *Current = Data.getBuffer().data() + sizeof(dxbc::Header); + for (uint32_t Part = 0; Part < Header.PartCount; ++Part) { + uint32_t PartOffset; + if (Error Err = readInteger(Data.getBuffer(), Current, PartOffset)) + return Err; + Current += sizeof(uint32_t); + // We need to ensure that each part offset leaves enough space for a part + // header. To prevent overflow, we subtract the part header size from the + // buffer size, rather than adding to the offset.
Since the file header is + // larger than the part header we can't reach this code unless the buffer + // is larger than the part header, so this can't underflow. + if (PartOffset > Data.getBufferSize() - sizeof(dxbc::PartHeader)) + return parseFailed("Part offset points beyond boundary of the file"); + PartOffsets.push_back(PartOffset); + + // If this isn't a dxil part stop here... + if (Data.getBuffer().substr(PartOffset, 4) != "DXIL") + continue; + if (Error Err = parseDXILHeader(PartOffset + sizeof(dxbc::PartHeader))) + return Err; + } + return Error::success(); +} + +Expected DXContainer::create(MemoryBufferRef Object) { + DXContainer Container(Object); + if (Error Err = Container.parseHeader()) + return std::move(Err); + if (Error Err = Container.parsePartOffsets()) + return std::move(Err); + return Container; +} + +void DXContainer::PartIterator::updateIteratorImpl(const uint32_t Offset) { + StringRef Buffer = Container.Data.getBuffer(); + const char *Current = Buffer.data() + Offset; + // Offsets are validated during parsing, so all offsets in the container are + // valid and contain enough readable data to read a header. + cantFail(readStruct(Buffer, Current, IteratorState.Part)); + IteratorState.Data = + StringRef(Current + sizeof(dxbc::PartHeader), IteratorState.Part.Size); + IteratorState.Offset = Offset; +} diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp index 11efd857d1a1..de067ed59ac5 100644 --- a/llvm/lib/Object/Decompressor.cpp +++ b/llvm/lib/Object/Decompressor.cpp @@ -8,7 +8,7 @@ #include "llvm/Object/Decompressor.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Endian.h" diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 6e56da1a31f3..6acf4543be5a 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -166,6 +166,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } break; + case ELF::EM_LOONGARCH: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" + default: + break; + } + break; default: break; } @@ -288,6 +295,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP_V0); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); @@ -561,11 +569,9 @@ Expected ELFFile::dynamicEntries() const { } if (Dyn.empty()) - // TODO: this error is untested. return createError("invalid empty dynamic section"); if (Dyn.back().d_tag != ELF::DT_NULL) - // TODO: this error is untested. return createError("dynamic sections must be DT_NULL terminated"); return Dyn; @@ -635,7 +641,6 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec) const { DataExtractor::Cursor Cur(0); Error ULEBSizeErr = Error::success(); - // Helper to extract and decode the next ULEB128 value as uint32_t. // Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the uint32_t // limit. 
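The helper this comment describes appears in the next hunk. For reference, ULEB128 encodes an integer as little-endian base-128 digits, with the top bit of each byte flagging continuation; a freestanding decoder with the same uint32_t cap might look like the following (hypothetical helper, not the DataExtractor API):

#include <cstdint>
#include <optional>

// Decode one unsigned LEB128 value, rejecting results that do not fit in
// 32 bits. Buf is advanced past the consumed bytes; returns std::nullopt on
// truncated or unterminated input, or on values over UINT32_MAX.
static std::optional<uint32_t> decodeULEB128AsU32(const uint8_t *&Buf,
                                                  const uint8_t *End) {
  uint64_t Value = 0;
  for (unsigned Shift = 0; Buf != End && Shift < 64; Shift += 7) {
    uint8_t Byte = *Buf++;
    Value |= uint64_t(Byte & 0x7f) << Shift;
    if (!(Byte & 0x80))
      return Value <= UINT32_MAX ? std::optional<uint32_t>(uint32_t(Value))
                                 : std::nullopt;
  }
  return std::nullopt;
}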
@@ -655,18 +660,34 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec) const { return static_cast(Value); }; + uint8_t Version = 0; while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) { + if (Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { + Version = Data.getU8(Cur); + if (!Cur) + break; + if (Version > 1) + return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + + Twine(static_cast(Version))); + Data.getU8(Cur); // Feature byte + } uintX_t Address = static_cast(Data.getAddress(Cur)); uint32_t NumBlocks = ReadULEB128AsUInt32(); std::vector BBEntries; + uint32_t PrevBBEndOffset = 0; for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks); ++BlockID) { uint32_t Offset = ReadULEB128AsUInt32(); uint32_t Size = ReadULEB128AsUInt32(); uint32_t Metadata = ReadULEB128AsUInt32(); + if (Version >= 1) { + // Offset is calculated relative to the end of the previous BB. + Offset += PrevBBEndOffset; + PrevBBEndOffset = Offset + Size; + } BBEntries.push_back({Offset, Size, Metadata}); } - FunctionEntries.push_back({Address, BBEntries}); + FunctionEntries.push_back({Address, std::move(BBEntries)}); } // Either Cur is in the error state, or ULEBSizeError is set (not both), but // we join the two errors here to be safe. diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index cf1f12d9a9a7..38de669f1d3d 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -21,7 +21,6 @@ #include "llvm/Object/Error.h" #include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/RISCVAttributeParser.h" @@ -31,7 +30,6 @@ #include #include #include -#include #include using namespace llvm; @@ -169,11 +167,11 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { bool isV7 = false; Optional Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); - if (Attr.hasValue()) + if (Attr) isV7 = Attr.getValue() == ARMBuildAttrs::v7; Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { case ARMBuildAttrs::ApplicationProfile: Features.AddFeature("aclass"); @@ -192,7 +190,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { } Attr = Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { default: break; @@ -207,7 +205,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { } Attr = Attributes.getAttributeValue(ARMBuildAttrs::FP_arch); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { default: break; @@ -231,7 +229,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { } Attr = Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { default: break; @@ -250,7 +248,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { } Attr = Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { default: break; @@ -269,7 +267,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { } Attr = Attributes.getAttributeValue(ARMBuildAttrs::DIV_use); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { default: break; @@ -305,11 +303,11 @@ SubtargetFeatures ELFObjectFileBase::getRISCVFeatures() const { } Optional Attr = 
Attributes.getAttributeString(RISCVAttrs::ARCH); - if (Attr.hasValue()) { + if (Attr) { // The Arch pattern is [rv32|rv64][i|e]version(_[m|a|f|d|c]version)* // Version string pattern is (major)p(minor). Major and minor are optional. // For example, a version number could be 2p0, 2, or p92. - StringRef Arch = Attr.getValue(); + StringRef Arch = *Attr; if (Arch.consume_front("rv32")) Features.AddFeature("64bit", false); else if (Arch.consume_front("rv64")) @@ -360,6 +358,8 @@ Optional ELFObjectFileBase::tryGetCPUName() const { switch (getEMachine()) { case ELF::EM_AMDGPU: return getAMDGPUCPUName(); + case ELF::EM_PPC64: + return StringRef("future"); default: return None; } @@ -461,6 +461,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx90a"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: return "gfx90c"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: + return "gfx940"; // AMDGCN GFX10. case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: @@ -483,6 +485,18 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx1034"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: return "gfx1035"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: + return "gfx1036"; + + // AMDGCN GFX11. + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: + return "gfx1100"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: + return "gfx1101"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: + return "gfx1102"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: + return "gfx1103"; default: llvm_unreachable("Unknown EF_AMDGPU_MACH value"); } @@ -509,7 +523,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { Optional Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); - if (Attr.hasValue()) { + if (Attr) { switch (Attr.getValue()) { case ARMBuildAttrs::v4: Triple += "v4"; @@ -541,7 +555,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { case ARMBuildAttrs::v7: { Optional ArchProfileAttr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); - if (ArchProfileAttr.hasValue() && + if (ArchProfileAttr && ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile) Triple += "v7m"; else @@ -572,6 +586,9 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { case ARMBuildAttrs::v8_1_M_Main: Triple += "v8.1m.main"; break; + case ARMBuildAttrs::v9_A: + Triple += "v9a"; + break; } } if (!isLittleEndian()) @@ -655,6 +672,36 @@ ELFObjectFileBase::getPltAddresses() const { return Result; } +template +Expected> +readBBAddrMapImpl(const ELFFile &EF, + Optional TextSectionIndex) { + using Elf_Shdr = typename ELFT::Shdr; + std::vector BBAddrMaps; + const auto &Sections = cantFail(EF.sections()); + for (const Elf_Shdr &Sec : Sections) { + if (Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP && + Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP_V0) + continue; + if (TextSectionIndex) { + Expected TextSecOrErr = EF.getSection(Sec.sh_link); + if (!TextSecOrErr) + return createError("unable to get the linked-to section for " + + describe(EF, Sec) + ": " + + toString(TextSecOrErr.takeError())); + if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr)) + continue; + } + Expected> BBAddrMapOrErr = EF.decodeBBAddrMap(Sec); + if (!BBAddrMapOrErr) + return createError("unable to read " + describe(EF, Sec) + ": " + + toString(BBAddrMapOrErr.takeError())); + std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(), + std::back_inserter(BBAddrMaps)); + } + return BBAddrMaps; +} + template static Expected> readDynsymVersionsImpl(const ELFFile &EF, @@ -723,3 +770,17 @@ ELFObjectFileBase::readDynsymVersions() 
const {
   return readDynsymVersionsImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
                                 Symbols);
 }
+
+Expected<std::vector<BBAddrMap>>
+ELFObjectFileBase::readBBAddrMap(Optional<unsigned> TextSectionIndex) const {
+  if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this))
+    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+  if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this))
+    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+  if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
+    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+  if (const auto *Obj = cast<ELF64BEObjectFile>(this))
+    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+  else
+    llvm_unreachable("Unsupported binary format");
+}
diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp
index bc75bc6c0445..6d1e3f2a59d0 100644
--- a/llvm/lib/Object/Error.cpp
+++ b/llvm/lib/Object/Error.cpp
@@ -52,6 +52,8 @@ std::string _object_error_category::message(int EV) const {
     return "Bitcode section not found in object file";
   case object_error::invalid_symbol_index:
     return "Invalid symbol index";
+  case object_error::section_stripped:
+    return "Section has been stripped from the object file";
   }
   llvm_unreachable("An enumerator of object_error does not have a message "
                    "defined.");
diff --git a/llvm/lib/Object/IRObjectFile.cpp b/llvm/lib/Object/IRObjectFile.cpp
index c653262791cc..091930988bd0 100644
--- a/llvm/lib/Object/IRObjectFile.cpp
+++ b/llvm/lib/Object/IRObjectFile.cpp
@@ -11,20 +11,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/IRObjectFile.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/IR/GVMaterializer.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
-#include "llvm/MC/TargetRegistry.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace object;
 
+namespace llvm {
+class LLVMContext;
+class raw_ostream;
+} // namespace llvm
+
 IRObjectFile::IRObjectFile(MemoryBufferRef Object,
                            std::vector<std::unique_ptr<Module>> Mods)
     : SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) {
@@ -32,7 +32,7 @@ IRObjectFile::IRObjectFile(MemoryBufferRef Object,
     SymTab.addModule(M.get());
 }
 
-IRObjectFile::~IRObjectFile() {}
+IRObjectFile::~IRObjectFile() = default;
 
 static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) {
   return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p);
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index dea3d90d3560..5a7ecdb1fc25 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -24,7 +24,6 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Object/IRObjectFile.h"
 #include "llvm/Object/ModuleSymbolTable.h"
 #include "llvm/Object/SymbolicFile.h"
 #include "llvm/Support/Allocator.h"
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 3d95b18f4672..2f463a1bd458 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -34,7 +34,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SwapByteOrder.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1303,7 +1306,6 @@
MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, } const char *DyldIdLoadCmd = nullptr; - const char *FuncStartsLoadCmd = nullptr; const char *SplitInfoLoadCmd = nullptr; const char *CodeSignDrsLoadCmd = nullptr; const char *CodeSignLoadCmd = nullptr; @@ -1381,6 +1380,11 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, "LC_DYLD_INFO_ONLY", Elements))) return; + } else if (Load.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) { + if ((Err = checkLinkeditDataCommand( + *this, Load, I, &DyldChainedFixupsLoadCmd, + "LC_DYLD_CHAINED_FIXUPS", Elements, "chained fixups"))) + return; } else if (Load.C.cmd == MachO::LC_UUID) { if (Load.C.cmdsize != sizeof(MachO::uuid_command)) { Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect " @@ -1596,9 +1600,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, return; // Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported. } else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) { - if ((Err = checkTwoLevelHintsCommand(*this, Load, I, - &TwoLevelHintsLoadCmd, Elements))) - return; + if ((Err = checkTwoLevelHintsCommand(*this, Load, I, + &TwoLevelHintsLoadCmd, Elements))) + return; } else if (Load.C.cmd == MachO::LC_IDENT) { // Note: LC_IDENT is ignored. continue; @@ -2993,7 +2997,9 @@ void ExportEntry::pushNode(uint64_t offset) { return; } if (O != nullptr) { - if (State.Other > O->getLibraryCount()) { + // Only positive numbers represent library ordinals. Zero and negative + // numbers have special meaning (see BindSpecialDylib). + if ((int64_t)State.Other > 0 && State.Other > O->getLibraryCount()) { *E = malformedError( "bad library ordinal: " + Twine((int)State.Other) + " (max " + Twine((int)O->getLibraryCount()) + @@ -3186,6 +3192,106 @@ iterator_range MachOObjectFile::exports(Error &Err) const { return exports(Err, getDyldInfoExportsTrie(), this); } +MachOAbstractFixupEntry::MachOAbstractFixupEntry(Error *E, + const MachOObjectFile *O) + : E(E), O(O) { + // Cache the vmaddress of __TEXT + for (const auto &Command : O->load_commands()) { + if (Command.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command SLC = O->getSegmentLoadCommand(Command); + if (StringRef(SLC.segname) == StringRef("__TEXT")) { + TextAddress = SLC.vmaddr; + break; + } + } else if (Command.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 SLC_64 = O->getSegment64LoadCommand(Command); + if (StringRef(SLC_64.segname) == StringRef("__TEXT")) { + TextAddress = SLC_64.vmaddr; + break; + } + } + } +} + +int32_t MachOAbstractFixupEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachOAbstractFixupEntry::segmentOffset() const { + return SegmentOffset; +} + +uint64_t MachOAbstractFixupEntry::segmentAddress() const { + return O->BindRebaseAddress(SegmentIndex, 0); +} + +StringRef MachOAbstractFixupEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +StringRef MachOAbstractFixupEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +uint64_t MachOAbstractFixupEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + +StringRef MachOAbstractFixupEntry::symbolName() const { return SymbolName; } + +int64_t MachOAbstractFixupEntry::addend() const { return Addend; } + +uint32_t MachOAbstractFixupEntry::flags() const { return Flags; } + +int MachOAbstractFixupEntry::ordinal() const { return Ordinal; } + 
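Taken together with the iterator plumbing added below (fixupTable and MachOChainedFixupEntry), the intended consumer shape is roughly the following. A hedged sketch only: at this revision moveNext() immediately marks the entry Done, so the loop yields nothing yet.

#include "llvm/ADT/Twine.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// Walk the chained-fixups table of an already-parsed Mach-O file and print
// where each fixup lands; illustrative of the interface, not of output.
static void dumpChainedFixups(MachOObjectFile &Obj) {
  Error Err = Error::success();
  for (const MachOChainedFixupEntry &Fixup : Obj.fixupTable(Err))
    outs() << Fixup.segmentName() << "+0x"
           << Twine::utohexstr(Fixup.segmentOffset()) << "\n";
  if (Err)
    report_fatal_error(std::move(Err));
}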
+StringRef MachOAbstractFixupEntry::typeName() const { return "unknown"; } + +void MachOAbstractFixupEntry::moveToFirst() { + SegmentOffset = 0; + SegmentIndex = -1; + Ordinal = 0; + Flags = 0; + Addend = 0; + Done = false; +} + +void MachOAbstractFixupEntry::moveToEnd() { Done = true; } + +MachOChainedFixupEntry::MachOChainedFixupEntry(Error *E, + const MachOObjectFile *O, + bool Parse) + : MachOAbstractFixupEntry(E, O) { + ErrorAsOutParameter e(E); + if (!Parse) + return; + if (auto FixupTargetsOrErr = O->getDyldChainedFixupTargets()) + FixupTargets = *FixupTargetsOrErr; + else { + *E = FixupTargetsOrErr.takeError(); + return; + } +} + +void MachOChainedFixupEntry::moveToFirst() { + MachOAbstractFixupEntry::moveToFirst(); + FixupIndex = 0; + moveNext(); +} + +void MachOChainedFixupEntry::moveToEnd() { + MachOAbstractFixupEntry::moveToEnd(); +} + +void MachOChainedFixupEntry::moveNext() { Done = true; } + +bool MachOChainedFixupEntry::operator==( + const MachOChainedFixupEntry &Other) const { + if (Done == Other.Done) + return true; + if ((FixupIndex == Other.FixupIndex)) + return true; + return false; +} + MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, ArrayRef Bytes, bool is64Bit) : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), @@ -4194,6 +4300,16 @@ iterator_range MachOObjectFile::weakBindTable(Error &Err) { MachOBindEntry::Kind::Weak); } +iterator_range MachOObjectFile::fixupTable(Error &Err) { + MachOChainedFixupEntry Start(&Err, this, true); + Start.moveToFirst(); + + MachOChainedFixupEntry Finish(&Err, this, false); + Finish.moveToEnd(); + + return make_range(fixup_iterator(Start), fixup_iterator(Finish)); +} + MachOObjectFile::load_command_iterator MachOObjectFile::begin_load_commands() const { return LoadCommands.begin(); @@ -4649,6 +4765,72 @@ ArrayRef MachOObjectFile::getDyldInfoLazyBindOpcodes() const { return makeArrayRef(Ptr, DyldInfo.lazy_bind_size); } +Expected> +MachOObjectFile::getChainedFixupsHeader() const { + // Load the dyld chained fixups load command. + if (!DyldChainedFixupsLoadCmd) + return llvm::None; + auto DyldChainedFixupsOrErr = getStructOrErr( + *this, DyldChainedFixupsLoadCmd); + if (!DyldChainedFixupsOrErr) + return DyldChainedFixupsOrErr.takeError(); + MachO::linkedit_data_command DyldChainedFixups = DyldChainedFixupsOrErr.get(); + + // If the load command is present but the data offset has been zeroed out, + // as is the case for dylib stubs, return None (no error). + uint64_t CFHeaderOffset = DyldChainedFixups.dataoff; + if (CFHeaderOffset == 0) + return DyldChainedFixupsOrErr.takeError(); + + // Load the dyld chained fixups header. + const char *CFHeaderPtr = getPtr(*this, CFHeaderOffset); + auto CFHeaderOrErr = + getStructOrErr(*this, CFHeaderPtr); + if (!CFHeaderOrErr) + return CFHeaderOrErr.takeError(); + MachO::dyld_chained_fixups_header CFHeader = CFHeaderOrErr.get(); + + // Reject unknown chained fixup formats. + if (CFHeader.fixups_version != 0) + return malformedError(Twine("bad chained fixups: unknown version: ") + + Twine(CFHeader.fixups_version)); + if (CFHeader.imports_format < 1 || CFHeader.imports_format > 3) + return malformedError( + Twine("bad chained fixups: unknown imports format: ") + + Twine(CFHeader.imports_format)); + + // Validate the image format. + // + // Load the image starts. 
+ uint64_t CFImageStartsOffset = (CFHeaderOffset + CFHeader.starts_offset); + if (CFHeader.starts_offset < sizeof(MachO::dyld_chained_fixups_header)) { + return malformedError(Twine("bad chained fixups: image starts offset ") + + Twine(CFHeader.starts_offset) + + " overlaps with chained fixups header"); + } + uint32_t EndOffset = DyldChainedFixups.dataoff + DyldChainedFixups.datasize; + if (CFImageStartsOffset + sizeof(MachO::dyld_chained_starts_in_image) > + EndOffset) { + return malformedError(Twine("bad chained fixups: image starts end ") + + Twine(CFImageStartsOffset + + sizeof(MachO::dyld_chained_starts_in_image)) + + " extends past end " + Twine(EndOffset)); + } + + return CFHeader; +} + +Expected> +MachOObjectFile::getDyldChainedFixupTargets() const { + auto CFHeaderOrErr = getChainedFixupsHeader(); + if (!CFHeaderOrErr) + return CFHeaderOrErr.takeError(); + std::vector Targets; + if (!(*CFHeaderOrErr)) + return Targets; + return Targets; +} + ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const { if (!DyldInfoLoadCmd) return None; @@ -4663,6 +4845,21 @@ ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const { return makeArrayRef(Ptr, DyldInfo.export_size); } +SmallVector MachOObjectFile::getFunctionStarts() const { + if (!FuncStartsLoadCmd) + return {}; + + auto InfoOrErr = + getStructOrErr(*this, FuncStartsLoadCmd); + if (!InfoOrErr) + return {}; + + MachO::linkedit_data_command Info = InfoOrErr.get(); + SmallVector FunctionStarts; + this->ReadULEB128s(Info.dataoff, FunctionStarts); + return std::move(FunctionStarts); +} + ArrayRef MachOObjectFile::getUuid() const { if (!UuidLoadCmd) return None; @@ -4778,3 +4975,23 @@ MachOObjectFile::mapReflectionSectionNameToEnumValue( .Default(llvm::binaryformat::Swift5ReflectionSectionKind::unknown); #undef HANDLE_SWIFT_SECTION } + +bool MachOObjectFile::isMachOPairedReloc(uint64_t RelocType, uint64_t Arch) { + switch (Arch) { + case Triple::x86: + return RelocType == MachO::GENERIC_RELOC_SECTDIFF || + RelocType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF; + case Triple::x86_64: + return RelocType == MachO::X86_64_RELOC_SUBTRACTOR; + case Triple::arm: + case Triple::thumb: + return RelocType == MachO::ARM_RELOC_SECTDIFF || + RelocType == MachO::ARM_RELOC_LOCAL_SECTDIFF || + RelocType == MachO::ARM_RELOC_HALF || + RelocType == MachO::ARM_RELOC_HALF_SECTDIFF; + case Triple::aarch64: + return RelocType == MachO::ARM64_RELOC_SUBTRACTOR; + default: + return false; + } +} diff --git a/llvm/lib/Object/MachOUniversal.cpp b/llvm/lib/Object/MachOUniversal.cpp index f3ce005e6ef9..c2c2b67814dc 100644 --- a/llvm/lib/Object/MachOUniversal.cpp +++ b/llvm/lib/Object/MachOUniversal.cpp @@ -15,9 +15,9 @@ #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/type_traits.h" using namespace llvm; using namespace object; diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp index ae1ff09a4f8f..333706baf8c1 100644 --- a/llvm/lib/Object/MachOUniversalWriter.cpp +++ b/llvm/lib/Object/MachOUniversalWriter.cpp @@ -12,13 +12,21 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/MachOUniversalWriter.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include 
"llvm/Object/Archive.h" #include "llvm/Object/Binary.h" -#include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace object; @@ -205,7 +213,7 @@ Expected Slice::create(const Archive &A, LLVMContext *LLVMCtx) { .c_str()); if (MFO) { - Slice ArchiveSlice(*(MFO.get()), MFO->is64Bit() ? 3 : 2); + Slice ArchiveSlice(*(MFO), MFO->is64Bit() ? 3 : 2); ArchiveSlice.B = &A; return ArchiveSlice; } diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 954d1f09f4e9..11274a7fcc16 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -15,7 +15,6 @@ #include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -27,7 +26,6 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" @@ -39,7 +37,6 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp index 576eb8d069d6..d5e67160dfa3 100644 --- a/llvm/lib/Object/Object.cpp +++ b/llvm/lib/Object/Object.cpp @@ -120,6 +120,8 @@ LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) { return LLVMBinaryTypeMachO64L; case ID_MachO64B: return LLVMBinaryTypeMachO64B; + case ID_Offload: + return LLVMBinaryTypeOffload; case ID_Wasm: return LLVMBinaryTypeWasm; case ID_StartObjects: diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp index 6fd02f3b9592..1be8f11751be 100644 --- a/llvm/lib/Object/ObjectFile.cpp +++ b/llvm/lib/Object/ObjectFile.cpp @@ -21,10 +21,9 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include @@ -147,6 +146,9 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type, case file_magic::pdb: case file_magic::minidump: case file_magic::goff_object: + case file_magic::cuda_fatbinary: + case file_magic::offload_binary: + case file_magic::dxcontainer_object: return errorCodeToError(object_error::invalid_file_type); case file_magic::tapi_file: return errorCodeToError(object_error::invalid_file_type); @@ -198,3 +200,12 @@ ObjectFile::createObjectFile(StringRef ObjectPath) { return OwningBinary(std::move(Obj), std::move(Buffer)); } + +bool ObjectFile::isReflectionSectionStrippable( + llvm::binaryformat::Swift5ReflectionSectionKind ReflectionSectionKind) + const { + using llvm::binaryformat::Swift5ReflectionSectionKind; + return ReflectionSectionKind == Swift5ReflectionSectionKind::fieldmd || + 
ReflectionSectionKind == Swift5ReflectionSectionKind::reflstr || + ReflectionSectionKind == Swift5ReflectionSectionKind::assocty; +} diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp new file mode 100644 index 000000000000..21946ec2d6fb --- /dev/null +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -0,0 +1,164 @@ +//===- Offloading.cpp - Utilities for handling offloading code -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/OffloadBinary.h" + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/FileOutputBuffer.h" + +using namespace llvm; +using namespace llvm::object; + +Expected> +OffloadBinary::create(MemoryBufferRef Buf) { + if (Buf.getBufferSize() < sizeof(Header) + sizeof(Entry)) + return errorCodeToError(object_error::parse_failed); + + // Check for 0x10FF1OAD magic bytes. + if (identify_magic(Buf.getBuffer()) != file_magic::offload_binary) + return errorCodeToError(object_error::parse_failed); + + // Make sure that the data has sufficient alignment. + if (!isAddrAligned(Align(getAlignment()), Buf.getBufferStart())) + return errorCodeToError(object_error::parse_failed); + + const char *Start = Buf.getBufferStart(); + const Header *TheHeader = reinterpret_cast(Start); + if (TheHeader->Version != OffloadBinary::Version) + return errorCodeToError(object_error::parse_failed); + + if (TheHeader->Size > Buf.getBufferSize() || + TheHeader->EntryOffset > TheHeader->Size - sizeof(Entry) || + TheHeader->EntrySize > TheHeader->Size - sizeof(Header)) + return errorCodeToError(object_error::unexpected_eof); + + const Entry *TheEntry = + reinterpret_cast(&Start[TheHeader->EntryOffset]); + + if (TheEntry->ImageOffset > Buf.getBufferSize() || + TheEntry->StringOffset > Buf.getBufferSize()) + return errorCodeToError(object_error::unexpected_eof); + + return std::unique_ptr( + new OffloadBinary(Buf, TheHeader, TheEntry)); +} + +std::unique_ptr +OffloadBinary::write(const OffloadingImage &OffloadingData) { + // Create a null-terminated string table with all the used strings. + StringTableBuilder StrTab(StringTableBuilder::ELF); + for (auto &KeyAndValue : OffloadingData.StringData) { + StrTab.add(KeyAndValue.getKey()); + StrTab.add(KeyAndValue.getValue()); + } + StrTab.finalize(); + + uint64_t StringEntrySize = + sizeof(StringEntry) * OffloadingData.StringData.size(); + + // Make sure the image we're wrapping around is aligned as well. + uint64_t BinaryDataSize = alignTo(sizeof(Header) + sizeof(Entry) + + StringEntrySize + StrTab.getSize(), + getAlignment()); + + // Create the header and fill in the offsets. The entry will be directly + // placed after the header in memory. Align the size to the alignment of the + // header so this can be placed contiguously in a single section. + Header TheHeader; + TheHeader.Size = alignTo( + BinaryDataSize + OffloadingData.Image->getBufferSize(), getAlignment()); + TheHeader.EntryOffset = sizeof(Header); + TheHeader.EntrySize = sizeof(Entry); + + // Create the entry using the string table offsets. The string table will be + // placed directly after the entry in memory, and the image after that. 
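To make the layout described above concrete: header, then entry, then the string-entry array and string blob, then padding up to the image alignment, then the image itself. A toy offset computation with assumed sizes; the real values come from sizeof(Header), sizeof(Entry), and the finalized string table:

#include <cstdint>

// Same arithmetic as the writer, with made-up sizes. alignUp mirrors
// llvm::alignTo for power-of-two alignments.
constexpr uint64_t alignUp(uint64_t V, uint64_t A) {
  return (V + A - 1) & ~(A - 1);
}

constexpr uint64_t HeaderSize = 32, EntrySize = 40;     // assumed
constexpr uint64_t NumStrings = 2, StringBlobSize = 25; // assumed
constexpr uint64_t StringEntrySize = 16 * NumStrings;   // assumed 2 x uint64_t
constexpr uint64_t ImageOffset =
    alignUp(HeaderSize + EntrySize + StringEntrySize + StringBlobSize, 8);
static_assert(ImageOffset == 136, "image lands on the next 8-byte boundary");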
+ Entry TheEntry; + TheEntry.TheImageKind = OffloadingData.TheImageKind; + TheEntry.TheOffloadKind = OffloadingData.TheOffloadKind; + TheEntry.Flags = OffloadingData.Flags; + TheEntry.StringOffset = sizeof(Header) + sizeof(Entry); + TheEntry.NumStrings = OffloadingData.StringData.size(); + + TheEntry.ImageOffset = BinaryDataSize; + TheEntry.ImageSize = OffloadingData.Image->getBufferSize(); + + SmallVector Data; + Data.reserve(TheHeader.Size); + raw_svector_ostream OS(Data); + OS << StringRef(reinterpret_cast(&TheHeader), sizeof(Header)); + OS << StringRef(reinterpret_cast(&TheEntry), sizeof(Entry)); + for (auto &KeyAndValue : OffloadingData.StringData) { + uint64_t Offset = sizeof(Header) + sizeof(Entry) + StringEntrySize; + StringEntry Map{Offset + StrTab.getOffset(KeyAndValue.getKey()), + Offset + StrTab.getOffset(KeyAndValue.getValue())}; + OS << StringRef(reinterpret_cast(&Map), sizeof(StringEntry)); + } + StrTab.write(OS); + // Add padding to required image alignment. + OS.write_zeros(TheEntry.ImageOffset - OS.tell()); + OS << OffloadingData.Image->getBuffer(); + + // Add final padding to required alignment. + assert(TheHeader.Size >= OS.tell() && "Too much data written?"); + OS.write_zeros(TheHeader.Size - OS.tell()); + assert(TheHeader.Size == OS.tell() && "Size mismatch"); + + return MemoryBuffer::getMemBufferCopy(OS.str()); +} + +OffloadKind object::getOffloadKind(StringRef Name) { + return llvm::StringSwitch(Name) + .Case("openmp", OFK_OpenMP) + .Case("cuda", OFK_Cuda) + .Case("hip", OFK_HIP) + .Default(OFK_None); +} + +StringRef object::getOffloadKindName(OffloadKind Kind) { + switch (Kind) { + case OFK_OpenMP: + return "openmp"; + case OFK_Cuda: + return "cuda"; + case OFK_HIP: + return "hip"; + default: + return "none"; + } +} + +ImageKind object::getImageKind(StringRef Name) { + return llvm::StringSwitch(Name) + .Case("o", IMG_Object) + .Case("bc", IMG_Bitcode) + .Case("cubin", IMG_Cubin) + .Case("fatbin", IMG_Fatbinary) + .Case("s", IMG_PTX) + .Default(IMG_None); +} + +StringRef object::getImageKindName(ImageKind Kind) { + switch (Kind) { + case IMG_Object: + return "o"; + case IMG_Bitcode: + return "bc"; + case IMG_Cubin: + return "cubin"; + case IMG_Fatbinary: + return "fatbin"; + case IMG_PTX: + return "s"; + default: + return ""; + } +} diff --git a/llvm/lib/Object/RecordStreamer.h b/llvm/lib/Object/RecordStreamer.h index 957d80f33bf4..5c6541e5052d 100644 --- a/llvm/lib/Object/RecordStreamer.h +++ b/llvm/lib/Object/RecordStreamer.h @@ -57,10 +57,10 @@ public: // Ignore COFF-specific directives; we do not need any information from them, // but the default implementation of these methods crashes, so we override // them with versions that do nothing. - void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} - void EndCOFFSymbolDef() override {} + void beginCOFFSymbolDef(const MCSymbol *Symbol) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} + void endCOFFSymbolDef() override {} /// Record .symver aliases for later processing. 
void emitELFSymverDirective(const MCSymbol *OriginalSym, StringRef Name, diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 00a45e2c5d4e..e14301663df3 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -11,6 +11,21 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/RelocationResolver.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include namespace llvm { namespace object { @@ -63,6 +78,7 @@ static bool supportsAArch64(uint64_t Type) { switch (Type) { case ELF::R_AARCH64_ABS32: case ELF::R_AARCH64_ABS64: + case ELF::R_AARCH64_PREL16: case ELF::R_AARCH64_PREL32: case ELF::R_AARCH64_PREL64: return true; @@ -78,6 +94,8 @@ static uint64_t resolveAArch64(uint64_t Type, uint64_t Offset, uint64_t S, return (S + Addend) & 0xFFFFFFFF; case ELF::R_AARCH64_ABS64: return S + Addend; + case ELF::R_AARCH64_PREL16: + return (S + Addend - Offset) & 0xFFFF; case ELF::R_AARCH64_PREL32: return (S + Addend - Offset) & 0xFFFFFFFF; case ELF::R_AARCH64_PREL64: @@ -468,6 +486,31 @@ static uint64_t resolveRISCV(uint64_t Type, uint64_t Offset, uint64_t S, } } +static bool supportsCSKY(uint64_t Type) { + switch (Type) { + case ELF::R_CKCORE_NONE: + case ELF::R_CKCORE_ADDR32: + case ELF::R_CKCORE_PCREL32: + return true; + default: + return false; + } +} + +static uint64_t resolveCSKY(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + switch (Type) { + case ELF::R_CKCORE_NONE: + return LocData; + case ELF::R_CKCORE_ADDR32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_CKCORE_PCREL32: + return (S + Addend - Offset) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + static bool supportsCOFFX86(uint64_t Type) { switch (Type) { case COFF::IMAGE_REL_I386_SECREL: @@ -715,6 +758,8 @@ getRelocationResolver(const ObjectFile &Obj) { return {supportsHexagon, resolveHexagon}; case Triple::riscv32: return {supportsRISCV, resolveRISCV}; + case Triple::csky: + return {supportsCSKY, resolveCSKY}; default: return {nullptr, nullptr}; } diff --git a/llvm/lib/Object/SymbolicFile.cpp b/llvm/lib/Object/SymbolicFile.cpp index 58db5b672914..05f47cfbf2ff 100644 --- a/llvm/lib/Object/SymbolicFile.cpp +++ b/llvm/lib/Object/SymbolicFile.cpp @@ -17,18 +17,17 @@ #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include #include using namespace llvm; using namespace object; +namespace llvm { +class LLVMContext; +} + SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) : Binary(Type, Source) {} diff --git a/llvm/lib/Object/TapiFile.cpp b/llvm/lib/Object/TapiFile.cpp index 83568e8d823a..596445a09e85 100644 --- a/llvm/lib/Object/TapiFile.cpp +++ b/llvm/lib/Object/TapiFile.cpp @@ -12,8 +12,12 @@ #include 
"llvm/Object/TapiFile.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/Error.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/TextAPI/ArchitectureSet.h" +#include "llvm/TextAPI/InterfaceFile.h" +#include "llvm/TextAPI/Platform.h" #include "llvm/TextAPI/Symbol.h" using namespace llvm; diff --git a/llvm/lib/Object/TapiUniversal.cpp b/llvm/lib/Object/TapiUniversal.cpp index d73d93f6bd53..bf96b57f0321 100644 --- a/llvm/lib/Object/TapiUniversal.cpp +++ b/llvm/lib/Object/TapiUniversal.cpp @@ -13,7 +13,8 @@ #include "llvm/Object/TapiUniversal.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/Error.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Object/TapiFile.h" +#include "llvm/TextAPI/ArchitectureSet.h" #include "llvm/TextAPI/TextAPIReader.h" using namespace llvm; @@ -47,7 +48,7 @@ TapiUniversal::~TapiUniversal() = default; Expected> TapiUniversal::ObjectForArch::getAsObjectFile() const { return std::unique_ptr(new TapiFile(Parent->getMemoryBufferRef(), - *Parent->ParsedFile.get(), + *Parent->ParsedFile, Parent->Libraries[Index].Arch)); } diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 6a19b159f3d5..ce816b097691 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -8,7 +8,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -30,7 +29,6 @@ #include #include #include -#include #define DEBUG_TYPE "wasm-object" @@ -166,23 +164,25 @@ static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) { static Error readInitExpr(wasm::WasmInitExpr &Expr, WasmObjectFile::ReadContext &Ctx) { - Expr.Opcode = readOpcode(Ctx); + auto Start = Ctx.Ptr; - switch (Expr.Opcode) { + Expr.Extended = false; + Expr.Inst.Opcode = readOpcode(Ctx); + switch (Expr.Inst.Opcode) { case wasm::WASM_OPCODE_I32_CONST: - Expr.Value.Int32 = readVarint32(Ctx); + Expr.Inst.Value.Int32 = readVarint32(Ctx); break; case wasm::WASM_OPCODE_I64_CONST: - Expr.Value.Int64 = readVarint64(Ctx); + Expr.Inst.Value.Int64 = readVarint64(Ctx); break; case wasm::WASM_OPCODE_F32_CONST: - Expr.Value.Float32 = readFloat32(Ctx); + Expr.Inst.Value.Float32 = readFloat32(Ctx); break; case wasm::WASM_OPCODE_F64_CONST: - Expr.Value.Float64 = readFloat64(Ctx); + Expr.Inst.Value.Float64 = readFloat64(Ctx); break; case wasm::WASM_OPCODE_GLOBAL_GET: - Expr.Value.Global = readULEB128(Ctx); + Expr.Inst.Value.Global = readULEB128(Ctx); break; case wasm::WASM_OPCODE_REF_NULL: { wasm::ValType Ty = static_cast(readULEB128(Ctx)); @@ -193,15 +193,46 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, break; } default: - return make_error("invalid opcode in init_expr", - object_error::parse_failed); + Expr.Extended = true; } - uint8_t EndOpcode = readOpcode(Ctx); - if (EndOpcode != wasm::WASM_OPCODE_END) { - return make_error("invalid init_expr", - object_error::parse_failed); + if (!Expr.Extended) { + uint8_t EndOpcode = readOpcode(Ctx); + if (EndOpcode != wasm::WASM_OPCODE_END) + Expr.Extended = true; + } + + if (Expr.Extended) { + Ctx.Ptr = Start; + while (1) { + uint8_t Opcode = readOpcode(Ctx); + switch (Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + case wasm::WASM_OPCODE_GLOBAL_GET: + case wasm::WASM_OPCODE_REF_NULL: + case wasm::WASM_OPCODE_I64_CONST: + case wasm::WASM_OPCODE_F32_CONST: + case wasm::WASM_OPCODE_F64_CONST: + 
readULEB128(Ctx); + break; + case wasm::WASM_OPCODE_I32_ADD: + case wasm::WASM_OPCODE_I32_SUB: + case wasm::WASM_OPCODE_I32_MUL: + case wasm::WASM_OPCODE_I64_ADD: + case wasm::WASM_OPCODE_I64_SUB: + case wasm::WASM_OPCODE_I64_MUL: + break; + case wasm::WASM_OPCODE_END: + Expr.Body = ArrayRef(Start, Ctx.Ptr - Start); + return Error::success(); + default: + return make_error( + Twine("invalid opcode in init_expr: ") + Twine(unsigned(Opcode)), + object_error::parse_failed); + } + } } + return Error::success(); } @@ -420,10 +451,6 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { llvm::DenseSet SeenFunctions; llvm::DenseSet SeenGlobals; llvm::DenseSet SeenSegments; - if (Functions.size() && !SeenCodeSection) { - return make_error("names must come after code section", - object_error::parse_failed); - } while (Ctx.Ptr < Ctx.End) { uint8_t Type = readUint8(Ctx); @@ -443,7 +470,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { return make_error( "function named more than once", object_error::parse_failed); if (!isValidFunctionIndex(Index) || Name.empty()) - return make_error("invalid name entry", + return make_error("invalid function name entry", object_error::parse_failed); if (isDefinedFunctionIndex(Index)) @@ -454,7 +481,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { return make_error("global named more than once", object_error::parse_failed); if (!isValidGlobalIndex(Index) || Name.empty()) - return make_error("invalid name entry", + return make_error("invalid global name entry", object_error::parse_failed); } else { nameType = wasm::NameType::DATA_SEGMENT; @@ -462,7 +489,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { return make_error( "segment named more than once", object_error::parse_failed); if (Index > DataSegments.size()) - return make_error("invalid named data segment", + return make_error("invalid data segment name entry", object_error::parse_failed); } DebugNames.push_back(wasm::WasmDebugName{nameType, Index, Name}); @@ -488,11 +515,6 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) { HasLinkingSection = true; - if (Functions.size() && !SeenCodeSection) { - return make_error( - "linking data must come after code section", - object_error::parse_failed); - } LinkingData.Version = readVaruint32(Ctx); if (LinkingData.Version != wasm::WasmMetadataVersion) { @@ -1379,7 +1401,6 @@ Error WasmObjectFile::parseStartSection(ReadContext &Ctx) { } Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) { - SeenCodeSection = true; CodeSection = Sections.size(); uint32_t FunctionCount = readVaruint32(Ctx); if (FunctionCount != Functions.size()) { @@ -1443,8 +1464,9 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { object_error::parse_failed); if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) { - Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST; - Segment.Offset.Value.Int32 = 0; + Segment.Offset.Extended = false; + Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST; + Segment.Offset.Inst.Value.Int32 = 0; } else { if (Error Err = readInitExpr(Segment.Offset, Ctx)) return Err; @@ -1488,7 +1510,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { Error WasmObjectFile::parseDataSection(ReadContext &Ctx) { DataSection = Sections.size(); uint32_t Count = readVaruint32(Ctx); - if (DataCount && Count != DataCount.getValue()) + if (DataCount && Count != *DataCount) return make_error( "number of data segments does not match DataCount 
section"); DataSegments.reserve(Count); @@ -1503,8 +1525,9 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) { if (Error Err = readInitExpr(Segment.Data.Offset, Ctx)) return Err; } else { - Segment.Data.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST; - Segment.Data.Offset.Value.Int32 = 0; + Segment.Data.Offset.Extended = false; + Segment.Data.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST; + Segment.Data.Offset.Inst.Value.Int32 = 0; } uint32_t Size = readVaruint32(Ctx); if (Size > (size_t)(Ctx.End - Ctx.Ptr)) @@ -1602,10 +1625,12 @@ uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const { // offset within the segment. uint32_t SegmentIndex = Sym.Info.DataRef.Segment; const wasm::WasmDataSegment &Segment = DataSegments[SegmentIndex].Data; - if (Segment.Offset.Opcode == wasm::WASM_OPCODE_I32_CONST) { - return Segment.Offset.Value.Int32 + Sym.Info.DataRef.Offset; - } else if (Segment.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST) { - return Segment.Offset.Value.Int64 + Sym.Info.DataRef.Offset; + if (Segment.Offset.Extended) { + llvm_unreachable("extended init exprs not supported"); + } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_I32_CONST) { + return Segment.Offset.Inst.Value.Int32 + Sym.Info.DataRef.Offset; + } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) { + return Segment.Offset.Inst.Value.Int64 + Sym.Info.DataRef.Offset; } else { llvm_unreachable("unknown init expr opcode"); } @@ -1692,29 +1717,11 @@ void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } Expected WasmObjectFile::getSectionName(DataRefImpl Sec) const { const WasmSection &S = Sections[Sec.d.a]; -#define ECase(X) \ - case wasm::WASM_SEC_##X: \ - return #X; - switch (S.Type) { - ECase(TYPE); - ECase(IMPORT); - ECase(FUNCTION); - ECase(TABLE); - ECase(MEMORY); - ECase(GLOBAL); - ECase(TAG); - ECase(EXPORT); - ECase(START); - ECase(ELEM); - ECase(CODE); - ECase(DATA); - ECase(DATACOUNT); - case wasm::WASM_SEC_CUSTOM: + if (S.Type == wasm::WASM_SEC_CUSTOM) return S.Name; - default: + if (S.Type > wasm::WASM_SEC_LAST_KNOWN) return createStringError(object_error::invalid_section_index, ""); - } -#undef ECase + return wasm::sectionTypeToString(S.Type); } uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } diff --git a/llvm/lib/Object/WindowsResource.cpp b/llvm/lib/Object/WindowsResource.cpp index 2a69c6c46b59..d50f149629c3 100644 --- a/llvm/lib/Object/WindowsResource.cpp +++ b/llvm/lib/Object/WindowsResource.cpp @@ -12,13 +12,11 @@ #include "llvm/Object/WindowsResource.h" #include "llvm/Object/COFF.h" -#include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" #include #include -#include using namespace llvm; using namespace object; diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp index f2f6d700ddd8..ff39fe1794c0 100644 --- a/llvm/lib/Object/XCOFFObjectFile.cpp +++ b/llvm/lib/Object/XCOFFObjectFile.cpp @@ -615,6 +615,16 @@ Expected XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const { if (XCOFFSym.getSectionNumber() == XCOFF::N_UNDEF) Result |= SymbolRef::SF_Undefined; + // There is no visibility in old 32 bit XCOFF object file interpret. 
+ if (is64Bit() || (auxiliaryHeader32() && (auxiliaryHeader32()->getVersion() == + NEW_XCOFF_INTERPRET))) { + uint16_t SymType = XCOFFSym.getSymbolType(); + if ((SymType & VISIBILITY_MASK) == SYM_V_HIDDEN) + Result |= SymbolRef::SF_Hidden; + + if ((SymType & VISIBILITY_MASK) == SYM_V_EXPORTED) + Result |= SymbolRef::SF_Exported; + } return Result; } @@ -699,6 +709,19 @@ bool XCOFFObjectFile::is64Bit() const { return Binary::ID_XCOFF64 == getType(); } +Expected XCOFFObjectFile::getRawData(const char *Start, + uint64_t Size, + StringRef Name) const { + uintptr_t StartPtr = reinterpret_cast(Start); + // TODO: this path is untested. + if (Error E = Binary::checkOffset(Data, StartPtr, Size)) + return createError(toString(std::move(E)) + ": " + Name.data() + + " data with offset 0x" + Twine::utohexstr(StartPtr) + + " and size 0x" + Twine::utohexstr(Size) + + " goes past the end of the file"); + return StringRef(Start, Size); +} + uint16_t XCOFFObjectFile::getMagic() const { return is64Bit() ? fileHeader64()->Magic : fileHeader32()->Magic; } @@ -1319,7 +1342,7 @@ XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, NumOfCtlAnchors = DE.getU32(Cur); if (Cur && NumOfCtlAnchors) { SmallVector Disp; - Disp.reserve(NumOfCtlAnchors.getValue()); + Disp.reserve(*NumOfCtlAnchors); for (uint32_t I = 0; I < NumOfCtlAnchors && Cur; ++I) Disp.push_back(DE.getU32(Cur)); if (Cur) @@ -1346,7 +1369,7 @@ XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, return; } VecExt = TBVecExtOrErr.get(); - VectorParmsNum = VecExt.getValue().getNumberOfVectorParms(); + VectorParmsNum = VecExt->getNumberOfVectorParms(); } } diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp index d884e2fd55cd..72d7db665d0e 100644 --- a/llvm/lib/ObjectYAML/COFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp @@ -19,6 +19,7 @@ #include "llvm/Object/COFF.h" #include "llvm/ObjectYAML/ObjectYAML.h" #include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" @@ -45,7 +46,7 @@ struct COFFParser { COFF::MaxNumberOfSections16; } - bool isPE() const { return Obj.OptionalHeader.hasValue(); } + bool isPE() const { return Obj.OptionalHeader.has_value(); } bool is64Bit() const { return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 || Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64; @@ -236,7 +237,7 @@ static bool layoutCOFF(COFFParser &CP) { if (S.SectionData.binary_size() == 0) S.SectionData = CodeViewYAML::toDebugT(S.DebugP, CP.Allocator, S.Name); } else if (S.Name == ".debug$H") { - if (S.DebugH.hasValue() && S.SectionData.binary_size() == 0) + if (S.DebugH && S.SectionData.binary_size() == 0) S.SectionData = CodeViewYAML::toDebugH(*S.DebugH, CP.Allocator); } @@ -456,7 +457,7 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) { CP.Obj.OptionalHeader->DataDirectories; uint32_t NumDataDir = sizeof(CP.Obj.OptionalHeader->DataDirectories) / sizeof(Optional); - if (I >= NumDataDir || !DataDirectories[I].hasValue()) { + if (I >= NumDataDir || !DataDirectories[I]) { OS << zeros(uint32_t(0)); OS << zeros(uint32_t(0)); } else { diff --git a/llvm/lib/ObjectYAML/COFFYAML.cpp b/llvm/lib/ObjectYAML/COFFYAML.cpp index 6e5cdce89060..099ddb2b9665 100644 --- a/llvm/lib/ObjectYAML/COFFYAML.cpp +++ b/llvm/lib/ObjectYAML/COFFYAML.cpp @@ -75,6 +75,9 @@ void ScalarEnumerationTraits::enumeration( ECase(IMAGE_FILE_MACHINE_POWERPC); 
ECase(IMAGE_FILE_MACHINE_POWERPCFP); ECase(IMAGE_FILE_MACHINE_R4000); + ECase(IMAGE_FILE_MACHINE_RISCV32); + ECase(IMAGE_FILE_MACHINE_RISCV64); + ECase(IMAGE_FILE_MACHINE_RISCV128); ECase(IMAGE_FILE_MACHINE_SH3); ECase(IMAGE_FILE_MACHINE_SH3DSP); ECase(IMAGE_FILE_MACHINE_SH4); diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index 6b6a1176628b..b1ad10d425cc 100644 --- a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -25,6 +25,7 @@ #include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/YAMLTraits.h" #include #include diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp index 49b24e21cf60..e4e2b2a6d21a 100644 --- a/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -490,7 +490,10 @@ private: Error LeafRecordImpl::fromCodeViewRecord(CVType Type) { MemberRecordConversionVisitor V(Members); - return visitMemberRecordStream(Type.content(), V); + FieldListRecord FieldList; + cantFail(TypeDeserializer::deserializeAs(Type, + FieldList)); + return visitMemberRecordStream(FieldList.Data, V); } CVType LeafRecordImpl::toCodeViewRecord( diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index eec733c7d7f9..c0e2cdd54f07 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -423,7 +423,7 @@ Error DWARFYAML::emitDebugInfo(raw_ostream &OS, const DWARFYAML::Data &DI) { std::string EntryBuffer; raw_string_ostream EntryBufferOS(EntryBuffer); - uint64_t AbbrevTableID = Unit.AbbrevTableID.getValueOr(I); + uint64_t AbbrevTableID = Unit.AbbrevTableID.value_or(I); for (const DWARFYAML::Entry &Entry : Unit.Entries) { if (Expected EntryLength = writeDIE(DI, I, AbbrevTableID, Params, Entry, EntryBufferOS, @@ -507,7 +507,7 @@ static void writeExtendedOpcode(const DWARFYAML::LineTableOpcode &Op, for (auto OpByte : Op.UnknownOpcodeData) writeInteger((uint8_t)OpByte, OpBufferOS, IsLittleEndian); } - uint64_t ExtLen = Op.ExtLen.getValueOr(OpBuffer.size()); + uint64_t ExtLen = Op.ExtLen.value_or(OpBuffer.size()); encodeULEB128(ExtLen, OS); OS.write(OpBuffer.data(), OpBuffer.size()); } @@ -582,7 +582,7 @@ Error DWARFYAML::emitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) { writeInteger(LineTable.LineRange, BufferOS, DI.IsLittleEndian); std::vector StandardOpcodeLengths = - LineTable.StandardOpcodeLengths.getValueOr( + LineTable.StandardOpcodeLengths.value_or( getStandardOpcodeLengths(LineTable.Version, LineTable.OpcodeBase)); uint8_t OpcodeBase = LineTable.OpcodeBase ? *LineTable.OpcodeBase diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 2591bf4d5af4..37116ada9901 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -62,7 +62,7 @@ DWARFYAML::Data::getAbbrevTableInfoByID(uint64_t ID) const { for (auto &AbbrevTable : enumerate(DebugAbbrev)) { // If the abbrev table's ID isn't specified, we use the index as its ID. 
uint64_t AbbrevTableID = - AbbrevTable.value().ID.getValueOr(AbbrevTable.index()); + AbbrevTable.value().ID.value_or(AbbrevTable.index()); auto It = AbbrevTableInfoMap.insert( {AbbrevTableID, AbbrevTableInfo{/*Index=*/AbbrevTable.index(), /*Offset=*/AbbrevTableOffset}}); diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp new file mode 100644 index 000000000000..9834b036de90 --- /dev/null +++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp @@ -0,0 +1,190 @@ +//===- DXContainerEmitter.cpp - Convert YAML to a DXContainer -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Binary emitter for yaml to DXContainer binary +/// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/DXContainer.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class DXContainerWriter { +public: + DXContainerWriter(DXContainerYAML::Object &ObjectFile) + : ObjectFile(ObjectFile) {} + + Error write(raw_ostream &OS); + +private: + DXContainerYAML::Object &ObjectFile; + + Error computePartOffsets(); + Error validatePartOffsets(); + Error validateSize(uint32_t Computed); + + void writeHeader(raw_ostream &OS); + void writeParts(raw_ostream &OS); +}; +} // namespace + +Error DXContainerWriter::validateSize(uint32_t Computed) { + if (!ObjectFile.Header.FileSize) + ObjectFile.Header.FileSize = Computed; + else if (*ObjectFile.Header.FileSize < Computed) + return createStringError(errc::result_out_of_range, + "File size specified is too small."); + return Error::success(); +} + +Error DXContainerWriter::validatePartOffsets() { + if (ObjectFile.Parts.size() != ObjectFile.Header.PartOffsets->size()) + return createStringError( + errc::invalid_argument, + "Mismatch between number of parts and part offsets."); + uint32_t RollingOffset = + sizeof(dxbc::Header) + (ObjectFile.Header.PartCount * sizeof(uint32_t)); + for (auto I : llvm::zip(ObjectFile.Parts, *ObjectFile.Header.PartOffsets)) { + if (RollingOffset > std::get<1>(I)) + return createStringError(errc::invalid_argument, + "Offset mismatch, not enough space for data."); + RollingOffset = + std::get<1>(I) + sizeof(dxbc::PartHeader) + std::get<0>(I).Size; + } + if (Error Err = validateSize(RollingOffset)) + return Err; + + return Error::success(); +} + +Error DXContainerWriter::computePartOffsets() { + if (ObjectFile.Header.PartOffsets) + return validatePartOffsets(); + uint32_t RollingOffset = + sizeof(dxbc::Header) + (ObjectFile.Header.PartCount * sizeof(uint32_t)); + ObjectFile.Header.PartOffsets = std::vector(); + for (const auto &Part : ObjectFile.Parts) { + ObjectFile.Header.PartOffsets->push_back(RollingOffset); + RollingOffset += sizeof(dxbc::PartHeader) + Part.Size; + } + if (Error Err = validateSize(RollingOffset)) + return Err; + + return Error::success(); +} + +void DXContainerWriter::writeHeader(raw_ostream &OS) { + dxbc::Header Header; + memcpy(Header.Magic, "DXBC", 4); + memcpy(Header.FileHash.Digest, ObjectFile.Header.Hash.data(), 16); + Header.Version.Major = ObjectFile.Header.Version.Major; + 
Header.Version.Minor = ObjectFile.Header.Version.Minor; + Header.FileSize = *ObjectFile.Header.FileSize; + Header.PartCount = ObjectFile.Parts.size(); + if (sys::IsBigEndianHost) + Header.swapBytes(); + OS.write(reinterpret_cast<char *>(&Header), sizeof(Header)); + SmallVector<uint32_t> Offsets(ObjectFile.Header.PartOffsets->begin(), + ObjectFile.Header.PartOffsets->end()); + if (sys::IsBigEndianHost) + for (auto &O : Offsets) + sys::swapByteOrder(O); + OS.write(reinterpret_cast<char *>(Offsets.data()), + Offsets.size() * sizeof(uint32_t)); +} + +void DXContainerWriter::writeParts(raw_ostream &OS) { + uint32_t RollingOffset = + sizeof(dxbc::Header) + (ObjectFile.Header.PartCount * sizeof(uint32_t)); + for (auto I : llvm::zip(ObjectFile.Parts, *ObjectFile.Header.PartOffsets)) { + if (RollingOffset < std::get<1>(I)) { + uint32_t PadBytes = std::get<1>(I) - RollingOffset; + OS.write_zeros(PadBytes); + } + DXContainerYAML::Part P = std::get<0>(I); + OS.write(P.Name.c_str(), 4); + if (sys::IsBigEndianHost) + sys::swapByteOrder(P.Size); + OS.write(reinterpret_cast<const char *>(&P.Size), sizeof(uint32_t)); + RollingOffset = std::get<1>(I) + sizeof(dxbc::PartHeader); + + if (P.Name == "DXIL" && P.Program) { + dxbc::ProgramHeader Header; + Header.MajorVersion = P.Program->MajorVersion; + Header.MinorVersion = P.Program->MinorVersion; + Header.Unused = 0; + Header.ShaderKind = P.Program->ShaderKind; + memcpy(Header.Bitcode.Magic, "DXIL", 4); + Header.Bitcode.MajorVersion = P.Program->DXILMajorVersion; + Header.Bitcode.MinorVersion = P.Program->DXILMinorVersion; + Header.Bitcode.Unused = 0; + + // Compute the optional fields if needed... + if (P.Program->DXILOffset) + Header.Bitcode.Offset = P.Program->DXILOffset.getValue(); + else + Header.Bitcode.Offset = sizeof(dxbc::BitcodeHeader); + + if (P.Program->DXILSize) + Header.Bitcode.Size = P.Program->DXILSize.getValue(); + else + Header.Bitcode.Size = P.Program->DXIL ? P.Program->DXIL->size() : 0; + + if (P.Program->Size) + Header.Size = P.Program->Size.getValue(); + else + Header.Size = sizeof(dxbc::ProgramHeader) + Header.Bitcode.Size; + + uint32_t BitcodeOffset = Header.Bitcode.Offset; + if (sys::IsBigEndianHost) + Header.swapBytes(); + OS.write(reinterpret_cast<const char *>(&Header), + sizeof(dxbc::ProgramHeader)); + if (P.Program->DXIL) { + if (BitcodeOffset > sizeof(dxbc::BitcodeHeader)) { + uint32_t PadBytes = BitcodeOffset - sizeof(dxbc::BitcodeHeader); + OS.write_zeros(PadBytes); + } + OS.write(reinterpret_cast<char *>(P.Program->DXIL->data()), + P.Program->DXIL->size()); + } + } + } +} + +Error DXContainerWriter::write(raw_ostream &OS) { + if (Error Err = computePartOffsets()) + return Err; + writeHeader(OS); + writeParts(OS); + return Error::success(); +} + +namespace llvm { +namespace yaml { + +bool yaml2dxcontainer(DXContainerYAML::Object &Doc, raw_ostream &Out, + ErrorHandler EH) { + DXContainerWriter Writer(Doc); + if (Error Err = Writer.write(Out)) { + handleAllErrors(std::move(Err), + [&](const ErrorInfoBase &Err) { EH(Err.message()); }); + return false; + } + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp new file mode 100644 index 000000000000..7952fa4bf0e8 --- /dev/null +++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp @@ -0,0 +1,61 @@ +//===- DXContainerYAML.cpp - DXContainer YAMLIO implementation ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
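computePartOffsets above lays the parts out back to back: the first part begins immediately after the fixed container header plus the 32-bit offset table, and each subsequent part begins after the previous part's 8-byte part header and payload. A sketch of the same rolling-offset arithmetic with plain types; the 32-byte header size is an assumption derived from the fields written in writeHeader (4-byte magic, 16-byte hash, two 16-bit version numbers, file size, part count):

    #include <cstdint>
    #include <vector>

    std::vector<uint32_t> computeOffsets(const std::vector<uint32_t> &PartSizes) {
      const uint32_t HeaderSize = 32;  // assumed sizeof(dxbc::Header)
      const uint32_t PartHdrSize = 8;  // assumed sizeof(dxbc::PartHeader): name + size
      uint32_t Rolling = HeaderSize + PartSizes.size() * sizeof(uint32_t);
      std::vector<uint32_t> Offsets;
      for (uint32_t Size : PartSizes) {
        Offsets.push_back(Rolling); // each part header starts here
        Rolling += PartHdrSize + Size;
      }
      return Offsets;
    }

For example, a container with two parts of 16 and 8 bytes gets offsets 40 and 64: 32 + 2*4 = 40 for the first part, then 40 + 8 + 16 = 64 for the second.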
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of +// DXContainerYAML. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/DXContainerYAML.h" + +namespace llvm { +namespace yaml { + +void MappingTraits<DXContainerYAML::VersionTuple>::mapping( + IO &IO, DXContainerYAML::VersionTuple &Version) { + IO.mapRequired("Major", Version.Major); + IO.mapRequired("Minor", Version.Minor); +} + +void MappingTraits<DXContainerYAML::FileHeader>::mapping( + IO &IO, DXContainerYAML::FileHeader &Header) { + IO.mapRequired("Hash", Header.Hash); + IO.mapRequired("Version", Header.Version); + IO.mapOptional("FileSize", Header.FileSize); + IO.mapRequired("PartCount", Header.PartCount); + IO.mapOptional("PartOffsets", Header.PartOffsets); +} + +void MappingTraits<DXContainerYAML::DXILProgram>::mapping( + IO &IO, DXContainerYAML::DXILProgram &Program) { + IO.mapRequired("MajorVersion", Program.MajorVersion); + IO.mapRequired("MinorVersion", Program.MinorVersion); + IO.mapRequired("ShaderKind", Program.ShaderKind); + IO.mapOptional("Size", Program.Size); + IO.mapRequired("DXILMajorVersion", Program.DXILMajorVersion); + IO.mapRequired("DXILMinorVersion", Program.DXILMinorVersion); + IO.mapOptional("DXILSize", Program.DXILSize); + IO.mapOptional("DXIL", Program.DXIL); +} + +void MappingTraits<DXContainerYAML::Part>::mapping(IO &IO, + DXContainerYAML::Part &P) { + IO.mapRequired("Name", P.Name); + IO.mapRequired("Size", P.Size); + IO.mapOptional("Program", P.Program); +} + +void MappingTraits<DXContainerYAML::Object>::mapping( + IO &IO, DXContainerYAML::Object &Obj) { + IO.mapTag("!dxcontainer", true); + IO.mapRequired("Header", Obj.Header); + IO.mapRequired("Parts", Obj.Parts); +} + +} // namespace yaml +} // namespace llvm diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index e378be3892fe..f5611ed1197b 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -412,7 +412,7 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) } // TODO: Only create the .strtab here if any symbols have been requested. ImplicitSections.insert(".strtab"); - if (!SecHdrTable || !SecHdrTable->NoHeaders.getValueOr(false)) + if (!SecHdrTable || !SecHdrTable->NoHeaders.value_or(false)) ImplicitSections.insert(SectionHeaderStringTableName); // Insert placeholders for implicit sections that are not @@ -596,12 +596,11 @@ unsigned ELFState<ELFT>::toSectionIndex(StringRef S, StringRef LocSec, const ELFYAML::SectionHeaderTable &SectionHeaders = Doc.getSectionHeaderTable(); if (SectionHeaders.IsImplicit || - (SectionHeaders.NoHeaders && !SectionHeaders.NoHeaders.getValue()) || + (SectionHeaders.NoHeaders && !*SectionHeaders.NoHeaders) || SectionHeaders.isDefault()) return Index; - assert(!SectionHeaders.NoHeaders.getValueOr(false) || - !SectionHeaders.Sections); + assert(!SectionHeaders.NoHeaders.value_or(false) || !SectionHeaders.Sections); size_t FirstExcluded = SectionHeaders.Sections ?
SectionHeaders.Sections->size() : 0; if (Index > FirstExcluded) { @@ -771,7 +770,7 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders, if (ELFYAML::SectionHeaderTable *S = dyn_cast<ELFYAML::SectionHeaderTable>(D.get())) { - if (S->NoHeaders.getValueOr(false)) + if (S->NoHeaders.value_or(false)) continue; if (!S->Offset) @@ -808,7 +807,7 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders, SHeader.sh_entsize = *Sec->EntSize; else SHeader.sh_entsize = ELFYAML::getDefaultShEntSize<ELFT>( - Doc.Header.Machine.getValueOr(ELF::EM_NONE), Sec->Type, Sec->Name); + Doc.Header.Machine.value_or(ELF::EM_NONE), Sec->Type, Sec->Name); // We have a few sections like string or symbol tables that are usually // added implicitly to the end. However, if they are explicitly specified @@ -958,9 +957,9 @@ ELFState<ELFT>::toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols, else if (Sym.Index) Symbol.st_shndx = *Sym.Index; - Symbol.st_value = Sym.Value.getValueOr(yaml::Hex64(0)); + Symbol.st_value = Sym.Value.value_or(yaml::Hex64(0)); Symbol.st_other = Sym.Other ? *Sym.Other : 0; - Symbol.st_size = Sym.Size.getValueOr(yaml::Hex64(0)); + Symbol.st_size = Sym.Size.value_or(yaml::Hex64(0)); } return Ret; @@ -1394,12 +1393,22 @@ void ELFState<ELFT>::writeSectionContent( return; for (const ELFYAML::BBAddrMapEntry &E : *Section.Entries) { + // Write version and feature values. + if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { + if (E.Version > 1) + WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " + << static_cast<int>(E.Version) + << "; encoding using the most recent version"; + CBA.write(E.Version); + CBA.write(E.Feature); + SHeader.sh_size += 2; + } // Write the address of the function. CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness); // Write number of BBEntries (number of basic blocks in the function). This // is overridden by the 'NumBlocks' YAML field when specified. uint64_t NumBlocks = - E.NumBlocks.getValueOr(E.BBEntries ? E.BBEntries->size() : 0); + E.NumBlocks.value_or(E.BBEntries ? E.BBEntries->size() : 0); SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks); // Write all BBEntries.
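Both the NumBlocks count written above with writeULEB128 and the LC_FUNCTION_STARTS deltas emitted by the Mach-O writer further down use LEB128, the variable-length encoding these section formats share with DWARF. A self-contained sketch of the unsigned variant (the patch itself uses the encodeULEB128 from llvm/Support/LEB128.h):

    #include <cstdint>
    #include <vector>

    // ULEB128: emit the value 7 bits at a time, least-significant group first;
    // the high bit of each byte flags that another byte follows. Counts below
    // 128 therefore cost a single byte.
    void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // continuation bit
        Out.push_back(Byte);
      } while (Value != 0);
    }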
if (!E.BBEntries) @@ -1486,10 +1495,10 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader, return; CBA.write<uint32_t>( - Section.NBucket.getValueOr(llvm::yaml::Hex64(Section.Bucket->size())), + Section.NBucket.value_or(llvm::yaml::Hex64(Section.Bucket->size())), ELFT::TargetEndianness); CBA.write<uint32_t>( - Section.NChain.getValueOr(llvm::yaml::Hex64(Section.Chain->size())), + Section.NChain.value_or(llvm::yaml::Hex64(Section.Chain->size())), ELFT::TargetEndianness); for (uint32_t Val : *Section.Bucket) @@ -1518,10 +1527,10 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::VerdefEntry &E = (*Section.Entries)[I]; Elf_Verdef VerDef; - VerDef.vd_version = E.Version.getValueOr(1); - VerDef.vd_flags = E.Flags.getValueOr(0); - VerDef.vd_ndx = E.VersionNdx.getValueOr(0); - VerDef.vd_hash = E.Hash.getValueOr(0); + VerDef.vd_version = E.Version.value_or(1); + VerDef.vd_flags = E.Flags.value_or(0); + VerDef.vd_ndx = E.VersionNdx.value_or(0); + VerDef.vd_hash = E.Hash.value_or(0); VerDef.vd_aux = sizeof(Elf_Verdef); VerDef.vd_cnt = E.VerNames.size(); if (I == Section.Entries->size() - 1) @@ -1830,7 +1839,7 @@ template <class ELFT> void ELFState<ELFT>::buildSectionIndex() { if (!ExcludedSectionHeaders.insert(Hdr.Name).second) llvm_unreachable("buildSectionIndex() failed"); - if (SectionHeaders.NoHeaders.getValueOr(false)) + if (SectionHeaders.NoHeaders.value_or(false)) for (const ELFYAML::Section *S : Sections) if (!ExcludedSectionHeaders.insert(S->Name).second) llvm_unreachable("buildSectionIndex() failed"); @@ -1960,7 +1969,7 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc, writeArrayData(OS, makeArrayRef(PHeaders)); const ELFYAML::SectionHeaderTable &SHT = Doc.getSectionHeaderTable(); - if (!SHT.NoHeaders.getValueOr(false)) + if (!SHT.NoHeaders.value_or(false)) CBA.updateDataAt(*SHT.Offset, SHeaders.data(), SHT.getNumHeaders(SHeaders.size()) * sizeof(Elf_Shdr)); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index d597148b98ab..cdd180cdc15d 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -29,6 +29,8 @@ namespace llvm { ELFYAML::Chunk::~Chunk() = default; namespace ELFYAML { +ELF_ELFOSABI Object::getOSAbi() const { return Header.OSABI; } + unsigned Object::getMachine() const { if (Header.Machine) return *Header.Machine; @@ -175,6 +177,10 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration( ECase(NT_AMD_PAL_METADATA); // AMDGPU specific notes. (Code Object V3) ECase(NT_AMDGPU_METADATA); + // Android specific notes.
+ ECase(NT_ANDROID_TYPE_IDENT); + ECase(NT_ANDROID_TYPE_KUSER); + ECase(NT_ANDROID_TYPE_MEMTAG); #undef ECase IO.enumFallback<Hex32>(Value); } @@ -344,6 +350,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration( ECase(EM_BPF); ECase(EM_VE); ECase(EM_CSKY); + ECase(EM_LOONGARCH); #undef ECase IO.enumFallback<Hex16>(Value); } @@ -560,6 +567,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH); @@ -570,6 +578,11 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1034, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1035, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1036, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1100, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1101, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1102, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1103, EF_AMDGPU_MACH); switch (Object->Header.ABIVersion) { default: // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. @@ -641,6 +654,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration( ECase(SHT_LLVM_SYMPART); ECase(SHT_LLVM_PART_EHDR); ECase(SHT_LLVM_PART_PHDR); + ECase(SHT_LLVM_BB_ADDR_MAP_V0); ECase(SHT_LLVM_BB_ADDR_MAP); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); @@ -705,7 +719,14 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO, BCase(SHF_GROUP); BCase(SHF_TLS); BCase(SHF_COMPRESSED); - BCase(SHF_GNU_RETAIN); + switch (Object->getOSAbi()) { + case ELF::ELFOSABI_SOLARIS: + BCase(SHF_SUNW_NODISCARD); + break; + default: + BCase(SHF_GNU_RETAIN); + break; + } switch (Object->getMachine()) { case ELF::EM_ARM: BCase(SHF_ARM_PURECODE); @@ -735,6 +756,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO, void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration( IO &IO, ELFYAML::ELF_SHN &Value) { + const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext()); + assert(Object && "The IO context is not initialized"); #define ECase(X) IO.enumCase(Value, #X, ELF::X) ECase(SHN_UNDEF); ECase(SHN_LORESERVE); @@ -747,6 +770,15 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration( ECase(SHN_XINDEX); ECase(SHN_HIRESERVE); ECase(SHN_AMDGPU_LDS); + + if (!IO.outputting() || Object->getMachine() == ELF::EM_MIPS) { + ECase(SHN_MIPS_ACOMMON); + ECase(SHN_MIPS_TEXT); + ECase(SHN_MIPS_DATA); + ECase(SHN_MIPS_SCOMMON); + ECase(SHN_MIPS_SUNDEFINED); + } + ECase(SHN_HEXAGON_SCOMMON); ECase(SHN_HEXAGON_SCOMMON_1); ECase(SHN_HEXAGON_SCOMMON_2); @@ -839,12 +871,18 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration( case ELF::EM_CSKY: #include "llvm/BinaryFormat/ELFRelocs/CSKY.def" break; + case ELF::EM_PPC: +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" + break; case ELF::EM_PPC64: #include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" break; case ELF::EM_68K: #include "llvm/BinaryFormat/ELFRelocs/M68k.def" break; + case ELF::EM_LOONGARCH: +#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" + break; default: // Nothing to do. break; @@ -1298,7 +1336,7 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { // We also support reading a content as array of bytes using the ContentArray // key. obj2yaml never prints this field.
- assert(!IO.outputting() || !Section.ContentBuf.hasValue()); + assert(!IO.outputting() || !Section.ContentBuf); IO.mapOptional("ContentArray", Section.ContentBuf); if (Section.ContentBuf) { if (Section.Content) @@ -1327,8 +1365,7 @@ static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) { // obj2yaml does not dump these fields. They can be used to override nchain // and nbucket values for creating broken sections. - assert(!IO.outputting() || - (!Section.NBucket.hasValue() && !Section.NChain.hasValue())); + assert(!IO.outputting() || (!Section.NBucket && !Section.NChain)); IO.mapOptional("NChain", Section.NChain); IO.mapOptional("NBucket", Section.NBucket); } @@ -1603,6 +1640,7 @@ Section.reset(new ELFYAML::CallGraphProfileSection()); sectionMapping(IO, *cast<ELFYAML::CallGraphProfileSection>(Section.get())); break; + case ELF::SHT_LLVM_BB_ADDR_MAP_V0: case ELF::SHT_LLVM_BB_ADDR_MAP: if (!IO.outputting()) Section.reset(new ELFYAML::BBAddrMapSection()); @@ -1732,6 +1770,8 @@ void MappingTraits::mapping( void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping( IO &IO, ELFYAML::BBAddrMapEntry &E) { assert(IO.getContext() && "The IO context is not initialized"); + IO.mapRequired("Version", E.Version); + IO.mapOptional("Feature", E.Feature, Hex8(0)); IO.mapOptional("Address", E.Address, Hex64(0)); IO.mapOptional("NumBlocks", E.NumBlocks); IO.mapOptional("BBEntries", E.BBEntries); diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index b9fad2982828..3d06f3d0bf86 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -55,6 +55,7 @@ private: void writeStringTable(raw_ostream &OS); void writeExportTrie(raw_ostream &OS); void writeDynamicSymbolTable(raw_ostream &OS); + void writeFunctionStarts(raw_ostream &OS); void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry); void ZeroToOffset(raw_ostream &OS, size_t offset); @@ -484,6 +485,7 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) { MachO::dyld_info_command *DyldInfoOnlyCmd = nullptr; MachO::symtab_command *SymtabCmd = nullptr; MachO::dysymtab_command *DSymtabCmd = nullptr; + MachO::linkedit_data_command *FunctionStartsCmd = nullptr; for (auto &LC : Obj.LoadCommands) { switch (LC.Data.load_command_data.cmd) { case MachO::LC_SYMTAB: @@ -511,12 +513,15 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) { WriteQueue.push_back(std::make_pair( DSymtabCmd->indirectsymoff, &MachOWriter::writeDynamicSymbolTable)); break; + case MachO::LC_FUNCTION_STARTS: + FunctionStartsCmd = &LC.Data.linkedit_data_command_data; + WriteQueue.push_back(std::make_pair(FunctionStartsCmd->dataoff, + &MachOWriter::writeFunctionStarts)); + break; } } - llvm::sort(WriteQueue, [](const writeOperation &a, const writeOperation &b) { - return a.first < b.first; - }); + llvm::sort(WriteQueue, llvm::less_first()); for (auto writeOp : WriteQueue) { ZeroToOffset(OS, writeOp.first); @@ -569,6 +574,17 @@ void MachOWriter::writeDynamicSymbolTable(raw_ostream &OS) { sizeof(yaml::Hex32::BaseType)); } +void MachOWriter::writeFunctionStarts(raw_ostream &OS) { + uint64_t Addr = 0; + for (uint64_t NextAddr : Obj.LinkEdit.FunctionStarts) { + uint64_t Delta = NextAddr - Addr; + encodeULEB128(Delta, OS); + Addr = NextAddr; + } + + OS.write('\0'); +} + class UniversalWriter { public: UniversalWriter(yaml::YamlObjectFile &ObjectFile) diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp index f32009458110..b6f3b53a42b3 100644 ---
b/llvm/lib/ObjectYAML/MachOYAML.cpp @@ -26,10 +26,10 @@ namespace llvm { MachOYAML::LoadCommand::~LoadCommand() = default; bool MachOYAML::LinkEditData::isEmpty() const { - return 0 == - RebaseOpcodes.size() + BindOpcodes.size() + WeakBindOpcodes.size() + - LazyBindOpcodes.size() + ExportTrie.Children.size() + - NameList.size() + StringTable.size(); + return 0 == RebaseOpcodes.size() + BindOpcodes.size() + + WeakBindOpcodes.size() + LazyBindOpcodes.size() + + ExportTrie.Children.size() + NameList.size() + + StringTable.size() + FunctionStarts.size(); } namespace yaml { @@ -165,6 +165,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping( IO.mapOptional("NameList", LinkEditData.NameList); IO.mapOptional("StringTable", LinkEditData.StringTable); IO.mapOptional("IndirectSymbols", LinkEditData.IndirectSymbols); + IO.mapOptional("FunctionStarts", LinkEditData.FunctionStarts); } void MappingTraits::mapping( diff --git a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp index bbfd2cd8cbab..9505473a2415 100644 --- a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp +++ b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp @@ -219,7 +219,7 @@ static Directory layout(BlobAllocator &File, Stream &S) { // If DataEnd is not set, we assume everything we generated is a part of the // stream. Result.Location.DataSize = - DataEnd.getValueOr(File.tell()) - Result.Location.RVA; + DataEnd.value_or(File.tell()) - Result.Location.RVA; return Result; } diff --git a/llvm/lib/ObjectYAML/ObjectYAML.cpp b/llvm/lib/ObjectYAML/ObjectYAML.cpp index 63769d2eba0e..d57e5583016b 100644 --- a/llvm/lib/ObjectYAML/ObjectYAML.cpp +++ b/llvm/lib/ObjectYAML/ObjectYAML.cpp @@ -56,12 +56,19 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO, } else if (IO.mapTag("!minidump")) { ObjectFile.Minidump.reset(new MinidumpYAML::Object()); MappingTraits<MinidumpYAML::Object>::mapping(IO, *ObjectFile.Minidump); + } else if (IO.mapTag("!Offload")) { + ObjectFile.Offload.reset(new OffloadYAML::Binary()); + MappingTraits<OffloadYAML::Binary>::mapping(IO, *ObjectFile.Offload); } else if (IO.mapTag("!WASM")) { ObjectFile.Wasm.reset(new WasmYAML::Object()); MappingTraits<WasmYAML::Object>::mapping(IO, *ObjectFile.Wasm); } else if (IO.mapTag("!XCOFF")) { ObjectFile.Xcoff.reset(new XCOFFYAML::Object()); MappingTraits<XCOFFYAML::Object>::mapping(IO, *ObjectFile.Xcoff); + } else if (IO.mapTag("!dxcontainer")) { + ObjectFile.DXContainer.reset(new DXContainerYAML::Object()); + MappingTraits<DXContainerYAML::Object>::mapping(IO, + *ObjectFile.DXContainer); } else if (const Node *N = In.getCurrentNode()) { if (N->getRawTag().empty()) IO.setError("YAML Object File missing document type tag!"); diff --git a/llvm/lib/ObjectYAML/OffloadEmitter.cpp b/llvm/lib/ObjectYAML/OffloadEmitter.cpp new file mode 100644 index 000000000000..3ffbc4ff0e11 --- /dev/null +++ b/llvm/lib/ObjectYAML/OffloadEmitter.cpp @@ -0,0 +1,68 @@ +//===- OffloadEmitter.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/OffloadBinary.h" +#include "llvm/ObjectYAML/OffloadYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace OffloadYAML; + +namespace llvm { +namespace yaml { + +bool yaml2offload(Binary &Doc, raw_ostream &Out, ErrorHandler EH) { + for (const auto &Member : Doc.Members) { + object::OffloadBinary::OffloadingImage Image{}; + if (Member.ImageKind) + Image.TheImageKind = *Member.ImageKind; + if (Member.OffloadKind) + Image.TheOffloadKind = *Member.OffloadKind; + if (Member.Flags) + Image.Flags = *Member.Flags; + + StringMap<StringRef> &StringData = Image.StringData; + if (Member.StringEntries) { + for (const auto &Entry : *Member.StringEntries) { + StringData[Entry.Key] = Entry.Value; + } + } + + SmallVector<char> Data; + raw_svector_ostream OS(Data); + if (Member.Content) + Member.Content->writeAsBinary(OS); + Image.Image = MemoryBuffer::getMemBufferCopy(OS.str()); + + std::unique_ptr<MemoryBuffer> Binary = object::OffloadBinary::write(Image); + + // Copy the data to a new buffer so we can modify the bytes directly. + SmallVector<char> NewBuffer; + std::copy(Binary->getBufferStart(), Binary->getBufferEnd(), + std::back_inserter(NewBuffer)); + auto *TheHeader = + reinterpret_cast<object::OffloadBinary::Header *>(&NewBuffer[0]); + if (Doc.Version) + TheHeader->Version = *Doc.Version; + if (Doc.Size) + TheHeader->Size = *Doc.Size; + if (Doc.EntryOffset) + TheHeader->EntryOffset = *Doc.EntryOffset; + if (Doc.EntrySize) + TheHeader->EntrySize = *Doc.EntrySize; + + Out.write(NewBuffer.begin(), NewBuffer.size()); + } + + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/llvm/lib/ObjectYAML/OffloadYAML.cpp b/llvm/lib/ObjectYAML/OffloadYAML.cpp new file mode 100644 index 000000000000..d5a0edde2179 --- /dev/null +++ b/llvm/lib/ObjectYAML/OffloadYAML.cpp @@ -0,0 +1,78 @@ +//===- OffloadYAML.cpp - Offload Binary YAMLIO implementation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of offload +// binaries.
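The new OffloadYAML and DXContainerYAML files both follow the standard YAMLTraits pattern: a single MappingTraits<T> specialization drives parsing and emission symmetrically, with mapRequired rejecting documents that omit a key and mapOptional substituting a default (and suppressing default-valued keys on output). A generic sketch of the pattern; the struct and keys here are hypothetical, not part of this patch:

    #include "llvm/Support/YAMLTraits.h"
    #include <string>

    struct PartInfo {
      std::string Name;
      uint32_t Size = 0;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<PartInfo> {
      static void mapping(IO &IO, PartInfo &P) {
        IO.mapRequired("Name", P.Name);            // parse error if absent
        IO.mapOptional("Size", P.Size, uint32_t(0)); // defaulted; omitted on output when 0
      }
    };
    } // namespace yaml
    } // namespace llvm

Reading then becomes yaml::Input In(Buffer); PartInfo P; In >> P; and writing yaml::Output Out(OS); Out << P; with the same traits serving both directions.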
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/OffloadYAML.h" + +namespace llvm { + +namespace yaml { + +void ScalarEnumerationTraits<object::ImageKind>::enumeration( + IO &IO, object::ImageKind &Value) { +#define ECase(X) IO.enumCase(Value, #X, object::X) + ECase(IMG_None); + ECase(IMG_Object); + ECase(IMG_Bitcode); + ECase(IMG_Cubin); + ECase(IMG_Fatbinary); + ECase(IMG_PTX); + ECase(IMG_LAST); +#undef ECase + IO.enumFallback<Hex16>(Value); +} + +void ScalarEnumerationTraits<object::OffloadKind>::enumeration( + IO &IO, object::OffloadKind &Value) { +#define ECase(X) IO.enumCase(Value, #X, object::X) + ECase(OFK_None); + ECase(OFK_OpenMP); + ECase(OFK_Cuda); + ECase(OFK_HIP); + ECase(OFK_LAST); +#undef ECase + IO.enumFallback<Hex16>(Value); +} + +void MappingTraits<OffloadYAML::Binary>::mapping(IO &IO, + OffloadYAML::Binary &O) { + assert(!IO.getContext() && "The IO context is initialized already"); + IO.setContext(&O); + IO.mapTag("!Offload", true); + IO.mapOptional("Version", O.Version); + IO.mapOptional("Size", O.Size); + IO.mapOptional("EntryOffset", O.EntryOffset); + IO.mapOptional("EntrySize", O.EntrySize); + IO.mapRequired("Members", O.Members); + IO.setContext(nullptr); +} + +void MappingTraits<OffloadYAML::Binary::StringEntry>::mapping( + IO &IO, OffloadYAML::Binary::StringEntry &SE) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapRequired("Key", SE.Key); + IO.mapRequired("Value", SE.Value); +} + +void MappingTraits<OffloadYAML::Binary::Member>::mapping( + IO &IO, OffloadYAML::Binary::Member &M) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("ImageKind", M.ImageKind); + IO.mapOptional("OffloadKind", M.OffloadKind); + IO.mapOptional("Flags", M.Flags); + IO.mapOptional("String", M.StringEntries); + IO.mapOptional("Content", M.Content); +} + +} // namespace yaml + +} // namespace llvm diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index 2aa2ef3e5541..6230312eff7b 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -33,7 +33,7 @@ private: void writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, uint32_t SectionIndex); - void writeInitExpr(raw_ostream &OS, const wasm::WasmInitExpr &InitExpr); + void writeInitExpr(raw_ostream &OS, const WasmYAML::InitExpr &InitExpr); void writeSectionContent(raw_ostream &OS, WasmYAML::CustomSection &Section); void writeSectionContent(raw_ostream &OS, WasmYAML::TypeSection &Section); @@ -129,29 +129,34 @@ void WasmWriter::reportError(const Twine &Msg) { } void WasmWriter::writeInitExpr(raw_ostream &OS, - const wasm::WasmInitExpr &InitExpr) { - writeUint8(OS, InitExpr.Opcode); - switch (InitExpr.Opcode) { - case wasm::WASM_OPCODE_I32_CONST: - encodeSLEB128(InitExpr.Value.Int32, OS); - break; - case wasm::WASM_OPCODE_I64_CONST: - encodeSLEB128(InitExpr.Value.Int64, OS); - break; - case wasm::WASM_OPCODE_F32_CONST: - writeUint32(OS, InitExpr.Value.Float32); - break; - case wasm::WASM_OPCODE_F64_CONST: - writeUint64(OS, InitExpr.Value.Float64); - break; - case wasm::WASM_OPCODE_GLOBAL_GET: - encodeULEB128(InitExpr.Value.Global, OS); - break; - default: - reportError("unknown opcode in init_expr: " + Twine(InitExpr.Opcode)); - return; + const WasmYAML::InitExpr &InitExpr) { + if (InitExpr.Extended) { + InitExpr.Body.writeAsBinary(OS); + } else { + writeUint8(OS, InitExpr.Inst.Opcode); + switch (InitExpr.Inst.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + encodeSLEB128(InitExpr.Inst.Value.Int32, OS); + break; + case wasm::WASM_OPCODE_I64_CONST: + encodeSLEB128(InitExpr.Inst.Value.Int64, OS); + break; +
case wasm::WASM_OPCODE_F32_CONST: + writeUint32(OS, InitExpr.Inst.Value.Float32); + break; + case wasm::WASM_OPCODE_F64_CONST: + writeUint64(OS, InitExpr.Inst.Value.Float64); + break; + case wasm::WASM_OPCODE_GLOBAL_GET: + encodeULEB128(InitExpr.Inst.Value.Global, OS); + break; + default: + reportError("unknown opcode in init_expr: " + + Twine(InitExpr.Inst.Opcode)); + return; + } + writeUint8(OS, wasm::WASM_OPCODE_END); } - writeUint8(OS, wasm::WASM_OPCODE_END); } void WasmWriter::writeSectionContent(raw_ostream &OS, @@ -187,13 +192,10 @@ void WasmWriter::writeSectionContent(raw_ostream &OS, // SYMBOL_TABLE subsection if (Section.SymbolTable.size()) { writeUint8(OS, wasm::WASM_SYMBOL_TABLE); - encodeULEB128(Section.SymbolTable.size(), SubSection.getStream()); -#ifndef NDEBUG - uint32_t SymbolIndex = 0; -#endif - for (const WasmYAML::SymbolInfo &Info : Section.SymbolTable) { - assert(Info.Index == SymbolIndex++); + for (auto Sym : llvm::enumerate(Section.SymbolTable)) { + const WasmYAML::SymbolInfo &Info = Sym.value(); + assert(Info.Index == Sym.index()); writeUint8(SubSection.getStream(), Info.Kind); encodeULEB128(Info.Flags, SubSection.getStream()); switch (Info.Kind) { @@ -481,7 +483,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS, ++ExpectedIndex; writeUint8(OS, Global.Type); writeUint8(OS, Global.Mutable); - writeInitExpr(OS, Global.InitExpr); + writeInitExpr(OS, Global.Init); } } diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp index 3f0172ebf361..7ca422487df2 100644 --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -367,8 +367,7 @@ void MappingTraits::mapping( void MappingTraits<WasmYAML::Limits>::mapping(IO &IO, WasmYAML::Limits &Limits) { - if (!IO.outputting() || Limits.Flags) - IO.mapOptional("Flags", Limits.Flags); + IO.mapOptional("Flags", Limits.Flags, 0); IO.mapRequired("Minimum", Limits.Minimum); if (!IO.outputting() || Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) IO.mapOptional("Maximum", Limits.Maximum); @@ -376,8 +375,7 @@ void MappingTraits<WasmYAML::ElemSegment>::mapping( IO &IO, WasmYAML::ElemSegment &Segment) { - if (!IO.outputting() || Segment.Flags) - IO.mapOptional("Flags", Segment.Flags); + IO.mapOptional("Flags", Segment.Flags, 0); if (!IO.outputting() || Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER) IO.mapOptional("TableNumber", Segment.TableNumber); @@ -420,35 +418,40 @@ void MappingTraits<WasmYAML::Global>::mapping(IO &IO, IO.mapRequired("Index", Global.Index); IO.mapRequired("Type", Global.Type); IO.mapRequired("Mutable", Global.Mutable); - IO.mapRequired("InitExpr", Global.InitExpr); + IO.mapRequired("InitExpr", Global.Init); } -void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO, - wasm::WasmInitExpr &Expr) { - WasmYAML::Opcode Op = Expr.Opcode; - IO.mapRequired("Opcode", Op); - Expr.Opcode = Op; - switch (Expr.Opcode) { - case wasm::WASM_OPCODE_I32_CONST: - IO.mapRequired("Value", Expr.Value.Int32); - break; - case wasm::WASM_OPCODE_I64_CONST: - IO.mapRequired("Value", Expr.Value.Int64); - break; - case wasm::WASM_OPCODE_F32_CONST: - IO.mapRequired("Value", Expr.Value.Float32); - break; - case wasm::WASM_OPCODE_F64_CONST: - IO.mapRequired("Value", Expr.Value.Float64); - break; - case wasm::WASM_OPCODE_GLOBAL_GET: - IO.mapRequired("Index", Expr.Value.Global); - break; - case wasm::WASM_OPCODE_REF_NULL: { - WasmYAML::ValueType Ty = wasm::WASM_TYPE_EXTERNREF; - IO.mapRequired("Type", Ty); - break; - } +void MappingTraits<WasmYAML::InitExpr>::mapping(IO &IO, + WasmYAML::InitExpr &Expr) { + IO.mapOptional("Extended", Expr.Extended,
false); + if (Expr.Extended) { + IO.mapRequired("Body", Expr.Body); + } else { + WasmYAML::Opcode Op = Expr.Inst.Opcode; + IO.mapRequired("Opcode", Op); + Expr.Inst.Opcode = Op; + switch (Expr.Inst.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + IO.mapRequired("Value", Expr.Inst.Value.Int32); + break; + case wasm::WASM_OPCODE_I64_CONST: + IO.mapRequired("Value", Expr.Inst.Value.Int64); + break; + case wasm::WASM_OPCODE_F32_CONST: + IO.mapRequired("Value", Expr.Inst.Value.Float32); + break; + case wasm::WASM_OPCODE_F64_CONST: + IO.mapRequired("Value", Expr.Inst.Value.Float64); + break; + case wasm::WASM_OPCODE_GLOBAL_GET: + IO.mapRequired("Index", Expr.Inst.Value.Global); + break; + case wasm::WASM_OPCODE_REF_NULL: { + WasmYAML::ValueType Ty = wasm::WASM_TYPE_EXTERNREF; + IO.mapRequired("Type", Ty); + break; + } + } } } @@ -464,8 +467,8 @@ void MappingTraits<WasmYAML::DataSegment>::mapping( if ((Segment.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { IO.mapRequired("Offset", Segment.Offset); } else { - Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST; - Segment.Offset.Value.Int32 = 0; + Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST; + Segment.Offset.Inst.Value.Int32 = 0; } IO.mapRequired("Content", Segment.Content); } diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp index 2a7204d3f773..1ceac6c05893 100644 --- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp @@ -212,8 +212,8 @@ bool XCOFFWriter::initStringTable() { for (const std::unique_ptr<XCOFFYAML::AuxSymbolEnt> &AuxSym : YamlSym.AuxEntries) { if (auto AS = dyn_cast<XCOFFYAML::FileAuxEnt>(AuxSym.get())) - if (nameShouldBeInStringTable(AS->FileNameOrString.getValueOr(""))) - StrTblBuilder.add(AS->FileNameOrString.getValueOr("")); + if (nameShouldBeInStringTable(AS->FileNameOrString.value_or(""))) + StrTblBuilder.add(AS->FileNameOrString.value_or("")); } } @@ -247,8 +247,7 @@ bool XCOFFWriter::initFileHeader(uint64_t CurrentOffset) { Twine(AuxCount)); return false; } - YamlSym.NumberOfAuxEntries = - YamlSym.NumberOfAuxEntries.getValueOr(AuxCount); + YamlSym.NumberOfAuxEntries = YamlSym.NumberOfAuxEntries.value_or(AuxCount); // Add the number of auxiliary symbols to the total number. InitFileHdr.NumberOfSymTableEntries += *YamlSym.NumberOfAuxEntries; } @@ -378,59 +377,60 @@ void XCOFFWriter::writeFileHeader() { } void XCOFFWriter::writeAuxFileHeader() { - W.write(InitAuxFileHdr.Magic.getValueOr(yaml::Hex16(1))); - W.write(InitAuxFileHdr.Version.getValueOr(yaml::Hex16(1))); + W.write(InitAuxFileHdr.Magic.value_or(yaml::Hex16(1))); + W.write(InitAuxFileHdr.Version.value_or(yaml::Hex16(1))); if (Is64Bit) { W.OS.write_zeros(4); // Reserved for debugger.
- W.write(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0))); + W.write(InitAuxFileHdr.TextStartAddr.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.DataStartAddr.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.TOCAnchorAddr.value_or(yaml::Hex64(0))); } else { - W.write(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0))); + W.write(InitAuxFileHdr.TextSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.InitDataSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.BssDataSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.EntryPointAddr.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.TextStartAddr.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.DataStartAddr.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.TOCAnchorAddr.value_or(yaml::Hex64(0))); } - W.write(InitAuxFileHdr.SecNumOfEntryPoint.getValueOr(0)); - W.write(InitAuxFileHdr.SecNumOfText.getValueOr(0)); - W.write(InitAuxFileHdr.SecNumOfData.getValueOr(0)); - W.write(InitAuxFileHdr.SecNumOfTOC.getValueOr(0)); - W.write(InitAuxFileHdr.SecNumOfLoader.getValueOr(0)); - W.write(InitAuxFileHdr.SecNumOfBSS.getValueOr(0)); - W.write(InitAuxFileHdr.MaxAlignOfText.getValueOr(yaml::Hex16(0))); - W.write(InitAuxFileHdr.MaxAlignOfData.getValueOr(yaml::Hex16(0))); - W.write(InitAuxFileHdr.ModuleType.getValueOr(yaml::Hex16(0))); - W.write(InitAuxFileHdr.CpuFlag.getValueOr(yaml::Hex8(0))); + W.write(InitAuxFileHdr.SecNumOfEntryPoint.value_or(0)); + W.write(InitAuxFileHdr.SecNumOfText.value_or(0)); + W.write(InitAuxFileHdr.SecNumOfData.value_or(0)); + W.write(InitAuxFileHdr.SecNumOfTOC.value_or(0)); + W.write(InitAuxFileHdr.SecNumOfLoader.value_or(0)); + W.write(InitAuxFileHdr.SecNumOfBSS.value_or(0)); + W.write(InitAuxFileHdr.MaxAlignOfText.value_or(yaml::Hex16(0))); + W.write(InitAuxFileHdr.MaxAlignOfData.value_or(yaml::Hex16(0))); + W.write(InitAuxFileHdr.ModuleType.value_or(yaml::Hex16(0))); + W.write(InitAuxFileHdr.CpuFlag.value_or(yaml::Hex8(0))); W.write(0); // Reserved for CPU type. 
if (Is64Bit) { - W.write(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0))); - W.write(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0))); - W.write(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0))); + W.write(InitAuxFileHdr.TextPageSize.value_or(yaml::Hex8(0))); + W.write(InitAuxFileHdr.DataPageSize.value_or(yaml::Hex8(0))); + W.write(InitAuxFileHdr.StackPageSize.value_or(yaml::Hex8(0))); W.write( - InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0x80))); - W.write(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0))); + InitAuxFileHdr.FlagAndTDataAlignment.value_or(yaml::Hex8(0x80))); + W.write(InitAuxFileHdr.TextSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.InitDataSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.BssDataSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.EntryPointAddr.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.MaxStackSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.MaxDataSize.value_or(yaml::Hex64(0))); } else { - W.write(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0))); - W.write(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0))); + W.write(InitAuxFileHdr.MaxStackSize.value_or(yaml::Hex64(0))); + W.write(InitAuxFileHdr.MaxDataSize.value_or(yaml::Hex64(0))); W.OS.write_zeros(4); // Reserved for debugger. - W.write(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0))); - W.write(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0))); - W.write(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0))); + W.write(InitAuxFileHdr.TextPageSize.value_or(yaml::Hex8(0))); + W.write(InitAuxFileHdr.DataPageSize.value_or(yaml::Hex8(0))); + W.write(InitAuxFileHdr.StackPageSize.value_or(yaml::Hex8(0))); W.write( - InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0))); + InitAuxFileHdr.FlagAndTDataAlignment.value_or(yaml::Hex8(0))); } - W.write(InitAuxFileHdr.SecNumOfTData.getValueOr(0)); - W.write(InitAuxFileHdr.SecNumOfTBSS.getValueOr(0)); + W.write(InitAuxFileHdr.SecNumOfTData.value_or(0)); + W.write(InitAuxFileHdr.SecNumOfTBSS.value_or(0)); if (Is64Bit) { - W.write(InitAuxFileHdr.Flag.getValueOr(yaml::Hex16(XCOFF::SHR_SYMTAB))); + W.write( + InitAuxFileHdr.Flag.value_or(yaml::Hex16(XCOFF::SHR_SYMTAB))); if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize64) W.OS.write_zeros(InitFileHdr.AuxHeaderSize - XCOFF::AuxFileHeaderSize64); } else if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize32) { @@ -526,52 +526,52 @@ bool XCOFFWriter::writeRelocations() { void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::CsectAuxEnt &AuxSym) { if (Is64Bit) { - W.write(AuxSym.SectionOrLengthLo.getValueOr(0)); - W.write(AuxSym.ParameterHashIndex.getValueOr(0)); - W.write(AuxSym.TypeChkSectNum.getValueOr(0)); - W.write(AuxSym.SymbolAlignmentAndType.getValueOr(0)); - W.write(AuxSym.StorageMappingClass.getValueOr(XCOFF::XMC_PR)); - W.write(AuxSym.SectionOrLengthHi.getValueOr(0)); + W.write(AuxSym.SectionOrLengthLo.value_or(0)); + W.write(AuxSym.ParameterHashIndex.value_or(0)); + W.write(AuxSym.TypeChkSectNum.value_or(0)); + W.write(AuxSym.SymbolAlignmentAndType.value_or(0)); + W.write(AuxSym.StorageMappingClass.value_or(XCOFF::XMC_PR)); + 
W.write(AuxSym.SectionOrLengthHi.value_or(0)); W.write(0); W.write(XCOFF::AUX_CSECT); } else { - W.write(AuxSym.SectionOrLength.getValueOr(0)); - W.write(AuxSym.ParameterHashIndex.getValueOr(0)); - W.write(AuxSym.TypeChkSectNum.getValueOr(0)); - W.write(AuxSym.SymbolAlignmentAndType.getValueOr(0)); - W.write(AuxSym.StorageMappingClass.getValueOr(XCOFF::XMC_PR)); - W.write(AuxSym.StabInfoIndex.getValueOr(0)); - W.write(AuxSym.StabSectNum.getValueOr(0)); + W.write(AuxSym.SectionOrLength.value_or(0)); + W.write(AuxSym.ParameterHashIndex.value_or(0)); + W.write(AuxSym.TypeChkSectNum.value_or(0)); + W.write(AuxSym.SymbolAlignmentAndType.value_or(0)); + W.write(AuxSym.StorageMappingClass.value_or(XCOFF::XMC_PR)); + W.write(AuxSym.StabInfoIndex.value_or(0)); + W.write(AuxSym.StabSectNum.value_or(0)); } } void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::ExcpetionAuxEnt &AuxSym) { assert(Is64Bit && "can't write the exception auxiliary symbol for XCOFF32"); - W.write(AuxSym.OffsetToExceptionTbl.getValueOr(0)); - W.write(AuxSym.SizeOfFunction.getValueOr(0)); - W.write(AuxSym.SymIdxOfNextBeyond.getValueOr(0)); + W.write(AuxSym.OffsetToExceptionTbl.value_or(0)); + W.write(AuxSym.SizeOfFunction.value_or(0)); + W.write(AuxSym.SymIdxOfNextBeyond.value_or(0)); W.write(0); W.write(XCOFF::AUX_EXCEPT); } void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FunctionAuxEnt &AuxSym) { if (Is64Bit) { - W.write(AuxSym.PtrToLineNum.getValueOr(0)); - W.write(AuxSym.SizeOfFunction.getValueOr(0)); - W.write(AuxSym.SymIdxOfNextBeyond.getValueOr(0)); + W.write(AuxSym.PtrToLineNum.value_or(0)); + W.write(AuxSym.SizeOfFunction.value_or(0)); + W.write(AuxSym.SymIdxOfNextBeyond.value_or(0)); W.write(0); W.write(XCOFF::AUX_FCN); } else { - W.write(AuxSym.OffsetToExceptionTbl.getValueOr(0)); - W.write(AuxSym.SizeOfFunction.getValueOr(0)); - W.write(AuxSym.PtrToLineNum.getValueOr(0)); - W.write(AuxSym.SymIdxOfNextBeyond.getValueOr(0)); + W.write(AuxSym.OffsetToExceptionTbl.value_or(0)); + W.write(AuxSym.SizeOfFunction.value_or(0)); + W.write(AuxSym.PtrToLineNum.value_or(0)); + W.write(AuxSym.SymIdxOfNextBeyond.value_or(0)); W.OS.write_zeros(2); } } void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FileAuxEnt &AuxSym) { - StringRef FileName = AuxSym.FileNameOrString.getValueOr(""); + StringRef FileName = AuxSym.FileNameOrString.value_or(""); if (nameShouldBeInStringTable(FileName)) { W.write(0); W.write(StrTblBuilder.getOffset(FileName)); @@ -579,7 +579,7 @@ void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FileAuxEnt &AuxSym) { writeName(FileName, W); } W.OS.write_zeros(XCOFF::FileNamePadSize); - W.write(AuxSym.FileStringType.getValueOr(XCOFF::XFT_FN)); + W.write(AuxSym.FileStringType.value_or(XCOFF::XFT_FN)); if (Is64Bit) { W.OS.write_zeros(2); W.write(XCOFF::AUX_FILE); @@ -590,36 +590,36 @@ void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FileAuxEnt &AuxSym) { void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::BlockAuxEnt &AuxSym) { if (Is64Bit) { - W.write(AuxSym.LineNum.getValueOr(0)); + W.write(AuxSym.LineNum.value_or(0)); W.OS.write_zeros(13); W.write(XCOFF::AUX_SYM); } else { W.OS.write_zeros(2); - W.write(AuxSym.LineNumHi.getValueOr(0)); - W.write(AuxSym.LineNumLo.getValueOr(0)); + W.write(AuxSym.LineNumHi.value_or(0)); + W.write(AuxSym.LineNumLo.value_or(0)); W.OS.write_zeros(12); } } void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::SectAuxEntForDWARF &AuxSym) { if (Is64Bit) { - W.write(AuxSym.LengthOfSectionPortion.getValueOr(0)); - W.write(AuxSym.NumberOfRelocEnt.getValueOr(0)); + 
W.write(AuxSym.LengthOfSectionPortion.value_or(0)); + W.write(AuxSym.NumberOfRelocEnt.value_or(0)); W.write(0); W.write(XCOFF::AUX_SECT); } else { - W.write(AuxSym.LengthOfSectionPortion.getValueOr(0)); + W.write(AuxSym.LengthOfSectionPortion.value_or(0)); W.OS.write_zeros(4); - W.write(AuxSym.NumberOfRelocEnt.getValueOr(0)); + W.write(AuxSym.NumberOfRelocEnt.value_or(0)); W.OS.write_zeros(6); } } void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::SectAuxEntForStat &AuxSym) { assert(!Is64Bit && "can't write the stat auxiliary symbol for XCOFF64"); - W.write(AuxSym.SectionLength.getValueOr(0)); - W.write(AuxSym.NumberOfRelocEnt.getValueOr(0)); - W.write(AuxSym.NumberOfLineNum.getValueOr(0)); + W.write(AuxSym.SectionLength.value_or(0)); + W.write(AuxSym.NumberOfRelocEnt.value_or(0)); + W.write(AuxSym.NumberOfLineNum.value_or(0)); W.OS.write_zeros(10); } @@ -686,7 +686,7 @@ bool XCOFFWriter::writeSymbols() { W.write(YamlSym.Type); W.write(YamlSym.StorageClass); - uint8_t NumOfAuxSym = YamlSym.NumberOfAuxEntries.getValueOr(0); + uint8_t NumOfAuxSym = YamlSym.NumberOfAuxEntries.value_or(0); W.write(NumOfAuxSym); if (!NumOfAuxSym && !YamlSym.AuxEntries.size()) diff --git a/llvm/lib/ObjectYAML/yaml2obj.cpp b/llvm/lib/ObjectYAML/yaml2obj.cpp index d19fa0a52530..06050e246fbf 100644 --- a/llvm/lib/ObjectYAML/yaml2obj.cpp +++ b/llvm/lib/ObjectYAML/yaml2obj.cpp @@ -42,10 +42,14 @@ bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler, return yaml2macho(Doc, Out, ErrHandler); if (Doc.Minidump) return yaml2minidump(*Doc.Minidump, Out, ErrHandler); + if (Doc.Offload) + return yaml2offload(*Doc.Offload, Out, ErrHandler); if (Doc.Wasm) return yaml2wasm(*Doc.Wasm, Out, ErrHandler); if (Doc.Xcoff) return yaml2xcoff(*Doc.Xcoff, Out, ErrHandler); + if (Doc.DXContainer) + return yaml2dxcontainer(*Doc.DXContainer, Out, ErrHandler); ErrHandler("unknown document type"); return false; diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp index ad7be5fbec19..fab0fb07cbc8 100644 --- a/llvm/lib/Option/ArgList.cpp +++ b/llvm/lib/Option/ArgList.cpp @@ -95,6 +95,13 @@ std::vector ArgList::getAllArgValues(OptSpecifier Id) const { return std::vector(Values.begin(), Values.end()); } +void ArgList::addOptInFlag(ArgStringList &Output, OptSpecifier Pos, + OptSpecifier Neg) const { + if (Arg *A = getLastArg(Pos, Neg)) + if (A->getOption().matches(Pos)) + A->render(*this, Output); +} + void ArgList::AddAllArgsExcept(ArgStringList &Output, ArrayRef Ids, ArrayRef ExcludeIds) const { diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 015ca1eec4df..42fde3752724 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/CostModel.h" #include "llvm/Analysis/CycleAnalysis.h" #include "llvm/Analysis/DDG.h" @@ -185,7 +186,7 @@ #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" -#include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerAtomicPass.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" @@ -212,6 +213,7 @@ #include "llvm/Transforms/Scalar/SpeculativeExecution.h" 
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Scalar/TLSVariableHoist.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" @@ -229,11 +231,13 @@ #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/LowerGlobalDtors.h" #include "llvm/Transforms/Utils/LowerInvoke.h" #include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" @@ -371,6 +375,17 @@ bool shouldPopulateClassToPassNames() { !printAfterPasses().empty(); } +// A pass for testing -print-on-crash. +// DO NOT USE THIS EXCEPT FOR TESTING! +class TriggerCrashPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &, ModuleAnalysisManager &) { + abort(); + return PreservedAnalyses::all(); + } + static StringRef name() { return "TriggerCrashPass"; } +}; + } // namespace PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, @@ -585,6 +600,10 @@ Expected parseInlinerPassOptions(StringRef Params) { return parseSinglePassOption(Params, "only-mandatory", "InlinerPass"); } +Expected parseCoroSplitPassOptions(StringRef Params) { + return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass"); +} + Expected parseEarlyCSEPassOptions(StringRef Params) { return parseSinglePassOption(Params, "memssa", "EarlyCSE"); } @@ -679,6 +698,8 @@ Expected parseSimplifyCFGOptions(StringRef Params) { bool Enable = !ParamName.consume_front("no-"); if (ParamName == "forward-switch-cond") { Result.forwardSwitchCondToPhi(Enable); + } else if (ParamName == "switch-range-to-icmp") { + Result.convertSwitchRangeToICmp(Enable); } else if (ParamName == "switch-to-lookup") { Result.convertSwitchToLookupTable(Enable); } else if (ParamName == "keep-loops") { @@ -747,6 +768,24 @@ Expected> parseLoopUnswitchOptions(StringRef Params) { return Result; } +Expected parseLICMOptions(StringRef Params) { + LICMOptions Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "allowspeculation") { + Result.AllowSpeculation = Enable; + } else { + return make_error( + formatv("invalid LICM pass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + Expected parseMergedLoadStoreMotionOptions(StringRef Params) { bool Result = false; while (!Params.empty()) { diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 93637c890c4f..a5345172aae1 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/Coroutines/CoroCleanup.h" +#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" #include "llvm/Transforms/Coroutines/CoroEarly.h" #include 
"llvm/Transforms/Coroutines/CoroElide.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" @@ -140,7 +141,7 @@ static cl::opt UseInlineAdvisor( "Use release mode (AOT-compiled model)."))); static cl::opt EnableSyntheticCounts( - "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, + "enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass")); @@ -150,8 +151,7 @@ static cl::opt cl::Hidden, cl::desc("Enable inline deferral during PGO")); -static cl::opt EnableMemProfiler("enable-mem-prof", cl::init(false), - cl::Hidden, cl::ZeroOrMore, +static cl::opt EnableMemProfiler("enable-mem-prof", cl::Hidden, cl::desc("Enable memory profiler")); static cl::opt EnableModuleInliner("enable-module-inliner", @@ -159,13 +159,13 @@ static cl::opt EnableModuleInliner("enable-module-inliner", cl::desc("Enable module inliner")); static cl::opt PerformMandatoryInliningsFirst( - "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore, + "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining.")); static cl::opt EnableO3NonTrivialUnswitching( "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, - cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3")); + cl::desc("Enable non-trivial loop unswitching for -O3")); static cl::opt EnableEagerlyInvalidateAnalyses( "eagerly-invalidate-analyses", cl::init(true), cl::Hidden, @@ -233,9 +233,7 @@ void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, // Helper to add AnnotationRemarksPass. static void addAnnotationRemarksPass(ModulePassManager &MPM) { - FunctionPassManager FPM; - FPM.addPass(AnnotationRemarksPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); } // Helper to check if the current compilation phase is preparing for LTO @@ -259,14 +257,16 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); FPM.addPass(LibCallsShrinkWrapPass()); invokePeepholeEPCallbacks(FPM, Level); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) @@ -291,14 +291,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(LoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. 
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM1.addPass(SimpleLoopUnswitchPass()); if (EnableLoopFlatten) LPM1.addPass(LoopFlattenPass()); @@ -335,7 +340,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. @@ -373,7 +379,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, // the simplifications and basic cleanup after all the simplifications. // TODO: Investigate if this is too expensive. FPM.addPass(ADCEPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -408,7 +415,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Global value numbering based sinking. if (EnableGVNSink) { FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); } if (EnableConstraintElimination) @@ -421,7 +429,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); if (Level == OptimizationLevel::O3) FPM.addPass(AggressiveInstCombinePass()); @@ -438,7 +447,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(PGOMemOPSizeOpt()); FPM.addPass(TailCallElimPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) @@ -463,15 +473,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(LoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Disable header duplication in loop rotation at -Oz. LPM1.addPass( LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. 
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM1.addPass( SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && EnableO3NonTrivialUnswitching)); @@ -510,7 +525,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. @@ -567,7 +583,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(CoroElidePass()); @@ -575,8 +592,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); - FPM.addPass(SimplifyCFGPass( - SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -596,7 +615,8 @@ void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, - std::string ProfileRemappingFile) { + std::string ProfileRemappingFile, + ThinOrFullLTOPhase LTOPhase) { assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); if (!IsCS && !DisablePreInliner) { InlineParams IP; @@ -608,13 +628,16 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, // performance testing. // FIXME: this comment is cargo culted from the old pass manager, revisit). IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; - ModuleInlinerWrapperPass MIWP(IP); + ModuleInlinerWrapperPass MIWP( + IP, /* MandatoryFirst */ true, + InlineContext{LTOPhase, InlinePass::EarlyInliner}); CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; FPM.addPass(SROAPass()); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. - FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); @@ -641,13 +664,13 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, // Perform PGO instrumentation. MPM.addPass(PGOInstrumentationGen(IsCS)); - FunctionPassManager FPM; // Disable header duplication in loop rotation at -Oz. 
- FPM.addPass(createFunctionToLoopPassAdaptor( - LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false)); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), - PTO.EagerlyInvalidateAnalyses)); + MPM.addPass(createModuleToFunctionPassAdaptor( + createFunctionToLoopPassAdaptor( + LoopRotatePass(Level != OptimizationLevel::Oz), + /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/false), + PTO.EagerlyInvalidateAnalyses)); // Add the profile lowering pass. InstrProfOptions Options; @@ -692,6 +715,12 @@ ModuleInlinerWrapperPass PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { InlineParams IP = getInlineParamsFromOptLevel(Level); + // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to + // disable hot callsite inline (as much as possible [1]) because it makes + // profile annotation in the backend inaccurate. + // + // [1] Note the cost of a function could be below zero due to erased + // prologue / epilogue. if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; @@ -699,8 +728,10 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, if (PGOOpt) IP.EnableDeferral = EnablePGOInlineDeferral; - ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, - UseInlineAdvisor, MaxDevirtIterations); + ModuleInlinerWrapperPass MIWP( + IP, PerformMandatoryInliningsFirst, + InlineContext{Phase, InlinePass::CGSCCInliner}, + UseInlineAdvisor, MaxDevirtIterations); // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. @@ -765,6 +796,12 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, ModulePassManager MPM; InlineParams IP = getInlineParamsFromOptLevel(Level); + // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to + // disable hot callsite inline (as much as possible [1]) because it makes + // profile annotation in the backend inaccurate. + // + // [1] Note the cost of a function could be below zero due to erased + // prologue / epilogue. if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; @@ -780,7 +817,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, // inline deferral logic in module inliner. IP.EnableDeferral = false; - MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor)); + MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); MPM.addPass(createModuleToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase), @@ -832,6 +869,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); + MPM.addPass(CoroEarlyPass()); // Create an early function pass manager to cleanup the output of the // frontend. 
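The new comments above explain why buildInlinerPipeline and buildModuleInlinerPipeline zero the hot-callsite threshold for ThinLTO pre-link with SamplePGO. A standalone sketch of that gating, with simplified stand-in types (the real InlineParams and PGOOptions carry many more fields):

  enum class LTOPhase { None, ThinLTOPreLink, ThinLTOPostLink };
  enum class PGOAction { None, IRInstr, IRUse, SampleUse };
  struct InlineParamsSketch { int HotCallSiteThreshold = 3000; };

  void tuneForPhase(InlineParamsSketch &IP, LTOPhase Phase, PGOAction Action) {
    // Threshold 0 disables hot-callsite inlining (as much as possible) so the
    // backend's profile annotation stays accurate after the pre-link step.
    if (Phase == LTOPhase::ThinLTOPreLink && Action == PGOAction::SampleUse)
      IP.HotCallSiteThreshold = 0;
  }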
@@ -842,7 +880,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, EarlyFPM.addPass(SimplifyCFGPass()); EarlyFPM.addPass(SROAPass()); EarlyFPM.addPass(EarlyCSEPass()); - EarlyFPM.addPass(CoroEarlyPass()); if (Level == OptimizationLevel::O3) EarlyFPM.addPass(CallSiteSplittingPass()); @@ -928,7 +965,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, GlobalCleanupPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(GlobalCleanupPM, Level); - GlobalCleanupPM.addPass(SimplifyCFGPass()); + GlobalCleanupPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), PTO.EagerlyInvalidateAnalyses)); @@ -939,7 +977,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, addPGOInstrPasses(MPM, Level, /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, Phase); MPM.addPass(PGOIndirectCallPromotion(false, false)); } if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && @@ -955,6 +993,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, else MPM.addPass(buildInlinerPipeline(Level, Phase)); + MPM.addPass(CoroCleanupPass()); + if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); MPM.addPass(ModuleMemProfilerPass()); @@ -1007,7 +1047,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, ExtraPasses.addPass(CorrelatedValuePropagationPass()); ExtraPasses.addPass(InstCombinePass()); LoopPassManager LPM; - LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); ExtraPasses.addPass( @@ -1015,7 +1056,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, ExtraPasses.addPass( createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); - ExtraPasses.addPass(SimplifyCFGPass()); + ExtraPasses.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); ExtraPasses.addPass(InstCombinePass()); FPM.addPass(std::move(ExtraPasses)); } @@ -1031,6 +1073,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // before SLP vectorization. 
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) @@ -1073,7 +1116,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, FPM.addPass( RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); } @@ -1087,7 +1131,9 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, ModulePassManager PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, - bool LTOPreLink) { + ThinOrFullLTOPhase LTOPhase) { + const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink || + LTOPhase == ThinOrFullLTOPhase::FullLTOPreLink); ModulePassManager MPM; // Optimize globals now that the module is fully simplified. @@ -1127,21 +1173,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, LTOPhase); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, LTOPhase); } - // Re-require GloblasAA here prior to function passes. This is particularly + // Re-compute GlobalsAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal // and richly annotated call graph. By computing aliasing and mod/ref // information for all local globals here, the late loop passes and notably // the vectorizer will be able to use them to help recognize vectorizable // memory operations. - MPM.addPass(RequireAnalysisPass()); + MPM.addPass(RecomputeGlobalsAAPass()); + + for (auto &C : OptimizerEarlyEPCallbacks) + C(MPM, Level); FunctionPassManager OptimizePM; OptimizePM.addPass(Float2IntPass()); @@ -1202,9 +1251,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. - OptimizePM.addPass(SimplifyCFGPass()); - - OptimizePM.addPass(CoroCleanupPass()); + OptimizePM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), @@ -1230,9 +1278,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); - if (PTO.CallGraphProfile) - MPM.addPass(CGProfilePass()); - // Now we need to do some global optimization transforms. // FIXME: It would seem like these should come first in the optimization // pipeline and maybe be the bottom of the canonicalization pipeline? 
Weird @@ -1240,6 +1285,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, MPM.addPass(GlobalDCEPass()); MPM.addPass(ConstantMergePass()); + if (PTO.CallGraphProfile && !LTOPreLink) + MPM.addPass(CGProfilePass()); + // TODO: Relative look table converter pass caused an issue when full lto is // enabled. See https://reviews.llvm.org/D94355 for more details. // Until the issue fixed, disable this pass during pre-linking phase. @@ -1270,13 +1318,14 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + const ThinOrFullLTOPhase LTOPhase = LTOPreLink + ? ThinOrFullLTOPhase::FullLTOPreLink + : ThinOrFullLTOPhase::None; // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline( - Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink - : ThinOrFullLTOPhase::None)); + MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); + MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase)); if (PGOOpt && PGOOpt->PseudoProbeForProfiling && PGOOpt->Action == PGOOptions::SampleUse) @@ -1330,11 +1379,6 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // Reduce the size of the IR as much as possible. MPM.addPass(GlobalOptPass()); - // Module simplification splits coroutines, but does not fully clean up - // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up - // on these, we schedule the cleanup here. - MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); - if (PGOOpt && PGOOpt->PseudoProbeForProfiling && PGOOpt->Action == PGOOptions::SampleUse) MPM.addPass(PseudoProbeUpdatePass()); @@ -1400,7 +1444,8 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( Level, ThinOrFullLTOPhase::ThinLTOPostLink)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level)); + MPM.addPass(buildModuleOptimizationPipeline( + Level, ThinOrFullLTOPhase::ThinLTOPostLink)); // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1425,6 +1470,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Convert @llvm.global.annotations to !annotation metadata. MPM.addPass(Annotation2MetadataPass()); + for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) + C(MPM, Level); + // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. MPM.addPass(CrossDSOCFIPass()); @@ -1438,6 +1486,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // in ICP. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); + for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) + C(MPM, Level); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1469,10 +1520,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(InferFunctionAttrsPass()); if (Level.getSpeedupLevel() > 1) { - FunctionPassManager EarlyFPM; - EarlyFPM.addPass(CallSiteSplittingPass()); MPM.addPass(createModuleToFunctionPassAdaptor( - std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); + CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses)); // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. 
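Several hunks above thread a ThinOrFullLTOPhase value through functions that previously took a bool LTOPreLink, deriving the bool back where it is still needed. A standalone sketch of the two mappings used (enumerators as in the patch; helper names hypothetical):

  enum class ThinOrFullLTOPhase {
    None, ThinLTOPreLink, ThinLTOPostLink, FullLTOPreLink, FullLTOPostLink
  };

  ThinOrFullLTOPhase phaseForPreLink(bool LTOPreLink) {
    return LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
                      : ThinOrFullLTOPhase::None;
  }

  bool isPreLink(ThinOrFullLTOPhase P) {
    return P == ThinOrFullLTOPhase::ThinLTOPreLink ||
           P == ThinOrFullLTOPhase::FullLTOPreLink;
  }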
@@ -1519,6 +1568,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // pipeline). MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); + for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) + C(MPM, Level); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1556,7 +1608,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. // Run the inliner now. - MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level))); + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParamsFromOptLevel(Level), + /* MandatoryFirst */ true, + InlineContext{ThinOrFullLTOPhase::FullLTOPostLink, + InlinePass::CGSCCInliner})); // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); @@ -1573,7 +1629,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); - FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); + FPM.addPass(JumpThreadingPass()); // Do a post inline PGO instrumentation and use pass. This is a context // sensitive PGO pass. @@ -1581,11 +1637,13 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + ThinOrFullLTOPhase::FullLTOPostLink); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, - PGOOpt->ProfileRemappingFile); + PGOOpt->ProfileRemappingFile, + ThinOrFullLTOPhase::FullLTOPostLink); } // Break up allocas @@ -1612,7 +1670,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, FunctionPassManager MainFPM; MainFPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); if (RunNewGVN) @@ -1656,7 +1715,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass())); invokePeepholeEPCallbacks(MainFPM, Level); - MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); + MainFPM.addPass(JumpThreadingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), PTO.EagerlyInvalidateAnalyses)); @@ -1676,8 +1735,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Add late LTO optimization passes. // Delete basic blocks, which optimization passes may have killed. - MPM.addPass(createModuleToFunctionPassAdaptor( - SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)))); + MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts( + true)))); // Drop bodies of available eternally objects to improve GlobalDCE. MPM.addPass(EliminateAvailableExternallyPass()); @@ -1688,6 +1748,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); + if (PTO.CallGraphProfile) + MPM.addPass(CGProfilePass()); + + for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) + C(MPM, Level); + // Emit annotation remarks. 
addAnnotationRemarksPass(MPM); @@ -1770,6 +1836,10 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, if (!FPM.isEmpty()) MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + + for (auto &C : OptimizerEarlyEPCallbacks) + C(MPM, Level); + if (!VectorizerStartEPCallbacks.empty()) { FunctionPassManager FPM; for (auto &C : VectorizerStartEPCallbacks) @@ -1778,11 +1848,14 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass())); + ModulePassManager CoroPM; + CoroPM.addPass(CoroEarlyPass()); CGSCCPassManager CGPM; CGPM.addPass(CoroSplitPass()); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); - MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + CoroPM.addPass(CoroCleanupPass()); + CoroPM.addPass(GlobalDCEPass()); + MPM.addPass(CoroConditionalWrapper(std::move(CoroPM))); for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 8e0af11b854d..7c29bffbc327 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -26,7 +26,6 @@ MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis()) MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) -MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) @@ -50,9 +49,12 @@ MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass()) MODULE_PASS("cg-profile", CGProfilePass()) MODULE_PASS("check-debugify", NewPMCheckDebugifyPass()) MODULE_PASS("constmerge", ConstantMergePass()) +MODULE_PASS("coro-early", CoroEarlyPass()) +MODULE_PASS("coro-cleanup", CoroCleanupPass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) MODULE_PASS("debugify", NewPMDebugifyPass()) +MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) @@ -64,6 +66,7 @@ MODULE_PASS("globalsplit", GlobalSplitPass()) MODULE_PASS("hotcoldsplit", HotColdSplittingPass()) MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) +MODULE_PASS("inliner-ml-advisor-release", ModuleInlinerWrapperPass(getInlineParams(), true, {}, InliningAdvisorMode::Release, 0)) MODULE_PASS("print", InlineAdvisorAnalysisPrinterPass(dbgs())) MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass( getInlineParams(), @@ -76,6 +79,7 @@ MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("ipsccp", IPSCCPPass()) MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) +MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("mergefunc", MergeFunctionsPass()) @@ -94,6 +98,7 @@ MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs())) 
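In the O0 hunk above, CoroEarly, CoroSplit, CoroCleanup and a GlobalDCE run are bundled into one sub-pipeline behind CoroConditionalWrapper, so the whole bundle can be skipped for modules without coroutines. A hedged standalone sketch of that wrapper shape (simplified types; the predicate is assumed, not LLVM's actual check):

  #include <functional>
  #include <vector>

  struct ModuleSketch { bool HasCoroIntrinsics = false; };

  struct ConditionalPipelineSketch {
    std::vector<std::function<void(ModuleSketch &)>> Passes;
    void run(ModuleSketch &M) {
      if (!M.HasCoroIntrinsics)
        return; // skip the entire coroutine lowering bundle
      for (auto &P : Passes)
        P(M);
    }
  };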
MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs())) MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs())) MODULE_PASS("print", ModuleDebugInfoPrinterPass(dbgs())) +MODULE_PASS("recompute-globalsaa", RecomputeGlobalsAAPass()) MODULE_PASS("rel-lookup-table-converter", RelLookupTableConverterPass()) MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) @@ -109,7 +114,9 @@ MODULE_PASS("strip-debug-declare", StripDebugDeclarePass()) MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass()) MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) +MODULE_PASS("trigger-crash", TriggerCrashPass()) MODULE_PASS("verify", VerifierPass()) +MODULE_PASS("view-callgraph", CallGraphViewerPass()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass()) MODULE_PASS("dfsan", DataFlowSanitizerPass()) MODULE_PASS("msan-module", ModuleMemorySanitizerPass({})) @@ -165,7 +172,6 @@ CGSCC_PASS("invalidate", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass()) CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass()) -CGSCC_PASS("coro-split", CoroSplitPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS @@ -179,6 +185,13 @@ CGSCC_PASS_WITH_PARAMS("inline", }, parseInlinerPassOptions, "only-mandatory") +CGSCC_PASS_WITH_PARAMS("coro-split", + "CoroSplitPass", + [](bool OptimizeFrame) { + return CoroSplitPass(OptimizeFrame); + }, + parseCoroSplitPassOptions, + "reuse-storage") #undef CGSCC_PASS_WITH_PARAMS #ifndef FUNCTION_ANALYSIS @@ -247,9 +260,7 @@ FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) -FUNCTION_PASS("coro-early", CoroEarlyPass()) FUNCTION_PASS("coro-elide", CoroElidePass()) -FUNCTION_PASS("coro-cleanup", CoroCleanupPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) FUNCTION_PASS("dfa-jump-threading", DFAJumpThreadingPass()) @@ -257,8 +268,14 @@ FUNCTION_PASS("div-rem-pairs", DivRemPairsPass()) FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("dot-cfg", CFGPrinterPass()) FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass()) -FUNCTION_PASS("dot-dom", DomTreePrinterPass()) -FUNCTION_PASS("dot-dom-only", DomTreeOnlyPrinterPass()) +FUNCTION_PASS("dot-dom", DomPrinter()) +FUNCTION_PASS("dot-dom-only", DomOnlyPrinter()) +FUNCTION_PASS("dot-post-dom", PostDomPrinter()) +FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter()) +FUNCTION_PASS("view-dom", DomViewer()) +FUNCTION_PASS("view-dom-only", DomOnlyViewer()) +FUNCTION_PASS("view-post-dom", PostDomViewer()) +FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer()) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flattencfg", FlattenCFGPass()) FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass()) @@ -361,6 +378,7 @@ FUNCTION_PASS("verify", SafepointIRVerifierPass()) FUNCTION_PASS("verify", ScalarEvolutionVerifierPass()) FUNCTION_PASS("view-cfg", CFGViewerPass()) FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass()) +FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) 
FUNCTION_PASS("memprof", MemProfilerPass()) @@ -402,13 +420,6 @@ FUNCTION_PASS_WITH_PARAMS("loop-unroll", "no-profile-peeling;profile-peeling;" "no-runtime;runtime;" "no-upperbound;upperbound") -FUNCTION_PASS_WITH_PARAMS("asan", - "AddressSanitizerPass", - [](AddressSanitizerOptions Opts) { - return AddressSanitizerPass(Opts); - }, - parseASanPassOptions, - "kernel") FUNCTION_PASS_WITH_PARAMS("msan", "MemorySanitizerPass", [](MemorySanitizerOptions Opts) { @@ -423,6 +434,7 @@ FUNCTION_PASS_WITH_PARAMS("simplifycfg", }, parseSimplifyCFGOptions, "no-forward-switch-cond;forward-switch-cond;" + "no-switch-range-to-icmp;switch-range-to-icmp;" "no-switch-to-lookup;switch-to-lookup;" "no-keep-loops;keep-loops;" "no-hoist-common-insts;hoist-common-insts;" @@ -466,7 +478,6 @@ FUNCTION_PASS_WITH_PARAMS("print", #ifndef LOOPNEST_PASS #define LOOPNEST_PASS(NAME, CREATE_PASS) #endif -LOOPNEST_PASS("lnicm", LNICMPass()) LOOPNEST_PASS("loop-flatten", LoopFlattenPass()) LOOPNEST_PASS("loop-interchange", LoopInterchangePass()) LOOPNEST_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) @@ -489,7 +500,6 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) LOOP_PASS("dot-ddg", DDGDotPrinterPass()) LOOP_PASS("invalidate", InvalidateAllAnalysesPass()) -LOOP_PASS("licm", LICMPass()) LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) LOOP_PASS("loop-rotate", LoopRotatePass()) @@ -522,4 +532,18 @@ LOOP_PASS_WITH_PARAMS("simple-loop-unswitch", }, parseLoopUnswitchOptions, "nontrivial;no-nontrivial;trivial;no-trivial") + +LOOP_PASS_WITH_PARAMS("licm", "LICMPass", + [](LICMOptions Params) { + return LICMPass(Params); + }, + parseLICMOptions, + "allowspeculation"); + +LOOP_PASS_WITH_PARAMS("lnicm", "LNICMPass", + [](LICMOptions Params) { + return LNICMPass(Params); + }, + parseLICMOptions, + "allowspeculation"); #undef LOOP_PASS_WITH_PARAMS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index c42b1cb26f13..ab9f8bf9c957 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -27,12 +28,14 @@ #include "llvm/IR/PrintPasses.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -164,6 +167,12 @@ static cl::opt DotCfgDir( cl::desc("Generate dot files into specified directory for changed IRs"), cl::Hidden, cl::init("./")); +// An option to print the IR that was being processed when a pass crashes. 
+static cl::opt + PrintCrashIR("print-on-crash", + cl::desc("Print the last form of the IR before crash"), + cl::init(false), cl::Hidden); + namespace { // Perform a system based diff between \p Before and \p After, using @@ -439,19 +448,11 @@ const Module *getModuleForComparison(Any IR) { return nullptr; } -} // namespace - -template ChangeReporter::~ChangeReporter() { - assert(BeforeStack.empty() && "Problem with Change Printer stack."); -} - -template -bool ChangeReporter::isInterestingFunction(const Function &F) { +bool isInterestingFunction(const Function &F) { return isFunctionInPrintList(F.getName()); } -template -bool ChangeReporter::isInterestingPass(StringRef PassID) { +bool isInterestingPass(StringRef PassID) { if (isIgnored(PassID)) return false; @@ -462,8 +463,7 @@ bool ChangeReporter::isInterestingPass(StringRef PassID) { // Return true when this is a pass on IR for which printing // of changes is desired. -template -bool ChangeReporter::isInteresting(Any IR, StringRef PassID) { +bool isInteresting(Any IR, StringRef PassID) { if (!isInterestingPass(PassID)) return false; if (any_isa(IR)) @@ -471,6 +471,12 @@ bool ChangeReporter::isInteresting(Any IR, StringRef PassID) { return true; } +} // namespace + +template ChangeReporter::~ChangeReporter() { + assert(BeforeStack.empty() && "Problem with Change Printer stack."); +} + template void ChangeReporter::saveIRBeforePass(Any IR, StringRef PassID) { // Always need to place something on the stack because invalidated passes @@ -587,7 +593,7 @@ void TextChangeReporter::handleIgnored(StringRef PassID, std::string &Name) { Out << formatv("*** IR Pass {0} on {1} ignored ***\n", PassID, Name); } -IRChangedPrinter::~IRChangedPrinter() {} +IRChangedPrinter::~IRChangedPrinter() = default; void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { if (PrintChanged == ChangePrinter::PrintChangedVerbose || @@ -1186,7 +1192,7 @@ void VerifyInstrumentation::registerCallbacks( if (DebugLogging) dbgs() << "Verifying function " << F->getName() << "\n"; - if (verifyFunction(*F)) + if (verifyFunction(*F, &errs())) report_fatal_error("Broken function found, compilation aborted!"); } else if (any_isa(IR) || any_isa(IR)) { @@ -1201,13 +1207,13 @@ void VerifyInstrumentation::registerCallbacks( if (DebugLogging) dbgs() << "Verifying module " << M->getName() << "\n"; - if (verifyModule(*M)) + if (verifyModule(*M, &errs())) report_fatal_error("Broken module found, compilation aborted!"); } }); } -InLineChangePrinter::~InLineChangePrinter() {} +InLineChangePrinter::~InLineChangePrinter() = default; void InLineChangePrinter::generateIRRepresentation(Any IR, StringRef PassID, IRDataT &D) { @@ -2117,6 +2123,51 @@ StandardInstrumentations::StandardInstrumentations( ChangePrinter::PrintChangedDotCfgVerbose), Verify(DebugLogging), VerifyEach(VerifyEach) {} +PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = + nullptr; + +void PrintCrashIRInstrumentation::reportCrashIR() { dbgs() << SavedIR; } + +void PrintCrashIRInstrumentation::SignalHandler(void *) { + // Called by signal handlers so do not lock here + // Is the PrintCrashIRInstrumentation still alive? 
+ if (!CrashReporter) + return; + + assert(PrintCrashIR && "Did not expect to get here without option set."); + CrashReporter->reportCrashIR(); +} + +PrintCrashIRInstrumentation::~PrintCrashIRInstrumentation() { + if (!CrashReporter) + return; + + assert(PrintCrashIR && "Did not expect to get here without option set."); + CrashReporter = nullptr; +} + +void PrintCrashIRInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!PrintCrashIR || CrashReporter) + return; + + sys::AddSignalHandler(SignalHandler, nullptr); + CrashReporter = this; + + PIC.registerBeforeNonSkippedPassCallback([this](StringRef PassID, Any IR) { + SavedIR.clear(); + raw_string_ostream OS(SavedIR); + OS << formatv("*** Dump of {0}IR Before Last Pass {1}", + llvm::forcePrintModuleIR() ? "Module " : "", PassID); + if (!isInteresting(IR, PassID)) { + OS << " Filtered Out ***\n"; + return; + } + OS << " Started ***\n"; + unwrapAndPrint(OS, IR); + }); +} + void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC, FunctionAnalysisManager *FAM) { PrintIR.registerCallbacks(PIC); @@ -2132,6 +2183,7 @@ void StandardInstrumentations::registerCallbacks( Verify.registerCallbacks(PIC); PrintChangedDiff.registerCallbacks(PIC); WebsiteChangeReporter.registerCallbacks(PIC); + PrintCrashIR.registerCallbacks(PIC); } template class ChangeReporter; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 94c2bee3590c..f9e58fd6afa5 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -123,13 +123,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { return C; } -Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) { - return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS))); +Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS, bool Simplify) { + auto Cnt = get(CounterExpression(CounterExpression::Add, LHS, RHS)); + return Simplify ? simplify(Cnt) : Cnt; } -Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) { - return simplify( - get(CounterExpression(CounterExpression::Subtract, LHS, RHS))); +Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS, + bool Simplify) { + auto Cnt = get(CounterExpression(CounterExpression::Subtract, LHS, RHS)); + return Simplify ? 
simplify(Cnt) : Cnt; } void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const { diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index c6691e321b3c..1a187795a8a0 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" @@ -174,7 +175,8 @@ Error RawCoverageFilenamesReader::readUncompressed(CovMapVersion Version, else P.assign(CWD); llvm::sys::path::append(P, Filename); - Filenames.push_back(static_cast(P)); + sys::path::remove_dots(P, /*remove_dot_dot=*/true); + Filenames.push_back(static_cast(P.str())); } } } diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index ceb2d7dcb5b9..781a2901dbb9 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -49,12 +49,8 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) { SmallString<128> CompressedStr; bool doCompression = Compress && zlib::isAvailable() && DoInstrProfNameCompression; - if (doCompression) { - auto E = - zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression); - if (E) - report_bad_alloc_error("Failed to zlib compress coverage data"); - } + if (doCompression) + zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression); // ::= // diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 72d1addab01e..feacf40b8d0a 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -13,6 +13,7 @@ #include "llvm/ProfileData/GCOV.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Config/llvm-config.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Support/Debug.h" @@ -23,7 +24,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include using namespace llvm; @@ -663,6 +663,8 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) { if (f.startLine >= si.startLineToFunctions.size()) si.startLineToFunctions.resize(f.startLine + 1); si.startLineToFunctions[f.startLine].push_back(&f); + SmallSet lines; + SmallSet linesExec; for (const GCOVBlock &b : f.blocksRange()) { if (b.lines.empty()) continue; @@ -671,9 +673,9 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) { si.lines.resize(maxLineNum + 1); for (uint32_t lineNum : b.lines) { LineInfo &line = si.lines[lineNum]; - if (!line.exists) + if (lines.insert(lineNum).second) ++summary.lines; - if (line.count == 0 && b.count) + if (b.count && linesExec.insert(lineNum).second) ++summary.linesExec; line.exists = true; line.count += b.count; diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 07d467305ae5..48ac5ce0d607 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -466,12 +467,8 @@ Error collectPGOFuncNameStrings(ArrayRef NameStrs, } SmallString<128> CompressedNameStrings; - Error E = zlib::compress(StringRef(UncompressedNameStrings), - CompressedNameStrings, zlib::BestSizeCompression); - if (E) { - consumeError(std::move(E)); 
-    return make_error<InstrProfError>(instrprof_error::compress_failed);
-  }
+  zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
+                 zlib::BestSizeCompression);

   return WriteStringToResult(CompressedNameStrings.size(),
                              CompressedNameStrings);
@@ -1311,4 +1308,76 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
   }
 }

+namespace IndexedInstrProf {
+// A C++14 compatible version of the offsetof macro.
+template <class T1, class T2>
+inline size_t constexpr offsetOf(T1 T2::*Member) {
+  constexpr T2 Object{};
+  return size_t(&(Object.*Member)) - size_t(&Object);
+}
+
+static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
+  return *reinterpret_cast<const uint64_t *>(Buffer + Offset);
+}
+
+uint64_t Header::formatVersion() const {
+  using namespace support;
+  return endian::byte_swap<uint64_t, little>(Version);
+}
+
+Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
+  using namespace support;
+  static_assert(std::is_standard_layout<Header>
::value, + "The header should be standard layout type since we use offset " + "of fields to read."); + Header H; + + H.Magic = read(Buffer, offsetOf(&Header::Magic)); + // Check the magic number. + uint64_t Magic = endian::byte_swap(H.Magic); + if (Magic != IndexedInstrProf::Magic) + return make_error(instrprof_error::bad_magic); + + // Read the version. + H.Version = read(Buffer, offsetOf(&Header::Version)); + if (GET_VERSION(H.formatVersion()) > + IndexedInstrProf::ProfVersion::CurrentVersion) + return make_error(instrprof_error::unsupported_version); + + switch (GET_VERSION(H.formatVersion())) { + // When a new field is added in the header add a case statement here to + // populate it. + static_assert( + IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + "Please update the reading code below if a new field has been added, " + "if not add a case statement to fall through to the latest version."); + case 8ull: + H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset)); + LLVM_FALLTHROUGH; + default: // Version7 (when the backwards compatible header was introduced). + H.HashType = read(Buffer, offsetOf(&Header::HashType)); + H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset)); + } + + return H; +} + +size_t Header::size() const { + switch (GET_VERSION(formatVersion())) { + // When a new field is added to the header add a case statement here to + // compute the size as offset of the new field + size of the new field. This + // relies on the field being added to the end of the list. + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + "Please update the size computation below if a new field has " + "been added to the header, if not add a case statement to " + "fall through to the latest version."); + case 8ull: + return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset); + default: // Version7 (when the backwards compatible header was introduced). 
+ return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset); + } +} + +} // namespace IndexedInstrProf + } // end namespace llvm diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp index 8e38a6869d07..4b8212c546f7 100644 --- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp +++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp @@ -7,10 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfCorrelator.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/MachO.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #define DEBUG_TYPE "correlator" @@ -279,7 +284,7 @@ void DwarfInstrProfCorrelator::correlateProfileDataImpl() { LLVM_DEBUG(Die.dump(dbgs())); } this->addProbe(*FunctionName, *CFGHash, *CounterPtr - CountersStart, - FunctionPtr.getValueOr(0), *NumCounters); + FunctionPtr.value_or(0), *NumCounters); }; for (auto &CU : DICtx->normal_units()) for (const auto &Entry : CU->dies()) diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 138b1532d778..ee8989979a26 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -14,11 +14,11 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -27,7 +27,6 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/SymbolRemappingReader.h" #include -#include #include #include #include @@ -43,13 +42,13 @@ using namespace llvm; static InstrProfKind getProfileKindFromVersion(uint64_t Version) { InstrProfKind ProfileKind = InstrProfKind::Unknown; if (Version & VARIANT_MASK_IR_PROF) { - ProfileKind |= InstrProfKind::IR; + ProfileKind |= InstrProfKind::IRInstrumentation; } if (Version & VARIANT_MASK_CSIR_PROF) { - ProfileKind |= InstrProfKind::CS; + ProfileKind |= InstrProfKind::ContextSensitive; } if (Version & VARIANT_MASK_INSTR_ENTRY) { - ProfileKind |= InstrProfKind::BB; + ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; } if (Version & VARIANT_MASK_BYTE_COVERAGE) { ProfileKind |= InstrProfKind::SingleByteCoverage; @@ -57,6 +56,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) { if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { ProfileKind |= InstrProfKind::FunctionEntryOnly; } + if (Version & VARIANT_MASK_MEMPROF) { + ProfileKind |= InstrProfKind::MemProf; + } return ProfileKind; } @@ -153,14 +155,6 @@ IndexedInstrProfReader::create(std::unique_ptr Buffer, return std::move(Result); } -void InstrProfIterator::Increment() { - if (auto E = Reader->readNextRecord(Record)) { - // Handle errors in the reader. 
- InstrProfError::take(std::move(E)); - *this = InstrProfIterator(); - } -} - bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { // Verify that this really looks like plain ASCII text by checking a // 'reasonable' number of characters (up to profile magic size). @@ -180,16 +174,16 @@ Error TextInstrProfReader::readHeader() { while (Line->startswith(":")) { StringRef Str = Line->substr(1); if (Str.equals_insensitive("ir")) - ProfileKind |= InstrProfKind::IR; + ProfileKind |= InstrProfKind::IRInstrumentation; else if (Str.equals_insensitive("fe")) - ProfileKind |= InstrProfKind::FE; + ProfileKind |= InstrProfKind::FrontendInstrumentation; else if (Str.equals_insensitive("csir")) { - ProfileKind |= InstrProfKind::IR; - ProfileKind |= InstrProfKind::CS; + ProfileKind |= InstrProfKind::IRInstrumentation; + ProfileKind |= InstrProfKind::ContextSensitive; } else if (Str.equals_insensitive("entry_first")) - ProfileKind |= InstrProfKind::BB; + ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; else if (Str.equals_insensitive("not_entry_first")) - ProfileKind &= ~InstrProfKind::BB; + ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; else return error(instrprof_error::bad_header); ++Line; @@ -454,7 +448,7 @@ Error RawInstrProfReader::readHeader( return error(instrprof_error::bad_header); std::unique_ptr NewSymtab = std::make_unique(); - if (Error E = createSymtab(*NewSymtab.get())) + if (Error E = createSymtab(*NewSymtab)) return E; Symtab = std::move(NewSymtab); @@ -942,24 +936,17 @@ Error IndexedInstrProfReader::readHeader() { if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) return error(instrprof_error::truncated); - auto *Header = reinterpret_cast(Cur); - Cur += sizeof(IndexedInstrProf::Header); + auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start); + if (!HeaderOr) + return HeaderOr.takeError(); - // Check the magic number. - uint64_t Magic = endian::byte_swap(Header->Magic); - if (Magic != IndexedInstrProf::Magic) - return error(instrprof_error::bad_magic); - - // Read the version. - uint64_t FormatVersion = endian::byte_swap(Header->Version); - if (GET_VERSION(FormatVersion) > - IndexedInstrProf::ProfVersion::CurrentVersion) - return error(instrprof_error::unsupported_version); + const IndexedInstrProf::Header *Header = &HeaderOr.get(); + Cur += Header->size(); - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, /* UseCS */ false); - if (FormatVersion & VARIANT_MASK_CSIR_PROF) - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) + Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, /* UseCS */ true); // Read the hash type and start offset. @@ -970,10 +957,46 @@ Error IndexedInstrProfReader::readHeader() { uint64_t HashOffset = endian::byte_swap(Header->HashOffset); - // The rest of the file is an on disk hash table. - auto IndexPtr = - std::make_unique>( - Start + HashOffset, Cur, Start, HashType, FormatVersion); + // The hash table with profile counts comes next. + auto IndexPtr = std::make_unique>( + Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); + + // The MemProfOffset field in the header is only valid when the format version + // is higher than 8 (when it was introduced). 
+ if (GET_VERSION(Header->formatVersion()) >= 8 && + Header->formatVersion() & VARIANT_MASK_MEMPROF) { + uint64_t MemProfOffset = + endian::byte_swap(Header->MemProfOffset); + + const unsigned char *Ptr = Start + MemProfOffset; + // The value returned from RecordTableGenerator.Emit. + const uint64_t RecordTableOffset = + support::endian::readNext(Ptr); + // The offset in the stream right before invoking FrameTableGenerator.Emit. + const uint64_t FramePayloadOffset = + support::endian::readNext(Ptr); + // The value returned from FrameTableGenerator.Emit. + const uint64_t FrameTableOffset = + support::endian::readNext(Ptr); + + // Read the schema. + auto SchemaOr = memprof::readMemProfSchema(Ptr); + if (!SchemaOr) + return SchemaOr.takeError(); + Schema = SchemaOr.get(); + + // Now initialize the table reader with a pointer into data buffer. + MemProfRecordTable.reset(MemProfRecordHashTable::Create( + /*Buckets=*/Start + RecordTableOffset, + /*Payload=*/Ptr, + /*Base=*/Start, memprof::RecordLookupTrait(Schema))); + + // Initialize the frame table reader with the payload and bucket offsets. + MemProfFrameTable.reset(MemProfFrameHashTable::Create( + /*Buckets=*/Start + FrameTableOffset, + /*Payload=*/Start + FramePayloadOffset, + /*Base=*/Start, memprof::FrameLookupTrait())); + } // Load the remapping table now if requested. if (RemappingBuffer) { @@ -991,16 +1014,16 @@ Error IndexedInstrProfReader::readHeader() { } InstrProfSymtab &IndexedInstrProfReader::getSymtab() { - if (Symtab.get()) - return *Symtab.get(); + if (Symtab) + return *Symtab; std::unique_ptr NewSymtab = std::make_unique(); - if (Error E = Index->populateSymtab(*NewSymtab.get())) { + if (Error E = Index->populateSymtab(*NewSymtab)) { consumeError(error(InstrProfError::take(std::move(E)))); } Symtab = std::move(NewSymtab); - return *Symtab.get(); + return *Symtab; } Expected @@ -1019,6 +1042,43 @@ IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, return error(instrprof_error::hash_mismatch); } +Expected +IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) { + // TODO: Add memprof specific errors. + if (MemProfRecordTable == nullptr) + return make_error(instrprof_error::invalid_prof, + "no memprof data available in profile"); + auto Iter = MemProfRecordTable->find(FuncNameHash); + if (Iter == MemProfRecordTable->end()) + return make_error( + instrprof_error::unknown_function, + "memprof record not found for function hash " + Twine(FuncNameHash)); + + // Setup a callback to convert from frame ids to frame using the on-disk + // FrameData hash table. + memprof::FrameId LastUnmappedFrameId = 0; + bool HasFrameMappingError = false; + auto IdToFrameCallback = [&](const memprof::FrameId Id) { + auto FrIter = MemProfFrameTable->find(Id); + if (FrIter == MemProfFrameTable->end()) { + LastUnmappedFrameId = Id; + HasFrameMappingError = true; + return memprof::Frame(0, 0, 0, false); + } + return *FrIter; + }; + + memprof::MemProfRecord Record(*Iter, IdToFrameCallback); + + // Check that all frame ids were successfully converted to frames. 
+ if (HasFrameMappingError) { + return make_error(instrprof_error::hash_mismatch, + "memprof frame not found for frame id " + + Twine(LastUnmappedFrameId)); + } + return Record; +} + Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 8ded1c0426e5..cd4e8900c963 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -23,7 +24,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include @@ -32,7 +32,6 @@ #include using namespace llvm; -extern cl::opt DebugInfoCorrelate; // A struct to define how the data stream should be patched. For Indexed // profiling, only uint64_t data type is needed. @@ -64,11 +63,16 @@ public: if (IsFDOStream) { raw_fd_ostream &FDOStream = static_cast(OS); + const uint64_t LastPos = FDOStream.tell(); for (int K = 0; K < NItems; K++) { FDOStream.seek(P[K].Pos); for (int I = 0; I < P[K].N; I++) write(P[K].D[I]); } + // Reset the stream to the last position after patching so that users + // don't accidentally overwrite data. This makes it consistent with + // the string stream below which replaces the data directly. + FDOStream.seek(LastPos); } else { raw_string_ostream &SOStream = static_cast(OS); std::string &Data = SOStream.str(); // with flush @@ -249,11 +253,51 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, Dest.sortValueData(); } +void InstrProfWriter::addMemProfRecord( + const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) { + auto Result = MemProfRecordData.insert({Id, Record}); + // If we inserted a new record then we are done. + if (Result.second) { + return; + } + memprof::IndexedMemProfRecord &Existing = Result.first->second; + Existing.merge(Record); +} + +bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, + const memprof::Frame &Frame, + function_ref Warn) { + auto Result = MemProfFrameData.insert({Id, Frame}); + // If a mapping already exists for the current frame id and it does not + // match the new mapping provided then reset the existing contents and bail + // out. We don't support the merging of memprof data whose Frame -> Id + // mapping across profiles is inconsistent. + if (!Result.second && Result.first->second != Frame) { + Warn(make_error(instrprof_error::malformed, + "frame to id mapping mismatch")); + return false; + } + return true; +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); + for (auto &I : IPW.MemProfFrameData) { + // If we weren't able to add the frame mappings then it doesn't make sense + // to try to merge the records from this profile. 
+ if (!addMemProfFrame(I.first, I.second, Warn)) + return; + } + + MemProfRecordData.reserve(IPW.MemProfRecordData.size()); + for (auto &I : IPW.MemProfRecordData) { + addMemProfRecord(I.first, I.second); + } } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -298,30 +342,34 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Generator.insert(I.getKey(), &I.getValue()); + // Write the header. IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; - if (static_cast(ProfileKind & InstrProfKind::IR)) + if (static_cast(ProfileKind & InstrProfKind::IRInstrumentation)) Header.Version |= VARIANT_MASK_IR_PROF; - if (static_cast(ProfileKind & InstrProfKind::CS)) + if (static_cast(ProfileKind & InstrProfKind::ContextSensitive)) Header.Version |= VARIANT_MASK_CSIR_PROF; - if (static_cast(ProfileKind & InstrProfKind::BB)) + if (static_cast(ProfileKind & + InstrProfKind::FunctionEntryInstrumentation)) Header.Version |= VARIANT_MASK_INSTR_ENTRY; if (static_cast(ProfileKind & InstrProfKind::SingleByteCoverage)) Header.Version |= VARIANT_MASK_BYTE_COVERAGE; if (static_cast(ProfileKind & InstrProfKind::FunctionEntryOnly)) Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; + if (static_cast(ProfileKind & InstrProfKind::MemProf)) + Header.Version |= VARIANT_MASK_MEMPROF; Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; + Header.MemProfOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset'. We need - // to remember the offset of that field to allow back patching - // later. - for (int I = 0; I < N - 1; I++) + // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We + // need to remember the offset of these fields to allow back patching later. + for (int I = 0; I < N - 2; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -329,6 +377,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Reserve the space for HashOffset field. OS.write(0); + // Save the location of MemProf profile data. This is stored in two parts as + // the schema and as a separate on-disk chained hashtable. + uint64_t MemProfSectionOffset = OS.tell(); + // Reserve space for the MemProf table field to be patched later if this + // profile contains memory profile information. + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -338,7 +393,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.write(0); uint64_t CSSummaryOffset = 0; uint64_t CSSummarySize = 0; - if (static_cast(ProfileKind & InstrProfKind::CS)) { + if (static_cast(ProfileKind & InstrProfKind::ContextSensitive)) { CSSummaryOffset = OS.tell(); CSSummarySize = SummarySize / sizeof(uint64_t); for (unsigned I = 0; I < CSSummarySize; I++) @@ -348,6 +403,63 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); + // Write the MemProf profile data if we have it. 
+  // with the format described below followed by the hashtable:
+  // uint64_t RecordTableOffset = RecordTableGenerator.Emit
+  // uint64_t FramePayloadOffset = Stream offset before emitting the frame table
+  // uint64_t FrameTableOffset = FrameTableGenerator.Emit
+  // uint64_t Num schema entries
+  // uint64_t Schema entry 0
+  // uint64_t Schema entry 1
+  // ....
+  // uint64_t Schema entry N - 1
+  // OnDiskChainedHashTable MemProfRecordData
+  // OnDiskChainedHashTable MemProfFrameData
+  uint64_t MemProfSectionStart = 0;
+  if (static_cast(ProfileKind & InstrProfKind::MemProf)) {
+    MemProfSectionStart = OS.tell();
+    OS.write(0ULL); // Reserve space for the memprof record table offset.
+    OS.write(0ULL); // Reserve space for the memprof frame payload offset.
+    OS.write(0ULL); // Reserve space for the memprof frame table offset.
+
+    auto Schema = memprof::PortableMemInfoBlock::getSchema();
+    OS.write(static_cast(Schema.size()));
+    for (const auto Id : Schema) {
+      OS.write(static_cast(Id));
+    }
+
+    auto RecordWriter = std::make_unique();
+    RecordWriter->Schema = &Schema;
+    OnDiskChainedHashTableGenerator
+        RecordTableGenerator;
+    for (auto &I : MemProfRecordData) {
+      // Insert the key (func hash) and value (memprof record).
+      RecordTableGenerator.insert(I.first, I.second);
+    }
+
+    uint64_t RecordTableOffset =
+        RecordTableGenerator.Emit(OS.OS, *RecordWriter);
+
+    uint64_t FramePayloadOffset = OS.tell();
+
+    auto FrameWriter = std::make_unique();
+    OnDiskChainedHashTableGenerator
+        FrameTableGenerator;
+    for (auto &I : MemProfFrameData) {
+      // Insert the key (frame id) and value (frame contents).
+      FrameTableGenerator.insert(I.first, I.second);
+    }
+
+    uint64_t FrameTableOffset = FrameTableGenerator.Emit(OS.OS, *FrameWriter);
+
+    PatchItem PatchItems[] = {
+        {MemProfSectionStart, &RecordTableOffset, 1},
+        {MemProfSectionStart + sizeof(uint64_t), &FramePayloadOffset, 1},
+        {MemProfSectionStart + 2 * sizeof(uint64_t), &FrameTableOffset, 1},
+    };
+    OS.patch(PatchItems, 3);
+  }
+
   // Allocate space for data to be serialized out.
   std::unique_ptr TheSummary =
       IndexedInstrProf::allocSummary(SummarySize);
@@ -359,7 +471,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   // For Context Sensitive summary.
   std::unique_ptr TheCSSummary = nullptr;
-  if (static_cast(ProfileKind & InstrProfKind::CS)) {
+  if (static_cast(ProfileKind & InstrProfKind::ContextSensitive)) {
     TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
     std::unique_ptr CSPS = CSISB.getSummary();
     setSummary(TheCSSummary.get(), *CSPS);
@@ -370,6 +482,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   PatchItem PatchItems[] = {
       // Patch the Header.HashOffset field.
       {HashTableStartFieldOffset, &HashTableStart, 1},
+      // Patch the Header.MemProfOffset (=0 for profiles without MemProf data).
+      {MemProfSectionOffset, &MemProfSectionStart, 1},
       // Patch the summary data.
       {SummaryOffset, reinterpret_cast(TheSummary.get()),
        (int)(SummarySize / sizeof(uint64_t))},
@@ -472,12 +586,13 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
 
 Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
   // Check CS first since it implies an IR level profile.
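For background, the section layout above uses a common reserve-then-back-patch pattern: offsets that are only known after later payloads have been emitted get zero-filled slots up front, and those slots are patched in place afterwards. A minimal standalone sketch of that pattern over a plain byte buffer (illustrative names only, not LLVM's ProfOStream API):

#include <cstdint>
#include <vector>

static void writeU64(std::vector<uint8_t> &Buf, uint64_t V) {
  for (int I = 0; I < 8; I++)
    Buf.push_back(uint8_t(V >> (8 * I))); // little-endian
}

static void patchU64(std::vector<uint8_t> &Buf, size_t Pos, uint64_t V) {
  for (int I = 0; I < 8; I++)
    Buf[Pos + I] = uint8_t(V >> (8 * I));
}

std::vector<uint8_t> emitSection(const std::vector<uint8_t> &Records,
                                 const std::vector<uint8_t> &Frames) {
  std::vector<uint8_t> Buf;
  const size_t RecordOffSlot = Buf.size();
  writeU64(Buf, 0); // reserve: record table offset
  const size_t FrameOffSlot = Buf.size();
  writeU64(Buf, 0); // reserve: frame table offset
  Buf.insert(Buf.end(), Records.begin(), Records.end());
  const uint64_t FrameStart = Buf.size();
  Buf.insert(Buf.end(), Frames.begin(), Frames.end());
  patchU64(Buf, RecordOffSlot, 16);        // records begin right after header
  patchU64(Buf, FrameOffSlot, FrameStart); // frames begin here
  return Buf;
}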
-  if (static_cast(ProfileKind & InstrProfKind::CS))
+  if (static_cast(ProfileKind & InstrProfKind::ContextSensitive))
     OS << "# CSIR level Instrumentation Flag\n:csir\n";
-  else if (static_cast(ProfileKind & InstrProfKind::IR))
+  else if (static_cast(ProfileKind & InstrProfKind::IRInstrumentation))
     OS << "# IR level Instrumentation Flag\n:ir\n";
 
-  if (static_cast(ProfileKind & InstrProfKind::BB))
+  if (static_cast(ProfileKind &
+                        InstrProfKind::FunctionEntryInstrumentation))
     OS << "# Always instrument the function entry block\n:entry_first\n";
 
   InstrProfSymtab Symtab;
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
new file mode 100644
index 000000000000..3d44cf0b4c37
--- /dev/null
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -0,0 +1,110 @@
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+namespace llvm {
+namespace memprof {
+
+void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
+                                     raw_ostream &OS) {
+  using namespace support;
+
+  endian::Writer LE(OS, little);
+
+  LE.write(AllocSites.size());
+  for (const IndexedAllocationInfo &N : AllocSites) {
+    LE.write(N.CallStack.size());
+    for (const FrameId &Id : N.CallStack)
+      LE.write(Id);
+    N.Info.serialize(Schema, OS);
+  }
+
+  // Related contexts.
+  LE.write(CallSites.size());
+  for (const auto &Frames : CallSites) {
+    LE.write(Frames.size());
+    for (const FrameId &Id : Frames)
+      LE.write(Id);
+  }
+}
+
+IndexedMemProfRecord
+IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
+                                  const unsigned char *Ptr) {
+  using namespace support;
+
+  IndexedMemProfRecord Record;
+
+  // Read the meminfo nodes.
+  const uint64_t NumNodes = endian::readNext(Ptr);
+  for (uint64_t I = 0; I < NumNodes; I++) {
+    IndexedAllocationInfo Node;
+    const uint64_t NumFrames =
+        endian::readNext(Ptr);
+    for (uint64_t J = 0; J < NumFrames; J++) {
+      const FrameId Id = endian::readNext(Ptr);
+      Node.CallStack.push_back(Id);
+    }
+    Node.Info.deserialize(Schema, Ptr);
+    Ptr += PortableMemInfoBlock::serializedSize();
+    Record.AllocSites.push_back(Node);
+  }
+
+  // Read the callsite information.
+  const uint64_t NumCtxs = endian::readNext(Ptr);
+  for (uint64_t J = 0; J < NumCtxs; J++) {
+    const uint64_t NumFrames =
+        endian::readNext(Ptr);
+    llvm::SmallVector Frames;
+    Frames.reserve(NumFrames);
+    for (uint64_t K = 0; K < NumFrames; K++) {
+      const FrameId Id = endian::readNext(Ptr);
+      Frames.push_back(Id);
+    }
+    Record.CallSites.push_back(Frames);
+  }
+
+  return Record;
+}
+
+GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
+  const auto Pos = FunctionName.find(".llvm.");
+
+  // We use the function guid which we expect to be a uint64_t. At
+  // this time, it is the lower 64 bits of the md5 of the function
+  // name. Any suffix with .llvm. is trimmed since these are added by
+  // thinLTO global promotion. At the time the profile is consumed,
+  // these suffixes will not be present.
+  return Function::getGUID(FunctionName.take_front(Pos));
+}
+
+Expected readMemProfSchema(const unsigned char *&Buffer) {
+  using namespace support;
+
+  const unsigned char *Ptr = Buffer;
+  const uint64_t NumSchemaIds =
+      endian::readNext(Ptr);
+  if (NumSchemaIds > static_cast(Meta::Size)) {
+    return make_error(instrprof_error::malformed,
+                      "memprof schema invalid");
+  }
+
+  MemProfSchema Result;
+  for (size_t I = 0; I < NumSchemaIds; I++) {
+    const uint64_t Tag = endian::readNext(Ptr);
+    if (Tag >= static_cast(Meta::Size)) {
+      return make_error(instrprof_error::malformed,
+                        "memprof schema invalid");
+    }
+    Result.push_back(static_cast(Tag));
+  }
+  // Advance the buffer to one past the schema if we succeeded.
+  Buffer = Ptr;
+  return Result;
+}
+
+} // namespace memprof
+} // namespace llvm
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index bbb640cfaee8..755e25b355a8 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -10,20 +10,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Type.h"
+#include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
 cl::opt UseContextLessSummary(
-    "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+    "profile-summary-contextless", cl::Hidden,
     cl::desc("Merge context profiles before calculating thresholds."));
 
 // The following two parameters determine the threshold for a count to be
@@ -34,38 +30,38 @@ cl::opt UseContextLessSummary(
 // threshold for determining cold count (everything <= this threshold is
 // considered cold).
 cl::opt ProfileSummaryCutoffHot(
-    "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
+    "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000),
     cl::desc("A count is hot if it exceeds the minimum count to"
              " reach this percentile of total counts."));
 
 cl::opt ProfileSummaryCutoffCold(
-    "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
+    "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999),
     cl::desc("A count is cold if it is below the minimum count"
              " to reach this percentile of total counts."));
 
 cl::opt ProfileSummaryHugeWorkingSetSizeThreshold(
     "profile-summary-huge-working-set-size-threshold", cl::Hidden,
-    cl::init(15000), cl::ZeroOrMore,
+    cl::init(15000),
     cl::desc("The code working set size is considered huge if the number of"
              " blocks required to reach the -profile-summary-cutoff-hot"
             " percentile exceeds this count."));
 
 cl::opt ProfileSummaryLargeWorkingSetSizeThreshold(
     "profile-summary-large-working-set-size-threshold", cl::Hidden,
-    cl::init(12500), cl::ZeroOrMore,
+    cl::init(12500),
     cl::desc("The code working set size is considered large if the number of"
              " blocks required to reach the -profile-summary-cutoff-hot"
             " percentile exceeds this count."));
 
 // The next two options override the counts derived from summary computation and
 // are useful for debugging purposes.
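The schema reader above follows a read-count, bounds-check, read-tags, commit-cursor-only-on-success shape. A standalone sketch of the same idea, with invented tag names and std::optional standing in for llvm::Expected (bounds checks against the end of the buffer are omitted for brevity, as in the original):

#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

enum class Meta : uint64_t { AllocCount, TotalSize, Size /* sentinel */ };

// Returns the decoded schema; advances *Buffer past it only on success.
std::optional<std::vector<Meta>> readSchema(const unsigned char **Buffer) {
  const unsigned char *Ptr = *Buffer;
  uint64_t Count;
  std::memcpy(&Count, Ptr, sizeof(Count)); // assumes little-endian host
  Ptr += sizeof(Count);
  if (Count > static_cast<uint64_t>(Meta::Size))
    return std::nullopt; // malformed: more entries than known tags
  std::vector<Meta> Schema;
  for (uint64_t I = 0; I < Count; I++) {
    uint64_t Tag;
    std::memcpy(&Tag, Ptr, sizeof(Tag));
    Ptr += sizeof(Tag);
    if (Tag >= static_cast<uint64_t>(Meta::Size))
      return std::nullopt; // malformed: unknown tag
    Schema.push_back(static_cast<Meta>(Tag));
  }
  *Buffer = Ptr; // commit the cursor only after full validation
  return Schema;
}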
-cl::opt ProfileSummaryHotCount(
-    "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+cl::opt ProfileSummaryHotCount(
+    "profile-summary-hot-count", cl::ReallyHidden,
     cl::desc("A fixed hot count that overrides the count derived from"
              " profile-summary-cutoff-hot"));
 
-cl::opt ProfileSummaryColdCount(
-    "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+cl::opt ProfileSummaryColdCount(
+    "profile-summary-cold-count", cl::ReallyHidden,
     cl::desc("A fixed cold count that overrides the count derived from"
              " profile-summary-cutoff-cold"));
 
@@ -110,7 +106,13 @@ void SampleProfileSummaryBuilder::addRecord(
     NumFunctions++;
     if (FS.getHeadSamples() > MaxFunctionCount)
       MaxFunctionCount = FS.getHeadSamples();
+  } else if (FS.getContext().hasAttribute(
+                 sampleprof::ContextDuplicatedIntoBase)) {
+    // Do not recount callee samples if they are already merged into their base
+    // profiles. This can happen for CS nested profiles.
+    return;
   }
+
   for (const auto &I : FS.getBodySamples()) {
     uint64_t Count = I.second.getSamples();
     addCount(Count);
@@ -194,7 +196,7 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
   // more function profiles each with lower counts, which in turn leads to lower
   // hot thresholds. To compensate for that, by default we merge context
   // profiles before computing profile summary.
-  if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCSFlat &&
+  if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
                                 !UseContextLessSummary.getNumOccurrences())) {
     for (const auto &I : Profiles) {
       ContextLessProfiles[I.second.getName()].merge(I.second);
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index f8d13c74fac3..2423fd38e9a2 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -10,69 +10,55 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include
 #include
+#include
 #include
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Path.h"
+
+#define DEBUG_TYPE "memprof"
 
 namespace llvm {
 namespace memprof {
 namespace {
-
-struct Summary {
-  uint64_t Version;
-  uint64_t TotalSizeBytes;
-  uint64_t NumSegments;
-  uint64_t NumMIBInfo;
-  uint64_t NumStackOffsets;
-};
-
 template inline T alignedRead(const char *Ptr) {
   static_assert(std::is_pod::value, "Not a pod type.");
   assert(reinterpret_cast(Ptr) % sizeof(T) == 0 && "Unaligned Read");
   return *reinterpret_cast(Ptr);
 }
 
-Summary computeSummary(const char *Start) {
-  auto *H = reinterpret_cast(Start);
-
-  // Check alignment while reading the number of items in each section.
-  return Summary{
-      H->Version,
-      H->TotalSize,
-      alignedRead(Start + H->SegmentOffset),
-      alignedRead(Start + H->MIBOffset),
-      alignedRead(Start + H->StackOffset),
-  };
-}
-
-} // namespace
-
-Expected>
-RawMemProfReader::create(const Twine &Path) {
-  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
-  if (std::error_code EC = BufferOr.getError())
-    return errorCodeToError(EC);
-
-  std::unique_ptr Buffer(BufferOr.get().release());
+Error checkBuffer(const MemoryBuffer &Buffer) {
+  if (!RawMemProfReader::hasFormat(Buffer))
+    return make_error(instrprof_error::bad_magic);
 
-  if (Buffer->getBufferSize() == 0)
+  if (Buffer.getBufferSize() == 0)
     return make_error(instrprof_error::empty_raw_profile);
 
-  if (!RawMemProfReader::hasFormat(*Buffer))
-    return make_error(instrprof_error::bad_magic);
-
-  if (Buffer->getBufferSize() < sizeof(Header)) {
+  if (Buffer.getBufferSize() < sizeof(Header)) {
     return make_error(instrprof_error::truncated);
   }
 
   // The size of the buffer can be > header total size since we allow repeated
   // serialization of memprof profiles to the same file.
   uint64_t TotalSize = 0;
-  const char *Next = Buffer->getBufferStart();
-  while (Next < Buffer->getBufferEnd()) {
+  const char *Next = Buffer.getBufferStart();
+  while (Next < Buffer.getBufferEnd()) {
     auto *H = reinterpret_cast(Next);
     if (H->Version != MEMPROF_RAW_VERSION) {
       return make_error(instrprof_error::unsupported_version);
@@ -82,11 +68,143 @@ RawMemProfReader::create(const Twine &Path) {
     Next += H->TotalSize;
   }
 
-  if (Buffer->getBufferSize() != TotalSize) {
+  if (Buffer.getBufferSize() != TotalSize) {
     return make_error(instrprof_error::malformed);
   }
+  return Error::success();
+}
+
+llvm::SmallVector readSegmentEntries(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext(Ptr);
+  llvm::SmallVector Items;
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    Items.push_back(*reinterpret_cast(
+        Ptr + I * sizeof(SegmentEntry)));
+  }
+  return Items;
+}
+
+llvm::SmallVector>
+readMemInfoBlocks(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext(Ptr);
+  llvm::SmallVector> Items;
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    const uint64_t Id = endian::readNext(Ptr);
+    const MemInfoBlock MIB = *reinterpret_cast(Ptr);
+    Items.push_back({Id, MIB});
+    // Only increment by size of MIB since readNext implicitly increments.
+    Ptr += sizeof(MemInfoBlock);
+  }
+  return Items;
+}
+
+CallStackMap readStackInfo(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext(Ptr);
+  CallStackMap Items;
+
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    const uint64_t StackId = endian::readNext(Ptr);
+    const uint64_t NumPCs = endian::readNext(Ptr);
+
+    SmallVector CallStack;
+    for (uint64_t J = 0; J < NumPCs; J++) {
+      CallStack.push_back(endian::readNext(Ptr));
+    }
+
+    Items[StackId] = CallStack;
+  }
+  return Items;
+}
+
+// Merges the contents of stack information in \p From to \p To. Returns true if
+// any stack ids observed previously map to a different set of program counter
+// addresses.
+bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
+  for (const auto &IdStack : From) {
+    auto I = To.find(IdStack.first);
+    if (I == To.end()) {
+      To[IdStack.first] = IdStack.second;
+    } else {
+      // Check that the PCs are the same (in order).
+      if (IdStack.second != I->second)
+        return true;
+    }
+  }
+  return false;
+}
 
-  return std::make_unique(std::move(Buffer));
+Error report(Error E, const StringRef Context) {
+  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
+                    std::move(E));
+}
+
+bool isRuntimePath(const StringRef Path) {
+  return StringRef(llvm::sys::path::convert_to_slash(Path))
+      .contains("memprof/memprof_");
+}
+
+std::string getBuildIdString(const SegmentEntry &Entry) {
+  constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
+  constexpr uint8_t Zeros[Size] = {0};
+  // If the build id is unset, print a helpful string instead of all zeros.
+  if (memcmp(Entry.BuildId, Zeros, Size) == 0)
+    return "";
+
+  std::string Str;
+  raw_string_ostream OS(Str);
+  for (size_t I = 0; I < Size; I++) {
+    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
+  }
+  return OS.str();
+}
+} // namespace
+
+Expected>
+RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
+                         bool KeepName) {
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (std::error_code EC = BufferOr.getError())
+    return report(errorCodeToError(EC), Path.getSingleStringRef());
+
+  std::unique_ptr Buffer(BufferOr.get().release());
+  if (Error E = checkBuffer(*Buffer))
+    return report(std::move(E), Path.getSingleStringRef());
+
+  if (ProfiledBinary.empty())
+    return report(
+        errorCodeToError(make_error_code(std::errc::invalid_argument)),
+        "Path to profiled binary is empty!");
+
+  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
+  if (!BinaryOr) {
+    return report(BinaryOr.takeError(), ProfiledBinary);
+  }
+
+  // Use new here since constructor is private.
+  std::unique_ptr Reader(
+      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
+  if (Error E = Reader->initialize(std::move(Buffer))) {
+    return std::move(E);
+  }
+  return std::move(Reader);
+}
+
+bool RawMemProfReader::hasFormat(const StringRef Path) {
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (!BufferOr)
+    return false;
+
+  std::unique_ptr Buffer(BufferOr.get().release());
+  return hasFormat(*Buffer);
 }
 
 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
@@ -98,24 +216,343 @@ bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
   return Magic == MEMPROF_RAW_MAGIC_64;
 }
 
-void RawMemProfReader::printSummaries(raw_ostream &OS) const {
-  int Count = 0;
+void RawMemProfReader::printYAML(raw_ostream &OS) {
+  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
+  for (const auto &KV : FunctionProfileData) {
+    const size_t NumAllocSites = KV.second.AllocSites.size();
+    if (NumAllocSites > 0) {
+      NumAllocFunctions++;
+      NumMibInfo += NumAllocSites;
+    }
+  }
+
+  OS << "MemprofProfile:\n";
+  OS << "  Summary:\n";
+  OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
+  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
+  OS << "    NumMibInfo: " << NumMibInfo << "\n";
+  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
+  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
+  // Print out the segment information.
+  OS << "  Segments:\n";
+  for (const auto &Entry : SegmentInfo) {
+    OS << "  -\n";
+    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
+    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
+    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
+    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
+  }
+  // Print out the merged contents of the profiles.
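The mergeStackMap helper above only accepts profiles whose stack ids agree on their PC lists. A simplified standalone version of that consistency check, using std::map in place of LLVM's containers:

#include <cstdint>
#include <map>
#include <vector>

using CallStackMap = std::map<uint64_t, std::vector<uint64_t>>;

// Returns true (a conflict) if a stack id seen before maps to a different
// ordered list of PCs; otherwise copies the new entries into To.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &KV : From) {
    auto It = To.find(KV.first);
    if (It == To.end())
      To[KV.first] = KV.second;
    else if (It->second != KV.second)
      return true;
  }
  return false;
}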
+  OS << "  Records:\n";
+  for (const auto &Entry : *this) {
+    OS << "  -\n";
+    OS << "    FunctionGUID: " << Entry.first << "\n";
+    Entry.second.print(OS);
+  }
+}
+
+Error RawMemProfReader::initialize(std::unique_ptr DataBuffer) {
+  const StringRef FileName = Binary.getBinary()->getFileName();
+
+  auto *ElfObject = dyn_cast(Binary.getBinary());
+  if (!ElfObject) {
+    return report(make_error(Twine("Not an ELF file: "),
+                             inconvertibleErrorCode()),
+                  FileName);
+  }
+
+  // Check whether the profiled binary was built with position independent
+  // code (PIC). For now we emit an error message until symbolization support
+  // is added for PIC.
+  auto *Elf64LEObject = llvm::cast(ElfObject);
+  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
+  auto PHdrsOr = ElfFile.program_headers();
+  if (!PHdrsOr)
+    return report(make_error(Twine("Could not read program headers: "),
+                             inconvertibleErrorCode()),
+                  FileName);
+  auto FirstLoadHeader = PHdrsOr->begin();
+  while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
+    ++FirstLoadHeader;
+  if (FirstLoadHeader->p_vaddr == 0)
+    return report(make_error(Twine("Unsupported position independent code"),
+                             inconvertibleErrorCode()),
+                  FileName);
+
+  auto Triple = ElfObject->makeTriple();
+  if (!Triple.isX86())
+    return report(make_error(Twine("Unsupported target: ") +
+                                 Triple.getArchName(),
+                             inconvertibleErrorCode()),
+                  FileName);
+
+  auto *Object = cast(Binary.getBinary());
+  std::unique_ptr Context = DWARFContext::create(
+      *Object, DWARFContext::ProcessDebugRelocations::Process);
+
+  auto SOFOr = symbolize::SymbolizableObjectFile::create(
+      Object, std::move(Context), /*UntagAddresses=*/false);
+  if (!SOFOr)
+    return report(SOFOr.takeError(), FileName);
+  Symbolizer = std::move(SOFOr.get());
+
+  if (Error E = readRawProfile(std::move(DataBuffer)))
+    return E;
+
+  if (Error E = symbolizeAndFilterStackFrames())
+    return E;
+
+  return mapRawProfileToRecords();
+}
+
+Error RawMemProfReader::mapRawProfileToRecords() {
+  // Hold a mapping from function to each callsite location we encounter within
+  // it that is part of some dynamic allocation context. The location is stored
+  // as a pointer to a symbolized list of inline frames.
+  using LocationPtr = const llvm::SmallVector *;
+  llvm::DenseMap>
+      PerFunctionCallSites;
+
+  // Convert the raw profile callstack data into memprof records. While doing
+  // so, keep track of related contexts so that we can fill these in later.
+  for (const auto &Entry : CallstackProfileData) {
+    const uint64_t StackId = Entry.first;
+
+    auto It = StackMap.find(StackId);
+    if (It == StackMap.end())
+      return make_error(
+          instrprof_error::malformed,
+          "memprof callstack record does not contain id: " + Twine(StackId));
+
+    // Construct the symbolized callstack.
+    llvm::SmallVector Callstack;
+    Callstack.reserve(It->getSecond().size());
+
+    llvm::ArrayRef Addresses = It->getSecond();
+    for (size_t I = 0; I < Addresses.size(); I++) {
+      const uint64_t Address = Addresses[I];
+      assert(SymbolizedFrame.count(Address) > 0 &&
+             "Address not found in SymbolizedFrame map");
+      const SmallVector &Frames = SymbolizedFrame[Address];
+
+      assert(!idToFrame(Frames.back()).IsInlineFrame &&
+             "The last frame should not be inlined");
+
+      // Record the callsites for each function. Skip the first frame of the
+      // first address since it is the allocation site itself that is recorded
+      // as an alloc site.
+      for (size_t J = 0; J < Frames.size(); J++) {
+        if (I == 0 && J == 0)
+          continue;
+        // We attach the entire bottom-up frame here for the callsite even
+        // though we only need the frames up to and including the frame for
+        // Frames[J].Function. This will enable better deduplication for
+        // compression in the future.
+        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
+        PerFunctionCallSites[Guid].insert(&Frames);
+      }
+
+      // Add all the frames to the current allocation callstack.
+      Callstack.append(Frames.begin(), Frames.end());
+    }
+
+    // We attach the memprof record to each function bottom-up including the
+    // first non-inline frame.
+    for (size_t I = 0; /*Break out using the condition below*/; I++) {
+      const Frame &F = idToFrame(Callstack[I]);
+      auto Result =
+          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
+      IndexedMemProfRecord &Record = Result.first->second;
+      Record.AllocSites.emplace_back(Callstack, Entry.second);
+
+      if (!F.IsInlineFrame)
+        break;
+    }
+  }
+
+  // Fill in the related callsites per function.
+  for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
+       I != E; I++) {
+    const GlobalValue::GUID Id = I->first;
+    // Some functions may have only callsite data and no allocation data. Here
+    // we insert a new entry for callsite data if we need to.
+    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
+    IndexedMemProfRecord &Record = Result.first->second;
+    for (LocationPtr Loc : I->getSecond()) {
+      Record.CallSites.push_back(*Loc);
+    }
+  }
+
+  return Error::success();
+}
+
+Error RawMemProfReader::symbolizeAndFilterStackFrames() {
+  // The specifier to use when symbolization is requested.
+  const DILineInfoSpecifier Specifier(
+      DILineInfoSpecifier::FileLineInfoKind::RawValue,
+      DILineInfoSpecifier::FunctionNameKind::LinkageName);
+
+  // For entries where all PCs in the callstack are discarded, we erase the
+  // entry from the stack map.
+  llvm::SmallVector EntriesToErase;
+  // We keep track of all prior discarded entries so that we can avoid invoking
+  // the symbolizer for such entries.
+  llvm::DenseSet AllVAddrsToDiscard;
+  for (auto &Entry : StackMap) {
+    for (const uint64_t VAddr : Entry.getSecond()) {
+      // Check if we have already symbolized and cached the result or if we
+      // don't want to attempt symbolization since we know this address is bad.
+      // In this case the address is also removed from the current callstack.
+      if (SymbolizedFrame.count(VAddr) > 0 ||
+          AllVAddrsToDiscard.contains(VAddr))
+        continue;
+
+      Expected DIOr = Symbolizer->symbolizeInlinedCode(
+          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
+      if (!DIOr)
+        return DIOr.takeError();
+      DIInliningInfo DI = DIOr.get();
+
+      // Drop frames which we can't symbolize or if they belong to the runtime.
+      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
+          isRuntimePath(DI.getFrame(0).FileName)) {
+        AllVAddrsToDiscard.insert(VAddr);
+        continue;
+      }
+
+      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
+           I++) {
+        const auto &DIFrame = DI.getFrame(I);
+        const uint64_t Guid =
+            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
+        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
+                      // Only the last entry is not an inlined location.
+                      I != NumFrames - 1);
+        // Here we retain a mapping from the GUID to symbol name instead of
+        // adding it to the frame object directly to reduce memory overhead.
+        // This is because there can be many unique frames, particularly for
+        // callsite frames.
+        if (KeepSymbolName)
+          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+
+        const FrameId Hash = F.hash();
+        IdToFrame.insert({Hash, F});
+        SymbolizedFrame[VAddr].push_back(Hash);
+      }
+    }
+
+    auto &CallStack = Entry.getSecond();
+    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
+      return AllVAddrsToDiscard.contains(A);
+    });
+    if (CallStack.empty())
+      EntriesToErase.push_back(Entry.getFirst());
+  }
+
+  // Drop the entries where the callstack is empty.
+  for (const uint64_t Id : EntriesToErase) {
+    StackMap.erase(Id);
+    CallstackProfileData.erase(Id);
+  }
+
+  if (StackMap.empty())
+    return make_error(
+        instrprof_error::malformed,
+        "no entries in callstack map after symbolization");
+
+  return Error::success();
+}
+
+Error RawMemProfReader::readRawProfile(
+    std::unique_ptr DataBuffer) {
   const char *Next = DataBuffer->getBufferStart();
+
+  while (Next < DataBuffer->getBufferEnd()) {
-    auto Summary = computeSummary(Next);
-    OS << "MemProf Profile " << ++Count << "\n";
-    OS << "  Version: " << Summary.Version << "\n";
-    OS << "  TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
-    OS << "  NumSegments: " << Summary.NumSegments << "\n";
-    OS << "  NumMIBInfo: " << Summary.NumMIBInfo << "\n";
-    OS << "  NumStackOffsets: " << Summary.NumStackOffsets << "\n";
-    // TODO: Print the build ids once we can record them using the
-    // sanitizer_procmaps library for linux.
+    auto *Header = reinterpret_cast(Next);
 
-    auto *H = reinterpret_cast(Next);
-    Next += H->TotalSize;
+    // Read in the segment information, check whether it's the same across all
+    // profiles in this binary file.
+    const llvm::SmallVector Entries =
+        readSegmentEntries(Next + Header->SegmentOffset);
+    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
+      // We do not expect segment information to change when deserializing from
+      // the same binary profile file. This can happen if dynamic libraries are
+      // loaded/unloaded between profile dumping.
+      return make_error(
+          instrprof_error::malformed,
+          "memprof raw profile has different segment information");
+    }
+    SegmentInfo.assign(Entries.begin(), Entries.end());
+
+    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
+    // raw profiles in the same binary file are from the same process so the
+    // stackdepot ids are the same.
+    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
+      if (CallstackProfileData.count(Value.first)) {
+        CallstackProfileData[Value.first].Merge(Value.second);
+      } else {
+        CallstackProfileData[Value.first] = Value.second;
+      }
+    }
+
+    // Read in the callstack for each id. For multiple raw profiles in the same
+    // file, we expect that the callstack is the same for a unique id.
+    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
+    if (StackMap.empty()) {
+      StackMap = CSM;
+    } else {
+      if (mergeStackMap(CSM, StackMap))
+        return make_error(
+            instrprof_error::malformed,
+            "memprof raw profile got different call stack for same id");
+    }
+
+    Next += Header->TotalSize;
+  }
+
+  return Error::success();
+}
+
+object::SectionedAddress
+RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
+  LLVM_DEBUG({
+    SegmentEntry *ContainingSegment = nullptr;
+    for (auto &SE : SegmentInfo) {
+      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
+        ContainingSegment = &SE;
+      }
+    }
+
+    // Ensure that the virtual address is valid.
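readRawProfile above walks a file that may hold several concatenated raw profiles, advancing by each header's TotalSize; checkBuffer earlier applies the same walk to verify that the sizes exactly tile the buffer. A standalone sketch of that walk (the header struct is a simplification of the real layout in MemProfData.inc, and version checks are omitted):

#include <cstdint>
#include <cstring>

struct RawHeader {
  uint64_t Magic, Version, TotalSize, SegmentOffset, MIBOffset, StackOffset;
};

// Walk headers chained by TotalSize and check they exactly cover the buffer.
bool coversWholeBuffer(const char *Begin, const char *End) {
  uint64_t Total = 0;
  const char *Next = Begin;
  while (Next < End) {
    RawHeader H;
    std::memcpy(&H, Next, sizeof(H));
    if (H.TotalSize == 0)
      return false; // guard against looping forever on a corrupt header
    Total += H.TotalSize;
    Next += H.TotalSize;
  }
  return Total == static_cast<uint64_t>(End - Begin);
}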
+    assert(ContainingSegment && "Could not find a segment entry");
+  });
+
+  // TODO: Compute the file offset based on the maps and program headers. For
+  // now this only works for non PIE binaries.
+  return object::SectionedAddress{VirtualAddress};
 }
 
+Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
+  if (FunctionProfileData.empty())
+    return make_error(instrprof_error::empty_raw_profile);
+
+  if (Iter == FunctionProfileData.end())
+    return make_error(instrprof_error::eof);
+
+  auto IdToFrameCallback = [this](const FrameId Id) {
+    Frame F = this->idToFrame(Id);
+    if (!this->KeepSymbolName)
+      return F;
+    auto Iter = this->GuidToSymbolName.find(F.Function);
+    assert(Iter != this->GuidToSymbolName.end());
+    F.SymbolName = Iter->getSecond();
+    return F;
+  };
+
+  const IndexedMemProfRecord &IndexedRecord = Iter->second;
+  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
+  Iter++;
+  return Error::success();
+}
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 9b01a386a360..f794e64a13e7 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -19,9 +19,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include
@@ -31,22 +29,21 @@ using namespace llvm;
 using namespace sampleprof;
 
 static cl::opt ProfileSymbolListCutOff(
-    "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1), cl::ZeroOrMore,
+    "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1),
     cl::desc("Cutoff value about how many symbols in profile symbol list "
             "will be used. This is very useful for performance debugging"));
 
 cl::opt GenerateMergedBaseProfiles(
-    "generate-merged-base-profiles", cl::init(true), cl::ZeroOrMore,
+    "generate-merged-base-profiles",
     cl::desc("When generating nested context-sensitive profiles, always "
              "generate extra base profile for function with all its context "
             "profiles merged into it."));
 
 namespace llvm {
 namespace sampleprof {
-SampleProfileFormat FunctionSamples::Format;
 bool FunctionSamples::ProfileIsProbeBased = false;
-bool FunctionSamples::ProfileIsCSFlat = false;
-bool FunctionSamples::ProfileIsCSNested = false;
+bool FunctionSamples::ProfileIsCS = false;
+bool FunctionSamples::ProfileIsPreInlined = false;
 bool FunctionSamples::UseMD5 = false;
 bool FunctionSamples::HasUniqSuffix = true;
 bool FunctionSamples::ProfileIsFS = false;
@@ -88,8 +85,6 @@ class SampleProfErrorCategoryType : public std::error_category {
       return "Counter overflow";
     case sampleprof_error::ostream_seek_unsupported:
       return "Ostream does not support seek";
-    case sampleprof_error::compress_failed:
-      return "Compress failure";
     case sampleprof_error::uncompress_failed:
       return "Uncompress failure";
     case sampleprof_error::zlib_unavailable:
@@ -523,6 +518,12 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
     auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
     SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile);
     NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
+    // Remove the corresponding body sample for the callsite and update the
+    // total weight.
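readNextRecord above materializes full records lazily: stored records hold frame ids, and a callback translates each id to a full frame (optionally attaching the symbol name). A standalone sketch of that translation shape, with simplified types rather than the reader's real interface:

#include <cstdint>
#include <functional>
#include <vector>

struct Frame {
  uint64_t Function; // GUID of the enclosing function
  bool IsInline;     // whether this frame was inlined into its caller
};

// Translate a stored callstack of frame ids into full frames via a callback,
// the same shape as the reader's IdToFrameCallback.
std::vector<Frame>
materialize(const std::vector<uint64_t> &Ids,
            const std::function<Frame(uint64_t)> &IdToFrame) {
  std::vector<Frame> Out;
  Out.reserve(Ids.size());
  for (uint64_t Id : Ids)
    Out.push_back(IdToFrame(Id));
  return Out;
}

// Usage with a map IdToFrameMap from frame id to Frame:
//   materialize(CallStack, [&](uint64_t Id) { return IdToFrameMap.at(Id); });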
+    auto Count = NodeProfile->removeCalledTargetAndBodySample(
+        ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
+        OrigChildContext.getName());
+    NodeProfile->removeTotalSamples(Count);
   }
 
   // Separate child profile to be a standalone profile, if the current parent
   // profile doesn't exist. This is a preparation for removing the original
   // child profile, thus done optionally. It is seen that duplicating context
   // profiles into base profiles improves the code quality for thinlto build by
   // allowing a profile in the prelink phase for to-be-fully-inlined functions.
-  if (!NodeProfile || GenerateMergedBaseProfiles)
+  if (!NodeProfile) {
     ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-
-  // Contexts coming with a `ContextShouldBeInlined` attribute indicate this
-  // is a preinliner-computed profile.
-  if (OrigChildContext.hasAttribute(ContextShouldBeInlined))
-    FunctionSamples::ProfileIsCSNested = true;
+  } else if (GenerateMergedBaseProfiles) {
+    ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+    auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
+    SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
+        ContextDuplicatedIntoBase);
+  }
 
   // Remove the original child profile.
   ProfileMap.erase(OrigChildContext);
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 80c02faaba04..280e3c6cb8d1 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
@@ -39,7 +40,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
@@ -348,7 +348,7 @@ std::error_code SampleProfileReaderText::readImpl() {
         }
         FProfile.getContext().setAllAttributes(Attributes);
         if (Attributes & (uint32_t)ContextShouldBeInlined)
-          ProfileIsCSNested = true;
+          ProfileIsPreInlined = true;
         DepthMetadata = Depth;
         break;
       }
@@ -358,14 +358,14 @@ std::error_code SampleProfileReaderText::readImpl() {
 
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
-  ProfileIsCSFlat = (CSProfileCount > 0);
+  ProfileIsCS = (CSProfileCount > 0);
   assert((TopLevelProbeProfileCount == 0 ||
           TopLevelProbeProfileCount == Profiles.size()) &&
         "Cannot have both probe-based profiles and regular profiles");
   ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
-  FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat;
-  FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
+  FunctionSamples::ProfileIsCS = ProfileIsCS;
+  FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
 
   if (Result == sampleprof_error::success)
     computeSummary();
@@ -630,7 +630,7 @@ SampleProfileReaderExtBinaryBase::readContextFromTable() {
 
 ErrorOr
 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
-  if (ProfileIsCSFlat) {
+  if (ProfileIsCS) {
     auto FContext(readContextFromTable());
     if (std::error_code EC = FContext.getError())
       return EC;
@@ -654,9 +654,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
       if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
         Summary->setPartialProfile(true);
       if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
-        FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat = true;
-      if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
-        FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
+        FunctionSamples::ProfileIsCS = ProfileIsCS = true;
+      if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
+        FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
       if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
         FunctionSamples::ProfileIsFS = ProfileIsFS = true;
       break;
@@ -777,7 +777,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     }
   }
 
-  if (ProfileIsCSFlat) {
+  if (ProfileIsCS) {
     DenseSet FuncGuidsToUse;
     if (useMD5()) {
       for (auto Name : FuncsToUse)
@@ -847,7 +847,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
   }
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
-  assert((!CSProfileCount || ProfileIsCSFlat) &&
+  assert((!CSProfileCount || ProfileIsCS) &&
          "Section flag should be consistent with actual profile");
   return sampleprof_error::success;
 }
@@ -1105,7 +1105,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
     FProfile->getContext().setAllAttributes(*Attributes);
   }
 
-  if (!ProfileIsCSFlat) {
+  if (!ProfileIsCS) {
     // Read all the attributes for inlined function calls.
     auto NumCallsites = readNumber();
     if (std::error_code EC = NumCallsites.getError())
@@ -1275,8 +1275,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
       Flags.append("partial,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
       Flags.append("context,");
-    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
-      Flags.append("context-nested,");
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
+      Flags.append("preInlined,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       Flags.append("fs-discriminator,");
     break;
@@ -1828,7 +1828,7 @@ SampleProfileReaderItaniumRemapper::create(std::unique_ptr &B,
                                            SampleProfileReader &Reader,
                                            LLVMContext &C) {
   auto Remappings = std::make_unique();
-  if (Error E = Remappings->read(*B.get())) {
+  if (Error E = Remappings->read(*B)) {
     handleAllErrors(
         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
@@ -1882,7 +1882,6 @@ SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C,
     Reader->Remapper = std::move(ReaderOrErr.get());
   }
 
-  FunctionSamples::Format = Reader->getFormat();
   if (std::error_code EC = Reader->readHeader()) {
     return EC;
   }
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index b575425d4e94..8ec6b7ebc29e 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -19,7 +19,6 @@
 #include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Compression.h"
@@ -87,10 +86,8 @@ std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
     return sampleprof_error::success;
   auto &OS = *OutputStream;
   SmallString<128> CompressedStrings;
-  llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
-                                 zlib::BestSizeCompression);
-  if (E)
-    return sampleprof_error::compress_failed;
+  zlib::compress(UncompressedStrings, CompressedStrings,
+                 zlib::BestSizeCompression);
   encodeULEB128(UncompressedStrings.size(), OS);
   encodeULEB128(CompressedStrings.size(), OS);
   OS << CompressedStrings.str();
@@ -172,7 +169,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
     return (std::error_code)sampleprof_error::success;
   };
 
-  if (FunctionSamples::ProfileIsCSFlat) {
+  if (FunctionSamples::ProfileIsCS) {
     // Sort the contexts before writing them out. This is to help fast load all
     // context profiles for a function as well as their callee contexts which
     // can help profile-guided importing for ThinLTO.
@@ -202,11 +199,11 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
   if (FunctionSamples::ProfileIsProbeBased)
     encodeULEB128(FunctionProfile.getFunctionHash(), OS);
-  if (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested) {
+  if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined) {
     encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS);
   }
 
-  if (!FunctionSamples::ProfileIsCSFlat) {
+  if (!FunctionSamples::ProfileIsCS) {
     // Recursively emit attributes for all callee samples.
     uint64_t NumCallsites = 0;
     for (const auto &J : FunctionProfile.getCallsiteSamples())
@@ -228,8 +225,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
 std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
     const SampleProfileMap &Profiles) {
-  if (!FunctionSamples::ProfileIsProbeBased &&
-      !FunctionSamples::ProfileIsCSFlat && !FunctionSamples::ProfileIsCSNested)
+  if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS &&
+      !FunctionSamples::ProfileIsPreInlined)
     return sampleprof_error::success;
   for (const auto &Entry : Profiles) {
     if (std::error_code EC = writeFuncMetadata(Entry.second))
@@ -324,12 +321,12 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
   if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
     addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
   if (Type == SecFuncMetadata &&
-      (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested))
+      (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined))
     addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute);
-  if (Type == SecProfSummary && FunctionSamples::ProfileIsCSFlat)
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
-  if (Type == SecProfSummary && FunctionSamples::ProfileIsCSNested)
-    addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsCSNested);
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsPreInlined)
+    addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
   if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
 
@@ -471,7 +468,7 @@ SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
 /// it needs to be parsed by the SampleProfileReaderText class.
 std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
   auto &OS = *OutputStream;
-  if (FunctionSamples::ProfileIsCSFlat)
+  if (FunctionSamples::ProfileIsCS)
     OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
   else
     OS << S.getName() << ":" << S.getTotalSamples();
@@ -871,8 +868,7 @@ SampleProfileWriter::create(std::unique_ptr &OS,
   std::unique_ptr Writer;
 
   // Currently only Text and Extended Binary format are supported for CSSPGO.
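compressAndOutput above frames the compressed string table as two ULEB128-encoded sizes (uncompressed, then compressed) followed by the raw bytes. A standalone sketch of that framing with a self-contained ULEB128 encoder (LLVM's own encoder lives in llvm/Support/LEB128.h; names here are illustrative):

#include <cstdint>
#include <vector>

// Append V as ULEB128: 7 payload bits per byte, high bit set on all but the
// last byte.
void encodeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (V != 0);
}

// Frame a compressed blob:
// [uleb(UncompressedSize)][uleb(Compressed.size())][bytes].
std::vector<uint8_t> frameBlob(uint64_t UncompressedSize,
                               const std::vector<uint8_t> &Compressed) {
  std::vector<uint8_t> Out;
  encodeULEB128(UncompressedSize, Out);
  encodeULEB128(Compressed.size(), Out);
  Out.insert(Out.end(), Compressed.begin(), Compressed.end());
  return Out;
}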
-  if ((FunctionSamples::ProfileIsCSFlat ||
-       FunctionSamples::ProfileIsProbeBased) &&
+  if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
       (Format == SPF_Binary || Format == SPF_Compact_Binary))
     return sampleprof_error::unsupported_writing_format;
 
diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
index 0810bf531db8..5a77a25b1569 100644
--- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Remarks/BitstreamRemarkSerializer.h"
+#include "llvm/Remarks/Remark.h"
 
 using namespace llvm;
 using namespace llvm::remarks;
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index 62f80918ea1d..cbe966794c49 100644
--- a/llvm/lib/Remarks/RemarkLinker.cpp
+++ b/llvm/lib/Remarks/RemarkLinker.cpp
@@ -17,11 +17,14 @@
 #include "llvm/Remarks/RemarkParser.h"
 #include "llvm/Remarks/RemarkSerializer.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace llvm::remarks;
 
+namespace llvm {
+class raw_ostream;
+}
+
 static Expected
 getRemarksSectionName(const object::ObjectFile &Obj) {
   if (Obj.isMachO())
@@ -63,7 +66,7 @@ void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) {
 }
 
 // Discard remarks with no source location.
-static bool shouldKeepRemark(const Remark &R) { return R.Loc.hasValue(); }
+static bool shouldKeepRemark(const Remark &R) { return R.Loc.has_value(); }
 
 Error RemarkLinker::link(StringRef Buffer, Optional RemarkFormat) {
   if (!RemarkFormat) {
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index f36767efcbf4..fc0612fb76e2 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -118,7 +118,7 @@ struct CParser {
             : createRemarkParser(ParserFormat, Buf))) {}
 
   void handleError(Error E) { Err.emplace(toString(std::move(E))); }
-  bool hasError() const { return Err.hasValue(); }
+  bool hasError() const { return Err.has_value(); }
   const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
 };
 } // namespace
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 9e965aa4f6c4..fff2b655e821 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Remarks/YAMLRemarkSerializer.h"
+#include "llvm/Remarks/Remark.h"
 #include "llvm/Support/FileSystem.h"
 
 using namespace llvm;
@@ -58,8 +59,7 @@ template <> struct MappingTraits {
     if (auto *Serializer = dyn_cast(
             reinterpret_cast(io.getContext()))) {
-      assert(Serializer->StrTab.hasValue() &&
-             "YAMLStrTabSerializer with no StrTab.");
+      assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab.");
       StringTable &StrTab = *Serializer->StrTab;
       unsigned PassID = StrTab.add(Remark->PassName).first;
       unsigned NameID = StrTab.add(Remark->RemarkName).first;
@@ -83,8 +83,7 @@ template <> struct MappingTraits {
     if (auto *Serializer = dyn_cast(
             reinterpret_cast(io.getContext()))) {
-      assert(Serializer->StrTab.hasValue() &&
-             "YAMLStrTabSerializer with no StrTab.");
+      assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab.");
       StringTable &StrTab = *Serializer->StrTab;
       unsigned FileID = StrTab.add(File).first;
       io.mapRequired("File", FileID);
@@ -138,8 +137,7 @@ template <> struct MappingTraits {
     if (auto *Serializer = dyn_cast(
             reinterpret_cast(io.getContext()))) {
-      assert(Serializer->StrTab.hasValue() &&
-             "YAMLStrTabSerializer with no StrTab.");
+      assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab.");
       StringTable &StrTab = *Serializer->StrTab;
       auto ValueID = StrTab.add(A.Val).first;
       io.mapRequired(A.Key.data(), ValueID);
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp
index cdf7c8ade9aa..e2579bf53260 100644
--- a/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/llvm/lib/Support/AArch64TargetParser.cpp
@@ -64,62 +64,14 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions,
   if (Extensions == AArch64::AEK_INVALID)
     return false;
 
-  if (Extensions & AEK_FP)
-    Features.push_back("+fp-armv8");
-  if (Extensions & AEK_SIMD)
-    Features.push_back("+neon");
-  if (Extensions & AEK_CRC)
-    Features.push_back("+crc");
-  if (Extensions & AEK_CRYPTO)
-    Features.push_back("+crypto");
-  if (Extensions & AEK_DOTPROD)
-    Features.push_back("+dotprod");
-  if (Extensions & AEK_FP16FML)
-    Features.push_back("+fp16fml");
-  if (Extensions & AEK_FP16)
-    Features.push_back("+fullfp16");
-  if (Extensions & AEK_PROFILE)
-    Features.push_back("+spe");
-  if (Extensions & AEK_RAS)
-    Features.push_back("+ras");
-  if (Extensions & AEK_LSE)
-    Features.push_back("+lse");
-  if (Extensions & AEK_RDM)
-    Features.push_back("+rdm");
-  if (Extensions & AEK_SVE)
-    Features.push_back("+sve");
-  if (Extensions & AEK_SVE2)
-    Features.push_back("+sve2");
-  if (Extensions & AEK_SVE2AES)
-    Features.push_back("+sve2-aes");
-  if (Extensions & AEK_SVE2SM4)
-    Features.push_back("+sve2-sm4");
-  if (Extensions & AEK_SVE2SHA3)
-    Features.push_back("+sve2-sha3");
-  if (Extensions & AEK_SVE2BITPERM)
-    Features.push_back("+sve2-bitperm");
-  if (Extensions & AArch64::AEK_TME)
-    Features.push_back("+tme");
-  if (Extensions & AEK_RCPC)
-    Features.push_back("+rcpc");
-  if (Extensions & AEK_BRBE)
-    Features.push_back("+brbe");
-  if (Extensions & AEK_PAUTH)
-    Features.push_back("+pauth");
-  if (Extensions & AEK_FLAGM)
-    Features.push_back("+flagm");
-  if (Extensions & AArch64::AEK_SME)
-    Features.push_back("+sme");
-  if (Extensions & AArch64::AEK_SMEF64)
-    Features.push_back("+sme-f64");
-  if (Extensions & AArch64::AEK_SMEI64)
-    Features.push_back("+sme-i64");
-  if (Extensions & AArch64::AEK_HBC)
-    Features.push_back("+hbc");
-  if (Extensions & AArch64::AEK_MOPS)
-    Features.push_back("+mops");
-  if (Extensions & AArch64::AEK_PERFMON)
-    Features.push_back("+perfmon");
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)                   \
+  if (Extensions & ID) {                                                       \
+    const char *feature = FEATURE;                                             \
+    /* INVALID and NONE have no feature name. */                               \
+    if (feature)                                                               \
+      Features.push_back(feature);                                             \
+  }
+#include "../../include/llvm/Support/AArch64TargetParser.def"
 
   return true;
 }
diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp
index 61b30b5c5c60..f1d07184793c 100644
--- a/llvm/lib/Support/APFixedPoint.cpp
+++ b/llvm/lib/Support/APFixedPoint.cpp
@@ -233,11 +233,11 @@ APFixedPoint APFixedPoint::mul(const APFixedPoint &Other,
   // Widen the LHS and RHS so we can perform a full multiplication.
   unsigned Wide = CommonFXSema.getWidth() * 2;
   if (CommonFXSema.isSigned()) {
-    ThisVal = ThisVal.sextOrSelf(Wide);
-    OtherVal = OtherVal.sextOrSelf(Wide);
+    ThisVal = ThisVal.sext(Wide);
+    OtherVal = OtherVal.sext(Wide);
   } else {
-    ThisVal = ThisVal.zextOrSelf(Wide);
-    OtherVal = OtherVal.zextOrSelf(Wide);
+    ThisVal = ThisVal.zext(Wide);
+    OtherVal = OtherVal.zext(Wide);
   }
 
   // Perform the full multiplication and downscale to get the same scale.
@@ -290,11 +290,11 @@ APFixedPoint APFixedPoint::div(const APFixedPoint &Other,
   // Widen the LHS and RHS so we can perform a full division.
   unsigned Wide = CommonFXSema.getWidth() * 2;
   if (CommonFXSema.isSigned()) {
-    ThisVal = ThisVal.sextOrSelf(Wide);
-    OtherVal = OtherVal.sextOrSelf(Wide);
+    ThisVal = ThisVal.sext(Wide);
+    OtherVal = OtherVal.sext(Wide);
   } else {
-    ThisVal = ThisVal.zextOrSelf(Wide);
-    OtherVal = OtherVal.zextOrSelf(Wide);
+    ThisVal = ThisVal.zext(Wide);
+    OtherVal = OtherVal.zext(Wide);
   }
 
   // Upscale to compensate for the loss of precision from division, and
@@ -340,9 +340,9 @@ APFixedPoint APFixedPoint::shl(unsigned Amt, bool *Overflow) const {
   // Widen the LHS.
   unsigned Wide = Sema.getWidth() * 2;
   if (Sema.isSigned())
-    ThisVal = ThisVal.sextOrSelf(Wide);
+    ThisVal = ThisVal.sext(Wide);
   else
-    ThisVal = ThisVal.zextOrSelf(Wide);
+    ThisVal = ThisVal.zext(Wide);
 
   // Clamp the shift amount at the original width, and perform the shift.
   Amt = std::min(Amt, ThisVal.getBitWidth());
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 4b75c9db8526..2ae28fe066cd 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2213,8 +2213,11 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
   // when truncating from PowerPC double-double to double format), the
   // right shift could lose result mantissa bits. Adjust exponent instead
   // of performing excessive shift.
+  // Also do a similar trick in case shifting denormal would produce zero
+  // significand as this case isn't handled correctly by normalize.
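The getExtensionFeatures rewrite above replaces a hand-written if-chain with an X-macro expansion over the .def table, so the extension list lives in one place. The same idea in miniature, with an invented three-entry table rather than the real AArch64 extension list:

#include <cstdint>
#include <vector>

// The table lives in one macro; each use site defines the per-entry macro to
// extract what it needs (entries here are illustrative only).
#define FOR_EACH_EXT(X)                                                        \
  X("fp", 1ULL << 0, "+fp-armv8")                                              \
  X("simd", 1ULL << 1, "+neon")                                                \
  X("crc", 1ULL << 2, "+crc")

std::vector<const char *> getFeatures(uint64_t Extensions) {
  std::vector<const char *> Features;
#define ADD_FEATURE(NAME, ID, FEATURE)                                         \
  if (Extensions & (ID))                                                       \
    Features.push_back(FEATURE);
  FOR_EACH_EXT(ADD_FEATURE)
#undef ADD_FEATURE
  return Features;
}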
   if (shift < 0 && isFiniteNonZero()) {
-    int exponentChange = significandMSB() + 1 - fromSemantics.precision;
+    int omsb = significandMSB() + 1;
+    int exponentChange = omsb - fromSemantics.precision;
     if (exponent + exponentChange < toSemantics.minExponent)
       exponentChange = toSemantics.minExponent - exponent;
     if (exponentChange < shift)
@@ -2222,6 +2225,10 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
     if (exponentChange < 0) {
       shift -= exponentChange;
       exponent += exponentChange;
+    } else if (omsb <= -shift) {
+      exponentChange = omsb + shift - 1; // leave at least one bit set
+      shift -= exponentChange;
+      exponent += exponentChange;
     }
   }
 
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index b536e9a9a6d0..f74178b1ba4e 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -343,7 +343,7 @@ void APInt::flipAllBitsSlowCase() {
 /// In the slow case, we know the result is large.
 APInt APInt::concatSlowCase(const APInt &NewLSB) const {
   unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
-  APInt Result = NewLSB.zextOrSelf(NewWidth);
+  APInt Result = NewLSB.zext(NewWidth);
   Result.insertBits(*this, NewLSB.getBitWidth());
   return Result;
 }
@@ -502,12 +502,51 @@ uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
   return retBits;
 }
 
+unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
+  assert(!Str.empty() && "Invalid string length");
+  size_t StrLen = Str.size();
+
+  // Each computation below needs to know if it's negative.
+  unsigned IsNegative = false;
+  if (Str[0] == '-' || Str[0] == '+') {
+    IsNegative = Str[0] == '-';
+    StrLen--;
+    assert(StrLen && "String is only a sign, needs a value.");
+  }
+
+  // For radixes of power-of-two values, the bits required is accurately and
+  // easily computed.
+  if (Radix == 2)
+    return StrLen + IsNegative;
+  if (Radix == 8)
+    return StrLen * 3 + IsNegative;
+  if (Radix == 16)
+    return StrLen * 4 + IsNegative;
+
+  // Compute a sufficient number of bits that is always large enough but might
+  // be too large. This avoids the assertion in the constructor. This
+  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+  // bits in that case.
+  if (Radix == 10)
+    return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;
+
+  assert(Radix == 36);
+  return (StrLen == 1 ? 7 : StrLen * 16 / 3) + IsNegative;
+}
+
 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
-  assert(!str.empty() && "Invalid string length");
-  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
-          radix == 36) &&
-         "Radix should be 2, 8, 10, 16, or 36!");
+  // Compute a sufficient number of bits that is always large enough but might
+  // be too large.
+  unsigned sufficient = getSufficientBitsNeeded(str, radix);
+
+  // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
+  // return the value directly. For bases 10 and 36, we need to do extra work.
+  if (radix == 2 || radix == 8 || radix == 16)
+    return sufficient;
+  // This is grossly inefficient but accurate. We could probably do something
+  // with a computation of roughly slen*64/20 and then adjust by the value of
+  // the first few digits. But, I'm not sure how accurate that could be.
   size_t slen = str.size();
 
   // Each computation below needs to know if it's negative.
@@ -519,28 +558,6 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
     assert(slen && "String is only a sign, needs a value.");
   }
 
-  // For radixes of power-of-two values, the bits required is accurately and
-  // easily computed
-  if (radix == 2)
-    return slen + isNegative;
-  if (radix == 8)
-    return slen * 3 + isNegative;
-  if (radix == 16)
-    return slen * 4 + isNegative;
-
-  // FIXME: base 36
-
-  // This is grossly inefficient but accurate. We could probably do something
-  // with a computation of roughly slen*64/20 and then adjust by the value of
-  // the first few digits. But, I'm not sure how accurate that could be.
-
-  // Compute a sufficient number of bits that is always large enough but might
-  // be too large. This avoids the assertion in the constructor. This
-  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
-  // bits in that case.
-  unsigned sufficient
-    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
-                 : (slen == 1 ? 7 : slen * 16/3);
 
   // Convert to the actual binary value.
   APInt tmp(sufficient, StringRef(p, slen), radix);
@@ -595,7 +612,7 @@ APInt APInt::getLoBits(unsigned numBits) const {
 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 
-  APInt Val = V.zextOrSelf(NewLen);
+  APInt Val = V.zext(NewLen);
   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
     Val |= Val << I;
 
@@ -879,11 +896,14 @@ double APInt::roundToDouble(bool isSigned) const {
 
 // Truncate to new width.
 APInt APInt::trunc(unsigned width) const {
-  assert(width < BitWidth && "Invalid APInt Truncate request");
+  assert(width <= BitWidth && "Invalid APInt Truncate request");
 
   if (width <= APINT_BITS_PER_WORD)
     return APInt(width, getRawData()[0]);
 
+  if (width == BitWidth)
+    return *this;
+
   APInt Result(getMemory(getNumWords(width)), width);
 
   // Copy full words.
@@ -901,7 +921,7 @@ APInt APInt::trunc(unsigned width) const {
 
 // Truncate to new width with unsigned saturation.
 APInt APInt::truncUSat(unsigned width) const {
-  assert(width < BitWidth && "Invalid APInt Truncate request");
+  assert(width <= BitWidth && "Invalid APInt Truncate request");
 
   // Can we just losslessly truncate it?
   if (isIntN(width))
@@ -912,7 +932,7 @@ APInt APInt::truncUSat(unsigned width) const {
 
 // Truncate to new width with signed saturation.
 APInt APInt::truncSSat(unsigned width) const {
-  assert(width < BitWidth && "Invalid APInt Truncate request");
+  assert(width <= BitWidth && "Invalid APInt Truncate request");
 
   // Can we just losslessly truncate it?
   if (isSignedIntN(width))
@@ -924,11 +944,14 @@ APInt APInt::truncSSat(unsigned width) const {
 
 // Sign extend to a new width.
 APInt APInt::sext(unsigned Width) const {
-  assert(Width > BitWidth && "Invalid APInt SignExtend request");
+  assert(Width >= BitWidth && "Invalid APInt SignExtend request");
 
   if (Width <= APINT_BITS_PER_WORD)
     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 
+  if (Width == BitWidth)
+    return *this;
+
   APInt Result(getMemory(getNumWords(Width)), Width);
 
   // Copy words.
@@ -948,11 +971,14 @@ APInt APInt::sext(unsigned Width) const {
 
 // Zero extend to a new width.
 APInt APInt::zext(unsigned width) const {
-  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+  assert(width >= BitWidth && "Invalid APInt ZeroExtend request");
 
   if (width <= APINT_BITS_PER_WORD)
     return APInt(width, U.VAL);
 
+  if (width == BitWidth)
+    return *this;
+
   APInt Result(getMemory(getNumWords(width)), width);
 
   // Copy words.
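getSufficientBitsNeeded above over-approximates the width needed to parse a numeral: exact for radixes 2, 8, and 16, while for 10 and 36 it scales the digit count by a ratio safely above log2 of the radix (64/18 ≈ 3.56 > log2 10 ≈ 3.32; 16/3 ≈ 5.33 > log2 36 ≈ 5.17). A standalone version of the same estimate for C strings:

#include <cassert>
#include <cstring>

// Over-approximate the bit width needed to hold Str parsed in base Radix.
unsigned sufficientBits(const char *Str, unsigned Radix) {
  size_t Len = std::strlen(Str);
  unsigned Neg = (Str[0] == '-' || Str[0] == '+');
  Len -= Neg;
  assert(Len && "string is only a sign");
  if (Radix == 2)
    return Len + Neg;
  if (Radix == 8)
    return Len * 3 + Neg;
  if (Radix == 16)
    return Len * 4 + Neg;
  if (Radix == 10)
    return (Len == 1 ? 4 : Len * 64 / 18) + Neg;
  assert(Radix == 36);
  return (Len == 1 ? 7 : Len * 16 / 3) + Neg;
}
// e.g. sufficientBits("999", 10) == 10, which suffices since 999 < 2^10.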
@@ -981,24 +1007,6 @@ APInt APInt::sextOrTrunc(unsigned width) const { return *this; } -APInt APInt::truncOrSelf(unsigned width) const { - if (BitWidth > width) - return trunc(width); - return *this; -} - -APInt APInt::zextOrSelf(unsigned width) const { - if (BitWidth < width) - return zext(width); - return *this; -} - -APInt APInt::sextOrSelf(unsigned width) const { - if (BitWidth < width) - return sext(width); - return *this; -} - /// Arithmetic right-shift this APInt by shiftAmt. /// Arithmetic right-shift function. void APInt::ashrInPlace(const APInt &shiftAmt) { @@ -2960,7 +2968,8 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); } -APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) { +APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth, + bool MatchAllBits) { unsigned OldBitWidth = A.getBitWidth(); assert((((OldBitWidth % NewBitWidth) == 0) || ((NewBitWidth % OldBitWidth) == 0)) && @@ -2984,11 +2993,16 @@ APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) { if (A[i]) NewA.setBits(i * Scale, (i + 1) * Scale); } else { - // Merge bits - if any old bit is set, then set scale equivalent new bit. unsigned Scale = OldBitWidth / NewBitWidth; - for (unsigned i = 0; i != NewBitWidth; ++i) - if (!A.extractBits(Scale, i * Scale).isZero()) - NewA.setBit(i); + for (unsigned i = 0; i != NewBitWidth; ++i) { + if (MatchAllBits) { + if (A.extractBits(Scale, i * Scale).isAllOnes()) + NewA.setBit(i); + } else { + if (!A.extractBits(Scale, i * Scale).isZero()) + NewA.setBit(i); + } + } } return NewA; diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp index 9ba224cee0ca..adb5d3f0964d 100644 --- a/llvm/lib/Support/ARMAttributeParser.cpp +++ b/llvm/lib/Support/ARMAttributeParser.cpp @@ -87,7 +87,7 @@ Error ARMAttributeParser::CPU_arch(AttrType tag) { "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M", "ARM v7E-M", "ARM v8", nullptr, "ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr, - "ARM v8.1-M Mainline" + "ARM v8.1-M Mainline", "ARM v9-A" }; return parseStringAttribute("CPU_arch", tag, makeArrayRef(strings)); } diff --git a/llvm/lib/Support/ARMWinEH.cpp b/llvm/lib/Support/ARMWinEH.cpp index 8e7fa1149082..29c7a28541f2 100644 --- a/llvm/lib/Support/ARMWinEH.cpp +++ b/llvm/lib/Support/ARMWinEH.cpp @@ -11,22 +11,35 @@ namespace llvm { namespace ARM { namespace WinEH { -std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) { +std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF, + bool Prologue) { uint8_t NumRegisters = RF.Reg(); uint8_t RegistersVFP = RF.R(); uint8_t LinkRegister = RF.L(); uint8_t ChainedFrame = RF.C(); - uint16_t GPRMask = (ChainedFrame << 11) | (LinkRegister << 14); + uint16_t GPRMask = (ChainedFrame << 11); uint32_t VFPMask = 0; + if (Prologue) { + GPRMask |= (LinkRegister << 14); + } else { + // If Ret != 0, we pop into Lr and return later + if (RF.Ret() != ReturnType::RT_POP) + GPRMask |= (LinkRegister << 14); + else if (!RF.H()) // If H == 0, we pop directly into Pc + GPRMask |= (LinkRegister << 15); + // else, Ret == 0 && H == 1, we pop into Pc separately afterwards + } + if (RegistersVFP) VFPMask |= (((1 << ((NumRegisters + 1) % 8)) - 1) << 8); else GPRMask |= (((1 << (NumRegisters + 1)) - 1) << 4); - if (PrologueFolding(RF)) - GPRMask |= (((1 << (NumRegisters + 1)) - 1) << (~RF.StackAdjust() & 0x3)); + if ((PrologueFolding(RF) && Prologue) ||
(EpilogueFolding(RF) && !Prologue)) + GPRMask |= (((1 << ((RF.StackAdjust() & 0x3) + 1)) - 1) + << (~RF.StackAdjust() & 0x3)); return std::make_pair(GPRMask, VFPMask); } diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp new file mode 100644 index 000000000000..5ba011bac4e9 --- /dev/null +++ b/llvm/lib/Support/AddressRanges.cpp @@ -0,0 +1,59 @@ +//===- AddressRanges.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/AddressRanges.h" +#include "llvm/ADT/STLExtras.h" +#include + +using namespace llvm; + +void AddressRanges::insert(AddressRange Range) { + if (Range.size() == 0) + return; + + auto It = llvm::upper_bound(Ranges, Range); + auto It2 = It; + while (It2 != Ranges.end() && It2->start() < Range.end()) + ++It2; + if (It != It2) { + Range = {Range.start(), std::max(Range.end(), It2[-1].end())}; + It = Ranges.erase(It, It2); + } + if (It != Ranges.begin() && Range.start() < It[-1].end()) + It[-1] = {It[-1].start(), std::max(It[-1].end(), Range.end())}; + else + Ranges.insert(It, Range); +} + +bool AddressRanges::contains(uint64_t Addr) const { + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.start() <= Addr; }); + return It != Ranges.begin() && Addr < It[-1].end(); +} + +bool AddressRanges::contains(AddressRange Range) const { + if (Range.size() == 0) + return false; + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.start() <= Range.start(); }); + if (It == Ranges.begin()) + return false; + return Range.end() <= It[-1].end(); +} + +Optional<AddressRange> +AddressRanges::getRangeThatContains(uint64_t Addr) const { + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.start() <= Addr; }); + if (It != Ranges.begin() && Addr < It[-1].end()) + return It[-1]; + return llvm::None; +} diff --git a/llvm/lib/Support/BLAKE3/LICENSE b/llvm/lib/Support/BLAKE3/LICENSE new file mode 100644 index 000000000000..f5892efc3b9b --- /dev/null +++ b/llvm/lib/Support/BLAKE3/LICENSE @@ -0,0 +1,330 @@ +This work is released into the public domain with CC0 1.0. Alternatively, it is +licensed under the Apache License 2.0. + +------------------------------------------------------------------------------- + +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work").
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. + +------------------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2019 Jack O'Connor and Samuel Neves + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/llvm/lib/Support/BLAKE3/README.md b/llvm/lib/Support/BLAKE3/README.md new file mode 100644 index 000000000000..319a7514e8b5 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/README.md @@ -0,0 +1,296 @@ +Implementation of BLAKE3, originating from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c + +# Example + +An example program that hashes bytes from standard input and prints the +result: + +Using the C++ API: + +```c++ +#include "llvm/Support/BLAKE3.h" +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +int main() { + // Initialize the hasher. + llvm::BLAKE3 hasher; + + // Read input bytes from stdin. + char buf[65536]; + while (1) { + ssize_t n = read(STDIN_FILENO, buf, sizeof(buf)); + if (n > 0) { + hasher.update(llvm::StringRef(buf, n)); + } else if (n == 0) { + break; // end of file + } else { + fprintf(stderr, "read failed: %s\n", strerror(errno)); + exit(1); + } + } + + // Finalize the hash. Default output length is 32 bytes. + auto output = hasher.final(); + + // Print the hash as hexadecimal. + for (uint8_t byte : output) { + printf("%02x", byte); + } + printf("\n"); + return 0; +} +``` + +Using the C API: + +```c +#include "llvm-c/blake3.h" +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +int main() { + // Initialize the hasher. + llvm_blake3_hasher hasher; + llvm_blake3_hasher_init(&hasher); + + // Read input bytes from stdin. + unsigned char buf[65536]; + while (1) { + ssize_t n = read(STDIN_FILENO, buf, sizeof(buf)); + if (n > 0) { + llvm_blake3_hasher_update(&hasher, buf, n); + } else if (n == 0) { + break; // end of file + } else { + fprintf(stderr, "read failed: %s\n", strerror(errno)); + exit(1); + } + } + + // Finalize the hash. LLVM_BLAKE3_OUT_LEN is the default output length, 32 bytes. + uint8_t output[LLVM_BLAKE3_OUT_LEN]; + llvm_blake3_hasher_finalize(&hasher, output, LLVM_BLAKE3_OUT_LEN); + + // Print the hash as hexadecimal. + for (size_t i = 0; i < LLVM_BLAKE3_OUT_LEN; i++) { + printf("%02x", output[i]); + } + printf("\n"); + return 0; +} +``` + +# API + +## The Class/Struct + +```c++ +class BLAKE3 { + // API +private: + llvm_blake3_hasher Hasher; +}; +``` +```c +typedef struct { + // private fields +} llvm_blake3_hasher; +``` + +An incremental BLAKE3 hashing state, which can accept any number of +updates. This implementation doesn't allocate any heap memory, but +`sizeof(llvm_blake3_hasher)` itself is relatively large, currently 1912 bytes +on x86-64. This size can be reduced by restricting the maximum input +length, as described in Section 5.4 of [the BLAKE3 +spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf), +but this implementation doesn't currently support that strategy. + +## Common API Functions + +```c++ +BLAKE3::BLAKE3(); + +void BLAKE3::init(); +``` +```c +void llvm_blake3_hasher_init( + llvm_blake3_hasher *self); +``` + +Initialize a `llvm_blake3_hasher` in the default hashing mode. + +--- + +```c++ +void BLAKE3::update(ArrayRef<uint8_t> Data); + +void BLAKE3::update(StringRef Str); +``` +```c +void llvm_blake3_hasher_update( + llvm_blake3_hasher *self, + const void *input, + size_t input_len); +``` + +Add input to the hasher. This can be called any number of times. + +--- + +```c++ +template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN> +using BLAKE3Result = std::array<uint8_t, NumBytes>; + +template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN> +void BLAKE3::final(BLAKE3Result<NumBytes> &Result); + +template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN> +BLAKE3Result<NumBytes> BLAKE3::final(); +``` +```c +void llvm_blake3_hasher_finalize( + const llvm_blake3_hasher *self, + uint8_t *out, + size_t out_len); +``` + +Finalize the hasher and return an output of any length, given in bytes.
+This doesn't modify the hasher itself, and it's possible to finalize +again after adding more input. The constant `LLVM_BLAKE3_OUT_LEN` provides +the default output length, 32 bytes, which is recommended for most +callers. + +Outputs shorter than the default length of 32 bytes (256 bits) provide +less security. An N-bit BLAKE3 output is intended to provide N bits of +first and second preimage resistance and N/2 bits of collision +resistance, for any N up to 256. Longer outputs don't provide any +additional security. + +Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly +requesting a short output is equivalent to truncating the default-length +output. (Note that this is different between BLAKE2 and BLAKE3.) + +## Less Common API Functions + +```c +void llvm_blake3_hasher_init_keyed( + llvm_blake3_hasher *self, + const uint8_t key[LLVM_BLAKE3_KEY_LEN]); +``` + +Initialize a `llvm_blake3_hasher` in the keyed hashing mode. The key must be +exactly 32 bytes. + +--- + +```c +void llvm_blake3_hasher_init_derive_key( + llvm_blake3_hasher *self, + const char *context); +``` + +Initialize a `llvm_blake3_hasher` in the key derivation mode. The context +string is given as an initialization parameter, and afterwards input key +material should be given with `llvm_blake3_hasher_update`. The context string +is a null-terminated C string which should be **hardcoded, globally +unique, and application-specific**. The context string should not +include any dynamic input like salts, nonces, or identifiers read from a +database at runtime. A good default format for the context string is +`"[application] [commit timestamp] [purpose]"`, e.g., `"example.com +2019-12-25 16:18:03 session tokens v1"`. + +This function is intended for application code written in C. For +language bindings, see `llvm_blake3_hasher_init_derive_key_raw` below. + +--- + +```c +void llvm_blake3_hasher_init_derive_key_raw( + llvm_blake3_hasher *self, + const void *context, + size_t context_len); +``` + +As `llvm_blake3_hasher_init_derive_key` above, except that the context string +is given as a pointer to an array of arbitrary bytes with a provided +length. This is intended for writing language bindings, where C string +conversion would add unnecessary overhead and new error cases. Unicode +strings should be encoded as UTF-8. + +Application code in C should prefer `llvm_blake3_hasher_init_derive_key`, +which takes the context as a C string. If you need to use arbitrary +bytes as a context string in application code, consider whether you're +violating the requirement that context strings should be hardcoded. + +--- + +```c +void llvm_blake3_hasher_finalize_seek( + const llvm_blake3_hasher *self, + uint64_t seek, + uint8_t *out, + size_t out_len); +``` + +The same as `llvm_blake3_hasher_finalize`, but with an additional `seek` +parameter for the starting byte position in the output stream. To +efficiently stream a large output without allocating memory, call this +function in a loop, incrementing `seek` by the output length each time. + +--- + +```c +void llvm_blake3_hasher_reset( + llvm_blake3_hasher *self); +``` + +Reset the hasher to its initial state, prior to any calls to +`llvm_blake3_hasher_update`. Currently this is no different from calling +`llvm_blake3_hasher_init` or similar again. However, if this implementation gains +multithreading support in the future, and if `llvm_blake3_hasher` holds (optional) +threading resources, this function will reuse those resources. 
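For a sense of how the less common functions compose, here is a small usage sketch (not from the vendored README; the input string and output lengths are illustrative only) that streams 128 bytes of extended output in 32-byte pieces with `llvm_blake3_hasher_finalize_seek`:

```c
#include "llvm-c/blake3.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main() {
  llvm_blake3_hasher hasher;
  llvm_blake3_hasher_init(&hasher);

  const char *msg = "example input"; // illustrative input
  llvm_blake3_hasher_update(&hasher, msg, strlen(msg));

  // Stream 128 bytes of output 32 bytes at a time. Finalizing doesn't
  // modify the hasher, and `seek` selects the starting byte of the output
  // stream, so the concatenated pieces equal one 128-byte finalize call.
  uint8_t piece[32];
  for (uint64_t seek = 0; seek < 128; seek += sizeof(piece)) {
    llvm_blake3_hasher_finalize_seek(&hasher, seek, piece, sizeof(piece));
    for (size_t i = 0; i < sizeof(piece); i++)
      printf("%02x", piece[i]);
  }
  printf("\n");
  return 0;
}
```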
+ + +# Building + +This implementation is just C and assembly files. + +## x86 + +Dynamic dispatch is enabled by default on x86. The implementation will +query the CPU at runtime to detect SIMD support, and it will use the +widest instruction set available. By default, `blake3_dispatch.c` +expects to be linked with code for five different instruction sets: +portable C, SSE2, SSE4.1, AVX2, and AVX-512. + +For each of the x86 SIMD instruction sets, four versions are available: +three flavors of assembly (Unix, Windows MSVC, and Windows GNU) and one +version using C intrinsics. The assembly versions are generally +preferred. They perform better, they perform more consistently across +different compilers, and they build more quickly. On the other hand, the +assembly versions are x86\_64-only, and you need to select the right +flavor for your target platform. + +## ARM NEON + +The NEON implementation is enabled by default on AArch64, but not on +other ARM targets, since not all of them support it. To enable it, set +`BLAKE3_USE_NEON=1`. + +To explicitly disable using NEON instructions on AArch64, set +`BLAKE3_USE_NEON=0`. + +## Other Platforms + +The portable implementation should work on most other architectures. + +# Multithreading + +The implementation doesn't currently support multithreading. diff --git a/llvm/lib/Support/BLAKE3/blake3.c b/llvm/lib/Support/BLAKE3/blake3.c new file mode 100644 index 000000000000..a369452a3e75 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3.c @@ -0,0 +1,627 @@ +/*===-- blake3.c - BLAKE3 C Implementation ------------------------*- C -*-===*\ +|* *| +|* Released into the public domain with CC0 1.0 *| +|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info. *| +|* SPDX-License-Identifier: CC0-1.0 *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> + +#include "blake3_impl.h" + +const char *llvm_blake3_version(void) { return BLAKE3_VERSION_STRING; } + +INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8], + uint8_t flags) { + memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = 0; + memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; + self->blocks_compressed = 0; + self->flags = flags; +} + +INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8], + uint64_t chunk_counter) { + memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = chunk_counter; + self->blocks_compressed = 0; + memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; +} + +INLINE size_t chunk_state_len(const blake3_chunk_state *self) { + return (BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed) + + ((size_t)self->buf_len); +} + +INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self, + const uint8_t *input, size_t input_len) { + size_t take = BLAKE3_BLOCK_LEN - ((size_t)self->buf_len); + if (take > input_len) { + take = input_len; + } + uint8_t *dest = self->buf + ((size_t)self->buf_len); + memcpy(dest, input, take); + self->buf_len += (uint8_t)take; + return take; +} + +INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) { + if (self->blocks_compressed == 0) { + return CHUNK_START; + } else { + return 0; + } +} + +typedef struct { + uint32_t input_cv[8]; + uint64_t counter; + uint8_t block[BLAKE3_BLOCK_LEN]; + uint8_t block_len; + uint8_t flags; +} output_t; + +INLINE output_t make_output(const uint32_t input_cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags) { +
output_t ret; + memcpy(ret.input_cv, input_cv, 32); + memcpy(ret.block, block, BLAKE3_BLOCK_LEN); + ret.block_len = block_len; + ret.counter = counter; + ret.flags = flags; + return ret; +} + +// Chaining values within a given chunk (specifically the compress_in_place +// interface) are represented as words. This avoids unnecessary bytes<->words +// conversion overhead in the portable implementation. However, the hash_many +// interface handles both user input and parent node blocks, so it accepts +// bytes. For that reason, chaining values in the CV stack are represented as +// bytes. +INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) { + uint32_t cv_words[8]; + memcpy(cv_words, self->input_cv, 32); + blake3_compress_in_place(cv_words, self->block, self->block_len, + self->counter, self->flags); + store_cv_words(cv, cv_words); +} + +INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out, + size_t out_len) { + uint64_t output_block_counter = seek / 64; + size_t offset_within_block = seek % 64; + uint8_t wide_buf[64]; + while (out_len > 0) { + blake3_compress_xof(self->input_cv, self->block, self->block_len, + output_block_counter, self->flags | ROOT, wide_buf); + size_t available_bytes = 64 - offset_within_block; + size_t memcpy_len; + if (out_len > available_bytes) { + memcpy_len = available_bytes; + } else { + memcpy_len = out_len; + } + memcpy(out, wide_buf + offset_within_block, memcpy_len); + out += memcpy_len; + out_len -= memcpy_len; + output_block_counter += 1; + offset_within_block = 0; + } +} + +INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input, + size_t input_len) { + if (self->buf_len > 0) { + size_t take = chunk_state_fill_buf(self, input, input_len); + input += take; + input_len -= take; + if (input_len > 0) { + blake3_compress_in_place( + self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter, + self->flags | chunk_state_maybe_start_flag(self)); + self->blocks_compressed += 1; + self->buf_len = 0; + memset(self->buf, 0, BLAKE3_BLOCK_LEN); + } + } + + while (input_len > BLAKE3_BLOCK_LEN) { + blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag(self)); + self->blocks_compressed += 1; + input += BLAKE3_BLOCK_LEN; + input_len -= BLAKE3_BLOCK_LEN; + } + + size_t take = chunk_state_fill_buf(self, input, input_len); + input += take; + input_len -= take; +} + +INLINE output_t chunk_state_output(const blake3_chunk_state *self) { + uint8_t block_flags = + self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END; + return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter, + block_flags); +} + +INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN], + const uint32_t key[8], uint8_t flags) { + return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT); +} + +// Given some input larger than one chunk, return the number of bytes that +// should go in the left subtree. This is the largest power-of-2 number of +// chunks that leaves at least 1 byte for the right subtree. +INLINE size_t left_len(size_t content_len) { + // Subtract 1 to reserve at least one byte for the right side. content_len + // should always be greater than BLAKE3_CHUNK_LEN. + size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; +} + +// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time +// on a single thread. 
Write out the chunk chaining values and return the +// number of chunks hashed. These chunks are never the root and never empty; +// those cases use a different codepath. +INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, uint8_t flags, + uint8_t *out) { +#if defined(BLAKE3_TESTING) + assert(0 < input_len); + assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN); +#endif + + const uint8_t *chunks_array[MAX_SIMD_DEGREE]; + size_t input_position = 0; + size_t chunks_array_len = 0; + while (input_len - input_position >= BLAKE3_CHUNK_LEN) { + chunks_array[chunks_array_len] = &input[input_position]; + input_position += BLAKE3_CHUNK_LEN; + chunks_array_len += 1; + } + + blake3_hash_many(chunks_array, chunks_array_len, + BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter, + true, flags, CHUNK_START, CHUNK_END, out); + + // Hash the remaining partial chunk, if there is one. Note that the empty + // chunk (meaning the empty message) is a different codepath. + if (input_len > input_position) { + uint64_t counter = chunk_counter + (uint64_t)chunks_array_len; + blake3_chunk_state chunk_state; + chunk_state_init(&chunk_state, key, flags); + chunk_state.chunk_counter = counter; + chunk_state_update(&chunk_state, &input[input_position], + input_len - input_position); + output_t output = chunk_state_output(&chunk_state); + output_chaining_value(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); + return chunks_array_len + 1; + } else { + return chunks_array_len; + } +} + +// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time +// on a single thread. Write out the parent chaining values and return the +// number of parents hashed. (If there's an odd input chaining value left over, +// return it as an additional output.) These parents are never the root and +// never empty; those cases use a different codepath. +INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values, + size_t num_chaining_values, + const uint32_t key[8], uint8_t flags, + uint8_t *out) { +#if defined(BLAKE3_TESTING) + assert(2 <= num_chaining_values); + assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2); +#endif + + const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2]; + size_t parents_array_len = 0; + while (num_chaining_values - (2 * parents_array_len) >= 2) { + parents_array[parents_array_len] = + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; + parents_array_len += 1; + } + + blake3_hash_many(parents_array, parents_array_len, 1, key, + 0, // Parents always use counter 0. + false, flags | PARENT, + 0, // Parents have no start flags. + 0, // Parents have no end flags. + out); + + // If there's an odd child left over, it becomes an output. + if (num_chaining_values > 2 * parents_array_len) { + memcpy(&out[parents_array_len * BLAKE3_OUT_LEN], + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], + BLAKE3_OUT_LEN); + return parents_array_len + 1; + } else { + return parents_array_len; + } +} + +// The wide helper function returns (writes out) an array of chaining values +// and returns the length of that array. The number of chaining values returned +// is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer, +// if the input is shorter than that many chunks. The reason for maintaining a +// wide array of chaining values going back up the tree, is to allow the +// implementation to hash as many parents in parallel as possible.
+ +// +// As a special case when the SIMD degree is 1, this function will still return +// at least 2 outputs. This guarantees that this function doesn't perform the +// root compression. (If it did, it would use the wrong flags, and also we +// wouldn't be able to implement extendable output.) Note that this function is +// not used when the whole input is only 1 chunk long; that's a different +// codepath. +// +// Why not just have the caller split the input on the first update(), instead +// of implementing this special rule? Because we don't want to limit SIMD or +// multi-threading parallelism for that update(). +static size_t blake3_compress_subtree_wide(const uint8_t *input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, uint8_t *out) { + // Note that the single chunk case does *not* bump the SIMD degree up to 2 + // when it is 1. If this implementation adds multi-threading in the future, + // this gives us the option of multi-threading even the 2-chunk case, which + // can help performance on smaller platforms. + if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) { + return compress_chunks_parallel(input, input_len, key, chunk_counter, flags, + out); + } + + // With more than simd_degree chunks, we need to recurse. Start by dividing + // the input into left and right subtrees. (Note that this is only optimal + // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree + // of 3 or something, we'll need a more complicated strategy.) + size_t left_input_len = left_len(input_len); + size_t right_input_len = input_len - left_input_len; + const uint8_t *right_input = &input[left_input_len]; + uint64_t right_chunk_counter = + chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN); + + // Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2 to + // account for the special case of returning 2 outputs when the SIMD degree + // is 1. + uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t degree = blake3_simd_degree(); + if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) { + // The special case: We always use a degree of at least two, to make + // sure there are two outputs. Except, as noted above, at the chunk + // level, where we allow degree=1. (Note that the 1-chunk-input case is + // a different codepath.) + degree = 2; + } + uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; + + // Recurse! If this implementation adds multi-threading support in the + // future, this is where it will go. + size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key, + chunk_counter, flags, cv_array); + size_t right_n = blake3_compress_subtree_wide( + right_input, right_input_len, key, right_chunk_counter, flags, right_cvs); + + // The special case again. If simd_degree=1, then we'll have left_n=1 and + // right_n=1. Rather than compressing them into a single output, return + // them directly, to make sure we always have at least two outputs. + if (left_n == 1) { + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + return 2; + } + + // Otherwise, do one layer of parent node compression. + size_t num_chaining_values = left_n + right_n; + return compress_parents_parallel(cv_array, num_chaining_values, key, flags, + out); +} + +// Hash a subtree with compress_subtree_wide(), and then condense the resulting +// list of chaining values down to a single parent node. Don't compress that +// last parent node, however.
Instead, return its message bytes (the +// concatenated chaining values of its children). This is necessary when the +// first call to update() supplies a complete subtree, because the topmost +// parent node of that subtree could end up being the root. It's also necessary +// for extended output in the general case. +// +// As with compress_subtree_wide(), this function is not used on inputs of 1 +// chunk or less. That's a different codepath. +INLINE void compress_subtree_to_parent_node( + const uint8_t *input, size_t input_len, const uint32_t key[8], + uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) { +#if defined(BLAKE3_TESTING) + assert(input_len > BLAKE3_CHUNK_LEN); +#endif + + uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key, + chunk_counter, flags, cv_array); + assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); + + // If MAX_SIMD_DEGREE is greater than 2 and there's enough input, + // compress_subtree_wide() returns more than 2 chaining values. Condense + // them into 2 by forming parent nodes repeatedly. + uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + // The second half of this loop condition is always true, and we just + // asserted it above. But GCC can't tell that it's always true, and if NDEBUG + // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious + // warnings here. GCC 8.5 is particularly sensitive, so if you're changing + // this code, test it against that version. + while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) { + num_cvs = + compress_parents_parallel(cv_array, num_cvs, key, flags, out_array); + memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); +} + +INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8], + uint8_t flags) { + memcpy(self->key, key, BLAKE3_KEY_LEN); + chunk_state_init(&self->chunk, key, flags); + self->cv_stack_len = 0; +} + +void llvm_blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); } + +void llvm_blake3_hasher_init_keyed(blake3_hasher *self, + const uint8_t key[BLAKE3_KEY_LEN]) { + uint32_t key_words[8]; + load_key_words(key, key_words); + hasher_init_base(self, key_words, KEYED_HASH); +} + +void llvm_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context, + size_t context_len) { + blake3_hasher context_hasher; + hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT); + llvm_blake3_hasher_update(&context_hasher, context, context_len); + uint8_t context_key[BLAKE3_KEY_LEN]; + llvm_blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN); + uint32_t context_key_words[8]; + load_key_words(context_key, context_key_words); + hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL); +} + +void llvm_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) { + llvm_blake3_hasher_init_derive_key_raw(self, context, strlen(context)); +} + +// As described in hasher_push_cv() below, we do "lazy merging", delaying +// merges until right before the next CV is about to be added. This is +// different from the reference implementation. Another difference is that we +// aren't always merging 1 chunk at a time. Instead, each CV might represent +// any power-of-two number of chunks, as long as the smaller-above-larger stack +// order is maintained. 
Instead of the "count the trailing 0-bits" algorithm +// described in the spec, we use a "count the total number of 1-bits" variant +// that doesn't require us to retain the subtree size of the CV on top of the +// stack. The principle is the same: each CV that should remain in the stack is +// represented by a 1-bit in the total number of chunks (or bytes) so far. +INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) { + size_t post_merge_stack_len = (size_t)popcnt(total_len); + while (self->cv_stack_len > post_merge_stack_len) { + uint8_t *parent_node = + &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; + output_t output = parent_output(parent_node, self->key, self->chunk.flags); + output_chaining_value(&output, parent_node); + self->cv_stack_len -= 1; + } +} + +// In reference_impl.rs, we merge the new CV with existing CVs from the stack +// before pushing it. We can do that because we know more input is coming, so +// we know none of the merges are root. +// +// This setting is different. We want to feed as much input as possible to +// compress_subtree_wide(), without setting aside anything for the chunk_state. +// If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once +// as a single subtree, if at all possible. +// +// This leads to two problems: +// 1) This 64 KiB input might be the only call that ever gets made to update. +// In this case, the root node of the 64 KiB subtree would be the root node +// of the whole tree, and it would need to be ROOT finalized. We can't +// compress it until we know. +// 2) This 64 KiB input might complete a larger tree, whose root node is +// similarly going to be the root of the whole tree. For example, maybe +// we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the +// node at the root of the 256 KiB subtree until we know how to finalize it. +// +// The second problem is solved with "lazy merging". That is, when we're about +// to add a CV to the stack, we don't merge it with anything first, as the +// reference impl does. Instead we do merges using the *previous* CV that was +// added, which is sitting on top of the stack, and we put the new CV +// (unmerged) on top of the stack afterwards. This guarantees that we never +// merge the root node until finalize(). +// +// Solving the first problem requires an additional tool, +// compress_subtree_to_parent_node(). That function always returns the top +// *two* chaining values of the subtree it's compressing. We then do lazy +// merging with each of them separately, so that the second CV will always +// remain unmerged. (That also helps us support extendable output when we're +// hashing an input all-at-once.) +INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN], + uint64_t chunk_counter) { + hasher_merge_cv_stack(self, chunk_counter); + memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, + BLAKE3_OUT_LEN); + self->cv_stack_len += 1; +} + +void llvm_blake3_hasher_update(blake3_hasher *self, const void *input, + size_t input_len) { + // Explicitly checking for zero avoids causing UB by passing a null pointer + // to memcpy. This comes up in practice with things like: + // std::vector<uint8_t> v; + // blake3_hasher_update(&hasher, v.data(), v.size()); + if (input_len == 0) { + return; + } + + const uint8_t *input_bytes = (const uint8_t *)input; + + // If we have some partial chunk bytes in the internal chunk_state, we need + // to finish that chunk first.
+ if (chunk_state_len(&self->chunk) > 0) { + size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk); + if (take > input_len) { + take = input_len; + } + chunk_state_update(&self->chunk, input_bytes, take); + input_bytes += take; + input_len -= take; + // If we've filled the current chunk and there's more coming, finalize this + // chunk and proceed. In this case we know it's not the root. + if (input_len > 0) { + output_t output = chunk_state_output(&self->chunk); + uint8_t chunk_cv[32]; + output_chaining_value(&output, chunk_cv); + hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter); + chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1); + } else { + return; + } + } + + // Now the chunk_state is clear, and we have more input. If there's more than + // a single chunk (so, definitely not the root chunk), hash the largest whole + // subtree we can, with the full benefits of SIMD (and maybe in the future, + // multi-threading) parallelism. Two restrictions: + // - The subtree has to be a power-of-2 number of chunks. Only subtrees along + // the right edge can be incomplete, and we don't know where the right edge + // is going to be until we get to finalize(). + // - The subtree must evenly divide the total number of chunks up until this + // point (if total is not 0). If the current incomplete subtree is only + // waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have + // to complete the current subtree first. + // Because we might need to break up the input to form powers of 2, or to + // evenly divide what we already have, this part runs in a loop. + while (input_len > BLAKE3_CHUNK_LEN) { + size_t subtree_len = round_down_to_power_of_2(input_len); + uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; + // Shrink the subtree_len until it evenly divides the count so far. We know + // that subtree_len itself is a power of 2, so we can use a bitmasking + // trick instead of an actual remainder operation. (Note that if the caller + // consistently passes power-of-2 inputs of the same size, as is hopefully + // typical, this loop condition will always fail, and subtree_len will + // always be the full length of the input.) + // + // An aside: We don't have to shrink subtree_len quite this much. For + // example, if count_so_far is 1, we could pass 2 chunks to + // compress_subtree_to_parent_node. Since we'll get 2 CVs back, we'll still + // get the right answer in the end, and we might get to use 2-way SIMD + // parallelism. The problem with this optimization, is that it gets us + // stuck always hashing 2 chunks. The total number of chunks will remain + // odd, and we'll never graduate to higher degrees of parallelism. See + // https://github.com/BLAKE3-team/BLAKE3/issues/69. + while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) { + subtree_len /= 2; + } + // The shrunken subtree_len might now be 1 chunk long. If so, hash that one + // chunk by itself. Otherwise, compress the subtree into a pair of CVs. 
+ uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; + if (subtree_len <= BLAKE3_CHUNK_LEN) { + blake3_chunk_state chunk_state; + chunk_state_init(&chunk_state, self->key, self->chunk.flags); + chunk_state.chunk_counter = self->chunk.chunk_counter; + chunk_state_update(&chunk_state, input_bytes, subtree_len); + output_t output = chunk_state_output(&chunk_state); + uint8_t cv[BLAKE3_OUT_LEN]; + output_chaining_value(&output, cv); + hasher_push_cv(self, cv, chunk_state.chunk_counter); + } else { + // This is the high-performance happy path, though getting here depends + // on the caller giving us a long enough input. + uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; + compress_subtree_to_parent_node(input_bytes, subtree_len, self->key, + self->chunk.chunk_counter, + self->chunk.flags, cv_pair); + hasher_push_cv(self, cv_pair, self->chunk.chunk_counter); + hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN], + self->chunk.chunk_counter + (subtree_chunks / 2)); + } + self->chunk.chunk_counter += subtree_chunks; + input_bytes += subtree_len; + input_len -= subtree_len; + } + + // If there's any remaining input less than a full chunk, add it to the chunk + // state. In that case, also do a final merge loop to make sure the subtree + // stack doesn't contain any unmerged pairs. The remaining input means we + // know these merges are non-root. This merge loop isn't strictly necessary + // here, because hasher_push_cv already does its own merge loop, but it + // simplifies blake3_hasher_finalize below. + if (input_len > 0) { + chunk_state_update(&self->chunk, input_bytes, input_len); + hasher_merge_cv_stack(self, self->chunk.chunk_counter); + } +} + +void llvm_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, + size_t out_len) { + llvm_blake3_hasher_finalize_seek(self, 0, out, out_len); +#if LLVM_MEMORY_SANITIZER_BUILD + // Avoid false positives due to uninstrumented assembly code. + __msan_unpoison(out, out_len); +#endif +} + +void llvm_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek, + uint8_t *out, size_t out_len) { + // Explicitly checking for zero avoids causing UB by passing a null pointer + // to memcpy. This comes up in practice with things like: + // std::vector<uint8_t> v; + // blake3_hasher_finalize(&hasher, v.data(), v.size()); + if (out_len == 0) { + return; + } + + // If the subtree stack is empty, then the current chunk is the root. + if (self->cv_stack_len == 0) { + output_t output = chunk_state_output(&self->chunk); + output_root_bytes(&output, seek, out, out_len); + return; + } + // If there are any bytes in the chunk state, finalize that chunk and do a + // roll-up merge between that chunk hash and every subtree in the stack. In + // this case, the extra merge loop at the end of blake3_hasher_update + // guarantees that none of the subtrees in the stack need to be merged with + // each other first. Otherwise, if there are no bytes in the chunk state, + // then the top of the stack is a chunk hash, and we start the merge from + // that. + output_t output; + size_t cvs_remaining; + if (chunk_state_len(&self->chunk) > 0) { + cvs_remaining = self->cv_stack_len; + output = chunk_state_output(&self->chunk); + } else { + // There are always at least 2 CVs in the stack in this case.
+    cvs_remaining = self->cv_stack_len - 2;
+    output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key,
+                           self->chunk.flags);
+  }
+  while (cvs_remaining > 0) {
+    cvs_remaining -= 1;
+    uint8_t parent_block[BLAKE3_BLOCK_LEN];
+    memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);
+    output_chaining_value(&output, &parent_block[32]);
+    output = parent_output(parent_block, self->key, self->chunk.flags);
+  }
+  output_root_bytes(&output, seek, out, out_len);
+}
+
+void llvm_blake3_hasher_reset(blake3_hasher *self) {
+  chunk_state_reset(&self->chunk, self->key, 0);
+  self->cv_stack_len = 0;
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2.c b/llvm/lib/Support/BLAKE3/blake3_avx2.c
new file mode 100644
index 000000000000..e76aa1a3aeb3
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2.c
@@ -0,0 +1,326 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 8
+
+INLINE __m256i loadu(const uint8_t src[32]) {
+  return _mm256_loadu_si256((const __m256i *)src);
+}
+
+INLINE void storeu(__m256i src, uint8_t dest[16]) {
+  _mm256_storeu_si256((__m256i *)dest, src);
+}
+
+INLINE __m256i addv(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); }
+
+// Note that clang-format doesn't like the name "xor" for some reason.
+INLINE __m256i xorv(__m256i a, __m256i b) { return _mm256_xor_si256(a, b); }
+
+INLINE __m256i set1(uint32_t x) { return _mm256_set1_epi32((int32_t)x); }
+
+INLINE __m256i rot16(__m256i x) {
+  return _mm256_shuffle_epi8(
+      x, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
+                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
+}
+
+INLINE __m256i rot12(__m256i x) {
+  return _mm256_or_si256(_mm256_srli_epi32(x, 12),
+                         _mm256_slli_epi32(x, 32 - 12));
+}
+
+INLINE __m256i rot8(__m256i x) {
+  return _mm256_shuffle_epi8(
+      x, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1,
+                         12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
+}
+
+INLINE __m256i rot7(__m256i x) {
+  return _mm256_or_si256(_mm256_srli_epi32(x, 7), _mm256_slli_epi32(x, 32 - 7));
+}
+
+INLINE void round_fn(__m256i v[16], __m256i m[16], size_t r) {
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[15] = rot16(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot12(v[4]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[15] = rot8(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot7(v[4]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot16(v[15]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[4] = rot12(v[4]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot8(v[15]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+  v[4] = rot7(v[4]);
+}
+
+INLINE void transpose_vecs(__m256i vecs[DEGREE]) {
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high
+  // is 22/33/66/77.
+  __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]);
+  __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]);
+  __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]);
+  __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]);
+  __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]);
+  __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]);
+  __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
+  __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
+
+  // Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
+  // 11/33.
+  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
+  __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
+  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);
+  __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367);
+  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);
+  __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145);
+  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);
+  __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367);
+
+  // Interleave 128-bit lanes.
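+  // (Editorial note, not from the upstream sources: the three interleave
+  // stages together form an 8x8 transpose of 32-bit words. Viewing the inputs
+  // as the rows of a matrix, the permutes below write out its columns, so on
+  // return vecs[j] holds word j of every original vector. hash8 relies on
+  // this to turn the eight transposed state vectors back into eight
+  // contiguous 32-byte output CVs.)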
+ vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20); + vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20); + vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20); + vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20); + vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31); + vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31); + vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31); + vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31); +} + +INLINE void transpose_msg_vecs(const uint8_t *const *inputs, + size_t block_offset, __m256i out[16]) { + out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m256i)]); + out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m256i)]); + out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m256i)]); + out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m256i)]); + out[4] = loadu(&inputs[4][block_offset + 0 * sizeof(__m256i)]); + out[5] = loadu(&inputs[5][block_offset + 0 * sizeof(__m256i)]); + out[6] = loadu(&inputs[6][block_offset + 0 * sizeof(__m256i)]); + out[7] = loadu(&inputs[7][block_offset + 0 * sizeof(__m256i)]); + out[8] = loadu(&inputs[0][block_offset + 1 * sizeof(__m256i)]); + out[9] = loadu(&inputs[1][block_offset + 1 * sizeof(__m256i)]); + out[10] = loadu(&inputs[2][block_offset + 1 * sizeof(__m256i)]); + out[11] = loadu(&inputs[3][block_offset + 1 * sizeof(__m256i)]); + out[12] = loadu(&inputs[4][block_offset + 1 * sizeof(__m256i)]); + out[13] = loadu(&inputs[5][block_offset + 1 * sizeof(__m256i)]); + out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]); + out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]); + for (size_t i = 0; i < 8; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs(&out[0]); + transpose_vecs(&out[8]); +} + +INLINE void load_counters(uint64_t counter, bool increment_counter, + __m256i *out_lo, __m256i *out_hi) { + const __m256i mask = _mm256_set1_epi32(-(int32_t)increment_counter); + const __m256i add0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + const __m256i add1 = _mm256_and_si256(mask, add0); + __m256i l = _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), add1); + __m256i carry = _mm256_cmpgt_epi32(_mm256_xor_si256(add1, _mm256_set1_epi32(0x80000000)), + _mm256_xor_si256( l, _mm256_set1_epi32(0x80000000))); + __m256i h = _mm256_sub_epi32(_mm256_set1_epi32((int32_t)(counter >> 32)), carry); + *out_lo = l; + *out_hi = h; +} + +static +void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks, + const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + __m256i h_vecs[8] = { + set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]), + set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]), + }; + __m256i counter_low_vec, counter_high_vec; + load_counters(counter, increment_counter, &counter_low_vec, + &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m256i block_len_vec = set1(BLAKE3_BLOCK_LEN); + __m256i block_flags_vec = set1(block_flags); + __m256i msg_vecs[16]; + transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m256i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, 
block_flags_vec,
+    };
+    round_fn(v, msg_vecs, 0);
+    round_fn(v, msg_vecs, 1);
+    round_fn(v, msg_vecs, 2);
+    round_fn(v, msg_vecs, 3);
+    round_fn(v, msg_vecs, 4);
+    round_fn(v, msg_vecs, 5);
+    round_fn(v, msg_vecs, 6);
+    h_vecs[0] = xorv(v[0], v[8]);
+    h_vecs[1] = xorv(v[1], v[9]);
+    h_vecs[2] = xorv(v[2], v[10]);
+    h_vecs[3] = xorv(v[3], v[11]);
+    h_vecs[4] = xorv(v[4], v[12]);
+    h_vecs[5] = xorv(v[5], v[13]);
+    h_vecs[6] = xorv(v[6], v[14]);
+    h_vecs[7] = xorv(v[7], v[15]);
+
+    block_flags = flags;
+  }
+
+  transpose_vecs(h_vecs);
+  storeu(h_vecs[0], &out[0 * sizeof(__m256i)]);
+  storeu(h_vecs[1], &out[1 * sizeof(__m256i)]);
+  storeu(h_vecs[2], &out[2 * sizeof(__m256i)]);
+  storeu(h_vecs[3], &out[3 * sizeof(__m256i)]);
+  storeu(h_vecs[4], &out[4 * sizeof(__m256i)]);
+  storeu(h_vecs[5], &out[5 * sizeof(__m256i)]);
+  storeu(h_vecs[6], &out[6 * sizeof(__m256i)]);
+  storeu(h_vecs[7], &out[7 * sizeof(__m256i)]);
+}
+
+#if !defined(BLAKE3_NO_SSE41)
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+                            size_t blocks, const uint32_t key[8],
+                            uint64_t counter, bool increment_counter,
+                            uint8_t flags, uint8_t flags_start,
+                            uint8_t flags_end, uint8_t *out);
+#else
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+                               size_t blocks, const uint32_t key[8],
+                               uint64_t counter, bool increment_counter,
+                               uint8_t flags, uint8_t flags_start,
+                               uint8_t flags_end, uint8_t *out);
+#endif
+
+void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out) {
+  while (num_inputs >= DEGREE) {
+    blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
+                      flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += DEGREE;
+    }
+    inputs += DEGREE;
+    num_inputs -= DEGREE;
+    out = &out[DEGREE * BLAKE3_OUT_LEN];
+  }
+#if !defined(BLAKE3_NO_SSE41)
+  blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+                         increment_counter, flags, flags_start, flags_end, out);
+#else
+  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
+                            increment_counter, flags, flags_start, flags_end,
+                            out);
+#endif
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
new file mode 100644
index 000000000000..449e07492832
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
@@ -0,0 +1,1826 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN _blake3_hash_many_avx2
+HIDDEN blake3_hash_many_avx2
+.global _blake3_hash_many_avx2
+.global blake3_hash_many_avx2
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+        .p2align 6
+_blake3_hash_many_avx2:
+blake3_hash_many_avx2:
+        _CET_ENDBR
+        push r15
+        push r14
+        push r13
+        push r12
+        push rbx
+        push rbp
+        mov rbp, rsp
+        sub rsp, 680
+        and rsp, 0xFFFFFFFFFFFFFFC0
+        neg r9d
+        vmovd xmm0, r9d
+        vpbroadcastd ymm0, xmm0
+        vmovdqa ymmword ptr [rsp+0x280], ymm0
+        vpand ymm1, ymm0, ymmword ptr [ADD0+rip]
+        vpand ymm2, ymm0, ymmword ptr [ADD1+rip]
+        vmovdqa ymmword ptr [rsp+0x220], ymm2
+        vmovd xmm2, r8d
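+        # Editorial note, not in the upstream source: rdi..r9 carry the SysV
+        # arguments (inputs, num_inputs, blocks, key, counter,
+        # increment_counter). "neg r9d" above turns the bool into an all-ones
+        # lane mask, and the CMP_MSB_MASK xor/compare below derives the
+        # unsigned carry out of the per-lane low counter words so the high
+        # words can be adjusted, mirroring load_counters() in the intrinsics
+        # implementations.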
+ vpbroadcastd ymm2, xmm2 + vpaddd ymm2, ymm2, ymm1 + vmovdqa ymmword ptr [rsp+0x240], ymm2 + vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip] + vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip] + vpcmpgtd ymm2, ymm1, ymm2 + shr r8, 32 + vmovd xmm3, r8d + vpbroadcastd ymm3, xmm3 + vpsubd ymm3, ymm3, ymm2 + vmovdqa ymmword ptr [rsp+0x260], ymm3 + shl rdx, 6 + mov qword ptr [rsp+0x2A0], rdx + cmp rsi, 8 + jc 3f +2: + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+0x4] + vpbroadcastd ymm2, dword ptr [rcx+0x8] + vpbroadcastd ymm3, dword ptr [rcx+0xC] + vpbroadcastd ymm4, dword ptr [rcx+0x10] + vpbroadcastd ymm5, dword ptr [rcx+0x14] + vpbroadcastd ymm6, dword ptr [rcx+0x18] + vpbroadcastd ymm7, dword ptr [rcx+0x1C] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x20] + mov r13, qword ptr [rdi+0x28] + mov r14, qword ptr [rdi+0x30] + mov r15, qword ptr [rdi+0x38] + movzx eax, byte ptr [rbp+0x38] + movzx ebx, byte ptr [rbp+0x40] + or eax, ebx + xor edx, edx +.p2align 5 +9: + movzx ebx, byte ptr [rbp+0x48] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x2A0] + cmove eax, ebx + mov dword ptr [rsp+0x200], eax + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x40] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x40] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x20], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x40], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x60], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x30] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x30] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x80], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0xA0], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0xC0], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0xE0], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x20] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x20] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x100], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + 
vmovaps ymmword ptr [rsp+0x120], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x140], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x160], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x10] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x10] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x180], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x1A0], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x1C0], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x1E0], ymm11 + vpbroadcastd ymm15, dword ptr [rsp+0x200] + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm0, ymmword ptr [rsp+0x240] + vpxor ymm13, ymm1, ymmword ptr [rsp+0x260] + vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip] + vpxor ymm15, ymm3, ymm15 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip] + vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip] + vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip] + vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip] + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 
+ vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword 
ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor 
ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd 
ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, 
ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld 
ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, 
ymmword ptr [rsp+0x180] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + 
vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd 
ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+0x38] + jne 9b + mov rbx, qword ptr [rbp+0x50] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0xCC + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0xCC + vblendps ymm3, ymm12, ymm9, 0xCC + vperm2f128 ymm12, ymm1, ymm2, 0x20 + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0xCC + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 0x20 + vmovups ymmword ptr [rbx+0x20], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0xCC + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0xCC + vblendps ymm14, ymm14, ymm13, 0xCC + vperm2f128 ymm8, ymm10, ymm14, 0x20 + vmovups ymmword ptr [rbx+0x40], ymm8 + vblendps ymm15, ymm13, ymm15, 0xCC + vperm2f128 ymm13, ymm6, ymm15, 0x20 + vmovups ymmword ptr [rbx+0x60], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 0x31 + vperm2f128 ymm11, ymm3, ymm4, 0x31 + vmovups ymmword ptr [rbx+0x80], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 0x31 + vperm2f128 ymm15, ymm6, ymm15, 0x31 + vmovups ymmword ptr [rbx+0xA0], ymm11 + vmovups ymmword ptr [rbx+0xC0], ymm14 + vmovups ymmword ptr [rbx+0xE0], ymm15 + vmovdqa ymm0, ymmword ptr [rsp+0x220] + vpaddd ymm1, ymm0, ymmword ptr [rsp+0x240] + vmovdqa ymmword ptr [rsp+0x240], ymm1 + vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip] + vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip] + vpcmpgtd ymm2, ymm0, ymm2 + vmovdqa ymm0, ymmword ptr [rsp+0x260] + vpsubd ymm2, ymm0, ymm2 + vmovdqa ymmword ptr [rsp+0x260], ymm2 + add rdi, 64 + add rbx, 256 + mov qword ptr [rbp+0x50], rbx + sub rsi, 8 + cmp rsi, 8 + jnc 2b + test rsi, rsi + jnz 3f +4: + vzeroupper + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 5 +3: + mov rbx, qword ptr [rbp+0x50] + mov r15, qword ptr [rsp+0x2A0] + movzx r13d, byte ptr [rbp+0x38] + movzx r12d, byte ptr [rbp+0x48] + test rsi, 0x4 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovdqa ymm8, ymm0 + vmovdqa ymm9, ymm1 + vbroadcasti128 ymm12, xmmword ptr [rsp+0x240] + vbroadcasti128 ymm13, xmmword ptr [rsp+0x260] + vpunpckldq ymm14, ymm12, ymm13 + vpunpckhdq ymm15, ymm12, ymm13 + vpermq ymm14, ymm14, 0x50 + vpermq ymm15, ymm15, 0x50 + vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + vpblendd ymm14, ymm14, ymm12, 0x44 + vpblendd ymm15, ymm15, ymm12, 0x44 + vmovdqa ymmword ptr [rsp], ymm14 + vmovdqa ymmword ptr [rsp+0x20], ymm15 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 
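+        # Editorial note, not in the upstream source: this loop hashes a
+        # remainder of four inputs, keeping two hash states side by side in
+        # each ymm register (one per 128-bit lane) and consuming one 64-byte
+        # block from each input per iteration.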
+2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x200], eax + vmovups ymm2, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm3, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm2, ymm3, 136 + vshufps ymm5, ymm2, ymm3, 221 + vmovups ymm2, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm3, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm2, ymm3, 136 + vshufps ymm7, ymm2, ymm3, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + vmovups ymm10, ymmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01 + vmovups ymm11, ymmword ptr [r10+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01 + vshufps ymm12, ymm10, ymm11, 136 + vshufps ymm13, ymm10, ymm11, 221 + vmovups ymm10, ymmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01 + vmovups ymm11, ymmword ptr [r10+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01 + vshufps ymm14, ymm10, ymm11, 136 + vshufps ymm15, ymm10, ymm11, 221 + vpshufd ymm14, ymm14, 0x93 + vpshufd ymm15, ymm15, 0x93 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + vpbroadcastd ymm2, dword ptr [rsp+0x200] + vmovdqa ymm3, ymmword ptr [rsp] + vmovdqa ymm11, ymmword ptr [rsp+0x20] + vpblendd ymm3, ymm3, ymm2, 0x88 + vpblendd ymm11, ymm11, ymm2, 0x88 + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vmovdqa ymm10, ymm2 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm8, ymm8, ymm12 + vmovdqa ymmword ptr [rsp+0x40], ymm4 + nop + vmovdqa ymmword ptr [rsp+0x60], ymm12 + nop + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vmovdqa ymmword ptr [rsp+0x80], ymm5 + vmovdqa ymmword ptr [rsp+0xA0], ymm13 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm8, ymm8, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm11, ymm11, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpshufd ymm10, ymm10, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm8, ymm8, ymm14 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, 
ymm9, ymm4 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm8, ymm8, ymm15 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm8, ymm8, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm11, ymm11, 0x4E + vpshufd ymm2, ymm2, 0x93 + vpshufd ymm10, ymm10, 0x93 + dec al + je 9f + vmovdqa ymm4, ymmword ptr [rsp+0x40] + vmovdqa ymm5, ymmword ptr [rsp+0x80] + vshufps ymm12, ymm4, ymm5, 214 + vpshufd ymm13, ymm4, 0x0F + vpshufd ymm4, ymm12, 0x39 + vshufps ymm12, ymm6, ymm7, 250 + vpblendd ymm13, ymm13, ymm12, 0xAA + vpunpcklqdq ymm12, ymm7, ymm5 + vpblendd ymm12, ymm12, ymm6, 0x88 + vpshufd ymm12, ymm12, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymmword ptr [rsp+0x40], ymm13 + vmovdqa ymmword ptr [rsp+0x80], ymm12 + vmovdqa ymm12, ymmword ptr [rsp+0x60] + vmovdqa ymm13, ymmword ptr [rsp+0xA0] + vshufps ymm5, ymm12, ymm13, 214 + vpshufd ymm6, ymm12, 0x0F + vpshufd ymm12, ymm5, 0x39 + vshufps ymm5, ymm14, ymm15, 250 + vpblendd ymm6, ymm6, ymm5, 0xAA + vpunpcklqdq ymm5, ymm15, ymm13 + vpblendd ymm5, ymm5, ymm14, 0x88 + vpshufd ymm5, ymm5, 0x78 + vpunpckhdq ymm13, ymm13, ymm15 + vpunpckldq ymm14, ymm14, ymm13 + vpshufd ymm15, ymm14, 0x1E + vmovdqa ymm13, ymm6 + vmovdqa ymm14, ymm5 + vmovdqa ymm5, ymmword ptr [rsp+0x40] + vmovdqa ymm6, ymmword ptr [rsp+0x80] + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + vpxor ymm8, ymm8, ymm10 + vpxor ymm9, ymm9, ymm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovdqu xmmword ptr [rbx+0x40], xmm8 + vmovdqu xmmword ptr [rbx+0x50], xmm9 + vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01 + vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01 + vmovaps xmm8, xmmword ptr [rsp+0x280] + vmovaps xmm0, xmmword ptr [rsp+0x240] + vmovaps xmm1, xmmword ptr [rsp+0x250] + vmovaps xmm2, xmmword ptr [rsp+0x260] + vmovaps xmm3, xmmword ptr [rsp+0x270] + vblendvps xmm0, xmm0, xmm1, xmm8 + vblendvps xmm2, xmm2, xmm3, xmm8 + vmovaps xmmword ptr [rsp+0x240], xmm0 + vmovaps xmmword ptr [rsp+0x260], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +3: + test rsi, 0x2 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovd xmm13, dword ptr [rsp+0x240] + vpinsrd xmm13, xmm13, dword ptr [rsp+0x260], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovd xmm14, dword ptr [rsp+0x244] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x264], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vinserti128 ymm13, ymm13, xmm14, 0x01 + vbroadcasti128 ymm14, xmmword ptr [ROT16+rip] + vbroadcasti128 ymm15, xmmword ptr [ROT8+rip] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x200], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vpbroadcastd ymm8, dword ptr [rsp+0x200] + vpblendd ymm3, 
ymm13, ymm8, 0x88 + vmovups ymm8, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x93 + dec al + jz 9f + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0x0F + vpshufd ymm4, ymm8, 0x39 + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0xAA + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 0x88 + vpshufd ymm8, ymm8, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovaps ymm8, ymmword ptr [rsp+0x280] + vmovaps ymm0, ymmword ptr [rsp+0x240] + vmovups ymm1, ymmword ptr [rsp+0x248] + vmovaps ymm2, ymmword ptr [rsp+0x260] + vmovups ymm3, ymmword ptr [rsp+0x268] + vblendvps ymm0, ymm0, ymm1, ymm8 + vblendvps ymm2, ymm2, ymm3, ymm8 + vmovaps ymmword ptr [rsp+0x240], ymm0 + vmovaps ymmword ptr [rsp+0x260], ymm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +3: + test rsi, 0x1 + je 4b + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + vmovd xmm3, dword ptr [rsp+0x240] + vpinsrd xmm3, xmm3, dword ptr [rsp+0x260], 1 + vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovdqa xmm14, xmmword ptr [ROT16+rip] + vmovdqa xmm15, xmmword ptr [ROT8+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovdqa xmm3, xmm13 + vpinsrd xmm3, xmm3, eax, 3 + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vmovups xmm9, xmmword ptr [r8+rdx-0x30] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vmovups xmm9, 
xmmword ptr [r8+rdx-0x10] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + jmp 4b + + +#ifdef __APPLE__ +.static_data +#else +.section .rodata +#endif +.p2align 6 +ADD0: + .long 0, 1, 2, 3, 4, 5, 6, 7 +ADD1: + .long 8, 8, 8, 8, 8, 8, 8, 8 +BLAKE3_IV_0: + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 0x00000040, 0x00000040, 0x00000040, 0x00000040 + .long 0x00000040, 0x00000040, 0x00000040, 0x00000040 +ROT16: + .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +ROT8: + .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 +BLAKE3_IV: + .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A + +#endif diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S new file mode 100644 index 000000000000..bb58d2ae64b1 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S @@ -0,0 +1,1817 @@ +.intel_syntax noprefix +.global _blake3_hash_many_avx2 +.global blake3_hash_many_avx2 +.section .text + .p2align 6 +_blake3_hash_many_avx2: +blake3_hash_many_avx2: + push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 880 + and rsp, 0xFFFFFFFFFFFFFFC0 + vmovdqa xmmword ptr [rsp+0x2D0], 
xmm6 + vmovdqa xmmword ptr [rsp+0x2E0], xmm7 + vmovdqa xmmword ptr [rsp+0x2F0], xmm8 + vmovdqa xmmword ptr [rsp+0x300], xmm9 + vmovdqa xmmword ptr [rsp+0x310], xmm10 + vmovdqa xmmword ptr [rsp+0x320], xmm11 + vmovdqa xmmword ptr [rsp+0x330], xmm12 + vmovdqa xmmword ptr [rsp+0x340], xmm13 + vmovdqa xmmword ptr [rsp+0x350], xmm14 + vmovdqa xmmword ptr [rsp+0x360], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+0x68] + movzx r9, byte ptr [rbp+0x70] + neg r9d + vmovd xmm0, r9d + vpbroadcastd ymm0, xmm0 + vmovdqa ymmword ptr [rsp+0x260], ymm0 + vpand ymm1, ymm0, ymmword ptr [ADD0+rip] + vpand ymm2, ymm0, ymmword ptr [ADD1+rip] + vmovdqa ymmword ptr [rsp+0x2A0], ymm2 + vmovd xmm2, r8d + vpbroadcastd ymm2, xmm2 + vpaddd ymm2, ymm2, ymm1 + vmovdqa ymmword ptr [rsp+0x220], ymm2 + vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip] + vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip] + vpcmpgtd ymm2, ymm1, ymm2 + shr r8, 32 + vmovd xmm3, r8d + vpbroadcastd ymm3, xmm3 + vpsubd ymm3, ymm3, ymm2 + vmovdqa ymmword ptr [rsp+0x240], ymm3 + shl rdx, 6 + mov qword ptr [rsp+0x2C0], rdx + cmp rsi, 8 + jc 3f +2: + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+0x4] + vpbroadcastd ymm2, dword ptr [rcx+0x8] + vpbroadcastd ymm3, dword ptr [rcx+0xC] + vpbroadcastd ymm4, dword ptr [rcx+0x10] + vpbroadcastd ymm5, dword ptr [rcx+0x14] + vpbroadcastd ymm6, dword ptr [rcx+0x18] + vpbroadcastd ymm7, dword ptr [rcx+0x1C] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x20] + mov r13, qword ptr [rdi+0x28] + mov r14, qword ptr [rdi+0x30] + mov r15, qword ptr [rdi+0x38] + movzx eax, byte ptr [rbp+0x78] + movzx ebx, byte ptr [rbp+0x80] + or eax, ebx + xor edx, edx +.p2align 5 +9: + movzx ebx, byte ptr [rbp+0x88] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x2C0] + cmove eax, ebx + mov dword ptr [rsp+0x200], eax + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x40] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x40] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x20], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x40], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x60], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x30] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x30] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x80], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0xA0], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0xC0], 
ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0xE0], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x20] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x20] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x100], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x120], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x140], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x160], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x10] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x10] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x180], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x1A0], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x1C0], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x1E0], ymm11 + vpbroadcastd ymm15, dword ptr [rsp+0x200] + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm0, ymmword ptr [rsp+0x220] + vpxor ymm13, ymm1, ymmword ptr [rsp+0x240] + vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip] + vpxor ymm15, ymm3, ymm15 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip] + vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip] + vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip] + vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip] + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + 
vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb 
ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, 
ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 
20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80] + vpaddd 
ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, 
ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + 
vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor 
ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 
+ vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+0x78] + jne 9b + mov rbx, qword ptr [rbp+0x90] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0xCC + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0xCC + vblendps ymm3, ymm12, ymm9, 0xCC + vperm2f128 ymm12, ymm1, ymm2, 0x20 + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0xCC + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 0x20 + vmovups ymmword ptr [rbx+0x20], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0xCC + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0xCC + vblendps ymm14, ymm14, ymm13, 0xCC + vperm2f128 ymm8, ymm10, ymm14, 0x20 + vmovups ymmword ptr [rbx+0x40], ymm8 + vblendps ymm15, ymm13, ymm15, 0xCC + vperm2f128 ymm13, ymm6, ymm15, 0x20 + vmovups ymmword ptr [rbx+0x60], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 0x31 + vperm2f128 ymm11, ymm3, ymm4, 0x31 + vmovups ymmword ptr [rbx+0x80], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 0x31 + vperm2f128 ymm15, ymm6, ymm15, 0x31 + vmovups ymmword ptr [rbx+0xA0], ymm11 + vmovups ymmword ptr [rbx+0xC0], ymm14 + vmovups ymmword ptr [rbx+0xE0], ymm15 + vmovdqa ymm0, ymmword ptr [rsp+0x2A0] + vpaddd ymm1, ymm0, ymmword ptr [rsp+0x220] + vmovdqa ymmword ptr [rsp+0x220], ymm1 + vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip] + vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip] + vpcmpgtd ymm2, ymm0, ymm2 + vmovdqa ymm0, ymmword ptr [rsp+0x240] + vpsubd ymm2, ymm0, ymm2 + vmovdqa ymmword ptr [rsp+0x240], ymm2 + add rdi, 64 + add rbx, 256 + mov qword ptr [rbp+0x90], rbx + sub rsi, 8 + cmp rsi, 8 + jnc 2b + test rsi, rsi + jnz 3f +4: + vzeroupper + vmovdqa xmm6, xmmword ptr [rsp+0x2D0] + vmovdqa xmm7, xmmword ptr [rsp+0x2E0] + vmovdqa xmm8, xmmword ptr [rsp+0x2F0] + vmovdqa xmm9, xmmword ptr [rsp+0x300] + vmovdqa xmm10, xmmword ptr [rsp+0x310] + vmovdqa xmm11, xmmword ptr [rsp+0x320] + vmovdqa xmm12, xmmword ptr 
[rsp+0x330] + vmovdqa xmm13, xmmword ptr [rsp+0x340] + vmovdqa xmm14, xmmword ptr [rsp+0x350] + vmovdqa xmm15, xmmword ptr [rsp+0x360] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 5 +3: + mov rbx, qword ptr [rbp+0x90] + mov r15, qword ptr [rsp+0x2C0] + movzx r13d, byte ptr [rbp+0x78] + movzx r12d, byte ptr [rbp+0x88] + test rsi, 0x4 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovdqa ymm8, ymm0 + vmovdqa ymm9, ymm1 + vbroadcasti128 ymm12, xmmword ptr [rsp+0x220] + vbroadcasti128 ymm13, xmmword ptr [rsp+0x240] + vpunpckldq ymm14, ymm12, ymm13 + vpunpckhdq ymm15, ymm12, ymm13 + vpermq ymm14, ymm14, 0x50 + vpermq ymm15, ymm15, 0x50 + vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + vpblendd ymm14, ymm14, ymm12, 0x44 + vpblendd ymm15, ymm15, ymm12, 0x44 + vmovdqa ymmword ptr [rsp], ymm14 + vmovdqa ymmword ptr [rsp+0x20], ymm15 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x200], eax + vmovups ymm2, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm3, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm2, ymm3, 136 + vshufps ymm5, ymm2, ymm3, 221 + vmovups ymm2, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm3, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm2, ymm3, 136 + vshufps ymm7, ymm2, ymm3, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + vmovups ymm10, ymmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01 + vmovups ymm11, ymmword ptr [r10+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01 + vshufps ymm12, ymm10, ymm11, 136 + vshufps ymm13, ymm10, ymm11, 221 + vmovups ymm10, ymmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01 + vmovups ymm11, ymmword ptr [r10+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01 + vshufps ymm14, ymm10, ymm11, 136 + vshufps ymm15, ymm10, ymm11, 221 + vpshufd ymm14, ymm14, 0x93 + vpshufd ymm15, ymm15, 0x93 + vpbroadcastd ymm2, dword ptr [rsp+0x200] + vmovdqa ymm3, ymmword ptr [rsp] + vmovdqa ymm11, ymmword ptr [rsp+0x20] + vpblendd ymm3, ymm3, ymm2, 0x88 + vpblendd ymm11, ymm11, ymm2, 0x88 + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vmovdqa ymm10, ymm2 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm8, ymm8, ymm12 + vmovdqa ymmword ptr [rsp+0x40], ymm4 + nop + vmovdqa ymmword ptr [rsp+0x60], ymm12 + nop + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vmovdqa ymmword ptr [rsp+0x80], ymm5 + vmovdqa ymmword ptr [rsp+0xA0], ymm13 + vpxor ymm3, 
ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm8, ymm8, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm11, ymm11, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpshufd ymm10, ymm10, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm8, ymm8, ymm14 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm8, ymm8, ymm15 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm8, ymm8, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm11, ymm11, 0x4E + vpshufd ymm2, ymm2, 0x93 + vpshufd ymm10, ymm10, 0x93 + dec al + je 9f + vmovdqa ymm4, ymmword ptr [rsp+0x40] + vmovdqa ymm5, ymmword ptr [rsp+0x80] + vshufps ymm12, ymm4, ymm5, 214 + vpshufd ymm13, ymm4, 0x0F + vpshufd ymm4, ymm12, 0x39 + vshufps ymm12, ymm6, ymm7, 250 + vpblendd ymm13, ymm13, ymm12, 0xAA + vpunpcklqdq ymm12, ymm7, ymm5 + vpblendd ymm12, ymm12, ymm6, 0x88 + vpshufd ymm12, ymm12, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymmword ptr [rsp+0x40], ymm13 + vmovdqa ymmword ptr [rsp+0x80], ymm12 + vmovdqa ymm12, ymmword ptr [rsp+0x60] + vmovdqa ymm13, ymmword ptr [rsp+0xA0] + vshufps ymm5, ymm12, ymm13, 214 + vpshufd ymm6, ymm12, 0x0F + vpshufd ymm12, ymm5, 0x39 + vshufps ymm5, ymm14, ymm15, 250 + vpblendd ymm6, ymm6, ymm5, 0xAA + vpunpcklqdq ymm5, ymm15, ymm13 + vpblendd ymm5, ymm5, ymm14, 0x88 + vpshufd ymm5, ymm5, 0x78 + vpunpckhdq ymm13, ymm13, ymm15 + vpunpckldq ymm14, ymm14, ymm13 + vpshufd ymm15, ymm14, 0x1E + vmovdqa ymm13, ymm6 + vmovdqa ymm14, ymm5 + vmovdqa ymm5, ymmword ptr [rsp+0x40] + vmovdqa ymm6, ymmword ptr [rsp+0x80] + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + vpxor ymm8, ymm8, ymm10 + vpxor ymm9, ymm9, ymm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovdqu xmmword ptr [rbx+0x40], xmm8 + vmovdqu xmmword ptr [rbx+0x50], xmm9 + vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01 + vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01 + vmovaps xmm8, xmmword ptr [rsp+0x260] + vmovaps xmm0, xmmword ptr [rsp+0x220] + vmovaps xmm1, xmmword ptr [rsp+0x230] + vmovaps xmm2, xmmword ptr [rsp+0x240] + vmovaps xmm3, xmmword ptr [rsp+0x250] + vblendvps xmm0, xmm0, xmm1, xmm8 + 
vblendvps xmm2, xmm2, xmm3, xmm8 + vmovaps xmmword ptr [rsp+0x220], xmm0 + vmovaps xmmword ptr [rsp+0x240], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +3: + test rsi, 0x2 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovd xmm13, dword ptr [rsp+0x220] + vpinsrd xmm13, xmm13, dword ptr [rsp+0x240], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovd xmm14, dword ptr [rsp+0x224] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x244], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vinserti128 ymm13, ymm13, xmm14, 0x01 + vbroadcasti128 ymm14, xmmword ptr [ROT16+rip] + vbroadcasti128 ymm15, xmmword ptr [ROT8+rip] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x200], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vpbroadcastd ymm8, dword ptr [rsp+0x200] + vpblendd ymm3, ymm13, ymm8, 0x88 + vmovups ymm8, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x93 + dec al + jz 9f + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0x0F + vpshufd ymm4, ymm8, 0x39 + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0xAA + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 0x88 + vpshufd ymm8, ymm8, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovaps ymm8, ymmword ptr [rsp+0x260] + vmovaps ymm0, ymmword ptr [rsp+0x220] + vmovups ymm1, ymmword ptr [rsp+0x228] + vmovaps ymm2, ymmword 
ptr [rsp+0x240] + vmovups ymm3, ymmword ptr [rsp+0x248] + vblendvps ymm0, ymm0, ymm1, ymm8 + vblendvps ymm2, ymm2, ymm3, ymm8 + vmovaps ymmword ptr [rsp+0x220], ymm0 + vmovaps ymmword ptr [rsp+0x240], ymm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +3: + test rsi, 0x1 + je 4b + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + vmovd xmm3, dword ptr [rsp+0x220] + vpinsrd xmm3, xmm3, dword ptr [rsp+0x240], 1 + vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovdqa xmm14, xmmword ptr [ROT16+rip] + vmovdqa xmm15, xmmword ptr [ROT8+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovdqa xmm3, xmm13 + vpinsrd xmm3, xmm3, eax, 3 + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vmovups xmm9, xmmword ptr [r8+rdx-0x30] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vmovups xmm9, xmmword ptr [r8+rdx-0x10] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + jmp 4b + +.section .rodata +.p2align 6 +ADD0: + .long 0, 1, 2, 3, 4, 5, 6, 7 +ADD1: + .long 8, 8, 8, 8, 8, 8, 8, 8 +BLAKE3_IV_0: + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 0x00000040, 0x00000040, 0x00000040, 0x00000040 + .long 
0x00000040, 0x00000040, 0x00000040, 0x00000040 +ROT16: + .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +ROT8: + .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 +BLAKE3_IV: + .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A + diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm new file mode 100644 index 000000000000..352298edd2e8 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm @@ -0,0 +1,1828 @@ +public _blake3_hash_many_avx2 +public blake3_hash_many_avx2 + +_TEXT SEGMENT ALIGN(16) 'CODE' + +ALIGN 16 +blake3_hash_many_avx2 PROC +_blake3_hash_many_avx2 PROC + push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 880 + and rsp, 0FFFFFFFFFFFFFFC0H + vmovdqa xmmword ptr [rsp+2D0H], xmm6 + vmovdqa xmmword ptr [rsp+2E0H], xmm7 + vmovdqa xmmword ptr [rsp+2F0H], xmm8 + vmovdqa xmmword ptr [rsp+300H], xmm9 + vmovdqa xmmword ptr [rsp+310H], xmm10 + vmovdqa xmmword ptr [rsp+320H], xmm11 + vmovdqa xmmword ptr [rsp+330H], xmm12 + vmovdqa xmmword ptr [rsp+340H], xmm13 + vmovdqa xmmword ptr [rsp+350H], xmm14 + vmovdqa xmmword ptr [rsp+360H], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+68H] + movzx r9, byte ptr [rbp+70H] + neg r9d + vmovd xmm0, r9d + vpbroadcastd ymm0, xmm0 + vmovdqa ymmword ptr [rsp+260H], ymm0 + vpand ymm1, ymm0, ymmword ptr [ADD0] + vpand ymm2, ymm0, ymmword ptr [ADD1] + vmovdqa ymmword ptr [rsp+2A0H], ymm2 + vmovd xmm2, r8d + vpbroadcastd ymm2, xmm2 + vpaddd ymm2, ymm2, ymm1 + vmovdqa ymmword ptr [rsp+220H], ymm2 + vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK] + vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK] + vpcmpgtd ymm2, ymm1, ymm2 + shr r8, 32 + vmovd xmm3, r8d + vpbroadcastd ymm3, xmm3 + vpsubd ymm3, ymm3, ymm2 + vmovdqa ymmword ptr [rsp+240H], ymm3 + shl rdx, 6 + mov qword ptr [rsp+2C0H], rdx + cmp rsi, 8 + jc final7blocks +outerloop8: + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+4H] + vpbroadcastd ymm2, dword ptr [rcx+8H] + vpbroadcastd ymm3, dword ptr [rcx+0CH] + vpbroadcastd ymm4, dword ptr [rcx+10H] + vpbroadcastd ymm5, dword ptr [rcx+14H] + vpbroadcastd ymm6, dword ptr [rcx+18H] + vpbroadcastd ymm7, dword ptr [rcx+1CH] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+20H] + mov r13, qword ptr [rdi+28H] + mov r14, qword ptr [rdi+30H] + mov r15, qword ptr [rdi+38H] + movzx eax, byte ptr [rbp+78H] + movzx ebx, byte ptr [rbp+80H] + or eax, ebx + xor edx, edx +ALIGN 16 +innerloop8: + movzx ebx, byte ptr [rbp+88H] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+2C0H] + cmove eax, ebx + mov dword ptr [rsp+200H], eax + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-40H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-40H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-40H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp], 
ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+20H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+40H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+60H], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-30H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-30H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-30H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+80H], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0A0H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0C0H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0E0H], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-20H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-20H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-20H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+100H], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+120H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+140H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+160H], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-10H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-10H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-10H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+180H], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+1A0H], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+1C0H], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+1E0H], ymm11 + vpbroadcastd ymm15, dword ptr [rsp+200H] + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r12+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r13+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r14+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + prefetcht0 byte ptr [r15+rdx+80H] + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+40H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+80H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm0, ymmword ptr [rsp+220H] + vpxor ymm13, ymm1, ymmword ptr [rsp+240H] + vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN] + vpxor ymm15, ymm3, ymm15 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 
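+; AVX2 has no 32-bit lane-rotate instruction, so the byte-aligned rotations
+; in the BLAKE3 G function (>>> 16 and >>> 8) are done with vpshufb through
+; the ROT16/ROT8 tables in the data section, while the rotations by 12 and 7
+; are emulated with vpsrld/vpslld/vpor pairs below.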
+ vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0] + vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1] + vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2] + vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3] + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+20H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+60H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+100H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+180H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+120H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, 
ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+40H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+60H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0E0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+80H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+20H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+120H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + 
vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+160H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+60H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+80H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+40H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H] + vpaddd ymm1, 
ymm1, ymmword ptr [rsp+120H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+160H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0A0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+20H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+140H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+120H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+60H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 
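+; The ymmword ptr [rsp+...] operands in the vpaddd steps throughout this loop
+; read the sixteen transposed message vectors stored at [rsp] through
+; [rsp+1E0H] by the transpose code above; the seven unrolled rounds consume
+; them in BLAKE3 message-schedule order instead of permuting registers in
+; place.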
+ vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+80H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+20H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+40H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+100H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+180H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+120H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, 
ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+140H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+40H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+60H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+20H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+80H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], 
ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+120H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+160H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+100H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+180H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+20H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+40H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+80H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld 
ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+60H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+140H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0C0H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+160H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+20H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+100H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1E0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+120H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+40H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+60H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, 
ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+200H], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+140H] + vpaddd ymm1, ymm1, ymmword ptr [rsp+180H] + vpaddd ymm2, ymm2, ymmword ptr [rsp+80H] + vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+200H] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+78H] + jne innerloop8 + mov rbx, qword ptr [rbp+90H] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0CCH + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0CCH + vblendps ymm3, ymm12, ymm9, 0CCH + vperm2f128 ymm12, ymm1, ymm2, 20H + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0CCH + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 20H + vmovups ymmword ptr [rbx+20H], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0CCH + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0CCH + vblendps ymm14, ymm14, ymm13, 0CCH + vperm2f128 ymm8, ymm10, ymm14, 20H + vmovups ymmword ptr [rbx+40H], ymm8 + vblendps ymm15, ymm13, ymm15, 0CCH + vperm2f128 ymm13, ymm6, ymm15, 20H + vmovups ymmword ptr [rbx+60H], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 31H + vperm2f128 ymm11, ymm3, ymm4, 31H + vmovups ymmword ptr [rbx+80H], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 31H + vperm2f128 ymm15, ymm6, ymm15, 31H + vmovups ymmword ptr [rbx+0A0H], ymm11 + vmovups ymmword ptr [rbx+0C0H], ymm14 + vmovups ymmword ptr [rbx+0E0H], ymm15 + vmovdqa ymm0, ymmword ptr [rsp+2A0H] + vpaddd ymm1, ymm0, ymmword 
ptr [rsp+220H] + vmovdqa ymmword ptr [rsp+220H], ymm1 + vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK] + vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK] + vpcmpgtd ymm2, ymm0, ymm2 + vmovdqa ymm0, ymmword ptr [rsp+240H] + vpsubd ymm2, ymm0, ymm2 + vmovdqa ymmword ptr [rsp+240H], ymm2 + add rdi, 64 + add rbx, 256 + mov qword ptr [rbp+90H], rbx + sub rsi, 8 + cmp rsi, 8 + jnc outerloop8 + test rsi, rsi + jnz final7blocks +unwind: + vzeroupper + vmovdqa xmm6, xmmword ptr [rsp+2D0H] + vmovdqa xmm7, xmmword ptr [rsp+2E0H] + vmovdqa xmm8, xmmword ptr [rsp+2F0H] + vmovdqa xmm9, xmmword ptr [rsp+300H] + vmovdqa xmm10, xmmword ptr [rsp+310H] + vmovdqa xmm11, xmmword ptr [rsp+320H] + vmovdqa xmm12, xmmword ptr [rsp+330H] + vmovdqa xmm13, xmmword ptr [rsp+340H] + vmovdqa xmm14, xmmword ptr [rsp+350H] + vmovdqa xmm15, xmmword ptr [rsp+360H] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final7blocks: + mov rbx, qword ptr [rbp+90H] + mov r15, qword ptr [rsp+2C0H] + movzx r13d, byte ptr [rbp+78H] + movzx r12d, byte ptr [rbp+88H] + test rsi, 4H + je final3blocks + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+10H] + vmovdqa ymm8, ymm0 + vmovdqa ymm9, ymm1 + vbroadcasti128 ymm12, xmmword ptr [rsp+220H] + vbroadcasti128 ymm13, xmmword ptr [rsp+240H] + vpunpckldq ymm14, ymm12, ymm13 + vpunpckhdq ymm15, ymm12, ymm13 + vpermq ymm14, ymm14, 50H + vpermq ymm15, ymm15, 50H + vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN] + vpblendd ymm14, ymm14, ymm12, 44H + vpblendd ymm15, ymm15, ymm12, 44H + vmovdqa ymmword ptr [rsp], ymm14 + vmovdqa ymmword ptr [rsp+20H], ymm15 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop4: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+200H], eax + vmovups ymm2, ymmword ptr [r8+rdx-40H] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-40H], 01H + vmovups ymm3, ymmword ptr [r8+rdx-30H] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-30H], 01H + vshufps ymm4, ymm2, ymm3, 136 + vshufps ymm5, ymm2, ymm3, 221 + vmovups ymm2, ymmword ptr [r8+rdx-20H] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-20H], 01H + vmovups ymm3, ymmword ptr [r8+rdx-10H] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-10H], 01H + vshufps ymm6, ymm2, ymm3, 136 + vshufps ymm7, ymm2, ymm3, 221 + vpshufd ymm6, ymm6, 93H + vpshufd ymm7, ymm7, 93H + vmovups ymm10, ymmword ptr [r10+rdx-40H] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-40H], 01H + vmovups ymm11, ymmword ptr [r10+rdx-30H] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-30H], 01H + vshufps ymm12, ymm10, ymm11, 136 + vshufps ymm13, ymm10, ymm11, 221 + vmovups ymm10, ymmword ptr [r10+rdx-20H] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-20H], 01H + vmovups ymm11, ymmword ptr [r10+rdx-10H] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-10H], 01H + vshufps ymm14, ymm10, ymm11, 136 + vshufps ymm15, ymm10, ymm11, 221 + vpshufd ymm14, ymm14, 93H + vpshufd ymm15, ymm15, 93H + vpbroadcastd ymm2, dword ptr [rsp+200H] + vmovdqa ymm3, ymmword ptr [rsp] + vmovdqa ymm11, ymmword ptr [rsp+20H] + vpblendd ymm3, ymm3, ymm2, 88H + vpblendd ymm11, ymm11, ymm2, 88H + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] + vmovdqa ymm10, ymm2 + mov al, 7 +roundloop4: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm8, ymm8, ymm12 + vmovdqa ymmword ptr [rsp+40H], ymm4 + nop + vmovdqa ymmword ptr 
[rsp+60H], ymm12 + nop + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vmovdqa ymmword ptr [rsp+80H], ymm5 + vmovdqa ymmword ptr [rsp+0A0H], ymm13 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 93H + vpshufd ymm8, ymm8, 93H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm11, ymm11, 4EH + vpshufd ymm2, ymm2, 39H + vpshufd ymm10, ymm10, 39H + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm8, ymm8, ymm14 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm8, ymm8, ymm15 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 39H + vpshufd ymm8, ymm8, 39H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm11, ymm11, 4EH + vpshufd ymm2, ymm2, 93H + vpshufd ymm10, ymm10, 93H + dec al + je endroundloop4 + vmovdqa ymm4, ymmword ptr [rsp+40H] + vmovdqa ymm5, ymmword ptr [rsp+80H] + vshufps ymm12, ymm4, ymm5, 214 + vpshufd ymm13, ymm4, 0FH + vpshufd ymm4, ymm12, 39H + vshufps ymm12, ymm6, ymm7, 250 + vpblendd ymm13, ymm13, ymm12, 0AAH + vpunpcklqdq ymm12, ymm7, ymm5 + vpblendd ymm12, ymm12, ymm6, 88H + vpshufd ymm12, ymm12, 78H + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 1EH + vmovdqa ymmword ptr [rsp+40H], ymm13 + vmovdqa ymmword ptr [rsp+80H], ymm12 + vmovdqa ymm12, ymmword ptr [rsp+60H] + vmovdqa ymm13, ymmword ptr [rsp+0A0H] + vshufps ymm5, ymm12, ymm13, 214 + vpshufd ymm6, ymm12, 0FH + vpshufd ymm12, ymm5, 39H + vshufps ymm5, ymm14, ymm15, 250 + vpblendd ymm6, ymm6, ymm5, 0AAH + vpunpcklqdq ymm5, ymm15, ymm13 + vpblendd ymm5, ymm5, ymm14, 88H + vpshufd ymm5, ymm5, 78H + vpunpckhdq ymm13, ymm13, ymm15 + vpunpckldq ymm14, ymm14, ymm13 + vpshufd ymm15, ymm14, 1EH + vmovdqa ymm13, ymm6 + vmovdqa ymm14, ymm5 + vmovdqa ymm5, ymmword ptr [rsp+40H] + vmovdqa ymm6, ymmword ptr [rsp+80H] + jmp roundloop4 +endroundloop4: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + vpxor ymm8, ymm8, 
ymm10 + vpxor ymm9, ymm9, ymm11 + mov eax, r13d + cmp rdx, r15 + jne innerloop4 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vmovdqu xmmword ptr [rbx+40H], xmm8 + vmovdqu xmmword ptr [rbx+50H], xmm9 + vextracti128 xmmword ptr [rbx+60H], ymm8, 01H + vextracti128 xmmword ptr [rbx+70H], ymm9, 01H + vmovaps xmm8, xmmword ptr [rsp+260H] + vmovaps xmm0, xmmword ptr [rsp+220H] + vmovaps xmm1, xmmword ptr [rsp+230H] + vmovaps xmm2, xmmword ptr [rsp+240H] + vmovaps xmm3, xmmword ptr [rsp+250H] + vblendvps xmm0, xmm0, xmm1, xmm8 + vblendvps xmm2, xmm2, xmm3, xmm8 + vmovaps xmmword ptr [rsp+220H], xmm0 + vmovaps xmmword ptr [rsp+240H], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +final3blocks: + test rsi, 2H + je final1blocks + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+10H] + vmovd xmm13, dword ptr [rsp+220H] + vpinsrd xmm13, xmm13, dword ptr [rsp+240H], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovd xmm14, dword ptr [rsp+224H] + vpinsrd xmm14, xmm14, dword ptr [rsp+244H], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + vinserti128 ymm13, ymm13, xmm14, 01H + vbroadcasti128 ymm14, xmmword ptr [ROT16] + vbroadcasti128 ymm15, xmmword ptr [ROT8] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+200H], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] + vpbroadcastd ymm8, dword ptr [rsp+200H] + vpblendd ymm3, ymm13, ymm8, 88H + vmovups ymm8, ymmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 93H + vpshufd ymm7, ymm7, 93H + mov al, 7 +roundloop2: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 93H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 39H + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 39H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 93H + dec al + jz endroundloop2 + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0FH + vpshufd ymm4, ymm8, 39H + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 
0AAH + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 88H + vpshufd ymm8, ymm8, 78H + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 1EH + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp roundloop2 +endroundloop2: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vmovaps ymm8, ymmword ptr [rsp+260H] + vmovaps ymm0, ymmword ptr [rsp+220H] + vmovups ymm1, ymmword ptr [rsp+228H] + vmovaps ymm2, ymmword ptr [rsp+240H] + vmovups ymm3, ymmword ptr [rsp+248H] + vblendvps ymm0, ymm0, ymm1, ymm8 + vblendvps ymm2, ymm2, ymm3, ymm8 + vmovaps ymmword ptr [rsp+220H], ymm0 + vmovaps ymmword ptr [rsp+240H], ymm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +final1blocks: + test rsi, 1H + je unwind + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + vmovd xmm3, dword ptr [rsp+220H] + vpinsrd xmm3, xmm3, dword ptr [rsp+240H], 1 + vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovdqa xmm14, xmmword ptr [ROT16] + vmovdqa xmm15, xmmword ptr [ROT8] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vmovdqa xmm2, xmmword ptr [BLAKE3_IV] + vmovdqa xmm3, xmm13 + vpinsrd xmm3, xmm3, eax, 3 + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vmovups xmm9, xmmword ptr [r8+rdx-30H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vmovups xmm9, xmmword ptr [r8+rdx-10H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +roundloop1: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + jmp 
unwind + +_blake3_hash_many_avx2 ENDP +blake3_hash_many_avx2 ENDP +_TEXT ENDS + +_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' +ALIGN 64 +ADD0: + dd 0, 1, 2, 3, 4, 5, 6, 7 + +ADD1: + dd 8 dup (8) + +BLAKE3_IV_0: + dd 8 dup (6A09E667H) + +BLAKE3_IV_1: + dd 8 dup (0BB67AE85H) + +BLAKE3_IV_2: + dd 8 dup (3C6EF372H) + +BLAKE3_IV_3: + dd 8 dup (0A54FF53AH) + +BLAKE3_BLOCK_LEN: + dd 8 dup (64) + +ROT16: + db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 + +ROT8: + db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 + +CMP_MSB_MASK: + dd 8 dup(80000000H) + +BLAKE3_IV: + dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH + +_RDATA ENDS +END diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512.c b/llvm/lib/Support/BLAKE3/blake3_avx512.c new file mode 100644 index 000000000000..9c35b08c439a --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_avx512.c @@ -0,0 +1,1207 @@ +#include "blake3_impl.h" + +#include <immintrin.h> + +#define _mm_shuffle_ps2(a, b, c) \ + (_mm_castps_si128( \ + _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c)))) + +INLINE __m128i loadu_128(const uint8_t src[16]) { + return _mm_loadu_si128((const __m128i *)src); +} + +INLINE __m256i loadu_256(const uint8_t src[32]) { + return _mm256_loadu_si256((const __m256i *)src); +} + +INLINE __m512i loadu_512(const uint8_t src[64]) { + return _mm512_loadu_si512((const __m512i *)src); +} + +INLINE void storeu_128(__m128i src, uint8_t dest[16]) { + _mm_storeu_si128((__m128i *)dest, src); +} + +INLINE void storeu_256(__m256i src, uint8_t dest[32]) { + _mm256_storeu_si256((__m256i *)dest, src); +} + +INLINE __m128i add_128(__m128i a, __m128i b) { return _mm_add_epi32(a, b); } + +INLINE __m256i add_256(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); } + +INLINE __m512i add_512(__m512i a, __m512i b) { return _mm512_add_epi32(a, b); } + +INLINE __m128i xor_128(__m128i a, __m128i b) { return _mm_xor_si128(a, b); } + +INLINE __m256i xor_256(__m256i a, __m256i b) { return _mm256_xor_si256(a, b); } + +INLINE __m512i xor_512(__m512i a, __m512i b) { return _mm512_xor_si512(a, b); } + +INLINE __m128i set1_128(uint32_t x) { return _mm_set1_epi32((int32_t)x); } + +INLINE __m256i set1_256(uint32_t x) { return _mm256_set1_epi32((int32_t)x); } + +INLINE __m512i set1_512(uint32_t x) { return _mm512_set1_epi32((int32_t)x); } + +INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d); +} + +INLINE __m128i rot16_128(__m128i x) { return _mm_ror_epi32(x, 16); } + +INLINE __m256i rot16_256(__m256i x) { return _mm256_ror_epi32(x, 16); } + +INLINE __m512i rot16_512(__m512i x) { return _mm512_ror_epi32(x, 16); } + +INLINE __m128i rot12_128(__m128i x) { return _mm_ror_epi32(x, 12); } + +INLINE __m256i rot12_256(__m256i x) { return _mm256_ror_epi32(x, 12); } + +INLINE __m512i rot12_512(__m512i x) { return _mm512_ror_epi32(x, 12); } + +INLINE __m128i rot8_128(__m128i x) { return _mm_ror_epi32(x, 8); } + +INLINE __m256i rot8_256(__m256i x) { return _mm256_ror_epi32(x, 8); } + +INLINE __m512i rot8_512(__m512i x) { return _mm512_ror_epi32(x, 8); } + +INLINE __m128i rot7_128(__m128i x) { return _mm_ror_epi32(x, 7); } + +INLINE __m256i rot7_256(__m256i x) { return _mm256_ror_epi32(x, 7); } + +INLINE __m512i rot7_512(__m512i x) { return _mm512_ror_epi32(x, 7); } + +/* + * ---------------------------------------------------------------------------- + * compress_avx512 + * ---------------------------------------------------------------------------- + */ + +INLINE void g1(__m128i
*row0, __m128i *row1, __m128i *row2, __m128i *row3, + __m128i m) { + *row0 = add_128(add_128(*row0, m), *row1); + *row3 = xor_128(*row3, *row0); + *row3 = rot16_128(*row3); + *row2 = add_128(*row2, *row3); + *row1 = xor_128(*row1, *row2); + *row1 = rot12_128(*row1); +} + +INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, + __m128i m) { + *row0 = add_128(add_128(*row0, m), *row1); + *row3 = xor_128(*row3, *row0); + *row3 = rot8_128(*row3); + *row2 = add_128(*row2, *row3); + *row1 = xor_128(*row1, *row2); + *row1 = rot7_128(*row1); +} + +// Note the optimization here of leaving row1 as the unrotated row, rather than +// row0. All the message loads below are adjusted to compensate for this. See +// discussion at https://github.com/sneves/blake2-avx2/pull/4 +INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1)); +} + +INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3)); +} + +INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags) { + rows[0] = loadu_128((uint8_t *)&cv[0]); + rows[1] = loadu_128((uint8_t *)&cv[4]); + rows[2] = set4(IV[0], IV[1], IV[2], IV[3]); + rows[3] = set4(counter_low(counter), counter_high(counter), + (uint32_t)block_len, (uint32_t)flags); + + __m128i m0 = loadu_128(&block[sizeof(__m128i) * 0]); + __m128i m1 = loadu_128(&block[sizeof(__m128i) * 1]); + __m128i m2 = loadu_128(&block[sizeof(__m128i) * 2]); + __m128i m3 = loadu_128(&block[sizeof(__m128i) * 3]); + + __m128i t0, t1, t2, t3, tt; + + // Round 1. The first round permutes the message words from the original + // input order, into the groups that get mixed in parallel. + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8 + t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9 + t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 2. This round and all following rounds apply a fixed permutation + // to the message words from the round before. 
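+  // In scalar terms that fixed permutation is the standard BLAKE3 message
+  // permutation; as a sketch for reference (`prev` and `next` are
+  // illustrative names, not part of this file):
+  //
+  //   static const uint8_t PERM[16] = {2, 6,  3, 10, 7, 0,  4, 13,
+  //                                    1, 11, 12, 5,  9, 14, 15, 8};
+  //   for (size_t i = 0; i < 16; i++) next[i] = prev[PERM[i]];
+  //
+  // The shuffle/blend sequences in rounds 2-7 realize the same reordering on
+  // the packed vectors m0..m3, in the adjusted lane order described above
+  // diagonalize().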
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 3 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 4 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 5 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 6 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, 
_MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 7 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); +} + +void blake3_compress_xof_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags, uint8_t out[64]) { + __m128i rows[4]; + compress_pre(rows, cv, block, block_len, counter, flags); + storeu_128(xor_128(rows[0], rows[2]), &out[0]); + storeu_128(xor_128(rows[1], rows[3]), &out[16]); + storeu_128(xor_128(rows[2], loadu_128((uint8_t *)&cv[0])), &out[32]); + storeu_128(xor_128(rows[3], loadu_128((uint8_t *)&cv[4])), &out[48]); +} + +void blake3_compress_in_place_avx512(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags) { + __m128i rows[4]; + compress_pre(rows, cv, block, block_len, counter, flags); + storeu_128(xor_128(rows[0], rows[2]), (uint8_t *)&cv[0]); + storeu_128(xor_128(rows[1], rows[3]), (uint8_t *)&cv[4]); +} + +/* + * ---------------------------------------------------------------------------- + * hash4_avx512 + * ---------------------------------------------------------------------------- + */ + +INLINE void round_fn4(__m128i v[16], __m128i m[16], size_t r) { + v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[0] = add_128(v[0], v[4]); + v[1] = add_128(v[1], v[5]); + v[2] = add_128(v[2], v[6]); + v[3] = add_128(v[3], v[7]); + v[12] = xor_128(v[12], v[0]); + v[13] = xor_128(v[13], v[1]); + v[14] = xor_128(v[14], v[2]); + v[15] = xor_128(v[15], v[3]); + v[12] = rot16_128(v[12]); + v[13] = rot16_128(v[13]); + v[14] = rot16_128(v[14]); + v[15] = rot16_128(v[15]); + v[8] = add_128(v[8], v[12]); + v[9] = add_128(v[9], v[13]); + v[10] = add_128(v[10], v[14]); + v[11] = add_128(v[11], v[15]); + v[4] = xor_128(v[4], v[8]); + v[5] = xor_128(v[5], v[9]); + v[6] = xor_128(v[6], v[10]); + v[7] = xor_128(v[7], v[11]); + v[4] = 
rot12_128(v[4]); + v[5] = rot12_128(v[5]); + v[6] = rot12_128(v[6]); + v[7] = rot12_128(v[7]); + v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[0] = add_128(v[0], v[4]); + v[1] = add_128(v[1], v[5]); + v[2] = add_128(v[2], v[6]); + v[3] = add_128(v[3], v[7]); + v[12] = xor_128(v[12], v[0]); + v[13] = xor_128(v[13], v[1]); + v[14] = xor_128(v[14], v[2]); + v[15] = xor_128(v[15], v[3]); + v[12] = rot8_128(v[12]); + v[13] = rot8_128(v[13]); + v[14] = rot8_128(v[14]); + v[15] = rot8_128(v[15]); + v[8] = add_128(v[8], v[12]); + v[9] = add_128(v[9], v[13]); + v[10] = add_128(v[10], v[14]); + v[11] = add_128(v[11], v[15]); + v[4] = xor_128(v[4], v[8]); + v[5] = xor_128(v[5], v[9]); + v[6] = xor_128(v[6], v[10]); + v[7] = xor_128(v[7], v[11]); + v[4] = rot7_128(v[4]); + v[5] = rot7_128(v[5]); + v[6] = rot7_128(v[6]); + v[7] = rot7_128(v[7]); + + v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]); + v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]); + v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]); + v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]); + v[0] = add_128(v[0], v[5]); + v[1] = add_128(v[1], v[6]); + v[2] = add_128(v[2], v[7]); + v[3] = add_128(v[3], v[4]); + v[15] = xor_128(v[15], v[0]); + v[12] = xor_128(v[12], v[1]); + v[13] = xor_128(v[13], v[2]); + v[14] = xor_128(v[14], v[3]); + v[15] = rot16_128(v[15]); + v[12] = rot16_128(v[12]); + v[13] = rot16_128(v[13]); + v[14] = rot16_128(v[14]); + v[10] = add_128(v[10], v[15]); + v[11] = add_128(v[11], v[12]); + v[8] = add_128(v[8], v[13]); + v[9] = add_128(v[9], v[14]); + v[5] = xor_128(v[5], v[10]); + v[6] = xor_128(v[6], v[11]); + v[7] = xor_128(v[7], v[8]); + v[4] = xor_128(v[4], v[9]); + v[5] = rot12_128(v[5]); + v[6] = rot12_128(v[6]); + v[7] = rot12_128(v[7]); + v[4] = rot12_128(v[4]); + v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]); + v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]); + v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]); + v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]); + v[0] = add_128(v[0], v[5]); + v[1] = add_128(v[1], v[6]); + v[2] = add_128(v[2], v[7]); + v[3] = add_128(v[3], v[4]); + v[15] = xor_128(v[15], v[0]); + v[12] = xor_128(v[12], v[1]); + v[13] = xor_128(v[13], v[2]); + v[14] = xor_128(v[14], v[3]); + v[15] = rot8_128(v[15]); + v[12] = rot8_128(v[12]); + v[13] = rot8_128(v[13]); + v[14] = rot8_128(v[14]); + v[10] = add_128(v[10], v[15]); + v[11] = add_128(v[11], v[12]); + v[8] = add_128(v[8], v[13]); + v[9] = add_128(v[9], v[14]); + v[5] = xor_128(v[5], v[10]); + v[6] = xor_128(v[6], v[11]); + v[7] = xor_128(v[7], v[8]); + v[4] = xor_128(v[4], v[9]); + v[5] = rot7_128(v[5]); + v[6] = rot7_128(v[6]); + v[7] = rot7_128(v[7]); + v[4] = rot7_128(v[4]); +} + +INLINE void transpose_vecs_128(__m128i vecs[4]) { + // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is + // 22/33. Note that this doesn't split the vector into two lanes, as the + // AVX2 counterparts do. + __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]); + __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]); + __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]); + __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]); + + // Interleave 64-bit lanes. 
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01); + __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01); + __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23); + __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23); + + vecs[0] = abcd_0; + vecs[1] = abcd_1; + vecs[2] = abcd_2; + vecs[3] = abcd_3; +} + +INLINE void transpose_msg_vecs4(const uint8_t *const *inputs, + size_t block_offset, __m128i out[16]) { + out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(__m128i)]); + out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(__m128i)]); + out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(__m128i)]); + out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(__m128i)]); + out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(__m128i)]); + out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(__m128i)]); + out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(__m128i)]); + out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(__m128i)]); + out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(__m128i)]); + out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(__m128i)]); + out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(__m128i)]); + out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(__m128i)]); + out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(__m128i)]); + out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(__m128i)]); + out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]); + out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]); + for (size_t i = 0; i < 4; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs_128(&out[0]); + transpose_vecs_128(&out[4]); + transpose_vecs_128(&out[8]); + transpose_vecs_128(&out[12]); +} + +INLINE void load_counters4(uint64_t counter, bool increment_counter, + __m128i *out_lo, __m128i *out_hi) { + uint64_t mask = (increment_counter ? 
~0 : 0); + __m256i mask_vec = _mm256_set1_epi64x(mask); + __m256i deltas = _mm256_setr_epi64x(0, 1, 2, 3); + deltas = _mm256_and_si256(mask_vec, deltas); + __m256i counters = + _mm256_add_epi64(_mm256_set1_epi64x((int64_t)counter), deltas); + *out_lo = _mm256_cvtepi64_epi32(counters); + *out_hi = _mm256_cvtepi64_epi32(_mm256_srli_epi64(counters, 32)); +} + +static +void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks, + const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + __m128i h_vecs[8] = { + set1_128(key[0]), set1_128(key[1]), set1_128(key[2]), set1_128(key[3]), + set1_128(key[4]), set1_128(key[5]), set1_128(key[6]), set1_128(key[7]), + }; + __m128i counter_low_vec, counter_high_vec; + load_counters4(counter, increment_counter, &counter_low_vec, + &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m128i block_len_vec = set1_128(BLAKE3_BLOCK_LEN); + __m128i block_flags_vec = set1_128(block_flags); + __m128i msg_vecs[16]; + transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m128i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1_128(IV[0]), set1_128(IV[1]), set1_128(IV[2]), set1_128(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, + }; + round_fn4(v, msg_vecs, 0); + round_fn4(v, msg_vecs, 1); + round_fn4(v, msg_vecs, 2); + round_fn4(v, msg_vecs, 3); + round_fn4(v, msg_vecs, 4); + round_fn4(v, msg_vecs, 5); + round_fn4(v, msg_vecs, 6); + h_vecs[0] = xor_128(v[0], v[8]); + h_vecs[1] = xor_128(v[1], v[9]); + h_vecs[2] = xor_128(v[2], v[10]); + h_vecs[3] = xor_128(v[3], v[11]); + h_vecs[4] = xor_128(v[4], v[12]); + h_vecs[5] = xor_128(v[5], v[13]); + h_vecs[6] = xor_128(v[6], v[14]); + h_vecs[7] = xor_128(v[7], v[15]); + + block_flags = flags; + } + + transpose_vecs_128(&h_vecs[0]); + transpose_vecs_128(&h_vecs[4]); + // The first four vecs now contain the first half of each output, and the + // second four vecs contain the second half of each output. 
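+  // That is, 32-byte output i is h_vecs[i] followed by h_vecs[i + 4]; the
+  // stores below interleave the halves so each output is contiguous in out.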
+ storeu_128(h_vecs[0], &out[0 * sizeof(__m128i)]); + storeu_128(h_vecs[4], &out[1 * sizeof(__m128i)]); + storeu_128(h_vecs[1], &out[2 * sizeof(__m128i)]); + storeu_128(h_vecs[5], &out[3 * sizeof(__m128i)]); + storeu_128(h_vecs[2], &out[4 * sizeof(__m128i)]); + storeu_128(h_vecs[6], &out[5 * sizeof(__m128i)]); + storeu_128(h_vecs[3], &out[6 * sizeof(__m128i)]); + storeu_128(h_vecs[7], &out[7 * sizeof(__m128i)]); +} + +/* + * ---------------------------------------------------------------------------- + * hash8_avx512 + * ---------------------------------------------------------------------------- + */ + +INLINE void round_fn8(__m256i v[16], __m256i m[16], size_t r) { + v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[0] = add_256(v[0], v[4]); + v[1] = add_256(v[1], v[5]); + v[2] = add_256(v[2], v[6]); + v[3] = add_256(v[3], v[7]); + v[12] = xor_256(v[12], v[0]); + v[13] = xor_256(v[13], v[1]); + v[14] = xor_256(v[14], v[2]); + v[15] = xor_256(v[15], v[3]); + v[12] = rot16_256(v[12]); + v[13] = rot16_256(v[13]); + v[14] = rot16_256(v[14]); + v[15] = rot16_256(v[15]); + v[8] = add_256(v[8], v[12]); + v[9] = add_256(v[9], v[13]); + v[10] = add_256(v[10], v[14]); + v[11] = add_256(v[11], v[15]); + v[4] = xor_256(v[4], v[8]); + v[5] = xor_256(v[5], v[9]); + v[6] = xor_256(v[6], v[10]); + v[7] = xor_256(v[7], v[11]); + v[4] = rot12_256(v[4]); + v[5] = rot12_256(v[5]); + v[6] = rot12_256(v[6]); + v[7] = rot12_256(v[7]); + v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[0] = add_256(v[0], v[4]); + v[1] = add_256(v[1], v[5]); + v[2] = add_256(v[2], v[6]); + v[3] = add_256(v[3], v[7]); + v[12] = xor_256(v[12], v[0]); + v[13] = xor_256(v[13], v[1]); + v[14] = xor_256(v[14], v[2]); + v[15] = xor_256(v[15], v[3]); + v[12] = rot8_256(v[12]); + v[13] = rot8_256(v[13]); + v[14] = rot8_256(v[14]); + v[15] = rot8_256(v[15]); + v[8] = add_256(v[8], v[12]); + v[9] = add_256(v[9], v[13]); + v[10] = add_256(v[10], v[14]); + v[11] = add_256(v[11], v[15]); + v[4] = xor_256(v[4], v[8]); + v[5] = xor_256(v[5], v[9]); + v[6] = xor_256(v[6], v[10]); + v[7] = xor_256(v[7], v[11]); + v[4] = rot7_256(v[4]); + v[5] = rot7_256(v[5]); + v[6] = rot7_256(v[6]); + v[7] = rot7_256(v[7]); + + v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][8]]); + v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][10]]); + v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][12]]); + v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][14]]); + v[0] = add_256(v[0], v[5]); + v[1] = add_256(v[1], v[6]); + v[2] = add_256(v[2], v[7]); + v[3] = add_256(v[3], v[4]); + v[15] = xor_256(v[15], v[0]); + v[12] = xor_256(v[12], v[1]); + v[13] = xor_256(v[13], v[2]); + v[14] = xor_256(v[14], v[3]); + v[15] = rot16_256(v[15]); + v[12] = rot16_256(v[12]); + v[13] = rot16_256(v[13]); + v[14] = rot16_256(v[14]); + v[10] = add_256(v[10], v[15]); + v[11] = add_256(v[11], v[12]); + v[8] = add_256(v[8], v[13]); + v[9] = add_256(v[9], v[14]); + v[5] = xor_256(v[5], v[10]); + v[6] = xor_256(v[6], v[11]); + v[7] = xor_256(v[7], v[8]); + v[4] = xor_256(v[4], v[9]); + v[5] = rot12_256(v[5]); + v[6] = rot12_256(v[6]); + v[7] = rot12_256(v[7]); + v[4] = rot12_256(v[4]); + v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][9]]); + v[1] = add_256(v[1], 
m[(size_t)MSG_SCHEDULE[r][11]]); + v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][13]]); + v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][15]]); + v[0] = add_256(v[0], v[5]); + v[1] = add_256(v[1], v[6]); + v[2] = add_256(v[2], v[7]); + v[3] = add_256(v[3], v[4]); + v[15] = xor_256(v[15], v[0]); + v[12] = xor_256(v[12], v[1]); + v[13] = xor_256(v[13], v[2]); + v[14] = xor_256(v[14], v[3]); + v[15] = rot8_256(v[15]); + v[12] = rot8_256(v[12]); + v[13] = rot8_256(v[13]); + v[14] = rot8_256(v[14]); + v[10] = add_256(v[10], v[15]); + v[11] = add_256(v[11], v[12]); + v[8] = add_256(v[8], v[13]); + v[9] = add_256(v[9], v[14]); + v[5] = xor_256(v[5], v[10]); + v[6] = xor_256(v[6], v[11]); + v[7] = xor_256(v[7], v[8]); + v[4] = xor_256(v[4], v[9]); + v[5] = rot7_256(v[5]); + v[6] = rot7_256(v[6]); + v[7] = rot7_256(v[7]); + v[4] = rot7_256(v[4]); +} + +INLINE void transpose_vecs_256(__m256i vecs[8]) { + // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high + // is 22/33/66/77. + __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]); + __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]); + __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]); + __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]); + __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]); + __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]); + __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]); + __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]); + + // Interleave 64-bit lates. The low unpack is lanes 00/22 and the high is + // 11/33. + __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145); + __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145); + __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367); + __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367); + __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145); + __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145); + __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367); + __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367); + + // Interleave 128-bit lanes. 
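+  // (In _mm256_permute2x128_si256, immediate 0x20 selects the low 128-bit
+  // lane of each source and 0x31 selects the high lane of each, so vecs[0..3]
+  // end up holding rows 0-3 and vecs[4..7] rows 4-7.)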
+ vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20); + vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20); + vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20); + vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20); + vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31); + vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31); + vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31); + vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31); +} + +INLINE void transpose_msg_vecs8(const uint8_t *const *inputs, + size_t block_offset, __m256i out[16]) { + out[0] = loadu_256(&inputs[0][block_offset + 0 * sizeof(__m256i)]); + out[1] = loadu_256(&inputs[1][block_offset + 0 * sizeof(__m256i)]); + out[2] = loadu_256(&inputs[2][block_offset + 0 * sizeof(__m256i)]); + out[3] = loadu_256(&inputs[3][block_offset + 0 * sizeof(__m256i)]); + out[4] = loadu_256(&inputs[4][block_offset + 0 * sizeof(__m256i)]); + out[5] = loadu_256(&inputs[5][block_offset + 0 * sizeof(__m256i)]); + out[6] = loadu_256(&inputs[6][block_offset + 0 * sizeof(__m256i)]); + out[7] = loadu_256(&inputs[7][block_offset + 0 * sizeof(__m256i)]); + out[8] = loadu_256(&inputs[0][block_offset + 1 * sizeof(__m256i)]); + out[9] = loadu_256(&inputs[1][block_offset + 1 * sizeof(__m256i)]); + out[10] = loadu_256(&inputs[2][block_offset + 1 * sizeof(__m256i)]); + out[11] = loadu_256(&inputs[3][block_offset + 1 * sizeof(__m256i)]); + out[12] = loadu_256(&inputs[4][block_offset + 1 * sizeof(__m256i)]); + out[13] = loadu_256(&inputs[5][block_offset + 1 * sizeof(__m256i)]); + out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]); + out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]); + for (size_t i = 0; i < 8; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs_256(&out[0]); + transpose_vecs_256(&out[8]); +} + +INLINE void load_counters8(uint64_t counter, bool increment_counter, + __m256i *out_lo, __m256i *out_hi) { + uint64_t mask = (increment_counter ? 
~0 : 0); + __m512i mask_vec = _mm512_set1_epi64(mask); + __m512i deltas = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + deltas = _mm512_and_si512(mask_vec, deltas); + __m512i counters = + _mm512_add_epi64(_mm512_set1_epi64((int64_t)counter), deltas); + *out_lo = _mm512_cvtepi64_epi32(counters); + *out_hi = _mm512_cvtepi64_epi32(_mm512_srli_epi64(counters, 32)); +} + +static +void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks, + const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + __m256i h_vecs[8] = { + set1_256(key[0]), set1_256(key[1]), set1_256(key[2]), set1_256(key[3]), + set1_256(key[4]), set1_256(key[5]), set1_256(key[6]), set1_256(key[7]), + }; + __m256i counter_low_vec, counter_high_vec; + load_counters8(counter, increment_counter, &counter_low_vec, + &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m256i block_len_vec = set1_256(BLAKE3_BLOCK_LEN); + __m256i block_flags_vec = set1_256(block_flags); + __m256i msg_vecs[16]; + transpose_msg_vecs8(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m256i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1_256(IV[0]), set1_256(IV[1]), set1_256(IV[2]), set1_256(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, + }; + round_fn8(v, msg_vecs, 0); + round_fn8(v, msg_vecs, 1); + round_fn8(v, msg_vecs, 2); + round_fn8(v, msg_vecs, 3); + round_fn8(v, msg_vecs, 4); + round_fn8(v, msg_vecs, 5); + round_fn8(v, msg_vecs, 6); + h_vecs[0] = xor_256(v[0], v[8]); + h_vecs[1] = xor_256(v[1], v[9]); + h_vecs[2] = xor_256(v[2], v[10]); + h_vecs[3] = xor_256(v[3], v[11]); + h_vecs[4] = xor_256(v[4], v[12]); + h_vecs[5] = xor_256(v[5], v[13]); + h_vecs[6] = xor_256(v[6], v[14]); + h_vecs[7] = xor_256(v[7], v[15]); + + block_flags = flags; + } + + transpose_vecs_256(h_vecs); + storeu_256(h_vecs[0], &out[0 * sizeof(__m256i)]); + storeu_256(h_vecs[1], &out[1 * sizeof(__m256i)]); + storeu_256(h_vecs[2], &out[2 * sizeof(__m256i)]); + storeu_256(h_vecs[3], &out[3 * sizeof(__m256i)]); + storeu_256(h_vecs[4], &out[4 * sizeof(__m256i)]); + storeu_256(h_vecs[5], &out[5 * sizeof(__m256i)]); + storeu_256(h_vecs[6], &out[6 * sizeof(__m256i)]); + storeu_256(h_vecs[7], &out[7 * sizeof(__m256i)]); +} + +/* + * ---------------------------------------------------------------------------- + * hash16_avx512 + * ---------------------------------------------------------------------------- + */ + +INLINE void round_fn16(__m512i v[16], __m512i m[16], size_t r) { + v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[0] = add_512(v[0], v[4]); + v[1] = add_512(v[1], v[5]); + v[2] = add_512(v[2], v[6]); + v[3] = add_512(v[3], v[7]); + v[12] = xor_512(v[12], v[0]); + v[13] = xor_512(v[13], v[1]); + v[14] = xor_512(v[14], v[2]); + v[15] = xor_512(v[15], v[3]); + v[12] = rot16_512(v[12]); + v[13] = rot16_512(v[13]); + v[14] = rot16_512(v[14]); + v[15] = rot16_512(v[15]); + v[8] = add_512(v[8], v[12]); + v[9] = add_512(v[9], v[13]); + v[10] = add_512(v[10], v[14]); + v[11] = add_512(v[11], v[15]); + v[4] = xor_512(v[4], v[8]); + v[5] = xor_512(v[5], v[9]); + v[6] = xor_512(v[6], v[10]); + v[7] = 
xor_512(v[7], v[11]); + v[4] = rot12_512(v[4]); + v[5] = rot12_512(v[5]); + v[6] = rot12_512(v[6]); + v[7] = rot12_512(v[7]); + v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[0] = add_512(v[0], v[4]); + v[1] = add_512(v[1], v[5]); + v[2] = add_512(v[2], v[6]); + v[3] = add_512(v[3], v[7]); + v[12] = xor_512(v[12], v[0]); + v[13] = xor_512(v[13], v[1]); + v[14] = xor_512(v[14], v[2]); + v[15] = xor_512(v[15], v[3]); + v[12] = rot8_512(v[12]); + v[13] = rot8_512(v[13]); + v[14] = rot8_512(v[14]); + v[15] = rot8_512(v[15]); + v[8] = add_512(v[8], v[12]); + v[9] = add_512(v[9], v[13]); + v[10] = add_512(v[10], v[14]); + v[11] = add_512(v[11], v[15]); + v[4] = xor_512(v[4], v[8]); + v[5] = xor_512(v[5], v[9]); + v[6] = xor_512(v[6], v[10]); + v[7] = xor_512(v[7], v[11]); + v[4] = rot7_512(v[4]); + v[5] = rot7_512(v[5]); + v[6] = rot7_512(v[6]); + v[7] = rot7_512(v[7]); + + v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][8]]); + v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][10]]); + v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][12]]); + v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][14]]); + v[0] = add_512(v[0], v[5]); + v[1] = add_512(v[1], v[6]); + v[2] = add_512(v[2], v[7]); + v[3] = add_512(v[3], v[4]); + v[15] = xor_512(v[15], v[0]); + v[12] = xor_512(v[12], v[1]); + v[13] = xor_512(v[13], v[2]); + v[14] = xor_512(v[14], v[3]); + v[15] = rot16_512(v[15]); + v[12] = rot16_512(v[12]); + v[13] = rot16_512(v[13]); + v[14] = rot16_512(v[14]); + v[10] = add_512(v[10], v[15]); + v[11] = add_512(v[11], v[12]); + v[8] = add_512(v[8], v[13]); + v[9] = add_512(v[9], v[14]); + v[5] = xor_512(v[5], v[10]); + v[6] = xor_512(v[6], v[11]); + v[7] = xor_512(v[7], v[8]); + v[4] = xor_512(v[4], v[9]); + v[5] = rot12_512(v[5]); + v[6] = rot12_512(v[6]); + v[7] = rot12_512(v[7]); + v[4] = rot12_512(v[4]); + v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][9]]); + v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][11]]); + v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][13]]); + v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][15]]); + v[0] = add_512(v[0], v[5]); + v[1] = add_512(v[1], v[6]); + v[2] = add_512(v[2], v[7]); + v[3] = add_512(v[3], v[4]); + v[15] = xor_512(v[15], v[0]); + v[12] = xor_512(v[12], v[1]); + v[13] = xor_512(v[13], v[2]); + v[14] = xor_512(v[14], v[3]); + v[15] = rot8_512(v[15]); + v[12] = rot8_512(v[12]); + v[13] = rot8_512(v[13]); + v[14] = rot8_512(v[14]); + v[10] = add_512(v[10], v[15]); + v[11] = add_512(v[11], v[12]); + v[8] = add_512(v[8], v[13]); + v[9] = add_512(v[9], v[14]); + v[5] = xor_512(v[5], v[10]); + v[6] = xor_512(v[6], v[11]); + v[7] = xor_512(v[7], v[8]); + v[4] = xor_512(v[4], v[9]); + v[5] = rot7_512(v[5]); + v[6] = rot7_512(v[6]); + v[7] = rot7_512(v[7]); + v[4] = rot7_512(v[4]); +} + +// 0b10001000, or lanes a0/a2/b0/b2 in little-endian order +#define LO_IMM8 0x88 + +INLINE __m512i unpack_lo_128(__m512i a, __m512i b) { + return _mm512_shuffle_i32x4(a, b, LO_IMM8); +} + +// 0b11011101, or lanes a1/a3/b1/b3 in little-endian order +#define HI_IMM8 0xdd + +INLINE __m512i unpack_hi_128(__m512i a, __m512i b) { + return _mm512_shuffle_i32x4(a, b, HI_IMM8); +} + +INLINE void transpose_vecs_512(__m512i vecs[16]) { + // Interleave 32-bit lanes. The _0 unpack is lanes + // 0/0/1/1/4/4/5/5/8/8/9/9/12/12/13/13, and the _2 unpack is lanes + // 2/2/3/3/6/6/7/7/10/10/11/11/14/14/15/15. 
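+  // (The 512-bit unpack instructions operate independently within each
+  // 128-bit lane, which is why the index patterns in these comments repeat
+  // every four words; the 128-bit shuffles at the end undo that lane-local
+  // grouping.)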
+ __m512i ab_0 = _mm512_unpacklo_epi32(vecs[0], vecs[1]); + __m512i ab_2 = _mm512_unpackhi_epi32(vecs[0], vecs[1]); + __m512i cd_0 = _mm512_unpacklo_epi32(vecs[2], vecs[3]); + __m512i cd_2 = _mm512_unpackhi_epi32(vecs[2], vecs[3]); + __m512i ef_0 = _mm512_unpacklo_epi32(vecs[4], vecs[5]); + __m512i ef_2 = _mm512_unpackhi_epi32(vecs[4], vecs[5]); + __m512i gh_0 = _mm512_unpacklo_epi32(vecs[6], vecs[7]); + __m512i gh_2 = _mm512_unpackhi_epi32(vecs[6], vecs[7]); + __m512i ij_0 = _mm512_unpacklo_epi32(vecs[8], vecs[9]); + __m512i ij_2 = _mm512_unpackhi_epi32(vecs[8], vecs[9]); + __m512i kl_0 = _mm512_unpacklo_epi32(vecs[10], vecs[11]); + __m512i kl_2 = _mm512_unpackhi_epi32(vecs[10], vecs[11]); + __m512i mn_0 = _mm512_unpacklo_epi32(vecs[12], vecs[13]); + __m512i mn_2 = _mm512_unpackhi_epi32(vecs[12], vecs[13]); + __m512i op_0 = _mm512_unpacklo_epi32(vecs[14], vecs[15]); + __m512i op_2 = _mm512_unpackhi_epi32(vecs[14], vecs[15]); + + // Interleave 64-bit lates. The _0 unpack is lanes + // 0/0/0/0/4/4/4/4/8/8/8/8/12/12/12/12, the _1 unpack is lanes + // 1/1/1/1/5/5/5/5/9/9/9/9/13/13/13/13, the _2 unpack is lanes + // 2/2/2/2/6/6/6/6/10/10/10/10/14/14/14/14, and the _3 unpack is lanes + // 3/3/3/3/7/7/7/7/11/11/11/11/15/15/15/15. + __m512i abcd_0 = _mm512_unpacklo_epi64(ab_0, cd_0); + __m512i abcd_1 = _mm512_unpackhi_epi64(ab_0, cd_0); + __m512i abcd_2 = _mm512_unpacklo_epi64(ab_2, cd_2); + __m512i abcd_3 = _mm512_unpackhi_epi64(ab_2, cd_2); + __m512i efgh_0 = _mm512_unpacklo_epi64(ef_0, gh_0); + __m512i efgh_1 = _mm512_unpackhi_epi64(ef_0, gh_0); + __m512i efgh_2 = _mm512_unpacklo_epi64(ef_2, gh_2); + __m512i efgh_3 = _mm512_unpackhi_epi64(ef_2, gh_2); + __m512i ijkl_0 = _mm512_unpacklo_epi64(ij_0, kl_0); + __m512i ijkl_1 = _mm512_unpackhi_epi64(ij_0, kl_0); + __m512i ijkl_2 = _mm512_unpacklo_epi64(ij_2, kl_2); + __m512i ijkl_3 = _mm512_unpackhi_epi64(ij_2, kl_2); + __m512i mnop_0 = _mm512_unpacklo_epi64(mn_0, op_0); + __m512i mnop_1 = _mm512_unpackhi_epi64(mn_0, op_0); + __m512i mnop_2 = _mm512_unpacklo_epi64(mn_2, op_2); + __m512i mnop_3 = _mm512_unpackhi_epi64(mn_2, op_2); + + // Interleave 128-bit lanes. The _0 unpack is + // 0/0/0/0/8/8/8/8/0/0/0/0/8/8/8/8, the _1 unpack is + // 1/1/1/1/9/9/9/9/1/1/1/1/9/9/9/9, and so on. + __m512i abcdefgh_0 = unpack_lo_128(abcd_0, efgh_0); + __m512i abcdefgh_1 = unpack_lo_128(abcd_1, efgh_1); + __m512i abcdefgh_2 = unpack_lo_128(abcd_2, efgh_2); + __m512i abcdefgh_3 = unpack_lo_128(abcd_3, efgh_3); + __m512i abcdefgh_4 = unpack_hi_128(abcd_0, efgh_0); + __m512i abcdefgh_5 = unpack_hi_128(abcd_1, efgh_1); + __m512i abcdefgh_6 = unpack_hi_128(abcd_2, efgh_2); + __m512i abcdefgh_7 = unpack_hi_128(abcd_3, efgh_3); + __m512i ijklmnop_0 = unpack_lo_128(ijkl_0, mnop_0); + __m512i ijklmnop_1 = unpack_lo_128(ijkl_1, mnop_1); + __m512i ijklmnop_2 = unpack_lo_128(ijkl_2, mnop_2); + __m512i ijklmnop_3 = unpack_lo_128(ijkl_3, mnop_3); + __m512i ijklmnop_4 = unpack_hi_128(ijkl_0, mnop_0); + __m512i ijklmnop_5 = unpack_hi_128(ijkl_1, mnop_1); + __m512i ijklmnop_6 = unpack_hi_128(ijkl_2, mnop_2); + __m512i ijklmnop_7 = unpack_hi_128(ijkl_3, mnop_3); + + // Interleave 128-bit lanes again for the final outputs. 
+ vecs[0] = unpack_lo_128(abcdefgh_0, ijklmnop_0); + vecs[1] = unpack_lo_128(abcdefgh_1, ijklmnop_1); + vecs[2] = unpack_lo_128(abcdefgh_2, ijklmnop_2); + vecs[3] = unpack_lo_128(abcdefgh_3, ijklmnop_3); + vecs[4] = unpack_lo_128(abcdefgh_4, ijklmnop_4); + vecs[5] = unpack_lo_128(abcdefgh_5, ijklmnop_5); + vecs[6] = unpack_lo_128(abcdefgh_6, ijklmnop_6); + vecs[7] = unpack_lo_128(abcdefgh_7, ijklmnop_7); + vecs[8] = unpack_hi_128(abcdefgh_0, ijklmnop_0); + vecs[9] = unpack_hi_128(abcdefgh_1, ijklmnop_1); + vecs[10] = unpack_hi_128(abcdefgh_2, ijklmnop_2); + vecs[11] = unpack_hi_128(abcdefgh_3, ijklmnop_3); + vecs[12] = unpack_hi_128(abcdefgh_4, ijklmnop_4); + vecs[13] = unpack_hi_128(abcdefgh_5, ijklmnop_5); + vecs[14] = unpack_hi_128(abcdefgh_6, ijklmnop_6); + vecs[15] = unpack_hi_128(abcdefgh_7, ijklmnop_7); +} + +INLINE void transpose_msg_vecs16(const uint8_t *const *inputs, + size_t block_offset, __m512i out[16]) { + out[0] = loadu_512(&inputs[0][block_offset]); + out[1] = loadu_512(&inputs[1][block_offset]); + out[2] = loadu_512(&inputs[2][block_offset]); + out[3] = loadu_512(&inputs[3][block_offset]); + out[4] = loadu_512(&inputs[4][block_offset]); + out[5] = loadu_512(&inputs[5][block_offset]); + out[6] = loadu_512(&inputs[6][block_offset]); + out[7] = loadu_512(&inputs[7][block_offset]); + out[8] = loadu_512(&inputs[8][block_offset]); + out[9] = loadu_512(&inputs[9][block_offset]); + out[10] = loadu_512(&inputs[10][block_offset]); + out[11] = loadu_512(&inputs[11][block_offset]); + out[12] = loadu_512(&inputs[12][block_offset]); + out[13] = loadu_512(&inputs[13][block_offset]); + out[14] = loadu_512(&inputs[14][block_offset]); + out[15] = loadu_512(&inputs[15][block_offset]); + for (size_t i = 0; i < 16; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs_512(out); +} + +INLINE void load_counters16(uint64_t counter, bool increment_counter, + __m512i *out_lo, __m512i *out_hi) { + const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter); + const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + const __m512i add1 = _mm512_and_si512(mask, add0); + __m512i l = _mm512_add_epi32(_mm512_set1_epi32((int32_t)counter), add1); + __mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT); + __m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32((int32_t)(counter >> 32)), carry, _mm512_set1_epi32((int32_t)(counter >> 32)), _mm512_set1_epi32(1)); + *out_lo = l; + *out_hi = h; +} + +static +void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks, + const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, + uint8_t *out) { + __m512i h_vecs[8] = { + set1_512(key[0]), set1_512(key[1]), set1_512(key[2]), set1_512(key[3]), + set1_512(key[4]), set1_512(key[5]), set1_512(key[6]), set1_512(key[7]), + }; + __m512i counter_low_vec, counter_high_vec; + load_counters16(counter, increment_counter, &counter_low_vec, + &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m512i block_len_vec = set1_512(BLAKE3_BLOCK_LEN); + __m512i block_flags_vec = set1_512(block_flags); + __m512i msg_vecs[16]; + transpose_msg_vecs16(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m512i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1_512(IV[0]), 
set1_512(IV[1]), set1_512(IV[2]), set1_512(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, + }; + round_fn16(v, msg_vecs, 0); + round_fn16(v, msg_vecs, 1); + round_fn16(v, msg_vecs, 2); + round_fn16(v, msg_vecs, 3); + round_fn16(v, msg_vecs, 4); + round_fn16(v, msg_vecs, 5); + round_fn16(v, msg_vecs, 6); + h_vecs[0] = xor_512(v[0], v[8]); + h_vecs[1] = xor_512(v[1], v[9]); + h_vecs[2] = xor_512(v[2], v[10]); + h_vecs[3] = xor_512(v[3], v[11]); + h_vecs[4] = xor_512(v[4], v[12]); + h_vecs[5] = xor_512(v[5], v[13]); + h_vecs[6] = xor_512(v[6], v[14]); + h_vecs[7] = xor_512(v[7], v[15]); + + block_flags = flags; + } + + // transpose_vecs_512 operates on a 16x16 matrix of words, but we only have 8 + // state vectors. Pad the matrix with zeros. After transposition, store the + // lower half of each vector. + __m512i padded[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1_512(0), set1_512(0), set1_512(0), set1_512(0), + set1_512(0), set1_512(0), set1_512(0), set1_512(0), + }; + transpose_vecs_512(padded); + _mm256_mask_storeu_epi32(&out[0 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[0])); + _mm256_mask_storeu_epi32(&out[1 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[1])); + _mm256_mask_storeu_epi32(&out[2 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[2])); + _mm256_mask_storeu_epi32(&out[3 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[3])); + _mm256_mask_storeu_epi32(&out[4 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[4])); + _mm256_mask_storeu_epi32(&out[5 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[5])); + _mm256_mask_storeu_epi32(&out[6 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[6])); + _mm256_mask_storeu_epi32(&out[7 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[7])); + _mm256_mask_storeu_epi32(&out[8 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[8])); + _mm256_mask_storeu_epi32(&out[9 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[9])); + _mm256_mask_storeu_epi32(&out[10 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[10])); + _mm256_mask_storeu_epi32(&out[11 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[11])); + _mm256_mask_storeu_epi32(&out[12 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[12])); + _mm256_mask_storeu_epi32(&out[13 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[13])); + _mm256_mask_storeu_epi32(&out[14 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[14])); + _mm256_mask_storeu_epi32(&out[15 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[15])); +} + +/* + * ---------------------------------------------------------------------------- + * hash_many_avx512 + * ---------------------------------------------------------------------------- + */ + +INLINE void hash_one_avx512(const uint8_t *input, size_t blocks, + const uint32_t key[8], uint64_t counter, + uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { + uint32_t cv[8]; + memcpy(cv, key, BLAKE3_KEY_LEN); + uint8_t block_flags = flags | flags_start; + while (blocks > 0) { + if (blocks == 1) { + block_flags |= flags_end; + } + blake3_compress_in_place_avx512(cv, input, BLAKE3_BLOCK_LEN, counter, + block_flags); + input = &input[BLAKE3_BLOCK_LEN]; + blocks -= 1; + block_flags = flags; + } + memcpy(out, cv, BLAKE3_OUT_LEN); +} 
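+
+// Usage sketch (illustrative, not part of the vendored source): callers hash
+// batches of equal-length inputs through blake3_hash_many_avx512. Hashing
+// four full 1 KiB chunks starting at chunk counter 0 looks roughly like:
+//
+//   const uint8_t *inputs[4] = {c0, c1, c2, c3}; // hypothetical chunk bufs
+//   uint8_t out[4 * BLAKE3_OUT_LEN];
+//   blake3_hash_many_avx512(inputs, 4, BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN,
+//                           key, /*counter=*/0, /*increment_counter=*/true,
+//                           0, CHUNK_START, CHUNK_END, out);
+//
+// increment_counter is true when hashing chunks (input i uses counter + i)
+// and false when hashing parent nodes, which all use counter 0.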
+
+void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
+                             size_t blocks, const uint32_t key[8],
+                             uint64_t counter, bool increment_counter,
+                             uint8_t flags, uint8_t flags_start,
+                             uint8_t flags_end, uint8_t *out) {
+  while (num_inputs >= 16) {
+    blake3_hash16_avx512(inputs, blocks, key, counter, increment_counter, flags,
+                         flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += 16;
+    }
+    inputs += 16;
+    num_inputs -= 16;
+    out = &out[16 * BLAKE3_OUT_LEN];
+  }
+  while (num_inputs >= 8) {
+    blake3_hash8_avx512(inputs, blocks, key, counter, increment_counter, flags,
+                        flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += 8;
+    }
+    inputs += 8;
+    num_inputs -= 8;
+    out = &out[8 * BLAKE3_OUT_LEN];
+  }
+  while (num_inputs >= 4) {
+    blake3_hash4_avx512(inputs, blocks, key, counter, increment_counter, flags,
+                        flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += 4;
+    }
+    inputs += 4;
+    num_inputs -= 4;
+    out = &out[4 * BLAKE3_OUT_LEN];
+  }
+  while (num_inputs > 0) {
+    hash_one_avx512(inputs[0], blocks, key, counter, flags, flags_start,
+                    flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    num_inputs -= 1;
+    out = &out[BLAKE3_OUT_LEN];
+  }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
new file mode 100644
index 000000000000..3afc0e2250e2
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
@@ -0,0 +1,2601 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN _blake3_hash_many_avx512
+HIDDEN blake3_hash_many_avx512
+HIDDEN blake3_compress_in_place_avx512
+HIDDEN _blake3_compress_in_place_avx512
+HIDDEN blake3_compress_xof_avx512
+HIDDEN _blake3_compress_xof_avx512
+.global _blake3_hash_many_avx512
+.global blake3_hash_many_avx512
+.global blake3_compress_in_place_avx512
+.global _blake3_compress_in_place_avx512
+.global blake3_compress_xof_avx512
+.global _blake3_compress_xof_avx512
+
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+.p2align 6
+_blake3_hash_many_avx512:
+blake3_hash_many_avx512:
+        _CET_ENDBR
+        push r15
+        push r14
+        push r13
+        push r12
+        push rbx
+        push rbp
+        mov rbp, rsp
+        sub rsp, 144
+        and rsp, 0xFFFFFFFFFFFFFFC0
+        neg r9
+        kmovw k1, r9d
+        vmovd xmm0, r8d
+        vpbroadcastd ymm0, xmm0
+        shr r8, 32
+        vmovd xmm1, r8d
+        vpbroadcastd ymm1, xmm1
+        vmovdqa ymm4, ymm1
+        vmovdqa ymm5, ymm1
+        vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]
+        vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]
+        vpcmpltud k2, ymm2, ymm0
+        vpcmpltud k3, ymm3, ymm0
+        vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}
+        vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}
+        knotw k2, k1
+        vmovdqa32 ymm2 {k2}, ymm0
+        vmovdqa32 ymm3 {k2}, ymm0
+        vmovdqa32 ymm4 {k2}, ymm1
+        vmovdqa32 ymm5 {k2}, ymm1
+        vmovdqa ymmword ptr [rsp], ymm2
+        vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3
+        vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4
+        vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5
+        shl rdx, 6
+        mov qword ptr [rsp+0x80], rdx
+        cmp rsi, 16
+        jc 3f
+2:
+        vpbroadcastd zmm0, dword ptr [rcx]
+        vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]
+        vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]
+        vpbroadcastd
zmm3, dword ptr [rcx+0x3*0x4] + vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4] + vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4] + vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4] + vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4] + movzx eax, byte ptr [rbp+0x38] + movzx ebx, byte ptr [rbp+0x40] + or eax, ebx + xor edx, edx +.p2align 5 +9: + movzx ebx, byte ptr [rbp+0x48] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x80] + cmove eax, ebx + mov dword ptr [rsp+0x88], eax + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x40] + mov r13, qword ptr [rdi+0x48] + mov r14, qword ptr [rdi+0x50] + mov r15, qword ptr [rdi+0x58] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 + vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01 + vpunpcklqdq zmm8, zmm16, zmm17 + vpunpckhqdq zmm9, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01 + vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 + vpunpcklqdq zmm10, zmm18, zmm19 + vpunpckhqdq zmm11, zmm18, zmm19 + mov r8, qword ptr [rdi+0x20] + mov r9, qword ptr [rdi+0x28] + mov r10, qword ptr [rdi+0x30] + mov r11, qword ptr [rdi+0x38] + mov r12, qword ptr [rdi+0x60] + mov r13, qword ptr [rdi+0x68] + mov r14, qword ptr [rdi+0x70] + mov r15, qword ptr [rdi+0x78] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 + vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01 + vpunpcklqdq zmm12, zmm16, zmm17 + vpunpckhqdq zmm13, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01 + vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 + vpunpcklqdq zmm14, zmm18, zmm19 + vpunpckhqdq zmm15, zmm18, zmm19 + vmovdqa32 zmm27, zmmword ptr [INDEX0+rip] + vmovdqa32 zmm31, zmmword ptr [INDEX1+rip] + vshufps zmm16, zmm8, zmm10, 136 + vshufps zmm17, zmm12, zmm14, 136 + vmovdqa32 zmm20, zmm16 + vpermt2d zmm16, zmm27, zmm17 + vpermt2d zmm20, zmm31, zmm17 + vshufps zmm17, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm21, zmm17 + vpermt2d zmm17, zmm27, zmm30 + vpermt2d zmm21, zmm31, zmm30 + vshufps zmm18, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm22, zmm18 + vpermt2d zmm18, zmm27, zmm8 + vpermt2d zmm22, zmm31, zmm8 + vshufps zmm19, zmm9, zmm11, 221 + vshufps zmm8, zmm13, zmm15, 221 + vmovdqa32 zmm23, zmm19 + vpermt2d zmm19, zmm27, zmm8 + vpermt2d zmm23, zmm31, zmm8 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x40] + mov r13, qword ptr [rdi+0x48] + mov r14, qword ptr [rdi+0x50] + mov r15, qword ptr [rdi+0x58] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm8, zmm24, zmm25 + vpunpckhqdq zmm9, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword 
ptr [r11+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm10, zmm24, zmm25 + vpunpckhqdq zmm11, zmm24, zmm25 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + mov r8, qword ptr [rdi+0x20] + mov r9, qword ptr [rdi+0x28] + mov r10, qword ptr [rdi+0x30] + mov r11, qword ptr [rdi+0x38] + mov r12, qword ptr [rdi+0x60] + mov r13, qword ptr [rdi+0x68] + mov r14, qword ptr [rdi+0x70] + mov r15, qword ptr [rdi+0x78] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm12, zmm24, zmm25 + vpunpckhqdq zmm13, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm14, zmm24, zmm25 + vpunpckhqdq zmm15, zmm24, zmm25 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + vshufps zmm24, zmm8, zmm10, 136 + vshufps zmm30, zmm12, zmm14, 136 + vmovdqa32 zmm28, zmm24 + vpermt2d zmm24, zmm27, zmm30 + vpermt2d zmm28, zmm31, zmm30 + vshufps zmm25, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm29, zmm25 + vpermt2d zmm25, zmm27, zmm30 + vpermt2d zmm29, zmm31, zmm30 + vshufps zmm26, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm30, zmm26 + vpermt2d zmm26, zmm27, zmm8 + vpermt2d zmm30, zmm31, zmm8 + vshufps zmm8, zmm9, zmm11, 221 + vshufps zmm10, zmm13, zmm15, 221 + vpermi2d zmm27, zmm8, zmm10 + vpermi2d zmm31, zmm8, zmm10 + vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip] + vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip] + vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip] + vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip] + vmovdqa32 zmm12, zmmword ptr [rsp] + vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40] + vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip] + vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4] + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord 
zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm24 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm23 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord 
zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm27 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm21 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm17 + 
vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + 
vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm28 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm26 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, 
zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm22 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm31 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, 
zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpxord zmm0, zmm0, zmm8 + vpxord zmm1, zmm1, zmm9 + vpxord zmm2, zmm2, zmm10 + vpxord zmm3, zmm3, zmm11 + vpxord zmm4, zmm4, zmm12 + vpxord zmm5, zmm5, zmm13 + vpxord zmm6, zmm6, zmm14 + vpxord zmm7, zmm7, zmm15 + movzx eax, byte ptr [rbp+0x38] + jne 9b + mov rbx, qword ptr [rbp+0x50] + vpunpckldq zmm16, zmm0, zmm1 + vpunpckhdq zmm17, zmm0, zmm1 + vpunpckldq zmm18, zmm2, zmm3 + vpunpckhdq zmm19, zmm2, zmm3 + vpunpckldq zmm20, zmm4, zmm5 + vpunpckhdq zmm21, zmm4, zmm5 + vpunpckldq zmm22, zmm6, zmm7 + vpunpckhdq zmm23, zmm6, zmm7 + vpunpcklqdq zmm0, zmm16, zmm18 + vpunpckhqdq zmm1, zmm16, zmm18 + vpunpcklqdq zmm2, zmm17, zmm19 + vpunpckhqdq zmm3, zmm17, zmm19 + vpunpcklqdq zmm4, zmm20, zmm22 + vpunpckhqdq zmm5, zmm20, zmm22 + vpunpcklqdq zmm6, zmm21, zmm23 + vpunpckhqdq zmm7, zmm21, zmm23 + vshufi32x4 zmm16, zmm0, zmm4, 0x88 + vshufi32x4 zmm17, zmm1, zmm5, 0x88 + vshufi32x4 zmm18, zmm2, zmm6, 0x88 + vshufi32x4 zmm19, zmm3, zmm7, 0x88 + vshufi32x4 zmm20, zmm0, zmm4, 0xDD + vshufi32x4 zmm21, zmm1, zmm5, 0xDD + vshufi32x4 zmm22, zmm2, zmm6, 0xDD + vshufi32x4 zmm23, zmm3, zmm7, 0xDD + vshufi32x4 zmm0, zmm16, zmm17, 0x88 + vshufi32x4 zmm1, zmm18, zmm19, 0x88 + vshufi32x4 zmm2, zmm20, zmm21, 0x88 + vshufi32x4 zmm3, zmm22, zmm23, 0x88 + vshufi32x4 zmm4, zmm16, zmm17, 0xDD + vshufi32x4 zmm5, zmm18, zmm19, 0xDD + vshufi32x4 zmm6, zmm20, zmm21, 0xDD + vshufi32x4 zmm7, zmm22, zmm23, 0xDD + vmovdqu32 zmmword ptr [rbx], zmm0 + vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1 + vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2 + vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3 + vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4 + vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5 + vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6 + vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7 + vmovdqa32 zmm0, zmmword ptr 
[rsp] + vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40] + vmovdqa32 zmm2, zmm0 + vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16} + vpcmpltud k2, zmm2, zmm0 + vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16} + vmovdqa32 zmmword ptr [rsp], zmm2 + vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1 + add rdi, 128 + add rbx, 512 + mov qword ptr [rbp+0x50], rbx + sub rsi, 16 + cmp rsi, 16 + jnc 2b + test rsi, rsi + jnz 3f +4: + vzeroupper + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 6 +3: + test esi, 0x8 + je 3f + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+0x4] + vpbroadcastd ymm2, dword ptr [rcx+0x8] + vpbroadcastd ymm3, dword ptr [rcx+0xC] + vpbroadcastd ymm4, dword ptr [rcx+0x10] + vpbroadcastd ymm5, dword ptr [rcx+0x14] + vpbroadcastd ymm6, dword ptr [rcx+0x18] + vpbroadcastd ymm7, dword ptr [rcx+0x1C] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x20] + mov r13, qword ptr [rdi+0x28] + mov r14, qword ptr [rdi+0x30] + mov r15, qword ptr [rdi+0x38] + movzx eax, byte ptr [rbp+0x38] + movzx ebx, byte ptr [rbp+0x40] + or eax, ebx + xor edx, edx +2: + movzx ebx, byte ptr [rbp+0x48] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x80] + cmove eax, ebx + mov dword ptr [rsp+0x88], eax + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x40] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x40] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm16, ymm12, ymm14, 136 + vshufps ymm17, ymm12, ymm14, 221 + vshufps ymm18, ymm13, ymm15, 136 + vshufps ymm19, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x30] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x30] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm20, ymm12, ymm14, 136 + vshufps ymm21, ymm12, ymm14, 221 + vshufps ymm22, ymm13, ymm15, 136 + vshufps ymm23, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x20] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x20] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm24, ymm12, ymm14, 136 + vshufps ymm25, ymm12, ymm14, 221 + vshufps ymm26, ymm13, ymm15, 136 + vshufps ymm27, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x10] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x10] + 
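+ # Note: this is the 8-blocks-wide fallback taken when fewer than 16
+ # inputs remain. Each vmovups/vinsertf128 pair pulls one 16-byte strip
+ # of a message block from two of the eight inputs, and the vunpck*pd +
+ # vshufps groups transpose those strips so that ymm16-ymm31 end up
+ # holding message word i across all eight lanes.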
vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x10] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm28, ymm12, ymm14, 136 + vshufps ymm29, ymm12, ymm14, 221 + vshufps ymm30, ymm13, ymm15, 136 + vshufps ymm31, ymm13, ymm15, 221 + vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip] + vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip] + vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip] + vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip] + vmovdqa ymm12, ymmword ptr [rsp] + vmovdqa ymm13, ymmword ptr [rsp+0x40] + vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip] + vpbroadcastd ymm15, dword ptr [rsp+0x88] + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm24 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd 
ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm23 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + 
vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm27 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm21 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, 
ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm28 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm26 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, 
ymm23 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + 
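+ # Note: in the diagonal half-rounds of this transposed layout the
+ # register groups simply rotate (ymm5,6,7,4 against ymm10,11,8,9
+ # here): with one state word per register there is nothing to shuffle
+ # inside the vectors, so diagonalization is expressed by renaming which
+ # register plays which state word.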
vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm22 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm31 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 
+ vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+0x38] + jne 2b + mov rbx, qword ptr [rbp+0x50] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0xCC + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0xCC + vblendps ymm3, ymm12, ymm9, 0xCC + vperm2f128 ymm12, ymm1, ymm2, 0x20 + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0xCC + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 0x20 + vmovups ymmword ptr [rbx+0x20], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0xCC + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0xCC + vblendps ymm14, ymm14, ymm13, 0xCC + vperm2f128 ymm8, ymm10, ymm14, 0x20 + vmovups ymmword ptr [rbx+0x40], ymm8 + vblendps ymm15, ymm13, ymm15, 0xCC + vperm2f128 ymm13, ymm6, ymm15, 0x20 + vmovups ymmword ptr [rbx+0x60], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 0x31 + vperm2f128 ymm11, ymm3, ymm4, 0x31 + vmovups ymmword ptr [rbx+0x80], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 0x31 + vperm2f128 ymm15, ymm6, ymm15, 0x31 + vmovups ymmword ptr [rbx+0xA0], ymm11 + vmovups ymmword ptr [rbx+0xC0], ymm14 + vmovups ymmword ptr [rbx+0xE0], ymm15 + vmovdqa ymm0, ymmword ptr [rsp] + vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20] + vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20] + vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20] + vmovdqa ymmword ptr [rsp], ymm0 + vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2 + add rbx, 256 + mov qword ptr [rbp+0x50], rbx + add rdi, 64 + sub rsi, 8 +3: + mov rbx, qword ptr [rbp+0x50] + mov r15, qword ptr [rsp+0x80] + movzx r13, byte ptr [rbp+0x38] + movzx r12, byte ptr [rbp+0x48] + test esi, 0x4 + je 3f + vbroadcasti32x4 zmm0, xmmword ptr [rcx] + vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10] + vmovdqa xmm12, xmmword ptr [rsp] + vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10] + vpunpckldq xmm14, xmm12, xmm13 + vpunpckhdq xmm15, xmm12, xmm13 + vpermq ymm14, ymm14, 0xDC + vpermq ymm15, ymm15, 0xDC + vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip] + vinserti64x4 zmm13, zmm14, ymm15, 0x01 + mov eax, 17476 + kmovw k2, eax + vpblendmd zmm13 {k2}, zmm13, zmm12 + vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov eax, 43690 + kmovw k3, eax + mov eax, 34952 + kmovw k4, eax + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x88], eax + vmovdqa32 zmm2, zmm15 + vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4] + vpblendmd zmm3 {k4}, zmm13, zmm8 + vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01 + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02 + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03 + vmovups zmm9, zmmword ptr [r8+rdx-0x30] 
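+ # Note: this 4-blocks-wide tail works in-lane instead of transposing:
+ # vbroadcasti32x4 replicates the key words across all four 128-bit
+ # lanes, and each vmovups plus three vinserti32x4 packs one 16-byte
+ # quarter of a block from each of the four inputs into one zmm, so the
+ # lane-local vpshufd/vprord steps below run four single-block
+ # compressions side by side.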
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01 + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02 + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03 + vshufps zmm4, zmm8, zmm9, 136 + vshufps zmm5, zmm8, zmm9, 221 + vmovups zmm8, zmmword ptr [r8+rdx-0x20] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01 + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02 + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03 + vmovups zmm9, zmmword ptr [r8+rdx-0x10] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01 + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02 + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03 + vshufps zmm6, zmm8, zmm9, 136 + vshufps zmm7, zmm8, zmm9, 221 + vpshufd zmm6, zmm6, 0x93 + vpshufd zmm7, zmm7, 0x93 + mov al, 7 +9: + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 0x93 + vpshufd zmm3, zmm3, 0x4E + vpshufd zmm2, zmm2, 0x39 + vpaddd zmm0, zmm0, zmm6 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm7 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 0x39 + vpshufd zmm3, zmm3, 0x4E + vpshufd zmm2, zmm2, 0x93 + dec al + jz 9f + vshufps zmm8, zmm4, zmm5, 214 + vpshufd zmm9, zmm4, 0x0F + vpshufd zmm4, zmm8, 0x39 + vshufps zmm8, zmm6, zmm7, 250 + vpblendmd zmm9 {k3}, zmm9, zmm8 + vpunpcklqdq zmm8, zmm7, zmm5 + vpblendmd zmm8 {k4}, zmm8, zmm6 + vpshufd zmm8, zmm8, 0x78 + vpunpckhdq zmm5, zmm5, zmm7 + vpunpckldq zmm6, zmm6, zmm5 + vpshufd zmm7, zmm6, 0x1E + vmovdqa32 zmm5, zmm9 + vmovdqa32 zmm6, zmm8 + jmp 9b +9: + vpxord zmm0, zmm0, zmm2 + vpxord zmm1, zmm1, zmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02 + vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02 + vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03 + vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03 + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+0x40] + vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10] + vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+0x40], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +3: + test esi, 0x2 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovd xmm13, dword ptr [rsp] + vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovd xmm14, dword ptr [rsp+0x4] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vinserti128 ymm13, ymm13, xmm14, 0x01 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, 
r14d + mov dword ptr [rsp+0x88], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vpbroadcastd ymm8, dword ptr [rsp+0x88] + vpblendd ymm3, ymm13, ymm8, 0x88 + vmovups ymm8, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x93 + dec al + jz 9f + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0x0F + vpshufd ymm4, ymm8, 0x39 + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0xAA + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 0x88 + vpshufd ymm8, ymm8, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10] + vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8] + vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + vmovd xmm14, dword ptr [rsp] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vpinsrd xmm3, xmm14, eax, 3 + vmovdqa xmm2, xmm15 + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vmovups xmm9, xmmword ptr [r8+rdx-0x30] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vmovups xmm9, xmmword ptr [r8+rdx-0x10] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + 
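+ # Note: each vpaddd/vpxord/vprord cluster here is one half of the
+ # BLAKE3 quarter-round (the "G" function). A minimal scalar sketch of
+ # one full G, assuming uint32_t words and a rotr32() rotate-right
+ # helper (both names illustrative only):
+ #
+ #   static void g(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,
+ #                 uint32_t mx, uint32_t my) {
+ #     *a += *b + mx;  *d = rotr32(*d ^ *a, 16);
+ #     *c += *d;       *b = rotr32(*b ^ *c, 12);
+ #     *a += *b + my;  *d = rotr32(*d ^ *a, 8);
+ #     *c += *d;       *b = rotr32(*b ^ *c, 7);
+ #   }
+ #
+ # xmm0-xmm3 hold the four state rows, so each instruction advances four
+ # G functions at once; the vpshufd 0x93/0x4E/0x39 shuffles diagonalize
+ # the state between the column and diagonal half-rounds, "mov al, 7"
+ # sets up the seven BLAKE3 rounds, and the vshufps/vpblendd block after
+ # "jz 9f" applies the message-word permutation in registers.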
vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + jmp 4b +.p2align 6 +_blake3_compress_in_place_avx512: +blake3_compress_in_place_avx512: + _CET_ENDBR + vmovdqu xmm0, xmmword ptr [rdi] + vmovdqu xmm1, xmmword ptr [rdi+0x10] + movzx eax, r8b + movzx edx, dl + shl rax, 32 + add rdx, rax + vmovq xmm3, rcx + vmovq xmm4, rdx + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovups xmm8, xmmword ptr [rsi] + vmovups xmm9, xmmword ptr [rsi+0x10] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rsi+0x20] + vmovups xmm9, xmmword ptr [rsi+0x30] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vmovdqu xmmword ptr [rdi], xmm0 + vmovdqu xmmword ptr [rdi+0x10], xmm1 + ret + +.p2align 6 +_blake3_compress_xof_avx512: +blake3_compress_xof_avx512: + _CET_ENDBR + vmovdqu 
xmm0, xmmword ptr [rdi] + vmovdqu xmm1, xmmword ptr [rdi+0x10] + movzx eax, r8b + movzx edx, dl + shl rax, 32 + add rdx, rax + vmovq xmm3, rcx + vmovq xmm4, rdx + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovups xmm8, xmmword ptr [rsi] + vmovups xmm9, xmmword ptr [rsi+0x10] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rsi+0x20] + vmovups xmm9, xmmword ptr [rsi+0x30] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vpxor xmm2, xmm2, [rdi] + vpxor xmm3, xmm3, [rdi+0x10] + vmovdqu xmmword ptr [r9], xmm0 + vmovdqu xmmword ptr [r9+0x10], xmm1 + vmovdqu xmmword ptr [r9+0x20], xmm2 + vmovdqu xmmword ptr [r9+0x30], xmm3 + ret + +#ifdef __APPLE__ +.static_data +#else +.section .rodata +#endif +.p2align 6 +INDEX0: + .long 0, 1, 2, 3, 16, 17, 18, 19 + .long 8, 9, 10, 11, 24, 25, 26, 27 +INDEX1: + .long 4, 5, 6, 7, 20, 21, 22, 23 + .long 12, 13, 14, 15, 28, 29, 30, 31 +ADD0: + .long 0, 1, 2, 3, 4, 5, 6, 7 + .long 8, 9, 10, 11, 12, 13, 14, 15 +ADD1: .long 1 + +ADD16: .long 16 +BLAKE3_BLOCK_LEN: + .long 64 +.p2align 6 +BLAKE3_IV: +BLAKE3_IV_0: + .long 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A + +#endif diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S new file mode 100644 index 000000000000..e10b9f36cbcc --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S @@ -0,0 +1,2615 @@ +.intel_syntax noprefix + +.global _blake3_hash_many_avx512 +.global blake3_hash_many_avx512 +.global blake3_compress_in_place_avx512 +.global _blake3_compress_in_place_avx512 +.global blake3_compress_xof_avx512 +.global _blake3_compress_xof_avx512 + +.section .text +.p2align 6 +_blake3_hash_many_avx512: +blake3_hash_many_avx512: + push r15 + push r14 + push r13 + push r12 + push rdi + push rsi + push rbx + push rbp + mov rbp, rsp + sub rsp, 304 + and rsp, 0xFFFFFFFFFFFFFFC0 + vmovdqa xmmword ptr [rsp+0x90], xmm6 + vmovdqa xmmword ptr [rsp+0xA0], xmm7 + vmovdqa xmmword ptr [rsp+0xB0], xmm8 + 
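+ # Note: this Windows variant differs from the unix one above mainly in
+ # its prologue/epilogue: the Windows x64 calling convention makes
+ # xmm6-xmm15 callee-saved, so they are spilled to the aligned scratch
+ # area here, and the first four arguments arrive in rcx/rdx/r8/r9 and
+ # are moved below into rdi/rsi/rdx/rcx so the body can match the
+ # System V version.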
vmovdqa xmmword ptr [rsp+0xC0], xmm9 + vmovdqa xmmword ptr [rsp+0xD0], xmm10 + vmovdqa xmmword ptr [rsp+0xE0], xmm11 + vmovdqa xmmword ptr [rsp+0xF0], xmm12 + vmovdqa xmmword ptr [rsp+0x100], xmm13 + vmovdqa xmmword ptr [rsp+0x110], xmm14 + vmovdqa xmmword ptr [rsp+0x120], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+0x68] + movzx r9, byte ptr [rbp+0x70] + neg r9 + kmovw k1, r9d + vmovd xmm0, r8d + vpbroadcastd ymm0, xmm0 + shr r8, 32 + vmovd xmm1, r8d + vpbroadcastd ymm1, xmm1 + vmovdqa ymm4, ymm1 + vmovdqa ymm5, ymm1 + vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip] + vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip] + vpcmpltud k2, ymm2, ymm0 + vpcmpltud k3, ymm3, ymm0 + vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8} + vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8} + knotw k2, k1 + vmovdqa32 ymm2 {k2}, ymm0 + vmovdqa32 ymm3 {k2}, ymm0 + vmovdqa32 ymm4 {k2}, ymm1 + vmovdqa32 ymm5 {k2}, ymm1 + vmovdqa ymmword ptr [rsp], ymm2 + vmovdqa ymmword ptr [rsp+0x20], ymm3 + vmovdqa ymmword ptr [rsp+0x40], ymm4 + vmovdqa ymmword ptr [rsp+0x60], ymm5 + shl rdx, 6 + mov qword ptr [rsp+0x80], rdx + cmp rsi, 16 + jc 3f +2: + vpbroadcastd zmm0, dword ptr [rcx] + vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4] + vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4] + vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4] + vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4] + vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4] + vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4] + vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4] + movzx eax, byte ptr [rbp+0x78] + movzx ebx, byte ptr [rbp+0x80] + or eax, ebx + xor edx, edx +.p2align 5 +9: + movzx ebx, byte ptr [rbp+0x88] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x80] + cmove eax, ebx + mov dword ptr [rsp+0x88], eax + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x40] + mov r13, qword ptr [rdi+0x48] + mov r14, qword ptr [rdi+0x50] + mov r15, qword ptr [rdi+0x58] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 + vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01 + vpunpcklqdq zmm8, zmm16, zmm17 + vpunpckhqdq zmm9, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01 + vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 + vpunpcklqdq zmm10, zmm18, zmm19 + vpunpckhqdq zmm11, zmm18, zmm19 + mov r8, qword ptr [rdi+0x20] + mov r9, qword ptr [rdi+0x28] + mov r10, qword ptr [rdi+0x30] + mov r11, qword ptr [rdi+0x38] + mov r12, qword ptr [rdi+0x60] + mov r13, qword ptr [rdi+0x68] + mov r14, qword ptr [rdi+0x70] + mov r15, qword ptr [rdi+0x78] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 + vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01 + vpunpcklqdq zmm12, zmm16, zmm17 + vpunpckhqdq zmm13, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01 + vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 + vpunpcklqdq zmm14, zmm18, zmm19 + vpunpckhqdq zmm15, zmm18, zmm19 + vmovdqa32 zmm27, zmmword ptr [INDEX0+rip] + vmovdqa32 zmm31, zmmword ptr 
[INDEX1+rip] + vshufps zmm16, zmm8, zmm10, 136 + vshufps zmm17, zmm12, zmm14, 136 + vmovdqa32 zmm20, zmm16 + vpermt2d zmm16, zmm27, zmm17 + vpermt2d zmm20, zmm31, zmm17 + vshufps zmm17, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm21, zmm17 + vpermt2d zmm17, zmm27, zmm30 + vpermt2d zmm21, zmm31, zmm30 + vshufps zmm18, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm22, zmm18 + vpermt2d zmm18, zmm27, zmm8 + vpermt2d zmm22, zmm31, zmm8 + vshufps zmm19, zmm9, zmm11, 221 + vshufps zmm8, zmm13, zmm15, 221 + vmovdqa32 zmm23, zmm19 + vpermt2d zmm19, zmm27, zmm8 + vpermt2d zmm23, zmm31, zmm8 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x40] + mov r13, qword ptr [rdi+0x48] + mov r14, qword ptr [rdi+0x50] + mov r15, qword ptr [rdi+0x58] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm8, zmm24, zmm25 + vpunpckhqdq zmm9, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm10, zmm24, zmm25 + vpunpckhqdq zmm11, zmm24, zmm25 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + mov r8, qword ptr [rdi+0x20] + mov r9, qword ptr [rdi+0x28] + mov r10, qword ptr [rdi+0x30] + mov r11, qword ptr [rdi+0x38] + mov r12, qword ptr [rdi+0x60] + mov r13, qword ptr [rdi+0x68] + mov r14, qword ptr [rdi+0x70] + mov r15, qword ptr [rdi+0x78] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm12, zmm24, zmm25 + vpunpckhqdq zmm13, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm14, zmm24, zmm25 + vpunpckhqdq zmm15, zmm24, zmm25 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + vshufps zmm24, zmm8, zmm10, 136 + vshufps zmm30, zmm12, zmm14, 136 + vmovdqa32 zmm28, zmm24 + vpermt2d zmm24, zmm27, zmm30 + vpermt2d zmm28, zmm31, zmm30 + vshufps zmm25, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm29, zmm25 + vpermt2d zmm25, zmm27, zmm30 + vpermt2d zmm29, zmm31, zmm30 + vshufps zmm26, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm30, zmm26 + vpermt2d zmm26, zmm27, zmm8 + vpermt2d zmm30, zmm31, zmm8 + vshufps zmm8, zmm9, zmm11, 221 + vshufps zmm10, zmm13, zmm15, 221 + vpermi2d zmm27, zmm8, zmm10 + vpermi2d zmm31, zmm8, zmm10 + vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip] + vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip] + vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip] + vpbroadcastd zmm11, dword ptr 
[BLAKE3_IV_3+rip] + vmovdqa32 zmm12, zmmword ptr [rsp] + vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40] + vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip] + vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4] + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm24 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm23 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 
16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, 
zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm27 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm21 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, 
zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm28 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm26 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + 
vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm22 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, 
zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm31 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpxord zmm0, zmm0, zmm8 + vpxord zmm1, zmm1, zmm9 + vpxord zmm2, zmm2, zmm10 + vpxord zmm3, zmm3, zmm11 + vpxord zmm4, zmm4, zmm12 + vpxord zmm5, zmm5, zmm13 + vpxord zmm6, zmm6, zmm14 + vpxord zmm7, zmm7, zmm15 + movzx eax, byte ptr [rbp+0x78] + jne 9b + mov rbx, qword ptr [rbp+0x90] + vpunpckldq zmm16, zmm0, zmm1 + vpunpckhdq zmm17, zmm0, zmm1 + vpunpckldq zmm18, zmm2, zmm3 + vpunpckhdq zmm19, zmm2, zmm3 + vpunpckldq zmm20, zmm4, zmm5 + 
vpunpckhdq zmm21, zmm4, zmm5 + vpunpckldq zmm22, zmm6, zmm7 + vpunpckhdq zmm23, zmm6, zmm7 + vpunpcklqdq zmm0, zmm16, zmm18 + vpunpckhqdq zmm1, zmm16, zmm18 + vpunpcklqdq zmm2, zmm17, zmm19 + vpunpckhqdq zmm3, zmm17, zmm19 + vpunpcklqdq zmm4, zmm20, zmm22 + vpunpckhqdq zmm5, zmm20, zmm22 + vpunpcklqdq zmm6, zmm21, zmm23 + vpunpckhqdq zmm7, zmm21, zmm23 + vshufi32x4 zmm16, zmm0, zmm4, 0x88 + vshufi32x4 zmm17, zmm1, zmm5, 0x88 + vshufi32x4 zmm18, zmm2, zmm6, 0x88 + vshufi32x4 zmm19, zmm3, zmm7, 0x88 + vshufi32x4 zmm20, zmm0, zmm4, 0xDD + vshufi32x4 zmm21, zmm1, zmm5, 0xDD + vshufi32x4 zmm22, zmm2, zmm6, 0xDD + vshufi32x4 zmm23, zmm3, zmm7, 0xDD + vshufi32x4 zmm0, zmm16, zmm17, 0x88 + vshufi32x4 zmm1, zmm18, zmm19, 0x88 + vshufi32x4 zmm2, zmm20, zmm21, 0x88 + vshufi32x4 zmm3, zmm22, zmm23, 0x88 + vshufi32x4 zmm4, zmm16, zmm17, 0xDD + vshufi32x4 zmm5, zmm18, zmm19, 0xDD + vshufi32x4 zmm6, zmm20, zmm21, 0xDD + vshufi32x4 zmm7, zmm22, zmm23, 0xDD + vmovdqu32 zmmword ptr [rbx], zmm0 + vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1 + vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2 + vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3 + vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4 + vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5 + vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6 + vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7 + vmovdqa32 zmm0, zmmword ptr [rsp] + vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40] + vmovdqa32 zmm2, zmm0 + vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16} + vpcmpltud k2, zmm2, zmm0 + vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16} + vmovdqa32 zmmword ptr [rsp], zmm2 + vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1 + add rdi, 128 + add rbx, 512 + mov qword ptr [rbp+0x90], rbx + sub rsi, 16 + cmp rsi, 16 + jnc 2b + test rsi, rsi + jne 3f +4: + vzeroupper + vmovdqa xmm6, xmmword ptr [rsp+0x90] + vmovdqa xmm7, xmmword ptr [rsp+0xA0] + vmovdqa xmm8, xmmword ptr [rsp+0xB0] + vmovdqa xmm9, xmmword ptr [rsp+0xC0] + vmovdqa xmm10, xmmword ptr [rsp+0xD0] + vmovdqa xmm11, xmmword ptr [rsp+0xE0] + vmovdqa xmm12, xmmword ptr [rsp+0xF0] + vmovdqa xmm13, xmmword ptr [rsp+0x100] + vmovdqa xmm14, xmmword ptr [rsp+0x110] + vmovdqa xmm15, xmmword ptr [rsp+0x120] + mov rsp, rbp + pop rbp + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 6 +3: + test esi, 0x8 + je 3f + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+0x4] + vpbroadcastd ymm2, dword ptr [rcx+0x8] + vpbroadcastd ymm3, dword ptr [rcx+0xC] + vpbroadcastd ymm4, dword ptr [rcx+0x10] + vpbroadcastd ymm5, dword ptr [rcx+0x14] + vpbroadcastd ymm6, dword ptr [rcx+0x18] + vpbroadcastd ymm7, dword ptr [rcx+0x1C] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x20] + mov r13, qword ptr [rdi+0x28] + mov r14, qword ptr [rdi+0x30] + mov r15, qword ptr [rdi+0x38] + movzx eax, byte ptr [rbp+0x78] + movzx ebx, byte ptr [rbp+0x80] + or eax, ebx + xor edx, edx +2: + movzx ebx, byte ptr [rbp+0x88] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x80] + cmove eax, ebx + mov dword ptr [rsp+0x88], eax + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x40] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x40] + vinsertf128 
ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm16, ymm12, ymm14, 136 + vshufps ymm17, ymm12, ymm14, 221 + vshufps ymm18, ymm13, ymm15, 136 + vshufps ymm19, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x30] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x30] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm20, ymm12, ymm14, 136 + vshufps ymm21, ymm12, ymm14, 221 + vshufps ymm22, ymm13, ymm15, 136 + vshufps ymm23, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x20] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x20] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm24, ymm12, ymm14, 136 + vshufps ymm25, ymm12, ymm14, 221 + vshufps ymm26, ymm13, ymm15, 136 + vshufps ymm27, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x10] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x10] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm28, ymm12, ymm14, 136 + vshufps ymm29, ymm12, ymm14, 221 + vshufps ymm30, ymm13, ymm15, 136 + vshufps ymm31, ymm13, ymm15, 221 + vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip] + vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip] + vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip] + vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip] + vmovdqa ymm12, ymmword ptr [rsp] + vmovdqa ymm13, ymmword ptr [rsp+0x40] + vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip] + vpbroadcastd ymm15, dword ptr [rsp+0x88] + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + 
vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm24 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm23 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm17 + 
vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm27 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord 
ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm21 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + 
vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm28 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm26 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + 
vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm22 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm31 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 
+ vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+0x78] + jne 2b + mov rbx, qword ptr [rbp+0x90] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0xCC + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0xCC + vblendps ymm3, ymm12, ymm9, 0xCC + vperm2f128 ymm12, ymm1, ymm2, 0x20 + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0xCC + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 0x20 + vmovups ymmword ptr [rbx+0x20], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0xCC + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0xCC + vblendps ymm14, ymm14, ymm13, 0xCC + vperm2f128 ymm8, ymm10, ymm14, 0x20 + vmovups ymmword ptr [rbx+0x40], ymm8 + vblendps ymm15, ymm13, ymm15, 0xCC + vperm2f128 ymm13, ymm6, ymm15, 0x20 + vmovups ymmword ptr [rbx+0x60], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 0x31 + vperm2f128 ymm11, ymm3, ymm4, 0x31 + vmovups ymmword ptr [rbx+0x80], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 0x31 + vperm2f128 ymm15, ymm6, ymm15, 0x31 + vmovups ymmword ptr [rbx+0xA0], ymm11 + vmovups ymmword ptr [rbx+0xC0], ymm14 + vmovups ymmword ptr [rbx+0xE0], ymm15 + vmovdqa 
ymm0, ymmword ptr [rsp] + vmovdqa ymm2, ymmword ptr [rsp+0x40] + vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20] + vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20] + vmovdqa ymmword ptr [rsp], ymm0 + vmovdqa ymmword ptr [rsp+0x40], ymm2 + add rbx, 256 + mov qword ptr [rbp+0x90], rbx + add rdi, 64 + sub rsi, 8 +3: + mov rbx, qword ptr [rbp+0x90] + mov r15, qword ptr [rsp+0x80] + movzx r13, byte ptr [rbp+0x78] + movzx r12, byte ptr [rbp+0x88] + test esi, 0x4 + je 3f + vbroadcasti32x4 zmm0, xmmword ptr [rcx] + vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10] + vmovdqa xmm12, xmmword ptr [rsp] + vmovdqa xmm13, xmmword ptr [rsp+0x40] + vpunpckldq xmm14, xmm12, xmm13 + vpunpckhdq xmm15, xmm12, xmm13 + vpermq ymm14, ymm14, 0xDC + vpermq ymm15, ymm15, 0xDC + vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip] + vinserti64x4 zmm13, zmm14, ymm15, 0x01 + mov eax, 17476 + kmovw k2, eax + vpblendmd zmm13 {k2}, zmm13, zmm12 + vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov eax, 43690 + kmovw k3, eax + mov eax, 34952 + kmovw k4, eax + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x88], eax + vmovdqa32 zmm2, zmm15 + vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4] + vpblendmd zmm3 {k4}, zmm13, zmm8 + vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01 + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02 + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03 + vmovups zmm9, zmmword ptr [r8+rdx-0x30] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01 + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02 + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03 + vshufps zmm4, zmm8, zmm9, 136 + vshufps zmm5, zmm8, zmm9, 221 + vmovups zmm8, zmmword ptr [r8+rdx-0x20] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01 + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02 + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03 + vmovups zmm9, zmmword ptr [r8+rdx-0x10] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01 + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02 + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03 + vshufps zmm6, zmm8, zmm9, 136 + vshufps zmm7, zmm8, zmm9, 221 + vpshufd zmm6, zmm6, 0x93 + vpshufd zmm7, zmm7, 0x93 + mov al, 7 +9: + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 0x93 + vpshufd zmm3, zmm3, 0x4E + vpshufd zmm2, zmm2, 0x39 + vpaddd zmm0, zmm0, zmm6 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm7 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 0x39 + vpshufd zmm3, zmm3, 0x4E + vpshufd zmm2, zmm2, 0x93 + dec al + jz 9f + vshufps zmm8, zmm4, zmm5, 214 + vpshufd zmm9, zmm4, 0x0F + vpshufd zmm4, 
zmm8, 0x39 + vshufps zmm8, zmm6, zmm7, 250 + vpblendmd zmm9 {k3}, zmm9, zmm8 + vpunpcklqdq zmm8, zmm7, zmm5 + vpblendmd zmm8 {k4}, zmm8, zmm6 + vpshufd zmm8, zmm8, 0x78 + vpunpckhdq zmm5, zmm5, zmm7 + vpunpckldq zmm6, zmm6, zmm5 + vpshufd zmm7, zmm6, 0x1E + vmovdqa32 zmm5, zmm9 + vmovdqa32 zmm6, zmm8 + jmp 9b +9: + vpxord zmm0, zmm0, zmm2 + vpxord zmm1, zmm1, zmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02 + vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02 + vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03 + vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03 + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+0x40] + vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10] + vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+0x40], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +3: + test esi, 0x2 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovd xmm13, dword ptr [rsp] + vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovd xmm14, dword ptr [rsp+0x4] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vinserti128 ymm13, ymm13, xmm14, 0x01 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x88], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vpbroadcastd ymm8, dword ptr [rsp+0x88] + vpblendd ymm3, ymm13, ymm8, 0x88 + vmovups ymm8, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x93 + dec al + jz 9f + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0x0F + vpshufd ymm4, ymm8, 0x39 + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0xAA + vpunpcklqdq 
ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 0x88 + vpshufd ymm8, ymm8, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+0x40] + vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8] + vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+0x40], xmm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + vmovd xmm14, dword ptr [rsp] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vpinsrd xmm3, xmm14, eax, 3 + vmovdqa xmm2, xmm15 + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vmovups xmm9, xmmword ptr [r8+rdx-0x30] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vmovups xmm9, xmmword ptr [r8+rdx-0x10] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + jmp 4b + + +.p2align 6 +_blake3_compress_in_place_avx512: +blake3_compress_in_place_avx512: + sub rsp, 72 + vmovdqa xmmword ptr [rsp], xmm6 + vmovdqa xmmword ptr [rsp+0x10], xmm7 + vmovdqa xmmword ptr [rsp+0x20], xmm8 + vmovdqa xmmword ptr [rsp+0x30], xmm9 + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + movzx eax, byte ptr [rsp+0x70] + movzx r8d, r8b + shl rax, 32 + add r8, rax + vmovq xmm3, r9 + vmovq xmm4, r8 + vpunpcklqdq xmm3, xmm3, 
xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovups xmm8, xmmword ptr [rdx] + vmovups xmm9, xmmword ptr [rdx+0x10] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rdx+0x20] + vmovups xmm9, xmmword ptr [rdx+0x30] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vmovdqu xmmword ptr [rcx], xmm0 + vmovdqu xmmword ptr [rcx+0x10], xmm1 + vmovdqa xmm6, xmmword ptr [rsp] + vmovdqa xmm7, xmmword ptr [rsp+0x10] + vmovdqa xmm8, xmmword ptr [rsp+0x20] + vmovdqa xmm9, xmmword ptr [rsp+0x30] + add rsp, 72 + ret + + +.p2align 6 +_blake3_compress_xof_avx512: +blake3_compress_xof_avx512: + sub rsp, 72 + vmovdqa xmmword ptr [rsp], xmm6 + vmovdqa xmmword ptr [rsp+0x10], xmm7 + vmovdqa xmmword ptr [rsp+0x20], xmm8 + vmovdqa xmmword ptr [rsp+0x30], xmm9 + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + movzx eax, byte ptr [rsp+0x70] + movzx r8d, r8b + mov r10, qword ptr [rsp+0x78] + shl rax, 32 + add r8, rax + vmovq xmm3, r9 + vmovq xmm4, r8 + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovups xmm8, xmmword ptr [rdx] + vmovups xmm9, xmmword ptr [rdx+0x10] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rdx+0x20] + vmovups xmm9, xmmword ptr [rdx+0x30] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, 
xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vpxor xmm2, xmm2, xmmword ptr [rcx] + vpxor xmm3, xmm3, xmmword ptr [rcx+0x10] + vmovdqu xmmword ptr [r10], xmm0 + vmovdqu xmmword ptr [r10+0x10], xmm1 + vmovdqu xmmword ptr [r10+0x20], xmm2 + vmovdqu xmmword ptr [r10+0x30], xmm3 + vmovdqa xmm6, xmmword ptr [rsp] + vmovdqa xmm7, xmmword ptr [rsp+0x10] + vmovdqa xmm8, xmmword ptr [rsp+0x20] + vmovdqa xmm9, xmmword ptr [rsp+0x30] + add rsp, 72 + ret + +.section .rodata +.p2align 6 +INDEX0: + .long 0, 1, 2, 3, 16, 17, 18, 19 + .long 8, 9, 10, 11, 24, 25, 26, 27 +INDEX1: + .long 4, 5, 6, 7, 20, 21, 22, 23 + .long 12, 13, 14, 15, 28, 29, 30, 31 +ADD0: + .long 0, 1, 2, 3, 4, 5, 6, 7 + .long 8, 9, 10, 11, 12, 13, 14, 15 +ADD1: .long 1 + +ADD16: .long 16 +BLAKE3_BLOCK_LEN: + .long 64 +.p2align 6 +BLAKE3_IV: +BLAKE3_IV_0: + .long 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm new file mode 100644 index 000000000000..b19efbaaeb36 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm @@ -0,0 +1,2634 @@ +public _blake3_hash_many_avx512 +public blake3_hash_many_avx512 +public blake3_compress_in_place_avx512 +public _blake3_compress_in_place_avx512 +public blake3_compress_xof_avx512 +public _blake3_compress_xof_avx512 + +_TEXT SEGMENT ALIGN(16) 'CODE' + +ALIGN 16 +blake3_hash_many_avx512 PROC +_blake3_hash_many_avx512 PROC + push r15 + push r14 + push r13 + push r12 + push rdi + push rsi + push rbx + push rbp + mov rbp, rsp + sub rsp, 304 + and rsp, 0FFFFFFFFFFFFFFC0H + vmovdqa xmmword ptr [rsp+90H], xmm6 + vmovdqa xmmword ptr [rsp+0A0H], xmm7 + vmovdqa xmmword ptr [rsp+0B0H], xmm8 + vmovdqa xmmword ptr [rsp+0C0H], xmm9 + vmovdqa xmmword ptr [rsp+0D0H], xmm10 + vmovdqa xmmword ptr [rsp+0E0H], xmm11 + vmovdqa xmmword ptr [rsp+0F0H], xmm12 + vmovdqa xmmword ptr [rsp+100H], xmm13 + vmovdqa xmmword ptr [rsp+110H], xmm14 + vmovdqa xmmword ptr [rsp+120H], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+68H] + movzx r9, byte ptr [rbp+70H] + neg r9 + kmovw k1, r9d + vmovd xmm0, r8d + vpbroadcastd ymm0, xmm0 + shr r8, 32 + vmovd xmm1, r8d + vpbroadcastd ymm1, xmm1 + vmovdqa ymm4, ymm1 + vmovdqa ymm5, ymm1 + vpaddd ymm2, ymm0, ymmword ptr [ADD0] + vpaddd ymm3, ymm0, ymmword ptr [ADD0+32] + vpcmpud k2, ymm2, ymm0, 1 + vpcmpud k3, ymm3, ymm0, 1 + ; XXX: ml64.exe does not currently understand the syntax. We use a workaround. 
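+ ; (The commented-out forms below use the EVEX embedded-broadcast operand,
+ ; e.g. "dword ptr [ADD1] {1to8}", which ml64.exe rejects. Broadcasting ADD1
+ ; into a register first and then doing the masked vpaddd computes the same
+ ; result.)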
+ vpbroadcastd ymm6, dword ptr [ADD1] + vpaddd ymm4 {k2}, ymm4, ymm6 + vpaddd ymm5 {k3}, ymm5, ymm6 + ; vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1] {1to8} + ; vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1] {1to8} + knotw k2, k1 + vmovdqa32 ymm2 {k2}, ymm0 + vmovdqa32 ymm3 {k2}, ymm0 + vmovdqa32 ymm4 {k2}, ymm1 + vmovdqa32 ymm5 {k2}, ymm1 + vmovdqa ymmword ptr [rsp], ymm2 + vmovdqa ymmword ptr [rsp+20H], ymm3 + vmovdqa ymmword ptr [rsp+40H], ymm4 + vmovdqa ymmword ptr [rsp+60H], ymm5 + shl rdx, 6 + mov qword ptr [rsp+80H], rdx + cmp rsi, 16 + jc final15blocks +outerloop16: + vpbroadcastd zmm0, dword ptr [rcx] + vpbroadcastd zmm1, dword ptr [rcx+1H*4H] + vpbroadcastd zmm2, dword ptr [rcx+2H*4H] + vpbroadcastd zmm3, dword ptr [rcx+3H*4H] + vpbroadcastd zmm4, dword ptr [rcx+4H*4H] + vpbroadcastd zmm5, dword ptr [rcx+5H*4H] + vpbroadcastd zmm6, dword ptr [rcx+6H*4H] + vpbroadcastd zmm7, dword ptr [rcx+7H*4H] + movzx eax, byte ptr [rbp+78H] + movzx ebx, byte ptr [rbp+80H] + or eax, ebx + xor edx, edx +ALIGN 16 +innerloop16: + movzx ebx, byte ptr [rbp+88H] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+80H] + cmove eax, ebx + mov dword ptr [rsp+88H], eax + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+40H] + mov r13, qword ptr [rdi+48H] + mov r14, qword ptr [rdi+50H] + mov r15, qword ptr [rdi+58H] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H + vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H + vpunpcklqdq zmm8, zmm16, zmm17 + vpunpckhqdq zmm9, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H + vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H + vpunpcklqdq zmm10, zmm18, zmm19 + vpunpckhqdq zmm11, zmm18, zmm19 + mov r8, qword ptr [rdi+20H] + mov r9, qword ptr [rdi+28H] + mov r10, qword ptr [rdi+30H] + mov r11, qword ptr [rdi+38H] + mov r12, qword ptr [rdi+60H] + mov r13, qword ptr [rdi+68H] + mov r14, qword ptr [rdi+70H] + mov r15, qword ptr [rdi+78H] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H] + vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H + vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H] + vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H + vpunpcklqdq zmm12, zmm16, zmm17 + vpunpckhqdq zmm13, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H] + vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H + vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H] + vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H + vpunpcklqdq zmm14, zmm18, zmm19 + vpunpckhqdq zmm15, zmm18, zmm19 + vmovdqa32 zmm27, zmmword ptr [INDEX0] + vmovdqa32 zmm31, zmmword ptr [INDEX1] + vshufps zmm16, zmm8, zmm10, 136 + vshufps zmm17, zmm12, zmm14, 136 + vmovdqa32 zmm20, zmm16 + vpermt2d zmm16, zmm27, zmm17 + vpermt2d zmm20, zmm31, zmm17 + vshufps zmm17, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm21, zmm17 + vpermt2d zmm17, zmm27, zmm30 + vpermt2d zmm21, zmm31, zmm30 + vshufps zmm18, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm22, zmm18 + vpermt2d zmm18, zmm27, zmm8 + vpermt2d zmm22, zmm31, zmm8 + vshufps zmm19, zmm9, zmm11, 221 + vshufps zmm8, zmm13, zmm15, 221 + vmovdqa32 zmm23, zmm19 + vpermt2d zmm19, zmm27, zmm8 + vpermt2d zmm23, zmm31, zmm8 + mov r8, qword ptr [rdi] + mov r9, qword ptr 
[rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+40H] + mov r13, qword ptr [rdi+48H] + mov r14, qword ptr [rdi+50H] + mov r15, qword ptr [rdi+58H] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H + vpunpcklqdq zmm8, zmm24, zmm25 + vpunpckhqdq zmm9, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H + vpunpcklqdq zmm10, zmm24, zmm25 + vpunpckhqdq zmm11, zmm24, zmm25 + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r12+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r13+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r14+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + prefetcht0 byte ptr [r15+rdx+80H] + mov r8, qword ptr [rdi+20H] + mov r9, qword ptr [rdi+28H] + mov r10, qword ptr [rdi+30H] + mov r11, qword ptr [rdi+38H] + mov r12, qword ptr [rdi+60H] + mov r13, qword ptr [rdi+68H] + mov r14, qword ptr [rdi+70H] + mov r15, qword ptr [rdi+78H] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H + vpunpcklqdq zmm12, zmm24, zmm25 + vpunpckhqdq zmm13, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H] + vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H + vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H] + vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H + vpunpcklqdq zmm14, zmm24, zmm25 + vpunpckhqdq zmm15, zmm24, zmm25 + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r12+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r13+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r14+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + prefetcht0 byte ptr [r15+rdx+80H] + vshufps zmm24, zmm8, zmm10, 136 + vshufps zmm30, zmm12, zmm14, 136 + vmovdqa32 zmm28, zmm24 + vpermt2d zmm24, zmm27, zmm30 + vpermt2d zmm28, zmm31, zmm30 + vshufps zmm25, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm29, zmm25 + vpermt2d zmm25, zmm27, zmm30 + vpermt2d zmm29, zmm31, zmm30 + vshufps zmm26, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm30, zmm26 + vpermt2d zmm26, zmm27, zmm8 + vpermt2d zmm30, zmm31, zmm8 + vshufps zmm8, zmm9, zmm11, 221 + vshufps zmm10, zmm13, zmm15, 221 + vpermi2d zmm27, zmm8, zmm10 + vpermi2d zmm31, zmm8, zmm10 + vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0] + vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1] + vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2] + vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3] + vmovdqa32 zmm12, zmmword ptr [rsp] + vmovdqa32 zmm13, zmmword ptr [rsp+1H*40H] + vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN] + vpbroadcastd zmm15, dword ptr [rsp+22H*4H] + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + 
vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm24 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm23 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, 
zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm27 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 
+ vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm21 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, 
zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm28 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm26 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + 
vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm22 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + 
vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm31 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpxord zmm0, zmm0, zmm8 + vpxord zmm1, zmm1, zmm9 + vpxord zmm2, zmm2, zmm10 + vpxord zmm3, zmm3, zmm11 + vpxord zmm4, zmm4, zmm12 + vpxord zmm5, zmm5, zmm13 + vpxord zmm6, zmm6, zmm14 + vpxord zmm7, zmm7, zmm15 + movzx eax, byte ptr [rbp+78H] + jne innerloop16 + mov rbx, qword ptr [rbp+90H] + vpunpckldq zmm16, zmm0, zmm1 + vpunpckhdq zmm17, zmm0, zmm1 + vpunpckldq zmm18, zmm2, zmm3 + vpunpckhdq zmm19, zmm2, zmm3 + vpunpckldq zmm20, zmm4, zmm5 + vpunpckhdq zmm21, zmm4, zmm5 + vpunpckldq zmm22, zmm6, zmm7 + vpunpckhdq zmm23, zmm6, zmm7 + vpunpcklqdq zmm0, zmm16, zmm18 + vpunpckhqdq zmm1, zmm16, zmm18 + vpunpcklqdq zmm2, zmm17, zmm19 + vpunpckhqdq zmm3, zmm17, zmm19 + vpunpcklqdq zmm4, zmm20, zmm22 + vpunpckhqdq zmm5, zmm20, zmm22 + vpunpcklqdq zmm6, zmm21, zmm23 + vpunpckhqdq zmm7, zmm21, zmm23 + vshufi32x4 zmm16, zmm0, zmm4, 88H + vshufi32x4 zmm17, zmm1, zmm5, 88H + vshufi32x4 zmm18, zmm2, zmm6, 88H + vshufi32x4 zmm19, zmm3, zmm7, 88H + vshufi32x4 zmm20, zmm0, zmm4, 0DDH + vshufi32x4 zmm21, zmm1, zmm5, 0DDH + vshufi32x4 zmm22, zmm2, zmm6, 0DDH + vshufi32x4 zmm23, 
zmm3, zmm7, 0DDH + vshufi32x4 zmm0, zmm16, zmm17, 88H + vshufi32x4 zmm1, zmm18, zmm19, 88H + vshufi32x4 zmm2, zmm20, zmm21, 88H + vshufi32x4 zmm3, zmm22, zmm23, 88H + vshufi32x4 zmm4, zmm16, zmm17, 0DDH + vshufi32x4 zmm5, zmm18, zmm19, 0DDH + vshufi32x4 zmm6, zmm20, zmm21, 0DDH + vshufi32x4 zmm7, zmm22, zmm23, 0DDH + vmovdqu32 zmmword ptr [rbx], zmm0 + vmovdqu32 zmmword ptr [rbx+1H*40H], zmm1 + vmovdqu32 zmmword ptr [rbx+2H*40H], zmm2 + vmovdqu32 zmmword ptr [rbx+3H*40H], zmm3 + vmovdqu32 zmmword ptr [rbx+4H*40H], zmm4 + vmovdqu32 zmmword ptr [rbx+5H*40H], zmm5 + vmovdqu32 zmmword ptr [rbx+6H*40H], zmm6 + vmovdqu32 zmmword ptr [rbx+7H*40H], zmm7 + vmovdqa32 zmm0, zmmword ptr [rsp] + vmovdqa32 zmm1, zmmword ptr [rsp+1H*40H] + vmovdqa32 zmm2, zmm0 + ; XXX: ml64.exe does not currently understand the syntax. We use a workaround. + vpbroadcastd zmm4, dword ptr [ADD16] + vpbroadcastd zmm5, dword ptr [ADD1] + vpaddd zmm2{k1}, zmm0, zmm4 + ; vpaddd zmm2{k1}, zmm0, dword ptr [ADD16] ; {1to16} + vpcmpud k2, zmm2, zmm0, 1 + vpaddd zmm1 {k2}, zmm1, zmm5 + ; vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1] ; {1to16} + vmovdqa32 zmmword ptr [rsp], zmm2 + vmovdqa32 zmmword ptr [rsp+1H*40H], zmm1 + add rdi, 128 + add rbx, 512 + mov qword ptr [rbp+90H], rbx + sub rsi, 16 + cmp rsi, 16 + jnc outerloop16 + test rsi, rsi + jne final15blocks +unwind: + vzeroupper + vmovdqa xmm6, xmmword ptr [rsp+90H] + vmovdqa xmm7, xmmword ptr [rsp+0A0H] + vmovdqa xmm8, xmmword ptr [rsp+0B0H] + vmovdqa xmm9, xmmword ptr [rsp+0C0H] + vmovdqa xmm10, xmmword ptr [rsp+0D0H] + vmovdqa xmm11, xmmword ptr [rsp+0E0H] + vmovdqa xmm12, xmmword ptr [rsp+0F0H] + vmovdqa xmm13, xmmword ptr [rsp+100H] + vmovdqa xmm14, xmmword ptr [rsp+110H] + vmovdqa xmm15, xmmword ptr [rsp+120H] + mov rsp, rbp + pop rbp + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final15blocks: + test esi, 8H + je final7blocks + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+4H] + vpbroadcastd ymm2, dword ptr [rcx+8H] + vpbroadcastd ymm3, dword ptr [rcx+0CH] + vpbroadcastd ymm4, dword ptr [rcx+10H] + vpbroadcastd ymm5, dword ptr [rcx+14H] + vpbroadcastd ymm6, dword ptr [rcx+18H] + vpbroadcastd ymm7, dword ptr [rcx+1CH] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov r12, qword ptr [rdi+20H] + mov r13, qword ptr [rdi+28H] + mov r14, qword ptr [rdi+30H] + mov r15, qword ptr [rdi+38H] + movzx eax, byte ptr [rbp+78H] + movzx ebx, byte ptr [rbp+80H] + or eax, ebx + xor edx, edx +innerloop8: + movzx ebx, byte ptr [rbp+88H] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+80H] + cmove eax, ebx + mov dword ptr [rsp+88H], eax + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-40H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-40H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-40H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm16, ymm12, ymm14, 136 + vshufps ymm17, ymm12, ymm14, 221 + vshufps ymm18, ymm13, ymm15, 136 + vshufps ymm19, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-30H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-30H] + vinsertf128 ymm9, ymm9, 
xmmword ptr [r13+rdx-30H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-30H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-30H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm20, ymm12, ymm14, 136 + vshufps ymm21, ymm12, ymm14, 221 + vshufps ymm22, ymm13, ymm15, 136 + vshufps ymm23, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-20H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-20H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-20H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm24, ymm12, ymm14, 136 + vshufps ymm25, ymm12, ymm14, 221 + vshufps ymm26, ymm13, ymm15, 136 + vshufps ymm27, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-10H] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H + vmovups xmm9, xmmword ptr [r9+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-10H] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H + vmovups xmm11, xmmword ptr [r11+rdx-10H] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm28, ymm12, ymm14, 136 + vshufps ymm29, ymm12, ymm14, 221 + vshufps ymm30, ymm13, ymm15, 136 + vshufps ymm31, ymm13, ymm15, 221 + vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0] + vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1] + vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2] + vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3] + vmovdqa ymm12, ymmword ptr [rsp] + vmovdqa ymm13, ymmword ptr [rsp+40H] + vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN] + vpbroadcastd ymm15, dword ptr [rsp+88H] + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, 
ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm24 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm23 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, 
ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm27 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm21 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord 
ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm28 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord 
ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm26 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm28 
+ vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm22 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm31 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, 
ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+78H] + jne innerloop8 + mov rbx, qword ptr [rbp+90H] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0CCH + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0CCH + vblendps ymm3, ymm12, ymm9, 0CCH + vperm2f128 ymm12, ymm1, ymm2, 20H + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0CCH + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 20H + vmovups ymmword ptr [rbx+20H], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0CCH + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0CCH + vblendps ymm14, ymm14, ymm13, 0CCH + vperm2f128 ymm8, ymm10, ymm14, 20H + vmovups ymmword ptr [rbx+40H], ymm8 + vblendps ymm15, ymm13, ymm15, 0CCH + vperm2f128 ymm13, ymm6, ymm15, 20H + vmovups ymmword ptr [rbx+60H], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 31H + vperm2f128 ymm11, ymm3, ymm4, 31H + vmovups ymmword ptr [rbx+80H], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 31H + vperm2f128 ymm15, ymm6, ymm15, 31H + vmovups ymmword ptr [rbx+0A0H], ymm11 + vmovups ymmword ptr [rbx+0C0H], ymm14 + vmovups ymmword ptr [rbx+0E0H], ymm15 + vmovdqa ymm0, ymmword ptr [rsp] + vmovdqa ymm2, ymmword ptr [rsp+40H] + vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+1H*20H] + vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+3H*20H] + vmovdqa ymmword ptr [rsp], ymm0 + vmovdqa ymmword ptr [rsp+40H], ymm2 + add rbx, 256 + mov qword ptr [rbp+90H], rbx + add rdi, 64 + sub rsi, 8 +final7blocks: + mov rbx, qword ptr [rbp+90H] + mov r15, qword ptr [rsp+80H] + movzx r13, byte ptr [rbp+78H] + movzx r12, byte ptr [rbp+88H] + test esi, 4H + je final3blocks 
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx] + vbroadcasti32x4 zmm1, xmmword ptr [rcx+1H*10H] + vmovdqa xmm12, xmmword ptr [rsp] + vmovdqa xmm13, xmmword ptr [rsp+40H] + vpunpckldq xmm14, xmm12, xmm13 + vpunpckhdq xmm15, xmm12, xmm13 + vpermq ymm14, ymm14, 0DCH + vpermq ymm15, ymm15, 0DCH + vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN] + vinserti64x4 zmm13, zmm14, ymm15, 01H + mov eax, 17476 + kmovw k2, eax + vpblendmd zmm13 {k2}, zmm13, zmm12 + vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + mov eax, 43690 + kmovw k3, eax + mov eax, 34952 + kmovw k4, eax + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop4: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+88H], eax + vmovdqa32 zmm2, zmm15 + vpbroadcastd zmm8, dword ptr [rsp+22H*4H] + vpblendmd zmm3 {k4}, zmm13, zmm8 + vmovups zmm8, zmmword ptr [r8+rdx-1H*40H] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-4H*10H], 01H + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-4H*10H], 02H + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-4H*10H], 03H + vmovups zmm9, zmmword ptr [r8+rdx-30H] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-3H*10H], 01H + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-3H*10H], 02H + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-3H*10H], 03H + vshufps zmm4, zmm8, zmm9, 136 + vshufps zmm5, zmm8, zmm9, 221 + vmovups zmm8, zmmword ptr [r8+rdx-20H] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-2H*10H], 01H + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-2H*10H], 02H + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-2H*10H], 03H + vmovups zmm9, zmmword ptr [r8+rdx-10H] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-1H*10H], 01H + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-1H*10H], 02H + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-1H*10H], 03H + vshufps zmm6, zmm8, zmm9, 136 + vshufps zmm7, zmm8, zmm9, 221 + vpshufd zmm6, zmm6, 93H + vpshufd zmm7, zmm7, 93H + mov al, 7 +roundloop4: + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 93H + vpshufd zmm3, zmm3, 4EH + vpshufd zmm2, zmm2, 39H + vpaddd zmm0, zmm0, zmm6 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm7 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 39H + vpshufd zmm3, zmm3, 4EH + vpshufd zmm2, zmm2, 93H + dec al + jz endroundloop4 + vshufps zmm8, zmm4, zmm5, 214 + vpshufd zmm9, zmm4, 0FH + vpshufd zmm4, zmm8, 39H + vshufps zmm8, zmm6, zmm7, 250 + vpblendmd zmm9 {k3}, zmm9, zmm8 + vpunpcklqdq zmm8, zmm7, zmm5 + vpblendmd zmm8 {k4}, zmm8, zmm6 + vpshufd zmm8, zmm8, 78H + vpunpckhdq zmm5, zmm5, zmm7 + vpunpckldq zmm6, zmm6, zmm5 + vpshufd zmm7, zmm6, 1EH + vmovdqa32 zmm5, zmm9 + vmovdqa32 zmm6, zmm8 + jmp roundloop4 +endroundloop4: + vpxord zmm0, zmm0, zmm2 + vpxord zmm1, zmm1, zmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop4 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + 
vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + vextracti32x4 xmmword ptr [rbx+4H*10H], zmm0, 02H + vextracti32x4 xmmword ptr [rbx+5H*10H], zmm1, 02H + vextracti32x4 xmmword ptr [rbx+6H*10H], zmm0, 03H + vextracti32x4 xmmword ptr [rbx+7H*10H], zmm1, 03H + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+40H] + vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+1H*10H] + vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+5H*10H] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+40H], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +final3blocks: + test esi, 2H + je final1block + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+10H] + vmovd xmm13, dword ptr [rsp] + vpinsrd xmm13, xmm13, dword ptr [rsp+40H], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovd xmm14, dword ptr [rsp+4H] + vpinsrd xmm14, xmm14, dword ptr [rsp+44H], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + vinserti128 ymm13, ymm13, xmm14, 01H + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+88H], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] + vpbroadcastd ymm8, dword ptr [rsp+88H] + vpblendd ymm3, ymm13, ymm8, 88H + vmovups ymm8, ymmword ptr [r8+rdx-40H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-30H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-20H] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H + vmovups ymm9, ymmword ptr [r8+rdx-10H] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 93H + vpshufd ymm7, ymm7, 93H + mov al, 7 +roundloop2: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 93H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 39H + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 39H + vpshufd ymm3, ymm3, 4EH + vpshufd ymm2, ymm2, 93H + dec al + jz endroundloop2 + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0FH + vpshufd ymm4, ymm8, 39H + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0AAH + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 88H + vpshufd ymm8, ymm8, 78H + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 1EH + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp roundloop2 +endroundloop2: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + vextracti128 xmmword ptr [rbx+20H], ymm0, 01H + vextracti128 xmmword ptr [rbx+30H], ymm1, 01H + 
vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+40H] + vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+8H] + vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+48H] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+40H], xmm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +final1block: + test esi, 1H + je unwind + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + vmovd xmm14, dword ptr [rsp] + vpinsrd xmm14, xmm14, dword ptr [rsp+40H], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + vmovdqa xmm15, xmmword ptr [BLAKE3_IV] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +ALIGN 16 +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vpinsrd xmm3, xmm14, eax, 3 + vmovdqa xmm2, xmm15 + vmovups xmm8, xmmword ptr [r8+rdx-40H] + vmovups xmm9, xmmword ptr [r8+rdx-30H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-20H] + vmovups xmm9, xmmword ptr [r8+rdx-10H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +roundloop1: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+10H], xmm1 + jmp unwind + +_blake3_hash_many_avx512 ENDP +blake3_hash_many_avx512 ENDP + +ALIGN 16 +blake3_compress_in_place_avx512 PROC +_blake3_compress_in_place_avx512 PROC + sub rsp, 72 + vmovdqa xmmword ptr [rsp], xmm6 + vmovdqa xmmword ptr [rsp+10H], xmm7 + vmovdqa xmmword ptr [rsp+20H], xmm8 + vmovdqa xmmword ptr [rsp+30H], xmm9 + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + movzx eax, byte ptr [rsp+70H] + movzx r8d, r8b + shl rax, 32 + add r8, rax + vmovq xmm3, r9 + vmovq xmm4, r8 + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV] + vmovups xmm8, xmmword ptr [rdx] + vmovups xmm9, xmmword ptr [rdx+10H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rdx+20H] + vmovups xmm9, xmmword ptr [rdx+30H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 
+@@: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz @F + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 88H + vpshufd xmm8, xmm8, 78H + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 1EH + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp @B +@@: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + vmovdqu xmmword ptr [rcx], xmm0 + vmovdqu xmmword ptr [rcx+10H], xmm1 + vmovdqa xmm6, xmmword ptr [rsp] + vmovdqa xmm7, xmmword ptr [rsp+10H] + vmovdqa xmm8, xmmword ptr [rsp+20H] + vmovdqa xmm9, xmmword ptr [rsp+30H] + add rsp, 72 + ret +_blake3_compress_in_place_avx512 ENDP +blake3_compress_in_place_avx512 ENDP + +ALIGN 16 +blake3_compress_xof_avx512 PROC +_blake3_compress_xof_avx512 PROC + sub rsp, 72 + vmovdqa xmmword ptr [rsp], xmm6 + vmovdqa xmmword ptr [rsp+10H], xmm7 + vmovdqa xmmword ptr [rsp+20H], xmm8 + vmovdqa xmmword ptr [rsp+30H], xmm9 + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+10H] + movzx eax, byte ptr [rsp+70H] + movzx r8d, r8b + mov r10, qword ptr [rsp+78H] + shl rax, 32 + add r8, rax + vmovq xmm3, r9 + vmovq xmm4, r8 + vpunpcklqdq xmm3, xmm3, xmm4 + vmovaps xmm2, xmmword ptr [BLAKE3_IV] + vmovups xmm8, xmmword ptr [rdx] + vmovups xmm9, xmmword ptr [rdx+10H] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [rdx+20H] + vmovups xmm9, xmmword ptr [rdx+30H] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 93H + vpshufd xmm7, xmm7, 93H + mov al, 7 +@@: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 93H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 39H + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 16 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 12 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxord xmm3, xmm3, xmm0 + vprord xmm3, xmm3, 8 + vpaddd xmm2, xmm2, xmm3 + vpxord xmm1, xmm1, xmm2 + vprord xmm1, xmm1, 7 + vpshufd xmm0, xmm0, 39H + vpshufd xmm3, xmm3, 4EH + vpshufd xmm2, xmm2, 93H + dec al + jz @F + vshufps xmm8, xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0FH + vpshufd xmm4, xmm8, 39H + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0AAH + vpunpcklqdq 
xmm8, xmm7, xmm5
+        vpblendd xmm8, xmm8, xmm6, 88H
+        vpshufd xmm8, xmm8, 78H
+        vpunpckhdq xmm5, xmm5, xmm7
+        vpunpckldq xmm6, xmm6, xmm5
+        vpshufd xmm7, xmm6, 1EH
+        vmovdqa xmm5, xmm9
+        vmovdqa xmm6, xmm8
+        jmp     @B
+@@:
+        vpxor   xmm0, xmm0, xmm2
+        vpxor   xmm1, xmm1, xmm3
+        vpxor   xmm2, xmm2, xmmword ptr [rcx]
+        vpxor   xmm3, xmm3, xmmword ptr [rcx+10H]
+        vmovdqu xmmword ptr [r10], xmm0
+        vmovdqu xmmword ptr [r10+10H], xmm1
+        vmovdqu xmmword ptr [r10+20H], xmm2
+        vmovdqu xmmword ptr [r10+30H], xmm3
+        vmovdqa xmm6, xmmword ptr [rsp]
+        vmovdqa xmm7, xmmword ptr [rsp+10H]
+        vmovdqa xmm8, xmmword ptr [rsp+20H]
+        vmovdqa xmm9, xmmword ptr [rsp+30H]
+        add     rsp, 72
+        ret
+_blake3_compress_xof_avx512 ENDP
+blake3_compress_xof_avx512 ENDP
+
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN   64
+INDEX0:
+        dd 0, 1, 2, 3, 16, 17, 18, 19
+        dd 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+        dd 4, 5, 6, 7, 20, 21, 22, 23
+        dd 12, 13, 14, 15, 28, 29, 30, 31
+ADD0:
+        dd 0, 1, 2, 3, 4, 5, 6, 7
+        dd 8, 9, 10, 11, 12, 13, 14, 15
+ADD1:
+        dd 1
+ADD16:
+        dd 16
+BLAKE3_BLOCK_LEN:
+        dd 64
+ALIGN   64
+BLAKE3_IV:
+BLAKE3_IV_0:
+        dd 06A09E667H
+BLAKE3_IV_1:
+        dd 0BB67AE85H
+BLAKE3_IV_2:
+        dd 03C6EF372H
+BLAKE3_IV_3:
+        dd 0A54FF53AH
+
+_RDATA ENDS
+END
diff --git a/llvm/lib/Support/BLAKE3/blake3_dispatch.c b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
new file mode 100644
index 000000000000..e96e714225f4
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
@@ -0,0 +1,277 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "blake3_impl.h"
+
+#if defined(IS_X86)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <immintrin.h>
+#else
+#error "Unimplemented!"
+#endif
+#endif
+
+#define MAYBE_UNUSED(x) (void)((x))
+
+#if defined(IS_X86)
+static uint64_t xgetbv(void) {
+#if defined(_MSC_VER)
+  return _xgetbv(0);
+#else
+  uint32_t eax = 0, edx = 0;
+  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
+  return ((uint64_t)edx << 32) | eax;
+#endif
+}
+
+static void cpuid(uint32_t out[4], uint32_t id) {
+#if defined(_MSC_VER)
+  __cpuid((int *)out, id);
+#elif defined(__i386__) || defined(_M_IX86)
+  __asm__ __volatile__("movl %%ebx, %1\n"
+                       "cpuid\n"
+                       "xchgl %1, %%ebx\n"
+                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id));
+#else
+  __asm__ __volatile__("cpuid\n"
+                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id));
+#endif
+}
+
+static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
+#if defined(_MSC_VER)
+  __cpuidex((int *)out, id, sid);
+#elif defined(__i386__) || defined(_M_IX86)
+  __asm__ __volatile__("movl %%ebx, %1\n"
+                       "cpuid\n"
+                       "xchgl %1, %%ebx\n"
+                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id), "c"(sid));
+#else
+  __asm__ __volatile__("cpuid\n"
+                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id), "c"(sid));
+#endif
+}
+
+#endif
+
+enum cpu_feature {
+  SSE2 = 1 << 0,
+  SSSE3 = 1 << 1,
+  SSE41 = 1 << 2,
+  AVX = 1 << 3,
+  AVX2 = 1 << 4,
+  AVX512F = 1 << 5,
+  AVX512VL = 1 << 6,
+  /* ... */
+  UNDEFINED = 1 << 30
+};
+
+#if !defined(BLAKE3_TESTING)
+static /* Allow the variable to be controlled manually for testing */
+#endif
+    enum cpu_feature g_cpu_features = UNDEFINED;
+
+LLVM_ATTRIBUTE_USED
+#if !defined(BLAKE3_TESTING)
+static
+#endif
+    enum cpu_feature
+    get_cpu_features(void) {
+
+  if (g_cpu_features != UNDEFINED) {
+    return g_cpu_features;
+  } else {
+#if defined(IS_X86)
+    uint32_t regs[4] = {0};
+    uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
+    (void)edx;
+    enum cpu_feature features = 0;
+    cpuid(regs, 0);
+    const int max_id = *eax;
+    cpuid(regs, 1);
+#if defined(__amd64__) || defined(_M_X64)
+    features |= SSE2;
+#else
+    if (*edx & (1UL << 26))
+      features |= SSE2;
+#endif
+    if (*ecx & (1UL << 0))
+      features |= SSSE3;
+    if (*ecx & (1UL << 19))
+      features |= SSE41;
+
+    if (*ecx & (1UL << 27)) { // OSXSAVE
+      const uint64_t mask = xgetbv();
+      if ((mask & 6) == 6) { // SSE and AVX states
+        if (*ecx & (1UL << 28))
+          features |= AVX;
+        if (max_id >= 7) {
+          cpuidex(regs, 7, 0);
+          if (*ebx & (1UL << 5))
+            features |= AVX2;
+          if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
+            if (*ebx & (1UL << 31))
+              features |= AVX512VL;
+            if (*ebx & (1UL << 16))
+              features |= AVX512F;
+          }
+        }
+      }
+    }
+    g_cpu_features = features;
+    return features;
+#else
+    /* How to detect NEON? */
+    return 0;
+#endif
+  }
+}
+
+void blake3_compress_in_place(uint32_t cv[8],
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+  if (features & AVX512VL) {
+    blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
+    return;
+  }
+#endif
+#endif
+  blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
+}
+
+void blake3_compress_xof(const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags,
+                         uint8_t out[64]) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+  if (features & AVX512VL) {
+    blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
+    return;
+  }
+#endif
+#endif
+  blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
+}
+
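Everything below funnels through these two wrappers, so the calling convention is worth pinning down. The following is a minimal sketch of a caller, an illustration only: IV, store_cv_words, and the CHUNK_START/CHUNK_END/ROOT flags are internal names from blake3_impl.h, which this patch adds further down, and real users should go through the public llvm-c/blake3.h API instead.

#include <stdint.h>
#include <string.h>
#include "blake3_impl.h"

// Hash one 64-byte block as a complete root chunk: start from the IV,
// mark the block as both the first and last block of its chunk, and
// finalize with ROOT. The dispatcher picks the best available compressor.
void compress_one_block_example(const uint8_t block[64], uint8_t out[32]) {
  uint32_t cv[8];
  memcpy(cv, IV, sizeof(cv));
  blake3_compress_in_place(cv, block, BLAKE3_BLOCK_LEN, /*counter=*/0,
                           CHUNK_START | CHUNK_END | ROOT);
  store_cv_words(out, cv); // serialize the chaining value little-endian
}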
+void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
+                      size_t blocks, const uint32_t key[8], uint64_t counter,
+                      bool increment_counter, uint8_t flags,
+                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
+    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+                            increment_counter, flags, flags_start, flags_end,
+                            out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+  if (features & AVX2) {
+    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+                          increment_counter, flags, flags_start, flags_end,
+                          out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+                           increment_counter, flags, flags_start, flags_end,
+                           out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+                          increment_counter, flags, flags_start, flags_end,
+                          out);
+    return;
+  }
+#endif
+#endif
+
+#if BLAKE3_USE_NEON == 1
+  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
+                        increment_counter, flags, flags_start, flags_end, out);
+  return;
+#endif
+
+  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
+                            increment_counter, flags, flags_start, flags_end,
+                            out);
+}
+
+// The dynamically detected SIMD degree of the current platform.
+size_t blake3_simd_degree(void) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
+    return 16;
+  }
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+  if (features & AVX2) {
+    return 8;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    return 4;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    return 4;
+  }
+#endif
+#endif
+#if BLAKE3_USE_NEON == 1
+  return 4;
+#endif
+  return 1;
+}
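blake3_simd_degree() is the batching hint for callers of blake3_hash_many: feeding batches of exactly this many inputs keeps the widest kernel busy. A hedged sketch follows; hash_chunks_example is hypothetical, and blake3_hash_many also accepts any smaller num_inputs and re-dispatches internally, so this is an optimization pattern rather than a requirement.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "blake3_impl.h"

// Hash n single-block chunks, blake3_simd_degree() at a time. Each chunk
// gets a distinct counter (increment_counter=true), and CHUNK_START /
// CHUNK_END are applied to the first/last block of every input.
void hash_chunks_example(const uint8_t *const *chunks, size_t n,
                         const uint32_t key[8], uint8_t *out) {
  const size_t degree = blake3_simd_degree();
  uint64_t counter = 0;
  while (n > 0) {
    size_t batch = n < degree ? n : degree;
    blake3_hash_many(chunks, batch, /*blocks=*/1, key, counter,
                     /*increment_counter=*/true, /*flags=*/0,
                     CHUNK_START, CHUNK_END, out);
    chunks += batch;
    n -= batch;
    counter += batch;
    out += batch * BLAKE3_OUT_LEN;
  }
}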
diff --git a/llvm/lib/Support/BLAKE3/blake3_impl.h b/llvm/lib/Support/BLAKE3/blake3_impl.h
new file mode 100644
index 000000000000..180d0a6eeda8
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_impl.h
@@ -0,0 +1,312 @@
+#ifndef BLAKE3_IMPL_H
+#define BLAKE3_IMPL_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "llvm-c/blake3.h"
+// For \p LLVM_LIBRARY_VISIBILITY
+#include "llvm/Support/Compiler.h"
+
+// Remove the 'llvm_' prefix for the rest of the internal implementation.
+#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
+#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
+#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
+#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
+#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
+#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
+#define blake3_hasher llvm_blake3_hasher
+#define blake3_chunk_state llvm_blake3_chunk_state
+
+// internal flags
+enum blake3_flags {
+  CHUNK_START = 1 << 0,
+  CHUNK_END = 1 << 1,
+  PARENT = 1 << 2,
+  ROOT = 1 << 3,
+  KEYED_HASH = 1 << 4,
+  DERIVE_KEY_CONTEXT = 1 << 5,
+  DERIVE_KEY_MATERIAL = 1 << 6,
+};
+
+// This C implementation tries to support recent versions of GCC, Clang, and
+// MSVC.
+#if defined(_MSC_VER)
+#define INLINE static __forceinline
+#else
+#define INLINE static inline __attribute__((always_inline))
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define IS_X86
+#define IS_X86_64
+#endif
+
+#if defined(__i386__) || defined(_M_IX86)
+#define IS_X86
+#define IS_X86_32
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define IS_AARCH64
+#endif
+
+#if defined(IS_X86)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+#include <immintrin.h>
+#endif
+
+#if !defined(BLAKE3_USE_NEON)
+  // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
+  #if defined(IS_AARCH64)
+    #define BLAKE3_USE_NEON 1
+  #else
+    #define BLAKE3_USE_NEON 0
+  #endif
+#endif
+
+#if defined(IS_X86)
+#define MAX_SIMD_DEGREE 16
+#elif BLAKE3_USE_NEON == 1
+#define MAX_SIMD_DEGREE 4
+#else
+#define MAX_SIMD_DEGREE 1
+#endif
+
+// There are some places where we want a static size that's equal to the
+// MAX_SIMD_DEGREE, but also at least 2.
+#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
+
+static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
+                               0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
+                               0x1F83D9ABUL, 0x5BE0CD19UL};
+
+static const uint8_t MSG_SCHEDULE[7][16] = {
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
+    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
+    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
+    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
+    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
+    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
+};
+
+/* Find index of the highest set bit */
+/* x is assumed to be nonzero.       */
+static unsigned int highest_one(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+  return 63 ^ __builtin_clzll(x);
+#elif defined(_MSC_VER) && defined(IS_X86_64)
+  unsigned long index;
+  _BitScanReverse64(&index, x);
+  return index;
+#elif defined(_MSC_VER) && defined(IS_X86_32)
+  if(x >> 32) {
+    unsigned long index;
+    _BitScanReverse(&index, (unsigned long)(x >> 32));
+    return 32 + index;
+  } else {
+    unsigned long index;
+    _BitScanReverse(&index, (unsigned long)x);
+    return index;
+  }
+#else
+  unsigned int c = 0;
+  if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
+  if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
+  if(x & 0x000000000000ff00ULL) { x >>=  8; c +=  8; }
+  if(x & 0x00000000000000f0ULL) { x >>=  4; c +=  4; }
+  if(x & 0x000000000000000cULL) { x >>=  2; c +=  2; }
+  if(x & 0x0000000000000002ULL) {           c +=  1; }
+  return c;
+#endif
+}
+
+// Count the number of 1 bits.
+INLINE unsigned int popcnt(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+  return __builtin_popcountll(x);
+#else
+  unsigned int count = 0;
+  while (x != 0) {
+    count += 1;
+    x &= x - 1;
+  }
+  return count;
+#endif
+}
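highest_one and popcnt exist for the tree bookkeeping in the hasher (subtree sizes and the depth of the chaining-value stack). A small self-check against naive references, assuming it is compiled in the same translation unit as the helpers above:

#include <assert.h>
#include <stdint.h>

static unsigned int highest_one_ref(uint64_t x) {
  unsigned int n = 0;
  while (x >>= 1)
    n++;
  return n; // floor(log2(x)) for nonzero x
}

static unsigned int popcnt_ref(uint64_t x) {
  unsigned int n = 0;
  for (; x != 0; x >>= 1)
    n += (unsigned int)(x & 1);
  return n;
}

static void check_bit_helpers(void) {
  for (uint64_t x = 1; x < (1u << 20); x++) {
    assert(highest_one(x) == highest_one_ref(x));
    assert(popcnt(x) == popcnt_ref(x));
  }
}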
+
+// Largest power of two less than or equal to x. As a special case, returns 1
+// when x is 0.
+INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
+  return 1ULL << highest_one(x | 1);
+}
+
+INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
+
+INLINE uint32_t counter_high(uint64_t counter) {
+  return (uint32_t)(counter >> 32);
+}
+
+INLINE uint32_t load32(const void *src) {
+  const uint8_t *p = (const uint8_t *)src;
+  return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
+         ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
+}
+
+INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
+                           uint32_t key_words[8]) {
+  key_words[0] = load32(&key[0 * 4]);
+  key_words[1] = load32(&key[1 * 4]);
+  key_words[2] = load32(&key[2 * 4]);
+  key_words[3] = load32(&key[3 * 4]);
+  key_words[4] = load32(&key[4 * 4]);
+  key_words[5] = load32(&key[5 * 4]);
+  key_words[6] = load32(&key[6 * 4]);
+  key_words[7] = load32(&key[7 * 4]);
+}
+
+INLINE void store32(void *dst, uint32_t w) {
+  uint8_t *p = (uint8_t *)dst;
+  p[0] = (uint8_t)(w >> 0);
+  p[1] = (uint8_t)(w >> 8);
+  p[2] = (uint8_t)(w >> 16);
+  p[3] = (uint8_t)(w >> 24);
+}
+
+INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
+  store32(&bytes_out[0 * 4], cv_words[0]);
+  store32(&bytes_out[1 * 4], cv_words[1]);
+  store32(&bytes_out[2 * 4], cv_words[2]);
+  store32(&bytes_out[3 * 4], cv_words[3]);
+  store32(&bytes_out[4 * 4], cv_words[4]);
+  store32(&bytes_out[5 * 4], cv_words[5]);
+  store32(&bytes_out[6 * 4], cv_words[6]);
+  store32(&bytes_out[7 * 4], cv_words[7]);
+}
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place(uint32_t cv[8],
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof(const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags,
+                         uint8_t out[64]);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
+                      size_t blocks, const uint32_t key[8], uint64_t counter,
+                      bool increment_counter, uint8_t flags,
+                      uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+LLVM_LIBRARY_VISIBILITY
+size_t blake3_simd_degree(void);
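The two compress entry points declared above are related: for identical inputs, the first 32 bytes blake3_compress_xof writes are exactly the post-compression chaining value that blake3_compress_in_place leaves in cv, since both compute state[i] ^ state[i+8] for i in 0..7 (compare the two portable implementations later in this patch). A sketch of that invariant as a check:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void check_xof_prefix(const uint32_t cv_in[8],
                             const uint8_t block[BLAKE3_BLOCK_LEN],
                             uint8_t block_len, uint64_t counter,
                             uint8_t flags) {
  uint32_t cv[8];
  uint8_t xof_out[64], cv_bytes[32];
  memcpy(cv, cv_in, sizeof(cv));
  blake3_compress_xof(cv_in, block, block_len, counter, flags, xof_out);
  blake3_compress_in_place(cv, block, block_len, counter, flags);
  store_cv_words(cv_bytes, cv);
  // The XOF's first half is the new chaining value; the second half
  // additionally feeds the input cv back in.
  assert(memcmp(xof_out, cv_bytes, 32) == 0);
}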
+
+
+// Declarations for implementation-specific functions.
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_portable(uint32_t cv[8],
+                                       const uint8_t block[BLAKE3_BLOCK_LEN],
+                                       uint8_t block_len, uint64_t counter,
+                                       uint8_t flags);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_portable(const uint32_t cv[8],
+                                  const uint8_t block[BLAKE3_BLOCK_LEN],
+                                  uint8_t block_len, uint64_t counter,
+                                  uint8_t flags, uint8_t out[64]);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+                               size_t blocks, const uint32_t key[8],
+                               uint64_t counter, bool increment_counter,
+                               uint8_t flags, uint8_t flags_start,
+                               uint8_t flags_end, uint8_t *out);
+
+#if defined(IS_X86)
+#if !defined(BLAKE3_NO_SSE2)
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_sse2(uint32_t cv[8],
+                                   const uint8_t block[BLAKE3_BLOCK_LEN],
+                                   uint8_t block_len, uint64_t counter,
+                                   uint8_t flags);
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_sse2(const uint32_t cv[8],
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags, uint8_t out[64]);
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_sse41(uint32_t cv[8],
+                                    const uint8_t block[BLAKE3_BLOCK_LEN],
+                                    uint8_t block_len, uint64_t counter,
+                                    uint8_t flags);
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_sse41(const uint32_t cv[8],
+                               const uint8_t block[BLAKE3_BLOCK_LEN],
+                               uint8_t block_len, uint64_t counter,
+                               uint8_t flags, uint8_t out[64]);
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+                            size_t blocks, const uint32_t key[8],
+                            uint64_t counter, bool increment_counter,
+                            uint8_t flags, uint8_t flags_start,
+                            uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_AVX512)
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_avx512(uint32_t cv[8],
+                                     const uint8_t block[BLAKE3_BLOCK_LEN],
+                                     uint8_t block_len, uint64_t counter,
+                                     uint8_t flags);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_avx512(const uint32_t cv[8],
+                                const uint8_t block[BLAKE3_BLOCK_LEN],
+                                uint8_t block_len, uint64_t counter,
+                                uint8_t flags, uint8_t out[64]);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
+                             size_t blocks, const uint32_t key[8],
+                             uint64_t counter, bool increment_counter,
+                             uint8_t flags, uint8_t flags_start,
+                             uint8_t flags_end, uint8_t *out);
+#endif
+#endif
+
+#if BLAKE3_USE_NEON == 1
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out);
+#endif
+
+
+#endif /* BLAKE3_IMPL_H */
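One property of MSG_SCHEDULE above is load-bearing for the SIMD kernels in this patch: row r+1 is row r composed with row 1, so the seven schedules are iterates of a single permutation. That is why the SSE2 compress loop below can permute its message registers with the same fixed shuffles every round instead of indexing a table. A self-check of the property:

#include <assert.h>
#include <stddef.h>

static void check_msg_schedule_is_iterated_permutation(void) {
  // MSG_SCHEDULE[0] is the identity; every later row applies row 1 once
  // more to the row before it.
  for (size_t r = 0; r + 1 < 7; r++)
    for (size_t i = 0; i < 16; i++)
      assert(MSG_SCHEDULE[r + 1][i] == MSG_SCHEDULE[r][MSG_SCHEDULE[1][i]]);
}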
"blake3_impl.h" + +#if BLAKE3_USE_NEON + +#include + +#ifdef __ARM_BIG_ENDIAN +#error "This implementation only supports little-endian ARM." +// It might be that all we need for big-endian support here is to get the loads +// and stores right, but step zero would be finding a way to test it in CI. +#endif + +INLINE uint32x4_t loadu_128(const uint8_t src[16]) { + // vld1q_u32 has alignment requirements. Don't use it. + uint32x4_t x; + memcpy(&x, src, 16); + return x; +} + +INLINE void storeu_128(uint32x4_t src, uint8_t dest[16]) { + // vst1q_u32 has alignment requirements. Don't use it. + memcpy(dest, &src, 16); +} + +INLINE uint32x4_t add_128(uint32x4_t a, uint32x4_t b) { + return vaddq_u32(a, b); +} + +INLINE uint32x4_t xor_128(uint32x4_t a, uint32x4_t b) { + return veorq_u32(a, b); +} + +INLINE uint32x4_t set1_128(uint32_t x) { return vld1q_dup_u32(&x); } + +INLINE uint32x4_t set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t array[4] = {a, b, c, d}; + return vld1q_u32(array); +} + +INLINE uint32x4_t rot16_128(uint32x4_t x) { + return vorrq_u32(vshrq_n_u32(x, 16), vshlq_n_u32(x, 32 - 16)); +} + +INLINE uint32x4_t rot12_128(uint32x4_t x) { + return vorrq_u32(vshrq_n_u32(x, 12), vshlq_n_u32(x, 32 - 12)); +} + +INLINE uint32x4_t rot8_128(uint32x4_t x) { + return vorrq_u32(vshrq_n_u32(x, 8), vshlq_n_u32(x, 32 - 8)); +} + +INLINE uint32x4_t rot7_128(uint32x4_t x) { + return vorrq_u32(vshrq_n_u32(x, 7), vshlq_n_u32(x, 32 - 7)); +} + +// TODO: compress_neon + +// TODO: hash2_neon + +/* + * ---------------------------------------------------------------------------- + * hash4_neon + * ---------------------------------------------------------------------------- + */ + +INLINE void round_fn4(uint32x4_t v[16], uint32x4_t m[16], size_t r) { + v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[0] = add_128(v[0], v[4]); + v[1] = add_128(v[1], v[5]); + v[2] = add_128(v[2], v[6]); + v[3] = add_128(v[3], v[7]); + v[12] = xor_128(v[12], v[0]); + v[13] = xor_128(v[13], v[1]); + v[14] = xor_128(v[14], v[2]); + v[15] = xor_128(v[15], v[3]); + v[12] = rot16_128(v[12]); + v[13] = rot16_128(v[13]); + v[14] = rot16_128(v[14]); + v[15] = rot16_128(v[15]); + v[8] = add_128(v[8], v[12]); + v[9] = add_128(v[9], v[13]); + v[10] = add_128(v[10], v[14]); + v[11] = add_128(v[11], v[15]); + v[4] = xor_128(v[4], v[8]); + v[5] = xor_128(v[5], v[9]); + v[6] = xor_128(v[6], v[10]); + v[7] = xor_128(v[7], v[11]); + v[4] = rot12_128(v[4]); + v[5] = rot12_128(v[5]); + v[6] = rot12_128(v[6]); + v[7] = rot12_128(v[7]); + v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[0] = add_128(v[0], v[4]); + v[1] = add_128(v[1], v[5]); + v[2] = add_128(v[2], v[6]); + v[3] = add_128(v[3], v[7]); + v[12] = xor_128(v[12], v[0]); + v[13] = xor_128(v[13], v[1]); + v[14] = xor_128(v[14], v[2]); + v[15] = xor_128(v[15], v[3]); + v[12] = rot8_128(v[12]); + v[13] = rot8_128(v[13]); + v[14] = rot8_128(v[14]); + v[15] = rot8_128(v[15]); + v[8] = add_128(v[8], v[12]); + v[9] = add_128(v[9], v[13]); + v[10] = add_128(v[10], v[14]); + v[11] = add_128(v[11], v[15]); + v[4] = xor_128(v[4], v[8]); + v[5] = xor_128(v[5], v[9]); + v[6] = xor_128(v[6], v[10]); + v[7] = xor_128(v[7], v[11]); + v[4] = 
+
+// TODO: compress_neon
+
+// TODO: hash2_neon
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash4_neon
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void round_fn4(uint32x4_t v[16], uint32x4_t m[16], size_t r) {
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+  v[0] = add_128(v[0], v[4]);
+  v[1] = add_128(v[1], v[5]);
+  v[2] = add_128(v[2], v[6]);
+  v[3] = add_128(v[3], v[7]);
+  v[12] = xor_128(v[12], v[0]);
+  v[13] = xor_128(v[13], v[1]);
+  v[14] = xor_128(v[14], v[2]);
+  v[15] = xor_128(v[15], v[3]);
+  v[12] = rot16_128(v[12]);
+  v[13] = rot16_128(v[13]);
+  v[14] = rot16_128(v[14]);
+  v[15] = rot16_128(v[15]);
+  v[8] = add_128(v[8], v[12]);
+  v[9] = add_128(v[9], v[13]);
+  v[10] = add_128(v[10], v[14]);
+  v[11] = add_128(v[11], v[15]);
+  v[4] = xor_128(v[4], v[8]);
+  v[5] = xor_128(v[5], v[9]);
+  v[6] = xor_128(v[6], v[10]);
+  v[7] = xor_128(v[7], v[11]);
+  v[4] = rot12_128(v[4]);
+  v[5] = rot12_128(v[5]);
+  v[6] = rot12_128(v[6]);
+  v[7] = rot12_128(v[7]);
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+  v[0] = add_128(v[0], v[4]);
+  v[1] = add_128(v[1], v[5]);
+  v[2] = add_128(v[2], v[6]);
+  v[3] = add_128(v[3], v[7]);
+  v[12] = xor_128(v[12], v[0]);
+  v[13] = xor_128(v[13], v[1]);
+  v[14] = xor_128(v[14], v[2]);
+  v[15] = xor_128(v[15], v[3]);
+  v[12] = rot8_128(v[12]);
+  v[13] = rot8_128(v[13]);
+  v[14] = rot8_128(v[14]);
+  v[15] = rot8_128(v[15]);
+  v[8] = add_128(v[8], v[12]);
+  v[9] = add_128(v[9], v[13]);
+  v[10] = add_128(v[10], v[14]);
+  v[11] = add_128(v[11], v[15]);
+  v[4] = xor_128(v[4], v[8]);
+  v[5] = xor_128(v[5], v[9]);
+  v[6] = xor_128(v[6], v[10]);
+  v[7] = xor_128(v[7], v[11]);
+  v[4] = rot7_128(v[4]);
+  v[5] = rot7_128(v[5]);
+  v[6] = rot7_128(v[6]);
+  v[7] = rot7_128(v[7]);
+
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = add_128(v[0], v[5]);
+  v[1] = add_128(v[1], v[6]);
+  v[2] = add_128(v[2], v[7]);
+  v[3] = add_128(v[3], v[4]);
+  v[15] = xor_128(v[15], v[0]);
+  v[12] = xor_128(v[12], v[1]);
+  v[13] = xor_128(v[13], v[2]);
+  v[14] = xor_128(v[14], v[3]);
+  v[15] = rot16_128(v[15]);
+  v[12] = rot16_128(v[12]);
+  v[13] = rot16_128(v[13]);
+  v[14] = rot16_128(v[14]);
+  v[10] = add_128(v[10], v[15]);
+  v[11] = add_128(v[11], v[12]);
+  v[8] = add_128(v[8], v[13]);
+  v[9] = add_128(v[9], v[14]);
+  v[5] = xor_128(v[5], v[10]);
+  v[6] = xor_128(v[6], v[11]);
+  v[7] = xor_128(v[7], v[8]);
+  v[4] = xor_128(v[4], v[9]);
+  v[5] = rot12_128(v[5]);
+  v[6] = rot12_128(v[6]);
+  v[7] = rot12_128(v[7]);
+  v[4] = rot12_128(v[4]);
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = add_128(v[0], v[5]);
+  v[1] = add_128(v[1], v[6]);
+  v[2] = add_128(v[2], v[7]);
+  v[3] = add_128(v[3], v[4]);
+  v[15] = xor_128(v[15], v[0]);
+  v[12] = xor_128(v[12], v[1]);
+  v[13] = xor_128(v[13], v[2]);
+  v[14] = xor_128(v[14], v[3]);
+  v[15] = rot8_128(v[15]);
+  v[12] = rot8_128(v[12]);
+  v[13] = rot8_128(v[13]);
+  v[14] = rot8_128(v[14]);
+  v[10] = add_128(v[10], v[15]);
+  v[11] = add_128(v[11], v[12]);
+  v[8] = add_128(v[8], v[13]);
+  v[9] = add_128(v[9], v[14]);
+  v[5] = xor_128(v[5], v[10]);
+  v[6] = xor_128(v[6], v[11]);
+  v[7] = xor_128(v[7], v[8]);
+  v[4] = xor_128(v[4], v[9]);
+  v[5] = rot7_128(v[5]);
+  v[6] = rot7_128(v[6]);
+  v[7] = rot7_128(v[7]);
+  v[4] = rot7_128(v[4]);
+}
+
+INLINE void transpose_vecs_128(uint32x4_t vecs[4]) {
+  // Individually transpose the four 2x2 sub-matrices in each corner.
+  uint32x4x2_t rows01 = vtrnq_u32(vecs[0], vecs[1]);
+  uint32x4x2_t rows23 = vtrnq_u32(vecs[2], vecs[3]);
+
+  // Swap the top-right and bottom-left 2x2s (which just got transposed).
+  vecs[0] =
+      vcombine_u32(vget_low_u32(rows01.val[0]), vget_low_u32(rows23.val[0]));
+  vecs[1] =
+      vcombine_u32(vget_low_u32(rows01.val[1]), vget_low_u32(rows23.val[1]));
+  vecs[2] =
+      vcombine_u32(vget_high_u32(rows01.val[0]), vget_high_u32(rows23.val[0]));
+  vecs[3] =
+      vcombine_u32(vget_high_u32(rows01.val[1]), vget_high_u32(rows23.val[1]));
+}
+
+INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
+                                size_t block_offset, uint32x4_t out[16]) {
+  out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(uint32x4_t)]);
+  out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(uint32x4_t)]);
+  out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(uint32x4_t)]);
+  out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(uint32x4_t)]);
+  transpose_vecs_128(&out[0]);
+  transpose_vecs_128(&out[4]);
+  transpose_vecs_128(&out[8]);
+  transpose_vecs_128(&out[12]);
+}
+
+INLINE void load_counters4(uint64_t counter, bool increment_counter,
+                           uint32x4_t *out_low, uint32x4_t *out_high) {
+  uint64_t mask = (increment_counter ? ~0 : 0);
+  *out_low = set4(
+      counter_low(counter + (mask & 0)), counter_low(counter + (mask & 1)),
+      counter_low(counter + (mask & 2)), counter_low(counter + (mask & 3)));
+  *out_high = set4(
+      counter_high(counter + (mask & 0)), counter_high(counter + (mask & 1)),
+      counter_high(counter + (mask & 2)), counter_high(counter + (mask & 3)));
+}
+
+static
+void blake3_hash4_neon(const uint8_t *const *inputs, size_t blocks,
+                       const uint32_t key[8], uint64_t counter,
+                       bool increment_counter, uint8_t flags,
+                       uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+  uint32x4_t h_vecs[8] = {
+      set1_128(key[0]), set1_128(key[1]), set1_128(key[2]), set1_128(key[3]),
+      set1_128(key[4]), set1_128(key[5]), set1_128(key[6]), set1_128(key[7]),
+  };
+  uint32x4_t counter_low_vec, counter_high_vec;
+  load_counters4(counter, increment_counter, &counter_low_vec,
+                 &counter_high_vec);
+  uint8_t block_flags = flags | flags_start;
+
+  for (size_t block = 0; block < blocks; block++) {
+    if (block + 1 == blocks) {
+      block_flags |= flags_end;
+    }
+    uint32x4_t block_len_vec = set1_128(BLAKE3_BLOCK_LEN);
+    uint32x4_t block_flags_vec = set1_128(block_flags);
+    uint32x4_t msg_vecs[16];
+    transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+    uint32x4_t v[16] = {
+        h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+        h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+        set1_128(IV[0]), set1_128(IV[1]), set1_128(IV[2]), set1_128(IV[3]),
+        counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+    };
+    round_fn4(v, msg_vecs, 0);
+    round_fn4(v, msg_vecs, 1);
+    round_fn4(v, msg_vecs, 2);
+    round_fn4(v, msg_vecs, 3);
+    round_fn4(v, msg_vecs, 4);
+    round_fn4(v, msg_vecs, 5);
+    round_fn4(v, msg_vecs, 6);
+    h_vecs[0] = xor_128(v[0], v[8]);
+    h_vecs[1] = xor_128(v[1], v[9]);
+    h_vecs[2] = xor_128(v[2], v[10]);
+    h_vecs[3] = xor_128(v[3], v[11]);
+    h_vecs[4] = xor_128(v[4], v[12]);
+    h_vecs[5] = xor_128(v[5], v[13]);
+    h_vecs[6] = xor_128(v[6], v[14]);
+    h_vecs[7] = xor_128(v[7], v[15]);
+
+    block_flags = flags;
+  }
+
+  transpose_vecs_128(&h_vecs[0]);
+  transpose_vecs_128(&h_vecs[4]);
+  // The first four vecs now contain the first half of each output, and the
+  // second four vecs contain the second half of each output.
+  storeu_128(h_vecs[0], &out[0 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[4], &out[1 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[1], &out[2 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[5], &out[3 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[2], &out[4 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[6], &out[5 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[3], &out[6 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[7], &out[7 * sizeof(uint32x4_t)]);
+}
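load_counters4 encodes the per-lane chunk counters for rows 12 and 13 of the four parallel states. The mask trick lets the same helper serve both chunks (lane i hashes counter + i) and parents (all lanes see the same counter; in practice parent nodes are hashed with counter 0). Spelled out scalar-wise, as a sketch:

#include <stdbool.h>
#include <stdint.h>

// What load_counters4 computes, one lane at a time.
static void lane_counters(uint64_t counter, bool increment_counter,
                          uint32_t low[4], uint32_t high[4]) {
  uint64_t mask = (increment_counter ? ~(uint64_t)0 : 0);
  for (uint64_t i = 0; i < 4; i++) {
    low[i] = counter_low(counter + (mask & i));   // lane i, bits 0..31
    high[i] = counter_high(counter + (mask & i)); // lane i, bits 32..63
  }
}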
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash_many_neon
+ * ----------------------------------------------------------------------------
+ */
+
+void blake3_compress_in_place_portable(uint32_t cv[8],
+                                       const uint8_t block[BLAKE3_BLOCK_LEN],
+                                       uint8_t block_len, uint64_t counter,
+                                       uint8_t flags);
+
+INLINE void hash_one_neon(const uint8_t *input, size_t blocks,
+                          const uint32_t key[8], uint64_t counter,
+                          uint8_t flags, uint8_t flags_start, uint8_t flags_end,
+                          uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  while (blocks > 0) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    // TODO: Implement compress_neon. However note that according to
+    // https://github.com/BLAKE2/BLAKE2/commit/7965d3e6e1b4193438b8d3a656787587d2579227,
+    // compress_neon might not be any faster than compress_portable.
+    blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                      block_flags);
+    input = &input[BLAKE3_BLOCK_LEN];
+    blocks -= 1;
+    block_flags = flags;
+  }
+  memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out) {
+  while (num_inputs >= 4) {
+    blake3_hash4_neon(inputs, blocks, key, counter, increment_counter, flags,
+                      flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += 4;
+    }
+    inputs += 4;
+    num_inputs -= 4;
+    out = &out[4 * BLAKE3_OUT_LEN];
+  }
+  while (num_inputs > 0) {
+    hash_one_neon(inputs[0], blocks, key, counter, flags, flags_start,
+                  flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    num_inputs -= 1;
+    out = &out[BLAKE3_OUT_LEN];
+  }
+}
+
+#endif // BLAKE3_USE_NEON
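blake3_hash_many_neon decomposes its input set into 4-wide batches plus a scalar tail, bumping the counter by the batch size when increment_counter is set. A hedged usage sketch for six full chunks follows; hash_six_chunks_example is hypothetical, and 16 blocks of 64 bytes make one 1024-byte chunk, with CHUNK_START/CHUNK_END applied to each input's first/last block.

#include <stdint.h>
#include "blake3_impl.h"

void hash_six_chunks_example(const uint8_t *const inputs[6],
                             const uint32_t key[8], uint64_t counter,
                             uint8_t out[6 * BLAKE3_OUT_LEN]) {
  // Internally: one blake3_hash4_neon call (counters counter..counter+3)
  // followed by two hash_one_neon calls (counter+4, counter+5).
  blake3_hash_many_neon(inputs, 6, /*blocks=*/16, key, counter,
                        /*increment_counter=*/true, /*flags=*/0,
                        CHUNK_START, CHUNK_END, out);
}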
diff --git a/llvm/lib/Support/BLAKE3/blake3_portable.c b/llvm/lib/Support/BLAKE3/blake3_portable.c
new file mode 100644
index 000000000000..062dd1b47fb6
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_portable.c
@@ -0,0 +1,160 @@
+#include "blake3_impl.h"
+#include <string.h>
+
+INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
+  return (w >> c) | (w << (32 - c));
+}
+
+INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
+              uint32_t x, uint32_t y) {
+  state[a] = state[a] + state[b] + x;
+  state[d] = rotr32(state[d] ^ state[a], 16);
+  state[c] = state[c] + state[d];
+  state[b] = rotr32(state[b] ^ state[c], 12);
+  state[a] = state[a] + state[b] + y;
+  state[d] = rotr32(state[d] ^ state[a], 8);
+  state[c] = state[c] + state[d];
+  state[b] = rotr32(state[b] ^ state[c], 7);
+}
+
+INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
+  // Select the message schedule based on the round.
+  const uint8_t *schedule = MSG_SCHEDULE[round];
+
+  // Mix the columns.
+  g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
+  g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
+  g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
+  g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
+
+  // Mix the rows.
+  g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
+  g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
+  g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
+  g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
+}
+
+INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags) {
+  uint32_t block_words[16];
+  block_words[0] = load32(block + 4 * 0);
+  block_words[1] = load32(block + 4 * 1);
+  block_words[2] = load32(block + 4 * 2);
+  block_words[3] = load32(block + 4 * 3);
+  block_words[4] = load32(block + 4 * 4);
+  block_words[5] = load32(block + 4 * 5);
+  block_words[6] = load32(block + 4 * 6);
+  block_words[7] = load32(block + 4 * 7);
+  block_words[8] = load32(block + 4 * 8);
+  block_words[9] = load32(block + 4 * 9);
+  block_words[10] = load32(block + 4 * 10);
+  block_words[11] = load32(block + 4 * 11);
+  block_words[12] = load32(block + 4 * 12);
+  block_words[13] = load32(block + 4 * 13);
+  block_words[14] = load32(block + 4 * 14);
+  block_words[15] = load32(block + 4 * 15);
+
+  state[0] = cv[0];
+  state[1] = cv[1];
+  state[2] = cv[2];
+  state[3] = cv[3];
+  state[4] = cv[4];
+  state[5] = cv[5];
+  state[6] = cv[6];
+  state[7] = cv[7];
+  state[8] = IV[0];
+  state[9] = IV[1];
+  state[10] = IV[2];
+  state[11] = IV[3];
+  state[12] = counter_low(counter);
+  state[13] = counter_high(counter);
+  state[14] = (uint32_t)block_len;
+  state[15] = (uint32_t)flags;
+
+  round_fn(state, &block_words[0], 0);
+  round_fn(state, &block_words[0], 1);
+  round_fn(state, &block_words[0], 2);
+  round_fn(state, &block_words[0], 3);
+  round_fn(state, &block_words[0], 4);
+  round_fn(state, &block_words[0], 5);
+  round_fn(state, &block_words[0], 6);
+}
+
+void blake3_compress_in_place_portable(uint32_t cv[8],
+                                       const uint8_t block[BLAKE3_BLOCK_LEN],
+                                       uint8_t block_len, uint64_t counter,
+                                       uint8_t flags) {
+  uint32_t state[16];
+  compress_pre(state, cv, block, block_len, counter, flags);
+  cv[0] = state[0] ^ state[8];
+  cv[1] = state[1] ^ state[9];
+  cv[2] = state[2] ^ state[10];
+  cv[3] = state[3] ^ state[11];
+  cv[4] = state[4] ^ state[12];
+  cv[5] = state[5] ^ state[13];
+  cv[6] = state[6] ^ state[14];
+  cv[7] = state[7] ^ state[15];
+}
+
+void blake3_compress_xof_portable(const uint32_t cv[8],
+                                  const uint8_t block[BLAKE3_BLOCK_LEN],
+                                  uint8_t block_len, uint64_t counter,
+                                  uint8_t flags, uint8_t out[64]) {
+  uint32_t state[16];
+  compress_pre(state, cv, block, block_len, counter, flags);
+
+  store32(&out[0 * 4], state[0] ^ state[8]);
+  store32(&out[1 * 4], state[1] ^ state[9]);
+  store32(&out[2 * 4], state[2] ^ state[10]);
+  store32(&out[3 * 4], state[3] ^ state[11]);
+  store32(&out[4 * 4], state[4] ^ state[12]);
+  store32(&out[5 * 4], state[5] ^ state[13]);
+  store32(&out[6 * 4], state[6] ^ state[14]);
+  store32(&out[7 * 4], state[7] ^ state[15]);
+  store32(&out[8 * 4], state[8] ^ cv[0]);
+  store32(&out[9 * 4], state[9] ^ cv[1]);
+  store32(&out[10 * 4], state[10] ^ cv[2]);
+  store32(&out[11 * 4], state[11] ^ cv[3]);
+  store32(&out[12 * 4], state[12] ^ cv[4]);
+  store32(&out[13 * 4], state[13] ^ cv[5]);
+  store32(&out[14 * 4], state[14] ^ cv[6]);
+  store32(&out[15 * 4], state[15] ^ cv[7]);
+}
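blake3_compress_xof_portable is also how extended output works: at the root of the tree, output block t of the XOF stream is produced by re-running the final compression with counter t (the loop that drives this lives in the hasher code, not in this file). A sketch under that assumption:

#include <stdint.h>

// Produce the first 128 bytes of root output from the final chaining
// value and final block. ROOT must be set on these calls only.
static void root_output_example(const uint32_t cv[8],
                                const uint8_t block[BLAKE3_BLOCK_LEN],
                                uint8_t block_len, uint8_t flags,
                                uint8_t out[128]) {
  blake3_compress_xof_portable(cv, block, block_len, /*counter=*/0,
                               flags | ROOT, &out[0]);
  blake3_compress_xof_portable(cv, block, block_len, /*counter=*/1,
                               flags | ROOT, &out[64]);
}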
+
+INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
+                              const uint32_t key[8], uint64_t counter,
+                              uint8_t flags, uint8_t flags_start,
+                              uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  while (blocks > 0) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                      block_flags);
+    input = &input[BLAKE3_BLOCK_LEN];
+    blocks -= 1;
+    block_flags = flags;
+  }
+  store_cv_words(out, cv);
+}
+
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+                               size_t blocks, const uint32_t key[8],
+                               uint64_t counter, bool increment_counter,
+                               uint8_t flags, uint8_t flags_start,
+                               uint8_t flags_end, uint8_t *out) {
+  while (num_inputs > 0) {
+    hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
+                      flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    num_inputs -= 1;
+    out = &out[BLAKE3_OUT_LEN];
+  }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2.c b/llvm/lib/Support/BLAKE3/blake3_sse2.c
new file mode 100644
index 000000000000..f4449ac0b3cd
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2.c
@@ -0,0 +1,566 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 4
+
+#define _mm_shuffle_ps2(a, b, c)                                               \
+  (_mm_castps_si128(                                                           \
+      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
+
+INLINE __m128i loadu(const uint8_t src[16]) {
+  return _mm_loadu_si128((const __m128i *)src);
+}
+
+INLINE void storeu(__m128i src, uint8_t dest[16]) {
+  _mm_storeu_si128((__m128i *)dest, src);
+}
+
+INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
+
+// Note that clang-format doesn't like the name "xor" for some reason.
+INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
+
+INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
+
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
+}
+
+INLINE __m128i rot16(__m128i x) {
+  return _mm_shufflehi_epi16(_mm_shufflelo_epi16(x, 0xB1), 0xB1);
+}
+
+INLINE __m128i rot12(__m128i x) {
+  return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
+}
+
+INLINE __m128i rot8(__m128i x) {
+  return xorv(_mm_srli_epi32(x, 8), _mm_slli_epi32(x, 32 - 8));
+}
+
+INLINE __m128i rot7(__m128i x) {
+  return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
+}
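rot16 above leans on a shuffle identity instead of shifts: rotating a 32-bit lane by 16 is the same as swapping its 16-bit halves, and 0xB1 encodes the (1,0,3,2) element order for _mm_shufflelo/hi_epi16. A self-contained check of the equivalence:

#include <assert.h>
#include <emmintrin.h> // SSE2
#include <stdint.h>

static void check_rot16_shuffle_trick(uint32_t x) {
  __m128i v = _mm_set1_epi32((int32_t)x);
  __m128i by_shuffle = _mm_shufflehi_epi16(_mm_shufflelo_epi16(v, 0xB1), 0xB1);
  __m128i by_shift =
      _mm_xor_si128(_mm_srli_epi32(v, 16), _mm_slli_epi32(v, 32 - 16));
  // All 16 byte lanes must compare equal.
  assert(_mm_movemask_epi8(_mm_cmpeq_epi8(by_shuffle, by_shift)) == 0xFFFF);
}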
See +// discussion at https://github.com/sneves/blake2-avx2/pull/4 +INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1)); +} + +INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3)); +} + +INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) { + const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01); + __m128i mask = _mm_set1_epi16(imm8); + mask = _mm_and_si128(mask, bits); + mask = _mm_cmpeq_epi16(mask, bits); + return _mm_or_si128(_mm_and_si128(mask, b), _mm_andnot_si128(mask, a)); +} + +INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags) { + rows[0] = loadu((uint8_t *)&cv[0]); + rows[1] = loadu((uint8_t *)&cv[4]); + rows[2] = set4(IV[0], IV[1], IV[2], IV[3]); + rows[3] = set4(counter_low(counter), counter_high(counter), + (uint32_t)block_len, (uint32_t)flags); + + __m128i m0 = loadu(&block[sizeof(__m128i) * 0]); + __m128i m1 = loadu(&block[sizeof(__m128i) * 1]); + __m128i m2 = loadu(&block[sizeof(__m128i) * 2]); + __m128i m3 = loadu(&block[sizeof(__m128i) * 3]); + + __m128i t0, t1, t2, t3, tt; + + // Round 1. The first round permutes the message words from the original + // input order, into the groups that get mixed in parallel. + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8 + t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9 + t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 2. This round and all following rounds apply a fixed permutation + // to the message words from the round before. 
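+  // For reference (an annotation, not extra logic in this routine): the
+  // fixed permutation is
+  //   PERM = {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
+  // i.e. the word in slot i of one round is the word that was in slot
+  // PERM[i] of the round before. Applying PERM repeatedly to the identity
+  // yields the MSG_SCHEDULE table indexed by the scalar round functions.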
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 3 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 4 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 5 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 6 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + 
g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 7 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); +} + +void blake3_compress_in_place_sse2(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags) { + __m128i rows[4]; + compress_pre(rows, cv, block, block_len, counter, flags); + storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]); + storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]); +} + +void blake3_compress_xof_sse2(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags, uint8_t out[64]) { + __m128i rows[4]; + compress_pre(rows, cv, block, block_len, counter, flags); + storeu(xorv(rows[0], rows[2]), &out[0]); + storeu(xorv(rows[1], rows[3]), &out[16]); + storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]); + storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]); +} + +INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) { + v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[0] = addv(v[0], v[4]); + v[1] = addv(v[1], v[5]); + v[2] = addv(v[2], v[6]); + v[3] = addv(v[3], v[7]); + v[12] = xorv(v[12], v[0]); + v[13] = xorv(v[13], v[1]); + v[14] = xorv(v[14], v[2]); + v[15] = xorv(v[15], v[3]); + v[12] = rot16(v[12]); + v[13] = rot16(v[13]); + v[14] = rot16(v[14]); + v[15] = rot16(v[15]); + v[8] = addv(v[8], v[12]); + v[9] = addv(v[9], v[13]); + v[10] = addv(v[10], v[14]); + v[11] = addv(v[11], v[15]); + v[4] = xorv(v[4], v[8]); + v[5] = xorv(v[5], v[9]); + v[6] = xorv(v[6], v[10]); + v[7] = xorv(v[7], v[11]); + v[4] = rot12(v[4]); + v[5] = rot12(v[5]); + v[6] = rot12(v[6]); + v[7] = rot12(v[7]); + v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[0] = addv(v[0], v[4]); + v[1] = addv(v[1], v[5]); + v[2] = addv(v[2], v[6]); + v[3] = 
addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[15] = rot8(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot7(v[4]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot16(v[15]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[4] = rot12(v[4]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot8(v[15]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+  v[4] = rot7(v[4]);
+}
+
+INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
+  // 22/33. Note that this doesn't split the vector into two lanes, as the
+  // AVX2 counterparts do.
+  __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+  // Interleave 64-bit lanes.
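+  // For example, with vecs[0] = {a0,a1,a2,a3} and vecs[1] = {b0,b1,b2,b3},
+  // ab_01 = {a0,b0,a1,b1} and ab_23 = {a2,b2,a3,b3}. The 64-bit unpacks
+  // below then produce abcd_i = {ai,bi,ci,di}, completing the 4x4 transpose
+  // of 32-bit words.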
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01); + __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01); + __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23); + __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23); + + vecs[0] = abcd_0; + vecs[1] = abcd_1; + vecs[2] = abcd_2; + vecs[3] = abcd_3; +} + +INLINE void transpose_msg_vecs(const uint8_t *const *inputs, + size_t block_offset, __m128i out[16]) { + out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]); + out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]); + out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]); + out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]); + out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]); + out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]); + out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]); + out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]); + out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]); + out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]); + out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]); + out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]); + out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]); + out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]); + out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]); + out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]); + for (size_t i = 0; i < 4; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs(&out[0]); + transpose_vecs(&out[4]); + transpose_vecs(&out[8]); + transpose_vecs(&out[12]); +} + +INLINE void load_counters(uint64_t counter, bool increment_counter, + __m128i *out_lo, __m128i *out_hi) { + const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter); + const __m128i add0 = _mm_set_epi32(3, 2, 1, 0); + const __m128i add1 = _mm_and_si128(mask, add0); + __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1); + __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)), + _mm_xor_si128( l, _mm_set1_epi32(0x80000000))); + __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry); + *out_lo = l; + *out_hi = h; +} + +static +void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks, + const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + __m128i h_vecs[8] = { + set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]), + set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]), + }; + __m128i counter_low_vec, counter_high_vec; + load_counters(counter, increment_counter, &counter_low_vec, + &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN); + __m128i block_flags_vec = set1(block_flags); + __m128i msg_vecs[16]; + transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m128i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, + }; + round_fn(v, msg_vecs, 0); + round_fn(v, msg_vecs, 1); + round_fn(v, msg_vecs, 2); + round_fn(v, msg_vecs, 3); + round_fn(v, msg_vecs, 4); + round_fn(v, msg_vecs, 5); + round_fn(v, 
msg_vecs, 6);
+    h_vecs[0] = xorv(v[0], v[8]);
+    h_vecs[1] = xorv(v[1], v[9]);
+    h_vecs[2] = xorv(v[2], v[10]);
+    h_vecs[3] = xorv(v[3], v[11]);
+    h_vecs[4] = xorv(v[4], v[12]);
+    h_vecs[5] = xorv(v[5], v[13]);
+    h_vecs[6] = xorv(v[6], v[14]);
+    h_vecs[7] = xorv(v[7], v[15]);
+
+    block_flags = flags;
+  }
+
+  transpose_vecs(&h_vecs[0]);
+  transpose_vecs(&h_vecs[4]);
+  // The first four vecs now contain the first half of each output, and the
+  // second four vecs contain the second half of each output.
+  storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
+  storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
+  storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
+  storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
+  storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
+  storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
+  storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
+  storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
+}
+
+INLINE void hash_one_sse2(const uint8_t *input, size_t blocks,
+                          const uint32_t key[8], uint64_t counter,
+                          uint8_t flags, uint8_t flags_start,
+                          uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  while (blocks > 0) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    blake3_compress_in_place_sse2(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                  block_flags);
+    input = &input[BLAKE3_BLOCK_LEN];
+    blocks -= 1;
+    block_flags = flags;
+  }
+  memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out) {
+  while (num_inputs >= DEGREE) {
+    blake3_hash4_sse2(inputs, blocks, key, counter, increment_counter, flags,
+                      flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += DEGREE;
+    }
+    inputs += DEGREE;
+    num_inputs -= DEGREE;
+    out = &out[DEGREE * BLAKE3_OUT_LEN];
+  }
+  while (num_inputs > 0) {
+    hash_one_sse2(inputs[0], blocks, key, counter, flags, flags_start,
+                  flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    num_inputs -= 1;
+    out = &out[BLAKE3_OUT_LEN];
+  }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
new file mode 100644
index 000000000000..0106b13ba851
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
@@ -0,0 +1,2307 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN blake3_hash_many_sse2
+HIDDEN _blake3_hash_many_sse2
+HIDDEN blake3_compress_in_place_sse2
+HIDDEN _blake3_compress_in_place_sse2
+HIDDEN blake3_compress_xof_sse2
+HIDDEN _blake3_compress_xof_sse2
+.global blake3_hash_many_sse2
+.global _blake3_hash_many_sse2
+.global blake3_compress_in_place_sse2
+.global _blake3_compress_in_place_sse2
+.global blake3_compress_xof_sse2
+.global _blake3_compress_xof_sse2
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+ .p2align 6
+_blake3_hash_many_sse2:
+blake3_hash_many_sse2:
+        _CET_ENDBR
+        push r15
+        push r14
+        push r13
+        push r12
+        push rbx
+        push rbp
+        mov rbp, rsp
+        sub rsp, 360
+        and rsp,
0xFFFFFFFFFFFFFFC0 + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 0x00 + movdqa xmmword ptr [rsp+0x130], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0+rip] + pand xmm0, xmmword ptr [ADD1+rip] + movdqa xmmword ptr [rsp+0x150], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 0x00 + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+0x110], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 0x00 + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + mov rbx, qword ptr [rbp+0x50] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+0x38] + movzx r12d, byte ptr [rbp+0x48] + cmp rsi, 4 + jc 3f +2: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 0x00 + pshufd xmm1, xmm3, 0x55 + pshufd xmm2, xmm3, 0xAA + pshufd xmm3, xmm3, 0xFF + movdqu xmm7, xmmword ptr [rcx+0x10] + pshufd xmm4, xmm7, 0x00 + pshufd xmm5, xmm7, 0x55 + pshufd xmm6, xmm7, 0xAA + pshufd xmm7, xmm7, 0xFF + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +9: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-0x40] + movdqu xmm9, xmmword ptr [r9+rdx-0x40] + movdqu xmm10, xmmword ptr [r10+rdx-0x40] + movdqu xmm11, xmmword ptr [r11+rdx-0x40] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+0x10], xmm9 + movdqa xmmword ptr [rsp+0x20], xmm12 + movdqa xmmword ptr [rsp+0x30], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x30] + movdqu xmm9, xmmword ptr [r9+rdx-0x30] + movdqu xmm10, xmmword ptr [r10+rdx-0x30] + movdqu xmm11, xmmword ptr [r11+rdx-0x30] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x40], xmm8 + movdqa xmmword ptr [rsp+0x50], xmm9 + movdqa xmmword ptr [rsp+0x60], xmm12 + movdqa xmmword ptr [rsp+0x70], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x20] + movdqu xmm9, xmmword ptr [r9+rdx-0x20] + movdqu xmm10, xmmword ptr [r10+rdx-0x20] + movdqu xmm11, xmmword ptr [r11+rdx-0x20] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x80], xmm8 + movdqa xmmword ptr [rsp+0x90], xmm9 + movdqa xmmword ptr [rsp+0xA0], xmm12 + movdqa xmmword ptr [rsp+0xB0], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x10] + movdqu xmm9, xmmword ptr [r9+rdx-0x10] + movdqu xmm10, xmmword ptr [r10+rdx-0x10] + movdqu xmm11, xmmword ptr [r11+rdx-0x10] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr 
[rsp+0xC0], xmm8 + movdqa xmmword ptr [rsp+0xD0], xmm9 + movdqa xmmword ptr [rsp+0xE0], xmm12 + movdqa xmmword ptr [rsp+0xF0], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] + movdqa xmm12, xmmword ptr [rsp+0x110] + movdqa xmm13, xmmword ptr [rsp+0x120] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + movd xmm15, eax + pshufd xmm15, xmm15, 0x00 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x80] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 
+ movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x70] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0xC0] + 
paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld 
xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xB0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x50] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + 
pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 
+ psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xC0] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xA0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 
+ movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, 
xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0x60] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xF0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + 
pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne 9b + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+0x20], xmm1 + movdqu xmmword ptr [rbx+0x40], xmm9 + movdqu xmmword ptr [rbx+0x60], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+0x10], xmm4 + movdqu xmmword ptr [rbx+0x30], xmm5 + movdqu xmmword ptr [rbx+0x50], xmm9 + movdqu xmmword ptr [rbx+0x70], xmm7 + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+0x150] + movdqa xmmword ptr [rsp+0x110], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm0, 
xmm1 + movdqa xmm1, xmmword ptr [rsp+0x120] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+0x120], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc 2b + test rsi, rsi + jnz 3f +4: + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 5 +3: + test esi, 0x2 + je 3f + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+0x110] + movd xmm14, dword ptr [rsp+0x120] + punpckldq xmm13, xmm14 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+0x114] + movd xmm13, dword ptr [rsp+0x124] + punpckldq xmm14, xmm13 + movaps xmmword ptr [rsp+0x10], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 0x93 + movups xmm12, xmmword ptr [r9+rdx-0x40] + movups xmm13, xmmword ptr [r9+rdx-0x30] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-0x20] + movups xmm15, xmmword ptr [r9+rdx-0x10] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 0x93 + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 0x93 + shl rax, 0x20 + or rax, 0x40 + movq xmm3, rax + movdqa xmmword ptr [rsp+0x20], xmm3 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+0x10] + punpcklqdq xmm3, xmmword ptr [rsp+0x20] + punpcklqdq xmm11, xmmword ptr [rsp+0x20] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+0x20], xmm4 + movaps xmmword ptr [rsp+0x30], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + pshuflw xmm11, xmm11, 0xB1 + pshufhw xmm11, xmm11, 0xB1 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+0x40], xmm5 + movaps xmmword ptr [rsp+0x50], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x93 + pshufd xmm8, xmm8, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x39 + pshufd xmm10, xmm10, 0x39 + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + pshuflw xmm11, xmm11, 0xB1 + pshufhw xmm11, xmm11, 0xB1 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, 
xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x39 + pshufd xmm8, xmm8, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x93 + pshufd xmm10, xmm10, 0x93 + dec al + je 9f + movdqa xmm12, xmmword ptr [rsp+0x20] + movdqa xmm5, xmmword ptr [rsp+0x40] + pshufd xmm13, xmm12, 0x0F + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 0x39 + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm13, xmm12 + movdqa xmmword ptr [rsp+0x20], xmm13 + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + movdqa xmm13, xmm6 + pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm12, xmm13 + pshufd xmm12, xmm12, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmmword ptr [rsp+0x40], xmm12 + movdqa xmm5, xmmword ptr [rsp+0x30] + movdqa xmm13, xmmword ptr [rsp+0x50] + pshufd xmm6, xmm5, 0x0F + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 0x39 + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm6, xmm5 + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + movdqa xmmword ptr [rsp+0x30], xmm2 + movdqa xmm2, xmm14 + pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm5, xmm2 + movdqa xmm2, xmmword ptr [rsp+0x30] + pshufd xmm5, xmm5, 0x78 + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 0x1E + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+0x20] + movdqa xmm6, xmmword ptr [rsp+0x40] + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + movups xmmword ptr [rbx+0x20], xmm8 + movups xmmword ptr [rbx+0x30], xmm9 + mov eax, dword ptr [rsp+0x130] + neg eax + mov r10d, dword ptr [rsp+0x110+8*rax] + mov r11d, dword ptr [rsp+0x120+8*rax] + mov dword ptr [rsp+0x110], r10d + mov dword ptr [rsp+0x120], r11d + add rdi, 16 + add rbx, 64 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movd xmm13, dword ptr [rsp+0x110] + movd xmm14, dword ptr [rsp+0x120] + punpckldq xmm13, xmm14 + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + shl rax, 32 + or rax, 64 + movq xmm12, rax + movdqa xmm3, xmm13 + punpcklqdq xmm3, xmm12 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm8, xmm6 
+ shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm10, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm8, xmm10 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + jmp 4b + +.p2align 6 +blake3_compress_in_place_sse2: +_blake3_compress_in_place_sse2: + _CET_ENDBR + movups xmm0, xmmword ptr [rdi] + movups xmm1, xmmword ptr [rdi+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + shl r8, 32 + add rdx, r8 + movq xmm3, rcx + movq xmm4, rdx + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rsi] + movups xmm5, xmmword ptr [rsi+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rsi+0x20] + movups xmm7, xmmword ptr [rsi+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + 
movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm10, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm8, xmm10 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rdi], xmm0 + movups xmmword ptr [rdi+0x10], xmm1 + ret + +.p2align 6 +blake3_compress_xof_sse2: +_blake3_compress_xof_sse2: + _CET_ENDBR + movups xmm0, xmmword ptr [rdi] + movups xmm1, xmmword ptr [rdi+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movzx eax, r8b + movzx edx, dl + shl rax, 32 + add rdx, rax + movq xmm3, rcx + movq xmm4, rdx + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rsi] + movups xmm5, xmmword ptr [rsi+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rsi+0x20] + movups xmm7, xmmword ptr [rsi+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm10, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm8, xmm10 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + movdqu xmm4, xmmword ptr [rdi] + movdqu xmm5, xmmword ptr [rdi+0x10] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r9], xmm0 + movups xmmword ptr [r9+0x10], xmm1 + movups xmmword ptr [r9+0x20], xmm2 + movups xmmword ptr [r9+0x30], xmm3 + ret + + +#ifdef 
__APPLE__ +.static_data +#else +.section .rodata +#endif +.p2align 6 +BLAKE3_IV: + .long 0x6A09E667, 0xBB67AE85 + .long 0x3C6EF372, 0xA54FF53A +ADD0: + .long 0, 1, 2, 3 +ADD1: + .long 4, 4, 4, 4 +BLAKE3_IV_0: + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 64, 64, 64, 64 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 +PBLENDW_0x33_MASK: + .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 +PBLENDW_0xCC_MASK: + .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF +PBLENDW_0x3F_MASK: + .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 +PBLENDW_0xC0_MASK: + .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF + +#endif diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S new file mode 100644 index 000000000000..8852ba5976e1 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S @@ -0,0 +1,2332 @@ +.intel_syntax noprefix +.global blake3_hash_many_sse2 +.global _blake3_hash_many_sse2 +.global blake3_compress_in_place_sse2 +.global _blake3_compress_in_place_sse2 +.global blake3_compress_xof_sse2 +.global _blake3_compress_xof_sse2 +.section .text + .p2align 6 +_blake3_hash_many_sse2: +blake3_hash_many_sse2: + push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 528 + and rsp, 0xFFFFFFFFFFFFFFC0 + movdqa xmmword ptr [rsp+0x170], xmm6 + movdqa xmmword ptr [rsp+0x180], xmm7 + movdqa xmmword ptr [rsp+0x190], xmm8 + movdqa xmmword ptr [rsp+0x1A0], xmm9 + movdqa xmmword ptr [rsp+0x1B0], xmm10 + movdqa xmmword ptr [rsp+0x1C0], xmm11 + movdqa xmmword ptr [rsp+0x1D0], xmm12 + movdqa xmmword ptr [rsp+0x1E0], xmm13 + movdqa xmmword ptr [rsp+0x1F0], xmm14 + movdqa xmmword ptr [rsp+0x200], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+0x68] + movzx r9, byte ptr [rbp+0x70] + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 0x00 + movdqa xmmword ptr [rsp+0x130], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0+rip] + pand xmm0, xmmword ptr [ADD1+rip] + movdqa xmmword ptr [rsp+0x150], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 0x00 + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+0x110], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 0x00 + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + mov rbx, qword ptr [rbp+0x90] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+0x78] + movzx r12d, byte ptr [rbp+0x88] + cmp rsi, 4 + jc 3f +2: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 0x00 + pshufd xmm1, xmm3, 0x55 + pshufd xmm2, xmm3, 0xAA + pshufd xmm3, xmm3, 0xFF + movdqu xmm7, xmmword ptr [rcx+0x10] + pshufd xmm4, xmm7, 0x00 + pshufd xmm5, xmm7, 0x55 + pshufd xmm6, xmm7, 0xAA + pshufd xmm7, xmm7, 0xFF + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +9: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-0x40] + movdqu xmm9, xmmword ptr [r9+rdx-0x40] + movdqu xmm10, xmmword ptr [r10+rdx-0x40] + movdqu xmm11, xmmword ptr 
[r11+rdx-0x40] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+0x10], xmm9 + movdqa xmmword ptr [rsp+0x20], xmm12 + movdqa xmmword ptr [rsp+0x30], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x30] + movdqu xmm9, xmmword ptr [r9+rdx-0x30] + movdqu xmm10, xmmword ptr [r10+rdx-0x30] + movdqu xmm11, xmmword ptr [r11+rdx-0x30] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x40], xmm8 + movdqa xmmword ptr [rsp+0x50], xmm9 + movdqa xmmword ptr [rsp+0x60], xmm12 + movdqa xmmword ptr [rsp+0x70], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x20] + movdqu xmm9, xmmword ptr [r9+rdx-0x20] + movdqu xmm10, xmmword ptr [r10+rdx-0x20] + movdqu xmm11, xmmword ptr [r11+rdx-0x20] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x80], xmm8 + movdqa xmmword ptr [rsp+0x90], xmm9 + movdqa xmmword ptr [rsp+0xA0], xmm12 + movdqa xmmword ptr [rsp+0xB0], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x10] + movdqu xmm9, xmmword ptr [r9+rdx-0x10] + movdqu xmm10, xmmword ptr [r10+rdx-0x10] + movdqu xmm11, xmmword ptr [r11+rdx-0x10] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0xC0], xmm8 + movdqa xmmword ptr [rsp+0xD0], xmm9 + movdqa xmmword ptr [rsp+0xE0], xmm12 + movdqa xmmword ptr [rsp+0xF0], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] + movdqa xmm12, xmmword ptr [rsp+0x110] + movdqa xmm13, xmmword ptr [rsp+0x120] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + movd xmm15, eax + pshufd xmm15, xmm15, 0x00 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + 
psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x80] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, 
xmmword ptr [rsp+0x70] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor 
xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xB0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor 
xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x50] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + 
psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xC0] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xA0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + 
movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 
+ paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0x60] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + 
pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xF0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm5 + paddd 
xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne 9b + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+0x20], xmm1 + movdqu xmmword ptr [rbx+0x40], xmm9 + movdqu xmmword ptr [rbx+0x60], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+0x10], xmm4 + movdqu xmmword ptr [rbx+0x30], xmm5 + movdqu xmmword ptr [rbx+0x50], xmm9 + movdqu xmmword ptr [rbx+0x70], xmm7 + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+0x150] + movdqa xmmword ptr [rsp+0x110], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+0x120] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+0x120], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc 2b + test rsi, rsi + jne 3f +4: + movdqa xmm6, xmmword ptr [rsp+0x170] + movdqa xmm7, xmmword ptr [rsp+0x180] + movdqa xmm8, xmmword ptr [rsp+0x190] + movdqa xmm9, xmmword ptr [rsp+0x1A0] + movdqa xmm10, xmmword ptr [rsp+0x1B0] + movdqa xmm11, xmmword ptr [rsp+0x1C0] + movdqa xmm12, xmmword ptr [rsp+0x1D0] + movdqa xmm13, xmmword ptr [rsp+0x1E0] + movdqa xmm14, xmmword ptr [rsp+0x1F0] + movdqa xmm15, xmmword ptr [rsp+0x200] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 5 +3: + test esi, 0x2 + je 3f + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+0x110] + movd xmm14, dword ptr [rsp+0x120] + punpckldq xmm13, xmm14 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+0x114] + movd xmm13, dword ptr [rsp+0x124] + punpckldq xmm14, xmm13 + movaps xmmword ptr [rsp+0x10], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm10, xmm2 + movups xmm4, xmmword 
ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 0x93 + movups xmm12, xmmword ptr [r9+rdx-0x40] + movups xmm13, xmmword ptr [r9+rdx-0x30] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-0x20] + movups xmm15, xmmword ptr [r9+rdx-0x10] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 0x93 + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 0x93 + shl rax, 0x20 + or rax, 0x40 + movq xmm3, rax + movdqa xmmword ptr [rsp+0x20], xmm3 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+0x10] + punpcklqdq xmm3, xmmword ptr [rsp+0x20] + punpcklqdq xmm11, xmmword ptr [rsp+0x20] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+0x20], xmm4 + movaps xmmword ptr [rsp+0x30], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + pshuflw xmm11, xmm11, 0xB1 + pshufhw xmm11, xmm11, 0xB1 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+0x40], xmm5 + movaps xmmword ptr [rsp+0x50], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x93 + pshufd xmm8, xmm8, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x39 + pshufd xmm10, xmm10, 0x39 + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + pshuflw xmm11, xmm11, 0xB1 + pshufhw xmm11, xmm11, 0xB1 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x39 + pshufd xmm8, xmm8, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x93 + pshufd xmm10, xmm10, 0x93 + dec al + je 9f + movdqa xmm12, xmmword ptr [rsp+0x20] + movdqa xmm5, xmmword ptr [rsp+0x40] + pshufd xmm13, xmm12, 0x0F + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 0x39 + movdqa xmm12, xmm6 + shufps xmm12, 
xmm7, 250 + pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm13, xmm12 + movdqa xmmword ptr [rsp+0x20], xmm13 + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + movdqa xmm13, xmm6 + pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm12, xmm13 + pshufd xmm12, xmm12, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmmword ptr [rsp+0x40], xmm12 + movdqa xmm5, xmmword ptr [rsp+0x30] + movdqa xmm13, xmmword ptr [rsp+0x50] + pshufd xmm6, xmm5, 0x0F + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 0x39 + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm6, xmm5 + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + movdqa xmmword ptr [rsp+0x30], xmm2 + movdqa xmm2, xmm14 + pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm5, xmm2 + movdqa xmm2, xmmword ptr [rsp+0x30] + pshufd xmm5, xmm5, 0x78 + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 0x1E + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+0x20] + movdqa xmm6, xmmword ptr [rsp+0x40] + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + movups xmmword ptr [rbx+0x20], xmm8 + movups xmmword ptr [rbx+0x30], xmm9 + mov eax, dword ptr [rsp+0x130] + neg eax + mov r10d, dword ptr [rsp+0x110+8*rax] + mov r11d, dword ptr [rsp+0x120+8*rax] + mov dword ptr [rsp+0x110], r10d + mov dword ptr [rsp+0x120], r11d + add rdi, 16 + add rbx, 64 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movd xmm13, dword ptr [rsp+0x110] + movd xmm14, dword ptr [rsp+0x120] + punpckldq xmm13, xmm14 + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + shl rax, 32 + or rax, 64 + movq xmm12, rax + movdqa xmm3, xmm13 + punpcklqdq xmm3, xmm12 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld 
xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm10, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm8, xmm10 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + jmp 4b + +.p2align 6 +blake3_compress_in_place_sse2: +_blake3_compress_in_place_sse2: + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+0x10], xmm7 + movdqa xmmword ptr [rsp+0x20], xmm8 + movdqa xmmword ptr [rsp+0x30], xmm9 + movdqa xmmword ptr [rsp+0x40], xmm11 + movdqa xmmword ptr [rsp+0x50], xmm14 + movdqa xmmword ptr [rsp+0x60], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movzx eax, byte ptr [rsp+0xA0] + movzx r8d, r8b + shl rax, 32 + add r8, rax + movq xmm3, r9 + movq xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+0x20] + movups xmm7, xmmword ptr [rdx+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm14, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm8, xmm14 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + 
punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rcx], xmm0 + movups xmmword ptr [rcx+0x10], xmm1 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+0x10] + movdqa xmm8, xmmword ptr [rsp+0x20] + movdqa xmm9, xmmword ptr [rsp+0x30] + movdqa xmm11, xmmword ptr [rsp+0x40] + movdqa xmm14, xmmword ptr [rsp+0x50] + movdqa xmm15, xmmword ptr [rsp+0x60] + add rsp, 120 + ret + + +.p2align 6 +_blake3_compress_xof_sse2: +blake3_compress_xof_sse2: + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+0x10], xmm7 + movdqa xmmword ptr [rsp+0x20], xmm8 + movdqa xmmword ptr [rsp+0x30], xmm9 + movdqa xmmword ptr [rsp+0x40], xmm11 + movdqa xmmword ptr [rsp+0x50], xmm14 + movdqa xmmword ptr [rsp+0x60], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movzx eax, byte ptr [rsp+0xA0] + movzx r8d, r8b + mov r10, qword ptr [rsp+0xA8] + shl rax, 32 + add r8, rax + movq xmm3, r9 + movq xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+0x20] + movups xmm7, xmmword ptr [rdx+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm14, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm8, xmm14 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + movdqu xmm4, xmmword ptr [rcx] + movdqu xmm5, xmmword ptr [rcx+0x10] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r10], xmm0 + movups xmmword ptr [r10+0x10], xmm1 + movups xmmword ptr [r10+0x20], xmm2 + movups xmmword ptr [r10+0x30], xmm3 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+0x10] + movdqa 
xmm8, xmmword ptr [rsp+0x20]
+        movdqa xmm9, xmmword ptr [rsp+0x30]
+        movdqa xmm11, xmmword ptr [rsp+0x40]
+        movdqa xmm14, xmmword ptr [rsp+0x50]
+        movdqa xmm15, xmmword ptr [rsp+0x60]
+        add rsp, 120
+        ret
+
+
+.section .rodata
+.p2align 6
+BLAKE3_IV:
+        .long 0x6A09E667, 0xBB67AE85
+        .long 0x3C6EF372, 0xA54FF53A
+ADD0:
+        .long 0, 1, 2, 3
+ADD1:
+        .long 4, 4, 4, 4
+BLAKE3_IV_0:
+        .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+        .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+        .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+        .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+        .long 64, 64, 64, 64
+CMP_MSB_MASK:
+        .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+# SSE2 has no pblendw; pand/por with the masks below emulate the SSE4.1 blends.
+PBLENDW_0x33_MASK:
+        .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xCC_MASK:
+        .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
+PBLENDW_0x3F_MASK:
+        .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xC0_MASK:
+        .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
new file mode 100644
index 000000000000..507502f11a80
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
@@ -0,0 +1,2350 @@
+public _blake3_hash_many_sse2
+public blake3_hash_many_sse2
+public blake3_compress_in_place_sse2
+public _blake3_compress_in_place_sse2
+public blake3_compress_xof_sse2
+public _blake3_compress_xof_sse2
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_sse2 PROC
+_blake3_hash_many_sse2 PROC
+        push r15
+        push r14
+        push r13
+        push r12
+        push rsi
+        push rdi
+        push rbx
+        push rbp
+        mov rbp, rsp
+        sub rsp, 528
+        and rsp, 0FFFFFFFFFFFFFFC0H
+        movdqa xmmword ptr [rsp+170H], xmm6
+        movdqa xmmword ptr [rsp+180H], xmm7
+        movdqa xmmword ptr [rsp+190H], xmm8
+        movdqa xmmword ptr [rsp+1A0H], xmm9
+        movdqa xmmword ptr [rsp+1B0H], xmm10
+        movdqa xmmword ptr [rsp+1C0H], xmm11
+        movdqa xmmword ptr [rsp+1D0H], xmm12
+        movdqa xmmword ptr [rsp+1E0H], xmm13
+        movdqa xmmword ptr [rsp+1F0H], xmm14
+        movdqa xmmword ptr [rsp+200H], xmm15
+        mov rdi, rcx
+        mov rsi, rdx
+        mov rdx, r8
+        mov rcx, r9
+        mov r8, qword ptr [rbp+68H]
+        movzx r9, byte ptr [rbp+70H]
+        neg r9d
+        movd xmm0, r9d
+        pshufd xmm0, xmm0, 00H
+        movdqa xmmword ptr [rsp+130H], xmm0
+        movdqa xmm1, xmm0
+        pand xmm1, xmmword ptr [ADD0]
+        pand xmm0, xmmword ptr [ADD1]
+        movdqa xmmword ptr [rsp+150H], xmm0
+        movd xmm0, r8d
+        pshufd xmm0, xmm0, 00H
+        paddd xmm0, xmm1
+        movdqa xmmword ptr [rsp+110H], xmm0
+        pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+        pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+        pcmpgtd xmm1, xmm0
+        shr r8, 32
+        movd xmm2, r8d
+        pshufd xmm2, xmm2, 00H
+        psubd xmm2, xmm1
+        movdqa xmmword ptr [rsp+120H], xmm2
+        mov rbx, qword ptr [rbp+90H]
+        mov r15, rdx
+        shl r15, 6
+        movzx r13d, byte ptr [rbp+78H]
+        movzx r12d, byte ptr [rbp+88H]
+        cmp rsi, 4
+        jc final3blocks
+outerloop4:
+        movdqu xmm3, xmmword ptr [rcx]
+        pshufd xmm0, xmm3, 00H
+        pshufd xmm1, xmm3, 55H
+        pshufd xmm2, xmm3, 0AAH
+        pshufd xmm3, xmm3, 0FFH
+        movdqu xmm7, xmmword ptr [rcx+10H]
+        pshufd xmm4, xmm7, 00H
+        pshufd xmm5, xmm7, 55H
+        pshufd xmm6, xmm7, 0AAH
+        pshufd xmm7, xmm7, 0FFH
+        mov r8, qword ptr [rdi]
+        mov r9, qword ptr [rdi+8H]
+        mov r10, qword ptr [rdi+10H]
+        mov r11, qword ptr [rdi+18H]
+        movzx eax, byte ptr [rbp+80H]
+        or eax, r13d
+        xor edx, edx
+innerloop4:
+        mov r14d, eax
+        or eax, r12d
+        add rdx, 64
+        cmp rdx, r15
+        cmovne eax, r14d
+        movdqu xmm8,
xmmword ptr [r8+rdx-40H] + movdqu xmm9, xmmword ptr [r9+rdx-40H] + movdqu xmm10, xmmword ptr [r10+rdx-40H] + movdqu xmm11, xmmword ptr [r11+rdx-40H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+10H], xmm9 + movdqa xmmword ptr [rsp+20H], xmm12 + movdqa xmmword ptr [rsp+30H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-30H] + movdqu xmm9, xmmword ptr [r9+rdx-30H] + movdqu xmm10, xmmword ptr [r10+rdx-30H] + movdqu xmm11, xmmword ptr [r11+rdx-30H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+40H], xmm8 + movdqa xmmword ptr [rsp+50H], xmm9 + movdqa xmmword ptr [rsp+60H], xmm12 + movdqa xmmword ptr [rsp+70H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-20H] + movdqu xmm9, xmmword ptr [r9+rdx-20H] + movdqu xmm10, xmmword ptr [r10+rdx-20H] + movdqu xmm11, xmmword ptr [r11+rdx-20H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+80H], xmm8 + movdqa xmmword ptr [rsp+90H], xmm9 + movdqa xmmword ptr [rsp+0A0H], xmm12 + movdqa xmmword ptr [rsp+0B0H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-10H] + movdqu xmm9, xmmword ptr [r9+rdx-10H] + movdqu xmm10, xmmword ptr [r10+rdx-10H] + movdqu xmm11, xmmword ptr [r11+rdx-10H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0C0H], xmm8 + movdqa xmmword ptr [rsp+0D0H], xmm9 + movdqa xmmword ptr [rsp+0E0H], xmm12 + movdqa xmmword ptr [rsp+0F0H], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3] + movdqa xmm12, xmmword ptr [rsp+110H] + movdqa xmm13, xmmword ptr [rsp+120H] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN] + movd xmm15, eax + pshufd xmm15, xmm15, 00H + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+40H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr [BLAKE3_IV_0] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword 
ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+10H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+50H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+80H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+0C0H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+90H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+0D0H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por 
xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+20H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+70H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+60H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+10H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+90H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0B0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+0E0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 
24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+30H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+0D0H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+40H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+20H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+60H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+0B0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, 
xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+50H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0F0H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0A0H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+0E0H] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+70H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+30H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + 
pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+40H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+50H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+80H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0C0H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+0F0H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0D0H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+0A0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 
+ pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+70H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+20H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+10H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+90H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+80H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr 
[rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0E0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+0C0H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0D0H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+20H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+30H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+60H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + 
psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0B0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+10H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0F0H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+90H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0E0H] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+30H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0B1H + pshufhw xmm15, xmm15, 0B1H + pshuflw xmm12, xmm12, 0B1H + pshufhw xmm12, xmm12, 0B1H + pshuflw xmm13, xmm13, 0B1H + pshufhw xmm13, xmm13, 0B1H + pshuflw xmm14, xmm14, 0B1H + pshufhw xmm14, xmm14, 0B1H + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0A0H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+40H] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm5 + paddd 
xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne innerloop4 + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+20H], xmm1 + movdqu xmmword ptr [rbx+40H], xmm9 + movdqu xmmword ptr [rbx+60H], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+10H], xmm4 + movdqu xmmword ptr [rbx+30H], xmm5 + movdqu xmmword ptr [rbx+50H], xmm9 + movdqu xmmword ptr [rbx+70H], xmm7 + movdqa xmm1, xmmword ptr [rsp+110H] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+150H] + movdqa xmmword ptr [rsp+110H], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK] + pxor xmm1, xmmword ptr [CMP_MSB_MASK] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+120H] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+120H], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc outerloop4 + test rsi, rsi + jne final3blocks +unwind: + movdqa xmm6, xmmword ptr [rsp+170H] + movdqa xmm7, xmmword ptr [rsp+180H] + movdqa xmm8, xmmword ptr [rsp+190H] + movdqa xmm9, xmmword ptr [rsp+1A0H] + movdqa xmm10, xmmword ptr [rsp+1B0H] + movdqa xmm11, xmmword ptr [rsp+1C0H] + movdqa xmm12, xmmword ptr [rsp+1D0H] + movdqa xmm13, xmmword ptr [rsp+1E0H] + movdqa xmm14, xmmword ptr [rsp+1F0H] + movdqa xmm15, xmmword ptr [rsp+200H] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final3blocks: + test esi, 2H + je final1block + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+110H] + movd xmm14, dword ptr [rsp+120H] + punpckldq xmm13, xmm14 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+114H] + movd xmm13, dword ptr [rsp+124H] + punpckldq xmm14, xmm13 + movaps xmmword ptr [rsp+10H], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV] + movaps xmm10, xmm2 + movups 
xmm4, xmmword ptr [r8+rdx-40H] + movups xmm5, xmmword ptr [r8+rdx-30H] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-20H] + movups xmm7, xmmword ptr [r8+rdx-10H] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 93H + movups xmm12, xmmword ptr [r9+rdx-40H] + movups xmm13, xmmword ptr [r9+rdx-30H] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-20H] + movups xmm15, xmmword ptr [r9+rdx-10H] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 93H + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 93H + shl rax, 20H + or rax, 40H + movd xmm3, rax + movdqa xmmword ptr [rsp+20H], xmm3 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+10H] + punpcklqdq xmm3, xmmword ptr [rsp+20H] + punpcklqdq xmm11, xmmword ptr [rsp+20H] + mov al, 7 +roundloop2: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+20H], xmm4 + movaps xmmword ptr [rsp+30H], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + pshuflw xmm11, xmm11, 0B1H + pshufhw xmm11, xmm11, 0B1H + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+40H], xmm5 + movaps xmmword ptr [rsp+50H], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 93H + pshufd xmm8, xmm8, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm11, xmm11, 4EH + pshufd xmm2, xmm2, 39H + pshufd xmm10, xmm10, 39H + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + pshuflw xmm11, xmm11, 0B1H + pshufhw xmm11, xmm11, 0B1H + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 39H + pshufd xmm8, xmm8, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm11, xmm11, 4EH + pshufd xmm2, xmm2, 93H + pshufd xmm10, xmm10, 93H + dec al + je endroundloop2 + movdqa xmm12, xmmword ptr [rsp+20H] + movdqa xmm5, xmmword ptr [rsp+40H] + pshufd xmm13, xmm12, 0FH + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 39H + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 
250 + pand xmm13, xmmword ptr [PBLENDW_0x33_MASK] + pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK] + por xmm13, xmm12 + movdqa xmmword ptr [rsp+20H], xmm13 + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + movdqa xmm13, xmm6 + pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK] + pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK] + por xmm12, xmm13 + pshufd xmm12, xmm12, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmmword ptr [rsp+40H], xmm12 + movdqa xmm5, xmmword ptr [rsp+30H] + movdqa xmm13, xmmword ptr [rsp+50H] + pshufd xmm6, xmm5, 0FH + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 39H + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pand xmm6, xmmword ptr [PBLENDW_0x33_MASK] + pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK] + por xmm6, xmm5 + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + movdqa xmmword ptr [rsp+30H], xmm2 + movdqa xmm2, xmm14 + pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK] + pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK] + por xmm5, xmm2 + movdqa xmm2, xmmword ptr [rsp+30H] + pshufd xmm5, xmm5, 78H + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 1EH + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+20H] + movdqa xmm6, xmmword ptr [rsp+40H] + jmp roundloop2 +endroundloop2: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+10H], xmm1 + movups xmmword ptr [rbx+20H], xmm8 + movups xmmword ptr [rbx+30H], xmm9 + mov eax, dword ptr [rsp+130H] + neg eax + mov r10d, dword ptr [rsp+110H+8*rax] + mov r11d, dword ptr [rsp+120H+8*rax] + mov dword ptr [rsp+110H], r10d + mov dword ptr [rsp+120H], r11d + add rdi, 16 + add rbx, 64 + sub rsi, 2 +final1block: + test esi, 1H + je unwind + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movd xmm13, dword ptr [rsp+110H] + movd xmm14, dword ptr [rsp+120H] + punpckldq xmm13, xmm14 + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV] + shl rax, 32 + or rax, 64 + movd xmm12, rax + movdqa xmm3, xmm13 + punpcklqdq xmm3, xmm12 + movups xmm4, xmmword ptr [r8+rdx-40H] + movups xmm5, xmmword ptr [r8+rdx-30H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-20H] + movups xmm7, xmmword ptr [r8+rdx-10H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + mov al, 7 +roundloop1: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor 
xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm10, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK] + pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK] + por xmm8, xmm10 + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+10H], xmm1 + jmp unwind +_blake3_hash_many_sse2 ENDP +blake3_hash_many_sse2 ENDP + +blake3_compress_in_place_sse2 PROC +_blake3_compress_in_place_sse2 PROC + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+10H], xmm7 + movdqa xmmword ptr [rsp+20H], xmm8 + movdqa xmmword ptr [rsp+30H], xmm9 + movdqa xmmword ptr [rsp+40H], xmm11 + movdqa xmmword ptr [rsp+50H], xmm14 + movdqa xmmword ptr [rsp+60H], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm2, xmmword ptr [BLAKE3_IV] + movzx eax, byte ptr [rsp+0A0H] + movzx r8d, r8b + shl rax, 32 + add r8, rax + movd xmm3, r9 + movd xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+10H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+20H] + movups xmm7, xmmword ptr [rdx+30H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + mov al, 7 +@@: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz @F + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm14, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK] + pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK] + por xmm8, xmm14 + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, 
xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp @B +@@: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rcx], xmm0 + movups xmmword ptr [rcx+10H], xmm1 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+10H] + movdqa xmm8, xmmword ptr [rsp+20H] + movdqa xmm9, xmmword ptr [rsp+30H] + movdqa xmm11, xmmword ptr [rsp+40H] + movdqa xmm14, xmmword ptr [rsp+50H] + movdqa xmm15, xmmword ptr [rsp+60H] + add rsp, 120 + ret +_blake3_compress_in_place_sse2 ENDP +blake3_compress_in_place_sse2 ENDP + +ALIGN 16 +blake3_compress_xof_sse2 PROC +_blake3_compress_xof_sse2 PROC + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+10H], xmm7 + movdqa xmmword ptr [rsp+20H], xmm8 + movdqa xmmword ptr [rsp+30H], xmm9 + movdqa xmmword ptr [rsp+40H], xmm11 + movdqa xmmword ptr [rsp+50H], xmm14 + movdqa xmmword ptr [rsp+60H], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm2, xmmword ptr [BLAKE3_IV] + movzx eax, byte ptr [rsp+0A0H] + movzx r8d, r8b + mov r10, qword ptr [rsp+0A8H] + shl rax, 32 + add r8, rax + movd xmm3, r9 + movd xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+10H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+20H] + movups xmm7, xmmword ptr [rdx+30H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + mov al, 7 +@@: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0B1H + pshufhw xmm3, xmm3, 0B1H + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz @F + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pand xmm9, xmmword ptr [PBLENDW_0x33_MASK] + pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK] + por xmm9, xmm8 + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + movdqa xmm14, xmm6 + pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK] + pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK] + por xmm8, xmm14 + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp @B +@@: + movdqu xmm4, xmmword ptr [rcx] + movdqu xmm5, xmmword ptr [rcx+10H] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r10], xmm0 + movups xmmword ptr [r10+10H], xmm1 + movups xmmword ptr [r10+20H], xmm2 + movups xmmword ptr [r10+30H], xmm3 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, 
xmmword ptr [rsp+10H]
+        movdqa xmm8, xmmword ptr [rsp+20H]
+        movdqa xmm9, xmmword ptr [rsp+30H]
+        movdqa xmm11, xmmword ptr [rsp+40H]
+        movdqa xmm14, xmmword ptr [rsp+50H]
+        movdqa xmm15, xmmword ptr [rsp+60H]
+        add rsp, 120
+        ret
+_blake3_compress_xof_sse2 ENDP
+blake3_compress_xof_sse2 ENDP
+
+_TEXT ENDS
+
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+BLAKE3_IV:
+        dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+ADD0:
+        dd 0, 1, 2, 3
+
+ADD1:
+        dd 4 dup (4)
+
+BLAKE3_IV_0:
+        dd 4 dup (6A09E667H)
+
+BLAKE3_IV_1:
+        dd 4 dup (0BB67AE85H)
+
+BLAKE3_IV_2:
+        dd 4 dup (3C6EF372H)
+
+BLAKE3_IV_3:
+        dd 4 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN:
+        dd 4 dup (64)
+
+CMP_MSB_MASK:
+        dd 8 dup(80000000H)
+
+; SSE2 has no pblendw; pand/por with the masks below emulate the SSE4.1 blends.
+PBLENDW_0x33_MASK:
+        dd 0FFFFFFFFH, 000000000H, 0FFFFFFFFH, 000000000H
+PBLENDW_0xCC_MASK:
+        dd 000000000H, 0FFFFFFFFH, 000000000H, 0FFFFFFFFH
+PBLENDW_0x3F_MASK:
+        dd 0FFFFFFFFH, 0FFFFFFFFH, 0FFFFFFFFH, 000000000H
+PBLENDW_0xC0_MASK:
+        dd 000000000H, 000000000H, 000000000H, 0FFFFFFFFH
+
+_RDATA ENDS
+END
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41.c b/llvm/lib/Support/BLAKE3/blake3_sse41.c
new file mode 100644
index 000000000000..87a8dae15ce9
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41.c
@@ -0,0 +1,560 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 4
+
+#define _mm_shuffle_ps2(a, b, c)                                               \
+  (_mm_castps_si128(                                                           \
+      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
+
+INLINE __m128i loadu(const uint8_t src[16]) {
+  return _mm_loadu_si128((const __m128i *)src);
+}
+
+INLINE void storeu(__m128i src, uint8_t dest[16]) {
+  _mm_storeu_si128((__m128i *)dest, src);
+}
+
+INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
+
+// Note that clang-format doesn't like the name "xor" for some reason.
+INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
+
+INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
+
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
+}
+
+INLINE __m128i rot16(__m128i x) {
+  return _mm_shuffle_epi8(
+      x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
+}
+
+INLINE __m128i rot12(__m128i x) {
+  return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
+}
+
+INLINE __m128i rot8(__m128i x) {
+  return _mm_shuffle_epi8(
+      x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
+}
+
+INLINE __m128i rot7(__m128i x) {
+  return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
+}
+
+INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+               __m128i m) {
+  *row0 = addv(addv(*row0, m), *row1);
+  *row3 = xorv(*row3, *row0);
+  *row3 = rot16(*row3);
+  *row2 = addv(*row2, *row3);
+  *row1 = xorv(*row1, *row2);
+  *row1 = rot12(*row1);
+}
+
+INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+               __m128i m) {
+  *row0 = addv(addv(*row0, m), *row1);
+  *row3 = xorv(*row3, *row0);
+  *row3 = rot8(*row3);
+  *row2 = addv(*row2, *row3);
+  *row1 = xorv(*row1, *row2);
+  *row1 = rot7(*row1);
+}
+
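+// For orientation: g1 and g2 together form one BLAKE3 G function, applied to
+// four columns at once (each lane of an __m128i holds one column's word). An
+// equivalent scalar sketch -- illustrative only, with hypothetical helper
+// names rotr32/g_scalar that do not appear in this file:
+//
+//   static inline uint32_t rotr32(uint32_t w, uint32_t c) {
+//     return (w >> c) | (w << (32 - c));
+//   }
+//   static inline void g_scalar(uint32_t *a, uint32_t *b, uint32_t *c,
+//                               uint32_t *d, uint32_t mx, uint32_t my) {
+//     *a += *b + mx; *d = rotr32(*d ^ *a, 16); // g1
+//     *c += *d;      *b = rotr32(*b ^ *c, 12); // g1
+//     *a += *b + my; *d = rotr32(*d ^ *a, 8);  // g2
+//     *c += *d;      *b = rotr32(*b ^ *c, 7);  // g2
+//   }
+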
+// Note the optimization here of leaving row1 as the unrotated row, rather than
+// row0. All the message loads below are adjusted to compensate for this. See
+// discussion at https://github.com/sneves/blake2-avx2/pull/4
+INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
+  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
+}
+
+INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
+  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
+}
+
+INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags) {
+  rows[0] = loadu((uint8_t *)&cv[0]);
+  rows[1] = loadu((uint8_t *)&cv[4]);
+  rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
+  rows[3] = set4(counter_low(counter), counter_high(counter),
+                 (uint32_t)block_len, (uint32_t)flags);
+
+  __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
+  __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
+  __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
+  __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
+
+  __m128i t0, t1, t2, t3, tt;
+
+  // Round 1. The first round permutes the message words from the original
+  // input order, into the groups that get mixed in parallel.
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); //  6  4  2  0
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); //  7  5  3  1
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10  8
+  t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3));   // 12 10  8 14
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11  9
+  t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3));   // 13 11  9 15
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 2. This round and all following rounds apply a fixed permutation
+  // to the message words from the round before.
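+  // (For reference: that fixed permutation is the BLAKE3 message schedule
+  // 2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8, realized below
+  // with shuffles and blends on m0..m3 instead of a lookup table.)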
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 3
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 4
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 5
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 6
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 7
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+}
+
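+// compress_pre leaves the full 16-word state in rows[0..3]. The two entry
+// points below differ only in the final feed-forward: the in-place variant
+// folds rows 2 and 3 back into the chaining value, while the xof variant
+// also xors the original cv into rows 2 and 3 to emit 64 output bytes.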
+void blake3_compress_in_place_sse41(uint32_t cv[8],
+                                    const uint8_t block[BLAKE3_BLOCK_LEN],
+                                    uint8_t block_len, uint64_t counter,
+                                    uint8_t flags) {
+  __m128i rows[4];
+  compress_pre(rows, cv, block, block_len, counter, flags);
+  storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]);
+  storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]);
+}
+
+void blake3_compress_xof_sse41(const uint32_t cv[8],
+                               const uint8_t block[BLAKE3_BLOCK_LEN],
+                               uint8_t block_len, uint64_t counter,
+                               uint8_t flags, uint8_t out[64]) {
+  __m128i rows[4];
+  compress_pre(rows, cv, block, block_len, counter, flags);
+  storeu(xorv(rows[0], rows[2]), &out[0]);
+  storeu(xorv(rows[1], rows[3]), &out[16]);
+  storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]);
+  storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]);
+}
+
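+// A minimal usage sketch (an added example, not from the upstream file; the
+// flag and counter arguments here are placeholders, real values come from
+// the callers elsewhere in the BLAKE3 sources):
+//
+//   uint32_t cv[8];          // chaining value, already initialized
+//   uint8_t block[64] = {0}; // one padded 64-byte input block
+//   uint8_t out[64];
+//   blake3_compress_xof_sse41(cv, block, 64, 0, 0, out);
+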
+INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[15] = rot16(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot12(v[4]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[15] = rot8(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot7(v[4]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot16(v[15]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[4] = rot12(v[4]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot8(v[15]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+  v[4] = rot7(v[4]);
+}
+
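+// round_fn is the 4-way transposed form of the same round function: each of
+// v[0..15] holds one state word from four independent hashes, so every
+// operation above advances all four inputs at once.
+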
+INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
+  // 22/33. Note that this doesn't split the vector into two lanes, as the
+  // AVX2 counterparts do.
+  __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+  // Interleave 64-bit lanes.
+  __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
+  __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
+  __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
+  __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
+
+  vecs[0] = abcd_0;
+  vecs[1] = abcd_1;
+  vecs[2] = abcd_2;
+  vecs[3] = abcd_3;
+}
+
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+                               size_t block_offset, __m128i out[16]) {
+  out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
+  out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
+  out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
+  out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
+  out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
+  out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
+  out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
+  out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
+  out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
+  out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
+  out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
+  out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
+  out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
+  out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
+  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
+  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
+  for (size_t i = 0; i < 4; ++i) {
+    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+  }
+  transpose_vecs(&out[0]);
+  transpose_vecs(&out[4]);
+  transpose_vecs(&out[8]);
+  transpose_vecs(&out[12]);
+}
+
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+                          __m128i *out_lo, __m128i *out_hi) {
+  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
+  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
+  const __m128i add1 = _mm_and_si128(mask, add0);
+  __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
+  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
+                                  _mm_xor_si128(l, _mm_set1_epi32(0x80000000)));
+  __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
+  *out_lo = l;
+  *out_hi = h;
+}
+
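+// Per-lane scalar equivalent (an added sketch for reference, not from the
+// upstream file): for lane i in 0..3, with increment_counter enabled,
+//
+//   uint32_t lo = (uint32_t)counter + i;
+//   uint32_t hi = (uint32_t)(counter >> 32) + (lo < i); // carry on wrap
+//
+// The xor with 0x80000000 above turns that unsigned carry test into a
+// signed _mm_cmpgt_epi32.
+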
+static
+void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
+                        const uint32_t key[8], uint64_t counter,
+                        bool increment_counter, uint8_t flags,
+                        uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+  __m128i h_vecs[8] = {
+      set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
+      set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
+  };
+  __m128i counter_low_vec, counter_high_vec;
+  load_counters(counter, increment_counter, &counter_low_vec,
+                &counter_high_vec);
+  uint8_t block_flags = flags | flags_start;
+
+  for (size_t block = 0; block < blocks; block++) {
+    if (block + 1 == blocks) {
+      block_flags |= flags_end;
+    }
+    __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+    __m128i block_flags_vec = set1(block_flags);
+    __m128i msg_vecs[16];
+    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+    __m128i v[16] = {
+        h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+        h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+        set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]),
+        counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+    };
+    round_fn(v, msg_vecs, 0);
+    round_fn(v, msg_vecs, 1);
+    round_fn(v, msg_vecs, 2);
+    round_fn(v, msg_vecs, 3);
+    round_fn(v, msg_vecs, 4);
+    round_fn(v, msg_vecs, 5);
+    round_fn(v, msg_vecs, 6);
+    h_vecs[0] = xorv(v[0], v[8]);
+    h_vecs[1] = xorv(v[1], v[9]);
+    h_vecs[2] = xorv(v[2], v[10]);
+    h_vecs[3] = xorv(v[3], v[11]);
+    h_vecs[4] = xorv(v[4], v[12]);
+    h_vecs[5] = xorv(v[5], v[13]);
+    h_vecs[6] = xorv(v[6], v[14]);
+    h_vecs[7] = xorv(v[7], v[15]);
+
+    block_flags = flags;
+  }
+
+  transpose_vecs(&h_vecs[0]);
+  transpose_vecs(&h_vecs[4]);
+  // The first four vecs now contain the first half of each output, and the
+  // second four vecs contain the second half of each output.
+  storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
+  storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
+  storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
+  storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
+  storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
+  storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
+  storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
+  storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
+}
+
+INLINE void hash_one_sse41(const uint8_t *input, size_t blocks,
+                           const uint32_t key[8], uint64_t counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  while (blocks > 0) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    blake3_compress_in_place_sse41(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                   block_flags);
+    input = &input[BLAKE3_BLOCK_LEN];
+    blocks -= 1;
+    block_flags = flags;
+  }
+  memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+                            size_t blocks, const uint32_t key[8],
+                            uint64_t counter, bool increment_counter,
+                            uint8_t flags, uint8_t flags_start,
+                            uint8_t flags_end, uint8_t *out) {
+  while (num_inputs >= DEGREE) {
+    blake3_hash4_sse41(inputs, blocks, key, counter, increment_counter, flags,
+                       flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += DEGREE;
+    }
+    inputs += DEGREE;
+    num_inputs -= DEGREE;
+    out = &out[DEGREE * BLAKE3_OUT_LEN];
+  }
+  while (num_inputs > 0) {
+    hash_one_sse41(inputs[0], blocks, key, counter, flags, flags_start,
+                   flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    num_inputs -= 1;
+    out = &out[BLAKE3_OUT_LEN];
+  }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
new file mode 100644
index 000000000000..4e918c5bb2cc
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
@@ -0,0 +1,2044 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN blake3_hash_many_sse41
+HIDDEN _blake3_hash_many_sse41
+HIDDEN blake3_compress_in_place_sse41
+HIDDEN _blake3_compress_in_place_sse41
+HIDDEN blake3_compress_xof_sse41
+HIDDEN _blake3_compress_xof_sse41
+.global blake3_hash_many_sse41
+.global _blake3_hash_many_sse41
+.global blake3_compress_in_place_sse41
+.global _blake3_compress_in_place_sse41
+.global blake3_compress_xof_sse41
+.global _blake3_compress_xof_sse41
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+ .p2align 6
+_blake3_hash_many_sse41:
+blake3_hash_many_sse41:
+        _CET_ENDBR
+        push r15
+        push r14
+        push r13
+        push r12
+        push rbx
+        push rbp
+        mov rbp, rsp
+        sub rsp, 360
+        and
rsp, 0xFFFFFFFFFFFFFFC0 + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 0x00 + movdqa xmmword ptr [rsp+0x130], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0+rip] + pand xmm0, xmmword ptr [ADD1+rip] + movdqa xmmword ptr [rsp+0x150], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 0x00 + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+0x110], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 0x00 + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + mov rbx, qword ptr [rbp+0x50] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+0x38] + movzx r12d, byte ptr [rbp+0x48] + cmp rsi, 4 + jc 3f +2: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 0x00 + pshufd xmm1, xmm3, 0x55 + pshufd xmm2, xmm3, 0xAA + pshufd xmm3, xmm3, 0xFF + movdqu xmm7, xmmword ptr [rcx+0x10] + pshufd xmm4, xmm7, 0x00 + pshufd xmm5, xmm7, 0x55 + pshufd xmm6, xmm7, 0xAA + pshufd xmm7, xmm7, 0xFF + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +9: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-0x40] + movdqu xmm9, xmmword ptr [r9+rdx-0x40] + movdqu xmm10, xmmword ptr [r10+rdx-0x40] + movdqu xmm11, xmmword ptr [r11+rdx-0x40] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+0x10], xmm9 + movdqa xmmword ptr [rsp+0x20], xmm12 + movdqa xmmword ptr [rsp+0x30], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x30] + movdqu xmm9, xmmword ptr [r9+rdx-0x30] + movdqu xmm10, xmmword ptr [r10+rdx-0x30] + movdqu xmm11, xmmword ptr [r11+rdx-0x30] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x40], xmm8 + movdqa xmmword ptr [rsp+0x50], xmm9 + movdqa xmmword ptr [rsp+0x60], xmm12 + movdqa xmmword ptr [rsp+0x70], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x20] + movdqu xmm9, xmmword ptr [r9+rdx-0x20] + movdqu xmm10, xmmword ptr [r10+rdx-0x20] + movdqu xmm11, xmmword ptr [r11+rdx-0x20] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x80], xmm8 + movdqa xmmword ptr [rsp+0x90], xmm9 + movdqa xmmword ptr [rsp+0xA0], xmm12 + movdqa xmmword ptr [rsp+0xB0], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x10] + movdqu xmm9, xmmword ptr [r9+rdx-0x10] + movdqu xmm10, xmmword ptr [r10+rdx-0x10] + movdqu xmm11, xmmword ptr [r11+rdx-0x10] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr 
[rsp+0xC0], xmm8 + movdqa xmmword ptr [rsp+0xD0], xmm9 + movdqa xmmword ptr [rsp+0xE0], xmm12 + movdqa xmmword ptr [rsp+0xF0], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] + movdqa xmm12, xmmword ptr [rsp+0x110] + movdqa xmm13, xmmword ptr [rsp+0x120] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + movd xmm15, eax + pshufd xmm15, xmm15, 0x00 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x80] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd 
xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x70] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 
20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xB0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por 
xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x50] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, 
xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xC0] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xA0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr 
[ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword 
ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0x60] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xF0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa 
xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne 9b + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+0x20], xmm1 + movdqu xmmword ptr [rbx+0x40], xmm9 + movdqu xmmword ptr [rbx+0x60], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+0x10], xmm4 + movdqu xmmword ptr [rbx+0x30], xmm5 + movdqu xmmword ptr [rbx+0x50], xmm9 + movdqu xmmword ptr [rbx+0x70], xmm7 + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+0x150] + movdqa xmmword ptr [rsp+0x110], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+0x120] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+0x120], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc 2b + test rsi, rsi + jnz 3f +4: + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 5 +3: + test esi, 0x2 + je 3f + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps 
xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+0x110] + pinsrd xmm13, dword ptr [rsp+0x120], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+0x114] + pinsrd xmm14, dword ptr [rsp+0x124], 1 + pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmmword ptr [rsp+0x10], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 0x93 + movups xmm12, xmmword ptr [r9+rdx-0x40] + movups xmm13, xmmword ptr [r9+rdx-0x30] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-0x20] + movups xmm15, xmmword ptr [r9+rdx-0x10] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 0x93 + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 0x93 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+0x10] + pinsrd xmm3, eax, 3 + pinsrd xmm11, eax, 3 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+0x20], xmm4 + movaps xmmword ptr [rsp+0x30], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm12, xmmword ptr [ROT16+rip] + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+0x40], xmm5 + movaps xmmword ptr [rsp+0x50], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm13, xmmword ptr [ROT8+rip] + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x93 + pshufd xmm8, xmm8, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x39 + pshufd xmm10, xmm10, 0x39 + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x39 + pshufd xmm8, xmm8, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 
0x93 + pshufd xmm10, xmm10, 0x93 + dec al + je 9f + movdqa xmm12, xmmword ptr [rsp+0x20] + movdqa xmm5, xmmword ptr [rsp+0x40] + pshufd xmm13, xmm12, 0x0F + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 0x39 + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pblendw xmm13, xmm12, 0xCC + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + pblendw xmm12, xmm6, 0xC0 + pshufd xmm12, xmm12, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmmword ptr [rsp+0x20], xmm13 + movdqa xmmword ptr [rsp+0x40], xmm12 + movdqa xmm5, xmmword ptr [rsp+0x30] + movdqa xmm13, xmmword ptr [rsp+0x50] + pshufd xmm6, xmm5, 0x0F + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 0x39 + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pblendw xmm6, xmm5, 0xCC + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + pblendw xmm5, xmm14, 0xC0 + pshufd xmm5, xmm5, 0x78 + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 0x1E + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+0x20] + movdqa xmm6, xmmword ptr [rsp+0x40] + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + movups xmmword ptr [rbx+0x20], xmm8 + movups xmmword ptr [rbx+0x30], xmm9 + movdqa xmm0, xmmword ptr [rsp+0x130] + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm2, xmmword ptr [rsp+0x120] + movdqu xmm3, xmmword ptr [rsp+0x118] + movdqu xmm4, xmmword ptr [rsp+0x128] + blendvps xmm1, xmm3, xmm0 + blendvps xmm2, xmm4, xmm0 + movdqa xmmword ptr [rsp+0x110], xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + add rdi, 16 + add rbx, 64 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movd xmm13, dword ptr [rsp+0x110] + pinsrd xmm13, dword ptr [rsp+0x120], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm3, xmm13 + pinsrd xmm3, eax, 3 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd 
xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + jmp 4b + +.p2align 6 +blake3_compress_in_place_sse41: +_blake3_compress_in_place_sse41: + _CET_ENDBR + movups xmm0, xmmword ptr [rdi] + movups xmm1, xmmword ptr [rdi+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + shl r8, 32 + add rdx, r8 + movq xmm3, rcx + movq xmm4, rdx + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rsi] + movups xmm5, xmmword ptr [rsi+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rsi+0x20] + movups xmm7, xmmword ptr [rsi+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rdi], xmm0 + movups xmmword ptr [rdi+0x10], xmm1 + ret + +.p2align 6 +blake3_compress_xof_sse41: +_blake3_compress_xof_sse41: + _CET_ENDBR + movups xmm0, xmmword ptr [rdi] + movups xmm1, xmmword ptr [rdi+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movzx eax, r8b + movzx edx, dl + shl rax, 32 + add rdx, rax + movq xmm3, rcx + movq xmm4, rdx + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rsi] + movups xmm5, xmmword ptr [rsi+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rsi+0x20] + movups xmm7, xmmword ptr [rsi+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd 
xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + movdqu xmm4, xmmword ptr [rdi] + movdqu xmm5, xmmword ptr [rdi+0x10] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r9], xmm0 + movups xmmword ptr [r9+0x10], xmm1 + movups xmmword ptr [r9+0x20], xmm2 + movups xmmword ptr [r9+0x30], xmm3 + ret + + +#ifdef __APPLE__ +.static_data +#else +.section .rodata +#endif +.p2align 6 +BLAKE3_IV: + .long 0x6A09E667, 0xBB67AE85 + .long 0x3C6EF372, 0xA54FF53A +ROT16: + .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +ROT8: + .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 +ADD0: + .long 0, 1, 2, 3 +ADD1: + .long 4, 4, 4, 4 +BLAKE3_IV_0: + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 64, 64, 64, 64 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + +#endif diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S new file mode 100644 index 000000000000..60d0a4042e71 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S @@ -0,0 +1,2069 @@ +.intel_syntax noprefix +.global blake3_hash_many_sse41 +.global _blake3_hash_many_sse41 +.global blake3_compress_in_place_sse41 +.global _blake3_compress_in_place_sse41 +.global blake3_compress_xof_sse41 +.global _blake3_compress_xof_sse41 +.section .text + .p2align 6 +_blake3_hash_many_sse41: +blake3_hash_many_sse41: + push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 528 + and rsp, 0xFFFFFFFFFFFFFFC0 + movdqa xmmword ptr [rsp+0x170], xmm6 + movdqa xmmword ptr [rsp+0x180], xmm7 + movdqa xmmword ptr [rsp+0x190], xmm8 + movdqa xmmword ptr [rsp+0x1A0], xmm9 + movdqa xmmword ptr [rsp+0x1B0], xmm10 + movdqa xmmword ptr [rsp+0x1C0], xmm11 + movdqa xmmword ptr [rsp+0x1D0], xmm12 + movdqa xmmword ptr [rsp+0x1E0], xmm13 + movdqa xmmword ptr [rsp+0x1F0], xmm14 + movdqa xmmword ptr [rsp+0x200], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr 
[rbp+0x68] + movzx r9, byte ptr [rbp+0x70] + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 0x00 + movdqa xmmword ptr [rsp+0x130], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0+rip] + pand xmm0, xmmword ptr [ADD1+rip] + movdqa xmmword ptr [rsp+0x150], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 0x00 + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+0x110], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 0x00 + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + mov rbx, qword ptr [rbp+0x90] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+0x78] + movzx r12d, byte ptr [rbp+0x88] + cmp rsi, 4 + jc 3f +2: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 0x00 + pshufd xmm1, xmm3, 0x55 + pshufd xmm2, xmm3, 0xAA + pshufd xmm3, xmm3, 0xFF + movdqu xmm7, xmmword ptr [rcx+0x10] + pshufd xmm4, xmm7, 0x00 + pshufd xmm5, xmm7, 0x55 + pshufd xmm6, xmm7, 0xAA + pshufd xmm7, xmm7, 0xFF + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +9: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-0x40] + movdqu xmm9, xmmword ptr [r9+rdx-0x40] + movdqu xmm10, xmmword ptr [r10+rdx-0x40] + movdqu xmm11, xmmword ptr [r11+rdx-0x40] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+0x10], xmm9 + movdqa xmmword ptr [rsp+0x20], xmm12 + movdqa xmmword ptr [rsp+0x30], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x30] + movdqu xmm9, xmmword ptr [r9+rdx-0x30] + movdqu xmm10, xmmword ptr [r10+rdx-0x30] + movdqu xmm11, xmmword ptr [r11+rdx-0x30] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x40], xmm8 + movdqa xmmword ptr [rsp+0x50], xmm9 + movdqa xmmword ptr [rsp+0x60], xmm12 + movdqa xmmword ptr [rsp+0x70], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x20] + movdqu xmm9, xmmword ptr [r9+rdx-0x20] + movdqu xmm10, xmmword ptr [r10+rdx-0x20] + movdqu xmm11, xmmword ptr [r11+rdx-0x20] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x80], xmm8 + movdqa xmmword ptr [rsp+0x90], xmm9 + movdqa xmmword ptr [rsp+0xA0], xmm12 + movdqa xmmword ptr [rsp+0xB0], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x10] + movdqu xmm9, xmmword ptr [r9+rdx-0x10] + movdqu xmm10, xmmword ptr [r10+rdx-0x10] + movdqu xmm11, xmmword ptr [r11+rdx-0x10] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + 
movdqa xmmword ptr [rsp+0xC0], xmm8 + movdqa xmmword ptr [rsp+0xD0], xmm9 + movdqa xmmword ptr [rsp+0xE0], xmm12 + movdqa xmmword ptr [rsp+0xF0], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] + movdqa xmm12, xmmword ptr [rsp+0x110] + movdqa xmm13, xmmword ptr [rsp+0x120] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + movd xmm15, eax + pshufd xmm15, xmm15, 0x00 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x80] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd 
xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x70] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld 
xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xB0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + 
pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x50] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr 
[rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xC0] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xA0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + 
movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr 
[rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0x60] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xF0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld 
xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne 9b + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+0x20], xmm1 + movdqu xmmword ptr [rbx+0x40], xmm9 + movdqu xmmword ptr [rbx+0x60], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+0x10], xmm4 + movdqu xmmword ptr [rbx+0x30], xmm5 + movdqu xmmword ptr [rbx+0x50], xmm9 + movdqu xmmword ptr [rbx+0x70], xmm7 + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+0x150] + movdqa xmmword ptr [rsp+0x110], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+0x120] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+0x120], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc 2b + test rsi, rsi + jne 3f +4: + movdqa xmm6, xmmword ptr [rsp+0x170] + movdqa xmm7, xmmword ptr [rsp+0x180] + movdqa xmm8, xmmword ptr [rsp+0x190] + movdqa xmm9, xmmword ptr [rsp+0x1A0] + movdqa 
xmm10, xmmword ptr [rsp+0x1B0] + movdqa xmm11, xmmword ptr [rsp+0x1C0] + movdqa xmm12, xmmword ptr [rsp+0x1D0] + movdqa xmm13, xmmword ptr [rsp+0x1E0] + movdqa xmm14, xmmword ptr [rsp+0x1F0] + movdqa xmm15, xmmword ptr [rsp+0x200] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +.p2align 5 +3: + test esi, 0x2 + je 3f + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+0x110] + pinsrd xmm13, dword ptr [rsp+0x120], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+0x114] + pinsrd xmm14, dword ptr [rsp+0x124], 1 + pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmmword ptr [rsp+0x10], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 0x93 + movups xmm12, xmmword ptr [r9+rdx-0x40] + movups xmm13, xmmword ptr [r9+rdx-0x30] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-0x20] + movups xmm15, xmmword ptr [r9+rdx-0x10] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 0x93 + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 0x93 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+0x10] + pinsrd xmm3, eax, 3 + pinsrd xmm11, eax, 3 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+0x20], xmm4 + movaps xmmword ptr [rsp+0x30], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm12, xmmword ptr [ROT16+rip] + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+0x40], xmm5 + movaps xmmword ptr [rsp+0x50], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm13, xmmword ptr [ROT8+rip] + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x93 + pshufd xmm8, xmm8, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x39 + pshufd xmm10, xmm10, 0x39 + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd 
xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x39 + pshufd xmm8, xmm8, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x93 + pshufd xmm10, xmm10, 0x93 + dec al + je 9f + movdqa xmm12, xmmword ptr [rsp+0x20] + movdqa xmm5, xmmword ptr [rsp+0x40] + pshufd xmm13, xmm12, 0x0F + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 0x39 + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pblendw xmm13, xmm12, 0xCC + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + pblendw xmm12, xmm6, 0xC0 + pshufd xmm12, xmm12, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmmword ptr [rsp+0x20], xmm13 + movdqa xmmword ptr [rsp+0x40], xmm12 + movdqa xmm5, xmmword ptr [rsp+0x30] + movdqa xmm13, xmmword ptr [rsp+0x50] + pshufd xmm6, xmm5, 0x0F + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 0x39 + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pblendw xmm6, xmm5, 0xCC + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + pblendw xmm5, xmm14, 0xC0 + pshufd xmm5, xmm5, 0x78 + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 0x1E + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+0x20] + movdqa xmm6, xmmword ptr [rsp+0x40] + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + movups xmmword ptr [rbx+0x20], xmm8 + movups xmmword ptr [rbx+0x30], xmm9 + movdqa xmm0, xmmword ptr [rsp+0x130] + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm2, xmmword ptr [rsp+0x120] + movdqu xmm3, xmmword ptr [rsp+0x118] + movdqu xmm4, xmmword ptr [rsp+0x128] + blendvps xmm1, xmm3, xmm0 + blendvps xmm2, xmm4, xmm0 + movdqa xmmword ptr [rsp+0x110], xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + add rdi, 16 + add rbx, 64 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movd xmm13, dword ptr [rsp+0x110] + pinsrd xmm13, dword ptr [rsp+0x120], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x80] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm3, xmm13 + pinsrd xmm3, eax, 3 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd 
xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + jmp 4b + +.p2align 6 +blake3_compress_in_place_sse41: +_blake3_compress_in_place_sse41: + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+0x10], xmm7 + movdqa xmmword ptr [rsp+0x20], xmm8 + movdqa xmmword ptr [rsp+0x30], xmm9 + movdqa xmmword ptr [rsp+0x40], xmm11 + movdqa xmmword ptr [rsp+0x50], xmm14 + movdqa xmmword ptr [rsp+0x60], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movzx eax, byte ptr [rsp+0xA0] + movzx r8d, r8b + shl rax, 32 + add r8, rax + movq xmm3, r9 + movq xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+0x20] + movups xmm7, xmmword ptr [rdx+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rcx], xmm0 + movups xmmword ptr [rcx+0x10], xmm1 + movdqa xmm6, xmmword ptr [rsp] + 
movdqa xmm7, xmmword ptr [rsp+0x10] + movdqa xmm8, xmmword ptr [rsp+0x20] + movdqa xmm9, xmmword ptr [rsp+0x30] + movdqa xmm11, xmmword ptr [rsp+0x40] + movdqa xmm14, xmmword ptr [rsp+0x50] + movdqa xmm15, xmmword ptr [rsp+0x60] + add rsp, 120 + ret + + +.p2align 6 +_blake3_compress_xof_sse41: +blake3_compress_xof_sse41: + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+0x10], xmm7 + movdqa xmmword ptr [rsp+0x20], xmm8 + movdqa xmmword ptr [rsp+0x30], xmm9 + movdqa xmmword ptr [rsp+0x40], xmm11 + movdqa xmmword ptr [rsp+0x50], xmm14 + movdqa xmmword ptr [rsp+0x60], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movzx eax, byte ptr [rsp+0xA0] + movzx r8d, r8b + mov r10, qword ptr [rsp+0xA8] + shl rax, 32 + add r8, rax + movq xmm3, r9 + movq xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+0x20] + movups xmm7, xmmword ptr [rdx+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + movdqu xmm4, xmmword ptr [rcx] + movdqu xmm5, xmmword ptr [rcx+0x10] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r10], xmm0 + movups xmmword ptr [r10+0x10], xmm1 + movups xmmword ptr [r10+0x20], xmm2 + movups xmmword ptr [r10+0x30], xmm3 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+0x10] + movdqa xmm8, xmmword ptr [rsp+0x20] + movdqa xmm9, xmmword ptr [rsp+0x30] + movdqa xmm11, xmmword ptr [rsp+0x40] + movdqa xmm14, xmmword ptr [rsp+0x50] + movdqa xmm15, xmmword ptr [rsp+0x60] + add rsp, 120 + ret + + +.section .rodata +.p2align 6 +BLAKE3_IV: + .long 0x6A09E667, 0xBB67AE85 + .long 0x3C6EF372, 0xA54FF53A +ROT16: + .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +ROT8: + .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 +ADD0: + .long 0, 1, 2, 3 +ADD1: + .long 4, 4, 4, 4 +BLAKE3_IV_0: + .long 
0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 64, 64, 64, 64 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm new file mode 100644 index 000000000000..8966c7b84406 --- /dev/null +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm @@ -0,0 +1,2089 @@ +public _blake3_hash_many_sse41 +public blake3_hash_many_sse41 +public blake3_compress_in_place_sse41 +public _blake3_compress_in_place_sse41 +public blake3_compress_xof_sse41 +public _blake3_compress_xof_sse41 + +_TEXT SEGMENT ALIGN(16) 'CODE' + +ALIGN 16 +blake3_hash_many_sse41 PROC +_blake3_hash_many_sse41 PROC + push r15 + push r14 + push r13 + push r12 + push rsi + push rdi + push rbx + push rbp + mov rbp, rsp + sub rsp, 528 + and rsp, 0FFFFFFFFFFFFFFC0H + movdqa xmmword ptr [rsp+170H], xmm6 + movdqa xmmword ptr [rsp+180H], xmm7 + movdqa xmmword ptr [rsp+190H], xmm8 + movdqa xmmword ptr [rsp+1A0H], xmm9 + movdqa xmmword ptr [rsp+1B0H], xmm10 + movdqa xmmword ptr [rsp+1C0H], xmm11 + movdqa xmmword ptr [rsp+1D0H], xmm12 + movdqa xmmword ptr [rsp+1E0H], xmm13 + movdqa xmmword ptr [rsp+1F0H], xmm14 + movdqa xmmword ptr [rsp+200H], xmm15 + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, qword ptr [rbp+68H] + movzx r9, byte ptr [rbp+70H] + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 00H + movdqa xmmword ptr [rsp+130H], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0] + pand xmm0, xmmword ptr [ADD1] + movdqa xmmword ptr [rsp+150H], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 00H + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+110H], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK] + pxor xmm1, xmmword ptr [CMP_MSB_MASK] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 00H + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+120H], xmm2 + mov rbx, qword ptr [rbp+90H] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+78H] + movzx r12d, byte ptr [rbp+88H] + cmp rsi, 4 + jc final3blocks +outerloop4: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 00H + pshufd xmm1, xmm3, 55H + pshufd xmm2, xmm3, 0AAH + pshufd xmm3, xmm3, 0FFH + movdqu xmm7, xmmword ptr [rcx+10H] + pshufd xmm4, xmm7, 00H + pshufd xmm5, xmm7, 55H + pshufd xmm6, xmm7, 0AAH + pshufd xmm7, xmm7, 0FFH + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + mov r10, qword ptr [rdi+10H] + mov r11, qword ptr [rdi+18H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop4: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-40H] + movdqu xmm9, xmmword ptr [r9+rdx-40H] + movdqu xmm10, xmmword ptr [r10+rdx-40H] + movdqu xmm11, xmmword ptr [r11+rdx-40H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+10H], xmm9 + movdqa xmmword ptr [rsp+20H], xmm12 + movdqa xmmword ptr [rsp+30H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-30H] + movdqu xmm9, xmmword ptr [r9+rdx-30H] + movdqu xmm10, 
xmmword ptr [r10+rdx-30H] + movdqu xmm11, xmmword ptr [r11+rdx-30H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+40H], xmm8 + movdqa xmmword ptr [rsp+50H], xmm9 + movdqa xmmword ptr [rsp+60H], xmm12 + movdqa xmmword ptr [rsp+70H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-20H] + movdqu xmm9, xmmword ptr [r9+rdx-20H] + movdqu xmm10, xmmword ptr [r10+rdx-20H] + movdqu xmm11, xmmword ptr [r11+rdx-20H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+80H], xmm8 + movdqa xmmword ptr [rsp+90H], xmm9 + movdqa xmmword ptr [rsp+0A0H], xmm12 + movdqa xmmword ptr [rsp+0B0H], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-10H] + movdqu xmm9, xmmword ptr [r9+rdx-10H] + movdqu xmm10, xmmword ptr [r10+rdx-10H] + movdqu xmm11, xmmword ptr [r11+rdx-10H] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0C0H], xmm8 + movdqa xmmword ptr [rsp+0D0H], xmm9 + movdqa xmmword ptr [rsp+0E0H], xmm12 + movdqa xmmword ptr [rsp+0F0H], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3] + movdqa xmm12, xmmword ptr [rsp+110H] + movdqa xmm13, xmmword ptr [rsp+120H] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN] + movd xmm15, eax + pshufd xmm15, xmm15, 00H + prefetcht0 byte ptr [r8+rdx+80H] + prefetcht0 byte ptr [r9+rdx+80H] + prefetcht0 byte ptr [r10+rdx+80H] + prefetcht0 byte ptr [r11+rdx+80H] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+40H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+10H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+50H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd 
xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+80H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+0C0H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+90H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+0D0H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+20H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+70H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+60H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, 
xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+10H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+90H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0B0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+0E0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+30H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+0D0H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+40H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+20H] + paddd xmm3, xmmword ptr [rsp+0E0H] 
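+ ; Note on the quarter-round pattern repeated throughout this loop: each
+ ; step adds a message word and the paired row, XORs the opposite row,
+ ; then rotates. SSE4.1 has no 32-bit vector rotate, so rotr 12 and
+ ; rotr 7 are synthesized from a shift pair plus OR, e.g.
+ ;   movdqa xmm8, xmm4
+ ;   psrld  xmm8, 12
+ ;   pslld  xmm4, 20
+ ;   por    xmm4, xmm8        ; xmm4 = rotr32(xmm4, 12)
+ ; The byte-aligned rotr 16 and rotr 8 instead use a single pshufb each,
+ ; with the ROT16/ROT8 shuffle masks defined in the read-only data
+ ; section at the end of this file.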
+ paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+60H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+0B0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+50H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0F0H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0A0H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+0E0H] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 
+ por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+70H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+30H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+40H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+50H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+80H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0C0H] + paddd xmm1, xmmword ptr [rsp+90H] + paddd xmm2, xmmword ptr [rsp+0F0H] + paddd xmm3, xmmword ptr [rsp+0E0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 
12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0D0H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+0A0H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+70H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+20H] + paddd xmm1, xmmword ptr [rsp+30H] + paddd xmm2, xmmword ptr [rsp+10H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+90H] + paddd xmm1, xmmword ptr [rsp+0B0H] + paddd xmm2, xmmword ptr [rsp+80H] + paddd xmm3, xmmword ptr [rsp+0F0H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + 
pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0E0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+0C0H] + paddd xmm3, xmmword ptr [rsp+10H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0D0H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+20H] + paddd xmm3, xmmword ptr [rsp+40H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+30H] + paddd xmm1, xmmword ptr [rsp+0A0H] + paddd xmm2, xmmword ptr [rsp+60H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0B0H] + paddd xmm1, xmmword ptr [rsp+50H] + paddd xmm2, xmmword ptr [rsp+10H] + paddd xmm3, xmmword ptr [rsp+80H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, 
xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0F0H] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+90H] + paddd xmm3, xmmword ptr [rsp+60H] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0E0H] + paddd xmm1, xmmword ptr [rsp+20H] + paddd xmm2, xmmword ptr [rsp+30H] + paddd xmm3, xmmword ptr [rsp+70H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+100H], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0A0H] + paddd xmm1, xmmword ptr [rsp+0C0H] + paddd xmm2, xmmword ptr [rsp+40H] + paddd xmm3, xmmword ptr [rsp+0D0H] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+100H] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne innerloop4 + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq 
xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+20H], xmm1 + movdqu xmmword ptr [rbx+40H], xmm9 + movdqu xmmword ptr [rbx+60H], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+10H], xmm4 + movdqu xmmword ptr [rbx+30H], xmm5 + movdqu xmmword ptr [rbx+50H], xmm9 + movdqu xmmword ptr [rbx+70H], xmm7 + movdqa xmm1, xmmword ptr [rsp+110H] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+150H] + movdqa xmmword ptr [rsp+110H], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK] + pxor xmm1, xmmword ptr [CMP_MSB_MASK] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+120H] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+120H], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc outerloop4 + test rsi, rsi + jne final3blocks +unwind: + movdqa xmm6, xmmword ptr [rsp+170H] + movdqa xmm7, xmmword ptr [rsp+180H] + movdqa xmm8, xmmword ptr [rsp+190H] + movdqa xmm9, xmmword ptr [rsp+1A0H] + movdqa xmm10, xmmword ptr [rsp+1B0H] + movdqa xmm11, xmmword ptr [rsp+1C0H] + movdqa xmm12, xmmword ptr [rsp+1D0H] + movdqa xmm13, xmmword ptr [rsp+1E0H] + movdqa xmm14, xmmword ptr [rsp+1F0H] + movdqa xmm15, xmmword ptr [rsp+200H] + mov rsp, rbp + pop rbp + pop rbx + pop rdi + pop rsi + pop r12 + pop r13 + pop r14 + pop r15 + ret +ALIGN 16 +final3blocks: + test esi, 2H + je final1block + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+110H] + pinsrd xmm13, dword ptr [rsp+120H], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+114H] + pinsrd xmm14, dword ptr [rsp+124H], 1 + pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2 + movaps xmmword ptr [rsp+10H], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+8H] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-40H] + movups xmm5, xmmword ptr [r8+rdx-30H] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-20H] + movups xmm7, xmmword ptr [r8+rdx-10H] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 93H + movups xmm12, xmmword ptr [r9+rdx-40H] + movups xmm13, xmmword ptr [r9+rdx-30H] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-20H] + movups xmm15, xmmword ptr [r9+rdx-10H] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 93H + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 93H + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+10H] + pinsrd xmm3, eax, 3 + pinsrd xmm11, eax, 3 + mov al, 7 +roundloop2: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+20H], xmm4 + movaps xmmword ptr [rsp+30H], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm12, xmmword ptr [ROT16] + pshufb xmm3, xmm12 + 
pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+40H], xmm5 + movaps xmmword ptr [rsp+50H], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm13, xmmword ptr [ROT8] + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 93H + pshufd xmm8, xmm8, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm11, xmm11, 4EH + pshufd xmm2, xmm2, 39H + pshufd xmm10, xmm10, 39H + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 39H + pshufd xmm8, xmm8, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm11, xmm11, 4EH + pshufd xmm2, xmm2, 93H + pshufd xmm10, xmm10, 93H + dec al + je endroundloop2 + movdqa xmm12, xmmword ptr [rsp+20H] + movdqa xmm5, xmmword ptr [rsp+40H] + pshufd xmm13, xmm12, 0FH + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 39H + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pblendw xmm13, xmm12, 0CCH + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + pblendw xmm12, xmm6, 0C0H + pshufd xmm12, xmm12, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmmword ptr [rsp+20H], xmm13 + movdqa xmmword ptr [rsp+40H], xmm12 + movdqa xmm5, xmmword ptr [rsp+30H] + movdqa xmm13, xmmword ptr [rsp+50H] + pshufd xmm6, xmm5, 0FH + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 39H + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pblendw xmm6, xmm5, 0CCH + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + pblendw xmm5, xmm14, 0C0H + pshufd xmm5, xmm5, 78H + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 1EH + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+20H] + movdqa xmm6, xmmword ptr [rsp+40H] + jmp roundloop2 +endroundloop2: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne innerloop2 + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+10H], xmm1 + movups xmmword ptr [rbx+20H], xmm8 + movups xmmword ptr [rbx+30H], xmm9 + movdqa xmm0, xmmword ptr [rsp+130H] + movdqa xmm1, xmmword ptr [rsp+110H] + movdqa xmm2, xmmword ptr [rsp+120H] + movdqu xmm3, xmmword ptr [rsp+118H] + movdqu xmm4, xmmword ptr [rsp+128H] + blendvps xmm1, xmm3, xmm0 + blendvps xmm2, xmm4, xmm0 + movdqa xmmword ptr [rsp+110H], xmm1 + movdqa xmmword ptr [rsp+120H], xmm2 + add rdi, 16 + add rbx, 64 + sub rsi, 2 +final1block: + test esi, 1H + je unwind + movups xmm0, xmmword ptr [rcx] + movups 
xmm1, xmmword ptr [rcx+10H] + movd xmm13, dword ptr [rsp+110H] + pinsrd xmm13, dword ptr [rsp+120H], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2 + movaps xmm14, xmmword ptr [ROT8] + movaps xmm15, xmmword ptr [ROT16] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+80H] + or eax, r13d + xor edx, edx +innerloop1: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV] + movaps xmm3, xmm13 + pinsrd xmm3, eax, 3 + movups xmm4, xmmword ptr [r8+rdx-40H] + movups xmm5, xmmword ptr [r8+rdx-30H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-20H] + movups xmm7, xmmword ptr [r8+rdx-10H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + mov al, 7 +roundloop1: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz endroundloop1 + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0CCH + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0C0H + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp roundloop1 +endroundloop1: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne innerloop1 + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+10H], xmm1 + jmp unwind +_blake3_hash_many_sse41 ENDP +blake3_hash_many_sse41 ENDP + +blake3_compress_in_place_sse41 PROC +_blake3_compress_in_place_sse41 PROC + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+10H], xmm7 + movdqa xmmword ptr [rsp+20H], xmm8 + movdqa xmmword ptr [rsp+30H], xmm9 + movdqa xmmword ptr [rsp+40H], xmm11 + movdqa xmmword ptr [rsp+50H], xmm14 + movdqa xmmword ptr [rsp+60H], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm2, xmmword ptr [BLAKE3_IV] + movzx eax, byte ptr [rsp+0A0H] + movzx r8d, r8b + shl rax, 32 + add r8, rax + movd xmm3, r9 + movd xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+10H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+20H] + movups xmm7, xmmword ptr [rdx+30H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + movaps xmm14, xmmword ptr [ROT8] + movaps xmm15, xmmword ptr [ROT16] + mov al, 7 +@@: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + 
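+ ; The pshufd shuffles with 93H, 4EH and 39H above rotate the state rows
+ ; by one, two and three lanes to line up the diagonals for the second
+ ; half of the round; the inverse shuffles (39H/4EH/93H) below restore
+ ; the column layout, so the 4x4 state never has to leave the registers.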
paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz @F + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0CCH + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0C0H + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp @B +@@: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rcx], xmm0 + movups xmmword ptr [rcx+10H], xmm1 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+10H] + movdqa xmm8, xmmword ptr [rsp+20H] + movdqa xmm9, xmmword ptr [rsp+30H] + movdqa xmm11, xmmword ptr [rsp+40H] + movdqa xmm14, xmmword ptr [rsp+50H] + movdqa xmm15, xmmword ptr [rsp+60H] + add rsp, 120 + ret +_blake3_compress_in_place_sse41 ENDP +blake3_compress_in_place_sse41 ENDP + +ALIGN 16 +blake3_compress_xof_sse41 PROC +_blake3_compress_xof_sse41 PROC + sub rsp, 120 + movdqa xmmword ptr [rsp], xmm6 + movdqa xmmword ptr [rsp+10H], xmm7 + movdqa xmmword ptr [rsp+20H], xmm8 + movdqa xmmword ptr [rsp+30H], xmm9 + movdqa xmmword ptr [rsp+40H], xmm11 + movdqa xmmword ptr [rsp+50H], xmm14 + movdqa xmmword ptr [rsp+60H], xmm15 + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+10H] + movaps xmm2, xmmword ptr [BLAKE3_IV] + movzx eax, byte ptr [rsp+0A0H] + movzx r8d, r8b + mov r10, qword ptr [rsp+0A8H] + shl rax, 32 + add r8, rax + movd xmm3, r9 + movd xmm4, r8 + punpcklqdq xmm3, xmm4 + movups xmm4, xmmword ptr [rdx] + movups xmm5, xmmword ptr [rdx+10H] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rdx+20H] + movups xmm7, xmmword ptr [rdx+30H] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 93H + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 93H + movaps xmm14, xmmword ptr [ROT8] + movaps xmm15, xmmword ptr [ROT16] + mov al, 7 +@@: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 93H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 39H + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld 
xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 39H + pshufd xmm3, xmm3, 4EH + pshufd xmm2, xmm2, 93H + dec al + jz @F + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0FH + pshufd xmm4, xmm8, 39H + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0CCH + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0C0H + pshufd xmm8, xmm8, 78H + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 1EH + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp @B +@@: + movdqu xmm4, xmmword ptr [rcx] + movdqu xmm5, xmmword ptr [rcx+10H] + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm2, xmm4 + pxor xmm3, xmm5 + movups xmmword ptr [r10], xmm0 + movups xmmword ptr [r10+10H], xmm1 + movups xmmword ptr [r10+20H], xmm2 + movups xmmword ptr [r10+30H], xmm3 + movdqa xmm6, xmmword ptr [rsp] + movdqa xmm7, xmmword ptr [rsp+10H] + movdqa xmm8, xmmword ptr [rsp+20H] + movdqa xmm9, xmmword ptr [rsp+30H] + movdqa xmm11, xmmword ptr [rsp+40H] + movdqa xmm14, xmmword ptr [rsp+50H] + movdqa xmm15, xmmword ptr [rsp+60H] + add rsp, 120 + ret +_blake3_compress_xof_sse41 ENDP +blake3_compress_xof_sse41 ENDP + +_TEXT ENDS + + +_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' +ALIGN 64 +BLAKE3_IV: + dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH + +ADD0: + dd 0, 1, 2, 3 + +ADD1: + dd 4 dup (4) + +BLAKE3_IV_0: + dd 4 dup (6A09E667H) + +BLAKE3_IV_1: + dd 4 dup (0BB67AE85H) + +BLAKE3_IV_2: + dd 4 dup (3C6EF372H) + +BLAKE3_IV_3: + dd 4 dup (0A54FF53AH) + +BLAKE3_BLOCK_LEN: + dd 4 dup (64) + +ROT16: + db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 + +ROT8: + db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 + +CMP_MSB_MASK: + dd 8 dup(80000000H) + +_RDATA ENDS +END + diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp index 8c9efa0ed9a9..dc4ea200c7be 100644 --- a/llvm/lib/Support/BinaryStreamWriter.cpp +++ b/llvm/lib/Support/BinaryStreamWriter.cpp @@ -8,7 +8,6 @@ #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/BinaryStreamError.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/LEB128.h" @@ -94,10 +93,11 @@ BinaryStreamWriter::split(uint64_t Off) const { Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint64_t NewOffset = alignTo(Offset, Align); - if (NewOffset > getLength()) - return make_error(stream_error_code::stream_too_short); + const uint64_t ZerosSize = 64; + static constexpr char Zeros[ZerosSize] = {}; while (Offset < NewOffset) - if (auto EC = writeInteger('\0')) - return EC; + if (auto E = writeArray( + ArrayRef(Zeros, std::min(ZerosSize, NewOffset - Offset)))) + return E; return Error::success(); } diff --git a/llvm/lib/Support/CSKYAttributeParser.cpp b/llvm/lib/Support/CSKYAttributeParser.cpp new file mode 100644 index 000000000000..ea1ac9232315 --- /dev/null +++ b/llvm/lib/Support/CSKYAttributeParser.cpp @@ -0,0 +1,155 @@ +//===-- CSKYAttributeParser.cpp - CSKY Attribute Parser -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CSKYAttributeParser.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Errc.h" + +using namespace llvm; + +const CSKYAttributeParser::DisplayHandler + CSKYAttributeParser::displayRoutines[] = { + { + CSKYAttrs::CSKY_ARCH_NAME, + &ELFAttributeParser::stringAttribute, + }, + { + CSKYAttrs::CSKY_CPU_NAME, + &ELFAttributeParser::stringAttribute, + }, + { + CSKYAttrs::CSKY_ISA_FLAGS, + &ELFAttributeParser::integerAttribute, + }, + { + CSKYAttrs::CSKY_ISA_EXT_FLAGS, + &ELFAttributeParser::integerAttribute, + }, + { + CSKYAttrs::CSKY_DSP_VERSION, + &CSKYAttributeParser::dspVersion, + }, + { + CSKYAttrs::CSKY_VDSP_VERSION, + &CSKYAttributeParser::vdspVersion, + }, + { + CSKYAttrs::CSKY_FPU_VERSION, + &CSKYAttributeParser::fpuVersion, + }, + { + CSKYAttrs::CSKY_FPU_ABI, + &CSKYAttributeParser::fpuABI, + }, + { + CSKYAttrs::CSKY_FPU_ROUNDING, + &CSKYAttributeParser::fpuRounding, + }, + { + CSKYAttrs::CSKY_FPU_DENORMAL, + &CSKYAttributeParser::fpuDenormal, + }, + { + CSKYAttrs::CSKY_FPU_EXCEPTION, + &CSKYAttributeParser::fpuException, + }, + { + CSKYAttrs::CSKY_FPU_NUMBER_MODULE, + &ELFAttributeParser::stringAttribute, + }, + { + CSKYAttrs::CSKY_FPU_HARDFP, + &CSKYAttributeParser::fpuHardFP, + }}; + +Error CSKYAttributeParser::handler(uint64_t tag, bool &handled) { + handled = false; + for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE; + ++AHI) { + if (uint64_t(displayRoutines[AHI].attribute) == tag) { + if (Error e = (this->*displayRoutines[AHI].routine)(tag)) + return e; + handled = true; + break; + } + } + + return Error::success(); +} + +Error CSKYAttributeParser::dspVersion(unsigned tag) { + static const char *strings[] = {"Error", "DSP Extension", "DSP 2.0"}; + return parseStringAttribute("Tag_CSKY_DSP_VERSION", tag, + makeArrayRef(strings)); +} + +Error CSKYAttributeParser::vdspVersion(unsigned tag) { + static const char *strings[] = {"Error", "VDSP Version 1", "VDSP Version 2"}; + return parseStringAttribute("Tag_CSKY_VDSP_VERSION", tag, + makeArrayRef(strings)); +} + +Error CSKYAttributeParser::fpuVersion(unsigned tag) { + static const char *strings[] = {"Error", "FPU Version 1", "FPU Version 2", + "FPU Version 3"}; + return parseStringAttribute("Tag_CSKY_FPU_VERSION", tag, + makeArrayRef(strings)); +} + +Error CSKYAttributeParser::fpuABI(unsigned tag) { + static const char *strings[] = {"Error", "Soft", "SoftFP", "Hard"}; + return parseStringAttribute("Tag_CSKY_FPU_ABI", tag, makeArrayRef(strings)); +} + +Error CSKYAttributeParser::fpuRounding(unsigned tag) { + static const char *strings[] = {"None", "Needed"}; + return parseStringAttribute("Tag_CSKY_FPU_ROUNDING", tag, + makeArrayRef(strings)); +} + +Error CSKYAttributeParser::fpuDenormal(unsigned tag) { + static const char *strings[] = {"None", "Needed"}; + return parseStringAttribute("Tag_CSKY_FPU_DENORMAL", tag, + makeArrayRef(strings)); +} + +Error CSKYAttributeParser::fpuException(unsigned tag) { + static const char *strings[] = {"None", "Needed"}; + return parseStringAttribute("Tag_CSKY_FPU_EXCEPTION", tag, + makeArrayRef(strings)); +} + +Error CSKYAttributeParser::fpuHardFP(unsigned tag) { + uint64_t value = de.getULEB128(cursor); + ListSeparator LS(" "); + + std::string description; + + if (value & 0x1) { + description += LS; + description += "Half"; + } + if ((value >> 1) & 0x1) { + description += LS; + description += "Single"; 
+ } + if ((value >> 2) & 0x1) { + description += LS; + description += "Double"; + } + + if (description.empty()) { + printAttribute(tag, value, ""); + return createStringError(errc::invalid_argument, + "unknown Tag_CSKY_FPU_HARDFP value: " + + Twine(value)); + } + + printAttribute(tag, value, description); + return Error::success(); +} diff --git a/llvm/lib/Support/CSKYAttributes.cpp b/llvm/lib/Support/CSKYAttributes.cpp new file mode 100644 index 000000000000..6130517e44e3 --- /dev/null +++ b/llvm/lib/Support/CSKYAttributes.cpp @@ -0,0 +1,32 @@ +//===-- CSKYAttributes.cpp - CSKY Attributes ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CSKYAttributes.h" + +using namespace llvm; +using namespace llvm::CSKYAttrs; + +static const TagNameItem tagData[] = { + {CSKY_ARCH_NAME, "Tag_CSKY_ARCH_NAME"}, + {CSKY_CPU_NAME, "Tag_CSKY_CPU_NAME"}, + {CSKY_ISA_FLAGS, "Tag_CSKY_ISA_FLAGS"}, + {CSKY_ISA_EXT_FLAGS, "Tag_CSKY_ISA_EXT_FLAGS"}, + {CSKY_DSP_VERSION, "Tag_CSKY_DSP_VERSION"}, + {CSKY_VDSP_VERSION, "Tag_CSKY_VDSP_VERSION"}, + {CSKY_FPU_VERSION, "Tag_CSKY_FPU_VERSION"}, + {CSKY_FPU_ABI, "Tag_CSKY_FPU_ABI"}, + {CSKY_FPU_ROUNDING, "Tag_CSKY_FPU_ROUNDING"}, + {CSKY_FPU_DENORMAL, "Tag_CSKY_FPU_DENORMAL"}, + {CSKY_FPU_EXCEPTION, "Tag_CSKY_FPU_EXCEPTION"}, + {CSKY_FPU_NUMBER_MODULE, "Tag_CSKY_FPU_NUMBER_MODULE"}, + {CSKY_FPU_HARDFP, "Tag_CSKY_FPU_HARDFP"}}; + +constexpr TagNameMap CSKYAttributeTags{tagData}; +const TagNameMap &llvm::CSKYAttrs::getCSKYAttributeTags() { + return CSKYAttributeTags; +} diff --git a/llvm/lib/Support/CSKYTargetParser.cpp b/llvm/lib/Support/CSKYTargetParser.cpp new file mode 100644 index 000000000000..7e9d2ca0428d --- /dev/null +++ b/llvm/lib/Support/CSKYTargetParser.cpp @@ -0,0 +1,181 @@ +//===-- TargetParser - Parser for target features ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise CSKY hardware features +// such as CPU/ARCH names.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CSKYTargetParser.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace llvm; + +bool CSKY::getFPUFeatures(CSKYFPUKind CSKYFPUKind, + std::vector &Features) { + + if (CSKYFPUKind >= FK_LAST || CSKYFPUKind == FK_INVALID) + return false; + + switch (CSKYFPUKind) { + case FK_AUTO: + Features.push_back("+fpuv2_sf"); + Features.push_back("+fpuv2_df"); + Features.push_back("+fdivdu"); + break; + case FK_FPV2: + Features.push_back("+fpuv2_sf"); + Features.push_back("+fpuv2_df"); + break; + case FK_FPV2_DIVD: + Features.push_back("+fpuv2_sf"); + Features.push_back("+fpuv2_df"); + Features.push_back("+fdivdu"); + break; + case FK_FPV2_SF: + Features.push_back("+fpuv2_sf"); + break; + case FK_FPV3: + Features.push_back("+fpuv3_hf"); + Features.push_back("+fpuv3_hi"); + Features.push_back("+fpuv3_sf"); + Features.push_back("+fpuv3_df"); + break; + case FK_FPV3_HF: + Features.push_back("+fpuv3_hf"); + Features.push_back("+fpuv3_hi"); + break; + case FK_FPV3_HSF: + Features.push_back("+fpuv3_hf"); + Features.push_back("+fpuv3_hi"); + Features.push_back("+fpuv3_sf"); + break; + case FK_FPV3_SDF: + Features.push_back("+fpuv3_sf"); + Features.push_back("+fpuv3_df"); + break; + default: + llvm_unreachable("Unknown FPU Kind"); + return false; + } + + return true; +} + +// ======================================================= // +// Information by ID +// ======================================================= // + +StringRef CSKY::getArchName(ArchKind AK) { + return ARCHNames[static_cast(AK)].getName(); +} + +// The default cpu's name is same as arch name. +StringRef CSKY::getDefaultCPU(StringRef Arch) { + ArchKind AK = parseArch(Arch); + if (AK == CSKY::ArchKind::INVALID) + return StringRef(); + + return Arch; +} + +// ======================================================= // +// Parsers +// ======================================================= // +CSKY::ArchKind CSKY::parseArch(StringRef Arch) { + for (const auto A : ARCHNames) { + if (A.getName() == Arch) + return A.ID; + } + + return CSKY::ArchKind::INVALID; +} + +CSKY::ArchKind CSKY::parseCPUArch(StringRef CPU) { + for (const auto C : CPUNames) { + if (CPU == C.getName()) + return C.ArchID; + } + + return CSKY::ArchKind::INVALID; +} + +uint64_t CSKY::parseArchExt(StringRef ArchExt) { + for (const auto &A : CSKYARCHExtNames) { + if (ArchExt == A.getName()) + return A.ID; + } + return AEK_INVALID; +} + +void CSKY::fillValidCPUArchList(SmallVectorImpl &Values) { + for (const CpuNames &Arch : CPUNames) { + if (Arch.ArchID != CSKY::ArchKind::INVALID) + Values.push_back(Arch.getName()); + } +} + +StringRef CSKY::getFPUName(unsigned FPUKind) { + if (FPUKind >= FK_LAST) + return StringRef(); + return FPUNames[FPUKind].getName(); +} + +CSKY::FPUVersion CSKY::getFPUVersion(unsigned FPUKind) { + if (FPUKind >= FK_LAST) + return FPUVersion::NONE; + return FPUNames[FPUKind].FPUVer; +} + +uint64_t CSKY::getDefaultExtensions(StringRef CPU) { + return StringSwitch(CPU) +#define CSKY_CPU_NAME(NAME, ID, DEFAULT_EXT) \ + .Case(NAME, ARCHNames[static_cast(ArchKind::ID)].archBaseExt | \ + DEFAULT_EXT) +#include "llvm/Support/CSKYTargetParser.def" + .Default(CSKY::AEK_INVALID); +} + +StringRef CSKY::getArchExtName(uint64_t ArchExtKind) { + for (const auto &AE : CSKYARCHExtNames) + if (ArchExtKind == AE.ID) + return AE.getName(); + return StringRef(); +} + +static bool stripNegationPrefix(StringRef &Name) { + if (Name.startswith("no")) { + Name = 
Name.substr(2); + return true; + } + return false; +} + +StringRef CSKY::getArchExtFeature(StringRef ArchExt) { + bool Negated = stripNegationPrefix(ArchExt); + for (const auto &AE : CSKYARCHExtNames) { + if (AE.Feature && ArchExt == AE.getName()) + return StringRef(Negated ? AE.NegFeature : AE.Feature); + } + + return StringRef(); +} + +bool CSKY::getExtensionFeatures(uint64_t Extensions, + std::vector &Features) { + if (Extensions == CSKY::AEK_INVALID) + return false; + + for (const auto &AE : CSKYARCHExtNames) { + if ((Extensions & AE.ID) == AE.ID && AE.Feature) + Features.push_back(AE.Feature); + } + + return true; +} diff --git a/llvm/lib/Support/CodeGenCoverage.cpp b/llvm/lib/Support/CodeGenCoverage.cpp index 73e0fb3edce8..d5ab77b9c66f 100644 --- a/llvm/lib/Support/CodeGenCoverage.cpp +++ b/llvm/lib/Support/CodeGenCoverage.cpp @@ -23,7 +23,7 @@ using namespace llvm; static sys::SmartMutex OutputMutex; -CodeGenCoverage::CodeGenCoverage() {} +CodeGenCoverage::CodeGenCoverage() = default; void CodeGenCoverage::setCovered(uint64_t RuleID) { if (RuleCoverage.size() <= RuleID) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 71a6ebf2a72e..eb6c04d987b3 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -166,7 +166,7 @@ public: // This collects the different subcommands that have been registered. SmallPtrSet RegisteredSubCommands; - CommandLineParser() : ActiveSubCommand(nullptr) { + CommandLineParser() { registerSubCommand(&*TopLevelSubCommand); registerSubCommand(&*AllSubCommands); } @@ -418,7 +418,7 @@ public: } private: - SubCommand *ActiveSubCommand; + SubCommand *ActiveSubCommand = nullptr; Option *LookupOption(SubCommand &Sub, StringRef &Arg, StringRef &Value); Option *LookupLongOption(SubCommand &Sub, StringRef &Arg, StringRef &Value, @@ -918,21 +918,34 @@ static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) { return I - 1; } -// Windows treats whitespace, double quotes, and backslashes specially. +// Windows treats whitespace, double quotes, and backslashes specially, except +// when parsing the first token of a full command line, in which case +// backslashes are not special. static bool isWindowsSpecialChar(char C) { return isWhitespaceOrNull(C) || C == '\\' || C == '\"'; } +static bool isWindowsSpecialCharInCommandName(char C) { + return isWhitespaceOrNull(C) || C == '\"'; +} // Windows tokenization implementation. The implementation is designed to be // inlined and specialized for the two user entry points. -static inline void -tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, - function_ref AddToken, - bool AlwaysCopy, function_ref MarkEOL) { +static inline void tokenizeWindowsCommandLineImpl( + StringRef Src, StringSaver &Saver, function_ref AddToken, + bool AlwaysCopy, function_ref MarkEOL, bool InitialCommandName) { SmallString<128> Token; + // Sometimes, this function will be handling a full command line including an + // executable pathname at the start. In that situation, the initial pathname + // needs different handling from the following arguments, because when + // CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as + // escaping the quote character, whereas when libc scans the rest of the + // command line, it does. + bool CommandName = InitialCommandName; + // Try to do as much work inside the state machine as possible. 
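+ // Worked example: in the full command line
+ //   C:\dir\"sub dir"\app.exe arg\"1
+ // the command-name scan treats backslashes as ordinary characters and
+ // quotes only toggle quoting, so the first token is
+ //   C:\dir\sub dir\app.exe
+ // while the remaining arguments follow the libc rules, where \" escapes
+ // the quote, making the second token arg"1.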
enum { INIT, UNQUOTED, QUOTED } State = INIT; + for (size_t I = 0, E = Src.size(); I < E; ++I) { switch (State) { case INIT: { @@ -947,19 +960,29 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, if (I >= E) break; size_t Start = I; - while (I < E && !isWindowsSpecialChar(Src[I])) - ++I; + if (CommandName) { + while (I < E && !isWindowsSpecialCharInCommandName(Src[I])) + ++I; + } else { + while (I < E && !isWindowsSpecialChar(Src[I])) + ++I; + } StringRef NormalChars = Src.slice(Start, I); if (I >= E || isWhitespaceOrNull(Src[I])) { // No special characters: slice out the substring and start the next // token. Copy the string if the caller asks us to. AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars); - if (I < E && Src[I] == '\n') + if (I < E && Src[I] == '\n') { MarkEOL(); + CommandName = InitialCommandName; + } else { + CommandName = false; + } } else if (Src[I] == '\"') { Token += NormalChars; State = QUOTED; } else if (Src[I] == '\\') { + assert(!CommandName && "or else we'd have treated it as a normal char"); Token += NormalChars; I = parseBackslash(Src, I, Token); State = UNQUOTED; @@ -976,12 +999,16 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, // token. AddToken(Saver.save(Token.str())); Token.clear(); - if (Src[I] == '\n') + if (Src[I] == '\n') { + CommandName = InitialCommandName; MarkEOL(); + } else { + CommandName = false; + } State = INIT; } else if (Src[I] == '\"') { State = QUOTED; - } else if (Src[I] == '\\') { + } else if (Src[I] == '\\' && !CommandName) { I = parseBackslash(Src, I, Token); } else { Token.push_back(Src[I]); @@ -999,7 +1026,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, // Otherwise, end the quoted portion and return to the unquoted state. 
State = UNQUOTED; } - } else if (Src[I] == '\\') { + } else if (Src[I] == '\\' && !CommandName) { I = parseBackslash(Src, I, Token); } else { Token.push_back(Src[I]); @@ -1008,7 +1035,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, } } - if (State == UNQUOTED) + if (State != INIT) AddToken(Saver.save(Token.str())); } @@ -1021,7 +1048,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, NewArgv.push_back(nullptr); }; tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, - /*AlwaysCopy=*/true, OnEOL); + /*AlwaysCopy=*/true, OnEOL, false); } void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver, @@ -1029,7 +1056,19 @@ void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver, auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok); }; auto OnEOL = []() {}; tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, /*AlwaysCopy=*/false, - OnEOL); + OnEOL, false); +} + +void cl::TokenizeWindowsCommandLineFull(StringRef Src, StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv, + bool MarkEOLs) { + auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok.data()); }; + auto OnEOL = [&]() { + if (MarkEOLs) + NewArgv.push_back(nullptr); + }; + tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, + /*AlwaysCopy=*/true, OnEOL, true); } void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver, @@ -1737,21 +1776,6 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value, if (!MultiArg) NumOccurrences++; // Increment the number of times we have been seen - switch (getNumOccurrencesFlag()) { - case Optional: - if (NumOccurrences > 1) - return error("may only occur zero or one times!", ArgName); - break; - case Required: - if (NumOccurrences > 1) - return error("must occur exactly one time!", ArgName); - LLVM_FALLTHROUGH; - case OneOrMore: - case ZeroOrMore: - case ConsumeAfter: - break; - } - return handleOccurrence(pos, ArgName, Value); } @@ -2236,7 +2260,7 @@ protected: public: explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {} - virtual ~HelpPrinter() {} + virtual ~HelpPrinter() = default; // Invoke the printer. void operator=(bool Value) { @@ -2444,11 +2468,7 @@ public: #else OS << "LLVM (http://llvm.org/):\n "; #endif - OS << PACKAGE_NAME << " version " << PACKAGE_VERSION; -#ifdef LLVM_VERSION_INFO - OS << " " << LLVM_VERSION_INFO; -#endif - OS << "\n "; + OS << PACKAGE_NAME << " version " << PACKAGE_VERSION << "\n "; #if LLVM_IS_DEBUG_BUILD OS << "DEBUG build"; #else diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index ccf6ef4bb662..983a6348bbe4 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -46,18 +46,20 @@ static StringRef convertZlibCodeToString(int Code) { bool zlib::isAvailable() { return true; } -Error zlib::compress(StringRef InputBuffer, - SmallVectorImpl<char> &CompressedBuffer, int Level) { +void zlib::compress(StringRef InputBuffer, + SmallVectorImpl<char> &CompressedBuffer, int Level) { unsigned long CompressedSize = ::compressBound(InputBuffer.size()); CompressedBuffer.resize_for_overwrite(CompressedSize); int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize, (const Bytef *)InputBuffer.data(), InputBuffer.size(), Level); + if (Res == Z_MEM_ERROR) + report_bad_alloc_error("Allocation failed"); + assert(Res == Z_OK); // Tell MemorySanitizer that zlib output buffer is fully initialized. // This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize); CompressedBuffer.truncate(CompressedSize); - return Res ? createError(convertZlibCodeToString(Res)) : Error::success(); } Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, @@ -87,8 +89,8 @@ uint32_t zlib::crc32(StringRef Buffer) { #else bool zlib::isAvailable() { return false; } -Error zlib::compress(StringRef InputBuffer, - SmallVectorImpl<char> &CompressedBuffer, int Level) { +void zlib::compress(StringRef InputBuffer, + SmallVectorImpl<char> &CompressedBuffer, int Level) { llvm_unreachable("zlib::compress is unavailable"); } Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp index 392c4c4890e1..9bf3f8f8b897 100644 --- a/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -34,31 +34,31 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, const UTF8 *sourceStart = (const UTF8*)Source.data(); // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); + UTF16 *targetStart = reinterpret_cast<UTF16 *>(ResultPtr); ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF16( - &sourceStart, sourceStart + Source.size(), - &targetStart, targetStart + Source.size(), flags); + result = + ConvertUTF8toUTF16(&sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + Source.size(), flags); if (result == conversionOK) - ResultPtr = reinterpret_cast<char*>(targetStart); + ResultPtr = reinterpret_cast<char *>(targetStart); else ErrorPtr = sourceStart; } else if (WideCharWidth == 4) { - const UTF8 *sourceStart = (const UTF8*)Source.data(); + const UTF8 *sourceStart = (const UTF8 *)Source.data(); // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast.
- UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); + UTF32 *targetStart = reinterpret_cast<UTF32 *>(ResultPtr); ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF32( - &sourceStart, sourceStart + Source.size(), - &targetStart, targetStart + Source.size(), flags); + result = + ConvertUTF8toUTF32(&sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + Source.size(), flags); if (result == conversionOK) - ResultPtr = reinterpret_cast<char*>(targetStart); + ResultPtr = reinterpret_cast<char *>(targetStart); else ErrorPtr = sourceStart; } - assert((result != targetExhausted) - && "ConvertUTF8toUTFXX exhausted target buffer"); + assert((result != targetExhausted) && + "ConvertUTF8toUTFXX exhausted target buffer"); return result == conversionOK; } @@ -67,20 +67,18 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { const UTF32 *SourceEnd = SourceStart + 1; UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr); UTF8 *TargetEnd = TargetStart + 4; - ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd, - &TargetStart, TargetEnd, - strictConversion); + ConversionResult CR = ConvertUTF32toUTF8( + &SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion); if (CR != conversionOK) return false; - ResultPtr = reinterpret_cast<char*>(TargetStart); + ResultPtr = reinterpret_cast<char *>(TargetStart); return true; } bool hasUTF16ByteOrderMark(ArrayRef<char> S) { - return (S.size() >= 2 && - ((S[0] == '\xff' && S[1] == '\xfe') || - (S[0] == '\xfe' && S[1] == '\xff'))); + return (S.size() >= 2 && ((S[0] == '\xff' && S[1] == '\xfe') || + (S[0] == '\xfe' && S[1] == '\xff'))); } bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { @@ -134,11 +132,69 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { return true; } -bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out) -{ +bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out) { return convertUTF16ToUTF8String( llvm::ArrayRef(reinterpret_cast<const char *>(Src.data()), - Src.size() * sizeof(UTF16)), Out); + Src.size() * sizeof(UTF16)), + Out); +} + +bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { + assert(Out.empty()); + + // Error out on an uneven byte count. + if (SrcBytes.size() % 4) + return false; + + // Avoid OOB by returning early on empty input. + if (SrcBytes.empty()) + return true; + + const UTF32 *Src = reinterpret_cast<const UTF32 *>(SrcBytes.begin()); + const UTF32 *SrcEnd = reinterpret_cast<const UTF32 *>(SrcBytes.end()); + + assert((uintptr_t)Src % sizeof(UTF32) == 0); + + // Byteswap if necessary. + std::vector<UTF32> ByteSwapped; + if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_SWAPPED) { + ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); + for (UTF32 &I : ByteSwapped) + I = llvm::ByteSwap_32(I); + Src = &ByteSwapped[0]; + SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; + } + + // Skip the BOM for conversion. + if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_NATIVE) + Src++; + + // Just allocate enough space up front. We'll shrink it later. Allocate + // enough that we can fit a null terminator without reallocating.
+ Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1); + UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]); + UTF8 *DstEnd = Dst + Out.size(); + + ConversionResult CR = + ConvertUTF32toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion); + assert(CR != targetExhausted); + + if (CR != conversionOK) { + Out.clear(); + return false; + } + + Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]); + Out.push_back(0); + Out.pop_back(); + return true; +} + +bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out) { + return convertUTF32ToUTF8String( + llvm::ArrayRef(reinterpret_cast<const char *>(Src.data()), + Src.size() * sizeof(UTF32)), + Out); } bool convertUTF8ToUTF16String(StringRef SrcUTF8, diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp index 2ee3074b840e..292ba63d14aa 100644 --- a/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/llvm/lib/Support/CrashRecoveryContext.cpp @@ -9,6 +9,7 @@ #include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ExitCodes.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Signals.h" #include "llvm/Support/ThreadLocal.h" @@ -16,10 +17,6 @@ #include <mutex> #include <setjmp.h> -#if !defined(_MSC_VER) && !defined(_WIN32) -#include "llvm/Support/ExitCodes.h" -#endif - using namespace llvm; namespace { @@ -97,7 +94,7 @@ static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextImpl>> static void installExceptionOrSignalHandlers(); static void uninstallExceptionOrSignalHandlers(); -CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} +CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() = default; CrashRecoveryContext::CrashRecoveryContext() { // On Windows, if abort() was previously triggered (and caught by a previous @@ -445,7 +442,7 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { llvm_unreachable("Most likely setjmp wasn't called!"); } -bool CrashRecoveryContext::throwIfCrash(int RetCode) { +bool CrashRecoveryContext::isCrash(int RetCode) { #if defined(_WIN32) // On Windows, the high bits are reserved for kernel return codes. Values // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000 @@ -454,12 +451,21 @@ bool CrashRecoveryContext::throwIfCrash(int RetCode) { unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28; if (Code != 0xC && Code != 8) return false; - ::RaiseException(RetCode, 0, 0, NULL); #else // On Unix, signals are represented by return codes of 128 or higher. // Exit code 128 is a reserved value and should not be raised as a signal.
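// ---- Editorial example (not part of the patch) --------------------------
// A hedged usage sketch for the convertUTF32ToUTF8String overloads added
// earlier in this hunk; assumes their declarations land in
// llvm/Support/ConvertUTF.h alongside the UTF16 variants.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/ConvertUTF.h"
#include <cassert>
#include <string>
static void demoUTF32() {
  const llvm::UTF32 Src[] = {0x48, 0x69, 0x1F600}; // "Hi" + U+1F600
  std::string Out;
  bool OK = llvm::convertUTF32ToUTF8String(llvm::makeArrayRef(Src), Out);
  assert(OK && Out == "Hi\xF0\x9F\x98\x80"); // emoji encodes to 4 UTF-8 bytes
  (void)OK;
}
// ---- End editorial example -----------------------------------------------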
if (RetCode <= 128) return false; +#endif + return true; +} + +bool CrashRecoveryContext::throwIfCrash(int RetCode) { + if (!isCrash(RetCode)) + return false; +#if defined(_WIN32) + ::RaiseException(RetCode, 0, 0, NULL); +#else llvm::sys::unregisterHandlers(); raise(RetCode - 128); #endif diff --git a/llvm/lib/Support/Debug.cpp b/llvm/lib/Support/Debug.cpp index 5470d931b00b..98a9ac4722b5 100644 --- a/llvm/lib/Support/Debug.cpp +++ b/llvm/lib/Support/Debug.cpp @@ -132,7 +132,7 @@ struct CreateDebugOnly { "debug-only", cl::desc("Enable a specific type of debug output (comma separated list " "of types)"), - cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"), + cl::Hidden, cl::value_desc("debug string"), cl::location(DebugOnlyOptLoc), cl::ValueRequired); } }; diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp index f553463be8df..bc2df37e773d 100644 --- a/llvm/lib/Support/DebugCounter.cpp +++ b/llvm/lib/Support/DebugCounter.cpp @@ -49,8 +49,7 @@ struct CreateDebugCounterOption { return new DebugCounterList( "debug-counter", cl::Hidden, cl::desc("Comma separated list of debug counter skip and count"), - cl::CommaSeparated, cl::ZeroOrMore, - cl::location(DebugCounter::instance())); + cl::CommaSeparated, cl::location(DebugCounter::instance())); } }; } // namespace diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp index a2017a10ab3f..341de244547c 100644 --- a/llvm/lib/Support/DeltaAlgorithm.cpp +++ b/llvm/lib/Support/DeltaAlgorithm.cpp @@ -11,8 +11,7 @@ #include using namespace llvm; -DeltaAlgorithm::~DeltaAlgorithm() { -} +DeltaAlgorithm::~DeltaAlgorithm() = default; bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) { if (FailedTestsCache.count(Changes)) diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp index 2bcdbdcdb9b0..7b9d7abe7545 100644 --- a/llvm/lib/Support/DynamicLibrary.cpp +++ b/llvm/lib/Support/DynamicLibrary.cpp @@ -12,14 +12,11 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm-c/Support.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Config/config.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include -#include #include using namespace llvm; @@ -29,14 +26,14 @@ using namespace llvm::sys; class DynamicLibrary::HandleSet { typedef std::vector HandleList; HandleList Handles; - void *Process; + void *Process = nullptr; public: static void *DLOpen(const char *Filename, std::string *Err); static void DLClose(void *Handle); static void *DLSym(void *Handle, const char *Symbol); - HandleSet() : Process(nullptr) {} + HandleSet() = default; ~HandleSet(); HandleList::iterator Find(void *Handle) { return find(Handles, Handle); } diff --git a/llvm/lib/Support/Errno.cpp b/llvm/lib/Support/Errno.cpp index d18231c6ebf5..7f665be8db6c 100644 --- a/llvm/lib/Support/Errno.cpp +++ b/llvm/lib/Support/Errno.cpp @@ -12,8 +12,7 @@ #include "llvm/Support/Errno.h" #include "llvm/Config/config.h" -#include "llvm/Support/raw_ostream.h" -#include +#include #if HAVE_ERRNO_H #include diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp index 80c0e00439a5..b8b3b7424ac6 100644 --- a/llvm/lib/Support/ErrorHandling.cpp +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -119,7 +119,10 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { // files registered with RemoveFileOnSignal. 
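// ---- Editorial aside (not part of the patch) -----------------------------
// The change just below makes GenCrashDiag meaningful: with
// /*GenCrashDiag=*/false, report_fatal_error now exits with status 1 instead
// of calling abort() and triggering crash diagnostics. A hedged sketch:
#include "llvm/Support/ErrorHandling.h"
[[noreturn]] static void userFacingError() {
  // No crash report wanted for an ordinary user-input error.
  llvm::report_fatal_error("invalid input file", /*GenCrashDiag=*/false);
}
// ---- End editorial aside --------------------------------------------------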
sys::RunInterruptHandlers(); - abort(); + if (GenCrashDiag) + abort(); + else + exit(1); } void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp index 489b8d119e6f..eda3eb044901 100644 --- a/llvm/lib/Support/FileUtilities.cpp +++ b/llvm/lib/Support/FileUtilities.cpp @@ -17,6 +17,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -323,4 +324,69 @@ llvm::Error llvm::writeFileAtomically( return Error::success(); } +Expected +FilePermissionsApplier::create(StringRef InputFilename) { + sys::fs::file_status Status; + + if (InputFilename != "-") { + if (auto EC = sys::fs::status(InputFilename, Status)) + return createFileError(InputFilename, EC); + } else { + Status.permissions(static_cast(0777)); + } + + return FilePermissionsApplier(InputFilename, Status); +} + +Error FilePermissionsApplier::apply( + StringRef OutputFilename, bool CopyDates, + Optional OverwritePermissions) { + sys::fs::file_status Status = InputStatus; + + if (OverwritePermissions) + Status.permissions(*OverwritePermissions); + + int FD = 0; + + // Writing to stdout should not be treated as an error here, just + // do not set access/modification times or permissions. + if (OutputFilename == "-") + return Error::success(); + + if (std::error_code EC = sys::fs::openFileForWrite(OutputFilename, FD, + sys::fs::CD_OpenExisting)) + return createFileError(OutputFilename, EC); + + if (CopyDates) + if (std::error_code EC = sys::fs::setLastAccessAndModificationTime( + FD, Status.getLastAccessedTime(), Status.getLastModificationTime())) + return createFileError(OutputFilename, EC); + + sys::fs::file_status OStat; + if (std::error_code EC = sys::fs::status(FD, OStat)) + return createFileError(OutputFilename, EC); + if (OStat.type() == sys::fs::file_type::regular_file) { +#ifndef _WIN32 + // Keep ownership if llvm-objcopy is called under root. + if (OutputFilename == InputFilename && OStat.getUser() == 0) + sys::fs::changeFileOwnership(FD, Status.getUser(), Status.getGroup()); +#endif + + sys::fs::perms Perm = Status.permissions(); + if (OutputFilename != InputFilename) + Perm = static_cast(Perm & ~sys::fs::getUmask() & ~06000); +#ifdef _WIN32 + if (std::error_code EC = sys::fs::setPermissions(OutputFilename, Perm)) +#else + if (std::error_code EC = sys::fs::setPermissions(FD, Perm)) +#endif + return createFileError(OutputFilename, EC); + } + + if (std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD)) + return createFileError(OutputFilename, EC); + + return Error::success(); +} + char llvm::AtomicFileWriteError::ID; diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp index e3d7168305af..178855289fe8 100644 --- a/llvm/lib/Support/FoldingSet.cpp +++ b/llvm/lib/Support/FoldingSet.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" @@ -25,12 +24,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// // FoldingSetNodeIDRef Implementation -/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, -/// used to lookup the node in the FoldingSetBase. 
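// ---- Editorial example (not part of the patch) --------------------------
// The FoldingSet deletions around this point remove out-of-line definitions
// from this file only; the public FoldingSetNodeID API is unchanged, so
// client code like this sketch still works as before.
#include "llvm/ADT/FoldingSet.h"
static unsigned demoNodeHash(const void *Ptr, unsigned Kind) {
  llvm::FoldingSetNodeID ID;
  ID.AddPointer(Ptr);   // host-endian pointer bits; hash is not stable
  ID.AddInteger(Kind);
  return ID.ComputeHash();
}
// ---- End editorial example -----------------------------------------------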
-unsigned FoldingSetNodeIDRef::ComputeHash() const { - return static_cast(hash_combine_range(Data, Data+Size)); -} - bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { if (Size != RHS.Size) return false; return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; @@ -49,41 +42,6 @@ bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { /// Add* - Add various data types to Bit data. /// -void FoldingSetNodeID::AddPointer(const void *Ptr) { - // Note: this adds pointers to the hash using sizes and endianness that - // depend on the host. It doesn't matter, however, because hashing on - // pointer values is inherently unstable. Nothing should depend on the - // ordering of nodes in the folding set. - static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long), - "unexpected pointer size"); - AddInteger(reinterpret_cast(Ptr)); -} -void FoldingSetNodeID::AddInteger(signed I) { - Bits.push_back(I); -} -void FoldingSetNodeID::AddInteger(unsigned I) { - Bits.push_back(I); -} -void FoldingSetNodeID::AddInteger(long I) { - AddInteger((unsigned long)I); -} -void FoldingSetNodeID::AddInteger(unsigned long I) { - if (sizeof(long) == sizeof(int)) - AddInteger(unsigned(I)); - else if (sizeof(long) == sizeof(long long)) { - AddInteger((unsigned long long)I); - } else { - llvm_unreachable("unexpected sizeof(long)"); - } -} -void FoldingSetNodeID::AddInteger(long long I) { - AddInteger((unsigned long long)I); -} -void FoldingSetNodeID::AddInteger(unsigned long long I) { - AddInteger(unsigned(I)); - AddInteger(unsigned(I >> 32)); -} - void FoldingSetNodeID::AddString(StringRef String) { unsigned Size = String.size(); @@ -145,12 +103,6 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { Bits.append(ID.Bits.begin(), ID.Bits.end()); } -/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to -/// lookup the node in the FoldingSetBase. -unsigned FoldingSetNodeID::ComputeHash() const { - return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); -} - /// operator== - Used to compare two nodes to each other. /// bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp index f6d48bcd50e8..0709d65e81e0 100644 --- a/llvm/lib/Support/FormatVariadic.cpp +++ b/llvm/lib/Support/FormatVariadic.cpp @@ -130,7 +130,7 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) { StringRef Right = Fmt.substr(BC + 1); auto RI = parseReplacementItem(Spec); - if (RI.hasValue()) + if (RI) return std::make_pair(*RI, Right); // If there was an error parsing the replacement item, treat it as an diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index f6003b783245..08e3a27e0173 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -11,20 +11,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Host.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/BCD.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/X86TargetParser.h" #include "llvm/Support/raw_ostream.h" -#include #include // Include the platform-specific parts of this class. 
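// ---- Editorial aside (not part of the patch) -----------------------------
// Context for the FormatVariadic change above: llvm::Optional converts to
// bool contextually, so `if (RI)` replaces the deprecated `RI.hasValue()`.
// Minimal illustration:
#include "llvm/ADT/Optional.h"
static int demoOptional(llvm::Optional<int> O) { return O ? *O : -1; }
// ---- End editorial aside --------------------------------------------------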
@@ -38,11 +33,16 @@ #ifdef _MSC_VER #include #endif -#if defined(__APPLE__) && (!defined(__x86_64__)) +#ifdef __MVS__ +#include "llvm/Support/BCD.h" +#endif +#if defined(__APPLE__) #include #include #include #include +#include +#include #endif #ifdef _AIX #include @@ -296,6 +296,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { } } + if (Implementer == "0xc0") { // Ampere Computing + return StringSwitch(Part) + .Case("0xac3", "ampere1") + .Default("generic"); + } + return "generic"; } @@ -330,7 +336,7 @@ StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { case 3931: case 3932: default: - return HaveVectorSupport? "arch14" : "zEC12"; + return HaveVectorSupport? "z16" : "zEC12"; } } } // end anonymous namespace @@ -380,6 +386,26 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { return "generic"; } +StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) { + // There are 24 lines in /proc/cpuinfo + SmallVector Lines; + ProcCpuinfoContent.split(Lines, "\n"); + + // Look for uarch line to determine cpu name + StringRef UArch; + for (unsigned I = 0, E = Lines.size(); I != E; ++I) { + if (Lines[I].startswith("uarch")) { + UArch = Lines[I].substr(5).ltrim("\t :"); + break; + } + } + + return StringSwitch(UArch) + .Case("sifive,u74-mc", "sifive-u74") + .Case("sifive,bullet0", "sifive-u74") + .Default("generic"); +} + StringRef sys::detail::getHostCPUNameForBPF() { #if !defined(__linux__) || !defined(__x86_64__) return "generic"; @@ -1034,9 +1060,9 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 25: CPU = "znver3"; *Type = X86::AMDFAM19H; - if (Model <= 0x0f) { + if (Model <= 0x0f || Model == 0x21) { *Subtype = X86::AMDFAM19H_ZNVER3; - break; // 00h-0Fh: Zen3 + break; // 00h-0Fh, 21h: Zen3 } break; default: @@ -1299,32 +1325,45 @@ StringRef sys::getHostCPUName() { bool HaveVectorSupport = CVT[244] & 0x80; return getCPUNameFromS390Model(Id, HaveVectorSupport); } -#elif defined(__APPLE__) && defined(__aarch64__) -StringRef sys::getHostCPUName() { - return "cyclone"; -} -#elif defined(__APPLE__) && defined(__arm__) -StringRef sys::getHostCPUName() { - host_basic_info_data_t hostInfo; - mach_msg_type_number_t infoCount; +#elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#define CPUFAMILY_ARM_SWIFT 0x1e2d6381 +#define CPUFAMILY_ARM_CYCLONE 0x37a09642 +#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e +#define CPUFAMILY_ARM_TWISTER 0x92fb37c8 +#define CPUFAMILY_ARM_HURRICANE 0x67ceee93 +#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 +#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f +#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 +#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 - infoCount = HOST_BASIC_INFO_COUNT; - mach_port_t hostPort = mach_host_self(); - host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, - &infoCount); - mach_port_deallocate(mach_task_self(), hostPort); +StringRef sys::getHostCPUName() { + uint32_t Family; + size_t Length = sizeof(Family); + sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0); - if (hostInfo.cpu_type != CPU_TYPE_ARM) { - assert(false && "CPUType not equal to ARM should not be possible on ARM"); - return "generic"; + switch (Family) { + case CPUFAMILY_ARM_SWIFT: + return "swift"; + case CPUFAMILY_ARM_CYCLONE: + return "apple-a7"; + case CPUFAMILY_ARM_TYPHOON: + return "apple-a8"; + case CPUFAMILY_ARM_TWISTER: + return "apple-a9"; + case CPUFAMILY_ARM_HURRICANE: + return "apple-a10"; + case 
CPUFAMILY_ARM_MONSOON_MISTRAL: + return "apple-a11"; + case CPUFAMILY_ARM_VORTEX_TEMPEST: + return "apple-a12"; + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + return "apple-a13"; + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + return "apple-m1"; + default: + // Default to the newest CPU we know about. + return "apple-m1"; } - switch (hostInfo.cpu_subtype) { - case CPU_SUBTYPE_ARM_V7S: - return "swift"; - default:; - } - - return "generic"; } #elif defined(_AIX) StringRef sys::getHostCPUName() { @@ -1360,6 +1399,11 @@ StringRef sys::getHostCPUName() { } #elif defined(__riscv) StringRef sys::getHostCPUName() { +#if defined(__linux__) + std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForRISCV(Content); +#else #if __riscv_xlen == 64 return "generic-rv64"; #elif __riscv_xlen == 32 return "generic-rv32"; #else #error "Unhandled value of __riscv_xlen" #endif +#endif } #else StringRef sys::getHostCPUName() { return "generic"; } @@ -1455,9 +1500,6 @@ int computeHostNumPhysicalCores() { #elif defined(__linux__) && defined(__s390x__) int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } #elif defined(__APPLE__) -#include <sys/param.h> -#include <sys/sysctl.h> - // Gets the number of *physical cores* on the machine. int computeHostNumPhysicalCores() { uint32_t count; @@ -1706,6 +1748,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { .Case("asimd", "neon") .Case("fp", "fp-armv8") .Case("crc32", "crc") + .Case("atomics", "lse") + .Case("sve", "sve") + .Case("sve2", "sve2") #else .Case("half", "fp16") .Case("neon", "neon") diff --git a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp index e6cba26cfcf3..52d5de93ff7d 100644 --- a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp +++ b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp @@ -189,20 +189,6 @@ public: bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } }; -/// Convert St3foo to NSt3fooE so that equivalences naming one also affect the -/// other. -template<> -struct CanonicalizerAllocator::MakeNodeImpl< - itanium_demangle::StdQualifiedName> { - CanonicalizerAllocator &Self; - Node *make(Node *Child) { - Node *StdNamespace = Self.makeNode<itanium_demangle::NameType>("std"); - if (!StdNamespace) - return nullptr; - return Self.makeNode<itanium_demangle::NestedName>(StdNamespace, Child); - } -}; - // FIXME: Also expand built-in substitutions? using CanonicalizingDemangler = diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp index 20babbe56d86..b87e39f0a963 100644 --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -509,13 +509,25 @@ bool Parser::parseNumber(char First, Value &Out) { S.push_back(next()); char *End; // Try first to parse as integer, and if so preserve full 64 bits. - // strtoll returns long long >= 64 bits, so check it's in range too. - auto I = std::strtoll(S.c_str(), &End, 10); - if (End == S.end() && I >= std::numeric_limits<int64_t>::min() && - I <= std::numeric_limits<int64_t>::max()) { + // We check errno for out-of-range errors and End == S.end() + // to make sure that the numeric string is not malformed. + errno = 0; + int64_t I = std::strtoll(S.c_str(), &End, 10); + if (End == S.end() && errno != ERANGE) { Out = int64_t(I); return true; } + // strtoull has special handling for negative numbers, but in this + // case we don't want that because negative numbers were already + // handled in the previous block.
+ if (First != '-') { + errno = 0; + uint64_t UI = std::strtoull(S.c_str(), &End, 10); + if (End == S.end() && errno != ERANGE) { + Out = UI; + return true; + } + } // If it's not an integer Out = std::strtod(S.c_str(), &End); return End == S.end() || parseError("Invalid JSON value (number?)"); diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 8e154067abc0..9f34405e54fc 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -340,7 +340,7 @@ Optional<bool> KnownBits::eq(const KnownBits &LHS, const KnownBits &RHS) { Optional<bool> KnownBits::ne(const KnownBits &LHS, const KnownBits &RHS) { if (Optional<bool> KnownEQ = eq(LHS, RHS)) - return Optional<bool>(!KnownEQ.getValue()); + return Optional<bool>(!*KnownEQ); return None; } @@ -356,7 +356,7 @@ Optional<bool> KnownBits::ugt(const KnownBits &LHS, const KnownBits &RHS) { Optional<bool> KnownBits::uge(const KnownBits &LHS, const KnownBits &RHS) { if (Optional<bool> IsUGT = ugt(RHS, LHS)) - return Optional<bool>(!IsUGT.getValue()); + return Optional<bool>(!*IsUGT); return None; } @@ -380,7 +380,7 @@ Optional<bool> KnownBits::sgt(const KnownBits &LHS, const KnownBits &RHS) { Optional<bool> KnownBits::sge(const KnownBits &LHS, const KnownBits &RHS) { if (Optional<bool> KnownSGT = sgt(RHS, LHS)) - return Optional<bool>(!KnownSGT.getValue()); + return Optional<bool>(!*KnownSGT); return None; } @@ -413,11 +413,11 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const { } KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, - bool SelfMultiply) { + bool NoUndefSelfMultiply) { unsigned BitWidth = LHS.getBitWidth(); assert(BitWidth == RHS.getBitWidth() && !LHS.hasConflict() && !RHS.hasConflict() && "Operand mismatch"); - assert((!SelfMultiply || (LHS.One == RHS.One && LHS.Zero == RHS.Zero)) && + assert((!NoUndefSelfMultiply || LHS == RHS) && "Self multiplication knownbits mismatch"); // Compute the high known-0 bits by multiplying the unsigned max of each side. @@ -501,7 +501,7 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, Res.One = BottomKnown.getLoBits(ResultBitsKnown); // If we're self-multiplying then bit[1] is guaranteed to be zero. - if (SelfMultiply && BitWidth > 1) { + if (NoUndefSelfMultiply && BitWidth > 1) { assert(Res.One[1] == 0 && "Self-multiplication failed Quadratic Reciprocity!"); Res.Zero.setBit(1); diff --git a/llvm/lib/Support/LineIterator.cpp b/llvm/lib/Support/LineIterator.cpp index 7bdf1271ac25..9874d16d19e1 100644 --- a/llvm/lib/Support/LineIterator.cpp +++ b/llvm/lib/Support/LineIterator.cpp @@ -38,7 +38,7 @@ line_iterator::line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks, line_iterator::line_iterator(const MemoryBufferRef &Buffer, bool SkipBlanks, char CommentMarker) : Buffer(Buffer.getBufferSize() ? Optional<MemoryBufferRef>(Buffer) : None), - CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), LineNumber(1), + CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), CurrentLine(Buffer.getBufferSize() ?
Buffer.getBufferStart() : nullptr, 0) { // Ensure that if we are constructed on a non-empty memory buffer that it is diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp index caadde389504..fdcf34d70ad9 100644 --- a/llvm/lib/Support/MD5.cpp +++ b/llvm/lib/Support/MD5.cpp @@ -261,13 +261,13 @@ void MD5::final(MD5Result &Result) { support::endian::write32le(&Result[12], InternalState.d); } -StringRef MD5::final() { +MD5::MD5Result MD5::final() { + MD5Result Result; final(Result); - return StringRef(reinterpret_cast(Result.Bytes.data()), - Result.Bytes.size()); + return Result; } -StringRef MD5::result() { +MD5::MD5Result MD5::result() { auto StateToRestore = InternalState; auto Hash = final(); @@ -280,15 +280,15 @@ StringRef MD5::result() { SmallString<32> MD5::MD5Result::digest() const { SmallString<32> Str; - toHex(Bytes, /*LowerCase*/ true, Str); + toHex(*this, /*LowerCase*/ true, Str); return Str; } void MD5::stringifyResult(MD5Result &Result, SmallVectorImpl &Str) { - toHex(Result.Bytes, /*LowerCase*/ true, Str); + toHex(Result, /*LowerCase*/ true, Str); } -std::array MD5::hash(ArrayRef Data) { +MD5::MD5Result MD5::hash(ArrayRef Data) { MD5 Hash; Hash.update(Data); MD5::MD5Result Res; diff --git a/llvm/lib/Support/MathExtras.cpp b/llvm/lib/Support/MathExtras.cpp index 7efffaa7f8b8..ad44b1a21676 100644 --- a/llvm/lib/Support/MathExtras.cpp +++ b/llvm/lib/Support/MathExtras.cpp @@ -15,7 +15,7 @@ #ifdef _MSC_VER #include #else -#include +#include #endif namespace llvm { diff --git a/llvm/lib/Support/Memory.cpp b/llvm/lib/Support/Memory.cpp index 581484268cd8..f1ba2d0cfe3a 100644 --- a/llvm/lib/Support/Memory.cpp +++ b/llvm/lib/Support/Memory.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/Memory.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/Valgrind.h" #ifndef NDEBUG #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index 7816779cca1d..9872dfa78b26 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -13,10 +13,9 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" -#include "llvm/Support/AutoConvert.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Process.h" @@ -32,13 +31,17 @@ #else #include #endif + +#ifdef __MVS__ +#include "llvm/Support/AutoConvert.h" +#endif using namespace llvm; //===----------------------------------------------------------------------===// // MemoryBuffer implementation itself. //===----------------------------------------------------------------------===// -MemoryBuffer::~MemoryBuffer() { } +MemoryBuffer::~MemoryBuffer() = default; /// init - Initialize this MemoryBuffer as a reference to externally allocated /// memory, memory that we know is already null terminated. @@ -286,6 +289,8 @@ WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName StringRef NameRef = BufferName.toStringRef(NameBuf); size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16); size_t RealLen = AlignedStringLen + Size + 1; + if (RealLen <= Size) // Check for rollover. 
+ return nullptr; char *Mem = static_cast(operator new(RealLen, std::nothrow)); if (!Mem) return nullptr; @@ -533,4 +538,4 @@ MemoryBufferRef MemoryBuffer::getMemBufferRef() const { return MemoryBufferRef(Data, Identifier); } -SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {} +SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default; diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp index 0a797046bb68..8a69f7513255 100644 --- a/llvm/lib/Support/NativeFormatting.cpp +++ b/llvm/lib/Support/NativeFormatting.cpp @@ -14,6 +14,10 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#if defined(_WIN32) && !defined(__MINGW32__) +#include // For _fpclass in llvm::write_double. +#endif + using namespace llvm; template @@ -133,7 +137,7 @@ void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, Optional Width) { const size_t kMaxWidth = 128u; - size_t W = std::min(kMaxWidth, Width.getValueOr(0u)); + size_t W = std::min(kMaxWidth, Width.value_or(0u)); unsigned Nibbles = (64 - countLeadingZeros(N) + 3) / 4; bool Prefix = (Style == HexPrintStyle::PrefixLower || @@ -161,7 +165,7 @@ void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, void llvm::write_double(raw_ostream &S, double N, FloatStyle Style, Optional Precision) { - size_t Prec = Precision.getValueOr(getDefaultPrecision(Style)); + size_t Prec = Precision.value_or(getDefaultPrecision(Style)); if (std::isnan(N)) { S << "nan"; @@ -258,5 +262,5 @@ size_t llvm::getDefaultPrecision(FloatStyle Style) { case FloatStyle::Percent: return 2; // Number of decimal places. } - LLVM_BUILTIN_UNREACHABLE; + llvm_unreachable("Unknown FloatStyle enum"); } diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 4977c188f934..798d7124e7e9 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -89,7 +89,7 @@ public: void add(std::function F) override { { std::lock_guard Lock(Mutex); - WorkStack.push(F); + WorkStack.push(std::move(F)); } Cond.notify_one(); } @@ -102,7 +102,7 @@ private: Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); if (Stop) break; - auto Task = WorkStack.top(); + auto Task = std::move(WorkStack.top()); WorkStack.pop(); Lock.unlock(); Task(); @@ -161,7 +161,7 @@ TaskGroup::~TaskGroup() { void TaskGroup::spawn(std::function F) { if (Parallel) { L.inc(); - Executor::getDefaultExecutor()->add([&, F] { + Executor::getDefaultExecutor()->add([&, F = std::move(F)] { F(); L.dec(); }); @@ -175,8 +175,8 @@ void TaskGroup::spawn(std::function F) { } // namespace llvm #endif // LLVM_ENABLE_THREADS -void llvm::parallelForEachN(size_t Begin, size_t End, - llvm::function_ref Fn) { +void llvm::parallelFor(size_t Begin, size_t End, + llvm::function_ref Fn) { // If we have zero or one items, then do not incur the overhead of spinning up // a task group. They are surprisingly expensive, and because they do not // support nested parallelism, a single entry task group can block parallel diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index 63d8d4ee4648..283dc70f2bc9 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -22,7 +22,6 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include -#include #if !defined(_MSC_VER) && !defined(__MINGW32__) #include @@ -761,11 +760,15 @@ bool remove_dots(SmallVectorImpl &the_path, bool remove_dot_dot, } } + SmallString<256> buffer = root; + // "root" could be "/", which may need to be translated into "\". 
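// ---- Editorial example (not part of the patch) --------------------------
// What the added root normalization in remove_dots does, with hypothetical
// values: under the Windows path style, the "/" root itself is rewritten.
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
static void demoRemoveDots() {
  llvm::SmallString<64> P("/a/../b");
  llvm::sys::path::remove_dots(P, /*remove_dot_dot=*/true,
                               llvm::sys::path::Style::windows);
  // P is now "\\b": the leading "/" became the preferred separator "\".
}
// ---- End editorial example -----------------------------------------------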
+ make_preferred(buffer, style); + needs_change |= root != buffer; + // Avoid rewriting the path unless we have to. if (!needs_change) return false; - SmallString<256> buffer = root; if (!components.empty()) { buffer += components[0]; for (StringRef C : makeArrayRef(components).drop_front()) { @@ -1199,9 +1202,18 @@ Error readNativeFileToEOF(file_t FileHandle, SmallVectorImpl &Buffer, #include "Windows/Path.inc" #endif +bool IsLLVMDriver = false; + namespace llvm { namespace sys { namespace fs { + +std::string getMainExecutable(const char *Argv0, void *MainAddr) { + if (IsLLVMDriver) + return sys::path::stem(Argv0).str(); + return getMainExecutableImpl(Argv0, MainAddr); +} + TempFile::TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {} TempFile::TempFile(TempFile &&Other) { *this = std::move(Other); } diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp index 547b3b73eec2..cf3962ae927b 100644 --- a/llvm/lib/Support/Process.cpp +++ b/llvm/lib/Support/Process.cpp @@ -42,7 +42,7 @@ Optional Process::FindInEnvPath(StringRef EnvName, assert(!path::is_absolute(FileName)); Optional FoundPath; Optional OptPath = Process::GetEnv(EnvName); - if (!OptPath.hasValue()) + if (!OptPath) return FoundPath; const char EnvPathSeparatorStr[] = {Separator, '\0'}; diff --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp index c7a59642b27e..0560714a6acd 100644 --- a/llvm/lib/Support/Program.cpp +++ b/llvm/lib/Support/Program.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; using namespace sys; diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 2b3395b669b8..7fe04af4696b 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -37,7 +37,7 @@ struct RISCVSupportedExtension { } // end anonymous namespace -static constexpr StringLiteral AllStdExts = "mafdqlcbjtpvn"; +static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvn"; static const RISCVSupportedExtension SupportedExtensions[] = { {"i", RISCVExtensionVersion{2, 0}}, @@ -48,9 +48,16 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"d", RISCVExtensionVersion{2, 0}}, {"c", RISCVExtensionVersion{2, 0}}, + {"zihintpause", RISCVExtensionVersion{2, 0}}, + {"zfhmin", RISCVExtensionVersion{1, 0}}, {"zfh", RISCVExtensionVersion{1, 0}}, + {"zfinx", RISCVExtensionVersion{1, 0}}, + {"zdinx", RISCVExtensionVersion{1, 0}}, + {"zhinxmin", RISCVExtensionVersion{1, 0}}, + {"zhinx", RISCVExtensionVersion{1, 0}}, + {"zba", RISCVExtensionVersion{1, 0}}, {"zbb", RISCVExtensionVersion{1, 0}}, {"zbc", RISCVExtensionVersion{1, 0}}, @@ -88,6 +95,10 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zve64x", RISCVExtensionVersion{1, 0}}, {"zve64f", RISCVExtensionVersion{1, 0}}, {"zve64d", RISCVExtensionVersion{1, 0}}, + + {"zicbom", RISCVExtensionVersion{1, 0}}, + {"zicboz", RISCVExtensionVersion{1, 0}}, + {"zicbop", RISCVExtensionVersion{1, 0}}, }; static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { @@ -97,6 +108,7 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zbp", RISCVExtensionVersion{0, 93}}, {"zbr", RISCVExtensionVersion{0, 93}}, {"zbt", RISCVExtensionVersion{0, 93}}, + {"zvfh", RISCVExtensionVersion{0, 1}}, }; static bool stripExperimentalPrefix(StringRef &Ext) { @@ -340,7 +352,7 @@ static Error getExtensionVersion(StringRef Ext, StringRef In, 
unsigned &Major, if (!MajorStr.empty() && In.consume_front("p")) { MinorStr = In.take_while(isDigit); - In = In.substr(MajorStr.size() + 1); + In = In.substr(MajorStr.size() + MinorStr.size() - 1); // Expected 'p' to be followed by minor version number. if (MinorStr.empty()) { @@ -398,8 +410,8 @@ static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major, if (!MinorStr.empty()) Error += "." + MinorStr.str(); Error += " for experimental extension '" + Ext.str() + - "'(this compiler supports " + utostr(SupportedVers.Major) + "." + - utostr(SupportedVers.Minor) + ")"; + "' (this compiler supports " + utostr(SupportedVers.Major) + + "." + utostr(SupportedVers.Minor) + ")"; return createStringError(errc::invalid_argument, Error); } return Error::success(); @@ -686,11 +698,11 @@ Error RISCVISAInfo::checkDependency() { bool HasE = Exts.count("e") != 0; bool HasD = Exts.count("d") != 0; bool HasF = Exts.count("f") != 0; - bool HasZve32x = Exts.count("zve32x") != 0; + bool HasZfinx = Exts.count("zfinx") != 0; + bool HasZdinx = Exts.count("zdinx") != 0; + bool HasVector = Exts.count("zve32x") != 0; bool HasZve32f = Exts.count("zve32f") != 0; bool HasZve64d = Exts.count("zve64d") != 0; - bool HasV = Exts.count("v") != 0; - bool HasVector = HasZve32x || HasV; bool HasZvl = MinVLen != 0; if (HasE && !IsRv32) @@ -706,17 +718,22 @@ Error RISCVISAInfo::checkDependency() { return createStringError(errc::invalid_argument, "d requires f extension to also be specified"); - // FIXME: Consider Zfinx in the future - if (HasZve32f && !HasF) + if (HasZve32f && !HasF && !HasZfinx) + return createStringError( + errc::invalid_argument, + "zve32f requires f or zfinx extension to also be specified"); + + if (HasZve64d && !HasD && !HasZdinx) return createStringError( errc::invalid_argument, - "zve32f requires f extension to also be specified"); + "zve64d requires d or zdinx extension to also be specified"); - // FIXME: Consider Zdinx in the future - if (HasZve64d && !HasD) + if (Exts.count("zvfh") && !Exts.count("zfh") && !Exts.count("zfhmin") && + !Exts.count("zhinx") && !Exts.count("zhinxmin")) return createStringError( errc::invalid_argument, - "zve64d requires d extension to also be specified"); + "zvfh requires zfh, zfhmin, zhinx or zhinxmin extension to also be " + "specified"); if (HasZvl && !HasVector) return createStringError( @@ -730,9 +747,12 @@ Error RISCVISAInfo::checkDependency() { return Error::success(); } -static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"}; +static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"}; static const char *ImpliedExtsZfhmin[] = {"f"}; static const char *ImpliedExtsZfh[] = {"f"}; +static const char *ImpliedExtsZdinx[] = {"zfinx"}; +static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; +static const char *ImpliedExtsZhinx[] = {"zfinx"}; static const char *ImpliedExtsZve64d[] = {"zve64f"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"}; @@ -752,6 +772,7 @@ static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"}; static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; +static const char *ImpliedExtsZvfh[] = {"zve32f"}; struct ImpliedExtsEntry { StringLiteral Name; @@ -767,8 +788,11 @@ struct ImpliedExtsEntry { // Note: The table needs to be sorted by name. 
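// ---- Editorial example (not part of the patch) --------------------------
// How the relaxed dependency checks above surface through the public parser
// (llvm/Support/RISCVISAInfo.h); the arch strings are illustrative only.
#include "llvm/Support/RISCVISAInfo.h"
static void demoISAStrings() {
  // Accepted after this patch: zfinx satisfies zve32f's FP requirement.
  auto OK = llvm::RISCVISAInfo::parseArchString(
      "rv32i_zfinx_zve32f", /*EnableExperimentalExtension=*/false);
  if (!OK)
    llvm::consumeError(OK.takeError()); // not expected after this patch
  // Still rejected: zve32f with neither f nor zfinx.
  auto Bad = llvm::RISCVISAInfo::parseArchString(
      "rv32i_zve32f", /*EnableExperimentalExtension=*/false);
  if (!Bad)
    llvm::consumeError(Bad.takeError()); // expected: missing f/zfinx
}
// ---- End editorial example -----------------------------------------------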
static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"v"}, {ImpliedExtsV}}, + {{"zdinx"}, {ImpliedExtsZdinx}}, {{"zfh"}, {ImpliedExtsZfh}}, {{"zfhmin"}, {ImpliedExtsZfhmin}}, + {{"zhinx"}, {ImpliedExtsZhinx}}, + {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, @@ -777,6 +801,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zve64d"}, {ImpliedExtsZve64d}}, {{"zve64f"}, {ImpliedExtsZve64f}}, {{"zve64x"}, {ImpliedExtsZve64x}}, + {{"zvfh"}, {ImpliedExtsZvfh}}, {{"zvl1024b"}, {ImpliedExtsZvl1024b}}, {{"zvl128b"}, {ImpliedExtsZvl128b}}, {{"zvl16384b"}, {ImpliedExtsZvl16384b}}, @@ -826,6 +851,38 @@ void RISCVISAInfo::updateImplication() { } } +struct CombinedExtsEntry { + StringLiteral CombineExt; + ArrayRef RequiredExts; +}; + +static constexpr CombinedExtsEntry CombineIntoExts[] = { + {{"zk"}, {ImpliedExtsZk}}, + {{"zkn"}, {ImpliedExtsZkn}}, + {{"zks"}, {ImpliedExtsZks}}, +}; + +void RISCVISAInfo::updateCombination() { + bool IsNewCombine = false; + do { + IsNewCombine = false; + for (CombinedExtsEntry CombineIntoExt : CombineIntoExts) { + auto CombineExt = CombineIntoExt.CombineExt; + auto RequiredExts = CombineIntoExt.RequiredExts; + if (hasExtension(CombineExt)) + continue; + bool IsAllRequiredFeatureExist = true; + for (const char *Ext : RequiredExts) + IsAllRequiredFeatureExist &= hasExtension(Ext); + if (IsAllRequiredFeatureExist) { + auto Version = findDefaultVersion(CombineExt); + addExtension(CombineExt, Version->Major, Version->Minor); + IsNewCombine = true; + } + } + } while (IsNewCombine); +} + void RISCVISAInfo::updateFLen() { FLen = 0; // TODO: Handle q extension. @@ -862,11 +919,6 @@ void RISCVISAInfo::updateMaxELen() { ExtName.getAsInteger(10, ZveELen); MaxELen = std::max(MaxELen, ZveELen); } - if (ExtName == "v") { - MaxELenFp = 64; - MaxELen = 64; - return; - } } } @@ -904,6 +956,7 @@ std::vector RISCVISAInfo::toFeatureVector() const { llvm::Expected> RISCVISAInfo::postProcessAndChecking(std::unique_ptr &&ISAInfo) { ISAInfo->updateImplication(); + ISAInfo->updateCombination(); ISAInfo->updateFLen(); ISAInfo->updateMinVLen(); ISAInfo->updateMaxELen(); @@ -912,3 +965,18 @@ RISCVISAInfo::postProcessAndChecking(std::unique_ptr &&ISAInfo) { return std::move(Result); return std::move(ISAInfo); } + +StringRef RISCVISAInfo::computeDefaultABI() const { + if (XLen == 32) { + if (hasExtension("d")) + return "ilp32d"; + if (hasExtension("e")) + return "ilp32e"; + return "ilp32"; + } else if (XLen == 64) { + if (hasExtension("d")) + return "lp64d"; + return "lp64"; + } + llvm_unreachable("Invalid XLEN"); +} diff --git a/llvm/lib/Support/SHA1.cpp b/llvm/lib/Support/SHA1.cpp index 5dce44af9ecd..52bae700350d 100644 --- a/llvm/lib/Support/SHA1.cpp +++ b/llvm/lib/Support/SHA1.cpp @@ -263,7 +263,7 @@ void SHA1::pad() { addUncounted(InternalState.ByteCount << 3); } -StringRef SHA1::final() { +void SHA1::final(std::array &HashResult) { // Pad to complete the last block pad(); @@ -281,12 +281,19 @@ StringRef SHA1::final() { (((InternalState.State[i]) >> 24) & 0x000000ff); } #endif +} - // Return pointer to hash (20 characters) - return StringRef((char *)HashResult, HASH_LENGTH); +std::array SHA1::final() { + union { + std::array HashResult; + std::array ReturnResult; + }; + static_assert(sizeof(HashResult) == sizeof(ReturnResult), ""); + final(HashResult); + return ReturnResult; } -StringRef SHA1::result() { +std::array SHA1::result() { auto StateToRestore = InternalState; auto Hash = final(); @@ -301,9 +308,5 @@ 
StringRef SHA1::result() { std::array<uint8_t, 20> SHA1::hash(ArrayRef<uint8_t> Data) { SHA1 Hash; Hash.update(Data); - StringRef S = Hash.final(); - - std::array<uint8_t, 20> Arr; - memcpy(Arr.data(), S.data(), S.size()); - return Arr; + return Hash.final(); } diff --git a/llvm/lib/Support/SHA256.cpp b/llvm/lib/Support/SHA256.cpp index 3b81506847ec..81d897fb4187 100644 --- a/llvm/lib/Support/SHA256.cpp +++ b/llvm/lib/Support/SHA256.cpp @@ -243,7 +243,7 @@ void SHA256::pad() { addUncounted(len); } -StringRef SHA256::final() { +void SHA256::final(std::array<uint32_t, 8> &HashResult) { // Pad to complete the last block pad(); @@ -261,12 +261,19 @@ StringRef SHA256::final() { (((InternalState.State[i]) >> 24) & 0x000000ff); } #endif +} - // Return pointer to hash (32 characters) - return StringRef((char *)HashResult, HASH_LENGTH); +std::array<uint8_t, 32> SHA256::final() { + union { + std::array<uint32_t, 8> HashResult; + std::array<uint8_t, 32> ReturnResult; + }; + static_assert(sizeof(HashResult) == sizeof(ReturnResult), ""); + final(HashResult); + return ReturnResult; } -StringRef SHA256::result() { +std::array<uint8_t, 32> SHA256::result() { auto StateToRestore = InternalState; auto Hash = final(); @@ -281,11 +288,7 @@ StringRef SHA256::result() { std::array<uint8_t, 32> SHA256::hash(ArrayRef<uint8_t> Data) { SHA256 Hash; Hash.update(Data); - StringRef S = Hash.final(); - - std::array<uint8_t, 32> Arr; - memcpy(Arr.data(), S.data(), S.size()); - return Arr; + return Hash.final(); } } // namespace llvm diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp index a434e50e8c1f..ef6dd5fdf1d6 100644 --- a/llvm/lib/Support/ScopedPrinter.cpp +++ b/llvm/lib/Support/ScopedPrinter.cpp @@ -7,17 +7,10 @@ using namespace llvm::support; namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value) { - OS << "0x" << to_hexString(Value.Value); + OS << "0x" << utohexstr(Value.Value); return OS; } -std::string to_hexString(uint64_t Value, bool UpperCase) { - std::string number; - llvm::raw_string_ostream stream(number); - stream << format_hex_no_prefix(Value, 1, UpperCase); - return stream.str(); -} - void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str, ArrayRef<uint8_t> Data, bool Block, uint32_t StartOffset) { diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index 1d61f2bf7525..a6fd845da869 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -15,7 +15,6 @@ #include "DebugOptions.h" -#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/CommandLine.h" @@ -23,15 +22,14 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Format.h" -#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" +#include <array> #include <cstdlib> //===----------------------------------------------------------------------===// @@ -83,12 +81,20 @@ struct CallbackAndCookie { enum class Status { Empty, Initializing, Initialized, Executing }; std::atomic<Status> Flag; }; + static constexpr size_t MaxSignalHandlerCallbacks = 8; -static CallbackAndCookie CallBacksToRun[MaxSignalHandlerCallbacks]; + +// A global array of CallbackAndCookie may not compile with +// -Werror=global-constructors in C++20 and above. +static std::array<CallbackAndCookie, MaxSignalHandlerCallbacks> & +CallBacksToRun() { + static std::array<CallbackAndCookie, MaxSignalHandlerCallbacks> callbacks; + return
callbacks; +} // Signal-safe. void sys::RunSignalHandlers() { - for (CallbackAndCookie &RunMe : CallBacksToRun) { + for (CallbackAndCookie &RunMe : CallBacksToRun()) { auto Expected = CallbackAndCookie::Status::Initialized; auto Desired = CallbackAndCookie::Status::Executing; if (!RunMe.Flag.compare_exchange_strong(Expected, Desired)) @@ -103,7 +109,7 @@ void sys::RunSignalHandlers() { // Signal-safe. static void insertSignalHandler(sys::SignalHandlerCallback FnPtr, void *Cookie) { - for (CallbackAndCookie &SetMe : CallBacksToRun) { + for (CallbackAndCookie &SetMe : CallBacksToRun()) { auto Expected = CallbackAndCookie::Status::Empty; auto Desired = CallbackAndCookie::Status::Initializing; if (!SetMe.Flag.compare_exchange_strong(Expected, Desired)) diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp index 074dddc81c80..232b84e965a0 100644 --- a/llvm/lib/Support/Signposts.cpp +++ b/llvm/lib/Support/Signposts.cpp @@ -7,8 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Signposts.h" - +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" + #if LLVM_SUPPORT_XCODE_SIGNPOSTS #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Mutex.h" @@ -24,7 +25,7 @@ using namespace llvm; namespace { os_log_t *LogCreator() { os_log_t *X = new os_log_t; - *X = os_log_create("org.llvm.signposts", OS_LOG_CATEGORY_POINTS_OF_INTEREST); + *X = os_log_create("org.llvm.signposts", "toolchain"); return X; } struct LogDeleter { diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index 2eb2989b200b..42982b4c8e6c 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -40,6 +40,17 @@ static const size_t TabStop = 8; unsigned SourceMgr::AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile) { + ErrorOr> NewBufOrErr = + OpenIncludeFile(Filename, IncludedFile); + if (!NewBufOrErr) + return 0; + + return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc); +} + +ErrorOr> +SourceMgr::OpenIncludeFile(const std::string &Filename, + std::string &IncludedFile) { IncludedFile = Filename; ErrorOr> NewBufOrErr = MemoryBuffer::getFile(IncludedFile); @@ -52,10 +63,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, NewBufOrErr = MemoryBuffer::getFile(IncludedFile); } - if (!NewBufOrErr) - return 0; - - return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc); + return NewBufOrErr; } unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 137b37f2b1c3..0fb65accbf1d 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -198,7 +198,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, return true; } -SpecialCaseList::~SpecialCaseList() {} +SpecialCaseList::~SpecialCaseList() = default; bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { diff --git a/llvm/lib/Support/Statistic.cpp b/llvm/lib/Support/Statistic.cpp index 95ee885d2f8f..ec12118650c1 100644 --- a/llvm/lib/Support/Statistic.cpp +++ b/llvm/lib/Support/Statistic.cpp @@ -192,7 +192,7 @@ void llvm::PrintStatistics(raw_ostream &OS) { // Print all of the statistics. 
for (TrackingStatistic *Stat : Stats.Stats) - OS << format("%*u %-*s - %s\n", MaxValLen, Stat->getValue(), + OS << format("%*" PRIu64 " %-*s - %s\n", MaxValLen, Stat->getValue(), MaxDebugTypeLen, Stat->getDebugType(), Stat->getDesc()); OS << '\n'; // Flush the output stream. @@ -253,9 +253,9 @@ void llvm::PrintStatistics() { #endif } -const std::vector> llvm::GetStatistics() { +const std::vector> llvm::GetStatistics() { sys::SmartScopedLock Reader(*StatLock); - std::vector> ReturnStats; + std::vector> ReturnStats; for (const auto &Stat : StatInfo->statistics()) ReturnStats.emplace_back(Stat->getName(), Stat->getValue()); diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp index 012c785b4351..9b2f96fca2cd 100644 --- a/llvm/lib/Support/StringMap.cpp +++ b/llvm/lib/Support/StringMap.cpp @@ -18,7 +18,7 @@ using namespace llvm; /// Returns the number of buckets to allocate to ensure that the DenseMap can /// accommodate \p NumEntries without need to grow(). -static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { +static inline unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { // Ensure that "NumEntries * 4 < NumBuckets * 3" if (NumEntries == 0) return 0; @@ -27,6 +27,21 @@ static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { return NextPowerOf2(NumEntries * 4 / 3 + 1); } +static inline StringMapEntryBase **createTable(unsigned NewNumBuckets) { + auto **Table = static_cast(safe_calloc( + NewNumBuckets + 1, sizeof(StringMapEntryBase **) + sizeof(unsigned))); + + // Allocate one extra bucket, set it to look filled so the iterators stop at + // end. + Table[NewNumBuckets] = (StringMapEntryBase *)2; + return Table; +} + +static inline unsigned *getHashTable(StringMapEntryBase **TheTable, + unsigned NumBuckets) { + return reinterpret_cast(TheTable + NumBuckets + 1); +} + StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { ItemSize = itemSize; @@ -54,15 +69,10 @@ void StringMapImpl::init(unsigned InitSize) { NumItems = 0; NumTombstones = 0; - TheTable = static_cast(safe_calloc( - NewNumBuckets + 1, sizeof(StringMapEntryBase **) + sizeof(unsigned))); + TheTable = createTable(NewNumBuckets); // Set the member only if TheTable was successfully allocated NumBuckets = NewNumBuckets; - - // Allocate one extra bucket, set it to look filled so the iterators stop at - // end. - TheTable[NumBuckets] = (StringMapEntryBase *)2; } /// LookupBucketFor - Look up the bucket that the specified string should end @@ -71,14 +81,12 @@ void StringMapImpl::init(unsigned InitSize) { /// case, the FullHashValue field of the bucket will be set to the hash value /// of the string. unsigned StringMapImpl::LookupBucketFor(StringRef Name) { - unsigned HTSize = NumBuckets; - if (HTSize == 0) { // Hash table unallocated so far? + // Hash table unallocated so far? + if (NumBuckets == 0) init(16); - HTSize = NumBuckets; - } unsigned FullHashValue = djbHash(Name, 0); - unsigned BucketNo = FullHashValue & (HTSize - 1); - unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); + unsigned BucketNo = FullHashValue & (NumBuckets - 1); + unsigned *HashTable = getHashTable(TheTable, NumBuckets); unsigned ProbeAmt = 1; int FirstTombstone = -1; @@ -117,7 +125,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { } // Okay, we didn't find the item. Probe to the next bucket. 
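// ---- Editorial sketch (not part of the patch) -----------------------------
// The probe sequence used in LookupBucketFor/FindKey above: step sizes
// 1, 2, 3, ... visit triangular-number offsets, which for power-of-two
// table sizes reach every bucket exactly once.
static unsigned nextProbe(unsigned BucketNo, unsigned &ProbeAmt,
                          unsigned NumBuckets) {
  // NumBuckets must be a power of two for the mask to work.
  return (BucketNo + ProbeAmt++) & (NumBuckets - 1);
}
// ---- End editorial sketch --------------------------------------------------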
- BucketNo = (BucketNo + ProbeAmt) & (HTSize - 1); + BucketNo = (BucketNo + ProbeAmt) & (NumBuckets - 1); // Use quadratic probing, it has fewer clumping artifacts than linear // probing and has good cache behavior in the common case. @@ -129,12 +137,11 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { /// in the map, return the bucket number of the key. Otherwise return -1. /// This does not modify the map. int StringMapImpl::FindKey(StringRef Key) const { - unsigned HTSize = NumBuckets; - if (HTSize == 0) + if (NumBuckets == 0) return -1; // Really empty table? unsigned FullHashValue = djbHash(Key, 0); - unsigned BucketNo = FullHashValue & (HTSize - 1); - unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); + unsigned BucketNo = FullHashValue & (NumBuckets - 1); + unsigned *HashTable = getHashTable(TheTable, NumBuckets); unsigned ProbeAmt = 1; while (true) { @@ -161,7 +168,7 @@ int StringMapImpl::FindKey(StringRef Key) const { } // Okay, we didn't find the item. Probe to the next bucket. - BucketNo = (BucketNo + ProbeAmt) & (HTSize - 1); + BucketNo = (BucketNo + ProbeAmt) & (NumBuckets - 1); // Use quadratic probing, it has fewer clumping artifacts than linear // probing and has good cache behavior in the common case. @@ -198,8 +205,6 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// the appropriate mod-of-hashtable-size. unsigned StringMapImpl::RehashTable(unsigned BucketNo) { unsigned NewSize; - unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); - // If the hash table is now more than 3/4 full, or if fewer than 1/8 of // the buckets are empty (meaning that many are filled with tombstones), // grow/rehash the table. @@ -213,36 +218,25 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { } unsigned NewBucketNo = BucketNo; - // Allocate one extra bucket which will always be non-empty. This allows the - // iterators to stop at end. - auto NewTableArray = static_cast(safe_calloc( - NewSize + 1, sizeof(StringMapEntryBase *) + sizeof(unsigned))); - - unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1); - NewTableArray[NewSize] = (StringMapEntryBase *)2; + auto **NewTableArray = createTable(NewSize); + unsigned *NewHashArray = getHashTable(NewTableArray, NewSize); + unsigned *HashTable = getHashTable(TheTable, NumBuckets); // Rehash all the items into their new buckets. Luckily :) we already have // the hash values available, so we don't have to rehash any strings. for (unsigned I = 0, E = NumBuckets; I != E; ++I) { StringMapEntryBase *Bucket = TheTable[I]; if (Bucket && Bucket != getTombstoneVal()) { - // Fast case, bucket available. + // If the bucket is not available, probe for a spot. unsigned FullHash = HashTable[I]; unsigned NewBucket = FullHash & (NewSize - 1); - if (!NewTableArray[NewBucket]) { - NewTableArray[FullHash & (NewSize - 1)] = Bucket; - NewHashArray[FullHash & (NewSize - 1)] = FullHash; - if (I == BucketNo) - NewBucketNo = NewBucket; - continue; + if (NewTableArray[NewBucket]) { + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize - 1); + } while (NewTableArray[NewBucket]); } - // Otherwise probe for a spot. - unsigned ProbeSize = 1; - do { - NewBucket = (NewBucket + ProbeSize++) & (NewSize - 1); - } while (NewTableArray[NewBucket]); - // Finally found a slot. Fill it in. 
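// [Editor's note, not part of the patch] The rehash rewrite above folds the
// old fast path and probe path together: the new code probes only while the
// target bucket is occupied and then falls through to a single fill site, so
// the two stores below are now shared by both the direct-hit and the
// probed case.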
NewTableArray[NewBucket] = Bucket; NewHashArray[NewBucket] = FullHash; diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index 3ed08ed38661..096b2d2d8c07 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -98,6 +98,13 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, AllowReplacements, MaxEditDistance); } +unsigned llvm::StringRef::edit_distance_insensitive( + StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const { + return llvm::ComputeMappedEditDistance( + makeArrayRef(data(), size()), makeArrayRef(Other.data(), Other.size()), + llvm::toLower, AllowReplacements, MaxEditDistance); +} + //===----------------------------------------------------------------------===// // String Operations //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index 0105cd2e8153..e5590d458fed 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -104,6 +104,7 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK}, @@ -114,6 +115,11 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, }; const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { @@ -217,6 +223,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX909: return {9, 0, 9}; case GK_GFX90A: return {9, 0, 10}; case GK_GFX90C: return {9, 0, 12}; + case GK_GFX940: return {9, 4, 0}; case GK_GFX1010: return {10, 1, 0}; case GK_GFX1011: return {10, 1, 1}; case GK_GFX1012: return {10, 1, 2}; @@ -227,6 +234,11 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX1033: return {10, 3, 3}; case GK_GFX1034: return {10, 3, 4}; case GK_GFX1035: return {10, 3, 5}; + case GK_GFX1036: return {10, 3, 6}; + case GK_GFX1100: return {11, 0, 0}; + case GK_GFX1101: return {11, 0, 1}; + case GK_GFX1102: return {11, 0, 2}; + case GK_GFX1103: return 
{11, 0, 3}; default: return {0, 0, 0}; } } @@ -329,21 +341,6 @@ bool getCPUFeaturesExceptStdExt(CPUKind Kind, return true; } -StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo) { - if (ISAInfo.getXLen() == 32) { - if (ISAInfo.hasExtension("d")) - return "ilp32d"; - if (ISAInfo.hasExtension("e")) - return "ilp32e"; - return "ilp32"; - } else if (ISAInfo.getXLen() == 64) { - if (ISAInfo.hasExtension("d")) - return "lp64d"; - return "lp64"; - } - llvm_unreachable("Invalid XLEN"); -} - } // namespace RISCV } // namespace llvm diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 9f92ae1c7a7c..31461e31c65c 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -24,11 +24,19 @@ using namespace llvm; #if LLVM_ENABLE_THREADS +// A note on thread groups: Tasks are by default in no group (represented +// by nullptr ThreadPoolTaskGroup pointer in the Tasks queue) and functionality +// here normally works on all tasks regardless of their group (functions +// in that case receive nullptr ThreadPoolTaskGroup pointer as argument). +// A task in a group has a pointer to that ThreadPoolTaskGroup in the Tasks +// queue, and functions called to work only on tasks from one group take that +// pointer. + ThreadPool::ThreadPool(ThreadPoolStrategy S) : Strategy(S), MaxThreadCount(S.compute_thread_count()) {} void ThreadPool::grow(int requested) { - std::unique_lock<std::mutex> LockGuard(ThreadsLock); + llvm::sys::ScopedWriter LockGuard(ThreadsLock); if (Threads.size() >= MaxThreadCount) return; // Already hit the max thread pool size. int newThreadCount = std::min<int>(requested, MaxThreadCount); @@ -36,52 +44,129 @@ void ThreadPool::grow(int requested) { int ThreadID = Threads.size(); Threads.emplace_back([this, ThreadID] { Strategy.apply_thread_strategy(ThreadID); - while (true) { - std::function<void()> Task; - { - std::unique_lock<std::mutex> LockGuard(QueueLock); - // Wait for tasks to be pushed in the queue - QueueCondition.wait(LockGuard, - [&] { return !EnableFlag || !Tasks.empty(); }); - // Exit condition - if (!EnableFlag && Tasks.empty()) - return; - // Yeah, we have a task, grab it and release the lock on the queue - - // We first need to signal that we are active before popping the queue - // in order for wait() to properly detect that even if the queue is - // empty, there is still a task in flight. - ++ActiveThreads; - Task = std::move(Tasks.front()); - Tasks.pop(); - } - // Run the task we just grabbed - Task(); - - bool Notify; - { - // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() - std::lock_guard<std::mutex> LockGuard(QueueLock); - --ActiveThreads; - Notify = workCompletedUnlocked(); - } - // Notify task completion if this is the last active thread, in case - // someone waits on ThreadPool::wait(). - if (Notify) - CompletionCondition.notify_all(); - } + processTasks(nullptr); }); } } +#ifndef NDEBUG +// The group of the tasks run by the current thread. +static LLVM_THREAD_LOCAL std::vector<ThreadPoolTaskGroup *> + *CurrentThreadTaskGroups = nullptr; +#endif + +// WaitingForGroup == nullptr means all tasks regardless of their group.
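// [Editor's illustration, not part of the patch] How the task-group
// machinery below is meant to be driven. This assumes the ThreadPoolTaskGroup
// interface added to llvm/include/llvm/Support/ThreadPool.h in this same
// import (a constructor taking the pool, plus async() and wait()); that
// header is outside this hunk, so treat the sketch as indicative rather than
// definitive:
//
//   ThreadPool Pool;
//   ThreadPoolTaskGroup Group(Pool);
//   for (int I = 0; I < 8; ++I)
//     Group.async([I] { /* work item I */ }); // queued tagged with &Group
//   Group.wait(); // waits only for this group's tasks; a worker thread
//                 // calling it runs queued tasks instead of blocking (see
//                 // ThreadPool::wait(ThreadPoolTaskGroup &) further below)
//
// processTasks() below implements that scheduling; WaitingForGroup selects
// which group's completion allows the call to return.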
+void ThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) { + while (true) { + std::function<void()> Task; + ThreadPoolTaskGroup *GroupOfTask; + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + bool workCompletedForGroup = false; // Result of workCompletedUnlocked() + // Wait for tasks to be pushed in the queue + QueueCondition.wait(LockGuard, [&] { + return !EnableFlag || !Tasks.empty() || + (WaitingForGroup != nullptr && + (workCompletedForGroup = + workCompletedUnlocked(WaitingForGroup))); + }); + // Exit condition + if (!EnableFlag && Tasks.empty()) + return; + if (WaitingForGroup != nullptr && workCompletedForGroup) + return; + // Yeah, we have a task, grab it and release the lock on the queue + + // We first need to signal that we are active before popping the queue + // in order for wait() to properly detect that even if the queue is + // empty, there is still a task in flight. + ++ActiveThreads; + Task = std::move(Tasks.front().first); + GroupOfTask = Tasks.front().second; + // Need to count active threads in each group separately, ActiveThreads + // would never be 0 if waiting for another group inside a wait. + if (GroupOfTask != nullptr) + ++ActiveGroups[GroupOfTask]; // Increment or set to 1 if new item + Tasks.pop_front(); + } +#ifndef NDEBUG + if (CurrentThreadTaskGroups == nullptr) + CurrentThreadTaskGroups = new std::vector<ThreadPoolTaskGroup *>; + CurrentThreadTaskGroups->push_back(GroupOfTask); +#endif + + // Run the task we just grabbed + Task(); + +#ifndef NDEBUG + CurrentThreadTaskGroups->pop_back(); + if (CurrentThreadTaskGroups->empty()) { + delete CurrentThreadTaskGroups; + CurrentThreadTaskGroups = nullptr; + } +#endif + + bool Notify; + bool NotifyGroup; + { + // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() + std::lock_guard<std::mutex> LockGuard(QueueLock); + --ActiveThreads; + if (GroupOfTask != nullptr) { + auto A = ActiveGroups.find(GroupOfTask); + if (--(A->second) == 0) + ActiveGroups.erase(A); + } + Notify = workCompletedUnlocked(GroupOfTask); + NotifyGroup = GroupOfTask != nullptr && Notify; + } + // Notify task completion if this is the last active thread, in case + // someone waits on ThreadPool::wait(). + if (Notify) + CompletionCondition.notify_all(); + // If this was a task in a group, notify also threads waiting for tasks + // in this function on QueueCondition, to make a recursive wait() return + // after the group it's been waiting for has finished. + if (NotifyGroup) + QueueCondition.notify_all(); + } +} + +bool ThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const { + if (Group == nullptr) + return !ActiveThreads && Tasks.empty(); + return ActiveGroups.count(Group) == 0 && + !llvm::any_of(Tasks, + [Group](const auto &T) { return T.second == Group; }); +} + void ThreadPool::wait() { + assert(!isWorkerThread()); // Would deadlock waiting for itself. // Wait for all threads to complete and the queue to be empty std::unique_lock<std::mutex> LockGuard(QueueLock); - CompletionCondition.wait(LockGuard, [&] { return workCompletedUnlocked(); }); + CompletionCondition.wait(LockGuard, + [&] { return workCompletedUnlocked(nullptr); }); +} + +void ThreadPool::wait(ThreadPoolTaskGroup &Group) { + // Wait for all threads in the group to complete. + if (!isWorkerThread()) { + std::unique_lock<std::mutex> LockGuard(QueueLock); + CompletionCondition.wait(LockGuard, + [&] { return workCompletedUnlocked(&Group); }); + return; + } + // Make sure to not deadlock waiting for oneself.
+ assert(CurrentThreadTaskGroups == nullptr || + !llvm::is_contained(*CurrentThreadTaskGroups, &Group)); + // Handle the case of a recursive call from another task in a different group, + // in which case process tasks while waiting to keep the thread busy and avoid + // possible deadlock. + processTasks(&Group); } bool ThreadPool::isWorkerThread() const { - std::unique_lock<std::mutex> LockGuard(ThreadsLock); + llvm::sys::ScopedReader LockGuard(ThreadsLock); llvm::thread::id CurrentThreadId = llvm::this_thread::get_id(); for (const llvm::thread &Thread : Threads) if (CurrentThreadId == Thread.get_id()) @@ -96,7 +181,7 @@ ThreadPool::~ThreadPool() { EnableFlag = false; } QueueCondition.notify_all(); - std::unique_lock<std::mutex> LockGuard(ThreadsLock); + llvm::sys::ScopedReader LockGuard(ThreadsLock); for (auto &Worker : Threads) Worker.join(); } @@ -115,12 +200,18 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S) : MaxThreadCount(1) { void ThreadPool::wait() { // Sequential implementation running the tasks while (!Tasks.empty()) { - auto Task = std::move(Tasks.front()); - Tasks.pop(); + auto Task = std::move(Tasks.front().first); + Tasks.pop_front(); Task(); } } +void ThreadPool::wait(ThreadPoolTaskGroup &) { + // Simply wait for all, this works even if recursive (the running task + // is already removed from the queue). + wait(); +} + bool ThreadPool::isWorkerThread() const { report_fatal_error("LLVM compiled without multithreading"); } diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp index 4370adc9c3e0..40a20ccc6583 100644 --- a/llvm/lib/Support/TrigramIndex.cpp +++ b/llvm/lib/Support/TrigramIndex.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/TrigramIndex.h" +#include "llvm/ADT/StringRef.h" #include using namespace llvm; diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index a9afcc9db96a..6696d158b2c1 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -37,6 +37,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case bpfeb: return "bpfeb"; case bpfel: return "bpfel"; case csky: return "csky"; + case dxil: return "dxil"; case hexagon: return "hexagon"; case hsail64: return "hsail64"; case hsail: return "hsail"; @@ -44,6 +45,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case lanai: return "lanai"; case le32: return "le32"; case le64: return "le64"; + case loongarch32: return "loongarch32"; + case loongarch64: return "loongarch64"; case m68k: return "m68k"; case mips64: return "mips64"; case mips64el: return "mips64el"; @@ -164,6 +167,11 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case ve: return "ve"; case csky: return "csky"; + + case loongarch32: + case loongarch64: return "loongarch"; + + case dxil: return "dx"; } } @@ -203,6 +211,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { case Contiki: return "contiki"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; + case DriverKit: return "driverkit"; case ELFIAMCU: return "elfiamcu"; case Emscripten: return "emscripten"; case FreeBSD: return "freebsd"; @@ -222,6 +231,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; case PS4: return "ps4"; + case PS5: return "ps5"; case RTEMS: return "rtems"; case Solaris: return "solaris"; case TvOS: return "tvos"; case WatchOS: return "watchos"; case Win32: return "windows"; case ZOS: return "zos";
+ case ShaderModel: return "shadermodel"; } llvm_unreachable("Invalid OSType"); @@ -258,6 +269,21 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case MuslEABIHF: return "musleabihf"; case MuslX32: return "muslx32"; case Simulator: return "simulator"; + case Pixel: return "pixel"; + case Vertex: return "vertex"; + case Geometry: return "geometry"; + case Hull: return "hull"; + case Domain: return "domain"; + case Compute: return "compute"; + case Library: return "library"; + case RayGeneration: return "raygeneration"; + case Intersection: return "intersection"; + case AnyHit: return "anyhit"; + case ClosestHit: return "closesthit"; + case Miss: return "miss"; + case Callable: return "callable"; + case Mesh: return "mesh"; + case Amplification: return "amplification"; } llvm_unreachable("Invalid EnvironmentType!"); @@ -311,12 +337,14 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("sparc", sparc) .Case("sparcel", sparcel) .Case("sparcv9", sparcv9) + .Case("s390x", systemz) .Case("systemz", systemz) .Case("tce", tce) .Case("tcele", tcele) .Case("thumb", thumb) .Case("thumbeb", thumbeb) .Case("x86", x86) + .Case("i386", x86) .Case("x86-64", x86_64) .Case("xcore", xcore) .Case("nvptx", nvptx) @@ -340,6 +368,9 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("renderscript64", renderscript64) .Case("ve", ve) .Case("csky", csky) + .Case("loongarch32", loongarch32) + .Case("loongarch64", loongarch64) + .Case("dxil", dxil) .Default(UnknownArch); } @@ -464,8 +495,10 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("hsail64", Triple::hsail64) .Case("spir", Triple::spir) .Case("spir64", Triple::spir64) - .Case("spirv32", Triple::spirv32) - .Case("spirv64", Triple::spirv64) + .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2", + "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", Triple::spirv32) + .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2", + "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", Triple::spirv64) .StartsWith("kalimba", Triple::kalimba) .Case("lanai", Triple::lanai) .Case("renderscript32", Triple::renderscript32) @@ -475,6 +508,9 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("wasm32", Triple::wasm32) .Case("wasm64", Triple::wasm64) .Case("csky", Triple::csky) + .Case("loongarch32", Triple::loongarch32) + .Case("loongarch64", Triple::loongarch64) + .Case("dxil", Triple::dxil) .Default(Triple::UnknownArch); // Some architectures require special parsing logic just to compute the @@ -538,9 +574,11 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("nvcl", Triple::NVCL) .StartsWith("amdhsa", Triple::AMDHSA) .StartsWith("ps4", Triple::PS4) + .StartsWith("ps5", Triple::PS5) .StartsWith("elfiamcu", Triple::ELFIAMCU) .StartsWith("tvos", Triple::TvOS) .StartsWith("watchos", Triple::WatchOS) + .StartsWith("driverkit", Triple::DriverKit) .StartsWith("mesa3d", Triple::Mesa3D) .StartsWith("contiki", Triple::Contiki) .StartsWith("amdpal", Triple::AMDPAL) @@ -548,6 +586,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("hurd", Triple::Hurd) .StartsWith("wasi", Triple::WASI) .StartsWith("emscripten", Triple::Emscripten) + .StartsWith("shadermodel", Triple::ShaderModel) .Default(Triple::UnknownOS); } @@ -574,20 +613,36 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("coreclr", Triple::CoreCLR) .StartsWith("simulator", Triple::Simulator) .StartsWith("macabi", Triple::MacABI) + .StartsWith("pixel", Triple::Pixel) 
+ .StartsWith("vertex", Triple::Vertex) + .StartsWith("geometry", Triple::Geometry) + .StartsWith("hull", Triple::Hull) + .StartsWith("domain", Triple::Domain) + .StartsWith("compute", Triple::Compute) + .StartsWith("library", Triple::Library) + .StartsWith("raygeneration", Triple::RayGeneration) + .StartsWith("intersection", Triple::Intersection) + .StartsWith("anyhit", Triple::AnyHit) + .StartsWith("closesthit", Triple::ClosestHit) + .StartsWith("miss", Triple::Miss) + .StartsWith("callable", Triple::Callable) + .StartsWith("mesh", Triple::Mesh) + .StartsWith("amplification", Triple::Amplification) .Default(Triple::UnknownEnvironment); } static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) { return StringSwitch(EnvironmentName) - // "xcoff" must come before "coff" because of the order-dependendent - // pattern matching. - .EndsWith("xcoff", Triple::XCOFF) - .EndsWith("coff", Triple::COFF) - .EndsWith("elf", Triple::ELF) - .EndsWith("goff", Triple::GOFF) - .EndsWith("macho", Triple::MachO) - .EndsWith("wasm", Triple::Wasm) - .Default(Triple::UnknownObjectFormat); + // "xcoff" must come before "coff" because of the order-dependendent + // pattern matching. + .EndsWith("xcoff", Triple::XCOFF) + .EndsWith("coff", Triple::COFF) + .EndsWith("elf", Triple::ELF) + .EndsWith("goff", Triple::GOFF) + .EndsWith("macho", Triple::MachO) + .EndsWith("wasm", Triple::Wasm) + .EndsWith("spirv", Triple::SPIRV) + .Default(Triple::UnknownObjectFormat); } static Triple::SubArchType parseSubArch(StringRef SubArchName) { @@ -601,6 +656,16 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { if (SubArchName == "arm64e") return Triple::AArch64SubArch_arm64e; + if (SubArchName.startswith("spirv")) + return StringSwitch(SubArchName) + .EndsWith("v1.0", Triple::SPIRVSubArch_v10) + .EndsWith("v1.1", Triple::SPIRVSubArch_v11) + .EndsWith("v1.2", Triple::SPIRVSubArch_v12) + .EndsWith("v1.3", Triple::SPIRVSubArch_v13) + .EndsWith("v1.4", Triple::SPIRVSubArch_v14) + .EndsWith("v1.5", Triple::SPIRVSubArch_v15) + .Default(Triple::NoSubArch); + StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); // For now, this is the small part. Early return. @@ -688,13 +753,24 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) { switch (Kind) { - case Triple::UnknownObjectFormat: return ""; - case Triple::COFF: return "coff"; - case Triple::ELF: return "elf"; - case Triple::GOFF: return "goff"; - case Triple::MachO: return "macho"; - case Triple::Wasm: return "wasm"; - case Triple::XCOFF: return "xcoff"; + case Triple::UnknownObjectFormat: + return ""; + case Triple::COFF: + return "coff"; + case Triple::ELF: + return "elf"; + case Triple::GOFF: + return "goff"; + case Triple::MachO: + return "macho"; + case Triple::Wasm: + return "wasm"; + case Triple::XCOFF: + return "xcoff"; + case Triple::DXContainer: + return "dxcontainer"; + case Triple::SPIRV: + return "spirv"; } llvm_unreachable("unknown object format type"); } @@ -731,6 +807,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::lanai: case Triple::le32: case Triple::le64: + case Triple::loongarch32: + case Triple::loongarch64: case Triple::m68k: case Triple::mips64: case Triple::mips64el: @@ -776,8 +854,10 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::spirv32: case Triple::spirv64: - // TODO: In future this will be Triple::SPIRV. 
- return Triple::UnknownObjectFormat; + return Triple::SPIRV; + + case Triple::dxil: + return Triple::DXContainer; } llvm_unreachable("unknown architecture"); } @@ -1158,6 +1238,8 @@ bool Triple::getMacOSXVersion(VersionTuple &Version) const { // IOS. Version = VersionTuple(10, 4); break; + case DriverKit: + llvm_unreachable("OSX version isn't relevant for DriverKit"); } return true; } @@ -1182,6 +1264,8 @@ VersionTuple Triple::getiOSVersion() const { } case WatchOS: llvm_unreachable("conflicting triple info"); + case DriverKit: + llvm_unreachable("DriverKit doesn't have an iOS version"); } } @@ -1203,6 +1287,20 @@ VersionTuple Triple::getWatchOSVersion() const { } case IOS: llvm_unreachable("conflicting triple info"); + case DriverKit: + llvm_unreachable("DriverKit doesn't have a WatchOS version"); + } +} + +VersionTuple Triple::getDriverKitVersion() const { + switch (getOS()) { + default: + llvm_unreachable("unexpected OS for Darwin triple"); + case DriverKit: + VersionTuple Version = getOSVersion(); + if (Version.getMajor() == 0) + return Version.withMajorReplaced(19); + return Version; } } @@ -1285,11 +1383,13 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::csky: + case llvm::Triple::dxil: case llvm::Triple::hexagon: case llvm::Triple::hsail: case llvm::Triple::kalimba: case llvm::Triple::lanai: case llvm::Triple::le32: + case llvm::Triple::loongarch32: case llvm::Triple::m68k: case llvm::Triple::mips: case llvm::Triple::mipsel: @@ -1321,6 +1421,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::bpfel: case llvm::Triple::hsail64: case llvm::Triple::le64: + case llvm::Triple::loongarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::nvptx64: @@ -1372,11 +1473,13 @@ Triple Triple::get32BitArchVariant() const { case Triple::arm: case Triple::armeb: case Triple::csky: + case Triple::dxil: case Triple::hexagon: case Triple::hsail: case Triple::kalimba: case Triple::lanai: case Triple::le32: + case Triple::loongarch32: case Triple::m68k: case Triple::mips: case Triple::mipsel: @@ -1406,6 +1509,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::amdil64: T.setArch(Triple::amdil); break; case Triple::hsail64: T.setArch(Triple::hsail); break; case Triple::le64: T.setArch(Triple::le32); break; + case Triple::loongarch64: T.setArch(Triple::loongarch32); break; case Triple::mips64: T.setArch(Triple::mips, getSubArch()); break; @@ -1419,7 +1523,9 @@ Triple Triple::get32BitArchVariant() const { case Triple::riscv64: T.setArch(Triple::riscv32); break; case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::spir64: T.setArch(Triple::spir); break; - case Triple::spirv64: T.setArch(Triple::spirv32); break; + case Triple::spirv64: + T.setArch(Triple::spirv32, getSubArch()); + break; case Triple::wasm64: T.setArch(Triple::wasm32); break; case Triple::x86_64: T.setArch(Triple::x86); break; } @@ -1433,6 +1539,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::arc: case Triple::avr: case Triple::csky: + case Triple::dxil: case Triple::hexagon: case Triple::kalimba: case Triple::lanai: @@ -1455,6 +1562,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::bpfel: case Triple::hsail64: case Triple::le64: + case Triple::loongarch64: case Triple::mips64: case Triple::mips64el: case Triple::nvptx64: @@ -1478,6 +1586,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::armeb: 
T.setArch(Triple::aarch64_be); break; case Triple::hsail: T.setArch(Triple::hsail64); break; case Triple::le32: T.setArch(Triple::le64); break; + case Triple::loongarch32: T.setArch(Triple::loongarch64); break; case Triple::mips: T.setArch(Triple::mips64, getSubArch()); break; @@ -1491,7 +1600,9 @@ Triple Triple::get64BitArchVariant() const { case Triple::riscv32: T.setArch(Triple::riscv64); break; case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::spir: T.setArch(Triple::spir64); break; - case Triple::spirv32: T.setArch(Triple::spirv64); break; + case Triple::spirv32: + T.setArch(Triple::spirv64, getSubArch()); + break; case Triple::thumb: T.setArch(Triple::aarch64); break; case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; case Triple::wasm32: T.setArch(Triple::wasm64); break; @@ -1511,12 +1622,15 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::amdil64: case Triple::amdil: case Triple::avr: + case Triple::dxil: case Triple::hexagon: case Triple::hsail64: case Triple::hsail: case Triple::kalimba: case Triple::le32: case Triple::le64: + case Triple::loongarch32: + case Triple::loongarch64: case Triple::msp430: case Triple::nvptx64: case Triple::nvptx: @@ -1611,12 +1725,15 @@ bool Triple::isLittleEndian() const { case Triple::avr: case Triple::bpfel: case Triple::csky: + case Triple::dxil: case Triple::hexagon: case Triple::hsail64: case Triple::hsail: case Triple::kalimba: case Triple::le32: case Triple::le64: + case Triple::loongarch32: + case Triple::loongarch64: case Triple::mips64el: case Triple::mipsel: case Triple::msp430: @@ -1725,6 +1842,8 @@ VersionTuple Triple::getMinimumSupportedOSVersion() const { if (isSimulatorEnvironment()) return VersionTuple(7, 0, 0); break; + case Triple::DriverKit: + return VersionTuple(20, 0, 0); default: break; } @@ -1755,6 +1874,7 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const { case llvm::Triple::MacOSX: case llvm::Triple::TvOS: case llvm::Triple::WatchOS: + case llvm::Triple::DriverKit: if (MArch == "v7k") return "cortex-a7"; break; @@ -1811,3 +1931,33 @@ VersionTuple Triple::getCanonicalVersionForOS(OSType OSKind, return Version; } } + +// HLSL triple environment orders are relied on in the front end +static_assert(Triple::Vertex - Triple::Pixel == 1, + "incorrect HLSL stage order"); +static_assert(Triple::Geometry - Triple::Pixel == 2, + "incorrect HLSL stage order"); +static_assert(Triple::Hull - Triple::Pixel == 3, + "incorrect HLSL stage order"); +static_assert(Triple::Domain - Triple::Pixel == 4, + "incorrect HLSL stage order"); +static_assert(Triple::Compute - Triple::Pixel == 5, + "incorrect HLSL stage order"); +static_assert(Triple::Library - Triple::Pixel == 6, + "incorrect HLSL stage order"); +static_assert(Triple::RayGeneration - Triple::Pixel == 7, + "incorrect HLSL stage order"); +static_assert(Triple::Intersection - Triple::Pixel == 8, + "incorrect HLSL stage order"); +static_assert(Triple::AnyHit - Triple::Pixel == 9, + "incorrect HLSL stage order"); +static_assert(Triple::ClosestHit - Triple::Pixel == 10, + "incorrect HLSL stage order"); +static_assert(Triple::Miss - Triple::Pixel == 11, + "incorrect HLSL stage order"); +static_assert(Triple::Callable - Triple::Pixel == 12, + "incorrect HLSL stage order"); +static_assert(Triple::Mesh - Triple::Pixel == 13, + "incorrect HLSL stage order"); +static_assert(Triple::Amplification - Triple::Pixel == 14, + "incorrect HLSL stage order"); diff --git a/llvm/lib/Support/TypeSize.cpp b/llvm/lib/Support/TypeSize.cpp index 
a80fde83e3bc..8bed9b29cba5 100644 --- a/llvm/lib/Support/TypeSize.cpp +++ b/llvm/lib/Support/TypeSize.cpp @@ -21,11 +21,10 @@ struct CreateScalableErrorAsWarning { /// using the wrong interface on a scalable vector. static void *call() { return new cl::opt( - "treat-scalable-fixed-error-as-warning", cl::Hidden, cl::init(false), + "treat-scalable-fixed-error-as-warning", cl::Hidden, cl::desc( "Treat issues where a fixed-width property is requested from a " - "scalable type as a warning, instead of an error."), - cl::ZeroOrMore); + "scalable type as a warning, instead of an error")); } }; } // namespace diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp index bb6e75555b4c..103710303094 100644 --- a/llvm/lib/Support/Unicode.cpp +++ b/llvm/lib/Support/Unicode.cpp @@ -19,197 +19,271 @@ namespace llvm { namespace sys { namespace unicode { +/// Unicode code points of the categories L, M, N, P, S and Zs are considered +/// printable. +/// In addition, U+00AD SOFT HYPHEN is also considered printable, as +/// it's actually displayed on most terminals. \return true if the character is +/// considered printable. bool isPrintable(int UCS) { - // Sorted list of non-overlapping intervals of code points that are not - // supposed to be printable. - static const UnicodeCharRange NonPrintableRanges[] = { - { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F }, - { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B }, - { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 }, - { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 }, - { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF }, - { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D }, - { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C }, - { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F }, - { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F }, - { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF }, - { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 }, - { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 }, - { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB }, - { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 }, - { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 }, - { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E }, - { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 }, - { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B }, - { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A }, - { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D }, - { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 }, - { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 }, - { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB }, - { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF }, - { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 }, - { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 }, - { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 }, - { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A }, - { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E }, - { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 }, - { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 }, - { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 }, - { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD }, - { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF }, - { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 }, - { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 
0x0C11 }, - { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C }, - { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 }, - { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 }, - { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 }, - { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 }, - { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 }, - { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD }, - { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 }, - { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D }, - { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 }, - { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F }, - { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 }, - { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 }, - { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 }, - { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 }, - { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E }, - { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 }, - { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 }, - { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 }, - { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC }, - { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 }, - { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB }, - { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 }, - { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD }, - { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC }, - { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 }, - { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 }, - { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F }, - { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF }, - { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 }, - { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C }, - { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF }, - { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D }, - { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F }, - { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F }, - { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF }, - { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F }, - { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF }, - { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F }, - { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F }, - { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF }, - { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F }, - { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F }, - { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F }, - { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C }, - { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF }, - { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F }, - { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 }, - { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E }, - { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 }, - { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 }, - { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F }, - { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 }, - { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF }, - { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF }, - { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 }, - { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F }, - { 
0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 }, - { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E }, - { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 }, - { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF }, - { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 }, - { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A }, - { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF }, - { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 }, - { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F }, - { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F }, - { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF }, - { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F }, - { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F }, - { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F }, - { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD }, - { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E }, - { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD }, - { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F }, - { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA }, - { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 }, - { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF }, - { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF }, - { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F }, - { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C }, - { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F }, - { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 }, - { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF }, - { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F }, - { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F }, - { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 }, - { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 }, - { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF }, - { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF }, - { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B }, - { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F }, - { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 }, - { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F }, - { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F }, - { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E }, - { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F }, - { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 }, - { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E }, - { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E }, - { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD }, - { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B }, - { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 }, - { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F }, - { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 }, - { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F }, - { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F }, - { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF }, - { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F }, - { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF }, - { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F }, - { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF }, - { 0x16F45, 
0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF }, - { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 }, - { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF }, - { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 }, - { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 }, - { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA }, - { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 }, - { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D }, - { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 }, - { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 }, - { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 }, - { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 }, - { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 }, - { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 }, - { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C }, - { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 }, - { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C }, - { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 }, - { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 }, - { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F }, - { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 }, - { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF }, - { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 }, - { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF }, - { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F }, - { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F }, - { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F }, - { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F }, - { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF }, - { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 }, - { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F }, - { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F }, - { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF }, - { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 }, - { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF } - }; - static const UnicodeCharSet NonPrintables(NonPrintableRanges); + // https://unicode.org/Public/14.0.0/ucdxml/ + static const UnicodeCharRange PrintableRanges[] = { + {0x0020, 0x007E}, {0x00A0, 0x00AC}, {0x00AE, 0x0377}, + {0x037A, 0x037F}, {0x0384, 0x038A}, {0x038C, 0x038C}, + {0x038E, 0x03A1}, {0x03A3, 0x052F}, {0x0531, 0x0556}, + {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7}, + {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, {0x0606, 0x061B}, + {0x061D, 0x06DC}, {0x06DE, 0x070D}, {0x0710, 0x074A}, + {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D}, + {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E}, + {0x0860, 0x086A}, {0x0870, 0x088E}, {0x0898, 0x08E1}, + {0x08E3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990}, + {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, + {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, + {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD}, + {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03}, + {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, + {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, + {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, + {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, + {0x0A59, 0x0A5C}, {0x0A5E, 
0x0A5E}, {0x0A66, 0x0A76}, + {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, + {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, + {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9}, + {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3}, + {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03}, + {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, + {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, + {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, + {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, + {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, + {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, + {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, + {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2}, + {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0}, + {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C}, + {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, + {0x0C3C, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, + {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D}, + {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C}, + {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, + {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, + {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE}, + {0x0CE0, 0x0CE3}, {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, + {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, + {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, + {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96}, + {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, + {0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, + {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, + {0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, + {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, + {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, + {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, + {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, + {0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, + {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, + {0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x1248}, + {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, + {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, + {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE}, + {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, + {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, + {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5}, + {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8}, + {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753}, + {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773}, + {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9}, + {0x1800, 0x180D}, {0x180F, 0x1819}, {0x1820, 0x1878}, + {0x1880, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E}, + {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1940, 0x1940}, + {0x1944, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB}, + {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, + {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, + {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE}, + {0x1B00, 0x1B4C}, {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3}, + {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, + {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, + {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, + {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, + {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, + {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, + {0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, + {0x1FF6, 0x1FFE}, {0x2000, 0x200A}, {0x2010, 0x2027}, 
+ {0x202F, 0x205F}, {0x2070, 0x2071}, {0x2074, 0x208E}, + {0x2090, 0x209C}, {0x20A0, 0x20C0}, {0x20D0, 0x20F0}, + {0x2100, 0x218B}, {0x2190, 0x2426}, {0x2440, 0x244A}, + {0x2460, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2CF3}, + {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, + {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, + {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, + {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, + {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D}, + {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, + {0x2FF0, 0x2FFB}, {0x3000, 0x303F}, {0x3041, 0x3096}, + {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, + {0x3190, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0xA48C}, + {0xA490, 0xA4C6}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, + {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3}, + {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839}, + {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9}, + {0xA8E0, 0xA953}, {0xA95F, 0xA97C}, {0xA980, 0xA9CD}, + {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36}, + {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2}, + {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, + {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, + {0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9}, + {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, + {0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, + {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C}, + {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, + {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, {0xFD92, 0xFDC7}, + {0xFDCF, 0xFDCF}, {0xFDF0, 0xFE19}, {0xFE20, 0xFE52}, + {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFE70, 0xFE74}, + {0xFE76, 0xFEFC}, {0xFF01, 0xFFBE}, {0xFFC2, 0xFFC7}, + {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, + {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}, + {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, + {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, + {0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133}, + {0x10137, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0}, + {0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0}, + {0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A}, + {0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3}, + {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9}, + {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, + {0x10530, 0x10563}, {0x1056F, 0x1057A}, {0x1057C, 0x1058A}, + {0x1058C, 0x10592}, {0x10594, 0x10595}, {0x10597, 0x105A1}, + {0x105A3, 0x105B1}, {0x105B3, 0x105B9}, {0x105BB, 0x105BC}, + {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, + {0x10780, 0x10785}, {0x10787, 0x107B0}, {0x107B2, 0x107BA}, + {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835}, + {0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855}, + {0x10857, 0x1089E}, {0x108A7, 0x108AF}, {0x108E0, 0x108F2}, + {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, {0x1091F, 0x10939}, + {0x1093F, 0x1093F}, {0x10980, 0x109B7}, {0x109BC, 0x109CF}, + {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, + {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, + {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, + {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, + {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, {0x10B78, 0x10B91}, + {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, + {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, + {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9}, + {0x10EAB, 0x10EAD}, {0x10EB0, 
0x10EB1}, {0x10F00, 0x10F27}, + {0x10F30, 0x10F59}, {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB}, + {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x11075}, + {0x1107F, 0x110BC}, {0x110BE, 0x110C2}, {0x110D0, 0x110E8}, + {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147}, + {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4}, + {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286}, + {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, + {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, + {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, + {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, + {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348}, + {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357}, + {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, + {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7}, + {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD}, + {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C}, + {0x11680, 0x116B9}, {0x116C0, 0x116C9}, {0x11700, 0x1171A}, + {0x1171D, 0x1172B}, {0x11730, 0x11746}, {0x11800, 0x1183B}, + {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909}, + {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935}, + {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959}, + {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, + {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8}, + {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45}, + {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7}, + {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, + {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, + {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65}, + {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91}, + {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8}, + {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399}, + {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543}, + {0x12F90, 0x12FF2}, {0x13000, 0x1342E}, {0x14400, 0x14646}, + {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, + {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED}, + {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, + {0x16B5B, 0x16B61}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, + {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, + {0x16F8F, 0x16F9F}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1}, + {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, + {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, + {0x1B000, 0x1B122}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, + {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, + {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BC9F}, + {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3}, + {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D172}, + {0x1D17B, 0x1D1EA}, {0x1D200, 0x1D245}, {0x1D2E0, 0x1D2F3}, + {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, {0x1D400, 0x1D454}, + {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, + {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, + {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, + {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C}, + {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, + {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, + {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F}, + {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E}, {0x1E000, 0x1E006}, + {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 
0x1E024}, + {0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, + {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE}, + {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF}, {0x1E7E0, 0x1E7E6}, + {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE}, + {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B}, + {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4}, + {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, + {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, + {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, + {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, + {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, + {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, + {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, + {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, + {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, + {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, + {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, + {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B}, + {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, + {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F100, 0x1F1AD}, + {0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, + {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7}, + {0x1F6DD, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773}, + {0x1F780, 0x1F7D8}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0}, + {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, + {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, + {0x1F900, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74}, + {0x1FA78, 0x1FA7C}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAAC}, + {0x1FAB0, 0x1FABA}, {0x1FAC0, 0x1FAC5}, {0x1FAD0, 0x1FAD9}, + {0x1FAE0, 0x1FAE7}, {0x1FAF0, 0x1FAF6}, {0x1FB00, 0x1FB92}, + {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9}, {0x20000, 0x2A6DF}, + {0x2A700, 0x2B738}, {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, + {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, + {0xE0100, 0xE01EF}}; + + static const UnicodeCharSet Printables(PrintableRanges); + // Clang special cases 0x00AD (SOFT HYPHEN) which is rendered as an actual + // hyphen in most terminals. + return UCS == 0x00AD || Printables.contains(UCS); +} + +/// Unicode code points of the Cf category are considered +/// formatting characters.
+bool isFormatting(int UCS) { + + // https://unicode.org/Public/14.0.0/ucdxml/ + static const UnicodeCharRange Cf[] = { + {0x00AD, 0x00AD}, {0x0600, 0x0605}, {0x061C, 0x061C}, + {0x06DD, 0x06DD}, {0x070F, 0x070F}, {0x0890, 0x0891}, + {0x08E2, 0x08E2}, {0x180E, 0x180E}, {0x200B, 0x200F}, + {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F}, + {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0x110BD, 0x110BD}, + {0x110CD, 0x110CD}, {0x13430, 0x13438}, {0x1BCA0, 0x1BCA3}, + {0x1D173, 0x1D17A}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}}; - return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS); + static const UnicodeCharSet Format(Cf); + return Format.contains(UCS); } /// Gets the number of positions a character is likely to occupy when output diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp new file mode 100644 index 000000000000..1e8aebf1b8eb --- /dev/null +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -0,0 +1,551 @@ +//===- llvm/Support/UnicodeNameToCodepoint.cpp - Unicode character properties +//-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements functions to map the name or alias of a unicode +// character to its codepoint. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Unicode.h" + +namespace llvm { +namespace sys { +namespace unicode { + +extern const char *UnicodeNameToCodepointDict; +extern const uint8_t *UnicodeNameToCodepointIndex; +extern const std::size_t UnicodeNameToCodepointIndexSize; +extern const std::size_t UnicodeNameToCodepointLargestNameSize; + +using BufferType = SmallString<64>; + +struct Node { + bool IsRoot = false; + char32_t Value = 0xFFFFFFFF; + uint32_t ChildrenOffset = 0; + bool HasSibling = false; + uint32_t Size = 0; + StringRef Name; + const Node *Parent = nullptr; + + constexpr bool isValid() const { + return !Name.empty() || Value == 0xFFFFFFFF; + } + constexpr bool hasChildren() const { return ChildrenOffset != 0 || IsRoot; } + + std::string fullName() const { + std::string S; + // Reserve enough space for most unicode code points. + // The chosen value represents the 99th percentile of name size as of + // Unicode 14.
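+ // The trie stores each name split into fragments along the path from the + // root, so walk towards the root appending every fragment reversed, then + // reverse the whole buffer once to obtain the name in reading order.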
+ S.reserve(46); + const Node *N = this; + while (N) { + std::reverse_copy(N->Name.begin(), N->Name.end(), std::back_inserter(S)); + N = N->Parent; + } + std::reverse(S.begin(), S.end()); + return S; + } +}; + +static Node createRoot() { + Node N; + N.IsRoot = true; + N.ChildrenOffset = 1; + N.Size = 1; + return N; +} + +static Node readNode(uint32_t Offset, const Node *Parent = nullptr) { + if (Offset == 0) + return createRoot(); + + uint32_t Origin = Offset; + Node N; + N.Parent = Parent; + uint8_t NameInfo = UnicodeNameToCodepointIndex[Offset++]; + if (Offset + 6 >= UnicodeNameToCodepointIndexSize) + return N; + + bool LongName = NameInfo & 0x40; + bool HasValue = NameInfo & 0x80; + std::size_t Size = NameInfo & ~0xC0; + if (LongName) { + uint32_t NameOffset = (UnicodeNameToCodepointIndex[Offset++] << 8); + NameOffset |= UnicodeNameToCodepointIndex[Offset++]; + N.Name = StringRef(UnicodeNameToCodepointDict + NameOffset, Size); + } else { + N.Name = StringRef(UnicodeNameToCodepointDict + Size, 1); + } + if (HasValue) { + uint8_t H = UnicodeNameToCodepointIndex[Offset++]; + uint8_t M = UnicodeNameToCodepointIndex[Offset++]; + uint8_t L = UnicodeNameToCodepointIndex[Offset++]; + N.Value = ((H << 16) | (M << 8) | L) >> 3; + + bool HasChildren = L & 0x02; + N.HasSibling = L & 0x01; + + if (HasChildren) { + N.ChildrenOffset = UnicodeNameToCodepointIndex[Offset++] << 16; + N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++] << 8; + N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++]; + } + } else { + uint8_t H = UnicodeNameToCodepointIndex[Offset++]; + N.HasSibling = H & 0x80; + bool HasChildren = H & 0x40; + H &= ~0xC0; + if (HasChildren) { + N.ChildrenOffset = (H << 16); + N.ChildrenOffset |= + (uint32_t(UnicodeNameToCodepointIndex[Offset++]) << 8); + N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++]; + } + } + N.Size = Offset - Origin; + return N; +} + +static bool startsWith(StringRef Name, StringRef Needle, bool Strict, + std::size_t &Consummed, char &PreviousCharInName, + char &PreviousCharInNeedle, bool IsPrefix = false) { + + Consummed = 0; + if (Strict) { + if (!Name.startswith(Needle)) + return false; + Consummed = Needle.size(); + return true; + } + if (Needle.empty()) + return true; + + auto NamePos = Name.begin(); + auto NeedlePos = Needle.begin(); + + char PreviousCharInNameOrigin = PreviousCharInName; + char PreviousCharInNeedleOrigin = PreviousCharInNeedle; + + auto IgnoreSpaces = [](auto It, auto End, char &PreviousChar, + bool IgnoreEnd = false) { + while (It != End) { + const auto Next = std::next(It); + // Ignore spaces, underscore, medial hyphens + // https://unicode.org/reports/tr44/#UAX44-LM2. 
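+ // For example "ZERO WIDTH SPACE", "zero-width space" and + // "ZERO_WIDTH_SPACE" compare equal under loose matching; a hyphen is only + // skipped when it is medial, i.e. surrounded by alphanumeric characters.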
+ bool Ignore = + *It == ' ' || *It == '_' || + (*It == '-' && isAlnum(PreviousChar) && + ((Next != End && isAlnum(*Next)) || (Next == End && IgnoreEnd))); + PreviousChar = *It; + if (!Ignore) + break; + ++It; + } + return It; + }; + + while (true) { + NamePos = IgnoreSpaces(NamePos, Name.end(), PreviousCharInName); + NeedlePos = + IgnoreSpaces(NeedlePos, Needle.end(), PreviousCharInNeedle, IsPrefix); + if (NeedlePos == Needle.end()) + break; + if (NamePos == Name.end()) + break; + if (toUpper(*NeedlePos) != toUpper(*NamePos)) + break; + NeedlePos++; + NamePos++; + } + Consummed = std::distance(Name.begin(), NamePos); + if (NeedlePos != Needle.end()) { + PreviousCharInName = PreviousCharInNameOrigin; + PreviousCharInNeedle = PreviousCharInNeedleOrigin; + } + return NeedlePos == Needle.end(); +} + +static std::tuple<Node, bool, uint32_t> +compareNode(uint32_t Offset, StringRef Name, bool Strict, + char PreviousCharInName, char PreviousCharInNeedle, + BufferType &Buffer, const Node *Parent = nullptr) { + Node N = readNode(Offset, Parent); + std::size_t Consummed = 0; + bool DoesStartWith = + N.IsRoot || startsWith(Name, N.Name, Strict, Consummed, + PreviousCharInName, PreviousCharInNeedle); + if (!DoesStartWith) + return std::make_tuple(N, false, 0); + + if (Name.size() - Consummed == 0 && N.Value != 0xFFFFFFFF) + return std::make_tuple(N, true, N.Value); + + if (N.hasChildren()) { + uint32_t ChildOffset = N.ChildrenOffset; + for (;;) { + Node C; + bool Matches; + uint32_t Value; + std::tie(C, Matches, Value) = + compareNode(ChildOffset, Name.substr(Consummed), Strict, + PreviousCharInName, PreviousCharInNeedle, Buffer, &N); + if (Matches) { + std::reverse_copy(C.Name.begin(), C.Name.end(), + std::back_inserter(Buffer)); + return std::make_tuple(N, true, Value); + } + ChildOffset += C.Size; + if (!C.HasSibling) + break; + } + } + return std::make_tuple(N, false, 0); +} + +static std::tuple<Node, bool, uint32_t> +compareNode(uint32_t Offset, StringRef Name, bool Strict, BufferType &Buffer) { + return compareNode(Offset, Name, Strict, 0, 0, Buffer); +} + +// clang-format off +constexpr const char *const HangulSyllables[][3] = { + { "G", "A", "" }, + { "GG", "AE", "G" }, + { "N", "YA", "GG" }, + { "D", "YAE", "GS" }, + { "DD", "EO", "N", }, + { "R", "E", "NJ" }, + { "M", "YEO", "NH" }, + { "B", "YE", "D" }, + { "BB", "O", "L" }, + { "S", "WA", "LG" }, + { "SS", "WAE", "LM" }, + { "", "OE", "LB" }, + { "J", "YO", "LS" }, + { "JJ", "U", "LT" }, + { "C", "WEO", "LP" }, + { "K", "WE", "LH" }, + { "T", "WI", "M" }, + { "P", "YU", "B" }, + { "H", "EU", "BS" }, + { 0, "YI", "S" }, + { 0, "I", "SS" }, + { 0, 0, "NG" }, + { 0, 0, "J" }, + { 0, 0, "C" }, + { 0, 0, "K" }, + { 0, 0, "T" }, + { 0, 0, "P" }, + { 0, 0, "H" } + }; +// clang-format on + +// Unicode 14.0 +// 3.12 Conjoining Jamo Behavior Common constants +constexpr const char32_t SBase = 0xAC00; +constexpr const uint32_t LCount = 19; +constexpr const uint32_t VCount = 21; +constexpr const uint32_t TCount = 28; + +static std::size_t findSyllable(StringRef Name, bool Strict, + char &PreviousInName, int &Pos, int Column) { + assert(Column == 0 || Column == 1 || Column == 2); + static std::size_t CountPerColumn[] = {LCount, VCount, TCount}; + char NeedleStart = 0; + int Len = -1; + int Prev = PreviousInName; + for (std::size_t I = 0; I < CountPerColumn[Column]; I++) { + StringRef Syllable(HangulSyllables[I][Column]); + if (int(Syllable.size()) <= Len) + continue; + std::size_t Consummed = 0; + char PreviousInNameCopy = PreviousInName; + bool DoesStartWith = startsWith(Name, Syllable, Strict,
Consummed, + PreviousInNameCopy, NeedleStart); + if (!DoesStartWith) + continue; + Len = Consummed; + Pos = I; + Prev = PreviousInNameCopy; + } + if (Len == -1) + return 0; + PreviousInName = Prev; + return size_t(Len); +} + +static llvm::Optional<char32_t> +nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer) { + Buffer.clear(); + // Hangul Syllable Decomposition + std::size_t Consummed = 0; + char NameStart = 0, NeedleStart = 0; + bool DoesStartWith = startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed, + NameStart, NeedleStart); + if (!DoesStartWith) + return None; + Name = Name.substr(Consummed); + int L = -1, V = -1, T = -1; + Name = Name.substr(findSyllable(Name, Strict, NameStart, L, 0)); + Name = Name.substr(findSyllable(Name, Strict, NameStart, V, 1)); + Name = Name.substr(findSyllable(Name, Strict, NameStart, T, 2)); + if (L != -1 && V != -1 && T != -1 && Name.empty()) { + if (!Strict) { + Buffer.append("HANGUL SYLLABLE "); + if (L != -1) + Buffer.append(HangulSyllables[L][0]); + if (V != -1) + Buffer.append(HangulSyllables[V][1]); + if (T != -1) + Buffer.append(HangulSyllables[T][2]); + } + return SBase + (std::uint32_t(L) * VCount + std::uint32_t(V)) * TCount + + std::uint32_t(T); + } + // Otherwise, it's an illegal syllable name. + return None; +} + +struct GeneratedNamesData { + StringRef Prefix; + uint32_t Start; + uint32_t End; +}; + +// Unicode 14.0 Table 4-8. Name Derivation Rule Prefix Strings +// This needs to be kept in sync with +// llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp +static const GeneratedNamesData GeneratedNamesDataTable[] = { + {"CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4DBF}, + {"CJK UNIFIED IDEOGRAPH-", 0x4E00, 0x9FFC}, + {"CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2A6DD}, + {"CJK UNIFIED IDEOGRAPH-", 0x2A700, 0x2B734}, + {"CJK UNIFIED IDEOGRAPH-", 0x2B740, 0x2B81D}, + {"CJK UNIFIED IDEOGRAPH-", 0x2B820, 0x2CEA1}, + {"CJK UNIFIED IDEOGRAPH-", 0x2CEB0, 0x2EBE0}, + {"CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134A}, + {"TANGUT IDEOGRAPH-", 0x17000, 0x187F7}, + {"TANGUT IDEOGRAPH-", 0x18D00, 0x18D08}, + {"KHITAN SMALL SCRIPT CHARACTER-", 0x18B00, 0x18CD5}, + {"NUSHU CHARACTER-", 0x1B170, 0x1B2FB}, + {"CJK COMPATIBILITY IDEOGRAPH-", 0xF900, 0xFA6D}, + {"CJK COMPATIBILITY IDEOGRAPH-", 0xFA70, 0xFAD9}, + {"CJK COMPATIBILITY IDEOGRAPH-", 0x2F800, 0x2FA1D}, +}; + +static llvm::Optional<char32_t> +nameToGeneratedCodePoint(StringRef Name, bool Strict, BufferType &Buffer) { + for (auto &&Item : GeneratedNamesDataTable) { + Buffer.clear(); + std::size_t Consummed = 0; + char NameStart = 0, NeedleStart = 0; + bool DoesStartWith = startsWith(Name, Item.Prefix, Strict, Consummed, + NameStart, NeedleStart, /*isPrefix*/ true); + if (!DoesStartWith) + continue; + auto Number = Name.substr(Consummed); + unsigned long long V = 0; + // Be consistent about mandating upper casing.
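+ // For example, strict matching rejects "CJK UNIFIED IDEOGRAPH-4e00" but + // accepts "CJK UNIFIED IDEOGRAPH-4E00" (U+4E00); loose matching accepts + // both spellings.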
+ if (Strict && + llvm::any_of(Number, [](char C) { return C >= 'a' && C <= 'f'; })) + return {}; + if (getAsUnsignedInteger(Number, 16, V) || V < Item.Start || V > Item.End) + continue; + if (!Strict) { + Buffer.append(Item.Prefix); + Buffer.append(utohexstr(V, true)); + } + return V; + } + return None; +} + +static llvm::Optional<char32_t> nameToCodepoint(StringRef Name, bool Strict, + BufferType &Buffer) { + if (Name.empty()) + return None; + + llvm::Optional<char32_t> Res = nameToHangulCodePoint(Name, Strict, Buffer); + if (!Res) + Res = nameToGeneratedCodePoint(Name, Strict, Buffer); + if (Res) + return *Res; + + Buffer.clear(); + Node Node; + bool Matches; + uint32_t Value; + std::tie(Node, Matches, Value) = compareNode(0, Name, Strict, Buffer); + if (Matches) { + std::reverse(Buffer.begin(), Buffer.end()); + // UAX44-LM2. Ignore case, whitespace, underscore ('_'), and all medial + // hyphens except the hyphen in U+1180 HANGUL JUNGSEONG O-E. + if (!Strict && Value == 0x116c && + Name.find_insensitive("O-E") != StringRef::npos) { + Buffer = "HANGUL JUNGSEONG O-E"; + Value = 0x1180; + } + return Value; + } + return None; +} + +llvm::Optional<char32_t> nameToCodepointStrict(StringRef Name) { + + BufferType Buffer; + auto Opt = nameToCodepoint(Name, true, Buffer); + return Opt; +} + +llvm::Optional<LooseMatchingResult> +nameToCodepointLooseMatching(StringRef Name) { + BufferType Buffer; + auto Opt = nameToCodepoint(Name, false, Buffer); + if (!Opt) + return None; + return LooseMatchingResult{*Opt, Buffer}; +} + +// Find the unicode character whose edit distance to Pattern +// is shortest, using the Wagner–Fischer algorithm. +llvm::SmallVector<MatchForCodepointName> +nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) { + // We maintain a fixed-size vector of matches, + // sorted by distance. + // The worst match (with the biggest distance) is discarded when new elements + // are added. + std::size_t LargestEditDistance = 0; + llvm::SmallVector<MatchForCodepointName> Matches; + Matches.reserve(MaxMatchesCount + 1); + + auto Insert = [&](const Node &Node, uint32_t Distance, + char32_t Value) -> bool { + if (Distance > LargestEditDistance) { + if (Matches.size() == MaxMatchesCount) + return false; + LargestEditDistance = Distance; + } + // To avoid allocations, the creation of the name is delayed + // as much as possible. + std::string Name; + auto GetName = [&] { + if (Name.empty()) + Name = Node.fullName(); + return Name; + }; + + auto It = std::lower_bound( + Matches.begin(), Matches.end(), Distance, + [&](const MatchForCodepointName &a, std::size_t Distance) { + if (Distance == a.Distance) + return a.Name < GetName(); + return a.Distance < Distance; + }); + if (It == Matches.end() && Matches.size() == MaxMatchesCount) + return false; + + MatchForCodepointName M{GetName(), Distance, Value}; + Matches.insert(It, std::move(M)); + if (Matches.size() > MaxMatchesCount) + Matches.pop_back(); + return true; + }; + + // We ignore case, space, hyphens, etc., + // in both the search pattern and the prospective names. + auto Normalize = [](StringRef Name) { + std::string Out; + Out.reserve(Name.size()); + for (char C : Name) { + if (isAlnum(C)) + Out.push_back(toUpper(C)); + } + return Out; + }; + std::string NormalizedName = Normalize(Pattern); + + // Allocate a matrix big enough for longest names.
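+ // Wagner-Fischer matrix: columns are indexed by the normalized pattern + // (plus one for the empty prefix), rows by the characters of the candidate + // name gathered along the current path in the trie.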
+ const std::size_t Columns = + std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) + + 1; + + LLVM_ATTRIBUTE_UNUSED static std::size_t Rows = + UnicodeNameToCodepointLargestNameSize + 1; + + std::vector<char> Distances( + Columns * (UnicodeNameToCodepointLargestNameSize + 1), 0); + + auto Get = [&Distances, Columns](size_t Column, std::size_t Row) -> char & { + assert(Column < Columns); + assert(Row < Rows); + return Distances[Row * Columns + Column]; + }; + + for (std::size_t I = 0; I < Columns; I++) + Get(I, 0) = I; + + // Visit the children, + // filling (and overriding) the matrix for the name fragment of each node + // iteratively. CompleteName is used to collect the actual name of a potential + // match, respecting case and spacing. + auto VisitNode = [&](const Node &N, std::size_t Row, + auto &VisitNode) -> void { + std::size_t J = 0; + for (; J < N.Name.size(); J++) { + if (!isAlnum(N.Name[J])) + continue; + + Get(0, Row) = Row; + + for (std::size_t I = 1; I < Columns; I++) { + const int Delete = Get(I - 1, Row) + 1; + const int Insert = Get(I, Row - 1) + 1; + + const int Replace = + Get(I - 1, Row - 1) + (NormalizedName[I - 1] != N.Name[J] ? 1 : 0); + + Get(I, Row) = std::min(Insert, std::min(Delete, Replace)); + } + + Row++; + } + + unsigned Cost = Get(Columns - 1, Row - 1); + if (N.Value != 0xFFFFFFFF) { + Insert(N, Cost, N.Value); + } + + if (N.hasChildren()) { + auto ChildOffset = N.ChildrenOffset; + for (;;) { + Node C = readNode(ChildOffset, &N); + ChildOffset += C.Size; + if (!C.isValid()) + break; + VisitNode(C, Row, VisitNode); + if (!C.HasSibling) + break; + } + } + }; + + Node Root = createRoot(); + VisitNode(Root, 1, VisitNode); + return Matches; +} + +} // namespace unicode + +} // namespace sys +} // namespace llvm diff --git a/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp b/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp new file mode 100644 index 000000000000..86e8378eceb1 --- /dev/null +++ b/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp @@ -0,0 +1,20911 @@ + +//===------------- Support/UnicodeNameToCodepointGenerated.cpp ------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements mapping the name of a unicode code point to its value. +// +// This file was generated using ./bin/UnicodeNameMappingGenerator. +// Do not edit manually. +// +//===----------------------------------------------------------------------===// + +/* +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use <https://www.unicode.org/copyright.html> +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. +*/ + +#include "llvm/Support/Compiler.h" +#include <cstddef> +#include <cstdint> +namespace llvm { +namespace sys { +namespace unicode { +extern const char *UnicodeNameToCodepointDict; +extern const uint8_t *UnicodeNameToCodepointIndex; +extern const std::size_t UnicodeNameToCodepointIndexSize; +extern const std::size_t UnicodeNameToCodepointLargestNameSize; +const char *UnicodeNameToCodepointDict = + " _-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789OWER RIGHT CURLY BRACKET SECTIONM " + "LEFT MEMBER OF DOUBLE VERTICALPER BODY TILTING FROM HIP JOINTSFACE WITH " + "SYMBOLS COVERING MOUTHVED STEM PARAGRAPH SIGN ORNAMENTVE LESS-THAN ABOVE " + "SLANTED EQUAL KORANIC STOP SIGN ISOLATED FORMROFLEX CLICK WITH RETROFLEX " + "HOOKSWIRL BIRGA WITH DOUBLE ORNAMENTOWNWARDS HARPOON WITH BARB RIGHT " + "HORIZONTAL STROKES TO THE RIGHT LEFTWARDS TRIANGLE-HEADED " + "ARROWFT-POINTING ANGLE QUOTATION MARK LOWER HALF INVERSE MEDIUM " + "SHADERONT-TILTED SHADOWED WHITE ARROWDIFIER LETTER LABIALIZATION MARKDIC " + "KASHMIRI INDEPENDENT SVARITAMARK WITH LEFT RIGHT ARROW ABOVEOUBLE-LINE " + "EQUAL ABOVE LESS-THANL ARABIC LETTER TAH AND TWO DOTSLL BUT UPPER LEFT " + "QUADRANT BLACKRIGHT SEMICIRCLE WITH THREE DOTSLAR SIGN WITH OVERLAID " + "BACKSLASH CONTAINING SMALL WHITE TRIANGLEEN ARM ENDING IN ARROW POINTING " + "LAGAB TIMES U OVER LAGAB TIMES ULOWER LEFT CURLY BRACKET " + "SECTIONRIGHTWARDS TRIANGLE-HEADED ARROWTRIANGLE-HEADED RIGHTWARDS ARROW " + "DOWNWARDS EQUILATERAL ARROWHEAD DOWNWARDS TRIANGLE-HEADED ARROWER ARROWS " + "CIRCLING ANTICLOCKWISEER IGI SHIR OVER SHIR UD OVER UDER TAB NI OVER NI " + "DISH OVER DISHESS-THAN ABOVE DOUBLE-LINE EQUALETALLED BLACK AND WHITE " + "FLORETTEATHARVAVEDIC INDEPENDENT SVARITAAND MIDDLE RIGHT TO LOWER " + "CENTREWO DOTS ABOVE AND TWO DOTS BELOWWO DOTS OVER ONE DOT PUNCTUATIONS " + "VERTICALLY BELOW AND SMALL TAHTIMES ASH2 KU OVER HI TIMES ASH2AND " + "LEFTWARDS OPEN CIRCLE ARROWSTICAL BAR DOUBLE RIGHT TURNSTILENORMAL FACTOR 
+ "SEMIDIRECT PRODUCTD ARROW WITH TRIANGLE ARROWHEADSSEMICIRCULAR " + "ANTICLOCKWISE ARROWINTING DOWNWARDS THEN NORTH EASTHT-POINTING ANGLE " + "QUOTATION MARKHUR KAZAKH KIRGHIZ ALEF MAKSURA THIRD WHITE RIGHT POINTING " + "INDEX SHADOWED WHITE RIGHTWARDS ARROWIDE AND JOINED WITH " + "INTERSECTIONUPPER AND LOWER ONE EIGHTH BLOCKIGHTWARDS HARPOON WITH BARB " + "DOWNTER-THAN ABOVE DOUBLE-LINE EQUALH SUPERSCRIPT ALEF ISOLATED " + "FORMROXIMATELY NOR ACTUALLY EQUAL TOAISING BOTH HANDS IN CELEBRATIONIRECT " + "PRODUCT WITH BOTTOM CLOSEDTOP HALF DIVIDED BY VERTICAL BARGREATER-THAN " + "ABOVE SLANTED EQUALTOM-LIGHTED RIGHTWARDS ARROWHEADH HAMZA ABOVE WITH " + "ALEF MAKSURA H HORIZONTAL MIDDLE BLACK STRIPERONG CENTRALIZATION STROKE " + "BELOW TRIANGULAR THREE QUARTERS BLOCK TORTOISE SHELL BRACKET " + "ORNAMENTWNWARDS ARROW WITH TIP LEFTWARDSDED HIGH STOP WITH FILLED " + "CENTRETION SIGN WITH CIRCUMFLEX ACCENTS AND UPWARDS OPEN CIRCLE " + "ARROWSHAND WITH MIDDLE FINGER EXTENDEDOF UPWARDS TRIANGLE-HEADED " + "ARROWLEFTWARDS HARPOON WITH BARB DOWNED ARABIC-INDIC DIGIT FOUR " + "BELOWEDIUM SHADE AND RIGHT HALF BLOCKLE-LINE EQUAL ABOVE GREATER-THANARDS " + "ARROW ABOVE LEFTWARDS ARROW BAR AT END OF HORIZONTAL STROKEEDIUM SHADE " + "AND LOWER HALF BLOCKE TO MIDDLE LEFT TO LOWER CENTREED ARABIC-INDIC DIGIT " + "FOUR ABOVEED COMMA QUOTATION MARK ORNAMENTE-POINTED BLACK RIGHTWARDS " + "ARROWE CONTAINING BLACK SMALL LOZENGEARDROP-SPOKED PROPELLER ASTERISKE " + "SQUARED LATIN CAPITAL LETTER PLE COMMA QUOTATION MARK ORNAMENTUG2 OVER " + "TUG2 TUG2 OVER TUG2 PAPARDS HARPOON WITH BARB DOWNWARDS-POINTING ANGLE " + "BRACKET ORNAMENTRIANGLE-HEADED OPEN CIRCLE ARROW BETWEEN MIDDLE AND RING " + "FINGERSED UPWARDS EQUILATERAL ARROWHEAD-SHADOWED WHITE RIGHTWARDS " + "ARROWAISED HAND WITH FINGERS SPLAYEDETALLED OUTLINED BLACK " + "FLORETTEACK-TILTED SHADOWED WHITE ARROWTNAMESE ALTERNATE READING MARK " + "RINGS OVER ONE RING PUNCTUATIONRIGHTWARDS HARPOON WITH BARB UPAND MIDDLE " + "LEFT TO LOWER CENTREONE HUNDRED THIRTY-FIVE DEGREES CROSSING ASH OVER ASH " + "OVER ASHUPWARDS HARPOON WITH BARB RIGHTRING OVER TWO RINGS " + "PUNCTUATIONLEFTWARDS EQUILATERAL ARROWHEADIN WHITE CIRCLE IN BLACK " + "SQUAREMAKSURA WITH SUPERSCRIPT ALEF -HIRAGANA PROLONGED SOUND MARKSAD " + "WITH LAM WITH ALEF MAKSURADOWNWARDS AND RIGHTWARDS ARROWEFT SEMICIRCLE " + "WITH THREE DOTSGHT FOUR POINTED PINWHEEL STARDOT BELOW AND THREE DOTS " + "ABOVEAND JOINED BY DASH WITH SUBSETGREATER-THAN ABOVE EQUALS SIGNINDEX " + "THUMB CURVE THUMB INSIDEDIVIDED BY HORIZONTAL BAR AND EART EXCLAMATION " + "MARK ORNAMENTHT CENTRALIZATION STROKE BELOWON WITH RIGHTWARDS ARROW " + "ABOVEMODIFIER LETTER LEFT HALF RINGOPEN CENTRE EIGHT POINTED STARQAF WITH " + "LAM WITH ALEF MAKSURAHIGH-REVERSED-9 QUOTATION MARKMINTON RACQUET AND " + "SHUTTLECOCKAGGRAVATED INDEPENDENT SVARITAEXTENDED ARABIC-INDIC DIGIT " + "TEVERSED LUNATE EPSILON SYMBOLWITH RIGHTWARDS ARROW AT LEFTONAL INDICATOR " + "SYMBOL LETTER OVER RIGHTWARDS ARROW TO BARSUPERSCRIPT ALEF INITIAL " + "FORMNS-SERIF INTERROBANG ORNAMENTEFTWARDS HARPOON WITH BARB " + "UPSEMICIRCULAR PATH AROUND POLEDOWN MIDDLE THUMB INDEX CROSSDOWN HEAVY " + "AND RIGHT UP LIGHTCKED FACE WITH EXPLODING HEAD WITH REVERSED NEGATION " + "SLASHLIGHT FOUR POINTED BLACK CUSP DOWN INDEX THUMB HOOK MIDDLEDOT OVER " + "TWO DOTS PUNCTUATIONPUNCTUATION CHINOOK FULL STOPUP HEAVY AND RIGHT DOWN " + "LIGHTCONTAINING BLACK SMALL CIRCLEACE DIRECTION POSITION NOSE FTING POINT " + "RIGHTWARDS ARROWT LITTER IN ITS PLACE SYMBOLOUND-TIPPED RIGHTWARDS " + 
"ARROWISMILLAH AR-RAHMAN AR-RAHEEMDOWN HEAVY AND LEFT UP LIGHTUPWARDS AND " + "RIGHTWARDS ARROWRECTANGULAR PATH AROUND POLEEFT ARC GREATER-THAN " + "BRACKETMONOGRAMMOS TESSERA DODEKATASALTIRE WITH ROUNDED CORNERSBESIDE AND " + "JOINED WITH UNIONMIDDLE RING LITTLE CONJOINEDASTERISKS ALIGNED " + "VERTICALLYUP HEAVY AND LEFT DOWN LIGHTUPPER CENTRE TO MIDDLE RIGHTHREE " + "HUNDRED FIFTEEN DEGREESLEFTWARDS OF DOWNWARDS ARROWDOUBLE ANUSVARA " + "ANTARGOMUKHAHADED WHITE RIGHTWARDS ARROWU ALAYHI WAAALIHEE WA-SALLAMIBE " + "SYLLABLE BOUNDARY MARKEREDGE-TAILED RIGHTWARDS ARROWLIQUID MEASURE FIRST " + "SUBUNIT-FEATHERED RIGHTWARDS ARROWRIANGULAR ONE QUARTER BLOCKIMPERFECTUM " + "CUM PROLATIONE OUR BALLOON-SPOKED ASTERISKEAVY WHITE RIGHTWARDS ARROWIDE " + "ARC ANTICLOCKWISE ARROWIDE-HEADED RIGHTWARDS ARROWCIRCLE WITH NORTHWEST " + "ARROWBETWEEN TWO HORIZONTAL BARSHEAD MARK WITH MOON AND SUNZERO FOR ODD " + "POWERS OF FOURWO DOTS BELOW AND DOT ABOVEHANDED INTERLACED " + "PENTAGRAMLESS-THAN ABOVE EQUALS SIGNBRDA RNYING YIG MGO MDUN MABRDA " + "RNYING YIG MGO SGAB MARIGHT ARC LESS-THAN BRACKETUPPER MIDDLE LEFT TO " + "UPPER CONTINUOUS UNDERLINE SYMBOL AND LEFT SEMICIRCLE ARROWSTALIC LATIN " + "CAPITAL LETTER ONE LARGE AND ONE SMALL EYEENTATION FORM FOR VERTICAL " + "LARGE EQUILATERAL ARROWHEADEMICIRCULAR CLOCKWISE ARROWFINGER COVERING " + "CLOSED LIPSSTRUMENTAL NOTATION SYMBOL-PHARYNGEAL VOICED FRICATIVE BARREE " + "WITH TWO DOTS BELOWKATHAKA INDEPENDENT SVARITATWO HUNDRED SEVENTY " + "DEGREESDOUBLE PRIME QUOTATION MARKDOUBLE ANGLE QUOTATION MARKRIPLE " + "VERTICAL BAR OPERATOR DIVIDED BY HORIZONTAL RULEPPY PERSON RAISING ONE " + "HANDWALLPLANE SHOULDER HIP MOVELOWER MIDDLE LEFT TO LOWER FOUR FINGERS " + "CONJOINED BENTLOWER TONAL RANGE INDICATORLIGHT CENTRALIZATION " + "STROKEYAJURVEDIC MIDLINE SVARITAINDUSTRIAL STANDARD SYMBOLMEEM WITH HAH " + "WITH TATWEELDOTTED SUBSTITUTION MARKERCRIPT LIGATURE ET ORNAMENTSSIAN " + "ASTROLOGICAL SYMBOL ONOMICAL SYMBOL FOR URANUSOORPLANE SHOULDER HIP " + "MOVEHTORA SKLIRON CHROMA VASIS OR APPROXIMATELY EQUAL TOLANTED SOUTH " + "ARROW WITH HORIGHT PARENTHESIS ORNAMENTDOTTED LUNATE SIGMA " + "SYMBOLDROP-SHADOWED WHITE SQUAREMODIFIER FITZPATRICK TYPE-AND MIDDLE " + "FINGERS CROSSEDE ONE-WAY LEFT WAY TRAFFIC GAD OVER GAD GAR OVER GARLINE " + "FEED SEPARATOR SYMBOLRIPLE DOT PUNCTUATION MARKLEFTWARDS OF UPWARDS " + "ARROWTHREE DOTS ABOVE DOWNWARDSU REVERSED OVER U REVERSEDBLE TENNIS " + "PADDLE AND BALLERSTRASS ELLIPTIC FUNCTIONOCKED FEMALE AND MALE SIGN " + "WITHIN TRIANGLE ARROWHEADUNEVEN EYES AND WAVY MOUTH LESS THAN THE " + "DENOMINATORAND RIGHT ONE EIGHTH BLOCK NEGATED WITH VERTICAL BARJECT " + "REPLACEMENT CHARACTERMARRIED PARTNERSHIP SYMBOLIDEOGRAPHIC ITERATION " + "MARKOTATED FLORAL HEART BULLETALEF MAKSURA ISOLATED FORMORTHOGONAL " + "CROSSHATCH FILLWITH LEFTWARDS ARROW ABOVECLOCKWISE ARROW WITH " + "MINUSLLALLAHOU ALAYHE WASSALLAMCAT FACE WITH SMILING EYESOUTLINED " + "RIGHTWARDS ARROWINVERTED EXCLAMATION MARKBREVE WITH INVERTED " + "BREVEFECTIVENESS OR DISTORTIONOLD ASSYRIAN WORD DIVIDERMBINING " + "CRYPTOGRAMMIC DOTLEFT PARENTHESIS ORNAMENTREE-HUNDRED-AND-TWENTIETHSTROKE " + "AND TWO DOTS ABOVETERNION INTEGRAL OPERATORRIGHT DIAGONAL HALF BLACKRIPLE " + "BIRGA WITH ORNAMENTDOUBLE CANDRABINDU VIRAMAOUBLE BIRGA WITH ORNAMENT " + "WITH DOUBLE MIDDLE TILDERANCH BANK IDENTIFICATIONELD HOCKEY STICK AND " + "BALL WITH DOUBLE GRAVE ACCENTMULTIPLICATION SIGN BELOWNIVERSAL RECYCLING " + "SYMBOLLEFTWARDS ARROW WITH HOOKONE UNDER EIGHTEEN SYMBOLLOW QUILT SQUARE " + 
"ORNAMENTFFICULTY AT THE BEGINNINGBUT NOT ACTUALLY EQUAL TOTTED " + "SUBSTITUTION BRACKETTAB OVER TAB GAR OVER GARMEDIUM TRIANGLE ARROWHEAD " + "OVER NUN LAGAR TIMES SALRIST CIRCLE HITTING WALL WITH DOUBLE VERTICAL " + "BARCROSSING NORTH EAST ARROW WITH CIRCLED ONE OVERLAYCAT FACE WITH CLOSED " + "EYESDIAERESIS AND HOOK SYMBOLDRY MEASURE FIRST SUBUNITING ON THE FLOOR " + "LAUGHINGAND MALE AND FEMALE SIGNVOICED LARYNGEAL SPIRANTTEARDROP-SPOKED " + "ASTERISKTED INTERPOLATION MARKERUPRIGHT RECTANGULAR ZERORIGHTWARDS THEN " + "CURVING BLACK LENTICULAR BRACKETIGATURE OPEN ET ORNAMENTARROW POINTING " + "DIRECTLY BLIC ADDRESS LOUDSPEAKERCULINE ORDINAL INDICATORING FACE WITH " + "OPEN MOUTHMTAVRULI CAPITAL LETTER ARM CIRCLE HITTING WALL WELVE POINTED " + "BLACK STARLARGE TRIANGLE ARROWHEADLINE HORIZONTAL ELLIPSISORIZONTAL BAR " + "WITH NOTCHWITH UPWARDS ARROW ABOVEONE-HUNDRED-AND-SIXTIETHBUSINESS SUIT " + "LEVITATINGPERSCRIPT ALEF MOKHASSASCONSECUTIVE EQUALS SIGNSDESCENDING " + "MUSICAL NOTESGLOTTAL STOP WITH STROKEEYES AND HAND OVER MOUTHLICATION " + "PROGRAM COMMANDFINGER AND THUMB CROSSEDGREATER-THAN OR EQUAL TOISOSCELES " + "RIGHT TRIANGLEWITH CANCELLATION STROKEOTTOM SHADED WHITE " + "ARROWOTTOM-SHADED WHITE ARROWDIAGONAL CROSSHATCH FILLUPWARD POINTING " + "TRIANGLESINGLE-LINE NOT EQUAL TOSYLLABLE REPETITION MARKT BLACK " + "RIGHTWARDS ARROWMALL CIRCLE TO THE RIGHTSMALL ARABIC LETTER TAH DOUBLE " + "HORIZONTAL STROKE POINTING BACKHAND INDEXEQUAL TO OR " + "GREATER-THANINTERSECTION WITH SERIFSHEAVY BLACK HEART BULLETBERKANAN " + "BEORC BJARKAN BCOMPATIBILITY IDEOGRAPH-LEFT DIAGONAL HALF BLACKWO DOTS " + "VERTICALLY ABOVEDOWNSCALING FACTOR KIIZH OVER TOP SQUARE " + "BRACKETLY-RECYCLED PAPER SYMBOLE PLUS A PLUS SU PLUS NASTROKE THROUGH " + "DESCENDERPOINTING DOWNWARDS ABOVESHAPE WITH A DOT INSIDEIVE FINGERS " + "SPREAD OPENALGAMATION OR COPRODUCTCIRCUMFLEX ACCENT ABOVEININE ORDINAL " + "INDICATORLSCHREIBER PAUSE SYMBOLUPWARDS THEN NORTH WESTLEFT-SHADED WHITE " + "ARROWCLUSTER-INITIAL LETTER ALEF MAKSURA FINAL FORMMITIAN CONJUGATE " + "MATRIXISTED RIGHTWARDS ARROWSSING DIAGONAL CROSSING YELORUSSIAN-UKRAINIAN " + "ISOLIDUS BINARY RELATION WITH HALF-CIRCLE BELOWRIGHT HORIZONTAL SECANTUP " + "SPREAD THUMB FORWARDORIGINAL OF OR EQUAL TOPUNCTUATION END OF " + "TEXTVERTICAL BISECTING LINERIGHT DIAGONAL ELLIPSISORAH WITH NINE BRANCHES " + "POINTING AT THE VIEWERREE VARIATION SELECTOR WO-WAY LEFT WAY TRAFFICWHITE " + "FOUR POINTED CUSPHANKED RIGHTWARDS ARROWWESTERN PWO KAREN TONE-ESS " + "OUTLINED WHITE STARP WITH EXCLAMATION MARK HUNDRED TWENTY-EIGHTH BARBED " + "RIGHTWARDS ARROWRTOISE SHELL BRACKETED OMBINING ANUSVARA ABOVEATTACHING " + "VERTICAL OMETDOT BELOW AND DOT ABOVEAVOURING DELICIOUS FOODRAISED " + "OMISSION BRACKETPA OVER PA GAR OVER GARGREEK SMALL LETTER IOTAASCENDING " + "MUSICAL NOTESIDE ARC CLOCKWISE ARROWAND WOMAN HOLDING HANDSRIGHT-POINTING " + "TRIANGLEOVER RIGHTWARDS HARPOON CAKE WITH SWIRL DESIGNZANTINE MUSICAL " + "SYMBOL IGHT-SHADED WHITE ARROWHT TRIFOLIATE SNOWFLAKEOVERLAPPING LOGICAL " + "ANDHREE POINTED BLACK STARARTY HORN AND PARTY HATCURRENT SYMBOL FORM TWO " + "ROTATED NINETY DEGREESUBLE VERTICAL BAR BELOWDOWNWARDS THEN CURVING " + "ARABIC LETTER TAH ABOVEANG DEPARTING TONE MARK WITH DECORATIVE COVEROVER " + "NU11 BUR OVER BUROVER LEFTWARDS HARPOONUIGHUR KIRGHIZ YEH " + "WITSYMPTOTICALLY EQUAL TOOVER SHIR BUR OVER BURCONSONANT MODIFIER " + "BARDOMAIN ANTIRESTRICTIONND RECORDING COPYRIGHTTRIPLE VERTICAL " + "STROKEUPPER RIGHT AND LOWER DOUBLE SOLIDUS OVERLAYLATIN CAPITAL LETTER 
" + "SLONG HORIZONTAL STROKERIGHT-POINTING FLEURONQUESTION MARK ORNAMENT WITH " + "THREE DOTS ABOVEUBSCRIPT SMALL LETTER LOW PARAPHRASE BRACKET WITH SINGLE " + "ZAPYATAYAPUNCTUATION KUNDDALIYAUPPER ONE EIGHTH BLOCKARMENIAN ETERNITY " + "SIGNDOUBLE VERTICAL STROKEPRECEDED BY APOSTROPHEPOINTING UPWARDS " + "BELOWKEEPING STILL MOUNTAINTWO HORIZONTAL STROKESPERSET OF NOR EQUAL " + "TODOUBLE-LINED HEAD MARKMNYAM YIG GI MGO RGYANEAST-POINTING AIRPLANEIGEL " + "LONG-BRANCH-SOL SDOWNWARDS ZIGZAG ARROWACKSLANTED SOUTH ARROWRECTILINEAR " + "BLACK STARI YFESIS TETARTIMORIONREE-CIRCLE ALTERNATE IDOWN-POINTING " + "TRIANGLEHEXIFORM LONG ANUSVARANOT INCLUDING THE POLESHORT VERTICAL " + "STROKES SYMBOL FOR LIGHTHOUSEUSTOMER ACCOUNT NUMBERIN DEPARTING TONE " + "MARKDRESSED TO THE SUBJECTSHORT RIGHTWARDS ARROWLEFT TRIANGLE " + "OPERATORALEF WITH LAM WITH YEH RIGHT ARROWHEAD ABOVEING HEAD IN " + "SILHOUETTEHORT HORIZONTAL STROKEINDIRECT QUESTION MARKSEMI-VOICED SOUND " + "MARKCURLY BRACKET ORNAMENTCJK UNIFIED IDEOGRAPH-TRIPLE RIGHT " + "TURNSTILEYIAKENG PUACHUE HMONG WITH CIRCUMFLEX ABOVEWITH HORIZONTAL " + "STROKECONSONANT SIGN MEDIAL ROUND A POINT OPERATORWITH JEEM INITIAL " + "FORMWASALLAM ISOLATED FORM-ROTATED DIVISION SIGNRROW WITH ROUNDED " + "HEADGREATER-THAN DIAERESISWITH VOICED SOUND MARKLE BESIDE VERTICAL " + "BARINVERTED SMALL V BELOWINVERTED SMALL V ABOVE OVER STAMPED ENVELOPEBAR " + "ABOVE INTERSECTIONREASE FONT SIZE SYMBOLARD SHELL FLOPPY DISKDOWNWARDS " + "ARROW ABOVEACUTE AND HOOK SYMBOLEFT-POINTING TRIANGLE-SHAPED BAG " + "DELIMITEREFT OPEN BOX OPERATORDOWN HORIZONTAL LIGHTEFT HORIZONTAL " + "SECANTDOWN HORIZONTAL HEAVYYIG MGO TSHEG SHAD MA-ROUND NOTEHEAD DOWN " + "ABOVE SHORT DOWN TACKAKIA TELOUS ICHIMATOSINVERTED GLOTTAL STOPINVERTED " + "BRIDGE BELOWDELIMITER TSHEG BSTARHALF TRIANGULAR COLONHAND INTERIOR " + "PRODUCTWO-CIRCLE ALTERNATE IWO-CIRCLE NUKTA ABOVEINTERSECTION " + "OPERATORINTERSECTING LOGICAL TILDE OPERATOR ABOVE GRUENT WITH DOT " + "ABOVEHOCKEY STICK AND PUCKHORIZONTAL TABULATIONHOUSAND MILLIONS SIGNTHICK " + "LETTER SELECTORCTOR OR CROSS PRODUCTCRUCIFORM NUMBER FOURTEEN POINTED " + "ASTERISKCROSSE STICK AND BALLXTRA SHORT VOWEL MARKFINAL CONSONANT SIGN " + "EIGHT SPOKED ASTERISKELATIONAL COMPOSITIONVOICED ITERATION MARKDOUBLE " + "LEFT TURNSTILEEQUAL TO OR LESS-THANER RIGHT CORNER ANGLEALLING DIAGONAL " + "SLASHLATTENED OPEN A ABOVEFLATTENED PARENTHESISDIGRAMMOS EX " + "DODEKATATRIANGULAR HALF BLOCKWITH INVERTED V ABOVEGHT OPEN BOX " + "OPERATORTOUCHING INSIDE MOUTHGRAMMOS OKTO DODEKATAARKENING OF THE " + "LIGHTVERY HEAVY BARB ARROW WITH VERTICAL STROKE AND SLANTED PARALLELSH " + "AMPERSAND ORNAMENT WITH SHORT RIGHT LEGAND VOWEL LENGTH MARKPAP PLUS PAP " + "PLUS LU3RATING SYSTEM COMMANDVERTICAL LINE OVERLAYBOTTOM U-SHAPED ARROWND " + "TELEPHONE RECEIVERRISING DIAGONAL SLASHMORPHOLOGICAL DIVIDERSHORT " + "LEFTWARDS ARROWMIDDLE RING LITTLE ONSIDE TO SIDE SCISSORSMALE WITH STROKE " + "SIGNBUT NOT EQUIVALENT TOARYSTIAN FIVE HUNDREDQUADRANT CIRCULAR ARCRELICT " + "HOUSE BUILDINGREVERSED FEATHER MARKLETTER SMALL CAPITAL OP SHADED WHITE " + "ARROWOCAL NOTATION SYMBOL-OPPOSING AN PLUS NAGABESIDE RIGHT " + "TRIANGLENTISTRY SYMBOL LIGHT OHAMMAD ISOLATED FORMLESS-THAN OR EQUAL " + "TOWITH SOROCHYA NOZHKAHAR2 TIMES GAL PLUS RUMAI PALAUNG TONE-5HALF CIRCLE " + "WITH DOTPLUS GISH TIMES TAK4VAL WITH OVAL INSIDEINSIDE MOUTH RELAXEDINING " + "OBLIQUE STROKEDOUBLE ANGLE BRACKETCRESCENT MOON SYMBOLGRA GCAN -CHAR " + "RTAGSENARMONIOS ANTIFONIAA- SHOG GI MGO RGYAN OVER TUR ZA OVER " + 
"ZAUBHAANAHU WA TAAALAAONE MARK SGAW KAREN INVERSE WHITE CIRCLEINVERTED " + "CANDRABINDU OVER LAGAR GUNU SHEAND NORTH EAST ARROWWET CULTIVATION " + "SIGNSIDEWAYS NOON GHUNNAONCAVE-SIDED DIAMONDBSET OF NOR EQUAL TODOUBLE " + "DOT TONE MARKPOTABLE WATER SYMBOLSINGLE DOT TONE MARKIRCLES HITTING WALL " + "HREE-DOT NUKTA ABOVEFOUR RAISED KNUCKLESBETWEEN PALM FACINGSANGE " + "ANTIRESTRICTIONCURRENCY SYMBOL RIELTRANSPOSITION MARKERSEPARATOR MIDDLE " + "DOTSEPARATOR KEY SYMBOLFORMS LIGHT VERTICALOVER LEFTWARDS ARROWTHROUGH " + "SMALL CIRCLENIS RACQUET AND BALLWITH FOUR DOTS ABOVESCRIPTION CHARACTER " + "CURVED ANGLE BRACKETHORIZONTAL BAR WITH OTLESS J WITH STROKEFINAL " + "CONSONANT MARKMULTIPLE PUNCTUATIONINDEX RING LITTLE ONUP-POINTING " + "TRIANGLEAND NORTH WEST ARROWDOTLESS HEAD OF KHAHIMAGE OF OR EQUAL " + "TOGHTWARDS ARROW BELOWEVERSED ROTATED RANAAND SOUTH EAST ARROWAND SOUTH " + "WEST ARROWFIVE SPOKED ASTERISK79 OVER LAK-079 GUNULEFT-TO-RIGHT " + "SECANTHIGH RATHA OR LOW PAWORD REPETITION MARKHIGH TONE APOSTROPHEE " + "CONSONANT MODIFIERCONSONANT SIGN HAARULEFT AND LOWER RIGHTCENTRE VERTICAL " + "LINERIGHT QUADRANT BLACKRIGHT-POINTING ANGLEJUDEO-SPANISH VARIKAKHAMTI " + "REDUPLICATIONARXIS KAI FTHORA VOUREAN STANDARD SYMBOLYRENAIC TWO " + "DRACHMASLATALIZED HOOK BELOWRIGHT U-SHAPED ARROWLE WITH POPPING " + "CORKWARE-FUNCTION SYMBOLLASHING SWEAT SYMBOL WITH HORIZONTAL BARL " + "FUNCTIONAL SYMBOL CHEMICAL SYMBOL FOR AND DIAGONAL STROKESTAR WITH " + "MIDDLE DOTCHARACTER INTRODUCERDOWN ARROWHEAD BELOWEMESTVENNY ZADERZHKA " + "BEGIN LOGOGRAM MARKREVERSED ONE HUNDREDRIGHT ANGLE WITH DOTYIG MGO PHUR " + "SHAD MA ABOVE LEFT TRIANGLEOW-9 QUOTATION MARK WITH STRIKETHROUGHGIBBOUS " + "MOON SYMBOLTHANG LONG ANUSVARALEADING MCHAN RTAGSVARIATION INDICATORSEVEN " + "EIGHTHS BLOCKNETWORKED COMPUTERSKULL AND CROSSBONESLANTED EQUAL ABOVE " + "VASTNESS OR WASTINGAHU ALAYHI WA-AALIHNE HUNDRED TWENTY PNDRED POINTS " + "SYMBOLRROW NO-BREAK SPACEIGATURE AYIN-DALETHSH PLUS HU PLUS ASHFLOORPLANE " + "TWISTINGRATUM SUPER STRATUMOTATED ARDHAVISARGAWOMEN HOLDING HANDSBETWEEN " + "MIDDLE RING WITH VERTICAL TAILDOWN POINTING INDEXTIGHTLY-CLOSED " + "EYESALTERNATE LAKH MARKD CIRCUMFLEX ACCENTVARIANT WITH SQUARENOGRAPHIC " + "FULL STOPGAPPED CIRCLE ARROWUP HORIZONTAL LIGHTLF MADDA OVER " + "MADDAREE-QUARTER CIRCLE NORTH ARROW WITH HOANSPOSITION BRACKETSEQUENCE " + "INTRODUCERARENTHESIS NOTEHEADHORT STROKE OVERLAYVERTICAL TABULATIONOVER E " + "NUN OVER NUNTRANNO MALO POVYSHEUP HORIZONTAL HEAVY AND " + "PROSGEGRAMMENIVARIANT FORM ILIMMUFT-POINTING FLEURON LOVE YOU HAND " + "SIGNHURISAZ THURS THORN AND RETROFLEX HOOKARTIAL DIFFERENTIALLEFT " + "POINTING INDEXTO LOWER RIGHT FILLQUESTION MARK ABOVECIRCLED SANS-SERIF " + "HAND COVERING MOUTHWITH YEH FINAL FORMET WITH WHITE CROSSLEFT TO LOWER " + "RIGHTATED TELLER MACHINERIGHT TO LOWER LEFTINSIDE CIRCLE BELOWCIRCLED " + "WHITE ARROWRY CULTIVATION SIGNURRENCY SYMBOL BAHTITED LIABILITY SIGNVERSE " + "FINAL BARLINEUBLE DOT WITHIN DOTVERSAL INTERSECTIONISPUTED END OF AYAHOP " + "SEMICIRCLE ARROWDENOMINATOR SIXTEENLEFT U-SHAPED ARROWQUADRUPLE " + "CRESCENTSA END LOGOGRAM MARKSYMBOL FOR BEGINNERPREFIXED NASAL SIGN " + "FLUTTERING IN WINDC DIGRAPH WITH CURLSTRAIGHT THUMB BENTRIGHT MIDDLE " + "STROKETWENTY-FIVE DEGREESSTRATIAN FIFTY MNASIN CHEN SPUNGS SHADTURNED " + "SECTION MARKTURNED PADA PISELEH KASKAL U GUNU DISHEVEN POWERS OF FOURDOWN " + "AND HORIZONTALIMIDIA SEXTULA SIGNPARAGRAPH SEPARATORARABIC FORM " + "SHAPINGILDING CONSTRUCTIONHEAD-SHAPED POINTERNAXIAN FIVE HUNDREDFIVE " + 
"FINGERS SPREAD IN A RECTANGLE BOXLUB-SPOKED ASTERISKMSHELL MOBILE " + "PHONETART OF RUB EL HIZBANS-SERIF CAPITAL LING SHIRT WITH SASHSLANTED " + "NORTH ARROWMOVES AGAINST CHEEKRAILING MCHAN RTAGSWEST POINTING LEAF OVER " + "INVERTED SHUGGLY VERTICAL LINEUM WITH DRUMSTICKSWITH STROKE SYMBOLTO " + "LOWER LEFT FILLBAARAKA WA-TAAALAATOP U-SHAPED ARROWGISH CROSSING " + "GISHASTROLOGICAL SIGN PERFIXED LETTER RAATIN SMALL LETTER RIST CIRCLE " + "FRONT EVERSED CHELYUSTKAABBREVIATION MARK EVENTEEN FULL STOPATERRESTRIAL " + "ALIENTYPE A ELECTRONICSARROW SHAFT WIDTH WHITE VERTICAL BAR FOR " + "SIMALUNGUN SAU-SHAPED ORNAMENTSQUARTER NOTE STEM ERTICAL BAR " + "VIRAMAEPIGRAPHIC LETTER DOUBLE PUNCTUATIONPUNCTUATION BINDU ENTY-TWO " + "POINT TWOENTERING TONE MARKASTED SWEET POTATOVARIANT FORM LIMMUGATIVE " + "ACKNOWLEDGEWITH JUSTIFICATIONDOWN-OUTPUT SYMBOLOTLESS DALATH RISH NOT " + "LITTER SYMBOLOU ALAYHE WASALLAMOUCHTONE TELEPHONE AND NO DOTS ABOVEORK ON " + "THE DECAYEDEAST POINTING LEAFTROFLEX HOOK BELOW AND SMASH PRODUCTOW TONE " + "APOSTROPHEFORTY-FIVE DEGREESFORKED PARAGRAPHOSVERY SMALL DIAMOND AND " + "YPOGEGRAMMENIFIVE EIGHTHS BLOCKPACING CANDRABINDU WITH KAVYKA " + "ABOVEIGATURE ZAYIN-YODHJEEM ISOLATED FORMYLLABLE LENGTHENER WITH FLOWING " + "SANDSET OVER BUILDINGSKANTAJA NAASIKYAYACUP WITHOUT HANDLEKBAR ISOLATED " + "FORMSEPTUPLE CRESCENTSHUNDREDS UNIT MARKNINETEEN FULL STOPCTLY EQUIVALENT " + "TOUPPER MIDDLE RIGHTHOUSANDS SEPARATORNISH VERSE DIVIDERNITE PART " + "INTEGRALHORIZONTALLY BELOWSMALL CIRCLE ABOVEKOREAN CHARACTER ONORMAL " + "SUBGROUP OFCANTILLATION SIGN HOLDING BACK TEARSLOWER MIDDLE RIGHTCOPPER " + "ANTIMONIATEAND LOW RIGHT RING THUMB INDEX THUMBCONTINUING " + "OVERLAPMATHEMATICAL SPACESINGLE PUNCTUATIONINDEPENDENT VOWEL IN " + "POSSESSION SIGN WITH CIRCLE ABOVEITAN SMALL SCRIPT WITH CIRCLE BELOW " + "WITH CROSSED-TAILSHAN REDUPLICATIONBOTTOM RIGHT KASRAIGSAW PUZZLE PIECEIX " + "SPOKED ASTERISKSYMMETRIC SWAPPING SPREAD THUMB SIDEUP ARROWHEAD " + "BELOWTILTING FROM WAISTYPTIAN HIEROGLYPH NYOOGA NAAKSIKYAYABASELINE ROUND " + "DOTHAIS LUS NTOG NTOGS PRESSED TOGETHERNYET THYOOM TA-ROLHILOSOPHERS " + "SULFURSMALL RED TRIANGLERYUKOVAYA SVETLAYALEFT MIDDLE STROKEUTLINED BLACK " + "STARLOSED CIRCLE ARROWLEFT-STEM TONE BARS INSIDE AND ABOVESOUL ISOLATED " + "FORMVOCALIZATION MARK WITH BULLET NOSEA PLUS HA PLUS DAPUNCTUATION SIGN " + "ALTERNATE NUMBER BUT RELIEVED FACECONSONANT SIGN PA-GAAHLAA TTUDDAAGAMBDA " + "WITH STROKEAPLI DYO DODEKATALAGOLITIC LETTER WHITE PARENTHESISDELPHIC " + "FIVE MNASINVERTED MCHU CANYEH ISOLATED FORMCONTOURED OUTLINESIGN O WITH " + "CROSSPRECEDING SOLIDUS ALTERNATION MARKASTERN PWO KAREN MEEM INITIAL " + "FORMPRESSIONLESS FACEPRIZNAK MODIFIER MEDIUM BARB ARROWCIRCLES WITH " + "DOTSCONTINUATION SIGNWHITE SHOGI PIECERIATION SELECTOR-CANDRABINDU " + "ABOVEEAR SCREEN SYMBOL WITH TILDE ABOVEABBREVIATION SIGNKE BOTTLE AND " + "CUPKHAH INITIAL FORMLAPPING LESS-THANSTRAIGHT MOVEMENT AND PALATAL " + "HOOKREATIONAL VEHICLEAMPHYLIAN DIGAMMARIGHT HALF CIRCLEVERY SMALL " + "SQUARECLOSED LITTLE YUSCOMBINING NUMBER LAH ISOLATED FORM WITH SOUND " + "WAVESULAR MEDIUM SHADESQUARED TIMES KURLHOUETTE OF JAPANMANENT PAPER " + "SIGNEMICOLON UNDERBARMALL WHITE CIRCLELIAN HIEROGLYPH ALD PERMIC LETTER " + "URNED DAMMA BELOWURNED COMMA ABOVEQUAT REVERSED ESHCAL SYMBOL BOTTOMAEUM " + "ONE PLETHRON0 WHEELED CHARIOTCANCELLATION MARKTRIPLE DASH ARROWHIRTEEN " + "FULL STOPVARIANT FORM IMINVRE TOURNOIS SIGNTHREE SOUND WAVESUP POINTING " + "INDEXVARIANT FORM USSUHORIZONTAL DOUBLEHORIZONTAL 
SINGLEGENERIC " + "MATERIALSOURTEEN FULL STOPNG STROKE OVERLAYNFORMATION SOURCEFROM SMALL " + "CIRCLEFRACTION ONE HALFBOTTOM HALF BLACKIASTRE MARK ABOVESERVER EYE " + "SYMBOLICTED LEFT ENTRY-NEGATIVE CIRCLED IDEOGRAPHIC COMMA OVER ZU PLUS " + "SARHAH ISOLATED FORMUP AND HORIZONTALRYBLION BASE SIGNVARIANT FORM " + "ASH9TONAL RANGE MARK ONE EIGHTH BLOCK-DENTAL PERCUSSIVEBE WITH " + "MERIDIANSGREATER-THAN SIGNGREATER-THAN NOR BRIGHTNESS SYMBOLBERBER " + "ACADEMY YAS REVOLVING LIGHTHEART-SHAPED EYES PLUS SHA3 PLUS AOPEN-HEADED " + "ARROWWO VERTICAL DOTS WITH NOT EQUAL TOTIAL ARTS UNIFORMING POLE AND " + "FISHFACING BABY CHICKVEE WITH UNDERBARY ON BLACK SQUAREAUKAZ LAGU LOGR " + "LATHERING TOGETHERINEAR ANNOTATION TARTING FROM SIGNNE EYEBROW " + "RAISEDPINWHEEL ASTERISKINITIAL LETTER RAMILITARY AIRPLANEVERAGE WITH " + "SLASHTAN ISOLATED FORM GRAVEYARD SYMBOL TO BLACK DIAMONDAND BLACK " + "SQUARESOWER NUMERAL SIGNIGHTEEN FULL STOP LAGAB TIMES ASH2NASALIZATION " + "MARKFINGER-POST ARROW LAGAR OVER LAGARTERSYLLABIC TSHEGNAUDIZ NYD NAUD " + "NTEN THOUSAND SIGNBRACKET EXTENSIONFLICK ALTERNATINGCTION " + "APPLICATIONCROSS PUNCTUATIONVARIANT FORM ESHCH WITH UMBRELLAARENTHESES " + "ABOVEDOUBLE TURNSTILEDITORIAL CORONISVERY HEAVY SHAFTDOUBLE DOT " + "ABOVECONSONANT JOINERVIEWING CEREMONYBOTTOM HALF RINGCORNER " + "DOWNWARDSDOUBLE CRESCENTSAFFRICATION MARKUPERSCRIPT ALAPHUP-OUTPUT " + "SYMBOLCOMPRESSED ARROWANABAZAR SQUARE UPPER OVER LOWERVOWEL LENGTHENERUP " + "MIDDLE HINGEDDOWN RIGHT BARB BOLD GREEK CROSSDEWAYS U BRACKETDOUBLE " + "ZAPYATAYAB2 TENU PLUS TABDOTTED CRESCENTSCASIAN ALBANIAN DOUBLE HEAD " + "MARKCREAMING IN FEARCORNER LEFTWARDSIFTEEN FULL STOP LIGHT MOON " + "ARTASERIFS AT BOTTOMNION WITH SERIFSHYPHENATION MARKSMALL NOON ABOVEIDED " + "GREEK CROSSORIZONTAL JOINERIGHTH NOTE STEM IMENSIONAL ANGLEINDEPENDENT " + "SHININDEX THUMB SIDEHIGH SPACING DOTMAGNIFYING GLASSRISING TONE MARK " + "SMALL ROTATIONS INSERTION POINTRIZONTAL ELLIPSEINES CONVERGING HMATULLAH " + "ALAYHESLANTED EQUAL TOSMALL CAPITAL ELHOLDING TOGETHERPEN CENTRE " + "CROSSLTERNATE HASANTALOWER OVER UPPERSTUCK-OUT TONGUESTRING " + "FRETBOARDSTRAIGHT STRETCHSTICKING OUT FARSTERISK OPERATOR PLUS KAK PLUS " + "AADIAN SYLLABICS K PERMITTED HEREO-MINOAN SIGN CMLD ASSYRIAN ONE LEFT " + "HALF CIRCLELEFT ARROW ABOVENTAIGANA LETTER SANS-SERIF ARROW OR THE IMAGE " + "OFYATHOS BASE SIGNLLOW PAN OF FOODTAKANA-HIRAGANA IPPER-MOUTH FACEIRCLE X " + "NOTEHEADLIGHT BARB ARROWLIGHT AND RIGHT ISTOS DISC SIGN OLD WHITE " + "CIRCLEIVE POINTED STAROLD TAMIL VIRAMAYIR MKPARAQ MEUNEPSILON " + "UNDERBARUDLY CRYING FACEEN MILLIONS SIGNRIGHT DOWN BARB END OF TEXT " + "MARKUBJOINED LETTER ENTRE WHITE STARENUMERATION SIGNERCURY SUBLIMATERAYS " + "AND DOTTED RIGHT HALF BELOWRIGHT HALF BLACKMIDDLE AND RIGHTMIDDLE AND " + "BELOWRAIDO RAD REID R TIMES GAN2 TENUUMBER SIGN ABOVEDVUMYA ZAPYATYMI " + "TIMES DISH TENUSHU2 PLUS KASKALRESH-AYIN-DALETHREPETITION MARK-WAVY HAMZA " + "BELOWE PLUS GAN2 TENUPLE MEASURE REST AND HEAVY RIGHTULDERED OPEN " + "BOXECIMAL SEPARATOR AND LIGHT RIGHTEFORE COMPLETIONRECORD SEPARATORWITH " + "HEARING AIDWITH CENTRED DOTSIGN RISING TONE WITH BUNNY EARSWITH LEFT " + "UPTURNPRECEDING SUBSETQUALS SIGN BELOWWITH HAMZA ABOVEQ WITH HOOK " + "TAILTRIPLE CRESCENTSSITION INDICATORPRECHGESANG STEMNAL DIGIT " + "SHAPESEVERSED VISARGA EVERY OTHER TIMEMESTVENNY KLYUCHPLACEHOLDER MARKR " + "PLUS GAN2 TENUFALLING DIAGONAL WITH DOT INSIDEPOSTPOSITION MENFFERENCE " + "BETWEEN CAPPED MOUNTAINFLOORPLANE SPACEND OF PARAGRAPHMURDA " + "MAHAPRANABINDING 
BRACKETNASALIZED TONE-N-ARY SUMMATIONUSTER NOTEHEAD " + "BLOCK DIAGONAL NOON WITH KASRANOON FINAL FORMNO GOOD GESTURENJOINING " + "MACRONNA DOUBLE HELIXRIGHT RERENGGANATINATE MYSLITEPERTHO PEORTH PPLUS " + "SIGN BELOWATA LINK ESCAPEPRISHTHAMATRA EPUT SYMBOL FOR RIGHTWARDS " + "TICKRIGHTWARDS AND QUADRUPLE ARROWQUADRUPLE DASH R WITH FISHHOOKPENSION " + "RAILWAYRIGHT HALF RINGVERTICAL SECANTREAMY EYEBROWS RECEPTIVE " + "EARTHRECITATIVE MARKREVERSE SOLIDUSREVERSED OPEN EGHT REPEAT SIGNON TOP " + "OF MODEMNVERTED UBADAMASALTER PAHLAVI BENT OVER INDEXBELOW LONG " + "DASHBELGTHOR SYMBOLODO SOFT HYPHENS IN SILHOUETTES ELEVATUS MARKOGOGRAM " + "KHAMTI BAR ABOVE UNIONOLIDUS OPERATORNOT APPROXIMATEOND PLACE " + "MEDALONJOINED HINGEDONTOUR INTEGRALORIZONTAL COLONORT EQUALS SIGNOUBLE " + "BACKSLASHOW-FALLING TONEOWER HALF BLACKRNAMENT STROKE-RMAN PENNY SIGNPEN " + "SQUARED DOTTOP RIGHT FATHADOING CARTWHEELFOUR DOTS WITH FOUR " + "ENCLOSURESFRACTION DIGIT FTER COMPLETIONDIGA AELA-PILLADIALYTIKA " + "TONOSTRIANGULAR MARKDI ALLAHOU ANHUGEMINATION MARKGGLY LINE " + "BELOWDESCENDING TONEFORWARD TILTINGGROUP SEPARATORHAKING PARALLELHALF " + "FILL SPACETIP ON THE LEFTHEH MEDIAL FORMTILDE DIAERESISTHROWING A " + "KISSDAGESH OR MAPIQHOOKED INDEX UPTHREE DISH TENUHORIZONTAL " + "DASHHORIZONTAL FILLEH INITIAL FORMDOWNWARDS TRENDUMAI PALAUNG FAE " + "ISOLATED FORME MUSICAL NOTESE OVER INFINITYDOWN SEQUENTIALULTIPLICATION " + "XUGMENTATION DOTEFT REPEAT SIGNEFTWARDS ARROWSDOUBLE TRIANGLEUBLE RING " + "BELOWERICAN FOOTBALLESIDE LESS-THANU PLUS U PLUS UESSARON CHRONONETIC " + "VERSE SIGNTWO WITH STROKEEXPONENT SYMBOLTVIMADUR SYMBOLLONG VOWEL SIGNLD " + "TAMIL SHORT LEFT DOWN BARB LEFT HALF BELOWLEFT HALF BLACKCIRCUIT-OUTPUT " + "LEFT HAND INDEXLETTER CAPITAL LEVEL TONE MARKLEVEN FULL STOPLIGHT AND " + "LEFT LMOST EQUAL TO UR POINTED STARLONG HOOK BELOWCKET CALCULATORLOOK OF " + "TRIUMPHLOSED INSULAR GCAPITAL LETTERSSIXTEENTH NOTESMALAKON CHROMA " + "MARRYING MAIDENMEEM FINAL FORMBROWS STRAIGHT BREAKING HYPHENMIDDLE " + "DIAGONALSHORT OVER LONGINVERTED STROKEHOUSAND STATERSHREE DOTS " + "BELOWIAMOND UNDERBARIDING ENCLOSUREIGN PALI VIRAMAIMISEOS " + "CHRONOUIMPERFECTA RESTING SYMBOL FOR CORNER WITH DOTINGLE HEAD MARKINUS " + "SIGN BELOWINVERTED LAZY SSHITA PLUS GISHIRCUMFLEX BELOWTAI LAING TONE-ITH " + "FINGERNAILSIZED WHEELCHAIRSTROKE NOT SIGNKISIM5 TIMES BISTERESIS SYMBOLST " + "SYRIAC CROSSST QUARTER MOONSSICAL BUILDINGCLOSED BY CURVELATION " + "FUNCTIONXTEEN FULL STOPAMARITAN SOURCE WITH DESCENDER CORNER " + "BRACKET-CARRIER LETTERZAIN FINAL FORM OVER SIG4 SHU2 NEPOSTOYANNAYA OVER " + "MOUNTAINSVOWEL SEPARATORZERO WITH SLASH TOUCHING INDEX THUMB STRAIGHT " + "CLOUD AND RAINYNCHRONOUS IDLE TIMES IGI GUNU WITH RIGHT LEGVOWEL " + "SHORTENERWITH DOWN ARROWACHES THE LIMITWITH RAIN DROPSAI LAING DIGIT " + "OPERATOR WITH ALMOST EQUAL TOWHITE DOT RIGHTWALLPLANE SPACE PLUS HI PLUS " + "A-PIECE SWIMSUIT THROUGH CIRCLE AND LOWER LEFTAMOUNT OF CHECK DEYTEROU " + "ICHOU WITH DIAERESIS ALTERNATE FORM-NO-EVIL MONKEY PARESTIGMENON ALIF " + "LENGTHENER2 CHARIOT FRAMEALAYHE ASSALLAMAND PARALLEL TOBLACK " + "TRIANGLEBLADE SCISSORSPARATED SYMBOLD-UP NEWSPAPERPARTMENT STOREFORWARD " + "INDEX INOLOGICAL DOTMOTHETIC ABOVEFINAL ANUSVARAAND COLD SWEATINVERTED " + "BIRGASEL LOCOMOTIVEUP RIGHT BARB OVER GUD LUGALINSERTION SIGNVRON " + "SNOWFLAKESEPARATOR MARKING HANDS SIGNSMALL TRIANGLEUSPENSION MARKDASIA " + "PNEUMATAINFINITY BELOWPAO KAREN TONESHESHIG TIMES IGHTWARDS VANEUNIT " + "SEPARATORTRIANGLE WITH XO EKFONITIKONTERMINAL MARK-UNION 
OPERATORDI " + "ALLAAHU ANHWITH LEFT HOOKPPED MIDDLE UPDEYTEROS ICHOSDIAGONAL " + "MOUTHTETARTOS ICHOSDIAGONAL PATH PROTECTED AREAMRACHNOTIKHAYARING " + "MEGAPHONEGERED TREMOLO-BAG MEMBERSHIP HASER FOR VAVWITH DOT BELOWPEN MARK " + "BELOWSMALL LETTER JLOTUS POSITIONSMALL LETTER DBHATTIPROLU AAANGLE " + "OPENING SHAN MEDIAL WAPLE WITH HEARTPLETE INFINITYLOWER DIAGONALPLITTING " + "APARTED SYMBOL FOR IKHAYA PUTNAYATELPIECE CLOCKWITH FATHATAN CERTAINTY " + "SIGNENDED MULTIMAPLEFTWARDS AND CRIFICIAL WINEYOUTHFUL FOLLYEND OF " + "SECTIONONE SOUND WAVELEFTWARDS TICKTWO WHITE DOTSSTRONG ISOLATEENNA WITH " + "BARSCEPTER OF JOVECENTURIAL SIGNOOTNOTE MARKERTWO ENCLOSURESLESS-THAN NOR " + "-HEADED ARROW SPEECH BUBBLESEMIVOWEL SIGN ALLAJALALOUHOUCOLON OPERATORUAL " + "WHEELCHAIRSQUIGGLE ARROWOBLIQUE HYPHENERIAL ARAMAIC ERIC " + "INDICATOREPENTHETIC YUTLETTER OVERLAPNYI ZLA NAA DAUBHAYATO MUKHAERTICAL " + "JOINEROLD RESOLUTIONALF TREE TRUNKVONIC ASTERISKLACE OF SAJDAHLITTLE " + "SECTIONOT TILDE ABOVELIGHTLY SMALL UPPED INDEX UPOTHERS CIRCLEDTURKIC " + "LETTER FATHATAN ABOVEISED ROUND DOTSECOND SUBUNITLINE EXTENSION1 OVER " + "LAK-081ROSS ON SHIELDIRCULAR VIRAMAFFED FLATBREADFFICE BUILDINGOUR OBOLS " + "SIGNSMOKING SYMBOLOUSING THUNDERLEVEN TWELFTHSSURROUND FROM OPPOSING " + "PIRIGJOINED SQUARESAMNUC PII KUUHORANGE DIAMONDORD SEPARATOR EXCLAMATION " + "OHTWO DOT LEADERINVERTED DAMMANORTH ARABIAN -CURRENCY SIGNIWAZ TIR TYR " + "TIVE OBOLS SIGNIVE KEY SYMBOLOSITION SYMBOLITA PLUS GISH ISSION " + "TICKETSVERTICAL HEAVYSIDE-DOWN FACEZAKAYA LANTERNTIMES OPERATORDIRECTION " + "FLIPREH FINAL FORMRD PLACE MEDALAU LENGTH MARKWORD SEPARATOR CROSSING " + "ESH2GYPTOLOGICAL AVERTICAL LIGHTDOUBLE-STRUCK DIO MICROPHONEVERTICAL " + "ABOVEDOES NOT EXISTGHT WITH STARSGUNU TIMES ASHAFETY SCISSORSHIRD-STAGE " + "HLIREATIVE HEAVENTHER CHRISTMASAROUND-PROFILEHREE-LEGGED TEVENIENCE " + "STOREQUINARIUS SIGNVERTICAL COLONRIGHT CROSSBARUNDER RELATIONMENSION " + "ORIGINTHOUSANDS MARKUND MARK ABOVEZAH WITH MEEM REVERSED-SCHWA WITH LONG " + "LEGREE-LINE STAFFMEDIUM DIAMONDTHOUSANDS SIGNTHAKA ANUDATTAAI LENGTH " + "MARKTOP HALF BLACK AND DIAERESISTRANSMIT STATEDUN3 GUNU GUNUTHALAN ETHEL " + "OTHREE POINTED TIMES SHU TENUMID-LEVEL TONEHESIVE BANDAGERRIAGE RETURN OF " + "THE HORNSAPPED PRESENT-ESASA DOTTEDMALO POVYSHE GTER TSHEG MADOUBLE " + "STROKEEVERSED DAMMACULATED LORRYHIEROGLYPHIC MESSENIAN TENDVOECHELNAYA " + "JES SU NGA ROGYA GRAM SHADOPPOSING NAGARPENTRY PLANETU WAS-SALAAMDOUBLE " + "CIRCLEVERLAY MIDDLEAN RUPEE SIGNVERGREEN TREEROTATED BIRGABY " + "DEFINITIONURNED W BELOWUPERIMPOSED XLISION SYMBOLUPONDIUS SIGNDOTTED " + "ZLAMA IRCLED INDEX NING MOVEMENTIOT SYLLABLE FICATION CARDNINE " + "TWELFTHSINVERTED TURNITING THROUGHHINESE TONE YSYNDESMOS NEOIVE SLOW SIGN " + "AND SKI BOOTAMUHU ALAYNAAIVE POINT ONEDOUBLE MUCAADHERICAL ANGLEDOUBLE " + "HYPHEN AND YEN SIGNMALL LETTER ZOTEHEAD BLACKISH LIRA SIGNNUMERIC SIGN " + "MEDIUM SQUARE VARIANT FORMERTION SYMBOLAR WITH QUILLHAKASSIAN CHEARLAUG " + "SYMBOLSAMYOK SANNYACIRCLE INSIDESSAGE WAITINGUPSILON WITH U WITH " + "STROKENUMERATOR ONEOLVING HEARTSOMAN NUMERAL CHRYSANTHEMUMSTABLE " + "SYMBOLL-TYPE SYMBOLOBLIQUE LINE ARCHAIC KOPPAER BOARD FILLS KRYZHEM ON S " + "KAI APOTHESHAM DIGIT ONEMASORA CIRCLELATERAL CLICKNTY FULL STOPOGOTYPE " + "SIGN S UP TOGETHER-PER-EM SPACE-OR-PLUS SIGNLEFT CROSSBARSAL PLUS " + "TUG2ARGOSYNTHETON-OFF CALENDARCITATION MARKTIRTA TUMETESEUROPE-AFRICAYOD " + "YOD PATAHCROSSING GAN2WO-LINE STAFFYMBOL TAU RHOKAPYEOUNPIEUPRTABLE " + "STEREOSILI PNEUMATACROSSING 
GABAOON NOTEHEAD CROSSING MUSHARROW " + "OVERLAYH-TYPE SYMBOLVERTICAL BARS OPPOSING KUREMPHATIC TONESIGN " + "AVAGRAHASIGN PAMUDPODVERTICAL FILLONAL COMPUTERMARKS CHAPTERMELODIC " + "QITSACRIPTION TAKESTERTIUS SIGNCRIPTIONAL PAK WORK SYMBOLLEGETOS ICHOSONG " + "RIGHT LEGCHECKER BOARDUPWARDS TRENDONG-LEGGED DEONGRATULATIONARRED " + "TRIDENTSHESH PLUS KII WITH STROKEGAR FRACTION BAT AND BALL CROSSING " + "KA2WITH INTEGRALAUDATE CHRIVIFOREMENTIONEDMODIFIER MARK WITHOUT SNOWED " + "PAPERCLIPSZHOU NUMERAL VEN POINT ONENG TERMINATORPPOSING LUGALGAW KAREN " + "SHADIAERESIZED UWITH ASTERISKBOHAIRIC KHEIPA NJI PIPAEMED DOUBLE VERBASAN " + "LETTER MINDER RIBBONSIA-AUSTRALIA WITH JEGOGANHREE TWELFTHSPAIRED " + "ARROWSUSICAL LEIMMA BZHI MIG CANRN PENTATHLONLVEOLAR CLICKTE ORDER " + "MARKGIFT ENVELOPEVE-LINE STAFFSMALL LETTERSYUUKALEAPINTURIZONTAL " + "TAILEELING PERSON WITH TEE TOPPLUS OPERATORFROWNING FACEIMAGE " + "BRACKETRIPLE SVARITAIGHT TWELFTHSRACKETS ABOVEWAVY OVERLINELVE FULL " + "STOPTHIRD SUBUNITMINUS WHITE XMINUS SIMILARILE SEPARATORBACKSLASH BARW " + "RING INSIDE DIMINUTION-1FINAL SEMKATHEHU FEOH FE FFULL SURROUND HEADED " + "ARROWSELECTED AREAUDDISA SIRRAHDIC MARK SIGNBALL AND HOOPUSHING " + "UPWARDWAW-AYIN-RESHOUT MIDDLE UP WITH INK PENOURTH SUBUNITRANKS CASKET " + "INVERTED FORKVICE CONTROL DIRECTIONAL TROFLEX CLICKRIGHT " + "HARPOONAWELLEMET YAZNAP PIZZICATOFINAL LETTER MAILBOX WITH TOP HALF " + "RINGANNED LEATHERLOCATION SIGNACCOMMODATION B BAR SYMBOLBOTTOM CORNERFT " + "ARROWHEAD TED HAND SIGNUFFLE PRODUCTMULTIOCULAR OQUARTERS SIGNEAVENLY " + "EARTHPREPONDERANCEFIXED-FORM RAIFI ROHINGYA LOCK WITH KEYILABIAL " + "CLICKINTEREST SIGNWAVY LOW LINEEDIC ANUSVARAMOBILE PHONESVOWEL SIGN " + "PABOWING DEEPLY WITH OVERBARUE OF LIBERTY TIMES KASKALLEFT-LIGHTEDVOLTAGE " + "SIGNCRESCENT BARSHORT RIKRIKNUITY SYMBOLUPPER CORNERENOS CHRONOUDIGRAPH " + "YORIALLPOINT PENDIGRAPH KOTOMPTY CENTRE LU PLUS ESH2DICTION SIGNLEADING " + "EYESMPHASIS MARKMEDARY CAMELMBELLISHMENTACE INTEGRALS SUBPUNCTISLUS " + "NOTEHEADLOWERED FLAGDOWN NEUTRALN ELEMENT OFENT ARROW POULL NOTEHEAD-MAIL " + "SYMBOLUME INTEGRALSHED BARLINESMALL DOUBLELEFT HARPOONCROSSING " + "NUNMONOGRAPH UKMUM TIMES PAMEDIUM SHAFTNGLE BARLINEDOUBLE ARROWEGIN " + "SEGMENTUBSCRIPT TWOMADDA ABOVE MALL SECTIONAFU LEERAEWAWDATA SQUARESMALL " + "TRIPLELICKING LIPSAA AS-SALAAM-DZUD RTAGS DASHED ARROWNORTHERN TSESMILING " + "FACEEIGHTH NOTESMIDDLE PIECELL MODIFIER-UN WITH RAYSACUTE ACCENTSECTION " + "SIGNLINKING MARKLINGING FIREDOT OPERATORLLE PATTERN NJALA GONDI LIMBS " + "DIGITSDOUBLE ARCH WITH INDEX NDING PERSONM NSHUT NYAMLER CONSTANTSH ZIDA " + "TENUNCK CONSTANTCROSSING LU2CROSSING KALCROSSING GI4DENTAL CLICKNATURAL " + "SIGNENARIUS SIGNNARROW SHAFTDOWN HARPOONDUG TIMES NIUGHT BALLOONMING TO " + "MEETNERSHIP SIGNNEPOSTOYANNYMETA STAVROUEMELY HEAVY WITH DAGESHEAGULL " + "BELOW SKEWED LEFTLOWER CORNERNOTCHED HOOKNOTCHED TAILEMISOFT SIGNEEPING " + "SMALLDE MARK SIGNMANNAZ MAN MUH PLUS GISHSAZ IS ISS IRNAM BCAD MARISTMAS " + "TREETEARS OF JOYTE SEPARATOR IN TRIANGLEIN MIDDLE UPBINING MARK PHEME " + "JOINERANG KHANG GYBLACK CIRCLEFOUNTAIN PENFORMING ARTSINDEX MIDDLEPOETRY " + "MARK-GAW KAREN EURION CHRONONPOUTING FACEIGATURE SHRITERNATE AYINPORT " + "CONTROLBEHIND CLOUDUTH-SLAVEY KUTH ARABIAN TRIPLE DANDATRIPLE " + "FLAMEBETWEEN LIPSFT RERENGGANINUSOID SIGNUSEATED FACEINVERTEBRATEAND " + "OPERATORBRATION MODEAND CRESCENTBRIDGE ABOVEBSCRIPT ALEFOUR TWELFTHSYAN " + "NUMERAL IRAGANA HOKAOUGHT BUBBLEFERENCE MARKOUCHES THUMBFEMININE " + "DOTBUTTON MOUSEFOLDED 
HANDSBLOWING FACEBLUE DIAMONDING ENVELOPE " + "KLYUCHEVAYAING HITTING ING OPERATORXIRON KLASMAFLAG ON POSTROLLING EYES " + "LINE SYMBOLINTEGRATION OVER KASKAL RIGHT DOUBLERED KEYBOARD AND " + "PICTUREGUARDED AREAGROUND SLIDEGREEN DRAGONRCHAIC SAMPITHREE HEARTSWITH " + "SMALL VRANCHING OUTHEAD-BANDAGEHAND FORMAT RIAL TRAMWAYRIAGE SYMBOLHASIS " + "SYMBOLARALLELOGRAMHALF BRACKETREVERSE MARKVER EQUAL TOAR DIAERESISHAH " + "WITH DALREN CROSSINGREFACE COLONHIBITED SIGNBAHIRGOMUKHAQUARTER " + "SIGNQUARED ARROW CROSSING GUBACK OF HANDQUIRREL TAILIDENTICAL TOGEBA " + "KAREN IRING OVERLAYVAKRAHASANYAPROTOS ICHOSGBY FOOTBALLRAFFIC LIGHTHREE " + "FINGERSATNAH HAFUKHVICTORY HANDTOP-LIGHTED ATTOOED HEADRAH BEN YOMO6 LONG " + "NGGOO-SHAPED SIGNTHODOX CROSSHYPHEN-MINUSRIGHT SINGLETHIC LETTER TRAGRAM " + "FOR THETA SYMBOLWIGGLY FENCEOPPOSING LU2 OVER KISIM5OQ NSHUT YUMLARGE " + "DOUBLE ON PEDESTALS ABOVE SIGN OVER MIDDLEALT PAN SIGNOPLE HUGGINGOHAZARD " + "SIGNLATALIZATIONYOD TRIANGLEOGOGRAM NYAJYOUTHFULNESSON US SYMBOLYMBOL " + "BINDU OK HAND SIGNKANA REPEAT CIRCLED PLUSLARGE TRIPLECENDING NODESS-THAN " + "SIGNEVERING FACEERPENDICULARKLYUCHEVAYA CK-O-LANTERNOPENING LEFTSUR OVER " + "SURKAPPA SYMBOLCIRCLES AND OING TO MEETOID NOTEHEADOTTOM HALF OT " + "MONGKEUAEQCHARACTER-1BCABBAGE-TREEALTERNATING FALLING DOTS OVER TWO " + "PIIRTY-SECOND BYSMAL WATERONISHED FACEETRETES SIGNLAYING CARDSCHAIR " + "SYMBOLKHAMTI TONE-KHMIMIC KHEICHARACTER-18CALENDAR PADCIAN LETTER " + "-SIMPLIFIED IVE TWELFTHS OF ANTIMONYROUNDED ZEROHREE BALUDAE WITH " + "VEILGRAMMA SIGNHORA DIGIT ULO TWO SUMLACK SULFURTRAIGHT WAWL OF THREADL " + "TIMES LAL0 FOOTSTOOL WITH JACKSWHITE JOKERI TIMES NUNI TIMES BADESH " + "DIGRAPHACKED COMMATHIRDS SIGNLACKLETTER MACING FACE-OFF SYMBOLLEFT " + "SYMBOLLEFT SINGLEXAGRAM FOR ENTHESIZED 6 LONG NGGE-MINUS SIGN WITH " + "FLASHE2 TIMES ANLEEP SYMBOLLEAF CLOVERHEELED SHOEWO TWELFTHSHAGGAR " + "YAZHLATIN CROSSERCENT SIGNHEAVEN MARKDUATION CAPHEATED FACE WITH " + "COMMAEPIDAUREAN HAWH HMONG WITH CARONHANG KHUDAMSINGLE AND 5 LONG " + "MBOOLCE TSA CANMBA BAYANNALD SCRIPT XSIMILE SIGNMBLER GLASSLD POLISH " + "OLEFT DOUBLESSANGKIYEOKGRAVE-ACUTEACUTE-GRAVEHOKHLOM ON THREE " + "TIMESEORGIAN NARSTERED SIGNHLETIC SHOEACTIVE SIGNHITE DRAGONGSUM " + "-KHYILDYO CHRONONGUISED FACETONAL MARK UMAN FIGUREWASLA ABOVETIEE " + "SHEUOQTIGHT ACUTE WITH DASIASPIRATED FAHIGH STROKELETION MARKJECT " + "SYMBOLLON SKEWED JIHVAMULIYAUG RTAGS GYSVASTI SIGNINDICESIMA TRUNCATED " + "AEEZING FACELEU SATANGAINDERGARTENJOYOUS LAKEKAARA POLLUFOURTH ROOT WITH " + "TRILLZZA WA JALL WITH TITLOUISHED FACELOSED ENTRYSPEED TRAININ EQUAL " + "TOLOSING MARKLOTI NAGRI IMULTANEOUSUETTE BREADTUNE " + "COOKIEYEORINHIEUHIRCLED TEXTIPLE TONGUEFGHANI SIGNTA EQUAL TOISIGOTHIC " + "ZWING NEEDLEFINAL SIGMA-COPPER ORE WRIST FLEXFIRE ENGINEIVERY TRUCKUBLE " + "TONGUESYURA SASAKWINKING EYEIX TWELFTHSWE PALAUNG SYMBOL VIDJ WITH " + "MAPIQIEN MONSTERKRAINIAN IETRESS SIGN LTED FLOWERGE AT " + "NIGHTKTIESELSKABLTERNATE YAXI RADICAL LINE FILLERLU PLUS IGIGENTLE WIND3 " + "LONG NGGOTETRAFONIASXESTES SIGNTH-THALATHAEAVER DENE ENG DIGRAPHSTEAMY " + "ROOMGHAIN WITH THAM DIGIT LUPOVODNAYAIBLE-CREE YTWO FINGERSEUNJOMNDEUQTY " + "THOUSANDILIQUA SIGNEDICAL MASKILCROW SIGNABOVE RIGHTIL " + "FRAGMENTXTINGUISHERTENS DIGIT WITH GARDENEN STRAIGHTTRIAN CAMELGAP " + "FILLER-SMALL CLOUDSTORIC SITEGAYANUKITTA WITH PLATELT OF CLOTHETEI MAYEK " + "TRESVETLAYASECOND MARKPHNAEK MUANRISING DOTSBETA SYMBOLZIGZAG LINEUTH " + "CORNERSCURVED BENDRITING HANDBELOW 
RIGHTPODCHASHIEMUPADHMANIYAUTING " + "WHALECROSSING URPARAKALESMABLACK ARROWCROSSING BUCROSSING ENCROSSING " + "IMCROSSING PIRIPLE PRIMENSE CHEEKS PROPORTIONCTION MARK CTION " + "MARK-PERISPOMENI I ZAPYATOYAWNING FACEDE KIKAKUI VARYS ICHOSQUERED " + "FLAGQUIQUADRATEND OF PIECEVYKA ABOVE SHOE STILEND ODD SIGNSHAAYATHIYAVE " + "OF PEACEDENT EMBLEMNBLENDED UKRIGHT-LIGHTRIGHT-HAND UNJO WYNN W S " + "ZAPYATOYNIKOLSBURG POST OFFICEVA V CHELNUBANK " + "SYMBOLDALETH-RESHVAMAGOMUKHAPUT MORTUUMNG LEFT LEGRING LIQUIDDASH SYMBOL " + "DECORATIONCAN RGYINGSRPOON ABOVECARET TILDE OF FLOWERSOLD NUBIAN ORT " + "BARLINEAMUSED FACEORCE SYMBOLVISARGA ONERYVNIA SIGNCK SEXTANT-OHINGYA " + "YEHOF MASHFAATZERO THIRDSOF ENVELOPERUNNING MANONIAN SIGN OVER BULUG " + "OVER IDIM CH AND LAMPCHING CHICKCELANDIC-YRCE OF PIZZAOMAN SIYAQ " + "CCUMULATIONOPPOSING ENOPPOSING IMOR OPERATORBOTTOM MARKNYIS -KHYILCONTAIN " + "AS BREVE BELOWOUTHERN TSEROR-BARRED RONTHISMATAOVERSTRUCK COND " + "SCREENNUSVARA ONENUN HAFUKHANUMBER ZEROROKUTASTI ANUMBER SIGNCREDIT " + "SIGNNTIMONY ORE PLUS MASH2OUBLE ACUTEBZHI -KHYIL PLUS NUNUZURRENT " + "SIGNOUBLE DANDANITIAL IZHECOMBINATIONOUNDED FACEROSS ACCENTBUMPY " + "ABOVERCHAIC JNYAMIDDLE STEMASE TO THE AND MACRONDONG " + "TSHUGSDOACHASHMEEREAKTHROUGH TIMES ESH2AILLESS PHIRIGHT GUARDMONOCULAR " + "OMOVED BELOWDIATONON DIATH PRODUCTRANSMISSIONRIGHT HEAVYRIGHT LIGHTMFON " + "PIPAEMME LONG CANMED RGYINGSARAM GONDI UPPER HALFRESPONDS " + "TOAESCULAPIUSAESHAE NYAMARM SPIRAL ARMS RAISEDDOLLAR SIGNDOUBLE " + "SHADDOUBLE RINGDOUBLE MARKARPEGGIATO AGAZ DAEG DMICAL HEARTMIDDLE " + "BENTDOUBLE AND MIDDLE HOOKAGONAL SIGNDESK PERSONSHEQEL SIGNUNIT DIGIT " + "MUUSIKATOANMUNCIA SIGNRADITIONAL N THE VERGERACHMA SIGNATION SPACE TACK " + "BELOWRA SOMPENG ATION POINTRAISED FLAGRAGGISMATAOTING STAR1 PLASTICSZH " + "DIGRAPHFAHRENHEITQUISH QUADOSTAL MARKVEL SLIDERTHMIKON N 1 LONG " + "MBEURIPIGMENTIT MBAAKETC WITH DOTROUND DOT HEAVY BEATISMUTH OREGHT " + "LIFTERWO SHORTS OUT INDEX URVED OMETBSTRUCTIONHERMOMETERION BOTTLEXED " + "BICEPSBROKEN BARHAAPRAANA WING HEARTOUTER JOIN AND BREVEFINAL HETHOUTHERN " + "TAATRICHISMAOSSED SHEIVIOUS PAGEAYER BEADS AND ARROWOUND OMEGA AND " + "ACUTEFFICULTIESTAIL GLASSATTY WITH OUR FIFTHSRSI SYMBOLTWO SHORTSOON " + "LILITHOON SELENAEUTRAL YERSTRUCTION RGE CIRCLEUR YIG MGOUR HUNDREDR2 PLUS " + "SUYMBOL AIVAOP NKAARAEKAI SYMBOLKA SATANGAK2 PLUS BUGIMEL-HETHRHO " + "SYMBOLETTA-PILLAKINDI MVOPSTRAL SIGNHAMZA MARKI ARCHAIONTYPE COLONOPEN " + "SHELFCHAD RTAGSUR CORNERSCH BALLOONRGE SQUARESTROM SIGNTWO THIRDSRESH " + "BELOW5 PLASTICS OF DHARMAHEADSTROKEORTHERN TARIGHT SIGNIXTHS DISHROUNDED " + "ERF SHE-GOATT AND BOLT3 PLASTICSHUNGARIAN TIMES SIGNTING HEARTEVERSED PE6 " + "PLASTICSJONG TILE REVERSED IITH DIGIT SYLLABLE MZU OVER ZUCAPITAL ETOROME " + "SIGNVERAGE BOXPLUS BELOWIKRON ISONUTH OR SPYPLUS ERIN2TEMPLATIONHOOK " + "ABOVEPLUS NAGA BELOW LEFTWITH SPOONHAN DIGIT FRONT WALLY AND RICEGREE " + "SLASHRCHAIC KHAWITH STRAWANGKHANKHUGAGE CLAIMFTOGGOS OUGGING FACERING " + "ABOVEILE FOLDERIDDLE MARKIGATING RA DRAWINGS TERNATIVE PRALINEAR " + "GBAKURUNENTESE CROSSPPOPOTAMUSRIGHT HOOKIED SHRIMPTRESS AND " + "TREFACTIONHREE ABOVEXHEEJ CEEVIDEOGRAPH POLICE CARANGULAR TOTOP " + "CORNERGANDA MARKHOTIC HOOKPOUND SIGNIGATURE OEGAS BZUNG TRETCHED " + "CROEZENIAN INHERENT A AND MOUSEBOLD SHAFT2 LONG MBOING-SHIFT ANDHI " + "MARKING LARGE INITIAL RAROAD OMEGAAUTOMOBILE2 PLASTICSFOR RECORDINDU " + "BELOWTAMAN SIGNUSEL HORSEGOLUBCHIK THDAY CAKERED DRAGONTHAPASCAN 2 PLUS " + "ASH AND KNIFEUSHED FACEVIE 
CAMERA LATE FORMICAL TAPERRDHACANDRAWITH " + "WINGSASTERISCUSICK FIGUREPASSIMBANG KABA TENUPEDAL MARK7 PLASTICSRKING " + "FACE4 PLASTICSRECIPITATEFORMATTINGGUA PI MAOINDEX BENTBLACK " + "FLAGASPIRATIONGGRAVATIONBA SATANGALPAPRAANA WITH RAIN WITH PLUSA TANG " + "LAIED FINGERSNTITY MARKED FIGURE-N NGGEUAETALENT SIGN WITH " + "PAGEENETRATIONNTO SHRINESHMIRI YEHLEFT-HAND -LUE KARANENS SYMBOLLEK ATTAK " + "NAKE BELOWEDESTRIANSLENDED YUS POVODNAYALOWER HOOKALEF LAMEDCROSS MARK " + "THOUSANDSCROPHONIC UBLE DASH WITH RINGSHARP SIGNLEFT GUARDLEFT " + "LIGHTMONOGRAM BLEFT HEAVYMONOFONIASDIRGA MUREEONGCHIEUMMONOSPACE AILED " + "BIRD PLUS SHU2EARTH MARKW OR MODELCOMPONENT-COMPONENT OANDAKHIATUPPER " + "HOOKNUMBER TENDIATONIKI LTERNATE UA PLUS KURLTIC CROSSSBUB " + "-CHALENTHUSIASMLEFT SERIFA PLUS IGIEBENSTIMME WITH LOW DIGIT ZEROMONTH " + "SIGNSGOR RTAGSSMALL TAH EIGHTIETHSLONG FINALLONG OVER UP HARPOONZAR " + "AMULETNDU TEMPLELONG TSHEGCY MESSAGEDA PLUS HANGUAGE TAGUP OR DOWNUP " + "NEUTRALNGLICANA WLLOW HEARTDA SATANGA SCHROEDERSELINE ESHAB2 TIMES EICH " + "STARKABATA TREED WITH DOTLOGICAL ORAKKHANGYAOSMILO SIGNNASPIRATEDUNKIA " + "SIGNLHAG RTAGSLGIZ EOLHX WITH TAILSPACE MARKCURLED WAWNANGMONTHONOTE WITH " + "LET SYMBOLSCAN LINE-ND SEGMENTLINDRICITYLIMITATIONDED PERSONNDA PA " + "NJISE-CREE SKLIGHT BULBLIGHT BEATMOTORCYCLE WITH TICKEEKING EYE RGYA " + "GRAMCURLY HAIRELT BUCKLE RESUPINUSMEL SYMBOLMALL ALEPHSSANGARAEAON MEDIAL " + "E PLUS SUMCISIVENESSADAK BINDILANE MERGE WITH EGGS TIMES SHESS OF MILKU " + "CIN HAU UM ROTUNDAKRYZHEVAYAWHOLE NOTEST PALETTEOLON EQUALLACK JOKEROLING " + "FACEDUOUS TREEWHITE HAIRRUPEE MARKLA USED ASMEEM ABOVEUMAN EARTHSIDEWAYS " + "IZEIRO SIGNU2 PLUS BACIRCLED CAST-FEEDINGOMMA BELOWDOUBLE BARSSANGPIEUPM " + "STALLIONMINO TILE OVER KAD5COLATE BARAEDA-PILLAUAM TSHOOJRUDIMENTA " + "-SHAPED HASIXTEENTHSEQUIHOPPERALLY MARK LE LETTER ME PLUS " + "ENLE-DELAYEDCHECK MARKEARLY FORMUARDEDNESSADDA WITH OF HYGIEIAWHITE " + "FLAGMILLE SIGN WITH BASE WITH BELTMADDA MARK SPARKLERHEADSCARFHARD SIGNIA " + "SYMBOLHARACTERSSEMICOLONNGER SHIPZ DIGRAPHNCLOSING NFORZANDOSHAB CEEBLOND " + "HAIRIDEWAYS UARCHAIC MRFUL FACEQUSHSHAYAXHAUSTIONNG SANDALIDEOGRAM " + "QUADCOLONLONG TIP TIMES PAPSEPTEMBERQUEEN OF IALECT-P NDAILING ICE CREAM5 " + "CYPERUS5 LONG JO AND TAILWRY SMILEWORDSPACEMRACHNAYAHINOCEROSHOT " + "SASAKMAEMGBIEEWRINKLES HIMA SIMARED JOKERMUKPHRENGRCHAIC IIHIYYAALAAREAK " + "HERE TIMES HAM HE-GOATRDEL DKARRCHAIC RALVIN SIGNREDNE ON APODEXIAHOOK " + "MARKMBROIDERYZAL SASAKMALL RINGHWAZ EH E3 PLUS ANTIMES NA2RIED FACE5 " + "BATHTUBLOWER DOTI PLUS LI STREAMERMHANCHOLLR PLUS RA " + "TROMIKONMETOBELUSMARK CIM ZAKRYTAYAHREE FOR AND CURLHI SYMBOLMARK SHADNA " + "KHONNAXCITEMENTREFORMED AND BELTSIVE FACE TIMES UDISEN-ISEN PLUS LAL " + "PLUS KU3ROTATION-OTAL SIGNOF STIMME-STACCATO PLUS GUDT ON BONE PLUS GALS " + "DIGRAPHODIASTOLET OF MEATLARGEMENTYRANISMA OKED HEADITRA SIGNZERO " + "SIGNOKED TAILLAN SIGN OF BLOODIVE-PULL-IVINATIONNVERTED ROUTH WIND PLUS " + "ZA7 PLUS TUROUT MOUTHYEAR SIGNYEH ABOVEYEH WITH OURA SIGNORTH " + "WINDTAKHALLUS PLUS SAGSPIRITUS IRST MARKTABE SIGNOCCLUSIONZENE RINGON " + "GROUNDL ME HANDKYO TOWERON TEUAEQSTEBASKETRTER MARKRUM CLEF-OO DENNENKU " + "RU KHAKSTREPTON OVER LUMONE MARK- OVER BALKEMPHRENGONE THIRDSTRELNAYARTS " + "MEDAL0 LONG LEONG GRAVEKING BOOTONGSEONG " + "RPORATIONOKOUFISMAORT-TWIG-SSANGSIOS1 CHARIOT OF PAPERJERUSALEMLACKFOOT " + "RWARI DDAOM SYMBOLK GESTUREKA- SHOG KAMEYTSA OP HALF OSTAL BALLPLE " + "HEARTLITTLE UP GARSHUNILISSANDO IGN NUKTAIGN SAFHAIGN 
TOMPILINE FACETEH " + "ABOVELIGHTNING-AMMONIACIGHTH ASHTED PLANT RICKSHAWNO TELEIAPIDERY HAILE " + "TILDE247 DIPTEILIPPINE Y BLOSSOMNIGHT OF NGUN SIGNPROJECTORZIR SASAKSMALL " + "YUSPPOSITIONLLABLE OMPPOINTED LLABLE B0NIGGAHITA RA OR RINIHSHVASASOF " + "PASUQ FROM BARLIVERANCENING SIGNIGH HAMZAP ELAMITEING LANESP DIGRAPH-LOW " + "TONEING STONENTRACTIONINISHMENTROJECTIONINNYIIYHELEFT " + "TACKNUSVARAYAPAA-PILLAOW KAVYKATANDSTILL2 GARMENTOVER MUSHLEFT RINGOVER " + "GAN2-MID TONENTERPRISEPENTASEMEPENT SIGNIN SQUAREINAL NOTENSERT AT " + "INARBORASRNEY PARAY-FOURTH Y-FOURTHSRO WIDTH NTESSENCE-KHYUD " + "PAPANYANGGAING CARD ING DOLLSPADE SUITING GLOVEED DIGIT ETRASIMOUEAVY " + "DOWNURNED AYBBITE LIPSEBIT SIGNTRESVETLOAVE ARROWETTI BALLCHOSEONG URLY " + "LOOPFROM WALLUTRA MARKFACING UPED PLANETABOVE TO UPPER DOTATHAMASATAL " + "RUNOUTCORN FACEVIGINTILEUURDHAJA UBSTITUTEANG CITI URNED GANFEH WITH " + "TUKWENTISDEPARTUREURAMAZDAABKHASIAN ANTHAKHATDENT AND VERLONG " + "AAJANYALANUR-DE-LISACE NOTE ALI GALI VRAKHIYA G IN HOLEA PLUS " + "NAVELOPMENTAOS ICHOSCAPITAL QGREATER YANTAYALANBICYCLISTCAPITAL IANSKRIT " + "SUE MAEMBAGITTARIUSBIAL SIGNCARTRIDGEDAD WITH B DIGRAPHEIGHT OF " + "CRESCENDOVISARGAYAVOCALIC RBEER MUGSVER LUGALD SALTIRETUTEYASATCANG " + "TE-UTONE MAI EEN WITH ER BUBBLEVICE MARKBING CANEGRIK SIGNENTRY SAWWITH " + "FACEATTACHED EFAIDRIN CAPITAL DANGGEUAETEFORMED TARISTERA HALF NOTEFISH " + "TAILEMPTY SETDOWN SIGNDOWN STEPCOIN SIGNADMA GDANBASE UNITWING STAREURO " + "SIGNADEG ADEGARM CLOCKAROSHTHI VOETOCHIEFINAL NUNCHANICAL CUBE ROOTCLOSED " + "PLESAME DOTALPAPRANAES AKURU EMBEDDINGAFFE FACEFLAT SIGNAF PERSONBOTH " + "BENTTREDECILEALAYALAM ERTY LINEBO GYFU GHALSHELETTTED STEMDOWN HANDBO " + "BAIMAIHALF SIGNELEGRAPH AISED DOTFINAL NGABRUL SHADFOUR BENTAS MEMBERETER " + "SIGNTO CORNERERCIAL ATE AT LEFTUNGSEONG VANAGARI URUZ UR UVINE " + "LEAFUPTSTIMMEUVUZHAKKUAINTBRUSHFINAL MEMDRAM SIGNHAIKSUKI " + "UNGLASSESCHAVIYANICOMPLETEDWASH TAILUMED HEADELLOWSHIPTRAIGHT UDUS " + "RTAGSVEUAENGAMANEROSIS KAIYARAAEVEN OF CHATTAWA OVER " + "KGKATAKANAKASRATANETRASEMEL POLISHETA SIGNCK CHARTET SHOESOHM SIGN PLUS " + "DI PLUS DUL-LAKUNAEST WINDLA LENGACLIMBING OVER ZIEUFEUAETONE FOR OVER " + "MUCHINESE ON CROSSOMMA BARCLOSED TOMANIAN OM NTEUMOLLOWINGBUNDANCEBOX " + "TRAYOVER GA2OVER BU FILE BOXBRA FACETTO MARK8 KANAKOYBEYFILIROSSED OANC " + "SIGNYENISEI IRD MARKYER YAGHTAI LUE FEBRUARYTAALUJA IS FORM BOL SIGNING " + "ROD LANTANGBOT FACETAR EYESOVERRIDEIS WHEELTTENTIONOVER TIROVER SHEOVER " + "SAGOVER GI4FINAL THCASSETTE1 BARLEYJACK OF " + "JAVIYANISWIMMINGEXCHANGECEILING RSE DUNGJUNCTIONSUPERSETCER " + "BALLEVERANCEOO TYPE SUCCEEDSCANDICUSIS-PILLAC SIYAQ OTIFIED YESIEUNG " + "NUTILLUCABLEWAYITA MFONOT MBUAETURNED MCAL DISC OTTAVA AMS HORNT NGGEET1 " + "HELMETYIDDISH ORM FEED OF YARNOREHEAD ON LEFTNAVIYANIECH YIWNLTRY " + "LEGEBEEFILILUB SUITSMA SIGNNCE SIGNM ALLAAHED BRICKULLS LEGNAMENNY " + "ZAKRYTOEAIYANNOINA METEKN-JOINERSIX DOTSACKSPACELORRAINEABAAFILIWBOY " + "HATABOAFILIDAMMATANLONG BARNG RTAGSDANTAJA LONG S TNEUTRAL E OF " + "POOUKEUTNDALOW DOUBNEIFORM LOW STOPNED FOODDDY BEARLOZHITIE " + "SLIDINGSIFISTONHAN-AKATDIM GUNUUNG DASHAEN NYAMMON TIMESHORT ERSIGN " + "LAEMEM-QOPHUNDERTIEUNDERDOTDIT CARD TTUDDAGMMATION MIONIAN DOCUMENTW " + "PRINTSDUSHENNAMALL AXEMY HOUSE TALENTSMANDARINDVISVARAMANGALAMDVANTAGE " + "SCOTS SSHKIR KAMARRATANDS-CREE SHOE JOTDIAMONDSWASH KAFDIFONIASME " + "BADGEUATRILLOERAL URNER TRUTHALLIANCESALT OF VOLUTION-PHIEUPHUAREG " + "YALEANING SQUEEZEDYRILLIC EOUT BOXVOMITINGCOUNCIL 
COUNTERSA SIGN " + "AUBJOINERENICIAN ESH LOOPODESTONE0 BRONZEOCUS OF OCK SALTOCALIC " + "MYPORROON-X BELOWOBOOFILICOMBINEDEREVODKAERDIGRISLATION XSNA LDANSE " + "WEDGEELEPHANTEK ONKARNITIAL ZD BUBBLESOFTNESSD CROSS NINE OF SCRIPT " + "GLKULIZMYUP TRUCKNI ABOVE YUQ NAEUDAWADI SATCHELEGORIAN " + "SENTAGONLOCATIVENOTE PAD POLNAYA-KHIEUKHSPERSIONSANYAKA EN NTEUMNRES " + "TOSLESS SHALESSER YNOVEMBERS OCHKOM-EM DASHLF RING LFWIDTH RASWADI-CREE " + "THCURLICUENO THUMBCURSIVE NO SLASHY BEETLERDEL NAGIMANSIS GBASINNAASTERN " + "WGLASNAYAAZHAAKKU CURRENTTO-LEFT ATAKANA XCELLENTVERGENCEATE " + "MARKATEBOARDTHOSCOPEBINOVILETICK IN PENTAGONAPITAL FRILLIONSREE " + "MARKINAGARI ARTYRIA RED HAIRBACKWARDFRAKTUR BATBEIT QAIRTHRAY " + "POPPERHESPIAN REATNESSTHIOPIC BACK YERANS SIGNFRICAN DPAVIYANI ANTENNAAST " + "WINDHOP BELLQUINTILEBEVERAGEBER POLEGORAZDO HANDLESAVY BANDTRICOLONGREAT " + "SA CEDILLATER FACEIGMOID SWRINKLEDVE SASAK3 ARMOURWRITING RAMMA GGRAUGHTS " + "BILLIONSATH MARKHREE OF RASMIAN GARITIC BIEE FONTRI DISHWON " + "SIGNAY-NIGHTRIYOOSAN AT DUSK56 TURO2FLOURISHFOR STOPPALOCHKABLE " + "SIGNICHAEAN ARCASITEPUSHPIKAZWJ THAJV OVER MAR " + "TSHESHARBAHAYZWARAKAYHARMONICBLINEAR PAKPAK ETIRRUP RTISMOS EANE " + "TREEARKLEAN BLED CARHAGALL HWO ABOVEPRECEDESHALF GURGENITIVEVESSEL " + "BPROSTAYAPUB DAWBPAIRTHRAARSI YEHRESVETLYWN HEARTI SHAKTIING BELL KEMBANG " + "FACING ING BOWLTOWARDS ARRIVINGPUN IYEKPTHAHA SOV ROGLF FACE RAMBATAY " + "SIGNGOLIAN VAYANNAVE DOT QUEEZE GHEUGHEEL PUMPUBUFILI-WELSH ERNIN " + "ANJAEMLILAMITE ZQAPHA D MADDAD MOUTHIBIFILIGRADUALPSTICKSALLOT " + "X-TIKEUTSCOOTER CHIKI LASHES CER-WAAXIMATAQUARIUS-CREE RIANGQI LIGHT " + "XCOMING 3 OMEGABAMBOOSSOLDIERTRAINERA NAME VAPOURSVANESE " + "THESEOSPUSHPINSANDHI CRACKER-MU-MO--SHIFT-3 SPICE3 SWORD-MACRONENSHUETI " + "RTAGS6 NGGOOI NTEUMSAMPHAOLE LEAFVOICINGPURPLE A -PHRUSPRINGSCOPTIC " + "THIEUTHHYAAUSHNUMBERSSA VAH BAIRKANSAYANNAVAV YODCONTACTEN " + "LEAFS-SAJDALEUT KAVOWEL K-THIRTYTHKUQI SANGAN ALESMA GLAGOLIER " + "THAN-KIYEOKLEYBALLNTAINS LAYANNALEK TOO3 WHEELLENGTH-TORNADOAS " + "SIGNHAARKAADYNAMICSHIFT TMANCHU WO WAENMUNGKAH TEDUNGMARCATOVYSOKO DU " + "NJAAWO MARKMASSAGEMRACHNYDIARGONDRIL BUMAAYYAATIKRAMAEAD OREHEXAGONUM " + "IYEKMAI SATTIVATE VEW NOWREATHY ASHTRA ACTER TDHALATHE GLASSE DRINKAD " + "NECKASH FRODIPLOUNDISIMOUMERICASUN MEUTAETMEUNHANGUL DOFONONSHORT AMINIMA " + "MINGKALSIDDHAMARDNESSAHAPAKHARRED OMBOL B0ARRED BREREKANHEADINGWO FOR " + "RESILLOHALANTASIGN UD5 NGGEEAELAENGHAYANNA WAAJIBMEETORUAU " + "MARKVEMENT-DANCINGDANESE WOLOSONG MASKRAKHANG SHAKERHIUCHUSNESTED " + "SERPINADAYANNAUKKAKHANEQUDAADA FACENIKAHITLJUDIJER2 GUNUEIGHT KUP TACKUP " + "STEPUP SIGNWORSHIPRA REPAAPEZIUMAUNTLETAULDRON BUTTONUP " + "MARKWDRIVERLYGISMAEAVY YAATAEAN ASUTORUDEAVOURRD DISKRD FACENAYANNA " + "STRIDESHAKINGNANCIALHI SIGNRDO RJE APLOUNUP HANDRANGKEPRARIETYATH OF ED " + "RICEWAZ EOHSEXTILERAYANNAECEMBER SLOWLYTAISYOU3 AREPAYMAIC " + "LBULANCESUKUUDOBUFFALOOUR OF RISIMOU9 CLOTH MENDUTRTHIAN OUT HUB2 WOMAN " + "MUQDAMJIBWAY ANGKUOQ7 NGUAN OPEN-O MUOMAEONTIEENBLACHKOWIFRUITCELSIUSOP " + "MARK KEFULAXOPHONEEULEUNGOVER ANCHEINAP0 WHEATTTHACANANGLONGKAYANNAFINAGH " + "0 SPEAROVER DUVILIK BYNAMIC FORKINGRIPPLE " + "CHEVRONKEUAERICHIEUCHTROLLEYUSSYERUTTILIK BREVIS YELLOW BERRIES3 " + "EIGHTBERGINETALL AAPHUTHAOONGONANANGLED KARO BAONG UEXPLOYAN URFACE " + "URGLASSPENGKALCAP TENISIBLE T ASHESRMUKHI ISLANDF SASAKPAYEROKIVE OF " + "IMILAR F DAVIDOT NGOMITALIC PECTIVEOT REPHPEGERMAFATIGUE " + "OCLOCKORTIETHCANDRA ILLEANNCABINET7 NGGUAITON RA1 ARROWAN 
MARKJAIN " + "OMJARATI TCHFORKJAYANNARRECTUSJECTIVEWIGNYANCAYANNAURATIONTAYANNAJERAN " + "JIL DRUMBIG YUSORKHON FAYANNA26 EYYYPAYANNATA MARKOREVMA SYNAGMAIKHAHITY " + "GREENORCULUSUT TIMEPERVISEANGOLATCK LIMEPOVODNYSTERINGGENERALFLUENCE9 " + "NGGAAKUTAARUKYLISMAESTIVALCLEAVER3 MONTHKPAK " + "WAVILLAINKOMBUVATYSCAPEOLAPUK KOQNDONKORONISINNABAR " + "FLEXUSOKRYTIEANDERERALTILLOPRENKHACLOTHESLAGIOS " + "ROGRESSTHALIYALAK-050OCTOBERIC WANDOCTAGONCOASTERP PIEETICYCLESOGDIAN " + "OWILO SL SEGNOBARREKHPPROACHOFFICERST TUBEUYGHUR BORZAYAOF SOAPCLOSE EOX " + "BACKICOPTEROX LINEROKEN L2 OLIVEYA LAMPOMERANGPALLAWAPOMOFO " + "LONSUMKKURUNIETNAHTATASHEELYAH LI TRYASKAPANSIOSPANESE YAYANNAKHA " + "YATGAYANNAFINAL YBOURINGON FACEYANMAR MAELEEIFIED " + "ETSECHKABOARDERAMAKKANOW ALEF PLOPHUAM ALEFRY " + "FACEARADDOBOWTIEPBOARDDIESISROCKET TIKHYNACLESSICKLEBLINK " + "DICINEDOKMAIANCHORRENGTHAPYRUSAJANI PECIALVILIANAPLI MURNAMABISHOPDERMA " + "PALUTAPEAKS BURGERAEMMAE AGUNG MURDAASSINGVERTKAC CLEF LONGA " + "LELETUNGAAMARBUTADGEHOGN DASHSHAYIMVIRIAMSHMAAMRICORNREMEDYZHITSAASHGABOW " + "TIE KAPALAILUREN-NISFARSEOSMPLING MELIKN YANGOTTED-BOFILINSUZ ANOWMANON " + "KEYNOZHEKSAUCERNSANAQPOKOJIPOMMEENTEVMANTIIMUCHURCHNTOGENUUMISHCREASECRAYO" + "NCHEMA ONOCLEANUARYNOKHUKCHEIKHCUPPEDNOR BUCHESS CUMBER " + "QATANBEFILICHIRETCHO CHOBELOSOFOUNDPUFFEDCLOSETS " + "TENTOGONEKODHADHANIMALBANWA EPOCHS SHOE EQUIDOCENCEOCIETYCODILE " + "DIPLIUYANNAQETANARIISAPQAMATSNKNOWNOITIC " + "PWATCHUZEIROBAFILISAADIYCKNESSRAVEL-PEPPERAPISMACARIK CASTLECATAWANEUME " + "AKEUAEBGBIEERAKLITATTERYATTIC BISCUSCALATEOSETTERKAANU SPLITAK-668NDA " + "TARBITSAPENCILDE DOGAKABATUP BOWNISTERRSHANAPIRIT OOMUUTRSIAN " + "CARETNIRUGU RULERRSENICCEVITUNIZKO " + "RISEMEUPNAYACHADINCHAMKOANGKATOPITSANGBAT NCH " + "FRPICKETRACINGDAGGERRAAKANOPEN POPEN DAUTUMNBETAN OOPED SOUNAP9 MUENKE " + "PHOKAYAH UBLE XEUNYAMELLITELIGIONLIGON 2 NGGUI MAIMI HOOKKASAR 2 MBOO6 " + "NGGEEUREUTSTROFO-HIEUHEN GHEEUAENAKEYCAP-HIDETEMPUS SPATHIGHAMALIB " + "YAMLEVEL-3 NGGAIASMA WEORTHGHETTISPADESGHEUAEEMASTIHORT " + "IGOBLINSUCKEDEVENTHLONG EUGGAGEGORGON00-102GO NGUEENTH-INSHIPSURANG4 " + "DART4 DEERWRENCH4 KPEEINGAATLISHA HUR PALITIKIUCIBLEHUMBS EIGHTYGLAZ " + "HINHALETARGETUDARKA2 KPOOLLIPOPAASHAETOPBARGNANT LAMEDHYOMBO TE USETE " + "TSEERMATASTLERSUAEQTUXO NEOSTOLI LASTON7 NGONKRISISLD " + "MAPFRAMESUANGXIKNIFE IGGLESGANGIA3 GBEE3 HEEIYRENE STANCYSTANCE7 MBEEKY " + "WAYESTAN 7 KAPOU MBITILBOAT7 MBUU7 NDOOIN YEHKUSHU2LAFRONSSLESSET " + "KUTILLAGETRIKE 9 NJEEKTIKO 7 GUANLAMADH6 TREEENIKI 0 " + "NGGOGEDOLAKILLERTRAPLIIDE ESFORMEE-IEUNGYSTICKINDHI GEADALEU " + "MBUTAUROSTHAKKUGGLING0 NYONA-KARA0 NYUNTAU ROEPACT INAGMA-PIEUPSPLIT " + "KLITONTERON SPITALINCUNXX FACEA HAAMXIMIZEIEVAN GBASAQTEUWEN0 " + "NGGIENTIMAFORTIS1 WINE8 NYENMANYA WN BOWWN BOXSYOUWA8 NYANTUXEDOF " + "CLEFDVANCEDUCEUSHERMESIX OF HEISEITIMATEF MARE1 GBOO1 GOLDIXTY " + "PHIMAHUGURAMUMADDAHMADR MEYANNAE WAVEIYANNAMALGAMITULUMAGRANTSYNAFIHIBIT " + "5 WOOLMALL FWINDOWTIKENOHEUAEP8 MBEEITABLEAFFIX TURBAN1 NDEEFATHA " + "HASHKAFAMILYISSIMOHAM AIHAMEDHISSHARHAMILOISSANTAGOGUE5 MERIWO OF ME " + "DIEFF OF MECHIK1 HORNTAIKHUTIRYAKITHER HE MGOAESURAT NJAQHALF " + "HIRINGUTAMINGEXHALE8 HOOUHO HOISKAPI 4 NGENSIXTHS4 NJOOM BOARM " + "BULLHIVETEGS-PA SURED YAKASHED ICEWBERRYED CAPGRASP 4 MUANWORKER6 HUAN6 " + "GUEIYIN-DOSWORDSEXISTS4 NYINHINGE EAHMUKXYOOJFLUTEPEAN 8 KPEFEARN8 " + "GBUFSAAQRONOSPAATOBREW INNA PASEQ2-VASZHAINPATAKIMMERINTHU1 " + "WVIIMGBAFLAGSPCHA LACA7 NEN7 MIN2 NJA2 HEN1 PEEANGELOTHALEYBUSBISAHILLU " + "2 
NJUPEITHZIDI 8 FEEILVERYAMOKPEN O JERA2 HOOPEN-P1 TWO2 POO2 PTEYECEK2 " + "MBUROGOMWISADBORZYTSADI2 SEE MOOD1 YOOTTOCK2 MBEBOOTSFORCEBSTERTTORU1 " + "TEE2 MBA1-VASIRACYTUEUMIPEHA7 TWE KAWI7 NIN8 KPOYENAP2 KPA2 " + "KPIBLANKTSEEBWINDUBLAKOOUNCEURTLEIPINGWINJA7-" + "VASFLICTTSEREHIRIQHISTIHIUTHATIYA4 NDO6 GBARAIDAHOLAM4 TOO4 WUIATAF 4 " + "WOO4 VOOWLINETON AGVANGGURE HOLARHIMELRATERGULUSRASHAAWAY " + "WU318WUAETAVROSHOTELGORGIQUIRY6 KOOHOUR 32 JE CHWV4 KPU4 MON4 MBO4 LOO4 " + "LEERACHYAUTHS4 GBIZSEKAR-RUB CAPO4 ABB AMPS5 NDUHASE-HATHIHAYINHALA AR " + "AERIEENRIEULHAINU5 KEEZYGOS5 MBI " + "ALLORICEMHANNATINNETIPPIARERUHALQAASPERTILES4-VASHETHERDIONHI " + "ROTIGMA5-VASRCHIDRELAARELA " + "REIWATKAANREGIATMAAUARTARHADDAAPPLEHAALUASEIAPMUNK3 HINPLUTOPLUTA HAA " + "UTIESBENDE GORAPLHAU3 FOOGAMALPPAGE6 WEEBASSAGEAN 6-VASPONSEPOLI " + "FUJIZILDEXING GAZE-BEITH3 HON ICONBHADHBHETHRITSITRIOLIKARAIHVUSXTRA- " + "ILUTTEGEHIKURUXW XWUTEUXPITER3 BOO7 FUA7 GBEGESH2GADOL7 HUNPI RO7 JEE3 " + "RA3HUTA TORSO DEKABAARUTHINGRILLA3 VEEQAAFUI KOIBACUSTRACKGOGI " + "TORCH3-VASHROOMI-RESVATOR3 WEI COATHUMP 6 SOO6 SIAICHON6 TA2ICRONBASA " + "PTUNEGHULU6 RA2BALAGTRAIFVIET GHNUTPEPETPSILIIARDSIAUDAVAAVU3 " + "NDIANNONNNAN -ALAFAAMAEEKEET-BEAMUBURUUBUTSEISMANIS " + "FYURIICUBEDEMBICNINTHAADHUSOLVEWBOATLOBE " + "SENTONGENTEGALILOMKAEESHIUGUSTVRIDOLOOP UDAATNIEUNEIDONNGUE SARIEGL " + "HDAIC NGMANEGIONLOAN ALGARLEASEWFISHLEERIEOPLEEO-EUENUTOUBITOO BOX9-VASO " + "RUAO PLA-SIOSLAYARO KAIO ANGSADHENZEUMSAKINALLI " + "ALLEY-RINGSALADCROWNNSYONNSUAENSIEESATA " + "ENANOSAUILEMLJACTRICNUENGENJETNTXIVENENGOTERIA UNADATUSNTHA A " + "YUESPINEUMMERMSHAEMROCKUNGBAMPIRESHOOKSILA3MPAREVZMET TELUMALONMUOY " + "SHIMAADULTMAQAFMUCH DSMANMI ROSHTINDKAR " + "MISRAMETRYDLINGWAQFAMINGOSICLEAGMA " + "MIEUMWAAVUDOTS-" + "MELONAEMAEMEEMUMEIZIAEPENDWICHAEREEMENOEMEPETMETEGMMOTHUNOO LURALEATH " + "LWAY " + "SKATEEBALLDELTANCORADENCEDEPTHUKARADBOATLOURENENOENEMKANASHINEGARDESTYNA " + "POSHARUMADYAMAI KMAIZEDHAM E GEEWATTOMAALAACHKAUNITYM " + "RAMMAAEHENDEPSHANGEAGLE TABSSHAR2SHARANADA MACUSNABLACHULASUKUNCIEUCF " + "COWRUSH CHUTECCEPT1 FANOMBIEEYYAL0 MANAMEKH8 NWAK-020RYASOTUUMUSTNUTOLD " + "XRRITOKO LA9 MUNITUALCANUSCEREKSTORMROWN F SOWYIZETF " + "EWEKNOBSUQUETCHADACAUSEEURAELATIKRUHUAKARORRUDAA9 DEE0-VASOQPEN9 KUAJANG " + "WIANGCAUDA8 RO2EVAL 1 DWEKHAPHEUAEMCHOOLCHOOIRUMP-CHIME0 " + "OILRULAIKESH2KERETCHESTCHERYKBALL9 MENU U U0 DWOERKHA8 " + "MANCCOLICAKESCLUBSJUDGECAKRAURITYLAGUSESHE3JUDULALOG " + "LABOROPLETLABATVITAEFAIHUOBYLAOCADO0 BEELAMDA8-VASESO E9 WVE9 " + "WVAJERVISURYAISTLE0 DOO0 JOO0 HEESTARTKUSMACKAGEKURONURINE9 YEEET TUOJKI " + "8 NANCLONECALYAORUTOOKARACECAK9 NDEOKEE 9 NDACAANGITHI 9 " + "PU2JUEUIOSTERALPHA0 GBOCECEKCLIFF9 NUNL-JUZ9 NONL NET0 GEE0 " + "HANCKTIEKWAENFAAFURUISROOKVEYZRRORRROISHYATUKIZETATURUWAIRWAHAYEUXUNAVWAET" + "WAAKUNAHSINKRPSEVEUXSIKIRIFYURUSVESTZATAZZY TZELZIZ2VOS " + "YUDHRUSISOKARUTUYUKUZELOZAYNUTANSA-ITAXIUTTYTFONXEYNZIETXEIAWAW " + "SUABVIDAUON SLURULU SUNGRSO-RT TRUNARUNGROA " + "YWAASELFWDERSEEVSEENWULUROARUHURRUKUVIYOVEDESEYEVUEQHEYSHEENHEEPHEROHERUHE" + "YNHEYTHHWA2 YAHID HIINHILDHAVEHAYNHWAA2 SOHUEN2 RO2 QOHSHUHWAH2 " + "PEIANOIARA2 NOHMI 2 VIHOKEHOM HOPHHOSTHSDA3 MUFFINFIRIFITA3 PA3 MI3 ME3 " + "TAEZZO3 YU3 LE3 RIFAIBFASTFEEMFETHFEUQGIBAGIDAGIEAGIR2GOALGORTGROMGRU " + "GUINFWAA3 L33 KU3 JO3 JEGAMEGAML3 EEGEDEGGWS3 A3GHOMKMA KOBAKOETKOKEKOKO1 " + "KU1 KIKWAA1 IN1 HA1 GA1 DULAAN1 DO1 RAKANGKAPHKCET1 QI1 " + "POKICKLFERLFIELIFULIUMLIWNLOLL1 DAKALIILUYIK HINORINY 2 L22 KAIFAT2 " + "BUIGERIITOJOT JEONJIIM1 
YI1 VU1 SU1 SI1 SAKAAFKAD3KAKOIPODIQAAISI " + "1358ARGIAROO7 JAATIMAPAQ7 LUAPON7 KIARA36 NA6 RU6 QABASH6 POBAYI6 LA6 L66 " + "JOATYA7 EIAULA7 DD7 DA7 BE6 WU6 SEBAGSBALD8 QE8 PI8 KO8 JIAAMU8 GU8 " + "FOAFEL8 EN9 JA9 TA9 TO9 TU9 SO9 SI9 SE9 PI9 PAA IEA-HA8 WE8 SUAACUALTA7 " + "VO7 TIAMLA7 REAN X8 DU8 BOAILM7 ZAALDAEAAE5 FE5 FADZHA5 DE5 BB5 AU5 AN5 " + "A2EANSEEEEDGER5 LIDIM2EENG5 JU5 IN5 GI4 FI4 NE4 L44 KE4 DO4 WAEENUEETA4 " + "ZEEHEH4 WIEIPTEIRTEIWS4 TU4 TE6 DIBUNGBUOYCANOCASECAYNCHAUCHEH6 " + "HIBERDBETH6 JE6 HEBOOKBORE6 FU5 VACWAA5 TODAGSDAIR5 TEDDAKDDHI5 OODEAD5 " + "NU5 MOCKEN5 WE5 WACOONCOREHUVA5 VECRETNDAPPAWN0 " + "BINCERPEEPNAM2NHAYOXIANGA2 OHMNET NEO -UM NDUEPLUMMUASPOLOMPET0 " + "NIPLUGPRIL0 PUMMU2QEF NAG NAAUPEUX0 HO0 JUPHABPHIN0 " + "KOMVATMUINOBATOFUMOENGODLEOONEOBROO-YOOOTH R SOJI ONA " + "WEBNRUAOUBTNPEANOWC-ONE-RAYNJAMORAXNWAANUTSORIINUNGNTOCNTAANSUB0 ZO C " + "DRAFE0 SA0 YEQOPAMFAALUMNMARUMESOMARYREIAMIIM028BMIINMLYARGU2LOVOLUIS0 " + "WIMEARQHAU0 RADE6DA2UOPZUP8 ID70D42WAU5 " + "UCYACWIWOQUEHUEZ8F04-" + "0UDYA7AOMSREX9819E3UMXDJAE80DZEVOKAUMAUJAWXZOOZJEB89B576-" + "0620AZUVAUAYD6D7ZORQ00PUQQIGQIF7 " + "OQARPOQQUFVNOQOTQOFCA9550557BXGCAHBUD5B68 " + "AUQAAG-CAIVOYAL2BAU72C5-0VUUBIBIMNYOT18D15514DIWRMU Y00I-IHOJHOX0 E0 " + "UL000-0LJE04A0B9LFA1 XSUUJHAK00121JAH1-21-0JEUKUEKAQSIIFOMFLYO " + "YOAYXEHTUJFAJOEH3 IFUEES-OIX4 " + "EF8CF143-0XAUEZHEYKXAN305X0031CXWVXWG25320BNII-TE3 " + "DTJE2DD2-0HHANIB40488309713938291716494B4E1D1AQWR7R0C0D0VDW099F39092G9G3"; +uint8_t UnicodeNameToCodepointIndex_[239405] = { + 0x00, 0x05, 0xc0, 0x00, 0x6b, 0x15, 0xc0, 0x00, 0x95, 0x12, 0xc0, 0x00, + 0xdd, 0x06, 0xc0, 0x01, 0x03, 0x14, 0xc0, 0x01, 0x27, 0x18, 0xc0, 0x01, + 0x41, 0x16, 0xc0, 0x01, 0x57, 0x03, 0xc0, 0x01, 0x7b, 0x04, 0xc0, 0x01, + 0xd8, 0x0e, 0xc0, 0x01, 0xfe, 0x17, 0xc0, 0x02, 0x22, 0x0a, 0xc0, 0x02, + 0x3f, 0x0b, 0xc0, 0x02, 0x5d, 0x19, 0xc0, 0x02, 0x7d, 0x08, 0xc0, 0x02, + 0x95, 0x0f, 0xc0, 0x02, 0xb1, 0x0d, 0xc0, 0x02, 0xd1, 0x10, 0xc0, 0x02, + 0xef, 0x1a, 0xc0, 0x03, 0x15, 0x07, 0xc0, 0x03, 0x2d, 0x09, 0xc0, 0x03, + 0x84, 0x11, 0xc0, 0x03, 0xa6, 0x1c, 0xc0, 0x04, 0x0a, 0x0c, 0xc0, 0x04, + 0x2c, 0x42, 0x00, 0xe3, 0xc0, 0x04, 0x44, 0x1b, 0x40, 0x04, 0x5a, 0x03, + 0xc0, 0x04, 0x6e, 0x43, 0x30, 0x23, 0xc0, 0x04, 0x9d, 0x0a, 0xc0, 0x04, + 0xaf, 0x14, 0xc0, 0x04, 0xcb, 0x11, 0xc0, 0x04, 0xea, 0x0e, 0xc0, 0x05, + 0x25, 0x0b, 0xc0, 0x05, 0x37, 0x17, 0xc0, 0x05, 0x4c, 0x07, 0xc0, 0x05, + 0x72, 0x1b, 0x40, 0x05, 0x8a, 0x07, 0xc0, 0x05, 0xa2, 0x0b, 0xc0, 0x05, + 0xe9, 0x16, 0xc0, 0x06, 0x07, 0x03, 0xc0, 0x06, 0x24, 0x0d, 0xc0, 0x06, + 0x60, 0x0e, 0xc0, 0x06, 0x6e, 0x0a, 0xc0, 0x06, 0x7e, 0x05, 0xc0, 0x06, + 0x9a, 0x10, 0xc0, 0x06, 0xaf, 0x11, 0xc0, 0x06, 0xbf, 0x42, 0x00, 0xe3, + 0xc0, 0x06, 0xf1, 0x1b, 0xc0, 0x06, 0xfb, 0x12, 0xc0, 0x07, 0x0f, 0x17, + 0xc0, 0x07, 0x2e, 0x0f, 0xc0, 0x07, 0x5a, 0x19, 0xc0, 0x07, 0x68, 0xcc, + 0x85, 0x35, 0x01, 0x4e, 0x60, 0x14, 0xc0, 0x07, 0x78, 0x0e, 0xc0, 0x07, + 0x8a, 0x0b, 0xc0, 0x07, 0x92, 0x03, 0xc0, 0x07, 0xbb, 0x11, 0xc0, 0x07, + 0xef, 0x07, 0xc0, 0x08, 0x1d, 0x17, 0xc0, 0x08, 0x3f, 0x4f, 0x62, 0x1f, + 0xc0, 0x08, 0x5b, 0x0a, 0x40, 0x08, 0x79, 0x07, 0xc0, 0x08, 0x87, 0x0b, + 0xc0, 0x08, 0xbb, 0x14, 0xc0, 0x08, 0xf9, 0x11, 0xc0, 0x09, 0x13, 0x17, + 0xc0, 0x09, 0x5f, 0x03, 0xc0, 0x09, 0x71, 0xc2, 0xe6, 0x9f, 0x0f, 0xa6, + 0x01, 0xcf, 0x60, 0xb7, 0x0f, 0xcf, 0x60, 0x07, 0xc0, 0x09, 0x96, 0x0b, + 0xc0, 0x09, 0xd2, 0x11, 0xc0, 0x0a, 0x02, 0x03, 0xc0, 0x0a, 0x44, 0x17, + 0xc0, 0x0a, 0x6c, 0xc9, 0xa9, 0x51, 0x0f, 0xcc, 0x78, 0x03, 0xc0, 0x0a, + 0x94, 0x07, 
0xc0, 0x0a, 0xa6, 0x0b, 0xc0, 0x0a, 0xbc, 0x11, 0xc0, 0x0a, + 0xe4, 0x42, 0x03, 0x66, 0x40, 0x0a, 0xee, 0x03, 0xc0, 0x0a, 0xfa, 0x02, + 0xc0, 0x0b, 0x34, 0x17, 0xc0, 0x0b, 0x40, 0x0a, 0xc0, 0x0b, 0x56, 0x11, + 0xc0, 0x0b, 0x72, 0x14, 0xc0, 0x0b, 0x9e, 0x07, 0xc0, 0x0b, 0xae, 0x0b, + 0xc0, 0x0b, 0xcc, 0x19, 0x40, 0x0c, 0x04, 0x14, 0xc0, 0x0c, 0x14, 0xc2, + 0x24, 0xe2, 0x0f, 0xd4, 0x99, 0x06, 0xc0, 0x0c, 0x36, 0x0e, 0xc0, 0x0c, + 0x58, 0x17, 0xc0, 0x0c, 0x80, 0xc7, 0x2e, 0x21, 0x01, 0x38, 0x43, 0x00, + 0x0c, 0x92, 0x10, 0xc0, 0x0c, 0x96, 0x15, 0xc0, 0x0c, 0xb9, 0x16, 0xc0, + 0x0c, 0xcd, 0xc7, 0xc0, 0xa5, 0x01, 0x32, 0x91, 0x44, 0xdf, 0xff, 0xc0, + 0x0c, 0xd9, 0x05, 0xc0, 0x0c, 0xfb, 0x12, 0xc0, 0x0d, 0x19, 0xcb, 0x91, + 0xe6, 0x01, 0x0a, 0x69, 0x18, 0xc0, 0x0d, 0x27, 0x0f, 0xc0, 0x0d, 0x33, + 0xcb, 0x90, 0xff, 0x00, 0x30, 0x59, 0x07, 0xc0, 0x0d, 0x49, 0xc5, 0xd8, + 0x44, 0x0f, 0xcf, 0x70, 0x11, 0xc0, 0x0d, 0x55, 0x0e, 0xc0, 0x0d, 0x95, + 0x03, 0xc0, 0x0d, 0xa3, 0x0b, 0xc0, 0x0d, 0xd5, 0x07, 0xc0, 0x0e, 0x01, + 0x17, 0xc0, 0x0e, 0x2a, 0x14, 0xc0, 0x0e, 0x65, 0x1b, 0xc0, 0x0e, 0x75, + 0x49, 0xb4, 0xc7, 0x40, 0x0e, 0x81, 0x11, 0xc0, 0x0e, 0xaf, 0x07, 0xc0, + 0x0e, 0xed, 0x0b, 0xc0, 0x0f, 0x22, 0x1b, 0xc0, 0x0f, 0x5b, 0x03, 0xc0, + 0x0f, 0x6d, 0xcd, 0x7f, 0x73, 0x01, 0x08, 0xa1, 0xc4, 0x0f, 0x0c, 0x0f, + 0xcc, 0xc9, 0x17, 0x40, 0x0f, 0x9a, 0x12, 0xc0, 0x0f, 0xa6, 0x10, 0xc0, + 0x0f, 0xc2, 0xc7, 0x57, 0x8b, 0x01, 0x30, 0x13, 0x00, 0x0f, 0xdc, 0xc5, + 0x19, 0xdd, 0x01, 0x32, 0x29, 0x48, 0xbe, 0x5a, 0x40, 0x0f, 0xe0, 0x07, + 0xc0, 0x0f, 0xec, 0x11, 0xc0, 0x10, 0x10, 0x03, 0xc0, 0x10, 0x3e, 0x0b, + 0xc0, 0x10, 0x68, 0x1b, 0xc0, 0x10, 0x92, 0xcb, 0x96, 0x3d, 0x01, 0x05, + 0xa1, 0x17, 0x40, 0x10, 0xa2, 0x10, 0xc0, 0x10, 0xb8, 0x42, 0x00, 0x06, + 0xc0, 0x10, 0xe4, 0x43, 0x00, 0x89, 0xc0, 0x10, 0xf0, 0x0f, 0xc0, 0x11, + 0x00, 0xce, 0x72, 0xc6, 0x0f, 0x9f, 0x71, 0xd3, 0x42, 0xc7, 0x0f, 0xc8, + 0xf8, 0x11, 0xc0, 0x11, 0x10, 0x0a, 0xc0, 0x11, 0x2a, 0x0b, 0xc0, 0x11, + 0x3f, 0x03, 0xc0, 0x11, 0x5b, 0x07, 0xc0, 0x11, 0x7d, 0x14, 0x40, 0x11, + 0x91, 0x0e, 0xc0, 0x11, 0xa1, 0x11, 0xc0, 0x11, 0xba, 0x03, 0xc0, 0x11, + 0xe4, 0x14, 0xc0, 0x12, 0x0a, 0x17, 0xc0, 0x12, 0x1c, 0x07, 0xc0, 0x12, + 0x32, 0x0b, 0x40, 0x12, 0x46, 0x0b, 0xc0, 0x12, 0x6a, 0x07, 0xc0, 0x12, + 0x8b, 0x11, 0xc0, 0x12, 0xbd, 0x03, 0xc0, 0x12, 0xec, 0x17, 0xc0, 0x13, + 0x2d, 0x43, 0x15, 0xe9, 0xc0, 0x13, 0x3d, 0x47, 0xca, 0x45, 0x40, 0x13, + 0x47, 0x10, 0xc0, 0x13, 0x6b, 0x07, 0xc0, 0x13, 0x77, 0x03, 0xc0, 0x13, + 0x84, 0x0a, 0xc0, 0x13, 0xa0, 0x0b, 0xc0, 0x13, 0xbe, 0x11, 0xc0, 0x13, + 0xdf, 0xc5, 0xd4, 0x02, 0x01, 0x5f, 0x18, 0x07, 0xc0, 0x13, 0xeb, 0x03, + 0xc0, 0x14, 0x20, 0x11, 0xc0, 0x14, 0x4f, 0x56, 0x30, 0x4e, 0xc0, 0x14, + 0x74, 0x17, 0xc0, 0x14, 0x8e, 0x45, 0x60, 0x4f, 0xc0, 0x14, 0xa4, 0x43, + 0xc2, 0x7e, 0xc0, 0x14, 0xd3, 0x0b, 0x40, 0x14, 0xf9, 0x47, 0xc0, 0xb3, + 0xc0, 0x15, 0x05, 0xd3, 0x46, 0x6a, 0x01, 0x19, 0x39, 0xc2, 0x00, 0xbf, + 0x01, 0x15, 0xd9, 0xc4, 0xe4, 0x5b, 0x0f, 0xd3, 0xd8, 0x0f, 0xc0, 0x15, + 0x11, 0x03, 0xc0, 0x15, 0x1f, 0x09, 0xc0, 0x15, 0x32, 0x1a, 0xc0, 0x15, + 0x3c, 0x48, 0xbd, 0x72, 0xc0, 0x15, 0x4a, 0x0e, 0xc0, 0x15, 0x7c, 0x44, + 0x00, 0x2d, 0xc0, 0x15, 0x90, 0x10, 0xc0, 0x15, 0x9a, 0xcb, 0x8f, 0xcb, + 0x01, 0x1e, 0x79, 0x14, 0xc0, 0x15, 0xb9, 0x42, 0x00, 0xe3, 0xc0, 0x15, + 0xcb, 0x15, 0xc0, 0x15, 0xd5, 0x17, 0xc0, 0x15, 0xe1, 0xcc, 0x81, 0xbd, + 0x0f, 0xa7, 0x39, 0xcd, 0x76, 0x5c, 0x0f, 0x99, 0x91, 0xc2, 0x0c, 0x43, + 0x0f, 0xa2, 0x0b, 0x00, 0x15, 0xed, 0xd0, 0x57, 0xb2, 0x01, 0x70, 0x70, + 0x17, 0xc0, 
0x15, 0xf7, 0x11, 0xc0, 0x16, 0x13, 0x14, 0xc0, 0x16, 0x2f, + 0x07, 0xc0, 0x16, 0x3f, 0x0b, 0xc0, 0x16, 0x62, 0xc4, 0xe0, 0x07, 0x0f, + 0xa3, 0xd9, 0x03, 0xc0, 0x16, 0x6c, 0x0e, 0x40, 0x16, 0x78, 0xc5, 0xc8, + 0x6f, 0x0f, 0xcd, 0x51, 0x14, 0xc0, 0x16, 0x86, 0x42, 0x02, 0x10, 0xc0, + 0x16, 0xa2, 0xc2, 0x09, 0x66, 0x0f, 0xcc, 0x49, 0xc7, 0xc7, 0xf9, 0x0f, + 0xb7, 0x11, 0x10, 0xc0, 0x16, 0xae, 0x12, 0xc0, 0x16, 0xc4, 0x0e, 0xc0, + 0x16, 0xda, 0x17, 0xc0, 0x16, 0xea, 0x05, 0xc0, 0x16, 0xf4, 0x04, 0xc0, + 0x16, 0xfe, 0xc7, 0xb5, 0x83, 0x01, 0x09, 0x31, 0x43, 0x00, 0x5f, 0xc0, + 0x17, 0x10, 0x09, 0xc0, 0x17, 0x1a, 0xc8, 0xad, 0x5d, 0x0f, 0xaa, 0x49, + 0xce, 0x71, 0x76, 0x0f, 0x9f, 0x11, 0xc3, 0x02, 0x3b, 0x0f, 0x9b, 0x11, + 0x9a, 0x0f, 0xa0, 0x11, 0x15, 0xc0, 0x17, 0x26, 0xcb, 0x8a, 0xd6, 0x0f, + 0xa2, 0x60, 0xd0, 0x5c, 0x12, 0x0f, 0xc8, 0x81, 0x48, 0xb8, 0x6a, 0xc0, + 0x17, 0x32, 0x50, 0x58, 0x72, 0xc0, 0x17, 0x44, 0x4a, 0x17, 0xa1, 0xc0, + 0x17, 0x6c, 0x07, 0xc0, 0x17, 0x8c, 0xc5, 0xdc, 0x1d, 0x0f, 0xce, 0xf8, + 0x03, 0xc0, 0x17, 0x9e, 0x17, 0xc0, 0x17, 0xb4, 0x11, 0xc0, 0x17, 0xc6, + 0xc4, 0xe2, 0x9b, 0x0f, 0xa2, 0xb1, 0xd2, 0x4d, 0x45, 0x0f, 0xcf, 0x48, + 0xc6, 0xd1, 0x75, 0x01, 0x35, 0xd9, 0x03, 0xc0, 0x17, 0xd2, 0x46, 0x2c, + 0xb4, 0xc0, 0x17, 0xe4, 0xcc, 0x01, 0xbb, 0x00, 0x01, 0x10, 0x0b, 0xc0, + 0x17, 0xee, 0x07, 0xc0, 0x17, 0xf8, 0xcb, 0x94, 0xa6, 0x0f, 0xcb, 0x89, + 0xc4, 0xe4, 0x2f, 0x0f, 0xd4, 0x00, 0x10, 0xc0, 0x18, 0x0a, 0xc4, 0x26, + 0xba, 0x01, 0x37, 0x59, 0x14, 0xc0, 0x18, 0x26, 0x12, 0xc0, 0x18, 0x48, + 0x06, 0xc0, 0x18, 0x54, 0x17, 0xc0, 0x18, 0x60, 0x0f, 0xc0, 0x18, 0x6c, + 0x0e, 0xc0, 0x18, 0x7b, 0xc4, 0xc7, 0xcb, 0x0f, 0x99, 0xa9, 0x96, 0x0f, + 0xa0, 0x42, 0x00, 0x18, 0x87, 0x58, 0x25, 0x43, 0xc0, 0x18, 0x90, 0x48, + 0x91, 0xff, 0xc0, 0x18, 0x9a, 0x47, 0x08, 0x5b, 0x40, 0x18, 0xe8, 0x07, + 0xc0, 0x19, 0x22, 0x03, 0xc0, 0x19, 0x3c, 0xc4, 0xcc, 0x07, 0x01, 0x37, + 0x51, 0x0b, 0xc0, 0x19, 0x50, 0x11, 0xc0, 0x19, 0x71, 0xcc, 0x85, 0x11, + 0x0f, 0x9c, 0x20, 0x17, 0xc0, 0x19, 0x83, 0xc2, 0x00, 0x03, 0x0f, 0xcc, + 0x01, 0x1b, 0xc0, 0x19, 0x8f, 0x11, 0xc0, 0x19, 0x9b, 0x07, 0xc0, 0x19, + 0xb3, 0xc5, 0x72, 0xa4, 0x0f, 0xcc, 0xba, 0x00, 0x19, 0xbf, 0x05, 0xc0, + 0x19, 0xc5, 0x0f, 0xc0, 0x19, 0xcf, 0x17, 0xc0, 0x19, 0xe3, 0xc4, 0xe0, + 0x13, 0x01, 0x35, 0x81, 0x10, 0xc0, 0x19, 0xf5, 0x14, 0xc0, 0x1a, 0x1b, + 0x0e, 0xc0, 0x1a, 0x2d, 0x42, 0x01, 0x25, 0xc0, 0x1a, 0x3c, 0x99, 0x0f, + 0xa0, 0x23, 0x00, 0x1a, 0x46, 0x12, 0xc0, 0x1a, 0x4c, 0xc2, 0x00, 0xfe, + 0x0f, 0xcf, 0x29, 0xc2, 0x00, 0x74, 0x0f, 0xd4, 0xc8, 0x0b, 0xc0, 0x1a, + 0x56, 0x11, 0xc0, 0x1a, 0x62, 0xd1, 0x50, 0xac, 0x01, 0x1c, 0xd1, 0x03, + 0x40, 0x1a, 0x7d, 0x42, 0x02, 0xd3, 0xc0, 0x1a, 0x8f, 0xc7, 0xc8, 0xc4, + 0x0f, 0x9e, 0xcb, 0x00, 0x1a, 0x99, 0xc4, 0x78, 0xfe, 0x0f, 0x9d, 0x30, + 0x42, 0x00, 0x15, 0xc0, 0x1a, 0x9f, 0x48, 0xb9, 0x0a, 0xc0, 0x1a, 0xab, + 0x14, 0xc0, 0x1a, 0xbd, 0x12, 0xc0, 0x1a, 0xcb, 0xc7, 0xb3, 0x73, 0x01, + 0x10, 0xd9, 0xc6, 0xcc, 0x53, 0x0f, 0xca, 0x91, 0xc9, 0xab, 0x52, 0x0f, + 0xcb, 0x48, 0xca, 0xa3, 0x8c, 0x0f, 0xaa, 0x41, 0xc3, 0x20, 0xac, 0x01, + 0x35, 0x99, 0x42, 0x00, 0x84, 0xc0, 0x1a, 0xdb, 0x42, 0x01, 0xdd, 0x40, + 0x1a, 0xe7, 0x42, 0x05, 0xc0, 0xc0, 0x1a, 0xf3, 0xca, 0xa5, 0x62, 0x01, + 0x19, 0x69, 0x47, 0xba, 0x9b, 0xc0, 0x1a, 0xff, 0xc5, 0xdd, 0xfd, 0x0f, + 0x98, 0x00, 0x42, 0x00, 0x30, 0xc0, 0x1b, 0x23, 0xc5, 0x65, 0x68, 0x01, + 0x18, 0x9b, 0x00, 0x1b, 0x2f, 0xcb, 0x91, 0x20, 0x0f, 0xd5, 0x09, 0x03, + 0xc0, 0x1b, 0x35, 0x15, 0xc0, 0x1b, 0x3d, 0x42, 0x02, 0x2f, 0xc0, 0x1b, + 0x49, 0xc5, 
0xc5, 0x38, 0x01, 0x35, 0xc9, 0x05, 0xc0, 0x1b, 0x59, 0x14, + 0xc0, 0x1b, 0x63, 0x07, 0xc0, 0x1b, 0x6f, 0xc3, 0x92, 0x91, 0x01, 0x5f, + 0x91, 0xce, 0x6b, 0xaa, 0x01, 0x5f, 0xd9, 0xc4, 0xe0, 0xff, 0x0f, 0xc9, + 0x98, 0x10, 0xc0, 0x1b, 0x7b, 0x42, 0x00, 0xbc, 0xc0, 0x1b, 0x8d, 0x1a, + 0xc0, 0x1b, 0x99, 0x06, 0xc0, 0x1b, 0xab, 0xd1, 0x51, 0xde, 0x0f, 0xaf, + 0xf1, 0x46, 0xc7, 0x36, 0x40, 0x1b, 0xb7, 0x07, 0xc0, 0x1b, 0xc9, 0x03, + 0xc0, 0x1b, 0xdb, 0x14, 0xc0, 0x1b, 0xfb, 0x11, 0xc0, 0x1c, 0x09, 0x17, + 0xc0, 0x1c, 0x15, 0xca, 0xa1, 0x0c, 0x0f, 0xde, 0x2a, 0x00, 0x1c, 0x27, + 0x0e, 0xc0, 0x1c, 0x2b, 0x42, 0x00, 0x33, 0xc0, 0x1c, 0x35, 0x10, 0xc0, + 0x1c, 0x41, 0xc6, 0xd0, 0x7f, 0x01, 0x37, 0xa9, 0xc9, 0xb1, 0xe5, 0x01, + 0x32, 0x81, 0x16, 0xc0, 0x1c, 0x4d, 0x48, 0x69, 0x46, 0xc0, 0x1c, 0x5c, + 0xc7, 0xc6, 0x01, 0x0f, 0x9d, 0xb9, 0xd1, 0x50, 0xdf, 0x0f, 0x9b, 0xb1, + 0xc2, 0x00, 0x2c, 0x0f, 0xcb, 0xd9, 0x45, 0x73, 0xa7, 0x40, 0x1c, 0x78, + 0x17, 0xc0, 0x1c, 0x84, 0x0b, 0xc0, 0x1c, 0x93, 0xc8, 0xbc, 0xe2, 0x0f, + 0xb7, 0xc8, 0x11, 0xc0, 0x1c, 0x9f, 0x07, 0xc0, 0x1c, 0xa7, 0x0b, 0xc0, + 0x1c, 0xb7, 0x03, 0x40, 0x1c, 0xc3, 0x14, 0xc0, 0x1c, 0xcf, 0x03, 0xc0, + 0x1c, 0xdb, 0x11, 0xc0, 0x1c, 0xf5, 0x0b, 0xc0, 0x1d, 0x19, 0xcd, 0x7f, + 0xa7, 0x01, 0x4f, 0x11, 0xc3, 0x2d, 0xa5, 0x0f, 0xa0, 0x88, 0x11, 0xc0, + 0x1d, 0x2f, 0x03, 0xc0, 0x1d, 0x3b, 0x14, 0xc0, 0x1d, 0x47, 0xc4, 0xdc, + 0xf0, 0x0f, 0x9f, 0x5a, 0x00, 0x1d, 0x5d, 0xcb, 0x90, 0x18, 0x0f, 0xc9, + 0x39, 0x42, 0x00, 0x27, 0xc0, 0x1d, 0x63, 0x03, 0x40, 0x1d, 0x7e, 0x17, + 0xc0, 0x1d, 0x8a, 0x43, 0x1c, 0x85, 0xc0, 0x1d, 0x96, 0xde, 0x0f, 0x40, + 0x0f, 0xa8, 0xe1, 0x46, 0xcf, 0xdd, 0xc0, 0x1d, 0xa8, 0x05, 0xc0, 0x1d, + 0xdf, 0x42, 0x00, 0x4b, 0xc0, 0x1d, 0xeb, 0xc6, 0x55, 0xf6, 0x01, 0x06, + 0x01, 0x4b, 0x9a, 0x47, 0xc0, 0x1d, 0xfb, 0x46, 0xc9, 0x58, 0x40, 0x1e, + 0x07, 0x03, 0xc0, 0x1e, 0x25, 0xc2, 0x02, 0xfb, 0x0f, 0xcc, 0x88, 0x0f, + 0xc0, 0x1e, 0x31, 0x10, 0xc0, 0x1e, 0x3d, 0x42, 0x00, 0x2c, 0xc0, 0x1e, + 0x49, 0x4b, 0x90, 0xb2, 0x40, 0x1e, 0x55, 0x07, 0xc0, 0x1e, 0x6d, 0x03, + 0xc0, 0x1e, 0x7d, 0xcd, 0x77, 0x7a, 0x01, 0x11, 0x13, 0x00, 0x1e, 0x8f, + 0x0b, 0xc0, 0x1e, 0x95, 0xd4, 0x3e, 0x58, 0x0f, 0xa5, 0x31, 0x11, 0x40, + 0x1e, 0xa4, 0x43, 0x00, 0x67, 0xc0, 0x1e, 0xba, 0x90, 0x01, 0x30, 0x4b, + 0x00, 0x1e, 0xca, 0x48, 0xb9, 0xa2, 0xc0, 0x1e, 0xe9, 0xc6, 0xb7, 0x74, + 0x01, 0x13, 0xdb, 0x00, 0x1e, 0xfb, 0x42, 0x0e, 0xa6, 0xc0, 0x1e, 0xff, + 0x42, 0x15, 0x13, 0xc0, 0x1f, 0x11, 0x15, 0x40, 0x1f, 0x1d, 0x0b, 0xc0, + 0x1f, 0x29, 0x03, 0xc0, 0x1f, 0x33, 0xcc, 0x71, 0x94, 0x0f, 0xb5, 0x60, + 0xc8, 0xb9, 0x52, 0x01, 0x02, 0x99, 0x03, 0xc0, 0x1f, 0x3f, 0xc5, 0xd4, + 0x2a, 0x0f, 0x9e, 0x50, 0x0b, 0xc0, 0x1f, 0x49, 0x11, 0xc0, 0x1f, 0x59, + 0x07, 0xc0, 0x1f, 0x75, 0xca, 0x9b, 0xbc, 0x0f, 0xa7, 0xf8, 0x03, 0xc0, + 0x1f, 0x94, 0x17, 0x40, 0x1f, 0xa5, 0x10, 0xc0, 0x1f, 0xb8, 0xc2, 0x00, + 0x3b, 0x01, 0x36, 0x7b, 0x00, 0x1f, 0xce, 0x15, 0xc0, 0x1f, 0xd4, 0xc7, + 0xc7, 0xba, 0x01, 0x16, 0xa3, 0x00, 0x1f, 0xe0, 0x0e, 0xc0, 0x1f, 0xe6, + 0x89, 0x0f, 0xa0, 0xb3, 0x00, 0x1f, 0xf6, 0x87, 0x0f, 0xcb, 0x38, 0x42, + 0x00, 0xcc, 0xc0, 0x1f, 0xfa, 0x09, 0xc0, 0x20, 0x0a, 0x14, 0xc0, 0x20, + 0x17, 0x4a, 0xa6, 0x5c, 0xc0, 0x20, 0x2b, 0x0e, 0xc0, 0x20, 0x50, 0x4b, + 0x8e, 0x55, 0xc0, 0x20, 0x5a, 0xc5, 0xdd, 0xda, 0x0f, 0xa7, 0x31, 0xc7, + 0x7b, 0xdd, 0x0f, 0xa6, 0x71, 0xc8, 0xb9, 0xba, 0x0f, 0xa1, 0xf1, 0x10, + 0x40, 0x20, 0x7c, 0x16, 0xc0, 0x20, 0x88, 0x17, 0xc0, 0x20, 0x98, 0x44, + 0x00, 0x28, 0xc0, 0x20, 0xb6, 0x15, 0xc0, 0x20, 0xc0, 0x12, 0xc0, 0x20, + 0xd0, 0xcf, 
0x66, 0xfc, 0x0f, 0xad, 0x49, 0xcd, 0x79, 0xf7, 0x0f, 0xa7, + 0xf1, 0x45, 0x9f, 0x92, 0xc0, 0x20, 0xdc, 0xc4, 0xe4, 0x23, 0x0f, 0xa1, + 0x48, 0x14, 0xc0, 0x20, 0xeb, 0x10, 0xc0, 0x21, 0x0e, 0x03, 0xc0, 0x21, + 0x2c, 0x15, 0xc0, 0x21, 0x3a, 0xc8, 0xa2, 0x57, 0x0f, 0xb5, 0xb1, 0xc8, + 0xbe, 0x6a, 0x0f, 0xcf, 0x59, 0xcc, 0x8a, 0x75, 0x0f, 0xd6, 0x10, 0x44, + 0x05, 0x1e, 0xc0, 0x21, 0x46, 0xd8, 0x21, 0xcb, 0x0f, 0xa7, 0x11, 0xc5, + 0xc1, 0x02, 0x0f, 0xa6, 0x61, 0x14, 0xc0, 0x21, 0x52, 0xdc, 0x12, 0x71, + 0x0f, 0xb5, 0x70, 0x47, 0x34, 0x2f, 0xc0, 0x21, 0x5e, 0x4f, 0x63, 0x87, + 0xc0, 0x21, 0x71, 0xd3, 0x45, 0x86, 0x08, 0x5c, 0xd1, 0xcc, 0x45, 0x8d, + 0x08, 0x5c, 0xc9, 0x47, 0x02, 0x0e, 0x40, 0x21, 0x7d, 0x49, 0xae, 0x34, + 0xc0, 0x21, 0xd8, 0x11, 0xc0, 0x21, 0xe4, 0x03, 0x40, 0x21, 0xf0, 0x18, + 0xc0, 0x21, 0xfc, 0xc2, 0x00, 0x29, 0x0f, 0xcc, 0x61, 0x15, 0xc0, 0x22, + 0x08, 0x05, 0xc0, 0x22, 0x1a, 0x55, 0x38, 0x15, 0xc0, 0x22, 0x24, 0x0e, + 0xc0, 0x22, 0x3c, 0x45, 0x9e, 0xa0, 0xc0, 0x22, 0x4e, 0xce, 0x6b, 0xc6, + 0x0f, 0x9f, 0x61, 0xd5, 0x37, 0x82, 0x0f, 0x9e, 0xd1, 0xc9, 0xb3, 0xb9, + 0x0f, 0xce, 0x78, 0xc7, 0xc9, 0xc7, 0x0f, 0xd4, 0xa1, 0x44, 0xde, 0xdf, + 0xc0, 0x22, 0x60, 0x09, 0xc0, 0x22, 0x6c, 0x18, 0xc0, 0x22, 0x78, 0x46, + 0xce, 0x09, 0xc0, 0x22, 0x88, 0x15, 0xc0, 0x22, 0x94, 0x07, 0xc0, 0x22, + 0xa4, 0x45, 0x05, 0xbb, 0xc0, 0x22, 0xb0, 0xce, 0x74, 0x40, 0x01, 0x19, + 0x89, 0x03, 0xc0, 0x22, 0xbc, 0xd0, 0x5f, 0xe2, 0x01, 0x12, 0x79, 0xc8, + 0xb6, 0x42, 0x01, 0x80, 0x18, 0x11, 0xc0, 0x22, 0xc6, 0x03, 0xc0, 0x22, + 0xd6, 0xcd, 0x77, 0x39, 0x01, 0x36, 0xd1, 0xc3, 0x00, 0xcb, 0x0f, 0xa2, + 0xb9, 0xd2, 0x47, 0x4b, 0x0f, 0xca, 0x08, 0xc2, 0x00, 0x58, 0x0f, 0xcd, + 0x21, 0x42, 0x01, 0x48, 0xc0, 0x22, 0xeb, 0x4a, 0xa7, 0x42, 0xc0, 0x22, + 0xfb, 0x17, 0xc0, 0x23, 0x07, 0x16, 0xc0, 0x23, 0x13, 0x89, 0x0f, 0xa0, + 0xab, 0x00, 0x23, 0x1d, 0x47, 0x73, 0x7e, 0xc0, 0x23, 0x29, 0xc7, 0xae, + 0xcf, 0x01, 0x05, 0x59, 0xc6, 0xb9, 0xb4, 0x0f, 0xae, 0x73, 0x00, 0x23, + 0x4d, 0xcb, 0x95, 0x14, 0x0f, 0xaa, 0x51, 0x0e, 0xc0, 0x23, 0x53, 0xc2, + 0x00, 0xbf, 0x0f, 0xb5, 0x51, 0xd2, 0x49, 0x8b, 0x0f, 0xb5, 0x78, 0x47, + 0xc6, 0xe1, 0xc0, 0x23, 0x5f, 0xc6, 0xcb, 0xab, 0x0f, 0xca, 0xf9, 0xc2, + 0x00, 0x3b, 0x0f, 0xcc, 0x30, 0x42, 0x01, 0xe2, 0xc0, 0x23, 0x83, 0x44, + 0x39, 0x86, 0xc0, 0x23, 0x8d, 0xca, 0xa5, 0x44, 0x01, 0x09, 0xc1, 0xc4, + 0xce, 0x23, 0x01, 0x01, 0x03, 0x00, 0x23, 0x99, 0x10, 0xc0, 0x23, 0x9d, + 0xce, 0x61, 0x03, 0x00, 0x00, 0x80, 0x18, 0xc0, 0x23, 0xa9, 0x15, 0xc0, + 0x23, 0xb5, 0x05, 0xc0, 0x23, 0xc1, 0x45, 0x75, 0x61, 0xc0, 0x23, 0xd9, + 0xcc, 0x86, 0xd9, 0x01, 0x01, 0xd9, 0xcd, 0x7c, 0x74, 0x0f, 0x9c, 0xb9, + 0x42, 0x00, 0xa9, 0xc0, 0x23, 0xeb, 0x42, 0x04, 0x2b, 0xc0, 0x23, 0xf7, + 0x45, 0xdc, 0xc7, 0xc0, 0x24, 0x03, 0xcb, 0x4f, 0x1a, 0x0f, 0xb0, 0x61, + 0xd3, 0x1c, 0x59, 0x07, 0xff, 0xe8, 0x43, 0x00, 0x2e, 0xc0, 0x24, 0x19, + 0xc2, 0x00, 0x75, 0x0f, 0xa4, 0x6b, 0x00, 0x24, 0x2d, 0xc4, 0x7c, 0x7d, + 0x0f, 0x9c, 0x03, 0x00, 0x24, 0x3d, 0x43, 0x00, 0x89, 0xc0, 0x24, 0x43, + 0x57, 0x27, 0x2f, 0xc0, 0x24, 0x4f, 0xc7, 0x44, 0xfa, 0x07, 0xef, 0xe1, + 0xc3, 0x01, 0x09, 0x0f, 0xca, 0x30, 0xc2, 0x00, 0x3b, 0x0f, 0xd5, 0x43, + 0x00, 0x24, 0x5b, 0x42, 0x02, 0xa7, 0xc0, 0x24, 0x61, 0xc8, 0xb6, 0xba, + 0x0f, 0xc8, 0xb1, 0x43, 0x0d, 0x05, 0xc0, 0x24, 0x71, 0x46, 0x1c, 0xa1, + 0xc0, 0x24, 0x7b, 0x44, 0x12, 0xb8, 0xc0, 0x24, 0x99, 0xd2, 0x49, 0x1f, + 0x0f, 0x9b, 0x01, 0xc2, 0x00, 0x40, 0x0f, 0x99, 0xcb, 0x00, 0x24, 0xbf, + 0xc5, 0xde, 0x39, 0x0f, 0xa0, 0x99, 0xc5, 0xd9, 0x2a, 0x0f, 0xb5, 0x18, + 0xc3, 0xe5, 
0x57, 0x0f, 0xd4, 0x91, 0x0b, 0xc0, 0x24, 0xc5, 0x42, 0x01, + 0xdd, 0xc0, 0x24, 0xd8, 0x96, 0x0f, 0xa0, 0x03, 0x00, 0x24, 0xe5, 0x05, + 0xc0, 0x24, 0xeb, 0xc4, 0xb0, 0x4f, 0x0f, 0xa0, 0x3b, 0x00, 0x24, 0xf7, + 0x8f, 0x0f, 0xa0, 0x78, 0xc8, 0xbe, 0xb2, 0x01, 0x05, 0xe9, 0xc8, 0x76, + 0x54, 0x01, 0x05, 0x41, 0x43, 0x5d, 0xc0, 0xc0, 0x24, 0xfd, 0x10, 0xc0, + 0x25, 0x0f, 0xcc, 0x89, 0x49, 0x0f, 0x9e, 0x49, 0xca, 0xa7, 0xba, 0x01, + 0x4f, 0xa1, 0x5a, 0x19, 0xae, 0x40, 0x25, 0x19, 0x51, 0x50, 0x8a, 0xc0, + 0x25, 0x3d, 0x42, 0x02, 0x32, 0xc0, 0x25, 0x7c, 0xc5, 0xda, 0x74, 0x0f, + 0xce, 0xd8, 0x14, 0xc0, 0x25, 0x9a, 0xc3, 0x0e, 0x6a, 0x01, 0x35, 0xb1, + 0x44, 0x02, 0x27, 0xc0, 0x25, 0xac, 0xd5, 0x34, 0x10, 0x01, 0x51, 0x78, + 0x07, 0xc0, 0x25, 0xb8, 0xca, 0x89, 0x7b, 0x01, 0x38, 0x61, 0xc3, 0x14, + 0x45, 0x01, 0x32, 0x69, 0x43, 0x1c, 0x87, 0xc0, 0x25, 0xc4, 0xcc, 0x86, + 0x79, 0x0f, 0xa7, 0x99, 0xc4, 0x87, 0x8b, 0x0f, 0x9d, 0xd9, 0x47, 0xc1, + 0xe0, 0x40, 0x25, 0xce, 0x0e, 0xc0, 0x25, 0xda, 0xd0, 0x59, 0xb2, 0x0f, + 0xdd, 0xd8, 0x4d, 0x7b, 0x70, 0xc0, 0x25, 0xec, 0xc5, 0xdc, 0x63, 0x01, + 0x5f, 0x30, 0x09, 0xc0, 0x26, 0x06, 0xc2, 0x07, 0x49, 0x0f, 0xb4, 0xa9, + 0x49, 0xa7, 0x9d, 0xc0, 0x26, 0x16, 0x10, 0xc0, 0x26, 0x22, 0x0f, 0xc0, + 0x26, 0x2c, 0x43, 0x26, 0x1e, 0xc0, 0x26, 0x38, 0xc4, 0xde, 0xd3, 0x01, + 0x32, 0x49, 0x0d, 0xc0, 0x26, 0x44, 0x42, 0x02, 0x32, 0xc0, 0x26, 0x50, + 0xda, 0x1b, 0x82, 0x0f, 0x9e, 0x99, 0xc2, 0x00, 0x99, 0x0f, 0x99, 0x70, + 0xc3, 0xe5, 0x18, 0x0f, 0xcc, 0xb1, 0xc5, 0x46, 0xcd, 0x0f, 0xa2, 0xa8, + 0x14, 0xc0, 0x26, 0x62, 0xc9, 0xb2, 0x90, 0x01, 0x05, 0x71, 0xc3, 0x17, + 0x93, 0x0f, 0x99, 0xb9, 0xcb, 0x8e, 0xb8, 0x0f, 0xca, 0x18, 0x43, 0x02, + 0xdf, 0xc0, 0x26, 0x72, 0x0b, 0xc0, 0x26, 0x7a, 0x11, 0xc0, 0x26, 0x84, + 0x17, 0xc0, 0x26, 0x90, 0x42, 0x00, 0x29, 0xc0, 0x26, 0x9c, 0x03, 0x40, + 0x26, 0xa6, 0xc4, 0xbc, 0xf7, 0x0f, 0xb5, 0xe9, 0x42, 0x00, 0x7f, 0xc0, + 0x26, 0xb2, 0x16, 0xc0, 0x26, 0xe8, 0xc9, 0xac, 0x60, 0x0f, 0xaf, 0xe1, + 0x57, 0x29, 0x12, 0xc0, 0x26, 0xf4, 0xc4, 0x32, 0xd0, 0x0f, 0x9a, 0x29, + 0xc4, 0x5a, 0xfe, 0x0f, 0xa2, 0x29, 0x11, 0x40, 0x27, 0x00, 0x03, 0xc0, + 0x27, 0x0f, 0x0b, 0xc0, 0x27, 0x2c, 0x17, 0xc0, 0x27, 0x4a, 0x11, 0x40, + 0x27, 0x57, 0x4c, 0x89, 0xf1, 0xc0, 0x27, 0x64, 0x03, 0xc0, 0x27, 0xc4, + 0x0e, 0xc0, 0x27, 0xd4, 0x10, 0xc0, 0x27, 0xde, 0xc7, 0xc9, 0x81, 0x0f, + 0xcf, 0x51, 0xc8, 0xb9, 0x22, 0x0f, 0xcf, 0xc0, 0x09, 0xc0, 0x27, 0xee, + 0x42, 0x00, 0x4e, 0xc0, 0x27, 0xfd, 0xc3, 0x18, 0xb3, 0x00, 0x03, 0xf3, + 0x00, 0x28, 0x09, 0x14, 0xc0, 0x28, 0x0d, 0xc2, 0x16, 0x59, 0x01, 0x4f, + 0xf3, 0x00, 0x28, 0x1f, 0xc4, 0x00, 0x3b, 0x0f, 0x9d, 0x59, 0xcf, 0x65, + 0x3a, 0x01, 0x4e, 0xe9, 0x46, 0xce, 0x3f, 0xc0, 0x28, 0x25, 0x47, 0xc6, + 0x39, 0x40, 0x28, 0x54, 0xd7, 0x22, 0x44, 0x01, 0x39, 0xc9, 0x11, 0xc0, + 0x28, 0x6c, 0xd7, 0x27, 0x18, 0x0f, 0xa8, 0x00, 0x43, 0x01, 0xa4, 0xc0, + 0x28, 0x76, 0xc3, 0x91, 0xe8, 0x01, 0x32, 0x41, 0x85, 0x01, 0x18, 0x91, + 0x44, 0x02, 0x8b, 0xc0, 0x28, 0x82, 0x47, 0x2d, 0x4e, 0xc0, 0x28, 0x8c, + 0x42, 0x00, 0x43, 0x40, 0x28, 0xbc, 0xce, 0x75, 0x4a, 0x0f, 0xd3, 0xc9, + 0xc8, 0xbf, 0x9a, 0x01, 0x31, 0x61, 0xd6, 0x2f, 0x46, 0x01, 0x08, 0x09, + 0x0f, 0xc0, 0x28, 0xc8, 0xc3, 0x1f, 0x19, 0x0f, 0xce, 0x89, 0x44, 0x0d, + 0xff, 0x40, 0x28, 0xd4, 0x54, 0x3e, 0x94, 0xc0, 0x29, 0x06, 0x46, 0x0c, + 0x8e, 0xc0, 0x29, 0x6a, 0x07, 0xc0, 0x29, 0x76, 0xc9, 0xb3, 0x44, 0x01, + 0x1f, 0x81, 0x42, 0x00, 0xe6, 0xc0, 0x29, 0x88, 0x4b, 0x66, 0xd0, 0xc0, + 0x29, 0x94, 0xcb, 0x91, 0xaf, 0x0f, 0xa3, 0xf0, 0x42, 0x00, 0xf1, 0xc0, + 0x29, 0xa3, 
0xca, 0x9c, 0xca, 0x01, 0x05, 0x99, 0xc7, 0xc6, 0xb0, 0x0f, + 0x9a, 0x30, 0x00, 0x40, 0x29, 0xad, 0x43, 0x10, 0x73, 0xc0, 0x29, 0xb9, + 0x96, 0x0f, 0xa0, 0xe3, 0x00, 0x29, 0xc5, 0xca, 0xa4, 0xc2, 0x01, 0x3e, + 0x89, 0xc4, 0xca, 0xcf, 0x01, 0x34, 0x99, 0xc2, 0x06, 0x46, 0x01, 0x31, + 0x29, 0x09, 0x40, 0x29, 0xd1, 0x16, 0xc0, 0x29, 0xf2, 0x05, 0xc0, 0x2a, + 0x02, 0xc7, 0x5a, 0x55, 0x01, 0x15, 0x31, 0xd5, 0x2b, 0xc1, 0x01, 0x12, + 0x18, 0xc9, 0xad, 0x5c, 0x01, 0x34, 0xd9, 0xcb, 0x8f, 0x26, 0x0f, 0xa2, + 0xf8, 0x47, 0x02, 0x0e, 0xc0, 0x2a, 0x0e, 0x15, 0xc0, 0x2a, 0x55, 0x48, + 0xa3, 0x64, 0xc0, 0x2a, 0x61, 0x46, 0x09, 0x97, 0xc0, 0x2a, 0x6d, 0x4b, + 0x6f, 0xc7, 0xc0, 0x2a, 0x91, 0x56, 0x30, 0x90, 0x40, 0x2a, 0xae, 0xc8, + 0xbc, 0xb2, 0x01, 0x1f, 0x31, 0x42, 0x00, 0x99, 0xc0, 0x2a, 0xb8, 0x47, + 0xc2, 0xd5, 0xc0, 0x2a, 0xc4, 0xc9, 0x49, 0x4c, 0x00, 0x00, 0x31, 0x45, + 0x31, 0xf0, 0x40, 0x2a, 0xd0, 0x54, 0x3e, 0x80, 0xc0, 0x2a, 0xdc, 0x12, + 0xc0, 0x2b, 0x38, 0x11, 0x40, 0x2b, 0x44, 0x46, 0xd0, 0x6d, 0xc0, 0x2b, + 0x50, 0xc5, 0xdd, 0x8f, 0x0f, 0xca, 0x88, 0xcf, 0x65, 0xb2, 0x0f, 0x9e, + 0x41, 0xd7, 0x26, 0x49, 0x01, 0x51, 0xf9, 0x12, 0xc0, 0x2b, 0x5c, 0xc7, + 0xc5, 0x67, 0x0f, 0xb4, 0x88, 0xcc, 0x88, 0x35, 0x0f, 0xb5, 0x09, 0x45, + 0xd7, 0x72, 0x40, 0x2b, 0x68, 0x1a, 0xc0, 0x2b, 0x8a, 0x43, 0x1d, 0xbb, + 0xc0, 0x2b, 0x96, 0x42, 0x02, 0x10, 0xc0, 0x2b, 0xb2, 0x19, 0xc0, 0x2b, + 0xbe, 0x9b, 0x0f, 0xa3, 0x33, 0x00, 0x2b, 0xd1, 0x11, 0xc0, 0x2b, 0xd7, + 0xc2, 0x00, 0x50, 0x0f, 0xa5, 0x19, 0xc5, 0xdc, 0x8b, 0x0f, 0xa4, 0x83, + 0x00, 0x2b, 0xe4, 0xc2, 0x00, 0xb1, 0x0f, 0xa0, 0xb9, 0xc2, 0x02, 0x6f, + 0x0f, 0xcd, 0xa1, 0x47, 0xc9, 0xdc, 0x40, 0x2b, 0xea, 0x11, 0xc0, 0x2b, + 0xf6, 0x03, 0xc0, 0x2c, 0x08, 0x42, 0x0f, 0xe1, 0x40, 0x2c, 0x14, 0x10, + 0xc0, 0x2c, 0x1e, 0x0e, 0xc0, 0x2c, 0x31, 0x15, 0xc0, 0x2c, 0x3b, 0x06, + 0xc0, 0x2c, 0x50, 0xc2, 0x07, 0xb8, 0x0f, 0xa3, 0xb3, 0x00, 0x2c, 0x5c, + 0x44, 0x82, 0x11, 0xc0, 0x2c, 0x60, 0x05, 0xc0, 0x2c, 0x84, 0x96, 0x0f, + 0xcc, 0x3b, 0x00, 0x2c, 0x94, 0x14, 0xc0, 0x2c, 0xa7, 0x09, 0x40, 0x2c, + 0xb1, 0xc3, 0x18, 0x91, 0x0f, 0xcd, 0x61, 0xcc, 0x8a, 0x81, 0x01, 0x31, + 0x19, 0x16, 0xc0, 0x2c, 0xc3, 0xc4, 0x56, 0x1d, 0x0f, 0xa2, 0xc9, 0x42, + 0x02, 0xa7, 0xc0, 0x2c, 0xcf, 0x14, 0xc0, 0x2c, 0xdb, 0x42, 0x00, 0x76, + 0xc0, 0x2c, 0xe5, 0x44, 0x1f, 0x3c, 0x40, 0x2c, 0xf1, 0x03, 0xc0, 0x2c, + 0xfb, 0x10, 0xc0, 0x2d, 0x1d, 0xc2, 0x02, 0xa7, 0x0f, 0xa8, 0xa3, 0x00, + 0x2d, 0x30, 0x16, 0xc0, 0x2d, 0x3a, 0xc5, 0xdc, 0x95, 0x01, 0x11, 0xa9, + 0x07, 0xc0, 0x2d, 0x46, 0x86, 0x0f, 0xb6, 0x79, 0xca, 0x9e, 0x1e, 0x0f, + 0xce, 0x18, 0xc4, 0x02, 0x10, 0x0f, 0xce, 0x43, 0x00, 0x2d, 0x52, 0x95, + 0x0f, 0xb4, 0x63, 0x00, 0x2d, 0x58, 0x42, 0x02, 0xa7, 0xc0, 0x2d, 0x62, + 0x89, 0x0f, 0xa0, 0xdb, 0x00, 0x2d, 0x7a, 0x44, 0xdf, 0xb3, 0xc0, 0x2d, + 0x80, 0xd3, 0x46, 0x1e, 0x0f, 0x9e, 0xb9, 0x44, 0x6f, 0xbf, 0xc0, 0x2d, + 0x8c, 0xc4, 0x00, 0x3b, 0x0f, 0xd5, 0x19, 0xc5, 0xdc, 0x4f, 0x0f, 0x99, + 0x78, 0x0b, 0xc0, 0x2d, 0x96, 0x03, 0xc0, 0x2d, 0xa6, 0x11, 0xc0, 0x2d, + 0xb0, 0x07, 0x40, 0x2d, 0xc8, 0x57, 0x2a, 0x54, 0xc0, 0x2d, 0xd2, 0xcd, + 0x7c, 0xe9, 0x07, 0xf7, 0xf8, 0xd2, 0x4b, 0x4d, 0x08, 0xe3, 0x61, 0x47, + 0x34, 0x2f, 0xc0, 0x2e, 0x26, 0x06, 0xc0, 0x2e, 0x4a, 0x4b, 0x93, 0x30, + 0xc0, 0x2e, 0x5c, 0xce, 0x73, 0x1a, 0x08, 0xe2, 0x19, 0x45, 0x00, 0xba, + 0xc0, 0x2e, 0x64, 0x4b, 0x6f, 0xc7, 0xc0, 0x2e, 0x74, 0x47, 0x02, 0x0e, + 0x40, 0x2e, 0x94, 0x19, 0xc0, 0x2e, 0xfb, 0x43, 0x00, 0x75, 0xc0, 0x2f, + 0x05, 0xc5, 0x0a, 0xe2, 0x01, 0x2e, 0x53, 0x00, 0x2f, 0x15, 0x46, 0x19, + 0xbb, 0xc0, 
0x2f, 0x1b, 0xc2, 0x00, 0x3b, 0x0f, 0xa8, 0x93, 0x00, 0x2f, + 0x2d, 0x43, 0x00, 0xc7, 0xc0, 0x2f, 0x39, 0xc6, 0xcf, 0xbf, 0x0f, 0x9b, + 0x69, 0xd0, 0x5c, 0xb2, 0x0f, 0xb1, 0x69, 0xc5, 0xd5, 0x01, 0x0f, 0xcc, + 0xf1, 0x16, 0x40, 0x2f, 0x45, 0x42, 0x00, 0x4b, 0xc0, 0x2f, 0x51, 0x42, + 0x0f, 0x9b, 0xc0, 0x2f, 0x5f, 0x91, 0x01, 0x32, 0x63, 0x00, 0x2f, 0x6b, + 0x48, 0x00, 0xcc, 0xc0, 0x2f, 0x71, 0x45, 0xd4, 0x43, 0xc0, 0x2f, 0x9a, + 0xc4, 0xe2, 0xa3, 0x0f, 0xa6, 0x91, 0xca, 0x9a, 0xae, 0x0f, 0x9c, 0xd1, + 0xc3, 0x13, 0x35, 0x0f, 0x9a, 0x59, 0x89, 0x0f, 0xcd, 0xa8, 0xc7, 0xca, + 0x3e, 0x0f, 0xcc, 0x09, 0x09, 0xc0, 0x2f, 0xbc, 0x43, 0x1b, 0x67, 0xc0, + 0x2f, 0xc8, 0xc3, 0x00, 0x38, 0x01, 0x32, 0x71, 0xd1, 0x52, 0xee, 0x01, + 0x05, 0xb1, 0xc7, 0x77, 0xc1, 0x01, 0x05, 0x21, 0x10, 0xc0, 0x2f, 0xd4, + 0x0f, 0xc0, 0x2f, 0xdc, 0xc2, 0x10, 0x3f, 0x0f, 0xaf, 0x13, 0x00, 0x2f, + 0xe8, 0xc4, 0x8a, 0x84, 0x0f, 0xcc, 0x70, 0xc8, 0x21, 0xfb, 0x0f, 0xc9, + 0x29, 0x45, 0x5b, 0x53, 0xc0, 0x2f, 0xee, 0x4c, 0x8c, 0x61, 0x40, 0x2f, + 0xfa, 0x14, 0xc0, 0x30, 0x63, 0x44, 0x0b, 0x13, 0xc0, 0x30, 0x6f, 0xca, + 0xa4, 0x54, 0x70, 0x00, 0x09, 0xcf, 0x68, 0xfa, 0x01, 0x31, 0xf3, 0x00, + 0x30, 0x83, 0x04, 0xc0, 0x30, 0x87, 0x06, 0xc0, 0x30, 0x93, 0xd5, 0x34, + 0x4f, 0x0f, 0xca, 0x69, 0x42, 0x01, 0x7c, 0x40, 0x30, 0x9f, 0xc5, 0xcf, + 0x36, 0x0f, 0xcf, 0x99, 0xc3, 0x0c, 0xa5, 0x0f, 0xd6, 0x08, 0x44, 0x00, + 0x67, 0xc0, 0x30, 0xd9, 0x46, 0x01, 0x4a, 0xc0, 0x31, 0x0d, 0x4a, 0x01, + 0xa9, 0xc0, 0x31, 0x4b, 0xce, 0x72, 0xb8, 0x0f, 0xb2, 0x19, 0x00, 0x40, + 0x31, 0x69, 0x0b, 0xc0, 0x31, 0x90, 0xda, 0x1c, 0x6c, 0x01, 0x35, 0x79, + 0x06, 0xc0, 0x31, 0xa9, 0xcb, 0x96, 0x1c, 0x0f, 0xb0, 0x91, 0xce, 0x6e, + 0xc8, 0x01, 0x5e, 0x88, 0x00, 0x40, 0x31, 0xb5, 0x47, 0x02, 0x0e, 0xc0, + 0x31, 0xc1, 0xcc, 0x1d, 0xc7, 0x08, 0x1c, 0xf8, 0x03, 0xc0, 0x32, 0x24, + 0x0e, 0xc0, 0x32, 0x32, 0x50, 0x5b, 0xb2, 0xc0, 0x32, 0x42, 0x14, 0xc0, + 0x32, 0x84, 0x45, 0xd4, 0x0c, 0xc0, 0x32, 0x8e, 0xc6, 0xcb, 0x57, 0x0f, + 0xcc, 0xa1, 0x4b, 0x8d, 0x8f, 0x40, 0x32, 0xa8, 0x14, 0xc0, 0x33, 0x00, + 0x16, 0xc0, 0x33, 0x0f, 0x17, 0xc0, 0x33, 0x19, 0xc8, 0x6b, 0xf0, 0x01, + 0x11, 0xd9, 0x0e, 0xc0, 0x33, 0x2b, 0xc3, 0x6b, 0x12, 0x0f, 0xa9, 0x51, + 0xc6, 0xd1, 0x6f, 0x0f, 0x9f, 0x29, 0x43, 0x6e, 0xfe, 0xc0, 0x33, 0x38, + 0xc2, 0x01, 0x25, 0x0f, 0xd4, 0xe8, 0x0f, 0xc0, 0x33, 0x44, 0x10, 0xc0, + 0x33, 0x57, 0x42, 0x01, 0x29, 0xc0, 0x33, 0x6b, 0xc7, 0xc4, 0xcd, 0x0f, + 0xad, 0xa1, 0x16, 0xc0, 0x33, 0x77, 0xdb, 0x18, 0x8a, 0x0f, 0xb2, 0x59, + 0xc3, 0x23, 0x1b, 0x01, 0x5f, 0x09, 0x48, 0xbc, 0x42, 0x40, 0x33, 0x83, + 0x42, 0x00, 0x09, 0xc0, 0x33, 0xbf, 0x47, 0x0d, 0xdb, 0xc0, 0x33, 0xc7, + 0xcb, 0x93, 0x46, 0x01, 0x37, 0x61, 0xc6, 0xcd, 0x5b, 0x0f, 0x99, 0xd1, + 0xca, 0xa4, 0x2c, 0x0f, 0xb6, 0xa9, 0xc9, 0xac, 0xf9, 0x0f, 0xcb, 0xf1, + 0xca, 0x9f, 0x40, 0x0f, 0xcc, 0xd8, 0xcf, 0x68, 0xdc, 0x01, 0x1c, 0x71, + 0x12, 0xc0, 0x33, 0xdf, 0xc4, 0xe0, 0x5b, 0x01, 0x5e, 0xd0, 0xd3, 0x40, + 0x67, 0x0f, 0xa5, 0x79, 0xc9, 0x8c, 0x04, 0x0f, 0xb1, 0x79, 0x96, 0x0f, + 0xb6, 0xb1, 0xca, 0x9e, 0xdc, 0x0f, 0xc8, 0xb8, 0x18, 0xc0, 0x33, 0xee, + 0x4f, 0x61, 0x20, 0xc0, 0x33, 0xfa, 0x42, 0x00, 0xac, 0xc0, 0x34, 0x0c, + 0x15, 0xc0, 0x34, 0x19, 0x08, 0xc0, 0x34, 0x25, 0x05, 0xc0, 0x34, 0x34, + 0x06, 0xc0, 0x34, 0x40, 0x46, 0xd2, 0x65, 0xc0, 0x34, 0x4d, 0xc8, 0xb6, + 0x1a, 0x0f, 0xa7, 0x28, 0x43, 0x01, 0xad, 0xc0, 0x34, 0x59, 0x49, 0x1c, + 0x89, 0x40, 0x34, 0x65, 0xc5, 0xdb, 0x41, 0x01, 0x37, 0xc1, 0xd5, 0x33, + 0xbc, 0x0f, 0x9e, 0x91, 0x05, 0x40, 0x34, 0xaf, 0xc6, 0x3c, 0x52, 0x01, + 0x15, 0xbb, 
0x00, 0x34, 0xbb, 0x92, 0x0f, 0xa3, 0xfa, 0x00, 0x34, 0xc1, + 0x14, 0xc0, 0x34, 0xc7, 0xc6, 0x08, 0xea, 0x01, 0x05, 0x49, 0x0f, 0xc0, + 0x34, 0xdd, 0xc7, 0xbf, 0xe8, 0x0f, 0xa1, 0xd1, 0xc2, 0x00, 0x6c, 0x0f, + 0xd5, 0xa8, 0x43, 0x01, 0xfe, 0xc0, 0x34, 0xec, 0xc3, 0x0e, 0x66, 0x0f, + 0xb6, 0xf3, 0x00, 0x34, 0xf6, 0xc3, 0x04, 0x85, 0x0f, 0xa0, 0x58, 0x4a, + 0x15, 0x7c, 0xc0, 0x35, 0x02, 0xcc, 0x87, 0xb1, 0x0f, 0xad, 0x71, 0x10, + 0xc0, 0x35, 0x26, 0xcb, 0x91, 0xd0, 0x0f, 0xca, 0x01, 0xd2, 0x47, 0x39, + 0x01, 0x71, 0xf0, 0x16, 0xc0, 0x35, 0x36, 0x10, 0xc0, 0x35, 0x42, 0x14, + 0xc0, 0x35, 0x4e, 0x18, 0xc0, 0x35, 0x5a, 0xc9, 0xac, 0x72, 0x0f, 0xae, + 0x89, 0x45, 0xd7, 0x90, 0xc0, 0x35, 0x6c, 0xc4, 0x7f, 0xa8, 0x0f, 0xce, + 0x38, 0x06, 0xc0, 0x35, 0x78, 0xcf, 0x68, 0xeb, 0x01, 0x33, 0x81, 0x0b, + 0xc0, 0x35, 0x84, 0x44, 0x14, 0x97, 0x40, 0x35, 0x90, 0xca, 0x93, 0xd6, + 0x01, 0x38, 0x69, 0x07, 0xc0, 0x35, 0x9c, 0xcd, 0x75, 0x72, 0x0f, 0x9c, + 0x08, 0x9b, 0x0f, 0xd5, 0x83, 0x00, 0x35, 0xae, 0x03, 0xc0, 0x35, 0xb4, + 0x11, 0xc0, 0x35, 0xc4, 0x07, 0xc0, 0x35, 0xd9, 0xca, 0xa0, 0xc6, 0x0f, + 0xb1, 0x98, 0xc6, 0xd1, 0x7b, 0x0f, 0xcc, 0x51, 0x17, 0xc0, 0x35, 0xe5, + 0x14, 0xc0, 0x35, 0xef, 0xc2, 0x01, 0xbb, 0x0f, 0xcd, 0xb3, 0x00, 0x36, + 0x0b, 0xc4, 0x18, 0xb3, 0x0f, 0xae, 0x01, 0x89, 0x0f, 0x99, 0x5b, 0x00, + 0x36, 0x11, 0xc4, 0xe3, 0xc3, 0x0f, 0xd6, 0xa8, 0x05, 0xc0, 0x36, 0x17, + 0x42, 0x01, 0x0c, 0xc0, 0x36, 0x29, 0x0e, 0xc0, 0x36, 0x35, 0xca, 0x9c, + 0x0c, 0x01, 0x31, 0x59, 0xce, 0x73, 0xd0, 0x0f, 0x9c, 0x29, 0xc3, 0xd3, + 0x0e, 0x0f, 0xce, 0xd1, 0xc4, 0xd2, 0xb5, 0x0f, 0xa3, 0x50, 0x07, 0xc0, + 0x36, 0x3f, 0x11, 0xc0, 0x36, 0x4b, 0x03, 0xc0, 0x36, 0x60, 0xca, 0x9f, + 0x54, 0x0f, 0x9b, 0x20, 0x42, 0x02, 0xa7, 0xc0, 0x36, 0x6c, 0xc7, 0xc0, + 0x20, 0x01, 0x37, 0xe9, 0x10, 0xc0, 0x36, 0x76, 0xc2, 0x00, 0x40, 0x01, + 0x1e, 0xd8, 0x42, 0x01, 0xa3, 0xc0, 0x36, 0x82, 0x0f, 0xc0, 0x36, 0x8c, + 0x03, 0xc0, 0x36, 0x98, 0xc4, 0xe3, 0x9b, 0x0f, 0xc9, 0xd0, 0x14, 0xc0, + 0x36, 0xa4, 0x15, 0xc0, 0x36, 0xb1, 0x47, 0xc0, 0x0b, 0xc0, 0x36, 0xbe, + 0x45, 0xd5, 0xd3, 0xc0, 0x36, 0xca, 0x0e, 0xc0, 0x36, 0xd6, 0xd9, 0x1e, + 0xe6, 0x0f, 0x9e, 0x89, 0xd2, 0x4b, 0xb9, 0x01, 0x50, 0x68, 0xc4, 0xde, + 0x8b, 0x0f, 0xd4, 0xf3, 0x00, 0x36, 0xe2, 0x0e, 0xc0, 0x36, 0xe8, 0x43, + 0x6c, 0xc3, 0xc0, 0x36, 0xfa, 0x42, 0x07, 0x2f, 0xc0, 0x37, 0x12, 0x06, + 0xc0, 0x37, 0x1a, 0x10, 0x40, 0x37, 0x26, 0x49, 0xb3, 0x68, 0xc0, 0x37, + 0x34, 0x06, 0xc0, 0x37, 0x40, 0x42, 0x01, 0x1b, 0xc0, 0x37, 0x4a, 0x10, + 0xc0, 0x37, 0x54, 0x14, 0xc0, 0x37, 0x66, 0x03, 0xc0, 0x37, 0x78, 0x4b, + 0x93, 0x72, 0xc0, 0x37, 0x84, 0xc2, 0x00, 0xa2, 0x0f, 0xa6, 0xe9, 0x0e, + 0xc0, 0x37, 0xa8, 0xcd, 0x78, 0x3d, 0x00, 0x04, 0xa8, 0x16, 0xc0, 0x37, + 0xb4, 0x17, 0xc0, 0x37, 0xc0, 0x10, 0xc0, 0x37, 0xd5, 0x06, 0xc0, 0x37, + 0xee, 0xc3, 0x87, 0x43, 0x0f, 0xaf, 0xf9, 0x11, 0xc0, 0x37, 0xfc, 0x43, + 0x0b, 0x09, 0xc0, 0x38, 0x08, 0xca, 0x46, 0x99, 0x0f, 0xa7, 0x8b, 0x00, + 0x38, 0x12, 0xca, 0xa0, 0xd0, 0x0f, 0x9d, 0x28, 0x16, 0xc0, 0x38, 0x16, + 0x4c, 0x86, 0xb5, 0xc0, 0x38, 0x22, 0x46, 0xce, 0x93, 0xc0, 0x38, 0x47, + 0x15, 0xc0, 0x38, 0x65, 0x14, 0xc0, 0x38, 0x7d, 0x0e, 0xc0, 0x38, 0x8f, + 0x12, 0xc0, 0x38, 0xa1, 0x90, 0x0f, 0xa3, 0x43, 0x00, 0x38, 0xad, 0x0a, + 0xc0, 0x38, 0xdb, 0xc6, 0xd1, 0x87, 0x0f, 0xae, 0xb1, 0xc4, 0x60, 0xb3, + 0x00, 0x05, 0x79, 0xc5, 0xdb, 0x28, 0x0f, 0xcd, 0x19, 0x09, 0x40, 0x38, + 0xe7, 0x15, 0xc0, 0x38, 0xf7, 0x42, 0x00, 0x72, 0xc0, 0x39, 0x03, 0x43, + 0x1c, 0xe7, 0x40, 0x39, 0x0d, 0x06, 0xc0, 0x39, 0x19, 0x47, 0x02, 0x0e, + 0x40, 0x39, 
0x2b, 0x15, 0xc0, 0x39, 0x8b, 0x0e, 0xc0, 0x39, 0x9d, 0x50, + 0x0f, 0x5e, 0xc0, 0x39, 0xa9, 0x16, 0xc0, 0x39, 0xb5, 0x4b, 0x6f, 0xc7, + 0xc0, 0x39, 0xc1, 0x4f, 0x30, 0x90, 0xc0, 0x3a, 0x02, 0x46, 0x09, 0x97, + 0x40, 0x3a, 0x0c, 0xc2, 0x01, 0xbb, 0x0f, 0xd5, 0x11, 0xcd, 0x7d, 0x37, + 0x0f, 0xce, 0x70, 0x9b, 0x0f, 0xa8, 0x8b, 0x00, 0x3a, 0x30, 0xc9, 0xa9, + 0xcf, 0x01, 0x09, 0x50, 0x46, 0x5c, 0x02, 0xc0, 0x3a, 0x3f, 0x45, 0xde, + 0x2a, 0xc0, 0x3a, 0x49, 0xc3, 0x4d, 0xd4, 0x0f, 0xaa, 0x59, 0x47, 0xc9, + 0xff, 0xc0, 0x3a, 0x72, 0x10, 0x40, 0x3a, 0x90, 0x52, 0x4c, 0xeb, 0xc0, + 0x3a, 0x9a, 0x48, 0xbb, 0xb2, 0xc0, 0x3a, 0xa6, 0x45, 0xdd, 0xf3, 0xc0, + 0x3a, 0xbe, 0x44, 0x2f, 0x1e, 0xc0, 0x3a, 0xde, 0x49, 0xb3, 0x4d, 0x40, + 0x3b, 0x00, 0xc6, 0x00, 0xf3, 0x01, 0x05, 0x69, 0xc2, 0x00, 0xcc, 0x0f, + 0xa4, 0x7b, 0x00, 0x3b, 0x28, 0xc4, 0x13, 0x35, 0x0f, 0xa2, 0xc1, 0xc7, + 0xc5, 0xe5, 0x0f, 0xca, 0xe9, 0xc2, 0x00, 0xac, 0x0f, 0xd4, 0x08, 0xc3, + 0x14, 0x6b, 0x0f, 0xa1, 0x41, 0xd4, 0x3d, 0xe0, 0x01, 0x93, 0xf8, 0x15, + 0xc0, 0x3b, 0x34, 0x42, 0x00, 0xa4, 0xc0, 0x3b, 0x3e, 0x19, 0xc0, 0x3b, + 0x4a, 0x43, 0x11, 0x7f, 0xc0, 0x3b, 0x60, 0xc5, 0xd8, 0x99, 0x01, 0x32, + 0x33, 0x00, 0x3b, 0x6c, 0x43, 0x5c, 0xeb, 0xc0, 0x3b, 0x72, 0x46, 0xd3, + 0x13, 0xc0, 0x3b, 0x7e, 0xc5, 0xde, 0x70, 0x0f, 0xa2, 0xa1, 0xc7, 0xc4, + 0xd4, 0x0f, 0xc8, 0x98, 0xcc, 0x86, 0x55, 0x0f, 0xc9, 0x11, 0xc2, 0x02, + 0x35, 0x01, 0x15, 0xe3, 0x00, 0x3b, 0x8e, 0x04, 0xc0, 0x3b, 0x94, 0x0b, + 0xc0, 0x3b, 0xa0, 0x47, 0x34, 0xa6, 0xc0, 0x3b, 0xac, 0xd3, 0x40, 0x7a, + 0x01, 0x01, 0x79, 0xc8, 0xba, 0x42, 0x0f, 0xa6, 0xd9, 0xca, 0xa4, 0x22, + 0x0f, 0xcf, 0xf8, 0x10, 0xc0, 0x3b, 0xb8, 0x94, 0x01, 0x15, 0xeb, 0x00, + 0x3b, 0xc2, 0x16, 0xc0, 0x3b, 0xd7, 0x00, 0xc0, 0x3b, 0xe8, 0x42, 0x02, + 0x2f, 0xc0, 0x3c, 0x0b, 0xc2, 0x00, 0x40, 0x0f, 0xa2, 0x19, 0xcc, 0x40, + 0x81, 0x00, 0x05, 0x00, 0xca, 0xa7, 0x06, 0x0f, 0x0a, 0x79, 0x0e, 0xc0, + 0x3c, 0x17, 0x46, 0x09, 0x97, 0xc0, 0x3c, 0x23, 0x15, 0xc0, 0x3c, 0x47, + 0x45, 0x28, 0xb1, 0x40, 0x3c, 0x53, 0x44, 0x75, 0x34, 0xc0, 0x3c, 0x6f, + 0x0f, 0xc0, 0x3c, 0x7b, 0xca, 0x9d, 0x92, 0x0f, 0xa9, 0x49, 0xc2, 0x02, + 0xa7, 0x00, 0x00, 0x00, 0xc5, 0x13, 0x84, 0x01, 0x16, 0x1b, 0x00, 0x3c, + 0x87, 0xcc, 0x06, 0xbb, 0x01, 0x16, 0x11, 0x48, 0x19, 0xb9, 0xc0, 0x3c, + 0x8d, 0x15, 0xc0, 0x3c, 0x99, 0x05, 0xc0, 0x3c, 0xa5, 0xc7, 0x05, 0xc0, + 0x01, 0x10, 0x79, 0xce, 0x72, 0xd4, 0x01, 0x50, 0x49, 0xd2, 0x48, 0x6b, + 0x01, 0x57, 0xf8, 0xca, 0xa0, 0x76, 0x00, 0x3f, 0xf9, 0x06, 0xc0, 0x3c, + 0xb1, 0x0e, 0xc0, 0x3c, 0xc3, 0xd0, 0x0f, 0x09, 0x00, 0x3f, 0xc9, 0x43, + 0x0a, 0x8a, 0xc0, 0x3c, 0xd5, 0x47, 0x10, 0x78, 0xc0, 0x3c, 0xe1, 0xd4, + 0x3d, 0x18, 0x00, 0x3f, 0xa0, 0xc3, 0x83, 0x55, 0x0f, 0xcb, 0xb9, 0xce, + 0x73, 0x8a, 0x0f, 0x98, 0x18, 0x46, 0x04, 0x8f, 0xc0, 0x3c, 0xed, 0x44, + 0x0b, 0x0d, 0x40, 0x3d, 0x0f, 0x44, 0xe4, 0x3b, 0xc0, 0x3d, 0x31, 0x12, + 0xc0, 0x3d, 0x3d, 0x00, 0x40, 0x3d, 0x49, 0xc3, 0x01, 0x97, 0x0f, 0xcc, + 0x29, 0xcf, 0x68, 0xeb, 0x01, 0x33, 0x89, 0x94, 0x0f, 0xa2, 0x12, 0x00, + 0x3d, 0x5b, 0x89, 0x0f, 0xca, 0xd1, 0x52, 0x4d, 0xb1, 0x40, 0x3d, 0x68, + 0x16, 0xc0, 0x3d, 0xee, 0x05, 0xc0, 0x3d, 0xf8, 0xd1, 0x50, 0x24, 0x0f, + 0xb0, 0x88, 0x15, 0xc0, 0x3e, 0x04, 0x42, 0x00, 0x99, 0xc0, 0x3e, 0x0e, + 0xc9, 0xa9, 0x3f, 0x00, 0x9b, 0x09, 0xc9, 0x11, 0xf6, 0x00, 0x9b, 0x11, + 0x12, 0xc0, 0x3e, 0x18, 0xcd, 0x2c, 0xb2, 0x00, 0x9b, 0x39, 0x46, 0x09, + 0x97, 0xc0, 0x3e, 0x24, 0x47, 0x34, 0x2f, 0xc0, 0x3e, 0x42, 0x4b, 0x8f, + 0x68, 0x40, 0x3e, 0x60, 0x07, 0xc0, 0x3e, 0x86, 0x47, 0xc5, 0x60, 0xc0, + 0x3e, 0xa1, 
0x88, 0x0f, 0xce, 0xe9, 0x4d, 0x7c, 0x67, 0x40, 0x3e, 0xad, + 0x00, 0xc0, 0x3f, 0x26, 0xc6, 0x59, 0xd6, 0x01, 0x33, 0x50, 0xc6, 0x31, + 0x92, 0x01, 0x38, 0x4b, 0x00, 0x3f, 0x36, 0xca, 0x3a, 0x52, 0x01, 0x1c, + 0x31, 0x42, 0x00, 0xa9, 0xc0, 0x3f, 0x3c, 0x00, 0xc0, 0x3f, 0x48, 0xc5, + 0xd6, 0x0f, 0x00, 0x00, 0x28, 0x4b, 0x98, 0x4d, 0xc0, 0x3f, 0x5a, 0x4b, + 0x97, 0x45, 0xc0, 0x3f, 0x66, 0x48, 0xb6, 0x9a, 0x40, 0x3f, 0x72, 0x42, + 0x00, 0x65, 0xc0, 0x3f, 0x7e, 0x0b, 0x40, 0x3f, 0x88, 0x46, 0xd2, 0x05, + 0xc0, 0x3f, 0x94, 0xc4, 0x61, 0x0d, 0x00, 0x00, 0xd8, 0xcc, 0x83, 0x9d, + 0x01, 0x08, 0x39, 0x42, 0x00, 0x79, 0x40, 0x3f, 0x9e, 0x95, 0x0f, 0xa2, + 0x01, 0xc7, 0xb4, 0xd2, 0x0f, 0xa2, 0x98, 0x0b, 0xc0, 0x3f, 0xb0, 0x4c, + 0x83, 0x55, 0xc0, 0x3f, 0xbc, 0x42, 0x00, 0xb1, 0xc0, 0x3f, 0xd8, 0x47, + 0xc7, 0x12, 0xc0, 0x3f, 0xe4, 0x47, 0xc7, 0xb3, 0x40, 0x40, 0x18, 0xc5, + 0xd8, 0x30, 0x0f, 0xcc, 0x69, 0xc4, 0xe0, 0xfb, 0x0f, 0x9e, 0x61, 0x03, + 0xc0, 0x40, 0x42, 0xc5, 0xd0, 0x38, 0x0f, 0xcb, 0xe9, 0x4c, 0x89, 0xe5, + 0x40, 0x40, 0x4c, 0x07, 0xc0, 0x40, 0xc0, 0x03, 0xc0, 0x40, 0xca, 0x0b, + 0xc0, 0x40, 0xe2, 0x11, 0x40, 0x40, 0xee, 0xc2, 0x00, 0xb1, 0x01, 0x34, + 0xcb, 0x00, 0x40, 0xfa, 0x0f, 0xc0, 0x41, 0x00, 0x11, 0xc0, 0x41, 0x0c, + 0xcf, 0x63, 0x4b, 0x01, 0x05, 0x81, 0xc3, 0x73, 0xfc, 0x0f, 0xce, 0xf1, + 0xc7, 0xc8, 0x23, 0x01, 0x80, 0x98, 0xca, 0xa5, 0xee, 0x01, 0x09, 0xb9, + 0x14, 0x40, 0x41, 0x18, 0xc6, 0xd2, 0xef, 0x0f, 0x9d, 0x91, 0xc4, 0xbc, + 0x5c, 0x0f, 0xce, 0x20, 0x11, 0xc0, 0x41, 0x25, 0xca, 0xa4, 0xae, 0x01, + 0x4f, 0x31, 0x03, 0x40, 0x41, 0x37, 0x43, 0x01, 0x95, 0xc0, 0x41, 0x43, + 0xd0, 0x5f, 0xc2, 0x01, 0x3e, 0x39, 0xcc, 0x89, 0xc1, 0x01, 0x31, 0x31, + 0x0b, 0xc0, 0x41, 0x4f, 0x45, 0x0c, 0x91, 0x40, 0x41, 0x5b, 0xc2, 0x00, + 0x29, 0x0f, 0xcd, 0x31, 0x4b, 0x96, 0xd7, 0x40, 0x41, 0x67, 0x47, 0xc0, + 0xc1, 0xc0, 0x41, 0x7f, 0x07, 0xc0, 0x41, 0x9d, 0x52, 0x28, 0xce, 0xc0, + 0x41, 0xa7, 0xc3, 0x00, 0x44, 0x0f, 0xce, 0x28, 0x07, 0xc0, 0x41, 0xad, + 0xc7, 0xc4, 0x10, 0x01, 0x36, 0x71, 0xc8, 0x12, 0x47, 0x01, 0x30, 0x69, + 0x42, 0x00, 0x43, 0x40, 0x41, 0xb7, 0x06, 0xc0, 0x41, 0xc6, 0x47, 0xc0, + 0x89, 0xc0, 0x41, 0xd0, 0xc3, 0x0d, 0x14, 0x0f, 0xd6, 0x90, 0x16, 0xc0, + 0x41, 0xf8, 0xc8, 0xb8, 0x4a, 0x01, 0x09, 0x28, 0x42, 0x00, 0x2a, 0xc0, + 0x42, 0x04, 0x16, 0x40, 0x42, 0x28, 0xd1, 0x53, 0xdc, 0x01, 0x1f, 0xf9, + 0x46, 0x38, 0xe8, 0xc0, 0x42, 0x34, 0xda, 0x1c, 0x52, 0x07, 0xff, 0xe0, + 0x0e, 0xc0, 0x42, 0x40, 0xcb, 0x8e, 0x34, 0x0f, 0xcb, 0xa8, 0x44, 0x78, + 0xf3, 0xc0, 0x42, 0x4f, 0xc4, 0xcc, 0x91, 0x00, 0x16, 0xd8, 0x46, 0xd1, + 0xbd, 0xc0, 0x42, 0x67, 0x44, 0x3c, 0x52, 0x40, 0x42, 0x73, 0x46, 0xcd, + 0x37, 0xc0, 0x42, 0x7f, 0x51, 0x50, 0x35, 0xc0, 0x42, 0xc2, 0x4a, 0x51, + 0x89, 0x40, 0x42, 0xda, 0x15, 0xc0, 0x42, 0xf2, 0x42, 0x01, 0x0e, 0xc0, + 0x42, 0xfe, 0x48, 0x10, 0xb4, 0xc0, 0x43, 0x0a, 0x45, 0x01, 0xc3, 0xc0, + 0x43, 0x16, 0xd4, 0x3b, 0xd8, 0x08, 0xd1, 0x99, 0x47, 0x02, 0x0e, 0xc0, + 0x43, 0x2e, 0x46, 0x34, 0x6f, 0x40, 0x43, 0x8a, 0xce, 0x6d, 0xcc, 0x01, + 0x17, 0xf9, 0x14, 0xc0, 0x43, 0x96, 0x15, 0xc0, 0x43, 0xa8, 0x45, 0x00, + 0x49, 0xc0, 0x43, 0xb4, 0xca, 0x9c, 0xe8, 0x01, 0x4c, 0x11, 0xd6, 0x2c, + 0x02, 0x01, 0x53, 0x20, 0x49, 0xaf, 0xe4, 0xc0, 0x43, 0xc0, 0xc2, 0x11, + 0xa5, 0x01, 0x5f, 0x11, 0xc8, 0xb6, 0x3a, 0x0f, 0xcc, 0x98, 0x47, 0xca, + 0x14, 0xc0, 0x43, 0xd2, 0x47, 0xc0, 0xf2, 0xc0, 0x44, 0x02, 0xcc, 0x8b, + 0x41, 0x0f, 0x9c, 0x19, 0x94, 0x0f, 0xd6, 0xc8, 0xc2, 0x00, 0x10, 0x01, + 0x35, 0xa9, 0xc5, 0xd7, 0xe5, 0x01, 0x32, 0x19, 0xc6, 0xd1, 0x2d, 0x0f, + 0xc9, 0xc8, 
0xc6, 0xd1, 0x09, 0x0f, 0xab, 0xc9, 0xc2, 0x00, 0x74, 0x01, + 0x50, 0xe8, 0xc9, 0x48, 0xa4, 0x01, 0x33, 0x49, 0x42, 0x02, 0xbc, 0xc0, + 0x44, 0x32, 0xd9, 0x1e, 0x37, 0x01, 0x50, 0xb0, 0xcb, 0x5a, 0x97, 0x01, + 0x12, 0xf9, 0x00, 0x40, 0x44, 0x3e, 0xc6, 0xcb, 0xb7, 0x01, 0x31, 0x79, + 0x00, 0x40, 0x44, 0x4a, 0x45, 0xd4, 0x89, 0xc0, 0x44, 0x56, 0xca, 0xa4, + 0x7c, 0x0f, 0xa4, 0xd9, 0xc6, 0x08, 0xea, 0x00, 0x05, 0x28, 0x42, 0x00, + 0x89, 0xc0, 0x44, 0x68, 0xc8, 0xb9, 0x1a, 0x0f, 0xcb, 0x59, 0xc2, 0x49, + 0x0c, 0x0f, 0xb7, 0xb1, 0x50, 0x5b, 0x52, 0xc0, 0x44, 0x73, 0x06, 0x40, + 0x44, 0xf5, 0xc8, 0xb9, 0x32, 0x01, 0x36, 0x81, 0x07, 0xc0, 0x44, 0xff, + 0x42, 0x00, 0xa9, 0xc0, 0x45, 0x0c, 0x11, 0xc0, 0x45, 0x1b, 0x12, 0xc0, + 0x45, 0x25, 0x14, 0xc0, 0x45, 0x31, 0x4b, 0x8c, 0x62, 0x40, 0x45, 0x3d, + 0xc6, 0xcb, 0x75, 0x01, 0x32, 0x89, 0xc6, 0xd2, 0x53, 0x01, 0x71, 0xf8, + 0xc5, 0xd1, 0xee, 0x01, 0x31, 0x21, 0xc5, 0xda, 0x47, 0x01, 0x08, 0x30, + 0xc9, 0x08, 0xe7, 0x01, 0x31, 0x09, 0x50, 0x59, 0x12, 0x40, 0x45, 0xb5, + 0xc3, 0x03, 0xd9, 0x0f, 0xa7, 0xbb, 0x00, 0x45, 0xc1, 0xc4, 0x2a, 0xa0, + 0x0f, 0x9e, 0xa8, 0xc5, 0x79, 0x8a, 0x0f, 0xa6, 0x29, 0xc9, 0xac, 0x57, + 0x0f, 0xc8, 0xc8, 0xc5, 0x11, 0x55, 0x0f, 0xa1, 0x8a, 0x00, 0x45, 0xc7, + 0x42, 0xbe, 0x99, 0xc0, 0x45, 0xcd, 0x08, 0x40, 0x45, 0xd9, 0x14, 0xc0, + 0x45, 0xe1, 0x05, 0xc0, 0x45, 0xeb, 0x15, 0xc0, 0x46, 0x05, 0x12, 0xc0, + 0x46, 0x29, 0x04, 0xc0, 0x46, 0x35, 0x16, 0xc0, 0x46, 0x4b, 0x46, 0xd0, + 0x31, 0xc0, 0x46, 0x63, 0x06, 0xc0, 0x46, 0x6f, 0x0e, 0xc0, 0x46, 0x81, + 0x0a, 0xc0, 0x46, 0x8d, 0x0f, 0xc0, 0x46, 0x9f, 0x19, 0xc0, 0x46, 0xa7, + 0x08, 0xc0, 0x46, 0xb1, 0x0c, 0xc0, 0x46, 0xbd, 0x07, 0xc0, 0x46, 0xc9, + 0x44, 0xe3, 0xb7, 0xc0, 0x46, 0xdb, 0xc3, 0x1a, 0x7c, 0x01, 0x75, 0xc9, + 0x09, 0x40, 0x46, 0xeb, 0x96, 0x01, 0x8e, 0x03, 0x00, 0x46, 0xf7, 0xc2, + 0x47, 0xa4, 0x01, 0x8e, 0x09, 0xc2, 0xe5, 0x85, 0x01, 0x8e, 0x11, 0xc3, + 0xe5, 0x84, 0x01, 0x8e, 0x19, 0x95, 0x01, 0x8e, 0x8b, 0x00, 0x46, 0xfb, + 0x8a, 0x01, 0x8e, 0x83, 0x00, 0x47, 0x15, 0x90, 0x01, 0x8e, 0x79, 0x92, + 0x01, 0x8e, 0x93, 0x00, 0x47, 0x2d, 0x86, 0x01, 0x8e, 0xa1, 0x93, 0x01, + 0x8f, 0x18, 0x42, 0x00, 0x3b, 0xc0, 0x47, 0x39, 0x07, 0xc0, 0x47, 0x48, + 0x14, 0xc0, 0x47, 0x54, 0xcb, 0x94, 0xc7, 0x0f, 0x9e, 0x09, 0xc5, 0xdc, + 0x45, 0x0f, 0x99, 0x80, 0x0b, 0xc0, 0x47, 0x5e, 0x14, 0xc0, 0x47, 0x68, + 0x44, 0xe0, 0xa7, 0xc0, 0x47, 0x74, 0x42, 0x00, 0x47, 0x40, 0x47, 0x9e, + 0xc3, 0x01, 0xe7, 0x01, 0x35, 0xb9, 0xc4, 0x79, 0xe6, 0x01, 0x31, 0x39, + 0xc5, 0xd7, 0x2c, 0x0f, 0xa1, 0xf9, 0xc4, 0xe3, 0x6f, 0x0f, 0xa0, 0xa1, + 0xc2, 0x18, 0xb3, 0x0f, 0xce, 0x92, 0x00, 0x47, 0xbc, 0x48, 0xbe, 0x52, + 0xc0, 0x47, 0xc2, 0xca, 0xa7, 0x56, 0x0f, 0x9b, 0x59, 0xc7, 0xc0, 0x6d, + 0x0f, 0xcb, 0x10, 0xc3, 0x1c, 0xe6, 0x0f, 0xd3, 0xe1, 0xca, 0xa6, 0xf2, + 0x01, 0x05, 0x10, 0x44, 0x00, 0x74, 0xc0, 0x47, 0xce, 0xc9, 0xad, 0x89, + 0x0f, 0xa9, 0x70, 0x42, 0x00, 0xcc, 0xc0, 0x47, 0xda, 0xc2, 0x01, 0x48, + 0x0f, 0xa2, 0x89, 0xc6, 0xcc, 0xc5, 0x0f, 0xa0, 0x51, 0xc6, 0xd2, 0xd7, + 0x0f, 0xca, 0x80, 0xc8, 0xb9, 0x92, 0x0f, 0xa5, 0x99, 0xca, 0x39, 0x0b, + 0x0f, 0x98, 0xc8, 0xcd, 0x7b, 0x7d, 0x0f, 0x9e, 0x78, 0xc4, 0x9e, 0x3a, + 0x0f, 0xcb, 0x29, 0x0d, 0x40, 0x47, 0xea, 0x47, 0x1d, 0xd4, 0xc0, 0x47, + 0xf6, 0xc2, 0x00, 0x3d, 0x01, 0x30, 0x21, 0x12, 0xc0, 0x48, 0x5c, 0x0f, + 0x40, 0x48, 0x74, 0x42, 0x00, 0x84, 0xc0, 0x48, 0x7e, 0xce, 0x6e, 0x58, + 0x0f, 0xa4, 0x89, 0xcb, 0x96, 0xab, 0x0f, 0xb6, 0x58, 0xc8, 0xb7, 0x4a, + 0x01, 0x30, 0x61, 0x16, 0xc0, 0x48, 0x8a, 0xca, 0xa0, 0xe4, 0x01, 0x19, + 0x91, 0x4a, 
0x9c, 0x3e, 0xc0, 0x48, 0xa2, 0xce, 0x73, 0xfa, 0x0f, 0x9f, + 0x51, 0x08, 0xc0, 0x48, 0xae, 0xd5, 0x33, 0xa7, 0x01, 0x53, 0x68, 0xcb, + 0x99, 0x29, 0x01, 0x12, 0xc1, 0xc2, 0x00, 0x65, 0x0f, 0xd5, 0xc1, 0xd2, + 0x4b, 0xa7, 0x01, 0x72, 0x78, 0xc2, 0x00, 0x45, 0x00, 0x01, 0xd3, 0x00, + 0x48, 0xc0, 0xcd, 0x76, 0x9d, 0x0f, 0xa5, 0x28, 0x0b, 0xc0, 0x48, 0xc4, + 0xc7, 0xc5, 0x28, 0x0f, 0x9a, 0xd0, 0xc5, 0x11, 0x55, 0x0f, 0xa1, 0x70, + 0x1b, 0xc0, 0x48, 0xce, 0x44, 0x1b, 0xaa, 0x40, 0x48, 0xda, 0x46, 0x83, + 0x27, 0xc0, 0x48, 0xf8, 0xc6, 0xca, 0x97, 0x0f, 0xa6, 0x58, 0xc7, 0x72, + 0xbf, 0x0f, 0xc9, 0x09, 0x42, 0x00, 0x40, 0xc0, 0x49, 0x04, 0x42, 0x00, + 0x3b, 0xc0, 0x49, 0x10, 0xc2, 0x04, 0x3d, 0x01, 0x30, 0x0a, 0x00, 0x49, + 0x1c, 0xd3, 0x46, 0x90, 0x0f, 0xac, 0x09, 0x42, 0x02, 0xaf, 0xc0, 0x49, + 0x22, 0xcf, 0x69, 0x09, 0x0f, 0x9e, 0xd8, 0x42, 0x00, 0x49, 0xc0, 0x49, + 0x2e, 0x17, 0x40, 0x49, 0x38, 0xc8, 0xbe, 0x8a, 0x0f, 0x98, 0x30, 0xc3, + 0xe5, 0x15, 0x0f, 0xb6, 0x19, 0xc3, 0x01, 0x4b, 0x0f, 0x9b, 0x70, 0x45, + 0x00, 0xba, 0xc0, 0x49, 0x4a, 0x51, 0x4e, 0xf2, 0xc0, 0x49, 0x9a, 0x4d, + 0x77, 0xc8, 0x40, 0x49, 0xac, 0x0e, 0xc0, 0x49, 0xc6, 0xe0, 0x00, 0xa7, + 0x01, 0x3b, 0x09, 0x14, 0x40, 0x49, 0xd2, 0x00, 0xc0, 0x49, 0xde, 0xc3, + 0x2e, 0xab, 0x01, 0x5f, 0x01, 0xc4, 0x2a, 0x3e, 0x0f, 0xce, 0x08, 0x42, + 0x01, 0x19, 0xc0, 0x49, 0xea, 0xc5, 0x00, 0xb9, 0x00, 0x05, 0x10, 0xc5, + 0x00, 0xb9, 0x01, 0x05, 0xa9, 0xc3, 0x12, 0xad, 0x00, 0x05, 0xc0, 0x50, + 0x5b, 0x72, 0xc0, 0x49, 0xf6, 0x4d, 0x76, 0xde, 0x40, 0x4a, 0x04, 0x47, + 0x02, 0x0e, 0xc0, 0x4a, 0x48, 0x47, 0x0a, 0xda, 0xc0, 0x4a, 0x5a, 0x49, + 0x0b, 0x17, 0xc0, 0x4a, 0x66, 0xce, 0x74, 0xb0, 0x00, 0x24, 0x11, 0xc6, + 0x4a, 0x9f, 0x05, 0x33, 0xf1, 0xc7, 0xc7, 0x27, 0x05, 0x33, 0xf8, 0xce, + 0x74, 0xf6, 0x00, 0x04, 0x99, 0xc5, 0x1d, 0x1d, 0x01, 0x10, 0xb0, 0x49, + 0xb0, 0x2c, 0x40, 0x4a, 0x72, 0x8e, 0x0f, 0xcd, 0x69, 0x96, 0x0f, 0xa5, + 0xd0, 0xcb, 0x94, 0xd2, 0x01, 0x35, 0xe1, 0xc7, 0xb3, 0x85, 0x07, 0xf2, + 0x28, 0xc7, 0xc5, 0x36, 0x01, 0x35, 0xd1, 0x06, 0xc0, 0x4a, 0x96, 0xc5, + 0x33, 0x24, 0x00, 0x01, 0xd8, 0x16, 0xc0, 0x4a, 0x9c, 0xcf, 0x62, 0xc4, + 0x0f, 0xca, 0x40, 0xc9, 0xb2, 0x7e, 0x01, 0x09, 0x01, 0x45, 0x29, 0x7c, + 0x40, 0x4a, 0xa8, 0xc5, 0xda, 0xce, 0x0f, 0x99, 0x89, 0xcf, 0x6b, 0x34, + 0x0f, 0xb2, 0x40, 0x43, 0x01, 0x97, 0xc0, 0x4a, 0xae, 0xc6, 0xd2, 0x41, + 0x01, 0x11, 0xf9, 0x45, 0xd6, 0x7d, 0x40, 0x4a, 0xb8, 0x48, 0xbe, 0x22, + 0xc0, 0x4a, 0xd4, 0xcd, 0x75, 0x65, 0x0f, 0xc8, 0xc0, 0x42, 0x00, 0xaf, + 0xc0, 0x4b, 0x26, 0xd5, 0x34, 0x3a, 0x01, 0x39, 0xd1, 0xcd, 0x79, 0x41, + 0x01, 0x00, 0x30, 0x45, 0xdb, 0xa0, 0xc0, 0x4b, 0x32, 0x46, 0x39, 0xfb, + 0x40, 0x4b, 0x52, 0xcd, 0x7d, 0xd3, 0x01, 0x53, 0x61, 0x43, 0x05, 0xb2, + 0xc0, 0x4b, 0x5e, 0x46, 0x00, 0xd4, 0x40, 0x4b, 0x6a, 0xc8, 0xbc, 0xea, + 0x0f, 0xd3, 0xd1, 0x42, 0x00, 0xc2, 0xc0, 0x4b, 0x76, 0xd3, 0x41, 0x84, + 0x01, 0x71, 0xe0, 0x16, 0xc0, 0x4b, 0x82, 0x14, 0xc0, 0x4b, 0x8e, 0x46, + 0xd2, 0xf5, 0xc0, 0x4b, 0x98, 0xcd, 0x31, 0x8b, 0x0f, 0xac, 0x19, 0xc4, + 0x01, 0xdd, 0x0f, 0x9e, 0xf9, 0xcc, 0x83, 0x85, 0x0f, 0xce, 0x68, 0xd7, + 0x28, 0xb6, 0x01, 0x39, 0x49, 0x03, 0xc0, 0x4b, 0xa4, 0x0b, 0x40, 0x4b, + 0xb0, 0xc6, 0xcc, 0xf5, 0x01, 0x1f, 0x89, 0xc8, 0xb5, 0x72, 0x0f, 0xaf, + 0x00, 0xce, 0x73, 0x60, 0x0f, 0x9c, 0xc9, 0xc2, 0x00, 0xb0, 0x0f, 0xb6, + 0x99, 0xce, 0x71, 0x68, 0x0f, 0xca, 0xc8, 0x00, 0x40, 0x4b, 0xbc, 0x16, + 0xc0, 0x4b, 0xc8, 0xca, 0x85, 0xc7, 0x0f, 0xd7, 0x08, 0xc4, 0xba, 0xe0, + 0x0f, 0xcc, 0xa9, 0x47, 0xc2, 0xea, 0x40, 0x4b, 0xd4, 0x48, 0x10, 0xc1, + 0xc0, 0x4b, 
0xf0, 0xc5, 0xdb, 0x0f, 0x0f, 0xcb, 0x50, 0xc3, 0x05, 0x9f, + 0x01, 0x32, 0x21, 0xc6, 0xce, 0x6f, 0x0f, 0xb7, 0x82, 0x00, 0x4b, 0xfc, + 0x4c, 0x11, 0xe2, 0xc0, 0x4c, 0x02, 0xd1, 0x48, 0x11, 0x00, 0x41, 0xb1, + 0x0f, 0xc0, 0x4c, 0x2c, 0x4b, 0x6f, 0xc7, 0xc0, 0x4c, 0x38, 0x47, 0x02, + 0x0e, 0x40, 0x4c, 0x5c, 0xc4, 0xde, 0xd7, 0x0f, 0xcd, 0xd1, 0xc3, 0x0e, + 0x61, 0x0f, 0xcf, 0xb8, 0xc2, 0x1e, 0xd5, 0x0f, 0xcd, 0x41, 0xc2, 0x02, + 0xa7, 0x0f, 0xa4, 0x02, 0x00, 0x4c, 0xb4, 0xc2, 0x00, 0x29, 0x01, 0x37, + 0xb9, 0xcd, 0x77, 0x46, 0x0f, 0x9d, 0xf8, 0x16, 0xc0, 0x4c, 0xba, 0x12, + 0x40, 0x4c, 0xc4, 0x86, 0x0f, 0xb7, 0xb9, 0xca, 0x9e, 0x3c, 0x0f, 0xab, + 0xa9, 0x42, 0x02, 0x37, 0x40, 0x4c, 0xce, 0x46, 0x70, 0xd0, 0xc0, 0x4c, + 0xda, 0xcb, 0x96, 0xcc, 0x0f, 0x9a, 0xa8, 0x45, 0x00, 0xdd, 0xc0, 0x4c, + 0xe6, 0xce, 0x70, 0x96, 0x05, 0x33, 0x98, 0xc3, 0x15, 0x0f, 0x0f, 0xcc, + 0x81, 0xc2, 0x0b, 0x47, 0x0f, 0xc9, 0xb8, 0x14, 0xc0, 0x4c, 0xf2, 0x4c, + 0x01, 0xf6, 0xc0, 0x4c, 0xfc, 0xc5, 0xda, 0x6a, 0x01, 0x30, 0xc1, 0x18, + 0xc0, 0x4d, 0x0e, 0xd0, 0x5b, 0xf2, 0x0f, 0xca, 0xc0, 0xc3, 0x00, 0x28, + 0x0f, 0xb5, 0xf9, 0x42, 0x00, 0x61, 0xc0, 0x4d, 0x1a, 0xd0, 0x5e, 0x42, + 0x01, 0x1b, 0xe9, 0xca, 0x9a, 0x72, 0x0f, 0x99, 0x01, 0x46, 0x2a, 0x9f, + 0xc0, 0x4d, 0x2e, 0xdd, 0x11, 0x51, 0x0f, 0xc9, 0x78, 0xca, 0xa2, 0x10, + 0x01, 0x37, 0x49, 0x43, 0x00, 0x4b, 0xc0, 0x4d, 0x3a, 0x92, 0x0f, 0xb5, + 0x11, 0xc3, 0x19, 0x78, 0x0f, 0xb7, 0x08, 0x43, 0xc4, 0x20, 0xc0, 0x4d, + 0x46, 0xc4, 0xc0, 0x85, 0x0f, 0xb7, 0xa0, 0xc3, 0x00, 0xca, 0x01, 0x34, + 0xb1, 0xc2, 0x15, 0x13, 0x0f, 0xcf, 0x18, 0x44, 0x07, 0x31, 0xc0, 0x4d, + 0x52, 0xc4, 0x44, 0xba, 0x01, 0x08, 0x41, 0x07, 0xc0, 0x4d, 0x64, 0xc3, + 0x1f, 0x48, 0x0f, 0xa6, 0xe0, 0xc8, 0xbb, 0xba, 0x0f, 0x9c, 0x90, 0xc5, + 0x2a, 0x94, 0x01, 0x3a, 0x21, 0xc3, 0x12, 0xb8, 0x01, 0x30, 0x1b, 0x00, + 0x4d, 0x70, 0xd0, 0x5f, 0xf2, 0x0f, 0x9e, 0xa1, 0xc7, 0xca, 0x61, 0x0f, + 0x9e, 0x10, 0xc2, 0x00, 0x71, 0x0f, 0xa0, 0x61, 0xc2, 0x00, 0x3c, 0x0f, + 0xa0, 0x68, 0x43, 0x00, 0x8e, 0xc0, 0x4d, 0x76, 0xd6, 0x2c, 0x18, 0x01, + 0x08, 0xb8, 0xd6, 0x1f, 0x7f, 0x0f, 0xb3, 0x53, 0x00, 0x4d, 0x82, 0xc2, + 0x11, 0xa5, 0x00, 0x01, 0x7a, 0x00, 0x4d, 0x88, 0x4e, 0x6d, 0x16, 0xc0, + 0x4d, 0x8e, 0xdb, 0x15, 0xcc, 0x08, 0xd5, 0x03, 0x00, 0x4d, 0x96, 0x45, + 0x01, 0xc3, 0xc0, 0x4d, 0x9c, 0x15, 0xc0, 0x4d, 0xb4, 0xcf, 0x63, 0xff, + 0x08, 0xd4, 0xc1, 0x55, 0x34, 0x79, 0xc0, 0x4d, 0xc0, 0x57, 0x26, 0xd3, + 0xc0, 0x4d, 0xf0, 0x47, 0x02, 0x0e, 0xc0, 0x4e, 0x00, 0x46, 0x34, 0x6f, + 0x40, 0x4e, 0x5a, 0xc8, 0xb7, 0x5a, 0x01, 0x35, 0xe9, 0xc2, 0x01, 0x26, + 0x0f, 0xcf, 0x30, 0xd4, 0x3e, 0x44, 0x01, 0x1c, 0xa1, 0x00, 0xc0, 0x4e, + 0x66, 0xc4, 0x15, 0x2e, 0x0f, 0xca, 0x70, 0x46, 0x09, 0x97, 0xc0, 0x4e, + 0x78, 0x47, 0x02, 0x0e, 0x40, 0x4e, 0x9c, 0x4c, 0x11, 0xe2, 0xc0, 0x4f, + 0x16, 0x47, 0x34, 0x2f, 0xc0, 0x4f, 0x28, 0x4a, 0x51, 0x89, 0xc0, 0x4f, + 0x35, 0xd0, 0x59, 0xf2, 0x08, 0x7a, 0x29, 0x47, 0x02, 0x0e, 0x40, 0x4f, + 0x5f, 0x42, 0x01, 0x19, 0xc0, 0x4f, 0xbc, 0xd8, 0x24, 0x6b, 0x01, 0x3d, + 0x38, 0x48, 0x19, 0xd4, 0xc0, 0x4f, 0xc6, 0xc5, 0xda, 0xc9, 0x01, 0x19, + 0x78, 0xc6, 0xd2, 0xdd, 0x0f, 0xaa, 0x69, 0xcd, 0x6a, 0x0a, 0x00, 0x00, + 0xb0, 0x43, 0x68, 0xf2, 0xc0, 0x50, 0x1a, 0xc3, 0x09, 0x3a, 0x0f, 0xa4, + 0x48, 0x47, 0x02, 0x0e, 0xc0, 0x50, 0x72, 0x45, 0x00, 0xba, 0xc0, 0x50, + 0xc8, 0x4b, 0x6f, 0xc7, 0xc0, 0x50, 0xd8, 0x4c, 0x85, 0xa1, 0x40, 0x50, + 0xee, 0x07, 0xc0, 0x50, 0xfe, 0xca, 0xa4, 0xe0, 0x01, 0x05, 0xb9, 0x42, + 0x06, 0x4e, 0x40, 0x51, 0x0a, 0x43, 0x1b, 0x32, 0xc0, 0x51, 0x1f, 0xc6, + 0xce, 0xff, 
0x0f, 0x9a, 0xe9, 0xc2, 0x00, 0x89, 0x00, 0x01, 0x00, 0x49, + 0x6e, 0x41, 0x40, 0x51, 0x2c, 0x44, 0x03, 0xda, 0xc0, 0x51, 0x38, 0xc3, + 0x01, 0xe5, 0x0f, 0xab, 0xba, 0x00, 0x51, 0x4a, 0xc9, 0xac, 0xde, 0x0f, + 0x9e, 0x29, 0xcb, 0x94, 0x01, 0x0f, 0xa1, 0x99, 0x11, 0xc0, 0x51, 0x50, + 0xc3, 0x09, 0x3a, 0x0f, 0xcf, 0xe8, 0x15, 0xc0, 0x51, 0x5a, 0xc4, 0xdf, + 0x9b, 0x0f, 0xcd, 0xc1, 0xc7, 0xc8, 0xb6, 0x0f, 0xcd, 0xc8, 0x00, 0xc0, + 0x51, 0x66, 0x47, 0xc3, 0xed, 0xc0, 0x51, 0x72, 0xc6, 0x91, 0xd5, 0x0f, + 0x99, 0xd9, 0xc4, 0xaf, 0x8f, 0x0f, 0x98, 0x2b, 0x00, 0x51, 0x9c, 0xd2, + 0x4a, 0xf3, 0x0f, 0x98, 0x38, 0xc6, 0x07, 0x9a, 0x01, 0x1d, 0x99, 0xc3, + 0x00, 0xf1, 0x01, 0x1d, 0x91, 0xcd, 0x7b, 0x97, 0x01, 0x50, 0x58, 0x00, + 0x40, 0x51, 0xa2, 0x43, 0x00, 0x3d, 0xc0, 0x51, 0xba, 0x46, 0x07, 0x2f, + 0xc0, 0x51, 0xcf, 0xc6, 0xb0, 0xf5, 0x00, 0x00, 0xd0, 0xcc, 0x81, 0x5d, + 0x01, 0x11, 0x79, 0xc2, 0x00, 0x29, 0x0f, 0x9e, 0x20, 0xc2, 0x00, 0x0a, + 0x0f, 0x9b, 0x19, 0xcf, 0x61, 0x7a, 0x0f, 0xb4, 0xf8, 0x0e, 0xc0, 0x52, + 0x09, 0xca, 0xa1, 0x3e, 0x0f, 0xb0, 0x78, 0x42, 0x02, 0xa7, 0xc0, 0x52, + 0x13, 0xca, 0x4a, 0x11, 0x01, 0x51, 0x98, 0xd5, 0x36, 0x1d, 0x0f, 0xb3, + 0xa9, 0x90, 0x0f, 0xcd, 0x10, 0x42, 0x02, 0x41, 0xc0, 0x52, 0x20, 0x10, + 0xc0, 0x52, 0x2c, 0xc2, 0x00, 0x4e, 0x01, 0x01, 0x90, 0xc9, 0xb2, 0x87, + 0x0f, 0xcd, 0x79, 0xc7, 0xc7, 0xcf, 0x01, 0x18, 0x29, 0x12, 0xc0, 0x52, + 0x39, 0xc7, 0xc4, 0x1e, 0x01, 0x5e, 0xc1, 0xcc, 0x88, 0xb9, 0x0f, 0xb6, + 0x38, 0xca, 0x9b, 0xb2, 0x01, 0x1c, 0xb9, 0xc5, 0xbf, 0x4d, 0x01, 0x13, + 0xd3, 0x00, 0x52, 0x48, 0x15, 0xc0, 0x52, 0x4c, 0x46, 0xcf, 0xd1, 0xc0, + 0x52, 0x58, 0xc4, 0xde, 0xe3, 0x0f, 0xcb, 0x40, 0x05, 0xc0, 0x52, 0x6a, + 0xcc, 0x83, 0xb5, 0x01, 0x08, 0x73, 0x00, 0x52, 0x76, 0x1b, 0x40, 0x52, + 0x7c, 0xc2, 0x00, 0xf1, 0x01, 0x32, 0x3b, 0x00, 0x52, 0x88, 0x15, 0xc0, + 0x52, 0x8e, 0xc4, 0x09, 0x3a, 0x0f, 0xd5, 0x00, 0x42, 0x11, 0xee, 0xc0, + 0x52, 0x9d, 0xca, 0x0e, 0x64, 0x01, 0x39, 0x79, 0x07, 0xc0, 0x52, 0xa9, + 0xc3, 0x13, 0x4e, 0x0f, 0xd4, 0x28, 0xc8, 0xbb, 0x9a, 0x0f, 0xb7, 0xd8, + 0xc3, 0x4c, 0xa1, 0x01, 0x32, 0x99, 0xc3, 0x1a, 0x2e, 0x0f, 0xa9, 0x58, + 0xcd, 0x7d, 0x44, 0x01, 0x56, 0xd0, 0xc8, 0xb8, 0xf2, 0x0f, 0xa5, 0x49, + 0x8e, 0x0f, 0xa4, 0x51, 0xc9, 0x92, 0xda, 0x00, 0x05, 0xb0, 0x00, 0x40, + 0x52, 0xb5, 0xcc, 0x85, 0xe9, 0x0f, 0xb6, 0x11, 0x49, 0xab, 0xa3, 0xc0, + 0x52, 0xc1, 0x07, 0x40, 0x52, 0xcd, 0x87, 0x0f, 0xae, 0x7b, 0x00, 0x52, + 0xd9, 0xc3, 0x7f, 0x6c, 0x0f, 0xb6, 0xa0, 0x16, 0xc0, 0x52, 0xe5, 0x4b, + 0x8d, 0x9a, 0xc0, 0x52, 0xfd, 0x03, 0xc0, 0x53, 0x21, 0xc3, 0x2a, 0xf6, + 0x0f, 0xcc, 0xe0, 0xcc, 0x23, 0x33, 0x08, 0xd7, 0xab, 0x00, 0x53, 0x33, + 0x0e, 0xc0, 0x53, 0x37, 0xce, 0x75, 0x3c, 0x08, 0xd7, 0x7b, 0x00, 0x53, + 0x46, 0x47, 0xc1, 0x07, 0xc0, 0x53, 0x4a, 0xcb, 0x5a, 0x32, 0x08, 0xd7, + 0x32, 0x00, 0x53, 0x5c, 0xc3, 0x03, 0x03, 0x01, 0x35, 0xa1, 0x0f, 0x40, + 0x53, 0x60, 0x05, 0xc0, 0x53, 0x70, 0x45, 0x00, 0xba, 0xc0, 0x53, 0x7c, + 0x47, 0x34, 0x2f, 0xc0, 0x53, 0xb4, 0x46, 0x09, 0x97, 0xc0, 0x53, 0xc4, + 0x49, 0xaa, 0x7a, 0xc0, 0x53, 0xe8, 0x47, 0xc1, 0xd2, 0x40, 0x53, 0xfa, + 0xc7, 0xc4, 0x3a, 0x0f, 0xa1, 0xe1, 0xc5, 0xdd, 0x44, 0x0f, 0xca, 0xf0, + 0x03, 0xc0, 0x54, 0x12, 0xc8, 0x5b, 0xfa, 0x0f, 0x9b, 0x91, 0xc9, 0xad, + 0xfe, 0x0f, 0xd5, 0xa0, 0x45, 0x00, 0x73, 0xc0, 0x54, 0x1e, 0xc8, 0xb8, + 0x2a, 0x0f, 0x9a, 0xb9, 0xc7, 0x42, 0xd3, 0x00, 0x05, 0x19, 0xcb, 0x95, + 0xb9, 0x0f, 0xd6, 0xb9, 0xc2, 0x11, 0xee, 0x0f, 0xa2, 0xe8, 0x15, 0xc0, + 0x54, 0x2a, 0x42, 0x00, 0x45, 0x40, 0x54, 0x36, 0xcf, 0x5f, 0x33, 0x01, + 0x18, 0xb1, 
0x16, 0xc0, 0x54, 0x42, 0xc5, 0xd9, 0x66, 0x01, 0x5f, 0x38, + 0x4d, 0x7e, 0xe4, 0xc0, 0x54, 0x4e, 0xc4, 0x13, 0x66, 0x0f, 0x9b, 0xf8, + 0xc3, 0x63, 0x7e, 0x0f, 0xb4, 0x9b, 0x00, 0x54, 0x5a, 0xc7, 0xc9, 0x7a, + 0x0f, 0xa3, 0x70, 0xca, 0x8b, 0x2b, 0x01, 0x3e, 0x13, 0x00, 0x54, 0x60, + 0x15, 0xc0, 0x54, 0x66, 0xd1, 0x51, 0xef, 0x01, 0x33, 0xf1, 0x00, 0xc0, + 0x54, 0x78, 0xcc, 0x85, 0x89, 0x0f, 0x9d, 0x69, 0xc9, 0x8e, 0x15, 0x00, + 0x01, 0x28, 0xc3, 0xb3, 0xd0, 0x01, 0x38, 0x79, 0xc6, 0x16, 0x32, 0x01, + 0x37, 0x21, 0xd6, 0x31, 0x82, 0x0f, 0xac, 0x31, 0xc9, 0xaa, 0xd4, 0x0f, + 0xb0, 0xa1, 0xc4, 0xe0, 0x73, 0x0f, 0xa1, 0x38, 0x05, 0xc0, 0x54, 0x8a, + 0x94, 0x0f, 0x9a, 0x81, 0xc4, 0xe4, 0x6b, 0x0f, 0xca, 0xe0, 0xc6, 0xa4, + 0xe4, 0x01, 0x05, 0x89, 0xc8, 0xb5, 0x6a, 0x01, 0x05, 0x38, 0xcb, 0x9a, + 0x31, 0x01, 0x00, 0x41, 0xcf, 0x62, 0x79, 0x01, 0x72, 0x70, 0xc9, 0xad, + 0x92, 0x0f, 0xa4, 0xe1, 0xc2, 0x00, 0x40, 0x0f, 0xa2, 0xd8, 0x16, 0xc0, + 0x54, 0x9a, 0xc3, 0x05, 0x14, 0x08, 0x5d, 0x4b, 0x00, 0x54, 0xaa, 0xc4, + 0x09, 0x9d, 0x08, 0x5d, 0x60, 0xc3, 0x02, 0xa3, 0x08, 0x5c, 0xe1, 0xc5, + 0x0d, 0x20, 0x08, 0x5c, 0xd8, 0xc3, 0xb5, 0x3e, 0x08, 0x5c, 0x89, 0x15, + 0xc0, 0x54, 0xb0, 0xc2, 0x00, 0x67, 0x08, 0x5c, 0x71, 0xc3, 0x20, 0x18, + 0x08, 0x5c, 0x61, 0xc8, 0xb9, 0x7a, 0x08, 0x5c, 0x59, 0xc6, 0xcf, 0xd7, + 0x08, 0x5c, 0x51, 0xc4, 0xe0, 0xe7, 0x08, 0x5c, 0x49, 0xc4, 0x4a, 0xb9, + 0x08, 0x5c, 0x41, 0xc2, 0x01, 0x7f, 0x08, 0x5c, 0x23, 0x00, 0x54, 0xba, + 0xc5, 0x4a, 0xb3, 0x08, 0x5c, 0x31, 0xcd, 0x7e, 0x89, 0x08, 0x5c, 0x29, + 0xc6, 0x40, 0x9a, 0x08, 0x5c, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x5c, 0x11, + 0xc4, 0xe3, 0x27, 0x08, 0x5c, 0x09, 0xc5, 0xa5, 0xfd, 0x08, 0x5c, 0x00, + 0xd2, 0x48, 0xd7, 0x00, 0xb9, 0xb1, 0xd2, 0x4c, 0xa3, 0x00, 0xb9, 0xa8, + 0x48, 0xba, 0xd2, 0xc0, 0x54, 0xc0, 0xc3, 0x25, 0xd6, 0x01, 0x5e, 0xd8, + 0x46, 0xd3, 0x79, 0xc0, 0x54, 0xd2, 0x50, 0x5c, 0x52, 0x40, 0x54, 0xe8, + 0x4c, 0x7e, 0xd8, 0xc0, 0x55, 0x3c, 0x48, 0xb4, 0x80, 0x40, 0x55, 0x52, + 0xcc, 0x8b, 0x05, 0x01, 0x30, 0x59, 0x45, 0x74, 0xd9, 0xc0, 0x55, 0x86, + 0x42, 0x00, 0x29, 0x40, 0x55, 0x92, 0x0b, 0xc0, 0x55, 0x9f, 0xd6, 0x31, + 0xae, 0x0f, 0xae, 0xd8, 0x49, 0x07, 0xbb, 0xc0, 0x55, 0xab, 0xd1, 0x54, + 0x42, 0x01, 0x1e, 0x53, 0x00, 0x55, 0xb7, 0xd3, 0x45, 0xd2, 0x01, 0x1e, + 0x4a, 0x00, 0x55, 0xbd, 0xcb, 0x91, 0x0a, 0x01, 0x12, 0xe1, 0xc3, 0x1e, + 0x36, 0x00, 0x03, 0xf9, 0xcb, 0x91, 0x57, 0x0f, 0xb4, 0xd0, 0xca, 0x9a, + 0x90, 0x01, 0x08, 0x49, 0xc7, 0xc5, 0xec, 0x01, 0x08, 0x19, 0xc4, 0x00, + 0xba, 0x00, 0x05, 0x80, 0xc4, 0x00, 0x87, 0x0f, 0xb1, 0xa9, 0xc6, 0x00, + 0x91, 0x0f, 0xa5, 0x58, 0x48, 0x89, 0xf5, 0xc0, 0x55, 0xc3, 0x43, 0x09, + 0x9a, 0x40, 0x55, 0xdc, 0x49, 0xb3, 0x95, 0xc0, 0x56, 0x0c, 0xcb, 0x96, + 0x27, 0x01, 0x35, 0x71, 0x0b, 0x40, 0x56, 0x3e, 0x51, 0x53, 0xfe, 0xc0, + 0x56, 0x50, 0x53, 0x43, 0x4c, 0x40, 0x56, 0x62, 0x03, 0xc0, 0x56, 0x6e, + 0xdb, 0x16, 0xbf, 0x01, 0x1c, 0x11, 0xcb, 0x8f, 0x5d, 0x0f, 0xcb, 0xc0, + 0x46, 0x8d, 0x69, 0xc0, 0x56, 0x7a, 0xce, 0x6c, 0x28, 0x0f, 0xb7, 0x90, + 0xd7, 0x2a, 0xde, 0x01, 0x1c, 0x99, 0xc3, 0x01, 0xfd, 0x0f, 0x9d, 0x78, + 0x0f, 0xc0, 0x56, 0x92, 0xc6, 0x20, 0xab, 0x00, 0x05, 0x40, 0x12, 0xc0, + 0x56, 0x9e, 0xca, 0xa6, 0xa2, 0x0f, 0xc9, 0x21, 0xcc, 0x81, 0x45, 0x0f, + 0xa1, 0x50, 0xdc, 0x12, 0x55, 0x01, 0x3c, 0xd9, 0xc9, 0x9a, 0x28, 0x01, + 0x05, 0x79, 0xc3, 0x1c, 0xd9, 0x0f, 0xa0, 0x4a, 0x00, 0x56, 0xaa, 0x44, + 0x01, 0x4a, 0xc0, 0x56, 0xb0, 0x00, 0xc0, 0x56, 0xbc, 0x4a, 0x01, 0xa9, + 0x40, 0x56, 0xd7, 0x4a, 0x01, 0x68, 0xc0, 0x56, 0xe9, 0x48, 0x00, 0x5f, + 0x40, 0x56, 
0xf5, 0x43, 0x00, 0x5b, 0xc0, 0x57, 0x01, 0xc5, 0xd8, 0xb7, + 0x0f, 0x9b, 0x48, 0x44, 0x00, 0xde, 0xc0, 0x57, 0x0f, 0x00, 0x40, 0x57, + 0x35, 0x43, 0x06, 0x64, 0xc0, 0x57, 0x4d, 0xc5, 0x11, 0x55, 0x0f, 0xa1, + 0xb0, 0x4b, 0x97, 0x24, 0xc0, 0x57, 0x65, 0xc7, 0xb7, 0x72, 0x01, 0x14, + 0x0b, 0x00, 0x57, 0x74, 0x42, 0x05, 0xc0, 0xc0, 0x57, 0x7a, 0xc5, 0xd4, + 0xfc, 0x01, 0x15, 0x71, 0xc6, 0x07, 0xb0, 0x01, 0x11, 0x22, 0x00, 0x57, + 0x89, 0x46, 0x00, 0x8b, 0x40, 0x57, 0x8f, 0xc4, 0xe4, 0x07, 0x0f, 0xa1, + 0x61, 0xc8, 0x02, 0xe7, 0x00, 0x01, 0x20, 0xdd, 0x11, 0xe2, 0x0d, 0xe4, + 0xf9, 0xcb, 0x99, 0x81, 0x0d, 0xe4, 0xf1, 0xd5, 0x33, 0xfb, 0x0d, 0xe4, + 0xe9, 0xd1, 0x4f, 0xcf, 0x0d, 0xe4, 0xe1, 0x46, 0xd2, 0x95, 0xc0, 0x57, + 0x9e, 0x47, 0x02, 0x0e, 0x40, 0x57, 0xba, 0x43, 0x00, 0xa8, 0xc0, 0x58, + 0x57, 0x00, 0x40, 0x58, 0x69, 0xc4, 0x01, 0xe3, 0x01, 0x2c, 0x99, 0xc9, + 0xb4, 0xd0, 0x0f, 0xab, 0xb0, 0x00, 0x40, 0x58, 0x75, 0xc3, 0x3e, 0xe1, + 0x0f, 0xa4, 0x19, 0xc2, 0x0f, 0x7b, 0x0f, 0x9b, 0x08, 0x44, 0x01, 0xd6, + 0xc0, 0x58, 0x81, 0xcd, 0x78, 0x71, 0x0f, 0xa4, 0xf0, 0x42, 0x01, 0x1b, + 0xc0, 0x58, 0x8b, 0xc5, 0xd7, 0x7c, 0x01, 0x08, 0xf8, 0x43, 0x1f, 0x3d, + 0xc0, 0x58, 0x97, 0xcd, 0x5e, 0x85, 0x00, 0x00, 0xf1, 0xd1, 0x51, 0x34, + 0x0f, 0xb4, 0xc9, 0xc4, 0xe2, 0xeb, 0x0f, 0xcf, 0xf0, 0xc6, 0x00, 0x91, + 0x01, 0x1e, 0x71, 0xc4, 0x00, 0x49, 0x01, 0x5c, 0x81, 0xc5, 0x00, 0x2c, + 0x01, 0x5c, 0x88, 0xc5, 0xd7, 0x1d, 0x0f, 0x9a, 0x71, 0xcd, 0x7c, 0xf6, + 0x0f, 0xcf, 0x38, 0x5d, 0x10, 0x69, 0xc0, 0x58, 0xa3, 0xcb, 0x8f, 0x1b, + 0x00, 0x05, 0x70, 0xcc, 0x45, 0x8d, 0x05, 0x4a, 0xf9, 0x18, 0xc0, 0x59, + 0x0b, 0x4f, 0x30, 0x90, 0xc0, 0x59, 0x17, 0x47, 0x02, 0x0e, 0x40, 0x59, + 0x26, 0x00, 0xc0, 0x59, 0x86, 0x46, 0x01, 0x4a, 0xc0, 0x59, 0xd5, 0x02, + 0xc0, 0x5a, 0x1c, 0xd5, 0x33, 0x29, 0x01, 0x51, 0xe8, 0x00, 0xc0, 0x5a, + 0x38, 0xc8, 0xbf, 0xa2, 0x0f, 0xab, 0x69, 0xc9, 0xb0, 0xaa, 0x0f, 0xd4, + 0x80, 0x47, 0x02, 0x5b, 0x40, 0x5a, 0x5c, 0xc4, 0x15, 0x2e, 0x0f, 0x9a, + 0xc9, 0xc7, 0xc1, 0x0e, 0x0f, 0x9a, 0xc0, 0xd0, 0x5f, 0xb2, 0x01, 0x49, + 0x59, 0xd0, 0x3c, 0x90, 0x01, 0x49, 0x80, 0xc2, 0x00, 0x3d, 0x0f, 0xb4, + 0x00, 0xd9, 0x20, 0xda, 0x0f, 0xc9, 0x19, 0x07, 0xc0, 0x5a, 0x74, 0xc9, + 0xad, 0x38, 0x0f, 0xcf, 0xd8, 0x00, 0xc0, 0x5a, 0x80, 0x4e, 0x6e, 0x90, + 0x40, 0x5a, 0x8c, 0xd3, 0x1c, 0xa7, 0x01, 0x3b, 0x39, 0xd8, 0x25, 0x13, + 0x01, 0x3b, 0x29, 0xc9, 0xb1, 0xa6, 0x01, 0x09, 0xd1, 0xdd, 0x11, 0x8b, + 0x01, 0x5e, 0x69, 0xd7, 0x28, 0x71, 0x01, 0x5e, 0x78, 0x48, 0x56, 0x9a, + 0xc0, 0x5a, 0xaa, 0x15, 0xc0, 0x5a, 0xcf, 0xca, 0x9a, 0x06, 0x08, 0x0c, + 0x89, 0x06, 0xc0, 0x5a, 0xd9, 0xce, 0x74, 0x08, 0x08, 0x0c, 0xb9, 0xc7, + 0xc2, 0x3b, 0x08, 0x0c, 0xd1, 0xce, 0x6f, 0x70, 0x08, 0x0c, 0xd8, 0xc3, + 0x02, 0x10, 0x0f, 0x9f, 0xa8, 0x45, 0xdb, 0x3c, 0xc0, 0x5a, 0xeb, 0x44, + 0x0b, 0xe6, 0xc0, 0x5a, 0xf7, 0x90, 0x01, 0x36, 0x32, 0x00, 0x5b, 0x2b, + 0x91, 0x0f, 0xa7, 0xdb, 0x00, 0x5b, 0x31, 0xd1, 0x52, 0x77, 0x01, 0x1d, + 0xb8, 0xc2, 0x00, 0x44, 0x01, 0x11, 0xb0, 0x44, 0x00, 0x74, 0xc0, 0x5b, + 0x3d, 0xc4, 0xe3, 0x7b, 0x0f, 0xcc, 0xe8, 0xc5, 0x11, 0x55, 0x0f, 0xa1, + 0x80, 0x49, 0x53, 0xa9, 0xc0, 0x5b, 0x49, 0x47, 0x34, 0x2f, 0xc0, 0x5b, + 0x55, 0x46, 0x09, 0x97, 0x40, 0x5b, 0x73, 0x43, 0x00, 0xed, 0xc0, 0x5b, + 0x91, 0x10, 0x40, 0x5b, 0xbb, 0xc9, 0xb0, 0xe0, 0x01, 0x5f, 0x99, 0xc6, + 0xbc, 0xf4, 0x01, 0x5f, 0xa1, 0xc8, 0xbd, 0xb2, 0x01, 0x5f, 0xa9, 0xc8, + 0xbc, 0xf2, 0x01, 0x5f, 0xb1, 0xc8, 0xbb, 0xca, 0x01, 0x5f, 0xb9, 0xc9, + 0xb3, 0xcb, 0x01, 0x5f, 0xc0, 0x9e, 0x07, 0xf0, 0x03, 0x00, 0x5b, 0xc7, + 0x9f, 0x07, 
0xf0, 0x0b, 0x00, 0x5c, 0x0d, 0xa6, 0x07, 0xf0, 0x43, 0x00, + 0x5c, 0x47, 0xa5, 0x07, 0xf0, 0x3b, 0x00, 0x5c, 0x6f, 0xa4, 0x07, 0xf0, + 0x33, 0x00, 0x5c, 0x97, 0xa3, 0x07, 0xf0, 0x2b, 0x00, 0x5c, 0xbf, 0xa2, + 0x07, 0xf0, 0x23, 0x00, 0x5c, 0xe7, 0xa1, 0x07, 0xf0, 0x1b, 0x00, 0x5d, + 0x0f, 0xa0, 0x07, 0xf0, 0x12, 0x00, 0x5d, 0x37, 0x42, 0x00, 0x91, 0xc0, + 0x5d, 0x5f, 0xc5, 0x0a, 0x8a, 0x05, 0x30, 0x69, 0xc9, 0x11, 0xf6, 0x05, + 0x30, 0x71, 0xcd, 0x2c, 0xb2, 0x05, 0x30, 0x79, 0x46, 0x09, 0x97, 0x40, + 0x5d, 0x6b, 0x46, 0x05, 0x87, 0xc0, 0x5d, 0x8f, 0x42, 0x00, 0x36, 0xc0, + 0x5d, 0xd2, 0xc5, 0xda, 0xdd, 0x01, 0x09, 0x18, 0x45, 0x00, 0xba, 0xc0, + 0x5d, 0xe4, 0x45, 0x2b, 0x5f, 0x40, 0x5e, 0x22, 0x5f, 0x0c, 0x84, 0xc0, + 0x5e, 0x56, 0xcc, 0x82, 0x7d, 0x01, 0x18, 0xb8, 0xc8, 0xb7, 0x0a, 0x0f, + 0xa7, 0xe1, 0x00, 0x40, 0x5e, 0x62, 0x4f, 0x0b, 0x17, 0xc0, 0x5e, 0x6e, + 0x4d, 0x29, 0xb9, 0x40, 0x5e, 0xee, 0xcc, 0x81, 0xc9, 0x01, 0x11, 0x81, + 0xc7, 0xc2, 0x0a, 0x0f, 0x9e, 0x81, 0xc4, 0xe3, 0x0b, 0x0f, 0x98, 0x58, + 0xcb, 0x96, 0x69, 0x01, 0x0c, 0x49, 0xcd, 0x3f, 0xe2, 0x01, 0x0a, 0xf1, + 0x08, 0xc0, 0x5f, 0x6e, 0x16, 0xc0, 0x5f, 0x7a, 0x44, 0x05, 0x14, 0x40, + 0x5f, 0x86, 0x00, 0xc0, 0x5f, 0xac, 0x46, 0xcc, 0xa1, 0xc0, 0x5f, 0xf6, + 0x45, 0xdd, 0x6c, 0x40, 0x60, 0x02, 0xc4, 0x0d, 0x13, 0x0e, 0x9b, 0xc1, + 0xc3, 0x05, 0x14, 0x0e, 0x9b, 0xb8, 0x09, 0xc0, 0x60, 0x14, 0xca, 0xa4, + 0xb8, 0x0f, 0x9c, 0x58, 0x43, 0x5c, 0x89, 0xc0, 0x60, 0x26, 0xc3, 0x04, + 0x85, 0x0f, 0xd6, 0xa0, 0xc5, 0xc4, 0xa4, 0x01, 0x38, 0x39, 0xc9, 0xb1, + 0xf7, 0x0f, 0xad, 0x68, 0x43, 0x02, 0x31, 0xc0, 0x60, 0x7a, 0xc8, 0xba, + 0xa2, 0x0f, 0xcb, 0x08, 0x45, 0x92, 0x80, 0xc0, 0x60, 0x98, 0x4a, 0xa7, + 0xa6, 0xc0, 0x60, 0xbc, 0x45, 0xd8, 0xb2, 0x40, 0x61, 0x22, 0x0d, 0xc0, + 0x61, 0x40, 0x44, 0x06, 0xb2, 0xc0, 0x61, 0x4c, 0xc3, 0x0f, 0xed, 0x0f, + 0xa1, 0x10, 0x00, 0xc0, 0x61, 0x7a, 0x02, 0x40, 0x61, 0xa4, 0x10, 0xc0, + 0x61, 0xb6, 0xce, 0x72, 0xfe, 0x0f, 0xca, 0x48, 0xcc, 0x84, 0x2d, 0x0f, + 0xa5, 0x69, 0xc9, 0xa8, 0xc1, 0x0f, 0xd3, 0xa0, 0x44, 0x16, 0xcb, 0xc0, + 0x61, 0xc0, 0x44, 0x83, 0x63, 0x40, 0x61, 0xcc, 0x07, 0xc0, 0x61, 0xd8, + 0x42, 0x00, 0xa2, 0x40, 0x61, 0xe2, 0x44, 0x0d, 0xde, 0xc0, 0x61, 0xee, + 0x42, 0x02, 0x32, 0x40, 0x62, 0x12, 0xd8, 0x22, 0xa3, 0x0f, 0xa8, 0xe9, + 0xd6, 0x08, 0x88, 0x01, 0x1f, 0x01, 0xcd, 0x00, 0x32, 0x01, 0x1e, 0xf1, + 0xcb, 0x1a, 0x50, 0x01, 0x1e, 0xe1, 0xce, 0x25, 0xad, 0x01, 0x1d, 0xa1, + 0x42, 0x00, 0xd0, 0xc0, 0x62, 0x1c, 0x46, 0x00, 0x2c, 0xc0, 0x62, 0x26, + 0x45, 0x00, 0x49, 0xc0, 0x62, 0x30, 0x44, 0x13, 0x1d, 0x40, 0x62, 0x3a, + 0x42, 0x01, 0x7c, 0xc0, 0x62, 0x49, 0xc9, 0xb0, 0xce, 0x01, 0x19, 0x80, + 0x56, 0x30, 0x22, 0xc0, 0x62, 0x55, 0xd6, 0x2c, 0x70, 0x0f, 0x89, 0x50, + 0xc2, 0x00, 0x8e, 0x0f, 0xcd, 0xbb, 0x00, 0x62, 0x67, 0xc4, 0x7f, 0x35, + 0x0f, 0xcf, 0x80, 0x8f, 0x0f, 0xb4, 0x53, 0x00, 0x62, 0x6d, 0xc2, 0x00, + 0x74, 0x0f, 0xb4, 0x31, 0xcc, 0x84, 0xd5, 0x01, 0x09, 0x11, 0x05, 0xc0, + 0x62, 0x73, 0x42, 0x05, 0x26, 0x40, 0x62, 0x7f, 0x43, 0x01, 0x95, 0xc0, + 0x62, 0x8b, 0x49, 0x89, 0xf4, 0xc0, 0x62, 0x97, 0x44, 0x0b, 0x26, 0xc0, + 0x62, 0xbf, 0xc5, 0x33, 0x24, 0x01, 0x02, 0xe9, 0xcb, 0x95, 0x1f, 0x0f, + 0xa9, 0x88, 0x87, 0x01, 0x15, 0x43, 0x00, 0x62, 0xf3, 0xc4, 0xe3, 0xd3, + 0x0f, 0x9d, 0xd0, 0x12, 0xc0, 0x62, 0xf9, 0xc2, 0x02, 0xa7, 0x0f, 0xce, + 0x62, 0x00, 0x63, 0x05, 0x08, 0xc0, 0x63, 0x0b, 0x0e, 0xc0, 0x63, 0x21, + 0x06, 0xc0, 0x63, 0x2b, 0x11, 0xc0, 0x63, 0x45, 0x05, 0xc0, 0x63, 0x51, + 0x03, 0xc0, 0x63, 0x67, 0x0a, 0xc0, 0x63, 0x7f, 0x15, 0xc0, 0x63, 0x8b, + 0x07, 0xc0, 
0x63, 0x9b, 0x42, 0x00, 0x74, 0xc0, 0x63, 0xb7, 0x42, 0x01, + 0x4a, 0xc0, 0x63, 0xc3, 0x0f, 0xc0, 0x63, 0xcf, 0x09, 0xc0, 0x63, 0xe1, + 0xc5, 0xdb, 0xb9, 0x0e, 0x99, 0xd9, 0xd3, 0x40, 0x2e, 0x0e, 0x99, 0xb9, + 0x14, 0xc0, 0x63, 0xfc, 0x12, 0xc0, 0x64, 0x06, 0x0d, 0xc0, 0x64, 0x16, + 0x04, 0xc0, 0x64, 0x22, 0xc3, 0x85, 0x26, 0x0e, 0x98, 0xe9, 0xcc, 0x8a, + 0xb1, 0x0e, 0x98, 0x88, 0x14, 0xc0, 0x64, 0x34, 0xd2, 0x4b, 0x17, 0x0f, + 0x9b, 0xa9, 0xc3, 0x3a, 0x48, 0x0f, 0xd6, 0xb0, 0x07, 0xc0, 0x64, 0x40, + 0x44, 0xcd, 0xca, 0x40, 0x64, 0x52, 0x96, 0x01, 0x37, 0xd1, 0xc7, 0x80, + 0xa2, 0x01, 0x05, 0xc1, 0xd4, 0x3b, 0x60, 0x0f, 0x9d, 0xf0, 0xd7, 0x2a, + 0x82, 0x01, 0x3a, 0x29, 0xc2, 0x00, 0x29, 0x0f, 0xa0, 0x2a, 0x00, 0x64, + 0x76, 0xc7, 0x17, 0x6b, 0x01, 0x1f, 0x91, 0x47, 0x50, 0x5d, 0x40, 0x64, + 0x7c, 0x00, 0x40, 0x64, 0x88, 0x45, 0xd8, 0x17, 0xc0, 0x64, 0x97, 0x4b, + 0x96, 0x8a, 0xc0, 0x64, 0xbf, 0xc7, 0x11, 0x53, 0x0f, 0xb1, 0x58, 0x42, + 0x00, 0x6f, 0x40, 0x64, 0xcb, 0x15, 0xc0, 0x64, 0xd1, 0x45, 0x01, 0xc3, + 0xc0, 0x64, 0xe1, 0x0e, 0xc0, 0x65, 0x2d, 0x52, 0x47, 0xb7, 0xc0, 0x65, + 0x39, 0x46, 0x09, 0x97, 0xc0, 0x65, 0x43, 0x4b, 0x6f, 0xc7, 0xc0, 0x65, + 0x6d, 0xc9, 0xac, 0x96, 0x00, 0x7d, 0xf3, 0x00, 0x65, 0x9e, 0x52, 0x4c, + 0x13, 0x40, 0x65, 0xa4, 0x47, 0x02, 0x0e, 0xc0, 0x65, 0xbc, 0x42, 0x00, + 0xa2, 0xc0, 0x65, 0xce, 0xce, 0x6c, 0x6e, 0x01, 0x6b, 0x81, 0xd0, 0x57, + 0xe2, 0x01, 0x6b, 0xf8, 0x00, 0xc0, 0x65, 0xd4, 0xc8, 0xbc, 0x32, 0x01, + 0x71, 0xd0, 0xd3, 0x46, 0x31, 0x0f, 0xdd, 0x81, 0x4a, 0x03, 0x3d, 0x40, + 0x66, 0x16, 0x00, 0xc0, 0x66, 0x28, 0x47, 0x09, 0x90, 0x40, 0x66, 0x8f, + 0x47, 0x0a, 0xda, 0xc0, 0x66, 0xa7, 0xc9, 0xb4, 0xbe, 0x00, 0x2c, 0x79, + 0xc6, 0x59, 0x92, 0x00, 0x2c, 0x51, 0xc9, 0x11, 0xf6, 0x00, 0x2c, 0x49, + 0x03, 0xc0, 0x66, 0xb3, 0xcd, 0x2c, 0xb2, 0x00, 0x2a, 0xf1, 0x05, 0xc0, + 0x66, 0xbf, 0x07, 0xc0, 0x66, 0xcb, 0xde, 0x0f, 0x5e, 0x00, 0x2a, 0xc8, + 0xca, 0xa6, 0x84, 0x0f, 0x9d, 0x41, 0xcd, 0x75, 0xc0, 0x0f, 0xb4, 0xd8, + 0xce, 0x72, 0x9c, 0x0f, 0x9c, 0xf9, 0xc4, 0x7a, 0xfe, 0x01, 0x5f, 0x28, + 0x05, 0xc0, 0x66, 0xd7, 0x4d, 0x29, 0xb9, 0xc0, 0x66, 0xe3, 0xcf, 0x6b, + 0x52, 0x0f, 0x4a, 0x21, 0xd0, 0x58, 0x92, 0x0f, 0x4a, 0x29, 0x47, 0x63, + 0xff, 0xc0, 0x67, 0x63, 0xc5, 0x08, 0x09, 0x0f, 0x4a, 0x39, 0x10, 0xc0, + 0x67, 0x6f, 0x46, 0x09, 0x97, 0xc0, 0x67, 0x7b, 0x48, 0x10, 0xb4, 0x40, + 0x67, 0x9f, 0x04, 0xc0, 0x67, 0xab, 0x05, 0xc0, 0x67, 0xcc, 0x06, 0xc0, + 0x67, 0xe0, 0x12, 0xc0, 0x67, 0xec, 0x16, 0xc0, 0x68, 0x00, 0x14, 0xc0, + 0x68, 0x1b, 0x18, 0xc0, 0x68, 0x28, 0x15, 0xc0, 0x68, 0x32, 0x03, 0xc0, + 0x68, 0x58, 0x0e, 0xc0, 0x68, 0x86, 0x42, 0x00, 0xec, 0xc0, 0x68, 0x92, + 0x0f, 0xc0, 0x68, 0x9e, 0x42, 0x01, 0x4a, 0xc0, 0x68, 0xb3, 0xc5, 0x61, + 0xc0, 0x0f, 0xb8, 0x19, 0x43, 0x03, 0xd3, 0xc0, 0x68, 0xbd, 0xc4, 0x83, + 0x39, 0x0f, 0xb8, 0x11, 0x09, 0xc0, 0x68, 0xc9, 0x44, 0x1a, 0x05, 0xc0, + 0x68, 0xd5, 0xc3, 0xdd, 0x05, 0x0f, 0xba, 0x31, 0xc5, 0xdd, 0xe4, 0x0f, + 0xba, 0xa9, 0x0a, 0x40, 0x68, 0xe4, 0xda, 0x1a, 0xcc, 0x01, 0x36, 0xa9, + 0xce, 0x72, 0x72, 0x01, 0x1c, 0x38, 0xc4, 0xd9, 0x17, 0x01, 0x34, 0xb9, + 0xc8, 0x8d, 0x71, 0x01, 0x09, 0xa9, 0xc2, 0x00, 0x61, 0x00, 0x00, 0x38, + 0xce, 0x73, 0xde, 0x01, 0x19, 0x71, 0xc8, 0x07, 0x5f, 0x01, 0x12, 0x60, + 0xcb, 0x23, 0xa0, 0x01, 0x12, 0x51, 0xc2, 0x00, 0xf1, 0x01, 0x12, 0x42, + 0x00, 0x68, 0xee, 0xc9, 0xae, 0x07, 0x0f, 0xb7, 0xd1, 0x0f, 0x40, 0x68, + 0xf4, 0xc8, 0xbf, 0xca, 0x0f, 0xb7, 0x61, 0xc9, 0xb1, 0x1f, 0x0f, 0xb7, + 0x58, 0x51, 0x52, 0x22, 0xc0, 0x69, 0x00, 0xcb, 0x99, 0xa2, 0x0f, 0xd6, + 0x00, 0x4b, 
0x05, 0xf7, 0xc0, 0x69, 0x18, 0xce, 0x6f, 0x54, 0x0f, 0xa7, + 0xb0, 0xc2, 0x00, 0x49, 0x01, 0x11, 0x03, 0x00, 0x69, 0x38, 0xca, 0x9d, + 0x24, 0x01, 0x09, 0x59, 0xc9, 0x25, 0xca, 0x0f, 0xa5, 0x11, 0xc7, 0xca, + 0x84, 0x0f, 0xb1, 0x01, 0xcb, 0x90, 0x7b, 0x0f, 0xb1, 0x38, 0x14, 0xc0, + 0x69, 0x3e, 0x44, 0x0b, 0x02, 0xc0, 0x69, 0x4a, 0xcc, 0x8c, 0x01, 0x0f, + 0xb1, 0x90, 0xcb, 0x8b, 0x06, 0x01, 0x30, 0x51, 0xc9, 0xa8, 0x43, 0x08, + 0x0c, 0xe0, 0x0e, 0xc0, 0x69, 0x55, 0x10, 0xc0, 0x69, 0x5f, 0x06, 0xc0, + 0x69, 0x75, 0x16, 0xc0, 0x69, 0x83, 0x05, 0xc0, 0x69, 0x91, 0x83, 0x08, + 0xb8, 0x93, 0x00, 0x69, 0x9b, 0x0c, 0xc0, 0x69, 0xa1, 0x04, 0xc0, 0x69, + 0xab, 0x09, 0xc0, 0x69, 0xb5, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0x89, 0xc2, + 0x0d, 0xf6, 0x08, 0xb8, 0x79, 0xc2, 0x00, 0x39, 0x08, 0xb8, 0x69, 0xc2, + 0x01, 0xc3, 0x08, 0xb8, 0x49, 0x12, 0xc0, 0x69, 0xbf, 0x0d, 0x40, 0x69, + 0xc9, 0xc8, 0x91, 0x9a, 0x08, 0xb9, 0xf9, 0x44, 0x00, 0xbb, 0x40, 0x69, + 0xd3, 0xc5, 0x28, 0xee, 0x08, 0xb9, 0xd9, 0xc2, 0x00, 0xc4, 0x08, 0xb9, + 0xd0, 0xc4, 0x26, 0x78, 0x08, 0xb9, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xb9, + 0xc1, 0x15, 0xc0, 0x69, 0xe3, 0x08, 0xc0, 0x69, 0xef, 0x16, 0xc0, 0x69, + 0xfb, 0xc3, 0x05, 0x14, 0x08, 0xb9, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xb9, + 0x80, 0x83, 0x08, 0xb9, 0x03, 0x00, 0x6a, 0x07, 0x91, 0x08, 0xb9, 0x41, + 0x87, 0x08, 0xb9, 0x31, 0x97, 0x08, 0xb9, 0x23, 0x00, 0x6a, 0x17, 0x8b, + 0x08, 0xb9, 0x12, 0x00, 0x6a, 0x1b, 0x0e, 0xc0, 0x6a, 0x1f, 0xc2, 0x00, + 0x39, 0x08, 0xb8, 0xf0, 0xc6, 0x6a, 0xfb, 0x01, 0x08, 0x01, 0xc5, 0xd6, + 0xdc, 0x0f, 0xd4, 0xb8, 0xd3, 0x46, 0x0b, 0x01, 0x03, 0x69, 0xd2, 0x4d, + 0x69, 0x01, 0x03, 0x58, 0xc4, 0x01, 0x96, 0x01, 0x4c, 0xf9, 0xc5, 0x09, + 0x02, 0x00, 0x05, 0xa0, 0x42, 0x00, 0xe3, 0xc0, 0x6a, 0x29, 0xc5, 0xde, + 0x3e, 0x01, 0x1b, 0xd3, 0x00, 0x6a, 0x38, 0xc5, 0x9b, 0xd5, 0x01, 0x1b, + 0xab, 0x00, 0x6a, 0x3e, 0x0b, 0xc0, 0x6a, 0x44, 0xd0, 0x5c, 0xa2, 0x01, + 0x1b, 0xb9, 0x14, 0xc0, 0x6a, 0x53, 0x42, 0x02, 0xae, 0xc0, 0x6a, 0x5f, + 0x06, 0xc0, 0x6a, 0x69, 0x15, 0xc0, 0x6a, 0x7b, 0xc5, 0xd7, 0x8b, 0x01, + 0x1b, 0x61, 0x05, 0xc0, 0x6a, 0x91, 0xd6, 0x31, 0x14, 0x01, 0x1b, 0x49, + 0xcf, 0x64, 0x86, 0x01, 0x1b, 0x41, 0x44, 0x00, 0x49, 0xc0, 0x6a, 0x9d, + 0x44, 0xe1, 0x43, 0xc0, 0x6a, 0xa9, 0xcd, 0x7d, 0xed, 0x01, 0x1a, 0x00, + 0x42, 0x00, 0x79, 0xc0, 0x6a, 0xb5, 0xd8, 0x23, 0x63, 0x00, 0x04, 0xf8, + 0xc7, 0x2d, 0x87, 0x00, 0x01, 0x39, 0xc4, 0x66, 0x29, 0x01, 0x5f, 0x20, + 0xd1, 0x48, 0x11, 0x08, 0x59, 0xc9, 0x47, 0x02, 0x0e, 0x40, 0x6a, 0xc1, + 0xc4, 0x3d, 0xd8, 0x0f, 0x9f, 0xd1, 0xc6, 0x36, 0x23, 0x00, 0x01, 0x30, + 0xca, 0xa7, 0xc4, 0x08, 0x08, 0x11, 0x47, 0x34, 0x2f, 0xc0, 0x6b, 0x42, + 0x19, 0xc0, 0x6b, 0x69, 0xd9, 0x20, 0xc1, 0x08, 0x09, 0xe1, 0xdc, 0x14, + 0xbd, 0x08, 0x09, 0xe9, 0x48, 0x14, 0xc4, 0x40, 0x6b, 0x75, 0x4a, 0x9f, + 0x0e, 0xc0, 0x6b, 0x81, 0xc9, 0xb0, 0x23, 0x0f, 0xca, 0x50, 0xd4, 0x3c, + 0xb4, 0x0f, 0xbd, 0x89, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x21, 0x46, 0x01, + 0xfc, 0xc0, 0x6b, 0xa3, 0x15, 0xc0, 0x6b, 0xaf, 0xd5, 0x34, 0x8e, 0x0f, + 0xbd, 0xe8, 0x43, 0x00, 0x7a, 0xc0, 0x6b, 0xbb, 0xd4, 0x3e, 0x30, 0x0f, + 0x9b, 0xf0, 0xc3, 0x1e, 0x19, 0x01, 0x16, 0x43, 0x00, 0x6b, 0xee, 0x0e, + 0xc0, 0x6b, 0xf4, 0xca, 0x9b, 0xc6, 0x0f, 0x9f, 0xc8, 0xc8, 0x2f, 0x03, + 0x0f, 0xb6, 0x48, 0x8d, 0x0f, 0xab, 0x73, 0x00, 0x6b, 0xfe, 0xc6, 0xc9, + 0xcf, 0x0f, 0xd4, 0x18, 0xcb, 0x95, 0xfb, 0x0f, 0x9c, 0xa8, 0x47, 0x02, + 0x0e, 0xc0, 0x6c, 0x0b, 0x4d, 0x7f, 0x25, 0x40, 0x6c, 0x95, 0x4b, 0x96, + 0x48, 0xc0, 0x6c, 0xa9, 0xc4, 0xae, 0x42, 0x0f, 0x99, 0xe1, 0xc5, 0xd9, + 0x98, 0x0f, 
0xa1, 0x08, 0x42, 0x00, 0x3b, 0xc0, 0x6c, 0xd0, 0xc9, 0x95, + 0x84, 0x01, 0x21, 0x10, 0x00, 0xc0, 0x6c, 0xd8, 0xc7, 0xc6, 0xa2, 0x0f, + 0xd6, 0x80, 0xc2, 0x00, 0x81, 0x0f, 0xd4, 0xa9, 0x8d, 0x0f, 0x9f, 0x33, + 0x00, 0x6c, 0xe4, 0xc3, 0x09, 0xe5, 0x0f, 0x9a, 0x60, 0x0e, 0xc0, 0x6c, + 0xea, 0x46, 0x77, 0x20, 0x40, 0x6c, 0xfa, 0xc3, 0x00, 0x3c, 0x0f, 0xcf, + 0xd3, 0x00, 0x6d, 0x30, 0xc5, 0xdb, 0x46, 0x01, 0x35, 0xf1, 0x47, 0xc1, + 0x9a, 0x40, 0x6d, 0x36, 0xc3, 0x09, 0x3b, 0x0f, 0xcd, 0x09, 0xde, 0x0f, + 0xd6, 0x0f, 0x9f, 0xc0, 0x00, 0x40, 0x6d, 0x48, 0x47, 0x02, 0x0e, 0xc0, + 0x6d, 0x60, 0x42, 0x00, 0x99, 0xc0, 0x6d, 0xa5, 0xc7, 0xc0, 0x3c, 0x05, + 0x37, 0x91, 0xc9, 0x11, 0xf6, 0x05, 0x37, 0x99, 0xc9, 0xa8, 0x55, 0x05, + 0x37, 0xb1, 0xcd, 0x2c, 0xb2, 0x05, 0x37, 0xb8, 0x0d, 0xc0, 0x6d, 0xaf, + 0xcb, 0x93, 0x25, 0x0f, 0xa1, 0x59, 0xc2, 0x00, 0x45, 0x0f, 0xca, 0x98, + 0x43, 0x40, 0x85, 0xc0, 0x6d, 0xbd, 0xc4, 0xcd, 0x51, 0x0f, 0xa8, 0x59, + 0x8a, 0x0f, 0xb6, 0x02, 0x00, 0x6d, 0xd9, 0x00, 0xc0, 0x6d, 0xdf, 0xc8, + 0xbd, 0xc2, 0x0f, 0xa4, 0x40, 0xca, 0x9e, 0xc8, 0x0f, 0xb6, 0x21, 0xcb, + 0x90, 0xc8, 0x0f, 0xca, 0xb1, 0xc2, 0x05, 0x03, 0x0f, 0xcb, 0x78, 0xc9, + 0xb3, 0x0e, 0x01, 0x05, 0xf9, 0xc7, 0x82, 0x99, 0x0f, 0xd7, 0x30, 0xc5, + 0xd8, 0xc6, 0x0f, 0x9d, 0x89, 0xc6, 0xd3, 0x97, 0x0f, 0xcf, 0x10, 0xca, + 0xa0, 0x94, 0x0f, 0x9c, 0x11, 0x86, 0x0f, 0xa1, 0x30, 0xcf, 0x61, 0xd4, + 0x01, 0x4f, 0xc9, 0xc7, 0x27, 0x5d, 0x01, 0x4f, 0xc0, 0x87, 0x0f, 0xb5, + 0x91, 0xc3, 0x1d, 0xb1, 0x0f, 0xb5, 0xa0, 0xc3, 0x00, 0x5f, 0x0f, 0xcd, + 0x59, 0x44, 0x7c, 0x59, 0xc0, 0x6d, 0xeb, 0xca, 0x9d, 0xba, 0x0f, 0xa4, + 0x99, 0xd0, 0x57, 0x82, 0x0f, 0x9e, 0xb1, 0x14, 0xc0, 0x6e, 0x03, 0xc2, + 0x05, 0x26, 0x0f, 0xd6, 0xc0, 0xc9, 0xac, 0x45, 0x01, 0x19, 0x63, 0x00, + 0x6e, 0x0f, 0x45, 0xb1, 0x74, 0xc0, 0x6e, 0x15, 0x16, 0x40, 0x6e, 0x47, + 0x00, 0xc0, 0x6e, 0x53, 0xc8, 0xbd, 0xaa, 0x0f, 0xb6, 0x70, 0xc4, 0x0b, + 0xcb, 0x01, 0x13, 0x61, 0xc7, 0x00, 0x90, 0x01, 0x09, 0xb0, 0xc5, 0xb2, + 0x39, 0x0f, 0x9b, 0xd1, 0xc3, 0x0f, 0xed, 0x0f, 0xd5, 0x90, 0xc3, 0xe6, + 0x11, 0x0f, 0xcc, 0x58, 0xc5, 0x00, 0xef, 0x0f, 0xb4, 0x79, 0x16, 0x40, + 0x6e, 0x65, 0xc4, 0xdf, 0x87, 0x01, 0x2e, 0x71, 0xc2, 0x00, 0x3d, 0x01, + 0x01, 0x13, 0x00, 0x6e, 0x71, 0xc4, 0x2a, 0xcc, 0x0f, 0xab, 0x5a, 0x00, + 0x6e, 0x77, 0x46, 0x77, 0x20, 0x40, 0x6e, 0x7d, 0x4b, 0x6f, 0xc7, 0xc0, + 0x6e, 0x95, 0x47, 0x02, 0x0e, 0x40, 0x6e, 0x9d, 0xc4, 0x4c, 0x31, 0x0f, + 0xce, 0x59, 0x95, 0x0f, 0xd7, 0x38, 0x06, 0xc0, 0x6e, 0xfb, 0x42, 0x00, + 0x07, 0xc0, 0x6f, 0x07, 0xc2, 0x00, 0x3b, 0x0f, 0xcf, 0x88, 0x0b, 0xc0, + 0x6f, 0x11, 0x44, 0xdf, 0xf3, 0x40, 0x6f, 0x1b, 0x44, 0x9b, 0x5b, 0xc0, + 0x6f, 0x3b, 0xc8, 0xbf, 0x92, 0x0f, 0xc8, 0x71, 0xc5, 0xdd, 0x3f, 0x0f, + 0xcb, 0x31, 0xc2, 0x00, 0x7a, 0x0f, 0xcf, 0xc8, 0x03, 0xc0, 0x6f, 0x4d, + 0xc2, 0x00, 0x5f, 0x00, 0x16, 0xc0, 0x09, 0xc0, 0x6f, 0x5d, 0x0d, 0xc0, + 0x6f, 0x6f, 0x03, 0xc0, 0x6f, 0x92, 0x15, 0xc0, 0x6f, 0xa4, 0x06, 0xc0, + 0x6f, 0xc1, 0x1b, 0xc0, 0x6f, 0xd1, 0x08, 0xc0, 0x6f, 0xdb, 0x42, 0x11, + 0xee, 0xc0, 0x6f, 0xed, 0x0b, 0xc0, 0x6f, 0xff, 0x07, 0xc0, 0x70, 0x0f, + 0x0f, 0xc0, 0x70, 0x31, 0x16, 0xc0, 0x70, 0x3d, 0x0e, 0xc0, 0x70, 0x4f, + 0x11, 0xc0, 0x70, 0x59, 0x12, 0xc0, 0x70, 0x71, 0xcc, 0x87, 0x5d, 0x0e, + 0x83, 0x51, 0x42, 0x02, 0x41, 0xc0, 0x70, 0x87, 0xc4, 0xc6, 0xc9, 0x0e, + 0x82, 0x01, 0x14, 0x40, 0x70, 0x93, 0xc4, 0x26, 0x78, 0x08, 0xe3, 0x13, + 0x00, 0x70, 0x9f, 0xc5, 0x06, 0xdb, 0x08, 0xe3, 0x0b, 0x00, 0x70, 0xa5, + 0x15, 0xc0, 0x70, 0xa9, 0x08, 0xc0, 0x70, 0xbb, 0x16, 0xc0, 0x70, 0xc3, + 0xc3, 0x05, 
0x14, 0x08, 0xe2, 0xd0, 0x45, 0x09, 0x98, 0xc0, 0x70, 0xd1, + 0xcb, 0x97, 0xf5, 0x08, 0xe2, 0x11, 0xc4, 0x19, 0x53, 0x08, 0xe2, 0x08, + 0x9f, 0x08, 0xe2, 0x29, 0x9e, 0x08, 0xe2, 0x20, 0x03, 0xc0, 0x70, 0xf5, + 0x42, 0x07, 0xb2, 0xc0, 0x71, 0x01, 0xcb, 0x1e, 0x89, 0x08, 0xe1, 0xe0, + 0x03, 0xc0, 0x71, 0x0d, 0x91, 0x08, 0xe1, 0xd1, 0x87, 0x08, 0xe1, 0xc1, + 0x48, 0xb2, 0x2d, 0xc0, 0x71, 0x19, 0x97, 0x08, 0xe1, 0x93, 0x00, 0x71, + 0x24, 0x8b, 0x08, 0xe1, 0x82, 0x00, 0x71, 0x28, 0xc2, 0x00, 0xd0, 0x08, + 0xe1, 0x71, 0x15, 0xc0, 0x71, 0x2c, 0x18, 0xc0, 0x71, 0x3c, 0xc2, 0x00, + 0xdb, 0x08, 0xe1, 0x49, 0xc2, 0x00, 0x39, 0x08, 0xe1, 0x41, 0xc2, 0x19, + 0x2c, 0x08, 0xe1, 0x39, 0xc2, 0x01, 0xc3, 0x08, 0xe1, 0x31, 0x04, 0xc0, + 0x71, 0x46, 0x12, 0xc0, 0x71, 0x50, 0x10, 0xc0, 0x71, 0x5a, 0x06, 0xc0, + 0x71, 0x70, 0x16, 0xc0, 0x71, 0x7e, 0x0c, 0xc0, 0x71, 0x8c, 0x05, 0xc0, + 0x71, 0x96, 0x09, 0xc0, 0x71, 0xa0, 0x0d, 0xc0, 0x71, 0xaa, 0x83, 0x08, + 0xe0, 0x03, 0x00, 0x71, 0xb4, 0x91, 0x08, 0xe0, 0x61, 0x87, 0x08, 0xe0, + 0x51, 0x97, 0x08, 0xe0, 0x23, 0x00, 0x71, 0xc0, 0x8b, 0x08, 0xe0, 0x12, + 0x00, 0x71, 0xc4, 0x43, 0x00, 0x29, 0xc0, 0x71, 0xc8, 0x00, 0x40, 0x71, + 0xf6, 0x45, 0x00, 0x2c, 0xc0, 0x72, 0x15, 0x44, 0x00, 0x49, 0xc0, 0x72, + 0x21, 0x06, 0x40, 0x72, 0x2b, 0xdb, 0x18, 0x6f, 0x01, 0x3f, 0x00, 0xc2, + 0x00, 0xbf, 0x01, 0x11, 0x43, 0x00, 0x72, 0x3d, 0xc3, 0x02, 0x9b, 0x01, + 0x11, 0x3a, 0x00, 0x72, 0x41, 0xcd, 0x7e, 0xa3, 0x0f, 0xa8, 0x79, 0x4a, + 0xa0, 0x1c, 0x40, 0x72, 0x47, 0xc6, 0x02, 0x0e, 0x0f, 0xa4, 0x61, 0xc5, + 0xd6, 0x05, 0x0f, 0x9f, 0x48, 0xca, 0x9b, 0x44, 0x0f, 0xcf, 0xa1, 0xc2, + 0x11, 0xa5, 0x0f, 0xd5, 0xb8, 0x00, 0xc0, 0x72, 0x53, 0x46, 0x01, 0x4a, + 0xc0, 0x72, 0xa2, 0x02, 0x40, 0x72, 0xe9, 0xc7, 0xc8, 0x3f, 0x0f, 0xcb, + 0x61, 0xd3, 0x45, 0x01, 0x0f, 0x9a, 0x18, 0xc4, 0x0b, 0x66, 0x0f, 0xa0, + 0x30, 0x4b, 0x37, 0x43, 0xc0, 0x73, 0x05, 0xd8, 0x24, 0xe3, 0x01, 0x16, + 0xd1, 0x45, 0x00, 0x8c, 0xc0, 0x73, 0x11, 0x11, 0xc0, 0x73, 0x23, 0x03, + 0xc0, 0x73, 0x2f, 0xc4, 0x00, 0xba, 0x00, 0x01, 0xe1, 0xcf, 0x69, 0x18, + 0x01, 0x55, 0x32, 0x00, 0x73, 0x3b, 0x47, 0x02, 0x0e, 0xc0, 0x73, 0x41, + 0x46, 0x09, 0x97, 0xc0, 0x73, 0x99, 0x4c, 0x11, 0xe2, 0xc0, 0x73, 0xbd, + 0x15, 0xc0, 0x73, 0xcd, 0x4f, 0x30, 0x90, 0xc0, 0x73, 0xd9, 0x4b, 0x6f, + 0xc7, 0x40, 0x73, 0xfb, 0x42, 0x00, 0x2f, 0xc0, 0x74, 0x17, 0xd6, 0x21, + 0x9d, 0x0f, 0xb3, 0x90, 0x47, 0x02, 0x0e, 0xc0, 0x74, 0x24, 0x4c, 0x11, + 0xe2, 0x40, 0x74, 0x9a, 0x07, 0xc0, 0x74, 0xa6, 0x0d, 0x40, 0x74, 0xb0, + 0x43, 0xb6, 0x2f, 0xc0, 0x74, 0xbc, 0xd3, 0x44, 0x1d, 0x01, 0x96, 0x78, + 0xc4, 0x1e, 0xf2, 0x0f, 0xa4, 0x20, 0xcf, 0x63, 0xe1, 0x08, 0x49, 0xf9, + 0x47, 0x02, 0x0e, 0x40, 0x74, 0xde, 0x83, 0x08, 0x14, 0x03, 0x00, 0x75, + 0x40, 0x87, 0x08, 0x14, 0x0b, 0x00, 0x75, 0x44, 0x84, 0x08, 0x14, 0x13, + 0x00, 0x75, 0x48, 0x89, 0x08, 0x14, 0x21, 0x86, 0x08, 0x14, 0x29, 0x8b, + 0x08, 0x14, 0x31, 0x99, 0x08, 0x14, 0x39, 0x9c, 0x08, 0x14, 0x41, 0x96, + 0x08, 0x14, 0xbb, 0x00, 0x75, 0x4c, 0x8c, 0x08, 0x14, 0x51, 0x8d, 0x08, + 0x14, 0x5b, 0x00, 0x75, 0x54, 0x93, 0x08, 0x14, 0x61, 0x8e, 0x08, 0x14, + 0x69, 0x8f, 0x08, 0x14, 0x73, 0x00, 0x75, 0x58, 0x90, 0x08, 0x14, 0x7b, + 0x00, 0x75, 0x5c, 0x97, 0x08, 0x14, 0x91, 0x92, 0x08, 0x14, 0x99, 0x94, + 0x08, 0x14, 0xa9, 0x95, 0x08, 0x14, 0xb1, 0x8a, 0x08, 0x14, 0xd9, 0x9a, + 0x08, 0x14, 0xe0, 0x42, 0x09, 0x3b, 0xc0, 0x75, 0x60, 0xc6, 0x8f, 0xfc, + 0x01, 0x05, 0xf0, 0x15, 0xc0, 0x75, 0x6d, 0x47, 0x02, 0x0e, 0xc0, 0x75, + 0x79, 0x05, 0xc0, 0x75, 0xc9, 0x52, 0x48, 0xc5, 0x40, 0x75, 0xd5, 0x00, + 0x40, 0x75, 
0xeb, 0xc2, 0x05, 0x03, 0x0f, 0x9f, 0xb9, 0xc5, 0xd8, 0x71, + 0x0f, 0xcb, 0xe0, 0xc8, 0xbc, 0x7a, 0x0f, 0xa0, 0xf1, 0xc3, 0x01, 0xe5, + 0x0f, 0xd4, 0xe0, 0x47, 0x02, 0x0e, 0xc0, 0x75, 0xf7, 0xc8, 0x22, 0x83, + 0x00, 0x75, 0x79, 0x4b, 0x6f, 0xc7, 0xc0, 0x76, 0x4e, 0x15, 0xc0, 0x76, + 0x7b, 0xc5, 0xdc, 0x54, 0x00, 0x76, 0x31, 0x49, 0xb2, 0x63, 0xc0, 0x76, + 0x87, 0xd1, 0x52, 0xaa, 0x00, 0x76, 0x61, 0xc9, 0xae, 0x97, 0x00, 0x76, + 0x69, 0x46, 0x09, 0x97, 0xc0, 0x76, 0x97, 0x43, 0x60, 0xe8, 0x40, 0x76, + 0xbb, 0x46, 0x00, 0x2c, 0xc0, 0x76, 0xc7, 0x45, 0x00, 0x49, 0xc0, 0x76, + 0xef, 0x44, 0x02, 0x9b, 0xc0, 0x77, 0x0b, 0x45, 0x01, 0xce, 0xc0, 0x77, + 0x15, 0xce, 0x6b, 0x9c, 0x01, 0x38, 0x09, 0x44, 0x05, 0x14, 0xc0, 0x77, + 0x30, 0x16, 0xc0, 0x77, 0x3c, 0xd2, 0x4a, 0x75, 0x0f, 0xdc, 0x21, 0xd3, + 0x3f, 0xe2, 0x0f, 0xdc, 0x30, 0x46, 0x01, 0xfc, 0xc0, 0x77, 0x48, 0x16, + 0xc0, 0x77, 0x5a, 0x15, 0xc0, 0x77, 0x66, 0xd0, 0x58, 0x62, 0x0f, 0xc1, + 0xe9, 0xd1, 0x56, 0xd9, 0x0f, 0xc1, 0xa9, 0x03, 0xc0, 0x77, 0x72, 0xcf, + 0x61, 0x4d, 0x01, 0x3f, 0x81, 0x06, 0xc0, 0x77, 0x81, 0xcd, 0x7c, 0xa8, + 0x01, 0x0e, 0x41, 0x0a, 0xc0, 0x77, 0x8d, 0xc6, 0xca, 0xa3, 0x0f, 0xb3, + 0x69, 0x46, 0x04, 0x8f, 0x40, 0x77, 0x99, 0x46, 0x03, 0x13, 0xc0, 0x77, + 0xa5, 0x4e, 0x6c, 0xfa, 0xc0, 0x77, 0xb1, 0xcc, 0x4e, 0x35, 0x0f, 0xa9, + 0xd1, 0xd1, 0x56, 0x2f, 0x0f, 0xb7, 0x31, 0xc8, 0x2e, 0x20, 0x0f, 0xb7, + 0x38, 0xc4, 0x32, 0xbc, 0x01, 0x15, 0x2b, 0x00, 0x77, 0xbd, 0x45, 0x01, + 0xa2, 0xc0, 0x77, 0xc3, 0xd7, 0x27, 0xfe, 0x01, 0x17, 0x81, 0x45, 0x11, + 0x17, 0xc0, 0x77, 0xd2, 0xc9, 0xb2, 0xea, 0x01, 0x4b, 0xf1, 0x45, 0x01, + 0x5d, 0x40, 0x77, 0xf9, 0xc9, 0xb0, 0xd7, 0x0f, 0xcc, 0x21, 0xd7, 0x1f, + 0x33, 0x01, 0x33, 0x91, 0xc2, 0x00, 0x45, 0x01, 0x11, 0x53, 0x00, 0x78, + 0x05, 0x16, 0x40, 0x78, 0x09, 0xc8, 0x9c, 0xae, 0x01, 0x1c, 0x61, 0xc5, + 0xb9, 0x85, 0x01, 0x01, 0xf8, 0xc9, 0xac, 0x4e, 0x01, 0x37, 0x89, 0xcf, + 0x6a, 0x62, 0x01, 0x30, 0xa0, 0x03, 0xc0, 0x78, 0x15, 0xc4, 0x93, 0xa9, + 0x08, 0x1c, 0x09, 0x09, 0xc0, 0x78, 0x21, 0x0d, 0xc0, 0x78, 0x2d, 0x06, + 0xc0, 0x78, 0x39, 0xc2, 0x01, 0x23, 0x08, 0x1c, 0x2b, 0x00, 0x78, 0x45, + 0xc2, 0x02, 0xa0, 0x08, 0x1c, 0x31, 0x1c, 0xc0, 0x78, 0x4b, 0x16, 0xc0, + 0x78, 0x55, 0xc3, 0x4a, 0xb9, 0x08, 0x1c, 0x51, 0x15, 0xc0, 0x78, 0x65, + 0xc5, 0xdd, 0x99, 0x08, 0x1c, 0x69, 0xc3, 0x00, 0x4e, 0x08, 0x1c, 0x71, + 0xc3, 0x20, 0x18, 0x08, 0x1c, 0x81, 0xc2, 0x05, 0x1c, 0x08, 0x1c, 0xa1, + 0xc4, 0xe4, 0x97, 0x08, 0x1c, 0xb1, 0xc5, 0xd5, 0xec, 0x08, 0x1c, 0xb9, + 0x8b, 0x08, 0x1c, 0xd9, 0x97, 0x08, 0x1c, 0xe0, 0x43, 0x11, 0x3c, 0xc0, + 0x78, 0x75, 0x06, 0xc0, 0x78, 0xd1, 0x14, 0x40, 0x78, 0xe0, 0xc7, 0xc9, + 0xab, 0x0f, 0xb4, 0x09, 0x0f, 0xc0, 0x78, 0xec, 0xd7, 0x26, 0x8e, 0x01, + 0x5f, 0xf8, 0x14, 0xc0, 0x78, 0xf8, 0x0a, 0xc0, 0x79, 0x16, 0x10, 0xc0, + 0x79, 0x34, 0x0d, 0xc0, 0x79, 0x58, 0x42, 0x28, 0x5b, 0xc0, 0x79, 0x76, + 0x42, 0x01, 0x99, 0xc0, 0x79, 0x82, 0x42, 0x36, 0xa2, 0xc0, 0x79, 0x9a, + 0x42, 0x2f, 0xf9, 0xc0, 0x79, 0xae, 0x42, 0x14, 0x7d, 0xc0, 0x79, 0xbe, + 0x19, 0xc0, 0x79, 0xd0, 0x1b, 0xc0, 0x79, 0xe8, 0x0f, 0xc0, 0x79, 0xfa, + 0x16, 0xc0, 0x7a, 0x18, 0x15, 0x40, 0x7a, 0x36, 0xd7, 0x27, 0x01, 0x01, + 0x15, 0xc9, 0x84, 0x0f, 0x99, 0xf8, 0x0e, 0xc0, 0x7a, 0x54, 0x12, 0xc0, + 0x7a, 0x60, 0xcc, 0x8a, 0x99, 0x00, 0x2f, 0x79, 0x45, 0x01, 0xc3, 0xc0, + 0x7a, 0x6c, 0x47, 0x26, 0x6b, 0x40, 0x7a, 0x7e, 0x16, 0xc0, 0x7a, 0xc8, + 0x06, 0xc0, 0x7a, 0xd4, 0xce, 0x6f, 0x00, 0x02, 0x6e, 0x19, 0x19, 0xc0, + 0x7a, 0xe8, 0x42, 0x00, 0x99, 0xc0, 0x7a, 0xf4, 0xd0, 0x5a, 0xb2, 0x02, + 0x6e, 0x39, 
0x15, 0xc0, 0x7a, 0xfe, 0x12, 0xc0, 0x7b, 0x10, 0x08, 0xc0, + 0x7b, 0x22, 0x09, 0xc0, 0x7b, 0x2e, 0x42, 0x00, 0xa2, 0xc0, 0x7b, 0x38, + 0xca, 0xa3, 0xa0, 0x02, 0x6e, 0x79, 0x03, 0xc0, 0x7b, 0x44, 0x04, 0xc0, + 0x7b, 0x56, 0x42, 0x01, 0x19, 0xc0, 0x7b, 0x68, 0x42, 0x00, 0x74, 0xc0, + 0x7b, 0x72, 0x11, 0xc0, 0x7b, 0x82, 0xca, 0xa5, 0x6c, 0x02, 0x6f, 0xd8, + 0x48, 0x01, 0x6b, 0xc0, 0x7b, 0x8e, 0xc2, 0x00, 0x40, 0x0f, 0xa0, 0x72, + 0x00, 0x7b, 0xb4, 0x00, 0xc0, 0x7b, 0xb8, 0xc2, 0x05, 0x03, 0x0f, 0x9f, + 0x40, 0xc6, 0xc6, 0xf0, 0x01, 0x18, 0xdb, 0x00, 0x7b, 0xd0, 0xc2, 0x00, + 0x40, 0x01, 0x18, 0x12, 0x00, 0x7b, 0xd6, 0xd9, 0x1f, 0x7c, 0x0f, 0xb3, + 0x43, 0x00, 0x7b, 0xda, 0x87, 0x0f, 0xab, 0x98, 0xc4, 0x49, 0x2a, 0x0f, + 0x9b, 0x79, 0xc3, 0xb2, 0x36, 0x0f, 0xa0, 0xe8, 0x15, 0xc0, 0x7b, 0xe0, + 0xc3, 0x2f, 0x1e, 0x0f, 0xa9, 0x43, 0x00, 0x7b, 0xea, 0xc6, 0xcb, 0x15, + 0x0f, 0x9a, 0xa0, 0x06, 0xc0, 0x7b, 0xf0, 0x4d, 0x7f, 0xf5, 0xc0, 0x7c, + 0x02, 0x45, 0xdb, 0x2d, 0xc0, 0x7c, 0x20, 0x09, 0x40, 0x7c, 0x32, 0xc6, + 0x40, 0x87, 0x01, 0x00, 0x51, 0xc3, 0x23, 0x08, 0x0f, 0xa4, 0x38, 0x44, + 0xc7, 0xf4, 0xc0, 0x7c, 0x3e, 0xcb, 0x96, 0xb6, 0x0f, 0xa1, 0x18, 0x4c, + 0x1c, 0x86, 0xc0, 0x7c, 0x4a, 0x44, 0x00, 0x49, 0xc0, 0x7c, 0x56, 0x45, + 0x00, 0x2c, 0xc0, 0x7c, 0x62, 0x48, 0xb5, 0x4a, 0xc0, 0x7c, 0x6e, 0x47, + 0xc3, 0x3e, 0xc0, 0x7c, 0x78, 0xd4, 0x3b, 0x24, 0x07, 0xff, 0x41, 0xcd, + 0x1b, 0x41, 0x07, 0xff, 0x51, 0xcf, 0x14, 0x22, 0x07, 0xff, 0x61, 0xcc, + 0x0d, 0xae, 0x07, 0xff, 0x69, 0xcc, 0x0d, 0x9e, 0x07, 0xff, 0x70, 0x02, + 0xc0, 0x7c, 0x84, 0x00, 0x40, 0x7c, 0x93, 0x47, 0x02, 0x0e, 0xc0, 0x7c, + 0x9f, 0xce, 0x1c, 0x92, 0x01, 0x84, 0xe9, 0xd5, 0x34, 0xb8, 0x01, 0x84, + 0xf1, 0xcc, 0x80, 0xe5, 0x01, 0x84, 0xf8, 0xc3, 0x06, 0x19, 0x01, 0x00, + 0x83, 0x00, 0x7c, 0xf7, 0xc9, 0xab, 0x49, 0x01, 0x70, 0x90, 0x42, 0x00, + 0x29, 0xc0, 0x7d, 0x07, 0x47, 0xc7, 0x04, 0x40, 0x7d, 0x13, 0x46, 0x0b, + 0x11, 0xc0, 0x7d, 0x25, 0xc7, 0x00, 0x91, 0x0f, 0xa9, 0x19, 0xc7, 0xc1, + 0x93, 0x0f, 0xa9, 0x10, 0x14, 0xc0, 0x7d, 0x37, 0xc4, 0x1e, 0x43, 0x01, + 0x11, 0x5a, 0x00, 0x7d, 0x56, 0xcd, 0x77, 0xef, 0x01, 0x1c, 0x01, 0x4d, + 0x7a, 0xe1, 0x40, 0x7d, 0x5a, 0xc5, 0x65, 0x44, 0x01, 0x10, 0xf3, 0x00, + 0x7d, 0x66, 0x49, 0x53, 0x89, 0x40, 0x7d, 0x6c, 0x42, 0x01, 0x19, 0xc0, + 0x7d, 0x76, 0x42, 0x00, 0x7a, 0x40, 0x7d, 0x82, 0x0b, 0xc0, 0x7d, 0x8e, + 0xc2, 0x01, 0x0b, 0x00, 0x04, 0x22, 0x00, 0x7d, 0x9a, 0xd3, 0x46, 0x0b, + 0x01, 0x03, 0x61, 0xd2, 0x4d, 0x69, 0x01, 0x03, 0x50, 0xcd, 0x76, 0xeb, + 0x0f, 0xd5, 0x51, 0x44, 0x05, 0x89, 0x40, 0x7d, 0xa0, 0x16, 0xc0, 0x7d, + 0xaf, 0x42, 0x00, 0x06, 0xc0, 0x7d, 0xbb, 0xc5, 0x40, 0x88, 0x01, 0x80, + 0x01, 0x05, 0xc0, 0x7d, 0xc7, 0xc9, 0x11, 0xf6, 0x01, 0x80, 0x11, 0xce, + 0x1c, 0x92, 0x01, 0x80, 0x29, 0xcb, 0x97, 0x87, 0x01, 0x80, 0x39, 0xcf, + 0x66, 0xa2, 0x01, 0x81, 0x51, 0xd0, 0x5a, 0x32, 0x01, 0x81, 0x59, 0xd2, + 0x49, 0x0d, 0x01, 0x81, 0x69, 0xd3, 0x3f, 0xcf, 0x01, 0x81, 0xf1, 0xcf, + 0x64, 0x59, 0x01, 0x81, 0xf9, 0x4b, 0x55, 0xe0, 0x40, 0x7d, 0xd3, 0xc4, + 0x59, 0x33, 0x0f, 0x9b, 0x41, 0xc3, 0xb3, 0x72, 0x0f, 0xce, 0x50, 0xda, + 0x1a, 0x16, 0x01, 0x12, 0x98, 0x4e, 0x70, 0x18, 0x40, 0x7e, 0x09, 0x8f, + 0x0f, 0xd5, 0x89, 0x42, 0x00, 0xa9, 0xc0, 0x7e, 0x1b, 0xc6, 0xd0, 0x1f, + 0x0f, 0xaf, 0xd1, 0xc9, 0xaa, 0x29, 0x0f, 0xb0, 0xf8, 0xc2, 0x00, 0xd1, + 0x0f, 0xa3, 0x4b, 0x00, 0x7e, 0x27, 0xca, 0xa2, 0x38, 0x0f, 0xb5, 0xd0, + 0x00, 0xc0, 0x7e, 0x33, 0xdb, 0x14, 0xd9, 0x01, 0x3d, 0x98, 0xcc, 0x8c, + 0x25, 0x01, 0x33, 0xf9, 0xca, 0x9d, 0x4c, 0x01, 0x31, 0xc0, 0x46, 0x1a, + 0x37, 0xc0, 
0x7e, 0x85, 0x46, 0x06, 0x1d, 0xc0, 0x7e, 0x91, 0x4a, 0x03, + 0xc8, 0xc0, 0x7e, 0x9d, 0x4b, 0x03, 0x87, 0xc0, 0x7e, 0xbb, 0x4a, 0x01, + 0x88, 0xc0, 0x7e, 0xd9, 0x48, 0x09, 0x0d, 0x40, 0x7e, 0xf7, 0x06, 0xc0, + 0x7f, 0x15, 0xc7, 0xc2, 0xff, 0x0f, 0x9b, 0xb9, 0xc9, 0xa1, 0x3f, 0x0f, + 0xb0, 0x48, 0x42, 0x00, 0x29, 0xc0, 0x7f, 0x1f, 0xc2, 0x11, 0xee, 0x01, + 0x18, 0xd0, 0x44, 0xcc, 0x6b, 0xc0, 0x7f, 0x29, 0x44, 0x00, 0x74, 0x40, + 0x7f, 0x41, 0x49, 0xb0, 0xfb, 0xc0, 0x7f, 0x4d, 0xc9, 0xae, 0xcd, 0x01, + 0x35, 0x00, 0x42, 0x00, 0x36, 0xc0, 0x7f, 0x6b, 0x44, 0x00, 0x74, 0xc0, + 0x7f, 0x7b, 0x42, 0x00, 0x5d, 0x40, 0x7f, 0x8d, 0xd3, 0x3f, 0x96, 0x0f, + 0x98, 0xa1, 0xd4, 0x39, 0x08, 0x0f, 0x98, 0x90, 0xda, 0x14, 0xa3, 0x01, + 0x3d, 0xe1, 0xc4, 0x03, 0x30, 0x0f, 0xa4, 0x90, 0xda, 0x1b, 0x9c, 0x01, + 0x08, 0xc1, 0xca, 0x9b, 0x08, 0x0f, 0x9e, 0x58, 0xc4, 0x00, 0x87, 0x0f, + 0xb1, 0x49, 0xc8, 0x1d, 0x3c, 0x0f, 0xb2, 0x00, 0xcb, 0x98, 0xc6, 0x01, + 0x12, 0x01, 0xc3, 0x1e, 0xcf, 0x0f, 0xa9, 0x39, 0xc6, 0xcf, 0xf5, 0x0f, + 0xc9, 0xe0, 0x44, 0x00, 0x74, 0x40, 0x7f, 0x99, 0xc5, 0xda, 0xd8, 0x0f, + 0xcd, 0x49, 0x16, 0xc0, 0x7f, 0xab, 0xc9, 0xb1, 0x82, 0x01, 0x37, 0x98, + 0xc9, 0x1c, 0xaa, 0x01, 0x3b, 0x31, 0xc3, 0x00, 0x28, 0x01, 0x34, 0xc3, + 0x00, 0x7f, 0xbd, 0xc8, 0x31, 0xd1, 0x0f, 0xa5, 0xf0, 0xc9, 0xb1, 0x5e, + 0x01, 0x34, 0xe1, 0xca, 0x9b, 0x4e, 0x0f, 0xa5, 0x50, 0x14, 0xc0, 0x7f, + 0xc3, 0xc5, 0x03, 0x0a, 0x01, 0x37, 0x90, 0xc3, 0x4c, 0xa1, 0x01, 0x15, + 0x49, 0xc4, 0x63, 0xf2, 0x01, 0x10, 0x01, 0x0d, 0xc0, 0x7f, 0xd3, 0xc6, + 0xb7, 0xfc, 0x00, 0x00, 0x61, 0xcb, 0x90, 0xd3, 0x0f, 0xcb, 0x00, 0xc6, + 0xb9, 0xbc, 0x0f, 0xa3, 0x18, 0xc2, 0x2e, 0x0e, 0x0f, 0x98, 0x08, 0x42, + 0x00, 0x5d, 0xc0, 0x7f, 0xe8, 0xcb, 0x8e, 0xad, 0x01, 0x09, 0xd9, 0xc4, + 0x89, 0x7c, 0x0f, 0x9f, 0x68, 0xc7, 0x43, 0xb7, 0x0f, 0xa7, 0x01, 0xc4, + 0xd7, 0xa5, 0x0f, 0xad, 0xb8, 0x0e, 0xc0, 0x80, 0x0a, 0xc4, 0xe2, 0x0b, + 0x0f, 0xce, 0x30, 0xca, 0x90, 0x19, 0x0f, 0xcb, 0xb1, 0x46, 0xce, 0x0f, + 0x40, 0x80, 0x16, 0x43, 0x01, 0xe9, 0xc0, 0x80, 0x22, 0xc2, 0x01, 0x48, + 0x01, 0x19, 0x13, 0x00, 0x80, 0x2e, 0xc6, 0x21, 0xfd, 0x0f, 0xa1, 0xc0, + 0x46, 0x12, 0x41, 0xc0, 0x80, 0x34, 0x48, 0xa3, 0xc6, 0x40, 0x80, 0x40, + 0x00, 0xc0, 0x80, 0x52, 0x46, 0x48, 0x65, 0x40, 0x80, 0x6a, 0xc8, 0xba, + 0x52, 0x01, 0x35, 0x89, 0xd1, 0x57, 0x50, 0x01, 0x03, 0x08, 0x9b, 0x01, + 0x37, 0xa1, 0xc8, 0xb6, 0xd2, 0x0f, 0x9d, 0x08, 0xc8, 0x1b, 0xc8, 0x01, + 0x32, 0x01, 0xd7, 0x26, 0x77, 0x00, 0x05, 0x50, 0xc9, 0xa8, 0xa6, 0x0f, + 0xb1, 0x41, 0xc4, 0x14, 0xdd, 0x0f, 0xd5, 0xb0, 0x43, 0x14, 0xcf, 0xc0, + 0x80, 0xca, 0x87, 0x0f, 0xa9, 0x2a, 0x00, 0x80, 0xdf, 0x8a, 0x0f, 0xa0, + 0xfb, 0x00, 0x80, 0xf1, 0xcd, 0x7f, 0x9a, 0x0f, 0xa2, 0x50, 0xcb, 0x05, + 0x1c, 0x01, 0x02, 0xc9, 0xc4, 0x01, 0xc3, 0x01, 0x71, 0x68, 0xc4, 0x0e, + 0x9a, 0x01, 0x00, 0x91, 0xc5, 0x40, 0x88, 0x01, 0x00, 0x38, 0x42, 0x00, + 0x5d, 0xc0, 0x81, 0x03, 0x42, 0x00, 0x47, 0x40, 0x81, 0x15, 0xc5, 0x15, + 0x2d, 0x0f, 0xd5, 0x48, 0x46, 0x56, 0x32, 0xc0, 0x81, 0x21, 0xc6, 0x44, + 0xfb, 0x01, 0x05, 0x29, 0xc6, 0xd0, 0x67, 0x0f, 0x98, 0x60, 0x47, 0x02, + 0x0e, 0xc0, 0x81, 0x2d, 0x45, 0x2b, 0x5f, 0xc0, 0x81, 0x87, 0x4b, 0x6f, + 0xc7, 0xc0, 0x81, 0x9f, 0x45, 0x00, 0xba, 0x40, 0x81, 0xe6, 0x00, 0xc0, + 0x81, 0xf8, 0x11, 0x40, 0x82, 0x04, 0xd8, 0x22, 0x73, 0x01, 0x17, 0x79, + 0x44, 0x04, 0xce, 0x40, 0x82, 0x1c, 0x42, 0x11, 0xa5, 0xc0, 0x82, 0x28, + 0x0b, 0xc0, 0x82, 0x32, 0x9b, 0x01, 0x4f, 0xf8, 0xc3, 0x03, 0x2a, 0x0f, + 0xcd, 0xf1, 0xc3, 0x36, 0x44, 0x0f, 0xcd, 0xf8, 0x0b, 0xc0, 0x82, 0x44, + 0x49, 0xb2, 
+ [hunk body: several thousand added lines of machine-generated hex byte data (0x.. constants); raw binary table, not human-readable]
0x00, 0x30, 0xf9, 0x47, 0x34, 0x2f, 0x40, 0xdd, 0x75, 0x46, + 0x09, 0x97, 0xc0, 0xdd, 0x81, 0x44, 0x00, 0x67, 0xc0, 0xdd, 0xa5, 0xcb, + 0x90, 0x4f, 0x00, 0x30, 0x39, 0xc9, 0xb3, 0x71, 0x00, 0x30, 0x30, 0x48, + 0x19, 0x9b, 0xc0, 0xdd, 0xb1, 0x46, 0x02, 0x0f, 0x40, 0xdd, 0xc3, 0xd0, + 0x48, 0x12, 0x00, 0x2a, 0xf9, 0xc9, 0x2d, 0x85, 0x00, 0x2a, 0xd0, 0xc4, + 0x0a, 0x8b, 0x00, 0x2a, 0xe9, 0x4e, 0x0b, 0x18, 0x40, 0xde, 0x3c, 0xcf, + 0x0f, 0x0a, 0x00, 0x2a, 0xe1, 0xcc, 0x81, 0x39, 0x00, 0x2a, 0xd8, 0x4e, + 0x0b, 0x18, 0xc0, 0xde, 0xb5, 0xd1, 0x2b, 0xed, 0x0f, 0x4a, 0x40, 0xc4, + 0x6b, 0x52, 0x0f, 0x49, 0x11, 0x06, 0xc0, 0xdf, 0x35, 0xc4, 0x76, 0x31, + 0x0f, 0x49, 0x21, 0xc4, 0xe4, 0xb3, 0x0f, 0x49, 0x29, 0x04, 0xc0, 0xdf, + 0x41, 0x15, 0xc0, 0xdf, 0x4b, 0xc2, 0x00, 0x67, 0x0f, 0x49, 0x41, 0xc2, + 0x00, 0x39, 0x0f, 0x49, 0x51, 0x87, 0x0f, 0x49, 0x59, 0xc2, 0x00, 0x87, + 0x0f, 0x49, 0x61, 0x8b, 0x0f, 0x49, 0x69, 0x91, 0x0f, 0x49, 0x71, 0x1b, + 0xc0, 0xdf, 0x57, 0xc3, 0x7e, 0x89, 0x0f, 0x49, 0x89, 0x10, 0xc0, 0xdf, + 0x61, 0x0d, 0xc0, 0xdf, 0x73, 0x97, 0x0f, 0x49, 0xa9, 0xc4, 0xe1, 0x4b, + 0x0f, 0x49, 0xb1, 0xc3, 0x11, 0xee, 0x0f, 0x49, 0xb9, 0xc2, 0x00, 0xd0, + 0x0f, 0x49, 0xc1, 0xc4, 0xd8, 0x3a, 0x0f, 0x49, 0xc9, 0x09, 0xc0, 0xdf, + 0x85, 0xc2, 0x00, 0x16, 0x0f, 0x49, 0xe1, 0xc2, 0x02, 0x41, 0x0f, 0x49, + 0xf1, 0xc3, 0xa9, 0xfc, 0x0f, 0x4a, 0x08, 0xc8, 0x01, 0xbf, 0x0f, 0x4a, + 0x31, 0xd4, 0x3d, 0x2c, 0x0f, 0x4a, 0x48, 0xc4, 0x33, 0x5e, 0x0f, 0x4a, + 0x51, 0xd0, 0x56, 0xc9, 0x0f, 0x4a, 0x58, 0xc4, 0x15, 0xe7, 0x0f, 0x4a, + 0x81, 0xc3, 0x05, 0x14, 0x0f, 0x4a, 0x89, 0x16, 0xc0, 0xdf, 0x8f, 0x08, + 0xc0, 0xdf, 0x9b, 0x15, 0xc0, 0xdf, 0xa7, 0xc5, 0x06, 0xdb, 0x0f, 0x4a, + 0xc1, 0xc4, 0x26, 0x78, 0x0f, 0x4a, 0xc8, 0xd0, 0x0f, 0x09, 0x0f, 0x4a, + 0xf1, 0xcd, 0x2c, 0xb2, 0x0f, 0x4a, 0xf8, 0x47, 0xc5, 0x21, 0xc0, 0xdf, + 0xb3, 0xc4, 0xe4, 0x63, 0x0f, 0xba, 0x13, 0x00, 0xdf, 0xbf, 0xcb, 0x8c, + 0xd4, 0x0f, 0xb8, 0x79, 0xca, 0x9a, 0xfe, 0x0f, 0xb9, 0xf1, 0xc4, 0x1a, + 0xa8, 0x0f, 0xba, 0xc8, 0x14, 0xc0, 0xdf, 0xc3, 0xc7, 0xc8, 0xe0, 0x0f, + 0xb8, 0x99, 0x46, 0x4c, 0x4a, 0xc0, 0xdf, 0xd2, 0x03, 0x40, 0xdf, 0xde, + 0x42, 0x00, 0xfa, 0xc0, 0xdf, 0xf0, 0xc8, 0xbe, 0x7a, 0x0f, 0xbb, 0x80, + 0x11, 0xc0, 0xdf, 0xff, 0xd2, 0x4e, 0x1d, 0x0f, 0xb8, 0x71, 0xca, 0xa1, + 0x52, 0x0f, 0xba, 0xf9, 0x17, 0x40, 0xe0, 0x0e, 0xc5, 0xd7, 0x13, 0x0f, + 0xb9, 0xfb, 0x00, 0xe0, 0x1a, 0x42, 0x00, 0x74, 0xc0, 0xe0, 0x20, 0xc4, + 0xdf, 0x17, 0x0f, 0xba, 0x69, 0xc6, 0x7b, 0x50, 0x0f, 0xba, 0x88, 0x07, + 0xc0, 0xe0, 0x2c, 0xc8, 0xba, 0xfa, 0x0f, 0xb8, 0xc2, 0x00, 0xe0, 0x44, + 0x0b, 0xc0, 0xe0, 0x4a, 0xc8, 0xbb, 0x32, 0x0f, 0xb9, 0x40, 0x17, 0xc0, + 0xe0, 0x5c, 0x42, 0x00, 0x65, 0xc0, 0xe0, 0x68, 0xc5, 0xd4, 0x93, 0x0f, + 0xb8, 0xd9, 0xc5, 0xac, 0x22, 0x0f, 0xba, 0x39, 0xce, 0x6f, 0x62, 0x0f, + 0xba, 0x79, 0x16, 0xc0, 0xe0, 0x75, 0xc3, 0xc9, 0x9a, 0x0f, 0xba, 0xa0, + 0xcb, 0x97, 0xb3, 0x0f, 0xb9, 0x59, 0x43, 0x00, 0xe3, 0xc0, 0xe0, 0x84, + 0xc2, 0x01, 0x29, 0x0f, 0xb8, 0x09, 0x0e, 0xc0, 0xe0, 0x8e, 0xc6, 0xcd, + 0xd3, 0x0f, 0xb9, 0xd1, 0xca, 0x9a, 0xcc, 0x0f, 0xb9, 0xe9, 0xc4, 0x04, + 0x65, 0x0f, 0xba, 0xb9, 0xc6, 0xd2, 0x4d, 0x0f, 0xba, 0xd8, 0xc7, 0xc2, + 0xb9, 0x0f, 0xb9, 0x51, 0xc8, 0xba, 0xe2, 0x0f, 0xba, 0x98, 0xc3, 0x04, + 0xe4, 0x0f, 0xb8, 0xa9, 0xc3, 0x00, 0x2e, 0x0f, 0xbb, 0x78, 0xd0, 0x5d, + 0x22, 0x0f, 0xb8, 0x83, 0x00, 0xe0, 0xa3, 0xc8, 0xbe, 0xc2, 0x0f, 0xb9, + 0xc1, 0xc4, 0x97, 0x51, 0x0f, 0xbb, 0x88, 0xc3, 0x02, 0x11, 0x0f, 0xb8, + 0x21, 0x9a, 0x0f, 0xba, 0x50, 0xc9, 0xaf, 0xed, 0x0f, 0xb8, 0x01, 0xc7, + 0xc8, 0x62, 
0x0f, 0xba, 0x08, 0xc3, 0x1a, 0x7c, 0x0f, 0xb8, 0xd1, 0xc2, + 0x01, 0xdf, 0x0f, 0xba, 0x48, 0xc4, 0x91, 0x3d, 0x0f, 0xb8, 0xe3, 0x00, + 0xe0, 0xa7, 0xcb, 0x91, 0x36, 0x0f, 0xb9, 0x08, 0x11, 0xc0, 0xe0, 0xad, + 0x44, 0x01, 0xcf, 0x40, 0xe0, 0xb9, 0xd7, 0x08, 0xf0, 0x01, 0x53, 0x78, + 0xd3, 0x43, 0xab, 0x0f, 0x9f, 0x39, 0xc5, 0x46, 0x98, 0x0f, 0xb4, 0xb8, + 0x1d, 0xc0, 0xe0, 0xc5, 0x1e, 0xc0, 0xe0, 0xed, 0x1f, 0xc0, 0xe1, 0x15, + 0x20, 0xc0, 0xe1, 0x3d, 0x21, 0xc0, 0xe1, 0x65, 0x22, 0x40, 0xe1, 0x8d, + 0xd3, 0x41, 0x97, 0x01, 0x3f, 0x91, 0x05, 0xc0, 0xe1, 0x9f, 0xd1, 0x05, + 0x75, 0x01, 0x0d, 0xd1, 0x16, 0xc0, 0xe1, 0xab, 0x48, 0x03, 0xc8, 0xc0, + 0xe1, 0xb7, 0xcb, 0x87, 0x8d, 0x01, 0x50, 0x88, 0x46, 0x00, 0x8b, 0x40, + 0xe1, 0xbd, 0xda, 0x19, 0xc8, 0x01, 0x37, 0x11, 0xc3, 0x92, 0x53, 0x01, + 0x5e, 0xc8, 0x8d, 0x00, 0x01, 0x53, 0x00, 0xe1, 0xc9, 0x8f, 0x01, 0x02, + 0x10, 0xc2, 0x00, 0xdb, 0x08, 0xba, 0x31, 0x83, 0x08, 0xb8, 0x70, 0xc2, + 0x00, 0xc1, 0x08, 0xba, 0x29, 0xc2, 0x19, 0x2c, 0x08, 0xb8, 0x81, 0x83, + 0x08, 0xb8, 0x19, 0xc2, 0x01, 0x30, 0x08, 0xb8, 0x10, 0x06, 0xc0, 0xe1, + 0xcf, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xa1, 0x83, 0x08, 0xb8, 0x98, 0x16, + 0xc0, 0xe1, 0xd9, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0x61, 0x83, 0x08, 0xb8, + 0x20, 0x83, 0x08, 0xba, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0x58, 0x49, + 0x0c, 0x8d, 0x40, 0xe1, 0xe3, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xc9, 0x83, + 0x08, 0xb8, 0x50, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xc1, 0x83, 0x08, 0xb8, + 0x40, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xb9, 0x83, 0x08, 0xb8, 0xa8, 0xc2, + 0x00, 0xd0, 0x08, 0xb8, 0x39, 0x83, 0x08, 0xb8, 0x30, 0xc2, 0x00, 0xd0, + 0x08, 0xb8, 0x09, 0x83, 0x08, 0xb8, 0x00, 0xc5, 0xdd, 0x08, 0x08, 0xb9, + 0xf1, 0x15, 0xc0, 0xe1, 0xf5, 0xc6, 0xd0, 0xeb, 0x08, 0xb9, 0x58, 0xc4, + 0x18, 0x10, 0x08, 0xb9, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xb9, 0xb0, 0xc3, + 0x0d, 0x14, 0x08, 0xb9, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xb9, 0xa0, 0xc4, + 0x02, 0xde, 0x08, 0xb9, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xb9, 0x90, 0x8f, + 0x08, 0xb9, 0x51, 0x8b, 0x08, 0xb9, 0x49, 0x99, 0x08, 0xb9, 0x39, 0x83, + 0x08, 0xb9, 0x08, 0x97, 0x08, 0xb9, 0x28, 0x8b, 0x08, 0xb9, 0x18, 0xca, + 0x9f, 0x04, 0x08, 0xb8, 0xf9, 0x83, 0x08, 0xb8, 0xe8, 0xc2, 0x01, 0x9d, + 0x01, 0x1c, 0xab, 0x00, 0xe2, 0x01, 0x44, 0x48, 0xaa, 0x40, 0xe2, 0x05, + 0xc9, 0x52, 0x08, 0x01, 0x1b, 0xb0, 0xc9, 0x52, 0x08, 0x01, 0x1b, 0xc8, + 0xc3, 0x01, 0xbb, 0x01, 0x1b, 0x9b, 0x00, 0xe2, 0x11, 0xc5, 0xd8, 0xf3, + 0x01, 0x19, 0xb0, 0xc2, 0x01, 0x23, 0x01, 0x1b, 0xa1, 0xce, 0x6c, 0xde, + 0x01, 0x1a, 0x30, 0x00, 0xc0, 0xe2, 0x17, 0xca, 0x6c, 0xe2, 0x01, 0x1a, + 0x78, 0x43, 0x01, 0x47, 0xc0, 0xe2, 0x29, 0x42, 0x05, 0x03, 0xc0, 0xe2, + 0x33, 0xcf, 0x67, 0xdd, 0x01, 0x1a, 0xd0, 0xd1, 0x52, 0x00, 0x01, 0x1b, + 0x71, 0x16, 0xc0, 0xe2, 0x3d, 0xc8, 0x7d, 0xf2, 0x01, 0x19, 0xf9, 0xca, + 0x9a, 0x9a, 0x01, 0x19, 0xb8, 0xc8, 0xb5, 0xea, 0x01, 0x1b, 0x51, 0x46, + 0x02, 0xd2, 0x40, 0xe2, 0x49, 0xcb, 0x94, 0xf3, 0x01, 0x1b, 0x39, 0xca, + 0x6c, 0xe2, 0x01, 0x1a, 0x28, 0xc9, 0x20, 0xa8, 0x01, 0x1b, 0x21, 0xc8, + 0x52, 0x09, 0x01, 0x1a, 0xd8, 0x49, 0x07, 0x49, 0xc0, 0xe2, 0x67, 0xcf, + 0x6a, 0x53, 0x01, 0x12, 0x80, 0x0a, 0xc0, 0xe2, 0x73, 0x15, 0xc0, 0xe2, + 0x7d, 0xc2, 0x00, 0x5f, 0x08, 0x59, 0x61, 0x1b, 0xc0, 0xe2, 0x8b, 0xc2, + 0x00, 0x4e, 0x08, 0x59, 0x41, 0x10, 0xc0, 0xe2, 0x95, 0x06, 0xc0, 0xe2, + 0xa9, 0x16, 0xc0, 0xe2, 0xb3, 0xc2, 0x1c, 0x52, 0x08, 0x58, 0xc1, 0xc2, + 0x00, 0x89, 0x08, 0x58, 0xb9, 0x09, 0xc0, 0xe2, 0xc3, 0x1a, 0xc0, 0xe2, + 0xd3, 0xc2, 0x00, 0x3c, 0x08, 0x58, 0x81, 0x97, 0x08, 0x58, 0x73, 0x00, + 0xe2, 0xe3, 
0x8b, 0x08, 0x58, 0x63, 0x00, 0xe2, 0xe7, 0x91, 0x08, 0x58, + 0x53, 0x00, 0xe2, 0xeb, 0x87, 0x08, 0x58, 0x43, 0x00, 0xe2, 0xef, 0x83, + 0x08, 0x58, 0x03, 0x00, 0xe2, 0xf3, 0xc2, 0x00, 0x67, 0x08, 0x58, 0xf1, + 0xc2, 0x14, 0xda, 0x08, 0x58, 0xf9, 0x04, 0xc0, 0xe3, 0x09, 0xc2, 0x01, + 0x19, 0x08, 0x59, 0x69, 0xc2, 0x00, 0x49, 0x08, 0x59, 0x71, 0x1c, 0x40, + 0xe3, 0x13, 0xc3, 0x05, 0x14, 0x08, 0x08, 0x3b, 0x00, 0xe3, 0x1d, 0x16, + 0xc0, 0xe3, 0x21, 0x08, 0xc0, 0xe3, 0x32, 0x15, 0xc0, 0xe3, 0x3a, 0xc5, + 0x06, 0xdb, 0x08, 0x08, 0x73, 0x00, 0xe3, 0x4c, 0xc4, 0x26, 0x78, 0x08, + 0x08, 0x7a, 0x00, 0xe3, 0x57, 0x46, 0x0f, 0x88, 0xc0, 0xe3, 0x64, 0x4e, + 0x72, 0x02, 0x40, 0xe3, 0x7a, 0xce, 0x71, 0x22, 0x08, 0x09, 0xf1, 0xcd, + 0x7d, 0xb9, 0x08, 0x09, 0xf8, 0x0e, 0xc0, 0xe3, 0x86, 0x46, 0x11, 0x39, + 0xc0, 0xe3, 0x92, 0x42, 0x00, 0x58, 0xc0, 0xe3, 0xcb, 0x49, 0x07, 0xbb, + 0xc0, 0xe3, 0xd7, 0x43, 0x11, 0x49, 0xc0, 0xe3, 0xef, 0x46, 0x00, 0x2c, + 0x40, 0xe4, 0x07, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x81, 0xc6, 0x02, 0xd1, + 0x0f, 0xbc, 0x30, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x59, 0xd2, 0x4d, 0x57, + 0x0f, 0xbd, 0xb8, 0xd6, 0x08, 0x88, 0x01, 0x1f, 0x09, 0xcd, 0x00, 0x32, + 0x01, 0x1e, 0xf9, 0xcb, 0x1a, 0x50, 0x01, 0x1e, 0xe9, 0xce, 0x25, 0xad, + 0x01, 0x1d, 0xab, 0x00, 0xe4, 0x1f, 0x45, 0x01, 0xce, 0xc0, 0xe4, 0x25, + 0x46, 0x00, 0x2c, 0xc0, 0xe4, 0x3d, 0x45, 0x00, 0x49, 0xc0, 0xe4, 0x47, + 0xd7, 0x15, 0x64, 0x01, 0x49, 0xd8, 0x46, 0x00, 0x8b, 0x40, 0xe4, 0x51, + 0x00, 0xc0, 0xe4, 0x5d, 0xc3, 0x00, 0x74, 0x0f, 0x9d, 0x98, 0xc4, 0x01, + 0xc3, 0x0f, 0xa8, 0xb3, 0x00, 0xe4, 0x69, 0x95, 0x0f, 0xa6, 0xd0, 0x84, + 0x01, 0x88, 0x2b, 0x00, 0xe4, 0x6f, 0x92, 0x01, 0x88, 0x31, 0x8f, 0x01, + 0x88, 0x39, 0x88, 0x01, 0x88, 0x41, 0x86, 0x01, 0x88, 0x49, 0x96, 0x01, + 0x88, 0x51, 0x90, 0x01, 0x88, 0x5b, 0x00, 0xe4, 0x73, 0x8e, 0x01, 0x88, + 0x63, 0x00, 0xe4, 0x7e, 0x89, 0x01, 0x88, 0x6b, 0x00, 0xe4, 0x82, 0x8d, + 0x01, 0x88, 0x73, 0x00, 0xe4, 0x92, 0x8a, 0x01, 0x88, 0x79, 0x8c, 0x01, + 0x88, 0x83, 0x00, 0xe4, 0x96, 0x93, 0x01, 0x88, 0x89, 0x9a, 0x01, 0x88, + 0x91, 0x9c, 0x01, 0x88, 0xbb, 0x00, 0xe4, 0x9a, 0x85, 0x01, 0x88, 0xc3, + 0x00, 0xe4, 0xa6, 0x95, 0x01, 0x88, 0xcb, 0x00, 0xe4, 0xaa, 0x94, 0x01, + 0x88, 0xb1, 0x83, 0x01, 0x88, 0xd3, 0x00, 0xe4, 0xae, 0x91, 0x01, 0x88, + 0xdb, 0x00, 0xe4, 0xcb, 0x87, 0x01, 0x88, 0xe3, 0x00, 0xe4, 0xe5, 0x8b, + 0x01, 0x89, 0x3b, 0x00, 0xe4, 0xfc, 0x97, 0x01, 0x89, 0x43, 0x00, 0xe5, + 0x15, 0x98, 0x01, 0x89, 0x50, 0x92, 0x01, 0x8d, 0xa1, 0x96, 0x01, 0x8d, + 0xa9, 0x8d, 0x01, 0x8d, 0xb1, 0x8a, 0x01, 0x8d, 0xb9, 0x89, 0x01, 0x8d, + 0xd8, 0x9e, 0x0f, 0xd8, 0x03, 0x00, 0xe5, 0x1b, 0xa0, 0x0f, 0xd8, 0x1b, + 0x00, 0xe5, 0x3b, 0x9f, 0x0f, 0xd8, 0x0b, 0x00, 0xe5, 0x4d, 0xa2, 0x0f, + 0xd8, 0x7b, 0x00, 0xe5, 0x66, 0xa1, 0x0f, 0xd8, 0x3b, 0x00, 0xe5, 0x6a, + 0xa3, 0x0f, 0xd8, 0xf0, 0x00, 0xc0, 0xe5, 0x75, 0x02, 0x40, 0xe5, 0xbf, + 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xc1, 0xc5, 0x1c, 0xae, 0x0f, 0xa4, 0xc8, + 0x4a, 0xa5, 0x3a, 0x40, 0xe5, 0xcb, 0xc8, 0xb5, 0x7a, 0x0f, 0xd3, 0x81, + 0xc8, 0xb8, 0x02, 0x0f, 0xcf, 0xb1, 0x11, 0x40, 0xe5, 0xe3, 0x42, 0x00, + 0xb0, 0xc0, 0xe5, 0xf2, 0x4f, 0x2a, 0x5c, 0xc0, 0xe5, 0xff, 0x46, 0xcd, + 0x25, 0xc0, 0xe6, 0x15, 0xc5, 0xd5, 0x56, 0x00, 0xda, 0xe1, 0x46, 0x09, + 0x97, 0xc0, 0xe6, 0x21, 0x47, 0x02, 0x0e, 0xc0, 0xe6, 0x45, 0xc9, 0xb3, + 0x3b, 0x00, 0xda, 0x21, 0x4b, 0x6f, 0xc7, 0xc0, 0xe6, 0xe9, 0x45, 0x00, + 0xba, 0x40, 0xe7, 0x1a, 0xcd, 0x7e, 0x6f, 0x0f, 0x9e, 0x00, 0xc9, 0x11, + 0xf6, 0x0b, 0x57, 0xa9, 0x4a, 0x51, 0x89, 0xc0, 0xe7, 0x38, 0x47, 0x02, + 0x0e, 0x40, 
0xe7, 0x4a, 0xc6, 0x00, 0x91, 0x0f, 0xb5, 0xe1, 0xc5, 0xd4, + 0x66, 0x0f, 0xa3, 0xe1, 0xc6, 0x50, 0xe2, 0x0f, 0x9b, 0xe1, 0xc5, 0x55, + 0x91, 0x0f, 0xa1, 0x20, 0x12, 0xc0, 0xe7, 0xc2, 0x83, 0x05, 0x35, 0x01, + 0x0d, 0xc0, 0xe7, 0xd8, 0x97, 0x05, 0x35, 0x11, 0xc2, 0x02, 0xe0, 0x05, + 0x35, 0x21, 0x14, 0xc0, 0xe7, 0xfb, 0x16, 0xc0, 0xe8, 0x0d, 0x91, 0x05, + 0x35, 0x39, 0x10, 0xc0, 0xe8, 0x19, 0x8b, 0x05, 0x35, 0x49, 0x0e, 0xc0, + 0xe8, 0x46, 0x8f, 0x05, 0x35, 0x9b, 0x00, 0xe8, 0x5e, 0x15, 0xc0, 0xe8, + 0x76, 0x1b, 0xc0, 0xe8, 0x90, 0x19, 0xc0, 0xe8, 0xa0, 0x08, 0x40, 0xe8, + 0xaa, 0x0f, 0xc0, 0xe8, 0xc0, 0xc3, 0x0d, 0xe5, 0x05, 0x37, 0xa0, 0x47, + 0x01, 0xeb, 0xc0, 0xe8, 0xcc, 0x00, 0xc0, 0xe8, 0xd2, 0x15, 0x40, 0xe8, + 0xde, 0x15, 0xc0, 0xe8, 0xea, 0x43, 0x0c, 0xe0, 0xc0, 0xe8, 0xf6, 0x4f, + 0x30, 0x90, 0xc0, 0xe9, 0x02, 0x4b, 0x6f, 0xc7, 0xc0, 0xe9, 0x0c, 0x47, + 0x02, 0x0e, 0x40, 0xe9, 0x2e, 0xc3, 0x82, 0x4c, 0x0f, 0xb6, 0x08, 0xc5, + 0xb5, 0x75, 0x0f, 0xa6, 0x51, 0xc7, 0xc9, 0x96, 0x0f, 0xcf, 0xe0, 0xcf, + 0x67, 0x38, 0x01, 0x33, 0x61, 0xcc, 0x82, 0xdd, 0x01, 0x33, 0x59, 0xd8, + 0x23, 0x1b, 0x0f, 0x9c, 0xe9, 0xd7, 0x29, 0xca, 0x0f, 0x9c, 0xe0, 0xc5, + 0x11, 0x55, 0x0f, 0xa1, 0xd9, 0xca, 0xa5, 0x76, 0x0f, 0xce, 0xa0, 0xcc, + 0x20, 0x76, 0x01, 0x1f, 0x18, 0x47, 0x02, 0x0e, 0xc0, 0xe9, 0x91, 0x15, + 0xc0, 0xe9, 0xf4, 0x4b, 0x6f, 0xc7, 0xc0, 0xea, 0x00, 0x03, 0xc0, 0xea, + 0x20, 0x46, 0x09, 0x97, 0xc0, 0xea, 0x32, 0x46, 0x76, 0x52, 0xc0, 0xea, + 0x56, 0x49, 0x3a, 0xd4, 0xc0, 0xea, 0x62, 0xc6, 0xd2, 0xcb, 0x00, 0x4f, + 0xd1, 0xca, 0x9f, 0xae, 0x00, 0x4f, 0xd8, 0xc5, 0xd9, 0xb6, 0x0f, 0x9b, + 0x89, 0x49, 0x03, 0x37, 0x40, 0xea, 0x6e, 0xc6, 0x00, 0x91, 0x01, 0x1b, + 0xf1, 0xd8, 0x23, 0xc3, 0x0f, 0xa8, 0xa9, 0xc6, 0xcd, 0x19, 0x0f, 0xd6, + 0x88, 0xcf, 0x62, 0x6a, 0x0f, 0xa3, 0x29, 0xce, 0x2f, 0xbc, 0x0f, 0xa3, + 0x20, 0xc9, 0x18, 0x66, 0x01, 0x10, 0xc8, 0xd1, 0x51, 0xab, 0x0f, 0xab, + 0x60, 0xce, 0x6f, 0x0e, 0x00, 0xd0, 0xf9, 0xc7, 0xc9, 0xd5, 0x00, 0xd0, + 0xf1, 0x4b, 0x6f, 0xc7, 0xc0, 0xea, 0x74, 0x47, 0x02, 0x0e, 0x40, 0xea, + 0x8a, 0x97, 0x00, 0xba, 0x99, 0x8b, 0x00, 0xba, 0x90, 0xc2, 0x00, 0xd0, + 0x00, 0xba, 0x89, 0xc2, 0x0d, 0xf6, 0x00, 0xba, 0x81, 0xc2, 0x01, 0x4a, + 0x00, 0xba, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xba, 0x71, 0xc2, 0x00, 0x39, + 0x00, 0xba, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xba, 0x61, 0xc2, 0x01, 0xc3, + 0x00, 0xba, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xba, 0x51, 0xc2, 0x00, 0xb0, + 0x00, 0xba, 0x49, 0x10, 0xc0, 0xea, 0xea, 0xc2, 0x0e, 0x9a, 0x00, 0xba, + 0x39, 0xc2, 0x01, 0x6f, 0x00, 0xba, 0x31, 0xc2, 0x01, 0x30, 0x00, 0xba, + 0x21, 0xc2, 0x02, 0x2b, 0x00, 0xba, 0x19, 0x97, 0x00, 0xba, 0x11, 0x8b, + 0x00, 0xba, 0x09, 0x83, 0x00, 0xba, 0x00, 0xcb, 0x8c, 0xa8, 0x0f, 0xa3, + 0x81, 0xcb, 0x91, 0xdb, 0x0f, 0x98, 0x48, 0xc4, 0xe3, 0x0f, 0x0f, 0xa5, + 0xe1, 0x95, 0x0f, 0xd3, 0x90, 0x4c, 0x83, 0x49, 0xc0, 0xea, 0xf4, 0x90, + 0x0f, 0xcf, 0x00, 0x47, 0x34, 0x2f, 0xc0, 0xeb, 0x00, 0x47, 0x02, 0x0e, + 0xc0, 0xeb, 0x2d, 0x18, 0xc0, 0xeb, 0x95, 0x45, 0x00, 0xba, 0xc0, 0xeb, + 0xa1, 0x06, 0xc0, 0xeb, 0xc5, 0x4c, 0x11, 0xe2, 0x40, 0xeb, 0xd7, 0xdb, + 0x15, 0x96, 0x01, 0x1c, 0x59, 0xc5, 0x1c, 0xae, 0x0f, 0xa4, 0xa1, 0xc3, + 0x01, 0x5d, 0x00, 0x05, 0x30, 0x86, 0x0f, 0x9a, 0xf1, 0xd0, 0x5b, 0x62, + 0x00, 0x04, 0x11, 0xca, 0xa7, 0x10, 0x0f, 0xc9, 0x88, 0x42, 0x00, 0xbf, + 0xc0, 0xeb, 0xe7, 0x46, 0xd0, 0xd9, 0xc0, 0xeb, 0xf3, 0xcb, 0x97, 0x50, + 0x0e, 0x82, 0x28, 0xc5, 0x87, 0x64, 0x0e, 0x81, 0x23, 0x00, 0xeb, 0xff, + 0x46, 0xd1, 0xa5, 0xc0, 0xec, 0x03, 0x11, 0xc0, 0xec, 0x10, 0x14, 0xc0, + 0xec, 0x25, 
0x42, 0x00, 0xfe, 0xc0, 0xec, 0x31, 0xc6, 0xc8, 0x94, 0x0e, + 0x83, 0x08, 0x14, 0xc0, 0xec, 0x3d, 0x12, 0xc0, 0xec, 0x49, 0x45, 0xd8, + 0x4e, 0xc0, 0xec, 0x59, 0x10, 0x40, 0xec, 0x71, 0x16, 0xc0, 0xec, 0x7d, + 0x48, 0xbc, 0x8a, 0xc0, 0xec, 0x92, 0xc5, 0xd9, 0x02, 0x0e, 0x81, 0x4b, + 0x00, 0xec, 0xa4, 0x1b, 0xc0, 0xec, 0xaa, 0xc7, 0xc0, 0x9e, 0x0e, 0x80, + 0xe8, 0x0b, 0xc0, 0xec, 0xb7, 0xc2, 0x42, 0xcd, 0x0e, 0x81, 0x79, 0xc5, + 0xd7, 0x27, 0x0e, 0x80, 0x08, 0x42, 0x14, 0xda, 0xc0, 0xec, 0xd4, 0x12, + 0x40, 0xec, 0xe0, 0x46, 0x3d, 0xd7, 0xc0, 0xec, 0xea, 0xda, 0x19, 0xfc, + 0x0e, 0x86, 0x29, 0x49, 0xb5, 0x21, 0x40, 0xed, 0x15, 0x44, 0xdf, 0x57, + 0xc0, 0xed, 0x27, 0x47, 0xc8, 0x2a, 0xc0, 0xed, 0x39, 0x44, 0x56, 0x2e, + 0x40, 0xed, 0x45, 0x42, 0x02, 0x2f, 0xc0, 0xed, 0x4f, 0x15, 0xc0, 0xed, + 0x59, 0xc6, 0xcd, 0xf1, 0x0e, 0x81, 0xf8, 0x10, 0xc0, 0xed, 0x65, 0x46, + 0xd1, 0x69, 0xc0, 0xed, 0x71, 0xc7, 0xc7, 0x5f, 0x0e, 0x83, 0x41, 0xc9, + 0xac, 0x9f, 0x0e, 0x83, 0x21, 0xc6, 0xd0, 0x9d, 0x0e, 0x82, 0xa9, 0xce, + 0x6d, 0x08, 0x0e, 0x80, 0x70, 0x48, 0xbd, 0x2a, 0xc0, 0xed, 0x7d, 0xca, + 0x9e, 0x32, 0x0e, 0x82, 0xb8, 0x14, 0xc0, 0xed, 0x9d, 0x07, 0xc0, 0xed, + 0xa7, 0x0a, 0xc0, 0xed, 0xb9, 0xc6, 0xd1, 0x51, 0x0e, 0x81, 0x38, 0x07, + 0xc0, 0xed, 0xc3, 0xc6, 0xc4, 0xab, 0x0e, 0x82, 0xe8, 0x49, 0xab, 0x64, + 0xc0, 0xed, 0xcf, 0xc5, 0xda, 0x92, 0x0e, 0x82, 0xd9, 0x44, 0xdf, 0x27, + 0xc0, 0xed, 0xdb, 0x46, 0xce, 0x7b, 0x40, 0xed, 0xe5, 0x42, 0x00, 0xba, + 0xc0, 0xed, 0xf1, 0x42, 0x00, 0xb1, 0xc0, 0xed, 0xfb, 0x46, 0xce, 0xf3, + 0xc0, 0xee, 0x07, 0x07, 0x40, 0xee, 0x13, 0x44, 0xe4, 0xaf, 0xc0, 0xee, + 0x28, 0xc3, 0x4e, 0x10, 0x0e, 0x80, 0xc8, 0xc6, 0xcd, 0x1f, 0x0e, 0x81, + 0xe1, 0xc4, 0xc8, 0x2c, 0x0e, 0x81, 0x28, 0xc2, 0x0d, 0x10, 0x08, 0xe3, + 0x58, 0x9b, 0x08, 0xe3, 0x50, 0xc4, 0x18, 0x10, 0x08, 0xe3, 0x03, 0x00, + 0xee, 0x32, 0xc2, 0x22, 0xcc, 0x08, 0xe2, 0xfa, 0x00, 0xee, 0x38, 0x0b, + 0xc0, 0xee, 0x3e, 0x11, 0x40, 0xee, 0x4a, 0x0a, 0xc0, 0xee, 0x56, 0x19, + 0xc0, 0xee, 0x62, 0xc2, 0x00, 0xc4, 0x08, 0xe3, 0x18, 0xc4, 0x26, 0x78, + 0x08, 0xe2, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xe2, 0xc1, 0x15, 0xc0, 0xee, + 0x6c, 0x08, 0xc0, 0xee, 0x78, 0x16, 0xc0, 0xee, 0x84, 0xc3, 0x05, 0x14, + 0x08, 0xe2, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xe2, 0x80, 0xc7, 0x7a, 0x7f, + 0x08, 0xe2, 0x01, 0xc7, 0x14, 0x39, 0x08, 0xe1, 0xe8, 0xc4, 0x1e, 0x97, + 0x08, 0xe1, 0xf9, 0xc5, 0x40, 0xe7, 0x08, 0xe1, 0xf0, 0x97, 0x08, 0xe1, + 0xd9, 0x8b, 0x08, 0xe1, 0xc9, 0x83, 0x08, 0xe1, 0x78, 0x8e, 0x08, 0xe1, + 0xb1, 0x94, 0x08, 0xe1, 0xa2, 0x00, 0xee, 0x90, 0x97, 0x08, 0xe1, 0x98, + 0x8b, 0x08, 0xe1, 0x88, 0x83, 0x08, 0xe1, 0x69, 0xc2, 0x0d, 0xf6, 0x08, + 0xe1, 0x61, 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x58, 0x83, 0x08, 0xe1, 0x51, + 0x47, 0xb2, 0x2e, 0x40, 0xee, 0x94, 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x29, + 0x83, 0x08, 0xe1, 0x20, 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x19, 0x83, 0x08, + 0xe1, 0x10, 0x83, 0x08, 0xe1, 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xe0, 0xe1, + 0xc2, 0x19, 0x2c, 0x08, 0xe0, 0xb9, 0xc2, 0x01, 0x30, 0x08, 0xe0, 0x90, + 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x01, 0x83, 0x08, 0xe0, 0xf9, 0x06, 0x40, + 0xee, 0x9f, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xf1, 0x83, 0x08, 0xe0, 0xe9, + 0x16, 0x40, 0xee, 0xa9, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xb1, 0x83, 0x08, + 0xe0, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xa1, 0x83, 0x08, 0xe0, 0x98, + 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0x89, 0x83, 0x08, 0xe0, 0x80, 0xc2, 0x00, + 0xd0, 0x08, 0xe0, 0x79, 0x83, 0x08, 0xe0, 0x70, 0x97, 0x08, 0xe0, 0x69, + 0x8b, 0x08, 0xe0, 0x59, 0x83, 0x08, 0xe0, 0x08, 0x97, 0x08, 0xe0, 0x28, + 0x8b, 0x08, 
0xe0, 0x18, 0x45, 0x00, 0x49, 0xc0, 0xee, 0xb3, 0x46, 0x00, + 0x2c, 0xc0, 0xee, 0xd9, 0x16, 0xc0, 0xef, 0x01, 0xce, 0x6b, 0x9c, 0x01, + 0x38, 0x19, 0x45, 0x01, 0xce, 0xc0, 0xef, 0x0d, 0xd3, 0x3f, 0xe2, 0x01, + 0x2c, 0x39, 0xd2, 0x4a, 0x75, 0x01, 0x2c, 0x29, 0x44, 0x05, 0x14, 0x40, + 0xef, 0x25, 0x04, 0xc0, 0xef, 0x31, 0xc8, 0x0a, 0xff, 0x01, 0x02, 0x71, + 0xc4, 0x02, 0x6d, 0x00, 0x02, 0xf9, 0xc6, 0x4a, 0x9f, 0x01, 0x72, 0x3b, + 0x00, 0xef, 0x3d, 0xdb, 0x18, 0x1e, 0x01, 0x80, 0xf8, 0x46, 0x01, 0x4a, + 0xc0, 0xef, 0x43, 0xc5, 0x32, 0xbb, 0x01, 0x3e, 0xe8, 0x46, 0x01, 0x4a, + 0xc0, 0xef, 0x5b, 0x00, 0x40, 0xef, 0x73, 0xc7, 0x30, 0xf2, 0x01, 0x3e, + 0x61, 0x47, 0xc3, 0x14, 0xc0, 0xef, 0x7f, 0xc3, 0x17, 0x99, 0x0f, 0xd4, + 0xc0, 0x00, 0x40, 0xef, 0x85, 0x46, 0x00, 0x8b, 0x40, 0xef, 0x91, 0xc4, + 0x15, 0xe7, 0x00, 0x00, 0x79, 0xc3, 0x05, 0x14, 0x00, 0x00, 0x70, 0x03, + 0xc0, 0xef, 0xa9, 0x42, 0x00, 0xd0, 0xc0, 0xef, 0xb1, 0x14, 0xc0, 0xef, + 0xbd, 0xc8, 0x6e, 0xdc, 0x01, 0x3e, 0xe1, 0x11, 0xc0, 0xef, 0xc9, 0x15, + 0xc0, 0xef, 0xd5, 0x05, 0xc0, 0xef, 0xf8, 0x16, 0xc0, 0xf0, 0x13, 0x08, + 0xc0, 0xf0, 0x27, 0x4a, 0x07, 0xbb, 0xc0, 0xf0, 0x31, 0xcb, 0x1a, 0x50, + 0x00, 0x01, 0x43, 0x00, 0xf0, 0x3d, 0xe0, 0x05, 0xa7, 0x01, 0x16, 0x49, + 0x42, 0x00, 0x58, 0xc0, 0xf0, 0x41, 0x19, 0xc0, 0xf0, 0x4d, 0x04, 0xc0, + 0xf0, 0x5f, 0x0e, 0x40, 0xf0, 0x6b, 0x19, 0xc0, 0xf0, 0x77, 0x16, 0xc0, + 0xf0, 0x86, 0xd0, 0x58, 0x62, 0x0f, 0xc1, 0xe1, 0xc5, 0x01, 0xa2, 0x01, + 0x0c, 0x83, 0x00, 0xf0, 0x98, 0x14, 0xc0, 0xf0, 0xa2, 0xd1, 0x55, 0x30, + 0x01, 0x0f, 0xe9, 0x06, 0xc0, 0xf0, 0xae, 0x15, 0xc0, 0xf0, 0xba, 0x0a, + 0xc0, 0xf0, 0xc6, 0xcd, 0x7c, 0xa8, 0x01, 0x0e, 0x39, 0x04, 0xc0, 0xf0, + 0xd0, 0xcf, 0x61, 0x4d, 0x01, 0x5a, 0x29, 0x08, 0xc0, 0xf0, 0xe2, 0xd7, + 0x26, 0xbc, 0x0f, 0xc5, 0x20, 0x49, 0x01, 0xaa, 0xc0, 0xf0, 0xee, 0x15, + 0xc0, 0xf1, 0x06, 0xdb, 0x16, 0x1d, 0x01, 0x37, 0x31, 0x49, 0x3c, 0xe1, + 0xc0, 0xf1, 0x12, 0x47, 0x55, 0x85, 0x40, 0xf1, 0x2a, 0xca, 0x37, 0x4e, + 0x01, 0x17, 0x31, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x40, 0xc3, 0x02, 0xa3, + 0x01, 0x16, 0xb1, 0xcd, 0x78, 0x30, 0x01, 0x53, 0xc9, 0xd3, 0x43, 0x39, + 0x01, 0x53, 0xd8, 0x42, 0x00, 0x2a, 0xc0, 0xf1, 0x3f, 0xcc, 0x88, 0x7d, + 0x01, 0x13, 0x30, 0x45, 0x00, 0xd5, 0xc0, 0xf1, 0x5a, 0x43, 0x02, 0x9c, + 0x40, 0xf1, 0x70, 0xd4, 0x00, 0xd3, 0x01, 0x55, 0x40, 0x06, 0xc0, 0xf1, + 0x7c, 0x16, 0xc0, 0xf1, 0x8c, 0x83, 0x00, 0xe1, 0x19, 0xc2, 0x01, 0x4a, + 0x00, 0xe1, 0x11, 0x15, 0xc0, 0xf1, 0x9e, 0xc2, 0x02, 0x41, 0x00, 0xe0, + 0xf9, 0x0a, 0xc0, 0xf1, 0xa8, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xe1, 0xc2, + 0x00, 0x39, 0x00, 0xe0, 0xd9, 0xc2, 0x19, 0x2c, 0x00, 0xe0, 0xd1, 0x0f, + 0xc0, 0xf1, 0xb2, 0x04, 0xc0, 0xf1, 0xbc, 0x08, 0xc0, 0xf1, 0xc6, 0x12, + 0xc0, 0xf1, 0xd0, 0x10, 0xc0, 0xf1, 0xe0, 0xc2, 0x25, 0x3b, 0x00, 0xe0, + 0x41, 0x05, 0xc0, 0xf1, 0xf0, 0x09, 0xc0, 0xf1, 0xfa, 0x0d, 0x40, 0xf2, + 0x04, 0xc4, 0x26, 0x78, 0x00, 0xe2, 0x49, 0xc5, 0x06, 0xdb, 0x00, 0xe2, + 0x41, 0x15, 0xc0, 0xf2, 0x14, 0x08, 0xc0, 0xf2, 0x20, 0x16, 0xc0, 0xf2, + 0x2c, 0xc3, 0x05, 0x14, 0x00, 0xe2, 0x09, 0xc4, 0x15, 0xe7, 0x00, 0xe2, + 0x00, 0x16, 0xc0, 0xf2, 0x38, 0xc6, 0xc0, 0x98, 0x00, 0xe1, 0xe9, 0xd2, + 0x4e, 0x0b, 0x00, 0xe1, 0xe0, 0x44, 0x00, 0xbb, 0xc0, 0xf2, 0x47, 0x50, + 0x5c, 0xf2, 0x40, 0xf2, 0x53, 0x8d, 0x00, 0xe1, 0x6b, 0x00, 0xf2, 0x5f, + 0x90, 0x00, 0xe1, 0x83, 0x00, 0xf2, 0x65, 0x96, 0x00, 0xe1, 0x99, 0x94, + 0x00, 0xe1, 0x91, 0x92, 0x00, 0xe1, 0x89, 0x8e, 0x00, 0xe1, 0x79, 0x8f, + 0x00, 0xe1, 0x70, 0x87, 0x00, 0xe1, 0x61, 0x97, 0x00, 0xe1, 0x53, 0x00, + 0xf2, 0x6b, 
0x91, 0x00, 0xe1, 0x43, 0x00, 0xf2, 0x6f, 0x8b, 0x00, 0xe1, + 0x39, 0xc2, 0x04, 0xc6, 0x00, 0xe1, 0x30, 0x00, 0xc0, 0xf2, 0x73, 0xc4, + 0x03, 0x0e, 0x01, 0x30, 0x3a, 0x00, 0xf2, 0xa7, 0x1b, 0xc0, 0xf2, 0xb0, + 0xc2, 0x01, 0x5d, 0x05, 0x26, 0x81, 0x12, 0xc0, 0xf2, 0xba, 0x06, 0xc0, + 0xf2, 0xc4, 0x16, 0xc0, 0xf2, 0xce, 0x09, 0xc0, 0xf2, 0xe2, 0x0d, 0xc0, + 0xf2, 0xec, 0xc2, 0x25, 0x3b, 0x05, 0x26, 0xc9, 0x05, 0xc0, 0xf2, 0xf6, + 0xc2, 0x01, 0xc3, 0x05, 0x26, 0xf9, 0x10, 0xc0, 0xf3, 0x00, 0xc2, 0x00, + 0xdb, 0x05, 0x27, 0x09, 0x15, 0xc0, 0xf3, 0x0a, 0x1c, 0xc0, 0xf3, 0x14, + 0x0a, 0xc0, 0xf3, 0x1e, 0xc2, 0x8d, 0x8f, 0x05, 0x27, 0x39, 0xc2, 0x00, + 0x87, 0x05, 0x27, 0x49, 0xc2, 0x01, 0x4a, 0x05, 0x27, 0x51, 0x83, 0x05, + 0x27, 0x73, 0x00, 0xf3, 0x28, 0x87, 0x05, 0x27, 0x83, 0x00, 0xf3, 0x2c, + 0x8b, 0x05, 0x27, 0x91, 0x91, 0x05, 0x27, 0x9b, 0x00, 0xf3, 0x30, 0x97, + 0x05, 0x27, 0xa2, 0x00, 0xf3, 0x34, 0xc5, 0x0a, 0x8a, 0x05, 0x27, 0xf1, + 0xc9, 0x11, 0xf6, 0x05, 0x27, 0xf8, 0x00, 0xc0, 0xf3, 0x3c, 0x43, 0x02, + 0xe8, 0x40, 0xf3, 0x57, 0xcd, 0x7b, 0xd8, 0x0f, 0xac, 0x39, 0xc7, 0x00, + 0x90, 0x0f, 0xa8, 0xb8, 0x46, 0x09, 0x97, 0xc0, 0xf3, 0x63, 0xcd, 0x2c, + 0xb2, 0x00, 0xca, 0x29, 0xd0, 0x0f, 0x09, 0x00, 0xca, 0x21, 0x15, 0xc0, + 0xf3, 0x87, 0x45, 0x34, 0x6f, 0xc0, 0xf3, 0x99, 0x47, 0x02, 0x0e, 0x40, + 0xf3, 0xa5, 0x85, 0x08, 0x49, 0xc9, 0x90, 0x08, 0x49, 0x5b, 0x00, 0xf3, + 0xf4, 0x8e, 0x08, 0x49, 0x4b, 0x00, 0xf3, 0xf8, 0x87, 0x08, 0x49, 0x23, + 0x00, 0xf3, 0xfc, 0x83, 0x08, 0x49, 0x03, 0x00, 0xf4, 0x00, 0x96, 0x08, + 0x49, 0x7b, 0x00, 0xf4, 0x04, 0x95, 0x08, 0x49, 0x9b, 0x00, 0xf4, 0x08, + 0x93, 0x08, 0x49, 0x91, 0x88, 0x08, 0x49, 0x89, 0x97, 0x08, 0x49, 0x81, + 0x94, 0x08, 0x49, 0x69, 0x91, 0x08, 0x49, 0x61, 0x8f, 0x08, 0x49, 0x51, + 0x8d, 0x08, 0x49, 0x41, 0x9b, 0x08, 0x49, 0x39, 0x8b, 0x08, 0x49, 0x31, + 0x98, 0x08, 0x49, 0x29, 0x86, 0x08, 0x49, 0x19, 0x89, 0x08, 0x49, 0x11, + 0x84, 0x08, 0x49, 0x08, 0x90, 0x08, 0x14, 0xc8, 0x90, 0x08, 0x14, 0xd0, + 0x8a, 0x08, 0x14, 0x18, 0x8a, 0x08, 0x14, 0x49, 0x96, 0x08, 0x14, 0xc0, + 0x8d, 0x08, 0x14, 0xa0, 0x8f, 0x08, 0x14, 0x80, 0x90, 0x08, 0x14, 0x88, + 0x00, 0xc0, 0xf4, 0x0c, 0xc6, 0xc1, 0xfd, 0x01, 0x55, 0x5a, 0x00, 0xf4, + 0x48, 0x45, 0x03, 0x14, 0xc0, 0xf4, 0x4e, 0x56, 0x2c, 0xde, 0x40, 0xf4, + 0x58, 0x15, 0xc0, 0xf4, 0x9f, 0xd5, 0x32, 0xd5, 0x00, 0x14, 0xb3, 0x00, + 0xf4, 0xb4, 0x42, 0x01, 0x19, 0xc0, 0xf4, 0xba, 0x03, 0xc0, 0xf4, 0xc9, + 0xd8, 0x21, 0x0b, 0x00, 0xe9, 0x21, 0xcc, 0x23, 0x33, 0x00, 0x14, 0xa3, + 0x00, 0xf4, 0xd5, 0xdb, 0x17, 0xb2, 0x00, 0x14, 0xa9, 0x42, 0x01, 0x2d, + 0xc0, 0xf4, 0xdb, 0xc2, 0x1d, 0xc1, 0x00, 0x0d, 0x31, 0xcf, 0x65, 0xfd, + 0x00, 0x0d, 0xd9, 0xc4, 0x95, 0x50, 0x00, 0x0d, 0xf9, 0xcc, 0x83, 0xe5, + 0x00, 0x0e, 0x01, 0xcd, 0x79, 0x0d, 0x00, 0x0e, 0x08, 0xc4, 0x0d, 0x21, + 0x01, 0x38, 0xe9, 0x48, 0x0b, 0x18, 0x40, 0xf4, 0xe7, 0xca, 0xa6, 0xe8, + 0x05, 0x3f, 0xb9, 0x49, 0x11, 0x74, 0xc0, 0xf4, 0xf3, 0x0b, 0xc0, 0xf4, + 0xfb, 0xc9, 0xa8, 0x9d, 0x05, 0x3f, 0xf8, 0xc9, 0xb2, 0xa2, 0x0f, 0x98, + 0xe1, 0xc6, 0x00, 0x91, 0x0f, 0x98, 0xb8, 0x0d, 0xc0, 0xf5, 0x07, 0x12, + 0xc0, 0xf5, 0x0f, 0x10, 0xc0, 0xf5, 0x1f, 0xc2, 0x00, 0x99, 0x00, 0x74, + 0x41, 0x15, 0xc0, 0xf5, 0x2f, 0xc2, 0x00, 0x58, 0x00, 0x74, 0xa1, 0x16, + 0xc0, 0xf5, 0x3b, 0xc2, 0x00, 0x6b, 0x00, 0x74, 0xd1, 0x43, 0xc9, 0xe0, + 0xc0, 0xf5, 0x45, 0xc2, 0x00, 0xa2, 0x00, 0x75, 0x09, 0xc2, 0x42, 0xcd, + 0x00, 0x75, 0x11, 0xc2, 0x00, 0x79, 0x00, 0x75, 0x19, 0xc2, 0x01, 0xc8, + 0x00, 0x75, 0x2b, 0x00, 0xf5, 0x55, 0xc2, 0x02, 0xa0, 0x00, 0x75, 0x39, + 0x43, 0x60, 
0xe8, 0xc0, 0xf5, 0x5b, 0x91, 0x00, 0x75, 0x68, 0x83, 0x00, + 0x75, 0x83, 0x00, 0xf5, 0x67, 0x45, 0xdb, 0x96, 0xc0, 0xf5, 0x77, 0x8b, + 0x00, 0x75, 0xa3, 0x00, 0xf5, 0x83, 0x9b, 0x00, 0x75, 0xb3, 0x00, 0xf5, + 0x87, 0x97, 0x00, 0x75, 0xc3, 0x00, 0xf5, 0x8b, 0x87, 0x00, 0x76, 0x03, + 0x00, 0xf5, 0x8f, 0x91, 0x00, 0x76, 0x10, 0xcf, 0x67, 0xfb, 0x00, 0x75, + 0xd1, 0x4e, 0x6f, 0xc4, 0x40, 0xf5, 0x93, 0xc2, 0x13, 0x4c, 0x00, 0x76, + 0x41, 0x16, 0xc0, 0xf5, 0x9f, 0xc6, 0xcd, 0x31, 0x00, 0x76, 0x58, 0xc4, + 0x15, 0xe7, 0x00, 0x76, 0x81, 0xc3, 0x05, 0x14, 0x00, 0x76, 0x89, 0x16, + 0xc0, 0xf5, 0xa9, 0x08, 0xc0, 0xf5, 0xb5, 0x15, 0xc0, 0xf5, 0xc1, 0xc5, + 0x06, 0xdb, 0x00, 0x76, 0xc1, 0xc4, 0x26, 0x78, 0x00, 0x76, 0xc8, 0xc2, + 0x00, 0x10, 0x00, 0x76, 0xe1, 0xc2, 0x00, 0xa2, 0x00, 0x76, 0xe8, 0x16, + 0xc0, 0xf5, 0xcd, 0x4f, 0x60, 0x6c, 0xc0, 0xf5, 0xd9, 0x4f, 0x01, 0xf3, + 0xc0, 0xf5, 0xe5, 0xda, 0x1a, 0x7e, 0x01, 0x3a, 0x81, 0xc6, 0xcd, 0x8b, + 0x01, 0x38, 0x81, 0xd5, 0x37, 0x6d, 0x01, 0x2e, 0xe9, 0x43, 0x05, 0xb2, + 0x40, 0xf5, 0xf1, 0x16, 0xc0, 0xf5, 0xf7, 0x4f, 0x60, 0x6c, 0xc0, 0xf6, + 0x03, 0xcf, 0x68, 0x37, 0x01, 0x3e, 0xa1, 0xd5, 0x37, 0x6d, 0x01, 0x2e, + 0xe1, 0x44, 0x20, 0xe8, 0x40, 0xf6, 0x0f, 0x0e, 0xc0, 0xf6, 0x15, 0x4f, + 0x2c, 0x4a, 0x40, 0xf6, 0x21, 0x48, 0x01, 0xd3, 0xc0, 0xf6, 0x27, 0xc5, + 0x06, 0xe2, 0x01, 0x2c, 0x03, 0x00, 0xf6, 0x31, 0xc6, 0x02, 0xd1, 0x01, + 0x2f, 0x01, 0xcc, 0x01, 0xdb, 0x0f, 0xdc, 0x70, 0xcc, 0x06, 0xdb, 0x01, + 0x2c, 0xa1, 0xcd, 0x15, 0x02, 0x0f, 0xdc, 0x10, 0xdb, 0x14, 0xf4, 0x0f, + 0xdb, 0x69, 0x45, 0x02, 0xde, 0x40, 0xf6, 0x37, 0xc5, 0x01, 0xa2, 0x01, + 0x0f, 0x3b, 0x00, 0xf6, 0x43, 0xcc, 0x82, 0x35, 0x01, 0x0f, 0x72, 0x00, + 0xf6, 0x47, 0x42, 0x00, 0x2c, 0xc0, 0xf6, 0x4d, 0x42, 0x02, 0xa0, 0x40, + 0xf6, 0x59, 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x89, 0xcc, 0x88, 0xdd, 0x0f, + 0xc1, 0xc8, 0xc4, 0x01, 0xa3, 0x01, 0x0c, 0x8b, 0x00, 0xf6, 0x65, 0xc5, + 0xdb, 0x50, 0x01, 0x70, 0xa8, 0xcb, 0x82, 0xba, 0x01, 0x0f, 0x09, 0xcb, + 0x82, 0x36, 0x01, 0x0e, 0x88, 0x51, 0x01, 0x51, 0xc0, 0xf6, 0x69, 0x45, + 0x11, 0x3a, 0x40, 0xf6, 0x75, 0xc5, 0x01, 0xa2, 0x01, 0x58, 0x31, 0xd3, + 0x43, 0xe4, 0x01, 0x5c, 0x48, 0xc8, 0x2e, 0x20, 0x0f, 0xb7, 0x41, 0xcc, + 0x4e, 0x35, 0x0f, 0xa9, 0xe0, 0xd0, 0x5d, 0x52, 0x01, 0x2f, 0x71, 0xcf, + 0x66, 0x66, 0x01, 0x2f, 0x68, 0xd2, 0x4c, 0xd9, 0x01, 0x3e, 0xf8, 0xc4, + 0x01, 0x9b, 0x01, 0x18, 0x1b, 0x00, 0xf6, 0x81, 0xcf, 0x6a, 0xda, 0x01, + 0x4d, 0xe8, 0xcb, 0x01, 0xfc, 0x01, 0x0f, 0x99, 0xcc, 0x82, 0x35, 0x01, + 0x0e, 0xa9, 0xc5, 0x01, 0xa2, 0x01, 0x0c, 0xab, 0x00, 0xf6, 0x85, 0xcb, + 0x94, 0x22, 0x01, 0x58, 0x69, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x29, 0xd0, + 0x5b, 0xc2, 0x0f, 0xc2, 0xc8, 0x4f, 0x66, 0x48, 0xc0, 0xf6, 0x8b, 0x50, + 0x5c, 0xd2, 0x40, 0xf6, 0x97, 0x00, 0x40, 0xf6, 0xa3, 0xca, 0x1b, 0x09, + 0x00, 0x00, 0xf9, 0xc9, 0x6b, 0xaf, 0x01, 0x5f, 0xd0, 0xc3, 0xa1, 0xa2, + 0x08, 0x1c, 0x01, 0xc2, 0x00, 0x74, 0x08, 0x1c, 0x98, 0xc4, 0xe2, 0x57, + 0x08, 0x1c, 0x11, 0xc4, 0x92, 0x76, 0x08, 0x1c, 0xc8, 0xc2, 0x00, 0xd0, + 0x08, 0x1c, 0x19, 0xc2, 0x0f, 0x9b, 0x08, 0x1c, 0x58, 0xc4, 0xdb, 0x4c, + 0x08, 0x1c, 0x21, 0xc3, 0x01, 0xce, 0x08, 0x1c, 0x78, 0xc2, 0x01, 0x6f, + 0x08, 0x1c, 0x40, 0xc3, 0x04, 0x87, 0x08, 0x1c, 0x39, 0x97, 0x08, 0x1c, + 0x88, 0xc2, 0x00, 0x3d, 0x08, 0x1c, 0x49, 0xc5, 0xd6, 0xaf, 0x08, 0x1c, + 0xc1, 0x91, 0x08, 0x1c, 0xd0, 0xc3, 0x11, 0xef, 0x08, 0x1c, 0x61, 0x03, + 0xc0, 0xf6, 0xb5, 0xc2, 0x06, 0x62, 0x08, 0x1c, 0xe8, 0x0a, 0xc0, 0xf6, + 0xc1, 0x07, 0xc0, 0xf6, 0xcd, 0x19, 0xc0, 0xf6, 0xdf, 0x15, 0xc0, 0xf6, + 0xf1, 0x46, 
0x06, 0x1d, 0xc0, 0xf7, 0x0b, 0x0e, 0xc0, 0xf7, 0x17, 0x16, + 0xc0, 0xf7, 0x2d, 0x04, 0xc0, 0xf7, 0x3f, 0x42, 0x02, 0xae, 0xc0, 0xf7, + 0x4b, 0x05, 0xc0, 0xf7, 0x57, 0x06, 0xc0, 0xf7, 0x6c, 0x14, 0xc0, 0xf7, + 0x7c, 0x0f, 0xc0, 0xf7, 0x88, 0xc9, 0x60, 0xf3, 0x01, 0x3c, 0xa9, 0xcc, + 0x07, 0xbb, 0x01, 0x3a, 0xd1, 0x03, 0xc0, 0xf7, 0x94, 0x11, 0xc0, 0xf7, + 0xa6, 0x08, 0xc0, 0xf7, 0xb8, 0xcb, 0x58, 0xc7, 0x01, 0x38, 0xd1, 0xd4, + 0x10, 0xc9, 0x0f, 0xb3, 0xc8, 0xc5, 0xaf, 0x07, 0x0f, 0xd5, 0x33, 0x00, + 0xf7, 0xc4, 0xc5, 0x36, 0xb7, 0x0f, 0x9d, 0x38, 0x42, 0x00, 0x30, 0xc0, + 0xf7, 0xca, 0xcf, 0x6b, 0x34, 0x0f, 0xb2, 0x48, 0xd3, 0x43, 0x85, 0x01, + 0x36, 0x89, 0xc7, 0x00, 0x90, 0x01, 0x1c, 0x40, 0x42, 0x36, 0xa2, 0xc0, + 0xf7, 0xdc, 0x42, 0x2f, 0xf9, 0xc0, 0xf7, 0xf4, 0x42, 0x14, 0x7d, 0xc0, + 0xf8, 0x10, 0x42, 0x28, 0x5b, 0xc0, 0xf8, 0x20, 0x42, 0x01, 0x99, 0x40, + 0xf8, 0x38, 0x42, 0x28, 0x5b, 0xc0, 0xf8, 0x48, 0x42, 0x01, 0x99, 0xc0, + 0xf8, 0x68, 0x42, 0x36, 0xa2, 0xc0, 0xf8, 0x84, 0x42, 0x2f, 0xf9, 0xc0, + 0xf8, 0x90, 0x42, 0x14, 0x7d, 0x40, 0xf8, 0xac, 0x42, 0x28, 0x5b, 0xc0, + 0xf8, 0xd3, 0x42, 0x01, 0x99, 0xc0, 0xf8, 0xe7, 0x42, 0x36, 0xa2, 0xc0, + 0xf9, 0x05, 0x42, 0x2f, 0xf9, 0xc0, 0xf9, 0x11, 0x42, 0x14, 0x7d, 0xc0, + 0xf9, 0x33, 0x47, 0xc1, 0x15, 0x40, 0xf9, 0x57, 0x42, 0x28, 0x5b, 0xc0, + 0xf9, 0x5f, 0x42, 0x01, 0x99, 0xc0, 0xf9, 0x71, 0x42, 0x36, 0xa2, 0xc0, + 0xf9, 0x89, 0x42, 0x2f, 0xf9, 0xc0, 0xf9, 0xa5, 0x42, 0x14, 0x7d, 0x40, + 0xf9, 0xc5, 0xa0, 0x0d, 0x80, 0xb1, 0x9f, 0x0d, 0x80, 0xa9, 0x9e, 0x0d, + 0x80, 0xa0, 0xa3, 0x0d, 0x80, 0x99, 0xa2, 0x0d, 0x80, 0x91, 0xa1, 0x0d, + 0x80, 0x89, 0xa0, 0x0d, 0x80, 0x81, 0x9f, 0x0d, 0x80, 0x79, 0x9e, 0x0d, + 0x80, 0x08, 0xa2, 0x0d, 0x80, 0x71, 0xa1, 0x0d, 0x80, 0x69, 0xa0, 0x0d, + 0x80, 0x61, 0x9f, 0x0d, 0x80, 0x59, 0x9e, 0x0d, 0x80, 0x50, 0xa1, 0x0d, + 0x80, 0x49, 0xa0, 0x0d, 0x80, 0x41, 0x9f, 0x0d, 0x80, 0x39, 0x9e, 0x0d, + 0x80, 0x30, 0xc2, 0x02, 0xa0, 0x0d, 0x80, 0x29, 0xa0, 0x0d, 0x80, 0x21, + 0x9f, 0x0d, 0x80, 0x19, 0x9e, 0x0d, 0x80, 0x10, 0x42, 0x28, 0x5b, 0xc0, + 0xf9, 0xf2, 0x42, 0x01, 0x99, 0xc0, 0xfa, 0x0e, 0x42, 0x2f, 0xf9, 0xc0, + 0xfa, 0x1e, 0x42, 0x14, 0x7d, 0x40, 0xfa, 0x32, 0x42, 0x14, 0x7d, 0xc0, + 0xfa, 0x46, 0x42, 0x36, 0xa2, 0xc0, 0xfa, 0x60, 0x42, 0x28, 0x5b, 0x40, + 0xfa, 0x70, 0x42, 0x28, 0x5b, 0xc0, 0xfa, 0x88, 0x42, 0x01, 0x99, 0xc0, + 0xfa, 0xa0, 0x42, 0x36, 0xa2, 0xc0, 0xfa, 0xae, 0x42, 0x2f, 0xf9, 0xc0, + 0xfa, 0xbe, 0x42, 0x14, 0x7d, 0x40, 0xfa, 0xda, 0x42, 0x28, 0x5b, 0xc0, + 0xfa, 0xf6, 0x42, 0x01, 0x99, 0xc0, 0xfb, 0x14, 0x42, 0x2f, 0xf9, 0xc0, + 0xfb, 0x38, 0x42, 0x14, 0x7d, 0xc0, 0xfb, 0x54, 0x42, 0x36, 0xa2, 0x40, + 0xfb, 0x64, 0x42, 0x28, 0x5b, 0xc0, 0xfb, 0x7a, 0x42, 0x01, 0x99, 0xc0, + 0xfb, 0x96, 0x42, 0x36, 0xa2, 0xc0, 0xfb, 0xaa, 0x42, 0x2f, 0xf9, 0xc0, + 0xfb, 0xca, 0x42, 0x14, 0x7d, 0x40, 0xfb, 0xe2, 0x48, 0x19, 0x9b, 0xc0, + 0xfc, 0x02, 0x46, 0x02, 0x0f, 0x40, 0xfc, 0x0e, 0x45, 0x12, 0x5c, 0xc0, + 0xfc, 0xa4, 0x4b, 0x11, 0xe3, 0x40, 0xfc, 0xd4, 0xc9, 0xaa, 0x3b, 0x00, + 0x2e, 0x29, 0xc9, 0xb0, 0xbc, 0x00, 0x2e, 0x21, 0xcd, 0x79, 0x00, 0x00, + 0x2d, 0x78, 0x1c, 0xc0, 0xfc, 0xf2, 0x06, 0xc0, 0xfc, 0xfc, 0xc4, 0xe1, + 0x1b, 0x00, 0x2d, 0x61, 0xc3, 0x11, 0x14, 0x00, 0x2d, 0x59, 0x42, 0x0c, + 0x43, 0xc0, 0xfd, 0x08, 0x16, 0xc0, 0xfd, 0x14, 0x42, 0x0f, 0x9a, 0xc0, + 0xfd, 0x1e, 0xcc, 0x89, 0x6d, 0x00, 0x2d, 0x11, 0x42, 0x00, 0xb0, 0xc0, + 0xfd, 0x2a, 0xc5, 0x48, 0x14, 0x00, 0x2c, 0xb9, 0x15, 0xc0, 0xfd, 0x36, + 0xc7, 0xc9, 0xf1, 0x00, 0x2c, 0x89, 0x43, 0x09, 0x3b, 0xc0, 0xfd, 0x42, + 0x0f, 0x40, 
0xfd, 0x51, 0x43, 0x01, 0x7f, 0xc0, 0xfd, 0x66, 0xc7, 0x0c, + 0x96, 0x02, 0x6e, 0x48, 0x0b, 0xc0, 0xfd, 0x96, 0xc7, 0xc7, 0xe4, 0x02, + 0x6e, 0xf9, 0xd5, 0x35, 0xc9, 0x02, 0x6f, 0x19, 0x07, 0x40, 0xfd, 0xa2, + 0xc6, 0x78, 0x44, 0x02, 0x6e, 0x21, 0xd2, 0x49, 0xd3, 0x02, 0x6e, 0x88, + 0x10, 0xc0, 0xfd, 0xb4, 0xcc, 0x84, 0x39, 0x02, 0x6f, 0x58, 0x45, 0x03, + 0x14, 0xc0, 0xfd, 0xc0, 0xc9, 0xaf, 0x54, 0x02, 0x6e, 0x59, 0xce, 0x6e, + 0x82, 0x02, 0x6e, 0xb0, 0xc4, 0x12, 0x38, 0x02, 0x6e, 0x51, 0xc7, 0xc9, + 0x18, 0x02, 0x6f, 0x11, 0xcd, 0x7e, 0x7c, 0x02, 0x6f, 0x68, 0xc9, 0xb4, + 0xfd, 0x02, 0x6e, 0x61, 0xc8, 0xb6, 0x0a, 0x02, 0x6e, 0x80, 0x14, 0xc0, + 0xfd, 0xcc, 0xd1, 0x55, 0xc9, 0x02, 0x6f, 0x60, 0xc5, 0xdb, 0x82, 0x02, + 0x6e, 0x71, 0xcb, 0x93, 0xbf, 0x02, 0x6e, 0xd0, 0xc7, 0xc9, 0x73, 0x02, + 0x6e, 0x91, 0xc8, 0xb6, 0x12, 0x02, 0x6f, 0xb1, 0xcf, 0x63, 0xb4, 0x02, + 0x6f, 0xf0, 0xcd, 0x77, 0x12, 0x02, 0x6e, 0xa1, 0xcb, 0x98, 0x79, 0x02, + 0x6f, 0x51, 0xd0, 0x5e, 0x72, 0x02, 0x6f, 0xf8, 0x16, 0xc0, 0xfd, 0xd8, + 0xc8, 0xba, 0x72, 0x02, 0x6f, 0x80, 0x10, 0xc0, 0xfd, 0xe4, 0xc7, 0xc8, + 0x7e, 0x02, 0x6e, 0xf1, 0xc6, 0xcc, 0x17, 0x02, 0x6f, 0x48, 0x42, 0x02, + 0xaf, 0xc0, 0xfd, 0xf0, 0xca, 0x9b, 0x30, 0x02, 0x6f, 0x30, 0x51, 0x54, + 0x86, 0xc0, 0xfd, 0xfc, 0x04, 0xc0, 0xfe, 0x1a, 0xd5, 0x37, 0x2e, 0x01, + 0x35, 0x49, 0x4a, 0xa5, 0x4e, 0xc0, 0xfe, 0x26, 0xce, 0x71, 0x30, 0x01, + 0x1d, 0x79, 0xc8, 0x22, 0x83, 0x01, 0x01, 0x31, 0x16, 0x40, 0xfe, 0x36, + 0x00, 0x40, 0xfe, 0x42, 0xc7, 0xc1, 0x77, 0x01, 0x33, 0x41, 0xc8, 0xbd, + 0xba, 0x01, 0x30, 0xa9, 0xc6, 0xcd, 0x19, 0x0f, 0x99, 0xb1, 0xc3, 0xcd, + 0x94, 0x0f, 0x99, 0x68, 0xd2, 0x4a, 0xe1, 0x01, 0x1f, 0x98, 0x00, 0x40, + 0xfe, 0x4e, 0xd0, 0x0d, 0xaa, 0x0f, 0xb3, 0x48, 0x83, 0x0f, 0xd5, 0x61, + 0xc8, 0xbd, 0xfa, 0x0f, 0xa1, 0xc8, 0x45, 0x02, 0x9a, 0x40, 0xfe, 0x5d, + 0x42, 0x01, 0x5d, 0xc0, 0xfe, 0x6f, 0xc5, 0xc4, 0x0a, 0x0f, 0xc8, 0xe9, + 0x4c, 0x83, 0x79, 0x40, 0xfe, 0x79, 0x46, 0x09, 0x97, 0xc0, 0xfe, 0x85, + 0x45, 0x00, 0xba, 0xc0, 0xfe, 0xa9, 0x45, 0x01, 0xc3, 0xc0, 0xfe, 0xb5, + 0x46, 0x34, 0x6f, 0xc0, 0xfe, 0xc1, 0x47, 0x02, 0x0e, 0x40, 0xfe, 0xd5, + 0xcd, 0x7a, 0x86, 0x00, 0xb9, 0xa1, 0x4b, 0x6f, 0xc7, 0xc0, 0xff, 0x3f, + 0x47, 0x02, 0x0e, 0x40, 0xff, 0x47, 0x43, 0x4e, 0xaf, 0xc0, 0xff, 0xa5, + 0x4d, 0x7b, 0xe5, 0x40, 0xff, 0xc7, 0x47, 0x34, 0x2f, 0xc0, 0xff, 0xe5, + 0x47, 0x02, 0x0e, 0x40, 0xff, 0xf8, 0xc9, 0x11, 0xf6, 0x07, 0xfb, 0x09, + 0xc5, 0x0a, 0x8a, 0x07, 0xfb, 0x20, 0xcf, 0x69, 0x63, 0x07, 0xfb, 0x11, + 0xcb, 0x03, 0xbc, 0x07, 0xff, 0x48, 0xcf, 0x69, 0x63, 0x07, 0xfb, 0x19, + 0xcb, 0x03, 0xbc, 0x07, 0xff, 0x58, 0x00, 0xc1, 0x00, 0x55, 0xde, 0x0d, + 0xd8, 0x07, 0xfb, 0x80, 0xc6, 0x92, 0x0c, 0x07, 0xfd, 0x01, 0x47, 0x02, + 0x0e, 0x41, 0x00, 0x6d, 0xcb, 0x90, 0x91, 0x0f, 0xb4, 0x23, 0x01, 0x00, + 0xc7, 0xcb, 0x8d, 0xe7, 0x0f, 0xa3, 0x00, 0xcc, 0x80, 0x9d, 0x01, 0x35, + 0x09, 0xd1, 0x54, 0xdb, 0x0f, 0xa8, 0x30, 0x83, 0x01, 0x82, 0x13, 0x01, + 0x00, 0xcd, 0x15, 0xc1, 0x00, 0xd3, 0x8b, 0x01, 0x82, 0x21, 0x97, 0x01, + 0x82, 0x31, 0x87, 0x01, 0x82, 0x41, 0x91, 0x01, 0x82, 0x51, 0x0d, 0xc1, + 0x00, 0xed, 0x09, 0xc1, 0x01, 0x01, 0x1c, 0xc1, 0x01, 0x15, 0x16, 0xc1, + 0x01, 0x29, 0x06, 0xc1, 0x01, 0x3d, 0x90, 0x01, 0x84, 0x9b, 0x01, 0x01, + 0x51, 0x0a, 0xc1, 0x01, 0x65, 0x04, 0xc1, 0x01, 0x79, 0x12, 0xc1, 0x01, + 0x8d, 0x0f, 0xc1, 0x01, 0xa1, 0x1b, 0xc1, 0x01, 0xb5, 0x14, 0xc1, 0x01, + 0xc1, 0x19, 0xc1, 0x01, 0xd5, 0xc2, 0x5d, 0xb3, 0x01, 0x84, 0xa0, 0x00, + 0xc1, 0x01, 0xe5, 0xcb, 0x9a, 0x52, 0x01, 0x01, 0x39, 0xc6, 0x89, 0xd3, + 0x00, 0x01, 
0x68, 0x43, 0x01, 0xd8, 0xc1, 0x01, 0xf1, 0x44, 0x00, 0xde, + 0x41, 0x02, 0x0f, 0xc4, 0x25, 0xd5, 0x01, 0x03, 0x21, 0xc9, 0x1b, 0x0a, + 0x01, 0x03, 0x19, 0xc5, 0x03, 0x4d, 0x01, 0x03, 0x10, 0xcf, 0x67, 0x29, + 0x0f, 0xa9, 0x01, 0xc7, 0x67, 0x31, 0x0f, 0xa9, 0x21, 0xcd, 0x7d, 0x10, + 0x0f, 0xa9, 0x08, 0x0e, 0xc1, 0x02, 0x37, 0xc6, 0xcd, 0x61, 0x01, 0x15, + 0xd1, 0xc7, 0x00, 0x40, 0x01, 0x11, 0x4b, 0x01, 0x02, 0x43, 0xc6, 0x10, + 0xce, 0x01, 0x01, 0xe9, 0xcb, 0x33, 0x33, 0x01, 0x51, 0xe0, 0x00, 0x41, + 0x02, 0x47, 0x46, 0x62, 0x28, 0xc1, 0x02, 0x57, 0x47, 0xc5, 0x98, 0x41, + 0x02, 0x63, 0xda, 0x1c, 0x38, 0x01, 0x4e, 0xf0, 0x15, 0xc1, 0x02, 0x6f, + 0xcb, 0x99, 0xd9, 0x0f, 0xa4, 0x08, 0xc4, 0x00, 0xc3, 0x01, 0x10, 0x31, + 0x43, 0x2c, 0xff, 0x41, 0x02, 0x7b, 0xcc, 0x87, 0x2d, 0x0f, 0xa7, 0x41, + 0xce, 0x6e, 0x66, 0x01, 0x4e, 0xe0, 0xcd, 0x76, 0x4f, 0x01, 0x05, 0xc9, + 0x48, 0xb7, 0x8a, 0x41, 0x02, 0x87, 0xd7, 0x28, 0x2c, 0x0f, 0xd7, 0xa8, + 0xc2, 0x00, 0xf1, 0x01, 0x13, 0x0b, 0x01, 0x02, 0xab, 0xce, 0x33, 0xae, + 0x01, 0x53, 0x38, 0x4a, 0xa7, 0x9c, 0xc1, 0x02, 0xb1, 0x49, 0xb4, 0x1c, + 0x41, 0x02, 0xbf, 0x54, 0x3b, 0x88, 0xc1, 0x02, 0xcb, 0xd1, 0x2b, 0x57, + 0x01, 0x81, 0x60, 0xc4, 0x0a, 0x8b, 0x01, 0x80, 0x09, 0xcb, 0x90, 0xa7, + 0x01, 0x80, 0x30, 0xcc, 0x83, 0x25, 0x01, 0x8c, 0x81, 0xcc, 0x88, 0x71, + 0x01, 0x8c, 0x89, 0xc8, 0x2b, 0x60, 0x01, 0x8c, 0x91, 0x16, 0xc1, 0x02, + 0xe9, 0x08, 0xc1, 0x02, 0xf9, 0x0f, 0xc1, 0x03, 0x05, 0xcb, 0x97, 0x0e, + 0x01, 0x8c, 0xc1, 0xcb, 0x93, 0x88, 0x01, 0x8c, 0xd1, 0xcb, 0x8e, 0x1e, + 0x01, 0x8c, 0xe9, 0xca, 0xa3, 0x28, 0x01, 0x8c, 0xf0, 0x47, 0x34, 0x2f, + 0xc1, 0x03, 0x11, 0xcc, 0x83, 0x19, 0x08, 0x42, 0xb9, 0x47, 0x02, 0x0e, + 0x41, 0x03, 0x1e, 0xc6, 0x57, 0xec, 0x01, 0x03, 0x01, 0xd4, 0x3a, 0xfc, + 0x01, 0x71, 0x88, 0x42, 0x00, 0x97, 0xc1, 0x03, 0x81, 0xd0, 0x5e, 0xc2, + 0x0f, 0xa3, 0x78, 0x05, 0xc1, 0x03, 0x99, 0x0a, 0xc1, 0x03, 0xb7, 0x52, + 0x48, 0x59, 0xc1, 0x03, 0xc5, 0x15, 0xc1, 0x03, 0xd1, 0x0e, 0xc1, 0x04, + 0x05, 0x06, 0xc1, 0x04, 0x15, 0x16, 0xc1, 0x04, 0x2a, 0xd9, 0x0f, 0x09, + 0x01, 0x3a, 0xa9, 0xd6, 0x2c, 0xb2, 0x01, 0x3a, 0xa1, 0x08, 0xc1, 0x04, + 0x40, 0xc3, 0xe6, 0x74, 0x01, 0x38, 0x89, 0x14, 0xc1, 0x04, 0x50, 0x42, + 0x02, 0xae, 0xc1, 0x04, 0x5c, 0x0f, 0xc1, 0x04, 0x68, 0xc6, 0x1c, 0xb4, + 0x01, 0x2f, 0x31, 0x12, 0xc1, 0x04, 0x74, 0x43, 0x00, 0x5f, 0x41, 0x04, + 0x80, 0x45, 0x15, 0xa7, 0xc1, 0x04, 0x8c, 0x45, 0x20, 0x6c, 0x41, 0x04, + 0xaa, 0x45, 0x20, 0x6c, 0xc1, 0x04, 0xc8, 0x45, 0x15, 0xa7, 0x41, 0x04, + 0xe6, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x19, 0xca, 0x35, 0xe9, 0x0f, 0xc3, + 0x59, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x19, 0xd1, 0x50, 0x46, 0x0f, 0xc3, + 0x99, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xd8, 0xd5, 0x35, 0xde, 0x0f, 0xc4, + 0x11, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xd1, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, + 0x11, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x51, 0xd1, 0x50, 0x46, 0x0f, 0xc3, + 0x90, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x01, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, + 0x01, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x41, 0xd1, 0x50, 0x46, 0x0f, 0xc3, + 0x81, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xc0, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, + 0x09, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x49, 0xd1, 0x50, 0x46, 0x0f, 0xc3, + 0x89, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xc9, 0xd5, 0x35, 0xde, 0x0f, 0xc4, + 0x08, 0x00, 0xc1, 0x05, 0x04, 0xc2, 0x00, 0x27, 0x0f, 0xd4, 0xf8, 0x00, + 0xc1, 0x05, 0x10, 0xc5, 0xda, 0xf6, 0x0f, 0x9a, 0x48, 0xc9, 0xae, 0x4f, + 0x0f, 0x17, 0xf9, 0x46, 0x09, 0x97, 0xc1, 0x05, 0x28, 0x45, 0x2b, 0x5f, + 0xc1, 0x05, 0x4c, 0x47, 0x02, 0x0e, 0x41, 0x05, 0x5e, 0xd4, 0x39, 0x08, + 0x0f, 0x98, 
0xc1, 0xd3, 0x3f, 0x96, 0x0f, 0x98, 0xb0, 0xc2, 0x00, 0x7a, + 0x08, 0xc7, 0xf9, 0x47, 0x34, 0x2f, 0xc1, 0x05, 0xe5, 0x46, 0x09, 0x97, + 0xc1, 0x05, 0xfd, 0x4d, 0x29, 0xb9, 0xc1, 0x06, 0x21, 0x4f, 0x0b, 0x17, + 0x41, 0x06, 0x80, 0x0e, 0xc1, 0x06, 0xdf, 0xc8, 0x7d, 0xa4, 0x07, 0xf2, + 0x59, 0xc4, 0x0e, 0x9a, 0x01, 0x81, 0x80, 0xca, 0xa7, 0xf6, 0x0f, 0x9f, + 0x99, 0xca, 0xa1, 0x7a, 0x0f, 0x9f, 0xa1, 0xc9, 0x42, 0xd1, 0x0f, 0xa2, + 0x58, 0x58, 0x21, 0xb3, 0xc1, 0x06, 0xeb, 0xc4, 0x0e, 0x9a, 0x01, 0x80, + 0xe0, 0xc8, 0x31, 0x90, 0x0f, 0xac, 0x29, 0xc6, 0xcb, 0xe1, 0x0f, 0xb7, + 0xc1, 0xc4, 0x5c, 0x58, 0x0f, 0xca, 0x78, 0xc5, 0x8d, 0xed, 0x0f, 0xcb, + 0xf9, 0xc4, 0x1d, 0xa8, 0x01, 0x1f, 0x29, 0xc5, 0x71, 0x71, 0x0f, 0xd6, + 0x98, 0x42, 0x00, 0xaf, 0x41, 0x06, 0xf7, 0x00, 0xc1, 0x07, 0x03, 0xc7, + 0x90, 0x53, 0x01, 0x10, 0xe1, 0xcd, 0x79, 0x41, 0x01, 0x00, 0x28, 0xca, + 0xa0, 0xbc, 0x0f, 0x9b, 0xa3, 0x01, 0x07, 0x25, 0xc3, 0x00, 0x74, 0x01, + 0x56, 0xe1, 0xce, 0x4a, 0x43, 0x01, 0x70, 0x80, 0x44, 0x00, 0x8c, 0xc1, + 0x07, 0x2b, 0xc4, 0x3a, 0xb4, 0x0f, 0xc9, 0x31, 0xc7, 0xc2, 0x8f, 0x0f, + 0xa4, 0x31, 0xcf, 0x64, 0x95, 0x0f, 0xb0, 0xc1, 0x15, 0xc1, 0x07, 0x35, + 0xd2, 0x4c, 0x25, 0x0f, 0xcb, 0xc8, 0x4d, 0x27, 0x30, 0xc1, 0x07, 0x41, + 0xc7, 0xc1, 0xbd, 0x0f, 0x9a, 0x10, 0xc8, 0xb6, 0x62, 0x01, 0x05, 0x19, + 0xc3, 0x91, 0xe8, 0x0f, 0x9a, 0xf8, 0x46, 0x01, 0xec, 0xc1, 0x07, 0x4d, + 0xd1, 0x55, 0x85, 0x0f, 0xa1, 0x28, 0xd8, 0x21, 0xfb, 0x0f, 0xb1, 0x30, + 0xcd, 0x78, 0x64, 0x01, 0x0a, 0xf9, 0xc5, 0x03, 0x02, 0x01, 0x02, 0x20, + 0xc4, 0xe2, 0x5f, 0x0f, 0xad, 0xf1, 0xc5, 0xd6, 0xcd, 0x0f, 0xad, 0xe9, + 0xc7, 0x87, 0xc2, 0x0f, 0xad, 0xe0, 0xca, 0x9b, 0x76, 0x01, 0x3e, 0xb9, + 0xc5, 0x06, 0xe2, 0x01, 0x2c, 0x41, 0x45, 0x15, 0xdb, 0xc1, 0x07, 0x53, + 0xc4, 0x00, 0xf0, 0x00, 0x01, 0x70, 0x10, 0xc1, 0x07, 0x5f, 0x03, 0xc1, + 0x07, 0x6b, 0x06, 0xc1, 0x07, 0x7d, 0x05, 0xc1, 0x07, 0x89, 0x15, 0xc1, + 0x07, 0x99, 0x0e, 0xc1, 0x07, 0xa5, 0x07, 0xc1, 0x07, 0xb5, 0x42, 0x00, + 0xb4, 0xc1, 0x07, 0xc1, 0x42, 0x00, 0xe3, 0xc1, 0x07, 0xcd, 0x14, 0xc1, + 0x07, 0xd9, 0xc5, 0x1e, 0xc8, 0x07, 0xfa, 0xf1, 0x12, 0xc1, 0x07, 0xe5, + 0xc6, 0x60, 0xb1, 0x07, 0xff, 0x19, 0xca, 0x9b, 0x58, 0x07, 0xff, 0x21, + 0xc8, 0x77, 0x99, 0x07, 0xff, 0x29, 0xc8, 0xbe, 0x72, 0x07, 0xff, 0x31, + 0xcc, 0x89, 0xcd, 0x07, 0xf8, 0x69, 0xc9, 0x11, 0xf6, 0x07, 0xf8, 0x71, + 0xcd, 0x36, 0x86, 0x07, 0xfa, 0xe0, 0xcc, 0x68, 0xfd, 0x01, 0x31, 0xeb, + 0x01, 0x07, 0xf7, 0xce, 0x6f, 0x46, 0x01, 0x03, 0x41, 0xcb, 0x62, 0xc8, + 0x0f, 0xca, 0x38, 0x44, 0x3f, 0xf8, 0xc1, 0x07, 0xfb, 0x42, 0x00, 0xe1, + 0xc1, 0x08, 0x05, 0xc7, 0xc1, 0x0e, 0x0f, 0xcf, 0x40, 0xc3, 0x17, 0x28, + 0x01, 0x2e, 0x49, 0xd1, 0x55, 0x74, 0x0f, 0x9d, 0x19, 0xd7, 0x2a, 0x3d, + 0x0f, 0x9b, 0x28, 0xc7, 0xc7, 0x89, 0x0f, 0xae, 0x21, 0xc6, 0x9e, 0xf4, + 0x0f, 0xa6, 0x09, 0xc9, 0x1b, 0x0a, 0x00, 0x00, 0xe0, 0xc9, 0xae, 0x58, + 0x0f, 0xa7, 0xe9, 0xc6, 0xd0, 0x25, 0x0f, 0x9c, 0xf0, 0xc6, 0xb7, 0xec, + 0x0f, 0xd4, 0xb1, 0xc5, 0x62, 0xce, 0x0f, 0x9c, 0xb0, 0x14, 0xc1, 0x08, + 0x11, 0x16, 0xc1, 0x08, 0x1d, 0x10, 0xc1, 0x08, 0x3b, 0x06, 0xc1, 0x08, + 0x54, 0x15, 0xc1, 0x08, 0x68, 0x04, 0xc1, 0x08, 0x7e, 0x0a, 0xc1, 0x08, + 0x88, 0x03, 0xc1, 0x08, 0x92, 0xc2, 0x01, 0x4a, 0x0b, 0x7a, 0x11, 0x1c, + 0xc1, 0x08, 0x9c, 0x43, 0x70, 0x51, 0xc1, 0x08, 0xae, 0x09, 0xc1, 0x08, + 0xca, 0xc2, 0x8d, 0x8f, 0x0b, 0x79, 0x39, 0x13, 0xc1, 0x08, 0xd2, 0xc2, + 0x02, 0x2b, 0x0b, 0x78, 0xf1, 0x0e, 0xc1, 0x08, 0xdc, 0x18, 0xc1, 0x08, + 0xea, 0xc2, 0x00, 0x87, 0x0b, 0x78, 0x39, 0x0f, 0xc1, 0x08, 0xf4, 0x12, + 0x41, 0x08, 
0xfe, 0xc5, 0x05, 0x02, 0x0b, 0x7c, 0x91, 0xc5, 0x00, 0xd4, + 0x0b, 0x7c, 0x89, 0xc9, 0x63, 0x69, 0x0b, 0x7c, 0x81, 0xc5, 0x00, 0x2c, + 0x0b, 0x7c, 0x78, 0x97, 0x0b, 0x7b, 0x53, 0x01, 0x09, 0x08, 0x8b, 0x0b, + 0x7b, 0x0b, 0x01, 0x09, 0x29, 0x87, 0x0b, 0x7a, 0xeb, 0x01, 0x09, 0x4d, + 0xc2, 0x00, 0x18, 0x0b, 0x7c, 0x19, 0x91, 0x0b, 0x7a, 0xcb, 0x01, 0x09, + 0x63, 0x9b, 0x0b, 0x7b, 0x8b, 0x01, 0x09, 0x73, 0x90, 0x0b, 0x7b, 0xeb, + 0x01, 0x09, 0x7d, 0x83, 0x0b, 0x7a, 0xa3, 0x01, 0x09, 0x81, 0xca, 0x9d, + 0x7e, 0x0b, 0x7b, 0xc3, 0x01, 0x09, 0xa1, 0x99, 0x0b, 0x7a, 0xe2, 0x01, + 0x09, 0xa5, 0x49, 0xaa, 0xc2, 0xc1, 0x09, 0xa9, 0xca, 0xa1, 0x84, 0x0b, + 0x7a, 0x89, 0xd6, 0x2b, 0xec, 0x0b, 0x7a, 0x78, 0xcb, 0x95, 0xc4, 0x01, + 0x22, 0x49, 0xcc, 0x8a, 0xbd, 0x01, 0x22, 0x40, 0xc5, 0xbc, 0xed, 0x0f, + 0xa9, 0x61, 0xc5, 0x36, 0xb7, 0x0f, 0x9d, 0x21, 0xc5, 0x00, 0xb9, 0x00, + 0x05, 0xa9, 0xc2, 0x00, 0x51, 0x0f, 0xcd, 0x00, 0xc3, 0x02, 0xa3, 0x00, + 0x05, 0xb9, 0xe0, 0x06, 0x67, 0x0f, 0xde, 0x10, 0x00, 0xc1, 0x09, 0xb5, + 0xcd, 0x79, 0x4e, 0x01, 0x10, 0x98, 0xc4, 0xd1, 0x89, 0x0f, 0xae, 0xa9, + 0xc4, 0x5c, 0x58, 0x0f, 0xa5, 0xe9, 0xc3, 0x22, 0xd3, 0x0f, 0xb4, 0x80, + 0x43, 0x01, 0xdf, 0xc1, 0x09, 0xc4, 0x45, 0xdc, 0xdb, 0x41, 0x0a, 0x00, + 0xce, 0x72, 0x10, 0x0b, 0x74, 0xd1, 0x15, 0xc1, 0x0a, 0x12, 0xc9, 0x11, + 0xf6, 0x0b, 0x74, 0xc1, 0x05, 0xc1, 0x0a, 0x1e, 0x46, 0x09, 0x97, 0xc1, + 0x0a, 0x2a, 0x47, 0x34, 0x2f, 0x41, 0x0a, 0x51, 0xc9, 0xaf, 0x93, 0x01, + 0x1e, 0xc9, 0x16, 0xc1, 0x0a, 0x67, 0x4a, 0xa4, 0x0e, 0xc1, 0x0a, 0x79, + 0xcf, 0x67, 0xa1, 0x01, 0x1e, 0x99, 0xc5, 0x1d, 0x88, 0x01, 0x1e, 0x88, + 0x4a, 0x9d, 0xf6, 0xc1, 0x0a, 0x85, 0x46, 0x09, 0x97, 0xc1, 0x0a, 0x8d, + 0x51, 0x51, 0x89, 0x41, 0x0a, 0xab, 0x48, 0xbc, 0x6a, 0xc1, 0x0a, 0xbb, + 0x4d, 0x75, 0xcd, 0x41, 0x0a, 0xcb, 0xc2, 0x07, 0xb8, 0x01, 0x12, 0xf1, + 0xc5, 0x01, 0x95, 0x01, 0x11, 0x0b, 0x01, 0x0a, 0xda, 0xd4, 0x3f, 0x34, + 0x01, 0x4c, 0xe8, 0xc4, 0x15, 0xe7, 0x05, 0x5f, 0x81, 0xc4, 0x26, 0x78, + 0x05, 0x5f, 0xc9, 0xc3, 0x05, 0x14, 0x05, 0x5f, 0x89, 0x16, 0xc1, 0x0a, + 0xde, 0x08, 0xc1, 0x0a, 0xea, 0x15, 0xc1, 0x0a, 0xf6, 0xc5, 0x06, 0xdb, + 0x05, 0x5f, 0xc0, 0xc8, 0xbf, 0xd2, 0x05, 0x5f, 0x69, 0xc3, 0x7c, 0x50, + 0x05, 0x57, 0x91, 0xcb, 0x8e, 0x6b, 0x05, 0x57, 0x88, 0x4a, 0x6f, 0xc8, + 0xc1, 0x0b, 0x02, 0xc5, 0x1e, 0x96, 0x05, 0x57, 0xb0, 0x46, 0x02, 0x0f, + 0xc1, 0x0b, 0x32, 0xc7, 0xc2, 0xc7, 0x05, 0x5f, 0x60, 0xc2, 0x00, 0xd1, + 0x05, 0x57, 0x81, 0xc2, 0x06, 0xdb, 0x05, 0x5f, 0x58, 0x00, 0xc1, 0x0b, + 0xa1, 0xc3, 0x1a, 0xd2, 0x0f, 0xb7, 0x19, 0xcf, 0x68, 0xaf, 0x0f, 0xcd, + 0xe0, 0xc3, 0x03, 0x0c, 0x01, 0x37, 0x83, 0x01, 0x0b, 0xad, 0xc5, 0xd7, + 0x86, 0x0f, 0xaf, 0xd8, 0x00, 0x41, 0x0b, 0xb1, 0x49, 0x89, 0xf4, 0xc1, + 0x0b, 0xbd, 0xcd, 0x78, 0x98, 0x01, 0x1c, 0x69, 0xc4, 0x47, 0x02, 0x0f, + 0xb4, 0xe8, 0x16, 0xc1, 0x0b, 0xc7, 0x15, 0xc1, 0x0b, 0xd9, 0xce, 0x6c, + 0x1a, 0x08, 0xb3, 0x3b, 0x01, 0x0b, 0xe8, 0xcd, 0x76, 0x69, 0x08, 0xb3, + 0x0b, 0x01, 0x0b, 0xee, 0xc5, 0x01, 0x2d, 0x00, 0xc0, 0x03, 0x01, 0x0b, + 0xf4, 0x06, 0xc1, 0x0b, 0xfa, 0x47, 0x02, 0x0e, 0xc1, 0x0c, 0x06, 0x08, + 0xc1, 0x0c, 0x91, 0xcf, 0x69, 0xbd, 0x00, 0xc0, 0x71, 0xc6, 0xcd, 0xc7, + 0x00, 0xc0, 0x51, 0x47, 0xc2, 0x57, 0xc1, 0x0c, 0xa3, 0x42, 0x00, 0x99, + 0xc1, 0x0c, 0xaf, 0xc8, 0x22, 0x83, 0x00, 0xc0, 0x08, 0x00, 0xc1, 0x0c, + 0xbb, 0xcb, 0x5c, 0x17, 0x0f, 0xc8, 0x88, 0xc5, 0x11, 0x55, 0x0f, 0xa1, + 0xa8, 0x00, 0xc1, 0x0c, 0xc7, 0x45, 0x02, 0x09, 0x41, 0x0c, 0xe3, 0xc2, + 0x00, 0x96, 0x01, 0x15, 0x39, 0xcd, 0x7c, 0xcf, 0x0f, 0xc9, 0xd8, 0xd0, + 0x57, 0xf2, 
0x0f, 0x9c, 0x89, 0xc4, 0x2a, 0x3e, 0x0f, 0xcb, 0x70, 0xc3, + 0x79, 0x83, 0x0f, 0xa7, 0xa1, 0xdd, 0x10, 0x4c, 0x0f, 0xa7, 0x90, 0x47, + 0xc7, 0x4a, 0xc1, 0x0c, 0xef, 0x45, 0x58, 0xc2, 0xc1, 0x0d, 0x1d, 0x4a, + 0xa3, 0x0a, 0xc1, 0x0d, 0x5b, 0x15, 0xc1, 0x0d, 0x6d, 0x4e, 0x73, 0x52, + 0xc1, 0x0d, 0x79, 0x08, 0xc1, 0x0d, 0x8b, 0x42, 0x00, 0x2c, 0xc1, 0x0d, + 0x97, 0x45, 0x00, 0x49, 0x41, 0x0d, 0xa3, 0xc4, 0x14, 0x09, 0x0e, 0x97, + 0x98, 0xc4, 0x00, 0x2d, 0x0e, 0x97, 0x43, 0x01, 0x0d, 0xbb, 0xc5, 0x66, + 0xb1, 0x0e, 0x97, 0x58, 0xc4, 0x18, 0x10, 0x0e, 0x97, 0x3b, 0x01, 0x0d, + 0xc1, 0xc2, 0x22, 0xcc, 0x0e, 0x97, 0x32, 0x01, 0x0d, 0xc7, 0x0b, 0xc1, + 0x0d, 0xcd, 0xc3, 0x09, 0x9e, 0x0e, 0x97, 0x22, 0x01, 0x0d, 0xd9, 0x0a, + 0xc1, 0x0d, 0xdf, 0x19, 0xc1, 0x0d, 0xeb, 0xc2, 0x00, 0xc4, 0x0e, 0x97, + 0x50, 0x91, 0x08, 0xf7, 0xb1, 0x87, 0x08, 0xf7, 0xa9, 0x97, 0x08, 0xf7, + 0xa1, 0x8b, 0x08, 0xf7, 0x98, 0x83, 0x08, 0xf7, 0x89, 0xc2, 0x0d, 0xf6, + 0x08, 0xf7, 0x81, 0xc2, 0x02, 0x41, 0x08, 0xf7, 0x79, 0xc2, 0x00, 0xdb, + 0x08, 0xf7, 0x71, 0xc2, 0x00, 0x39, 0x08, 0xf7, 0x69, 0xc2, 0x19, 0x2c, + 0x08, 0xf7, 0x61, 0x10, 0xc1, 0x0d, 0xf5, 0xc2, 0x25, 0x3b, 0x08, 0xf7, + 0x51, 0xc2, 0x00, 0x64, 0x08, 0xf7, 0x49, 0xc2, 0x0e, 0x9a, 0x08, 0xf7, + 0x39, 0xc2, 0x01, 0x6f, 0x08, 0xf7, 0x31, 0xc2, 0x01, 0xc3, 0x08, 0xf7, + 0x29, 0xc2, 0x01, 0x5d, 0x08, 0xf7, 0x21, 0xc2, 0x00, 0xb0, 0x08, 0xf7, + 0x19, 0xc2, 0x01, 0x30, 0x08, 0xf7, 0x09, 0xc2, 0x02, 0x2b, 0x08, 0xf7, + 0x00, 0x46, 0x09, 0x97, 0xc1, 0x0e, 0x05, 0x14, 0xc1, 0x0e, 0x29, 0x18, + 0xc1, 0x0e, 0x35, 0x45, 0x00, 0xba, 0xc1, 0x0e, 0x41, 0x47, 0x02, 0x0e, + 0x41, 0x0e, 0x5f, 0x15, 0xc1, 0x0e, 0xc6, 0x4b, 0x6f, 0xc7, 0xc1, 0x0e, + 0xd2, 0x47, 0x02, 0x0e, 0xc1, 0x0e, 0xe8, 0xc9, 0xaa, 0xa7, 0x08, 0xe3, + 0x89, 0xc9, 0x15, 0xcc, 0x08, 0xe3, 0x80, 0x4c, 0x37, 0x33, 0xc1, 0x0f, + 0x48, 0xcf, 0x20, 0xfc, 0x01, 0x35, 0x29, 0xc4, 0x00, 0xba, 0x01, 0x32, + 0x10, 0x45, 0x00, 0xba, 0xc1, 0x0f, 0x54, 0x47, 0x02, 0x0e, 0xc1, 0x0f, + 0x66, 0x4b, 0x6f, 0xc7, 0xc1, 0x0f, 0xcf, 0xce, 0x73, 0x0c, 0x00, 0x6a, + 0xb9, 0x49, 0x53, 0xa9, 0xc1, 0x0f, 0xf5, 0x06, 0xc1, 0x10, 0x01, 0x47, + 0x34, 0x2f, 0x41, 0x10, 0x0d, 0x4c, 0x11, 0xe2, 0xc1, 0x10, 0x19, 0x47, + 0x34, 0x2f, 0xc1, 0x10, 0x37, 0x52, 0x48, 0x11, 0xc1, 0x10, 0x4a, 0x47, + 0x02, 0x0e, 0xc1, 0x10, 0x56, 0xc7, 0xc3, 0xae, 0x08, 0x56, 0x40, 0xc7, + 0xc3, 0xe6, 0x0f, 0xab, 0xd1, 0x43, 0x03, 0x35, 0xc1, 0x10, 0xbb, 0x45, + 0x00, 0x8c, 0xc1, 0x10, 0xc7, 0xd7, 0x29, 0xf8, 0x0f, 0xa3, 0x58, 0xcb, + 0x05, 0x1c, 0x00, 0x42, 0xf1, 0xcf, 0x63, 0xff, 0x00, 0x42, 0xd9, 0xd1, + 0x4e, 0xbf, 0x00, 0x42, 0xd1, 0xd0, 0x58, 0x32, 0x00, 0x42, 0xc9, 0x47, + 0x02, 0x0e, 0x41, 0x10, 0xd3, 0x0e, 0xc1, 0x10, 0xf3, 0x15, 0xc1, 0x10, + 0xff, 0xd1, 0x50, 0xce, 0x08, 0x8b, 0xa0, 0xc5, 0x8d, 0x1c, 0x0f, 0x81, + 0x51, 0x19, 0xc1, 0x11, 0x0b, 0x07, 0xc1, 0x11, 0x1d, 0x15, 0xc1, 0x11, + 0x29, 0x10, 0xc1, 0x11, 0x47, 0xca, 0xa0, 0x9e, 0x0f, 0x80, 0x21, 0xcc, + 0x87, 0xe1, 0x0f, 0x80, 0x29, 0x11, 0xc1, 0x11, 0x53, 0x16, 0xc1, 0x11, + 0x5f, 0x08, 0xc1, 0x11, 0x6b, 0xc4, 0xe3, 0xc7, 0x0f, 0x81, 0x11, 0xcd, + 0x78, 0x8b, 0x0f, 0x81, 0x29, 0x42, 0x01, 0x5d, 0xc1, 0x11, 0x77, 0xc6, + 0xce, 0x39, 0x0f, 0x81, 0x40, 0x43, 0x00, 0xe5, 0xc1, 0x11, 0x83, 0x00, + 0x41, 0x11, 0x96, 0x42, 0x0b, 0x26, 0xc1, 0x11, 0xa8, 0xc3, 0x64, 0xae, + 0x01, 0x15, 0xc1, 0xc3, 0x0e, 0xa7, 0x01, 0x14, 0x62, 0x01, 0x11, 0xb4, + 0xcc, 0x45, 0x8d, 0x08, 0x95, 0x49, 0x47, 0x02, 0x0e, 0x41, 0x11, 0xb8, + 0xc4, 0x26, 0x78, 0x0b, 0x53, 0x49, 0xc5, 0x06, 0xdb, 0x0b, 0x53, 0x41, + 0x15, 0xc1, 
0x12, 0x14, 0x08, 0xc1, 0x12, 0x20, 0x16, 0xc1, 0x12, 0x2c, + 0xc3, 0x05, 0x14, 0x0b, 0x53, 0x09, 0xc4, 0x15, 0xe7, 0x0b, 0x53, 0x00, + 0xc2, 0x13, 0x4c, 0x0b, 0x52, 0xf1, 0xc3, 0x01, 0x9b, 0x0b, 0x52, 0xa9, + 0x83, 0x0b, 0x52, 0x00, 0x8b, 0x0b, 0x52, 0xe9, 0x91, 0x0b, 0x52, 0x98, + 0x8b, 0x0b, 0x52, 0xe1, 0x91, 0x0b, 0x52, 0x48, 0x90, 0x0b, 0x52, 0xd0, + 0x91, 0x0b, 0x52, 0xc9, 0xc4, 0xe2, 0x77, 0x0b, 0x52, 0x61, 0xc3, 0x4d, + 0xe7, 0x0b, 0x52, 0x40, 0x83, 0x0b, 0x52, 0xb0, 0x91, 0x0b, 0x52, 0x89, + 0x8e, 0x0b, 0x52, 0x68, 0x83, 0x0b, 0x52, 0x81, 0xc2, 0x00, 0x0a, 0x0b, + 0x52, 0x38, 0xc2, 0x00, 0x74, 0x0b, 0x52, 0x79, 0xc2, 0x04, 0x2b, 0x0b, + 0x52, 0x08, 0xc3, 0x7c, 0x57, 0x0b, 0x52, 0x71, 0xc2, 0x03, 0x4e, 0x0b, + 0x52, 0x18, 0x8b, 0x0b, 0x52, 0x50, 0x4f, 0x68, 0x91, 0xc1, 0x12, 0x38, + 0xce, 0x6c, 0xc2, 0x05, 0x53, 0xd9, 0x15, 0xc1, 0x12, 0x40, 0x03, 0xc1, + 0x12, 0x4c, 0xc9, 0x0e, 0x6e, 0x00, 0x81, 0xb9, 0x42, 0x07, 0xb2, 0xc1, + 0x12, 0x58, 0xce, 0x70, 0xb2, 0x00, 0x82, 0x51, 0x57, 0x28, 0x9f, 0xc1, + 0x12, 0x64, 0xd4, 0x38, 0x7c, 0x00, 0x84, 0x79, 0x4c, 0x8c, 0x31, 0x41, + 0x12, 0x78, 0x03, 0xc1, 0x12, 0x80, 0xc8, 0xbb, 0xd2, 0x00, 0x82, 0x61, + 0xc9, 0xb4, 0xe2, 0x00, 0x82, 0x69, 0xc8, 0xbf, 0x5a, 0x00, 0x82, 0x79, + 0x45, 0x4d, 0x21, 0x41, 0x12, 0x8c, 0xc4, 0x15, 0xe7, 0x00, 0x84, 0x81, + 0xc3, 0x05, 0x14, 0x00, 0x84, 0x89, 0x16, 0xc1, 0x12, 0x98, 0x08, 0xc1, + 0x12, 0xa4, 0x15, 0xc1, 0x12, 0xb0, 0xc5, 0x06, 0xdb, 0x00, 0x84, 0xc1, + 0xc4, 0x26, 0x78, 0x00, 0x84, 0xc8, 0x83, 0x00, 0x81, 0x0b, 0x01, 0x12, + 0xbc, 0x0d, 0xc1, 0x12, 0xc6, 0x16, 0xc1, 0x12, 0xd3, 0x15, 0xc1, 0x12, + 0xe4, 0x09, 0xc1, 0x12, 0xf8, 0x10, 0xc1, 0x13, 0x08, 0x05, 0xc1, 0x13, + 0x1c, 0x0c, 0xc1, 0x13, 0x26, 0x06, 0xc1, 0x13, 0x30, 0x12, 0xc1, 0x13, + 0x3e, 0x04, 0xc1, 0x13, 0x48, 0x0f, 0xc1, 0x13, 0x52, 0xc2, 0x19, 0x2c, + 0x00, 0x80, 0xd1, 0x14, 0xc1, 0x13, 0x5c, 0x0e, 0xc1, 0x13, 0x66, 0x19, + 0xc1, 0x13, 0x70, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0xf9, 0x8b, 0x00, 0x81, + 0x1b, 0x01, 0x13, 0x7a, 0x97, 0x00, 0x81, 0x2b, 0x01, 0x13, 0x7e, 0x87, + 0x00, 0x81, 0x3b, 0x01, 0x13, 0x82, 0x91, 0x00, 0x81, 0x49, 0x48, 0xb2, + 0x2d, 0x41, 0x13, 0x88, 0xc2, 0x02, 0x2e, 0x05, 0x53, 0xb1, 0xc2, 0xc8, + 0xd4, 0x05, 0x53, 0xa9, 0xc3, 0xe6, 0x17, 0x05, 0x53, 0xa0, 0xc4, 0x26, + 0x78, 0x05, 0x4f, 0xc9, 0xc5, 0x06, 0xdb, 0x05, 0x4f, 0xc1, 0x15, 0xc1, + 0x13, 0x96, 0x08, 0xc1, 0x13, 0xa2, 0x16, 0xc1, 0x13, 0xae, 0xc3, 0x05, + 0x14, 0x05, 0x4f, 0x89, 0xc4, 0x15, 0xe7, 0x05, 0x4f, 0x80, 0xc5, 0xd6, + 0x73, 0x00, 0x83, 0x19, 0xc6, 0xce, 0x57, 0x00, 0x83, 0x20, 0x83, 0x00, + 0x81, 0x61, 0x8b, 0x00, 0x81, 0x92, 0x01, 0x13, 0xba, 0x8b, 0x00, 0x81, + 0x70, 0x97, 0x00, 0x81, 0x80, 0xc6, 0x00, 0xd3, 0x00, 0x81, 0xa8, 0xc2, + 0x25, 0x9f, 0x00, 0x81, 0x99, 0x91, 0x00, 0x81, 0xa0, 0x94, 0x00, 0x82, + 0xb3, 0x01, 0x13, 0xc3, 0x8e, 0x00, 0x82, 0xc2, 0x01, 0x13, 0xc7, 0xcc, + 0x85, 0xad, 0x00, 0x83, 0x11, 0x44, 0x00, 0xd0, 0x41, 0x13, 0xcb, 0xc2, + 0x2c, 0x43, 0x00, 0x83, 0x39, 0xc2, 0x0f, 0xe1, 0x00, 0x83, 0x40, 0xc2, + 0x49, 0x0c, 0x00, 0x83, 0x91, 0x97, 0x00, 0x83, 0x99, 0xc2, 0x02, 0xe0, + 0x00, 0x83, 0xa0, 0x46, 0x30, 0xa0, 0xc1, 0x13, 0xde, 0x4a, 0xa6, 0x0c, + 0x41, 0x13, 0xf6, 0xc2, 0x02, 0xa0, 0x00, 0x82, 0x11, 0xc4, 0x02, 0xde, + 0x00, 0x82, 0x18, 0xc3, 0x09, 0x9e, 0x00, 0x82, 0x21, 0xc3, 0x0d, 0x14, + 0x00, 0x82, 0x28, 0xc2, 0x22, 0xcc, 0x00, 0x82, 0x31, 0xc4, 0x18, 0x10, + 0x00, 0x82, 0x38, 0xca, 0x9f, 0xfe, 0x0f, 0xad, 0x30, 0x47, 0x02, 0x0e, + 0xc1, 0x14, 0x08, 0xca, 0x3b, 0x06, 0x01, 0x87, 0xd9, 0xce, 0x1c, 0x92, + 0x01, 0x87, 
0xe9, 0xd5, 0x34, 0xb8, 0x01, 0x87, 0xf1, 0xcc, 0x80, 0xfd, + 0x01, 0x87, 0xf8, 0xd1, 0x2f, 0xfb, 0x01, 0x84, 0xd9, 0xd6, 0x2f, 0xf6, + 0x01, 0x84, 0xe1, 0xcd, 0x77, 0x87, 0x01, 0x85, 0x01, 0xd4, 0x0d, 0xe2, + 0x01, 0x87, 0xe0, 0xc6, 0x00, 0xd3, 0x08, 0x86, 0x68, 0xc9, 0xb2, 0x2d, + 0x08, 0x86, 0x11, 0x03, 0xc1, 0x14, 0x5e, 0x91, 0x08, 0x85, 0xb9, 0x87, + 0x08, 0x85, 0xa9, 0x97, 0x08, 0x85, 0x9b, 0x01, 0x14, 0x6a, 0x8b, 0x08, + 0x85, 0x8a, 0x01, 0x14, 0x6e, 0x46, 0x00, 0x59, 0xc1, 0x14, 0x72, 0xc4, + 0x19, 0x53, 0x08, 0x86, 0x00, 0xcb, 0x45, 0x8e, 0x08, 0x85, 0xf1, 0x44, + 0x00, 0xbb, 0x41, 0x14, 0x7e, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x79, 0x15, + 0xc1, 0x14, 0x96, 0xc2, 0x02, 0x41, 0x08, 0x85, 0x59, 0xc2, 0x00, 0xdb, + 0x08, 0x85, 0x51, 0x14, 0xc1, 0x14, 0xa6, 0xc2, 0x19, 0x2c, 0x08, 0x85, + 0x41, 0xc2, 0x01, 0xc3, 0x08, 0x85, 0x39, 0x04, 0xc1, 0x14, 0xb0, 0x12, + 0xc1, 0x14, 0xba, 0x10, 0xc1, 0x14, 0xc4, 0x06, 0xc1, 0x14, 0xda, 0x16, + 0xc1, 0x14, 0xe8, 0x0c, 0xc1, 0x14, 0xf6, 0x05, 0xc1, 0x15, 0x00, 0x09, + 0xc1, 0x15, 0x0a, 0x0d, 0xc1, 0x15, 0x14, 0x83, 0x08, 0x84, 0x1b, 0x01, + 0x15, 0x1e, 0x91, 0x08, 0x84, 0x59, 0x87, 0x08, 0x84, 0x49, 0x97, 0x08, + 0x84, 0x3b, 0x01, 0x15, 0x2a, 0x8b, 0x08, 0x84, 0x2a, 0x01, 0x15, 0x2e, + 0xc4, 0xde, 0x93, 0x05, 0x49, 0x79, 0xc3, 0xe4, 0xfd, 0x05, 0x49, 0x70, + 0xc5, 0xde, 0x02, 0x05, 0x49, 0x63, 0x01, 0x15, 0x32, 0xc6, 0xca, 0x77, + 0x05, 0x49, 0x58, 0x91, 0x05, 0x49, 0x51, 0x87, 0x05, 0x49, 0x3b, 0x01, + 0x15, 0x38, 0x97, 0x05, 0x49, 0x42, 0x01, 0x15, 0x3c, 0x11, 0xc1, 0x15, + 0x40, 0x8b, 0x05, 0x49, 0x21, 0x83, 0x05, 0x49, 0x11, 0xc2, 0x00, 0x64, + 0x05, 0x49, 0x09, 0xc2, 0x02, 0x41, 0x05, 0x49, 0x01, 0x0a, 0xc1, 0x15, + 0x48, 0x16, 0xc1, 0x15, 0x52, 0xc2, 0x01, 0x4a, 0x05, 0x48, 0xe9, 0xc2, + 0x00, 0xdb, 0x05, 0x48, 0xe1, 0xc2, 0x19, 0x2c, 0x05, 0x48, 0xd9, 0xc2, + 0x00, 0x39, 0x05, 0x48, 0xd1, 0xc2, 0x01, 0x5d, 0x05, 0x48, 0xc9, 0xc2, + 0x0e, 0x9a, 0x05, 0x48, 0xc1, 0xc2, 0x01, 0xc3, 0x05, 0x48, 0xb9, 0x12, + 0xc1, 0x15, 0x5c, 0x10, 0xc1, 0x15, 0x66, 0xc2, 0x02, 0x1c, 0x05, 0x48, + 0x81, 0x15, 0xc1, 0x15, 0x76, 0xc2, 0x01, 0x30, 0x05, 0x48, 0x61, 0x0d, + 0x41, 0x15, 0x80, 0xc4, 0x26, 0x78, 0x05, 0x48, 0x49, 0xc5, 0x06, 0xdb, + 0x05, 0x48, 0x41, 0x15, 0xc1, 0x15, 0x8a, 0x08, 0xc1, 0x15, 0x96, 0x16, + 0xc1, 0x15, 0xa2, 0xc3, 0x05, 0x14, 0x05, 0x48, 0x09, 0xc4, 0x15, 0xe7, + 0x05, 0x48, 0x00, 0x45, 0x00, 0xba, 0xc1, 0x15, 0xae, 0x42, 0x00, 0x49, + 0xc1, 0x15, 0xd4, 0x4b, 0x6f, 0xc7, 0xc1, 0x15, 0xe0, 0xce, 0x74, 0xcc, + 0x00, 0x66, 0xb1, 0x46, 0x09, 0x97, 0x41, 0x16, 0x06, 0xc4, 0xe1, 0x83, + 0x0f, 0xcc, 0xc1, 0x4b, 0x91, 0xfc, 0x41, 0x16, 0x2a, 0x05, 0xc1, 0x16, + 0x8e, 0x04, 0x41, 0x16, 0xc6, 0xc4, 0x26, 0x78, 0x08, 0x97, 0xc9, 0x15, + 0xc1, 0x17, 0x06, 0x08, 0xc1, 0x17, 0x12, 0x16, 0xc1, 0x17, 0x1e, 0xc3, + 0x05, 0x14, 0x08, 0x97, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0x97, 0x81, 0xc5, + 0x06, 0xdb, 0x08, 0x97, 0xc0, 0xc6, 0x1e, 0x95, 0x08, 0x97, 0x51, 0xc5, + 0x33, 0x5d, 0x08, 0x97, 0x49, 0xc8, 0x14, 0x38, 0x08, 0x96, 0xf8, 0x91, + 0x08, 0x97, 0x39, 0x03, 0xc1, 0x17, 0x2a, 0x87, 0x08, 0x97, 0x29, 0x97, + 0x08, 0x97, 0x1b, 0x01, 0x17, 0x36, 0x8b, 0x08, 0x97, 0x0a, 0x01, 0x17, + 0x3a, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0xf1, 0x15, 0xc1, 0x17, 0x3e, 0xc2, + 0x02, 0x41, 0x08, 0x96, 0xd9, 0xc2, 0x00, 0xdb, 0x08, 0x96, 0xd1, 0x14, + 0xc1, 0x17, 0x48, 0xc2, 0x19, 0x2c, 0x08, 0x96, 0xc1, 0xc2, 0x01, 0xc3, + 0x08, 0x96, 0xb9, 0x04, 0xc1, 0x17, 0x52, 0x12, 0xc1, 0x17, 0x62, 0x10, + 0xc1, 0x17, 0x6c, 0x06, 0xc1, 0x17, 0x82, 0x16, 0xc1, 0x17, 0x90, 0x0c, + 0xc1, 0x17, 
0x9e, 0x05, 0xc1, 0x17, 0xae, 0x09, 0xc1, 0x17, 0xb8, 0x0d, + 0xc1, 0x17, 0xc8, 0x83, 0x08, 0x95, 0x83, 0x01, 0x17, 0xd2, 0x91, 0x08, + 0x95, 0xc1, 0x87, 0x08, 0x95, 0xb1, 0x97, 0x08, 0x95, 0xa3, 0x01, 0x17, + 0xde, 0x8b, 0x08, 0x95, 0x92, 0x01, 0x17, 0xe2, 0x44, 0x00, 0xbb, 0xc1, + 0x17, 0xe6, 0xcb, 0x45, 0x8e, 0x08, 0x91, 0xd8, 0x46, 0x00, 0x59, 0xc1, + 0x17, 0xfc, 0xc4, 0x19, 0x53, 0x08, 0x91, 0xc0, 0x03, 0xc1, 0x18, 0x08, + 0x91, 0x08, 0x91, 0x91, 0x87, 0x08, 0x91, 0x81, 0x97, 0x08, 0x91, 0x79, + 0x8b, 0x08, 0x91, 0x6a, 0x01, 0x18, 0x14, 0x0e, 0xc1, 0x18, 0x18, 0xc2, + 0x00, 0xd0, 0x08, 0x91, 0x51, 0xc2, 0x0d, 0xf6, 0x08, 0x91, 0x49, 0xc2, + 0x02, 0x41, 0x08, 0x91, 0x41, 0xc2, 0x00, 0x39, 0x08, 0x91, 0x31, 0xc2, + 0x19, 0x2c, 0x08, 0x91, 0x29, 0xc2, 0x01, 0xc3, 0x08, 0x91, 0x21, 0x04, + 0xc1, 0x18, 0x22, 0x12, 0xc1, 0x18, 0x32, 0x10, 0xc1, 0x18, 0x3c, 0x06, + 0xc1, 0x18, 0x52, 0x16, 0xc1, 0x18, 0x60, 0x0c, 0xc1, 0x18, 0x6e, 0x05, + 0xc1, 0x18, 0x78, 0x09, 0xc1, 0x18, 0x82, 0x0d, 0xc1, 0x18, 0x92, 0x83, + 0x08, 0x90, 0x03, 0x01, 0x18, 0x9c, 0x91, 0x08, 0x90, 0x31, 0x87, 0x08, + 0x90, 0x21, 0x97, 0x08, 0x90, 0x19, 0x8b, 0x08, 0x90, 0x10, 0x46, 0x10, + 0x79, 0xc1, 0x18, 0xa8, 0x44, 0x00, 0xbb, 0x41, 0x18, 0xc8, 0xc4, 0x26, + 0x78, 0x00, 0xbf, 0x49, 0xc5, 0x06, 0xdb, 0x00, 0xbf, 0x41, 0x15, 0xc1, + 0x19, 0x0a, 0x08, 0xc1, 0x19, 0x16, 0x16, 0xc1, 0x19, 0x22, 0xc3, 0x05, + 0x14, 0x00, 0xbf, 0x09, 0xc4, 0x15, 0xe7, 0x00, 0xbf, 0x00, 0x45, 0x00, + 0xba, 0xc1, 0x19, 0x2e, 0x4a, 0x9f, 0xf4, 0x41, 0x19, 0x4f, 0x13, 0xc1, + 0x19, 0x57, 0xc2, 0x00, 0x35, 0x00, 0xbd, 0x6b, 0x01, 0x19, 0x73, 0xc2, + 0x14, 0x98, 0x00, 0xbd, 0x5a, 0x01, 0x19, 0x77, 0xc2, 0x0f, 0x9a, 0x00, + 0xbd, 0x11, 0x0e, 0xc1, 0x19, 0x7b, 0xc2, 0x00, 0xd0, 0x00, 0xbd, 0x01, + 0x15, 0xc1, 0x19, 0x83, 0xc2, 0x17, 0xbd, 0x00, 0xbc, 0xe1, 0xc2, 0x00, + 0x79, 0x00, 0xbc, 0xd1, 0xc2, 0x42, 0xcd, 0x00, 0xbc, 0xc9, 0xc2, 0x00, + 0xa2, 0x00, 0xbc, 0xc1, 0x12, 0xc1, 0x19, 0x93, 0xc2, 0x01, 0x5d, 0x00, + 0xbc, 0xa1, 0x10, 0xc1, 0x19, 0x9b, 0x16, 0xc1, 0x19, 0xb1, 0x06, 0xc1, + 0x19, 0xc3, 0x05, 0xc1, 0x19, 0xcb, 0x0d, 0x41, 0x19, 0xd7, 0x0e, 0xc1, + 0x19, 0xe3, 0x06, 0xc1, 0x19, 0xef, 0xc8, 0xb9, 0xf2, 0x08, 0x52, 0xa1, + 0x05, 0xc1, 0x19, 0xf9, 0xcc, 0x12, 0x2d, 0x08, 0x52, 0x88, 0x44, 0x05, + 0x14, 0xc1, 0x1a, 0x05, 0x16, 0x41, 0x1a, 0x11, 0xc4, 0x09, 0x9d, 0x08, + 0x52, 0x19, 0x16, 0xc1, 0x1a, 0x1d, 0xc3, 0x05, 0x14, 0x08, 0x52, 0x00, + 0xc5, 0x1e, 0x96, 0x08, 0x51, 0xf9, 0x45, 0x34, 0x70, 0x41, 0x1a, 0x29, + 0x42, 0x00, 0x58, 0xc1, 0x1a, 0x35, 0xc5, 0xdc, 0xd1, 0x08, 0x51, 0xc9, + 0xc9, 0x31, 0x98, 0x08, 0x51, 0xc1, 0xc7, 0x40, 0xe5, 0x08, 0x50, 0x79, + 0xc8, 0x14, 0x38, 0x08, 0x50, 0x70, 0x18, 0xc1, 0x1a, 0x41, 0x16, 0xc1, + 0x1a, 0x4b, 0xc2, 0x00, 0xdb, 0x08, 0x51, 0x59, 0xc2, 0x00, 0x39, 0x08, + 0x51, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0x51, 0x49, 0xc2, 0x01, 0xc3, 0x08, + 0x51, 0x41, 0x04, 0xc1, 0x1a, 0x59, 0x12, 0xc1, 0x1a, 0x63, 0x10, 0xc1, + 0x1a, 0x6d, 0x06, 0xc1, 0x1a, 0x7d, 0xc2, 0x25, 0x3b, 0x08, 0x50, 0xb9, + 0x05, 0xc1, 0x1a, 0x8b, 0x09, 0xc1, 0x1a, 0x95, 0x0d, 0xc1, 0x1a, 0x9f, + 0x83, 0x08, 0x50, 0x01, 0x15, 0xc1, 0x1a, 0xaf, 0xc2, 0x02, 0x1c, 0x08, + 0x51, 0x81, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x88, 0xc4, 0x00, 0x87, 0x0f, + 0xb0, 0xbb, 0x01, 0x1a, 0xbf, 0xd9, 0x20, 0x8f, 0x0f, 0xb1, 0xe8, 0xc9, + 0xb0, 0x11, 0x0f, 0xd4, 0x31, 0xca, 0xa6, 0x3e, 0x0f, 0xd5, 0xd0, 0x46, + 0xcc, 0x4d, 0xc1, 0x1a, 0xc5, 0xc4, 0x00, 0x87, 0x0f, 0xb0, 0x80, 0x15, + 0xc1, 0x1a, 0xfc, 0x47, 0x02, 0x0e, 0xc1, 0x1b, 0x06, 0xce, 0x6c, 0x52, + 0x08, 0xa2, 
0xe9, 0xd0, 0x5f, 0x92, 0x08, 0xa2, 0xd9, 0x06, 0xc1, 0x1b, + 0x6d, 0xd1, 0x50, 0xce, 0x08, 0xa2, 0x79, 0xca, 0x93, 0x30, 0x08, 0xa2, + 0x71, 0xc5, 0x0a, 0x8a, 0x08, 0xa2, 0x69, 0xc2, 0x00, 0x7a, 0x08, 0xa2, + 0x49, 0x4b, 0x6f, 0xc7, 0x41, 0x1b, 0x7f, 0xcb, 0x99, 0xe4, 0x01, 0x05, + 0x51, 0x48, 0xb6, 0x82, 0xc1, 0x1b, 0x9f, 0x45, 0x15, 0xdb, 0xc1, 0x1b, + 0xbe, 0xc4, 0x02, 0x6d, 0x00, 0x00, 0x50, 0xc4, 0x00, 0x49, 0x01, 0x5c, + 0x91, 0xc5, 0x00, 0x2c, 0x01, 0x5c, 0x98, 0x48, 0x0b, 0x09, 0xc1, 0x1b, + 0xca, 0x48, 0x20, 0x7c, 0xc1, 0x1b, 0xfa, 0xcb, 0x49, 0x4a, 0x00, 0x00, + 0xa9, 0x49, 0x1e, 0x56, 0x41, 0x1c, 0x18, 0xe0, 0x05, 0x87, 0x01, 0x15, + 0x78, 0x43, 0x07, 0x28, 0xc1, 0x1c, 0x2a, 0x42, 0x02, 0xaf, 0x41, 0x1c, + 0x36, 0xc9, 0x00, 0xca, 0x01, 0x13, 0xc9, 0x43, 0x00, 0xe2, 0x41, 0x1c, + 0x3c, 0xcc, 0x07, 0xc7, 0x01, 0x13, 0xc1, 0x43, 0x00, 0xe2, 0x41, 0x1c, + 0x48, 0x4b, 0x6f, 0xc7, 0xc1, 0x1c, 0x54, 0xca, 0x9d, 0x56, 0x08, 0xcf, + 0x19, 0x45, 0x00, 0xba, 0xc1, 0x1c, 0x7d, 0x47, 0x02, 0x0e, 0x41, 0x1c, + 0x8d, 0x47, 0x34, 0x2f, 0xc1, 0x1c, 0xf0, 0xd5, 0x34, 0x25, 0x08, 0x45, + 0x59, 0x47, 0x02, 0x0e, 0x41, 0x1d, 0x01, 0xd4, 0x3a, 0x48, 0x0f, 0xb5, + 0x89, 0xcf, 0x67, 0x83, 0x01, 0x00, 0x88, 0x00, 0xc1, 0x1d, 0x6a, 0xd6, + 0x2e, 0x12, 0x0f, 0xb7, 0x50, 0xcc, 0x23, 0x9f, 0x01, 0x15, 0xa0, 0xe0, + 0x02, 0xc7, 0x0f, 0xaa, 0x21, 0x0e, 0xc1, 0x1d, 0x7c, 0x4b, 0x2c, 0x44, + 0x41, 0x1d, 0x88, 0xca, 0xa7, 0xc4, 0x01, 0x1b, 0xd9, 0xd2, 0x4c, 0x01, + 0x01, 0x17, 0x53, 0x01, 0x1d, 0x8e, 0x15, 0xc1, 0x1d, 0x94, 0x16, 0xc1, + 0x1d, 0xa0, 0x03, 0xc1, 0x1d, 0xac, 0xcc, 0x07, 0xc7, 0x01, 0x13, 0x79, + 0xc9, 0x00, 0xca, 0x01, 0x13, 0x71, 0x43, 0x00, 0xe2, 0xc1, 0x1d, 0xc4, + 0xcc, 0x89, 0x0d, 0x01, 0x13, 0x11, 0xcb, 0x6b, 0x83, 0x01, 0x11, 0x30, + 0x43, 0x00, 0xaf, 0xc1, 0x1d, 0xd0, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0x9a, + 0x01, 0x1d, 0xda, 0xc5, 0x00, 0xb9, 0x0f, 0xb5, 0x58, 0xc5, 0xd5, 0x1a, + 0x0f, 0xab, 0x91, 0xca, 0xa2, 0x56, 0x0f, 0xb5, 0xb8, 0xc9, 0xa9, 0xa2, + 0x00, 0x04, 0x19, 0xc7, 0xc9, 0x50, 0x0f, 0xb5, 0x98, 0x99, 0x0f, 0x09, + 0x61, 0x87, 0x0f, 0x09, 0x53, 0x01, 0x1d, 0xe0, 0x91, 0x0f, 0x09, 0x43, + 0x01, 0x1d, 0xe4, 0x97, 0x0f, 0x09, 0x39, 0x8b, 0x0f, 0x09, 0x31, 0x83, + 0x0f, 0x09, 0x23, 0x01, 0x1d, 0xe8, 0x14, 0xc1, 0x1d, 0xec, 0xc2, 0x01, + 0x30, 0x0f, 0x09, 0x11, 0x12, 0xc1, 0x1d, 0xf6, 0x0f, 0xc1, 0x1e, 0x00, + 0xc2, 0x00, 0xd0, 0x0f, 0x08, 0x23, 0x01, 0x1e, 0x0a, 0x10, 0xc1, 0x1e, + 0x0e, 0x06, 0xc1, 0x1e, 0x38, 0x1a, 0xc1, 0x1e, 0x42, 0xc2, 0x19, 0x2c, + 0x0f, 0x08, 0xc1, 0xc2, 0x0f, 0x9a, 0x0f, 0x08, 0xb9, 0xc2, 0x00, 0x87, + 0x0f, 0x08, 0xa9, 0x16, 0xc1, 0x1e, 0x4c, 0xc2, 0x02, 0x41, 0x0f, 0x08, + 0x91, 0xc2, 0x02, 0x2b, 0x0f, 0x08, 0x71, 0xc2, 0x02, 0x1c, 0x0f, 0x08, + 0x59, 0xc2, 0x0d, 0xf6, 0x0f, 0x08, 0x51, 0xc2, 0x00, 0xdb, 0x0f, 0x08, + 0x49, 0xc2, 0x00, 0x64, 0x0f, 0x08, 0x40, 0xc4, 0x18, 0x10, 0x0f, 0x0a, + 0x39, 0xc2, 0x22, 0xcc, 0x0f, 0x0a, 0x30, 0xc3, 0x0d, 0x14, 0x0f, 0x0a, + 0x29, 0xc3, 0x09, 0x9e, 0x0f, 0x0a, 0x20, 0xc4, 0x02, 0xde, 0x0f, 0x0a, + 0x19, 0xc2, 0x02, 0xa0, 0x0f, 0x0a, 0x10, 0xc5, 0xd7, 0xdb, 0x0f, 0x09, + 0xe1, 0x44, 0x15, 0xec, 0x41, 0x1e, 0x5c, 0x1f, 0xc1, 0x1e, 0x7a, 0x1e, + 0x41, 0x1e, 0xba, 0x16, 0xc1, 0x1e, 0xde, 0xd2, 0x4b, 0x5f, 0x01, 0x24, + 0xd1, 0x07, 0xc1, 0x1e, 0xf0, 0x15, 0xc1, 0x1e, 0xfc, 0x08, 0x41, 0x1f, + 0x06, 0xc4, 0x25, 0xd5, 0x01, 0x50, 0x21, 0xc3, 0x02, 0xa3, 0x01, 0x50, + 0x18, 0xce, 0x6d, 0x24, 0x01, 0x50, 0x31, 0xd5, 0x33, 0x68, 0x01, 0x50, + 0x28, 0xce, 0x72, 0xd4, 0x01, 0x50, 0x11, 0xcd, 0x7d, 0x51, 0x01, 0x50, + 0x09, 0xcc, 
0x83, 0x3d, 0x01, 0x50, 0x00, 0xc4, 0x26, 0x78, 0x00, 0x3e, + 0x49, 0xc5, 0x06, 0xdb, 0x00, 0x3e, 0x41, 0x15, 0xc1, 0x1f, 0x12, 0x08, + 0xc1, 0x1f, 0x1e, 0x16, 0xc1, 0x1f, 0x2a, 0xc3, 0x05, 0x14, 0x00, 0x3e, + 0x09, 0xc4, 0x15, 0xe7, 0x00, 0x3e, 0x00, 0x0c, 0xc1, 0x1f, 0x36, 0x90, + 0x00, 0x3e, 0x93, 0x01, 0x1f, 0x40, 0xc2, 0x19, 0x2c, 0x00, 0x3f, 0x31, + 0xc2, 0x01, 0x4a, 0x00, 0x3f, 0x29, 0xc2, 0x00, 0xd0, 0x00, 0x3f, 0x21, + 0xc2, 0x01, 0xc3, 0x00, 0x3f, 0x09, 0xc2, 0x00, 0xdb, 0x00, 0x3e, 0xf9, + 0xc2, 0x02, 0x2b, 0x00, 0x3e, 0xf1, 0xc2, 0x00, 0x87, 0x00, 0x3e, 0xe9, + 0xc3, 0x9f, 0x2c, 0x00, 0x3e, 0xe1, 0xc2, 0x0d, 0xf6, 0x00, 0x3e, 0xd9, + 0x14, 0xc1, 0x1f, 0x50, 0xc2, 0x0e, 0x9a, 0x00, 0x3e, 0xc3, 0x01, 0x1f, + 0x5a, 0xc3, 0x1c, 0x63, 0x00, 0x3e, 0xb9, 0xc2, 0x01, 0x6f, 0x00, 0x3e, + 0xa9, 0xc2, 0x00, 0xb0, 0x00, 0x3e, 0xa1, 0xc2, 0x01, 0x5d, 0x00, 0x3e, + 0x99, 0x91, 0x00, 0x3e, 0x83, 0x01, 0x1f, 0x60, 0x97, 0x00, 0x3e, 0x71, + 0x87, 0x00, 0x3e, 0x6b, 0x01, 0x1f, 0x64, 0x8b, 0x00, 0x3e, 0x61, 0x83, + 0x00, 0x3e, 0x50, 0xd0, 0x57, 0xd2, 0x00, 0x3f, 0x99, 0xd1, 0x56, 0xc8, + 0x00, 0x3f, 0x91, 0x45, 0x2c, 0x86, 0xc1, 0x1f, 0x68, 0x46, 0x2e, 0xee, + 0x41, 0x1f, 0x80, 0xc6, 0x52, 0xa4, 0x0f, 0xd3, 0x59, 0xc5, 0xd8, 0xda, + 0x0f, 0xd3, 0x60, 0xc6, 0x52, 0xa4, 0x0f, 0xd3, 0x21, 0xc5, 0xd8, 0xda, + 0x0f, 0xd3, 0x28, 0xc8, 0xbd, 0x32, 0x0f, 0xcd, 0x81, 0xca, 0xa5, 0xd0, + 0x0f, 0xcd, 0x89, 0xc4, 0xe1, 0xeb, 0x0f, 0xcd, 0x91, 0xca, 0xa6, 0xb6, + 0x0f, 0xcd, 0x98, 0xa3, 0x0f, 0x9f, 0xf9, 0xa2, 0x0f, 0x9f, 0xf1, 0xa1, + 0x0f, 0x9f, 0xe9, 0xa0, 0x0f, 0x9f, 0xe1, 0xc3, 0xe5, 0xfc, 0x0f, 0x9f, + 0xd8, 0xc3, 0x0e, 0xa7, 0x01, 0x10, 0x2b, 0x01, 0x1f, 0x92, 0xc4, 0x9b, + 0xb8, 0x0f, 0xae, 0x63, 0x01, 0x1f, 0x98, 0xc8, 0xb9, 0xb2, 0x0f, 0xae, + 0x59, 0x10, 0x41, 0x1f, 0x9c, 0x42, 0x09, 0xda, 0x41, 0x1f, 0xab, 0x43, + 0x00, 0x55, 0xc1, 0x1f, 0xb7, 0xd0, 0x5e, 0x92, 0x0f, 0xcd, 0xd8, 0xca, + 0xa5, 0x58, 0x09, 0xa1, 0xc1, 0x1d, 0x41, 0x1f, 0xc3, 0xcc, 0x82, 0x41, + 0x09, 0xa1, 0xb9, 0x42, 0xcf, 0x41, 0x41, 0x1f, 0xd3, 0xcd, 0x76, 0x42, + 0x09, 0xa1, 0xb1, 0x1d, 0x41, 0x1f, 0xfa, 0x49, 0xaf, 0xb7, 0xc1, 0x20, + 0x12, 0x1d, 0x41, 0x20, 0x1e, 0xd0, 0x59, 0xc2, 0x09, 0xa1, 0x89, 0x42, + 0xcf, 0x41, 0x41, 0x20, 0x26, 0xce, 0x70, 0x6c, 0x09, 0xa1, 0x81, 0x1d, + 0x41, 0x20, 0x49, 0x42, 0xd1, 0x3e, 0xc1, 0x20, 0x62, 0x1d, 0x41, 0x20, + 0x72, 0x1e, 0xc1, 0x20, 0x94, 0x1d, 0x41, 0x20, 0xb6, 0xa5, 0x09, 0x9f, + 0x19, 0xa4, 0x09, 0x9f, 0x11, 0xa3, 0x09, 0x9f, 0x09, 0xa2, 0x09, 0x9f, + 0x01, 0xa1, 0x09, 0x9e, 0xf9, 0xa0, 0x09, 0x9e, 0xf1, 0x9f, 0x09, 0x9e, + 0xe9, 0x9e, 0x09, 0x9e, 0xda, 0x01, 0x20, 0xe6, 0xa5, 0x09, 0x9e, 0xcb, + 0x01, 0x20, 0xea, 0xa4, 0x09, 0x9e, 0xc1, 0xa3, 0x09, 0x9e, 0xb3, 0x01, + 0x20, 0xee, 0xa2, 0x09, 0x9e, 0xa9, 0xa1, 0x09, 0x9e, 0x93, 0x01, 0x20, + 0xf2, 0xa0, 0x09, 0x9e, 0x89, 0x9f, 0x09, 0x9e, 0x81, 0x9e, 0x09, 0x9e, + 0x78, 0x1f, 0xc1, 0x20, 0xfa, 0x1e, 0xc1, 0x21, 0x15, 0x1d, 0x41, 0x21, + 0x49, 0x21, 0xc1, 0x21, 0x73, 0x20, 0xc1, 0x21, 0x7f, 0x1f, 0xc1, 0x21, + 0xaa, 0x1e, 0xc1, 0x21, 0xd8, 0x1d, 0x41, 0x22, 0x00, 0x20, 0xc1, 0x22, + 0x27, 0x1f, 0xc1, 0x22, 0x49, 0x1e, 0xc1, 0x22, 0x71, 0x1d, 0x41, 0x22, + 0x9f, 0x21, 0xc1, 0x22, 0xcf, 0x20, 0xc1, 0x22, 0xeb, 0x1f, 0xc1, 0x23, + 0x16, 0x1e, 0xc1, 0x23, 0x41, 0x1d, 0x41, 0x23, 0x6f, 0x1f, 0xc1, 0x23, + 0x99, 0x1e, 0xc1, 0x23, 0xc1, 0x1d, 0x41, 0x23, 0xef, 0xa4, 0x09, 0x95, + 0x71, 0xa3, 0x09, 0x95, 0x69, 0xa2, 0x09, 0x95, 0x61, 0xa1, 0x09, 0x95, + 0x59, 0xa0, 0x09, 0x95, 0x51, 0x9f, 0x09, 0x95, 0x49, 0x9e, 0x09, 0x95, + 0x40, 0x1e, 
0xc1, 0x24, 0x19, 0x1d, 0x41, 0x24, 0x21, 0x42, 0xdd, 0x2f, + 0xc1, 0x24, 0x4b, 0x42, 0x8c, 0xff, 0xc1, 0x24, 0x57, 0x1d, 0x41, 0x24, + 0x65, 0x21, 0xc1, 0x24, 0x79, 0x20, 0xc1, 0x24, 0x90, 0x1f, 0xc1, 0x24, + 0xbe, 0x1e, 0xc1, 0x24, 0xef, 0x1d, 0x41, 0x25, 0x26, 0xa5, 0x09, 0x8d, + 0x61, 0xa4, 0x09, 0x8d, 0x59, 0xa3, 0x09, 0x8d, 0x4b, 0x01, 0x25, 0x50, + 0xa2, 0x09, 0x8d, 0x41, 0xa1, 0x09, 0x8d, 0x39, 0xa0, 0x09, 0x8d, 0x31, + 0x9f, 0x09, 0x8d, 0x23, 0x01, 0x25, 0x54, 0x9e, 0x09, 0x8d, 0x18, 0xa5, + 0x09, 0x8d, 0x11, 0xa4, 0x09, 0x8d, 0x09, 0xa3, 0x09, 0x8d, 0x01, 0xa2, + 0x09, 0x8c, 0xf9, 0xa1, 0x09, 0x8c, 0xf1, 0xa0, 0x09, 0x8c, 0xe9, 0x9f, + 0x09, 0x8c, 0xe1, 0x9e, 0x09, 0x8c, 0xd8, 0x22, 0xc1, 0x25, 0x58, 0x21, + 0xc1, 0x25, 0x6c, 0x20, 0xc1, 0x25, 0x9a, 0x1f, 0xc1, 0x25, 0xc8, 0x1e, + 0xc1, 0x25, 0xf6, 0x1d, 0x41, 0x26, 0x21, 0x22, 0xc1, 0x26, 0x4b, 0x21, + 0xc1, 0x26, 0x5e, 0x20, 0xc1, 0x26, 0x8f, 0x1f, 0xc1, 0x26, 0xc0, 0x1e, + 0xc1, 0x26, 0xeb, 0x1d, 0x41, 0x27, 0x16, 0x23, 0xc1, 0x27, 0x3d, 0x22, + 0xc1, 0x27, 0x60, 0x21, 0xc1, 0x27, 0x91, 0x20, 0xc1, 0x27, 0xbf, 0x1f, + 0xc1, 0x27, 0xed, 0x1e, 0xc1, 0x28, 0x18, 0x1d, 0x41, 0x28, 0x40, 0x1f, + 0xc1, 0x28, 0x67, 0x1e, 0xc1, 0x28, 0x7b, 0x1d, 0x41, 0x28, 0xa6, 0x4c, + 0x84, 0x69, 0xc1, 0x28, 0xcd, 0xd2, 0x48, 0x35, 0x0f, 0xa3, 0xe8, 0xc4, + 0x26, 0x78, 0x00, 0x37, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0x37, 0xc1, 0x15, + 0xc1, 0x28, 0xe3, 0x08, 0xc1, 0x28, 0xef, 0x16, 0xc1, 0x28, 0xfb, 0xc3, + 0x05, 0x14, 0x00, 0x37, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0x37, 0x80, 0xcd, + 0x2c, 0xb2, 0x01, 0x02, 0x49, 0xc4, 0x01, 0xc3, 0x00, 0x01, 0x08, 0x09, + 0xc1, 0x29, 0x07, 0x0a, 0xc1, 0x29, 0x39, 0x04, 0xc1, 0x29, 0x5a, 0x05, + 0xc1, 0x29, 0x7f, 0x06, 0xc1, 0x29, 0xaa, 0x16, 0xc1, 0x29, 0xd5, 0x0e, + 0xc1, 0x2a, 0x0a, 0x0f, 0xc1, 0x2a, 0x2d, 0x15, 0xc1, 0x2a, 0x54, 0x14, + 0xc1, 0x2a, 0x83, 0x13, 0xc1, 0x2a, 0xac, 0x18, 0xc1, 0x2a, 0xd5, 0x1a, + 0xc1, 0x2a, 0xf5, 0x10, 0xc1, 0x2b, 0x1a, 0x0d, 0xc1, 0x2b, 0x41, 0x19, + 0xc1, 0x2b, 0x6a, 0x12, 0xc1, 0x2b, 0x87, 0x1c, 0xc1, 0x2b, 0xac, 0x1b, + 0xc1, 0x2b, 0xd7, 0x0c, 0xc1, 0x2b, 0xf4, 0x08, 0x41, 0x2c, 0x17, 0xca, + 0x45, 0x8f, 0x00, 0x9b, 0x01, 0xc7, 0x52, 0x01, 0x00, 0x9b, 0x20, 0x47, + 0x1d, 0xd4, 0xc1, 0x2c, 0x3b, 0xc2, 0x01, 0xc3, 0x00, 0x9b, 0x18, 0xc2, + 0x02, 0xa0, 0x00, 0x9b, 0x51, 0xc4, 0x02, 0xde, 0x00, 0x9b, 0x58, 0xc3, + 0x09, 0x9e, 0x00, 0x9b, 0x61, 0xc3, 0x0d, 0x14, 0x00, 0x9b, 0x68, 0xc2, + 0x22, 0xcc, 0x00, 0x9b, 0x71, 0xc4, 0x18, 0x10, 0x00, 0x9b, 0x78, 0xc2, + 0x00, 0xc4, 0x00, 0x9b, 0x93, 0x01, 0x2c, 0x47, 0xc5, 0x28, 0xee, 0x00, + 0x9b, 0x99, 0xc5, 0x0d, 0x0d, 0x00, 0x9b, 0xa0, 0xc4, 0x4a, 0x2e, 0x00, + 0x9b, 0xa9, 0xc4, 0x45, 0x6a, 0x00, 0x9b, 0xb0, 0xc4, 0xd2, 0x1d, 0x00, + 0x9b, 0xb9, 0xc6, 0x18, 0x10, 0x00, 0x9b, 0xc0, 0xc4, 0xb4, 0x50, 0x00, + 0x9c, 0x8b, 0x01, 0x2c, 0x4d, 0xc4, 0xe1, 0x33, 0x00, 0x9c, 0xa0, 0xc4, + 0x59, 0x96, 0x00, 0x9c, 0xa9, 0xc3, 0x34, 0x38, 0x00, 0x9c, 0xc8, 0x00, + 0x41, 0x2c, 0x53, 0xcf, 0x44, 0x5a, 0x01, 0x1f, 0x39, 0x00, 0x41, 0x2c, + 0x5f, 0x16, 0xc1, 0x2c, 0x77, 0x15, 0xc1, 0x2c, 0x83, 0xc4, 0x5d, 0xe2, + 0x08, 0x7f, 0x99, 0xc4, 0xb9, 0x7e, 0x08, 0x7f, 0x91, 0xc2, 0x00, 0x67, + 0x08, 0x7f, 0x81, 0xc3, 0x20, 0x18, 0x08, 0x7f, 0x69, 0xc3, 0x00, 0x4e, + 0x08, 0x7f, 0x61, 0xc6, 0xcf, 0xd7, 0x08, 0x7f, 0x59, 0xc4, 0xe0, 0xe7, + 0x08, 0x7f, 0x51, 0xc4, 0x4a, 0xb9, 0x08, 0x7f, 0x49, 0xc2, 0x01, 0x7f, + 0x08, 0x7f, 0x23, 0x01, 0x2c, 0x8d, 0xc5, 0x4a, 0xb3, 0x08, 0x7f, 0x31, + 0xc3, 0x7e, 0x89, 0x08, 0x7f, 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x7f, 0x19, + 0xc5, 0x9c, 
0xa2, 0x08, 0x7f, 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x7f, 0x09, + 0x03, 0x41, 0x2c, 0x93, 0x87, 0x08, 0x28, 0x11, 0xc2, 0x01, 0x7f, 0x08, + 0x28, 0x18, 0x87, 0x08, 0x28, 0x21, 0xc2, 0x01, 0x7f, 0x08, 0x28, 0x30, + 0xc2, 0x00, 0x06, 0x08, 0x28, 0x29, 0x87, 0x08, 0x28, 0x99, 0x83, 0x08, + 0x28, 0xa1, 0xc2, 0x1c, 0x52, 0x08, 0x28, 0xa8, 0x8b, 0x08, 0x28, 0x38, + 0x87, 0x08, 0x28, 0x51, 0xc2, 0x1c, 0x52, 0x08, 0x28, 0x59, 0x0a, 0x41, + 0x2c, 0x9f, 0x87, 0x08, 0x28, 0x79, 0xc2, 0x01, 0x7f, 0x08, 0x29, 0x38, + 0x87, 0x08, 0x28, 0x81, 0xc2, 0x00, 0x49, 0x08, 0x28, 0x88, 0x87, 0x08, + 0x28, 0xc9, 0xc2, 0x01, 0x19, 0x08, 0x28, 0xd0, 0x87, 0x08, 0x28, 0xd9, + 0xc2, 0x01, 0x7f, 0x08, 0x28, 0xe0, 0x87, 0x08, 0x28, 0xe9, 0xc2, 0x01, + 0x7f, 0x08, 0x28, 0xf0, 0x87, 0x08, 0x29, 0x19, 0xc2, 0x01, 0x7f, 0x08, + 0x29, 0x20, 0xe0, 0x0a, 0xe7, 0x01, 0x3a, 0x50, 0xdf, 0x0c, 0x46, 0x01, + 0x3a, 0x09, 0x47, 0x0a, 0xaa, 0x41, 0x2c, 0xa9, 0xc9, 0xad, 0xe3, 0x0f, + 0xac, 0x21, 0xd5, 0x31, 0xd9, 0x0f, 0xa7, 0x48, 0x43, 0x05, 0xc0, 0xc1, + 0x2c, 0xbb, 0xc6, 0x01, 0xdb, 0x00, 0x00, 0xc9, 0x16, 0xc1, 0x2c, 0xc7, + 0xc4, 0x02, 0x6d, 0x00, 0x00, 0x51, 0xcd, 0x7e, 0x48, 0x00, 0x04, 0x39, + 0xcc, 0x87, 0xc9, 0x00, 0x04, 0xb8, 0xc6, 0x02, 0xd1, 0x01, 0x4f, 0x99, + 0xc7, 0x3a, 0x19, 0x01, 0x4f, 0x89, 0xc6, 0x0b, 0x09, 0x01, 0x4f, 0x78, + 0xc6, 0x02, 0xd1, 0x01, 0x4f, 0x91, 0xc7, 0x3a, 0x19, 0x01, 0x4f, 0x81, + 0xc6, 0x0b, 0x09, 0x01, 0x4f, 0x70, 0x43, 0x01, 0x7b, 0xc1, 0x2c, 0xd6, + 0xcf, 0x6b, 0x7f, 0x01, 0x16, 0xa8, 0xc5, 0x33, 0x24, 0x01, 0x12, 0xa9, + 0xc4, 0x00, 0xba, 0x00, 0x01, 0xeb, 0x01, 0x2c, 0xe2, 0xcd, 0x7c, 0x33, + 0x01, 0x53, 0x70, 0xc2, 0x00, 0xf1, 0x01, 0x12, 0x69, 0xd4, 0x3b, 0x74, + 0x01, 0x53, 0xc0, 0xcb, 0x95, 0x6c, 0x0f, 0x9f, 0x21, 0xc6, 0xcd, 0x2b, + 0x0f, 0x9f, 0x80, 0xc4, 0x26, 0x78, 0x08, 0xed, 0x49, 0xc5, 0x06, 0xdb, + 0x08, 0xed, 0x41, 0x15, 0xc1, 0x2c, 0xe6, 0x08, 0xc1, 0x2c, 0xf2, 0x16, + 0xc1, 0x2c, 0xfe, 0xc3, 0x05, 0x14, 0x08, 0xed, 0x09, 0xc4, 0x15, 0xe7, + 0x08, 0xed, 0x00, 0xc5, 0x1e, 0x96, 0x08, 0xec, 0xb9, 0x4a, 0x6f, 0xc8, + 0x41, 0x2d, 0x0a, 0xc7, 0x40, 0xe5, 0x08, 0xec, 0xb1, 0xc8, 0x14, 0x38, + 0x08, 0xec, 0xa8, 0xc2, 0x0d, 0xf6, 0x08, 0xec, 0x49, 0xc2, 0x00, 0x39, + 0x08, 0xec, 0x41, 0xc2, 0x00, 0xd0, 0x08, 0xec, 0x39, 0x12, 0xc1, 0x2d, + 0x28, 0x10, 0xc1, 0x2d, 0x32, 0x06, 0xc1, 0x2d, 0x3c, 0x0c, 0xc1, 0x2d, + 0x4a, 0x0e, 0xc1, 0x2d, 0x54, 0x16, 0xc1, 0x2d, 0x5e, 0x05, 0xc1, 0x2d, + 0x6c, 0x09, 0xc1, 0x2d, 0x76, 0x0d, 0xc1, 0x2d, 0x80, 0xc2, 0x01, 0xc3, + 0x08, 0xeb, 0x81, 0x04, 0xc1, 0x2d, 0x8a, 0xc2, 0x02, 0x41, 0x08, 0xeb, + 0x69, 0xc2, 0x19, 0x2c, 0x08, 0xeb, 0x61, 0x83, 0x08, 0xeb, 0x03, 0x01, + 0x2d, 0x94, 0xc2, 0x01, 0x24, 0x08, 0xeb, 0x51, 0xc2, 0x02, 0xe0, 0x08, + 0xeb, 0x39, 0x97, 0x08, 0xeb, 0x23, 0x01, 0x2d, 0xa0, 0x8b, 0x08, 0xeb, + 0x12, 0x01, 0x2d, 0xa4, 0xca, 0xa6, 0x2a, 0x00, 0x50, 0x09, 0xc5, 0x60, + 0x30, 0x00, 0x50, 0x11, 0x42, 0x07, 0xb2, 0xc1, 0x2d, 0xa8, 0xc5, 0x33, + 0x5d, 0x00, 0x51, 0xe1, 0xc5, 0xd9, 0x5c, 0x00, 0x52, 0x89, 0xc6, 0xd3, + 0x85, 0x00, 0x53, 0xa8, 0x83, 0x00, 0x50, 0x2b, 0x01, 0x2d, 0xb4, 0x8b, + 0x00, 0x50, 0x3b, 0x01, 0x2d, 0xc0, 0x97, 0x00, 0x50, 0x4b, 0x01, 0x2d, + 0xc4, 0xc2, 0x02, 0xe0, 0x00, 0x50, 0x79, 0xc2, 0x01, 0x24, 0x00, 0x50, + 0x99, 0x0d, 0xc1, 0x2d, 0xc8, 0x09, 0xc1, 0x2d, 0xd0, 0x10, 0xc1, 0x2d, + 0xd8, 0x05, 0xc1, 0x2d, 0xee, 0x0c, 0xc1, 0x2d, 0xf8, 0x16, 0xc1, 0x2e, + 0x02, 0x06, 0xc1, 0x2e, 0x10, 0x12, 0xc1, 0x2e, 0x1e, 0x04, 0xc1, 0x2e, + 0x28, 0xc2, 0x01, 0xc3, 0x00, 0x51, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x51, + 0x79, 0x14, 
0xc1, 0x2e, 0x32, 0x0e, 0xc1, 0x2e, 0x3c, 0xc2, 0x02, 0x41, + 0x00, 0x51, 0xa9, 0x15, 0xc1, 0x2e, 0x46, 0xc2, 0x00, 0xd0, 0x00, 0x51, + 0xc9, 0xc2, 0x02, 0x1c, 0x00, 0x52, 0xd9, 0xc2, 0x00, 0x87, 0x00, 0x52, + 0xf0, 0x03, 0xc1, 0x2e, 0x50, 0x8b, 0x00, 0x51, 0xfb, 0x01, 0x2e, 0x5c, + 0x97, 0x00, 0x52, 0x0b, 0x01, 0x2e, 0x60, 0xc2, 0x02, 0xe0, 0x00, 0x52, + 0x39, 0xc2, 0x01, 0x24, 0x00, 0x52, 0x58, 0xc4, 0x15, 0xe7, 0x00, 0x53, + 0x31, 0xc3, 0x05, 0x14, 0x00, 0x53, 0x39, 0x16, 0xc1, 0x2e, 0x64, 0x08, + 0xc1, 0x2e, 0x70, 0x15, 0xc1, 0x2e, 0x7c, 0xc5, 0x06, 0xdb, 0x00, 0x53, + 0x71, 0xc4, 0x26, 0x78, 0x00, 0x53, 0x78, 0xc4, 0xe3, 0x57, 0x00, 0x53, + 0x89, 0xd0, 0x50, 0xcf, 0x00, 0x53, 0xb0, 0x05, 0xc1, 0x2e, 0x88, 0x03, + 0xc1, 0x2e, 0x94, 0x42, 0x07, 0xb2, 0xc1, 0x2e, 0xa0, 0xc5, 0x33, 0x5d, + 0x00, 0x55, 0xe1, 0x15, 0xc1, 0x2e, 0xac, 0xc6, 0xd2, 0x2f, 0x00, 0x57, + 0xe1, 0x16, 0x41, 0x2e, 0xb8, 0x83, 0x00, 0x54, 0x2b, 0x01, 0x2e, 0xc4, + 0x8b, 0x00, 0x54, 0x3b, 0x01, 0x2e, 0xd0, 0x97, 0x00, 0x54, 0x4b, 0x01, + 0x2e, 0xd4, 0x18, 0xc1, 0x2e, 0xd8, 0x87, 0x00, 0x54, 0x79, 0x91, 0x00, + 0x54, 0x99, 0x0d, 0xc1, 0x2e, 0xe2, 0x09, 0xc1, 0x2e, 0xec, 0x10, 0xc1, + 0x2e, 0xf6, 0x05, 0xc1, 0x2f, 0x0c, 0x0c, 0xc1, 0x2f, 0x16, 0x16, 0xc1, + 0x2f, 0x20, 0x06, 0xc1, 0x2f, 0x2e, 0x12, 0xc1, 0x2f, 0x3c, 0x04, 0xc1, + 0x2f, 0x46, 0xc2, 0x01, 0xc3, 0x00, 0x55, 0x71, 0xc2, 0x19, 0x2c, 0x00, + 0x55, 0x79, 0xc2, 0x00, 0x39, 0x00, 0x55, 0x81, 0x0e, 0xc1, 0x2f, 0x50, + 0x15, 0xc1, 0x2f, 0x5a, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0xc9, 0xc3, 0xb4, + 0xa6, 0x00, 0x57, 0xc8, 0x47, 0xc7, 0x7b, 0xc1, 0x2f, 0x6a, 0x45, 0x00, + 0xba, 0x41, 0x2f, 0x72, 0xc4, 0x15, 0xe7, 0x00, 0x57, 0x31, 0xc3, 0x05, + 0x14, 0x00, 0x57, 0x39, 0x16, 0xc1, 0x2f, 0x98, 0x08, 0xc1, 0x2f, 0xa4, + 0x15, 0xc1, 0x2f, 0xb0, 0xc5, 0x06, 0xdb, 0x00, 0x57, 0x71, 0xc4, 0x26, + 0x78, 0x00, 0x57, 0x78, 0xc5, 0xd7, 0xc2, 0x08, 0x19, 0xa1, 0xc3, 0x84, + 0xf8, 0x08, 0x19, 0x80, 0xc3, 0xb6, 0x96, 0x08, 0x19, 0xb1, 0xc4, 0xe0, + 0x9b, 0x08, 0x1a, 0x38, 0xc3, 0xdb, 0xd3, 0x08, 0x19, 0xb9, 0xc4, 0xde, + 0xa3, 0x08, 0x1a, 0x40, 0xc5, 0xd5, 0x8d, 0x08, 0x19, 0xc1, 0xc4, 0xe2, + 0xf3, 0x08, 0x1a, 0x20, 0xc5, 0xd6, 0x46, 0x08, 0x19, 0xe9, 0x43, 0x02, + 0x6e, 0x41, 0x2f, 0xbc, 0x42, 0x01, 0x12, 0xc1, 0x2f, 0xc8, 0x42, 0x00, + 0xbd, 0x41, 0x30, 0x32, 0x04, 0xc1, 0x30, 0x4a, 0xd5, 0x34, 0xe2, 0x01, + 0x16, 0xd9, 0x45, 0x00, 0x8c, 0xc1, 0x30, 0x56, 0x11, 0xc1, 0x30, 0x68, + 0x03, 0xc1, 0x30, 0x74, 0xc4, 0x00, 0xba, 0x00, 0x01, 0xf1, 0xcf, 0x69, + 0x18, 0x01, 0x55, 0x3a, 0x01, 0x30, 0x80, 0x4b, 0x6f, 0xc7, 0xc1, 0x30, + 0x86, 0x47, 0x02, 0x0e, 0xc1, 0x30, 0xaa, 0x45, 0x00, 0xba, 0xc1, 0x31, + 0x13, 0xce, 0x73, 0x0c, 0x08, 0x9a, 0xb9, 0xc2, 0x00, 0x7a, 0x08, 0x9a, + 0x80, 0xc4, 0x00, 0x87, 0x0f, 0xb0, 0x03, 0x01, 0x31, 0x2d, 0xda, 0x1d, + 0x3c, 0x0f, 0xb1, 0xc0, 0xc9, 0x1b, 0x0a, 0x00, 0x00, 0xe9, 0xc4, 0x01, + 0xc3, 0x01, 0x5e, 0x90, 0xc8, 0xbd, 0xd2, 0x01, 0x37, 0x71, 0xc7, 0xc5, + 0x9f, 0x01, 0x37, 0x68, 0x48, 0x07, 0x5a, 0xc1, 0x31, 0x33, 0xcb, 0x94, + 0x6f, 0x01, 0x11, 0xd0, 0x58, 0x22, 0x13, 0xc1, 0x31, 0x3f, 0x4f, 0x0b, + 0x17, 0xc1, 0x31, 0xc5, 0x47, 0x02, 0x0e, 0xc1, 0x32, 0x49, 0xd3, 0x45, + 0xf8, 0x00, 0x87, 0xd9, 0x4d, 0x29, 0xb9, 0x41, 0x32, 0xcf, 0xc8, 0x2f, + 0x03, 0x0f, 0xb6, 0x50, 0x4f, 0x0b, 0x17, 0xc1, 0x33, 0x53, 0x4d, 0x29, + 0xb9, 0x41, 0x33, 0xbc, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xc9, 0xc5, 0x1c, + 0xae, 0x0f, 0xcf, 0x08, 0x45, 0x00, 0xba, 0xc1, 0x34, 0x25, 0x47, 0x02, + 0x0e, 0xc1, 0x34, 0x41, 0x4b, 0x6f, 0xc7, 0xc1, 0x34, 0xa8, 0x03, 0xc1, + 0x34, 0xc8, 
0x46, 0x09, 0x97, 0xc1, 0x34, 0xd4, 0xc6, 0xd2, 0xcb, 0x00, + 0x5b, 0x81, 0x49, 0x53, 0xa9, 0x41, 0x34, 0xf8, 0xc5, 0xd3, 0x5b, 0x0f, + 0x69, 0xe9, 0xc4, 0x01, 0xce, 0x0f, 0x69, 0xe0, 0x16, 0xc1, 0x35, 0x04, + 0x08, 0xc1, 0x35, 0x15, 0xc3, 0x05, 0x14, 0x0f, 0x68, 0x0b, 0x01, 0x35, + 0x1d, 0x15, 0xc1, 0x35, 0x21, 0xc5, 0x06, 0xdb, 0x0f, 0x68, 0x43, 0x01, + 0x35, 0x33, 0xc4, 0x26, 0x78, 0x0f, 0x68, 0x4a, 0x01, 0x35, 0x3e, 0x16, + 0xc1, 0x35, 0x4b, 0x08, 0xc1, 0x35, 0x63, 0x15, 0xc1, 0x35, 0x72, 0xc5, + 0x06, 0xdb, 0x0f, 0x69, 0xa9, 0xc4, 0x26, 0x78, 0x0f, 0x69, 0xb0, 0x44, + 0x05, 0x18, 0xc1, 0x35, 0x81, 0xcc, 0x86, 0xfd, 0x0f, 0xad, 0x78, 0x00, + 0xc1, 0x35, 0x8d, 0x02, 0x41, 0x35, 0xb5, 0xc5, 0xd7, 0xa4, 0x0f, 0xad, + 0xc0, 0x48, 0xb5, 0xda, 0xc1, 0x35, 0xc1, 0x47, 0xc9, 0x88, 0xc1, 0x35, + 0xcd, 0x42, 0x00, 0xfb, 0xc1, 0x35, 0xdf, 0x4a, 0x9d, 0xa6, 0xc1, 0x35, + 0xeb, 0x4e, 0x70, 0xf8, 0xc1, 0x35, 0xfd, 0x4e, 0x72, 0x3a, 0xc1, 0x36, + 0x09, 0xc3, 0x19, 0x2a, 0x0f, 0xae, 0xe9, 0x43, 0x00, 0x67, 0xc1, 0x36, + 0x15, 0x47, 0xc7, 0x4a, 0x41, 0x36, 0x1f, 0xc5, 0x29, 0xfc, 0x0f, 0xa3, + 0xa9, 0xc3, 0x12, 0xb8, 0x0f, 0xa3, 0xa1, 0xc5, 0xda, 0xa1, 0x0f, 0xce, + 0x98, 0x4b, 0x11, 0xe3, 0xc1, 0x36, 0x2b, 0xc7, 0xc2, 0x42, 0x00, 0xe3, + 0xe0, 0xd1, 0x4f, 0x36, 0x00, 0xe3, 0xd1, 0xc8, 0xb9, 0x9a, 0x00, 0xe3, + 0xc0, 0x11, 0xc1, 0x36, 0x37, 0x0e, 0xc1, 0x36, 0x49, 0x07, 0xc1, 0x36, + 0x60, 0x17, 0xc1, 0x36, 0x74, 0x0b, 0xc1, 0x36, 0x86, 0x03, 0x41, 0x36, + 0x98, 0xc4, 0x26, 0x78, 0x00, 0xe2, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0xe2, + 0xc1, 0x15, 0xc1, 0x36, 0xae, 0x08, 0xc1, 0x36, 0xba, 0x16, 0xc1, 0x36, + 0xc6, 0xc3, 0x05, 0x14, 0x00, 0xe2, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0xe2, + 0x80, 0xca, 0x22, 0x51, 0x01, 0x39, 0x69, 0xcb, 0x8e, 0x08, 0x01, 0x38, + 0xf9, 0xcb, 0x58, 0xc7, 0x01, 0x38, 0xc9, 0xca, 0x28, 0xc3, 0x01, 0x34, + 0xe8, 0xcf, 0x63, 0x0f, 0x01, 0x22, 0x51, 0xc3, 0x02, 0x2c, 0x01, 0x22, + 0x40, 0xd6, 0x2f, 0x1a, 0x01, 0x22, 0x49, 0xc4, 0x68, 0xba, 0x01, 0x22, + 0x08, 0xd9, 0x1e, 0xcd, 0x01, 0x22, 0x31, 0xc6, 0xcb, 0x8d, 0x01, 0x22, + 0x29, 0xca, 0xa5, 0xda, 0x01, 0x22, 0x20, 0xc4, 0x03, 0xc8, 0x01, 0x4d, + 0x39, 0xc2, 0x02, 0xae, 0x01, 0x4d, 0x30, 0x45, 0x2a, 0xa0, 0x41, 0x36, + 0xd2, 0xc5, 0xd4, 0x84, 0x00, 0xb4, 0xd1, 0x42, 0x01, 0x9c, 0xc1, 0x36, + 0xde, 0x0b, 0xc1, 0x36, 0xf0, 0x17, 0xc1, 0x36, 0xfc, 0x11, 0xc1, 0x37, + 0x0c, 0xc4, 0xe2, 0x6b, 0x00, 0xb4, 0x81, 0xc4, 0xde, 0x7f, 0x00, 0xb4, + 0x79, 0x15, 0xc1, 0x37, 0x16, 0x10, 0xc1, 0x37, 0x22, 0xc4, 0xe0, 0x67, + 0x00, 0xb4, 0x61, 0xc4, 0xe4, 0x13, 0x00, 0xb4, 0x59, 0x05, 0xc1, 0x37, + 0x2e, 0xc5, 0xd6, 0xb4, 0x00, 0xb4, 0x41, 0xc4, 0xe3, 0x4f, 0x00, 0xb4, + 0x39, 0xc5, 0xd3, 0xf3, 0x00, 0xb4, 0x19, 0xc4, 0xe4, 0xcb, 0x00, 0xb4, + 0x11, 0xc5, 0xd7, 0x9a, 0x00, 0xb4, 0x08, 0x83, 0x08, 0x24, 0xb3, 0x01, + 0x37, 0x3a, 0xc2, 0x01, 0x5d, 0x08, 0x24, 0x09, 0xc2, 0x01, 0x6f, 0x08, + 0x24, 0x11, 0xc2, 0x25, 0x3b, 0x08, 0x24, 0x19, 0xc2, 0x8d, 0x8f, 0x08, + 0x24, 0x21, 0x0d, 0xc1, 0x37, 0x44, 0x06, 0xc1, 0x37, 0x50, 0xc2, 0x00, + 0x39, 0x08, 0x24, 0x39, 0x15, 0xc1, 0x37, 0x5c, 0xc4, 0xe3, 0x13, 0x08, + 0x24, 0x59, 0xc2, 0x01, 0x30, 0x08, 0x24, 0x61, 0xc2, 0x00, 0x87, 0x08, + 0x24, 0x69, 0xc4, 0xd8, 0x3a, 0x08, 0x24, 0x71, 0xc4, 0xe0, 0xd7, 0x08, + 0x24, 0x81, 0xc4, 0xe4, 0xbb, 0x08, 0x24, 0x89, 0xc4, 0xb9, 0x50, 0x08, + 0x24, 0x91, 0xc3, 0x7e, 0x89, 0x08, 0x24, 0x99, 0xc2, 0x00, 0xd0, 0x08, + 0x24, 0xa1, 0xc2, 0x19, 0x2c, 0x08, 0x24, 0xa9, 0x87, 0x08, 0x24, 0xbb, + 0x01, 0x37, 0x66, 0x8b, 0x08, 0x24, 0xc1, 0x91, 0x08, 0x24, 0xcb, 0x01, + 0x37, 0x6a, 
0x97, 0x08, 0x24, 0xd0, 0xc4, 0x15, 0xe7, 0x08, 0x25, 0x01, + 0xc3, 0x05, 0x14, 0x08, 0x25, 0x09, 0x16, 0xc1, 0x37, 0x6e, 0x08, 0xc1, + 0x37, 0x7a, 0x15, 0xc1, 0x37, 0x86, 0xc5, 0x06, 0xdb, 0x08, 0x25, 0x41, + 0xc4, 0x26, 0x78, 0x08, 0x25, 0x48, 0x83, 0x08, 0x25, 0x83, 0x01, 0x37, + 0x92, 0xc3, 0x00, 0x38, 0x08, 0x25, 0xa1, 0xc3, 0x1c, 0x63, 0x08, 0x25, + 0xa9, 0x87, 0x08, 0x25, 0xbb, 0x01, 0x37, 0x9d, 0x0a, 0xc1, 0x37, 0xa7, + 0x8b, 0x08, 0x25, 0xd9, 0x0d, 0xc1, 0x37, 0xb1, 0xc2, 0x00, 0xdb, 0x08, + 0x25, 0xf9, 0xc2, 0x01, 0xc3, 0x08, 0x26, 0x01, 0xc2, 0x00, 0xc1, 0x08, + 0x26, 0x09, 0x91, 0x08, 0x26, 0x13, 0x01, 0x37, 0xc1, 0xc2, 0x00, 0xb0, + 0x08, 0x26, 0x21, 0x15, 0xc1, 0x37, 0xc7, 0x16, 0xc1, 0x37, 0xd1, 0xc3, + 0x40, 0xe2, 0x08, 0x26, 0x69, 0x97, 0x08, 0x26, 0x71, 0xc2, 0x01, 0x4a, + 0x08, 0x26, 0x79, 0xc3, 0x91, 0x00, 0x08, 0x26, 0x89, 0x1c, 0x41, 0x37, + 0xd9, 0x83, 0x08, 0x26, 0xc3, 0x01, 0x37, 0xe3, 0xc3, 0x00, 0x38, 0x08, + 0x26, 0xe1, 0xc3, 0x1c, 0x63, 0x08, 0x26, 0xe9, 0x87, 0x08, 0x26, 0xfb, + 0x01, 0x37, 0xee, 0x0a, 0xc1, 0x37, 0xf8, 0x8b, 0x08, 0x27, 0x19, 0x0d, + 0xc1, 0x38, 0x02, 0xc2, 0x00, 0xdb, 0x08, 0x27, 0x39, 0xc2, 0x01, 0xc3, + 0x08, 0x27, 0x41, 0xc2, 0x00, 0xc1, 0x08, 0x27, 0x49, 0x91, 0x08, 0x27, + 0x53, 0x01, 0x38, 0x12, 0xc2, 0x00, 0xb0, 0x08, 0x27, 0x61, 0x15, 0xc1, + 0x38, 0x18, 0x16, 0xc1, 0x38, 0x22, 0xc3, 0x40, 0xe2, 0x08, 0x27, 0xa9, + 0x97, 0x08, 0x27, 0xb1, 0xc2, 0x01, 0x4a, 0x08, 0x27, 0xb9, 0xc3, 0x91, + 0x00, 0x08, 0x27, 0xc9, 0x1c, 0x41, 0x38, 0x2a, 0x03, 0xc1, 0x38, 0x34, + 0x11, 0xc1, 0x38, 0x46, 0xc8, 0xbb, 0x2a, 0x0e, 0x7a, 0xc2, 0x01, 0x38, + 0x52, 0xc3, 0x74, 0xc6, 0x0e, 0x7e, 0x09, 0x07, 0xc1, 0x38, 0x58, 0xcf, + 0x58, 0xe3, 0x0e, 0x7b, 0x59, 0xcb, 0x95, 0x77, 0x0e, 0x7a, 0x98, 0xc5, + 0xd5, 0x5b, 0x0e, 0x7e, 0x01, 0xc4, 0xde, 0xf7, 0x0e, 0x7d, 0x7a, 0x01, + 0x38, 0x64, 0xc6, 0xad, 0x17, 0x0e, 0x7d, 0xf9, 0xc5, 0xdd, 0x8a, 0x0e, + 0x7c, 0x21, 0x42, 0x14, 0x98, 0xc1, 0x38, 0x68, 0xc6, 0xd2, 0xd1, 0x0e, + 0x7b, 0x71, 0xc5, 0x5f, 0x8d, 0x0e, 0x7a, 0xa0, 0x16, 0xc1, 0x38, 0x77, + 0xc8, 0xb9, 0x2a, 0x0e, 0x7b, 0xeb, 0x01, 0x38, 0x8f, 0x49, 0xad, 0x77, + 0x41, 0x38, 0x93, 0x00, 0x41, 0x38, 0xaf, 0xc6, 0xad, 0x79, 0x0e, 0x7c, + 0x29, 0x03, 0x41, 0x38, 0xbb, 0xc2, 0x13, 0x38, 0x0e, 0x7c, 0x11, 0xd2, + 0x47, 0xff, 0x0e, 0x7b, 0x60, 0xc5, 0xd2, 0xae, 0x0e, 0x7b, 0x79, 0xc8, + 0x48, 0x09, 0x0e, 0x7a, 0xd8, 0x4c, 0x8b, 0x35, 0xc1, 0x38, 0xc7, 0xcb, + 0x93, 0x7d, 0x0e, 0x7b, 0x31, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0x29, 0xc9, + 0xa9, 0x48, 0x0e, 0x7b, 0x21, 0xc8, 0xbf, 0x6a, 0x0e, 0x7b, 0x18, 0x16, + 0xc1, 0x38, 0xdf, 0xc6, 0xbf, 0x8c, 0x0e, 0x7b, 0x09, 0xc7, 0xc2, 0x96, + 0x0e, 0x7b, 0x01, 0xc5, 0xd4, 0xd4, 0x0e, 0x7a, 0xf0, 0xa0, 0x0e, 0x7a, + 0x19, 0x9f, 0x0e, 0x7a, 0x10, 0x0d, 0xc1, 0x38, 0xeb, 0x05, 0xc1, 0x39, + 0x00, 0x06, 0xc1, 0x39, 0x0f, 0x16, 0xc1, 0x39, 0x1b, 0x15, 0xc1, 0x39, + 0x2d, 0x11, 0xc1, 0x39, 0x45, 0x42, 0x01, 0x53, 0xc1, 0x39, 0x55, 0x1c, + 0xc1, 0x39, 0x5f, 0x42, 0x00, 0x39, 0xc1, 0x39, 0x69, 0xc5, 0xd9, 0x43, + 0x0e, 0x79, 0x39, 0xc6, 0xcf, 0xb3, 0x0e, 0x79, 0x29, 0xc7, 0xc9, 0x8f, + 0x0e, 0x79, 0x21, 0x48, 0xbd, 0xca, 0xc1, 0x39, 0x75, 0x4d, 0x75, 0x8c, + 0xc1, 0x39, 0x81, 0x47, 0xc2, 0x7a, 0xc1, 0x39, 0x8b, 0x46, 0xcd, 0xdf, + 0x41, 0x39, 0x97, 0xc9, 0xb0, 0x62, 0x0e, 0x79, 0x91, 0xc6, 0xb0, 0x65, + 0x0e, 0x79, 0x89, 0xc7, 0x6d, 0xa2, 0x0e, 0x79, 0x80, 0x42, 0x07, 0xb2, + 0xc1, 0x39, 0xa3, 0xc8, 0x14, 0x38, 0x08, 0xd1, 0xc1, 0x46, 0x1e, 0x89, + 0x41, 0x39, 0xaf, 0xd6, 0x2d, 0xe6, 0x08, 0xd2, 0x31, 0xc9, 0x15, 0xcc, + 0x08, 0xd2, 
0x00, 0x4d, 0x7f, 0x25, 0xc1, 0x39, 0xbe, 0xd1, 0x56, 0x1e, + 0x08, 0xd1, 0xd0, 0xc3, 0x1d, 0x35, 0x08, 0xd1, 0x91, 0xc2, 0x00, 0xd0, + 0x08, 0xd0, 0x61, 0x83, 0x08, 0xd0, 0x58, 0x83, 0x08, 0xd1, 0x81, 0xc2, + 0x0d, 0xf6, 0x08, 0xd1, 0x79, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x70, 0x83, + 0x08, 0xd1, 0x41, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x38, 0x1c, 0xc1, 0x39, + 0xd6, 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0xe1, 0x83, 0x08, 0xd0, 0xd9, 0x06, + 0x41, 0x39, 0xe0, 0x15, 0xc1, 0x39, 0xea, 0xc2, 0x00, 0xd0, 0x08, 0xd0, + 0xd1, 0x83, 0x08, 0xd0, 0xc9, 0x16, 0x41, 0x39, 0xf4, 0xc2, 0x00, 0xd0, + 0x08, 0xd1, 0x09, 0x83, 0x08, 0xd1, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xd0, + 0xf9, 0x83, 0x08, 0xd0, 0xf0, 0x83, 0x08, 0xd0, 0xe9, 0xc2, 0x00, 0xc1, + 0x08, 0xd0, 0xc1, 0xc2, 0x19, 0x2c, 0x08, 0xd0, 0x99, 0xc2, 0x01, 0x30, + 0x08, 0xd0, 0x78, 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0x89, 0x83, 0x08, 0xd0, + 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0x71, 0x83, 0x08, 0xd0, 0x68, 0xca, + 0x9d, 0xe2, 0x08, 0xd0, 0x49, 0x03, 0xc1, 0x39, 0xfe, 0x91, 0x08, 0xd0, + 0x33, 0x01, 0x3a, 0x06, 0x87, 0x08, 0xd0, 0x21, 0x97, 0x08, 0xd0, 0x1b, + 0x01, 0x3a, 0x0a, 0x8b, 0x08, 0xd0, 0x08, 0xcf, 0x60, 0x30, 0x01, 0x4c, + 0x51, 0xcd, 0x7d, 0x6b, 0x01, 0x4c, 0x40, 0x12, 0xc1, 0x3a, 0x0e, 0xcb, + 0x34, 0xad, 0x01, 0x50, 0xf8, 0xc8, 0xb8, 0x8a, 0x01, 0x00, 0x61, 0xcc, + 0x40, 0x81, 0x07, 0xf7, 0xf8, 0x43, 0x16, 0x55, 0xc1, 0x3a, 0x1a, 0x42, + 0x00, 0x75, 0x41, 0x3a, 0x3e, 0x45, 0x02, 0x10, 0xc1, 0x3a, 0x4a, 0xcc, + 0x86, 0x3d, 0x05, 0x4e, 0x08, 0x16, 0xc1, 0x3a, 0xd6, 0xc3, 0x05, 0x14, + 0x05, 0x4e, 0x89, 0xc4, 0x15, 0xe7, 0x05, 0x4e, 0x81, 0x08, 0xc1, 0x3a, + 0xe2, 0x15, 0xc1, 0x3a, 0xee, 0xc5, 0x06, 0xdb, 0x05, 0x4e, 0xc1, 0xc4, + 0x26, 0x78, 0x05, 0x4e, 0xc8, 0xc5, 0xdd, 0x53, 0x05, 0x4d, 0xf9, 0xc7, + 0xc6, 0xf6, 0x05, 0x4d, 0xf1, 0xc5, 0xdd, 0x12, 0x05, 0x4d, 0xe8, 0xc5, + 0xd8, 0xbc, 0x05, 0x4d, 0xe1, 0xca, 0xa2, 0xf6, 0x05, 0x4d, 0xd9, 0x16, + 0xc1, 0x3a, 0xfa, 0xc4, 0xc5, 0x6e, 0x05, 0x4d, 0xc3, 0x01, 0x3b, 0x04, + 0xc4, 0xdf, 0x6f, 0x05, 0x4d, 0xb2, 0x01, 0x3b, 0x0a, 0xc5, 0xde, 0x11, + 0x05, 0x4c, 0x0b, 0x01, 0x3b, 0x10, 0xc7, 0xc7, 0xd6, 0x05, 0x4c, 0x19, + 0xc5, 0xd9, 0xcf, 0x05, 0x4c, 0x11, 0xc9, 0xaf, 0xff, 0x05, 0x4c, 0x00, + 0x46, 0x02, 0xae, 0xc1, 0x3b, 0x16, 0x46, 0x01, 0xc8, 0x41, 0x3b, 0x28, + 0xc5, 0x18, 0x25, 0x01, 0x02, 0xb9, 0xd1, 0x1e, 0x3f, 0x01, 0x50, 0x60, + 0x10, 0xc1, 0x3b, 0x34, 0x0c, 0xc1, 0x3b, 0x73, 0x13, 0xc1, 0x3b, 0x93, + 0x14, 0xc1, 0x3b, 0xaf, 0x15, 0xc1, 0x3b, 0xd6, 0x05, 0xc1, 0x3c, 0x08, + 0x1c, 0xc1, 0x3c, 0x36, 0x19, 0xc1, 0x3c, 0x68, 0x0a, 0xc1, 0x3c, 0x84, + 0x1b, 0xc1, 0x3c, 0xb6, 0x1a, 0xc1, 0x3c, 0xd2, 0x0f, 0xc1, 0x3c, 0xf0, + 0x8b, 0x05, 0x00, 0x13, 0x01, 0x3d, 0x1e, 0x83, 0x05, 0x00, 0x53, 0x01, + 0x3d, 0x34, 0xc2, 0x01, 0xba, 0x05, 0x00, 0x6b, 0x01, 0x3d, 0x40, 0x91, + 0x05, 0x00, 0x8b, 0x01, 0x3d, 0x48, 0x87, 0x05, 0x00, 0xa3, 0x01, 0x3d, + 0x54, 0x04, 0xc1, 0x3d, 0x58, 0x12, 0xc1, 0x3d, 0x86, 0x08, 0xc1, 0x3d, + 0xa9, 0x18, 0xc1, 0x3d, 0xcc, 0x06, 0xc1, 0x3d, 0xf3, 0x16, 0xc1, 0x3e, + 0x1a, 0x0e, 0xc1, 0x3e, 0x3d, 0x09, 0xc1, 0x3e, 0x67, 0x0d, 0x41, 0x3e, + 0x8e, 0xc3, 0xe5, 0x75, 0x05, 0x24, 0x81, 0x0e, 0xc1, 0x3e, 0xb1, 0x0d, + 0xc1, 0x3e, 0xbe, 0x10, 0xc1, 0x3e, 0xc8, 0x05, 0xc1, 0x3e, 0xd8, 0x15, + 0xc1, 0x3e, 0xf1, 0x09, 0xc1, 0x3e, 0xfb, 0x0f, 0xc1, 0x3f, 0x0f, 0x0a, + 0xc1, 0x3f, 0x19, 0x04, 0xc1, 0x3f, 0x23, 0x1b, 0xc1, 0x3f, 0x2f, 0x12, + 0xc1, 0x3f, 0x39, 0x16, 0xc1, 0x3f, 0x45, 0x1c, 0xc1, 0x3f, 0x4f, 0x06, + 0xc1, 0x3f, 0x63, 0xc2, 0x00, 0x11, 0x05, 0x25, 0x49, 0x0c, 0xc1, 0x3f, + 0x6d, 0x18, 
0xc1, 0x3f, 0x75, 0xc2, 0x02, 0xa0, 0x05, 0x25, 0xc0, 0xc3, + 0xe5, 0xb4, 0x08, 0x75, 0x43, 0x01, 0x3f, 0x81, 0xc3, 0x0d, 0xff, 0x08, + 0x75, 0x03, 0x01, 0x3f, 0x87, 0x07, 0xc1, 0x3f, 0x8d, 0x0a, 0xc1, 0x3f, + 0xa1, 0xc2, 0x00, 0x27, 0x08, 0x75, 0x29, 0xc3, 0x7e, 0x89, 0x08, 0x75, + 0x21, 0xc2, 0x01, 0xdf, 0x08, 0x75, 0x19, 0xc3, 0x20, 0x18, 0x08, 0x75, + 0x11, 0xc3, 0x8c, 0x3f, 0x08, 0x75, 0x09, 0xc3, 0xb3, 0xa6, 0x08, 0x74, + 0xf9, 0x0d, 0xc1, 0x3f, 0xad, 0xc3, 0x0f, 0x9a, 0x08, 0x74, 0xe1, 0xc2, + 0x02, 0x41, 0x08, 0x74, 0xd3, 0x01, 0x3f, 0xb9, 0xc2, 0x00, 0x87, 0x08, + 0x74, 0xc9, 0x1a, 0xc1, 0x3f, 0xbf, 0x1c, 0xc1, 0x3f, 0xc9, 0x16, 0xc1, + 0x3f, 0xd4, 0x42, 0x0e, 0x9a, 0xc1, 0x3f, 0xde, 0x15, 0xc1, 0x3f, 0xe6, + 0xc2, 0x25, 0x3b, 0x08, 0x74, 0x81, 0x14, 0xc1, 0x3f, 0xfc, 0x05, 0xc1, + 0x40, 0x06, 0x12, 0xc1, 0x40, 0x10, 0xc2, 0x00, 0x51, 0x08, 0x74, 0x08, + 0xca, 0xa8, 0x1e, 0x08, 0x75, 0x61, 0xca, 0x9c, 0xd4, 0x08, 0x75, 0x58, + 0x00, 0xc1, 0x40, 0x1a, 0xc8, 0xbb, 0x3a, 0x0f, 0xae, 0xc8, 0x12, 0xc1, + 0x40, 0x26, 0x83, 0x00, 0xa7, 0xa3, 0x01, 0x40, 0x36, 0x8a, 0x00, 0xa9, + 0x2b, 0x01, 0x40, 0x44, 0x91, 0x00, 0xa7, 0x8b, 0x01, 0x40, 0x61, 0x99, + 0x00, 0xa8, 0x3b, 0x01, 0x40, 0x6f, 0x87, 0x00, 0xa7, 0x69, 0x8b, 0x00, + 0xa7, 0x7a, 0x01, 0x40, 0x88, 0x83, 0x00, 0xa6, 0x3b, 0x01, 0x40, 0x8c, + 0x19, 0xc1, 0x40, 0xa3, 0x91, 0x00, 0xa6, 0x23, 0x01, 0x40, 0xbc, 0xc2, + 0x00, 0x75, 0x00, 0xac, 0xb3, 0x01, 0x40, 0xc4, 0x89, 0x00, 0xac, 0xab, + 0x01, 0x40, 0xd9, 0x44, 0xde, 0xaf, 0xc1, 0x40, 0xee, 0x48, 0xbc, 0x52, + 0xc1, 0x40, 0xfd, 0x87, 0x00, 0xa6, 0x01, 0x8b, 0x00, 0xa6, 0x13, 0x01, + 0x41, 0x08, 0x8a, 0x00, 0xa6, 0x90, 0x83, 0x00, 0xa4, 0x83, 0x01, 0x41, + 0x0c, 0xc7, 0xc4, 0xf7, 0x00, 0xb3, 0x69, 0x19, 0xc1, 0x41, 0x19, 0x91, + 0x00, 0xa4, 0x6b, 0x01, 0x41, 0x32, 0x8b, 0x00, 0xa4, 0x5b, 0x01, 0x41, + 0x36, 0x87, 0x00, 0xa4, 0x48, 0x4b, 0x92, 0x54, 0xc1, 0x41, 0x3a, 0x49, + 0xad, 0x4a, 0xc1, 0x41, 0x42, 0xcb, 0x92, 0x96, 0x00, 0xa9, 0xf8, 0x42, + 0x07, 0x26, 0xc1, 0x41, 0x65, 0x16, 0xc1, 0x41, 0x7e, 0x8a, 0x00, 0xab, + 0x53, 0x01, 0x41, 0x95, 0x83, 0x00, 0xa2, 0xab, 0x01, 0x41, 0xbb, 0x1b, + 0xc1, 0x41, 0xc6, 0x19, 0xc1, 0x41, 0xd6, 0x91, 0x00, 0xa2, 0x83, 0x01, + 0x41, 0xef, 0x8b, 0x00, 0xa2, 0x73, 0x01, 0x41, 0xf3, 0x87, 0x00, 0xa2, + 0x60, 0x87, 0x00, 0xa0, 0x63, 0x01, 0x41, 0xf7, 0x83, 0x00, 0xa0, 0xbb, + 0x01, 0x41, 0xfd, 0x91, 0x00, 0xa0, 0x93, 0x01, 0x42, 0x05, 0x8b, 0x00, + 0xa0, 0x72, 0x01, 0x42, 0x0c, 0x47, 0xc0, 0xac, 0xc1, 0x42, 0x10, 0x19, + 0xc1, 0x42, 0x1a, 0x83, 0x00, 0xaa, 0x5b, 0x01, 0x42, 0x35, 0x91, 0x00, + 0xaa, 0x43, 0x01, 0x42, 0x40, 0x8b, 0x00, 0xaa, 0x33, 0x01, 0x42, 0x44, + 0x87, 0x00, 0xaa, 0x10, 0x8b, 0x00, 0xaa, 0xab, 0x01, 0x42, 0x48, 0xc8, + 0x11, 0xf7, 0x00, 0xb3, 0x71, 0xc3, 0x14, 0x72, 0x00, 0xaa, 0xd9, 0x83, + 0x00, 0xaa, 0xcb, 0x01, 0x42, 0x52, 0x91, 0x00, 0xaa, 0xbb, 0x01, 0x42, + 0x59, 0x87, 0x00, 0xaa, 0x98, 0xc8, 0xbc, 0x9a, 0x00, 0xc6, 0xe1, 0x90, + 0x00, 0xa1, 0x58, 0x47, 0xc5, 0xb4, 0xc1, 0x42, 0x5d, 0x9b, 0x00, 0xc5, + 0x81, 0x91, 0x00, 0xa0, 0x31, 0x90, 0x00, 0xa1, 0x68, 0x83, 0x00, 0xa9, + 0x6b, 0x01, 0x42, 0x7f, 0x91, 0x00, 0xa9, 0x53, 0x01, 0x42, 0x8a, 0x19, + 0xc1, 0x42, 0x92, 0x46, 0x92, 0x9a, 0xc1, 0x42, 0xab, 0x8b, 0x00, 0xa9, + 0x43, 0x01, 0x42, 0xe9, 0x87, 0x00, 0xa9, 0x30, 0x83, 0x00, 0xa6, 0xd3, + 0x01, 0x42, 0xed, 0x8a, 0x00, 0xad, 0x33, 0x01, 0x42, 0xf8, 0x87, 0x00, + 0xa6, 0x99, 0x8b, 0x00, 0xa6, 0xab, 0x01, 0x43, 0x0d, 0x91, 0x00, 0xa6, + 0xbb, 0x01, 0x43, 0x11, 0x19, 0x41, 0x43, 0x15, 0x83, 0x00, 0xa5, 0x53, + 0x01, 0x43, 
0x2e, 0x87, 0x00, 0xa5, 0x1b, 0x01, 0x43, 0x39, 0x91, 0x00, + 0xa5, 0x3b, 0x01, 0x43, 0x3f, 0x8b, 0x00, 0xa5, 0x2b, 0x01, 0x43, 0x46, + 0x19, 0xc1, 0x43, 0x4a, 0x8a, 0x00, 0xa5, 0xe8, 0x99, 0x00, 0xa4, 0x23, + 0x01, 0x43, 0x63, 0x83, 0x00, 0xa3, 0x93, 0x01, 0x43, 0x7c, 0x87, 0x00, + 0xa3, 0x59, 0x8b, 0x00, 0xa3, 0x6b, 0x01, 0x43, 0x87, 0x91, 0x00, 0xa3, + 0x7a, 0x01, 0x43, 0x8b, 0x19, 0xc1, 0x43, 0x8f, 0x83, 0x00, 0xa1, 0xc3, + 0x01, 0x43, 0xa8, 0x91, 0x00, 0xa1, 0x9b, 0x01, 0x43, 0xb3, 0x87, 0x00, + 0xa1, 0x79, 0x8b, 0x00, 0xa1, 0x8a, 0x01, 0x43, 0xbb, 0x83, 0x00, 0xa0, + 0x5b, 0x01, 0x43, 0xbf, 0x9b, 0x00, 0xc5, 0x89, 0x8b, 0x00, 0xa0, 0xe3, + 0x01, 0x43, 0xc7, 0x4a, 0xa0, 0xa8, 0xc1, 0x43, 0xcd, 0x90, 0x00, 0xa1, + 0x70, 0x83, 0x00, 0xac, 0x1b, 0x01, 0x43, 0xd5, 0x91, 0x00, 0xac, 0x0b, + 0x01, 0x43, 0xe0, 0x8b, 0x00, 0xab, 0xfa, 0x01, 0x43, 0xe4, 0x8d, 0x00, + 0xab, 0xe9, 0xc5, 0x59, 0x93, 0x00, 0xa0, 0x00, 0x8b, 0x00, 0xa0, 0x21, + 0x90, 0x00, 0xa1, 0x60, 0xd0, 0x5a, 0x52, 0x01, 0x02, 0x08, 0xc9, 0x36, + 0xe7, 0x0f, 0xae, 0x10, 0x97, 0x08, 0x15, 0xfa, 0x01, 0x43, 0xe8, 0x94, + 0x08, 0x16, 0x48, 0x86, 0x08, 0x15, 0x32, 0x01, 0x43, 0xef, 0x9f, 0x08, + 0x15, 0x38, 0x84, 0x08, 0x16, 0x52, 0x01, 0x43, 0xf3, 0x9f, 0x08, 0x15, + 0x60, 0x96, 0x08, 0x16, 0x3a, 0x01, 0x43, 0xff, 0x8a, 0x08, 0x15, 0x73, + 0x01, 0x44, 0x03, 0x95, 0x08, 0x15, 0xc1, 0x96, 0x08, 0x16, 0x12, 0x01, + 0x44, 0x07, 0xc2, 0x8c, 0x53, 0x08, 0x15, 0x89, 0xc2, 0xe6, 0x81, 0x08, + 0x16, 0x30, 0x90, 0x08, 0x15, 0x99, 0x86, 0x08, 0x15, 0xf1, 0x89, 0x08, + 0x16, 0x20, 0x9f, 0x08, 0x15, 0x08, 0x8b, 0x08, 0x16, 0x28, 0x9f, 0x08, + 0x16, 0x78, 0x9f, 0x08, 0x15, 0xe8, 0x9f, 0x08, 0x16, 0x08, 0x03, 0xc1, + 0x44, 0x0b, 0xc3, 0x0b, 0xc8, 0x08, 0x29, 0x89, 0x09, 0xc1, 0x44, 0x17, + 0x06, 0xc1, 0x44, 0x23, 0x07, 0xc1, 0x44, 0x33, 0x1c, 0xc1, 0x44, 0x3d, + 0x16, 0xc1, 0x44, 0x47, 0x05, 0xc1, 0x44, 0x59, 0x1b, 0xc1, 0x44, 0x67, + 0x0b, 0xc1, 0x44, 0x73, 0x15, 0xc1, 0x44, 0x85, 0x0e, 0xc1, 0x44, 0x8f, + 0xc4, 0xdf, 0x1f, 0x08, 0x2a, 0x01, 0x0c, 0xc1, 0x44, 0x9b, 0x0d, 0xc1, + 0x44, 0xa7, 0xc4, 0xdf, 0xa7, 0x08, 0x2a, 0x31, 0x42, 0x0f, 0x9a, 0xc1, + 0x44, 0xb3, 0xc3, 0xda, 0xa6, 0x08, 0x2a, 0x61, 0xc4, 0xe4, 0x53, 0x08, + 0x2a, 0x71, 0xc2, 0x00, 0x45, 0x08, 0x2a, 0x91, 0xc3, 0xd2, 0xb3, 0x08, + 0x2a, 0xa1, 0x12, 0xc1, 0x44, 0xbb, 0xc3, 0x07, 0x81, 0x08, 0x2a, 0xc9, + 0xc4, 0xde, 0x87, 0x08, 0x2a, 0xd8, 0xcc, 0x85, 0x1d, 0x0f, 0xb1, 0xc9, + 0xc9, 0xa9, 0x36, 0x0f, 0xb1, 0xe0, 0x07, 0xc1, 0x44, 0xc7, 0x06, 0xc1, + 0x45, 0x07, 0x03, 0xc1, 0x45, 0x47, 0x08, 0xc1, 0x45, 0x87, 0x24, 0xc1, + 0x45, 0xc7, 0x23, 0xc1, 0x46, 0x07, 0x20, 0xc1, 0x46, 0x47, 0x1f, 0xc1, + 0x46, 0x87, 0x1e, 0xc1, 0x46, 0xc7, 0x1d, 0xc1, 0x47, 0x07, 0x05, 0xc1, + 0x47, 0x47, 0x04, 0xc1, 0x47, 0x87, 0x26, 0xc1, 0x47, 0xc7, 0x25, 0xc1, + 0x48, 0x07, 0x22, 0xc1, 0x48, 0x47, 0x21, 0x41, 0x48, 0x87, 0x24, 0xc1, + 0x48, 0xc7, 0x23, 0xc1, 0x49, 0x07, 0x22, 0xc1, 0x49, 0x47, 0x21, 0xc1, + 0x49, 0x87, 0x1f, 0xc1, 0x49, 0xc7, 0x1d, 0xc1, 0x4a, 0x07, 0x08, 0xc1, + 0x4a, 0x47, 0x04, 0xc1, 0x4a, 0x87, 0x03, 0xc1, 0x4a, 0xc7, 0x26, 0xc1, + 0x4b, 0x07, 0x25, 0xc1, 0x4b, 0x47, 0x07, 0xc1, 0x4b, 0x87, 0x06, 0xc1, + 0x4b, 0xc7, 0x05, 0xc1, 0x4c, 0x07, 0x20, 0xc1, 0x4c, 0x47, 0x1e, 0x41, + 0x4c, 0x87, 0x1e, 0xc1, 0x4c, 0xc7, 0x1d, 0x41, 0x4c, 0xff, 0x06, 0xc1, + 0x4d, 0x3f, 0x05, 0xc1, 0x4d, 0x67, 0x04, 0xc1, 0x4d, 0xa7, 0x03, 0xc1, + 0x4d, 0xe7, 0x26, 0xc1, 0x4e, 0x27, 0x25, 0xc1, 0x4e, 0x67, 0x24, 0xc1, + 0x4e, 0xa7, 0x23, 0xc1, 0x4e, 0xe7, 0x22, 0xc1, 0x4f, 0x1f, 0x21, 0xc1, + 0x4f, 0x5f, 
0x20, 0xc1, 0x4f, 0x9f, 0x1f, 0xc1, 0x4f, 0xdf, 0x1e, 0xc1, + 0x50, 0x1f, 0x1d, 0x41, 0x50, 0x5f, 0x08, 0xc1, 0x50, 0x9f, 0x07, 0xc1, + 0x50, 0xdf, 0x06, 0xc1, 0x51, 0x1f, 0x05, 0xc1, 0x51, 0x5f, 0x04, 0xc1, + 0x51, 0x9f, 0x03, 0xc1, 0x51, 0xdf, 0x26, 0xc1, 0x52, 0x1f, 0x25, 0xc1, + 0x52, 0x5f, 0x24, 0xc1, 0x52, 0x9f, 0x23, 0xc1, 0x52, 0xdf, 0x22, 0xc1, + 0x53, 0x1f, 0x21, 0xc1, 0x53, 0x5f, 0x20, 0xc1, 0x53, 0x9f, 0x1f, 0xc1, + 0x53, 0xdf, 0x1e, 0xc1, 0x54, 0x1f, 0x1d, 0x41, 0x54, 0x5f, 0x92, 0x01, + 0x74, 0xc9, 0x8f, 0x01, 0x75, 0xb9, 0xc2, 0x00, 0x74, 0x01, 0x76, 0xb8, + 0xc3, 0x43, 0x08, 0x01, 0x74, 0x09, 0xc5, 0x78, 0xee, 0x01, 0x76, 0x10, + 0xc6, 0xca, 0xeb, 0x01, 0x75, 0x01, 0xc2, 0x0d, 0x10, 0x01, 0x76, 0x78, + 0x15, 0xc1, 0x54, 0x9f, 0xc4, 0x63, 0x7e, 0x01, 0x76, 0x59, 0x09, 0xc1, + 0x54, 0xbd, 0x0e, 0xc1, 0x54, 0xc9, 0x16, 0xc1, 0x54, 0xd5, 0xc4, 0x45, + 0x10, 0x01, 0x76, 0xd9, 0x08, 0xc1, 0x54, 0xe7, 0x07, 0xc1, 0x54, 0xf9, + 0xc5, 0xa0, 0x85, 0x01, 0x77, 0x11, 0xc4, 0xa3, 0x1a, 0x01, 0x77, 0x31, + 0xc6, 0x87, 0xe7, 0x01, 0x77, 0x80, 0x45, 0x71, 0x24, 0xc1, 0x55, 0x05, + 0xc2, 0x00, 0x65, 0x01, 0x74, 0x58, 0xc3, 0x05, 0x14, 0x01, 0x74, 0x61, + 0xc3, 0x02, 0x9f, 0x01, 0x74, 0x68, 0xc3, 0x21, 0xdf, 0x01, 0x74, 0x91, + 0x44, 0x4b, 0x1f, 0x41, 0x55, 0x0f, 0x49, 0x8c, 0x70, 0xc1, 0x55, 0x1b, + 0xc2, 0x8c, 0x30, 0x01, 0x75, 0x78, 0xc3, 0x05, 0x14, 0x01, 0x75, 0x61, + 0xc3, 0x02, 0x9f, 0x01, 0x75, 0x68, 0xc3, 0x05, 0x14, 0x01, 0x75, 0x21, + 0xc3, 0x02, 0x9f, 0x01, 0x75, 0x28, 0x9a, 0x01, 0x74, 0x31, 0xcb, 0x93, + 0x67, 0x01, 0x75, 0x51, 0xc2, 0x02, 0x6f, 0x01, 0x77, 0x18, 0xc3, 0x05, + 0x14, 0x01, 0x75, 0xd1, 0xc3, 0x02, 0x9f, 0x01, 0x75, 0xd8, 0xc3, 0x05, + 0x14, 0x01, 0x74, 0x71, 0x16, 0xc1, 0x55, 0x29, 0xc4, 0x09, 0x9d, 0x01, + 0x74, 0x88, 0xc3, 0x05, 0x14, 0x01, 0x76, 0x89, 0xc3, 0x02, 0x9f, 0x01, + 0x76, 0x90, 0x43, 0x0f, 0x06, 0xc1, 0x55, 0x35, 0x86, 0x01, 0x77, 0x08, + 0xc2, 0x00, 0x45, 0x01, 0x74, 0xe9, 0xc4, 0x14, 0xdd, 0x01, 0x74, 0xf9, + 0xc4, 0xd7, 0x14, 0x01, 0x75, 0xe9, 0x44, 0x0d, 0xee, 0x41, 0x55, 0x41, + 0xc2, 0x01, 0xe2, 0x01, 0x75, 0xa9, 0xc2, 0x00, 0xfe, 0x01, 0x75, 0xe0, + 0x44, 0x02, 0x11, 0xc1, 0x55, 0x4d, 0x43, 0xad, 0x64, 0x41, 0x55, 0x59, + 0xc3, 0x05, 0x14, 0x01, 0x76, 0x19, 0xc3, 0x02, 0x9f, 0x01, 0x76, 0x20, + 0xc4, 0x18, 0x10, 0x01, 0x77, 0x59, 0x16, 0xc1, 0x55, 0x65, 0xc6, 0x87, + 0xe7, 0x01, 0x77, 0x78, 0xc3, 0x05, 0x14, 0x01, 0x76, 0xe9, 0x16, 0x41, + 0x55, 0x71, 0xc2, 0x02, 0xa0, 0x01, 0x75, 0x91, 0xc4, 0x02, 0xde, 0x01, + 0x75, 0x98, 0xc3, 0x05, 0x14, 0x01, 0x75, 0xf1, 0x16, 0x41, 0x55, 0x7d, + 0x9c, 0x01, 0x8e, 0xc1, 0x89, 0x01, 0x8e, 0xf8, 0xc2, 0x47, 0xa4, 0x01, + 0x8e, 0x49, 0x9c, 0x01, 0x8e, 0xf0, 0x9c, 0x01, 0x8e, 0x2b, 0x01, 0x55, + 0x89, 0x89, 0x01, 0x8e, 0x31, 0x99, 0x01, 0x8e, 0x6b, 0x01, 0x55, 0x94, + 0x96, 0x01, 0x8e, 0x50, 0xc2, 0x47, 0xa4, 0x01, 0x8e, 0x60, 0xc5, 0x08, + 0xd9, 0x0f, 0xdc, 0xa8, 0x4d, 0x29, 0xb9, 0xc1, 0x55, 0x98, 0x47, 0x02, + 0x0e, 0x41, 0x55, 0xe7, 0xc3, 0x91, 0xe8, 0x0f, 0x9a, 0x91, 0xc9, 0xae, + 0x3d, 0x0f, 0x99, 0xc0, 0xc2, 0x02, 0x0a, 0x01, 0x02, 0x01, 0xc9, 0x33, + 0xdd, 0x00, 0x00, 0x4a, 0x01, 0x56, 0x36, 0xcf, 0x64, 0xfe, 0x0f, 0xa6, + 0x49, 0xcd, 0x7b, 0x22, 0x0f, 0xa6, 0x42, 0x01, 0x56, 0x3a, 0xc3, 0xd8, + 0xd0, 0x08, 0x8a, 0x39, 0x0e, 0xc1, 0x56, 0x40, 0xc3, 0x39, 0x6e, 0x08, + 0x89, 0x31, 0xc3, 0x82, 0xa0, 0x08, 0x89, 0x29, 0xc3, 0x14, 0x72, 0x08, + 0x89, 0x21, 0xc3, 0x47, 0xd9, 0x08, 0x89, 0x11, 0x1b, 0xc1, 0x56, 0x4c, + 0xc3, 0xc2, 0xab, 0x08, 0x88, 0xf9, 0x04, 0xc1, 0x56, 0x58, 0x12, 0xc1, + 0x56, 0x64, 
0x10, 0xc1, 0x56, 0x70, 0x06, 0xc1, 0x56, 0x88, 0x16, 0xc1, + 0x56, 0x98, 0x0c, 0xc1, 0x56, 0xa8, 0x05, 0xc1, 0x56, 0xb4, 0x09, 0xc1, + 0x56, 0xc0, 0x0d, 0xc1, 0x56, 0xcc, 0x87, 0x08, 0x88, 0x31, 0x97, 0x08, + 0x88, 0x29, 0x8b, 0x08, 0x88, 0x21, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x18, + 0x4a, 0x6f, 0xc8, 0xc1, 0x56, 0xd8, 0xc5, 0x1e, 0x96, 0x08, 0x89, 0x98, + 0xcb, 0x97, 0xf5, 0x08, 0x8a, 0x11, 0xc4, 0x19, 0x53, 0x08, 0x8a, 0x09, + 0x45, 0x09, 0x98, 0x41, 0x56, 0xfb, 0xcb, 0x45, 0x8e, 0x08, 0x8a, 0x01, + 0x44, 0x00, 0xbb, 0x41, 0x57, 0x1f, 0xc2, 0x01, 0x4a, 0x05, 0x51, 0xb1, + 0xc2, 0x00, 0xdb, 0x05, 0x51, 0xa9, 0xc2, 0x00, 0x39, 0x05, 0x51, 0xa1, + 0xc2, 0x19, 0x2c, 0x05, 0x51, 0x99, 0x46, 0x26, 0xf7, 0x41, 0x57, 0x31, + 0x97, 0x05, 0x51, 0x6b, 0x01, 0x57, 0x3f, 0x03, 0xc1, 0x57, 0x43, 0x91, + 0x05, 0x51, 0x7b, 0x01, 0x57, 0x4f, 0xc2, 0x06, 0xdb, 0x05, 0x51, 0x61, + 0x8b, 0x05, 0x51, 0x52, 0x01, 0x57, 0x53, 0xc2, 0x00, 0xd0, 0x05, 0x51, + 0x41, 0x15, 0xc1, 0x57, 0x57, 0x10, 0xc1, 0x57, 0x61, 0x09, 0xc1, 0x57, + 0x73, 0x0d, 0xc1, 0x57, 0x7d, 0x91, 0x05, 0x50, 0x29, 0x83, 0x05, 0x50, + 0x03, 0x01, 0x57, 0x87, 0x87, 0x05, 0x50, 0x19, 0x46, 0x26, 0xf7, 0xc1, + 0x57, 0x8b, 0xc2, 0x02, 0x41, 0x05, 0x51, 0x29, 0xc2, 0x00, 0xdb, 0x05, + 0x51, 0x21, 0xc2, 0x00, 0x39, 0x05, 0x51, 0x19, 0xc2, 0x19, 0x2c, 0x05, + 0x51, 0x11, 0x04, 0xc1, 0x57, 0xba, 0x0f, 0xc1, 0x57, 0xca, 0x12, 0xc1, + 0x57, 0xd4, 0x06, 0xc1, 0x57, 0xe4, 0x16, 0xc1, 0x57, 0xf4, 0x0c, 0xc1, + 0x57, 0xfe, 0x42, 0x11, 0xee, 0xc1, 0x58, 0x08, 0x97, 0x05, 0x50, 0x11, + 0x8b, 0x05, 0x50, 0x08, 0xcc, 0x86, 0x19, 0x05, 0x52, 0xf9, 0x06, 0xc1, + 0x58, 0x12, 0xc6, 0x99, 0x4e, 0x05, 0x52, 0xe0, 0xc4, 0x26, 0x78, 0x05, + 0x52, 0xc9, 0xc5, 0x06, 0xdb, 0x05, 0x52, 0xc1, 0x15, 0xc1, 0x58, 0x1e, + 0x08, 0xc1, 0x58, 0x2a, 0x16, 0xc1, 0x58, 0x36, 0xc4, 0x15, 0xe7, 0x05, + 0x52, 0x81, 0xc3, 0x05, 0x14, 0x05, 0x52, 0x88, 0xc3, 0x05, 0x14, 0x08, + 0x7e, 0x2b, 0x01, 0x58, 0x42, 0x16, 0xc1, 0x58, 0x48, 0xc4, 0x09, 0x9d, + 0x08, 0x7e, 0x40, 0xc3, 0xb5, 0x3e, 0x08, 0x7e, 0x21, 0x15, 0xc1, 0x58, + 0x58, 0xc4, 0xe0, 0xe7, 0x08, 0x7d, 0xd9, 0xc4, 0x4a, 0xb9, 0x08, 0x7d, + 0xd1, 0xc2, 0x01, 0x7f, 0x08, 0x7d, 0xab, 0x01, 0x58, 0x6a, 0xc5, 0x4a, + 0xb3, 0x08, 0x7d, 0xc1, 0xca, 0xa5, 0x26, 0x08, 0x7d, 0xb9, 0xc3, 0x7e, + 0x89, 0x08, 0x7d, 0xb1, 0xc6, 0x40, 0x9a, 0x08, 0x7d, 0xa1, 0xc5, 0x9c, + 0xa2, 0x08, 0x7d, 0x99, 0xc4, 0xe3, 0x27, 0x08, 0x7d, 0x91, 0x03, 0xc1, + 0x58, 0x70, 0xc6, 0xcf, 0xd7, 0x08, 0x7d, 0xe1, 0xc3, 0x00, 0x4e, 0x08, + 0x7d, 0xe9, 0xc3, 0x20, 0x18, 0x08, 0x7d, 0xf1, 0xc2, 0x00, 0x67, 0x08, + 0x7e, 0x09, 0xc4, 0x5d, 0xe2, 0x08, 0x7e, 0x10, 0xc4, 0x01, 0xc3, 0x01, + 0x3a, 0x61, 0x43, 0x00, 0x55, 0xc1, 0x58, 0x7c, 0x12, 0x41, 0x58, 0x88, + 0xc6, 0xd3, 0xc1, 0x01, 0x34, 0xa1, 0xc5, 0xd4, 0x3e, 0x0f, 0x9c, 0x61, + 0x47, 0x53, 0xfe, 0x41, 0x58, 0x97, 0x51, 0x4f, 0x69, 0xc1, 0x58, 0x9d, + 0x14, 0x41, 0x59, 0x0e, 0x48, 0x5b, 0x32, 0xc1, 0x59, 0x18, 0x10, 0xc1, + 0x59, 0x24, 0x4f, 0x66, 0xcf, 0xc1, 0x59, 0x30, 0x44, 0x31, 0xef, 0x41, + 0x59, 0x3c, 0x0b, 0xc1, 0x59, 0x44, 0x07, 0x41, 0x59, 0x50, 0x43, 0x00, + 0x4a, 0xc1, 0x59, 0x5c, 0x11, 0xc1, 0x59, 0x66, 0x45, 0x0b, 0x12, 0xc1, + 0x59, 0x72, 0x42, 0x00, 0x2d, 0x41, 0x59, 0x7e, 0x43, 0x06, 0xa8, 0xc1, + 0x59, 0x8a, 0xcf, 0x64, 0x0e, 0x00, 0xd5, 0xb0, 0x46, 0x18, 0x54, 0xc1, + 0x59, 0x96, 0xcf, 0x0e, 0x7d, 0x01, 0x06, 0xd9, 0xc4, 0x1e, 0xc9, 0x00, + 0x18, 0x1b, 0x01, 0x59, 0xa8, 0xd1, 0x52, 0x55, 0x00, 0x18, 0x90, 0x11, + 0xc1, 0x59, 0xac, 0x07, 0xc1, 0x59, 0xbc, 0xc8, 0x20, 0xa9, 0x00, 0x18, + 0x42, 0x01, 
0x59, 0xc8, 0x49, 0xa8, 0x70, 0xc1, 0x59, 0xd4, 0xd0, 0x5e, + 0xf2, 0x00, 0x1a, 0x38, 0xce, 0x3b, 0x7a, 0x01, 0x06, 0xe1, 0xc6, 0xcf, + 0xef, 0x00, 0x1a, 0x90, 0x49, 0x05, 0xf9, 0xc1, 0x59, 0xf3, 0x48, 0xba, + 0x9a, 0xc1, 0x59, 0xff, 0xd0, 0x08, 0xf7, 0x00, 0x18, 0x13, 0x01, 0x5a, + 0x2b, 0x03, 0xc1, 0x5a, 0x31, 0x11, 0xc1, 0x5a, 0x40, 0xc6, 0xbd, 0xf4, + 0x00, 0x19, 0x38, 0x45, 0x2e, 0xef, 0xc1, 0x5a, 0x4f, 0xce, 0x6c, 0x98, + 0x00, 0xee, 0x19, 0xca, 0xa2, 0x4c, 0x00, 0xee, 0x11, 0x47, 0x25, 0xae, + 0xc1, 0x5a, 0x59, 0x16, 0xc1, 0x5a, 0x65, 0xcc, 0x84, 0x81, 0x00, 0x19, + 0xe0, 0xca, 0xa0, 0x6c, 0x08, 0x99, 0xd9, 0x14, 0x41, 0x5a, 0x6b, 0x4b, + 0x94, 0xe8, 0xc1, 0x5a, 0x7a, 0x50, 0x5c, 0x02, 0x41, 0x5a, 0x86, 0x12, + 0xc1, 0x5a, 0x92, 0xc7, 0x04, 0xed, 0x00, 0xee, 0x91, 0xc7, 0x0a, 0x80, + 0x00, 0xee, 0x88, 0xc7, 0x05, 0x00, 0x00, 0xee, 0x81, 0x10, 0x41, 0x5a, + 0x9e, 0xc5, 0x05, 0x02, 0x00, 0xee, 0x79, 0xc5, 0x00, 0xd4, 0x00, 0x1a, + 0xd8, 0xc5, 0xcc, 0x90, 0x00, 0x19, 0x43, 0x01, 0x5a, 0xaa, 0xce, 0x6d, + 0xf6, 0x00, 0xd5, 0xb9, 0xc7, 0x7d, 0xa5, 0x00, 0x18, 0x29, 0x51, 0x52, + 0x33, 0x41, 0x5a, 0xb0, 0xc5, 0x60, 0xb2, 0x00, 0x18, 0x23, 0x01, 0x5a, + 0xce, 0xcf, 0x68, 0x55, 0x00, 0x19, 0x00, 0x49, 0x60, 0xf4, 0xc1, 0x5a, + 0xd6, 0x03, 0x41, 0x5a, 0xe2, 0xd0, 0x5d, 0xa2, 0x00, 0xd6, 0x31, 0xce, + 0x70, 0xc0, 0x00, 0x1a, 0x50, 0xc8, 0xbb, 0x12, 0x00, 0xd5, 0xa9, 0x00, + 0x41, 0x5a, 0xee, 0xc8, 0x9e, 0x5c, 0x00, 0x18, 0x49, 0xc2, 0x00, 0xc0, + 0x00, 0x18, 0xd9, 0xce, 0x6b, 0xf0, 0x00, 0x1a, 0x58, 0x45, 0x02, 0x6d, + 0xc1, 0x5a, 0xfa, 0xc5, 0x1e, 0xc8, 0x00, 0x19, 0xf0, 0xca, 0x8d, 0xb1, + 0x01, 0x02, 0x91, 0xc2, 0x00, 0xfe, 0x00, 0x02, 0x00, 0x4b, 0x93, 0x04, + 0xc1, 0x5b, 0x06, 0x4b, 0x99, 0xef, 0x41, 0x5b, 0x24, 0xc4, 0xde, 0xbf, + 0x01, 0x19, 0xa9, 0xc4, 0xe3, 0x37, 0x01, 0x19, 0xa0, 0x45, 0x00, 0x8c, + 0xc1, 0x5b, 0x42, 0x43, 0x54, 0xfc, 0x41, 0x5b, 0x54, 0xc5, 0xdc, 0x86, + 0x0f, 0x9c, 0xd9, 0xd3, 0x42, 0x09, 0x00, 0x04, 0xd8, 0xc6, 0x0e, 0xbd, + 0x01, 0x12, 0xa1, 0xc4, 0x00, 0xba, 0x01, 0x05, 0x08, 0x4c, 0x29, 0xba, + 0xc1, 0x5b, 0x63, 0x46, 0x10, 0x79, 0x41, 0x5b, 0xd0, 0x4e, 0x0b, 0x18, + 0xc1, 0x5b, 0xea, 0x49, 0x29, 0x29, 0x41, 0x5c, 0x57, 0xce, 0x74, 0x4e, + 0x08, 0x17, 0x01, 0x46, 0x09, 0x97, 0xc1, 0x5c, 0x63, 0x47, 0x34, 0x2f, + 0x41, 0x5c, 0x81, 0xc9, 0x11, 0xf6, 0x01, 0x67, 0xc9, 0xd4, 0x2f, 0xe2, + 0x01, 0x67, 0xd1, 0xd6, 0x2f, 0xe0, 0x01, 0x67, 0xd9, 0xcd, 0x4b, 0xac, + 0x01, 0x67, 0xe0, 0xd0, 0x53, 0xaa, 0x01, 0x67, 0xe9, 0xc8, 0x11, 0xf7, + 0x01, 0x67, 0xf0, 0xcd, 0x80, 0x02, 0x0f, 0xa8, 0x81, 0x4d, 0x7f, 0x32, + 0xc1, 0x5c, 0x9f, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xa9, 0x17, 0xc1, 0x5c, + 0xab, 0xd8, 0x24, 0xfb, 0x01, 0x52, 0x69, 0x42, 0x06, 0x62, 0x41, 0x5c, + 0xba, 0xd3, 0x41, 0x97, 0x01, 0x3f, 0x99, 0x05, 0xc1, 0x5c, 0xcc, 0xc8, + 0x1e, 0x3f, 0x01, 0x11, 0x89, 0xd1, 0x05, 0x75, 0x01, 0x0d, 0xd9, 0x16, + 0xc1, 0x5c, 0xd8, 0x45, 0x00, 0x2c, 0xc1, 0x5c, 0xe4, 0x48, 0x03, 0xc8, + 0x41, 0x5c, 0xf0, 0x16, 0xc1, 0x5c, 0xf6, 0x07, 0xc1, 0x5d, 0x06, 0x44, + 0x26, 0x78, 0xc1, 0x5d, 0x12, 0x15, 0xc1, 0x5d, 0x1e, 0x08, 0xc1, 0x5d, + 0x2a, 0x43, 0x05, 0x14, 0x41, 0x5d, 0x36, 0xc9, 0xad, 0xe3, 0x0f, 0x99, + 0x49, 0xc4, 0x2a, 0x90, 0x0f, 0x99, 0x41, 0xc4, 0x27, 0x54, 0x0f, 0x99, + 0x39, 0xc7, 0xc2, 0x34, 0x0f, 0x99, 0x50, 0x05, 0xc1, 0x5d, 0x42, 0x0a, + 0xc1, 0x5d, 0x56, 0xde, 0x0f, 0x7c, 0x01, 0x3a, 0x11, 0x19, 0xc1, 0x5d, + 0x6e, 0x06, 0xc1, 0x5d, 0x78, 0x0e, 0xc1, 0x5d, 0x86, 0x47, 0x34, 0x2f, + 0xc1, 0x5d, 0x92, 0x16, 0xc1, 0x5d, 0xa8, 0xc6, 0x0e, 0xbd, 0x01, 0x14, + 0xe1, 0x03, 
0xc1, 0x5d, 0xb7, 0x14, 0xc1, 0x5d, 0xc3, 0x0f, 0xc1, 0x5d, + 0xcf, 0x12, 0xc1, 0x5d, 0xdb, 0x0b, 0xc1, 0x5d, 0xf3, 0xcc, 0x07, 0xc7, + 0x01, 0x4e, 0x09, 0x04, 0xc1, 0x5e, 0x05, 0xcc, 0x07, 0xbb, 0x01, 0x4d, + 0xb1, 0x9a, 0x01, 0x5d, 0xf1, 0xcf, 0x69, 0xcc, 0x0f, 0x88, 0x69, 0xc6, + 0x0b, 0x09, 0x0f, 0xbe, 0xb9, 0x0d, 0x41, 0x5e, 0x11, 0x45, 0x00, 0x8c, + 0xc1, 0x5e, 0x1d, 0x5e, 0x0e, 0xe6, 0x41, 0x5e, 0x47, 0x97, 0x09, 0x1b, + 0x53, 0x01, 0x5e, 0x4d, 0x83, 0x09, 0x1a, 0xeb, 0x01, 0x5e, 0x64, 0x8b, + 0x09, 0x1b, 0x1b, 0x01, 0x5e, 0x76, 0xc2, 0x8d, 0xc6, 0x09, 0x1b, 0x10, + 0x94, 0x09, 0x19, 0x43, 0x01, 0x5e, 0x91, 0x00, 0xc1, 0x5e, 0xae, 0x8f, + 0x09, 0x18, 0xeb, 0x01, 0x5e, 0xc1, 0x1c, 0xc1, 0x5e, 0xd6, 0xc4, 0xde, + 0x97, 0x09, 0x1a, 0xc9, 0xc2, 0x01, 0xe2, 0x09, 0x1a, 0x8b, 0x01, 0x5e, + 0xe1, 0x90, 0x09, 0x19, 0x33, 0x01, 0x5e, 0xf5, 0x86, 0x09, 0x18, 0x9b, + 0x01, 0x5e, 0xfb, 0x84, 0x09, 0x18, 0x91, 0x9f, 0x09, 0x18, 0x88, 0x97, + 0x09, 0x18, 0x2b, 0x01, 0x5f, 0x05, 0x83, 0x09, 0x17, 0x5b, 0x01, 0x5f, + 0x1d, 0x8b, 0x09, 0x17, 0xf3, 0x01, 0x5f, 0x3c, 0x87, 0x09, 0x17, 0xe2, + 0x01, 0x5f, 0x51, 0x8b, 0x09, 0x16, 0xdb, 0x01, 0x5f, 0x57, 0x0a, 0xc1, + 0x5f, 0x6e, 0x83, 0x09, 0x14, 0x9b, 0x01, 0x5f, 0x87, 0x97, 0x09, 0x17, + 0x12, 0x01, 0x5f, 0x9f, 0x8b, 0x09, 0x12, 0x63, 0x01, 0x5f, 0xc0, 0x97, + 0x09, 0x13, 0x0b, 0x01, 0x5f, 0xde, 0x83, 0x09, 0x11, 0xf3, 0x01, 0x5f, + 0xee, 0x87, 0x09, 0x12, 0x42, 0x01, 0x60, 0x06, 0x97, 0x09, 0x11, 0x63, + 0x01, 0x60, 0x0a, 0x8b, 0x09, 0x11, 0x53, 0x01, 0x60, 0x2c, 0x87, 0x09, + 0x11, 0x43, 0x01, 0x60, 0x36, 0x83, 0x09, 0x11, 0x02, 0x01, 0x60, 0x3d, + 0x97, 0x09, 0x0f, 0xdb, 0x01, 0x60, 0x56, 0x83, 0x09, 0x0d, 0xbb, 0x01, + 0x60, 0x7f, 0x8b, 0x09, 0x0f, 0xba, 0x01, 0x60, 0x9f, 0x83, 0x09, 0x0a, + 0xbb, 0x01, 0x60, 0xaf, 0xc5, 0xd5, 0xf6, 0x09, 0x0d, 0xb1, 0x97, 0x09, + 0x0d, 0x53, 0x01, 0x60, 0xe5, 0x8b, 0x09, 0x0d, 0x03, 0x01, 0x61, 0x12, + 0xc4, 0x73, 0x32, 0x09, 0x0c, 0xf8, 0x8b, 0x09, 0x09, 0x6b, 0x01, 0x61, + 0x24, 0x83, 0x09, 0x09, 0x4b, 0x01, 0x61, 0x2a, 0x97, 0x09, 0x09, 0xba, + 0x01, 0x61, 0x32, 0x97, 0x09, 0x08, 0xb3, 0x01, 0x61, 0x47, 0x8b, 0x09, + 0x08, 0x03, 0x01, 0x61, 0x6d, 0x07, 0xc1, 0x61, 0x8a, 0x83, 0x09, 0x05, + 0xaa, 0x01, 0x61, 0x99, 0xc3, 0x0a, 0xe2, 0x09, 0x05, 0x0b, 0x01, 0x61, + 0xd5, 0xc3, 0x05, 0x4e, 0x09, 0x05, 0x03, 0x01, 0x61, 0xd9, 0x14, 0xc1, + 0x61, 0xdf, 0x9f, 0x09, 0x04, 0x6b, 0x01, 0x61, 0xee, 0x90, 0x09, 0x04, + 0xbb, 0x01, 0x61, 0xf4, 0x8e, 0x09, 0x04, 0xb1, 0xc3, 0xe0, 0x5f, 0x09, + 0x04, 0xa9, 0xc3, 0x03, 0x30, 0x09, 0x04, 0xa1, 0x00, 0x41, 0x61, 0xf8, + 0x97, 0x09, 0x03, 0xd3, 0x01, 0x62, 0x04, 0x8b, 0x09, 0x03, 0x93, 0x01, + 0x62, 0x27, 0x83, 0x09, 0x02, 0xaa, 0x01, 0x62, 0x42, 0x97, 0x09, 0x02, + 0x6b, 0x01, 0x62, 0x5a, 0x83, 0x09, 0x02, 0x03, 0x01, 0x62, 0x6e, 0x8b, + 0x09, 0x02, 0x4a, 0x01, 0x62, 0x92, 0x86, 0x09, 0x00, 0xe3, 0x01, 0x62, + 0x98, 0x84, 0x09, 0x00, 0x53, 0x01, 0x62, 0x9e, 0xc3, 0x01, 0xc3, 0x09, + 0x01, 0x5b, 0x01, 0x62, 0xa9, 0x15, 0xc1, 0x62, 0xaf, 0x14, 0xc1, 0x62, + 0xbc, 0xc3, 0x0e, 0x61, 0x09, 0x01, 0x99, 0x90, 0x09, 0x01, 0x6b, 0x01, + 0x62, 0xcb, 0x8e, 0x09, 0x01, 0x03, 0x01, 0x62, 0xd5, 0x8d, 0x09, 0x00, + 0xeb, 0x01, 0x62, 0xe7, 0x9f, 0x09, 0x00, 0x49, 0x47, 0x03, 0x4c, 0x41, + 0x62, 0xed, 0x8b, 0x09, 0x13, 0xfb, 0x01, 0x63, 0x1b, 0xc4, 0x73, 0x32, + 0x09, 0x13, 0xf3, 0x01, 0x63, 0x23, 0x83, 0x09, 0x13, 0xd2, 0x01, 0x63, + 0x29, 0x97, 0x09, 0x14, 0x91, 0x8b, 0x09, 0x14, 0x89, 0x83, 0x09, 0x14, + 0x7a, 0x01, 0x63, 0x35, 0xc2, 0x01, 0xe2, 0x09, 0x0a, 0xb1, 0x94, 0x09, + 0x0a, 0xa9, 
0x90, 0x09, 0x0a, 0xa1, 0x8f, 0x09, 0x0a, 0x73, 0x01, 0x63, + 0x39, 0x8e, 0x09, 0x0a, 0x5b, 0x01, 0x63, 0x43, 0x89, 0x09, 0x0a, 0x2b, + 0x01, 0x63, 0x4d, 0xc3, 0x7e, 0x08, 0x09, 0x0a, 0x13, 0x01, 0x63, 0x54, + 0x84, 0x09, 0x0a, 0x09, 0xc2, 0x00, 0xd3, 0x09, 0x0a, 0x00, 0xc9, 0xa8, + 0xd3, 0x09, 0x23, 0xa1, 0xc8, 0xbd, 0xe2, 0x09, 0x23, 0x99, 0xc5, 0x33, + 0x24, 0x09, 0x23, 0x90, 0x43, 0x02, 0x6f, 0xc1, 0x63, 0x5a, 0x44, 0xe0, + 0x57, 0x41, 0x63, 0x82, 0x45, 0x00, 0x2d, 0xc1, 0x63, 0x8e, 0x47, 0xc0, + 0x43, 0x41, 0x63, 0xb6, 0x45, 0x1b, 0xec, 0xc1, 0x63, 0xc6, 0x43, 0x4d, + 0x57, 0xc1, 0x63, 0xeb, 0x54, 0x38, 0x68, 0x41, 0x64, 0x13, 0x44, 0x0d, + 0x14, 0xc1, 0x64, 0x1f, 0x44, 0x09, 0x9e, 0x41, 0x64, 0x43, 0x43, 0x02, + 0x6f, 0xc1, 0x64, 0x72, 0x50, 0x5b, 0x82, 0x41, 0x64, 0x98, 0x43, 0x02, + 0xa0, 0xc1, 0x64, 0xa4, 0x45, 0x02, 0xde, 0x41, 0x64, 0xc9, 0x42, 0x01, + 0xc8, 0xc1, 0x64, 0xee, 0xd1, 0x57, 0x2e, 0x01, 0x1d, 0x50, 0xc8, 0xb7, + 0x32, 0x0f, 0xa5, 0x89, 0xc4, 0x00, 0xba, 0x00, 0x05, 0x20, 0xc8, 0x7d, + 0xa4, 0x07, 0xf2, 0x51, 0xc8, 0x80, 0x2e, 0x07, 0xf2, 0x70, 0x9f, 0x09, + 0x7f, 0x91, 0x9e, 0x09, 0x7f, 0x88, 0x1e, 0xc1, 0x64, 0xfa, 0x1d, 0x41, + 0x65, 0x06, 0x26, 0xc1, 0x65, 0x2a, 0x25, 0xc1, 0x65, 0x4e, 0x24, 0xc1, + 0x65, 0x76, 0x23, 0xc1, 0x65, 0x9d, 0x22, 0xc1, 0x65, 0xc1, 0x21, 0xc1, + 0x65, 0xe5, 0x20, 0xc1, 0x65, 0xfd, 0x1f, 0xc1, 0x66, 0x1d, 0x1e, 0xc1, + 0x66, 0x3d, 0x1d, 0x41, 0x66, 0x5c, 0x87, 0x08, 0x41, 0x99, 0x8b, 0x08, + 0x41, 0xa1, 0x91, 0x08, 0x41, 0xa9, 0x83, 0x08, 0x41, 0x90, 0x83, 0x08, + 0x41, 0xb9, 0x87, 0x08, 0x41, 0xc0, 0x83, 0x08, 0x41, 0xe1, 0x91, 0x08, + 0x41, 0xf8, 0x83, 0x08, 0x40, 0x29, 0x91, 0x08, 0x40, 0x40, 0x83, 0x08, + 0x40, 0x51, 0x87, 0x08, 0x40, 0x59, 0x8b, 0x08, 0x40, 0x61, 0x91, 0x08, + 0x40, 0x69, 0x97, 0x08, 0x40, 0x70, 0x83, 0x08, 0x40, 0x79, 0x87, 0x08, + 0x40, 0x81, 0x8b, 0x08, 0x40, 0x89, 0x91, 0x08, 0x40, 0x91, 0x97, 0x08, + 0x40, 0x98, 0x83, 0x08, 0x40, 0xa1, 0x87, 0x08, 0x40, 0xa9, 0x8b, 0x08, + 0x40, 0xb1, 0x91, 0x08, 0x40, 0xb9, 0x97, 0x08, 0x40, 0xc0, 0x83, 0x08, + 0x40, 0xc9, 0x87, 0x08, 0x40, 0xd1, 0x8b, 0x08, 0x40, 0xd9, 0x91, 0x08, + 0x40, 0xe1, 0x97, 0x08, 0x40, 0xe8, 0x83, 0x08, 0x40, 0xf1, 0x87, 0x08, + 0x40, 0xf9, 0x8b, 0x08, 0x41, 0x01, 0x91, 0x08, 0x41, 0x09, 0x97, 0x08, + 0x41, 0x10, 0x83, 0x08, 0x41, 0x19, 0x87, 0x08, 0x41, 0x21, 0x8b, 0x08, + 0x41, 0x29, 0x91, 0x08, 0x41, 0x31, 0x97, 0x08, 0x41, 0x38, 0x83, 0x08, + 0x41, 0x41, 0x87, 0x08, 0x41, 0x49, 0x8b, 0x08, 0x41, 0x51, 0x91, 0x08, + 0x41, 0x59, 0x97, 0x08, 0x41, 0x60, 0x83, 0x08, 0x41, 0x69, 0x87, 0x08, + 0x41, 0x71, 0x8b, 0x08, 0x41, 0x79, 0x91, 0x08, 0x41, 0x81, 0x97, 0x08, + 0x41, 0x88, 0x97, 0x00, 0x22, 0x1b, 0x01, 0x66, 0x7c, 0x16, 0xc1, 0x66, + 0x8f, 0x19, 0xc1, 0x66, 0xb2, 0x10, 0xc1, 0x66, 0xbc, 0x0e, 0xc1, 0x66, + 0xce, 0x14, 0xc1, 0x66, 0xe6, 0x87, 0x00, 0x22, 0x6b, 0x01, 0x66, 0xf8, + 0x06, 0xc1, 0x67, 0x25, 0x15, 0xc1, 0x67, 0x48, 0x12, 0xc1, 0x67, 0x6a, + 0x83, 0x00, 0x21, 0x83, 0x01, 0x67, 0x7d, 0xc2, 0x0f, 0x9a, 0x00, 0x28, + 0xd9, 0x1b, 0xc1, 0x67, 0x8f, 0x0d, 0xc1, 0x67, 0xab, 0x0a, 0xc1, 0x67, + 0xc8, 0x09, 0xc1, 0x67, 0xd5, 0x04, 0xc1, 0x67, 0xe4, 0x91, 0x00, 0x21, + 0xf3, 0x01, 0x68, 0x02, 0x8b, 0x00, 0x21, 0xc3, 0x01, 0x68, 0x15, 0x1c, + 0xc1, 0x68, 0x32, 0x05, 0xc1, 0x68, 0x3d, 0x44, 0x13, 0x35, 0xc1, 0x68, + 0x58, 0xc2, 0x00, 0x5f, 0x00, 0x21, 0x91, 0xc2, 0x1c, 0x52, 0x00, 0x22, + 0xc1, 0xc4, 0xe0, 0x1b, 0x00, 0x23, 0x98, 0xc4, 0xe2, 0x37, 0x00, 0x26, + 0xa9, 0xc6, 0xcf, 0xe9, 0x00, 0x25, 0xa9, 0xc6, 0xce, 0xb7, 0x00, 0x25, + 0x28, 0x87, 
0x00, 0x21, 0x6b, 0x01, 0x68, 0x64, 0x06, 0xc1, 0x68, 0x91, + 0x15, 0xc1, 0x68, 0xb4, 0x12, 0xc1, 0x68, 0xd6, 0x83, 0x00, 0x20, 0x83, + 0x01, 0x68, 0xe3, 0xc2, 0x00, 0x28, 0x00, 0x28, 0xe1, 0xc2, 0x0f, 0x9a, + 0x00, 0x28, 0xd1, 0x1b, 0xc1, 0x68, 0xf5, 0x14, 0xc1, 0x69, 0x11, 0x0e, + 0xc1, 0x69, 0x23, 0x0d, 0xc1, 0x69, 0x35, 0x0a, 0xc1, 0x69, 0x52, 0x09, + 0xc1, 0x69, 0x5f, 0x05, 0xc1, 0x69, 0x6e, 0x97, 0x00, 0x21, 0x1b, 0x01, + 0x69, 0x89, 0x04, 0xc1, 0x69, 0x96, 0x91, 0x00, 0x20, 0xf3, 0x01, 0x69, + 0xb4, 0x8b, 0x00, 0x20, 0xc3, 0x01, 0x69, 0xc7, 0x1c, 0xc1, 0x69, 0xe4, + 0x16, 0xc1, 0x69, 0xef, 0xc2, 0x1c, 0x52, 0x00, 0x20, 0x41, 0x10, 0xc1, + 0x6a, 0x06, 0xc2, 0x00, 0x5f, 0x00, 0x20, 0x91, 0x44, 0x13, 0x35, 0xc1, + 0x6a, 0x12, 0xc4, 0xe0, 0x1b, 0x00, 0x23, 0x90, 0xc4, 0xe2, 0x37, 0x00, + 0x26, 0xa1, 0xc6, 0xcf, 0xe9, 0x00, 0x25, 0xa1, 0xc6, 0xce, 0xb7, 0x00, + 0x25, 0x20, 0xc2, 0x02, 0xa0, 0x0f, 0xdf, 0x91, 0xc4, 0x02, 0xde, 0x0f, + 0xdf, 0x98, 0xc3, 0x09, 0x9e, 0x0f, 0xdf, 0xa1, 0xc3, 0x0d, 0x14, 0x0f, + 0xdf, 0xa8, 0xc2, 0x22, 0xcc, 0x0f, 0xdf, 0xb1, 0xc4, 0x18, 0x10, 0x0f, + 0xdf, 0xb8, 0xa0, 0x00, 0x04, 0x79, 0x9f, 0x00, 0x04, 0x70, 0x47, 0xc2, + 0x50, 0xc1, 0x6a, 0x1e, 0x43, 0x00, 0x2c, 0xc1, 0x6a, 0x2a, 0x0e, 0xc1, + 0x6a, 0x30, 0xde, 0x0f, 0xb8, 0x01, 0x00, 0xd9, 0xd4, 0x3e, 0xd0, 0x00, + 0x04, 0xd0, 0x47, 0x34, 0x2f, 0xc1, 0x6a, 0x3a, 0x46, 0x09, 0x97, 0x41, + 0x6a, 0x58, 0xcb, 0x1e, 0x89, 0x00, 0x6c, 0x09, 0x03, 0xc1, 0x6a, 0x76, + 0xc9, 0xb2, 0x24, 0x00, 0x6c, 0x18, 0x46, 0x02, 0x0f, 0xc1, 0x6a, 0x82, + 0x4a, 0x9d, 0xec, 0x41, 0x6a, 0xd0, 0xca, 0x63, 0xc8, 0x00, 0x6e, 0x79, + 0x0d, 0xc1, 0x6a, 0xf4, 0x45, 0x63, 0xc3, 0xc1, 0x6b, 0x00, 0x42, 0x01, + 0x30, 0x41, 0x6b, 0x1e, 0x47, 0x01, 0xbb, 0xc1, 0x6b, 0x2a, 0x43, 0x46, + 0xac, 0x41, 0x6b, 0x34, 0x0b, 0xc1, 0x6b, 0x46, 0xc8, 0x11, 0xf7, 0x0e, + 0xd4, 0x41, 0x0e, 0xc1, 0x6b, 0x52, 0x48, 0xb8, 0x0a, 0xc1, 0x6b, 0x5e, + 0x5c, 0x12, 0x39, 0x41, 0x6b, 0x70, 0x11, 0xc1, 0x6b, 0x7f, 0x46, 0x94, + 0x69, 0x41, 0x6b, 0x8b, 0xc8, 0x52, 0x00, 0x0e, 0xd4, 0x49, 0x48, 0x18, + 0xb0, 0xc1, 0x6b, 0x9d, 0x47, 0xc0, 0x12, 0xc1, 0x6b, 0xa9, 0x47, 0xc6, + 0xe8, 0xc1, 0x6b, 0xb9, 0x46, 0xd0, 0xb5, 0x41, 0x6b, 0xc5, 0x47, 0x7f, + 0x5a, 0xc1, 0x6b, 0xd7, 0x0b, 0x41, 0x6b, 0xdf, 0xe0, 0x00, 0x67, 0x0e, + 0xd3, 0xa8, 0x11, 0xc1, 0x6b, 0xe9, 0x07, 0xc1, 0x6b, 0xfb, 0x46, 0xcd, + 0x13, 0x41, 0x6c, 0x0a, 0xc9, 0xaa, 0xb9, 0x0e, 0xd3, 0x61, 0xc3, 0x10, + 0xa1, 0x0e, 0xd1, 0x81, 0x42, 0x0c, 0x43, 0x41, 0x6c, 0x16, 0x03, 0xc1, + 0x6c, 0x32, 0xc3, 0x01, 0x9c, 0x0e, 0xcf, 0xfa, 0x01, 0x6c, 0x3e, 0xc3, + 0x6b, 0x04, 0x0e, 0xd3, 0x51, 0x44, 0x12, 0x51, 0x41, 0x6c, 0x42, 0x47, + 0xc3, 0xdf, 0xc1, 0x6c, 0x52, 0x44, 0x1a, 0x39, 0x41, 0x6c, 0x6a, 0x45, + 0xdb, 0x37, 0xc1, 0x6c, 0x9e, 0x44, 0xdc, 0x0a, 0x41, 0x6c, 0xaa, 0x44, + 0xcf, 0x23, 0xc1, 0x6c, 0xbc, 0x44, 0x87, 0x15, 0x41, 0x6c, 0xc8, 0x4f, + 0x61, 0xa7, 0xc1, 0x6c, 0xd4, 0x47, 0xc6, 0x55, 0x41, 0x6c, 0xe6, 0xc7, + 0x0b, 0xc8, 0x0e, 0xc8, 0x51, 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0x49, 0xc6, + 0x24, 0x3b, 0x0e, 0xc8, 0x40, 0xca, 0x22, 0x51, 0x01, 0x39, 0xb1, 0xd4, + 0x3e, 0xbc, 0x0f, 0xa9, 0x79, 0xcd, 0x0e, 0x61, 0x0f, 0xbe, 0x68, 0x03, + 0xc1, 0x6d, 0x0e, 0x91, 0x08, 0xad, 0xd1, 0x87, 0x08, 0xad, 0xc1, 0xc9, + 0xb2, 0x2d, 0x08, 0xad, 0xa3, 0x01, 0x6d, 0x23, 0x97, 0x08, 0xad, 0x93, + 0x01, 0x6d, 0x27, 0x8b, 0x08, 0xad, 0x82, 0x01, 0x6d, 0x2b, 0x83, 0x08, + 0xac, 0x03, 0x01, 0x6d, 0x2f, 0x16, 0xc1, 0x6d, 0x41, 0xc2, 0x00, 0xd0, + 0x08, 0xad, 0x71, 0x15, 0xc1, 0x6d, 0x56, 0x18, 0xc1, 0x6d, 0x66, 0xc2, + 0x00, 0xdb, 
0x08, 0xad, 0x49, 0xc2, 0x00, 0x39, 0x08, 0xad, 0x41, 0xc2, + 0x19, 0x2c, 0x08, 0xad, 0x39, 0xc2, 0x01, 0xc3, 0x08, 0xad, 0x31, 0x04, + 0xc1, 0x6d, 0x70, 0x12, 0xc1, 0x6d, 0x7a, 0x10, 0xc1, 0x6d, 0x84, 0x06, + 0xc1, 0x6d, 0x9a, 0x0c, 0xc1, 0x6d, 0xa8, 0x05, 0xc1, 0x6d, 0xb2, 0x09, + 0xc1, 0x6d, 0xbc, 0x0d, 0xc1, 0x6d, 0xc6, 0x91, 0x08, 0xac, 0x61, 0x87, + 0x08, 0xac, 0x51, 0x97, 0x08, 0xac, 0x23, 0x01, 0x6d, 0xd0, 0x8b, 0x08, + 0xac, 0x12, 0x01, 0x6d, 0xd4, 0x07, 0xc1, 0x6d, 0xd8, 0x44, 0x00, 0xbb, + 0x41, 0x6d, 0xe4, 0xa0, 0x08, 0xae, 0x41, 0x9f, 0x08, 0xae, 0x39, 0x9e, + 0x08, 0xae, 0x30, 0xcb, 0x97, 0xf5, 0x08, 0xae, 0x19, 0xc4, 0x19, 0x53, + 0x08, 0xae, 0x10, 0xd3, 0x41, 0x25, 0x0f, 0xad, 0x09, 0xd1, 0x53, 0x10, + 0x0f, 0xad, 0x01, 0xd4, 0x06, 0x73, 0x0f, 0xac, 0xd9, 0xd3, 0x43, 0x13, + 0x0f, 0xac, 0xd0, 0xd3, 0x41, 0x25, 0x0f, 0xac, 0xf9, 0xd1, 0x53, 0x10, + 0x0f, 0xac, 0xf1, 0xd4, 0x06, 0x73, 0x0f, 0xac, 0xc9, 0xd3, 0x43, 0x13, + 0x0f, 0xac, 0xc0, 0x11, 0xc1, 0x6e, 0x02, 0xcc, 0x86, 0x85, 0x01, 0x31, + 0x51, 0xc6, 0x0e, 0xbd, 0x01, 0x12, 0xd9, 0x45, 0x00, 0x8c, 0x41, 0x6e, + 0x0e, 0xc4, 0x27, 0xe3, 0x00, 0x00, 0x11, 0xc7, 0xc3, 0x92, 0x00, 0x00, + 0x09, 0x15, 0xc1, 0x6e, 0x1a, 0xce, 0x6d, 0x94, 0x00, 0x04, 0xb1, 0xcc, + 0x87, 0xc9, 0x00, 0x04, 0xb0, 0xc4, 0x1d, 0xa8, 0x01, 0x1f, 0x21, 0xc6, + 0xcd, 0xcd, 0x0f, 0xa6, 0x78, 0xcb, 0x99, 0x55, 0x0f, 0xde, 0x31, 0xc5, + 0x21, 0xd2, 0x0f, 0xde, 0x48, 0xc4, 0x00, 0x49, 0x0f, 0xde, 0x39, 0xc5, + 0x00, 0x2c, 0x0f, 0xde, 0x40, 0xcb, 0x1e, 0x89, 0x05, 0x46, 0x29, 0x42, + 0x07, 0xb2, 0xc1, 0x6e, 0x26, 0xc8, 0x14, 0x38, 0x05, 0x44, 0x00, 0x03, + 0xc1, 0x6e, 0x32, 0x91, 0x05, 0x46, 0x0b, 0x01, 0x6e, 0x3e, 0x87, 0x05, + 0x45, 0xf3, 0x01, 0x6e, 0x42, 0x48, 0xb2, 0x2d, 0xc1, 0x6e, 0x46, 0x8b, + 0x05, 0x45, 0xb3, 0x01, 0x6e, 0x54, 0x97, 0x05, 0x45, 0xc2, 0x01, 0x6e, + 0x58, 0x15, 0xc1, 0x6e, 0x5c, 0xc2, 0x00, 0xd0, 0x05, 0x45, 0x91, 0x0e, + 0xc1, 0x6e, 0x6c, 0x83, 0x05, 0x44, 0x13, 0x01, 0x6e, 0x76, 0x8b, 0x05, + 0x44, 0x23, 0x01, 0x6e, 0x82, 0x97, 0x05, 0x44, 0x33, 0x01, 0x6e, 0x86, + 0x18, 0xc1, 0x6e, 0x8a, 0x87, 0x05, 0x44, 0x63, 0x01, 0x6e, 0x94, 0x91, + 0x05, 0x44, 0x7b, 0x01, 0x6e, 0x98, 0x0d, 0xc1, 0x6e, 0x9c, 0x09, 0xc1, + 0x6e, 0xa6, 0x10, 0xc1, 0x6e, 0xb0, 0x05, 0xc1, 0x6e, 0xc6, 0x0c, 0xc1, + 0x6e, 0xd0, 0x16, 0xc1, 0x6e, 0xda, 0x06, 0xc1, 0x6e, 0xe8, 0x12, 0xc1, + 0x6e, 0xf6, 0x04, 0xc1, 0x6f, 0x00, 0xc2, 0x01, 0xc3, 0x05, 0x45, 0x51, + 0xc2, 0x19, 0x2c, 0x05, 0x45, 0x59, 0xc2, 0x00, 0x39, 0x05, 0x45, 0x60, + 0xc4, 0x19, 0x53, 0x05, 0x46, 0x71, 0xcb, 0x97, 0xf5, 0x05, 0x46, 0x79, + 0x45, 0x09, 0x98, 0x41, 0x6f, 0x0a, 0x47, 0x00, 0x58, 0xc1, 0x6f, 0x2e, + 0x48, 0xb9, 0x02, 0x41, 0x6f, 0x3a, 0x10, 0xc1, 0x6f, 0x40, 0xc6, 0xcd, + 0x6d, 0x00, 0x41, 0xe1, 0xc5, 0xd7, 0x0e, 0x00, 0x41, 0xa1, 0xc5, 0xd3, + 0xfd, 0x00, 0x41, 0x88, 0xcb, 0x96, 0x5e, 0x00, 0x41, 0xe9, 0xc9, 0xa9, + 0x99, 0x00, 0x41, 0xa8, 0xc3, 0xdd, 0x83, 0x00, 0x41, 0xd1, 0xc4, 0xe1, + 0x73, 0x00, 0x41, 0xc0, 0xc7, 0xc4, 0x33, 0x00, 0x41, 0x69, 0xce, 0x70, + 0x34, 0x00, 0x40, 0xd9, 0xc6, 0x64, 0xa4, 0x00, 0x40, 0xc9, 0xc9, 0xac, + 0x3c, 0x00, 0x40, 0xc1, 0xc2, 0x00, 0x74, 0x00, 0x40, 0xb2, 0x01, 0x6f, + 0x4c, 0x8b, 0x00, 0x41, 0x41, 0xc7, 0xc3, 0x4c, 0x00, 0x41, 0x21, 0xce, + 0x70, 0x34, 0x00, 0x40, 0xd0, 0xc4, 0xdb, 0xfb, 0x00, 0x41, 0x61, 0xc6, + 0xc3, 0x4d, 0x00, 0x41, 0x28, 0xc9, 0xb1, 0x4c, 0x00, 0x41, 0x0a, 0x01, + 0x6f, 0x52, 0x8b, 0x00, 0x41, 0x49, 0x97, 0x00, 0x41, 0x31, 0x83, 0x00, + 0x41, 0x13, 0x01, 0x6f, 0x56, 0x87, 0x00, 0x40, 0xe0, 0x83, 0x00, 0x41, + 0x00, 0xc3, 
0xb8, 0xac, 0x00, 0x40, 0xa9, 0xc6, 0xcd, 0x07, 0x00, 0x40, + 0x89, 0xc2, 0x00, 0x8d, 0x00, 0x40, 0x40, 0xc3, 0x00, 0xd0, 0x00, 0x40, + 0xa1, 0xc6, 0xcf, 0x77, 0x00, 0x40, 0x70, 0x90, 0x00, 0x40, 0x79, 0x96, + 0x00, 0x40, 0x39, 0x9b, 0x00, 0x40, 0x20, 0xc2, 0x04, 0xc6, 0x00, 0x40, + 0x29, 0xc2, 0x00, 0x8d, 0x00, 0x40, 0x08, 0xc3, 0x02, 0x9b, 0x01, 0x52, + 0xc1, 0xc2, 0x00, 0xbf, 0x01, 0x52, 0xb8, 0xc6, 0x00, 0x91, 0x0f, 0xa5, + 0x21, 0xc4, 0x00, 0x87, 0x0f, 0xb1, 0xa1, 0xcd, 0x7f, 0x66, 0x0f, 0xb6, + 0x60, 0xc9, 0x00, 0xca, 0x01, 0x54, 0xab, 0x01, 0x6f, 0x5a, 0xcc, 0x07, + 0xc7, 0x01, 0x54, 0xb2, 0x01, 0x6f, 0x60, 0xc9, 0xab, 0x6d, 0x01, 0x5a, + 0xd1, 0xcd, 0x7d, 0x2a, 0x01, 0x5a, 0xe0, 0x15, 0xc1, 0x6f, 0x66, 0xd1, + 0x50, 0x68, 0x08, 0x8e, 0xe9, 0xca, 0x9d, 0x56, 0x08, 0x8e, 0xe1, 0x07, + 0xc1, 0x6f, 0x7c, 0x06, 0xc1, 0x6f, 0x88, 0x46, 0x34, 0x6f, 0xc1, 0x6f, + 0x9a, 0xd1, 0x50, 0xce, 0x08, 0x8e, 0x39, 0xc2, 0x00, 0x7a, 0x08, 0x8e, + 0x21, 0x47, 0x02, 0x0e, 0x41, 0x6f, 0xa6, 0xc4, 0xe3, 0x9f, 0x08, 0x22, + 0x81, 0x16, 0xc1, 0x70, 0x0b, 0xc4, 0xe0, 0xf7, 0x08, 0x22, 0x91, 0xc3, + 0x1b, 0x05, 0x08, 0x22, 0x99, 0x15, 0xc1, 0x70, 0x15, 0xc6, 0xcc, 0x05, + 0x08, 0x22, 0xb9, 0x42, 0x0c, 0x43, 0xc1, 0x70, 0x1f, 0x0a, 0xc1, 0x70, + 0x27, 0xc3, 0xe5, 0xae, 0x08, 0x22, 0xd1, 0xc4, 0xe3, 0x63, 0x08, 0x22, + 0xd9, 0xc3, 0x9e, 0xc8, 0x08, 0x22, 0xe1, 0xc3, 0x34, 0x6f, 0x08, 0x22, + 0xe9, 0xc3, 0xe5, 0x39, 0x08, 0x22, 0xf9, 0x0f, 0xc1, 0x70, 0x33, 0xc5, + 0xdd, 0x4e, 0x08, 0x23, 0x09, 0x42, 0x02, 0xa0, 0xc1, 0x70, 0x3f, 0xc4, + 0xe1, 0x0f, 0x08, 0x23, 0x21, 0x0b, 0xc1, 0x70, 0x49, 0x07, 0xc1, 0x70, + 0x59, 0x03, 0xc1, 0x70, 0x69, 0x11, 0xc1, 0x70, 0x8f, 0xc4, 0xdf, 0x73, + 0x08, 0x23, 0x71, 0xc3, 0x20, 0x18, 0x08, 0x23, 0x79, 0xc2, 0x02, 0xae, + 0x08, 0x23, 0x98, 0xc7, 0xc4, 0x64, 0x0d, 0xe5, 0x19, 0xc9, 0xb3, 0x05, + 0x0d, 0xe5, 0x11, 0xd2, 0x4c, 0x7f, 0x0d, 0xe5, 0x09, 0xce, 0x70, 0x42, + 0x0d, 0xe5, 0x00, 0x46, 0x03, 0x87, 0xc1, 0x70, 0xaf, 0xc9, 0xaf, 0x30, + 0x01, 0x56, 0xf1, 0xc9, 0x32, 0xb7, 0x01, 0x56, 0xfb, 0x01, 0x70, 0xb5, + 0xc7, 0xc4, 0x5d, 0x01, 0x57, 0x03, 0x01, 0x70, 0xbb, 0xd3, 0x46, 0xdc, + 0x01, 0x5a, 0x71, 0x04, 0x41, 0x70, 0xbf, 0x91, 0x01, 0x09, 0xa1, 0x87, + 0x01, 0x09, 0x79, 0x8e, 0x01, 0x08, 0x99, 0x89, 0x01, 0x08, 0x50, 0x8f, + 0x01, 0x09, 0x99, 0x88, 0x01, 0x09, 0x89, 0x87, 0x01, 0x09, 0x81, 0x84, + 0x01, 0x09, 0x61, 0x94, 0x01, 0x08, 0xd9, 0x92, 0x01, 0x08, 0xc1, 0x8e, + 0x01, 0x08, 0x91, 0x8b, 0x01, 0x08, 0x81, 0x8a, 0x01, 0x08, 0x58, 0xd0, + 0x5b, 0xc2, 0x0f, 0xc2, 0xb9, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xc9, 0xc5, + 0x01, 0xa2, 0x01, 0x0c, 0xcb, 0x01, 0x70, 0xcb, 0x49, 0x01, 0xaa, 0xc1, + 0x70, 0xcf, 0xcb, 0x01, 0xfc, 0x01, 0x58, 0x19, 0xcb, 0x94, 0x22, 0x01, + 0x58, 0x59, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x4a, 0x01, 0x70, 0xe1, 0xd0, + 0x5b, 0xc2, 0x0f, 0xc2, 0xb1, 0xc5, 0x01, 0xa2, 0x01, 0x0c, 0xc3, 0x01, + 0x70, 0xe7, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xc1, 0x49, 0x01, 0xaa, 0xc1, + 0x70, 0xeb, 0xcb, 0x01, 0xfc, 0x01, 0x58, 0x11, 0xcb, 0x94, 0x22, 0x01, + 0x58, 0x51, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x42, 0x01, 0x70, 0xfd, 0xc5, + 0x86, 0x2c, 0x08, 0xd4, 0xf9, 0xcc, 0x86, 0x25, 0x08, 0xd4, 0xf0, 0xc7, + 0x40, 0xe5, 0x08, 0xd4, 0xb9, 0xc8, 0x14, 0x38, 0x08, 0xd4, 0xb1, 0xcb, + 0x93, 0xf6, 0x08, 0xd4, 0x29, 0xcb, 0x8f, 0xe1, 0x08, 0xd4, 0x20, 0x8a, + 0x08, 0xd4, 0x98, 0x89, 0x08, 0xd4, 0x60, 0x83, 0x08, 0xd4, 0x49, 0xc2, + 0x00, 0xd0, 0x08, 0xd4, 0x40, 0xc3, 0x1d, 0x35, 0x08, 0xd4, 0x19, 0xc2, + 0x00, 0xd0, 0x08, 0xd2, 0xe9, 0x83, 0x08, 0xd2, 0xe0, 0x83, 0x08, 0xd4, + 0x09, 0xc2, 
0x0d, 0xf6, 0x08, 0xd4, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0xd3, + 0xf8, 0x83, 0x08, 0xd3, 0xc9, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0xc0, 0xc2, + 0x02, 0x1c, 0x08, 0xd3, 0xb9, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x71, 0x83, + 0x08, 0xd3, 0x69, 0x06, 0x41, 0x71, 0x03, 0x15, 0xc1, 0x71, 0x0d, 0xc2, + 0x00, 0xd0, 0x08, 0xd3, 0x61, 0x83, 0x08, 0xd3, 0x59, 0x16, 0x41, 0x71, + 0x17, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x99, 0x83, 0x08, 0xd3, 0x90, 0xc2, + 0x00, 0xd0, 0x08, 0xd3, 0x89, 0x83, 0x08, 0xd3, 0x80, 0x83, 0x08, 0xd3, + 0x79, 0xc2, 0x00, 0xc1, 0x08, 0xd3, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0xd3, + 0x29, 0xc2, 0x01, 0x30, 0x08, 0xd3, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xd3, + 0x21, 0x83, 0x08, 0xd3, 0x18, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x11, 0x83, + 0x08, 0xd3, 0x08, 0xc2, 0x00, 0xd0, 0x08, 0xd2, 0xf9, 0x83, 0x08, 0xd2, + 0xf0, 0x48, 0xb2, 0x2d, 0xc1, 0x71, 0x21, 0x03, 0xc1, 0x71, 0x29, 0x91, + 0x08, 0xd2, 0xab, 0x01, 0x71, 0x31, 0x87, 0x08, 0xd2, 0xa1, 0x97, 0x08, + 0xd2, 0x9b, 0x01, 0x71, 0x35, 0x8b, 0x08, 0xd2, 0x88, 0xc4, 0x18, 0x10, + 0x08, 0x87, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0x87, 0xb0, 0xc3, 0x0d, 0x14, + 0x08, 0x87, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0x87, 0xa0, 0xc4, 0x02, 0xde, + 0x08, 0x87, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0x87, 0x90, 0x87, 0x08, 0x87, + 0x41, 0x8a, 0x08, 0x86, 0xb0, 0x8a, 0x08, 0x87, 0x39, 0xc2, 0x16, 0x1c, + 0x08, 0x87, 0x18, 0xc3, 0x44, 0x79, 0x08, 0x87, 0x09, 0xc2, 0x02, 0x98, + 0x08, 0x86, 0xc9, 0xc3, 0x40, 0x40, 0x08, 0x86, 0xb8, 0xd1, 0x50, 0x57, + 0x08, 0x7a, 0xc1, 0xcd, 0x7a, 0x52, 0x08, 0x7a, 0xaa, 0x01, 0x71, 0x39, + 0xc8, 0x0d, 0x03, 0x08, 0x7a, 0xa0, 0xc5, 0x28, 0xee, 0x08, 0x7a, 0x99, + 0xc2, 0x00, 0xc4, 0x08, 0x7a, 0x90, 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x69, + 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x60, 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x59, + 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x50, 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x49, + 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x38, 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x41, + 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x30, 0xc3, 0x26, 0x1a, 0x08, 0x7a, 0x21, + 0xc5, 0xcf, 0xd8, 0x08, 0x79, 0xc8, 0xc3, 0x11, 0xef, 0x08, 0x7a, 0x09, + 0x03, 0x41, 0x71, 0x3f, 0xc3, 0x16, 0x5a, 0x08, 0x79, 0xe9, 0xc4, 0x36, + 0xb5, 0x08, 0x79, 0x80, 0xc2, 0x00, 0x8e, 0x08, 0x79, 0xb0, 0x16, 0xc1, + 0x71, 0x4b, 0x08, 0xc1, 0x71, 0x5d, 0x19, 0xc1, 0x71, 0x65, 0x0e, 0xc1, + 0x71, 0x75, 0x11, 0xc1, 0x71, 0x8b, 0x0b, 0xc1, 0x71, 0xa4, 0x05, 0xc1, + 0x71, 0xb8, 0x14, 0xc1, 0x71, 0xde, 0x0a, 0xc1, 0x71, 0xf9, 0x06, 0xc1, + 0x72, 0x21, 0x12, 0xc1, 0x72, 0x47, 0x07, 0xc1, 0x72, 0x80, 0x03, 0xc1, + 0x72, 0x94, 0xc3, 0xdf, 0x37, 0x01, 0x98, 0x31, 0x0d, 0xc1, 0x72, 0xba, + 0x09, 0xc1, 0x73, 0x1b, 0x15, 0xc1, 0x73, 0x40, 0x10, 0xc1, 0x73, 0x58, + 0x04, 0xc1, 0x73, 0x79, 0x0f, 0xc1, 0x73, 0x99, 0x1b, 0xc1, 0x73, 0xec, + 0xc8, 0xbe, 0xda, 0x01, 0x9e, 0xf0, 0x0e, 0xc1, 0x73, 0xf8, 0x15, 0xc1, + 0x74, 0x02, 0x0d, 0xc1, 0x74, 0x32, 0xcc, 0x83, 0x3d, 0x01, 0x15, 0x09, + 0x16, 0xc1, 0x74, 0x3e, 0x0f, 0xc1, 0x74, 0x4e, 0x12, 0xc1, 0x74, 0x58, + 0x05, 0xc1, 0x74, 0x64, 0x18, 0xc1, 0x74, 0x74, 0x17, 0xc1, 0x74, 0x7e, + 0x0a, 0xc1, 0x74, 0x8a, 0x11, 0xc1, 0x74, 0x9e, 0x08, 0xc1, 0x74, 0xa8, + 0xc7, 0xc4, 0x56, 0x0f, 0x8c, 0xf9, 0x10, 0xc1, 0x74, 0xc0, 0xc2, 0x02, + 0xfb, 0x0f, 0x8c, 0xa1, 0xc8, 0x0a, 0xff, 0x01, 0x4e, 0x31, 0xd5, 0x36, + 0xc5, 0x01, 0x4e, 0x21, 0xc2, 0x15, 0x95, 0x0f, 0x8a, 0x78, 0xc9, 0xb0, + 0xf2, 0x01, 0x20, 0xd3, 0x01, 0x74, 0xca, 0xc4, 0x40, 0x89, 0x01, 0x21, + 0x01, 0xcf, 0x6a, 0x08, 0x01, 0x20, 0xb1, 0x45, 0xa0, 0x21, 0xc1, 0x74, + 0xd0, 0x48, 0x46, 0xa3, 0xc1, 0x74, 0xdc, 0xcf, 0x69, 0x45, 0x01, 0x0a, + 0x78, 0x07, 
0xc1, 0x74, 0xe8, 0xcf, 0x61, 0x02, 0x01, 0x20, 0x80, 0x07, + 0xc1, 0x74, 0xf7, 0xc3, 0x11, 0xf7, 0x01, 0x20, 0x00, 0xcd, 0x7d, 0xe0, + 0x01, 0x20, 0xe1, 0xc8, 0xb7, 0xfa, 0x01, 0x20, 0x60, 0xc5, 0x61, 0x0c, + 0x01, 0x20, 0xd9, 0x10, 0x41, 0x75, 0x03, 0xc4, 0x23, 0xca, 0x01, 0x20, + 0xc1, 0xcd, 0x75, 0x58, 0x01, 0x20, 0x68, 0xc8, 0xb8, 0x9a, 0x01, 0x20, + 0x41, 0xc3, 0x08, 0x93, 0x01, 0x20, 0x38, 0x0f, 0xc1, 0x75, 0x0f, 0xc2, + 0x00, 0x67, 0x00, 0x39, 0x33, 0x01, 0x75, 0x1b, 0x16, 0xc1, 0x75, 0x21, + 0x15, 0xc1, 0x75, 0x30, 0x14, 0xc1, 0x75, 0x4e, 0xc4, 0xc0, 0x4b, 0x00, + 0x39, 0x49, 0x87, 0x00, 0x39, 0x29, 0xcd, 0x7e, 0x14, 0x00, 0x39, 0x21, + 0xc3, 0x20, 0x18, 0x00, 0x39, 0x11, 0xc6, 0xd0, 0xcd, 0x00, 0x39, 0x01, + 0xc4, 0xe0, 0xe7, 0x00, 0x38, 0xf9, 0xc4, 0xde, 0xef, 0x00, 0x38, 0xeb, + 0x01, 0x75, 0x5a, 0xc2, 0x01, 0x7f, 0x00, 0x38, 0xbb, 0x01, 0x75, 0x60, + 0xc4, 0x69, 0x81, 0x00, 0x38, 0xc9, 0xc3, 0x7e, 0x89, 0x00, 0x38, 0xc1, + 0x06, 0xc1, 0x75, 0x66, 0xc5, 0xd7, 0x5e, 0x00, 0x38, 0x9b, 0x01, 0x75, + 0x72, 0xc4, 0xe3, 0x27, 0x00, 0x38, 0x91, 0xc5, 0x58, 0x4d, 0x00, 0x38, + 0x80, 0x44, 0x7c, 0x67, 0xc1, 0x75, 0x78, 0x48, 0xbf, 0x2a, 0xc1, 0x75, + 0x82, 0xcf, 0x62, 0xf1, 0x00, 0x38, 0x28, 0xc7, 0x08, 0x6b, 0x00, 0x39, + 0xc9, 0xca, 0x01, 0x68, 0x00, 0x39, 0xc0, 0x45, 0xd8, 0x94, 0xc1, 0x75, + 0x94, 0xc4, 0xde, 0xa7, 0x00, 0x39, 0xf9, 0xc7, 0xc4, 0x2c, 0x00, 0x3a, + 0x10, 0xc6, 0x19, 0x7a, 0x00, 0x39, 0xa9, 0xc5, 0x05, 0x02, 0x00, 0x39, + 0xa1, 0xc5, 0x00, 0xd4, 0x00, 0x39, 0x98, 0xc6, 0x19, 0x7a, 0x00, 0x39, + 0x91, 0xc5, 0x05, 0x02, 0x00, 0x39, 0x89, 0xc5, 0x00, 0xd4, 0x00, 0x39, + 0x80, 0xc9, 0xaf, 0x0c, 0x00, 0x38, 0x51, 0x4b, 0x8f, 0xd6, 0x41, 0x75, + 0xa0, 0x48, 0xbf, 0x02, 0xc1, 0x75, 0xac, 0x4a, 0x9f, 0x22, 0x41, 0x75, + 0xbb, 0xcf, 0x60, 0x12, 0x00, 0x38, 0x01, 0x45, 0x75, 0x81, 0x41, 0x75, + 0xca, 0x51, 0x55, 0x41, 0xc1, 0x75, 0xd6, 0x4a, 0x0e, 0x7d, 0x41, 0x75, + 0xe2, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0x39, 0xc5, 0x05, 0x02, 0x00, 0x3a, + 0x40, 0x91, 0x05, 0x40, 0x39, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x40, 0x91, + 0x05, 0x40, 0x49, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x50, 0x91, 0x05, 0x40, + 0x61, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x68, 0x16, 0xc1, 0x75, 0xee, 0x91, + 0x05, 0x40, 0xa1, 0xc2, 0x01, 0x23, 0x05, 0x40, 0xa8, 0x06, 0xc1, 0x75, + 0xf8, 0x91, 0x05, 0x40, 0xb1, 0xc2, 0x01, 0x23, 0x05, 0x40, 0xb8, 0x91, + 0x05, 0x40, 0x71, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x78, 0x91, 0x05, 0x40, + 0xc9, 0xc2, 0x01, 0x23, 0x05, 0x40, 0xd0, 0x91, 0x05, 0x40, 0xd9, 0xc2, + 0x01, 0x23, 0x05, 0x40, 0xe0, 0x91, 0x05, 0x40, 0xf1, 0xc2, 0x00, 0x79, + 0x05, 0x41, 0x00, 0xc7, 0x14, 0x39, 0x05, 0x40, 0x59, 0xd0, 0x5a, 0xd2, + 0x05, 0x41, 0x60, 0x46, 0x00, 0x8b, 0x41, 0x76, 0x02, 0x95, 0x01, 0x39, + 0x40, 0xd1, 0x4f, 0xe0, 0x01, 0x3e, 0x49, 0xc2, 0x00, 0x55, 0x01, 0x14, + 0x1b, 0x01, 0x76, 0x14, 0x46, 0x00, 0xd4, 0xc1, 0x76, 0x18, 0x45, 0x00, + 0x8c, 0xc1, 0x76, 0x24, 0x47, 0x13, 0x6d, 0x41, 0x76, 0x36, 0x0e, 0xc1, + 0x76, 0x42, 0xd1, 0x1a, 0x4a, 0x01, 0x03, 0xf1, 0x07, 0xc1, 0x76, 0x4e, + 0xc5, 0x1d, 0x1d, 0x01, 0x03, 0xd9, 0xc9, 0x60, 0xf3, 0x01, 0x03, 0xd1, + 0xc4, 0x26, 0x78, 0x01, 0x03, 0xc9, 0x15, 0xc1, 0x76, 0x5a, 0x08, 0xc1, + 0x76, 0x66, 0xc4, 0x15, 0xe7, 0x01, 0x03, 0x81, 0x16, 0xc1, 0x76, 0x72, + 0xc3, 0x05, 0x14, 0x00, 0x05, 0xc8, 0xca, 0xa1, 0x98, 0x00, 0xe6, 0x39, + 0xca, 0xa4, 0x86, 0x00, 0xe6, 0x31, 0xca, 0x9c, 0x8e, 0x00, 0xe6, 0x29, + 0xcb, 0x90, 0x23, 0x00, 0xe6, 0x21, 0xc5, 0xdd, 0x53, 0x00, 0xe6, 0x19, + 0x12, 0xc1, 0x76, 0x7e, 0xc5, 0xdd, 0xb7, 0x00, 0xe6, 0x00, 0x08, 0xc1, + 0x76, 0x8a, 
0x04, 0xc1, 0x76, 0x94, 0x0e, 0xc1, 0x76, 0x9e, 0x14, 0xc1, + 0x76, 0xa8, 0x15, 0xc1, 0x76, 0xb2, 0x0d, 0xc1, 0x76, 0xbc, 0xc2, 0x00, + 0xd0, 0x00, 0xdd, 0x01, 0xc2, 0x8d, 0x8f, 0x00, 0xdc, 0xf9, 0xc2, 0x01, + 0x4a, 0x00, 0xdc, 0xe9, 0xc2, 0x19, 0x2c, 0x00, 0xdc, 0xd1, 0xc2, 0x01, + 0xc3, 0x00, 0xdc, 0xc9, 0xc2, 0x02, 0x41, 0x00, 0xdc, 0xb9, 0xc2, 0x00, + 0xb0, 0x00, 0xdc, 0xa9, 0x10, 0xc1, 0x76, 0xc6, 0xc2, 0x0e, 0x9a, 0x00, + 0xdc, 0x99, 0xc2, 0x01, 0x6f, 0x00, 0xdc, 0x91, 0xc2, 0x02, 0x1c, 0x00, + 0xdc, 0x81, 0xc2, 0x25, 0x3b, 0x00, 0xdc, 0x79, 0xc2, 0x00, 0x64, 0x00, + 0xdc, 0x71, 0xc2, 0x01, 0x30, 0x00, 0xdc, 0x61, 0xc2, 0x0f, 0x9a, 0x00, + 0xdc, 0x59, 0x87, 0x00, 0xdc, 0x43, 0x01, 0x76, 0xd6, 0x91, 0x00, 0xdc, + 0x39, 0x83, 0x00, 0xdc, 0x1b, 0x01, 0x76, 0xda, 0x97, 0x00, 0xdc, 0x29, + 0x8b, 0x00, 0xdc, 0x20, 0xc4, 0x26, 0x78, 0x00, 0xdd, 0xc9, 0xc5, 0x06, + 0xdb, 0x00, 0xdd, 0xc1, 0x15, 0xc1, 0x76, 0xde, 0x08, 0xc1, 0x76, 0xea, + 0x16, 0xc1, 0x76, 0xf6, 0xc3, 0x05, 0x14, 0x00, 0xdd, 0x89, 0xc4, 0x15, + 0xe7, 0x00, 0xdd, 0x80, 0x47, 0xc1, 0xe7, 0xc1, 0x77, 0x02, 0x42, 0x16, + 0x59, 0xc1, 0x77, 0x0e, 0xc7, 0xc3, 0x5a, 0x00, 0xdd, 0x08, 0xc6, 0x1e, + 0x95, 0x00, 0xdd, 0x59, 0x42, 0x00, 0xb0, 0x41, 0x77, 0x1a, 0x10, 0xc1, + 0x77, 0x24, 0xc5, 0xdb, 0x1e, 0x00, 0xdd, 0x40, 0xca, 0x37, 0x4e, 0x01, + 0x13, 0xf9, 0xc5, 0x07, 0x62, 0x01, 0x13, 0xe8, 0x4c, 0x24, 0x3b, 0xc1, + 0x77, 0x42, 0xcb, 0x0e, 0xbd, 0x01, 0x55, 0xa1, 0x44, 0x1f, 0xb2, 0xc1, + 0x77, 0x4e, 0xcf, 0x6a, 0x8f, 0x01, 0x55, 0xc0, 0x00, 0x41, 0x77, 0x5a, + 0xd0, 0x03, 0xb7, 0x01, 0x4b, 0xc9, 0x42, 0x06, 0x62, 0x41, 0x77, 0x6f, + 0xc3, 0x02, 0xa3, 0x01, 0x55, 0xe9, 0xcf, 0x60, 0xf3, 0x01, 0x55, 0xf9, + 0xd9, 0x1f, 0x18, 0x01, 0x56, 0x08, 0xca, 0x0e, 0xbe, 0x01, 0x04, 0x61, + 0xc4, 0x00, 0x2d, 0x01, 0x04, 0x40, 0xc4, 0x18, 0x10, 0x01, 0x04, 0x39, + 0xc2, 0x22, 0xcc, 0x01, 0x04, 0x30, 0xc3, 0x0d, 0x14, 0x01, 0x04, 0x29, + 0xc3, 0x09, 0x9e, 0x01, 0x04, 0x20, 0xc4, 0x02, 0xde, 0x01, 0x04, 0x19, + 0xc2, 0x02, 0xa0, 0x01, 0x04, 0x10, 0x4a, 0x00, 0x87, 0xc1, 0x77, 0x7b, + 0x4e, 0x1d, 0x3c, 0x41, 0x77, 0x92, 0x42, 0x00, 0x99, 0xc1, 0x77, 0x9e, + 0x07, 0xc1, 0x77, 0xb0, 0x14, 0xc1, 0x77, 0xcb, 0x16, 0xc1, 0x77, 0xdd, + 0xcc, 0x87, 0x21, 0x0f, 0xa9, 0xc9, 0xce, 0x71, 0xf4, 0x0f, 0xa9, 0xc1, + 0xd1, 0x55, 0x96, 0x01, 0x53, 0x09, 0x03, 0xc1, 0x77, 0xe9, 0xd1, 0x54, + 0x0f, 0x07, 0xf2, 0x89, 0xc9, 0x11, 0xf6, 0x07, 0xf2, 0x91, 0xc9, 0xa8, + 0x55, 0x07, 0xf2, 0xa1, 0xcd, 0x2c, 0xb2, 0x07, 0xf2, 0xb1, 0x42, 0x00, + 0x49, 0xc1, 0x77, 0xfb, 0xcb, 0x97, 0x9d, 0x07, 0xf2, 0xf9, 0x12, 0xc1, + 0x78, 0x07, 0xcc, 0x89, 0xcd, 0x07, 0xf3, 0x19, 0xd1, 0x54, 0xb9, 0x07, + 0xf3, 0x29, 0xcb, 0x99, 0x60, 0x07, 0xf3, 0x48, 0xcc, 0x23, 0x9f, 0x01, + 0x55, 0x60, 0x02, 0xc1, 0x78, 0x13, 0x00, 0x41, 0x78, 0x1b, 0xce, 0x50, + 0xaf, 0x01, 0x1c, 0xc9, 0xc2, 0x00, 0x29, 0x0f, 0xad, 0x42, 0x01, 0x78, + 0x27, 0xc2, 0x00, 0xcc, 0x0f, 0xa3, 0xc0, 0xc5, 0x07, 0x62, 0x01, 0x10, + 0xe8, 0xd5, 0x37, 0x43, 0x01, 0x17, 0x41, 0xce, 0x74, 0x32, 0x01, 0x15, + 0x81, 0x46, 0x23, 0xa0, 0xc1, 0x78, 0x2d, 0x46, 0x00, 0xd4, 0x41, 0x78, + 0x39, 0x42, 0x00, 0x99, 0xc1, 0x78, 0x51, 0xc9, 0xa8, 0x55, 0x07, 0xf0, + 0xa1, 0x07, 0xc1, 0x78, 0x5d, 0xcd, 0x2c, 0xb2, 0x07, 0xf0, 0xb1, 0xd3, + 0x22, 0x78, 0x07, 0xf0, 0xc9, 0xce, 0x72, 0x1e, 0x07, 0xf1, 0x81, 0xcd, + 0x80, 0x29, 0x07, 0xf1, 0xa1, 0x0e, 0xc1, 0x78, 0x6f, 0x46, 0x00, 0x2c, + 0xc1, 0x78, 0x7b, 0x4c, 0x1c, 0x86, 0x41, 0x78, 0xa9, 0xcd, 0x80, 0x1c, + 0x01, 0x18, 0xc1, 0xc7, 0xc4, 0x72, 0x0f, 0xb6, 0x80, 0x04, 0xc1, 0x78, + 0xb5, 0x47, 
0x70, 0xa5, 0xc1, 0x78, 0xc1, 0x16, 0xc1, 0x78, 0xd9, 0x08, + 0xc1, 0x78, 0xf1, 0x15, 0xc1, 0x78, 0xfb, 0x49, 0xb2, 0x12, 0xc1, 0x79, + 0x07, 0x48, 0xbb, 0x82, 0xc1, 0x79, 0x1f, 0x48, 0xb7, 0x1a, 0xc1, 0x79, + 0x37, 0x0d, 0xc1, 0x79, 0x4f, 0x49, 0xa8, 0xf7, 0xc1, 0x79, 0x5b, 0xc9, + 0xa9, 0x7e, 0x0f, 0x85, 0xf9, 0xcb, 0x8d, 0x16, 0x0f, 0x86, 0xf8, 0x16, + 0xc1, 0x79, 0x73, 0x08, 0x41, 0x79, 0x7f, 0x00, 0x41, 0x79, 0x8b, 0x46, + 0x08, 0xf1, 0xc1, 0x79, 0x9d, 0xc9, 0xb0, 0xa1, 0x0f, 0xa6, 0x20, 0x00, + 0xc1, 0x79, 0xa9, 0xd8, 0x25, 0xbb, 0x01, 0x33, 0xe8, 0x4d, 0x29, 0xb9, + 0xc1, 0x79, 0xb5, 0x4f, 0x0b, 0x17, 0x41, 0x7a, 0x1d, 0x16, 0xc1, 0x7a, + 0x85, 0xc8, 0x4b, 0x5f, 0x01, 0x24, 0x31, 0x07, 0xc1, 0x7a, 0x97, 0x15, + 0xc1, 0x7a, 0xa3, 0x08, 0x41, 0x7a, 0xaf, 0xc4, 0x26, 0x78, 0x01, 0x23, + 0xe1, 0xc5, 0x06, 0xdb, 0x01, 0x23, 0xd9, 0x15, 0xc1, 0x7a, 0xbb, 0x08, + 0xc1, 0x7a, 0xc7, 0x16, 0xc1, 0x7a, 0xd3, 0xc3, 0x05, 0x14, 0x01, 0x23, + 0xa0, 0x0d, 0xc1, 0x7a, 0xdf, 0xc5, 0xd9, 0x61, 0x01, 0x90, 0x0b, 0x01, + 0x7a, 0xf1, 0x16, 0xc1, 0x7a, 0xf7, 0xc5, 0xd6, 0x8c, 0x01, 0x90, 0x1b, + 0x01, 0x7b, 0x09, 0xc5, 0xda, 0xe7, 0x01, 0x90, 0x23, 0x01, 0x7b, 0x0f, + 0x12, 0xc1, 0x7b, 0x15, 0xc4, 0xad, 0x2b, 0x01, 0x90, 0x33, 0x01, 0x7b, + 0x27, 0xc5, 0xb7, 0x9d, 0x01, 0x90, 0x3b, 0x01, 0x7b, 0x2d, 0x05, 0xc1, + 0x7b, 0x33, 0xc5, 0x90, 0xe4, 0x01, 0x90, 0x6a, 0x01, 0x7b, 0x45, 0xc4, + 0xe1, 0x47, 0x01, 0x90, 0xe9, 0xc3, 0x0d, 0x03, 0x01, 0x90, 0xf0, 0xc3, + 0x05, 0x14, 0x01, 0x91, 0x01, 0x16, 0xc1, 0x7b, 0x4b, 0x08, 0xc1, 0x7b, + 0x5d, 0x15, 0xc1, 0x7b, 0x6d, 0x07, 0xc1, 0x7b, 0x8b, 0x10, 0xc1, 0x7b, + 0x9d, 0x0f, 0xc1, 0x7b, 0xa9, 0x19, 0xc1, 0x7b, 0xb5, 0xc4, 0xdf, 0xbf, + 0x01, 0x91, 0x91, 0x05, 0xc1, 0x7b, 0xc1, 0xc5, 0xdd, 0x71, 0x01, 0x91, + 0xc1, 0x42, 0x01, 0x19, 0xc1, 0x7b, 0xcd, 0xc8, 0xba, 0x62, 0x01, 0x91, + 0xf8, 0xc2, 0x00, 0xf1, 0x01, 0x11, 0x29, 0x45, 0x00, 0x8c, 0x41, 0x7b, + 0xdd, 0xca, 0x1b, 0x09, 0x01, 0x01, 0x49, 0xc2, 0x07, 0xa3, 0x01, 0x70, + 0x79, 0xc7, 0x62, 0x81, 0x01, 0x72, 0x68, 0xc5, 0x26, 0xf7, 0x08, 0xd7, + 0xc1, 0xc7, 0x41, 0x71, 0x08, 0xd7, 0x80, 0x00, 0x41, 0x7b, 0xe9, 0x08, + 0xc1, 0x7b, 0xf8, 0x8b, 0x08, 0xd6, 0xbb, 0x01, 0x7c, 0x02, 0x97, 0x08, + 0xd6, 0xcb, 0x01, 0x7c, 0x06, 0x91, 0x08, 0xd6, 0xc1, 0x87, 0x08, 0xd6, + 0xb1, 0x83, 0x08, 0xd6, 0xa9, 0x05, 0xc1, 0x7c, 0x0a, 0xc2, 0x00, 0x39, + 0x08, 0xd6, 0x91, 0x12, 0xc1, 0x7c, 0x14, 0x10, 0xc1, 0x7c, 0x1e, 0x16, + 0xc1, 0x7c, 0x28, 0xc2, 0x01, 0x5d, 0x08, 0xd6, 0x61, 0xc2, 0x0d, 0xf6, + 0x08, 0xd6, 0x59, 0x0d, 0xc1, 0x7c, 0x32, 0xc2, 0x01, 0x30, 0x08, 0xd6, + 0x49, 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0x41, 0xc2, 0x02, 0x41, 0x08, 0xd6, + 0x31, 0xc2, 0x02, 0x1c, 0x08, 0xd6, 0x29, 0xc2, 0x0e, 0x9a, 0x08, 0xd6, + 0x21, 0xc2, 0x01, 0xc3, 0x08, 0xd6, 0x19, 0xc2, 0x00, 0xdb, 0x08, 0xd6, + 0x10, 0xc5, 0x26, 0xf7, 0x08, 0xd7, 0x91, 0xca, 0xa4, 0x04, 0x08, 0xd7, + 0x88, 0x00, 0x41, 0x7c, 0x3c, 0xc6, 0x26, 0xf6, 0x08, 0xd7, 0x50, 0xc5, + 0x26, 0xf7, 0x08, 0xd7, 0x49, 0xc4, 0x0d, 0xe5, 0x08, 0xd7, 0x2a, 0x01, + 0x7c, 0x4b, 0xc4, 0x0a, 0x64, 0x0f, 0x99, 0xa1, 0xc9, 0xb4, 0x01, 0x0f, + 0xd7, 0x99, 0xc7, 0xc5, 0x0c, 0x0f, 0xd7, 0xa1, 0xc6, 0x28, 0x24, 0x01, + 0x70, 0xc8, 0x47, 0x34, 0x2f, 0xc1, 0x7c, 0x51, 0xd6, 0x2c, 0x9c, 0x08, + 0x43, 0xc1, 0x42, 0x00, 0x49, 0x41, 0x7c, 0x5f, 0x18, 0xc1, 0x7c, 0x6b, + 0x0d, 0xc1, 0x7c, 0x77, 0x16, 0xc1, 0x7c, 0x89, 0x1b, 0xc1, 0x7c, 0x93, + 0xc3, 0xe6, 0x20, 0x0b, 0x5c, 0x59, 0x42, 0x00, 0xd0, 0xc1, 0x7c, 0x9f, + 0xc4, 0xe4, 0x03, 0x0b, 0x5c, 0x39, 0xc4, 0xe3, 0xcb, 0x0b, 0x5c, 0x21, + 0xc5, 0xd3, 
0xdf, 0x0b, 0x5c, 0x09, 0x0e, 0x41, 0x7c, 0xa9, 0x05, 0xc1, + 0x7c, 0xb5, 0xc3, 0xe6, 0x3e, 0x0b, 0x59, 0x71, 0xc2, 0x20, 0xec, 0x0b, + 0x59, 0x69, 0x10, 0xc1, 0x7c, 0xc1, 0xc5, 0xd7, 0x54, 0x0b, 0x59, 0x51, + 0x0a, 0xc1, 0x7c, 0xdd, 0xc3, 0xc4, 0x86, 0x0b, 0x59, 0x31, 0xc3, 0x2d, + 0x34, 0x0b, 0x59, 0x21, 0xc4, 0xe4, 0xd7, 0x0b, 0x59, 0x19, 0xc3, 0xbe, + 0x32, 0x0b, 0x59, 0x09, 0xc3, 0x20, 0xeb, 0x0b, 0x58, 0xf1, 0xc3, 0xe5, + 0x4e, 0x0b, 0x58, 0xe0, 0xc8, 0xbc, 0x0a, 0x0b, 0x5b, 0xb9, 0xc8, 0xbf, + 0x72, 0x0b, 0x5b, 0xb1, 0x16, 0xc1, 0x7c, 0xef, 0x05, 0xc1, 0x7c, 0xfe, + 0xd2, 0x4d, 0xe7, 0x0b, 0x5b, 0x90, 0xc2, 0x11, 0xa5, 0x0b, 0x5b, 0x89, + 0x44, 0x9f, 0x7e, 0x41, 0x7d, 0x0a, 0xc2, 0x20, 0xec, 0x0b, 0x5b, 0x79, + 0xca, 0x9f, 0x7c, 0x0b, 0x5b, 0x69, 0xce, 0x73, 0xb4, 0x0b, 0x5b, 0x30, + 0xc3, 0xe6, 0x1d, 0x0b, 0x5b, 0x59, 0xc3, 0xe5, 0x60, 0x0b, 0x5b, 0x48, + 0xc3, 0x44, 0x23, 0x0b, 0x5b, 0x51, 0x1b, 0xc1, 0x7d, 0x16, 0xc3, 0x26, + 0x9a, 0x0b, 0x5a, 0x20, 0xc3, 0x95, 0x80, 0x0b, 0x5b, 0x41, 0xc2, 0x01, + 0x0f, 0x0b, 0x5b, 0x28, 0xc3, 0x46, 0x7d, 0x0b, 0x5b, 0x19, 0xc4, 0xe4, + 0x47, 0x0b, 0x5a, 0x11, 0xc4, 0xdf, 0x67, 0x0b, 0x5a, 0x01, 0xc4, 0xe0, + 0x47, 0x0b, 0x59, 0xd9, 0x16, 0x41, 0x7d, 0x22, 0xc8, 0xbd, 0x12, 0x0b, + 0x5b, 0x09, 0x42, 0x00, 0xc4, 0x41, 0x7d, 0x2c, 0xc9, 0x33, 0xed, 0x0b, + 0x5a, 0xf9, 0x95, 0x0b, 0x5a, 0xe0, 0xc4, 0x18, 0x10, 0x0b, 0x5a, 0xb9, + 0xc2, 0x22, 0xcc, 0x0b, 0x5a, 0xb0, 0xc3, 0x0d, 0x14, 0x0b, 0x5a, 0xa9, + 0xc3, 0x09, 0x9e, 0x0b, 0x5a, 0xa0, 0xc4, 0x02, 0xde, 0x0b, 0x5a, 0x99, + 0xc2, 0x02, 0xa0, 0x0b, 0x5a, 0x90, 0xc3, 0xe5, 0x30, 0x0b, 0x59, 0xb1, + 0xc2, 0x00, 0x5a, 0x0b, 0x59, 0x80, 0xc3, 0xa7, 0x6a, 0x0b, 0x59, 0xa1, + 0x91, 0x0b, 0x59, 0x88, 0xc3, 0x40, 0xe3, 0x0b, 0x59, 0x99, 0xc2, 0x00, + 0xcb, 0x0b, 0x59, 0x90, 0x03, 0xc1, 0x7d, 0x34, 0x98, 0x0b, 0x58, 0xb9, + 0x84, 0x0b, 0x58, 0xb1, 0x19, 0xc1, 0x7d, 0x3c, 0x0b, 0xc1, 0x7d, 0x44, + 0x17, 0x41, 0x7d, 0x4c, 0x98, 0x0b, 0x58, 0xc9, 0x84, 0x0b, 0x58, 0xc0, + 0x03, 0xc1, 0x7d, 0x54, 0x98, 0x0b, 0x58, 0x19, 0x84, 0x0b, 0x58, 0x10, + 0x98, 0x0b, 0x58, 0x99, 0x84, 0x0b, 0x58, 0x91, 0x11, 0x41, 0x7d, 0x5c, + 0x03, 0xc1, 0x7d, 0x64, 0x98, 0x0b, 0x58, 0x39, 0x84, 0x0b, 0x58, 0x30, + 0x98, 0x0b, 0x58, 0x49, 0x84, 0x0b, 0x58, 0x41, 0x07, 0x41, 0x7d, 0x6c, + 0xc4, 0x2a, 0xcc, 0x0f, 0xa7, 0x79, 0xc4, 0x01, 0xc3, 0x01, 0x80, 0x92, + 0x01, 0x7d, 0x74, 0x00, 0xc1, 0x7d, 0x7a, 0xcb, 0x7a, 0xa2, 0x0f, 0xa5, + 0xd8, 0x91, 0x08, 0x5d, 0x51, 0xc4, 0x18, 0x12, 0x08, 0x5d, 0x70, 0xc3, + 0x77, 0x79, 0x08, 0x5c, 0x79, 0xc4, 0xdc, 0x2d, 0x08, 0x5c, 0x68, 0x16, + 0xc1, 0x7d, 0xa2, 0xc3, 0x05, 0x14, 0x08, 0x48, 0xb2, 0x01, 0x7d, 0xb2, + 0x16, 0xc1, 0x7d, 0xb8, 0x15, 0xc1, 0x7d, 0xc4, 0xc4, 0xa9, 0x57, 0x08, + 0x48, 0x99, 0xc3, 0xe5, 0x78, 0x08, 0x48, 0x91, 0xc2, 0x00, 0x67, 0x08, + 0x48, 0x81, 0x03, 0xc1, 0x7d, 0xd6, 0xc3, 0x20, 0x18, 0x08, 0x48, 0x69, + 0xc3, 0x00, 0x4e, 0x08, 0x48, 0x61, 0xc4, 0xb9, 0xf7, 0x08, 0x48, 0x59, + 0xc3, 0xba, 0x37, 0x08, 0x48, 0x51, 0xc3, 0x4a, 0xb9, 0x08, 0x48, 0x49, + 0xc2, 0x01, 0x7f, 0x08, 0x48, 0x23, 0x01, 0x7d, 0xe2, 0xc3, 0x69, 0x81, + 0x08, 0x48, 0x31, 0xc3, 0xe4, 0xf4, 0x08, 0x48, 0x29, 0xc4, 0xdb, 0x4b, + 0x08, 0x48, 0x19, 0xc4, 0xe0, 0x8f, 0x08, 0x48, 0x11, 0xc3, 0x0b, 0xc8, + 0x08, 0x48, 0x08, 0x0d, 0xc1, 0x7d, 0xe6, 0x09, 0xc1, 0x7d, 0xf0, 0x10, + 0xc1, 0x7d, 0xfa, 0x05, 0xc1, 0x7e, 0x10, 0xc2, 0x25, 0x3b, 0x05, 0x42, + 0x31, 0x16, 0xc1, 0x7e, 0x1d, 0x06, 0xc1, 0x7e, 0x2f, 0x12, 0xc1, 0x7e, + 0x3f, 0xc2, 0x01, 0x5d, 0x05, 0x42, 0x71, 0xc2, 0x01, 0xc3, 0x05, 0x42, + 0x79, 0xc2, 
0x01, 0x4a, 0x05, 0x42, 0x99, 0x1c, 0xc1, 0x7e, 0x49, 0x15, + 0xc1, 0x7e, 0x53, 0xc2, 0x19, 0x2c, 0x05, 0x42, 0xb9, 0xc2, 0x00, 0x39, + 0x05, 0x42, 0xc1, 0xc2, 0x00, 0xdb, 0x05, 0x42, 0xc9, 0xc2, 0x00, 0xd0, + 0x05, 0x42, 0xe1, 0x83, 0x05, 0x42, 0xeb, 0x01, 0x7e, 0x63, 0x8b, 0x05, + 0x42, 0xf1, 0x97, 0x05, 0x42, 0xf9, 0x87, 0x05, 0x43, 0x03, 0x01, 0x7e, + 0x6f, 0x91, 0x05, 0x43, 0x09, 0xc2, 0x0f, 0x9a, 0x05, 0x43, 0x11, 0xc2, + 0x8d, 0x8f, 0x05, 0x43, 0x19, 0xc2, 0x00, 0x87, 0x05, 0x43, 0x21, 0x45, + 0x17, 0xbd, 0x41, 0x7e, 0x73, 0x17, 0xc1, 0x7e, 0x7f, 0xcf, 0x68, 0x46, + 0x05, 0x43, 0xa0, 0xc4, 0x01, 0xe2, 0x05, 0x43, 0xb1, 0xcb, 0x99, 0x6b, + 0x05, 0x43, 0xb8, 0xc9, 0xa2, 0x56, 0x08, 0x0e, 0x81, 0x0e, 0xc1, 0x7e, + 0x8b, 0xc6, 0xca, 0xd9, 0x08, 0x0f, 0xa0, 0xcc, 0x89, 0x91, 0x08, 0x0e, + 0x91, 0xc4, 0xdf, 0xeb, 0x08, 0x0e, 0xc1, 0xc4, 0x5e, 0xc9, 0x08, 0x0f, + 0x80, 0x03, 0xc1, 0x7e, 0x97, 0xc4, 0xdf, 0xbb, 0x08, 0x0e, 0xa1, 0xc3, + 0x46, 0x7d, 0x08, 0x0e, 0xe1, 0x11, 0x41, 0x7e, 0xa7, 0xc4, 0x29, 0xfd, + 0x08, 0x0e, 0xa9, 0xc8, 0xbd, 0xda, 0x08, 0x0f, 0xe0, 0xc5, 0xb7, 0xed, + 0x08, 0x0e, 0xb1, 0xc3, 0x00, 0xbf, 0x08, 0x0f, 0x49, 0xc3, 0x06, 0xa7, + 0x08, 0x0f, 0x50, 0x11, 0xc1, 0x7e, 0xb6, 0xc2, 0x02, 0xe0, 0x08, 0x0f, + 0x8b, 0x01, 0x7e, 0xc0, 0xc8, 0xb8, 0x62, 0x08, 0x0f, 0x58, 0x42, 0x00, + 0x0a, 0xc1, 0x7e, 0xc6, 0xc2, 0x39, 0x8b, 0x08, 0x0e, 0xf9, 0xc4, 0x04, + 0x15, 0x08, 0x0f, 0x29, 0xc8, 0xb9, 0xca, 0x08, 0x0f, 0xd9, 0xc7, 0xc0, + 0xdd, 0x08, 0x0f, 0xd0, 0xc6, 0xca, 0xaf, 0x08, 0x0e, 0xe9, 0xc5, 0xd4, + 0xed, 0x08, 0x0e, 0xf0, 0x86, 0x08, 0x0f, 0x01, 0xc2, 0x00, 0x35, 0x08, + 0x0f, 0xb0, 0xc4, 0xe1, 0x07, 0x08, 0x0f, 0x19, 0xc2, 0x00, 0x5f, 0x08, + 0x0f, 0x78, 0xc2, 0x00, 0xc2, 0x08, 0x0f, 0x69, 0xc6, 0xcd, 0x67, 0x08, + 0x0f, 0xa8, 0xc5, 0xd5, 0xe2, 0x08, 0x0f, 0xc9, 0xc7, 0xc4, 0x87, 0x08, + 0x0e, 0xb8, 0xc4, 0x02, 0xde, 0x00, 0x00, 0x99, 0xc2, 0x02, 0xa0, 0x00, + 0x00, 0x90, 0xcb, 0x83, 0x0e, 0x00, 0x4a, 0xa1, 0xd0, 0x50, 0xcf, 0x00, + 0x4b, 0x80, 0xcb, 0x1f, 0x0d, 0x00, 0x4a, 0x99, 0xc9, 0x93, 0x31, 0x05, + 0x47, 0xc8, 0x4b, 0x91, 0xc5, 0xc1, 0x7e, 0xd0, 0x44, 0x00, 0xbb, 0x41, + 0x7e, 0xdc, 0x03, 0xc1, 0x7f, 0x11, 0xcf, 0x61, 0x11, 0x00, 0x4a, 0x71, + 0x91, 0x00, 0x4a, 0x5b, 0x01, 0x7f, 0x25, 0x46, 0x2e, 0xee, 0xc1, 0x7f, + 0x2f, 0x47, 0xc7, 0x7b, 0xc1, 0x7f, 0x37, 0x87, 0x00, 0x4a, 0x39, 0x48, + 0xb2, 0x2d, 0xc1, 0x7f, 0x45, 0x97, 0x00, 0x4a, 0x0b, 0x01, 0x7f, 0x53, + 0x8b, 0x00, 0x49, 0xfa, 0x01, 0x7f, 0x5e, 0x0a, 0xc1, 0x7f, 0x62, 0x15, + 0xc1, 0x7f, 0x6c, 0x18, 0xc1, 0x7f, 0x7a, 0x0e, 0xc1, 0x7f, 0x84, 0x14, + 0xc1, 0x7f, 0x8c, 0x1b, 0xc1, 0x7f, 0x9c, 0xc2, 0x01, 0xc3, 0x00, 0x49, + 0x73, 0x01, 0x7f, 0xa6, 0x04, 0xc1, 0x7f, 0xac, 0x12, 0xc1, 0x7f, 0xbc, + 0x10, 0xc1, 0x7f, 0xc6, 0x06, 0xc1, 0x7f, 0xda, 0x16, 0xc1, 0x7f, 0xe8, + 0x0c, 0xc1, 0x7f, 0xf6, 0x05, 0xc1, 0x80, 0x06, 0x09, 0xc1, 0x80, 0x13, + 0x0d, 0xc1, 0x80, 0x27, 0x83, 0x00, 0x48, 0x2b, 0x01, 0x80, 0x2f, 0x91, + 0x00, 0x48, 0x9b, 0x01, 0x80, 0x43, 0x87, 0x00, 0x48, 0x79, 0x97, 0x00, + 0x48, 0x4b, 0x01, 0x80, 0x4d, 0x8b, 0x00, 0x48, 0x3b, 0x01, 0x80, 0x58, + 0xc2, 0x0f, 0x9a, 0x00, 0x4a, 0xc1, 0x1c, 0xc1, 0x80, 0x5c, 0xc2, 0x00, + 0x87, 0x00, 0x4a, 0xf0, 0x45, 0x09, 0x98, 0xc1, 0x80, 0x66, 0xcb, 0x97, + 0xf5, 0x00, 0x4b, 0x29, 0xc4, 0x19, 0x53, 0x00, 0x4b, 0x20, 0xc7, 0xc7, + 0x19, 0x0f, 0x9e, 0xe8, 0x4f, 0x0b, 0x17, 0xc1, 0x80, 0x8a, 0x4d, 0x29, + 0xb9, 0x41, 0x80, 0xec, 0xcf, 0x66, 0x0c, 0x01, 0x1f, 0x41, 0xd4, 0x3b, + 0x10, 0x01, 0x1c, 0xb0, 0x47, 0x07, 0x9a, 0xc1, 0x81, 0x4e, 0x44, 0x00, + 0xf1, 0xc1, 
0x81, 0x5a, 0xc4, 0x51, 0xb7, 0x01, 0x1e, 0x30, 0xc8, 0x01, + 0x92, 0x01, 0x1e, 0x19, 0xc6, 0x02, 0xd1, 0x01, 0x1e, 0x00, 0xc4, 0x51, + 0xb7, 0x01, 0x1e, 0x41, 0xc8, 0x01, 0x92, 0x01, 0x1e, 0x29, 0xc6, 0x02, + 0xd1, 0x01, 0x1e, 0x10, 0xc4, 0x51, 0xb7, 0x01, 0x1e, 0x39, 0xc8, 0x01, + 0x92, 0x01, 0x1e, 0x21, 0xc6, 0x02, 0xd1, 0x01, 0x1e, 0x08, 0x44, 0x84, + 0x6c, 0x41, 0x81, 0x66, 0xca, 0xa6, 0xde, 0x0e, 0x98, 0x11, 0xcd, 0x7f, + 0xce, 0x0e, 0x98, 0x08, 0xc2, 0x00, 0x74, 0x01, 0x34, 0x79, 0xc3, 0x01, + 0x95, 0x01, 0x34, 0x60, 0xc3, 0x01, 0x95, 0x01, 0x34, 0x71, 0xc2, 0x00, + 0x74, 0x01, 0x34, 0x68, 0x00, 0x41, 0x81, 0x72, 0x00, 0x41, 0x81, 0x7e, + 0xc4, 0x18, 0x10, 0x00, 0x01, 0xbb, 0x01, 0x81, 0x8a, 0xc2, 0x22, 0xcc, + 0x00, 0x01, 0xb2, 0x01, 0x81, 0x8e, 0xc3, 0x0d, 0x14, 0x00, 0x01, 0xab, + 0x01, 0x81, 0x92, 0xc3, 0x09, 0x9e, 0x00, 0x01, 0xa2, 0x01, 0x81, 0x96, + 0xc4, 0x02, 0xde, 0x00, 0x01, 0x9b, 0x01, 0x81, 0x9a, 0xc2, 0x02, 0xa0, + 0x00, 0x01, 0x92, 0x01, 0x81, 0x9e, 0x00, 0x41, 0x81, 0xa2, 0x00, 0x41, + 0x81, 0xae, 0x45, 0x09, 0x98, 0xc1, 0x81, 0xba, 0xcb, 0x97, 0xf5, 0x08, + 0xca, 0x20, 0xc5, 0x33, 0x5d, 0x08, 0xca, 0x19, 0xc7, 0xc3, 0xa7, 0x08, + 0xc9, 0xe9, 0xcb, 0x1e, 0x89, 0x08, 0xc9, 0xe1, 0xc8, 0x14, 0x38, 0x08, + 0xc9, 0xd8, 0xc2, 0x00, 0x39, 0x08, 0xca, 0x11, 0xc2, 0x19, 0x2c, 0x08, + 0xca, 0x00, 0xc5, 0x1e, 0x96, 0x08, 0xc9, 0xf1, 0x4a, 0x6f, 0xc8, 0x41, + 0x81, 0xde, 0xc2, 0x02, 0x1c, 0x08, 0xc9, 0x79, 0x0e, 0xc1, 0x81, 0xf8, + 0xc2, 0x00, 0xd0, 0x08, 0xc9, 0x69, 0x15, 0xc1, 0x82, 0x02, 0xc2, 0x02, + 0x41, 0x08, 0xc9, 0x49, 0xc2, 0x00, 0x39, 0x08, 0xc9, 0x39, 0x1b, 0xc1, + 0x82, 0x12, 0xc2, 0x01, 0xc3, 0x08, 0xc9, 0x21, 0x04, 0xc1, 0x82, 0x1c, + 0x12, 0xc1, 0x82, 0x26, 0x10, 0xc1, 0x82, 0x30, 0x06, 0xc1, 0x82, 0x46, + 0x16, 0xc1, 0x82, 0x54, 0xc2, 0x25, 0x3b, 0x08, 0xc8, 0x99, 0x05, 0xc1, + 0x82, 0x64, 0x09, 0xc1, 0x82, 0x6e, 0x0d, 0xc1, 0x82, 0x78, 0x91, 0x08, + 0xc8, 0x49, 0x87, 0x08, 0xc8, 0x31, 0x97, 0x08, 0xc8, 0x23, 0x01, 0x82, + 0x82, 0x8b, 0x08, 0xc8, 0x13, 0x01, 0x82, 0x86, 0x83, 0x08, 0xc8, 0x02, + 0x01, 0x82, 0x8a, 0xc5, 0x03, 0x4d, 0x01, 0x16, 0x39, 0x15, 0x41, 0x82, + 0x8e, 0xca, 0xa3, 0x64, 0x01, 0x3c, 0x99, 0x46, 0x09, 0x97, 0x41, 0x82, + 0x9a, 0xc4, 0x26, 0x78, 0x01, 0x3b, 0xf1, 0xc5, 0x06, 0xdb, 0x01, 0x3b, + 0xe9, 0x15, 0xc1, 0x82, 0xbe, 0x08, 0xc1, 0x82, 0xca, 0x16, 0xc1, 0x82, + 0xd6, 0xc3, 0x05, 0x14, 0x01, 0x3b, 0xb0, 0xc4, 0x26, 0x78, 0x01, 0x3c, + 0x41, 0xc5, 0x06, 0xdb, 0x01, 0x3c, 0x39, 0x15, 0xc1, 0x82, 0xe2, 0x08, + 0xc1, 0x82, 0xee, 0x16, 0xc1, 0x82, 0xfa, 0xc3, 0x05, 0x14, 0x01, 0x3c, + 0x01, 0xc4, 0x15, 0xe7, 0x0f, 0x88, 0x58, 0xc4, 0x00, 0x87, 0x0f, 0xb0, + 0xf1, 0xd1, 0x4f, 0x14, 0x0f, 0xb1, 0x28, 0xc8, 0x18, 0x67, 0x01, 0x16, + 0x21, 0xd7, 0x26, 0x1b, 0x0f, 0xa5, 0x01, 0x45, 0x00, 0x8c, 0xc1, 0x83, + 0x06, 0xc6, 0xcf, 0xad, 0x0f, 0xbc, 0xe0, 0xc4, 0x01, 0x23, 0x0f, 0xc8, + 0x43, 0x01, 0x83, 0x1e, 0xcc, 0x84, 0xa5, 0x0f, 0xc8, 0x4a, 0x01, 0x83, + 0x24, 0x16, 0xc1, 0x83, 0x2a, 0x15, 0xc1, 0x83, 0x36, 0x0a, 0xc1, 0x83, + 0x42, 0x03, 0xc1, 0x83, 0x4e, 0xcf, 0x61, 0x4d, 0x01, 0x3f, 0x89, 0xcb, + 0x01, 0xfc, 0x01, 0x0f, 0x4b, 0x01, 0x83, 0x5d, 0x06, 0xc1, 0x83, 0x63, + 0xcd, 0x7c, 0xa8, 0x01, 0x0e, 0x51, 0xcc, 0x2e, 0x48, 0x01, 0x0d, 0x79, + 0xc6, 0xca, 0xa3, 0x0f, 0xb3, 0x79, 0x46, 0x04, 0x8f, 0xc1, 0x83, 0x6f, + 0xd1, 0x56, 0xd9, 0x0f, 0xc1, 0xb9, 0xd0, 0x58, 0x62, 0x0f, 0xc1, 0xf8, + 0xd2, 0x4c, 0xfd, 0x01, 0x57, 0x88, 0xd0, 0x5d, 0x52, 0x01, 0x4f, 0x49, + 0xcf, 0x66, 0x66, 0x01, 0x4f, 0x40, 0x43, 0xe5, 0x0c, 0xc1, 0x83, 0x7b, + 0x43, 0xe5, 
0xff, 0xc1, 0x83, 0x97, 0x43, 0xe5, 0xdb, 0xc1, 0x83, 0xb3, + 0x43, 0xe6, 0x6e, 0xc1, 0x83, 0xcf, 0x43, 0xe6, 0x3b, 0xc1, 0x83, 0xeb, + 0x43, 0xe5, 0xa8, 0xc1, 0x84, 0x07, 0x43, 0xe5, 0x45, 0x41, 0x84, 0x23, + 0x43, 0xe5, 0xdb, 0xc1, 0x84, 0x3f, 0x43, 0xe5, 0xff, 0xc1, 0x84, 0x5b, + 0x43, 0xe6, 0x6e, 0xc1, 0x84, 0x77, 0x43, 0xe6, 0x3b, 0xc1, 0x84, 0x93, + 0x43, 0xe5, 0x0c, 0xc1, 0x84, 0xaf, 0x43, 0xe5, 0xa8, 0xc1, 0x84, 0xcb, + 0x43, 0xe5, 0x45, 0x41, 0x84, 0xe7, 0x05, 0xc1, 0x85, 0x03, 0x49, 0x07, + 0xbb, 0xc1, 0x85, 0x15, 0x17, 0xc1, 0x85, 0x24, 0x44, 0x06, 0xbb, 0xc1, + 0x85, 0x30, 0x15, 0xc1, 0x85, 0x3c, 0xcd, 0x2c, 0xb2, 0x01, 0x02, 0x39, + 0xd0, 0x0f, 0x09, 0x01, 0x01, 0xe1, 0x12, 0xc1, 0x85, 0x50, 0x06, 0xc1, + 0x85, 0x5a, 0x0a, 0xc1, 0x85, 0x66, 0x0e, 0xc1, 0x85, 0x72, 0xdb, 0x16, + 0x89, 0x01, 0x4c, 0xb1, 0x47, 0xc4, 0x17, 0xc1, 0x85, 0x7c, 0xcc, 0x83, + 0x0d, 0x00, 0x16, 0xe9, 0xcd, 0x7d, 0x9f, 0x07, 0xf2, 0x61, 0xce, 0x70, + 0x0a, 0x01, 0x70, 0xb8, 0xc9, 0x1b, 0xc7, 0x01, 0x35, 0x19, 0xcb, 0x21, + 0x00, 0x01, 0x35, 0x11, 0xc6, 0x00, 0x91, 0x01, 0x5f, 0xe0, 0x47, 0x73, + 0x59, 0xc1, 0x85, 0x8b, 0xce, 0x6e, 0xd6, 0x01, 0x4e, 0xf9, 0x45, 0x02, + 0x6d, 0x41, 0x85, 0xa3, 0xc5, 0x02, 0xd2, 0x01, 0x2e, 0x61, 0xc4, 0x0d, + 0x21, 0x01, 0x02, 0xe0, 0xc5, 0x0b, 0x0a, 0x01, 0x58, 0xd1, 0xc6, 0x27, + 0x5e, 0x01, 0x72, 0x50, 0xc5, 0x33, 0x5d, 0x08, 0xc1, 0xd1, 0x42, 0x07, + 0xb2, 0xc1, 0x85, 0xaf, 0xc8, 0x14, 0x38, 0x08, 0xc1, 0xb8, 0x03, 0xc1, + 0x85, 0xbb, 0x91, 0x08, 0xc1, 0xa9, 0x87, 0x08, 0xc1, 0x99, 0xc9, 0xb2, + 0x2d, 0x08, 0xc1, 0x8b, 0x01, 0x85, 0xc7, 0x97, 0x08, 0xc1, 0x7b, 0x01, + 0x85, 0xcb, 0x8b, 0x08, 0xc1, 0x6a, 0x01, 0x85, 0xcf, 0x14, 0xc1, 0x85, + 0xd3, 0xc2, 0x00, 0xd0, 0x08, 0xc1, 0x51, 0x15, 0xc1, 0x85, 0xdd, 0xc2, + 0x02, 0x41, 0x08, 0xc1, 0x31, 0xc2, 0x00, 0xdb, 0x08, 0xc1, 0x29, 0xc2, + 0x19, 0x2c, 0x08, 0xc1, 0x19, 0xc2, 0x01, 0xc3, 0x08, 0xc1, 0x11, 0x04, + 0xc1, 0x85, 0xed, 0x12, 0xc1, 0x85, 0xf7, 0x10, 0xc1, 0x86, 0x01, 0x06, + 0xc1, 0x86, 0x17, 0x16, 0xc1, 0x86, 0x25, 0x0c, 0xc1, 0x86, 0x33, 0x05, + 0xc1, 0x86, 0x3d, 0x09, 0xc1, 0x86, 0x47, 0x0d, 0xc1, 0x86, 0x51, 0x83, + 0x08, 0xc0, 0x03, 0x01, 0x86, 0x5b, 0x91, 0x08, 0xc0, 0x41, 0x87, 0x08, + 0xc0, 0x31, 0x97, 0x08, 0xc0, 0x23, 0x01, 0x86, 0x67, 0x8b, 0x08, 0xc0, + 0x12, 0x01, 0x86, 0x6b, 0xc9, 0x23, 0x9f, 0x01, 0x17, 0x68, 0xc9, 0x23, + 0x9f, 0x01, 0x17, 0x00, 0xcc, 0x87, 0xbd, 0x0f, 0xad, 0xd0, 0x43, 0x02, + 0x5f, 0xc1, 0x86, 0x6f, 0xd5, 0x32, 0x57, 0x0d, 0xe3, 0x80, 0xc8, 0x00, + 0x5f, 0x0d, 0xe4, 0x43, 0x01, 0x86, 0x9e, 0xc4, 0x51, 0xb7, 0x0d, 0xe4, + 0x39, 0x0e, 0xc1, 0x86, 0xa4, 0xc6, 0x02, 0xd1, 0x0d, 0xe4, 0x29, 0xc3, + 0x02, 0xa3, 0x0d, 0xe4, 0x21, 0xc5, 0x1f, 0x0c, 0x0d, 0xe4, 0x11, 0xcb, + 0x8f, 0x94, 0x0d, 0xe4, 0x09, 0xc5, 0x31, 0xee, 0x0d, 0xe4, 0x00, 0x42, + 0x01, 0x6f, 0xc1, 0x86, 0xb0, 0xc6, 0xce, 0x8d, 0x0d, 0xe3, 0xd9, 0xc6, + 0x99, 0xc8, 0x0d, 0xe3, 0xd1, 0xd4, 0x3c, 0xdc, 0x0d, 0xe3, 0xb9, 0xc6, + 0x27, 0x9c, 0x0d, 0xe3, 0xb0, 0xcf, 0x61, 0x98, 0x0d, 0xe3, 0xa1, 0xd1, + 0x27, 0x91, 0x0d, 0xe3, 0x88, 0xc2, 0x00, 0x2b, 0x0d, 0xe1, 0xd1, 0x8a, + 0x0d, 0xe1, 0xc9, 0x91, 0x0d, 0xe2, 0xeb, 0x01, 0x86, 0xbf, 0xc2, 0x06, + 0xdb, 0x0d, 0xe2, 0xf9, 0x8b, 0x0d, 0xe2, 0xf1, 0x83, 0x0d, 0xe2, 0xe0, + 0x00, 0xc1, 0x86, 0xc3, 0x8a, 0x0d, 0xe0, 0x88, 0x00, 0xc1, 0x86, 0xcd, + 0x45, 0xd9, 0x89, 0xc1, 0x86, 0xfe, 0xc6, 0xcf, 0x17, 0x0d, 0xe2, 0x48, + 0x00, 0x41, 0x87, 0x1a, 0x00, 0xc1, 0x87, 0x38, 0x45, 0x44, 0xf8, 0x41, + 0x87, 0x49, 0x00, 0x41, 0x87, 0x59, 0x8a, 0x0d, 0xe0, 0xc1, 0xc2, 0x00, + 0x3f, 0x0d, 
0xe0, 0x81, 0x48, 0xb5, 0xfa, 0x41, 0x87, 0x6a, 0x8a, 0x0d, + 0xe0, 0xb9, 0x44, 0x08, 0x48, 0x41, 0x87, 0x72, 0x8e, 0x0d, 0xe0, 0xb0, + 0x8d, 0x0d, 0xe0, 0xa1, 0x00, 0x41, 0x87, 0x7a, 0x8a, 0x0d, 0xe0, 0x99, + 0xc2, 0x00, 0x3f, 0x0d, 0xe0, 0x68, 0xc2, 0x04, 0x4d, 0x0d, 0xe0, 0x70, + 0xc2, 0x04, 0x4d, 0x0d, 0xe0, 0x61, 0x47, 0xc0, 0x35, 0x41, 0x87, 0x84, + 0xc4, 0xe4, 0x37, 0x0d, 0xe1, 0xf0, 0xc8, 0xbb, 0x02, 0x0d, 0xe3, 0x50, + 0x99, 0x0d, 0xe2, 0x98, 0x97, 0x0d, 0xe2, 0xd9, 0x99, 0x0d, 0xe2, 0xd1, + 0xc2, 0x38, 0x2a, 0x0d, 0xe2, 0xc9, 0x83, 0x0d, 0xe2, 0x18, 0x8a, 0x0d, + 0xe2, 0xb9, 0xc2, 0x04, 0x4d, 0x0d, 0xe2, 0xa1, 0x8b, 0x0d, 0xe2, 0x50, + 0x97, 0x0d, 0xe2, 0x91, 0x87, 0x0d, 0xe2, 0x58, 0x87, 0x0d, 0xe2, 0x40, + 0xc2, 0x00, 0x59, 0x0d, 0xe2, 0x28, 0xca, 0xa2, 0xc4, 0x01, 0x71, 0xb1, + 0xcb, 0x98, 0x9a, 0x01, 0x71, 0xb8, 0xc5, 0x06, 0x82, 0x00, 0x04, 0x69, + 0x42, 0x01, 0x0f, 0xc1, 0x87, 0x8c, 0xc7, 0x27, 0x5d, 0x00, 0x02, 0xe3, + 0x01, 0x87, 0x98, 0xcd, 0x7b, 0x15, 0x0f, 0xb3, 0xf9, 0x55, 0x33, 0x92, + 0x41, 0x87, 0x9c, 0x14, 0xc1, 0x87, 0xa8, 0xc8, 0x68, 0xc5, 0x01, 0x18, + 0x81, 0x16, 0xc1, 0x87, 0xba, 0x15, 0xc1, 0x87, 0xcf, 0x12, 0xc1, 0x87, + 0xdb, 0x47, 0x00, 0x58, 0xc1, 0x87, 0xe7, 0xe0, 0x09, 0x27, 0x0f, 0xac, + 0xa9, 0xcc, 0x89, 0x79, 0x0f, 0xac, 0xa1, 0xc9, 0xb2, 0xf3, 0x01, 0x4d, + 0x81, 0xc5, 0x01, 0x95, 0x01, 0x4d, 0x1b, 0x01, 0x87, 0xf6, 0xd2, 0x4a, + 0x3f, 0x01, 0x70, 0x89, 0xcd, 0x2c, 0xb2, 0x01, 0x71, 0x71, 0xc5, 0x0a, + 0x8a, 0x01, 0x72, 0x08, 0x9f, 0x01, 0x37, 0x09, 0x9e, 0x01, 0x37, 0x00, + 0xd1, 0x53, 0x54, 0x01, 0x33, 0xd1, 0x45, 0x1a, 0xad, 0x41, 0x87, 0xfc, + 0x87, 0x05, 0x4a, 0x4b, 0x01, 0x88, 0x26, 0x03, 0xc1, 0x88, 0x2e, 0x91, + 0x05, 0x4a, 0x59, 0x97, 0x05, 0x4a, 0x41, 0x8b, 0x05, 0x4a, 0x38, 0x89, + 0x05, 0x4a, 0x78, 0x1b, 0xc1, 0x88, 0x36, 0xc2, 0x0e, 0x9a, 0x05, 0x4a, + 0x21, 0x09, 0xc1, 0x88, 0x40, 0x83, 0x05, 0x49, 0xa8, 0xc2, 0x01, 0x5d, + 0x05, 0x4a, 0x11, 0x83, 0x05, 0x49, 0xc0, 0x07, 0xc1, 0x88, 0x4a, 0xd5, + 0x32, 0x18, 0x01, 0x3e, 0x31, 0xcd, 0x25, 0xae, 0x00, 0x02, 0xeb, 0x01, + 0x88, 0x56, 0x0b, 0xc1, 0x88, 0x5a, 0x42, 0x00, 0x67, 0xc1, 0x88, 0x66, + 0xd3, 0x1f, 0xcd, 0x01, 0x70, 0x18, 0x10, 0xc1, 0x88, 0x75, 0x14, 0x41, + 0x88, 0x7f, 0xc9, 0x9b, 0x77, 0x01, 0x3e, 0xb1, 0x43, 0x02, 0x6f, 0xc1, + 0x88, 0x8b, 0xcf, 0x63, 0x5a, 0x0f, 0xdd, 0xe0, 0x43, 0x01, 0xd0, 0xc1, + 0x88, 0x97, 0xd5, 0x36, 0xb0, 0x0f, 0xab, 0xe8, 0xc7, 0xc9, 0xb2, 0x01, + 0x1d, 0xc9, 0xcd, 0x77, 0xfc, 0x01, 0x71, 0x08, 0xcc, 0x00, 0x33, 0x00, + 0x03, 0xeb, 0x01, 0x88, 0xaf, 0xc6, 0xb7, 0x3b, 0x01, 0x18, 0x49, 0xcd, + 0x69, 0x65, 0x01, 0x80, 0x68, 0x00, 0x41, 0x88, 0xb3, 0xc4, 0x20, 0xe6, + 0x01, 0x18, 0x59, 0x0b, 0x41, 0x88, 0xc5, 0x14, 0xc1, 0x88, 0xd1, 0xc3, + 0x00, 0x3a, 0x01, 0x15, 0x11, 0x0a, 0xc1, 0x88, 0xdd, 0xd5, 0x08, 0x89, + 0x01, 0x80, 0xa8, 0x45, 0x00, 0x5a, 0xc1, 0x88, 0xef, 0xd9, 0x1f, 0xc7, + 0x01, 0x70, 0x28, 0xcb, 0x8a, 0x0a, 0x01, 0x4e, 0xc9, 0x45, 0x01, 0xfd, + 0x41, 0x89, 0x05, 0xd6, 0x08, 0x88, 0x01, 0x4c, 0xc1, 0xd2, 0x21, 0x89, + 0x01, 0x80, 0x88, 0xca, 0x01, 0xfd, 0x01, 0x0f, 0x43, 0x01, 0x89, 0x21, + 0xc9, 0xb0, 0x6b, 0x01, 0x0c, 0xe8, 0x42, 0x00, 0x2c, 0xc1, 0x89, 0x25, + 0x42, 0x02, 0xa0, 0xc1, 0x89, 0x31, 0xd5, 0x37, 0xc1, 0x0f, 0xc5, 0x18, + 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x91, 0x42, 0x00, 0xe3, 0x41, 0x89, 0x3d, + 0x45, 0x11, 0x3a, 0xc1, 0x89, 0x49, 0x03, 0x41, 0x89, 0x55, 0x00, 0xc1, + 0x89, 0x61, 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xd0, 0xcb, 0x82, 0xba, 0x01, + 0x0f, 0x11, 0x46, 0x00, 0x59, 0x41, 0x89, 0x7e, 0xc5, 0xca, 0xa4, 0x0f, + 0xb3, 0x71, 
0xd7, 0x2a, 0x6b, 0x0f, 0xc5, 0x28, 0x45, 0x04, 0x90, 0xc1, + 0x89, 0x8d, 0xd8, 0x23, 0xdb, 0x0f, 0xc5, 0x09, 0xdf, 0x0c, 0x65, 0x0f, + 0xc5, 0x48, 0xd0, 0x56, 0xda, 0x0f, 0xc1, 0xb1, 0xe0, 0x01, 0xe7, 0x0f, + 0xc5, 0x58, 0xd0, 0x5a, 0x22, 0x0f, 0xa8, 0x71, 0xcd, 0x0b, 0x91, 0x01, + 0x19, 0x51, 0xd4, 0x3b, 0x9c, 0x01, 0x4f, 0xe9, 0xdb, 0x18, 0x39, 0x00, + 0x05, 0xd8, 0xdc, 0x14, 0x4d, 0x01, 0x3d, 0x49, 0xd7, 0x29, 0xe1, 0x01, + 0x49, 0xc0, 0xc7, 0x00, 0xfa, 0x01, 0x03, 0x39, 0xc8, 0xb6, 0xca, 0x01, + 0x01, 0x71, 0xc9, 0xb3, 0x9e, 0x01, 0x01, 0x59, 0xc4, 0x01, 0xc3, 0x01, + 0x00, 0x78, 0xd6, 0x2d, 0x4c, 0x00, 0x2c, 0x69, 0xc4, 0xb9, 0x3c, 0x0f, + 0xc8, 0xe1, 0xcb, 0x8f, 0xf7, 0x00, 0x7e, 0xaa, 0x01, 0x89, 0x99, 0xc4, + 0x00, 0x49, 0x01, 0x5d, 0x81, 0xc5, 0x00, 0x2c, 0x01, 0x5d, 0x88, 0xc4, + 0x00, 0x49, 0x01, 0x5d, 0x91, 0xc5, 0x00, 0x2c, 0x01, 0x5d, 0x98, 0xc2, + 0x02, 0xae, 0x01, 0x5d, 0xa1, 0xc4, 0x03, 0xc8, 0x01, 0x5d, 0xb0, 0xc2, + 0x02, 0xae, 0x01, 0x5d, 0xa9, 0xc4, 0x03, 0xc8, 0x01, 0x5d, 0xb8, 0xc7, + 0xc9, 0x42, 0x0f, 0x9d, 0x11, 0xc5, 0xdb, 0x41, 0x0f, 0xb7, 0xe0, 0xc6, + 0xd0, 0x2b, 0x0f, 0x93, 0x21, 0xc2, 0x00, 0x59, 0x0f, 0x93, 0x10, 0x00, + 0x41, 0x89, 0x9f, 0x0b, 0xc1, 0x89, 0xb1, 0xc3, 0x09, 0x9e, 0x01, 0x0b, + 0x18, 0xc2, 0x22, 0xcc, 0x01, 0x0b, 0x2b, 0x01, 0x89, 0xc3, 0xc4, 0x18, + 0x10, 0x01, 0x0b, 0x30, 0xc2, 0x00, 0xc4, 0x01, 0x0b, 0x4b, 0x01, 0x89, + 0xc9, 0x19, 0xc1, 0x89, 0xcf, 0xc4, 0x02, 0xde, 0x01, 0x0b, 0x10, 0xc5, + 0x66, 0xb1, 0x01, 0x0b, 0x51, 0xc4, 0x00, 0x2d, 0x01, 0x0b, 0x38, 0x42, + 0x09, 0x40, 0xc1, 0x89, 0xd9, 0xcb, 0x9a, 0x05, 0x08, 0x0c, 0x91, 0xcd, + 0x7a, 0xd4, 0x08, 0x0c, 0xc0, 0x46, 0x00, 0x8b, 0x41, 0x89, 0xe5, 0xc6, + 0x02, 0xe9, 0x0f, 0x8b, 0x61, 0xc6, 0x42, 0xd4, 0x0f, 0x8b, 0x59, 0xc6, + 0x5c, 0x5b, 0x0f, 0x8b, 0x50, 0xd8, 0x21, 0x3b, 0x01, 0x70, 0x38, 0xc5, + 0x06, 0x67, 0x08, 0x73, 0xe9, 0xc7, 0x08, 0x79, 0x08, 0x73, 0xe1, 0xc4, + 0x01, 0xce, 0x08, 0x73, 0xd8, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xd1, 0xc2, + 0x0d, 0x10, 0x08, 0x73, 0x88, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xc9, 0x9b, + 0x08, 0x73, 0x80, 0x44, 0x18, 0x10, 0xc1, 0x89, 0xf1, 0x42, 0x22, 0xcc, + 0x41, 0x89, 0xfd, 0x0b, 0xc1, 0x8a, 0x09, 0x11, 0x41, 0x8a, 0x15, 0x0a, + 0xc1, 0x8a, 0x21, 0x19, 0xc1, 0x8a, 0x2d, 0xc2, 0x00, 0xc4, 0x08, 0x73, + 0x48, 0xc4, 0x18, 0x10, 0x08, 0x73, 0x31, 0xc2, 0x22, 0xcc, 0x08, 0x73, + 0x28, 0xc3, 0x0d, 0x14, 0x08, 0x73, 0x21, 0xc3, 0x09, 0x9e, 0x08, 0x73, + 0x18, 0xc4, 0x02, 0xde, 0x08, 0x73, 0x11, 0xc2, 0x02, 0xa0, 0x08, 0x73, + 0x08, 0x08, 0xc1, 0x8a, 0x39, 0x91, 0x00, 0xb5, 0x73, 0x01, 0x8a, 0x45, + 0x15, 0xc1, 0x8a, 0x63, 0x8d, 0x00, 0xb7, 0x8b, 0x01, 0x8a, 0x7c, 0x9a, + 0x00, 0xb7, 0x51, 0x93, 0x00, 0xb7, 0x49, 0x0b, 0xc1, 0x8a, 0x82, 0x0e, + 0xc1, 0x8a, 0xa3, 0x85, 0x00, 0xb6, 0x6b, 0x01, 0x8a, 0xaf, 0x87, 0x00, + 0xb6, 0x13, 0x01, 0x8a, 0xbf, 0x86, 0x00, 0xb6, 0x8b, 0x01, 0x8a, 0xd7, + 0xcc, 0x84, 0xe1, 0x00, 0xb6, 0xb9, 0xd8, 0x25, 0x2b, 0x00, 0xb6, 0x91, + 0x16, 0xc1, 0x8a, 0xe3, 0x9c, 0x00, 0xb6, 0x71, 0x03, 0xc1, 0x8a, 0xef, + 0xcf, 0x60, 0xe4, 0x00, 0xb6, 0x41, 0x89, 0x00, 0xb5, 0xab, 0x01, 0x8b, + 0x07, 0xc7, 0xc7, 0xf2, 0x00, 0xb6, 0x19, 0xd1, 0x57, 0x0c, 0x00, 0xb5, + 0xf1, 0x42, 0x00, 0xd0, 0xc1, 0x8b, 0x11, 0x99, 0x00, 0xb5, 0x2b, 0x01, + 0x8b, 0x1d, 0xd0, 0x5d, 0x82, 0x00, 0xb5, 0x89, 0x9b, 0x00, 0xb5, 0x23, + 0x01, 0x8b, 0x23, 0xc9, 0xb4, 0x88, 0x00, 0xb5, 0x11, 0x98, 0x00, 0xb5, + 0x08, 0xa1, 0x70, 0x0c, 0x49, 0xa0, 0x70, 0x0c, 0x41, 0xa6, 0x70, 0x0c, + 0x71, 0xa5, 0x70, 0x0c, 0x69, 0xa4, 0x70, 0x0c, 0x61, 0xa3, 0x70, 0x0c, + 0x59, 0xa2, 
0x70, 0x0c, 0x51, 0x9f, 0x70, 0x0c, 0x39, 0x9e, 0x70, 0x0c, + 0x31, 0x9d, 0x70, 0x0c, 0x28, 0xa0, 0x70, 0x0b, 0x01, 0x9f, 0x70, 0x0a, + 0xf9, 0x9e, 0x70, 0x0a, 0xf1, 0x9d, 0x70, 0x0a, 0xe9, 0xa6, 0x70, 0x0b, + 0x31, 0xa5, 0x70, 0x0b, 0x29, 0xa4, 0x70, 0x0b, 0x21, 0xa3, 0x70, 0x0b, + 0x19, 0xa2, 0x70, 0x0b, 0x11, 0xa1, 0x70, 0x0b, 0x08, 0xa6, 0x70, 0x0a, + 0xe1, 0xa5, 0x70, 0x0a, 0xd9, 0xa4, 0x70, 0x0a, 0xd1, 0xa3, 0x70, 0x0a, + 0xc9, 0xa2, 0x70, 0x0a, 0xc1, 0xa1, 0x70, 0x0a, 0xb9, 0xa0, 0x70, 0x0a, + 0xb1, 0x9f, 0x70, 0x0a, 0xa9, 0x9e, 0x70, 0x0a, 0xa1, 0x9d, 0x70, 0x0a, + 0x98, 0xa6, 0x70, 0x0d, 0xb1, 0xa5, 0x70, 0x0d, 0xa9, 0xa4, 0x70, 0x0d, + 0xa1, 0xa3, 0x70, 0x0d, 0x99, 0xa2, 0x70, 0x0d, 0x91, 0xa1, 0x70, 0x0d, + 0x89, 0xa0, 0x70, 0x0d, 0x81, 0x9f, 0x70, 0x0d, 0x79, 0x9e, 0x70, 0x0d, + 0x71, 0x9d, 0x70, 0x0d, 0x68, 0xa6, 0x70, 0x0d, 0x61, 0xa5, 0x70, 0x0d, + 0x59, 0xa4, 0x70, 0x0d, 0x51, 0xa3, 0x70, 0x0d, 0x49, 0xa2, 0x70, 0x0d, + 0x41, 0xa1, 0x70, 0x0d, 0x39, 0xa0, 0x70, 0x0d, 0x31, 0x9f, 0x70, 0x0d, + 0x29, 0x9e, 0x70, 0x0d, 0x21, 0x9d, 0x70, 0x0d, 0x18, 0xa6, 0x70, 0x0d, + 0x11, 0xa5, 0x70, 0x0d, 0x09, 0xa4, 0x70, 0x0d, 0x01, 0xa3, 0x70, 0x0c, + 0xf9, 0xa2, 0x70, 0x0c, 0xf1, 0xa1, 0x70, 0x0c, 0xe9, 0xa0, 0x70, 0x0c, + 0xe1, 0x9f, 0x70, 0x0c, 0xd9, 0x9e, 0x70, 0x0c, 0xd1, 0x9d, 0x70, 0x0c, + 0xc8, 0xa6, 0x70, 0x0c, 0xc1, 0xa5, 0x70, 0x0c, 0xb9, 0xa4, 0x70, 0x0c, + 0xb1, 0xa3, 0x70, 0x0c, 0xa9, 0xa2, 0x70, 0x0c, 0xa1, 0xa1, 0x70, 0x0c, + 0x99, 0xa0, 0x70, 0x0c, 0x91, 0x9f, 0x70, 0x0c, 0x89, 0x9e, 0x70, 0x0c, + 0x81, 0x9d, 0x70, 0x0c, 0x78, 0xa6, 0x70, 0x0c, 0x21, 0xa5, 0x70, 0x0c, + 0x19, 0xa4, 0x70, 0x0c, 0x11, 0xa3, 0x70, 0x0c, 0x09, 0xa2, 0x70, 0x0c, + 0x01, 0xa1, 0x70, 0x0b, 0xf9, 0xa0, 0x70, 0x0b, 0xf1, 0x9f, 0x70, 0x0b, + 0xe9, 0x9e, 0x70, 0x0b, 0xe1, 0x9d, 0x70, 0x0b, 0xd8, 0xa6, 0x70, 0x0b, + 0xd1, 0xa5, 0x70, 0x0b, 0xc9, 0xa4, 0x70, 0x0b, 0xc1, 0xa3, 0x70, 0x0b, + 0xb9, 0xa2, 0x70, 0x0b, 0xb1, 0xa1, 0x70, 0x0b, 0xa9, 0xa0, 0x70, 0x0b, + 0xa1, 0x9f, 0x70, 0x0b, 0x99, 0x9e, 0x70, 0x0b, 0x91, 0x9d, 0x70, 0x0b, + 0x88, 0xa6, 0x70, 0x0b, 0x81, 0xa5, 0x70, 0x0b, 0x79, 0xa4, 0x70, 0x0b, + 0x71, 0xa3, 0x70, 0x0b, 0x69, 0xa2, 0x70, 0x0b, 0x61, 0xa1, 0x70, 0x0b, + 0x59, 0xa0, 0x70, 0x0b, 0x51, 0x9f, 0x70, 0x0b, 0x49, 0x9e, 0x70, 0x0b, + 0x41, 0x9d, 0x70, 0x0b, 0x38, 0xa3, 0x70, 0x0f, 0x79, 0xa2, 0x70, 0x0f, + 0x71, 0xa1, 0x70, 0x0f, 0x69, 0xa0, 0x70, 0x0f, 0x61, 0x9f, 0x70, 0x0f, + 0x59, 0x9e, 0x70, 0x0f, 0x51, 0x9d, 0x70, 0x0f, 0x48, 0xa6, 0x70, 0x0f, + 0x41, 0xa5, 0x70, 0x0f, 0x39, 0xa4, 0x70, 0x0f, 0x31, 0xa3, 0x70, 0x0f, + 0x29, 0xa2, 0x70, 0x0f, 0x21, 0xa1, 0x70, 0x0f, 0x19, 0xa0, 0x70, 0x0f, + 0x11, 0x9f, 0x70, 0x0f, 0x09, 0x9e, 0x70, 0x0f, 0x01, 0x9d, 0x70, 0x0e, + 0xf8, 0xa6, 0x70, 0x0e, 0xf1, 0xa5, 0x70, 0x0e, 0xe9, 0xa4, 0x70, 0x0e, + 0xe1, 0xa3, 0x70, 0x0e, 0xd9, 0xa2, 0x70, 0x0e, 0xd1, 0xa1, 0x70, 0x0e, + 0xc9, 0xa0, 0x70, 0x0e, 0xc1, 0x9f, 0x70, 0x0e, 0xb9, 0x9e, 0x70, 0x0e, + 0xb1, 0x9d, 0x70, 0x0e, 0xa8, 0xa6, 0x70, 0x0e, 0xa1, 0xa5, 0x70, 0x0e, + 0x99, 0xa4, 0x70, 0x0e, 0x91, 0xa3, 0x70, 0x0e, 0x89, 0xa2, 0x70, 0x0e, + 0x81, 0xa1, 0x70, 0x0e, 0x79, 0xa0, 0x70, 0x0e, 0x71, 0x9f, 0x70, 0x0e, + 0x69, 0x9e, 0x70, 0x0e, 0x61, 0x9d, 0x70, 0x0e, 0x58, 0xa6, 0x70, 0x0e, + 0x51, 0xa5, 0x70, 0x0e, 0x49, 0xa4, 0x70, 0x0e, 0x41, 0xa3, 0x70, 0x0e, + 0x39, 0xa2, 0x70, 0x0e, 0x31, 0xa1, 0x70, 0x0e, 0x29, 0xa0, 0x70, 0x0e, + 0x21, 0x9f, 0x70, 0x0e, 0x19, 0x9e, 0x70, 0x0e, 0x11, 0x9d, 0x70, 0x0e, + 0x08, 0xa6, 0x70, 0x0e, 0x01, 0xa5, 0x70, 0x0d, 0xf9, 0xa4, 0x70, 0x0d, + 0xf1, 0xa3, 
0x70, 0x0d, 0xe9, 0xa2, 0x70, 0x0d, 0xe1, 0xa1, 0x70, 0x0d, + 0xd9, 0xa0, 0x70, 0x0d, 0xd1, 0x9f, 0x70, 0x0d, 0xc9, 0x9e, 0x70, 0x0d, + 0xc1, 0x9d, 0x70, 0x0d, 0xb8, 0x87, 0x05, 0x2f, 0x0b, 0x01, 0x8b, 0x27, + 0x0a, 0xc1, 0x8b, 0x32, 0x19, 0xc1, 0x8b, 0x55, 0x12, 0xc1, 0x8b, 0x78, + 0x04, 0xc1, 0x8b, 0x92, 0x0f, 0xc1, 0x8b, 0xb0, 0x0d, 0xc1, 0x8b, 0xd4, + 0x09, 0xc1, 0x8b, 0xf5, 0x08, 0xc1, 0x8c, 0x13, 0x18, 0xc1, 0x8c, 0x2d, + 0x16, 0xc1, 0x8c, 0x47, 0x06, 0xc1, 0x8c, 0x65, 0x0e, 0xc1, 0x8c, 0x83, + 0x14, 0xc1, 0x8c, 0x9d, 0x10, 0xc1, 0x8c, 0xb7, 0x15, 0xc1, 0x8c, 0xe4, + 0x1c, 0xc1, 0x8d, 0x02, 0x05, 0xc1, 0x8d, 0x20, 0x0c, 0xc1, 0x8d, 0x3a, + 0x1b, 0xc1, 0x8d, 0x54, 0x8b, 0x05, 0x29, 0x23, 0x01, 0x8d, 0x6e, 0x83, + 0x05, 0x2a, 0x4b, 0x01, 0x8d, 0x72, 0x91, 0x05, 0x2d, 0xd3, 0x01, 0x8d, + 0x76, 0x97, 0x05, 0x2c, 0xaa, 0x01, 0x8d, 0x81, 0x08, 0xc1, 0x8d, 0x85, + 0x0d, 0xc1, 0x8d, 0x91, 0x16, 0xc1, 0x8d, 0x9d, 0xc3, 0xe6, 0x5f, 0x05, + 0x30, 0xb1, 0xc4, 0x10, 0xd0, 0x05, 0x30, 0xb9, 0x06, 0xc1, 0x8d, 0xaf, + 0xc4, 0x9d, 0xd8, 0x05, 0x30, 0xf8, 0xc2, 0x02, 0xa0, 0x05, 0x31, 0x11, + 0xc4, 0x02, 0xde, 0x05, 0x31, 0x18, 0xc3, 0x09, 0x9e, 0x05, 0x31, 0x21, + 0xc3, 0x0d, 0x14, 0x05, 0x31, 0x28, 0xc2, 0x22, 0xcc, 0x05, 0x31, 0x31, + 0xc4, 0x18, 0x10, 0x05, 0x31, 0x38, 0x9f, 0x0f, 0xdb, 0x81, 0xa0, 0x0f, + 0xdb, 0x89, 0xa1, 0x0f, 0xdb, 0x91, 0xa2, 0x0f, 0xdb, 0x99, 0xa3, 0x0f, + 0xdb, 0xa1, 0xa4, 0x0f, 0xdb, 0xa8, 0xd6, 0x30, 0x7a, 0x01, 0x3e, 0x51, + 0xd5, 0x38, 0x00, 0x01, 0x4e, 0x81, 0xd6, 0x30, 0x38, 0x01, 0x57, 0x11, + 0xd5, 0x34, 0xcd, 0x01, 0x57, 0x20, 0x00, 0x41, 0x8d, 0xb9, 0x42, 0x00, + 0x03, 0xc1, 0x8d, 0xc5, 0xcc, 0x89, 0x55, 0x0f, 0xb5, 0x31, 0xc4, 0x1e, + 0xc9, 0x01, 0x71, 0x78, 0xc4, 0x01, 0xc3, 0x01, 0x81, 0x8b, 0x01, 0x8d, + 0xd4, 0xd6, 0x31, 0x2a, 0x01, 0x81, 0x92, 0x01, 0x8d, 0xd8, 0x46, 0x0f, + 0x88, 0xc1, 0x8d, 0xde, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x31, 0x46, 0x01, + 0xfc, 0xc1, 0x8d, 0xea, 0xcf, 0x61, 0xd4, 0x0f, 0xb3, 0xe9, 0x15, 0xc1, + 0x8d, 0xf6, 0xd4, 0x3c, 0xb4, 0x0f, 0xbd, 0x98, 0xcc, 0x07, 0xc7, 0x01, + 0x16, 0xc9, 0xc9, 0x00, 0xca, 0x01, 0x16, 0xc0, 0xc7, 0xc2, 0xb2, 0x00, + 0xe7, 0xb9, 0xcb, 0x40, 0xe1, 0x00, 0xe7, 0x91, 0x48, 0x14, 0x39, 0x41, + 0x8e, 0x08, 0xd3, 0x40, 0xd9, 0x00, 0xe7, 0x99, 0xd3, 0x3f, 0xa9, 0x00, + 0xe7, 0x81, 0x50, 0x5f, 0x62, 0x41, 0x8e, 0x23, 0xc8, 0x74, 0xc4, 0x00, + 0xe7, 0x2b, 0x01, 0x8e, 0x2f, 0xc6, 0x74, 0xc6, 0x00, 0xe7, 0x1b, 0x01, + 0x8e, 0x35, 0xc7, 0x02, 0x40, 0x00, 0xe7, 0x10, 0x45, 0x00, 0x5a, 0xc1, + 0x8e, 0x3b, 0xc7, 0x0e, 0x70, 0x00, 0xe6, 0xe8, 0xc8, 0x9e, 0xe8, 0x00, + 0xe7, 0xc1, 0x43, 0x61, 0x97, 0x41, 0x8e, 0x47, 0xc5, 0x00, 0xd4, 0x00, + 0xe7, 0xa1, 0xc5, 0x05, 0x02, 0x00, 0xe6, 0xc0, 0xcf, 0x67, 0xce, 0x00, + 0xe6, 0xf9, 0xcd, 0x04, 0xfa, 0x00, 0xe6, 0xf1, 0xcd, 0x7d, 0x78, 0x00, + 0xe6, 0xd8, 0xce, 0x74, 0xbe, 0x00, 0xe6, 0xe1, 0xc6, 0xcd, 0xa9, 0x00, + 0xe6, 0x80, 0xdb, 0x17, 0xe8, 0x00, 0xe6, 0xbb, 0x01, 0x8e, 0x4d, 0xd3, + 0x02, 0x34, 0x00, 0xe6, 0xb1, 0xde, 0x0f, 0xf4, 0x00, 0xe6, 0xa8, 0xc2, + 0x00, 0x51, 0x08, 0x2b, 0x89, 0x87, 0x08, 0x2b, 0x90, 0x87, 0x08, 0x2b, + 0x99, 0xc2, 0x01, 0x7f, 0x08, 0x2b, 0xa0, 0x87, 0x08, 0x2b, 0xa9, 0xc2, + 0x01, 0x7f, 0x08, 0x2b, 0xb0, 0x8b, 0x08, 0x2b, 0xb8, 0xc2, 0x00, 0xd0, + 0x08, 0x2b, 0xe9, 0x83, 0x08, 0x2b, 0xe0, 0xc2, 0x1c, 0x52, 0x08, 0x2b, + 0xf8, 0xc2, 0x00, 0xdb, 0x08, 0x2c, 0x19, 0x83, 0x08, 0x2c, 0x10, 0x87, + 0x08, 0x2c, 0x29, 0xc2, 0x1c, 0x52, 0x08, 0x2c, 0x30, 0xc2, 0x01, 0x7f, + 0x08, 0x2c, 0x69, 0x87, 0x08, 0x2c, 0x60, 0x87, 0x08, 0x2c, 0x71, 0xc2, + 0x01, 0x7f, 
0x08, 0x2c, 0x78, 0xc2, 0x00, 0x51, 0x08, 0x2c, 0xc1, 0x87, + 0x08, 0x2c, 0xc8, 0x87, 0x08, 0x2c, 0xd1, 0xc2, 0x01, 0x7f, 0x08, 0x2c, + 0xd8, 0x87, 0x08, 0x2c, 0xe1, 0xc2, 0x01, 0x7f, 0x08, 0x2c, 0xe8, 0x8b, + 0x08, 0x2c, 0xf0, 0x83, 0x08, 0x2d, 0x19, 0xc2, 0x00, 0xd0, 0x08, 0x2d, + 0x20, 0xc2, 0x1c, 0x52, 0x08, 0x2d, 0x30, 0x83, 0x08, 0x2d, 0x49, 0xc2, + 0x00, 0xdb, 0x08, 0x2d, 0x50, 0x87, 0x08, 0x2d, 0x61, 0xc2, 0x1c, 0x52, + 0x08, 0x2d, 0x68, 0x87, 0x08, 0x2d, 0x99, 0xc2, 0x01, 0x7f, 0x08, 0x2d, + 0xa0, 0x87, 0x08, 0x2d, 0xa9, 0xc2, 0x01, 0x7f, 0x08, 0x2d, 0xb0, 0xc7, + 0x3f, 0xe8, 0x01, 0x0a, 0xe9, 0xc6, 0xd3, 0x5b, 0x01, 0x0a, 0xd0, 0xc7, + 0x3f, 0xe8, 0x01, 0x0a, 0xe1, 0xc6, 0x9c, 0x06, 0x01, 0x0a, 0xb9, 0xc8, + 0x08, 0x79, 0x00, 0x05, 0xf0, 0xc6, 0x9c, 0x06, 0x01, 0x0a, 0xb1, 0xc6, + 0x8d, 0x4d, 0x01, 0x0a, 0xa0, 0xc4, 0x9d, 0x74, 0x01, 0x0a, 0xc9, 0xc6, + 0xcf, 0x29, 0x01, 0x0a, 0x80, 0xc4, 0x06, 0x68, 0x01, 0x0a, 0x99, 0xc4, + 0x0f, 0x1f, 0x01, 0x0a, 0x90, 0xca, 0x1f, 0x0e, 0x70, 0x03, 0x01, 0xcf, + 0x54, 0xbb, 0x70, 0x01, 0xf0, 0xc7, 0x80, 0x2f, 0x70, 0x02, 0xf9, 0x07, + 0xc1, 0x8e, 0x53, 0x45, 0x0b, 0x12, 0x41, 0x8e, 0x5f, 0xd0, 0x08, 0xf7, + 0x70, 0x02, 0xf1, 0x11, 0x41, 0x8e, 0x6b, 0x45, 0x00, 0x2d, 0xc1, 0x8e, + 0x77, 0xce, 0x61, 0xd5, 0x70, 0x02, 0xe0, 0xcb, 0x2c, 0xb4, 0x70, 0x01, + 0xf9, 0xcc, 0x01, 0xbb, 0x70, 0x01, 0x10, 0xca, 0x0e, 0xbe, 0x70, 0x01, + 0xe9, 0xcf, 0x0f, 0x0a, 0x70, 0x01, 0x08, 0xc8, 0x52, 0x00, 0x70, 0x01, + 0xd9, 0xc6, 0x27, 0x5e, 0x70, 0x01, 0x79, 0xc4, 0x40, 0x89, 0x70, 0x01, + 0x00, 0x45, 0x09, 0x98, 0xc1, 0x8e, 0x89, 0xca, 0x99, 0x61, 0x70, 0x01, + 0x20, 0xc8, 0x60, 0xf4, 0x70, 0x01, 0x59, 0xcb, 0x8e, 0x13, 0x70, 0x01, + 0x28, 0xc7, 0x0b, 0x00, 0x70, 0x01, 0x51, 0xc9, 0x2d, 0x85, 0x70, 0x01, + 0x39, 0xc8, 0x36, 0x21, 0x70, 0x01, 0x30, 0x97, 0x00, 0xbb, 0x99, 0x8b, + 0x00, 0xbb, 0x90, 0xc2, 0x0d, 0xf6, 0x00, 0xbb, 0x81, 0xc2, 0x01, 0x4a, + 0x00, 0xbb, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xbb, 0x71, 0xc2, 0x19, 0x2c, + 0x00, 0xbb, 0x61, 0xc2, 0x01, 0xc3, 0x00, 0xbb, 0x59, 0xc2, 0x01, 0x5d, + 0x00, 0xbb, 0x51, 0xc2, 0x00, 0xb0, 0x00, 0xbb, 0x49, 0x10, 0xc1, 0x8e, + 0xad, 0xc2, 0x0e, 0x9a, 0x00, 0xbb, 0x39, 0xc2, 0x01, 0x6f, 0x00, 0xbb, + 0x31, 0xc2, 0x01, 0x30, 0x00, 0xbb, 0x21, 0xc2, 0x02, 0x2b, 0x00, 0xbb, + 0x19, 0x97, 0x00, 0xbb, 0x11, 0x8b, 0x00, 0xbb, 0x09, 0x83, 0x00, 0xbb, + 0x00, 0x83, 0x00, 0xb8, 0x03, 0x01, 0x8e, 0xb7, 0xc2, 0x00, 0xd0, 0x00, + 0xb8, 0x89, 0xc2, 0x0d, 0xf6, 0x00, 0xb8, 0x81, 0xc2, 0x01, 0x4a, 0x00, + 0xb8, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xb8, 0x71, 0xc2, 0x00, 0x39, 0x00, + 0xb8, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xb8, 0x61, 0xc2, 0x01, 0xc3, 0x00, + 0xb8, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xb8, 0x51, 0xc2, 0x00, 0xb0, 0x00, + 0xb8, 0x49, 0x10, 0xc1, 0x8e, 0xbd, 0xc2, 0x0e, 0x9a, 0x00, 0xb8, 0x39, + 0xc2, 0x01, 0x6f, 0x00, 0xb8, 0x31, 0xc2, 0x01, 0x30, 0x00, 0xb8, 0x21, + 0xc2, 0x02, 0x2b, 0x00, 0xb8, 0x19, 0x97, 0x00, 0xb8, 0x11, 0x8b, 0x00, + 0xb8, 0x08, 0xc8, 0x7a, 0x8b, 0x00, 0xb8, 0xa9, 0xc6, 0x1e, 0x95, 0x00, + 0xb8, 0xa0, 0x97, 0x00, 0xb8, 0x99, 0x8b, 0x00, 0xb8, 0x90, 0x4a, 0xa3, + 0x3c, 0xc1, 0x8e, 0xc7, 0xce, 0x1c, 0x92, 0x0b, 0x7f, 0x00, 0x46, 0x09, + 0x97, 0xc1, 0x8e, 0xe7, 0x47, 0x02, 0x0e, 0x41, 0x8f, 0x0b, 0x44, 0x00, + 0xbb, 0xc1, 0x8f, 0x77, 0xd1, 0x55, 0xeb, 0x08, 0xff, 0x79, 0xc9, 0xaf, + 0x9c, 0x08, 0xff, 0x61, 0xcc, 0x8a, 0x69, 0x08, 0xff, 0x38, 0xc9, 0xab, + 0x0a, 0x08, 0xff, 0x69, 0x4b, 0x9a, 0x10, 0x41, 0x8f, 0x9f, 0xcb, 0x94, + 0xfe, 0x08, 0xff, 0x59, 0xcd, 0x73, 0x0d, 0x00, 0x5e, 0xb9, 0xcc, 0x8a, + 0x51, 0x00, 
0x5f, 0xc0, 0xcb, 0x97, 0xea, 0x08, 0xff, 0x51, 0xca, 0x97, + 0xa9, 0x00, 0x5f, 0xb8, 0xc8, 0x42, 0xd2, 0x08, 0xff, 0x31, 0x46, 0x02, + 0x0f, 0x41, 0x8f, 0xab, 0xd3, 0x43, 0xf7, 0x08, 0xff, 0x29, 0x45, 0x09, + 0x98, 0xc1, 0x90, 0x12, 0xc7, 0xbf, 0xf6, 0x00, 0x5f, 0x99, 0xc9, 0xb0, + 0x59, 0x00, 0x5f, 0xb0, 0xd8, 0x25, 0x8b, 0x08, 0xfe, 0xa1, 0x46, 0x02, + 0xdd, 0xc1, 0x90, 0x36, 0x44, 0x05, 0x14, 0x41, 0x90, 0x4e, 0x03, 0xc1, + 0x90, 0x74, 0x8b, 0x00, 0x5d, 0xfb, 0x01, 0x90, 0x80, 0x97, 0x00, 0x5e, + 0x0b, 0x01, 0x90, 0x84, 0x87, 0x00, 0x5e, 0x33, 0x01, 0x90, 0x88, 0x91, + 0x00, 0x5e, 0x52, 0x01, 0x90, 0x8c, 0xc3, 0x09, 0x41, 0x00, 0x5f, 0x81, + 0x44, 0x05, 0x14, 0xc1, 0x90, 0x90, 0xc4, 0x00, 0xba, 0x00, 0x5f, 0xd0, + 0xc4, 0x26, 0x78, 0x08, 0xb6, 0x49, 0xc5, 0x06, 0xdb, 0x08, 0xb6, 0x41, + 0x15, 0xc1, 0x90, 0x9c, 0x08, 0xc1, 0x90, 0xa8, 0x16, 0xc1, 0x90, 0xb4, + 0xc3, 0x05, 0x14, 0x08, 0xb6, 0x09, 0xc4, 0x15, 0xe7, 0x08, 0xb6, 0x00, + 0x83, 0x08, 0xb4, 0x03, 0x01, 0x90, 0xc0, 0x14, 0xc1, 0x90, 0xd2, 0xc2, + 0x00, 0xd0, 0x08, 0xb5, 0x49, 0x15, 0xc1, 0x90, 0xdc, 0xc2, 0x02, 0x41, + 0x08, 0xb5, 0x31, 0xc2, 0x00, 0xdb, 0x08, 0xb5, 0x29, 0xc2, 0x19, 0x2c, + 0x08, 0xb5, 0x19, 0xc2, 0x01, 0xc3, 0x08, 0xb5, 0x11, 0x04, 0xc1, 0x90, + 0xe6, 0x12, 0xc1, 0x90, 0xf0, 0x10, 0xc1, 0x90, 0xfa, 0x06, 0xc1, 0x91, + 0x10, 0x16, 0xc1, 0x91, 0x1e, 0x0c, 0xc1, 0x91, 0x2c, 0x05, 0xc1, 0x91, + 0x36, 0x09, 0xc1, 0x91, 0x40, 0x0d, 0xc1, 0x91, 0x4a, 0x91, 0x08, 0xb4, + 0x41, 0x87, 0x08, 0xb4, 0x31, 0x97, 0x08, 0xb4, 0x23, 0x01, 0x91, 0x54, + 0x8b, 0x08, 0xb4, 0x12, 0x01, 0x91, 0x58, 0xc5, 0x33, 0x5d, 0x08, 0xb5, + 0xb9, 0x42, 0x07, 0xb2, 0xc1, 0x91, 0x5c, 0xc8, 0x14, 0x38, 0x08, 0xb5, + 0x58, 0x03, 0xc1, 0x91, 0x68, 0x91, 0x08, 0xb5, 0xa1, 0x87, 0x08, 0xb5, + 0x91, 0x97, 0x08, 0xb5, 0x83, 0x01, 0x91, 0x74, 0x8b, 0x08, 0xb5, 0x72, + 0x01, 0x91, 0x78, 0xc5, 0xde, 0x25, 0x00, 0xd5, 0x69, 0x0a, 0xc1, 0x91, + 0x7c, 0x42, 0x0d, 0xf6, 0xc1, 0x91, 0x88, 0x0d, 0xc1, 0x91, 0x9d, 0x44, + 0x38, 0x7e, 0xc1, 0x91, 0xb2, 0x14, 0xc1, 0x91, 0xc7, 0xc6, 0xca, 0xc7, + 0x00, 0xd5, 0x29, 0xc5, 0xdc, 0xcc, 0x00, 0xd5, 0x03, 0x01, 0x91, 0xd3, + 0x45, 0x28, 0xb1, 0x41, 0x91, 0xd9, 0xc4, 0x26, 0x78, 0x00, 0xd4, 0xc9, + 0xc5, 0x06, 0xdb, 0x00, 0xd4, 0xc1, 0x15, 0xc1, 0x91, 0xe1, 0x08, 0xc1, + 0x91, 0xed, 0x16, 0xc1, 0x91, 0xf9, 0xc3, 0x05, 0x14, 0x00, 0xd4, 0x89, + 0xc4, 0x15, 0xe7, 0x00, 0xd4, 0x80, 0xc4, 0x26, 0x78, 0x00, 0xd4, 0x49, + 0xc5, 0x06, 0xdb, 0x00, 0xd4, 0x41, 0x15, 0xc1, 0x92, 0x05, 0x08, 0xc1, + 0x92, 0x11, 0x16, 0xc1, 0x92, 0x1d, 0xc3, 0x05, 0x14, 0x00, 0xd4, 0x09, + 0xc4, 0x15, 0xe7, 0x00, 0xd4, 0x00, 0xd9, 0x1d, 0xd3, 0x00, 0xd3, 0xf9, + 0x4d, 0x30, 0x92, 0x41, 0x92, 0x29, 0x91, 0x00, 0xd3, 0x5b, 0x01, 0x92, + 0x49, 0x16, 0xc1, 0x92, 0x57, 0x83, 0x00, 0xd3, 0x0b, 0x01, 0x92, 0x63, + 0x87, 0x00, 0xd3, 0x71, 0x97, 0x00, 0xd3, 0x4b, 0x01, 0x92, 0x6f, 0x8b, + 0x00, 0xd3, 0x2b, 0x01, 0x92, 0x7a, 0xc7, 0xc2, 0xce, 0x00, 0xd3, 0x10, + 0xc8, 0xbd, 0xea, 0x00, 0xd2, 0xa1, 0x0e, 0xc1, 0x92, 0x7e, 0xc2, 0x01, + 0x24, 0x00, 0xd2, 0x91, 0xc2, 0x02, 0xe0, 0x00, 0xd2, 0x89, 0x97, 0x00, + 0xd2, 0x7b, 0x01, 0x92, 0x97, 0x8b, 0x00, 0xd2, 0x6b, 0x01, 0x92, 0x9b, + 0x83, 0x00, 0xd2, 0x59, 0x45, 0x08, 0xcb, 0xc1, 0x92, 0x9f, 0xc2, 0x01, + 0x4a, 0x00, 0xd2, 0x29, 0x14, 0xc1, 0x92, 0xcb, 0xc2, 0x01, 0xc3, 0x00, + 0xd1, 0xf1, 0xc2, 0x01, 0x5d, 0x00, 0xd1, 0xb9, 0x10, 0xc1, 0x92, 0xd8, + 0xc2, 0x0e, 0x9a, 0x00, 0xd1, 0x78, 0x44, 0x1a, 0xce, 0xc1, 0x92, 0xe8, + 0x15, 0xc1, 0x92, 0xfc, 0xc2, 0x00, 0xd0, 0x00, 0xca, 0xb9, 0x83, 0x00, + 0xca, 0xb0, 
0x8b, 0x00, 0xcb, 0x69, 0xc2, 0x0f, 0xe1, 0x00, 0xcb, 0x60, + 0x8a, 0x00, 0xcb, 0x31, 0x87, 0x00, 0xcb, 0x28, 0x87, 0x00, 0xcb, 0x50, + 0x91, 0x00, 0xcb, 0x40, 0x83, 0x00, 0xcb, 0x11, 0xc2, 0x01, 0x30, 0x00, + 0xca, 0x90, 0xc2, 0x00, 0xd0, 0x00, 0xcb, 0x01, 0x83, 0x00, 0xca, 0x80, + 0xc2, 0x00, 0xd0, 0x00, 0xca, 0xd1, 0x83, 0x00, 0xca, 0xc8, 0x42, 0x00, + 0xe8, 0xc1, 0x93, 0x06, 0xc6, 0xd3, 0x49, 0x05, 0x56, 0xf1, 0xc3, 0x71, + 0xe5, 0x05, 0x56, 0xe9, 0xc5, 0xda, 0x2e, 0x05, 0x56, 0xe0, 0xc4, 0x7b, + 0x07, 0x05, 0x56, 0x11, 0xc3, 0x1c, 0xd6, 0x05, 0x56, 0x09, 0xc5, 0xda, + 0x2e, 0x05, 0x56, 0x01, 0xc2, 0x13, 0x4c, 0x05, 0x55, 0xf8, 0x03, 0xc1, + 0x93, 0x10, 0x97, 0x05, 0x55, 0xa3, 0x01, 0x93, 0x26, 0x8b, 0x05, 0x55, + 0x93, 0x01, 0x93, 0x31, 0x87, 0x05, 0x55, 0xa9, 0x91, 0x05, 0x55, 0xb0, + 0xc3, 0x01, 0x95, 0x05, 0x55, 0x81, 0xc3, 0x01, 0xfd, 0x05, 0x55, 0xb8, + 0x45, 0x08, 0xcb, 0xc1, 0x93, 0x35, 0x44, 0x05, 0x36, 0x41, 0x93, 0x8f, + 0xcb, 0x50, 0x7f, 0x01, 0x36, 0x51, 0xc8, 0xbd, 0x02, 0x01, 0x5e, 0x10, + 0xc6, 0x30, 0x98, 0x01, 0x18, 0xc9, 0x44, 0x06, 0x1f, 0x41, 0x93, 0xe9, + 0x46, 0x10, 0x29, 0xc1, 0x93, 0xf5, 0xc5, 0xce, 0x22, 0x01, 0x71, 0xc0, + 0xc6, 0xd2, 0x71, 0x01, 0x0a, 0x71, 0x52, 0x46, 0xb6, 0xc1, 0x94, 0x01, + 0x45, 0x1a, 0x38, 0xc1, 0x94, 0x0d, 0xc8, 0x52, 0x00, 0x01, 0x71, 0xa8, + 0xc8, 0x36, 0x21, 0x01, 0x0a, 0x59, 0xc4, 0x01, 0x96, 0x01, 0x4d, 0x10, + 0xc8, 0xbd, 0x0a, 0x01, 0x09, 0x91, 0xc4, 0x0a, 0x8b, 0x01, 0x71, 0x90, + 0xd0, 0x59, 0xe2, 0x01, 0x3e, 0x01, 0xce, 0x05, 0x19, 0x01, 0x02, 0xb0, + 0x50, 0x5a, 0x72, 0xc1, 0x94, 0x19, 0xcf, 0x65, 0x85, 0x01, 0x59, 0x88, + 0xd0, 0x27, 0x1f, 0x01, 0x0f, 0xb1, 0x44, 0x39, 0xfd, 0x41, 0x94, 0x25, + 0x4c, 0x89, 0x85, 0xc1, 0x94, 0x3d, 0x4b, 0x95, 0x35, 0xc1, 0x94, 0x49, + 0x43, 0x07, 0x6e, 0xc1, 0x94, 0x4f, 0x4c, 0x80, 0x91, 0x41, 0x94, 0x55, + 0x15, 0xc1, 0x94, 0x5b, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x08, 0xce, 0x73, + 0x7c, 0x01, 0x10, 0x21, 0xc6, 0xd3, 0xc7, 0x01, 0x10, 0x18, 0xc8, 0xb8, + 0xba, 0x00, 0x3d, 0x79, 0xc6, 0xcb, 0xc3, 0x00, 0x3d, 0x71, 0xc8, 0xbb, + 0x1a, 0x00, 0x3d, 0x58, 0xc8, 0xb8, 0x32, 0x00, 0x3d, 0x49, 0xc6, 0xcc, + 0x5f, 0x00, 0x3d, 0x61, 0xc8, 0xb6, 0x52, 0x00, 0x3d, 0x68, 0xc8, 0xb8, + 0xaa, 0x00, 0x3d, 0x39, 0xc6, 0xcd, 0x01, 0x00, 0x3d, 0x30, 0xc5, 0xda, + 0xec, 0x00, 0x3d, 0x29, 0xc5, 0xd8, 0x3a, 0x00, 0x3d, 0x21, 0x09, 0xc1, + 0x94, 0x67, 0x16, 0xc1, 0x94, 0x79, 0x06, 0xc1, 0x94, 0x92, 0x15, 0xc1, + 0x94, 0x9c, 0x0a, 0xc1, 0x94, 0xac, 0xc9, 0xb4, 0xd9, 0x00, 0x3c, 0xb9, + 0xc8, 0xb7, 0x22, 0x00, 0x3c, 0xb1, 0xc8, 0xbd, 0x92, 0x00, 0x3c, 0xa9, + 0xc3, 0xa9, 0x9c, 0x00, 0x3c, 0xa1, 0x1c, 0xc1, 0x94, 0xb8, 0x0e, 0xc1, + 0x94, 0xc0, 0xc5, 0xde, 0x7a, 0x00, 0x3c, 0x51, 0xc5, 0xdb, 0x00, 0x00, + 0x3c, 0x49, 0xc5, 0xd8, 0xd0, 0x00, 0x3c, 0x41, 0x03, 0xc1, 0x94, 0xcc, + 0x0d, 0xc1, 0x94, 0xd8, 0xc3, 0x47, 0x81, 0x00, 0x3c, 0x21, 0xc3, 0x47, + 0xd9, 0x00, 0x3c, 0x19, 0x10, 0x41, 0x94, 0xe4, 0x49, 0x3b, 0x93, 0xc1, + 0x94, 0xf0, 0xd3, 0x44, 0x0a, 0x00, 0x71, 0xf8, 0xc4, 0x15, 0xe7, 0x00, + 0x72, 0x81, 0xc3, 0x05, 0x14, 0x00, 0x72, 0x89, 0x16, 0xc1, 0x95, 0x44, + 0x08, 0xc1, 0x95, 0x50, 0x15, 0xc1, 0x95, 0x5c, 0xc5, 0x06, 0xdb, 0x00, + 0x72, 0xc1, 0xc4, 0x26, 0x78, 0x00, 0x72, 0xc8, 0xc8, 0x1e, 0x3f, 0x01, + 0x19, 0x01, 0xcc, 0x85, 0x71, 0x01, 0x5e, 0x51, 0xcc, 0x83, 0x19, 0x01, + 0x71, 0xc9, 0xd0, 0x1d, 0xec, 0x01, 0x72, 0xc9, 0xd1, 0x1a, 0x4a, 0x01, + 0x72, 0xd0, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xe9, 0xc3, 0x0a, 0xea, 0x01, + 0x18, 0x70, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xe1, 0xc3, 0x0a, 0xea, 0x01, + 0x18, 0x78, 
0xca, 0xa1, 0xb6, 0x01, 0x49, 0xe8, 0x83, 0x0f, 0x15, 0x6b, + 0x01, 0x95, 0x68, 0x04, 0xc1, 0x95, 0x6c, 0x91, 0x0f, 0x15, 0x51, 0x87, + 0x0f, 0x15, 0x33, 0x01, 0x95, 0x76, 0x97, 0x0f, 0x15, 0x29, 0x8b, 0x0f, + 0x15, 0x0b, 0x01, 0x95, 0x7a, 0xc2, 0x00, 0xdb, 0x0f, 0x15, 0x01, 0xc2, + 0x00, 0x39, 0x0f, 0x14, 0xf9, 0xc2, 0x00, 0xd0, 0x0f, 0x14, 0xf1, 0xc2, + 0x25, 0x3b, 0x0f, 0x14, 0xe9, 0xc2, 0x01, 0x4a, 0x0f, 0x14, 0xe1, 0xc2, + 0x19, 0x2c, 0x0f, 0x14, 0xd9, 0xc3, 0x1c, 0x63, 0x0f, 0x14, 0xd1, 0xc2, + 0x0d, 0xf6, 0x0f, 0x14, 0xc9, 0x10, 0xc1, 0x95, 0x7e, 0xc2, 0x01, 0xc3, + 0x0f, 0x14, 0xb1, 0xc2, 0x01, 0x30, 0x0f, 0x14, 0xa9, 0xc2, 0x02, 0x2b, + 0x0f, 0x14, 0xa1, 0xc2, 0x0e, 0x9a, 0x0f, 0x14, 0x99, 0xc2, 0x01, 0x6f, + 0x0f, 0x14, 0x91, 0xc2, 0x00, 0xb0, 0x0f, 0x14, 0x80, 0xc2, 0xe6, 0x7d, + 0x0f, 0x92, 0x09, 0xc2, 0x8c, 0x54, 0x0f, 0x92, 0x10, 0xc3, 0xe5, 0x81, + 0x0f, 0x92, 0x41, 0xc3, 0xe6, 0x59, 0x0f, 0x92, 0x29, 0xc3, 0xe5, 0xa5, + 0x0f, 0x92, 0x00, 0xc3, 0xe6, 0x6b, 0x0f, 0x92, 0x39, 0xc3, 0xe5, 0x3f, + 0x0f, 0x92, 0x18, 0xc3, 0xe5, 0x54, 0x0f, 0x92, 0x31, 0xc3, 0xe5, 0xe4, + 0x0f, 0x92, 0x20, 0xd8, 0x03, 0xaf, 0x01, 0x3c, 0xe9, 0x46, 0x00, 0x8b, + 0x41, 0x95, 0x88, 0xc6, 0x1c, 0xb4, 0x01, 0x01, 0x19, 0xc5, 0xcd, 0xce, + 0x0f, 0xa6, 0x81, 0xcc, 0x87, 0x69, 0x0f, 0xb5, 0x48, 0xc4, 0x03, 0xd7, + 0x01, 0x31, 0xa9, 0xc3, 0x02, 0x34, 0x01, 0x31, 0xa0, 0xcf, 0x05, 0x98, + 0x01, 0x15, 0x51, 0xc9, 0x32, 0x24, 0x01, 0x4c, 0x01, 0xcf, 0x27, 0x65, + 0x01, 0x57, 0xa1, 0xd6, 0x30, 0x7a, 0x01, 0x57, 0xa8, 0xc4, 0x18, 0x26, + 0x01, 0x01, 0xa1, 0xc3, 0x25, 0xd6, 0x01, 0x4f, 0xd8, 0xd6, 0x2d, 0x62, + 0x01, 0x53, 0x41, 0xd6, 0x2c, 0x2e, 0x01, 0x53, 0x48, 0xc9, 0x00, 0xca, + 0x01, 0x57, 0xb9, 0xcc, 0x07, 0xc7, 0x01, 0x57, 0xc0, 0xc5, 0xc3, 0x08, + 0x0f, 0x9b, 0xc9, 0xc4, 0x55, 0x81, 0x0f, 0xa1, 0x00, 0xc7, 0xc8, 0x70, + 0x0e, 0x9a, 0xb1, 0xc7, 0xb6, 0x0b, 0x0e, 0x98, 0xc0, 0xc4, 0x1d, 0xa8, + 0x0e, 0x99, 0x59, 0xc7, 0x05, 0x79, 0x0e, 0x98, 0x38, 0xc7, 0xca, 0x37, + 0x0e, 0x9a, 0xa9, 0xca, 0xa3, 0x32, 0x0e, 0x99, 0x68, 0xca, 0x9b, 0xe4, + 0x0e, 0x9a, 0xa1, 0x0f, 0xc1, 0x95, 0xa0, 0xc8, 0xbc, 0xd2, 0x0e, 0x98, + 0x80, 0xc7, 0xb1, 0x21, 0x0e, 0x9a, 0x39, 0xca, 0xa6, 0x20, 0x0e, 0x99, + 0x11, 0xd9, 0x1d, 0xa1, 0x0e, 0x98, 0x78, 0x43, 0x5e, 0x7a, 0xc1, 0x95, + 0xac, 0x10, 0x41, 0x95, 0xb8, 0xc3, 0x14, 0xc8, 0x0e, 0x9a, 0x79, 0x07, + 0x41, 0x95, 0xc2, 0x11, 0xc1, 0x95, 0xce, 0xc6, 0xca, 0xd3, 0x0e, 0x99, + 0x48, 0xc9, 0xab, 0x5b, 0x0e, 0x99, 0x99, 0xc8, 0xba, 0xba, 0x0e, 0x99, + 0x81, 0xc7, 0xc4, 0xc6, 0x0e, 0x98, 0xf8, 0xc3, 0x01, 0xd2, 0x0e, 0x99, + 0xf8, 0x15, 0xc1, 0x95, 0xda, 0xc5, 0xd9, 0x93, 0x0e, 0x98, 0xd1, 0xc3, + 0x29, 0x43, 0x0e, 0x98, 0xa0, 0xc5, 0x83, 0x4f, 0x0e, 0x99, 0xa1, 0xc5, + 0x5b, 0x25, 0x0e, 0x99, 0x20, 0xd7, 0x28, 0xfb, 0x01, 0x3d, 0xd1, 0xcf, + 0x15, 0x36, 0x01, 0x39, 0xd8, 0xcd, 0x7f, 0x59, 0x01, 0x38, 0x31, 0x43, + 0x05, 0xbb, 0xc1, 0x95, 0xe4, 0xc4, 0x00, 0xba, 0x01, 0x09, 0x09, 0xcf, + 0x62, 0x01, 0x0f, 0xac, 0x00, 0x05, 0xc1, 0x95, 0xf3, 0x03, 0xc1, 0x95, + 0xff, 0x42, 0x07, 0xb2, 0xc1, 0x96, 0x0b, 0xc5, 0x33, 0x5d, 0x00, 0x61, + 0xe1, 0xc7, 0xc3, 0x61, 0x00, 0x63, 0xb9, 0xc5, 0xdc, 0x40, 0x00, 0x63, + 0xf8, 0x45, 0x02, 0x10, 0xc1, 0x96, 0x17, 0xc9, 0x36, 0x53, 0x00, 0x62, + 0xa8, 0x03, 0xc1, 0x96, 0x80, 0x8b, 0x00, 0x61, 0xfb, 0x01, 0x96, 0x8c, + 0x97, 0x00, 0x62, 0x0b, 0x01, 0x96, 0x90, 0x48, 0xb2, 0x2d, 0xc1, 0x96, + 0x94, 0x87, 0x00, 0x62, 0x33, 0x01, 0x96, 0xa2, 0x91, 0x00, 0x62, 0x52, + 0x01, 0x96, 0xa6, 0xc4, 0x15, 0xe7, 0x00, 0x63, 0x31, 0xc3, 0x05, 0x14, + 0x00, 0x63, 
0x39, 0x16, 0xc1, 0x96, 0xaa, 0x08, 0xc1, 0x96, 0xb6, 0x15, + 0xc1, 0x96, 0xc2, 0xc5, 0x06, 0xdb, 0x00, 0x63, 0x71, 0xc4, 0x26, 0x78, + 0x00, 0x63, 0x78, 0xdb, 0x15, 0xe7, 0x00, 0x63, 0xc1, 0x48, 0xb5, 0xca, + 0xc1, 0x96, 0xce, 0x16, 0x41, 0x96, 0xda, 0x00, 0x41, 0x96, 0xe6, 0xca, + 0x9e, 0xe6, 0x01, 0x70, 0xd9, 0x44, 0x05, 0x18, 0x41, 0x96, 0xf2, 0xc4, + 0x26, 0x78, 0x08, 0xa6, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xa6, 0xc1, 0x15, + 0xc1, 0x96, 0xfe, 0x08, 0xc1, 0x97, 0x0a, 0x16, 0xc1, 0x97, 0x16, 0xc3, + 0x05, 0x14, 0x08, 0xa6, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xa6, 0x80, 0xd0, + 0x50, 0xcf, 0x08, 0xa6, 0x31, 0xc3, 0x7c, 0x50, 0x08, 0xa4, 0x00, 0x03, + 0xc1, 0x97, 0x22, 0xc5, 0x33, 0x5d, 0x08, 0xa6, 0x19, 0xcb, 0x1e, 0x89, + 0x08, 0xa5, 0xf9, 0x42, 0x07, 0xb2, 0x41, 0x97, 0x2e, 0x03, 0xc1, 0x97, + 0x3a, 0x46, 0x2e, 0xee, 0xc1, 0x97, 0x46, 0x91, 0x08, 0xa5, 0xe1, 0x87, + 0x08, 0xa5, 0xc9, 0x48, 0xb2, 0x2d, 0xc1, 0x97, 0x4e, 0x97, 0x08, 0xa5, + 0x9b, 0x01, 0x97, 0x5c, 0x8b, 0x08, 0xa5, 0x8a, 0x01, 0x97, 0x60, 0xc2, + 0x00, 0xd0, 0x08, 0xa5, 0x79, 0x15, 0xc1, 0x97, 0x64, 0x18, 0xc1, 0x97, + 0x74, 0xc2, 0x00, 0xdb, 0x08, 0xa5, 0x51, 0xc2, 0x00, 0x39, 0x08, 0xa5, + 0x49, 0xc2, 0x19, 0x2c, 0x08, 0xa5, 0x41, 0xc2, 0x01, 0xc3, 0x08, 0xa5, + 0x39, 0x04, 0xc1, 0x97, 0x7e, 0x12, 0xc1, 0x97, 0x88, 0x10, 0xc1, 0x97, + 0x92, 0x06, 0xc1, 0x97, 0xa8, 0x16, 0xc1, 0x97, 0xb6, 0x0c, 0xc1, 0x97, + 0xc4, 0x05, 0xc1, 0x97, 0xce, 0x09, 0xc1, 0x97, 0xd8, 0x0d, 0xc1, 0x97, + 0xe2, 0x83, 0x08, 0xa4, 0x0b, 0x01, 0x97, 0xec, 0x91, 0x08, 0xa4, 0x69, + 0x87, 0x08, 0xa4, 0x59, 0x97, 0x08, 0xa4, 0x2b, 0x01, 0x97, 0xf8, 0x8b, + 0x08, 0xa4, 0x1a, 0x01, 0x97, 0xfc, 0xc9, 0xae, 0x7c, 0x00, 0x78, 0x01, + 0x45, 0x10, 0x7a, 0x41, 0x98, 0x00, 0x14, 0xc1, 0x98, 0x1c, 0x42, 0x19, + 0x2c, 0xc1, 0x98, 0x2e, 0x0f, 0xc1, 0x98, 0x3a, 0xce, 0x70, 0x50, 0x00, + 0x7c, 0x11, 0xc8, 0xbb, 0x42, 0x00, 0x7c, 0x19, 0x42, 0x58, 0x61, 0xc1, + 0x98, 0x46, 0x44, 0xe0, 0x6f, 0xc1, 0x98, 0x52, 0xd1, 0x4f, 0x9c, 0x00, + 0x7c, 0x60, 0x45, 0x00, 0xba, 0xc1, 0x98, 0x5e, 0x47, 0x02, 0x0e, 0x41, + 0x98, 0x70, 0x44, 0x02, 0x11, 0xc1, 0x98, 0xd2, 0x4b, 0x8f, 0xec, 0x41, + 0x98, 0xde, 0x46, 0x10, 0xb6, 0xc1, 0x98, 0xea, 0xd1, 0x56, 0xfb, 0x00, + 0x78, 0x58, 0x47, 0x90, 0xa7, 0xc1, 0x98, 0xf6, 0x45, 0x95, 0xf1, 0xc1, + 0x99, 0x02, 0xc6, 0xd3, 0x19, 0x00, 0x79, 0xc0, 0xc9, 0xb4, 0x37, 0x00, + 0x78, 0x41, 0xc3, 0x01, 0xe3, 0x00, 0x78, 0x68, 0x15, 0xc1, 0x99, 0x0e, + 0x49, 0xad, 0x6e, 0x41, 0x99, 0x18, 0x44, 0x97, 0x1a, 0xc1, 0x99, 0x24, + 0x4a, 0x9f, 0xd6, 0x41, 0x99, 0x33, 0x15, 0xc1, 0x99, 0x3f, 0xd3, 0x47, + 0x02, 0x00, 0x7e, 0xd0, 0xd3, 0x45, 0x73, 0x00, 0x78, 0x89, 0xcd, 0x76, + 0x01, 0x00, 0x78, 0x90, 0xc2, 0x00, 0x45, 0x00, 0x79, 0xe1, 0xc2, 0x02, + 0x2c, 0x00, 0x79, 0xe8, 0xca, 0x9c, 0xfc, 0x00, 0x78, 0xa9, 0xca, 0xa4, + 0xfe, 0x00, 0x78, 0xb0, 0x0d, 0xc1, 0x99, 0x4b, 0x09, 0xc1, 0x99, 0x61, + 0x10, 0xc1, 0x99, 0x6b, 0x05, 0xc1, 0x99, 0x81, 0xc2, 0x25, 0x3b, 0x00, + 0x7a, 0x39, 0x16, 0xc1, 0x99, 0x8b, 0x06, 0xc1, 0x99, 0x9d, 0x12, 0xc1, + 0x99, 0xaf, 0x04, 0xc1, 0x99, 0xb9, 0xc2, 0x01, 0xc3, 0x00, 0x7a, 0xc1, + 0xc2, 0x01, 0x4a, 0x00, 0x7a, 0xe9, 0x1c, 0xc1, 0x99, 0xc3, 0xc2, 0x00, + 0x02, 0x00, 0x7b, 0x01, 0xc2, 0x19, 0x2c, 0x00, 0x7b, 0x09, 0x14, 0xc1, + 0x99, 0xcd, 0xc2, 0x00, 0xdb, 0x00, 0x7b, 0x19, 0x15, 0xc1, 0x99, 0xd7, + 0xc2, 0x00, 0xd0, 0x00, 0x7b, 0x39, 0x83, 0x00, 0x7b, 0x41, 0xcd, 0x7f, + 0xe8, 0x00, 0x7b, 0x50, 0xd4, 0x39, 0x1c, 0x00, 0x78, 0xb9, 0xcb, 0x98, + 0x63, 0x00, 0x78, 0xc8, 0xc2, 0x02, 0xa0, 0x00, 0x79, 0x11, 0xc4, 0x02, + 0xde, 0x00, 
0x79, 0x18, 0xc3, 0x09, 0x9e, 0x00, 0x79, 0x21, 0xc3, 0x0d, + 0x14, 0x00, 0x79, 0x28, 0xc2, 0x22, 0xcc, 0x00, 0x79, 0x31, 0xc4, 0x18, + 0x10, 0x00, 0x79, 0x38, 0xc3, 0x05, 0x14, 0x00, 0x79, 0x51, 0x16, 0xc1, + 0x99, 0xe7, 0x08, 0xc1, 0x99, 0xf3, 0x15, 0xc1, 0x99, 0xff, 0xc5, 0x06, + 0xdb, 0x00, 0x79, 0x89, 0xc4, 0x26, 0x78, 0x00, 0x79, 0x91, 0xc4, 0x15, + 0xe7, 0x00, 0x79, 0x98, 0x8b, 0x00, 0x7b, 0x98, 0x97, 0x00, 0x7b, 0xa8, + 0x94, 0x00, 0x7b, 0xb3, 0x01, 0x9a, 0x0b, 0x8e, 0x00, 0x7b, 0xc2, 0x01, + 0x9a, 0x0f, 0x87, 0x00, 0x7b, 0xd8, 0x91, 0x00, 0x7b, 0xe8, 0x8b, 0x00, + 0x7c, 0x08, 0x83, 0x01, 0x69, 0x83, 0x01, 0x9a, 0x13, 0x87, 0x01, 0x6b, + 0x33, 0x01, 0x9a, 0x84, 0x8b, 0x01, 0x6a, 0x49, 0x97, 0x01, 0x6a, 0x99, + 0x91, 0x01, 0x6b, 0x38, 0x8c, 0x01, 0x69, 0xa9, 0x8a, 0x01, 0x6a, 0x08, + 0x48, 0xba, 0x82, 0xc1, 0x9a, 0x88, 0xcd, 0x7f, 0x0b, 0x01, 0x6b, 0x20, + 0xcb, 0x8d, 0xfd, 0x01, 0x6a, 0x59, 0xc8, 0xb6, 0x7a, 0x01, 0x6a, 0xc0, + 0x00, 0xc1, 0x9a, 0xa7, 0xda, 0x05, 0x0d, 0x01, 0x71, 0x50, 0xc2, 0x00, + 0xbf, 0x01, 0x52, 0xb1, 0xc3, 0x02, 0x9b, 0x01, 0x52, 0xa8, 0xcb, 0x97, + 0x03, 0x01, 0x50, 0x41, 0xcc, 0x86, 0x6d, 0x01, 0x50, 0x38, 0xc7, 0x09, + 0x0d, 0x01, 0x49, 0xa1, 0xc9, 0x03, 0xc8, 0x01, 0x49, 0xa9, 0xca, 0x3c, + 0xa4, 0x0f, 0xc5, 0x88, 0xc9, 0x01, 0x88, 0x01, 0x49, 0xb1, 0xca, 0x03, + 0x87, 0x01, 0x49, 0xb8, 0x48, 0x19, 0x9b, 0xc1, 0x9a, 0xb3, 0x07, 0xc1, + 0x9b, 0x11, 0x45, 0x17, 0x15, 0x41, 0x9b, 0x1d, 0x43, 0x01, 0xc5, 0xc1, + 0x9b, 0x29, 0x43, 0x2d, 0x2f, 0xc1, 0x9b, 0x35, 0x4b, 0x4c, 0x93, 0x41, + 0x9b, 0x41, 0x03, 0xc1, 0x9b, 0xad, 0x45, 0x00, 0x59, 0xc1, 0x9b, 0xbc, + 0xd3, 0x44, 0x69, 0x00, 0x47, 0x11, 0xd0, 0x5e, 0x52, 0x00, 0x33, 0x58, + 0x4f, 0x2f, 0xa0, 0xc1, 0x9b, 0xcb, 0x03, 0xc1, 0x9b, 0xda, 0x43, 0x0d, + 0xed, 0xc1, 0x9b, 0xe4, 0xcd, 0x75, 0xb3, 0x00, 0x32, 0xe8, 0x00, 0xc1, + 0x9b, 0xea, 0xc3, 0x13, 0x00, 0x00, 0x32, 0x6a, 0x01, 0x9b, 0xfc, 0xc4, + 0x04, 0xa7, 0x00, 0x32, 0x73, 0x01, 0x9c, 0x02, 0xc8, 0x11, 0xf7, 0x00, + 0x36, 0xa1, 0xd0, 0x5c, 0x72, 0x00, 0x33, 0x69, 0xce, 0x6f, 0x7e, 0x00, + 0x30, 0x10, 0x45, 0x03, 0x14, 0xc1, 0x9c, 0x0f, 0x17, 0xc1, 0x9c, 0x39, + 0x46, 0x10, 0x79, 0xc1, 0x9c, 0x4e, 0x44, 0x00, 0xbb, 0xc1, 0x9c, 0x70, + 0xd3, 0x46, 0xa3, 0x00, 0x36, 0xf1, 0xc5, 0xd7, 0x18, 0x00, 0x32, 0x8b, + 0x01, 0x9c, 0x8c, 0xc8, 0x52, 0x00, 0x00, 0x30, 0xd8, 0xc8, 0xb5, 0x52, + 0x00, 0x47, 0x91, 0xc8, 0xb8, 0xc2, 0x00, 0x47, 0x89, 0xc8, 0x6e, 0xbf, + 0x00, 0x47, 0x80, 0x44, 0x05, 0x14, 0xc1, 0x9c, 0x90, 0xd1, 0x52, 0x44, + 0x00, 0x47, 0x19, 0x03, 0xc1, 0x9c, 0xa2, 0xd2, 0x4b, 0x95, 0x00, 0x33, + 0x61, 0xda, 0x1b, 0x1a, 0x00, 0x30, 0xf0, 0x45, 0x00, 0x33, 0xc1, 0x9c, + 0xb1, 0xc4, 0x0a, 0x8b, 0x00, 0x30, 0x60, 0xd3, 0x41, 0xbd, 0x00, 0x44, + 0xf9, 0x44, 0x08, 0x0b, 0x41, 0x9c, 0xcc, 0xd1, 0x53, 0xcb, 0x00, 0x44, + 0x89, 0x11, 0xc1, 0x9c, 0xd8, 0xce, 0x70, 0xa4, 0x00, 0x37, 0x49, 0xcb, + 0x8e, 0x13, 0x00, 0x33, 0x50, 0xcc, 0x41, 0x19, 0x00, 0x44, 0x71, 0x4a, + 0x6f, 0xc8, 0x41, 0x9c, 0xe4, 0x4c, 0x81, 0x09, 0xc1, 0x9c, 0xf6, 0x46, + 0x0a, 0x10, 0x41, 0x9d, 0x02, 0xca, 0x43, 0x42, 0x00, 0x30, 0x29, 0xc4, + 0x00, 0xba, 0x00, 0x30, 0x00, 0xc4, 0x26, 0x78, 0x00, 0x33, 0x49, 0xc5, + 0x06, 0xdb, 0x00, 0x33, 0x41, 0x15, 0xc1, 0x9d, 0x0e, 0x08, 0xc1, 0x9d, + 0x1a, 0x16, 0xc1, 0x9d, 0x26, 0xc3, 0x05, 0x14, 0x00, 0x33, 0x09, 0xc4, + 0x15, 0xe7, 0x00, 0x33, 0x00, 0xd1, 0x57, 0x1d, 0x00, 0x30, 0x51, 0xca, + 0xa8, 0x00, 0x00, 0x30, 0x48, 0x44, 0x40, 0xee, 0xc1, 0x9d, 0x32, 0xc7, + 0xc2, 0xdc, 0x07, 0xd8, 0xb1, 0xc8, 0xb8, 0x22, 0x00, 0x2c, 0x38, 0xc2, + 0x16, 0x5a, 
0x00, 0x2b, 0xab, 0x01, 0x9d, 0x4a, 0xc3, 0xb1, 0x0d, 0x00, + 0x2c, 0x31, 0xc2, 0x38, 0x2a, 0x00, 0x2c, 0x29, 0x42, 0x00, 0x3c, 0xc1, + 0x9d, 0x56, 0x12, 0xc1, 0x9d, 0x5e, 0x05, 0xc1, 0x9d, 0x6a, 0x14, 0xc1, + 0x9d, 0x76, 0x16, 0xc1, 0x9d, 0x80, 0x18, 0xc1, 0x9d, 0x90, 0x15, 0xc1, + 0x9d, 0x9a, 0x0c, 0xc1, 0x9d, 0xa6, 0xc3, 0x2a, 0x91, 0x00, 0x2b, 0xb1, + 0xc3, 0x00, 0xc3, 0x00, 0x2b, 0xa1, 0x09, 0xc1, 0x9d, 0xb0, 0xc2, 0x01, + 0x23, 0x00, 0x2b, 0x81, 0xc3, 0xe6, 0x1a, 0x00, 0x2b, 0x69, 0xc4, 0xe1, + 0x0b, 0x00, 0x2b, 0x61, 0xc3, 0x03, 0x0d, 0x00, 0x2b, 0x59, 0x1c, 0xc1, + 0x9d, 0xbc, 0x07, 0xc1, 0x9d, 0xc6, 0xc2, 0x0e, 0x9a, 0x00, 0x2b, 0x21, + 0xc3, 0x18, 0xf2, 0x00, 0x2b, 0x11, 0xc3, 0x36, 0x99, 0x00, 0x2b, 0x08, + 0xc3, 0xb1, 0x0d, 0x00, 0x2a, 0xb1, 0xc2, 0x38, 0x2a, 0x00, 0x2a, 0xa9, + 0x42, 0x00, 0x3c, 0xc1, 0x9d, 0xd4, 0x12, 0xc1, 0x9d, 0xdc, 0xc2, 0x16, + 0x5a, 0x00, 0x2a, 0x2b, 0x01, 0x9d, 0xe8, 0x05, 0xc1, 0x9d, 0xee, 0x14, + 0xc1, 0x9d, 0xfa, 0x16, 0xc1, 0x9e, 0x04, 0x18, 0xc1, 0x9e, 0x0e, 0x15, + 0xc1, 0x9e, 0x18, 0x0c, 0xc1, 0x9e, 0x24, 0xc3, 0x2a, 0x91, 0x00, 0x2a, + 0x31, 0xc3, 0x00, 0xc3, 0x00, 0x2a, 0x21, 0x09, 0xc1, 0x9e, 0x2e, 0xc2, + 0x01, 0x23, 0x00, 0x2a, 0x01, 0xc3, 0xe6, 0x1a, 0x00, 0x29, 0xe9, 0xc4, + 0xe1, 0x0b, 0x00, 0x29, 0xe1, 0xc3, 0x03, 0x0d, 0x00, 0x29, 0xd9, 0x1c, + 0xc1, 0x9e, 0x3a, 0x07, 0xc1, 0x9e, 0x44, 0xc2, 0x0e, 0x9a, 0x00, 0x29, + 0xa1, 0xc3, 0x36, 0x99, 0x00, 0x29, 0x89, 0xc3, 0x18, 0xf2, 0x00, 0x29, + 0x90, 0xc4, 0x6b, 0x52, 0x0f, 0x48, 0x01, 0x06, 0xc1, 0x9e, 0x52, 0xc4, + 0x76, 0x31, 0x0f, 0x48, 0x11, 0xc4, 0xe4, 0xb3, 0x0f, 0x48, 0x19, 0x04, + 0xc1, 0x9e, 0x5e, 0x15, 0xc1, 0x9e, 0x68, 0xc2, 0x00, 0x67, 0x0f, 0x48, + 0x31, 0xc2, 0x00, 0x39, 0x0f, 0x48, 0x41, 0x87, 0x0f, 0x48, 0x49, 0xc2, + 0x00, 0x87, 0x0f, 0x48, 0x51, 0x8b, 0x0f, 0x48, 0x59, 0x91, 0x0f, 0x48, + 0x61, 0x1b, 0xc1, 0x9e, 0x74, 0xc3, 0x7e, 0x89, 0x0f, 0x48, 0x79, 0x10, + 0xc1, 0x9e, 0x7e, 0x0d, 0xc1, 0x9e, 0x90, 0x97, 0x0f, 0x48, 0x99, 0xc4, + 0xe1, 0x4b, 0x0f, 0x48, 0xa1, 0xc3, 0x11, 0xee, 0x0f, 0x48, 0xa9, 0xc2, + 0x00, 0xd0, 0x0f, 0x48, 0xb1, 0xc4, 0xd8, 0x3a, 0x0f, 0x48, 0xb9, 0x09, + 0xc1, 0x9e, 0xa2, 0xc2, 0x00, 0x16, 0x0f, 0x48, 0xd1, 0xc2, 0x02, 0x41, + 0x0f, 0x48, 0xe1, 0xc3, 0xa9, 0xfc, 0x0f, 0x48, 0xf8, 0xc4, 0x14, 0x74, + 0x0f, 0x49, 0x19, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0x78, 0x83, 0x0f, 0x49, + 0x31, 0xc2, 0x01, 0x7f, 0x0f, 0x49, 0x48, 0xc9, 0xaf, 0x27, 0x0f, 0x49, + 0x39, 0xc2, 0x00, 0xd0, 0x0f, 0x4a, 0x18, 0xc2, 0x01, 0x7f, 0x0f, 0x49, + 0x81, 0x83, 0x0f, 0x49, 0xa0, 0xc2, 0x05, 0x1d, 0x0f, 0x49, 0x91, 0xc2, + 0x19, 0x2c, 0x0f, 0x49, 0xd9, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0xe8, 0xc2, + 0x0f, 0x9b, 0x0f, 0x49, 0x99, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0xf9, 0xc2, + 0x01, 0x53, 0x0f, 0x4a, 0x10, 0x83, 0x0f, 0x49, 0xd1, 0xc2, 0x00, 0x51, + 0x0f, 0x4a, 0x00, 0xc2, 0x02, 0xa0, 0x0f, 0x4a, 0x91, 0xc4, 0x02, 0xde, + 0x0f, 0x4a, 0x98, 0xc3, 0x09, 0x9e, 0x0f, 0x4a, 0xa1, 0xc3, 0x0d, 0x14, + 0x0f, 0x4a, 0xa8, 0xc2, 0x22, 0xcc, 0x0f, 0x4a, 0xb1, 0xc4, 0x18, 0x10, + 0x0f, 0x4a, 0xb8, 0xc7, 0xc0, 0xeb, 0x0f, 0xbb, 0x61, 0xc4, 0xe4, 0xab, + 0x0f, 0xbb, 0x58, 0x02, 0x41, 0x9e, 0xac, 0xc6, 0xcf, 0x8f, 0x0f, 0xbb, + 0x2b, 0x01, 0x9e, 0xb4, 0x48, 0xba, 0xf2, 0x41, 0x9e, 0xb8, 0xc3, 0x04, + 0xa1, 0x0f, 0xb9, 0x01, 0xcb, 0x4c, 0x50, 0x0f, 0xb9, 0x28, 0xc2, 0x34, + 0x63, 0x0f, 0xba, 0x61, 0xcb, 0x95, 0xa3, 0x0f, 0xba, 0x71, 0xc6, 0xd1, + 0xed, 0x0f, 0xba, 0x80, 0xc5, 0xd9, 0x25, 0x0f, 0xbb, 0x0b, 0x01, 0x9e, + 0xc7, 0xc4, 0x2d, 0xad, 0x0f, 0xbb, 0x00, 0xc4, 0xdf, 0x63, 0x0f, 0xba, + 0x5b, 0x01, 
0x9e, 0xcd, 0xc7, 0xc7, 0x0b, 0x0f, 0xba, 0xc0, 0xc4, 0xde, + 0xcf, 0x0f, 0xbb, 0x19, 0xca, 0x9f, 0x68, 0x0f, 0xbb, 0x20, 0xc2, 0xe5, + 0xfd, 0x0f, 0xba, 0x00, 0xc4, 0x91, 0x3d, 0x0f, 0xb9, 0x49, 0xc5, 0x87, + 0xc4, 0x0f, 0xba, 0x40, 0xc5, 0xd5, 0xe7, 0x0f, 0xb9, 0x93, 0x01, 0x9e, + 0xd3, 0xc5, 0xd9, 0x8e, 0x0f, 0xb9, 0xdb, 0x01, 0x9e, 0xdd, 0xc4, 0x08, + 0x88, 0x0f, 0xbb, 0x68, 0xc2, 0xe5, 0xfd, 0x0f, 0xb8, 0xc8, 0xc5, 0xdb, + 0x7d, 0x0f, 0xb8, 0x53, 0x01, 0x9e, 0xe3, 0xc5, 0xd7, 0xb8, 0x0f, 0xb8, + 0xb2, 0x01, 0x9e, 0xed, 0x46, 0x5d, 0x2b, 0xc1, 0x9e, 0xf3, 0xc4, 0x4e, + 0x2b, 0x0f, 0xb8, 0x68, 0x96, 0x0f, 0xb8, 0xa3, 0x01, 0x9e, 0xff, 0xc9, + 0xad, 0xec, 0x0f, 0xb9, 0xc8, 0xcd, 0x7b, 0x49, 0x0f, 0xba, 0x91, 0xd3, + 0x40, 0xc6, 0x0f, 0xba, 0xe2, 0x01, 0x9f, 0x05, 0x00, 0xc1, 0x9f, 0x0b, + 0xc6, 0xd1, 0xab, 0x0f, 0xb8, 0x28, 0xc4, 0xe1, 0x17, 0x0f, 0xb9, 0xb3, + 0x01, 0x9f, 0x1d, 0xc2, 0x01, 0xdf, 0x0f, 0xba, 0x29, 0xc5, 0xd9, 0x16, + 0x0f, 0xbb, 0x50, 0x02, 0x41, 0x9f, 0x23, 0xc2, 0xe5, 0xfd, 0x0f, 0xb8, + 0xe8, 0xc8, 0xb7, 0x42, 0x0f, 0xba, 0xb1, 0xc2, 0x00, 0x33, 0x0f, 0xbb, + 0x70, 0xc4, 0xb4, 0xbe, 0x0f, 0xbb, 0x91, 0xc5, 0xd5, 0x60, 0x0f, 0xbb, + 0x98, 0x22, 0xc1, 0x9f, 0x2b, 0x21, 0xc1, 0x9f, 0x53, 0x20, 0xc1, 0x9f, + 0x84, 0x1f, 0xc1, 0x9f, 0xaf, 0x1e, 0xc1, 0x9f, 0xda, 0x1d, 0xc1, 0xa0, + 0x05, 0x23, 0xc1, 0xa0, 0x29, 0x24, 0xc1, 0xa0, 0x54, 0x25, 0xc1, 0xa0, + 0x7c, 0x26, 0x41, 0xa0, 0xa4, 0x1d, 0xc1, 0xa0, 0xd2, 0x1e, 0xc1, 0xa1, + 0x0c, 0x1f, 0xc1, 0xa1, 0x3a, 0x20, 0xc1, 0xa1, 0x65, 0x21, 0xc1, 0xa1, + 0x90, 0x22, 0xc1, 0xa1, 0xb8, 0x23, 0xc1, 0xa1, 0xe0, 0x24, 0xc1, 0xa2, + 0x08, 0x25, 0xc1, 0xa2, 0x30, 0x26, 0x41, 0xa2, 0x58, 0x1d, 0xc1, 0xa2, + 0x80, 0x1e, 0xc1, 0xa2, 0xb1, 0x1f, 0xc1, 0xa2, 0xdf, 0x20, 0xc1, 0xa3, + 0x0a, 0x21, 0xc1, 0xa3, 0x32, 0x22, 0xc1, 0xa3, 0x5a, 0x23, 0xc1, 0xa3, + 0x82, 0x24, 0xc1, 0xa3, 0xad, 0x25, 0xc1, 0xa3, 0xd5, 0x26, 0x41, 0xa4, + 0x00, 0x1d, 0xc1, 0xa4, 0x2e, 0x1e, 0xc1, 0xa4, 0x59, 0x1f, 0xc1, 0xa4, + 0x81, 0x20, 0xc1, 0xa4, 0xac, 0x21, 0xc1, 0xa4, 0xd7, 0x22, 0xc1, 0xa4, + 0xff, 0x23, 0xc1, 0xa5, 0x2a, 0x24, 0xc1, 0xa5, 0x58, 0x25, 0xc1, 0xa5, + 0x83, 0x26, 0x41, 0xa5, 0xb1, 0x1d, 0xc1, 0xa5, 0xdb, 0x1e, 0xc1, 0xa6, + 0x03, 0x1f, 0xc1, 0xa6, 0x2b, 0x20, 0xc1, 0xa6, 0x53, 0x21, 0xc1, 0xa6, + 0x7b, 0x22, 0xc1, 0xa6, 0xa3, 0x23, 0xc1, 0xa6, 0xd1, 0x24, 0xc1, 0xa6, + 0xf9, 0x25, 0xc1, 0xa7, 0x21, 0x26, 0x41, 0xa7, 0x49, 0x1d, 0xc1, 0xa7, + 0x69, 0x1e, 0xc1, 0xa7, 0x8d, 0x1f, 0xc1, 0xa7, 0xb5, 0xc2, 0xe6, 0x4a, + 0x0a, 0x32, 0x30, 0xcf, 0x62, 0xe2, 0x01, 0x11, 0x99, 0xd2, 0x4e, 0x77, + 0x01, 0x4a, 0x00, 0xd3, 0x44, 0x7c, 0x01, 0x0d, 0xb1, 0x4f, 0x01, 0x93, + 0x41, 0xa7, 0xdd, 0xe0, 0x09, 0x07, 0x0f, 0xa8, 0x20, 0xc8, 0x52, 0x09, + 0x01, 0x4d, 0x21, 0xc8, 0x4e, 0x9b, 0x01, 0x4c, 0xf0, 0xc9, 0x18, 0x66, + 0x01, 0x10, 0xb8, 0xc2, 0x00, 0xd0, 0x08, 0xba, 0x21, 0x83, 0x08, 0xba, + 0x18, 0xc2, 0x00, 0xd0, 0x08, 0xba, 0x11, 0x83, 0x08, 0xba, 0x08, 0xc2, + 0x01, 0x5d, 0x08, 0xb8, 0xd1, 0xc2, 0x01, 0x30, 0x08, 0xb8, 0xb1, 0xc2, + 0x01, 0x6f, 0x08, 0xb8, 0x28, 0xc6, 0x00, 0x41, 0x08, 0xb9, 0xe9, 0xcc, + 0x82, 0x65, 0x08, 0xb9, 0xe0, 0x00, 0x41, 0xa7, 0xfb, 0xc4, 0x02, 0xb9, + 0x01, 0x1a, 0xf1, 0xc8, 0x52, 0x09, 0x01, 0x1a, 0xc0, 0xc9, 0x52, 0x08, + 0x01, 0x1b, 0xc0, 0xcb, 0x95, 0xf0, 0x01, 0x1b, 0x91, 0x45, 0x9a, 0x3d, + 0xc1, 0xa8, 0x3f, 0xc8, 0xba, 0x22, 0x01, 0x1a, 0xe8, 0x00, 0xc1, 0xa8, + 0x51, 0xca, 0x6c, 0xe2, 0x01, 0x1a, 0xb0, 0x00, 0xc1, 0xa8, 0x63, 0x43, + 0x33, 0x60, 0x41, 0xa8, 0x75, 0xc9, 0xae, 0x22, 0x01, 0x1b, 0x69, 0xcc, + 0x88, 0x89, 
0x01, 0x1b, 0x18, 0xc9, 0x20, 0xa8, 0x01, 0x1b, 0x29, 0x42, + 0x00, 0x15, 0xc1, 0xa8, 0x81, 0xc8, 0x52, 0x09, 0x01, 0x1a, 0xe1, 0xc9, + 0x02, 0xfe, 0x01, 0x1a, 0x49, 0xc3, 0xba, 0x27, 0x01, 0x19, 0xf0, 0x46, + 0x00, 0xe2, 0xc1, 0xa8, 0x8d, 0xd9, 0x1f, 0xae, 0x01, 0x12, 0x30, 0x87, + 0x08, 0x59, 0xa9, 0xc2, 0x00, 0x4e, 0x08, 0x59, 0x48, 0xc3, 0x04, 0x65, + 0x08, 0x59, 0xa1, 0x0a, 0xc1, 0xa8, 0x9c, 0x87, 0x08, 0x59, 0x78, 0x87, + 0x08, 0x59, 0x59, 0xc2, 0x0c, 0x43, 0x08, 0x59, 0x50, 0xc2, 0x02, 0x6f, + 0x08, 0x59, 0x39, 0xc2, 0x0c, 0x43, 0x08, 0x59, 0x31, 0x87, 0x08, 0x59, + 0x29, 0x09, 0x41, 0xa8, 0xa6, 0xc2, 0x01, 0x7f, 0x08, 0x58, 0xe1, 0x87, + 0x08, 0x58, 0xd8, 0xc2, 0x01, 0x7f, 0x08, 0x58, 0xd1, 0x87, 0x08, 0x58, + 0xc9, 0xc2, 0x00, 0xac, 0x08, 0x58, 0xe8, 0xc2, 0x01, 0x7f, 0x08, 0x58, + 0xb1, 0xc2, 0x09, 0x3b, 0x08, 0x58, 0xa9, 0x87, 0x08, 0x58, 0xa0, 0xc2, + 0x00, 0x5f, 0x08, 0x58, 0x99, 0x87, 0x08, 0x58, 0x89, 0xc2, 0x0c, 0x43, + 0x08, 0x58, 0x90, 0x97, 0x08, 0x58, 0x78, 0x8b, 0x08, 0x58, 0x68, 0x91, + 0x08, 0x58, 0x58, 0x87, 0x08, 0x58, 0x48, 0x87, 0x08, 0x58, 0x33, 0x01, + 0xa8, 0xb6, 0x83, 0x08, 0x58, 0x0b, 0x01, 0xa8, 0xba, 0x90, 0x08, 0x58, + 0x21, 0x91, 0x08, 0x58, 0x10, 0x87, 0x08, 0x59, 0x01, 0xc2, 0x01, 0x7f, + 0x08, 0x59, 0x08, 0x87, 0x08, 0x59, 0x81, 0xc2, 0x01, 0x7f, 0x08, 0x59, + 0x90, 0x00, 0x41, 0xa8, 0xc2, 0x0a, 0xc1, 0xa8, 0xce, 0xc2, 0x00, 0xc4, + 0x08, 0x08, 0x83, 0x01, 0xa8, 0xe0, 0x19, 0x41, 0xa8, 0xe6, 0x0b, 0xc1, + 0xa8, 0xf6, 0x11, 0x41, 0xa9, 0x08, 0xc2, 0x22, 0xcc, 0x08, 0x08, 0x63, + 0x01, 0xa9, 0x1a, 0xc4, 0x18, 0x10, 0x08, 0x08, 0x6a, 0x01, 0xa9, 0x27, + 0x00, 0xc1, 0xa9, 0x34, 0x9b, 0x08, 0x08, 0xba, 0x01, 0xa9, 0x40, 0x00, + 0xc1, 0xa9, 0x46, 0xc2, 0x0d, 0x10, 0x08, 0x08, 0xc2, 0x01, 0xa9, 0x52, + 0xc9, 0xb3, 0x20, 0x08, 0x09, 0xb9, 0x08, 0xc1, 0xa9, 0x58, 0xce, 0x71, + 0x22, 0x08, 0x09, 0xc9, 0xcd, 0x7d, 0xb9, 0x08, 0x09, 0xd0, 0xc4, 0x02, + 0x6d, 0x08, 0x08, 0x01, 0xc3, 0x02, 0xa3, 0x08, 0x08, 0x08, 0x45, 0x00, + 0x2d, 0xc1, 0xa9, 0x64, 0x44, 0x00, 0x4a, 0x41, 0xa9, 0xa4, 0xc2, 0x02, + 0xae, 0x01, 0x2b, 0xcb, 0x01, 0xa9, 0xbc, 0xc4, 0x00, 0x49, 0x01, 0x2b, + 0xc3, 0x01, 0xa9, 0xc2, 0x42, 0x00, 0x58, 0xc1, 0xa9, 0xc8, 0xc5, 0x00, + 0x2c, 0x01, 0x2b, 0xd1, 0xc8, 0x00, 0x5f, 0x01, 0x28, 0x1b, 0x01, 0xa9, + 0xd7, 0x4f, 0x61, 0x5c, 0xc1, 0xa9, 0xdd, 0x4c, 0x52, 0xbb, 0xc1, 0xa9, + 0xe9, 0xca, 0x01, 0x68, 0x01, 0x28, 0x08, 0x45, 0x00, 0x5a, 0xc1, 0xa9, + 0xf5, 0x43, 0x11, 0x19, 0x41, 0xaa, 0x10, 0x4b, 0x99, 0xb8, 0xc1, 0xaa, + 0x28, 0x4b, 0x8e, 0x76, 0xc1, 0xaa, 0x3a, 0x4a, 0x11, 0x39, 0xc1, 0xaa, + 0x4c, 0x4a, 0x5c, 0x42, 0x41, 0xaa, 0x5e, 0x4b, 0x99, 0xb8, 0xc1, 0xaa, + 0x70, 0x4b, 0x8e, 0x76, 0xc1, 0xaa, 0x82, 0x4a, 0x5c, 0x42, 0xc1, 0xaa, + 0x94, 0x4a, 0x11, 0x39, 0x41, 0xaa, 0xac, 0x4f, 0x66, 0xc0, 0xc1, 0xaa, + 0xc4, 0xdc, 0x12, 0xc5, 0x01, 0x2a, 0x31, 0xdc, 0x13, 0xc1, 0x01, 0x2a, + 0x21, 0x4f, 0x12, 0xca, 0x41, 0xaa, 0xd6, 0xd8, 0x25, 0xa3, 0x01, 0x1d, + 0xb0, 0xc8, 0x1e, 0x3f, 0x01, 0x19, 0x09, 0xcc, 0x85, 0x71, 0x01, 0x5e, + 0x59, 0xd0, 0x1d, 0xec, 0x01, 0x72, 0xd9, 0xd1, 0x1a, 0x4a, 0x01, 0x72, + 0xe0, 0x05, 0xc1, 0xaa, 0xe8, 0xcc, 0x88, 0x65, 0x01, 0x71, 0x28, 0x05, + 0xc1, 0xaa, 0xf4, 0xcc, 0x88, 0x65, 0x01, 0x71, 0x20, 0xd0, 0x5d, 0x52, + 0x01, 0x4e, 0x91, 0xcf, 0x66, 0x66, 0x01, 0x4e, 0x88, 0xca, 0xa7, 0xec, + 0x0f, 0xaa, 0x79, 0xca, 0x9e, 0x78, 0x0f, 0xcb, 0x18, 0xc5, 0xdb, 0xd7, + 0x0f, 0xa6, 0x88, 0x97, 0x01, 0x8d, 0x00, 0x89, 0x01, 0x89, 0x5b, 0x01, + 0xab, 0x00, 0x90, 0x01, 0x89, 0x78, 0x8a, 0x01, 0x8d, 0xc8, 0x90, 0x01, + 0x89, 0x61, 
0x97, 0x01, 0x8d, 0x19, 0x8a, 0x01, 0x8d, 0xc1, 0x99, 0x01, + 0x8d, 0xe0, 0x99, 0x01, 0x8d, 0xe8, 0x8b, 0x01, 0x8d, 0x10, 0x8a, 0x01, + 0x88, 0x99, 0x8b, 0x01, 0x8d, 0x09, 0x9b, 0x01, 0x8d, 0xd0, 0x8a, 0x01, + 0x88, 0xa0, 0x8a, 0x01, 0x88, 0xa8, 0x8b, 0x01, 0x88, 0xf3, 0x01, 0xab, + 0x04, 0x97, 0x01, 0x89, 0x03, 0x01, 0xab, 0x0a, 0x90, 0x01, 0x89, 0x13, + 0x01, 0xab, 0x10, 0x8f, 0x01, 0x8d, 0x81, 0x8a, 0x01, 0x8d, 0xf8, 0x97, + 0x01, 0x89, 0x09, 0xcf, 0x33, 0xad, 0x01, 0x89, 0x71, 0x91, 0x01, 0x8d, + 0x31, 0x10, 0xc1, 0xab, 0x18, 0x8f, 0x01, 0x8d, 0x89, 0x87, 0x01, 0x8d, + 0xf0, 0x8a, 0x01, 0x88, 0xe9, 0x8b, 0x01, 0x88, 0xf9, 0x90, 0x01, 0x89, + 0x1b, 0x01, 0xab, 0x20, 0x94, 0x01, 0x89, 0x31, 0x87, 0x01, 0x8d, 0x20, + 0x97, 0x01, 0x89, 0x49, 0x8a, 0x01, 0x89, 0x69, 0x94, 0x01, 0x8d, 0x41, + 0xc2, 0x1b, 0x88, 0x01, 0x8d, 0x53, 0x01, 0xab, 0x28, 0x8f, 0x01, 0x8d, + 0x60, 0xc2, 0x1b, 0x88, 0x01, 0x8d, 0x58, 0xa1, 0x0f, 0xd8, 0x43, 0x01, + 0xab, 0x2c, 0x9f, 0x0f, 0xd8, 0x13, 0x01, 0xab, 0x37, 0xa2, 0x0f, 0xd8, + 0x83, 0x01, 0xab, 0x50, 0xa0, 0x0f, 0xd8, 0x23, 0x01, 0xab, 0x54, 0xa3, + 0x0f, 0xd8, 0xf8, 0xa2, 0x0f, 0xd8, 0x9b, 0x01, 0xab, 0x65, 0xa1, 0x0f, + 0xd8, 0x5b, 0x01, 0xab, 0x69, 0xa3, 0x0f, 0xd9, 0x10, 0xa2, 0x0f, 0xd8, + 0x8b, 0x01, 0xab, 0x74, 0xa0, 0x0f, 0xd8, 0x2b, 0x01, 0xab, 0x78, 0xa3, + 0x0f, 0xd9, 0x01, 0xa1, 0x0f, 0xd8, 0x4a, 0x01, 0xab, 0x8a, 0xa3, 0x0f, + 0xd9, 0x68, 0xa3, 0x0f, 0xd9, 0x31, 0xa2, 0x0f, 0xd8, 0xb2, 0x01, 0xab, + 0x91, 0x05, 0xc1, 0xab, 0x95, 0x15, 0xc1, 0xab, 0xbc, 0x16, 0xc1, 0xab, + 0xff, 0x06, 0xc1, 0xac, 0x1d, 0x14, 0xc1, 0xac, 0x30, 0x0e, 0xc1, 0xac, + 0x42, 0xd6, 0x2c, 0xb2, 0x01, 0x3a, 0x99, 0x08, 0xc1, 0xac, 0x52, 0xc3, + 0xe6, 0x74, 0x01, 0x38, 0x91, 0x0f, 0xc1, 0xac, 0x5a, 0x17, 0xc1, 0xac, + 0x66, 0x0a, 0xc1, 0xac, 0x70, 0x12, 0xc1, 0xac, 0x7e, 0x43, 0x00, 0x5f, + 0xc1, 0xac, 0x90, 0xc6, 0xca, 0x91, 0x01, 0x4e, 0x99, 0xc7, 0xc9, 0x3b, + 0x01, 0x5e, 0x20, 0x4a, 0x14, 0xda, 0xc1, 0xac, 0x9c, 0x4f, 0x66, 0x93, + 0x41, 0xac, 0xae, 0xca, 0x9f, 0xc2, 0x0f, 0xa5, 0xb9, 0xc9, 0xb3, 0x32, + 0x0f, 0xa5, 0xb1, 0xcb, 0x99, 0x60, 0x0f, 0xa5, 0xa9, 0xc8, 0x77, 0x99, + 0x0f, 0xa5, 0xa0, 0xc2, 0x00, 0x45, 0x0f, 0x9c, 0x43, 0x01, 0xac, 0xc2, + 0x42, 0x00, 0x30, 0x41, 0xac, 0xc8, 0x0f, 0xc1, 0xac, 0xd8, 0xc3, 0x01, + 0xad, 0x00, 0xda, 0xd2, 0x01, 0xac, 0xe7, 0x4a, 0xa2, 0x24, 0xc1, 0xac, + 0xed, 0x4b, 0x95, 0x40, 0xc1, 0xac, 0xf9, 0x4a, 0x51, 0x89, 0xc1, 0xad, + 0x05, 0x06, 0x41, 0xad, 0x29, 0x42, 0x00, 0xb0, 0xc1, 0xad, 0x43, 0xc4, + 0xde, 0xcb, 0x00, 0xda, 0xf0, 0xc4, 0x26, 0x78, 0x00, 0xda, 0xc9, 0xc5, + 0x06, 0xdb, 0x00, 0xda, 0xc1, 0x15, 0xc1, 0xad, 0x4f, 0x08, 0xc1, 0xad, + 0x5b, 0x16, 0xc1, 0xad, 0x67, 0xc3, 0x05, 0x14, 0x00, 0xda, 0x89, 0xc4, + 0x15, 0xe7, 0x00, 0xda, 0x80, 0x03, 0xc1, 0xad, 0x73, 0xc9, 0xa9, 0xfc, + 0x00, 0xda, 0x51, 0xc8, 0xbe, 0x12, 0x00, 0xda, 0x49, 0x07, 0xc1, 0xad, + 0x8e, 0x16, 0xc1, 0xad, 0x9a, 0x0d, 0xc1, 0xad, 0xa7, 0xc2, 0x00, 0xd0, + 0x00, 0xd9, 0x99, 0xc2, 0x0d, 0xf6, 0x00, 0xd9, 0x93, 0x01, 0xad, 0xb4, + 0xc2, 0x01, 0x4a, 0x00, 0xd9, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xd9, 0x73, + 0x01, 0xad, 0xba, 0xc2, 0x00, 0x39, 0x00, 0xd9, 0x6b, 0x01, 0xad, 0xc3, + 0xc2, 0x19, 0x2c, 0x00, 0xd9, 0x61, 0xc2, 0x01, 0xc3, 0x00, 0xd9, 0x59, + 0xc2, 0x01, 0x5d, 0x00, 0xd9, 0x4b, 0x01, 0xad, 0xcc, 0xc2, 0x00, 0xb0, + 0x00, 0xd9, 0x3b, 0x01, 0xad, 0xd2, 0x10, 0xc1, 0xad, 0xd8, 0xc2, 0x0e, + 0x9a, 0x00, 0xd9, 0x23, 0x01, 0xad, 0xeb, 0xc2, 0x25, 0x3b, 0x00, 0xd8, + 0xd3, 0x01, 0xad, 0xf1, 0xc2, 0x00, 0x64, 0x00, 0xd8, 0xc3, 0x01, 0xad, + 0xf7, 0xc2, 
0x01, 0x30, 0x00, 0xd8, 0xab, 0x01, 0xad, 0xfd, 0xc5, 0xde, + 0x0c, 0x00, 0xd8, 0x8b, 0x01, 0xae, 0x03, 0xc5, 0xdb, 0x5f, 0x00, 0xd8, + 0x4b, 0x01, 0xae, 0x09, 0xc5, 0xd7, 0xbd, 0x00, 0xd8, 0x3a, 0x01, 0xae, + 0x0f, 0xc5, 0xd8, 0xbc, 0x00, 0xda, 0x13, 0x01, 0xae, 0x15, 0x16, 0xc1, + 0xae, 0x1b, 0xc8, 0xb5, 0xaa, 0x00, 0xd9, 0xe3, 0x01, 0xae, 0x2a, 0xc7, + 0xc4, 0x79, 0x00, 0xd9, 0xd3, 0x01, 0xae, 0x30, 0xc4, 0xc5, 0x6e, 0x00, + 0xd9, 0xc3, 0x01, 0xae, 0x36, 0xc3, 0x96, 0x9c, 0x00, 0xd9, 0xb2, 0x01, + 0xae, 0x3c, 0xc7, 0xc3, 0x8b, 0x00, 0xd9, 0xa1, 0xc5, 0xd4, 0x75, 0x00, + 0xd8, 0x21, 0xc6, 0xcf, 0x59, 0x00, 0xd8, 0x19, 0xc5, 0xde, 0x48, 0x00, + 0xd8, 0x11, 0x44, 0xdf, 0x3f, 0x41, 0xae, 0x42, 0x44, 0x08, 0xcb, 0xc1, + 0xae, 0x4e, 0x43, 0x01, 0xc8, 0xc1, 0xae, 0x5a, 0xc8, 0xaf, 0x82, 0x0b, + 0x57, 0x90, 0x8b, 0x0b, 0x57, 0x69, 0x87, 0x0b, 0x57, 0x63, 0x01, 0xae, + 0x66, 0x97, 0x0b, 0x57, 0x53, 0x01, 0xae, 0x70, 0x91, 0x0b, 0x57, 0x43, + 0x01, 0xae, 0x76, 0x83, 0x0b, 0x57, 0x39, 0xc2, 0x01, 0x4a, 0x0b, 0x56, + 0xdb, 0x01, 0xae, 0x7a, 0xc2, 0x00, 0xb0, 0x0b, 0x57, 0x29, 0x1b, 0xc1, + 0xae, 0x80, 0xc2, 0x5d, 0xb3, 0x0b, 0x57, 0x19, 0xc2, 0x01, 0x5d, 0x0b, + 0x57, 0x11, 0xc2, 0x00, 0xf1, 0x0b, 0x57, 0x09, 0xc2, 0x00, 0x89, 0x0b, + 0x56, 0xf9, 0x06, 0xc1, 0xae, 0x8c, 0x09, 0xc1, 0xae, 0x96, 0xc2, 0x01, + 0x6c, 0x0b, 0x56, 0xe1, 0xc4, 0xdf, 0xdf, 0x0b, 0x56, 0xd1, 0xc2, 0x00, + 0x81, 0x0b, 0x56, 0xc9, 0x0d, 0xc1, 0xae, 0xa2, 0xc3, 0x00, 0x50, 0x0b, + 0x56, 0xa1, 0xc2, 0x00, 0x87, 0x0b, 0x56, 0x99, 0xc2, 0x00, 0x40, 0x0b, + 0x56, 0x90, 0x45, 0xd6, 0x6e, 0xc1, 0xae, 0xac, 0x83, 0x05, 0x35, 0x59, + 0x07, 0xc1, 0xae, 0xd0, 0x17, 0xc1, 0xae, 0xda, 0x8b, 0x05, 0x36, 0xe8, + 0x83, 0x05, 0x35, 0x09, 0x97, 0x05, 0x35, 0x19, 0xc3, 0x17, 0x29, 0x05, + 0x35, 0xd1, 0x07, 0xc1, 0xae, 0xe4, 0x91, 0x05, 0x36, 0xfb, 0x01, 0xae, + 0xf2, 0x8b, 0x05, 0x37, 0x29, 0xc2, 0x00, 0xb0, 0x05, 0x37, 0x48, 0x07, + 0xc1, 0xae, 0xfe, 0x0b, 0xc1, 0xaf, 0x0c, 0x97, 0x05, 0x36, 0x61, 0xc2, + 0x10, 0x11, 0x05, 0x36, 0x88, 0x03, 0xc1, 0xaf, 0x16, 0x8b, 0x05, 0x37, + 0x21, 0x07, 0x41, 0xaf, 0x1e, 0xc2, 0x16, 0x5a, 0x05, 0x35, 0x41, 0xc3, + 0x4f, 0x43, 0x05, 0x35, 0x89, 0x0c, 0xc1, 0xaf, 0x26, 0x97, 0x05, 0x35, + 0xeb, 0x01, 0xaf, 0x38, 0xc3, 0x01, 0xe2, 0x05, 0x36, 0x19, 0x16, 0xc1, + 0xaf, 0x3e, 0x8b, 0x05, 0x36, 0x79, 0x09, 0xc1, 0xaf, 0x4a, 0x83, 0x05, + 0x36, 0xd8, 0x83, 0x05, 0x35, 0x51, 0xc4, 0xe2, 0x9f, 0x05, 0x35, 0x71, + 0x97, 0x05, 0x36, 0x69, 0x8b, 0x05, 0x36, 0xe1, 0xc2, 0x7f, 0xc0, 0x05, + 0x36, 0xf0, 0x07, 0xc1, 0xaf, 0x5a, 0x97, 0x05, 0x35, 0xa9, 0x8b, 0x05, + 0x36, 0x71, 0x04, 0xc1, 0xaf, 0x64, 0x83, 0x05, 0x37, 0x19, 0x91, 0x05, + 0x37, 0x30, 0xc2, 0x5d, 0xa1, 0x05, 0x35, 0xa1, 0x0a, 0xc1, 0xaf, 0x70, + 0x8b, 0x05, 0x35, 0xb9, 0xc3, 0xd7, 0xe2, 0x05, 0x35, 0xc9, 0xc4, 0xbf, + 0xf1, 0x05, 0x37, 0x60, 0xc2, 0x7f, 0xc0, 0x05, 0x35, 0xf9, 0xc2, 0x92, + 0xb5, 0x05, 0x36, 0x09, 0x83, 0x05, 0x36, 0x10, 0xc2, 0x0f, 0xe1, 0x05, + 0x36, 0x49, 0x83, 0x05, 0x36, 0xd0, 0xc2, 0x02, 0xe0, 0x05, 0x36, 0x59, + 0x97, 0x05, 0x36, 0xc1, 0xc2, 0x00, 0x7a, 0x05, 0x36, 0xc9, 0xc5, 0xd8, + 0xe9, 0x05, 0x37, 0x68, 0x4c, 0x85, 0x4d, 0xc1, 0xaf, 0x84, 0xc2, 0x01, + 0xc3, 0x05, 0x37, 0xa8, 0xe0, 0x06, 0x87, 0x01, 0x3d, 0x58, 0xcb, 0x96, + 0x74, 0x0f, 0xac, 0x11, 0xda, 0x1c, 0xee, 0x0f, 0xa8, 0xc8, 0xc4, 0x40, + 0x89, 0x00, 0x00, 0x41, 0x5a, 0x1a, 0x30, 0x41, 0xaf, 0x90, 0x4c, 0x8a, + 0xc9, 0xc1, 0xaf, 0x9c, 0xc9, 0xad, 0xc8, 0x00, 0xdf, 0x30, 0xc7, 0xc6, + 0xc5, 0x00, 0xdf, 0x99, 0xc5, 0xc8, 0x5d, 0x00, 0xdf, 0x90, 0x8a, 0x00, + 0xdf, 0x89, 
0xc2, 0x00, 0x75, 0x00, 0xdf, 0x80, 0x97, 0x00, 0xdf, 0x73, + 0x01, 0xaf, 0xac, 0x45, 0xc6, 0xd3, 0xc1, 0xaf, 0xb2, 0x91, 0x00, 0xdf, + 0x61, 0x8b, 0x00, 0xdf, 0x51, 0x87, 0x00, 0xdf, 0x3b, 0x01, 0xaf, 0xba, + 0xc8, 0xbf, 0x0a, 0x00, 0xdf, 0x40, 0x97, 0x00, 0xdf, 0x29, 0x8b, 0x00, + 0xdf, 0x21, 0x0f, 0xc1, 0xaf, 0xbe, 0x10, 0xc1, 0xaf, 0xcb, 0xc2, 0x00, + 0x64, 0x00, 0xdf, 0x09, 0x15, 0xc1, 0xaf, 0xe7, 0xc2, 0x00, 0xdb, 0x00, + 0xde, 0xf1, 0xc2, 0x19, 0x2c, 0x00, 0xde, 0xd9, 0xc2, 0x00, 0x39, 0x00, + 0xde, 0x91, 0xc2, 0x0e, 0x9a, 0x00, 0xde, 0x89, 0xc2, 0x25, 0x3b, 0x00, + 0xde, 0x81, 0xc2, 0x01, 0x30, 0x00, 0xde, 0x71, 0xc2, 0x00, 0xb0, 0x00, + 0xde, 0x3b, 0x01, 0xaf, 0xf7, 0xc2, 0x01, 0x4a, 0x00, 0xde, 0x59, 0xc7, + 0xc6, 0xd3, 0x00, 0xde, 0x31, 0xc2, 0x01, 0x5d, 0x00, 0xde, 0x29, 0xc2, + 0x00, 0xd0, 0x00, 0xde, 0x11, 0x83, 0x00, 0xde, 0x00, 0x0d, 0xc1, 0xaf, + 0xfd, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0xc9, 0x15, 0xc1, 0xb0, 0x0a, 0xc2, + 0x00, 0xdb, 0x00, 0x4d, 0x91, 0x14, 0xc1, 0xb0, 0x1a, 0x1b, 0xc1, 0xb0, + 0x2d, 0xc2, 0x01, 0xc3, 0x00, 0x4d, 0x71, 0x04, 0xc1, 0xb0, 0x37, 0x12, + 0xc1, 0xb0, 0x41, 0x10, 0xc1, 0xb0, 0x4b, 0x06, 0xc1, 0xb0, 0x61, 0x16, + 0xc1, 0xb0, 0x6f, 0x0c, 0xc1, 0xb0, 0x7d, 0x05, 0xc1, 0xb0, 0x87, 0x09, + 0xc1, 0xb0, 0x91, 0x83, 0x00, 0x4c, 0x2b, 0x01, 0xb0, 0x9b, 0x91, 0x00, + 0x4c, 0x99, 0x8b, 0x00, 0x4c, 0x3b, 0x01, 0xb0, 0xa7, 0x97, 0x00, 0x4c, + 0x4b, 0x01, 0xb0, 0xab, 0x18, 0xc1, 0xb0, 0xaf, 0x87, 0x00, 0x4c, 0x78, + 0x44, 0x00, 0xbb, 0xc1, 0xb0, 0xbb, 0xca, 0xa0, 0x26, 0x00, 0x4f, 0xf0, + 0x03, 0xc1, 0xb0, 0xd1, 0x91, 0x00, 0x4e, 0x59, 0x87, 0x00, 0x4e, 0x39, + 0x48, 0xb2, 0x2d, 0xc1, 0xb0, 0xdd, 0x97, 0x00, 0x4e, 0x0b, 0x01, 0xb0, + 0xeb, 0x8b, 0x00, 0x4d, 0xfa, 0x01, 0xb0, 0xef, 0xcd, 0x73, 0x0d, 0x00, + 0x4e, 0xb9, 0xc3, 0x7c, 0x50, 0x00, 0x4c, 0x01, 0xd0, 0x50, 0xcf, 0x00, + 0x4f, 0xe8, 0xc4, 0x15, 0xe7, 0x00, 0x4f, 0x31, 0xc3, 0x05, 0x14, 0x00, + 0x4f, 0x39, 0x16, 0xc1, 0xb0, 0xf3, 0x08, 0xc1, 0xb0, 0xff, 0x15, 0xc1, + 0xb1, 0x0b, 0xc5, 0x06, 0xdb, 0x00, 0x4f, 0x71, 0xc4, 0x26, 0x78, 0x00, + 0x4f, 0x78, 0xc4, 0x01, 0xc3, 0x00, 0x4f, 0x91, 0xc4, 0x00, 0xba, 0x00, + 0x4f, 0x98, 0x4a, 0x78, 0x64, 0xc1, 0xb1, 0x17, 0xd3, 0x44, 0x8f, 0x00, + 0x4f, 0xc8, 0xe0, 0x06, 0x07, 0x01, 0x5a, 0xf0, 0xc2, 0x10, 0x11, 0x00, + 0xd0, 0xd9, 0x91, 0x00, 0xd0, 0xd1, 0x87, 0x00, 0xd0, 0xc9, 0x97, 0x00, + 0xd0, 0xc1, 0x8b, 0x00, 0xd0, 0xb8, 0xc2, 0x00, 0xd0, 0x00, 0xd0, 0xb1, + 0x83, 0x00, 0xd0, 0xa9, 0xc2, 0x0d, 0xf6, 0x00, 0xd0, 0xa1, 0xc2, 0x02, + 0x41, 0x00, 0xd0, 0x99, 0xc2, 0x00, 0xdb, 0x00, 0xd0, 0x91, 0xc2, 0x00, + 0x39, 0x00, 0xd0, 0x89, 0xc2, 0x19, 0x2c, 0x00, 0xd0, 0x81, 0x10, 0xc1, + 0xb1, 0x2a, 0xc2, 0x25, 0x3b, 0x00, 0xd0, 0x69, 0xc2, 0x00, 0x64, 0x00, + 0xd0, 0x61, 0xc2, 0x0e, 0x9a, 0x00, 0xd0, 0x49, 0xc2, 0x01, 0x6f, 0x00, + 0xd0, 0x41, 0x0f, 0xc1, 0xb1, 0x3c, 0xc2, 0x01, 0x5d, 0x00, 0xd0, 0x29, + 0xc2, 0x00, 0xb0, 0x00, 0xd0, 0x21, 0xc2, 0x01, 0x30, 0x00, 0xd0, 0x09, + 0xc2, 0x02, 0x2b, 0x00, 0xd0, 0x00, 0x83, 0x00, 0xba, 0x41, 0xc2, 0x01, + 0x30, 0x00, 0xba, 0x28, 0x45, 0xda, 0xf1, 0xc1, 0xb1, 0x46, 0xc5, 0xd5, + 0x4c, 0x01, 0x40, 0x00, 0xc6, 0x57, 0xec, 0x08, 0x83, 0xf9, 0xc3, 0x05, + 0x14, 0x08, 0x82, 0x93, 0x01, 0xb1, 0x7b, 0xc4, 0x26, 0x78, 0x08, 0x82, + 0xd3, 0x01, 0xb1, 0x7f, 0xc5, 0x06, 0xdb, 0x08, 0x82, 0xcb, 0x01, 0xb1, + 0x85, 0x15, 0xc1, 0xb1, 0x89, 0x08, 0xc1, 0xb1, 0x9b, 0x16, 0x41, 0xb1, + 0xa3, 0x91, 0x08, 0x80, 0x8b, 0x01, 0xb1, 0xb1, 0x0e, 0xc1, 0xb1, 0xb7, + 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x99, 0xc2, 0x00, 0x39, 0x08, 0x81, 0x69, + 0xc2, 0x19, 
0x2c, 0x08, 0x81, 0x61, 0xc2, 0x01, 0xc3, 0x08, 0x81, 0x59, + 0x04, 0xc1, 0xb1, 0xc1, 0x12, 0xc1, 0xb1, 0xcb, 0x10, 0xc1, 0xb1, 0xd5, + 0x06, 0xc1, 0xb1, 0xeb, 0x16, 0xc1, 0xb1, 0xf9, 0x0c, 0xc1, 0xb2, 0x07, + 0x05, 0xc1, 0xb2, 0x11, 0x09, 0xc1, 0xb2, 0x1b, 0x0d, 0xc1, 0xb2, 0x25, + 0x83, 0x08, 0x80, 0x2b, 0x01, 0xb2, 0x2f, 0x87, 0x08, 0x80, 0x79, 0x18, + 0xc1, 0xb2, 0x3b, 0x97, 0x08, 0x80, 0x4b, 0x01, 0xb2, 0x45, 0x8b, 0x08, + 0x80, 0x3b, 0x01, 0xb2, 0x49, 0x15, 0x41, 0xb2, 0x4d, 0x4a, 0x6f, 0xc8, + 0xc1, 0xb2, 0x5d, 0xc5, 0x1e, 0x96, 0x08, 0x82, 0x30, 0xd0, 0x5c, 0x82, + 0x08, 0x83, 0x81, 0xcb, 0x93, 0xf6, 0x08, 0x80, 0x21, 0xcb, 0x8f, 0xe1, + 0x08, 0x80, 0x19, 0xcb, 0x1e, 0x89, 0x08, 0x80, 0x01, 0xc8, 0x14, 0x38, + 0x08, 0x80, 0x09, 0xc7, 0x40, 0xe5, 0x08, 0x80, 0x10, 0x45, 0x09, 0x98, + 0xc1, 0xb2, 0x86, 0xcb, 0x97, 0xf5, 0x08, 0x82, 0x41, 0xc4, 0x19, 0x53, + 0x08, 0x82, 0x38, 0x0e, 0xc1, 0xb2, 0xaa, 0xcc, 0x80, 0xa9, 0x08, 0x82, + 0x61, 0x42, 0x00, 0x58, 0x41, 0xb2, 0xb6, 0x42, 0x0f, 0x7b, 0xc1, 0xb2, + 0xc0, 0x4a, 0x9a, 0xb8, 0x41, 0xb2, 0xcc, 0xc6, 0x2e, 0x82, 0x0e, 0x86, + 0xc9, 0xc6, 0xca, 0x9d, 0x0e, 0x86, 0xc0, 0x00, 0x41, 0xb2, 0xd8, 0x00, + 0xc1, 0xb2, 0xe4, 0xc2, 0x01, 0x6f, 0x0e, 0x80, 0x82, 0x01, 0xb2, 0xf0, + 0xc5, 0x57, 0xbd, 0x0e, 0x84, 0x49, 0xc6, 0xad, 0x17, 0x0e, 0x82, 0x51, + 0xc6, 0xcb, 0xf9, 0x0e, 0x81, 0xd2, 0x01, 0xb2, 0xf4, 0x44, 0xe1, 0x8b, + 0xc1, 0xb2, 0xfa, 0xc6, 0xcf, 0x11, 0x0e, 0x80, 0x60, 0x43, 0x0f, 0xf8, + 0xc1, 0xb3, 0x02, 0xc5, 0xd5, 0x88, 0x0e, 0x80, 0x38, 0x46, 0xd0, 0xc1, + 0xc1, 0xb3, 0x0e, 0x42, 0x0f, 0x7b, 0x41, 0xb3, 0x38, 0x11, 0xc1, 0xb3, + 0x42, 0xc2, 0x01, 0x0f, 0x0e, 0x84, 0x29, 0x45, 0xdd, 0xa3, 0x41, 0xb3, + 0x54, 0x45, 0xd7, 0x81, 0xc1, 0xb3, 0x60, 0x44, 0xcf, 0x3b, 0xc1, 0xb3, + 0x6c, 0x42, 0x00, 0x4e, 0xc1, 0xb3, 0x76, 0x43, 0x07, 0xc5, 0x41, 0xb3, + 0x82, 0x46, 0xd2, 0x7d, 0xc1, 0xb3, 0x8c, 0xca, 0x9b, 0x9e, 0x0e, 0x81, + 0x40, 0xc4, 0x1a, 0x73, 0x0e, 0x87, 0x41, 0xc5, 0xd6, 0x00, 0x0e, 0x83, + 0xf3, 0x01, 0xb3, 0x98, 0xca, 0x9a, 0x68, 0x0e, 0x82, 0x20, 0xc6, 0xcb, + 0xa5, 0x0e, 0x87, 0x13, 0x01, 0xb3, 0x9e, 0xc7, 0xc0, 0xf9, 0x0e, 0x86, + 0xf2, 0x01, 0xb3, 0xa2, 0xc4, 0x77, 0x35, 0x0e, 0x83, 0x48, 0xc3, 0x05, + 0xa9, 0x0e, 0x83, 0x33, 0x01, 0xb3, 0xa6, 0x10, 0x41, 0xb3, 0xac, 0xca, + 0x9e, 0xd2, 0x0e, 0x87, 0x39, 0x09, 0xc1, 0xb3, 0xb8, 0x03, 0xc1, 0xb3, + 0xc7, 0x45, 0x1a, 0x57, 0xc1, 0xb3, 0xd3, 0xc3, 0x1f, 0x1d, 0x0e, 0x84, + 0x32, 0x01, 0xb3, 0xe9, 0x44, 0x1a, 0x13, 0xc1, 0xb3, 0xef, 0x42, 0x00, + 0xbd, 0x41, 0xb4, 0x07, 0x11, 0xc1, 0xb4, 0x13, 0xc4, 0x7a, 0x04, 0x0e, + 0x82, 0x80, 0xd4, 0x39, 0x30, 0x0e, 0x86, 0x61, 0xd6, 0x2e, 0x80, 0x0e, + 0x86, 0x59, 0x10, 0xc1, 0xb4, 0x22, 0x48, 0x1a, 0x02, 0xc1, 0xb4, 0x2e, + 0x4f, 0x67, 0x47, 0xc1, 0xb4, 0x3a, 0x4a, 0xa3, 0x6e, 0xc1, 0xb4, 0x46, + 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0xa2, 0x01, 0xb4, 0x62, 0xc8, 0xba, 0x3a, + 0x0e, 0x85, 0x81, 0xca, 0xa2, 0xec, 0x0e, 0x85, 0x79, 0xcb, 0x92, 0x33, + 0x0e, 0x85, 0x70, 0xc6, 0xce, 0xd5, 0x0e, 0x86, 0x51, 0xc6, 0xd1, 0x63, + 0x0e, 0x86, 0x49, 0xc5, 0xd6, 0x9b, 0x0e, 0x86, 0x40, 0xc3, 0x63, 0x2b, + 0x0e, 0x83, 0x39, 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0xd8, 0x8b, 0x0e, 0x82, + 0xb1, 0xc2, 0x00, 0x45, 0x0e, 0x80, 0xc0, 0x08, 0xc1, 0xb4, 0x68, 0xc7, + 0xc2, 0x9d, 0x0e, 0x84, 0xc0, 0xd5, 0x32, 0xc0, 0x0e, 0x85, 0x61, 0x43, + 0x01, 0x55, 0x41, 0xb4, 0x74, 0xd4, 0x3d, 0xcc, 0x0e, 0x85, 0xb1, 0xc7, + 0xc3, 0x45, 0x0e, 0x83, 0xd8, 0xcd, 0x79, 0x75, 0x0e, 0x83, 0xa1, 0xcb, + 0x94, 0x17, 0x0e, 0x83, 0x00, 0x12, 0xc1, 0xb4, 0x80, 0xcb, 0x94, 0xbc, + 0x0e, 0x85, 
0x89, 0xcd, 0x7a, 0xfb, 0x0e, 0x85, 0x51, 0x16, 0xc1, 0xb4, + 0x8c, 0x45, 0xd9, 0xed, 0xc1, 0xb4, 0x98, 0xce, 0x6d, 0x5c, 0x0e, 0x85, + 0x20, 0x0b, 0xc1, 0xb4, 0xa4, 0x45, 0xaa, 0x6b, 0x41, 0xb4, 0xb4, 0xc6, + 0xd0, 0xf1, 0x0e, 0x84, 0x41, 0xc5, 0x13, 0x43, 0x0e, 0x81, 0x89, 0xc4, + 0xae, 0x15, 0x0e, 0x80, 0x78, 0x07, 0xc1, 0xb4, 0xca, 0xc3, 0x02, 0x44, + 0x0e, 0x80, 0xa0, 0x45, 0x7c, 0xbe, 0xc1, 0xb4, 0xd9, 0xc3, 0xbe, 0x04, + 0x0e, 0x81, 0x70, 0xc3, 0x63, 0x2b, 0x0e, 0x83, 0xa9, 0xc8, 0x9c, 0xe0, + 0x0e, 0x81, 0x60, 0x00, 0xc1, 0xb4, 0xef, 0xca, 0x9c, 0xde, 0x0e, 0x81, + 0x00, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0x39, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, + 0xa8, 0x45, 0xb9, 0x3c, 0xc1, 0xb5, 0x01, 0x0e, 0x41, 0xb5, 0x1a, 0x42, + 0x06, 0x4e, 0xc1, 0xb5, 0x24, 0xc5, 0xd8, 0x85, 0x0e, 0x80, 0xf0, 0xc3, + 0x63, 0x2b, 0x0e, 0x82, 0xc9, 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0x30, 0xc6, + 0xd0, 0x0d, 0x0e, 0x81, 0xc3, 0x01, 0xb5, 0x33, 0x43, 0x13, 0x4f, 0xc1, + 0xb5, 0x39, 0xc9, 0x94, 0x92, 0x0e, 0x80, 0x10, 0x00, 0xc1, 0xb5, 0x43, + 0xca, 0x9c, 0xde, 0x0e, 0x81, 0x08, 0xc2, 0x0d, 0x10, 0x08, 0xe3, 0x48, + 0xc2, 0x0d, 0x10, 0x08, 0xe3, 0x40, 0xc3, 0x45, 0x6b, 0x08, 0xe3, 0x39, + 0xc2, 0x00, 0x5f, 0x08, 0xe2, 0xf0, 0xc3, 0x0d, 0x0f, 0x08, 0xe3, 0x31, + 0xc2, 0x00, 0x33, 0x08, 0xe2, 0xe8, 0xc4, 0x0d, 0x0e, 0x08, 0xe3, 0x29, + 0xc3, 0x02, 0xdf, 0x08, 0xe2, 0xe0, 0xc4, 0x18, 0x12, 0x08, 0xe3, 0x21, + 0x91, 0x08, 0xe2, 0xd8, 0xc4, 0x18, 0x10, 0x08, 0xe2, 0xb9, 0xc2, 0x22, + 0xcc, 0x08, 0xe2, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0xe2, 0xa9, 0xc3, 0x09, + 0x9e, 0x08, 0xe2, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0xe2, 0x99, 0xc2, 0x02, + 0xa0, 0x08, 0xe2, 0x90, 0x94, 0x08, 0xe1, 0xa8, 0x8e, 0x08, 0xe0, 0x41, + 0x94, 0x08, 0xe0, 0x32, 0x01, 0xb5, 0x55, 0xc2, 0x00, 0xd0, 0x08, 0xe0, + 0xd9, 0x83, 0x08, 0xe0, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xc9, 0x83, + 0x08, 0xe0, 0xc0, 0x46, 0x01, 0x92, 0xc1, 0xb5, 0x59, 0x04, 0xc1, 0xb5, + 0x65, 0xd5, 0x37, 0x6d, 0x01, 0x2e, 0xf9, 0xc6, 0xcc, 0x1d, 0x0f, 0xac, + 0x69, 0x12, 0xc1, 0xb5, 0x71, 0xcc, 0x85, 0x7d, 0x0f, 0xac, 0x59, 0xe0, + 0x05, 0xe7, 0x01, 0x49, 0xf8, 0x46, 0x01, 0x92, 0xc1, 0xb5, 0x7d, 0xcf, + 0x68, 0x37, 0x01, 0x3e, 0x99, 0x15, 0xc1, 0xb5, 0x89, 0xda, 0x1a, 0x7e, + 0x01, 0x3a, 0x79, 0xc6, 0xcd, 0x8b, 0x01, 0x38, 0x71, 0xd5, 0x37, 0x6d, + 0x01, 0x2e, 0xf1, 0x4f, 0x60, 0x6c, 0x41, 0xb5, 0x95, 0xdb, 0x14, 0xf4, + 0x0f, 0xdb, 0x79, 0x45, 0x02, 0xde, 0x41, 0xb5, 0xa1, 0xc6, 0x02, 0xd1, + 0x01, 0x2f, 0x09, 0xd4, 0x39, 0x94, 0x01, 0x2e, 0xd9, 0xc5, 0x06, 0xe2, + 0x01, 0x2c, 0x21, 0xcc, 0x01, 0xdb, 0x0f, 0xdc, 0x78, 0xcd, 0x15, 0x02, + 0x01, 0x2c, 0x11, 0xcc, 0x06, 0xdb, 0x01, 0x2c, 0x08, 0xc6, 0xcd, 0x4f, + 0x0f, 0xd5, 0x59, 0xd0, 0x54, 0xdc, 0x0f, 0xa8, 0x28, 0xc9, 0x33, 0xad, + 0x01, 0x72, 0x40, 0xce, 0x6f, 0xfc, 0x01, 0x3f, 0xf9, 0xcc, 0x82, 0x35, + 0x01, 0x3f, 0xcb, 0x01, 0xb5, 0xad, 0xc5, 0x01, 0xa2, 0x01, 0x3f, 0xb2, + 0x01, 0xb5, 0xb3, 0xcc, 0x82, 0x35, 0x01, 0x3f, 0xc3, 0x01, 0xb5, 0xb9, + 0xc5, 0x01, 0xa2, 0x01, 0x3f, 0xab, 0x01, 0xb5, 0xbf, 0xce, 0x6f, 0xfc, + 0x01, 0x59, 0x98, 0x46, 0x00, 0x2c, 0xc1, 0xb5, 0xc5, 0xc4, 0x32, 0xbc, + 0x01, 0x3e, 0xf0, 0xe0, 0x00, 0x47, 0x01, 0x57, 0x30, 0x45, 0x00, 0x8c, + 0xc1, 0xb5, 0xd1, 0xd7, 0x2a, 0x99, 0x01, 0x52, 0xc8, 0xcf, 0x64, 0xd1, + 0x01, 0x52, 0xe1, 0xcb, 0x98, 0x42, 0x01, 0x52, 0xd1, 0x42, 0x00, 0x58, + 0xc1, 0xb5, 0xe3, 0xc8, 0x52, 0x09, 0x01, 0x52, 0xf8, 0x10, 0xc1, 0xb5, + 0xef, 0x14, 0x41, 0xb5, 0xf9, 0x43, 0x01, 0xd0, 0xc1, 0xb6, 0x05, 0xd5, + 0x36, 0xb0, 0x0f, 0xab, 0xd8, 0x45, 0x00, 0x2d, 0xc1, 0xb6, 0x2c, 0xd6, + 0x29, 0x86, 
0x01, 0x70, 0x60, 0xc9, 0x9b, 0x77, 0x01, 0x3e, 0xa9, 0x43, + 0x02, 0x6f, 0x41, 0xb6, 0x5a, 0xd5, 0x32, 0x18, 0x01, 0x3e, 0x29, 0x07, + 0xc1, 0xb6, 0x66, 0xcd, 0x25, 0xae, 0x00, 0x02, 0xdb, 0x01, 0xb6, 0x72, + 0x0b, 0xc1, 0xb6, 0x76, 0xcc, 0x6f, 0xb7, 0x0f, 0xaf, 0x41, 0xd3, 0x1f, + 0xcd, 0x01, 0x70, 0x10, 0xcb, 0x90, 0x86, 0x01, 0x36, 0xe1, 0xcc, 0x00, + 0x33, 0x00, 0x03, 0xdb, 0x01, 0xb6, 0x82, 0xc6, 0xb7, 0x3b, 0x01, 0x18, + 0x41, 0xcd, 0x69, 0x65, 0x01, 0x80, 0x60, 0x0a, 0xc1, 0xb6, 0x86, 0xc3, + 0x00, 0x3a, 0x01, 0x15, 0x19, 0x14, 0xc1, 0xb6, 0x98, 0xd5, 0x08, 0x89, + 0x01, 0x80, 0xa0, 0x0b, 0xc1, 0xb6, 0xa4, 0xc4, 0x20, 0xe6, 0x01, 0x18, + 0x50, 0xc7, 0xc9, 0xb2, 0x01, 0x1d, 0xc1, 0xcd, 0x77, 0xfc, 0x01, 0x71, + 0x00, 0x00, 0x41, 0xb6, 0xb0, 0x45, 0x00, 0x5a, 0xc1, 0xb6, 0xc2, 0xd9, + 0x1f, 0xc7, 0x01, 0x70, 0x20, 0xcb, 0x93, 0xd5, 0x0f, 0xac, 0x71, 0xcb, + 0x8a, 0x0a, 0x01, 0x4e, 0xc1, 0x45, 0x01, 0xfd, 0x41, 0xb6, 0xda, 0x45, + 0x04, 0x90, 0xc1, 0xb6, 0xf6, 0x44, 0x01, 0x5e, 0x41, 0xb7, 0x02, 0xc6, + 0xcf, 0x35, 0x0f, 0xb6, 0x29, 0xd5, 0x2c, 0xf5, 0x01, 0x70, 0xe0, 0xca, + 0x01, 0xfd, 0x01, 0x0f, 0x33, 0x01, 0xb7, 0x0e, 0xc9, 0xb0, 0x6b, 0x01, + 0x0c, 0xe0, 0x42, 0x00, 0x2c, 0xc1, 0xb7, 0x14, 0x42, 0x02, 0xa0, 0xc1, + 0xb7, 0x20, 0xd5, 0x37, 0xc1, 0x0f, 0xc5, 0x10, 0x00, 0xc1, 0xb7, 0x2c, + 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xc8, 0xc5, 0xca, 0xa4, 0x0f, 0xb3, 0x61, + 0xd7, 0x2a, 0x6b, 0x0f, 0xc5, 0x30, 0xcb, 0x82, 0xba, 0x01, 0x0f, 0x01, + 0x46, 0x00, 0x59, 0x41, 0xb7, 0x49, 0x42, 0x00, 0xe3, 0xc1, 0xb7, 0x58, + 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x80, 0x03, 0xc1, 0xb7, 0x64, 0x45, 0x11, + 0x3a, 0x41, 0xb7, 0x70, 0x45, 0x04, 0x90, 0xc1, 0xb7, 0x7c, 0xd8, 0x23, + 0xf3, 0x0f, 0xc5, 0x01, 0xdf, 0x0c, 0x65, 0x0f, 0xc5, 0x40, 0xd0, 0x56, + 0xda, 0x0f, 0xc1, 0xa1, 0xe0, 0x01, 0xe7, 0x0f, 0xc5, 0x50, 0xd0, 0x5a, + 0x22, 0x0f, 0xa8, 0x69, 0xcd, 0x0b, 0x91, 0x01, 0x19, 0x49, 0xd4, 0x3b, + 0x9c, 0x01, 0x4f, 0xe1, 0xdb, 0x18, 0x39, 0x00, 0x05, 0x58, 0xdc, 0x14, + 0x4d, 0x01, 0x3d, 0x51, 0xdb, 0x15, 0x60, 0x01, 0x49, 0xc8, 0xc7, 0x00, + 0xfa, 0x01, 0x03, 0x31, 0xc8, 0xb6, 0xca, 0x01, 0x01, 0x69, 0xc9, 0xb3, + 0x9e, 0x01, 0x01, 0x51, 0xc4, 0x01, 0xc3, 0x01, 0x00, 0x70, 0xd6, 0x2d, + 0x4c, 0x00, 0x2c, 0x71, 0xc4, 0xb9, 0x3c, 0x0f, 0xc8, 0xd9, 0xcb, 0x8f, + 0xf7, 0x00, 0x7e, 0xb2, 0x01, 0xb7, 0x88, 0xcc, 0x07, 0xc7, 0x01, 0x13, + 0xb1, 0x43, 0x00, 0xe2, 0xc1, 0xb7, 0x8e, 0xd0, 0x5a, 0x92, 0x01, 0x53, + 0xeb, 0x01, 0xb7, 0x9a, 0xcb, 0x1a, 0x1a, 0x01, 0x54, 0x28, 0xcf, 0x09, + 0xf8, 0x01, 0x4b, 0xb1, 0x44, 0x00, 0x58, 0xc1, 0xb7, 0xa0, 0x15, 0xc1, + 0xb7, 0xa6, 0x44, 0x07, 0xc7, 0x41, 0xb7, 0xb2, 0xd8, 0x24, 0x3b, 0x01, + 0x54, 0x39, 0xcf, 0x62, 0xb5, 0x01, 0x54, 0x48, 0xc2, 0x0e, 0x9a, 0x00, + 0xe2, 0x79, 0xc2, 0x02, 0x1c, 0x00, 0xe0, 0xc9, 0x83, 0x00, 0xe0, 0x60, + 0x16, 0xc1, 0xb7, 0xb8, 0x15, 0xc1, 0xb7, 0xc2, 0xc2, 0x00, 0xd0, 0x00, + 0xe0, 0x59, 0x83, 0x00, 0xe0, 0x50, 0xc2, 0x00, 0xd0, 0x00, 0xe1, 0x09, + 0x83, 0x00, 0xe1, 0x00, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xf1, 0x83, 0x00, + 0xe0, 0xe8, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xb1, 0x83, 0x00, 0xe0, 0xa8, + 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xa1, 0x83, 0x00, 0xe0, 0x98, 0xc2, 0x00, + 0xdb, 0x00, 0xe0, 0x91, 0x83, 0x00, 0xe0, 0x88, 0xc2, 0x00, 0xd0, 0x00, + 0xe0, 0x81, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0x79, 0x83, 0x00, 0xe0, 0x70, + 0x83, 0x00, 0xe0, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xe0, 0x49, 0xc2, 0x01, + 0x30, 0x00, 0xe0, 0x28, 0xc2, 0x00, 0xd0, 0x00, 0xe0, 0x39, 0x83, 0x00, + 0xe0, 0x30, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0x21, 0x83, 0x00, 0xe0, 0x18, + 0xc2, 0x00, 
0xd0, 0x00, 0xe0, 0x11, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0x09, + 0x83, 0x00, 0xe0, 0x00, 0xc4, 0x18, 0x10, 0x00, 0xe2, 0x39, 0xc2, 0x22, + 0xcc, 0x00, 0xe2, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0xe2, 0x29, 0xc3, 0x09, + 0x9e, 0x00, 0xe2, 0x20, 0xc4, 0x02, 0xde, 0x00, 0xe2, 0x19, 0xc2, 0x02, + 0xa0, 0x00, 0xe2, 0x10, 0xc5, 0xda, 0x79, 0x00, 0xe1, 0xfb, 0x01, 0xb7, + 0xcc, 0xc5, 0x4e, 0x18, 0x00, 0xe1, 0xd8, 0xc5, 0x33, 0x5d, 0x00, 0xe1, + 0xb9, 0xc3, 0x00, 0xea, 0x00, 0xe1, 0xb0, 0xc2, 0x00, 0x39, 0x00, 0xe1, + 0x29, 0xc2, 0x19, 0x2c, 0x00, 0xe1, 0x20, 0xc3, 0x01, 0x95, 0x00, 0xe1, + 0xa8, 0xc6, 0xd3, 0xbb, 0x00, 0xe1, 0xa0, 0x97, 0x00, 0xe1, 0x58, 0x91, + 0x00, 0xe1, 0x48, 0x15, 0xc1, 0xb7, 0xd2, 0xcc, 0x1a, 0x8c, 0x0f, 0xbc, + 0x71, 0x14, 0xc1, 0xb7, 0xe4, 0x44, 0x00, 0x49, 0xc1, 0xb7, 0xf0, 0xcc, + 0x07, 0xbb, 0x01, 0x3a, 0xc1, 0xca, 0xa7, 0xc4, 0x0f, 0xaf, 0xc1, 0x08, + 0xc1, 0xb7, 0xf6, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x11, 0xd5, 0x34, 0x8e, + 0x0f, 0xbd, 0xd9, 0x16, 0x41, 0xb8, 0x02, 0xc5, 0xd4, 0xe3, 0x0f, 0xaf, + 0x92, 0x01, 0xb8, 0x0e, 0xc2, 0x00, 0xd0, 0x08, 0xfd, 0x81, 0x83, 0x05, + 0x27, 0x60, 0x83, 0x05, 0x26, 0x89, 0xc2, 0x00, 0xd0, 0x05, 0x26, 0x90, + 0x83, 0x05, 0x26, 0x99, 0xc2, 0x02, 0x1c, 0x05, 0x26, 0xe0, 0x83, 0x05, + 0x26, 0xa1, 0xc2, 0x00, 0xd0, 0x05, 0x26, 0xa9, 0x15, 0xc1, 0xb8, 0x14, + 0x44, 0x05, 0x14, 0x41, 0xb8, 0x1e, 0x83, 0x05, 0x26, 0xb1, 0xc2, 0x00, + 0xd0, 0x05, 0x27, 0x68, 0x83, 0x05, 0x26, 0xb9, 0xc2, 0x00, 0xd0, 0x05, + 0x26, 0xc0, 0x83, 0x05, 0x26, 0xd1, 0xc2, 0x00, 0xd0, 0x05, 0x26, 0xd8, + 0x83, 0x05, 0x27, 0x01, 0xc2, 0x01, 0x30, 0x05, 0x27, 0x28, 0x83, 0x05, + 0x27, 0x11, 0xc2, 0x00, 0xd0, 0x05, 0x27, 0x58, 0xc2, 0x00, 0xd0, 0x05, + 0x27, 0x19, 0x83, 0x05, 0x27, 0x20, 0x83, 0x05, 0x27, 0x31, 0xc2, 0x00, + 0xd0, 0x05, 0x27, 0x40, 0x87, 0x05, 0x27, 0x78, 0x97, 0x05, 0x27, 0x88, + 0x87, 0x05, 0x27, 0xb8, 0x87, 0x05, 0x27, 0xa9, 0x8a, 0x05, 0x27, 0xb0, + 0xc9, 0x1b, 0x0a, 0x01, 0x01, 0x41, 0xca, 0x33, 0xdc, 0x00, 0x00, 0x5b, + 0x01, 0xb8, 0x2a, 0xc4, 0x1b, 0x05, 0x00, 0x00, 0x51, 0x4c, 0x87, 0x8d, + 0x41, 0xb8, 0x30, 0x48, 0xba, 0xc2, 0xc1, 0xb8, 0x3c, 0x42, 0x01, 0x60, + 0x41, 0xb8, 0x64, 0xc4, 0x26, 0x78, 0x00, 0xca, 0x79, 0xc5, 0x06, 0xdb, + 0x00, 0xca, 0x71, 0x15, 0xc1, 0xb8, 0x76, 0x08, 0xc1, 0xb8, 0x82, 0x16, + 0xc1, 0xb8, 0x8e, 0xc3, 0x05, 0x14, 0x00, 0xca, 0x39, 0xc4, 0x15, 0xe7, + 0x00, 0xca, 0x30, 0x44, 0x00, 0xbb, 0xc1, 0xb8, 0x9a, 0x4c, 0x29, 0xba, + 0xc1, 0xb8, 0xb2, 0x50, 0x5c, 0xf2, 0x41, 0xb8, 0xe0, 0x46, 0x00, 0xb9, + 0xc1, 0xb8, 0xf2, 0xcf, 0x69, 0x72, 0x00, 0xc8, 0x00, 0x16, 0xc1, 0xb9, + 0x0f, 0x09, 0xc1, 0xb9, 0x1f, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0xe1, 0x15, + 0xc1, 0xb9, 0x2f, 0xc2, 0x01, 0x4a, 0x00, 0xc8, 0xc1, 0xc2, 0x00, 0xdb, + 0x00, 0xc8, 0xb9, 0xc2, 0x00, 0x39, 0x00, 0xc8, 0xb1, 0xc2, 0x19, 0x2c, + 0x00, 0xc8, 0xab, 0x01, 0xb9, 0x3f, 0xc2, 0x01, 0xc3, 0x00, 0xc8, 0xa1, + 0x04, 0xc1, 0xb9, 0x43, 0x12, 0xc1, 0xb9, 0x4d, 0x10, 0xc1, 0xb9, 0x57, + 0x06, 0xc1, 0xb9, 0x61, 0x0c, 0xc1, 0xb9, 0x6b, 0x05, 0xc1, 0xb9, 0x75, + 0x0d, 0x41, 0xb9, 0x7f, 0x90, 0x08, 0x49, 0xc0, 0x9b, 0x08, 0x49, 0xb8, + 0x90, 0x08, 0x49, 0xb0, 0x90, 0x08, 0x49, 0xa8, 0x96, 0x08, 0x49, 0xa0, + 0x95, 0x08, 0x49, 0x70, 0x04, 0xc1, 0xb9, 0x89, 0x44, 0x0b, 0x0d, 0xc1, + 0xb9, 0x95, 0x46, 0x76, 0x5f, 0xc1, 0xb9, 0xa1, 0xc9, 0x32, 0xb7, 0x01, + 0x3e, 0xc9, 0xc7, 0xc4, 0x5d, 0x01, 0x3e, 0xc1, 0xc6, 0x02, 0xd1, 0x01, + 0x2f, 0x79, 0x11, 0xc1, 0xb9, 0xad, 0x16, 0xc1, 0xb9, 0xb9, 0xd6, 0x2f, + 0x72, 0x01, 0x50, 0xf1, 0x47, 0xc6, 0x9b, 0xc1, 0xb9, 0xc5, 0x47, 0xc1, + 0x69, 0x41, 
0xb9, 0xd1, 0xcc, 0x23, 0x9f, 0x01, 0x55, 0x68, 0x0e, 0xc1, + 0xb9, 0xdd, 0x4f, 0x0b, 0x17, 0x41, 0xb9, 0xe9, 0x96, 0x01, 0x04, 0xe1, + 0x95, 0x01, 0x04, 0xdb, 0x01, 0xb9, 0xf5, 0x92, 0x01, 0x04, 0xd1, 0x90, + 0x01, 0x04, 0xc9, 0x8f, 0x01, 0x04, 0xc1, 0x8e, 0x01, 0x04, 0xb9, 0x8d, + 0x01, 0x04, 0xb1, 0x8a, 0x01, 0x04, 0xa9, 0x9a, 0x01, 0x04, 0x99, 0x91, + 0x01, 0x04, 0x91, 0x87, 0x01, 0x04, 0x89, 0x83, 0x01, 0x04, 0x81, 0x98, + 0x00, 0xeb, 0x29, 0x97, 0x00, 0xeb, 0x21, 0x94, 0x00, 0xeb, 0x19, 0x8b, + 0x00, 0xeb, 0x11, 0x8c, 0x01, 0x63, 0xe0, 0x4d, 0x37, 0xb4, 0xc1, 0xb9, + 0xfb, 0xca, 0x9f, 0xe0, 0x00, 0x14, 0xbb, 0x01, 0xba, 0x7a, 0xce, 0x6b, + 0xe2, 0x05, 0x3c, 0x78, 0x46, 0x00, 0x8b, 0x41, 0xba, 0x80, 0xcd, 0x7e, + 0xf1, 0x00, 0x0e, 0x1b, 0x01, 0xba, 0x8c, 0x47, 0x10, 0x30, 0x41, 0xba, + 0x92, 0xc2, 0x00, 0x74, 0x00, 0xe9, 0x29, 0xcd, 0x7c, 0xdc, 0x00, 0x0e, + 0x10, 0xcc, 0x23, 0x3f, 0x00, 0x15, 0x08, 0x47, 0x80, 0x10, 0xc1, 0xba, + 0x9e, 0xd1, 0x54, 0x97, 0x00, 0x15, 0x68, 0x46, 0x02, 0x0f, 0xc1, 0xba, + 0xaa, 0x48, 0x19, 0x9b, 0x41, 0xbb, 0x60, 0x88, 0x05, 0x3f, 0xd9, 0x92, + 0x05, 0x3f, 0xe0, 0xc9, 0x4f, 0x9d, 0x05, 0x3f, 0xe9, 0xc6, 0xcb, 0x3f, + 0x05, 0x3f, 0xf0, 0x91, 0x00, 0x74, 0x09, 0x0a, 0x41, 0xbb, 0x6c, 0x44, + 0x68, 0x00, 0xc1, 0xbb, 0x78, 0x91, 0x00, 0x74, 0xd9, 0x43, 0x60, 0xe8, + 0x41, 0xbb, 0xa4, 0xc2, 0x0f, 0x7b, 0x00, 0x74, 0x39, 0xc2, 0x42, 0xcd, + 0x00, 0x74, 0x69, 0x91, 0x00, 0x74, 0xc8, 0x42, 0x01, 0x7c, 0xc1, 0xbb, + 0xb0, 0x49, 0xb1, 0xd3, 0x41, 0xbb, 0xbc, 0x91, 0x00, 0x74, 0xa9, 0x43, + 0x60, 0xe8, 0x41, 0xbb, 0xc8, 0x08, 0xc1, 0xbb, 0xd4, 0xc3, 0x02, 0x45, + 0x00, 0x74, 0xe9, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0xf8, 0x42, 0x00, 0x48, + 0x41, 0xbb, 0xe0, 0xc4, 0xdf, 0x43, 0x00, 0x75, 0x59, 0xc3, 0x02, 0x45, + 0x00, 0x75, 0x70, 0x83, 0x00, 0x75, 0x91, 0x8f, 0x00, 0x75, 0x99, 0x9b, + 0x00, 0x76, 0x19, 0x8b, 0x00, 0x76, 0x20, 0xc2, 0x00, 0xd1, 0x00, 0x75, + 0x89, 0xc2, 0x00, 0x45, 0x00, 0x75, 0xd8, 0x8b, 0x00, 0x75, 0xa8, 0x9b, + 0x00, 0x75, 0xb8, 0x97, 0x00, 0x75, 0xc8, 0x8b, 0x00, 0x76, 0x08, 0xc2, + 0x01, 0xc8, 0x00, 0x75, 0xe1, 0xc3, 0x4d, 0xc3, 0x00, 0x75, 0xe8, 0xc2, + 0x01, 0x23, 0x00, 0x76, 0x49, 0x8b, 0x00, 0x76, 0x50, 0xc2, 0x02, 0xa0, + 0x00, 0x76, 0x91, 0xc4, 0x02, 0xde, 0x00, 0x76, 0x98, 0xc3, 0x09, 0x9e, + 0x00, 0x76, 0xa1, 0xc3, 0x0d, 0x14, 0x00, 0x76, 0xa8, 0xc2, 0x22, 0xcc, + 0x00, 0x76, 0xb1, 0xc4, 0x18, 0x10, 0x00, 0x76, 0xb8, 0x45, 0x01, 0x93, + 0xc1, 0xbb, 0xec, 0xd1, 0x47, 0x70, 0x0f, 0xdc, 0xc8, 0x46, 0x02, 0xae, + 0xc1, 0xbb, 0xf8, 0x5b, 0x18, 0xc0, 0x41, 0xbc, 0x0a, 0xc6, 0x0b, 0x09, + 0x01, 0x3a, 0x91, 0xc6, 0x02, 0xd1, 0x0f, 0xa9, 0xf8, 0xe0, 0x03, 0x67, + 0x01, 0x1d, 0x88, 0x45, 0x01, 0x93, 0xc1, 0xbc, 0x16, 0xd2, 0x43, 0x27, + 0x0f, 0xdc, 0xc0, 0x5b, 0x16, 0xa4, 0xc1, 0xbc, 0x22, 0x46, 0x01, 0xc8, + 0x41, 0xbc, 0x2e, 0xe0, 0x00, 0x27, 0x01, 0x1d, 0x80, 0x45, 0x00, 0x27, + 0xc1, 0xbc, 0x40, 0x4d, 0x3d, 0x55, 0x41, 0xbc, 0x4c, 0xe0, 0x08, 0x67, + 0x0f, 0xdb, 0x40, 0x0f, 0xc1, 0xbc, 0x52, 0xcc, 0x0d, 0x9e, 0x01, 0x2e, + 0xd0, 0x44, 0x02, 0x9a, 0x41, 0xbc, 0x58, 0xcd, 0x3f, 0xe8, 0x0f, 0xdc, + 0x19, 0xce, 0x08, 0x79, 0x0f, 0xdc, 0x28, 0x00, 0x41, 0xbc, 0x5e, 0xcc, + 0x8a, 0x45, 0x01, 0x0f, 0x78, 0x45, 0x01, 0x95, 0xc1, 0xbc, 0x76, 0xc9, + 0x61, 0x53, 0x01, 0x48, 0x50, 0xcd, 0x7e, 0x3b, 0x01, 0x0c, 0xf9, 0x4e, + 0x6f, 0xa8, 0x41, 0xbc, 0x82, 0x00, 0x41, 0xbc, 0x8e, 0x44, 0x00, 0x49, + 0xc1, 0xbc, 0xac, 0x45, 0x00, 0x2c, 0x41, 0xbc, 0xb6, 0xd0, 0x58, 0x62, + 0x0f, 0xc2, 0x09, 0xc5, 0x01, 0xa2, 0x0f, 0xc2, 0x28, 0x00, 0x41, 0xbc, + 0xc0, 0xca, 
0xa8, 0x0a, 0x01, 0x0d, 0x40, 0xcc, 0x81, 0xed, 0x01, 0x4a, + 0x89, 0xcd, 0x7e, 0xfe, 0x01, 0x4a, 0x68, 0xcd, 0x7e, 0xfe, 0x01, 0x4a, + 0x79, 0xcc, 0x81, 0xed, 0x01, 0x4a, 0x60, 0xdc, 0x13, 0x6d, 0x01, 0x52, + 0x51, 0x46, 0x00, 0xd4, 0xc1, 0xbc, 0xcc, 0x45, 0x00, 0x8c, 0x41, 0xbc, + 0xd8, 0xc3, 0x7e, 0x1c, 0x08, 0x1c, 0x91, 0xc2, 0x00, 0x06, 0x08, 0x1c, + 0xa8, 0xce, 0x64, 0xe1, 0x0f, 0xdc, 0xb9, 0xde, 0x0f, 0x04, 0x01, 0x3b, + 0x18, 0x45, 0x00, 0x2d, 0xc1, 0xbc, 0xea, 0x50, 0x0f, 0x0a, 0xc1, 0xbc, + 0xfc, 0xca, 0x0e, 0xbe, 0x0f, 0xbf, 0x80, 0x45, 0x01, 0xfd, 0xc1, 0xbd, + 0x08, 0xdc, 0x14, 0xa1, 0x01, 0x3d, 0xe9, 0xdb, 0x15, 0x7b, 0x01, 0x3c, + 0xa0, 0x03, 0xc1, 0xbd, 0x1a, 0x45, 0x1a, 0x38, 0xc1, 0xbd, 0x26, 0x0b, + 0xc1, 0xbd, 0x32, 0xc6, 0xa8, 0x2a, 0x01, 0x3a, 0x41, 0xda, 0x19, 0x94, + 0x0f, 0xb3, 0x88, 0x45, 0x20, 0x6c, 0xc1, 0xbd, 0x3e, 0x4e, 0x47, 0x15, + 0x41, 0xbd, 0x4a, 0x03, 0xc1, 0xbd, 0x56, 0x42, 0x00, 0x27, 0xc1, 0xbd, + 0x62, 0x43, 0x00, 0x4a, 0xc1, 0xbd, 0x6c, 0xd8, 0x21, 0x9b, 0x0f, 0xb3, + 0x98, 0x49, 0x0a, 0xe6, 0xc1, 0xbd, 0x78, 0xdf, 0x03, 0xa8, 0x01, 0x3c, + 0xf1, 0x4e, 0x22, 0x43, 0x41, 0xbd, 0x84, 0x44, 0x02, 0xc3, 0xc1, 0xbd, + 0x90, 0xc7, 0xc0, 0x74, 0x01, 0x38, 0xc0, 0x49, 0x2c, 0x46, 0xc1, 0xbd, + 0x9a, 0x51, 0x08, 0xa9, 0x41, 0xbd, 0xa0, 0x45, 0x3a, 0x0c, 0xc1, 0xbd, + 0xac, 0x42, 0x01, 0x7f, 0xc1, 0xbd, 0xb2, 0xc5, 0x02, 0xd2, 0x01, 0x5a, + 0xc2, 0x01, 0xbd, 0xbe, 0x46, 0x82, 0xba, 0xc1, 0xbd, 0xca, 0xcc, 0x30, + 0xf2, 0x01, 0x3c, 0xb9, 0x11, 0x41, 0xbd, 0xd0, 0xdc, 0x12, 0x8d, 0x01, + 0x3c, 0xe1, 0x44, 0x00, 0x2d, 0x41, 0xbd, 0xe2, 0xc9, 0x68, 0x55, 0x01, + 0x3c, 0xb1, 0xcf, 0x65, 0x58, 0x01, 0x38, 0xb0, 0xc7, 0x0b, 0x00, 0x01, + 0x39, 0x89, 0xd1, 0x36, 0x21, 0x0f, 0xb3, 0xa1, 0x51, 0x48, 0x5a, 0x41, + 0xbd, 0xf1, 0xd2, 0x4e, 0x65, 0x01, 0x39, 0x71, 0xd0, 0x5a, 0xc2, 0x01, + 0x38, 0xe1, 0xd4, 0x38, 0xb8, 0x01, 0x5a, 0xb0, 0xdb, 0x15, 0x2a, 0x01, + 0x39, 0x21, 0x44, 0x0d, 0x14, 0x41, 0xbe, 0x00, 0xd1, 0x56, 0x62, 0x01, + 0x37, 0xe0, 0xca, 0x95, 0xd0, 0x0f, 0xa4, 0xf9, 0x45, 0x00, 0x8c, 0xc1, + 0xbe, 0x0c, 0xc5, 0x07, 0x73, 0x0f, 0xd7, 0xb0, 0xa0, 0x0d, 0x87, 0xd1, + 0x9f, 0x0d, 0x87, 0xc9, 0x9e, 0x0d, 0x87, 0xc1, 0xa3, 0x0d, 0x87, 0xe9, + 0xa2, 0x0d, 0x87, 0xe1, 0xa1, 0x0d, 0x87, 0xd8, 0xa4, 0x0d, 0x87, 0xb9, + 0xa3, 0x0d, 0x87, 0xb1, 0xa2, 0x0d, 0x87, 0xa9, 0xa1, 0x0d, 0x87, 0xa1, + 0xa0, 0x0d, 0x87, 0x99, 0x9f, 0x0d, 0x87, 0x91, 0x9e, 0x0d, 0x87, 0x88, + 0xa1, 0x0d, 0x87, 0x81, 0xa0, 0x0d, 0x87, 0x79, 0x9f, 0x0d, 0x87, 0x71, + 0x9e, 0x0d, 0x87, 0x68, 0xa3, 0x0d, 0x88, 0x39, 0xa2, 0x0d, 0x88, 0x31, + 0xa1, 0x0d, 0x88, 0x29, 0xa0, 0x0d, 0x88, 0x21, 0x9f, 0x0d, 0x88, 0x19, + 0x9e, 0x0d, 0x88, 0x10, 0xa1, 0x0d, 0x88, 0x09, 0xa0, 0x0d, 0x88, 0x01, + 0x9f, 0x0d, 0x87, 0xf9, 0x9e, 0x0d, 0x87, 0xf0, 0x9e, 0x0d, 0x85, 0xd1, + 0xa5, 0x0d, 0x86, 0x09, 0xa4, 0x0d, 0x86, 0x01, 0xa3, 0x0d, 0x85, 0xf9, + 0xa2, 0x0d, 0x85, 0xf1, 0xa1, 0x0d, 0x85, 0xe9, 0xa0, 0x0d, 0x85, 0xe1, + 0x9f, 0x0d, 0x85, 0xd8, 0xa4, 0x0d, 0x85, 0xc9, 0xa3, 0x0d, 0x85, 0xc1, + 0xa2, 0x0d, 0x85, 0xb9, 0xa1, 0x0d, 0x85, 0xb1, 0xa0, 0x0d, 0x85, 0xa9, + 0x9f, 0x0d, 0x85, 0xa1, 0x9e, 0x0d, 0x85, 0x98, 0xa0, 0x0d, 0x85, 0x91, + 0x9f, 0x0d, 0x85, 0x89, 0x9e, 0x0d, 0x85, 0x80, 0xa4, 0x0d, 0x85, 0x79, + 0xa3, 0x0d, 0x85, 0x71, 0xa2, 0x0d, 0x85, 0x69, 0xa1, 0x0d, 0x85, 0x61, + 0xa0, 0x0d, 0x85, 0x59, 0x9f, 0x0d, 0x85, 0x51, 0x9e, 0x0d, 0x85, 0x48, + 0x9e, 0x0d, 0x84, 0xf3, 0x01, 0xbe, 0x1e, 0xa6, 0x0d, 0x85, 0x31, 0xa5, + 0x0d, 0x85, 0x29, 0xa4, 0x0d, 0x85, 0x21, 0xa3, 0x0d, 0x85, 0x19, 0xa2, + 0x0d, 0x85, 
0x11, 0xa1, 0x0d, 0x85, 0x09, 0xa0, 0x0d, 0x85, 0x01, 0x9f, + 0x0d, 0x84, 0xf8, 0xa2, 0x0d, 0x84, 0xe9, 0xa1, 0x0d, 0x84, 0xe1, 0xa0, + 0x0d, 0x84, 0xd9, 0x9f, 0x0d, 0x84, 0xd1, 0x9e, 0x0d, 0x84, 0xc8, 0xc2, + 0x00, 0xe8, 0x0d, 0x84, 0xc1, 0xa3, 0x0d, 0x84, 0xb9, 0xa2, 0x0d, 0x84, + 0xb1, 0xa1, 0x0d, 0x84, 0xa9, 0xa0, 0x0d, 0x84, 0xa1, 0x9f, 0x0d, 0x84, + 0x99, 0x9e, 0x0d, 0x84, 0x90, 0xa0, 0x0d, 0x84, 0x89, 0x9f, 0x0d, 0x84, + 0x81, 0x9e, 0x0d, 0x84, 0x78, 0xc2, 0x00, 0xac, 0x0d, 0x84, 0x71, 0xa4, + 0x0d, 0x84, 0x69, 0xa3, 0x0d, 0x84, 0x61, 0xa2, 0x0d, 0x84, 0x59, 0xa1, + 0x0d, 0x84, 0x51, 0xa0, 0x0d, 0x84, 0x49, 0x9f, 0x0d, 0x84, 0x41, 0x9e, + 0x0d, 0x84, 0x38, 0xa6, 0x0d, 0x84, 0x31, 0xa5, 0x0d, 0x84, 0x29, 0xa4, + 0x0d, 0x84, 0x21, 0xa3, 0x0d, 0x84, 0x19, 0xa2, 0x0d, 0x84, 0x11, 0xa1, + 0x0d, 0x84, 0x09, 0xa0, 0x0d, 0x84, 0x01, 0x9f, 0x0d, 0x83, 0xf9, 0x9e, + 0x0d, 0x83, 0xf0, 0x9f, 0x0d, 0x88, 0xf1, 0x9e, 0x0d, 0x88, 0xe8, 0xa0, + 0x0d, 0x81, 0xd1, 0x9f, 0x0d, 0x81, 0xc9, 0x9e, 0x0d, 0x81, 0xc1, 0xc2, + 0x06, 0x52, 0x0d, 0x81, 0xd8, 0xa3, 0x0d, 0x81, 0xb9, 0xa2, 0x0d, 0x81, + 0xb1, 0xa1, 0x0d, 0x81, 0xa9, 0xa0, 0x0d, 0x81, 0xa1, 0x9f, 0x0d, 0x81, + 0x99, 0x9e, 0x0d, 0x81, 0x90, 0xa4, 0x0d, 0x81, 0x89, 0xa3, 0x0d, 0x81, + 0x81, 0xa2, 0x0d, 0x81, 0x79, 0xa1, 0x0d, 0x81, 0x71, 0xa0, 0x0d, 0x81, + 0x69, 0x9f, 0x0d, 0x81, 0x61, 0x9e, 0x0d, 0x81, 0x58, 0xa5, 0x0d, 0x81, + 0x51, 0xa4, 0x0d, 0x81, 0x49, 0xa3, 0x0d, 0x81, 0x41, 0xa2, 0x0d, 0x81, + 0x39, 0xa1, 0x0d, 0x81, 0x31, 0xa0, 0x0d, 0x81, 0x29, 0x9f, 0x0d, 0x81, + 0x21, 0x9e, 0x0d, 0x81, 0x18, 0xc2, 0x00, 0x3c, 0x0d, 0x81, 0x11, 0x9e, + 0x0d, 0x80, 0xbb, 0x01, 0xbe, 0x26, 0xa6, 0x0d, 0x80, 0xf9, 0xa5, 0x0d, + 0x80, 0xf1, 0xa4, 0x0d, 0x80, 0xe9, 0xa3, 0x0d, 0x80, 0xe1, 0xa2, 0x0d, + 0x80, 0xd9, 0xa1, 0x0d, 0x80, 0xd1, 0xa0, 0x0d, 0x80, 0xc9, 0x9f, 0x0d, + 0x80, 0xc0, 0xa1, 0x0d, 0x88, 0xc9, 0xa0, 0x0d, 0x88, 0xc1, 0x9f, 0x0d, + 0x88, 0xb9, 0x9e, 0x0d, 0x88, 0xb1, 0xa2, 0x0d, 0x88, 0xd1, 0xa3, 0x0d, + 0x88, 0xd9, 0xa4, 0x0d, 0x88, 0xe0, 0xa1, 0x0d, 0x88, 0xa9, 0xa0, 0x0d, + 0x88, 0xa1, 0x9f, 0x0d, 0x88, 0x99, 0x9e, 0x0d, 0x88, 0x90, 0xa2, 0x0d, + 0x88, 0x89, 0xa1, 0x0d, 0x88, 0x81, 0xa0, 0x0d, 0x88, 0x79, 0x9f, 0x0d, + 0x88, 0x71, 0x9e, 0x0d, 0x88, 0x68, 0xa2, 0x0d, 0x88, 0x61, 0xa1, 0x0d, + 0x88, 0x59, 0xa0, 0x0d, 0x88, 0x51, 0x9f, 0x0d, 0x88, 0x49, 0x9e, 0x0d, + 0x88, 0x40, 0xc2, 0x42, 0xcd, 0x0d, 0x87, 0x11, 0xa2, 0x0d, 0x87, 0x09, + 0xa1, 0x0d, 0x87, 0x01, 0xa0, 0x0d, 0x86, 0xf9, 0x9f, 0x0d, 0x86, 0xf1, + 0x9e, 0x0d, 0x86, 0xe8, 0x9e, 0x0d, 0x87, 0x19, 0x9f, 0x0d, 0x87, 0x21, + 0xa0, 0x0d, 0x87, 0x29, 0xa1, 0x0d, 0x87, 0x30, 0x9e, 0x0d, 0x87, 0x39, + 0x9f, 0x0d, 0x87, 0x41, 0xa0, 0x0d, 0x87, 0x49, 0xa1, 0x0d, 0x87, 0x51, + 0xa2, 0x0d, 0x87, 0x59, 0xa3, 0x0d, 0x87, 0x60, 0xa2, 0x0d, 0x86, 0xd9, + 0xa1, 0x0d, 0x86, 0xd1, 0xa0, 0x0d, 0x86, 0xc9, 0x9f, 0x0d, 0x86, 0xc1, + 0x9e, 0x0d, 0x86, 0xb9, 0xa3, 0x0d, 0x86, 0xe0, 0xc2, 0x01, 0xc3, 0x0d, + 0x86, 0xb1, 0x9f, 0x0d, 0x86, 0xa9, 0x9e, 0x0d, 0x86, 0xa0, 0xa1, 0x0d, + 0x86, 0x99, 0xa0, 0x0d, 0x86, 0x91, 0x9f, 0x0d, 0x86, 0x89, 0x9e, 0x0d, + 0x86, 0x80, 0xa4, 0x0d, 0x86, 0x79, 0xa3, 0x0d, 0x86, 0x71, 0xa2, 0x0d, + 0x86, 0x69, 0xa1, 0x0d, 0x86, 0x61, 0xa0, 0x0d, 0x86, 0x59, 0x9f, 0x0d, + 0x86, 0x51, 0x9e, 0x0d, 0x86, 0x48, 0xa4, 0x0d, 0x86, 0x41, 0xa3, 0x0d, + 0x86, 0x39, 0xa2, 0x0d, 0x86, 0x31, 0xa1, 0x0d, 0x86, 0x29, 0xa0, 0x0d, + 0x86, 0x21, 0x9f, 0x0d, 0x86, 0x19, 0x9e, 0x0d, 0x86, 0x10, 0xc2, 0x00, + 0x39, 0x0d, 0x83, 0xe9, 0xa3, 0x0d, 0x83, 0xe1, 0xa2, 0x0d, 0x83, 0xd9, + 0xa1, 0x0d, 
0x83, 0xd1, 0xa0, 0x0d, 0x83, 0xc9, 0x9f, 0x0d, 0x83, 0xc1, + 0x9e, 0x0d, 0x83, 0xb8, 0xa6, 0x0d, 0x83, 0xb1, 0xa5, 0x0d, 0x83, 0xa9, + 0xa4, 0x0d, 0x83, 0xa1, 0xa3, 0x0d, 0x83, 0x99, 0xa2, 0x0d, 0x83, 0x91, + 0xa1, 0x0d, 0x83, 0x89, 0xa0, 0x0d, 0x83, 0x81, 0x9f, 0x0d, 0x83, 0x79, + 0x9e, 0x0d, 0x83, 0x70, 0x9f, 0x0d, 0x83, 0x19, 0x9e, 0x0d, 0x83, 0x11, + 0xa0, 0x0d, 0x83, 0x21, 0xa1, 0x0d, 0x83, 0x29, 0xa2, 0x0d, 0x83, 0x31, + 0xa3, 0x0d, 0x83, 0x39, 0xa4, 0x0d, 0x83, 0x40, 0xa1, 0x0d, 0x83, 0x09, + 0xa0, 0x0d, 0x83, 0x01, 0x9f, 0x0d, 0x82, 0xf9, 0x9e, 0x0d, 0x82, 0xf0, + 0x9e, 0x0d, 0x83, 0x49, 0x9f, 0x0d, 0x83, 0x51, 0xa0, 0x0d, 0x83, 0x59, + 0xa1, 0x0d, 0x83, 0x61, 0xc2, 0x00, 0xf1, 0x0d, 0x83, 0x68, 0xa4, 0x0d, + 0x82, 0xe9, 0xa3, 0x0d, 0x82, 0xe1, 0xa2, 0x0d, 0x82, 0xd9, 0xa1, 0x0d, + 0x82, 0xd1, 0xa0, 0x0d, 0x82, 0xc9, 0x9f, 0x0d, 0x82, 0xc1, 0x9e, 0x0d, + 0x82, 0xb8, 0xa2, 0x0d, 0x82, 0xb1, 0xa1, 0x0d, 0x82, 0xa9, 0xa0, 0x0d, + 0x82, 0xa1, 0x9f, 0x0d, 0x82, 0x99, 0x9e, 0x0d, 0x82, 0x90, 0xa5, 0x0d, + 0x82, 0x89, 0xa4, 0x0d, 0x82, 0x81, 0xa3, 0x0d, 0x82, 0x79, 0xa2, 0x0d, + 0x82, 0x71, 0xa1, 0x0d, 0x82, 0x69, 0xa0, 0x0d, 0x82, 0x61, 0x9f, 0x0d, + 0x82, 0x59, 0x9e, 0x0d, 0x82, 0x50, 0xa3, 0x0d, 0x82, 0x49, 0xa2, 0x0d, + 0x82, 0x41, 0xa1, 0x0d, 0x82, 0x39, 0xa0, 0x0d, 0x82, 0x31, 0x9f, 0x0d, + 0x82, 0x29, 0x9e, 0x0d, 0x82, 0x20, 0xa5, 0x0d, 0x82, 0x19, 0xa4, 0x0d, + 0x82, 0x11, 0xa3, 0x0d, 0x82, 0x09, 0xa2, 0x0d, 0x82, 0x01, 0xa1, 0x0d, + 0x81, 0xf9, 0xa0, 0x0d, 0x81, 0xf1, 0x9f, 0x0d, 0x81, 0xe9, 0x9e, 0x0d, + 0x81, 0xe0, 0xca, 0xa2, 0x7e, 0x07, 0xda, 0x79, 0x48, 0xb7, 0xf2, 0x41, + 0xbe, 0x2e, 0xc2, 0x00, 0x67, 0x00, 0x2f, 0x23, 0x01, 0xbe, 0x40, 0xc3, + 0xba, 0x37, 0x00, 0x2e, 0xdb, 0x01, 0xbe, 0x46, 0xc3, 0x0b, 0xc8, 0x00, + 0x2e, 0x8b, 0x01, 0xbe, 0x4c, 0xc3, 0x04, 0xac, 0x00, 0x2e, 0xab, 0x01, + 0xbe, 0x52, 0x16, 0xc1, 0xbe, 0x58, 0x15, 0xc1, 0xbe, 0x73, 0xc4, 0x5d, + 0xe2, 0x00, 0x2f, 0x43, 0x01, 0xbe, 0x85, 0xc3, 0xe5, 0x78, 0x00, 0x2f, + 0x3b, 0x01, 0xbe, 0x8b, 0x46, 0x26, 0xf7, 0xc1, 0xbe, 0x91, 0xc3, 0x20, + 0x18, 0x00, 0x2f, 0x03, 0x01, 0xbe, 0xb5, 0xc3, 0x00, 0x4e, 0x00, 0x2e, + 0xf3, 0x01, 0xbe, 0xbb, 0xc5, 0xa2, 0x83, 0x00, 0x2e, 0xe3, 0x01, 0xbe, + 0xc1, 0xc3, 0x4a, 0xb9, 0x00, 0x2e, 0xcb, 0x01, 0xbe, 0xc7, 0xc5, 0x4a, + 0xb3, 0x00, 0x2e, 0xb3, 0x01, 0xbe, 0xcd, 0xc2, 0x01, 0x7f, 0x00, 0x2e, + 0xa3, 0x01, 0xbe, 0xd3, 0xc5, 0x40, 0x9a, 0x00, 0x2e, 0x9b, 0x01, 0xbe, + 0xdd, 0xc5, 0x9c, 0xa2, 0x00, 0x2e, 0x93, 0x01, 0xbe, 0xe3, 0x03, 0xc1, + 0xbe, 0xe9, 0x45, 0x06, 0xa6, 0x41, 0xbe, 0xf3, 0xd4, 0x3d, 0xa4, 0x07, + 0xd8, 0xf1, 0x13, 0xc1, 0xbf, 0x23, 0x15, 0xc1, 0xbf, 0x32, 0xc4, 0xe4, + 0x8b, 0x00, 0x2d, 0xf9, 0xc5, 0xdb, 0x23, 0x00, 0x2d, 0xe9, 0xcf, 0x64, + 0xa4, 0x00, 0x2d, 0xe1, 0x0a, 0xc1, 0xbf, 0x42, 0xc5, 0x79, 0xbe, 0x00, + 0x2d, 0xb9, 0xc5, 0xd5, 0x7e, 0x00, 0x2d, 0xa8, 0x43, 0x09, 0x3b, 0xc1, + 0xbf, 0x57, 0xcb, 0x97, 0x7c, 0x00, 0x2e, 0x31, 0xc9, 0xae, 0xb2, 0x00, + 0x2e, 0x19, 0xc5, 0xd4, 0x16, 0x00, 0x2e, 0x01, 0xc5, 0xda, 0xa6, 0x00, + 0x2d, 0xf0, 0xc4, 0xe1, 0x23, 0x00, 0x2d, 0x71, 0x03, 0x41, 0xbf, 0x63, + 0xc3, 0x51, 0x3f, 0x00, 0x2d, 0x69, 0xc4, 0x40, 0xe8, 0x00, 0x2d, 0x38, + 0xcc, 0x89, 0x9d, 0x00, 0x2d, 0x51, 0xc3, 0x17, 0xc9, 0x00, 0x2c, 0xd0, + 0x07, 0xc1, 0xbf, 0x6f, 0xc5, 0xd5, 0x24, 0x00, 0x2c, 0xb0, 0xc3, 0x75, + 0x8b, 0x00, 0x2d, 0x41, 0xc9, 0xaf, 0xc9, 0x00, 0x2c, 0xf8, 0xc3, 0x15, + 0xe7, 0x00, 0x2d, 0x09, 0xc4, 0x56, 0x4f, 0x00, 0x2c, 0xc8, 0xc9, 0xb3, + 0xef, 0x00, 0x2c, 0x99, 0xc4, 0xa0, 0x89, 0x00, 0x2c, 0x90, 0xc3, 0x26, + 0x1a, 0x00, 
0x2c, 0xe3, 0x01, 0xbf, 0x7b, 0xc6, 0xcb, 0x63, 0x00, 0x2c, + 0xf0, 0xc4, 0xde, 0xbb, 0x00, 0x2d, 0x19, 0xc7, 0xc3, 0x6f, 0x00, 0x2d, + 0x21, 0xc5, 0xdd, 0x35, 0x00, 0x2d, 0x2a, 0x01, 0xbf, 0x81, 0x05, 0xc1, + 0xbf, 0x87, 0xcf, 0x61, 0xb6, 0x02, 0x6e, 0x09, 0x03, 0xc1, 0xbf, 0x99, + 0xc6, 0xd2, 0xb3, 0x02, 0x6f, 0x21, 0x19, 0xc1, 0xbf, 0xa3, 0xd6, 0x2d, + 0xa4, 0x02, 0x6f, 0x99, 0xcf, 0x67, 0x56, 0x02, 0x6f, 0xa9, 0xcb, 0x92, + 0x1d, 0x02, 0x6f, 0xc1, 0xcb, 0x90, 0x39, 0x02, 0x6f, 0xc8, 0xd9, 0x1f, + 0x95, 0x02, 0x6e, 0x11, 0xc8, 0xbb, 0xf2, 0x02, 0x6f, 0xd0, 0xc9, 0xae, + 0xc4, 0x02, 0x6f, 0x39, 0xc6, 0xcc, 0x17, 0x02, 0x6f, 0x41, 0xc9, 0xb1, + 0x94, 0x02, 0x6f, 0xa0, 0xc5, 0xd5, 0x79, 0x02, 0x6e, 0x29, 0xca, 0x9e, + 0x50, 0x02, 0x6e, 0x98, 0xc6, 0xd3, 0x37, 0x02, 0x6e, 0x41, 0xcd, 0x7f, + 0xdb, 0x02, 0x6f, 0xe8, 0x44, 0x3e, 0x62, 0xc1, 0xbf, 0xaf, 0xc3, 0x00, + 0x88, 0x02, 0x6e, 0xa8, 0xc3, 0x05, 0x9f, 0x02, 0x6e, 0xb9, 0xc4, 0x07, + 0xc8, 0x02, 0x6f, 0x00, 0xc6, 0xcc, 0xb9, 0x02, 0x6e, 0xc1, 0xc8, 0xba, + 0x5a, 0x02, 0x6f, 0xe0, 0xc7, 0x12, 0x48, 0x02, 0x6f, 0x29, 0xc7, 0x50, + 0x25, 0x02, 0x6f, 0x70, 0xa1, 0x0f, 0xdb, 0xc1, 0x9f, 0x0f, 0xdb, 0xb1, + 0xa0, 0x0f, 0xdb, 0xb9, 0xa2, 0x0f, 0xdb, 0xc9, 0xa3, 0x0f, 0xdb, 0xd1, + 0xa4, 0x0f, 0xdb, 0xd9, 0xc4, 0xe1, 0x7b, 0x0f, 0xdc, 0x08, 0x45, 0x04, + 0x90, 0xc1, 0xbf, 0xb9, 0xc2, 0x00, 0xb1, 0x01, 0x00, 0xa8, 0xa6, 0x01, + 0x1d, 0xe9, 0xa4, 0x01, 0x1d, 0xe1, 0xa0, 0x01, 0x1d, 0xd9, 0x9e, 0x01, + 0x1d, 0xd0, 0x42, 0x00, 0x03, 0xc1, 0xbf, 0xc5, 0xcc, 0x89, 0x55, 0x0f, + 0xb5, 0x28, 0xc6, 0xce, 0x1b, 0x0f, 0x9e, 0x39, 0xc4, 0x00, 0x87, 0x0f, + 0xa1, 0xa0, 0xcb, 0x93, 0x0f, 0x0f, 0x9f, 0x09, 0xc8, 0x37, 0x8f, 0x0f, + 0x9f, 0x02, 0x01, 0xbf, 0xd4, 0xc4, 0xce, 0x15, 0x01, 0x34, 0x91, 0xc6, + 0xca, 0xb5, 0x01, 0x31, 0x69, 0xc6, 0xcf, 0x6b, 0x0f, 0xb7, 0x00, 0xc2, + 0x02, 0xa7, 0x0f, 0xc9, 0xf1, 0x89, 0x0f, 0xa2, 0xe0, 0xda, 0x1a, 0xb2, + 0x0f, 0xc8, 0xf1, 0xd8, 0x23, 0x7b, 0x0f, 0xd7, 0x80, 0xc4, 0x26, 0x78, + 0x08, 0x69, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0x69, 0xc1, 0x15, 0xc1, 0xbf, + 0xd8, 0x08, 0xc1, 0xbf, 0xe4, 0x16, 0xc1, 0xbf, 0xf0, 0xc3, 0x05, 0x14, + 0x08, 0x69, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0x69, 0x80, 0x42, 0x01, 0x6f, + 0xc1, 0xbf, 0xfc, 0xc8, 0xbe, 0xea, 0x08, 0x69, 0x20, 0xc9, 0xaa, 0xb0, + 0x08, 0x69, 0x19, 0xc5, 0xd9, 0xe8, 0x08, 0x69, 0x10, 0x91, 0x08, 0x69, + 0x09, 0x87, 0x08, 0x69, 0x01, 0x97, 0x08, 0x68, 0xf9, 0x8b, 0x08, 0x68, + 0xf1, 0x83, 0x08, 0x68, 0xe8, 0xc2, 0x02, 0x41, 0x08, 0x68, 0xe1, 0x10, + 0xc1, 0xc0, 0x0e, 0x0d, 0xc1, 0xc0, 0x1e, 0xc2, 0x19, 0x2c, 0x08, 0x68, + 0xc1, 0xc2, 0x01, 0x4a, 0x08, 0x68, 0xb1, 0xc2, 0x01, 0xc3, 0x08, 0x68, + 0xa1, 0xc2, 0x00, 0xdb, 0x08, 0x68, 0x99, 0xc2, 0x01, 0x30, 0x08, 0x68, + 0x91, 0x14, 0xc1, 0xc0, 0x2e, 0x06, 0xc1, 0xc0, 0x38, 0xc2, 0x00, 0x87, + 0x08, 0x68, 0x49, 0xc2, 0x00, 0xd0, 0x08, 0x68, 0x39, 0xc2, 0x00, 0x64, + 0x08, 0x68, 0x31, 0xc2, 0x25, 0x3b, 0x08, 0x68, 0x29, 0x16, 0xc1, 0xc0, + 0x42, 0x83, 0x08, 0x68, 0x01, 0xc2, 0x01, 0x5d, 0x08, 0x68, 0x09, 0xc2, + 0x00, 0xb0, 0x08, 0x68, 0x11, 0xc2, 0x02, 0x1c, 0x08, 0x68, 0x71, 0x15, + 0x41, 0xc0, 0x4c, 0x97, 0x00, 0xb9, 0x99, 0x8b, 0x00, 0xb9, 0x90, 0xc2, + 0x00, 0xd0, 0x00, 0xb9, 0x89, 0xc2, 0x0d, 0xf6, 0x00, 0xb9, 0x81, 0xc2, + 0x01, 0x4a, 0x00, 0xb9, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xb9, 0x71, 0xc2, + 0x00, 0x39, 0x00, 0xb9, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xb9, 0x61, 0xc2, + 0x01, 0xc3, 0x00, 0xb9, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xb9, 0x51, 0xc2, + 0x00, 0xb0, 0x00, 0xb9, 0x49, 0x10, 0xc1, 0xc0, 0x56, 0xc2, 0x0e, 0x9a, + 0x00, 0xb9, 
0x39, 0xc2, 0x01, 0x6f, 0x00, 0xb9, 0x31, 0xc2, 0x01, 0x30, + 0x00, 0xb9, 0x21, 0xc2, 0x02, 0x2b, 0x00, 0xb9, 0x19, 0x97, 0x00, 0xb9, + 0x11, 0x8b, 0x00, 0xb9, 0x09, 0x83, 0x00, 0xb9, 0x00, 0x49, 0xb0, 0x7d, + 0xc1, 0xc0, 0x60, 0x0c, 0xc1, 0xc0, 0xad, 0xd4, 0x3a, 0x5c, 0x01, 0x81, + 0x71, 0xd4, 0x3a, 0x34, 0x01, 0x81, 0x79, 0x47, 0x02, 0x0e, 0xc1, 0xc0, + 0xb9, 0xc6, 0x92, 0x0c, 0x01, 0x8b, 0x20, 0xc3, 0x05, 0x14, 0x01, 0x81, + 0x09, 0x16, 0xc1, 0xc1, 0x16, 0x08, 0xc1, 0xc1, 0x24, 0x15, 0xc1, 0xc1, + 0x30, 0xc5, 0x06, 0xdb, 0x01, 0x81, 0x41, 0xc4, 0x26, 0x78, 0x01, 0x81, + 0x48, 0xc3, 0x05, 0x14, 0x08, 0x47, 0xdb, 0x01, 0xc1, 0x3c, 0x16, 0xc1, + 0xc1, 0x42, 0xc4, 0x0d, 0x13, 0x08, 0x47, 0xe0, 0x16, 0xc1, 0xc1, 0x4e, + 0x15, 0xc1, 0xc1, 0x5a, 0xc4, 0xb9, 0x7e, 0x08, 0x47, 0x91, 0xc2, 0x00, + 0x67, 0x08, 0x47, 0x81, 0x03, 0xc1, 0xc1, 0x64, 0xc3, 0x20, 0x18, 0x08, + 0x47, 0x69, 0xc3, 0x00, 0x4e, 0x08, 0x47, 0x61, 0xc6, 0xcf, 0xd7, 0x08, + 0x47, 0x59, 0xc4, 0xe0, 0xe7, 0x08, 0x47, 0x51, 0xc4, 0x4a, 0xb9, 0x08, + 0x47, 0x49, 0xc2, 0x01, 0x7f, 0x08, 0x47, 0x23, 0x01, 0xc1, 0x70, 0xc4, + 0xdf, 0x07, 0x08, 0x47, 0x31, 0xc3, 0x7e, 0x89, 0x08, 0x47, 0x29, 0xcb, + 0x95, 0x8d, 0x08, 0x47, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x47, 0x11, 0xc4, + 0xe3, 0x27, 0x08, 0x47, 0x08, 0xca, 0x3b, 0x06, 0x07, 0xfb, 0x29, 0x47, + 0x02, 0x0e, 0xc1, 0xc1, 0x76, 0xd1, 0x2f, 0xfb, 0x07, 0xfc, 0xf1, 0xd6, + 0x2f, 0xf6, 0x07, 0xfc, 0xf8, 0x0d, 0xc1, 0xc1, 0xb1, 0x15, 0xc1, 0xc1, + 0xc0, 0xc5, 0xd6, 0x8c, 0x07, 0xfd, 0x4b, 0x01, 0xc1, 0xcc, 0xc5, 0xda, + 0xe7, 0x07, 0xfd, 0x89, 0x12, 0xc1, 0xc1, 0xd0, 0x8b, 0x07, 0xfe, 0xe3, + 0x01, 0xc1, 0xdf, 0x05, 0xc1, 0xc1, 0xe5, 0x16, 0xc1, 0xc1, 0xf1, 0xc5, + 0x90, 0xe4, 0x07, 0xfd, 0xf1, 0x83, 0x07, 0xfe, 0x13, 0x01, 0xc1, 0xfd, + 0x1b, 0xc1, 0xc2, 0x01, 0x87, 0x07, 0xfe, 0x3b, 0x01, 0xc2, 0x1b, 0x91, + 0x07, 0xfe, 0x63, 0x01, 0xc2, 0x23, 0x19, 0xc1, 0xc2, 0x27, 0x97, 0x07, + 0xfe, 0x99, 0xc5, 0xd9, 0x61, 0x07, 0xfd, 0x22, 0x01, 0xc2, 0x39, 0xd1, + 0x4e, 0xd0, 0x0f, 0xb4, 0x28, 0x47, 0x78, 0xc0, 0x41, 0xc2, 0x3d, 0x45, + 0x03, 0x14, 0xc1, 0xc2, 0x49, 0x83, 0x01, 0x82, 0xa9, 0x8b, 0x01, 0x82, + 0xb9, 0x97, 0x01, 0x82, 0xc9, 0x87, 0x01, 0x82, 0xd9, 0x91, 0x01, 0x82, + 0xe8, 0x83, 0x01, 0x82, 0x59, 0x8b, 0x01, 0x82, 0x69, 0x97, 0x01, 0x82, + 0x79, 0x87, 0x01, 0x82, 0x89, 0x91, 0x01, 0x82, 0x98, 0x83, 0x01, 0x82, + 0x61, 0x8b, 0x01, 0x82, 0x71, 0x97, 0x01, 0x82, 0x81, 0x87, 0x01, 0x82, + 0x91, 0x91, 0x01, 0x82, 0xa0, 0x83, 0x01, 0x82, 0xb1, 0x8b, 0x01, 0x82, + 0xc1, 0x97, 0x01, 0x82, 0xd1, 0x87, 0x01, 0x82, 0xe1, 0x91, 0x01, 0x82, + 0xf0, 0x83, 0x01, 0x82, 0xf9, 0x8b, 0x01, 0x83, 0x09, 0x97, 0x01, 0x83, + 0x21, 0x87, 0x01, 0x83, 0x31, 0x91, 0x01, 0x83, 0x40, 0x83, 0x01, 0x83, + 0x01, 0x8b, 0x01, 0x83, 0x11, 0x97, 0x01, 0x83, 0x29, 0x87, 0x01, 0x83, + 0x39, 0x91, 0x01, 0x83, 0x48, 0x83, 0x01, 0x83, 0x51, 0x8b, 0x01, 0x83, + 0x59, 0x97, 0x01, 0x83, 0x61, 0x87, 0x01, 0x83, 0x69, 0x91, 0x01, 0x83, + 0x70, 0x83, 0x01, 0x83, 0x79, 0x8b, 0x01, 0x83, 0x91, 0x97, 0x01, 0x83, + 0xa9, 0x87, 0x01, 0x83, 0xc1, 0x91, 0x01, 0x83, 0xd8, 0x83, 0x01, 0x83, + 0x81, 0x8b, 0x01, 0x83, 0x99, 0x97, 0x01, 0x83, 0xb1, 0x87, 0x01, 0x83, + 0xc9, 0x91, 0x01, 0x83, 0xe0, 0x83, 0x01, 0x83, 0x89, 0x8b, 0x01, 0x83, + 0xa1, 0x97, 0x01, 0x83, 0xb9, 0x87, 0x01, 0x83, 0xd1, 0x91, 0x01, 0x83, + 0xe8, 0x83, 0x01, 0x83, 0xf1, 0x8b, 0x01, 0x83, 0xf9, 0x97, 0x01, 0x84, + 0x01, 0x87, 0x01, 0x84, 0x09, 0x91, 0x01, 0x84, 0x10, 0x83, 0x01, 0x84, + 0x21, 0x97, 0x01, 0x84, 0x31, 0x91, 0x01, 0x84, 0x40, 0x83, 0x01, 0x84, + 0x49, 0x8b, 
0x01, 0x84, 0x51, 0x97, 0x01, 0x84, 0x59, 0x87, 0x01, 0x84, + 0x61, 0x91, 0x01, 0x84, 0x68, 0x83, 0x01, 0x84, 0x79, 0x8b, 0x01, 0x84, + 0x81, 0x87, 0x01, 0x84, 0x89, 0x91, 0x01, 0x84, 0x90, 0xc6, 0x1c, 0xb4, + 0x01, 0x02, 0x19, 0xce, 0x6b, 0x17, 0x01, 0x70, 0xd0, 0x45, 0x6b, 0x02, + 0xc1, 0xc2, 0x6f, 0xcc, 0x0d, 0x9e, 0x01, 0x2e, 0xc9, 0xc6, 0x1c, 0xb4, + 0x01, 0x2e, 0xc1, 0xcc, 0x01, 0xdb, 0x0f, 0xdc, 0x81, 0x42, 0x00, 0x58, + 0x41, 0xc2, 0x7b, 0xc9, 0x16, 0x2f, 0x01, 0x37, 0x39, 0x0e, 0xc1, 0xc2, + 0x81, 0xc8, 0xb5, 0x82, 0x01, 0x09, 0x39, 0xc8, 0xb9, 0x82, 0x01, 0x02, + 0xa1, 0xd0, 0x0f, 0x09, 0x00, 0x05, 0x09, 0xcd, 0x2c, 0xb2, 0x00, 0x05, + 0xf9, 0xcb, 0x10, 0xc9, 0x01, 0x70, 0xc0, 0xda, 0x1b, 0xb6, 0x01, 0x35, + 0x21, 0x51, 0x55, 0xda, 0x41, 0xc2, 0x90, 0x00, 0x41, 0xc2, 0xa2, 0xc9, + 0x57, 0x36, 0x01, 0x1d, 0x71, 0x45, 0x00, 0x8c, 0xc1, 0xc2, 0xb4, 0x03, + 0x41, 0xc2, 0xd8, 0x47, 0x34, 0x2f, 0xc1, 0xc2, 0xe4, 0x47, 0x02, 0x0e, + 0x41, 0xc2, 0xf7, 0x47, 0x34, 0x2f, 0xc1, 0xc3, 0x50, 0x47, 0x02, 0x0e, + 0x41, 0xc3, 0x63, 0xc5, 0x53, 0x93, 0x01, 0x09, 0xc9, 0x49, 0x1b, 0x0b, + 0x41, 0xc3, 0xc6, 0xd1, 0x31, 0xb3, 0x0f, 0xae, 0xd1, 0xc4, 0x05, 0x4b, + 0x01, 0x4f, 0x08, 0xd3, 0x41, 0x4b, 0x0f, 0x65, 0xa1, 0x47, 0x34, 0x2f, + 0xc1, 0xc3, 0xd6, 0xca, 0xa6, 0xc0, 0x0f, 0x65, 0x81, 0x49, 0x53, 0xa9, + 0xc1, 0xc4, 0x1b, 0xcb, 0x5f, 0x92, 0x0f, 0x65, 0x61, 0xc9, 0x41, 0x55, + 0x0f, 0x65, 0x00, 0xd5, 0x36, 0x08, 0x01, 0x4f, 0x28, 0x08, 0xc1, 0xc4, + 0x27, 0x16, 0xc1, 0xc4, 0x33, 0xc3, 0x05, 0x14, 0x0e, 0x9b, 0x90, 0xda, + 0x1b, 0x00, 0x01, 0x81, 0xb9, 0x4b, 0x19, 0xd1, 0x41, 0xc4, 0x3f, 0x48, + 0x0a, 0x53, 0xc1, 0xc4, 0x6f, 0x49, 0xb0, 0xb3, 0xc1, 0xc4, 0x7b, 0xcd, + 0x7e, 0x2e, 0x01, 0x7f, 0xa1, 0x4e, 0x71, 0xbc, 0xc1, 0xc4, 0x87, 0xc8, + 0x02, 0xf5, 0x01, 0x7f, 0xd8, 0xc7, 0xc2, 0x88, 0x01, 0x8c, 0x99, 0x0a, + 0xc1, 0xc4, 0x9d, 0xc7, 0xc5, 0xf3, 0x01, 0x8c, 0xb0, 0x43, 0x09, 0x9e, + 0xc1, 0xc4, 0xa9, 0xc9, 0xac, 0x2a, 0x01, 0x8c, 0xc8, 0xca, 0x9e, 0xfa, + 0x01, 0x8c, 0xb9, 0xc7, 0xc7, 0xa5, 0x01, 0x8c, 0xf8, 0x16, 0xc1, 0xc4, + 0xb5, 0xc3, 0x05, 0x14, 0x08, 0x42, 0xc2, 0x01, 0xc4, 0xc8, 0x16, 0xc1, + 0xc4, 0xcc, 0x15, 0xc1, 0xc4, 0xd8, 0x03, 0xc1, 0xc4, 0xe2, 0xc3, 0x20, + 0x18, 0x08, 0x42, 0x69, 0xc3, 0x00, 0x4e, 0x08, 0x42, 0x61, 0xc6, 0xcf, + 0xd7, 0x08, 0x42, 0x59, 0xc4, 0xe0, 0xe7, 0x08, 0x42, 0x51, 0xc4, 0x4a, + 0xb9, 0x08, 0x42, 0x49, 0xc2, 0x01, 0x7f, 0x08, 0x42, 0x23, 0x01, 0xc4, + 0xee, 0xc5, 0x4a, 0xb3, 0x08, 0x42, 0x31, 0xc3, 0x7e, 0x89, 0x08, 0x42, + 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x42, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x42, + 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x42, 0x09, 0xc2, 0x00, 0x67, 0x08, 0x42, + 0x81, 0xc4, 0xb9, 0x7e, 0x08, 0x42, 0x91, 0xc4, 0x5d, 0xe2, 0x08, 0x42, + 0x98, 0xc7, 0xc9, 0x0a, 0x0f, 0xa2, 0xd1, 0xc3, 0x1c, 0xe4, 0x0f, 0xa2, + 0x91, 0xc6, 0xa8, 0xc4, 0x0f, 0xa3, 0x09, 0xc5, 0xd4, 0xf7, 0x0f, 0xa3, + 0x10, 0x45, 0xa6, 0x50, 0xc1, 0xc4, 0xf4, 0xc5, 0x02, 0xd2, 0x01, 0x2e, + 0x5b, 0x01, 0xc5, 0x2b, 0xd4, 0x3a, 0x0c, 0x01, 0x3f, 0x0b, 0x01, 0xc5, + 0x2f, 0xc8, 0xb8, 0x3a, 0x01, 0x33, 0x38, 0x07, 0xc1, 0xc5, 0x35, 0xd5, + 0x31, 0xc4, 0x0f, 0xad, 0x59, 0x11, 0x41, 0xc5, 0x3f, 0xca, 0x9d, 0x2e, + 0x0f, 0xc5, 0x69, 0xc3, 0x05, 0x14, 0x0f, 0xc5, 0x60, 0xc5, 0x0b, 0x0a, + 0x01, 0x2d, 0x0b, 0x01, 0xc5, 0x4b, 0xc7, 0x37, 0x27, 0x01, 0x38, 0x21, + 0xc9, 0xb0, 0x1a, 0x01, 0x33, 0x21, 0xc2, 0x05, 0x1d, 0x0f, 0x99, 0x1b, + 0x01, 0xc5, 0x4f, 0x0f, 0xc1, 0xc5, 0x53, 0xca, 0x50, 0x80, 0x01, 0x30, + 0xb1, 0xc3, 0x0e, 0x6b, 0x01, 0x30, 0x31, 0xc9, 0xb3, 0x83, 0x07, 0xf2, + 0x30, 0x03, 
0xc1, 0xc5, 0x5f, 0x43, 0x00, 0x4a, 0xc1, 0xc5, 0x6b, 0x45, + 0x0a, 0xe1, 0x41, 0xc5, 0x75, 0xc6, 0x3a, 0x1a, 0x01, 0x2e, 0x3b, 0x01, + 0xc5, 0x7b, 0x48, 0xbe, 0x32, 0xc1, 0xc5, 0x7f, 0x43, 0x01, 0x47, 0x41, + 0xc5, 0x8b, 0x14, 0xc1, 0xc5, 0x97, 0xd7, 0x28, 0x5a, 0x01, 0x36, 0xb9, + 0xc8, 0x36, 0xb4, 0x01, 0x30, 0x79, 0xd2, 0x49, 0xaf, 0x0f, 0xab, 0xf0, + 0x0e, 0xc1, 0xc5, 0xa3, 0x4c, 0x0e, 0x55, 0xc1, 0xc5, 0xb0, 0xcc, 0x7d, + 0x5f, 0x01, 0x31, 0xc8, 0x44, 0x00, 0x2d, 0xc1, 0xc5, 0xbc, 0xc8, 0x46, + 0x71, 0x01, 0x2d, 0x68, 0x4a, 0x03, 0x3d, 0xc1, 0xc5, 0xc8, 0x4a, 0x01, + 0xa9, 0x41, 0xc5, 0xd4, 0x46, 0x01, 0xdc, 0xc1, 0xc5, 0xe9, 0xca, 0x9c, + 0x2a, 0x01, 0x5e, 0xe8, 0xcc, 0x88, 0x59, 0x01, 0x2d, 0x89, 0x42, 0x00, + 0xc4, 0x41, 0xc5, 0xf9, 0x46, 0x05, 0x87, 0xc1, 0xc6, 0x05, 0xce, 0x51, + 0x6a, 0x01, 0x58, 0xf0, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x39, 0xd0, 0x35, + 0xe3, 0x0f, 0xc3, 0xf9, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x39, 0xca, 0x35, + 0xe9, 0x0f, 0xc3, 0x79, 0xd1, 0x50, 0x46, 0x0f, 0xc3, 0xb8, 0xd5, 0x35, + 0xde, 0x0f, 0xc4, 0x31, 0xd1, 0x50, 0x46, 0x0f, 0xc3, 0xb1, 0xca, 0x35, + 0xe9, 0x0f, 0xc3, 0x71, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x31, 0xd0, 0x35, + 0xe3, 0x0f, 0xc3, 0xf0, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x29, 0xd1, 0x50, + 0x46, 0x0f, 0xc3, 0xa9, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x69, 0xd0, 0x5c, + 0x32, 0x0f, 0xc3, 0x29, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xe8, 0xd5, 0x35, + 0xde, 0x0f, 0xc4, 0x21, 0xd1, 0x50, 0x46, 0x0f, 0xc3, 0xa1, 0xca, 0x35, + 0xe9, 0x0f, 0xc3, 0x61, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x21, 0xd0, 0x35, + 0xe3, 0x0f, 0xc3, 0xe0, 0xc5, 0xdc, 0xfe, 0x0f, 0x9c, 0x81, 0xcc, 0x87, + 0x15, 0x0f, 0x99, 0x60, 0xc6, 0xcc, 0x83, 0x0f, 0xb5, 0xf1, 0xc4, 0x51, + 0xb7, 0x0f, 0x98, 0x51, 0xc7, 0xc5, 0x75, 0x0f, 0xa0, 0x19, 0xc4, 0xe3, + 0xcf, 0x0f, 0xc9, 0xe8, 0xc4, 0x26, 0x78, 0x0f, 0x17, 0xc9, 0xc5, 0x06, + 0xdb, 0x0f, 0x17, 0xc1, 0x15, 0xc1, 0xc6, 0x17, 0x08, 0xc1, 0xc6, 0x23, + 0x16, 0xc1, 0xc6, 0x2f, 0xc3, 0x05, 0x14, 0x0f, 0x17, 0x89, 0xc4, 0x15, + 0xe7, 0x0f, 0x17, 0x80, 0xc3, 0xd8, 0x41, 0x0f, 0x17, 0x73, 0x01, 0xc6, + 0x3b, 0xc3, 0x12, 0xe0, 0x0f, 0x17, 0x62, 0x01, 0xc6, 0x41, 0x1b, 0xc1, + 0xc6, 0x47, 0x97, 0x0f, 0x16, 0xf3, 0x01, 0xc6, 0x51, 0x10, 0xc1, 0xc6, + 0x57, 0x83, 0x0f, 0x16, 0x0b, 0x01, 0xc6, 0x67, 0x87, 0x0f, 0x16, 0xdb, + 0x01, 0xc6, 0x78, 0x91, 0x0f, 0x16, 0xab, 0x01, 0xc6, 0x7c, 0x8b, 0x0f, + 0x16, 0xe3, 0x01, 0xc6, 0x83, 0x16, 0xc1, 0xc6, 0x89, 0x0e, 0xc1, 0xc6, + 0x9f, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0xd1, 0x0d, 0xc1, 0xc6, 0xa9, 0xc2, + 0x01, 0xc3, 0x0f, 0x16, 0xc1, 0xc2, 0x00, 0x39, 0x0f, 0x16, 0xb9, 0xc2, + 0x02, 0x41, 0x0f, 0x16, 0x99, 0xc2, 0x01, 0x4a, 0x0f, 0x16, 0x91, 0xc2, + 0x02, 0x1c, 0x0f, 0x16, 0x89, 0xc2, 0x25, 0x3b, 0x0f, 0x16, 0x81, 0x15, + 0xc1, 0xc6, 0xb3, 0xc2, 0x00, 0x87, 0x0f, 0x16, 0x69, 0x12, 0xc1, 0xc6, + 0xbd, 0xc2, 0x01, 0x30, 0x0f, 0x16, 0x29, 0xc2, 0x0e, 0x9a, 0x0f, 0x16, + 0x21, 0xc2, 0x00, 0x64, 0x0f, 0x16, 0x19, 0xc2, 0x01, 0x5d, 0x0f, 0x16, + 0x10, 0xc6, 0x2a, 0xfe, 0x08, 0xc7, 0x91, 0xc6, 0xcf, 0x9b, 0x08, 0xc7, + 0x89, 0x15, 0xc1, 0xc6, 0xc7, 0x08, 0xc1, 0xc6, 0xd3, 0x16, 0x41, 0xc6, + 0xdf, 0xc4, 0x26, 0x78, 0x08, 0xc7, 0x49, 0xc5, 0x06, 0xdb, 0x08, 0xc7, + 0x41, 0x15, 0xc1, 0xc6, 0xf1, 0x08, 0xc1, 0xc6, 0xfd, 0x16, 0xc1, 0xc7, + 0x09, 0xc3, 0x05, 0x14, 0x08, 0xc7, 0x09, 0xc4, 0x15, 0xe7, 0x08, 0xc7, + 0x00, 0xc4, 0xdf, 0x7f, 0x08, 0xc6, 0xf9, 0x15, 0xc1, 0xc7, 0x15, 0x0a, + 0xc1, 0xc7, 0x21, 0xc2, 0x05, 0x1c, 0x08, 0xc6, 0xc1, 0xc2, 0x02, 0xaa, + 0x08, 0xc6, 0xb9, 0x83, 0x08, 0xc6, 0x0b, 0x01, 0xc7, 0x31, 0xc2, 0x0e, + 0x9a, 0x08, 
0xc6, 0xa1, 0x10, 0xc1, 0xc7, 0x3f, 0xc3, 0x02, 0x10, 0x08, + 0xc6, 0x91, 0x91, 0x08, 0xc6, 0x4b, 0x01, 0xc7, 0x4b, 0x87, 0x08, 0xc6, + 0x43, 0x01, 0xc7, 0x51, 0x17, 0xc1, 0xc7, 0x55, 0x1b, 0xc1, 0xc7, 0x5d, + 0xc2, 0x00, 0xe8, 0x08, 0xc6, 0x61, 0xc2, 0x01, 0x30, 0x08, 0xc6, 0x59, + 0xc2, 0x25, 0x9f, 0x08, 0xc6, 0x31, 0xc2, 0x00, 0x8c, 0x08, 0xc6, 0x10, + 0xc4, 0xdf, 0x7f, 0x08, 0xc5, 0xf9, 0x15, 0xc1, 0xc7, 0x6c, 0x0a, 0xc1, + 0xc7, 0x78, 0xc2, 0x05, 0x1c, 0x08, 0xc5, 0xc1, 0xc2, 0x02, 0xaa, 0x08, + 0xc5, 0xb9, 0x83, 0x08, 0xc5, 0x0b, 0x01, 0xc7, 0x88, 0xc2, 0x0e, 0x9a, + 0x08, 0xc5, 0xa1, 0x10, 0xc1, 0xc7, 0x96, 0xc3, 0x02, 0x10, 0x08, 0xc5, + 0x91, 0x91, 0x08, 0xc5, 0x4b, 0x01, 0xc7, 0xa2, 0x87, 0x08, 0xc5, 0x43, + 0x01, 0xc7, 0xa8, 0x17, 0xc1, 0xc7, 0xac, 0x1b, 0xc1, 0xc7, 0xb4, 0xc2, + 0x00, 0xe8, 0x08, 0xc5, 0x61, 0xc2, 0x01, 0x30, 0x08, 0xc5, 0x59, 0xc2, + 0x25, 0x9f, 0x08, 0xc5, 0x31, 0xc2, 0x00, 0x8c, 0x08, 0xc5, 0x10, 0xc3, + 0x02, 0x6e, 0x01, 0x18, 0x39, 0xc7, 0x80, 0x2f, 0x07, 0xf2, 0x78, 0xc5, + 0x00, 0x2c, 0x01, 0x49, 0x99, 0xc4, 0x00, 0x49, 0x01, 0x59, 0xf8, 0xcf, + 0x1b, 0x25, 0x01, 0x02, 0xa9, 0xcc, 0x8c, 0x19, 0x0f, 0x9d, 0xa0, 0x05, + 0xc1, 0xc7, 0xc3, 0xd7, 0x15, 0x2e, 0x01, 0x39, 0x19, 0xd8, 0x21, 0x23, + 0x01, 0x39, 0x11, 0x44, 0x05, 0x18, 0xc1, 0xc7, 0xcf, 0xcb, 0x8d, 0xdc, + 0x0f, 0x9a, 0x01, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x30, 0xcb, 0x93, 0x5c, + 0x0f, 0x9b, 0xe8, 0x00, 0xc1, 0xc7, 0xdb, 0xc9, 0xab, 0xd9, 0x0f, 0xb1, + 0xb0, 0xd7, 0x29, 0x6e, 0x0f, 0xb0, 0x59, 0xd0, 0x59, 0x32, 0x0f, 0xb1, + 0x88, 0xdf, 0x0d, 0x9b, 0x01, 0x36, 0xf1, 0x49, 0x0d, 0x20, 0x41, 0xc8, + 0x24, 0xe0, 0x06, 0x87, 0x01, 0x3d, 0x60, 0xc9, 0xb2, 0xa2, 0x0f, 0x98, + 0xe9, 0xc6, 0x00, 0x91, 0x0f, 0x98, 0xa8, 0xca, 0x5d, 0xa2, 0x07, 0xf8, + 0x19, 0xc7, 0x68, 0xc6, 0x07, 0xff, 0x10, 0xc7, 0x0b, 0x00, 0x07, 0xf8, + 0x51, 0xc8, 0x36, 0x21, 0x07, 0xf8, 0x31, 0xc9, 0x2d, 0x85, 0x07, 0xf8, + 0x38, 0x45, 0x09, 0x98, 0xc1, 0xc8, 0x30, 0xca, 0x99, 0x61, 0x07, 0xf8, + 0x20, 0x11, 0xc1, 0xc8, 0x54, 0xd0, 0x08, 0xf7, 0x07, 0xf9, 0xf1, 0xc8, + 0x8e, 0x16, 0x07, 0xff, 0x00, 0xc8, 0x52, 0x00, 0x07, 0xf8, 0xd9, 0xc6, + 0x27, 0x5e, 0x07, 0xf8, 0x78, 0x07, 0xc1, 0xc8, 0x60, 0x45, 0x0b, 0x12, + 0xc1, 0xc8, 0x6c, 0xc7, 0x80, 0x2f, 0x07, 0xf9, 0xf8, 0xca, 0x0e, 0xbe, + 0x07, 0xf8, 0xe9, 0xcf, 0x0f, 0x0a, 0x07, 0xf8, 0x08, 0xcf, 0x54, 0xbb, + 0x07, 0xf8, 0xf1, 0xca, 0x1f, 0x0e, 0x07, 0xfa, 0x00, 0xcb, 0x2c, 0xb4, + 0x07, 0xf8, 0xf9, 0xcc, 0x01, 0xbb, 0x07, 0xf8, 0x10, 0xce, 0x61, 0xd5, + 0x07, 0xf9, 0xe1, 0x45, 0x00, 0x2d, 0x41, 0xc8, 0x78, 0xc9, 0x9f, 0xc3, + 0x07, 0xff, 0x09, 0xcb, 0x8e, 0x13, 0x07, 0xf8, 0x29, 0xc8, 0x60, 0xf4, + 0x07, 0xf8, 0x58, 0x00, 0x41, 0xc8, 0x90, 0xc9, 0xa8, 0x28, 0x0f, 0x9c, + 0x39, 0x95, 0x0f, 0x9c, 0x30, 0xc5, 0x91, 0x52, 0x0f, 0xb4, 0x91, 0xcb, + 0x92, 0xf9, 0x0f, 0xcf, 0x78, 0x49, 0xb2, 0xcf, 0xc1, 0xc8, 0x9c, 0xc2, + 0x00, 0xac, 0x0b, 0x7a, 0x50, 0x44, 0x1a, 0xce, 0xc1, 0xc8, 0xa8, 0x15, + 0xc1, 0xc8, 0xc4, 0x87, 0x0b, 0x7a, 0x41, 0x42, 0x07, 0x26, 0xc1, 0xc8, + 0xd8, 0xc2, 0x01, 0x6f, 0x0b, 0x78, 0x71, 0x83, 0x0b, 0x78, 0x50, 0x83, + 0x0b, 0x78, 0x83, 0x01, 0xc8, 0xe2, 0x1b, 0xc1, 0xc8, 0xe8, 0x09, 0xc1, + 0xc8, 0xf2, 0x10, 0xc1, 0xc8, 0xfc, 0xc2, 0x00, 0xd0, 0x0b, 0x78, 0x88, + 0x1c, 0xc1, 0xc9, 0x06, 0x42, 0x07, 0x26, 0xc1, 0xc9, 0x1c, 0xc2, 0x0e, + 0x9a, 0x0b, 0x78, 0x79, 0x83, 0x0b, 0x78, 0x58, 0xc2, 0x16, 0x5a, 0x0b, + 0x7a, 0x31, 0x83, 0x0b, 0x79, 0xd1, 0xc2, 0x0d, 0xf6, 0x0b, 0x79, 0xa1, + 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x98, 0xc2, 0x00, 0x2c, 0x0b, 0x7a, 0x29, + 0x83, 0x0b, 
0x78, 0x08, 0xc2, 0x00, 0xd0, 0x0b, 0x7a, 0x21, 0x83, 0x0b, + 0x79, 0x30, 0x8a, 0x0b, 0x7a, 0x19, 0x47, 0x78, 0xc0, 0x41, 0xc9, 0x26, + 0x1c, 0xc1, 0xc9, 0x36, 0x15, 0xc1, 0xc9, 0x44, 0x83, 0x0b, 0x79, 0xd9, + 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0xa8, 0x16, 0xc1, 0xc9, 0x4e, 0xc4, 0xe2, + 0x83, 0x0b, 0x79, 0x89, 0xc2, 0x02, 0x2b, 0x0b, 0x79, 0x01, 0xc3, 0x3a, + 0x09, 0x0b, 0x78, 0x91, 0xc2, 0x00, 0xb0, 0x0b, 0x78, 0x10, 0x0a, 0xc1, + 0xc9, 0x5c, 0x83, 0x0b, 0x78, 0xf8, 0xc2, 0x01, 0x30, 0x0b, 0x79, 0x11, + 0x83, 0x0b, 0x79, 0x08, 0x0a, 0xc1, 0xc9, 0x66, 0xc2, 0x19, 0x2c, 0x0b, + 0x78, 0xb9, 0x83, 0x0b, 0x78, 0xb0, 0xc2, 0x00, 0x87, 0x0b, 0x78, 0x49, + 0x83, 0x0b, 0x78, 0x40, 0xc2, 0x00, 0xd0, 0x0b, 0x78, 0x29, 0x83, 0x0b, + 0x78, 0x20, 0xc2, 0x00, 0xdb, 0x0b, 0x78, 0x19, 0x83, 0x0b, 0x78, 0x00, + 0x8b, 0x0b, 0x7c, 0x39, 0xc2, 0x13, 0x38, 0x0b, 0x7b, 0xf9, 0xc2, 0x00, + 0x75, 0x0b, 0x7b, 0x81, 0xc2, 0x06, 0xdb, 0x0b, 0x7b, 0x79, 0x97, 0x0b, + 0x7b, 0x71, 0x83, 0x0b, 0x7b, 0x5a, 0x01, 0xc9, 0x70, 0x91, 0x0b, 0x7b, + 0x2b, 0x01, 0xc9, 0x77, 0x89, 0x0b, 0x7c, 0x21, 0xc2, 0x00, 0x75, 0x0b, + 0x7b, 0x49, 0x97, 0x0b, 0x7b, 0x41, 0x8b, 0x0b, 0x7b, 0x39, 0x87, 0x0b, + 0x7b, 0x31, 0x83, 0x0b, 0x7b, 0x12, 0x01, 0xc9, 0x7d, 0x83, 0x0b, 0x7c, + 0x29, 0x8b, 0x0b, 0x7b, 0xd1, 0x94, 0x0b, 0x7b, 0xbb, 0x01, 0xc9, 0x84, + 0x90, 0x0b, 0x7a, 0xf2, 0x01, 0xc9, 0x88, 0x07, 0xc1, 0xc9, 0x8c, 0x89, + 0x0b, 0x7c, 0x09, 0x97, 0x0b, 0x7b, 0xe1, 0x91, 0x0b, 0x7a, 0xd0, 0xc2, + 0x03, 0xd4, 0x0b, 0x7c, 0x01, 0x8b, 0x0b, 0x7b, 0x90, 0x89, 0x0b, 0x7b, + 0xf0, 0x97, 0x0b, 0x7b, 0xd9, 0x8b, 0x0b, 0x7b, 0xc9, 0x87, 0x0b, 0x7b, + 0x9b, 0x01, 0xc9, 0x94, 0x90, 0x0b, 0x7a, 0xbb, 0x01, 0xc9, 0x98, 0xc2, + 0x61, 0x75, 0x0b, 0x7a, 0xb1, 0x83, 0x0b, 0x7a, 0xa8, 0x94, 0x0b, 0x7b, + 0xb0, 0x91, 0x0b, 0x7a, 0xd8, 0xca, 0xa1, 0x84, 0x0b, 0x7a, 0x99, 0xc7, + 0xc1, 0x62, 0x0b, 0x7a, 0x90, 0xc5, 0x1e, 0xc8, 0x01, 0x12, 0x11, 0xc4, + 0x00, 0xba, 0x01, 0x10, 0x92, 0x01, 0xc9, 0x9c, 0x4e, 0x75, 0x20, 0xc1, + 0xc9, 0xa0, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x19, 0x46, 0x01, 0xfc, 0xc1, + 0xc9, 0xac, 0x04, 0xc1, 0xc9, 0xb8, 0x45, 0x00, 0x2c, 0xc1, 0xc9, 0xc4, + 0x44, 0x00, 0x49, 0xc1, 0xc9, 0xce, 0x08, 0xc1, 0xc9, 0xd8, 0xcc, 0x07, + 0xbb, 0x01, 0x3a, 0xc9, 0x15, 0xc1, 0xc9, 0xea, 0xd2, 0x4c, 0x91, 0x01, + 0x02, 0xf9, 0x46, 0x0f, 0x88, 0x41, 0xca, 0x02, 0xc5, 0x0a, 0x8a, 0x01, + 0x72, 0x61, 0xd0, 0x0f, 0x09, 0x01, 0x72, 0x99, 0xcd, 0x2c, 0xb2, 0x01, + 0x72, 0xa0, 0xca, 0x9c, 0x70, 0x0b, 0x74, 0xc9, 0x4c, 0x29, 0xba, 0x41, + 0xca, 0x0e, 0xc4, 0x0a, 0x8b, 0x0b, 0x74, 0xb9, 0x4e, 0x0b, 0x18, 0x41, + 0xca, 0x88, 0x16, 0xc1, 0xcb, 0x02, 0xc3, 0x05, 0x14, 0x0b, 0x74, 0x0b, + 0x01, 0xcb, 0x14, 0xc4, 0x26, 0x78, 0x0b, 0x74, 0x49, 0xc5, 0x06, 0xdb, + 0x0b, 0x74, 0x41, 0x15, 0xc1, 0xcb, 0x1a, 0x08, 0xc1, 0xcb, 0x26, 0xc4, + 0x15, 0xe7, 0x0b, 0x74, 0x00, 0xc8, 0x4b, 0x5f, 0x0b, 0x74, 0x99, 0x07, + 0xc1, 0xcb, 0x32, 0x15, 0xc1, 0xcb, 0x3e, 0x08, 0xc1, 0xcb, 0x4a, 0x16, + 0x41, 0xcb, 0x56, 0xc8, 0xb5, 0x5a, 0x01, 0x1e, 0xc1, 0xc6, 0xcd, 0xe5, + 0x01, 0x1e, 0xb9, 0x4a, 0x9b, 0x12, 0x41, 0xcb, 0x68, 0xca, 0x9c, 0x16, + 0x01, 0x1e, 0xa1, 0xc5, 0x2e, 0xee, 0x01, 0x1e, 0x90, 0x1d, 0xc1, 0xcb, + 0x74, 0x1e, 0x41, 0xcb, 0x9c, 0xc3, 0x05, 0x14, 0x0f, 0x46, 0x39, 0x16, + 0xc1, 0xcb, 0xc4, 0x08, 0xc1, 0xcb, 0xd0, 0x15, 0xc1, 0xcb, 0xdc, 0xc5, + 0x06, 0xdb, 0x0f, 0x46, 0x71, 0xc4, 0x26, 0x78, 0x0f, 0x46, 0x78, 0x16, + 0xc1, 0xcb, 0xe8, 0x47, 0x0d, 0x04, 0xc1, 0xcb, 0xf2, 0xc8, 0x33, 0xee, + 0x0f, 0x46, 0xb0, 0x49, 0x53, 0xa9, 0xc1, 0xcb, 0xfc, 0x47, 0x34, 0x2f, + 0xc1, 0xcc, 
0x18, 0x0e, 0x41, 0xcc, 0x3f, 0xcb, 0x91, 0x99, 0x08, 0x4c, + 0xf3, 0x01, 0xcc, 0x4b, 0x47, 0x02, 0x0e, 0x41, 0xcc, 0x51, 0x00, 0x41, + 0xcc, 0xb3, 0xc2, 0x02, 0xa0, 0x05, 0x5f, 0x91, 0xc4, 0x02, 0xde, 0x05, + 0x5f, 0x98, 0xc3, 0x09, 0x9e, 0x05, 0x5f, 0xa1, 0xc3, 0x0d, 0x14, 0x05, + 0x5f, 0xa8, 0xc2, 0x22, 0xcc, 0x05, 0x5f, 0xb1, 0xc4, 0x18, 0x10, 0x05, + 0x5f, 0xb8, 0xc4, 0xe4, 0x73, 0x05, 0x5f, 0x51, 0xc7, 0xc6, 0x16, 0x05, + 0x5f, 0x49, 0xc5, 0xd5, 0x3d, 0x05, 0x5f, 0x31, 0x03, 0xc1, 0xcc, 0xbf, + 0x0b, 0xc1, 0xcc, 0xcd, 0xc4, 0xbd, 0x08, 0x05, 0x5f, 0x19, 0xc7, 0x40, + 0xe5, 0x05, 0x57, 0xa9, 0x17, 0xc1, 0xcc, 0xd7, 0xc6, 0xce, 0x4b, 0x05, + 0x5f, 0x38, 0x8b, 0x05, 0x5e, 0x7b, 0x01, 0xcc, 0xe1, 0x10, 0xc1, 0xcc, + 0xe7, 0x16, 0xc1, 0xcd, 0x03, 0x12, 0xc1, 0xcd, 0x16, 0x0d, 0xc1, 0xcd, + 0x23, 0x04, 0xc1, 0xcd, 0x32, 0x06, 0xc1, 0xcd, 0x3c, 0x09, 0xc1, 0xcd, + 0x4c, 0x15, 0xc1, 0xcd, 0x58, 0x42, 0x11, 0xee, 0xc1, 0xcd, 0x6a, 0x91, + 0x05, 0x57, 0x09, 0x87, 0x05, 0x57, 0x01, 0xc3, 0x18, 0x95, 0x05, 0x5e, + 0xa1, 0xc5, 0xd5, 0x92, 0x05, 0x5e, 0x89, 0xc2, 0x05, 0x1d, 0x05, 0x5e, + 0x71, 0xc3, 0xcc, 0x38, 0x05, 0x5e, 0x69, 0xc4, 0xb0, 0x02, 0x05, 0x5e, + 0x61, 0xc3, 0x27, 0x01, 0x05, 0x5e, 0x1b, 0x01, 0xcd, 0x74, 0xc3, 0x02, + 0xf9, 0x05, 0x5e, 0x13, 0x01, 0xcd, 0x7a, 0xc3, 0x0c, 0x26, 0x05, 0x5e, + 0x59, 0x0c, 0x41, 0xcd, 0x80, 0xc7, 0xc0, 0x82, 0x0f, 0xb7, 0xa9, 0xc4, + 0xd0, 0x81, 0x0f, 0xb7, 0x28, 0x00, 0x41, 0xcd, 0x8c, 0xc4, 0x00, 0x87, + 0x0f, 0xa1, 0x69, 0xc4, 0xd0, 0xf1, 0x0f, 0xd5, 0x20, 0xc5, 0x61, 0xc0, + 0x0e, 0x98, 0x01, 0x1b, 0x41, 0xcd, 0x9e, 0x46, 0x45, 0x87, 0xc1, 0xcd, + 0xaa, 0xd9, 0x1e, 0x69, 0x08, 0xb3, 0x19, 0xcf, 0x62, 0x5b, 0x00, 0xc0, + 0x30, 0xca, 0x01, 0x28, 0x08, 0xb3, 0x4b, 0x01, 0xcd, 0xb0, 0xdc, 0x14, + 0x85, 0x00, 0xc0, 0x38, 0xd5, 0x01, 0x32, 0x08, 0xb3, 0x40, 0x46, 0x00, + 0x8b, 0x41, 0xcd, 0xb6, 0x46, 0x00, 0x8b, 0x41, 0xcd, 0xc2, 0xd9, 0x1e, + 0x9b, 0x08, 0xb3, 0x11, 0x45, 0x09, 0x98, 0x41, 0xcd, 0xce, 0xc2, 0x01, + 0xc3, 0x00, 0xc1, 0x73, 0x01, 0xcd, 0xf2, 0x83, 0x00, 0xc1, 0x03, 0x01, + 0xcd, 0xf8, 0x16, 0xc1, 0xce, 0x04, 0x42, 0x11, 0xee, 0xc1, 0xce, 0x14, + 0x15, 0xc1, 0xce, 0x1f, 0x1c, 0xc1, 0xce, 0x2f, 0x0e, 0xc1, 0xce, 0x3f, + 0xc3, 0x39, 0x6e, 0x00, 0xc1, 0xf1, 0x0d, 0xc1, 0xce, 0x49, 0xc2, 0x00, + 0x87, 0x00, 0xc1, 0xc9, 0xc2, 0x01, 0x4a, 0x00, 0xc1, 0xc1, 0xc2, 0x00, + 0x39, 0x00, 0xc1, 0xb9, 0xc2, 0x19, 0x2c, 0x00, 0xc1, 0xb1, 0xc2, 0x25, + 0x3b, 0x00, 0xc1, 0xa9, 0xc2, 0x0e, 0x9a, 0x00, 0xc1, 0x99, 0xc2, 0x01, + 0x30, 0x00, 0xc1, 0x69, 0xc2, 0x0f, 0x9a, 0x00, 0xc1, 0x61, 0xc2, 0x00, + 0xb0, 0x00, 0xc1, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xc1, 0x51, 0xc2, 0x00, + 0xc1, 0x00, 0xc1, 0x41, 0x87, 0x00, 0xc1, 0x0b, 0x01, 0xce, 0x53, 0x97, + 0x00, 0xc1, 0x23, 0x01, 0xce, 0x57, 0x91, 0x00, 0xc1, 0x1b, 0x01, 0xce, + 0x5b, 0x8b, 0x00, 0xc1, 0x10, 0x57, 0x28, 0x43, 0xc1, 0xce, 0x5f, 0xc8, + 0x3b, 0x7a, 0x00, 0xc0, 0x29, 0xc8, 0x11, 0xf7, 0x00, 0xc0, 0x18, 0xc9, + 0x11, 0xf6, 0x00, 0xc0, 0x49, 0xc5, 0x0a, 0x8a, 0x00, 0xc0, 0x40, 0xc3, + 0x0d, 0xe5, 0x00, 0xc0, 0x21, 0xc3, 0x0a, 0x8c, 0x00, 0xc0, 0x10, 0xca, + 0xa0, 0xf8, 0x0f, 0xa5, 0xc1, 0xc3, 0x32, 0x20, 0x0f, 0xa5, 0x80, 0x06, + 0xc1, 0xce, 0x6f, 0x45, 0x00, 0xba, 0xc1, 0xce, 0x81, 0xd1, 0x50, 0xce, + 0x08, 0xb2, 0x19, 0x4b, 0x6f, 0xc7, 0xc1, 0xce, 0x91, 0x47, 0x02, 0x0e, + 0x41, 0xce, 0xb1, 0x47, 0x02, 0x0e, 0xc1, 0xcf, 0x16, 0xd9, 0x1d, 0x88, + 0x05, 0x5a, 0xd8, 0x48, 0x0b, 0x17, 0xc1, 0xcf, 0x5c, 0x12, 0xc1, 0xcf, + 0xfd, 0xca, 0x9c, 0xac, 0x0e, 0xb8, 0xd1, 0xcc, 0x8b, 0x65, 0x0e, 0xb8, + 0xc1, 0xcc, 
0x89, 0xfd, 0x0e, 0xb8, 0xb9, 0xce, 0x10, 0x3e, 0x0e, 0xb8, + 0xb1, 0x46, 0x03, 0x13, 0xc1, 0xd0, 0x0f, 0xc5, 0xdb, 0xf0, 0x0e, 0xb7, + 0xd8, 0x15, 0xc1, 0xd0, 0xaf, 0x46, 0x09, 0x97, 0xc1, 0xd0, 0xbb, 0x48, + 0x0b, 0x17, 0xc1, 0xd0, 0xdf, 0x47, 0xc7, 0x4a, 0xc1, 0xd1, 0x80, 0x12, + 0xc1, 0xd1, 0xae, 0xca, 0x9c, 0xac, 0x0e, 0xb7, 0x01, 0xcc, 0x8b, 0x65, + 0x0e, 0xb6, 0xf1, 0xcc, 0x89, 0xfd, 0x0e, 0xb6, 0xe9, 0xce, 0x10, 0x3e, + 0x0e, 0xb6, 0xe1, 0xc5, 0xdb, 0xf0, 0x0e, 0xb6, 0x09, 0x48, 0xbd, 0x42, + 0x41, 0xd1, 0xc0, 0x46, 0x09, 0x97, 0xc1, 0xd1, 0xcc, 0x46, 0x03, 0x13, + 0xc1, 0xd1, 0xf0, 0x48, 0x0b, 0x17, 0x41, 0xd2, 0x58, 0x4a, 0x43, 0x55, + 0xc1, 0xd2, 0xc0, 0x46, 0x07, 0x2f, 0x41, 0xd2, 0xde, 0x46, 0x09, 0x97, + 0xc1, 0xd2, 0xea, 0x46, 0x03, 0x13, 0xc1, 0xd3, 0x0e, 0x48, 0x0b, 0x17, + 0x41, 0xd3, 0x76, 0x47, 0xbd, 0x43, 0xc1, 0xd3, 0xc2, 0xcf, 0x35, 0x0c, + 0x01, 0x3e, 0x68, 0x44, 0x00, 0x2e, 0xc1, 0xd3, 0xce, 0xcd, 0x27, 0x2f, + 0x01, 0x3e, 0x58, 0xd5, 0x35, 0x36, 0x01, 0x3f, 0x71, 0x46, 0x01, 0xfc, + 0xc1, 0xd3, 0xe6, 0xd4, 0x38, 0xf4, 0x01, 0x3f, 0x51, 0xcd, 0x0b, 0x91, + 0x01, 0x3f, 0x40, 0xc3, 0x03, 0x26, 0x0e, 0x97, 0x90, 0xc4, 0x14, 0x09, + 0x0e, 0x97, 0x88, 0xc4, 0x14, 0x09, 0x0e, 0x97, 0x80, 0xc5, 0x14, 0x08, + 0x0e, 0x97, 0x79, 0xc2, 0x00, 0x5f, 0x0e, 0x97, 0x28, 0xc4, 0x14, 0x09, + 0x0e, 0x97, 0x70, 0xc6, 0x52, 0xcd, 0x0e, 0x97, 0x69, 0xc3, 0x02, 0xdf, + 0x0e, 0x97, 0x18, 0xc4, 0x22, 0x44, 0x0e, 0x97, 0x61, 0x91, 0x0e, 0x97, + 0x10, 0xc2, 0x19, 0x2c, 0x08, 0xf7, 0x59, 0x83, 0x08, 0xf7, 0x41, 0xc2, + 0x01, 0x30, 0x08, 0xf7, 0x10, 0xc4, 0x26, 0x78, 0x08, 0xea, 0xc9, 0xc5, + 0x06, 0xdb, 0x08, 0xea, 0xc1, 0x15, 0xc1, 0xd3, 0xf2, 0x08, 0xc1, 0xd3, + 0xfe, 0x16, 0xc1, 0xd4, 0x0a, 0xc3, 0x05, 0x14, 0x08, 0xea, 0x89, 0xc4, + 0x15, 0xe7, 0x08, 0xea, 0x80, 0xc6, 0xd1, 0x39, 0x08, 0xea, 0x39, 0xc4, + 0xbb, 0x54, 0x08, 0xea, 0x30, 0xc5, 0x1e, 0x96, 0x08, 0xea, 0x29, 0x4a, + 0x6f, 0xc8, 0x41, 0xd4, 0x16, 0xc7, 0xc3, 0xa7, 0x08, 0xea, 0x21, 0xc6, + 0x1e, 0x89, 0x08, 0xea, 0x19, 0xc5, 0x33, 0x5d, 0x08, 0xea, 0x11, 0xc7, + 0x40, 0xe5, 0x08, 0xea, 0x09, 0xc8, 0x14, 0x38, 0x08, 0xea, 0x00, 0x16, + 0xc1, 0xd4, 0x36, 0x0c, 0xc1, 0xd4, 0x4a, 0x0d, 0xc1, 0xd4, 0x5a, 0x0e, + 0xc1, 0xd4, 0x6a, 0xc2, 0x00, 0xd0, 0x08, 0xe9, 0x61, 0x15, 0xc1, 0xd4, + 0x74, 0xc2, 0x02, 0x41, 0x08, 0xe9, 0x41, 0xc2, 0x00, 0x39, 0x08, 0xe9, + 0x31, 0xc2, 0x19, 0x2c, 0x08, 0xe9, 0x29, 0xc2, 0x01, 0xc3, 0x08, 0xe9, + 0x21, 0x04, 0xc1, 0xd4, 0x84, 0x12, 0xc1, 0xd4, 0x8e, 0x10, 0xc1, 0xd4, + 0x98, 0x06, 0xc1, 0xd4, 0xae, 0x05, 0xc1, 0xd4, 0xbc, 0x09, 0xc1, 0xd4, + 0xc6, 0x83, 0x08, 0xe8, 0x03, 0x01, 0xd4, 0xd0, 0x91, 0x08, 0xe8, 0x49, + 0x87, 0x08, 0xe8, 0x31, 0x97, 0x08, 0xe8, 0x23, 0x01, 0xd4, 0xdc, 0x8b, + 0x08, 0xe8, 0x12, 0x01, 0xd4, 0xe0, 0x44, 0x00, 0xbb, 0xc1, 0xd4, 0xe4, + 0x50, 0x5c, 0xf2, 0x41, 0xd4, 0xf0, 0x91, 0x08, 0xe5, 0xa1, 0x87, 0x08, + 0xe5, 0x99, 0x97, 0x08, 0xe5, 0x91, 0x8b, 0x08, 0xe5, 0x89, 0xc2, 0x04, + 0xc6, 0x08, 0xe5, 0x80, 0x83, 0x08, 0xe4, 0x79, 0xc2, 0x00, 0xd0, 0x08, + 0xe4, 0x71, 0x15, 0xc1, 0xd5, 0x4a, 0xc2, 0x00, 0xdb, 0x08, 0xe4, 0x59, + 0xc2, 0x00, 0x39, 0x08, 0xe4, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0xe4, 0x49, + 0xc2, 0x00, 0x02, 0x08, 0xe4, 0x41, 0x1c, 0xc1, 0xd5, 0x54, 0xc2, 0x01, + 0x4a, 0x08, 0xe4, 0x29, 0x06, 0xc1, 0xd5, 0x5e, 0x16, 0xc1, 0xd5, 0x68, + 0xc2, 0x01, 0xc3, 0x08, 0xe4, 0x09, 0xc2, 0x01, 0x5d, 0x08, 0xe4, 0x01, + 0x12, 0xc1, 0xd5, 0x76, 0x10, 0xc1, 0xd5, 0x80, 0xc2, 0x25, 0x3b, 0x08, + 0xe3, 0xc1, 0x05, 0xc1, 0xd5, 0x90, 0xc2, 0x01, 0x30, 0x08, 0xe3, 0xa1, + 0x0d, 0x41, 
0xd5, 0x9a, 0xd8, 0x20, 0xf3, 0x01, 0x35, 0x39, 0xc4, 0x00, + 0xba, 0x01, 0x35, 0x30, 0x05, 0xc1, 0xd5, 0xa4, 0x03, 0xc1, 0xd5, 0xb6, + 0x18, 0xc1, 0xd5, 0xc2, 0xc4, 0x00, 0xb0, 0x00, 0x6a, 0x78, 0x18, 0xc1, + 0xd5, 0xcc, 0x83, 0x00, 0x68, 0x2b, 0x01, 0xd5, 0xdc, 0x8b, 0x00, 0x68, + 0x3b, 0x01, 0xd5, 0xee, 0x97, 0x00, 0x68, 0x4b, 0x01, 0xd5, 0xf2, 0x87, + 0x00, 0x68, 0x73, 0x01, 0xd5, 0xf6, 0x91, 0x00, 0x68, 0x93, 0x01, 0xd5, + 0xfa, 0x0d, 0xc1, 0xd5, 0xfe, 0x09, 0xc1, 0xd6, 0x08, 0x10, 0xc1, 0xd6, + 0x12, 0x05, 0xc1, 0xd6, 0x26, 0x0c, 0xc1, 0xd6, 0x2e, 0x16, 0xc1, 0xd6, + 0x38, 0x06, 0xc1, 0xd6, 0x46, 0x12, 0xc1, 0xd6, 0x5a, 0x04, 0xc1, 0xd6, + 0x64, 0xc2, 0x01, 0xc3, 0x00, 0x69, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x69, + 0x79, 0x14, 0xc1, 0xd6, 0x6e, 0x0e, 0xc1, 0xd6, 0x78, 0x15, 0xc1, 0xd6, + 0x80, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0xc8, 0x03, 0xc1, 0xd6, 0x90, 0x8b, + 0x00, 0x69, 0xfb, 0x01, 0xd6, 0x9c, 0x97, 0x00, 0x6a, 0x0b, 0x01, 0xd6, + 0xa0, 0x48, 0xb2, 0x2d, 0xc1, 0xd6, 0xa4, 0x87, 0x00, 0x6a, 0x33, 0x01, + 0xd6, 0xb2, 0x91, 0x00, 0x6a, 0x52, 0x01, 0xd6, 0xb6, 0x44, 0x05, 0x14, + 0xc1, 0xd6, 0xba, 0x46, 0x02, 0xdd, 0x41, 0xd6, 0xe0, 0x45, 0x09, 0x98, + 0xc1, 0xd6, 0xf8, 0xc8, 0xbc, 0xda, 0x00, 0x6b, 0xc8, 0xc3, 0x09, 0x41, + 0x00, 0x6b, 0x81, 0x44, 0x05, 0x14, 0x41, 0xd7, 0x1c, 0xcb, 0x92, 0x07, + 0x08, 0x57, 0xb1, 0xc8, 0x02, 0x9f, 0x08, 0x57, 0xa9, 0x42, 0x00, 0x58, + 0xc1, 0xd7, 0x28, 0xc7, 0x2c, 0xab, 0x08, 0x57, 0x89, 0xc4, 0x0e, 0x6a, + 0x08, 0x57, 0x80, 0xc3, 0x05, 0x14, 0x08, 0x57, 0x5b, 0x01, 0xd7, 0x35, + 0x16, 0xc1, 0xd7, 0x3b, 0xc4, 0x0d, 0x13, 0x08, 0x57, 0x60, 0xc5, 0x05, + 0x02, 0x08, 0x57, 0x31, 0xc5, 0x00, 0xd4, 0x08, 0x57, 0x28, 0x16, 0xc1, + 0xd7, 0x47, 0x15, 0xc1, 0xd7, 0x59, 0xc4, 0x5d, 0xe2, 0x08, 0x57, 0x09, + 0x13, 0xc1, 0xd7, 0x69, 0x1a, 0xc1, 0xd7, 0x75, 0xc2, 0x14, 0xda, 0x08, + 0x56, 0xe1, 0xc2, 0x00, 0x67, 0x08, 0x56, 0xd9, 0x03, 0xc1, 0xd7, 0x81, + 0xc3, 0x20, 0x18, 0x08, 0x56, 0xb9, 0xc3, 0x00, 0x4e, 0x08, 0x56, 0xb1, + 0x06, 0xc1, 0xd7, 0x93, 0xc6, 0xcf, 0xd7, 0x08, 0x56, 0x99, 0x0d, 0xc1, + 0xd7, 0x9f, 0xc4, 0x4a, 0xb9, 0x08, 0x56, 0x79, 0xc2, 0x01, 0x7f, 0x08, + 0x56, 0x33, 0x01, 0xd7, 0xab, 0x0c, 0xc1, 0xd7, 0xb1, 0x1c, 0xc1, 0xd7, + 0xbd, 0xc3, 0x7e, 0x89, 0x08, 0x56, 0x39, 0x09, 0xc1, 0xd7, 0xc9, 0x04, + 0x41, 0xd7, 0xd5, 0xd8, 0x22, 0xd3, 0x0f, 0xab, 0xa1, 0xc6, 0xd1, 0xdb, + 0x0f, 0xc9, 0xa8, 0xc6, 0xd2, 0x9b, 0x0f, 0xa3, 0x99, 0xca, 0xa1, 0x66, + 0x0f, 0xa3, 0x90, 0x03, 0xc1, 0xd7, 0xe1, 0xc3, 0xa7, 0x52, 0x00, 0x42, + 0xb9, 0xc8, 0xb9, 0xc2, 0x00, 0x42, 0xb1, 0x0b, 0xc1, 0xd8, 0x28, 0xc7, + 0xb9, 0xc3, 0x00, 0x42, 0x29, 0xc5, 0xd6, 0xc3, 0x00, 0x42, 0x00, 0xcc, + 0x85, 0xd1, 0x08, 0x8b, 0xb1, 0x46, 0x02, 0x0f, 0x41, 0xd8, 0x30, 0xcb, + 0x45, 0x8e, 0x08, 0x8b, 0xa9, 0xc9, 0xad, 0xb6, 0x08, 0x8b, 0x98, 0xc5, + 0x06, 0xbb, 0x0f, 0x81, 0x49, 0xc8, 0xb5, 0xa2, 0x0f, 0x80, 0x11, 0xcb, + 0x8f, 0x3c, 0x0f, 0x80, 0x30, 0xc8, 0xbd, 0xa2, 0x0f, 0x80, 0x01, 0x48, + 0xae, 0x47, 0x41, 0xd8, 0x8a, 0xc9, 0xab, 0xbe, 0x0f, 0x80, 0x09, 0x46, + 0xd1, 0xf9, 0xc1, 0xd8, 0x94, 0x48, 0xb5, 0x32, 0xc1, 0xd8, 0x9e, 0xc5, + 0xc1, 0x78, 0x0f, 0x81, 0x31, 0xc5, 0xda, 0x60, 0x0f, 0x81, 0x38, 0xc9, + 0xac, 0x06, 0x0f, 0x80, 0x19, 0x47, 0xbb, 0x83, 0x41, 0xd8, 0xa8, 0x46, + 0xbb, 0x84, 0xc1, 0xd8, 0xb2, 0xc5, 0xd6, 0xf0, 0x0f, 0x81, 0x18, 0x46, + 0xd2, 0xe9, 0xc1, 0xd8, 0xbc, 0x48, 0xbe, 0x4a, 0x41, 0xd8, 0xc6, 0x47, + 0xc5, 0x7c, 0xc1, 0xd8, 0xd0, 0x47, 0xc7, 0x2e, 0x41, 0xd8, 0xda, 0xc2, + 0x00, 0x3b, 0x0f, 0x81, 0x59, 0xc4, 0x8e, 0x88, 0x0f, 0x81, 0x20, 0x15, + 0xc1, 0xd8, 
0xe4, 0xc8, 0x87, 0xb5, 0x0f, 0x9d, 0xcb, 0x01, 0xd8, 0xf0, + 0xc4, 0x23, 0x2e, 0x0f, 0x9d, 0xa8, 0xca, 0xa2, 0xba, 0x01, 0x33, 0x79, + 0xcc, 0x83, 0xf1, 0x01, 0x33, 0x71, 0xc9, 0xb3, 0xb0, 0x01, 0x33, 0x68, + 0x48, 0x1f, 0x1f, 0xc1, 0xd8, 0xf6, 0xcf, 0x65, 0x2b, 0x0f, 0x9d, 0xb0, + 0x00, 0x41, 0xd9, 0x03, 0x14, 0xc1, 0xd9, 0x0f, 0xc2, 0x00, 0xd0, 0x08, + 0x95, 0x31, 0xc2, 0x0d, 0xf6, 0x08, 0x95, 0x29, 0xc2, 0x02, 0x41, 0x08, + 0x95, 0x21, 0xc2, 0x00, 0xdb, 0x08, 0x95, 0x19, 0xc2, 0x19, 0x2c, 0x08, + 0x95, 0x09, 0xc2, 0x01, 0xc3, 0x08, 0x95, 0x01, 0x04, 0xc1, 0xd9, 0x1f, + 0x12, 0xc1, 0xd9, 0x29, 0x10, 0xc1, 0xd9, 0x33, 0x06, 0xc1, 0xd9, 0x43, + 0x16, 0xc1, 0xd9, 0x51, 0x0c, 0xc1, 0xd9, 0x5f, 0x05, 0xc1, 0xd9, 0x69, + 0x09, 0xc1, 0xd9, 0x73, 0x0d, 0xc1, 0xd9, 0x7d, 0x87, 0x08, 0x94, 0x19, + 0x83, 0x08, 0x94, 0x01, 0x8b, 0x08, 0x94, 0x09, 0x97, 0x08, 0x94, 0x10, + 0xc4, 0x18, 0x10, 0x0b, 0x53, 0x39, 0xc2, 0x22, 0xcc, 0x0b, 0x53, 0x30, + 0xc3, 0x0d, 0x14, 0x0b, 0x53, 0x29, 0xc3, 0x09, 0x9e, 0x0b, 0x53, 0x20, + 0xc4, 0x02, 0xde, 0x0b, 0x53, 0x19, 0xc2, 0x02, 0xa0, 0x0b, 0x53, 0x10, + 0xa2, 0x05, 0x53, 0xe9, 0x9f, 0x05, 0x53, 0xe0, 0x44, 0x00, 0xd0, 0xc1, + 0xd9, 0x87, 0xc6, 0x00, 0x41, 0x00, 0x82, 0x58, 0xc7, 0x14, 0x39, 0x00, + 0x81, 0xb1, 0xc3, 0x89, 0x6c, 0x00, 0x81, 0xd0, 0xc5, 0x40, 0xe7, 0x00, + 0x81, 0xc1, 0xc4, 0x1e, 0x97, 0x00, 0x81, 0xc8, 0x9e, 0x00, 0x83, 0x49, + 0x9f, 0x00, 0x83, 0x51, 0xa0, 0x00, 0x83, 0x59, 0xa1, 0x00, 0x83, 0x61, + 0xa2, 0x00, 0x83, 0x68, 0x9e, 0x00, 0x84, 0xd1, 0xa0, 0x00, 0x84, 0xd8, + 0x45, 0xc7, 0x97, 0xc1, 0xd9, 0x99, 0xcd, 0x7b, 0xb1, 0x00, 0x82, 0x70, + 0xc3, 0x05, 0x14, 0x00, 0x84, 0xf1, 0xcb, 0x0f, 0x09, 0x00, 0x84, 0xf8, + 0xc2, 0x02, 0xa0, 0x00, 0x84, 0x91, 0xc4, 0x02, 0xde, 0x00, 0x84, 0x98, + 0xc3, 0x09, 0x9e, 0x00, 0x84, 0xa1, 0xc3, 0x0d, 0x14, 0x00, 0x84, 0xa8, + 0xc2, 0x22, 0xcc, 0x00, 0x84, 0xb1, 0xc4, 0x18, 0x10, 0x00, 0x84, 0xb8, + 0xc7, 0xc7, 0x97, 0x05, 0x53, 0xd1, 0x97, 0x00, 0x81, 0x50, 0xc2, 0x00, + 0xd0, 0x00, 0x80, 0x0b, 0x01, 0xd9, 0xab, 0x83, 0x00, 0x80, 0x00, 0x83, + 0x00, 0x80, 0x83, 0x01, 0xd9, 0xb1, 0x16, 0xc1, 0xd9, 0xb7, 0xc2, 0x00, + 0xd0, 0x00, 0x80, 0x88, 0x0a, 0xc1, 0xd9, 0xc1, 0x83, 0x00, 0x80, 0xf1, + 0xc2, 0x0d, 0xf6, 0x00, 0x82, 0x89, 0xcd, 0x7c, 0x19, 0x00, 0x83, 0x08, + 0x83, 0x00, 0x80, 0x11, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x19, 0xc7, 0xbd, + 0xeb, 0x00, 0x81, 0xf8, 0xc2, 0x01, 0x30, 0x00, 0x80, 0x21, 0xc2, 0x19, + 0x2c, 0x00, 0x80, 0x49, 0x10, 0xc1, 0xd9, 0xce, 0x83, 0x00, 0x80, 0xa0, + 0x83, 0x00, 0x80, 0x29, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x30, 0x83, 0x00, + 0x80, 0x39, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x40, 0x06, 0xc1, 0xd9, 0xd8, + 0x83, 0x00, 0x80, 0x91, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x98, 0x83, 0x00, + 0x80, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0xb0, 0x83, 0x00, 0x80, 0xb9, + 0xc2, 0x00, 0xd0, 0x00, 0x80, 0xc0, 0x83, 0x00, 0x80, 0xc9, 0x43, 0x01, + 0x55, 0x41, 0xd9, 0xe2, 0x83, 0x00, 0x80, 0xd9, 0xcf, 0x65, 0x0d, 0x00, + 0x84, 0x70, 0x83, 0x00, 0x80, 0xe1, 0xc2, 0x00, 0xdb, 0x00, 0x81, 0x00, + 0x83, 0x00, 0x80, 0xe9, 0x51, 0x28, 0xa0, 0x41, 0xd9, 0xf8, 0x8b, 0x00, + 0x81, 0x20, 0x97, 0x00, 0x81, 0x30, 0x51, 0x50, 0x02, 0x41, 0xda, 0x04, + 0x94, 0x00, 0x82, 0x93, 0x01, 0xda, 0x16, 0x8e, 0x00, 0x82, 0xa2, 0x01, + 0xda, 0x1a, 0xc4, 0x18, 0x10, 0x05, 0x4f, 0xb9, 0xc2, 0x22, 0xcc, 0x05, + 0x4f, 0xb0, 0xc3, 0x0d, 0x14, 0x05, 0x4f, 0xa9, 0xc3, 0x09, 0x9e, 0x05, + 0x4f, 0xa0, 0xc4, 0x02, 0xde, 0x05, 0x4f, 0x99, 0xc2, 0x02, 0xa0, 0x05, + 0x4f, 0x90, 0xc5, 0xd5, 0xc9, 0x00, 0x84, 0xe2, 0x01, 0xda, 0x1e, 0x94, + 0x00, 0x82, 
0xb8, 0x8e, 0x00, 0x82, 0xc8, 0xc2, 0x04, 0xc6, 0x00, 0x84, + 0x19, 0x87, 0x00, 0x84, 0x23, 0x01, 0xda, 0x22, 0xc7, 0xca, 0x30, 0x00, + 0x84, 0x30, 0xc2, 0x19, 0x2c, 0x00, 0x81, 0xd9, 0xc2, 0x00, 0x39, 0x00, + 0x81, 0xe1, 0xc2, 0x01, 0x4a, 0x00, 0x81, 0xe9, 0xc2, 0x00, 0xd0, 0x00, + 0x81, 0xf0, 0xc2, 0x00, 0xc1, 0x00, 0x82, 0xf1, 0xc2, 0x01, 0xc3, 0x00, + 0x82, 0xf9, 0xc2, 0x00, 0xdb, 0x00, 0x83, 0x00, 0x15, 0xc1, 0xda, 0x28, + 0x83, 0x01, 0x85, 0x13, 0x01, 0xda, 0x42, 0x0f, 0xc1, 0xda, 0x48, 0x8b, + 0x01, 0x85, 0x21, 0x97, 0x01, 0x85, 0x31, 0x87, 0x01, 0x85, 0x41, 0x91, + 0x01, 0x85, 0x51, 0x0d, 0xc1, 0xda, 0x5f, 0x09, 0xc1, 0xda, 0x73, 0x1c, + 0xc1, 0xda, 0x87, 0x16, 0xc1, 0xda, 0x9b, 0x06, 0xc1, 0xda, 0xaf, 0x90, + 0x01, 0x87, 0x9b, 0x01, 0xda, 0xc3, 0x0a, 0xc1, 0xda, 0xd7, 0x04, 0xc1, + 0xda, 0xeb, 0x12, 0xc1, 0xda, 0xff, 0x1b, 0xc1, 0xdb, 0x13, 0x14, 0xc1, + 0xdb, 0x1f, 0x19, 0xc1, 0xdb, 0x33, 0x18, 0x41, 0xdb, 0x43, 0x97, 0x08, + 0x85, 0xc1, 0x8b, 0x08, 0x85, 0xb1, 0x83, 0x08, 0x85, 0x80, 0x97, 0x08, + 0x85, 0xa0, 0x8b, 0x08, 0x85, 0x90, 0xc5, 0x86, 0x20, 0x08, 0x86, 0x09, + 0xcc, 0x45, 0x8d, 0x08, 0x85, 0xf8, 0xc5, 0x33, 0x5d, 0x08, 0x85, 0xd1, + 0x42, 0x07, 0xb2, 0xc1, 0xdb, 0x57, 0xc8, 0x14, 0x38, 0x08, 0x84, 0x09, + 0xcb, 0x1e, 0x89, 0x08, 0x84, 0x00, 0x83, 0x08, 0x85, 0x71, 0xc2, 0x0d, + 0xf6, 0x08, 0x85, 0x69, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x60, 0x83, 0x08, + 0x85, 0x49, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0xe0, 0xc2, 0x00, 0xd0, 0x08, + 0x85, 0x31, 0x83, 0x08, 0x85, 0x28, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x21, + 0x83, 0x08, 0x85, 0x18, 0x83, 0x08, 0x85, 0x11, 0xc2, 0x00, 0xc1, 0x08, + 0x84, 0xe9, 0xc2, 0x19, 0x2c, 0x08, 0x84, 0xb1, 0xc2, 0x01, 0x30, 0x08, + 0x84, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x09, 0x83, 0x08, 0x85, 0x01, + 0x06, 0x41, 0xdb, 0x63, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0xf9, 0x83, 0x08, + 0x84, 0xf1, 0x16, 0x41, 0xdb, 0x73, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0xa9, + 0x83, 0x08, 0x84, 0xa0, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0x99, 0x83, 0x08, + 0x84, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0x81, 0x83, 0x08, 0x84, 0x78, + 0xc2, 0x00, 0xd0, 0x08, 0x84, 0x71, 0x83, 0x08, 0x84, 0x68, 0x97, 0x08, + 0x84, 0x61, 0x8b, 0x08, 0x84, 0x51, 0x83, 0x08, 0x84, 0x20, 0x97, 0x08, + 0x84, 0x40, 0x8b, 0x08, 0x84, 0x30, 0xc7, 0xca, 0x76, 0x05, 0x49, 0x68, + 0x87, 0x05, 0x49, 0x48, 0x87, 0x05, 0x49, 0x30, 0x91, 0x05, 0x49, 0x29, + 0x87, 0x05, 0x49, 0x18, 0x83, 0x05, 0x48, 0xf9, 0xc2, 0x01, 0x6f, 0x05, + 0x48, 0x98, 0xc2, 0x00, 0xd0, 0x05, 0x48, 0xf1, 0x83, 0x05, 0x48, 0x90, + 0xc2, 0x00, 0xd0, 0x05, 0x48, 0xb1, 0x83, 0x05, 0x48, 0xa8, 0x83, 0x05, + 0x48, 0xa1, 0xc2, 0x19, 0x2c, 0x05, 0x48, 0x89, 0xc2, 0x01, 0x30, 0x05, + 0x48, 0x68, 0xc2, 0x00, 0xd0, 0x05, 0x48, 0x79, 0x83, 0x05, 0x48, 0x70, + 0xc2, 0x00, 0xd0, 0x05, 0x48, 0x59, 0x83, 0x05, 0x48, 0x50, 0xc4, 0x18, + 0x10, 0x05, 0x48, 0x39, 0xc2, 0x22, 0xcc, 0x05, 0x48, 0x30, 0xc3, 0x0d, + 0x14, 0x05, 0x48, 0x29, 0xc3, 0x09, 0x9e, 0x05, 0x48, 0x20, 0xc4, 0x02, + 0xde, 0x05, 0x48, 0x19, 0xc2, 0x02, 0xa0, 0x05, 0x48, 0x10, 0x15, 0xc1, + 0xdb, 0x7d, 0xcb, 0x1e, 0x89, 0x00, 0x64, 0x09, 0x03, 0xc1, 0xdb, 0x89, + 0x42, 0x07, 0xb2, 0xc1, 0xdb, 0x95, 0xc5, 0x33, 0x5d, 0x00, 0x65, 0xe1, + 0xcb, 0x8f, 0xe1, 0x00, 0x67, 0x89, 0xcb, 0x93, 0xf6, 0x00, 0x67, 0x90, + 0x45, 0x02, 0x10, 0xc1, 0xdb, 0xa1, 0xc9, 0x36, 0x53, 0x00, 0x66, 0xa8, + 0x03, 0xc1, 0xdc, 0x10, 0x8b, 0x00, 0x65, 0xfb, 0x01, 0xdc, 0x1c, 0x97, + 0x00, 0x66, 0x0b, 0x01, 0xdc, 0x20, 0x48, 0xb2, 0x2d, 0xc1, 0xdc, 0x24, + 0x87, 0x00, 0x66, 0x33, 0x01, 0xdc, 0x32, 0x91, 0x00, 0x66, 0x52, 0x01, + 0xdc, 0x36, 
0xc4, 0x15, 0xe7, 0x00, 0x67, 0x31, 0xc3, 0x05, 0x14, 0x00, + 0x67, 0x39, 0x16, 0xc1, 0xdc, 0x3a, 0x08, 0xc1, 0xdc, 0x46, 0x15, 0xc1, + 0xdc, 0x52, 0xc5, 0x06, 0xdb, 0x00, 0x67, 0x71, 0xc4, 0x26, 0x78, 0x00, + 0x67, 0x78, 0x11, 0xc1, 0xdc, 0x5e, 0x0e, 0xc1, 0xdc, 0x71, 0x06, 0xc1, + 0xdc, 0x86, 0x15, 0xc1, 0xdc, 0x96, 0x0a, 0xc1, 0xdc, 0xe0, 0x16, 0xc1, + 0xdc, 0xf2, 0x0f, 0xc1, 0xdd, 0x17, 0x07, 0xc1, 0xdd, 0x29, 0x05, 0xc1, + 0xdd, 0x4c, 0x0b, 0xc1, 0xdd, 0x64, 0xc5, 0xa0, 0xc1, 0x01, 0x78, 0x89, + 0x12, 0xc1, 0xdd, 0x6e, 0x19, 0xc1, 0xdd, 0x84, 0x14, 0xc1, 0xdd, 0x9e, + 0x03, 0xc1, 0xdd, 0xb8, 0x09, 0xc1, 0xdd, 0xd0, 0x04, 0xc1, 0xdd, 0xe9, + 0x10, 0xc1, 0xde, 0x03, 0x08, 0xc1, 0xde, 0x0d, 0x42, 0x25, 0x3b, 0xc1, + 0xde, 0x2f, 0xc3, 0x26, 0x9b, 0x01, 0x7b, 0x21, 0x18, 0xc1, 0xde, 0x39, + 0xc6, 0xc6, 0x9b, 0x01, 0x7e, 0x40, 0x06, 0xc1, 0xde, 0x45, 0x05, 0xc1, + 0xde, 0x5d, 0x04, 0xc1, 0xde, 0x9d, 0x03, 0xc1, 0xde, 0xdd, 0x26, 0xc1, + 0xdf, 0x1d, 0x25, 0xc1, 0xdf, 0x5d, 0x24, 0xc1, 0xdf, 0x9d, 0x23, 0xc1, + 0xdf, 0xdd, 0x22, 0xc1, 0xe0, 0x1d, 0x21, 0xc1, 0xe0, 0x5d, 0x20, 0xc1, + 0xe0, 0x9d, 0x1f, 0xc1, 0xe0, 0xdd, 0x1e, 0xc1, 0xe1, 0x1d, 0x1d, 0x41, + 0xe1, 0x5d, 0x08, 0xc1, 0xe1, 0x9d, 0x07, 0xc1, 0xe1, 0xdd, 0x06, 0xc1, + 0xe2, 0x1d, 0x05, 0xc1, 0xe2, 0x5d, 0x04, 0xc1, 0xe2, 0x9d, 0x03, 0xc1, + 0xe2, 0xdd, 0x26, 0xc1, 0xe3, 0x1d, 0x25, 0xc1, 0xe3, 0x5d, 0x24, 0xc1, + 0xe3, 0x9d, 0x23, 0xc1, 0xe3, 0xdd, 0x22, 0xc1, 0xe4, 0x1d, 0x21, 0xc1, + 0xe4, 0x5d, 0x20, 0xc1, 0xe4, 0x9d, 0x1f, 0xc1, 0xe4, 0xdd, 0x1e, 0xc1, + 0xe5, 0x1d, 0x1d, 0x41, 0xe5, 0x5d, 0xc4, 0x18, 0x10, 0x08, 0x97, 0xb9, + 0xc2, 0x22, 0xcc, 0x08, 0x97, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0x97, 0xa9, + 0xc3, 0x09, 0x9e, 0x08, 0x97, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0x97, 0x99, + 0xc2, 0x02, 0xa0, 0x08, 0x97, 0x90, 0x8b, 0x08, 0x97, 0x31, 0x83, 0x08, + 0x97, 0x01, 0x97, 0x08, 0x97, 0x40, 0x97, 0x08, 0x97, 0x20, 0x8b, 0x08, + 0x97, 0x10, 0x83, 0x08, 0x96, 0xe9, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0xe0, + 0x83, 0x08, 0x96, 0xc9, 0xc2, 0x00, 0x39, 0x08, 0x96, 0x50, 0xc2, 0x00, + 0xd0, 0x08, 0x96, 0xb1, 0xc2, 0x01, 0x5d, 0x08, 0x96, 0xa9, 0x83, 0x08, + 0x96, 0xa0, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x99, 0x83, 0x08, 0x96, 0x90, + 0x83, 0x08, 0x96, 0x89, 0xc2, 0x00, 0xc1, 0x08, 0x96, 0x61, 0xc2, 0x19, + 0x2c, 0x08, 0x96, 0x29, 0xc2, 0x01, 0x30, 0x08, 0x95, 0xf8, 0xc2, 0x00, + 0xd0, 0x08, 0x96, 0x81, 0x83, 0x08, 0x96, 0x79, 0x06, 0x41, 0xe5, 0x9d, + 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x71, 0x83, 0x08, 0x96, 0x69, 0x16, 0x41, + 0xe5, 0xad, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x21, 0xc2, 0x25, 0x3b, 0x08, + 0x96, 0x19, 0x83, 0x08, 0x96, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x09, + 0x83, 0x08, 0x96, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0x95, 0xf1, 0xc2, 0x01, + 0x30, 0x08, 0x95, 0xe9, 0x83, 0x08, 0x95, 0xe0, 0xc2, 0x00, 0xd0, 0x08, + 0x95, 0xd9, 0x83, 0x08, 0x95, 0xd0, 0x97, 0x08, 0x95, 0xc9, 0x8b, 0x08, + 0x95, 0xb9, 0x83, 0x08, 0x95, 0x88, 0x97, 0x08, 0x95, 0xa8, 0x8b, 0x08, + 0x95, 0x98, 0x15, 0xc1, 0xe5, 0xb7, 0xc5, 0x33, 0x5d, 0x08, 0x91, 0xb1, + 0xc6, 0x1e, 0x95, 0x08, 0x91, 0xa9, 0xc8, 0x14, 0x38, 0x08, 0x91, 0xa0, + 0xcc, 0x45, 0x8d, 0x08, 0x91, 0xe1, 0xc5, 0x86, 0x20, 0x08, 0x91, 0xc8, + 0x97, 0x08, 0x91, 0x99, 0x8b, 0x08, 0x91, 0x89, 0x83, 0x08, 0x91, 0x60, + 0x8b, 0x08, 0x91, 0x70, 0xc2, 0x00, 0xdb, 0x08, 0x91, 0x59, 0x83, 0x08, + 0x91, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0x91, 0x19, 0xc2, 0x01, 0x5d, 0x08, + 0x91, 0x11, 0x83, 0x08, 0x91, 0x08, 0xc2, 0x00, 0xd0, 0x08, 0x91, 0x01, + 0x83, 0x08, 0x90, 0xf8, 0x83, 0x08, 0x90, 0xf1, 0xc2, 0x00, 0xc1, 0x08, + 0x90, 0xc1, 
0xc2, 0x19, 0x2c, 0x08, 0x90, 0x99, 0xc2, 0x01, 0x30, 0x08, + 0x90, 0x68, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xe9, 0x06, 0xc1, 0xe5, 0xc3, + 0x83, 0x08, 0x90, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xd1, 0x83, 0x08, + 0x90, 0xc9, 0x16, 0x41, 0xe5, 0xd3, 0xc2, 0x25, 0x3b, 0x08, 0x90, 0x89, + 0x83, 0x08, 0x90, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0x79, 0x83, 0x08, + 0x90, 0x70, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0x61, 0xc2, 0x01, 0x30, 0x08, + 0x90, 0x59, 0x83, 0x08, 0x90, 0x50, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0x49, + 0x83, 0x08, 0x90, 0x40, 0x97, 0x08, 0x90, 0x39, 0x8b, 0x08, 0x90, 0x29, + 0x83, 0x08, 0x90, 0x08, 0x43, 0x4e, 0xf0, 0xc1, 0xe5, 0xdd, 0x12, 0xc1, + 0xe5, 0xe5, 0x04, 0xc1, 0xe5, 0xf7, 0x45, 0xda, 0x97, 0xc1, 0xe6, 0x03, + 0xc9, 0xb2, 0x51, 0x00, 0xcf, 0x81, 0x4a, 0xa2, 0x42, 0x41, 0xe6, 0x0f, + 0x03, 0xc1, 0xe6, 0x23, 0x0d, 0xc1, 0xe6, 0x35, 0xcb, 0x93, 0x93, 0x00, + 0xbe, 0xc9, 0x04, 0xc1, 0xe6, 0x47, 0xc7, 0xc2, 0x1f, 0x00, 0xbe, 0xb9, + 0x05, 0xc1, 0xe6, 0x51, 0xc6, 0xcb, 0x69, 0x00, 0xbe, 0x89, 0xcd, 0x78, + 0x23, 0x00, 0xbe, 0x81, 0x16, 0xc1, 0xe6, 0x5d, 0x14, 0xc1, 0xe6, 0x69, + 0xcb, 0x99, 0xfa, 0x00, 0xbe, 0x49, 0xcd, 0x7d, 0x1d, 0x00, 0xbe, 0x41, + 0xc7, 0xc4, 0x41, 0x00, 0xbe, 0x30, 0xc4, 0x18, 0x10, 0x00, 0xbf, 0x39, + 0xc2, 0x22, 0xcc, 0x00, 0xbf, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0xbf, 0x29, + 0xc3, 0x09, 0x9e, 0x00, 0xbf, 0x20, 0xc4, 0x02, 0xde, 0x00, 0xbf, 0x19, + 0xc2, 0x02, 0xa0, 0x00, 0xbf, 0x10, 0x03, 0xc1, 0xe6, 0x75, 0x11, 0xc1, + 0xe6, 0x85, 0x87, 0x00, 0xbe, 0x09, 0x8b, 0x00, 0xbd, 0xbb, 0x01, 0xe6, + 0x8d, 0x9b, 0x00, 0xbd, 0xcb, 0x01, 0xe6, 0x95, 0x97, 0x00, 0xbd, 0xda, + 0x01, 0xe6, 0x9d, 0x83, 0x00, 0xbd, 0xa9, 0x93, 0x00, 0xbd, 0xa0, 0x03, + 0xc1, 0xe6, 0xa5, 0x48, 0xb7, 0x6a, 0xc1, 0xe6, 0xb5, 0x87, 0x00, 0xbd, + 0x79, 0x97, 0x00, 0xbd, 0x3b, 0x01, 0xe6, 0xc1, 0x8b, 0x00, 0xbd, 0x2a, + 0x01, 0xe6, 0xcc, 0x9b, 0x00, 0xbd, 0x70, 0x9b, 0x00, 0xbd, 0x60, 0x83, + 0x00, 0xbd, 0x09, 0x91, 0x00, 0xbc, 0xd8, 0x83, 0x00, 0xbc, 0xf9, 0xc2, + 0x00, 0xfb, 0x00, 0xbc, 0xf1, 0xc2, 0x00, 0xd0, 0x00, 0xbc, 0xe8, 0x0a, + 0xc1, 0xe6, 0xd0, 0x91, 0x00, 0xbc, 0xb0, 0x91, 0x00, 0xbc, 0x99, 0xc2, + 0x00, 0x10, 0x00, 0xbc, 0x71, 0xc2, 0x42, 0xcd, 0x00, 0xbc, 0x49, 0xc2, + 0x0f, 0x7b, 0x00, 0xbc, 0x20, 0x0a, 0xc1, 0xe6, 0xd8, 0x91, 0x00, 0xbc, + 0x89, 0x83, 0x00, 0xbc, 0x79, 0x42, 0x00, 0x8e, 0x41, 0xe6, 0xe0, 0x91, + 0x00, 0xbc, 0x61, 0x83, 0x00, 0xbc, 0x50, 0x0a, 0xc1, 0xe6, 0xe8, 0x91, + 0x00, 0xbc, 0x39, 0x83, 0x00, 0xbc, 0x28, 0x0a, 0xc1, 0xe6, 0xf0, 0x91, + 0x00, 0xbc, 0x11, 0x83, 0x00, 0xbc, 0x00, 0xc4, 0x22, 0xd6, 0x08, 0x52, + 0xc1, 0xc4, 0x6e, 0x13, 0x08, 0x52, 0xa8, 0x11, 0xc1, 0xe6, 0xf8, 0xc4, + 0x19, 0x53, 0x08, 0x52, 0xb0, 0xcb, 0x80, 0xaa, 0x08, 0x52, 0x99, 0xc5, + 0x02, 0xd2, 0x08, 0x52, 0x90, 0xc8, 0x4b, 0x94, 0x08, 0x52, 0x39, 0xc7, + 0x0d, 0x04, 0x08, 0x52, 0x30, 0xc5, 0x28, 0xee, 0x08, 0x52, 0x29, 0xc2, + 0x00, 0xc4, 0x08, 0x52, 0x20, 0xc4, 0x02, 0xde, 0x08, 0x52, 0x11, 0xc2, + 0x02, 0xa0, 0x08, 0x52, 0x08, 0xcb, 0x36, 0x51, 0x08, 0x50, 0x61, 0x45, + 0x00, 0xba, 0x41, 0xe7, 0x02, 0xc7, 0x0e, 0x70, 0x08, 0x51, 0xd1, 0xcf, + 0x65, 0xa3, 0x08, 0x50, 0x68, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0xa9, 0x83, + 0x08, 0x51, 0x60, 0x16, 0xc1, 0xe7, 0x18, 0xc2, 0x00, 0xd0, 0x08, 0x51, + 0x01, 0x83, 0x08, 0x50, 0xf8, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x39, 0x83, + 0x08, 0x51, 0x30, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x29, 0x83, 0x08, 0x51, + 0x20, 0x83, 0x08, 0x51, 0x19, 0xc2, 0x00, 0xc1, 0x08, 0x50, 0xf1, 0xc2, + 0x19, 0x2c, 0x08, 0x50, 0xc8, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x11, 0x83, + 0x08, 0x51, 
0x09, 0x06, 0x41, 0xe7, 0x26, 0xc2, 0x00, 0xd0, 0x08, 0x50, + 0xb1, 0x83, 0x08, 0x50, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0x50, 0x99, 0x83, + 0x08, 0x50, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x50, 0x89, 0x83, 0x08, 0x50, + 0x81, 0xc2, 0x02, 0x2b, 0x08, 0x51, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x51, + 0x69, 0xc2, 0x0d, 0xf6, 0x08, 0x51, 0x71, 0x83, 0x08, 0x51, 0x78, 0x46, + 0x00, 0x8b, 0x41, 0xe7, 0x30, 0xca, 0xa7, 0x92, 0x0f, 0xd2, 0x53, 0x01, + 0xe7, 0x3c, 0xc5, 0xa8, 0xf7, 0x0f, 0xd0, 0x0b, 0x01, 0xe7, 0x42, 0x0d, + 0xc1, 0xe7, 0x48, 0xc6, 0xca, 0xfd, 0x0f, 0xd0, 0x1b, 0x01, 0xe7, 0x5a, + 0xc4, 0xde, 0x83, 0x0f, 0xd0, 0x13, 0x01, 0xe7, 0x60, 0xc4, 0xe3, 0x93, + 0x0f, 0xd0, 0x2b, 0x01, 0xe7, 0x66, 0x47, 0x45, 0x86, 0x41, 0xe7, 0x6c, + 0x0b, 0xc1, 0xe7, 0x88, 0xca, 0xa0, 0x26, 0x08, 0xa2, 0xf0, 0x18, 0xc1, + 0xe7, 0x94, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0xa1, 0x15, 0xc1, 0xe7, 0xa0, + 0x10, 0xc1, 0xe7, 0xb0, 0x06, 0xc1, 0xe7, 0xc8, 0x16, 0xc1, 0xe7, 0xd6, + 0x0c, 0xc1, 0xe7, 0xe4, 0x05, 0xc1, 0xe7, 0xee, 0x09, 0xc1, 0xe7, 0xf8, + 0x0d, 0xc1, 0xe8, 0x02, 0x83, 0x08, 0xa0, 0x03, 0x01, 0xe8, 0x0c, 0x91, + 0x08, 0xa0, 0x61, 0x87, 0x08, 0xa0, 0x51, 0x97, 0x08, 0xa0, 0x23, 0x01, + 0xe8, 0x18, 0x8b, 0x08, 0xa0, 0x13, 0x01, 0xe8, 0x1c, 0x12, 0xc1, 0xe8, + 0x20, 0x04, 0xc1, 0xe8, 0x2a, 0x0f, 0xc1, 0xe8, 0x34, 0xc2, 0x19, 0x2c, + 0x08, 0xa1, 0x59, 0x14, 0xc1, 0xe8, 0x3e, 0x0e, 0xc1, 0xe8, 0x48, 0xc2, + 0x01, 0x4a, 0x08, 0xa1, 0x80, 0x46, 0x00, 0x59, 0xc1, 0xe8, 0x52, 0x45, + 0x09, 0x98, 0xc1, 0xe8, 0x5e, 0xc4, 0x19, 0x53, 0x08, 0xa2, 0x58, 0x03, + 0xc1, 0xe8, 0x82, 0x91, 0x08, 0xa2, 0x01, 0x87, 0x08, 0xa1, 0xf1, 0x48, + 0xb2, 0x2d, 0xc1, 0xe8, 0x8e, 0x97, 0x08, 0xa1, 0xc3, 0x01, 0xe8, 0x9c, + 0x8b, 0x08, 0xa1, 0xb2, 0x01, 0xe8, 0xa0, 0xc8, 0xb9, 0x72, 0x00, 0xce, + 0xf3, 0x01, 0xe8, 0xa4, 0x16, 0xc1, 0xe8, 0xa8, 0x46, 0x09, 0x97, 0xc1, + 0xe8, 0xb4, 0x47, 0x02, 0x0e, 0xc1, 0xe8, 0xd8, 0x4b, 0x6f, 0xc7, 0x41, + 0xe8, 0xea, 0xc9, 0xb2, 0xa2, 0x0f, 0x98, 0xd1, 0xc6, 0x00, 0x91, 0x0f, + 0x98, 0x88, 0xca, 0xa2, 0x88, 0x01, 0x3a, 0x71, 0xc2, 0x15, 0x95, 0x0f, + 0x8c, 0x79, 0xc2, 0x00, 0x03, 0x0f, 0x8c, 0x71, 0xc2, 0x0d, 0xf6, 0x0f, + 0x8c, 0x69, 0xc2, 0x00, 0xb0, 0x0f, 0x8c, 0x61, 0xc2, 0x00, 0x63, 0x0f, + 0x8c, 0x59, 0x55, 0x0b, 0x11, 0xc1, 0xe9, 0x0a, 0xcd, 0x2c, 0xb2, 0x0f, + 0xde, 0x20, 0xca, 0xa3, 0xd2, 0x01, 0x27, 0xf9, 0x47, 0x34, 0x2f, 0xc1, + 0xe9, 0x72, 0x55, 0x0b, 0x11, 0xc1, 0xe9, 0x88, 0xc8, 0x01, 0x92, 0x0f, + 0xbe, 0xb1, 0xc6, 0x0b, 0x09, 0x0f, 0xbe, 0xc0, 0xc5, 0x0d, 0x20, 0x0f, + 0xdd, 0xe9, 0xdc, 0x04, 0xcb, 0x0f, 0xdd, 0xf1, 0xc7, 0x3a, 0x19, 0x0f, + 0xdd, 0xf8, 0xd6, 0x2d, 0xd0, 0x01, 0x14, 0x49, 0xd4, 0x3a, 0x20, 0x01, + 0x14, 0x40, 0xe0, 0x07, 0x47, 0x01, 0x12, 0x38, 0xca, 0x37, 0x4e, 0x01, + 0x13, 0xa9, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x88, 0xca, 0x37, 0x4e, 0x01, + 0x13, 0xa1, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x80, 0xcf, 0x61, 0x11, 0x08, + 0xcf, 0x21, 0x03, 0xc1, 0xe9, 0xf0, 0x91, 0x08, 0xce, 0xe1, 0x87, 0x08, + 0xce, 0xd1, 0xc9, 0xb2, 0x2d, 0x08, 0xce, 0xb3, 0x01, 0xe9, 0xfc, 0x97, + 0x08, 0xce, 0xa3, 0x01, 0xea, 0x00, 0x8b, 0x08, 0xce, 0x92, 0x01, 0xea, + 0x04, 0xc7, 0xc3, 0x61, 0x08, 0xcf, 0x11, 0x03, 0xc1, 0xea, 0x08, 0x42, + 0x07, 0xb2, 0x41, 0xea, 0x14, 0x14, 0xc1, 0xea, 0x20, 0x0e, 0xc1, 0xea, + 0x2a, 0xc2, 0x00, 0xd0, 0x08, 0xce, 0x71, 0x15, 0xc1, 0xea, 0x34, 0x18, + 0xc1, 0xea, 0x44, 0xc2, 0x19, 0x2c, 0x08, 0xce, 0x39, 0xc2, 0x01, 0xc3, + 0x08, 0xce, 0x31, 0x04, 0xc1, 0xea, 0x51, 0x12, 0xc1, 0xea, 0x5b, 0x10, + 0xc1, 0xea, 0x65, 0x06, 0xc1, 0xea, 0x7b, 0x16, 0xc1, 0xea, 0x89, 0x0c, + 0xc1, 0xea, 
0x97, 0x05, 0xc1, 0xea, 0xa1, 0x09, 0xc1, 0xea, 0xab, 0x0d, + 0xc1, 0xea, 0xb5, 0x83, 0x08, 0xcd, 0x03, 0x01, 0xea, 0xbf, 0x91, 0x08, + 0xcd, 0x61, 0x87, 0x08, 0xcd, 0x51, 0x97, 0x08, 0xcd, 0x23, 0x01, 0xea, + 0xcb, 0x8b, 0x08, 0xcd, 0x12, 0x01, 0xea, 0xcf, 0xc3, 0x05, 0x14, 0x08, + 0x45, 0x3b, 0x01, 0xea, 0xd3, 0x16, 0xc1, 0xea, 0xd9, 0x08, 0x41, 0xea, + 0xe9, 0x16, 0xc1, 0xea, 0xf5, 0x15, 0xc1, 0xeb, 0x01, 0x46, 0x26, 0xf7, + 0xc1, 0xeb, 0x0b, 0xc4, 0x5d, 0xe2, 0x08, 0x44, 0xd9, 0xc4, 0xb9, 0x7e, + 0x08, 0x44, 0xd1, 0xc2, 0x00, 0x67, 0x08, 0x44, 0xc1, 0x03, 0xc1, 0xeb, + 0x41, 0xc3, 0x20, 0x18, 0x08, 0x44, 0xa9, 0xc3, 0x00, 0x4e, 0x08, 0x44, + 0x99, 0xc6, 0xcf, 0xd7, 0x08, 0x44, 0x89, 0xc4, 0xe0, 0xe7, 0x08, 0x44, + 0x79, 0xc4, 0x4a, 0xb9, 0x08, 0x44, 0x69, 0xc2, 0x01, 0x7f, 0x08, 0x44, + 0x3b, 0x01, 0xeb, 0x4d, 0xc5, 0x4a, 0xb3, 0x08, 0x44, 0x49, 0xc3, 0x7e, + 0x89, 0x08, 0x44, 0x41, 0xc6, 0x40, 0x9a, 0x08, 0x44, 0x29, 0xc5, 0x9c, + 0xa2, 0x08, 0x44, 0x21, 0xc4, 0xe3, 0x27, 0x08, 0x44, 0x18, 0x45, 0x20, + 0x6c, 0xc1, 0xeb, 0x53, 0x45, 0x15, 0xa7, 0xc1, 0xeb, 0x7e, 0x46, 0x09, + 0x91, 0x41, 0xeb, 0xa9, 0xde, 0x0e, 0x32, 0x0f, 0xaa, 0x19, 0x4a, 0x00, + 0x27, 0x41, 0xeb, 0xc1, 0xe0, 0x0c, 0x07, 0x01, 0x3d, 0x88, 0xcc, 0x23, + 0x9f, 0x01, 0x17, 0x60, 0x46, 0x1f, 0x87, 0xc1, 0xeb, 0xc7, 0xc3, 0x00, + 0xbb, 0x00, 0x05, 0x60, 0xc3, 0x33, 0xa8, 0x01, 0x15, 0x69, 0xc4, 0x1e, + 0xc9, 0x01, 0x12, 0x08, 0x43, 0x07, 0x28, 0xc1, 0xeb, 0xd3, 0xce, 0x66, + 0xcf, 0x01, 0x12, 0x49, 0xd6, 0x2b, 0xc0, 0x01, 0x12, 0x21, 0xcc, 0x81, + 0x99, 0x01, 0x10, 0x48, 0xca, 0x37, 0x4e, 0x01, 0x13, 0x69, 0xc5, 0x07, + 0x62, 0x01, 0x13, 0x00, 0x86, 0x0f, 0xae, 0x51, 0xc2, 0x09, 0x3b, 0x0f, + 0xae, 0x48, 0xd6, 0x2b, 0x68, 0x0f, 0xa6, 0xa0, 0x87, 0x0f, 0x09, 0x58, + 0x91, 0x0f, 0x09, 0x48, 0x83, 0x0f, 0x09, 0x28, 0xc2, 0x00, 0x39, 0x0f, + 0x09, 0x19, 0x83, 0x0f, 0x08, 0xb0, 0xc2, 0x00, 0xdb, 0x0f, 0x09, 0x09, + 0x83, 0x0f, 0x08, 0xd0, 0xc2, 0x00, 0xdb, 0x0f, 0x09, 0x01, 0x83, 0x0f, + 0x08, 0x00, 0x8a, 0x0f, 0x08, 0xf8, 0x12, 0xc1, 0xeb, 0xdf, 0xc2, 0x0f, + 0x9a, 0x0f, 0x08, 0xc9, 0x16, 0xc1, 0xeb, 0xe9, 0xc2, 0x00, 0x39, 0x0f, + 0x08, 0x89, 0xc2, 0x19, 0x2c, 0x0f, 0x08, 0x81, 0xc2, 0x00, 0x64, 0x0f, + 0x08, 0x61, 0xc2, 0x02, 0x2b, 0x0f, 0x08, 0x39, 0x83, 0x0f, 0x08, 0x28, + 0xc2, 0x00, 0xdb, 0x0f, 0x08, 0xe9, 0x83, 0x0f, 0x08, 0x78, 0xc2, 0x19, + 0x2c, 0x0f, 0x08, 0xd9, 0x83, 0x0f, 0x08, 0x30, 0xc2, 0x8d, 0x8f, 0x0f, + 0x08, 0xa1, 0x83, 0x0f, 0x08, 0x19, 0xc2, 0x0d, 0xf6, 0x0f, 0x08, 0x08, + 0xcc, 0x86, 0x61, 0x0f, 0x09, 0xd9, 0xc6, 0xcc, 0x9b, 0x0f, 0x09, 0xd1, + 0xc8, 0x7f, 0x59, 0x0f, 0x09, 0xc9, 0xc5, 0xd8, 0x2b, 0x0f, 0x09, 0xc1, + 0xc6, 0x18, 0x8e, 0x0f, 0x09, 0xb8, 0x08, 0xc1, 0xeb, 0xf9, 0x07, 0xc1, + 0xec, 0x29, 0x04, 0xc1, 0xec, 0x69, 0x26, 0xc1, 0xec, 0xa9, 0x25, 0xc1, + 0xec, 0xe9, 0x24, 0xc1, 0xed, 0x29, 0x23, 0xc1, 0xed, 0x69, 0x22, 0xc1, + 0xed, 0xa9, 0x21, 0xc1, 0xed, 0xe9, 0x20, 0xc1, 0xee, 0x29, 0x1f, 0xc1, + 0xee, 0x69, 0x1e, 0xc1, 0xee, 0xa9, 0x1d, 0xc1, 0xee, 0xe9, 0x06, 0xc1, + 0xef, 0x29, 0x05, 0xc1, 0xef, 0x69, 0x03, 0x41, 0xef, 0xa9, 0x08, 0xc1, + 0xef, 0xe9, 0x07, 0xc1, 0xf0, 0x29, 0x06, 0xc1, 0xf0, 0x69, 0x05, 0xc1, + 0xf0, 0xa9, 0x04, 0xc1, 0xf0, 0xe9, 0x03, 0xc1, 0xf1, 0x29, 0x26, 0xc1, + 0xf1, 0x69, 0x25, 0xc1, 0xf1, 0xa9, 0x24, 0x41, 0xf1, 0xe9, 0x42, 0x00, + 0x28, 0xc1, 0xf2, 0x29, 0xd1, 0x52, 0xcc, 0x01, 0x24, 0xa1, 0xcc, 0x48, + 0x29, 0x01, 0x24, 0x88, 0xd1, 0x56, 0xa6, 0x01, 0x24, 0xc9, 0xcf, 0x66, + 0xb1, 0x01, 0x24, 0x90, 0xd2, 0x48, 0x23, 0x01, 0x24, 0xc1, 0x0b, 0x41, + 0xf2, 0x35, 
0xd0, 0x59, 0x52, 0x01, 0x24, 0xb1, 0xd1, 0x53, 0x65, 0x01, + 0x24, 0xa8, 0xc4, 0x18, 0x10, 0x00, 0x3e, 0x39, 0xc2, 0x22, 0xcc, 0x00, + 0x3e, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0x3e, 0x29, 0xc3, 0x09, 0x9e, 0x00, + 0x3e, 0x20, 0xc4, 0x02, 0xde, 0x00, 0x3e, 0x19, 0xc2, 0x02, 0xa0, 0x00, + 0x3e, 0x10, 0x44, 0xe4, 0x3f, 0xc1, 0xf2, 0x41, 0x83, 0x00, 0x3e, 0xb0, + 0xc2, 0x19, 0x2c, 0x00, 0x3f, 0x13, 0x01, 0xf2, 0x53, 0x83, 0x00, 0x3f, + 0x1a, 0x01, 0xf2, 0x59, 0xc2, 0x00, 0x39, 0x00, 0x3e, 0xd1, 0x83, 0x00, + 0x3e, 0xc8, 0xc8, 0xbc, 0x92, 0x00, 0x3e, 0x88, 0x91, 0x00, 0x3e, 0x78, + 0x87, 0x00, 0x3e, 0x58, 0xcb, 0x5a, 0x32, 0x00, 0x3f, 0x89, 0xc8, 0xae, + 0xfb, 0x00, 0x3f, 0x81, 0xc9, 0x3d, 0x18, 0x00, 0x3f, 0x79, 0xcf, 0x64, + 0x1d, 0x00, 0x3f, 0x70, 0xcb, 0x5a, 0x32, 0x00, 0x3f, 0x69, 0xc8, 0xae, + 0xfb, 0x00, 0x3f, 0x61, 0xc9, 0x3d, 0x18, 0x00, 0x3f, 0x58, 0x46, 0x00, + 0x8b, 0x41, 0xf2, 0x5f, 0x95, 0x0f, 0xae, 0x68, 0xc3, 0x23, 0x2f, 0x0f, + 0xae, 0x2b, 0x01, 0xf2, 0x77, 0xc3, 0x15, 0xa8, 0x0f, 0xd5, 0xc8, 0xc5, + 0x11, 0x0d, 0x01, 0x1e, 0xd1, 0x45, 0xd9, 0x2f, 0x41, 0xf2, 0x7d, 0xc4, + 0x9e, 0x9c, 0x0f, 0x99, 0xf1, 0xc5, 0xdb, 0x9b, 0x0f, 0x99, 0xe8, 0x20, + 0xc1, 0xf2, 0x87, 0x1f, 0xc1, 0xf2, 0xaa, 0x1e, 0xc1, 0xf2, 0xd8, 0x1d, + 0x41, 0xf3, 0x06, 0xa6, 0x09, 0x82, 0xc9, 0xa5, 0x09, 0x82, 0xc1, 0xa4, + 0x09, 0x82, 0xb9, 0xa3, 0x09, 0x82, 0xb1, 0xa2, 0x09, 0x82, 0xa3, 0x01, + 0xf3, 0x30, 0xa1, 0x09, 0x82, 0x99, 0xa0, 0x09, 0x82, 0x91, 0x9f, 0x09, + 0x82, 0x89, 0x9e, 0x09, 0x82, 0x80, 0x22, 0xc1, 0xf3, 0x34, 0x21, 0xc1, + 0xf3, 0x3f, 0x20, 0xc1, 0xf3, 0x67, 0x1f, 0xc1, 0xf3, 0x98, 0x1e, 0xc1, + 0xf3, 0xcc, 0x1d, 0x41, 0xf3, 0xfa, 0x47, 0x07, 0x9a, 0xc1, 0xf4, 0x27, + 0x44, 0x00, 0xf1, 0x41, 0xf4, 0x33, 0x1e, 0xc1, 0xf4, 0x3f, 0x1d, 0x41, + 0xf4, 0x5d, 0xa5, 0x09, 0x8c, 0x39, 0xa4, 0x09, 0x8c, 0x31, 0xa3, 0x09, + 0x8c, 0x23, 0x01, 0xf4, 0x87, 0xa2, 0x09, 0x8c, 0x19, 0xa1, 0x09, 0x8c, + 0x11, 0xa0, 0x09, 0x8c, 0x09, 0x9f, 0x09, 0x8c, 0x01, 0x9e, 0x09, 0x8b, + 0xf8, 0xc2, 0xe6, 0x77, 0x09, 0x9d, 0x6b, 0x01, 0xf4, 0x8b, 0x20, 0xc1, + 0xf4, 0x8f, 0x1f, 0xc1, 0xf4, 0xc3, 0x1e, 0xc1, 0xf4, 0xf7, 0x1d, 0x41, + 0xf5, 0x25, 0x20, 0xc1, 0xf5, 0x52, 0x1f, 0xc1, 0xf5, 0x5e, 0x1e, 0xc1, + 0xf5, 0x86, 0x1d, 0x41, 0xf5, 0xae, 0xc2, 0xe4, 0xef, 0x09, 0x82, 0x79, + 0x23, 0xc1, 0xf5, 0xd5, 0x22, 0xc1, 0xf5, 0xfd, 0x21, 0xc1, 0xf6, 0x25, + 0x20, 0xc1, 0xf6, 0x59, 0x1f, 0xc1, 0xf6, 0x84, 0x1e, 0xc1, 0xf6, 0xac, + 0x1d, 0x41, 0xf6, 0xda, 0xa3, 0x09, 0xa0, 0x23, 0x01, 0xf7, 0x04, 0xa2, + 0x09, 0x9f, 0xd3, 0x01, 0xf7, 0x24, 0xa1, 0x09, 0x9f, 0xc9, 0xa0, 0x09, + 0x9f, 0xc1, 0x9f, 0x09, 0x9f, 0xb9, 0x9e, 0x09, 0x9f, 0xb1, 0x9d, 0x09, + 0x9f, 0xa8, 0xa6, 0x09, 0x9f, 0xa1, 0xa5, 0x09, 0x9f, 0x99, 0xa4, 0x09, + 0x9f, 0x91, 0xa3, 0x09, 0x9f, 0x89, 0xa2, 0x09, 0x9f, 0x7b, 0x01, 0xf7, + 0x48, 0xa1, 0x09, 0x9f, 0x6b, 0x01, 0xf7, 0x4c, 0xa0, 0x09, 0x9f, 0x53, + 0x01, 0xf7, 0x50, 0x9f, 0x09, 0x9f, 0x2b, 0x01, 0xf7, 0x58, 0x9e, 0x09, + 0x9f, 0x20, 0x83, 0x09, 0x9e, 0xe0, 0x83, 0x09, 0x9e, 0xd0, 0x83, 0x09, + 0x9e, 0xb8, 0x84, 0x09, 0x9e, 0xa1, 0x83, 0x09, 0x9e, 0x98, 0xa2, 0x09, + 0x9e, 0x71, 0xa1, 0x09, 0x9e, 0x63, 0x01, 0xf7, 0x68, 0xa0, 0x09, 0x9e, + 0x59, 0x9f, 0x09, 0x9e, 0x51, 0x9e, 0x09, 0x9e, 0x49, 0x9d, 0x09, 0x9e, + 0x40, 0xa6, 0x09, 0x9e, 0x39, 0xa5, 0x09, 0x9e, 0x2b, 0x01, 0xf7, 0x6c, + 0xa4, 0x09, 0x9e, 0x1b, 0x01, 0xf7, 0x70, 0xa3, 0x09, 0x9e, 0x11, 0xa2, + 0x09, 0x9e, 0x09, 0xa1, 0x09, 0x9d, 0xfb, 0x01, 0xf7, 0x74, 0xa0, 0x09, + 0x9d, 0xf1, 0x9f, 0x09, 0x9d, 0xe9, 0x9e, 0x09, 0x9d, 0xe1, 0x9d, 0x09, + 0x9d, 0xd2, 
0x01, 0xf7, 0x78, 0xa6, 0x09, 0x9d, 0xc3, 0x01, 0xf7, 0x7c, + 0xa5, 0x09, 0x9d, 0xb9, 0xa4, 0x09, 0x9d, 0xb1, 0xa3, 0x09, 0x9d, 0xa9, + 0xa2, 0x09, 0x9d, 0xa1, 0xa1, 0x09, 0x9d, 0x99, 0xa0, 0x09, 0x9d, 0x8b, + 0x01, 0xf7, 0x80, 0x9f, 0x09, 0x9d, 0x81, 0x9e, 0x09, 0x9d, 0x78, 0x9f, + 0x09, 0x9b, 0x09, 0x9e, 0x09, 0x9b, 0x01, 0x9d, 0x09, 0x9a, 0xf8, 0xa6, + 0x09, 0x9a, 0xf1, 0xa5, 0x09, 0x9a, 0xe9, 0xa4, 0x09, 0x9a, 0xe1, 0xa3, + 0x09, 0x9a, 0xd9, 0xa2, 0x09, 0x9a, 0xd1, 0xa1, 0x09, 0x9a, 0xc9, 0xa0, + 0x09, 0x9a, 0xc1, 0x9f, 0x09, 0x9a, 0xb3, 0x01, 0xf7, 0x84, 0x9e, 0x09, + 0x9a, 0xa9, 0x9d, 0x09, 0x9a, 0xa0, 0xa6, 0x09, 0x9a, 0x93, 0x01, 0xf7, + 0x88, 0xa5, 0x09, 0x9a, 0x89, 0xa4, 0x09, 0x9a, 0x81, 0xa3, 0x09, 0x9a, + 0x79, 0xa2, 0x09, 0x9a, 0x71, 0xa1, 0x09, 0x9a, 0x69, 0xa0, 0x09, 0x9a, + 0x5b, 0x01, 0xf7, 0x8c, 0x9f, 0x09, 0x9a, 0x51, 0x9e, 0x09, 0x9a, 0x49, + 0x9d, 0x09, 0x9a, 0x40, 0xa6, 0x09, 0x9a, 0x39, 0xa5, 0x09, 0x9a, 0x31, + 0xa4, 0x09, 0x9a, 0x29, 0xa3, 0x09, 0x9a, 0x21, 0xa2, 0x09, 0x9a, 0x19, + 0xa1, 0x09, 0x9a, 0x11, 0xa0, 0x09, 0x9a, 0x09, 0x9f, 0x09, 0x9a, 0x01, + 0x9e, 0x09, 0x99, 0xf9, 0x9d, 0x09, 0x99, 0xf0, 0xa6, 0x09, 0x99, 0xe9, + 0xa5, 0x09, 0x99, 0xe1, 0xa4, 0x09, 0x99, 0xd9, 0xa3, 0x09, 0x99, 0xc3, + 0x01, 0xf7, 0x90, 0xa2, 0x09, 0x99, 0xb9, 0xa1, 0x09, 0x99, 0xb1, 0xa0, + 0x09, 0x99, 0xa9, 0x9f, 0x09, 0x99, 0xa1, 0x9e, 0x09, 0x99, 0x98, 0xa3, + 0x09, 0x99, 0x91, 0xa2, 0x09, 0x99, 0x89, 0xa1, 0x09, 0x99, 0x81, 0xa0, + 0x09, 0x99, 0x73, 0x01, 0xf7, 0x98, 0x9f, 0x09, 0x99, 0x63, 0x01, 0xf7, + 0x9c, 0x9e, 0x09, 0x99, 0x59, 0x9d, 0x09, 0x99, 0x50, 0xa6, 0x09, 0x99, + 0x49, 0xa5, 0x09, 0x99, 0x41, 0xa4, 0x09, 0x99, 0x39, 0xa3, 0x09, 0x99, + 0x31, 0xa2, 0x09, 0x99, 0x29, 0xa1, 0x09, 0x99, 0x21, 0xa0, 0x09, 0x99, + 0x19, 0x9f, 0x09, 0x99, 0x11, 0x9e, 0x09, 0x99, 0x09, 0x9d, 0x09, 0x99, + 0x00, 0xa6, 0x09, 0x98, 0xf9, 0xa5, 0x09, 0x98, 0xf1, 0xa4, 0x09, 0x98, + 0xe9, 0xa3, 0x09, 0x98, 0xdb, 0x01, 0xf7, 0xa0, 0xa2, 0x09, 0x98, 0xd1, + 0xa1, 0x09, 0x98, 0xc9, 0xa0, 0x09, 0x98, 0xc1, 0x9f, 0x09, 0x98, 0xb9, + 0x9e, 0x09, 0x98, 0xab, 0x01, 0xf7, 0xa4, 0x9d, 0x09, 0x98, 0xa0, 0xa6, + 0x09, 0x98, 0x93, 0x01, 0xf7, 0xa8, 0xa5, 0x09, 0x98, 0x83, 0x01, 0xf7, + 0xac, 0xa4, 0x09, 0x98, 0x73, 0x01, 0xf7, 0xb0, 0xa3, 0x09, 0x98, 0x69, + 0xa2, 0x09, 0x98, 0x61, 0xa1, 0x09, 0x98, 0x59, 0xa0, 0x09, 0x98, 0x4b, + 0x01, 0xf7, 0xb4, 0x9f, 0x09, 0x98, 0x41, 0x9e, 0x09, 0x98, 0x38, 0xa3, + 0x09, 0x98, 0x31, 0xa2, 0x09, 0x98, 0x29, 0xa1, 0x09, 0x98, 0x21, 0xa0, + 0x09, 0x98, 0x19, 0x9f, 0x09, 0x98, 0x11, 0x9e, 0x09, 0x98, 0x09, 0x9d, + 0x09, 0x98, 0x00, 0xa6, 0x09, 0x97, 0xf9, 0xa5, 0x09, 0x97, 0xf1, 0xa4, + 0x09, 0x97, 0xe9, 0xa3, 0x09, 0x97, 0xe1, 0xa2, 0x09, 0x97, 0xd3, 0x01, + 0xf7, 0xb8, 0xa1, 0x09, 0x97, 0xc9, 0xa0, 0x09, 0x97, 0xc1, 0x9f, 0x09, + 0x97, 0xb9, 0x9e, 0x09, 0x97, 0xb1, 0x9d, 0x09, 0x97, 0xa8, 0xa6, 0x09, + 0x97, 0xa1, 0xa5, 0x09, 0x97, 0x99, 0xa4, 0x09, 0x97, 0x91, 0xa3, 0x09, + 0x97, 0x7b, 0x01, 0xf7, 0xbc, 0xa2, 0x09, 0x97, 0x71, 0xa1, 0x09, 0x97, + 0x69, 0xa0, 0x09, 0x97, 0x61, 0x9f, 0x09, 0x97, 0x59, 0x9e, 0x09, 0x97, + 0x51, 0x9d, 0x09, 0x97, 0x48, 0xa6, 0x09, 0x97, 0x41, 0xa5, 0x09, 0x97, + 0x39, 0xa4, 0x09, 0x97, 0x2b, 0x01, 0xf7, 0xc4, 0xa3, 0x09, 0x97, 0x21, + 0xa2, 0x09, 0x97, 0x19, 0xa1, 0x09, 0x97, 0x03, 0x01, 0xf7, 0xc8, 0xa0, + 0x09, 0x96, 0xf9, 0x9f, 0x09, 0x96, 0xf1, 0x9e, 0x09, 0x96, 0xe9, 0x9d, + 0x09, 0x96, 0xe0, 0xa6, 0x09, 0x96, 0xd9, 0xa5, 0x09, 0x96, 0xd1, 0xa4, + 0x09, 0x96, 0xc9, 0xa3, 0x09, 0x96, 0xbb, 0x01, 0xf7, 0xd0, 0xa2, 0x09, + 0x96, 0xb1, 
0xa1, 0x09, 0x96, 0xa9, 0xa0, 0x09, 0x96, 0xa1, 0x9f, 0x09, + 0x96, 0x93, 0x01, 0xf7, 0xd4, 0x9e, 0x09, 0x96, 0x88, 0xa6, 0x09, 0x96, + 0x81, 0xa5, 0x09, 0x96, 0x79, 0xa4, 0x09, 0x96, 0x71, 0xa3, 0x09, 0x96, + 0x69, 0xa2, 0x09, 0x96, 0x61, 0xa1, 0x09, 0x96, 0x59, 0xa0, 0x09, 0x96, + 0x51, 0x9f, 0x09, 0x96, 0x49, 0x9e, 0x09, 0x96, 0x41, 0x9d, 0x09, 0x96, + 0x38, 0xa6, 0x09, 0x96, 0x31, 0xa5, 0x09, 0x96, 0x29, 0xa4, 0x09, 0x96, + 0x21, 0xa3, 0x09, 0x96, 0x13, 0x01, 0xf7, 0xd8, 0xa2, 0x09, 0x96, 0x09, + 0xa1, 0x09, 0x96, 0x01, 0xa0, 0x09, 0x95, 0xf9, 0x9f, 0x09, 0x95, 0xf1, + 0x9e, 0x09, 0x95, 0xe9, 0x9d, 0x09, 0x95, 0xda, 0x01, 0xf7, 0xdc, 0xa6, + 0x09, 0x95, 0xd1, 0xa5, 0x09, 0x95, 0xc9, 0xa4, 0x09, 0x95, 0xc1, 0xa3, + 0x09, 0x95, 0xb9, 0xa2, 0x09, 0x95, 0xb1, 0xa1, 0x09, 0x95, 0xa9, 0xa0, + 0x09, 0x95, 0x93, 0x01, 0xf7, 0xe0, 0x9f, 0x09, 0x95, 0x83, 0x01, 0xf7, + 0xe8, 0x9e, 0x09, 0x95, 0x78, 0x9e, 0x09, 0x95, 0x39, 0x9d, 0x09, 0x95, + 0x30, 0xa6, 0x09, 0x95, 0x29, 0xa5, 0x09, 0x95, 0x21, 0xa4, 0x09, 0x95, + 0x19, 0xa3, 0x09, 0x95, 0x11, 0xa2, 0x09, 0x95, 0x09, 0xa1, 0x09, 0x95, + 0x01, 0xa0, 0x09, 0x94, 0xf3, 0x01, 0xf7, 0xec, 0x9f, 0x09, 0x94, 0xe9, + 0x9e, 0x09, 0x94, 0xda, 0x01, 0xf7, 0xf0, 0x1f, 0xc1, 0xf7, 0xf4, 0x1e, + 0xc1, 0xf8, 0x03, 0x1d, 0x41, 0xf8, 0x34, 0xc2, 0xdc, 0x39, 0x09, 0x91, + 0xa9, 0x1e, 0xc1, 0xf8, 0x58, 0x1d, 0x41, 0xf8, 0x83, 0x21, 0xc1, 0xf8, + 0xaa, 0x20, 0xc1, 0xf8, 0xb6, 0x1f, 0xc1, 0xf8, 0xea, 0x1e, 0xc1, 0xf9, + 0x15, 0x1d, 0x41, 0xf9, 0x40, 0xa1, 0x09, 0x8f, 0x71, 0xa0, 0x09, 0x8f, + 0x69, 0x9f, 0x09, 0x8f, 0x61, 0x9e, 0x09, 0x8f, 0x59, 0x9d, 0x09, 0x8f, + 0x4a, 0x01, 0xf9, 0x64, 0xa6, 0x09, 0x8f, 0x41, 0xa5, 0x09, 0x8f, 0x39, + 0xa4, 0x09, 0x8f, 0x31, 0xa3, 0x09, 0x8f, 0x29, 0xa2, 0x09, 0x8f, 0x21, + 0xa1, 0x09, 0x8f, 0x19, 0xa0, 0x09, 0x8f, 0x03, 0x01, 0xf9, 0x68, 0x9f, + 0x09, 0x8e, 0xf9, 0x9e, 0x09, 0x8e, 0xeb, 0x01, 0xf9, 0x70, 0x9d, 0x09, + 0x8e, 0xe0, 0xa6, 0x09, 0x8e, 0xd9, 0xa5, 0x09, 0x8e, 0xcb, 0x01, 0xf9, + 0x74, 0xa4, 0x09, 0x8e, 0xc1, 0xa3, 0x09, 0x8e, 0xb9, 0xa2, 0x09, 0x8e, + 0xb1, 0xa1, 0x09, 0x8e, 0xa3, 0x01, 0xf9, 0x78, 0xa0, 0x09, 0x8e, 0x99, + 0x9f, 0x09, 0x8e, 0x8b, 0x01, 0xf9, 0x7c, 0x9e, 0x09, 0x8e, 0x81, 0x9d, + 0x09, 0x8e, 0x78, 0xa6, 0x09, 0x8e, 0x71, 0xa5, 0x09, 0x8e, 0x69, 0xa4, + 0x09, 0x8e, 0x5b, 0x01, 0xf9, 0x80, 0xa3, 0x09, 0x8e, 0x4b, 0x01, 0xf9, + 0x84, 0xa2, 0x09, 0x8e, 0x3b, 0x01, 0xf9, 0x88, 0xa1, 0x09, 0x8e, 0x31, + 0xa0, 0x09, 0x8e, 0x29, 0x9f, 0x09, 0x8d, 0xe3, 0x01, 0xf9, 0x8c, 0x9e, + 0x09, 0x8d, 0xd9, 0x9d, 0x09, 0x8d, 0xca, 0x01, 0xf9, 0xac, 0xa6, 0x09, + 0x8d, 0xc1, 0xa5, 0x09, 0x8d, 0xb9, 0xa4, 0x09, 0x8d, 0xb1, 0xa3, 0x09, + 0x8d, 0xa9, 0xa2, 0x09, 0x8d, 0xa1, 0xa1, 0x09, 0x8d, 0x99, 0xa0, 0x09, + 0x8d, 0x8b, 0x01, 0xf9, 0xb0, 0x9f, 0x09, 0x8d, 0x81, 0x9e, 0x09, 0x8d, + 0x6a, 0x01, 0xf9, 0xb4, 0x83, 0x09, 0x8d, 0x50, 0x83, 0x09, 0x8d, 0x28, + 0xa1, 0x09, 0x8b, 0xf1, 0xa0, 0x09, 0x8b, 0xe9, 0x9f, 0x09, 0x8b, 0xe1, + 0x9e, 0x09, 0x8b, 0xd9, 0x9d, 0x09, 0x8b, 0xd0, 0xa6, 0x09, 0x8b, 0xc9, + 0xa5, 0x09, 0x8b, 0xc1, 0xa4, 0x09, 0x8b, 0xb9, 0xa3, 0x09, 0x8b, 0xb1, + 0xa2, 0x09, 0x8b, 0xa3, 0x01, 0xf9, 0xbc, 0xa1, 0x09, 0x8b, 0x99, 0xa0, + 0x09, 0x8b, 0x8b, 0x01, 0xf9, 0xc0, 0x9f, 0x09, 0x8b, 0x81, 0x9e, 0x09, + 0x8b, 0x79, 0x9d, 0x09, 0x8b, 0x70, 0xa6, 0x09, 0x8b, 0x69, 0xa5, 0x09, + 0x8b, 0x61, 0xa4, 0x09, 0x8b, 0x53, 0x01, 0xf9, 0xc4, 0xa3, 0x09, 0x8b, + 0x43, 0x01, 0xf9, 0xc8, 0xa2, 0x09, 0x8b, 0x39, 0xa1, 0x09, 0x8b, 0x31, + 0xa0, 0x09, 0x8b, 0x29, 0x9f, 0x09, 0x8b, 0x21, 0x9e, 0x09, 0x8b, 0x19, + 0x9d, 0x09, 
0x8b, 0x10, 0xa6, 0x09, 0x8b, 0x09, 0xa5, 0x09, 0x8b, 0x01, + 0xa4, 0x09, 0x8a, 0xf9, 0xa3, 0x09, 0x8a, 0xeb, 0x01, 0xf9, 0xcc, 0xa2, + 0x09, 0x8a, 0xe1, 0xa1, 0x09, 0x8a, 0xd9, 0xa0, 0x09, 0x8a, 0xd1, 0x9f, + 0x09, 0x8a, 0xc9, 0x9e, 0x09, 0x8a, 0xc1, 0x9d, 0x09, 0x8a, 0xb2, 0x01, + 0xf9, 0xd0, 0xa6, 0x09, 0x8a, 0xa9, 0xa5, 0x09, 0x8a, 0xa1, 0xa4, 0x09, + 0x8a, 0x99, 0xa3, 0x09, 0x8a, 0x91, 0xa2, 0x09, 0x8a, 0x89, 0xa1, 0x09, + 0x8a, 0x81, 0xa0, 0x09, 0x8a, 0x79, 0x9f, 0x09, 0x8a, 0x71, 0x9e, 0x09, + 0x8a, 0x63, 0x01, 0xf9, 0xd4, 0x9d, 0x09, 0x8a, 0x58, 0xa6, 0x09, 0x8a, + 0x51, 0xa5, 0x09, 0x8a, 0x49, 0xa4, 0x09, 0x8a, 0x33, 0x01, 0xf9, 0xd8, + 0xa3, 0x09, 0x8a, 0x23, 0x01, 0xf9, 0xe0, 0xa2, 0x09, 0x8a, 0x19, 0xa1, + 0x09, 0x8a, 0x11, 0xa0, 0x09, 0x8a, 0x09, 0x9f, 0x09, 0x8a, 0x01, 0x9e, + 0x09, 0x89, 0xf8, 0xa0, 0x09, 0x89, 0xf1, 0x9f, 0x09, 0x89, 0xe9, 0x9e, + 0x09, 0x89, 0xcb, 0x01, 0xf9, 0xe4, 0x9d, 0x09, 0x89, 0xc0, 0xa6, 0x09, + 0x89, 0xb9, 0xa5, 0x09, 0x89, 0xb1, 0xa4, 0x09, 0x89, 0xa3, 0x01, 0xf9, + 0xf0, 0xa3, 0x09, 0x89, 0x93, 0x01, 0xf9, 0xf4, 0xa2, 0x09, 0x89, 0x83, + 0x01, 0xf9, 0xf8, 0xa1, 0x09, 0x89, 0x79, 0xa0, 0x09, 0x89, 0x71, 0x9f, + 0x09, 0x89, 0x69, 0x9e, 0x09, 0x89, 0x61, 0x9d, 0x09, 0x89, 0x58, 0xa6, + 0x09, 0x89, 0x51, 0xa5, 0x09, 0x89, 0x43, 0x01, 0xf9, 0xfc, 0xa4, 0x09, + 0x89, 0x33, 0x01, 0xfa, 0x00, 0xa3, 0x09, 0x89, 0x29, 0xa2, 0x09, 0x89, + 0x21, 0xa1, 0x09, 0x89, 0x19, 0xa0, 0x09, 0x89, 0x11, 0x9f, 0x09, 0x89, + 0x09, 0x9e, 0x09, 0x88, 0xfb, 0x01, 0xfa, 0x04, 0x9d, 0x09, 0x88, 0xf0, + 0xa6, 0x09, 0x88, 0xe9, 0xa5, 0x09, 0x88, 0xe1, 0xa4, 0x09, 0x88, 0xd9, + 0xa3, 0x09, 0x88, 0xd1, 0xa2, 0x09, 0x88, 0xc9, 0xa1, 0x09, 0x88, 0xc1, + 0xa0, 0x09, 0x88, 0xb9, 0x9f, 0x09, 0x88, 0xb1, 0x9e, 0x09, 0x88, 0xa3, + 0x01, 0xfa, 0x08, 0x9d, 0x09, 0x88, 0x98, 0xa6, 0x09, 0x88, 0x91, 0xa5, + 0x09, 0x88, 0x89, 0xa4, 0x09, 0x88, 0x81, 0xa3, 0x09, 0x88, 0x79, 0xa2, + 0x09, 0x88, 0x71, 0xa1, 0x09, 0x88, 0x69, 0xa0, 0x09, 0x88, 0x5b, 0x01, + 0xfa, 0x0c, 0x9f, 0x09, 0x88, 0x51, 0x9e, 0x09, 0x88, 0x49, 0x9d, 0x09, + 0x88, 0x40, 0xa6, 0x09, 0x88, 0x39, 0xa5, 0x09, 0x88, 0x31, 0xa4, 0x09, + 0x88, 0x29, 0xa3, 0x09, 0x88, 0x21, 0xa2, 0x09, 0x88, 0x19, 0xa1, 0x09, + 0x88, 0x11, 0xa0, 0x09, 0x88, 0x09, 0x9f, 0x09, 0x88, 0x01, 0x9e, 0x09, + 0x87, 0xf2, 0x01, 0xfa, 0x10, 0xa4, 0x09, 0x86, 0x4b, 0x01, 0xfa, 0x14, + 0xa3, 0x09, 0x86, 0x41, 0xa2, 0x09, 0x86, 0x39, 0xa1, 0x09, 0x86, 0x31, + 0xa0, 0x09, 0x86, 0x29, 0x9f, 0x09, 0x86, 0x21, 0x9e, 0x09, 0x86, 0x19, + 0x9d, 0x09, 0x86, 0x10, 0xa6, 0x09, 0x86, 0x09, 0xa5, 0x09, 0x86, 0x01, + 0xa4, 0x09, 0x85, 0xf9, 0xa3, 0x09, 0x85, 0xf1, 0xa2, 0x09, 0x85, 0xe9, + 0xa1, 0x09, 0x85, 0xdb, 0x01, 0xfa, 0x34, 0xa0, 0x09, 0x85, 0xd1, 0x9f, + 0x09, 0x85, 0xc3, 0x01, 0xfa, 0x38, 0x9e, 0x09, 0x85, 0xb9, 0x9d, 0x09, + 0x85, 0x6a, 0x01, 0xfa, 0x3c, 0xa6, 0x09, 0x85, 0x61, 0xa5, 0x09, 0x85, + 0x53, 0x01, 0xfa, 0x60, 0xa4, 0x09, 0x85, 0x49, 0xa3, 0x09, 0x85, 0x3b, + 0x01, 0xfa, 0x64, 0xa2, 0x09, 0x85, 0x31, 0xa1, 0x09, 0x85, 0x29, 0xa0, + 0x09, 0x85, 0x21, 0x9f, 0x09, 0x85, 0x19, 0x9e, 0x09, 0x85, 0x11, 0x9d, + 0x09, 0x85, 0x08, 0xa6, 0x09, 0x85, 0x01, 0xa5, 0x09, 0x84, 0xf9, 0xa4, + 0x09, 0x84, 0xf1, 0xa3, 0x09, 0x84, 0xe9, 0xa2, 0x09, 0x84, 0xe1, 0xa1, + 0x09, 0x84, 0xd3, 0x01, 0xfa, 0x68, 0xa0, 0x09, 0x84, 0xc9, 0x9f, 0x09, + 0x84, 0xc1, 0x9e, 0x09, 0x84, 0xb3, 0x01, 0xfa, 0x6c, 0x9d, 0x09, 0x84, + 0xa8, 0xa6, 0x09, 0x84, 0xa1, 0xa5, 0x09, 0x84, 0x99, 0xa4, 0x09, 0x84, + 0x8b, 0x01, 0xfa, 0x70, 0xa3, 0x09, 0x84, 0x81, 0xa2, 0x09, 0x84, 0x79, + 0xa1, 0x09, 
0x84, 0x71, 0xa0, 0x09, 0x84, 0x69, 0x9f, 0x09, 0x84, 0x61, + 0x9e, 0x09, 0x84, 0x59, 0x9d, 0x09, 0x84, 0x50, 0xa6, 0x09, 0x84, 0x49, + 0xa5, 0x09, 0x84, 0x41, 0xa4, 0x09, 0x84, 0x39, 0xa3, 0x09, 0x84, 0x31, + 0xa2, 0x09, 0x84, 0x29, 0xa1, 0x09, 0x84, 0x21, 0xa0, 0x09, 0x84, 0x19, + 0x9f, 0x09, 0x84, 0x11, 0x9e, 0x09, 0x84, 0x09, 0x9d, 0x09, 0x84, 0x00, + 0xa6, 0x09, 0x83, 0xf9, 0xa5, 0x09, 0x83, 0xeb, 0x01, 0xfa, 0x74, 0xa4, + 0x09, 0x83, 0xe1, 0xa3, 0x09, 0x83, 0xd9, 0xa2, 0x09, 0x83, 0xd1, 0xa1, + 0x09, 0x83, 0xc9, 0xa0, 0x09, 0x83, 0xc1, 0x9f, 0x09, 0x83, 0xb9, 0x9e, + 0x09, 0x83, 0xb0, 0xa1, 0x09, 0x83, 0xa9, 0xa0, 0x09, 0x83, 0xa1, 0x9f, + 0x09, 0x83, 0x99, 0x9e, 0x09, 0x83, 0x91, 0x9d, 0x09, 0x83, 0x88, 0xa6, + 0x09, 0x83, 0x81, 0xa5, 0x09, 0x83, 0x79, 0xa4, 0x09, 0x83, 0x71, 0xa3, + 0x09, 0x83, 0x69, 0xa2, 0x09, 0x83, 0x61, 0xa1, 0x09, 0x83, 0x59, 0xa0, + 0x09, 0x83, 0x51, 0x9f, 0x09, 0x83, 0x49, 0x9e, 0x09, 0x83, 0x41, 0x9d, + 0x09, 0x83, 0x32, 0x01, 0xfa, 0x78, 0xa6, 0x09, 0x83, 0x29, 0xa5, 0x09, + 0x83, 0x21, 0xa4, 0x09, 0x83, 0x19, 0xa3, 0x09, 0x83, 0x11, 0xa2, 0x09, + 0x83, 0x09, 0xa1, 0x09, 0x83, 0x01, 0xa0, 0x09, 0x82, 0xf9, 0x9f, 0x09, + 0x82, 0xdb, 0x01, 0xfa, 0x7c, 0x9e, 0x09, 0x82, 0xd0, 0xcb, 0x58, 0xc7, + 0x0f, 0xbd, 0x39, 0x46, 0x01, 0xfc, 0xc1, 0xfa, 0x88, 0x15, 0xc1, 0xfa, + 0x94, 0xd4, 0x3c, 0xb4, 0x0f, 0xbd, 0xa0, 0xc4, 0x18, 0x10, 0x00, 0x37, + 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0x37, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0x37, + 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0x37, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0x37, + 0x99, 0xc2, 0x02, 0xa0, 0x00, 0x37, 0x90, 0x97, 0x00, 0x98, 0x4b, 0x01, + 0xfa, 0xa0, 0x47, 0x23, 0x34, 0xc1, 0xfa, 0xa6, 0x83, 0x00, 0x98, 0x43, + 0x01, 0xfa, 0xc9, 0x8b, 0x00, 0x98, 0x51, 0x87, 0x00, 0x98, 0x6b, 0x01, + 0xfa, 0xcd, 0x91, 0x00, 0x98, 0x73, 0x01, 0xfa, 0xd1, 0x19, 0xc1, 0xfa, + 0xd5, 0x09, 0xc1, 0xfa, 0xe7, 0x1b, 0x41, 0xfb, 0x05, 0x0a, 0xc1, 0xfb, + 0x1f, 0x83, 0x00, 0x90, 0x03, 0x01, 0xfb, 0x41, 0x97, 0x00, 0x90, 0x09, + 0x8b, 0x00, 0x90, 0x11, 0x87, 0x00, 0x90, 0x2b, 0x01, 0xfb, 0x45, 0x91, + 0x00, 0x90, 0x32, 0x01, 0xfb, 0x49, 0x04, 0xc1, 0xfb, 0x4d, 0x83, 0x00, + 0x93, 0x03, 0x01, 0xfb, 0x67, 0x97, 0x00, 0x93, 0x09, 0x8b, 0x00, 0x93, + 0x11, 0x87, 0x00, 0x93, 0x2b, 0x01, 0xfb, 0x6b, 0x91, 0x00, 0x93, 0x33, + 0x01, 0xfb, 0x6f, 0x19, 0x41, 0xfb, 0x73, 0x05, 0xc1, 0xfb, 0x82, 0x83, + 0x00, 0x93, 0xc3, 0x01, 0xfb, 0xa0, 0x97, 0x00, 0x93, 0xc9, 0x8b, 0x00, + 0x93, 0xd1, 0x87, 0x00, 0x93, 0xeb, 0x01, 0xfb, 0xa4, 0x91, 0x00, 0x93, + 0xf3, 0x01, 0xfb, 0xa8, 0xc2, 0x01, 0x4a, 0x00, 0x93, 0xf9, 0x0a, 0x41, + 0xfb, 0xac, 0x1c, 0xc1, 0xfb, 0xcf, 0x06, 0xc1, 0xfb, 0xe8, 0x83, 0x00, + 0x97, 0x83, 0x01, 0xfc, 0x0f, 0x97, 0x00, 0x97, 0x89, 0x8b, 0x00, 0x97, + 0x91, 0x87, 0x00, 0x97, 0xab, 0x01, 0xfc, 0x13, 0x91, 0x00, 0x97, 0xb3, + 0x01, 0xfc, 0x17, 0xc2, 0x01, 0x4a, 0x00, 0x97, 0xb8, 0x42, 0x00, 0x8e, + 0xc1, 0xfc, 0x1b, 0x83, 0x00, 0x93, 0x83, 0x01, 0xfc, 0x34, 0x97, 0x00, + 0x93, 0x89, 0x8b, 0x00, 0x93, 0x91, 0x87, 0x00, 0x93, 0xab, 0x01, 0xfc, + 0x38, 0x91, 0x00, 0x93, 0xb3, 0x01, 0xfc, 0x3c, 0xc2, 0x01, 0x4a, 0x00, + 0x93, 0xb9, 0x0a, 0xc1, 0xfc, 0x40, 0x15, 0xc1, 0xfc, 0x63, 0x1c, 0x41, + 0xfc, 0x83, 0x83, 0x00, 0x90, 0x43, 0x01, 0xfc, 0xa0, 0x97, 0x00, 0x90, + 0x49, 0x8b, 0x00, 0x90, 0x51, 0x87, 0x00, 0x90, 0x6b, 0x01, 0xfc, 0xa4, + 0x91, 0x00, 0x90, 0x73, 0x01, 0xfc, 0xa8, 0xc2, 0x01, 0x4a, 0x00, 0x90, + 0x78, 0x83, 0x00, 0x90, 0xc3, 0x01, 0xfc, 0xac, 0x97, 0x00, 0x90, 0xc9, + 0x8b, 0x00, 0x90, 0xd1, 0x87, 0x00, 0x90, 0xeb, 0x01, 0xfc, 0xb0, 0x91, + 0x00, 0x90, 
0xf3, 0x01, 0xfc, 0xb4, 0x19, 0xc1, 0xfc, 0xb8, 0xc2, 0x19, + 0x2c, 0x00, 0x9a, 0xc8, 0x1c, 0xc1, 0xfc, 0xc7, 0x83, 0x00, 0x91, 0x83, + 0x01, 0xfc, 0xe7, 0x97, 0x00, 0x91, 0x89, 0x8b, 0x00, 0x91, 0x91, 0x87, + 0x00, 0x91, 0xab, 0x01, 0xfc, 0xeb, 0x91, 0x00, 0x91, 0xb3, 0x01, 0xfc, + 0xf5, 0xc2, 0x01, 0x4a, 0x00, 0x91, 0xb9, 0x0a, 0xc1, 0xfc, 0xf9, 0x15, + 0x41, 0xfd, 0x1c, 0x83, 0x00, 0x91, 0x43, 0x01, 0xfd, 0x36, 0x97, 0x00, + 0x91, 0x49, 0x8b, 0x00, 0x91, 0x51, 0x87, 0x00, 0x91, 0x6b, 0x01, 0xfd, + 0x3a, 0x91, 0x00, 0x91, 0x73, 0x01, 0xfd, 0x3e, 0xc2, 0x01, 0x4a, 0x00, + 0x91, 0x79, 0xc2, 0x19, 0x2c, 0x00, 0x9a, 0xc0, 0x83, 0x00, 0x92, 0x03, + 0x01, 0xfd, 0x42, 0x97, 0x00, 0x92, 0x09, 0x8b, 0x00, 0x92, 0x11, 0x87, + 0x00, 0x92, 0x2b, 0x01, 0xfd, 0x46, 0x91, 0x00, 0x92, 0x33, 0x01, 0xfd, + 0x4a, 0x19, 0xc1, 0xfd, 0x4e, 0x0a, 0xc1, 0xfd, 0x60, 0x1b, 0x41, 0xfd, + 0x7e, 0x83, 0x00, 0x93, 0x43, 0x01, 0xfd, 0x98, 0x97, 0x00, 0x93, 0x49, + 0x8b, 0x00, 0x93, 0x51, 0x87, 0x00, 0x93, 0x6b, 0x01, 0xfd, 0x9c, 0x91, + 0x00, 0x93, 0x71, 0xc2, 0x01, 0x4a, 0x00, 0x93, 0x78, 0x83, 0x00, 0x94, + 0x03, 0x01, 0xfd, 0xa0, 0x97, 0x00, 0x94, 0x09, 0x8b, 0x00, 0x94, 0x11, + 0x87, 0x00, 0x94, 0x2b, 0x01, 0xfd, 0xa4, 0x91, 0x00, 0x94, 0x33, 0x01, + 0xfd, 0xa8, 0x19, 0xc1, 0xfd, 0xac, 0x1b, 0x41, 0xfd, 0xbe, 0x83, 0x00, + 0x94, 0x83, 0x01, 0xfd, 0xd8, 0x97, 0x00, 0x94, 0x89, 0x8b, 0x00, 0x94, + 0x91, 0x87, 0x00, 0x94, 0xab, 0x01, 0xfd, 0xdc, 0x91, 0x00, 0x94, 0xb3, + 0x01, 0xfd, 0xe0, 0xc2, 0x01, 0x4a, 0x00, 0x94, 0xb9, 0x1b, 0x41, 0xfd, + 0xe4, 0x83, 0x00, 0x95, 0x43, 0x01, 0xfe, 0x07, 0x97, 0x00, 0x95, 0x49, + 0x8b, 0x00, 0x95, 0x51, 0x87, 0x00, 0x95, 0x6b, 0x01, 0xfe, 0x0b, 0x91, + 0x00, 0x95, 0x73, 0x01, 0xfe, 0x0f, 0x19, 0xc1, 0xfe, 0x13, 0x1a, 0xc1, + 0xfe, 0x25, 0x1b, 0x41, 0xfe, 0x43, 0x83, 0x00, 0x96, 0x43, 0x01, 0xfe, + 0x5d, 0x97, 0x00, 0x96, 0x49, 0x8b, 0x00, 0x96, 0x51, 0x87, 0x00, 0x96, + 0x6b, 0x01, 0xfe, 0x61, 0x91, 0x00, 0x96, 0x72, 0x01, 0xfe, 0x65, 0x0a, + 0xc1, 0xfe, 0x69, 0x83, 0x00, 0x9a, 0x83, 0x01, 0xfe, 0x8c, 0x97, 0x00, + 0x9a, 0x89, 0x8b, 0x00, 0x9a, 0x91, 0x87, 0x00, 0x9a, 0xab, 0x01, 0xfe, + 0x90, 0x91, 0x00, 0x9a, 0xb3, 0x01, 0xfe, 0x94, 0x19, 0x41, 0xfe, 0x98, + 0x83, 0x00, 0x96, 0xc3, 0x01, 0xfe, 0xa7, 0x97, 0x00, 0x96, 0xc9, 0x8b, + 0x00, 0x96, 0xd1, 0x87, 0x00, 0x96, 0xeb, 0x01, 0xfe, 0xab, 0x91, 0x00, + 0x96, 0xf3, 0x01, 0xfe, 0xaf, 0xc2, 0x01, 0x4a, 0x00, 0x96, 0xf9, 0x0a, + 0xc1, 0xfe, 0xb3, 0x1c, 0x41, 0xfe, 0xd3, 0x83, 0x00, 0x97, 0x43, 0x01, + 0xfe, 0xed, 0x97, 0x00, 0x97, 0x49, 0x8b, 0x00, 0x97, 0x51, 0x87, 0x00, + 0x97, 0x6b, 0x01, 0xfe, 0xf1, 0x91, 0x00, 0x97, 0x72, 0x01, 0xfe, 0xf5, + 0x83, 0x00, 0x98, 0x03, 0x01, 0xfe, 0xf9, 0x97, 0x00, 0x98, 0x09, 0x8b, + 0x00, 0x98, 0x11, 0x87, 0x00, 0x98, 0x2b, 0x01, 0xfe, 0xfd, 0x91, 0x00, + 0x98, 0x33, 0x01, 0xff, 0x01, 0xc2, 0x01, 0x4a, 0x00, 0x98, 0x38, 0x83, + 0x00, 0x9a, 0x43, 0x01, 0xff, 0x05, 0x97, 0x00, 0x9a, 0x49, 0x8b, 0x00, + 0x9a, 0x51, 0x87, 0x00, 0x9a, 0x6b, 0x01, 0xff, 0x09, 0x91, 0x00, 0x9a, + 0x71, 0x19, 0xc1, 0xff, 0x0d, 0xc2, 0x19, 0x2c, 0x00, 0x9a, 0xd0, 0x4b, + 0x63, 0xff, 0xc1, 0xff, 0x1c, 0xd1, 0x36, 0x4b, 0x00, 0x9a, 0xf0, 0xc9, + 0x57, 0x20, 0x00, 0x9b, 0xe0, 0xc6, 0xce, 0xc9, 0x00, 0x9c, 0xc0, 0x48, + 0x6e, 0x42, 0xc1, 0xff, 0x28, 0x45, 0x00, 0x8c, 0x41, 0xff, 0x34, 0xc5, + 0x01, 0xa2, 0x01, 0x18, 0x09, 0xc5, 0xd8, 0x53, 0x0f, 0xa9, 0x31, 0xc4, + 0xe3, 0xdb, 0x0f, 0xa8, 0x61, 0xca, 0xa5, 0x94, 0x0f, 0xa5, 0x08, 0xc2, + 0x39, 0x8b, 0x08, 0x7f, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, 0x7f, 0x40, 0xc3, + 0x11, 0xef, 
0x08, 0x7f, 0xa1, 0x03, 0x41, 0xff, 0x58, 0xc2, 0x00, 0x8e, + 0x08, 0x7f, 0x38, 0xc4, 0x36, 0xb5, 0x08, 0x7f, 0x01, 0xc3, 0x16, 0x5a, + 0x08, 0x7f, 0x78, 0x87, 0x08, 0x29, 0x29, 0xc4, 0x38, 0x2c, 0x08, 0x29, + 0x30, 0xd6, 0x2e, 0x6a, 0x01, 0x39, 0xb9, 0xcd, 0x0e, 0x61, 0x01, 0x39, + 0xa9, 0xca, 0x22, 0x51, 0x01, 0x39, 0xa0, 0xc2, 0x00, 0x55, 0x01, 0x10, + 0x71, 0xcb, 0x6d, 0x97, 0x00, 0x04, 0xb8, 0xcb, 0x98, 0xd1, 0x00, 0x00, + 0x23, 0x01, 0xff, 0x64, 0xc3, 0x09, 0x3f, 0x00, 0x00, 0x18, 0x43, 0x05, + 0xb2, 0xc1, 0xff, 0x6a, 0xcd, 0x76, 0x76, 0x01, 0x12, 0xe8, 0x00, 0x41, + 0xff, 0x82, 0xc4, 0x18, 0x10, 0x08, 0xed, 0x39, 0xc2, 0x22, 0xcc, 0x08, + 0xed, 0x30, 0xc3, 0x0d, 0x14, 0x08, 0xed, 0x29, 0xc3, 0x09, 0x9e, 0x08, + 0xed, 0x20, 0xc4, 0x02, 0xde, 0x08, 0xed, 0x19, 0xc2, 0x02, 0xa0, 0x08, + 0xed, 0x10, 0x03, 0xc1, 0xff, 0x8c, 0xc2, 0x01, 0x24, 0x08, 0xec, 0x99, + 0xc2, 0x02, 0xe0, 0x08, 0xec, 0x81, 0x97, 0x08, 0xec, 0x6b, 0x01, 0xff, + 0x98, 0x8b, 0x08, 0xec, 0x5a, 0x01, 0xff, 0x9c, 0xc2, 0x00, 0xd0, 0x08, + 0xec, 0x31, 0x83, 0x08, 0xec, 0x28, 0xc2, 0x01, 0x30, 0x08, 0xec, 0x21, + 0x83, 0x08, 0xeb, 0xd0, 0x06, 0xc1, 0xff, 0xa0, 0xc2, 0x00, 0xd0, 0x08, + 0xeb, 0xc9, 0x83, 0x08, 0xeb, 0xc0, 0xc2, 0x00, 0xd0, 0x08, 0xec, 0x09, + 0x83, 0x08, 0xec, 0x00, 0xc2, 0x00, 0xdb, 0x08, 0xeb, 0xf9, 0x83, 0x08, + 0xeb, 0xa8, 0x16, 0xc1, 0xff, 0xaa, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xa1, + 0x83, 0x08, 0xeb, 0x98, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xe1, 0x83, 0x08, + 0xeb, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xb9, 0x83, 0x08, 0xeb, 0xb0, + 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0x91, 0x83, 0x08, 0xeb, 0x88, 0xc2, 0x00, + 0xd0, 0x08, 0xeb, 0x79, 0x83, 0x08, 0xeb, 0x70, 0x97, 0x08, 0xeb, 0x59, + 0x8b, 0x08, 0xeb, 0x41, 0x83, 0x08, 0xeb, 0x08, 0x97, 0x08, 0xeb, 0x28, + 0x8b, 0x08, 0xeb, 0x18, 0xc5, 0x40, 0xe7, 0x00, 0x50, 0x19, 0xc4, 0x1e, + 0x97, 0x00, 0x52, 0x68, 0x83, 0x00, 0x50, 0x31, 0x8b, 0x00, 0x50, 0x81, + 0x97, 0x00, 0x50, 0xa0, 0x8b, 0x00, 0x50, 0x40, 0x97, 0x00, 0x50, 0x50, + 0x83, 0x00, 0x50, 0xa9, 0x0a, 0x41, 0xff, 0xb4, 0x83, 0x00, 0x50, 0xb9, + 0x0a, 0x41, 0xff, 0xbe, 0xc2, 0x01, 0x30, 0x00, 0x50, 0xc9, 0xc2, 0x19, + 0x2c, 0x00, 0x50, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x51, 0x19, 0x83, 0x00, + 0x51, 0x40, 0x83, 0x00, 0x50, 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x50, 0xd8, + 0x83, 0x00, 0x50, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x50, 0xe8, 0x16, 0xc1, + 0xff, 0xc8, 0x83, 0x00, 0x51, 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x28, + 0x06, 0xc1, 0xff, 0xd2, 0x83, 0x00, 0x51, 0x31, 0xc2, 0x00, 0xd0, 0x00, + 0x51, 0x38, 0x83, 0x00, 0x51, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x58, + 0x83, 0x00, 0x51, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x68, 0x83, 0x00, + 0x51, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x52, 0xe0, 0x83, 0x00, 0x51, 0x91, + 0xc2, 0x00, 0xdb, 0x00, 0x51, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0xb1, + 0x83, 0x00, 0x51, 0xc0, 0x83, 0x00, 0x51, 0xf1, 0x8b, 0x00, 0x52, 0x41, + 0x97, 0x00, 0x52, 0x60, 0x8b, 0x00, 0x52, 0x00, 0x97, 0x00, 0x52, 0x10, + 0xc2, 0x02, 0xa0, 0x00, 0x53, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x53, 0x48, + 0xc3, 0x09, 0x9e, 0x00, 0x53, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x53, 0x58, + 0xc2, 0x22, 0xcc, 0x00, 0x53, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x53, 0x68, + 0xca, 0x1e, 0x8a, 0x00, 0x54, 0x09, 0xd1, 0x33, 0x57, 0x00, 0x57, 0xf0, + 0xc7, 0x14, 0x39, 0x00, 0x54, 0x11, 0xc7, 0x7a, 0x7f, 0x00, 0x55, 0xe8, + 0xc5, 0x40, 0xe7, 0x00, 0x54, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x56, 0x68, + 0xc4, 0xdb, 0xfb, 0x00, 0x57, 0xd1, 0xc5, 0xd7, 0x18, 0x00, 0x57, 0xd8, + 0xd4, 0x3a, 0x84, 0x00, 0x57, 0xe9, 0xd5, 0x33, 0x53, 0x00, 0x57, 0xf8, + 0x83, 0x00, 
0x54, 0x31, 0x8b, 0x00, 0x54, 0x81, 0x97, 0x00, 0x54, 0xa0, + 0x8b, 0x00, 0x54, 0x40, 0x97, 0x00, 0x54, 0x50, 0x47, 0xb2, 0x2e, 0xc1, + 0xff, 0xdc, 0x83, 0x00, 0x55, 0xa8, 0x83, 0x00, 0x54, 0xa9, 0xc2, 0x00, + 0xd0, 0x00, 0x54, 0xb0, 0x83, 0x00, 0x54, 0xb9, 0xc2, 0x00, 0xd0, 0x00, + 0x54, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x54, 0xc9, 0xc2, 0x19, 0x2c, 0x00, + 0x54, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x55, 0x19, 0x83, 0x00, 0x55, 0x40, + 0x83, 0x00, 0x54, 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x54, 0xd8, 0x83, 0x00, + 0x54, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x54, 0xe8, 0x16, 0xc1, 0xff, 0xea, + 0x83, 0x00, 0x55, 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x28, 0x06, 0xc1, + 0xff, 0xf4, 0x83, 0x00, 0x55, 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x38, + 0x83, 0x00, 0x55, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x58, 0x83, 0x00, + 0x55, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x68, 0x83, 0x00, 0x55, 0x91, + 0xc2, 0x00, 0xdb, 0x00, 0x55, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0xb1, + 0xc2, 0x0d, 0xf6, 0x00, 0x55, 0xb9, 0x83, 0x00, 0x55, 0xc0, 0x87, 0x00, + 0x54, 0x69, 0x91, 0x00, 0x54, 0x88, 0x03, 0xc1, 0xff, 0xfe, 0x8b, 0x00, + 0x55, 0xfb, 0x02, 0x00, 0x0a, 0x97, 0x00, 0x56, 0x0b, 0x02, 0x00, 0x0e, + 0x48, 0xb2, 0x2d, 0xc2, 0x00, 0x12, 0x47, 0xc7, 0x7b, 0xc2, 0x00, 0x20, + 0x87, 0x00, 0x56, 0x39, 0x91, 0x00, 0x56, 0x58, 0xc2, 0x02, 0xa0, 0x00, + 0x57, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x57, 0x48, 0xc3, 0x09, 0x9e, 0x00, + 0x57, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x57, 0x58, 0xc2, 0x22, 0xcc, 0x00, + 0x57, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x57, 0x68, 0xc2, 0x0d, 0x10, 0x08, + 0x1a, 0x09, 0xc8, 0x0d, 0x03, 0x08, 0x1a, 0x50, 0x0f, 0xc2, 0x00, 0x28, + 0x42, 0x00, 0x74, 0xc2, 0x00, 0x34, 0x18, 0xc2, 0x00, 0x40, 0x06, 0xc2, + 0x00, 0x4c, 0x11, 0xc2, 0x00, 0x61, 0x48, 0x0b, 0x17, 0xc2, 0x00, 0x79, + 0x15, 0xc2, 0x00, 0x95, 0x12, 0xc2, 0x00, 0xad, 0x0d, 0xc2, 0x00, 0xce, + 0x0e, 0xc2, 0x00, 0xde, 0xcc, 0x56, 0x9a, 0x00, 0x1b, 0xa1, 0x1b, 0xc2, + 0x00, 0xf6, 0xcd, 0x2c, 0xb2, 0x00, 0x1b, 0xf1, 0x16, 0xc2, 0x01, 0x02, + 0x03, 0xc2, 0x01, 0x1e, 0xcb, 0x93, 0xa9, 0x00, 0x1e, 0x81, 0x14, 0xc2, + 0x01, 0x2e, 0x08, 0xc2, 0x01, 0x3a, 0xcb, 0x92, 0x3e, 0x08, 0x0c, 0x29, + 0xcb, 0x8c, 0xb3, 0x08, 0x0c, 0x41, 0xc9, 0xab, 0x7f, 0x08, 0x0c, 0x51, + 0x4d, 0x78, 0x4a, 0x42, 0x01, 0x46, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xb9, + 0xc5, 0x1c, 0xae, 0x0f, 0xa4, 0xd1, 0xc5, 0xd7, 0x1d, 0x0f, 0x9a, 0x79, + 0xc5, 0xd9, 0xfc, 0x0f, 0xca, 0xb8, 0x4a, 0x37, 0x44, 0xc2, 0x01, 0x58, + 0xcf, 0x65, 0xc1, 0x01, 0x55, 0x28, 0xc3, 0x02, 0xa3, 0x01, 0x16, 0xb9, + 0xcd, 0x78, 0x30, 0x01, 0x53, 0xd1, 0xd3, 0x43, 0x39, 0x01, 0x53, 0xe0, + 0x42, 0x00, 0x2a, 0xc2, 0x01, 0x64, 0x43, 0x00, 0x5f, 0x42, 0x01, 0x7f, + 0x45, 0x00, 0xd5, 0xc2, 0x01, 0x8b, 0x43, 0x02, 0x9c, 0x42, 0x01, 0x9d, + 0xd4, 0x00, 0xd3, 0x01, 0x55, 0x48, 0x48, 0xb2, 0x2d, 0xc2, 0x01, 0xa9, + 0x03, 0xc2, 0x01, 0xb7, 0xc2, 0x01, 0x24, 0x08, 0x9a, 0x59, 0xc2, 0x02, + 0xe0, 0x08, 0x9a, 0x39, 0x97, 0x08, 0x9a, 0x0b, 0x02, 0x01, 0xc3, 0x8b, + 0x08, 0x99, 0xfa, 0x02, 0x01, 0xc7, 0x18, 0xc2, 0x01, 0xcb, 0xc2, 0x00, + 0xd0, 0x08, 0x99, 0xc9, 0x15, 0xc2, 0x01, 0xdb, 0x0e, 0xc2, 0x01, 0xeb, + 0xc2, 0x00, 0x39, 0x08, 0x99, 0x81, 0xc2, 0x19, 0x2c, 0x08, 0x99, 0x79, + 0xc2, 0x01, 0xc3, 0x08, 0x99, 0x71, 0x04, 0xc2, 0x01, 0xf5, 0x12, 0xc2, + 0x01, 0xff, 0x06, 0xc2, 0x02, 0x09, 0x16, 0xc2, 0x02, 0x17, 0x10, 0xc2, + 0x02, 0x25, 0x0c, 0xc2, 0x02, 0x3b, 0x05, 0xc2, 0x02, 0x45, 0x09, 0xc2, + 0x02, 0x4f, 0x0d, 0xc2, 0x02, 0x59, 0x83, 0x08, 0x98, 0x2b, 0x02, 0x02, + 0x63, 0xc2, 0x01, 0x24, 0x08, 0x98, 0x99, 0x97, 0x08, 0x98, 0x4b, 0x02, + 0x02, 0x6f, 
0x8b, 0x08, 0x98, 0x3b, 0x02, 0x02, 0x73, 0xc2, 0x02, 0xe0, + 0x08, 0x98, 0x78, 0xc5, 0xd7, 0x3b, 0x08, 0x9a, 0xe9, 0x42, 0x07, 0xb2, + 0xc2, 0x02, 0x77, 0x03, 0xc2, 0x02, 0x83, 0xc5, 0x33, 0x5d, 0x08, 0x99, + 0xe1, 0x05, 0x42, 0x02, 0x8f, 0x46, 0x00, 0x8b, 0x42, 0x02, 0x9b, 0xc5, + 0x07, 0x62, 0x01, 0x12, 0x89, 0xca, 0x37, 0x4e, 0x01, 0x12, 0x70, 0x42, + 0x00, 0xdb, 0xc2, 0x02, 0xa5, 0x0a, 0xc2, 0x02, 0xaf, 0x03, 0xc2, 0x02, + 0xc3, 0x16, 0xc2, 0x02, 0xd3, 0x07, 0xc2, 0x02, 0xdd, 0xc2, 0x17, 0xb6, + 0x00, 0xe5, 0xb9, 0xc2, 0x02, 0x09, 0x00, 0xe5, 0xb1, 0xc2, 0x00, 0x28, + 0x00, 0xe5, 0x99, 0x0c, 0xc2, 0x02, 0xe7, 0xc3, 0xe6, 0x47, 0x00, 0xe5, + 0x71, 0x05, 0xc2, 0x02, 0xf3, 0x15, 0xc2, 0x03, 0x03, 0xc3, 0xe5, 0x69, + 0x00, 0xe5, 0x39, 0x09, 0xc2, 0x03, 0x0f, 0x0d, 0xc2, 0x03, 0x1b, 0x12, + 0xc2, 0x03, 0x27, 0xc2, 0x05, 0x1d, 0x00, 0xe5, 0x19, 0xc3, 0x82, 0x78, + 0x00, 0xe5, 0x01, 0x1c, 0xc2, 0x03, 0x33, 0xc2, 0x00, 0x45, 0x00, 0xe4, + 0xe9, 0xc3, 0x09, 0xe6, 0x00, 0xe4, 0xe1, 0xc3, 0x12, 0xb8, 0x00, 0xe4, + 0xd9, 0xc2, 0x00, 0x74, 0x00, 0xe4, 0xc1, 0xc3, 0x21, 0x7e, 0x00, 0xe4, + 0xa9, 0xc3, 0x62, 0xe1, 0x00, 0xe4, 0x99, 0xc3, 0x10, 0xd0, 0x00, 0xe4, + 0x88, 0x03, 0xc2, 0x03, 0x3f, 0xc3, 0x10, 0xd0, 0x00, 0x85, 0x09, 0x09, + 0xc2, 0x03, 0x49, 0xc3, 0x62, 0xe1, 0x00, 0x85, 0x19, 0xc2, 0x00, 0xc4, + 0x00, 0x85, 0x21, 0xc3, 0x21, 0x7e, 0x00, 0x85, 0x29, 0x1c, 0xc2, 0x03, + 0x55, 0x42, 0x01, 0x6f, 0xc2, 0x03, 0x61, 0xc2, 0x00, 0x74, 0x00, 0x85, + 0x41, 0x0d, 0xc2, 0x03, 0x69, 0xc3, 0x03, 0x03, 0x00, 0x85, 0x51, 0xc3, + 0x12, 0xb8, 0x00, 0x85, 0x59, 0xc3, 0x09, 0xe6, 0x00, 0x85, 0x61, 0xc2, + 0x00, 0x45, 0x00, 0x85, 0x69, 0x12, 0xc2, 0x03, 0x75, 0xc3, 0x82, 0x78, + 0x00, 0x85, 0x81, 0x15, 0xc2, 0x03, 0x81, 0xc2, 0x05, 0x1d, 0x00, 0x85, + 0x99, 0xc3, 0xe5, 0x69, 0x00, 0x85, 0xb9, 0x05, 0xc2, 0x03, 0x8d, 0x0c, + 0xc2, 0x03, 0x9d, 0xc3, 0xe6, 0x47, 0x00, 0x85, 0xf1, 0x0a, 0xc2, 0x03, + 0xa9, 0xc2, 0x00, 0x28, 0x00, 0x86, 0x19, 0xc2, 0x17, 0xb6, 0x00, 0x86, + 0x38, 0x03, 0xc2, 0x03, 0xbd, 0xc3, 0x10, 0xd0, 0x00, 0x86, 0x89, 0x09, + 0xc2, 0x03, 0xcd, 0xc3, 0x62, 0xe1, 0x00, 0x86, 0x99, 0x07, 0xc2, 0x03, + 0xd9, 0xc3, 0x21, 0x7e, 0x00, 0x86, 0xa9, 0x1c, 0xc2, 0x03, 0xe3, 0x16, + 0xc2, 0x03, 0xef, 0xc2, 0x00, 0x74, 0x00, 0x86, 0xc1, 0x0d, 0xc2, 0x03, + 0xf9, 0x42, 0x00, 0xdb, 0xc2, 0x04, 0x05, 0xc3, 0x12, 0xb8, 0x00, 0x86, + 0xd9, 0xc3, 0x09, 0xe6, 0x00, 0x86, 0xe1, 0xc2, 0x00, 0x45, 0x00, 0x86, + 0xe9, 0x12, 0xc2, 0x04, 0x0f, 0xc3, 0x82, 0x78, 0x00, 0x87, 0x01, 0x15, + 0xc2, 0x04, 0x1b, 0xc2, 0x05, 0x1d, 0x00, 0x87, 0x19, 0xc3, 0xe5, 0x69, + 0x00, 0x87, 0x39, 0x05, 0xc2, 0x04, 0x27, 0x0c, 0xc2, 0x04, 0x37, 0xc3, + 0xe6, 0x47, 0x00, 0x87, 0x71, 0x0a, 0xc2, 0x04, 0x43, 0xc2, 0x00, 0x28, + 0x00, 0x87, 0x99, 0xc2, 0x02, 0x09, 0x00, 0x87, 0xb1, 0xc2, 0x17, 0xb6, + 0x00, 0x87, 0xb8, 0x03, 0xc2, 0x04, 0x57, 0xc3, 0x10, 0xd0, 0x01, 0x68, + 0x09, 0x09, 0xc2, 0x04, 0x61, 0xc3, 0x62, 0xe1, 0x01, 0x68, 0x19, 0xc2, + 0x00, 0xc4, 0x01, 0x68, 0x21, 0xc3, 0x21, 0x7e, 0x01, 0x68, 0x29, 0x1c, + 0xc2, 0x04, 0x6d, 0x42, 0x01, 0x6f, 0xc2, 0x04, 0x79, 0xc2, 0x00, 0x74, + 0x01, 0x68, 0x41, 0x0d, 0xc2, 0x04, 0x81, 0xc3, 0x03, 0x03, 0x01, 0x68, + 0x51, 0xc3, 0x12, 0xb8, 0x01, 0x68, 0x59, 0xc3, 0x09, 0xe6, 0x01, 0x68, + 0x61, 0xc2, 0x00, 0x45, 0x01, 0x68, 0x69, 0x12, 0xc2, 0x04, 0x8d, 0xc3, + 0x82, 0x78, 0x01, 0x68, 0x81, 0x15, 0xc2, 0x04, 0x99, 0xc2, 0x05, 0x1d, + 0x01, 0x68, 0x99, 0xc3, 0xe5, 0x69, 0x01, 0x68, 0xb9, 0x05, 0xc2, 0x04, + 0xa5, 0x0c, 0xc2, 0x04, 0xb5, 0xc3, 0xe6, 0x47, 0x01, 0x68, 0xf1, 0x0a, + 0xc2, 0x04, 
0xc1, 0xc2, 0x00, 0x28, 0x01, 0x69, 0x19, 0xc2, 0x17, 0xb6, + 0x01, 0x69, 0x38, 0xc3, 0xe5, 0x4b, 0x01, 0x60, 0x01, 0x04, 0xc2, 0x04, + 0xd5, 0xc4, 0xdf, 0x83, 0x01, 0x60, 0x11, 0xc7, 0xc1, 0xf5, 0x01, 0x60, + 0x19, 0x06, 0xc2, 0x04, 0xe1, 0x1b, 0xc2, 0x04, 0xf3, 0x1c, 0xc2, 0x05, + 0x05, 0x8b, 0x01, 0x60, 0x5b, 0x02, 0x05, 0x11, 0xc4, 0xe1, 0x6b, 0x01, + 0x60, 0x69, 0x0e, 0xc2, 0x05, 0x23, 0xc7, 0x60, 0xdd, 0x01, 0x60, 0x79, + 0xc5, 0xdb, 0x78, 0x01, 0x60, 0x81, 0x11, 0xc2, 0x05, 0x2f, 0x12, 0xc2, + 0x05, 0x3b, 0xc5, 0xd7, 0xb3, 0x01, 0x60, 0x99, 0x15, 0xc2, 0x05, 0x45, + 0x16, 0xc2, 0x05, 0x5e, 0xc3, 0xc5, 0x6f, 0x01, 0x60, 0xb1, 0x08, 0xc2, + 0x05, 0x70, 0xc4, 0xdf, 0x9f, 0x01, 0x60, 0xc1, 0x05, 0x42, 0x05, 0x7c, + 0xc3, 0xe5, 0x4b, 0x01, 0x61, 0x81, 0x04, 0xc2, 0x05, 0x88, 0xc4, 0xdf, + 0x83, 0x01, 0x61, 0x91, 0xc7, 0xc1, 0xf5, 0x01, 0x61, 0x99, 0x06, 0xc2, + 0x05, 0x94, 0x1b, 0xc2, 0x05, 0xa6, 0x1c, 0xc2, 0x05, 0xb8, 0x8b, 0x01, + 0x61, 0xdb, 0x02, 0x05, 0xc4, 0xc4, 0xe1, 0x6b, 0x01, 0x61, 0xe9, 0x0e, + 0xc2, 0x05, 0xd6, 0xc7, 0x60, 0xdd, 0x01, 0x61, 0xf9, 0xc5, 0xdb, 0x78, + 0x01, 0x62, 0x01, 0x11, 0xc2, 0x05, 0xe2, 0x12, 0xc2, 0x05, 0xee, 0xc5, + 0xd7, 0xb3, 0x01, 0x62, 0x19, 0x15, 0xc2, 0x05, 0xf8, 0x16, 0xc2, 0x06, + 0x11, 0xc3, 0xc5, 0x6f, 0x01, 0x62, 0x31, 0x08, 0xc2, 0x06, 0x23, 0xc4, + 0xdf, 0x9f, 0x01, 0x62, 0x41, 0x05, 0x42, 0x06, 0x2f, 0xcb, 0x1e, 0x89, + 0x00, 0x58, 0x09, 0x03, 0xc2, 0x06, 0x3b, 0x42, 0x07, 0xb2, 0xc2, 0x06, + 0x47, 0xc5, 0x33, 0x5d, 0x00, 0x59, 0xe1, 0xc8, 0x7d, 0xa4, 0x00, 0x5a, + 0xa8, 0x83, 0x00, 0x58, 0x2b, 0x02, 0x06, 0x53, 0x8b, 0x00, 0x58, 0x3b, + 0x02, 0x06, 0x5f, 0x97, 0x00, 0x58, 0x4b, 0x02, 0x06, 0x63, 0x18, 0xc2, + 0x06, 0x67, 0x87, 0x00, 0x58, 0x79, 0x91, 0x00, 0x58, 0x99, 0x0d, 0xc2, + 0x06, 0x71, 0x09, 0xc2, 0x06, 0x7b, 0x10, 0xc2, 0x06, 0x85, 0x05, 0xc2, + 0x06, 0x9b, 0x0c, 0xc2, 0x06, 0xa5, 0x16, 0xc2, 0x06, 0xaf, 0x06, 0xc2, + 0x06, 0xbd, 0x12, 0xc2, 0x06, 0xcb, 0x04, 0xc2, 0x06, 0xd5, 0xc2, 0x01, + 0xc3, 0x00, 0x59, 0x71, 0x1b, 0xc2, 0x06, 0xdf, 0x14, 0xc2, 0x06, 0xe9, + 0x0e, 0xc2, 0x06, 0xf9, 0x15, 0xc2, 0x07, 0x03, 0xc2, 0x00, 0xd0, 0x00, + 0x59, 0xc9, 0xc2, 0x01, 0x4a, 0x00, 0x5b, 0x88, 0x03, 0xc2, 0x07, 0x13, + 0x8b, 0x00, 0x59, 0xfb, 0x02, 0x07, 0x1f, 0x97, 0x00, 0x5a, 0x0b, 0x02, + 0x07, 0x23, 0x48, 0xb2, 0x2d, 0xc2, 0x07, 0x27, 0x87, 0x00, 0x5a, 0x39, + 0x91, 0x00, 0x5a, 0x58, 0xcd, 0x74, 0xcd, 0x00, 0x5a, 0xb1, 0xcd, 0x73, + 0x0d, 0x00, 0x5a, 0xb8, 0xc4, 0x15, 0xe7, 0x00, 0x5b, 0x31, 0xc3, 0x05, + 0x14, 0x00, 0x5b, 0x39, 0x16, 0xc2, 0x07, 0x35, 0x08, 0xc2, 0x07, 0x41, + 0x15, 0xc2, 0x07, 0x4d, 0xc5, 0x06, 0xdb, 0x00, 0x5b, 0x71, 0xc4, 0x26, + 0x78, 0x00, 0x5b, 0x78, 0x44, 0x05, 0x14, 0xc2, 0x07, 0x59, 0x46, 0x02, + 0xdd, 0x42, 0x07, 0x71, 0x0a, 0xc2, 0x07, 0x7d, 0x19, 0xc2, 0x07, 0x8f, + 0xc2, 0x00, 0xc4, 0x0f, 0x68, 0x52, 0x02, 0x07, 0x9f, 0x11, 0xc2, 0x07, + 0xa5, 0x0b, 0x42, 0x07, 0xb7, 0x00, 0x42, 0x07, 0xc9, 0xc2, 0x22, 0xcc, + 0x0f, 0x68, 0x33, 0x02, 0x07, 0xd5, 0xc4, 0x18, 0x10, 0x0f, 0x68, 0x3a, + 0x02, 0x07, 0xe2, 0x9b, 0x0f, 0x68, 0x8b, 0x02, 0x07, 0xef, 0x00, 0x42, + 0x07, 0xf5, 0xc2, 0x0d, 0x10, 0x0f, 0x68, 0x93, 0x02, 0x08, 0x01, 0x00, + 0x42, 0x08, 0x07, 0xc2, 0x02, 0xa0, 0x0f, 0x69, 0x7b, 0x02, 0x08, 0x13, + 0xc4, 0x02, 0xde, 0x0f, 0x69, 0x81, 0xc2, 0x00, 0xc4, 0x0f, 0x69, 0xba, + 0x02, 0x08, 0x19, 0xc3, 0x09, 0x9e, 0x0f, 0x69, 0x8b, 0x02, 0x08, 0x1f, + 0xc3, 0x0d, 0x14, 0x0f, 0x69, 0x90, 0xc2, 0x22, 0xcc, 0x0f, 0x69, 0x9b, + 0x02, 0x08, 0x25, 0xc4, 0x18, 0x10, 0x0f, 0x69, 0xa0, 0xc6, 0x72, 0x26, + 0x01, 0x01, 
0x21, 0xd9, 0x11, 0xc9, 0x01, 0x71, 0x58, 0x42, 0x06, 0x62, + 0xc2, 0x08, 0x2b, 0x47, 0x0f, 0x81, 0xc2, 0x08, 0x37, 0x42, 0x00, 0x6b, + 0xc2, 0x08, 0x4f, 0x08, 0xc2, 0x08, 0x59, 0xc4, 0x04, 0x1f, 0x0f, 0xa8, + 0x99, 0x4d, 0x7f, 0x32, 0xc2, 0x08, 0x65, 0xca, 0x6c, 0x80, 0x0f, 0xa2, + 0x80, 0xd9, 0x1d, 0x56, 0x01, 0x3d, 0xf1, 0x4f, 0x66, 0x75, 0x42, 0x08, + 0x71, 0xce, 0x1c, 0x92, 0x0b, 0x7f, 0x19, 0xc9, 0xa9, 0xea, 0x0b, 0x7f, + 0x10, 0x4c, 0x11, 0xe2, 0xc2, 0x08, 0x7d, 0x4a, 0x51, 0x89, 0xc2, 0x08, + 0x8f, 0x47, 0x02, 0x0e, 0x42, 0x08, 0x9b, 0x46, 0xc9, 0x58, 0xc2, 0x08, + 0xf1, 0x4c, 0x86, 0x0d, 0x42, 0x09, 0x01, 0x47, 0x34, 0x2f, 0xc2, 0x09, + 0x0d, 0x4d, 0x29, 0xb9, 0xc2, 0x09, 0x22, 0x4f, 0x0b, 0x17, 0x42, 0x09, + 0x5d, 0x47, 0xc8, 0x07, 0xc2, 0x09, 0x98, 0x48, 0xb6, 0x6a, 0x42, 0x09, + 0xb7, 0x47, 0x34, 0x2f, 0xc2, 0x09, 0xd0, 0x47, 0x02, 0x0e, 0x42, 0x09, + 0xda, 0x15, 0xc2, 0x0a, 0x3c, 0x4b, 0x52, 0x39, 0x42, 0x0a, 0x48, 0x47, + 0x02, 0x0e, 0xc2, 0x0a, 0xbb, 0x48, 0x56, 0x9a, 0x42, 0x0b, 0x18, 0xcd, + 0x77, 0x6d, 0x00, 0xe3, 0xf9, 0xc6, 0x77, 0x74, 0x00, 0xe3, 0xf0, 0x8a, + 0x00, 0xe3, 0xb9, 0x98, 0x00, 0xe3, 0xb1, 0x84, 0x00, 0xe3, 0xa9, 0xc2, + 0x02, 0x10, 0x00, 0xe3, 0xa0, 0x91, 0x00, 0xe3, 0x99, 0x87, 0x00, 0xe3, + 0x71, 0x97, 0x00, 0xe3, 0x49, 0x8b, 0x00, 0xe3, 0x21, 0x83, 0x00, 0xe2, + 0xd2, 0x02, 0x0b, 0x2a, 0xc2, 0x01, 0xa3, 0x00, 0xe3, 0x91, 0x90, 0x00, + 0xe3, 0x89, 0xc2, 0x04, 0xcd, 0x00, 0xe3, 0x81, 0x92, 0x00, 0xe3, 0x78, + 0x9b, 0x00, 0xe3, 0x69, 0xc2, 0x1b, 0x88, 0x00, 0xe3, 0x61, 0x86, 0x00, + 0xe3, 0x59, 0x85, 0x00, 0xe3, 0x50, 0x94, 0x00, 0xe3, 0x41, 0xc2, 0x16, + 0x59, 0x00, 0xe3, 0x39, 0x8a, 0x00, 0xe3, 0x31, 0x95, 0x00, 0xe3, 0x28, + 0x03, 0xc2, 0x0b, 0x2e, 0x8e, 0x00, 0xe2, 0xf1, 0xc2, 0x00, 0x75, 0x00, + 0xe2, 0xe9, 0x89, 0x00, 0xe2, 0xe1, 0x96, 0x00, 0xe2, 0xd8, 0xc4, 0x18, + 0x10, 0x00, 0xe2, 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0xe2, 0xb0, 0xc3, 0x0d, + 0x14, 0x00, 0xe2, 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0xe2, 0xa0, 0xc4, 0x02, + 0xde, 0x00, 0xe2, 0x99, 0xc2, 0x02, 0xa0, 0x00, 0xe2, 0x90, 0x46, 0x01, + 0xfc, 0xc2, 0x0b, 0x3e, 0xcd, 0x56, 0x88, 0x01, 0x5d, 0xe0, 0xc9, 0xaa, + 0x56, 0x00, 0xb4, 0xc9, 0xc5, 0xd7, 0xa9, 0x00, 0xb4, 0xa9, 0xc5, 0xcc, + 0x96, 0x00, 0xb4, 0x98, 0xc3, 0x09, 0x38, 0x00, 0xb4, 0xc1, 0xc6, 0xcc, + 0x95, 0x00, 0xb4, 0xa0, 0xc7, 0xc7, 0x82, 0x00, 0xb4, 0xb9, 0x94, 0x00, + 0xb4, 0x91, 0xc3, 0x04, 0xa7, 0x00, 0xb4, 0x30, 0x94, 0x00, 0xb4, 0xb1, + 0xc2, 0x1b, 0x88, 0x00, 0xb4, 0x88, 0xc5, 0xd8, 0xad, 0x00, 0xb4, 0x71, + 0xc3, 0x14, 0xa7, 0x00, 0xb4, 0x20, 0xc6, 0xd1, 0x15, 0x00, 0xb4, 0x69, + 0xc3, 0x00, 0x44, 0x00, 0xb4, 0x28, 0xc4, 0xe2, 0xef, 0x00, 0xb4, 0x51, + 0xc3, 0x1f, 0x48, 0x00, 0xb4, 0x48, 0xc3, 0x00, 0x49, 0x08, 0x24, 0x01, + 0x83, 0x08, 0x24, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0x24, 0x29, 0xc3, 0xb8, + 0xac, 0x08, 0x24, 0x78, 0xc3, 0x0e, 0x66, 0x08, 0x24, 0x31, 0xc2, 0x00, + 0xd0, 0x08, 0x24, 0x50, 0x83, 0x08, 0x24, 0x41, 0xc4, 0xdf, 0xb7, 0x08, + 0x24, 0x48, 0x87, 0x08, 0x24, 0xe0, 0x91, 0x08, 0x24, 0xe8, 0xc2, 0x02, + 0xa0, 0x08, 0x25, 0x11, 0xc4, 0x02, 0xde, 0x08, 0x25, 0x18, 0xc3, 0x09, + 0x9e, 0x08, 0x25, 0x21, 0xc3, 0x0d, 0x14, 0x08, 0x25, 0x28, 0xc2, 0x22, + 0xcc, 0x08, 0x25, 0x31, 0xc4, 0x18, 0x10, 0x08, 0x25, 0x38, 0x8b, 0x08, + 0x25, 0x8b, 0x02, 0x0b, 0x4a, 0x8a, 0x08, 0x25, 0x98, 0x0a, 0xc2, 0x0b, + 0x4e, 0xc2, 0x00, 0x74, 0x08, 0x25, 0xc0, 0x83, 0x08, 0x25, 0xc9, 0xc2, + 0x19, 0x2c, 0x08, 0x25, 0xd0, 0x83, 0x08, 0x25, 0xe1, 0xc2, 0x19, 0x2c, + 0x08, 0x25, 0xf1, 0xc2, 0x00, 0xd0, 0x08, 0x26, 0x80, 0xc2, 0x00, 0x74, + 0x08, 0x26, 
0x18, 0x83, 0x08, 0x26, 0x31, 0xc2, 0x00, 0xd0, 0x08, 0x26, + 0x38, 0x83, 0x08, 0x26, 0x41, 0x15, 0x42, 0x0b, 0x64, 0x83, 0x08, 0x26, + 0x91, 0xc2, 0x00, 0xd0, 0x08, 0x26, 0x98, 0x8b, 0x08, 0x26, 0xcb, 0x02, + 0x0b, 0x6e, 0x8a, 0x08, 0x26, 0xd8, 0x0a, 0xc2, 0x0b, 0x72, 0xc2, 0x00, + 0x74, 0x08, 0x27, 0x00, 0x83, 0x08, 0x27, 0x09, 0xc2, 0x19, 0x2c, 0x08, + 0x27, 0x10, 0x83, 0x08, 0x27, 0x21, 0xc2, 0x19, 0x2c, 0x08, 0x27, 0x31, + 0xc2, 0x00, 0xd0, 0x08, 0x27, 0xc0, 0xc2, 0x00, 0x74, 0x08, 0x27, 0x58, + 0x83, 0x08, 0x27, 0x71, 0xc2, 0x00, 0xd0, 0x08, 0x27, 0x78, 0x83, 0x08, + 0x27, 0x81, 0x15, 0x42, 0x0b, 0x88, 0x83, 0x08, 0x27, 0xd1, 0xc2, 0x00, + 0xd0, 0x08, 0x27, 0xd8, 0xc2, 0x14, 0x49, 0x0e, 0x7e, 0x19, 0xc3, 0x9c, + 0x8d, 0x0e, 0x7a, 0xe1, 0xc6, 0xcd, 0x49, 0x0e, 0x7a, 0x90, 0xc8, 0xbb, + 0x92, 0x0e, 0x7c, 0x81, 0xc8, 0x93, 0xed, 0x0e, 0x7b, 0x80, 0xcf, 0x69, + 0x9f, 0x0e, 0x7a, 0xc8, 0xd0, 0x5f, 0x82, 0x0e, 0x7b, 0xa9, 0xc6, 0xcd, + 0x85, 0x0e, 0x7b, 0x68, 0x00, 0x42, 0x0b, 0x92, 0xc2, 0x25, 0xa1, 0x0e, + 0x7c, 0x09, 0xc2, 0x14, 0x49, 0x0e, 0x7a, 0x82, 0x02, 0x0b, 0xa2, 0x45, + 0xd6, 0xfa, 0xc2, 0x0b, 0xa8, 0xc4, 0xe1, 0xc7, 0x0e, 0x7c, 0x33, 0x02, + 0x0b, 0xcc, 0xc6, 0xce, 0x03, 0x0e, 0x7a, 0xb2, 0x02, 0x0b, 0xd0, 0x00, + 0x42, 0x0b, 0xd4, 0x4d, 0x75, 0xe7, 0xc2, 0x0b, 0xe0, 0x47, 0x87, 0x3a, + 0xc2, 0x0b, 0xf8, 0x16, 0xc2, 0x0c, 0x04, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, + 0x91, 0xc9, 0xa9, 0x48, 0x0e, 0x7b, 0x88, 0x47, 0x87, 0x3a, 0xc2, 0x0c, + 0x10, 0xc7, 0xc8, 0x69, 0x0e, 0x7d, 0x40, 0xc7, 0x2d, 0x19, 0x0e, 0x7a, + 0xe9, 0xc6, 0xcb, 0xdb, 0x0e, 0x7a, 0xa8, 0xcb, 0x93, 0x7d, 0x0e, 0x7b, + 0x51, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0x49, 0xc9, 0xa9, 0x48, 0x0e, 0x7b, + 0x41, 0xc8, 0xbf, 0x6a, 0x0e, 0x7b, 0x38, 0xc8, 0xbf, 0x8a, 0x0e, 0x7b, + 0x11, 0xc4, 0xca, 0xab, 0x0e, 0x7a, 0xf8, 0xc4, 0x78, 0xdc, 0x0e, 0x7a, + 0x03, 0x02, 0x0c, 0x22, 0xc5, 0xdb, 0xb4, 0x0e, 0x79, 0x49, 0xc6, 0xcd, + 0xeb, 0x0e, 0x79, 0x40, 0xca, 0x9b, 0x26, 0x0e, 0x79, 0xf9, 0xc6, 0xd2, + 0xad, 0x0e, 0x79, 0xc2, 0x02, 0x0c, 0x28, 0xc9, 0xb3, 0x56, 0x0e, 0x79, + 0xe9, 0xd4, 0x3e, 0xf8, 0x0e, 0x79, 0xa0, 0xc5, 0xbe, 0xad, 0x0e, 0x79, + 0xe1, 0xc6, 0x6d, 0xaa, 0x0e, 0x79, 0x19, 0x45, 0xda, 0x15, 0x42, 0x0c, + 0x2e, 0xce, 0x38, 0x5a, 0x0e, 0x79, 0xd9, 0xc4, 0xe0, 0xab, 0x0e, 0x79, + 0x59, 0xd3, 0x42, 0x68, 0x0e, 0x78, 0xd1, 0x49, 0xa9, 0xd8, 0x42, 0x0c, + 0x3a, 0xc7, 0xc5, 0xde, 0x0e, 0x79, 0xd1, 0xc7, 0xca, 0x5a, 0x0e, 0x79, + 0xa9, 0x90, 0x0e, 0x79, 0x08, 0x06, 0xc2, 0x0c, 0x46, 0x46, 0x75, 0x93, + 0x42, 0x0c, 0x55, 0xc8, 0x3f, 0x04, 0x0e, 0x79, 0x99, 0x07, 0x42, 0x0c, + 0x5f, 0xc5, 0xd6, 0x50, 0x0e, 0x79, 0x61, 0xc3, 0xe5, 0x72, 0x0e, 0x79, + 0x10, 0xc6, 0xc2, 0x7a, 0x0e, 0x78, 0xf9, 0x46, 0xcd, 0xdf, 0x42, 0x0c, + 0x6b, 0x15, 0xc2, 0x0c, 0x77, 0x43, 0x01, 0x55, 0x42, 0x0c, 0x83, 0x43, + 0x3d, 0xd0, 0xc2, 0x0c, 0x8f, 0x43, 0x01, 0x55, 0x42, 0x0c, 0x9b, 0x43, + 0x01, 0x55, 0xc2, 0x0c, 0xa7, 0x4d, 0x78, 0xd9, 0x42, 0x0c, 0xb3, 0xc5, + 0x40, 0xe7, 0x08, 0xd1, 0xc9, 0xc4, 0x1e, 0x97, 0x08, 0xd1, 0xa0, 0xce, + 0x1e, 0x74, 0x08, 0xd1, 0xb9, 0xc5, 0x1e, 0x8f, 0x08, 0xd1, 0xaa, 0x02, + 0x0c, 0xbf, 0xc2, 0x02, 0x41, 0x08, 0xd1, 0xf1, 0xc2, 0x00, 0xdb, 0x08, + 0xd1, 0xe9, 0xc2, 0x00, 0x39, 0x08, 0xd1, 0xe1, 0xc2, 0x19, 0x2c, 0x08, + 0xd1, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x31, 0x83, 0x08, 0xd1, 0x28, + 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0xb9, 0x83, 0x08, 0xd0, 0xb0, 0xc2, 0x00, + 0xd0, 0x08, 0xd1, 0x21, 0x83, 0x08, 0xd1, 0x18, 0xc2, 0x00, 0xd0, 0x08, + 0xd0, 0xa9, 0x83, 0x08, 0xd0, 0xa0, 0x97, 0x08, 0xd0, 0x41, 0x8b, 0x08, + 0xd0, 0x38, 
0x87, 0x08, 0xd0, 0x28, 0x87, 0x08, 0xd0, 0x10, 0xc9, 0xaf, + 0x1e, 0x01, 0x51, 0x09, 0xc5, 0xd5, 0x6a, 0x01, 0x51, 0x00, 0x03, 0xc2, + 0x0c, 0xc5, 0x12, 0xc2, 0x0c, 0xd4, 0xc5, 0xd5, 0x56, 0x05, 0x4e, 0x31, + 0x0e, 0xc2, 0x0c, 0xe0, 0xc5, 0xdb, 0x91, 0x05, 0x4e, 0x21, 0xcd, 0x79, + 0x9c, 0x05, 0x4e, 0xf1, 0xc9, 0xaa, 0xe6, 0x05, 0x4e, 0xf8, 0xc7, 0xc5, + 0x13, 0x05, 0x4e, 0x79, 0xc3, 0x1f, 0x62, 0x05, 0x4e, 0x00, 0xc2, 0x01, + 0x30, 0x05, 0x4c, 0x93, 0x02, 0x0c, 0xea, 0xc2, 0x00, 0xd0, 0x05, 0x4d, + 0x91, 0xc2, 0x0d, 0xf6, 0x05, 0x4d, 0x8b, 0x02, 0x0c, 0xf0, 0xc2, 0x01, + 0x4a, 0x05, 0x4d, 0x71, 0xc2, 0x00, 0xdb, 0x05, 0x4d, 0x69, 0xc2, 0x00, + 0x39, 0x05, 0x4d, 0x5b, 0x02, 0x0c, 0xf6, 0xc2, 0x19, 0x2c, 0x05, 0x4d, + 0x51, 0xc2, 0x01, 0xc3, 0x05, 0x4d, 0x49, 0xc2, 0x01, 0x5d, 0x05, 0x4d, + 0x3b, 0x02, 0x0c, 0xfc, 0xc2, 0x00, 0xb0, 0x05, 0x4d, 0x2b, 0x02, 0x0d, + 0x02, 0x10, 0xc2, 0x0d, 0x06, 0x06, 0xc2, 0x0d, 0x1f, 0x16, 0xc2, 0x0d, + 0x2f, 0xc2, 0x25, 0x3b, 0x05, 0x4c, 0xbb, 0x02, 0x0d, 0x3f, 0xc2, 0x00, + 0x64, 0x05, 0x4c, 0xab, 0x02, 0x0d, 0x45, 0xc2, 0x02, 0x2b, 0x05, 0x4c, + 0x7b, 0x02, 0x0d, 0x4b, 0x91, 0x05, 0x4c, 0x71, 0x83, 0x05, 0x4c, 0x23, + 0x02, 0x0d, 0x4f, 0x87, 0x05, 0x4c, 0x61, 0x97, 0x05, 0x4c, 0x41, 0x8b, + 0x05, 0x4c, 0x32, 0x02, 0x0d, 0x53, 0xc4, 0x02, 0xde, 0x05, 0x4e, 0x99, + 0xc2, 0x02, 0xa0, 0x05, 0x4e, 0x90, 0xc3, 0x09, 0x9e, 0x05, 0x4e, 0xa1, + 0xc3, 0x0d, 0x14, 0x05, 0x4e, 0xa8, 0xc2, 0x22, 0xcc, 0x05, 0x4e, 0xb1, + 0xc4, 0x18, 0x10, 0x05, 0x4e, 0xb8, 0x03, 0xc2, 0x0d, 0x5d, 0xc5, 0x0d, + 0xe4, 0x05, 0x4d, 0xa8, 0xc7, 0xc5, 0x91, 0x05, 0x4d, 0xc8, 0xc6, 0xcb, + 0xb1, 0x05, 0x4d, 0xb8, 0xc5, 0xda, 0x8d, 0x05, 0x4d, 0x98, 0xc5, 0x00, + 0x2c, 0x01, 0x2c, 0xeb, 0x02, 0x0d, 0x69, 0xc4, 0x00, 0x49, 0x01, 0x2c, + 0xc2, 0x02, 0x0d, 0x72, 0xc5, 0x00, 0x2c, 0x01, 0x2c, 0xb9, 0xc4, 0x00, + 0x49, 0x01, 0x2c, 0xb0, 0x1b, 0xc2, 0x0d, 0x78, 0x0c, 0xc2, 0x0d, 0x8d, + 0x14, 0xc2, 0x0d, 0xa9, 0x09, 0xc2, 0x0d, 0xcc, 0x1c, 0xc2, 0x0d, 0xf3, + 0x04, 0xc2, 0x0e, 0x1a, 0x06, 0xc2, 0x0e, 0x3d, 0x8b, 0x05, 0x0b, 0xfb, + 0x02, 0x0e, 0x60, 0x83, 0x05, 0x0c, 0x2b, 0x02, 0x0e, 0x73, 0x97, 0x05, + 0x0c, 0x9b, 0x02, 0x0e, 0x7b, 0x91, 0x05, 0x0c, 0x63, 0x02, 0x0e, 0x95, + 0x87, 0x05, 0x0c, 0x7a, 0x02, 0x0e, 0xa1, 0x0c, 0xc2, 0x0e, 0xa9, 0x9b, + 0x05, 0x1f, 0xc3, 0x02, 0x0e, 0xc5, 0x97, 0x05, 0x1f, 0x93, 0x02, 0x0e, + 0xd8, 0x91, 0x05, 0x1f, 0x73, 0x02, 0x0e, 0xf2, 0x8b, 0x05, 0x1f, 0x12, + 0x02, 0x0e, 0xfe, 0x9b, 0x05, 0x20, 0xa3, 0x02, 0x0f, 0x11, 0x97, 0x05, + 0x20, 0x73, 0x02, 0x0f, 0x24, 0x91, 0x05, 0x20, 0x53, 0x02, 0x0f, 0x3e, + 0x8b, 0x05, 0x1f, 0xf2, 0x02, 0x0f, 0x4a, 0x9b, 0x05, 0x1e, 0xe3, 0x02, + 0x0f, 0x5d, 0x97, 0x05, 0x1e, 0xb3, 0x02, 0x0f, 0x70, 0x87, 0x05, 0x1e, + 0x93, 0x02, 0x0f, 0x8a, 0x91, 0x05, 0x1e, 0x7b, 0x02, 0x0f, 0x92, 0x83, + 0x05, 0x1e, 0x43, 0x02, 0x0f, 0x9e, 0x14, 0x42, 0x0f, 0xaa, 0x0a, 0xc2, + 0x0f, 0xcd, 0x15, 0xc2, 0x0f, 0xf0, 0x8b, 0x05, 0x18, 0x5b, 0x02, 0x10, + 0x1a, 0x83, 0x05, 0x18, 0x93, 0x02, 0x10, 0x2d, 0x97, 0x05, 0x19, 0x03, + 0x02, 0x10, 0x39, 0x91, 0x05, 0x18, 0xcb, 0x02, 0x10, 0x53, 0x87, 0x05, + 0x18, 0xe3, 0x02, 0x10, 0x5f, 0x9b, 0x05, 0x19, 0x32, 0x02, 0x10, 0x67, + 0x0a, 0xc2, 0x10, 0x7a, 0x9b, 0x05, 0x16, 0x63, 0x02, 0x10, 0x9d, 0x87, + 0x05, 0x16, 0x13, 0x02, 0x10, 0xb0, 0x97, 0x05, 0x16, 0x33, 0x02, 0x10, + 0xb8, 0x8b, 0x05, 0x15, 0x83, 0x02, 0x10, 0xd2, 0x83, 0x05, 0x15, 0xc3, + 0x02, 0x10, 0xe5, 0x91, 0x05, 0x15, 0xfa, 0x02, 0x10, 0xf1, 0x87, 0x05, + 0x15, 0x03, 0x02, 0x10, 0xfd, 0x91, 0x05, 0x14, 0xeb, 0x02, 0x11, 0x05, + 0x97, 0x05, 
0x15, 0x23, 0x02, 0x11, 0x11, 0x83, 0x05, 0x14, 0xb3, 0x02, + 0x11, 0x2b, 0x8b, 0x05, 0x14, 0x7b, 0x02, 0x11, 0x37, 0x1c, 0xc2, 0x11, + 0x4a, 0x0a, 0xc2, 0x11, 0x74, 0x9b, 0x05, 0x15, 0x52, 0x02, 0x11, 0x97, + 0x87, 0x05, 0x14, 0x5b, 0x02, 0x11, 0xaa, 0x91, 0x05, 0x14, 0x43, 0x02, + 0x11, 0xb2, 0x97, 0x05, 0x00, 0xab, 0x02, 0x11, 0xba, 0x83, 0x05, 0x14, + 0x12, 0x02, 0x11, 0xc1, 0x87, 0x05, 0x13, 0xf3, 0x02, 0x11, 0xcd, 0x1a, + 0xc2, 0x11, 0xd5, 0x0b, 0xc2, 0x11, 0xfa, 0x83, 0x05, 0x13, 0x9b, 0x02, + 0x12, 0x05, 0xc2, 0x01, 0xba, 0x05, 0x13, 0xbb, 0x02, 0x12, 0x11, 0x91, + 0x05, 0x13, 0xdb, 0x02, 0x12, 0x1d, 0x0f, 0xc2, 0x12, 0x29, 0x10, 0xc2, + 0x12, 0x4c, 0x0e, 0x42, 0x12, 0x69, 0x8b, 0x05, 0x23, 0x9b, 0x02, 0x12, + 0x93, 0x97, 0x05, 0x24, 0x1b, 0x02, 0x12, 0xa6, 0x91, 0x05, 0x23, 0xfb, + 0x02, 0x12, 0xc0, 0x9b, 0x05, 0x24, 0x4a, 0x02, 0x12, 0xcc, 0x9b, 0x05, + 0x23, 0x6b, 0x02, 0x12, 0xdf, 0x8b, 0x05, 0x22, 0xfb, 0x02, 0x12, 0xf2, + 0x91, 0x05, 0x23, 0x4b, 0x02, 0x13, 0x05, 0xc2, 0x01, 0xba, 0x05, 0x23, + 0x32, 0x02, 0x13, 0x11, 0x09, 0xc2, 0x13, 0x15, 0x8b, 0x05, 0x05, 0x83, + 0x02, 0x13, 0x3a, 0x83, 0x05, 0x05, 0xbb, 0x02, 0x13, 0x4d, 0x97, 0x05, + 0x06, 0x2b, 0x02, 0x13, 0x59, 0x91, 0x05, 0x05, 0xfb, 0x02, 0x13, 0x73, + 0x87, 0x05, 0x06, 0x13, 0x02, 0x13, 0x7f, 0x9b, 0x05, 0x06, 0x5a, 0x02, + 0x13, 0x83, 0x96, 0x05, 0x00, 0x03, 0x02, 0x13, 0x8f, 0x9a, 0x05, 0x00, + 0x09, 0x92, 0x05, 0x00, 0x19, 0x87, 0x05, 0x00, 0x32, 0x02, 0x13, 0x95, + 0x96, 0x05, 0x00, 0x41, 0x9a, 0x05, 0x00, 0x49, 0x92, 0x05, 0x00, 0x58, + 0x9a, 0x05, 0x00, 0x61, 0x92, 0x05, 0x00, 0x70, 0x96, 0x05, 0x00, 0x79, + 0x9a, 0x05, 0x00, 0x81, 0x92, 0x05, 0x00, 0x90, 0x9a, 0x05, 0x00, 0x98, + 0x8b, 0x05, 0x00, 0xc3, 0x02, 0x13, 0xa1, 0x83, 0x05, 0x01, 0x03, 0x02, + 0x13, 0xb4, 0x97, 0x05, 0x01, 0x73, 0x02, 0x13, 0xc0, 0x91, 0x05, 0x01, + 0x3b, 0x02, 0x13, 0xda, 0x87, 0x05, 0x01, 0x53, 0x02, 0x13, 0xe6, 0x9b, + 0x05, 0x01, 0xa3, 0x02, 0x13, 0xee, 0x04, 0x42, 0x14, 0x01, 0x8b, 0x05, + 0x01, 0xd3, 0x02, 0x14, 0x2b, 0x83, 0x05, 0x02, 0x0b, 0x02, 0x14, 0x3e, + 0x97, 0x05, 0x02, 0x63, 0x02, 0x14, 0x4a, 0x91, 0x05, 0x02, 0x43, 0x02, + 0x14, 0x64, 0x9b, 0x05, 0x02, 0x92, 0x02, 0x14, 0x70, 0x8b, 0x05, 0x06, + 0x7b, 0x02, 0x14, 0x83, 0x83, 0x05, 0x06, 0x9b, 0x02, 0x14, 0x8f, 0x91, + 0x05, 0x06, 0xb3, 0x02, 0x14, 0x9b, 0x97, 0x05, 0x06, 0xd3, 0x02, 0x14, + 0xa3, 0x9b, 0x05, 0x07, 0x02, 0x02, 0x14, 0xb6, 0x8b, 0x05, 0x07, 0x23, + 0x02, 0x14, 0xc2, 0x83, 0x05, 0x07, 0x63, 0x02, 0x14, 0xd5, 0x91, 0x05, + 0x07, 0x83, 0x02, 0x14, 0xe1, 0x07, 0xc2, 0x14, 0xed, 0x97, 0x05, 0x07, + 0xb3, 0x02, 0x14, 0xf5, 0x9b, 0x05, 0x07, 0xe2, 0x02, 0x15, 0x08, 0x8b, + 0x05, 0x08, 0x13, 0x02, 0x15, 0x1b, 0x83, 0x05, 0x08, 0x4b, 0x02, 0x15, + 0x2e, 0x97, 0x05, 0x08, 0xb3, 0x02, 0x15, 0x3a, 0x91, 0x05, 0x08, 0x7b, + 0x02, 0x15, 0x54, 0x87, 0x05, 0x08, 0x93, 0x02, 0x15, 0x60, 0x06, 0x42, + 0x15, 0x68, 0x8b, 0x05, 0x08, 0xe3, 0x02, 0x15, 0x8b, 0x83, 0x05, 0x09, + 0x1b, 0x02, 0x15, 0x9e, 0x97, 0x05, 0x09, 0x93, 0x02, 0x15, 0xaa, 0x91, + 0x05, 0x09, 0x5b, 0x02, 0x15, 0xc4, 0x87, 0x05, 0x09, 0x72, 0x02, 0x15, + 0xd0, 0x8b, 0x05, 0x0d, 0xcb, 0x02, 0x15, 0xd8, 0x83, 0x05, 0x0e, 0x0b, + 0x02, 0x15, 0xeb, 0x97, 0x05, 0x0e, 0x83, 0x02, 0x15, 0xf7, 0x91, 0x05, + 0x0e, 0x4b, 0x02, 0x16, 0x11, 0x87, 0x05, 0x0e, 0x63, 0x02, 0x16, 0x1d, + 0x9b, 0x05, 0x0e, 0xb2, 0x02, 0x16, 0x25, 0x8b, 0x05, 0x0e, 0xe3, 0x02, + 0x16, 0x38, 0x83, 0x05, 0x0f, 0x23, 0x02, 0x16, 0x4b, 0x97, 0x05, 0x0f, + 0xa3, 0x02, 0x16, 0x57, 0x91, 0x05, 0x0f, 0x63, 0x02, 0x16, 0x71, 0x87, + 0x05, 0x0f, 
0x83, 0x02, 0x16, 0x7d, 0x09, 0x42, 0x16, 0x89, 0x8b, 0x05, + 0x0f, 0xd3, 0x02, 0x16, 0xac, 0x83, 0x05, 0x10, 0x0b, 0x02, 0x16, 0xbf, + 0x97, 0x05, 0x10, 0x83, 0x02, 0x16, 0xcb, 0x91, 0x05, 0x10, 0x43, 0x02, + 0x16, 0xe5, 0x87, 0x05, 0x10, 0x62, 0x02, 0x16, 0xf1, 0x8b, 0x05, 0x24, + 0x8b, 0x02, 0x16, 0xfd, 0xc2, 0x1d, 0xc1, 0x05, 0x24, 0xd0, 0xc2, 0x00, + 0x8d, 0x05, 0x24, 0x91, 0x87, 0x05, 0x26, 0x30, 0x1b, 0xc2, 0x17, 0x01, + 0xc3, 0xe4, 0xe8, 0x05, 0x25, 0xa1, 0xc3, 0xa9, 0x68, 0x05, 0x26, 0x28, + 0x9b, 0x05, 0x25, 0xe3, 0x02, 0x17, 0x0d, 0xc3, 0xe4, 0xe5, 0x05, 0x25, + 0xe9, 0xc2, 0x00, 0x7e, 0x05, 0x25, 0xf1, 0xc2, 0x01, 0x7f, 0x05, 0x26, + 0x18, 0xc2, 0x00, 0xba, 0x05, 0x24, 0xa9, 0x0a, 0x42, 0x17, 0x15, 0x09, + 0xc2, 0x17, 0x2b, 0xc2, 0x02, 0x37, 0x05, 0x24, 0xb9, 0x83, 0x05, 0x25, + 0x09, 0xc2, 0x01, 0xbb, 0x05, 0x25, 0xb0, 0x8b, 0x05, 0x24, 0xc1, 0xc2, + 0x00, 0x11, 0x05, 0x24, 0xe0, 0x1a, 0xc2, 0x17, 0x37, 0xc2, 0x00, 0xa2, + 0x05, 0x25, 0x68, 0xc3, 0x02, 0xaa, 0x05, 0x24, 0xd9, 0xc2, 0x00, 0x33, + 0x05, 0x25, 0x28, 0x91, 0x05, 0x24, 0xe9, 0xc2, 0x00, 0x8d, 0x05, 0x25, + 0x70, 0xc2, 0x00, 0xa4, 0x05, 0x24, 0xf1, 0xc2, 0x63, 0xd6, 0x05, 0x25, + 0x60, 0xc2, 0x00, 0xfe, 0x05, 0x25, 0x01, 0x97, 0x05, 0x25, 0x40, 0x17, + 0xc2, 0x17, 0x49, 0xc2, 0x01, 0xbb, 0x05, 0x25, 0x59, 0x83, 0x05, 0x25, + 0x91, 0xc4, 0xdf, 0x23, 0x05, 0x26, 0x20, 0xc3, 0x66, 0x20, 0x05, 0x25, + 0x21, 0x97, 0x05, 0x25, 0xc8, 0x0c, 0xc2, 0x17, 0x51, 0x91, 0x05, 0x25, + 0x98, 0xc2, 0x00, 0x33, 0x05, 0x25, 0x79, 0xc2, 0x02, 0x37, 0x05, 0x25, + 0x88, 0xd6, 0x30, 0x64, 0x08, 0x75, 0x88, 0xcf, 0x33, 0xad, 0x08, 0x75, + 0x80, 0x96, 0x08, 0x75, 0x49, 0x99, 0x08, 0x75, 0x31, 0xc2, 0x17, 0xb6, + 0x08, 0x74, 0xb9, 0xc3, 0x6b, 0x53, 0x08, 0x74, 0x00, 0xc2, 0x0c, 0x42, + 0x08, 0x75, 0x39, 0xc2, 0x00, 0xd0, 0x08, 0x74, 0x48, 0xc3, 0x48, 0x60, + 0x08, 0x74, 0xf1, 0xc2, 0x0f, 0x9b, 0x08, 0x74, 0xe8, 0xcf, 0x6b, 0x25, + 0x08, 0x74, 0xd8, 0xc4, 0xdf, 0xa3, 0x08, 0x74, 0xc1, 0x83, 0x08, 0x74, + 0x50, 0x87, 0x08, 0x74, 0xb1, 0x83, 0x08, 0x74, 0x7a, 0x02, 0x17, 0x61, + 0x83, 0x08, 0x74, 0xa9, 0xc2, 0x01, 0x7f, 0x08, 0x74, 0x20, 0x86, 0x08, + 0x74, 0xa1, 0x8e, 0x08, 0x74, 0x58, 0xc2, 0x01, 0x9d, 0x08, 0x74, 0x99, + 0xc3, 0x11, 0xef, 0x08, 0x74, 0x91, 0xc2, 0x00, 0x74, 0x08, 0x74, 0x89, + 0x87, 0x08, 0x74, 0x28, 0xc2, 0x00, 0xd0, 0x08, 0x74, 0x71, 0x83, 0x08, + 0x74, 0x68, 0x0a, 0xc2, 0x17, 0x65, 0xc2, 0x03, 0x4e, 0x08, 0x74, 0x30, + 0xc2, 0x01, 0x7f, 0x08, 0x74, 0x19, 0x87, 0x08, 0x74, 0x10, 0xc9, 0x1c, + 0x63, 0x00, 0x04, 0xa1, 0xc3, 0x16, 0x32, 0x70, 0x03, 0xf8, 0x83, 0x08, + 0xd5, 0xf9, 0x91, 0x08, 0xd5, 0xf1, 0x8b, 0x08, 0xd5, 0xe9, 0x87, 0x08, + 0xd5, 0xe0, 0x9b, 0x00, 0xc5, 0xfb, 0x02, 0x17, 0x71, 0x83, 0x00, 0xa7, + 0xaa, 0x02, 0x17, 0x77, 0x19, 0xc2, 0x17, 0x7b, 0x83, 0x00, 0xa8, 0xab, + 0x02, 0x17, 0x94, 0x91, 0x00, 0xa8, 0x9b, 0x02, 0x17, 0x9c, 0x8b, 0x00, + 0xa8, 0x8b, 0x02, 0x17, 0xa4, 0x87, 0x00, 0xa8, 0x80, 0x9b, 0x00, 0xc5, + 0xf1, 0x4c, 0x86, 0x01, 0xc2, 0x17, 0xa8, 0x91, 0x00, 0xa7, 0x90, 0x83, + 0x00, 0xa8, 0x03, 0x02, 0x17, 0xc0, 0x87, 0x00, 0xa7, 0xb1, 0x8b, 0x00, + 0xa7, 0xc3, 0x02, 0x17, 0xc4, 0x91, 0x00, 0xa7, 0xe2, 0x02, 0x17, 0xc8, + 0x8b, 0x00, 0xa7, 0x80, 0x47, 0xc6, 0x8d, 0xc2, 0x17, 0xcc, 0x9b, 0x00, + 0xc5, 0xe1, 0x46, 0xd3, 0x4f, 0xc2, 0x17, 0xd6, 0x83, 0x00, 0xa6, 0x42, + 0x02, 0x18, 0x02, 0x91, 0x00, 0xc6, 0x53, 0x02, 0x18, 0x06, 0x8b, 0x00, + 0xc6, 0x33, 0x02, 0x18, 0x0a, 0x87, 0x00, 0xa6, 0x49, 0x83, 0x00, 0xa6, + 0x5a, 0x02, 0x18, 0x0e, 0x9b, 0x00, 0xc5, 0xd9, 0x91, 0x00, 0xa6, 0x28, + 0x83, 0x00, 
0xb3, 0xab, 0x02, 0x18, 0x12, 0x91, 0x00, 0xb3, 0x9b, 0x02, + 0x18, 0x16, 0x8b, 0x00, 0xb3, 0x8a, 0x02, 0x18, 0x1a, 0x83, 0x00, 0xac, + 0x9b, 0x02, 0x18, 0x1e, 0x91, 0x00, 0xac, 0x8b, 0x02, 0x18, 0x29, 0x8b, + 0x00, 0xac, 0x7a, 0x02, 0x18, 0x2d, 0xc4, 0x4b, 0x20, 0x00, 0xab, 0xe1, + 0xc4, 0xe1, 0x1f, 0x00, 0xab, 0xda, 0x02, 0x18, 0x31, 0x8b, 0x00, 0xab, + 0x0b, 0x02, 0x18, 0x4a, 0x87, 0x00, 0xaa, 0xf8, 0x8b, 0x00, 0xa6, 0x18, + 0x46, 0x69, 0x75, 0xc2, 0x18, 0x4e, 0x83, 0x00, 0xa4, 0x8a, 0x02, 0x18, + 0xa6, 0x91, 0x00, 0xa4, 0xc3, 0x02, 0x18, 0xaa, 0x8b, 0x00, 0xa4, 0xa3, + 0x02, 0x18, 0xae, 0x87, 0x00, 0xa4, 0x91, 0x83, 0x00, 0xa4, 0xe2, 0x02, + 0x18, 0xb2, 0x91, 0x00, 0xa4, 0x70, 0x8b, 0x00, 0xa4, 0x60, 0x94, 0x00, + 0xc7, 0xa1, 0x8e, 0x00, 0xc7, 0x98, 0x99, 0x00, 0xb3, 0xfb, 0x02, 0x18, + 0xb6, 0x0d, 0xc2, 0x18, 0xc6, 0x10, 0xc2, 0x18, 0xd6, 0x83, 0x00, 0xad, + 0x99, 0x91, 0x00, 0xad, 0x91, 0x8b, 0x00, 0xad, 0x89, 0x87, 0x00, 0xad, + 0x81, 0x95, 0x00, 0xa8, 0x40, 0x91, 0x00, 0xac, 0x43, 0x02, 0x18, 0xe6, + 0xc2, 0x00, 0x28, 0x00, 0xc7, 0x41, 0x83, 0x00, 0xac, 0x49, 0x8b, 0x00, + 0xac, 0x39, 0x87, 0x00, 0xac, 0x30, 0x8a, 0x00, 0xab, 0x7b, 0x02, 0x18, + 0xea, 0x87, 0x00, 0xa3, 0x39, 0x8b, 0x00, 0xa3, 0x41, 0x91, 0x00, 0xa3, + 0x49, 0x83, 0x00, 0xa3, 0x50, 0x19, 0xc2, 0x19, 0x06, 0xc8, 0xbc, 0x52, + 0x00, 0xad, 0x73, 0x02, 0x19, 0x11, 0x83, 0x00, 0xab, 0x33, 0x02, 0x19, + 0x2a, 0x91, 0x00, 0xab, 0x23, 0x02, 0x19, 0x2e, 0x8b, 0x00, 0xab, 0x03, + 0x02, 0x19, 0x32, 0x87, 0x00, 0xaa, 0xf0, 0x9b, 0x00, 0xc5, 0xb9, 0x83, + 0x00, 0xa2, 0xb2, 0x02, 0x19, 0x36, 0x83, 0x00, 0xab, 0x99, 0x91, 0x00, + 0xab, 0x91, 0x8b, 0x00, 0xab, 0x89, 0x87, 0x00, 0xab, 0x80, 0x91, 0x00, + 0xa2, 0xeb, 0x02, 0x19, 0x3a, 0x8b, 0x00, 0xa2, 0xcb, 0x02, 0x19, 0x3e, + 0x87, 0x00, 0xa2, 0xb9, 0x83, 0x00, 0xa3, 0x0a, 0x02, 0x19, 0x42, 0x91, + 0x00, 0xa2, 0x88, 0x8b, 0x00, 0xa2, 0x78, 0x42, 0x00, 0x15, 0x42, 0x19, + 0x46, 0x9b, 0x00, 0xc5, 0x99, 0x83, 0x00, 0xa0, 0xc8, 0x91, 0x00, 0xa0, + 0xa2, 0x02, 0x19, 0x52, 0x8b, 0x00, 0xa0, 0x80, 0xc2, 0x00, 0x28, 0x00, + 0xc7, 0x01, 0x87, 0x00, 0xaa, 0x18, 0x83, 0x00, 0xc6, 0x9b, 0x02, 0x19, + 0x58, 0x91, 0x00, 0xc6, 0x8b, 0x02, 0x19, 0x5c, 0x8b, 0x00, 0xc6, 0x7b, + 0x02, 0x19, 0x60, 0xc2, 0x02, 0xe0, 0x00, 0xc6, 0x70, 0x9b, 0x00, 0xc6, + 0x29, 0x83, 0x00, 0xaa, 0x62, 0x02, 0x19, 0x64, 0x91, 0x00, 0xaa, 0x48, + 0x8b, 0x00, 0xaa, 0x38, 0x44, 0x10, 0x6a, 0xc2, 0x19, 0x68, 0x8b, 0x00, + 0xaa, 0xb0, 0x83, 0x00, 0xaa, 0xd2, 0x02, 0x19, 0x9a, 0x91, 0x00, 0xaa, + 0xc0, 0x95, 0x00, 0xc6, 0xd3, 0x02, 0x19, 0x9e, 0x90, 0x00, 0xc6, 0xcb, + 0x02, 0x19, 0xa2, 0x8f, 0x00, 0xc6, 0xc1, 0x85, 0x00, 0xc6, 0xb9, 0x8d, + 0x00, 0xc6, 0xb1, 0x96, 0x00, 0xc6, 0xa9, 0x92, 0x00, 0xc6, 0xa0, 0x9b, + 0x00, 0xc6, 0x21, 0x83, 0x00, 0xa9, 0x72, 0x02, 0x19, 0xa6, 0x9b, 0x00, + 0xc6, 0x19, 0x91, 0x00, 0xa9, 0x58, 0x83, 0x00, 0xa9, 0xcb, 0x02, 0x19, + 0xaa, 0x91, 0x00, 0xa9, 0xab, 0x02, 0x19, 0xae, 0x8b, 0x00, 0xa9, 0x8b, + 0x02, 0x19, 0xb2, 0x87, 0x00, 0xa9, 0x78, 0xc3, 0x4d, 0xc4, 0x00, 0xa9, + 0x61, 0xc3, 0x2b, 0xd4, 0x00, 0xa2, 0x91, 0x12, 0xc2, 0x19, 0xb6, 0xc3, + 0x90, 0xd8, 0x00, 0xa4, 0x79, 0xc2, 0x01, 0x24, 0x00, 0xa0, 0x39, 0x99, + 0x00, 0xa0, 0xe9, 0xc3, 0x15, 0xdb, 0x00, 0xa5, 0x49, 0xc3, 0x11, 0xf1, + 0x00, 0xa6, 0x31, 0xc3, 0x15, 0x31, 0x00, 0xa6, 0xc9, 0xc3, 0x19, 0xe1, + 0x00, 0xa7, 0x99, 0xc3, 0xd5, 0x5e, 0x00, 0xa3, 0x88, 0x8b, 0x00, 0xa9, + 0x48, 0x9b, 0x00, 0xc5, 0xe9, 0x83, 0x00, 0xa6, 0xda, 0x02, 0x19, 0xc2, + 0x83, 0x00, 0xad, 0x23, 0x02, 0x19, 0xc6, 0x91, 0x00, 0xad, 0x13, 0x02, + 0x19, 0xca, 
+    [large machine-generated hex byte table added by the upstream llvm-project import; the diff's added lines were collapsed together during extraction, and the raw byte values are elided here as they carry no reviewable content]
0xb8, 0x11, 0xc2, 0x35, 0xce, 0x83, 0x01, 0x9d, 0x4b, 0x02, + 0x35, 0xe0, 0x0b, 0xc2, 0x35, 0xea, 0x07, 0xc2, 0x35, 0xf4, 0x8a, 0x01, + 0x9e, 0xb9, 0x8f, 0x01, 0x9e, 0xc1, 0xc2, 0x4c, 0x90, 0x01, 0x9e, 0xc9, + 0x94, 0x01, 0x9e, 0xd1, 0x85, 0x01, 0x9b, 0xb1, 0x88, 0x01, 0x9c, 0x51, + 0x95, 0x01, 0x9d, 0x81, 0x98, 0x01, 0x9d, 0xa1, 0x99, 0x01, 0x9d, 0xd0, + 0x14, 0xc2, 0x36, 0x04, 0x98, 0x01, 0x96, 0x71, 0xc7, 0xc5, 0x6e, 0x01, + 0x98, 0x39, 0xc4, 0x90, 0x43, 0x01, 0x98, 0x40, 0xc5, 0xd4, 0x07, 0x01, + 0x98, 0x01, 0xc5, 0xdc, 0xa4, 0x01, 0x98, 0x09, 0xc4, 0xe4, 0x4f, 0x01, + 0x98, 0x11, 0xc3, 0x3d, 0x51, 0x01, 0x98, 0x19, 0x97, 0x01, 0x9b, 0x99, + 0x8f, 0x01, 0x9e, 0x11, 0xc7, 0x23, 0x58, 0x01, 0x9e, 0xf8, 0x83, 0x01, + 0x9c, 0x23, 0x02, 0x36, 0x0e, 0xc5, 0xd9, 0x0c, 0x01, 0x98, 0x91, 0xc3, + 0x1a, 0x05, 0x01, 0x98, 0xa3, 0x02, 0x36, 0x18, 0x42, 0x00, 0x33, 0xc2, + 0x36, 0x2a, 0xc4, 0x2b, 0x09, 0x01, 0x98, 0xe1, 0x11, 0xc2, 0x36, 0x36, + 0x89, 0x01, 0x9c, 0x79, 0x8d, 0x01, 0x9e, 0x69, 0x8f, 0x01, 0x9c, 0xf3, + 0x02, 0x36, 0x42, 0x96, 0x01, 0x9e, 0x79, 0x84, 0x01, 0x9c, 0x29, 0xc3, + 0x00, 0x64, 0x01, 0x9c, 0x49, 0xc2, 0xd4, 0x88, 0x01, 0x9c, 0x89, 0x8e, + 0x01, 0x9c, 0xc1, 0xc2, 0x00, 0xb0, 0x01, 0x9d, 0x51, 0x98, 0x01, 0x9d, + 0xc1, 0x99, 0x01, 0x9d, 0xf1, 0xc4, 0xe3, 0xb3, 0x01, 0x9e, 0x00, 0x03, + 0xc2, 0x36, 0x46, 0x0b, 0xc2, 0x36, 0x56, 0xc5, 0xd2, 0x2a, 0x01, 0x98, + 0xc3, 0x02, 0x36, 0x68, 0x9b, 0x01, 0x9e, 0x49, 0x84, 0x01, 0x9c, 0x39, + 0xc2, 0xd4, 0x88, 0x01, 0x9c, 0x99, 0xc2, 0x00, 0xb0, 0x01, 0x9d, 0x60, + 0x03, 0xc2, 0x36, 0x6e, 0xc6, 0xd3, 0x31, 0x01, 0x99, 0x09, 0x43, 0x00, + 0xc4, 0xc2, 0x36, 0x7a, 0x94, 0x01, 0x9e, 0xd9, 0x98, 0x01, 0x9e, 0xe0, + 0x83, 0x01, 0x9c, 0x0b, 0x02, 0x36, 0x82, 0xc4, 0x07, 0x9b, 0x01, 0x99, + 0x49, 0x88, 0x01, 0x9c, 0x59, 0x8f, 0x01, 0x9c, 0xd1, 0x95, 0x01, 0x9d, + 0x89, 0x98, 0x01, 0x9d, 0xa9, 0x99, 0x01, 0x9d, 0xd8, 0x03, 0xc2, 0x36, + 0x88, 0xc3, 0xcd, 0xc8, 0x01, 0x99, 0x89, 0xc7, 0xc6, 0x86, 0x01, 0x99, + 0xa1, 0xc4, 0xe2, 0xdb, 0x01, 0x99, 0xe1, 0xc5, 0xde, 0x07, 0x01, 0x99, + 0xf1, 0x93, 0x01, 0x9e, 0x18, 0x83, 0x01, 0x9c, 0x1b, 0x02, 0x36, 0x92, + 0x0b, 0xc2, 0x36, 0xa8, 0x07, 0xc2, 0x36, 0xbb, 0x42, 0x03, 0x53, 0xc2, + 0x36, 0xca, 0x89, 0x01, 0x9c, 0x71, 0x00, 0xc2, 0x36, 0xea, 0x84, 0x01, + 0x9c, 0x33, 0x02, 0x36, 0xfa, 0xc2, 0x00, 0x95, 0x01, 0x9e, 0xb1, 0xc2, + 0xd4, 0x88, 0x01, 0x9c, 0x91, 0x8e, 0x01, 0x9c, 0xb1, 0x8f, 0x01, 0x9c, + 0xe3, 0x02, 0x37, 0x00, 0xc2, 0x00, 0xb0, 0x01, 0x9d, 0x59, 0x95, 0x01, + 0x9d, 0x99, 0x98, 0x01, 0x9d, 0xbb, 0x02, 0x37, 0x04, 0x99, 0x01, 0x9d, + 0xea, 0x02, 0x37, 0x0a, 0x42, 0x04, 0xc6, 0xc2, 0x37, 0x10, 0xc3, 0x93, + 0x9b, 0x01, 0x9a, 0x80, 0x11, 0xc2, 0x37, 0x1c, 0x45, 0x0b, 0x12, 0x42, + 0x37, 0x28, 0xc6, 0x13, 0x52, 0x01, 0x36, 0xe9, 0xc2, 0x00, 0xa6, 0x0f, + 0x8d, 0x51, 0xc6, 0xd2, 0x1d, 0x0f, 0x8d, 0x19, 0x07, 0xc2, 0x37, 0x34, + 0xc2, 0x07, 0xa3, 0x0f, 0x8c, 0xc1, 0xc5, 0x0b, 0x0a, 0x01, 0x4e, 0x41, + 0xcb, 0x12, 0x2e, 0x01, 0x4e, 0x39, 0x86, 0x0f, 0x8a, 0x61, 0x95, 0x0f, + 0x8a, 0x68, 0xc2, 0x17, 0x28, 0x01, 0x35, 0xf9, 0x48, 0xbc, 0xc2, 0x42, + 0x37, 0x40, 0xc4, 0x03, 0x4e, 0x01, 0x15, 0x01, 0x19, 0xc2, 0x37, 0x52, + 0xc6, 0x02, 0xde, 0x0f, 0x8c, 0xd8, 0xc4, 0x1d, 0x1e, 0x01, 0x14, 0xf9, + 0x98, 0x0f, 0x8a, 0x58, 0xc3, 0x25, 0xd6, 0x01, 0x14, 0xf1, 0xc2, 0x52, + 0xdc, 0x0f, 0x8a, 0x70, 0x55, 0x30, 0x23, 0xc2, 0x37, 0x5e, 0xc3, 0x8d, + 0x08, 0x0f, 0x8c, 0x91, 0x8e, 0x0f, 0x8c, 0x88, 0xc2, 0x00, 0x6c, 0x0f, + 0x8d, 0x61, 0x95, 0x0f, 0x8c, 0xd0, 0xc2, 0x7e, 0x61, 0x0f, 0x8d, 0x59, + 0xd7, 0x28, 
0xcd, 0x0f, 0x8c, 0xc8, 0xc5, 0xd8, 0x62, 0x0f, 0x8d, 0x41, + 0xc2, 0x02, 0xbc, 0x0f, 0x8d, 0x39, 0x98, 0x0f, 0x8a, 0x51, 0x85, 0x0f, + 0x8d, 0x30, 0xd3, 0x40, 0x54, 0x0f, 0x8d, 0x21, 0x8d, 0x0f, 0x8c, 0xb8, + 0xcd, 0x77, 0x60, 0x0f, 0x8d, 0x01, 0x44, 0x09, 0x9e, 0xc2, 0x37, 0x78, + 0xc3, 0x02, 0xdf, 0x0f, 0x8c, 0x99, 0xd5, 0x35, 0x0c, 0x01, 0x4e, 0x28, + 0x89, 0x0f, 0x8c, 0xb1, 0xc2, 0x04, 0xe6, 0x0f, 0x8c, 0xa8, 0xc9, 0x2a, + 0xec, 0x01, 0x21, 0x30, 0xc2, 0x00, 0x74, 0x01, 0x20, 0x79, 0xc3, 0x00, + 0xa3, 0x01, 0x20, 0x70, 0xc4, 0x27, 0xe3, 0x01, 0x20, 0x11, 0xc7, 0xc3, + 0x92, 0x01, 0x20, 0x08, 0xc4, 0x6e, 0x67, 0x01, 0x21, 0x0b, 0x02, 0x37, + 0x82, 0x4d, 0x7e, 0xd7, 0x42, 0x37, 0x88, 0xc5, 0xd5, 0xc4, 0x01, 0x21, + 0x21, 0xd2, 0x49, 0x43, 0x01, 0x20, 0xa8, 0x45, 0x0a, 0x11, 0xc2, 0x37, + 0x98, 0xc5, 0xd6, 0x0f, 0x01, 0x20, 0x28, 0x49, 0xb3, 0xd4, 0xc2, 0x37, + 0xa2, 0xc2, 0x03, 0x4e, 0x00, 0x39, 0x08, 0x46, 0xcd, 0xbb, 0x42, 0x37, + 0xca, 0xc2, 0x39, 0x8b, 0x00, 0x39, 0x61, 0xc3, 0x1e, 0x1b, 0x00, 0x38, + 0xda, 0x02, 0x37, 0xdc, 0xc3, 0x11, 0xef, 0x00, 0x39, 0x59, 0xc4, 0x77, + 0x78, 0x00, 0x39, 0x41, 0xc6, 0x7e, 0x1b, 0x00, 0x39, 0x19, 0xd0, 0x58, + 0x42, 0x00, 0x38, 0x89, 0x47, 0xc9, 0x57, 0x42, 0x37, 0xe2, 0xc3, 0x04, + 0x5a, 0x00, 0x39, 0x51, 0xca, 0x9d, 0xc4, 0x00, 0x39, 0x38, 0xc3, 0x11, + 0x38, 0x00, 0x38, 0xf0, 0xc2, 0x00, 0x8e, 0x00, 0x38, 0xd0, 0xd2, 0x49, + 0x79, 0x00, 0x38, 0xb1, 0xc5, 0x49, 0x81, 0x00, 0x38, 0xa8, 0xc9, 0xad, + 0xa4, 0x00, 0x38, 0xa0, 0x00, 0xc2, 0x37, 0xf4, 0xcd, 0x75, 0x7f, 0x00, + 0x39, 0xe0, 0xca, 0xa1, 0x02, 0x00, 0x38, 0x69, 0xc9, 0xaa, 0x71, 0x00, + 0x38, 0x61, 0xc6, 0xaa, 0x74, 0x00, 0x38, 0x58, 0xc5, 0x05, 0x02, 0x00, + 0x39, 0xb9, 0xc5, 0x00, 0xd4, 0x00, 0x39, 0xb0, 0xc5, 0x00, 0x2c, 0x00, + 0x38, 0x39, 0xc4, 0x00, 0x49, 0x00, 0x38, 0x30, 0xc5, 0x33, 0x24, 0x00, + 0x38, 0x23, 0x02, 0x38, 0x00, 0xc9, 0x11, 0xf6, 0x00, 0x38, 0x10, 0xc5, + 0x33, 0x24, 0x00, 0x38, 0x1b, 0x02, 0x38, 0x06, 0xc9, 0x11, 0xf6, 0x00, + 0x38, 0x08, 0xc5, 0x00, 0xd4, 0x00, 0x39, 0xe9, 0xc5, 0x05, 0x02, 0x00, + 0x39, 0xf0, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0x19, 0xc5, 0x05, 0x02, 0x00, + 0x3a, 0x20, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0x29, 0xc5, 0x05, 0x02, 0x00, + 0x3a, 0x30, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x89, 0x91, 0x05, 0x40, 0x80, + 0x91, 0x05, 0x40, 0x91, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x98, 0xd1, 0x52, + 0xff, 0x0f, 0xa8, 0x51, 0xce, 0x6f, 0x1c, 0x0f, 0xa8, 0x49, 0xd3, 0x23, + 0xc8, 0x0f, 0xa8, 0x38, 0x00, 0x42, 0x38, 0x0c, 0xcf, 0x09, 0xf8, 0x01, + 0x4b, 0xd9, 0x42, 0x06, 0x62, 0x42, 0x38, 0x21, 0xc3, 0x02, 0xa3, 0x01, + 0x55, 0xf1, 0xcf, 0x60, 0xf3, 0x01, 0x56, 0x01, 0xd9, 0x1f, 0x18, 0x01, + 0x56, 0x10, 0xc6, 0x0e, 0xa4, 0x01, 0x56, 0xb9, 0xde, 0x0e, 0x8c, 0x01, + 0x56, 0xc0, 0x52, 0x47, 0xdb, 0xc2, 0x38, 0x2d, 0xcf, 0x1d, 0xed, 0x01, + 0x03, 0xe8, 0xca, 0x0e, 0xbe, 0x01, 0x03, 0xe1, 0xc4, 0x00, 0x2d, 0x01, + 0x03, 0xc0, 0xc4, 0x18, 0x10, 0x01, 0x03, 0xb9, 0xc2, 0x22, 0xcc, 0x01, + 0x03, 0xb0, 0xc3, 0x0d, 0x14, 0x01, 0x03, 0xa9, 0xc3, 0x09, 0x9e, 0x01, + 0x03, 0xa0, 0xc2, 0x02, 0xa0, 0x00, 0x05, 0x91, 0xc4, 0x02, 0xde, 0x00, + 0x05, 0x98, 0xc6, 0xca, 0xf7, 0x00, 0xe6, 0x11, 0xc7, 0xc6, 0x2b, 0x00, + 0xe6, 0x08, 0x45, 0x21, 0xed, 0xc2, 0x38, 0x35, 0x83, 0x00, 0xdc, 0xb0, + 0xc2, 0x00, 0xd0, 0x00, 0xdd, 0xe9, 0x83, 0x00, 0xdc, 0xc0, 0xc2, 0x2c, + 0x43, 0x00, 0xdd, 0xe1, 0x83, 0x00, 0xdc, 0xe0, 0xc2, 0x2c, 0x43, 0x00, + 0xdd, 0xd9, 0x83, 0x00, 0xdc, 0xd8, 0xc2, 0x19, 0x2c, 0x00, 0xdd, 0x79, + 0x83, 0x00, 0xdc, 0xf0, 0xc2, 0x00, 0xd0, 0x00, 0xdd, 0x71, 0x83, 0x00, + 0xdc, 0x50, 
0x83, 0x00, 0xdc, 0xa1, 0xc2, 0x19, 0x2c, 0x00, 0xdc, 0x89, + 0xc2, 0x01, 0x30, 0x00, 0xdc, 0x68, 0x97, 0x00, 0xdc, 0x48, 0x87, 0x00, + 0xdc, 0x30, 0xc4, 0x18, 0x10, 0x00, 0xdd, 0xb9, 0xc2, 0x22, 0xcc, 0x00, + 0xdd, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xdd, 0xa9, 0xc3, 0x09, 0x9e, 0x00, + 0xdd, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xdd, 0x99, 0xc2, 0x02, 0xa0, 0x00, + 0xdd, 0x90, 0xc2, 0x01, 0x4a, 0x00, 0xdd, 0x69, 0xc2, 0x01, 0xc3, 0x00, + 0xdd, 0x60, 0xc3, 0xd7, 0xd6, 0x00, 0xdd, 0x19, 0xc4, 0x89, 0x32, 0x00, + 0xdd, 0x10, 0xc5, 0xdb, 0xc3, 0x00, 0xdd, 0x51, 0x10, 0x42, 0x38, 0x3d, + 0xc7, 0xc6, 0x08, 0x00, 0xdd, 0x49, 0xc5, 0x0d, 0xe4, 0x00, 0xdd, 0x39, + 0xc7, 0xc3, 0xbc, 0x00, 0xdd, 0x31, 0xc4, 0xde, 0xff, 0x00, 0xdd, 0x29, + 0xc5, 0xd8, 0x9e, 0x00, 0xdd, 0x20, 0xcb, 0x0e, 0xbd, 0x01, 0x55, 0x81, + 0xcc, 0x24, 0x47, 0x01, 0x55, 0x90, 0xc8, 0x07, 0x5f, 0x01, 0x55, 0xb1, + 0xcf, 0x6a, 0x8f, 0x01, 0x55, 0xd0, 0xd1, 0x55, 0x52, 0x01, 0x14, 0x51, + 0xcb, 0x23, 0xa0, 0x01, 0x14, 0x33, 0x02, 0x38, 0x47, 0x46, 0x00, 0xd4, + 0x42, 0x38, 0x4d, 0xc6, 0x2d, 0xd0, 0x01, 0x56, 0x99, 0xc4, 0x0e, 0xa6, + 0x01, 0x56, 0xa8, 0xca, 0x22, 0x09, 0x0f, 0xb0, 0x1b, 0x02, 0x38, 0x65, + 0x0a, 0xc2, 0x38, 0x6b, 0x15, 0xc2, 0x38, 0x7d, 0xc4, 0x21, 0x23, 0x0f, + 0xcb, 0x90, 0xca, 0x22, 0x09, 0x0f, 0xb1, 0xd1, 0xd1, 0x55, 0x0e, 0x0f, + 0xb1, 0xd8, 0x47, 0xc2, 0x11, 0xc2, 0x38, 0x8c, 0x42, 0x0a, 0x8c, 0xc2, + 0x38, 0x98, 0xc3, 0x0d, 0xe5, 0x07, 0xf2, 0xa8, 0xc9, 0x81, 0x9c, 0x01, + 0x10, 0x53, 0x02, 0x38, 0xa2, 0xcf, 0x0f, 0x0a, 0x07, 0xf2, 0xb9, 0xc6, + 0xbc, 0x34, 0x07, 0xf2, 0xc1, 0xca, 0x0e, 0xbe, 0x07, 0xf3, 0x30, 0x4d, + 0x78, 0x7e, 0xc2, 0x38, 0xa8, 0x45, 0x00, 0x2d, 0xc2, 0x38, 0xc7, 0xce, + 0x61, 0xd5, 0x07, 0xf3, 0x40, 0xe0, 0x05, 0x07, 0x08, 0x59, 0xd9, 0xc4, + 0x1e, 0xc9, 0x00, 0x16, 0xe0, 0xc7, 0x2e, 0x21, 0x0f, 0xb7, 0x49, 0xc8, + 0x36, 0x21, 0x07, 0xf3, 0x01, 0xc7, 0x0b, 0x00, 0x07, 0xf3, 0x08, 0x43, + 0x00, 0x4b, 0xc2, 0x38, 0xd9, 0xcc, 0x8b, 0x11, 0x07, 0xf3, 0x20, 0xc8, + 0x60, 0xf4, 0x07, 0xf3, 0x11, 0xcb, 0x8e, 0x13, 0x07, 0xf3, 0x50, 0x9f, + 0x00, 0x04, 0x91, 0x9e, 0x00, 0x04, 0x88, 0xc3, 0x02, 0x9f, 0x00, 0x04, + 0x91, 0xc3, 0x05, 0x14, 0x00, 0x04, 0x88, 0xc5, 0xd7, 0xa4, 0x0f, 0xad, + 0xb0, 0xca, 0x37, 0x4e, 0x01, 0x13, 0xf1, 0xc5, 0x07, 0x62, 0x01, 0x13, + 0xe0, 0x4c, 0x24, 0x3b, 0xc2, 0x38, 0xeb, 0xcb, 0x0e, 0xbd, 0x01, 0x55, + 0x99, 0x44, 0x1f, 0xb2, 0xc2, 0x38, 0xf7, 0xcf, 0x6a, 0x8f, 0x01, 0x55, + 0xb8, 0xc3, 0x0d, 0xe5, 0x07, 0xf0, 0x99, 0xc3, 0x0a, 0x8c, 0x07, 0xf0, + 0x80, 0xcf, 0x0f, 0x0a, 0x07, 0xf0, 0xa9, 0xc6, 0xbc, 0x34, 0x07, 0xf1, + 0x89, 0xc6, 0xcb, 0x5d, 0x07, 0xf1, 0x90, 0x44, 0x00, 0x4a, 0xc2, 0x39, + 0x03, 0xc7, 0x80, 0x2f, 0x07, 0xf1, 0x98, 0xcb, 0x1a, 0x50, 0x07, 0xf1, + 0xb1, 0x05, 0xc2, 0x39, 0x31, 0xd6, 0x08, 0x88, 0x07, 0xf1, 0xd1, 0xd8, + 0x21, 0x83, 0x07, 0xf1, 0xe1, 0xd4, 0x38, 0xf4, 0x07, 0xf1, 0xf1, 0xce, + 0x25, 0xad, 0x07, 0xf2, 0x41, 0x46, 0x01, 0xfc, 0xc2, 0x39, 0x3d, 0xcd, + 0x0b, 0x91, 0x07, 0xf2, 0x00, 0xc5, 0x0a, 0x8a, 0x07, 0xf0, 0x89, 0xc9, + 0x11, 0xf6, 0x07, 0xf0, 0x90, 0xc3, 0x00, 0x3a, 0x0f, 0x85, 0x01, 0xca, + 0xa6, 0x98, 0x0f, 0x86, 0x78, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x09, 0xc6, + 0x78, 0x78, 0x0f, 0x85, 0x89, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x09, 0xc5, + 0xdd, 0x49, 0x0f, 0x86, 0x88, 0x46, 0xd2, 0xe9, 0xc2, 0x39, 0x49, 0x48, + 0xbe, 0x4a, 0xc2, 0x39, 0x61, 0x46, 0xa8, 0xfa, 0xc2, 0x39, 0x79, 0x45, + 0xdc, 0xf9, 0x42, 0x39, 0x91, 0x11, 0xc2, 0x39, 0xbb, 0x47, 0xc7, 0x2e, + 0x42, 0x39, 0xc7, 0x46, 0xd1, 0xf9, 0xc2, 0x39, 0xdf, 0x48, 0xb5, 0x32, + 0x42, 0x39, 
0xf7, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x41, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xc1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x41, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xc0, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x49, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xc9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x49, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xc8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x59, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xd9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x59, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xd8, 0x49, 0xae, 0x46, 0xc2, 0x3a, 0x0f, 0x47, 0x35, 0xce, + 0x42, 0x3a, 0x27, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x69, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xe9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x69, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xe8, 0xc2, 0x02, 0xa0, 0x01, 0x5e, 0x99, 0xc4, 0x02, 0xde, + 0x01, 0x5e, 0xa0, 0xc3, 0x09, 0x9e, 0x01, 0x5e, 0xa9, 0xc3, 0x0d, 0x14, + 0x01, 0x5e, 0xb0, 0x43, 0x03, 0x35, 0xc2, 0x3a, 0x3f, 0x45, 0x00, 0x8c, + 0xc2, 0x3a, 0x51, 0xd1, 0x0e, 0xb7, 0x01, 0x53, 0x90, 0xcb, 0x90, 0xe9, + 0x0f, 0xae, 0xf9, 0xc3, 0x00, 0x33, 0x0f, 0xa6, 0x18, 0x45, 0x02, 0x6d, + 0xc2, 0x3a, 0x6d, 0xcc, 0x43, 0x07, 0x01, 0x10, 0x10, 0x9c, 0x01, 0x25, + 0xa9, 0x9b, 0x01, 0x25, 0xa1, 0x9a, 0x01, 0x25, 0x99, 0x99, 0x01, 0x25, + 0x91, 0x98, 0x01, 0x25, 0x89, 0x97, 0x01, 0x25, 0x81, 0x96, 0x01, 0x25, + 0x79, 0x95, 0x01, 0x25, 0x71, 0x94, 0x01, 0x25, 0x69, 0x93, 0x01, 0x25, + 0x61, 0x92, 0x01, 0x25, 0x59, 0x91, 0x01, 0x25, 0x51, 0x90, 0x01, 0x25, + 0x49, 0x8f, 0x01, 0x25, 0x41, 0x8e, 0x01, 0x25, 0x39, 0x8d, 0x01, 0x25, + 0x31, 0x8c, 0x01, 0x25, 0x29, 0x8b, 0x01, 0x25, 0x21, 0x8a, 0x01, 0x25, + 0x19, 0x89, 0x01, 0x25, 0x11, 0x88, 0x01, 0x25, 0x09, 0x87, 0x01, 0x25, + 0x01, 0x86, 0x01, 0x24, 0xf9, 0x85, 0x01, 0x24, 0xf1, 0x84, 0x01, 0x24, + 0xe9, 0x83, 0x01, 0x24, 0xe0, 0x99, 0x0f, 0x89, 0x31, 0x9a, 0x0f, 0x89, + 0x39, 0x9b, 0x0f, 0x89, 0x41, 0x9c, 0x0f, 0x89, 0x49, 0x83, 0x0f, 0x88, + 0x81, 0x84, 0x0f, 0x88, 0x89, 0x85, 0x0f, 0x88, 0x91, 0x86, 0x0f, 0x88, + 0x99, 0x87, 0x0f, 0x88, 0xa1, 0x88, 0x0f, 0x88, 0xa9, 0x89, 0x0f, 0x88, + 0xb1, 0x8a, 0x0f, 0x88, 0xb9, 0x8b, 0x0f, 0x88, 0xc1, 0x8c, 0x0f, 0x88, + 0xc9, 0x8d, 0x0f, 0x88, 0xd1, 0x8e, 0x0f, 0x88, 0xd9, 0x8f, 0x0f, 0x88, + 0xe1, 0x90, 0x0f, 0x88, 0xe9, 0x91, 0x0f, 0x88, 0xf1, 0x92, 0x0f, 0x88, + 0xf9, 0x93, 0x0f, 0x89, 0x01, 0x94, 0x0f, 0x89, 0x09, 0x95, 0x0f, 0x89, + 0x11, 0x96, 0x0f, 0x89, 0x19, 0x97, 0x0f, 0x89, 0x21, 0x98, 0x0f, 0x89, + 0x28, 0x42, 0x00, 0x28, 0xc2, 0x3a, 0x85, 0xc7, 0x52, 0xcc, 0x01, 0x24, + 0x01, 0xc2, 0x00, 0xc4, 0x01, 0x23, 0xe8, 0xc7, 0x1f, 0x6e, 0x01, 0x24, + 0x29, 0xc5, 0x66, 0xb1, 0x01, 0x23, 0xf0, 0xc8, 0x48, 0x23, 0x01, 0x24, + 0x21, 0xc6, 0x44, 0x9c, 0x01, 0x24, 0x18, 0xc6, 0x14, 0x07, 0x01, 0x24, + 0x11, 0xc7, 0x34, 0x37, 0x01, 0x24, 0x08, 0xc4, 0x18, 0x10, 0x01, 0x23, + 0xd1, 0xc2, 0x22, 0xcc, 0x01, 0x23, 0xc8, 0xc3, 0x0d, 0x14, 0x01, 0x23, + 0xc1, 0xc3, 0x09, 0x9e, 0x01, 0x23, 0xb8, 0xc4, 0x02, 0xde, 0x01, 0x23, + 0xb1, 0xc2, 0x02, 0xa0, 0x01, 0x23, 0xa8, 0xc5, 0x8e, 0xdf, 0x01, 0x90, + 0x03, 0x02, 0x3a, 0x91, 0xc6, 0xbb, 0xec, 0x01, 0x90, 0x52, 0x02, 0x3a, + 0x97, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x78, 0xc5, 0xc0, 0x7d, 0x01, 0x90, + 0x13, 0x02, 0x3a, 0x9d, 0xc6, 0xc1, 0x86, 0x01, 0x90, 0x5a, 0x02, 0x3a, + 0xa3, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x88, 0xc2, 0x00, 0xd3, 0x01, 0x90, + 0x90, 0xc4, 0x79, 0xf3, 0x01, 0x90, 0x2b, 0x02, 0x3a, 0xa9, 0xc6, 0xba, + 0x7c, 0x01, 0x90, 0x62, 0x02, 0x3a, 0xaf, 0xc2, 0x00, 0xd3, 0x01, 0x90, + 0xa0, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0xa8, 0xc4, 0xc6, 0x7a, 0x01, 0x90, + 0x43, 0x02, 0x3a, 0xb5, 0xc6, 0xc6, 0x79, 0x01, 0x90, 0x4a, 0x02, 0x3a, + 0xb9, 0xc2, 
0x00, 0xd3, 0x01, 0x90, 0xd8, 0xc2, 0x02, 0xa0, 0x01, 0x91, + 0x09, 0xc4, 0x02, 0xde, 0x01, 0x91, 0x11, 0xc2, 0x00, 0xc4, 0x01, 0x91, + 0x48, 0xc3, 0x09, 0x9e, 0x01, 0x91, 0x19, 0x0b, 0xc2, 0x3a, 0xbf, 0xc7, + 0xc8, 0x9a, 0x01, 0x92, 0x00, 0xc2, 0x22, 0xcc, 0x01, 0x91, 0x29, 0x07, + 0xc2, 0x3a, 0xd1, 0x17, 0xc2, 0x3a, 0xdd, 0x16, 0xc2, 0x3a, 0xe7, 0xc6, + 0xcc, 0xbf, 0x01, 0x91, 0x99, 0xc6, 0xca, 0xe5, 0x01, 0x91, 0xa8, 0xc4, + 0x00, 0x2d, 0x01, 0x91, 0x39, 0xc4, 0x61, 0xc1, 0x01, 0x91, 0x79, 0xc9, + 0xaf, 0x8a, 0x01, 0x91, 0xe8, 0xc3, 0x02, 0x6e, 0x01, 0x91, 0x41, 0xc3, + 0x00, 0xc2, 0x01, 0x91, 0xa0, 0xc3, 0x01, 0x54, 0x01, 0x91, 0x51, 0xc4, + 0x04, 0x87, 0x01, 0x91, 0x70, 0xc4, 0x03, 0xd7, 0x01, 0x91, 0x61, 0xc3, + 0x29, 0x82, 0x01, 0x91, 0x68, 0xcd, 0x7b, 0x3c, 0x01, 0x91, 0xb9, 0xc3, + 0x03, 0x15, 0x01, 0x91, 0xd0, 0xc7, 0x75, 0x78, 0x01, 0x91, 0xc9, 0x15, + 0xc2, 0x3a, 0xf3, 0xc3, 0x29, 0x43, 0x01, 0x92, 0x18, 0xd1, 0x01, 0x68, + 0x01, 0x57, 0x91, 0xce, 0x33, 0x92, 0x01, 0x57, 0x98, 0xc5, 0x26, 0xf7, + 0x08, 0xd7, 0xb9, 0xc4, 0x0d, 0xe5, 0x08, 0xd7, 0x9a, 0x02, 0x3a, 0xfd, + 0x45, 0x21, 0xed, 0xc2, 0x3b, 0x03, 0x83, 0x08, 0xd6, 0x98, 0x83, 0x08, + 0xd6, 0xd8, 0x83, 0x08, 0xd6, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0xa1, + 0x83, 0x08, 0xd6, 0x68, 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0x89, 0x83, 0x08, + 0xd6, 0x00, 0x83, 0x08, 0xd6, 0x81, 0xc2, 0x01, 0x30, 0x08, 0xd6, 0x38, + 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0x79, 0x83, 0x08, 0xd6, 0x70, 0xc2, 0x00, + 0xd0, 0x08, 0xd6, 0x51, 0x83, 0x08, 0xd6, 0x08, 0xc5, 0x26, 0xf7, 0x08, + 0xd7, 0x71, 0xc4, 0x0d, 0xe5, 0x08, 0xd7, 0x5a, 0x02, 0x3b, 0x26, 0xc6, + 0x26, 0xf6, 0x08, 0xd7, 0x40, 0x16, 0xc2, 0x3b, 0x2c, 0x08, 0xc2, 0x3b, + 0x3c, 0xc3, 0x05, 0x14, 0x08, 0x43, 0xc8, 0xd3, 0x42, 0xb4, 0x08, 0x43, + 0xb9, 0x45, 0x02, 0x10, 0x42, 0x3b, 0x48, 0xc2, 0xbe, 0xd3, 0x0b, 0x5c, + 0x79, 0xc2, 0x19, 0x2d, 0x0b, 0x5c, 0x50, 0xc2, 0x24, 0x82, 0x0b, 0x5c, + 0x71, 0xc3, 0xa4, 0xa3, 0x0b, 0x5c, 0x41, 0xc2, 0x01, 0x24, 0x0b, 0x5c, + 0x10, 0x15, 0xc2, 0x3b, 0xb1, 0xc3, 0xe5, 0xcc, 0x0b, 0x5c, 0x28, 0xc2, + 0x19, 0x2d, 0x0b, 0x5c, 0x61, 0xc3, 0xe0, 0x95, 0x0b, 0x5b, 0xf0, 0x8f, + 0x0b, 0x5c, 0x49, 0xc2, 0xbe, 0xd3, 0x0b, 0x5c, 0x18, 0xc3, 0xe5, 0x33, + 0x0b, 0x5c, 0x01, 0xc2, 0x00, 0xfa, 0x0b, 0x5b, 0xf8, 0xc2, 0x20, 0xec, + 0x0b, 0x59, 0x79, 0xc3, 0xa6, 0x62, 0x0b, 0x59, 0x38, 0xc2, 0x20, 0xec, + 0x0b, 0x59, 0x61, 0x16, 0xc2, 0x3b, 0xc3, 0xc4, 0xe3, 0x17, 0x0b, 0x59, + 0x41, 0xc3, 0xdb, 0xb7, 0x0b, 0x59, 0x11, 0xc3, 0x20, 0xeb, 0x0b, 0x59, + 0x00, 0xc3, 0x57, 0x0c, 0x0b, 0x59, 0x49, 0xc3, 0x20, 0xeb, 0x0b, 0x59, + 0x29, 0xc2, 0x20, 0xec, 0x0b, 0x58, 0xf8, 0xc3, 0xe6, 0x53, 0x0b, 0x5b, + 0xa3, 0x02, 0x3b, 0xcf, 0xc7, 0xbf, 0xe1, 0x0b, 0x5a, 0x28, 0xca, 0xa7, + 0x6a, 0x0b, 0x5b, 0x99, 0xc4, 0x12, 0xc1, 0x0b, 0x59, 0xc8, 0xc5, 0xd5, + 0x51, 0x0b, 0x5b, 0x71, 0xc4, 0xdf, 0x2f, 0x0b, 0x5a, 0x08, 0xc2, 0x01, + 0x24, 0x0b, 0x5b, 0x21, 0x44, 0x19, 0x61, 0x42, 0x3b, 0xd5, 0x0a, 0xc2, + 0x3b, 0xed, 0xc9, 0xa8, 0x82, 0x0b, 0x59, 0xc0, 0x00, 0xc2, 0x3b, 0xf9, + 0x95, 0x0b, 0x5a, 0xd8, 0x98, 0x0b, 0x58, 0xd9, 0x84, 0x0b, 0x58, 0xd0, + 0x98, 0x0b, 0x58, 0x79, 0x84, 0x0b, 0x58, 0x70, 0x98, 0x0b, 0x58, 0x59, + 0x84, 0x0b, 0x58, 0x50, 0x98, 0x0b, 0x58, 0x29, 0x84, 0x0b, 0x58, 0x20, + 0x98, 0x0b, 0x58, 0xa9, 0x84, 0x0b, 0x58, 0xa0, 0x98, 0x0b, 0x58, 0x69, + 0x84, 0x0b, 0x58, 0x60, 0x98, 0x0b, 0x58, 0x89, 0x84, 0x0b, 0x58, 0x80, + 0x98, 0x0b, 0x58, 0x09, 0x84, 0x0b, 0x58, 0x00, 0xc5, 0x11, 0x55, 0x01, + 0x81, 0x00, 0x45, 0x00, 0x8c, 0xc2, 0x3c, 0x05, 0xc8, 0x7d, 0x5e, 0x0f, + 0xb2, 0x69, 
0x14, 0xc2, 0x3c, 0x21, 0xcd, 0x80, 0x5d, 0x0f, 0xb2, 0x39, + 0xcf, 0x63, 0x78, 0x0f, 0xc9, 0xc1, 0x43, 0x03, 0x35, 0xc2, 0x3c, 0x27, + 0xc8, 0xb5, 0xb2, 0x0f, 0xce, 0xb8, 0xc4, 0x02, 0xde, 0x08, 0x48, 0xd9, + 0x19, 0xc2, 0x3c, 0x33, 0xc2, 0x00, 0xc4, 0x08, 0x48, 0xb8, 0xc8, 0x0d, + 0x03, 0x08, 0x48, 0xc8, 0xc2, 0x20, 0xec, 0x08, 0x48, 0xa9, 0xc2, 0x00, + 0x3d, 0x08, 0x48, 0x40, 0xc3, 0x11, 0xef, 0x08, 0x48, 0xa1, 0xc3, 0x01, + 0x9d, 0x08, 0x48, 0x89, 0xc3, 0x7e, 0x1b, 0x08, 0x48, 0x70, 0xc2, 0x00, + 0x74, 0x08, 0x48, 0x79, 0xc2, 0x01, 0xd0, 0x08, 0x48, 0x00, 0x96, 0x08, + 0x48, 0x38, 0x83, 0x05, 0x42, 0x01, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0x08, + 0x83, 0x05, 0x42, 0x11, 0xc2, 0x01, 0x30, 0x05, 0x43, 0x28, 0xc2, 0x01, + 0x30, 0x05, 0x42, 0x19, 0xc2, 0x19, 0x2c, 0x05, 0x42, 0x39, 0x83, 0x05, + 0x42, 0x59, 0xc2, 0x00, 0xc1, 0x05, 0x43, 0x60, 0x83, 0x05, 0x42, 0x23, + 0x02, 0x3c, 0x3d, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0x28, 0x83, 0x05, 0x42, + 0x41, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0x49, 0x15, 0xc2, 0x3c, 0x43, 0x16, + 0x42, 0x3c, 0x4d, 0x83, 0x05, 0x42, 0x51, 0xc2, 0x02, 0x1c, 0x05, 0x42, + 0x91, 0xc2, 0x0e, 0x9a, 0x05, 0x43, 0x58, 0x83, 0x05, 0x42, 0x61, 0xc2, + 0x00, 0xd0, 0x05, 0x42, 0x68, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0xa1, 0x83, + 0x05, 0x42, 0xa8, 0xc6, 0x24, 0x9c, 0x05, 0x42, 0xb1, 0xc2, 0x00, 0xd0, + 0x05, 0x42, 0xd1, 0x83, 0x05, 0x42, 0xd8, 0xcb, 0x91, 0xf1, 0x05, 0x43, + 0x69, 0xcb, 0x8f, 0xaa, 0x05, 0x43, 0x80, 0x87, 0x05, 0x43, 0x30, 0xc8, + 0xbc, 0x12, 0x05, 0x43, 0x71, 0xc4, 0x0c, 0x2b, 0x05, 0x43, 0x78, 0x4f, + 0x5c, 0xf3, 0xc2, 0x3c, 0x57, 0xd2, 0x47, 0xc9, 0x05, 0x43, 0x90, 0xc9, + 0xb4, 0xf4, 0x08, 0x0e, 0x89, 0xc8, 0xbf, 0x22, 0x08, 0x0f, 0x90, 0xc5, + 0x61, 0xba, 0x08, 0x0e, 0x99, 0xcd, 0x76, 0x1b, 0x08, 0x0f, 0x11, 0x96, + 0x08, 0x0f, 0x60, 0xc2, 0x00, 0x50, 0x08, 0x0f, 0x23, 0x02, 0x3c, 0x69, + 0xc4, 0xe4, 0x9f, 0x08, 0x0f, 0x30, 0x99, 0x08, 0x0e, 0xd1, 0xc7, 0xc9, + 0xce, 0x08, 0x0f, 0x08, 0xc4, 0xd3, 0x73, 0x08, 0x0f, 0x38, 0xc3, 0x19, + 0x78, 0x08, 0x0e, 0xd9, 0x92, 0x08, 0x0f, 0x40, 0xc8, 0x74, 0xc4, 0x00, + 0x4a, 0x91, 0xc6, 0x74, 0xc6, 0x00, 0x4a, 0x88, 0x42, 0x07, 0xb2, 0xc2, + 0x3c, 0x6f, 0x03, 0xc2, 0x3c, 0x7b, 0xc5, 0x33, 0x5d, 0x00, 0x49, 0xe1, + 0xcb, 0x1e, 0x89, 0x00, 0x48, 0x0b, 0x02, 0x3c, 0x87, 0xd4, 0x39, 0xa8, + 0x00, 0x48, 0x01, 0x15, 0xc2, 0x3c, 0x8b, 0xc8, 0xbe, 0xca, 0x05, 0x47, + 0xc1, 0xd9, 0x1e, 0x82, 0x05, 0x47, 0xa1, 0xd0, 0x5a, 0x12, 0x00, 0x4b, + 0x88, 0x99, 0x00, 0x4a, 0x79, 0x97, 0x00, 0x4a, 0x61, 0x8b, 0x00, 0x4a, + 0x41, 0x83, 0x00, 0x49, 0xf1, 0x9b, 0x05, 0x47, 0xf8, 0xc2, 0x49, 0x0c, + 0x00, 0x49, 0xd9, 0x87, 0x00, 0x49, 0xd0, 0x91, 0x00, 0x4a, 0x51, 0x87, + 0x00, 0x4a, 0x30, 0x91, 0x00, 0x4a, 0x49, 0x87, 0x00, 0x4a, 0x29, 0xc6, + 0xcf, 0x2f, 0x00, 0x4a, 0xa8, 0x94, 0x00, 0x4a, 0x1b, 0x02, 0x3c, 0x97, + 0x8e, 0x00, 0x4b, 0x12, 0x02, 0x3c, 0x9b, 0x97, 0x00, 0x4a, 0x13, 0x02, + 0x3c, 0x9f, 0x87, 0x00, 0x4a, 0xb0, 0x8b, 0x00, 0x4a, 0x00, 0x83, 0x00, + 0x49, 0xc9, 0xc7, 0xc4, 0xb1, 0x00, 0x4b, 0xd0, 0x83, 0x00, 0x49, 0xc1, + 0xc2, 0x0d, 0xf6, 0x00, 0x49, 0xb9, 0x0a, 0x42, 0x3c, 0xa3, 0x83, 0x00, + 0x49, 0xa9, 0x47, 0xb2, 0x2e, 0x42, 0x3c, 0xad, 0x0e, 0xc2, 0x3c, 0xbb, + 0x83, 0x00, 0x49, 0x90, 0xc2, 0x00, 0x39, 0x00, 0x49, 0x89, 0x83, 0x00, + 0x49, 0x81, 0xc2, 0x00, 0xd0, 0x00, 0x4a, 0xe8, 0x83, 0x00, 0x49, 0x79, + 0xc2, 0x19, 0x2c, 0x00, 0x4a, 0xf8, 0xc9, 0xad, 0x53, 0x00, 0x4b, 0xc0, + 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x69, 0x83, 0x00, 0x49, 0x61, 0xc2, 0x01, + 0x5d, 0x00, 0x4b, 0xf8, 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x59, 0x83, 0x00, + 0x49, 0x50, 
0x10, 0xc2, 0x3c, 0xc5, 0x83, 0x00, 0x49, 0x41, 0xc2, 0x19, + 0x2c, 0x00, 0x48, 0xf1, 0xc2, 0x01, 0x30, 0x00, 0x48, 0xc8, 0xc2, 0x00, + 0xd0, 0x00, 0x49, 0x39, 0x83, 0x00, 0x49, 0x31, 0x06, 0x42, 0x3c, 0xcf, + 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x29, 0x83, 0x00, 0x49, 0x21, 0x16, 0x42, + 0x3c, 0xdd, 0xc2, 0x00, 0xd0, 0x00, 0x48, 0xe9, 0x83, 0x00, 0x48, 0xe1, + 0xc2, 0x25, 0x3b, 0x00, 0x4b, 0xe0, 0xc2, 0x00, 0xd0, 0x00, 0x48, 0xd9, + 0x83, 0x00, 0x48, 0xd2, 0x02, 0x3c, 0xe7, 0x0a, 0xc2, 0x3c, 0xed, 0x83, + 0x00, 0x48, 0xb9, 0xc2, 0x01, 0x30, 0x00, 0x4b, 0xd9, 0xcb, 0x23, 0x34, + 0x00, 0x4b, 0xe8, 0x0a, 0xc2, 0x3c, 0xf7, 0x83, 0x00, 0x48, 0xa8, 0x97, + 0x00, 0x48, 0xa1, 0x8b, 0x00, 0x48, 0x81, 0x83, 0x00, 0x48, 0x31, 0x9b, + 0x05, 0x47, 0xf1, 0x99, 0x00, 0x4b, 0xa8, 0x87, 0x00, 0x4b, 0x99, 0xc2, + 0x49, 0x0c, 0x00, 0x4b, 0xa0, 0x97, 0x00, 0x48, 0x53, 0x02, 0x3d, 0x01, + 0x87, 0x00, 0x4b, 0xb0, 0x8b, 0x00, 0x48, 0x40, 0x83, 0x00, 0x4a, 0xd9, + 0xc2, 0x00, 0xd0, 0x00, 0x4b, 0xc8, 0xc4, 0x26, 0x78, 0x00, 0x4b, 0x79, + 0xc5, 0x06, 0xdb, 0x00, 0x4b, 0x71, 0x15, 0xc2, 0x3d, 0x05, 0x08, 0xc2, + 0x3d, 0x11, 0x16, 0xc2, 0x3d, 0x1d, 0xc3, 0x05, 0x14, 0x00, 0x4b, 0x39, + 0xc4, 0x15, 0xe7, 0x00, 0x4b, 0x30, 0x45, 0x2c, 0x86, 0xc2, 0x3d, 0x29, + 0x46, 0x2e, 0xee, 0xc2, 0x3d, 0x3f, 0xc2, 0x0c, 0x42, 0x08, 0x20, 0x61, + 0x11, 0xc2, 0x3d, 0x55, 0xc2, 0x14, 0x68, 0x08, 0x20, 0x71, 0xc3, 0x17, + 0x29, 0x08, 0x20, 0x79, 0x8a, 0x08, 0x20, 0x81, 0xc3, 0x6f, 0xb7, 0x08, + 0x20, 0x89, 0xc3, 0xb2, 0x36, 0x08, 0x20, 0x91, 0x16, 0xc2, 0x3d, 0x5d, + 0xc3, 0x80, 0x64, 0x08, 0x20, 0xa1, 0xc4, 0x46, 0xfd, 0x08, 0x20, 0xa9, + 0xc3, 0x30, 0xc1, 0x08, 0x20, 0xb1, 0xc3, 0x72, 0xc8, 0x08, 0x20, 0xb9, + 0xc3, 0x93, 0x51, 0x08, 0x20, 0xc1, 0x07, 0xc2, 0x3d, 0x69, 0xc3, 0x0a, + 0x85, 0x08, 0x20, 0xd1, 0x1c, 0x42, 0x3d, 0x91, 0x45, 0x2c, 0x86, 0xc2, + 0x3d, 0x9d, 0x46, 0x2e, 0xee, 0xc2, 0x3d, 0xb3, 0xc2, 0x0c, 0x42, 0x08, + 0x21, 0xa1, 0x11, 0xc2, 0x3d, 0xc9, 0xc2, 0x14, 0x68, 0x08, 0x21, 0xb1, + 0xc3, 0x17, 0x29, 0x08, 0x21, 0xb9, 0x8a, 0x08, 0x21, 0xc1, 0xc3, 0x6f, + 0xb7, 0x08, 0x21, 0xc9, 0xc3, 0xb2, 0x36, 0x08, 0x21, 0xd1, 0x16, 0xc2, + 0x3d, 0xd1, 0xc3, 0x80, 0x64, 0x08, 0x21, 0xe1, 0xc4, 0x46, 0xfd, 0x08, + 0x21, 0xe9, 0xc3, 0x30, 0xc1, 0x08, 0x21, 0xf1, 0xc3, 0x72, 0xc8, 0x08, + 0x21, 0xf9, 0xc3, 0x93, 0x51, 0x08, 0x22, 0x01, 0x07, 0xc2, 0x3d, 0xdd, + 0xc3, 0x0a, 0x85, 0x08, 0x22, 0x11, 0x1c, 0x42, 0x3e, 0x05, 0xc4, 0x00, + 0x49, 0x01, 0x1e, 0x61, 0xc5, 0x00, 0x2c, 0x01, 0x1d, 0xf8, 0xc4, 0x00, + 0x49, 0x01, 0x1e, 0x59, 0xc5, 0x00, 0x2c, 0x01, 0x1d, 0xf0, 0xc4, 0x8f, + 0x73, 0x0e, 0x98, 0x21, 0xc5, 0x73, 0xcb, 0x0e, 0x98, 0x18, 0xc9, 0x11, + 0xf6, 0x01, 0x24, 0x81, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x50, 0xc9, 0x11, + 0xf6, 0x01, 0x24, 0x79, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x48, 0x00, 0x42, + 0x3e, 0x11, 0x00, 0x42, 0x3e, 0x1d, 0x00, 0x42, 0x3e, 0x29, 0x00, 0x42, + 0x3e, 0x35, 0x00, 0x42, 0x3e, 0x41, 0x00, 0x42, 0x3e, 0x4d, 0xc9, 0x11, + 0xf6, 0x01, 0x24, 0x41, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x10, 0xc9, 0x11, + 0xf6, 0x0f, 0x88, 0x01, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x08, 0xc4, 0x26, + 0x78, 0x08, 0xca, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xca, 0xc1, 0x15, 0xc2, + 0x3e, 0x59, 0x08, 0xc2, 0x3e, 0x65, 0x16, 0xc2, 0x3e, 0x71, 0xc3, 0x05, + 0x14, 0x08, 0xca, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xca, 0x80, 0x91, 0x08, + 0xc9, 0xc1, 0x03, 0xc2, 0x3e, 0x7d, 0x87, 0x08, 0xc9, 0xa9, 0x97, 0x08, + 0xc9, 0x9b, 0x02, 0x3e, 0x85, 0x8b, 0x08, 0xc9, 0x8a, 0x02, 0x3e, 0x89, + 0xc2, 0x00, 0xdb, 0x08, 0xc9, 0x71, 0x83, 0x08, 0xc9, 0x40, 0x83, 0x08, + 0xc9, 0x61, 
0xc2, 0x0d, 0xf6, 0x08, 0xc9, 0x59, 0xc2, 0x00, 0xd0, 0x08, + 0xc9, 0x50, 0xc2, 0x19, 0x2c, 0x08, 0xc9, 0x31, 0x83, 0x08, 0xc9, 0x28, + 0xc2, 0x00, 0xd0, 0x08, 0xc9, 0x19, 0x83, 0x08, 0xc9, 0x10, 0xc2, 0x00, + 0xd0, 0x08, 0xc9, 0x09, 0x83, 0x08, 0xc9, 0x00, 0x83, 0x08, 0xc8, 0xf9, + 0xc2, 0x00, 0xc1, 0x08, 0xc8, 0xd1, 0xc2, 0x19, 0x2c, 0x08, 0xc8, 0xa9, + 0xc2, 0x01, 0x30, 0x08, 0xc8, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xc8, 0xf1, + 0x83, 0x08, 0xc8, 0xe9, 0x06, 0x42, 0x3e, 0x8d, 0xc2, 0x00, 0xd0, 0x08, + 0xc8, 0xe1, 0x83, 0x08, 0xc8, 0xd9, 0xc2, 0x01, 0x6f, 0x08, 0xc8, 0xb0, + 0xc2, 0x00, 0xd0, 0x08, 0xc8, 0x91, 0x83, 0x08, 0xc8, 0x88, 0xc2, 0x00, + 0xd0, 0x08, 0xc8, 0x79, 0x83, 0x08, 0xc8, 0x70, 0xc2, 0x00, 0xd0, 0x08, + 0xc8, 0x69, 0x83, 0x08, 0xc8, 0x60, 0x97, 0x08, 0xc8, 0x28, 0x8b, 0x08, + 0xc8, 0x18, 0x83, 0x08, 0xc8, 0x08, 0xc4, 0x03, 0x03, 0x01, 0x10, 0xa9, + 0xc3, 0x00, 0xbb, 0x00, 0x07, 0xb8, 0xc4, 0x26, 0x78, 0x01, 0x3c, 0x91, + 0xc5, 0x06, 0xdb, 0x01, 0x3c, 0x89, 0x15, 0xc2, 0x3e, 0x97, 0x08, 0xc2, + 0x3e, 0xa3, 0x16, 0xc2, 0x3e, 0xaf, 0xc3, 0x05, 0x14, 0x01, 0x3c, 0x51, + 0xc4, 0x15, 0xe7, 0x0f, 0x88, 0x60, 0xc4, 0x18, 0x10, 0x01, 0x3b, 0xe1, + 0xc2, 0x22, 0xcc, 0x01, 0x3b, 0xd8, 0xc3, 0x0d, 0x14, 0x01, 0x3b, 0xd1, + 0xc3, 0x09, 0x9e, 0x01, 0x3b, 0xc8, 0xc4, 0x02, 0xde, 0x01, 0x3b, 0xc1, + 0xc2, 0x02, 0xa0, 0x01, 0x3b, 0xb8, 0xc4, 0x18, 0x10, 0x01, 0x3c, 0x31, + 0xc2, 0x22, 0xcc, 0x01, 0x3c, 0x28, 0xc3, 0x0d, 0x14, 0x01, 0x3c, 0x21, + 0xc3, 0x09, 0x9e, 0x01, 0x3c, 0x18, 0xc4, 0x02, 0xde, 0x01, 0x3c, 0x11, + 0xc2, 0x02, 0xa0, 0x01, 0x3c, 0x08, 0xcf, 0x66, 0x66, 0x01, 0x58, 0xb1, + 0xd0, 0x5d, 0x52, 0x01, 0x58, 0xb9, 0xce, 0x74, 0xda, 0x01, 0x58, 0xc1, + 0xd1, 0x53, 0xba, 0x01, 0x58, 0xc8, 0xc9, 0x33, 0xad, 0x0f, 0xc8, 0x50, + 0xc9, 0x33, 0xad, 0x0f, 0xc8, 0x58, 0x42, 0x00, 0x2c, 0xc2, 0x3e, 0xbb, + 0x42, 0x02, 0xa0, 0x42, 0x3e, 0xc7, 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x99, + 0xcc, 0x88, 0xdd, 0x0f, 0xc1, 0xd8, 0x45, 0x11, 0x3a, 0xc2, 0x3e, 0xd3, + 0x51, 0x01, 0x51, 0x42, 0x3e, 0xdf, 0xc4, 0x01, 0xa3, 0x01, 0x0c, 0x9b, + 0x02, 0x3e, 0xeb, 0xc5, 0xdb, 0x50, 0x01, 0x70, 0xa0, 0xda, 0x1b, 0xd0, + 0x0f, 0xc4, 0xb8, 0xcb, 0x82, 0xba, 0x01, 0x0f, 0x19, 0xcb, 0x82, 0x36, + 0x01, 0x0e, 0x98, 0xc5, 0x01, 0xa2, 0x01, 0x58, 0x39, 0xd3, 0x43, 0xe4, + 0x01, 0x5c, 0x58, 0xa3, 0x0f, 0x82, 0x99, 0x9d, 0x0f, 0x82, 0x69, 0x9e, + 0x0f, 0x82, 0x71, 0x9f, 0x0f, 0x82, 0x79, 0xa0, 0x0f, 0x82, 0x81, 0xa1, + 0x0f, 0x82, 0x89, 0xa2, 0x0f, 0x82, 0x90, 0xa3, 0x0f, 0x81, 0xf1, 0xa1, + 0x0f, 0x81, 0xe1, 0x9d, 0x0f, 0x81, 0xc1, 0x9e, 0x0f, 0x81, 0xc9, 0x9f, + 0x0f, 0x81, 0xd1, 0xa0, 0x0f, 0x81, 0xd9, 0xa2, 0x0f, 0x81, 0xe8, 0xa0, + 0x0f, 0x81, 0xa1, 0x9f, 0x0f, 0x81, 0x99, 0x9e, 0x0f, 0x81, 0x91, 0x9d, + 0x0f, 0x81, 0x89, 0xa1, 0x0f, 0x81, 0xa9, 0xa2, 0x0f, 0x81, 0xb1, 0xa3, + 0x0f, 0x81, 0xb8, 0x9d, 0x0f, 0x81, 0xf9, 0x9e, 0x0f, 0x82, 0x01, 0x9f, + 0x0f, 0x82, 0x09, 0xa0, 0x0f, 0x82, 0x11, 0xa1, 0x0f, 0x82, 0x19, 0xa2, + 0x0f, 0x82, 0x21, 0xa3, 0x0f, 0x82, 0x28, 0x9d, 0x0f, 0x82, 0x31, 0x9e, + 0x0f, 0x82, 0x39, 0x9f, 0x0f, 0x82, 0x41, 0xa0, 0x0f, 0x82, 0x49, 0xa1, + 0x0f, 0x82, 0x51, 0xa2, 0x0f, 0x82, 0x59, 0xa3, 0x0f, 0x82, 0x60, 0x9d, + 0x0f, 0x82, 0xa1, 0x9e, 0x0f, 0x82, 0xa9, 0x9f, 0x0f, 0x82, 0xb1, 0xa0, + 0x0f, 0x82, 0xb9, 0xa1, 0x0f, 0x82, 0xc1, 0xa2, 0x0f, 0x82, 0xc9, 0xa3, + 0x0f, 0x82, 0xd0, 0x9d, 0x0f, 0x82, 0xd9, 0x9e, 0x0f, 0x82, 0xe1, 0x9f, + 0x0f, 0x82, 0xe9, 0xa0, 0x0f, 0x82, 0xf1, 0xa1, 0x0f, 0x82, 0xf9, 0xa2, + 0x0f, 0x83, 0x01, 0xa3, 0x0f, 0x83, 0x08, 0x9d, 0x0f, 0x83, 0x19, 0x9e, + 0x0f, 0x83, 
0x21, 0x9f, 0x0f, 0x83, 0x29, 0xa0, 0x0f, 0x83, 0x31, 0xa1, + 0x0f, 0x83, 0x39, 0xa2, 0x0f, 0x83, 0x41, 0xa3, 0x0f, 0x83, 0x48, 0x9d, + 0x0f, 0x83, 0x51, 0x9e, 0x0f, 0x83, 0x59, 0x9f, 0x0f, 0x83, 0x61, 0xa0, + 0x0f, 0x83, 0x69, 0xa1, 0x0f, 0x83, 0x71, 0xa2, 0x0f, 0x83, 0x79, 0xa3, + 0x0f, 0x83, 0x80, 0x9d, 0x0f, 0x83, 0x89, 0x9e, 0x0f, 0x83, 0x91, 0x9f, + 0x0f, 0x83, 0x99, 0xa0, 0x0f, 0x83, 0xa1, 0xa1, 0x0f, 0x83, 0xa9, 0xa2, + 0x0f, 0x83, 0xb1, 0xa3, 0x0f, 0x83, 0xb8, 0x9d, 0x0f, 0x83, 0xc1, 0x9e, + 0x0f, 0x83, 0xc9, 0x9f, 0x0f, 0x83, 0xd1, 0xa0, 0x0f, 0x83, 0xd9, 0xa1, + 0x0f, 0x83, 0xe1, 0xa2, 0x0f, 0x83, 0xe9, 0xa3, 0x0f, 0x83, 0xf0, 0x9d, + 0x0f, 0x83, 0xf9, 0x9e, 0x0f, 0x84, 0x01, 0x9f, 0x0f, 0x84, 0x09, 0xa0, + 0x0f, 0x84, 0x11, 0xa1, 0x0f, 0x84, 0x19, 0xa2, 0x0f, 0x84, 0x21, 0xa3, + 0x0f, 0x84, 0x28, 0x9e, 0x0f, 0x84, 0x39, 0x9f, 0x0f, 0x84, 0x41, 0xa0, + 0x0f, 0x84, 0x49, 0xa1, 0x0f, 0x84, 0x51, 0xa2, 0x0f, 0x84, 0x59, 0xa3, + 0x0f, 0x84, 0x61, 0x9d, 0x0f, 0x84, 0x30, 0x9d, 0x0f, 0x84, 0x69, 0x9e, + 0x0f, 0x84, 0x71, 0x9f, 0x0f, 0x84, 0x79, 0xa0, 0x0f, 0x84, 0x81, 0xa1, + 0x0f, 0x84, 0x89, 0xa2, 0x0f, 0x84, 0x91, 0xa3, 0x0f, 0x84, 0x98, 0xc9, + 0xb0, 0x86, 0x01, 0x3d, 0xf9, 0x47, 0x20, 0x7d, 0xc2, 0x3e, 0xef, 0xca, + 0xa6, 0x8e, 0x01, 0x53, 0xa0, 0xc3, 0x01, 0x5d, 0x01, 0x1f, 0xc3, 0x02, + 0x3e, 0xfb, 0xc4, 0x02, 0x6d, 0x01, 0x00, 0xb0, 0xc4, 0x13, 0x85, 0x01, + 0x16, 0x99, 0xc6, 0xc4, 0x5e, 0x01, 0x57, 0x58, 0xc8, 0x06, 0xbf, 0x01, + 0x16, 0x91, 0xc4, 0x1e, 0x43, 0x01, 0x11, 0x60, 0x17, 0xc2, 0x3e, 0xff, + 0x46, 0x1f, 0x87, 0xc2, 0x3f, 0x17, 0x16, 0xc2, 0x3f, 0x23, 0xcf, 0x62, + 0xa6, 0x01, 0x57, 0xe8, 0x14, 0xc2, 0x3f, 0x2f, 0xc3, 0x25, 0xd6, 0x01, + 0x4f, 0xd0, 0xc5, 0xce, 0x22, 0x01, 0x01, 0x09, 0xc8, 0x32, 0xb8, 0x01, + 0x57, 0x50, 0xdd, 0x0f, 0xb9, 0x01, 0x00, 0xf9, 0xc5, 0x59, 0x93, 0x01, + 0x72, 0x00, 0x11, 0xc2, 0x3f, 0x3e, 0xdc, 0x13, 0x19, 0x01, 0x4c, 0xa8, + 0xc9, 0x00, 0xca, 0x01, 0x55, 0x0b, 0x02, 0x3f, 0x48, 0xcc, 0x07, 0xc7, + 0x01, 0x55, 0x10, 0x47, 0xc7, 0x4a, 0xc2, 0x3f, 0x4e, 0xcf, 0x60, 0x4e, + 0x01, 0x0a, 0x01, 0x48, 0x0b, 0x17, 0xc2, 0x3f, 0x5a, 0x46, 0x03, 0x13, + 0x42, 0x3f, 0x7f, 0x4c, 0x24, 0xe3, 0xc2, 0x3f, 0x8b, 0x48, 0x00, 0xda, + 0x42, 0x3f, 0x97, 0xc4, 0x1e, 0x97, 0x08, 0xc1, 0xc9, 0xc5, 0x40, 0xe7, + 0x08, 0xc1, 0xc0, 0x97, 0x08, 0xc1, 0xb1, 0x8b, 0x08, 0xc1, 0xa1, 0x83, + 0x08, 0xc1, 0x60, 0x94, 0x08, 0xc1, 0x90, 0x97, 0x08, 0xc1, 0x80, 0x8b, + 0x08, 0xc1, 0x70, 0xc2, 0x00, 0x39, 0x08, 0xc1, 0x59, 0x83, 0x08, 0xc1, + 0x20, 0x83, 0x08, 0xc1, 0x49, 0xc2, 0x0d, 0xf6, 0x08, 0xc1, 0x41, 0xc2, + 0x00, 0xd0, 0x08, 0xc1, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0xc1, 0x09, 0x83, + 0x08, 0xc1, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0xf9, 0x83, 0x08, 0xc0, + 0xf0, 0x83, 0x08, 0xc0, 0xe9, 0xc2, 0x00, 0xc1, 0x08, 0xc0, 0xc1, 0xc2, + 0x19, 0x2c, 0x08, 0xc0, 0x99, 0xc2, 0x01, 0x30, 0x08, 0xc0, 0x70, 0xc2, + 0x00, 0xd0, 0x08, 0xc0, 0xe1, 0x83, 0x08, 0xc0, 0xd9, 0x06, 0x42, 0x3f, + 0xa9, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0xd1, 0x83, 0x08, 0xc0, 0xc9, 0x16, + 0x42, 0x3f, 0xb3, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0x91, 0x83, 0x08, 0xc0, + 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0x81, 0x83, 0x08, 0xc0, 0x78, 0xc2, + 0x00, 0xd0, 0x08, 0xc0, 0x69, 0x83, 0x08, 0xc0, 0x60, 0xc2, 0x00, 0xd0, + 0x08, 0xc0, 0x59, 0x83, 0x08, 0xc0, 0x50, 0x97, 0x08, 0xc0, 0x49, 0x8b, + 0x08, 0xc0, 0x39, 0x83, 0x08, 0xc0, 0x08, 0x97, 0x08, 0xc0, 0x28, 0x8b, + 0x08, 0xc0, 0x18, 0x03, 0xc2, 0x3f, 0xbd, 0xc8, 0x00, 0x5f, 0x0d, 0xe4, + 0xc3, 0x02, 0x3f, 0xc9, 0xc4, 0x51, 0xb7, 0x0d, 0xe4, 0xb9, 0x0e, 0xc2, + 0x3f, 0xcf, 
0xc6, 0x02, 0xd1, 0x0d, 0xe4, 0xa9, 0xc3, 0x02, 0xa3, 0x0d, + 0xe4, 0xa1, 0xc5, 0x1f, 0x0c, 0x0d, 0xe4, 0x91, 0xcb, 0x8f, 0x94, 0x0d, + 0xe4, 0x88, 0xc7, 0x27, 0x9b, 0x0d, 0xe3, 0xa8, 0xc3, 0x02, 0x6e, 0x0d, + 0xe4, 0x31, 0xc9, 0xac, 0xf0, 0x0d, 0xe4, 0x18, 0xc5, 0xd9, 0x39, 0x0d, + 0xe3, 0xc3, 0x02, 0x3f, 0xdb, 0xc2, 0x00, 0x71, 0x0d, 0xe3, 0xc8, 0x99, + 0x0d, 0xe3, 0x00, 0xc3, 0x02, 0xe9, 0x0d, 0xe1, 0xb9, 0x95, 0x0d, 0xe1, + 0xb0, 0x92, 0x0d, 0xe1, 0xa3, 0x02, 0x3f, 0xe1, 0x96, 0x0d, 0xe1, 0x93, + 0x02, 0x3f, 0xe7, 0x8c, 0x0d, 0xe1, 0x03, 0x02, 0x3f, 0xed, 0x95, 0x0d, + 0xe1, 0x51, 0xc8, 0x33, 0xae, 0x0d, 0xe1, 0x2b, 0x02, 0x3f, 0xf3, 0x8d, + 0x0d, 0xe1, 0xfb, 0x02, 0x3f, 0xf9, 0x8f, 0x0d, 0xe1, 0xe1, 0x90, 0x0d, + 0xe1, 0xd8, 0x8c, 0x0d, 0xe0, 0xa9, 0xc2, 0x08, 0x06, 0x0d, 0xe0, 0x91, + 0x11, 0xc2, 0x3f, 0xff, 0xc2, 0x00, 0xd1, 0x0d, 0xe3, 0x41, 0x07, 0xc2, + 0x40, 0x07, 0x97, 0x0d, 0xe2, 0xc0, 0x90, 0x0d, 0xe1, 0x83, 0x02, 0x40, + 0x13, 0x95, 0x0d, 0xe1, 0x4b, 0x02, 0x40, 0x19, 0x8f, 0x0d, 0xe0, 0xfb, + 0x02, 0x40, 0x1f, 0xc8, 0x33, 0xae, 0x0d, 0xe1, 0x1a, 0x02, 0x40, 0x25, + 0x8f, 0x0d, 0xe0, 0xf3, 0x02, 0x40, 0x2b, 0x95, 0x0d, 0xe1, 0x41, 0xc8, + 0x33, 0xae, 0x0d, 0xe1, 0x10, 0x83, 0x0d, 0xe3, 0x21, 0x8b, 0x0d, 0xe3, + 0x19, 0x91, 0x0d, 0xe3, 0x11, 0x97, 0x0d, 0xe3, 0x08, 0x90, 0x0d, 0xe0, + 0xeb, 0x02, 0x40, 0x31, 0x95, 0x0d, 0xe1, 0x39, 0xc8, 0x33, 0xae, 0x0d, + 0xe1, 0x08, 0x97, 0x0d, 0xe2, 0xb1, 0x8b, 0x0d, 0xe2, 0x68, 0x97, 0x0d, + 0xe2, 0xa9, 0x8b, 0x0d, 0xe2, 0x78, 0x8f, 0x0d, 0xe0, 0x79, 0xc3, 0x02, + 0xe9, 0x0d, 0xe1, 0xe8, 0x8f, 0x0d, 0xe3, 0x31, 0x90, 0x0d, 0xe3, 0x28, + 0xc7, 0x1b, 0x02, 0x00, 0x04, 0x69, 0xde, 0x0e, 0x50, 0x0f, 0xbe, 0x40, + 0x00, 0x42, 0x40, 0x37, 0xcf, 0x09, 0xf8, 0x01, 0x5a, 0x09, 0xd0, 0x03, + 0xb7, 0x01, 0x5a, 0x38, 0xda, 0x1c, 0xa0, 0x01, 0x30, 0xc9, 0xdf, 0x0c, + 0x27, 0x0f, 0xac, 0x89, 0xca, 0x3f, 0x35, 0x01, 0x5f, 0xf0, 0xc4, 0x1e, + 0xc9, 0x01, 0x11, 0xeb, 0x02, 0x40, 0x49, 0xcb, 0x94, 0x59, 0x01, 0x01, + 0xb9, 0x46, 0xcf, 0x95, 0x42, 0x40, 0x4f, 0xd3, 0x46, 0xb6, 0x01, 0x0a, + 0x19, 0xc8, 0x52, 0x00, 0x01, 0x02, 0x78, 0xcb, 0x92, 0xd8, 0x01, 0x02, + 0x59, 0xc4, 0x18, 0x26, 0x01, 0x01, 0xa8, 0xc5, 0x18, 0x25, 0x01, 0x01, + 0xb3, 0x02, 0x40, 0x5b, 0xcf, 0x68, 0xbe, 0x01, 0x57, 0x68, 0xce, 0x55, + 0x99, 0x01, 0x4d, 0x28, 0xca, 0xa1, 0x34, 0x01, 0x33, 0xc9, 0xca, 0x9d, + 0xce, 0x01, 0x33, 0xc1, 0xca, 0x9d, 0x42, 0x01, 0x33, 0xb9, 0xca, 0xa1, + 0x48, 0x01, 0x33, 0xb1, 0xca, 0x9d, 0x9c, 0x01, 0x33, 0xa9, 0xca, 0xa0, + 0x58, 0x01, 0x33, 0xa1, 0xca, 0x9a, 0x7c, 0x01, 0x33, 0x98, 0x83, 0x05, + 0x4a, 0x71, 0x97, 0x05, 0x4a, 0x68, 0x97, 0x05, 0x4a, 0x61, 0x8b, 0x05, + 0x4a, 0x50, 0xc2, 0x25, 0x3b, 0x05, 0x4a, 0x29, 0x83, 0x05, 0x49, 0xd8, + 0xc2, 0x01, 0x30, 0x05, 0x4a, 0x19, 0x83, 0x05, 0x49, 0x90, 0xd1, 0x3f, + 0xe4, 0x0f, 0xdc, 0x59, 0xd0, 0x05, 0xb7, 0x01, 0x16, 0x60, 0x00, 0x42, + 0x40, 0x61, 0xd3, 0x01, 0xb4, 0x01, 0x00, 0xc9, 0xd0, 0x58, 0xd2, 0x01, + 0x71, 0x38, 0xca, 0x6f, 0xb9, 0x0f, 0xaf, 0x49, 0xc4, 0x21, 0xdf, 0x0f, + 0xab, 0x42, 0x02, 0x40, 0x79, 0x42, 0x00, 0xa9, 0xc2, 0x40, 0x7f, 0x09, + 0x42, 0x40, 0x8b, 0x49, 0x05, 0xcb, 0xc2, 0x40, 0x9a, 0xd6, 0x13, 0x1f, + 0x01, 0x4c, 0xa0, 0xcc, 0x06, 0xdb, 0x01, 0x2c, 0xa9, 0xcd, 0x15, 0x02, + 0x0f, 0xdc, 0x38, 0x42, 0x00, 0x5b, 0xc2, 0x40, 0xa6, 0xcc, 0x01, 0xdb, + 0x0f, 0xdc, 0x69, 0xcb, 0x96, 0x7f, 0x0f, 0xdd, 0x99, 0xc6, 0x9e, 0xf4, + 0x0f, 0xdd, 0xd0, 0x00, 0x42, 0x40, 0xb2, 0xca, 0xa2, 0x74, 0x01, 0x1d, + 0x01, 0xc9, 0x57, 0x36, 0x01, 0x1c, 0xf9, 0xca, 0xa3, 0x5a, 0x01, 0x1c, + 0xf0, 0xc7, 
0xb2, 0xec, 0x01, 0x4b, 0xe9, 0xd0, 0x4a, 0x77, 0x0f, 0xdc, + 0x48, 0x44, 0x01, 0x94, 0xc2, 0x40, 0xc4, 0xd3, 0x41, 0xf6, 0x01, 0x70, + 0x50, 0xcc, 0x86, 0xcd, 0x0f, 0xaf, 0x69, 0x44, 0x02, 0xdf, 0xc2, 0x40, + 0xd3, 0xde, 0x06, 0x69, 0x0f, 0xde, 0x18, 0xce, 0x01, 0xb9, 0x01, 0x00, + 0xe9, 0xcc, 0x8a, 0x09, 0x01, 0x4e, 0xd9, 0x03, 0xc2, 0x40, 0xdf, 0xcb, + 0x1a, 0x50, 0x01, 0x71, 0x48, 0xcb, 0x1a, 0x50, 0x01, 0x4c, 0x31, 0x05, + 0xc2, 0x40, 0xeb, 0xd2, 0x21, 0x89, 0x01, 0x80, 0xb9, 0xd6, 0x08, 0x88, + 0x01, 0x80, 0xc9, 0xce, 0x25, 0xad, 0x01, 0x80, 0xd8, 0x00, 0x42, 0x40, + 0xf7, 0x45, 0x01, 0x95, 0xc2, 0x41, 0x03, 0x44, 0x0b, 0x26, 0x42, 0x41, + 0x0f, 0xcd, 0x7e, 0x3b, 0x01, 0x0d, 0x01, 0x48, 0x01, 0x9a, 0x42, 0x41, + 0x1b, 0xcb, 0x6f, 0xff, 0x01, 0x0e, 0xe9, 0xca, 0x88, 0xdf, 0x0f, 0xc1, + 0xd0, 0xd0, 0x58, 0x62, 0x0f, 0xc2, 0x11, 0xc5, 0x01, 0xa2, 0x0f, 0xc2, + 0x30, 0x46, 0x01, 0x52, 0xc2, 0x41, 0x27, 0xc2, 0x02, 0x35, 0x0f, 0xd7, + 0x88, 0x45, 0x00, 0x8c, 0xc2, 0x41, 0x33, 0x16, 0xc2, 0x41, 0x6f, 0xd4, + 0x3b, 0x38, 0x01, 0x0e, 0x21, 0xc8, 0xae, 0xbc, 0x01, 0x0d, 0x33, 0x02, + 0x41, 0x7b, 0x03, 0x42, 0x41, 0x81, 0xc5, 0x01, 0xa2, 0x01, 0x0e, 0x93, + 0x02, 0x41, 0x8d, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x68, 0xd3, 0x43, 0xe4, + 0x01, 0x5c, 0x51, 0xc5, 0x01, 0xa2, 0x01, 0x5c, 0xa8, 0xca, 0x50, 0x5e, + 0x00, 0x7e, 0xb8, 0xc7, 0x0d, 0x04, 0x01, 0x0b, 0x6b, 0x02, 0x41, 0x97, + 0xc8, 0x4b, 0x94, 0x01, 0x0b, 0x7a, 0x02, 0x41, 0x9d, 0xc3, 0x45, 0x6b, + 0x01, 0x0b, 0x63, 0x02, 0x41, 0xa3, 0xc2, 0x00, 0x5f, 0x01, 0x0b, 0x22, + 0x02, 0x41, 0xa7, 0xca, 0xa0, 0xda, 0x01, 0x0c, 0x28, 0xc9, 0x57, 0x20, + 0x01, 0x0c, 0x10, 0xc4, 0x22, 0x44, 0x01, 0x0b, 0x59, 0x91, 0x01, 0x0b, + 0x08, 0xc8, 0xbd, 0x82, 0x08, 0x0c, 0x81, 0xc8, 0x45, 0xf0, 0x08, 0x0c, + 0x98, 0x44, 0x1c, 0x74, 0xc2, 0x41, 0xab, 0xcf, 0x0c, 0x37, 0x0f, 0xac, + 0x80, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xc1, 0xc2, 0x0d, 0x10, 0x08, 0x73, + 0x78, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xb9, 0xc2, 0x0d, 0x10, 0x08, 0x73, + 0x70, 0xca, 0x37, 0x63, 0x08, 0x73, 0xb1, 0xc3, 0x45, 0x6b, 0x08, 0x73, + 0x68, 0xca, 0x9c, 0x5c, 0x08, 0x73, 0xa9, 0xc3, 0x0d, 0x0f, 0x08, 0x73, + 0x60, 0xcb, 0x13, 0xfa, 0x08, 0x73, 0xa1, 0xc4, 0x0d, 0x0e, 0x08, 0x73, + 0x58, 0xc9, 0x18, 0x05, 0x08, 0x73, 0x99, 0xc4, 0x18, 0x12, 0x08, 0x73, + 0x50, 0x4d, 0x7e, 0xbd, 0xc2, 0x41, 0xb1, 0xcd, 0x7e, 0x21, 0x00, 0xb5, + 0x00, 0x91, 0x00, 0xb7, 0x99, 0xce, 0x75, 0x12, 0x00, 0xb6, 0xf9, 0xc5, + 0xd4, 0xac, 0x00, 0xb6, 0xa9, 0x90, 0x00, 0xb5, 0x81, 0x87, 0x00, 0xb5, + 0x79, 0xc3, 0x05, 0x0d, 0x00, 0xb5, 0x48, 0x8a, 0x00, 0xb7, 0x93, 0x02, + 0x41, 0xc7, 0xc3, 0x13, 0x00, 0x00, 0xb7, 0x29, 0xd6, 0x2e, 0x28, 0x00, + 0xb6, 0x59, 0xc7, 0xc9, 0x5e, 0x00, 0xb6, 0x50, 0x43, 0x38, 0x85, 0x42, + 0x41, 0xcd, 0xcb, 0x96, 0xc1, 0x00, 0xb7, 0x41, 0xc2, 0x00, 0xbf, 0x00, + 0xb7, 0x09, 0xc2, 0x00, 0x75, 0x00, 0xb6, 0xeb, 0x02, 0x41, 0xd7, 0xc7, + 0xc5, 0x2f, 0x00, 0xb6, 0x39, 0xcc, 0x84, 0xf9, 0x00, 0xb6, 0x08, 0x4b, + 0x2e, 0x2e, 0xc2, 0x41, 0xdd, 0xd1, 0x55, 0xb8, 0x00, 0xb6, 0xd0, 0x07, + 0xc2, 0x41, 0xfb, 0xc3, 0x67, 0x02, 0x00, 0xb7, 0x19, 0xc6, 0xce, 0xf9, + 0x00, 0xb7, 0x10, 0xc2, 0x00, 0xb1, 0x00, 0xb7, 0x01, 0xc9, 0xaa, 0x0e, + 0x00, 0xb6, 0xb1, 0xc2, 0x00, 0x75, 0x00, 0xb5, 0xb1, 0xc2, 0x00, 0x8e, + 0x00, 0xb5, 0x38, 0xcb, 0x99, 0x97, 0x00, 0xb6, 0xf1, 0x46, 0xcb, 0xbd, + 0x42, 0x42, 0x05, 0xce, 0x72, 0x56, 0x00, 0xb6, 0x79, 0xd3, 0x42, 0xda, + 0x00, 0xb5, 0x30, 0xca, 0xa5, 0x08, 0x00, 0xb6, 0x49, 0xc3, 0x23, 0x1c, + 0x00, 0xb5, 0x59, 0xc3, 0x15, 0x66, 0x00, 0xb5, 0x51, 0xc6, 0xcb, 0xc9, + 0x00, 0xb5, 
0x40, 0x07, 0xc2, 0x42, 0x11, 0xc2, 0x00, 0xb1, 0x00, 0xb5, + 0xc0, 0xc5, 0xd9, 0x75, 0x00, 0xb5, 0xd9, 0xc6, 0xcf, 0xa1, 0x00, 0xb5, + 0xd0, 0xcb, 0x95, 0x4b, 0x00, 0xb5, 0xc8, 0x94, 0x00, 0xb5, 0x18, 0x87, + 0x05, 0x28, 0x03, 0x02, 0x42, 0x1b, 0x90, 0x05, 0x2f, 0x10, 0x87, 0x05, + 0x2f, 0x23, 0x02, 0x42, 0x1f, 0x8b, 0x05, 0x29, 0x33, 0x02, 0x42, 0x27, + 0x83, 0x05, 0x2a, 0x63, 0x02, 0x42, 0x2b, 0x91, 0x05, 0x2d, 0xeb, 0x02, + 0x42, 0x2f, 0x97, 0x05, 0x2c, 0xba, 0x02, 0x42, 0x37, 0x87, 0x05, 0x2f, + 0x33, 0x02, 0x42, 0x3b, 0x8b, 0x05, 0x29, 0x43, 0x02, 0x42, 0x46, 0x83, + 0x05, 0x2a, 0x73, 0x02, 0x42, 0x4a, 0x91, 0x05, 0x2d, 0xfb, 0x02, 0x42, + 0x4e, 0x97, 0x05, 0x2c, 0xca, 0x02, 0x42, 0x59, 0x87, 0x05, 0x2f, 0x43, + 0x02, 0x42, 0x5d, 0x8b, 0x05, 0x29, 0x51, 0x83, 0x05, 0x2a, 0x81, 0x91, + 0x05, 0x2e, 0x0b, 0x02, 0x42, 0x61, 0x97, 0x05, 0x2c, 0xd8, 0x0a, 0xc2, + 0x42, 0x65, 0x87, 0x05, 0x2f, 0x53, 0x02, 0x42, 0x7f, 0x8b, 0x05, 0x29, + 0x61, 0x83, 0x05, 0x2a, 0x91, 0x91, 0x05, 0x2e, 0x1b, 0x02, 0x42, 0x83, + 0x97, 0x05, 0x2c, 0xe8, 0x04, 0xc2, 0x42, 0x87, 0x42, 0x1f, 0xad, 0xc2, + 0x42, 0xa1, 0x87, 0x05, 0x30, 0x43, 0x02, 0x42, 0xbb, 0x8b, 0x05, 0x2a, + 0x31, 0x83, 0x05, 0x2b, 0x71, 0x91, 0x05, 0x2e, 0xf3, 0x02, 0x42, 0xbf, + 0x97, 0x05, 0x2d, 0xb8, 0x12, 0xc2, 0x42, 0xc3, 0x87, 0x05, 0x30, 0x1b, + 0x02, 0x42, 0xe0, 0x8b, 0x05, 0x2a, 0x19, 0x83, 0x05, 0x2b, 0x53, 0x02, + 0x42, 0xe4, 0x91, 0x05, 0x2e, 0xdb, 0x02, 0x42, 0xe8, 0x97, 0x05, 0x2d, + 0xa0, 0x04, 0xc2, 0x42, 0xec, 0x87, 0x05, 0x30, 0x33, 0x02, 0x43, 0x06, + 0x8b, 0x05, 0x2a, 0x29, 0x83, 0x05, 0x2b, 0x69, 0x91, 0x05, 0x2e, 0xeb, + 0x02, 0x43, 0x0e, 0x97, 0x05, 0x2d, 0xb0, 0x87, 0x05, 0x2f, 0x8b, 0x02, + 0x43, 0x12, 0x8b, 0x05, 0x29, 0x89, 0x83, 0x05, 0x2a, 0xc1, 0x91, 0x05, + 0x2e, 0x4b, 0x02, 0x43, 0x16, 0x97, 0x05, 0x2d, 0x10, 0x87, 0x05, 0x2f, + 0x93, 0x02, 0x43, 0x1a, 0x8b, 0x05, 0x29, 0x91, 0x83, 0x05, 0x2a, 0xc9, + 0x91, 0x05, 0x2e, 0x53, 0x02, 0x43, 0x1e, 0x97, 0x05, 0x2d, 0x18, 0x87, + 0x05, 0x2f, 0x9b, 0x02, 0x43, 0x22, 0x0a, 0xc2, 0x43, 0x26, 0x8b, 0x05, + 0x29, 0x99, 0x83, 0x05, 0x2a, 0xd1, 0x91, 0x05, 0x2e, 0x5b, 0x02, 0x43, + 0x40, 0x97, 0x05, 0x2d, 0x20, 0x0a, 0xc2, 0x43, 0x44, 0x87, 0x05, 0x2f, + 0xcb, 0x02, 0x43, 0x62, 0x8b, 0x05, 0x29, 0xc9, 0x83, 0x05, 0x2b, 0x01, + 0x91, 0x05, 0x2e, 0x8b, 0x02, 0x43, 0x66, 0x97, 0x05, 0x2d, 0x50, 0x87, + 0x05, 0x2f, 0xbb, 0x02, 0x43, 0x6a, 0x8b, 0x05, 0x29, 0xb9, 0x83, 0x05, + 0x2a, 0xf1, 0x91, 0x05, 0x2e, 0x7b, 0x02, 0x43, 0x74, 0x97, 0x05, 0x2d, + 0x40, 0x87, 0x05, 0x2f, 0xc3, 0x02, 0x43, 0x78, 0x8b, 0x05, 0x29, 0xc1, + 0x83, 0x05, 0x2a, 0xf9, 0x91, 0x05, 0x2e, 0x83, 0x02, 0x43, 0x7c, 0x97, + 0x05, 0x2d, 0x48, 0x06, 0xc2, 0x43, 0x80, 0x0c, 0xc2, 0x43, 0x9a, 0x89, + 0x05, 0x30, 0x5b, 0x02, 0x43, 0xb4, 0x87, 0x05, 0x30, 0x4b, 0x02, 0x43, + 0xca, 0x1b, 0xc2, 0x43, 0xce, 0x8b, 0x05, 0x2a, 0x39, 0x83, 0x05, 0x2b, + 0x79, 0x91, 0x05, 0x2e, 0xfb, 0x02, 0x43, 0xe8, 0x97, 0x05, 0x2d, 0xc0, + 0x87, 0x05, 0x2f, 0xdb, 0x02, 0x43, 0xec, 0x0a, 0xc2, 0x43, 0xf0, 0x8b, + 0x05, 0x29, 0xd9, 0x83, 0x05, 0x2b, 0x11, 0x91, 0x05, 0x2e, 0x9b, 0x02, + 0x44, 0x0a, 0x97, 0x05, 0x2d, 0x60, 0x87, 0x05, 0x2f, 0xeb, 0x02, 0x44, + 0x0e, 0x0a, 0xc2, 0x44, 0x12, 0x8b, 0x05, 0x29, 0xe9, 0x83, 0x05, 0x2b, + 0x21, 0x91, 0x05, 0x2e, 0xab, 0x02, 0x44, 0x2c, 0x97, 0x05, 0x2d, 0x70, + 0x87, 0x05, 0x2f, 0xfb, 0x02, 0x44, 0x30, 0x8b, 0x05, 0x29, 0xf9, 0x83, + 0x05, 0x2b, 0x31, 0x91, 0x05, 0x2e, 0xbb, 0x02, 0x44, 0x34, 0x97, 0x05, + 0x2d, 0x80, 0x87, 0x05, 0x30, 0x03, 0x02, 0x44, 0x38, 0x8b, 0x05, 0x2a, + 0x01, 0x83, 
0x05, 0x2b, 0x39, 0x91, 0x05, 0x2e, 0xc3, 0x02, 0x44, 0x3c, + 0x97, 0x05, 0x2d, 0x88, 0x87, 0x05, 0x30, 0x13, 0x02, 0x44, 0x40, 0x8b, + 0x05, 0x2a, 0x11, 0x83, 0x05, 0x2b, 0x49, 0x91, 0x05, 0x2e, 0xd3, 0x02, + 0x44, 0x44, 0x97, 0x05, 0x2d, 0x98, 0x90, 0x05, 0x29, 0x28, 0x90, 0x05, + 0x2a, 0x50, 0x91, 0x05, 0x2b, 0x8b, 0x02, 0x44, 0x48, 0x90, 0x05, 0x2d, + 0xd8, 0x90, 0x05, 0x2c, 0xb0, 0xc4, 0xe2, 0xaf, 0x05, 0x30, 0x99, 0xc2, + 0x04, 0xc6, 0x05, 0x30, 0xc0, 0xc4, 0xe2, 0xaf, 0x05, 0x30, 0xa1, 0xc3, + 0x38, 0x86, 0x05, 0x30, 0xe0, 0xc3, 0x00, 0x74, 0x05, 0x30, 0xa9, 0xc2, + 0x04, 0xc6, 0x05, 0x30, 0xc9, 0xc3, 0x08, 0x48, 0x05, 0x30, 0xe8, 0xc3, + 0x01, 0x95, 0x05, 0x30, 0xd1, 0x11, 0x42, 0x44, 0x4c, 0xc9, 0x57, 0x36, + 0x01, 0x1e, 0x81, 0x45, 0x00, 0x8c, 0x42, 0x44, 0x58, 0xc7, 0x33, 0xdf, + 0x00, 0x00, 0x5b, 0x02, 0x44, 0x64, 0xc4, 0x3b, 0x19, 0x01, 0x5b, 0xf8, + 0x00, 0x42, 0x44, 0x6a, 0xcb, 0x99, 0x1e, 0x01, 0x81, 0xa0, 0xcf, 0x15, + 0x36, 0x0f, 0xbd, 0xf9, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x80, 0xc6, 0x02, + 0xd1, 0x0f, 0xbc, 0x41, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x90, 0xc6, 0x27, + 0x5e, 0x0f, 0xb3, 0xe1, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x69, 0xd2, 0x4d, + 0x57, 0x0f, 0xbd, 0xc8, 0xce, 0x70, 0x5e, 0x00, 0xe7, 0x89, 0xcb, 0x95, + 0x98, 0x00, 0xe7, 0x5b, 0x02, 0x44, 0x76, 0xcc, 0x88, 0xc5, 0x00, 0xe7, + 0x51, 0xcc, 0x14, 0x41, 0x00, 0xe7, 0x48, 0xc8, 0x74, 0xc4, 0x00, 0xe7, + 0x31, 0xc6, 0x74, 0xc6, 0x00, 0xe7, 0x20, 0xca, 0xa5, 0x12, 0x00, 0xe7, + 0x40, 0xca, 0xa5, 0x12, 0x00, 0xe7, 0x38, 0xca, 0x9e, 0xe6, 0x00, 0xe7, + 0xc9, 0xc7, 0x02, 0x40, 0x00, 0xe6, 0xd0, 0xe0, 0x02, 0x27, 0x00, 0xe7, + 0x00, 0xca, 0xa4, 0x90, 0x00, 0xe6, 0xc8, 0x43, 0x00, 0x4b, 0xc2, 0x44, + 0x7c, 0xcc, 0x8b, 0x11, 0x70, 0x01, 0xe0, 0x4f, 0x0b, 0x17, 0xc2, 0x44, + 0x8e, 0x4d, 0x29, 0xb9, 0x42, 0x44, 0xf6, 0x42, 0x0a, 0x8c, 0xc2, 0x45, + 0x5e, 0xc3, 0x0d, 0xe5, 0x70, 0x01, 0xd0, 0xce, 0x25, 0xad, 0x70, 0x02, + 0xe9, 0xcb, 0x1a, 0x50, 0x70, 0x01, 0x49, 0xcd, 0x00, 0x32, 0x70, 0x03, + 0xe8, 0xc4, 0x26, 0x78, 0x70, 0x01, 0xc9, 0xc5, 0x06, 0xdb, 0x70, 0x01, + 0xc1, 0x15, 0xc2, 0x45, 0x68, 0x08, 0xc2, 0x45, 0x74, 0x16, 0xc2, 0x45, + 0x80, 0xc3, 0x05, 0x14, 0x70, 0x01, 0x89, 0xc4, 0x15, 0xe7, 0x70, 0x01, + 0x80, 0x83, 0x00, 0xbb, 0x41, 0xc2, 0x01, 0x30, 0x00, 0xbb, 0x28, 0xc9, + 0xa9, 0xc6, 0x00, 0xb8, 0xf8, 0x83, 0x00, 0xb8, 0x41, 0xc2, 0x01, 0x30, + 0x00, 0xb8, 0x28, 0x24, 0xc2, 0x45, 0x8c, 0x23, 0xc2, 0x45, 0xa8, 0x22, + 0xc2, 0x45, 0xd0, 0x21, 0xc2, 0x45, 0xf8, 0x20, 0xc2, 0x46, 0x20, 0x1f, + 0xc2, 0x46, 0x48, 0x1e, 0xc2, 0x46, 0x70, 0x1d, 0x42, 0x46, 0x98, 0xc4, + 0x26, 0x78, 0x0b, 0x56, 0x49, 0xc5, 0x06, 0xdb, 0x0b, 0x56, 0x41, 0x15, + 0xc2, 0x46, 0xc0, 0x08, 0xc2, 0x46, 0xcc, 0x16, 0xc2, 0x46, 0xd8, 0xc3, + 0x05, 0x14, 0x0b, 0x56, 0x09, 0xc4, 0x15, 0xe7, 0x0b, 0x56, 0x00, 0xc2, + 0x02, 0x1c, 0x0b, 0x55, 0xf1, 0x05, 0xc2, 0x46, 0xe4, 0x06, 0xc2, 0x46, + 0xee, 0x08, 0xc2, 0x46, 0xf8, 0xc2, 0x8d, 0x8f, 0x0b, 0x55, 0xd1, 0x16, + 0xc2, 0x47, 0x02, 0x0a, 0xc2, 0x47, 0x12, 0x09, 0xc2, 0x47, 0x1a, 0x15, + 0xc2, 0x47, 0x24, 0x10, 0xc2, 0x47, 0x2c, 0xc2, 0x00, 0x39, 0x0b, 0x55, + 0x91, 0x0e, 0xc2, 0x47, 0x42, 0x0f, 0xc2, 0x47, 0x4c, 0xc2, 0x01, 0x5d, + 0x0b, 0x55, 0x51, 0x12, 0xc2, 0x47, 0x60, 0xc2, 0x01, 0x4a, 0x0b, 0x55, + 0x31, 0xc2, 0x19, 0x2c, 0x0b, 0x55, 0x29, 0x0d, 0xc2, 0x47, 0x6a, 0x17, + 0xc2, 0x47, 0x74, 0x03, 0xc2, 0x47, 0x8c, 0x0b, 0xc2, 0x47, 0xa0, 0x07, + 0xc2, 0x47, 0xb0, 0x18, 0xc2, 0x47, 0xc0, 0x11, 0x42, 0x47, 0xd0, 0x18, + 0xc2, 0x47, 0xe0, 0x42, 0x14, 0x48, 0xc2, 0x47, 0xee, 0x0d, 0xc2, 0x48, + 0x00, 0x12, 
0xc2, 0x48, 0x0a, 0xc7, 0xb4, 0xa5, 0x08, 0xfe, 0xc1, 0x03, + 0xc2, 0x48, 0x14, 0xc6, 0xcd, 0xd9, 0x08, 0xfe, 0xb1, 0xc3, 0x1e, 0xe5, + 0x08, 0xfe, 0xa8, 0xcb, 0x97, 0x9d, 0x08, 0xff, 0x49, 0xcb, 0x97, 0xa8, + 0x08, 0xff, 0x40, 0x83, 0x00, 0x5c, 0x2b, 0x02, 0x48, 0x20, 0x8b, 0x00, + 0x5c, 0x3b, 0x02, 0x48, 0x2c, 0x97, 0x00, 0x5c, 0x4b, 0x02, 0x48, 0x30, + 0x87, 0x00, 0x5c, 0x73, 0x02, 0x48, 0x34, 0x91, 0x00, 0x5c, 0x93, 0x02, + 0x48, 0x38, 0xc2, 0x02, 0x2b, 0x00, 0x5c, 0xa9, 0x10, 0xc2, 0x48, 0x3c, + 0xc2, 0x00, 0x64, 0x00, 0x5c, 0xd1, 0xc2, 0x25, 0x3b, 0x00, 0x5c, 0xe1, + 0x16, 0xc2, 0x48, 0x50, 0xc2, 0x00, 0xb0, 0x00, 0x5d, 0x51, 0xc2, 0x01, + 0xc3, 0x00, 0x5d, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x5d, 0x79, 0x14, 0xc2, + 0x48, 0x5a, 0x0e, 0xc2, 0x48, 0x64, 0xc2, 0x02, 0x41, 0x00, 0x5d, 0xa9, + 0x15, 0xc2, 0x48, 0x6c, 0xc2, 0x00, 0xd0, 0x00, 0x5d, 0xc8, 0xc4, 0x15, + 0xe7, 0x00, 0x5f, 0x31, 0xc3, 0x05, 0x14, 0x00, 0x5f, 0x39, 0x16, 0xc2, + 0x48, 0x7c, 0x08, 0xc2, 0x48, 0x88, 0x15, 0xc2, 0x48, 0x94, 0xc5, 0x06, + 0xdb, 0x00, 0x5f, 0x71, 0xc4, 0x26, 0x78, 0x00, 0x5f, 0x78, 0xc8, 0x08, + 0x79, 0x08, 0xfe, 0x99, 0x44, 0x22, 0xcb, 0xc2, 0x48, 0xa0, 0xca, 0x1e, + 0x15, 0x08, 0xfe, 0x69, 0xca, 0xa3, 0xfa, 0x08, 0xfe, 0x30, 0x45, 0x27, + 0x7a, 0xc2, 0x48, 0xac, 0xc7, 0x08, 0x79, 0x08, 0xfe, 0x81, 0x08, 0xc2, + 0x48, 0xb4, 0x45, 0x06, 0xdb, 0xc2, 0x48, 0xc0, 0x16, 0xc2, 0x48, 0xca, + 0x44, 0x22, 0xcb, 0xc2, 0x48, 0xda, 0xd8, 0x22, 0xbb, 0x08, 0xfe, 0x08, + 0x83, 0x00, 0x5d, 0xf1, 0x8b, 0x00, 0x5e, 0x41, 0x97, 0x00, 0x5e, 0x60, + 0x8b, 0x00, 0x5e, 0x00, 0x97, 0x00, 0x5e, 0x10, 0x87, 0x00, 0x5e, 0x38, + 0x91, 0x00, 0x5e, 0x58, 0xc7, 0x0d, 0x04, 0x00, 0x5f, 0x89, 0xc8, 0x4b, + 0x94, 0x00, 0x5f, 0x90, 0xc4, 0x18, 0x10, 0x08, 0xb6, 0x39, 0xc2, 0x22, + 0xcc, 0x08, 0xb6, 0x30, 0xc3, 0x0d, 0x14, 0x08, 0xb6, 0x29, 0xc3, 0x09, + 0x9e, 0x08, 0xb6, 0x20, 0xc4, 0x02, 0xde, 0x08, 0xb6, 0x19, 0xc2, 0x02, + 0xa0, 0x08, 0xb6, 0x10, 0xca, 0x9e, 0xaa, 0x08, 0xb5, 0xc1, 0x97, 0x08, + 0xb4, 0x49, 0x8b, 0x08, 0xb4, 0x39, 0x83, 0x08, 0xb4, 0x08, 0xc2, 0x00, + 0x39, 0x08, 0xb5, 0x51, 0x83, 0x08, 0xb5, 0x20, 0x83, 0x08, 0xb5, 0x41, + 0xc2, 0x00, 0xd0, 0x08, 0xb5, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0xb5, 0x09, + 0x83, 0x08, 0xb5, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0xf9, 0x83, 0x08, + 0xb4, 0xf0, 0x83, 0x08, 0xb4, 0xe9, 0xc2, 0x00, 0xc1, 0x08, 0xb4, 0xc1, + 0xc2, 0x19, 0x2c, 0x08, 0xb4, 0x99, 0xc2, 0x01, 0x30, 0x08, 0xb4, 0x70, + 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0xe1, 0x83, 0x08, 0xb4, 0xd9, 0x06, 0x42, + 0x48, 0xe6, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0xd1, 0x83, 0x08, 0xb4, 0xc9, + 0x16, 0x42, 0x48, 0xf0, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0x91, 0x83, 0x08, + 0xb4, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0x81, 0x83, 0x08, 0xb4, 0x78, + 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0x69, 0x83, 0x08, 0xb4, 0x60, 0xc2, 0x00, + 0xd0, 0x08, 0xb4, 0x59, 0x83, 0x08, 0xb4, 0x50, 0x97, 0x08, 0xb4, 0x28, + 0x8b, 0x08, 0xb4, 0x18, 0xc4, 0x1e, 0x97, 0x08, 0xb5, 0xb1, 0xc5, 0x40, + 0xe7, 0x08, 0xb5, 0x60, 0x97, 0x08, 0xb5, 0xa9, 0x8b, 0x08, 0xb5, 0x99, + 0x83, 0x08, 0xb5, 0x68, 0x97, 0x08, 0xb5, 0x88, 0x8b, 0x08, 0xb5, 0x78, + 0xc3, 0x01, 0x95, 0x00, 0xd5, 0x61, 0xc2, 0x69, 0xa6, 0x00, 0xd5, 0x20, + 0xc5, 0xd7, 0x04, 0x00, 0xd5, 0x53, 0x02, 0x48, 0xfa, 0xc3, 0x29, 0xf7, + 0x00, 0xd5, 0x11, 0xc3, 0x1c, 0x9f, 0x00, 0xd3, 0x00, 0xc3, 0x04, 0xc6, + 0x00, 0xd5, 0x43, 0x02, 0x49, 0x00, 0xc3, 0x3f, 0x6f, 0x00, 0xd5, 0x19, + 0x44, 0xdf, 0xcf, 0x42, 0x49, 0x06, 0xc5, 0xd4, 0x98, 0x00, 0xd5, 0x39, + 0xc3, 0x71, 0xe5, 0x00, 0xd3, 0xd9, 0xc4, 0xe0, 0xe3, 0x00, 0xd3, 0xa2, + 0x02, 0x49, 
0x12, 0xd4, 0x3c, 0x78, 0x00, 0xd5, 0x31, 0xc6, 0xd1, 0x81, + 0x00, 0xd3, 0xd0, 0xc4, 0xde, 0xb7, 0x00, 0xd5, 0x08, 0x9f, 0x00, 0xd3, + 0xb1, 0x9e, 0x00, 0xd3, 0xa8, 0xc4, 0x18, 0x10, 0x00, 0xd4, 0xb9, 0xc2, + 0x22, 0xcc, 0x00, 0xd4, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xd4, 0xa9, 0xc3, + 0x09, 0x9e, 0x00, 0xd4, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xd4, 0x99, 0xc2, + 0x02, 0xa0, 0x00, 0xd4, 0x90, 0xc4, 0x18, 0x10, 0x00, 0xd4, 0x39, 0xc2, + 0x22, 0xcc, 0x00, 0xd4, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0xd4, 0x29, 0xc3, + 0x09, 0x9e, 0x00, 0xd4, 0x20, 0xc4, 0x02, 0xde, 0x00, 0xd4, 0x19, 0xc2, + 0x02, 0xa0, 0x00, 0xd4, 0x10, 0xc2, 0x0d, 0xf6, 0x00, 0xd2, 0xf1, 0xc2, + 0x01, 0x5d, 0x00, 0xd2, 0xe9, 0x0f, 0xc2, 0x49, 0x18, 0xd4, 0x3c, 0xf0, + 0x00, 0xd2, 0xd9, 0x0e, 0xc2, 0x49, 0x22, 0xc9, 0xb4, 0x2e, 0x00, 0xd2, + 0xc8, 0x42, 0x01, 0x31, 0xc2, 0x49, 0x2e, 0x91, 0x00, 0xd3, 0x81, 0x9b, + 0x00, 0xd3, 0x68, 0xc6, 0xd2, 0xbf, 0x00, 0xd3, 0x91, 0xc6, 0xc6, 0xb8, + 0x00, 0xd3, 0x20, 0x8b, 0x00, 0xd3, 0x89, 0x87, 0x00, 0xd3, 0x79, 0x83, + 0x00, 0xd3, 0x18, 0x97, 0x00, 0xd3, 0x53, 0x02, 0x49, 0x3a, 0x87, 0x00, + 0xd3, 0x38, 0x8b, 0x00, 0xd3, 0x30, 0x83, 0x00, 0xd2, 0x1b, 0x02, 0x49, + 0x3e, 0x43, 0x02, 0x5f, 0xc2, 0x49, 0x42, 0xc2, 0x00, 0xdb, 0x00, 0xd2, + 0x51, 0xc2, 0x0f, 0xe1, 0x00, 0xd2, 0x20, 0x97, 0x00, 0xd2, 0x80, 0x8b, + 0x00, 0xd2, 0x70, 0xc2, 0x00, 0xd0, 0x00, 0xd2, 0x49, 0x15, 0xc2, 0x49, + 0x70, 0xc2, 0x19, 0x2c, 0x00, 0xd2, 0x01, 0xc2, 0x00, 0x87, 0x00, 0xd1, + 0xd1, 0x12, 0xc2, 0x49, 0x80, 0x16, 0xc2, 0x49, 0x8a, 0xc5, 0x3c, 0xf5, + 0x00, 0xd1, 0x71, 0x05, 0xc2, 0x49, 0x94, 0x0d, 0x42, 0x49, 0x9e, 0xc2, + 0x0f, 0xe1, 0x00, 0xd2, 0x11, 0x83, 0x00, 0xd2, 0x0a, 0x02, 0x49, 0xae, + 0x83, 0x00, 0xd1, 0xb1, 0xc2, 0x19, 0x2c, 0x00, 0xd1, 0x61, 0xc2, 0x01, + 0x30, 0x00, 0xd1, 0x30, 0xa3, 0x00, 0xcb, 0xa1, 0xa2, 0x00, 0xcb, 0x99, + 0xa1, 0x00, 0xcb, 0x91, 0xa0, 0x00, 0xcb, 0x89, 0x9f, 0x00, 0xcb, 0x80, + 0xc2, 0x00, 0xd0, 0x00, 0xcb, 0x09, 0x83, 0x00, 0xca, 0x98, 0xc5, 0xd8, + 0x3f, 0x05, 0x56, 0xf9, 0x90, 0x05, 0x56, 0xd8, 0x8f, 0x05, 0x55, 0xf1, + 0x90, 0x05, 0x55, 0xe9, 0x9b, 0x05, 0x55, 0xe1, 0xc2, 0x0f, 0xe1, 0x05, + 0x55, 0xd9, 0x83, 0x05, 0x55, 0x88, 0x83, 0x05, 0x55, 0xd1, 0x87, 0x05, + 0x55, 0x9a, 0x02, 0x49, 0xba, 0x83, 0x05, 0x55, 0xc0, 0x91, 0x05, 0x55, + 0x79, 0xc2, 0x01, 0x23, 0x05, 0x55, 0x69, 0xc2, 0x17, 0xbd, 0x05, 0x55, + 0x59, 0xc2, 0x01, 0xc8, 0x05, 0x55, 0x49, 0xc2, 0x00, 0x79, 0x05, 0x55, + 0x39, 0xc2, 0x42, 0xcd, 0x05, 0x55, 0x29, 0xc2, 0x00, 0xa2, 0x05, 0x55, + 0x19, 0xc2, 0x01, 0x03, 0x05, 0x55, 0x09, 0x12, 0xc2, 0x49, 0xbe, 0xc2, + 0x00, 0x6b, 0x05, 0x54, 0xd9, 0x10, 0xc2, 0x49, 0xc8, 0x16, 0xc2, 0x49, + 0xd8, 0xc2, 0x00, 0x58, 0x05, 0x54, 0x99, 0x05, 0xc2, 0x49, 0xe2, 0xc2, + 0x0f, 0x7b, 0x05, 0x54, 0x39, 0x0d, 0xc2, 0x49, 0xec, 0xc2, 0x00, 0xfb, + 0x05, 0x54, 0x78, 0x91, 0x05, 0x55, 0x71, 0xc2, 0x01, 0x23, 0x05, 0x55, + 0x61, 0xc2, 0x17, 0xbd, 0x05, 0x55, 0x51, 0xc2, 0x01, 0xc8, 0x05, 0x55, + 0x41, 0xc2, 0x00, 0x79, 0x05, 0x55, 0x31, 0xc2, 0x42, 0xcd, 0x05, 0x55, + 0x21, 0xc2, 0x00, 0xa2, 0x05, 0x55, 0x11, 0xc2, 0x01, 0x03, 0x05, 0x55, + 0x01, 0x12, 0xc2, 0x49, 0xf4, 0xc2, 0x00, 0x6b, 0x05, 0x54, 0xd1, 0x10, + 0xc2, 0x49, 0xfe, 0x16, 0xc2, 0x4a, 0x0e, 0xc2, 0x00, 0x58, 0x05, 0x54, + 0x91, 0x05, 0xc2, 0x4a, 0x18, 0xc2, 0x0f, 0x7b, 0x05, 0x54, 0x31, 0x0d, + 0xc2, 0x4a, 0x22, 0xc2, 0x00, 0xfb, 0x05, 0x54, 0x70, 0xd2, 0x49, 0xe5, + 0x0f, 0xb2, 0xb1, 0xd2, 0x47, 0x15, 0x0f, 0xb2, 0xa0, 0xc4, 0x02, 0xde, + 0x01, 0x0c, 0x59, 0xc2, 0x02, 0xa0, 0x01, 0x0c, 0x50, 0x9b, 0x01, 0x0a, + 0x21, 0x8e, 
0x01, 0x0a, 0x11, 0x89, 0x01, 0x0a, 0x08, 0xd2, 0x49, 0xe5, + 0x0f, 0xb2, 0xb9, 0xd2, 0x47, 0x15, 0x0f, 0xb2, 0xa8, 0xc4, 0x00, 0x49, + 0x01, 0x34, 0xf9, 0xc5, 0x00, 0x2c, 0x01, 0x34, 0xf0, 0xc5, 0x00, 0x2c, + 0x0f, 0xaf, 0x39, 0xc4, 0x00, 0x49, 0x0f, 0xaf, 0x31, 0xc5, 0x05, 0x02, + 0x0f, 0xaf, 0x29, 0xc5, 0x00, 0xd4, 0x0f, 0xaf, 0x20, 0x4b, 0x03, 0x87, + 0xc2, 0x4a, 0x2a, 0xdf, 0x0d, 0x7c, 0x01, 0x5c, 0xc0, 0xe0, 0x0b, 0xe7, + 0x01, 0x5c, 0xc8, 0xe0, 0x07, 0xe7, 0x01, 0x3d, 0x18, 0xe0, 0x03, 0xc7, + 0x01, 0x5c, 0xd8, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x41, 0xc4, 0x40, 0x89, + 0x01, 0x00, 0x48, 0xc5, 0xd6, 0x91, 0x00, 0x3d, 0x19, 0xc8, 0xb8, 0x1a, + 0x00, 0x3c, 0x79, 0xc4, 0xd8, 0x3b, 0x00, 0x3c, 0x70, 0x91, 0x00, 0x3d, + 0x01, 0xc7, 0xb4, 0xdb, 0x00, 0x3c, 0x99, 0xc3, 0x39, 0x6e, 0x00, 0x3c, + 0x63, 0x02, 0x4a, 0x36, 0xc3, 0x04, 0xc5, 0x00, 0x3c, 0xc0, 0x03, 0xc2, + 0x4a, 0x3c, 0xc5, 0xd7, 0x22, 0x00, 0x3c, 0x58, 0xc5, 0xd9, 0x20, 0x00, + 0x3c, 0xf1, 0x0a, 0xc2, 0x4a, 0x48, 0xc4, 0xe2, 0xd7, 0x00, 0x3c, 0x80, + 0xc3, 0x39, 0x6e, 0x00, 0x3c, 0xc9, 0xc2, 0x04, 0xc6, 0x00, 0x3c, 0x00, + 0x03, 0xc2, 0x4a, 0x54, 0x91, 0x00, 0x3d, 0x08, 0xc4, 0xe1, 0xff, 0x00, + 0x3c, 0x69, 0xc8, 0xb4, 0xda, 0x00, 0x3c, 0x28, 0xc4, 0xe1, 0x03, 0x00, + 0x3c, 0x39, 0xc3, 0x16, 0xc3, 0x00, 0x3d, 0x10, 0xc4, 0xd8, 0x3b, 0x00, + 0x3c, 0x31, 0xc3, 0x39, 0x6e, 0x00, 0x3c, 0xd0, 0xc4, 0x2b, 0xa7, 0x00, + 0x3c, 0x11, 0xc2, 0x04, 0xc6, 0x00, 0x3d, 0x88, 0x0d, 0xc2, 0x4a, 0x5e, + 0x10, 0xc2, 0x4a, 0x6a, 0x46, 0xcc, 0x6b, 0xc2, 0x4a, 0x7c, 0x15, 0xc2, + 0x4a, 0x91, 0x1b, 0xc2, 0x4a, 0x9d, 0x43, 0x5d, 0x85, 0xc2, 0x4a, 0xa9, + 0x16, 0xc2, 0x4a, 0xb5, 0xc9, 0xb4, 0x0a, 0x00, 0x70, 0xd1, 0x12, 0xc2, + 0x4a, 0xbf, 0x42, 0x01, 0x03, 0xc2, 0x4a, 0xcf, 0x0f, 0xc2, 0x4a, 0xde, + 0x14, 0xc2, 0x4a, 0xea, 0x0e, 0xc2, 0x4a, 0xf4, 0xc7, 0xc2, 0x5e, 0x00, + 0x71, 0x39, 0x43, 0x60, 0xe8, 0xc2, 0x4b, 0x04, 0xc5, 0xd9, 0xd9, 0x00, + 0x71, 0x69, 0xca, 0x9e, 0xbe, 0x00, 0x72, 0xd0, 0xc2, 0x02, 0xa0, 0x00, + 0x72, 0x91, 0xc4, 0x02, 0xde, 0x00, 0x72, 0x98, 0xc3, 0x09, 0x9e, 0x00, + 0x72, 0xa1, 0xc3, 0x0d, 0x14, 0x00, 0x72, 0xa8, 0xc2, 0x22, 0xcc, 0x00, + 0x72, 0xb1, 0xc4, 0x18, 0x10, 0x00, 0x72, 0xb8, 0x87, 0x0f, 0x15, 0x58, + 0x47, 0xc2, 0xe3, 0xc2, 0x4b, 0x10, 0x83, 0x0f, 0x14, 0x88, 0x91, 0x0f, + 0x15, 0x40, 0x97, 0x0f, 0x15, 0x18, 0xc2, 0x01, 0x30, 0x0f, 0x14, 0xc1, + 0x83, 0x0f, 0x14, 0xb8, 0xd0, 0x59, 0x72, 0x01, 0x4e, 0x69, 0xc8, 0x52, + 0x09, 0x01, 0x4e, 0x59, 0xc9, 0x16, 0x14, 0x01, 0x4e, 0x51, 0xcf, 0x13, + 0x5e, 0x0f, 0xb6, 0x30, 0xc4, 0x55, 0x73, 0x0e, 0x9a, 0x49, 0xc9, 0xaf, + 0x15, 0x0e, 0x99, 0xe0, 0xc5, 0xba, 0x65, 0x0e, 0x9a, 0x91, 0xc5, 0x08, + 0xe6, 0x0e, 0x9a, 0x70, 0xc6, 0xd0, 0x55, 0x0e, 0x99, 0xc1, 0x16, 0x42, + 0x4b, 0x24, 0xc7, 0xc0, 0x58, 0x0e, 0x99, 0xe9, 0xc4, 0x1d, 0xa8, 0x0e, + 0x99, 0x30, 0xc5, 0xd7, 0x63, 0x0e, 0x9a, 0x61, 0xc2, 0x00, 0x5f, 0x0e, + 0x99, 0x88, 0xc5, 0xd7, 0x7c, 0x0e, 0x99, 0x71, 0x0b, 0x42, 0x4b, 0x36, + 0xc5, 0x7c, 0xec, 0x01, 0x18, 0xa9, 0xc5, 0x36, 0xc0, 0x0f, 0xa6, 0xf2, + 0x02, 0x4b, 0x42, 0x49, 0x29, 0x29, 0xc2, 0x4b, 0x48, 0xca, 0x1e, 0x8a, + 0x00, 0x60, 0x08, 0xc7, 0x14, 0x39, 0x00, 0x60, 0x11, 0xc7, 0x7a, 0x7f, + 0x00, 0x61, 0xe8, 0xc5, 0x40, 0xe7, 0x00, 0x60, 0x19, 0xc4, 0x1e, 0x97, + 0x00, 0x62, 0x68, 0x83, 0x00, 0x60, 0x2b, 0x02, 0x4b, 0x54, 0x8b, 0x00, + 0x60, 0x3b, 0x02, 0x4b, 0x60, 0x97, 0x00, 0x60, 0x4b, 0x02, 0x4b, 0x64, + 0x18, 0xc2, 0x4b, 0x68, 0x87, 0x00, 0x60, 0x73, 0x02, 0x4b, 0x72, 0x91, + 0x00, 0x60, 0x93, 0x02, 0x4b, 0x76, 0x0d, 0xc2, 0x4b, 0x7a, 0x09, 0xc2, + 0x4b, 0x84, 
0x10, 0xc2, 0x4b, 0x8e, 0x05, 0xc2, 0x4b, 0xa7, 0x0c, 0xc2, + 0x4b, 0xb1, 0x16, 0xc2, 0x4b, 0xbb, 0x06, 0xc2, 0x4b, 0xcf, 0x12, 0xc2, + 0x4b, 0xe3, 0x04, 0xc2, 0x4b, 0xed, 0xc2, 0x01, 0xc3, 0x00, 0x61, 0x71, + 0xc2, 0x19, 0x2c, 0x00, 0x61, 0x79, 0x14, 0xc2, 0x4b, 0xf7, 0x0e, 0xc2, + 0x4b, 0xff, 0x15, 0xc2, 0x4c, 0x07, 0xc2, 0x00, 0xd0, 0x00, 0x61, 0xc8, + 0x83, 0x00, 0x61, 0xf1, 0x8b, 0x00, 0x62, 0x41, 0x97, 0x00, 0x62, 0x60, + 0x8b, 0x00, 0x62, 0x00, 0x97, 0x00, 0x62, 0x10, 0x94, 0x00, 0x62, 0x1b, + 0x02, 0x4c, 0x17, 0x8e, 0x00, 0x63, 0x12, 0x02, 0x4c, 0x1b, 0x87, 0x00, + 0x62, 0x38, 0x91, 0x00, 0x62, 0x58, 0xc2, 0x02, 0xa0, 0x00, 0x63, 0x41, + 0xc4, 0x02, 0xde, 0x00, 0x63, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x63, 0x51, + 0xc3, 0x0d, 0x14, 0x00, 0x63, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x63, 0x61, + 0xc4, 0x18, 0x10, 0x00, 0x63, 0x68, 0xd2, 0x15, 0xf0, 0x00, 0x63, 0xc9, + 0xd3, 0x45, 0xbf, 0x00, 0x63, 0xe0, 0x47, 0xc3, 0x99, 0xc2, 0x4c, 0x1f, + 0x49, 0xaa, 0x8c, 0x42, 0x4c, 0x2b, 0x46, 0x00, 0xd4, 0xc2, 0x4c, 0x37, + 0x45, 0x00, 0x8c, 0x42, 0x4c, 0x43, 0xc5, 0x00, 0xd4, 0x01, 0x70, 0xf1, + 0xc5, 0x05, 0x02, 0x01, 0x70, 0xf8, 0xc4, 0x18, 0x10, 0x08, 0xa6, 0xb9, + 0xc2, 0x22, 0xcc, 0x08, 0xa6, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0xa6, 0xa9, + 0xc3, 0x09, 0x9e, 0x08, 0xa6, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0xa6, 0x99, + 0xc2, 0x02, 0xa0, 0x08, 0xa6, 0x90, 0xc7, 0x7a, 0x7f, 0x08, 0xa6, 0x21, + 0xc7, 0x14, 0x39, 0x08, 0xa6, 0x00, 0xc5, 0x40, 0xe7, 0x08, 0xa6, 0x09, + 0xc4, 0x1e, 0x97, 0x08, 0xa6, 0x10, 0x97, 0x08, 0xa5, 0xf1, 0x8b, 0x08, + 0xa5, 0xd9, 0x83, 0x08, 0xa5, 0x80, 0x91, 0x08, 0xa5, 0xe9, 0x87, 0x08, + 0xa5, 0xd0, 0x8e, 0x08, 0xa5, 0xbb, 0x02, 0x4c, 0x4f, 0x94, 0x08, 0xa5, + 0xaa, 0x02, 0x4c, 0x53, 0x97, 0x08, 0xa5, 0xa0, 0x8b, 0x08, 0xa5, 0x90, + 0x83, 0x08, 0xa5, 0x71, 0xc2, 0x0d, 0xf6, 0x08, 0xa5, 0x69, 0xc2, 0x00, + 0xd0, 0x08, 0xa5, 0x60, 0x83, 0x08, 0xa5, 0x59, 0x47, 0xb2, 0x2e, 0x42, + 0x4c, 0x57, 0xc2, 0x00, 0xd0, 0x08, 0xa5, 0x31, 0x83, 0x08, 0xa5, 0x28, + 0xc2, 0x00, 0xd0, 0x08, 0xa5, 0x21, 0x83, 0x08, 0xa5, 0x18, 0x83, 0x08, + 0xa5, 0x11, 0xc2, 0x00, 0xc1, 0x08, 0xa4, 0xe9, 0xc2, 0x19, 0x2c, 0x08, + 0xa4, 0xc1, 0xc2, 0x01, 0x30, 0x08, 0xa4, 0x98, 0xc2, 0x00, 0xd0, 0x08, + 0xa5, 0x09, 0x83, 0x08, 0xa5, 0x01, 0x06, 0x42, 0x4c, 0x65, 0xc2, 0x00, + 0xd0, 0x08, 0xa4, 0xf9, 0x83, 0x08, 0xa4, 0xf1, 0x16, 0x42, 0x4c, 0x6f, + 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0xb9, 0x83, 0x08, 0xa4, 0xb0, 0xc2, 0x00, + 0xd0, 0x08, 0xa4, 0xa9, 0x83, 0x08, 0xa4, 0xa0, 0xc2, 0x00, 0xd0, 0x08, + 0xa4, 0x91, 0x83, 0x08, 0xa4, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0x81, + 0x83, 0x08, 0xa4, 0x78, 0x97, 0x08, 0xa4, 0x71, 0x8b, 0x08, 0xa4, 0x61, + 0x83, 0x08, 0xa4, 0x10, 0x97, 0x08, 0xa4, 0x30, 0x8b, 0x08, 0xa4, 0x20, + 0xc7, 0xc2, 0xa4, 0x00, 0x7e, 0x21, 0xc7, 0xc4, 0xfe, 0x00, 0x7e, 0x2b, + 0x02, 0x4c, 0x79, 0x12, 0xc2, 0x4c, 0x7f, 0xc6, 0xcc, 0x47, 0x00, 0x7e, + 0x4a, 0x02, 0x4c, 0x8b, 0x44, 0xa9, 0xbe, 0xc2, 0x4c, 0x8f, 0xcd, 0x75, + 0xf4, 0x00, 0x7b, 0xf1, 0xc8, 0x85, 0x06, 0x00, 0x7b, 0xf8, 0xc7, 0xbe, + 0xe3, 0x00, 0x79, 0xf1, 0xc8, 0xb8, 0xd2, 0x00, 0x7c, 0x38, 0xc8, 0xbe, + 0xe2, 0x00, 0x79, 0xf9, 0xc7, 0x4f, 0xa6, 0x00, 0x7c, 0x48, 0xc7, 0xc1, + 0x3f, 0x00, 0x7c, 0x31, 0xc9, 0x8e, 0x8e, 0x00, 0x7c, 0x40, 0xcb, 0x95, + 0xda, 0x00, 0x7c, 0x51, 0xcb, 0x99, 0x08, 0x00, 0x7c, 0x58, 0xcb, 0x8e, + 0x8c, 0x00, 0x7c, 0x69, 0xc8, 0x4f, 0xa5, 0x00, 0x7c, 0x71, 0xd1, 0x4f, + 0x9c, 0x00, 0x7c, 0x78, 0x0d, 0xc2, 0x4c, 0x9b, 0x09, 0xc2, 0x4c, 0xab, + 0x10, 0xc2, 0x4c, 0xb5, 0x05, 0xc2, 0x4c, 0xcb, 0xc2, 0x25, 0x3b, 0x00, + 0x7c, 0xb9, 
0x16, 0xc2, 0x4c, 0xd5, 0x06, 0xc2, 0x4c, 0xe7, 0x12, 0xc2, + 0x4c, 0xf9, 0x04, 0xc2, 0x4d, 0x03, 0xc2, 0x01, 0xc3, 0x00, 0x7d, 0x41, + 0xc2, 0x01, 0x4a, 0x00, 0x7d, 0x69, 0x1c, 0xc2, 0x4d, 0x0d, 0xc2, 0x00, + 0x02, 0x00, 0x7d, 0x81, 0xc2, 0x19, 0x2c, 0x00, 0x7d, 0x89, 0xc2, 0x00, + 0x39, 0x00, 0x7d, 0x91, 0xc2, 0x00, 0xdb, 0x00, 0x7d, 0x99, 0x15, 0xc2, + 0x4d, 0x17, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0xb9, 0x83, 0x00, 0x7d, 0xc1, + 0x4b, 0x7f, 0xe8, 0x42, 0x4d, 0x27, 0x48, 0x16, 0x5f, 0xc2, 0x4d, 0x39, + 0xc5, 0x32, 0x89, 0x00, 0x78, 0xa0, 0xc2, 0x00, 0x45, 0x00, 0x79, 0xd1, + 0xc2, 0x02, 0x2c, 0x00, 0x79, 0xd8, 0xcf, 0x16, 0x5f, 0x00, 0x78, 0x21, + 0xdb, 0x16, 0x53, 0x00, 0x7e, 0x98, 0xcf, 0x16, 0x7a, 0x00, 0x78, 0x29, + 0xdb, 0x16, 0x6e, 0x00, 0x7e, 0xa0, 0xd4, 0x3f, 0x48, 0x00, 0x78, 0x31, + 0x4c, 0x82, 0xad, 0x42, 0x4d, 0x45, 0x0d, 0xc2, 0x4d, 0x51, 0xc9, 0xb5, + 0x0f, 0x00, 0x79, 0xa0, 0xc7, 0x16, 0x5f, 0x00, 0x78, 0x51, 0xcc, 0x2e, + 0x06, 0x00, 0x7e, 0x80, 0xc4, 0x01, 0xe2, 0x00, 0x78, 0x71, 0xc5, 0x32, + 0x89, 0x00, 0x7e, 0x92, 0x02, 0x4d, 0x5d, 0xc7, 0x70, 0x50, 0x00, 0x79, + 0xa9, 0xca, 0xa3, 0xe6, 0x00, 0x79, 0xb8, 0xc8, 0x32, 0x8b, 0x00, 0x78, + 0x79, 0xc7, 0xc1, 0x70, 0x00, 0x79, 0xc8, 0x83, 0x00, 0x7a, 0x01, 0xc2, + 0x00, 0xd0, 0x00, 0x7a, 0x09, 0xc3, 0x1d, 0x35, 0x00, 0x7b, 0x49, 0xc2, + 0x02, 0x2b, 0x00, 0x7b, 0x58, 0x83, 0x00, 0x7a, 0x11, 0xc2, 0x00, 0xd0, + 0x00, 0x7a, 0x18, 0xc2, 0x01, 0x30, 0x00, 0x7a, 0x21, 0xc2, 0x19, 0x2c, + 0x00, 0x7a, 0x49, 0xc2, 0x00, 0xc1, 0x00, 0x7a, 0x71, 0x83, 0x00, 0x7a, + 0x98, 0x83, 0x00, 0x7a, 0x29, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0x30, 0x16, + 0xc2, 0x4d, 0x63, 0x83, 0x00, 0x7a, 0x79, 0xc2, 0x00, 0xd0, 0x00, 0x7a, + 0x81, 0x15, 0x42, 0x4d, 0x6d, 0x06, 0xc2, 0x4d, 0x77, 0x83, 0x00, 0x7a, + 0x89, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0x91, 0x1c, 0x42, 0x4d, 0x81, 0x83, + 0x00, 0x7a, 0xa1, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0xa8, 0x83, 0x00, 0x7a, + 0xb1, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0xb8, 0xc2, 0x00, 0xd0, 0x00, 0x7a, + 0xf1, 0x83, 0x00, 0x7a, 0xf8, 0x83, 0x00, 0x7b, 0x11, 0xc2, 0x00, 0x39, + 0x00, 0x7b, 0x60, 0xc2, 0x00, 0xd0, 0x00, 0x7b, 0x21, 0xc2, 0x0d, 0xf6, + 0x00, 0x7b, 0x29, 0x83, 0x00, 0x7b, 0x30, 0xc2, 0x02, 0xa0, 0x00, 0x79, + 0x59, 0xc4, 0x02, 0xde, 0x00, 0x79, 0x60, 0xc3, 0x09, 0x9e, 0x00, 0x79, + 0x69, 0xc3, 0x0d, 0x14, 0x00, 0x79, 0x70, 0xc2, 0x22, 0xcc, 0x00, 0x79, + 0x79, 0xc4, 0x18, 0x10, 0x00, 0x79, 0x80, 0x94, 0x00, 0x7b, 0xb8, 0x8e, + 0x00, 0x7b, 0xc8, 0x84, 0x01, 0x69, 0x8b, 0x02, 0x4d, 0x8b, 0x89, 0x01, + 0x69, 0x9b, 0x02, 0x4d, 0x8f, 0x8c, 0x01, 0x69, 0xb1, 0x86, 0x01, 0x69, + 0xbb, 0x02, 0x4d, 0x96, 0x88, 0x01, 0x69, 0xe1, 0x8d, 0x01, 0x69, 0xeb, + 0x02, 0x4d, 0xa1, 0x8a, 0x01, 0x6a, 0x03, 0x02, 0x4d, 0xa8, 0x83, 0x01, + 0x6a, 0x21, 0x93, 0x01, 0x6a, 0x39, 0x9c, 0x01, 0x6b, 0x1b, 0x02, 0x4d, + 0xac, 0x8e, 0x01, 0x6a, 0x69, 0x8f, 0x01, 0x6a, 0x71, 0x90, 0x01, 0x6a, + 0x79, 0x92, 0x01, 0x6a, 0x91, 0x94, 0x01, 0x6a, 0xa3, 0x02, 0x4d, 0xb4, + 0x95, 0x01, 0x6a, 0xcb, 0x02, 0x4d, 0xb8, 0x96, 0x01, 0x6a, 0xe3, 0x02, + 0x4d, 0xc0, 0xc2, 0x11, 0xee, 0x01, 0x6a, 0xf1, 0x98, 0x01, 0x6b, 0x01, + 0x99, 0x01, 0x6b, 0x09, 0x9b, 0x01, 0x6b, 0x10, 0x9b, 0x01, 0x69, 0xd8, + 0x8d, 0x01, 0x69, 0xf3, 0x02, 0x4d, 0xc8, 0x8a, 0x01, 0x6a, 0x11, 0x93, + 0x01, 0x6a, 0x41, 0xc2, 0x25, 0xa1, 0x01, 0x6a, 0x61, 0x09, 0xc2, 0x4d, + 0xcc, 0xc2, 0x00, 0x75, 0x01, 0x6a, 0x88, 0xcb, 0x05, 0x1c, 0x01, 0x02, + 0xd1, 0xc6, 0x72, 0x26, 0x01, 0x01, 0x28, 0x0c, 0xc2, 0x4d, 0xd4, 0x0a, + 0xc2, 0x4d, 0xe0, 0x15, 0xc2, 0x4d, 0xec, 0x4b, 0x92, 0x75, 0xc2, 0x4e, + 0x00, 0x03, 
0xc2, 0x4e, 0x18, 0x16, 0xc2, 0x4e, 0x2e, 0x49, 0xab, 0xf4, + 0xc2, 0x4e, 0x3c, 0x4a, 0x60, 0x7b, 0xc2, 0x4e, 0x70, 0x0d, 0xc2, 0x4e, + 0xa4, 0x49, 0x0d, 0xff, 0xc2, 0x4e, 0xb0, 0x13, 0xc2, 0x4e, 0xd2, 0x49, + 0xb1, 0x0d, 0xc2, 0x4e, 0xdc, 0x04, 0xc2, 0x4f, 0x00, 0x14, 0xc2, 0x4f, + 0x0c, 0x0f, 0xc2, 0x4f, 0x16, 0x4e, 0x74, 0x6a, 0xc2, 0x4f, 0x22, 0x49, + 0xb2, 0x00, 0xc2, 0x4f, 0x2c, 0x56, 0x2b, 0xaa, 0xc2, 0x4f, 0x56, 0xd6, + 0x30, 0xd2, 0x07, 0xef, 0xc0, 0x4d, 0x7f, 0x8d, 0xc2, 0x4f, 0x5c, 0x45, + 0x02, 0x10, 0x42, 0x4f, 0x68, 0x4a, 0x9a, 0xea, 0xc2, 0x4f, 0xe9, 0xcc, + 0x27, 0x7f, 0x00, 0x46, 0x88, 0xd4, 0x39, 0xf8, 0x00, 0x47, 0xf9, 0xcb, + 0x3a, 0x01, 0x00, 0x32, 0xc0, 0xc7, 0xc3, 0xca, 0x00, 0x44, 0xe1, 0xc7, + 0x2b, 0x4a, 0x00, 0x32, 0x98, 0x06, 0xc2, 0x4f, 0xfb, 0x03, 0xc2, 0x50, + 0x03, 0xc3, 0x85, 0xf5, 0x0f, 0x70, 0x09, 0xc4, 0x30, 0xc1, 0x0f, 0x70, + 0x11, 0xc3, 0x7e, 0x89, 0x0f, 0x70, 0x29, 0x42, 0x02, 0x1c, 0xc2, 0x50, + 0x0f, 0xc3, 0x14, 0x4b, 0x0f, 0x70, 0x39, 0x16, 0xc2, 0x50, 0x19, 0xc3, + 0x2b, 0xb9, 0x0f, 0x70, 0x49, 0x0d, 0xc2, 0x50, 0x27, 0x0e, 0xc2, 0x50, + 0x33, 0xc4, 0x19, 0x60, 0x0f, 0x70, 0x61, 0xc4, 0x3a, 0x01, 0x0f, 0x70, + 0x69, 0x15, 0xc2, 0x50, 0x3f, 0xc3, 0x0f, 0x9a, 0x0f, 0x70, 0x91, 0xc3, + 0x72, 0xf0, 0x0f, 0x70, 0x99, 0x48, 0x10, 0xb4, 0xc2, 0x50, 0x57, 0x49, + 0x18, 0x67, 0xc2, 0x50, 0xa9, 0xc3, 0xb1, 0x0d, 0x0f, 0x70, 0x81, 0xc5, + 0x92, 0x75, 0x0f, 0x70, 0xd8, 0xc3, 0x0a, 0x8c, 0x00, 0x32, 0x7b, 0x02, + 0x50, 0xb5, 0xcc, 0x85, 0x29, 0x00, 0x30, 0x68, 0xd6, 0x2f, 0x9e, 0x00, + 0x47, 0xdb, 0x02, 0x50, 0xc2, 0xc7, 0xc0, 0x51, 0x00, 0x44, 0xf0, 0xc5, + 0x00, 0xd4, 0x00, 0x47, 0xc3, 0x02, 0x50, 0xc8, 0xc5, 0x05, 0x02, 0x00, + 0x47, 0xd0, 0xce, 0x71, 0x14, 0x00, 0x44, 0x41, 0x9b, 0x00, 0x30, 0x40, + 0xe0, 0x08, 0xc7, 0x00, 0x37, 0x60, 0xce, 0x6d, 0xe8, 0x00, 0x47, 0xb1, + 0xcd, 0x00, 0xfa, 0x07, 0xf3, 0xd1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xd8, + 0xce, 0x00, 0xf9, 0x07, 0xf3, 0xa0, 0x00, 0xc2, 0x50, 0xce, 0xc3, 0x13, + 0x00, 0x00, 0x32, 0x5a, 0x02, 0x50, 0xe0, 0x45, 0x08, 0xcb, 0xc2, 0x50, + 0xe6, 0x44, 0x05, 0x36, 0xc2, 0x51, 0x3a, 0x42, 0x00, 0x87, 0xc2, 0x51, + 0x50, 0xc3, 0x2b, 0xb9, 0x00, 0x37, 0x31, 0xc3, 0x7e, 0x89, 0x00, 0x37, + 0x29, 0xc5, 0x4d, 0x40, 0x00, 0x30, 0xd1, 0xc5, 0x52, 0x4a, 0x00, 0x30, + 0xc8, 0xc3, 0x2d, 0x2c, 0x00, 0x32, 0x93, 0x02, 0x51, 0x5c, 0xd8, 0x22, + 0xeb, 0x00, 0x44, 0xe9, 0xcc, 0x86, 0x9d, 0x00, 0x32, 0xb0, 0x4a, 0xa3, + 0xf0, 0xc2, 0x51, 0x60, 0xc4, 0x00, 0x9d, 0x07, 0xdd, 0xf9, 0x16, 0xc2, + 0x51, 0x6c, 0x42, 0x00, 0x58, 0xc2, 0x51, 0x78, 0x4a, 0x3b, 0x79, 0xc2, + 0x51, 0x84, 0xcb, 0x8f, 0x7e, 0x07, 0xde, 0x10, 0x15, 0xc2, 0x51, 0x90, + 0xc9, 0xac, 0x0f, 0x00, 0x30, 0xa1, 0x42, 0x00, 0x39, 0xc2, 0x51, 0x9a, + 0xcf, 0x6b, 0x70, 0x00, 0x30, 0x89, 0xc5, 0xda, 0xc4, 0x00, 0x30, 0x78, + 0x00, 0x42, 0x51, 0xa6, 0x45, 0xd9, 0x57, 0xc2, 0x51, 0xb2, 0x49, 0x04, + 0xf9, 0xc2, 0x51, 0xbe, 0x48, 0x05, 0x14, 0x42, 0x51, 0xca, 0xc5, 0x19, + 0x75, 0x00, 0x32, 0x03, 0x02, 0x51, 0xd6, 0xcb, 0x92, 0xee, 0x07, 0xf3, + 0x98, 0xc5, 0x4d, 0x40, 0x00, 0x47, 0x33, 0x02, 0x51, 0xdc, 0xc5, 0x52, + 0x4a, 0x00, 0x47, 0x2b, 0x02, 0x51, 0xe2, 0xc5, 0x63, 0x73, 0x00, 0x47, + 0x22, 0x02, 0x51, 0xe8, 0xc5, 0x00, 0xd4, 0x00, 0x32, 0xa1, 0xc5, 0x05, + 0x02, 0x00, 0x32, 0xa8, 0xce, 0x74, 0x5c, 0x00, 0x44, 0x81, 0xcf, 0x65, + 0xee, 0x00, 0x30, 0x70, 0xc9, 0x0e, 0x6e, 0x00, 0x32, 0xe1, 0xd6, 0x31, + 0x6c, 0x00, 0x32, 0xd9, 0xcd, 0x31, 0x75, 0x00, 0x32, 0xd0, 0xc9, 0x08, + 0xcb, 0x00, 0x37, 0x59, 0xc8, 0xb9, 0x12, 0x00, 0x37, 0x50, 0xc4, 0x44, + 0x78, 0x00, 
0x36, 0xe9, 0xc9, 0x5c, 0xe9, 0x00, 0x30, 0xe8, 0xc4, 0x18, + 0x10, 0x00, 0x33, 0x39, 0xc2, 0x22, 0xcc, 0x00, 0x33, 0x30, 0xc3, 0x0d, + 0x14, 0x00, 0x33, 0x29, 0xc3, 0x09, 0x9e, 0x00, 0x33, 0x20, 0xc4, 0x02, + 0xde, 0x00, 0x33, 0x19, 0xc2, 0x02, 0xa0, 0x00, 0x33, 0x10, 0xc3, 0xe6, + 0x1a, 0x07, 0xd8, 0xb9, 0xc3, 0x03, 0x0d, 0x07, 0xd8, 0xa9, 0xc3, 0x5f, + 0x44, 0x07, 0xd8, 0xa1, 0xc3, 0x2a, 0x91, 0x07, 0xd8, 0x98, 0xcc, 0x23, + 0x3f, 0x00, 0x2c, 0x41, 0xc2, 0x01, 0x48, 0x00, 0x2c, 0x10, 0x8a, 0x00, + 0x2c, 0x21, 0x90, 0x00, 0x2b, 0x78, 0xc3, 0xe5, 0xc0, 0x00, 0x2c, 0x19, + 0xc2, 0x16, 0x1c, 0x00, 0x2b, 0xd0, 0x91, 0x00, 0x2c, 0x09, 0x0a, 0xc2, + 0x51, 0xee, 0x83, 0x00, 0x2b, 0x70, 0xc2, 0x16, 0x1c, 0x00, 0x2c, 0x01, + 0x83, 0x00, 0x2b, 0xe0, 0xc3, 0xb8, 0x27, 0x00, 0x2b, 0xf9, 0x91, 0x00, + 0x2b, 0x49, 0xc9, 0xb0, 0x47, 0x00, 0x2b, 0x00, 0xc2, 0x04, 0xe6, 0x00, + 0x2b, 0xf1, 0x91, 0x00, 0x2b, 0xc0, 0xc2, 0x16, 0x1c, 0x00, 0x2b, 0xe9, + 0xc2, 0x00, 0xd0, 0x00, 0x2b, 0xb8, 0xc3, 0x64, 0x77, 0x00, 0x2b, 0xd9, + 0x83, 0x00, 0x2b, 0x88, 0xc3, 0x01, 0xe3, 0x00, 0x2b, 0x91, 0xc2, 0x03, + 0x4e, 0x00, 0x2b, 0x18, 0xc2, 0x01, 0x7f, 0x00, 0x2b, 0x51, 0x83, 0x00, + 0x2b, 0x30, 0x96, 0x00, 0x2b, 0x41, 0x8a, 0x00, 0x2b, 0x39, 0xc2, 0x11, + 0xee, 0x00, 0x2b, 0x28, 0x8a, 0x00, 0x2a, 0xa1, 0x90, 0x00, 0x29, 0xf8, + 0xc3, 0xe5, 0xc0, 0x00, 0x2a, 0x99, 0xc2, 0x16, 0x1c, 0x00, 0x2a, 0x50, + 0xc2, 0x01, 0x48, 0x00, 0x2a, 0x90, 0x91, 0x00, 0x2a, 0x89, 0x0a, 0xc2, + 0x51, 0xf8, 0x83, 0x00, 0x29, 0xf0, 0xc2, 0x16, 0x1c, 0x00, 0x2a, 0x81, + 0x83, 0x00, 0x2a, 0x60, 0xc3, 0xb8, 0x27, 0x00, 0x2a, 0x79, 0x91, 0x00, + 0x29, 0xc8, 0xc2, 0x04, 0xe6, 0x00, 0x2a, 0x71, 0x91, 0x00, 0x2a, 0x40, + 0xc2, 0x16, 0x1c, 0x00, 0x2a, 0x69, 0xc2, 0x00, 0xd0, 0x00, 0x2a, 0x38, + 0xc3, 0x64, 0x77, 0x00, 0x2a, 0x59, 0x83, 0x00, 0x2a, 0x08, 0xc3, 0x01, + 0xe3, 0x00, 0x2a, 0x11, 0xc2, 0x03, 0x4e, 0x00, 0x29, 0x98, 0xc2, 0x01, + 0x7f, 0x00, 0x29, 0xd1, 0x83, 0x00, 0x29, 0xb0, 0x96, 0x00, 0x29, 0xc1, + 0x8a, 0x00, 0x29, 0xb9, 0xc2, 0x11, 0xee, 0x00, 0x29, 0xa8, 0xc4, 0x14, + 0x74, 0x0f, 0x48, 0x09, 0xc2, 0x00, 0xd0, 0x0f, 0x48, 0x68, 0x83, 0x0f, + 0x48, 0x21, 0xc2, 0x01, 0x7f, 0x0f, 0x48, 0x38, 0xc9, 0xaf, 0x27, 0x0f, + 0x48, 0x29, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0x08, 0xc2, 0x01, 0x7f, 0x0f, + 0x48, 0x71, 0x83, 0x0f, 0x48, 0x90, 0xc2, 0x05, 0x1d, 0x0f, 0x48, 0x81, + 0xc2, 0x19, 0x2c, 0x0f, 0x48, 0xc9, 0xc2, 0x00, 0xd0, 0x0f, 0x48, 0xd8, + 0xc2, 0x0f, 0x9b, 0x0f, 0x48, 0x89, 0xc2, 0x00, 0xd0, 0x0f, 0x48, 0xe9, + 0xc2, 0x01, 0x53, 0x0f, 0x49, 0x00, 0x83, 0x0f, 0x48, 0xc1, 0xc2, 0x00, + 0x51, 0x0f, 0x48, 0xf0, 0x9f, 0x0f, 0xba, 0x19, 0xa0, 0x0f, 0xba, 0x20, + 0x02, 0x42, 0x52, 0x02, 0xc4, 0x1a, 0x05, 0x0f, 0xb8, 0xf1, 0xc6, 0x4c, + 0x49, 0x0f, 0xb9, 0x1a, 0x02, 0x52, 0x12, 0xc2, 0xe5, 0xfd, 0x0f, 0xbb, + 0x10, 0xc8, 0xb8, 0x5a, 0x0f, 0xba, 0xd0, 0x02, 0xc2, 0x52, 0x18, 0x44, + 0x00, 0x54, 0x42, 0x52, 0x24, 0xc2, 0xe5, 0xfd, 0x0f, 0xb9, 0xe0, 0xcc, + 0x8c, 0x85, 0x0f, 0xb9, 0x79, 0x02, 0x42, 0x52, 0x33, 0xc2, 0xe5, 0xfd, + 0x0f, 0xb8, 0xb8, 0x45, 0x3c, 0x54, 0xc2, 0x52, 0x3b, 0xc3, 0x00, 0x44, + 0x0f, 0xba, 0xf0, 0x44, 0x00, 0x54, 0x42, 0x52, 0x4d, 0xc2, 0xe5, 0xfd, + 0x0f, 0xba, 0xe8, 0xc5, 0xdd, 0x80, 0x0f, 0xb8, 0x43, 0x02, 0x52, 0x59, + 0xc5, 0xd7, 0x09, 0x0f, 0xb8, 0x32, 0x02, 0x52, 0x5f, 0xc2, 0xe5, 0xfd, + 0x0f, 0xb9, 0xb8, 0xa0, 0x0f, 0xb8, 0x91, 0x9f, 0x0f, 0xb8, 0x88, 0x9f, + 0x0a, 0x21, 0xd1, 0x9e, 0x0a, 0x21, 0xc9, 0x9d, 0x0a, 0x21, 0xc1, 0xa0, + 0x0a, 0x21, 0xd9, 0xa1, 0x0a, 0x21, 0xe1, 0xa2, 0x0a, 0x21, 0xe9, 0xa3, + 0x0a, 0x21, 
0xf1, 0xa4, 0x0a, 0x21, 0xf9, 0xa5, 0x0a, 0x22, 0x01, 0xa6, + 0x0a, 0x22, 0x08, 0xa6, 0x0a, 0x21, 0xb9, 0xa5, 0x0a, 0x21, 0xb1, 0xa4, + 0x0a, 0x21, 0xa9, 0xa3, 0x0a, 0x21, 0x93, 0x02, 0x52, 0x65, 0xa2, 0x0a, + 0x21, 0x83, 0x02, 0x52, 0x6d, 0xa1, 0x0a, 0x21, 0x79, 0xa0, 0x0a, 0x21, + 0x71, 0x9f, 0x0a, 0x21, 0x69, 0x9e, 0x0a, 0x21, 0x5b, 0x02, 0x52, 0x71, + 0x9d, 0x0a, 0x21, 0x50, 0xa6, 0x0a, 0x21, 0x43, 0x02, 0x52, 0x75, 0xa5, + 0x0a, 0x21, 0x39, 0xa4, 0x0a, 0x21, 0x31, 0xa3, 0x0a, 0x21, 0x29, 0xa2, + 0x0a, 0x21, 0x21, 0xa1, 0x0a, 0x21, 0x19, 0xa0, 0x0a, 0x21, 0x11, 0x9f, + 0x0a, 0x21, 0x09, 0x9e, 0x0a, 0x21, 0x01, 0x9d, 0x0a, 0x20, 0xf8, 0xa6, + 0x0a, 0x20, 0xf1, 0xa5, 0x0a, 0x20, 0xe9, 0xa4, 0x0a, 0x20, 0xe1, 0xa3, + 0x0a, 0x20, 0xd3, 0x02, 0x52, 0x79, 0xa2, 0x0a, 0x20, 0xc9, 0xa1, 0x0a, + 0x20, 0xc1, 0xa0, 0x0a, 0x20, 0xb9, 0x9f, 0x0a, 0x20, 0xb1, 0x9e, 0x0a, + 0x20, 0xa9, 0x9d, 0x0a, 0x20, 0xa0, 0xa6, 0x0a, 0x20, 0x99, 0xa5, 0x0a, + 0x20, 0x91, 0xa4, 0x0a, 0x20, 0x89, 0xa3, 0x0a, 0x20, 0x81, 0xa2, 0x0a, + 0x20, 0x79, 0xa1, 0x0a, 0x20, 0x71, 0xa0, 0x0a, 0x20, 0x69, 0x9f, 0x0a, + 0x20, 0x61, 0x9e, 0x0a, 0x20, 0x59, 0x9d, 0x0a, 0x20, 0x4a, 0x02, 0x52, + 0x7d, 0xa6, 0x0a, 0x20, 0x41, 0xa5, 0x0a, 0x20, 0x39, 0xa4, 0x0a, 0x20, + 0x31, 0xa3, 0x0a, 0x20, 0x29, 0xa2, 0x0a, 0x20, 0x21, 0xa1, 0x0a, 0x20, + 0x19, 0xa0, 0x0a, 0x20, 0x11, 0x9f, 0x0a, 0x20, 0x09, 0x9e, 0x0a, 0x20, + 0x00, 0x9d, 0x0a, 0x22, 0x11, 0x9e, 0x0a, 0x22, 0x19, 0x9f, 0x0a, 0x22, + 0x21, 0xa0, 0x0a, 0x22, 0x29, 0xa1, 0x0a, 0x22, 0x31, 0xa2, 0x0a, 0x22, + 0x39, 0xa3, 0x0a, 0x22, 0x43, 0x02, 0x52, 0x81, 0xa4, 0x0a, 0x22, 0x61, + 0xa5, 0x0a, 0x22, 0x69, 0xa6, 0x0a, 0x22, 0x70, 0x9d, 0x0a, 0x22, 0x79, + 0x9e, 0x0a, 0x22, 0x81, 0x9f, 0x0a, 0x22, 0x89, 0xa0, 0x0a, 0x22, 0x91, + 0xa1, 0x0a, 0x22, 0x99, 0xa2, 0x0a, 0x22, 0xa1, 0xa3, 0x0a, 0x22, 0xa9, + 0xa4, 0x0a, 0x22, 0xb1, 0xa5, 0x0a, 0x22, 0xb9, 0xa6, 0x0a, 0x22, 0xc0, + 0x9d, 0x0a, 0x22, 0xc9, 0x9e, 0x0a, 0x22, 0xd1, 0x9f, 0x0a, 0x22, 0xd9, + 0xa0, 0x0a, 0x22, 0xe1, 0xa1, 0x0a, 0x22, 0xe9, 0xa2, 0x0a, 0x22, 0xf1, + 0xa3, 0x0a, 0x22, 0xf9, 0xa4, 0x0a, 0x23, 0x01, 0xa5, 0x0a, 0x23, 0x09, + 0xa6, 0x0a, 0x23, 0x10, 0x9d, 0x0a, 0x23, 0x19, 0x9e, 0x0a, 0x23, 0x21, + 0x9f, 0x0a, 0x23, 0x29, 0xa0, 0x0a, 0x23, 0x31, 0xa1, 0x0a, 0x23, 0x39, + 0xa2, 0x0a, 0x23, 0x41, 0xa3, 0x0a, 0x23, 0x49, 0xa4, 0x0a, 0x23, 0x53, + 0x02, 0x52, 0x8d, 0xa5, 0x0a, 0x23, 0x63, 0x02, 0x52, 0x91, 0xa6, 0x0a, + 0x23, 0x70, 0x9d, 0x0a, 0x23, 0x7b, 0x02, 0x52, 0x95, 0x9e, 0x0a, 0x23, + 0x8b, 0x02, 0x52, 0x99, 0x9f, 0x0a, 0x23, 0x9b, 0x02, 0x52, 0x9d, 0xa0, + 0x0a, 0x23, 0xa9, 0xa1, 0x0a, 0x23, 0xb3, 0x02, 0x52, 0xa1, 0xa2, 0x0a, + 0x23, 0xd3, 0x02, 0x52, 0xad, 0xa3, 0x0a, 0x23, 0xe9, 0xa4, 0x0a, 0x23, + 0xf3, 0x02, 0x52, 0xb5, 0xa5, 0x0a, 0x24, 0x11, 0xa6, 0x0a, 0x24, 0x18, + 0x9d, 0x0a, 0x24, 0x23, 0x02, 0x52, 0xc1, 0x9e, 0x0a, 0x24, 0x39, 0x9f, + 0x0a, 0x24, 0x41, 0xa0, 0x0a, 0x24, 0x49, 0xa1, 0x0a, 0x24, 0x51, 0xa2, + 0x0a, 0x24, 0x5b, 0x02, 0x52, 0xc9, 0xa3, 0x0a, 0x24, 0x69, 0xa4, 0x0a, + 0x24, 0x71, 0xa5, 0x0a, 0x24, 0x79, 0xa6, 0x0a, 0x24, 0x80, 0x9d, 0x0a, + 0x24, 0x89, 0x9e, 0x0a, 0x24, 0x91, 0x9f, 0x0a, 0x24, 0x99, 0xa0, 0x0a, + 0x24, 0xa1, 0xa1, 0x0a, 0x24, 0xa9, 0xa2, 0x0a, 0x24, 0xb3, 0x02, 0x52, + 0xcd, 0xa3, 0x0a, 0x24, 0xc1, 0xa4, 0x0a, 0x24, 0xc9, 0xa5, 0x0a, 0x24, + 0xd1, 0xa6, 0x0a, 0x24, 0xd8, 0x9d, 0x0a, 0x24, 0xe1, 0x9e, 0x0a, 0x24, + 0xe9, 0x9f, 0x0a, 0x24, 0xf1, 0xa0, 0x0a, 0x24, 0xf9, 0xa1, 0x0a, 0x25, + 0x01, 0xa2, 0x0a, 0x25, 0x0b, 0x02, 0x52, 0xd1, 0xa3, 0x0a, 0x25, 0x19, + 0xa4, 0x0a, 
0x25, 0x21, 0xa5, 0x0a, 0x25, 0x29, 0xa6, 0x0a, 0x25, 0x30, + 0x9d, 0x0a, 0x25, 0x39, 0x9e, 0x0a, 0x25, 0x41, 0x9f, 0x0a, 0x25, 0x49, + 0xa0, 0x0a, 0x25, 0x51, 0xa1, 0x0a, 0x25, 0x59, 0xa2, 0x0a, 0x25, 0x61, + 0xa3, 0x0a, 0x25, 0x69, 0xa4, 0x0a, 0x25, 0x71, 0xa5, 0x0a, 0x25, 0x79, + 0xa6, 0x0a, 0x25, 0x80, 0x9d, 0x0a, 0x25, 0x89, 0x9e, 0x0a, 0x25, 0x91, + 0x9f, 0x0a, 0x25, 0x99, 0xa0, 0x0a, 0x25, 0xa1, 0xa1, 0x0a, 0x25, 0xa9, + 0xa2, 0x0a, 0x25, 0xb1, 0xa3, 0x0a, 0x25, 0xb9, 0xa4, 0x0a, 0x25, 0xc1, + 0xa5, 0x0a, 0x25, 0xc9, 0xa6, 0x0a, 0x25, 0xd0, 0x9d, 0x0a, 0x25, 0xd9, + 0x9e, 0x0a, 0x25, 0xe1, 0x9f, 0x0a, 0x25, 0xe9, 0xa0, 0x0a, 0x25, 0xf1, + 0xa1, 0x0a, 0x25, 0xf9, 0xa2, 0x0a, 0x26, 0x01, 0xa3, 0x0a, 0x26, 0x09, + 0xa4, 0x0a, 0x26, 0x11, 0xa5, 0x0a, 0x26, 0x19, 0xa6, 0x0a, 0x26, 0x20, + 0x9d, 0x0a, 0x26, 0x29, 0x9e, 0x0a, 0x26, 0x31, 0x9f, 0x0a, 0x26, 0x39, + 0xa0, 0x0a, 0x26, 0x41, 0xa1, 0x0a, 0x26, 0x49, 0xa2, 0x0a, 0x26, 0x51, + 0xa3, 0x0a, 0x26, 0x59, 0xa4, 0x0a, 0x26, 0x61, 0xa5, 0x0a, 0x26, 0x69, + 0xa6, 0x0a, 0x26, 0x70, 0x9d, 0x0a, 0x26, 0x79, 0x9e, 0x0a, 0x26, 0x81, + 0x9f, 0x0a, 0x26, 0x89, 0xa0, 0x0a, 0x26, 0x91, 0xa1, 0x0a, 0x26, 0x99, + 0xa2, 0x0a, 0x26, 0xa1, 0xa3, 0x0a, 0x26, 0xa9, 0xa4, 0x0a, 0x26, 0xb1, + 0xa5, 0x0a, 0x26, 0xb9, 0xa6, 0x0a, 0x26, 0xc0, 0x9d, 0x0a, 0x26, 0xc9, + 0x9e, 0x0a, 0x26, 0xd1, 0x9f, 0x0a, 0x26, 0xd9, 0xa0, 0x0a, 0x26, 0xe1, + 0xa1, 0x0a, 0x26, 0xe9, 0xa2, 0x0a, 0x26, 0xf1, 0xa3, 0x0a, 0x26, 0xf9, + 0xa4, 0x0a, 0x27, 0x01, 0xa5, 0x0a, 0x27, 0x09, 0xa6, 0x0a, 0x27, 0x10, + 0x9d, 0x0a, 0x27, 0x19, 0x9e, 0x0a, 0x27, 0x21, 0x9f, 0x0a, 0x27, 0x2b, + 0x02, 0x52, 0xd5, 0xa0, 0x0a, 0x27, 0x41, 0xa1, 0x0a, 0x27, 0x49, 0xa2, + 0x0a, 0x27, 0x51, 0xa3, 0x0a, 0x27, 0x59, 0xa4, 0x0a, 0x27, 0x63, 0x02, + 0x52, 0xdd, 0xa5, 0x0a, 0x27, 0x71, 0xa6, 0x0a, 0x27, 0x7a, 0x02, 0x52, + 0xe1, 0x9d, 0x0a, 0x27, 0x89, 0x9e, 0x0a, 0x27, 0x91, 0x9f, 0x0a, 0x27, + 0x99, 0xa0, 0x0a, 0x27, 0xa1, 0xa1, 0x0a, 0x27, 0xa9, 0xa2, 0x0a, 0x27, + 0xb3, 0x02, 0x52, 0xe5, 0xa3, 0x0a, 0x27, 0xc3, 0x02, 0x52, 0xe9, 0xa4, + 0x0a, 0x27, 0xd1, 0xa5, 0x0a, 0x27, 0xd9, 0xa6, 0x0a, 0x27, 0xe0, 0x9d, + 0x0a, 0x27, 0xe9, 0x9e, 0x0a, 0x27, 0xf1, 0x9f, 0x0a, 0x27, 0xf9, 0xa0, + 0x0a, 0x28, 0x01, 0xa1, 0x0a, 0x28, 0x09, 0xa2, 0x0a, 0x28, 0x11, 0xa3, + 0x0a, 0x28, 0x19, 0xa4, 0x0a, 0x28, 0x23, 0x02, 0x52, 0xed, 0xa5, 0x0a, + 0x28, 0x31, 0xa6, 0x0a, 0x28, 0x38, 0x9d, 0x0a, 0x28, 0x41, 0x9e, 0x0a, + 0x28, 0x49, 0x9f, 0x0a, 0x28, 0x51, 0xa0, 0x0a, 0x28, 0x59, 0xa1, 0x0a, + 0x28, 0x61, 0xa2, 0x0a, 0x28, 0x69, 0xa3, 0x0a, 0x28, 0x71, 0xa4, 0x0a, + 0x28, 0x79, 0xa5, 0x0a, 0x28, 0x81, 0xa6, 0x0a, 0x28, 0x88, 0x9d, 0x0a, + 0x28, 0x91, 0x9e, 0x0a, 0x28, 0x99, 0x9f, 0x0a, 0x28, 0xa1, 0xa0, 0x0a, + 0x28, 0xa9, 0xa1, 0x0a, 0x28, 0xb1, 0xa2, 0x0a, 0x28, 0xb9, 0xa3, 0x0a, + 0x28, 0xc1, 0xa4, 0x0a, 0x28, 0xc9, 0xa5, 0x0a, 0x28, 0xd1, 0xa6, 0x0a, + 0x28, 0xd8, 0x9d, 0x0a, 0x28, 0xe1, 0x9e, 0x0a, 0x28, 0xe9, 0x9f, 0x0a, + 0x28, 0xf1, 0xa0, 0x0a, 0x28, 0xf9, 0xa1, 0x0a, 0x29, 0x01, 0xa2, 0x0a, + 0x29, 0x09, 0xa3, 0x0a, 0x29, 0x11, 0xa4, 0x0a, 0x29, 0x19, 0xa5, 0x0a, + 0x29, 0x21, 0xa6, 0x0a, 0x29, 0x28, 0x9d, 0x0a, 0x29, 0x31, 0x9e, 0x0a, + 0x29, 0x39, 0x9f, 0x0a, 0x29, 0x41, 0xa0, 0x0a, 0x29, 0x49, 0xa1, 0x0a, + 0x29, 0x51, 0xa2, 0x0a, 0x29, 0x59, 0xa3, 0x0a, 0x29, 0x61, 0xa4, 0x0a, + 0x29, 0x6b, 0x02, 0x52, 0xf1, 0xa5, 0x0a, 0x29, 0x79, 0xa6, 0x0a, 0x29, + 0x80, 0x9d, 0x0a, 0x29, 0x89, 0x9e, 0x0a, 0x29, 0x91, 0x9f, 0x0a, 0x29, + 0x99, 0xa0, 0x0a, 0x29, 0xa1, 0xa1, 0x0a, 0x29, 0xa9, 0xa2, 0x0a, 0x29, + 0xb1, 0xa3, 
0x0a, 0x29, 0xb9, 0xa4, 0x0a, 0x29, 0xc1, 0xa5, 0x0a, 0x29, + 0xc9, 0xa6, 0x0a, 0x29, 0xd0, 0x9d, 0x0a, 0x29, 0xd9, 0x9e, 0x0a, 0x29, + 0xe1, 0x9f, 0x0a, 0x29, 0xe9, 0xa0, 0x0a, 0x29, 0xf1, 0xa1, 0x0a, 0x29, + 0xf9, 0xa2, 0x0a, 0x2a, 0x01, 0xa3, 0x0a, 0x2a, 0x09, 0xa4, 0x0a, 0x2a, + 0x11, 0xa5, 0x0a, 0x2a, 0x19, 0xa6, 0x0a, 0x2a, 0x22, 0x02, 0x52, 0xf5, + 0x9d, 0x0a, 0x2a, 0x31, 0x9e, 0x0a, 0x2a, 0x39, 0x9f, 0x0a, 0x2a, 0x41, + 0xa0, 0x0a, 0x2a, 0x49, 0xa1, 0x0a, 0x2a, 0x53, 0x02, 0x52, 0xf9, 0xa2, + 0x0a, 0x2a, 0x61, 0xa3, 0x0a, 0x2a, 0x69, 0xa4, 0x0a, 0x2a, 0x71, 0xa5, + 0x0a, 0x2a, 0x79, 0xa6, 0x0a, 0x2a, 0x82, 0x02, 0x52, 0xfd, 0x9d, 0x0a, + 0x2a, 0x91, 0x9e, 0x0a, 0x2a, 0x99, 0x9f, 0x0a, 0x2a, 0xa1, 0xa0, 0x0a, + 0x2a, 0xa9, 0xa1, 0x0a, 0x2a, 0xb1, 0xa2, 0x0a, 0x2a, 0xb9, 0xa3, 0x0a, + 0x2a, 0xc1, 0xa4, 0x0a, 0x2a, 0xc9, 0xa5, 0x0a, 0x2a, 0xd1, 0xa6, 0x0a, + 0x2a, 0xda, 0x02, 0x53, 0x01, 0x9d, 0x0a, 0x2a, 0xe9, 0x9e, 0x0a, 0x2a, + 0xf1, 0x9f, 0x0a, 0x2a, 0xf9, 0xa0, 0x0a, 0x2b, 0x01, 0xa1, 0x0a, 0x2b, + 0x09, 0xa2, 0x0a, 0x2b, 0x11, 0xa3, 0x0a, 0x2b, 0x19, 0xa4, 0x0a, 0x2b, + 0x21, 0xa5, 0x0a, 0x2b, 0x29, 0xa6, 0x0a, 0x2b, 0x30, 0x9d, 0x0a, 0x2b, + 0x39, 0x9e, 0x0a, 0x2b, 0x41, 0x9f, 0x0a, 0x2b, 0x49, 0xa0, 0x0a, 0x2b, + 0x51, 0xa1, 0x0a, 0x2b, 0x59, 0xa2, 0x0a, 0x2b, 0x61, 0xa3, 0x0a, 0x2b, + 0x69, 0xa4, 0x0a, 0x2b, 0x71, 0xa5, 0x0a, 0x2b, 0x79, 0xa6, 0x0a, 0x2b, + 0x82, 0x02, 0x53, 0x05, 0x9d, 0x0a, 0x2b, 0x91, 0x9e, 0x0a, 0x2b, 0x99, + 0x1f, 0xc2, 0x53, 0x09, 0xa0, 0x0a, 0x2b, 0xb9, 0xa1, 0x0a, 0x2b, 0xc1, + 0xa2, 0x0a, 0x2b, 0xc9, 0xa3, 0x0a, 0x2b, 0xd3, 0x02, 0x53, 0x15, 0xa4, + 0x0a, 0x2b, 0xf1, 0xa5, 0x0a, 0x2b, 0xf9, 0xa6, 0x0a, 0x2c, 0x00, 0x9d, + 0x0a, 0x2c, 0x09, 0x9e, 0x0a, 0x2c, 0x11, 0x9f, 0x0a, 0x2c, 0x19, 0xa0, + 0x0a, 0x2c, 0x21, 0xa1, 0x0a, 0x2c, 0x29, 0xa2, 0x0a, 0x2c, 0x31, 0xa3, + 0x0a, 0x2c, 0x39, 0xa4, 0x0a, 0x2c, 0x41, 0xa5, 0x0a, 0x2c, 0x49, 0xa6, + 0x0a, 0x2c, 0x50, 0x9d, 0x0a, 0x2c, 0x59, 0x9e, 0x0a, 0x2c, 0x61, 0x9f, + 0x0a, 0x2c, 0x69, 0xa0, 0x0a, 0x2c, 0x71, 0xa1, 0x0a, 0x2c, 0x79, 0xa2, + 0x0a, 0x2c, 0x81, 0xa3, 0x0a, 0x2c, 0x89, 0xa4, 0x0a, 0x2c, 0x91, 0xa5, + 0x0a, 0x2c, 0x99, 0xa6, 0x0a, 0x2c, 0xa2, 0x02, 0x53, 0x21, 0x9d, 0x0a, + 0x2c, 0xb1, 0x9e, 0x0a, 0x2c, 0xb9, 0x9f, 0x0a, 0x2c, 0xc1, 0xa0, 0x0a, + 0x2c, 0xc9, 0xa1, 0x0a, 0x2c, 0xd3, 0x02, 0x53, 0x25, 0xa2, 0x0a, 0x2c, + 0xe1, 0xa3, 0x0a, 0x2c, 0xe9, 0xa4, 0x0a, 0x2c, 0xf1, 0xa5, 0x0a, 0x2c, + 0xfb, 0x02, 0x53, 0x29, 0xa6, 0x0a, 0x2d, 0x08, 0x9d, 0x0a, 0x2d, 0x11, + 0x9e, 0x0a, 0x2d, 0x1b, 0x02, 0x53, 0x2d, 0x9f, 0x0a, 0x2d, 0x29, 0xa0, + 0x0a, 0x2d, 0x31, 0xa1, 0x0a, 0x2d, 0x39, 0xa2, 0x0a, 0x2d, 0x41, 0xa3, + 0x0a, 0x2d, 0x49, 0xa4, 0x0a, 0x2d, 0x51, 0xa5, 0x0a, 0x2d, 0x59, 0xa6, + 0x0a, 0x2d, 0x60, 0x9d, 0x0a, 0x2d, 0x69, 0x9e, 0x0a, 0x2d, 0x73, 0x02, + 0x53, 0x31, 0x9f, 0x0a, 0x2d, 0x81, 0x20, 0xc2, 0x53, 0x35, 0xa1, 0x0a, + 0x2d, 0x99, 0xa2, 0x0a, 0x2d, 0xa1, 0xa3, 0x0a, 0x2d, 0xab, 0x02, 0x53, + 0x3f, 0xa4, 0x0a, 0x2d, 0xb9, 0xa5, 0x0a, 0x2d, 0xc1, 0xa6, 0x0a, 0x2d, + 0xc8, 0x9d, 0x0a, 0x2d, 0xd1, 0x9e, 0x0a, 0x2d, 0xd9, 0x9f, 0x0a, 0x2d, + 0xe1, 0xc7, 0xc6, 0xa9, 0x0a, 0x2d, 0xe9, 0xa1, 0x0a, 0x2d, 0xf1, 0xa2, + 0x0a, 0x2d, 0xf9, 0xa3, 0x0a, 0x2e, 0x01, 0xa4, 0x0a, 0x2e, 0x09, 0xa5, + 0x0a, 0x2e, 0x11, 0xa6, 0x0a, 0x2e, 0x18, 0x9d, 0x0a, 0x2e, 0x21, 0x9e, + 0x0a, 0x2e, 0x29, 0x9f, 0x0a, 0x2e, 0x31, 0xa0, 0x0a, 0x2e, 0x39, 0xa1, + 0x0a, 0x2e, 0x41, 0xa2, 0x0a, 0x2e, 0x49, 0xa3, 0x0a, 0x2e, 0x51, 0xa4, + 0x0a, 0x2e, 0x59, 0xa5, 0x0a, 0x2e, 0x61, 0xa6, 0x0a, 0x2e, 0x68, 0x1d, + 0xc2, 0x53, 
0x43, 0x9e, 0x0a, 0x2e, 0x81, 0x9f, 0x0a, 0x2e, 0x89, 0xa0, + 0x0a, 0x2e, 0x91, 0xa1, 0x0a, 0x2e, 0x99, 0xa2, 0x0a, 0x2e, 0xa1, 0xa3, + 0x0a, 0x2e, 0xa9, 0xa4, 0x0a, 0x2e, 0xb1, 0xa5, 0x0a, 0x2e, 0xb9, 0xa6, + 0x0a, 0x2e, 0xc0, 0x9d, 0x0a, 0x2e, 0xc9, 0x9e, 0x0a, 0x2e, 0xd1, 0x9f, + 0x0a, 0x2e, 0xd9, 0xa0, 0x0a, 0x2e, 0xe1, 0xa1, 0x0a, 0x2e, 0xe9, 0xa2, + 0x0a, 0x2e, 0xf1, 0xa3, 0x0a, 0x2e, 0xf9, 0xa4, 0x0a, 0x2f, 0x01, 0xa5, + 0x0a, 0x2f, 0x09, 0xa6, 0x0a, 0x2f, 0x10, 0x9d, 0x0a, 0x2f, 0x19, 0x9e, + 0x0a, 0x2f, 0x21, 0x9f, 0x0a, 0x2f, 0x29, 0xa0, 0x0a, 0x2f, 0x31, 0xa1, + 0x0a, 0x2f, 0x39, 0xa2, 0x0a, 0x2f, 0x41, 0xa3, 0x0a, 0x2f, 0x49, 0xa4, + 0x0a, 0x2f, 0x51, 0xa5, 0x0a, 0x2f, 0x59, 0xa6, 0x0a, 0x2f, 0x60, 0x9d, + 0x0a, 0x2f, 0x69, 0x9e, 0x0a, 0x2f, 0x71, 0x9f, 0x0a, 0x2f, 0x79, 0xa0, + 0x0a, 0x2f, 0x81, 0xa1, 0x0a, 0x2f, 0x89, 0xa2, 0x0a, 0x2f, 0x91, 0xa3, + 0x0a, 0x2f, 0x99, 0xa4, 0x0a, 0x2f, 0xa1, 0xa5, 0x0a, 0x2f, 0xa9, 0xa6, + 0x0a, 0x2f, 0xb0, 0x9d, 0x0a, 0x2f, 0xbb, 0x02, 0x53, 0x4f, 0x9e, 0x0a, + 0x2f, 0xc9, 0x9f, 0x0a, 0x2f, 0xd1, 0xa0, 0x0a, 0x2f, 0xd9, 0xa1, 0x0a, + 0x2f, 0xe1, 0xa2, 0x0a, 0x2f, 0xe9, 0xa3, 0x0a, 0x2f, 0xf1, 0xa4, 0x0a, + 0x2f, 0xfb, 0x02, 0x53, 0x53, 0xa5, 0x0a, 0x30, 0x09, 0xa6, 0x0a, 0x30, + 0x10, 0x9d, 0x0a, 0x30, 0x19, 0x9e, 0x0a, 0x30, 0x21, 0x9f, 0x0a, 0x30, + 0x29, 0xa0, 0x0a, 0x30, 0x31, 0xa1, 0x0a, 0x30, 0x39, 0xa2, 0x0a, 0x30, + 0x41, 0xa3, 0x0a, 0x30, 0x49, 0xa4, 0x0a, 0x30, 0x51, 0xa5, 0x0a, 0x30, + 0x59, 0xa6, 0x0a, 0x30, 0x60, 0x9d, 0x0a, 0x30, 0x69, 0x9e, 0x0a, 0x30, + 0x71, 0x9f, 0x0a, 0x30, 0x79, 0xa0, 0x0a, 0x30, 0x81, 0xa1, 0x0a, 0x30, + 0x89, 0xa2, 0x0a, 0x30, 0x91, 0xa3, 0x0a, 0x30, 0x99, 0xa4, 0x0a, 0x30, + 0xa1, 0xa5, 0x0a, 0x30, 0xa9, 0xa6, 0x0a, 0x30, 0xb0, 0x9d, 0x0a, 0x30, + 0xb9, 0x9e, 0x0a, 0x30, 0xc1, 0x9f, 0x0a, 0x30, 0xc9, 0xa0, 0x0a, 0x30, + 0xd1, 0xa1, 0x0a, 0x30, 0xd9, 0xa2, 0x0a, 0x30, 0xe1, 0xa3, 0x0a, 0x30, + 0xe9, 0xa4, 0x0a, 0x30, 0xf1, 0xa5, 0x0a, 0x30, 0xf9, 0xa6, 0x0a, 0x31, + 0x00, 0x9d, 0x0a, 0x31, 0x09, 0x9e, 0x0a, 0x31, 0x11, 0x9f, 0x0a, 0x31, + 0x19, 0xa0, 0x0a, 0x31, 0x21, 0xa1, 0x0a, 0x31, 0x29, 0xa2, 0x0a, 0x31, + 0x31, 0xa3, 0x0a, 0x31, 0x39, 0xa4, 0x0a, 0x31, 0x40, 0x9e, 0x0a, 0x31, + 0x49, 0x9f, 0x0a, 0x31, 0x51, 0xa0, 0x0a, 0x31, 0x59, 0xa1, 0x0a, 0x31, + 0x61, 0xa2, 0x0a, 0x31, 0x69, 0xa3, 0x0a, 0x31, 0x71, 0xa4, 0x0a, 0x31, + 0x79, 0xa5, 0x0a, 0x31, 0x81, 0xa6, 0x0a, 0x31, 0x88, 0x9d, 0x0a, 0x31, + 0x91, 0x9e, 0x0a, 0x31, 0x99, 0x9f, 0x0a, 0x31, 0xa1, 0xa0, 0x0a, 0x31, + 0xa9, 0xa1, 0x0a, 0x31, 0xb1, 0xa2, 0x0a, 0x31, 0xb9, 0xa3, 0x0a, 0x31, + 0xc1, 0xa4, 0x0a, 0x31, 0xc9, 0xa5, 0x0a, 0x31, 0xd1, 0xa6, 0x0a, 0x31, + 0xd8, 0x9d, 0x0a, 0x31, 0xe1, 0x9e, 0x0a, 0x31, 0xe9, 0x9f, 0x0a, 0x31, + 0xf1, 0xa0, 0x0a, 0x31, 0xf9, 0xa1, 0x0a, 0x32, 0x01, 0xa2, 0x0a, 0x32, + 0x09, 0xa3, 0x0a, 0x32, 0x11, 0xa4, 0x0a, 0x32, 0x19, 0xa5, 0x0a, 0x32, + 0x21, 0xa6, 0x0a, 0x32, 0x28, 0xd1, 0x05, 0x75, 0x01, 0x5b, 0x79, 0xd4, + 0x3e, 0x1c, 0x01, 0x5c, 0x61, 0xd5, 0x36, 0x9b, 0x01, 0x5c, 0x69, 0xd3, + 0x44, 0xa2, 0x01, 0x5c, 0x71, 0xd2, 0x47, 0x93, 0x01, 0x5c, 0x78, 0xc8, + 0x2c, 0xb2, 0x01, 0x1b, 0x81, 0xc9, 0x24, 0x47, 0x01, 0x1b, 0x79, 0x05, + 0xc2, 0x53, 0x57, 0x06, 0xc2, 0x53, 0x63, 0x42, 0x02, 0xae, 0xc2, 0x53, + 0x76, 0xd0, 0x03, 0xb7, 0x01, 0x1a, 0x41, 0x42, 0x00, 0x49, 0xc2, 0x53, + 0x82, 0xcc, 0x07, 0xc7, 0x01, 0x1a, 0x21, 0xc9, 0x02, 0xfe, 0x01, 0x1a, + 0x11, 0xc5, 0x03, 0x02, 0x01, 0x1a, 0x09, 0xc3, 0xba, 0x27, 0x01, 0x19, + 0xd9, 0xc5, 0x00, 0xe2, 0x01, 0x19, 0xc0, 0xc9, 0x20, 0xa8, 0x01, 0x1b, + 0x09, 0xc3, 
0xba, 0x27, 0x01, 0x1a, 0xa9, 0xc7, 0x80, 0x70, 0x01, 0x1a, + 0x88, 0xcb, 0x95, 0xf0, 0x01, 0x1b, 0x89, 0xca, 0x94, 0xf4, 0x01, 0x1b, + 0x31, 0x45, 0x9a, 0x3d, 0x42, 0x53, 0x8e, 0xc5, 0x1e, 0xc8, 0x01, 0x1b, + 0x59, 0xc9, 0x20, 0xa8, 0x01, 0x1b, 0x11, 0xc5, 0x05, 0xa2, 0x01, 0x1a, + 0x90, 0xc8, 0x52, 0x09, 0x01, 0x1a, 0xc9, 0xc5, 0x05, 0xa2, 0x01, 0x1a, + 0x58, 0xc2, 0x00, 0xb1, 0x01, 0x1a, 0xf9, 0xc3, 0x05, 0xa4, 0x01, 0x19, + 0xe8, 0xc2, 0x00, 0xf1, 0x01, 0x12, 0x2b, 0x02, 0x53, 0x9a, 0xcb, 0x23, + 0xa0, 0x01, 0x53, 0x80, 0xc2, 0x0c, 0x43, 0x08, 0x59, 0x99, 0x87, 0x08, + 0x59, 0x88, 0xc2, 0x00, 0x5f, 0x08, 0x59, 0x21, 0xc2, 0x0c, 0x43, 0x08, + 0x59, 0x19, 0x87, 0x08, 0x59, 0x10, 0x87, 0x08, 0x58, 0x38, 0x90, 0x08, + 0x58, 0x29, 0x91, 0x08, 0x58, 0x18, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xc9, + 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x10, 0xc3, 0x02, 0xdf, 0x08, 0x08, 0x4b, + 0x02, 0x53, 0xa0, 0xc4, 0x0d, 0x0e, 0x08, 0x08, 0x92, 0x02, 0x53, 0xa4, + 0xc9, 0x57, 0x20, 0x08, 0x09, 0x58, 0xc4, 0x18, 0x12, 0x08, 0x08, 0x8b, + 0x02, 0x53, 0xaa, 0x91, 0x08, 0x08, 0x42, 0x02, 0x53, 0xb0, 0xc2, 0x00, + 0x5f, 0x08, 0x08, 0x5b, 0x02, 0x53, 0xb4, 0xc3, 0x45, 0x6b, 0x08, 0x08, + 0xa2, 0x02, 0x53, 0xb8, 0xc2, 0x00, 0x33, 0x08, 0x08, 0x53, 0x02, 0x53, + 0xbe, 0xc3, 0x0d, 0x0f, 0x08, 0x08, 0x9a, 0x02, 0x53, 0xc2, 0x00, 0xc2, + 0x53, 0xc8, 0xc2, 0x0d, 0x10, 0x08, 0x08, 0xaa, 0x02, 0x53, 0xd4, 0x00, + 0xc2, 0x53, 0xda, 0xc2, 0x0d, 0x10, 0x08, 0x08, 0xb2, 0x02, 0x53, 0xe6, + 0xc7, 0x0d, 0x04, 0x08, 0x09, 0x01, 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x48, + 0xc9, 0x57, 0x20, 0x08, 0x09, 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x09, 0x09, + 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x50, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x98, + 0xcc, 0x14, 0xcd, 0x08, 0x09, 0xc1, 0xcd, 0x7e, 0xb0, 0x08, 0x09, 0xd8, + 0xca, 0x01, 0x68, 0x01, 0x28, 0x03, 0x02, 0x53, 0xec, 0x06, 0xc2, 0x53, + 0xf2, 0xc2, 0x02, 0xae, 0x01, 0x2b, 0xab, 0x02, 0x53, 0xfc, 0xc4, 0x00, + 0x49, 0x01, 0x2b, 0xa3, 0x02, 0x54, 0x02, 0xc5, 0x00, 0x2c, 0x01, 0x2b, + 0xb1, 0x44, 0x13, 0x1d, 0xc2, 0x54, 0x08, 0xc8, 0x00, 0x5f, 0x01, 0x28, + 0x13, 0x02, 0x54, 0x14, 0x4f, 0x61, 0x5c, 0xc2, 0x54, 0x1a, 0x4c, 0x52, + 0xbb, 0x42, 0x54, 0x26, 0x50, 0x5c, 0x42, 0xc2, 0x54, 0x32, 0xdd, 0x11, + 0x34, 0x01, 0x2a, 0x29, 0xdd, 0x11, 0xff, 0x01, 0x2a, 0x19, 0x50, 0x11, + 0x39, 0x42, 0x54, 0x44, 0x45, 0x02, 0x9a, 0x42, 0x54, 0x56, 0xd0, 0x5e, + 0x62, 0x01, 0x2b, 0xf0, 0xc2, 0x01, 0x48, 0x01, 0x2b, 0xdb, 0x02, 0x54, + 0x66, 0x4a, 0xa2, 0xa6, 0x42, 0x54, 0x6c, 0x45, 0x02, 0x9a, 0x42, 0x54, + 0x78, 0xc8, 0x00, 0x5f, 0x01, 0x28, 0x59, 0xca, 0x01, 0x68, 0x01, 0x28, + 0x48, 0xc8, 0x00, 0x5f, 0x01, 0x28, 0x39, 0xca, 0x01, 0x68, 0x01, 0x28, + 0x28, 0xc8, 0x00, 0x5f, 0x01, 0x2a, 0x8b, 0x02, 0x54, 0x8a, 0x47, 0x54, + 0x42, 0xc2, 0x54, 0x90, 0x49, 0x45, 0xd2, 0xc2, 0x54, 0xa2, 0xca, 0x01, + 0x68, 0x01, 0x2a, 0x80, 0x4b, 0x99, 0xb8, 0xc2, 0x54, 0xb4, 0x4b, 0x8e, + 0x76, 0xc2, 0x54, 0xc6, 0x4a, 0x5c, 0x42, 0xc2, 0x54, 0xd8, 0x4a, 0x11, + 0x39, 0x42, 0x54, 0xf0, 0xd1, 0x53, 0x43, 0x01, 0x2b, 0x59, 0xcb, 0x8d, + 0x84, 0x01, 0x2b, 0x11, 0xcc, 0x89, 0xd9, 0x01, 0x2a, 0xf8, 0xd1, 0x53, + 0x32, 0x01, 0x2b, 0x51, 0xcb, 0x8e, 0xce, 0x01, 0x2b, 0x09, 0xcc, 0x87, + 0xa5, 0x01, 0x2a, 0xf0, 0xd0, 0x32, 0x47, 0x01, 0x2a, 0x11, 0xca, 0xa2, + 0xce, 0x01, 0x29, 0x41, 0xcb, 0x98, 0xe7, 0x01, 0x29, 0x00, 0xd0, 0x32, + 0x71, 0x01, 0x29, 0xf9, 0xca, 0xa2, 0xe2, 0x01, 0x29, 0x29, 0xcb, 0x98, + 0xdc, 0x01, 0x28, 0xe8, 0xd1, 0x53, 0x43, 0x01, 0x2b, 0x41, 0xcb, 0x8d, + 0x84, 0x01, 0x2a, 0xe1, 0xcc, 0x89, 0xd9, 0x01, 0x2a, 0xc8, 0xd1, 0x53, + 0x32, 0x01, 
0x2b, 0x39, 0xcb, 0x8e, 0xce, 0x01, 0x2a, 0xd9, 0xcc, 0x87, + 0xa5, 0x01, 0x2a, 0xc0, 0xd5, 0x32, 0x6c, 0x01, 0x2a, 0x41, 0xd0, 0x32, + 0x71, 0x01, 0x29, 0xb9, 0x45, 0x00, 0x49, 0xc2, 0x55, 0x08, 0x46, 0x00, + 0x2c, 0x42, 0x55, 0x14, 0xd5, 0x32, 0x42, 0x01, 0x2a, 0x01, 0xd0, 0x32, + 0x47, 0x01, 0x29, 0xc1, 0x45, 0x00, 0x49, 0xc2, 0x55, 0x20, 0x46, 0x00, + 0x2c, 0x42, 0x55, 0x2c, 0xce, 0x72, 0xaa, 0x01, 0x2a, 0x49, 0xc8, 0x11, + 0xff, 0x01, 0x29, 0xc9, 0xca, 0x11, 0x34, 0x01, 0x29, 0x88, 0xce, 0x73, + 0x44, 0x01, 0x29, 0xf1, 0xc8, 0x11, 0x49, 0x01, 0x29, 0xb1, 0xca, 0x12, + 0x12, 0x01, 0x29, 0x70, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xf9, 0xc3, 0x0a, + 0xea, 0x01, 0x18, 0x60, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xf1, 0xc3, 0x0a, + 0xea, 0x01, 0x18, 0x68, 0x89, 0x01, 0x8d, 0x68, 0xc2, 0x1b, 0x88, 0x01, + 0x8d, 0x70, 0xc2, 0x1b, 0x88, 0x01, 0x8d, 0x78, 0x89, 0x01, 0x89, 0x21, + 0x90, 0x01, 0x8d, 0x48, 0x90, 0x01, 0x8d, 0x39, 0x89, 0x01, 0x8d, 0x90, + 0x89, 0x01, 0x89, 0x29, 0x90, 0x01, 0x8d, 0x28, 0x90, 0x01, 0x8d, 0x98, + 0xa2, 0x0f, 0xd8, 0xbb, 0x02, 0x55, 0x38, 0xa3, 0x0f, 0xd9, 0x38, 0xa0, + 0x0f, 0xd8, 0x33, 0x02, 0x55, 0x3c, 0xa2, 0x0f, 0xd8, 0x93, 0x02, 0x55, + 0x4e, 0xa1, 0x0f, 0xd8, 0x53, 0x02, 0x55, 0x52, 0xa3, 0x0f, 0xd9, 0x08, + 0xa3, 0x0f, 0xd9, 0x70, 0xa1, 0x0f, 0xd8, 0x63, 0x02, 0x55, 0x5d, 0xa3, + 0x0f, 0xd9, 0x19, 0xc2, 0x00, 0x22, 0x0f, 0xd9, 0x90, 0xa3, 0x0f, 0xd9, + 0x88, 0xa3, 0x0f, 0xd9, 0x49, 0xa2, 0x0f, 0xd8, 0xd2, 0x02, 0x55, 0x68, + 0xa3, 0x0f, 0xd9, 0x78, 0xa1, 0x0f, 0xd8, 0x6b, 0x02, 0x55, 0x6c, 0xa3, + 0x0f, 0xd9, 0x21, 0xa2, 0x0f, 0xd8, 0xa2, 0x02, 0x55, 0x77, 0xa2, 0x0f, + 0xd8, 0xc2, 0x02, 0x55, 0x7b, 0xa3, 0x0f, 0xd9, 0xa8, 0x45, 0xa6, 0x50, + 0xc2, 0x55, 0x7f, 0x46, 0x3b, 0x9d, 0xc2, 0x55, 0xb6, 0xd0, 0x5d, 0x02, + 0x01, 0x39, 0x61, 0xce, 0x71, 0x4c, 0x01, 0x37, 0x41, 0xc5, 0x02, 0xd2, + 0x01, 0x2e, 0x7b, 0x02, 0x55, 0xce, 0xc8, 0xb8, 0x3a, 0x01, 0x33, 0x18, + 0x4e, 0x70, 0xce, 0xc2, 0x55, 0xd2, 0xc7, 0x37, 0x27, 0x01, 0x38, 0x11, + 0xce, 0x73, 0xa6, 0x01, 0x38, 0x01, 0xc6, 0xcb, 0xcf, 0x01, 0x36, 0x39, + 0xc9, 0xb0, 0x1a, 0x01, 0x33, 0x01, 0x0f, 0xc2, 0x55, 0xde, 0xca, 0x50, + 0x80, 0x01, 0x30, 0xb9, 0xc3, 0x0e, 0x6b, 0x01, 0x30, 0x29, 0xcc, 0x83, + 0x01, 0x01, 0x30, 0x01, 0xc5, 0x0b, 0x0a, 0x01, 0x2d, 0x03, 0x02, 0x55, + 0xea, 0xd3, 0x40, 0x08, 0x0f, 0xab, 0x88, 0x44, 0xe1, 0x27, 0xc2, 0x55, + 0xee, 0xc4, 0x73, 0x5b, 0x01, 0x36, 0xf9, 0xd7, 0x28, 0x5a, 0x01, 0x36, + 0xb1, 0xc8, 0x36, 0xb4, 0x01, 0x30, 0x71, 0xd2, 0x49, 0xaf, 0x0f, 0xab, + 0xf8, 0x43, 0x01, 0x47, 0xc2, 0x56, 0x00, 0xc6, 0x3a, 0x1a, 0x01, 0x2e, + 0x33, 0x02, 0x56, 0x12, 0x14, 0x42, 0x56, 0x16, 0x44, 0x00, 0x2d, 0xc2, + 0x56, 0x22, 0xc8, 0x46, 0x71, 0x01, 0x2d, 0x61, 0xc6, 0xcd, 0x67, 0x0f, + 0x9f, 0xb0, 0x43, 0x00, 0x4a, 0xc2, 0x56, 0x34, 0x11, 0xc2, 0x56, 0x44, + 0x45, 0x17, 0x15, 0x42, 0x56, 0x50, 0x0e, 0xc2, 0x56, 0x5c, 0x11, 0x42, + 0x56, 0x68, 0xca, 0x9c, 0x20, 0x01, 0x35, 0xc1, 0x46, 0x01, 0xdc, 0x42, + 0x56, 0x74, 0xd9, 0x1f, 0x31, 0x01, 0x33, 0xd9, 0x12, 0x42, 0x56, 0x92, + 0x07, 0xc2, 0x56, 0xaa, 0xd5, 0x31, 0xc4, 0x0f, 0xad, 0x51, 0x11, 0x42, + 0x56, 0xb9, 0xcc, 0x88, 0x59, 0x01, 0x2d, 0x81, 0xc6, 0xc1, 0x01, 0x0f, + 0xac, 0x41, 0x42, 0x00, 0xc4, 0x42, 0x56, 0xc5, 0x46, 0x05, 0x87, 0xc2, + 0x56, 0xd1, 0x48, 0x4a, 0x54, 0x42, 0x56, 0xdd, 0xd0, 0x20, 0x66, 0x01, + 0x3d, 0xb1, 0xd0, 0x03, 0xb7, 0x01, 0x3d, 0xa9, 0xd0, 0x3c, 0x90, 0x01, + 0x3d, 0xa0, 0x85, 0x01, 0x09, 0x69, 0x9c, 0x01, 0x09, 0x41, 0x94, 0x01, + 0x08, 0xe1, 0x8b, 0x01, 0x08, 0x89, 0x8a, 0x01, 0x08, 0x60, 0xd0, 0x15, + 0x35, 0x01, 
0x3a, 0x48, 0x9a, 0x01, 0x38, 0xb9, 0x42, 0x00, 0x6b, 0xc2, + 0x56, 0xef, 0xc8, 0x8e, 0xa5, 0x0f, 0xaf, 0xa0, 0xc3, 0x45, 0xa1, 0x00, + 0xda, 0xdb, 0x02, 0x56, 0xfc, 0xc5, 0xda, 0x3d, 0x00, 0xdb, 0x00, 0xc8, + 0xb6, 0xb2, 0x00, 0xdb, 0xe8, 0x46, 0xce, 0x2d, 0xc2, 0x57, 0x02, 0x49, + 0xb3, 0x7a, 0x42, 0x57, 0x14, 0x48, 0xb5, 0xf2, 0xc2, 0x57, 0x20, 0x46, + 0xce, 0x33, 0x42, 0x57, 0x2c, 0xc4, 0x8f, 0x44, 0x00, 0xdb, 0x99, 0xc5, + 0xd7, 0x4a, 0x00, 0xdb, 0x91, 0x44, 0xac, 0xc3, 0xc2, 0x57, 0x38, 0xc7, + 0x7c, 0x94, 0x00, 0xdb, 0x79, 0xc5, 0xdb, 0xc8, 0x00, 0xdb, 0x61, 0xc5, + 0xd7, 0xd1, 0x00, 0xdb, 0x58, 0x03, 0xc2, 0x57, 0x4a, 0x07, 0xc2, 0x57, + 0x5f, 0xc3, 0x00, 0x74, 0x00, 0xdb, 0x31, 0xc3, 0x38, 0x86, 0x00, 0xdb, + 0x19, 0xc3, 0x08, 0x48, 0x00, 0xdb, 0x08, 0xc5, 0x60, 0xcc, 0x00, 0xda, + 0xf9, 0xc7, 0xc2, 0x65, 0x00, 0xda, 0xe8, 0xc4, 0x18, 0x10, 0x00, 0xda, + 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0xda, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xda, + 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0xda, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xda, + 0x99, 0xc2, 0x02, 0xa0, 0x00, 0xda, 0x90, 0xcb, 0x98, 0x37, 0x00, 0xda, + 0x61, 0xcb, 0x91, 0x6d, 0x00, 0xda, 0x59, 0xc5, 0xd7, 0xbd, 0x00, 0xd8, + 0x81, 0xc4, 0xa2, 0x33, 0x00, 0xd8, 0x2a, 0x02, 0x57, 0x6b, 0xc7, 0xc7, + 0x20, 0x00, 0xda, 0x41, 0xc4, 0xa2, 0x33, 0x00, 0xd8, 0x78, 0xc9, 0xae, + 0x61, 0x00, 0xda, 0x39, 0x83, 0x00, 0xd9, 0x12, 0x02, 0x57, 0x71, 0xc9, + 0xa9, 0x5a, 0x00, 0xda, 0x31, 0x83, 0x00, 0xd8, 0x9a, 0x02, 0x57, 0x75, + 0x43, 0x20, 0x27, 0x42, 0x57, 0x81, 0xc6, 0xb5, 0xac, 0x00, 0xd8, 0x6a, + 0x02, 0x57, 0x8d, 0xc5, 0xc4, 0x7b, 0x00, 0xd8, 0x5a, 0x02, 0x57, 0x93, + 0xc8, 0xbf, 0xaa, 0x00, 0xd9, 0x50, 0xc6, 0xcb, 0x93, 0x00, 0xd9, 0x40, + 0x83, 0x00, 0xd9, 0x33, 0x02, 0x57, 0x99, 0xc2, 0x19, 0x2c, 0x00, 0xd8, + 0xe1, 0xc2, 0x01, 0x30, 0x00, 0xd8, 0xb8, 0x42, 0x00, 0x4d, 0x42, 0x57, + 0x9f, 0xc5, 0xd4, 0xa2, 0x00, 0xd8, 0xd8, 0xc5, 0xd4, 0x48, 0x00, 0xd8, + 0xc8, 0xc5, 0xd7, 0x4f, 0x00, 0xd8, 0xb0, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, + 0x90, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, 0x50, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, + 0x40, 0xc7, 0xc2, 0x6c, 0x00, 0xda, 0x18, 0xc5, 0x25, 0x91, 0x00, 0xd9, + 0xf3, 0x02, 0x57, 0xab, 0xc5, 0xc2, 0x6e, 0x00, 0xd9, 0xa8, 0xc7, 0xc2, + 0x6c, 0x00, 0xd9, 0xe8, 0xc7, 0xc2, 0x6c, 0x00, 0xd9, 0xd8, 0xc5, 0xd7, + 0xcc, 0x00, 0xd9, 0xc8, 0xc5, 0xd9, 0x70, 0x00, 0xd9, 0xb8, 0xc6, 0x1e, + 0x89, 0x00, 0xd8, 0x09, 0xc5, 0xd6, 0xaa, 0x00, 0xd8, 0x00, 0xc9, 0xae, + 0xfa, 0x0b, 0x57, 0xa1, 0xc5, 0x28, 0xb0, 0x0b, 0x57, 0x80, 0xc9, 0xaf, + 0x81, 0x0b, 0x57, 0x99, 0xc5, 0x28, 0xb0, 0x0b, 0x57, 0x88, 0x87, 0x0b, + 0x57, 0x59, 0xc3, 0x1b, 0x88, 0x0b, 0x56, 0x80, 0xc2, 0x14, 0x68, 0x0b, + 0x57, 0x00, 0x91, 0x0b, 0x57, 0x48, 0xc3, 0x2d, 0x2f, 0x0b, 0x57, 0x30, + 0xc3, 0x26, 0x76, 0x0b, 0x57, 0x21, 0xc2, 0x02, 0x0a, 0x0b, 0x56, 0xa8, + 0x91, 0x0b, 0x56, 0xf1, 0xc3, 0xdf, 0xb7, 0x0b, 0x56, 0xb8, 0xc2, 0x02, + 0xaa, 0x0b, 0x56, 0xe9, 0xc2, 0x02, 0x98, 0x0b, 0x56, 0xb0, 0xc3, 0x62, + 0x26, 0x0b, 0x56, 0xc1, 0x83, 0x0b, 0x56, 0x88, 0x42, 0x00, 0x56, 0xc2, + 0x57, 0xaf, 0x42, 0x00, 0x5d, 0xc2, 0x57, 0xf0, 0x42, 0x00, 0xa9, 0xc2, + 0x58, 0x30, 0x42, 0x00, 0xee, 0xc2, 0x58, 0x65, 0x42, 0x01, 0x60, 0xc2, + 0x58, 0xa5, 0x42, 0x01, 0x31, 0x42, 0x58, 0xdd, 0xc2, 0xd0, 0x00, 0x05, + 0x36, 0x29, 0x87, 0x05, 0x36, 0x50, 0x87, 0x05, 0x36, 0x41, 0xc2, 0x10, + 0x11, 0x05, 0x36, 0xb8, 0x96, 0x05, 0x35, 0xd9, 0xc2, 0xd0, 0x00, 0x05, + 0x36, 0x21, 0x90, 0x05, 0x36, 0x90, 0xc3, 0xe5, 0xab, 0x05, 0x37, 0x71, + 0xc4, 0xe0, 0xa3, 0x05, 0x37, 0x78, 0x87, 0x05, 0x35, 0x29, 0xc2, 0xd0, + 0x00, 0x05, 
0x36, 0x81, 0x90, 0x05, 0x37, 0x08, 0x8b, 0x05, 0x35, 0x61, + 0xc2, 0x02, 0xe0, 0x05, 0x35, 0x68, 0x87, 0x05, 0x35, 0x31, 0x83, 0x05, + 0x35, 0x80, 0x96, 0x05, 0x37, 0x41, 0x90, 0x05, 0x37, 0x50, 0xc3, 0x7c, + 0x57, 0x05, 0x35, 0x91, 0xc3, 0x8b, 0xa9, 0x05, 0x35, 0xf1, 0xc2, 0x02, + 0xe0, 0x05, 0x36, 0x30, 0xc2, 0x10, 0x11, 0x05, 0x35, 0xe0, 0xc2, 0x02, + 0xe0, 0x05, 0x36, 0x39, 0xc2, 0x5d, 0xa1, 0x05, 0x37, 0x58, 0xc5, 0xde, + 0x75, 0x05, 0x36, 0x99, 0xc2, 0x01, 0x30, 0x05, 0x36, 0xa1, 0x83, 0x05, + 0x36, 0xa8, 0xc3, 0xd0, 0xd7, 0x05, 0x35, 0x79, 0x90, 0x05, 0x37, 0x10, + 0xc2, 0x00, 0xc4, 0x05, 0x37, 0x01, 0xc2, 0x04, 0xc6, 0x05, 0x37, 0x38, + 0xc2, 0x25, 0x9f, 0x05, 0x35, 0xb1, 0xc3, 0xd7, 0xe2, 0x05, 0x35, 0xc1, + 0x97, 0x05, 0x36, 0x01, 0x91, 0x05, 0x36, 0xb0, 0xc7, 0xc8, 0xd2, 0x05, + 0x37, 0x81, 0xc9, 0xb1, 0x16, 0x05, 0x37, 0x88, 0xc9, 0xab, 0x88, 0x01, + 0x5a, 0xd9, 0xcd, 0x7d, 0x2a, 0x01, 0x5a, 0xe8, 0x12, 0xc2, 0x59, 0x13, + 0xc5, 0xdd, 0x67, 0x00, 0xdf, 0xf1, 0xc8, 0xb8, 0x82, 0x00, 0xdf, 0xe0, + 0xd2, 0x48, 0x7d, 0x00, 0xdf, 0x78, 0x91, 0x00, 0xdf, 0x69, 0x8b, 0x00, + 0xdf, 0x58, 0x87, 0x00, 0xdf, 0x48, 0xc2, 0x01, 0x5d, 0x00, 0xdf, 0x19, + 0x83, 0x00, 0xde, 0xa2, 0x02, 0x59, 0x1f, 0xc2, 0x0e, 0x9a, 0x00, 0xdf, + 0x11, 0xc2, 0x19, 0x2c, 0x00, 0xdf, 0x01, 0xc2, 0x01, 0x30, 0x00, 0xde, + 0xe9, 0xca, 0x9d, 0x60, 0x00, 0xde, 0xb9, 0x83, 0x00, 0xde, 0x48, 0x4a, + 0x48, 0x83, 0xc2, 0x59, 0x25, 0x83, 0x00, 0xde, 0xc1, 0xca, 0x9b, 0x94, + 0x00, 0xde, 0xb0, 0xc7, 0xc8, 0xaf, 0x00, 0xde, 0x68, 0xc2, 0x00, 0xd0, + 0x00, 0x4c, 0xb3, 0x02, 0x59, 0x5f, 0x83, 0x00, 0x4c, 0xa8, 0x83, 0x00, + 0x4d, 0xc1, 0xc2, 0x0d, 0xf6, 0x00, 0x4d, 0xb9, 0xc2, 0x00, 0xd0, 0x00, + 0x4d, 0xb0, 0x83, 0x00, 0x4d, 0x83, 0x02, 0x59, 0x65, 0xc2, 0x00, 0x39, + 0x00, 0x4e, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x4e, 0xe8, 0x83, 0x00, 0x4d, + 0x79, 0xc2, 0x19, 0x2c, 0x00, 0x4e, 0xf8, 0xc2, 0x00, 0xd0, 0x00, 0x4d, + 0x69, 0x83, 0x00, 0x4d, 0x60, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x59, 0x83, + 0x00, 0x4d, 0x50, 0x83, 0x00, 0x4d, 0x41, 0xc2, 0x00, 0xc1, 0x00, 0x4d, + 0x19, 0xc2, 0x19, 0x2c, 0x00, 0x4c, 0xf1, 0xc2, 0x01, 0x30, 0x00, 0x4c, + 0xc8, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x39, 0x83, 0x00, 0x4d, 0x31, 0x06, + 0x42, 0x59, 0x6b, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x29, 0x83, 0x00, 0x4d, + 0x21, 0x16, 0x42, 0x59, 0x75, 0xc2, 0x00, 0xd0, 0x00, 0x4c, 0xe9, 0x83, + 0x00, 0x4c, 0xe0, 0xc2, 0x00, 0xd0, 0x00, 0x4c, 0xd9, 0x83, 0x00, 0x4c, + 0xd0, 0xc2, 0x00, 0xd0, 0x00, 0x4c, 0xc1, 0x83, 0x00, 0x4c, 0xb8, 0x97, + 0x00, 0x4c, 0xa1, 0x8b, 0x00, 0x4c, 0x81, 0x83, 0x00, 0x4c, 0x30, 0x8b, + 0x00, 0x4c, 0x40, 0x97, 0x00, 0x4c, 0x50, 0x47, 0xb2, 0x2e, 0xc2, 0x59, + 0x7f, 0xcd, 0x80, 0x36, 0x00, 0x4f, 0xe0, 0x42, 0x07, 0xb2, 0xc2, 0x59, + 0x8d, 0x03, 0xc2, 0x59, 0x99, 0xc5, 0x33, 0x5d, 0x00, 0x4d, 0xe1, 0xcb, + 0x1e, 0x89, 0x00, 0x4c, 0x08, 0x97, 0x00, 0x4e, 0x61, 0x8b, 0x00, 0x4e, + 0x41, 0x83, 0x00, 0x4d, 0xf0, 0x94, 0x00, 0x4e, 0x1b, 0x02, 0x59, 0xa5, + 0x8e, 0x00, 0x4f, 0x12, 0x02, 0x59, 0xa9, 0x97, 0x00, 0x4e, 0x10, 0x8b, + 0x00, 0x4e, 0x00, 0xc2, 0x02, 0xa0, 0x00, 0x4f, 0x41, 0xc4, 0x02, 0xde, + 0x00, 0x4f, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x4f, 0x51, 0xc3, 0x0d, 0x14, + 0x00, 0x4f, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x4f, 0x61, 0xc4, 0x18, 0x10, + 0x00, 0x4f, 0x68, 0xc3, 0x05, 0x14, 0x00, 0x4f, 0xa3, 0x02, 0x59, 0xad, + 0x16, 0xc2, 0x59, 0xb3, 0xc4, 0x09, 0x9d, 0x00, 0x4f, 0xb8, 0x1b, 0xc2, + 0x59, 0xbf, 0xc2, 0x00, 0x39, 0x00, 0xd0, 0x59, 0x83, 0x00, 0xd0, 0x51, + 0x09, 0x42, 0x59, 0xc9, 0xc2, 0x00, 0xb0, 0x00, 0xd0, 0x39, 0x83, 0x00, + 0xd0, 0x30, 
0xa4, 0x01, 0x42, 0x03, 0x02, 0x59, 0xd3, 0x9e, 0x01, 0x40, + 0x0b, 0x02, 0x59, 0xd7, 0x9f, 0x01, 0x40, 0x13, 0x02, 0x5a, 0x05, 0xa0, + 0x01, 0x40, 0x23, 0x02, 0x5a, 0x2c, 0xa1, 0x01, 0x40, 0x43, 0x02, 0x5a, + 0x4c, 0xa2, 0x01, 0x40, 0x83, 0x02, 0x5a, 0x65, 0xa3, 0x01, 0x41, 0x03, + 0x02, 0x5a, 0x77, 0xa5, 0x01, 0x44, 0x00, 0x00, 0x42, 0x5a, 0x82, 0xc2, + 0x0d, 0x10, 0x08, 0x83, 0x18, 0x9b, 0x08, 0x83, 0x10, 0xc4, 0x18, 0x10, + 0x08, 0x82, 0xc3, 0x02, 0x5a, 0x8e, 0xc2, 0x22, 0xcc, 0x08, 0x82, 0xba, + 0x02, 0x5a, 0x94, 0x0b, 0xc2, 0x5a, 0x9a, 0x11, 0x42, 0x5a, 0xa6, 0x0a, + 0xc2, 0x5a, 0xb2, 0x19, 0xc2, 0x5a, 0xbe, 0xc2, 0x00, 0xc4, 0x08, 0x82, + 0xd8, 0x49, 0x5c, 0x83, 0x42, 0x5a, 0xc8, 0xc2, 0x00, 0xdb, 0x08, 0x81, + 0xa1, 0x83, 0x08, 0x81, 0x70, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x51, 0x83, + 0x08, 0x81, 0x48, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x41, 0x83, 0x08, 0x81, + 0x38, 0x83, 0x08, 0x81, 0x31, 0xc2, 0x00, 0xc1, 0x08, 0x81, 0x09, 0xc2, + 0x19, 0x2c, 0x08, 0x80, 0xe1, 0xc2, 0x01, 0x30, 0x08, 0x80, 0xb8, 0xc2, + 0x00, 0xd0, 0x08, 0x81, 0x29, 0x83, 0x08, 0x81, 0x21, 0x06, 0x42, 0x5a, + 0xe0, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x19, 0x83, 0x08, 0x81, 0x11, 0x16, + 0x42, 0x5a, 0xea, 0xc2, 0x00, 0xd0, 0x08, 0x80, 0xd9, 0x83, 0x08, 0x80, + 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0x80, 0xc9, 0x83, 0x08, 0x80, 0xc0, 0xc2, + 0x00, 0xd0, 0x08, 0x80, 0xb1, 0x83, 0x08, 0x80, 0xa8, 0xc2, 0x00, 0xd0, + 0x08, 0x80, 0xa1, 0x83, 0x08, 0x80, 0x98, 0x97, 0x08, 0x80, 0x91, 0x8b, + 0x08, 0x80, 0x81, 0x83, 0x08, 0x80, 0x30, 0x47, 0xb2, 0x2e, 0xc2, 0x5a, + 0xf4, 0x83, 0x08, 0x81, 0x78, 0x97, 0x08, 0x80, 0x50, 0x8b, 0x08, 0x80, + 0x40, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x81, 0xc2, 0x0d, 0xf6, 0x08, 0x81, + 0x89, 0x83, 0x08, 0x81, 0x90, 0x91, 0x08, 0x82, 0x23, 0x02, 0x5b, 0x02, + 0x03, 0xc2, 0x5b, 0x08, 0x87, 0x08, 0x82, 0x11, 0x48, 0xb2, 0x2d, 0xc2, + 0x5b, 0x14, 0x97, 0x08, 0x81, 0xe3, 0x02, 0x5b, 0x22, 0x8b, 0x08, 0x81, + 0xd3, 0x02, 0x5b, 0x26, 0xce, 0x6e, 0x2e, 0x08, 0x81, 0xc8, 0xc4, 0x26, + 0x78, 0x08, 0x83, 0x79, 0xc5, 0x06, 0xdb, 0x08, 0x83, 0x71, 0x15, 0xc2, + 0x5b, 0x2a, 0x08, 0xc2, 0x5b, 0x36, 0x16, 0xc2, 0x5b, 0x42, 0xc3, 0x05, + 0x14, 0x08, 0x83, 0x39, 0xc4, 0x15, 0xe7, 0x08, 0x83, 0x30, 0xc4, 0x6e, + 0x13, 0x08, 0x82, 0x69, 0xc3, 0x02, 0x6e, 0x08, 0x82, 0x58, 0xc8, 0x3a, + 0x36, 0x08, 0x82, 0x51, 0x96, 0x08, 0x82, 0x48, 0x42, 0x00, 0xbd, 0xc2, + 0x5b, 0x4e, 0xc9, 0x79, 0x79, 0x0e, 0x83, 0x90, 0xc7, 0xc3, 0x1b, 0x0e, + 0x85, 0xa9, 0xc6, 0xc5, 0x06, 0x0e, 0x85, 0xa0, 0xc4, 0x99, 0xff, 0x0e, + 0x87, 0xa1, 0xc3, 0x2e, 0xd7, 0x0e, 0x83, 0xf8, 0x44, 0xe3, 0xbb, 0xc2, + 0x5b, 0x60, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xd8, 0x00, 0x42, 0x5b, 0x72, + 0xc5, 0xd6, 0xa5, 0x0e, 0x82, 0x10, 0x03, 0xc2, 0x5b, 0x7e, 0x11, 0x42, + 0x5b, 0x88, 0xc3, 0x03, 0x13, 0x0e, 0x83, 0xd1, 0xc9, 0xaa, 0x68, 0x0e, + 0x81, 0xb8, 0xc2, 0x00, 0xec, 0x0e, 0x87, 0x79, 0xc2, 0x01, 0x6c, 0x0e, + 0x87, 0x71, 0xc2, 0x00, 0x3c, 0x0e, 0x87, 0x69, 0xc2, 0x01, 0xdd, 0x0e, + 0x87, 0x61, 0xc2, 0x01, 0x30, 0x0e, 0x87, 0x59, 0xc3, 0x29, 0x6f, 0x0e, + 0x87, 0x51, 0xc2, 0x00, 0xb0, 0x0e, 0x87, 0x48, 0x90, 0x0e, 0x84, 0xb9, + 0xc9, 0x79, 0x79, 0x0e, 0x83, 0x98, 0x46, 0xce, 0xab, 0xc2, 0x5b, 0x94, + 0x46, 0xcb, 0x03, 0xc2, 0x5b, 0xa1, 0xc5, 0x4c, 0x93, 0x0e, 0x81, 0x18, + 0xc6, 0xd0, 0x01, 0x0e, 0x81, 0x99, 0xca, 0x6d, 0x0c, 0x0e, 0x80, 0x68, + 0xc5, 0xd6, 0x0a, 0x0e, 0x85, 0x09, 0xc4, 0xe1, 0x7f, 0x0e, 0x84, 0xd0, + 0xc5, 0xda, 0x42, 0x0e, 0x85, 0x01, 0x8b, 0x0e, 0x84, 0xf8, 0xc2, 0x00, + 0xba, 0x0e, 0x84, 0xf1, 0xc4, 0x01, 0x92, 0x0e, 0x84, 0xe8, 0x8b, 0x0e, + 0x84, 0xe1, 
0xc5, 0xda, 0x42, 0x0e, 0x84, 0xd8, 0xc7, 0xc8, 0x93, 0x0e, + 0x83, 0x11, 0xc2, 0x01, 0xc3, 0x0e, 0x82, 0xe0, 0xc9, 0xa9, 0xe1, 0x0e, + 0x80, 0xf8, 0x00, 0x42, 0x5b, 0xad, 0x00, 0x42, 0x5b, 0xb7, 0xc4, 0xcf, + 0x8b, 0x0e, 0x80, 0x40, 0x45, 0xda, 0xe2, 0xc2, 0x5b, 0xc1, 0xc4, 0xc8, + 0x2c, 0x0e, 0x80, 0x98, 0xc8, 0xbe, 0x2a, 0x0e, 0x87, 0x31, 0xc5, 0xcf, + 0x3c, 0x0e, 0x84, 0x92, 0x02, 0x5b, 0xd3, 0x46, 0xd0, 0x07, 0xc2, 0x5b, + 0xd9, 0xc4, 0xc2, 0xa0, 0x0e, 0x84, 0xc8, 0x16, 0xc2, 0x5b, 0xeb, 0xd5, + 0x35, 0x4b, 0x0e, 0x86, 0x91, 0xdc, 0x13, 0x35, 0x0e, 0x86, 0x89, 0xd1, + 0x4f, 0x58, 0x0e, 0x86, 0x80, 0xc9, 0x9c, 0xdf, 0x0e, 0x84, 0x00, 0x43, + 0x01, 0x92, 0xc2, 0x5b, 0xf7, 0xd5, 0x35, 0x4b, 0x0e, 0x86, 0xb1, 0xdc, + 0x13, 0x35, 0x0e, 0x86, 0xa9, 0xd1, 0x4f, 0x58, 0x0e, 0x86, 0xa0, 0xc3, + 0x2e, 0xd7, 0x0e, 0x83, 0xe9, 0xc4, 0x99, 0xff, 0x0e, 0x83, 0xe0, 0xc4, + 0xde, 0x8f, 0x0e, 0x82, 0x99, 0xc6, 0xd0, 0x19, 0x0e, 0x80, 0x52, 0x02, + 0x5c, 0x03, 0xc5, 0xda, 0x1a, 0x0e, 0x86, 0x39, 0xc9, 0xb1, 0x9d, 0x0e, + 0x85, 0xe0, 0x47, 0x1a, 0x0a, 0xc2, 0x5c, 0x09, 0xcb, 0x98, 0xbb, 0x0e, + 0x85, 0xf0, 0xca, 0xa2, 0xec, 0x0e, 0x86, 0x21, 0xc8, 0xba, 0x3a, 0x0e, + 0x86, 0x18, 0x10, 0xc2, 0x5c, 0x15, 0xc2, 0x01, 0x6c, 0x0e, 0x86, 0x01, + 0xc2, 0x00, 0x3c, 0x0e, 0x85, 0xf9, 0xc2, 0x01, 0xdd, 0x0e, 0x85, 0xe9, + 0xc2, 0x00, 0xb0, 0x0e, 0x85, 0xd0, 0xcf, 0x6b, 0x07, 0x0e, 0x85, 0xc8, + 0x44, 0x3b, 0xaf, 0xc2, 0x5c, 0x21, 0xc4, 0x65, 0xea, 0x0e, 0x85, 0xb8, + 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0x31, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xd0, + 0x47, 0xc9, 0x11, 0xc2, 0x5c, 0x2b, 0x44, 0x89, 0x3e, 0x42, 0x5c, 0x37, + 0x48, 0x6d, 0x79, 0xc2, 0x5c, 0x43, 0x42, 0x00, 0x2c, 0x42, 0x5c, 0x4f, + 0xce, 0x6d, 0x5c, 0x0e, 0x85, 0x29, 0xcc, 0x89, 0x3d, 0x0e, 0x85, 0x18, + 0xc6, 0xcf, 0x3b, 0x0e, 0x84, 0xb1, 0xc3, 0x1f, 0x1d, 0x0e, 0x84, 0x39, + 0x83, 0x0e, 0x81, 0x80, 0xc7, 0xc8, 0x31, 0x0e, 0x83, 0x81, 0x12, 0xc2, + 0x5c, 0x5b, 0xc7, 0xc4, 0xaa, 0x0e, 0x83, 0x69, 0x42, 0x00, 0xbd, 0x42, + 0x5c, 0x67, 0xcd, 0x78, 0xe6, 0x0e, 0x83, 0xc9, 0xc2, 0x01, 0xc3, 0x0e, + 0x81, 0x6a, 0x02, 0x5c, 0x71, 0xcf, 0x68, 0x0a, 0x0e, 0x84, 0x71, 0x16, + 0xc2, 0x5c, 0x7d, 0xcb, 0x8f, 0x52, 0x0e, 0x84, 0x59, 0xcc, 0x80, 0xd9, + 0x0e, 0x84, 0x50, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0x41, 0xc5, 0xcc, 0xcc, + 0x0e, 0x80, 0x21, 0xcb, 0x6d, 0x0b, 0x0e, 0x80, 0x18, 0xc7, 0xc8, 0x31, + 0x0e, 0x83, 0x89, 0xcb, 0x94, 0x17, 0x0e, 0x83, 0x79, 0xc7, 0xc4, 0xaa, + 0x0e, 0x83, 0x61, 0x90, 0x0e, 0x81, 0xca, 0x02, 0x5c, 0x89, 0xc2, 0x00, + 0x45, 0x0e, 0x80, 0xb9, 0x8b, 0x0e, 0x80, 0x00, 0x47, 0xc1, 0xee, 0xc2, + 0x5c, 0x8f, 0xc6, 0xcf, 0x89, 0x0e, 0x80, 0x4a, 0x02, 0x5c, 0x9b, 0xc4, + 0x77, 0x35, 0x0e, 0x82, 0x68, 0x16, 0xc2, 0x5c, 0x9f, 0xc2, 0x01, 0xc3, + 0x0e, 0x82, 0x08, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0xc1, 0xc5, 0xcc, 0xcc, + 0x0e, 0x80, 0x31, 0xcb, 0x6d, 0x0b, 0x0e, 0x80, 0x28, 0x94, 0x08, 0xe0, + 0x38, 0xd1, 0x51, 0xbc, 0x0f, 0xdc, 0xf9, 0xc2, 0x00, 0x49, 0x01, 0x2f, + 0xd0, 0x4e, 0x60, 0x6d, 0xc2, 0x5c, 0xa9, 0xcc, 0x80, 0xf1, 0x0f, 0xac, + 0x50, 0xc9, 0xb4, 0xac, 0x0f, 0xac, 0x61, 0xc5, 0xcd, 0x8c, 0x0f, 0xac, + 0x48, 0xd1, 0x51, 0xbc, 0x0f, 0xdc, 0xf1, 0xc2, 0x00, 0x49, 0x01, 0x2f, + 0xf8, 0x4e, 0x01, 0xf4, 0xc2, 0x5c, 0xb5, 0xdb, 0x17, 0x61, 0x01, 0x49, + 0xf0, 0x5b, 0x16, 0xa4, 0xc2, 0x5c, 0xc1, 0x46, 0x01, 0xc8, 0x42, 0x5c, + 0xcd, 0xce, 0x08, 0x79, 0x01, 0x2c, 0x31, 0xcd, 0x3f, 0xe8, 0x01, 0x2c, + 0x18, 0xc9, 0xae, 0xbb, 0x01, 0x3f, 0xf0, 0xc9, 0xae, 0xbb, 0x01, 0x3f, + 0xe0, 0xc9, 0xae, 0xbb, 0x01, 0x3f, 0xe8, 0xc9, 0xae, 0xbb, 0x01, 0x3f, + 0xd8, 0xcc, 
0x82, 0x35, 0x01, 0x3f, 0xd1, 0xc5, 0x01, 0xa2, 0x01, 0x3f, + 0xb8, 0xcf, 0x64, 0xd1, 0x01, 0x52, 0xe9, 0xcb, 0x98, 0x42, 0x01, 0x52, + 0xd9, 0x42, 0x00, 0x58, 0x42, 0x5c, 0xdf, 0xc7, 0x16, 0x16, 0x01, 0x52, + 0x89, 0x45, 0x00, 0x5a, 0x42, 0x5c, 0xeb, 0x42, 0x00, 0xa9, 0xc2, 0x5c, + 0xf7, 0x09, 0x42, 0x5d, 0x09, 0xd3, 0x16, 0x91, 0x01, 0x4c, 0x99, 0x49, + 0x05, 0xcb, 0x42, 0x5d, 0x18, 0x49, 0x01, 0xd3, 0xc2, 0x5d, 0x24, 0xcc, + 0x01, 0xdb, 0x0f, 0xdc, 0x61, 0xc6, 0x02, 0xd1, 0x0f, 0xc8, 0x3b, 0x02, + 0x5d, 0x2a, 0x42, 0x00, 0x5b, 0xc2, 0x5d, 0x30, 0xcb, 0x96, 0x7f, 0x0f, + 0xdd, 0x91, 0xc6, 0x9e, 0xf4, 0x0f, 0xdd, 0xc8, 0xd0, 0x5b, 0xc2, 0x0f, + 0xc2, 0xc1, 0xd1, 0x55, 0x30, 0x01, 0x0f, 0xf9, 0xc5, 0x01, 0xa2, 0x01, + 0x0c, 0xa3, 0x02, 0x5d, 0x3c, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xa3, 0x02, + 0x5d, 0x40, 0x19, 0xc2, 0x5d, 0x46, 0xcb, 0x94, 0x22, 0x01, 0x58, 0x61, + 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x20, 0xcc, 0x06, 0xdb, 0x01, 0x2c, 0x79, + 0xcd, 0x15, 0x02, 0x01, 0x2c, 0x70, 0xd1, 0x3f, 0xe4, 0x01, 0x2c, 0x49, + 0xd0, 0x05, 0xb7, 0x01, 0x16, 0x58, 0x00, 0x42, 0x5d, 0x52, 0xd3, 0x01, + 0xb4, 0x01, 0x00, 0xc1, 0xd0, 0x58, 0xd2, 0x01, 0x71, 0x30, 0x00, 0x42, + 0x5d, 0x6a, 0x44, 0x02, 0xdf, 0xc2, 0x5d, 0x7c, 0xcc, 0x86, 0xcd, 0x0f, + 0xaf, 0x61, 0xde, 0x06, 0x69, 0x0f, 0xde, 0x08, 0x44, 0x01, 0x94, 0xc2, + 0x5d, 0x88, 0xd3, 0x41, 0xf6, 0x01, 0x70, 0x48, 0xd0, 0x4a, 0x77, 0x01, + 0x2c, 0x59, 0xc7, 0xb2, 0xec, 0x01, 0x4b, 0xe0, 0xca, 0xa2, 0x74, 0x01, + 0x1c, 0xe9, 0xc9, 0x57, 0x36, 0x01, 0x1c, 0xe1, 0xca, 0xa3, 0x5a, 0x01, + 0x1c, 0xd8, 0xce, 0x01, 0xb9, 0x01, 0x00, 0xe1, 0xcc, 0x8a, 0x09, 0x01, + 0x4e, 0xd1, 0xcb, 0x1a, 0x50, 0x01, 0x71, 0x41, 0xcd, 0x0b, 0x91, 0x01, + 0x80, 0x50, 0xcb, 0x1a, 0x50, 0x01, 0x4c, 0x29, 0x05, 0xc2, 0x5d, 0x94, + 0xd2, 0x21, 0x89, 0x01, 0x80, 0xb1, 0xd6, 0x08, 0x88, 0x01, 0x80, 0xc1, + 0xce, 0x25, 0xad, 0x01, 0x80, 0xd0, 0xd6, 0x08, 0x88, 0x01, 0x4c, 0xb9, + 0xd2, 0x21, 0x89, 0x01, 0x80, 0x80, 0x50, 0x58, 0xb2, 0xc2, 0x5d, 0xa0, + 0x4e, 0x6c, 0x36, 0x42, 0x5d, 0xac, 0xda, 0x1b, 0xd0, 0x0f, 0xc4, 0xa0, + 0x45, 0x01, 0x95, 0xc2, 0x5d, 0xb8, 0x44, 0x0b, 0x26, 0x42, 0x5d, 0xc4, + 0xcd, 0x7e, 0x3b, 0x01, 0x0c, 0xf1, 0x48, 0x01, 0x9a, 0x42, 0x5d, 0xd0, + 0x45, 0x00, 0x8c, 0xc2, 0x5d, 0xdc, 0x16, 0xc2, 0x5e, 0x12, 0xd5, 0x10, + 0x87, 0x01, 0x0e, 0x31, 0xc8, 0xae, 0xbc, 0x01, 0x0d, 0x23, 0x02, 0x5e, + 0x1e, 0x03, 0x42, 0x5e, 0x24, 0xc5, 0x01, 0xa2, 0x01, 0x0e, 0x83, 0x02, + 0x5e, 0x30, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x60, 0xcb, 0x6f, 0xff, 0x01, + 0x0e, 0xe1, 0xca, 0x88, 0xdf, 0x0f, 0xc1, 0xc0, 0x46, 0x01, 0x52, 0xc2, + 0x5e, 0x3a, 0xc2, 0x02, 0x35, 0x0f, 0xd7, 0x90, 0xd0, 0x58, 0x62, 0x0f, + 0xc2, 0x01, 0xc5, 0x01, 0xa2, 0x0f, 0xc2, 0x20, 0xc5, 0x01, 0xa2, 0x01, + 0x58, 0x29, 0xd3, 0x43, 0xe4, 0x01, 0x5c, 0x40, 0xca, 0x50, 0x5e, 0x00, + 0x7e, 0xc0, 0xca, 0x37, 0x4e, 0x01, 0x13, 0x91, 0xc5, 0x07, 0x62, 0x01, + 0x13, 0x20, 0x4a, 0x33, 0xad, 0x42, 0x5e, 0x46, 0xe0, 0x09, 0xc7, 0x01, + 0x54, 0x58, 0x47, 0xc7, 0x35, 0xc2, 0x5e, 0x55, 0x53, 0x40, 0x1b, 0x42, + 0x5e, 0x61, 0xe0, 0x07, 0x07, 0x01, 0x54, 0x88, 0xc2, 0x00, 0xd0, 0x00, + 0xe2, 0x71, 0x83, 0x00, 0xe2, 0x68, 0xc2, 0x00, 0xd0, 0x00, 0xe0, 0xc1, + 0x83, 0x00, 0xe0, 0xb8, 0xc7, 0xc0, 0x97, 0x00, 0xe1, 0xf0, 0xd2, 0x4d, + 0x57, 0x0f, 0xbd, 0xa9, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x49, 0xc4, 0x01, + 0xe3, 0x01, 0x2c, 0x88, 0x44, 0x00, 0x2d, 0xc2, 0x5e, 0x67, 0xc3, 0x14, + 0xa7, 0x0f, 0xb4, 0x40, 0xe0, 0x08, 0x87, 0x01, 0x3b, 0x90, 0x52, 0x11, + 0x92, 0xc2, 0x5e, 0x6d, 0x44, 0x0d, 0x14, 0x42, 0x5e, 0x79, 0xd7, 0x2a, + 0xb0, 0x0f, 
0xbe, 0x01, 0xd8, 0x22, 0x43, 0x0f, 0xbe, 0x90, 0xc7, 0x6f, + 0xbc, 0x0f, 0xaf, 0x88, 0x83, 0x05, 0x26, 0xe9, 0xc2, 0x00, 0xd0, 0x05, + 0x26, 0xf0, 0x44, 0x5d, 0xb5, 0xc2, 0x5e, 0x85, 0xc5, 0xdb, 0x87, 0x05, + 0x27, 0xc8, 0xc4, 0xb2, 0xf8, 0x00, 0x04, 0x50, 0xd6, 0x2e, 0xd8, 0x01, + 0x50, 0xa1, 0x45, 0x00, 0x8c, 0x42, 0x5e, 0xa3, 0x24, 0xc2, 0x5e, 0xaf, + 0x23, 0xc2, 0x5e, 0xc3, 0x42, 0xe5, 0x28, 0xc2, 0x5e, 0xdf, 0x04, 0xc2, + 0x5e, 0xff, 0xc4, 0xe4, 0xb7, 0x08, 0x30, 0xd9, 0x1e, 0xc2, 0x5f, 0x07, + 0x20, 0xc2, 0x5f, 0x19, 0x21, 0xc2, 0x5f, 0x39, 0x22, 0x42, 0x5f, 0x41, + 0x42, 0x00, 0x91, 0xc2, 0x5f, 0x69, 0x49, 0xa8, 0xca, 0xc2, 0x5f, 0x75, + 0x4a, 0xa2, 0xd8, 0x42, 0x5f, 0x7f, 0xc4, 0x18, 0x10, 0x00, 0xca, 0x69, + 0xc2, 0x22, 0xcc, 0x00, 0xca, 0x60, 0xc3, 0x0d, 0x14, 0x00, 0xca, 0x59, + 0xc3, 0x09, 0x9e, 0x00, 0xca, 0x50, 0xc4, 0x02, 0xde, 0x00, 0xca, 0x49, + 0xc2, 0x02, 0xa0, 0x00, 0xca, 0x40, 0xc3, 0x15, 0x31, 0x00, 0xca, 0x01, + 0xc4, 0xdf, 0x0f, 0x00, 0xc9, 0xd9, 0xc9, 0xac, 0xc3, 0x00, 0xc9, 0xd1, + 0xc9, 0xa9, 0x87, 0x00, 0xc9, 0xc8, 0xc2, 0x00, 0xdb, 0x00, 0xc9, 0xc1, + 0xc2, 0x00, 0x39, 0x00, 0xc9, 0xb9, 0xc2, 0x01, 0xc3, 0x00, 0xc9, 0xb1, + 0xc2, 0x00, 0xb0, 0x00, 0xc9, 0xa9, 0x10, 0xc2, 0x5f, 0x89, 0xc2, 0x01, + 0x6f, 0x00, 0xc9, 0x99, 0xc8, 0x14, 0x38, 0x00, 0xc9, 0x91, 0xc2, 0x02, + 0x2b, 0x00, 0xc9, 0x80, 0xc2, 0x01, 0x4a, 0x00, 0xc9, 0x59, 0xc2, 0x00, + 0x39, 0x00, 0xc9, 0x51, 0xc2, 0x19, 0x2c, 0x00, 0xc9, 0x48, 0x91, 0x00, + 0xc9, 0x43, 0x02, 0x5f, 0x93, 0x87, 0x00, 0xc9, 0x3b, 0x02, 0x5f, 0x97, + 0x83, 0x00, 0xc9, 0x03, 0x02, 0x5f, 0x9b, 0x97, 0x00, 0xc9, 0x11, 0x8b, + 0x00, 0xc9, 0x08, 0xc2, 0x00, 0x39, 0x00, 0xc8, 0xf1, 0xc2, 0x00, 0xd0, + 0x00, 0xc8, 0x61, 0x83, 0x00, 0xc8, 0x58, 0xc3, 0x2e, 0x0f, 0x00, 0xc8, + 0xe9, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0x21, 0x83, 0x00, 0xc8, 0x18, 0x83, + 0x00, 0xc8, 0xd9, 0xc2, 0x0d, 0xf6, 0x00, 0xc8, 0xd1, 0xc2, 0x00, 0xd0, + 0x00, 0xc8, 0xc8, 0x90, 0x00, 0xc8, 0x50, 0xc2, 0x00, 0xd0, 0x00, 0xc8, + 0x99, 0x83, 0x00, 0xc8, 0x90, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0x89, 0x83, + 0x00, 0xc8, 0x80, 0x83, 0x00, 0xc8, 0x79, 0xc2, 0x01, 0x30, 0x00, 0xc8, + 0x28, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0x71, 0x83, 0x00, 0xc8, 0x68, 0xc2, + 0x00, 0xd0, 0x00, 0xc8, 0x49, 0x83, 0x00, 0xc8, 0x40, 0xc2, 0x00, 0xd0, + 0x00, 0xc8, 0x39, 0x83, 0x00, 0xc8, 0x30, 0xc2, 0x00, 0xd0, 0x00, 0xc8, + 0x11, 0x83, 0x00, 0xc8, 0x08, 0x45, 0xdc, 0x72, 0xc2, 0x5f, 0xa3, 0x44, + 0x87, 0x22, 0x42, 0x5f, 0xaf, 0xc6, 0x0b, 0x09, 0x0f, 0xbf, 0x29, 0xc6, + 0x02, 0xd1, 0x0f, 0xa9, 0xa0, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x11, 0xc6, + 0x0b, 0x09, 0x0f, 0xbf, 0x48, 0x43, 0x02, 0x6f, 0xc2, 0x5f, 0xc1, 0x46, + 0x19, 0x02, 0x42, 0x5f, 0xcd, 0x43, 0x02, 0xa0, 0xc2, 0x5f, 0xdf, 0xdb, + 0x18, 0x54, 0x01, 0x57, 0xe0, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x09, 0xc6, + 0x0b, 0x09, 0x0f, 0xbf, 0x40, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x19, 0xc6, + 0x0b, 0x09, 0x0f, 0xbf, 0x50, 0x46, 0x02, 0x0f, 0xc2, 0x5f, 0xeb, 0x48, + 0x19, 0x9b, 0x42, 0x60, 0xa1, 0xcd, 0x78, 0x57, 0x00, 0xeb, 0xf1, 0xcd, + 0x7b, 0x63, 0x00, 0xeb, 0xd8, 0xc4, 0x74, 0x82, 0x01, 0x04, 0xa0, 0x96, + 0x00, 0xe8, 0xdb, 0x02, 0x60, 0xbd, 0x8e, 0x00, 0x14, 0xfb, 0x02, 0x60, + 0xc3, 0x87, 0x00, 0xe8, 0x3b, 0x02, 0x60, 0xc9, 0x9c, 0x00, 0xe9, 0x11, + 0x99, 0x00, 0xe9, 0x09, 0x98, 0x00, 0xe9, 0x01, 0x97, 0x00, 0xe8, 0xe1, + 0x94, 0x00, 0x14, 0x03, 0x02, 0x60, 0xd5, 0x92, 0x00, 0xe8, 0xc1, 0x91, + 0x00, 0xe8, 0x7b, 0x02, 0x60, 0xe7, 0x8f, 0x00, 0xe8, 0x69, 0x8d, 0x00, + 0xe8, 0x59, 0x8c, 0x00, 0xe8, 0x51, 0x86, 0x00, 0xe8, 0x29, 0x85, 0x00, + 0xe8, 0x21, 
0x84, 0x00, 0x14, 0xcb, 0x02, 0x60, 0xf5, 0x83, 0x00, 0xe8, + 0x03, 0x02, 0x60, 0xfb, 0x89, 0x00, 0x13, 0x13, 0x02, 0x60, 0xff, 0x8b, + 0x00, 0x13, 0x53, 0x02, 0x61, 0x05, 0x90, 0x00, 0x13, 0xa1, 0x9b, 0x00, + 0x14, 0x79, 0x8a, 0x00, 0x14, 0xe1, 0x88, 0x05, 0x39, 0x81, 0x95, 0x05, + 0x39, 0x89, 0x93, 0x05, 0x3d, 0x78, 0xca, 0x45, 0x1d, 0x0e, 0xf8, 0x78, + 0xc4, 0x00, 0x32, 0x0e, 0xf8, 0x71, 0xc6, 0x01, 0x73, 0x00, 0x0d, 0xf0, + 0xd4, 0x01, 0x13, 0x0e, 0xf8, 0x50, 0xd8, 0x23, 0x33, 0x00, 0x15, 0x11, + 0xc8, 0xba, 0xda, 0x00, 0x0d, 0x50, 0xc5, 0x01, 0x0e, 0x00, 0x14, 0xc1, + 0xca, 0x54, 0x9e, 0x00, 0x15, 0x60, 0x9b, 0x00, 0x02, 0xcb, 0x02, 0x61, + 0x0b, 0x8f, 0x00, 0x02, 0x6b, 0x02, 0x61, 0x17, 0x97, 0x00, 0x02, 0xab, + 0x02, 0x61, 0x23, 0x91, 0x00, 0x02, 0x7b, 0x02, 0x61, 0x2d, 0x8b, 0x00, + 0x02, 0x4b, 0x02, 0x61, 0x51, 0x87, 0x00, 0x02, 0x2b, 0x02, 0x61, 0x67, + 0x83, 0x00, 0x02, 0x0b, 0x02, 0x61, 0x8f, 0x95, 0x00, 0x02, 0x9b, 0x02, + 0x61, 0xc5, 0x9c, 0x00, 0x02, 0xd3, 0x02, 0x61, 0xe7, 0x9a, 0x00, 0x02, + 0xc3, 0x02, 0x61, 0xed, 0x99, 0x00, 0x02, 0xbb, 0x02, 0x61, 0xf3, 0x98, + 0x00, 0x02, 0xb3, 0x02, 0x61, 0xff, 0x96, 0x00, 0x02, 0xa3, 0x02, 0x62, + 0x1b, 0x94, 0x00, 0x02, 0x93, 0x02, 0x62, 0x40, 0x92, 0x00, 0x02, 0x83, + 0x02, 0x62, 0x50, 0x90, 0x00, 0x02, 0x73, 0x02, 0x62, 0x56, 0x8e, 0x00, + 0x02, 0x63, 0x02, 0x62, 0x60, 0x8d, 0x00, 0x02, 0x5b, 0x02, 0x62, 0x6a, + 0x8a, 0x00, 0x02, 0x43, 0x02, 0x62, 0x70, 0x89, 0x00, 0x02, 0x3b, 0x02, + 0x62, 0x88, 0x88, 0x00, 0x02, 0x33, 0x02, 0x62, 0xa0, 0x86, 0x00, 0x02, + 0x23, 0x02, 0x62, 0xa6, 0x85, 0x00, 0x02, 0x1b, 0x02, 0x62, 0xb3, 0x84, + 0x00, 0x02, 0x13, 0x02, 0x62, 0xd4, 0x8c, 0x00, 0x02, 0x53, 0x02, 0x62, + 0xe6, 0x93, 0x00, 0x02, 0x8a, 0x02, 0x62, 0xec, 0xc2, 0x00, 0x0b, 0x00, + 0x09, 0x91, 0xc2, 0x49, 0x0c, 0x00, 0x0a, 0x90, 0x42, 0x01, 0x7c, 0xc2, + 0x62, 0xf2, 0x43, 0xe5, 0xc3, 0x42, 0x62, 0xfe, 0xc3, 0x91, 0x00, 0x00, + 0x74, 0x31, 0xc3, 0x1c, 0x63, 0x00, 0x74, 0x49, 0xc3, 0xe5, 0xf0, 0x00, + 0x74, 0x61, 0x10, 0xc2, 0x63, 0x0a, 0x42, 0x02, 0x10, 0xc2, 0x63, 0x16, + 0x06, 0xc2, 0x63, 0x20, 0xc3, 0x39, 0x6d, 0x00, 0x75, 0x01, 0xc3, 0x12, + 0xad, 0x00, 0x75, 0x60, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0xe1, 0xc3, 0x02, + 0x45, 0x00, 0x74, 0xf0, 0xc3, 0x02, 0x45, 0x00, 0x74, 0x51, 0xc4, 0xdf, + 0x43, 0x00, 0x75, 0x50, 0xc2, 0x00, 0xd0, 0x00, 0x75, 0x41, 0xc2, 0x0d, + 0xf6, 0x00, 0x75, 0x48, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0xb1, 0xc3, 0x02, + 0x45, 0x00, 0x74, 0xb8, 0xc2, 0x00, 0x45, 0x00, 0x74, 0xe9, 0xc2, 0x0c, + 0x42, 0x00, 0x74, 0xf8, 0xc3, 0x00, 0x74, 0x00, 0x75, 0x19, 0xc3, 0x65, + 0xba, 0x00, 0x75, 0x28, 0xd1, 0x51, 0xbc, 0x0f, 0xdc, 0xe9, 0xc2, 0x00, + 0x49, 0x01, 0x2f, 0xc8, 0x55, 0x0a, 0x4c, 0xc2, 0x63, 0x2a, 0x48, 0x0a, + 0x53, 0xc2, 0x63, 0x3c, 0x4a, 0x13, 0xe3, 0x42, 0x63, 0x48, 0xc6, 0x04, + 0xe1, 0x0f, 0xda, 0x91, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0x98, 0xd1, 0x51, + 0xbc, 0x0f, 0xdc, 0xe1, 0xc2, 0x00, 0x49, 0x01, 0x2f, 0xc0, 0xc6, 0x04, + 0xe1, 0x0f, 0xda, 0xb9, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0xc0, 0x55, 0x16, + 0xaa, 0xc2, 0x63, 0x54, 0x48, 0x0a, 0x53, 0xc2, 0x63, 0x66, 0x4a, 0x13, + 0xe3, 0x42, 0x63, 0x72, 0xd5, 0x35, 0x60, 0x0f, 0xdc, 0xd1, 0xd0, 0x06, + 0xd7, 0x0f, 0xdc, 0x00, 0xe0, 0x08, 0x67, 0x0f, 0xdb, 0x50, 0xe0, 0x0a, + 0x27, 0x0f, 0xdc, 0x90, 0xe0, 0x01, 0xc7, 0x0f, 0xdc, 0x88, 0xd9, 0x1b, + 0xd1, 0x0f, 0xc4, 0xa9, 0xcb, 0x8a, 0x46, 0x01, 0x0f, 0x5b, 0x02, 0x63, + 0x7e, 0xc8, 0xae, 0xbc, 0x01, 0x0f, 0x52, 0x02, 0x63, 0x84, 0xca, 0x03, + 0xdd, 0x0f, 0xc4, 0x89, 0x48, 0x01, 0x9a, 0x42, 0x63, 0x8a, 0xd1, 0x53, + 0x98, 0x01, 
0x4a, 0x49, 0xd8, 0x05, 0xcf, 0x01, 0x5f, 0x68, 0x45, 0x00, + 0x8c, 0xc2, 0x63, 0x9f, 0xdc, 0x14, 0x15, 0x01, 0x0e, 0x29, 0xc8, 0xae, + 0xbc, 0x01, 0x0d, 0x29, 0xc6, 0x10, 0x9d, 0x01, 0x48, 0x91, 0xda, 0x1c, + 0x1e, 0x0f, 0xdd, 0xc0, 0xc5, 0x01, 0x4a, 0x01, 0x0d, 0xf9, 0x00, 0x42, + 0x63, 0xcf, 0xc5, 0x01, 0x4a, 0x01, 0x0d, 0xf1, 0x00, 0x42, 0x63, 0xe1, + 0xdb, 0x15, 0xb1, 0x01, 0x19, 0x21, 0xd2, 0x46, 0x6b, 0x01, 0x5d, 0xc8, + 0xd6, 0x31, 0x98, 0x01, 0x52, 0x41, 0xcc, 0x06, 0xbb, 0x01, 0x52, 0x30, + 0xca, 0xa4, 0xcc, 0x01, 0x52, 0x29, 0xc7, 0x80, 0x70, 0x01, 0x52, 0x11, + 0xca, 0x8d, 0xb1, 0x01, 0x52, 0x08, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0xf1, + 0x42, 0x00, 0xac, 0xc2, 0x63, 0xed, 0x48, 0x0a, 0xa9, 0x42, 0x63, 0xf3, + 0xc8, 0x00, 0xbf, 0x01, 0x3b, 0x11, 0xc6, 0x00, 0x91, 0x01, 0x3a, 0xb8, + 0xc6, 0x02, 0xd1, 0x0f, 0xbc, 0x39, 0xd6, 0x2e, 0xac, 0x01, 0x36, 0xd9, + 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x88, 0xdd, 0x10, 0xc0, 0x0f, 0xb3, 0xd9, + 0xc5, 0x13, 0x53, 0x0f, 0xbd, 0x60, 0x4e, 0x47, 0x15, 0xc2, 0x64, 0x05, + 0x45, 0x20, 0x6c, 0x42, 0x64, 0x11, 0x45, 0x01, 0xb4, 0xc2, 0x64, 0x1d, + 0x42, 0x01, 0x0c, 0x42, 0x64, 0x29, 0x49, 0x01, 0xaa, 0xc2, 0x64, 0x35, + 0xc5, 0x01, 0xa2, 0x01, 0x3c, 0xd0, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x21, + 0xc9, 0xb4, 0x91, 0x0f, 0xb2, 0xe0, 0xc9, 0x8e, 0x0a, 0x0f, 0xaa, 0x39, + 0xca, 0x9c, 0x48, 0x01, 0x5a, 0xa8, 0x48, 0x00, 0x29, 0xc2, 0x64, 0x41, + 0x00, 0x42, 0x64, 0x47, 0x50, 0x01, 0xa9, 0xc2, 0x64, 0x53, 0x51, 0x08, + 0xa9, 0x42, 0x64, 0x5f, 0xd7, 0x28, 0x88, 0x01, 0x3d, 0xd9, 0x46, 0x0a, + 0xef, 0x42, 0x64, 0x6b, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x99, 0xcd, 0x0e, + 0x61, 0x0f, 0xbe, 0xa0, 0x4b, 0x14, 0xd9, 0xc2, 0x64, 0x77, 0x00, 0x42, + 0x64, 0x89, 0xe0, 0x0c, 0x07, 0x01, 0x3d, 0x70, 0xd5, 0x03, 0xd2, 0x0f, + 0xc0, 0xc9, 0xdb, 0x17, 0x46, 0x0f, 0xc0, 0xe8, 0xe0, 0x0a, 0xa7, 0x01, + 0x3d, 0x40, 0xce, 0x6c, 0x60, 0x01, 0x3a, 0x31, 0xc7, 0xa7, 0xc7, 0x01, + 0x38, 0xa0, 0x46, 0x00, 0x8b, 0xc2, 0x64, 0x95, 0xc9, 0xb2, 0x48, 0x01, + 0x5a, 0xc8, 0xe0, 0x03, 0xa7, 0x01, 0x3d, 0x00, 0x45, 0x00, 0x5a, 0xc2, + 0x64, 0xa1, 0xc9, 0x99, 0x62, 0x0f, 0xa5, 0x91, 0x53, 0x08, 0xa7, 0x42, + 0x64, 0xad, 0xcb, 0x03, 0xbc, 0x01, 0x3c, 0xcb, 0x02, 0x64, 0xb9, 0x50, + 0x01, 0xa9, 0x42, 0x64, 0xbf, 0xc3, 0x05, 0x14, 0x0f, 0xc4, 0xe3, 0x02, + 0x64, 0xcb, 0xca, 0x9d, 0x2e, 0x0f, 0xc4, 0xe8, 0xcf, 0x15, 0x36, 0x0f, + 0xbd, 0x91, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x50, 0xc6, 0x7c, 0x7b, 0x0f, + 0xa4, 0xe9, 0xc5, 0x01, 0xa2, 0x0f, 0xa4, 0xc1, 0xcf, 0x64, 0x68, 0x0f, + 0x9c, 0xa0, 0x9e, 0x0d, 0x85, 0x41, 0x9d, 0x0d, 0x85, 0x38, 0x9e, 0x0d, + 0x81, 0x09, 0x9d, 0x0d, 0x81, 0x00, 0xcd, 0x79, 0xb6, 0x07, 0xd8, 0xf9, + 0x47, 0x00, 0x58, 0xc2, 0x64, 0xcf, 0xc7, 0xc1, 0xaf, 0x00, 0x2f, 0x88, + 0x46, 0x00, 0x8b, 0x42, 0x64, 0xdb, 0x46, 0x00, 0x8b, 0x42, 0x64, 0xe7, + 0x46, 0x00, 0x8b, 0x42, 0x64, 0xf3, 0x46, 0x00, 0x8b, 0x42, 0x64, 0xff, + 0xc2, 0x04, 0xad, 0x00, 0x2f, 0x53, 0x02, 0x65, 0x0b, 0xc4, 0xd4, 0xda, + 0x00, 0x2f, 0x33, 0x02, 0x65, 0x11, 0xc2, 0x00, 0x3d, 0x00, 0x2e, 0xc2, + 0x02, 0x65, 0x17, 0xc3, 0x11, 0xef, 0x00, 0x2f, 0x4b, 0x02, 0x65, 0x1d, + 0xc5, 0xdc, 0x2c, 0x00, 0x2f, 0x0a, 0x02, 0x65, 0x23, 0xcc, 0x84, 0x75, + 0x07, 0xda, 0x40, 0xcc, 0x84, 0x75, 0x07, 0xda, 0x38, 0xc2, 0x00, 0x67, + 0x00, 0x2f, 0x1b, 0x02, 0x65, 0x29, 0xc3, 0xba, 0x37, 0x00, 0x2e, 0xd3, + 0x02, 0x65, 0x2f, 0xc5, 0xd4, 0xd9, 0x00, 0x2f, 0x29, 0xc3, 0x20, 0x18, + 0x00, 0x2e, 0xf9, 0xc3, 0x00, 0x4e, 0x00, 0x2e, 0xe8, 0xcc, 0x84, 0x75, + 0x07, 0xda, 0x00, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0xf0, 0xcc, 0x84, 0x75, + 0x07, 0xd9, 
0xe0, 0x46, 0x00, 0x8b, 0x42, 0x65, 0x35, 0xcc, 0x84, 0x75, + 0x07, 0xd9, 0xb0, 0xcb, 0x91, 0xa4, 0x07, 0xd9, 0xa1, 0x96, 0x00, 0x2e, + 0xb8, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0x98, 0xcc, 0x84, 0x75, 0x07, 0xd9, + 0x90, 0x0e, 0xc2, 0x65, 0x41, 0xc3, 0x16, 0x5a, 0x00, 0x2f, 0x10, 0xc3, + 0x22, 0x14, 0x07, 0xd9, 0x41, 0xc4, 0x5d, 0xe2, 0x07, 0xd9, 0x39, 0xc9, + 0xb4, 0xb5, 0x07, 0xd9, 0x31, 0xc5, 0xa2, 0x83, 0x07, 0xd9, 0x29, 0xc3, + 0xba, 0x37, 0x07, 0xd9, 0x21, 0xc2, 0x01, 0x7f, 0x07, 0xd9, 0x19, 0xc5, + 0x40, 0x9a, 0x07, 0xd9, 0x11, 0xc4, 0x06, 0x5a, 0x07, 0xd9, 0x08, 0xc5, + 0xcc, 0xe4, 0x00, 0x2d, 0xc3, 0x02, 0x65, 0x50, 0xc5, 0xd8, 0xfd, 0x00, + 0x2d, 0xd8, 0xc6, 0x44, 0x50, 0x00, 0x2e, 0x11, 0x0a, 0xc2, 0x65, 0x56, + 0xc4, 0xa0, 0x89, 0x00, 0x2d, 0xb0, 0xc4, 0xd5, 0xa7, 0x00, 0x2d, 0xcb, + 0x02, 0x65, 0x62, 0xc4, 0xd5, 0x84, 0x00, 0x2d, 0xa1, 0x45, 0xd5, 0xb5, + 0x42, 0x65, 0x68, 0xc6, 0xcb, 0x63, 0x00, 0x2f, 0xa1, 0xc3, 0x26, 0x1a, + 0x00, 0x2f, 0x98, 0xc3, 0x0f, 0x99, 0x00, 0x2c, 0xc1, 0x44, 0xe3, 0xeb, + 0x42, 0x65, 0x7a, 0x46, 0xcf, 0x7d, 0xc2, 0x65, 0x86, 0xc3, 0x1e, 0x95, + 0x00, 0x2c, 0xd8, 0xc7, 0xc5, 0xad, 0x00, 0x2c, 0xe8, 0xc7, 0xc5, 0xfa, + 0x00, 0x2d, 0x30, 0xce, 0x73, 0xc2, 0x02, 0x6e, 0x01, 0xcc, 0x83, 0x31, + 0x02, 0x6e, 0xe9, 0xc7, 0xc4, 0x8e, 0x02, 0x6f, 0x88, 0x14, 0xc2, 0x65, + 0x92, 0xcc, 0x8b, 0xf5, 0x02, 0x6e, 0xe0, 0xc3, 0x08, 0x93, 0x02, 0x6f, + 0x79, 0xc7, 0xc8, 0xf5, 0x02, 0x6f, 0xb8, 0x12, 0xc2, 0x65, 0x9e, 0xc6, + 0xd3, 0x37, 0x02, 0x6e, 0xc8, 0xc7, 0xc9, 0x3b, 0x01, 0x5e, 0x19, 0xc7, + 0xc2, 0xc0, 0x01, 0x59, 0x18, 0xc7, 0x33, 0xdf, 0x00, 0x00, 0x4b, 0x02, + 0x65, 0xa8, 0xc4, 0x3b, 0x19, 0x01, 0x5b, 0xf0, 0x95, 0x0f, 0x9e, 0xc0, + 0xc4, 0x18, 0x10, 0x08, 0x69, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0x69, 0xb0, + 0xc3, 0x0d, 0x14, 0x08, 0x69, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0x69, 0xa0, + 0xc4, 0x02, 0xde, 0x08, 0x69, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0x69, 0x90, + 0xc3, 0x0d, 0x23, 0x08, 0x69, 0x39, 0xc2, 0x00, 0xc1, 0x08, 0x69, 0x31, + 0xc4, 0x75, 0x13, 0x08, 0x69, 0x28, 0xc2, 0x19, 0x2c, 0x08, 0x68, 0xd9, + 0xc2, 0x01, 0x30, 0x08, 0x68, 0xd1, 0x83, 0x08, 0x68, 0xa8, 0x45, 0xd4, + 0x11, 0xc2, 0x65, 0xac, 0x83, 0x08, 0x68, 0x89, 0xc2, 0x00, 0xd0, 0x08, + 0x68, 0x40, 0xc2, 0x00, 0x39, 0x08, 0x68, 0x69, 0x83, 0x08, 0x68, 0x60, + 0xc2, 0x0e, 0x9a, 0x08, 0x68, 0x59, 0x83, 0x08, 0x68, 0x50, 0xc2, 0x01, + 0x6f, 0x08, 0x68, 0x21, 0x83, 0x08, 0x68, 0x18, 0x83, 0x08, 0x68, 0x79, + 0xc2, 0x00, 0xd0, 0x08, 0x68, 0x80, 0x83, 0x00, 0xb9, 0x41, 0xc2, 0x01, + 0x30, 0x00, 0xb9, 0x28, 0xc5, 0xd6, 0x8c, 0x00, 0x88, 0x2b, 0x02, 0x65, + 0xb8, 0x15, 0xc2, 0x65, 0xbc, 0xc5, 0x90, 0xe4, 0x00, 0x88, 0x93, 0x02, + 0x65, 0xcb, 0x12, 0xc2, 0x65, 0xd1, 0xc5, 0xb7, 0x9d, 0x00, 0x88, 0x5b, + 0x02, 0x65, 0xe9, 0xc5, 0xda, 0xe7, 0x00, 0x88, 0x33, 0x02, 0x65, 0xed, + 0x16, 0xc2, 0x65, 0xf1, 0x0d, 0xc2, 0x66, 0x00, 0xc5, 0xd9, 0x61, 0x00, + 0x88, 0x13, 0x02, 0x66, 0x15, 0x05, 0xc2, 0x66, 0x19, 0x42, 0x0c, 0x43, + 0xc2, 0x66, 0x2e, 0xc6, 0x92, 0x0c, 0x00, 0x8a, 0xf8, 0x49, 0xb4, 0x76, + 0xc2, 0x66, 0x3a, 0x49, 0xad, 0x02, 0x42, 0x66, 0x71, 0x0d, 0xc2, 0x66, + 0xb8, 0x15, 0xc2, 0x66, 0xcd, 0xc5, 0xd9, 0x61, 0x01, 0x89, 0xa3, 0x02, + 0x66, 0xdc, 0x16, 0xc2, 0x66, 0xe0, 0xc5, 0xd6, 0x8c, 0x01, 0x89, 0xcb, + 0x02, 0x66, 0xec, 0xc5, 0xda, 0xe7, 0x01, 0x8a, 0x0b, 0x02, 0x66, 0xf0, + 0x12, 0xc2, 0x66, 0xf4, 0x8b, 0x01, 0x8b, 0x1b, 0x02, 0x67, 0x09, 0x05, + 0xc2, 0x67, 0x0f, 0xc5, 0x90, 0xe4, 0x01, 0x8a, 0x71, 0x83, 0x01, 0x8a, + 0x7b, 0x02, 0x67, 0x1b, 0x1b, 0xc2, 0x67, 0x28, 0x87, 0x01, 0x8a, 0xa3, + 0x02, 0x67, 
0x48, 0x91, 0x01, 0x8a, 0xbb, 0x02, 0x67, 0x50, 0x19, 0xc2, + 0x67, 0x54, 0x97, 0x01, 0x8a, 0xe0, 0x19, 0xc2, 0x67, 0x66, 0x0a, 0xc2, + 0x67, 0x70, 0xc2, 0x00, 0xc4, 0x01, 0x81, 0xc0, 0xc3, 0x09, 0x9e, 0x01, + 0x81, 0x21, 0xc3, 0x0d, 0x14, 0x01, 0x81, 0x28, 0xc2, 0x22, 0xcc, 0x01, + 0x81, 0x31, 0xc4, 0x18, 0x10, 0x01, 0x81, 0x38, 0xc8, 0x0d, 0x03, 0x08, + 0x47, 0xf8, 0xc5, 0x28, 0xee, 0x08, 0x47, 0xf1, 0xc2, 0x00, 0xc4, 0x08, + 0x47, 0xe8, 0xc2, 0x39, 0x8b, 0x08, 0x47, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, + 0x47, 0x40, 0xc3, 0x11, 0xef, 0x08, 0x47, 0xa1, 0x03, 0x42, 0x67, 0x7c, + 0xc2, 0x17, 0xb6, 0x08, 0x47, 0x79, 0xc4, 0x36, 0xb5, 0x08, 0x47, 0x00, + 0xc2, 0x00, 0x8e, 0x08, 0x47, 0x38, 0x19, 0xc2, 0x67, 0x88, 0x15, 0xc2, + 0x67, 0x90, 0x83, 0x07, 0xfb, 0x89, 0x8b, 0x07, 0xfb, 0x91, 0x97, 0x07, + 0xfb, 0x99, 0x87, 0x07, 0xfb, 0xa1, 0x91, 0x07, 0xfb, 0xa9, 0x0d, 0xc2, + 0x67, 0xaa, 0x16, 0xc2, 0x67, 0xbe, 0x90, 0x07, 0xfc, 0xeb, 0x02, 0x67, + 0xd2, 0x0a, 0xc2, 0x67, 0xe6, 0x0f, 0xc2, 0x67, 0xfa, 0x1b, 0xc2, 0x68, + 0x0e, 0x14, 0x42, 0x68, 0x1a, 0xc5, 0x8e, 0xdf, 0x07, 0xfd, 0x0b, 0x02, + 0x68, 0x2e, 0xc6, 0xbb, 0xec, 0x07, 0xfd, 0xd8, 0x44, 0x3a, 0xbf, 0xc2, + 0x68, 0x34, 0xc3, 0x39, 0x37, 0x07, 0xfd, 0xa8, 0x02, 0x42, 0x68, 0x52, + 0xc4, 0x79, 0xf3, 0x07, 0xfd, 0x93, 0x02, 0x68, 0x74, 0xc6, 0xba, 0x7c, + 0x07, 0xfd, 0xe8, 0xc4, 0xb7, 0x9e, 0x07, 0xfd, 0xb8, 0xc4, 0xc6, 0x7a, + 0x07, 0xfd, 0xc1, 0xc6, 0xc6, 0x79, 0x07, 0xfd, 0xd0, 0xc6, 0xc1, 0x86, + 0x07, 0xfd, 0xe1, 0xc5, 0xc0, 0x7d, 0x07, 0xfd, 0x38, 0x87, 0x07, 0xfe, + 0x18, 0x83, 0x07, 0xfe, 0x23, 0x02, 0x68, 0x7a, 0x87, 0x07, 0xfe, 0x5b, + 0x02, 0x68, 0x7e, 0x91, 0x07, 0xfe, 0x91, 0x97, 0x07, 0xfe, 0xb9, 0x8b, + 0x07, 0xfe, 0xd8, 0x91, 0x07, 0xfe, 0x31, 0x97, 0x07, 0xfe, 0xd0, 0x87, + 0x07, 0xfe, 0x78, 0x83, 0x07, 0xfe, 0x6b, 0x02, 0x68, 0x82, 0x87, 0x07, + 0xfe, 0xab, 0x02, 0x68, 0x86, 0x8b, 0x07, 0xfe, 0xb0, 0x02, 0x42, 0x68, + 0x8a, 0xc2, 0x0c, 0x43, 0x0d, 0x80, 0x09, 0xc2, 0x14, 0x68, 0x0d, 0x88, + 0xf8, 0x19, 0xc2, 0x68, 0x96, 0x83, 0x01, 0x82, 0x09, 0x8b, 0x01, 0x82, + 0x19, 0x97, 0x01, 0x82, 0x29, 0x87, 0x01, 0x82, 0x39, 0x91, 0x01, 0x82, + 0x49, 0xc2, 0x00, 0x16, 0x01, 0x83, 0x19, 0x1b, 0xc2, 0x68, 0xa6, 0x0d, + 0x42, 0x68, 0xb2, 0xcd, 0x78, 0xcc, 0x0f, 0xdc, 0xb1, 0xc5, 0x01, 0xc2, + 0x0f, 0xdd, 0x88, 0xe0, 0x08, 0xa7, 0x0f, 0xdd, 0xa0, 0xc5, 0x68, 0x6e, + 0x01, 0x11, 0xf1, 0xc9, 0xaf, 0x4b, 0x01, 0x72, 0x2a, 0x02, 0x68, 0xba, + 0xc6, 0xca, 0xcd, 0x07, 0xff, 0xc9, 0xc9, 0x1b, 0x0a, 0x07, 0xff, 0xd1, + 0xca, 0x7c, 0x02, 0x07, 0xff, 0xd8, 0x43, 0x13, 0x6d, 0xc2, 0x68, 0xc0, + 0x46, 0x00, 0xd4, 0xc2, 0x68, 0xc6, 0x45, 0x00, 0x8c, 0x42, 0x68, 0xd2, + 0x42, 0x05, 0x1d, 0xc2, 0x68, 0xe4, 0xc7, 0x80, 0x70, 0x01, 0x50, 0xd9, + 0xcc, 0x06, 0xbb, 0x01, 0x50, 0xc9, 0xca, 0x9d, 0xb0, 0x01, 0x50, 0xc1, + 0xd9, 0x1f, 0x4a, 0x01, 0x50, 0xb9, 0xcd, 0x75, 0xa6, 0x01, 0x50, 0x70, + 0xd6, 0x30, 0xa6, 0x01, 0x50, 0xa9, 0xd1, 0x56, 0x40, 0x01, 0x50, 0x78, + 0xc3, 0x05, 0x14, 0x08, 0x5b, 0xc3, 0x02, 0x68, 0xf0, 0x16, 0xc2, 0x68, + 0xf4, 0xc4, 0x09, 0x9d, 0x08, 0x5b, 0xd8, 0x16, 0xc2, 0x69, 0x04, 0x15, + 0xc2, 0x69, 0x10, 0xc2, 0x00, 0x67, 0x08, 0x5b, 0x79, 0xc3, 0x20, 0x18, + 0x08, 0x5b, 0x69, 0xc8, 0xb9, 0x7a, 0x08, 0x5b, 0x61, 0xc6, 0xcf, 0xd7, + 0x08, 0x5b, 0x59, 0xc4, 0xe0, 0xe7, 0x08, 0x5b, 0x51, 0xc4, 0x4a, 0xb9, + 0x08, 0x5b, 0x49, 0xc2, 0x01, 0x7f, 0x08, 0x5b, 0x23, 0x02, 0x69, 0x1a, + 0xc5, 0x4a, 0xb3, 0x08, 0x5b, 0x31, 0xcd, 0x7e, 0x89, 0x08, 0x5b, 0x29, + 0xc6, 0x40, 0x9a, 0x08, 0x5b, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x5b, 0x11, + 0xc4, 0xe3, 
0x27, 0x08, 0x5b, 0x09, 0xc5, 0xa5, 0xfd, 0x08, 0x5b, 0x00, + 0xc3, 0x05, 0x14, 0x08, 0x5a, 0xc3, 0x02, 0x69, 0x20, 0x16, 0xc2, 0x69, + 0x24, 0xc4, 0x09, 0x9d, 0x08, 0x5a, 0xd8, 0x16, 0xc2, 0x69, 0x34, 0x15, + 0xc2, 0x69, 0x40, 0xc4, 0x5d, 0xe2, 0x08, 0x5a, 0x99, 0xc3, 0x00, 0x4e, + 0x08, 0x5a, 0x61, 0xc6, 0xcf, 0xd7, 0x08, 0x5a, 0x59, 0xc4, 0xe0, 0xe7, + 0x08, 0x5a, 0x51, 0xc4, 0x4a, 0xb9, 0x08, 0x5a, 0x49, 0xc2, 0x01, 0x7f, + 0x08, 0x5a, 0x23, 0x02, 0x69, 0x4a, 0xc5, 0x4a, 0xb3, 0x08, 0x5a, 0x31, + 0xc3, 0x7e, 0x89, 0x08, 0x5a, 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x5a, 0x19, + 0xc5, 0x9c, 0xa2, 0x08, 0x5a, 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x5a, 0x09, + 0x03, 0xc2, 0x69, 0x50, 0xc3, 0x20, 0x18, 0x08, 0x5a, 0x69, 0xc2, 0x00, + 0x67, 0x08, 0x5a, 0x81, 0xc4, 0xb9, 0x7e, 0x08, 0x5a, 0x90, 0xc3, 0x05, + 0x14, 0x00, 0x00, 0xf9, 0x16, 0xc2, 0x69, 0x5c, 0xc4, 0x09, 0x9d, 0x00, + 0x00, 0xe0, 0x4a, 0x0c, 0x8c, 0xc2, 0x69, 0x68, 0x49, 0x44, 0xee, 0xc2, + 0x69, 0x72, 0xc5, 0xdc, 0xa9, 0x0f, 0x65, 0x0b, 0x02, 0x69, 0x90, 0xc4, + 0x41, 0x55, 0x0f, 0x64, 0xf3, 0x02, 0x69, 0x96, 0xc4, 0x26, 0x78, 0x0f, + 0x63, 0xcb, 0x02, 0x69, 0x9c, 0xc5, 0x06, 0xdb, 0x0f, 0x63, 0xc3, 0x02, + 0x69, 0xa9, 0x15, 0xc2, 0x69, 0xb4, 0x08, 0xc2, 0x69, 0xc6, 0x16, 0xc2, + 0x69, 0xce, 0xc3, 0x05, 0x14, 0x0f, 0x63, 0x8a, 0x02, 0x69, 0xdf, 0xce, + 0x08, 0x73, 0x0f, 0x65, 0x79, 0x44, 0x05, 0x14, 0x42, 0x69, 0xe3, 0xc3, + 0x0d, 0x14, 0x0e, 0x9b, 0xb1, 0xc3, 0x09, 0x9e, 0x0e, 0x9b, 0xa8, 0xc4, + 0x02, 0xde, 0x0e, 0x9b, 0xa1, 0xc2, 0x02, 0xa0, 0x0e, 0x9b, 0x98, 0x0c, + 0xc2, 0x69, 0xef, 0xc8, 0xb6, 0x8a, 0x01, 0x96, 0x09, 0x42, 0x01, 0xc3, + 0xc2, 0x69, 0xf9, 0x03, 0xc2, 0x6a, 0x03, 0xc9, 0xa8, 0xee, 0x01, 0x96, + 0x41, 0xc7, 0xc9, 0x2d, 0x01, 0x96, 0x49, 0xc8, 0xbc, 0x22, 0x01, 0x96, + 0x51, 0x06, 0xc2, 0x6a, 0x0f, 0x45, 0xd6, 0x19, 0x42, 0x6a, 0x1b, 0xc5, + 0x00, 0x2c, 0x01, 0x7f, 0x81, 0xd0, 0x5d, 0x62, 0x01, 0x7f, 0x90, 0xc5, + 0x05, 0x02, 0x01, 0x7f, 0x89, 0xd0, 0x5d, 0x72, 0x01, 0x7f, 0x98, 0xc5, + 0x00, 0xd4, 0x01, 0x7f, 0xa9, 0xc5, 0x05, 0x02, 0x01, 0x7f, 0xb1, 0x0e, + 0xc2, 0x6a, 0x40, 0x46, 0x02, 0xae, 0x42, 0x6a, 0x4c, 0xc8, 0xbd, 0x1a, + 0x01, 0x8c, 0xa1, 0xc8, 0xb6, 0x72, 0x01, 0x8c, 0xd8, 0xc5, 0x01, 0xc2, + 0x01, 0x8c, 0xa9, 0xc7, 0x36, 0x55, 0x01, 0x8c, 0xe0, 0xc2, 0x00, 0xc4, + 0x08, 0x42, 0xdb, 0x02, 0x6a, 0x58, 0x19, 0xc2, 0x6a, 0x5e, 0xc4, 0x02, + 0xde, 0x08, 0x42, 0xd0, 0x00, 0x42, 0x6a, 0x68, 0xc2, 0x39, 0x8b, 0x08, + 0x42, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, 0x42, 0x40, 0xc3, 0x11, 0xef, 0x08, + 0x42, 0xa1, 0x03, 0x42, 0x6a, 0x74, 0xc3, 0x16, 0x5a, 0x08, 0x42, 0x79, + 0xc4, 0x36, 0xb5, 0x08, 0x42, 0x00, 0xc2, 0x00, 0x8e, 0x08, 0x42, 0x38, + 0xca, 0xa7, 0x92, 0x0f, 0xd2, 0x43, 0x02, 0x6a, 0x80, 0xc4, 0xde, 0x83, + 0x01, 0x32, 0xb3, 0x02, 0x6a, 0x86, 0xc4, 0xe3, 0x93, 0x01, 0x32, 0xcb, + 0x02, 0x6a, 0x8c, 0x0d, 0xc2, 0x6a, 0x92, 0xc6, 0xca, 0xfd, 0x01, 0x32, + 0xbb, 0x02, 0x6a, 0xa4, 0xc5, 0xa8, 0xf7, 0x01, 0x32, 0xab, 0x02, 0x6a, + 0xaa, 0x47, 0x45, 0x86, 0x42, 0x6a, 0xb0, 0x00, 0x42, 0x6a, 0xcc, 0x46, + 0x00, 0x8b, 0x42, 0x6a, 0xd8, 0x03, 0xc2, 0x6a, 0xe4, 0xc5, 0xc2, 0xc2, + 0x01, 0x59, 0x08, 0xc7, 0xc6, 0xef, 0x01, 0x4e, 0xb1, 0xd0, 0x5a, 0x62, + 0x01, 0x59, 0x68, 0x00, 0x42, 0x6a, 0xf3, 0x00, 0x42, 0x6b, 0x05, 0xca, + 0x82, 0xd3, 0x01, 0x31, 0xd1, 0x44, 0x03, 0x15, 0x42, 0x6b, 0x14, 0xc9, + 0x8e, 0x0a, 0x0f, 0xaa, 0x31, 0xca, 0x9d, 0x1a, 0x01, 0x58, 0xe0, 0x00, + 0xc2, 0x6b, 0x1e, 0x4a, 0x01, 0xa9, 0x42, 0x6b, 0x2a, 0xe0, 0x0a, 0xc7, + 0x0f, 0xbd, 0x00, 0x00, 0x42, 0x6b, 0x3c, 0xc4, 0x5b, 0x26, 0x01, 0x36, + 0x09, 0xc3, 
0x12, 0xb8, 0x01, 0x36, 0x00, 0x4a, 0x03, 0x3d, 0xc2, 0x6b, + 0x54, 0x4a, 0x01, 0xa9, 0x42, 0x6b, 0x66, 0x46, 0x01, 0x94, 0xc2, 0x6b, + 0x72, 0xc7, 0xc4, 0x80, 0x01, 0x1f, 0x10, 0x11, 0xc2, 0x6b, 0x78, 0xc2, + 0x00, 0xb3, 0x01, 0x34, 0x82, 0x02, 0x6b, 0x84, 0xc4, 0x0e, 0x6a, 0x01, + 0x39, 0x39, 0xc4, 0x11, 0xa4, 0x01, 0x5e, 0x70, 0x4a, 0x03, 0x3d, 0xc2, + 0x6b, 0x8a, 0x4a, 0x01, 0xa9, 0x42, 0x6b, 0x96, 0xc5, 0x06, 0x82, 0x01, + 0x30, 0xe9, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x30, 0xc8, 0x01, 0x92, 0x01, + 0x2d, 0x9b, 0x02, 0x6b, 0xa6, 0xce, 0x6c, 0x8a, 0x01, 0x2d, 0xa9, 0xc7, + 0xc6, 0x6a, 0x0f, 0xde, 0x50, 0x15, 0xc2, 0x6b, 0xac, 0xc7, 0x3a, 0x19, + 0x01, 0x59, 0x31, 0xc7, 0x0a, 0xe0, 0x01, 0x59, 0x40, 0xc4, 0x2b, 0xf1, + 0x0f, 0x9f, 0x89, 0xc5, 0xbb, 0xcd, 0x01, 0x59, 0x00, 0xc9, 0x46, 0x70, + 0x01, 0x2d, 0x79, 0xc3, 0x01, 0x5d, 0x01, 0x57, 0xf1, 0xc7, 0x5a, 0x6b, + 0x01, 0x59, 0x78, 0xc4, 0x18, 0x10, 0x0f, 0x17, 0xb9, 0xc2, 0x22, 0xcc, + 0x0f, 0x17, 0xb0, 0xc3, 0x0d, 0x14, 0x0f, 0x17, 0xa9, 0xc3, 0x09, 0x9e, + 0x0f, 0x17, 0xa0, 0xc4, 0x02, 0xde, 0x0f, 0x17, 0x99, 0xc2, 0x02, 0xa0, + 0x0f, 0x17, 0x90, 0xc2, 0x00, 0xec, 0x0f, 0x17, 0x78, 0xc2, 0x00, 0xec, + 0x0f, 0x17, 0x68, 0xc2, 0x14, 0x77, 0x0f, 0x17, 0x59, 0x83, 0x0f, 0x16, + 0x30, 0xc2, 0x00, 0xc4, 0x0f, 0x17, 0x50, 0xc2, 0x19, 0x2c, 0x0f, 0x17, + 0x49, 0xc2, 0x01, 0x30, 0x0f, 0x16, 0xe9, 0x83, 0x0f, 0x16, 0x48, 0x83, + 0x0f, 0x16, 0x03, 0x02, 0x6b, 0xbe, 0xc2, 0x00, 0x75, 0x0f, 0x17, 0x21, + 0x97, 0x0f, 0x16, 0xb0, 0x90, 0x0f, 0x17, 0x38, 0x90, 0x0f, 0x17, 0x32, + 0x02, 0x6b, 0xc5, 0xc2, 0x00, 0x75, 0x0f, 0x17, 0x28, 0xc2, 0x00, 0x39, + 0x0f, 0x17, 0x09, 0xc2, 0x0d, 0xf6, 0x0f, 0x17, 0x01, 0xc2, 0x00, 0xd0, + 0x0f, 0x16, 0x61, 0x83, 0x0f, 0x16, 0x58, 0xc3, 0x64, 0x58, 0x0f, 0x16, + 0xf9, 0x83, 0x0f, 0x16, 0x40, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0xc9, 0x83, + 0x0f, 0x16, 0xa0, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0x79, 0x83, 0x0f, 0x16, + 0x70, 0x83, 0x0f, 0x16, 0x51, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0x38, 0xc6, + 0x18, 0x10, 0x08, 0xc7, 0x81, 0xc4, 0xd2, 0x1d, 0x08, 0xc7, 0x78, 0xc4, + 0x45, 0x6a, 0x08, 0xc7, 0x71, 0xc4, 0x4a, 0x2e, 0x08, 0xc7, 0x68, 0xc5, + 0x0d, 0x0d, 0x08, 0xc7, 0x61, 0xc5, 0x28, 0xee, 0x08, 0xc7, 0x59, 0xc2, + 0x00, 0xc4, 0x08, 0xc7, 0x50, 0xc4, 0x18, 0x10, 0x08, 0xc7, 0x39, 0xc2, + 0x22, 0xcc, 0x08, 0xc7, 0x30, 0xc3, 0x0d, 0x14, 0x08, 0xc7, 0x29, 0xc3, + 0x09, 0x9e, 0x08, 0xc7, 0x20, 0xc4, 0x02, 0xde, 0x08, 0xc7, 0x19, 0xc2, + 0x02, 0xa0, 0x08, 0xc7, 0x10, 0xc2, 0x25, 0x9f, 0x08, 0xc6, 0xf1, 0xc3, + 0xe5, 0xed, 0x08, 0xc6, 0xe8, 0xc2, 0x00, 0xb1, 0x08, 0xc6, 0xe1, 0x11, + 0xc2, 0x6b, 0xc9, 0xc3, 0xbe, 0x83, 0x08, 0xc6, 0xc8, 0x8f, 0x08, 0xc6, + 0xb1, 0x96, 0x08, 0xc6, 0xa9, 0xc2, 0x00, 0x75, 0x08, 0xc6, 0x50, 0xc3, + 0x38, 0x86, 0x08, 0xc6, 0x99, 0xc3, 0x4f, 0x37, 0x08, 0xc6, 0x00, 0xc2, + 0x04, 0xcd, 0x08, 0xc6, 0x88, 0x10, 0x42, 0x6b, 0xd5, 0x85, 0x08, 0xc6, + 0x79, 0x97, 0x08, 0xc6, 0x38, 0x97, 0x08, 0xc6, 0x1b, 0x02, 0x6b, 0xdd, + 0x91, 0x08, 0xc6, 0x29, 0x83, 0x08, 0xc6, 0x20, 0xc2, 0x25, 0x9f, 0x08, + 0xc5, 0xf1, 0xc3, 0xe5, 0xed, 0x08, 0xc5, 0xe8, 0xc2, 0x00, 0xb1, 0x08, + 0xc5, 0xe1, 0x11, 0xc2, 0x6b, 0xe1, 0xc3, 0xbe, 0x83, 0x08, 0xc5, 0xc8, + 0x8f, 0x08, 0xc5, 0xb1, 0x96, 0x08, 0xc5, 0xa9, 0xc2, 0x00, 0x75, 0x08, + 0xc5, 0x50, 0xc3, 0x38, 0x86, 0x08, 0xc5, 0x99, 0xc3, 0x4f, 0x37, 0x08, + 0xc5, 0x00, 0xc2, 0x04, 0xcd, 0x08, 0xc5, 0x88, 0x10, 0x42, 0x6b, 0xed, + 0x85, 0x08, 0xc5, 0x79, 0x97, 0x08, 0xc5, 0x38, 0x97, 0x08, 0xc5, 0x1b, + 0x02, 0x6b, 0xf5, 0x91, 0x08, 0xc5, 0x29, 0x83, 0x08, 0xc5, 0x20, 0xd3, + 0x46, 0x7d, 
0x01, 0x39, 0x29, 0x43, 0x00, 0xbf, 0x42, 0x6b, 0xf9, 0xc4, + 0x01, 0xc3, 0x01, 0x02, 0xd9, 0xcb, 0x05, 0x1c, 0x01, 0x02, 0xc0, 0x12, + 0xc2, 0x6b, 0xff, 0xcc, 0x88, 0x1d, 0x0f, 0xc8, 0xa9, 0x16, 0xc2, 0x6c, + 0x11, 0x11, 0xc2, 0x6c, 0x1d, 0xcf, 0x60, 0x99, 0x0f, 0xb2, 0x29, 0xcc, + 0x87, 0x75, 0x0f, 0xb2, 0x21, 0xd0, 0x5a, 0xf2, 0x0f, 0xb0, 0xdb, 0x02, + 0x6c, 0x2f, 0x42, 0x00, 0x99, 0xc2, 0x6c, 0x35, 0xcf, 0x67, 0x0b, 0x0f, + 0xb1, 0x21, 0x0f, 0xc2, 0x6c, 0x41, 0xdb, 0x17, 0x7c, 0x0f, 0xc9, 0x59, + 0xda, 0x1b, 0xea, 0x0f, 0xcb, 0xa1, 0xce, 0x6d, 0x6a, 0x0f, 0xd7, 0x20, + 0xcf, 0x36, 0xc5, 0x01, 0x49, 0x61, 0xd0, 0x20, 0x66, 0x01, 0x49, 0x78, + 0xc4, 0x26, 0x78, 0x07, 0xf8, 0xc9, 0xc4, 0x15, 0xe7, 0x07, 0xf8, 0x81, + 0xc3, 0x05, 0x14, 0x07, 0xf8, 0x89, 0x16, 0xc2, 0x6c, 0x4d, 0x08, 0xc2, + 0x6c, 0x59, 0x15, 0xc2, 0x6c, 0x65, 0xc5, 0x06, 0xdb, 0x07, 0xf8, 0xc0, + 0xc3, 0x0d, 0xe5, 0x07, 0xf8, 0xd1, 0x42, 0x0a, 0x8c, 0x42, 0x6c, 0x71, + 0xcc, 0x8b, 0x11, 0x07, 0xf8, 0xe1, 0x43, 0x00, 0x4b, 0x42, 0x6c, 0x7b, + 0x4f, 0x0b, 0x17, 0xc2, 0x6c, 0x93, 0x4d, 0x29, 0xb9, 0x42, 0x6c, 0xfb, + 0xce, 0x25, 0xad, 0x07, 0xf9, 0xe9, 0xcd, 0x00, 0x32, 0x07, 0xfa, 0xe9, + 0xd1, 0x4f, 0x7a, 0x07, 0xfb, 0x01, 0xcb, 0x1a, 0x50, 0x07, 0xf8, 0x48, + 0xc9, 0xb2, 0xa2, 0x0f, 0x98, 0xd9, 0xc6, 0x00, 0x91, 0x0f, 0x98, 0x98, + 0x44, 0x1a, 0xce, 0xc2, 0x6d, 0x63, 0xc3, 0x01, 0xe2, 0x0b, 0x79, 0x90, + 0xa5, 0x0b, 0x7c, 0xc9, 0xa4, 0x0b, 0x7c, 0xc1, 0xa3, 0x0b, 0x7c, 0xb9, + 0xa2, 0x0b, 0x7c, 0xb1, 0xa1, 0x0b, 0x7c, 0xa9, 0xa0, 0x0b, 0x7c, 0xa1, + 0x9f, 0x0b, 0x7c, 0x98, 0x87, 0x0b, 0x7a, 0x49, 0x83, 0x0b, 0x79, 0xb9, + 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x71, 0xc2, 0x0d, 0xf6, 0x0b, 0x79, 0x50, + 0xc2, 0x19, 0x2c, 0x0b, 0x78, 0xe1, 0x83, 0x0b, 0x78, 0xd0, 0xca, 0x56, + 0xca, 0x0b, 0x7a, 0x80, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x69, 0x83, 0x0b, + 0x79, 0x60, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x21, 0x83, 0x0b, 0x79, 0x18, + 0xc2, 0x00, 0xd0, 0x0b, 0x78, 0xa9, 0x83, 0x0b, 0x78, 0xa0, 0xc2, 0x16, + 0x5a, 0x0b, 0x7a, 0x39, 0x83, 0x0b, 0x79, 0xc1, 0xc2, 0x00, 0xd0, 0x0b, + 0x79, 0x79, 0xc2, 0x02, 0x1c, 0x0b, 0x79, 0x58, 0xc2, 0x19, 0x2c, 0x0b, + 0x78, 0xe9, 0x83, 0x0b, 0x78, 0xd8, 0xc3, 0x90, 0x65, 0x0b, 0x79, 0xf9, + 0x10, 0xc2, 0x6d, 0x7b, 0xc2, 0x01, 0xc3, 0x0b, 0x78, 0x30, 0x15, 0xc2, + 0x6d, 0x85, 0xc2, 0x19, 0x2c, 0x0b, 0x7a, 0x01, 0x83, 0x0b, 0x79, 0xe8, + 0x83, 0x0b, 0x79, 0xe1, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0xb0, 0x15, 0xc2, + 0x6d, 0x8f, 0x83, 0x0b, 0x78, 0x69, 0xc2, 0x01, 0x6f, 0x0b, 0x78, 0x60, + 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x49, 0x83, 0x0b, 0x79, 0x40, 0xc2, 0x19, + 0x2c, 0x0b, 0x78, 0xc9, 0x83, 0x0b, 0x78, 0xc0, 0x90, 0x0b, 0x7b, 0x62, + 0x02, 0x6d, 0x99, 0xc2, 0x00, 0x75, 0x0b, 0x7c, 0x30, 0x90, 0x0b, 0x7b, + 0x1a, 0x02, 0x6d, 0x9d, 0x94, 0x0b, 0x7b, 0xa8, 0x89, 0x0b, 0x7a, 0xf8, + 0x94, 0x0b, 0x7c, 0x11, 0x9b, 0x0b, 0x7b, 0x00, 0x87, 0x0b, 0x7b, 0xa0, + 0x89, 0x0b, 0x7a, 0xc0, 0x00, 0x42, 0x6d, 0xa1, 0xcd, 0x0e, 0x61, 0x0f, + 0xbe, 0x19, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x08, 0xc6, 0x0b, 0x09, 0x0f, + 0xbc, 0x79, 0xc6, 0x02, 0xd1, 0x01, 0x35, 0x50, 0xd0, 0x5c, 0x62, 0x0f, + 0xbc, 0x29, 0xcb, 0x85, 0x72, 0x01, 0x35, 0x58, 0x00, 0xc2, 0x6d, 0xad, + 0xe0, 0x0b, 0x87, 0x01, 0x3b, 0x68, 0x00, 0xc2, 0x6d, 0xb9, 0xe0, 0x0b, + 0x87, 0x01, 0x3b, 0x60, 0x49, 0x35, 0x21, 0xc2, 0x6d, 0xc5, 0xd3, 0x3c, + 0xb5, 0x0f, 0xbd, 0x81, 0x4c, 0x0e, 0x55, 0x42, 0x6d, 0xd1, 0xd1, 0x52, + 0x11, 0x01, 0x35, 0x61, 0xc4, 0x01, 0xe3, 0x01, 0x2c, 0x91, 0xc6, 0x13, + 0x52, 0x0f, 0xbd, 0x51, 0x43, 0x4d, 0x57, 0x42, 0x6d, 0xdd, 0xcf, 0x15, + 0x36, 0x0f, 
0xbd, 0xe1, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x70, 0x9b, 0x0b, + 0x73, 0xfb, 0x02, 0x6d, 0xe9, 0x83, 0x0b, 0x73, 0x6b, 0x02, 0x6d, 0xed, + 0x91, 0x0b, 0x73, 0xeb, 0x02, 0x6d, 0xf7, 0x94, 0x0b, 0x73, 0xe1, 0x90, + 0x0b, 0x73, 0xdb, 0x02, 0x6d, 0xfb, 0x86, 0x0b, 0x73, 0xc9, 0x9a, 0x0b, + 0x73, 0xc1, 0x8a, 0x0b, 0x73, 0xb3, 0x02, 0x6e, 0x03, 0x93, 0x0b, 0x73, + 0xa9, 0x8e, 0x0b, 0x73, 0xa1, 0x97, 0x0b, 0x73, 0x91, 0x85, 0x0b, 0x73, + 0x89, 0x84, 0x0b, 0x73, 0x81, 0x87, 0x0b, 0x73, 0x79, 0x8c, 0x0b, 0x73, + 0x71, 0x8d, 0x0b, 0x73, 0x63, 0x02, 0x6e, 0x07, 0x8b, 0x0b, 0x73, 0x59, + 0x88, 0x0b, 0x73, 0x51, 0x89, 0x0b, 0x73, 0x49, 0x96, 0x0b, 0x73, 0x41, + 0x92, 0x0b, 0x73, 0x39, 0x9c, 0x0b, 0x73, 0x29, 0x99, 0x0b, 0x73, 0x19, + 0x98, 0x0b, 0x73, 0x11, 0x95, 0x0b, 0x73, 0x09, 0x8f, 0x0b, 0x73, 0x00, + 0x9b, 0x0b, 0x72, 0xfb, 0x02, 0x6e, 0x0b, 0x83, 0x0b, 0x72, 0x6b, 0x02, + 0x6e, 0x0f, 0x91, 0x0b, 0x72, 0xeb, 0x02, 0x6e, 0x19, 0x94, 0x0b, 0x72, + 0xe1, 0x90, 0x0b, 0x72, 0xdb, 0x02, 0x6e, 0x1d, 0x86, 0x0b, 0x72, 0xc9, + 0x9a, 0x0b, 0x72, 0xc1, 0x8a, 0x0b, 0x72, 0xb3, 0x02, 0x6e, 0x25, 0x93, + 0x0b, 0x72, 0xa9, 0x8e, 0x0b, 0x72, 0xa1, 0x97, 0x0b, 0x72, 0x91, 0x85, + 0x0b, 0x72, 0x89, 0x84, 0x0b, 0x72, 0x81, 0x87, 0x0b, 0x72, 0x79, 0x8c, + 0x0b, 0x72, 0x71, 0x8d, 0x0b, 0x72, 0x63, 0x02, 0x6e, 0x29, 0x8b, 0x0b, + 0x72, 0x59, 0x88, 0x0b, 0x72, 0x51, 0x89, 0x0b, 0x72, 0x49, 0x96, 0x0b, + 0x72, 0x41, 0x92, 0x0b, 0x72, 0x39, 0x9c, 0x0b, 0x72, 0x29, 0x99, 0x0b, + 0x72, 0x19, 0x98, 0x0b, 0x72, 0x11, 0x95, 0x0b, 0x72, 0x09, 0x8f, 0x0b, + 0x72, 0x00, 0xc4, 0x02, 0xde, 0x0b, 0x74, 0x1b, 0x02, 0x6e, 0x2d, 0xc2, + 0x02, 0xa0, 0x0b, 0x74, 0x12, 0x02, 0x6e, 0x33, 0xcf, 0x6b, 0x25, 0x0b, + 0x74, 0xa0, 0xc4, 0x18, 0x10, 0x0b, 0x74, 0x39, 0xc2, 0x22, 0xcc, 0x0b, + 0x74, 0x30, 0xc3, 0x0d, 0x14, 0x0b, 0x74, 0x29, 0xc3, 0x09, 0x9e, 0x0b, + 0x74, 0x20, 0xc7, 0x1f, 0x6e, 0x0b, 0x74, 0x91, 0xc5, 0x66, 0xb1, 0x0b, + 0x74, 0x58, 0xc8, 0x48, 0x23, 0x0b, 0x74, 0x89, 0xc6, 0x44, 0x9c, 0x0b, + 0x74, 0x80, 0xc6, 0x14, 0x07, 0x0b, 0x74, 0x79, 0xc7, 0x34, 0x37, 0x0b, + 0x74, 0x70, 0xc7, 0x52, 0xcc, 0x0b, 0x74, 0x69, 0xc5, 0x22, 0x43, 0x0b, + 0x74, 0x61, 0xc2, 0x00, 0xc4, 0x0b, 0x74, 0x50, 0xc6, 0x06, 0xaf, 0x01, + 0x1e, 0xb1, 0xc9, 0x67, 0xa7, 0x01, 0x1e, 0xa8, 0x24, 0xc2, 0x6e, 0x39, + 0x25, 0xc2, 0x6e, 0x75, 0x1f, 0xc2, 0x6e, 0xb1, 0x1e, 0xc2, 0x6e, 0xed, + 0x26, 0xc2, 0x6f, 0x29, 0x22, 0xc2, 0x6f, 0x65, 0x1d, 0xc2, 0x6f, 0xa1, + 0x21, 0xc2, 0x6f, 0xd7, 0x23, 0xc2, 0x70, 0x13, 0x20, 0x42, 0x70, 0x4f, + 0x26, 0xc2, 0x70, 0x8b, 0x20, 0xc2, 0x70, 0xbb, 0x1e, 0xc2, 0x70, 0xf7, + 0x23, 0xc2, 0x71, 0x33, 0x24, 0xc2, 0x71, 0x6f, 0x21, 0xc2, 0x71, 0xab, + 0x1d, 0xc2, 0x71, 0xe7, 0x22, 0xc2, 0x72, 0x23, 0x25, 0xc2, 0x72, 0x5f, + 0x1f, 0x42, 0x72, 0x9b, 0xc2, 0x02, 0xa0, 0x0f, 0x46, 0x41, 0xc4, 0x02, + 0xde, 0x0f, 0x46, 0x48, 0xc3, 0x09, 0x9e, 0x0f, 0x46, 0x51, 0xc3, 0x0d, + 0x14, 0x0f, 0x46, 0x58, 0xc2, 0x22, 0xcc, 0x0f, 0x46, 0x61, 0xc4, 0x18, + 0x10, 0x0f, 0x46, 0x68, 0x07, 0xc2, 0x72, 0xd7, 0xc8, 0x4b, 0x95, 0x0f, + 0x46, 0x98, 0x95, 0x0f, 0x46, 0x91, 0xca, 0xa2, 0x92, 0x0f, 0x46, 0xa8, + 0x16, 0xc2, 0x72, 0xe1, 0xcd, 0x76, 0xf8, 0x08, 0x4f, 0xf1, 0x07, 0xc2, + 0x72, 0xf3, 0x15, 0xc2, 0x72, 0xff, 0x08, 0xc2, 0x73, 0x0b, 0x44, 0x05, + 0x14, 0x42, 0x73, 0x17, 0xc4, 0x26, 0x78, 0x08, 0x4e, 0x43, 0x02, 0x73, + 0x23, 0xc5, 0x06, 0xdb, 0x08, 0x4e, 0x3b, 0x02, 0x73, 0x2d, 0x15, 0xc2, + 0x73, 0x37, 0x08, 0xc2, 0x73, 0x49, 0x16, 0xc2, 0x73, 0x51, 0xc3, 0x05, + 0x14, 0x08, 0x4e, 0x02, 0x02, 0x73, 0x62, 0x48, 0x3f, 0x14, 0xc2, 0x73, + 0x66, 0x46, 
0x02, 0x0f, 0x42, 0x73, 0x72, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, + 0xf8, 0xc2, 0x0e, 0x9a, 0x08, 0x4c, 0xe9, 0x16, 0xc2, 0x73, 0xd1, 0xc2, + 0x0f, 0x9a, 0x08, 0x4c, 0xb9, 0x0d, 0xc2, 0x73, 0xe3, 0x15, 0xc2, 0x73, + 0xed, 0xc3, 0xe6, 0x71, 0x08, 0x4c, 0x91, 0x83, 0x08, 0x4c, 0x01, 0x87, + 0x08, 0x4c, 0x09, 0x8b, 0x08, 0x4c, 0x11, 0x91, 0x08, 0x4c, 0x19, 0xc2, + 0x19, 0x2c, 0x08, 0x4c, 0x21, 0xc2, 0x01, 0x4a, 0x08, 0x4c, 0x29, 0xc2, + 0x01, 0x5d, 0x08, 0x4c, 0x33, 0x02, 0x73, 0xf8, 0xc2, 0x00, 0xb0, 0x08, + 0x4c, 0x41, 0xc2, 0x01, 0xc3, 0x08, 0x4c, 0x49, 0x10, 0xc2, 0x73, 0xfe, + 0xc2, 0x00, 0x39, 0x08, 0x4c, 0x73, 0x02, 0x74, 0x0c, 0xc2, 0x00, 0xdb, + 0x08, 0x4c, 0x80, 0x47, 0x22, 0x04, 0xc2, 0x74, 0x12, 0xcc, 0x8b, 0x4d, + 0x01, 0x4c, 0xd8, 0xc3, 0x7f, 0x18, 0x05, 0x5f, 0x29, 0x03, 0xc2, 0x74, + 0x18, 0x97, 0x05, 0x57, 0x70, 0xc3, 0x7f, 0x18, 0x05, 0x5f, 0x21, 0x8b, + 0x05, 0x57, 0x58, 0x97, 0x05, 0x57, 0x61, 0xc3, 0x7f, 0x18, 0x05, 0x5f, + 0x40, 0xc7, 0xc9, 0xe3, 0x05, 0x5f, 0x10, 0xc3, 0x71, 0x83, 0x05, 0x5e, + 0x4b, 0x02, 0x74, 0x20, 0x83, 0x05, 0x5e, 0x2b, 0x02, 0x74, 0x26, 0xc2, + 0x00, 0xc1, 0x05, 0x57, 0x41, 0xc2, 0x19, 0x2c, 0x05, 0x57, 0x18, 0xc2, + 0x00, 0x71, 0x05, 0x5e, 0x3b, 0x02, 0x74, 0x2c, 0x16, 0xc2, 0x74, 0x32, + 0xc3, 0x18, 0xb0, 0x05, 0x5e, 0x50, 0x83, 0x05, 0x5e, 0x23, 0x02, 0x74, + 0x3c, 0xc3, 0x08, 0x09, 0x05, 0x5e, 0x80, 0xc2, 0x01, 0x25, 0x05, 0x5e, + 0x03, 0x02, 0x74, 0x42, 0xc3, 0x18, 0xb0, 0x05, 0x5e, 0x40, 0xc3, 0x08, + 0x09, 0x05, 0x5e, 0xd1, 0x83, 0x05, 0x5e, 0xa8, 0xc3, 0x18, 0xb0, 0x05, + 0x5e, 0xc9, 0x06, 0xc2, 0x74, 0x48, 0xc2, 0x00, 0x71, 0x05, 0x5e, 0xb8, + 0xc3, 0x18, 0xb0, 0x05, 0x5e, 0xc1, 0xc2, 0x01, 0x25, 0x05, 0x5e, 0x90, + 0xc2, 0x0d, 0xf6, 0x05, 0x57, 0x51, 0xc2, 0x00, 0xd0, 0x05, 0x57, 0x49, + 0xc2, 0x00, 0xc2, 0x05, 0x5e, 0x08, 0x83, 0x05, 0x57, 0x11, 0xc2, 0x00, + 0x71, 0x05, 0x5e, 0x30, 0xc7, 0xc9, 0xe3, 0x05, 0x5e, 0xe8, 0xc7, 0xc9, + 0xe3, 0x05, 0x5e, 0xe0, 0xc3, 0x08, 0x09, 0x05, 0x5e, 0x99, 0xc2, 0x00, + 0x71, 0x05, 0x5e, 0xb0, 0xc9, 0xb1, 0xc1, 0x0f, 0xb5, 0xa9, 0xc7, 0x61, + 0x82, 0x0f, 0xb4, 0xf1, 0xc8, 0xb7, 0xaa, 0x0f, 0xb5, 0x00, 0xc2, 0x00, + 0x74, 0x01, 0x34, 0x59, 0xc3, 0x01, 0x95, 0x01, 0x34, 0x50, 0xe0, 0x01, + 0x27, 0x08, 0xb3, 0x60, 0x46, 0x00, 0x8b, 0x42, 0x74, 0x52, 0xcf, 0x01, + 0x38, 0x08, 0xb3, 0x31, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x28, 0xcf, 0x01, + 0x38, 0x08, 0xb3, 0x21, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x00, 0xc4, 0x26, + 0x78, 0x00, 0xc0, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0xc0, 0xc1, 0x15, 0xc2, + 0x74, 0x5e, 0x08, 0xc2, 0x74, 0x6a, 0x16, 0xc2, 0x74, 0x76, 0xc3, 0x05, + 0x14, 0x00, 0xc0, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0xc0, 0x80, 0x45, 0xc2, + 0x59, 0x42, 0x74, 0x82, 0x48, 0xb1, 0x71, 0xc2, 0x74, 0xa4, 0xc2, 0x00, + 0x75, 0x00, 0xc1, 0x48, 0x44, 0x62, 0x5b, 0xc2, 0x74, 0xf0, 0xc2, 0x0d, + 0xf6, 0x00, 0xc1, 0xe1, 0x83, 0x00, 0xc1, 0x90, 0x83, 0x00, 0xc1, 0xa3, + 0x02, 0x75, 0x61, 0x8b, 0x00, 0xc2, 0x10, 0x44, 0x14, 0x85, 0xc2, 0x75, + 0x67, 0xc2, 0x00, 0xd0, 0x00, 0xc1, 0x89, 0x83, 0x00, 0xc1, 0x80, 0xc2, + 0x00, 0x0a, 0x00, 0xc2, 0x09, 0xc2, 0x00, 0x39, 0x00, 0xc1, 0xf9, 0x83, + 0x00, 0xc1, 0xe8, 0xc2, 0x00, 0xd0, 0x00, 0xc2, 0x01, 0x83, 0x00, 0xc1, + 0x78, 0xc2, 0x00, 0xd0, 0x00, 0xc1, 0xd9, 0x83, 0x00, 0xc1, 0xd0, 0x87, + 0x00, 0xc1, 0x38, 0x87, 0x00, 0xc1, 0x30, 0x87, 0x00, 0xc1, 0x28, 0xc4, + 0x09, 0x9d, 0x00, 0xc0, 0x79, 0x16, 0xc2, 0x75, 0xc9, 0xc3, 0x05, 0x14, + 0x00, 0xc0, 0x58, 0x45, 0x09, 0x98, 0xc2, 0x75, 0xd5, 0xcb, 0x97, 0xf5, + 0x08, 0xb2, 0x11, 0xc4, 0x19, 0x53, 0x08, 0xb2, 0x08, 0xc4, 0xe3, 0x83, + 0x08, 0xb2, 
0x21, 0x03, 0xc2, 0x75, 0xf9, 0x42, 0x07, 0xb2, 0x42, 0x76, + 0x05, 0x03, 0xc2, 0x76, 0x11, 0x91, 0x08, 0xb1, 0xd9, 0x87, 0x08, 0xb1, + 0xc9, 0x48, 0xb2, 0x2d, 0xc2, 0x76, 0x1d, 0x97, 0x08, 0xb1, 0x9b, 0x02, + 0x76, 0x2b, 0x8b, 0x08, 0xb1, 0x8a, 0x02, 0x76, 0x2f, 0x0e, 0xc2, 0x76, + 0x33, 0xc2, 0x00, 0xd0, 0x08, 0xb1, 0x71, 0x15, 0xc2, 0x76, 0x3d, 0x18, + 0xc2, 0x76, 0x4d, 0xc2, 0x00, 0x39, 0x08, 0xb1, 0x41, 0xc2, 0x19, 0x2c, + 0x08, 0xb1, 0x39, 0xc2, 0x01, 0xc3, 0x08, 0xb1, 0x31, 0x04, 0xc2, 0x76, + 0x57, 0x12, 0xc2, 0x76, 0x61, 0x10, 0xc2, 0x76, 0x6b, 0x06, 0xc2, 0x76, + 0x81, 0x16, 0xc2, 0x76, 0x8f, 0x0c, 0xc2, 0x76, 0x9d, 0x05, 0xc2, 0x76, + 0xa7, 0x09, 0xc2, 0x76, 0xb1, 0x0d, 0xc2, 0x76, 0xbb, 0x83, 0x08, 0xb0, + 0x03, 0x02, 0x76, 0xc5, 0x91, 0x08, 0xb0, 0x61, 0x87, 0x08, 0xb0, 0x51, + 0x97, 0x08, 0xb0, 0x23, 0x02, 0x76, 0xd1, 0x8b, 0x08, 0xb0, 0x12, 0x02, + 0x76, 0xd5, 0x15, 0xc2, 0x76, 0xd9, 0x05, 0xc2, 0x76, 0xef, 0x14, 0xc2, + 0x77, 0x19, 0x0e, 0xc2, 0x77, 0x2f, 0x09, 0xc2, 0x77, 0x41, 0x04, 0xc2, + 0x77, 0x56, 0x06, 0xc2, 0x77, 0x62, 0x03, 0xc2, 0x77, 0x6c, 0x12, 0xc2, + 0x77, 0x7e, 0x16, 0xc2, 0x77, 0x8a, 0x17, 0xc2, 0x77, 0x96, 0x18, 0xc2, + 0x77, 0xa6, 0x0f, 0xc2, 0x77, 0xb2, 0x07, 0xc2, 0x77, 0xbc, 0x0a, 0xc2, + 0x77, 0xc8, 0x1b, 0xc2, 0x77, 0xd4, 0xca, 0x9c, 0xf2, 0x00, 0x17, 0xf0, + 0x89, 0x0e, 0xa1, 0xd3, 0x02, 0x77, 0xe0, 0x88, 0x0e, 0xa1, 0xc9, 0x87, + 0x0e, 0xa1, 0xc3, 0x02, 0x77, 0xe6, 0x86, 0x0e, 0xa1, 0xbb, 0x02, 0x77, + 0xf2, 0x85, 0x0e, 0xa1, 0xb3, 0x02, 0x77, 0xf8, 0x84, 0x0e, 0xa1, 0xab, + 0x02, 0x77, 0xfe, 0x83, 0x0e, 0xa1, 0xa3, 0x02, 0x78, 0x04, 0x91, 0x0e, + 0xa2, 0x13, 0x02, 0x78, 0x0a, 0x92, 0x0e, 0xa2, 0x1b, 0x02, 0x78, 0x0e, + 0x97, 0x0e, 0xa2, 0x43, 0x02, 0x78, 0x1e, 0x96, 0x0e, 0xa2, 0x3b, 0x02, + 0x78, 0x24, 0x95, 0x0e, 0xa2, 0x33, 0x02, 0x78, 0x33, 0x94, 0x0e, 0xa2, + 0x2b, 0x02, 0x78, 0x39, 0x9a, 0x0e, 0xa2, 0x5b, 0x02, 0x78, 0x3f, 0x90, + 0x0e, 0xa2, 0x0b, 0x02, 0x78, 0x43, 0x8f, 0x0e, 0xa2, 0x03, 0x02, 0x78, + 0x47, 0x8e, 0x0e, 0xa1, 0xfb, 0x02, 0x78, 0x4b, 0x8d, 0x0e, 0xa1, 0xf3, + 0x02, 0x78, 0x51, 0x8b, 0x0e, 0xa1, 0xe3, 0x02, 0x78, 0x57, 0x9c, 0x0e, + 0xa2, 0x6b, 0x02, 0x78, 0x5d, 0x9b, 0x0e, 0xa2, 0x61, 0x99, 0x0e, 0xa2, + 0x51, 0x98, 0x0e, 0xa2, 0x49, 0x93, 0x0e, 0xa2, 0x21, 0x8c, 0x0e, 0xa1, + 0xe9, 0x8a, 0x0e, 0xa1, 0xd8, 0xc8, 0x9c, 0x0e, 0x0e, 0xb8, 0xd9, 0xc9, + 0xaa, 0x9e, 0x0e, 0xb8, 0xc9, 0xd3, 0x43, 0x00, 0x0e, 0xb8, 0xa8, 0x91, + 0x0e, 0xa2, 0xe3, 0x02, 0x78, 0x63, 0x92, 0x0e, 0xa2, 0xeb, 0x02, 0x78, + 0x67, 0x85, 0x0e, 0xa2, 0x83, 0x02, 0x78, 0x77, 0x97, 0x0e, 0xa3, 0x13, + 0x02, 0x78, 0x7d, 0x96, 0x0e, 0xa3, 0x0b, 0x02, 0x78, 0x83, 0x95, 0x0e, + 0xa3, 0x03, 0x02, 0x78, 0x8f, 0x88, 0x0e, 0xa2, 0x9b, 0x02, 0x78, 0x95, + 0x94, 0x0e, 0xa2, 0xfb, 0x02, 0x78, 0x9b, 0x9a, 0x0e, 0xa3, 0x2b, 0x02, + 0x78, 0xa1, 0x90, 0x0e, 0xa2, 0xdb, 0x02, 0x78, 0xa5, 0x8f, 0x0e, 0xa2, + 0xd3, 0x02, 0x78, 0xa9, 0x8e, 0x0e, 0xa2, 0xcb, 0x02, 0x78, 0xad, 0x8d, + 0x0e, 0xa2, 0xc3, 0x02, 0x78, 0xb3, 0x8b, 0x0e, 0xa2, 0xb3, 0x02, 0x78, + 0xb9, 0x87, 0x0e, 0xa2, 0x93, 0x02, 0x78, 0xbf, 0x9c, 0x0e, 0xa3, 0x3b, + 0x02, 0x78, 0xcb, 0x86, 0x0e, 0xa2, 0x8b, 0x02, 0x78, 0xd1, 0x89, 0x0e, + 0xa2, 0xa3, 0x02, 0x78, 0xdd, 0x84, 0x0e, 0xa2, 0x7b, 0x02, 0x78, 0xe3, + 0x83, 0x0e, 0xa2, 0x73, 0x02, 0x78, 0xe9, 0x9b, 0x0e, 0xa3, 0x31, 0x99, + 0x0e, 0xa3, 0x21, 0x98, 0x0e, 0xa3, 0x19, 0x93, 0x0e, 0xa2, 0xf1, 0x8c, + 0x0e, 0xa2, 0xb8, 0x45, 0x03, 0x14, 0xc2, 0x78, 0xef, 0x46, 0x07, 0x2f, + 0x42, 0x79, 0x93, 0xc4, 0x26, 0x78, 0x0e, 0xbe, 0xb9, 0xc5, 0x06, 0xdb, + 0x0e, 0xbe, 
0xb1, 0x15, 0xc2, 0x79, 0x9f, 0x08, 0xc2, 0x79, 0xab, 0x16, + 0xc2, 0x79, 0xb7, 0xc3, 0x05, 0x14, 0x0e, 0xbe, 0x79, 0xc4, 0x15, 0xe7, + 0x0e, 0xbe, 0x70, 0x86, 0x0e, 0xa0, 0x1b, 0x02, 0x79, 0xc3, 0x91, 0x0e, + 0xa0, 0x73, 0x02, 0x79, 0xcf, 0x92, 0x0e, 0xa0, 0x7b, 0x02, 0x79, 0xd3, + 0x85, 0x0e, 0xa0, 0x13, 0x02, 0x79, 0xe3, 0x97, 0x0e, 0xa0, 0xa3, 0x02, + 0x79, 0xe9, 0x96, 0x0e, 0xa0, 0x9b, 0x02, 0x79, 0xef, 0x95, 0x0e, 0xa0, + 0x93, 0x02, 0x79, 0xfe, 0x94, 0x0e, 0xa0, 0x8b, 0x02, 0x7a, 0x04, 0x9a, + 0x0e, 0xa0, 0xbb, 0x02, 0x7a, 0x0a, 0x90, 0x0e, 0xa0, 0x6b, 0x02, 0x7a, + 0x0e, 0x8f, 0x0e, 0xa0, 0x63, 0x02, 0x7a, 0x12, 0x8e, 0x0e, 0xa0, 0x5b, + 0x02, 0x7a, 0x16, 0x8d, 0x0e, 0xa0, 0x53, 0x02, 0x7a, 0x1c, 0x8b, 0x0e, + 0xa0, 0x43, 0x02, 0x7a, 0x22, 0x87, 0x0e, 0xa0, 0x23, 0x02, 0x7a, 0x28, + 0x9c, 0x0e, 0xa0, 0xcb, 0x02, 0x7a, 0x34, 0x89, 0x0e, 0xa0, 0x33, 0x02, + 0x7a, 0x3a, 0x84, 0x0e, 0xa0, 0x0b, 0x02, 0x7a, 0x40, 0x83, 0x0e, 0xa0, + 0x03, 0x02, 0x7a, 0x46, 0x9b, 0x0e, 0xa0, 0xc1, 0x99, 0x0e, 0xa0, 0xb1, + 0x98, 0x0e, 0xa0, 0xa9, 0x93, 0x0e, 0xa0, 0x81, 0x8c, 0x0e, 0xa0, 0x49, + 0x8a, 0x0e, 0xa0, 0x39, 0x88, 0x0e, 0xa0, 0x28, 0x12, 0xc2, 0x7a, 0x4c, + 0xca, 0x9c, 0xac, 0x0e, 0xba, 0xa1, 0xcc, 0x8b, 0x65, 0x0e, 0xba, 0x91, + 0xcc, 0x89, 0xfd, 0x0e, 0xba, 0x89, 0xce, 0x10, 0x3e, 0x0e, 0xba, 0x81, + 0x46, 0x03, 0x13, 0xc2, 0x7a, 0x5e, 0xc5, 0xdb, 0xf0, 0x0e, 0xb9, 0xa9, + 0x48, 0x0b, 0x17, 0x42, 0x7b, 0x02, 0xc8, 0x9c, 0x0e, 0x0e, 0xb7, 0x09, + 0xc9, 0xaa, 0x9e, 0x0e, 0xb6, 0xf9, 0xd3, 0x43, 0x00, 0x0e, 0xb6, 0xd8, + 0x46, 0x03, 0x13, 0xc2, 0x7b, 0xa3, 0x48, 0x0b, 0x17, 0x42, 0x7c, 0x0b, + 0xc4, 0x26, 0x78, 0x0e, 0xbf, 0xf9, 0xc5, 0x06, 0xdb, 0x0e, 0xbf, 0xf1, + 0x15, 0xc2, 0x7c, 0x73, 0x08, 0xc2, 0x7c, 0x7f, 0x16, 0xc2, 0x7c, 0x8b, + 0xc3, 0x05, 0x14, 0x0e, 0xbf, 0xb9, 0xc4, 0x15, 0xe7, 0x0e, 0xbf, 0xb0, + 0x9c, 0x0e, 0xb5, 0x19, 0x9b, 0x0e, 0xb5, 0x11, 0x9a, 0x0e, 0xb5, 0x09, + 0x99, 0x0e, 0xb5, 0x01, 0x98, 0x0e, 0xb4, 0xf9, 0x97, 0x0e, 0xb4, 0xf1, + 0x96, 0x0e, 0xb4, 0xe9, 0x95, 0x0e, 0xb4, 0xe1, 0x94, 0x0e, 0xb4, 0xd9, + 0x93, 0x0e, 0xb4, 0xd1, 0x92, 0x0e, 0xb4, 0xc9, 0x91, 0x0e, 0xb4, 0xc1, + 0x90, 0x0e, 0xb4, 0xb9, 0x8f, 0x0e, 0xb4, 0xb1, 0x8e, 0x0e, 0xb4, 0xa9, + 0x8d, 0x0e, 0xb4, 0xa1, 0x8c, 0x0e, 0xb4, 0x99, 0x8b, 0x0e, 0xb4, 0x91, + 0x8a, 0x0e, 0xb4, 0x89, 0x89, 0x0e, 0xb4, 0x81, 0x88, 0x0e, 0xb4, 0x79, + 0x87, 0x0e, 0xb4, 0x71, 0x86, 0x0e, 0xb4, 0x69, 0x85, 0x0e, 0xb4, 0x61, + 0x84, 0x0e, 0xb4, 0x59, 0x83, 0x0e, 0xb4, 0x50, 0x9c, 0x0e, 0xb4, 0x49, + 0x9b, 0x0e, 0xb4, 0x41, 0x9a, 0x0e, 0xb4, 0x39, 0x99, 0x0e, 0xb4, 0x31, + 0x98, 0x0e, 0xb4, 0x29, 0x97, 0x0e, 0xb4, 0x21, 0x96, 0x0e, 0xb4, 0x19, + 0x95, 0x0e, 0xb4, 0x11, 0x94, 0x0e, 0xb4, 0x09, 0x93, 0x0e, 0xb4, 0x01, + 0x92, 0x0e, 0xb3, 0xf9, 0x91, 0x0e, 0xb3, 0xf1, 0x90, 0x0e, 0xb3, 0xe9, + 0x8f, 0x0e, 0xb3, 0xe1, 0x8e, 0x0e, 0xb3, 0xd9, 0x8d, 0x0e, 0xb3, 0xd1, + 0x8c, 0x0e, 0xb3, 0xc9, 0x8b, 0x0e, 0xb3, 0xc1, 0x8a, 0x0e, 0xb3, 0xb9, + 0x89, 0x0e, 0xb3, 0xb1, 0x88, 0x0e, 0xb3, 0xa9, 0x87, 0x0e, 0xb3, 0xa1, + 0x86, 0x0e, 0xb3, 0x99, 0x85, 0x0e, 0xb3, 0x91, 0x84, 0x0e, 0xb3, 0x89, + 0x83, 0x0e, 0xb3, 0x80, 0x45, 0x58, 0xc2, 0xc2, 0x7c, 0x97, 0x46, 0x09, + 0x97, 0xc2, 0x7c, 0xd1, 0x47, 0xc7, 0x4a, 0xc2, 0x7c, 0xf5, 0x46, 0x03, + 0x13, 0xc2, 0x7d, 0x01, 0x48, 0x0b, 0x17, 0x42, 0x7d, 0x69, 0x46, 0x03, + 0x13, 0xc2, 0x7d, 0xd1, 0x48, 0x0b, 0x17, 0x42, 0x7e, 0x2d, 0xc4, 0x26, + 0x78, 0x0e, 0xbf, 0x09, 0xc5, 0x06, 0xdb, 0x0e, 0xbf, 0x01, 0x15, 0xc2, + 0x7e, 0x75, 0x08, 0xc2, 0x7e, 0x81, 0x16, 0xc2, 0x7e, 0x8d, 0xc3, 0x05, + 0x14, 0x0e, 
0xbe, 0xc9, 0xc4, 0x15, 0xe7, 0x0e, 0xbe, 0xc0, 0x9c, 0x0e, + 0xab, 0x59, 0x9b, 0x0e, 0xab, 0x51, 0x9a, 0x0e, 0xab, 0x49, 0x99, 0x0e, + 0xab, 0x41, 0x98, 0x0e, 0xab, 0x39, 0x97, 0x0e, 0xab, 0x31, 0x96, 0x0e, + 0xab, 0x29, 0x95, 0x0e, 0xab, 0x21, 0x94, 0x0e, 0xab, 0x19, 0x93, 0x0e, + 0xab, 0x11, 0x92, 0x0e, 0xab, 0x09, 0x91, 0x0e, 0xab, 0x01, 0x90, 0x0e, + 0xaa, 0xf9, 0x8f, 0x0e, 0xaa, 0xf1, 0x8e, 0x0e, 0xaa, 0xe9, 0x8d, 0x0e, + 0xaa, 0xe1, 0x8c, 0x0e, 0xaa, 0xd9, 0x8b, 0x0e, 0xaa, 0xd1, 0x8a, 0x0e, + 0xaa, 0xc9, 0x89, 0x0e, 0xaa, 0xc1, 0x88, 0x0e, 0xaa, 0xb9, 0x87, 0x0e, + 0xaa, 0xb1, 0x86, 0x0e, 0xaa, 0xa9, 0x85, 0x0e, 0xaa, 0xa1, 0x84, 0x0e, + 0xaa, 0x99, 0x83, 0x0e, 0xaa, 0x90, 0x9b, 0x0e, 0xaa, 0x81, 0x9a, 0x0e, + 0xaa, 0x79, 0x99, 0x0e, 0xaa, 0x71, 0x98, 0x0e, 0xaa, 0x69, 0x97, 0x0e, + 0xaa, 0x61, 0x96, 0x0e, 0xaa, 0x59, 0x95, 0x0e, 0xaa, 0x51, 0x91, 0x0e, + 0xaa, 0x31, 0x8f, 0x0e, 0xaa, 0x21, 0x8e, 0x0e, 0xaa, 0x19, 0x8d, 0x0e, + 0xaa, 0x11, 0x8c, 0x0e, 0xaa, 0x09, 0x8b, 0x0e, 0xaa, 0x01, 0x89, 0x0e, + 0xa9, 0xf1, 0x88, 0x0e, 0xa9, 0xe9, 0x87, 0x0e, 0xa9, 0xe1, 0x86, 0x0e, + 0xa9, 0xd9, 0x84, 0x0e, 0xa9, 0xc9, 0x83, 0x0e, 0xa9, 0xc0, 0x46, 0x03, + 0x13, 0xc2, 0x7e, 0x99, 0x48, 0x0b, 0x17, 0x42, 0x7f, 0x01, 0xd5, 0x35, + 0x36, 0x01, 0x3f, 0x79, 0x46, 0x01, 0xfc, 0xc2, 0x7f, 0x55, 0xd4, 0x38, + 0xf4, 0x01, 0x3f, 0x59, 0xcd, 0x0b, 0x91, 0x01, 0x3f, 0x48, 0xd6, 0x08, + 0x88, 0x01, 0x3f, 0x61, 0xce, 0x25, 0xad, 0x01, 0x3f, 0x30, 0xc4, 0x18, + 0x10, 0x08, 0xea, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xea, 0xb0, 0xc3, 0x0d, + 0x14, 0x08, 0xea, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xea, 0xa0, 0xc4, 0x02, + 0xde, 0x08, 0xea, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xea, 0x90, 0x03, 0xc2, + 0x7f, 0x61, 0x91, 0x08, 0xe9, 0xe9, 0x87, 0x08, 0xe9, 0xd1, 0xc9, 0xb2, + 0x2d, 0x08, 0xe9, 0xb1, 0x97, 0x08, 0xe9, 0xa3, 0x02, 0x7f, 0x6d, 0x8b, + 0x08, 0xe9, 0x92, 0x02, 0x7f, 0x71, 0xc2, 0x00, 0x39, 0x08, 0xe9, 0x81, + 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0xe1, 0x83, 0x08, 0xe8, 0xd9, 0x16, 0x42, + 0x7f, 0x75, 0xc3, 0x2d, 0xfd, 0x08, 0xe9, 0x79, 0xc2, 0x00, 0xd0, 0x08, + 0xe8, 0xa1, 0x83, 0x08, 0xe8, 0x98, 0xc3, 0x1d, 0x35, 0x08, 0xe9, 0x71, + 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0x69, 0x83, 0x08, 0xe8, 0x60, 0xc2, 0x00, + 0xdb, 0x08, 0xe9, 0x69, 0x83, 0x08, 0xe9, 0x38, 0x83, 0x08, 0xe9, 0x59, + 0xc2, 0x0d, 0xf6, 0x08, 0xe9, 0x51, 0xc2, 0x00, 0xd0, 0x08, 0xe9, 0x48, + 0xc2, 0x00, 0xd0, 0x08, 0xe9, 0x19, 0x83, 0x08, 0xe9, 0x10, 0xc2, 0x00, + 0xd0, 0x08, 0xe9, 0x09, 0x83, 0x08, 0xe9, 0x00, 0x83, 0x08, 0xe8, 0xf9, + 0xc2, 0x00, 0xc1, 0x08, 0xe8, 0xd1, 0xc2, 0x19, 0x2c, 0x08, 0xe8, 0xa9, + 0xc2, 0x01, 0x30, 0x08, 0xe8, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0xf1, + 0x83, 0x08, 0xe8, 0xe9, 0x06, 0x42, 0x7f, 0x7f, 0xc2, 0x00, 0xd0, 0x08, + 0xe8, 0x91, 0x83, 0x08, 0xe8, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0x79, + 0x83, 0x08, 0xe8, 0x70, 0x97, 0x08, 0xe8, 0x59, 0x8b, 0x08, 0xe8, 0x41, + 0x83, 0x08, 0xe8, 0x08, 0x97, 0x08, 0xe8, 0x28, 0x8b, 0x08, 0xe8, 0x18, + 0xcb, 0x1e, 0x89, 0x08, 0xe5, 0xb1, 0xc8, 0x14, 0x38, 0x08, 0xe5, 0xa8, + 0x83, 0x08, 0xe5, 0x79, 0xc2, 0x00, 0xd0, 0x08, 0xe5, 0x71, 0x15, 0xc2, + 0x7f, 0x89, 0xc2, 0x00, 0xdb, 0x08, 0xe5, 0x59, 0xc2, 0x00, 0x39, 0x08, + 0xe5, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0xe5, 0x49, 0x1c, 0xc2, 0x7f, 0x93, + 0xc2, 0x01, 0x4a, 0x08, 0xe5, 0x29, 0x06, 0xc2, 0x7f, 0x9d, 0x16, 0xc2, + 0x7f, 0xa7, 0xc2, 0x01, 0xc3, 0x08, 0xe5, 0x09, 0xc2, 0x01, 0x5d, 0x08, + 0xe5, 0x01, 0x12, 0xc2, 0x7f, 0xb5, 0x10, 0xc2, 0x7f, 0xbf, 0xc2, 0x25, + 0x3b, 0x08, 0xe4, 0xc1, 0x05, 0xc2, 0x7f, 0xcf, 0xc2, 0x01, 0x30, 0x08, + 0xe4, 0xa1, 
0x0d, 0x42, 0x7f, 0xd9, 0x83, 0x08, 0xe4, 0x69, 0xc2, 0x00, + 0xd0, 0x08, 0xe4, 0x60, 0x83, 0x08, 0xe4, 0x39, 0xc2, 0x00, 0xd0, 0x08, + 0xe4, 0x30, 0xc2, 0x02, 0x1c, 0x08, 0xe4, 0x21, 0x83, 0x08, 0xe3, 0xe0, + 0x15, 0xc2, 0x7f, 0xe3, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0xd9, 0x83, 0x08, + 0xe3, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0xf9, 0x83, 0x08, 0xe3, 0xf0, + 0x83, 0x08, 0xe3, 0xe9, 0xc2, 0x19, 0x2c, 0x08, 0xe3, 0xc9, 0xc2, 0x01, + 0x30, 0x08, 0xe3, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0xb9, 0x83, 0x08, + 0xe3, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0x99, 0x83, 0x08, 0xe3, 0x90, + 0xd7, 0x29, 0x29, 0x00, 0x68, 0x01, 0xca, 0x1e, 0x8a, 0x00, 0x68, 0x09, + 0xce, 0x71, 0x5a, 0x00, 0x69, 0xe0, 0xc7, 0x14, 0x39, 0x00, 0x68, 0x11, + 0xc7, 0x7a, 0x7f, 0x00, 0x69, 0xe8, 0x0b, 0xc2, 0x7f, 0xed, 0xd2, 0x48, + 0xb3, 0x00, 0x69, 0xd8, 0xcd, 0x80, 0x36, 0x00, 0x68, 0x21, 0x47, 0xb2, + 0x2e, 0xc2, 0x7f, 0xf9, 0x83, 0x00, 0x69, 0xa8, 0x83, 0x00, 0x68, 0x31, + 0x8b, 0x00, 0x68, 0x81, 0x97, 0x00, 0x68, 0xa1, 0xc9, 0xa9, 0x90, 0x00, + 0x6a, 0xf8, 0x8b, 0x00, 0x68, 0x40, 0x97, 0x00, 0x68, 0x50, 0x87, 0x00, + 0x68, 0x78, 0x91, 0x00, 0x68, 0x98, 0x83, 0x00, 0x68, 0xa9, 0xc2, 0x00, + 0xd0, 0x00, 0x68, 0xb0, 0x83, 0x00, 0x68, 0xb9, 0xc2, 0x00, 0xd0, 0x00, + 0x68, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x68, 0xc9, 0xc2, 0x19, 0x2c, 0x00, + 0x68, 0xf1, 0x10, 0xc2, 0x80, 0x07, 0x83, 0x00, 0x69, 0x40, 0x83, 0x00, + 0x68, 0xd1, 0x0a, 0x42, 0x80, 0x11, 0x83, 0x00, 0x68, 0xe1, 0xc2, 0x00, + 0xd0, 0x00, 0x68, 0xe8, 0x16, 0xc2, 0x80, 0x1b, 0x83, 0x00, 0x69, 0x21, + 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x28, 0x06, 0xc2, 0x80, 0x2b, 0x83, 0x00, + 0x69, 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x39, 0xc7, 0xc7, 0x58, 0x00, + 0x6a, 0x70, 0x83, 0x00, 0x69, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x58, + 0x83, 0x00, 0x69, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x68, 0x83, 0x00, + 0x69, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x69, 0x88, 0x83, 0x00, 0x69, 0x91, + 0x0e, 0x42, 0x80, 0x35, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0xb1, 0xc2, 0x0d, + 0xf6, 0x00, 0x69, 0xb9, 0x83, 0x00, 0x69, 0xc0, 0x83, 0x00, 0x69, 0xf1, + 0x8b, 0x00, 0x6a, 0x41, 0x97, 0x00, 0x6a, 0x60, 0x8b, 0x00, 0x6a, 0x00, + 0x97, 0x00, 0x6a, 0x10, 0x94, 0x00, 0x6a, 0x1b, 0x02, 0x80, 0x3f, 0x8e, + 0x00, 0x6b, 0x12, 0x02, 0x80, 0x43, 0x87, 0x00, 0x6a, 0x38, 0x91, 0x00, + 0x6a, 0x58, 0xd8, 0x22, 0xbb, 0x00, 0x6a, 0xc1, 0x08, 0xc2, 0x80, 0x47, + 0x16, 0xc2, 0x80, 0x53, 0xc7, 0x08, 0x79, 0x00, 0x6b, 0x99, 0xc4, 0x01, + 0xce, 0x00, 0x6b, 0xa1, 0xc9, 0x67, 0x38, 0x00, 0x6b, 0xb1, 0xc6, 0x06, + 0xdb, 0x00, 0x6b, 0xb8, 0xca, 0xa3, 0xfa, 0x00, 0x6a, 0xd1, 0xca, 0x1e, + 0x15, 0x00, 0x6a, 0xe9, 0xc8, 0x08, 0x79, 0x00, 0x6b, 0xa9, 0xca, 0xa7, + 0x88, 0x00, 0x6b, 0xc0, 0xc4, 0x15, 0xe7, 0x00, 0x6b, 0x31, 0xc3, 0x05, + 0x14, 0x00, 0x6b, 0x39, 0x16, 0xc2, 0x80, 0x5f, 0x08, 0xc2, 0x80, 0x6b, + 0x15, 0xc2, 0x80, 0x77, 0xc5, 0x06, 0xdb, 0x00, 0x6b, 0x71, 0xc4, 0x26, + 0x78, 0x00, 0x6b, 0x78, 0xc7, 0x0d, 0x04, 0x00, 0x6b, 0x89, 0xc8, 0x4b, + 0x94, 0x00, 0x6b, 0x90, 0x96, 0x08, 0x57, 0xa3, 0x02, 0x80, 0x83, 0xd3, + 0x44, 0x43, 0x08, 0x57, 0x90, 0xc8, 0x0d, 0x03, 0x08, 0x57, 0x78, 0xc5, + 0x28, 0xee, 0x08, 0x57, 0x71, 0xc2, 0x00, 0xc4, 0x08, 0x57, 0x68, 0xc2, + 0x39, 0x8b, 0x08, 0x57, 0x21, 0xc6, 0xd2, 0xc5, 0x08, 0x56, 0xa9, 0xc3, + 0x1e, 0x1b, 0x08, 0x56, 0x70, 0xc4, 0x3e, 0x5a, 0x08, 0x57, 0x19, 0xc3, + 0x11, 0xef, 0x08, 0x57, 0x11, 0x03, 0x42, 0x80, 0x89, 0xc4, 0xe0, 0x03, + 0x08, 0x57, 0x01, 0xc3, 0x2d, 0x8a, 0x08, 0x56, 0xf0, 0xc3, 0x2d, 0x8a, + 0x08, 0x56, 0xf9, 0xc3, 0x00, 0xb6, 0x08, 0x56, 0x88, 0xc4, 0x40, 0x95, + 0x08, 0x56, 
0xd1, 0xc3, 0x16, 0x5a, 0x08, 0x56, 0xc9, 0xc4, 0x36, 0xb5, + 0x08, 0x56, 0x00, 0xc6, 0xd2, 0xc5, 0x08, 0x56, 0xa1, 0xc5, 0x40, 0x9b, + 0x08, 0x56, 0x28, 0xc4, 0xdc, 0xe6, 0x08, 0x56, 0x91, 0xc3, 0x00, 0xb6, + 0x08, 0x56, 0x80, 0xc2, 0x00, 0x8e, 0x08, 0x56, 0x68, 0xc5, 0xd6, 0x78, + 0x08, 0x56, 0x61, 0xc4, 0x40, 0x95, 0x08, 0x56, 0x58, 0xc5, 0xd6, 0x78, + 0x08, 0x56, 0x51, 0xc4, 0x40, 0x95, 0x08, 0x56, 0x48, 0xc5, 0xd5, 0xdd, + 0x08, 0x56, 0x21, 0xc4, 0x9c, 0xa3, 0x08, 0x56, 0x18, 0xc4, 0x9b, 0x90, + 0x08, 0x56, 0x11, 0xc3, 0x1e, 0x1b, 0x08, 0x56, 0x08, 0xc2, 0x00, 0x74, + 0x00, 0x42, 0xc1, 0x96, 0x00, 0x42, 0xab, 0x02, 0x80, 0x95, 0x95, 0x00, + 0x42, 0x73, 0x02, 0x80, 0x99, 0x94, 0x00, 0x42, 0x99, 0x93, 0x00, 0x42, + 0x91, 0x92, 0x00, 0x42, 0x81, 0x90, 0x00, 0x42, 0x69, 0x8f, 0x00, 0x42, + 0x61, 0x8e, 0x00, 0x42, 0x59, 0x8d, 0x00, 0x42, 0x53, 0x02, 0x80, 0xa1, + 0x9c, 0x00, 0x42, 0x31, 0x8a, 0x00, 0x42, 0x21, 0x86, 0x00, 0x42, 0x19, + 0x89, 0x00, 0x42, 0x11, 0x84, 0x00, 0x42, 0x08, 0x90, 0x00, 0x42, 0x79, + 0x96, 0x00, 0x42, 0x38, 0x14, 0xc2, 0x80, 0xa7, 0xc2, 0x00, 0xd0, 0x08, + 0x8b, 0x89, 0xc2, 0x0d, 0xf6, 0x08, 0x8b, 0x81, 0xc2, 0x02, 0x41, 0x08, + 0x8b, 0x79, 0xc2, 0x00, 0xdb, 0x08, 0x8b, 0x71, 0xc2, 0x01, 0xc3, 0x08, + 0x8b, 0x61, 0x04, 0xc2, 0x80, 0xb1, 0x12, 0xc2, 0x80, 0xbb, 0x10, 0xc2, + 0x80, 0xc5, 0x06, 0xc2, 0x80, 0xd5, 0x16, 0xc2, 0x80, 0xe3, 0x0c, 0xc2, + 0x80, 0xf1, 0x05, 0xc2, 0x80, 0xfb, 0x09, 0xc2, 0x81, 0x05, 0x0d, 0xc2, + 0x81, 0x0f, 0x91, 0x08, 0x8a, 0xa1, 0x87, 0x08, 0x8a, 0x99, 0x97, 0x08, + 0x8a, 0x91, 0x8b, 0x08, 0x8a, 0x89, 0x83, 0x08, 0x8a, 0x80, 0x05, 0xc2, + 0x81, 0x19, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0xb8, 0x05, 0xc2, 0x81, 0x25, + 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0xa8, 0x05, 0xc2, 0x81, 0x31, 0xc7, 0xc0, + 0xcf, 0x0f, 0x80, 0xb0, 0x05, 0xc2, 0x81, 0x3d, 0xc7, 0xc0, 0xcf, 0x0f, + 0x80, 0xc0, 0x05, 0xc2, 0x81, 0x49, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x80, + 0x05, 0xc2, 0x81, 0x55, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x88, 0x05, 0xc2, + 0x81, 0x61, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x90, 0x05, 0xc2, 0x81, 0x6d, + 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x98, 0x05, 0xc2, 0x81, 0x79, 0xc7, 0xc0, + 0xcf, 0x0f, 0x80, 0xa0, 0x46, 0x10, 0x79, 0xc2, 0x81, 0x85, 0xc4, 0xe3, + 0x7f, 0x0f, 0x9d, 0xe0, 0xcb, 0x8d, 0x0b, 0x0f, 0x9c, 0xc0, 0x9a, 0x01, + 0x38, 0xa9, 0xc4, 0x00, 0xba, 0x00, 0x06, 0xba, 0x02, 0x81, 0xeb, 0xc5, + 0x13, 0x84, 0x01, 0x14, 0x71, 0xce, 0x1f, 0x18, 0x01, 0x14, 0x68, 0xc2, + 0x00, 0xd0, 0x08, 0x95, 0x41, 0xc2, 0x00, 0x39, 0x08, 0x95, 0x39, 0x83, + 0x08, 0x95, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xf9, 0x83, 0x08, 0x94, + 0xe8, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xe1, 0x83, 0x08, 0x94, 0xd8, 0x83, + 0x08, 0x94, 0xd1, 0xc2, 0x00, 0xc1, 0x08, 0x94, 0xa9, 0xc2, 0x19, 0x2c, + 0x08, 0x94, 0x78, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xc9, 0x83, 0x08, 0x94, + 0xc1, 0x06, 0x42, 0x81, 0xef, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xb9, 0x83, + 0x08, 0x94, 0xb1, 0x16, 0x42, 0x81, 0xff, 0x83, 0x08, 0x94, 0x61, 0xc2, + 0x25, 0x3b, 0x08, 0x94, 0x68, 0x83, 0x08, 0x94, 0x51, 0xc2, 0x00, 0xd0, + 0x08, 0x94, 0x58, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0x41, 0x83, 0x08, 0x94, + 0x30, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0x29, 0x83, 0x08, 0x94, 0x20, 0xc3, + 0x4d, 0x47, 0x05, 0x4f, 0x29, 0x45, 0x28, 0xb1, 0xc2, 0x82, 0x09, 0x48, + 0xba, 0xb2, 0x42, 0x82, 0x19, 0xc3, 0x02, 0x9f, 0x05, 0x53, 0xc9, 0xc3, + 0x05, 0x14, 0x05, 0x53, 0xc1, 0xcb, 0x0f, 0x09, 0x05, 0x53, 0xb8, 0x44, + 0x3d, 0xbb, 0x42, 0x82, 0x25, 0x48, 0x68, 0x93, 0x42, 0x82, 0x69, 0x83, + 0x00, 0x80, 0x59, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x60, 0x83, 0x00, 0x82, + 0x83, 0x02, 
0x82, 0x89, 0x4b, 0x91, 0x8e, 0x42, 0x82, 0x8f, 0xc2, 0x19, + 0x2c, 0x00, 0x80, 0x51, 0x83, 0x00, 0x80, 0x78, 0x83, 0x00, 0x80, 0x69, + 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x70, 0x87, 0x00, 0x81, 0x41, 0xc3, 0x20, + 0xf1, 0x00, 0x82, 0xd1, 0xc3, 0xe5, 0xf0, 0x00, 0x82, 0xd9, 0x42, 0x3f, + 0x98, 0x42, 0x82, 0x9b, 0xc3, 0x00, 0xcf, 0x00, 0x83, 0x29, 0xc3, 0x09, + 0x0e, 0x00, 0x83, 0x30, 0xc3, 0x3a, 0x09, 0x00, 0x83, 0x71, 0xc3, 0xdf, + 0x5b, 0x00, 0x83, 0x79, 0xc4, 0xaa, 0x0d, 0x00, 0x83, 0x80, 0x94, 0x00, + 0x82, 0x98, 0x8e, 0x00, 0x82, 0xa8, 0x8b, 0x00, 0x84, 0xe8, 0xc6, 0x00, + 0xd3, 0x00, 0x84, 0x28, 0x45, 0x03, 0x14, 0xc2, 0x82, 0xa3, 0x83, 0x01, + 0x85, 0xa9, 0x8b, 0x01, 0x85, 0xb9, 0x97, 0x01, 0x85, 0xc9, 0x87, 0x01, + 0x85, 0xd9, 0x91, 0x01, 0x85, 0xe8, 0x47, 0x78, 0xc0, 0x42, 0x82, 0xe0, + 0x8b, 0x01, 0x86, 0xfb, 0x02, 0x82, 0xee, 0x83, 0x01, 0x86, 0xf1, 0x97, + 0x01, 0x87, 0x01, 0x87, 0x01, 0x87, 0x09, 0x91, 0x01, 0x87, 0x10, 0x83, + 0x01, 0x85, 0x59, 0x8b, 0x01, 0x85, 0x69, 0x97, 0x01, 0x85, 0x79, 0x87, + 0x01, 0x85, 0x89, 0x91, 0x01, 0x85, 0x98, 0x83, 0x01, 0x85, 0x61, 0x8b, + 0x01, 0x85, 0x71, 0x97, 0x01, 0x85, 0x81, 0x87, 0x01, 0x85, 0x91, 0x91, + 0x01, 0x85, 0xa0, 0x83, 0x01, 0x85, 0xb1, 0x8b, 0x01, 0x85, 0xc1, 0x97, + 0x01, 0x85, 0xd1, 0x87, 0x01, 0x85, 0xe1, 0x91, 0x01, 0x85, 0xf0, 0x83, + 0x01, 0x85, 0xf9, 0x8b, 0x01, 0x86, 0x09, 0x97, 0x01, 0x86, 0x21, 0x87, + 0x01, 0x86, 0x31, 0x91, 0x01, 0x86, 0x40, 0x83, 0x01, 0x86, 0x01, 0x8b, + 0x01, 0x86, 0x11, 0x97, 0x01, 0x86, 0x29, 0x87, 0x01, 0x86, 0x39, 0x91, + 0x01, 0x86, 0x48, 0x83, 0x01, 0x86, 0x51, 0x8b, 0x01, 0x86, 0x59, 0x97, + 0x01, 0x86, 0x61, 0x87, 0x01, 0x86, 0x69, 0x91, 0x01, 0x86, 0x70, 0x83, + 0x01, 0x86, 0x79, 0x8b, 0x01, 0x86, 0x91, 0x97, 0x01, 0x86, 0xa9, 0x87, + 0x01, 0x86, 0xc1, 0x91, 0x01, 0x86, 0xd8, 0x83, 0x01, 0x86, 0x81, 0x8b, + 0x01, 0x86, 0x99, 0x97, 0x01, 0x86, 0xb1, 0x87, 0x01, 0x86, 0xc9, 0x91, + 0x01, 0x86, 0xe0, 0x83, 0x01, 0x86, 0x89, 0x8b, 0x01, 0x86, 0xa1, 0x97, + 0x01, 0x86, 0xb9, 0x87, 0x01, 0x86, 0xd1, 0x91, 0x01, 0x86, 0xe8, 0x83, + 0x01, 0x87, 0x21, 0x97, 0x01, 0x87, 0x31, 0x91, 0x01, 0x87, 0x40, 0x83, + 0x01, 0x87, 0x49, 0x8b, 0x01, 0x87, 0x51, 0x97, 0x01, 0x87, 0x59, 0x87, + 0x01, 0x87, 0x61, 0x91, 0x01, 0x87, 0x68, 0x83, 0x01, 0x87, 0x79, 0x8b, + 0x01, 0x87, 0x81, 0x87, 0x01, 0x87, 0x89, 0x91, 0x01, 0x87, 0x90, 0x97, + 0x01, 0x87, 0xa1, 0x83, 0x01, 0x87, 0xb9, 0x8b, 0x01, 0x87, 0xc1, 0x87, + 0x01, 0x87, 0xc9, 0x91, 0x01, 0x87, 0xd0, 0xc4, 0x1e, 0x97, 0x08, 0x85, + 0xc9, 0xc5, 0x40, 0xe7, 0x08, 0x84, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0x84, + 0xd9, 0xc3, 0x40, 0xe2, 0x08, 0x84, 0xd1, 0x83, 0x08, 0x84, 0xc8, 0xc2, + 0x00, 0xd0, 0x08, 0x84, 0xc1, 0x83, 0x08, 0x84, 0xb8, 0xd2, 0x4a, 0x87, + 0x00, 0x64, 0x01, 0xc6, 0xc3, 0x62, 0x00, 0x64, 0x20, 0xc7, 0x14, 0x39, + 0x00, 0x64, 0x11, 0xc7, 0x7a, 0x7f, 0x00, 0x65, 0xe8, 0xc5, 0x40, 0xe7, + 0x00, 0x64, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x66, 0x68, 0x83, 0x00, 0x64, + 0x2b, 0x02, 0x82, 0xf4, 0x8b, 0x00, 0x64, 0x3b, 0x02, 0x83, 0x00, 0x97, + 0x00, 0x64, 0x4b, 0x02, 0x83, 0x04, 0x18, 0xc2, 0x83, 0x08, 0x87, 0x00, + 0x64, 0x73, 0x02, 0x83, 0x12, 0x91, 0x00, 0x64, 0x93, 0x02, 0x83, 0x16, + 0x0d, 0xc2, 0x83, 0x1a, 0x09, 0xc2, 0x83, 0x24, 0x10, 0xc2, 0x83, 0x2e, + 0x05, 0xc2, 0x83, 0x47, 0x0c, 0xc2, 0x83, 0x51, 0x16, 0xc2, 0x83, 0x5b, + 0x06, 0xc2, 0x83, 0x69, 0x12, 0xc2, 0x83, 0x77, 0x04, 0xc2, 0x83, 0x81, + 0xc2, 0x01, 0xc3, 0x00, 0x65, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x65, 0x79, + 0x14, 0xc2, 0x83, 0x8b, 0x0e, 0xc2, 0x83, 0x95, 0x15, 0xc2, 0x83, 0x9d, + 0xc2, 0x00, 
0xd0, 0x00, 0x65, 0xc9, 0xc2, 0x00, 0x87, 0x00, 0x66, 0xf0, + 0x83, 0x00, 0x65, 0xf1, 0x8b, 0x00, 0x66, 0x41, 0x97, 0x00, 0x66, 0x60, + 0x8b, 0x00, 0x66, 0x00, 0x97, 0x00, 0x66, 0x10, 0x94, 0x00, 0x66, 0x1b, + 0x02, 0x83, 0xad, 0x8e, 0x00, 0x67, 0x12, 0x02, 0x83, 0xb1, 0x87, 0x00, + 0x66, 0x38, 0x91, 0x00, 0x66, 0x58, 0xc2, 0x02, 0xa0, 0x00, 0x67, 0x41, + 0xc4, 0x02, 0xde, 0x00, 0x67, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x67, 0x51, + 0xc3, 0x0d, 0x14, 0x00, 0x67, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x67, 0x61, + 0xc4, 0x18, 0x10, 0x00, 0x67, 0x68, 0xc2, 0x02, 0x6f, 0x01, 0x78, 0x03, + 0x02, 0x83, 0xb5, 0x12, 0xc2, 0x83, 0xbb, 0xc2, 0x18, 0xb3, 0x01, 0x7b, + 0xe0, 0x0b, 0xc2, 0x83, 0xc7, 0x07, 0xc2, 0x83, 0xd7, 0x03, 0xc2, 0x83, + 0xe7, 0xc3, 0x08, 0x48, 0x01, 0x7d, 0x3a, 0x02, 0x83, 0xf3, 0x11, 0xc2, + 0x83, 0xf9, 0x0b, 0xc2, 0x84, 0x1c, 0x14, 0xc2, 0x84, 0x2c, 0x07, 0x42, + 0x84, 0x3c, 0x0e, 0xc2, 0x84, 0x48, 0x07, 0xc2, 0x84, 0x52, 0x12, 0xc2, + 0x84, 0x68, 0x05, 0xc2, 0x84, 0x7e, 0xc4, 0x03, 0x14, 0x01, 0x79, 0x49, + 0x0a, 0xc2, 0x84, 0x8a, 0xc4, 0xb0, 0xd3, 0x01, 0x79, 0xc9, 0x16, 0xc2, + 0x84, 0x92, 0xc5, 0x0b, 0x0a, 0x01, 0x7a, 0x29, 0xc2, 0x05, 0x1d, 0x01, + 0x7a, 0x39, 0x03, 0xc2, 0x84, 0xa0, 0xc4, 0x49, 0x26, 0x01, 0x7b, 0x11, + 0x0b, 0xc2, 0x84, 0xb0, 0xc3, 0x56, 0x1d, 0x01, 0x7b, 0x51, 0xc4, 0x0d, + 0xed, 0x01, 0x7d, 0x98, 0x11, 0xc2, 0x84, 0xbc, 0xcf, 0x67, 0xec, 0x01, + 0x78, 0xb1, 0x07, 0xc2, 0x84, 0xc6, 0x03, 0x42, 0x84, 0xd0, 0xc2, 0x02, + 0xa0, 0x01, 0x78, 0x33, 0x02, 0x84, 0xe0, 0x03, 0xc2, 0x84, 0xe6, 0xc2, + 0x00, 0xc4, 0x01, 0x78, 0xb9, 0x42, 0x00, 0x33, 0xc2, 0x84, 0xf8, 0x14, + 0xc2, 0x85, 0x04, 0x0b, 0xc2, 0x85, 0x16, 0x11, 0x42, 0x85, 0x22, 0xc2, + 0x00, 0xd1, 0x01, 0x78, 0x41, 0x11, 0xc2, 0x85, 0x2e, 0x07, 0xc2, 0x85, + 0x3c, 0x0b, 0x42, 0x85, 0x48, 0x10, 0xc2, 0x85, 0x54, 0xc4, 0x00, 0x2d, + 0x01, 0x78, 0x59, 0x03, 0xc2, 0x85, 0x60, 0xc3, 0x18, 0x11, 0x01, 0x7e, + 0x8b, 0x02, 0x85, 0x6b, 0xc2, 0x0c, 0x43, 0x01, 0x7b, 0x61, 0xc9, 0xa9, + 0xf3, 0x01, 0x7e, 0x58, 0x11, 0xc2, 0x85, 0x71, 0x0e, 0xc2, 0x85, 0x8d, + 0xc4, 0xdf, 0xbb, 0x01, 0x79, 0x31, 0x03, 0xc2, 0x85, 0x9d, 0xc3, 0x25, + 0x4d, 0x01, 0x7d, 0x10, 0xc2, 0x00, 0x89, 0x01, 0x78, 0x71, 0x10, 0x42, + 0x85, 0xaf, 0xc4, 0x00, 0x27, 0x01, 0x78, 0x91, 0x14, 0xc2, 0x85, 0xbb, + 0xc3, 0x01, 0xc8, 0x01, 0x7b, 0xf1, 0xc2, 0x00, 0x2d, 0x01, 0x7c, 0xb8, + 0x14, 0xc2, 0x85, 0xc7, 0x11, 0xc2, 0x85, 0xd3, 0x07, 0xc2, 0x85, 0xdf, + 0x03, 0xc2, 0x85, 0xeb, 0x0a, 0xc2, 0x85, 0xfa, 0x42, 0x00, 0x74, 0x42, + 0x86, 0x06, 0x0b, 0xc2, 0x86, 0x0e, 0xc3, 0xbb, 0x1c, 0x01, 0x79, 0x39, + 0x03, 0xc2, 0x86, 0x20, 0xc2, 0x00, 0xa8, 0x01, 0x7c, 0xd1, 0xc2, 0x05, + 0x1d, 0x01, 0x7c, 0xd8, 0xc4, 0x46, 0xf6, 0x01, 0x78, 0xe1, 0xc2, 0x24, + 0xe2, 0x01, 0x7a, 0x21, 0x42, 0x01, 0xa3, 0xc2, 0x86, 0x2e, 0xc2, 0x02, + 0x35, 0x01, 0x7b, 0xe8, 0x91, 0x01, 0x79, 0x0b, 0x02, 0x86, 0x3a, 0x42, + 0x00, 0x39, 0xc2, 0x86, 0x46, 0xc3, 0x00, 0xfe, 0x01, 0x7d, 0x41, 0xc4, + 0xe0, 0x07, 0x01, 0x7e, 0x08, 0x0b, 0xc2, 0x86, 0x52, 0x11, 0xc2, 0x86, + 0x62, 0x14, 0xc2, 0x86, 0x7e, 0x03, 0xc2, 0x86, 0x90, 0x0e, 0xc2, 0x86, + 0x9c, 0xc3, 0x0e, 0x8b, 0x01, 0x7c, 0xb0, 0x11, 0xc2, 0x86, 0xae, 0xc2, + 0x00, 0x3d, 0x01, 0x7b, 0xc8, 0xc2, 0x00, 0x33, 0x01, 0x7a, 0x89, 0x0b, + 0xc2, 0x86, 0xb8, 0x03, 0xc2, 0x86, 0xd0, 0xc6, 0x14, 0xdb, 0x01, 0x7b, + 0xd9, 0xc3, 0x65, 0xba, 0x01, 0x7c, 0xe1, 0x0e, 0xc2, 0x86, 0xe2, 0x14, + 0x42, 0x86, 0xec, 0xc2, 0x00, 0x06, 0x01, 0x7a, 0xf9, 0x94, 0x01, 0x7b, + 0xc0, 0xc5, 0xd9, 0xf2, 0x01, 0x7c, 0xa9, 0xc6, 0xd0, 0xaf, 0x01, 0x7d, + 0x28, 0xa2, 
0x0c, 0x66, 0xa9, 0xa1, 0x0c, 0x66, 0xa1, 0xa0, 0x0c, 0x66, + 0x99, 0x9f, 0x0c, 0x66, 0x91, 0x9e, 0x0c, 0x66, 0x89, 0x9d, 0x0c, 0x66, + 0x80, 0x88, 0x0c, 0x66, 0x79, 0x87, 0x0c, 0x66, 0x71, 0x86, 0x0c, 0x66, + 0x69, 0x85, 0x0c, 0x66, 0x61, 0x84, 0x0c, 0x66, 0x59, 0x83, 0x0c, 0x66, + 0x51, 0xa6, 0x0c, 0x66, 0x49, 0xa5, 0x0c, 0x66, 0x41, 0xa4, 0x0c, 0x66, + 0x39, 0xa3, 0x0c, 0x66, 0x31, 0xa2, 0x0c, 0x66, 0x29, 0xa1, 0x0c, 0x66, + 0x21, 0xa0, 0x0c, 0x66, 0x19, 0x9f, 0x0c, 0x66, 0x11, 0x9e, 0x0c, 0x66, + 0x09, 0x9d, 0x0c, 0x66, 0x00, 0x88, 0x0c, 0x65, 0xf9, 0x87, 0x0c, 0x65, + 0xf1, 0x86, 0x0c, 0x65, 0xe9, 0x85, 0x0c, 0x65, 0xe1, 0x84, 0x0c, 0x65, + 0xd9, 0x83, 0x0c, 0x65, 0xd1, 0xa6, 0x0c, 0x65, 0xc9, 0xa5, 0x0c, 0x65, + 0xc1, 0xa4, 0x0c, 0x65, 0xb9, 0xa3, 0x0c, 0x65, 0xb1, 0xa2, 0x0c, 0x65, + 0xa9, 0xa1, 0x0c, 0x65, 0xa1, 0xa0, 0x0c, 0x65, 0x99, 0x9f, 0x0c, 0x65, + 0x91, 0x9e, 0x0c, 0x65, 0x89, 0x9d, 0x0c, 0x65, 0x80, 0x88, 0x0c, 0x65, + 0x79, 0x87, 0x0c, 0x65, 0x71, 0x86, 0x0c, 0x65, 0x69, 0x85, 0x0c, 0x65, + 0x61, 0x84, 0x0c, 0x65, 0x59, 0x83, 0x0c, 0x65, 0x51, 0xa6, 0x0c, 0x65, + 0x49, 0xa5, 0x0c, 0x65, 0x41, 0xa4, 0x0c, 0x65, 0x39, 0xa3, 0x0c, 0x65, + 0x31, 0xa2, 0x0c, 0x65, 0x29, 0xa1, 0x0c, 0x65, 0x21, 0xa0, 0x0c, 0x65, + 0x19, 0x9f, 0x0c, 0x65, 0x11, 0x9e, 0x0c, 0x65, 0x09, 0x9d, 0x0c, 0x65, + 0x00, 0x88, 0x0c, 0x64, 0xf9, 0x87, 0x0c, 0x64, 0xf1, 0x86, 0x0c, 0x64, + 0xe9, 0x85, 0x0c, 0x64, 0xe1, 0x84, 0x0c, 0x64, 0xd9, 0x83, 0x0c, 0x64, + 0xd1, 0xa6, 0x0c, 0x64, 0xc9, 0xa5, 0x0c, 0x64, 0xc1, 0xa4, 0x0c, 0x64, + 0xb9, 0xa3, 0x0c, 0x64, 0xb1, 0xa2, 0x0c, 0x64, 0xa9, 0xa1, 0x0c, 0x64, + 0xa1, 0xa0, 0x0c, 0x64, 0x99, 0x9f, 0x0c, 0x64, 0x91, 0x9e, 0x0c, 0x64, + 0x89, 0x9d, 0x0c, 0x64, 0x80, 0x88, 0x0c, 0x64, 0x79, 0x87, 0x0c, 0x64, + 0x71, 0x86, 0x0c, 0x64, 0x69, 0x85, 0x0c, 0x64, 0x61, 0x84, 0x0c, 0x64, + 0x59, 0x83, 0x0c, 0x64, 0x51, 0xa6, 0x0c, 0x64, 0x49, 0xa5, 0x0c, 0x64, + 0x41, 0xa4, 0x0c, 0x64, 0x39, 0xa3, 0x0c, 0x64, 0x31, 0xa2, 0x0c, 0x64, + 0x29, 0xa1, 0x0c, 0x64, 0x21, 0xa0, 0x0c, 0x64, 0x19, 0x9f, 0x0c, 0x64, + 0x11, 0x9e, 0x0c, 0x64, 0x09, 0x9d, 0x0c, 0x64, 0x00, 0x88, 0x0c, 0x63, + 0xf9, 0x87, 0x0c, 0x63, 0xf1, 0x86, 0x0c, 0x63, 0xe9, 0x85, 0x0c, 0x63, + 0xe1, 0x84, 0x0c, 0x63, 0xd9, 0x83, 0x0c, 0x63, 0xd1, 0xa6, 0x0c, 0x63, + 0xc9, 0xa5, 0x0c, 0x63, 0xc1, 0xa4, 0x0c, 0x63, 0xb9, 0xa3, 0x0c, 0x63, + 0xb1, 0xa2, 0x0c, 0x63, 0xa9, 0xa1, 0x0c, 0x63, 0xa1, 0xa0, 0x0c, 0x63, + 0x99, 0x9f, 0x0c, 0x63, 0x91, 0x9e, 0x0c, 0x63, 0x89, 0x9d, 0x0c, 0x63, + 0x80, 0x88, 0x0c, 0x63, 0x79, 0x87, 0x0c, 0x63, 0x71, 0x86, 0x0c, 0x63, + 0x69, 0x85, 0x0c, 0x63, 0x61, 0x84, 0x0c, 0x63, 0x59, 0x83, 0x0c, 0x63, + 0x51, 0xa6, 0x0c, 0x63, 0x49, 0xa5, 0x0c, 0x63, 0x41, 0xa4, 0x0c, 0x63, + 0x39, 0xa3, 0x0c, 0x63, 0x31, 0xa2, 0x0c, 0x63, 0x29, 0xa1, 0x0c, 0x63, + 0x21, 0xa0, 0x0c, 0x63, 0x19, 0x9f, 0x0c, 0x63, 0x11, 0x9e, 0x0c, 0x63, + 0x09, 0x9d, 0x0c, 0x63, 0x00, 0x88, 0x0c, 0x62, 0xf9, 0x87, 0x0c, 0x62, + 0xf1, 0x86, 0x0c, 0x62, 0xe9, 0x85, 0x0c, 0x62, 0xe1, 0x84, 0x0c, 0x62, + 0xd9, 0x83, 0x0c, 0x62, 0xd1, 0xa6, 0x0c, 0x62, 0xc9, 0xa5, 0x0c, 0x62, + 0xc1, 0xa4, 0x0c, 0x62, 0xb9, 0xa3, 0x0c, 0x62, 0xb1, 0xa2, 0x0c, 0x62, + 0xa9, 0xa1, 0x0c, 0x62, 0xa1, 0xa0, 0x0c, 0x62, 0x99, 0x9f, 0x0c, 0x62, + 0x91, 0x9e, 0x0c, 0x62, 0x89, 0x9d, 0x0c, 0x62, 0x80, 0x88, 0x0c, 0x62, + 0x79, 0x87, 0x0c, 0x62, 0x71, 0x86, 0x0c, 0x62, 0x69, 0x85, 0x0c, 0x62, + 0x61, 0x84, 0x0c, 0x62, 0x59, 0x83, 0x0c, 0x62, 0x51, 0xa6, 0x0c, 0x62, + 0x49, 0xa5, 0x0c, 0x62, 0x41, 0xa4, 0x0c, 0x62, 0x39, 0xa3, 0x0c, 0x62, + 0x31, 0xa2, 
0x0c, 0x62, 0x29, 0xa1, 0x0c, 0x62, 0x21, 0xa0, 0x0c, 0x62, + 0x19, 0x9f, 0x0c, 0x62, 0x11, 0x9e, 0x0c, 0x62, 0x09, 0x9d, 0x0c, 0x62, + 0x00, 0x88, 0x0c, 0x61, 0xf9, 0x87, 0x0c, 0x61, 0xf1, 0x86, 0x0c, 0x61, + 0xe9, 0x85, 0x0c, 0x61, 0xe1, 0x84, 0x0c, 0x61, 0xd9, 0x83, 0x0c, 0x61, + 0xd1, 0xa6, 0x0c, 0x61, 0xc9, 0xa5, 0x0c, 0x61, 0xc1, 0xa4, 0x0c, 0x61, + 0xb9, 0xa3, 0x0c, 0x61, 0xb1, 0xa2, 0x0c, 0x61, 0xa9, 0xa1, 0x0c, 0x61, + 0xa1, 0xa0, 0x0c, 0x61, 0x99, 0x9f, 0x0c, 0x61, 0x91, 0x9e, 0x0c, 0x61, + 0x89, 0x9d, 0x0c, 0x61, 0x80, 0x88, 0x0c, 0x61, 0x79, 0x87, 0x0c, 0x61, + 0x71, 0x86, 0x0c, 0x61, 0x69, 0x85, 0x0c, 0x61, 0x61, 0x84, 0x0c, 0x61, + 0x59, 0x83, 0x0c, 0x61, 0x51, 0xa6, 0x0c, 0x61, 0x49, 0xa5, 0x0c, 0x61, + 0x41, 0xa4, 0x0c, 0x61, 0x39, 0xa3, 0x0c, 0x61, 0x31, 0xa2, 0x0c, 0x61, + 0x29, 0xa1, 0x0c, 0x61, 0x21, 0xa0, 0x0c, 0x61, 0x19, 0x9f, 0x0c, 0x61, + 0x11, 0x9e, 0x0c, 0x61, 0x09, 0x9d, 0x0c, 0x61, 0x00, 0x88, 0x0c, 0x60, + 0xf9, 0x87, 0x0c, 0x60, 0xf1, 0x86, 0x0c, 0x60, 0xe9, 0x85, 0x0c, 0x60, + 0xe1, 0x84, 0x0c, 0x60, 0xd9, 0x83, 0x0c, 0x60, 0xd1, 0xa6, 0x0c, 0x60, + 0xc9, 0xa5, 0x0c, 0x60, 0xc1, 0xa4, 0x0c, 0x60, 0xb9, 0xa3, 0x0c, 0x60, + 0xb1, 0xa2, 0x0c, 0x60, 0xa9, 0xa1, 0x0c, 0x60, 0xa1, 0xa0, 0x0c, 0x60, + 0x99, 0x9f, 0x0c, 0x60, 0x91, 0x9e, 0x0c, 0x60, 0x89, 0x9d, 0x0c, 0x60, + 0x80, 0x88, 0x0c, 0x60, 0x79, 0x87, 0x0c, 0x60, 0x71, 0x86, 0x0c, 0x60, + 0x69, 0x85, 0x0c, 0x60, 0x61, 0x84, 0x0c, 0x60, 0x59, 0x83, 0x0c, 0x60, + 0x51, 0xa6, 0x0c, 0x60, 0x49, 0xa5, 0x0c, 0x60, 0x41, 0xa4, 0x0c, 0x60, + 0x39, 0xa3, 0x0c, 0x60, 0x31, 0xa2, 0x0c, 0x60, 0x29, 0xa1, 0x0c, 0x60, + 0x21, 0xa0, 0x0c, 0x60, 0x19, 0x9f, 0x0c, 0x60, 0x11, 0x9e, 0x0c, 0x60, + 0x09, 0x9d, 0x0c, 0x60, 0x00, 0x88, 0x0c, 0x5f, 0xf9, 0x87, 0x0c, 0x5f, + 0xf1, 0x86, 0x0c, 0x5f, 0xe9, 0x85, 0x0c, 0x5f, 0xe1, 0x84, 0x0c, 0x5f, + 0xd9, 0x83, 0x0c, 0x5f, 0xd1, 0xa6, 0x0c, 0x5f, 0xc9, 0xa5, 0x0c, 0x5f, + 0xc1, 0xa4, 0x0c, 0x5f, 0xb9, 0xa3, 0x0c, 0x5f, 0xb1, 0xa2, 0x0c, 0x5f, + 0xa9, 0xa1, 0x0c, 0x5f, 0xa1, 0xa0, 0x0c, 0x5f, 0x99, 0x9f, 0x0c, 0x5f, + 0x91, 0x9e, 0x0c, 0x5f, 0x89, 0x9d, 0x0c, 0x5f, 0x80, 0x88, 0x0c, 0x5f, + 0x79, 0x87, 0x0c, 0x5f, 0x71, 0x86, 0x0c, 0x5f, 0x69, 0x85, 0x0c, 0x5f, + 0x61, 0x84, 0x0c, 0x5f, 0x59, 0x83, 0x0c, 0x5f, 0x51, 0xa6, 0x0c, 0x5f, + 0x49, 0xa5, 0x0c, 0x5f, 0x41, 0xa4, 0x0c, 0x5f, 0x39, 0xa3, 0x0c, 0x5f, + 0x31, 0xa2, 0x0c, 0x5f, 0x29, 0xa1, 0x0c, 0x5f, 0x21, 0xa0, 0x0c, 0x5f, + 0x19, 0x9f, 0x0c, 0x5f, 0x11, 0x9e, 0x0c, 0x5f, 0x09, 0x9d, 0x0c, 0x5f, + 0x00, 0x88, 0x0c, 0x5e, 0xf9, 0x87, 0x0c, 0x5e, 0xf1, 0x86, 0x0c, 0x5e, + 0xe9, 0x85, 0x0c, 0x5e, 0xe1, 0x84, 0x0c, 0x5e, 0xd9, 0x83, 0x0c, 0x5e, + 0xd1, 0xa6, 0x0c, 0x5e, 0xc9, 0xa5, 0x0c, 0x5e, 0xc1, 0xa4, 0x0c, 0x5e, + 0xb9, 0xa3, 0x0c, 0x5e, 0xb1, 0xa2, 0x0c, 0x5e, 0xa9, 0xa1, 0x0c, 0x5e, + 0xa1, 0xa0, 0x0c, 0x5e, 0x99, 0x9f, 0x0c, 0x5e, 0x91, 0x9e, 0x0c, 0x5e, + 0x89, 0x9d, 0x0c, 0x5e, 0x80, 0x88, 0x0c, 0x5e, 0x79, 0x87, 0x0c, 0x5e, + 0x71, 0x86, 0x0c, 0x5e, 0x69, 0x85, 0x0c, 0x5e, 0x61, 0x84, 0x0c, 0x5e, + 0x59, 0x83, 0x0c, 0x5e, 0x51, 0xa6, 0x0c, 0x5e, 0x49, 0xa5, 0x0c, 0x5e, + 0x41, 0xa4, 0x0c, 0x5e, 0x39, 0xa3, 0x0c, 0x5e, 0x31, 0xa2, 0x0c, 0x5e, + 0x29, 0xa1, 0x0c, 0x5e, 0x21, 0xa0, 0x0c, 0x5e, 0x19, 0x9f, 0x0c, 0x5e, + 0x11, 0x9e, 0x0c, 0x5e, 0x09, 0x9d, 0x0c, 0x5e, 0x00, 0x88, 0x0c, 0x5d, + 0xf9, 0x87, 0x0c, 0x5d, 0xf1, 0x86, 0x0c, 0x5d, 0xe9, 0x85, 0x0c, 0x5d, + 0xe1, 0x84, 0x0c, 0x5d, 0xd9, 0x83, 0x0c, 0x5d, 0xd1, 0xa6, 0x0c, 0x5d, + 0xc9, 0xa5, 0x0c, 0x5d, 0xc1, 0xa4, 0x0c, 0x5d, 0xb9, 0xa3, 0x0c, 0x5d, + 0xb1, 0xa2, 
0x0c, 0x5d, 0xa9, 0xa1, 0x0c, 0x5d, 0xa1, 0xa0, 0x0c, 0x5d, + 0x99, 0x9f, 0x0c, 0x5d, 0x91, 0x9e, 0x0c, 0x5d, 0x89, 0x9d, 0x0c, 0x5d, + 0x80, 0x88, 0x0c, 0x5d, 0x79, 0x87, 0x0c, 0x5d, 0x71, 0x86, 0x0c, 0x5d, + 0x69, 0x85, 0x0c, 0x5d, 0x61, 0x84, 0x0c, 0x5d, 0x59, 0x83, 0x0c, 0x5d, + 0x51, 0xa6, 0x0c, 0x5d, 0x49, 0xa5, 0x0c, 0x5d, 0x41, 0xa4, 0x0c, 0x5d, + 0x39, 0xa3, 0x0c, 0x5d, 0x31, 0xa2, 0x0c, 0x5d, 0x29, 0xa1, 0x0c, 0x5d, + 0x21, 0xa0, 0x0c, 0x5d, 0x19, 0x9f, 0x0c, 0x5d, 0x11, 0x9e, 0x0c, 0x5d, + 0x09, 0x9d, 0x0c, 0x5d, 0x00, 0x88, 0x0c, 0x5c, 0xf9, 0x87, 0x0c, 0x5c, + 0xf1, 0x86, 0x0c, 0x5c, 0xe9, 0x85, 0x0c, 0x5c, 0xe1, 0x84, 0x0c, 0x5c, + 0xd9, 0x83, 0x0c, 0x5c, 0xd1, 0xa6, 0x0c, 0x5c, 0xc9, 0xa5, 0x0c, 0x5c, + 0xc1, 0xa4, 0x0c, 0x5c, 0xb9, 0xa3, 0x0c, 0x5c, 0xb1, 0xa2, 0x0c, 0x5c, + 0xa9, 0xa1, 0x0c, 0x5c, 0xa1, 0xa0, 0x0c, 0x5c, 0x99, 0x9f, 0x0c, 0x5c, + 0x91, 0x9e, 0x0c, 0x5c, 0x89, 0x9d, 0x0c, 0x5c, 0x80, 0x88, 0x0c, 0x5c, + 0x79, 0x87, 0x0c, 0x5c, 0x71, 0x86, 0x0c, 0x5c, 0x69, 0x85, 0x0c, 0x5c, + 0x61, 0x84, 0x0c, 0x5c, 0x59, 0x83, 0x0c, 0x5c, 0x51, 0xa6, 0x0c, 0x5c, + 0x49, 0xa5, 0x0c, 0x5c, 0x41, 0xa4, 0x0c, 0x5c, 0x39, 0xa3, 0x0c, 0x5c, + 0x31, 0xa2, 0x0c, 0x5c, 0x29, 0xa1, 0x0c, 0x5c, 0x21, 0xa0, 0x0c, 0x5c, + 0x19, 0x9f, 0x0c, 0x5c, 0x11, 0x9e, 0x0c, 0x5c, 0x09, 0x9d, 0x0c, 0x5c, + 0x00, 0x88, 0x0c, 0x5b, 0xf9, 0x87, 0x0c, 0x5b, 0xf1, 0x86, 0x0c, 0x5b, + 0xe9, 0x85, 0x0c, 0x5b, 0xe1, 0x84, 0x0c, 0x5b, 0xd9, 0x83, 0x0c, 0x5b, + 0xd1, 0xa6, 0x0c, 0x5b, 0xc9, 0xa5, 0x0c, 0x5b, 0xc1, 0xa4, 0x0c, 0x5b, + 0xb9, 0xa3, 0x0c, 0x5b, 0xb1, 0xa2, 0x0c, 0x5b, 0xa9, 0xa1, 0x0c, 0x5b, + 0xa1, 0xa0, 0x0c, 0x5b, 0x99, 0x9f, 0x0c, 0x5b, 0x91, 0x9e, 0x0c, 0x5b, + 0x89, 0x9d, 0x0c, 0x5b, 0x80, 0x88, 0x0c, 0x5b, 0x79, 0x87, 0x0c, 0x5b, + 0x71, 0x86, 0x0c, 0x5b, 0x69, 0x85, 0x0c, 0x5b, 0x61, 0x84, 0x0c, 0x5b, + 0x59, 0x83, 0x0c, 0x5b, 0x51, 0xa6, 0x0c, 0x5b, 0x49, 0xa5, 0x0c, 0x5b, + 0x41, 0xa4, 0x0c, 0x5b, 0x39, 0xa3, 0x0c, 0x5b, 0x31, 0xa2, 0x0c, 0x5b, + 0x29, 0xa1, 0x0c, 0x5b, 0x21, 0xa0, 0x0c, 0x5b, 0x19, 0x9f, 0x0c, 0x5b, + 0x11, 0x9e, 0x0c, 0x5b, 0x09, 0x9d, 0x0c, 0x5b, 0x00, 0x88, 0x0c, 0x5a, + 0xf9, 0x87, 0x0c, 0x5a, 0xf1, 0x86, 0x0c, 0x5a, 0xe9, 0x85, 0x0c, 0x5a, + 0xe1, 0x84, 0x0c, 0x5a, 0xd9, 0x83, 0x0c, 0x5a, 0xd1, 0xa6, 0x0c, 0x5a, + 0xc9, 0xa5, 0x0c, 0x5a, 0xc1, 0xa4, 0x0c, 0x5a, 0xb9, 0xa3, 0x0c, 0x5a, + 0xb1, 0xa2, 0x0c, 0x5a, 0xa9, 0xa1, 0x0c, 0x5a, 0xa1, 0xa0, 0x0c, 0x5a, + 0x99, 0x9f, 0x0c, 0x5a, 0x91, 0x9e, 0x0c, 0x5a, 0x89, 0x9d, 0x0c, 0x5a, + 0x80, 0x88, 0x0c, 0x5a, 0x79, 0x87, 0x0c, 0x5a, 0x71, 0x86, 0x0c, 0x5a, + 0x69, 0x85, 0x0c, 0x5a, 0x61, 0x84, 0x0c, 0x5a, 0x59, 0x83, 0x0c, 0x5a, + 0x51, 0xa6, 0x0c, 0x5a, 0x49, 0xa5, 0x0c, 0x5a, 0x41, 0xa4, 0x0c, 0x5a, + 0x39, 0xa3, 0x0c, 0x5a, 0x31, 0xa2, 0x0c, 0x5a, 0x29, 0xa1, 0x0c, 0x5a, + 0x21, 0xa0, 0x0c, 0x5a, 0x19, 0x9f, 0x0c, 0x5a, 0x11, 0x9e, 0x0c, 0x5a, + 0x09, 0x9d, 0x0c, 0x5a, 0x00, 0x88, 0x0c, 0x59, 0xf9, 0x87, 0x0c, 0x59, + 0xf1, 0x86, 0x0c, 0x59, 0xe9, 0x85, 0x0c, 0x59, 0xe1, 0x84, 0x0c, 0x59, + 0xd9, 0x83, 0x0c, 0x59, 0xd1, 0xa6, 0x0c, 0x59, 0xc9, 0xa5, 0x0c, 0x59, + 0xc1, 0xa4, 0x0c, 0x59, 0xb9, 0xa3, 0x0c, 0x59, 0xb1, 0xa2, 0x0c, 0x59, + 0xa9, 0xa1, 0x0c, 0x59, 0xa1, 0xa0, 0x0c, 0x59, 0x99, 0x9f, 0x0c, 0x59, + 0x91, 0x9e, 0x0c, 0x59, 0x89, 0x9d, 0x0c, 0x59, 0x80, 0x88, 0x0c, 0x59, + 0x79, 0x87, 0x0c, 0x59, 0x71, 0x86, 0x0c, 0x59, 0x69, 0x85, 0x0c, 0x59, + 0x61, 0x84, 0x0c, 0x59, 0x59, 0x83, 0x0c, 0x59, 0x51, 0xa6, 0x0c, 0x59, + 0x49, 0xa5, 0x0c, 0x59, 0x41, 0xa4, 0x0c, 0x59, 0x39, 0xa3, 0x0c, 0x59, + 0x31, 0xa2, 
0x0c, 0x59, 0x29, 0xa1, 0x0c, 0x59, 0x21, 0xa0, 0x0c, 0x59, + 0x19, 0x9f, 0x0c, 0x59, 0x11, 0x9e, 0x0c, 0x59, 0x09, 0x9d, 0x0c, 0x59, + 0x00, 0x88, 0x0c, 0x58, 0xf9, 0x87, 0x0c, 0x58, 0xf1, 0x86, 0x0c, 0x58, + 0xe9, 0x85, 0x0c, 0x58, 0xe1, 0x84, 0x0c, 0x58, 0xd9, 0x83, 0x0c, 0x58, + 0xd1, 0xa6, 0x0c, 0x58, 0xc9, 0xa5, 0x0c, 0x58, 0xc1, 0xa4, 0x0c, 0x58, + 0xb9, 0xa3, 0x0c, 0x58, 0xb1, 0xa2, 0x0c, 0x58, 0xa9, 0xa1, 0x0c, 0x58, + 0xa1, 0xa0, 0x0c, 0x58, 0x99, 0x9f, 0x0c, 0x58, 0x91, 0x9e, 0x0c, 0x58, + 0x89, 0x9d, 0x0c, 0x58, 0x80, 0x88, 0x0c, 0x58, 0x79, 0x87, 0x0c, 0x58, + 0x71, 0x86, 0x0c, 0x58, 0x69, 0x85, 0x0c, 0x58, 0x61, 0x84, 0x0c, 0x58, + 0x59, 0x83, 0x0c, 0x58, 0x51, 0xa6, 0x0c, 0x58, 0x49, 0xa5, 0x0c, 0x58, + 0x41, 0xa4, 0x0c, 0x58, 0x39, 0xa3, 0x0c, 0x58, 0x31, 0xa2, 0x0c, 0x58, + 0x29, 0xa1, 0x0c, 0x58, 0x21, 0xa0, 0x0c, 0x58, 0x19, 0x9f, 0x0c, 0x58, + 0x11, 0x9e, 0x0c, 0x58, 0x09, 0x9d, 0x0c, 0x58, 0x00, 0xc2, 0x00, 0xd0, + 0x08, 0x96, 0x59, 0xc2, 0x0e, 0x9a, 0x08, 0x96, 0x49, 0x83, 0x08, 0x96, + 0x40, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x39, 0x83, 0x08, 0x96, 0x30, 0xc4, + 0xdb, 0xfb, 0x08, 0x91, 0xf1, 0xc5, 0xd7, 0x18, 0x08, 0x91, 0xb8, 0xc2, + 0x0e, 0x9a, 0x08, 0x90, 0xe1, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xb9, 0x83, + 0x08, 0x90, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xa9, 0x83, 0x08, 0x90, + 0xa0, 0x02, 0xc2, 0x86, 0xf8, 0x00, 0x42, 0x87, 0x06, 0x43, 0x13, 0x3a, + 0xc2, 0x87, 0x12, 0x43, 0x71, 0xed, 0xc2, 0x87, 0x1a, 0xc9, 0xb0, 0xc5, + 0x00, 0xcf, 0x00, 0x44, 0xdf, 0x37, 0xc2, 0x87, 0x26, 0x43, 0x93, 0x74, + 0x42, 0x87, 0x32, 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0x89, 0xc4, 0xe0, 0xaf, + 0x00, 0xcf, 0x08, 0x12, 0xc2, 0x87, 0x3e, 0x04, 0xc2, 0x87, 0x4d, 0xc4, + 0xda, 0x97, 0x00, 0xbf, 0x89, 0xc3, 0x18, 0x91, 0x00, 0xbf, 0x80, 0xc7, + 0xc6, 0x24, 0x00, 0xbe, 0xe9, 0xcc, 0x89, 0x31, 0x00, 0xbe, 0xe1, 0xc4, + 0xe0, 0x0b, 0x00, 0xbe, 0x78, 0xc6, 0xcd, 0xb5, 0x00, 0xbe, 0xd1, 0xc3, + 0x00, 0xd0, 0x00, 0xbe, 0xa1, 0xc6, 0xcd, 0x97, 0x00, 0xbe, 0x70, 0xc5, + 0xdc, 0x22, 0x00, 0xbe, 0xc1, 0x03, 0x42, 0x87, 0x59, 0xce, 0x71, 0xe6, + 0x00, 0xbe, 0xb1, 0xc4, 0xe4, 0x1f, 0x00, 0xbe, 0x90, 0xca, 0xa3, 0x50, + 0x00, 0xbe, 0x69, 0xc6, 0xcc, 0xdd, 0x00, 0xbe, 0x50, 0xc4, 0xe4, 0x17, + 0x00, 0xbe, 0x61, 0xc6, 0xd3, 0xd9, 0x00, 0xbe, 0x38, 0x97, 0x00, 0xbe, + 0x29, 0x8b, 0x00, 0xbe, 0x19, 0x87, 0x00, 0xbe, 0x11, 0x83, 0x00, 0xbd, + 0xb0, 0x91, 0x00, 0xbe, 0x21, 0x87, 0x00, 0xbd, 0xf0, 0x87, 0x00, 0xbe, + 0x01, 0x8b, 0x00, 0xbd, 0xc0, 0x83, 0x00, 0xbd, 0xf9, 0x9b, 0x00, 0xbd, + 0xd0, 0x83, 0x00, 0xbd, 0xe9, 0x97, 0x00, 0xbd, 0xe0, 0x97, 0x00, 0xbd, + 0x99, 0x8b, 0x00, 0xbd, 0x81, 0x83, 0x00, 0xbd, 0x21, 0x93, 0x00, 0xbd, + 0x18, 0xc3, 0x02, 0x9f, 0x00, 0xbd, 0x91, 0xc3, 0x05, 0x14, 0x00, 0xbd, + 0x88, 0x97, 0x00, 0xbd, 0x4b, 0x02, 0x87, 0x6b, 0x8d, 0x00, 0xbd, 0x40, + 0x8b, 0x00, 0xbd, 0x30, 0x91, 0x00, 0xbc, 0xb9, 0x83, 0x00, 0xbc, 0xa8, + 0x91, 0x00, 0xbc, 0x91, 0x83, 0x00, 0xbc, 0x80, 0x91, 0x00, 0xbc, 0x69, + 0x83, 0x00, 0xbc, 0x58, 0x91, 0x00, 0xbc, 0x41, 0x83, 0x00, 0xbc, 0x30, + 0x91, 0x00, 0xbc, 0x19, 0x83, 0x00, 0xbc, 0x08, 0xca, 0x97, 0xf6, 0x08, + 0x52, 0xb9, 0x96, 0x08, 0x52, 0x80, 0x91, 0x08, 0x50, 0x31, 0x87, 0x08, + 0x50, 0x29, 0xc9, 0xb2, 0x2d, 0x08, 0x50, 0x19, 0x97, 0x08, 0x50, 0x11, + 0x8b, 0x08, 0x50, 0x08, 0x16, 0xc2, 0x87, 0x6f, 0xc2, 0x00, 0xd0, 0x08, + 0x50, 0xd9, 0x83, 0x08, 0x50, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0x50, 0xe9, + 0x83, 0x08, 0x50, 0xe0, 0xcb, 0x20, 0x9d, 0x0f, 0xb0, 0xd1, 0xcc, 0x1d, + 0x4a, 0x0f, 0xb0, 0xc8, 0xd7, 0x2a, 0xf5, 0x0f, 0xd2, 0x68, 0x49, 0x2a, + 0xf5, 0x42, 
0x87, 0x79, 0xc3, 0x00, 0x74, 0x0f, 0xd0, 0x03, 0x02, 0x87, + 0x85, 0xc5, 0x56, 0xa5, 0x0f, 0xd0, 0x22, 0x02, 0x87, 0x8b, 0x49, 0x2a, + 0xf5, 0x42, 0x87, 0x91, 0x49, 0x2a, 0xf5, 0x42, 0x87, 0x9d, 0x49, 0x2a, + 0xf5, 0x42, 0x87, 0xa9, 0x0d, 0xc2, 0x87, 0xb5, 0xc5, 0xa8, 0xf7, 0x0f, + 0xd1, 0x59, 0xc4, 0xde, 0x83, 0x0f, 0xd1, 0x61, 0xc6, 0xca, 0xfd, 0x0f, + 0xd1, 0x69, 0xc4, 0xe3, 0x93, 0x0f, 0xd1, 0x78, 0x43, 0x00, 0xbc, 0xc2, + 0x87, 0xc1, 0xc4, 0xe3, 0x5b, 0x08, 0xa2, 0x50, 0xcd, 0x80, 0x36, 0x08, + 0xa2, 0xf9, 0x47, 0xb2, 0x2e, 0x42, 0x87, 0xe9, 0x83, 0x08, 0xa1, 0x99, + 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x89, 0xc2, 0x0d, 0xf6, 0x08, 0xa1, 0x90, + 0x83, 0x08, 0xa1, 0x19, 0xc2, 0x00, 0xc1, 0x08, 0xa0, 0xf1, 0x1b, 0xc2, + 0x87, 0xf7, 0x09, 0xc2, 0x88, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x20, + 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x11, 0x83, 0x08, 0xa1, 0x09, 0x06, 0x42, + 0x88, 0x0b, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x01, 0x83, 0x08, 0xa0, 0xf9, + 0x16, 0x42, 0x88, 0x15, 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0xb9, 0x83, 0x08, + 0xa0, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0xa9, 0x83, 0x08, 0xa0, 0xa0, + 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0x89, 0x83, 0x08, 0xa0, 0x80, 0xc2, 0x00, + 0xd0, 0x08, 0xa0, 0x79, 0x83, 0x08, 0xa0, 0x70, 0x97, 0x08, 0xa0, 0x69, + 0x8b, 0x08, 0xa0, 0x59, 0x83, 0x08, 0xa0, 0x08, 0x97, 0x08, 0xa0, 0x28, + 0x8b, 0x08, 0xa0, 0x18, 0x83, 0x08, 0xa1, 0x29, 0xc2, 0x00, 0xd0, 0x08, + 0xa1, 0x30, 0x83, 0x08, 0xa1, 0x39, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x40, + 0x83, 0x08, 0xa1, 0x49, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x50, 0x83, 0x08, + 0xa1, 0x61, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x68, 0x83, 0x08, 0xa1, 0x71, + 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x78, 0xc5, 0x0a, 0x8a, 0x08, 0xa2, 0xd1, + 0xc5, 0x86, 0x20, 0x08, 0xa2, 0x60, 0xc4, 0x26, 0x78, 0x08, 0xa2, 0xc9, + 0xc5, 0x06, 0xdb, 0x08, 0xa2, 0xc1, 0x15, 0xc2, 0x88, 0x1f, 0x08, 0xc2, + 0x88, 0x2b, 0x16, 0xc2, 0x88, 0x37, 0xc3, 0x05, 0x14, 0x08, 0xa2, 0x89, + 0xc4, 0x15, 0xe7, 0x08, 0xa2, 0x80, 0x97, 0x08, 0xa2, 0x09, 0x8b, 0x08, + 0xa1, 0xf9, 0x83, 0x08, 0xa1, 0xa8, 0x8e, 0x08, 0xa1, 0xe3, 0x02, 0x88, + 0x43, 0x94, 0x08, 0xa1, 0xd2, 0x02, 0x88, 0x47, 0x97, 0x08, 0xa1, 0xc8, + 0x8b, 0x08, 0xa1, 0xb8, 0x98, 0x00, 0xce, 0xf8, 0xcd, 0x78, 0xf3, 0x00, + 0xce, 0xd1, 0x49, 0xac, 0xb1, 0x42, 0x88, 0x4b, 0xc4, 0x26, 0x78, 0x00, + 0xce, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0xce, 0xc1, 0x15, 0xc2, 0x88, 0x53, + 0x08, 0xc2, 0x88, 0x5f, 0x16, 0xc2, 0x88, 0x6b, 0xc3, 0x05, 0x14, 0x00, + 0xce, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0xce, 0x80, 0x46, 0x26, 0xf7, 0xc2, + 0x88, 0x77, 0x44, 0x05, 0x36, 0xc2, 0x88, 0x92, 0x45, 0x08, 0xcb, 0x42, + 0x88, 0xe0, 0x0b, 0xc2, 0x89, 0x2e, 0x97, 0x00, 0xcd, 0x9b, 0x02, 0x89, + 0x36, 0x91, 0x00, 0xcd, 0xbb, 0x02, 0x89, 0x45, 0x03, 0xc2, 0x89, 0x50, + 0x87, 0x00, 0xcd, 0xa9, 0xcf, 0x6a, 0x35, 0x00, 0xcd, 0x80, 0x9c, 0x0f, + 0x8c, 0x49, 0x9b, 0x0f, 0x8c, 0x41, 0x9a, 0x0f, 0x8c, 0x39, 0x99, 0x0f, + 0x8c, 0x31, 0x98, 0x0f, 0x8c, 0x29, 0x97, 0x0f, 0x8c, 0x21, 0x96, 0x0f, + 0x8c, 0x19, 0x95, 0x0f, 0x8c, 0x11, 0x94, 0x0f, 0x8c, 0x09, 0x93, 0x0f, + 0x8c, 0x01, 0x92, 0x0f, 0x8b, 0xf9, 0x91, 0x0f, 0x8b, 0xf1, 0x90, 0x0f, + 0x8b, 0xe9, 0x8f, 0x0f, 0x8b, 0xe1, 0x8e, 0x0f, 0x8b, 0xd9, 0x8d, 0x0f, + 0x8b, 0xd1, 0x8c, 0x0f, 0x8b, 0xc9, 0x8b, 0x0f, 0x8b, 0xc1, 0x8a, 0x0f, + 0x8b, 0xb9, 0x89, 0x0f, 0x8b, 0xb1, 0x88, 0x0f, 0x8b, 0xa9, 0x87, 0x0f, + 0x8b, 0xa1, 0x86, 0x0f, 0x8b, 0x99, 0x85, 0x0f, 0x8b, 0x91, 0x84, 0x0f, + 0x8b, 0x89, 0x83, 0x0f, 0x8b, 0x80, 0x16, 0xc2, 0x89, 0x5f, 0xc8, 0x4b, + 0x5f, 0x01, 0x27, 0x99, 0x07, 0xc2, 0x89, 0x6b, 0x15, 0xc2, 0x89, 0x77, + 0x08, 0x42, 
0x89, 0x83, 0x9c, 0x0f, 0x8b, 0x49, 0x9b, 0x0f, 0x8b, 0x41, + 0x9a, 0x0f, 0x8b, 0x39, 0x99, 0x0f, 0x8b, 0x31, 0x98, 0x0f, 0x8b, 0x29, + 0x97, 0x0f, 0x8b, 0x21, 0x96, 0x0f, 0x8b, 0x19, 0x95, 0x0f, 0x8b, 0x11, + 0x94, 0x0f, 0x8b, 0x09, 0x93, 0x0f, 0x8b, 0x01, 0x92, 0x0f, 0x8a, 0xf9, + 0x91, 0x0f, 0x8a, 0xf1, 0x90, 0x0f, 0x8a, 0xe9, 0x8f, 0x0f, 0x8a, 0xe1, + 0x8e, 0x0f, 0x8a, 0xd9, 0x8d, 0x0f, 0x8a, 0xd1, 0x8c, 0x0f, 0x8a, 0xc9, + 0x8b, 0x0f, 0x8a, 0xc1, 0x8a, 0x0f, 0x8a, 0xb9, 0x89, 0x0f, 0x8a, 0xb1, + 0x88, 0x0f, 0x8a, 0xa9, 0x87, 0x0f, 0x8a, 0xa1, 0x86, 0x0f, 0x8a, 0x99, + 0x85, 0x0f, 0x8a, 0x91, 0x84, 0x0f, 0x8a, 0x89, 0x83, 0x0f, 0x8a, 0x80, + 0x97, 0x08, 0xce, 0xe9, 0x8b, 0x08, 0xce, 0xd9, 0x83, 0x08, 0xce, 0x88, + 0x94, 0x08, 0xce, 0xb8, 0x97, 0x08, 0xce, 0xa8, 0x8b, 0x08, 0xce, 0x98, + 0xc7, 0x7a, 0x7f, 0x08, 0xcf, 0x09, 0xc7, 0x14, 0x39, 0x08, 0xce, 0xf0, + 0xc4, 0x1e, 0x97, 0x08, 0xcf, 0x01, 0xc5, 0x40, 0xe7, 0x08, 0xce, 0xf8, + 0xc2, 0x00, 0x39, 0x08, 0xce, 0x81, 0x83, 0x08, 0xce, 0x40, 0xc2, 0x00, + 0xdb, 0x08, 0xce, 0x79, 0x83, 0x08, 0xce, 0x48, 0x83, 0x08, 0xce, 0x69, + 0xc2, 0x0d, 0xf6, 0x08, 0xce, 0x61, 0xc2, 0x00, 0xd0, 0x08, 0xce, 0x58, + 0x83, 0x08, 0xce, 0x51, 0xc8, 0xb2, 0x2e, 0x08, 0xcd, 0x32, 0x02, 0x89, + 0x8f, 0xc2, 0x00, 0xd0, 0x08, 0xce, 0x29, 0x83, 0x08, 0xce, 0x20, 0xc2, + 0x00, 0xd0, 0x08, 0xce, 0x19, 0x83, 0x08, 0xce, 0x10, 0x83, 0x08, 0xce, + 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xcd, 0xe1, 0xc2, 0x19, 0x2c, 0x08, 0xcd, + 0xb9, 0xc2, 0x01, 0x30, 0x08, 0xcd, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0xce, + 0x01, 0x83, 0x08, 0xcd, 0xf9, 0x06, 0x42, 0x89, 0x93, 0xc2, 0x00, 0xd0, + 0x08, 0xcd, 0xf1, 0x83, 0x08, 0xcd, 0xe9, 0x16, 0x42, 0x89, 0x9d, 0xc2, + 0x00, 0xd0, 0x08, 0xcd, 0xb1, 0x83, 0x08, 0xcd, 0xa8, 0xc2, 0x00, 0xd0, + 0x08, 0xcd, 0xa1, 0x83, 0x08, 0xcd, 0x98, 0xc2, 0x00, 0xd0, 0x08, 0xcd, + 0x89, 0x83, 0x08, 0xcd, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xcd, 0x79, 0x83, + 0x08, 0xcd, 0x70, 0x97, 0x08, 0xcd, 0x69, 0x8b, 0x08, 0xcd, 0x59, 0x83, + 0x08, 0xcd, 0x08, 0x97, 0x08, 0xcd, 0x28, 0x8b, 0x08, 0xcd, 0x18, 0xc8, + 0x0d, 0x03, 0x08, 0x45, 0x78, 0x19, 0xc2, 0x89, 0xa7, 0xc2, 0x00, 0xc4, + 0x08, 0x45, 0x69, 0xc4, 0x02, 0xde, 0x08, 0x45, 0x48, 0xc3, 0x0d, 0x14, + 0x08, 0x45, 0x61, 0xc3, 0x09, 0x9e, 0x08, 0x45, 0x50, 0xc2, 0x39, 0x8b, + 0x08, 0x44, 0xf1, 0xc3, 0x1e, 0x1b, 0x08, 0x44, 0x58, 0xc3, 0x11, 0xef, + 0x08, 0x44, 0xe9, 0x03, 0x42, 0x89, 0xb1, 0xc4, 0x3e, 0x5a, 0x08, 0x44, + 0xe1, 0xc3, 0x20, 0x18, 0x08, 0x44, 0xa1, 0xc3, 0x00, 0x4e, 0x08, 0x44, + 0x91, 0xc6, 0xcf, 0xd7, 0x08, 0x44, 0x81, 0xc4, 0xe0, 0xe7, 0x08, 0x44, + 0x71, 0xc4, 0x4a, 0xb9, 0x08, 0x44, 0x61, 0xc2, 0x01, 0x7f, 0x08, 0x44, + 0x31, 0xc4, 0xe3, 0x27, 0x08, 0x44, 0x11, 0xc5, 0xa5, 0xfd, 0x08, 0x44, + 0x00, 0xc3, 0x16, 0x5a, 0x08, 0x44, 0xb9, 0xc4, 0x36, 0xb5, 0x08, 0x44, + 0x08, 0xc2, 0x00, 0x8e, 0x08, 0x44, 0x50, 0x49, 0x01, 0xaa, 0xc2, 0x89, + 0xbd, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xb9, 0x03, 0xc2, 0x89, 0xcf, 0xcb, + 0x01, 0xfc, 0x01, 0x58, 0x01, 0xcb, 0x94, 0x22, 0x01, 0x58, 0x41, 0xd5, + 0x01, 0x92, 0x01, 0x5b, 0x3b, 0x02, 0x89, 0xde, 0xd0, 0x5b, 0xc2, 0x0f, + 0xc2, 0xa8, 0x03, 0xc2, 0x89, 0xe4, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xb1, + 0x49, 0x01, 0xaa, 0xc2, 0x89, 0xf3, 0xcb, 0x01, 0xfc, 0x01, 0x58, 0x09, + 0xcb, 0x94, 0x22, 0x01, 0x58, 0x49, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x33, + 0x02, 0x8a, 0x05, 0xd0, 0x5b, 0xc2, 0x0f, 0xc2, 0xa0, 0x49, 0x53, 0xa9, + 0xc2, 0x8a, 0x0b, 0x43, 0x00, 0xe3, 0xc2, 0x8a, 0x17, 0xd0, 0x5f, 0x92, + 0x05, 0x41, 0xb9, 0xca, 0xa6, 0xc0, 0x05, 0x41, 0xc0, 0xe0, 0x0c, 0x07, + 0x01, 0x3d, 
0x78, 0xd7, 0x27, 0xb9, 0x01, 0x17, 0x19, 0xd4, 0x3c, 0x50, + 0x01, 0x17, 0x10, 0xc9, 0x2d, 0xd0, 0x01, 0x14, 0x29, 0xc7, 0x3a, 0x20, + 0x01, 0x14, 0x20, 0xc2, 0x00, 0xdb, 0x0f, 0x08, 0xf1, 0x83, 0x0f, 0x08, + 0xe0, 0xc2, 0x8d, 0x8f, 0x0f, 0x08, 0x99, 0xc2, 0x0d, 0xf6, 0x0f, 0x08, + 0x69, 0x83, 0x0f, 0x08, 0x10, 0x84, 0x0d, 0x97, 0xd9, 0x83, 0x0d, 0x97, + 0xd1, 0xa6, 0x0d, 0x97, 0xc9, 0xa5, 0x0d, 0x97, 0xc1, 0xa4, 0x0d, 0x97, + 0xb9, 0xa3, 0x0d, 0x97, 0xb1, 0xa2, 0x0d, 0x97, 0xa9, 0xa1, 0x0d, 0x97, + 0xa1, 0xa0, 0x0d, 0x97, 0x99, 0x9f, 0x0d, 0x97, 0x91, 0x9e, 0x0d, 0x97, + 0x89, 0x9d, 0x0d, 0x97, 0x80, 0x88, 0x0d, 0x97, 0x79, 0x87, 0x0d, 0x97, + 0x71, 0x86, 0x0d, 0x97, 0x69, 0x83, 0x0d, 0x97, 0x51, 0xa6, 0x0d, 0x97, + 0x49, 0xa2, 0x0d, 0x97, 0x29, 0x85, 0x0d, 0x97, 0x61, 0x84, 0x0d, 0x97, + 0x59, 0xa5, 0x0d, 0x97, 0x41, 0xa4, 0x0d, 0x97, 0x39, 0xa3, 0x0d, 0x97, + 0x31, 0xa1, 0x0d, 0x97, 0x21, 0xa0, 0x0d, 0x97, 0x19, 0x9f, 0x0d, 0x97, + 0x11, 0x9e, 0x0d, 0x97, 0x09, 0x9d, 0x0d, 0x97, 0x00, 0x83, 0x0d, 0x95, + 0xd1, 0x88, 0x0d, 0x95, 0xf9, 0x87, 0x0d, 0x95, 0xf1, 0xa6, 0x0d, 0x95, + 0xc9, 0xa5, 0x0d, 0x95, 0xc1, 0xa4, 0x0d, 0x95, 0xb9, 0xa3, 0x0d, 0x95, + 0xb1, 0xa2, 0x0d, 0x95, 0xa9, 0xa1, 0x0d, 0x95, 0xa1, 0xa0, 0x0d, 0x95, + 0x99, 0x9f, 0x0d, 0x95, 0x91, 0x9e, 0x0d, 0x95, 0x89, 0x9d, 0x0d, 0x95, + 0x81, 0x84, 0x0d, 0x95, 0xd9, 0x85, 0x0d, 0x95, 0xe1, 0x86, 0x0d, 0x95, + 0xe8, 0x83, 0x0d, 0x94, 0xd1, 0xa6, 0x0d, 0x94, 0xc9, 0xa5, 0x0d, 0x94, + 0xc1, 0xa4, 0x0d, 0x94, 0xb9, 0xa3, 0x0d, 0x94, 0xb1, 0xa2, 0x0d, 0x94, + 0xa9, 0xa1, 0x0d, 0x94, 0xa1, 0xa0, 0x0d, 0x94, 0x99, 0x9f, 0x0d, 0x94, + 0x91, 0x9e, 0x0d, 0x94, 0x89, 0x9d, 0x0d, 0x94, 0x81, 0x88, 0x0d, 0x94, + 0xf9, 0x87, 0x0d, 0x94, 0xf1, 0x86, 0x0d, 0x94, 0xe9, 0x85, 0x0d, 0x94, + 0xe1, 0x84, 0x0d, 0x94, 0xd8, 0x88, 0x0d, 0x94, 0x79, 0x87, 0x0d, 0x94, + 0x71, 0x86, 0x0d, 0x94, 0x69, 0x85, 0x0d, 0x94, 0x61, 0x84, 0x0d, 0x94, + 0x59, 0x83, 0x0d, 0x94, 0x51, 0xa6, 0x0d, 0x94, 0x49, 0xa5, 0x0d, 0x94, + 0x41, 0xa4, 0x0d, 0x94, 0x39, 0xa3, 0x0d, 0x94, 0x31, 0xa2, 0x0d, 0x94, + 0x29, 0xa1, 0x0d, 0x94, 0x21, 0xa0, 0x0d, 0x94, 0x19, 0x9f, 0x0d, 0x94, + 0x11, 0x9e, 0x0d, 0x94, 0x09, 0x9d, 0x0d, 0x94, 0x00, 0x88, 0x0d, 0x93, + 0xf9, 0x87, 0x0d, 0x93, 0xf1, 0x86, 0x0d, 0x93, 0xe9, 0x85, 0x0d, 0x93, + 0xe1, 0x84, 0x0d, 0x93, 0xd9, 0x83, 0x0d, 0x93, 0xd1, 0xa6, 0x0d, 0x93, + 0xc9, 0xa5, 0x0d, 0x93, 0xc1, 0xa4, 0x0d, 0x93, 0xb9, 0xa3, 0x0d, 0x93, + 0xb1, 0xa2, 0x0d, 0x93, 0xa9, 0xa1, 0x0d, 0x93, 0xa1, 0xa0, 0x0d, 0x93, + 0x99, 0x9f, 0x0d, 0x93, 0x91, 0x9e, 0x0d, 0x93, 0x89, 0x9d, 0x0d, 0x93, + 0x80, 0x88, 0x0d, 0x93, 0x79, 0x87, 0x0d, 0x93, 0x71, 0x86, 0x0d, 0x93, + 0x69, 0x85, 0x0d, 0x93, 0x61, 0x84, 0x0d, 0x93, 0x59, 0x83, 0x0d, 0x93, + 0x51, 0xa6, 0x0d, 0x93, 0x49, 0xa5, 0x0d, 0x93, 0x41, 0xa4, 0x0d, 0x93, + 0x39, 0xa3, 0x0d, 0x93, 0x31, 0xa2, 0x0d, 0x93, 0x29, 0xa1, 0x0d, 0x93, + 0x21, 0xa0, 0x0d, 0x93, 0x19, 0x9f, 0x0d, 0x93, 0x11, 0x9e, 0x0d, 0x93, + 0x09, 0x9d, 0x0d, 0x93, 0x00, 0x88, 0x0d, 0x92, 0xf9, 0x87, 0x0d, 0x92, + 0xf1, 0x86, 0x0d, 0x92, 0xe9, 0x85, 0x0d, 0x92, 0xe1, 0x84, 0x0d, 0x92, + 0xd9, 0x83, 0x0d, 0x92, 0xd1, 0xa6, 0x0d, 0x92, 0xc9, 0xa5, 0x0d, 0x92, + 0xc1, 0xa4, 0x0d, 0x92, 0xb9, 0xa3, 0x0d, 0x92, 0xb1, 0xa2, 0x0d, 0x92, + 0xa9, 0xa1, 0x0d, 0x92, 0xa1, 0xa0, 0x0d, 0x92, 0x99, 0x9f, 0x0d, 0x92, + 0x91, 0x9e, 0x0d, 0x92, 0x89, 0x9d, 0x0d, 0x92, 0x80, 0x88, 0x0d, 0x92, + 0x79, 0x87, 0x0d, 0x92, 0x71, 0x86, 0x0d, 0x92, 0x69, 0x85, 0x0d, 0x92, + 0x61, 0x84, 0x0d, 0x92, 0x59, 0x83, 0x0d, 0x92, 0x51, 0xa6, 0x0d, 0x92, + 0x49, 0xa5, 
0x0d, 0x92, 0x41, 0xa4, 0x0d, 0x92, 0x39, 0xa3, 0x0d, 0x92, + 0x31, 0xa2, 0x0d, 0x92, 0x29, 0xa1, 0x0d, 0x92, 0x21, 0xa0, 0x0d, 0x92, + 0x19, 0x9f, 0x0d, 0x92, 0x11, 0x9e, 0x0d, 0x92, 0x09, 0x9d, 0x0d, 0x92, + 0x00, 0x88, 0x0d, 0x91, 0xf9, 0x87, 0x0d, 0x91, 0xf1, 0x86, 0x0d, 0x91, + 0xe9, 0x85, 0x0d, 0x91, 0xe1, 0x84, 0x0d, 0x91, 0xd9, 0x83, 0x0d, 0x91, + 0xd1, 0xa6, 0x0d, 0x91, 0xc9, 0xa5, 0x0d, 0x91, 0xc1, 0xa4, 0x0d, 0x91, + 0xb9, 0xa3, 0x0d, 0x91, 0xb1, 0xa2, 0x0d, 0x91, 0xa9, 0xa1, 0x0d, 0x91, + 0xa1, 0xa0, 0x0d, 0x91, 0x99, 0x9f, 0x0d, 0x91, 0x91, 0x9e, 0x0d, 0x91, + 0x89, 0x9d, 0x0d, 0x91, 0x80, 0x88, 0x0d, 0x91, 0x79, 0x87, 0x0d, 0x91, + 0x71, 0x86, 0x0d, 0x91, 0x69, 0x85, 0x0d, 0x91, 0x61, 0x84, 0x0d, 0x91, + 0x59, 0x83, 0x0d, 0x91, 0x51, 0xa6, 0x0d, 0x91, 0x49, 0xa5, 0x0d, 0x91, + 0x41, 0xa4, 0x0d, 0x91, 0x39, 0xa3, 0x0d, 0x91, 0x31, 0xa2, 0x0d, 0x91, + 0x29, 0xa1, 0x0d, 0x91, 0x21, 0xa0, 0x0d, 0x91, 0x19, 0x9f, 0x0d, 0x91, + 0x11, 0x9e, 0x0d, 0x91, 0x09, 0x9d, 0x0d, 0x91, 0x00, 0x88, 0x0d, 0x90, + 0xf9, 0x87, 0x0d, 0x90, 0xf1, 0x86, 0x0d, 0x90, 0xe9, 0x85, 0x0d, 0x90, + 0xe1, 0x84, 0x0d, 0x90, 0xd9, 0x83, 0x0d, 0x90, 0xd1, 0xa6, 0x0d, 0x90, + 0xc9, 0xa5, 0x0d, 0x90, 0xc1, 0xa4, 0x0d, 0x90, 0xb9, 0xa3, 0x0d, 0x90, + 0xb1, 0xa2, 0x0d, 0x90, 0xa9, 0xa1, 0x0d, 0x90, 0xa1, 0xa0, 0x0d, 0x90, + 0x99, 0x9f, 0x0d, 0x90, 0x91, 0x9e, 0x0d, 0x90, 0x89, 0x9d, 0x0d, 0x90, + 0x80, 0x88, 0x0d, 0x90, 0x79, 0x87, 0x0d, 0x90, 0x71, 0x86, 0x0d, 0x90, + 0x69, 0x85, 0x0d, 0x90, 0x61, 0x84, 0x0d, 0x90, 0x59, 0x83, 0x0d, 0x90, + 0x51, 0xa6, 0x0d, 0x90, 0x49, 0xa5, 0x0d, 0x90, 0x41, 0xa4, 0x0d, 0x90, + 0x39, 0xa3, 0x0d, 0x90, 0x31, 0xa2, 0x0d, 0x90, 0x29, 0xa1, 0x0d, 0x90, + 0x21, 0xa0, 0x0d, 0x90, 0x19, 0x9f, 0x0d, 0x90, 0x11, 0x9e, 0x0d, 0x90, + 0x09, 0x9d, 0x0d, 0x90, 0x00, 0x88, 0x0d, 0x96, 0xf9, 0x87, 0x0d, 0x96, + 0xf1, 0x86, 0x0d, 0x96, 0xe9, 0x85, 0x0d, 0x96, 0xe1, 0x84, 0x0d, 0x96, + 0xd9, 0x83, 0x0d, 0x96, 0xd1, 0xa6, 0x0d, 0x96, 0xc9, 0xa5, 0x0d, 0x96, + 0xc1, 0xa4, 0x0d, 0x96, 0xb9, 0xa3, 0x0d, 0x96, 0xb1, 0xa2, 0x0d, 0x96, + 0xa9, 0xa1, 0x0d, 0x96, 0xa1, 0xa0, 0x0d, 0x96, 0x99, 0x9f, 0x0d, 0x96, + 0x91, 0x9e, 0x0d, 0x96, 0x89, 0x9d, 0x0d, 0x96, 0x80, 0x88, 0x0d, 0x96, + 0x79, 0x87, 0x0d, 0x96, 0x71, 0x86, 0x0d, 0x96, 0x69, 0x85, 0x0d, 0x96, + 0x61, 0x84, 0x0d, 0x96, 0x59, 0x83, 0x0d, 0x96, 0x51, 0xa6, 0x0d, 0x96, + 0x49, 0xa5, 0x0d, 0x96, 0x41, 0xa4, 0x0d, 0x96, 0x39, 0xa3, 0x0d, 0x96, + 0x31, 0xa2, 0x0d, 0x96, 0x29, 0xa1, 0x0d, 0x96, 0x21, 0xa0, 0x0d, 0x96, + 0x19, 0x9f, 0x0d, 0x96, 0x11, 0x9e, 0x0d, 0x96, 0x09, 0x9d, 0x0d, 0x96, + 0x00, 0x88, 0x0d, 0x95, 0x79, 0x87, 0x0d, 0x95, 0x71, 0x86, 0x0d, 0x95, + 0x69, 0x85, 0x0d, 0x95, 0x61, 0x84, 0x0d, 0x95, 0x59, 0x83, 0x0d, 0x95, + 0x51, 0xa6, 0x0d, 0x95, 0x49, 0xa5, 0x0d, 0x95, 0x41, 0xa4, 0x0d, 0x95, + 0x39, 0xa3, 0x0d, 0x95, 0x31, 0xa2, 0x0d, 0x95, 0x29, 0xa1, 0x0d, 0x95, + 0x21, 0xa0, 0x0d, 0x95, 0x19, 0x9f, 0x0d, 0x95, 0x11, 0x9e, 0x0d, 0x95, + 0x09, 0x9d, 0x0d, 0x95, 0x00, 0x88, 0x0d, 0x8f, 0xf9, 0x87, 0x0d, 0x8f, + 0xf1, 0x86, 0x0d, 0x8f, 0xe9, 0x85, 0x0d, 0x8f, 0xe1, 0x84, 0x0d, 0x8f, + 0xd9, 0x83, 0x0d, 0x8f, 0xd1, 0xa6, 0x0d, 0x8f, 0xc9, 0xa5, 0x0d, 0x8f, + 0xc1, 0xa4, 0x0d, 0x8f, 0xb9, 0xa3, 0x0d, 0x8f, 0xb1, 0xa2, 0x0d, 0x8f, + 0xa9, 0xa1, 0x0d, 0x8f, 0xa1, 0xa0, 0x0d, 0x8f, 0x99, 0x9f, 0x0d, 0x8f, + 0x91, 0x9e, 0x0d, 0x8f, 0x89, 0x9d, 0x0d, 0x8f, 0x80, 0x88, 0x0d, 0x8f, + 0x79, 0x87, 0x0d, 0x8f, 0x71, 0x86, 0x0d, 0x8f, 0x69, 0x85, 0x0d, 0x8f, + 0x61, 0x84, 0x0d, 0x8f, 0x59, 0x83, 0x0d, 0x8f, 0x51, 0xa6, 0x0d, 0x8f, + 0x49, 0xa5, 
0x0d, 0x8f, 0x41, 0xa4, 0x0d, 0x8f, 0x39, 0xa3, 0x0d, 0x8f, + 0x31, 0xa2, 0x0d, 0x8f, 0x29, 0xa1, 0x0d, 0x8f, 0x21, 0xa0, 0x0d, 0x8f, + 0x19, 0x9f, 0x0d, 0x8f, 0x11, 0x9e, 0x0d, 0x8f, 0x09, 0x9d, 0x0d, 0x8f, + 0x00, 0x88, 0x0d, 0x8e, 0xf9, 0x87, 0x0d, 0x8e, 0xf1, 0x86, 0x0d, 0x8e, + 0xe9, 0x85, 0x0d, 0x8e, 0xe1, 0x84, 0x0d, 0x8e, 0xd9, 0x83, 0x0d, 0x8e, + 0xd1, 0xa6, 0x0d, 0x8e, 0xc9, 0xa5, 0x0d, 0x8e, 0xc1, 0xa4, 0x0d, 0x8e, + 0xb9, 0xa3, 0x0d, 0x8e, 0xb1, 0xa2, 0x0d, 0x8e, 0xa9, 0xa1, 0x0d, 0x8e, + 0xa1, 0xa0, 0x0d, 0x8e, 0x99, 0x9f, 0x0d, 0x8e, 0x91, 0x9e, 0x0d, 0x8e, + 0x89, 0x9d, 0x0d, 0x8e, 0x80, 0x88, 0x0d, 0x8e, 0x79, 0x87, 0x0d, 0x8e, + 0x71, 0x86, 0x0d, 0x8e, 0x69, 0x85, 0x0d, 0x8e, 0x61, 0x84, 0x0d, 0x8e, + 0x59, 0x83, 0x0d, 0x8e, 0x51, 0xa6, 0x0d, 0x8e, 0x49, 0xa5, 0x0d, 0x8e, + 0x41, 0xa4, 0x0d, 0x8e, 0x39, 0xa3, 0x0d, 0x8e, 0x31, 0xa2, 0x0d, 0x8e, + 0x29, 0xa1, 0x0d, 0x8e, 0x21, 0xa0, 0x0d, 0x8e, 0x19, 0x9f, 0x0d, 0x8e, + 0x11, 0x9e, 0x0d, 0x8e, 0x09, 0x9d, 0x0d, 0x8e, 0x00, 0x88, 0x0d, 0x8d, + 0xf9, 0x87, 0x0d, 0x8d, 0xf1, 0x86, 0x0d, 0x8d, 0xe9, 0x85, 0x0d, 0x8d, + 0xe1, 0x84, 0x0d, 0x8d, 0xd9, 0x83, 0x0d, 0x8d, 0xd1, 0xa6, 0x0d, 0x8d, + 0xc9, 0xa5, 0x0d, 0x8d, 0xc1, 0xa4, 0x0d, 0x8d, 0xb9, 0xa3, 0x0d, 0x8d, + 0xb1, 0xa2, 0x0d, 0x8d, 0xa9, 0xa1, 0x0d, 0x8d, 0xa1, 0xa0, 0x0d, 0x8d, + 0x99, 0x9f, 0x0d, 0x8d, 0x91, 0x9e, 0x0d, 0x8d, 0x89, 0x9d, 0x0d, 0x8d, + 0x80, 0x88, 0x0d, 0x8d, 0x79, 0x87, 0x0d, 0x8d, 0x71, 0x86, 0x0d, 0x8d, + 0x69, 0x85, 0x0d, 0x8d, 0x61, 0x84, 0x0d, 0x8d, 0x59, 0x83, 0x0d, 0x8d, + 0x51, 0xa6, 0x0d, 0x8d, 0x49, 0xa5, 0x0d, 0x8d, 0x41, 0xa4, 0x0d, 0x8d, + 0x39, 0xa3, 0x0d, 0x8d, 0x31, 0xa2, 0x0d, 0x8d, 0x29, 0xa1, 0x0d, 0x8d, + 0x21, 0xa0, 0x0d, 0x8d, 0x19, 0x9f, 0x0d, 0x8d, 0x11, 0x9e, 0x0d, 0x8d, + 0x09, 0x9d, 0x0d, 0x8d, 0x00, 0x88, 0x0d, 0x8c, 0xf9, 0x87, 0x0d, 0x8c, + 0xf1, 0x86, 0x0d, 0x8c, 0xe9, 0x85, 0x0d, 0x8c, 0xe1, 0x84, 0x0d, 0x8c, + 0xd9, 0x83, 0x0d, 0x8c, 0xd1, 0xa6, 0x0d, 0x8c, 0xc9, 0xa5, 0x0d, 0x8c, + 0xc1, 0xa4, 0x0d, 0x8c, 0xb9, 0xa3, 0x0d, 0x8c, 0xb1, 0xa2, 0x0d, 0x8c, + 0xa9, 0xa1, 0x0d, 0x8c, 0xa1, 0xa0, 0x0d, 0x8c, 0x99, 0x9f, 0x0d, 0x8c, + 0x91, 0x9e, 0x0d, 0x8c, 0x89, 0x9d, 0x0d, 0x8c, 0x80, 0x88, 0x0d, 0x8c, + 0x79, 0x87, 0x0d, 0x8c, 0x71, 0x86, 0x0d, 0x8c, 0x69, 0x85, 0x0d, 0x8c, + 0x61, 0x84, 0x0d, 0x8c, 0x59, 0x83, 0x0d, 0x8c, 0x51, 0xa6, 0x0d, 0x8c, + 0x49, 0xa5, 0x0d, 0x8c, 0x41, 0xa4, 0x0d, 0x8c, 0x39, 0xa3, 0x0d, 0x8c, + 0x31, 0xa2, 0x0d, 0x8c, 0x29, 0xa1, 0x0d, 0x8c, 0x21, 0xa0, 0x0d, 0x8c, + 0x19, 0x9f, 0x0d, 0x8c, 0x11, 0x9e, 0x0d, 0x8c, 0x09, 0x9d, 0x0d, 0x8c, + 0x00, 0x88, 0x0d, 0x8b, 0xf9, 0x87, 0x0d, 0x8b, 0xf1, 0x86, 0x0d, 0x8b, + 0xe9, 0x85, 0x0d, 0x8b, 0xe1, 0x84, 0x0d, 0x8b, 0xd9, 0x83, 0x0d, 0x8b, + 0xd1, 0xa6, 0x0d, 0x8b, 0xc9, 0xa5, 0x0d, 0x8b, 0xc1, 0xa4, 0x0d, 0x8b, + 0xb9, 0xa3, 0x0d, 0x8b, 0xb1, 0xa2, 0x0d, 0x8b, 0xa9, 0xa1, 0x0d, 0x8b, + 0xa1, 0xa0, 0x0d, 0x8b, 0x99, 0x9f, 0x0d, 0x8b, 0x91, 0x9e, 0x0d, 0x8b, + 0x89, 0x9d, 0x0d, 0x8b, 0x80, 0xcd, 0x79, 0x1a, 0x01, 0x24, 0xd9, 0xcd, + 0x7d, 0xac, 0x01, 0x24, 0x98, 0xcf, 0x69, 0x36, 0x01, 0x24, 0xb9, 0xc2, + 0x00, 0xbc, 0x00, 0x01, 0x18, 0xc2, 0x00, 0x39, 0x00, 0x3f, 0x51, 0xc3, + 0x1c, 0x63, 0x00, 0x3f, 0x49, 0xc2, 0x25, 0x3b, 0x00, 0x3f, 0x40, 0xc7, + 0xc3, 0xf4, 0x00, 0x3f, 0x38, 0xc7, 0xc3, 0xf4, 0x00, 0x3f, 0x00, 0xd0, + 0x5b, 0xa2, 0x01, 0x4d, 0xa1, 0xd1, 0x02, 0x56, 0x01, 0x4d, 0x99, 0xd2, + 0x4b, 0xdd, 0x01, 0x4d, 0x91, 0xc7, 0x80, 0x70, 0x01, 0x4d, 0x88, 0x43, + 0x00, 0xaf, 0x42, 0x8a, 0x23, 0x03, 0xc2, 0x8a, 0x2d, 0xcd, 0x79, 0xa9, + 0x0f, 0x98, 
0x68, 0xa5, 0x09, 0x87, 0xe9, 0xa4, 0x09, 0x87, 0xe1, 0xa3, + 0x09, 0x87, 0xd9, 0xa1, 0x09, 0x87, 0xcb, 0x02, 0x8a, 0x39, 0xa0, 0x09, + 0x87, 0xc1, 0x9f, 0x09, 0x87, 0xb9, 0x9e, 0x09, 0x87, 0xb1, 0x9d, 0x09, + 0x87, 0xa8, 0xa6, 0x09, 0x87, 0xa1, 0xa5, 0x09, 0x87, 0x93, 0x02, 0x8a, + 0x3d, 0xa4, 0x09, 0x87, 0x89, 0xa3, 0x09, 0x87, 0x81, 0xa2, 0x09, 0x87, + 0x79, 0xa1, 0x09, 0x87, 0x71, 0xa0, 0x09, 0x87, 0x69, 0x9f, 0x09, 0x87, + 0x61, 0x9e, 0x09, 0x87, 0x59, 0x9d, 0x09, 0x87, 0x4a, 0x02, 0x8a, 0x41, + 0xa6, 0x09, 0x87, 0x41, 0xa5, 0x09, 0x87, 0x39, 0xa4, 0x09, 0x87, 0x2b, + 0x02, 0x8a, 0x45, 0xa3, 0x09, 0x87, 0x1b, 0x02, 0x8a, 0x49, 0xa2, 0x09, + 0x87, 0x11, 0xa1, 0x09, 0x87, 0x09, 0xa0, 0x09, 0x87, 0x01, 0x9f, 0x09, + 0x86, 0xf9, 0x9e, 0x09, 0x86, 0xf1, 0x9d, 0x09, 0x86, 0xe8, 0xa6, 0x09, + 0x86, 0xdb, 0x02, 0x8a, 0x4d, 0xa5, 0x09, 0x86, 0xcb, 0x02, 0x8a, 0x51, + 0xa4, 0x09, 0x86, 0xc1, 0xa3, 0x09, 0x86, 0xb9, 0xa2, 0x09, 0x86, 0xb1, + 0xa1, 0x09, 0x86, 0xa9, 0xa0, 0x09, 0x86, 0xa1, 0x9f, 0x09, 0x86, 0x99, + 0x9e, 0x09, 0x86, 0x90, 0x83, 0x09, 0x82, 0xa8, 0x9e, 0x09, 0x94, 0xd1, + 0x9d, 0x09, 0x94, 0xba, 0x02, 0x8a, 0x55, 0xa6, 0x09, 0x94, 0xb1, 0xa5, + 0x09, 0x94, 0xa9, 0xa4, 0x09, 0x94, 0xa1, 0xa3, 0x09, 0x94, 0x99, 0xa2, + 0x09, 0x94, 0x91, 0xa1, 0x09, 0x94, 0x89, 0xa0, 0x09, 0x94, 0x81, 0x9f, + 0x09, 0x94, 0x79, 0x9e, 0x09, 0x94, 0x71, 0x9d, 0x09, 0x94, 0x68, 0xa6, + 0x09, 0x94, 0x61, 0xa5, 0x09, 0x94, 0x59, 0xa4, 0x09, 0x94, 0x51, 0xa3, + 0x09, 0x94, 0x2b, 0x02, 0x8a, 0x5d, 0xa2, 0x09, 0x94, 0x21, 0xa1, 0x09, + 0x94, 0x19, 0xa0, 0x09, 0x94, 0x0b, 0x02, 0x8a, 0x6d, 0x9f, 0x09, 0x94, + 0x01, 0x9e, 0x09, 0x93, 0xf9, 0x9d, 0x09, 0x93, 0xea, 0x02, 0x8a, 0x71, + 0xa6, 0x09, 0x93, 0xdb, 0x02, 0x8a, 0x75, 0xa5, 0x09, 0x93, 0xd1, 0xa4, + 0x09, 0x93, 0xc9, 0xa3, 0x09, 0x93, 0xc1, 0xa2, 0x09, 0x93, 0xb3, 0x02, + 0x8a, 0x79, 0xa1, 0x09, 0x93, 0xa3, 0x02, 0x8a, 0x7d, 0xa0, 0x09, 0x93, + 0x99, 0x9f, 0x09, 0x93, 0x91, 0x9e, 0x09, 0x93, 0x89, 0x9d, 0x09, 0x93, + 0x7a, 0x02, 0x8a, 0x81, 0xa6, 0x09, 0x93, 0x6b, 0x02, 0x8a, 0x85, 0xa5, + 0x09, 0x93, 0x61, 0xa4, 0x09, 0x93, 0x59, 0xa3, 0x09, 0x93, 0x51, 0xa2, + 0x09, 0x93, 0x49, 0xa1, 0x09, 0x93, 0x41, 0xa0, 0x09, 0x93, 0x39, 0x9f, + 0x09, 0x93, 0x31, 0x9e, 0x09, 0x93, 0x29, 0x9d, 0x09, 0x93, 0x0a, 0x02, + 0x8a, 0x89, 0xa6, 0x09, 0x93, 0x01, 0xa5, 0x09, 0x92, 0xf9, 0xa4, 0x09, + 0x92, 0xf1, 0xa3, 0x09, 0x92, 0xbb, 0x02, 0x8a, 0x95, 0xa2, 0x09, 0x92, + 0xab, 0x02, 0x8a, 0xad, 0xa1, 0x09, 0x92, 0xa1, 0xa0, 0x09, 0x92, 0x99, + 0x9f, 0x09, 0x92, 0x91, 0x9e, 0x09, 0x92, 0x82, 0x02, 0x8a, 0xb1, 0xc3, + 0x02, 0x39, 0x09, 0xa1, 0xa9, 0xc5, 0xdd, 0xd0, 0x09, 0xa1, 0x98, 0xc3, + 0x02, 0x39, 0x09, 0xa1, 0xa1, 0xc5, 0xdd, 0xd0, 0x09, 0xa1, 0x90, 0xa2, + 0x09, 0x8c, 0xd1, 0xa1, 0x09, 0x8c, 0xc9, 0xa0, 0x09, 0x8c, 0xc1, 0x9f, + 0x09, 0x8c, 0xb9, 0x9e, 0x09, 0x8c, 0xab, 0x02, 0x8a, 0xb5, 0x9d, 0x09, + 0x8c, 0x9a, 0x02, 0x8a, 0xb9, 0xa6, 0x09, 0x8c, 0x8b, 0x02, 0x8a, 0xbd, + 0xa5, 0x09, 0x8c, 0x81, 0xa4, 0x09, 0x8c, 0x79, 0xa3, 0x09, 0x8c, 0x71, + 0xa2, 0x09, 0x8c, 0x63, 0x02, 0x8a, 0xc1, 0xa1, 0x09, 0x8c, 0x59, 0xa0, + 0x09, 0x8c, 0x51, 0x9f, 0x09, 0x8c, 0x49, 0x9e, 0x09, 0x8c, 0x40, 0x83, + 0x09, 0x8c, 0x28, 0x83, 0x09, 0x9d, 0x70, 0xa6, 0x09, 0x9d, 0x61, 0xa5, + 0x09, 0x9d, 0x59, 0xa4, 0x09, 0x9d, 0x4b, 0x02, 0x8a, 0xc5, 0xa3, 0x09, + 0x9d, 0x41, 0xa2, 0x09, 0x9d, 0x39, 0xa1, 0x09, 0x9d, 0x31, 0xa0, 0x09, + 0x9d, 0x23, 0x02, 0x8a, 0xc9, 0x9f, 0x09, 0x9d, 0x19, 0x9e, 0x09, 0x9d, + 0x0b, 0x02, 0x8a, 0xcd, 0x9d, 0x09, 0x9c, 0xfa, 0x02, 0x8a, 0xd1, 0xa6, + 0x09, 0x9c, 
0xeb, 0x02, 0x8a, 0xd5, 0xa5, 0x09, 0x9c, 0xdb, 0x02, 0x8a, + 0xd9, 0xa4, 0x09, 0x9c, 0xd1, 0xa3, 0x09, 0x9c, 0xc9, 0xa2, 0x09, 0x9c, + 0xc1, 0xa1, 0x09, 0x9c, 0xb9, 0xa0, 0x09, 0x9c, 0xab, 0x02, 0x8a, 0xdd, + 0x9f, 0x09, 0x9c, 0xa1, 0x9e, 0x09, 0x9c, 0x99, 0x9d, 0x09, 0x9c, 0x32, + 0x02, 0x8a, 0xe1, 0xa6, 0x09, 0x9c, 0x29, 0xa5, 0x09, 0x9c, 0x21, 0xa4, + 0x09, 0x9c, 0x19, 0xa3, 0x09, 0x9c, 0x11, 0xa2, 0x09, 0x9c, 0x09, 0xa1, + 0x09, 0x9c, 0x01, 0xa0, 0x09, 0x9b, 0xf9, 0x9f, 0x09, 0x9b, 0xe3, 0x02, + 0x8b, 0x11, 0x9e, 0x09, 0x9b, 0xc3, 0x02, 0x8b, 0x19, 0x9d, 0x09, 0x9b, + 0xb8, 0xa6, 0x09, 0x9b, 0xb1, 0xa5, 0x09, 0x9b, 0xa9, 0xa4, 0x09, 0x9b, + 0x93, 0x02, 0x8b, 0x25, 0xa3, 0x09, 0x9b, 0x89, 0xa2, 0x09, 0x9b, 0x81, + 0xa1, 0x09, 0x9b, 0x79, 0xa0, 0x09, 0x9b, 0x71, 0x9f, 0x09, 0x9b, 0x63, + 0x02, 0x8b, 0x2d, 0x9e, 0x09, 0x9b, 0x12, 0x02, 0x8b, 0x31, 0x9f, 0x09, + 0xa1, 0x71, 0x9e, 0x09, 0xa1, 0x69, 0x9d, 0x09, 0xa1, 0x60, 0xa6, 0x09, + 0xa1, 0x59, 0xa5, 0x09, 0xa1, 0x51, 0xa4, 0x09, 0xa1, 0x49, 0xa3, 0x09, + 0xa1, 0x41, 0xa2, 0x09, 0xa1, 0x39, 0xa1, 0x09, 0xa1, 0x31, 0xa0, 0x09, + 0xa1, 0x29, 0x9f, 0x09, 0xa1, 0x21, 0x9e, 0x09, 0xa1, 0x19, 0x9d, 0x09, + 0xa1, 0x10, 0xa6, 0x09, 0xa1, 0x09, 0xa5, 0x09, 0xa1, 0x01, 0xa4, 0x09, + 0xa0, 0xf9, 0xa3, 0x09, 0xa0, 0xf1, 0xa2, 0x09, 0xa0, 0xe9, 0xa1, 0x09, + 0xa0, 0xe1, 0xa0, 0x09, 0xa0, 0xd9, 0x9f, 0x09, 0xa0, 0xd1, 0x9e, 0x09, + 0xa0, 0xc9, 0x9d, 0x09, 0xa0, 0xc0, 0xa6, 0x09, 0xa0, 0xb9, 0xa5, 0x09, + 0xa0, 0xb1, 0xa4, 0x09, 0xa0, 0x9b, 0x02, 0x8b, 0x55, 0xa3, 0x09, 0xa0, + 0x91, 0xa2, 0x09, 0xa0, 0x89, 0xa1, 0x09, 0xa0, 0x81, 0xa0, 0x09, 0xa0, + 0x79, 0x9f, 0x09, 0xa0, 0x71, 0x9e, 0x09, 0xa0, 0x68, 0xa6, 0x09, 0x82, + 0x71, 0xa5, 0x09, 0x82, 0x69, 0xa4, 0x09, 0x82, 0x61, 0xa3, 0x09, 0x82, + 0x59, 0xa2, 0x09, 0x82, 0x51, 0xa1, 0x09, 0x82, 0x49, 0xa0, 0x09, 0x82, + 0x41, 0x9f, 0x09, 0x82, 0x39, 0x9e, 0x09, 0x82, 0x31, 0x9d, 0x09, 0x82, + 0x28, 0xa6, 0x09, 0x82, 0x21, 0xa5, 0x09, 0x82, 0x19, 0xa4, 0x09, 0x82, + 0x11, 0xa3, 0x09, 0x82, 0x09, 0xa2, 0x09, 0x82, 0x01, 0xa1, 0x09, 0x81, + 0xf9, 0xa0, 0x09, 0x81, 0xf1, 0x9f, 0x09, 0x81, 0xe9, 0x9e, 0x09, 0x81, + 0xe1, 0x9d, 0x09, 0x81, 0xd8, 0xa6, 0x09, 0x81, 0xd1, 0xa5, 0x09, 0x81, + 0xc9, 0xa4, 0x09, 0x81, 0xc1, 0xa3, 0x09, 0x81, 0xb9, 0xa2, 0x09, 0x81, + 0xab, 0x02, 0x8b, 0x5d, 0xa1, 0x09, 0x81, 0xa1, 0xa0, 0x09, 0x81, 0x93, + 0x02, 0x8b, 0x61, 0x9f, 0x09, 0x81, 0x83, 0x02, 0x8b, 0x65, 0x9e, 0x09, + 0x81, 0x79, 0x9d, 0x09, 0x81, 0x6a, 0x02, 0x8b, 0x69, 0xa6, 0x09, 0x81, + 0x61, 0xa5, 0x09, 0x81, 0x59, 0xa4, 0x09, 0x81, 0x51, 0xa3, 0x09, 0x81, + 0x49, 0xa2, 0x09, 0x81, 0x41, 0xa1, 0x09, 0x81, 0x39, 0xa0, 0x09, 0x81, + 0x31, 0x9f, 0x09, 0x81, 0x23, 0x02, 0x8b, 0x6d, 0x9e, 0x09, 0x81, 0x19, + 0x9d, 0x09, 0x81, 0x10, 0xa6, 0x09, 0x81, 0x09, 0xa5, 0x09, 0x81, 0x01, + 0xa4, 0x09, 0x80, 0xf9, 0xa3, 0x09, 0x80, 0xf1, 0xa2, 0x09, 0x80, 0xe9, + 0xa1, 0x09, 0x80, 0xe1, 0xa0, 0x09, 0x80, 0xd9, 0x9f, 0x09, 0x80, 0xd1, + 0x9e, 0x09, 0x80, 0xc9, 0x9d, 0x09, 0x80, 0xc0, 0xa6, 0x09, 0x80, 0xb9, + 0xa5, 0x09, 0x80, 0xb1, 0xa4, 0x09, 0x80, 0xa3, 0x02, 0x8b, 0x71, 0xa3, + 0x09, 0x80, 0x99, 0xa2, 0x09, 0x80, 0x91, 0xa1, 0x09, 0x80, 0x83, 0x02, + 0x8b, 0x75, 0xa0, 0x09, 0x80, 0x79, 0x9f, 0x09, 0x80, 0x71, 0x9e, 0x09, + 0x80, 0x69, 0x9d, 0x09, 0x80, 0x60, 0xa6, 0x09, 0x80, 0x59, 0xa5, 0x09, + 0x80, 0x51, 0xa4, 0x09, 0x80, 0x49, 0xa3, 0x09, 0x80, 0x33, 0x02, 0x8b, + 0x79, 0xa2, 0x09, 0x80, 0x23, 0x02, 0x8b, 0x81, 0xa1, 0x09, 0x80, 0x19, + 0xa0, 0x09, 0x80, 0x11, 0x9f, 0x09, 0x80, 0x09, 0x9e, 0x09, 0x80, 0x00, + 0x8a, 0x09, 
0xa0, 0x61, 0x89, 0x09, 0xa0, 0x59, 0x88, 0x09, 0xa0, 0x51, + 0x87, 0x09, 0xa0, 0x49, 0x86, 0x09, 0xa0, 0x41, 0x85, 0x09, 0xa0, 0x39, + 0x84, 0x09, 0xa0, 0x31, 0x83, 0x09, 0xa0, 0x28, 0x8b, 0x09, 0xa0, 0x19, + 0x8a, 0x09, 0xa0, 0x11, 0x89, 0x09, 0xa0, 0x09, 0x88, 0x09, 0xa0, 0x01, + 0x87, 0x09, 0x9f, 0xf9, 0x86, 0x09, 0x9f, 0xf1, 0x85, 0x09, 0x9f, 0xe9, + 0x84, 0x09, 0x9f, 0xe1, 0x83, 0x09, 0x9f, 0xd8, 0x83, 0x09, 0x9f, 0x80, + 0x83, 0x09, 0x9f, 0x70, 0x84, 0x09, 0x9f, 0x61, 0x83, 0x09, 0x9f, 0x58, + 0x86, 0x09, 0x9f, 0x49, 0x85, 0x09, 0x9f, 0x41, 0x84, 0x09, 0x9f, 0x39, + 0x83, 0x09, 0x9f, 0x30, 0x83, 0x09, 0x9e, 0x68, 0x83, 0x09, 0x9e, 0x30, + 0x83, 0x09, 0x9e, 0x20, 0x83, 0x09, 0x9e, 0x00, 0x83, 0x09, 0x9d, 0xd8, + 0x83, 0x09, 0x9d, 0xc8, 0x83, 0x09, 0x9d, 0x90, 0x83, 0x09, 0x9a, 0xb8, + 0x83, 0x09, 0x9a, 0x98, 0x83, 0x09, 0x9a, 0x60, 0x84, 0x09, 0x99, 0xd1, + 0x83, 0x09, 0x99, 0xc8, 0x83, 0x09, 0x99, 0x78, 0x83, 0x09, 0x99, 0x68, + 0x83, 0x09, 0x98, 0xe0, 0x83, 0x09, 0x98, 0xb0, 0x83, 0x09, 0x98, 0x98, + 0x83, 0x09, 0x98, 0x88, 0x83, 0x09, 0x98, 0x78, 0x83, 0x09, 0x98, 0x50, + 0x83, 0x09, 0x97, 0xd8, 0x84, 0x09, 0x97, 0x89, 0x83, 0x09, 0x97, 0x80, + 0x83, 0x09, 0x97, 0x30, 0x84, 0x09, 0x97, 0x11, 0x83, 0x09, 0x97, 0x08, + 0x83, 0x09, 0x96, 0xc0, 0x83, 0x09, 0x96, 0x98, 0x83, 0x09, 0x96, 0x18, + 0x83, 0x09, 0x95, 0xe0, 0x84, 0x09, 0x95, 0xa1, 0x83, 0x09, 0x95, 0x98, + 0x83, 0x09, 0x95, 0x88, 0x83, 0x09, 0x94, 0xf8, 0x83, 0x09, 0x94, 0xe0, + 0x9f, 0x09, 0x92, 0x73, 0x02, 0x8b, 0x85, 0x9e, 0x09, 0x92, 0x69, 0x9d, + 0x09, 0x92, 0x60, 0xa6, 0x09, 0x92, 0x59, 0xa5, 0x09, 0x92, 0x4b, 0x02, + 0x8b, 0x89, 0xa4, 0x09, 0x92, 0x41, 0xa3, 0x09, 0x92, 0x39, 0xa2, 0x09, + 0x92, 0x31, 0xa1, 0x09, 0x92, 0x29, 0xa0, 0x09, 0x92, 0x21, 0x9f, 0x09, + 0x92, 0x19, 0x9e, 0x09, 0x92, 0x0b, 0x02, 0x8b, 0x8d, 0x9d, 0x09, 0x91, + 0xfa, 0x02, 0x8b, 0x91, 0xa6, 0x09, 0x91, 0xf1, 0xa5, 0x09, 0x91, 0xe9, + 0xa4, 0x09, 0x91, 0xe1, 0xa3, 0x09, 0x91, 0xd9, 0xa2, 0x09, 0x91, 0xd1, + 0xa1, 0x09, 0x91, 0xc9, 0xa0, 0x09, 0x91, 0xc1, 0x9f, 0x09, 0x91, 0xb9, + 0x9e, 0x09, 0x91, 0xb0, 0xa6, 0x09, 0x91, 0xa1, 0xa5, 0x09, 0x91, 0x99, + 0xa4, 0x09, 0x91, 0x8b, 0x02, 0x8b, 0x95, 0xa3, 0x09, 0x91, 0x81, 0xa2, + 0x09, 0x91, 0x79, 0xa1, 0x09, 0x91, 0x71, 0xa0, 0x09, 0x91, 0x69, 0x9f, + 0x09, 0x91, 0x61, 0x9e, 0x09, 0x91, 0x59, 0x9d, 0x09, 0x91, 0x50, 0xa6, + 0x09, 0x91, 0x49, 0xa5, 0x09, 0x91, 0x41, 0xa4, 0x09, 0x91, 0x39, 0xa3, + 0x09, 0x91, 0x31, 0xa2, 0x09, 0x91, 0x23, 0x02, 0x8b, 0x99, 0xa1, 0x09, + 0x91, 0x19, 0xa0, 0x09, 0x91, 0x11, 0x9f, 0x09, 0x91, 0x09, 0x9e, 0x09, + 0x91, 0x00, 0x9f, 0x09, 0x90, 0xf9, 0x9e, 0x09, 0x90, 0xf1, 0x9d, 0x09, + 0x90, 0xe8, 0xa6, 0x09, 0x90, 0xe1, 0xa5, 0x09, 0x90, 0xd9, 0xa4, 0x09, + 0x90, 0xcb, 0x02, 0x8b, 0x9d, 0xa3, 0x09, 0x90, 0xc1, 0xa2, 0x09, 0x90, + 0xb3, 0x02, 0x8b, 0xa1, 0xa1, 0x09, 0x90, 0xa3, 0x02, 0x8b, 0xa5, 0xa0, + 0x09, 0x90, 0x93, 0x02, 0x8b, 0xa9, 0x9f, 0x09, 0x90, 0x89, 0x9e, 0x09, + 0x90, 0x81, 0x9d, 0x09, 0x90, 0x78, 0xa6, 0x09, 0x90, 0x71, 0xa5, 0x09, + 0x90, 0x69, 0xa4, 0x09, 0x90, 0x61, 0xa3, 0x09, 0x90, 0x59, 0xa2, 0x09, + 0x90, 0x4b, 0x02, 0x8b, 0xad, 0xa1, 0x09, 0x90, 0x41, 0xa0, 0x09, 0x90, + 0x39, 0x9f, 0x09, 0x90, 0x31, 0x9e, 0x09, 0x90, 0x29, 0x9d, 0x09, 0x90, + 0x20, 0xa6, 0x09, 0x90, 0x19, 0xa5, 0x09, 0x90, 0x03, 0x02, 0x8b, 0xb1, + 0xa4, 0x09, 0x8f, 0xf9, 0xa3, 0x09, 0x8f, 0xf1, 0xa2, 0x09, 0x8f, 0xe9, + 0xa1, 0x09, 0x8f, 0xe1, 0xa0, 0x09, 0x8f, 0xd9, 0x9f, 0x09, 0x8f, 0xd1, + 0x9e, 0x09, 0x8f, 0xc9, 0x9d, 0x09, 0x8f, 0xc0, 0xa6, 0x09, 0x8f, 0xb9, + 0xa5, 0x09, 
0x8f, 0xb1, 0xa4, 0x09, 0x8f, 0xa9, 0xa3, 0x09, 0x8f, 0xa1, + 0xa2, 0x09, 0x8f, 0x99, 0xa1, 0x09, 0x8f, 0x91, 0xa0, 0x09, 0x8f, 0x89, + 0x9f, 0x09, 0x8f, 0x81, 0x9e, 0x09, 0x8f, 0x78, 0x83, 0x09, 0x8f, 0x50, + 0x84, 0x09, 0x8f, 0x11, 0x83, 0x09, 0x8f, 0x08, 0x83, 0x09, 0x8e, 0xf0, + 0x83, 0x09, 0x8e, 0xd0, 0x83, 0x09, 0x8e, 0xa8, 0x83, 0x09, 0x8e, 0x90, + 0x83, 0x09, 0x8e, 0x60, 0x83, 0x09, 0x8e, 0x50, 0x83, 0x09, 0x8e, 0x40, + 0x8a, 0x09, 0x8e, 0x21, 0x89, 0x09, 0x8e, 0x19, 0x88, 0x09, 0x8e, 0x11, + 0x87, 0x09, 0x8e, 0x09, 0x86, 0x09, 0x8e, 0x01, 0x85, 0x09, 0x8d, 0xf9, + 0x84, 0x09, 0x8d, 0xf1, 0x83, 0x09, 0x8d, 0xe8, 0x83, 0x09, 0x8d, 0xd0, + 0x83, 0x09, 0x8d, 0x90, 0x84, 0x09, 0x8d, 0x79, 0x83, 0x09, 0x8d, 0x70, + 0x83, 0x09, 0x8b, 0xa8, 0x83, 0x09, 0x8b, 0x90, 0x83, 0x09, 0x8b, 0x58, + 0x83, 0x09, 0x8b, 0x48, 0x83, 0x09, 0x8a, 0xf0, 0x83, 0x09, 0x8a, 0xb8, + 0x83, 0x09, 0x8a, 0x68, 0x84, 0x09, 0x8a, 0x41, 0x83, 0x09, 0x8a, 0x38, + 0x83, 0x09, 0x8a, 0x28, 0x85, 0x09, 0x89, 0xe1, 0x84, 0x09, 0x89, 0xd9, + 0x83, 0x09, 0x89, 0xd0, 0x83, 0x09, 0x89, 0xa8, 0x83, 0x09, 0x89, 0x98, + 0x83, 0x09, 0x89, 0x88, 0x83, 0x09, 0x89, 0x48, 0x83, 0x09, 0x89, 0x38, + 0x83, 0x09, 0x89, 0x00, 0x83, 0x09, 0x88, 0xa8, 0x83, 0x09, 0x88, 0x60, + 0x83, 0x09, 0x87, 0xf8, 0x8a, 0x09, 0x86, 0x89, 0x89, 0x09, 0x86, 0x81, + 0x88, 0x09, 0x86, 0x79, 0x87, 0x09, 0x86, 0x71, 0x86, 0x09, 0x86, 0x69, + 0x85, 0x09, 0x86, 0x61, 0x84, 0x09, 0x86, 0x59, 0x83, 0x09, 0x86, 0x50, + 0x83, 0x09, 0x85, 0xe0, 0x83, 0x09, 0x85, 0xc8, 0x8b, 0x09, 0x85, 0xb1, + 0x8a, 0x09, 0x85, 0xa9, 0x89, 0x09, 0x85, 0xa1, 0x88, 0x09, 0x85, 0x99, + 0x87, 0x09, 0x85, 0x91, 0x86, 0x09, 0x85, 0x89, 0x85, 0x09, 0x85, 0x81, + 0x84, 0x09, 0x85, 0x79, 0x83, 0x09, 0x85, 0x70, 0x83, 0x09, 0x85, 0x58, + 0x83, 0x09, 0x85, 0x40, 0x83, 0x09, 0x84, 0xd8, 0x83, 0x09, 0x84, 0xb8, + 0x83, 0x09, 0x84, 0x90, 0x83, 0x09, 0x83, 0xf0, 0x83, 0x09, 0x83, 0x38, + 0x85, 0x09, 0x82, 0xf1, 0x84, 0x09, 0x82, 0xe9, 0x83, 0x09, 0x82, 0xe0, + 0xc6, 0x02, 0xd1, 0x0f, 0xbc, 0x49, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x98, + 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x71, 0xd2, 0x4d, 0x57, 0x0f, 0xbd, 0xd0, + 0x45, 0x56, 0x42, 0x42, 0x8b, 0xb9, 0x83, 0x00, 0x95, 0x03, 0x02, 0x8b, + 0xe9, 0x97, 0x00, 0x95, 0x09, 0x8b, 0x00, 0x95, 0x11, 0x87, 0x00, 0x95, + 0x2b, 0x02, 0x8b, 0xed, 0x91, 0x00, 0x95, 0x33, 0x02, 0x8b, 0xf1, 0xc2, + 0x01, 0x4a, 0x00, 0x95, 0x38, 0x83, 0x00, 0x98, 0x58, 0x87, 0x00, 0x98, + 0x60, 0x83, 0x00, 0x98, 0x78, 0x83, 0x00, 0x98, 0x83, 0x02, 0x8b, 0xf5, + 0x8b, 0x00, 0x98, 0x91, 0x87, 0x00, 0x98, 0xaa, 0x02, 0x8b, 0xf9, 0x83, + 0x00, 0x98, 0xc3, 0x02, 0x8b, 0xfd, 0x97, 0x00, 0x98, 0xc9, 0x8b, 0x00, + 0x98, 0xd1, 0x87, 0x00, 0x98, 0xeb, 0x02, 0x8c, 0x01, 0x91, 0x00, 0x98, + 0xf1, 0x19, 0x42, 0x8c, 0x05, 0x83, 0x01, 0x6e, 0xc3, 0x02, 0x8c, 0x17, + 0x97, 0x01, 0x6e, 0xc9, 0x8b, 0x01, 0x6e, 0xd1, 0x87, 0x01, 0x6e, 0xeb, + 0x02, 0x8c, 0x1b, 0x91, 0x01, 0x6e, 0xf0, 0x19, 0xc2, 0x8c, 0x1f, 0x1b, + 0xc2, 0x8c, 0x2e, 0x83, 0x00, 0x90, 0x83, 0x02, 0x8c, 0x48, 0x97, 0x00, + 0x90, 0x89, 0x8b, 0x00, 0x90, 0x91, 0x87, 0x00, 0x90, 0xab, 0x02, 0x8c, + 0x4c, 0x91, 0x00, 0x90, 0xb0, 0x83, 0x00, 0x90, 0x18, 0x87, 0x00, 0x90, + 0x20, 0x83, 0x00, 0x90, 0x38, 0x91, 0x05, 0x59, 0x71, 0x87, 0x05, 0x59, + 0x6b, 0x02, 0x8c, 0x50, 0x83, 0x05, 0x59, 0x43, 0x02, 0x8c, 0x54, 0x8b, + 0x05, 0x59, 0x51, 0x97, 0x05, 0x59, 0x48, 0x83, 0x00, 0x93, 0x18, 0x87, + 0x00, 0x93, 0x20, 0x83, 0x01, 0x6c, 0x28, 0x83, 0x00, 0x93, 0x39, 0x8b, + 0x00, 0x9c, 0x29, 0x87, 0x00, 0x9c, 0x3a, 0x02, 0x8c, 0x58, 0x0a, 0xc2, + 0x8c, 0x5c, 
0x83, 0x01, 0x6d, 0x43, 0x02, 0x8c, 0x7a, 0x97, 0x01, 0x6d, + 0x49, 0x8b, 0x01, 0x6d, 0x51, 0x87, 0x01, 0x6d, 0x6b, 0x02, 0x8c, 0x7e, + 0x91, 0x01, 0x6d, 0x70, 0x83, 0x00, 0x93, 0xd8, 0x87, 0x00, 0x93, 0xe0, + 0x83, 0x01, 0x6c, 0x38, 0x83, 0x00, 0x99, 0x43, 0x02, 0x8c, 0x82, 0x97, + 0x00, 0x99, 0x49, 0x8b, 0x00, 0x99, 0x51, 0x87, 0x00, 0x99, 0x6b, 0x02, + 0x8c, 0x86, 0x91, 0x00, 0x99, 0x73, 0x02, 0x8c, 0x8a, 0xc2, 0x01, 0x4a, + 0x00, 0x99, 0x78, 0x91, 0x05, 0x58, 0xb1, 0x87, 0x05, 0x58, 0xab, 0x02, + 0x8c, 0x8e, 0xc2, 0x04, 0xc6, 0x05, 0x58, 0x99, 0x8b, 0x05, 0x58, 0x91, + 0x97, 0x05, 0x58, 0x88, 0x0a, 0xc2, 0x8c, 0x92, 0x83, 0x00, 0x97, 0xc3, + 0x02, 0x8c, 0xab, 0x97, 0x00, 0x97, 0xc9, 0x8b, 0x00, 0x97, 0xd1, 0x87, + 0x00, 0x97, 0xeb, 0x02, 0x8c, 0xaf, 0x91, 0x00, 0x97, 0xf3, 0x02, 0x8c, + 0xb3, 0xc2, 0x01, 0x4a, 0x00, 0x97, 0xf8, 0x83, 0x00, 0x97, 0x98, 0x87, + 0x00, 0x97, 0xa0, 0x83, 0x01, 0x6c, 0x60, 0x91, 0x05, 0x58, 0x31, 0x87, + 0x05, 0x58, 0x2b, 0x02, 0x8c, 0xb7, 0xc2, 0x04, 0xc6, 0x05, 0x58, 0x19, + 0x8b, 0x05, 0x58, 0x11, 0x97, 0x05, 0x58, 0x08, 0x83, 0x00, 0x93, 0x98, + 0x87, 0x00, 0x93, 0xa0, 0x83, 0x01, 0x6c, 0x30, 0x83, 0x00, 0x99, 0x03, + 0x02, 0x8c, 0xbb, 0x97, 0x00, 0x99, 0x09, 0x8b, 0x00, 0x99, 0x11, 0x87, + 0x00, 0x99, 0x2b, 0x02, 0x8c, 0xbf, 0x91, 0x00, 0x99, 0x33, 0x02, 0x8c, + 0xc3, 0xc2, 0x01, 0x4a, 0x00, 0x99, 0x38, 0x83, 0x00, 0x99, 0xc3, 0x02, + 0x8c, 0xc7, 0x97, 0x00, 0x99, 0xc9, 0x8b, 0x00, 0x99, 0xd1, 0x87, 0x00, + 0x99, 0xeb, 0x02, 0x8c, 0xcb, 0x91, 0x00, 0x99, 0xf1, 0xc2, 0x01, 0x4a, + 0x00, 0x99, 0xf8, 0x83, 0x00, 0x9a, 0x03, 0x02, 0x8c, 0xcf, 0x97, 0x00, + 0x9a, 0x09, 0x8b, 0x00, 0x9a, 0x11, 0x87, 0x00, 0x9a, 0x2b, 0x02, 0x8c, + 0xd3, 0x91, 0x00, 0x9a, 0x32, 0x02, 0x8c, 0xd7, 0x83, 0x00, 0x90, 0x58, + 0x87, 0x00, 0x90, 0x60, 0x83, 0x01, 0x6c, 0x00, 0x83, 0x00, 0x90, 0xd8, + 0x87, 0x00, 0x90, 0xe0, 0x83, 0x01, 0x6c, 0x08, 0x83, 0x00, 0x90, 0xf9, + 0x8b, 0x00, 0x9c, 0x09, 0x87, 0x00, 0x9c, 0x1a, 0x02, 0x8c, 0xdb, 0x83, + 0x00, 0x91, 0x03, 0x02, 0x8c, 0xdf, 0x97, 0x00, 0x91, 0x09, 0x8b, 0x00, + 0x91, 0x11, 0x87, 0x00, 0x91, 0x2b, 0x02, 0x8c, 0xe3, 0x91, 0x00, 0x91, + 0x31, 0xc2, 0x01, 0x4a, 0x00, 0x91, 0x38, 0x83, 0x00, 0x91, 0x98, 0x87, + 0x00, 0x91, 0xa1, 0x48, 0xbd, 0x4a, 0x42, 0x8c, 0xe7, 0x83, 0x01, 0x6c, + 0x18, 0x83, 0x00, 0x91, 0xc3, 0x02, 0x8c, 0xff, 0x97, 0x00, 0x91, 0xc9, + 0x8b, 0x00, 0x91, 0xd1, 0x87, 0x00, 0x91, 0xeb, 0x02, 0x8d, 0x03, 0x91, + 0x00, 0x91, 0xf3, 0x02, 0x8d, 0x07, 0xc2, 0x01, 0x4a, 0x00, 0x91, 0xf8, + 0x83, 0x01, 0x6d, 0x03, 0x02, 0x8d, 0x0b, 0x97, 0x01, 0x6d, 0x09, 0x8b, + 0x01, 0x6d, 0x11, 0x87, 0x01, 0x6d, 0x2b, 0x02, 0x8d, 0x0f, 0x91, 0x01, + 0x6d, 0x30, 0x83, 0x00, 0x91, 0x58, 0x87, 0x00, 0x91, 0x60, 0x83, 0x01, + 0x6c, 0x10, 0x83, 0x00, 0x92, 0x18, 0x87, 0x00, 0x92, 0x20, 0x83, 0x00, + 0x92, 0x38, 0x83, 0x00, 0x92, 0x43, 0x02, 0x8d, 0x13, 0x8b, 0x00, 0x92, + 0x51, 0x87, 0x00, 0x92, 0x6a, 0x02, 0x8d, 0x17, 0x83, 0x00, 0x92, 0x83, + 0x02, 0x8d, 0x1b, 0x97, 0x00, 0x92, 0x89, 0x8b, 0x00, 0x92, 0x91, 0x87, + 0x00, 0x92, 0xab, 0x02, 0x8d, 0x1f, 0x91, 0x00, 0x92, 0xb1, 0x19, 0x42, + 0x8d, 0x23, 0x83, 0x01, 0x6e, 0x03, 0x02, 0x8d, 0x35, 0x97, 0x01, 0x6e, + 0x09, 0x8b, 0x01, 0x6e, 0x11, 0x87, 0x01, 0x6e, 0x2b, 0x02, 0x8d, 0x39, + 0x91, 0x01, 0x6e, 0x30, 0x83, 0x00, 0x93, 0x58, 0x87, 0x00, 0x93, 0x60, + 0x83, 0x00, 0x94, 0x18, 0x87, 0x00, 0x94, 0x20, 0x83, 0x00, 0x94, 0x38, + 0x83, 0x00, 0x94, 0x43, 0x02, 0x8d, 0x3d, 0x8b, 0x00, 0x94, 0x51, 0x87, + 0x00, 0x94, 0x6a, 0x02, 0x8d, 0x41, 0x83, 0x01, 0x6e, 0x83, 0x02, 0x8d, + 0x45, 0x97, 
0x01, 0x6e, 0x89, 0x8b, 0x01, 0x6e, 0x91, 0x87, 0x01, 0x6e, + 0xab, 0x02, 0x8d, 0x49, 0x91, 0x01, 0x6e, 0xb0, 0x83, 0x00, 0x94, 0x98, + 0x87, 0x00, 0x94, 0xa0, 0x83, 0x01, 0x6c, 0x40, 0x83, 0x00, 0x94, 0xc3, + 0x02, 0x8d, 0x4d, 0x97, 0x00, 0x94, 0xc9, 0x8b, 0x00, 0x94, 0xd1, 0x87, + 0x00, 0x94, 0xeb, 0x02, 0x8d, 0x51, 0x91, 0x00, 0x94, 0xf3, 0x02, 0x8d, + 0x55, 0xc2, 0x01, 0x4a, 0x00, 0x94, 0xf8, 0x83, 0x00, 0x95, 0x58, 0x87, + 0x00, 0x95, 0x60, 0x83, 0x00, 0x95, 0x78, 0x83, 0x00, 0x95, 0x83, 0x02, + 0x8d, 0x59, 0x8b, 0x00, 0x95, 0x91, 0x87, 0x00, 0x95, 0xaa, 0x02, 0x8d, + 0x5d, 0x83, 0x00, 0x95, 0xc3, 0x02, 0x8d, 0x61, 0x97, 0x00, 0x95, 0xc9, + 0x8b, 0x00, 0x95, 0xd1, 0x87, 0x00, 0x95, 0xeb, 0x02, 0x8d, 0x65, 0x91, + 0x00, 0x95, 0xf1, 0x19, 0x42, 0x8d, 0x69, 0x83, 0x01, 0x6e, 0x43, 0x02, + 0x8d, 0x7b, 0x97, 0x01, 0x6e, 0x49, 0x8b, 0x01, 0x6e, 0x51, 0x87, 0x01, + 0x6e, 0x6b, 0x02, 0x8d, 0x7f, 0x91, 0x01, 0x6e, 0x70, 0x83, 0x00, 0x96, + 0x58, 0x87, 0x00, 0x96, 0x60, 0x83, 0x00, 0x96, 0x78, 0x83, 0x00, 0x99, + 0x83, 0x02, 0x8d, 0x83, 0x97, 0x00, 0x99, 0x89, 0x8b, 0x00, 0x99, 0x91, + 0x87, 0x00, 0x99, 0xab, 0x02, 0x8d, 0x8d, 0x91, 0x00, 0x99, 0xb3, 0x02, + 0x8d, 0x91, 0xc2, 0x01, 0x4a, 0x00, 0x99, 0xb8, 0x83, 0x00, 0x9a, 0x98, + 0x87, 0x00, 0x9a, 0xa0, 0x83, 0x01, 0x6c, 0x90, 0x83, 0x00, 0x9a, 0xb9, + 0x8b, 0x00, 0x9c, 0x69, 0x87, 0x00, 0x9c, 0x7a, 0x02, 0x8d, 0x95, 0x83, + 0x00, 0x96, 0xd8, 0x87, 0x00, 0x96, 0xe0, 0x83, 0x01, 0x6c, 0x58, 0x83, + 0x00, 0x97, 0x03, 0x02, 0x8d, 0x99, 0x97, 0x00, 0x97, 0x09, 0x8b, 0x00, + 0x97, 0x11, 0x87, 0x00, 0x97, 0x2b, 0x02, 0x8d, 0x9d, 0x91, 0x00, 0x97, + 0x31, 0xc2, 0x01, 0x4a, 0x00, 0x97, 0x38, 0x83, 0x01, 0x6d, 0x83, 0x02, + 0x8d, 0xa1, 0x97, 0x01, 0x6d, 0x89, 0x8b, 0x01, 0x6d, 0x91, 0x87, 0x01, + 0x6d, 0xab, 0x02, 0x8d, 0xa5, 0x91, 0x01, 0x6d, 0xb0, 0x83, 0x00, 0x97, + 0x58, 0x87, 0x00, 0x97, 0x60, 0x83, 0x00, 0x97, 0x78, 0x83, 0x00, 0x98, + 0x18, 0x87, 0x00, 0x98, 0x20, 0x83, 0x01, 0x6c, 0x70, 0x83, 0x00, 0x9a, + 0x58, 0x87, 0x00, 0x9a, 0x60, 0x83, 0x00, 0x9a, 0x79, 0x8b, 0x00, 0x9c, + 0x49, 0x87, 0x00, 0x9c, 0x5a, 0x02, 0x8d, 0xa9, 0xd5, 0x36, 0x47, 0x00, + 0x9a, 0xe9, 0xc4, 0x01, 0xc3, 0x00, 0x9a, 0xf8, 0xc7, 0x09, 0x0d, 0x01, + 0x3e, 0x91, 0xc9, 0x03, 0xc8, 0x01, 0x56, 0xc8, 0xd6, 0x2d, 0xba, 0x01, + 0x17, 0xc9, 0xc8, 0x52, 0x09, 0x01, 0x17, 0xc1, 0xc7, 0x80, 0x70, 0x01, + 0x17, 0xb1, 0xc9, 0x16, 0x14, 0x01, 0x17, 0xa9, 0x48, 0x00, 0x5f, 0xc2, + 0x8d, 0xad, 0xd6, 0x2c, 0x86, 0x01, 0x17, 0x90, 0xc3, 0x77, 0x79, 0x08, + 0x7f, 0x89, 0xc4, 0xdc, 0x2d, 0x08, 0x7f, 0x70, 0xc6, 0x06, 0xe1, 0x00, + 0x00, 0xb8, 0xc8, 0xb7, 0x72, 0x01, 0x16, 0xf9, 0xc8, 0xbf, 0x4a, 0x01, + 0x16, 0xf1, 0xcc, 0x07, 0xc7, 0x01, 0x16, 0xe9, 0xc9, 0x00, 0xca, 0x01, + 0x16, 0xe0, 0x03, 0xc2, 0x8d, 0xb3, 0x45, 0x00, 0x8c, 0x42, 0x8d, 0xc2, + 0x97, 0x08, 0xec, 0xa1, 0x8b, 0x08, 0xec, 0x89, 0x83, 0x08, 0xec, 0x50, + 0x97, 0x08, 0xec, 0x70, 0x8b, 0x08, 0xec, 0x60, 0xc2, 0x00, 0xd0, 0x08, + 0xec, 0x19, 0x83, 0x08, 0xec, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xf1, + 0x83, 0x08, 0xeb, 0xe8, 0x83, 0x00, 0x50, 0xb1, 0xc2, 0x00, 0xd0, 0x00, + 0x52, 0xc8, 0x83, 0x00, 0x50, 0xc1, 0xc2, 0x00, 0xd0, 0x00, 0x52, 0xd0, + 0x83, 0x00, 0x50, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x00, 0x83, 0x00, + 0x51, 0x09, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x10, 0x94, 0x00, 0x54, 0x5b, + 0x02, 0x8d, 0xd8, 0x8e, 0x00, 0x54, 0x62, 0x02, 0x8d, 0xdc, 0x83, 0x00, + 0x54, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x00, 0x83, 0x00, 0x55, 0x09, + 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x10, 0x83, 0x00, 0x55, 0xf1, 0x8b, 0x00, + 0x56, 0x41, 
0x97, 0x00, 0x56, 0x60, 0x8b, 0x00, 0x56, 0x00, 0x97, 0x00, + 0x56, 0x10, 0x94, 0x00, 0x56, 0x1b, 0x02, 0x8d, 0xe0, 0x8e, 0x00, 0x57, + 0x12, 0x02, 0x8d, 0xe4, 0x87, 0x00, 0x56, 0x29, 0x91, 0x00, 0x56, 0x48, + 0xcd, 0x7c, 0xb5, 0x0e, 0x92, 0x29, 0xcc, 0x8c, 0x0d, 0x08, 0x0c, 0x08, + 0x5b, 0x17, 0x97, 0xc2, 0x8d, 0xe8, 0xcc, 0x81, 0x21, 0x08, 0x0c, 0x68, + 0x55, 0x37, 0xd6, 0xc2, 0x8e, 0x10, 0xc4, 0x28, 0x48, 0x00, 0xff, 0x78, + 0xc4, 0x59, 0x13, 0x00, 0xff, 0xf3, 0x02, 0x8e, 0x3d, 0x49, 0x63, 0xd3, + 0xc2, 0x8e, 0x43, 0xcb, 0x9a, 0x26, 0x08, 0x0b, 0xd8, 0xc3, 0x46, 0x46, + 0x00, 0xff, 0xe9, 0x43, 0x02, 0x6f, 0xc2, 0x8e, 0x4f, 0xc8, 0xb6, 0xa2, + 0x08, 0x0b, 0xe1, 0xca, 0xa4, 0xf4, 0x08, 0x0c, 0x20, 0x0e, 0xc2, 0x8e, + 0x5e, 0xca, 0x9c, 0x84, 0x00, 0x1e, 0x79, 0xcc, 0x89, 0xfd, 0x00, 0x1f, + 0xa1, 0x49, 0x11, 0x74, 0xc2, 0x8e, 0x6a, 0xda, 0x1a, 0x64, 0x00, 0x1f, + 0xf0, 0x45, 0x03, 0x14, 0xc2, 0x8e, 0x76, 0x56, 0x2c, 0xde, 0xc2, 0x8e, + 0x88, 0xcc, 0x86, 0x49, 0x08, 0x0c, 0x61, 0xcd, 0x79, 0xdd, 0x08, 0x0d, + 0x00, 0xc4, 0x7a, 0x04, 0x00, 0xfd, 0xfb, 0x02, 0x8e, 0xa6, 0xca, 0x94, + 0x91, 0x00, 0xfe, 0x01, 0xcd, 0x42, 0x94, 0x00, 0xfd, 0xf1, 0xc8, 0x9c, + 0x0e, 0x00, 0x1e, 0xb1, 0xc9, 0xaa, 0x9e, 0x00, 0x1e, 0xa8, 0xc6, 0x57, + 0xbc, 0x00, 0xfd, 0xe9, 0x03, 0xc2, 0x8e, 0xac, 0xd0, 0x5b, 0xe2, 0x08, + 0x0c, 0x10, 0x46, 0x02, 0x0f, 0xc2, 0x8e, 0xb8, 0xd1, 0x56, 0x95, 0x00, + 0x1b, 0xa9, 0x46, 0x10, 0x38, 0xc2, 0x8e, 0xd4, 0xc9, 0xab, 0x76, 0x08, + 0x0c, 0x18, 0xcc, 0x4a, 0x69, 0x00, 0x1b, 0xd1, 0xc8, 0xab, 0xe3, 0x08, + 0x0b, 0xc8, 0xc4, 0x63, 0xdd, 0x00, 0x1c, 0x21, 0x0a, 0xc2, 0x8e, 0xe0, + 0x43, 0x02, 0xa0, 0xc2, 0x8e, 0xec, 0xca, 0xa1, 0xf2, 0x08, 0x0b, 0xd1, + 0xd1, 0x54, 0x53, 0x08, 0x0c, 0x48, 0xc9, 0xae, 0x10, 0x00, 0x1c, 0x39, + 0x4a, 0xa2, 0x9c, 0xc2, 0x8e, 0xf8, 0x14, 0x42, 0x8f, 0x2a, 0x43, 0x60, + 0xe8, 0xc2, 0x8f, 0x36, 0xdd, 0x10, 0x2f, 0x00, 0x1f, 0xb0, 0xce, 0x71, + 0x84, 0x08, 0x0b, 0xf9, 0xce, 0x72, 0x64, 0x08, 0x0c, 0x00, 0xcb, 0x20, + 0xb6, 0x00, 0x1e, 0x91, 0xd5, 0x31, 0xee, 0x00, 0x1e, 0x99, 0xd9, 0x20, + 0xa8, 0x00, 0x1e, 0xa0, 0xca, 0x37, 0x4e, 0x01, 0x17, 0x39, 0xc5, 0x07, + 0x62, 0x01, 0x13, 0x48, 0xc9, 0x00, 0xca, 0x01, 0x13, 0xb9, 0x43, 0x00, + 0xe2, 0xc2, 0x8f, 0x42, 0xd0, 0x5a, 0x92, 0x01, 0x53, 0xf3, 0x02, 0x8f, + 0x4e, 0xcb, 0x1a, 0x1a, 0x01, 0x54, 0x30, 0xc9, 0x07, 0x5e, 0x01, 0x13, + 0x39, 0xd1, 0x51, 0x01, 0x01, 0x55, 0x20, 0xd0, 0x03, 0xb7, 0x01, 0x4b, + 0xc1, 0x06, 0xc2, 0x8f, 0x54, 0x15, 0xc2, 0x8f, 0x5a, 0x0e, 0x42, 0x8f, + 0x66, 0xd8, 0x24, 0x3b, 0x01, 0x54, 0x41, 0xcf, 0x62, 0xb5, 0x01, 0x54, + 0x50, 0x8e, 0x08, 0x9b, 0x13, 0x02, 0x8f, 0x6c, 0x94, 0x08, 0x9a, 0x1a, + 0x02, 0x8f, 0x70, 0x97, 0x08, 0x9a, 0x61, 0x8b, 0x08, 0x9a, 0x41, 0x83, + 0x08, 0x99, 0xf0, 0x97, 0x08, 0x9a, 0x10, 0x8b, 0x08, 0x9a, 0x00, 0x47, + 0xb2, 0x2e, 0xc2, 0x8f, 0x74, 0x45, 0x04, 0xaf, 0xc2, 0x8f, 0x82, 0x83, + 0x08, 0x99, 0xa8, 0x83, 0x08, 0x99, 0xc1, 0xc2, 0x0d, 0xf6, 0x08, 0x99, + 0xb9, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0xb0, 0xc2, 0x00, 0xdb, 0x08, 0x99, + 0x99, 0x83, 0x08, 0x99, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x69, 0x83, + 0x08, 0x99, 0x60, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x59, 0x83, 0x08, 0x99, + 0x50, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x39, 0x83, 0x08, 0x99, 0x31, 0x06, + 0x42, 0x8f, 0x8e, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x29, 0x16, 0xc2, 0x8f, + 0x98, 0x83, 0x08, 0x99, 0x20, 0xc2, 0x19, 0x2c, 0x08, 0x98, 0xf1, 0xc2, + 0x01, 0x30, 0x08, 0x98, 0xc9, 0xc2, 0x00, 0xc1, 0x08, 0x99, 0x19, 0x83, + 0x08, 0x99, 0x40, 0xc2, 0x00, 0xd0, 0x08, 0x98, 0xe9, 0x83, 0x08, 0x98, + 0xe0, 0xc2, 
0x00, 0xd0, 0x08, 0x98, 0xd9, 0x83, 0x08, 0x98, 0xd0, 0xc2, + 0x00, 0xd0, 0x08, 0x98, 0xc1, 0x83, 0x08, 0x98, 0xb8, 0xc2, 0x00, 0xd0, + 0x08, 0x98, 0xb1, 0x83, 0x08, 0x98, 0xa8, 0x97, 0x08, 0x98, 0xa1, 0x8b, + 0x08, 0x98, 0x81, 0x83, 0x08, 0x98, 0x30, 0x97, 0x08, 0x98, 0x50, 0x8b, + 0x08, 0x98, 0x40, 0xc4, 0x1e, 0x97, 0x08, 0x9a, 0x69, 0xc5, 0x40, 0xe7, + 0x08, 0x98, 0x18, 0xc7, 0x7a, 0x7f, 0x08, 0x99, 0xe9, 0xc7, 0x14, 0x39, + 0x08, 0x98, 0x10, 0xca, 0x1e, 0x8a, 0x08, 0x98, 0x09, 0xd7, 0x29, 0x29, + 0x08, 0x98, 0x00, 0x15, 0xc2, 0x8f, 0xa2, 0xdb, 0x17, 0x10, 0x0f, 0xc9, + 0x50, 0xc9, 0xb1, 0xee, 0x00, 0xe5, 0xf9, 0x95, 0x00, 0xe4, 0xd0, 0x03, + 0xc2, 0x8f, 0xae, 0xc2, 0x49, 0x0c, 0x00, 0xe5, 0xa9, 0xc2, 0x02, 0x0a, + 0x00, 0xe5, 0x91, 0x87, 0x00, 0xe5, 0x88, 0xc2, 0x00, 0xc4, 0x00, 0xe5, + 0xe9, 0xc2, 0x00, 0x74, 0x00, 0xe5, 0xd1, 0x90, 0x00, 0xe4, 0x80, 0xc9, + 0xb1, 0x04, 0x00, 0xe5, 0xc9, 0x03, 0x42, 0x8f, 0xb9, 0xc4, 0x8c, 0x72, + 0x00, 0xe5, 0xc1, 0x90, 0x00, 0xe4, 0xa0, 0xc3, 0x00, 0xd0, 0x00, 0xe5, + 0x79, 0xc2, 0x00, 0x71, 0x00, 0xe5, 0x58, 0x0a, 0xc2, 0x8f, 0xc1, 0xc2, + 0x00, 0x71, 0x00, 0xe5, 0x61, 0xc2, 0x00, 0xd1, 0x00, 0xe5, 0x50, 0xc3, + 0x11, 0xef, 0x00, 0xe5, 0x41, 0xc2, 0x00, 0xd1, 0x00, 0xe5, 0x08, 0xc3, + 0x00, 0xd0, 0x00, 0xe5, 0x31, 0xc2, 0x00, 0xd1, 0x00, 0xe4, 0x90, 0xc3, + 0x01, 0x50, 0x00, 0xe5, 0x29, 0xc2, 0x00, 0xd1, 0x00, 0xe4, 0xc8, 0xc3, + 0x01, 0x50, 0x00, 0xe5, 0x21, 0xc2, 0x00, 0xb1, 0x00, 0xe4, 0xf0, 0xc3, + 0x01, 0x50, 0x00, 0xe4, 0xf9, 0xc2, 0x00, 0xc4, 0x00, 0xe4, 0xb0, 0x90, + 0x00, 0x85, 0x01, 0xc2, 0x00, 0xc4, 0x00, 0x86, 0x68, 0xc2, 0x00, 0xd1, + 0x00, 0x85, 0x11, 0xc3, 0x00, 0xd0, 0x00, 0x85, 0xb0, 0xc2, 0x00, 0xc4, + 0x00, 0x85, 0x31, 0xc3, 0x01, 0x50, 0x00, 0x85, 0x78, 0x90, 0x00, 0x85, + 0x39, 0x94, 0x00, 0x85, 0x90, 0xc2, 0x00, 0xd1, 0x00, 0x85, 0x49, 0xc3, + 0x01, 0x50, 0x00, 0x85, 0xa8, 0xc2, 0x00, 0xb1, 0x00, 0x85, 0x71, 0xc3, + 0x01, 0x50, 0x00, 0x85, 0xa0, 0xc2, 0x00, 0xd1, 0x00, 0x85, 0x89, 0xc3, + 0x11, 0xef, 0x00, 0x85, 0xc0, 0x0a, 0xc2, 0x8f, 0xcd, 0xc2, 0x00, 0xd1, + 0x00, 0x85, 0xd1, 0xc2, 0x00, 0x71, 0x00, 0x85, 0xe0, 0xc2, 0x00, 0x71, + 0x00, 0x85, 0xd9, 0xc3, 0x00, 0xd0, 0x00, 0x85, 0xf8, 0x03, 0xc2, 0x8f, + 0xd9, 0x87, 0x00, 0x86, 0x09, 0xc2, 0x02, 0x0a, 0x00, 0x86, 0x11, 0xc2, + 0x49, 0x0c, 0x00, 0x86, 0x28, 0x90, 0x00, 0x86, 0x81, 0xc2, 0x00, 0x74, + 0x00, 0x87, 0xd1, 0xc2, 0x00, 0xc4, 0x00, 0x87, 0xe8, 0xc2, 0x00, 0xd1, + 0x00, 0x86, 0x91, 0xc3, 0x00, 0xd0, 0x00, 0x87, 0x30, 0x90, 0x00, 0x86, + 0xa1, 0xc4, 0x8c, 0x72, 0x00, 0x87, 0xc0, 0xc2, 0x00, 0xc4, 0x00, 0x86, + 0xb1, 0xc3, 0x01, 0x50, 0x00, 0x86, 0xf8, 0x03, 0xc2, 0x8f, 0xe1, 0xc9, + 0xb1, 0x04, 0x00, 0x87, 0xc8, 0xc2, 0x00, 0xd1, 0x00, 0x86, 0xc9, 0xc3, + 0x01, 0x50, 0x00, 0x87, 0x28, 0x95, 0x00, 0x86, 0xd1, 0xc9, 0xb1, 0xee, + 0x00, 0x87, 0xf8, 0xc2, 0x00, 0xb1, 0x00, 0x86, 0xf1, 0xc3, 0x01, 0x50, + 0x00, 0x87, 0x20, 0xc2, 0x00, 0xd1, 0x00, 0x87, 0x09, 0xc3, 0x11, 0xef, + 0x00, 0x87, 0x40, 0x0a, 0xc2, 0x8f, 0xe9, 0xc2, 0x00, 0xd1, 0x00, 0x87, + 0x51, 0xc2, 0x00, 0x71, 0x00, 0x87, 0x60, 0xc2, 0x00, 0x71, 0x00, 0x87, + 0x59, 0xc3, 0x00, 0xd0, 0x00, 0x87, 0x78, 0x03, 0xc2, 0x8f, 0xf5, 0x87, + 0x00, 0x87, 0x89, 0xc2, 0x02, 0x0a, 0x00, 0x87, 0x91, 0xc2, 0x49, 0x0c, + 0x00, 0x87, 0xa8, 0x90, 0x01, 0x68, 0x01, 0xc2, 0x00, 0xc4, 0x01, 0x69, + 0x68, 0xc2, 0x00, 0xd1, 0x01, 0x68, 0x11, 0xc3, 0x00, 0xd0, 0x01, 0x68, + 0xb0, 0xc2, 0x00, 0xc4, 0x01, 0x68, 0x31, 0xc3, 0x01, 0x50, 0x01, 0x68, + 0x78, 0x90, 0x01, 0x68, 0x39, 0x94, 0x01, 0x68, 0x90, 0xc2, 0x00, 0xd1, + 0x01, 0x68, 
0x49, 0xc3, 0x01, 0x50, 0x01, 0x68, 0xa8, 0xc2, 0x00, 0xb1, + 0x01, 0x68, 0x71, 0xc3, 0x01, 0x50, 0x01, 0x68, 0xa0, 0xc2, 0x00, 0xd1, + 0x01, 0x68, 0x89, 0xc3, 0x11, 0xef, 0x01, 0x68, 0xc0, 0x0a, 0xc2, 0x90, + 0x00, 0xc2, 0x00, 0xd1, 0x01, 0x68, 0xd1, 0xc2, 0x00, 0x71, 0x01, 0x68, + 0xe0, 0xc2, 0x00, 0x71, 0x01, 0x68, 0xd9, 0xc3, 0x00, 0xd0, 0x01, 0x68, + 0xf8, 0x03, 0xc2, 0x90, 0x0c, 0x87, 0x01, 0x69, 0x09, 0xc2, 0x02, 0x0a, + 0x01, 0x69, 0x11, 0xc2, 0x49, 0x0c, 0x01, 0x69, 0x28, 0xc3, 0xc8, 0x92, + 0x01, 0x60, 0x09, 0xc6, 0xc8, 0x01, 0x01, 0x61, 0x40, 0xc4, 0xe4, 0x2b, + 0x01, 0x60, 0x21, 0xc4, 0xdf, 0x03, 0x01, 0x60, 0x39, 0xc5, 0xdd, 0xb2, + 0x01, 0x60, 0x60, 0x07, 0xc2, 0x90, 0x14, 0xc3, 0x01, 0xbd, 0x01, 0x61, + 0x09, 0x97, 0x01, 0x61, 0x19, 0x91, 0x01, 0x61, 0x30, 0xc6, 0xd3, 0x73, + 0x01, 0x60, 0x31, 0xc5, 0xda, 0x24, 0x01, 0x60, 0x40, 0x42, 0x25, 0xa1, + 0xc2, 0x90, 0x1e, 0xcb, 0x98, 0x00, 0x01, 0x60, 0x51, 0x47, 0x1c, 0xa0, + 0x42, 0x90, 0x28, 0xc6, 0xc4, 0x49, 0x01, 0x60, 0x71, 0xcf, 0x60, 0xd5, + 0x01, 0x61, 0x70, 0xc2, 0x06, 0xc6, 0x01, 0x60, 0x89, 0xc2, 0x00, 0x16, + 0x01, 0x60, 0xc8, 0xc5, 0xcb, 0xee, 0x01, 0x60, 0x91, 0x87, 0x01, 0x60, + 0xd0, 0xc4, 0xe4, 0xc7, 0x01, 0x60, 0xa1, 0x0a, 0xc2, 0x90, 0x34, 0xc9, + 0xae, 0x19, 0x01, 0x61, 0x11, 0xc8, 0xae, 0x6b, 0x01, 0x61, 0x22, 0x02, + 0x90, 0x41, 0xc5, 0xd9, 0x52, 0x01, 0x60, 0xa9, 0xc2, 0x00, 0xba, 0x01, + 0x60, 0xe1, 0xcb, 0x97, 0x92, 0x01, 0x61, 0x68, 0xc4, 0xac, 0x24, 0x01, + 0x60, 0xb9, 0xc3, 0x02, 0x44, 0x01, 0x61, 0x50, 0xc5, 0x7b, 0xac, 0x01, + 0x60, 0xe9, 0xcd, 0x7b, 0xa4, 0x01, 0x61, 0x78, 0xc3, 0xc8, 0x92, 0x01, + 0x61, 0x89, 0xc6, 0xc8, 0x01, 0x01, 0x62, 0xc0, 0xc4, 0xe4, 0x2b, 0x01, + 0x61, 0xa1, 0xc4, 0xdf, 0x03, 0x01, 0x61, 0xb9, 0xc5, 0xdd, 0xb2, 0x01, + 0x61, 0xe0, 0x07, 0xc2, 0x90, 0x47, 0xc3, 0x01, 0xbd, 0x01, 0x62, 0x89, + 0x97, 0x01, 0x62, 0x99, 0x91, 0x01, 0x62, 0xb0, 0xc6, 0xd3, 0x73, 0x01, + 0x61, 0xb1, 0xc5, 0xda, 0x24, 0x01, 0x61, 0xc0, 0x42, 0x25, 0xa1, 0xc2, + 0x90, 0x51, 0xcb, 0x98, 0x00, 0x01, 0x61, 0xd1, 0x47, 0x1c, 0xa0, 0x42, + 0x90, 0x5b, 0xc6, 0xc4, 0x49, 0x01, 0x61, 0xf1, 0xcf, 0x60, 0xd5, 0x01, + 0x62, 0xf0, 0xc2, 0x06, 0xc6, 0x01, 0x62, 0x09, 0xc2, 0x00, 0x16, 0x01, + 0x62, 0x48, 0xc5, 0xcb, 0xee, 0x01, 0x62, 0x11, 0x87, 0x01, 0x62, 0x50, + 0xc4, 0xe4, 0xc7, 0x01, 0x62, 0x21, 0x0a, 0xc2, 0x90, 0x67, 0xc9, 0xae, + 0x19, 0x01, 0x62, 0x91, 0xc8, 0xae, 0x6b, 0x01, 0x62, 0xa2, 0x02, 0x90, + 0x74, 0xc5, 0xd9, 0x52, 0x01, 0x62, 0x29, 0xc2, 0x00, 0xba, 0x01, 0x62, + 0x61, 0xcb, 0x97, 0x92, 0x01, 0x62, 0xe8, 0xc4, 0xac, 0x24, 0x01, 0x62, + 0x39, 0xc3, 0x02, 0x44, 0x01, 0x62, 0xd0, 0xc5, 0x7b, 0xac, 0x01, 0x62, + 0x69, 0xcd, 0x7b, 0xa4, 0x01, 0x62, 0xf8, 0xc7, 0x14, 0x39, 0x00, 0x58, + 0x11, 0xc7, 0x7a, 0x7f, 0x00, 0x59, 0xe8, 0xc5, 0x40, 0xe7, 0x00, 0x58, + 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x5a, 0x68, 0x83, 0x00, 0x58, 0x31, 0x8b, + 0x00, 0x58, 0x81, 0x97, 0x00, 0x58, 0xa0, 0x8b, 0x00, 0x58, 0x40, 0x97, + 0x00, 0x58, 0x50, 0x47, 0xb2, 0x2e, 0xc2, 0x90, 0x7a, 0x83, 0x00, 0x59, + 0xa8, 0x83, 0x00, 0x58, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x58, 0xb0, 0x83, + 0x00, 0x58, 0xb9, 0xc2, 0x00, 0xd0, 0x00, 0x58, 0xc0, 0xc2, 0x01, 0x30, + 0x00, 0x58, 0xc9, 0xc2, 0x19, 0x2c, 0x00, 0x58, 0xf1, 0xc2, 0x00, 0xc1, + 0x00, 0x59, 0x19, 0x83, 0x00, 0x59, 0x40, 0x83, 0x00, 0x58, 0xd1, 0xc2, + 0x00, 0xd0, 0x00, 0x58, 0xd8, 0x83, 0x00, 0x58, 0xe1, 0xc2, 0x00, 0xd0, + 0x00, 0x58, 0xe8, 0x16, 0xc2, 0x90, 0x88, 0x83, 0x00, 0x59, 0x21, 0xc2, + 0x00, 0xd0, 0x00, 0x59, 0x28, 0x06, 0xc2, 0x90, 0x92, 0x83, 0x00, 0x59, + 0x31, 0xc2, 
0x00, 0xd0, 0x00, 0x59, 0x38, 0x83, 0x00, 0x59, 0x51, 0xc2, + 0x00, 0xd0, 0x00, 0x59, 0x58, 0x83, 0x00, 0x59, 0x61, 0xc2, 0x00, 0xd0, + 0x00, 0x59, 0x68, 0x83, 0x00, 0x59, 0x79, 0xc2, 0x19, 0x2c, 0x00, 0x5a, + 0xf8, 0x83, 0x00, 0x59, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x5a, 0xe1, 0xc2, + 0x00, 0xd0, 0x00, 0x5a, 0xe8, 0x83, 0x00, 0x59, 0x91, 0xc2, 0x00, 0xdb, + 0x00, 0x59, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0x59, 0xb1, 0xc2, 0x0d, 0xf6, + 0x00, 0x59, 0xb9, 0x83, 0x00, 0x59, 0xc0, 0x83, 0x00, 0x59, 0xf1, 0x8b, + 0x00, 0x5a, 0x41, 0x97, 0x00, 0x5a, 0x60, 0x8b, 0x00, 0x5a, 0x00, 0x97, + 0x00, 0x5a, 0x10, 0x94, 0x00, 0x5a, 0x1b, 0x02, 0x90, 0x9c, 0x8e, 0x00, + 0x5b, 0x12, 0x02, 0x90, 0xa0, 0xc2, 0x02, 0xa0, 0x00, 0x5b, 0x41, 0xc4, + 0x02, 0xde, 0x00, 0x5b, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x5b, 0x51, 0xc3, + 0x0d, 0x14, 0x00, 0x5b, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x5b, 0x61, 0xc4, + 0x18, 0x10, 0x00, 0x5b, 0x68, 0xc7, 0x08, 0x79, 0x00, 0x5b, 0x91, 0xc4, + 0x01, 0xce, 0x00, 0x5b, 0x99, 0xc9, 0x67, 0x38, 0x00, 0x5b, 0xa9, 0xc6, + 0x06, 0xdb, 0x00, 0x5b, 0xb0, 0xc8, 0x08, 0x79, 0x00, 0x5b, 0xa1, 0xca, + 0xa7, 0x88, 0x00, 0x5b, 0xb8, 0xc3, 0x02, 0xdf, 0x0f, 0x68, 0x1b, 0x02, + 0x90, 0xa4, 0xc4, 0x0d, 0x0e, 0x0f, 0x68, 0x62, 0x02, 0x90, 0xa8, 0x91, + 0x0f, 0x68, 0x13, 0x02, 0x90, 0xae, 0xc4, 0x18, 0x12, 0x0f, 0x68, 0x5a, + 0x02, 0x90, 0xb2, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x28, 0xc2, 0x00, 0x33, + 0x0f, 0x68, 0x23, 0x02, 0x90, 0xb8, 0xc3, 0x0d, 0x0f, 0x0f, 0x68, 0x6a, + 0x02, 0x90, 0xbc, 0xc2, 0x00, 0x5f, 0x0f, 0x68, 0x2b, 0x02, 0x90, 0xc2, + 0xc3, 0x45, 0x6b, 0x0f, 0x68, 0x72, 0x02, 0x90, 0xc6, 0xc7, 0x0d, 0x04, + 0x0f, 0x68, 0x99, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xe0, 0xc2, 0x0d, 0x10, + 0x0f, 0x68, 0x7b, 0x02, 0x90, 0xcc, 0x00, 0x42, 0x90, 0xd2, 0xc2, 0x0d, + 0x10, 0x0f, 0x68, 0x83, 0x02, 0x90, 0xde, 0x00, 0x42, 0x90, 0xe4, 0xc9, + 0x57, 0x20, 0x0f, 0x69, 0x60, 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xd1, 0xc8, + 0x4b, 0x94, 0x0f, 0x69, 0x18, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x68, 0xc7, + 0x0d, 0x04, 0x0f, 0x68, 0xd9, 0xc8, 0x4b, 0x94, 0x0f, 0x69, 0x20, 0xc9, + 0x57, 0x20, 0x0f, 0x69, 0xd0, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0xd8, 0xc8, + 0x0d, 0x03, 0x0f, 0x69, 0xc0, 0xc8, 0x0d, 0x03, 0x0f, 0x69, 0xc8, 0xc6, + 0x2d, 0xd0, 0x01, 0x3e, 0x21, 0xc4, 0x0e, 0xa6, 0x01, 0x3e, 0x18, 0xd8, + 0x21, 0x23, 0x01, 0x39, 0xe1, 0xc8, 0x0a, 0xff, 0x01, 0x39, 0x91, 0xca, + 0x22, 0x51, 0x01, 0x39, 0x59, 0xc5, 0x0d, 0x20, 0x01, 0x38, 0xd8, 0x9a, + 0x01, 0x21, 0x19, 0xc2, 0x01, 0x25, 0x0f, 0xa6, 0xb0, 0xc5, 0x5f, 0x98, + 0x0f, 0xae, 0x09, 0xca, 0x9e, 0xf0, 0x0f, 0xa6, 0x10, 0xcc, 0x81, 0x81, + 0x0f, 0xa7, 0x69, 0xcb, 0x9a, 0x5d, 0x0f, 0xa7, 0x60, 0xcd, 0x78, 0xa5, + 0x01, 0x1c, 0x81, 0xcd, 0x7a, 0x45, 0x01, 0x1c, 0x78, 0xc9, 0x3b, 0x79, + 0x08, 0x7c, 0x49, 0x44, 0x02, 0x9f, 0xc2, 0x90, 0xf0, 0xc3, 0x01, 0x5d, + 0x08, 0x7c, 0x30, 0x49, 0x04, 0xf9, 0xc2, 0x90, 0xfc, 0x44, 0x05, 0x18, + 0x42, 0x91, 0x08, 0x0e, 0xc2, 0x91, 0x14, 0xc3, 0xb5, 0x3e, 0x08, 0x7c, + 0x01, 0xc2, 0x00, 0x67, 0x08, 0x7b, 0xe1, 0x15, 0xc2, 0x91, 0x20, 0xc3, + 0x20, 0x18, 0x08, 0x7b, 0xd1, 0xc3, 0x00, 0x4e, 0x08, 0x7b, 0xc9, 0xc4, + 0xe0, 0xe7, 0x08, 0x7b, 0xb9, 0xc4, 0x4a, 0xb9, 0x08, 0x7b, 0xb1, 0xca, + 0x9b, 0x8a, 0x08, 0x7b, 0xa9, 0xc5, 0x4a, 0xb3, 0x08, 0x7b, 0xa1, 0xc3, + 0x7e, 0x89, 0x08, 0x7b, 0x99, 0xca, 0x9c, 0xa2, 0x08, 0x7b, 0x91, 0xc4, + 0xe3, 0x27, 0x08, 0x7b, 0x89, 0xc5, 0xa5, 0xfd, 0x08, 0x7b, 0x81, 0xc4, + 0x5d, 0xe2, 0x08, 0x7b, 0xf0, 0xd1, 0x53, 0xa9, 0x08, 0x79, 0x31, 0x47, + 0x34, 0x2f, 0xc2, 0x91, 0x2a, 0x0e, 0x42, 0x91, 0x3b, 0x43, 0x2f, 0x2a, + 0xc2, 0x91, 
0x47, 0x47, 0x02, 0x0e, 0x42, 0x91, 0x53, 0xc3, 0x09, 0x41, + 0x08, 0x67, 0xe1, 0x42, 0x02, 0x09, 0xc2, 0x91, 0xb0, 0xc3, 0x05, 0x14, + 0x08, 0x67, 0xd2, 0x02, 0x91, 0xbc, 0x97, 0x08, 0x67, 0x53, 0x02, 0x91, + 0xc0, 0x87, 0x08, 0x66, 0x4b, 0x02, 0x91, 0xce, 0x4a, 0xa7, 0x74, 0xc2, + 0x92, 0x2e, 0x4b, 0x95, 0x61, 0xc2, 0x92, 0x3a, 0xc8, 0xb9, 0x6a, 0x08, + 0x67, 0x19, 0x91, 0x08, 0x66, 0xdb, 0x02, 0x92, 0x46, 0x83, 0x08, 0x66, + 0x03, 0x02, 0x92, 0x50, 0x8b, 0x08, 0x66, 0x83, 0x02, 0x92, 0x64, 0xc7, + 0xc9, 0x9d, 0x08, 0x66, 0x50, 0x87, 0x08, 0x64, 0x4b, 0x02, 0x92, 0x68, + 0xc8, 0xb9, 0x6a, 0x08, 0x65, 0x19, 0x91, 0x08, 0x64, 0xdb, 0x02, 0x92, + 0xc8, 0x4a, 0xa7, 0x74, 0xc2, 0x92, 0xd2, 0x4b, 0x95, 0x61, 0xc2, 0x92, + 0xde, 0x97, 0x08, 0x65, 0x53, 0x02, 0x92, 0xea, 0x83, 0x08, 0x64, 0x03, + 0x02, 0x92, 0xf8, 0x8b, 0x08, 0x64, 0x83, 0x02, 0x93, 0x0c, 0xc7, 0xc9, + 0x9d, 0x08, 0x64, 0x50, 0xc4, 0xe1, 0xaf, 0x08, 0x62, 0x41, 0x91, 0x08, + 0x60, 0x33, 0x02, 0x93, 0x10, 0x83, 0x08, 0x60, 0x03, 0x02, 0x93, 0x23, + 0x07, 0xc2, 0x93, 0x52, 0x8b, 0x08, 0x60, 0x1a, 0x02, 0x93, 0x72, 0x83, + 0x08, 0x60, 0x0b, 0x02, 0x93, 0x7a, 0x87, 0x08, 0x60, 0x2b, 0x02, 0x93, + 0xad, 0x11, 0xc2, 0x93, 0xbf, 0x8b, 0x08, 0x60, 0x22, 0x02, 0x93, 0xca, + 0x16, 0xc2, 0x93, 0xce, 0xc3, 0x05, 0x14, 0x08, 0x54, 0xe8, 0x42, 0x02, + 0x1c, 0xc2, 0x93, 0xda, 0x16, 0xc2, 0x93, 0xe4, 0xc3, 0x2b, 0xb9, 0x08, + 0x54, 0xd1, 0x09, 0xc2, 0x93, 0xf4, 0x42, 0x0e, 0x9a, 0xc2, 0x94, 0x00, + 0x43, 0xe6, 0x2c, 0xc2, 0x94, 0x08, 0xc3, 0x7e, 0x89, 0x08, 0x54, 0x29, + 0xc3, 0x0f, 0x9a, 0x08, 0x54, 0x21, 0xc4, 0x19, 0x60, 0x08, 0x54, 0x19, + 0x0a, 0xc2, 0x94, 0x14, 0xc3, 0x0d, 0xff, 0x08, 0x54, 0x09, 0xc3, 0x72, + 0xf0, 0x08, 0x54, 0x39, 0xc3, 0x85, 0xf5, 0x08, 0x54, 0x41, 0x0d, 0xc2, + 0x94, 0x20, 0xc4, 0x3a, 0x01, 0x08, 0x54, 0x61, 0xc3, 0x0d, 0xf6, 0x08, + 0x54, 0x71, 0xc3, 0xb1, 0x0d, 0x08, 0x54, 0x81, 0x03, 0x42, 0x94, 0x2c, + 0xcd, 0x7a, 0xa0, 0x0f, 0xad, 0x99, 0x44, 0x19, 0xb0, 0x42, 0x94, 0x38, + 0xc2, 0x00, 0xd1, 0x08, 0x1a, 0x81, 0xc3, 0x2b, 0x88, 0x08, 0x1a, 0x89, + 0xc3, 0x46, 0xf6, 0x08, 0x1a, 0x91, 0x06, 0xc2, 0x94, 0x4a, 0x87, 0x08, + 0x1a, 0xa3, 0x02, 0x94, 0x54, 0x1c, 0xc2, 0x94, 0x58, 0x8b, 0x08, 0x1a, + 0xcb, 0x02, 0x94, 0x64, 0xc4, 0xe0, 0xb3, 0x08, 0x1a, 0xd1, 0xc3, 0x39, + 0xa6, 0x08, 0x1a, 0xd9, 0xc5, 0xdb, 0x19, 0x08, 0x1a, 0xe1, 0xc5, 0xdb, + 0x6e, 0x08, 0x1a, 0xe9, 0x18, 0xc2, 0x94, 0x6c, 0xc4, 0xcf, 0x74, 0x08, + 0x1a, 0xf9, 0xc3, 0x26, 0x92, 0x08, 0x1b, 0x01, 0x15, 0xc2, 0x94, 0x78, + 0x16, 0xc2, 0x94, 0x82, 0x97, 0x08, 0x1b, 0x19, 0xc5, 0xdd, 0x1c, 0x08, + 0x1b, 0x21, 0x1b, 0xc2, 0x94, 0x8e, 0x91, 0x08, 0x1b, 0x4b, 0x02, 0x94, + 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0x1b, 0x60, 0xc2, 0x00, 0x51, 0x08, 0x18, + 0x09, 0x0d, 0xc2, 0x94, 0xac, 0xc2, 0x00, 0x06, 0x08, 0x18, 0x19, 0x87, + 0x08, 0x18, 0x23, 0x02, 0x94, 0xbe, 0xc2, 0x00, 0x5f, 0x08, 0x18, 0x29, + 0xc2, 0x0a, 0xe2, 0x08, 0x18, 0x31, 0xc2, 0x01, 0x7f, 0x08, 0x18, 0x39, + 0x16, 0xc2, 0x94, 0xe2, 0x8b, 0x08, 0x18, 0x4b, 0x02, 0x94, 0xec, 0x83, + 0x08, 0x18, 0x01, 0x91, 0x08, 0x18, 0x79, 0x12, 0xc2, 0x94, 0xf0, 0x15, + 0xc2, 0x94, 0xfa, 0x97, 0x08, 0x18, 0xb3, 0x02, 0x95, 0x06, 0xc3, 0x28, + 0x28, 0x08, 0x18, 0xe1, 0xc2, 0x0c, 0x43, 0x08, 0x19, 0x69, 0xcc, 0x82, + 0xc5, 0x08, 0x19, 0x70, 0xc3, 0x05, 0x14, 0x08, 0x19, 0x01, 0x42, 0x02, + 0x09, 0xc2, 0x95, 0x0a, 0xc3, 0x09, 0x41, 0x08, 0x19, 0x10, 0x83, 0x00, + 0xe2, 0xf8, 0x99, 0x00, 0xe3, 0x19, 0x8f, 0x00, 0xe3, 0x11, 0x8c, 0x00, + 0xe3, 0x09, 0x8d, 0x00, 0xe3, 0x00, 0xc7, 0x56, 0x8e, 0x01, 0x5d, 0xd1, + 0xd1, 0x56, 
0x84, 0x01, 0x5d, 0xd8, 0x90, 0x08, 0x25, 0x90, 0xc3, 0x1c, + 0x63, 0x08, 0x25, 0xb1, 0xc2, 0x02, 0x2b, 0x08, 0x25, 0xe9, 0xc2, 0x00, + 0xb0, 0x08, 0x26, 0x29, 0x16, 0x42, 0x95, 0x16, 0x83, 0x08, 0x26, 0x51, + 0xc2, 0x00, 0xd0, 0x08, 0x26, 0x60, 0x90, 0x08, 0x26, 0xd0, 0xc3, 0x1c, + 0x63, 0x08, 0x26, 0xf1, 0xc2, 0x02, 0x2b, 0x08, 0x27, 0x29, 0xc2, 0x00, + 0xb0, 0x08, 0x27, 0x69, 0x16, 0x42, 0x95, 0x20, 0x83, 0x08, 0x27, 0x91, + 0xc2, 0x00, 0xd0, 0x08, 0x27, 0xa0, 0x0d, 0xc2, 0x95, 0x2a, 0xcb, 0x93, + 0x7d, 0x0e, 0x7d, 0x89, 0xc8, 0x4e, 0x4b, 0x0e, 0x7d, 0x80, 0xc6, 0xca, + 0xa9, 0x0e, 0x7a, 0x88, 0x0d, 0xc2, 0x95, 0x36, 0x16, 0xc2, 0x95, 0x42, + 0x44, 0xe0, 0x6b, 0xc2, 0x95, 0x4e, 0x49, 0x75, 0xe7, 0xc2, 0x95, 0x5b, + 0xce, 0x69, 0xa0, 0x0e, 0x7c, 0xb9, 0x12, 0xc2, 0x95, 0x68, 0xce, 0x6d, + 0xa2, 0x0e, 0x7c, 0x98, 0x00, 0x42, 0x95, 0x72, 0x00, 0x42, 0x95, 0x87, + 0x42, 0x00, 0x97, 0xc2, 0x95, 0x93, 0xc8, 0xb8, 0x72, 0x0e, 0x7b, 0xf8, + 0xcb, 0x87, 0x3a, 0x0e, 0x7b, 0xe1, 0xce, 0x69, 0xa0, 0x0e, 0x7b, 0xd9, + 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0xd1, 0xc8, 0xbf, 0x6a, 0x0e, 0x7b, 0xc8, + 0x45, 0x4e, 0x46, 0xc2, 0x95, 0x9f, 0xce, 0x69, 0xa0, 0x0e, 0x7b, 0xb8, + 0xc6, 0x6d, 0xaa, 0x0e, 0x7b, 0xa1, 0xca, 0x93, 0x7e, 0x0e, 0x7b, 0x98, + 0xcc, 0x84, 0x51, 0x0e, 0x7d, 0x59, 0xc7, 0xc8, 0x69, 0x0e, 0x7d, 0x51, + 0xc3, 0xe5, 0x9c, 0x0e, 0x7d, 0x48, 0xc8, 0xb8, 0x12, 0x0e, 0x79, 0x68, + 0xc8, 0xbb, 0xe2, 0x0e, 0x79, 0xc8, 0xc9, 0x78, 0xd9, 0x0e, 0x78, 0xc1, + 0x43, 0x01, 0x55, 0x42, 0x95, 0xab, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0x89, + 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x28, 0xc7, 0x93, 0xee, 0x0e, 0x79, 0xb3, + 0x02, 0x95, 0xb7, 0xc6, 0xcb, 0x33, 0x0e, 0x79, 0x30, 0x15, 0xc2, 0x95, + 0xbd, 0x43, 0x01, 0x55, 0x42, 0x95, 0xc9, 0xc3, 0xe5, 0x2d, 0x0e, 0x79, + 0x51, 0xc2, 0x01, 0xc8, 0x0e, 0x79, 0x00, 0x43, 0x01, 0x55, 0xc2, 0x95, + 0xd5, 0x4d, 0x78, 0xd9, 0x42, 0x95, 0xe1, 0xc6, 0x42, 0x68, 0x0e, 0x78, + 0xf1, 0x42, 0x00, 0xe7, 0x42, 0x95, 0xed, 0xc5, 0x00, 0x2c, 0x0e, 0x78, + 0x91, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x30, 0xc6, 0x78, 0xdc, 0x0e, 0x78, + 0xe9, 0x4b, 0x8e, 0xfa, 0x42, 0x95, 0xf9, 0xc5, 0x00, 0x2c, 0x0e, 0x78, + 0xa1, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x40, 0xc5, 0x00, 0x2c, 0x0e, 0x78, + 0x81, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x20, 0xc5, 0x00, 0x2c, 0x0e, 0x78, + 0x69, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x08, 0xce, 0x1e, 0x74, 0x08, 0xd1, + 0xb0, 0xc3, 0x0d, 0x18, 0x05, 0x4e, 0x53, 0x02, 0x96, 0x05, 0xc4, 0xe3, + 0x8f, 0x05, 0x4e, 0x18, 0xc6, 0xcd, 0xfd, 0x05, 0x4e, 0x39, 0xc6, 0x45, + 0xa6, 0x05, 0x4e, 0x60, 0x17, 0xc2, 0x96, 0x0b, 0xc5, 0x3a, 0xbc, 0x05, + 0x4e, 0x40, 0xc6, 0xcb, 0x27, 0x05, 0x4c, 0x98, 0x42, 0x00, 0x4d, 0x42, + 0x96, 0x17, 0xc6, 0xcb, 0x21, 0x05, 0x4d, 0x60, 0xc6, 0xcb, 0x27, 0x05, + 0x4d, 0x40, 0x00, 0x42, 0x96, 0x23, 0x83, 0x05, 0x4d, 0x23, 0x02, 0x96, + 0x2f, 0xc2, 0x19, 0x2c, 0x05, 0x4c, 0xd3, 0x02, 0x96, 0x35, 0xc2, 0x01, + 0x30, 0x05, 0x4c, 0xa2, 0x02, 0x96, 0x3b, 0x83, 0x05, 0x4d, 0x13, 0x02, + 0x96, 0x44, 0xc2, 0x0e, 0x9a, 0x05, 0x4c, 0xea, 0x02, 0x96, 0x4a, 0x83, + 0x05, 0x4d, 0x03, 0x02, 0x96, 0x50, 0xc2, 0x01, 0x6f, 0x05, 0x4c, 0xda, + 0x02, 0x96, 0x56, 0xca, 0x60, 0x26, 0x05, 0x4c, 0xc8, 0xc6, 0xcb, 0x27, + 0x05, 0x4c, 0xb0, 0x00, 0x42, 0x96, 0x5c, 0x8b, 0x05, 0x4c, 0x68, 0x8b, + 0x05, 0x4c, 0x39, 0xc5, 0xd5, 0x2e, 0x05, 0x4c, 0x28, 0xc4, 0x04, 0x15, + 0x05, 0x4d, 0xd1, 0xc4, 0xdf, 0x53, 0x05, 0x4d, 0xa0, 0xcf, 0x6a, 0xe9, + 0x01, 0x2c, 0xf2, 0x02, 0x96, 0x68, 0x45, 0x02, 0x9a, 0x42, 0x96, 0x6e, + 0x97, 0x05, 0x22, 0xdb, 0x02, 0x96, 0x7a, 0x91, 0x05, 0x22, 0xbb, 0x02, + 0x96, 0x8d, 
0x8b, 0x05, 0x22, 0x62, 0x02, 0x96, 0x99, 0x9b, 0x05, 0x22, + 0x33, 0x02, 0x96, 0xac, 0x97, 0x05, 0x22, 0x03, 0x02, 0x96, 0xbf, 0x91, + 0x05, 0x21, 0xeb, 0x02, 0x96, 0xd5, 0x8b, 0x05, 0x21, 0x9a, 0x02, 0x96, + 0xe1, 0x9b, 0x05, 0x1d, 0x3b, 0x02, 0x96, 0xf4, 0x97, 0x05, 0x1d, 0x0b, + 0x02, 0x97, 0x07, 0x87, 0x05, 0x1c, 0xeb, 0x02, 0x97, 0x1a, 0x91, 0x05, + 0x1c, 0xcb, 0x02, 0x97, 0x26, 0x83, 0x05, 0x1c, 0xb2, 0x02, 0x97, 0x2e, + 0xc2, 0x02, 0x0a, 0x05, 0x12, 0xf3, 0x02, 0x97, 0x3a, 0x83, 0x05, 0x13, + 0x13, 0x02, 0x97, 0x42, 0xc2, 0x01, 0xba, 0x05, 0x13, 0x33, 0x02, 0x97, + 0x4e, 0x91, 0x05, 0x13, 0x4b, 0x02, 0x97, 0x56, 0x87, 0x05, 0x13, 0x62, + 0x02, 0x97, 0x62, 0x8b, 0x05, 0x17, 0x7b, 0x02, 0x97, 0x6a, 0x83, 0x05, + 0x17, 0xb3, 0x02, 0x97, 0x7d, 0x97, 0x05, 0x17, 0xfb, 0x02, 0x97, 0x89, + 0x11, 0xc2, 0x97, 0x9f, 0x87, 0x05, 0x17, 0xeb, 0x02, 0x97, 0xa7, 0x9b, + 0x05, 0x18, 0x2a, 0x02, 0x97, 0xab, 0x8b, 0x05, 0x03, 0xc3, 0x02, 0x97, + 0xbe, 0x83, 0x05, 0x03, 0xfb, 0x02, 0x97, 0xd1, 0x91, 0x05, 0x04, 0x1b, + 0x02, 0x97, 0xdd, 0x97, 0x05, 0x04, 0x3b, 0x02, 0x97, 0xe9, 0x9b, 0x05, + 0x04, 0x6a, 0x02, 0x97, 0xfc, 0x8b, 0x05, 0x0a, 0x9b, 0x02, 0x98, 0x0f, + 0x83, 0x05, 0x0a, 0xcb, 0x02, 0x98, 0x22, 0x91, 0x05, 0x0a, 0xeb, 0x02, + 0x98, 0x2e, 0x87, 0x05, 0x0b, 0x03, 0x02, 0x98, 0x3a, 0x97, 0x05, 0x0b, + 0x22, 0x02, 0x98, 0x42, 0x96, 0x05, 0x0b, 0xe9, 0x9a, 0x05, 0x0b, 0xf1, + 0x92, 0x05, 0x0c, 0x01, 0x87, 0x05, 0x0c, 0x12, 0x02, 0x98, 0x55, 0x9a, + 0x05, 0x0c, 0x21, 0x92, 0x05, 0x0c, 0x30, 0x91, 0x05, 0x0c, 0x43, 0x02, + 0x98, 0x5d, 0x96, 0x05, 0x0c, 0x89, 0x9a, 0x05, 0x0c, 0x91, 0x92, 0x05, + 0x0c, 0xa1, 0x94, 0x05, 0x0c, 0xb2, 0x02, 0x98, 0x65, 0x96, 0x05, 0x0c, + 0x51, 0x9a, 0x05, 0x0c, 0x59, 0x92, 0x05, 0x0c, 0x68, 0x9a, 0x05, 0x0c, + 0x71, 0x92, 0x05, 0x0c, 0x80, 0x9b, 0x05, 0x21, 0x7b, 0x02, 0x98, 0x69, + 0x97, 0x05, 0x21, 0x4b, 0x02, 0x98, 0x75, 0x91, 0x05, 0x21, 0x2b, 0x02, + 0x98, 0x8f, 0x8b, 0x05, 0x20, 0xd2, 0x02, 0x98, 0x9b, 0x94, 0x05, 0x1f, + 0xdb, 0x02, 0x98, 0xae, 0x92, 0x05, 0x1f, 0xc9, 0x9a, 0x05, 0x1f, 0xb9, + 0x96, 0x05, 0x1f, 0xb0, 0x94, 0x05, 0x1f, 0xab, 0x02, 0x98, 0xb2, 0x92, + 0x05, 0x1f, 0x99, 0x9a, 0x05, 0x1f, 0x89, 0x96, 0x05, 0x1f, 0x81, 0x91, + 0x05, 0x1f, 0x52, 0x02, 0x98, 0xb6, 0x92, 0x05, 0x1f, 0x79, 0x9a, 0x05, + 0x1f, 0x69, 0x96, 0x05, 0x1f, 0x60, 0x87, 0x05, 0x1f, 0x33, 0x02, 0x98, + 0xc2, 0x92, 0x05, 0x1f, 0x19, 0x9a, 0x05, 0x1f, 0x09, 0x96, 0x05, 0x1f, + 0x00, 0x94, 0x05, 0x20, 0xbb, 0x02, 0x98, 0xce, 0x92, 0x05, 0x20, 0xa9, + 0x9a, 0x05, 0x20, 0x99, 0x96, 0x05, 0x20, 0x90, 0x94, 0x05, 0x20, 0x8b, + 0x02, 0x98, 0xd2, 0x92, 0x05, 0x20, 0x79, 0x9a, 0x05, 0x20, 0x69, 0x96, + 0x05, 0x20, 0x61, 0x91, 0x05, 0x20, 0x32, 0x02, 0x98, 0xd6, 0x92, 0x05, + 0x20, 0x59, 0x9a, 0x05, 0x20, 0x49, 0x96, 0x05, 0x20, 0x40, 0x87, 0x05, + 0x20, 0x13, 0x02, 0x98, 0xe2, 0x92, 0x05, 0x1f, 0xf9, 0x9a, 0x05, 0x1f, + 0xe9, 0x96, 0x05, 0x1f, 0xe0, 0x94, 0x05, 0x1e, 0xfb, 0x02, 0x98, 0xee, + 0x92, 0x05, 0x1e, 0xe9, 0x9a, 0x05, 0x1e, 0xd9, 0x96, 0x05, 0x1e, 0xd0, + 0x94, 0x05, 0x1e, 0xcb, 0x02, 0x98, 0xf2, 0x92, 0x05, 0x1e, 0xb9, 0x9a, + 0x05, 0x1e, 0xa9, 0x96, 0x05, 0x1e, 0xa1, 0x91, 0x05, 0x1e, 0x5a, 0x02, + 0x98, 0xf6, 0x92, 0x05, 0x1e, 0x99, 0x9a, 0x05, 0x1e, 0x88, 0x92, 0x05, + 0x1e, 0x81, 0x9a, 0x05, 0x1e, 0x71, 0x96, 0x05, 0x1e, 0x68, 0x92, 0x05, + 0x1e, 0x49, 0x9a, 0x05, 0x1e, 0x39, 0x96, 0x05, 0x1e, 0x30, 0x9b, 0x05, + 0x1c, 0x83, 0x02, 0x98, 0xfe, 0x97, 0x05, 0x1c, 0x53, 0x02, 0x99, 0x11, + 0x87, 0x05, 0x1c, 0x33, 0x02, 0x99, 0x2b, 0x91, 0x05, 0x1c, 0x13, 0x02, + 0x99, 0x37, 
0x83, 0x05, 0x1b, 0xea, 0x02, 0x99, 0x43, 0x9b, 0x05, 0x1e, + 0x13, 0x02, 0x99, 0x47, 0x97, 0x05, 0x1d, 0xe3, 0x02, 0x99, 0x5a, 0x87, + 0x05, 0x1d, 0xc3, 0x02, 0x99, 0x74, 0x91, 0x05, 0x1d, 0xa3, 0x02, 0x99, + 0x80, 0x83, 0x05, 0x1d, 0x6a, 0x02, 0x99, 0x8c, 0x9b, 0x05, 0x1a, 0x13, + 0x02, 0x99, 0x98, 0x8b, 0x05, 0x19, 0x63, 0x02, 0x99, 0xab, 0x83, 0x05, + 0x19, 0x9b, 0x02, 0x99, 0xbe, 0x91, 0x05, 0x19, 0xbb, 0x02, 0x99, 0xca, + 0x87, 0x05, 0x19, 0xd3, 0x02, 0x99, 0xd6, 0x97, 0x05, 0x19, 0xf2, 0x02, + 0x99, 0xde, 0x96, 0x05, 0x18, 0x49, 0x9a, 0x05, 0x18, 0x51, 0x92, 0x05, + 0x18, 0x61, 0x87, 0x05, 0x18, 0x72, 0x02, 0x99, 0xea, 0x96, 0x05, 0x18, + 0x81, 0x9a, 0x05, 0x18, 0x89, 0x92, 0x05, 0x18, 0x98, 0x91, 0x05, 0x18, + 0xab, 0x02, 0x99, 0xf2, 0x96, 0x05, 0x18, 0xf1, 0x9a, 0x05, 0x18, 0xf9, + 0x92, 0x05, 0x19, 0x09, 0x94, 0x05, 0x19, 0x1a, 0x02, 0x99, 0xfa, 0x96, + 0x05, 0x18, 0xb9, 0x9a, 0x05, 0x18, 0xc1, 0x92, 0x05, 0x18, 0xd0, 0x9a, + 0x05, 0x18, 0xd9, 0x92, 0x05, 0x18, 0xe8, 0x96, 0x05, 0x19, 0x21, 0x9a, + 0x05, 0x19, 0x29, 0x92, 0x05, 0x19, 0x39, 0x94, 0x05, 0x19, 0x4a, 0x02, + 0x99, 0xfe, 0x9b, 0x05, 0x1b, 0xc3, 0x02, 0x9a, 0x02, 0x97, 0x05, 0x1b, + 0x93, 0x02, 0x9a, 0x15, 0x87, 0x05, 0x1b, 0x7b, 0x02, 0x9a, 0x2b, 0x91, + 0x05, 0x1b, 0x5b, 0x02, 0x9a, 0x37, 0x83, 0x05, 0x1b, 0x1a, 0x02, 0x9a, + 0x43, 0x94, 0x05, 0x16, 0x7b, 0x02, 0x9a, 0x4f, 0x96, 0x05, 0x16, 0x51, + 0x9a, 0x05, 0x16, 0x59, 0x92, 0x05, 0x16, 0x68, 0x92, 0x05, 0x16, 0x19, + 0x9a, 0x05, 0x16, 0x08, 0x96, 0x05, 0x16, 0x21, 0x9a, 0x05, 0x16, 0x29, + 0x92, 0x05, 0x16, 0x39, 0x94, 0x05, 0x16, 0x4b, 0x02, 0x9a, 0x53, 0x91, + 0x05, 0x15, 0xda, 0x02, 0x9a, 0x57, 0x96, 0x05, 0x15, 0x71, 0x9a, 0x05, + 0x15, 0x79, 0x92, 0x05, 0x15, 0x89, 0x87, 0x05, 0x15, 0xa2, 0x02, 0x9a, + 0x5f, 0x96, 0x05, 0x15, 0xb1, 0x9a, 0x05, 0x15, 0xb9, 0x92, 0x05, 0x15, + 0xc8, 0x96, 0x05, 0x15, 0xe9, 0x9a, 0x05, 0x15, 0xf1, 0x92, 0x05, 0x16, + 0x00, 0x9a, 0x05, 0x14, 0xf9, 0x92, 0x05, 0x15, 0x08, 0x92, 0x05, 0x14, + 0xf1, 0x9a, 0x05, 0x14, 0xe1, 0x96, 0x05, 0x14, 0xd8, 0x91, 0x05, 0x14, + 0xcb, 0x02, 0x9a, 0x6b, 0x96, 0x05, 0x15, 0x11, 0x9a, 0x05, 0x15, 0x19, + 0x92, 0x05, 0x15, 0x29, 0x94, 0x05, 0x15, 0x3a, 0x02, 0x9a, 0x73, 0x92, + 0x05, 0x14, 0xb9, 0x9a, 0x05, 0x14, 0xa9, 0x96, 0x05, 0x14, 0xa0, 0x87, + 0x05, 0x14, 0x93, 0x02, 0x9a, 0x77, 0x92, 0x05, 0x14, 0x81, 0x9a, 0x05, + 0x14, 0x71, 0x96, 0x05, 0x14, 0x68, 0x91, 0x05, 0x16, 0xeb, 0x02, 0x9a, + 0x7f, 0x83, 0x05, 0x16, 0xd3, 0x02, 0x9a, 0x87, 0x8b, 0x05, 0x16, 0x93, + 0x02, 0x9a, 0x93, 0x87, 0x05, 0x17, 0x03, 0x02, 0x9a, 0xa6, 0x97, 0x05, + 0x17, 0x1b, 0x02, 0x9a, 0xae, 0x9b, 0x05, 0x17, 0x4a, 0x02, 0x9a, 0xbd, + 0x9b, 0x05, 0x1a, 0xeb, 0x02, 0x9a, 0xd0, 0x97, 0x05, 0x1a, 0xbb, 0x02, + 0x9a, 0xe3, 0x87, 0x05, 0x1a, 0x9b, 0x02, 0x9a, 0xfd, 0x91, 0x05, 0x1a, + 0x7b, 0x02, 0x9b, 0x09, 0x83, 0x05, 0x1a, 0x42, 0x02, 0x9b, 0x15, 0x96, + 0x05, 0x15, 0x41, 0x9a, 0x05, 0x15, 0x49, 0x92, 0x05, 0x15, 0x59, 0x94, + 0x05, 0x15, 0x6a, 0x02, 0x9b, 0x21, 0x92, 0x05, 0x14, 0x61, 0x9a, 0x05, + 0x14, 0x50, 0x92, 0x05, 0x14, 0x49, 0x9a, 0x05, 0x14, 0x38, 0x91, 0x05, + 0x14, 0x2a, 0x02, 0x9b, 0x25, 0x92, 0x05, 0x14, 0x19, 0x9a, 0x05, 0x14, + 0x09, 0x96, 0x05, 0x14, 0x00, 0x92, 0x05, 0x13, 0xf9, 0x9a, 0x05, 0x13, + 0xe8, 0x87, 0x05, 0x12, 0xdb, 0x02, 0x9b, 0x2d, 0x91, 0x05, 0x12, 0xc3, + 0x02, 0x9b, 0x35, 0xc2, 0x01, 0xba, 0x05, 0x12, 0xa3, 0x02, 0x9b, 0x41, + 0x83, 0x05, 0x12, 0x83, 0x02, 0x9b, 0x4d, 0x8b, 0x05, 0x12, 0x42, 0x02, + 0x9b, 0x59, 0x96, 0x05, 0x13, 0x71, 0x87, 0x05, 0x13, 0x82, 0x02, 0x9b, + 0x6c, 0x96, 
0x05, 0x13, 0x89, 0x9a, 0x05, 0x13, 0x91, 0x92, 0x05, 0x13, + 0xa0, 0x96, 0x05, 0x13, 0xa9, 0x9a, 0x05, 0x13, 0xb1, 0x92, 0x05, 0x13, + 0xc0, 0x96, 0x05, 0x13, 0xc9, 0x9a, 0x05, 0x13, 0xd1, 0x92, 0x05, 0x13, + 0xe0, 0x8b, 0x05, 0x04, 0x9b, 0x02, 0x9b, 0x70, 0x83, 0x05, 0x04, 0xd3, + 0x02, 0x9b, 0x83, 0x97, 0x05, 0x05, 0x2b, 0x02, 0x9b, 0x8f, 0x91, 0x05, + 0x05, 0x0b, 0x02, 0x9b, 0xa9, 0x9b, 0x05, 0x05, 0x52, 0x02, 0x9b, 0xb5, + 0x8b, 0x05, 0x0b, 0x53, 0x02, 0x9b, 0xc4, 0x83, 0x05, 0x0b, 0x93, 0x02, + 0x9b, 0xd7, 0x17, 0xc2, 0x9b, 0xe3, 0x11, 0xc2, 0x9b, 0xee, 0x87, 0x05, + 0x0b, 0xd2, 0x02, 0x9b, 0xfa, 0x8b, 0x05, 0x0c, 0xcb, 0x02, 0x9c, 0x02, + 0x83, 0x05, 0x0d, 0x03, 0x02, 0x9c, 0x15, 0x97, 0x05, 0x0d, 0x6b, 0x02, + 0x9c, 0x21, 0x91, 0x05, 0x0d, 0x33, 0x02, 0x9c, 0x3b, 0x87, 0x05, 0x0d, + 0x4b, 0x02, 0x9c, 0x43, 0x9b, 0x05, 0x0d, 0x9a, 0x02, 0x9c, 0x4b, 0x87, + 0x05, 0x23, 0xbb, 0x02, 0x9c, 0x5e, 0x92, 0x05, 0x23, 0xa1, 0x9a, 0x05, + 0x23, 0x91, 0x96, 0x05, 0x23, 0x88, 0x91, 0x05, 0x23, 0xdb, 0x02, 0x9c, + 0x6a, 0x96, 0x05, 0x24, 0x09, 0x9a, 0x05, 0x24, 0x11, 0x92, 0x05, 0x24, + 0x21, 0x94, 0x05, 0x24, 0x32, 0x02, 0x9c, 0x76, 0x96, 0x05, 0x23, 0xe9, + 0x9a, 0x05, 0x23, 0xf1, 0x92, 0x05, 0x24, 0x00, 0x96, 0x05, 0x24, 0x39, + 0x9a, 0x05, 0x24, 0x41, 0x92, 0x05, 0x24, 0x51, 0x94, 0x05, 0x24, 0x62, + 0x02, 0x9c, 0x7a, 0x94, 0x05, 0x23, 0x83, 0x02, 0x9c, 0x7e, 0x92, 0x05, + 0x23, 0x71, 0x9a, 0x05, 0x23, 0x61, 0x96, 0x05, 0x23, 0x58, 0x96, 0x05, + 0x22, 0xe9, 0x9a, 0x05, 0x22, 0xf1, 0x92, 0x05, 0x23, 0x01, 0x87, 0x05, + 0x23, 0x1a, 0x02, 0x9c, 0x82, 0x9a, 0x05, 0x23, 0x41, 0x92, 0x05, 0x23, + 0x51, 0x96, 0x05, 0x23, 0x38, 0x9a, 0x05, 0x23, 0x28, 0x97, 0x05, 0x12, + 0x13, 0x02, 0x9c, 0x8e, 0xc2, 0x02, 0x0a, 0x05, 0x11, 0x8b, 0x02, 0x9c, + 0xa8, 0x83, 0x05, 0x11, 0xa3, 0x02, 0x9c, 0xac, 0x91, 0x05, 0x11, 0xdb, + 0x02, 0x9c, 0xb8, 0x87, 0x05, 0x11, 0xf2, 0x02, 0x9c, 0xc4, 0x96, 0x05, + 0x05, 0x71, 0x9a, 0x05, 0x05, 0x79, 0x92, 0x05, 0x05, 0x89, 0x87, 0x05, + 0x05, 0x9a, 0x02, 0x9c, 0xcc, 0x96, 0x05, 0x05, 0xa9, 0x9a, 0x05, 0x05, + 0xb1, 0x92, 0x05, 0x05, 0xc0, 0x91, 0x05, 0x05, 0xdb, 0x02, 0x9c, 0xd4, + 0x96, 0x05, 0x06, 0x19, 0x9a, 0x05, 0x06, 0x21, 0x92, 0x05, 0x06, 0x31, + 0x94, 0x05, 0x06, 0x42, 0x02, 0x9c, 0xe0, 0x96, 0x05, 0x05, 0xe9, 0x9a, + 0x05, 0x05, 0xf1, 0x92, 0x05, 0x06, 0x00, 0x9a, 0x05, 0x06, 0x08, 0x96, + 0x05, 0x06, 0x49, 0x9a, 0x05, 0x06, 0x51, 0x92, 0x05, 0x06, 0x60, 0xcc, + 0x1c, 0x94, 0x05, 0x00, 0xa8, 0x96, 0x05, 0x00, 0x21, 0x9a, 0x05, 0x00, + 0x29, 0x92, 0x05, 0x00, 0x38, 0x96, 0x05, 0x00, 0xb1, 0x9a, 0x05, 0x00, + 0xb9, 0x92, 0x05, 0x00, 0xc9, 0x87, 0x05, 0x00, 0xe2, 0x02, 0x9c, 0xe4, + 0x96, 0x05, 0x00, 0xf1, 0x9a, 0x05, 0x00, 0xf9, 0x92, 0x05, 0x01, 0x08, + 0x91, 0x05, 0x01, 0x1b, 0x02, 0x9c, 0xf0, 0x96, 0x05, 0x01, 0x61, 0x9a, + 0x05, 0x01, 0x69, 0x92, 0x05, 0x01, 0x79, 0x94, 0x05, 0x01, 0x8a, 0x02, + 0x9c, 0xf8, 0x96, 0x05, 0x01, 0x29, 0x9a, 0x05, 0x01, 0x31, 0x92, 0x05, + 0x01, 0x40, 0x9a, 0x05, 0x01, 0x49, 0x92, 0x05, 0x01, 0x58, 0x96, 0x05, + 0x01, 0x91, 0x9a, 0x05, 0x01, 0x99, 0x92, 0x05, 0x01, 0xa9, 0x94, 0x05, + 0x01, 0xba, 0x02, 0x9c, 0xfc, 0x8b, 0x05, 0x02, 0xc3, 0x02, 0x9d, 0x00, + 0x83, 0x05, 0x03, 0x03, 0x02, 0x9d, 0x13, 0x97, 0x05, 0x03, 0x73, 0x02, + 0x9d, 0x1f, 0x91, 0x05, 0x03, 0x3b, 0x02, 0x9d, 0x39, 0x87, 0x05, 0x03, + 0x53, 0x02, 0x9d, 0x45, 0x9b, 0x05, 0x03, 0xa2, 0x02, 0x9d, 0x4d, 0x96, + 0x05, 0x01, 0xc1, 0x9a, 0x05, 0x01, 0xc9, 0x92, 0x05, 0x01, 0xd9, 0x87, + 0x05, 0x01, 0xea, 0x02, 0x9d, 0x59, 0x96, 0x05, 0x01, 0xf9, 0x9a, 0x05, + 0x02, 0x01, 
0x92, 0x05, 0x02, 0x10, 0x91, 0x05, 0x02, 0x23, 0x02, 0x9d, + 0x61, 0x96, 0x05, 0x02, 0x51, 0x9a, 0x05, 0x02, 0x59, 0x92, 0x05, 0x02, + 0x69, 0x94, 0x05, 0x02, 0x7a, 0x02, 0x9d, 0x69, 0x96, 0x05, 0x02, 0x31, + 0x9a, 0x05, 0x02, 0x39, 0x92, 0x05, 0x02, 0x48, 0x96, 0x05, 0x02, 0x81, + 0x9a, 0x05, 0x02, 0x89, 0x92, 0x05, 0x02, 0x99, 0x94, 0x05, 0x02, 0xaa, + 0x02, 0x9d, 0x6d, 0x96, 0x05, 0x06, 0x69, 0x9a, 0x05, 0x06, 0x71, 0x92, + 0x05, 0x06, 0x80, 0x96, 0x05, 0x06, 0x89, 0x9a, 0x05, 0x06, 0x91, 0x92, + 0x05, 0x06, 0xa0, 0x9a, 0x05, 0x06, 0xa9, 0x92, 0x05, 0x06, 0xb8, 0x96, + 0x05, 0x06, 0xc1, 0x9a, 0x05, 0x06, 0xc9, 0x92, 0x05, 0x06, 0xd9, 0x94, + 0x05, 0x06, 0xea, 0x02, 0x9d, 0x71, 0x96, 0x05, 0x06, 0xf1, 0x9a, 0x05, + 0x06, 0xf9, 0x92, 0x05, 0x07, 0x08, 0x96, 0x05, 0x07, 0x11, 0x9a, 0x05, + 0x07, 0x19, 0x92, 0x05, 0x07, 0x29, 0x87, 0x05, 0x07, 0x42, 0x02, 0x9d, + 0x75, 0x96, 0x05, 0x07, 0x51, 0x9a, 0x05, 0x07, 0x59, 0x92, 0x05, 0x07, + 0x68, 0x96, 0x05, 0x07, 0x71, 0x9a, 0x05, 0x07, 0x79, 0x92, 0x05, 0x07, + 0x88, 0x9a, 0x05, 0x07, 0x91, 0x92, 0x05, 0x07, 0x98, 0x96, 0x05, 0x07, + 0xa1, 0x9a, 0x05, 0x07, 0xa9, 0x92, 0x05, 0x07, 0xb9, 0x94, 0x05, 0x07, + 0xca, 0x02, 0x9d, 0x81, 0x96, 0x05, 0x07, 0xd1, 0x9a, 0x05, 0x07, 0xd9, + 0x92, 0x05, 0x07, 0xe9, 0x94, 0x05, 0x07, 0xfa, 0x02, 0x9d, 0x85, 0x96, + 0x05, 0x08, 0x01, 0x9a, 0x05, 0x08, 0x09, 0x92, 0x05, 0x08, 0x19, 0x87, + 0x05, 0x08, 0x2a, 0x02, 0x9d, 0x89, 0x96, 0x05, 0x08, 0x39, 0x9a, 0x05, + 0x08, 0x41, 0x92, 0x05, 0x08, 0x50, 0x91, 0x05, 0x08, 0x63, 0x02, 0x9d, + 0x91, 0x96, 0x05, 0x08, 0xa1, 0x9a, 0x05, 0x08, 0xa9, 0x92, 0x05, 0x08, + 0xb9, 0x94, 0x05, 0x08, 0xca, 0x02, 0x9d, 0x95, 0x96, 0x05, 0x08, 0x69, + 0x9a, 0x05, 0x08, 0x71, 0x92, 0x05, 0x08, 0x80, 0x9a, 0x05, 0x08, 0x89, + 0x92, 0x05, 0x08, 0x98, 0x8b, 0x05, 0x09, 0xc3, 0x02, 0x9d, 0x99, 0x83, + 0x05, 0x09, 0xfb, 0x02, 0x9d, 0xac, 0x97, 0x05, 0x0a, 0x6b, 0x02, 0x9d, + 0xb8, 0x91, 0x05, 0x0a, 0x33, 0x02, 0x9d, 0xd2, 0x87, 0x05, 0x0a, 0x4a, + 0x02, 0x9d, 0xde, 0x96, 0x05, 0x08, 0xd1, 0x9a, 0x05, 0x08, 0xd9, 0x92, + 0x05, 0x08, 0xe9, 0x87, 0x05, 0x08, 0xfa, 0x02, 0x9d, 0xe6, 0x96, 0x05, + 0x09, 0x09, 0x9a, 0x05, 0x09, 0x11, 0x92, 0x05, 0x09, 0x20, 0x91, 0x05, + 0x09, 0x3b, 0x02, 0x9d, 0xee, 0x96, 0x05, 0x09, 0x81, 0x9a, 0x05, 0x09, + 0x89, 0x92, 0x05, 0x09, 0x99, 0x94, 0x05, 0x09, 0xaa, 0x02, 0x9d, 0xfa, + 0x96, 0x05, 0x09, 0x49, 0x9a, 0x05, 0x09, 0x51, 0x92, 0x05, 0x09, 0x60, + 0x9a, 0x05, 0x09, 0x69, 0x92, 0x05, 0x09, 0x78, 0x96, 0x05, 0x0d, 0xb9, + 0x9a, 0x05, 0x0d, 0xc1, 0x92, 0x05, 0x0d, 0xd1, 0x87, 0x05, 0x0d, 0xea, + 0x02, 0x9d, 0xfe, 0x96, 0x05, 0x0d, 0xf9, 0x9a, 0x05, 0x0e, 0x01, 0x92, + 0x05, 0x0e, 0x10, 0x91, 0x05, 0x0e, 0x2b, 0x02, 0x9e, 0x0a, 0x96, 0x05, + 0x0e, 0x71, 0x9a, 0x05, 0x0e, 0x79, 0x92, 0x05, 0x0e, 0x89, 0x94, 0x05, + 0x0e, 0x9a, 0x02, 0x9e, 0x16, 0x96, 0x05, 0x0e, 0x39, 0x9a, 0x05, 0x0e, + 0x41, 0x92, 0x05, 0x0e, 0x50, 0x9a, 0x05, 0x0e, 0x59, 0x92, 0x05, 0x0e, + 0x68, 0x96, 0x05, 0x0e, 0xa1, 0x9a, 0x05, 0x0e, 0xa9, 0x92, 0x05, 0x0e, + 0xb9, 0x94, 0x05, 0x0e, 0xca, 0x02, 0x9e, 0x1a, 0x96, 0x05, 0x0e, 0xd1, + 0x9a, 0x05, 0x0e, 0xd9, 0x92, 0x05, 0x0e, 0xe9, 0x87, 0x05, 0x0f, 0x02, + 0x02, 0x9e, 0x1e, 0x96, 0x05, 0x0f, 0x11, 0x9a, 0x05, 0x0f, 0x19, 0x92, + 0x05, 0x0f, 0x28, 0x91, 0x05, 0x0f, 0x43, 0x02, 0x9e, 0x2a, 0x96, 0x05, + 0x0f, 0x91, 0x9a, 0x05, 0x0f, 0x99, 0x92, 0x05, 0x0f, 0xa9, 0x94, 0x05, + 0x0f, 0xba, 0x02, 0x9e, 0x36, 0x96, 0x05, 0x0f, 0x51, 0x9a, 0x05, 0x0f, + 0x59, 0x92, 0x05, 0x0f, 0x68, 0x96, 0x05, 0x0f, 0x71, 0x9a, 0x05, 0x0f, + 0x79, 0x92, 
0x05, 0x0f, 0x88, 0x8b, 0x05, 0x10, 0xb3, 0x02, 0x9e, 0x3a, + 0x83, 0x05, 0x10, 0xe3, 0x02, 0x9e, 0x49, 0x97, 0x05, 0x11, 0x63, 0x02, + 0x9e, 0x55, 0x91, 0x05, 0x11, 0x23, 0x02, 0x9e, 0x6f, 0x87, 0x05, 0x11, + 0x42, 0x02, 0x9e, 0x7b, 0x96, 0x05, 0x0f, 0xc1, 0x9a, 0x05, 0x0f, 0xc9, + 0x92, 0x05, 0x0f, 0xd9, 0x87, 0x05, 0x0f, 0xea, 0x02, 0x9e, 0x87, 0x96, + 0x05, 0x0f, 0xf9, 0x9a, 0x05, 0x10, 0x01, 0x92, 0x05, 0x10, 0x10, 0x91, + 0x05, 0x10, 0x23, 0x02, 0x9e, 0x8f, 0x96, 0x05, 0x10, 0x71, 0x9a, 0x05, + 0x10, 0x79, 0x92, 0x05, 0x10, 0x89, 0x94, 0x05, 0x10, 0x9a, 0x02, 0x9e, + 0x97, 0x96, 0x05, 0x10, 0x31, 0x9a, 0x05, 0x10, 0x39, 0x92, 0x05, 0x10, + 0x48, 0x96, 0x05, 0x10, 0x51, 0x9a, 0x05, 0x10, 0x59, 0x92, 0x05, 0x10, + 0x68, 0x87, 0x05, 0x25, 0xd8, 0xc2, 0x00, 0x7e, 0x05, 0x24, 0x99, 0xc2, + 0x00, 0x11, 0x05, 0x25, 0x38, 0x92, 0x05, 0x24, 0xa1, 0x96, 0x05, 0x25, + 0x18, 0x9b, 0x05, 0x25, 0x81, 0xc2, 0x00, 0x33, 0x05, 0x25, 0xd1, 0xc2, + 0x00, 0xfe, 0x05, 0x26, 0x01, 0xc2, 0x00, 0x11, 0x05, 0x26, 0x10, 0xc2, + 0x00, 0x11, 0x05, 0x24, 0xb1, 0xc2, 0x01, 0xba, 0x05, 0x25, 0x30, 0xc2, + 0x00, 0x8d, 0x05, 0x24, 0xc9, 0xc2, 0x01, 0xba, 0x05, 0x24, 0xf9, 0xc2, + 0x00, 0x11, 0x05, 0x25, 0xf8, 0x92, 0x05, 0x25, 0x11, 0x94, 0x05, 0x26, + 0x08, 0xc2, 0x00, 0xa4, 0x05, 0x25, 0x51, 0x9b, 0x05, 0x25, 0xa9, 0xc2, + 0x02, 0x0a, 0x05, 0x25, 0xb8, 0x8e, 0x08, 0x74, 0x60, 0xc3, 0x32, 0xce, + 0x08, 0x74, 0x41, 0xc2, 0x03, 0x4e, 0x08, 0x74, 0x38, 0x44, 0xe1, 0x77, + 0x42, 0x9e, 0x9b, 0x8b, 0x00, 0xa7, 0x70, 0x91, 0x00, 0xa8, 0xeb, 0x02, + 0x9e, 0xb9, 0x83, 0x00, 0xa9, 0x0b, 0x02, 0x9e, 0xc1, 0x8b, 0x00, 0xa8, + 0xcb, 0x02, 0x9e, 0xc5, 0x87, 0x00, 0xa8, 0xb8, 0x9b, 0x00, 0xc6, 0x09, + 0x83, 0x00, 0xa8, 0xb0, 0x9b, 0x00, 0xc6, 0x01, 0x91, 0x00, 0xa8, 0xa0, + 0x8b, 0x00, 0xa8, 0x90, 0xc2, 0x16, 0x1c, 0x00, 0xa4, 0x29, 0xc2, 0x14, + 0x77, 0x00, 0xa4, 0x31, 0xc2, 0x38, 0x2a, 0x00, 0xa4, 0x39, 0xc2, 0x02, + 0x98, 0x00, 0xa4, 0x40, 0x83, 0x00, 0xa8, 0x10, 0x8b, 0x00, 0xa7, 0xd0, + 0x91, 0x00, 0xa7, 0xf0, 0x43, 0x67, 0xcd, 0xc2, 0x9e, 0xc9, 0x0a, 0x42, + 0x9e, 0xde, 0xc4, 0xdf, 0x5b, 0x00, 0xa9, 0xe9, 0x19, 0xc2, 0x9e, 0xf3, + 0x15, 0xc2, 0x9e, 0xff, 0xc4, 0xe0, 0xc3, 0x00, 0xa4, 0x11, 0xc4, 0xe3, + 0x43, 0x00, 0xa5, 0x01, 0xc4, 0xda, 0xeb, 0x00, 0xa5, 0xd1, 0xc4, 0xe4, + 0x67, 0x00, 0xa6, 0x79, 0xc4, 0xde, 0xb6, 0x00, 0xa3, 0x28, 0x8b, 0x00, + 0xa6, 0x08, 0x91, 0x00, 0xc6, 0x60, 0x8b, 0x00, 0xc6, 0x40, 0x83, 0x00, + 0xa6, 0x68, 0x83, 0x00, 0xb3, 0xb0, 0x91, 0x00, 0xb3, 0xa0, 0x8b, 0x00, + 0xb3, 0x90, 0x8b, 0x00, 0xb3, 0x81, 0x83, 0x00, 0xac, 0xa2, 0x02, 0x9f, + 0x26, 0x91, 0x00, 0xac, 0x90, 0x8b, 0x00, 0xac, 0x80, 0x83, 0x00, 0xab, + 0xcb, 0x02, 0x9f, 0x2a, 0x91, 0x00, 0xab, 0xbb, 0x02, 0x9f, 0x2e, 0x8b, + 0x00, 0xab, 0xab, 0x02, 0x9f, 0x32, 0x87, 0x00, 0xab, 0xa0, 0x8b, 0x00, + 0xab, 0x18, 0x06, 0xc2, 0x9f, 0x36, 0x0c, 0xc2, 0x9f, 0x46, 0x09, 0xc2, + 0x9f, 0x67, 0x16, 0xc2, 0x9f, 0x89, 0x42, 0x11, 0xee, 0xc2, 0x9f, 0x99, + 0x1b, 0xc2, 0x9f, 0xb0, 0x0f, 0xc2, 0x9f, 0xc7, 0x10, 0xc2, 0x9f, 0xde, + 0x0d, 0xc2, 0x9f, 0xf9, 0x92, 0x00, 0xaf, 0x73, 0x02, 0xa0, 0x04, 0x8a, + 0x00, 0xa2, 0x5b, 0x02, 0xa0, 0x1b, 0x19, 0xc2, 0xa0, 0x29, 0x14, 0xc2, + 0xa0, 0x40, 0x0e, 0xc2, 0xa0, 0x57, 0xc2, 0x02, 0xe0, 0x00, 0xa0, 0x41, + 0x8b, 0x00, 0xa0, 0x4b, 0x02, 0xa0, 0x72, 0x9c, 0x00, 0xb2, 0x33, 0x02, + 0xa0, 0x78, 0x15, 0x42, 0xa0, 0x8f, 0x8b, 0x00, 0xa4, 0x50, 0x91, 0x00, + 0xa4, 0xd0, 0x8b, 0x00, 0xa4, 0xb0, 0x83, 0x00, 0xa4, 0xf0, 0x83, 0x00, + 0xad, 0xb9, 0x91, 0x00, 0xad, 0xb1, 0x8b, 0x00, 0xad, 0xa9, 0x87, 0x00, + 0xad, 0xa0, 
0x83, 0x00, 0xad, 0xf9, 0x91, 0x00, 0xad, 0xf1, 0x8b, 0x00, + 0xad, 0xe9, 0x87, 0x00, 0xad, 0xe0, 0x83, 0x00, 0xad, 0xd9, 0x91, 0x00, + 0xad, 0xd1, 0x8b, 0x00, 0xad, 0xc9, 0x87, 0x00, 0xad, 0xc0, 0x91, 0x00, + 0xc7, 0x48, 0x83, 0x00, 0xab, 0x73, 0x02, 0xa0, 0xad, 0x91, 0x00, 0xab, + 0x6b, 0x02, 0xa0, 0xb1, 0xc2, 0x00, 0x28, 0x00, 0xc7, 0x29, 0x8b, 0x00, + 0xab, 0x61, 0x87, 0x00, 0xab, 0x58, 0x83, 0x00, 0xc7, 0x23, 0x02, 0xa0, + 0xb5, 0x87, 0x00, 0xc7, 0x18, 0x83, 0x00, 0xad, 0x63, 0x02, 0xa0, 0xb9, + 0x91, 0x00, 0xad, 0x53, 0x02, 0xa0, 0xbd, 0x8b, 0x00, 0xad, 0x43, 0x02, + 0xa0, 0xc1, 0x87, 0x00, 0xad, 0x38, 0x83, 0x00, 0xab, 0x38, 0x91, 0x00, + 0xab, 0x28, 0x8b, 0x00, 0xab, 0x10, 0x8b, 0x00, 0xa2, 0x68, 0x91, 0x00, + 0xa2, 0xf8, 0x8b, 0x00, 0xa2, 0xd8, 0x83, 0x00, 0xa3, 0x18, 0x46, 0x92, + 0x9a, 0xc2, 0xa0, 0xc5, 0xc5, 0xbc, 0x9d, 0x00, 0xc6, 0xe8, 0x48, 0xba, + 0x1a, 0x42, 0xa1, 0x0c, 0x83, 0x00, 0xaa, 0x70, 0x91, 0x00, 0xc6, 0x90, + 0x8b, 0x00, 0xc6, 0x80, 0x8b, 0x00, 0xaa, 0x28, 0x14, 0xc2, 0xa1, 0x1b, + 0x15, 0xc2, 0xa1, 0x25, 0xc5, 0x31, 0xee, 0x00, 0xa0, 0xf9, 0xc5, 0x1f, + 0x0c, 0x00, 0xa1, 0x01, 0xd0, 0x58, 0x02, 0x00, 0xa1, 0x09, 0xcd, 0x7f, + 0x3f, 0x00, 0xa1, 0x11, 0x42, 0x00, 0x58, 0xc2, 0xa1, 0x31, 0xca, 0x3b, + 0x06, 0x00, 0xa1, 0x39, 0xc4, 0x25, 0xd5, 0x00, 0xa1, 0x48, 0x8b, 0x00, + 0xaa, 0xa0, 0x8a, 0x00, 0xc6, 0xd8, 0x19, 0x42, 0xa1, 0x3d, 0x8b, 0x00, + 0xa9, 0x38, 0x83, 0x00, 0xa9, 0xd8, 0x91, 0x00, 0xa9, 0xb8, 0x8b, 0x00, + 0xa9, 0x98, 0xc3, 0x14, 0x72, 0x00, 0xa2, 0x41, 0xc2, 0x01, 0x24, 0x00, + 0xa1, 0xa8, 0x8b, 0x00, 0xa6, 0xa0, 0x83, 0x00, 0xad, 0x28, 0x91, 0x00, + 0xad, 0x18, 0x8b, 0x00, 0xad, 0x08, 0x8b, 0x00, 0xa7, 0x00, 0x91, 0x00, + 0xa7, 0x20, 0x83, 0x00, 0xa7, 0x40, 0x8b, 0x00, 0xa5, 0x20, 0x94, 0x00, + 0xaa, 0x91, 0x8e, 0x00, 0xa7, 0x60, 0xca, 0xa5, 0x8a, 0x00, 0xa8, 0x48, + 0x8b, 0x00, 0xa5, 0x80, 0x91, 0x00, 0xa5, 0xa0, 0x83, 0x00, 0xa5, 0xc0, + 0x9b, 0x00, 0xc5, 0xc9, 0x83, 0x00, 0xa4, 0x00, 0x8b, 0x00, 0xa3, 0xc0, + 0x91, 0x00, 0xa3, 0xe0, 0x8b, 0x00, 0xa3, 0x60, 0x9b, 0x00, 0xc5, 0xb1, + 0x91, 0x00, 0xa2, 0x10, 0x83, 0x00, 0xa2, 0x30, 0x8b, 0x00, 0xa1, 0xf0, + 0x8b, 0x00, 0xa1, 0x80, 0x8b, 0x00, 0xab, 0xf0, 0x97, 0x08, 0x15, 0xd9, + 0x9f, 0x08, 0x16, 0x41, 0xa0, 0x08, 0x16, 0x80, 0xc3, 0x4b, 0x13, 0x08, + 0x2a, 0x79, 0xc2, 0x0c, 0x42, 0x08, 0x2a, 0xa8, 0xc2, 0x00, 0x71, 0x08, + 0x29, 0xb9, 0x83, 0x08, 0x29, 0xd8, 0x83, 0x08, 0x29, 0xcb, 0x02, 0xa1, + 0x4b, 0xc2, 0x69, 0xa6, 0x08, 0x2a, 0x49, 0x8b, 0x08, 0x2a, 0x50, 0x94, + 0x08, 0x2a, 0x11, 0xc2, 0x17, 0xb6, 0x08, 0x2b, 0x00, 0x9b, 0x08, 0x2a, + 0x59, 0x99, 0x08, 0x2a, 0xf8, 0x83, 0x08, 0x29, 0xeb, 0x02, 0xa1, 0x4f, + 0xc2, 0x69, 0xa6, 0x08, 0x2a, 0xe8, 0xc2, 0x02, 0xa0, 0x01, 0x74, 0x19, + 0xc4, 0x02, 0xde, 0x01, 0x74, 0x20, 0xce, 0x70, 0x88, 0x01, 0x75, 0x31, + 0xc3, 0x00, 0xbf, 0x01, 0x76, 0x30, 0xc3, 0xac, 0xc1, 0x01, 0x76, 0x61, + 0xc4, 0x8e, 0x34, 0x01, 0x77, 0x40, 0x89, 0x01, 0x8f, 0x08, 0x83, 0x05, + 0x5b, 0xb1, 0x87, 0x05, 0x5b, 0xc1, 0x8b, 0x05, 0x5b, 0xc9, 0x91, 0x05, + 0x5b, 0xd1, 0x97, 0x05, 0x5b, 0xd9, 0x98, 0x05, 0x5b, 0xe0, 0x83, 0x05, + 0x5d, 0xf9, 0x87, 0x00, 0x9f, 0xc1, 0x8b, 0x00, 0x9f, 0xc9, 0x91, 0x00, + 0x9f, 0xd1, 0x97, 0x00, 0x9f, 0xd9, 0x98, 0x00, 0x9f, 0xe0, 0x98, 0x05, + 0x5d, 0xf1, 0x97, 0x05, 0x5d, 0xe9, 0x91, 0x05, 0x5d, 0xe1, 0x8b, 0x05, + 0x5d, 0xd9, 0x87, 0x05, 0x5d, 0xd1, 0x83, 0x05, 0x5d, 0xc8, 0x15, 0xc2, + 0xa1, 0x53, 0x0e, 0xc2, 0xa1, 0x6b, 0x83, 0x05, 0x5d, 0x21, 0x8b, 0x05, + 0x5d, 0x41, 0x87, 0x05, 0x5d, 0x30, 0x91, 0x05, 0x5c, 0x99, 0x8b, 0x05, + 0x5c, 0x91, 
0x87, 0x05, 0x5c, 0x89, 0x83, 0x05, 0x5c, 0x73, 0x02, 0xa1, + 0x83, 0x97, 0x05, 0x5c, 0xa1, 0x98, 0x05, 0x5c, 0xa8, 0xc2, 0x00, 0xc1, + 0x05, 0x5c, 0x79, 0x83, 0x05, 0x5b, 0xe9, 0x87, 0x05, 0x5b, 0xf1, 0x8b, + 0x05, 0x5b, 0xf9, 0x91, 0x05, 0x5c, 0x01, 0x97, 0x05, 0x5c, 0x09, 0x98, + 0x05, 0x5c, 0x10, 0x97, 0x05, 0x5c, 0x69, 0x91, 0x05, 0x5c, 0x61, 0x8b, + 0x05, 0x5c, 0x59, 0x87, 0x05, 0x5c, 0x51, 0x83, 0x05, 0x5c, 0x49, 0x98, + 0x00, 0x9f, 0xe8, 0x98, 0x05, 0x5c, 0x41, 0x97, 0x05, 0x5c, 0x39, 0x91, + 0x05, 0x5c, 0x31, 0x8b, 0x05, 0x5c, 0x29, 0x87, 0x05, 0x5c, 0x21, 0x83, + 0x05, 0x5c, 0x18, 0x83, 0x05, 0x5c, 0xb1, 0x87, 0x05, 0x5c, 0xb9, 0x8b, + 0x05, 0x5c, 0xc1, 0x91, 0x05, 0x5c, 0xc9, 0x97, 0x05, 0x5c, 0xd1, 0x98, + 0x05, 0x5c, 0xd8, 0x83, 0x05, 0x5c, 0xe1, 0x87, 0x05, 0x5c, 0xf1, 0x8b, + 0x05, 0x5c, 0xf9, 0x91, 0x05, 0x5d, 0x01, 0x97, 0x05, 0x5d, 0x09, 0x98, + 0x05, 0x5d, 0x10, 0x83, 0x05, 0x5d, 0x19, 0x87, 0x05, 0x5d, 0x29, 0x8b, + 0x05, 0x5d, 0x39, 0x91, 0x05, 0x5d, 0x49, 0x97, 0x05, 0x5d, 0x51, 0x98, + 0x05, 0x5d, 0x59, 0xc2, 0x00, 0xdb, 0x05, 0x5d, 0x60, 0x83, 0x00, 0x9d, + 0x31, 0x87, 0x00, 0x9d, 0x41, 0x8b, 0x00, 0x9d, 0x49, 0x91, 0x00, 0x9d, + 0x51, 0x97, 0x00, 0x9d, 0x59, 0x98, 0x00, 0x9d, 0x60, 0x83, 0x00, 0x9d, + 0x69, 0x87, 0x00, 0x9d, 0x71, 0x8b, 0x00, 0x9d, 0x79, 0x91, 0x00, 0x9d, + 0x81, 0x97, 0x00, 0x9d, 0x89, 0x98, 0x00, 0x9d, 0x91, 0xc2, 0x00, 0xc1, + 0x00, 0x9d, 0xf8, 0x83, 0x00, 0x9d, 0x99, 0x87, 0x00, 0x9d, 0xa1, 0x8b, + 0x00, 0x9d, 0xa9, 0x91, 0x00, 0x9d, 0xb1, 0x97, 0x00, 0x9d, 0xb9, 0x98, + 0x00, 0x9d, 0xc0, 0x83, 0x00, 0x9d, 0xc9, 0x87, 0x00, 0x9d, 0xd1, 0x8b, + 0x00, 0x9d, 0xd9, 0x91, 0x00, 0x9d, 0xe1, 0x97, 0x00, 0x9d, 0xe9, 0x98, + 0x00, 0x9f, 0xa8, 0x83, 0x00, 0x9d, 0xf3, 0x02, 0xa1, 0x87, 0x87, 0x00, + 0x9e, 0x09, 0x8b, 0x00, 0x9e, 0x11, 0x91, 0x00, 0x9e, 0x19, 0x97, 0x00, + 0x9e, 0x21, 0x98, 0x00, 0x9e, 0x28, 0x83, 0x00, 0x9e, 0x31, 0x87, 0x00, + 0x9e, 0x39, 0x8b, 0x00, 0x9e, 0x41, 0x91, 0x00, 0x9e, 0x49, 0x97, 0x00, + 0x9e, 0x51, 0x98, 0x00, 0x9e, 0x58, 0x83, 0x00, 0x9e, 0x61, 0x87, 0x00, + 0x9e, 0x71, 0x8b, 0x00, 0x9e, 0x79, 0x91, 0x00, 0x9e, 0x81, 0x97, 0x00, + 0x9e, 0x89, 0x98, 0x00, 0x9e, 0x90, 0x83, 0x00, 0x9e, 0x99, 0x87, 0x00, + 0x9e, 0xa9, 0x8b, 0x00, 0x9e, 0xb9, 0x91, 0x00, 0x9e, 0xc9, 0x97, 0x00, + 0x9e, 0xd1, 0x98, 0x00, 0x9e, 0xd9, 0xc2, 0x00, 0xdb, 0x00, 0x9e, 0xe0, + 0x83, 0x00, 0x9e, 0xa1, 0x87, 0x00, 0x9e, 0xb1, 0x8b, 0x00, 0x9e, 0xc1, + 0x0e, 0xc2, 0xa1, 0x8b, 0x15, 0x42, 0xa1, 0xa3, 0x83, 0x00, 0x9f, 0x49, + 0x87, 0x00, 0x9f, 0x51, 0x8b, 0x00, 0x9f, 0x59, 0x91, 0x00, 0x9f, 0x61, + 0x97, 0x00, 0x9f, 0x69, 0x98, 0x00, 0x9f, 0x70, 0x83, 0x00, 0x9f, 0x79, + 0x87, 0x00, 0x9f, 0x81, 0x8b, 0x00, 0x9f, 0x89, 0x91, 0x00, 0x9f, 0x91, + 0x97, 0x00, 0x9f, 0x99, 0x98, 0x00, 0x9f, 0xa0, 0xc3, 0x0e, 0xa7, 0x00, + 0x04, 0x41, 0xd2, 0x49, 0x55, 0x00, 0x04, 0x48, 0xc3, 0x39, 0x6e, 0x08, + 0x88, 0xa1, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x98, 0xc3, 0x39, 0x6e, 0x08, + 0x88, 0x91, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x88, 0x8b, 0x08, 0x8a, 0x30, + 0x83, 0x08, 0x8a, 0x29, 0x97, 0x08, 0x89, 0x79, 0x8b, 0x08, 0x89, 0x68, + 0x8b, 0x08, 0x89, 0x80, 0x97, 0x08, 0x89, 0x58, 0x8b, 0x08, 0x89, 0x48, + 0xc4, 0x18, 0x10, 0x08, 0x89, 0xe9, 0xc2, 0x22, 0xcc, 0x08, 0x89, 0xe0, + 0xc3, 0x0d, 0x14, 0x08, 0x89, 0xd9, 0xc3, 0x09, 0x9e, 0x08, 0x89, 0xd0, + 0xc4, 0x02, 0xde, 0x08, 0x89, 0xc9, 0xc2, 0x02, 0xa0, 0x08, 0x89, 0xc0, + 0xc2, 0x0f, 0xe1, 0x05, 0x50, 0x51, 0x83, 0x05, 0x50, 0x58, 0xc2, 0x25, + 0x3b, 0x05, 0x50, 0x91, 0x83, 0x05, 0x50, 0x89, 0xc2, 0x0f, 0xe1, 0x05, + 0x50, 0x80, 
0x89, 0x05, 0x52, 0x10, 0xc4, 0x18, 0x12, 0x08, 0x7e, 0x51, + 0x91, 0x08, 0x7e, 0x30, 0xd7, 0x27, 0x74, 0x0f, 0xaa, 0x08, 0xce, 0x74, + 0x24, 0x01, 0x72, 0x81, 0xcd, 0x79, 0x5b, 0x01, 0x72, 0x88, 0xc3, 0x02, + 0x44, 0x0f, 0x01, 0x51, 0xc4, 0xac, 0x24, 0x0f, 0x00, 0xb8, 0x47, 0x1c, + 0xa0, 0xc2, 0xa1, 0xbb, 0xcb, 0x98, 0x00, 0x0f, 0x00, 0x51, 0xc3, 0x78, + 0xde, 0x0f, 0x00, 0x48, 0xc6, 0xc8, 0x01, 0x0f, 0x01, 0x41, 0xc3, 0xc8, + 0x92, 0x0f, 0x00, 0x08, 0x91, 0x0f, 0x01, 0x31, 0x97, 0x0f, 0x01, 0x19, + 0xc3, 0x01, 0xbd, 0x0f, 0x01, 0x09, 0x07, 0x42, 0xa1, 0xc7, 0xc8, 0xae, + 0x6b, 0x0f, 0x01, 0x21, 0x0a, 0xc2, 0xa1, 0xd1, 0xc4, 0xe4, 0xc7, 0x0f, + 0x00, 0xa0, 0xc2, 0x00, 0xba, 0x0f, 0x00, 0xe1, 0xc5, 0xd9, 0x52, 0x0f, + 0x00, 0xa8, 0xc5, 0xdd, 0xb2, 0x0f, 0x00, 0x61, 0xc4, 0xe4, 0x2b, 0x0f, + 0x00, 0x20, 0xc5, 0xda, 0x24, 0x0f, 0x00, 0x41, 0xc6, 0xd3, 0x73, 0x0f, + 0x00, 0x30, 0x48, 0x23, 0x26, 0xc2, 0xa1, 0xdb, 0xcb, 0x94, 0x90, 0x00, + 0x1a, 0x11, 0xc7, 0xc8, 0xd9, 0x00, 0x1a, 0x19, 0xcf, 0x63, 0xd2, 0x00, + 0x1a, 0x21, 0xcd, 0x4a, 0x68, 0x00, 0x1a, 0x28, 0x45, 0xda, 0x51, 0xc2, + 0xa1, 0xe5, 0x42, 0x00, 0x5f, 0xc2, 0xa1, 0xf1, 0xcc, 0x85, 0x59, 0x00, + 0x1a, 0x78, 0xcc, 0x89, 0x25, 0x01, 0x06, 0xd1, 0xcb, 0x02, 0x5c, 0x01, + 0x06, 0xa0, 0xcb, 0x8e, 0xe4, 0x00, 0xee, 0x49, 0xc6, 0x60, 0xb1, 0x00, + 0xee, 0x38, 0xc6, 0x09, 0x01, 0x00, 0x18, 0x0b, 0x02, 0xa1, 0xf9, 0xc9, + 0x2b, 0x5f, 0x00, 0x1a, 0x08, 0x00, 0xc2, 0xa1, 0xff, 0x19, 0x42, 0xa2, + 0x17, 0xc7, 0x20, 0x88, 0x01, 0x06, 0xc1, 0xc5, 0x00, 0xd4, 0x00, 0x18, + 0x51, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x28, 0xd0, 0x2c, 0x60, 0x01, 0x07, + 0x29, 0xcd, 0x52, 0x59, 0x00, 0x18, 0xa0, 0x03, 0xc2, 0xa2, 0x1d, 0x4c, + 0x02, 0x56, 0xc2, 0xa2, 0x29, 0x42, 0x00, 0xd0, 0xc2, 0xa2, 0x35, 0x4c, + 0x1a, 0x50, 0xc2, 0xa2, 0x41, 0xca, 0x9a, 0x3d, 0x00, 0x18, 0xc0, 0xdb, + 0x0b, 0x6c, 0x01, 0x07, 0x69, 0xcd, 0x7a, 0x38, 0x01, 0x07, 0x50, 0xd6, + 0x2c, 0x5a, 0x01, 0x07, 0x59, 0xd5, 0x36, 0x86, 0x01, 0x06, 0x91, 0x15, + 0x42, 0xa2, 0x4d, 0x97, 0x00, 0x1b, 0x3b, 0x02, 0xa2, 0x59, 0x91, 0x00, + 0x1b, 0x33, 0x02, 0xa2, 0x5f, 0x83, 0x00, 0x1b, 0x1b, 0x02, 0xa2, 0x65, + 0x99, 0x00, 0xef, 0x8b, 0x02, 0xa2, 0x7d, 0x87, 0x00, 0x1b, 0x23, 0x02, + 0xa2, 0x83, 0x92, 0x00, 0xef, 0x71, 0x8e, 0x00, 0xee, 0xeb, 0x02, 0xa2, + 0x8f, 0x88, 0x00, 0xef, 0x5b, 0x02, 0xa2, 0x9b, 0x95, 0x00, 0xef, 0x23, + 0x02, 0xa2, 0xa1, 0x84, 0x00, 0xef, 0x43, 0x02, 0xa2, 0xa7, 0x9c, 0x00, + 0xef, 0x31, 0x94, 0x00, 0x1b, 0x63, 0x02, 0xa2, 0xad, 0x90, 0x00, 0xef, + 0x01, 0x8d, 0x00, 0xee, 0xe1, 0x89, 0x00, 0xee, 0xd1, 0x8b, 0x00, 0x1b, + 0x2b, 0x02, 0xa2, 0xb1, 0x85, 0x00, 0x1b, 0x43, 0x02, 0xa2, 0xb7, 0x96, + 0x00, 0x1b, 0x6b, 0x02, 0xa2, 0xbd, 0x86, 0x00, 0x1b, 0x49, 0x8a, 0x00, + 0x1b, 0x51, 0x8f, 0x00, 0x1b, 0x59, 0x98, 0x00, 0x1b, 0x71, 0x9a, 0x00, + 0x1b, 0x78, 0x94, 0x00, 0xef, 0x11, 0x90, 0x00, 0xef, 0x09, 0x8f, 0x00, + 0xee, 0xf9, 0x8e, 0x00, 0xee, 0xf1, 0x89, 0x00, 0xee, 0xd8, 0xc9, 0x0f, + 0x6e, 0x07, 0xf1, 0x03, 0x02, 0xa2, 0xc3, 0xca, 0x09, 0xb7, 0x07, 0xf1, + 0x0a, 0x02, 0xa2, 0xc9, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x81, 0xc7, 0x20, + 0x88, 0x00, 0x19, 0xa1, 0xcf, 0x66, 0x57, 0x07, 0xf1, 0x49, 0xd0, 0x5d, + 0x42, 0x07, 0xf1, 0x50, 0x00, 0xc2, 0xa2, 0xcf, 0xd3, 0x41, 0x5e, 0x00, + 0xd5, 0x80, 0x00, 0xc2, 0xa3, 0x1f, 0x44, 0x00, 0xde, 0x42, 0xa3, 0x31, + 0xcb, 0x03, 0xbc, 0x00, 0xd5, 0x99, 0xcb, 0x9a, 0x3c, 0x00, 0x18, 0xf0, + 0xcd, 0x7a, 0x79, 0x05, 0x47, 0x89, 0x47, 0x02, 0x0e, 0xc2, 0xa3, 0x3d, + 0x46, 0x09, 0x97, 0x42, 0xa3, 0x63, 0xc5, 0x50, 0xb1, 0x01, 0x07, 0x11, + 0xc5, 0x0b, 
0x0a, 0x01, 0x06, 0xf0, 0xca, 0x02, 0xfd, 0x01, 0x07, 0x00, + 0xce, 0x74, 0xb0, 0x00, 0x24, 0x41, 0xcd, 0x33, 0xee, 0x05, 0x33, 0x88, + 0xc7, 0xc8, 0xee, 0x00, 0x24, 0x39, 0xcd, 0x7a, 0x04, 0x00, 0x24, 0x31, + 0x03, 0x42, 0xa3, 0x87, 0xc4, 0x90, 0x77, 0x00, 0x24, 0x1b, 0x02, 0xa3, + 0x93, 0xd0, 0x5c, 0xc2, 0x05, 0x33, 0x81, 0xd5, 0x33, 0xe6, 0x05, 0x33, + 0x90, 0x07, 0xc2, 0xa3, 0x97, 0x8b, 0x05, 0x33, 0xab, 0x02, 0xa3, 0xb2, + 0x97, 0x05, 0x33, 0xbb, 0x02, 0xa3, 0xbc, 0x1b, 0xc2, 0xa3, 0xc2, 0xc2, + 0x00, 0xd0, 0x01, 0x6f, 0x7b, 0x02, 0xa3, 0xd6, 0x15, 0xc2, 0xa3, 0xdc, + 0x91, 0x01, 0x6f, 0x53, 0x02, 0xa3, 0xe6, 0x04, 0xc2, 0xa3, 0xec, 0xc2, + 0x00, 0x5f, 0x01, 0x6f, 0x09, 0xc3, 0xc0, 0x19, 0x01, 0x6f, 0x11, 0x06, + 0xc2, 0xa3, 0xf6, 0x1c, 0xc2, 0xa4, 0x00, 0xc2, 0x02, 0x2b, 0x01, 0x6f, + 0x31, 0xc2, 0x00, 0x67, 0x01, 0x6f, 0x59, 0x16, 0xc2, 0xa4, 0x0a, 0xc3, + 0x28, 0x28, 0x01, 0x6f, 0x89, 0xc4, 0xe0, 0x1b, 0x01, 0x6f, 0xa1, 0x83, + 0x01, 0x6f, 0xb1, 0xcc, 0x82, 0x05, 0x01, 0x6f, 0xc9, 0xca, 0x51, 0x7f, + 0x01, 0x6f, 0xe8, 0xc6, 0x05, 0x01, 0x00, 0x19, 0x60, 0xc5, 0x00, 0xd4, + 0x00, 0x18, 0x9b, 0x02, 0xa4, 0x14, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x30, + 0xc6, 0x05, 0x01, 0x07, 0xf1, 0x68, 0xcd, 0x42, 0x35, 0x00, 0x19, 0xa9, + 0xce, 0x2c, 0x62, 0x00, 0x19, 0xb8, 0xc7, 0xc1, 0x31, 0x00, 0xee, 0x59, + 0xc6, 0x05, 0x01, 0x00, 0x19, 0x70, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x51, + 0xc5, 0x00, 0xd4, 0x00, 0x1a, 0x30, 0xc5, 0x00, 0xd4, 0x00, 0xef, 0xa9, + 0xc5, 0x05, 0x02, 0x00, 0x18, 0xe8, 0x4c, 0x83, 0x6d, 0xc2, 0xa4, 0x1a, + 0x42, 0x00, 0x38, 0x42, 0xa4, 0x26, 0xc5, 0x1d, 0x88, 0x00, 0xee, 0x61, + 0xc5, 0x1f, 0x0c, 0x00, 0xee, 0x31, 0xc5, 0x31, 0xee, 0x00, 0xee, 0x20, + 0xc5, 0x05, 0x02, 0x00, 0x19, 0x89, 0xc9, 0x0f, 0x6e, 0x07, 0xf1, 0x23, + 0x02, 0xa4, 0x35, 0xca, 0x09, 0xb7, 0x07, 0xf1, 0x2a, 0x02, 0xa4, 0x3b, + 0xc7, 0x20, 0x88, 0x00, 0xd5, 0xf1, 0xc5, 0x05, 0x02, 0x00, 0xd5, 0xe9, + 0xc5, 0x00, 0xd4, 0x00, 0xd5, 0xd8, 0xc4, 0x18, 0x10, 0x0e, 0x9b, 0x79, + 0xc2, 0x22, 0xcc, 0x0e, 0x9b, 0x70, 0xc3, 0x0d, 0x14, 0x0e, 0x9b, 0x69, + 0xc3, 0x09, 0x9e, 0x0e, 0x9b, 0x60, 0xc4, 0x02, 0xde, 0x0e, 0x9b, 0x59, + 0xc2, 0x02, 0xa0, 0x0e, 0x9b, 0x50, 0xc4, 0x18, 0x10, 0x0e, 0x9b, 0x31, + 0xc2, 0x22, 0xcc, 0x0e, 0x9b, 0x28, 0xc3, 0x0d, 0x14, 0x0e, 0x9b, 0x21, + 0xc3, 0x09, 0x9e, 0x0e, 0x9b, 0x18, 0xc4, 0x02, 0xde, 0x0e, 0x9b, 0x11, + 0xc2, 0x02, 0xa0, 0x0e, 0x9b, 0x08, 0xe0, 0x0a, 0x07, 0x01, 0x17, 0xd8, + 0xcc, 0x23, 0x9f, 0x01, 0x15, 0xa8, 0x0a, 0xc2, 0xa4, 0x41, 0xc3, 0x0b, + 0x65, 0x01, 0x64, 0xa9, 0xc2, 0x00, 0xba, 0x01, 0x64, 0xe8, 0xc3, 0x01, + 0x69, 0x00, 0x1f, 0x49, 0xc3, 0x00, 0xfe, 0x01, 0x64, 0x78, 0xc4, 0xd0, + 0x3f, 0x00, 0x1f, 0x59, 0xc3, 0x0a, 0x8c, 0x01, 0x64, 0x28, 0x0a, 0xc2, + 0xa4, 0x4b, 0xc2, 0x00, 0x59, 0x01, 0x64, 0x59, 0xc3, 0x07, 0x4a, 0x01, + 0x65, 0x29, 0xc4, 0x87, 0xf5, 0x01, 0x66, 0x08, 0xc2, 0x06, 0xdb, 0x00, + 0x1f, 0x79, 0xc4, 0xe2, 0x73, 0x01, 0x64, 0x39, 0x49, 0xa9, 0x00, 0x42, + 0xa4, 0x57, 0xc3, 0xe5, 0xe7, 0x01, 0x64, 0x09, 0xcc, 0x8c, 0x3d, 0x01, + 0x66, 0x48, 0xc5, 0xd6, 0xd7, 0x01, 0x64, 0x89, 0xc2, 0x20, 0xec, 0x01, + 0x65, 0x38, 0xc4, 0xe1, 0xcf, 0x01, 0x64, 0xb9, 0xca, 0xa7, 0x7e, 0x01, + 0x66, 0x88, 0xc2, 0x00, 0x59, 0x01, 0x65, 0x89, 0x43, 0x1d, 0xbb, 0x42, + 0xa4, 0x6f, 0x8b, 0x01, 0x65, 0x09, 0xc2, 0x00, 0xba, 0x01, 0x65, 0x78, + 0x8b, 0x01, 0x65, 0x59, 0xc2, 0x06, 0xdb, 0x00, 0x1f, 0x28, 0x4c, 0x1d, + 0xdd, 0xc2, 0xa4, 0x7b, 0xca, 0x9b, 0xa8, 0x01, 0x66, 0x18, 0xc2, 0x02, + 0xfa, 0x01, 0x67, 0x21, 0xc5, 0xd6, 0xe1, 0x01, 0x67, 0x48, 0xc6, 0xd1, + 0x21, 0x01, 
0x67, 0x39, 0xc9, 0xa9, 0x75, 0x01, 0x67, 0x50, 0xc3, 0x01, + 0x69, 0x00, 0x1f, 0x41, 0xc3, 0x00, 0xfe, 0x01, 0x64, 0x70, 0xc4, 0xd0, + 0x3f, 0x00, 0x1f, 0x51, 0xc3, 0x0a, 0x8c, 0x01, 0x64, 0x20, 0x0a, 0xc2, + 0xa4, 0x93, 0xc2, 0x00, 0x59, 0x01, 0x64, 0x51, 0xc3, 0x07, 0x4a, 0x01, + 0x65, 0x21, 0xc4, 0x87, 0xf5, 0x01, 0x66, 0x00, 0xc2, 0x06, 0xdb, 0x00, + 0x1f, 0x71, 0xc4, 0xe2, 0x73, 0x01, 0x64, 0x31, 0x49, 0xa9, 0x00, 0x42, + 0xa4, 0x9f, 0xc3, 0xe5, 0xe7, 0x01, 0x64, 0x01, 0xcc, 0x8c, 0x3d, 0x01, + 0x66, 0x40, 0xc5, 0xd6, 0xd7, 0x01, 0x64, 0x81, 0xc2, 0x20, 0xec, 0x01, + 0x65, 0x30, 0xc3, 0x0b, 0x65, 0x01, 0x64, 0xa1, 0xc2, 0x00, 0xba, 0x01, + 0x64, 0xe1, 0x0a, 0x42, 0xa4, 0xb7, 0xc4, 0xe1, 0xcf, 0x01, 0x64, 0xb1, + 0xca, 0xa7, 0x7e, 0x01, 0x66, 0x80, 0xc2, 0x00, 0x59, 0x01, 0x65, 0x81, + 0x43, 0x1d, 0xbb, 0x42, 0xa4, 0xc1, 0x8b, 0x01, 0x65, 0x01, 0xc2, 0x00, + 0xba, 0x01, 0x65, 0x70, 0x8b, 0x01, 0x65, 0x51, 0xc2, 0x06, 0xdb, 0x00, + 0x1f, 0x20, 0x4c, 0x1d, 0xdd, 0xc2, 0xa4, 0xcd, 0xca, 0x9b, 0xa8, 0x01, + 0x66, 0x10, 0xc5, 0xd6, 0xc8, 0x01, 0x67, 0x81, 0xc5, 0x3b, 0x5e, 0x01, + 0x67, 0x88, 0xc2, 0x02, 0xa0, 0x08, 0x17, 0x11, 0xc4, 0x02, 0xde, 0x08, + 0x17, 0x18, 0xc3, 0x09, 0x9e, 0x08, 0x17, 0x21, 0xc3, 0x0d, 0x14, 0x08, + 0x17, 0x28, 0xc2, 0x22, 0xcc, 0x08, 0x17, 0x31, 0xc4, 0x18, 0x10, 0x08, + 0x17, 0x38, 0xc2, 0x00, 0xc4, 0x08, 0x17, 0x51, 0x19, 0xc2, 0xa4, 0xe5, + 0x0a, 0x42, 0xa4, 0xf1, 0x11, 0xc2, 0xa4, 0xfd, 0x0b, 0x42, 0xa5, 0x09, + 0x42, 0x22, 0xcc, 0xc2, 0xa5, 0x15, 0x44, 0x18, 0x10, 0x42, 0xa5, 0x21, + 0x9b, 0x08, 0x17, 0x89, 0xc8, 0x0d, 0x03, 0x08, 0x17, 0xd0, 0xc2, 0x0d, + 0x10, 0x08, 0x17, 0x91, 0xc8, 0x0d, 0x03, 0x08, 0x17, 0xd8, 0xd2, 0x4a, + 0x09, 0x01, 0x52, 0x80, 0xcc, 0x23, 0x9f, 0x01, 0x56, 0x88, 0xcc, 0x23, + 0x9f, 0x01, 0x56, 0x90, 0xe0, 0x05, 0x67, 0x0f, 0xa8, 0x0a, 0x02, 0xa5, + 0x2d, 0x44, 0x22, 0x44, 0xc2, 0xa5, 0x33, 0x11, 0x42, 0xa5, 0x3f, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x29, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xc8, 0xc7, + 0xc1, 0xd9, 0x0f, 0xaa, 0xf1, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x90, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x31, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xd0, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x19, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xb8, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x11, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xb0, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x09, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xa8, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x01, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xa0, 0xc7, + 0xc1, 0xd9, 0x0f, 0xaa, 0xf9, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x98, 0x00, + 0xc2, 0xa5, 0x4b, 0xc9, 0xae, 0xe8, 0x01, 0x36, 0x90, 0x0d, 0xc2, 0xa5, + 0x5a, 0xc5, 0xd9, 0x61, 0x01, 0x93, 0x0b, 0x02, 0xa5, 0x6c, 0x16, 0xc2, + 0xa5, 0x72, 0xc5, 0xd6, 0x8c, 0x01, 0x93, 0x1b, 0x02, 0xa5, 0x84, 0xc5, + 0xda, 0xe7, 0x01, 0x93, 0x23, 0x02, 0xa5, 0x8a, 0x12, 0xc2, 0xa5, 0x90, + 0xc4, 0xad, 0x2b, 0x01, 0x93, 0x33, 0x02, 0xa5, 0xa2, 0xc5, 0xb7, 0x9d, + 0x01, 0x93, 0x3b, 0x02, 0xa5, 0xa8, 0x05, 0xc2, 0xa5, 0xac, 0xc5, 0x90, + 0xe4, 0x01, 0x93, 0x6a, 0x02, 0xa5, 0xbe, 0xc4, 0x0e, 0x6a, 0x01, 0x39, + 0x51, 0xc6, 0x1c, 0xb4, 0x01, 0x4d, 0xf0, 0x44, 0x09, 0x99, 0xc2, 0xa5, + 0xc4, 0x48, 0x30, 0xf3, 0x42, 0xa5, 0xe8, 0xca, 0x30, 0xb2, 0x01, 0x14, + 0xc9, 0x0e, 0x42, 0xa5, 0xf4, 0x4d, 0x29, 0xb9, 0xc2, 0xa5, 0xfa, 0x4f, + 0x0b, 0x17, 0x42, 0xa6, 0x62, 0x42, 0x00, 0x28, 0xc2, 0xa6, 0xca, 0x44, + 0x0d, 0x0d, 0xc2, 0xa6, 0xd9, 0xc2, 0x00, 0xc4, 0x01, 0x23, 0x4a, 0x02, + 0xa6, 0xe6, 0x44, 0x00, 0x2d, 0xc2, 0xa6, 0xec, 0xc5, 0x66, 0xb1, 0x01, + 0x23, 0x50, 0x45, 0x18, 0x10, 0xc2, 0xa6, 0xf8, 0x43, 0x22, 0xcc, 0x42, + 0xa7, 0x04, 
0x43, 0x14, 0x07, 0xc2, 0xa7, 0x10, 0x11, 0x42, 0xa7, 0x1d, + 0xc5, 0x03, 0xc7, 0x01, 0x1c, 0x50, 0xd6, 0x30, 0xe8, 0x01, 0x4d, 0xe1, + 0xc6, 0x01, 0xa1, 0x0f, 0x88, 0x70, 0xe0, 0x08, 0xe7, 0x01, 0x51, 0xb0, + 0x03, 0xc2, 0xa7, 0x2c, 0xc8, 0x2c, 0xb2, 0x01, 0x92, 0x21, 0x0d, 0xc2, + 0xa7, 0x44, 0x15, 0xc2, 0xa7, 0x50, 0xc3, 0x05, 0x14, 0x01, 0x94, 0x01, + 0x16, 0xc2, 0xa7, 0x74, 0x08, 0xc2, 0xa7, 0x86, 0x07, 0xc2, 0xa7, 0x96, + 0x10, 0xc2, 0xa7, 0xae, 0x0f, 0xc2, 0xa7, 0xb8, 0x19, 0xc2, 0xa7, 0xc8, + 0x0a, 0xc2, 0xa7, 0xd4, 0x05, 0xc2, 0xa7, 0xe0, 0x0e, 0xc2, 0xa7, 0xea, + 0xc5, 0xb9, 0xbc, 0x01, 0x94, 0xf1, 0xc4, 0xaa, 0xbb, 0x01, 0x95, 0x01, + 0x14, 0x42, 0xa7, 0xfc, 0x85, 0x0f, 0x89, 0x59, 0x94, 0x0f, 0x89, 0x60, + 0xc6, 0xcd, 0xf7, 0x01, 0x93, 0xe1, 0xc5, 0xde, 0x34, 0x01, 0x93, 0xe8, + 0x83, 0x01, 0x96, 0x81, 0x8b, 0x01, 0x96, 0x89, 0x97, 0x01, 0x96, 0x91, + 0x87, 0x01, 0x96, 0x99, 0x91, 0x01, 0x96, 0xa1, 0x0d, 0xc2, 0xa8, 0x06, + 0x15, 0xc2, 0xa8, 0x1a, 0x16, 0xc2, 0xa8, 0x2e, 0x10, 0xc2, 0xa8, 0x42, + 0x0a, 0xc2, 0xa8, 0x56, 0x0f, 0xc2, 0xa8, 0x6a, 0x1b, 0xc2, 0xa8, 0x7e, + 0x14, 0xc2, 0xa8, 0x8a, 0x19, 0x42, 0xa8, 0x9e, 0xe0, 0x02, 0xa7, 0x01, + 0x2e, 0xa8, 0xd4, 0x3d, 0x7c, 0x01, 0x2e, 0xa1, 0xca, 0x1e, 0x5f, 0x01, + 0x2e, 0x98, 0xcf, 0x63, 0x2d, 0x01, 0x2e, 0x91, 0xce, 0x66, 0x67, 0x01, + 0x2e, 0x80, 0xe0, 0x01, 0x67, 0x01, 0x4e, 0x18, 0xd8, 0x24, 0x83, 0x01, + 0x4e, 0x11, 0xcd, 0x76, 0x90, 0x01, 0x4d, 0xd8, 0x47, 0x03, 0x4c, 0x42, + 0xa8, 0xae, 0xd1, 0x51, 0xcd, 0x09, 0x1a, 0xf9, 0xc4, 0x58, 0xf5, 0x09, + 0x1a, 0xf0, 0xca, 0xa1, 0x20, 0x09, 0x1b, 0x38, 0x47, 0x03, 0x4c, 0xc2, + 0xa8, 0xb8, 0xc2, 0x0e, 0x9a, 0x09, 0x1a, 0x7a, 0x02, 0xa8, 0xfb, 0x00, + 0x42, 0xa9, 0x01, 0xa0, 0x09, 0x19, 0xb0, 0xc7, 0x6c, 0xd0, 0x09, 0x19, + 0x51, 0xcb, 0x94, 0x0c, 0x09, 0x19, 0x48, 0xc2, 0x02, 0xad, 0x09, 0x18, + 0x68, 0xda, 0x1b, 0x68, 0x09, 0x18, 0x81, 0xcc, 0x8b, 0x59, 0x09, 0x18, + 0x79, 0xd7, 0x29, 0x9c, 0x09, 0x18, 0x70, 0xc2, 0x00, 0x4e, 0x09, 0x1c, + 0xc3, 0x02, 0xa9, 0x0d, 0x97, 0x09, 0x19, 0x09, 0xc4, 0x55, 0x25, 0x09, + 0x19, 0x01, 0xc5, 0x03, 0x47, 0x09, 0x18, 0xf0, 0x47, 0x03, 0x4c, 0x42, + 0xa9, 0x13, 0xcd, 0x80, 0x84, 0x09, 0x1a, 0xd8, 0xc4, 0x38, 0xb4, 0x09, + 0x1a, 0xa9, 0xc2, 0x05, 0x52, 0x09, 0x1a, 0x9b, 0x02, 0xa9, 0x1f, 0x83, + 0x09, 0x1a, 0x90, 0xc7, 0x6c, 0xd0, 0x09, 0x18, 0xd3, 0x02, 0xa9, 0x23, + 0xc4, 0x39, 0xc8, 0x09, 0x18, 0xc9, 0x46, 0x03, 0x4d, 0xc2, 0xa9, 0x29, + 0xc6, 0xd0, 0x97, 0x09, 0x18, 0xa0, 0x47, 0x03, 0x4c, 0x42, 0xa9, 0x3e, + 0xd4, 0x39, 0x58, 0x09, 0x18, 0x50, 0xc9, 0xac, 0x18, 0x09, 0x29, 0xc8, + 0x47, 0x03, 0x4c, 0x42, 0xa9, 0x4a, 0x00, 0x42, 0xa9, 0x68, 0xc4, 0x39, + 0xc8, 0x09, 0x17, 0x79, 0x46, 0x03, 0x4d, 0xc2, 0xa9, 0x74, 0xc8, 0x0a, + 0xff, 0x09, 0x17, 0x60, 0x00, 0x42, 0xa9, 0x80, 0xca, 0x38, 0xae, 0x09, + 0x29, 0xc1, 0xc4, 0x39, 0xc8, 0x09, 0x16, 0xe0, 0xa1, 0x09, 0x16, 0xf2, + 0x02, 0xa9, 0x8f, 0x9f, 0x09, 0x16, 0xcb, 0x02, 0xa9, 0x95, 0xc3, 0x2b, + 0x88, 0x09, 0x16, 0xd1, 0xd2, 0x47, 0x27, 0x09, 0x16, 0xc0, 0x00, 0xc2, + 0xa9, 0x9b, 0xc2, 0x01, 0xe2, 0x09, 0x16, 0x03, 0x02, 0xa9, 0xb0, 0x90, + 0x09, 0x15, 0xf9, 0xc2, 0xe6, 0xab, 0x09, 0x15, 0xf0, 0xa3, 0x09, 0x15, + 0xbb, 0x02, 0xa9, 0xba, 0xc2, 0x38, 0x6a, 0x09, 0x15, 0xc9, 0xc2, 0xe5, + 0x8e, 0x09, 0x15, 0xc1, 0xa0, 0x09, 0x15, 0x72, 0x02, 0xa9, 0xc0, 0xc2, + 0x01, 0x6f, 0x09, 0x16, 0xb1, 0x94, 0x09, 0x16, 0x9b, 0x02, 0xa9, 0xc6, + 0xc3, 0x56, 0xa5, 0x09, 0x16, 0x91, 0x8f, 0x09, 0x16, 0x33, 0x02, 0xa9, + 0xca, 0x86, 0x09, 0x16, 0x1a, 0x02, 0xa9, 0xd0, 0x00, 0x42, 0xa9, 0xd6, + 0xd1, 0x56, 
0xb7, 0x09, 0x15, 0x50, 0xa6, 0x09, 0x17, 0x50, 0xc3, 0x02, + 0x2c, 0x09, 0x17, 0x40, 0x9f, 0x09, 0x17, 0x28, 0xc3, 0xe4, 0xe2, 0x09, + 0x12, 0x93, 0x02, 0xa9, 0xf1, 0xa6, 0x09, 0x1c, 0x80, 0x49, 0x38, 0x6c, + 0x42, 0xa9, 0xf7, 0x00, 0x42, 0xaa, 0x03, 0xc2, 0x4d, 0x4c, 0x09, 0x13, + 0x6b, 0x02, 0xaa, 0x15, 0x00, 0x42, 0xaa, 0x19, 0x9f, 0x09, 0x12, 0x39, + 0xc8, 0xb7, 0xa2, 0x09, 0x12, 0x28, 0x94, 0x09, 0x12, 0x21, 0x00, 0x42, + 0xaa, 0x34, 0xc7, 0x6c, 0xd0, 0x09, 0x12, 0x59, 0x46, 0x03, 0x4d, 0x42, + 0xaa, 0x46, 0x00, 0xc2, 0xaa, 0x50, 0xa0, 0x09, 0x11, 0xca, 0x02, 0xaa, + 0x65, 0xc5, 0x39, 0xc7, 0x09, 0x11, 0x78, 0x8a, 0x09, 0x1c, 0x60, 0x9f, + 0x09, 0x11, 0x38, 0xc4, 0x39, 0xc8, 0x09, 0x11, 0x11, 0xca, 0x38, 0xae, + 0x09, 0x11, 0x08, 0x00, 0x42, 0xaa, 0x69, 0xc9, 0xac, 0xa8, 0x09, 0x10, + 0xf2, 0x02, 0xaa, 0x83, 0x00, 0x42, 0xaa, 0x89, 0x24, 0xc2, 0xaa, 0x93, + 0x23, 0xc2, 0xaa, 0x9f, 0xc3, 0xe5, 0x7e, 0x09, 0x27, 0xf9, 0x21, 0xc2, + 0xaa, 0xbd, 0x20, 0xc2, 0xaa, 0xd5, 0x1f, 0xc2, 0xaa, 0xe3, 0x1e, 0xc2, + 0xaa, 0xf5, 0x1d, 0x42, 0xab, 0x01, 0x84, 0x09, 0x0d, 0xc3, 0x02, 0xab, + 0x2b, 0x94, 0x09, 0x0f, 0x62, 0x02, 0xab, 0x2f, 0xca, 0x51, 0xd4, 0x09, + 0x0f, 0xaa, 0x02, 0xab, 0x33, 0xca, 0x8c, 0xf6, 0x09, 0x0f, 0x98, 0x97, + 0x09, 0x0c, 0x3b, 0x02, 0xab, 0x39, 0x0d, 0xc2, 0xab, 0x5a, 0x04, 0xc2, + 0xab, 0x68, 0x16, 0xc2, 0xab, 0x74, 0x15, 0xc2, 0xab, 0x7e, 0x12, 0xc2, + 0xab, 0x95, 0x0e, 0xc2, 0xab, 0x9d, 0xcd, 0x05, 0x5a, 0x09, 0x1c, 0x11, + 0x09, 0xc2, 0xab, 0xa8, 0x83, 0x09, 0x0a, 0xc3, 0x02, 0xab, 0xbd, 0xc2, + 0x2e, 0x48, 0x09, 0x0c, 0x61, 0xc2, 0x17, 0x99, 0x09, 0x0b, 0xe9, 0x10, + 0xc2, 0xab, 0xd0, 0x0f, 0xc2, 0xab, 0xda, 0x0b, 0xc2, 0xab, 0xe8, 0x07, + 0x42, 0xab, 0xf2, 0x00, 0x42, 0xab, 0xfe, 0xa1, 0x09, 0x0c, 0xd9, 0x9f, + 0x09, 0x0c, 0xd0, 0x00, 0x42, 0xac, 0x0a, 0xcf, 0x6a, 0x17, 0x09, 0x0c, + 0xb0, 0xa2, 0x09, 0x0c, 0x9b, 0x02, 0xac, 0x16, 0xa1, 0x09, 0x0c, 0x91, + 0xa0, 0x09, 0x0c, 0x89, 0x9f, 0x09, 0x0c, 0x80, 0xcd, 0x7b, 0x8a, 0x09, + 0x0c, 0x70, 0xcd, 0x7a, 0x5f, 0x09, 0x0d, 0xa0, 0xc5, 0x39, 0xc7, 0x09, + 0x0d, 0x88, 0xcd, 0x77, 0xe2, 0x09, 0x0d, 0x70, 0xe0, 0x05, 0x47, 0x09, + 0x0d, 0x58, 0xc3, 0x68, 0xd0, 0x09, 0x0d, 0x43, 0x02, 0xac, 0x1c, 0x8a, + 0x09, 0x0d, 0x39, 0xc2, 0x00, 0x65, 0x09, 0x0d, 0x30, 0x97, 0x09, 0x0d, + 0x13, 0x02, 0xac, 0x22, 0xc3, 0x62, 0x19, 0x09, 0x0d, 0x08, 0xc3, 0x02, + 0x2c, 0x09, 0x09, 0x73, 0x02, 0xac, 0x26, 0x97, 0x09, 0x09, 0xb1, 0xc3, + 0x04, 0x65, 0x09, 0x09, 0xa9, 0xc3, 0x20, 0x18, 0x09, 0x09, 0xa1, 0xc3, + 0x56, 0x1d, 0x09, 0x09, 0x99, 0xc3, 0x1a, 0xe7, 0x09, 0x09, 0x91, 0xc4, + 0x04, 0x59, 0x09, 0x09, 0x89, 0xc3, 0x62, 0x19, 0x09, 0x09, 0x80, 0xc4, + 0x58, 0xf5, 0x09, 0x09, 0x53, 0x02, 0xac, 0x30, 0xc4, 0x39, 0xc8, 0x09, + 0x09, 0x58, 0x47, 0x03, 0x4c, 0x42, 0xac, 0x36, 0x00, 0x42, 0xac, 0x54, + 0x00, 0x42, 0xac, 0x66, 0x17, 0xc2, 0xac, 0x72, 0xa4, 0x09, 0x09, 0x30, + 0xca, 0xa6, 0x48, 0x09, 0x09, 0x20, 0x8a, 0x09, 0x08, 0x8b, 0x02, 0xac, + 0x7c, 0xc2, 0x00, 0x65, 0x09, 0x08, 0x80, 0xa0, 0x09, 0x08, 0x53, 0x02, + 0xac, 0x80, 0x9f, 0x09, 0x08, 0x42, 0x02, 0xac, 0x86, 0x00, 0x42, 0xac, + 0x8c, 0xcb, 0x47, 0xaa, 0x09, 0x08, 0x19, 0x46, 0x03, 0x4d, 0x42, 0xac, + 0x98, 0x47, 0x03, 0x4c, 0x42, 0xac, 0xa0, 0x00, 0x42, 0xac, 0xaa, 0x00, + 0x42, 0xac, 0xb6, 0xa0, 0x09, 0x07, 0xe0, 0x9f, 0x09, 0x07, 0xba, 0x02, + 0xac, 0xc2, 0xc2, 0x00, 0xc2, 0x09, 0x07, 0xa1, 0xda, 0x1a, 0xe6, 0x09, + 0x07, 0x98, 0xd6, 0x1a, 0xea, 0x09, 0x07, 0x88, 0x46, 0x03, 0x4d, 0xc2, + 0xac, 0xc6, 0x4e, 0x6c, 0xd0, 0x42, 0xad, 0x01, 0xc2, 0x5c, 0x27, 0x09, + 0x25, 0x58, 
0xc3, 0x0b, 0x64, 0x09, 0x25, 0x51, 0xc3, 0x51, 0xdb, 0x09, + 0x25, 0x49, 0x97, 0x09, 0x04, 0x99, 0x15, 0xc2, 0xad, 0x2b, 0xc2, 0x02, + 0x2f, 0x09, 0x04, 0x81, 0xc3, 0x1a, 0xf4, 0x09, 0x04, 0x79, 0xd1, 0x4e, + 0xe1, 0x09, 0x04, 0x70, 0xc7, 0x0b, 0x09, 0x09, 0x04, 0xe9, 0xcb, 0x96, + 0xed, 0x09, 0x04, 0xe1, 0xcb, 0x94, 0x38, 0x09, 0x04, 0xd9, 0x46, 0x03, + 0x4d, 0x42, 0xad, 0x37, 0x47, 0x03, 0x4c, 0xc2, 0xad, 0x46, 0xc2, 0x04, + 0x3d, 0x09, 0x04, 0x10, 0x47, 0x03, 0x4c, 0xc2, 0xad, 0x7e, 0x9f, 0x09, + 0x04, 0x00, 0xa1, 0x09, 0x04, 0x41, 0xa0, 0x09, 0x04, 0x2a, 0x02, 0xad, + 0x8a, 0xc7, 0x6c, 0xd0, 0x09, 0x03, 0xe9, 0xc4, 0x39, 0xc8, 0x09, 0x03, + 0xe1, 0xc7, 0xc6, 0x47, 0x09, 0x03, 0xd8, 0x9f, 0x09, 0x03, 0xb3, 0x02, + 0xad, 0x93, 0x47, 0x03, 0x4c, 0x42, 0xad, 0x99, 0xc9, 0xa3, 0x1e, 0x09, + 0x1b, 0xa8, 0xd3, 0x45, 0xac, 0x09, 0x03, 0xc0, 0x00, 0xc2, 0xad, 0xab, + 0xa0, 0x09, 0x1b, 0xa0, 0x03, 0x42, 0xad, 0xb7, 0x48, 0xb6, 0x2a, 0xc2, + 0xad, 0xbf, 0xcb, 0x94, 0x2d, 0x09, 0x02, 0x80, 0x9f, 0x09, 0x02, 0xa0, + 0xcb, 0x96, 0x95, 0x09, 0x02, 0x90, 0x47, 0x03, 0x4c, 0x42, 0xad, 0xd1, + 0xd0, 0x5d, 0xc2, 0x09, 0x24, 0x18, 0xc2, 0x7b, 0x95, 0x09, 0x02, 0x40, + 0xc2, 0x00, 0xb3, 0x09, 0x02, 0x31, 0xc9, 0xac, 0xba, 0x09, 0x02, 0x28, + 0xc8, 0x6a, 0x1e, 0x09, 0x02, 0x61, 0xc3, 0x1a, 0xf4, 0x09, 0x02, 0x59, + 0x83, 0x09, 0x02, 0x50, 0x46, 0x03, 0x4d, 0xc2, 0xad, 0xe3, 0xc4, 0x39, + 0xc8, 0x09, 0x00, 0xa8, 0x47, 0x03, 0x4c, 0x42, 0xae, 0x1a, 0xc3, 0xd1, + 0x2b, 0x09, 0x1b, 0x91, 0xc3, 0x04, 0x65, 0x09, 0x01, 0x60, 0xc3, 0x03, + 0x49, 0x09, 0x01, 0xf9, 0x9f, 0x09, 0x01, 0xf1, 0x00, 0x42, 0xae, 0x3c, + 0xca, 0x51, 0xd4, 0x09, 0x01, 0xa8, 0x4a, 0x9e, 0x64, 0xc2, 0xae, 0x4e, + 0xcb, 0x8f, 0x05, 0x09, 0x01, 0x79, 0xc7, 0xc6, 0x0f, 0x09, 0x01, 0x70, + 0xc3, 0x5d, 0xd1, 0x09, 0x01, 0x41, 0xc3, 0x04, 0x65, 0x09, 0x01, 0x39, + 0x0d, 0xc2, 0xae, 0x5a, 0xc2, 0x00, 0xd0, 0x09, 0x01, 0x21, 0xc4, 0x38, + 0xa9, 0x09, 0x01, 0x19, 0xc4, 0xe2, 0xab, 0x09, 0x01, 0x11, 0xc2, 0x00, + 0x65, 0x09, 0x01, 0x08, 0xcf, 0x68, 0x73, 0x09, 0x00, 0xf9, 0xc5, 0x9e, + 0x4b, 0x09, 0x00, 0xf0, 0x9f, 0x09, 0x1c, 0xa9, 0xc2, 0x00, 0x2d, 0x09, + 0x14, 0x52, 0x02, 0xae, 0x64, 0xcb, 0x94, 0x4e, 0x09, 0x14, 0x49, 0x46, + 0x03, 0x4d, 0x42, 0xae, 0x68, 0xc7, 0x0b, 0x09, 0x09, 0x0a, 0x91, 0xcb, + 0x96, 0xf8, 0x09, 0x0a, 0x89, 0xcb, 0x94, 0x43, 0x09, 0x0a, 0x81, 0xca, + 0x38, 0xae, 0x09, 0x0a, 0x78, 0x00, 0x42, 0xae, 0x85, 0xc7, 0x0b, 0x09, + 0x09, 0x0a, 0x21, 0xc3, 0x2b, 0x88, 0x09, 0x0a, 0x18, 0xcd, 0x77, 0xe2, + 0x09, 0x23, 0x70, 0xc2, 0x00, 0xd3, 0x09, 0x22, 0x49, 0xa1, 0x09, 0x22, + 0x41, 0xa0, 0x09, 0x22, 0x38, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x68, 0xa0, + 0x09, 0x22, 0x28, 0xc4, 0x45, 0x6a, 0x09, 0x23, 0x41, 0xc4, 0x4a, 0x2e, + 0x09, 0x23, 0x38, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x60, 0x00, 0xc2, 0xae, + 0x9d, 0xa0, 0x09, 0x22, 0x08, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x58, 0xc5, + 0x58, 0xf4, 0x09, 0x22, 0x70, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x50, 0xca, + 0x9d, 0x74, 0x09, 0x22, 0xe1, 0x43, 0x01, 0x50, 0x42, 0xae, 0xa5, 0xc3, + 0x5d, 0x9a, 0x09, 0x22, 0xa3, 0x02, 0xae, 0xad, 0xc3, 0x9f, 0x30, 0x09, + 0x21, 0xc8, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x68, 0x97, 0x09, 0x21, 0x11, + 0x9f, 0x09, 0x20, 0xc8, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x48, 0xc3, 0x5d, + 0x9a, 0x09, 0x22, 0x93, 0x02, 0xae, 0xb3, 0xc3, 0x9f, 0x30, 0x09, 0x21, + 0xc0, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x60, 0x00, 0xc2, 0xae, 0xb9, 0xa1, + 0x09, 0x21, 0xe8, 0x97, 0x09, 0x21, 0x81, 0x9f, 0x09, 0x21, 0x30, 0x97, + 0x09, 0x21, 0x09, 0x9f, 0x09, 0x20, 0xc0, 0xc3, 0x8f, 0x7a, 0x09, 0x23, + 0x19, 0xc3, 
0x02, 0x2c, 0x09, 0x23, 0x00, 0xc9, 0xad, 0xf5, 0x09, 0x22, + 0xf9, 0xc4, 0xdd, 0x63, 0x09, 0x22, 0xc0, 0xce, 0x54, 0x64, 0x09, 0x22, + 0xe9, 0xc4, 0x04, 0x59, 0x09, 0x22, 0xd0, 0xc3, 0x5d, 0x9a, 0x09, 0x22, + 0x79, 0xc3, 0x9f, 0x30, 0x09, 0x21, 0xa0, 0x97, 0x09, 0x20, 0xf1, 0x9f, + 0x09, 0x20, 0xa8, 0xce, 0x54, 0x64, 0x09, 0x22, 0xf1, 0xc4, 0x04, 0x59, + 0x09, 0x22, 0xd8, 0xc3, 0x5d, 0x9a, 0x09, 0x22, 0x81, 0xc3, 0x9f, 0x30, + 0x09, 0x21, 0xa8, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x50, 0x97, 0x09, 0x21, + 0x69, 0x9f, 0x09, 0x21, 0x18, 0x97, 0x09, 0x20, 0xf9, 0x9f, 0x09, 0x20, + 0xb0, 0xc3, 0x5d, 0x9a, 0x09, 0x22, 0x89, 0xc3, 0x9f, 0x30, 0x09, 0x21, + 0xb2, 0x02, 0xae, 0xc1, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x58, 0xc2, 0xe5, + 0xf7, 0x09, 0x21, 0xd9, 0xc2, 0xe6, 0x89, 0x09, 0x21, 0xd0, 0x97, 0x09, + 0x21, 0x73, 0x02, 0xae, 0xc7, 0x9f, 0x09, 0x21, 0x22, 0x02, 0xae, 0xcd, + 0x97, 0x09, 0x21, 0x01, 0x9f, 0x09, 0x20, 0xb8, 0xc3, 0x02, 0x9b, 0x01, + 0x16, 0x79, 0xc2, 0x00, 0xbf, 0x01, 0x16, 0x70, 0x84, 0x09, 0x7e, 0x70, + 0x84, 0x09, 0x7c, 0xd8, 0x06, 0xc2, 0xae, 0xd3, 0xc6, 0x60, 0xb1, 0x00, + 0x27, 0x78, 0xca, 0x91, 0xbb, 0x00, 0x22, 0xa0, 0xc3, 0x2d, 0x1a, 0x00, + 0xe4, 0x39, 0xc9, 0xa8, 0x3a, 0x00, 0xe4, 0x31, 0xc2, 0x00, 0xac, 0x00, + 0xe4, 0x20, 0x46, 0x00, 0x8b, 0x42, 0xae, 0xdf, 0x87, 0x00, 0x22, 0x31, + 0xc2, 0x01, 0x7f, 0x00, 0x22, 0xd9, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x79, + 0xc2, 0x00, 0x40, 0x05, 0x34, 0x88, 0xc5, 0x13, 0xb4, 0x00, 0xe4, 0x01, + 0xc6, 0x9b, 0xd4, 0x00, 0x23, 0xd8, 0xc2, 0x0a, 0xe2, 0x00, 0x28, 0x89, + 0xc3, 0xe5, 0x2a, 0x05, 0x32, 0x29, 0xc2, 0x13, 0xc0, 0x05, 0x32, 0xa9, + 0xc3, 0x3b, 0x0f, 0x05, 0x33, 0x08, 0x46, 0x00, 0x8b, 0x42, 0xae, 0xeb, + 0x46, 0x00, 0x8b, 0x42, 0xaf, 0x03, 0xca, 0xa5, 0x12, 0x00, 0x26, 0x70, + 0xcf, 0x69, 0x54, 0x00, 0x25, 0x58, 0xca, 0xa5, 0xb2, 0x00, 0x24, 0x78, + 0x1c, 0xc2, 0xaf, 0x21, 0x87, 0x00, 0x22, 0xab, 0x02, 0xaf, 0x2b, 0xc2, + 0x01, 0x7f, 0x00, 0x22, 0xf9, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x18, 0x91, + 0x05, 0x34, 0xc9, 0xcb, 0x98, 0xa5, 0x05, 0x33, 0x68, 0xc2, 0x04, 0xab, + 0x05, 0x32, 0x48, 0xc2, 0x00, 0xd0, 0x00, 0x25, 0xdb, 0x02, 0xaf, 0x31, + 0x44, 0x2e, 0xf0, 0xc2, 0xaf, 0x37, 0xc2, 0x00, 0x28, 0x05, 0x34, 0xb9, + 0x83, 0x00, 0x22, 0x41, 0xc3, 0x1c, 0x63, 0x00, 0x22, 0x48, 0xcf, 0x6b, + 0x16, 0x00, 0x26, 0xd8, 0xcc, 0x23, 0x3f, 0x00, 0x25, 0x88, 0xc2, 0x00, + 0x06, 0x05, 0x33, 0x19, 0x07, 0xc2, 0xaf, 0x42, 0xc4, 0x00, 0xba, 0x00, + 0x22, 0x60, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0x4a, 0xc3, 0xe5, 0x2a, 0x00, + 0x27, 0x09, 0xc3, 0x28, 0x28, 0x00, 0x25, 0xeb, 0x02, 0xaf, 0x56, 0xc2, + 0x00, 0xd0, 0x00, 0x25, 0x48, 0xc9, 0x20, 0xa8, 0x00, 0x26, 0x99, 0xc5, + 0x1d, 0x88, 0x00, 0x26, 0x88, 0x87, 0x00, 0x28, 0xc9, 0x96, 0x00, 0x23, + 0x18, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0x5c, 0x43, 0x5d, 0xc0, 0xc2, 0xaf, + 0x68, 0xc3, 0x78, 0xc9, 0x00, 0x24, 0x08, 0x46, 0x00, 0x8b, 0x42, 0xaf, + 0x8a, 0x46, 0x00, 0x8b, 0xc2, 0xaf, 0xa2, 0xc7, 0x8a, 0x86, 0x00, 0x22, + 0x50, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0xb4, 0xc6, 0xc3, 0x77, 0x00, 0x27, + 0x4b, 0x02, 0xaf, 0xcf, 0xc8, 0xba, 0x0a, 0x00, 0x25, 0x08, 0xc9, 0x98, + 0xa7, 0x05, 0x33, 0x59, 0xc5, 0xc8, 0x02, 0x00, 0x23, 0x58, 0xcb, 0x90, + 0x70, 0x00, 0x23, 0xe8, 0xc9, 0x20, 0xa8, 0x00, 0x27, 0x29, 0xc6, 0x60, + 0xb1, 0x00, 0x27, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0x22, 0xe8, 0x46, 0x00, + 0x8b, 0x42, 0xaf, 0xd5, 0xd9, 0x1e, 0xff, 0x00, 0x23, 0xb8, 0x16, 0x42, + 0xaf, 0xe1, 0x47, 0x01, 0x32, 0xc2, 0xaf, 0xeb, 0xc4, 0xe4, 0xbf, 0x05, + 0x32, 0x08, 0x87, 0x00, 0x21, 0xb3, 0x02, 0xaf, 0xf7, 0xc2, 0x00, 0x28, + 0x05, 0x34, 
0x28, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0xfd, 0x46, 0x00, 0x8b, + 0x42, 0xb0, 0x07, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0x1f, 0xca, 0xa5, 0x12, + 0x00, 0x26, 0x68, 0xcf, 0x69, 0x54, 0x00, 0x25, 0x50, 0xca, 0xa5, 0xb2, + 0x00, 0x24, 0x70, 0x1c, 0xc2, 0xb0, 0x3d, 0x87, 0x00, 0x20, 0x2b, 0x02, + 0xb0, 0x47, 0xc2, 0x01, 0x7f, 0x00, 0x20, 0x79, 0xc2, 0x00, 0x28, 0x05, + 0x34, 0x10, 0x91, 0x05, 0x34, 0xc1, 0xcb, 0x98, 0xa5, 0x05, 0x33, 0x60, + 0xc2, 0x04, 0xab, 0x05, 0x32, 0x40, 0xc2, 0x00, 0xd0, 0x00, 0x25, 0xd3, + 0x02, 0xb0, 0x4d, 0x44, 0x2e, 0xf0, 0xc2, 0xb0, 0x53, 0x83, 0x00, 0x21, + 0x41, 0xc3, 0x1c, 0x63, 0x00, 0x21, 0x49, 0xc2, 0x00, 0x28, 0x05, 0x34, + 0xb0, 0xcf, 0x6b, 0x16, 0x00, 0x26, 0xd0, 0xcc, 0x23, 0x3f, 0x00, 0x25, + 0x80, 0xc4, 0x00, 0xba, 0x00, 0x21, 0x61, 0xc2, 0x00, 0x06, 0x05, 0x33, + 0x11, 0x07, 0x42, 0xb0, 0x5e, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0x66, 0xc3, + 0xe5, 0x2a, 0x00, 0x27, 0x01, 0xc3, 0x28, 0x28, 0x00, 0x25, 0xe3, 0x02, + 0xb0, 0x72, 0xc2, 0x00, 0xd0, 0x00, 0x25, 0x40, 0xc9, 0x20, 0xa8, 0x00, + 0x26, 0x91, 0xc5, 0x1d, 0x88, 0x00, 0x26, 0x80, 0x87, 0x00, 0x28, 0xc1, + 0x96, 0x00, 0x23, 0x10, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0x78, 0xc2, 0x0a, + 0xe2, 0x00, 0x28, 0x81, 0xc3, 0xe5, 0x2a, 0x05, 0x32, 0x21, 0xc2, 0x13, + 0xc0, 0x05, 0x32, 0xa1, 0xc3, 0x3b, 0x0f, 0x05, 0x33, 0x00, 0x43, 0x5d, + 0xc0, 0xc2, 0xb0, 0x84, 0xc3, 0x78, 0xc9, 0x00, 0x24, 0x00, 0x46, 0x00, + 0x8b, 0x42, 0xb0, 0xa6, 0x46, 0x00, 0x8b, 0xc2, 0xb0, 0xbe, 0xc7, 0x8a, + 0x86, 0x00, 0x21, 0x50, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0xd0, 0x46, 0x00, + 0x8b, 0x42, 0xb0, 0xeb, 0x06, 0xc2, 0xb0, 0xf5, 0xc6, 0x60, 0xb1, 0x00, + 0x27, 0x70, 0xca, 0x91, 0xbb, 0x00, 0x20, 0x20, 0xc6, 0xc3, 0x77, 0x00, + 0x27, 0x43, 0x02, 0xb1, 0x01, 0xc8, 0xba, 0x0a, 0x00, 0x25, 0x00, 0xc9, + 0x98, 0xa7, 0x05, 0x33, 0x51, 0xc5, 0xc8, 0x02, 0x00, 0x23, 0x50, 0xcb, + 0x90, 0x70, 0x00, 0x23, 0xe0, 0xc9, 0x20, 0xa8, 0x00, 0x27, 0x21, 0xc6, + 0x60, 0xb1, 0x00, 0x27, 0x11, 0xc5, 0x1f, 0x0c, 0x00, 0x20, 0x68, 0x46, + 0x00, 0x8b, 0x42, 0xb1, 0x07, 0xd9, 0x1e, 0xff, 0x00, 0x23, 0xb0, 0x16, + 0x42, 0xb1, 0x13, 0x47, 0x01, 0x32, 0xc2, 0xb1, 0x1d, 0xc4, 0xe4, 0xbf, + 0x05, 0x32, 0x00, 0x87, 0x00, 0x20, 0xb3, 0x02, 0xb1, 0x29, 0xc2, 0x00, + 0x28, 0x05, 0x34, 0x20, 0x46, 0x00, 0x8b, 0x42, 0xb1, 0x2f, 0xc2, 0x01, + 0x7f, 0x00, 0x20, 0x59, 0x87, 0x00, 0x21, 0x31, 0xc2, 0x00, 0x28, 0x05, + 0x34, 0x71, 0xc2, 0x00, 0x40, 0x05, 0x34, 0x80, 0xe0, 0x01, 0xa7, 0x01, + 0x01, 0xc8, 0xc8, 0x4b, 0x94, 0x08, 0x8f, 0xa1, 0xc7, 0x0d, 0x04, 0x08, + 0x8f, 0x98, 0xc6, 0x18, 0x10, 0x08, 0x8f, 0x81, 0xc4, 0xd2, 0x1d, 0x08, + 0x8f, 0x78, 0xc4, 0x45, 0x6a, 0x08, 0x8f, 0x71, 0xc4, 0x4a, 0x2e, 0x08, + 0x8f, 0x68, 0xc5, 0x0d, 0x0d, 0x08, 0x8f, 0x61, 0xc5, 0x28, 0xee, 0x08, + 0x8f, 0x59, 0xc2, 0x00, 0xc4, 0x08, 0x8f, 0x50, 0xc4, 0x18, 0x10, 0x08, + 0x8f, 0x39, 0xc2, 0x22, 0xcc, 0x08, 0x8f, 0x30, 0xc3, 0x0d, 0x14, 0x08, + 0x8f, 0x29, 0xc3, 0x09, 0x9e, 0x08, 0x8f, 0x20, 0xc4, 0x02, 0xde, 0x08, + 0x8f, 0x19, 0xc2, 0x02, 0xa0, 0x08, 0x8f, 0x10, 0xc5, 0x69, 0xa7, 0x00, + 0x6c, 0x29, 0xc6, 0x8e, 0x9c, 0x00, 0x6c, 0x31, 0x07, 0xc2, 0xb1, 0x3b, + 0xc6, 0xd2, 0x47, 0x00, 0x6c, 0x99, 0xc6, 0xcc, 0xd1, 0x00, 0x6c, 0xb1, + 0x4a, 0xa1, 0xa2, 0xc2, 0xb1, 0x47, 0xcb, 0x8e, 0x97, 0x00, 0x6d, 0xc8, + 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x49, 0xc6, 0xd2, 0x47, 0x00, 0x6c, 0x51, + 0x42, 0x17, 0x99, 0xc2, 0xb1, 0x73, 0x42, 0x10, 0x37, 0x42, 0xb1, 0x7f, + 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x59, 0xc6, 0xcc, 0xd1, 0x00, 0x6c, 0x60, + 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x89, 0xc6, 0xd2, 0x3b, 0x00, 0x6c, 0x90, + 0xc5, 0x69, 
0xa7, 0x00, 0x6c, 0xa1, 0xc6, 0x69, 0xa6, 0x00, 0x6c, 0xa8, + 0x03, 0xc2, 0xb1, 0x8b, 0x49, 0xb0, 0xe9, 0x42, 0xb1, 0x97, 0xc7, 0xca, + 0x29, 0x00, 0x6c, 0xf9, 0xc7, 0xc7, 0xc1, 0x00, 0x6d, 0x31, 0x06, 0x42, + 0xb1, 0xa9, 0xca, 0x4b, 0x0d, 0x00, 0x6d, 0x21, 0x42, 0x0d, 0xf6, 0x42, + 0xb1, 0xb5, 0xc7, 0xc4, 0xdb, 0x00, 0x6d, 0x89, 0xc7, 0xc2, 0x18, 0x00, + 0x6d, 0xe9, 0xc7, 0xc1, 0xa8, 0x00, 0x6e, 0x18, 0xc2, 0x02, 0xa0, 0x00, + 0x6f, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x6f, 0x48, 0xc3, 0x09, 0x9e, 0x00, + 0x6f, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x6f, 0x58, 0xc2, 0x22, 0xcc, 0x00, + 0x6f, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x6f, 0x68, 0xca, 0xa7, 0x60, 0x00, + 0x6e, 0x81, 0xc8, 0xb7, 0x82, 0x00, 0x6e, 0x91, 0xc9, 0xaf, 0x42, 0x00, + 0x6e, 0xa0, 0xc2, 0x02, 0x41, 0x00, 0x6e, 0xcb, 0x02, 0xb1, 0xc1, 0xc5, + 0xd8, 0x21, 0x00, 0x6e, 0xd8, 0xca, 0x9c, 0xb6, 0x00, 0x6f, 0x91, 0xc9, + 0x93, 0x53, 0x00, 0x6f, 0x98, 0x1e, 0xc2, 0xb1, 0xc7, 0xa6, 0x0e, 0xd5, + 0x41, 0xa5, 0x0e, 0xd5, 0x39, 0xa4, 0x0e, 0xd5, 0x31, 0xa3, 0x0e, 0xd5, + 0x29, 0xa2, 0x0e, 0xd5, 0x21, 0xa1, 0x0e, 0xd5, 0x19, 0xa0, 0x0e, 0xd5, + 0x11, 0x9f, 0x0e, 0xd5, 0x08, 0x4b, 0x40, 0xb3, 0xc2, 0xb1, 0xe3, 0x4a, + 0x18, 0xa5, 0x42, 0xb1, 0xfe, 0xa3, 0x0e, 0xd4, 0xf9, 0xa2, 0x0e, 0xd4, + 0xf1, 0xa1, 0x0e, 0xd4, 0xe9, 0xa0, 0x0e, 0xd4, 0xe1, 0x9f, 0x0e, 0xd4, + 0xd8, 0x15, 0xc2, 0xb2, 0x16, 0x46, 0x17, 0x14, 0x42, 0xb2, 0x22, 0xc8, + 0x00, 0x6f, 0x0e, 0xd0, 0x48, 0xc9, 0x6e, 0x18, 0x0e, 0xd3, 0x71, 0xc5, + 0xda, 0x5b, 0x0e, 0xd3, 0x68, 0xc9, 0x65, 0x4f, 0x0e, 0xc8, 0xd1, 0x45, + 0x03, 0x14, 0x42, 0xb2, 0x2e, 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0xc1, 0xc6, + 0x24, 0x3b, 0x0e, 0xc8, 0xb0, 0xcc, 0x83, 0x61, 0x0e, 0xd4, 0x31, 0xc5, + 0xd8, 0x1c, 0x0e, 0xd4, 0x29, 0x42, 0x01, 0x7f, 0xc2, 0xb2, 0x3a, 0xc5, + 0xdb, 0x5a, 0x0e, 0xd4, 0x19, 0xc5, 0x48, 0x65, 0x0e, 0xd4, 0x10, 0xd0, + 0x60, 0x02, 0x0e, 0xd4, 0x01, 0xcf, 0x6a, 0xad, 0x0e, 0xd3, 0xf8, 0x47, + 0xc2, 0x2d, 0xc2, 0xb2, 0x46, 0xcb, 0x98, 0x0b, 0x0e, 0xd3, 0xb0, 0x00, + 0xc2, 0xb2, 0x62, 0xd2, 0x4d, 0xf9, 0x0e, 0xd2, 0x98, 0xd3, 0x40, 0xb3, + 0x0e, 0xd3, 0xa1, 0x4a, 0x18, 0xa5, 0x42, 0xb2, 0x6e, 0x47, 0x0f, 0x81, + 0xc2, 0xb2, 0x7a, 0xd3, 0x46, 0xef, 0x0e, 0xd2, 0xf1, 0xd4, 0x38, 0xcc, + 0x0e, 0xd2, 0xe9, 0x44, 0x08, 0xba, 0xc2, 0xb2, 0x86, 0xcc, 0x82, 0x95, + 0x0e, 0xd2, 0xd1, 0xd0, 0x5b, 0x22, 0x0e, 0xd2, 0xc8, 0xc7, 0x0b, 0xc8, + 0x0e, 0xc8, 0x39, 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0x31, 0xc6, 0x24, 0x3b, + 0x0e, 0xc8, 0x28, 0x00, 0x42, 0xb2, 0x92, 0xc3, 0x01, 0xc8, 0x0e, 0xd1, + 0x79, 0xc6, 0x04, 0xcb, 0x0e, 0xd1, 0x71, 0xc4, 0x08, 0xcb, 0x0e, 0xd1, + 0x68, 0xc7, 0xc4, 0xe9, 0x0e, 0xcc, 0x39, 0x49, 0xab, 0x01, 0x42, 0xb2, + 0xa4, 0x4b, 0x99, 0x4a, 0xc2, 0xb2, 0xb0, 0xc7, 0xc4, 0xe9, 0x0e, 0xca, + 0x89, 0x49, 0xab, 0x01, 0x42, 0xb2, 0xc2, 0x4a, 0x18, 0xa5, 0xc2, 0xb2, + 0xce, 0x4b, 0x40, 0xb3, 0x42, 0xb2, 0xdb, 0xca, 0x45, 0x02, 0x0e, 0xd1, + 0x01, 0xc4, 0x03, 0xc8, 0x0e, 0xd0, 0xf9, 0xc2, 0x02, 0xae, 0x0e, 0xd0, + 0xf0, 0xc4, 0x91, 0x78, 0x0e, 0xd0, 0xe9, 0x46, 0xca, 0xbb, 0x42, 0xb2, + 0xea, 0x44, 0x06, 0xa6, 0xc2, 0xb2, 0xf6, 0x45, 0x01, 0xce, 0xc2, 0xb3, + 0x02, 0xc6, 0x07, 0xa1, 0x0e, 0xd0, 0xb1, 0xc8, 0xba, 0x92, 0x0e, 0xd0, + 0xa9, 0xc4, 0x05, 0x75, 0x0e, 0xd0, 0xa0, 0xc4, 0x03, 0xc8, 0x0e, 0xd0, + 0x61, 0xc7, 0x81, 0x92, 0x0e, 0xd0, 0x59, 0xc2, 0x02, 0xae, 0x0e, 0xd0, + 0x50, 0x08, 0xc2, 0xb3, 0x0e, 0xc5, 0x01, 0x95, 0x0e, 0xc4, 0x2b, 0x02, + 0xb3, 0x20, 0x0a, 0xc2, 0xb3, 0x24, 0x05, 0xc2, 0xb3, 0x36, 0xc4, 0x38, + 0xc1, 0x0e, 0xc3, 0xba, 0x02, 0xb3, 0x4c, 0x48, 0x51, 0x1b, 0xc2, 0xb3, + 0x50, 0xc3, 
0x18, 0x26, 0x0e, 0xd0, 0x00, 0xc6, 0xd0, 0x37, 0x0e, 0xd1, + 0xa1, 0xc7, 0xa9, 0x6d, 0x0e, 0xd1, 0x98, 0xc3, 0xe5, 0x35, 0x0e, 0xd3, + 0x49, 0x48, 0x17, 0x7c, 0xc2, 0xb3, 0x5a, 0x19, 0xc2, 0xb3, 0x66, 0x58, + 0x22, 0x2b, 0xc2, 0xb3, 0x72, 0x15, 0xc2, 0xb3, 0x84, 0x45, 0xd9, 0x57, + 0xc2, 0xb3, 0x90, 0x45, 0xd8, 0x76, 0xc2, 0xb3, 0x9c, 0x05, 0xc2, 0xb3, + 0xa8, 0x46, 0xcb, 0x0f, 0xc2, 0xb3, 0xc0, 0x47, 0x2e, 0x48, 0xc2, 0xb3, + 0xd2, 0x04, 0xc2, 0xb3, 0xe4, 0x47, 0x2c, 0x2e, 0xc2, 0xb3, 0xf0, 0x47, + 0x00, 0x58, 0x42, 0xb4, 0x02, 0xc3, 0xe5, 0x35, 0x0e, 0xd3, 0x41, 0x48, + 0x17, 0x7c, 0xc2, 0xb4, 0x17, 0x19, 0xc2, 0xb4, 0x23, 0x4b, 0x22, 0x2b, + 0xc2, 0xb4, 0x2f, 0x45, 0xd9, 0x57, 0xc2, 0xb4, 0x3b, 0x45, 0xd8, 0x76, + 0xc2, 0xb4, 0x56, 0x05, 0xc2, 0xb4, 0x6e, 0x15, 0xc2, 0xb4, 0x86, 0x46, + 0xcb, 0x0f, 0xc2, 0xb4, 0x92, 0x47, 0x2e, 0x48, 0xc2, 0xb4, 0xa4, 0x04, + 0xc2, 0xb4, 0xb6, 0x47, 0x2c, 0x2e, 0xc2, 0xb4, 0xc2, 0x47, 0x00, 0x58, + 0x42, 0xb4, 0xd7, 0x48, 0x0b, 0xc8, 0xc2, 0xb4, 0xec, 0x48, 0xbf, 0xc2, + 0xc2, 0xb4, 0xf8, 0x45, 0xd5, 0xf1, 0x42, 0xb5, 0x0d, 0xd5, 0x37, 0x19, + 0x0e, 0xc9, 0x39, 0x43, 0x11, 0x49, 0xc2, 0xb5, 0x22, 0xcf, 0x65, 0x49, + 0x0e, 0xc9, 0x20, 0xc6, 0x00, 0x58, 0x0e, 0xd2, 0xc1, 0xc6, 0x24, 0x3b, + 0x0e, 0xd2, 0xb8, 0xc6, 0x13, 0x67, 0x0e, 0xd2, 0xb1, 0x46, 0x17, 0x8d, + 0x42, 0xb5, 0x2e, 0x00, 0x42, 0xb5, 0x40, 0x00, 0x42, 0xb5, 0x4c, 0xc9, + 0x46, 0x70, 0x0e, 0xd2, 0x53, 0x02, 0xb5, 0x58, 0xc4, 0x38, 0xc1, 0x0e, + 0xd2, 0x3b, 0x02, 0xb5, 0x5c, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x31, 0xc7, + 0x27, 0xb2, 0x0e, 0xd2, 0x29, 0xc6, 0x02, 0xd1, 0x0e, 0xd2, 0x20, 0x00, + 0x42, 0xb5, 0x60, 0x00, 0x42, 0xb5, 0x6c, 0xc2, 0x02, 0xae, 0x0e, 0xd0, + 0x81, 0xc4, 0x03, 0xc8, 0x0e, 0xd0, 0x68, 0xcb, 0x90, 0xbd, 0x0e, 0xcf, + 0xdb, 0x02, 0xb5, 0x78, 0xc3, 0x01, 0xc8, 0x0e, 0xcf, 0xc0, 0xc5, 0x17, + 0x14, 0x0e, 0xcf, 0xb1, 0xc5, 0x03, 0x13, 0x0e, 0xcf, 0xa8, 0x97, 0x08, + 0xae, 0xe8, 0x8b, 0x08, 0xae, 0xd0, 0xd6, 0x2e, 0x96, 0x08, 0xae, 0xc1, + 0x83, 0x08, 0xac, 0xf0, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0xc9, 0x83, 0x08, + 0xac, 0xc0, 0x8e, 0x08, 0xac, 0x43, 0x02, 0xb5, 0x7e, 0x94, 0x08, 0xac, + 0x32, 0x02, 0xb5, 0x82, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0xd9, 0x83, 0x08, + 0xac, 0xd0, 0x45, 0x00, 0x8c, 0xc2, 0xb5, 0x86, 0xcb, 0x99, 0x76, 0x08, + 0xae, 0x7a, 0x02, 0xb5, 0xaa, 0xc3, 0x01, 0x5d, 0x08, 0xae, 0x29, 0xc3, + 0x02, 0xa3, 0x08, 0xae, 0x20, 0xc4, 0x1e, 0x97, 0x08, 0xad, 0xf9, 0xc5, + 0x40, 0xe7, 0x08, 0xad, 0xf0, 0x8e, 0x05, 0x45, 0xe8, 0x94, 0x05, 0x45, + 0xd8, 0x94, 0x05, 0x44, 0x43, 0x02, 0xb5, 0xb0, 0x8e, 0x05, 0x44, 0x52, + 0x02, 0xb5, 0xb4, 0x83, 0x05, 0x44, 0xe1, 0xc2, 0x00, 0xd0, 0x05, 0x44, + 0xe8, 0x83, 0x05, 0x44, 0xf1, 0xc2, 0x00, 0xd0, 0x05, 0x44, 0xf8, 0xc2, + 0x02, 0xa0, 0x05, 0x46, 0x91, 0xc4, 0x02, 0xde, 0x05, 0x46, 0x98, 0xc3, + 0x09, 0x9e, 0x05, 0x46, 0xa1, 0xc3, 0x0d, 0x14, 0x05, 0x46, 0xa8, 0xc2, + 0x22, 0xcc, 0x05, 0x46, 0xb1, 0xc4, 0x18, 0x10, 0x05, 0x46, 0xb8, 0xe0, + 0x0a, 0x87, 0x0f, 0xb3, 0xb0, 0x4b, 0x94, 0x85, 0xc2, 0xb5, 0xb8, 0xc7, + 0x1b, 0x0c, 0x08, 0x8e, 0x40, 0xc7, 0xc3, 0x61, 0x08, 0x8e, 0xd9, 0xd4, + 0x39, 0xa8, 0x08, 0x8e, 0x79, 0xc5, 0x33, 0x5d, 0x08, 0x8e, 0x51, 0xcb, + 0x93, 0xf6, 0x08, 0x8e, 0x19, 0xcb, 0x8f, 0xe1, 0x08, 0x8e, 0x11, 0x03, + 0xc2, 0xb5, 0xc0, 0x42, 0x07, 0xb2, 0xc2, 0xb5, 0xcc, 0xcb, 0x1e, 0x89, + 0x08, 0x8c, 0x00, 0xc4, 0x26, 0x78, 0x08, 0x8e, 0xc9, 0xc5, 0x06, 0xdb, + 0x08, 0x8e, 0xc1, 0x15, 0xc2, 0xb5, 0xd8, 0x08, 0xc2, 0xb5, 0xe4, 0x16, + 0xc2, 0xb5, 0xf0, 0xc3, 0x05, 0x14, 0x08, 0x8e, 0x89, 0xc4, 0x15, 0xe7, + 0x08, 0x8e, 
0x80, 0xcf, 0x61, 0x11, 0x08, 0x8e, 0x71, 0x03, 0xc2, 0xb5, + 0xfc, 0x91, 0x08, 0x8d, 0xf1, 0x87, 0x08, 0x8d, 0xe1, 0x48, 0xb2, 0x2d, + 0xc2, 0xb6, 0x08, 0x97, 0x08, 0x8d, 0xb3, 0x02, 0xb6, 0x16, 0x8b, 0x08, + 0x8d, 0xa2, 0x02, 0xb6, 0x1a, 0x83, 0x08, 0x8d, 0x89, 0xc2, 0x0d, 0xf6, + 0x08, 0x8d, 0x81, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x78, 0x83, 0x08, 0x8d, + 0x71, 0x47, 0xb2, 0x2e, 0x42, 0xb6, 0x1e, 0xc2, 0x00, 0xdb, 0x08, 0x8d, + 0x69, 0x83, 0x08, 0x8d, 0x60, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x41, 0x83, + 0x08, 0x8d, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x31, 0x83, 0x08, 0x8d, + 0x28, 0x83, 0x08, 0x8d, 0x21, 0xc2, 0x00, 0xc1, 0x08, 0x8c, 0xf9, 0xc2, + 0x19, 0x2c, 0x08, 0x8c, 0xd1, 0xc2, 0x01, 0x30, 0x08, 0x8c, 0xa8, 0xc2, + 0x00, 0xd0, 0x08, 0x8d, 0x19, 0x83, 0x08, 0x8d, 0x11, 0x06, 0x42, 0xb6, + 0x2c, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x09, 0x83, 0x08, 0x8d, 0x01, 0x16, + 0x42, 0xb6, 0x36, 0xc2, 0x00, 0xd0, 0x08, 0x8c, 0xc9, 0x83, 0x08, 0x8c, + 0xc0, 0xc2, 0x00, 0xd0, 0x08, 0x8c, 0xb9, 0x83, 0x08, 0x8c, 0xb0, 0xc2, + 0x00, 0xd0, 0x08, 0x8c, 0xa1, 0x83, 0x08, 0x8c, 0x98, 0xc2, 0x00, 0xd0, + 0x08, 0x8c, 0x91, 0x83, 0x08, 0x8c, 0x88, 0x97, 0x08, 0x8c, 0x81, 0x8b, + 0x08, 0x8c, 0x71, 0x83, 0x08, 0x8c, 0x20, 0x97, 0x08, 0x8c, 0x40, 0x8b, + 0x08, 0x8c, 0x30, 0xc3, 0x00, 0x2d, 0x08, 0x22, 0xa1, 0xc2, 0x17, 0x28, + 0x08, 0x22, 0xf0, 0x96, 0x08, 0x23, 0x81, 0x94, 0x08, 0x23, 0xe8, 0x87, + 0x08, 0x23, 0xc1, 0xc3, 0x5d, 0x32, 0x08, 0x23, 0xe0, 0xcd, 0x55, 0x9a, + 0x01, 0x57, 0x41, 0xd5, 0x32, 0xab, 0x01, 0x57, 0x48, 0xe0, 0x06, 0x07, + 0x01, 0x5a, 0xf8, 0xc9, 0x1f, 0x5a, 0x01, 0x49, 0x31, 0xd4, 0x3c, 0x28, + 0x01, 0x49, 0x50, 0xc9, 0xb4, 0x5b, 0x01, 0x0f, 0x91, 0xc9, 0x1f, 0x5a, + 0x01, 0x49, 0x29, 0xd4, 0x3c, 0xa0, 0x01, 0x49, 0x49, 0xd9, 0x20, 0x5d, + 0x01, 0x49, 0x68, 0xca, 0x9d, 0x06, 0x01, 0x37, 0xb1, 0xc2, 0x01, 0xbb, + 0x01, 0x1e, 0x68, 0x0e, 0xc2, 0xb6, 0x40, 0x46, 0x02, 0xae, 0xc2, 0xb6, + 0x4c, 0xd0, 0x5d, 0x52, 0x01, 0x2f, 0x41, 0xd8, 0x24, 0x0b, 0x01, 0x2d, + 0x49, 0xda, 0x1c, 0xd4, 0x01, 0x2d, 0x31, 0xcd, 0x7a, 0x93, 0x01, 0x2d, + 0x29, 0xcf, 0x64, 0xe0, 0x01, 0x2d, 0x21, 0xd1, 0x4f, 0xbe, 0x01, 0x4f, + 0x01, 0xce, 0x74, 0xda, 0x01, 0x58, 0x91, 0xd1, 0x53, 0xba, 0x01, 0x58, + 0x98, 0xc5, 0x0a, 0xe2, 0x01, 0x18, 0x89, 0x89, 0x01, 0x9e, 0x90, 0x44, + 0x1a, 0x16, 0x42, 0xb6, 0x58, 0x44, 0x1a, 0x16, 0x42, 0xb6, 0x64, 0xc4, + 0x78, 0x47, 0x01, 0x98, 0x21, 0xc2, 0x00, 0x43, 0x01, 0x98, 0x28, 0x92, + 0x01, 0x14, 0x99, 0x8e, 0x01, 0x9c, 0x40, 0xc9, 0xad, 0x0b, 0x01, 0x9b, + 0xf8, 0x00, 0x42, 0xb6, 0x70, 0xd5, 0x35, 0x8a, 0x01, 0x56, 0x71, 0xc5, + 0xd5, 0x06, 0x01, 0x9a, 0x89, 0xc2, 0x00, 0x39, 0x01, 0x9a, 0x90, 0xc3, + 0x71, 0xec, 0x01, 0x9a, 0x99, 0xc5, 0xd8, 0xf8, 0x01, 0x9a, 0xa0, 0xc2, + 0x14, 0x48, 0x01, 0x9a, 0xa9, 0xc6, 0xcc, 0x0b, 0x01, 0x9a, 0xb0, 0xc7, + 0x04, 0x32, 0x01, 0x9d, 0x72, 0x02, 0xb6, 0x7c, 0xc3, 0x19, 0x86, 0x01, + 0x99, 0x50, 0xc6, 0xca, 0x8b, 0x01, 0x99, 0x91, 0xc4, 0xe1, 0x3f, 0x01, + 0x99, 0x99, 0xc3, 0x00, 0xea, 0x01, 0x99, 0xa8, 0xc7, 0xc8, 0x8c, 0x01, + 0x99, 0xb1, 0xc4, 0xde, 0xfb, 0x01, 0x99, 0xc8, 0x90, 0x01, 0x99, 0xf9, + 0x11, 0x42, 0xb6, 0x82, 0x83, 0x01, 0x9b, 0x88, 0xc3, 0x14, 0xc6, 0x01, + 0x99, 0x20, 0x00, 0x42, 0xb6, 0x8c, 0xd0, 0x5f, 0x32, 0x01, 0x5e, 0x81, + 0xc4, 0x0f, 0xd7, 0x01, 0x99, 0xe9, 0xc3, 0x2d, 0x61, 0x01, 0x9a, 0x00, + 0x03, 0xc2, 0xb6, 0x98, 0xc5, 0xd6, 0x64, 0x01, 0x9c, 0x00, 0xc7, 0xc4, + 0xbf, 0x01, 0x99, 0x71, 0x0d, 0x42, 0xb6, 0xa4, 0xc2, 0x00, 0xfb, 0x01, + 0x99, 0xb9, 0x10, 0xc2, 0xb6, 0xae, 0xc3, 0x90, 0x19, 0x01, 0x99, 0xd8, + 0x89, 0x01, 
0x96, 0x69, 0x47, 0xc0, 0xe4, 0x42, 0xb6, 0xba, 0xc3, 0x02, + 0x30, 0x01, 0x98, 0x59, 0x14, 0x42, 0xb6, 0xd8, 0xc6, 0xd2, 0x29, 0x01, + 0x98, 0xa9, 0xc7, 0xc3, 0xd1, 0x01, 0x98, 0xb1, 0xc5, 0xdb, 0xaa, 0x01, + 0x98, 0xb8, 0xc6, 0xcc, 0xfb, 0x01, 0x98, 0xd1, 0xc4, 0xe4, 0x27, 0x01, + 0x98, 0xd8, 0xc4, 0xdf, 0x4f, 0x01, 0x98, 0xe9, 0xc3, 0x79, 0x25, 0x01, + 0x98, 0xf0, 0x00, 0x42, 0xb6, 0xe4, 0xc3, 0x01, 0xe7, 0x01, 0x98, 0x71, + 0xc3, 0x51, 0xee, 0x01, 0x98, 0x79, 0x8e, 0x01, 0x9f, 0xf8, 0xc2, 0x01, + 0x30, 0x01, 0x98, 0x81, 0xc3, 0xe6, 0x5f, 0x01, 0x98, 0x89, 0xc5, 0xdc, + 0xae, 0x01, 0x98, 0x98, 0xc3, 0x0f, 0xd9, 0x01, 0x98, 0xc8, 0xc5, 0xd7, + 0xd6, 0x01, 0x98, 0xf9, 0xc6, 0xcb, 0xff, 0x01, 0x99, 0x00, 0x8b, 0x01, + 0x99, 0x11, 0x91, 0x01, 0x99, 0x18, 0xc2, 0x00, 0x10, 0x01, 0x99, 0x40, + 0xc5, 0xd6, 0xbe, 0x01, 0x99, 0x69, 0x94, 0x01, 0x9b, 0xa0, 0x0b, 0xc2, + 0xb6, 0xee, 0xc3, 0xe6, 0x71, 0x01, 0x9a, 0x29, 0xc4, 0xdf, 0x7b, 0x01, + 0x9a, 0x31, 0xc5, 0xda, 0x06, 0x01, 0x9a, 0x38, 0xc5, 0xdd, 0xdf, 0x01, + 0x9a, 0x41, 0xc2, 0x00, 0x2c, 0x01, 0x9a, 0x4b, 0x02, 0xb6, 0xfa, 0x8e, + 0x01, 0x9e, 0xa8, 0xc2, 0x01, 0x30, 0x01, 0x9a, 0x5b, 0x02, 0xb7, 0x00, + 0xc5, 0xc3, 0xd3, 0x01, 0x9a, 0x68, 0x88, 0x01, 0x9c, 0x61, 0x89, 0x01, + 0x9c, 0x69, 0x83, 0x01, 0x9c, 0x11, 0x8e, 0x01, 0x9c, 0xa9, 0x8f, 0x01, + 0x9c, 0xd9, 0x95, 0x01, 0x9d, 0x91, 0x98, 0x01, 0x9d, 0xb1, 0x99, 0x01, + 0x9d, 0xe0, 0x11, 0xc2, 0xb7, 0x06, 0xc7, 0x0b, 0x09, 0x01, 0x9d, 0x09, + 0xc5, 0xd9, 0x11, 0x01, 0x9d, 0x28, 0xc6, 0x03, 0x12, 0x01, 0x9e, 0xa0, + 0x00, 0x42, 0xb7, 0x15, 0xc5, 0x6d, 0xb4, 0x01, 0x9d, 0xc8, 0xc5, 0x6d, + 0xb4, 0x01, 0x9d, 0xf8, 0xc2, 0x00, 0x58, 0x01, 0x9a, 0x71, 0xc2, 0x17, + 0x99, 0x01, 0x9a, 0x78, 0x46, 0x19, 0xbb, 0xc2, 0xb7, 0x21, 0xc6, 0xd0, + 0xa3, 0x0f, 0x8d, 0x48, 0xce, 0x6e, 0x20, 0x0f, 0x8d, 0x29, 0x4f, 0x0b, + 0x17, 0x42, 0xb7, 0x2d, 0xcd, 0x7b, 0xf2, 0x0f, 0x8d, 0x09, 0xcb, 0x97, + 0x66, 0x0f, 0x8c, 0xe0, 0xc2, 0x00, 0x06, 0x0f, 0x90, 0x99, 0xc2, 0x0d, + 0xf6, 0x0f, 0x90, 0x11, 0xc4, 0xe0, 0xb7, 0x0f, 0x90, 0x08, 0xd2, 0x48, + 0xfb, 0x0f, 0x8d, 0x11, 0xc3, 0x28, 0xa9, 0x0f, 0x8c, 0xe8, 0x26, 0xc2, + 0xb7, 0x95, 0x22, 0xc2, 0xb7, 0xa1, 0x24, 0xc2, 0xb7, 0xd5, 0x23, 0xc2, + 0xb7, 0xf1, 0x25, 0xc2, 0xb8, 0x15, 0x42, 0xe6, 0x8f, 0x42, 0xb8, 0x27, + 0x8d, 0x0f, 0x8c, 0xf1, 0xcf, 0x05, 0x18, 0x01, 0x71, 0x60, 0xc9, 0x2a, + 0xec, 0x01, 0x21, 0x28, 0xc4, 0x09, 0x9d, 0x01, 0x20, 0xa1, 0x16, 0xc2, + 0xb8, 0x3d, 0xc3, 0x05, 0x14, 0x01, 0x20, 0x88, 0xc6, 0x01, 0xdb, 0x01, + 0x20, 0xc9, 0x16, 0x42, 0xb8, 0x49, 0xc3, 0x1d, 0x35, 0x00, 0x43, 0x51, + 0x42, 0x02, 0xa7, 0xc2, 0xb8, 0x58, 0xc2, 0x00, 0x39, 0x00, 0x43, 0x39, + 0xc3, 0x39, 0x6d, 0x00, 0x43, 0x31, 0x10, 0xc2, 0xb8, 0x62, 0xc3, 0x1f, + 0xdf, 0x00, 0x43, 0x19, 0xc2, 0x25, 0x3b, 0x00, 0x43, 0x08, 0xc7, 0xc2, + 0xf8, 0x00, 0x39, 0x79, 0xc6, 0xce, 0xdb, 0x00, 0x39, 0x71, 0xc5, 0xd7, + 0xae, 0x00, 0x39, 0x68, 0xc9, 0xad, 0xa4, 0x00, 0x38, 0xe0, 0xc2, 0x14, + 0xda, 0x00, 0x3a, 0x79, 0xc5, 0xdc, 0xe5, 0x00, 0x3a, 0x71, 0xc5, 0xd4, + 0x20, 0x00, 0x3a, 0x68, 0xc5, 0x05, 0x02, 0x00, 0x39, 0xd9, 0xc5, 0x00, + 0xd4, 0x00, 0x39, 0xd0, 0x48, 0x84, 0x8d, 0x42, 0xb8, 0x72, 0xcc, 0x84, + 0x8d, 0x00, 0x38, 0x40, 0xd1, 0x55, 0x52, 0x01, 0x14, 0x59, 0xcb, 0x23, + 0xa0, 0x01, 0x14, 0x3b, 0x02, 0xb8, 0x7e, 0x46, 0x00, 0xd4, 0x42, 0xb8, + 0x84, 0xc4, 0x0e, 0xa6, 0x01, 0x56, 0xa1, 0xc6, 0x2d, 0xd0, 0x01, 0x56, + 0xb0, 0x90, 0x01, 0x03, 0xf9, 0x8b, 0x01, 0x03, 0x88, 0x8f, 0x00, 0xdd, + 0xf9, 0x8d, 0x00, 0xdd, 0xf0, 0x09, 0xc2, 0xb8, 0x9c, 0xc5, 0xd4, 0xc0, + 0x00, 0xdc, 
0x00, 0xcf, 0x33, 0xad, 0x01, 0x56, 0x18, 0xcb, 0x0e, 0xbd, + 0x01, 0x56, 0x29, 0xce, 0x33, 0x92, 0x01, 0x56, 0x39, 0xcf, 0x6a, 0x8f, + 0x01, 0x56, 0x49, 0xcc, 0x24, 0x47, 0x01, 0x56, 0x58, 0x45, 0x02, 0x9a, + 0x42, 0xb8, 0xa8, 0xc3, 0x3b, 0x36, 0x0f, 0xb0, 0x39, 0xc4, 0x75, 0x6e, + 0x0f, 0xb0, 0x41, 0xd0, 0x55, 0x0f, 0x0f, 0xb0, 0x68, 0xcb, 0x1d, 0x4b, + 0x0f, 0xb0, 0x53, 0x02, 0xb8, 0xba, 0xc9, 0xb4, 0xd0, 0x0f, 0xb0, 0x70, + 0x45, 0x00, 0x8c, 0xc2, 0xb8, 0xc0, 0xc9, 0xb4, 0x49, 0x01, 0x10, 0x68, + 0x83, 0x07, 0xf2, 0x81, 0xc9, 0xb4, 0x64, 0x07, 0xf3, 0x58, 0x46, 0x00, + 0x8b, 0x42, 0xb8, 0xcc, 0xc3, 0x05, 0x14, 0x01, 0x0b, 0x83, 0x02, 0xb8, + 0xd8, 0x08, 0xc2, 0xb8, 0xdc, 0x16, 0xc2, 0xb8, 0xe6, 0x07, 0xc2, 0xb8, + 0xf6, 0xc4, 0x26, 0x78, 0x01, 0x0b, 0xc1, 0x15, 0x42, 0xb9, 0x02, 0xcb, + 0x1a, 0x50, 0x07, 0xf2, 0xd1, 0xd6, 0x08, 0x88, 0x07, 0xf2, 0xf1, 0xcd, + 0x00, 0x32, 0x07, 0xf2, 0xe0, 0xcb, 0x1a, 0x50, 0x07, 0xf2, 0xc9, 0xcd, + 0x00, 0x32, 0x07, 0xf2, 0xd9, 0xd6, 0x08, 0x88, 0x07, 0xf2, 0xe8, 0xcb, + 0x0e, 0xbd, 0x01, 0x55, 0x79, 0xcc, 0x24, 0x47, 0x01, 0x55, 0x88, 0xc8, + 0x07, 0x5f, 0x01, 0x55, 0xa9, 0xcf, 0x6a, 0x8f, 0x01, 0x55, 0xc8, 0xcb, + 0x1a, 0x50, 0x07, 0xf1, 0xa9, 0xd6, 0x08, 0x88, 0x07, 0xf1, 0xc9, 0xd8, + 0x21, 0x83, 0x07, 0xf1, 0xd9, 0xd4, 0x38, 0xf4, 0x07, 0xf1, 0xe9, 0xcd, + 0x0b, 0x91, 0x07, 0xf1, 0xf9, 0x46, 0x01, 0xfc, 0xc2, 0xb9, 0x0e, 0xce, + 0x25, 0xad, 0x07, 0xf2, 0x39, 0x05, 0x42, 0xb9, 0x1a, 0xcc, 0x00, 0x33, + 0x07, 0xf1, 0xc1, 0xcd, 0x69, 0x65, 0x07, 0xf2, 0x10, 0x4e, 0x21, 0x89, + 0xc2, 0xb9, 0x26, 0xce, 0x69, 0x64, 0x07, 0xf2, 0x20, 0xc6, 0xcf, 0x05, + 0x0f, 0x85, 0x11, 0xc6, 0x78, 0x78, 0x0f, 0x85, 0x91, 0xc8, 0xba, 0x2a, + 0x0f, 0x86, 0x11, 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0x90, 0xc6, 0xcf, 0x05, + 0x0f, 0x85, 0x19, 0xc6, 0x78, 0x78, 0x0f, 0x85, 0x99, 0xc8, 0xba, 0x2a, + 0x0f, 0x86, 0x19, 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0x98, 0xc6, 0xcf, 0x05, + 0x0f, 0x85, 0x51, 0xc6, 0x78, 0x78, 0x0f, 0x85, 0xd1, 0xc8, 0xba, 0x2a, + 0x0f, 0x86, 0x51, 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0xd0, 0x9e, 0x0f, 0x87, + 0x0b, 0x02, 0xb9, 0x32, 0x9f, 0x0f, 0x87, 0x13, 0x02, 0xb9, 0x5a, 0xa0, + 0x0f, 0x87, 0x19, 0xa1, 0x0f, 0x87, 0x21, 0xa2, 0x0f, 0x87, 0x29, 0xa3, + 0x0f, 0x87, 0x31, 0xa4, 0x0f, 0x87, 0x39, 0xa5, 0x0f, 0x87, 0x41, 0xa6, + 0x0f, 0x87, 0x48, 0x46, 0xc5, 0x7d, 0xc2, 0xb9, 0x62, 0xc2, 0x00, 0x95, + 0x0f, 0x87, 0x00, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x29, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xa9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x29, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xa8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x31, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xb1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x31, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xb0, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x39, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xb9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x39, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xb8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x61, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xe1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x61, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xe0, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x71, 0xc6, 0x78, 0x78, + 0x0f, 0x85, 0xf1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x71, 0xc5, 0xdd, 0x49, + 0x0f, 0x86, 0xf0, 0xc8, 0x01, 0x92, 0x01, 0x51, 0xc9, 0xd1, 0x51, 0x56, + 0x01, 0x51, 0x71, 0xd0, 0x5b, 0x92, 0x01, 0x51, 0x68, 0xce, 0x6b, 0x8e, + 0x01, 0x51, 0x41, 0x15, 0xc2, 0xb9, 0x7a, 0x46, 0x33, 0x92, 0xc2, 0xb9, + 0x86, 0xc9, 0x0e, 0x6e, 0x01, 0x51, 0x29, 0xd7, 0x26, 0x60, 0x01, 0x51, + 0x18, 0xc2, 0x02, 0xae, 0x00, 0x04, 0x61, 0xc8, 0xbd, 0x3a, 0x00, 0x04, + 0x61, 0xc4, 0x03, 0xc8, 0x00, 0x04, 0x59, 0xc7, 0x27, 0xb2, 0x00, 0x04, + 0x58, 0xc3, 
0x18, 0x13, 0x01, 0x24, 0x39, 0xc3, 0x22, 0x45, 0x01, 0x23, + 0xf8, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x70, 0xc2, 0x00, 0xd3, 0x01, 0x90, + 0xc0, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x80, 0xc2, 0x00, 0xd3, 0x01, 0x90, + 0xc8, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x98, 0xc2, 0x00, 0xd3, 0x01, 0x90, + 0xd0, 0x00, 0x42, 0xb9, 0x92, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0xb8, 0xc2, + 0x00, 0x5f, 0x01, 0x91, 0x21, 0xc2, 0x01, 0x19, 0x01, 0x91, 0x59, 0xc7, + 0xc4, 0xf0, 0x01, 0x91, 0xb0, 0xc3, 0x18, 0x11, 0x01, 0x91, 0x31, 0xc2, + 0x01, 0xd0, 0x01, 0x92, 0x10, 0x90, 0x01, 0x91, 0x81, 0xc7, 0xc8, 0x54, + 0x01, 0x91, 0xe0, 0xc3, 0x04, 0x20, 0x01, 0x91, 0x89, 0xc3, 0xe5, 0x0f, + 0x01, 0x91, 0xd8, 0xc5, 0x53, 0x93, 0x01, 0x91, 0xf1, 0x96, 0x01, 0x92, + 0x08, 0xc6, 0x26, 0xf6, 0x08, 0xd7, 0xb0, 0x9b, 0x08, 0xd7, 0x21, 0x90, + 0x08, 0xd7, 0x03, 0x02, 0xb9, 0x9a, 0x99, 0x08, 0xd7, 0x11, 0x8e, 0x08, + 0xd7, 0x09, 0x8f, 0x08, 0xd6, 0xf9, 0x96, 0x08, 0xd6, 0xf1, 0x8d, 0x08, + 0xd6, 0xe9, 0x92, 0x08, 0xd6, 0xe0, 0xc6, 0x26, 0xf6, 0x08, 0xd7, 0x68, + 0x19, 0xc2, 0xb9, 0x9e, 0xc2, 0x00, 0xc4, 0x08, 0x43, 0xf1, 0xc4, 0x02, + 0xde, 0x08, 0x43, 0xd8, 0xc3, 0x0d, 0x14, 0x08, 0x43, 0xe9, 0xc3, 0x09, + 0x9e, 0x08, 0x43, 0xe0, 0x16, 0xc2, 0xb9, 0xa8, 0x15, 0xc2, 0xb9, 0xb4, + 0xc4, 0x5d, 0xe2, 0x08, 0x43, 0xa1, 0xc4, 0xb9, 0x7e, 0x08, 0x43, 0x99, + 0xc2, 0x00, 0x67, 0x08, 0x43, 0x89, 0x03, 0xc2, 0xb9, 0xbe, 0xc3, 0x20, + 0x18, 0x08, 0x43, 0x71, 0xc9, 0xb3, 0x5f, 0x08, 0x43, 0x69, 0xc3, 0x00, + 0x4e, 0x08, 0x43, 0x61, 0xc6, 0xcf, 0xd7, 0x08, 0x43, 0x59, 0xc4, 0xe0, + 0xe7, 0x08, 0x43, 0x51, 0xc4, 0x4a, 0xb9, 0x08, 0x43, 0x49, 0xc2, 0x01, + 0x7f, 0x08, 0x43, 0x23, 0x02, 0xb9, 0xca, 0xc5, 0x4a, 0xb3, 0x08, 0x43, + 0x31, 0xc3, 0x7e, 0x89, 0x08, 0x43, 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x43, + 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x43, 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x43, + 0x08, 0xc2, 0x15, 0xb0, 0x0b, 0x5c, 0x69, 0xc2, 0x00, 0x03, 0x0b, 0x5c, + 0x31, 0xc4, 0x9f, 0x7d, 0x0b, 0x5b, 0xe8, 0xc3, 0xa6, 0x62, 0x0b, 0x59, + 0x59, 0xc3, 0x48, 0x8d, 0x0b, 0x58, 0xe8, 0xc5, 0xd6, 0x23, 0x0b, 0x5b, + 0xa8, 0xc4, 0xe0, 0x3f, 0x0b, 0x59, 0xf9, 0xc3, 0x49, 0x2f, 0x0b, 0x59, + 0xf1, 0xc3, 0x79, 0xe7, 0x0b, 0x59, 0xe9, 0xc5, 0xda, 0x38, 0x0b, 0x59, + 0xe0, 0xc3, 0x44, 0x23, 0x0b, 0x59, 0xd1, 0xc2, 0x00, 0x7a, 0x0b, 0x59, + 0xb8, 0xc8, 0xbe, 0x3a, 0x0b, 0x5b, 0x01, 0xc9, 0x4b, 0x94, 0x0b, 0x5a, + 0xe8, 0x04, 0xc2, 0xb9, 0xd0, 0xcc, 0x87, 0x09, 0x0f, 0xb2, 0x79, 0xcc, + 0x85, 0xc5, 0x0f, 0xb2, 0x71, 0xc9, 0xa8, 0x31, 0x0f, 0xce, 0xa9, 0xc5, + 0xda, 0x01, 0x0f, 0xd6, 0x28, 0xe0, 0x07, 0x67, 0x0f, 0xb2, 0x60, 0xcb, + 0x92, 0x6a, 0x0f, 0xce, 0xb1, 0xce, 0x6e, 0x12, 0x0f, 0xce, 0xc0, 0x91, + 0x08, 0x48, 0xd1, 0xc4, 0x18, 0x12, 0x08, 0x48, 0xc0, 0xc9, 0x1e, 0x8b, + 0x05, 0x43, 0x98, 0x83, 0x05, 0x42, 0x81, 0xc2, 0x00, 0xd0, 0x05, 0x42, + 0x88, 0x83, 0x05, 0x43, 0x49, 0xc2, 0x00, 0xd0, 0x05, 0x43, 0x50, 0xc2, + 0x01, 0x4a, 0x05, 0x43, 0x39, 0xc2, 0x19, 0x2c, 0x05, 0x43, 0x41, 0xc2, + 0x00, 0x39, 0x05, 0x43, 0x88, 0xd4, 0x38, 0xe0, 0x08, 0x0f, 0xe8, 0xc4, + 0x1e, 0x97, 0x00, 0x4a, 0x69, 0xc5, 0x40, 0xe7, 0x00, 0x48, 0x18, 0xc7, + 0x7a, 0x7f, 0x00, 0x49, 0xe9, 0xc7, 0x14, 0x39, 0x00, 0x48, 0x10, 0x00, + 0x42, 0xb9, 0xdc, 0xc6, 0xc3, 0x62, 0x05, 0x47, 0xe1, 0xd2, 0x4a, 0x87, + 0x05, 0x47, 0x90, 0x94, 0x00, 0x4a, 0x20, 0x8e, 0x00, 0x4b, 0x18, 0x87, + 0x00, 0x4a, 0xb8, 0x83, 0x00, 0x49, 0xb1, 0x44, 0x2e, 0xf0, 0x42, 0xb9, + 0xec, 0x8e, 0x00, 0x48, 0x63, 0x02, 0xb9, 0xf8, 0x94, 0x00, 0x48, 0x5a, + 0x02, 0xb9, 0xfc, 0xc2, 0x00, 0xdb, 0x00, 0x49, 0xa1, 0x83, 0x00, 0x49, + 0x98, 0xc2, 
0x00, 0xc1, 0x00, 0x49, 0x49, 0x83, 0x00, 0x49, 0x18, 0xc2, + 0x00, 0xd0, 0x00, 0x49, 0x11, 0x83, 0x00, 0x49, 0x09, 0x06, 0x42, 0xba, + 0x00, 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x01, 0x83, 0x00, 0x48, 0xf8, 0x45, + 0xc7, 0x7d, 0x42, 0xba, 0x0a, 0x83, 0x00, 0x48, 0xc1, 0xc2, 0x00, 0xd0, + 0x00, 0x4a, 0xd0, 0x83, 0x00, 0x48, 0xb1, 0xc2, 0x00, 0xd0, 0x00, 0x4a, + 0xc8, 0x87, 0x00, 0x4b, 0xb8, 0xc4, 0x18, 0x10, 0x00, 0x4b, 0x69, 0xc2, + 0x22, 0xcc, 0x00, 0x4b, 0x60, 0xc3, 0x0d, 0x14, 0x00, 0x4b, 0x59, 0xc3, + 0x09, 0x9e, 0x00, 0x4b, 0x50, 0xc4, 0x02, 0xde, 0x00, 0x4b, 0x49, 0xc2, + 0x02, 0xa0, 0x00, 0x4b, 0x40, 0x8b, 0x08, 0x20, 0x01, 0x83, 0x08, 0x20, + 0x13, 0x02, 0xba, 0x16, 0x91, 0x08, 0x20, 0x23, 0x02, 0xba, 0x1a, 0x87, + 0x08, 0x20, 0x08, 0x8b, 0x08, 0x20, 0x31, 0x87, 0x08, 0x20, 0x39, 0x83, + 0x08, 0x20, 0x43, 0x02, 0xba, 0x1e, 0x91, 0x08, 0x20, 0x52, 0x02, 0xba, + 0x22, 0x99, 0x08, 0x20, 0x69, 0x8b, 0x08, 0x21, 0x30, 0xc2, 0x02, 0xe0, + 0x08, 0x20, 0x99, 0xc3, 0x0e, 0x65, 0x08, 0x20, 0xe0, 0x88, 0x08, 0x20, + 0xc9, 0xc2, 0x00, 0x8e, 0x08, 0x20, 0xd9, 0x95, 0x08, 0x20, 0xeb, 0x02, + 0xba, 0x26, 0x94, 0x08, 0x21, 0x09, 0x8e, 0x08, 0x21, 0x11, 0x8f, 0x08, + 0x21, 0x19, 0x90, 0x08, 0x21, 0x23, 0x02, 0xba, 0x2a, 0x99, 0x08, 0x21, + 0x38, 0xc2, 0x02, 0xe0, 0x08, 0x20, 0xf1, 0xc3, 0x0e, 0x65, 0x08, 0x21, + 0x00, 0x8b, 0x08, 0x21, 0x41, 0x87, 0x08, 0x21, 0x49, 0x83, 0x08, 0x21, + 0x53, 0x02, 0xba, 0x2e, 0x91, 0x08, 0x21, 0x62, 0x02, 0xba, 0x32, 0x8b, + 0x08, 0x21, 0x71, 0x87, 0x08, 0x21, 0x79, 0x83, 0x08, 0x21, 0x83, 0x02, + 0xba, 0x36, 0x91, 0x08, 0x21, 0x92, 0x02, 0xba, 0x3a, 0x99, 0x08, 0x21, + 0xa9, 0x8b, 0x08, 0x22, 0x70, 0xc2, 0x02, 0xe0, 0x08, 0x21, 0xd9, 0xc3, + 0x0e, 0x65, 0x08, 0x22, 0x20, 0x88, 0x08, 0x22, 0x09, 0xc2, 0x00, 0x8e, + 0x08, 0x22, 0x19, 0x95, 0x08, 0x22, 0x2b, 0x02, 0xba, 0x3e, 0x94, 0x08, + 0x22, 0x49, 0x8e, 0x08, 0x22, 0x51, 0x8f, 0x08, 0x22, 0x59, 0x90, 0x08, + 0x22, 0x63, 0x02, 0xba, 0x42, 0x99, 0x08, 0x22, 0x78, 0xc2, 0x02, 0xe0, + 0x08, 0x22, 0x31, 0xc3, 0x0e, 0x65, 0x08, 0x22, 0x40, 0xc9, 0x11, 0xf6, + 0x01, 0x24, 0x71, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x40, 0xc9, 0x11, 0xf6, + 0x01, 0x24, 0x69, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x38, 0xc9, 0x11, 0xf6, + 0x01, 0x24, 0x61, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x30, 0xc9, 0x11, 0xf6, + 0x01, 0x24, 0x59, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x28, 0xc9, 0x11, 0xf6, + 0x01, 0x24, 0x51, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x20, 0xc9, 0x11, 0xf6, + 0x01, 0x24, 0x49, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x18, 0xc4, 0x18, 0x10, + 0x08, 0xca, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xca, 0xb0, 0xc3, 0x0d, 0x14, + 0x08, 0xca, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xca, 0xa0, 0xc4, 0x02, 0xde, + 0x08, 0xca, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xca, 0x90, 0x8b, 0x08, 0xc9, + 0xb9, 0x83, 0x08, 0xc9, 0x80, 0x97, 0x08, 0xc9, 0xa0, 0x8b, 0x08, 0xc9, + 0x90, 0xc2, 0x00, 0xd0, 0x08, 0xc8, 0xc9, 0x83, 0x08, 0xc8, 0xc0, 0xc4, + 0x18, 0x10, 0x01, 0x3c, 0x81, 0xc2, 0x22, 0xcc, 0x01, 0x3c, 0x78, 0xc3, + 0x0d, 0x14, 0x01, 0x3c, 0x71, 0xc3, 0x09, 0x9e, 0x01, 0x3c, 0x68, 0xc4, + 0x02, 0xde, 0x01, 0x3c, 0x61, 0xc2, 0x02, 0xa0, 0x01, 0x3c, 0x58, 0x45, + 0x01, 0x95, 0xc2, 0xba, 0x46, 0xc9, 0x61, 0x53, 0x01, 0x48, 0x58, 0xcd, + 0x7e, 0x3b, 0x01, 0x0d, 0x09, 0x46, 0x01, 0x9a, 0x42, 0xba, 0x52, 0xc5, + 0x01, 0xa2, 0x0f, 0xc2, 0x39, 0xd0, 0x58, 0x62, 0x0f, 0xc2, 0x18, 0x44, + 0x00, 0x49, 0xc2, 0xba, 0x58, 0x45, 0x00, 0x2c, 0x42, 0xba, 0x62, 0x00, + 0x42, 0xba, 0x6c, 0xca, 0xa3, 0x64, 0x01, 0x27, 0xf1, 0x46, 0x09, 0x97, + 0x42, 0xba, 0x8a, 0x00, 0x42, 0xba, 0xa8, 0xc6, 0x2d, 0xd0, 0x01, 0x16, + 0x89, 0xc4, 
0x0e, 0xa6, 0x01, 0x16, 0x81, 0xc6, 0xb7, 0x74, 0x01, 0x55, + 0xe1, 0xcd, 0x6c, 0x99, 0x01, 0x72, 0x20, 0xc5, 0x13, 0x84, 0x01, 0x52, + 0x79, 0xcc, 0x06, 0xbb, 0x01, 0x52, 0x70, 0xcd, 0x68, 0xc0, 0x01, 0x57, + 0x61, 0xcb, 0x8d, 0x42, 0x01, 0x72, 0x48, 0xc3, 0x03, 0x4e, 0x01, 0x01, + 0x9b, 0x02, 0xba, 0xb4, 0xc6, 0xbf, 0x4c, 0x01, 0x55, 0xd8, 0x19, 0xc2, + 0xba, 0xba, 0x46, 0x19, 0xbb, 0x42, 0xba, 0xc4, 0xce, 0x55, 0x99, 0x01, + 0x55, 0x18, 0x46, 0x03, 0x13, 0xc2, 0xba, 0xd0, 0xc9, 0xb2, 0xbd, 0x01, + 0x0a, 0x28, 0x92, 0x01, 0x08, 0xcb, 0x02, 0xba, 0xe0, 0xc5, 0x51, 0x51, + 0x01, 0x09, 0xf1, 0x9c, 0x01, 0x09, 0x21, 0x94, 0x01, 0x08, 0xe9, 0x93, + 0x01, 0x08, 0xd1, 0x90, 0x01, 0x08, 0xa9, 0x8a, 0x01, 0x08, 0x69, 0x85, + 0x01, 0x08, 0x10, 0xc5, 0x51, 0x51, 0x01, 0x09, 0xe9, 0xc2, 0x0b, 0x19, + 0x01, 0x09, 0xe0, 0xc9, 0x00, 0xca, 0x01, 0x54, 0xc9, 0xcc, 0x07, 0xc7, + 0x01, 0x54, 0xd0, 0x4c, 0x24, 0xe3, 0xc2, 0xba, 0xe4, 0xd5, 0x38, 0x3f, + 0x01, 0x57, 0xc9, 0xd8, 0x23, 0x93, 0x01, 0x57, 0xd0, 0xc2, 0x00, 0xd0, + 0x08, 0xc0, 0xb9, 0x83, 0x08, 0xc0, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xc0, + 0xa9, 0x83, 0x08, 0xc0, 0xa0, 0xc4, 0x01, 0xa3, 0x0d, 0xe4, 0xc9, 0xc4, + 0x31, 0xef, 0x0d, 0xe4, 0x80, 0xc7, 0x27, 0x9b, 0x0d, 0xe3, 0x98, 0xc3, + 0x02, 0x6e, 0x0d, 0xe4, 0xb1, 0xc9, 0xac, 0xf0, 0x0d, 0xe4, 0x98, 0xc5, + 0x01, 0x22, 0x0d, 0xe3, 0xe0, 0xc2, 0x00, 0x2b, 0x0d, 0xe1, 0xa8, 0xc2, + 0x00, 0x2b, 0x0d, 0xe1, 0x98, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, 0x70, 0xc6, + 0x05, 0x01, 0x0d, 0xe1, 0x30, 0xc2, 0x00, 0x2b, 0x0d, 0xe2, 0x00, 0x90, + 0x0d, 0xe3, 0x49, 0x99, 0x0d, 0xe2, 0x10, 0x90, 0x0d, 0xe3, 0x39, 0x87, + 0x0d, 0xe2, 0x71, 0x8a, 0x0d, 0xe2, 0x60, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, + 0x88, 0xc9, 0x33, 0xad, 0x0d, 0xe1, 0x78, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, + 0x68, 0xd2, 0x4e, 0x9b, 0x0d, 0xe1, 0x20, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, + 0x60, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, 0x58, 0xd0, 0x5e, 0xe2, 0x01, 0x3e, + 0x41, 0xd6, 0x30, 0x7a, 0x01, 0x4f, 0xb9, 0xc8, 0x18, 0x67, 0x01, 0x4f, + 0xa8, 0xc7, 0x0e, 0xbc, 0x01, 0x16, 0x68, 0xc9, 0xb2, 0xfc, 0x0f, 0xac, + 0x99, 0xc7, 0xc4, 0x6b, 0x0f, 0xac, 0x90, 0xcf, 0x01, 0xb8, 0x01, 0x80, + 0xe8, 0xcc, 0x84, 0x99, 0x01, 0x1d, 0x31, 0xc9, 0x57, 0x36, 0x01, 0x1d, + 0x29, 0xcc, 0x80, 0xcd, 0x01, 0x1d, 0x21, 0x45, 0x00, 0x8c, 0x42, 0xba, + 0xf0, 0x46, 0x00, 0x8b, 0x42, 0xbb, 0x0e, 0xd6, 0x06, 0xd1, 0x0f, 0xdb, + 0xf9, 0xd6, 0x2d, 0x36, 0x0f, 0xdb, 0xf0, 0xc2, 0x00, 0x49, 0x01, 0x10, + 0xfb, 0x02, 0xbb, 0x1a, 0xc9, 0xb2, 0x75, 0x0f, 0xaf, 0x78, 0xcc, 0x8a, + 0xed, 0x01, 0x3f, 0xa1, 0xcc, 0x12, 0x2d, 0x01, 0x0f, 0xa0, 0x44, 0x04, + 0x91, 0xc2, 0xbb, 0x1e, 0xc3, 0x04, 0x20, 0x01, 0x2c, 0x80, 0xca, 0xa2, + 0x74, 0x01, 0x1d, 0x69, 0xcc, 0x82, 0xe9, 0x01, 0x1d, 0x61, 0xca, 0xa3, + 0x5a, 0x01, 0x1d, 0x58, 0xc2, 0x00, 0x49, 0x01, 0x15, 0xfb, 0x02, 0xbb, + 0x2a, 0xd6, 0x14, 0xf9, 0x0f, 0xdb, 0x70, 0xcd, 0x3f, 0xe8, 0x0f, 0xdc, + 0x41, 0xce, 0x08, 0x79, 0x0f, 0xdc, 0x50, 0xd6, 0x30, 0xfe, 0x01, 0x4b, + 0x81, 0xcc, 0x0b, 0x92, 0x01, 0x80, 0x58, 0xcc, 0x00, 0x33, 0x01, 0x4c, + 0x21, 0xcd, 0x69, 0x65, 0x01, 0x80, 0x78, 0xd9, 0x1b, 0xd1, 0x0f, 0xc4, + 0xb1, 0xc9, 0xb0, 0x8f, 0x01, 0x0f, 0x80, 0xca, 0x03, 0xdd, 0x0f, 0xc4, + 0x91, 0x48, 0x01, 0x9a, 0x42, 0xbb, 0x30, 0xc5, 0x01, 0xa2, 0x01, 0x0e, + 0xd9, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x78, 0x46, 0x02, 0x5c, 0xc2, 0xbb, + 0x45, 0xd1, 0x52, 0xbb, 0x01, 0x48, 0x80, 0xd6, 0x2b, 0x94, 0x01, 0x0e, + 0x61, 0x4a, 0x01, 0x58, 0x42, 0xbb, 0x51, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, + 0xb1, 0x0e, 0xc2, 0xbb, 0x5d, 0x15, 0xc2, 0xbb, 0x69, 0x42, 0x00, 0x58, + 0xc2, 0xbb, 
0x75, 0xcf, 0x2c, 0x35, 0x01, 0x0f, 0xc1, 0xd0, 0x58, 0x12, + 0x01, 0x0d, 0xa1, 0xc4, 0x01, 0x23, 0x01, 0x0d, 0x51, 0x16, 0xc2, 0xbb, + 0x81, 0xca, 0x9e, 0x28, 0x01, 0x4a, 0x29, 0xd9, 0x1f, 0xf9, 0x0f, 0xc0, + 0x31, 0xcc, 0x84, 0xb1, 0x0f, 0xc4, 0xd0, 0x43, 0x10, 0x9e, 0xc2, 0xbb, + 0x90, 0x47, 0x25, 0xf3, 0x42, 0xbb, 0x9c, 0xd1, 0x56, 0x73, 0x01, 0x49, + 0x00, 0x45, 0x00, 0xd5, 0xc2, 0xbb, 0xac, 0x43, 0x02, 0x9c, 0x42, 0xbb, + 0xc4, 0x00, 0xc2, 0xbb, 0xca, 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xe0, 0xc9, + 0x57, 0x20, 0x01, 0x0c, 0x40, 0xc4, 0xe4, 0x87, 0x01, 0x0c, 0x00, 0x00, + 0x42, 0xbb, 0xd6, 0x00, 0x42, 0xbb, 0xe2, 0xe0, 0x0b, 0xc7, 0x0f, 0xac, + 0xb0, 0x03, 0xc2, 0xbb, 0xee, 0xc2, 0x16, 0x1c, 0x00, 0xb7, 0xb1, 0xc2, + 0x00, 0xfa, 0x00, 0xb7, 0xa9, 0xc2, 0x07, 0xa3, 0x00, 0xb7, 0xa0, 0x49, + 0xad, 0x1d, 0x42, 0xbb, 0xf8, 0xc2, 0x00, 0xe7, 0x00, 0xb5, 0xa1, 0x83, + 0x00, 0xb5, 0x90, 0xc3, 0x72, 0x57, 0x00, 0xb6, 0xe0, 0xc2, 0x1d, 0xc1, + 0x00, 0xb7, 0x31, 0xc6, 0xd2, 0x35, 0x00, 0xb6, 0xc1, 0xc5, 0xd6, 0x82, + 0x00, 0xb6, 0x29, 0xc8, 0xbf, 0x3a, 0x00, 0xb5, 0xe1, 0xc5, 0x71, 0x4d, + 0x00, 0xb5, 0x60, 0xc3, 0x67, 0x02, 0x00, 0xb7, 0x21, 0x90, 0x00, 0xb5, + 0x98, 0x8e, 0x00, 0xb6, 0xd9, 0x92, 0x00, 0xb6, 0xa1, 0x90, 0x00, 0xb6, + 0x00, 0x94, 0x00, 0xb6, 0x21, 0xc9, 0xb3, 0xe6, 0x00, 0xb5, 0xb8, 0x90, + 0x05, 0x28, 0x08, 0x87, 0x05, 0x28, 0x11, 0x90, 0x05, 0x2f, 0x28, 0x90, + 0x05, 0x29, 0x38, 0x90, 0x05, 0x2a, 0x68, 0x91, 0x05, 0x2b, 0x99, 0x90, + 0x05, 0x2d, 0xf0, 0x90, 0x05, 0x2c, 0xc0, 0x87, 0x05, 0x28, 0x1b, 0x02, + 0xbc, 0x2e, 0x90, 0x05, 0x2f, 0x38, 0x90, 0x05, 0x29, 0x48, 0x90, 0x05, + 0x2a, 0x78, 0x91, 0x05, 0x2b, 0xa3, 0x02, 0xbc, 0x32, 0x90, 0x05, 0x2e, + 0x00, 0x90, 0x05, 0x2c, 0xd0, 0x87, 0x05, 0x28, 0x28, 0x91, 0x05, 0x2b, + 0xb0, 0x87, 0x05, 0x2f, 0x4b, 0x02, 0xbc, 0x36, 0x8b, 0x05, 0x29, 0x59, + 0x83, 0x05, 0x2a, 0x89, 0x91, 0x05, 0x2e, 0x13, 0x02, 0xbc, 0x3a, 0x97, + 0x05, 0x2c, 0xe0, 0x87, 0x05, 0x28, 0x38, 0x91, 0x05, 0x2b, 0xc0, 0x87, + 0x05, 0x2f, 0x5b, 0x02, 0xbc, 0x3e, 0x8b, 0x05, 0x29, 0x69, 0x83, 0x05, + 0x2a, 0x99, 0x91, 0x05, 0x2e, 0x23, 0x02, 0xbc, 0x42, 0x97, 0x05, 0x2c, + 0xf0, 0x87, 0x05, 0x2f, 0x73, 0x02, 0xbc, 0x46, 0x8b, 0x05, 0x29, 0x79, + 0x83, 0x05, 0x2a, 0xb1, 0x91, 0x05, 0x2e, 0x33, 0x02, 0xbc, 0x4a, 0x97, + 0x05, 0x2d, 0x00, 0x87, 0x05, 0x29, 0x08, 0x91, 0x05, 0x2c, 0x90, 0x87, + 0x05, 0x2f, 0x63, 0x02, 0xbc, 0x4e, 0x8b, 0x05, 0x29, 0x71, 0x83, 0x05, + 0x2a, 0xa3, 0x02, 0xbc, 0x56, 0x91, 0x05, 0x2e, 0x2b, 0x02, 0xbc, 0x5a, + 0x97, 0x05, 0x2c, 0xf8, 0x87, 0x05, 0x28, 0xf0, 0x90, 0x05, 0x2b, 0x58, + 0x91, 0x05, 0x2c, 0x78, 0x87, 0x05, 0x2f, 0x7b, 0x02, 0xbc, 0x5e, 0x8b, + 0x05, 0x29, 0x81, 0x83, 0x05, 0x2a, 0xb9, 0x91, 0x05, 0x2e, 0x3b, 0x02, + 0xbc, 0x66, 0x97, 0x05, 0x2d, 0x08, 0x87, 0x05, 0x29, 0x01, 0x90, 0x05, + 0x30, 0x38, 0x91, 0x05, 0x2c, 0x88, 0x87, 0x05, 0x28, 0x60, 0x91, 0x05, + 0x2b, 0xe8, 0x87, 0x05, 0x28, 0x68, 0x91, 0x05, 0x2b, 0xf0, 0x87, 0x05, + 0x28, 0x70, 0x87, 0x05, 0x2f, 0xa3, 0x02, 0xbc, 0x6e, 0x8b, 0x05, 0x29, + 0xa1, 0x83, 0x05, 0x2a, 0xd9, 0x91, 0x05, 0x2e, 0x63, 0x02, 0xbc, 0x72, + 0x97, 0x05, 0x2d, 0x28, 0x91, 0x05, 0x2b, 0xf8, 0x87, 0x05, 0x2f, 0xab, + 0x02, 0xbc, 0x76, 0x0a, 0xc2, 0xbc, 0x7a, 0x8b, 0x05, 0x29, 0xa9, 0x83, + 0x05, 0x2a, 0xe1, 0x91, 0x05, 0x2e, 0x6b, 0x02, 0xbc, 0x94, 0x97, 0x05, + 0x2d, 0x30, 0x87, 0x05, 0x28, 0xa0, 0x91, 0x05, 0x2c, 0x28, 0x87, 0x05, + 0x28, 0x91, 0xc8, 0x4a, 0xd9, 0x05, 0x30, 0x60, 0x91, 0x05, 0x2c, 0x18, + 0x87, 0x05, 0x28, 0x98, 0x91, 0x05, 0x2c, 0x20, 0x87, 0x05, 0x2f, 0xd3, + 0x02, 0xbc, 
0x98, 0x8b, 0x05, 0x29, 0xd1, 0x83, 0x05, 0x2b, 0x09, 0x91, + 0x05, 0x2e, 0x93, 0x02, 0xbc, 0x9c, 0x97, 0x05, 0x2d, 0x58, 0x87, 0x05, + 0x30, 0x0b, 0x02, 0xbc, 0xa6, 0x8b, 0x05, 0x2a, 0x09, 0x83, 0x05, 0x2b, + 0x41, 0x91, 0x05, 0x2e, 0xcb, 0x02, 0xbc, 0xaa, 0x97, 0x05, 0x2d, 0x90, + 0x09, 0xc2, 0xbc, 0xae, 0xc2, 0x00, 0xd1, 0x05, 0x2a, 0x59, 0xc2, 0x00, + 0x45, 0x05, 0x2d, 0xe1, 0xc2, 0x00, 0xc4, 0x05, 0x2f, 0x18, 0x87, 0x05, + 0x29, 0x10, 0x87, 0x05, 0x30, 0x53, 0x02, 0xbc, 0xc8, 0x8b, 0x05, 0x2a, + 0x41, 0x83, 0x05, 0x2b, 0x81, 0x91, 0x05, 0x2f, 0x03, 0x02, 0xbc, 0xcc, + 0x97, 0x05, 0x2d, 0xc8, 0x91, 0x05, 0x2c, 0x98, 0x87, 0x05, 0x28, 0xb0, + 0x87, 0x05, 0x2f, 0xe3, 0x02, 0xbc, 0xd0, 0x8b, 0x05, 0x29, 0xe1, 0x83, + 0x05, 0x2b, 0x19, 0x91, 0x05, 0x2e, 0xa3, 0x02, 0xbc, 0xd4, 0x97, 0x05, + 0x2d, 0x68, 0x91, 0x05, 0x2c, 0x38, 0x87, 0x05, 0x28, 0xc0, 0x87, 0x05, + 0x2f, 0xf3, 0x02, 0xbc, 0xd8, 0x8b, 0x05, 0x29, 0xf1, 0x83, 0x05, 0x2b, + 0x29, 0x91, 0x05, 0x2e, 0xb3, 0x02, 0xbc, 0xdc, 0x97, 0x05, 0x2d, 0x78, + 0x91, 0x05, 0x2c, 0x48, 0x87, 0x05, 0x28, 0xd0, 0x91, 0x05, 0x2c, 0x58, + 0x87, 0x05, 0x28, 0xd8, 0x91, 0x05, 0x2c, 0x60, 0x87, 0x05, 0x28, 0xe8, + 0x91, 0x05, 0x2c, 0x70, 0x90, 0x05, 0x2b, 0x90, 0xc3, 0x08, 0x48, 0x05, + 0x30, 0xd9, 0xc2, 0x37, 0xea, 0x05, 0x30, 0xf0, 0xca, 0x3b, 0x06, 0x01, + 0x1b, 0xf9, 0x47, 0x02, 0xd1, 0x42, 0xbc, 0xe0, 0xc4, 0xb2, 0xf8, 0x00, + 0x04, 0x50, 0xca, 0x99, 0x1f, 0x01, 0x81, 0x99, 0xca, 0x01, 0xc8, 0x01, + 0x81, 0xa8, 0xca, 0xa5, 0x12, 0x00, 0xe7, 0x60, 0xce, 0x25, 0xad, 0x70, + 0x02, 0xd9, 0xcb, 0x1a, 0x50, 0x70, 0x01, 0x41, 0xcd, 0x00, 0x32, 0x70, + 0x03, 0xd8, 0x9c, 0x70, 0x02, 0xd1, 0x9b, 0x70, 0x02, 0xc9, 0x9a, 0x70, + 0x02, 0xc1, 0x99, 0x70, 0x02, 0xb9, 0x98, 0x70, 0x02, 0xb1, 0x97, 0x70, + 0x02, 0xa9, 0x96, 0x70, 0x02, 0xa1, 0x95, 0x70, 0x02, 0x99, 0x94, 0x70, + 0x02, 0x91, 0x93, 0x70, 0x02, 0x89, 0x92, 0x70, 0x02, 0x81, 0x91, 0x70, + 0x02, 0x79, 0x90, 0x70, 0x02, 0x71, 0x8f, 0x70, 0x02, 0x69, 0x8e, 0x70, + 0x02, 0x61, 0x8d, 0x70, 0x02, 0x59, 0x8c, 0x70, 0x02, 0x51, 0x8b, 0x70, + 0x02, 0x49, 0x8a, 0x70, 0x02, 0x41, 0x89, 0x70, 0x02, 0x39, 0x88, 0x70, + 0x02, 0x31, 0x87, 0x70, 0x02, 0x29, 0x86, 0x70, 0x02, 0x21, 0x85, 0x70, + 0x02, 0x19, 0x84, 0x70, 0x02, 0x11, 0x83, 0x70, 0x02, 0x08, 0x9c, 0x70, + 0x03, 0xd1, 0x9b, 0x70, 0x03, 0xc9, 0x9a, 0x70, 0x03, 0xc1, 0x99, 0x70, + 0x03, 0xb9, 0x98, 0x70, 0x03, 0xb1, 0x97, 0x70, 0x03, 0xa9, 0x96, 0x70, + 0x03, 0xa1, 0x95, 0x70, 0x03, 0x99, 0x94, 0x70, 0x03, 0x91, 0x93, 0x70, + 0x03, 0x89, 0x92, 0x70, 0x03, 0x81, 0x91, 0x70, 0x03, 0x79, 0x90, 0x70, + 0x03, 0x71, 0x8f, 0x70, 0x03, 0x69, 0x8e, 0x70, 0x03, 0x61, 0x8d, 0x70, + 0x03, 0x59, 0x8c, 0x70, 0x03, 0x51, 0x8b, 0x70, 0x03, 0x49, 0x8a, 0x70, + 0x03, 0x41, 0x89, 0x70, 0x03, 0x39, 0x88, 0x70, 0x03, 0x31, 0x87, 0x70, + 0x03, 0x29, 0x86, 0x70, 0x03, 0x21, 0x85, 0x70, 0x03, 0x19, 0x84, 0x70, + 0x03, 0x11, 0x83, 0x70, 0x03, 0x08, 0xc9, 0xb4, 0x64, 0x70, 0x02, 0x01, + 0x83, 0x70, 0x01, 0x60, 0xc4, 0x18, 0x10, 0x70, 0x01, 0xb9, 0xc2, 0x22, + 0xcc, 0x70, 0x01, 0xb0, 0xc3, 0x0d, 0x14, 0x70, 0x01, 0xa9, 0xc3, 0x09, + 0x9e, 0x70, 0x01, 0xa0, 0xc4, 0x02, 0xde, 0x70, 0x01, 0x99, 0xc2, 0x02, + 0xa0, 0x70, 0x01, 0x90, 0x23, 0xc2, 0xbc, 0xec, 0x22, 0xc2, 0xbd, 0x10, + 0x21, 0xc2, 0xbd, 0x38, 0x20, 0xc2, 0xbd, 0x60, 0x1f, 0xc2, 0xbd, 0x88, + 0x1e, 0xc2, 0xbd, 0xb0, 0x1d, 0x42, 0xbd, 0xd8, 0x26, 0xc2, 0xbe, 0x00, + 0x25, 0xc2, 0xbe, 0x28, 0x24, 0xc2, 0xbe, 0x50, 0x23, 0xc2, 0xbe, 0x78, + 0x22, 0xc2, 0xbe, 0xa0, 0x21, 0xc2, 0xbe, 0xc8, 0x20, 0xc2, 0xbe, 0xf0, + 0x1f, 0xc2, 
0xbf, 0x18, 0x1e, 0xc2, 0xbf, 0x40, 0x1d, 0x42, 0xbf, 0x68, + 0x26, 0xc2, 0xbf, 0x90, 0x25, 0xc2, 0xbf, 0xb8, 0x24, 0xc2, 0xbf, 0xe0, + 0x23, 0xc2, 0xc0, 0x08, 0x22, 0xc2, 0xc0, 0x30, 0x21, 0xc2, 0xc0, 0x58, + 0x20, 0xc2, 0xc0, 0x80, 0x1f, 0xc2, 0xc0, 0xa8, 0x1e, 0xc2, 0xc0, 0xd0, + 0x1d, 0x42, 0xc0, 0xf8, 0x26, 0xc2, 0xc1, 0x20, 0x25, 0xc2, 0xc1, 0x48, + 0x24, 0xc2, 0xc1, 0x70, 0x23, 0xc2, 0xc1, 0x98, 0x22, 0xc2, 0xc1, 0xc0, + 0x21, 0xc2, 0xc1, 0xe8, 0x20, 0xc2, 0xc2, 0x10, 0x1f, 0xc2, 0xc2, 0x38, + 0x1e, 0xc2, 0xc2, 0x60, 0x1d, 0x42, 0xc2, 0x88, 0x26, 0xc2, 0xc2, 0xb0, + 0x25, 0xc2, 0xc2, 0xd8, 0x24, 0xc2, 0xc3, 0x00, 0x23, 0xc2, 0xc3, 0x28, + 0x22, 0xc2, 0xc3, 0x50, 0x21, 0xc2, 0xc3, 0x78, 0x20, 0xc2, 0xc3, 0xa0, + 0x1f, 0xc2, 0xc3, 0xc8, 0x1e, 0xc2, 0xc3, 0xf0, 0x1d, 0x42, 0xc4, 0x18, + 0x26, 0xc2, 0xc4, 0x40, 0x25, 0xc2, 0xc4, 0x68, 0x24, 0xc2, 0xc4, 0x90, + 0x23, 0xc2, 0xc4, 0xb8, 0x22, 0xc2, 0xc4, 0xe0, 0x21, 0xc2, 0xc5, 0x08, + 0x20, 0xc2, 0xc5, 0x30, 0x1f, 0xc2, 0xc5, 0x58, 0x1e, 0xc2, 0xc5, 0x80, + 0x1d, 0x42, 0xc5, 0xa8, 0x26, 0xc2, 0xc5, 0xd0, 0x25, 0xc2, 0xc5, 0xf8, + 0x24, 0xc2, 0xc6, 0x20, 0x23, 0xc2, 0xc6, 0x48, 0x22, 0xc2, 0xc6, 0x70, + 0x21, 0xc2, 0xc6, 0x98, 0x20, 0xc2, 0xc6, 0xc0, 0x1f, 0xc2, 0xc6, 0xe8, + 0x1e, 0xc2, 0xc7, 0x10, 0x1d, 0x42, 0xc7, 0x38, 0x26, 0xc2, 0xc7, 0x60, + 0x25, 0xc2, 0xc7, 0x88, 0x24, 0xc2, 0xc7, 0xb0, 0x23, 0xc2, 0xc7, 0xd8, + 0x22, 0xc2, 0xc8, 0x00, 0x21, 0xc2, 0xc8, 0x28, 0x20, 0xc2, 0xc8, 0x50, + 0x1f, 0xc2, 0xc8, 0x78, 0x1e, 0xc2, 0xc8, 0xa0, 0x1d, 0x42, 0xc8, 0xc8, + 0xc4, 0x18, 0x10, 0x0b, 0x56, 0x39, 0xc2, 0x22, 0xcc, 0x0b, 0x56, 0x30, + 0xc3, 0x0d, 0x14, 0x0b, 0x56, 0x29, 0xc3, 0x09, 0x9e, 0x0b, 0x56, 0x20, + 0xc4, 0x02, 0xde, 0x0b, 0x56, 0x19, 0xc2, 0x02, 0xa0, 0x0b, 0x56, 0x10, + 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0xe9, 0x83, 0x0b, 0x55, 0xa8, 0xc2, 0x00, + 0xd0, 0x0b, 0x55, 0xe1, 0x83, 0x0b, 0x55, 0x88, 0x83, 0x0b, 0x55, 0xd9, + 0xc7, 0xb4, 0x2f, 0x0b, 0x54, 0x80, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0xc9, + 0xc2, 0x0d, 0xf6, 0x0b, 0x55, 0xb1, 0x83, 0x0b, 0x55, 0x80, 0x16, 0xc2, + 0xc8, 0xec, 0x83, 0x0b, 0x55, 0x68, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0xb9, + 0x83, 0x0b, 0x55, 0x10, 0x0a, 0xc2, 0xc8, 0xf6, 0x83, 0x0b, 0x55, 0x20, + 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0x99, 0x83, 0x0b, 0x55, 0x61, 0xc2, 0x19, + 0x2c, 0x0b, 0x55, 0x41, 0xc2, 0x01, 0x30, 0x0b, 0x55, 0x18, 0x83, 0x0b, + 0x55, 0x71, 0xc7, 0xc6, 0xda, 0x0b, 0x54, 0x88, 0x83, 0x0b, 0x55, 0x59, + 0x9a, 0x0b, 0x54, 0xf9, 0x93, 0x0b, 0x54, 0xf1, 0x85, 0x0b, 0x54, 0xe9, + 0x9c, 0x0b, 0x54, 0xe0, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0x49, 0x83, 0x0b, + 0x55, 0x38, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0x09, 0x83, 0x0b, 0x55, 0x00, + 0x0b, 0xc2, 0xc9, 0x00, 0x07, 0xc2, 0xc9, 0x14, 0x9a, 0x0b, 0x54, 0x39, + 0x93, 0x0b, 0x54, 0x31, 0x85, 0x0b, 0x54, 0x29, 0x9c, 0x0b, 0x54, 0x20, + 0x19, 0xc2, 0xc9, 0x24, 0x9a, 0x0b, 0x53, 0xb9, 0x93, 0x0b, 0x53, 0xb1, + 0x85, 0x0b, 0x53, 0xa9, 0x9c, 0x0b, 0x53, 0xa0, 0x9a, 0x0b, 0x54, 0x19, + 0x93, 0x0b, 0x54, 0x11, 0x85, 0x0b, 0x54, 0x09, 0x9c, 0x0b, 0x54, 0x00, + 0x9a, 0x0b, 0x53, 0xf9, 0x93, 0x0b, 0x53, 0xf1, 0x85, 0x0b, 0x53, 0xe9, + 0x9c, 0x0b, 0x53, 0xe0, 0x9a, 0x0b, 0x53, 0xd9, 0x93, 0x0b, 0x53, 0xd1, + 0x85, 0x0b, 0x53, 0xc9, 0x9c, 0x0b, 0x53, 0xc0, 0x9a, 0x0b, 0x53, 0x99, + 0x93, 0x0b, 0x53, 0x91, 0x85, 0x0b, 0x53, 0x89, 0x9c, 0x0b, 0x53, 0x80, + 0x03, 0xc2, 0xc9, 0x34, 0xc3, 0x29, 0x78, 0x08, 0xff, 0x19, 0x0b, 0x42, + 0xc9, 0x40, 0xc7, 0xc9, 0x1f, 0x08, 0xff, 0x81, 0xc7, 0xc9, 0xea, 0x08, + 0xfe, 0xe1, 0xc9, 0xb4, 0xa3, 0x08, 0xfe, 0xc8, 0x17, 0xc2, 0xc9, 0x4c, + 0xc4, 0xe2, 
0x47, 0x08, 0xfe, 0xe8, 0x03, 0xc2, 0xc9, 0x58, 0xc2, 0x00, + 0x45, 0x08, 0xfe, 0xf8, 0xc8, 0xbc, 0xaa, 0x08, 0xfe, 0xb9, 0xc7, 0x14, + 0x39, 0x00, 0x5c, 0x10, 0x83, 0x00, 0x5c, 0x31, 0x8b, 0x00, 0x5c, 0x81, + 0x97, 0x00, 0x5c, 0xa0, 0x8b, 0x00, 0x5c, 0x40, 0x97, 0x00, 0x5c, 0x50, + 0x87, 0x00, 0x5c, 0x78, 0x91, 0x00, 0x5c, 0x98, 0xc2, 0x01, 0x30, 0x00, + 0x5c, 0xc9, 0xc2, 0x19, 0x2c, 0x00, 0x5c, 0xf1, 0x10, 0xc2, 0xc9, 0x6a, + 0x83, 0x00, 0x5d, 0x40, 0xc2, 0x01, 0x6f, 0x00, 0x5c, 0xf9, 0x83, 0x00, + 0x5d, 0x20, 0x83, 0x00, 0x5d, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x5d, 0x88, + 0x83, 0x00, 0x5d, 0x91, 0x0e, 0x42, 0xc9, 0x74, 0xc2, 0x00, 0xd0, 0x00, + 0x5d, 0xb1, 0xc2, 0x0d, 0xf6, 0x00, 0x5d, 0xb9, 0x83, 0x00, 0x5d, 0xc0, + 0xc2, 0x02, 0xa0, 0x00, 0x5f, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x5f, 0x48, + 0xc3, 0x09, 0x9e, 0x00, 0x5f, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x5f, 0x58, + 0xc2, 0x22, 0xcc, 0x00, 0x5f, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x5f, 0x68, + 0xc6, 0xa7, 0x8c, 0x08, 0xfe, 0x71, 0xc9, 0xaf, 0xdb, 0x08, 0xfe, 0x38, + 0x9f, 0x08, 0xfe, 0x91, 0x9e, 0x08, 0xfe, 0x88, 0xc4, 0x9c, 0x07, 0x08, + 0xfe, 0x79, 0xc7, 0xc7, 0x74, 0x08, 0xfe, 0x20, 0x8a, 0x08, 0xfe, 0x61, + 0xc4, 0x1e, 0x1a, 0x08, 0xfe, 0x10, 0xc4, 0x0f, 0x1f, 0x08, 0xfe, 0x59, + 0xc8, 0x1e, 0x16, 0x08, 0xfe, 0x41, 0x0a, 0x42, 0xc9, 0x7e, 0x46, 0xcf, + 0x4d, 0xc2, 0xc9, 0x8a, 0xc8, 0xaf, 0xd2, 0x08, 0xfe, 0x18, 0xc2, 0x00, + 0xd0, 0x08, 0xb4, 0xb9, 0x83, 0x08, 0xb4, 0xb0, 0xc2, 0x00, 0xd0, 0x08, + 0xb4, 0xa9, 0x83, 0x08, 0xb4, 0xa0, 0xc3, 0x71, 0xf0, 0x00, 0xd5, 0x58, + 0xc3, 0x71, 0xf0, 0x00, 0xd5, 0x48, 0xca, 0xa2, 0x2e, 0x00, 0xd3, 0xe1, + 0x46, 0x28, 0xb0, 0x42, 0xc9, 0x92, 0xc4, 0x68, 0x94, 0x00, 0xd2, 0xc0, + 0x83, 0x00, 0xd2, 0xe1, 0x46, 0x30, 0xa0, 0x42, 0xc9, 0x9e, 0xc5, 0x2c, + 0xf5, 0x00, 0xd2, 0xd1, 0xca, 0xa1, 0xc0, 0x00, 0xd2, 0xb8, 0xc5, 0x00, + 0xd4, 0x00, 0xd3, 0x99, 0xc5, 0x05, 0x02, 0x00, 0xd3, 0x60, 0x87, 0x00, + 0xd3, 0x40, 0x87, 0x00, 0xd2, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0xd2, 0x61, + 0xc2, 0x19, 0x2c, 0x00, 0xd1, 0xf9, 0x12, 0xc2, 0xc9, 0xaa, 0xc2, 0x00, + 0x87, 0x00, 0xd1, 0xe1, 0x16, 0xc2, 0xc9, 0xb4, 0xc5, 0x3c, 0xf5, 0x00, + 0xd1, 0x81, 0x05, 0xc2, 0xc9, 0xbe, 0xc2, 0x0d, 0xf6, 0x00, 0xd1, 0x51, + 0x0d, 0x42, 0xc9, 0xc8, 0x83, 0x00, 0xd2, 0x41, 0xc2, 0x0d, 0xf6, 0x00, + 0xd2, 0x39, 0xc2, 0x00, 0xd0, 0x00, 0xd2, 0x30, 0xc2, 0x00, 0xd0, 0x00, + 0xd1, 0xc9, 0x83, 0x00, 0xd1, 0xc0, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x99, + 0x83, 0x00, 0xd1, 0x90, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x41, 0x83, 0x00, + 0xd1, 0x38, 0xc2, 0x8d, 0x8f, 0x00, 0xd1, 0x11, 0xc2, 0x00, 0xd0, 0x00, + 0xd1, 0x09, 0x83, 0x00, 0xd1, 0x00, 0xc2, 0x00, 0xc1, 0x00, 0xd1, 0x89, + 0xc2, 0x01, 0x6f, 0x00, 0xd1, 0x68, 0x83, 0x05, 0x55, 0xc8, 0xc2, 0x01, + 0x23, 0x05, 0x54, 0xf9, 0x91, 0x05, 0x54, 0xe8, 0x91, 0x05, 0x54, 0xc9, + 0xc2, 0x0f, 0x7b, 0x05, 0x54, 0x49, 0xc2, 0x42, 0xcd, 0x05, 0x54, 0x88, + 0xc2, 0x01, 0x23, 0x05, 0x54, 0xb9, 0x91, 0x05, 0x54, 0xa8, 0x91, 0x05, + 0x54, 0x59, 0xc2, 0x01, 0x23, 0x05, 0x54, 0x68, 0x0a, 0xc2, 0xc9, 0xd8, + 0x91, 0x05, 0x54, 0x08, 0xc2, 0x01, 0x23, 0x05, 0x54, 0xf1, 0x91, 0x05, + 0x54, 0xe0, 0x91, 0x05, 0x54, 0xc1, 0xc2, 0x0f, 0x7b, 0x05, 0x54, 0x41, + 0xc2, 0x42, 0xcd, 0x05, 0x54, 0x80, 0xc2, 0x01, 0x23, 0x05, 0x54, 0xb1, + 0x91, 0x05, 0x54, 0xa0, 0xc2, 0x01, 0x23, 0x05, 0x54, 0x61, 0x91, 0x05, + 0x54, 0x50, 0x0a, 0xc2, 0xc9, 0xe2, 0x91, 0x05, 0x54, 0x00, 0xd5, 0x03, + 0xd2, 0x01, 0x5c, 0xd1, 0xc9, 0x03, 0xde, 0x01, 0x3d, 0x10, 0xc2, 0x10, + 0x37, 0x00, 0x3c, 0xd8, 0xc4, 0xd9, 0x21, 0x00, 0x3c, 0xf9, 0xc6, 0xb4, + 0xdc, 0x00, 
0x3c, 0x88, 0xc4, 0xe2, 0xd7, 0x00, 0x3c, 0xe9, 0xc7, 0xb4, + 0xdb, 0x00, 0x3c, 0x08, 0xc6, 0xb4, 0xdc, 0x00, 0x3c, 0x91, 0x83, 0x00, + 0x3c, 0xe0, 0xc5, 0xd9, 0xd4, 0x00, 0x70, 0x09, 0x42, 0x01, 0x23, 0x42, + 0xc9, 0xec, 0xc6, 0xcf, 0x47, 0x00, 0x70, 0x39, 0x43, 0xcf, 0x48, 0xc2, + 0xc9, 0xf6, 0xc7, 0xc8, 0x38, 0x00, 0x72, 0x68, 0xc2, 0x00, 0xd1, 0x00, + 0x70, 0x43, 0x02, 0xca, 0x00, 0xc3, 0x00, 0x74, 0x00, 0x70, 0x49, 0xc2, + 0x49, 0x0c, 0x00, 0x70, 0x60, 0x42, 0x01, 0x7c, 0xc2, 0xca, 0x04, 0x44, + 0x14, 0x3d, 0x42, 0xca, 0x0e, 0x43, 0xe6, 0x14, 0xc2, 0xca, 0x2b, 0xc7, + 0xca, 0x68, 0x00, 0x72, 0x70, 0xc5, 0xdc, 0x90, 0x00, 0x70, 0x71, 0xc3, + 0x13, 0x4b, 0x00, 0x70, 0xa0, 0x42, 0x01, 0x7c, 0xc2, 0xca, 0x37, 0x0a, + 0x42, 0xca, 0x43, 0xc5, 0xd9, 0xc5, 0x00, 0x70, 0xd9, 0x0a, 0xc2, 0xca, + 0x4f, 0xc8, 0xb8, 0x7a, 0x00, 0x71, 0x78, 0xc3, 0x05, 0xad, 0x00, 0x70, + 0xeb, 0x02, 0xca, 0x5b, 0xc5, 0xd9, 0x7f, 0x00, 0x72, 0x78, 0xc4, 0x42, + 0x6d, 0x00, 0x71, 0x09, 0x42, 0x02, 0xfa, 0x42, 0xca, 0x5f, 0xc5, 0xd9, + 0xc0, 0x00, 0x71, 0x19, 0x97, 0x00, 0x71, 0x20, 0x42, 0x01, 0x7c, 0xc2, + 0xca, 0x6f, 0x97, 0x00, 0x71, 0x31, 0xca, 0xa4, 0xd6, 0x00, 0x72, 0x28, + 0xc3, 0x00, 0x7d, 0x00, 0x71, 0x59, 0xc6, 0xcc, 0x35, 0x00, 0x71, 0x70, + 0xc2, 0x10, 0x11, 0x0f, 0x15, 0x61, 0x87, 0x0f, 0x15, 0x3b, 0x02, 0xca, + 0x7b, 0x8b, 0x0f, 0x15, 0x12, 0x02, 0xca, 0x7f, 0xc6, 0x7b, 0xb6, 0x0e, + 0x98, 0xf1, 0xc3, 0x05, 0xaf, 0x0e, 0x98, 0xa9, 0xc7, 0xc5, 0x1a, 0x0e, + 0x98, 0x58, 0xc5, 0xdb, 0x55, 0x0e, 0x99, 0x61, 0xc6, 0xd0, 0x5b, 0x0e, + 0x98, 0xd8, 0xca, 0xa1, 0xfc, 0x0f, 0xab, 0xe0, 0xd1, 0x50, 0x9b, 0x00, + 0x60, 0x01, 0xce, 0x29, 0x32, 0x00, 0x60, 0x20, 0x83, 0x00, 0x60, 0x31, + 0x8b, 0x00, 0x60, 0x81, 0x97, 0x00, 0x60, 0xa0, 0x8b, 0x00, 0x60, 0x40, + 0x97, 0x00, 0x60, 0x50, 0x47, 0xb2, 0x2e, 0xc2, 0xca, 0x83, 0x83, 0x00, + 0x61, 0xa8, 0x87, 0x00, 0x60, 0x78, 0x91, 0x00, 0x60, 0x98, 0x83, 0x00, + 0x60, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x60, 0xb0, 0x83, 0x00, 0x60, 0xb9, + 0xc2, 0x00, 0xd0, 0x00, 0x60, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x60, 0xc9, + 0xc2, 0x19, 0x2c, 0x00, 0x60, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x61, 0x19, + 0x83, 0x00, 0x61, 0x42, 0x02, 0xca, 0x91, 0x83, 0x00, 0x60, 0xd1, 0xc2, + 0x00, 0xd0, 0x00, 0x60, 0xd8, 0x83, 0x00, 0x60, 0xe1, 0xc2, 0x00, 0xd0, + 0x00, 0x60, 0xe8, 0x16, 0xc2, 0xca, 0x97, 0x83, 0x00, 0x61, 0x21, 0xc2, + 0x00, 0xd0, 0x00, 0x61, 0x29, 0xc2, 0x0d, 0xf6, 0x00, 0x62, 0xc0, 0x06, + 0xc2, 0xca, 0xa1, 0x83, 0x00, 0x61, 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x61, + 0x39, 0xc2, 0x02, 0x1c, 0x00, 0x62, 0xc8, 0x83, 0x00, 0x61, 0x51, 0xc2, + 0x00, 0xd0, 0x00, 0x61, 0x58, 0x83, 0x00, 0x61, 0x61, 0xc2, 0x00, 0xd0, + 0x00, 0x61, 0x68, 0x83, 0x00, 0x61, 0x81, 0x14, 0x42, 0xca, 0xab, 0x83, + 0x00, 0x61, 0x91, 0x0e, 0x42, 0xca, 0xb5, 0xc2, 0x00, 0xd0, 0x00, 0x61, + 0xb1, 0xc2, 0x0d, 0xf6, 0x00, 0x61, 0xb9, 0x83, 0x00, 0x61, 0xc0, 0x94, + 0x00, 0x62, 0x20, 0x8e, 0x00, 0x63, 0x18, 0xd2, 0x15, 0xf0, 0x00, 0x63, + 0xd1, 0xd3, 0x45, 0xbf, 0x00, 0x63, 0xe8, 0xd2, 0x15, 0xf0, 0x00, 0x63, + 0xd9, 0xd3, 0x45, 0xbf, 0x00, 0x63, 0xf0, 0xd0, 0x03, 0xb7, 0x01, 0x4b, + 0x91, 0xcf, 0x09, 0xf8, 0x01, 0x5a, 0x48, 0xcb, 0x93, 0x9e, 0x01, 0x53, + 0x59, 0xc9, 0x16, 0x14, 0x01, 0x53, 0x50, 0x8e, 0x08, 0xa5, 0xc0, 0x94, + 0x08, 0xa5, 0xb0, 0x8e, 0x08, 0xa4, 0x4b, 0x02, 0xca, 0xbf, 0x94, 0x08, + 0xa4, 0x3a, 0x02, 0xca, 0xc3, 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0xe1, 0x83, + 0x08, 0xa4, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0xd1, 0x83, 0x08, 0xa4, + 0xc8, 0xca, 0xa5, 0xc6, 0x00, 0x7e, 0x38, 0xc9, 0xb3, 0x17, 0x00, 0x7e, + 0x31, 0xc6, 
0xcf, 0x83, 0x00, 0x7e, 0x40, 0x00, 0x42, 0xca, 0xc7, 0x45, + 0xda, 0xbf, 0xc2, 0xca, 0xd9, 0x44, 0xe3, 0xef, 0x42, 0xca, 0xe3, 0x83, + 0x00, 0x7c, 0x81, 0xc2, 0x00, 0xd0, 0x00, 0x7c, 0x89, 0xc3, 0x1d, 0x35, + 0x00, 0x7d, 0xc8, 0x83, 0x00, 0x7c, 0x91, 0xc2, 0x00, 0xd0, 0x00, 0x7c, + 0x98, 0xc2, 0x01, 0x30, 0x00, 0x7c, 0xa1, 0xc2, 0x19, 0x2c, 0x00, 0x7c, + 0xc9, 0xc2, 0x00, 0xc1, 0x00, 0x7c, 0xf1, 0x83, 0x00, 0x7d, 0x18, 0x83, + 0x00, 0x7c, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x7c, 0xb0, 0x16, 0xc2, 0xca, + 0xed, 0x83, 0x00, 0x7c, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0x01, 0x15, + 0x42, 0xca, 0xf7, 0x06, 0xc2, 0xcb, 0x01, 0x83, 0x00, 0x7d, 0x09, 0xc2, + 0x00, 0xd0, 0x00, 0x7d, 0x11, 0x1c, 0x42, 0xcb, 0x0b, 0x83, 0x00, 0x7d, + 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0x28, 0x83, 0x00, 0x7d, 0x31, 0xc2, + 0x00, 0xd0, 0x00, 0x7d, 0x38, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0x71, 0x83, + 0x00, 0x7d, 0x78, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0xa1, 0xc2, 0x0d, 0xf6, + 0x00, 0x7d, 0xa9, 0x83, 0x00, 0x7d, 0xb0, 0xc2, 0x01, 0x4a, 0x00, 0x7d, + 0xd1, 0xc2, 0x19, 0x2c, 0x00, 0x7d, 0xd9, 0xc2, 0x00, 0x39, 0x00, 0x7d, + 0xe0, 0xcb, 0x90, 0x0d, 0x00, 0x78, 0x09, 0x44, 0xe3, 0xbf, 0x42, 0xcb, + 0x15, 0xcb, 0x98, 0xfd, 0x00, 0x78, 0x99, 0xcc, 0x7c, 0xc3, 0x00, 0x79, + 0xb0, 0xca, 0x9c, 0x52, 0x00, 0x78, 0x49, 0xd4, 0x39, 0x44, 0x00, 0x7e, + 0x80, 0xc5, 0x01, 0xe1, 0x00, 0x78, 0x80, 0x83, 0x00, 0x7a, 0x51, 0xc2, + 0x00, 0xd0, 0x00, 0x7a, 0x58, 0x83, 0x00, 0x7a, 0xc9, 0xc2, 0x00, 0xd0, + 0x00, 0x7a, 0xd0, 0x83, 0x00, 0x7a, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x7a, + 0x68, 0x83, 0x00, 0x7a, 0xd9, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0xe0, 0x8a, + 0x01, 0x69, 0x90, 0x8a, 0x01, 0x6a, 0xb2, 0x02, 0xcb, 0x21, 0x8a, 0x01, + 0x69, 0xc1, 0x86, 0x01, 0x69, 0xca, 0x02, 0xcb, 0x25, 0x8a, 0x01, 0x6a, + 0x2a, 0x02, 0xcb, 0x29, 0x8a, 0x01, 0x6a, 0x18, 0x8a, 0x01, 0x6a, 0x51, + 0x9c, 0x01, 0x6b, 0x28, 0x94, 0x01, 0x6a, 0xa8, 0x95, 0x01, 0x6a, 0xd1, + 0x8a, 0x01, 0x6a, 0xd8, 0x8a, 0x01, 0x6a, 0xe9, 0x96, 0x01, 0x6a, 0xf8, + 0x8a, 0x01, 0x6a, 0x30, 0x90, 0x01, 0x6a, 0x81, 0x8a, 0x01, 0x6a, 0xb8, + 0x49, 0x19, 0x61, 0xc2, 0xcb, 0x2d, 0xce, 0x6f, 0xd2, 0x07, 0xef, 0xd8, + 0x48, 0x19, 0x6b, 0xc2, 0xcb, 0x45, 0x48, 0xab, 0xf5, 0x42, 0xcb, 0x5d, + 0x0a, 0xc2, 0xcb, 0x7b, 0x49, 0xb2, 0x6c, 0xc2, 0xcb, 0x87, 0x03, 0xc2, + 0xcb, 0xaf, 0xd4, 0x39, 0x6c, 0x07, 0xef, 0xf0, 0x44, 0x2b, 0xb9, 0xc2, + 0xcb, 0xb9, 0x45, 0x19, 0x60, 0xc2, 0xcb, 0xc5, 0x46, 0x30, 0xc1, 0xc2, + 0xcb, 0xcf, 0x4d, 0x06, 0x5a, 0x42, 0xcb, 0xdb, 0x48, 0x92, 0x78, 0xc2, + 0xcb, 0xe7, 0x0e, 0xc2, 0xcb, 0xff, 0xd2, 0x4b, 0x29, 0x07, 0xef, 0x99, + 0xcb, 0x90, 0x65, 0x07, 0xef, 0xf8, 0x03, 0xc2, 0xcc, 0x11, 0x0a, 0xc2, + 0xcc, 0x1d, 0x48, 0xab, 0xf5, 0x42, 0xcc, 0x29, 0x0a, 0xc2, 0xcc, 0x5d, + 0x45, 0x19, 0x60, 0xc2, 0xcc, 0x67, 0x44, 0x2b, 0xb9, 0xc2, 0xcc, 0x7d, + 0x4d, 0x06, 0x5a, 0xc2, 0xcc, 0x89, 0x46, 0x50, 0xf0, 0xc2, 0xcc, 0x95, + 0x45, 0x30, 0xc1, 0xc2, 0xcc, 0xa1, 0xce, 0x72, 0xf0, 0x07, 0xe4, 0x89, + 0xcf, 0x69, 0x81, 0x07, 0xe4, 0x91, 0xcf, 0x60, 0x8a, 0x07, 0xe4, 0xa0, + 0x0a, 0xc2, 0xcc, 0xab, 0x44, 0x2b, 0xb9, 0xc2, 0xcc, 0xb7, 0x4d, 0x06, + 0x5a, 0xc2, 0xcc, 0xc3, 0x45, 0x19, 0x60, 0xc2, 0xcc, 0xcf, 0x46, 0x50, + 0xf0, 0xc2, 0xcc, 0xe5, 0x45, 0x30, 0xc1, 0xc2, 0xcc, 0xf1, 0xce, 0x72, + 0xf0, 0x07, 0xe4, 0x51, 0xcf, 0x69, 0x81, 0x07, 0xe4, 0x59, 0xcf, 0x60, + 0x8a, 0x07, 0xe4, 0x68, 0x48, 0x0f, 0x9b, 0xc2, 0xcc, 0xfb, 0x49, 0x19, + 0x6a, 0x42, 0xcd, 0x25, 0x44, 0x2b, 0xb9, 0xc2, 0xcd, 0x43, 0x45, 0x06, + 0x5a, 0xc2, 0xcd, 0x4f, 0x45, 0x19, 0x60, 0xc2, 0xcd, 0x67, 0x45, 0x50, + 0xf0, 0xc2, 
0xcd, 0x7d, 0x0a, 0xc2, 0xcd, 0x87, 0x45, 0x30, 0xc1, 0x42, + 0xcd, 0x93, 0x03, 0xc2, 0xcd, 0x9d, 0xcd, 0x7e, 0x55, 0x07, 0xea, 0x58, + 0x44, 0x2b, 0xb9, 0xc2, 0xcd, 0xa9, 0x4d, 0x06, 0x5a, 0xc2, 0xcd, 0xb5, + 0x45, 0x19, 0x60, 0xc2, 0xcd, 0xc1, 0x45, 0x50, 0xf0, 0xc2, 0xcd, 0xcb, + 0x45, 0x50, 0xf1, 0xc2, 0xcd, 0xd5, 0x46, 0x30, 0xc1, 0x42, 0xcd, 0xe1, + 0x48, 0xab, 0xf5, 0xc2, 0xcd, 0xed, 0xdc, 0x12, 0xa9, 0x07, 0xef, 0xe8, + 0x46, 0x2b, 0xba, 0xc2, 0xce, 0x21, 0x03, 0x42, 0xce, 0x27, 0x49, 0x19, + 0x61, 0xc2, 0xce, 0x3c, 0xd5, 0x38, 0x2a, 0x07, 0xef, 0xa0, 0x0b, 0xc2, + 0xce, 0x60, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xd8, 0x46, 0x50, 0x13, 0xc2, + 0xce, 0x6c, 0x45, 0x50, 0xf0, 0xc2, 0xce, 0x78, 0x44, 0x19, 0x6a, 0xc2, + 0xce, 0x82, 0x46, 0x30, 0xc1, 0xc2, 0xce, 0x8c, 0x44, 0x72, 0xf0, 0xc2, + 0xce, 0x98, 0x4d, 0x06, 0x5a, 0xc2, 0xce, 0xa4, 0x44, 0x2b, 0xb9, 0x42, + 0xce, 0xb0, 0x60, 0x08, 0x07, 0x42, 0xce, 0xbc, 0xc5, 0x05, 0x02, 0x00, + 0x47, 0xc9, 0xc5, 0x00, 0xd4, 0x00, 0x47, 0xb8, 0x08, 0xc2, 0xce, 0xc6, + 0x09, 0xc2, 0xce, 0xd8, 0x0e, 0xc2, 0xce, 0xf9, 0x42, 0x1c, 0x52, 0xc2, + 0xcf, 0x08, 0x03, 0xc2, 0xcf, 0x18, 0x0d, 0xc2, 0xcf, 0x34, 0x16, 0xc2, + 0xcf, 0x50, 0xc3, 0xd5, 0x25, 0x00, 0x33, 0xf3, 0x02, 0xcf, 0x78, 0x1b, + 0xc2, 0xcf, 0x85, 0x14, 0xc2, 0xcf, 0x95, 0x42, 0x00, 0x51, 0xc2, 0xcf, + 0xb6, 0x97, 0x00, 0x36, 0x3b, 0x02, 0xcf, 0xc6, 0xc3, 0x0f, 0x9a, 0x00, + 0x32, 0x13, 0x02, 0xcf, 0xd0, 0x87, 0x00, 0x36, 0x83, 0x02, 0xcf, 0xd4, + 0x42, 0x02, 0x1c, 0xc2, 0xcf, 0xd8, 0x15, 0xc2, 0xcf, 0xe8, 0x06, 0xc2, + 0xd0, 0x15, 0xc2, 0x00, 0x5f, 0x00, 0x36, 0x5b, 0x02, 0xd0, 0x37, 0xc3, + 0x7e, 0x89, 0x00, 0x32, 0x43, 0x02, 0xd0, 0x42, 0x0f, 0xc2, 0xd0, 0x46, + 0xc2, 0x49, 0x0c, 0x00, 0x36, 0x33, 0x02, 0xd0, 0x55, 0x10, 0xc2, 0xd0, + 0x59, 0x0a, 0x42, 0xd0, 0x72, 0xd3, 0x43, 0xd1, 0x00, 0x46, 0x91, 0xc5, + 0x05, 0x02, 0x00, 0x46, 0x79, 0xc5, 0x00, 0xd4, 0x00, 0x46, 0x70, 0x11, + 0xc2, 0xd0, 0x88, 0x03, 0x42, 0xd0, 0x94, 0xc3, 0x00, 0x49, 0x0f, 0x70, + 0x01, 0xc2, 0x00, 0x74, 0x0f, 0x70, 0x78, 0xc2, 0x00, 0x74, 0x0f, 0x70, + 0x31, 0x8a, 0x0f, 0x70, 0xd0, 0x03, 0xc2, 0xd0, 0x9c, 0xc2, 0x16, 0x1c, + 0x0f, 0x70, 0xa9, 0x0a, 0x42, 0xd0, 0xa6, 0xc2, 0x0f, 0x9b, 0x0f, 0x70, + 0x51, 0xc3, 0x14, 0x4b, 0x0f, 0x70, 0xb8, 0xc2, 0x00, 0xc2, 0x0f, 0x70, + 0x59, 0x46, 0xce, 0x45, 0x42, 0xd0, 0xb2, 0xc3, 0x03, 0x26, 0x0f, 0x70, + 0x71, 0xc4, 0xdf, 0x93, 0x0f, 0x70, 0xa1, 0x49, 0x9f, 0xe0, 0xc2, 0xd1, + 0x16, 0xc2, 0x01, 0x9d, 0x0f, 0x70, 0x88, 0xc3, 0x85, 0xf5, 0x0f, 0x71, + 0x09, 0xc4, 0x30, 0xc1, 0x0f, 0x71, 0x11, 0x0a, 0xc2, 0xd1, 0x66, 0xc3, + 0x2b, 0xb9, 0x0f, 0x71, 0x49, 0x0d, 0xc2, 0xd1, 0x72, 0xc3, 0x0d, 0xff, + 0x0f, 0x71, 0x59, 0xc4, 0x19, 0x60, 0x0f, 0x71, 0x61, 0xc4, 0x3a, 0x01, + 0x0f, 0x71, 0x69, 0x15, 0xc2, 0xd1, 0x7e, 0xc3, 0x03, 0x0c, 0x0f, 0x71, + 0x79, 0xc3, 0xb1, 0x0d, 0x0f, 0x71, 0x81, 0xc3, 0x0f, 0x9a, 0x0f, 0x71, + 0x91, 0x16, 0xc2, 0xd1, 0x90, 0xc3, 0xb2, 0x00, 0x0f, 0x71, 0xc9, 0xc5, + 0x92, 0x75, 0x0f, 0x71, 0xd8, 0xda, 0x19, 0x60, 0x0f, 0x77, 0x81, 0xcc, + 0x88, 0x95, 0x0f, 0x77, 0x88, 0x00, 0xc2, 0xd1, 0x9c, 0xc3, 0x13, 0x00, + 0x00, 0x32, 0x62, 0x02, 0xd1, 0xae, 0xc9, 0x33, 0xad, 0x00, 0x47, 0xe0, + 0xc9, 0x33, 0xad, 0x00, 0x47, 0xe8, 0x45, 0x00, 0x8c, 0xc2, 0xd1, 0xb4, + 0xcd, 0x00, 0xfa, 0x07, 0xf3, 0xb1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xb8, + 0xce, 0x00, 0xf9, 0x07, 0xf3, 0x80, 0x19, 0xc2, 0xd1, 0xc6, 0x15, 0xc2, + 0xd1, 0xd2, 0x08, 0xc2, 0xd1, 0xe4, 0xc4, 0x3a, 0x01, 0x00, 0x37, 0x43, + 0x02, 0xd1, 0xf0, 0xc3, 0x0f, 0x9a, 0x00, 0x46, 0xb9, 0xc3, 0x03, 0x0c, + 0x00, 0x46, 
0xb1, 0x42, 0x02, 0x1c, 0xc2, 0xd1, 0xf6, 0xc3, 0x2b, 0xb9, + 0x00, 0x37, 0x3b, 0x02, 0xd2, 0x00, 0x0f, 0xc2, 0xd2, 0x06, 0xd4, 0x3c, + 0x3c, 0x00, 0x37, 0x09, 0xd8, 0x21, 0x53, 0x00, 0x37, 0x01, 0xcc, 0x8c, + 0x91, 0x00, 0x36, 0xf9, 0x16, 0xc2, 0xd2, 0x12, 0xc4, 0x30, 0xc1, 0x00, + 0x36, 0xd1, 0x0e, 0x42, 0xd2, 0x1e, 0xcf, 0x60, 0x7b, 0x00, 0x46, 0xc9, + 0x19, 0xc2, 0xd2, 0x2a, 0xc4, 0x19, 0x60, 0x00, 0x37, 0x69, 0xc4, 0xdf, + 0x6b, 0x00, 0x37, 0x18, 0xc7, 0xbf, 0x83, 0x00, 0x46, 0x49, 0xc3, 0x00, + 0xcf, 0x00, 0x30, 0xc0, 0x00, 0x42, 0xd2, 0x36, 0xc5, 0x05, 0x02, 0x07, + 0xde, 0x09, 0xc5, 0x00, 0xd4, 0x07, 0xde, 0x00, 0x48, 0x04, 0xe7, 0xc2, + 0xd2, 0x48, 0x4a, 0x0e, 0x7d, 0x42, 0xd2, 0x5a, 0xd7, 0x2b, 0x0c, 0x07, + 0xdd, 0xe1, 0x42, 0x00, 0x30, 0x42, 0xd2, 0x6c, 0xc5, 0x05, 0x02, 0x07, + 0xdd, 0xd9, 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xd0, 0x46, 0xd1, 0x0f, 0xc2, + 0xd2, 0x78, 0x03, 0x42, 0xd2, 0x84, 0xcf, 0x63, 0xf0, 0x00, 0x30, 0x99, + 0xd0, 0x5a, 0x82, 0x00, 0x30, 0x90, 0xcd, 0x00, 0xfa, 0x07, 0xf3, 0xe1, + 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xe8, 0xc5, 0x05, 0x02, 0x00, 0x47, 0x79, + 0xc5, 0x00, 0xd4, 0x00, 0x47, 0x60, 0xc5, 0x05, 0x02, 0x00, 0x47, 0x71, + 0xc5, 0x00, 0xd4, 0x00, 0x47, 0x58, 0xc5, 0x05, 0x02, 0x00, 0x47, 0x69, + 0xc5, 0x00, 0xd4, 0x00, 0x47, 0x50, 0x46, 0x00, 0x8b, 0x42, 0xd2, 0x9c, + 0xc3, 0x13, 0x00, 0x00, 0x47, 0x48, 0xc3, 0x13, 0x00, 0x00, 0x47, 0x40, + 0xc3, 0x13, 0x00, 0x00, 0x47, 0x38, 0x83, 0x00, 0x2b, 0xc9, 0xc2, 0x16, + 0x1c, 0x00, 0x2b, 0x98, 0x83, 0x00, 0x2a, 0x49, 0xc2, 0x16, 0x1c, 0x00, + 0x2a, 0x18, 0x9f, 0x0f, 0xbb, 0x31, 0xa0, 0x0f, 0xbb, 0x39, 0xa1, 0x0f, + 0xbb, 0x41, 0xa2, 0x0f, 0xbb, 0x48, 0xc2, 0xe5, 0xfd, 0x0f, 0xb9, 0x20, + 0xa1, 0x0f, 0xb9, 0xa9, 0x9f, 0x0f, 0xb9, 0x99, 0xa0, 0x0f, 0xb9, 0xa0, + 0xc8, 0x8c, 0x89, 0x0f, 0xb9, 0x83, 0x02, 0xd2, 0xae, 0xc4, 0x1a, 0x05, + 0x0f, 0xb8, 0xf8, 0x9f, 0x0f, 0xb8, 0x59, 0xa0, 0x0f, 0xb8, 0x60, 0x48, + 0xba, 0x6a, 0xc2, 0xd2, 0xb4, 0xc8, 0x8c, 0x89, 0x0f, 0xb9, 0x61, 0xc6, + 0x4c, 0x49, 0x0f, 0xb9, 0x10, 0xc8, 0x8c, 0x89, 0x0f, 0xb9, 0x69, 0xd2, + 0x4c, 0x49, 0x0f, 0xb9, 0x30, 0xc2, 0xe5, 0xfd, 0x0f, 0xb8, 0x48, 0xc2, + 0xe5, 0xfd, 0x0f, 0xb8, 0x38, 0x84, 0x0a, 0x21, 0xa1, 0x83, 0x0a, 0x21, + 0x98, 0x83, 0x0a, 0x21, 0x88, 0x83, 0x0a, 0x21, 0x60, 0x83, 0x0a, 0x21, + 0x48, 0x83, 0x0a, 0x20, 0xd8, 0x83, 0x0a, 0x20, 0x50, 0x83, 0x0a, 0x22, + 0x49, 0x84, 0x0a, 0x22, 0x51, 0x85, 0x0a, 0x22, 0x58, 0x83, 0x0a, 0x23, + 0x58, 0x83, 0x0a, 0x23, 0x68, 0x83, 0x0a, 0x23, 0x80, 0x83, 0x0a, 0x23, + 0x90, 0x83, 0x0a, 0x23, 0xa0, 0x83, 0x0a, 0x23, 0xb9, 0x84, 0x0a, 0x23, + 0xc1, 0x85, 0x0a, 0x23, 0xc8, 0x83, 0x0a, 0x23, 0xd9, 0x84, 0x0a, 0x23, + 0xe0, 0x83, 0x0a, 0x23, 0xf9, 0x84, 0x0a, 0x24, 0x01, 0x85, 0x0a, 0x24, + 0x08, 0x83, 0x0a, 0x24, 0x29, 0x84, 0x0a, 0x24, 0x30, 0x83, 0x0a, 0x24, + 0x60, 0x83, 0x0a, 0x24, 0xb8, 0x83, 0x0a, 0x25, 0x10, 0x83, 0x0a, 0x27, + 0x31, 0x84, 0x0a, 0x27, 0x38, 0x83, 0x0a, 0x27, 0x68, 0x83, 0x0a, 0x27, + 0x80, 0x83, 0x0a, 0x27, 0xb8, 0x83, 0x0a, 0x27, 0xc8, 0x83, 0x0a, 0x28, + 0x28, 0x83, 0x0a, 0x29, 0x70, 0x83, 0x0a, 0x2a, 0x28, 0x83, 0x0a, 0x2a, + 0x58, 0x83, 0x0a, 0x2a, 0x88, 0x83, 0x0a, 0x2a, 0xe0, 0x83, 0x0a, 0x2b, + 0x88, 0x83, 0x0a, 0x2b, 0xa1, 0x84, 0x0a, 0x2b, 0xa9, 0x85, 0x0a, 0x2b, + 0xb0, 0x83, 0x0a, 0x2b, 0xd9, 0x84, 0x0a, 0x2b, 0xe1, 0x85, 0x0a, 0x2b, + 0xe8, 0x83, 0x0a, 0x2c, 0xa8, 0x83, 0x0a, 0x2c, 0xd8, 0x83, 0x0a, 0x2d, + 0x00, 0x83, 0x0a, 0x2d, 0x20, 0x83, 0x0a, 0x2d, 0x78, 0xc9, 0xae, 0xa0, + 0x0a, 0x2d, 0x89, 0x83, 0x0a, 0x2d, 0x90, 0x83, 0x0a, 0x2d, 0xb0, 0xd4, + 0x3f, 0x0c, 
0x0a, 0x2e, 0x71, 0xd3, 0x44, 0xc8, 0x0a, 0x2e, 0x78, 0x83, + 0x0a, 0x2f, 0xc0, 0x83, 0x0a, 0x30, 0x00, 0xc4, 0x0d, 0xe4, 0x01, 0x1b, + 0x01, 0xc5, 0x02, 0xd2, 0x01, 0x19, 0xe0, 0x43, 0x01, 0x47, 0xc2, 0xd2, + 0xc0, 0xc2, 0x05, 0x03, 0x01, 0x1a, 0xa3, 0x02, 0xd2, 0xcc, 0x0b, 0x42, + 0xd2, 0xd2, 0xc6, 0xcd, 0xc1, 0x01, 0x1a, 0x99, 0xcb, 0x03, 0xbc, 0x01, + 0x1a, 0x80, 0xcd, 0x09, 0xfa, 0x01, 0x1a, 0x39, 0xc7, 0x00, 0xcc, 0x01, + 0x1a, 0x18, 0xc3, 0xba, 0x27, 0x01, 0x1a, 0x71, 0xc8, 0x52, 0x09, 0x01, + 0x1a, 0x50, 0xd0, 0x5b, 0xd2, 0x01, 0x12, 0x90, 0x00, 0x42, 0xd2, 0xde, + 0xc9, 0x57, 0x20, 0x08, 0x09, 0x68, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x60, + 0x00, 0x42, 0xd2, 0xea, 0x00, 0x42, 0xd2, 0xf6, 0xc9, 0x57, 0x20, 0x08, + 0x09, 0x78, 0x00, 0x42, 0xd3, 0x02, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x70, + 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xf1, 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x38, + 0xc9, 0x57, 0x20, 0x08, 0x09, 0x80, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xf9, + 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x40, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x88, + 0xd5, 0x35, 0xf3, 0x0f, 0xdd, 0x78, 0x48, 0x1e, 0x57, 0xc2, 0xd3, 0x0e, + 0x11, 0x42, 0xd3, 0x26, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x35, 0xd0, 0x5e, + 0x32, 0x01, 0x2b, 0xe0, 0x47, 0x54, 0x42, 0xc2, 0xd3, 0x45, 0x49, 0x45, + 0xd2, 0x42, 0xd3, 0x51, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x5d, 0xc8, 0x00, + 0x5f, 0x01, 0x28, 0x51, 0xca, 0x01, 0x68, 0x01, 0x28, 0x40, 0xc8, 0x00, + 0x5f, 0x01, 0x28, 0x31, 0xca, 0x01, 0x68, 0x01, 0x28, 0x20, 0xce, 0x72, + 0xaa, 0x01, 0x2a, 0x51, 0xc8, 0x11, 0xff, 0x01, 0x29, 0xd1, 0xca, 0x11, + 0x34, 0x01, 0x29, 0x90, 0xce, 0x73, 0x44, 0x01, 0x29, 0xe9, 0xc8, 0x11, + 0x49, 0x01, 0x29, 0xa9, 0xca, 0x12, 0x12, 0x01, 0x29, 0x68, 0x0e, 0xc2, + 0xd3, 0x6f, 0xca, 0x01, 0x68, 0x01, 0x29, 0xd9, 0xc5, 0x00, 0x2c, 0x01, + 0x28, 0xb8, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x7b, 0xc8, 0x00, 0x5f, 0x01, + 0x2a, 0x79, 0xca, 0x01, 0x68, 0x01, 0x2a, 0x68, 0xca, 0x01, 0x68, 0x01, + 0x2a, 0x59, 0xc4, 0x00, 0x49, 0x01, 0x29, 0x59, 0xc5, 0x00, 0x2c, 0x01, + 0x29, 0x18, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x8d, 0xca, 0x01, 0x68, 0x01, + 0x2b, 0x49, 0xc4, 0x00, 0x49, 0x01, 0x2a, 0xe9, 0xc5, 0x00, 0x2c, 0x01, + 0x2a, 0xd0, 0xca, 0x01, 0x68, 0x01, 0x2b, 0x31, 0xc4, 0x00, 0x49, 0x01, + 0x2a, 0xb9, 0xc5, 0x00, 0x2c, 0x01, 0x2a, 0xa0, 0xd1, 0x53, 0x43, 0x01, + 0x2b, 0x29, 0xcb, 0x8d, 0x84, 0x01, 0x2a, 0xb1, 0xcc, 0x89, 0xd9, 0x01, + 0x2a, 0x98, 0xd1, 0x53, 0x32, 0x01, 0x2b, 0x21, 0xcb, 0x8e, 0xce, 0x01, + 0x2a, 0xa9, 0xcc, 0x87, 0xa5, 0x01, 0x2a, 0x90, 0xd3, 0x42, 0x7b, 0x01, + 0x2a, 0x39, 0xd0, 0x32, 0x71, 0x01, 0x29, 0x79, 0x45, 0x00, 0x49, 0xc2, + 0xd3, 0x9f, 0x46, 0x00, 0x2c, 0x42, 0xd3, 0xab, 0xd3, 0x41, 0xaa, 0x01, + 0x2a, 0x09, 0xd0, 0x32, 0x47, 0x01, 0x29, 0x81, 0x45, 0x00, 0x49, 0xc2, + 0xd3, 0xb7, 0x46, 0x00, 0x2c, 0x42, 0xd3, 0xc3, 0xca, 0x11, 0x34, 0x01, + 0x29, 0x51, 0xc5, 0x11, 0x39, 0x01, 0x28, 0xc8, 0xca, 0x11, 0x34, 0x01, + 0x29, 0x11, 0xc5, 0x11, 0x39, 0x01, 0x28, 0xa8, 0xca, 0x12, 0x12, 0x01, + 0x29, 0x31, 0xc5, 0x07, 0xeb, 0x01, 0x28, 0xd0, 0xca, 0x12, 0x12, 0x01, + 0x28, 0xf1, 0xc5, 0x07, 0xeb, 0x01, 0x28, 0xb0, 0xa3, 0x0f, 0xd9, 0xb0, + 0xa2, 0x0f, 0xd8, 0xab, 0x02, 0xd3, 0xcf, 0xa1, 0x0f, 0xd8, 0x73, 0x02, + 0xd3, 0xd3, 0xa3, 0x0f, 0xd9, 0x28, 0xa3, 0x0f, 0xd9, 0x80, 0xa3, 0x0f, + 0xd9, 0x41, 0xa2, 0x0f, 0xd8, 0xca, 0x02, 0xd3, 0xdb, 0xa3, 0x0f, 0xd9, + 0x51, 0xa2, 0x0f, 0xd8, 0xda, 0x02, 0xd3, 0xdf, 0xa3, 0x0f, 0xd9, 0xc8, + 0xa3, 0x0f, 0xd9, 0x59, 0xa2, 0x0f, 0xd8, 0xe2, 0x02, 0xd3, 0xe3, 0xa3, + 0x0f, 0xd9, 0x98, 0xa3, 0x0f, 0xd9, 0xb8, 0xca, 0xa7, 0x92, 0x0f, 0xd2, + 0x4b, 0x02, 
0xd3, 0xe7, 0x0d, 0xc2, 0xd3, 0xed, 0xc4, 0xe3, 0x93, 0x01, + 0x32, 0xfb, 0x02, 0xd3, 0xff, 0xc6, 0xca, 0xfd, 0x01, 0x32, 0xeb, 0x02, + 0xd4, 0x05, 0xc4, 0xde, 0x83, 0x01, 0x32, 0xe3, 0x02, 0xd4, 0x0b, 0xc5, + 0xa8, 0xf7, 0x01, 0x32, 0xdb, 0x02, 0xd4, 0x11, 0x47, 0x45, 0x86, 0x42, + 0xd4, 0x17, 0x4e, 0x6e, 0xe4, 0xc2, 0xd4, 0x33, 0x4e, 0x0e, 0x14, 0xc2, + 0xd4, 0x3f, 0x4c, 0x12, 0xe1, 0xc2, 0xd4, 0x4b, 0x4f, 0x61, 0x3e, 0x42, + 0xd4, 0x57, 0x00, 0x42, 0xd4, 0x63, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x69, + 0xc6, 0x02, 0xd1, 0x0f, 0xbc, 0x20, 0xca, 0x82, 0xd3, 0x01, 0x31, 0xd9, + 0x44, 0x03, 0x15, 0x42, 0xd4, 0x6f, 0x00, 0x42, 0xd4, 0x7f, 0xc6, 0x0b, + 0x09, 0x0f, 0xbc, 0x61, 0xc7, 0x3a, 0x19, 0x0f, 0xbc, 0xb9, 0xc7, 0x0a, + 0xe0, 0x0f, 0xbc, 0xe8, 0x4a, 0x01, 0xa9, 0xc2, 0xd4, 0x91, 0xd8, 0x24, + 0xcb, 0x0f, 0xad, 0x19, 0xdb, 0x03, 0xcc, 0x01, 0x5c, 0xf8, 0x00, 0x42, + 0xd4, 0xa9, 0x47, 0xbe, 0x33, 0xc2, 0xd4, 0xc7, 0xc5, 0xdd, 0x76, 0x0f, + 0x99, 0x10, 0x4a, 0x01, 0xa9, 0xc2, 0xd4, 0xd3, 0x46, 0x01, 0x4a, 0xc2, + 0xd4, 0xf5, 0x4a, 0x03, 0x3d, 0x42, 0xd5, 0x0a, 0x4a, 0x01, 0xa9, 0xc2, + 0xd5, 0x16, 0x00, 0xc2, 0xd5, 0x37, 0x46, 0x01, 0x4a, 0x42, 0xd5, 0x43, + 0x44, 0x00, 0x28, 0xc2, 0xd5, 0x4f, 0xc5, 0x0a, 0xe2, 0x01, 0x4f, 0x58, + 0xc6, 0x0b, 0x09, 0x01, 0x58, 0xd9, 0xc6, 0x02, 0xd1, 0x01, 0x59, 0x20, + 0xc6, 0x04, 0xa1, 0x01, 0x39, 0xf9, 0xc2, 0x00, 0xb3, 0x01, 0x34, 0x88, + 0xcf, 0x66, 0xde, 0x01, 0x39, 0x31, 0xc4, 0x18, 0xb3, 0x0f, 0xad, 0xf8, + 0x15, 0xc2, 0xd5, 0x5b, 0x06, 0xc2, 0xd5, 0x67, 0xd4, 0x3c, 0x14, 0x01, + 0x1f, 0xb3, 0x02, 0xd5, 0x76, 0xd7, 0x2a, 0x0f, 0x01, 0x1f, 0xab, 0x02, + 0xd5, 0x7c, 0x0e, 0x42, 0xd5, 0x82, 0x44, 0x00, 0x67, 0xc2, 0xd5, 0x91, + 0x4a, 0x01, 0xa9, 0xc2, 0xd5, 0x9d, 0xd8, 0x24, 0xcb, 0x0f, 0xad, 0x11, + 0xdb, 0x03, 0xcc, 0x01, 0x5c, 0xe8, 0xc3, 0x08, 0x7b, 0x0f, 0xad, 0x23, + 0x02, 0xd5, 0xb5, 0xc5, 0xc2, 0xc2, 0x01, 0x59, 0x10, 0xc7, 0xc6, 0xef, + 0x01, 0x4e, 0xb9, 0xd0, 0x5a, 0x62, 0x01, 0x59, 0x60, 0xc4, 0x2b, 0xf1, + 0x0f, 0x9f, 0x91, 0xc5, 0xbb, 0xcd, 0x01, 0x58, 0xf8, 0xc9, 0x46, 0x70, + 0x01, 0x2d, 0x71, 0xc7, 0x5a, 0x6b, 0x01, 0x59, 0x70, 0xc6, 0x0b, 0x09, + 0x01, 0x58, 0xe9, 0xc7, 0x3a, 0x19, 0x0f, 0xbc, 0xc1, 0xc7, 0x0a, 0xe0, + 0x0f, 0xbc, 0xf0, 0x9a, 0x01, 0x30, 0x83, 0x02, 0xd5, 0xbb, 0xcb, 0x8e, + 0xa2, 0x0f, 0xaf, 0xb0, 0xc8, 0xb6, 0xb2, 0x00, 0xdb, 0xf0, 0xc3, 0x00, + 0x74, 0x00, 0xdb, 0xe1, 0xc3, 0x38, 0x86, 0x00, 0xdb, 0xc9, 0xc3, 0x01, + 0x95, 0x00, 0xdb, 0xc0, 0xc2, 0x14, 0x49, 0x00, 0xdb, 0xd9, 0xc2, 0x06, + 0x4e, 0x00, 0xdb, 0xd0, 0xc2, 0x06, 0x4e, 0x00, 0xdb, 0xb9, 0xc2, 0x14, + 0x49, 0x00, 0xdb, 0xb0, 0xc2, 0x00, 0xb3, 0x00, 0xdb, 0xa9, 0xc2, 0x0b, + 0x47, 0x00, 0xdb, 0xa0, 0xc2, 0x01, 0x0f, 0x00, 0xdb, 0x73, 0x02, 0xd5, + 0xc1, 0xc2, 0x03, 0x66, 0x00, 0xdb, 0x6a, 0x02, 0xd5, 0xc7, 0xc2, 0x00, + 0x75, 0x00, 0xdb, 0x23, 0x02, 0xd5, 0xcd, 0xc3, 0x00, 0x74, 0x00, 0xdb, + 0x49, 0xc3, 0x0a, 0xe3, 0x00, 0xdb, 0x38, 0xc3, 0x38, 0x86, 0x00, 0xdb, + 0x41, 0xc2, 0x00, 0x75, 0x00, 0xdb, 0x10, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, + 0x30, 0x00, 0x42, 0xd5, 0xd1, 0xc7, 0xc7, 0x20, 0x00, 0xda, 0x29, 0xca, + 0x60, 0x26, 0x00, 0xd8, 0xa0, 0xc2, 0x00, 0xb0, 0x00, 0xd9, 0x89, 0xc2, + 0x01, 0x30, 0x00, 0xd9, 0x80, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, 0x70, 0xc7, + 0xc2, 0x6c, 0x00, 0xd8, 0x60, 0xc7, 0xbf, 0xef, 0x00, 0xd9, 0x08, 0xc3, + 0x1b, 0xe8, 0x00, 0xd9, 0x29, 0x45, 0x60, 0x22, 0x42, 0xd5, 0xe3, 0x00, + 0x42, 0xd5, 0xef, 0x0d, 0xc2, 0xd5, 0xfe, 0x97, 0x0b, 0x50, 0x21, 0xc4, + 0xdf, 0x8b, 0x0b, 0x51, 0xc1, 0x15, 0xc2, 0xd6, 0x1a, 0x16, 0xc2, 0xd6, + 0x34, 0x8f, 
0x0b, 0x50, 0x8b, 0x02, 0xd6, 0x3e, 0x14, 0xc2, 0xd6, 0x50, + 0x0e, 0xc2, 0xd6, 0x5c, 0x19, 0xc2, 0xd6, 0x6a, 0xc3, 0xe6, 0x0e, 0x0b, + 0x51, 0x59, 0x12, 0xc2, 0xd6, 0x74, 0x10, 0xc2, 0xd6, 0x7e, 0x1b, 0xc2, + 0xd6, 0xa9, 0xc2, 0x02, 0xe0, 0x0b, 0x50, 0x30, 0x09, 0xc2, 0xd6, 0xb3, + 0x19, 0xc2, 0xd6, 0xbd, 0x0d, 0xc2, 0xd6, 0xc7, 0x10, 0xc2, 0xd6, 0xdd, + 0x16, 0xc2, 0xd7, 0x0a, 0x12, 0xc2, 0xd7, 0x1a, 0x14, 0xc2, 0xd7, 0x37, + 0x15, 0xc2, 0xd7, 0x47, 0x0e, 0xc2, 0xd7, 0x61, 0x18, 0xc2, 0xd7, 0x73, + 0x0f, 0xc2, 0xd7, 0x7d, 0x08, 0xc2, 0xd7, 0xb5, 0x1b, 0xc2, 0xd7, 0xcc, + 0x8b, 0x0b, 0x4e, 0xc1, 0x91, 0x0b, 0x4e, 0xb9, 0x83, 0x0b, 0x4e, 0xa8, + 0x10, 0xc2, 0xd7, 0xe6, 0x0e, 0xc2, 0xd8, 0x06, 0x8f, 0x0b, 0x4a, 0x8b, + 0x02, 0xd8, 0x1c, 0x16, 0xc2, 0xd8, 0x42, 0x0d, 0xc2, 0xd8, 0x5d, 0x15, + 0xc2, 0xd8, 0x74, 0x08, 0xc2, 0xd8, 0x8c, 0x1b, 0xc2, 0xd8, 0x98, 0x14, + 0xc2, 0xd8, 0xa8, 0x12, 0xc2, 0xd8, 0xba, 0x42, 0x00, 0x09, 0xc2, 0xd8, + 0xce, 0x19, 0x42, 0xd8, 0xda, 0x0d, 0xc2, 0xd8, 0xe6, 0x15, 0xc2, 0xd8, + 0xfa, 0x16, 0xc2, 0xd9, 0x08, 0x12, 0xc2, 0xd9, 0x18, 0x0e, 0xc2, 0xd9, + 0x22, 0x10, 0xc2, 0xd9, 0x30, 0x0f, 0xc2, 0xd9, 0x52, 0x1b, 0xc2, 0xd9, + 0x6c, 0x19, 0xc2, 0xd9, 0x7c, 0xc2, 0x17, 0x99, 0x0b, 0x46, 0x19, 0x43, + 0x2c, 0xdc, 0xc2, 0xd9, 0x88, 0xc4, 0xe3, 0x03, 0x0b, 0x46, 0x01, 0xc3, + 0xe6, 0x29, 0x0b, 0x45, 0xe1, 0x09, 0x42, 0xd9, 0x92, 0x10, 0xc2, 0xd9, + 0x9e, 0x0f, 0xc2, 0xd9, 0xb6, 0x12, 0xc2, 0xd9, 0xd1, 0x47, 0xc0, 0x19, + 0xc2, 0xd9, 0xe9, 0x0d, 0xc2, 0xd9, 0xf3, 0x0e, 0xc2, 0xda, 0x03, 0x42, + 0x14, 0xda, 0xc2, 0xda, 0x13, 0x15, 0xc2, 0xda, 0x1d, 0x16, 0xc2, 0xda, + 0x3b, 0xc5, 0xd5, 0xfb, 0x0b, 0x43, 0xb1, 0xc4, 0xa6, 0xdc, 0x0b, 0x43, + 0x99, 0x1b, 0x42, 0xda, 0x47, 0xc3, 0x0a, 0x85, 0x0b, 0x42, 0x91, 0x15, + 0xc2, 0xda, 0x53, 0x16, 0xc2, 0xda, 0x6d, 0x0d, 0xc2, 0xda, 0x7d, 0x0f, + 0xc2, 0xda, 0x91, 0x10, 0xc2, 0xda, 0xb1, 0x0e, 0xc2, 0xda, 0xe7, 0x12, + 0xc2, 0xdb, 0x00, 0x17, 0xc2, 0xdb, 0x16, 0xc3, 0x00, 0x79, 0x0b, 0x41, + 0xd1, 0xc4, 0xe0, 0x17, 0x0b, 0x41, 0xc9, 0x09, 0x42, 0xdb, 0x22, 0xc7, + 0xc8, 0x5b, 0x00, 0xdf, 0xf9, 0xc9, 0xaf, 0xc0, 0x00, 0xdf, 0xe8, 0x49, + 0xa9, 0x09, 0x42, 0xdb, 0x2e, 0xc2, 0x00, 0xdb, 0x00, 0xde, 0xf9, 0xc2, + 0x19, 0x2c, 0x00, 0xde, 0xe1, 0xc2, 0x0d, 0xf6, 0x00, 0xde, 0xc9, 0xc2, + 0x01, 0xc3, 0x00, 0xde, 0xa9, 0xc2, 0x00, 0x39, 0x00, 0xde, 0x99, 0xc2, + 0x01, 0x30, 0x00, 0xde, 0x79, 0xc2, 0x01, 0x4a, 0x00, 0xde, 0x61, 0xc2, + 0x00, 0xb0, 0x00, 0xde, 0x41, 0xc2, 0x00, 0xd0, 0x00, 0xde, 0x19, 0x83, + 0x00, 0xde, 0x08, 0xc6, 0xcd, 0x7f, 0x00, 0x4e, 0x70, 0x46, 0x00, 0x8b, + 0x42, 0xdb, 0x40, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x11, 0x83, 0x00, 0x4d, + 0x08, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x01, 0x83, 0x00, 0x4c, 0xf8, 0x94, + 0x00, 0x4c, 0x5b, 0x02, 0xdb, 0x4c, 0x8e, 0x00, 0x4c, 0x62, 0x02, 0xdb, + 0x50, 0xc4, 0x1e, 0x97, 0x00, 0x4e, 0x69, 0xc5, 0x40, 0xe7, 0x00, 0x4c, + 0x18, 0xc7, 0x7a, 0x7f, 0x00, 0x4d, 0xe9, 0xc7, 0x14, 0x39, 0x00, 0x4c, + 0x10, 0x94, 0x00, 0x4e, 0x20, 0x8e, 0x00, 0x4f, 0x18, 0xda, 0x1c, 0x04, + 0x00, 0x4f, 0xc0, 0xc2, 0x02, 0xa0, 0x00, 0x4f, 0xa9, 0xc4, 0x02, 0xde, + 0x00, 0x4f, 0xb0, 0xc2, 0x00, 0x64, 0x00, 0xd0, 0x79, 0x83, 0x00, 0xd0, + 0x70, 0xc2, 0x02, 0x2b, 0x00, 0xd0, 0x19, 0x83, 0x00, 0xd0, 0x10, 0xa5, + 0x01, 0x46, 0x00, 0x9f, 0x01, 0x40, 0x1b, 0x02, 0xdb, 0x54, 0xa0, 0x01, + 0x40, 0x2b, 0x02, 0xdb, 0x7b, 0xa1, 0x01, 0x40, 0x4b, 0x02, 0xdb, 0x9b, + 0xa2, 0x01, 0x40, 0x8b, 0x02, 0xdb, 0xb4, 0xa3, 0x01, 0x41, 0x0b, 0x02, + 0xdb, 0xc6, 0xa5, 0x01, 0x44, 0x09, 0xa4, 0x01, 0x42, 0x0a, 0x02, 0xdb, + 0xd1, 0xa0, 
0x01, 0x40, 0x33, 0x02, 0xdb, 0xd5, 0xa1, 0x01, 0x40, 0x53, + 0x02, 0xdb, 0xf5, 0xa2, 0x01, 0x40, 0x93, 0x02, 0xdc, 0x0e, 0xa3, 0x01, + 0x41, 0x13, 0x02, 0xdc, 0x20, 0xa5, 0x01, 0x44, 0x11, 0xa4, 0x01, 0x42, + 0x12, 0x02, 0xdc, 0x2b, 0xa1, 0x01, 0x40, 0x63, 0x02, 0xdc, 0x2f, 0xa2, + 0x01, 0x40, 0xa3, 0x02, 0xdc, 0x48, 0xa3, 0x01, 0x41, 0x23, 0x02, 0xdc, + 0x5a, 0xa5, 0x01, 0x44, 0x21, 0xa4, 0x01, 0x42, 0x22, 0x02, 0xdc, 0x65, + 0xa2, 0x01, 0x40, 0xc3, 0x02, 0xdc, 0x69, 0xa3, 0x01, 0x41, 0x43, 0x02, + 0xdc, 0x7b, 0xa5, 0x01, 0x44, 0x41, 0xa4, 0x01, 0x42, 0x42, 0x02, 0xdc, + 0x86, 0xa3, 0x01, 0x41, 0x83, 0x02, 0xdc, 0x8a, 0xa5, 0x01, 0x44, 0x81, + 0xa4, 0x01, 0x42, 0x82, 0x02, 0xdc, 0x95, 0xa5, 0x01, 0x45, 0x01, 0xa4, + 0x01, 0x43, 0x02, 0x02, 0xdc, 0x99, 0xc8, 0x4b, 0x94, 0x08, 0x83, 0x29, + 0xc7, 0x0d, 0x04, 0x08, 0x83, 0x20, 0xc2, 0x0d, 0x10, 0x08, 0x83, 0x08, + 0xc2, 0x0d, 0x10, 0x08, 0x83, 0x00, 0xc3, 0x45, 0x6b, 0x08, 0x82, 0xf9, + 0xc2, 0x00, 0x5f, 0x08, 0x82, 0xb0, 0xc3, 0x0d, 0x0f, 0x08, 0x82, 0xf1, + 0xc2, 0x00, 0x33, 0x08, 0x82, 0xa8, 0xc4, 0x0d, 0x0e, 0x08, 0x82, 0xe9, + 0xc3, 0x02, 0xdf, 0x08, 0x82, 0xa0, 0xc4, 0x18, 0x12, 0x08, 0x82, 0xe1, + 0x91, 0x08, 0x82, 0x98, 0x42, 0x02, 0xa7, 0xc2, 0xdc, 0x9d, 0x46, 0x2e, + 0xee, 0xc2, 0xdc, 0xa7, 0xc4, 0xd8, 0xde, 0x08, 0x81, 0xb9, 0xc3, 0x7e, + 0x5e, 0x08, 0x81, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x01, 0x83, 0x08, + 0x80, 0xf8, 0xc2, 0x00, 0xd0, 0x08, 0x80, 0xf1, 0x83, 0x08, 0x80, 0xe8, + 0x8e, 0x08, 0x80, 0x6b, 0x02, 0xdc, 0xaf, 0x94, 0x08, 0x80, 0x5a, 0x02, + 0xdc, 0xb3, 0x4f, 0x66, 0x39, 0x42, 0xdc, 0xb7, 0x97, 0x08, 0x82, 0x29, + 0x8b, 0x08, 0x82, 0x19, 0x83, 0x08, 0x81, 0xc0, 0x8e, 0x08, 0x82, 0x03, + 0x02, 0xdc, 0xbf, 0x94, 0x08, 0x81, 0xf2, 0x02, 0xdc, 0xc3, 0x97, 0x08, + 0x81, 0xe8, 0x8b, 0x08, 0x81, 0xd8, 0xc4, 0x18, 0x10, 0x08, 0x83, 0x69, + 0xc2, 0x22, 0xcc, 0x08, 0x83, 0x60, 0xc3, 0x0d, 0x14, 0x08, 0x83, 0x59, + 0xc3, 0x09, 0x9e, 0x08, 0x83, 0x50, 0xc4, 0x02, 0xde, 0x08, 0x83, 0x49, + 0xc2, 0x02, 0xa0, 0x08, 0x83, 0x40, 0x44, 0xe3, 0xbb, 0xc2, 0xdc, 0xc7, + 0x4e, 0x6b, 0x44, 0xc2, 0xdc, 0xd3, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xb0, + 0xc4, 0x99, 0xff, 0x0e, 0x87, 0x99, 0xc4, 0xe4, 0xa7, 0x0e, 0x87, 0x89, + 0xc3, 0x2e, 0xd7, 0x0e, 0x82, 0x78, 0x44, 0xe3, 0xbb, 0xc2, 0xdc, 0xdf, + 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xe0, 0x00, 0xc2, 0xdc, 0xf1, 0xc2, 0x01, + 0x6f, 0x0e, 0x81, 0x90, 0xc8, 0xbb, 0x0a, 0x0e, 0x82, 0xa1, 0xc8, 0xad, + 0x15, 0x0e, 0x82, 0x60, 0x42, 0x02, 0x32, 0xc2, 0xdc, 0xfb, 0x95, 0x0e, + 0x80, 0x8a, 0x02, 0xdd, 0x07, 0xc3, 0x63, 0x2b, 0x0e, 0x84, 0x21, 0xc8, + 0x9c, 0xe0, 0x0e, 0x81, 0x10, 0x16, 0xc2, 0xdd, 0x0b, 0xc7, 0xc3, 0x22, + 0x0e, 0x87, 0x18, 0x16, 0xc2, 0xdd, 0x17, 0xc7, 0xc3, 0x22, 0x0e, 0x86, + 0xf8, 0xc3, 0x63, 0x2b, 0x0e, 0x83, 0x29, 0xcc, 0x84, 0x5d, 0x0e, 0x81, + 0x59, 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0x50, 0x4f, 0x6b, 0x43, 0x42, 0xdd, + 0x23, 0xc7, 0xc0, 0xf9, 0x0e, 0x86, 0xe9, 0xc5, 0xcc, 0xcc, 0x0e, 0x86, + 0xe1, 0x46, 0xca, 0xf1, 0x42, 0xdd, 0x2f, 0x42, 0x00, 0x2c, 0xc2, 0xdd, + 0x3b, 0xcc, 0x2e, 0x8a, 0x0e, 0x86, 0x78, 0xd5, 0x35, 0xb4, 0x0e, 0x86, + 0xb9, 0xc8, 0x2e, 0x8e, 0x0e, 0x86, 0x68, 0xc6, 0xcc, 0xcb, 0x0e, 0x80, + 0x58, 0xc6, 0xd2, 0x5f, 0x0e, 0x86, 0x31, 0xc5, 0x1a, 0x11, 0x0e, 0x86, + 0x28, 0x42, 0x02, 0x32, 0xc2, 0xdd, 0x47, 0xc3, 0x09, 0xe5, 0x0e, 0x85, + 0xd8, 0xc2, 0x00, 0x45, 0x0e, 0x85, 0xc1, 0x83, 0x0e, 0x81, 0xa8, 0xce, + 0x6d, 0x78, 0x0e, 0x85, 0x99, 0xc5, 0x6d, 0x65, 0x0e, 0x85, 0x58, 0xcb, + 0x94, 0xbc, 0x0e, 0x85, 0x91, 0xc7, 0x6d, 0x63, 0x0e, 0x85, 0x10, 0xcd, + 0x7a, 0xfb, 
0x0e, 0x85, 0x49, 0xc5, 0x6d, 0x65, 0x0e, 0x85, 0x40, 0xc6, + 0x92, 0x38, 0x0e, 0x85, 0x39, 0xc9, 0x6d, 0x7d, 0x0e, 0x85, 0x30, 0xca, + 0x94, 0x18, 0x0e, 0x83, 0x71, 0xc8, 0xb9, 0x3a, 0x0e, 0x83, 0x58, 0xc3, + 0x63, 0x2b, 0x0e, 0x83, 0x19, 0x03, 0x42, 0xdd, 0x53, 0xc7, 0xc5, 0x05, + 0x0e, 0x83, 0xc1, 0x48, 0xbf, 0x1a, 0x42, 0xdd, 0x5f, 0xcf, 0x65, 0xdf, + 0x0e, 0x84, 0x69, 0xcc, 0x85, 0xb9, 0x0e, 0x84, 0x60, 0xc4, 0x77, 0x35, + 0x0e, 0x82, 0xd0, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0xf9, 0xc8, 0x9c, 0xe0, + 0x0e, 0x81, 0xe8, 0x00, 0x42, 0xdd, 0x6b, 0xc9, 0xad, 0x14, 0x0e, 0x82, + 0x59, 0x8b, 0x0e, 0x82, 0x48, 0x5b, 0x18, 0xc0, 0xc2, 0xdd, 0x77, 0x46, + 0x02, 0xae, 0x42, 0xdd, 0x83, 0xc6, 0x0b, 0x09, 0x01, 0x3a, 0x89, 0xc6, + 0x02, 0xd1, 0x0f, 0xa9, 0xf0, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0x09, 0xc5, + 0x00, 0x2c, 0x0f, 0xda, 0x10, 0x55, 0x16, 0xaa, 0xc2, 0xdd, 0x95, 0x48, + 0x0a, 0x53, 0xc2, 0xdd, 0xa7, 0x4a, 0x13, 0xe3, 0x42, 0xdd, 0xb3, 0xc7, + 0x16, 0x16, 0x01, 0x52, 0x91, 0x45, 0x00, 0x5a, 0x42, 0xdd, 0xbf, 0xc7, + 0x80, 0x70, 0x01, 0x52, 0xf1, 0xc8, 0x52, 0x09, 0x01, 0x53, 0x00, 0x46, + 0x00, 0x2c, 0xc2, 0xdd, 0xcb, 0x46, 0x01, 0xc8, 0xc2, 0xdd, 0xd5, 0x46, + 0x02, 0xae, 0x42, 0xdd, 0xe1, 0xc9, 0xb2, 0x75, 0x0f, 0xaf, 0x71, 0xca, + 0x0b, 0x94, 0x01, 0x80, 0x42, 0x02, 0xdd, 0xed, 0xcc, 0x12, 0x2d, 0x01, + 0x59, 0x81, 0xcc, 0x8a, 0xed, 0x01, 0x59, 0x90, 0xe0, 0x09, 0xa7, 0x0f, + 0xdc, 0xa0, 0x46, 0x00, 0x8b, 0x42, 0xdd, 0xf3, 0x44, 0x04, 0x91, 0xc2, + 0xde, 0x03, 0xc3, 0x04, 0x20, 0x01, 0x2c, 0x60, 0x00, 0x42, 0xde, 0x0f, + 0x46, 0x00, 0x8b, 0x42, 0xde, 0x1b, 0xc9, 0xb0, 0x6b, 0x01, 0x0d, 0x69, + 0xca, 0x01, 0xfd, 0x01, 0x58, 0x20, 0xcc, 0x84, 0x99, 0x01, 0x1d, 0x19, + 0xc9, 0x57, 0x36, 0x01, 0x1d, 0x11, 0xcc, 0x80, 0xcd, 0x01, 0x1d, 0x09, + 0x45, 0x00, 0x8c, 0x42, 0xde, 0x27, 0xca, 0xa2, 0x74, 0x01, 0x1d, 0x49, + 0xcc, 0x82, 0xe9, 0x01, 0x1d, 0x41, 0xca, 0xa3, 0x5a, 0x01, 0x1d, 0x38, + 0xcd, 0x3f, 0xe8, 0x01, 0x2c, 0x69, 0xce, 0x08, 0x79, 0x01, 0x2c, 0x50, + 0xd6, 0x31, 0x40, 0x01, 0x4e, 0x79, 0xd6, 0x14, 0xf9, 0x0f, 0xdb, 0x60, + 0xcc, 0x00, 0x33, 0x01, 0x4c, 0x19, 0xcd, 0x69, 0x65, 0x01, 0x80, 0x70, + 0xcc, 0x84, 0x15, 0x01, 0x4a, 0x81, 0xca, 0xa4, 0x18, 0x01, 0x4a, 0x58, + 0xcc, 0x84, 0x15, 0x01, 0x4a, 0x51, 0xca, 0xa4, 0x18, 0x01, 0x4a, 0x70, + 0xca, 0x03, 0xdd, 0x0f, 0xc4, 0x81, 0x48, 0x01, 0x9a, 0x42, 0xde, 0x45, + 0xc5, 0x01, 0xa2, 0x01, 0x0e, 0xd1, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x70, + 0x46, 0x02, 0x5c, 0xc2, 0xde, 0x5a, 0xd1, 0x52, 0xbb, 0x01, 0x59, 0xb8, + 0xd9, 0x1f, 0xf9, 0x0f, 0xc0, 0x21, 0x15, 0xc2, 0xde, 0x66, 0x42, 0x00, + 0x58, 0xc2, 0xde, 0x72, 0xcf, 0x2c, 0x35, 0x01, 0x0f, 0xb9, 0x0e, 0xc2, + 0xde, 0x7e, 0xc4, 0x01, 0x23, 0x01, 0x0d, 0x49, 0x16, 0xc2, 0xde, 0x8a, + 0xca, 0x9e, 0x28, 0x01, 0x4a, 0x31, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xa1, + 0xcc, 0x84, 0xb1, 0x0f, 0xc4, 0xc0, 0x43, 0x10, 0x9e, 0xc2, 0xde, 0x99, + 0x47, 0x25, 0xf3, 0x42, 0xde, 0xa8, 0xd1, 0x56, 0x73, 0x01, 0x48, 0xf8, + 0x45, 0x00, 0xd5, 0xc2, 0xde, 0xb8, 0x43, 0x02, 0x9c, 0x42, 0xde, 0xd0, + 0x00, 0xc2, 0xde, 0xd6, 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xd8, 0xd7, 0x2a, + 0x26, 0x01, 0x0e, 0x59, 0x4a, 0x01, 0x58, 0x42, 0xde, 0xe2, 0xc6, 0x0e, + 0xe0, 0x01, 0x53, 0xf9, 0xc5, 0x00, 0xd4, 0x01, 0x54, 0x0a, 0x02, 0xde, + 0xee, 0xc8, 0x23, 0xa0, 0x01, 0x54, 0x69, 0xd2, 0x09, 0xd5, 0x01, 0x54, + 0x78, 0xe0, 0x07, 0xc7, 0x01, 0x54, 0x98, 0xe0, 0x08, 0x87, 0x01, 0x3b, + 0x98, 0xc4, 0x11, 0xa4, 0x01, 0x5e, 0x61, 0xc4, 0x0e, 0x6a, 0x0f, 0xbe, + 0x20, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0x79, 0xd2, 0x22, 0x49, 0x0f, 0xbe, + 0x48, 0xc2, 
0x00, 0x43, 0x05, 0x27, 0xc1, 0xc3, 0xe4, 0xfa, 0x05, 0x27, + 0xd1, 0xc2, 0x00, 0x6b, 0x05, 0x27, 0xd9, 0xc2, 0x00, 0xc1, 0x05, 0x27, + 0xe1, 0xc3, 0xe6, 0x02, 0x05, 0x27, 0xe8, 0xdd, 0x10, 0xfa, 0x01, 0x50, + 0x99, 0xdc, 0x12, 0xfd, 0x01, 0x50, 0x90, 0x1e, 0xc2, 0xde, 0xf4, 0x1d, + 0xc2, 0xdf, 0x1e, 0xc7, 0xc8, 0x15, 0x08, 0x3a, 0xa1, 0xc5, 0xd6, 0x1e, + 0x08, 0x3a, 0xa8, 0x23, 0xc2, 0xdf, 0x52, 0x1d, 0xc2, 0xdf, 0x66, 0x1e, + 0xc2, 0xdf, 0x86, 0x1f, 0xc2, 0xdf, 0xae, 0x20, 0xc2, 0xdf, 0xd2, 0x21, + 0xc2, 0xdf, 0xde, 0x22, 0x42, 0xdf, 0xfe, 0x9d, 0x08, 0x3b, 0x01, 0x9e, + 0x08, 0x3b, 0x09, 0x9f, 0x08, 0x3b, 0x11, 0xa0, 0x08, 0x3b, 0x19, 0xa1, + 0x08, 0x3b, 0x21, 0xa2, 0x08, 0x3b, 0x29, 0xa3, 0x08, 0x3b, 0x31, 0xa4, + 0x08, 0x3b, 0x38, 0x1d, 0xc2, 0xe0, 0x22, 0x1e, 0x42, 0xe0, 0x46, 0xc6, + 0xcf, 0x41, 0x08, 0x32, 0x39, 0xc3, 0xe6, 0x50, 0x08, 0x32, 0x79, 0xc3, + 0xe6, 0x5c, 0x08, 0x32, 0x50, 0x1d, 0xc2, 0xe0, 0x6c, 0x1e, 0xc2, 0xe0, + 0x90, 0x1f, 0xc2, 0xe0, 0xb8, 0x20, 0xc2, 0xe0, 0xe0, 0x21, 0xc2, 0xe1, + 0x08, 0x22, 0xc2, 0xe1, 0x30, 0x23, 0xc2, 0xe1, 0x58, 0x24, 0x42, 0xe1, + 0x80, 0x1d, 0xc2, 0xe1, 0x88, 0x1e, 0x42, 0xe1, 0xc4, 0x1d, 0xc2, 0xe1, + 0xfa, 0x1e, 0xc2, 0xe2, 0x1a, 0x1f, 0xc2, 0xe2, 0x32, 0x20, 0xc2, 0xe2, + 0x56, 0x21, 0xc2, 0xe2, 0x7a, 0x22, 0xc2, 0xe2, 0x96, 0x23, 0xc2, 0xe2, + 0xba, 0x24, 0xc2, 0xe2, 0xd2, 0x25, 0xc2, 0xe2, 0xfa, 0x26, 0x42, 0xe3, + 0x22, 0x49, 0xae, 0x8e, 0xc2, 0xe3, 0x3a, 0x47, 0xc3, 0x7d, 0x42, 0xe3, + 0x62, 0x04, 0xc2, 0xe3, 0x8a, 0x48, 0xbf, 0x62, 0x42, 0xe3, 0x92, 0x1e, + 0xc2, 0xe3, 0xa2, 0xc9, 0xae, 0x2b, 0x08, 0x06, 0x90, 0x83, 0x00, 0xc9, + 0xa1, 0xc2, 0x01, 0x30, 0x00, 0xc9, 0x88, 0x91, 0x00, 0xc9, 0x28, 0x87, + 0x00, 0xc9, 0x18, 0x97, 0x00, 0xc9, 0x31, 0x8b, 0x00, 0xc9, 0x20, 0xc6, + 0x0b, 0x09, 0x0f, 0xbf, 0x59, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x20, 0xc7, + 0x3a, 0x19, 0x0f, 0xa9, 0xb9, 0xc6, 0x02, 0xd1, 0x0f, 0xa9, 0xa9, 0xc6, + 0x0b, 0x09, 0x0f, 0xbf, 0x30, 0xdf, 0x0d, 0x5d, 0x08, 0x59, 0xf9, 0xdd, + 0x11, 0xc5, 0x08, 0x59, 0xe8, 0xc7, 0x3a, 0x19, 0x0f, 0xa9, 0xb1, 0xc6, + 0x02, 0xd1, 0x0f, 0xbf, 0x01, 0xc6, 0x0b, 0x09, 0x0f, 0xbf, 0x38, 0xdf, + 0x0c, 0xa3, 0x08, 0x59, 0xf1, 0xdd, 0x05, 0x0a, 0x08, 0x59, 0xe0, 0x95, + 0x00, 0x03, 0x9b, 0x02, 0xe3, 0xb0, 0x85, 0x00, 0x03, 0x1b, 0x02, 0xe3, + 0xd4, 0x96, 0x00, 0x03, 0xa3, 0x02, 0xe3, 0xf8, 0x91, 0x00, 0x03, 0x7b, + 0x02, 0xe4, 0x32, 0x8b, 0x00, 0x03, 0x4b, 0x02, 0xe4, 0x56, 0x86, 0x00, + 0x03, 0x23, 0x02, 0xe4, 0x6a, 0x87, 0x00, 0x03, 0x2b, 0x02, 0xe4, 0x8b, + 0x94, 0x00, 0x03, 0x93, 0x02, 0xe4, 0xb9, 0x8e, 0x00, 0x03, 0x63, 0x02, + 0xe4, 0xd2, 0x88, 0x00, 0x03, 0x33, 0x02, 0xe5, 0x01, 0x9b, 0x00, 0x03, + 0xcb, 0x02, 0xe5, 0x10, 0x8f, 0x00, 0x03, 0x6b, 0x02, 0xe5, 0x1c, 0x97, + 0x00, 0x03, 0xab, 0x02, 0xe5, 0x2e, 0x83, 0x00, 0x03, 0x0b, 0x02, 0xe5, + 0x4b, 0x99, 0x00, 0x03, 0xbb, 0x02, 0xe5, 0x7c, 0x8a, 0x00, 0x03, 0x43, + 0x02, 0xe5, 0x82, 0x9c, 0x00, 0x03, 0xd3, 0x02, 0xe5, 0x9b, 0x9a, 0x00, + 0x03, 0xc3, 0x02, 0xe5, 0xa1, 0x98, 0x00, 0x03, 0xb3, 0x02, 0xe5, 0xa7, + 0x92, 0x00, 0x03, 0x83, 0x02, 0xe5, 0xc3, 0x90, 0x00, 0x03, 0x73, 0x02, + 0xe5, 0xcf, 0x8d, 0x00, 0x03, 0x5b, 0x02, 0xe5, 0xdd, 0x89, 0x00, 0x03, + 0x3b, 0x02, 0xe5, 0xe9, 0x84, 0x00, 0x03, 0x13, 0x02, 0xe6, 0x01, 0x8c, + 0x00, 0x03, 0x53, 0x02, 0xe6, 0x23, 0x93, 0x00, 0x03, 0x8a, 0x02, 0xe6, + 0x29, 0xc2, 0x00, 0x15, 0x07, 0xd8, 0x31, 0xc8, 0xb8, 0xe2, 0x07, 0xd8, + 0x29, 0x08, 0xc2, 0xe6, 0x35, 0xc2, 0x00, 0x0b, 0x00, 0x09, 0x99, 0xc2, + 0x49, 0x0c, 0x00, 0x0a, 0x98, 0x46, 0x45, 0x87, 0x42, 0xe6, 0x44, 0x46, + 0x00, 0x8b, 
0x42, 0xe6, 0x58, 0xc2, 0x25, 0xa1, 0x00, 0xe9, 0x19, 0xc2, + 0x00, 0x8e, 0x00, 0xe8, 0x30, 0x48, 0x10, 0x2f, 0xc2, 0xe6, 0x64, 0xcf, + 0x6a, 0x26, 0x05, 0x5a, 0x31, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0xb0, 0x97, + 0x00, 0xe8, 0xa9, 0xc5, 0xd4, 0x9d, 0x00, 0xe8, 0x81, 0x87, 0x00, 0x13, + 0xb0, 0xc7, 0xc3, 0x84, 0x00, 0xe8, 0x18, 0x87, 0x00, 0xe8, 0x08, 0xca, + 0x1f, 0x59, 0x00, 0x14, 0xd8, 0xc9, 0xab, 0xb5, 0x00, 0x14, 0x08, 0x46, + 0x00, 0x8b, 0xc2, 0xe6, 0x6c, 0xc3, 0x3c, 0x63, 0x00, 0x10, 0xe0, 0x45, + 0x04, 0xcc, 0xc2, 0xe6, 0xa3, 0x46, 0x00, 0x8b, 0x42, 0xe6, 0xaf, 0x00, + 0xc2, 0xe6, 0xc1, 0xc6, 0x10, 0x3f, 0x00, 0x0d, 0x88, 0x46, 0x00, 0x8b, + 0xc2, 0xe6, 0xcd, 0x91, 0x05, 0x3a, 0x71, 0xc4, 0x6d, 0xb5, 0x05, 0x3d, + 0xb1, 0xcb, 0x8e, 0xc3, 0x05, 0x3e, 0x01, 0x44, 0x05, 0x76, 0xc2, 0xe7, + 0x18, 0x8b, 0x00, 0x0d, 0x11, 0x97, 0x00, 0x11, 0x10, 0x46, 0x00, 0x8b, + 0xc2, 0xe7, 0x20, 0x95, 0x05, 0x3b, 0x61, 0x47, 0x67, 0x21, 0xc2, 0xe7, + 0x61, 0xc3, 0x01, 0xbb, 0x00, 0x0c, 0xb0, 0x46, 0x00, 0x8b, 0xc2, 0xe7, + 0x79, 0x4e, 0x73, 0x36, 0xc2, 0xe7, 0xbd, 0x96, 0x05, 0x3b, 0x53, 0x02, + 0xe7, 0xc9, 0xc2, 0x00, 0x75, 0x00, 0x0a, 0x51, 0xc2, 0x01, 0xe2, 0x00, + 0x0d, 0x49, 0xc2, 0x25, 0xa1, 0x00, 0x0d, 0xba, 0x02, 0xe7, 0xcd, 0x46, + 0x00, 0x8b, 0xc2, 0xe7, 0xd1, 0x87, 0x00, 0x06, 0x33, 0x02, 0xe8, 0x18, + 0x83, 0x05, 0x39, 0x91, 0x91, 0x05, 0x39, 0xa1, 0x97, 0x05, 0x39, 0xb1, + 0x98, 0x05, 0x39, 0xc3, 0x02, 0xe8, 0x1e, 0x9b, 0x05, 0x39, 0xe1, 0xca, + 0xa4, 0x72, 0x05, 0x3e, 0x11, 0xc4, 0xde, 0x3f, 0x01, 0x63, 0x69, 0xc8, + 0xbd, 0x8a, 0x00, 0x0c, 0x48, 0xc6, 0xa2, 0xbb, 0x00, 0xf4, 0xf1, 0x46, + 0x00, 0x8b, 0xc2, 0xe8, 0x24, 0xc7, 0xc8, 0xfc, 0x05, 0x3c, 0x59, 0x05, + 0xc2, 0xe8, 0x47, 0xc8, 0xbe, 0x02, 0x05, 0x3e, 0xc1, 0x45, 0x03, 0x14, + 0x42, 0xe8, 0x53, 0x46, 0x00, 0x8b, 0x42, 0xe8, 0x5f, 0x47, 0x01, 0x32, + 0x42, 0xe8, 0x83, 0x46, 0x00, 0x8b, 0xc2, 0xe8, 0x8f, 0xc3, 0x95, 0x51, + 0x00, 0x0f, 0xb8, 0x46, 0x00, 0x8b, 0xc2, 0xe8, 0xab, 0x9b, 0x05, 0x3b, + 0x01, 0xcb, 0x91, 0x15, 0x05, 0x3b, 0x11, 0xc3, 0x02, 0x39, 0x05, 0x3b, + 0x41, 0x47, 0xc8, 0xcb, 0x42, 0xe8, 0xbb, 0x46, 0x00, 0x8b, 0xc2, 0xe8, + 0xcd, 0x9c, 0x05, 0x39, 0x41, 0xc7, 0xc3, 0xa0, 0x05, 0x39, 0x51, 0xc4, + 0x2a, 0xcc, 0x00, 0x06, 0xf3, 0x02, 0xe8, 0xed, 0x46, 0x45, 0x87, 0xc2, + 0xe8, 0xf6, 0x44, 0x05, 0x14, 0x42, 0xe9, 0x1b, 0x00, 0xc2, 0xe9, 0x2d, + 0x48, 0x10, 0x2f, 0xc2, 0xe9, 0x39, 0xca, 0xa6, 0x66, 0x05, 0x3a, 0xe0, + 0x46, 0x00, 0x8b, 0x42, 0xe9, 0x4f, 0x46, 0x00, 0x8b, 0xc2, 0xe9, 0x6b, + 0x8c, 0x00, 0x0e, 0x50, 0x46, 0x00, 0x8b, 0xc2, 0xe9, 0x95, 0x8c, 0x00, + 0x0e, 0x38, 0x46, 0x00, 0x8b, 0x42, 0xe9, 0xbf, 0x46, 0x00, 0x8b, 0xc2, + 0xe9, 0xe8, 0xc4, 0xde, 0xa3, 0x00, 0x0f, 0xb1, 0xc3, 0x0a, 0xe3, 0x05, + 0x39, 0x31, 0xc5, 0xd3, 0x2c, 0x01, 0x63, 0xa8, 0x46, 0x00, 0x8b, 0xc2, + 0xea, 0x02, 0x47, 0x23, 0x34, 0xc2, 0xea, 0x30, 0xc4, 0x38, 0x2c, 0x00, + 0x0c, 0xa1, 0xc2, 0x00, 0xd0, 0x00, 0x0d, 0x10, 0x46, 0x00, 0x8b, 0x42, + 0xea, 0x42, 0x46, 0x00, 0x8b, 0xc2, 0xea, 0x54, 0x9c, 0x00, 0x0f, 0x8a, + 0x02, 0xea, 0x74, 0x46, 0x00, 0x8b, 0xc2, 0xea, 0x7a, 0xc2, 0x00, 0x0a, + 0x05, 0x3d, 0x99, 0xc8, 0xba, 0x4a, 0x05, 0x39, 0x63, 0x02, 0xea, 0xa2, + 0xc2, 0x00, 0x45, 0x05, 0x3b, 0x71, 0xcf, 0x67, 0x1a, 0x05, 0x3e, 0x80, + 0x46, 0x00, 0x8b, 0xc2, 0xea, 0xa8, 0xc3, 0x04, 0x87, 0x05, 0x3d, 0xa1, + 0xc7, 0xc9, 0xb9, 0x05, 0x3a, 0x30, 0x46, 0x00, 0x8b, 0x42, 0xea, 0xcc, + 0x46, 0x00, 0x8b, 0x42, 0xea, 0xd6, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0x11, + 0xc3, 0x02, 0x45, 0x00, 0x74, 0x20, 0xc2, 0x0f, 0x7b, 0x00, 0x76, 0xf1, + 0xc3, 0x4d, 
0xc3, 0x00, 0x76, 0xf8, 0xc2, 0x19, 0x2c, 0x00, 0x74, 0x71, + 0xc2, 0x00, 0xc1, 0x00, 0x74, 0x98, 0x83, 0x00, 0x74, 0x79, 0xc2, 0x00, + 0xd0, 0x00, 0x74, 0x80, 0x06, 0xc2, 0xea, 0xe2, 0xc2, 0x00, 0xd0, 0x00, + 0x74, 0xc0, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0xa9, 0xc6, 0x04, 0xe1, 0x0f, + 0xda, 0xa1, 0xcc, 0x04, 0xcb, 0x0f, 0xdb, 0x38, 0x46, 0x01, 0xc8, 0xc2, + 0xea, 0xec, 0xd2, 0x4b, 0x83, 0x0f, 0xdb, 0x18, 0xd2, 0x4b, 0x83, 0x0f, + 0xdb, 0x11, 0x46, 0x01, 0xc8, 0x42, 0xea, 0xf8, 0xc6, 0x04, 0xe1, 0x0f, + 0xda, 0xc9, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0xd1, 0xcc, 0x04, 0xcb, 0x0f, + 0xda, 0xe0, 0x46, 0x02, 0xae, 0xc2, 0xeb, 0x04, 0xd2, 0x4c, 0x37, 0x0f, + 0xda, 0xf0, 0xd2, 0x4c, 0x37, 0x0f, 0xda, 0xe9, 0x46, 0x02, 0xae, 0x42, + 0xeb, 0x10, 0x46, 0x00, 0x8b, 0x42, 0xeb, 0x1c, 0xd4, 0x3e, 0x6c, 0x01, + 0x5d, 0xc0, 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x0b, 0x02, 0xeb, 0x28, 0xcc, + 0x82, 0xb9, 0x01, 0x5b, 0x59, 0xcd, 0x7c, 0xa8, 0x01, 0x5c, 0x28, 0xd5, + 0x03, 0xd2, 0x0f, 0xc0, 0xa9, 0xd8, 0x22, 0x5b, 0x0f, 0xc0, 0x49, 0xd9, + 0x1f, 0xf9, 0x0f, 0xc0, 0x29, 0x46, 0x03, 0x13, 0xc2, 0xeb, 0x2c, 0xcd, + 0x75, 0xa6, 0x01, 0x0e, 0xf1, 0x44, 0x08, 0xba, 0xc2, 0xeb, 0x38, 0xd1, + 0x01, 0x68, 0x01, 0x48, 0x49, 0xcc, 0x84, 0xb1, 0x0f, 0xc4, 0xc8, 0x47, + 0x13, 0x6d, 0xc2, 0xeb, 0x44, 0xc6, 0x10, 0x9d, 0x01, 0x4a, 0xc1, 0xc8, + 0xae, 0xbc, 0x01, 0x4b, 0x00, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xe1, 0xc6, + 0x10, 0x9d, 0x01, 0x4a, 0xa0, 0xe0, 0x0a, 0xe7, 0x01, 0x3a, 0x58, 0xd6, + 0x2e, 0x6a, 0x01, 0x39, 0xc1, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x79, 0xcd, + 0x0e, 0x61, 0x0f, 0xbe, 0x88, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x29, 0xc9, + 0xb4, 0x91, 0x0f, 0xb2, 0xe8, 0xc5, 0x01, 0xa2, 0x01, 0x3c, 0xc1, 0x49, + 0x01, 0xaa, 0x42, 0xeb, 0x4e, 0xdd, 0x0a, 0x8a, 0x01, 0x3a, 0xe1, 0x44, + 0x05, 0x9e, 0x42, 0xeb, 0x5a, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0xc1, 0xd2, + 0x22, 0x49, 0x0f, 0xbe, 0x60, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x31, 0xc9, + 0xb4, 0x91, 0x0f, 0xb2, 0xf0, 0xe0, 0x0c, 0x07, 0x01, 0x3d, 0x68, 0x44, + 0x00, 0x58, 0xc2, 0xeb, 0x60, 0x44, 0x07, 0x69, 0x42, 0xeb, 0x66, 0xd0, + 0x08, 0x97, 0x01, 0x3b, 0x81, 0xd7, 0x0a, 0x90, 0x01, 0x3b, 0x70, 0xd5, + 0x03, 0xd2, 0x0f, 0xc0, 0xc1, 0xdb, 0x17, 0x46, 0x0f, 0xc0, 0xe0, 0xd1, + 0x56, 0x0d, 0x01, 0x3a, 0x19, 0xc8, 0x0a, 0xff, 0x01, 0x39, 0xe8, 0xd0, + 0x20, 0x66, 0x01, 0x3d, 0xc9, 0xd0, 0x03, 0xb7, 0x01, 0x3d, 0xc1, 0xd0, + 0x3c, 0x90, 0x01, 0x3d, 0xb8, 0x47, 0x3b, 0x9c, 0xc2, 0xeb, 0x6c, 0xc5, + 0x1c, 0xae, 0x01, 0x3b, 0x20, 0xd9, 0x1e, 0x1e, 0x01, 0x37, 0x19, 0xcd, + 0x78, 0x30, 0x01, 0x5a, 0xb8, 0xdd, 0x0a, 0x8a, 0x01, 0x3a, 0xf1, 0x44, + 0x05, 0x9e, 0x42, 0xeb, 0x78, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xd9, 0xdb, + 0x17, 0x46, 0x0f, 0xc0, 0xf8, 0x46, 0x00, 0x8b, 0x42, 0xeb, 0x7e, 0xd0, + 0x08, 0x97, 0x01, 0x3b, 0x89, 0xd7, 0x0a, 0x90, 0x01, 0x3b, 0x78, 0x00, + 0x42, 0xeb, 0x8a, 0xc3, 0x4a, 0xb9, 0x00, 0x2f, 0x91, 0xc3, 0x04, 0xac, + 0x00, 0x2f, 0x80, 0xc4, 0xe4, 0x8b, 0x07, 0xda, 0x71, 0xc6, 0x64, 0xa4, + 0x07, 0xda, 0x20, 0xc4, 0xe4, 0x8b, 0x07, 0xda, 0x69, 0xc6, 0x64, 0xa4, + 0x07, 0xd9, 0xd8, 0xc4, 0xe4, 0x8b, 0x07, 0xda, 0x61, 0xc6, 0x64, 0xa4, + 0x07, 0xd9, 0x88, 0xc5, 0xd5, 0xa6, 0x07, 0xda, 0x59, 0xc6, 0x64, 0xa4, + 0x07, 0xd9, 0xa8, 0xcc, 0x84, 0x75, 0x07, 0xda, 0x50, 0xcc, 0x84, 0x75, + 0x07, 0xda, 0x30, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0xc0, 0x46, 0x00, 0x8b, + 0x42, 0xeb, 0x96, 0xcc, 0x84, 0x75, 0x07, 0xda, 0x08, 0xcc, 0x84, 0x75, + 0x07, 0xda, 0x18, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0xd0, 0xc6, 0x64, 0xa4, + 0x07, 0xd9, 0xc9, 0xc5, 0xd5, 0x83, 0x07, 0xd8, 0xe8, 0xc2, 0x00, 0x07, + 0x00, 0x2e, 
0x83, 0x02, 0xeb, 0xa3, 0x4a, 0x9f, 0x18, 0x42, 0xeb, 0xa9, + 0xc6, 0xcc, 0x59, 0x00, 0x2e, 0x38, 0xc6, 0x44, 0x50, 0x00, 0x2e, 0x09, + 0xc3, 0x62, 0x7d, 0x00, 0x2d, 0x80, 0xce, 0x6d, 0xda, 0x00, 0x2d, 0xd0, + 0xc6, 0xcc, 0xe3, 0x00, 0x2d, 0x99, 0xc5, 0x79, 0xbe, 0x00, 0x2d, 0x91, + 0xc5, 0xa0, 0x88, 0x00, 0x2d, 0x88, 0xc5, 0xd7, 0xfe, 0x00, 0x2c, 0xa9, + 0xc5, 0xcc, 0x5a, 0x00, 0x2c, 0xa0, 0xc6, 0xcc, 0xd7, 0x00, 0x2d, 0x49, + 0xc6, 0xd0, 0xe5, 0x00, 0x2d, 0x00, 0xc2, 0x4a, 0xce, 0x02, 0x6e, 0x31, + 0xce, 0x71, 0xa0, 0x02, 0x6f, 0x90, 0x11, 0xc2, 0xeb, 0xb5, 0xcc, 0x7f, + 0xdc, 0x02, 0x6e, 0xd8, 0x00, 0x42, 0xeb, 0xc1, 0xc2, 0x19, 0x2c, 0x08, + 0x68, 0xc9, 0xc2, 0x01, 0x4a, 0x08, 0x68, 0xb8, 0x02, 0x42, 0xeb, 0xcd, + 0x44, 0x3a, 0xbf, 0xc2, 0xeb, 0xf9, 0xc3, 0x39, 0x37, 0x00, 0x88, 0x4a, + 0x02, 0xec, 0x39, 0xc5, 0xd9, 0xca, 0x05, 0x4b, 0xd8, 0xc6, 0xba, 0x7c, + 0x00, 0x88, 0x8b, 0x02, 0xec, 0x3d, 0xc4, 0x79, 0xf3, 0x00, 0x88, 0x3b, + 0x02, 0xec, 0x41, 0xc6, 0xca, 0x0e, 0x00, 0x8a, 0x00, 0x02, 0x42, 0xec, + 0x45, 0x02, 0x42, 0xec, 0x6f, 0xc5, 0xc0, 0x7d, 0x00, 0x88, 0x1b, 0x02, + 0xec, 0x87, 0xc6, 0xc1, 0x86, 0x00, 0x88, 0x80, 0xc5, 0x8e, 0xdf, 0x00, + 0x88, 0x03, 0x02, 0xec, 0x8b, 0xc6, 0xbb, 0xec, 0x00, 0x88, 0x79, 0x47, + 0x79, 0xeb, 0x42, 0xec, 0x91, 0x02, 0x42, 0xec, 0xa7, 0xc4, 0xc6, 0x7a, + 0x00, 0x88, 0x63, 0x02, 0xec, 0xcb, 0x42, 0x00, 0x0a, 0xc2, 0xec, 0xd1, + 0x4a, 0xa3, 0x00, 0x42, 0xec, 0xe0, 0xc6, 0xb7, 0x9c, 0x00, 0x8a, 0x61, + 0xc9, 0x90, 0xe0, 0x00, 0x8a, 0xc8, 0xc6, 0x92, 0x0c, 0x00, 0x8b, 0x01, + 0x83, 0x00, 0x8b, 0x0b, 0x02, 0xec, 0xe8, 0x1b, 0xc2, 0xec, 0xf9, 0x87, + 0x00, 0x8b, 0x33, 0x02, 0xed, 0x1c, 0x91, 0x00, 0x8b, 0x4b, 0x02, 0xed, + 0x2a, 0x19, 0xc2, 0xed, 0x32, 0x97, 0x00, 0x8b, 0x73, 0x02, 0xed, 0x44, + 0x8b, 0x00, 0x8b, 0xab, 0x02, 0xed, 0x48, 0xca, 0xa6, 0x02, 0x00, 0x8d, + 0x10, 0x0d, 0xc2, 0xed, 0x4c, 0x15, 0xc2, 0xed, 0x61, 0xc5, 0xd9, 0x61, + 0x00, 0x8d, 0x5b, 0x02, 0xed, 0x70, 0x16, 0xc2, 0xed, 0x74, 0xc5, 0xd6, + 0x8c, 0x00, 0x8d, 0x7b, 0x02, 0xed, 0x83, 0xc5, 0xda, 0xe7, 0x00, 0x8d, + 0xbb, 0x02, 0xed, 0x87, 0x12, 0xc2, 0xed, 0x8b, 0xc5, 0xb7, 0x9d, 0x00, + 0x8d, 0xe3, 0x02, 0xed, 0xa6, 0x05, 0xc2, 0xed, 0xaa, 0xc5, 0x90, 0xe4, + 0x00, 0x8e, 0x13, 0x02, 0xed, 0xb9, 0x42, 0x0c, 0x43, 0x42, 0xed, 0xbd, + 0xc5, 0x8e, 0xdf, 0x01, 0x89, 0x8b, 0x02, 0xed, 0xcc, 0xc6, 0xbb, 0xec, + 0x01, 0x8a, 0x59, 0x47, 0x79, 0xeb, 0x42, 0xed, 0xd2, 0x44, 0x3a, 0xbf, + 0xc2, 0xed, 0xe2, 0xc3, 0x39, 0x37, 0x01, 0x8a, 0x2a, 0x02, 0xee, 0x12, + 0x02, 0x42, 0xee, 0x16, 0xc5, 0xc0, 0x7d, 0x01, 0x89, 0xb9, 0xc6, 0xc1, + 0x86, 0x01, 0x8a, 0x60, 0x02, 0x42, 0xee, 0x34, 0x02, 0x42, 0xee, 0x5d, + 0xc4, 0x79, 0xf3, 0x01, 0x8a, 0x13, 0x02, 0xee, 0x67, 0xc6, 0xba, 0x7c, + 0x01, 0x8a, 0x69, 0xc6, 0xca, 0x0e, 0x01, 0x8b, 0xf8, 0xc4, 0xb7, 0x9e, + 0x01, 0x8a, 0x38, 0xc4, 0xc6, 0x7a, 0x01, 0x8a, 0x41, 0xc6, 0xc6, 0x79, + 0x01, 0x8a, 0x50, 0x87, 0x01, 0x8a, 0x81, 0xc4, 0xa6, 0x08, 0x01, 0x8c, + 0x6a, 0x02, 0xee, 0x6b, 0x83, 0x01, 0x8a, 0x8b, 0x02, 0xee, 0x6f, 0x87, + 0x01, 0x8a, 0xb3, 0x02, 0xee, 0x73, 0x91, 0x01, 0x8a, 0xdb, 0x02, 0xee, + 0x83, 0x97, 0x01, 0x8b, 0x03, 0x02, 0xee, 0x87, 0x8b, 0x01, 0x8b, 0x10, + 0x91, 0x01, 0x8a, 0x99, 0x97, 0x01, 0x8b, 0x08, 0x87, 0x01, 0x8a, 0xd0, + 0x83, 0x01, 0x8a, 0xc3, 0x02, 0xee, 0x8b, 0x87, 0x01, 0x8a, 0xf3, 0x02, + 0xee, 0x8f, 0x8b, 0x01, 0x8a, 0xf8, 0x91, 0x01, 0x81, 0x11, 0xc4, 0x18, + 0x12, 0x01, 0x81, 0xc8, 0xc3, 0x02, 0xdf, 0x01, 0x81, 0x19, 0xc4, 0x0d, + 0x0e, 0x01, 0x81, 0xd0, 0xc3, 0x77, 0x79, 0x08, 0x47, 0x89, 0xc4, 0xdc, + 0x2d, 0x08, 
0x47, 0x70, 0x91, 0x07, 0xfb, 0x31, 0x83, 0x07, 0xfc, 0xe0, + 0x45, 0x03, 0x14, 0xc2, 0xee, 0x93, 0x83, 0x07, 0xfb, 0xd9, 0x97, 0x07, + 0xfb, 0xe9, 0x87, 0x07, 0xfb, 0xf1, 0x91, 0x07, 0xfb, 0xf9, 0x8b, 0x07, + 0xfb, 0xe0, 0x83, 0x07, 0xfb, 0xb1, 0x8b, 0x07, 0xfb, 0xb9, 0x87, 0x07, + 0xfb, 0xc9, 0x91, 0x07, 0xfb, 0xd1, 0x97, 0x07, 0xfb, 0xc0, 0x83, 0x07, + 0xfc, 0x01, 0x8b, 0x07, 0xfc, 0x09, 0x97, 0x07, 0xfc, 0x11, 0x87, 0x07, + 0xfc, 0x19, 0x91, 0x07, 0xfc, 0x20, 0x87, 0x07, 0xfc, 0x41, 0x91, 0x07, + 0xfc, 0x49, 0x83, 0x07, 0xfc, 0x29, 0x8b, 0x07, 0xfc, 0x31, 0x97, 0x07, + 0xfc, 0x38, 0x8b, 0x07, 0xfc, 0x59, 0x97, 0x07, 0xfc, 0x61, 0x87, 0x07, + 0xfc, 0x69, 0x83, 0x07, 0xfc, 0x51, 0x91, 0x07, 0xfc, 0x70, 0x8b, 0x07, + 0xfc, 0x81, 0x91, 0x07, 0xfc, 0x99, 0x83, 0x07, 0xfc, 0x79, 0x97, 0x07, + 0xfc, 0x89, 0x87, 0x07, 0xfc, 0x90, 0x83, 0x07, 0xfc, 0xa1, 0x97, 0x07, + 0xfc, 0xa9, 0x91, 0x07, 0xfc, 0xb0, 0x97, 0x07, 0xfc, 0xc9, 0x87, 0x07, + 0xfc, 0xd1, 0x91, 0x07, 0xfc, 0xd9, 0x83, 0x07, 0xfc, 0xb9, 0x8b, 0x07, + 0xfc, 0xc0, 0xc5, 0xd9, 0xca, 0x07, 0xfd, 0x18, 0xc6, 0x8e, 0xde, 0x07, + 0xfd, 0x11, 0xc5, 0x79, 0xf2, 0x07, 0xfd, 0x99, 0xc4, 0xad, 0x2b, 0x07, + 0xfd, 0xb1, 0xc5, 0xdb, 0xff, 0x07, 0xfd, 0xc9, 0xc6, 0xc0, 0x7c, 0x07, + 0xfd, 0x40, 0xc6, 0x8e, 0xde, 0x07, 0xfd, 0x51, 0xc5, 0xda, 0xe7, 0x07, + 0xfd, 0x59, 0x12, 0xc2, 0xee, 0xb1, 0xc4, 0xad, 0x2b, 0x07, 0xfd, 0x69, + 0xc7, 0xc1, 0x85, 0x07, 0xfd, 0x71, 0xc5, 0x90, 0xe4, 0x07, 0xfd, 0x80, + 0xc5, 0xd9, 0xca, 0x07, 0xfd, 0xa0, 0x87, 0x07, 0xfe, 0x28, 0x91, 0x07, + 0xfe, 0x50, 0x87, 0x07, 0xfe, 0x70, 0x91, 0x07, 0xfe, 0xa0, 0xc5, 0xdb, + 0xff, 0x07, 0xfd, 0x29, 0xc5, 0x90, 0xe4, 0x07, 0xfd, 0x30, 0x91, 0x0d, + 0x8a, 0x91, 0x87, 0x0d, 0x8a, 0x89, 0x8b, 0x0d, 0x8a, 0x81, 0x83, 0x01, + 0x84, 0x70, 0x83, 0x01, 0x84, 0x19, 0x97, 0x01, 0x84, 0x29, 0x91, 0x01, + 0x84, 0x38, 0x83, 0x01, 0x84, 0xa9, 0x87, 0x01, 0x84, 0xb0, 0xd2, 0x4a, + 0x99, 0x01, 0x72, 0x30, 0xe0, 0x06, 0xa7, 0x01, 0x52, 0x58, 0xcf, 0x62, + 0x97, 0x01, 0x52, 0x49, 0xc5, 0x13, 0x84, 0x01, 0x52, 0x38, 0xcb, 0x2a, + 0xa5, 0x01, 0x52, 0x21, 0xc7, 0x80, 0x70, 0x01, 0x52, 0x19, 0xc3, 0x02, + 0xa3, 0x01, 0x52, 0x00, 0xc6, 0x52, 0x0b, 0x01, 0x50, 0xe1, 0xc3, 0x00, + 0x44, 0x01, 0x50, 0xd0, 0x00, 0x42, 0xee, 0xbd, 0x19, 0xc2, 0xee, 0xc9, + 0xc2, 0x00, 0xc4, 0x08, 0x5b, 0xe1, 0xc4, 0x02, 0xde, 0x08, 0x5b, 0xd0, + 0xc2, 0x39, 0x8b, 0x08, 0x5b, 0x91, 0xc3, 0x1e, 0x1b, 0x08, 0x5b, 0x40, + 0xc3, 0x11, 0xef, 0x08, 0x5b, 0x89, 0x03, 0x42, 0xee, 0xd3, 0xc2, 0x00, + 0x8e, 0x08, 0x5b, 0x38, 0x00, 0x42, 0xee, 0xdf, 0x19, 0xc2, 0xee, 0xeb, + 0xc2, 0x00, 0xc4, 0x08, 0x5a, 0xe1, 0xc4, 0x02, 0xde, 0x08, 0x5a, 0xd0, + 0xc2, 0x39, 0x8b, 0x08, 0x5a, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, 0x5a, 0x40, + 0xc3, 0x11, 0xef, 0x08, 0x5a, 0xa1, 0x03, 0x42, 0xee, 0xf5, 0xc2, 0x00, + 0x8e, 0x08, 0x5a, 0x38, 0xc4, 0x36, 0xb5, 0x08, 0x5a, 0x01, 0xc3, 0x16, + 0x5a, 0x08, 0x5a, 0x78, 0xc2, 0x02, 0xa0, 0x00, 0x00, 0xf1, 0xc4, 0x02, + 0xde, 0x00, 0x00, 0xe8, 0x16, 0xc2, 0xef, 0x01, 0xc3, 0x05, 0x14, 0x0f, + 0x65, 0x88, 0xc4, 0x26, 0x78, 0x0f, 0x65, 0x59, 0xc5, 0x06, 0xdb, 0x0f, + 0x65, 0x51, 0x15, 0xc2, 0xef, 0x0d, 0x08, 0xc2, 0xef, 0x19, 0x16, 0xc2, + 0xef, 0x25, 0xc3, 0x05, 0x14, 0x0f, 0x65, 0x18, 0xc2, 0x00, 0xd1, 0x0f, + 0x65, 0x10, 0xc2, 0x00, 0xd1, 0x0f, 0x64, 0xf8, 0xc2, 0x0d, 0x10, 0x0f, + 0x64, 0x13, 0x02, 0xef, 0x31, 0x00, 0x42, 0xef, 0x37, 0x9b, 0x0f, 0x64, + 0x0b, 0x02, 0xef, 0x43, 0x00, 0x42, 0xef, 0x49, 0xc4, 0x18, 0x10, 0x0f, + 0x63, 0xbb, 0x02, 0xef, 0x55, 0xc2, 0x22, 0xcc, 0x0f, 0x63, 0xb2, 0x02, + 0xef, 0x62, 
0x0b, 0xc2, 0xef, 0x6f, 0x11, 0x42, 0xef, 0x81, 0x0a, 0xc2, + 0xef, 0x93, 0x19, 0xc2, 0xef, 0xa5, 0xc2, 0x00, 0xc4, 0x0f, 0x63, 0xd2, + 0x02, 0xef, 0xb5, 0x00, 0x42, 0xef, 0xbb, 0xc4, 0x01, 0xce, 0x0f, 0x65, + 0x71, 0xc7, 0x08, 0x79, 0x0f, 0x65, 0x68, 0xc6, 0xcc, 0x2f, 0x01, 0x96, + 0x01, 0x17, 0x42, 0xef, 0xc7, 0xc3, 0x78, 0xc0, 0x01, 0x96, 0x11, 0x9b, + 0x01, 0x96, 0x20, 0xc4, 0xe3, 0xdf, 0x01, 0x96, 0x19, 0xc5, 0xd9, 0x4d, + 0x01, 0x96, 0x38, 0xc7, 0xc5, 0x44, 0x01, 0x96, 0x59, 0x43, 0x1a, 0xd3, + 0x42, 0xef, 0xd3, 0xc4, 0x15, 0xe7, 0x01, 0x9a, 0xc1, 0xc3, 0x05, 0x14, + 0x01, 0x9a, 0xc9, 0x16, 0xc2, 0xef, 0xf2, 0x08, 0xc2, 0xf0, 0x00, 0x15, + 0xc2, 0xf0, 0x0d, 0x07, 0xc2, 0xf0, 0x1f, 0xc4, 0x26, 0x78, 0x01, 0x9b, + 0x0a, 0x02, 0xf0, 0x2e, 0xc3, 0x00, 0x4a, 0x01, 0x7f, 0xb9, 0xc9, 0x03, + 0x68, 0x01, 0x7f, 0xd0, 0xc4, 0x00, 0x49, 0x01, 0x7f, 0xc1, 0xc5, 0x00, + 0x2c, 0x01, 0x7f, 0xc8, 0xc9, 0x57, 0x20, 0x08, 0x42, 0xf8, 0xc4, 0x18, + 0x12, 0x08, 0x42, 0xe1, 0x91, 0x08, 0x42, 0xc8, 0xc8, 0x4b, 0x94, 0x08, + 0x42, 0xf1, 0xc7, 0x0d, 0x04, 0x08, 0x42, 0xe8, 0xc4, 0xdc, 0x2d, 0x08, + 0x42, 0x71, 0xc3, 0x77, 0x79, 0x08, 0x42, 0x88, 0xd7, 0x2a, 0xf5, 0x0f, + 0xd2, 0x58, 0x49, 0x2a, 0xf5, 0x42, 0xf0, 0x34, 0x49, 0x2a, 0xf5, 0x42, + 0xf0, 0x40, 0xc5, 0x56, 0xa5, 0x01, 0x32, 0xc3, 0x02, 0xf0, 0x4c, 0xc3, + 0x00, 0x74, 0x01, 0x32, 0xa2, 0x02, 0xf0, 0x56, 0x49, 0x2a, 0xf5, 0x42, + 0xf0, 0x5c, 0x49, 0x2a, 0xf5, 0x42, 0xf0, 0x68, 0x0d, 0xc2, 0xf0, 0x74, + 0xc5, 0xa8, 0xf7, 0x0f, 0xd0, 0xf9, 0xc4, 0xde, 0x83, 0x0f, 0xd1, 0x01, + 0xc6, 0xca, 0xfd, 0x0f, 0xd1, 0x09, 0xc4, 0xe3, 0x93, 0x0f, 0xd1, 0x18, + 0xdd, 0x12, 0x1c, 0x0f, 0xbc, 0x51, 0x45, 0x00, 0x8c, 0x42, 0xf0, 0x80, + 0xcf, 0x61, 0x2f, 0x01, 0x3f, 0x19, 0xce, 0x6f, 0x2a, 0x01, 0x3f, 0x10, + 0xc2, 0x00, 0x61, 0x0f, 0xc8, 0x6b, 0x02, 0xf0, 0x98, 0x43, 0x11, 0x3c, + 0x42, 0xf0, 0x9e, 0x51, 0x0a, 0xc9, 0xc2, 0xf0, 0xaa, 0x45, 0x00, 0x8c, + 0xc2, 0xf0, 0xbc, 0xc6, 0x86, 0xfd, 0x0f, 0xa9, 0x98, 0x45, 0x00, 0x8c, + 0xc2, 0xf0, 0xd6, 0xcc, 0x85, 0xf5, 0x0f, 0x99, 0x2a, 0x02, 0xf0, 0xe2, + 0x15, 0xc2, 0xf0, 0xe8, 0xc7, 0x0a, 0xe0, 0x01, 0x59, 0x58, 0xca, 0xa6, + 0x34, 0x01, 0x36, 0xc9, 0x49, 0x01, 0xaa, 0x42, 0xf0, 0xf4, 0xc7, 0x46, + 0x3d, 0x01, 0x2e, 0x29, 0xce, 0x6c, 0x8a, 0x01, 0x2e, 0x19, 0xc8, 0x01, + 0x92, 0x01, 0x2e, 0x08, 0xd0, 0x5e, 0xa2, 0x01, 0x3e, 0x81, 0xc9, 0xaf, + 0xa5, 0x01, 0x36, 0x59, 0xc4, 0x22, 0xdc, 0x01, 0x33, 0x11, 0x51, 0x0a, + 0xc9, 0x42, 0xf1, 0x00, 0xc5, 0x06, 0x82, 0x01, 0x30, 0xf9, 0xcf, 0x66, + 0x84, 0x0f, 0xac, 0xb9, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x38, 0xce, 0x6c, + 0x8a, 0x01, 0x2d, 0xf9, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0xe8, 0xe0, 0x03, + 0x07, 0x01, 0x3e, 0x08, 0xc5, 0x04, 0xa2, 0x01, 0x3a, 0x01, 0xc3, 0x00, + 0x28, 0x0f, 0xa5, 0x70, 0x44, 0x00, 0x8b, 0x42, 0xf1, 0x12, 0xc5, 0x06, + 0x82, 0x01, 0x30, 0xf1, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x48, 0x12, 0xc2, + 0xf1, 0x18, 0xce, 0x6c, 0x8a, 0x01, 0x2d, 0xc9, 0xc8, 0x01, 0x92, 0x01, + 0x2d, 0xb8, 0xc9, 0x33, 0xad, 0x01, 0x2f, 0x60, 0xcb, 0x51, 0x6d, 0x01, + 0x2f, 0xe9, 0xc5, 0x0b, 0x0a, 0x01, 0x2f, 0xd9, 0xc3, 0x0e, 0x6b, 0x01, + 0x5a, 0x80, 0x90, 0x0f, 0x17, 0x42, 0x02, 0xf1, 0x24, 0x89, 0x0f, 0x17, + 0x10, 0xc2, 0x01, 0xa3, 0x08, 0xc6, 0xd9, 0xc2, 0x01, 0xc8, 0x08, 0xc6, + 0xd0, 0x90, 0x08, 0xc6, 0x81, 0x9b, 0x08, 0xc6, 0x68, 0x8c, 0x08, 0xc6, + 0x70, 0xc2, 0x01, 0xa3, 0x08, 0xc5, 0xd9, 0xc2, 0x01, 0xc8, 0x08, 0xc5, + 0xd0, 0x90, 0x08, 0xc5, 0x81, 0x9b, 0x08, 0xc5, 0x68, 0x8c, 0x08, 0xc5, + 0x70, 0xe0, 0x04, 0x07, 0x01, 0x5c, 0xa0, 0xcc, 0x81, 0x2d, 0x0f, 0xcb, + 0xd1, 0xd7, 
0x2a, 0xc7, 0x0f, 0xcb, 0x99, 0xca, 0xa5, 0xbc, 0x0f, 0xd7, + 0x18, 0xcb, 0x85, 0x1e, 0x0f, 0xb0, 0x11, 0xca, 0x9b, 0x3a, 0x0f, 0xc8, + 0x90, 0xc9, 0xad, 0x65, 0x0f, 0xb2, 0x31, 0x44, 0x05, 0x76, 0xc2, 0xf1, + 0x28, 0xd1, 0x55, 0xfc, 0x0f, 0xc9, 0x40, 0x45, 0x02, 0x9a, 0x42, 0xf1, + 0x37, 0xc8, 0x6c, 0x12, 0x0f, 0xb0, 0x99, 0xc8, 0xb8, 0xb2, 0x0f, 0xc9, + 0x00, 0xcb, 0x92, 0xcd, 0x0f, 0xb1, 0xb9, 0xc6, 0xcc, 0x29, 0x0f, 0xce, + 0x80, 0xc2, 0x02, 0xa0, 0x07, 0xf8, 0x91, 0xc4, 0x02, 0xde, 0x07, 0xf8, + 0x98, 0xc3, 0x09, 0x9e, 0x07, 0xf8, 0xa1, 0xc3, 0x0d, 0x14, 0x07, 0xf8, + 0xa8, 0xc2, 0x22, 0xcc, 0x07, 0xf8, 0xb1, 0xc4, 0x18, 0x10, 0x07, 0xf8, + 0xb8, 0xc9, 0xb4, 0x64, 0x07, 0xf9, 0x01, 0x83, 0x07, 0xf8, 0x60, 0xce, + 0x25, 0xad, 0x07, 0xf9, 0xd9, 0xcd, 0x00, 0x32, 0x07, 0xfa, 0xd9, 0xd1, + 0x4f, 0x7a, 0x07, 0xfa, 0xf9, 0xcb, 0x1a, 0x50, 0x07, 0xf8, 0x40, 0x83, + 0x07, 0xf9, 0x09, 0x84, 0x07, 0xf9, 0x11, 0x85, 0x07, 0xf9, 0x19, 0x86, + 0x07, 0xf9, 0x21, 0x87, 0x07, 0xf9, 0x29, 0x88, 0x07, 0xf9, 0x31, 0x89, + 0x07, 0xf9, 0x39, 0x8a, 0x07, 0xf9, 0x41, 0x8b, 0x07, 0xf9, 0x49, 0x8c, + 0x07, 0xf9, 0x51, 0x8d, 0x07, 0xf9, 0x59, 0x8e, 0x07, 0xf9, 0x61, 0x8f, + 0x07, 0xf9, 0x69, 0x95, 0x07, 0xf9, 0x99, 0x96, 0x07, 0xf9, 0xa1, 0x97, + 0x07, 0xf9, 0xa9, 0x98, 0x07, 0xf9, 0xb1, 0x99, 0x07, 0xf9, 0xb9, 0x9a, + 0x07, 0xf9, 0xc1, 0x9b, 0x07, 0xf9, 0xc9, 0x9c, 0x07, 0xf9, 0xd1, 0x90, + 0x07, 0xf9, 0x71, 0x91, 0x07, 0xf9, 0x79, 0x92, 0x07, 0xf9, 0x81, 0x93, + 0x07, 0xf9, 0x89, 0x94, 0x07, 0xf9, 0x90, 0x83, 0x07, 0xfa, 0x09, 0x84, + 0x07, 0xfa, 0x11, 0x85, 0x07, 0xfa, 0x19, 0x87, 0x07, 0xfa, 0x29, 0x88, + 0x07, 0xfa, 0x31, 0x89, 0x07, 0xfa, 0x39, 0x8a, 0x07, 0xfa, 0x41, 0x8b, + 0x07, 0xfa, 0x49, 0x8c, 0x07, 0xfa, 0x51, 0x8d, 0x07, 0xfa, 0x59, 0x8e, + 0x07, 0xfa, 0x61, 0x8f, 0x07, 0xfa, 0x69, 0x90, 0x07, 0xfa, 0x71, 0x91, + 0x07, 0xfa, 0x79, 0x92, 0x07, 0xfa, 0x81, 0x93, 0x07, 0xfa, 0x89, 0x94, + 0x07, 0xfa, 0x91, 0x95, 0x07, 0xfa, 0x99, 0x96, 0x07, 0xfa, 0xa1, 0x97, + 0x07, 0xfa, 0xa9, 0x98, 0x07, 0xfa, 0xb1, 0x99, 0x07, 0xfa, 0xb9, 0x9a, + 0x07, 0xfa, 0xc1, 0x9b, 0x07, 0xfa, 0xc9, 0x9c, 0x07, 0xfa, 0xd1, 0x86, + 0x07, 0xfa, 0x20, 0xa5, 0x0b, 0x7c, 0xf9, 0xa3, 0x0b, 0x7c, 0xf1, 0xa2, + 0x0b, 0x7c, 0xe9, 0xa1, 0x0b, 0x7c, 0xe1, 0x9f, 0x0b, 0x7c, 0xd9, 0x9e, + 0x0b, 0x7c, 0xd0, 0xc2, 0x01, 0x30, 0x0b, 0x79, 0x29, 0x83, 0x0b, 0x78, + 0x98, 0xc2, 0x19, 0x2c, 0x0b, 0x7a, 0x09, 0x83, 0x0b, 0x79, 0xf0, 0x83, + 0x0b, 0x79, 0xc9, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x80, 0x89, 0x0b, 0x7b, + 0x68, 0x89, 0x0b, 0x7b, 0x20, 0xcb, 0x1b, 0xd5, 0x01, 0x51, 0xd1, 0x45, + 0x00, 0x8c, 0x42, 0xf1, 0x43, 0xd6, 0x30, 0x0c, 0x01, 0x3b, 0xa9, 0xd4, + 0x1a, 0x50, 0x01, 0x3b, 0x48, 0xd6, 0x30, 0x0c, 0x01, 0x3b, 0xa1, 0xd4, + 0x1a, 0x50, 0x01, 0x3b, 0x40, 0xda, 0x1a, 0x4a, 0x01, 0x3b, 0x59, 0xd9, + 0x1d, 0xec, 0x01, 0x3b, 0x50, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x29, 0xcd, + 0x0e, 0x61, 0x0f, 0xbe, 0x38, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0xb1, 0xd2, + 0x22, 0x49, 0x0f, 0xbe, 0x58, 0x97, 0x0b, 0x73, 0x98, 0x8b, 0x0b, 0x73, + 0xf1, 0xc3, 0x7a, 0xd8, 0x0b, 0x73, 0x20, 0x87, 0x0b, 0x73, 0xd0, 0x89, + 0x0b, 0x73, 0xb9, 0x9b, 0x0b, 0x73, 0xb8, 0x92, 0x0b, 0x73, 0xb0, 0x92, + 0x0b, 0x73, 0x30, 0x97, 0x0b, 0x72, 0x98, 0x8b, 0x0b, 0x72, 0xf1, 0xc3, + 0x7a, 0xd8, 0x0b, 0x72, 0x20, 0x87, 0x0b, 0x72, 0xd0, 0x89, 0x0b, 0x72, + 0xb9, 0x9b, 0x0b, 0x72, 0xb8, 0x92, 0x0b, 0x72, 0xb0, 0x92, 0x0b, 0x72, + 0x30, 0xcf, 0x6b, 0x25, 0x0b, 0x74, 0xb0, 0xcf, 0x6b, 0x25, 0x0b, 0x74, + 0xa8, 0xc4, 0xe0, 0x37, 0x0f, 0x41, 0xd1, 0xc4, 0xe2, 0x23, 0x0f, 0x41, + 0xa1, 0xc5, 
0xd6, 0x14, 0x0f, 0x40, 0x29, 0xc4, 0xe2, 0x7b, 0x0f, 0x42, + 0xf1, 0xc5, 0xd4, 0x8e, 0x0f, 0x42, 0xe9, 0xc5, 0xd4, 0xf2, 0x0f, 0x44, + 0xc1, 0xc5, 0xd6, 0x28, 0x0f, 0x45, 0x09, 0xc6, 0xd2, 0x11, 0x0f, 0x45, + 0x59, 0xc5, 0xde, 0x43, 0x0f, 0x45, 0x61, 0xc4, 0xe2, 0x4f, 0x0f, 0x45, + 0xf8, 0xc5, 0xd6, 0x3c, 0x0f, 0x41, 0xc9, 0xc5, 0xd8, 0x03, 0x0f, 0x43, + 0x99, 0xc6, 0xd0, 0x49, 0x0f, 0x43, 0x79, 0xc4, 0xe2, 0x07, 0x0f, 0x43, + 0x01, 0xc4, 0xe4, 0x93, 0x0f, 0x42, 0xb9, 0xc5, 0xdc, 0xb3, 0x0f, 0x42, + 0x09, 0xc6, 0xce, 0x81, 0x0f, 0x43, 0xc9, 0xcb, 0x8d, 0xa5, 0x0f, 0x44, + 0x01, 0xc5, 0xd4, 0x61, 0x0f, 0x44, 0x79, 0xc4, 0xe3, 0x87, 0x0f, 0x45, + 0xe8, 0xc4, 0xe1, 0x93, 0x0f, 0x41, 0xc1, 0xc4, 0xe1, 0xbb, 0x0f, 0x41, + 0xb9, 0xc4, 0xe2, 0xa7, 0x0f, 0x41, 0xb1, 0xc4, 0xe1, 0x57, 0x0f, 0x41, + 0x81, 0xc4, 0xe4, 0x8f, 0x0f, 0x41, 0x79, 0xc4, 0xe2, 0x13, 0x0f, 0x42, + 0x61, 0xc4, 0xe1, 0xfb, 0x0f, 0x42, 0x59, 0xc4, 0xe2, 0xf7, 0x0f, 0x42, + 0x31, 0xc4, 0xe0, 0x2b, 0x0f, 0x42, 0x29, 0xc4, 0x38, 0x6b, 0x0f, 0x42, + 0x20, 0xc4, 0xe2, 0x27, 0x0f, 0x41, 0x71, 0xc3, 0xe4, 0xf7, 0x0f, 0x41, + 0x21, 0xc3, 0xd6, 0x5f, 0x0f, 0x41, 0x19, 0xc3, 0xe6, 0x26, 0x0f, 0x41, + 0x11, 0xc4, 0xe1, 0x37, 0x0f, 0x40, 0xe9, 0xc4, 0xb7, 0x12, 0x0f, 0x40, + 0xe1, 0xc4, 0xe3, 0x97, 0x0f, 0x40, 0xd9, 0xc4, 0xe2, 0x63, 0x0f, 0x42, + 0x01, 0xc4, 0xe1, 0xd7, 0x0f, 0x41, 0xf9, 0xc4, 0xe2, 0xff, 0x0f, 0x41, + 0xf0, 0xc4, 0xe1, 0xdb, 0x0f, 0x40, 0xf9, 0xc5, 0xd6, 0x2d, 0x0f, 0x40, + 0xc1, 0xc4, 0xd6, 0x96, 0x0f, 0x40, 0x21, 0xc4, 0xe3, 0x2f, 0x0f, 0x43, + 0x61, 0xc5, 0xd5, 0x0b, 0x0f, 0x42, 0x39, 0xc6, 0xd1, 0x9f, 0x0f, 0x43, + 0xb9, 0xc4, 0xe2, 0x33, 0x0f, 0x44, 0x69, 0xc5, 0xd5, 0x42, 0x0f, 0x45, + 0x01, 0xc6, 0xd0, 0x43, 0x0f, 0x45, 0x49, 0xc6, 0xd1, 0xb7, 0x0f, 0x46, + 0x18, 0xc5, 0xdd, 0x26, 0x0f, 0x40, 0xb9, 0xc5, 0xd4, 0x5c, 0x0f, 0x43, + 0xa1, 0xc5, 0xd7, 0x31, 0x0f, 0x43, 0x89, 0xc4, 0xe3, 0x53, 0x0f, 0x42, + 0x41, 0xc5, 0xd6, 0x37, 0x0f, 0x41, 0xd9, 0xc6, 0xd0, 0x13, 0x0f, 0x44, + 0x51, 0xc4, 0xe3, 0xe3, 0x0f, 0x44, 0x71, 0xc4, 0xd4, 0x61, 0x0f, 0x44, + 0x81, 0xc5, 0xd5, 0x9c, 0x0f, 0x45, 0x39, 0xc6, 0xd1, 0xd5, 0x0f, 0x46, + 0x08, 0xc5, 0xdc, 0x59, 0x0f, 0x40, 0xb1, 0xc5, 0xdd, 0x3a, 0x0f, 0x40, + 0xa9, 0xc5, 0xd4, 0x52, 0x0f, 0x40, 0xa1, 0xc4, 0xe1, 0xdf, 0x0f, 0x40, + 0x51, 0xc4, 0xe3, 0x77, 0x0f, 0x40, 0x49, 0xc4, 0xe2, 0xe7, 0x0f, 0x40, + 0x41, 0xc4, 0xe0, 0x7f, 0x0f, 0x40, 0x11, 0xc4, 0xe1, 0x2f, 0x0f, 0x40, + 0x09, 0xc4, 0xe0, 0xbf, 0x0f, 0x40, 0x00, 0xc5, 0xdd, 0xad, 0x0f, 0x40, + 0x91, 0xc4, 0xd2, 0x6b, 0x0f, 0x40, 0x71, 0xc4, 0xe1, 0xf7, 0x0f, 0x40, + 0x31, 0xc5, 0xd4, 0xa7, 0x0f, 0x43, 0x69, 0xc5, 0xdd, 0xcb, 0x0f, 0x43, + 0x59, 0xc4, 0xe0, 0xcb, 0x0f, 0x43, 0x49, 0xc6, 0xd3, 0xaf, 0x0f, 0x43, + 0xb1, 0xc6, 0xce, 0xed, 0x0f, 0x43, 0xc1, 0xc6, 0xd0, 0x61, 0x0f, 0x44, + 0xb1, 0xc6, 0xcf, 0x71, 0x0f, 0x45, 0x10, 0xc5, 0xd4, 0x34, 0x0f, 0x40, + 0x89, 0xc5, 0xd6, 0x96, 0x0f, 0x40, 0x19, 0xc4, 0xe0, 0x83, 0x0f, 0x42, + 0x89, 0xc4, 0xe2, 0x1b, 0x0f, 0x42, 0x51, 0xc4, 0xe1, 0xf3, 0x0f, 0x44, + 0x61, 0xc4, 0xe1, 0xb3, 0x0f, 0x44, 0x91, 0xc5, 0xd5, 0x10, 0x0f, 0x44, + 0xa1, 0xc6, 0xd0, 0x8b, 0x0f, 0x45, 0x99, 0xc5, 0xd5, 0x97, 0x0f, 0x45, + 0xa1, 0xc6, 0xd1, 0x33, 0x0f, 0x46, 0x20, 0xc5, 0xde, 0x52, 0x0f, 0x43, + 0x29, 0xc5, 0xdd, 0xf8, 0x0f, 0x43, 0x21, 0xc5, 0xd5, 0x33, 0x0f, 0x43, + 0x19, 0xc4, 0xe3, 0x3b, 0x0f, 0x42, 0xe1, 0xc4, 0xe2, 0x7f, 0x0f, 0x42, + 0xd9, 0xc4, 0xe2, 0xbf, 0x0f, 0x42, 0xd1, 0xc4, 0xe0, 0x33, 0x0f, 0x42, + 0xa9, 0xc4, 0xdf, 0xaf, 0x0f, 0x42, 0xa1, 0xc4, 0xe1, 0x4f, 0x0f, 0x42, + 0x99, 0xc4, 
0xe3, 0xff, 0x0f, 0x42, 0x68, 0xc5, 0xd8, 0x80, 0x0f, 0x41, + 0xa9, 0xc4, 0xe2, 0x93, 0x0f, 0x41, 0x61, 0xc5, 0xd5, 0xb0, 0x0f, 0x40, + 0x79, 0xc5, 0xd7, 0x9f, 0x0f, 0x43, 0xa9, 0xc5, 0xd7, 0xef, 0x0f, 0x43, + 0x09, 0xc5, 0xde, 0x66, 0x0f, 0x44, 0x31, 0xc6, 0xd2, 0xa1, 0x0f, 0x45, + 0x89, 0xc5, 0xd4, 0x7f, 0x0f, 0x45, 0xb0, 0xc5, 0xd8, 0x7b, 0x0f, 0x41, + 0x99, 0xc4, 0xe2, 0xb7, 0x0f, 0x41, 0x59, 0xc4, 0xe0, 0xc7, 0x0f, 0x41, + 0x51, 0xc4, 0xe1, 0xcb, 0x0f, 0x41, 0x49, 0xc4, 0xe2, 0x67, 0x0f, 0x41, + 0x09, 0xc5, 0xdd, 0xa8, 0x0f, 0x40, 0x99, 0xc5, 0xde, 0x6b, 0x0f, 0x43, + 0x91, 0xc5, 0xd7, 0x59, 0x0f, 0x42, 0xf9, 0xc5, 0xd5, 0x47, 0x0f, 0x44, + 0xf9, 0xc6, 0xd3, 0x61, 0x0f, 0x45, 0xc0, 0xc4, 0xe1, 0xe3, 0x0f, 0x41, + 0x91, 0xc5, 0xd5, 0xba, 0x0f, 0x40, 0x69, 0xc4, 0xe2, 0x3f, 0x0f, 0x40, + 0x61, 0xc5, 0xd4, 0x4d, 0x0f, 0x43, 0x31, 0xc4, 0xe0, 0x87, 0x0f, 0x42, + 0x79, 0xc9, 0xac, 0xe7, 0x0f, 0x41, 0xe9, 0xc7, 0xc3, 0xb5, 0x0f, 0x43, + 0xd1, 0xc4, 0xe0, 0xcf, 0x0f, 0x44, 0x21, 0xc6, 0xcf, 0xb9, 0x0f, 0x45, + 0x21, 0xc5, 0xde, 0x16, 0x0f, 0x45, 0x90, 0xc5, 0xd4, 0xde, 0x0f, 0x41, + 0x89, 0xc4, 0xe3, 0x5f, 0x0f, 0x41, 0x39, 0xc4, 0xe0, 0x93, 0x0f, 0x41, + 0x29, 0xc5, 0xde, 0x5c, 0x0f, 0x43, 0x39, 0xc5, 0xdd, 0xc6, 0x0f, 0x42, + 0x81, 0xc4, 0xe2, 0x03, 0x0f, 0x44, 0x29, 0xc6, 0xd3, 0xb5, 0x0f, 0x44, + 0x39, 0xc6, 0xd0, 0xc7, 0x0f, 0x44, 0x41, 0xca, 0x9a, 0xc2, 0x0f, 0x44, + 0xe1, 0xc6, 0xd3, 0xcd, 0x0f, 0x46, 0x00, 0xc4, 0xe2, 0x0f, 0x0f, 0x41, + 0x69, 0xc5, 0xdc, 0xc2, 0x0f, 0x40, 0x39, 0xc4, 0xe3, 0x1f, 0x0f, 0x43, + 0x41, 0xc9, 0xa9, 0x24, 0x0f, 0x42, 0x91, 0xc7, 0xc5, 0xc2, 0x0f, 0x44, + 0x59, 0xc6, 0xce, 0x99, 0x0f, 0x44, 0xc9, 0xc5, 0xd6, 0x32, 0x0f, 0x44, + 0xd1, 0xc4, 0xe0, 0xdf, 0x0f, 0x45, 0x69, 0xc5, 0xd8, 0x35, 0x0f, 0x45, + 0xe1, 0xc6, 0xd1, 0x3f, 0x0f, 0x46, 0x10, 0xc3, 0xe5, 0x66, 0x0f, 0x41, + 0x41, 0xc5, 0xd8, 0x6c, 0x0f, 0x40, 0x81, 0xc4, 0xe3, 0xfb, 0x0f, 0x43, + 0x71, 0xc5, 0xd4, 0xe8, 0x0f, 0x42, 0xc1, 0xc6, 0xce, 0x9f, 0x0f, 0x43, + 0xd9, 0xc5, 0xd6, 0xa0, 0x0f, 0x44, 0x99, 0xca, 0xa0, 0x12, 0x0f, 0x44, + 0xf1, 0xc5, 0xd3, 0xf8, 0x0f, 0x45, 0x41, 0xc6, 0xd0, 0xbb, 0x0f, 0x45, + 0xb9, 0xc5, 0xd5, 0xbf, 0x0f, 0x45, 0xf0, 0xc3, 0xe5, 0xd2, 0x0f, 0x41, + 0x31, 0xc5, 0xd7, 0xea, 0x0f, 0x41, 0x01, 0xc5, 0xdc, 0x18, 0x0f, 0x43, + 0x11, 0xc5, 0xdd, 0xe9, 0x0f, 0x42, 0xb1, 0xc5, 0xd5, 0xab, 0x0f, 0x42, + 0x49, 0xcc, 0x89, 0xa9, 0x0f, 0x44, 0x09, 0xc5, 0xd4, 0xb6, 0x0f, 0x44, + 0x89, 0xcb, 0x8e, 0x81, 0x0f, 0x44, 0xe9, 0xc5, 0xd3, 0xee, 0x0f, 0x45, + 0x19, 0xc5, 0xd7, 0xf4, 0x0f, 0x45, 0x50, 0xc5, 0xdd, 0x94, 0x0f, 0x40, + 0xf1, 0xc6, 0xd3, 0xa3, 0x0f, 0x40, 0xc9, 0xc5, 0xd8, 0x0d, 0x0f, 0x42, + 0x71, 0xc4, 0x92, 0x28, 0x0f, 0x41, 0xe1, 0xc7, 0xc1, 0x46, 0x0f, 0x43, + 0xe1, 0xc7, 0xc8, 0x85, 0x0f, 0x43, 0xf1, 0xc4, 0xe2, 0xbb, 0x0f, 0x44, + 0x19, 0xc5, 0xd5, 0x38, 0x0f, 0x45, 0x29, 0xc5, 0xd4, 0x57, 0x0f, 0x45, + 0xa9, 0xc4, 0xe1, 0x53, 0x0f, 0x45, 0xd8, 0xc6, 0xce, 0x51, 0x0f, 0x40, + 0xd1, 0xc4, 0xd3, 0xaf, 0x0f, 0x43, 0x51, 0xc4, 0xe0, 0xdb, 0x0f, 0x42, + 0x19, 0xc5, 0xdd, 0xc1, 0x0f, 0x42, 0x11, 0xcb, 0x92, 0x28, 0x0f, 0x44, + 0x11, 0xc6, 0xd3, 0x55, 0x0f, 0x44, 0x49, 0xc6, 0xd2, 0x89, 0x0f, 0x44, + 0xb9, 0xc6, 0xd0, 0x85, 0x0f, 0x44, 0xd9, 0xc4, 0xdf, 0xf7, 0x0f, 0x45, + 0xc9, 0xc4, 0xe3, 0x3f, 0x0f, 0x45, 0xd0, 0xc5, 0xd7, 0x68, 0x0f, 0x40, + 0x59, 0xc6, 0xd3, 0x43, 0x0f, 0x43, 0x81, 0xc4, 0xd4, 0xe8, 0x0f, 0x42, + 0xc9, 0xc6, 0xd0, 0xdf, 0x0f, 0x43, 0xe9, 0xc7, 0xc7, 0x90, 0x0f, 0x43, + 0xf9, 0xc5, 0xd4, 0xc5, 0x0f, 0x44, 0xa9, 0xc5, 0xd6, 0x4b, 0x0f, 0x45, + 0x31, 0xc5, 
0xd8, 0xd5, 0x0f, 0x45, 0x71, 0xc5, 0xde, 0x20, 0x0f, 0x45, + 0x79, 0xc5, 0xd6, 0x69, 0x0f, 0x45, 0x80, 0xc3, 0x57, 0x39, 0x0f, 0x46, + 0x81, 0x10, 0x42, 0xf1, 0x5b, 0xcb, 0x71, 0xb1, 0x08, 0x4f, 0xf9, 0xcd, + 0x7c, 0x9b, 0x08, 0x4f, 0xc1, 0xcb, 0x8d, 0xf2, 0x08, 0x4f, 0xb8, 0xcd, + 0x7d, 0x85, 0x08, 0x4f, 0xe9, 0xce, 0x71, 0xae, 0x08, 0x4d, 0xe0, 0xcd, + 0x71, 0xaf, 0x08, 0x4f, 0xe1, 0xcb, 0x91, 0x83, 0x08, 0x4f, 0xd8, 0xcc, + 0x8c, 0x79, 0x08, 0x4f, 0xd1, 0xcc, 0x86, 0xa9, 0x08, 0x4f, 0xc8, 0xc7, + 0x71, 0xb4, 0x08, 0x4f, 0xb1, 0xc4, 0x01, 0xce, 0x08, 0x4d, 0xe8, 0x00, + 0xc2, 0xf1, 0x65, 0xcb, 0x92, 0xb7, 0x08, 0x4f, 0x60, 0x00, 0xc2, 0xf1, + 0x74, 0xca, 0x92, 0xb8, 0x08, 0x4f, 0x58, 0xc4, 0x18, 0x10, 0x08, 0x4e, + 0x33, 0x02, 0xf1, 0x83, 0xc2, 0x22, 0xcc, 0x08, 0x4e, 0x2a, 0x02, 0xf1, + 0x90, 0x0b, 0xc2, 0xf1, 0x9d, 0x11, 0x42, 0xf1, 0xaf, 0x0a, 0xc2, 0xf1, + 0xc1, 0x19, 0xc2, 0xf1, 0xd3, 0xc2, 0x00, 0xc4, 0x08, 0x4e, 0x4a, 0x02, + 0xf1, 0xe3, 0x00, 0x42, 0xf1, 0xe9, 0xc3, 0xe5, 0xb1, 0x08, 0x4d, 0xf9, + 0xc3, 0x64, 0x84, 0x08, 0x4d, 0xf0, 0xc2, 0x0e, 0x9a, 0x08, 0x4d, 0xb9, + 0x16, 0xc2, 0xf1, 0xf8, 0xc2, 0x0f, 0x9a, 0x08, 0x4d, 0x99, 0x0d, 0xc2, + 0xf2, 0x04, 0x15, 0xc2, 0xf2, 0x0e, 0x83, 0x08, 0x4d, 0x03, 0x02, 0xf2, + 0x16, 0xc3, 0xe6, 0x71, 0x08, 0x4d, 0x71, 0xc2, 0x00, 0xdb, 0x08, 0x4d, + 0x61, 0xc2, 0x00, 0x39, 0x08, 0x4d, 0x59, 0x10, 0xc2, 0xf2, 0x1c, 0xc2, + 0x01, 0xc3, 0x08, 0x4d, 0x41, 0xc2, 0x00, 0xb0, 0x08, 0x4d, 0x39, 0xc2, + 0x01, 0x5d, 0x08, 0x4d, 0x31, 0xc2, 0x01, 0x4a, 0x08, 0x4d, 0x29, 0xc2, + 0x19, 0x2c, 0x08, 0x4d, 0x21, 0x91, 0x08, 0x4d, 0x19, 0x8b, 0x08, 0x4d, + 0x11, 0x87, 0x08, 0x4d, 0x08, 0x91, 0x08, 0x4c, 0xe1, 0x87, 0x08, 0x4c, + 0xd3, 0x02, 0xf2, 0x24, 0x83, 0x08, 0x4c, 0xc2, 0x02, 0xf2, 0x2a, 0x83, + 0x08, 0x4c, 0xb1, 0xc2, 0x00, 0xd0, 0x08, 0x4c, 0x88, 0x87, 0x08, 0x4c, + 0xa9, 0x83, 0x08, 0x4c, 0x9a, 0x02, 0xf2, 0x30, 0xc2, 0xe5, 0xfd, 0x08, + 0x4c, 0x38, 0x83, 0x08, 0x4c, 0x53, 0x02, 0xf2, 0x36, 0x87, 0x08, 0x4c, + 0x62, 0x02, 0xf2, 0x3c, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0x78, 0x60, 0x03, + 0x27, 0x42, 0xf2, 0x42, 0x97, 0x05, 0x57, 0x79, 0x8b, 0x05, 0x57, 0x68, + 0xc7, 0xc9, 0xe3, 0x05, 0x5f, 0x08, 0xc7, 0xc9, 0xe3, 0x05, 0x5e, 0xf8, + 0xc7, 0xc9, 0xe3, 0x05, 0x5f, 0x00, 0xc2, 0x00, 0xd0, 0x05, 0x57, 0x29, + 0x83, 0x05, 0x57, 0x20, 0xc7, 0xc9, 0xe3, 0x05, 0x5e, 0xf0, 0xc7, 0xc9, + 0xe3, 0x05, 0x5e, 0xd8, 0xc2, 0x00, 0xd0, 0x05, 0x57, 0x39, 0x83, 0x05, + 0x57, 0x30, 0xcf, 0x01, 0x38, 0x08, 0xb3, 0x59, 0xc8, 0x00, 0xbf, 0x08, + 0xb3, 0x50, 0xc4, 0x18, 0x10, 0x00, 0xc0, 0xb9, 0xc2, 0x22, 0xcc, 0x00, + 0xc0, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xc0, 0xa9, 0xc3, 0x09, 0x9e, 0x00, + 0xc0, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xc0, 0x99, 0xc2, 0x02, 0xa0, 0x00, + 0xc0, 0x90, 0x49, 0xb1, 0x70, 0xc2, 0xf2, 0x5a, 0xc3, 0xb4, 0xa6, 0x00, + 0xc3, 0xb9, 0xc2, 0x00, 0x87, 0x00, 0xc3, 0xb1, 0xc2, 0x00, 0x39, 0x00, + 0xc3, 0xa9, 0xc2, 0x02, 0x2b, 0x00, 0xc3, 0xa1, 0x8b, 0x00, 0xc3, 0x98, + 0x06, 0xc2, 0xf2, 0x8e, 0x45, 0x01, 0xce, 0xc2, 0xf2, 0x9b, 0x83, 0x00, + 0xc4, 0x3b, 0x02, 0xf2, 0xa5, 0x1c, 0xc2, 0xf2, 0xaf, 0xc3, 0x1d, 0x35, + 0x00, 0xc4, 0xa1, 0x12, 0xc2, 0xf2, 0xb9, 0x16, 0xc2, 0xf2, 0xc3, 0x10, + 0xc2, 0xf2, 0xd1, 0xc2, 0x00, 0x64, 0x00, 0xc4, 0x59, 0xc2, 0x02, 0x2b, + 0x00, 0xc4, 0x49, 0x8b, 0x00, 0xc4, 0x43, 0x02, 0xf2, 0xdd, 0xc6, 0x8c, + 0xa2, 0x00, 0xc4, 0x29, 0xc7, 0x62, 0x18, 0x00, 0xc4, 0x19, 0xcb, 0x96, + 0x32, 0x00, 0xc4, 0x08, 0x03, 0xc2, 0xf2, 0xe3, 0x06, 0xc2, 0xf2, 0xef, + 0xc3, 0x27, 0x57, 0x00, 0xc2, 0xd9, 0x0c, 0xc2, 0xf2, 0xf9, 0xc3, 0x39, + 0x6e, 0x00, 
+ [... remainder of this machine-generated table of hexadecimal byte constants omitted; the hunk continues with the same auto-generated data ...]
0x39, 0x8b, 0x01, 0x65, 0x68, 0xc3, 0x07, 0x4a, 0x00, 0x1f, + 0x69, 0xc2, 0x06, 0xdb, 0x00, 0x1f, 0x18, 0xc4, 0x06, 0x5a, 0x01, 0x65, + 0x99, 0xc4, 0xca, 0x0b, 0x01, 0x65, 0xc9, 0xc2, 0x00, 0xec, 0x01, 0x65, + 0xd9, 0xc4, 0x01, 0x68, 0x01, 0x66, 0x58, 0x47, 0xc1, 0x7e, 0xc3, 0x0c, + 0xc2, 0x47, 0x96, 0x0a, 0x43, 0x0c, 0xea, 0xc3, 0xd1, 0x8c, 0x01, 0x65, + 0xb9, 0xc2, 0x00, 0xec, 0x01, 0x65, 0xe9, 0xc4, 0x9b, 0xae, 0x01, 0x67, + 0x61, 0xc6, 0xd0, 0x3d, 0x01, 0x67, 0x70, 0xc3, 0x07, 0x4a, 0x00, 0x1f, + 0x61, 0xc2, 0x06, 0xdb, 0x00, 0x1f, 0x10, 0xc4, 0x06, 0x5a, 0x01, 0x65, + 0x91, 0xc4, 0xca, 0x0b, 0x01, 0x65, 0xc1, 0xc2, 0x00, 0xec, 0x01, 0x65, + 0xd1, 0xc4, 0x01, 0x68, 0x01, 0x66, 0x50, 0x8b, 0x01, 0x65, 0x61, 0xc2, + 0x06, 0xdb, 0x00, 0x1f, 0x30, 0x47, 0xc1, 0x7e, 0xc3, 0x0c, 0xfa, 0x47, + 0x96, 0x0a, 0x43, 0x0d, 0x22, 0xc3, 0xd1, 0x8c, 0x01, 0x65, 0xb1, 0xc2, + 0x00, 0xec, 0x01, 0x65, 0xe1, 0xc4, 0x9b, 0xae, 0x01, 0x67, 0x59, 0xc6, + 0xd0, 0x3d, 0x01, 0x67, 0x68, 0xc4, 0x18, 0x12, 0x08, 0x17, 0x59, 0xc9, + 0x18, 0x05, 0x08, 0x17, 0xa0, 0xc4, 0x0d, 0x0e, 0x08, 0x17, 0x61, 0xcb, + 0x13, 0xfa, 0x08, 0x17, 0xa8, 0xc3, 0x0d, 0x0f, 0x08, 0x17, 0x69, 0xca, + 0x9c, 0x5c, 0x08, 0x17, 0xb0, 0xc3, 0x45, 0x6b, 0x08, 0x17, 0x71, 0xca, + 0x37, 0x63, 0x08, 0x17, 0xb8, 0xc2, 0x0d, 0x10, 0x08, 0x17, 0x79, 0xc8, + 0x0d, 0x03, 0x08, 0x17, 0xc0, 0xc8, 0x0d, 0x03, 0x08, 0x17, 0xc9, 0xc2, + 0x0d, 0x10, 0x08, 0x17, 0x80, 0xd9, 0x20, 0x76, 0x0f, 0xa8, 0x10, 0xc7, + 0xc1, 0xd9, 0x0f, 0xab, 0x39, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xd8, 0xc7, + 0xc1, 0xd9, 0x0f, 0xaa, 0xe9, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x88, 0xc6, + 0xd0, 0xfd, 0x0f, 0xc8, 0x13, 0x03, 0x0d, 0x32, 0xc6, 0xcb, 0xf3, 0x0f, + 0xaa, 0x00, 0xc5, 0x8e, 0xdf, 0x01, 0x93, 0x03, 0x03, 0x0d, 0x38, 0xc6, + 0xbb, 0xec, 0x01, 0x93, 0x52, 0x03, 0x0d, 0x3e, 0xc2, 0x00, 0xd3, 0x01, + 0x93, 0x78, 0xc5, 0xc0, 0x7d, 0x01, 0x93, 0x13, 0x03, 0x0d, 0x44, 0xc6, + 0xc1, 0x86, 0x01, 0x93, 0x5a, 0x03, 0x0d, 0x4a, 0xc2, 0x00, 0xd3, 0x01, + 0x93, 0x88, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x90, 0xc4, 0x79, 0xf3, 0x01, + 0x93, 0x2b, 0x03, 0x0d, 0x50, 0xc6, 0xba, 0x7c, 0x01, 0x93, 0x62, 0x03, + 0x0d, 0x56, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xa0, 0x00, 0x43, 0x0d, 0x5c, + 0xc4, 0xc6, 0x7a, 0x01, 0x93, 0x43, 0x03, 0x0d, 0x64, 0xc6, 0xc6, 0x79, + 0x01, 0x93, 0x4a, 0x03, 0x0d, 0x6a, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xd8, + 0xc4, 0x15, 0xe7, 0x01, 0x27, 0x51, 0xc4, 0x26, 0x78, 0x01, 0x23, 0x41, + 0xc5, 0x06, 0xdb, 0x01, 0x23, 0x39, 0x15, 0xc3, 0x0d, 0x70, 0x08, 0xc3, + 0x0d, 0x7c, 0x16, 0xc3, 0x0d, 0x88, 0xc3, 0x05, 0x14, 0x01, 0x23, 0x00, + 0xc4, 0x03, 0x03, 0x01, 0x14, 0xc1, 0xc3, 0x00, 0xbb, 0x01, 0x51, 0xc0, + 0xe0, 0x02, 0xe7, 0x0f, 0x88, 0x78, 0x9c, 0x01, 0x27, 0x49, 0x9b, 0x01, + 0x27, 0x41, 0x9a, 0x01, 0x27, 0x39, 0x99, 0x01, 0x27, 0x31, 0x98, 0x01, + 0x27, 0x29, 0x97, 0x01, 0x27, 0x21, 0x96, 0x01, 0x27, 0x19, 0x95, 0x01, + 0x27, 0x11, 0x94, 0x01, 0x27, 0x09, 0x93, 0x01, 0x27, 0x01, 0x92, 0x01, + 0x26, 0xf9, 0x91, 0x01, 0x26, 0xf1, 0x90, 0x01, 0x26, 0xe9, 0x8f, 0x01, + 0x26, 0xe1, 0x8e, 0x01, 0x26, 0xd9, 0x8d, 0x01, 0x26, 0xd1, 0x8c, 0x01, + 0x26, 0xc9, 0x8b, 0x01, 0x26, 0xc1, 0x8a, 0x01, 0x26, 0xb9, 0x89, 0x01, + 0x26, 0xb1, 0x88, 0x01, 0x26, 0xa9, 0x87, 0x01, 0x26, 0xa1, 0x86, 0x01, + 0x26, 0x99, 0x85, 0x01, 0x26, 0x91, 0x84, 0x01, 0x26, 0x89, 0x83, 0x01, + 0x26, 0x80, 0x9c, 0x01, 0x26, 0x79, 0x9b, 0x01, 0x26, 0x71, 0x9a, 0x01, + 0x26, 0x69, 0x99, 0x01, 0x26, 0x61, 0x98, 0x01, 0x26, 0x59, 0x97, 0x01, + 0x26, 0x51, 0x96, 0x01, 0x26, 0x49, 0x95, 0x01, 0x26, 0x41, 0x94, 0x01, + 0x26, 0x39, 
0x93, 0x01, 0x26, 0x31, 0x92, 0x01, 0x26, 0x29, 0x91, 0x01, + 0x26, 0x21, 0x90, 0x01, 0x26, 0x19, 0x8f, 0x01, 0x26, 0x11, 0x8e, 0x01, + 0x26, 0x09, 0x8d, 0x01, 0x26, 0x01, 0x8c, 0x01, 0x25, 0xf9, 0x8b, 0x01, + 0x25, 0xf1, 0x8a, 0x01, 0x25, 0xe9, 0x89, 0x01, 0x25, 0xe1, 0x88, 0x01, + 0x25, 0xd9, 0x87, 0x01, 0x25, 0xd1, 0x86, 0x01, 0x25, 0xc9, 0x85, 0x01, + 0x25, 0xc1, 0x84, 0x01, 0x25, 0xb9, 0x83, 0x01, 0x25, 0xb0, 0xc3, 0x18, + 0x13, 0x01, 0x23, 0x9b, 0x03, 0x0d, 0x94, 0xc3, 0x22, 0x45, 0x01, 0x23, + 0x58, 0xc3, 0x03, 0x26, 0x01, 0x23, 0x61, 0x9b, 0x01, 0x92, 0xd2, 0x03, + 0x0d, 0x98, 0xd0, 0x55, 0xa8, 0x01, 0x92, 0x40, 0xc3, 0x03, 0x26, 0x01, + 0x23, 0x89, 0xd1, 0x55, 0xa7, 0x01, 0x92, 0x78, 0xc3, 0x03, 0x26, 0x01, + 0x23, 0x81, 0xd1, 0x55, 0xa7, 0x01, 0x92, 0x70, 0xc3, 0x03, 0x26, 0x01, + 0x23, 0x79, 0xd1, 0x55, 0xa7, 0x01, 0x92, 0x68, 0xc3, 0x03, 0x26, 0x01, + 0x23, 0x71, 0x9b, 0x01, 0x95, 0xfa, 0x03, 0x0d, 0x9c, 0xc6, 0x34, 0x38, + 0x01, 0x23, 0x69, 0xc3, 0x0d, 0x0f, 0x01, 0x95, 0xaa, 0x03, 0x0d, 0xa2, + 0xc5, 0xdc, 0x13, 0x0f, 0x92, 0x89, 0xc8, 0xb9, 0xfa, 0x0f, 0x92, 0x81, + 0xc8, 0xb6, 0xda, 0x01, 0x94, 0xf9, 0xc7, 0xba, 0x63, 0x01, 0x95, 0x78, + 0xcb, 0x90, 0x2e, 0x01, 0x92, 0x29, 0xc3, 0x81, 0x06, 0x01, 0x92, 0x38, + 0xc5, 0xdc, 0xef, 0x01, 0x92, 0x31, 0xc2, 0x22, 0xcc, 0x01, 0x94, 0x29, + 0x07, 0xc3, 0x0d, 0xa6, 0x17, 0xc3, 0x0d, 0xb2, 0x16, 0xc3, 0x0d, 0xc2, + 0xc6, 0xcc, 0xbf, 0x01, 0x94, 0x99, 0xc6, 0xca, 0xe5, 0x01, 0x94, 0xa8, + 0xc2, 0x02, 0xa0, 0x01, 0x94, 0x09, 0xc4, 0x02, 0xde, 0x01, 0x94, 0x11, + 0xc2, 0x00, 0xc4, 0x01, 0x94, 0x48, 0xc3, 0x09, 0x9e, 0x01, 0x94, 0x19, + 0x0b, 0xc3, 0x0d, 0xce, 0xc5, 0x1b, 0xbd, 0x01, 0x94, 0xd8, 0xc4, 0x00, + 0x2d, 0x01, 0x94, 0x39, 0xc4, 0x61, 0xc1, 0x01, 0x94, 0x79, 0xc8, 0xbc, + 0xca, 0x01, 0x94, 0xe9, 0xc9, 0xaf, 0x8a, 0x01, 0x95, 0x68, 0x0b, 0xc3, + 0x0d, 0xe0, 0xc3, 0x00, 0xc2, 0x01, 0x94, 0xa0, 0xc3, 0x01, 0x54, 0x01, + 0x94, 0x51, 0x07, 0xc3, 0x0d, 0xec, 0xc3, 0x04, 0x85, 0x01, 0x94, 0xd0, + 0xc4, 0x03, 0xd7, 0x01, 0x94, 0x61, 0xc3, 0x29, 0x82, 0x01, 0x94, 0x68, + 0xc3, 0x04, 0xad, 0x01, 0x94, 0x91, 0xc3, 0x00, 0x2d, 0x01, 0x95, 0x20, + 0x11, 0xc3, 0x0d, 0xf8, 0xc5, 0x04, 0xe2, 0x01, 0x95, 0x28, 0xc4, 0xdd, + 0x72, 0x01, 0x94, 0xc1, 0xc2, 0x00, 0x27, 0x01, 0x95, 0x31, 0xc3, 0x00, + 0x4a, 0x01, 0x95, 0x38, 0x07, 0xc3, 0x0e, 0x0a, 0xc4, 0x00, 0x2d, 0x01, + 0x95, 0x40, 0x83, 0x01, 0x96, 0xa9, 0x8b, 0x01, 0x96, 0xb1, 0x97, 0x01, + 0x96, 0xb9, 0x87, 0x01, 0x96, 0xc1, 0x91, 0x01, 0x96, 0xc8, 0x83, 0x01, + 0x96, 0xd1, 0x8b, 0x01, 0x96, 0xd9, 0x97, 0x01, 0x96, 0xe1, 0x87, 0x01, + 0x96, 0xe9, 0x91, 0x01, 0x96, 0xf0, 0x83, 0x01, 0x96, 0xf9, 0x8b, 0x01, + 0x97, 0x01, 0x97, 0x01, 0x97, 0x09, 0x87, 0x01, 0x97, 0x11, 0x91, 0x01, + 0x97, 0x18, 0x83, 0x01, 0x97, 0x21, 0x8b, 0x01, 0x97, 0x29, 0x97, 0x01, + 0x97, 0x31, 0x87, 0x01, 0x97, 0x39, 0x91, 0x01, 0x97, 0x40, 0x83, 0x01, + 0x97, 0x49, 0x8b, 0x01, 0x97, 0x51, 0x97, 0x01, 0x97, 0x59, 0x87, 0x01, + 0x97, 0x61, 0x91, 0x01, 0x97, 0x68, 0x83, 0x01, 0x97, 0x71, 0x8b, 0x01, + 0x97, 0x79, 0x97, 0x01, 0x97, 0x81, 0x87, 0x01, 0x97, 0x89, 0x91, 0x01, + 0x97, 0x90, 0x83, 0x01, 0x97, 0x99, 0x97, 0x01, 0x97, 0xa1, 0x91, 0x01, + 0x97, 0xa8, 0x83, 0x01, 0x97, 0xb1, 0x8b, 0x01, 0x97, 0xb9, 0x97, 0x01, + 0x97, 0xc1, 0x87, 0x01, 0x97, 0xc9, 0x91, 0x01, 0x97, 0xd0, 0x83, 0x01, + 0x97, 0xd9, 0x8b, 0x01, 0x97, 0xe1, 0x87, 0x01, 0x97, 0xe9, 0x91, 0x01, + 0x97, 0xf0, 0xcf, 0x64, 0xc2, 0x09, 0x2a, 0x19, 0x83, 0x09, 0x1b, 0x60, + 0x0e, 0xc3, 0x0e, 0x14, 0x06, 0xc3, 0x0e, 0x1e, 0x17, 0xc3, 0x0e, 0x2a, + 0xc2, 0x00, 
0x16, 0x09, 0x1a, 0x59, 0x15, 0xc3, 0x0e, 0x3a, 0xc2, 0x00, + 0xb0, 0x09, 0x1a, 0x41, 0xc3, 0x0f, 0xd6, 0x09, 0x1a, 0x39, 0xc2, 0x06, + 0x52, 0x09, 0x1a, 0x29, 0x0b, 0xc3, 0x0e, 0x46, 0xc2, 0x00, 0xd0, 0x09, + 0x1a, 0x09, 0x09, 0xc3, 0x0e, 0x56, 0xc3, 0x01, 0x5d, 0x09, 0x19, 0xd1, + 0x83, 0x09, 0x19, 0xc2, 0x03, 0x0e, 0x61, 0xc8, 0x03, 0x4c, 0x09, 0x1a, + 0x80, 0x46, 0x03, 0x4d, 0xc3, 0x0e, 0x67, 0xc8, 0x1d, 0x6f, 0x09, 0x29, + 0xe0, 0xc8, 0x4e, 0xea, 0x09, 0x18, 0xf8, 0xc2, 0x00, 0xb0, 0x09, 0x19, + 0x29, 0xc6, 0x45, 0xad, 0x09, 0x19, 0x20, 0x94, 0x09, 0x1a, 0xa0, 0xca, + 0x8d, 0x2d, 0x09, 0x18, 0xd8, 0xcf, 0x65, 0xd0, 0x09, 0x18, 0xbb, 0x03, + 0x0e, 0x7b, 0xc2, 0x02, 0x2f, 0x09, 0x18, 0xb1, 0xc3, 0x62, 0x19, 0x09, + 0x18, 0xa8, 0xca, 0x64, 0xc2, 0x09, 0x29, 0xd9, 0xc9, 0x5d, 0x99, 0x09, + 0x29, 0xd0, 0xc2, 0x04, 0x3d, 0x09, 0x17, 0xc9, 0xc4, 0x0b, 0x46, 0x09, + 0x17, 0xc1, 0x42, 0x01, 0xe2, 0xc3, 0x0e, 0x81, 0xc3, 0x6c, 0x49, 0x09, + 0x17, 0xa9, 0xc2, 0x01, 0x2d, 0x09, 0x17, 0xa0, 0xc7, 0x0b, 0x09, 0x09, + 0x17, 0x91, 0x42, 0x00, 0x9a, 0x43, 0x0e, 0x89, 0xc2, 0x02, 0x2f, 0x09, + 0x17, 0x71, 0xc2, 0x00, 0x0a, 0x09, 0x17, 0x68, 0xc8, 0xb6, 0xe2, 0x09, + 0x18, 0x1b, 0x03, 0x0e, 0x8f, 0xca, 0x38, 0xae, 0x09, 0x18, 0x10, 0xcf, + 0x69, 0x90, 0x09, 0x16, 0xf8, 0x46, 0x25, 0xd4, 0x43, 0x0e, 0x95, 0x45, + 0x25, 0xd5, 0xc3, 0x0e, 0xa1, 0xc8, 0xb6, 0xea, 0x09, 0x29, 0x93, 0x03, + 0x0e, 0xb3, 0xc2, 0x06, 0x47, 0x09, 0x15, 0xd8, 0xc3, 0x0d, 0xff, 0x09, + 0x16, 0x11, 0x9f, 0x09, 0x16, 0x08, 0xc5, 0x58, 0xf4, 0x09, 0x29, 0x88, + 0x47, 0x03, 0x4c, 0x43, 0x0e, 0xb7, 0x00, 0x43, 0x0e, 0xe0, 0x47, 0x03, + 0x4c, 0x43, 0x0e, 0xec, 0x47, 0x03, 0x4c, 0x43, 0x0f, 0x21, 0x46, 0x03, + 0x4d, 0xc3, 0x0f, 0x2b, 0xc4, 0x39, 0xc8, 0x09, 0x15, 0x43, 0x03, 0x0f, + 0x6e, 0xc8, 0xb6, 0xf2, 0x09, 0x15, 0x39, 0xc7, 0xb7, 0xa3, 0x09, 0x14, + 0xa0, 0x47, 0x03, 0x4c, 0x43, 0x0f, 0x74, 0xd0, 0x5f, 0xa2, 0x09, 0x12, + 0x89, 0xc7, 0x5d, 0x9b, 0x09, 0x12, 0x80, 0xd6, 0x2a, 0xf6, 0x09, 0x1c, + 0x99, 0xd6, 0x2b, 0x7e, 0x09, 0x16, 0xa9, 0xc4, 0x58, 0xf5, 0x09, 0x16, + 0xa0, 0x00, 0x43, 0x0f, 0xb8, 0xcc, 0x81, 0xf9, 0x09, 0x13, 0x5b, 0x03, + 0x0f, 0xc7, 0xc8, 0x20, 0x13, 0x09, 0x13, 0x51, 0xc4, 0x58, 0xf5, 0x09, + 0x13, 0x49, 0x4c, 0x20, 0x1c, 0x43, 0x0f, 0xcd, 0xcd, 0x76, 0x0e, 0x09, + 0x12, 0x19, 0xce, 0x75, 0x2e, 0x09, 0x12, 0x11, 0xc8, 0x1d, 0x6f, 0x09, + 0x12, 0x08, 0xc2, 0x04, 0x3d, 0x09, 0x12, 0x51, 0x83, 0x09, 0x12, 0x48, + 0xc9, 0xaf, 0x66, 0x09, 0x11, 0xb3, 0x03, 0x0f, 0xe8, 0xcd, 0x7a, 0x2b, + 0x09, 0x11, 0xc1, 0x46, 0x03, 0x4d, 0x43, 0x0f, 0xee, 0x00, 0x43, 0x0f, + 0xfe, 0x16, 0xc3, 0x10, 0x0a, 0xce, 0x73, 0x98, 0x09, 0x28, 0xc9, 0x15, + 0xc3, 0x10, 0x16, 0xcc, 0x8a, 0x15, 0x09, 0x10, 0x99, 0xcc, 0x83, 0xc1, + 0x09, 0x10, 0x90, 0xcd, 0x1a, 0xf3, 0x09, 0x10, 0xf8, 0xc7, 0x6c, 0xd0, + 0x09, 0x10, 0xd1, 0x11, 0x43, 0x10, 0x25, 0xc2, 0xe6, 0x8b, 0x09, 0x28, + 0xc1, 0xc2, 0xae, 0x2b, 0x09, 0x28, 0xb8, 0xc2, 0xe6, 0x79, 0x09, 0x28, + 0x6b, 0x03, 0x10, 0x31, 0xc2, 0xe1, 0xa2, 0x09, 0x28, 0x61, 0xc2, 0xe6, + 0x87, 0x09, 0x28, 0x0b, 0x03, 0x10, 0x37, 0xc2, 0x71, 0x49, 0x09, 0x28, + 0x00, 0x26, 0xc3, 0x10, 0x3d, 0xc2, 0xe6, 0x7b, 0x09, 0x27, 0xd1, 0xc2, + 0xe4, 0xef, 0x09, 0x27, 0xc9, 0x22, 0xc3, 0x10, 0x4d, 0x21, 0x43, 0x10, + 0x55, 0xc2, 0xe6, 0xa7, 0x09, 0x27, 0x79, 0x25, 0xc3, 0x10, 0x60, 0x21, + 0x43, 0x10, 0x68, 0x23, 0xc3, 0x10, 0x74, 0xc2, 0xe6, 0x83, 0x09, 0x27, + 0x39, 0x1f, 0xc3, 0x10, 0x7c, 0x1e, 0x43, 0x10, 0x88, 0xc2, 0xe4, 0xf2, + 0x09, 0x27, 0x09, 0xc2, 0xe6, 0x4a, 0x09, 0x27, 0x00, 0xc2, 0xe6, 0xa9, + 0x09, 0x26, 
0xf9, 0x25, 0xc3, 0x10, 0x90, 0xd4, 0x3c, 0xc8, 0x09, 0x26, + 0xe1, 0xc2, 0xe5, 0x48, 0x09, 0x26, 0xd9, 0x22, 0xc3, 0x10, 0x9a, 0xc2, + 0xe6, 0x4a, 0x09, 0x26, 0xc1, 0x1f, 0xc3, 0x10, 0xa2, 0xc2, 0xe6, 0x4f, + 0x09, 0x26, 0xa8, 0x00, 0x43, 0x10, 0xaa, 0x00, 0x43, 0x10, 0xb6, 0xc8, + 0x38, 0x76, 0x09, 0x0f, 0xb0, 0x94, 0x09, 0x26, 0x9b, 0x03, 0x10, 0xc8, + 0xc4, 0xdd, 0x2c, 0x09, 0x26, 0x91, 0xc2, 0x01, 0xe2, 0x09, 0x0c, 0x59, + 0xcc, 0x82, 0x11, 0x09, 0x0c, 0x51, 0x86, 0x09, 0x0c, 0x49, 0x9f, 0x09, + 0x0c, 0x40, 0x83, 0x09, 0x26, 0x8b, 0x03, 0x10, 0xcc, 0x8b, 0x09, 0x0b, + 0x82, 0x03, 0x10, 0xd0, 0x97, 0x09, 0x26, 0x81, 0x8b, 0x09, 0x0a, 0xf9, + 0x03, 0x43, 0x10, 0xd4, 0x97, 0x09, 0x1c, 0x31, 0xc2, 0x00, 0xb1, 0x09, + 0x0c, 0x30, 0x0a, 0xc3, 0x10, 0xe2, 0xc4, 0xdf, 0x77, 0x09, 0x0c, 0x29, + 0xc2, 0x00, 0x2d, 0x09, 0x0c, 0x21, 0x83, 0x09, 0x0b, 0xf2, 0x03, 0x10, + 0xf7, 0x83, 0x09, 0x1c, 0x21, 0x8b, 0x09, 0x0b, 0xe0, 0x97, 0x09, 0x0b, + 0x9b, 0x03, 0x10, 0xfb, 0x8b, 0x09, 0x0b, 0x90, 0x97, 0x09, 0x0b, 0x5b, + 0x03, 0x10, 0xff, 0x8b, 0x09, 0x0b, 0x3b, 0x03, 0x11, 0x09, 0x83, 0x09, + 0x0b, 0x12, 0x03, 0x11, 0x18, 0x42, 0x01, 0xe2, 0xc3, 0x11, 0x29, 0xc4, + 0x99, 0xe3, 0x09, 0x1b, 0xf1, 0x86, 0x09, 0x0a, 0xca, 0x03, 0x11, 0x31, + 0xc2, 0x05, 0x1d, 0x09, 0x0b, 0xd9, 0x87, 0x09, 0x0b, 0xd0, 0x8b, 0x09, + 0x0b, 0xc3, 0x03, 0x11, 0x37, 0x87, 0x09, 0x0b, 0xa2, 0x03, 0x11, 0x3d, + 0x8f, 0x09, 0x0b, 0x71, 0xc2, 0x04, 0x2b, 0x09, 0x0b, 0x68, 0xc3, 0x05, + 0x4e, 0x09, 0x0b, 0x09, 0xc4, 0x9e, 0x4c, 0x09, 0x0b, 0x00, 0x4c, 0x87, + 0x99, 0xc3, 0x11, 0x43, 0xe0, 0x03, 0x47, 0x09, 0x0c, 0xe8, 0xcc, 0x83, + 0xcd, 0x09, 0x0c, 0xc9, 0xc9, 0x8d, 0x2e, 0x09, 0x0c, 0xc0, 0xca, 0xa7, + 0x4c, 0x09, 0x0c, 0xa0, 0xcc, 0x8a, 0x21, 0x09, 0x0d, 0x48, 0x86, 0x09, + 0x0d, 0x18, 0xd2, 0x05, 0x54, 0x09, 0x26, 0x79, 0x9f, 0x09, 0x09, 0x78, + 0xc5, 0x39, 0xc7, 0x09, 0x26, 0x70, 0xc2, 0x04, 0x3d, 0x09, 0x09, 0xe9, + 0xc4, 0x81, 0x55, 0x09, 0x09, 0xe1, 0xc6, 0x45, 0xad, 0x09, 0x09, 0xd9, + 0xc3, 0x01, 0xce, 0x09, 0x09, 0xd1, 0xc2, 0x00, 0xd1, 0x09, 0x09, 0xc8, + 0xd4, 0x38, 0xa4, 0x09, 0x26, 0x69, 0xce, 0x6c, 0x44, 0x09, 0x09, 0x09, + 0x46, 0x03, 0x4d, 0x43, 0x11, 0x49, 0x46, 0x03, 0x4d, 0xc3, 0x11, 0x55, + 0xc4, 0x39, 0xc8, 0x09, 0x08, 0xe8, 0xc2, 0x01, 0xe2, 0x09, 0x09, 0x41, + 0x90, 0x09, 0x09, 0x38, 0x00, 0x43, 0x11, 0x70, 0x47, 0x03, 0x4c, 0x43, + 0x11, 0x7a, 0xc5, 0x39, 0xc7, 0x09, 0x08, 0x48, 0xcc, 0x83, 0xd9, 0x09, + 0x08, 0x31, 0xc8, 0xb6, 0xfa, 0x09, 0x08, 0x28, 0x97, 0x09, 0x08, 0x11, + 0x87, 0x09, 0x08, 0x08, 0x97, 0x09, 0x26, 0x51, 0xc3, 0x51, 0xdb, 0x09, + 0x07, 0xf8, 0xd6, 0x2a, 0xf6, 0x09, 0x26, 0x49, 0xcd, 0x7a, 0x11, 0x09, + 0x07, 0x78, 0x46, 0x03, 0x4d, 0xc3, 0x11, 0x98, 0xc8, 0xb6, 0x22, 0x09, + 0x07, 0x68, 0x00, 0x43, 0x11, 0xe1, 0x15, 0xc3, 0x11, 0xf3, 0xc3, 0x6c, + 0x49, 0x09, 0x1b, 0xb9, 0x17, 0xc3, 0x11, 0xfd, 0x0e, 0xc3, 0x12, 0x05, + 0x0d, 0xc3, 0x12, 0x14, 0xc8, 0x6a, 0x1e, 0x09, 0x05, 0x59, 0xc2, 0x00, + 0xd0, 0x09, 0x05, 0x4b, 0x03, 0x12, 0x23, 0xc9, 0x75, 0x04, 0x09, 0x05, + 0x3b, 0x03, 0x12, 0x29, 0xc3, 0x62, 0x19, 0x09, 0x05, 0x31, 0x83, 0x09, + 0x05, 0x12, 0x03, 0x12, 0x2f, 0xc2, 0x06, 0x62, 0x09, 0x25, 0xa1, 0xc2, + 0x00, 0x4e, 0x09, 0x25, 0x93, 0x03, 0x12, 0x3c, 0xc2, 0x00, 0xdb, 0x09, + 0x25, 0x83, 0x03, 0x12, 0x40, 0xc8, 0x6a, 0x1e, 0x09, 0x25, 0x79, 0xc2, + 0x00, 0x0a, 0x09, 0x25, 0x71, 0xc3, 0x02, 0x2c, 0x09, 0x25, 0x68, 0xc2, + 0x01, 0x7f, 0x09, 0x04, 0x91, 0xc2, 0x00, 0x65, 0x09, 0x04, 0x88, 0xc2, + 0x00, 0x4e, 0x09, 0x04, 0xd1, 0xc4, 0x5d, 0x99, 0x09, 0x04, 0xc2, 0x03, + 0x12, 0x44, 
0x15, 0xc3, 0x12, 0x4a, 0xc2, 0x0b, 0x19, 0x09, 0x25, 0x31, + 0xc2, 0x00, 0xec, 0x09, 0x25, 0x29, 0x0f, 0xc3, 0x12, 0x56, 0x0e, 0xc3, + 0x12, 0x66, 0x0d, 0xc3, 0x12, 0x70, 0xc8, 0x6a, 0x1e, 0x09, 0x24, 0xc9, + 0x0a, 0xc3, 0x12, 0x7a, 0x09, 0xc3, 0x12, 0x82, 0xc5, 0x9e, 0x4b, 0x09, + 0x24, 0x91, 0x06, 0xc3, 0x12, 0x8d, 0x03, 0x43, 0x12, 0x99, 0xc3, 0x04, + 0x65, 0x09, 0x1b, 0xb1, 0xc4, 0x73, 0x32, 0x09, 0x03, 0xf8, 0xc5, 0x39, + 0xc7, 0x09, 0x04, 0x32, 0x03, 0x12, 0xa8, 0xc9, 0xaa, 0xdd, 0x09, 0x24, + 0x60, 0xc5, 0xdd, 0x2b, 0x09, 0x24, 0x59, 0xc3, 0x04, 0x2a, 0x09, 0x24, + 0x51, 0xc3, 0x04, 0x65, 0x09, 0x03, 0xa8, 0xc9, 0x51, 0xd5, 0x09, 0x24, + 0x49, 0x4d, 0x68, 0xcd, 0x43, 0x12, 0xae, 0xa1, 0x09, 0x03, 0x89, 0xa0, + 0x09, 0x03, 0x80, 0xc9, 0xaa, 0x20, 0x09, 0x24, 0x39, 0xc2, 0x05, 0x1d, + 0x09, 0x02, 0x79, 0xc2, 0x00, 0x03, 0x09, 0x02, 0x70, 0xc2, 0x02, 0x1c, + 0x09, 0x24, 0x31, 0xc2, 0x00, 0xec, 0x09, 0x24, 0x29, 0xc3, 0x58, 0xf1, + 0x09, 0x24, 0x20, 0x42, 0x01, 0xe2, 0xc3, 0x12, 0xef, 0xc3, 0x20, 0x18, + 0x09, 0x1b, 0x83, 0x03, 0x12, 0xfb, 0xcf, 0x65, 0xd0, 0x09, 0x00, 0xa1, + 0xc5, 0x03, 0x47, 0x09, 0x00, 0x91, 0x0b, 0xc3, 0x13, 0x01, 0xc2, 0x00, + 0xd0, 0x09, 0x00, 0x79, 0x42, 0x01, 0x30, 0xc3, 0x13, 0x0d, 0xc9, 0x75, + 0x04, 0x09, 0x00, 0x61, 0xc4, 0x05, 0x4d, 0x09, 0x00, 0x58, 0x83, 0x09, + 0x1b, 0x89, 0xc4, 0x38, 0xb4, 0x09, 0x00, 0xd9, 0xc4, 0x55, 0x25, 0x09, + 0x00, 0xd1, 0xca, 0xa7, 0xb0, 0x09, 0x00, 0xc9, 0xc9, 0x5d, 0x99, 0x09, + 0x00, 0xc1, 0xc5, 0xd8, 0xa8, 0x09, 0x00, 0xb8, 0x49, 0x0d, 0x2d, 0xc3, + 0x13, 0x17, 0xc9, 0xa1, 0x21, 0x09, 0x01, 0xd1, 0xc9, 0x83, 0xac, 0x09, + 0x01, 0xc8, 0xc7, 0x0b, 0x09, 0x09, 0x01, 0x89, 0xd5, 0x37, 0xeb, 0x09, + 0x01, 0x80, 0x8b, 0x09, 0x01, 0x31, 0xc3, 0xe1, 0x68, 0x09, 0x01, 0x28, + 0x00, 0x43, 0x13, 0x24, 0x97, 0x09, 0x14, 0x3b, 0x03, 0x13, 0x30, 0x8b, + 0x09, 0x14, 0x2b, 0x03, 0x13, 0x34, 0x87, 0x09, 0x14, 0x21, 0x04, 0xc3, + 0x13, 0x38, 0x83, 0x09, 0x14, 0x02, 0x03, 0x13, 0x40, 0xc4, 0x39, 0xc8, + 0x09, 0x0a, 0x51, 0x42, 0x00, 0x9a, 0xc3, 0x13, 0x44, 0xc2, 0x00, 0x2c, + 0x09, 0x0a, 0x41, 0xc3, 0xe3, 0x01, 0x09, 0x0a, 0x38, 0x84, 0x09, 0x22, + 0x19, 0x83, 0x09, 0x22, 0x10, 0x97, 0x09, 0x21, 0x89, 0x9f, 0x09, 0x21, + 0x38, 0xcd, 0x77, 0xe2, 0x09, 0x22, 0xa8, 0xcd, 0x77, 0xe2, 0x09, 0x22, + 0x98, 0x84, 0x09, 0x21, 0xf9, 0x83, 0x09, 0x21, 0xf0, 0xcd, 0x77, 0xe2, + 0x09, 0x21, 0xb8, 0xcd, 0x77, 0xe2, 0x09, 0x21, 0x78, 0xcd, 0x77, 0xe2, + 0x09, 0x21, 0x28, 0xcb, 0x97, 0xc9, 0x00, 0x27, 0x99, 0xc8, 0x20, 0xa9, + 0x00, 0x27, 0x88, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x69, 0xcb, 0x99, 0xc3, + 0x05, 0x34, 0x58, 0xc9, 0x25, 0xfa, 0x00, 0x29, 0x79, 0xcb, 0x99, 0xc3, + 0x00, 0x29, 0x09, 0xc4, 0x01, 0x23, 0x00, 0x28, 0x99, 0xc4, 0x14, 0xa6, + 0x00, 0x26, 0x30, 0xc9, 0x6d, 0x45, 0x00, 0x29, 0x49, 0xcb, 0x99, 0xc3, + 0x00, 0x29, 0x19, 0xc4, 0x14, 0xa6, 0x00, 0x26, 0x51, 0xc4, 0x01, 0x23, + 0x00, 0x26, 0x41, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x18, 0xc2, 0x01, 0x7f, + 0x00, 0x29, 0x59, 0x87, 0x05, 0x34, 0x48, 0xc2, 0x01, 0xc8, 0x05, 0x32, + 0x18, 0xcf, 0x69, 0x54, 0x00, 0x29, 0x38, 0x8b, 0x00, 0x21, 0xcb, 0x03, + 0x13, 0x4a, 0x97, 0x00, 0x22, 0xf0, 0x8e, 0x05, 0x33, 0x29, 0x8f, 0x05, + 0x33, 0x38, 0xc9, 0x25, 0xfa, 0x00, 0x29, 0x29, 0xcb, 0x99, 0xc3, 0x00, + 0x25, 0x38, 0xcf, 0x69, 0x54, 0x00, 0x25, 0xf8, 0xc9, 0x20, 0xa8, 0x00, + 0x27, 0xc9, 0xc8, 0xbd, 0x7a, 0x05, 0x32, 0x88, 0xc3, 0xe6, 0x68, 0x00, + 0x28, 0x79, 0xc3, 0xc7, 0xce, 0x00, 0x28, 0x69, 0xc3, 0xd0, 0xbd, 0x00, + 0x28, 0x59, 0xc3, 0xe5, 0xde, 0x00, 0x28, 0x49, 0x06, 0xc3, 0x13, 0x50, + 0xc3, 0xe5, 
0x3c, 0x00, 0x28, 0x28, 0xc4, 0x01, 0x23, 0x00, 0x26, 0x21, + 0xc6, 0x01, 0x73, 0x00, 0x24, 0xf9, 0xc9, 0x25, 0xfa, 0x00, 0x24, 0xd9, + 0xcf, 0x2c, 0x35, 0x00, 0x24, 0xe8, 0xc6, 0x01, 0x73, 0x00, 0x27, 0xf9, + 0xc4, 0x01, 0x23, 0x00, 0x27, 0xe9, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x98, + 0xc6, 0x01, 0x73, 0x00, 0x24, 0x9b, 0x03, 0x13, 0x60, 0xc9, 0x25, 0xfa, + 0x00, 0x27, 0xb9, 0xc6, 0x5e, 0xdc, 0x00, 0x24, 0x89, 0xcb, 0x99, 0xc3, + 0x00, 0x24, 0xa8, 0xcf, 0x6b, 0x16, 0x00, 0x27, 0x58, 0xc5, 0x1d, 0x88, + 0x00, 0x26, 0xb9, 0xc5, 0x1f, 0x0c, 0x00, 0x22, 0x80, 0x83, 0x05, 0x32, + 0x39, 0x46, 0x30, 0x28, 0x43, 0x13, 0x66, 0xc8, 0x20, 0xa9, 0x00, 0x26, + 0xf9, 0xc8, 0x25, 0xfb, 0x00, 0x24, 0xc8, 0x46, 0x00, 0x8b, 0x43, 0x13, + 0x86, 0xcf, 0x2c, 0x35, 0x00, 0x25, 0xc9, 0x06, 0x43, 0x13, 0x90, 0xc9, + 0x25, 0xfa, 0x00, 0x29, 0x71, 0xcb, 0x99, 0xc3, 0x00, 0x29, 0x01, 0xc4, + 0x01, 0x23, 0x00, 0x28, 0x91, 0xc4, 0x14, 0xa6, 0x00, 0x26, 0x28, 0xc9, + 0x6d, 0x45, 0x00, 0x29, 0x41, 0xcb, 0x99, 0xc3, 0x00, 0x29, 0x11, 0xc4, + 0x14, 0xa6, 0x00, 0x26, 0x49, 0xc4, 0x01, 0x23, 0x00, 0x26, 0x39, 0xc9, + 0x25, 0xfa, 0x00, 0x25, 0x10, 0xc2, 0x01, 0x7f, 0x00, 0x29, 0x51, 0x87, + 0x05, 0x34, 0x40, 0xc2, 0x01, 0xc8, 0x05, 0x32, 0x10, 0xcf, 0x69, 0x54, + 0x00, 0x29, 0x30, 0x8b, 0x00, 0x20, 0xcb, 0x03, 0x13, 0x9c, 0x97, 0x00, + 0x20, 0x70, 0x8e, 0x05, 0x33, 0x21, 0x8f, 0x05, 0x33, 0x30, 0xc9, 0x25, + 0xfa, 0x00, 0x29, 0x21, 0xcb, 0x99, 0xc3, 0x00, 0x25, 0x30, 0xcf, 0x69, + 0x54, 0x00, 0x25, 0xf0, 0xc9, 0x20, 0xa8, 0x00, 0x27, 0xc1, 0xc8, 0xbd, + 0x7a, 0x05, 0x32, 0x80, 0xc3, 0xe6, 0x68, 0x00, 0x28, 0x71, 0xc3, 0xc7, + 0xce, 0x00, 0x28, 0x61, 0xc3, 0xd0, 0xbd, 0x00, 0x28, 0x51, 0xc3, 0xe5, + 0xde, 0x00, 0x28, 0x41, 0x06, 0xc3, 0x13, 0xa2, 0xc3, 0xe5, 0x3c, 0x00, + 0x28, 0x20, 0xc4, 0x01, 0x23, 0x00, 0x26, 0x19, 0xc9, 0x25, 0xfa, 0x00, + 0x24, 0xd1, 0xcf, 0x2c, 0x35, 0x00, 0x24, 0xe1, 0xc6, 0x01, 0x73, 0x00, + 0x24, 0xf0, 0xc6, 0x01, 0x73, 0x00, 0x27, 0xf1, 0xc4, 0x01, 0x23, 0x00, + 0x27, 0xe1, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x90, 0xc6, 0x01, 0x73, 0x00, + 0x24, 0x93, 0x03, 0x13, 0xb2, 0xc9, 0x25, 0xfa, 0x00, 0x27, 0xb1, 0xc6, + 0x5e, 0xdc, 0x00, 0x24, 0x81, 0xcb, 0x99, 0xc3, 0x00, 0x24, 0xa0, 0x06, + 0xc3, 0x13, 0xb8, 0xcf, 0x2c, 0x35, 0x00, 0x25, 0xc0, 0xcb, 0x97, 0xc9, + 0x00, 0x27, 0x91, 0xc8, 0x20, 0xa9, 0x00, 0x27, 0x80, 0xcf, 0x6b, 0x16, + 0x00, 0x27, 0x50, 0xc5, 0x1d, 0x88, 0x00, 0x26, 0xb1, 0xc5, 0x1f, 0x0c, + 0x00, 0x20, 0x00, 0x83, 0x05, 0x32, 0x31, 0x46, 0x30, 0x28, 0x43, 0x13, + 0xc4, 0xc8, 0x20, 0xa9, 0x00, 0x26, 0xf1, 0xc8, 0x25, 0xfb, 0x00, 0x24, + 0xc0, 0x46, 0x00, 0x8b, 0x43, 0x13, 0xe4, 0xc9, 0x25, 0xfa, 0x00, 0x25, + 0x61, 0xcb, 0x99, 0xc3, 0x05, 0x34, 0x50, 0xc5, 0x69, 0xa7, 0x00, 0x6c, + 0x39, 0xc6, 0xd2, 0x3b, 0x00, 0x6c, 0x40, 0xc7, 0xc6, 0x32, 0x00, 0x6c, + 0xd1, 0xc7, 0xca, 0x29, 0x00, 0x6c, 0xe1, 0xc7, 0xc7, 0xdd, 0x00, 0x6d, + 0x01, 0xc7, 0xc7, 0xc1, 0x00, 0x6d, 0x11, 0x16, 0xc3, 0x13, 0xee, 0x06, + 0xc3, 0x13, 0xfa, 0xc7, 0xc8, 0x1c, 0x00, 0x6d, 0xa1, 0xc7, 0x8e, 0x9b, + 0x00, 0x6d, 0xb0, 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x69, 0xc6, 0xcc, 0xd1, + 0x00, 0x6c, 0x70, 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x79, 0xc6, 0xcc, 0xd1, + 0x00, 0x6c, 0x80, 0x4a, 0x9b, 0x62, 0xc3, 0x14, 0x06, 0xc5, 0x69, 0xa7, + 0x00, 0x6d, 0xc0, 0xc7, 0xc4, 0xdb, 0x00, 0x6d, 0x59, 0xc7, 0xc1, 0xa8, + 0x00, 0x6e, 0x11, 0xc7, 0xc2, 0x18, 0x00, 0x6e, 0x28, 0xc7, 0xc4, 0x25, + 0x00, 0x6d, 0x61, 0xc6, 0x8e, 0x9c, 0x00, 0x6d, 0x98, 0xd2, 0x4d, 0xc3, + 0x00, 0x6d, 0x29, 0xc5, 0x69, 0xa7, 0x00, 0x6e, 0x08, 0x45, 0xd7, 0x40, + 0x43, 0x14, 
0x32, 0xa3, 0x0e, 0xd5, 0x79, 0xa2, 0x0e, 0xd5, 0x71, 0xa1, + 0x0e, 0xd5, 0x69, 0xa0, 0x0e, 0xd5, 0x61, 0x9f, 0x0e, 0xd5, 0x59, 0x9e, + 0x0e, 0xd5, 0x51, 0x9d, 0x0e, 0xd5, 0x48, 0xcb, 0x57, 0x45, 0x0e, 0xcf, + 0x0b, 0x03, 0x14, 0x44, 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x03, 0x03, 0x14, + 0x4a, 0xc6, 0x24, 0x3b, 0x0e, 0xce, 0xfa, 0x03, 0x14, 0x50, 0x48, 0x0c, + 0x8c, 0xc3, 0x14, 0x56, 0xc6, 0x00, 0x58, 0x0e, 0xcd, 0x1b, 0x03, 0x14, + 0x60, 0xc6, 0x24, 0x3b, 0x0e, 0xcd, 0x12, 0x03, 0x14, 0x66, 0xc9, 0x65, + 0x4f, 0x0e, 0xc8, 0xf9, 0x45, 0x03, 0x14, 0x43, 0x14, 0x6c, 0xc8, 0x3b, + 0xec, 0x0e, 0xc8, 0xe9, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0xd8, 0xc8, 0x3b, + 0xec, 0x0e, 0xc8, 0xc9, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0xb8, 0xc7, 0xc3, + 0x0d, 0x0e, 0xd4, 0x21, 0xc4, 0x00, 0x2d, 0x0e, 0xd4, 0x08, 0xa4, 0x0e, + 0xd3, 0xe9, 0xa3, 0x0e, 0xd3, 0xe1, 0xa2, 0x0e, 0xd3, 0xd9, 0xa1, 0x0e, + 0xd3, 0xd1, 0xa0, 0x0e, 0xd3, 0xc9, 0x9f, 0x0e, 0xd3, 0xc1, 0x9e, 0x0e, + 0xd3, 0xb8, 0xd0, 0x58, 0x82, 0x0e, 0xd2, 0xa9, 0xd0, 0x5a, 0xe2, 0x0e, + 0xd2, 0xa0, 0xcb, 0x93, 0xca, 0x0e, 0xd3, 0x99, 0xd0, 0x5b, 0x12, 0x0e, + 0xd3, 0x90, 0xcc, 0x35, 0xa8, 0x0e, 0xd3, 0x01, 0xcc, 0x5b, 0x22, 0x0e, + 0xd2, 0xf8, 0xd5, 0x35, 0x9f, 0x0e, 0xd2, 0xe1, 0xcc, 0x86, 0x31, 0x0e, + 0xd2, 0xd8, 0xc9, 0xb0, 0x50, 0x0e, 0xd3, 0x39, 0x43, 0x01, 0x55, 0xc3, + 0x14, 0x78, 0xc8, 0x51, 0x1b, 0x0e, 0xd3, 0x10, 0x4a, 0x18, 0xa5, 0xc3, + 0x14, 0x8a, 0x4b, 0x40, 0xb3, 0x43, 0x14, 0x9c, 0xc6, 0x2c, 0x2e, 0x0e, + 0xca, 0xa1, 0xc6, 0x00, 0x58, 0x0e, 0xca, 0x99, 0xc6, 0x24, 0x3b, 0x0e, + 0xca, 0x90, 0x4b, 0x40, 0xb3, 0xc3, 0x14, 0xae, 0x4a, 0x18, 0xa5, 0x43, + 0x14, 0xc0, 0x05, 0xc3, 0x14, 0xd2, 0xc8, 0x45, 0x27, 0x0e, 0xd1, 0x0a, + 0x03, 0x14, 0xde, 0xc6, 0x3b, 0x9c, 0x0e, 0xd1, 0x41, 0xc8, 0x45, 0x27, + 0x0e, 0xd1, 0x22, 0x03, 0x14, 0xe2, 0xc8, 0x3b, 0xec, 0x0e, 0xd0, 0xc1, + 0xc6, 0x24, 0x3b, 0x0e, 0xd0, 0xb8, 0xcd, 0x76, 0xd1, 0x0e, 0xd0, 0xe1, + 0xc5, 0x05, 0x74, 0x0e, 0xd0, 0xd0, 0xc6, 0x07, 0xa1, 0x0e, 0xd0, 0xd9, + 0xc4, 0x05, 0x75, 0x0e, 0xd0, 0xc8, 0xc3, 0x1d, 0xb1, 0x0e, 0xc8, 0x1b, + 0x03, 0x14, 0xe6, 0xc3, 0x00, 0xfd, 0x0e, 0xc2, 0xd2, 0x03, 0x14, 0xea, + 0x00, 0x43, 0x14, 0xee, 0xc4, 0x09, 0x39, 0x0e, 0xc3, 0xeb, 0x03, 0x15, + 0x0c, 0xc3, 0x01, 0x24, 0x0e, 0xc3, 0x5a, 0x03, 0x15, 0x10, 0x17, 0xc3, + 0x15, 0x14, 0xc3, 0xc9, 0xd8, 0x0e, 0xc3, 0x33, 0x03, 0x15, 0x24, 0xc5, + 0x02, 0xd2, 0x0e, 0xc3, 0xb2, 0x03, 0x15, 0x28, 0x00, 0x43, 0x15, 0x2c, + 0xc7, 0x05, 0x79, 0x0e, 0xd0, 0x31, 0x02, 0x43, 0x15, 0x50, 0x54, 0x3a, + 0x70, 0xc3, 0x15, 0x5c, 0xc6, 0xc1, 0xb7, 0x0e, 0xc9, 0x48, 0x59, 0x20, + 0x2b, 0xc3, 0x15, 0x68, 0x44, 0x1f, 0x0e, 0x43, 0x15, 0x74, 0x46, 0x17, + 0x14, 0xc3, 0x15, 0x84, 0x47, 0x01, 0xdb, 0xc3, 0x15, 0x90, 0x46, 0x03, + 0x13, 0x43, 0x15, 0x9c, 0xcf, 0x64, 0x4a, 0x0e, 0xcf, 0x11, 0x46, 0x2d, + 0x11, 0x43, 0x15, 0xa8, 0xc5, 0x03, 0x13, 0x0e, 0xce, 0xd9, 0x48, 0x20, + 0x37, 0x43, 0x15, 0xb4, 0xc5, 0x03, 0x13, 0x0e, 0xce, 0xd1, 0x48, 0x20, + 0x37, 0x43, 0x15, 0xc0, 0x45, 0x0e, 0xd5, 0xc3, 0x15, 0xcc, 0xc4, 0x6b, + 0x03, 0x0e, 0xcb, 0xb9, 0x46, 0x35, 0x01, 0xc3, 0x15, 0xed, 0xc4, 0x0d, + 0x21, 0x0e, 0xcb, 0x70, 0xc5, 0x17, 0x14, 0x0e, 0xcc, 0x01, 0xc6, 0x01, + 0xdb, 0x0e, 0xcb, 0xf9, 0xc5, 0x03, 0x13, 0x0e, 0xcb, 0xf0, 0xc5, 0x17, + 0x14, 0x0e, 0xcb, 0xe9, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0xe1, 0xc5, 0x03, + 0x13, 0x0e, 0xcb, 0xd8, 0x43, 0x32, 0x37, 0xc3, 0x15, 0xff, 0xc3, 0x02, + 0x39, 0x0e, 0xcb, 0x98, 0x4c, 0x8b, 0xc5, 0xc3, 0x16, 0x11, 0xca, 0x91, + 0x42, 0x0e, 0xcb, 0x81, 0xd1, 0x51, 0x12, 0x0e, 0xcb, 0x78, 0xcb, 0x57, + 0x45, 0x0e, 
0xcb, 0x63, 0x03, 0x16, 0x1d, 0xca, 0x91, 0x42, 0x0e, 0xcb, + 0x59, 0xc8, 0x45, 0x27, 0x0e, 0xcb, 0x50, 0x47, 0x3a, 0x70, 0xc3, 0x16, + 0x23, 0xc6, 0xc1, 0xb7, 0x0e, 0xc9, 0x40, 0x52, 0x47, 0xed, 0xc3, 0x16, + 0x2f, 0x44, 0x1f, 0x0e, 0x43, 0x16, 0x3b, 0x47, 0x01, 0xdb, 0xc3, 0x16, + 0x4d, 0x46, 0x03, 0x13, 0x43, 0x16, 0x59, 0x48, 0x20, 0x37, 0xc3, 0x16, + 0x65, 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0xab, 0x03, 0x16, 0x71, 0xc5, 0x17, + 0x14, 0x0e, 0xcc, 0xb9, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0xb0, 0x48, 0x20, + 0x37, 0xc3, 0x16, 0x77, 0xc5, 0x17, 0x14, 0x0e, 0xcc, 0xa1, 0xc6, 0x01, + 0xdb, 0x0e, 0xcc, 0x99, 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0x90, 0x44, 0x0e, + 0xd5, 0xc3, 0x16, 0x83, 0x45, 0x6b, 0x03, 0xc3, 0x16, 0x8d, 0x46, 0x35, + 0x01, 0xc3, 0x16, 0x9f, 0xc4, 0x0d, 0x21, 0x0e, 0xc9, 0x98, 0xc6, 0x64, + 0x4a, 0x0e, 0xcd, 0x29, 0x46, 0x2d, 0x11, 0x43, 0x16, 0xb7, 0xc5, 0x17, + 0x14, 0x0e, 0xca, 0x51, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0x49, 0xc5, 0x03, + 0x13, 0x0e, 0xca, 0x40, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0x39, 0xc6, 0x01, + 0xdb, 0x0e, 0xca, 0x31, 0xc5, 0x03, 0x13, 0x0e, 0xca, 0x28, 0x43, 0x32, + 0x37, 0xc3, 0x16, 0xc3, 0x44, 0x0a, 0x0f, 0x43, 0x16, 0xd5, 0xcb, 0x57, + 0x45, 0x0e, 0xc9, 0xb3, 0x03, 0x16, 0xe7, 0xca, 0x91, 0x42, 0x0e, 0xc9, + 0xa9, 0xd1, 0x51, 0x12, 0x0e, 0xc9, 0xa0, 0xcb, 0x57, 0x45, 0x0e, 0xc9, + 0x8b, 0x03, 0x16, 0xed, 0xca, 0x91, 0x42, 0x0e, 0xc9, 0x81, 0xc8, 0x45, + 0x27, 0x0e, 0xc9, 0x78, 0x48, 0xbf, 0xc2, 0xc3, 0x16, 0xf3, 0x45, 0xd5, + 0xf1, 0x43, 0x17, 0x08, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0xdb, 0x03, 0x17, + 0x1d, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0xd1, 0xc5, 0x03, 0x13, 0x0e, 0xca, + 0xc8, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0xbb, 0x03, 0x17, 0x23, 0xc6, 0x01, + 0xdb, 0x0e, 0xca, 0xb1, 0xc5, 0x03, 0x13, 0x0e, 0xca, 0xa8, 0x45, 0x11, + 0x17, 0xc3, 0x17, 0x29, 0xca, 0x65, 0x4e, 0x0e, 0xc9, 0x18, 0xc7, 0xc1, + 0xb6, 0x0e, 0xd1, 0xe9, 0xc7, 0x27, 0xb2, 0x0e, 0xd1, 0xe1, 0xc7, 0x81, + 0x92, 0x0e, 0xd1, 0xd8, 0xc6, 0xcf, 0x23, 0x0e, 0xd2, 0x91, 0xc7, 0x27, + 0xb2, 0x0e, 0xd2, 0x88, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x79, 0xc7, 0x27, + 0xb2, 0x0e, 0xd2, 0x70, 0x00, 0x43, 0x17, 0x3b, 0x00, 0x43, 0x17, 0x47, + 0xc4, 0x05, 0x75, 0x0e, 0xd2, 0x19, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x10, + 0xc4, 0x05, 0x75, 0x0e, 0xd2, 0x01, 0xc8, 0xbe, 0x0a, 0x0e, 0xd1, 0xf8, + 0xcc, 0x57, 0x44, 0x0e, 0xcf, 0xe0, 0x8e, 0x08, 0xac, 0x48, 0x94, 0x08, + 0xac, 0x38, 0x4c, 0x8b, 0x71, 0xc3, 0x17, 0x53, 0xd2, 0x4b, 0x3b, 0x08, + 0xae, 0xa1, 0xd3, 0x44, 0xb5, 0x08, 0xae, 0x99, 0x43, 0x01, 0x92, 0xc3, + 0x17, 0x65, 0xd0, 0x58, 0x22, 0x08, 0xae, 0x89, 0x50, 0x5d, 0x32, 0x43, + 0x17, 0x71, 0xca, 0x83, 0x03, 0x08, 0xae, 0x80, 0x94, 0x05, 0x44, 0x48, + 0x8e, 0x05, 0x44, 0x58, 0x9f, 0x08, 0x8e, 0xf9, 0x9e, 0x08, 0x8e, 0xf0, + 0xc7, 0x7a, 0x7f, 0x08, 0x8e, 0x09, 0xc7, 0x14, 0x39, 0x08, 0x8c, 0x08, + 0xc4, 0x1e, 0x97, 0x08, 0x8e, 0x01, 0xc5, 0x40, 0xe7, 0x08, 0x8c, 0x10, + 0xc4, 0x18, 0x10, 0x08, 0x8e, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0x8e, 0xb0, + 0xc3, 0x0d, 0x14, 0x08, 0x8e, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0x8e, 0xa0, + 0xc4, 0x02, 0xde, 0x08, 0x8e, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0x8e, 0x90, + 0x97, 0x08, 0x8d, 0xf9, 0x8b, 0x08, 0x8d, 0xe9, 0x83, 0x08, 0x8d, 0x98, + 0x8e, 0x08, 0x8d, 0xd3, 0x03, 0x17, 0x83, 0x94, 0x08, 0x8d, 0xc2, 0x03, + 0x17, 0x87, 0x97, 0x08, 0x8d, 0xb8, 0x8b, 0x08, 0x8d, 0xa8, 0x8e, 0x08, + 0x8c, 0x5b, 0x03, 0x17, 0x8b, 0x94, 0x08, 0x8c, 0x4a, 0x03, 0x17, 0x8f, + 0xc2, 0x00, 0xd0, 0x08, 0x8c, 0xf1, 0x83, 0x08, 0x8c, 0xe8, 0xc2, 0x00, + 0xd0, 0x08, 0x8c, 0xe1, 0x83, 0x08, 0x8c, 0xd8, 0x45, 0x00, 0x27, 0xc3, + 0x17, 0x93, 
0xce, 0x66, 0x67, 0x01, 0x2f, 0x38, 0x45, 0x00, 0x49, 0xc3, + 0x17, 0x9f, 0x46, 0x00, 0x2c, 0x43, 0x17, 0xab, 0xcc, 0x24, 0x47, 0x01, + 0x17, 0x29, 0xc8, 0x07, 0x5f, 0x01, 0x14, 0x90, 0xcc, 0x24, 0x47, 0x01, + 0x17, 0x21, 0xc8, 0x07, 0x5f, 0x01, 0x14, 0x88, 0xc7, 0x0b, 0x09, 0x01, + 0x9d, 0x01, 0xc5, 0xd9, 0x11, 0x01, 0x9d, 0x20, 0xc8, 0x0b, 0x08, 0x01, + 0x9d, 0x78, 0xc2, 0x17, 0x99, 0x01, 0x9a, 0x09, 0x90, 0x01, 0x9a, 0x10, + 0xc7, 0x0b, 0x09, 0x01, 0x9b, 0xc1, 0xc5, 0xd9, 0x11, 0x01, 0x9b, 0xc8, + 0xc5, 0xd9, 0x34, 0x01, 0x99, 0x59, 0xc2, 0x00, 0x16, 0x01, 0x99, 0x60, + 0xc3, 0x9f, 0x30, 0x01, 0x99, 0x79, 0x91, 0x01, 0x99, 0x80, 0xc3, 0xa9, + 0x98, 0x01, 0x99, 0xc1, 0xc2, 0x06, 0x62, 0x01, 0x99, 0xd0, 0xc5, 0xd6, + 0xff, 0x01, 0x97, 0xf9, 0xc6, 0xd1, 0xff, 0x01, 0x9b, 0xd9, 0xc6, 0xd1, + 0xcf, 0x01, 0x9b, 0xe1, 0xc7, 0xc5, 0x52, 0x01, 0x9b, 0xe9, 0xc5, 0xdb, + 0x05, 0x01, 0x9b, 0xf0, 0xc4, 0x89, 0x91, 0x01, 0x98, 0x61, 0xc4, 0xe4, + 0x6f, 0x01, 0x98, 0x68, 0x05, 0xc3, 0x17, 0xb7, 0xc7, 0x0b, 0x09, 0x01, + 0x9d, 0x10, 0xc4, 0xdd, 0xdf, 0x01, 0x9a, 0x19, 0xc2, 0x17, 0x99, 0x01, + 0x9a, 0x20, 0xc5, 0xd8, 0x26, 0x01, 0x9a, 0x50, 0xc3, 0x0f, 0xd9, 0x01, + 0x9a, 0x60, 0xc2, 0x02, 0x2e, 0x01, 0x9e, 0x09, 0xc5, 0x04, 0x34, 0x01, + 0x9d, 0x3a, 0x03, 0x17, 0xc3, 0xc7, 0x0b, 0x09, 0x01, 0x9c, 0xf9, 0xc5, + 0xd9, 0x11, 0x01, 0x9d, 0x18, 0xc2, 0x00, 0xbf, 0x01, 0x3e, 0x79, 0xc3, + 0x02, 0x9b, 0x01, 0x3e, 0x70, 0x95, 0x0f, 0x8a, 0x11, 0x94, 0x0f, 0x8a, + 0x09, 0x93, 0x0f, 0x8a, 0x01, 0x92, 0x0f, 0x89, 0xf9, 0x91, 0x0f, 0x89, + 0xf1, 0x90, 0x0f, 0x89, 0xe9, 0x8f, 0x0f, 0x89, 0xe1, 0x8e, 0x0f, 0x89, + 0xd9, 0x8d, 0x0f, 0x89, 0xd1, 0x8c, 0x0f, 0x89, 0xc9, 0x8b, 0x0f, 0x89, + 0xc1, 0x8a, 0x0f, 0x89, 0xb9, 0x89, 0x0f, 0x89, 0xb1, 0x88, 0x0f, 0x89, + 0xa9, 0x87, 0x0f, 0x89, 0xa1, 0x86, 0x0f, 0x89, 0x99, 0x83, 0x0f, 0x89, + 0x81, 0x84, 0x0f, 0x89, 0x89, 0x85, 0x0f, 0x89, 0x91, 0x96, 0x0f, 0x8a, + 0x19, 0x97, 0x0f, 0x8a, 0x21, 0x98, 0x0f, 0x8a, 0x29, 0x99, 0x0f, 0x8a, + 0x31, 0x9a, 0x0f, 0x8a, 0x39, 0x9b, 0x0f, 0x8a, 0x41, 0x9c, 0x0f, 0x8a, + 0x48, 0xc3, 0xe5, 0xbd, 0x0f, 0x91, 0xd9, 0xc3, 0xe5, 0xe1, 0x0f, 0x91, + 0x58, 0xc3, 0xe5, 0x8d, 0x0f, 0x91, 0xd1, 0x1f, 0xc3, 0x17, 0xc9, 0x21, + 0xc3, 0x17, 0xdb, 0x20, 0xc3, 0x17, 0xe7, 0xc3, 0xe4, 0xdf, 0x0f, 0x91, + 0x61, 0xc3, 0xe5, 0x09, 0x0f, 0x91, 0x21, 0xc3, 0xe5, 0xb7, 0x0f, 0x90, + 0xf1, 0xc3, 0xe6, 0x35, 0x0f, 0x90, 0xe9, 0x26, 0xc3, 0x17, 0xf3, 0xc3, + 0xe5, 0x42, 0x0f, 0x90, 0x88, 0x22, 0xc3, 0x17, 0xff, 0xc3, 0xe5, 0x12, + 0x0f, 0x91, 0x99, 0xc3, 0xe5, 0x1b, 0x0f, 0x91, 0x91, 0xc3, 0xe4, 0xf1, + 0x0f, 0x91, 0x09, 0xc3, 0xe5, 0xf6, 0x0f, 0x90, 0xd0, 0x42, 0xe4, 0xef, + 0xc3, 0x18, 0x0b, 0xc3, 0xe5, 0x27, 0x0f, 0x91, 0xa9, 0x1f, 0xc3, 0x18, + 0x13, 0x20, 0xc3, 0x18, 0x25, 0xc3, 0xe6, 0x38, 0x0f, 0x91, 0x31, 0x22, + 0xc3, 0x18, 0x31, 0xc3, 0xe5, 0x48, 0x0f, 0x90, 0xc8, 0xc3, 0xe4, 0xee, + 0x0f, 0x91, 0x81, 0xc3, 0xe5, 0x7b, 0x0f, 0x91, 0x19, 0xc3, 0xe5, 0x1e, + 0x0f, 0x90, 0xb0, 0xc2, 0x81, 0x20, 0x0f, 0x91, 0x69, 0x1d, 0xc3, 0x18, + 0x3d, 0xc2, 0xd5, 0x96, 0x0f, 0x90, 0xc1, 0xc2, 0x8c, 0x54, 0x0f, 0x90, + 0xa0, 0xc4, 0x02, 0xde, 0x01, 0x20, 0x99, 0xc2, 0x02, 0xa0, 0x01, 0x20, + 0x90, 0xcb, 0x98, 0xd1, 0x01, 0x20, 0x23, 0x03, 0x18, 0x45, 0xc3, 0x09, + 0x3f, 0x01, 0x20, 0x18, 0xc2, 0x00, 0xdb, 0x00, 0x43, 0x49, 0x83, 0x00, + 0x43, 0x40, 0x10, 0xc3, 0x18, 0x4b, 0xc2, 0x19, 0x2c, 0x00, 0x43, 0x11, + 0xc2, 0x01, 0x30, 0x00, 0x43, 0x00, 0xc4, 0x00, 0x49, 0x00, 0x38, 0x49, + 0xc5, 0x00, 0x2c, 0x00, 0x38, 0x48, 0xcf, 0x33, 0xad, 0x01, 0x56, 0x20, + 0xcb, 0x0e, 
0xbd, 0x01, 0x56, 0x31, 0xce, 0x33, 0x92, 0x01, 0x56, 0x41, + 0xcf, 0x6a, 0x8f, 0x01, 0x56, 0x51, 0xcc, 0x24, 0x47, 0x01, 0x56, 0x60, + 0xc5, 0xd4, 0xcf, 0x00, 0xdc, 0x11, 0xc5, 0xd9, 0xcf, 0x00, 0xdc, 0x08, + 0xca, 0x6c, 0x10, 0x0f, 0xb0, 0x29, 0xcc, 0x1d, 0x4a, 0x0f, 0xb0, 0x21, + 0xd3, 0x41, 0x38, 0x0f, 0xb0, 0x30, 0x45, 0x02, 0x9a, 0x43, 0x18, 0x55, + 0xc7, 0x80, 0x70, 0x01, 0x17, 0xf1, 0x48, 0x00, 0x5f, 0x43, 0x18, 0x61, + 0xc7, 0x80, 0x70, 0x01, 0x17, 0xb9, 0x48, 0x00, 0x5f, 0x43, 0x18, 0x67, + 0x00, 0x43, 0x18, 0x6d, 0x0b, 0xc3, 0x18, 0x79, 0xc3, 0x09, 0x9e, 0x01, + 0x0b, 0x98, 0x19, 0xc3, 0x18, 0x88, 0xc2, 0x00, 0xc4, 0x01, 0x0b, 0xc9, + 0xc4, 0x02, 0xde, 0x01, 0x0b, 0x90, 0xc5, 0x66, 0xb1, 0x01, 0x0b, 0xd1, + 0xc4, 0x00, 0x2d, 0x01, 0x0b, 0xb8, 0xc4, 0x18, 0x10, 0x01, 0x0b, 0xb1, + 0xc2, 0x22, 0xcc, 0x01, 0x0b, 0xa8, 0xce, 0x69, 0x64, 0x07, 0xf2, 0x19, + 0xd2, 0x21, 0x89, 0x07, 0xf0, 0xb8, 0xcc, 0x00, 0x33, 0x07, 0xf1, 0xb9, + 0xcd, 0x69, 0x65, 0x07, 0xf2, 0x08, 0xc4, 0x00, 0x3b, 0x07, 0xf0, 0xc1, + 0xc4, 0xe0, 0xeb, 0x07, 0xf0, 0xc0, 0x9d, 0x0f, 0x87, 0x51, 0x9e, 0x0f, + 0x87, 0x59, 0x9f, 0x0f, 0x87, 0x61, 0xa0, 0x0f, 0x87, 0x69, 0xa1, 0x0f, + 0x87, 0x71, 0xa2, 0x0f, 0x87, 0x79, 0xa3, 0x0f, 0x87, 0x81, 0xa4, 0x0f, + 0x87, 0x89, 0xa5, 0x0f, 0x87, 0x91, 0xa6, 0x0f, 0x87, 0x98, 0x9d, 0x0f, + 0x87, 0xa1, 0x9e, 0x0f, 0x87, 0xa8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x21, + 0xc6, 0x78, 0x78, 0x0f, 0x85, 0xa1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x21, + 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0xa0, 0xcc, 0x82, 0x4d, 0x01, 0x51, 0x39, + 0xd1, 0x4b, 0xde, 0x01, 0x51, 0x10, 0xc5, 0x05, 0x02, 0x01, 0x51, 0x31, + 0xc5, 0x00, 0xd4, 0x01, 0x51, 0x20, 0x83, 0x01, 0x90, 0xb1, 0x97, 0x01, + 0x90, 0xe0, 0x89, 0x08, 0xd7, 0x18, 0xc4, 0x18, 0x12, 0x08, 0x43, 0xf9, + 0x91, 0x08, 0x43, 0xd0, 0xc2, 0x39, 0x8b, 0x08, 0x43, 0xb1, 0xc3, 0x1e, + 0x1b, 0x08, 0x43, 0x40, 0xc3, 0x11, 0xef, 0x08, 0x43, 0xa9, 0x03, 0x43, + 0x18, 0x92, 0xc3, 0x16, 0x5a, 0x08, 0x43, 0x81, 0xc4, 0x36, 0xb5, 0x08, + 0x43, 0x00, 0xc2, 0x00, 0x8e, 0x08, 0x43, 0x38, 0xc3, 0x03, 0x15, 0x01, + 0x37, 0xc9, 0xc9, 0xa8, 0x8b, 0x0f, 0xa3, 0x88, 0xc8, 0x7a, 0x7e, 0x05, + 0x47, 0xb9, 0x16, 0xc3, 0x18, 0x9e, 0xc6, 0x1e, 0x95, 0x05, 0x47, 0x98, + 0x91, 0x00, 0x48, 0x91, 0x87, 0x00, 0x48, 0x71, 0x83, 0x00, 0x48, 0x20, + 0x8e, 0x00, 0x4b, 0x08, 0x94, 0x00, 0x4b, 0x00, 0xc2, 0x00, 0xd0, 0x00, + 0x4a, 0xe1, 0x83, 0x00, 0x4b, 0xf0, 0x91, 0x00, 0x48, 0x89, 0x87, 0x00, + 0x48, 0x69, 0x83, 0x00, 0x4b, 0x90, 0x8a, 0x08, 0x20, 0x18, 0x91, 0x08, + 0x20, 0x28, 0x8a, 0x08, 0x20, 0x48, 0x91, 0x08, 0x20, 0x58, 0x8a, 0x08, + 0x20, 0xf8, 0x89, 0x08, 0x21, 0x28, 0x8a, 0x08, 0x21, 0x58, 0x91, 0x08, + 0x21, 0x68, 0x8a, 0x08, 0x21, 0x88, 0x91, 0x08, 0x21, 0x98, 0x8a, 0x08, + 0x22, 0x38, 0x89, 0x08, 0x22, 0x68, 0xca, 0x03, 0xdd, 0x0f, 0xc4, 0x99, + 0x48, 0x01, 0x9a, 0x43, 0x18, 0xaa, 0xe0, 0x05, 0xc7, 0x01, 0x5f, 0x78, + 0xc5, 0x01, 0x4a, 0x01, 0x0e, 0x19, 0x00, 0x43, 0x18, 0xc5, 0xc5, 0x01, + 0x4a, 0x01, 0x0e, 0x11, 0x00, 0x43, 0x18, 0xd7, 0x45, 0x00, 0x8c, 0xc3, + 0x18, 0xe3, 0xda, 0x1b, 0x34, 0x01, 0x0f, 0xa9, 0xc8, 0xae, 0xbc, 0x01, + 0x0d, 0x39, 0xc6, 0x10, 0x9d, 0x01, 0x48, 0x99, 0xda, 0x1c, 0x1e, 0x0f, + 0xdd, 0xb8, 0xc4, 0x26, 0x78, 0x01, 0x27, 0xe9, 0xc5, 0x06, 0xdb, 0x01, + 0x27, 0xe1, 0x15, 0xc3, 0x19, 0x19, 0x08, 0xc3, 0x19, 0x25, 0x16, 0xc3, + 0x19, 0x31, 0xc3, 0x05, 0x14, 0x01, 0x27, 0xa8, 0x47, 0x00, 0x58, 0xc3, + 0x19, 0x3d, 0xce, 0x34, 0xd4, 0x01, 0x57, 0x18, 0xcf, 0x01, 0xb8, 0x01, + 0x80, 0xf0, 0x02, 0xc3, 0x19, 0x49, 0xc5, 0x27, 0xf9, 0x01, 0x00, 0xb8, + 0xc2, 0x00, 
0xbf, 0x01, 0x52, 0xa1, 0xc3, 0x02, 0x9b, 0x01, 0x52, 0x98, + 0x8c, 0x01, 0x0a, 0x49, 0x8b, 0x01, 0x0a, 0x41, 0x87, 0x01, 0x0a, 0x39, + 0x86, 0x01, 0x0a, 0x30, 0x8b, 0x01, 0x09, 0xf8, 0xc9, 0x00, 0xca, 0x01, + 0x54, 0xd9, 0xcc, 0x07, 0xc7, 0x01, 0x54, 0xe0, 0xc5, 0x78, 0x04, 0x01, + 0x02, 0x31, 0x48, 0xbc, 0xfa, 0xc3, 0x19, 0x55, 0xc8, 0x52, 0x09, 0x01, + 0x4c, 0x61, 0xc6, 0x01, 0x73, 0x01, 0x72, 0xb1, 0xcd, 0x75, 0xa6, 0x01, + 0x72, 0xc0, 0xd1, 0x52, 0xff, 0x0f, 0xab, 0x51, 0xce, 0x6f, 0x1c, 0x0f, + 0xab, 0x48, 0x00, 0x43, 0x19, 0x61, 0xc6, 0x02, 0xd1, 0x01, 0x2e, 0xb9, + 0xc4, 0x0e, 0x6a, 0x01, 0x5f, 0x48, 0xd4, 0x3f, 0x5c, 0x01, 0x4e, 0x70, + 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x13, 0x03, 0x19, 0x82, 0xcc, 0x82, 0xb9, + 0x01, 0x5b, 0x61, 0xcd, 0x7c, 0xa8, 0x01, 0x5c, 0x30, 0x45, 0x00, 0x8c, + 0xc3, 0x19, 0x86, 0xc8, 0xae, 0xbc, 0x01, 0x48, 0x28, 0x44, 0x03, 0xc8, + 0xc3, 0x19, 0x96, 0x42, 0x02, 0xae, 0x43, 0x19, 0xa0, 0xd7, 0x22, 0x5c, + 0x0f, 0xc0, 0x51, 0xc3, 0x7e, 0x79, 0x01, 0x0d, 0x60, 0x45, 0x03, 0x14, + 0xc3, 0x19, 0xaa, 0xc5, 0x01, 0x74, 0x01, 0x0c, 0xd8, 0xd4, 0x2d, 0x64, + 0x01, 0x0f, 0xd9, 0xc9, 0xb3, 0xf8, 0x01, 0x48, 0x88, 0xc3, 0x14, 0xa7, + 0x01, 0x0d, 0x1b, 0x03, 0x19, 0xb6, 0x43, 0x00, 0x7e, 0x43, 0x19, 0xbc, + 0xc2, 0x00, 0xb1, 0x01, 0x0f, 0x29, 0xcc, 0x56, 0x78, 0x01, 0x48, 0xf0, + 0x9a, 0x01, 0x4a, 0x39, 0xcc, 0x07, 0xc7, 0x01, 0x5a, 0x19, 0xc8, 0xb7, + 0x52, 0x01, 0x5a, 0x20, 0xcf, 0x6a, 0x8f, 0x01, 0x4b, 0xa9, 0xce, 0x33, + 0x92, 0x01, 0x4b, 0xa1, 0xd5, 0x36, 0xef, 0x01, 0x4a, 0x11, 0x48, 0x61, + 0xd4, 0x43, 0x19, 0xc8, 0xe0, 0x06, 0xc7, 0x0f, 0xdd, 0xb0, 0x45, 0x00, + 0x8c, 0xc3, 0x19, 0xd4, 0xc8, 0xae, 0xbc, 0x01, 0x48, 0x38, 0xc8, 0x4b, + 0x94, 0x01, 0x0c, 0x39, 0xca, 0xa7, 0xce, 0x01, 0x0c, 0x30, 0xc8, 0x4b, + 0x94, 0x01, 0x0c, 0x09, 0xc7, 0x0d, 0x04, 0x01, 0x0b, 0x70, 0xc3, 0x23, + 0x1c, 0x00, 0xb7, 0xc1, 0x85, 0x00, 0xb7, 0xb8, 0xc2, 0x1d, 0xc1, 0x00, + 0xb7, 0x39, 0xc6, 0xd2, 0x35, 0x00, 0xb6, 0xc9, 0xc9, 0x25, 0x3a, 0x00, + 0xb6, 0x99, 0xc5, 0x72, 0x5f, 0x00, 0xb6, 0x81, 0xc5, 0x2e, 0x39, 0x00, + 0xb6, 0x61, 0xc4, 0x05, 0xf1, 0x00, 0xb6, 0x31, 0xc6, 0x57, 0x17, 0x00, + 0xb5, 0xf9, 0xc8, 0xbf, 0x3a, 0x00, 0xb5, 0xe9, 0xc5, 0x71, 0x4d, 0x00, + 0xb5, 0x68, 0x90, 0x05, 0x28, 0x20, 0x90, 0x05, 0x2b, 0xa8, 0x87, 0x05, + 0x28, 0x30, 0x91, 0x05, 0x2b, 0xb8, 0x87, 0x05, 0x28, 0x40, 0x91, 0x05, + 0x2b, 0xc8, 0x87, 0x05, 0x28, 0x50, 0x91, 0x05, 0x2b, 0xd8, 0x87, 0x05, + 0x28, 0x49, 0x90, 0x05, 0x2f, 0x68, 0x90, 0x05, 0x2a, 0xa8, 0x91, 0x05, + 0x2b, 0xd0, 0x87, 0x05, 0x28, 0x59, 0x90, 0x05, 0x2f, 0x80, 0x91, 0x05, + 0x2b, 0xe1, 0x90, 0x05, 0x2e, 0x40, 0x87, 0x05, 0x28, 0x78, 0x91, 0x05, + 0x2c, 0x00, 0x87, 0x05, 0x28, 0x80, 0x87, 0x05, 0x2f, 0xb3, 0x03, 0x19, + 0xe0, 0x8b, 0x05, 0x29, 0xb1, 0x83, 0x05, 0x2a, 0xe9, 0x91, 0x05, 0x2e, + 0x73, 0x03, 0x19, 0xe4, 0x97, 0x05, 0x2d, 0x38, 0x91, 0x05, 0x2c, 0x08, + 0x87, 0x05, 0x28, 0xa8, 0x91, 0x05, 0x2c, 0x31, 0x43, 0x00, 0x5c, 0x43, + 0x19, 0xe8, 0x87, 0x05, 0x28, 0xe0, 0x91, 0x05, 0x2c, 0x68, 0x87, 0x05, + 0x30, 0x23, 0x03, 0x1a, 0x06, 0x8b, 0x05, 0x2a, 0x21, 0x83, 0x05, 0x2b, + 0x61, 0x91, 0x05, 0x2e, 0xe3, 0x03, 0x1a, 0x0e, 0x97, 0x05, 0x2d, 0xa8, + 0x87, 0x05, 0x29, 0x18, 0x91, 0x05, 0x2c, 0xa0, 0x87, 0x05, 0x28, 0xb8, + 0x91, 0x05, 0x2c, 0x40, 0x87, 0x05, 0x28, 0xc8, 0x91, 0x05, 0x2c, 0x50, + 0xc5, 0x00, 0xd4, 0x01, 0x57, 0x79, 0xc5, 0x05, 0x02, 0x01, 0x57, 0x80, + 0xa5, 0x0c, 0x57, 0xf9, 0xa4, 0x0c, 0x57, 0xf1, 0xa3, 0x0c, 0x57, 0xe9, + 0xa2, 0x0c, 0x57, 0xe1, 0xa1, 0x0c, 0x57, 0xd9, 0xa0, 0x0c, 0x57, 0xd1, + 0x9f, 0x0c, 
0x57, 0xc9, 0x9e, 0x0c, 0x57, 0xc1, 0x9d, 0x0c, 0x57, 0xb8, + 0xa6, 0x0c, 0x57, 0xb1, 0xa5, 0x0c, 0x57, 0xa9, 0xa4, 0x0c, 0x57, 0xa1, + 0xa3, 0x0c, 0x57, 0x99, 0xa2, 0x0c, 0x57, 0x91, 0xa1, 0x0c, 0x57, 0x89, + 0xa0, 0x0c, 0x57, 0x81, 0x9f, 0x0c, 0x57, 0x79, 0x9e, 0x0c, 0x57, 0x71, + 0x9d, 0x0c, 0x57, 0x68, 0xa6, 0x0c, 0x57, 0x61, 0xa5, 0x0c, 0x57, 0x59, + 0xa4, 0x0c, 0x57, 0x51, 0xa3, 0x0c, 0x57, 0x49, 0xa2, 0x0c, 0x57, 0x41, + 0xa1, 0x0c, 0x57, 0x39, 0xa0, 0x0c, 0x57, 0x31, 0x9f, 0x0c, 0x57, 0x29, + 0x9e, 0x0c, 0x57, 0x21, 0x9d, 0x0c, 0x57, 0x18, 0xa6, 0x0c, 0x57, 0x11, + 0xa5, 0x0c, 0x57, 0x09, 0xa4, 0x0c, 0x57, 0x01, 0xa3, 0x0c, 0x56, 0xf9, + 0xa2, 0x0c, 0x56, 0xf1, 0xa1, 0x0c, 0x56, 0xe9, 0xa0, 0x0c, 0x56, 0xe1, + 0x9f, 0x0c, 0x56, 0xd9, 0x9e, 0x0c, 0x56, 0xd1, 0x9d, 0x0c, 0x56, 0xc8, + 0xa6, 0x0c, 0x56, 0xc1, 0xa5, 0x0c, 0x56, 0xb9, 0xa4, 0x0c, 0x56, 0xb1, + 0xa3, 0x0c, 0x56, 0xa9, 0xa2, 0x0c, 0x56, 0xa1, 0xa1, 0x0c, 0x56, 0x99, + 0xa0, 0x0c, 0x56, 0x91, 0x9f, 0x0c, 0x56, 0x89, 0x9e, 0x0c, 0x56, 0x81, + 0x9d, 0x0c, 0x56, 0x78, 0xa6, 0x0c, 0x56, 0x71, 0xa5, 0x0c, 0x56, 0x69, + 0xa4, 0x0c, 0x56, 0x61, 0xa3, 0x0c, 0x56, 0x59, 0xa2, 0x0c, 0x56, 0x51, + 0xa1, 0x0c, 0x56, 0x49, 0xa0, 0x0c, 0x56, 0x41, 0x9f, 0x0c, 0x56, 0x39, + 0x9e, 0x0c, 0x56, 0x31, 0x9d, 0x0c, 0x56, 0x28, 0xa6, 0x0c, 0x56, 0x21, + 0xa5, 0x0c, 0x56, 0x19, 0xa4, 0x0c, 0x56, 0x11, 0xa3, 0x0c, 0x56, 0x09, + 0xa2, 0x0c, 0x56, 0x01, 0xa1, 0x0c, 0x55, 0xf9, 0xa0, 0x0c, 0x55, 0xf1, + 0x9f, 0x0c, 0x55, 0xe9, 0x9e, 0x0c, 0x55, 0xe1, 0x9d, 0x0c, 0x55, 0xd8, + 0xa6, 0x0c, 0x55, 0xd1, 0xa5, 0x0c, 0x55, 0xc9, 0xa4, 0x0c, 0x55, 0xc1, + 0xa3, 0x0c, 0x55, 0xb9, 0xa2, 0x0c, 0x55, 0xb1, 0xa1, 0x0c, 0x55, 0xa9, + 0xa0, 0x0c, 0x55, 0xa1, 0x9f, 0x0c, 0x55, 0x99, 0x9e, 0x0c, 0x55, 0x91, + 0x9d, 0x0c, 0x55, 0x88, 0xa6, 0x0c, 0x55, 0x81, 0xa5, 0x0c, 0x55, 0x79, + 0xa4, 0x0c, 0x55, 0x71, 0xa3, 0x0c, 0x55, 0x69, 0xa2, 0x0c, 0x55, 0x61, + 0xa1, 0x0c, 0x55, 0x59, 0xa0, 0x0c, 0x55, 0x51, 0x9f, 0x0c, 0x55, 0x49, + 0x9e, 0x0c, 0x55, 0x41, 0x9d, 0x0c, 0x55, 0x38, 0xa6, 0x0c, 0x55, 0x31, + 0xa5, 0x0c, 0x55, 0x29, 0xa4, 0x0c, 0x55, 0x21, 0xa3, 0x0c, 0x55, 0x19, + 0xa2, 0x0c, 0x55, 0x11, 0xa1, 0x0c, 0x55, 0x09, 0xa0, 0x0c, 0x55, 0x01, + 0x9f, 0x0c, 0x54, 0xf9, 0x9e, 0x0c, 0x54, 0xf1, 0x9d, 0x0c, 0x54, 0xe8, + 0xa6, 0x0c, 0x54, 0xe1, 0xa5, 0x0c, 0x54, 0xd9, 0xa4, 0x0c, 0x54, 0xd1, + 0xa3, 0x0c, 0x54, 0xc9, 0xa2, 0x0c, 0x54, 0xc1, 0xa1, 0x0c, 0x54, 0xb9, + 0xa0, 0x0c, 0x54, 0xb1, 0x9f, 0x0c, 0x54, 0xa9, 0x9e, 0x0c, 0x54, 0xa1, + 0x9d, 0x0c, 0x54, 0x98, 0xa6, 0x0c, 0x54, 0x91, 0xa5, 0x0c, 0x54, 0x89, + 0xa4, 0x0c, 0x54, 0x81, 0xa3, 0x0c, 0x54, 0x79, 0xa2, 0x0c, 0x54, 0x71, + 0xa1, 0x0c, 0x54, 0x69, 0xa0, 0x0c, 0x54, 0x61, 0x9f, 0x0c, 0x54, 0x59, + 0x9e, 0x0c, 0x54, 0x51, 0x9d, 0x0c, 0x54, 0x48, 0xa6, 0x0c, 0x54, 0x41, + 0xa5, 0x0c, 0x54, 0x39, 0xa4, 0x0c, 0x54, 0x31, 0xa3, 0x0c, 0x54, 0x29, + 0xa2, 0x0c, 0x54, 0x21, 0xa1, 0x0c, 0x54, 0x19, 0xa0, 0x0c, 0x54, 0x11, + 0x9f, 0x0c, 0x54, 0x09, 0x9e, 0x0c, 0x54, 0x01, 0x9d, 0x0c, 0x53, 0xf8, + 0xa6, 0x0c, 0x53, 0xf1, 0xa5, 0x0c, 0x53, 0xe9, 0xa4, 0x0c, 0x53, 0xe1, + 0xa3, 0x0c, 0x53, 0xd9, 0xa2, 0x0c, 0x53, 0xd1, 0xa1, 0x0c, 0x53, 0xc9, + 0xa0, 0x0c, 0x53, 0xc1, 0x9f, 0x0c, 0x53, 0xb9, 0x9e, 0x0c, 0x53, 0xb1, + 0x9d, 0x0c, 0x53, 0xa8, 0xa6, 0x0c, 0x53, 0xa1, 0xa5, 0x0c, 0x53, 0x99, + 0xa4, 0x0c, 0x53, 0x91, 0xa3, 0x0c, 0x53, 0x89, 0xa2, 0x0c, 0x53, 0x81, + 0xa1, 0x0c, 0x53, 0x79, 0xa0, 0x0c, 0x53, 0x71, 0x9f, 0x0c, 0x53, 0x69, + 0x9e, 0x0c, 0x53, 0x61, 0x9d, 0x0c, 0x53, 0x58, 0xa6, 0x0c, 0x53, 0x51, + 0xa5, 0x0c, 
0x53, 0x49, 0xa4, 0x0c, 0x53, 0x41, 0xa3, 0x0c, 0x53, 0x39, + 0xa2, 0x0c, 0x53, 0x31, 0xa1, 0x0c, 0x53, 0x29, 0xa0, 0x0c, 0x53, 0x21, + 0x9f, 0x0c, 0x53, 0x19, 0x9e, 0x0c, 0x53, 0x11, 0x9d, 0x0c, 0x53, 0x08, + 0xa6, 0x0c, 0x53, 0x01, 0xa5, 0x0c, 0x52, 0xf9, 0xa4, 0x0c, 0x52, 0xf1, + 0xa3, 0x0c, 0x52, 0xe9, 0xa2, 0x0c, 0x52, 0xe1, 0xa1, 0x0c, 0x52, 0xd9, + 0xa0, 0x0c, 0x52, 0xd1, 0x9f, 0x0c, 0x52, 0xc9, 0x9e, 0x0c, 0x52, 0xc1, + 0x9d, 0x0c, 0x52, 0xb8, 0xa6, 0x0c, 0x52, 0xb1, 0xa5, 0x0c, 0x52, 0xa9, + 0xa4, 0x0c, 0x52, 0xa1, 0xa3, 0x0c, 0x52, 0x99, 0xa2, 0x0c, 0x52, 0x91, + 0xa1, 0x0c, 0x52, 0x89, 0xa0, 0x0c, 0x52, 0x81, 0x9f, 0x0c, 0x52, 0x79, + 0x9e, 0x0c, 0x52, 0x71, 0x9d, 0x0c, 0x52, 0x68, 0xa6, 0x0c, 0x52, 0x61, + 0xa5, 0x0c, 0x52, 0x59, 0xa4, 0x0c, 0x52, 0x51, 0xa3, 0x0c, 0x52, 0x49, + 0xa2, 0x0c, 0x52, 0x41, 0xa1, 0x0c, 0x52, 0x39, 0xa0, 0x0c, 0x52, 0x31, + 0x9f, 0x0c, 0x52, 0x29, 0x9e, 0x0c, 0x52, 0x21, 0x9d, 0x0c, 0x52, 0x18, + 0xa6, 0x0c, 0x52, 0x11, 0xa5, 0x0c, 0x52, 0x09, 0xa4, 0x0c, 0x52, 0x01, + 0xa3, 0x0c, 0x51, 0xf9, 0xa2, 0x0c, 0x51, 0xf1, 0xa1, 0x0c, 0x51, 0xe9, + 0xa0, 0x0c, 0x51, 0xe1, 0x9f, 0x0c, 0x51, 0xd9, 0x9e, 0x0c, 0x51, 0xd1, + 0x9d, 0x0c, 0x51, 0xc8, 0xa6, 0x0c, 0x51, 0xc1, 0xa5, 0x0c, 0x51, 0xb9, + 0xa4, 0x0c, 0x51, 0xb1, 0xa3, 0x0c, 0x51, 0xa9, 0xa2, 0x0c, 0x51, 0xa1, + 0xa1, 0x0c, 0x51, 0x99, 0xa0, 0x0c, 0x51, 0x91, 0x9f, 0x0c, 0x51, 0x89, + 0x9e, 0x0c, 0x51, 0x81, 0x9d, 0x0c, 0x51, 0x78, 0xa6, 0x0c, 0x51, 0x71, + 0xa5, 0x0c, 0x51, 0x69, 0xa4, 0x0c, 0x51, 0x61, 0xa3, 0x0c, 0x51, 0x59, + 0xa2, 0x0c, 0x51, 0x51, 0xa1, 0x0c, 0x51, 0x49, 0xa0, 0x0c, 0x51, 0x41, + 0x9f, 0x0c, 0x51, 0x39, 0x9e, 0x0c, 0x51, 0x31, 0x9d, 0x0c, 0x51, 0x28, + 0xa6, 0x0c, 0x51, 0x21, 0xa5, 0x0c, 0x51, 0x19, 0xa4, 0x0c, 0x51, 0x11, + 0xa3, 0x0c, 0x51, 0x09, 0xa2, 0x0c, 0x51, 0x01, 0xa1, 0x0c, 0x50, 0xf9, + 0xa0, 0x0c, 0x50, 0xf1, 0x9f, 0x0c, 0x50, 0xe9, 0x9e, 0x0c, 0x50, 0xe1, + 0x9d, 0x0c, 0x50, 0xd8, 0xa6, 0x0c, 0x50, 0xd1, 0xa5, 0x0c, 0x50, 0xc9, + 0xa4, 0x0c, 0x50, 0xc1, 0xa3, 0x0c, 0x50, 0xb9, 0xa2, 0x0c, 0x50, 0xb1, + 0xa1, 0x0c, 0x50, 0xa9, 0xa0, 0x0c, 0x50, 0xa1, 0x9f, 0x0c, 0x50, 0x99, + 0x9e, 0x0c, 0x50, 0x91, 0x9d, 0x0c, 0x50, 0x88, 0xa6, 0x0c, 0x50, 0x81, + 0xa5, 0x0c, 0x50, 0x79, 0xa4, 0x0c, 0x50, 0x71, 0xa3, 0x0c, 0x50, 0x69, + 0xa2, 0x0c, 0x50, 0x61, 0xa1, 0x0c, 0x50, 0x59, 0xa0, 0x0c, 0x50, 0x51, + 0x9f, 0x0c, 0x50, 0x49, 0x9e, 0x0c, 0x50, 0x41, 0x9d, 0x0c, 0x50, 0x38, + 0xa6, 0x0c, 0x50, 0x31, 0xa5, 0x0c, 0x50, 0x29, 0xa4, 0x0c, 0x50, 0x21, + 0xa3, 0x0c, 0x50, 0x19, 0xa2, 0x0c, 0x50, 0x11, 0xa1, 0x0c, 0x50, 0x09, + 0xa0, 0x0c, 0x50, 0x01, 0x9f, 0x0c, 0x4f, 0xf9, 0x9e, 0x0c, 0x4f, 0xf1, + 0x9d, 0x0c, 0x4f, 0xe8, 0xa6, 0x0c, 0x4f, 0xe1, 0xa5, 0x0c, 0x4f, 0xd9, + 0xa4, 0x0c, 0x4f, 0xd1, 0xa3, 0x0c, 0x4f, 0xc9, 0xa2, 0x0c, 0x4f, 0xc1, + 0xa1, 0x0c, 0x4f, 0xb9, 0xa0, 0x0c, 0x4f, 0xb1, 0x9f, 0x0c, 0x4f, 0xa9, + 0x9e, 0x0c, 0x4f, 0xa1, 0x9d, 0x0c, 0x4f, 0x98, 0xa6, 0x0c, 0x4f, 0x91, + 0xa5, 0x0c, 0x4f, 0x89, 0xa4, 0x0c, 0x4f, 0x81, 0xa3, 0x0c, 0x4f, 0x79, + 0xa2, 0x0c, 0x4f, 0x71, 0xa1, 0x0c, 0x4f, 0x69, 0xa0, 0x0c, 0x4f, 0x61, + 0x9f, 0x0c, 0x4f, 0x59, 0x9e, 0x0c, 0x4f, 0x51, 0x9d, 0x0c, 0x4f, 0x48, + 0xa6, 0x0c, 0x4f, 0x41, 0xa5, 0x0c, 0x4f, 0x39, 0xa4, 0x0c, 0x4f, 0x31, + 0xa3, 0x0c, 0x4f, 0x29, 0xa2, 0x0c, 0x4f, 0x21, 0xa1, 0x0c, 0x4f, 0x19, + 0xa0, 0x0c, 0x4f, 0x11, 0x9f, 0x0c, 0x4f, 0x09, 0x9e, 0x0c, 0x4f, 0x01, + 0x9d, 0x0c, 0x4e, 0xf8, 0xa6, 0x0c, 0x4e, 0xf1, 0xa5, 0x0c, 0x4e, 0xe9, + 0xa4, 0x0c, 0x4e, 0xe1, 0xa3, 0x0c, 0x4e, 0xd9, 0xa2, 0x0c, 0x4e, 0xd1, + 0xa1, 0x0c, 
0x4e, 0xc9, 0xa0, 0x0c, 0x4e, 0xc1, 0x9f, 0x0c, 0x4e, 0xb9, + 0x9e, 0x0c, 0x4e, 0xb1, 0x9d, 0x0c, 0x4e, 0xa8, 0xa6, 0x0c, 0x4e, 0xa1, + 0xa5, 0x0c, 0x4e, 0x99, 0xa4, 0x0c, 0x4e, 0x91, 0xa3, 0x0c, 0x4e, 0x89, + 0xa2, 0x0c, 0x4e, 0x81, 0xa1, 0x0c, 0x4e, 0x79, 0xa0, 0x0c, 0x4e, 0x71, + 0x9f, 0x0c, 0x4e, 0x69, 0x9e, 0x0c, 0x4e, 0x61, 0x9d, 0x0c, 0x4e, 0x58, + 0xa6, 0x0c, 0x4e, 0x51, 0xa5, 0x0c, 0x4e, 0x49, 0xa4, 0x0c, 0x4e, 0x41, + 0xa3, 0x0c, 0x4e, 0x39, 0xa2, 0x0c, 0x4e, 0x31, 0xa1, 0x0c, 0x4e, 0x29, + 0xa0, 0x0c, 0x4e, 0x21, 0x9f, 0x0c, 0x4e, 0x19, 0x9e, 0x0c, 0x4e, 0x11, + 0x9d, 0x0c, 0x4e, 0x08, 0xa6, 0x0c, 0x4e, 0x01, 0xa5, 0x0c, 0x4d, 0xf9, + 0xa4, 0x0c, 0x4d, 0xf1, 0xa3, 0x0c, 0x4d, 0xe9, 0xa2, 0x0c, 0x4d, 0xe1, + 0xa1, 0x0c, 0x4d, 0xd9, 0xa0, 0x0c, 0x4d, 0xd1, 0x9f, 0x0c, 0x4d, 0xc9, + 0x9e, 0x0c, 0x4d, 0xc1, 0x9d, 0x0c, 0x4d, 0xb8, 0xa6, 0x0c, 0x4d, 0xb1, + 0xa5, 0x0c, 0x4d, 0xa9, 0xa4, 0x0c, 0x4d, 0xa1, 0xa3, 0x0c, 0x4d, 0x99, + 0xa2, 0x0c, 0x4d, 0x91, 0xa1, 0x0c, 0x4d, 0x89, 0xa0, 0x0c, 0x4d, 0x81, + 0x9f, 0x0c, 0x4d, 0x79, 0x9e, 0x0c, 0x4d, 0x71, 0x9d, 0x0c, 0x4d, 0x68, + 0xa6, 0x0c, 0x4d, 0x61, 0xa5, 0x0c, 0x4d, 0x59, 0xa4, 0x0c, 0x4d, 0x51, + 0xa3, 0x0c, 0x4d, 0x49, 0xa2, 0x0c, 0x4d, 0x41, 0xa1, 0x0c, 0x4d, 0x39, + 0xa0, 0x0c, 0x4d, 0x31, 0x9f, 0x0c, 0x4d, 0x29, 0x9e, 0x0c, 0x4d, 0x21, + 0x9d, 0x0c, 0x4d, 0x18, 0xa6, 0x0c, 0x4d, 0x11, 0xa5, 0x0c, 0x4d, 0x09, + 0xa4, 0x0c, 0x4d, 0x01, 0xa3, 0x0c, 0x4c, 0xf9, 0xa2, 0x0c, 0x4c, 0xf1, + 0xa1, 0x0c, 0x4c, 0xe9, 0xa0, 0x0c, 0x4c, 0xe1, 0x9f, 0x0c, 0x4c, 0xd9, + 0x9e, 0x0c, 0x4c, 0xd1, 0x9d, 0x0c, 0x4c, 0xc8, 0xa6, 0x0c, 0x4c, 0xc1, + 0xa5, 0x0c, 0x4c, 0xb9, 0xa4, 0x0c, 0x4c, 0xb1, 0xa3, 0x0c, 0x4c, 0xa9, + 0xa2, 0x0c, 0x4c, 0xa1, 0xa1, 0x0c, 0x4c, 0x99, 0xa0, 0x0c, 0x4c, 0x91, + 0x9f, 0x0c, 0x4c, 0x89, 0x9e, 0x0c, 0x4c, 0x81, 0x9d, 0x0c, 0x4c, 0x78, + 0xa6, 0x0c, 0x4c, 0x71, 0xa5, 0x0c, 0x4c, 0x69, 0xa4, 0x0c, 0x4c, 0x61, + 0xa3, 0x0c, 0x4c, 0x59, 0xa2, 0x0c, 0x4c, 0x51, 0xa1, 0x0c, 0x4c, 0x49, + 0xa0, 0x0c, 0x4c, 0x41, 0x9f, 0x0c, 0x4c, 0x39, 0x9e, 0x0c, 0x4c, 0x31, + 0x9d, 0x0c, 0x4c, 0x28, 0xa6, 0x0c, 0x4c, 0x21, 0xa5, 0x0c, 0x4c, 0x19, + 0xa4, 0x0c, 0x4c, 0x11, 0xa3, 0x0c, 0x4c, 0x09, 0xa2, 0x0c, 0x4c, 0x01, + 0xa1, 0x0c, 0x4b, 0xf9, 0xa0, 0x0c, 0x4b, 0xf1, 0x9f, 0x0c, 0x4b, 0xe9, + 0x9e, 0x0c, 0x4b, 0xe1, 0x9d, 0x0c, 0x4b, 0xd8, 0xa6, 0x0c, 0x4b, 0xd1, + 0xa5, 0x0c, 0x4b, 0xc9, 0xa4, 0x0c, 0x4b, 0xc1, 0xa3, 0x0c, 0x4b, 0xb9, + 0xa2, 0x0c, 0x4b, 0xb1, 0xa1, 0x0c, 0x4b, 0xa9, 0xa0, 0x0c, 0x4b, 0xa1, + 0x9f, 0x0c, 0x4b, 0x99, 0x9e, 0x0c, 0x4b, 0x91, 0x9d, 0x0c, 0x4b, 0x88, + 0xa6, 0x0c, 0x4b, 0x81, 0xa5, 0x0c, 0x4b, 0x79, 0xa4, 0x0c, 0x4b, 0x71, + 0xa3, 0x0c, 0x4b, 0x69, 0xa2, 0x0c, 0x4b, 0x61, 0xa1, 0x0c, 0x4b, 0x59, + 0xa0, 0x0c, 0x4b, 0x51, 0x9f, 0x0c, 0x4b, 0x49, 0x9e, 0x0c, 0x4b, 0x41, + 0x9d, 0x0c, 0x4b, 0x38, 0xa6, 0x0c, 0x4b, 0x31, 0xa5, 0x0c, 0x4b, 0x29, + 0xa4, 0x0c, 0x4b, 0x21, 0xa3, 0x0c, 0x4b, 0x19, 0xa2, 0x0c, 0x4b, 0x11, + 0xa1, 0x0c, 0x4b, 0x09, 0xa0, 0x0c, 0x4b, 0x01, 0x9f, 0x0c, 0x4a, 0xf9, + 0x9e, 0x0c, 0x4a, 0xf1, 0x9d, 0x0c, 0x4a, 0xe8, 0xa6, 0x0c, 0x4a, 0xe1, + 0xa5, 0x0c, 0x4a, 0xd9, 0xa4, 0x0c, 0x4a, 0xd1, 0xa3, 0x0c, 0x4a, 0xc9, + 0xa2, 0x0c, 0x4a, 0xc1, 0xa1, 0x0c, 0x4a, 0xb9, 0xa0, 0x0c, 0x4a, 0xb1, + 0x9f, 0x0c, 0x4a, 0xa9, 0x9e, 0x0c, 0x4a, 0xa1, 0x9d, 0x0c, 0x4a, 0x98, + 0xa6, 0x0c, 0x4a, 0x91, 0xa5, 0x0c, 0x4a, 0x89, 0xa4, 0x0c, 0x4a, 0x81, + 0xa3, 0x0c, 0x4a, 0x79, 0xa2, 0x0c, 0x4a, 0x71, 0xa1, 0x0c, 0x4a, 0x69, + 0xa0, 0x0c, 0x4a, 0x61, 0x9f, 0x0c, 0x4a, 0x59, 0x9e, 0x0c, 0x4a, 0x51, + 0x9d, 0x0c, 
0x4a, 0x48, 0xa6, 0x0c, 0x4a, 0x41, 0xa5, 0x0c, 0x4a, 0x39, + 0xa4, 0x0c, 0x4a, 0x31, 0xa3, 0x0c, 0x4a, 0x29, 0xa2, 0x0c, 0x4a, 0x21, + 0xa1, 0x0c, 0x4a, 0x19, 0xa0, 0x0c, 0x4a, 0x11, 0x9f, 0x0c, 0x4a, 0x09, + 0x9e, 0x0c, 0x4a, 0x01, 0x9d, 0x0c, 0x49, 0xf8, 0xa6, 0x0c, 0x49, 0xf1, + 0xa5, 0x0c, 0x49, 0xe9, 0xa4, 0x0c, 0x49, 0xe1, 0xa3, 0x0c, 0x49, 0xd9, + 0xa2, 0x0c, 0x49, 0xd1, 0xa1, 0x0c, 0x49, 0xc9, 0xa0, 0x0c, 0x49, 0xc1, + 0x9f, 0x0c, 0x49, 0xb9, 0x9e, 0x0c, 0x49, 0xb1, 0x9d, 0x0c, 0x49, 0xa8, + 0xa6, 0x0c, 0x49, 0xa1, 0xa5, 0x0c, 0x49, 0x99, 0xa4, 0x0c, 0x49, 0x91, + 0xa3, 0x0c, 0x49, 0x89, 0xa2, 0x0c, 0x49, 0x81, 0xa1, 0x0c, 0x49, 0x79, + 0xa0, 0x0c, 0x49, 0x71, 0x9f, 0x0c, 0x49, 0x69, 0x9e, 0x0c, 0x49, 0x61, + 0x9d, 0x0c, 0x49, 0x58, 0xa6, 0x0c, 0x49, 0x51, 0xa5, 0x0c, 0x49, 0x49, + 0xa4, 0x0c, 0x49, 0x41, 0xa3, 0x0c, 0x49, 0x39, 0xa2, 0x0c, 0x49, 0x31, + 0xa1, 0x0c, 0x49, 0x29, 0xa0, 0x0c, 0x49, 0x21, 0x9f, 0x0c, 0x49, 0x19, + 0x9e, 0x0c, 0x49, 0x11, 0x9d, 0x0c, 0x49, 0x08, 0xa6, 0x0c, 0x49, 0x01, + 0xa5, 0x0c, 0x48, 0xf9, 0xa4, 0x0c, 0x48, 0xf1, 0xa3, 0x0c, 0x48, 0xe9, + 0xa2, 0x0c, 0x48, 0xe1, 0xa1, 0x0c, 0x48, 0xd9, 0xa0, 0x0c, 0x48, 0xd1, + 0x9f, 0x0c, 0x48, 0xc9, 0x9e, 0x0c, 0x48, 0xc1, 0x9d, 0x0c, 0x48, 0xb8, + 0xa6, 0x0c, 0x48, 0xb1, 0xa5, 0x0c, 0x48, 0xa9, 0xa4, 0x0c, 0x48, 0xa1, + 0xa3, 0x0c, 0x48, 0x99, 0xa2, 0x0c, 0x48, 0x91, 0xa1, 0x0c, 0x48, 0x89, + 0xa0, 0x0c, 0x48, 0x81, 0x9f, 0x0c, 0x48, 0x79, 0x9e, 0x0c, 0x48, 0x71, + 0x9d, 0x0c, 0x48, 0x68, 0xa6, 0x0c, 0x48, 0x61, 0xa5, 0x0c, 0x48, 0x59, + 0xa4, 0x0c, 0x48, 0x51, 0xa3, 0x0c, 0x48, 0x49, 0xa2, 0x0c, 0x48, 0x41, + 0xa1, 0x0c, 0x48, 0x39, 0xa0, 0x0c, 0x48, 0x31, 0x9f, 0x0c, 0x48, 0x29, + 0x9e, 0x0c, 0x48, 0x21, 0x9d, 0x0c, 0x48, 0x18, 0xa6, 0x0c, 0x48, 0x11, + 0xa5, 0x0c, 0x48, 0x09, 0xa4, 0x0c, 0x48, 0x01, 0xa3, 0x0c, 0x47, 0xf9, + 0xa2, 0x0c, 0x47, 0xf1, 0xa1, 0x0c, 0x47, 0xe9, 0xa0, 0x0c, 0x47, 0xe1, + 0x9f, 0x0c, 0x47, 0xd9, 0x9e, 0x0c, 0x47, 0xd1, 0x9d, 0x0c, 0x47, 0xc8, + 0xa6, 0x0c, 0x47, 0xc1, 0xa5, 0x0c, 0x47, 0xb9, 0xa4, 0x0c, 0x47, 0xb1, + 0xa3, 0x0c, 0x47, 0xa9, 0xa2, 0x0c, 0x47, 0xa1, 0xa1, 0x0c, 0x47, 0x99, + 0xa0, 0x0c, 0x47, 0x91, 0x9f, 0x0c, 0x47, 0x89, 0x9e, 0x0c, 0x47, 0x81, + 0x9d, 0x0c, 0x47, 0x78, 0xa6, 0x0c, 0x47, 0x71, 0xa5, 0x0c, 0x47, 0x69, + 0xa4, 0x0c, 0x47, 0x61, 0xa3, 0x0c, 0x47, 0x59, 0xa2, 0x0c, 0x47, 0x51, + 0xa1, 0x0c, 0x47, 0x49, 0xa0, 0x0c, 0x47, 0x41, 0x9f, 0x0c, 0x47, 0x39, + 0x9e, 0x0c, 0x47, 0x31, 0x9d, 0x0c, 0x47, 0x28, 0xa6, 0x0c, 0x47, 0x21, + 0xa5, 0x0c, 0x47, 0x19, 0xa4, 0x0c, 0x47, 0x11, 0xa3, 0x0c, 0x47, 0x09, + 0xa2, 0x0c, 0x47, 0x01, 0xa1, 0x0c, 0x46, 0xf9, 0xa0, 0x0c, 0x46, 0xf1, + 0x9f, 0x0c, 0x46, 0xe9, 0x9e, 0x0c, 0x46, 0xe1, 0x9d, 0x0c, 0x46, 0xd8, + 0xa6, 0x0c, 0x46, 0xd1, 0xa5, 0x0c, 0x46, 0xc9, 0xa4, 0x0c, 0x46, 0xc1, + 0xa3, 0x0c, 0x46, 0xb9, 0xa2, 0x0c, 0x46, 0xb1, 0xa1, 0x0c, 0x46, 0xa9, + 0xa0, 0x0c, 0x46, 0xa1, 0x9f, 0x0c, 0x46, 0x99, 0x9e, 0x0c, 0x46, 0x91, + 0x9d, 0x0c, 0x46, 0x88, 0xa6, 0x0c, 0x46, 0x81, 0xa5, 0x0c, 0x46, 0x79, + 0xa4, 0x0c, 0x46, 0x71, 0xa3, 0x0c, 0x46, 0x69, 0xa2, 0x0c, 0x46, 0x61, + 0xa1, 0x0c, 0x46, 0x59, 0xa0, 0x0c, 0x46, 0x51, 0x9f, 0x0c, 0x46, 0x49, + 0x9e, 0x0c, 0x46, 0x41, 0x9d, 0x0c, 0x46, 0x38, 0xa6, 0x0c, 0x46, 0x31, + 0xa5, 0x0c, 0x46, 0x29, 0xa4, 0x0c, 0x46, 0x21, 0xa3, 0x0c, 0x46, 0x19, + 0xa2, 0x0c, 0x46, 0x11, 0xa1, 0x0c, 0x46, 0x09, 0xa0, 0x0c, 0x46, 0x01, + 0x9f, 0x0c, 0x45, 0xf9, 0x9e, 0x0c, 0x45, 0xf1, 0x9d, 0x0c, 0x45, 0xe8, + 0xa6, 0x0c, 0x45, 0xe1, 0xa5, 0x0c, 0x45, 0xd9, 0xa4, 0x0c, 0x45, 0xd1, + 0xa3, 0x0c, 
0x45, 0xc9, 0xa2, 0x0c, 0x45, 0xc1, 0xa1, 0x0c, 0x45, 0xb9, + 0xa0, 0x0c, 0x45, 0xb1, 0x9f, 0x0c, 0x45, 0xa9, 0x9e, 0x0c, 0x45, 0xa1, + 0x9d, 0x0c, 0x45, 0x98, 0xa6, 0x0c, 0x45, 0x91, 0xa5, 0x0c, 0x45, 0x89, + 0xa4, 0x0c, 0x45, 0x81, 0xa3, 0x0c, 0x45, 0x79, 0xa2, 0x0c, 0x45, 0x71, + 0xa1, 0x0c, 0x45, 0x69, 0xa0, 0x0c, 0x45, 0x61, 0x9f, 0x0c, 0x45, 0x59, + 0x9e, 0x0c, 0x45, 0x51, 0x9d, 0x0c, 0x45, 0x48, 0xa6, 0x0c, 0x45, 0x41, + 0xa5, 0x0c, 0x45, 0x39, 0xa4, 0x0c, 0x45, 0x31, 0xa3, 0x0c, 0x45, 0x29, + 0xa2, 0x0c, 0x45, 0x21, 0xa1, 0x0c, 0x45, 0x19, 0xa0, 0x0c, 0x45, 0x11, + 0x9f, 0x0c, 0x45, 0x09, 0x9e, 0x0c, 0x45, 0x01, 0x9d, 0x0c, 0x44, 0xf8, + 0xa6, 0x0c, 0x44, 0xf1, 0xa5, 0x0c, 0x44, 0xe9, 0xa4, 0x0c, 0x44, 0xe1, + 0xa3, 0x0c, 0x44, 0xd9, 0xa2, 0x0c, 0x44, 0xd1, 0xa1, 0x0c, 0x44, 0xc9, + 0xa0, 0x0c, 0x44, 0xc1, 0x9f, 0x0c, 0x44, 0xb9, 0x9e, 0x0c, 0x44, 0xb1, + 0x9d, 0x0c, 0x44, 0xa8, 0xa6, 0x0c, 0x44, 0xa1, 0xa5, 0x0c, 0x44, 0x99, + 0xa4, 0x0c, 0x44, 0x91, 0xa3, 0x0c, 0x44, 0x89, 0xa2, 0x0c, 0x44, 0x81, + 0xa1, 0x0c, 0x44, 0x79, 0xa0, 0x0c, 0x44, 0x71, 0x9f, 0x0c, 0x44, 0x69, + 0x9e, 0x0c, 0x44, 0x61, 0x9d, 0x0c, 0x44, 0x58, 0xa6, 0x0c, 0x44, 0x51, + 0xa5, 0x0c, 0x44, 0x49, 0xa4, 0x0c, 0x44, 0x41, 0xa3, 0x0c, 0x44, 0x39, + 0xa2, 0x0c, 0x44, 0x31, 0xa1, 0x0c, 0x44, 0x29, 0xa0, 0x0c, 0x44, 0x21, + 0x9f, 0x0c, 0x44, 0x19, 0x9e, 0x0c, 0x44, 0x11, 0x9d, 0x0c, 0x44, 0x08, + 0xa6, 0x0c, 0x44, 0x01, 0xa5, 0x0c, 0x43, 0xf9, 0xa4, 0x0c, 0x43, 0xf1, + 0xa3, 0x0c, 0x43, 0xe9, 0xa2, 0x0c, 0x43, 0xe1, 0xa1, 0x0c, 0x43, 0xd9, + 0xa0, 0x0c, 0x43, 0xd1, 0x9f, 0x0c, 0x43, 0xc9, 0x9e, 0x0c, 0x43, 0xc1, + 0x9d, 0x0c, 0x43, 0xb8, 0xa6, 0x0c, 0x43, 0xb1, 0xa5, 0x0c, 0x43, 0xa9, + 0xa4, 0x0c, 0x43, 0xa1, 0xa3, 0x0c, 0x43, 0x99, 0xa2, 0x0c, 0x43, 0x91, + 0xa1, 0x0c, 0x43, 0x89, 0xa0, 0x0c, 0x43, 0x81, 0x9f, 0x0c, 0x43, 0x79, + 0x9e, 0x0c, 0x43, 0x71, 0x9d, 0x0c, 0x43, 0x68, 0xa6, 0x0c, 0x43, 0x61, + 0xa5, 0x0c, 0x43, 0x59, 0xa4, 0x0c, 0x43, 0x51, 0xa3, 0x0c, 0x43, 0x49, + 0xa2, 0x0c, 0x43, 0x41, 0xa1, 0x0c, 0x43, 0x39, 0xa0, 0x0c, 0x43, 0x31, + 0x9f, 0x0c, 0x43, 0x29, 0x9e, 0x0c, 0x43, 0x21, 0x9d, 0x0c, 0x43, 0x18, + 0xa6, 0x0c, 0x43, 0x11, 0xa5, 0x0c, 0x43, 0x09, 0xa4, 0x0c, 0x43, 0x01, + 0xa3, 0x0c, 0x42, 0xf9, 0xa2, 0x0c, 0x42, 0xf1, 0xa1, 0x0c, 0x42, 0xe9, + 0xa0, 0x0c, 0x42, 0xe1, 0x9f, 0x0c, 0x42, 0xd9, 0x9e, 0x0c, 0x42, 0xd1, + 0x9d, 0x0c, 0x42, 0xc8, 0xa6, 0x0c, 0x42, 0xc1, 0xa5, 0x0c, 0x42, 0xb9, + 0xa4, 0x0c, 0x42, 0xb1, 0xa3, 0x0c, 0x42, 0xa9, 0xa2, 0x0c, 0x42, 0xa1, + 0xa1, 0x0c, 0x42, 0x99, 0xa0, 0x0c, 0x42, 0x91, 0x9f, 0x0c, 0x42, 0x89, + 0x9e, 0x0c, 0x42, 0x81, 0x9d, 0x0c, 0x42, 0x78, 0xa6, 0x0c, 0x42, 0x71, + 0xa5, 0x0c, 0x42, 0x69, 0xa4, 0x0c, 0x42, 0x61, 0xa3, 0x0c, 0x42, 0x59, + 0xa2, 0x0c, 0x42, 0x51, 0xa1, 0x0c, 0x42, 0x49, 0xa0, 0x0c, 0x42, 0x41, + 0x9f, 0x0c, 0x42, 0x39, 0x9e, 0x0c, 0x42, 0x31, 0x9d, 0x0c, 0x42, 0x28, + 0xa6, 0x0c, 0x42, 0x21, 0xa5, 0x0c, 0x42, 0x19, 0xa4, 0x0c, 0x42, 0x11, + 0xa3, 0x0c, 0x42, 0x09, 0xa2, 0x0c, 0x42, 0x01, 0xa1, 0x0c, 0x41, 0xf9, + 0xa0, 0x0c, 0x41, 0xf1, 0x9f, 0x0c, 0x41, 0xe9, 0x9e, 0x0c, 0x41, 0xe1, + 0x9d, 0x0c, 0x41, 0xd8, 0xa6, 0x0c, 0x41, 0xd1, 0xa5, 0x0c, 0x41, 0xc9, + 0xa4, 0x0c, 0x41, 0xc1, 0xa3, 0x0c, 0x41, 0xb9, 0xa2, 0x0c, 0x41, 0xb1, + 0xa1, 0x0c, 0x41, 0xa9, 0xa0, 0x0c, 0x41, 0xa1, 0x9f, 0x0c, 0x41, 0x99, + 0x9e, 0x0c, 0x41, 0x91, 0x9d, 0x0c, 0x41, 0x88, 0xa6, 0x0c, 0x41, 0x81, + 0xa5, 0x0c, 0x41, 0x79, 0xa4, 0x0c, 0x41, 0x71, 0xa3, 0x0c, 0x41, 0x69, + 0xa2, 0x0c, 0x41, 0x61, 0xa1, 0x0c, 0x41, 0x59, 0xa0, 0x0c, 0x41, 0x51, + 0x9f, 0x0c, 
0x41, 0x49, 0x9e, 0x0c, 0x41, 0x41, 0x9d, 0x0c, 0x41, 0x38, + 0xa6, 0x0c, 0x41, 0x31, 0xa5, 0x0c, 0x41, 0x29, 0xa4, 0x0c, 0x41, 0x21, + 0xa3, 0x0c, 0x41, 0x19, 0xa2, 0x0c, 0x41, 0x11, 0xa1, 0x0c, 0x41, 0x09, + 0xa0, 0x0c, 0x41, 0x01, 0x9f, 0x0c, 0x40, 0xf9, 0x9e, 0x0c, 0x40, 0xf1, + 0x9d, 0x0c, 0x40, 0xe8, 0xa6, 0x0c, 0x40, 0xe1, 0xa5, 0x0c, 0x40, 0xd9, + 0xa4, 0x0c, 0x40, 0xd1, 0xa3, 0x0c, 0x40, 0xc9, 0xa2, 0x0c, 0x40, 0xc1, + 0xa1, 0x0c, 0x40, 0xb9, 0xa0, 0x0c, 0x40, 0xb1, 0x9f, 0x0c, 0x40, 0xa9, + 0x9e, 0x0c, 0x40, 0xa1, 0x9d, 0x0c, 0x40, 0x98, 0xa6, 0x0c, 0x40, 0x91, + 0xa5, 0x0c, 0x40, 0x89, 0xa4, 0x0c, 0x40, 0x81, 0xa3, 0x0c, 0x40, 0x79, + 0xa2, 0x0c, 0x40, 0x71, 0xa1, 0x0c, 0x40, 0x69, 0xa0, 0x0c, 0x40, 0x61, + 0x9f, 0x0c, 0x40, 0x59, 0x9e, 0x0c, 0x40, 0x51, 0x9d, 0x0c, 0x40, 0x48, + 0xa6, 0x0c, 0x40, 0x41, 0xa5, 0x0c, 0x40, 0x39, 0xa4, 0x0c, 0x40, 0x31, + 0xa3, 0x0c, 0x40, 0x29, 0xa2, 0x0c, 0x40, 0x21, 0xa1, 0x0c, 0x40, 0x19, + 0xa0, 0x0c, 0x40, 0x11, 0x9f, 0x0c, 0x40, 0x09, 0x9e, 0x0c, 0x40, 0x00, + 0xc2, 0x01, 0x6f, 0x0b, 0x55, 0xc1, 0x83, 0x0b, 0x55, 0x78, 0x83, 0x0b, + 0x55, 0xa1, 0x44, 0x2e, 0xf0, 0x43, 0x1a, 0x12, 0x17, 0xc3, 0x1a, 0x1e, + 0x9a, 0x0b, 0x54, 0x79, 0x93, 0x0b, 0x54, 0x71, 0x85, 0x0b, 0x54, 0x69, + 0x9c, 0x0b, 0x54, 0x60, 0x9a, 0x0b, 0x54, 0xb9, 0x93, 0x0b, 0x54, 0xb1, + 0x9c, 0x0b, 0x54, 0xa9, 0x85, 0x0b, 0x54, 0xa0, 0x9a, 0x0b, 0x54, 0x59, + 0x93, 0x0b, 0x54, 0x51, 0x85, 0x0b, 0x54, 0x49, 0x9c, 0x0b, 0x54, 0x40, + 0xc8, 0xb5, 0x2a, 0x08, 0xff, 0x89, 0xc6, 0xce, 0x27, 0x08, 0xff, 0x00, + 0xc5, 0x40, 0xe7, 0x00, 0x5c, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x5e, 0x68, + 0xc3, 0x7c, 0xc4, 0x08, 0xff, 0x11, 0xc4, 0xc9, 0xed, 0x08, 0xfe, 0xd0, + 0xc4, 0x70, 0x1e, 0x08, 0xff, 0x09, 0xc3, 0x00, 0xc1, 0x08, 0xfe, 0xf1, + 0xc6, 0xd1, 0x27, 0x08, 0xfe, 0xd8, 0x83, 0x00, 0x5d, 0x19, 0xc2, 0x00, + 0xc1, 0x00, 0x5d, 0x48, 0x83, 0x00, 0x5d, 0x99, 0xc2, 0x00, 0xdb, 0x00, + 0x5d, 0xa0, 0xcb, 0x8b, 0xe9, 0x08, 0xfe, 0x29, 0xd9, 0x1e, 0x05, 0x08, + 0xfe, 0x00, 0x9f, 0x08, 0xfe, 0x51, 0x9e, 0x08, 0xfe, 0x48, 0xa2, 0x00, + 0xd3, 0xc9, 0xa1, 0x00, 0xd3, 0xc1, 0xa0, 0x00, 0xd3, 0xb8, 0xc2, 0x00, + 0xdb, 0x00, 0xd2, 0xb1, 0xc2, 0x00, 0x39, 0x00, 0xd2, 0xa8, 0xc2, 0x00, + 0xd0, 0x00, 0xd1, 0xe9, 0x83, 0x00, 0xd1, 0xd8, 0xc2, 0x00, 0xd0, 0x00, + 0xd1, 0xa9, 0x83, 0x00, 0xd1, 0xa0, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x59, + 0x83, 0x00, 0xd1, 0x48, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x29, 0xc2, 0x8d, + 0x8f, 0x00, 0xd1, 0x21, 0x83, 0x00, 0xd1, 0x18, 0xc2, 0x01, 0x23, 0x05, + 0x54, 0x29, 0x91, 0x05, 0x54, 0x18, 0xc2, 0x01, 0x23, 0x05, 0x54, 0x21, + 0x91, 0x05, 0x54, 0x10, 0x00, 0xc3, 0x1a, 0x2e, 0xc3, 0x9b, 0x00, 0x00, + 0x72, 0xd8, 0xc2, 0x00, 0xc4, 0x00, 0x70, 0x99, 0x97, 0x00, 0x70, 0xc8, + 0x89, 0x00, 0x70, 0x50, 0x15, 0xc3, 0x1a, 0x3a, 0xc4, 0xde, 0xf3, 0x00, + 0x71, 0x48, 0x83, 0x00, 0x71, 0x83, 0x03, 0x1a, 0x4a, 0x8b, 0x00, 0x71, + 0xa3, 0x03, 0x1a, 0x5c, 0x97, 0x00, 0x71, 0xc3, 0x03, 0x1a, 0x60, 0x87, + 0x00, 0x72, 0x01, 0x91, 0x00, 0x72, 0x10, 0xc3, 0x00, 0x74, 0x00, 0x70, + 0x69, 0xc2, 0x06, 0x4e, 0x00, 0x71, 0x10, 0xc5, 0xd4, 0x25, 0x00, 0x70, + 0x79, 0xc3, 0x97, 0x59, 0x00, 0x70, 0xa8, 0x42, 0x01, 0x7c, 0xc3, 0x1a, + 0x6b, 0xc9, 0xb1, 0x3a, 0x00, 0x72, 0x60, 0x42, 0x01, 0x7c, 0xc3, 0x1a, + 0x7d, 0xc5, 0xd4, 0x2f, 0x00, 0x71, 0xd0, 0x90, 0x00, 0x70, 0xf8, 0x00, + 0xc3, 0x1a, 0x89, 0xc5, 0xd4, 0x98, 0x00, 0x72, 0x31, 0xc6, 0xd3, 0x07, + 0x00, 0x72, 0x38, 0xc4, 0x04, 0x15, 0x00, 0x71, 0x29, 0xc5, 0xdb, 0xf5, + 0x00, 0x71, 0x60, 0x91, 0x0f, 0x15, 0x48, 0x97, 0x0f, 0x15, 0x20, 0x94, + 0x00, 0x60, 
0x5b, 0x03, 0x1a, 0x9f, 0x8e, 0x00, 0x60, 0x62, 0x03, 0x1a, + 0xa3, 0xcb, 0x90, 0x44, 0x00, 0x62, 0xe8, 0x83, 0x00, 0x60, 0xf9, 0xc2, + 0x00, 0xd0, 0x00, 0x61, 0x00, 0x83, 0x00, 0x61, 0x09, 0xc2, 0x00, 0xd0, + 0x00, 0x61, 0x10, 0x83, 0x00, 0x61, 0x89, 0xc2, 0x00, 0x39, 0x00, 0x62, + 0xd0, 0x83, 0x00, 0x61, 0x99, 0xc2, 0x00, 0xdb, 0x00, 0x61, 0xa0, 0x8e, + 0x08, 0xa4, 0x50, 0x94, 0x08, 0xa4, 0x40, 0xcb, 0x97, 0x19, 0x00, 0x7e, + 0x51, 0xcb, 0x8f, 0x47, 0x00, 0x7e, 0x59, 0xcb, 0x97, 0xd4, 0x00, 0x7e, + 0x60, 0x09, 0xc3, 0x1a, 0xa7, 0xc8, 0xbc, 0x82, 0x00, 0x78, 0xf8, 0x09, + 0xc3, 0x1a, 0xb9, 0xc9, 0xa9, 0xbd, 0x00, 0x7e, 0x70, 0x83, 0x00, 0x7c, + 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x7c, 0xd8, 0x83, 0x00, 0x7d, 0x49, 0xc2, + 0x00, 0xd0, 0x00, 0x7d, 0x50, 0x83, 0x00, 0x7c, 0xe1, 0xc2, 0x00, 0xd0, + 0x00, 0x7c, 0xe8, 0x83, 0x00, 0x7d, 0x59, 0xc2, 0x00, 0xd0, 0x00, 0x7d, + 0x60, 0xcc, 0x85, 0x05, 0x00, 0x78, 0x11, 0xcd, 0x75, 0x99, 0x00, 0x78, + 0x18, 0x8a, 0x01, 0x69, 0xa0, 0x8a, 0x01, 0x69, 0xd0, 0x8a, 0x01, 0x69, + 0xf8, 0x4d, 0x06, 0x5a, 0xc3, 0x1a, 0xcb, 0x45, 0x19, 0x60, 0xc3, 0x1a, + 0xd7, 0x44, 0x19, 0x6a, 0xc3, 0x1a, 0xe1, 0x44, 0x2b, 0xb9, 0x43, 0x1a, + 0xeb, 0x44, 0x2b, 0xb9, 0xc3, 0x1a, 0xf7, 0x4d, 0x06, 0x5a, 0xc3, 0x1b, + 0x03, 0x45, 0x19, 0x60, 0xc3, 0x1b, 0x0f, 0x45, 0x30, 0xc1, 0x43, 0x1b, + 0x19, 0xd1, 0x4f, 0xad, 0x07, 0xe2, 0xa1, 0xda, 0x1c, 0xba, 0x07, 0xe2, + 0x99, 0x45, 0x19, 0x60, 0xc3, 0x1b, 0x23, 0x46, 0x30, 0xc1, 0xc3, 0x1b, + 0x2d, 0xdd, 0x10, 0xa3, 0x07, 0xe6, 0xc8, 0x49, 0xb2, 0x6c, 0xc3, 0x1b, + 0x39, 0x4a, 0xa7, 0xe2, 0x43, 0x1b, 0x61, 0x4d, 0x06, 0x5a, 0xc3, 0x1b, + 0x79, 0x45, 0x19, 0x60, 0xc3, 0x1b, 0x85, 0x45, 0x50, 0xf0, 0xc3, 0x1b, + 0x95, 0x0a, 0xc3, 0x1b, 0xa5, 0x45, 0x30, 0xc1, 0xc3, 0x1b, 0xb1, 0x44, + 0x72, 0xf0, 0xc3, 0x1b, 0xc1, 0x44, 0x2b, 0xb9, 0x43, 0x1b, 0xcd, 0x47, + 0x06, 0xb4, 0xc3, 0x1b, 0xd9, 0x0e, 0x43, 0x1b, 0xfd, 0xcd, 0x00, 0xfa, + 0x07, 0xe7, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xb0, 0x0b, 0xc3, 0x1c, + 0x07, 0x45, 0x00, 0x8c, 0x43, 0x1c, 0x13, 0xcc, 0x00, 0xfb, 0x07, 0xe1, + 0x59, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xe0, 0xca, 0x26, 0xf7, 0x07, 0xe8, + 0xa9, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xc8, 0x4d, 0x06, 0x5a, 0xc3, 0x1c, + 0x25, 0x45, 0x19, 0x60, 0xc3, 0x1c, 0x31, 0x45, 0x30, 0xc1, 0xc3, 0x1c, + 0x3b, 0x44, 0x2b, 0xb9, 0x43, 0x1c, 0x45, 0x43, 0x06, 0x5c, 0xc3, 0x1c, + 0x51, 0x43, 0x14, 0x6d, 0xc3, 0x1c, 0x5d, 0xd1, 0x51, 0x9a, 0x07, 0xef, + 0x90, 0x47, 0x0e, 0x9d, 0xc3, 0x1c, 0x6d, 0xd2, 0x47, 0x81, 0x07, 0xea, + 0x70, 0x48, 0xab, 0xf5, 0xc3, 0x1c, 0x85, 0x46, 0x38, 0xb9, 0x43, 0x1c, + 0xb5, 0x44, 0x2b, 0xb9, 0xc3, 0x1c, 0xbb, 0x4d, 0x06, 0x5a, 0xc3, 0x1c, + 0xc7, 0xcf, 0x60, 0x8a, 0x07, 0xe3, 0x99, 0x45, 0x19, 0x60, 0xc3, 0x1c, + 0xd3, 0xcf, 0x69, 0x81, 0x07, 0xe3, 0x89, 0xce, 0x72, 0xf0, 0x07, 0xe3, + 0x81, 0x45, 0x50, 0xf0, 0xc3, 0x1c, 0xe9, 0x0a, 0xc3, 0x1c, 0xf3, 0x45, + 0x30, 0xc1, 0x43, 0x1c, 0xff, 0x43, 0x2b, 0xba, 0xc3, 0x1d, 0x09, 0x03, + 0x43, 0x1d, 0x15, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x81, 0x0b, 0xc3, 0x1d, + 0x21, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x99, 0x45, 0x00, 0x8c, 0x43, 0x1d, + 0x2d, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xe4, + 0xb0, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0xc9, 0xca, 0x26, 0xf7, 0x07, 0xe4, + 0xa8, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0xb9, 0xcb, 0x10, 0xb5, 0x07, 0xe6, + 0xe0, 0x0b, 0xc3, 0x1d, 0x39, 0xd3, 0x43, 0x72, 0x07, 0xed, 0x78, 0x43, + 0x2b, 0xba, 0xc3, 0x1d, 0x45, 0x43, 0x02, 0x98, 0x43, 0x1d, 0x51, 0xcd, + 0x00, 0xfa, 0x07, 0xe2, 0x81, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x78, 0xcd, + 0x00, 0xfa, 
0x07, 0xe2, 0x79, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x70, 0x0b, + 0xc3, 0x1d, 0x5b, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x61, 0x45, 0x00, 0x8c, + 0xc3, 0x1d, 0x67, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x70, 0xcc, 0x00, 0xfb, + 0x07, 0xe2, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0xa0, 0x0b, 0xc3, 0x1d, + 0x73, 0x45, 0x00, 0x8c, 0x43, 0x1d, 0x7f, 0x45, 0x19, 0x60, 0xc3, 0x1d, + 0x97, 0x44, 0x0d, 0xff, 0xc3, 0x1d, 0xad, 0x44, 0x2b, 0xb9, 0xc3, 0x1d, + 0xbd, 0x45, 0x06, 0x5a, 0xc3, 0x1d, 0xc9, 0x46, 0x50, 0xf0, 0xc3, 0x1d, + 0xdb, 0x45, 0x50, 0xf1, 0xc3, 0x1d, 0xe7, 0x46, 0x30, 0xc1, 0x43, 0x1d, + 0xf3, 0x46, 0x50, 0x13, 0xc3, 0x1d, 0xff, 0xd1, 0x54, 0x31, 0x07, 0xe0, + 0xd1, 0x46, 0x30, 0xc1, 0xc3, 0x1e, 0x0b, 0x4d, 0x06, 0x5a, 0xc3, 0x1e, + 0x17, 0x44, 0x2b, 0xb9, 0x43, 0x1e, 0x23, 0xca, 0x26, 0xf7, 0x07, 0xe4, + 0x39, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0x20, 0x48, 0x06, 0x5f, 0xc3, 0x1e, + 0x2f, 0x45, 0x00, 0x8c, 0xc3, 0x1e, 0x3b, 0xcd, 0x00, 0xfa, 0x07, 0xf7, + 0xd9, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0xe0, 0xca, 0x26, 0xf7, 0x07, 0xe4, + 0x29, 0x0b, 0xc3, 0x1e, 0x47, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x69, 0x45, + 0x00, 0x8c, 0x43, 0x1e, 0x53, 0x0b, 0xc3, 0x1e, 0x5f, 0x4a, 0x74, 0x6e, + 0x43, 0x1e, 0x6b, 0x43, 0x02, 0x98, 0xc3, 0x1e, 0x77, 0xcf, 0x64, 0xef, + 0x07, 0xe6, 0x68, 0x0b, 0xc3, 0x1e, 0x81, 0x45, 0x00, 0x8c, 0x43, 0x1e, + 0x8d, 0x47, 0x0f, 0x9c, 0xc3, 0x1e, 0x9f, 0x4a, 0xa6, 0xca, 0x43, 0x1e, + 0xb7, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xe9, 0xcd, 0x00, 0xfa, 0x07, 0xe1, + 0x90, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xe1, 0xcd, 0x00, 0xfa, 0x07, 0xe1, + 0x88, 0x0b, 0xc3, 0x1e, 0xbd, 0xd3, 0x43, 0x72, 0x07, 0xee, 0x08, 0x0b, + 0xc3, 0x1e, 0xc9, 0x4a, 0x74, 0x6e, 0x43, 0x1e, 0xd5, 0xcc, 0x00, 0xfb, + 0x07, 0xe1, 0x71, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xf8, 0xcc, 0x00, 0xfb, + 0x07, 0xe1, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xf0, 0x44, 0x2b, 0xb9, + 0xc3, 0x1e, 0xe1, 0x4d, 0x06, 0x5a, 0xc3, 0x1e, 0xed, 0xcf, 0x60, 0x8a, + 0x07, 0xe3, 0x69, 0x45, 0x19, 0x60, 0xc3, 0x1e, 0xf9, 0xcf, 0x69, 0x81, + 0x07, 0xe3, 0x59, 0xce, 0x72, 0xf0, 0x07, 0xe3, 0x51, 0x45, 0x50, 0xf0, + 0xc3, 0x1f, 0x09, 0x0a, 0xc3, 0x1f, 0x13, 0x46, 0x30, 0xc1, 0x43, 0x1f, + 0x1f, 0xe0, 0x07, 0x27, 0x07, 0xe2, 0xe0, 0xce, 0x6d, 0x32, 0x07, 0xea, + 0x0b, 0x03, 0x1f, 0x2b, 0x46, 0xd2, 0x23, 0xc3, 0x1f, 0x35, 0xd2, 0x4e, + 0xad, 0x07, 0xef, 0xb0, 0xd1, 0x4f, 0xad, 0x07, 0xe2, 0x51, 0x45, 0x06, + 0x5a, 0xc3, 0x1f, 0x41, 0x45, 0x19, 0x60, 0xc3, 0x1f, 0x4d, 0x45, 0x50, + 0xf0, 0xc3, 0x1f, 0x5d, 0x44, 0x19, 0x6a, 0xc3, 0x1f, 0x67, 0x45, 0x30, + 0xc1, 0x43, 0x1f, 0x71, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x41, 0xcb, 0x10, + 0xb5, 0x07, 0xe5, 0xc8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x29, 0xcb, 0x10, + 0xb5, 0x07, 0xe5, 0xb8, 0x0b, 0xc3, 0x1f, 0x7b, 0x4a, 0x74, 0x6e, 0x43, + 0x1f, 0x87, 0x0b, 0xc3, 0x1f, 0x93, 0x45, 0x00, 0x8c, 0x43, 0x1f, 0x9f, + 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x11, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xa0, + 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x81, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x60, + 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x38, + 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x21, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x40, + 0x0b, 0xc3, 0x1f, 0xab, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xd0, 0xc8, 0xbf, + 0x82, 0x00, 0x36, 0x63, 0x03, 0x1f, 0xb7, 0xc2, 0x16, 0x1c, 0x00, 0x32, + 0x0a, 0x03, 0x1f, 0xbb, 0xc3, 0x1a, 0xe0, 0x00, 0x46, 0x41, 0xc4, 0x92, + 0x76, 0x00, 0x31, 0xd3, 0x03, 0x1f, 0xbf, 0xc2, 0x0f, 0x9b, 0x00, 0x35, + 0x7b, 0x03, 0x1f, 0xc3, 0xc3, 0xe5, 0x03, 0x00, 0x35, 0x9a, 0x03, 0x1f, + 0xc7, 0xc2, 0x00, 0xc2, 0x00, 0x32, 0x23, 0x03, 0x1f, 0xcb, 0xc7, 0xca, + 0x6f, 0x00, 
0x45, 0x68, 0xc2, 0x00, 0x4f, 0x00, 0x31, 0x63, 0x03, 0x1f, + 0xcf, 0x8a, 0x00, 0x34, 0xc2, 0x03, 0x1f, 0xd3, 0x47, 0xbd, 0x8a, 0xc3, + 0x1f, 0xd7, 0xc2, 0x00, 0x74, 0x00, 0x31, 0xcb, 0x03, 0x1f, 0xec, 0xc3, + 0x00, 0x49, 0x00, 0x31, 0x3b, 0x03, 0x1f, 0xf0, 0x87, 0x00, 0x36, 0xa8, + 0xc4, 0xe2, 0xe3, 0x00, 0x35, 0x4b, 0x03, 0x1f, 0xf4, 0x03, 0xc3, 0x1f, + 0xf8, 0x47, 0x06, 0x53, 0xc3, 0x20, 0x05, 0xc3, 0x14, 0x4b, 0x00, 0x31, + 0x72, 0x03, 0x20, 0x17, 0xc4, 0xe3, 0x1b, 0x00, 0x34, 0x33, 0x03, 0x20, + 0x1b, 0xc3, 0x2f, 0xc8, 0x00, 0x33, 0xcb, 0x03, 0x20, 0x28, 0xc2, 0x16, + 0x1c, 0x00, 0x31, 0x53, 0x03, 0x20, 0x35, 0xc2, 0x02, 0x98, 0x00, 0x31, + 0xbb, 0x03, 0x20, 0x42, 0x0a, 0x43, 0x20, 0x46, 0x00, 0xc3, 0x20, 0x5e, + 0xc2, 0x16, 0x1c, 0x00, 0x35, 0x32, 0x03, 0x20, 0x74, 0xc2, 0x16, 0x1c, + 0x00, 0x32, 0x53, 0x03, 0x20, 0x78, 0x97, 0x00, 0x36, 0x42, 0x03, 0x20, + 0x7c, 0xc2, 0x16, 0x1c, 0x00, 0x31, 0x8b, 0x03, 0x20, 0x80, 0xcb, 0x96, + 0x53, 0x00, 0x45, 0x61, 0xc4, 0x3a, 0x01, 0x00, 0x35, 0xdb, 0x03, 0x20, + 0x84, 0xc3, 0x72, 0xf0, 0x00, 0x34, 0x8a, 0x03, 0x20, 0x88, 0x8a, 0x00, + 0x31, 0x43, 0x03, 0x20, 0x8c, 0xc2, 0x16, 0x1c, 0x00, 0x33, 0xda, 0x03, + 0x20, 0x99, 0x42, 0x00, 0x2d, 0xc3, 0x20, 0x9d, 0x00, 0x43, 0x20, 0xa3, + 0x00, 0x43, 0x20, 0xb8, 0x00, 0x43, 0x20, 0xce, 0xc2, 0x00, 0x74, 0x00, + 0x31, 0x93, 0x03, 0x20, 0xde, 0x8a, 0x00, 0x31, 0xc2, 0x03, 0x20, 0xe2, + 0xcb, 0x8c, 0xdf, 0x00, 0x45, 0x89, 0xc2, 0x01, 0x9d, 0x00, 0x31, 0xab, + 0x03, 0x20, 0xe6, 0xc4, 0xdf, 0x93, 0x00, 0x31, 0xa3, 0x03, 0x20, 0xea, + 0xc8, 0xba, 0x32, 0x00, 0x35, 0x51, 0xc3, 0x03, 0x26, 0x00, 0x31, 0x9b, + 0x03, 0x20, 0xee, 0xcf, 0x07, 0x2a, 0x00, 0x33, 0x80, 0x03, 0xc3, 0x20, + 0xf2, 0x42, 0x0e, 0x9a, 0xc3, 0x21, 0x09, 0xc2, 0x03, 0x66, 0x00, 0x34, + 0x73, 0x03, 0x21, 0x19, 0xc3, 0x2b, 0xb9, 0x00, 0x34, 0x23, 0x03, 0x21, + 0x1d, 0x47, 0x3b, 0xc4, 0x43, 0x21, 0x21, 0x00, 0xc3, 0x21, 0x33, 0x8a, + 0x00, 0x35, 0x22, 0x03, 0x21, 0x3f, 0x00, 0x43, 0x21, 0x43, 0xc3, 0x12, + 0xc2, 0x00, 0x32, 0x2b, 0x03, 0x21, 0x55, 0xc3, 0x01, 0xc4, 0x00, 0x30, + 0xe0, 0x00, 0x43, 0x21, 0x59, 0x89, 0x00, 0x35, 0x6b, 0x03, 0x21, 0x65, + 0xc3, 0x01, 0x54, 0x00, 0x32, 0x33, 0x03, 0x21, 0x72, 0xc3, 0x2b, 0xb9, + 0x00, 0x34, 0x1a, 0x03, 0x21, 0x76, 0x03, 0xc3, 0x21, 0x7a, 0xc2, 0x16, + 0x1c, 0x00, 0x32, 0x3b, 0x03, 0x21, 0x8a, 0xc9, 0xae, 0xd6, 0x00, 0x33, + 0xa2, 0x03, 0x21, 0x8e, 0x4c, 0x73, 0x54, 0xc3, 0x21, 0x92, 0x46, 0x3b, + 0xc5, 0x43, 0x21, 0xfa, 0x8e, 0x0f, 0x70, 0x19, 0x86, 0x0f, 0x70, 0xc8, + 0x8a, 0x0f, 0x70, 0x41, 0x45, 0x14, 0xa8, 0x43, 0x22, 0x12, 0xc2, 0x16, + 0x1c, 0x0f, 0x70, 0xb1, 0xc2, 0x00, 0x65, 0x0f, 0x70, 0xc0, 0x03, 0xc3, + 0x22, 0x50, 0xc3, 0x85, 0xf5, 0x0f, 0x74, 0x09, 0xc4, 0x30, 0xc1, 0x0f, + 0x74, 0x11, 0x42, 0x0e, 0x9a, 0xc3, 0x22, 0x5c, 0x0a, 0xc3, 0x22, 0x64, + 0xc3, 0x7e, 0x89, 0x0f, 0x74, 0x29, 0x42, 0x02, 0x1c, 0xc3, 0x22, 0x70, + 0x16, 0xc3, 0x22, 0x7a, 0xc3, 0x2b, 0xb9, 0x0f, 0x74, 0x49, 0xc3, 0x0d, + 0xff, 0x0f, 0x74, 0x59, 0xc4, 0x19, 0x60, 0x0f, 0x74, 0x61, 0xc4, 0x3a, + 0x01, 0x0f, 0x74, 0x69, 0x15, 0xc3, 0x22, 0x8a, 0xc3, 0xb1, 0x0d, 0x0f, + 0x74, 0x81, 0xc3, 0x0f, 0x9a, 0x0f, 0x74, 0x91, 0xc3, 0x72, 0xf0, 0x0f, + 0x74, 0x99, 0xc4, 0x14, 0x4a, 0x0f, 0x74, 0xb9, 0xc5, 0x92, 0x75, 0x0f, + 0x74, 0xd8, 0xc3, 0x85, 0xf5, 0x0f, 0x73, 0x09, 0xc4, 0x30, 0xc1, 0x0f, + 0x73, 0x11, 0x0a, 0xc3, 0x22, 0x9c, 0x16, 0xc3, 0x22, 0xa8, 0xc3, 0x2b, + 0xb9, 0x0f, 0x73, 0x49, 0x0d, 0xc3, 0x22, 0xba, 0xc4, 0x19, 0x60, 0x0f, + 0x73, 0x61, 0xc4, 0x3a, 0x01, 0x0f, 0x73, 0x69, 0x15, 0xc3, 0x22, 0xc6, + 0xc3, 0x03, 
0x0c, 0x0f, 0x73, 0x79, 0xc3, 0xb1, 0x0d, 0x0f, 0x73, 0x81, + 0xc3, 0x0f, 0x9a, 0x0f, 0x73, 0x91, 0x06, 0xc3, 0x22, 0xd8, 0xc3, 0x74, + 0x6a, 0x0f, 0x73, 0xd1, 0xc5, 0x92, 0x75, 0x0f, 0x73, 0xd8, 0xc2, 0x16, + 0x1c, 0x0f, 0x71, 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x71, 0x38, 0xc2, 0x0f, + 0x9b, 0x0f, 0x71, 0x51, 0xc3, 0x14, 0x4b, 0x0f, 0x71, 0xb8, 0xc3, 0x03, + 0x26, 0x0f, 0x71, 0x71, 0xc2, 0x01, 0x9d, 0x0f, 0x71, 0x89, 0xc4, 0xdf, + 0x93, 0x0f, 0x71, 0xa0, 0xc2, 0x16, 0x1c, 0x0f, 0x71, 0xa9, 0xc3, 0x64, + 0x77, 0x0f, 0x71, 0xb0, 0xc8, 0x33, 0xae, 0x00, 0x47, 0xf1, 0xcd, 0x00, + 0xfa, 0x07, 0xf3, 0xc1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xc8, 0xce, 0x00, + 0xf9, 0x07, 0xf3, 0x90, 0xc9, 0x16, 0x14, 0x00, 0x47, 0xa9, 0xc4, 0x00, + 0x9d, 0x00, 0x47, 0xa1, 0xc8, 0x02, 0x9f, 0x00, 0x32, 0xf0, 0xc2, 0x39, + 0x8b, 0x00, 0x47, 0x99, 0x44, 0x1d, 0xc8, 0x43, 0x22, 0xe4, 0xc9, 0xad, + 0xbf, 0x00, 0x47, 0x09, 0xc2, 0x01, 0x9d, 0x00, 0x46, 0xa9, 0xc3, 0x03, + 0x26, 0x00, 0x36, 0xe0, 0xce, 0x6f, 0x7e, 0x00, 0x47, 0x01, 0xc8, 0xbf, + 0x82, 0x00, 0x46, 0x50, 0xcb, 0x60, 0x7f, 0x00, 0x46, 0xc0, 0x8a, 0x00, + 0x46, 0x69, 0xc2, 0x00, 0x74, 0x00, 0x30, 0xb8, 0xdb, 0x17, 0xcd, 0x00, + 0x46, 0x58, 0xc4, 0x41, 0xc1, 0x00, 0x37, 0x21, 0x45, 0x30, 0xc2, 0x43, + 0x22, 0xfa, 0xc9, 0x02, 0xde, 0x00, 0x36, 0xd9, 0xc2, 0x02, 0x98, 0x00, + 0x30, 0xa8, 0xc7, 0xca, 0x7d, 0x00, 0x36, 0xc9, 0x48, 0x19, 0x9b, 0x43, + 0x23, 0x06, 0xc2, 0x39, 0x8b, 0x00, 0x46, 0x99, 0x44, 0x1d, 0xc8, 0x43, + 0x23, 0x18, 0xc5, 0x05, 0x02, 0x00, 0x46, 0x81, 0xcd, 0x00, 0xfa, 0x07, + 0xf3, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xf8, 0x4b, 0x05, 0x29, 0xc3, + 0x23, 0x22, 0xc5, 0x05, 0x02, 0x07, 0xdd, 0xa9, 0xc5, 0x00, 0xd4, 0x07, + 0xdd, 0xa0, 0x53, 0x26, 0x03, 0xc3, 0x23, 0x2e, 0xc5, 0x05, 0x02, 0x07, + 0xdd, 0xb9, 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xb0, 0xc5, 0x05, 0x02, 0x07, + 0xdd, 0x99, 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0x90, 0xd0, 0x5f, 0xd2, 0x00, + 0x37, 0xf1, 0xc9, 0x36, 0x20, 0x00, 0x37, 0xe8, 0xda, 0x1d, 0x22, 0x00, + 0x30, 0x81, 0xc4, 0xe4, 0x0f, 0x00, 0x30, 0x21, 0xc3, 0xa8, 0x39, 0x00, + 0x30, 0x19, 0xc3, 0x39, 0x71, 0x00, 0x30, 0x08, 0xce, 0x04, 0xf9, 0x00, + 0x44, 0x29, 0x4b, 0x97, 0x5b, 0xc3, 0x23, 0x3a, 0xce, 0x71, 0x06, 0x07, + 0xf3, 0x88, 0xc2, 0xe5, 0xfd, 0x0f, 0xb9, 0x88, 0xc8, 0x8c, 0x89, 0x0f, + 0xb9, 0x71, 0xc6, 0x4c, 0x49, 0x0f, 0xb9, 0x38, 0xcb, 0x03, 0xbc, 0x01, + 0x1a, 0xb9, 0xc6, 0xcd, 0xc1, 0x01, 0x1a, 0x60, 0xc2, 0x01, 0x6f, 0x01, + 0x1a, 0x68, 0xc5, 0x3a, 0x1b, 0x01, 0x19, 0xd1, 0xc4, 0x07, 0xb2, 0x01, + 0x19, 0xc8, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xd9, 0xc8, 0x4b, 0x94, 0x08, + 0x09, 0x20, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xd1, 0xc8, 0x4b, 0x94, 0x08, + 0x09, 0x18, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xe9, 0xc8, 0x4b, 0x94, 0x08, + 0x09, 0x30, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xe1, 0xc8, 0x4b, 0x94, 0x08, + 0x09, 0x28, 0xc7, 0x3a, 0x19, 0x0f, 0xdd, 0x71, 0x47, 0x04, 0xcb, 0xc3, + 0x23, 0x46, 0x46, 0x02, 0xae, 0xc3, 0x23, 0x52, 0xc5, 0x0d, 0x20, 0x01, + 0x2b, 0x98, 0xc2, 0x01, 0x48, 0x01, 0x2b, 0xbb, 0x03, 0x23, 0x64, 0x4a, + 0xa2, 0xa6, 0x43, 0x23, 0x6a, 0x0a, 0xc3, 0x23, 0x76, 0xc4, 0x00, 0x49, + 0x01, 0x28, 0xc1, 0xc5, 0x00, 0x2c, 0x01, 0x28, 0xa0, 0xc5, 0x00, 0x2c, + 0x01, 0x2b, 0x81, 0xc4, 0x00, 0x49, 0x01, 0x2b, 0x78, 0xc4, 0x00, 0x49, + 0x01, 0x2b, 0x71, 0xc5, 0x00, 0x2c, 0x01, 0x2b, 0x68, 0xca, 0x01, 0x68, + 0x01, 0x29, 0xe1, 0xc4, 0x00, 0x49, 0x01, 0x29, 0x21, 0xc5, 0x00, 0x2c, + 0x01, 0x28, 0xe0, 0xc9, 0x12, 0x0d, 0x01, 0x2b, 0xf9, 0xc3, 0x00, 0x4a, + 0x01, 0x28, 0xd8, 0xca, 0x01, 0x68, 0x01, 0x29, 0x99, 0xc4, 0x00, 0x49, + 0x01, 0x28, 
0x99, 0xc5, 0x00, 0x2c, 0x01, 0x28, 0x78, 0xca, 0x01, 0x68, + 0x01, 0x2b, 0x61, 0xc4, 0x00, 0x49, 0x01, 0x2b, 0x19, 0xc5, 0x00, 0x2c, + 0x01, 0x2b, 0x00, 0xc8, 0x11, 0xff, 0x01, 0x29, 0x49, 0xc5, 0x11, 0x39, + 0x01, 0x28, 0x88, 0xc8, 0x11, 0xff, 0x01, 0x29, 0x09, 0xc5, 0x11, 0x39, + 0x01, 0x28, 0x68, 0xc8, 0x11, 0x49, 0x01, 0x29, 0x39, 0xc5, 0x07, 0xeb, + 0x01, 0x28, 0x90, 0xc8, 0x11, 0x49, 0x01, 0x28, 0xf9, 0xc5, 0x07, 0xeb, + 0x01, 0x28, 0x70, 0xa3, 0x0f, 0xd9, 0xa0, 0xa3, 0x0f, 0xd9, 0x61, 0xa2, + 0x0f, 0xd8, 0xe8, 0xa3, 0x0f, 0xd9, 0xc0, 0xa3, 0x0f, 0xd9, 0xd0, 0xa3, + 0x0f, 0xd9, 0xd8, 0xd7, 0x2a, 0xf5, 0x0f, 0xd2, 0x60, 0xc5, 0x56, 0xa5, + 0x01, 0x32, 0xf3, 0x03, 0x23, 0x82, 0xc3, 0x00, 0x74, 0x01, 0x32, 0xd2, + 0x03, 0x23, 0x8c, 0x49, 0x2a, 0xf5, 0x43, 0x23, 0x92, 0x49, 0x2a, 0xf5, + 0x43, 0x23, 0x9e, 0x49, 0x2a, 0xf5, 0x43, 0x23, 0xaa, 0x49, 0x2a, 0xf5, + 0x43, 0x23, 0xb6, 0x0d, 0xc3, 0x23, 0xc2, 0xc5, 0xa8, 0xf7, 0x0f, 0xd1, + 0x29, 0xc4, 0xde, 0x83, 0x0f, 0xd1, 0x31, 0xc6, 0xca, 0xfd, 0x0f, 0xd1, + 0x39, 0xc4, 0xe3, 0x93, 0x0f, 0xd1, 0x48, 0xcf, 0x14, 0x22, 0x01, 0x5d, + 0x71, 0xcd, 0x1b, 0x41, 0x01, 0x5d, 0x60, 0xcf, 0x09, 0xf8, 0x01, 0x5d, + 0x41, 0xd0, 0x03, 0xb7, 0x01, 0x5d, 0x48, 0xcf, 0x09, 0xf8, 0x01, 0x5d, + 0x51, 0xd0, 0x03, 0xb7, 0x01, 0x5d, 0x58, 0xcd, 0x1b, 0x41, 0x01, 0x5d, + 0x69, 0xcf, 0x14, 0x22, 0x01, 0x5d, 0x78, 0x45, 0x00, 0x8c, 0xc3, 0x23, + 0xce, 0xca, 0xa0, 0x62, 0x01, 0x1f, 0xd0, 0x15, 0xc3, 0x23, 0xe0, 0xc7, + 0x3a, 0x19, 0x01, 0x59, 0x49, 0xc7, 0x0a, 0xe0, 0x01, 0x59, 0x50, 0xc8, + 0xbe, 0xa2, 0x01, 0x1f, 0xc9, 0xc6, 0x86, 0xfd, 0x0f, 0xa9, 0x91, 0xc7, + 0x5e, 0xa7, 0x01, 0x5e, 0x00, 0xd8, 0x23, 0xab, 0x0f, 0xbc, 0x19, 0xce, + 0x6c, 0x8a, 0x01, 0x2d, 0xf1, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0xe1, 0xcf, + 0x65, 0x94, 0x01, 0x1f, 0x60, 0xcd, 0x7d, 0xc6, 0x01, 0x3a, 0xb1, 0xc4, + 0x22, 0xdc, 0x01, 0x33, 0x31, 0xcf, 0x6a, 0x44, 0x01, 0x4f, 0x51, 0xc7, + 0x5e, 0xa7, 0x01, 0x5e, 0x09, 0xc8, 0xb5, 0xe2, 0x01, 0x5e, 0xf0, 0xc4, + 0x5b, 0x26, 0x01, 0x36, 0x19, 0xc3, 0x12, 0xb8, 0x01, 0x36, 0x10, 0xd8, + 0x23, 0xab, 0x0f, 0xbc, 0x11, 0x12, 0xc3, 0x23, 0xec, 0xce, 0x6c, 0x8a, + 0x01, 0x2d, 0xc1, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0xb3, 0x03, 0x23, 0xf8, + 0xcf, 0x65, 0x94, 0x01, 0x1f, 0x4a, 0x03, 0x23, 0xfe, 0xc5, 0x01, 0xa2, + 0x01, 0x3d, 0x0b, 0x03, 0x24, 0x04, 0xc6, 0x1c, 0xb4, 0x01, 0x02, 0x69, + 0xd5, 0x03, 0xd2, 0x01, 0x5c, 0xf0, 0xc5, 0x06, 0x82, 0x01, 0x30, 0xd9, + 0xce, 0x24, 0xd5, 0x0f, 0xac, 0xe8, 0xd8, 0x23, 0xab, 0x0f, 0xbc, 0x01, + 0xc7, 0x46, 0x3d, 0x01, 0x2e, 0x21, 0xce, 0x6c, 0x8a, 0x01, 0x2e, 0x11, + 0xc8, 0x01, 0x92, 0x01, 0x2e, 0x01, 0xcf, 0x65, 0x94, 0x01, 0x1f, 0x52, + 0x03, 0x24, 0x0a, 0xca, 0xa6, 0x34, 0x01, 0x36, 0xc1, 0x49, 0x01, 0xaa, + 0x43, 0x24, 0x10, 0xc6, 0x1c, 0xb4, 0x01, 0x02, 0x61, 0xd5, 0x03, 0xd2, + 0x01, 0x5c, 0xe0, 0xcd, 0x2f, 0x72, 0x01, 0x2f, 0x19, 0xce, 0x23, 0xb5, + 0x01, 0x2f, 0x10, 0x45, 0x03, 0x14, 0xc3, 0x24, 0x1c, 0xc5, 0x0b, 0x0a, + 0x01, 0x2f, 0xe0, 0xd5, 0x2e, 0xad, 0x01, 0x1f, 0xbb, 0x03, 0x24, 0x2e, + 0xc6, 0x3a, 0x1a, 0x01, 0x59, 0x28, 0xc8, 0x5e, 0xa6, 0x01, 0x5e, 0x28, + 0xc8, 0x5e, 0xa6, 0x01, 0x5e, 0x40, 0xd5, 0x32, 0x03, 0x01, 0x1f, 0xa3, + 0x03, 0x24, 0x34, 0xc6, 0x0a, 0xe1, 0x01, 0x59, 0x38, 0xce, 0x23, 0xb5, + 0x01, 0x2f, 0x29, 0xcd, 0x2f, 0x72, 0x01, 0x2f, 0x20, 0xce, 0x6c, 0x8a, + 0x01, 0x2d, 0xa1, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0x91, 0xcf, 0x65, 0x94, + 0x01, 0x1f, 0x59, 0xd8, 0x23, 0xab, 0x0f, 0xbc, 0x08, 0xc5, 0x22, 0xdb, + 0x01, 0x33, 0x28, 0x46, 0x00, 0x8b, 0x43, 0x24, 0x3a, 0xcd, 0x7c, 0x8e, + 0x00, 0xdb, 
0x88, 0xcd, 0x7c, 0x8e, 0x00, 0xdb, 0x80, 0x00, 0x43, 0x24, + 0x54, 0xc4, 0xb5, 0x3e, 0x00, 0xd9, 0x19, 0xcf, 0x60, 0x21, 0x00, 0xd8, + 0xf1, 0xc5, 0xdc, 0x9f, 0x00, 0xd8, 0xe8, 0xc9, 0x60, 0x27, 0x00, 0xd9, + 0x01, 0xc9, 0xb3, 0x8c, 0x00, 0xd8, 0xf8, 0xc4, 0xa1, 0x14, 0x00, 0xd9, + 0xfb, 0x03, 0x24, 0x60, 0xc6, 0xc2, 0x6d, 0x00, 0xda, 0x00, 0x97, 0x0b, + 0x50, 0x29, 0x83, 0x0b, 0x50, 0x19, 0xc2, 0x00, 0xb0, 0x0b, 0x51, 0xb1, + 0x91, 0x0b, 0x51, 0x79, 0x07, 0xc3, 0x24, 0x66, 0xc3, 0x17, 0x29, 0x0b, + 0x50, 0xb0, 0xc4, 0xbf, 0xf1, 0x0b, 0x51, 0xb9, 0x0a, 0xc3, 0x24, 0x6e, + 0xc3, 0xd7, 0xe2, 0x0b, 0x50, 0xa9, 0x8b, 0x0b, 0x50, 0xa1, 0xc2, 0x5d, + 0xa1, 0x0b, 0x50, 0x90, 0xc2, 0x00, 0x3d, 0x0b, 0x51, 0xa9, 0x03, 0x43, + 0x24, 0x7c, 0x04, 0xc3, 0x24, 0x84, 0x91, 0x0b, 0x51, 0x99, 0x83, 0x0b, + 0x51, 0x91, 0xc4, 0xe2, 0x9f, 0x0b, 0x50, 0x68, 0x07, 0xc3, 0x24, 0x90, + 0x97, 0x0b, 0x51, 0x19, 0x0b, 0x43, 0x24, 0x9e, 0xc2, 0x7f, 0xc0, 0x0b, + 0x51, 0x71, 0x8b, 0x0b, 0x51, 0x69, 0x83, 0x0b, 0x50, 0x50, 0x83, 0x0b, + 0x51, 0x61, 0xc2, 0x0f, 0xe1, 0x0b, 0x51, 0x08, 0xc3, 0x8b, 0xa9, 0x0b, + 0x51, 0x51, 0x07, 0x43, 0x24, 0xa8, 0x09, 0xc3, 0x24, 0xb2, 0x8b, 0x0b, + 0x51, 0x21, 0xc3, 0x14, 0x09, 0x0b, 0x51, 0x01, 0xc3, 0x01, 0xe2, 0x0b, + 0x50, 0xf1, 0x0c, 0xc3, 0x24, 0xbe, 0x97, 0x0b, 0x50, 0xcb, 0x03, 0x24, + 0xca, 0xc3, 0x4f, 0x43, 0x0b, 0x50, 0x79, 0xc2, 0x16, 0x5a, 0x0b, 0x50, + 0x48, 0x83, 0x0b, 0x50, 0xe9, 0xc2, 0x7f, 0xc0, 0x0b, 0x50, 0xd8, 0x0a, + 0xc3, 0x24, 0xd0, 0x42, 0x00, 0x51, 0x43, 0x24, 0xe0, 0x17, 0xc3, 0x24, + 0xea, 0xc3, 0xd7, 0xe2, 0x0b, 0x4c, 0xf0, 0xc4, 0xe3, 0xf7, 0x0b, 0x4b, + 0xa1, 0x8b, 0x0b, 0x4f, 0xf1, 0x91, 0x0b, 0x4f, 0xc9, 0x07, 0xc3, 0x24, + 0xf2, 0x17, 0x43, 0x24, 0xfa, 0x09, 0xc3, 0x25, 0x0a, 0x06, 0xc3, 0x25, + 0x29, 0x42, 0x01, 0xe2, 0xc3, 0x25, 0x37, 0x83, 0x0b, 0x4f, 0xb3, 0x03, + 0x25, 0x41, 0x0c, 0xc3, 0x25, 0x45, 0x16, 0xc3, 0x25, 0x4f, 0x1c, 0xc3, + 0x25, 0x5b, 0x43, 0x70, 0x51, 0xc3, 0x25, 0x67, 0xc3, 0xbc, 0x2f, 0x0b, + 0x4d, 0x40, 0x03, 0xc3, 0x25, 0x73, 0x11, 0xc3, 0x25, 0x88, 0x07, 0xc3, + 0x25, 0x93, 0x17, 0x43, 0x25, 0x9e, 0x97, 0x0b, 0x4d, 0x03, 0x03, 0x25, + 0xab, 0x03, 0xc3, 0x25, 0xb7, 0x8b, 0x0b, 0x4f, 0xbb, 0x03, 0x25, 0xc4, + 0x07, 0xc3, 0x25, 0xc8, 0x91, 0x0b, 0x4c, 0xc2, 0x03, 0x25, 0xd2, 0x03, + 0xc3, 0x25, 0xd8, 0xc3, 0xd7, 0xe2, 0x0b, 0x4f, 0x79, 0xc5, 0xd4, 0x39, + 0x0b, 0x4c, 0x10, 0xc2, 0x00, 0x7a, 0x0b, 0x4b, 0x69, 0x0a, 0xc3, 0x25, + 0xe0, 0xc4, 0xb5, 0x1a, 0x0b, 0x4c, 0xd9, 0x07, 0xc3, 0x25, 0xf3, 0xc2, + 0x04, 0xc6, 0x0b, 0x4c, 0x28, 0x11, 0xc3, 0x25, 0xfb, 0x03, 0xc3, 0x26, + 0x07, 0x97, 0x0b, 0x4f, 0x69, 0xc5, 0xdc, 0xea, 0x0b, 0x4d, 0x98, 0xc2, + 0x00, 0x7a, 0x0b, 0x4b, 0x51, 0x07, 0x43, 0x26, 0x15, 0x42, 0x00, 0x51, + 0xc3, 0x26, 0x1f, 0xc2, 0x00, 0x45, 0x0b, 0x4f, 0xf9, 0x83, 0x0b, 0x4f, + 0xdb, 0x03, 0x26, 0x29, 0xc2, 0x00, 0xc4, 0x0b, 0x4f, 0xd1, 0x8b, 0x0b, + 0x4f, 0x73, 0x03, 0x26, 0x38, 0xc2, 0x07, 0xb2, 0x0b, 0x4e, 0x49, 0xc3, + 0x8b, 0xa9, 0x0b, 0x4e, 0x31, 0xc4, 0xe0, 0x4f, 0x0b, 0x4d, 0x79, 0x42, + 0x1f, 0xad, 0x43, 0x26, 0x3e, 0x83, 0x0b, 0x4d, 0xdb, 0x03, 0x26, 0x48, + 0x17, 0xc3, 0x26, 0x4c, 0xc2, 0x02, 0xe0, 0x0b, 0x4f, 0x59, 0xc2, 0x00, + 0x7a, 0x0b, 0x4e, 0x98, 0x17, 0xc3, 0x26, 0x57, 0x43, 0x8a, 0x2d, 0xc3, + 0x26, 0x6b, 0x42, 0x2c, 0x43, 0xc3, 0x26, 0x77, 0x0b, 0xc3, 0x26, 0x88, + 0xc2, 0x00, 0xb6, 0x0b, 0x4d, 0x60, 0x09, 0xc3, 0x26, 0x92, 0x15, 0xc3, + 0x26, 0x9a, 0x16, 0xc3, 0x26, 0xaa, 0x06, 0xc3, 0x26, 0xb4, 0x8b, 0x0b, + 0x4a, 0xd9, 0x97, 0x0b, 0x4a, 0xb9, 0x1b, 0xc3, 0x26, 0xc4, 0x0c, 0x43, + 0x26, 0xda, 
0x07, 0xc3, 0x26, 0xf3, 0xc2, 0x7f, 0xc0, 0x0b, 0x4a, 0xf9, + 0xc2, 0x01, 0xdf, 0x0b, 0x48, 0xf1, 0xc3, 0x8f, 0x8a, 0x0b, 0x47, 0xb0, + 0x03, 0xc3, 0x27, 0x01, 0x07, 0xc3, 0x27, 0x0d, 0x04, 0xc3, 0x27, 0x17, + 0xc3, 0x9c, 0xc7, 0x0b, 0x4a, 0xf1, 0x97, 0x0b, 0x4a, 0x99, 0x08, 0xc3, + 0x27, 0x26, 0x42, 0x1f, 0xad, 0xc3, 0x27, 0x39, 0xc3, 0x07, 0x85, 0x0b, + 0x48, 0xc8, 0x07, 0xc3, 0x27, 0x4b, 0x97, 0x0b, 0x48, 0x8b, 0x03, 0x27, + 0x55, 0x8b, 0x0b, 0x4b, 0x09, 0xc2, 0x7f, 0xc0, 0x0b, 0x4a, 0x61, 0xc2, + 0x10, 0x11, 0x0b, 0x4a, 0x58, 0x97, 0x0b, 0x4a, 0x4b, 0x03, 0x27, 0x5b, + 0xc3, 0x17, 0x29, 0x0b, 0x4a, 0xb1, 0x07, 0xc3, 0x27, 0x69, 0xc4, 0xde, + 0xb3, 0x0b, 0x49, 0x08, 0x17, 0xc3, 0x27, 0x71, 0x03, 0xc3, 0x27, 0x7f, + 0x0a, 0xc3, 0x27, 0x87, 0xc2, 0x01, 0xbb, 0x0b, 0x49, 0x21, 0xc5, 0x8b, + 0xa8, 0x0b, 0x48, 0x60, 0xc8, 0xb5, 0xc2, 0x0b, 0x48, 0xa1, 0xc2, 0x04, + 0xc6, 0x0b, 0x4b, 0x28, 0xc6, 0xcb, 0x1b, 0x0b, 0x48, 0x29, 0x17, 0xc3, + 0x27, 0x9b, 0xc2, 0x00, 0xc4, 0x0b, 0x48, 0x68, 0x43, 0x03, 0x27, 0xc3, + 0x27, 0xa5, 0xc2, 0x25, 0x9f, 0x0b, 0x4a, 0x71, 0xc3, 0x7c, 0x57, 0x0b, + 0x49, 0x38, 0x17, 0xc3, 0x27, 0xb1, 0x07, 0xc3, 0x27, 0xbb, 0xc2, 0x00, + 0xb6, 0x0b, 0x49, 0xa9, 0xc2, 0x00, 0x7e, 0x0b, 0x49, 0x68, 0xc4, 0x8b, + 0xa8, 0x0b, 0x4a, 0x41, 0xc2, 0x04, 0xc6, 0x0b, 0x48, 0x90, 0xc4, 0xb5, + 0x1a, 0x0b, 0x47, 0xd9, 0xc2, 0x00, 0xb6, 0x0b, 0x47, 0x90, 0x07, 0xc3, + 0x27, 0xc5, 0x17, 0xc3, 0x27, 0xd3, 0xc2, 0x04, 0xc6, 0x0b, 0x45, 0x49, + 0xc5, 0x5c, 0x98, 0x0b, 0x45, 0x40, 0x0a, 0xc3, 0x27, 0xdd, 0x07, 0xc3, + 0x27, 0xe9, 0xc4, 0xa1, 0xee, 0x0b, 0x45, 0x78, 0x07, 0xc3, 0x27, 0xf5, + 0x42, 0x00, 0x8d, 0xc3, 0x27, 0xff, 0xc6, 0xcf, 0xc5, 0x0b, 0x45, 0x60, + 0xc2, 0x00, 0xc4, 0x0b, 0x47, 0x79, 0x0b, 0x43, 0x28, 0x0b, 0xc2, 0x14, + 0xbe, 0x0b, 0x47, 0x69, 0x97, 0x0b, 0x46, 0x69, 0x03, 0x43, 0x28, 0x15, + 0x03, 0xc3, 0x28, 0x1d, 0x09, 0xc3, 0x28, 0x27, 0x0c, 0xc3, 0x28, 0x3b, + 0x06, 0xc3, 0x28, 0x49, 0x15, 0xc3, 0x28, 0x5f, 0x16, 0xc3, 0x28, 0x79, + 0x1c, 0xc3, 0x28, 0x89, 0xd0, 0x5c, 0x92, 0x0b, 0x44, 0xc8, 0xc3, 0x8b, + 0xa9, 0x0b, 0x47, 0x39, 0xc3, 0x8f, 0x8a, 0x0b, 0x47, 0x31, 0x04, 0xc3, + 0x28, 0x93, 0x03, 0xc3, 0x28, 0xa6, 0xc6, 0xd1, 0x93, 0x0b, 0x45, 0xc0, + 0x17, 0xc3, 0x28, 0xae, 0xc2, 0x04, 0xc6, 0x0b, 0x46, 0xc9, 0xc3, 0x92, + 0xb4, 0x0b, 0x45, 0x38, 0xc2, 0x02, 0xae, 0x0b, 0x46, 0x89, 0xc7, 0xc5, + 0xbb, 0x0b, 0x44, 0x90, 0xc5, 0xdb, 0x0a, 0x0b, 0x46, 0x09, 0x9a, 0x0b, + 0x45, 0x88, 0x42, 0x00, 0xd0, 0xc3, 0x28, 0xbe, 0xc4, 0xe1, 0xb7, 0x0b, + 0x44, 0xc0, 0x09, 0xc3, 0x28, 0xc8, 0x15, 0xc3, 0x28, 0xd8, 0x1b, 0xc3, + 0x28, 0xe4, 0xc7, 0xc2, 0x81, 0x0b, 0x43, 0x29, 0xcb, 0x8f, 0x89, 0x0b, + 0x43, 0x20, 0x08, 0xc3, 0x28, 0xf0, 0x83, 0x0b, 0x44, 0x63, 0x03, 0x28, + 0xfc, 0x04, 0xc3, 0x29, 0x02, 0x42, 0x2c, 0x43, 0xc3, 0x29, 0x18, 0xc7, + 0xc5, 0xd7, 0x0b, 0x43, 0xf8, 0xc2, 0x00, 0x8d, 0x0b, 0x43, 0x39, 0xc6, + 0xcb, 0x4b, 0x0b, 0x44, 0x09, 0xc4, 0xdb, 0x8e, 0x0b, 0x43, 0x91, 0xc5, + 0xd9, 0x48, 0x0b, 0x43, 0x08, 0xc4, 0xdc, 0xeb, 0x0b, 0x43, 0x31, 0x90, + 0x0b, 0x43, 0x78, 0x0b, 0xc3, 0x29, 0x22, 0x42, 0x2c, 0x43, 0xc3, 0x29, + 0x2c, 0xc2, 0x00, 0xc2, 0x0b, 0x43, 0x00, 0xc2, 0x00, 0x3d, 0x0b, 0x44, + 0x49, 0x03, 0xc3, 0x29, 0x3e, 0xc8, 0xb6, 0x02, 0x0b, 0x42, 0xd8, 0x87, + 0x0b, 0x44, 0x29, 0xc2, 0xd0, 0x00, 0x0b, 0x44, 0x18, 0xc2, 0x0f, 0xe1, + 0x0b, 0x43, 0xe9, 0xc6, 0xcd, 0x3d, 0x0b, 0x43, 0xb9, 0x42, 0x01, 0x7f, + 0xc3, 0x29, 0x4a, 0xc5, 0xdd, 0xee, 0x0b, 0x42, 0xd1, 0xc3, 0x8f, 0x8a, + 0x0b, 0x42, 0xc8, 0xc3, 0x76, 0x32, 0x0b, 0x43, 0xc1, 0x42, 0x03, 0x53, + 0x43, 0x29, 
0x56, 0xcc, 0x82, 0x71, 0x0b, 0x43, 0x11, 0xc5, 0xdc, 0x9a, + 0x0b, 0x42, 0xf0, 0x11, 0xc3, 0x29, 0x62, 0x0a, 0xc3, 0x29, 0x70, 0xc3, + 0x40, 0xe6, 0x0b, 0x41, 0x19, 0xc2, 0x5d, 0xa1, 0x0b, 0x40, 0xa9, 0xc6, + 0xce, 0x69, 0x0b, 0x40, 0x88, 0x42, 0x2c, 0x43, 0xc3, 0x29, 0x7e, 0x17, + 0xc3, 0x29, 0x8a, 0xc8, 0xb7, 0xb2, 0x0b, 0x40, 0x30, 0xc3, 0xe5, 0x6c, + 0x0b, 0x41, 0xd9, 0x03, 0xc3, 0x29, 0x96, 0xc3, 0x8f, 0x91, 0x0b, 0x41, + 0xa9, 0x07, 0x43, 0x29, 0xa0, 0x03, 0xc3, 0x29, 0xaa, 0x42, 0x01, 0x5d, + 0xc3, 0x29, 0xba, 0x11, 0xc3, 0x29, 0xc4, 0xcb, 0x92, 0xac, 0x0b, 0x41, + 0x29, 0xc5, 0xd1, 0x93, 0x0b, 0x41, 0x21, 0xc9, 0xb5, 0x18, 0x0b, 0x40, + 0x80, 0x03, 0xc3, 0x29, 0xd0, 0xc2, 0x00, 0xc4, 0x0b, 0x42, 0xa1, 0x42, + 0x01, 0xe2, 0xc3, 0x29, 0xda, 0x1b, 0xc3, 0x29, 0xe4, 0xc3, 0xe4, 0x60, + 0x0b, 0x42, 0x39, 0x09, 0xc3, 0x29, 0xf1, 0x0d, 0xc3, 0x2a, 0x03, 0x16, + 0xc3, 0x2a, 0x0f, 0x42, 0x0e, 0x9a, 0xc3, 0x2a, 0x1e, 0xc3, 0x3d, 0xb5, + 0x0b, 0x41, 0x61, 0x1c, 0x43, 0x2a, 0x2a, 0x97, 0x0b, 0x42, 0x9b, 0x03, + 0x2a, 0x36, 0xc5, 0x8e, 0x46, 0x0b, 0x41, 0xc1, 0xc6, 0xd0, 0xa9, 0x0b, + 0x40, 0xc1, 0xc4, 0xe1, 0x8f, 0x0b, 0x40, 0xb8, 0x03, 0xc3, 0x2a, 0x3c, + 0xc2, 0x02, 0xae, 0x0b, 0x41, 0x69, 0xc2, 0x00, 0x3d, 0x0b, 0x41, 0x51, + 0x43, 0x01, 0x55, 0x43, 0x2a, 0x52, 0xc6, 0xcc, 0xe9, 0x0b, 0x42, 0x21, + 0xc8, 0xbb, 0xaa, 0x0b, 0x41, 0x00, 0x45, 0xcf, 0x0c, 0xc3, 0x2a, 0x5e, + 0xc8, 0xbe, 0x62, 0x0b, 0x40, 0x08, 0xc2, 0x0d, 0xf6, 0x00, 0xde, 0xd1, + 0xc2, 0x00, 0xc1, 0x00, 0xde, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0xde, 0x20, + 0xcf, 0x67, 0x92, 0x00, 0x4f, 0x81, 0xce, 0x6e, 0x74, 0x00, 0x4f, 0x88, + 0x94, 0x00, 0x4f, 0x00, 0x8e, 0x00, 0x4f, 0x08, 0xa0, 0x01, 0x40, 0x3b, + 0x03, 0x2a, 0x6a, 0xa1, 0x01, 0x40, 0x5b, 0x03, 0x2a, 0x8a, 0xa2, 0x01, + 0x40, 0x9b, 0x03, 0x2a, 0xa3, 0xa3, 0x01, 0x41, 0x1b, 0x03, 0x2a, 0xb5, + 0xa5, 0x01, 0x44, 0x19, 0xa4, 0x01, 0x42, 0x1a, 0x03, 0x2a, 0xc0, 0xa1, + 0x01, 0x40, 0x6b, 0x03, 0x2a, 0xc4, 0xa2, 0x01, 0x40, 0xab, 0x03, 0x2a, + 0xdd, 0xa3, 0x01, 0x41, 0x2b, 0x03, 0x2a, 0xef, 0xa5, 0x01, 0x44, 0x29, + 0xa4, 0x01, 0x42, 0x2a, 0x03, 0x2a, 0xfa, 0xa2, 0x01, 0x40, 0xcb, 0x03, + 0x2a, 0xfe, 0xa3, 0x01, 0x41, 0x4b, 0x03, 0x2b, 0x10, 0xa5, 0x01, 0x44, + 0x49, 0xa4, 0x01, 0x42, 0x4a, 0x03, 0x2b, 0x1b, 0xa3, 0x01, 0x41, 0x8b, + 0x03, 0x2b, 0x1f, 0xa5, 0x01, 0x44, 0x89, 0xa4, 0x01, 0x42, 0x8a, 0x03, + 0x2b, 0x2a, 0xa5, 0x01, 0x45, 0x09, 0xa4, 0x01, 0x43, 0x0a, 0x03, 0x2b, + 0x2e, 0xa5, 0x01, 0x46, 0x08, 0xa1, 0x01, 0x40, 0x73, 0x03, 0x2b, 0x32, + 0xa2, 0x01, 0x40, 0xb3, 0x03, 0x2b, 0x4b, 0xa3, 0x01, 0x41, 0x33, 0x03, + 0x2b, 0x5d, 0xa5, 0x01, 0x44, 0x31, 0xa4, 0x01, 0x42, 0x32, 0x03, 0x2b, + 0x68, 0xa2, 0x01, 0x40, 0xd3, 0x03, 0x2b, 0x6c, 0xa3, 0x01, 0x41, 0x53, + 0x03, 0x2b, 0x7e, 0xa5, 0x01, 0x44, 0x51, 0xa4, 0x01, 0x42, 0x52, 0x03, + 0x2b, 0x89, 0xa3, 0x01, 0x41, 0x93, 0x03, 0x2b, 0x8d, 0xa5, 0x01, 0x44, + 0x91, 0xa4, 0x01, 0x42, 0x92, 0x03, 0x2b, 0x98, 0xa5, 0x01, 0x45, 0x11, + 0xa4, 0x01, 0x43, 0x12, 0x03, 0x2b, 0x9c, 0xa5, 0x01, 0x46, 0x10, 0xa2, + 0x01, 0x40, 0xe3, 0x03, 0x2b, 0xa0, 0xa3, 0x01, 0x41, 0x63, 0x03, 0x2b, + 0xb2, 0xa5, 0x01, 0x44, 0x61, 0xa4, 0x01, 0x42, 0x62, 0x03, 0x2b, 0xbd, + 0xa3, 0x01, 0x41, 0xa3, 0x03, 0x2b, 0xc1, 0xa5, 0x01, 0x44, 0xa1, 0xa4, + 0x01, 0x42, 0xa2, 0x03, 0x2b, 0xcc, 0xa5, 0x01, 0x45, 0x21, 0xa4, 0x01, + 0x43, 0x22, 0x03, 0x2b, 0xd0, 0xa5, 0x01, 0x46, 0x20, 0xa3, 0x01, 0x41, + 0xc3, 0x03, 0x2b, 0xd4, 0xa5, 0x01, 0x44, 0xc1, 0xa4, 0x01, 0x42, 0xc2, + 0x03, 0x2b, 0xdf, 0xa5, 0x01, 0x45, 0x41, 0xa4, 0x01, 0x43, 0x42, 0x03, + 0x2b, 0xe3, 
0xa5, 0x01, 0x46, 0x40, 0xa5, 0x01, 0x45, 0x81, 0xa4, 0x01, + 0x43, 0x82, 0x03, 0x2b, 0xe7, 0xa5, 0x01, 0x46, 0x80, 0xa5, 0x01, 0x47, + 0x00, 0x83, 0x08, 0x83, 0xa9, 0xc2, 0x00, 0xdb, 0x08, 0x81, 0xa8, 0x91, + 0x08, 0x83, 0x91, 0x87, 0x08, 0x83, 0x88, 0x8e, 0x08, 0x80, 0x70, 0x94, + 0x08, 0x80, 0x60, 0x91, 0x08, 0x83, 0xa1, 0x87, 0x08, 0x83, 0x98, 0x8e, + 0x08, 0x82, 0x08, 0x94, 0x08, 0x81, 0xf8, 0xc4, 0x99, 0xff, 0x0e, 0x87, + 0xa9, 0xc3, 0x2e, 0xd7, 0x0e, 0x84, 0x78, 0xc5, 0xa9, 0xe5, 0x0e, 0x84, + 0x89, 0xc8, 0xb2, 0xd8, 0x0e, 0x84, 0x80, 0xc4, 0x99, 0xff, 0x0e, 0x87, + 0x91, 0xc4, 0xe4, 0xa7, 0x0e, 0x87, 0x81, 0xc3, 0x2e, 0xd7, 0x0e, 0x82, + 0x70, 0xc3, 0x63, 0x2b, 0x0e, 0x84, 0x19, 0x03, 0x43, 0x2b, 0xeb, 0xd0, + 0x32, 0xc5, 0x0e, 0x85, 0x69, 0xcd, 0x77, 0x2c, 0x0e, 0x82, 0x90, 0x00, + 0x43, 0x2b, 0xf7, 0xc9, 0xb0, 0x35, 0x0e, 0x87, 0x29, 0xc7, 0xc5, 0x83, + 0x0e, 0x87, 0x20, 0xc9, 0xb0, 0x35, 0x0e, 0x87, 0x09, 0xc7, 0xc5, 0x83, + 0x0e, 0x87, 0x00, 0xc5, 0xa9, 0xe5, 0x0e, 0x84, 0xa9, 0x49, 0xb2, 0xd8, + 0x43, 0x2c, 0x03, 0xc5, 0xd9, 0x3e, 0x0e, 0x86, 0xd9, 0xc4, 0x80, 0xbc, + 0x0e, 0x86, 0xd0, 0xd5, 0x35, 0xb4, 0x0e, 0x86, 0x99, 0xc8, 0x2e, 0x8e, + 0x0e, 0x86, 0x70, 0xc3, 0x2e, 0xd7, 0x0e, 0x86, 0x11, 0xc4, 0x99, 0xff, + 0x0e, 0x86, 0x08, 0xc3, 0x15, 0x30, 0x0e, 0x82, 0x19, 0xc7, 0x9c, 0xe1, + 0x0e, 0x81, 0xb0, 0xc2, 0x6d, 0x08, 0x0e, 0x83, 0xb9, 0xc2, 0x00, 0xfb, + 0x0e, 0x83, 0xb0, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0xf1, 0xc8, 0x9c, 0xe0, + 0x0e, 0x81, 0xf0, 0xc6, 0x04, 0xe1, 0x0f, 0xd9, 0xe1, 0xc5, 0x00, 0x2c, + 0x0f, 0xd9, 0xe8, 0x55, 0x0a, 0x4c, 0xc3, 0x2c, 0x0f, 0x48, 0x0a, 0x53, + 0xc3, 0x2c, 0x21, 0x4a, 0x13, 0xe3, 0x43, 0x2c, 0x2d, 0xc6, 0x04, 0xe1, + 0x0f, 0xda, 0x19, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0x21, 0xcc, 0x04, 0xcb, + 0x0f, 0xda, 0x30, 0x46, 0x02, 0xae, 0xc3, 0x2c, 0x39, 0xd2, 0x4c, 0x37, + 0x0f, 0xda, 0x40, 0xd2, 0x4c, 0x37, 0x0f, 0xda, 0x39, 0x46, 0x02, 0xae, + 0x43, 0x2c, 0x45, 0xc7, 0x80, 0x70, 0x01, 0x53, 0x11, 0xc8, 0x52, 0x09, + 0x01, 0x53, 0x18, 0x16, 0xc3, 0x2c, 0x51, 0xd0, 0x57, 0xa2, 0x01, 0x3e, + 0xd0, 0x49, 0x09, 0xb3, 0xc3, 0x2c, 0x5d, 0xd0, 0x06, 0xd7, 0x0f, 0xdb, + 0xe0, 0x49, 0x09, 0xb3, 0xc3, 0x2c, 0x63, 0xd0, 0x06, 0xd7, 0x0f, 0xdb, + 0xe8, 0xc9, 0x33, 0xad, 0x01, 0x4c, 0x88, 0x16, 0xc3, 0x2c, 0x69, 0xc9, + 0x3b, 0x79, 0x0f, 0xc8, 0x19, 0xc3, 0x02, 0xa3, 0x0f, 0xc8, 0x30, 0xc6, + 0x02, 0xd1, 0x01, 0x2e, 0xb1, 0xc4, 0x0e, 0x6a, 0x01, 0x5f, 0x40, 0x45, + 0x00, 0x8c, 0xc3, 0x2c, 0x75, 0xd4, 0x3b, 0x4c, 0x01, 0x4a, 0x40, 0xc6, + 0x01, 0x73, 0x01, 0x0e, 0x71, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x20, 0xc5, + 0x78, 0x04, 0x01, 0x02, 0x29, 0x48, 0xbc, 0xfa, 0xc3, 0x2c, 0x87, 0xc8, + 0x52, 0x09, 0x01, 0x4c, 0x59, 0xc6, 0x01, 0x73, 0x01, 0x72, 0xa9, 0xcd, + 0x75, 0xa6, 0x01, 0x72, 0xb8, 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x03, 0x03, + 0x2c, 0x93, 0xcc, 0x82, 0xb9, 0x01, 0x5b, 0x51, 0xcd, 0x7c, 0xa8, 0x01, + 0x5c, 0x20, 0x45, 0x00, 0x8c, 0xc3, 0x2c, 0x97, 0xc8, 0xae, 0xbc, 0x01, + 0x59, 0xb0, 0x45, 0x03, 0x14, 0xc3, 0x2c, 0xa7, 0xc5, 0x01, 0x74, 0x01, + 0x0c, 0xd0, 0xd4, 0x2d, 0x64, 0x01, 0x0f, 0xd1, 0xc9, 0xb3, 0xf8, 0x01, + 0x59, 0xc0, 0xc3, 0x7e, 0x79, 0x01, 0x0d, 0x59, 0xd7, 0x22, 0x5c, 0x0f, + 0xc0, 0x40, 0xc3, 0x14, 0xa7, 0x01, 0x0d, 0x13, 0x03, 0x2c, 0xb3, 0x43, + 0x00, 0x7e, 0x43, 0x2c, 0xb9, 0xc2, 0x00, 0xb1, 0x01, 0x0f, 0x23, 0x03, + 0x2c, 0xc5, 0xcc, 0x56, 0x78, 0x01, 0x48, 0xe8, 0xc6, 0x0e, 0xa4, 0x01, + 0x4b, 0xd1, 0xc9, 0x00, 0xca, 0x01, 0x4b, 0xb9, 0x9a, 0x01, 0x59, 0xf0, + 0xce, 0x33, 0x92, 0x01, 0x4b, 0x99, 0xd6, 0x2f, 0x5c, 0x01, 0x4a, 0x19, + 0x48, 0x61, 
0xd4, 0xc3, 0x2c, 0xcb, 0xcf, 0x6a, 0x8f, 0x01, 0x5a, 0x50, + 0xe0, 0x06, 0xc7, 0x0f, 0xdd, 0xa8, 0x45, 0x00, 0x8c, 0xc3, 0x2c, 0xd7, + 0xc8, 0xae, 0xbc, 0x01, 0x48, 0x30, 0x44, 0x03, 0xc8, 0xc3, 0x2c, 0xe3, + 0x42, 0x02, 0xae, 0x43, 0x2c, 0xed, 0xc6, 0x00, 0x2b, 0x01, 0x54, 0x18, + 0xc3, 0xe5, 0xea, 0x08, 0x3a, 0x71, 0xc3, 0x52, 0x99, 0x08, 0x3a, 0x69, + 0xc3, 0xdf, 0xaf, 0x08, 0x3a, 0x79, 0xc7, 0xc0, 0xc8, 0x08, 0x3a, 0x81, + 0xc5, 0xd6, 0x5f, 0x08, 0x3a, 0x89, 0xc4, 0xe2, 0x8b, 0x08, 0x3a, 0x91, + 0xc4, 0xe1, 0xd3, 0x08, 0x3a, 0x98, 0x26, 0xc3, 0x2c, 0xf7, 0xc3, 0xb6, + 0x4a, 0x08, 0x3a, 0x39, 0xc3, 0xd8, 0x0d, 0x08, 0x3a, 0x31, 0xc3, 0xd3, + 0xaf, 0x08, 0x3a, 0x29, 0xc3, 0xe2, 0x7b, 0x08, 0x3a, 0x21, 0xc3, 0xe6, + 0x32, 0x08, 0x3a, 0x19, 0xc3, 0xe6, 0x65, 0x08, 0x3a, 0x11, 0xc3, 0xe1, + 0x37, 0x08, 0x3a, 0x09, 0xc3, 0xc7, 0x9e, 0x08, 0x3a, 0x00, 0x9e, 0x08, + 0x39, 0x99, 0x9f, 0x08, 0x39, 0xa1, 0xa0, 0x08, 0x39, 0xa9, 0xa1, 0x08, + 0x39, 0xb1, 0x9d, 0x08, 0x39, 0x90, 0x9d, 0x08, 0x38, 0x19, 0x9e, 0x08, + 0x38, 0x21, 0x9f, 0x08, 0x38, 0x29, 0xa0, 0x08, 0x38, 0x31, 0xa1, 0x08, + 0x38, 0x39, 0xa3, 0x08, 0x38, 0x41, 0xa5, 0x08, 0x38, 0x49, 0xa6, 0x08, + 0x38, 0x50, 0x9d, 0x08, 0x38, 0x59, 0x9e, 0x08, 0x38, 0x61, 0x9f, 0x08, + 0x38, 0x69, 0xa0, 0x08, 0x38, 0x71, 0xa1, 0x08, 0x38, 0x79, 0xa2, 0x08, + 0x38, 0x81, 0xa3, 0x08, 0x38, 0x89, 0xa4, 0x08, 0x38, 0x91, 0xa5, 0x08, + 0x38, 0x99, 0xa6, 0x08, 0x38, 0xa0, 0x9d, 0x08, 0x38, 0xa9, 0x9e, 0x08, + 0x38, 0xb1, 0x9f, 0x08, 0x38, 0xb9, 0xa0, 0x08, 0x38, 0xc1, 0xa1, 0x08, + 0x38, 0xc9, 0xa3, 0x08, 0x38, 0xd1, 0xa4, 0x08, 0x38, 0xd9, 0xa5, 0x08, + 0x38, 0xe1, 0xa6, 0x08, 0x38, 0xe8, 0xa1, 0x08, 0x38, 0xf1, 0xa4, 0x08, + 0x38, 0xf9, 0xa5, 0x08, 0x39, 0x00, 0x9d, 0x08, 0x39, 0x09, 0x9f, 0x08, + 0x39, 0x11, 0xa0, 0x08, 0x39, 0x19, 0xa1, 0x08, 0x39, 0x21, 0xa2, 0x08, + 0x39, 0x29, 0xa3, 0x08, 0x39, 0x31, 0xa5, 0x08, 0x39, 0x39, 0xa6, 0x08, + 0x39, 0x40, 0xa0, 0x08, 0x39, 0x59, 0xa1, 0x08, 0x39, 0x61, 0xa2, 0x08, + 0x39, 0x69, 0xa3, 0x08, 0x39, 0x71, 0xa4, 0x08, 0x39, 0x79, 0xa5, 0x08, + 0x39, 0x81, 0x9e, 0x08, 0x39, 0x49, 0x9f, 0x08, 0x39, 0x51, 0xa6, 0x08, + 0x39, 0x88, 0x1d, 0xc3, 0x2d, 0x01, 0x1e, 0xc3, 0x2d, 0x25, 0x1f, 0xc3, + 0x2d, 0x39, 0x20, 0xc3, 0x2d, 0x66, 0x21, 0xc3, 0x2d, 0x7e, 0x22, 0xc3, + 0x2d, 0x9e, 0x23, 0xc3, 0x2d, 0xc2, 0x24, 0xc3, 0x2d, 0xda, 0x25, 0x43, + 0x2d, 0xf6, 0xc2, 0x8c, 0x53, 0x08, 0x32, 0x41, 0x1f, 0xc3, 0x2e, 0x0e, + 0x42, 0xd5, 0xf8, 0xc3, 0x2e, 0x1a, 0xc2, 0xe6, 0x8a, 0x08, 0x32, 0x81, + 0xc2, 0xe6, 0x7f, 0x08, 0x32, 0x89, 0x25, 0xc3, 0x2e, 0x22, 0xc2, 0xe6, + 0x86, 0x08, 0x32, 0xa0, 0x9e, 0x08, 0x32, 0xa9, 0x9f, 0x08, 0x32, 0xb1, + 0xa0, 0x08, 0x32, 0xb9, 0xa1, 0x08, 0x32, 0xc1, 0xa2, 0x08, 0x32, 0xc9, + 0xa3, 0x08, 0x32, 0xd1, 0xa4, 0x08, 0x32, 0xd9, 0xa5, 0x08, 0x32, 0xe1, + 0x26, 0x43, 0x2e, 0x2a, 0x9d, 0x08, 0x33, 0x01, 0x9e, 0x08, 0x33, 0x09, + 0x9f, 0x08, 0x33, 0x11, 0x20, 0xc3, 0x2e, 0x36, 0xa1, 0x08, 0x33, 0x31, + 0xa2, 0x08, 0x33, 0x39, 0xa3, 0x08, 0x33, 0x41, 0xa4, 0x08, 0x33, 0x49, + 0xa5, 0x08, 0x33, 0x51, 0xa6, 0x08, 0x33, 0x58, 0x9d, 0x08, 0x33, 0x61, + 0x9e, 0x08, 0x33, 0x69, 0x9f, 0x08, 0x33, 0x71, 0xa0, 0x08, 0x33, 0x79, + 0xa1, 0x08, 0x33, 0x81, 0xa2, 0x08, 0x33, 0x89, 0xa3, 0x08, 0x33, 0x91, + 0xa4, 0x08, 0x33, 0x99, 0xa5, 0x08, 0x33, 0xa1, 0xa6, 0x08, 0x33, 0xa8, + 0x9d, 0x08, 0x33, 0xb1, 0x9e, 0x08, 0x33, 0xb9, 0x9f, 0x08, 0x33, 0xc1, + 0xa0, 0x08, 0x33, 0xc9, 0xa1, 0x08, 0x33, 0xd1, 0xa2, 0x08, 0x33, 0xd9, + 0xa3, 0x08, 0x33, 0xe1, 0xa4, 0x08, 0x33, 0xe9, 0xa5, 0x08, 0x33, 0xf1, + 0xa6, 0x08, 
0x33, 0xf8, 0x9d, 0x08, 0x34, 0x01, 0x9e, 0x08, 0x34, 0x09, + 0x9f, 0x08, 0x34, 0x11, 0xa0, 0x08, 0x34, 0x19, 0xa1, 0x08, 0x34, 0x21, + 0xa2, 0x08, 0x34, 0x29, 0xa3, 0x08, 0x34, 0x31, 0xa4, 0x08, 0x34, 0x39, + 0xa5, 0x08, 0x34, 0x41, 0xa6, 0x08, 0x34, 0x48, 0x9d, 0x08, 0x34, 0x51, + 0x9e, 0x08, 0x34, 0x59, 0x9f, 0x08, 0x34, 0x61, 0xa0, 0x08, 0x34, 0x69, + 0xa3, 0x08, 0x34, 0x81, 0xa4, 0x08, 0x34, 0x89, 0xa5, 0x08, 0x34, 0x91, + 0xa6, 0x08, 0x34, 0x99, 0xa1, 0x08, 0x34, 0x71, 0xa2, 0x08, 0x34, 0x78, + 0x9d, 0x08, 0x34, 0xa1, 0x9e, 0x08, 0x34, 0xa9, 0x9f, 0x08, 0x34, 0xb1, + 0xa0, 0x08, 0x34, 0xb9, 0xa1, 0x08, 0x34, 0xc1, 0xa2, 0x08, 0x34, 0xc9, + 0xa3, 0x08, 0x34, 0xd1, 0xa4, 0x08, 0x34, 0xd9, 0xa5, 0x08, 0x34, 0xe1, + 0xa6, 0x08, 0x34, 0xe8, 0x9d, 0x08, 0x34, 0xf1, 0x9e, 0x08, 0x34, 0xf8, + 0xc5, 0xdc, 0xb8, 0x08, 0x35, 0x01, 0xc5, 0xd5, 0x15, 0x08, 0x35, 0x09, + 0xc5, 0xd4, 0x1b, 0x08, 0x35, 0x11, 0xc5, 0xd8, 0x58, 0x08, 0x35, 0x19, + 0xc5, 0xd6, 0xd2, 0x08, 0x35, 0x21, 0xc5, 0xd6, 0xeb, 0x08, 0x35, 0x29, + 0xc5, 0xd7, 0x77, 0x08, 0x35, 0x31, 0xc5, 0xd5, 0x74, 0x08, 0x35, 0x39, + 0xc5, 0xdd, 0x9e, 0x08, 0x35, 0x41, 0xc5, 0xd9, 0xbb, 0x08, 0x35, 0x48, + 0xc5, 0xdc, 0xb8, 0x08, 0x35, 0x51, 0xc5, 0xd5, 0x15, 0x08, 0x35, 0x59, + 0xc5, 0xd4, 0x1b, 0x08, 0x35, 0x61, 0xc5, 0xd8, 0x58, 0x08, 0x35, 0x69, + 0xc5, 0xd6, 0xd2, 0x08, 0x35, 0x71, 0xc5, 0xd6, 0xeb, 0x08, 0x35, 0x79, + 0xc5, 0xd7, 0x77, 0x08, 0x35, 0x81, 0xc5, 0xd5, 0x74, 0x08, 0x35, 0x89, + 0xc5, 0xdd, 0x9e, 0x08, 0x35, 0x90, 0x9e, 0x08, 0x35, 0x99, 0x9f, 0x08, + 0x35, 0xa1, 0xa0, 0x08, 0x35, 0xa9, 0xa1, 0x08, 0x35, 0xb1, 0xa2, 0x08, + 0x35, 0xb9, 0xa3, 0x08, 0x35, 0xc1, 0xa5, 0x08, 0x35, 0xc9, 0xa6, 0x08, + 0x35, 0xd0, 0x9d, 0x08, 0x35, 0xd9, 0x9e, 0x08, 0x35, 0xe1, 0x9f, 0x08, + 0x35, 0xe9, 0xa0, 0x08, 0x35, 0xf1, 0xa2, 0x08, 0x35, 0xf9, 0xa3, 0x08, + 0x36, 0x00, 0x9d, 0x08, 0x36, 0x09, 0x9e, 0x08, 0x36, 0x11, 0xa0, 0x08, + 0x36, 0x19, 0xa1, 0x08, 0x36, 0x21, 0xa2, 0x08, 0x36, 0x29, 0xa3, 0x08, + 0x36, 0x31, 0xa4, 0x08, 0x36, 0x39, 0xa5, 0x08, 0x36, 0x41, 0xa6, 0x08, + 0x36, 0x48, 0x9d, 0x08, 0x36, 0x51, 0x9e, 0x08, 0x36, 0x59, 0x9f, 0x08, + 0x36, 0x61, 0xa1, 0x08, 0x36, 0x69, 0xa2, 0x08, 0x36, 0x71, 0xa3, 0x08, + 0x36, 0x79, 0xa4, 0x08, 0x36, 0x81, 0xa5, 0x08, 0x36, 0x89, 0xa6, 0x08, + 0x36, 0x90, 0x9d, 0x08, 0x36, 0x99, 0x9e, 0x08, 0x36, 0xa1, 0x9f, 0x08, + 0x36, 0xa9, 0xa2, 0x08, 0x36, 0xb1, 0xa4, 0x08, 0x36, 0xb9, 0xa5, 0x08, + 0x36, 0xc1, 0xa6, 0x08, 0x36, 0xc8, 0x9d, 0x08, 0x36, 0xd1, 0x9e, 0x08, + 0x36, 0xd9, 0x9f, 0x08, 0x36, 0xe1, 0xa0, 0x08, 0x36, 0xe9, 0xa1, 0x08, + 0x36, 0xf1, 0xa2, 0x08, 0x36, 0xf9, 0xa3, 0x08, 0x37, 0x01, 0xa4, 0x08, + 0x37, 0x09, 0xa6, 0x08, 0x37, 0x10, 0xa0, 0x08, 0x37, 0x19, 0xa1, 0x08, + 0x37, 0x21, 0xa2, 0x08, 0x37, 0x29, 0xa3, 0x08, 0x37, 0x31, 0xa5, 0x08, + 0x37, 0x39, 0xa6, 0x08, 0x37, 0x40, 0x9d, 0x08, 0x37, 0x49, 0x9e, 0x08, + 0x37, 0x51, 0x9f, 0x08, 0x37, 0x59, 0xa0, 0x08, 0x37, 0x61, 0xa1, 0x08, + 0x37, 0x69, 0xa2, 0x08, 0x37, 0x71, 0xa3, 0x08, 0x37, 0x79, 0xa4, 0x08, + 0x37, 0x81, 0xa5, 0x08, 0x37, 0x89, 0xa6, 0x08, 0x37, 0x90, 0x9d, 0x08, + 0x37, 0x99, 0x9e, 0x08, 0x37, 0xa1, 0x9f, 0x08, 0x37, 0xa9, 0xa0, 0x08, + 0x37, 0xb1, 0xa1, 0x08, 0x37, 0xb9, 0xa2, 0x08, 0x37, 0xc1, 0xa3, 0x08, + 0x37, 0xc9, 0xa4, 0x08, 0x37, 0xd1, 0xa5, 0x08, 0x37, 0xd9, 0xa6, 0x08, + 0x37, 0xe0, 0x9e, 0x08, 0x37, 0xe9, 0x9f, 0x08, 0x37, 0xf1, 0xa1, 0x08, + 0x37, 0xf9, 0xa2, 0x08, 0x38, 0x01, 0xa3, 0x08, 0x38, 0x09, 0xa5, 0x08, + 0x38, 0x10, 0x1d, 0xc3, 0x2e, 0x42, 0x1e, 0xc3, 0x2e, 0x78, 0x22, 0xc3, + 0x2e, 0xa8, 
0x21, 0xc3, 0x2e, 0xde, 0x23, 0xc3, 0x2f, 0x0e, 0x25, 0xc3, + 0x2f, 0x3e, 0x24, 0xc3, 0x2f, 0x56, 0x1f, 0xc3, 0x2f, 0x8c, 0x20, 0xc3, + 0x2f, 0xc2, 0x26, 0x43, 0x2f, 0xf2, 0x1e, 0xc3, 0x2f, 0xfe, 0xc2, 0xe1, + 0x2e, 0x08, 0x02, 0x91, 0xc2, 0x00, 0x20, 0x08, 0x02, 0x99, 0x21, 0xc3, + 0x30, 0x06, 0xc2, 0x00, 0x22, 0x08, 0x02, 0xb1, 0x23, 0xc3, 0x30, 0x0e, + 0xc2, 0x3c, 0xc8, 0x08, 0x02, 0xc9, 0x25, 0x43, 0x30, 0x16, 0x1e, 0xc3, + 0x30, 0x26, 0x1f, 0x43, 0x30, 0x4a, 0xc3, 0xe5, 0xba, 0x08, 0x06, 0xf1, + 0x1f, 0xc3, 0x30, 0x5a, 0xc3, 0xe6, 0x4a, 0x08, 0x07, 0xd0, 0x1f, 0xc3, + 0x30, 0x6c, 0x20, 0xc3, 0x30, 0x78, 0xc8, 0xbe, 0x92, 0x08, 0x05, 0x20, + 0x46, 0x00, 0x8b, 0xc3, 0x30, 0x84, 0x05, 0xc3, 0x30, 0xb3, 0x0b, 0xc3, + 0x30, 0xc2, 0x03, 0xc3, 0x30, 0xce, 0xc8, 0xbf, 0x12, 0x05, 0x5a, 0x29, + 0xd1, 0x52, 0x66, 0x00, 0x14, 0x29, 0xc6, 0xa2, 0xbb, 0x00, 0x06, 0xf8, + 0x46, 0x00, 0x8b, 0xc3, 0x30, 0xda, 0xc2, 0x00, 0x0a, 0x05, 0x5a, 0x9b, + 0x03, 0x31, 0x08, 0x46, 0x17, 0x8d, 0xc3, 0x31, 0x0e, 0xc8, 0xba, 0x4a, + 0x05, 0x39, 0x6b, 0x03, 0x31, 0x1e, 0xc2, 0x00, 0x45, 0x05, 0x3b, 0x78, + 0xcb, 0x8d, 0x37, 0x00, 0x15, 0x3b, 0x03, 0x31, 0x24, 0x17, 0xc3, 0x31, + 0x2a, 0x46, 0x00, 0x8b, 0xc3, 0x31, 0x34, 0x0a, 0xc3, 0x31, 0x63, 0x11, + 0xc3, 0x31, 0x72, 0xc9, 0xab, 0x40, 0x00, 0x15, 0x33, 0x03, 0x31, 0x7e, + 0xd3, 0x45, 0x14, 0x00, 0x15, 0x41, 0x9c, 0x05, 0x39, 0x49, 0xc7, 0xc3, + 0xa0, 0x05, 0x39, 0x59, 0xcb, 0x98, 0x8f, 0x01, 0x63, 0xb8, 0x46, 0x00, + 0x8b, 0xc3, 0x31, 0x84, 0x44, 0x05, 0x76, 0xc3, 0x31, 0xda, 0x91, 0x05, + 0x3a, 0x79, 0xc4, 0x6d, 0xb5, 0x05, 0x3d, 0xb9, 0xcb, 0x8e, 0xc3, 0x05, + 0x3e, 0x09, 0x8b, 0x00, 0x0d, 0x19, 0x97, 0x00, 0x11, 0x18, 0x46, 0x00, + 0x8b, 0xc3, 0x31, 0xe8, 0x42, 0x01, 0xbb, 0xc3, 0x32, 0x32, 0x10, 0xc3, + 0x32, 0x3f, 0x95, 0x05, 0x3b, 0x68, 0x07, 0xc3, 0x32, 0x4b, 0x46, 0x00, + 0x8b, 0xc3, 0x32, 0x5a, 0x9c, 0x00, 0x0f, 0x9b, 0x03, 0x32, 0x87, 0x11, + 0xc3, 0x32, 0x8b, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0x89, 0xc9, 0xb2, 0x09, + 0x00, 0x11, 0xc0, 0xc2, 0x25, 0xa1, 0x00, 0x14, 0x93, 0x03, 0x32, 0x97, + 0xc2, 0x00, 0x75, 0x00, 0x0a, 0x5b, 0x03, 0x32, 0x9b, 0xc2, 0x01, 0xe2, + 0x00, 0x14, 0x1b, 0x03, 0x32, 0xa1, 0x46, 0x00, 0x8b, 0xc3, 0x32, 0xa7, + 0x4e, 0x73, 0x36, 0xc3, 0x32, 0xfd, 0x96, 0x05, 0x3b, 0x5a, 0x03, 0x33, + 0x09, 0x00, 0xc3, 0x33, 0x0d, 0x48, 0x10, 0x2f, 0xc3, 0x33, 0x19, 0xc8, + 0xb7, 0xda, 0x00, 0x13, 0x21, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0xaa, 0x03, + 0x33, 0x46, 0x46, 0x00, 0x8b, 0xc3, 0x33, 0x4c, 0x07, 0xc3, 0x33, 0x93, + 0xc5, 0xb8, 0xe3, 0x00, 0x0b, 0xfb, 0x03, 0x33, 0xa2, 0xc9, 0xab, 0x40, + 0x00, 0x15, 0x51, 0xc9, 0xa8, 0x67, 0x00, 0x15, 0x59, 0xc2, 0x01, 0xdf, + 0x05, 0x3b, 0x91, 0xd1, 0x4f, 0x47, 0x00, 0x0c, 0xd9, 0x8c, 0x00, 0x0e, + 0x48, 0xcb, 0x92, 0x5f, 0x00, 0x15, 0x4b, 0x03, 0x33, 0xa8, 0x46, 0x00, + 0x8b, 0x43, 0x33, 0xae, 0x46, 0x00, 0x8b, 0xc3, 0x33, 0xcc, 0xc3, 0x3c, + 0x63, 0x00, 0x10, 0xe8, 0x45, 0x04, 0xcc, 0xc3, 0x34, 0x07, 0x46, 0x00, + 0x8b, 0xc3, 0x34, 0x13, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0x98, 0x00, 0xc3, + 0x34, 0x37, 0xc6, 0x10, 0x3f, 0x00, 0x14, 0x53, 0x03, 0x34, 0x46, 0x87, + 0x00, 0xeb, 0x59, 0x91, 0x05, 0x5b, 0x19, 0x8b, 0x05, 0x5a, 0x81, 0x8f, + 0x05, 0x3b, 0xc0, 0x00, 0xc3, 0x34, 0x4c, 0xc4, 0xde, 0x3f, 0x00, 0x12, + 0x8b, 0x03, 0x34, 0x58, 0x87, 0x00, 0x07, 0x33, 0x03, 0x34, 0x5e, 0x83, + 0x05, 0x39, 0x99, 0x91, 0x05, 0x39, 0xa9, 0x97, 0x05, 0x39, 0xb9, 0x98, + 0x05, 0x39, 0xcb, 0x03, 0x34, 0x64, 0x9b, 0x05, 0x39, 0xe9, 0xca, 0xa4, + 0x72, 0x05, 0x3e, 0x18, 0x46, 0x00, 0x8b, 0x43, 0x34, 0x6a, 0x46, 0x00, + 0x8b, 0xc3, 
0x34, 0x8c, 0xc3, 0x0a, 0xe3, 0x05, 0x39, 0x3b, 0x03, 0x34, + 0xb2, 0x98, 0x00, 0x0c, 0xa9, 0xc5, 0xd3, 0x2c, 0x01, 0x63, 0xb0, 0x46, + 0x00, 0x8b, 0x43, 0x34, 0xb8, 0x46, 0x00, 0x8b, 0x43, 0x34, 0xe8, 0x46, + 0x00, 0x8b, 0xc3, 0x34, 0xf8, 0x9b, 0x05, 0x3b, 0x09, 0xcb, 0x91, 0x15, + 0x05, 0x3b, 0x19, 0xc3, 0x02, 0x39, 0x05, 0x3b, 0x49, 0x47, 0xc8, 0xcb, + 0x43, 0x35, 0x1a, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0x2c, 0xc2, 0x00, 0x0a, + 0x00, 0x13, 0xc0, 0x00, 0xc3, 0x35, 0x54, 0xc2, 0x01, 0xdf, 0x05, 0x3b, + 0xa1, 0x8c, 0x00, 0x0e, 0x60, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0x60, 0xc2, + 0x00, 0x39, 0x00, 0x09, 0xc0, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0x8f, 0x47, + 0x23, 0x34, 0xc3, 0x35, 0xc3, 0xc4, 0x38, 0x2c, 0x00, 0x13, 0x19, 0xc2, + 0x00, 0xd0, 0x00, 0x0d, 0x18, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0xd5, 0xcc, + 0x8b, 0x95, 0x00, 0xe8, 0xb9, 0x03, 0xc3, 0x36, 0x05, 0x4b, 0x8d, 0x58, + 0xc3, 0x36, 0x11, 0xc7, 0xc9, 0xb9, 0x05, 0x3a, 0x39, 0xc3, 0x04, 0x87, + 0x05, 0x3d, 0xa8, 0x46, 0x00, 0x8b, 0x43, 0x36, 0x1c, 0x46, 0x00, 0x8b, + 0xc3, 0x36, 0x26, 0xc9, 0xae, 0xf1, 0x00, 0x11, 0xc8, 0x88, 0x07, 0xd8, + 0x03, 0x03, 0x36, 0x3b, 0x8e, 0x07, 0xd8, 0x11, 0x8b, 0x07, 0xd8, 0x08, + 0x8d, 0x0e, 0xf8, 0x81, 0x89, 0x0e, 0xf8, 0x11, 0x94, 0x00, 0xe8, 0xd1, + 0x8f, 0x05, 0x3f, 0xd1, 0x87, 0x01, 0x63, 0xd8, 0xc4, 0xa8, 0x1a, 0x0e, + 0xf8, 0x21, 0xc6, 0x01, 0x73, 0x00, 0xe8, 0x60, 0x94, 0x00, 0xe8, 0xc9, + 0x90, 0x00, 0xe8, 0x70, 0xc4, 0xb0, 0x8b, 0x00, 0xf7, 0xf1, 0xc5, 0x1e, + 0xc8, 0x00, 0xf7, 0xc1, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x9b, 0x03, 0x36, + 0x43, 0x06, 0xc3, 0x36, 0x49, 0xc5, 0x1f, 0x0c, 0x00, 0xf7, 0x91, 0xc5, + 0x31, 0xee, 0x00, 0x06, 0xe9, 0xca, 0x08, 0xf6, 0x00, 0x0b, 0xb1, 0xc6, + 0x60, 0xb1, 0x00, 0x11, 0x91, 0xc6, 0x01, 0x73, 0x00, 0x12, 0x70, 0x47, + 0xc0, 0x2e, 0xc3, 0x36, 0x55, 0xc8, 0xba, 0x02, 0x05, 0x3e, 0xb0, 0x44, + 0x05, 0x18, 0xc3, 0x36, 0x5f, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0xf1, 0xc4, + 0x01, 0x23, 0x01, 0x63, 0x70, 0x45, 0x00, 0x8c, 0xc3, 0x36, 0x6b, 0xc3, + 0x01, 0x5d, 0x00, 0x12, 0x20, 0x42, 0x01, 0x23, 0xc3, 0x36, 0xb5, 0x05, + 0xc3, 0x36, 0xc4, 0x06, 0xc3, 0x36, 0xd3, 0x0f, 0xc3, 0x36, 0xe0, 0xc5, + 0x1e, 0xc8, 0x00, 0x06, 0xab, 0x03, 0x36, 0xef, 0xc6, 0x01, 0x73, 0x00, + 0x06, 0xc3, 0x03, 0x36, 0xf5, 0xc5, 0x1f, 0x0c, 0x00, 0x06, 0x91, 0xc5, + 0x31, 0xee, 0x00, 0x06, 0x99, 0x42, 0x01, 0xc8, 0xc3, 0x36, 0xfb, 0xc5, + 0x1d, 0x88, 0x00, 0x0a, 0x71, 0xc6, 0xcc, 0x8f, 0x00, 0x0f, 0x53, 0x03, + 0x37, 0x07, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x70, 0x91, 0x00, 0x0c, 0x31, + 0x87, 0x00, 0x0c, 0x80, 0x06, 0xc3, 0x37, 0x0d, 0xca, 0x9e, 0x5a, 0x00, + 0xf6, 0x41, 0xc5, 0x1e, 0xc8, 0x00, 0x09, 0x43, 0x03, 0x37, 0x1a, 0xc5, + 0x1f, 0x0c, 0x00, 0x06, 0x61, 0xc5, 0x31, 0xee, 0x00, 0x06, 0x69, 0x05, + 0xc3, 0x37, 0x20, 0xc6, 0x60, 0xb1, 0x00, 0x09, 0x51, 0xc5, 0x1d, 0x88, + 0x00, 0x09, 0x61, 0xc6, 0xcc, 0x8f, 0x00, 0x09, 0x71, 0xc6, 0x01, 0x73, + 0x00, 0x0c, 0xb9, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x50, 0x88, 0x05, 0x3b, + 0xd9, 0x89, 0x05, 0x3b, 0xe9, 0x94, 0x05, 0x3c, 0x11, 0x95, 0x05, 0x3c, + 0x21, 0x96, 0x05, 0x3c, 0x31, 0x86, 0x05, 0x3b, 0xc8, 0x05, 0xc3, 0x37, + 0x2c, 0xc5, 0x1e, 0xc8, 0x00, 0xf5, 0xe3, 0x03, 0x37, 0x44, 0xca, 0x9e, + 0x5a, 0x00, 0xf5, 0xd1, 0x06, 0xc3, 0x37, 0x4a, 0xc6, 0x60, 0xb1, 0x00, + 0x08, 0x93, 0x03, 0x37, 0x54, 0xc5, 0x1f, 0x0c, 0x00, 0x06, 0x41, 0xc5, + 0x31, 0xee, 0x00, 0x06, 0x49, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0xa1, 0xc6, + 0xcc, 0x8f, 0x00, 0x08, 0xc1, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x31, 0xc6, + 0x01, 0x73, 0x00, 0x12, 0x30, 0xc3, 0x00, 0x49, 0x05, 0x39, 0x11, 0xc2, + 0x00, 0x74, 
0x05, 0x39, 0x20, 0x8a, 0x00, 0x06, 0x80, 0x00, 0x43, 0x37, + 0x5a, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0x13, 0x03, 0x37, 0x66, 0x05, 0xc3, + 0x37, 0x6c, 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x11, 0x06, 0xc3, 0x37, 0x7b, + 0x45, 0x00, 0x9d, 0xc3, 0x37, 0x88, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x11, + 0xc5, 0x1f, 0x0c, 0x00, 0x06, 0x01, 0xc5, 0x31, 0xee, 0x00, 0x06, 0x09, + 0xc5, 0x1e, 0xc8, 0x00, 0x06, 0x19, 0xc6, 0x60, 0xb1, 0x00, 0x08, 0x01, + 0xc6, 0xcc, 0x8f, 0x00, 0x08, 0x21, 0xc6, 0x01, 0x73, 0x00, 0x11, 0xd0, + 0x46, 0x00, 0x8b, 0x43, 0x37, 0x97, 0xd4, 0x3e, 0x6c, 0x05, 0x39, 0xd0, + 0x44, 0x05, 0x18, 0xc3, 0x37, 0xa3, 0x05, 0xc3, 0x37, 0xb2, 0xc5, 0x31, + 0xee, 0x00, 0x0a, 0xd3, 0x03, 0x37, 0xcd, 0xce, 0x38, 0xe6, 0x05, 0x3d, + 0x41, 0xc4, 0x01, 0x23, 0x05, 0x3e, 0x29, 0x15, 0x43, 0x37, 0xd3, 0xc6, + 0xbb, 0x8c, 0x05, 0x3d, 0x61, 0xc3, 0x74, 0x83, 0x00, 0x0c, 0x78, 0xd0, + 0x5f, 0x12, 0x00, 0x12, 0x51, 0xc9, 0xb1, 0xca, 0x05, 0x3d, 0x70, 0xca, + 0x64, 0x13, 0x00, 0xf4, 0xa1, 0x06, 0xc3, 0x37, 0xdf, 0x05, 0xc3, 0x37, + 0xeb, 0xcc, 0x51, 0x28, 0x05, 0x3e, 0x31, 0xc5, 0x31, 0xee, 0x00, 0x0b, + 0xc9, 0x15, 0xc3, 0x37, 0xf7, 0xc4, 0x01, 0x23, 0x00, 0x11, 0x20, 0xc8, + 0x20, 0xa9, 0x00, 0xf4, 0x61, 0xc8, 0x16, 0x15, 0x00, 0xf4, 0x50, 0x06, + 0xc3, 0x38, 0x03, 0xc5, 0x31, 0xee, 0x00, 0xf4, 0x11, 0xc5, 0x1f, 0x0c, + 0x00, 0xf4, 0x01, 0xc4, 0x01, 0x23, 0x01, 0x63, 0x91, 0xca, 0x08, 0xf6, + 0x00, 0x0b, 0xa0, 0x06, 0xc3, 0x38, 0x0f, 0xc5, 0x1e, 0xc8, 0x00, 0xf3, + 0xe1, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x90, 0xc2, 0x10, 0x11, 0x05, 0x3c, + 0xd1, 0xc2, 0x49, 0x0c, 0x05, 0x3c, 0xe1, 0xc2, 0x0f, 0xe1, 0x05, 0x3c, + 0xf0, 0x05, 0xc3, 0x38, 0x1b, 0xca, 0x64, 0x13, 0x00, 0xf3, 0x71, 0x06, + 0xc3, 0x38, 0x33, 0xc6, 0x01, 0x73, 0x00, 0x0b, 0x31, 0xc4, 0x01, 0x23, + 0x00, 0x0d, 0x61, 0xce, 0x01, 0x19, 0x00, 0x0d, 0x70, 0xcc, 0x23, 0x3f, + 0x05, 0x3b, 0x22, 0x03, 0x38, 0x3f, 0xc9, 0x67, 0x20, 0x05, 0x3b, 0xf1, + 0x8e, 0x05, 0x3c, 0x01, 0x8a, 0x05, 0x3c, 0x69, 0x8d, 0x05, 0x3d, 0x81, + 0x96, 0x05, 0x3d, 0x89, 0x8f, 0x00, 0x0c, 0xe1, 0x98, 0x00, 0x12, 0x29, + 0x83, 0x01, 0x63, 0x7a, 0x03, 0x38, 0x45, 0xc3, 0x22, 0xcb, 0x00, 0x0c, + 0x21, 0xc3, 0x02, 0x9f, 0x00, 0x0d, 0x39, 0xc4, 0x0d, 0x13, 0x00, 0x0d, + 0xe0, 0x45, 0x00, 0x8c, 0xc3, 0x38, 0x4b, 0xc7, 0xa6, 0x69, 0x05, 0x3a, + 0xd0, 0xca, 0x9a, 0xe0, 0x05, 0x39, 0xf1, 0xc6, 0x21, 0xa3, 0x05, 0x3d, + 0x59, 0x87, 0x00, 0x0c, 0x71, 0xc6, 0xd3, 0x2b, 0x05, 0x3f, 0xa8, 0xc9, + 0x16, 0x14, 0x00, 0xf2, 0xb1, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0xa1, 0x15, + 0xc3, 0x38, 0x79, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x21, 0xc8, 0xbe, 0x9a, + 0x05, 0x3a, 0x90, 0x05, 0xc3, 0x38, 0x88, 0x0e, 0xc3, 0x38, 0x9a, 0x06, + 0xc3, 0x38, 0xac, 0xc5, 0x1f, 0x0c, 0x00, 0x0f, 0xc1, 0xc5, 0x1e, 0xc8, + 0x00, 0x06, 0x89, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0x19, 0xce, 0x38, 0xe6, + 0x05, 0x3d, 0x21, 0xce, 0x6e, 0x04, 0x00, 0x0e, 0x58, 0x05, 0xc3, 0x38, + 0xb8, 0xca, 0x64, 0x13, 0x00, 0xf1, 0xd1, 0x42, 0x00, 0x58, 0xc3, 0x38, + 0xca, 0xcb, 0x8f, 0xb5, 0x05, 0x3a, 0x41, 0xc5, 0x31, 0xee, 0x00, 0x09, + 0xc9, 0x47, 0x04, 0xcb, 0xc3, 0x38, 0xd9, 0x15, 0xc3, 0x38, 0xe5, 0x04, + 0x43, 0x38, 0xf1, 0xca, 0x64, 0x13, 0x00, 0xf1, 0xa1, 0x06, 0xc3, 0x38, + 0xfd, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0x81, 0xc6, 0x01, 0x73, 0x05, 0x3a, + 0x03, 0x03, 0x39, 0x0f, 0x05, 0xc3, 0x39, 0x15, 0xce, 0x38, 0xe6, 0x05, + 0x3d, 0x11, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0xc0, 0xcb, 0x97, 0x2f, 0x00, + 0xf1, 0x51, 0x05, 0xc3, 0x39, 0x21, 0x06, 0xc3, 0x39, 0x33, 0xc6, 0x01, + 0x73, 0x00, 0x09, 0x31, 0xc4, 0x01, 0x23, 0x05, 0x3d, 0x50, 0xc6, 0x60, + 0xb1, 0x00, 
0xf1, 0x01, 0xc5, 0x31, 0xee, 0x00, 0x0f, 0xa1, 0x05, 0xc3, + 0x39, 0x45, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0xf1, 0xc9, 0x16, 0x14, 0x00, + 0x09, 0x01, 0xce, 0x38, 0xe6, 0x05, 0x3d, 0x01, 0xc4, 0x01, 0x23, 0x00, + 0x0c, 0x99, 0xc6, 0x01, 0x73, 0x00, 0x0f, 0x20, 0x97, 0x05, 0x3d, 0xf1, + 0x8b, 0x05, 0x3d, 0xe1, 0x83, 0x05, 0x3d, 0xd1, 0xc4, 0x00, 0xf0, 0x00, + 0x12, 0x08, 0xc9, 0x16, 0x14, 0x00, 0xf0, 0xf1, 0xc6, 0x01, 0x73, 0x05, + 0x3c, 0xc1, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x88, 0x05, 0xc3, 0x39, 0x57, + 0xca, 0x64, 0x13, 0x00, 0xf0, 0x71, 0x44, 0x05, 0x18, 0xc3, 0x39, 0x69, + 0x15, 0xc3, 0x39, 0x75, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x51, 0xc6, 0xcf, + 0xcb, 0x00, 0x0c, 0x58, 0xcb, 0x8e, 0x60, 0x00, 0x0e, 0x20, 0x05, 0xc3, + 0x39, 0x8a, 0xc5, 0x31, 0xee, 0x00, 0x08, 0x31, 0xc9, 0x16, 0x14, 0x00, + 0x08, 0x51, 0xc3, 0x01, 0x5d, 0x05, 0x3c, 0x91, 0xcc, 0x51, 0x28, 0x05, + 0x3e, 0x21, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x39, 0xc6, 0x01, 0x73, 0x00, + 0x11, 0xd8, 0xcb, 0x8e, 0x3f, 0x05, 0x39, 0x70, 0xca, 0x64, 0x13, 0x00, + 0xf0, 0x31, 0x44, 0x05, 0x18, 0xc3, 0x39, 0x9f, 0xc8, 0xbe, 0x9a, 0x05, + 0x3c, 0xb1, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x09, 0xc6, 0xcf, 0xcb, 0x00, + 0x0c, 0x11, 0xc6, 0x01, 0x73, 0x00, 0x12, 0x18, 0x05, 0xc3, 0x39, 0xab, + 0xc6, 0x01, 0x73, 0x00, 0x12, 0x40, 0xd8, 0x25, 0xeb, 0x05, 0x3a, 0xb1, + 0xcf, 0x3e, 0xad, 0x05, 0x3a, 0xc0, 0x83, 0x00, 0x74, 0x89, 0xc2, 0x00, + 0xd0, 0x00, 0x74, 0x90, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0xb1, 0xcc, 0x04, + 0xcb, 0x0f, 0xdb, 0x28, 0xcc, 0x04, 0xcb, 0x0f, 0xdb, 0x21, 0xc5, 0x00, + 0x2c, 0x0f, 0xdb, 0x30, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0xd9, 0xcc, 0x04, + 0xcb, 0x0f, 0xdb, 0x00, 0xcc, 0x04, 0xcb, 0x0f, 0xda, 0xf9, 0xc5, 0x00, + 0x2c, 0x0f, 0xdb, 0x08, 0xcc, 0x07, 0xbb, 0x01, 0x0f, 0x69, 0xce, 0x0e, + 0xf1, 0x01, 0x0f, 0x60, 0x00, 0x43, 0x39, 0xb7, 0xd2, 0x05, 0xd4, 0x0f, + 0xc0, 0x09, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x88, 0xca, 0x03, 0x87, 0x01, + 0x0d, 0x89, 0xc9, 0x01, 0x88, 0x01, 0x0d, 0x80, 0x06, 0xc3, 0x39, 0xc9, + 0xdf, 0x0d, 0x3e, 0x01, 0x4b, 0x18, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x39, + 0xc9, 0xb4, 0x91, 0x0f, 0xb2, 0xf8, 0xe0, 0x0a, 0x87, 0x01, 0x3a, 0xd8, + 0xe0, 0x0b, 0x27, 0x01, 0x3b, 0x00, 0xe0, 0x0b, 0x27, 0x01, 0x3a, 0xf8, + 0xdc, 0x12, 0xe1, 0x01, 0x3d, 0x31, 0xde, 0x0e, 0x14, 0x01, 0x3d, 0x28, + 0xe0, 0x0a, 0x87, 0x01, 0x3a, 0xe8, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xd1, + 0xdb, 0x17, 0x46, 0x0f, 0xc0, 0xf0, 0xc4, 0x01, 0xce, 0x0f, 0xc4, 0xf1, + 0xc5, 0x06, 0x67, 0x0f, 0xc4, 0xf8, 0xc6, 0x64, 0xa4, 0x07, 0xda, 0x4b, + 0x03, 0x39, 0xcf, 0x15, 0x43, 0x39, 0xd5, 0x46, 0x00, 0x8b, 0x43, 0x39, + 0xe1, 0xc9, 0x60, 0xf3, 0x07, 0xd9, 0x49, 0xc4, 0x40, 0x95, 0x07, 0xd9, + 0x00, 0xc8, 0x4c, 0xcc, 0x02, 0x6e, 0x69, 0xc3, 0x00, 0x28, 0x02, 0x6f, + 0x08, 0xc3, 0x0e, 0xa7, 0x00, 0x04, 0x41, 0xd2, 0x49, 0x55, 0x00, 0x04, + 0x48, 0x0d, 0xc3, 0x39, 0xf3, 0x15, 0xc3, 0x3a, 0x05, 0xc5, 0x79, 0xf2, + 0x05, 0x4b, 0x49, 0xc5, 0xda, 0xe7, 0x05, 0x4b, 0x41, 0xc6, 0xc0, 0x7c, + 0x05, 0x4b, 0x31, 0xc5, 0xd9, 0x61, 0x00, 0x88, 0xc1, 0xc5, 0x90, 0xe4, + 0x00, 0x88, 0xd1, 0xc5, 0xdb, 0xff, 0x05, 0x4b, 0x68, 0xcb, 0x90, 0xde, + 0x05, 0x4b, 0xe1, 0x16, 0xc3, 0x3a, 0x11, 0xc5, 0xdb, 0xff, 0x00, 0x88, + 0x6b, 0x03, 0x3a, 0x1d, 0xc4, 0xad, 0x2b, 0x00, 0x88, 0x53, 0x03, 0x3a, + 0x23, 0xc6, 0x8e, 0xde, 0x00, 0x88, 0x09, 0xc5, 0x79, 0xf2, 0x00, 0x88, + 0x41, 0xc5, 0xd9, 0x61, 0x00, 0x88, 0xa1, 0xc5, 0xd6, 0x8c, 0x00, 0x88, + 0xc9, 0xc5, 0xb7, 0x9d, 0x00, 0x8a, 0x39, 0xc5, 0x90, 0xe4, 0x00, 0x8a, + 0xc0, 0x02, 0x43, 0x3a, 0x29, 0x02, 0x43, 0x3a, 0x5d, 0x02, 0x43, 0x3a, + 0x69, 0xc5, 
0x90, 0xe4, 0x05, 0x4b, 0xb9, 0xc5, 0xd6, 0x8c, 0x05, 0x4b, + 0xb1, 0xc6, 0x8e, 0xde, 0x00, 0x8a, 0x09, 0x16, 0xc3, 0x3a, 0x8b, 0xc5, + 0xda, 0xe7, 0x00, 0x8a, 0x19, 0x12, 0xc3, 0x3a, 0x97, 0xc4, 0xad, 0x2b, + 0x00, 0x8a, 0x29, 0x05, 0x43, 0x3a, 0xa9, 0xc4, 0xad, 0x2b, 0x05, 0x4b, + 0x89, 0xc6, 0xc0, 0x7c, 0x05, 0x4b, 0x81, 0xc6, 0x8e, 0xde, 0x05, 0x4b, + 0x79, 0xc5, 0x79, 0xf2, 0x00, 0x88, 0xe0, 0x02, 0x43, 0x3a, 0xb5, 0xc7, + 0xc0, 0x7b, 0x00, 0x8a, 0xd0, 0xc5, 0xd6, 0x8c, 0x00, 0x88, 0xd9, 0xc5, + 0xda, 0xe7, 0x00, 0x88, 0xe9, 0x12, 0xc3, 0x3a, 0xd9, 0xca, 0xa7, 0x2e, + 0x00, 0x89, 0x60, 0xc6, 0x8e, 0xde, 0x00, 0x88, 0x99, 0xc6, 0xc0, 0x7c, + 0x00, 0x88, 0xa9, 0xc5, 0x79, 0xf2, 0x00, 0x88, 0xb1, 0xc4, 0xad, 0x2b, + 0x00, 0x8a, 0xd9, 0xc5, 0xdb, 0xff, 0x00, 0x8a, 0xe1, 0xc5, 0x90, 0xe4, + 0x00, 0x8a, 0xe8, 0xc6, 0xd1, 0x03, 0x00, 0x8a, 0x68, 0xc4, 0xc6, 0x7b, + 0x00, 0x88, 0x73, 0x03, 0x3a, 0xe5, 0x45, 0xd5, 0x1f, 0x43, 0x3a, 0xe9, + 0x15, 0xc3, 0x3a, 0xf1, 0x05, 0x43, 0x3a, 0xfd, 0x87, 0x00, 0x8b, 0x11, + 0x02, 0xc3, 0x3b, 0x09, 0xc4, 0xa6, 0x08, 0x00, 0x8c, 0xf2, 0x03, 0x3b, + 0x17, 0x83, 0x00, 0x8b, 0x1b, 0x03, 0x3b, 0x1b, 0x87, 0x00, 0x8b, 0x43, + 0x03, 0x3b, 0x23, 0x91, 0x00, 0x8b, 0x6b, 0x03, 0x3b, 0x2a, 0x97, 0x00, + 0x8b, 0x93, 0x03, 0x3b, 0x2e, 0x8b, 0x00, 0x8b, 0xa2, 0x03, 0x3b, 0x32, + 0x91, 0x00, 0x8b, 0x2b, 0x03, 0x3b, 0x38, 0x97, 0x00, 0x8b, 0x9a, 0x03, + 0x3b, 0x3c, 0x87, 0x00, 0x8b, 0x61, 0x02, 0x43, 0x3b, 0x40, 0x83, 0x00, + 0x8b, 0x53, 0x03, 0x3b, 0x56, 0x87, 0x00, 0x8b, 0x83, 0x03, 0x3b, 0x5a, + 0x8b, 0x00, 0x8b, 0x88, 0x02, 0x43, 0x3b, 0x5e, 0x02, 0x43, 0x3b, 0x7e, + 0xc5, 0x8e, 0xdf, 0x00, 0x8d, 0x43, 0x03, 0x3b, 0x9e, 0xc6, 0xbb, 0xec, + 0x00, 0x8d, 0xf9, 0x47, 0x79, 0xeb, 0x43, 0x3b, 0xa2, 0x44, 0x3a, 0xbf, + 0xc3, 0x3b, 0xb2, 0xc3, 0x39, 0x37, 0x00, 0x8d, 0xd2, 0x03, 0x3b, 0xf7, + 0x02, 0x43, 0x3b, 0xfb, 0xc5, 0xc0, 0x7d, 0x00, 0x8d, 0x73, 0x03, 0x3c, + 0x21, 0xc6, 0xc1, 0x86, 0x00, 0x8e, 0x00, 0x02, 0x43, 0x3c, 0x25, 0x02, + 0x43, 0x3c, 0x50, 0xc4, 0x79, 0xf3, 0x00, 0x8d, 0xc3, 0x03, 0x3c, 0x74, + 0xc6, 0xba, 0x7c, 0x00, 0x8e, 0x0b, 0x03, 0x3c, 0x78, 0xc6, 0xca, 0x0e, + 0x00, 0x8f, 0x5a, 0x03, 0x3c, 0x7c, 0x02, 0x43, 0x3c, 0x80, 0xc4, 0xc6, + 0x7a, 0x00, 0x8d, 0xeb, 0x03, 0x3c, 0x8a, 0xc6, 0xc6, 0x79, 0x00, 0x8d, + 0xf0, 0x02, 0x43, 0x3c, 0x8e, 0xc6, 0xb7, 0x9c, 0x00, 0x8f, 0x83, 0x03, + 0x3c, 0xa6, 0xc9, 0x90, 0xe0, 0x00, 0x8f, 0xc8, 0xc5, 0xd9, 0xca, 0x01, + 0x89, 0x98, 0xc5, 0xda, 0xe7, 0x01, 0x8b, 0x89, 0x12, 0xc3, 0x3c, 0xaa, + 0xca, 0xa7, 0x2e, 0x01, 0x8b, 0xc8, 0xc6, 0x8e, 0xde, 0x01, 0x89, 0x91, + 0xc6, 0xc0, 0x7c, 0x01, 0x89, 0xc1, 0xc5, 0x79, 0xf2, 0x01, 0x8a, 0x19, + 0xc4, 0xad, 0x2b, 0x01, 0x8a, 0x31, 0xc5, 0xdb, 0xff, 0x01, 0x8a, 0x49, + 0xc5, 0xd9, 0x61, 0x01, 0x8b, 0x29, 0xc5, 0xb7, 0x9d, 0x01, 0x8c, 0x01, + 0xc5, 0x90, 0xe4, 0x01, 0x8c, 0x28, 0x02, 0x43, 0x3c, 0xb6, 0xc5, 0xdb, + 0xff, 0x01, 0x89, 0xa9, 0xc5, 0x90, 0xe4, 0x01, 0x89, 0xb1, 0xc6, 0xc0, + 0x7c, 0x01, 0x8b, 0x31, 0xc4, 0xad, 0x2b, 0x01, 0x8b, 0x39, 0xc7, 0xca, + 0x0d, 0x01, 0x8b, 0x40, 0xc6, 0x8e, 0xde, 0x01, 0x89, 0xd3, 0x03, 0x3c, + 0xd4, 0xc5, 0xda, 0xe7, 0x01, 0x89, 0xd9, 0x12, 0xc3, 0x3c, 0xda, 0xc4, + 0xad, 0x2b, 0x01, 0x89, 0xe9, 0x16, 0xc3, 0x3c, 0xef, 0xc5, 0x90, 0xe4, + 0x01, 0x8a, 0x01, 0xcb, 0x90, 0xde, 0x01, 0x8b, 0x68, 0x12, 0xc3, 0x3c, + 0xfb, 0xc4, 0xad, 0x2b, 0x01, 0x8b, 0x78, 0x02, 0x43, 0x3d, 0x07, 0x87, + 0x01, 0x8c, 0x70, 0x87, 0x01, 0x8a, 0x90, 0x91, 0x01, 0x8a, 0xab, 0x03, + 0x3d, 0x20, 0xc6, 0xb7, 0x9c, 0x01, 0x8c, 0x0a, 0x03, 0x3d, 0x26, 0x02, + 0x43, 0x3d, 
0x2a, 0x02, 0x43, 0x3d, 0x37, 0x87, 0x01, 0x8a, 0xc8, 0x91, + 0x01, 0x8a, 0xe8, 0x83, 0x07, 0xfb, 0x39, 0x8b, 0x07, 0xfb, 0x41, 0x97, + 0x07, 0xfb, 0x49, 0x87, 0x07, 0xfb, 0x51, 0x91, 0x07, 0xfb, 0x59, 0x1b, + 0xc3, 0x3d, 0x44, 0xc2, 0x00, 0x16, 0x07, 0xfb, 0x78, 0xc4, 0x79, 0xf3, + 0x07, 0xfd, 0x61, 0xc6, 0xba, 0x7c, 0x07, 0xfd, 0x78, 0xc8, 0x4b, 0x94, + 0x08, 0x5b, 0xf9, 0xc7, 0x0d, 0x04, 0x08, 0x5b, 0xf0, 0xc4, 0x18, 0x12, + 0x08, 0x5b, 0xe9, 0x91, 0x08, 0x5b, 0xc8, 0xc3, 0x77, 0x79, 0x08, 0x5b, + 0x81, 0xc4, 0xdc, 0x2d, 0x08, 0x5b, 0x70, 0xc8, 0x4b, 0x94, 0x08, 0x5a, + 0xf9, 0xc7, 0x0d, 0x04, 0x08, 0x5a, 0xf0, 0xc4, 0x18, 0x12, 0x08, 0x5a, + 0xe9, 0x91, 0x08, 0x5a, 0xc8, 0xc4, 0xdc, 0x2d, 0x08, 0x5a, 0x71, 0xc3, + 0x77, 0x79, 0x08, 0x5a, 0x88, 0xcb, 0x57, 0x1e, 0x0f, 0x65, 0x99, 0xc2, + 0x02, 0xa0, 0x0f, 0x65, 0x90, 0xc4, 0x18, 0x10, 0x0f, 0x65, 0x49, 0xc2, + 0x22, 0xcc, 0x0f, 0x65, 0x40, 0xc3, 0x0d, 0x14, 0x0f, 0x65, 0x39, 0xc3, + 0x09, 0x9e, 0x0f, 0x65, 0x30, 0xc4, 0x02, 0xde, 0x0f, 0x65, 0x29, 0xc2, + 0x02, 0xa0, 0x0f, 0x65, 0x20, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xe8, 0xc8, + 0x4b, 0x94, 0x0f, 0x64, 0xa1, 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x58, 0xc9, + 0x57, 0x20, 0x0f, 0x64, 0xe0, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x99, 0xc7, + 0x0d, 0x04, 0x0f, 0x64, 0x50, 0xc2, 0x0d, 0x10, 0x0f, 0x64, 0x03, 0x03, + 0x3d, 0x50, 0x00, 0x43, 0x3d, 0x56, 0xc2, 0x0d, 0x10, 0x0f, 0x63, 0xfb, + 0x03, 0x3d, 0x62, 0x00, 0x43, 0x3d, 0x68, 0xc3, 0x45, 0x6b, 0x0f, 0x63, + 0xf3, 0x03, 0x3d, 0x74, 0xc2, 0x00, 0x5f, 0x0f, 0x63, 0xaa, 0x03, 0x3d, + 0x7a, 0xc3, 0x0d, 0x0f, 0x0f, 0x63, 0xeb, 0x03, 0x3d, 0x7e, 0xc2, 0x00, + 0x33, 0x0f, 0x63, 0xa2, 0x03, 0x3d, 0x84, 0xc4, 0x0d, 0x0e, 0x0f, 0x63, + 0xe3, 0x03, 0x3d, 0x88, 0xc3, 0x02, 0xdf, 0x0f, 0x63, 0x9a, 0x03, 0x3d, + 0x8e, 0xc4, 0x18, 0x12, 0x0f, 0x63, 0xdb, 0x03, 0x3d, 0x92, 0x91, 0x0f, + 0x63, 0x92, 0x03, 0x3d, 0x98, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xa8, 0xc8, + 0x4b, 0x94, 0x0f, 0x64, 0x61, 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x18, 0xc2, + 0x02, 0x6f, 0x01, 0x96, 0x29, 0xc2, 0x00, 0x35, 0x01, 0x96, 0x30, 0xc3, + 0x05, 0x14, 0x01, 0x9f, 0x01, 0x16, 0xc3, 0x3d, 0x9c, 0x08, 0xc3, 0x3d, + 0xaa, 0x15, 0xc3, 0x3d, 0xb7, 0x07, 0xc3, 0x3d, 0xc9, 0xc4, 0x26, 0x78, + 0x01, 0x9f, 0x42, 0x03, 0x3d, 0xd8, 0x19, 0xc3, 0x3d, 0xde, 0x0a, 0xc3, + 0x3d, 0xe6, 0xc2, 0x00, 0xc4, 0x01, 0x9b, 0x10, 0xc3, 0x09, 0x9e, 0x01, + 0x9a, 0xe3, 0x03, 0x3d, 0xf2, 0x0b, 0x43, 0x3d, 0xf8, 0xc2, 0x22, 0xcc, + 0x01, 0x9a, 0xf3, 0x03, 0x3e, 0x04, 0xc4, 0x18, 0x10, 0x01, 0x9a, 0xfa, + 0x03, 0x3e, 0x0a, 0xc4, 0x00, 0x2d, 0x01, 0x9b, 0x03, 0x03, 0x3e, 0x10, + 0xc5, 0x66, 0xb1, 0x01, 0x9b, 0x18, 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x58, + 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xa9, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x58, + 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x71, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xc0, + 0x49, 0x2a, 0xf5, 0xc3, 0x3e, 0x16, 0x02, 0x43, 0x3e, 0x2c, 0x49, 0x2a, + 0xf5, 0x43, 0x3e, 0x3e, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x61, 0xdb, 0x18, + 0x03, 0x0f, 0xd1, 0xb0, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x51, 0xdb, 0x18, + 0x03, 0x0f, 0xd1, 0xa0, 0xc3, 0x00, 0x74, 0x0f, 0xd0, 0xf1, 0xc5, 0x56, + 0xa5, 0x0f, 0xd1, 0x10, 0xc8, 0x02, 0x9f, 0x01, 0x34, 0x39, 0x42, 0x00, + 0x58, 0xc3, 0x3e, 0x4a, 0x46, 0x02, 0xae, 0xc3, 0x3e, 0x56, 0x46, 0x01, + 0xc8, 0x43, 0x3e, 0x62, 0xc5, 0x22, 0xdb, 0x01, 0x33, 0x08, 0xca, 0xa7, + 0xc4, 0x01, 0x38, 0x29, 0xdc, 0x13, 0x51, 0x0f, 0xde, 0x00, 0xcd, 0x77, + 0xd5, 0x0f, 0xbc, 0xa9, 0xcc, 0x51, 0x6c, 0x01, 0x2d, 0x19, 0xd1, 0x51, + 0x67, 0x0f, 0xbc, 0xa0, 0x14, 0xc3, 0x3e, 0x6e, 0x0e, 0xc3, 0x3e, 0x7a, + 0x46, 0x02, 
0xae, 0xc3, 0x3e, 0x86, 0xd7, 0x27, 0xe7, 0x01, 0x2f, 0x59, + 0xd4, 0x3d, 0x68, 0x01, 0x1c, 0x28, 0xc4, 0x5d, 0x32, 0x01, 0x31, 0xe1, + 0xcb, 0x93, 0x3b, 0x0f, 0x99, 0x20, 0xca, 0xa1, 0xac, 0x0f, 0x99, 0x30, + 0xc5, 0x0b, 0x0a, 0x01, 0x2d, 0x59, 0xc3, 0x0e, 0x6b, 0x01, 0x5a, 0x90, + 0xc5, 0x06, 0x82, 0x01, 0x30, 0xe1, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x40, + 0xcd, 0x4a, 0x56, 0x01, 0x2e, 0x41, 0xd2, 0x4a, 0x51, 0x0f, 0xbc, 0xd1, + 0xce, 0x74, 0xa2, 0x0f, 0xbc, 0xd8, 0xe0, 0x08, 0x27, 0x01, 0x37, 0xf8, + 0xc6, 0x46, 0x3e, 0x01, 0x2d, 0xd9, 0xc7, 0xbb, 0xcb, 0x01, 0x5a, 0xa0, + 0x89, 0x0f, 0x17, 0x18, 0xc5, 0x00, 0xa2, 0x0f, 0xb1, 0x73, 0x03, 0x3e, + 0x92, 0xd8, 0x23, 0x4b, 0x0f, 0xd7, 0x10, 0xd3, 0x41, 0x38, 0x0f, 0xb0, + 0xe9, 0xcb, 0x91, 0x78, 0x0f, 0xb0, 0xe0, 0xcb, 0x93, 0x9e, 0x01, 0x51, + 0x61, 0xcc, 0x8b, 0xd1, 0x01, 0x51, 0x59, 0xc9, 0x0e, 0x6e, 0x01, 0x51, + 0x51, 0xcb, 0x52, 0x5b, 0x01, 0x51, 0x48, 0x95, 0x0f, 0x46, 0x89, 0xca, + 0xa2, 0x92, 0x0f, 0x46, 0xa0, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xd3, 0x03, + 0x3e, 0x96, 0xc8, 0x4b, 0x94, 0x08, 0x4f, 0x18, 0xc7, 0x0d, 0x04, 0x08, + 0x4e, 0xcb, 0x03, 0x3e, 0x9c, 0xc8, 0x4b, 0x94, 0x08, 0x4f, 0x10, 0x00, + 0xc3, 0x3e, 0xa2, 0xc2, 0x0d, 0x10, 0x08, 0x4e, 0x7a, 0x03, 0x3e, 0xb1, + 0x00, 0xc3, 0x3e, 0xb7, 0xc2, 0x0d, 0x10, 0x08, 0x4e, 0x72, 0x03, 0x3e, + 0xc6, 0xc2, 0x00, 0x5f, 0x08, 0x4e, 0x23, 0x03, 0x3e, 0xcc, 0xc3, 0x45, + 0x6b, 0x08, 0x4e, 0x6a, 0x03, 0x3e, 0xd0, 0xc2, 0x00, 0x33, 0x08, 0x4e, + 0x1b, 0x03, 0x3e, 0xd6, 0xc3, 0x0d, 0x0f, 0x08, 0x4e, 0x62, 0x03, 0x3e, + 0xda, 0xc3, 0x02, 0xdf, 0x08, 0x4e, 0x13, 0x03, 0x3e, 0xe0, 0xc4, 0x0d, + 0x0e, 0x08, 0x4e, 0x5a, 0x03, 0x3e, 0xe4, 0x91, 0x08, 0x4e, 0x0b, 0x03, + 0x3e, 0xea, 0xc4, 0x18, 0x12, 0x08, 0x4e, 0x52, 0x03, 0x3e, 0xee, 0xc9, + 0x57, 0x20, 0x08, 0x4f, 0x20, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0x93, 0x03, + 0x3e, 0xf4, 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xd8, 0x91, 0x08, 0x4d, 0xb1, + 0x87, 0x08, 0x4d, 0xa9, 0x83, 0x08, 0x4d, 0xa0, 0x83, 0x08, 0x4d, 0x91, + 0xc2, 0x00, 0xd0, 0x08, 0x4d, 0x68, 0x87, 0x08, 0x4d, 0x89, 0x83, 0x08, + 0x4d, 0x78, 0xc9, 0x87, 0xed, 0x08, 0x4d, 0x80, 0x87, 0x08, 0x4d, 0x51, + 0x83, 0x08, 0x4d, 0x48, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0xd8, 0xc2, 0xe5, + 0xfd, 0x08, 0x4c, 0xc8, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0xa0, 0xc2, 0xe5, + 0xfd, 0x08, 0x4c, 0x58, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0x68, 0x49, 0x3d, + 0x54, 0xc3, 0x3e, 0xfa, 0x4a, 0x2c, 0x4a, 0xc3, 0x3f, 0x06, 0x49, 0x45, + 0xd2, 0xc3, 0x3f, 0x12, 0x47, 0x54, 0x42, 0x43, 0x3f, 0x1e, 0xc3, 0x64, + 0x58, 0x00, 0xc5, 0x51, 0xc3, 0x39, 0x6d, 0x00, 0xc5, 0x41, 0x1c, 0xc3, + 0x3f, 0x2a, 0x05, 0xc3, 0x3f, 0x34, 0xc3, 0x1d, 0x35, 0x00, 0xc5, 0x11, + 0x06, 0xc3, 0x3f, 0x3e, 0x16, 0xc3, 0x3f, 0x4a, 0xc3, 0xe5, 0xf0, 0x00, + 0xc4, 0xe9, 0xc3, 0x20, 0xf1, 0x00, 0xc4, 0xd9, 0xc3, 0x91, 0x00, 0x00, + 0xc4, 0xd0, 0x83, 0x00, 0xc4, 0x8b, 0x03, 0x3f, 0x54, 0xc2, 0x0e, 0x9a, + 0x00, 0xc4, 0x70, 0xc2, 0x19, 0x2c, 0x00, 0xc5, 0x39, 0x97, 0x00, 0xc5, + 0x30, 0x8a, 0x00, 0xc4, 0xb9, 0xcb, 0x97, 0x71, 0x00, 0xc4, 0x00, 0x83, + 0x00, 0xc4, 0xb1, 0xc2, 0x00, 0xd0, 0x00, 0xc4, 0xa8, 0xc2, 0x00, 0xd0, + 0x00, 0xc4, 0x99, 0x83, 0x00, 0xc4, 0x90, 0x83, 0x00, 0xc4, 0x81, 0x16, + 0xc3, 0x3f, 0x60, 0xcb, 0x8c, 0x9d, 0x00, 0xc4, 0x30, 0xc2, 0x00, 0xc1, + 0x00, 0xc4, 0x79, 0xc2, 0x01, 0x30, 0x00, 0xc4, 0x50, 0xcf, 0x62, 0x10, + 0x00, 0xc4, 0x20, 0x48, 0xb1, 0x71, 0xc3, 0x3f, 0x6a, 0xc2, 0x00, 0x75, + 0x00, 0xc2, 0x50, 0xc2, 0x02, 0x1c, 0x00, 0xc2, 0xe1, 0x83, 0x00, 0xc2, + 0x88, 0xc2, 0x01, 0x94, 0x00, 0xc2, 0xd1, 0x83, 0x00, 0xc2, 0x98, 0x83, + 0x00, 0xc2, 
0xc0, 0xc2, 0x0d, 0xf6, 0x00, 0xc2, 0xa1, 0x83, 0x00, 0xc2, + 0x80, 0x87, 0x00, 0xc2, 0x48, 0x87, 0x00, 0xc2, 0x40, 0xc2, 0x00, 0xd0, + 0x00, 0xc3, 0x91, 0x83, 0x00, 0xc3, 0x78, 0xc2, 0x0d, 0xf6, 0x00, 0xc3, + 0x71, 0x83, 0x00, 0xc3, 0x40, 0x83, 0x00, 0xc3, 0x68, 0x83, 0x00, 0xc3, + 0x60, 0x87, 0x00, 0xc3, 0x00, 0x9b, 0x00, 0xc2, 0xf8, 0xc4, 0x18, 0x10, + 0x08, 0xb2, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xb2, 0xb0, 0xc3, 0x0d, 0x14, + 0x08, 0xb2, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xb2, 0xa0, 0xc4, 0x02, 0xde, + 0x08, 0xb2, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xb2, 0x90, 0x8e, 0x08, 0xb1, + 0xc0, 0x94, 0x08, 0xb1, 0xb0, 0x8e, 0x08, 0xb0, 0x43, 0x03, 0x3f, 0x76, + 0x94, 0x08, 0xb0, 0x32, 0x03, 0x3f, 0x7a, 0xc2, 0x00, 0xd0, 0x08, 0xb0, + 0xd9, 0x83, 0x08, 0xb0, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xb0, 0xc9, 0x83, + 0x08, 0xb0, 0xc0, 0x96, 0x00, 0xea, 0xbb, 0x03, 0x3f, 0x7e, 0x87, 0x00, + 0xea, 0x4b, 0x03, 0x3f, 0xab, 0x9c, 0x00, 0xed, 0xdb, 0x03, 0x3f, 0xc3, + 0x98, 0x00, 0xea, 0xdb, 0x03, 0x3f, 0xc9, 0x85, 0x00, 0xec, 0xe3, 0x03, + 0x3f, 0xcf, 0x97, 0x00, 0xea, 0xc3, 0x03, 0x3f, 0xe7, 0x95, 0x00, 0x17, + 0x13, 0x03, 0x3f, 0xf1, 0x92, 0x00, 0xea, 0xb3, 0x03, 0x40, 0x01, 0x84, + 0x00, 0xea, 0x3b, 0x03, 0x40, 0x07, 0x47, 0x01, 0x56, 0xc3, 0x40, 0x1f, + 0x8f, 0x00, 0xea, 0x83, 0x03, 0x40, 0x2b, 0x8e, 0x00, 0x17, 0x0b, 0x03, + 0x40, 0x31, 0x8c, 0x00, 0x15, 0x93, 0x03, 0x40, 0x52, 0x0b, 0xc3, 0x40, + 0x58, 0x86, 0x00, 0xea, 0x43, 0x03, 0x40, 0x64, 0x88, 0x00, 0xed, 0x03, + 0x03, 0x40, 0x80, 0x94, 0x00, 0x15, 0x9b, 0x03, 0x40, 0x86, 0x89, 0x00, + 0xea, 0x6b, 0x03, 0x40, 0x98, 0x83, 0x00, 0xea, 0x1b, 0x03, 0x40, 0xaa, + 0x91, 0x00, 0xea, 0x93, 0x03, 0x40, 0xba, 0x8d, 0x00, 0xea, 0x79, 0x8a, + 0x00, 0x15, 0x83, 0x03, 0x40, 0xc6, 0x99, 0x00, 0x15, 0xb9, 0x9b, 0x00, + 0x15, 0xc1, 0x9a, 0x00, 0x17, 0x19, 0x93, 0x08, 0x3d, 0x28, 0xd5, 0x33, + 0x14, 0x08, 0x3c, 0x11, 0xd0, 0x33, 0x19, 0x08, 0x3c, 0x08, 0xc9, 0x3d, + 0x18, 0x05, 0x39, 0x01, 0xc8, 0xae, 0xfb, 0x05, 0x39, 0x08, 0xc3, 0x63, + 0x85, 0x00, 0x17, 0xe9, 0xcf, 0x63, 0x00, 0x05, 0x3c, 0x50, 0xc2, 0x00, + 0xc4, 0x00, 0xeb, 0xc1, 0xc9, 0xa8, 0x3a, 0x05, 0x34, 0xe1, 0xc9, 0x84, + 0xc0, 0x05, 0x34, 0xe8, 0x99, 0x00, 0xea, 0x11, 0x97, 0x00, 0xea, 0x09, + 0x96, 0x00, 0xea, 0x01, 0x94, 0x00, 0xe9, 0xfb, 0x03, 0x40, 0xd5, 0x92, + 0x00, 0xe9, 0xf1, 0x91, 0x00, 0xe9, 0xe3, 0x03, 0x40, 0xdb, 0x90, 0x00, + 0xe9, 0xd1, 0x8f, 0x00, 0xe9, 0xc9, 0x8e, 0x00, 0xe9, 0xc1, 0x8d, 0x00, + 0xe9, 0xb9, 0x8c, 0x00, 0xe9, 0xb1, 0x8b, 0x00, 0xe9, 0xa9, 0x8a, 0x00, + 0xe9, 0xa3, 0x03, 0x40, 0xdf, 0x89, 0x00, 0xe9, 0x99, 0x87, 0x00, 0xe9, + 0x89, 0x86, 0x00, 0xe9, 0x81, 0x84, 0x00, 0xe9, 0x73, 0x03, 0x40, 0xe5, + 0x83, 0x00, 0xe9, 0x63, 0x03, 0x40, 0xeb, 0x85, 0x05, 0x3f, 0x91, 0x88, + 0x05, 0x3f, 0x99, 0x93, 0x05, 0x3f, 0xa1, 0x98, 0x01, 0x63, 0xe8, 0x43, + 0x03, 0x35, 0xc3, 0x40, 0xef, 0x44, 0x10, 0xd1, 0x43, 0x41, 0x07, 0xcf, + 0x61, 0x89, 0x00, 0x16, 0x91, 0xce, 0x0f, 0x6e, 0x00, 0x16, 0x98, 0xc4, + 0x32, 0xbc, 0x05, 0x5b, 0x59, 0xc9, 0x0f, 0x73, 0x00, 0x15, 0xf1, 0xc9, + 0x03, 0xde, 0x00, 0x16, 0x18, 0x47, 0x10, 0x30, 0xc3, 0x41, 0x1f, 0x16, + 0x43, 0x41, 0x2e, 0xc8, 0x4d, 0x8d, 0x05, 0x38, 0xd9, 0xca, 0x3e, 0xe4, + 0x05, 0x38, 0xe1, 0xd0, 0x0f, 0x09, 0x05, 0x38, 0xe9, 0xd9, 0x1d, 0x6f, + 0x05, 0x38, 0xf1, 0xc5, 0x33, 0x24, 0x00, 0x17, 0xc0, 0xc4, 0x32, 0xbc, + 0x05, 0x5b, 0x51, 0xc9, 0x0f, 0x73, 0x00, 0x15, 0xf9, 0xc9, 0x03, 0xde, + 0x00, 0x16, 0x10, 0x00, 0xc3, 0x41, 0x34, 0xd5, 0x34, 0xf7, 0x05, 0x38, + 0xd0, 0xcc, 0x23, 0x3f, 0x08, 0x3d, 0x98, 0xc9, 0x3d, 0x18, 0x00, 0x17, + 0xc9, 0xc8, 
0xae, 0xfb, 0x00, 0x17, 0xd8, 0x45, 0x00, 0x5a, 0xc3, 0x41, + 0x74, 0x43, 0x11, 0x19, 0xc3, 0x41, 0x80, 0x42, 0x00, 0x30, 0x43, 0x41, + 0x8c, 0xc9, 0x03, 0xde, 0x00, 0x16, 0x21, 0xc4, 0x32, 0xbc, 0x00, 0x16, + 0xa0, 0x06, 0xc3, 0x41, 0x9e, 0xc8, 0x68, 0x56, 0x00, 0x16, 0xb8, 0x45, + 0x08, 0xcb, 0xc3, 0x41, 0xa8, 0x44, 0x05, 0x36, 0x43, 0x41, 0xba, 0xc9, + 0x3d, 0x18, 0x00, 0x17, 0xd1, 0xc8, 0xae, 0xfb, 0x00, 0x17, 0xe0, 0x47, + 0x19, 0x7a, 0xc3, 0x41, 0xcc, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x99, 0xc8, + 0x4e, 0x93, 0x00, 0x17, 0x30, 0xc3, 0x11, 0x7e, 0x0e, 0xb7, 0xd1, 0xc5, + 0xd8, 0x8f, 0x0e, 0xb7, 0x80, 0xc7, 0x00, 0x90, 0x0e, 0xb7, 0x98, 0xc3, + 0x11, 0x7e, 0x0e, 0xb8, 0xa1, 0xc5, 0xd8, 0x8f, 0x0e, 0xb8, 0x50, 0x8c, + 0x0e, 0xb5, 0x29, 0x8b, 0x0e, 0xb5, 0x20, 0xc3, 0x04, 0x87, 0x0e, 0xb6, + 0x38, 0x8b, 0x0e, 0xb6, 0x78, 0xc6, 0x10, 0x3f, 0x0e, 0xb6, 0xb0, 0xc6, + 0x51, 0x50, 0x0e, 0xbe, 0x59, 0xc4, 0xdb, 0x4c, 0x0e, 0xb6, 0x28, 0x0f, + 0x43, 0x41, 0xd8, 0xc2, 0x00, 0xba, 0x0e, 0xb6, 0xc9, 0xc2, 0x00, 0x0a, + 0x0e, 0xb6, 0xb9, 0x8b, 0x0e, 0xb6, 0x88, 0xc2, 0x00, 0x0a, 0x0e, 0xb6, + 0xc0, 0xc2, 0x20, 0xec, 0x0e, 0xb6, 0xa9, 0xc4, 0x89, 0xfe, 0x0e, 0xb6, + 0x48, 0xc4, 0x1a, 0x73, 0x0e, 0xb6, 0xa0, 0xca, 0x91, 0x2c, 0x0e, 0xb6, + 0x98, 0xc2, 0x01, 0x23, 0x0e, 0xb6, 0x90, 0x97, 0x0e, 0xb6, 0x70, 0x97, + 0x0e, 0xb6, 0x68, 0xc4, 0xdd, 0x9a, 0x0e, 0xb6, 0x60, 0xc4, 0x8b, 0x66, + 0x0e, 0xb6, 0x58, 0xc3, 0x01, 0xbb, 0x0e, 0xb6, 0x50, 0xc2, 0x01, 0x6f, + 0x0e, 0xb6, 0x41, 0xc6, 0x10, 0x3f, 0x0e, 0xb6, 0x30, 0xc4, 0x38, 0x2c, + 0x0e, 0xb6, 0x20, 0xc3, 0x04, 0x87, 0x0e, 0xb6, 0x18, 0xc4, 0xde, 0x3f, + 0x0e, 0xb6, 0x10, 0x9c, 0x0e, 0xa8, 0x19, 0x9b, 0x0e, 0xa8, 0x11, 0x9a, + 0x0e, 0xa8, 0x09, 0x99, 0x0e, 0xa8, 0x01, 0x98, 0x0e, 0xa7, 0xf9, 0x97, + 0x0e, 0xa7, 0xf1, 0x96, 0x0e, 0xa7, 0xe9, 0x95, 0x0e, 0xa7, 0xe1, 0x94, + 0x0e, 0xa7, 0xd9, 0x93, 0x0e, 0xa7, 0xd1, 0x92, 0x0e, 0xa7, 0xc9, 0x91, + 0x0e, 0xa7, 0xc1, 0x90, 0x0e, 0xa7, 0xb9, 0x8f, 0x0e, 0xa7, 0xb1, 0x8e, + 0x0e, 0xa7, 0xa9, 0x8d, 0x0e, 0xa7, 0xa1, 0x8c, 0x0e, 0xa7, 0x99, 0x8b, + 0x0e, 0xa7, 0x91, 0x8a, 0x0e, 0xa7, 0x89, 0x89, 0x0e, 0xa7, 0x81, 0x88, + 0x0e, 0xa7, 0x79, 0x87, 0x0e, 0xa7, 0x71, 0x86, 0x0e, 0xa7, 0x69, 0x85, + 0x0e, 0xa7, 0x61, 0x84, 0x0e, 0xa7, 0x59, 0x83, 0x0e, 0xa7, 0x50, 0x9c, + 0x0e, 0xa7, 0x49, 0x9b, 0x0e, 0xa7, 0x41, 0x9a, 0x0e, 0xa7, 0x39, 0x99, + 0x0e, 0xa7, 0x31, 0x98, 0x0e, 0xa7, 0x29, 0x97, 0x0e, 0xa7, 0x21, 0x96, + 0x0e, 0xa7, 0x19, 0x95, 0x0e, 0xa7, 0x11, 0x94, 0x0e, 0xa7, 0x09, 0x93, + 0x0e, 0xa7, 0x01, 0x92, 0x0e, 0xa6, 0xf9, 0x91, 0x0e, 0xa6, 0xf1, 0x90, + 0x0e, 0xa6, 0xe9, 0x8f, 0x0e, 0xa6, 0xe1, 0x8e, 0x0e, 0xa6, 0xd9, 0x8d, + 0x0e, 0xa6, 0xd1, 0x8c, 0x0e, 0xa6, 0xc9, 0x8b, 0x0e, 0xa6, 0xc1, 0x8a, + 0x0e, 0xa6, 0xb9, 0x89, 0x0e, 0xa6, 0xb1, 0x88, 0x0e, 0xa6, 0xa9, 0x87, + 0x0e, 0xa6, 0xa1, 0x86, 0x0e, 0xa6, 0x99, 0x85, 0x0e, 0xa6, 0x91, 0x84, + 0x0e, 0xa6, 0x89, 0x83, 0x0e, 0xa6, 0x80, 0xc3, 0x11, 0x7e, 0x0e, 0xb6, + 0x01, 0xc5, 0xd8, 0x8f, 0x0e, 0xb5, 0xb0, 0xc7, 0x00, 0x90, 0x0e, 0xb5, + 0xc8, 0x0f, 0x43, 0x41, 0xe4, 0xc2, 0x00, 0xba, 0x0e, 0xba, 0x69, 0xc2, + 0x00, 0x0a, 0x0e, 0xba, 0x59, 0x8b, 0x0e, 0xba, 0x28, 0xc2, 0x00, 0x0a, + 0x0e, 0xba, 0x60, 0xc6, 0x10, 0x3f, 0x0e, 0xba, 0x50, 0xc2, 0x20, 0xec, + 0x0e, 0xba, 0x49, 0xc4, 0x89, 0xfe, 0x0e, 0xb9, 0xe8, 0xc4, 0x1a, 0x73, + 0x0e, 0xba, 0x40, 0xca, 0x91, 0x2c, 0x0e, 0xba, 0x38, 0xc2, 0x01, 0x23, + 0x0e, 0xba, 0x30, 0x8b, 0x0e, 0xba, 0x18, 0x97, 0x0e, 0xba, 0x10, 0x97, + 0x0e, 0xba, 0x08, 0xc4, 0xdd, 0x9a, 0x0e, 0xba, 0x00, 0xc4, 0x8b, 0x66, + 0x0e, 0xb9, 
0xf8, 0xc3, 0x01, 0xbb, 0x0e, 0xb9, 0xf0, 0xc2, 0x01, 0x6f, + 0x0e, 0xb9, 0xe1, 0xc6, 0x10, 0x3f, 0x0e, 0xb9, 0xd0, 0xc3, 0x04, 0x87, + 0x0e, 0xb9, 0xd8, 0xc4, 0xdb, 0x4c, 0x0e, 0xb9, 0xc8, 0xc4, 0x38, 0x2c, + 0x0e, 0xb9, 0xc0, 0xc3, 0x04, 0x87, 0x0e, 0xb9, 0xb8, 0xc4, 0xde, 0x3f, + 0x0e, 0xb9, 0xb0, 0x0f, 0x43, 0x41, 0xf0, 0xc2, 0x00, 0xba, 0x0e, 0xb9, + 0x99, 0xc2, 0x00, 0x0a, 0x0e, 0xb9, 0x89, 0x8b, 0x0e, 0xb9, 0x58, 0xc2, + 0x00, 0x0a, 0x0e, 0xb9, 0x90, 0xc6, 0x10, 0x3f, 0x0e, 0xb9, 0x80, 0xc2, + 0x20, 0xec, 0x0e, 0xb9, 0x79, 0xc4, 0x89, 0xfe, 0x0e, 0xb9, 0x1a, 0x03, + 0x41, 0xfc, 0xc4, 0x1a, 0x73, 0x0e, 0xb9, 0x70, 0xc2, 0x01, 0x23, 0x0e, + 0xb9, 0x60, 0x8b, 0x0e, 0xb9, 0x48, 0x97, 0x0e, 0xb9, 0x40, 0x97, 0x0e, + 0xb9, 0x38, 0xc4, 0xdd, 0x9a, 0x0e, 0xb9, 0x30, 0xc4, 0x8b, 0x66, 0x0e, + 0xb9, 0x28, 0xc3, 0x01, 0xbb, 0x0e, 0xb9, 0x20, 0xc2, 0x01, 0x6f, 0x0e, + 0xb9, 0x11, 0xc6, 0x10, 0x3f, 0x0e, 0xb9, 0x00, 0xc3, 0x04, 0x87, 0x0e, + 0xb9, 0x08, 0xc4, 0xdb, 0x4c, 0x0e, 0xb8, 0xf8, 0xc4, 0x38, 0x2c, 0x0e, + 0xb8, 0xf0, 0xc3, 0x04, 0x87, 0x0e, 0xb8, 0xe8, 0xc4, 0xde, 0x3f, 0x0e, + 0xb8, 0xe0, 0xc4, 0x26, 0x78, 0x0e, 0xbf, 0xa9, 0xc5, 0x06, 0xdb, 0x0e, + 0xbf, 0xa1, 0x15, 0xc3, 0x42, 0x02, 0x08, 0xc3, 0x42, 0x0e, 0x16, 0xc3, + 0x42, 0x1a, 0xc3, 0x05, 0x14, 0x0e, 0xbf, 0x69, 0xc4, 0x15, 0xe7, 0x0e, + 0xbf, 0x60, 0x12, 0xc3, 0x42, 0x26, 0xca, 0x9c, 0xac, 0x0e, 0xbe, 0x41, + 0xcc, 0x8b, 0x65, 0x0e, 0xbe, 0x31, 0xcc, 0x89, 0xfd, 0x0e, 0xbe, 0x29, + 0xce, 0x10, 0x3e, 0x0e, 0xbe, 0x21, 0x46, 0x03, 0x13, 0xc3, 0x42, 0x38, + 0xc5, 0xdb, 0xf0, 0x0e, 0xbd, 0x49, 0x48, 0x0b, 0x17, 0x43, 0x42, 0xdc, + 0xc8, 0x9c, 0x0e, 0x0e, 0xbc, 0x79, 0xc9, 0xaa, 0x9e, 0x0e, 0xbc, 0x69, + 0xd3, 0x43, 0x00, 0x0e, 0xbc, 0x48, 0x91, 0x0e, 0xaf, 0xe3, 0x03, 0x43, + 0x7d, 0x92, 0x0e, 0xaf, 0xeb, 0x03, 0x43, 0x81, 0x85, 0x0e, 0xaf, 0x83, + 0x03, 0x43, 0x91, 0x97, 0x0e, 0xb0, 0x13, 0x03, 0x43, 0x97, 0x96, 0x0e, + 0xb0, 0x0b, 0x03, 0x43, 0x9d, 0x95, 0x0e, 0xb0, 0x03, 0x03, 0x43, 0xa9, + 0x88, 0x0e, 0xaf, 0x9b, 0x03, 0x43, 0xaf, 0x94, 0x0e, 0xaf, 0xfb, 0x03, + 0x43, 0xb5, 0x9a, 0x0e, 0xb0, 0x2b, 0x03, 0x43, 0xbb, 0x90, 0x0e, 0xaf, + 0xdb, 0x03, 0x43, 0xbf, 0x8f, 0x0e, 0xaf, 0xd3, 0x03, 0x43, 0xc3, 0x8e, + 0x0e, 0xaf, 0xcb, 0x03, 0x43, 0xc7, 0x8d, 0x0e, 0xaf, 0xc3, 0x03, 0x43, + 0xcd, 0x8b, 0x0e, 0xaf, 0xb3, 0x03, 0x43, 0xd3, 0x87, 0x0e, 0xaf, 0x93, + 0x03, 0x43, 0xd9, 0x9c, 0x0e, 0xb0, 0x3b, 0x03, 0x43, 0xe5, 0x86, 0x0e, + 0xaf, 0x8b, 0x03, 0x43, 0xeb, 0x89, 0x0e, 0xaf, 0xa3, 0x03, 0x43, 0xf1, + 0x84, 0x0e, 0xaf, 0x7b, 0x03, 0x43, 0xf7, 0x83, 0x0e, 0xaf, 0x73, 0x03, + 0x43, 0xfd, 0x9b, 0x0e, 0xb0, 0x31, 0x99, 0x0e, 0xb0, 0x21, 0x98, 0x0e, + 0xb0, 0x19, 0x93, 0x0e, 0xaf, 0xf1, 0x8c, 0x0e, 0xaf, 0xb9, 0x8a, 0x0e, + 0xaf, 0xa8, 0x91, 0x0e, 0xaf, 0x13, 0x03, 0x44, 0x03, 0x92, 0x0e, 0xaf, + 0x1b, 0x03, 0x44, 0x07, 0x85, 0x0e, 0xae, 0xb3, 0x03, 0x44, 0x17, 0x97, + 0x0e, 0xaf, 0x43, 0x03, 0x44, 0x1d, 0x96, 0x0e, 0xaf, 0x3b, 0x03, 0x44, + 0x23, 0x95, 0x0e, 0xaf, 0x33, 0x03, 0x44, 0x32, 0x94, 0x0e, 0xaf, 0x2b, + 0x03, 0x44, 0x38, 0x9a, 0x0e, 0xaf, 0x5b, 0x03, 0x44, 0x3e, 0x90, 0x0e, + 0xaf, 0x0b, 0x03, 0x44, 0x42, 0x8f, 0x0e, 0xaf, 0x03, 0x03, 0x44, 0x46, + 0x8e, 0x0e, 0xae, 0xfb, 0x03, 0x44, 0x4a, 0x8d, 0x0e, 0xae, 0xf3, 0x03, + 0x44, 0x50, 0x8b, 0x0e, 0xae, 0xe3, 0x03, 0x44, 0x56, 0x87, 0x0e, 0xae, + 0xc3, 0x03, 0x44, 0x5c, 0x9c, 0x0e, 0xaf, 0x6b, 0x03, 0x44, 0x68, 0x86, + 0x0e, 0xae, 0xbb, 0x03, 0x44, 0x6e, 0x89, 0x0e, 0xae, 0xd3, 0x03, 0x44, + 0x74, 0x84, 0x0e, 0xae, 0xab, 0x03, 0x44, 0x7a, 0x83, 0x0e, 0xae, 0xa3, + 0x03, 0x44, 
0x80, 0x9b, 0x0e, 0xaf, 0x61, 0x99, 0x0e, 0xaf, 0x51, 0x98, + 0x0e, 0xaf, 0x49, 0x93, 0x0e, 0xaf, 0x21, 0x8c, 0x0e, 0xae, 0xe9, 0x8a, + 0x0e, 0xae, 0xd9, 0x88, 0x0e, 0xae, 0xc8, 0xc4, 0x18, 0x10, 0x0e, 0xbf, + 0x49, 0xc2, 0x22, 0xcc, 0x0e, 0xbf, 0x40, 0xc3, 0x0d, 0x14, 0x0e, 0xbf, + 0x39, 0xc3, 0x09, 0x9e, 0x0e, 0xbf, 0x30, 0xc4, 0x02, 0xde, 0x0e, 0xbf, + 0x29, 0xc2, 0x02, 0xa0, 0x0e, 0xbf, 0x20, 0x9c, 0x0e, 0xb1, 0xd9, 0x9b, + 0x0e, 0xb1, 0xd1, 0x9a, 0x0e, 0xb1, 0xc9, 0x99, 0x0e, 0xb1, 0xc1, 0x98, + 0x0e, 0xb1, 0xb9, 0x97, 0x0e, 0xb1, 0xb1, 0x96, 0x0e, 0xb1, 0xa9, 0x95, + 0x0e, 0xb1, 0xa1, 0x94, 0x0e, 0xb1, 0x99, 0x93, 0x0e, 0xb1, 0x91, 0x92, + 0x0e, 0xb1, 0x89, 0x91, 0x0e, 0xb1, 0x81, 0x90, 0x0e, 0xb1, 0x79, 0x8f, + 0x0e, 0xb1, 0x71, 0x8e, 0x0e, 0xb1, 0x69, 0x8d, 0x0e, 0xb1, 0x61, 0x8c, + 0x0e, 0xb1, 0x59, 0x8b, 0x0e, 0xb1, 0x51, 0x8a, 0x0e, 0xb1, 0x49, 0x89, + 0x0e, 0xb1, 0x41, 0x88, 0x0e, 0xb1, 0x39, 0x87, 0x0e, 0xb1, 0x31, 0x86, + 0x0e, 0xb1, 0x29, 0x85, 0x0e, 0xb1, 0x21, 0x84, 0x0e, 0xb1, 0x19, 0x83, + 0x0e, 0xb1, 0x10, 0x9c, 0x0e, 0xb1, 0x09, 0x9b, 0x0e, 0xb1, 0x01, 0x9a, + 0x0e, 0xb0, 0xf9, 0x99, 0x0e, 0xb0, 0xf1, 0x98, 0x0e, 0xb0, 0xe9, 0x97, + 0x0e, 0xb0, 0xe1, 0x96, 0x0e, 0xb0, 0xd9, 0x95, 0x0e, 0xb0, 0xd1, 0x94, + 0x0e, 0xb0, 0xc9, 0x93, 0x0e, 0xb0, 0xc1, 0x92, 0x0e, 0xb0, 0xb9, 0x91, + 0x0e, 0xb0, 0xb1, 0x90, 0x0e, 0xb0, 0xa9, 0x8f, 0x0e, 0xb0, 0xa1, 0x8e, + 0x0e, 0xb0, 0x99, 0x8d, 0x0e, 0xb0, 0x91, 0x8c, 0x0e, 0xb0, 0x89, 0x8b, + 0x0e, 0xb0, 0x81, 0x8a, 0x0e, 0xb0, 0x79, 0x89, 0x0e, 0xb0, 0x71, 0x88, + 0x0e, 0xb0, 0x69, 0x87, 0x0e, 0xb0, 0x61, 0x86, 0x0e, 0xb0, 0x59, 0x85, + 0x0e, 0xb0, 0x51, 0x84, 0x0e, 0xb0, 0x49, 0x83, 0x0e, 0xb0, 0x40, 0xc2, + 0x00, 0xd0, 0x08, 0xe5, 0x19, 0x83, 0x08, 0xe5, 0x10, 0x94, 0x00, 0x6b, + 0x00, 0x8e, 0x00, 0x6b, 0x08, 0x8f, 0x00, 0x6a, 0xa1, 0x9b, 0x00, 0x6a, + 0xa9, 0x8e, 0x00, 0x6b, 0xeb, 0x03, 0x44, 0x86, 0x90, 0x00, 0x6b, 0xdb, + 0x03, 0x44, 0x8d, 0xc2, 0x01, 0xa3, 0x00, 0x6b, 0xe1, 0x8d, 0x00, 0x6b, + 0xf8, 0xc2, 0x00, 0xd0, 0x08, 0x8b, 0x09, 0x83, 0x08, 0x8b, 0x00, 0xc2, + 0x00, 0xd0, 0x08, 0x8a, 0xf9, 0x83, 0x08, 0x8a, 0xf0, 0xc4, 0x57, 0xbc, + 0x0e, 0x8f, 0x51, 0x46, 0xd1, 0x8d, 0x43, 0x44, 0x91, 0xc3, 0x01, 0x69, + 0x0e, 0x8f, 0x49, 0xc8, 0xb7, 0x7a, 0x0e, 0x8e, 0xb3, 0x03, 0x44, 0xb7, + 0x46, 0x1f, 0x87, 0xc3, 0x44, 0xbd, 0x07, 0xc3, 0x44, 0xc7, 0xc5, 0xd9, + 0xb1, 0x0e, 0x8c, 0x69, 0x0b, 0xc3, 0x44, 0xd3, 0x0a, 0x43, 0x44, 0xdd, + 0x07, 0xc3, 0x44, 0xe9, 0x11, 0xc3, 0x44, 0xf5, 0xc4, 0xdf, 0xe7, 0x0e, + 0x8c, 0x79, 0xd3, 0x42, 0x1c, 0x0e, 0x8a, 0xb1, 0xcc, 0x81, 0x75, 0x0e, + 0x8a, 0x20, 0xc7, 0xc8, 0x46, 0x0e, 0x8e, 0xc3, 0x03, 0x45, 0x04, 0x46, + 0xce, 0xcf, 0xc3, 0x45, 0x0a, 0xc3, 0x05, 0x9f, 0x0e, 0x8c, 0xbb, 0x03, + 0x45, 0x16, 0x94, 0x0e, 0x8c, 0xb3, 0x03, 0x45, 0x1a, 0x0a, 0xc3, 0x45, + 0x20, 0xcd, 0x79, 0xd0, 0x0e, 0x88, 0xb8, 0x0e, 0xc3, 0x45, 0x2c, 0x14, + 0xc3, 0x45, 0x36, 0x11, 0xc3, 0x45, 0x42, 0xd0, 0x5c, 0x22, 0x0e, 0x8a, + 0x29, 0xc7, 0xc8, 0x4d, 0x0e, 0x89, 0xa9, 0xc5, 0xac, 0x87, 0x0e, 0x89, + 0x09, 0xc6, 0xd3, 0x1f, 0x0e, 0x88, 0x98, 0xc4, 0x01, 0x2e, 0x0e, 0x8e, + 0x99, 0xcc, 0x8b, 0x89, 0x0e, 0x8a, 0xb8, 0x14, 0xc3, 0x45, 0x4c, 0x49, + 0xad, 0xad, 0xc3, 0x45, 0x58, 0xc5, 0xac, 0x87, 0x0e, 0x88, 0xf2, 0x03, + 0x45, 0x64, 0xc5, 0xc3, 0x54, 0x0e, 0x8d, 0xdb, 0x03, 0x45, 0x6a, 0xc5, + 0xc0, 0x9e, 0x0e, 0x8d, 0xb1, 0xc4, 0xe0, 0x2f, 0x0e, 0x8c, 0x81, 0x4d, + 0x7a, 0x1e, 0xc3, 0x45, 0x6e, 0x44, 0x1f, 0x19, 0x43, 0x45, 0x7a, 0x14, + 0xc3, 0x45, 0x86, 0x45, 0x3f, 0x0e, 0x43, 0x45, 0x90, 0xc4, 0xcb, 0x41, + 0x0e, 0x8d, 
0xbb, 0x03, 0x45, 0xa8, 0xcf, 0x65, 0x76, 0x0e, 0x88, 0x30, + 0x44, 0xa1, 0xbe, 0xc3, 0x45, 0xac, 0x11, 0xc3, 0x45, 0xb8, 0x0b, 0xc3, + 0x45, 0xc4, 0x44, 0xb3, 0xb1, 0xc3, 0x45, 0xce, 0xc5, 0xac, 0x87, 0x0e, + 0x89, 0x13, 0x03, 0x45, 0xda, 0xc6, 0xcf, 0xef, 0x0e, 0x88, 0x82, 0x03, + 0x45, 0xe0, 0x03, 0xc3, 0x45, 0xe6, 0x07, 0xc3, 0x46, 0x01, 0x46, 0x00, + 0x59, 0xc3, 0x46, 0x0d, 0x49, 0xac, 0x84, 0x43, 0x46, 0x1f, 0xcf, 0x68, + 0xa0, 0x0e, 0x8d, 0x99, 0x45, 0xa6, 0x7b, 0x43, 0x46, 0x27, 0x43, 0x01, + 0xd0, 0xc3, 0x46, 0x33, 0xc9, 0xb4, 0x9a, 0x0e, 0x8d, 0x30, 0x43, 0x02, + 0x9c, 0xc3, 0x46, 0x45, 0x46, 0x06, 0xdc, 0x43, 0x46, 0x63, 0xca, 0xa3, + 0xbe, 0x0e, 0x8d, 0x39, 0xcc, 0x81, 0xb1, 0x0e, 0x8a, 0xc9, 0xcd, 0x77, + 0xae, 0x0e, 0x8a, 0xc1, 0x47, 0x83, 0xf2, 0x43, 0x46, 0x6f, 0x4f, 0x63, + 0x3c, 0xc3, 0x46, 0x7b, 0x42, 0x02, 0x6f, 0xc3, 0x46, 0xa2, 0x46, 0xb7, + 0xd4, 0x43, 0x46, 0xae, 0x0b, 0xc3, 0x46, 0xba, 0x07, 0x43, 0x46, 0xc6, + 0xc4, 0x03, 0xc8, 0x0e, 0x8c, 0x21, 0xc2, 0x02, 0xae, 0x0e, 0x8c, 0x18, + 0x46, 0x15, 0x04, 0xc3, 0x46, 0xd2, 0x4b, 0x90, 0x02, 0x43, 0x46, 0xe4, + 0x43, 0x03, 0x35, 0xc3, 0x46, 0xf0, 0x45, 0x00, 0x8c, 0x43, 0x47, 0x08, + 0x9f, 0x00, 0x84, 0x59, 0xa0, 0x00, 0x84, 0x60, 0xc2, 0x00, 0xd0, 0x05, + 0x53, 0x71, 0x83, 0x05, 0x53, 0x68, 0x83, 0x05, 0x53, 0x59, 0xc2, 0x19, + 0x2c, 0x05, 0x53, 0x28, 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x51, 0x06, 0x43, + 0x47, 0x14, 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x39, 0x83, 0x05, 0x53, 0x30, + 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x21, 0x83, 0x05, 0x53, 0x18, 0xc2, 0x00, + 0xd0, 0x05, 0x53, 0x11, 0x83, 0x05, 0x53, 0x08, 0xc2, 0x00, 0xd0, 0x05, + 0x4f, 0xf1, 0x83, 0x05, 0x4f, 0xe8, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0xe1, + 0x83, 0x05, 0x4f, 0xd9, 0x06, 0x43, 0x47, 0x1e, 0xc2, 0x00, 0xc1, 0x05, + 0x4f, 0x79, 0xc2, 0x19, 0x2c, 0x05, 0x4f, 0x38, 0xc2, 0x00, 0xd0, 0x05, + 0x4f, 0x61, 0x83, 0x05, 0x4f, 0x58, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x51, + 0x83, 0x05, 0x4f, 0x48, 0x04, 0xc3, 0x47, 0x28, 0x10, 0xc3, 0x47, 0x32, + 0xc3, 0xe5, 0xf0, 0x05, 0x4f, 0x11, 0x83, 0x00, 0x81, 0x11, 0x0d, 0xc3, + 0x47, 0x42, 0x09, 0xc3, 0x47, 0x4c, 0x05, 0xc3, 0x47, 0x56, 0xc2, 0x02, + 0x1c, 0x00, 0x83, 0xc9, 0xc2, 0x0e, 0x9a, 0x00, 0x83, 0xd9, 0xc3, 0x17, + 0xb2, 0x00, 0x83, 0xe9, 0xc2, 0x00, 0x87, 0x00, 0x83, 0xf1, 0xc3, 0x00, + 0xcf, 0x00, 0x84, 0x01, 0xc2, 0x00, 0xd0, 0x00, 0x84, 0x08, 0x97, 0x01, + 0x8f, 0xa0, 0x91, 0x0d, 0x8b, 0x31, 0x87, 0x0d, 0x8b, 0x29, 0x8b, 0x0d, + 0x8b, 0x21, 0x83, 0x01, 0x87, 0x70, 0x97, 0x01, 0x86, 0x19, 0x91, 0x01, + 0x8f, 0x98, 0x83, 0x01, 0x87, 0x19, 0x97, 0x01, 0x87, 0x29, 0x91, 0x01, + 0x87, 0x38, 0x83, 0x01, 0x87, 0xa9, 0x87, 0x01, 0x87, 0xb1, 0x97, 0x01, + 0x8f, 0x80, 0x8b, 0x01, 0x8f, 0x89, 0x97, 0x01, 0x8f, 0x90, 0x83, 0x01, + 0x8f, 0xa9, 0x8b, 0x01, 0x8f, 0xb1, 0x97, 0x01, 0x8f, 0xb9, 0x87, 0x01, + 0x8f, 0xc1, 0x91, 0x01, 0x8f, 0xc8, 0x83, 0x01, 0x8f, 0xd9, 0x8b, 0x01, + 0x8f, 0xe1, 0x97, 0x01, 0x8f, 0xe9, 0x87, 0x01, 0x8f, 0xf1, 0x91, 0x01, + 0x8f, 0xf8, 0x87, 0x0d, 0x89, 0x09, 0x8b, 0x0d, 0x89, 0x00, 0x4f, 0x60, + 0x3f, 0xc3, 0x47, 0x60, 0x45, 0x28, 0xb1, 0x43, 0x47, 0x7c, 0x94, 0x00, + 0x64, 0x5b, 0x03, 0x47, 0x94, 0x8e, 0x00, 0x64, 0x62, 0x03, 0x47, 0x98, + 0xcb, 0x90, 0x44, 0x00, 0x66, 0xe8, 0x83, 0x00, 0x64, 0xf9, 0xc2, 0x00, + 0xd0, 0x00, 0x65, 0x00, 0x83, 0x00, 0x65, 0x09, 0xc2, 0x00, 0xd0, 0x00, + 0x65, 0x10, 0x83, 0x00, 0x65, 0x99, 0xc2, 0x00, 0xdb, 0x00, 0x66, 0xf0, + 0xc4, 0x14, 0xdd, 0x01, 0x7d, 0x81, 0x88, 0x01, 0x7d, 0xa0, 0x44, 0x00, + 0xde, 0x43, 0x47, 0x9c, 0x8a, 0x01, 0x7b, 0x59, 0xc8, 0x92, 0xfa, 0x01, + 0x7d, 0x20, 
0xc2, 0x01, 0xe2, 0x01, 0x78, 0x19, 0xc2, 0x00, 0x5f, 0x01, + 0x7d, 0x50, 0xc2, 0x00, 0xb1, 0x01, 0x7b, 0x69, 0xc3, 0x5f, 0x44, 0x01, + 0x7c, 0xa0, 0x44, 0xdf, 0x4b, 0xc3, 0x47, 0xa8, 0xc2, 0x01, 0xbb, 0x01, + 0x79, 0xb8, 0xc2, 0x02, 0x37, 0x01, 0x7b, 0xd1, 0xc2, 0x02, 0xa7, 0x01, + 0x7c, 0xc8, 0x92, 0x01, 0x79, 0xd9, 0xc2, 0x00, 0xc2, 0x01, 0x7a, 0x98, + 0x92, 0x01, 0x7a, 0x63, 0x03, 0x47, 0xb4, 0xc2, 0x02, 0x6f, 0x01, 0x7b, + 0x78, 0x90, 0x01, 0x7c, 0x99, 0xc2, 0x00, 0x40, 0x01, 0x7d, 0xd0, 0xc2, + 0x00, 0x61, 0x01, 0x79, 0xe1, 0x86, 0x01, 0x7d, 0xc0, 0xc4, 0xe3, 0x23, + 0x01, 0x79, 0xe9, 0xcc, 0x70, 0x8a, 0x01, 0x7a, 0xc8, 0xc2, 0x00, 0x8e, + 0x01, 0x78, 0xe9, 0x10, 0x43, 0x47, 0xba, 0xc3, 0x0e, 0x6b, 0x01, 0x7c, + 0x29, 0xc4, 0x03, 0x0e, 0x01, 0x7d, 0x00, 0xc2, 0x00, 0x8e, 0x01, 0x78, + 0xf8, 0x90, 0x01, 0x7a, 0x91, 0x99, 0x01, 0x7a, 0xb0, 0xca, 0x63, 0x9a, + 0x01, 0x7c, 0x78, 0x44, 0x23, 0x70, 0xc3, 0x47, 0xc4, 0x43, 0x71, 0xed, + 0x43, 0x47, 0xd0, 0x44, 0xdf, 0x37, 0xc3, 0x47, 0xdc, 0x43, 0x93, 0x74, + 0x43, 0x47, 0xe8, 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xd9, 0xc4, 0xe0, 0xaf, + 0x00, 0xcf, 0x58, 0x04, 0xc3, 0x47, 0xf4, 0x44, 0x71, 0xec, 0xc3, 0x48, + 0x00, 0x45, 0xda, 0x97, 0x43, 0x48, 0x0c, 0xc3, 0x38, 0x5b, 0x00, 0xcf, + 0xa9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x28, 0x02, 0x43, 0x48, 0x18, 0xce, + 0x2a, 0xfe, 0x0f, 0xd0, 0xa9, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xf8, 0xd2, + 0x4a, 0x2d, 0x0f, 0xd0, 0x41, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0xc9, 0xdf, + 0x0d, 0x00, 0x0f, 0xd0, 0xe9, 0x16, 0x43, 0x48, 0x28, 0xc7, 0x7a, 0x7f, + 0x08, 0xa2, 0x39, 0xc7, 0x14, 0x39, 0x08, 0xa2, 0x20, 0xc5, 0x40, 0xe7, + 0x08, 0xa2, 0x29, 0xc4, 0x1e, 0x97, 0x08, 0xa2, 0x10, 0x8e, 0x08, 0xa0, + 0x48, 0x94, 0x08, 0xa0, 0x38, 0x89, 0x00, 0xce, 0x10, 0xc2, 0x00, 0xe4, + 0x00, 0xcd, 0x59, 0x83, 0x00, 0xcc, 0x60, 0xc2, 0x02, 0x41, 0x00, 0xcd, + 0x49, 0x83, 0x00, 0xcc, 0x30, 0xc2, 0x02, 0x41, 0x00, 0xcd, 0x41, 0x83, + 0x00, 0xcc, 0x28, 0xc2, 0x00, 0xd0, 0x00, 0xcc, 0xc1, 0x83, 0x00, 0xcc, + 0xb8, 0x83, 0x00, 0xcc, 0x99, 0xc2, 0x01, 0x30, 0x00, 0xcc, 0x38, 0xc2, + 0x00, 0xd0, 0x00, 0xcc, 0x91, 0x83, 0x00, 0xcc, 0x89, 0xc2, 0x0d, 0xf6, + 0x00, 0xcc, 0x58, 0xc2, 0x00, 0xe4, 0x00, 0xcd, 0x51, 0x83, 0x00, 0xcc, + 0x48, 0xc2, 0x02, 0x41, 0x00, 0xcd, 0x39, 0x83, 0x00, 0xcc, 0x18, 0xc2, + 0x02, 0x41, 0x00, 0xcd, 0x31, 0x83, 0x00, 0xcc, 0x10, 0xc2, 0x00, 0xd0, + 0x00, 0xcc, 0xa9, 0x83, 0x00, 0xcc, 0xa0, 0x83, 0x00, 0xcc, 0x81, 0xc2, + 0x01, 0x30, 0x00, 0xcc, 0x20, 0xc2, 0x00, 0xd0, 0x00, 0xcc, 0x79, 0x83, + 0x00, 0xcc, 0x71, 0xc2, 0x0d, 0xf6, 0x00, 0xcc, 0x40, 0x9b, 0x00, 0xcd, + 0xf8, 0x9b, 0x00, 0xcd, 0xf0, 0x9b, 0x00, 0xcd, 0xd8, 0xc3, 0x18, 0x13, + 0x01, 0x27, 0xa1, 0xc3, 0x22, 0x45, 0x01, 0x27, 0x60, 0x00, 0x43, 0x48, + 0x34, 0x00, 0x43, 0x48, 0x46, 0xc7, 0x08, 0x79, 0x05, 0x41, 0x81, 0xc4, + 0x01, 0xce, 0x05, 0x41, 0x89, 0xc9, 0x67, 0x38, 0x05, 0x41, 0x99, 0xc6, + 0x06, 0xdb, 0x05, 0x41, 0xa0, 0xc8, 0x08, 0x79, 0x05, 0x41, 0x91, 0xca, + 0xa7, 0x88, 0x05, 0x41, 0xa8, 0xc2, 0x02, 0xe0, 0x0f, 0x3f, 0xf1, 0x8b, + 0x0f, 0x3f, 0xe8, 0xc2, 0x02, 0xe0, 0x0f, 0x3f, 0xe1, 0x8b, 0x0f, 0x3f, + 0xd8, 0x87, 0x0f, 0x3f, 0xd3, 0x03, 0x48, 0x5e, 0x8b, 0x0f, 0x3f, 0xc0, + 0x87, 0x0f, 0x3f, 0xbb, 0x03, 0x48, 0x62, 0x8b, 0x0f, 0x3f, 0xa8, 0xc2, + 0x02, 0xe0, 0x0f, 0x3f, 0xa1, 0x8b, 0x0f, 0x3f, 0x98, 0x87, 0x0f, 0x3f, + 0x93, 0x03, 0x48, 0x66, 0x8b, 0x0f, 0x3f, 0x80, 0xc2, 0x02, 0xe0, 0x0f, + 0x3f, 0x71, 0x8b, 0x0f, 0x3f, 0x68, 0x83, 0x00, 0x98, 0xf8, 0x87, 0x01, + 0x6c, 0xa8, 0x87, 0x0f, 0x3f, 0x50, 0x87, 0x0f, 0x3f, 0x20, 0x83, 0x0f, + 0x3f, 0x18, 
0x91, 0x05, 0x59, 0x31, 0x87, 0x05, 0x59, 0x2b, 0x03, 0x48, + 0x6a, 0x83, 0x05, 0x59, 0x03, 0x03, 0x48, 0x6e, 0x8b, 0x05, 0x59, 0x11, + 0x97, 0x05, 0x59, 0x08, 0x83, 0x01, 0x6d, 0xd8, 0x87, 0x01, 0x6d, 0xe0, + 0x87, 0x05, 0x58, 0x60, 0x83, 0x00, 0x92, 0xd8, 0x87, 0x00, 0x92, 0xe0, + 0x83, 0x00, 0x96, 0x18, 0x87, 0x00, 0x96, 0x20, 0x83, 0x00, 0x96, 0x83, + 0x03, 0x48, 0x72, 0x97, 0x00, 0x96, 0x89, 0x8b, 0x00, 0x96, 0x91, 0x87, + 0x00, 0x96, 0xab, 0x03, 0x48, 0x76, 0x91, 0x00, 0x96, 0xb0, 0xd1, 0x50, + 0xbd, 0x01, 0x4f, 0x20, 0xd0, 0x03, 0xb7, 0x01, 0x4b, 0x89, 0xce, 0x33, + 0x92, 0x01, 0x53, 0x99, 0xc9, 0x60, 0xf3, 0x01, 0x53, 0x89, 0xcf, 0x09, + 0xf8, 0x01, 0x5a, 0x00, 0xe0, 0x04, 0xe7, 0x01, 0x53, 0xb8, 0xa1, 0x0e, + 0x92, 0x09, 0xa0, 0x0e, 0x92, 0x01, 0x9f, 0x0e, 0x91, 0xf9, 0x9e, 0x0e, + 0x91, 0xf1, 0x9d, 0x0e, 0x91, 0xe8, 0xa6, 0x0e, 0x91, 0xe1, 0xa5, 0x0e, + 0x91, 0xd9, 0xa4, 0x0e, 0x91, 0xd1, 0xa2, 0x0e, 0x91, 0xc9, 0xa0, 0x0e, + 0x91, 0xc1, 0x9f, 0x0e, 0x91, 0xb9, 0x9d, 0x0e, 0x91, 0xb0, 0xa6, 0x0e, + 0x91, 0xa9, 0xa5, 0x0e, 0x91, 0xa1, 0xa4, 0x0e, 0x91, 0x99, 0xa3, 0x0e, + 0x91, 0x91, 0x9f, 0x0e, 0x91, 0x89, 0x9d, 0x0e, 0x91, 0x80, 0xa6, 0x0e, + 0x91, 0x79, 0xa4, 0x0e, 0x91, 0x71, 0xa3, 0x0e, 0x91, 0x69, 0xa2, 0x0e, + 0x91, 0x61, 0xa1, 0x0e, 0x91, 0x59, 0xa0, 0x0e, 0x91, 0x50, 0xa6, 0x0e, + 0x91, 0x49, 0xa5, 0x0e, 0x91, 0x41, 0xa4, 0x0e, 0x91, 0x39, 0xa1, 0x0e, + 0x91, 0x31, 0xa0, 0x0e, 0x91, 0x29, 0x9f, 0x0e, 0x91, 0x21, 0x9e, 0x0e, + 0x91, 0x18, 0xa1, 0x0e, 0x90, 0xe1, 0xa0, 0x0e, 0x90, 0xd9, 0x9f, 0x0e, + 0x90, 0xd1, 0x9e, 0x0e, 0x90, 0xc9, 0x9d, 0x0e, 0x90, 0xc0, 0xa1, 0x0e, + 0x90, 0xb9, 0xa0, 0x0e, 0x90, 0xb1, 0x9f, 0x0e, 0x90, 0xa9, 0x9e, 0x0e, + 0x90, 0xa1, 0x9d, 0x0e, 0x90, 0x98, 0xa6, 0x0e, 0x90, 0x91, 0xa5, 0x0e, + 0x90, 0x89, 0xa4, 0x0e, 0x90, 0x81, 0xa3, 0x0e, 0x90, 0x79, 0xa2, 0x0e, + 0x90, 0x71, 0xa1, 0x0e, 0x90, 0x69, 0xa0, 0x0e, 0x90, 0x61, 0x9f, 0x0e, + 0x90, 0x59, 0x9e, 0x0e, 0x90, 0x51, 0x9d, 0x0e, 0x90, 0x48, 0xcb, 0x94, + 0x90, 0x00, 0xfe, 0xf9, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0xf1, 0xc5, 0x28, + 0x47, 0x00, 0xfe, 0xe8, 0xc4, 0xe3, 0xab, 0x00, 0xff, 0x71, 0xc5, 0x28, + 0x47, 0x00, 0xff, 0x69, 0xcb, 0x94, 0x90, 0x00, 0xfe, 0x08, 0xcf, 0x6b, + 0x25, 0x08, 0x0b, 0xb0, 0x42, 0x00, 0x7a, 0xc3, 0x48, 0x7a, 0xc3, 0x79, + 0xe7, 0x00, 0x1d, 0x0b, 0x03, 0x48, 0x8c, 0xc7, 0x78, 0x4a, 0x00, 0x1d, + 0x2b, 0x03, 0x48, 0x92, 0xc4, 0x29, 0xc6, 0x00, 0x1c, 0xcb, 0x03, 0x48, + 0x98, 0x07, 0xc3, 0x48, 0x9e, 0x03, 0xc3, 0x48, 0xb0, 0xc4, 0x89, 0xfe, + 0x00, 0x1b, 0x81, 0x12, 0xc3, 0x48, 0xbf, 0xc3, 0xe5, 0xb4, 0x00, 0x1b, + 0xf9, 0xc4, 0x93, 0xa9, 0x00, 0x1c, 0x91, 0xc5, 0x51, 0x51, 0x00, 0x1c, + 0x99, 0xc5, 0xdb, 0x4b, 0x00, 0x1c, 0xa1, 0xc4, 0xde, 0x9b, 0x00, 0x1c, + 0xb1, 0x16, 0xc3, 0x48, 0xd5, 0xc5, 0x8b, 0x65, 0x00, 0x1c, 0xd1, 0xc5, + 0xdd, 0x99, 0x00, 0x1c, 0xd9, 0xc2, 0x14, 0x48, 0x00, 0x1c, 0xe1, 0xc2, + 0x06, 0xc6, 0x00, 0x1c, 0xe9, 0xc2, 0x07, 0x49, 0x00, 0x1c, 0xf1, 0x15, + 0xc3, 0x48, 0xe1, 0xc3, 0x11, 0xee, 0x00, 0x1d, 0x38, 0x42, 0x00, 0x7a, + 0xc3, 0x48, 0xf3, 0xc7, 0x78, 0x4a, 0x00, 0x1e, 0x2b, 0x03, 0x49, 0x05, + 0xc3, 0x79, 0xe7, 0x00, 0x1e, 0x0b, 0x03, 0x49, 0x0b, 0xc4, 0x29, 0xc6, + 0x00, 0x1d, 0xcb, 0x03, 0x49, 0x11, 0x07, 0xc3, 0x49, 0x17, 0x03, 0xc3, + 0x49, 0x29, 0xc4, 0x89, 0xfe, 0x00, 0x1b, 0x89, 0xc4, 0x93, 0xa9, 0x00, + 0x1d, 0x91, 0xc5, 0x51, 0x51, 0x00, 0x1d, 0x99, 0x06, 0xc3, 0x49, 0x38, + 0xc4, 0xde, 0x9b, 0x00, 0x1d, 0xb1, 0x16, 0xc3, 0x49, 0x44, 0x0d, 0xc3, + 0x49, 0x50, 0xc5, 0xdd, 0x99, 0x00, 0x1d, 0xd9, 0xc2, 0x14, 0x48, 0x00, + 0x1d, 0xe1, 
0xc2, 0x06, 0xc6, 0x00, 0x1d, 0xe9, 0xc2, 0x07, 0x49, 0x00, + 0x1d, 0xf1, 0x12, 0xc3, 0x49, 0x5c, 0xcb, 0x91, 0x2b, 0x00, 0x1e, 0x11, + 0x15, 0xc3, 0x49, 0x72, 0xc3, 0x11, 0xee, 0x00, 0x1e, 0x38, 0xd3, 0x1a, + 0x6b, 0x00, 0x1b, 0xd9, 0xda, 0x1a, 0x64, 0x00, 0x1b, 0xe8, 0xcb, 0x94, + 0x90, 0x00, 0xfe, 0x79, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0x71, 0xc5, 0x28, + 0x47, 0x00, 0xfe, 0x68, 0x4d, 0x37, 0xb4, 0xc3, 0x49, 0x88, 0xc5, 0xd6, + 0xe6, 0x00, 0x1e, 0xd1, 0xc4, 0x87, 0xf5, 0x00, 0x1f, 0x00, 0xcd, 0x7f, + 0xc1, 0x08, 0x0b, 0xc1, 0xca, 0x71, 0x88, 0x08, 0x0b, 0xf0, 0x44, 0x05, + 0x14, 0xc3, 0x49, 0xa4, 0x42, 0x02, 0x09, 0xc3, 0x49, 0xba, 0x44, 0x57, + 0x1d, 0x43, 0x49, 0xcc, 0xd1, 0x52, 0x88, 0x08, 0x0a, 0xc1, 0x48, 0xb9, + 0xaa, 0x43, 0x49, 0xdc, 0x48, 0xbd, 0x62, 0xc3, 0x49, 0xee, 0x4a, 0x9f, + 0xea, 0x43, 0x4a, 0x01, 0xc3, 0x02, 0x9f, 0x08, 0x0a, 0xdb, 0x03, 0x4a, + 0x10, 0xcc, 0x37, 0x61, 0x08, 0x0b, 0x60, 0xd4, 0x3d, 0xf4, 0x08, 0x0a, + 0xe9, 0xd5, 0x37, 0x58, 0x08, 0x0b, 0x78, 0xc6, 0x0e, 0xe0, 0x01, 0x54, + 0x01, 0xc5, 0x00, 0xd4, 0x01, 0x54, 0x12, 0x03, 0x4a, 0x16, 0xc8, 0x23, + 0xa0, 0x01, 0x54, 0x71, 0xcf, 0x02, 0x78, 0x01, 0x54, 0x80, 0xe0, 0x00, + 0xc7, 0x01, 0x54, 0xa0, 0x8e, 0x08, 0x9b, 0x08, 0x94, 0x08, 0x9b, 0x00, + 0xc6, 0x42, 0xd4, 0x00, 0xe5, 0xf0, 0xc6, 0x42, 0xd4, 0x00, 0x87, 0xf0, + 0x97, 0x01, 0x60, 0xf9, 0x8b, 0x01, 0x61, 0x00, 0xc3, 0x87, 0xc2, 0x01, + 0x61, 0x60, 0x97, 0x01, 0x62, 0x79, 0x8b, 0x01, 0x62, 0x80, 0xc3, 0x87, + 0xc2, 0x01, 0x62, 0xe0, 0x94, 0x00, 0x5b, 0x00, 0x8e, 0x00, 0x5b, 0x08, + 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xa9, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xf0, + 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xa1, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xe8, + 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xb1, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xf8, + 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xb9, 0xc8, 0x4b, 0x94, 0x0f, 0x69, 0x00, + 0xc4, 0xdc, 0x2d, 0x08, 0x7b, 0xd9, 0xc3, 0x77, 0x79, 0x08, 0x7b, 0xe8, + 0xc8, 0x0d, 0x03, 0x08, 0x79, 0x28, 0x0a, 0xc3, 0x4a, 0x1c, 0x19, 0xc3, + 0x4a, 0x28, 0xc2, 0x00, 0xc4, 0x08, 0x79, 0x10, 0xc3, 0x0d, 0x14, 0x08, + 0x79, 0x09, 0xc3, 0x09, 0x9e, 0x08, 0x79, 0x00, 0x46, 0x26, 0xf7, 0xc3, + 0x4a, 0x32, 0xc3, 0xb5, 0x3e, 0x08, 0x78, 0xd1, 0x15, 0xc3, 0x4a, 0x5f, + 0xd0, 0x5d, 0xe2, 0x08, 0x78, 0xc1, 0xc2, 0x00, 0x67, 0x08, 0x78, 0xa1, + 0x03, 0xc3, 0x4a, 0x69, 0xc3, 0x20, 0x18, 0x08, 0x78, 0x71, 0xc3, 0x00, + 0x4e, 0x08, 0x78, 0x69, 0xc6, 0xcf, 0xd7, 0x08, 0x78, 0x61, 0xc4, 0xe0, + 0xe7, 0x08, 0x78, 0x59, 0xc4, 0x4a, 0xb9, 0x08, 0x78, 0x51, 0xc2, 0x01, + 0x7f, 0x08, 0x78, 0x2b, 0x03, 0x4a, 0x73, 0xc5, 0x4a, 0xb3, 0x08, 0x78, + 0x41, 0xc3, 0x7e, 0x89, 0x08, 0x78, 0x39, 0xc5, 0x9c, 0xa2, 0x08, 0x78, + 0x21, 0xc4, 0xe3, 0x27, 0x08, 0x78, 0x10, 0xc5, 0x45, 0x69, 0x08, 0x53, + 0xf1, 0xc3, 0x05, 0x14, 0x08, 0x53, 0xe8, 0x0a, 0xc3, 0x4a, 0x79, 0xc3, + 0x1e, 0x1b, 0x08, 0x53, 0xb9, 0xc2, 0x39, 0x8b, 0x08, 0x53, 0x48, 0x42, + 0x00, 0xd0, 0xc3, 0x4a, 0x85, 0xc5, 0x40, 0x9b, 0x08, 0x53, 0xa8, 0xc4, + 0xdf, 0xc3, 0x08, 0x53, 0xb1, 0xc4, 0x9c, 0xa3, 0x08, 0x53, 0xa0, 0xc3, + 0x11, 0xef, 0x08, 0x53, 0x31, 0x03, 0x43, 0x4a, 0x91, 0xc2, 0x00, 0x8e, + 0x08, 0x53, 0x10, 0xc3, 0x00, 0xb6, 0x08, 0x53, 0x59, 0xc4, 0x9b, 0x90, + 0x08, 0x53, 0x68, 0xc3, 0x00, 0x49, 0x08, 0x53, 0x89, 0xc2, 0x17, 0xb6, + 0x08, 0x53, 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x67, 0xf1, 0xc8, 0x4b, 0x94, + 0x08, 0x67, 0xf8, 0x96, 0x08, 0x67, 0x3b, 0x03, 0x4a, 0xa1, 0x9b, 0x08, + 0x66, 0xd1, 0x85, 0x08, 0x66, 0x28, 0x95, 0x08, 0x67, 0x80, 0x8a, 0x08, + 0x67, 0x49, 0x95, 0x08, 0x66, 0x30, 0x9b, 0x08, 0x67, 0x40, 0x9c, 0x08, + 0x67, 0x28, 
0x92, 0x08, 0x67, 0x08, 0x9b, 0x08, 0x66, 0xb8, 0x9b, 0x08, + 0x66, 0x70, 0x96, 0x08, 0x65, 0x3b, 0x03, 0x4a, 0xa7, 0x9b, 0x08, 0x64, + 0xd1, 0x85, 0x08, 0x64, 0x28, 0x9b, 0x08, 0x65, 0x40, 0x9c, 0x08, 0x65, + 0x28, 0x92, 0x08, 0x65, 0x08, 0x9b, 0x08, 0x64, 0xb8, 0x9b, 0x08, 0x64, + 0x70, 0x95, 0x08, 0x64, 0x31, 0x8a, 0x08, 0x65, 0x48, 0x95, 0x08, 0x65, + 0x80, 0x8d, 0x08, 0x60, 0xe0, 0x96, 0x08, 0x62, 0x29, 0x95, 0x08, 0x61, + 0xf1, 0x94, 0x08, 0x61, 0xe1, 0x90, 0x08, 0x61, 0x21, 0x8e, 0x08, 0x61, + 0x01, 0x8d, 0x08, 0x60, 0xd1, 0x9b, 0x08, 0x60, 0xc1, 0x86, 0x08, 0x60, + 0x99, 0x89, 0x08, 0x60, 0x79, 0x84, 0x08, 0x60, 0x58, 0x8a, 0x08, 0x61, + 0xf8, 0x85, 0x08, 0x61, 0x41, 0x96, 0x08, 0x61, 0x31, 0x9b, 0x08, 0x61, + 0x51, 0x89, 0x08, 0x61, 0x68, 0x96, 0x08, 0x62, 0x31, 0x90, 0x08, 0x61, + 0x2b, 0x03, 0x4a, 0xad, 0x8d, 0x08, 0x60, 0xd9, 0x9b, 0x08, 0x60, 0xc9, + 0x89, 0x08, 0x60, 0x81, 0x84, 0x08, 0x60, 0x60, 0x96, 0x08, 0x61, 0x39, + 0x85, 0x08, 0x61, 0x49, 0x9b, 0x08, 0x61, 0x58, 0x8d, 0x08, 0x60, 0xe8, + 0xc2, 0x16, 0x1c, 0x08, 0x54, 0xd9, 0xc2, 0x00, 0x65, 0x08, 0x54, 0xc8, + 0x83, 0x08, 0x1d, 0x03, 0x03, 0x4a, 0xb1, 0x8b, 0x08, 0x1d, 0x09, 0x97, + 0x08, 0x1d, 0x11, 0x0d, 0xc3, 0x4a, 0xba, 0x09, 0xc3, 0x4a, 0xc2, 0x1a, + 0xc3, 0x4a, 0xca, 0xc2, 0x00, 0x64, 0x08, 0x1d, 0x41, 0x0c, 0xc3, 0x4a, + 0xd4, 0x16, 0xc3, 0x4a, 0xdc, 0x06, 0xc3, 0x4a, 0xea, 0xc2, 0x00, 0xb0, + 0x08, 0x1d, 0x89, 0x04, 0xc3, 0x4a, 0xf9, 0xc2, 0x00, 0x87, 0x08, 0x1d, + 0x99, 0x10, 0xc3, 0x4b, 0x06, 0x0f, 0xc3, 0x4b, 0x0e, 0xc2, 0x19, 0x2c, + 0x08, 0x1d, 0xc9, 0x18, 0xc3, 0x4b, 0x1a, 0x14, 0xc3, 0x4b, 0x22, 0xc2, + 0x00, 0xdb, 0x08, 0x1d, 0xf1, 0x15, 0xc3, 0x4b, 0x2a, 0xc2, 0x02, 0x1c, + 0x08, 0x1e, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0x1e, 0x18, 0xc3, 0x05, 0x14, + 0x08, 0x1e, 0x89, 0x16, 0xc3, 0x4b, 0x3a, 0xc7, 0x0d, 0x04, 0x08, 0x1e, + 0xa8, 0xc3, 0xd3, 0x4c, 0x08, 0x1a, 0xb1, 0xc3, 0x02, 0x44, 0x08, 0x1a, + 0xc0, 0xc3, 0xc1, 0x4b, 0x08, 0x1b, 0x29, 0xc5, 0xdc, 0xf4, 0x08, 0x1b, + 0x30, 0x97, 0x08, 0x1b, 0x41, 0x8b, 0x08, 0x1b, 0x80, 0x96, 0x08, 0x1b, + 0x88, 0x8a, 0x08, 0x18, 0x71, 0x95, 0x08, 0x18, 0xf8, 0x95, 0x08, 0x18, + 0xd8, 0xce, 0x69, 0xa0, 0x0e, 0x7d, 0xa1, 0xc8, 0x4e, 0x4b, 0x0e, 0x7d, + 0x98, 0xc7, 0x4e, 0x43, 0x0e, 0x7d, 0xab, 0x03, 0x4b, 0x44, 0xc7, 0xa6, + 0x73, 0x0e, 0x7c, 0xa0, 0xce, 0x69, 0xa0, 0x0e, 0x7c, 0xc9, 0xc9, 0x92, + 0x8d, 0x0e, 0x7c, 0xc0, 0xc9, 0xac, 0xd5, 0x0e, 0x7d, 0x71, 0xc9, 0x92, + 0x8d, 0x0e, 0x7d, 0x69, 0xc8, 0xbc, 0xa2, 0x0e, 0x7d, 0x60, 0xca, 0xa6, + 0x70, 0x0e, 0x7d, 0x2b, 0x03, 0x4b, 0x48, 0xc9, 0x92, 0x8d, 0x0e, 0x7d, + 0x1a, 0x03, 0x4b, 0x4e, 0xd6, 0x2d, 0x0a, 0x0e, 0x7d, 0x00, 0xc9, 0x92, + 0x8d, 0x0e, 0x7c, 0xeb, 0x03, 0x4b, 0x54, 0xca, 0xa6, 0x70, 0x0e, 0x7c, + 0xe0, 0xcc, 0x87, 0x39, 0x0e, 0x7c, 0xf0, 0xc7, 0x92, 0x8f, 0x0e, 0x7c, + 0xb1, 0xcb, 0x92, 0x8b, 0x0e, 0x7c, 0xa8, 0xc8, 0x94, 0x9e, 0x0e, 0x7c, + 0x3b, 0x03, 0x4b, 0x5a, 0xd0, 0x5d, 0xb2, 0x0e, 0x7c, 0x71, 0xc5, 0xd4, + 0xca, 0x0e, 0x7c, 0x69, 0xc7, 0x78, 0xdb, 0x0e, 0x7c, 0x42, 0x03, 0x4b, + 0x60, 0xcb, 0x95, 0x56, 0x0e, 0x7c, 0x60, 0xc6, 0x78, 0xdc, 0x0e, 0x78, + 0xd9, 0x4b, 0x8e, 0xfa, 0x43, 0x4b, 0x66, 0xc5, 0x00, 0x2c, 0x0e, 0x78, + 0xa9, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x48, 0xc8, 0xbc, 0x4a, 0x05, 0x4c, + 0x58, 0xc5, 0x00, 0x2c, 0x01, 0x2c, 0xe1, 0xc4, 0x00, 0x49, 0x01, 0x2c, + 0xd8, 0xc5, 0x00, 0x2c, 0x01, 0x2c, 0xd1, 0xd4, 0x3d, 0x54, 0x01, 0x2c, + 0xc8, 0x92, 0x05, 0x22, 0xa1, 0x9a, 0x05, 0x22, 0x90, 0x92, 0x05, 0x22, + 0x89, 0x9a, 0x05, 0x22, 0x79, 0x96, 0x05, 0x22, 0x70, 0x9a, 0x05, 0x22, + 0x40, 0x9a, 
0x05, 0x22, 0x10, 0x9a, 0x05, 0x21, 0xc8, 0x92, 0x05, 0x21, + 0xc1, 0x9a, 0x05, 0x21, 0xb1, 0x96, 0x05, 0x21, 0xa8, 0x9a, 0x05, 0x1d, + 0x48, 0x9a, 0x05, 0x1d, 0x18, 0x9a, 0x05, 0x17, 0x89, 0x92, 0x05, 0x17, + 0x98, 0x9a, 0x05, 0x17, 0xc0, 0x9a, 0x05, 0x18, 0x08, 0x9a, 0x05, 0x18, + 0x38, 0x9a, 0x05, 0x03, 0xd1, 0x92, 0x05, 0x03, 0xe0, 0x9a, 0x05, 0x04, + 0x48, 0x9a, 0x05, 0x04, 0x78, 0x9a, 0x05, 0x0a, 0xa8, 0x9a, 0x05, 0x0b, + 0x30, 0x9a, 0x05, 0x21, 0x58, 0x92, 0x05, 0x21, 0x11, 0x9a, 0x05, 0x21, + 0x00, 0x92, 0x05, 0x20, 0xf9, 0x9a, 0x05, 0x20, 0xe9, 0x96, 0x05, 0x20, + 0xe0, 0x9a, 0x05, 0x1c, 0x90, 0x9a, 0x05, 0x1c, 0x60, 0x9a, 0x05, 0x1b, + 0xf0, 0x9a, 0x05, 0x1e, 0x20, 0x9a, 0x05, 0x1d, 0xf0, 0x92, 0x05, 0x1d, + 0x89, 0x9a, 0x05, 0x1d, 0x78, 0x9a, 0x05, 0x1a, 0x20, 0x9a, 0x05, 0x19, + 0x71, 0x92, 0x05, 0x19, 0x80, 0x9a, 0x05, 0x1b, 0xd0, 0x9a, 0x05, 0x1b, + 0xa0, 0x92, 0x05, 0x1b, 0x41, 0x9a, 0x05, 0x1b, 0x31, 0x96, 0x05, 0x1b, + 0x28, 0x92, 0x05, 0x16, 0xb9, 0x9a, 0x05, 0x16, 0xa9, 0x96, 0x05, 0x16, + 0xa0, 0x9a, 0x05, 0x17, 0x28, 0x9a, 0x05, 0x17, 0x58, 0x9a, 0x05, 0x1a, + 0xf8, 0x9a, 0x05, 0x1a, 0xc8, 0x9a, 0x05, 0x1a, 0x51, 0x92, 0x05, 0x1a, + 0x60, 0x96, 0x05, 0x12, 0x51, 0x9a, 0x05, 0x12, 0x59, 0x92, 0x05, 0x12, + 0x68, 0x9a, 0x05, 0x04, 0xa9, 0x92, 0x05, 0x04, 0xb8, 0x9a, 0x05, 0x04, + 0xe1, 0x92, 0x05, 0x04, 0xf0, 0x9a, 0x05, 0x05, 0x38, 0x9a, 0x05, 0x05, + 0x60, 0x96, 0x05, 0x0b, 0x61, 0x9a, 0x05, 0x0b, 0x69, 0x92, 0x05, 0x0b, + 0x78, 0x9a, 0x05, 0x0b, 0xa0, 0x9a, 0x05, 0x0c, 0xd9, 0x92, 0x05, 0x0c, + 0xe8, 0x9a, 0x05, 0x0d, 0x11, 0x92, 0x05, 0x0d, 0x20, 0x9a, 0x05, 0x0d, + 0x78, 0x9a, 0x05, 0x0d, 0xa8, 0x9a, 0x05, 0x12, 0x20, 0x9a, 0x05, 0x11, + 0xb1, 0x92, 0x05, 0x11, 0xc0, 0x96, 0x05, 0x02, 0xd1, 0x9a, 0x05, 0x02, + 0xd9, 0x92, 0x05, 0x02, 0xe8, 0x9a, 0x05, 0x03, 0x11, 0x92, 0x05, 0x03, + 0x20, 0x9a, 0x05, 0x03, 0x80, 0x9a, 0x05, 0x09, 0xd1, 0x92, 0x05, 0x09, + 0xe0, 0x9a, 0x05, 0x0a, 0x09, 0x92, 0x05, 0x0a, 0x18, 0x9a, 0x05, 0x0a, + 0x78, 0x9a, 0x05, 0x10, 0xb9, 0x92, 0x05, 0x10, 0xc8, 0x96, 0x05, 0x10, + 0xf1, 0x9a, 0x05, 0x10, 0xf9, 0x92, 0x05, 0x11, 0x08, 0x9a, 0x05, 0x11, + 0x70, 0x97, 0x00, 0xb0, 0xab, 0x03, 0x4b, 0x72, 0x8b, 0x00, 0xb0, 0xd0, + 0x91, 0x00, 0xae, 0x13, 0x03, 0x4b, 0x76, 0x83, 0x00, 0xae, 0x19, 0x8b, + 0x00, 0xae, 0x09, 0x87, 0x00, 0xae, 0x00, 0x91, 0x00, 0xac, 0xcb, 0x03, + 0x4b, 0x7a, 0xc2, 0x00, 0x28, 0x00, 0xc7, 0x51, 0x83, 0x00, 0xac, 0xd1, + 0x8b, 0x00, 0xac, 0xc1, 0x87, 0x00, 0xac, 0xb8, 0x83, 0x08, 0xd5, 0xd8, + 0x91, 0x08, 0xd5, 0xc8, 0x8b, 0x08, 0xd5, 0xb8, 0x83, 0x08, 0xd5, 0xa8, + 0x91, 0x08, 0xd5, 0x98, 0x8b, 0x08, 0xd5, 0x88, 0x83, 0x00, 0xa8, 0x70, + 0x10, 0xc3, 0x4b, 0x7e, 0x87, 0x00, 0xa2, 0x98, 0x83, 0x00, 0xb1, 0x69, + 0x8b, 0x00, 0xb1, 0x61, 0x87, 0x00, 0xb1, 0x53, 0x03, 0x4b, 0x8a, 0x91, + 0x00, 0xb1, 0x49, 0x97, 0x00, 0xb1, 0x40, 0x97, 0x00, 0xb2, 0x41, 0x91, + 0x00, 0xb2, 0x49, 0x87, 0x00, 0xb2, 0x53, 0x03, 0x4b, 0x8e, 0x8b, 0x00, + 0xb2, 0x61, 0x83, 0x00, 0xb2, 0x68, 0x87, 0x00, 0xb0, 0xc0, 0x97, 0x00, + 0xb0, 0xe1, 0x91, 0x00, 0xb0, 0xe9, 0x87, 0x00, 0xb0, 0xf3, 0x03, 0x4b, + 0x92, 0x8b, 0x00, 0xb1, 0x01, 0x83, 0x00, 0xb1, 0x08, 0x83, 0x00, 0xc7, + 0x81, 0x97, 0x00, 0xc7, 0x68, 0x83, 0x00, 0xc7, 0x78, 0x87, 0x00, 0xaf, + 0x90, 0x83, 0x00, 0xae, 0x49, 0x8b, 0x00, 0xae, 0x41, 0x87, 0x00, 0xae, + 0x33, 0x03, 0x4b, 0x96, 0x91, 0x00, 0xae, 0x29, 0x97, 0x00, 0xae, 0x20, + 0x15, 0xc3, 0x4b, 0x9a, 0x83, 0x00, 0xaf, 0x39, 0x8b, 0x00, 0xaf, 0x31, + 0x87, 0x00, 0xaf, 0x23, 0x03, 0x4b, 0xb1, 0x91, 0x00, 0xaf, 0x19, 0x97, + 0x00, 0xaf, 
0x10, 0x83, 0x00, 0xb3, 0x01, 0x8b, 0x00, 0xb2, 0xf9, 0x87, + 0x00, 0xb2, 0xeb, 0x03, 0x4b, 0xb5, 0x97, 0x00, 0xb2, 0xd9, 0x91, 0x00, + 0xb2, 0xe0, 0x83, 0x00, 0xaf, 0x09, 0x8b, 0x00, 0xaf, 0x01, 0x87, 0x00, + 0xae, 0xf3, 0x03, 0x4b, 0xb9, 0x91, 0x00, 0xae, 0xe9, 0x97, 0x00, 0xae, + 0xe0, 0x0a, 0xc3, 0x4b, 0xbd, 0x97, 0x00, 0xb1, 0xd1, 0x91, 0x00, 0xb1, + 0xd9, 0x87, 0x00, 0xb1, 0xe3, 0x03, 0x4b, 0xd4, 0x8b, 0x00, 0xb1, 0xf1, + 0x83, 0x00, 0xb1, 0xf8, 0x87, 0x00, 0xb3, 0x20, 0x87, 0x00, 0xb0, 0x88, + 0x87, 0x00, 0xb0, 0x58, 0x87, 0x00, 0xb0, 0x28, 0x83, 0x00, 0xb0, 0x01, + 0x8b, 0x00, 0xaf, 0xf9, 0x87, 0x00, 0xaf, 0xeb, 0x03, 0x4b, 0xd8, 0x91, + 0x00, 0xaf, 0xe1, 0x97, 0x00, 0xaf, 0xd8, 0x83, 0x00, 0xaf, 0xd1, 0x8b, + 0x00, 0xaf, 0xc9, 0x87, 0x00, 0xaf, 0xbb, 0x03, 0x4b, 0xdc, 0x91, 0x00, + 0xaf, 0xb1, 0x97, 0x00, 0xaf, 0xa8, 0x87, 0x00, 0xaf, 0x58, 0x83, 0x00, + 0xae, 0xd9, 0x8b, 0x00, 0xae, 0xd1, 0x87, 0x00, 0xae, 0xc3, 0x03, 0x4b, + 0xe0, 0x91, 0x00, 0xae, 0xb9, 0x97, 0x00, 0xae, 0xb0, 0x87, 0x00, 0xae, + 0x98, 0x87, 0x00, 0xae, 0x68, 0x83, 0x00, 0xb1, 0x99, 0x8b, 0x00, 0xb1, + 0x91, 0x87, 0x00, 0xb1, 0x83, 0x03, 0x4b, 0xe4, 0x91, 0x00, 0xb1, 0x79, + 0x97, 0x00, 0xb1, 0x70, 0x87, 0x00, 0xb1, 0x28, 0x87, 0x00, 0xb2, 0x18, + 0x87, 0x00, 0xb2, 0x88, 0x97, 0x00, 0xb2, 0xa1, 0x91, 0x00, 0xb2, 0xa9, + 0x87, 0x00, 0xb2, 0xb3, 0x03, 0x4b, 0xe8, 0x8b, 0x00, 0xb2, 0xc1, 0x83, + 0x00, 0xb2, 0xc8, 0x83, 0x00, 0xaa, 0x6b, 0x03, 0x4b, 0xec, 0x91, 0x00, + 0xaa, 0x53, 0x03, 0x4b, 0xf0, 0x87, 0x00, 0xaa, 0x21, 0x19, 0x43, 0x4b, + 0xf4, 0x83, 0x00, 0xac, 0x69, 0x91, 0x00, 0xac, 0x61, 0x8b, 0x00, 0xac, + 0x59, 0x87, 0x00, 0xac, 0x51, 0xc3, 0x14, 0x72, 0x00, 0xaa, 0x78, 0xc4, + 0xdf, 0xc7, 0x00, 0xab, 0x49, 0x19, 0x43, 0x4c, 0x0d, 0x19, 0x43, 0x4c, + 0x26, 0x42, 0x15, 0xa6, 0xc3, 0x4c, 0x3f, 0x19, 0x43, 0x4c, 0x58, 0x19, + 0x43, 0x4c, 0x71, 0x91, 0x00, 0xa4, 0xcb, 0x03, 0x4c, 0x8a, 0x8b, 0x00, + 0xa4, 0xab, 0x03, 0x4c, 0x8e, 0x87, 0x00, 0xa4, 0x99, 0x83, 0x00, 0xa4, + 0xea, 0x03, 0x4c, 0x92, 0x83, 0x00, 0xa0, 0xc3, 0x03, 0x4c, 0x96, 0x91, + 0x00, 0xa0, 0x9b, 0x03, 0x4c, 0x9a, 0x8b, 0x00, 0xa0, 0x7b, 0x03, 0x4c, + 0x9e, 0x87, 0x00, 0xa0, 0x68, 0x83, 0x00, 0xa3, 0xfb, 0x03, 0x4c, 0xa2, + 0x87, 0x00, 0xa3, 0xa9, 0x8b, 0x00, 0xa3, 0xbb, 0x03, 0x4c, 0xa6, 0x91, + 0x00, 0xa3, 0xda, 0x03, 0x4c, 0xaa, 0x19, 0x43, 0x4c, 0xae, 0x87, 0x00, + 0xa6, 0x51, 0x83, 0x00, 0xa6, 0x62, 0x03, 0x4c, 0xc7, 0x19, 0xc3, 0x4c, + 0xcb, 0x83, 0x00, 0xac, 0xf1, 0x91, 0x00, 0xac, 0xe9, 0x8b, 0x00, 0xac, + 0xe1, 0x87, 0x00, 0xac, 0xd8, 0xcd, 0x61, 0x8b, 0x00, 0xa1, 0x19, 0xc2, + 0x00, 0x75, 0x00, 0xa1, 0x20, 0xc5, 0x31, 0xee, 0x00, 0xa1, 0x29, 0xd6, + 0x2e, 0xee, 0x00, 0xa1, 0x30, 0x91, 0x00, 0xc6, 0x68, 0x8b, 0x00, 0xc6, + 0x48, 0x8b, 0x0f, 0x01, 0x01, 0x97, 0x0f, 0x00, 0xf8, 0xc8, 0xb5, 0x5a, + 0x0e, 0x92, 0x19, 0xc6, 0xcd, 0xe5, 0x0e, 0x92, 0x10, 0xc2, 0x00, 0xb0, + 0x08, 0x9b, 0xa1, 0xc2, 0x07, 0xb2, 0x08, 0x9b, 0x99, 0xc2, 0x00, 0xc1, + 0x08, 0x9b, 0x91, 0xc2, 0x02, 0x2b, 0x08, 0x9b, 0x89, 0x83, 0x08, 0x9b, + 0x80, 0xc3, 0x22, 0xcb, 0x08, 0x9b, 0x61, 0x08, 0xc3, 0x4c, 0xe6, 0x16, + 0xc3, 0x4c, 0xf2, 0xc3, 0x05, 0x14, 0x08, 0x9b, 0x39, 0xc4, 0x15, 0xe7, + 0x08, 0x9b, 0x30, 0xcb, 0x8e, 0xef, 0x00, 0xee, 0x41, 0xc6, 0x60, 0xb1, + 0x00, 0xee, 0x28, 0xc6, 0x09, 0x01, 0x00, 0x18, 0x03, 0x03, 0x4c, 0xfe, + 0xc9, 0x2b, 0x5f, 0x00, 0x1a, 0x00, 0x00, 0xc3, 0x4d, 0x04, 0x45, 0x03, + 0xe3, 0x43, 0x4d, 0x10, 0xcb, 0x95, 0xe5, 0x01, 0x06, 0x89, 0x48, 0xbc, + 0x3a, 0x43, 0x4d, 0x1a, 0xcb, 0x93, 0xe0, 0x00, 0xd6, 0x21, 0xcb, 0x92, + 0xe3, 0x00, 
0xd6, 0x10, 0x00, 0xc3, 0x4d, 0x26, 0x45, 0x03, 0xe3, 0x43, + 0x4d, 0x32, 0xc5, 0x00, 0xd4, 0x00, 0x18, 0xd1, 0xc5, 0x05, 0x02, 0x00, + 0x1a, 0x48, 0xc5, 0x05, 0x02, 0x00, 0x18, 0xe1, 0xc5, 0x00, 0xd4, 0x00, + 0x1a, 0x88, 0xc9, 0x20, 0xa8, 0x00, 0xef, 0xa1, 0xdb, 0x19, 0x11, 0x00, + 0xef, 0x80, 0xc9, 0x20, 0xa8, 0x00, 0xef, 0x99, 0xdb, 0x19, 0x11, 0x00, + 0xef, 0x68, 0xc7, 0xa6, 0x69, 0x00, 0xef, 0x19, 0xc5, 0x05, 0x02, 0x00, + 0xee, 0x50, 0x86, 0x00, 0xee, 0xc1, 0x96, 0x00, 0xd6, 0x71, 0x94, 0x00, + 0xd6, 0x69, 0x89, 0x00, 0xd6, 0x60, 0xce, 0x42, 0x34, 0x01, 0x07, 0x31, + 0x45, 0x02, 0x6d, 0x43, 0x4d, 0x3e, 0xc6, 0x05, 0x01, 0x00, 0xef, 0xe0, + 0x49, 0x60, 0xf4, 0xc3, 0x4d, 0x4a, 0xd0, 0x57, 0x92, 0x00, 0xd5, 0xe0, + 0xce, 0x6d, 0xf6, 0x00, 0xd5, 0xc1, 0xc7, 0x7d, 0xa5, 0x00, 0x19, 0xf8, + 0xc8, 0x65, 0xaa, 0x00, 0x1a, 0xd1, 0xd4, 0x3c, 0x64, 0x00, 0x1b, 0x10, + 0xc6, 0x05, 0x01, 0x00, 0x1a, 0xe0, 0xc6, 0x05, 0x01, 0x00, 0x1a, 0xf8, + 0x00, 0x43, 0x4d, 0x56, 0xc5, 0x00, 0x48, 0x00, 0xef, 0xd0, 0x00, 0x43, + 0x4d, 0x62, 0xc4, 0x18, 0x10, 0x05, 0x47, 0x39, 0xc2, 0x22, 0xcc, 0x05, + 0x47, 0x30, 0xc3, 0x0d, 0x14, 0x05, 0x47, 0x29, 0xc3, 0x09, 0x9e, 0x05, + 0x47, 0x20, 0xc4, 0x02, 0xde, 0x05, 0x47, 0x19, 0xc2, 0x02, 0xa0, 0x05, + 0x47, 0x10, 0xc9, 0x0f, 0x6e, 0x07, 0xf1, 0x71, 0xca, 0x09, 0xb7, 0x07, + 0xf1, 0x78, 0xc3, 0xe6, 0x62, 0x01, 0x6f, 0xa8, 0x87, 0x05, 0x34, 0xf9, + 0x83, 0x01, 0x6f, 0xe1, 0xc7, 0xc8, 0x00, 0x01, 0x6f, 0xf8, 0x83, 0x01, + 0x6f, 0x91, 0xc3, 0x1c, 0x63, 0x01, 0x6f, 0x98, 0xc6, 0x05, 0x01, 0x00, + 0x19, 0x78, 0xc3, 0x03, 0x0c, 0x01, 0x65, 0xa9, 0xc3, 0xb8, 0xf8, 0x01, + 0x65, 0xf9, 0x42, 0x01, 0xe2, 0xc3, 0x4d, 0x6e, 0xc3, 0x26, 0x1a, 0x01, + 0x66, 0x39, 0x0a, 0xc3, 0x4d, 0x7a, 0xc6, 0xd0, 0x3d, 0x01, 0x66, 0xb9, + 0xc3, 0xe5, 0x24, 0x01, 0x66, 0xc8, 0xc5, 0xda, 0x9c, 0x01, 0x66, 0xe9, + 0x10, 0xc3, 0x4d, 0x8d, 0xc3, 0xe4, 0xf4, 0x01, 0x67, 0x18, 0xc3, 0x03, + 0x0c, 0x01, 0x65, 0xa1, 0xc3, 0xb8, 0xf8, 0x01, 0x65, 0xf1, 0x42, 0x01, + 0xe2, 0xc3, 0x4d, 0x99, 0xc3, 0x26, 0x1a, 0x01, 0x66, 0x31, 0x0a, 0xc3, + 0x4d, 0xa5, 0xc6, 0xd0, 0x3d, 0x01, 0x66, 0xb1, 0xc3, 0xe5, 0x24, 0x01, + 0x66, 0xc0, 0xc5, 0xda, 0x9c, 0x01, 0x66, 0xe1, 0x10, 0xc3, 0x4d, 0xb8, + 0xc3, 0xe4, 0xf4, 0x01, 0x67, 0x10, 0x46, 0x00, 0x8b, 0x43, 0x4d, 0xc4, + 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x70, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xc0, + 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x80, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xc8, + 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x98, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xd0, + 0x83, 0x01, 0x93, 0xa9, 0x97, 0x01, 0x93, 0xf0, 0xc2, 0x00, 0xd3, 0x01, + 0x93, 0xb0, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xb8, 0xc4, 0x18, 0x10, 0x01, + 0x23, 0x31, 0xc2, 0x22, 0xcc, 0x01, 0x23, 0x28, 0xc3, 0x0d, 0x14, 0x01, + 0x23, 0x21, 0xc3, 0x09, 0x9e, 0x01, 0x23, 0x18, 0xc4, 0x02, 0xde, 0x01, + 0x23, 0x11, 0xc2, 0x02, 0xa0, 0x01, 0x23, 0x08, 0x00, 0x43, 0x4d, 0xd0, + 0x00, 0x43, 0x4d, 0xee, 0xd0, 0x55, 0xa8, 0x01, 0x92, 0x60, 0x00, 0x43, + 0x4e, 0x0c, 0xc3, 0x18, 0x11, 0x01, 0x94, 0x31, 0xc4, 0xe3, 0x8b, 0x01, + 0x94, 0xc8, 0x90, 0x01, 0x94, 0x81, 0xc6, 0xd2, 0x8f, 0x01, 0x94, 0xe1, + 0xc7, 0xc8, 0x54, 0x01, 0x95, 0x60, 0xc3, 0x04, 0x20, 0x01, 0x94, 0x89, + 0xc3, 0xe5, 0x0f, 0x01, 0x95, 0x58, 0xc2, 0x00, 0x5f, 0x01, 0x94, 0x21, + 0xc2, 0x01, 0x19, 0x01, 0x94, 0x59, 0xc7, 0xc4, 0xf0, 0x01, 0x94, 0xb0, + 0xc2, 0x02, 0x6f, 0x01, 0x94, 0x41, 0xc3, 0x00, 0x2e, 0x01, 0x95, 0x80, + 0xc3, 0x01, 0x6f, 0x01, 0x94, 0x71, 0xc6, 0xca, 0xc1, 0x01, 0x95, 0x48, + 0xcc, 0x7b, 0x3d, 0x01, 0x94, 0xb9, 0xc2, 0x18, 0x8b, 0x01, 0x95, 0x11, + 0xc5, 0xc7, 
0xc8, 0x01, 0x95, 0x18, 0x15, 0xc3, 0x4e, 0x2a, 0xc6, 0xce, + 0x75, 0x01, 0x95, 0x50, 0x17, 0xc3, 0x4e, 0x34, 0xc6, 0xcd, 0x79, 0x09, + 0x29, 0xf8, 0xc4, 0xe1, 0x9f, 0x09, 0x29, 0xf1, 0xc2, 0x05, 0x1d, 0x09, + 0x19, 0xd8, 0xc4, 0xdc, 0xae, 0x09, 0x1a, 0x71, 0x86, 0x09, 0x1a, 0x69, + 0xc9, 0xab, 0x25, 0x09, 0x1a, 0x60, 0xc3, 0x69, 0x97, 0x09, 0x1a, 0x51, + 0xc2, 0x01, 0x7f, 0x09, 0x1a, 0x48, 0xc2, 0x01, 0xe2, 0x09, 0x1a, 0x21, + 0x8f, 0x09, 0x1a, 0x19, 0xc2, 0x04, 0x2b, 0x09, 0x1a, 0x10, 0x97, 0x09, + 0x1a, 0x01, 0x83, 0x09, 0x19, 0xe2, 0x03, 0x4e, 0x3c, 0xc5, 0xcb, 0x88, + 0x09, 0x19, 0xc8, 0x17, 0xc3, 0x4e, 0x4a, 0xc3, 0x20, 0x18, 0x09, 0x19, + 0x81, 0xc2, 0x00, 0xd0, 0x09, 0x19, 0x79, 0x03, 0x43, 0x4e, 0x55, 0xc5, + 0x39, 0xc7, 0x09, 0x18, 0xc0, 0x97, 0x09, 0x17, 0xb9, 0x87, 0x09, 0x17, + 0xb0, 0xe0, 0x04, 0x47, 0x09, 0x17, 0x88, 0xda, 0x1a, 0xe6, 0x09, 0x18, + 0x20, 0xcb, 0x8d, 0xc6, 0x09, 0x29, 0xb9, 0xcc, 0x84, 0x21, 0x09, 0x29, + 0xb0, 0xc3, 0x40, 0xe7, 0x09, 0x29, 0xa9, 0xc4, 0xe3, 0xa3, 0x09, 0x29, + 0xa1, 0xc4, 0xc5, 0xa3, 0x09, 0x29, 0x98, 0x00, 0x43, 0x4e, 0x5f, 0x97, + 0x09, 0x15, 0xab, 0x03, 0x4e, 0x6b, 0xc3, 0x05, 0x9e, 0x09, 0x15, 0xa1, + 0xc4, 0x5d, 0xd2, 0x09, 0x15, 0x99, 0xc2, 0x02, 0x6f, 0x09, 0x15, 0x91, + 0xc4, 0x38, 0xa9, 0x09, 0x15, 0x89, 0xc3, 0x62, 0x19, 0x09, 0x15, 0x81, + 0x83, 0x09, 0x15, 0x78, 0xd6, 0x2b, 0xd6, 0x09, 0x16, 0xa9, 0xc4, 0x58, + 0xf5, 0x09, 0x16, 0xa0, 0xc3, 0x13, 0x51, 0x09, 0x16, 0x89, 0xc3, 0x49, + 0x41, 0x09, 0x16, 0x81, 0xc3, 0x65, 0x57, 0x09, 0x16, 0x79, 0xc6, 0xd0, + 0x97, 0x09, 0x16, 0x71, 0xc3, 0x04, 0x2a, 0x09, 0x16, 0x63, 0x03, 0x4e, + 0x71, 0xc3, 0x1a, 0xf4, 0x09, 0x16, 0x59, 0xc3, 0x03, 0x30, 0x09, 0x16, + 0x51, 0x04, 0xc3, 0x4e, 0x77, 0x83, 0x09, 0x16, 0x38, 0xc2, 0x03, 0x4e, + 0x09, 0x16, 0x29, 0x83, 0x09, 0x16, 0x20, 0x42, 0x01, 0x6f, 0xc3, 0x4e, + 0x83, 0x15, 0xc3, 0x4e, 0x8d, 0xc2, 0x00, 0xc4, 0x09, 0x29, 0x71, 0xc8, + 0x6a, 0x1e, 0x09, 0x1c, 0xb1, 0x17, 0xc3, 0x4e, 0x97, 0xc3, 0x20, 0x18, + 0x09, 0x14, 0xf1, 0xc2, 0x02, 0x2f, 0x09, 0x14, 0xe9, 0xc3, 0x81, 0xc8, + 0x09, 0x14, 0xe1, 0x0d, 0xc3, 0x4e, 0xad, 0xc2, 0x00, 0xd0, 0x09, 0x14, + 0xc9, 0xc2, 0x05, 0xc3, 0x09, 0x14, 0xbb, 0x03, 0x4e, 0xb9, 0x83, 0x09, + 0x14, 0xb0, 0xc9, 0xa9, 0xab, 0x09, 0x29, 0x68, 0x97, 0x09, 0x29, 0x53, + 0x03, 0x4e, 0xbd, 0xcc, 0x36, 0x5c, 0x09, 0x29, 0x49, 0x0f, 0xc3, 0x4e, + 0xd5, 0xc7, 0xc9, 0x26, 0x09, 0x29, 0x39, 0xc5, 0xdd, 0x0d, 0x09, 0x29, + 0x31, 0xc2, 0x00, 0x0a, 0x09, 0x29, 0x29, 0x09, 0xc3, 0x4e, 0xe1, 0xc8, + 0xb9, 0x4a, 0x09, 0x29, 0x11, 0xc3, 0x15, 0x2e, 0x09, 0x1c, 0x89, 0xc3, + 0x04, 0x65, 0x09, 0x12, 0xd3, 0x03, 0x4e, 0xec, 0x10, 0xc3, 0x4e, 0xf2, + 0x03, 0x43, 0x4e, 0xfc, 0xcf, 0x68, 0xcd, 0x09, 0x13, 0xc3, 0x03, 0x4f, + 0x09, 0x4a, 0xa4, 0xa4, 0x43, 0x4f, 0x0f, 0xd1, 0x56, 0xea, 0x09, 0x13, + 0x60, 0xc3, 0x5d, 0xd1, 0x09, 0x13, 0x41, 0xc3, 0x13, 0x51, 0x09, 0x13, + 0x33, 0x03, 0x4f, 0x4b, 0xc4, 0x4a, 0x0f, 0x09, 0x13, 0x29, 0xc3, 0x1a, + 0xf4, 0x09, 0x13, 0x20, 0x47, 0x03, 0x4c, 0x43, 0x4f, 0x51, 0xc2, 0x02, + 0x1c, 0x09, 0x11, 0xa9, 0xc3, 0x51, 0xdb, 0x09, 0x11, 0xa1, 0x83, 0x09, + 0x11, 0x98, 0x46, 0x03, 0x4d, 0xc3, 0x4f, 0x63, 0xc4, 0x39, 0xc8, 0x09, + 0x11, 0xe8, 0x45, 0x03, 0x4e, 0xc3, 0x4f, 0x76, 0xc3, 0x58, 0xf6, 0x09, + 0x10, 0x88, 0xc6, 0x6c, 0xd1, 0x09, 0x10, 0xab, 0x03, 0x4f, 0xc6, 0xc6, + 0x0b, 0x0a, 0x09, 0x10, 0xa0, 0xcd, 0x7c, 0x0c, 0x09, 0x10, 0xc9, 0xc9, + 0xb2, 0x3f, 0x09, 0x10, 0xc0, 0x47, 0x03, 0x4c, 0x43, 0x4f, 0xcc, 0x47, + 0x03, 0x4c, 0x43, 0x4f, 0xf7, 0xa2, 0x09, 0x27, 0xf1, 0xa0, 0x09, 0x27, + 0xe9, 0x9f, 
0x09, 0x27, 0xe1, 0x9d, 0x09, 0x27, 0xd8, 0xa4, 0x09, 0x27, + 0xc1, 0x9d, 0x09, 0x27, 0xb8, 0xa6, 0x09, 0x27, 0x8b, 0x03, 0x50, 0x1d, + 0x9e, 0x09, 0x27, 0x80, 0xa1, 0x09, 0x27, 0x71, 0xa0, 0x09, 0x27, 0x68, + 0xa5, 0x09, 0x27, 0x61, 0xa4, 0x09, 0x27, 0x59, 0xa0, 0x09, 0x27, 0x50, + 0xa3, 0x09, 0x27, 0x49, 0xa2, 0x09, 0x27, 0x40, 0xa5, 0x09, 0x27, 0x31, + 0xa2, 0x09, 0x27, 0x29, 0x9d, 0x09, 0x27, 0x20, 0xa6, 0x09, 0x27, 0x19, + 0x9d, 0x09, 0x27, 0x10, 0xce, 0x71, 0x3e, 0x09, 0x26, 0xf1, 0x9d, 0x09, + 0x26, 0xe8, 0x9e, 0x09, 0x26, 0xd1, 0x9d, 0x09, 0x26, 0xc8, 0xa2, 0x09, + 0x26, 0xb9, 0x9e, 0x09, 0x26, 0xb0, 0x46, 0x03, 0x4d, 0xc3, 0x50, 0x23, + 0xc7, 0x0b, 0x09, 0x09, 0x0f, 0x58, 0xc4, 0x39, 0xc8, 0x09, 0x0f, 0x7b, + 0x03, 0x50, 0x6d, 0xc9, 0xa6, 0x49, 0x09, 0x0f, 0x6a, 0x03, 0x50, 0x73, + 0x9f, 0x09, 0x1c, 0x38, 0x8d, 0x09, 0x0b, 0x78, 0x86, 0x09, 0x0b, 0x88, + 0x94, 0x09, 0x0a, 0xf1, 0xc3, 0x03, 0x47, 0x09, 0x0a, 0xe9, 0x86, 0x09, + 0x0a, 0xe0, 0x97, 0x09, 0x0c, 0x1b, 0x03, 0x50, 0x79, 0xc2, 0x02, 0xfb, + 0x09, 0x0c, 0x11, 0x87, 0x09, 0x0c, 0x09, 0x83, 0x09, 0x0c, 0x00, 0x94, + 0x09, 0x0b, 0xf8, 0x8f, 0x09, 0x1c, 0x18, 0x86, 0x09, 0x1c, 0x09, 0xc2, + 0xe6, 0x97, 0x09, 0x0b, 0x60, 0xc2, 0x01, 0xe2, 0x09, 0x1c, 0x03, 0x03, + 0x50, 0x7d, 0xc2, 0x38, 0x6a, 0x09, 0x0b, 0x40, 0x94, 0x09, 0x0b, 0x2b, + 0x03, 0x50, 0x81, 0xc7, 0x5d, 0x9b, 0x09, 0x0b, 0x21, 0x8e, 0x09, 0x0b, + 0x18, 0xa0, 0x09, 0x1b, 0xf9, 0x9f, 0x09, 0x0a, 0xd8, 0xc9, 0xaa, 0xf8, + 0x09, 0x0a, 0xd0, 0xcb, 0x97, 0xdf, 0x09, 0x0b, 0xc8, 0x46, 0x25, 0xd4, + 0x43, 0x50, 0x87, 0xe0, 0x03, 0x47, 0x09, 0x0c, 0xf0, 0xc3, 0x51, 0xdb, + 0x09, 0x09, 0x01, 0xca, 0xa3, 0x82, 0x09, 0x08, 0xf8, 0xc8, 0x6a, 0x1e, + 0x09, 0x26, 0x61, 0xcd, 0x79, 0x68, 0x09, 0x08, 0xe1, 0xc3, 0x20, 0x18, + 0x09, 0x08, 0xd9, 0xc3, 0x32, 0xbf, 0x09, 0x08, 0xca, 0x03, 0x50, 0x99, + 0x16, 0xc3, 0x50, 0x9f, 0xcd, 0x47, 0xaa, 0x09, 0x08, 0x90, 0xc2, 0x00, + 0xb0, 0x09, 0x08, 0x79, 0xcb, 0x92, 0x12, 0x09, 0x08, 0x71, 0xc3, 0x04, + 0x2a, 0x09, 0x08, 0x69, 0xc9, 0x5d, 0x99, 0x09, 0x08, 0x61, 0xca, 0xa3, + 0xb4, 0x09, 0x08, 0x58, 0xc4, 0xde, 0xe7, 0x09, 0x26, 0x41, 0x15, 0xc3, + 0x50, 0xab, 0x10, 0xc3, 0x50, 0xb9, 0x0f, 0xc3, 0x50, 0xc9, 0x0e, 0xc3, + 0x50, 0xd9, 0x0d, 0xc3, 0x50, 0xe6, 0x0a, 0xc3, 0x50, 0xf7, 0x09, 0xc3, + 0x51, 0x07, 0x07, 0xc3, 0x51, 0x15, 0x06, 0xc3, 0x51, 0x29, 0x04, 0xc3, + 0x51, 0x38, 0x03, 0xc3, 0x51, 0x45, 0x97, 0x09, 0x07, 0x53, 0x03, 0x51, + 0x61, 0xc4, 0x38, 0xb4, 0x09, 0x07, 0x49, 0xc2, 0x00, 0xb0, 0x09, 0x07, + 0x11, 0x0b, 0x43, 0x51, 0x68, 0xcd, 0x79, 0xc3, 0x09, 0x07, 0xd1, 0xc9, + 0xaf, 0x78, 0x09, 0x07, 0xc9, 0xc4, 0x58, 0xf5, 0x09, 0x07, 0xc0, 0x97, + 0x09, 0x25, 0xa9, 0xc2, 0x01, 0x7f, 0x09, 0x1b, 0xc0, 0x86, 0x09, 0x05, + 0xa1, 0x9f, 0x09, 0x05, 0x98, 0x97, 0x09, 0x05, 0x91, 0x8b, 0x09, 0x05, + 0x89, 0x83, 0x09, 0x05, 0x7a, 0x03, 0x51, 0x74, 0xc2, 0x36, 0x6f, 0x09, + 0x05, 0x71, 0xc5, 0x45, 0xae, 0x09, 0x05, 0x62, 0x03, 0x51, 0x7a, 0xc5, + 0x39, 0xc7, 0x09, 0x05, 0x50, 0xc5, 0x39, 0xc7, 0x09, 0x05, 0x40, 0x90, + 0x09, 0x05, 0x29, 0xc9, 0xaa, 0xef, 0x09, 0x05, 0x1a, 0x03, 0x51, 0x80, + 0x95, 0x09, 0x25, 0x98, 0x8e, 0x09, 0x25, 0x88, 0xc5, 0x58, 0xf4, 0x09, + 0x04, 0xc8, 0xc6, 0x6a, 0x20, 0x09, 0x25, 0x41, 0xc2, 0x01, 0x7f, 0x09, + 0x25, 0x38, 0x8b, 0x09, 0x25, 0x21, 0xc2, 0x00, 0xcb, 0x09, 0x25, 0x19, + 0xc3, 0x02, 0x2c, 0x09, 0x25, 0x10, 0xcc, 0x84, 0xed, 0x09, 0x25, 0x09, + 0x03, 0x43, 0x51, 0x86, 0x17, 0xc3, 0x51, 0x93, 0xc5, 0x45, 0xae, 0x09, + 0x24, 0xd0, 0x8b, 0x09, 0x24, 0xc1, 0x83, 0x09, 0x24, 0xb8, 0x8b, 0x09, + 0x24, 0xa3, 
0x03, 0x51, 0xa0, 0x83, 0x09, 0x24, 0x98, 0xc2, 0x05, 0x1d, + 0x09, 0x24, 0x89, 0xc2, 0x00, 0x74, 0x09, 0x24, 0x80, 0xc2, 0x01, 0xe2, + 0x09, 0x24, 0x73, 0x03, 0x51, 0xac, 0xc4, 0x99, 0xe3, 0x09, 0x24, 0x68, + 0xc5, 0x39, 0xc7, 0x09, 0x04, 0x38, 0x17, 0xc3, 0x51, 0xb2, 0xc4, 0x38, + 0xb4, 0x09, 0x03, 0x59, 0xc2, 0x00, 0xba, 0x09, 0x03, 0x51, 0xcc, 0x36, + 0x5c, 0x09, 0x03, 0x49, 0xc2, 0x02, 0x6f, 0x09, 0x03, 0x41, 0x0e, 0xc3, + 0x51, 0xbe, 0xc3, 0x32, 0xbf, 0x09, 0x03, 0x19, 0xc2, 0x01, 0x29, 0x09, + 0x03, 0x0b, 0x03, 0x51, 0xc9, 0xc2, 0x00, 0xd0, 0x09, 0x03, 0x01, 0x09, + 0xc3, 0x51, 0xcf, 0x04, 0xc3, 0x51, 0xe3, 0x03, 0x43, 0x51, 0xed, 0xc2, + 0x5d, 0xd4, 0x09, 0x24, 0x09, 0xc3, 0x26, 0x1a, 0x09, 0x00, 0x98, 0xc5, + 0x58, 0xf4, 0x09, 0x24, 0x00, 0xc3, 0x0f, 0xd6, 0x09, 0x00, 0x89, 0xc7, + 0x6a, 0x1f, 0x09, 0x00, 0x80, 0xc7, 0x5d, 0x9b, 0x09, 0x00, 0x71, 0x8e, + 0x09, 0x00, 0x68, 0xc8, 0x0d, 0x2d, 0x09, 0x01, 0xe3, 0x03, 0x51, 0xf9, + 0x16, 0x43, 0x51, 0xff, 0xce, 0x71, 0xca, 0x09, 0x14, 0x71, 0x46, 0x03, + 0x4d, 0x43, 0x52, 0x05, 0x9f, 0x09, 0x14, 0x40, 0x84, 0x09, 0x14, 0x30, + 0x97, 0x09, 0x14, 0x19, 0x8b, 0x09, 0x14, 0x10, 0x84, 0x09, 0x14, 0x08, + 0xe0, 0x04, 0x27, 0x09, 0x0a, 0x48, 0xca, 0xa5, 0x12, 0x00, 0x24, 0x58, + 0xc3, 0xe5, 0x3c, 0x00, 0x28, 0x39, 0xc2, 0x1c, 0x52, 0x00, 0x28, 0x19, + 0x87, 0x00, 0x28, 0x08, 0xc9, 0x20, 0xb1, 0x00, 0x27, 0xd8, 0xc3, 0x2d, + 0x1a, 0x05, 0x32, 0x99, 0x83, 0x05, 0x32, 0xb9, 0xd1, 0x51, 0x78, 0x05, + 0x32, 0xe9, 0x87, 0x00, 0x23, 0x29, 0xca, 0x51, 0x7f, 0x00, 0x23, 0x49, + 0xc7, 0xc8, 0x00, 0x00, 0x23, 0x68, 0x06, 0xc3, 0x52, 0x17, 0xc5, 0x1d, + 0x88, 0x00, 0x26, 0x10, 0xc8, 0x25, 0xfb, 0x00, 0x25, 0xb9, 0xc8, 0x20, + 0xa9, 0x00, 0x27, 0xa8, 0xca, 0xa5, 0x12, 0x00, 0x24, 0x50, 0xc3, 0xe5, + 0x3c, 0x00, 0x28, 0x31, 0xc2, 0x1c, 0x52, 0x00, 0x28, 0x11, 0x87, 0x00, + 0x28, 0x00, 0xc9, 0x20, 0xb1, 0x00, 0x27, 0xd0, 0xc8, 0x20, 0xa9, 0x00, + 0x27, 0xa1, 0xc8, 0x25, 0xfb, 0x00, 0x25, 0xb0, 0xc3, 0x2d, 0x1a, 0x05, + 0x32, 0x91, 0x83, 0x05, 0x32, 0xb1, 0xd1, 0x51, 0x78, 0x05, 0x32, 0xe1, + 0x87, 0x00, 0x23, 0x21, 0xca, 0x51, 0x7f, 0x00, 0x23, 0x41, 0xc7, 0xc8, + 0x00, 0x00, 0x23, 0x60, 0x06, 0xc3, 0x52, 0x23, 0xc5, 0x1d, 0x88, 0x00, + 0x26, 0x08, 0xc7, 0xc7, 0xeb, 0x00, 0x6d, 0x39, 0xc6, 0x8e, 0x9c, 0x00, + 0x6d, 0x68, 0xc7, 0xc4, 0x25, 0x00, 0x6d, 0x49, 0xc6, 0x8e, 0x9c, 0x00, + 0x6d, 0x78, 0xc7, 0xc6, 0x32, 0x00, 0x6c, 0xd9, 0xc7, 0xca, 0x29, 0x00, + 0x6c, 0xe9, 0xc7, 0xc7, 0xdd, 0x00, 0x6d, 0x09, 0xc7, 0xc7, 0xc1, 0x00, + 0x6d, 0x19, 0x16, 0xc3, 0x52, 0x2f, 0x06, 0xc3, 0x52, 0x3b, 0xc7, 0xc8, + 0x1c, 0x00, 0x6d, 0xa9, 0xc7, 0x8e, 0x9b, 0x00, 0x6d, 0xb8, 0xca, 0x63, + 0xc8, 0x00, 0x6e, 0xe1, 0xcf, 0x63, 0xc3, 0x00, 0x6e, 0xe9, 0xcb, 0x93, + 0x51, 0x00, 0x6e, 0xf0, 0x49, 0x20, 0x36, 0x43, 0x52, 0x47, 0x49, 0x20, + 0x36, 0x43, 0x52, 0x53, 0x49, 0x20, 0x36, 0x43, 0x52, 0x5f, 0x4c, 0x87, + 0x45, 0xc3, 0x52, 0x6b, 0x87, 0x0e, 0xcd, 0x20, 0x49, 0x20, 0x36, 0x43, + 0x52, 0x77, 0x49, 0x20, 0x36, 0x43, 0x52, 0x83, 0xc8, 0x3b, 0xec, 0x0e, + 0xc8, 0xf1, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0xe0, 0xc4, 0x17, 0x93, 0x0e, + 0xd3, 0x2b, 0x03, 0x52, 0x8f, 0xc6, 0x5a, 0xfc, 0x0e, 0xd3, 0x1a, 0x03, + 0x52, 0x95, 0xcb, 0x57, 0x45, 0x0e, 0xcc, 0x31, 0xc6, 0x00, 0x58, 0x0e, + 0xcc, 0x29, 0xc6, 0x24, 0x3b, 0x0e, 0xcc, 0x20, 0xcb, 0x57, 0x45, 0x0e, + 0xcc, 0x19, 0xc6, 0x00, 0x58, 0x0e, 0xcc, 0x11, 0xc6, 0x24, 0x3b, 0x0e, + 0xcc, 0x08, 0xcb, 0x57, 0x45, 0x0e, 0xca, 0x81, 0xc6, 0x00, 0x58, 0x0e, + 0xca, 0x79, 0xc6, 0x24, 0x3b, 0x0e, 0xca, 0x70, 0xcb, 0x57, 0x45, 0x0e, + 0xca, 0x69, 
0xc6, 0x00, 0x58, 0x0e, 0xca, 0x61, 0xc6, 0x24, 0x3b, 0x0e, + 0xca, 0x58, 0xc7, 0x04, 0x12, 0x0e, 0xd1, 0x49, 0xc5, 0x19, 0x2f, 0x0e, + 0xd1, 0x38, 0x00, 0x43, 0x52, 0x9b, 0x00, 0x43, 0x52, 0xa7, 0x00, 0x43, + 0x52, 0xb3, 0x00, 0x43, 0x52, 0xe3, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x2b, + 0x03, 0x53, 0x02, 0xd2, 0x13, 0x89, 0x0e, 0xc6, 0xa3, 0x03, 0x53, 0x06, + 0x45, 0x00, 0x9d, 0xc3, 0x53, 0x0a, 0x47, 0x13, 0x95, 0x43, 0x53, 0x16, + 0x00, 0x43, 0x53, 0x25, 0x00, 0x43, 0x53, 0x68, 0x92, 0x0e, 0xc3, 0x6b, + 0x03, 0x53, 0x80, 0xc6, 0xbc, 0x5c, 0x0e, 0xc3, 0xaa, 0x03, 0x53, 0x84, + 0x00, 0x43, 0x53, 0x88, 0x00, 0x43, 0x53, 0xa9, 0xcb, 0x13, 0x90, 0x0e, + 0xc5, 0x91, 0xc9, 0xad, 0x9b, 0x0e, 0xc4, 0xa9, 0x46, 0x0e, 0xce, 0xc3, + 0x53, 0xc4, 0xc8, 0xbc, 0x62, 0x0e, 0xc3, 0xc9, 0xd3, 0x46, 0x57, 0x0e, + 0xc2, 0xb1, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x18, 0x4b, 0x40, 0xb3, 0xc3, + 0x53, 0xd0, 0x4a, 0x18, 0xa5, 0x43, 0x53, 0xdc, 0xc6, 0x00, 0x58, 0x0e, + 0xcf, 0xa1, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x98, 0xc6, 0x00, 0x58, 0x0e, + 0xcf, 0x81, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x78, 0xc5, 0x17, 0x14, 0x0e, + 0xce, 0xf1, 0x15, 0xc3, 0x53, 0xee, 0x48, 0x20, 0x37, 0x43, 0x53, 0xfa, + 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x61, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x48, + 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x59, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x40, + 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x51, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x38, + 0xca, 0x91, 0x42, 0x0e, 0xcb, 0x49, 0x49, 0x45, 0x27, 0x43, 0x54, 0x06, + 0x46, 0x20, 0xe5, 0xc3, 0x54, 0x1b, 0x48, 0xb7, 0x3a, 0x43, 0x54, 0x27, + 0x46, 0x20, 0xe5, 0xc3, 0x54, 0x33, 0x48, 0xb7, 0x3a, 0x43, 0x54, 0x45, + 0xc8, 0xbb, 0x22, 0x0e, 0xce, 0xc9, 0xc5, 0x17, 0x14, 0x0e, 0xce, 0xbb, + 0x03, 0x54, 0x51, 0xc6, 0x01, 0xdb, 0x0e, 0xce, 0xb1, 0xc5, 0x03, 0x13, + 0x0e, 0xce, 0xa9, 0x48, 0x20, 0x37, 0x43, 0x54, 0x57, 0xc5, 0x17, 0x14, + 0x0e, 0xcb, 0xb1, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0xa9, 0xc5, 0x03, 0x13, + 0x0e, 0xcb, 0xa0, 0xc5, 0x17, 0x14, 0x0e, 0xcb, 0xd1, 0xc6, 0x01, 0xdb, + 0x0e, 0xcb, 0xc9, 0xc5, 0x03, 0x13, 0x0e, 0xcb, 0xc0, 0xca, 0x91, 0x42, + 0x0e, 0xcb, 0x91, 0xc8, 0x51, 0x1b, 0x0e, 0xcb, 0x88, 0xcb, 0x91, 0x41, + 0x0e, 0xcb, 0x68, 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x91, 0xc6, 0x24, 0x3b, + 0x0e, 0xcf, 0x88, 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x71, 0xc6, 0x24, 0x3b, + 0x0e, 0xcf, 0x68, 0x4e, 0x6d, 0x86, 0xc3, 0x54, 0x63, 0x48, 0x20, 0x37, + 0xc3, 0x54, 0x75, 0x46, 0x0e, 0xd4, 0x43, 0x54, 0x81, 0xc6, 0x00, 0x58, + 0x0e, 0xcf, 0x31, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x20, 0xc6, 0x00, 0x58, + 0x0e, 0xcf, 0x29, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x18, 0xc5, 0xdd, 0x17, + 0x0e, 0xcd, 0x79, 0xca, 0x9e, 0x8c, 0x0e, 0xcd, 0x40, 0xc7, 0x00, 0x57, + 0x0e, 0xcc, 0xc0, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x71, 0xca, 0x9e, 0x8c, + 0x0e, 0xcd, 0x38, 0x00, 0xc3, 0x54, 0x8d, 0x48, 0xbb, 0x7a, 0x43, 0x54, + 0x9d, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0x09, 0xc6, 0x01, 0xdb, 0x0e, 0xca, + 0x01, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0xf8, 0xc8, 0x5a, 0x49, 0x0e, 0xc9, + 0xf1, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0xe9, 0xc6, 0x01, 0xdb, 0x0e, 0xc9, + 0xe1, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0xd8, 0xca, 0x91, 0x42, 0x0e, 0xc9, + 0x71, 0x49, 0x45, 0x27, 0x43, 0x54, 0xa9, 0xc5, 0x17, 0x14, 0x0e, 0xca, + 0x21, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0x19, 0xc5, 0x03, 0x13, 0x0e, 0xca, + 0x10, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0xd1, 0xc6, 0x01, 0xdb, 0x0e, 0xc9, + 0xc9, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0xc0, 0xcb, 0x91, 0x41, 0x0e, 0xc9, + 0xb8, 0xcb, 0x91, 0x41, 0x0e, 0xc9, 0x90, 0xc5, 0x17, 0x14, 0x0e, 0xcb, + 0x1b, 0x03, 0x54, 0xbe, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0x11, 0xc5, 0x03, + 0x13, 0x0e, 
0xcb, 0x08, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0xfb, 0x03, 0x54, + 0xc4, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0xf1, 0xc5, 0x03, 0x13, 0x0e, 0xca, + 0xe8, 0xc2, 0x00, 0x15, 0x0e, 0xca, 0xe0, 0xc2, 0x00, 0x15, 0x0e, 0xca, + 0xc0, 0x4c, 0x8b, 0xc5, 0xc3, 0x54, 0xca, 0xc5, 0x03, 0x13, 0x0e, 0xc9, + 0x11, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0x08, 0xc4, 0x94, 0xa5, 0x0e, 0xd2, + 0x61, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x58, 0xc4, 0x94, 0xa5, 0x0e, 0xd2, + 0x49, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x40, 0xcf, 0x63, 0x96, 0x08, 0xae, + 0xb9, 0xce, 0x6f, 0x8c, 0x08, 0xae, 0xb1, 0xc4, 0x5d, 0x32, 0x08, 0xae, + 0xa8, 0xcd, 0x44, 0xbb, 0x08, 0xae, 0x91, 0x49, 0xb1, 0x43, 0x43, 0x54, + 0xd6, 0xd0, 0x5f, 0x22, 0x08, 0xae, 0x71, 0xd0, 0x58, 0x22, 0x08, 0xae, + 0x69, 0xc9, 0x44, 0xbf, 0x08, 0xae, 0x60, 0x8e, 0x08, 0x8d, 0xd8, 0x94, + 0x08, 0x8d, 0xc8, 0x8e, 0x08, 0x8c, 0x60, 0x94, 0x08, 0x8c, 0x50, 0xd9, + 0x1e, 0x50, 0x01, 0x2f, 0x51, 0xd8, 0x25, 0x5b, 0x01, 0x58, 0xa8, 0xd3, + 0x1e, 0x56, 0x01, 0x2f, 0x49, 0xd3, 0x43, 0x26, 0x01, 0x2d, 0x38, 0xd2, + 0x47, 0x6f, 0x01, 0x2d, 0x41, 0xd3, 0x1e, 0x56, 0x01, 0x58, 0xa0, 0xc6, + 0x0b, 0x18, 0x01, 0x9e, 0x71, 0xc4, 0xd9, 0x12, 0x01, 0x9d, 0x30, 0xc8, + 0x0b, 0x08, 0x01, 0x9d, 0x40, 0xc2, 0xe5, 0xa5, 0x0f, 0x91, 0xc9, 0xc2, + 0xe6, 0x91, 0x0f, 0x91, 0x01, 0xc2, 0x83, 0xe4, 0x0f, 0x90, 0xe0, 0xc2, + 0x71, 0x49, 0x0f, 0x91, 0xa1, 0xc2, 0xe6, 0x81, 0x0f, 0x91, 0x28, 0xc2, + 0xe6, 0xa5, 0x0f, 0x91, 0x71, 0xc2, 0x09, 0x02, 0x0f, 0x90, 0x90, 0xc2, + 0xe6, 0x85, 0x0f, 0x90, 0xb9, 0xc2, 0xe6, 0x93, 0x0f, 0x90, 0xa8, 0xc2, + 0xe0, 0x7e, 0x0f, 0x91, 0xc1, 0xc2, 0x7e, 0x13, 0x0f, 0x91, 0x10, 0xa5, + 0x0f, 0x91, 0xb9, 0xa6, 0x0f, 0x91, 0xb0, 0xc2, 0xe6, 0x5a, 0x0f, 0x91, + 0x89, 0xc2, 0xe5, 0x7d, 0x0f, 0x91, 0x39, 0xc2, 0xe6, 0x8d, 0x0f, 0x90, + 0x80, 0xc2, 0x3c, 0xd4, 0x0f, 0x91, 0x79, 0xc2, 0xe5, 0x7e, 0x0f, 0x91, + 0x40, 0xc2, 0xae, 0x95, 0x0f, 0x90, 0xf9, 0xc2, 0xe6, 0xa3, 0x0f, 0x90, + 0xd8, 0xa6, 0x0f, 0x91, 0x51, 0x9d, 0x0f, 0x91, 0x48, 0xc6, 0x06, 0xe1, + 0x01, 0x20, 0xb8, 0xc2, 0x00, 0xc1, 0x00, 0x43, 0x29, 0x83, 0x00, 0x43, + 0x20, 0xd3, 0x43, 0x5f, 0x0f, 0xc9, 0x69, 0xcc, 0x87, 0xf9, 0x0f, 0xcb, + 0x80, 0xe0, 0x0a, 0x07, 0x01, 0x17, 0xe0, 0xe0, 0x0a, 0x07, 0x01, 0x17, + 0xa0, 0xc8, 0x4b, 0x94, 0x01, 0x0b, 0xf9, 0xc7, 0x0d, 0x04, 0x01, 0x0b, + 0xe8, 0xc2, 0x00, 0x5f, 0x01, 0x0b, 0xa3, 0x03, 0x54, 0xe2, 0xc3, 0x45, + 0x6b, 0x01, 0x0b, 0xe0, 0xc4, 0x22, 0x44, 0x01, 0x0b, 0xd9, 0x91, 0x01, + 0x0b, 0x88, 0xc3, 0x77, 0x79, 0x08, 0x43, 0x91, 0xc4, 0xdc, 0x2d, 0x08, + 0x43, 0x78, 0xc4, 0x02, 0xde, 0x05, 0x47, 0xb1, 0xc2, 0x02, 0xa0, 0x05, + 0x47, 0xa8, 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x1b, 0x03, 0x54, 0xe8, 0xcc, + 0x2e, 0x48, 0x01, 0x5a, 0x69, 0xcc, 0x82, 0xb9, 0x01, 0x5b, 0x69, 0xcd, + 0x7c, 0xa8, 0x01, 0x5c, 0x38, 0x47, 0x13, 0x6d, 0xc3, 0x54, 0xec, 0xc6, + 0x10, 0x9d, 0x01, 0x4a, 0xc9, 0xc8, 0xae, 0xbc, 0x01, 0x4b, 0x08, 0xc8, + 0xae, 0xbc, 0x01, 0x4a, 0xe9, 0xc6, 0x10, 0x9d, 0x01, 0x4a, 0xa8, 0xd8, + 0x22, 0x5b, 0x0f, 0xc0, 0x59, 0x46, 0x03, 0x13, 0xc3, 0x54, 0xf6, 0xcd, + 0x75, 0xa6, 0x01, 0x0e, 0xf9, 0xd0, 0x59, 0x42, 0x01, 0x0d, 0xa9, 0x44, + 0x08, 0xba, 0xc3, 0x55, 0x02, 0xd1, 0x01, 0x68, 0x01, 0x48, 0x41, 0xd9, + 0x1f, 0xf9, 0x0f, 0xc0, 0x39, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xb9, 0xcc, + 0x84, 0xb1, 0x0f, 0xc4, 0xd8, 0xc4, 0x18, 0x10, 0x01, 0x27, 0xd9, 0xc2, + 0x22, 0xcc, 0x01, 0x27, 0xd0, 0xc3, 0x0d, 0x14, 0x01, 0x27, 0xc9, 0xc3, + 0x09, 0x9e, 0x01, 0x27, 0xc0, 0xc4, 0x02, 0xde, 0x01, 0x27, 0xb9, 0xc2, + 0x02, 0xa0, 0x01, 0x27, 0xb0, 0xcf, 0x05, 0x98, 0x01, 0x15, 0x59, 0xce, + 0x34, 0xd4, 
0x01, 0x57, 0x28, 0xd0, 0x0f, 0xc6, 0x01, 0x00, 0xf1, 0xd9, + 0x0f, 0xbd, 0x01, 0x72, 0x10, 0xca, 0x9f, 0xa4, 0x01, 0x4c, 0x81, 0xcd, + 0x7f, 0x80, 0x01, 0x4c, 0x70, 0x45, 0x00, 0x8c, 0xc3, 0x55, 0x0e, 0xd3, + 0x41, 0x71, 0x01, 0x4c, 0xe1, 0xc7, 0x00, 0x38, 0x01, 0x80, 0x4b, 0x03, + 0x55, 0x1a, 0xd3, 0x19, 0x81, 0x01, 0x70, 0x01, 0xda, 0x19, 0x7a, 0x01, + 0x70, 0x08, 0x00, 0x43, 0x55, 0x20, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x01, + 0xd6, 0x2d, 0x62, 0x01, 0x48, 0x09, 0x16, 0x43, 0x55, 0x32, 0xc5, 0x01, + 0x4a, 0x01, 0x0e, 0x09, 0x00, 0x43, 0x55, 0x41, 0xc5, 0x01, 0x4a, 0x01, + 0x0e, 0x01, 0x00, 0x43, 0x55, 0x59, 0xd2, 0x05, 0xd4, 0x0f, 0xc0, 0x11, + 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x90, 0x46, 0x00, 0x8b, 0x43, 0x55, 0x6b, + 0xc9, 0x03, 0xc8, 0x01, 0x58, 0x71, 0xc7, 0x09, 0x0d, 0x01, 0x58, 0x78, + 0xcf, 0x6a, 0x8f, 0x01, 0x5a, 0x41, 0xce, 0x33, 0x92, 0x01, 0x5a, 0x60, + 0xc6, 0x01, 0x73, 0x01, 0x0e, 0x79, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x18, + 0x87, 0x05, 0x28, 0x88, 0x91, 0x05, 0x2c, 0x10, 0xc2, 0x00, 0x87, 0x05, + 0x30, 0x81, 0xc2, 0x02, 0x2b, 0x05, 0x30, 0x89, 0xc3, 0x19, 0xe1, 0x05, + 0x30, 0x91, 0xc2, 0x01, 0xc3, 0x05, 0x31, 0x51, 0xc2, 0x00, 0x58, 0x05, + 0x31, 0x58, 0x87, 0x05, 0x28, 0xf9, 0x90, 0x05, 0x30, 0x28, 0x91, 0x05, + 0x2c, 0x80, 0xc3, 0xe5, 0x36, 0x0b, 0x54, 0x99, 0xc3, 0xe5, 0x06, 0x0b, + 0x54, 0x90, 0x9a, 0x0b, 0x54, 0xd9, 0x93, 0x0b, 0x54, 0xd1, 0x85, 0x0b, + 0x54, 0xc9, 0x9c, 0x0b, 0x54, 0xc0, 0x42, 0x06, 0x46, 0xc3, 0x55, 0x77, + 0xc7, 0xc4, 0x02, 0x00, 0x70, 0x30, 0x91, 0x00, 0x70, 0x59, 0xc3, 0x14, + 0x6b, 0x00, 0x71, 0x41, 0xc2, 0x00, 0xe4, 0x00, 0x71, 0x50, 0x83, 0x00, + 0x71, 0x91, 0x8f, 0x00, 0x71, 0x99, 0x87, 0x00, 0x72, 0x09, 0x46, 0xce, + 0x87, 0x43, 0x55, 0x8f, 0x8b, 0x00, 0x71, 0xa8, 0x87, 0x00, 0x71, 0xb3, + 0x03, 0x55, 0x9b, 0x97, 0x00, 0x71, 0xc8, 0x42, 0x00, 0x8e, 0xc3, 0x55, + 0x9f, 0xca, 0xa5, 0x30, 0x00, 0x70, 0x89, 0xc7, 0xc6, 0xbe, 0x00, 0x70, + 0x90, 0x42, 0x00, 0xb7, 0xc3, 0x55, 0xaf, 0xc7, 0xc1, 0x54, 0x00, 0x71, + 0x00, 0xc8, 0xb9, 0x42, 0x00, 0x71, 0x89, 0xc2, 0x13, 0x4c, 0x00, 0x72, + 0x41, 0x16, 0xc3, 0x55, 0xbb, 0xc8, 0xb5, 0x3a, 0x00, 0x72, 0x58, 0x94, + 0x00, 0x63, 0x00, 0x8e, 0x00, 0x63, 0x08, 0xc3, 0xad, 0xf4, 0x00, 0x78, + 0xd1, 0xc4, 0x97, 0x19, 0x00, 0x78, 0xd9, 0xc3, 0x60, 0x54, 0x00, 0x78, + 0xe0, 0xc3, 0xad, 0xf4, 0x00, 0x78, 0xe9, 0xc4, 0x97, 0x19, 0x00, 0x78, + 0xf1, 0xc3, 0x60, 0x54, 0x00, 0x7e, 0x78, 0xcd, 0x00, 0xfa, 0x07, 0xe8, + 0x09, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xe8, 0x0b, 0xc3, 0x55, 0xc7, 0x45, + 0x00, 0x8c, 0x43, 0x55, 0xd3, 0x0b, 0xc3, 0x55, 0xe5, 0x45, 0x00, 0x8c, + 0x43, 0x55, 0xf1, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xf1, 0xcd, 0x00, 0xfa, + 0x07, 0xe8, 0x10, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x01, 0xca, 0x26, 0xf7, + 0x07, 0xe8, 0xe0, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xf9, 0xca, 0x26, 0xf7, + 0x07, 0xe8, 0xd8, 0x0b, 0xc3, 0x55, 0xfd, 0x45, 0x00, 0x8c, 0x43, 0x56, + 0x09, 0x0b, 0xc3, 0x56, 0x15, 0xd3, 0x43, 0x72, 0x07, 0xed, 0xf8, 0x0b, + 0xc3, 0x56, 0x21, 0x45, 0x00, 0x8c, 0x43, 0x56, 0x2d, 0xcc, 0x00, 0xfb, + 0x07, 0xe2, 0x89, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0xb8, 0x44, 0x2b, 0xb9, + 0xc3, 0x56, 0x39, 0x0a, 0xc3, 0x56, 0x45, 0x45, 0x19, 0x60, 0xc3, 0x56, + 0x51, 0x4d, 0x06, 0x5a, 0xc3, 0x56, 0x67, 0x45, 0x30, 0xc1, 0xc3, 0x56, + 0x73, 0x45, 0x50, 0xf0, 0xc3, 0x56, 0x89, 0x44, 0x72, 0xf0, 0x43, 0x56, + 0x99, 0x45, 0x4d, 0x40, 0xc3, 0x56, 0xa5, 0x45, 0x52, 0x4a, 0xc3, 0x56, + 0xaf, 0x46, 0xd2, 0xa7, 0xc3, 0x56, 0xb9, 0xde, 0x07, 0x29, 0x07, 0xe3, + 0x18, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xd9, 0xca, 0x26, 0xf7, 0x07, 0xe8, + 0xb8, 0x0b, 
0xc3, 0x56, 0xc5, 0x45, 0x00, 0x8c, 0xc3, 0x56, 0xd1, 0xcb, + 0x64, 0x7b, 0x07, 0xe7, 0x38, 0x0b, 0xc3, 0x56, 0xe3, 0xcb, 0x64, 0x7b, + 0x07, 0xe9, 0xb1, 0x45, 0x00, 0x8c, 0x43, 0x56, 0xef, 0x43, 0x02, 0x98, + 0xc3, 0x56, 0xfb, 0x43, 0x2b, 0xba, 0x43, 0x57, 0x0b, 0x0b, 0xc3, 0x57, + 0x17, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xa1, 0x45, 0x00, 0x8c, 0x43, 0x57, + 0x23, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x51, 0xcd, 0x00, 0xfa, 0x07, 0xe8, + 0x70, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xe1, 0xca, 0x26, 0xf7, 0x07, 0xe8, + 0xc0, 0x45, 0x19, 0x60, 0xc3, 0x57, 0x2f, 0x44, 0x19, 0x6a, 0xc3, 0x57, + 0x39, 0x44, 0x72, 0xf0, 0xc3, 0x57, 0x43, 0xd1, 0x50, 0xf0, 0x07, 0xe5, + 0x91, 0x4d, 0x06, 0x5a, 0xc3, 0x57, 0x4f, 0x44, 0x2b, 0xb9, 0x43, 0x57, + 0x5b, 0x42, 0x00, 0xdb, 0xc3, 0x57, 0x67, 0x03, 0x43, 0x57, 0x71, 0xcc, + 0x00, 0xfb, 0x07, 0xe1, 0x61, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xe8, 0xce, + 0x43, 0x77, 0x07, 0xeb, 0xd1, 0xd7, 0x26, 0xea, 0x07, 0xeb, 0xd9, 0xcf, + 0x67, 0x65, 0x07, 0xeb, 0xc8, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xb9, 0xca, + 0x26, 0xf7, 0x07, 0xe8, 0x98, 0x0b, 0xc3, 0x57, 0x7d, 0x45, 0x00, 0x8c, + 0x43, 0x57, 0x89, 0x0b, 0xc3, 0x57, 0x9b, 0x4a, 0x74, 0x6e, 0x43, 0x57, + 0xa7, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xa1, 0xcd, 0x00, 0xfa, 0x07, 0xe7, + 0xc0, 0x5e, 0x0d, 0xba, 0xc3, 0x57, 0xb3, 0x4e, 0x6e, 0xba, 0x43, 0x57, + 0xbf, 0x0b, 0xc3, 0x57, 0xcb, 0xcc, 0x82, 0xa1, 0x07, 0xea, 0x69, 0xcf, + 0x65, 0x1c, 0x07, 0xef, 0xb8, 0x44, 0x2b, 0xb9, 0xc3, 0x57, 0xd5, 0x4d, + 0x06, 0x5a, 0xc3, 0x57, 0xe1, 0x45, 0x19, 0x60, 0xc3, 0x57, 0xed, 0x45, + 0x50, 0xf1, 0x43, 0x57, 0xfd, 0x44, 0x2b, 0xb9, 0xc3, 0x58, 0x09, 0x4d, + 0x06, 0x5a, 0xc3, 0x58, 0x15, 0xcf, 0x60, 0x8a, 0x07, 0xe3, 0xc9, 0x45, + 0x19, 0x60, 0xc3, 0x58, 0x21, 0xcf, 0x69, 0x81, 0x07, 0xe3, 0xb9, 0xce, + 0x72, 0xf0, 0x07, 0xe3, 0xb1, 0xd2, 0x4a, 0xbd, 0x07, 0xe0, 0x89, 0xcf, + 0x64, 0x77, 0x07, 0xe7, 0x30, 0xe0, 0x07, 0x27, 0x07, 0xe2, 0xd8, 0xca, + 0x26, 0xf7, 0x07, 0xe3, 0xa9, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x80, 0xca, + 0x26, 0xf7, 0x07, 0xe3, 0xa1, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x78, 0xca, + 0x26, 0xf7, 0x07, 0xe3, 0x91, 0x0b, 0xc3, 0x58, 0x31, 0xcb, 0x64, 0x7b, + 0x07, 0xe7, 0x19, 0x45, 0x00, 0x8c, 0x43, 0x58, 0x3d, 0x0b, 0xc3, 0x58, + 0x5b, 0x45, 0x00, 0x8c, 0x43, 0x58, 0x67, 0x43, 0x02, 0x98, 0xc3, 0x58, + 0x79, 0x43, 0x2b, 0xba, 0x43, 0x58, 0x83, 0x0b, 0xc3, 0x58, 0x8f, 0x45, + 0x00, 0x8c, 0x43, 0x58, 0x9b, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x89, 0xcc, + 0x10, 0xb4, 0x07, 0xe6, 0xf0, 0x4f, 0x08, 0x0b, 0xc3, 0x58, 0xad, 0x42, + 0x00, 0x8f, 0x43, 0x58, 0xf5, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0xc1, 0xcb, + 0x10, 0xb5, 0x07, 0xe6, 0xe8, 0x45, 0x19, 0x60, 0xc3, 0x58, 0xff, 0xce, + 0x43, 0x77, 0x07, 0xed, 0x80, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0xa9, 0xcb, + 0x10, 0xb5, 0x07, 0xe6, 0xd0, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x79, 0xcc, + 0x10, 0xb4, 0x07, 0xe6, 0xb0, 0x0b, 0xc3, 0x59, 0x0b, 0x45, 0x00, 0x8c, + 0x43, 0x59, 0x17, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x71, 0xcb, 0x10, 0xb5, + 0x07, 0xe6, 0xa8, 0xce, 0x43, 0x77, 0x07, 0xec, 0xd1, 0xd7, 0x26, 0xea, + 0x07, 0xec, 0xd8, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x59, 0xcb, 0x10, 0xb5, + 0x07, 0xe6, 0x90, 0xd7, 0x26, 0xea, 0x07, 0xec, 0xc9, 0x44, 0x19, 0x6a, + 0xc3, 0x59, 0x29, 0xce, 0x43, 0x77, 0x07, 0xee, 0x39, 0x45, 0x19, 0x60, + 0x43, 0x59, 0x35, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x61, 0xca, 0x26, 0xf7, + 0x07, 0xe4, 0x11, 0x0b, 0xc3, 0x59, 0x41, 0x45, 0x00, 0x8c, 0x43, 0x59, + 0x4d, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x59, 0xca, 0x26, 0xf7, 0x07, 0xe4, + 0x09, 0x0b, 0x43, 0x59, 0x59, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x21, 0xcd, + 0x00, 0xfa, 
0x07, 0xe1, 0xf0, 0x48, 0x06, 0x5f, 0xc3, 0x59, 0x65, 0xca, + 0x26, 0xf7, 0x07, 0xe4, 0x01, 0xcd, 0x00, 0xfa, 0x07, 0xe1, 0xb8, 0xcc, + 0x00, 0xfb, 0x07, 0xe1, 0xd1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x30, 0xcc, + 0x00, 0xfb, 0x07, 0xe1, 0xc9, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x28, 0xcc, + 0x00, 0xfb, 0x07, 0xe1, 0xc1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x20, 0xcc, + 0x00, 0xfb, 0x07, 0xe0, 0xd9, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x60, 0xcc, + 0x00, 0xfb, 0x07, 0xe0, 0xc9, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x58, 0xca, + 0x26, 0xf7, 0x07, 0xe8, 0xf9, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x18, 0xca, + 0x26, 0xf7, 0x07, 0xe9, 0x01, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x20, 0xca, + 0x26, 0xf7, 0x07, 0xe4, 0x31, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0x18, 0x4c, + 0x82, 0x59, 0xc3, 0x59, 0x71, 0x46, 0x08, 0x09, 0x43, 0x59, 0x7d, 0xcc, + 0x00, 0xfb, 0x07, 0xe2, 0x11, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x60, 0x44, + 0x19, 0x6a, 0xc3, 0x59, 0x89, 0xce, 0x43, 0x77, 0x07, 0xed, 0x68, 0xcc, + 0x00, 0xfb, 0x07, 0xe2, 0x09, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x58, 0xca, + 0x26, 0xf7, 0x07, 0xec, 0x29, 0xcc, 0x10, 0xb4, 0x07, 0xec, 0x30, 0x0b, + 0xc3, 0x59, 0x95, 0x45, 0x00, 0x8c, 0x43, 0x59, 0xa1, 0xcc, 0x00, 0xfb, + 0x07, 0xe1, 0xf9, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x48, 0x45, 0x30, 0xc1, + 0xc3, 0x59, 0xb3, 0x45, 0x19, 0x60, 0xc3, 0x59, 0xbf, 0xce, 0x43, 0x77, + 0x07, 0xed, 0x60, 0x44, 0x2b, 0xb9, 0xc3, 0x59, 0xcb, 0x4d, 0x06, 0x5a, + 0xc3, 0x59, 0xd7, 0x45, 0x19, 0x60, 0xc3, 0x59, 0xe3, 0x45, 0x50, 0xf1, + 0x43, 0x59, 0xed, 0xe0, 0x00, 0xe7, 0x07, 0xef, 0x88, 0xcc, 0x00, 0xfb, + 0x07, 0xe1, 0x81, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x08, 0xcc, 0x00, 0xfb, + 0x07, 0xe1, 0x79, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x00, 0xca, 0x26, 0xf7, + 0x07, 0xeb, 0xe1, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0xe8, 0xca, 0x26, 0xf7, + 0x07, 0xe3, 0x79, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x50, 0xca, 0x26, 0xf7, + 0x07, 0xe3, 0x71, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x48, 0xca, 0x26, 0xf7, + 0x07, 0xe3, 0x61, 0x0b, 0xc3, 0x59, 0xf9, 0xcb, 0x64, 0x7b, 0x07, 0xe7, + 0x08, 0x0b, 0xc3, 0x5a, 0x05, 0xd3, 0x43, 0x72, 0x07, 0xec, 0xf0, 0x43, + 0x02, 0x98, 0xc3, 0x5a, 0x11, 0x43, 0x2b, 0xba, 0x43, 0x5a, 0x1b, 0xcc, + 0x00, 0xfb, 0x07, 0xe0, 0x29, 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xe0, 0xc2, + 0x04, 0xc6, 0x07, 0xea, 0x11, 0x17, 0x43, 0x5a, 0x27, 0xc8, 0xb8, 0x52, + 0x07, 0xea, 0x79, 0xc7, 0x6d, 0x34, 0x07, 0xea, 0x00, 0xd5, 0x1c, 0xbf, + 0x07, 0xe2, 0x49, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x40, 0x0b, 0xc3, 0x5a, + 0x34, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x49, 0xd3, 0x43, 0x72, 0x07, 0xed, + 0x88, 0x0b, 0xc3, 0x5a, 0x40, 0x45, 0x00, 0x8c, 0x43, 0x5a, 0x4c, 0x0b, + 0xc3, 0x5a, 0x5e, 0x45, 0x00, 0x8c, 0x43, 0x5a, 0x6a, 0x0b, 0xc3, 0x5a, + 0x7c, 0x45, 0x00, 0x8c, 0x43, 0x5a, 0x88, 0xcc, 0x00, 0xfb, 0x07, 0xe1, + 0x21, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xb0, 0xca, 0x26, 0xf7, 0x07, 0xeb, + 0x79, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x80, 0xcc, 0x00, 0xfb, 0x07, 0xe1, + 0x19, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xa8, 0xd7, 0x26, 0xea, 0x07, 0xeb, + 0x71, 0xce, 0x43, 0x77, 0x07, 0xed, 0x58, 0xcb, 0x10, 0xb5, 0x07, 0xdf, + 0xd9, 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0xc8, 0x00, 0x43, 0x5a, 0xa0, 0x00, + 0x43, 0x5a, 0xb6, 0x00, 0x43, 0x5a, 0xcc, 0x00, 0x43, 0x5a, 0xe2, 0x00, + 0x43, 0x5a, 0xf8, 0x00, 0x43, 0x5b, 0x08, 0x00, 0x43, 0x5b, 0x1e, 0x00, + 0x43, 0x5b, 0x34, 0xc3, 0x0f, 0x9a, 0x00, 0x45, 0xe3, 0x03, 0x5b, 0x40, + 0xc4, 0x3a, 0x01, 0x00, 0x45, 0xe9, 0xc3, 0xb1, 0x0d, 0x00, 0x45, 0xd8, + 0x00, 0x43, 0x5b, 0x46, 0x00, 0x43, 0x5b, 0x5c, 0x00, 0x43, 0x5b, 0x75, + 0x88, 0x00, 0x32, 0x1b, 0x03, 0x5b, 0x8b, 0xca, 0xa2, 0x1a, 0x00, 0x31, + 0x00, 0xc2, 
0x13, 0xc0, 0x00, 0x36, 0x4b, 0x03, 0x5b, 0x8f, 0xc2, 0x49, + 0x0c, 0x00, 0x36, 0x2a, 0x03, 0x5b, 0x93, 0x00, 0x43, 0x5b, 0x97, 0x00, + 0xc3, 0x5b, 0xa7, 0xc2, 0x16, 0x1c, 0x00, 0x34, 0x3a, 0x03, 0x5b, 0xbd, + 0x00, 0xc3, 0x5b, 0xc1, 0xc2, 0x16, 0x1c, 0x00, 0x33, 0xd2, 0x03, 0x5b, + 0xd7, 0x00, 0xc3, 0x5b, 0xdb, 0xc2, 0x16, 0x1c, 0x00, 0x33, 0xfa, 0x03, + 0x5b, 0xef, 0x00, 0x43, 0x5b, 0xf3, 0xc6, 0xd0, 0x91, 0x00, 0x44, 0x31, + 0xc2, 0x00, 0x65, 0x00, 0x31, 0x83, 0x03, 0x5c, 0x09, 0xc2, 0x16, 0x1c, + 0x00, 0x31, 0x5a, 0x03, 0x5c, 0x0d, 0x4b, 0x88, 0x05, 0xc3, 0x5c, 0x11, + 0xcb, 0x64, 0x7b, 0x07, 0xda, 0xc9, 0x0b, 0xc3, 0x5c, 0x1b, 0xca, 0x26, + 0xf7, 0x07, 0xda, 0xb8, 0x00, 0x43, 0x5c, 0x27, 0x00, 0x43, 0x5c, 0x37, + 0x00, 0x43, 0x5c, 0x56, 0x00, 0x43, 0x5c, 0x62, 0x00, 0x43, 0x5c, 0x74, + 0x00, 0x43, 0x5c, 0x84, 0x00, 0xc3, 0x5c, 0x90, 0xc2, 0x16, 0x1c, 0x00, + 0x34, 0x02, 0x03, 0x5c, 0xa6, 0x00, 0x43, 0x5c, 0xaa, 0x60, 0x06, 0x47, + 0x43, 0x5c, 0xba, 0xd0, 0x5f, 0x02, 0x00, 0x33, 0xbb, 0x03, 0x5c, 0xc6, + 0xca, 0x26, 0xf7, 0x07, 0xde, 0xc1, 0xcd, 0x00, 0xfa, 0x07, 0xde, 0xb8, + 0x45, 0x00, 0x8c, 0xc3, 0x5c, 0xcc, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0xb1, + 0x0b, 0xc3, 0x5c, 0xd8, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0xc0, 0xcb, 0x64, + 0x7b, 0x07, 0xdf, 0x39, 0x0b, 0xc3, 0x5c, 0xe4, 0xca, 0x26, 0xf7, 0x07, + 0xdf, 0x28, 0x00, 0x43, 0x5c, 0xf0, 0x00, 0x43, 0x5d, 0x02, 0x00, 0x43, + 0x5d, 0x12, 0x00, 0x43, 0x5d, 0x28, 0x00, 0x43, 0x5d, 0x3e, 0x8e, 0x00, + 0x31, 0x7b, 0x03, 0x5d, 0x54, 0xc3, 0x01, 0xce, 0x00, 0x34, 0x63, 0x03, + 0x5d, 0x58, 0x86, 0x00, 0x31, 0xb2, 0x03, 0x5d, 0x5c, 0x8e, 0x00, 0x34, + 0x43, 0x03, 0x5d, 0x60, 0xc3, 0x01, 0xce, 0x00, 0x34, 0x6a, 0x03, 0x5d, + 0x64, 0x00, 0x43, 0x5d, 0x68, 0x00, 0x43, 0x5d, 0x74, 0xc3, 0xb1, 0x0d, + 0x00, 0x35, 0x09, 0xc3, 0x0f, 0x9a, 0x00, 0x33, 0x79, 0xc3, 0x85, 0xf5, + 0x00, 0x33, 0x70, 0xca, 0x26, 0xf7, 0x07, 0xde, 0xf9, 0xcd, 0x00, 0xfa, + 0x07, 0xde, 0xf0, 0x00, 0x43, 0x5d, 0x84, 0x45, 0x00, 0x8c, 0xc3, 0x5d, + 0x94, 0xcd, 0x00, 0xfa, 0x07, 0xf7, 0x69, 0xca, 0x26, 0xf7, 0x07, 0xf7, + 0x70, 0x00, 0x43, 0x5d, 0xb5, 0xca, 0x26, 0xf7, 0x07, 0xde, 0xd1, 0xcd, + 0x00, 0xfa, 0x07, 0xde, 0xc8, 0x00, 0xc3, 0x5d, 0xcb, 0xc3, 0xe6, 0x23, + 0x00, 0x35, 0x8a, 0x03, 0x5d, 0xdb, 0x00, 0x43, 0x5d, 0xdf, 0x00, 0x43, + 0x5d, 0xfe, 0x8a, 0x00, 0x31, 0x6b, 0x03, 0x5e, 0x0e, 0xc3, 0x08, 0x0b, + 0x00, 0x31, 0x0a, 0x03, 0x5e, 0x12, 0x00, 0x43, 0x5e, 0x18, 0x00, 0x43, + 0x5e, 0x40, 0x16, 0xc3, 0x5e, 0x52, 0x15, 0xc3, 0x5e, 0x62, 0xc3, 0x72, + 0xf0, 0x0f, 0x75, 0x99, 0xc3, 0x0f, 0x9a, 0x0f, 0x75, 0x91, 0xc3, 0xb1, + 0x0d, 0x0f, 0x75, 0x81, 0xc3, 0x03, 0x0c, 0x0f, 0x75, 0x79, 0xc4, 0x3a, + 0x01, 0x0f, 0x75, 0x69, 0xc4, 0x19, 0x60, 0x0f, 0x75, 0x61, 0xc3, 0x0d, + 0xff, 0x0f, 0x75, 0x59, 0xc3, 0x2b, 0xb9, 0x0f, 0x75, 0x49, 0xc3, 0x14, + 0x4b, 0x0f, 0x75, 0x39, 0x42, 0x02, 0x1c, 0xc3, 0x5e, 0x74, 0xc3, 0x7e, + 0x89, 0x0f, 0x75, 0x29, 0x42, 0x0e, 0x9a, 0xc3, 0x5e, 0x7e, 0xc4, 0x30, + 0xc1, 0x0f, 0x75, 0x11, 0xc3, 0x85, 0xf5, 0x0f, 0x75, 0x09, 0xc4, 0x14, + 0x4a, 0x0f, 0x75, 0xb9, 0xc5, 0x92, 0x75, 0x0f, 0x75, 0xd8, 0xc3, 0x85, + 0xf5, 0x0f, 0x70, 0xe1, 0xc4, 0x3a, 0x01, 0x0f, 0x70, 0xe9, 0xc3, 0xb1, + 0x0d, 0x0f, 0x70, 0xf1, 0xc3, 0x0f, 0x9a, 0x0f, 0x70, 0xf8, 0xc4, 0x30, + 0xc1, 0x0f, 0x72, 0x11, 0xc3, 0x14, 0x4b, 0x0f, 0x72, 0x39, 0xc3, 0x2b, + 0xb9, 0x0f, 0x72, 0x49, 0xc3, 0x0d, 0xff, 0x0f, 0x72, 0x59, 0xc4, 0x3a, + 0x01, 0x0f, 0x72, 0x69, 0x15, 0xc3, 0x5e, 0x86, 0xc3, 0x03, 0x0c, 0x0f, + 0x72, 0x79, 0xc3, 0x0f, 0x9a, 0x0f, 0x72, 0x91, 0xc4, 0x14, 0x4a, 0x0f, + 0x72, 0xb9, 
0x06, 0xc3, 0x5e, 0x98, 0xc5, 0x92, 0x75, 0x0f, 0x72, 0xd8, + 0xc3, 0x00, 0x49, 0x0f, 0x74, 0x01, 0xc2, 0x00, 0x74, 0x0f, 0x74, 0x78, + 0x8e, 0x0f, 0x74, 0x19, 0x86, 0x0f, 0x74, 0xc8, 0xc2, 0x16, 0x1c, 0x0f, + 0x74, 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x74, 0x38, 0xc2, 0x00, 0x74, 0x0f, + 0x74, 0x31, 0x8a, 0x0f, 0x74, 0xd0, 0xc2, 0x02, 0x98, 0x0f, 0x74, 0x41, + 0xc2, 0x16, 0x1c, 0x0f, 0x74, 0xa9, 0x0a, 0x43, 0x5e, 0xa4, 0xc3, 0x03, + 0x26, 0x0f, 0x74, 0x71, 0xc2, 0x01, 0x9d, 0x0f, 0x74, 0x89, 0xc4, 0xdf, + 0x93, 0x0f, 0x74, 0xa0, 0xc2, 0x16, 0x1c, 0x0f, 0x73, 0x21, 0xc2, 0x02, + 0x98, 0x0f, 0x73, 0x38, 0xc2, 0x02, 0x98, 0x0f, 0x73, 0x41, 0xc2, 0x16, + 0x1c, 0x0f, 0x73, 0xa9, 0xc3, 0x64, 0x77, 0x0f, 0x73, 0xb0, 0xc2, 0x0f, + 0x9b, 0x0f, 0x73, 0x51, 0xc3, 0x14, 0x4b, 0x0f, 0x73, 0xb8, 0xc3, 0x03, + 0x26, 0x0f, 0x73, 0x71, 0xc2, 0x01, 0x9d, 0x0f, 0x73, 0x89, 0xc4, 0xdf, + 0x93, 0x0f, 0x73, 0xa0, 0xc2, 0x01, 0x9d, 0x0f, 0x73, 0xc9, 0x47, 0x3b, + 0xc4, 0x43, 0x5e, 0xb0, 0xc5, 0xda, 0xd3, 0x00, 0x46, 0xf9, 0xc3, 0xe5, + 0x63, 0x00, 0x46, 0xf1, 0x42, 0x0d, 0xf6, 0xc3, 0x5e, 0xbc, 0x03, 0x43, + 0x5e, 0xc6, 0xcc, 0x00, 0xfb, 0x00, 0x37, 0x11, 0xcb, 0x10, 0xb5, 0x00, + 0x36, 0xc0, 0xde, 0x0f, 0x9a, 0x00, 0x36, 0xb9, 0xde, 0x0d, 0xf6, 0x00, + 0x36, 0xb1, 0xd6, 0x2f, 0x88, 0x00, 0x30, 0xb0, 0xc7, 0xc9, 0xf8, 0x00, + 0x44, 0xd9, 0x0b, 0x43, 0x5e, 0xe4, 0xc5, 0x05, 0x02, 0x07, 0xdd, 0xf1, + 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xe8, 0xc5, 0x05, 0x02, 0x07, 0xdd, 0xc9, + 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xc0, 0xc3, 0x7e, 0x89, 0x00, 0x44, 0x21, + 0xc5, 0x08, 0x09, 0x00, 0x44, 0x18, 0x49, 0x04, 0xd2, 0xc3, 0x5e, 0xf0, + 0x48, 0x0a, 0x53, 0x43, 0x5e, 0xfc, 0x51, 0x13, 0xe3, 0xc3, 0x5f, 0x0e, + 0xd3, 0x43, 0x98, 0x01, 0x2b, 0x91, 0xd3, 0x43, 0xbe, 0x01, 0x2b, 0x88, + 0x45, 0x02, 0x9a, 0x43, 0x5f, 0x20, 0xc8, 0x00, 0x5f, 0x01, 0x2a, 0x71, + 0xca, 0x01, 0x68, 0x01, 0x2a, 0x60, 0xc9, 0xb0, 0x3e, 0x01, 0x2b, 0xe9, + 0xc9, 0x01, 0x69, 0x01, 0x29, 0xa0, 0x49, 0x2a, 0xf5, 0xc3, 0x5f, 0x32, + 0x02, 0x43, 0x5f, 0x48, 0x49, 0x2a, 0xf5, 0x43, 0x5f, 0x5a, 0xce, 0x2a, + 0xfe, 0x0f, 0xd0, 0xa1, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xf0, 0xce, 0x2a, + 0xfe, 0x0f, 0xd0, 0x91, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xe0, 0xce, 0x2a, + 0xfe, 0x0f, 0xd0, 0x89, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xd8, 0xce, 0x2a, + 0xfe, 0x0f, 0xd0, 0x81, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xd0, 0xc3, 0x00, + 0x74, 0x0f, 0xd1, 0x21, 0xc5, 0x56, 0xa5, 0x0f, 0xd1, 0x40, 0xce, 0x6f, + 0x38, 0x01, 0x34, 0x49, 0xcf, 0x6a, 0x9e, 0x01, 0x34, 0x41, 0xca, 0x3e, + 0xe4, 0x01, 0x4f, 0x68, 0xc5, 0x0b, 0x0a, 0x01, 0x2d, 0x51, 0xc3, 0x0e, + 0x6b, 0x01, 0x5a, 0x88, 0xc6, 0x46, 0x3e, 0x01, 0x2d, 0xd1, 0xc7, 0xbb, + 0xcb, 0x01, 0x5a, 0x98, 0xd9, 0x20, 0x44, 0x01, 0x1f, 0x78, 0xd2, 0x1c, + 0x40, 0x01, 0x1f, 0x68, 0xc4, 0x01, 0x9b, 0x01, 0x3d, 0x20, 0xd2, 0x1c, + 0x40, 0x01, 0x1f, 0x70, 0xc5, 0x06, 0x82, 0x01, 0x30, 0xd1, 0xce, 0x24, + 0xd5, 0x0f, 0xac, 0xe0, 0xc6, 0x0b, 0x09, 0x01, 0x2f, 0xf1, 0xc7, 0x3a, + 0x19, 0x0f, 0xbc, 0xc9, 0xc7, 0x0a, 0xe0, 0x0f, 0xbc, 0xf8, 0xc8, 0x5e, + 0xa6, 0x01, 0x5e, 0x30, 0xc8, 0x5e, 0xa6, 0x01, 0x5e, 0x38, 0x9a, 0x01, + 0x30, 0x91, 0xc5, 0x6b, 0x02, 0x01, 0x30, 0x89, 0x04, 0xc3, 0x5f, 0x66, + 0xc8, 0x8e, 0xa5, 0x0f, 0xaf, 0xa9, 0xc7, 0xc0, 0xba, 0x01, 0x5d, 0xe8, + 0xc4, 0xe0, 0x97, 0x00, 0xdb, 0x51, 0xc6, 0xcf, 0x59, 0x00, 0xdb, 0x28, + 0xc7, 0xc2, 0x6c, 0x00, 0xda, 0x08, 0x90, 0x0b, 0x51, 0x31, 0x96, 0x0b, + 0x50, 0xb8, 0x91, 0x0b, 0x51, 0x49, 0x97, 0x0b, 0x50, 0xe1, 0xc2, 0x25, + 0x9f, 0x0b, 0x50, 0x98, 0x83, 0x0b, 0x50, 0x71, 0x87, 0x0b, 0x50, 0x40, + 0xc2, 0x04, 
0xc6, 0x0b, 0x51, 0xa1, 0xc2, 0x00, 0xc4, 0x0b, 0x51, 0x80, + 0x90, 0x0b, 0x51, 0x89, 0xc2, 0xd0, 0x00, 0x0b, 0x51, 0x29, 0x87, 0x0b, + 0x50, 0x38, 0xc2, 0x02, 0xe0, 0x0b, 0x50, 0x61, 0x8b, 0x0b, 0x50, 0x58, + 0x87, 0x0b, 0x51, 0x11, 0xc2, 0xd0, 0x00, 0x0b, 0x50, 0xf8, 0xc2, 0x01, + 0x30, 0x0b, 0x51, 0x41, 0xc5, 0xde, 0x75, 0x0b, 0x51, 0x38, 0xc3, 0x8b, + 0xa9, 0x0b, 0x50, 0xd1, 0xc3, 0x7c, 0x57, 0x0b, 0x50, 0x80, 0xc2, 0x10, + 0x11, 0x0b, 0x50, 0xc0, 0xc2, 0x00, 0x7a, 0x0b, 0x50, 0x11, 0x07, 0xc3, + 0x5f, 0x72, 0xc5, 0xd8, 0xe9, 0x0b, 0x4d, 0x10, 0xc2, 0xd0, 0x00, 0x0b, + 0x4d, 0xa9, 0x96, 0x0b, 0x4d, 0x48, 0x91, 0x0b, 0x4b, 0xa9, 0x87, 0x0b, + 0x4f, 0x50, 0x17, 0xc3, 0x5f, 0x7a, 0x96, 0x0b, 0x4d, 0xb8, 0x96, 0x0b, + 0x4e, 0x61, 0xc2, 0x00, 0x3d, 0x0b, 0x4d, 0x59, 0xc2, 0x00, 0x11, 0x0b, + 0x4b, 0xd0, 0x0d, 0xc3, 0x5f, 0x84, 0x83, 0x0b, 0x4f, 0x91, 0xc3, 0x8b, + 0xa9, 0x0b, 0x4f, 0x03, 0x03, 0x5f, 0x95, 0x09, 0xc3, 0x5f, 0x99, 0xc6, + 0xce, 0xa5, 0x0b, 0x4d, 0x19, 0x11, 0x43, 0x5f, 0xa1, 0xc2, 0x05, 0x1d, + 0x0b, 0x4b, 0x81, 0x03, 0xc3, 0x5f, 0xa9, 0x0b, 0x43, 0x5f, 0xb3, 0x17, + 0xc3, 0x5f, 0xbd, 0xc3, 0x8f, 0x8a, 0x0b, 0x4b, 0xe0, 0x87, 0x0b, 0x4e, + 0x28, 0x07, 0xc3, 0x5f, 0xc7, 0xc5, 0xc0, 0x3e, 0x0b, 0x4c, 0x50, 0xc2, + 0x00, 0xb6, 0x0b, 0x4e, 0x71, 0xc2, 0x01, 0xdf, 0x0b, 0x4d, 0xe0, 0xc2, + 0x92, 0xb5, 0x0b, 0x4e, 0x09, 0xc2, 0x5c, 0x9b, 0x0b, 0x4d, 0x38, 0xc7, + 0x0b, 0xc8, 0x0b, 0x4e, 0x01, 0xc7, 0xc8, 0xa1, 0x0b, 0x4d, 0x68, 0x8f, + 0x0b, 0x4b, 0x91, 0x93, 0x0b, 0x4e, 0xe1, 0x83, 0x0b, 0x4e, 0xdb, 0x03, + 0x5f, 0xd4, 0xc8, 0xbc, 0x02, 0x0b, 0x4c, 0x78, 0x91, 0x0b, 0x4b, 0xcb, + 0x03, 0x5f, 0xd8, 0x93, 0x0b, 0x4e, 0xb0, 0x90, 0x0b, 0x50, 0x01, 0x97, + 0x0b, 0x4f, 0xea, 0x03, 0x5f, 0xdc, 0x8f, 0x0b, 0x4d, 0x53, 0x03, 0x5f, + 0xe2, 0xc2, 0x10, 0x11, 0x0b, 0x4c, 0xb0, 0x03, 0xc3, 0x5f, 0xe8, 0x87, + 0x0b, 0x4f, 0x49, 0x8f, 0x0b, 0x4c, 0x88, 0x83, 0x0b, 0x4b, 0x63, 0x03, + 0x5f, 0xf0, 0x42, 0x00, 0xaf, 0x43, 0x5f, 0xf4, 0x07, 0x43, 0x60, 0x00, + 0x17, 0xc3, 0x60, 0x0a, 0xc2, 0x00, 0x4f, 0x0b, 0x4c, 0x20, 0xc2, 0x00, + 0x45, 0x0b, 0x4e, 0x10, 0x93, 0x0b, 0x4b, 0x71, 0x87, 0x0b, 0x4f, 0x80, + 0x91, 0x0b, 0x4f, 0x9b, 0x03, 0x60, 0x12, 0xc2, 0x14, 0xbe, 0x0b, 0x4e, + 0xf1, 0xc5, 0x8b, 0xa8, 0x0b, 0x4d, 0x20, 0x96, 0x0b, 0x4c, 0x81, 0x87, + 0x0b, 0x4b, 0xb0, 0x11, 0xc3, 0x60, 0x16, 0x93, 0x0b, 0x4f, 0xc1, 0x8f, + 0x0b, 0x4b, 0xd8, 0x92, 0x0b, 0x4b, 0x49, 0x93, 0x0b, 0x4e, 0xc9, 0xc2, + 0x00, 0xc2, 0x0b, 0x4c, 0xf8, 0x87, 0x0b, 0x4f, 0x61, 0xc3, 0x8b, 0xa9, + 0x0b, 0x4c, 0xe8, 0xc2, 0x01, 0xdf, 0x0b, 0x4b, 0x41, 0x87, 0x0b, 0x4d, + 0x30, 0x93, 0x0b, 0x4f, 0xe1, 0x87, 0x0b, 0x4d, 0xc3, 0x03, 0x60, 0x1e, + 0x92, 0x0b, 0x4c, 0x58, 0xc2, 0x02, 0xe0, 0x0b, 0x4e, 0x18, 0xc2, 0x00, + 0xc4, 0x0b, 0x4d, 0x29, 0x83, 0x0b, 0x4c, 0x38, 0x93, 0x0b, 0x50, 0x08, + 0x00, 0xc3, 0x60, 0x22, 0x87, 0x0b, 0x4d, 0xa2, 0x03, 0x60, 0x32, 0x90, + 0x0b, 0x4f, 0x29, 0x93, 0x0b, 0x4f, 0x21, 0xc3, 0xb5, 0x1b, 0x0b, 0x4f, + 0x09, 0xc2, 0x00, 0xe2, 0x0b, 0x4d, 0x90, 0xc5, 0x00, 0x99, 0x0b, 0x4f, + 0x19, 0xc8, 0xb7, 0x2a, 0x0b, 0x4f, 0x10, 0x9a, 0x0b, 0x4e, 0xf9, 0xc2, + 0x10, 0x11, 0x0b, 0x4c, 0xbb, 0x03, 0x60, 0x36, 0x8f, 0x0b, 0x4d, 0xf0, + 0x96, 0x0b, 0x4d, 0x71, 0xc2, 0x02, 0xe0, 0x0b, 0x4c, 0xa0, 0x09, 0xc3, + 0x60, 0x3a, 0x0d, 0x43, 0x60, 0x50, 0xc2, 0x01, 0xdf, 0x0b, 0x4a, 0x01, + 0x0a, 0xc3, 0x60, 0x6e, 0x43, 0x8f, 0x8a, 0x43, 0x60, 0x7a, 0x07, 0xc3, + 0x60, 0x82, 0xc2, 0x5d, 0xa1, 0x0b, 0x4b, 0x10, 0xc2, 0x00, 0xc2, 0x0b, + 0x49, 0xb9, 0x07, 0xc3, 0x60, 0x8c, 0xc2, 0x00, 0x45, 0x0b, 0x48, 0xc0, + 0x8b, 0x0b, 
0x4a, 0x69, 0xc2, 0x0f, 0xe1, 0x0b, 0x49, 0x79, 0xc2, 0x00, + 0x3d, 0x0b, 0x49, 0x11, 0xc2, 0x00, 0xc2, 0x0b, 0x47, 0xd0, 0xc3, 0xdf, + 0x8c, 0x0b, 0x4a, 0x39, 0x42, 0x2c, 0x43, 0xc3, 0x60, 0x96, 0xc2, 0x00, + 0xb6, 0x0b, 0x48, 0x11, 0x8b, 0x0b, 0x47, 0x9a, 0x03, 0x60, 0xa0, 0x17, + 0xc3, 0x60, 0xa6, 0xc3, 0xd0, 0xd7, 0x0b, 0x4a, 0x79, 0x96, 0x0b, 0x49, + 0x80, 0xc5, 0xda, 0x33, 0x0b, 0x4a, 0x11, 0xc5, 0xd9, 0x9d, 0x0b, 0x48, + 0x50, 0x17, 0xc3, 0x60, 0xb0, 0xc3, 0xd0, 0xd7, 0x0b, 0x4a, 0x80, 0xc2, + 0x04, 0xc6, 0x0b, 0x49, 0x03, 0x03, 0x60, 0xb8, 0xc2, 0x01, 0xba, 0x0b, + 0x47, 0x88, 0xc3, 0x8f, 0x8a, 0x0b, 0x49, 0x91, 0x42, 0x2c, 0x43, 0xc3, + 0x60, 0xbe, 0x91, 0x0b, 0x48, 0xea, 0x03, 0x60, 0xc8, 0xc3, 0x8f, 0x8a, + 0x0b, 0x48, 0xe1, 0xc3, 0x5c, 0x9f, 0x0b, 0x48, 0xd1, 0xc4, 0xe4, 0x1b, + 0x0b, 0x48, 0xb0, 0x17, 0xc3, 0x60, 0xcc, 0xc3, 0xd0, 0xd7, 0x0b, 0x49, + 0x40, 0xc2, 0x01, 0xbb, 0x0b, 0x49, 0xe8, 0x93, 0x0b, 0x49, 0xf9, 0x90, + 0x0b, 0x49, 0xd1, 0xc2, 0x00, 0x7a, 0x0b, 0x48, 0x30, 0x17, 0xc3, 0x60, + 0xda, 0x96, 0x0b, 0x48, 0x20, 0xc2, 0x10, 0x11, 0x0b, 0x49, 0xc9, 0x97, + 0x0b, 0x4a, 0x91, 0x87, 0x0b, 0x48, 0x18, 0x93, 0x0b, 0x4b, 0x21, 0x92, + 0x0b, 0x48, 0x38, 0xc2, 0x7f, 0xc0, 0x0b, 0x4a, 0xe1, 0x97, 0x0b, 0x4a, + 0xc1, 0x07, 0xc3, 0x60, 0xee, 0xc2, 0x25, 0x9f, 0x0b, 0x4a, 0xa0, 0x11, + 0xc3, 0x60, 0xf6, 0xc3, 0xe5, 0x00, 0x0b, 0x49, 0x28, 0xc4, 0xb5, 0xd8, + 0x0b, 0x4b, 0x01, 0xc3, 0x1a, 0x7c, 0x0b, 0x4a, 0x50, 0x93, 0x0b, 0x4a, + 0xe9, 0xc2, 0x00, 0xa4, 0x0b, 0x48, 0xd8, 0x87, 0x0b, 0x4a, 0xd1, 0xc4, + 0xc3, 0x35, 0x0b, 0x49, 0x70, 0x42, 0x00, 0xbd, 0xc3, 0x60, 0xfe, 0x17, + 0xc3, 0x61, 0x0a, 0x96, 0x0b, 0x46, 0x48, 0xca, 0x9c, 0x7a, 0x0b, 0x46, + 0xa9, 0x96, 0x0b, 0x46, 0x70, 0xc2, 0x14, 0xbe, 0x0b, 0x47, 0x41, 0xc3, + 0xdf, 0x8c, 0x0b, 0x46, 0xd8, 0xc4, 0xdf, 0x1b, 0x0b, 0x46, 0xe1, 0xc2, + 0xd0, 0x00, 0x0b, 0x45, 0x50, 0x96, 0x0b, 0x47, 0x81, 0xc5, 0xd7, 0xe0, + 0x0b, 0x45, 0xd0, 0xc4, 0xd2, 0x85, 0x0b, 0x46, 0x31, 0xc5, 0xda, 0x56, + 0x0b, 0x45, 0x70, 0x90, 0x0b, 0x47, 0x71, 0xc5, 0xd6, 0x87, 0x0b, 0x44, + 0xe0, 0x8f, 0x0b, 0x46, 0x29, 0x92, 0x0b, 0x45, 0xb0, 0x93, 0x0b, 0x47, + 0x61, 0xc6, 0xcb, 0xe7, 0x0b, 0x45, 0x90, 0xc2, 0x5c, 0x9b, 0x0b, 0x47, + 0x59, 0x09, 0xc3, 0x61, 0x18, 0xc2, 0x00, 0x7a, 0x0b, 0x46, 0x81, 0x0d, + 0x43, 0x61, 0x25, 0x07, 0xc3, 0x61, 0x31, 0x03, 0xc3, 0x61, 0x3d, 0xc3, + 0xdf, 0x8c, 0x0b, 0x45, 0x68, 0x03, 0xc3, 0x61, 0x47, 0x42, 0x2c, 0x43, + 0xc3, 0x61, 0x4f, 0xc3, 0x83, 0xad, 0x0b, 0x45, 0x59, 0xc4, 0xc8, 0xbe, + 0x0b, 0x44, 0xe8, 0x17, 0xc3, 0x61, 0x59, 0xc2, 0x00, 0x7a, 0x0b, 0x46, + 0x99, 0xc3, 0x88, 0xcf, 0x0b, 0x45, 0xf9, 0x83, 0x0b, 0x45, 0xf1, 0xc5, + 0xb5, 0x19, 0x0b, 0x45, 0x28, 0x07, 0xc3, 0x61, 0x63, 0xc2, 0x04, 0xc6, + 0x0b, 0x45, 0xa1, 0xc6, 0xd0, 0x79, 0x0b, 0x44, 0xd0, 0xc3, 0x47, 0x4a, + 0x0b, 0x45, 0x19, 0x83, 0x0b, 0x44, 0x80, 0x03, 0xc3, 0x61, 0x6d, 0x07, + 0xc3, 0x61, 0x79, 0x8b, 0x0b, 0x46, 0xeb, 0x03, 0x61, 0x89, 0x17, 0x43, + 0x61, 0x93, 0x07, 0xc3, 0x61, 0x9d, 0x00, 0x43, 0x61, 0xa9, 0xc3, 0xe5, + 0x00, 0x0b, 0x47, 0x21, 0xc7, 0xc5, 0xd0, 0x0b, 0x45, 0x11, 0x8f, 0x0b, + 0x44, 0x88, 0x92, 0x0b, 0x45, 0x01, 0xc3, 0x82, 0x78, 0x0b, 0x44, 0xb0, + 0x09, 0xc3, 0x61, 0xb5, 0xc2, 0x00, 0x7a, 0x0b, 0x44, 0x71, 0xca, 0x9c, + 0xc0, 0x0b, 0x43, 0xa0, 0xc2, 0x00, 0xc4, 0x0b, 0x44, 0x59, 0xc4, 0xc1, + 0x3b, 0x0b, 0x42, 0xb8, 0xc5, 0xda, 0xfb, 0x0b, 0x44, 0x01, 0xc7, 0xc1, + 0x4d, 0x0b, 0x43, 0x68, 0xc9, 0xac, 0x69, 0x0b, 0x43, 0x59, 0xc4, 0x96, + 0xdd, 0x0b, 0x43, 0xe0, 0x43, 0x7c, 0x4f, 0x43, 0x61, 0xca, 0xc3, 0x8f, + 0x91, 0x0b, 
0x44, 0x21, 0xc4, 0x85, 0xb7, 0x0b, 0x43, 0xf1, 0xca, 0x9a, + 0xd6, 0x0b, 0x43, 0x61, 0x03, 0x43, 0x61, 0xd6, 0xc8, 0xb7, 0xe2, 0x0b, + 0x44, 0x11, 0x93, 0x0b, 0x43, 0xc8, 0x93, 0x0b, 0x44, 0x69, 0xc3, 0x12, + 0xc2, 0x0b, 0x42, 0xe8, 0xc3, 0x7c, 0x57, 0x0b, 0x44, 0x31, 0xc4, 0xde, + 0xab, 0x0b, 0x43, 0x81, 0xc3, 0xe5, 0x5d, 0x0b, 0x43, 0x70, 0xc4, 0xb3, + 0x92, 0x0b, 0x43, 0x89, 0xcc, 0x83, 0x91, 0x0b, 0x43, 0x18, 0xc6, 0xcf, + 0xfb, 0x0b, 0x43, 0x51, 0xc6, 0xd3, 0x25, 0x0b, 0x43, 0x48, 0xc5, 0xda, + 0x10, 0x0b, 0x43, 0x41, 0xc9, 0xa9, 0x63, 0x0b, 0x42, 0xc0, 0x96, 0x0b, + 0x42, 0x59, 0x93, 0x0b, 0x41, 0xe1, 0xc4, 0xe4, 0x5f, 0x0b, 0x41, 0x80, + 0xcc, 0x8a, 0x2d, 0x0b, 0x42, 0x01, 0x0b, 0xc3, 0x61, 0xe2, 0x17, 0x43, + 0x61, 0xee, 0xc3, 0xb5, 0x1b, 0x0b, 0x42, 0x51, 0xc6, 0xd1, 0x99, 0x0b, + 0x41, 0x88, 0xc3, 0x48, 0xc4, 0x0b, 0x41, 0x71, 0xc7, 0xb1, 0xde, 0x0b, + 0x40, 0x60, 0x93, 0x0b, 0x42, 0x81, 0xc2, 0x00, 0x87, 0x0b, 0x41, 0x38, + 0x96, 0x0b, 0x41, 0x99, 0xc8, 0xb8, 0xfa, 0x0b, 0x40, 0x98, 0x07, 0xc3, + 0x61, 0xf8, 0xc7, 0xc9, 0x49, 0x0b, 0x41, 0xe9, 0xc5, 0xda, 0x0b, 0x0b, + 0x40, 0x78, 0x93, 0x0b, 0x42, 0xb1, 0xc3, 0x16, 0x59, 0x0b, 0x42, 0x40, + 0x42, 0x00, 0x7a, 0xc3, 0x62, 0x11, 0xca, 0xa1, 0xe8, 0x0b, 0x40, 0xf0, + 0x93, 0x0b, 0x42, 0xa9, 0xc6, 0xb7, 0xb4, 0x0b, 0x40, 0x20, 0x83, 0x0b, + 0x42, 0x89, 0xc3, 0x8f, 0x8a, 0x0b, 0x42, 0x68, 0x8b, 0x0b, 0x42, 0x7b, + 0x03, 0x62, 0x1d, 0xc2, 0x00, 0x3d, 0x0b, 0x42, 0x48, 0xc3, 0x53, 0x54, + 0x0b, 0x42, 0x29, 0x43, 0xe6, 0x05, 0xc3, 0x62, 0x23, 0xc4, 0x08, 0x6b, + 0x0b, 0x40, 0x68, 0xc5, 0x9c, 0x7f, 0x0b, 0x42, 0x19, 0xc4, 0x09, 0x91, + 0x0b, 0x40, 0xa0, 0xc2, 0x00, 0xb6, 0x0b, 0x41, 0xfb, 0x03, 0x62, 0x2f, + 0xc5, 0xdc, 0xbd, 0x0b, 0x40, 0x90, 0xc9, 0xb2, 0xc6, 0x0b, 0x41, 0xa1, + 0xc9, 0x82, 0x74, 0x0b, 0x41, 0x48, 0xc7, 0xc3, 0x30, 0x0b, 0x40, 0xf9, + 0xc6, 0xb7, 0xb4, 0x0b, 0x40, 0x38, 0xc3, 0x48, 0xc4, 0x0b, 0x41, 0x78, + 0x03, 0xc3, 0x62, 0x33, 0xc9, 0x82, 0x74, 0x0b, 0x41, 0x41, 0xc5, 0xda, + 0x65, 0x0b, 0x40, 0xe9, 0xc4, 0x99, 0x41, 0x0b, 0x40, 0xd8, 0x4d, 0x7c, + 0x4d, 0xc3, 0x62, 0x3d, 0x4b, 0x98, 0xf2, 0x43, 0x62, 0x49, 0xc6, 0xcf, + 0x0b, 0x0b, 0x41, 0x09, 0xc3, 0x82, 0x78, 0x0b, 0x40, 0xe0, 0xa1, 0x01, + 0x40, 0x7b, 0x03, 0x62, 0x55, 0xa2, 0x01, 0x40, 0xbb, 0x03, 0x62, 0x6e, + 0xa3, 0x01, 0x41, 0x3b, 0x03, 0x62, 0x80, 0xa5, 0x01, 0x44, 0x39, 0xa4, + 0x01, 0x42, 0x3a, 0x03, 0x62, 0x8b, 0xa2, 0x01, 0x40, 0xdb, 0x03, 0x62, + 0x8f, 0xa3, 0x01, 0x41, 0x5b, 0x03, 0x62, 0xa1, 0xa5, 0x01, 0x44, 0x59, + 0xa4, 0x01, 0x42, 0x5a, 0x03, 0x62, 0xac, 0xa3, 0x01, 0x41, 0x9b, 0x03, + 0x62, 0xb0, 0xa5, 0x01, 0x44, 0x99, 0xa4, 0x01, 0x42, 0x9a, 0x03, 0x62, + 0xbb, 0xa5, 0x01, 0x45, 0x19, 0xa4, 0x01, 0x43, 0x1a, 0x03, 0x62, 0xbf, + 0xa5, 0x01, 0x46, 0x18, 0xa2, 0x01, 0x40, 0xeb, 0x03, 0x62, 0xc3, 0xa3, + 0x01, 0x41, 0x6b, 0x03, 0x62, 0xd5, 0xa5, 0x01, 0x44, 0x69, 0xa4, 0x01, + 0x42, 0x6a, 0x03, 0x62, 0xe0, 0xa3, 0x01, 0x41, 0xab, 0x03, 0x62, 0xe4, + 0xa5, 0x01, 0x44, 0xa9, 0xa4, 0x01, 0x42, 0xaa, 0x03, 0x62, 0xef, 0xa5, + 0x01, 0x45, 0x29, 0xa4, 0x01, 0x43, 0x2a, 0x03, 0x62, 0xf3, 0xa5, 0x01, + 0x46, 0x28, 0xa3, 0x01, 0x41, 0xcb, 0x03, 0x62, 0xf7, 0xa5, 0x01, 0x44, + 0xc9, 0xa4, 0x01, 0x42, 0xca, 0x03, 0x63, 0x02, 0xa5, 0x01, 0x45, 0x49, + 0xa4, 0x01, 0x43, 0x4a, 0x03, 0x63, 0x06, 0xa5, 0x01, 0x46, 0x48, 0xa5, + 0x01, 0x45, 0x89, 0xa4, 0x01, 0x43, 0x8a, 0x03, 0x63, 0x0a, 0xa5, 0x01, + 0x46, 0x88, 0xa5, 0x01, 0x47, 0x08, 0xa2, 0x01, 0x40, 0xf3, 0x03, 0x63, + 0x0e, 0xa3, 0x01, 0x41, 0x73, 0x03, 0x63, 0x20, 0xa5, 0x01, 0x44, 0x71, + 0xa4, 0x01, 
0x42, 0x72, 0x03, 0x63, 0x2b, 0xa3, 0x01, 0x41, 0xb3, 0x03, + 0x63, 0x2f, 0xa5, 0x01, 0x44, 0xb1, 0xa4, 0x01, 0x42, 0xb2, 0x03, 0x63, + 0x3a, 0xa5, 0x01, 0x45, 0x31, 0xa4, 0x01, 0x43, 0x32, 0x03, 0x63, 0x3e, + 0xa5, 0x01, 0x46, 0x30, 0xa3, 0x01, 0x41, 0xd3, 0x03, 0x63, 0x42, 0xa5, + 0x01, 0x44, 0xd1, 0xa4, 0x01, 0x42, 0xd2, 0x03, 0x63, 0x4d, 0xa5, 0x01, + 0x45, 0x51, 0xa4, 0x01, 0x43, 0x52, 0x03, 0x63, 0x51, 0xa5, 0x01, 0x46, + 0x50, 0xa5, 0x01, 0x45, 0x91, 0xa4, 0x01, 0x43, 0x92, 0x03, 0x63, 0x55, + 0xa5, 0x01, 0x46, 0x90, 0xa5, 0x01, 0x47, 0x10, 0xa3, 0x01, 0x41, 0xe3, + 0x03, 0x63, 0x59, 0xa5, 0x01, 0x44, 0xe1, 0xa4, 0x01, 0x42, 0xe2, 0x03, + 0x63, 0x64, 0xa5, 0x01, 0x45, 0x61, 0xa4, 0x01, 0x43, 0x62, 0x03, 0x63, + 0x68, 0xa5, 0x01, 0x46, 0x60, 0xa5, 0x01, 0x45, 0xa1, 0xa4, 0x01, 0x43, + 0xa2, 0x03, 0x63, 0x6c, 0xa5, 0x01, 0x46, 0xa0, 0xa5, 0x01, 0x47, 0x20, + 0xa5, 0x01, 0x45, 0xc1, 0xa4, 0x01, 0x43, 0xc2, 0x03, 0x63, 0x70, 0xa5, + 0x01, 0x46, 0xc0, 0xa5, 0x01, 0x47, 0x40, 0xa5, 0x01, 0x47, 0x80, 0xc3, + 0x15, 0x30, 0x0e, 0x84, 0x11, 0xc7, 0x9c, 0xe1, 0x0e, 0x84, 0x08, 0xc3, + 0x63, 0x2b, 0x0e, 0x82, 0x89, 0xc5, 0xcc, 0xcc, 0x0e, 0x80, 0x90, 0xc3, + 0x2e, 0xd7, 0x0e, 0x84, 0xa1, 0xc4, 0x99, 0xff, 0x0e, 0x84, 0x98, 0xc6, + 0x04, 0xe1, 0x0f, 0xd9, 0xf1, 0xc5, 0x00, 0x2c, 0x0f, 0xd9, 0xf9, 0xcc, + 0x04, 0xcb, 0x0f, 0xda, 0x88, 0x46, 0x01, 0xc8, 0xc3, 0x63, 0x74, 0xd2, + 0x4b, 0x83, 0x0f, 0xda, 0x68, 0xd2, 0x4b, 0x83, 0x0f, 0xda, 0x61, 0x46, + 0x01, 0xc8, 0x43, 0x63, 0x80, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0x29, 0xcc, + 0x04, 0xcb, 0x0f, 0xda, 0x50, 0xcc, 0x04, 0xcb, 0x0f, 0xda, 0x49, 0xc5, + 0x00, 0x2c, 0x0f, 0xda, 0x58, 0xd4, 0x35, 0x61, 0x0f, 0xdc, 0xd9, 0xc3, + 0x00, 0x3a, 0x01, 0x3e, 0xd8, 0xe0, 0x08, 0x67, 0x0f, 0xdb, 0x48, 0xe0, + 0x08, 0x67, 0x0f, 0xdb, 0x58, 0xc7, 0x02, 0xa0, 0x0f, 0xc8, 0x29, 0xc9, + 0x02, 0xde, 0x0f, 0xc8, 0x20, 0xd6, 0x2d, 0x62, 0x01, 0x0f, 0xe1, 0xcf, + 0x2c, 0x35, 0x01, 0x0f, 0xc9, 0xc6, 0x01, 0x73, 0x01, 0x0d, 0x70, 0xcd, + 0x7f, 0x80, 0x01, 0x4c, 0x79, 0xca, 0x9f, 0xa4, 0x01, 0x4c, 0x68, 0x00, + 0x43, 0x63, 0x8c, 0xcf, 0x2c, 0x35, 0x01, 0x59, 0xa1, 0xd6, 0x2d, 0x62, + 0x01, 0x59, 0xa9, 0x16, 0x43, 0x63, 0x9e, 0xd2, 0x05, 0xd4, 0x0f, 0xc0, + 0x01, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x80, 0x46, 0x00, 0x8b, 0x43, 0x63, + 0xad, 0xc9, 0x03, 0xc8, 0x01, 0x58, 0x81, 0xc7, 0x09, 0x0d, 0x01, 0x58, + 0x88, 0xdd, 0x10, 0x86, 0x01, 0x0d, 0xc8, 0xcf, 0x6a, 0x8f, 0x01, 0x5a, + 0x11, 0xce, 0x33, 0x92, 0x01, 0x5a, 0x58, 0xc6, 0x01, 0x73, 0x01, 0x0e, + 0x69, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x10, 0xc5, 0x01, 0x4a, 0x01, 0x0d, + 0xe9, 0x00, 0x43, 0x63, 0xb9, 0xc5, 0x01, 0x4a, 0x01, 0x0d, 0xe1, 0x00, + 0x43, 0x63, 0xd1, 0x02, 0xc3, 0x63, 0xe3, 0xc2, 0x00, 0x48, 0x08, 0x3a, + 0x40, 0x9e, 0x08, 0x30, 0x01, 0x9f, 0x08, 0x30, 0x09, 0xa0, 0x08, 0x30, + 0x11, 0xa1, 0x08, 0x30, 0x19, 0xa2, 0x08, 0x30, 0x21, 0xa3, 0x08, 0x30, + 0x29, 0xa4, 0x08, 0x30, 0x31, 0xa5, 0x08, 0x30, 0x39, 0xa6, 0x08, 0x30, + 0x40, 0x9d, 0x08, 0x30, 0x49, 0xa0, 0x08, 0x30, 0x59, 0xa3, 0x08, 0x30, + 0x61, 0xa4, 0x08, 0x30, 0x69, 0x9e, 0x08, 0x30, 0x50, 0x9d, 0x08, 0x30, + 0x71, 0x9e, 0x08, 0x30, 0x7b, 0x03, 0x63, 0xfb, 0x9f, 0x08, 0x30, 0x93, + 0x03, 0x64, 0x03, 0xa0, 0x08, 0x30, 0xab, 0x03, 0x64, 0x0b, 0xa1, 0x08, + 0x30, 0xb9, 0xa3, 0x08, 0x30, 0xc1, 0xa4, 0x08, 0x30, 0xc9, 0xa5, 0x08, + 0x30, 0xd1, 0xa6, 0x08, 0x30, 0xe0, 0x9d, 0x08, 0x30, 0xe9, 0x9e, 0x08, + 0x30, 0xf1, 0xa1, 0x08, 0x30, 0xf9, 0xa4, 0x08, 0x31, 0x01, 0xa5, 0x08, + 0x31, 0x09, 0xa6, 0x08, 0x31, 0x10, 0x9d, 0x08, 0x31, 0x19, 0x9e, 0x08, + 0x31, 0x21, 
0xa1, 0x08, 0x31, 0x29, 0xa2, 0x08, 0x31, 0x31, 0xa3, 0x08, + 0x31, 0x39, 0xa4, 0x08, 0x31, 0x41, 0xa5, 0x08, 0x31, 0x49, 0xa6, 0x08, + 0x31, 0x50, 0x9d, 0x08, 0x31, 0x59, 0x9e, 0x08, 0x31, 0x61, 0xa0, 0x08, + 0x31, 0x69, 0xa1, 0x08, 0x31, 0x71, 0xa2, 0x08, 0x31, 0x79, 0xa3, 0x08, + 0x31, 0x81, 0xa4, 0x08, 0x31, 0x89, 0xa5, 0x08, 0x31, 0x91, 0xa6, 0x08, + 0x31, 0x98, 0x9d, 0x08, 0x31, 0xa1, 0x9e, 0x08, 0x31, 0xa9, 0xa2, 0x08, + 0x31, 0xb1, 0xa3, 0x08, 0x31, 0xb9, 0xa4, 0x08, 0x31, 0xc1, 0xa6, 0x08, + 0x31, 0xc8, 0x9d, 0x08, 0x31, 0xd1, 0xa0, 0x08, 0x31, 0xd9, 0xa1, 0x08, + 0x31, 0xe1, 0xa3, 0x08, 0x31, 0xe9, 0xa4, 0x08, 0x31, 0xf1, 0xa5, 0x08, + 0x31, 0xf9, 0xa6, 0x08, 0x32, 0x00, 0x9d, 0x08, 0x32, 0x09, 0x9e, 0x08, + 0x32, 0x11, 0x9f, 0x08, 0x32, 0x19, 0xa3, 0x08, 0x32, 0x29, 0xa4, 0x08, + 0x32, 0x31, 0xa2, 0x08, 0x32, 0x20, 0x9f, 0x08, 0x32, 0x59, 0xa0, 0x08, + 0x32, 0x61, 0x9d, 0x08, 0x32, 0x48, 0x83, 0x08, 0x32, 0x69, 0x84, 0x08, + 0x32, 0x70, 0x9d, 0x08, 0x32, 0x91, 0xa5, 0x08, 0x32, 0x98, 0x83, 0x08, + 0x32, 0xe9, 0x84, 0x08, 0x32, 0xf1, 0x85, 0x08, 0x32, 0xf8, 0x83, 0x08, + 0x33, 0x19, 0x84, 0x08, 0x33, 0x21, 0x85, 0x08, 0x33, 0x28, 0xc3, 0xe5, + 0x90, 0x08, 0x00, 0x01, 0xc4, 0xe1, 0xa3, 0x08, 0x00, 0xc9, 0xc4, 0xe0, + 0x1f, 0x08, 0x00, 0xf1, 0xc4, 0xe2, 0xfb, 0x08, 0x01, 0x99, 0xc4, 0xe3, + 0x47, 0x08, 0x01, 0xa9, 0xc4, 0xe1, 0x13, 0x08, 0x00, 0x29, 0xc4, 0xae, + 0x2d, 0x08, 0x00, 0x39, 0xc4, 0xdf, 0xd3, 0x08, 0x01, 0x59, 0xc4, 0xe2, + 0x2b, 0x08, 0x01, 0x70, 0xc4, 0xe2, 0xcf, 0x08, 0x00, 0x41, 0xc4, 0xe0, + 0x27, 0x08, 0x00, 0xa9, 0xc4, 0xe0, 0xf3, 0x08, 0x01, 0x09, 0xc4, 0xe2, + 0x6f, 0x08, 0x01, 0xe1, 0xc3, 0xe5, 0xd5, 0x08, 0x00, 0x21, 0xc4, 0xe3, + 0x6b, 0x08, 0x00, 0xb9, 0xc4, 0xe1, 0xab, 0x08, 0x01, 0x19, 0xc4, 0xdf, + 0xcb, 0x08, 0x01, 0x80, 0xc4, 0xe0, 0xd3, 0x08, 0x00, 0x49, 0xc4, 0xdf, + 0xef, 0x08, 0x00, 0xe1, 0xc4, 0xe3, 0x67, 0x08, 0x00, 0xe9, 0xc4, 0xe3, + 0xe3, 0x08, 0x01, 0x11, 0xc4, 0xe2, 0xd3, 0x08, 0x01, 0xb9, 0xc4, 0xe1, + 0x87, 0x08, 0x00, 0x51, 0xc4, 0xe0, 0x3b, 0x08, 0x01, 0x51, 0xc4, 0xe2, + 0x43, 0x08, 0x01, 0x89, 0xc4, 0xe2, 0x17, 0x08, 0x01, 0x90, 0xc4, 0xe2, + 0xcb, 0x08, 0x00, 0x81, 0xc4, 0xe4, 0xcf, 0x08, 0x01, 0xc9, 0xc4, 0xc5, + 0xa6, 0x08, 0x01, 0xd1, 0xc4, 0xe0, 0x9f, 0x08, 0x02, 0x09, 0xc5, 0xdc, + 0x31, 0x08, 0x02, 0x29, 0xc4, 0xe2, 0x87, 0x08, 0x00, 0x31, 0xc4, 0xe3, + 0x2b, 0x08, 0x00, 0x59, 0xc4, 0xe1, 0x5b, 0x08, 0x01, 0x78, 0xc4, 0xe1, + 0x9b, 0x08, 0x00, 0x89, 0xc4, 0xe2, 0x1f, 0x08, 0x01, 0xb1, 0xc5, 0xd4, + 0xbb, 0x08, 0x02, 0x39, 0xc5, 0xdc, 0xd6, 0x08, 0x02, 0x51, 0xc5, 0xd8, + 0x8a, 0x08, 0x02, 0x59, 0xc3, 0x71, 0x3e, 0x08, 0x00, 0x19, 0xc4, 0xe2, + 0xb3, 0x08, 0x00, 0x71, 0xc4, 0xe4, 0xdb, 0x08, 0x01, 0x40, 0xc4, 0xe0, + 0xbb, 0x08, 0x00, 0x99, 0xc4, 0xdc, 0x27, 0x08, 0x00, 0xa1, 0xc4, 0xe2, + 0x8f, 0x08, 0x02, 0x11, 0xc5, 0xd5, 0x29, 0x08, 0x02, 0x60, 0xc4, 0xe0, + 0x23, 0x08, 0x00, 0xb1, 0xc4, 0xdf, 0xe3, 0x08, 0x00, 0xf9, 0xc4, 0xe1, + 0xef, 0x08, 0x01, 0x21, 0xc4, 0xe3, 0x73, 0x08, 0x01, 0xc1, 0xc4, 0xe2, + 0xdf, 0x08, 0x01, 0xe9, 0xc5, 0xdc, 0xe0, 0x08, 0x02, 0x19, 0xc5, 0xd8, + 0xa3, 0x08, 0x02, 0x41, 0xc4, 0xd0, 0x73, 0x08, 0x00, 0x79, 0xc4, 0xe4, + 0x0b, 0x08, 0x00, 0x90, 0xc4, 0xe2, 0xc3, 0x08, 0x00, 0xd1, 0xc4, 0xe0, + 0xef, 0x08, 0x01, 0x29, 0xc4, 0xe4, 0x83, 0x08, 0x01, 0xf9, 0xc5, 0xde, + 0x2f, 0x08, 0x02, 0x31, 0xc3, 0xe4, 0xeb, 0x08, 0x00, 0x11, 0xc4, 0xe0, + 0x0f, 0x08, 0x00, 0xc1, 0xc4, 0xe2, 0x5b, 0x08, 0x01, 0x49, 0xc4, 0xe1, + 0xa7, 0x08, 0x01, 0x61, 0xc4, 0xe2, 0x97, 0x08, 0x02, 0x00, 0xc4, 0xe3, + 0xd7, 0x08, 
0x00, 0xd9, 0xc4, 0xe2, 0x2f, 0x08, 0x01, 0x01, 0xc4, 0xe2, + 0x53, 0x08, 0x01, 0xa1, 0xc5, 0xd8, 0x12, 0x08, 0x02, 0x49, 0xc3, 0xe2, + 0x0f, 0x08, 0x00, 0x09, 0xc4, 0xe1, 0xc3, 0x08, 0x00, 0x69, 0xc4, 0xdf, + 0xd7, 0x08, 0x01, 0x31, 0xc4, 0xe1, 0x5f, 0x08, 0x01, 0x68, 0xc5, 0xd4, + 0xb1, 0x08, 0x02, 0x69, 0xc5, 0xdd, 0x30, 0x08, 0x02, 0x20, 0xa5, 0x08, + 0x02, 0x81, 0xa6, 0x08, 0x02, 0x88, 0xa4, 0x08, 0x02, 0xa1, 0xa6, 0x08, + 0x02, 0xa8, 0xa0, 0x08, 0x02, 0xb9, 0xa1, 0x08, 0x02, 0xc0, 0x9f, 0x08, + 0x02, 0xd1, 0xa0, 0x08, 0x02, 0xd9, 0xa3, 0x08, 0x02, 0xe1, 0xa6, 0x08, + 0x02, 0xe8, 0x1d, 0xc3, 0x64, 0x0f, 0x1f, 0xc3, 0x64, 0x35, 0x20, 0xc3, + 0x64, 0x53, 0x21, 0xc3, 0x64, 0x63, 0x22, 0xc3, 0x64, 0x7d, 0x23, 0xc3, + 0x64, 0xa1, 0x24, 0xc3, 0x64, 0xcd, 0x25, 0xc3, 0x64, 0xf5, 0x26, 0x43, + 0x65, 0x11, 0x1f, 0xc3, 0x65, 0x1b, 0x20, 0xc3, 0x65, 0x27, 0x21, 0xc3, + 0x65, 0x45, 0x22, 0x43, 0x65, 0x6d, 0x1d, 0xc3, 0x65, 0x93, 0x1e, 0xc3, + 0x65, 0xbb, 0x1f, 0xc3, 0x65, 0xe3, 0xc2, 0xc9, 0x2b, 0x08, 0x07, 0xc8, + 0xc6, 0xd0, 0x73, 0x08, 0x04, 0x99, 0xc8, 0xb6, 0x4a, 0x08, 0x04, 0xa0, + 0xc6, 0xd2, 0xe3, 0x08, 0x04, 0xc9, 0xc7, 0xc5, 0x59, 0x08, 0x04, 0xc0, + 0x05, 0xc3, 0x65, 0xfb, 0x44, 0x05, 0x18, 0xc3, 0x66, 0x1c, 0xc5, 0x31, + 0xee, 0x00, 0x0a, 0xdb, 0x03, 0x66, 0x2b, 0xcc, 0x51, 0x28, 0x00, 0xec, + 0x51, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0xa1, 0xc4, 0x01, 0x23, 0x00, 0x14, + 0x11, 0xce, 0x38, 0xe6, 0x05, 0x3d, 0x49, 0x15, 0x43, 0x66, 0x31, 0xc3, + 0x74, 0x83, 0x00, 0x12, 0xcb, 0x03, 0x66, 0x3d, 0x45, 0x07, 0x30, 0x43, + 0x66, 0x43, 0x47, 0x39, 0xfa, 0xc3, 0x66, 0x51, 0xc7, 0xbe, 0x03, 0x05, + 0x3e, 0xc8, 0xc7, 0xca, 0x22, 0x05, 0x5b, 0x01, 0xc6, 0xc8, 0xfd, 0x05, + 0x3c, 0x60, 0xce, 0x01, 0x19, 0x0e, 0xf8, 0xe9, 0x05, 0xc3, 0x66, 0x68, + 0xc5, 0x31, 0xee, 0x00, 0x08, 0x39, 0xc9, 0x16, 0x14, 0x00, 0x08, 0x59, + 0xc3, 0x01, 0x5d, 0x05, 0x3c, 0x99, 0xcc, 0x51, 0x28, 0x05, 0x3c, 0xa1, + 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x41, 0xc6, 0x01, 0x73, 0x00, 0x11, 0xe0, + 0x4a, 0xa3, 0xc8, 0x43, 0x66, 0x83, 0xcf, 0x61, 0xe3, 0x00, 0x12, 0xf1, + 0x11, 0xc3, 0x66, 0x8f, 0xc9, 0x67, 0x20, 0x05, 0x3e, 0x88, 0xcb, 0x8e, + 0x3f, 0x05, 0x39, 0x78, 0x46, 0x00, 0x8b, 0x43, 0x66, 0x9b, 0x45, 0x45, + 0x88, 0xc3, 0x66, 0xa7, 0x8f, 0x05, 0x3b, 0xb8, 0xc4, 0x01, 0x23, 0x00, + 0x0d, 0x6b, 0x03, 0x67, 0x00, 0x06, 0xc3, 0x67, 0x06, 0x05, 0xc3, 0x67, + 0x12, 0xca, 0x64, 0x13, 0x00, 0xf3, 0x79, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, + 0xa9, 0xce, 0x01, 0x19, 0x00, 0x14, 0x41, 0xcc, 0x51, 0x28, 0x00, 0x0d, + 0x59, 0xc6, 0x01, 0x73, 0x00, 0x0b, 0x38, 0xd3, 0x3f, 0x83, 0x00, 0xeb, + 0xd1, 0xc3, 0x00, 0xbf, 0x00, 0x07, 0xf2, 0x03, 0x67, 0x30, 0xc8, 0xad, + 0x81, 0x00, 0xe8, 0xb1, 0x43, 0x02, 0x6f, 0x43, 0x67, 0x39, 0xd4, 0x01, + 0x13, 0x05, 0x5b, 0x38, 0xce, 0x01, 0x19, 0x0e, 0xf8, 0xd9, 0x42, 0x01, + 0x23, 0xc3, 0x67, 0x4b, 0x05, 0xc3, 0x67, 0x5a, 0x06, 0xc3, 0x67, 0x69, + 0xc6, 0x60, 0xb1, 0x00, 0x0a, 0x6b, 0x03, 0x67, 0x76, 0xc5, 0x1e, 0xc8, + 0x00, 0x07, 0xab, 0x03, 0x67, 0x7c, 0xc6, 0x01, 0x73, 0x00, 0x07, 0xc3, + 0x03, 0x67, 0x82, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x91, 0xc5, 0x31, 0xee, + 0x00, 0x07, 0x99, 0x42, 0x01, 0xc8, 0xc3, 0x67, 0x88, 0xc5, 0x1d, 0x88, + 0x00, 0x0a, 0x79, 0xc6, 0xcc, 0x8f, 0x00, 0x0f, 0x5b, 0x03, 0x67, 0x9a, + 0xce, 0x1d, 0x93, 0x00, 0x10, 0x78, 0x91, 0x00, 0x12, 0xa3, 0x03, 0x67, + 0xa0, 0x87, 0x00, 0x12, 0xda, 0x03, 0x67, 0xaa, 0xc6, 0x01, 0x73, 0x00, + 0x13, 0x43, 0x03, 0x67, 0xb0, 0x06, 0xc3, 0x67, 0xb6, 0xca, 0x9e, 0x5a, + 0x00, 0xf6, 0x49, 0xc5, 0x1e, 0xc8, 0x00, 0x09, 0x4b, 0x03, 0x67, 0xc3, + 0xce, 0x01, 
0x19, 0x00, 0xec, 0xb1, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x61, + 0xc5, 0x31, 0xee, 0x00, 0x07, 0x69, 0x05, 0xc3, 0x67, 0xc9, 0xc6, 0x60, + 0xb1, 0x00, 0x09, 0x59, 0xc5, 0x1d, 0x88, 0x00, 0x09, 0x69, 0xc6, 0xcc, + 0x8f, 0x00, 0x09, 0x79, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x58, 0x83, 0x00, + 0x13, 0x4b, 0x03, 0x67, 0xd5, 0xc7, 0xca, 0x53, 0x05, 0x5b, 0x08, 0x46, + 0x51, 0xbb, 0xc3, 0x67, 0xdb, 0x47, 0x1d, 0x71, 0x43, 0x67, 0xf3, 0xca, + 0x9a, 0x86, 0x00, 0x15, 0x23, 0x03, 0x67, 0xff, 0xc3, 0x80, 0x9f, 0x00, + 0xf4, 0xf8, 0x05, 0xc3, 0x68, 0x05, 0xca, 0x64, 0x13, 0x00, 0xf0, 0x79, + 0x44, 0x05, 0x18, 0xc3, 0x68, 0x1d, 0xc4, 0x01, 0x23, 0x00, 0x12, 0xbb, + 0x03, 0x68, 0x29, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x09, 0xcc, 0x1e, 0xc1, + 0x00, 0xeb, 0x69, 0x15, 0xc3, 0x68, 0x2f, 0x16, 0x43, 0x68, 0x3b, 0x00, + 0x43, 0x68, 0x47, 0x45, 0x00, 0x5a, 0xc3, 0x68, 0x56, 0x46, 0x3b, 0xc5, + 0x43, 0x68, 0x69, 0x00, 0x43, 0x68, 0x74, 0x46, 0x00, 0x8b, 0x43, 0x68, + 0x80, 0x46, 0x00, 0x8b, 0x43, 0x68, 0x8c, 0x05, 0xc3, 0x68, 0xa7, 0xc5, + 0x1e, 0xc8, 0x00, 0xf5, 0xeb, 0x03, 0x68, 0xbf, 0xca, 0x9e, 0x5a, 0x00, + 0xf5, 0xd9, 0x06, 0xc3, 0x68, 0xc5, 0xc6, 0x60, 0xb1, 0x00, 0x08, 0x9b, + 0x03, 0x68, 0xcf, 0xce, 0x01, 0x19, 0x00, 0xec, 0x91, 0xc8, 0xbe, 0x9a, + 0x05, 0x59, 0xa1, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x41, 0xc5, 0x31, 0xee, + 0x00, 0x07, 0x49, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0xa9, 0xc6, 0xcc, 0x8f, + 0x00, 0x08, 0xc9, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x39, 0xc6, 0x01, 0x73, + 0x00, 0x12, 0x39, 0xc5, 0x22, 0x9e, 0x01, 0x63, 0xc0, 0xc3, 0x00, 0x49, + 0x05, 0x39, 0x19, 0xc2, 0x00, 0x74, 0x05, 0x39, 0x28, 0x8a, 0x00, 0x07, + 0x80, 0x44, 0x00, 0x8c, 0xc3, 0x68, 0xd5, 0xc7, 0xa6, 0x69, 0x05, 0x3a, + 0xd8, 0x87, 0x00, 0x12, 0xc3, 0x03, 0x68, 0xdf, 0x8d, 0x0e, 0xf8, 0x19, + 0xc8, 0xbb, 0x8a, 0x0e, 0xf8, 0x09, 0x85, 0x01, 0x0c, 0x23, 0x03, 0x68, + 0xe5, 0xc6, 0x21, 0xa3, 0x00, 0x12, 0xe3, 0x03, 0x68, 0xeb, 0xcf, 0x61, + 0x6b, 0x00, 0x13, 0xf9, 0xc6, 0xd3, 0x2b, 0x05, 0x3f, 0xb0, 0xc8, 0xa6, + 0x68, 0x05, 0x3a, 0xe8, 0x04, 0xc3, 0x68, 0xf1, 0xc8, 0x61, 0x72, 0x0e, + 0xf8, 0x89, 0x05, 0xc3, 0x69, 0x00, 0xca, 0x64, 0x13, 0x00, 0xf1, 0xd9, + 0x42, 0x00, 0x58, 0xc3, 0x69, 0x18, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x29, + 0x47, 0x04, 0xcb, 0xc3, 0x69, 0x27, 0xcf, 0x68, 0x64, 0x05, 0x59, 0xb9, + 0xce, 0x01, 0x19, 0x00, 0x13, 0x6b, 0x03, 0x69, 0x39, 0xcb, 0x8f, 0xb5, + 0x05, 0x3a, 0x49, 0xc5, 0x31, 0xee, 0x00, 0x09, 0xd1, 0xc6, 0x01, 0x73, + 0x00, 0x0a, 0x10, 0xc2, 0x25, 0xa1, 0x00, 0x13, 0x73, 0x03, 0x69, 0x3f, + 0xc5, 0xd9, 0x07, 0x05, 0x59, 0xa8, 0x46, 0x00, 0x8b, 0x43, 0x69, 0x45, + 0xcb, 0x90, 0x5a, 0x0e, 0xf8, 0x00, 0xc9, 0x16, 0x14, 0x00, 0xf0, 0xf9, + 0xcc, 0x51, 0x28, 0x00, 0xec, 0x11, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x71, + 0xc6, 0x01, 0x73, 0x05, 0x3c, 0xc9, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x90, + 0xc4, 0xb0, 0x8b, 0x00, 0xf7, 0xf9, 0xc5, 0x1e, 0xc8, 0x00, 0xf7, 0xc9, + 0xc4, 0x01, 0x23, 0x00, 0x0d, 0xa3, 0x03, 0x69, 0x4f, 0x06, 0xc3, 0x69, + 0x55, 0xc5, 0x1f, 0x0c, 0x00, 0xf7, 0x99, 0xca, 0x9e, 0xe6, 0x00, 0xf4, + 0xc9, 0x15, 0xc3, 0x69, 0x61, 0xc5, 0x31, 0xee, 0x00, 0x07, 0xe9, 0xca, + 0x08, 0xf6, 0x00, 0x0b, 0xb9, 0xc6, 0x60, 0xb1, 0x00, 0x11, 0x98, 0x47, + 0xc0, 0x2e, 0xc3, 0x69, 0x6d, 0xc8, 0xba, 0x02, 0x05, 0x3e, 0xb8, 0x44, + 0x05, 0x18, 0xc3, 0x69, 0x77, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0xf9, 0xcc, + 0x51, 0x28, 0x00, 0xec, 0x31, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x79, 0xcc, + 0x4d, 0x15, 0x05, 0x59, 0xd1, 0xc4, 0x01, 0x23, 0x00, 0x13, 0x88, 0x45, + 0x00, 0x8c, 0xc3, 0x69, 0x83, 0xc3, 0x01, 0x5d, 0x00, 0x14, 0x4a, 0x03, + 0x69, 0xcf, 
0xcc, 0x23, 0x3f, 0x00, 0xeb, 0xf8, 0x45, 0x00, 0x8c, 0xc3, + 0x69, 0xd5, 0xce, 0x74, 0x78, 0x05, 0x59, 0x88, 0xd4, 0x01, 0x13, 0x00, + 0xec, 0x80, 0x46, 0x00, 0x8b, 0x43, 0x6a, 0x1a, 0xd4, 0x3e, 0x6c, 0x05, + 0x39, 0xd8, 0xca, 0x9e, 0xe6, 0x00, 0xf4, 0xc1, 0x06, 0xc3, 0x6a, 0x26, + 0xc5, 0x31, 0xee, 0x00, 0xf4, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0xf4, 0x09, + 0xca, 0x08, 0xf6, 0x00, 0x0b, 0xa9, 0xc4, 0x01, 0x23, 0x01, 0x63, 0x98, + 0xca, 0x64, 0x13, 0x00, 0xf4, 0xb1, 0xcb, 0x97, 0x2f, 0x00, 0xf1, 0x59, + 0x05, 0xc3, 0x6a, 0x32, 0x06, 0xc3, 0x6a, 0x44, 0xc4, 0x01, 0x23, 0x00, + 0x13, 0x31, 0xc6, 0x01, 0x73, 0x00, 0x09, 0x39, 0xcc, 0x51, 0x28, 0x05, + 0x3c, 0xa8, 0xca, 0x1f, 0x59, 0x00, 0x13, 0x38, 0xca, 0x64, 0x13, 0x00, + 0xf4, 0xa9, 0x06, 0xc3, 0x6a, 0x56, 0x05, 0xc3, 0x6a, 0x62, 0xcc, 0x51, + 0x28, 0x00, 0xec, 0x71, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0xb1, 0xce, 0x01, + 0x19, 0x00, 0x14, 0x81, 0xc5, 0x31, 0xee, 0x00, 0x0b, 0xd1, 0x15, 0xc3, + 0x6a, 0x74, 0xc4, 0x01, 0x23, 0x00, 0x11, 0x28, 0x06, 0xc3, 0x6a, 0x80, + 0xcc, 0x51, 0x28, 0x00, 0xec, 0x69, 0x42, 0x01, 0xc8, 0x43, 0x6a, 0x8c, + 0x06, 0xc3, 0x6a, 0x9b, 0xc5, 0x1e, 0xc8, 0x00, 0xf3, 0xe9, 0xcc, 0x51, + 0x28, 0x00, 0xec, 0x61, 0xc4, 0x01, 0x23, 0x00, 0x14, 0x59, 0xca, 0x9f, + 0x4a, 0x01, 0x63, 0x89, 0xc4, 0x00, 0x32, 0x01, 0x63, 0xa0, 0xc2, 0x10, + 0x11, 0x05, 0x3c, 0xd9, 0xc2, 0x49, 0x0c, 0x05, 0x3c, 0xe9, 0xc2, 0x0f, + 0xe1, 0x05, 0x3c, 0xf8, 0xc9, 0x16, 0x14, 0x00, 0xf2, 0xb9, 0xc5, 0x31, + 0xee, 0x00, 0xf2, 0xa9, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x41, 0x15, 0xc3, + 0x6a, 0xa7, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x89, 0xc8, 0xbe, 0x9a, 0x05, + 0x3a, 0x99, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x28, 0x45, 0x00, 0x8c, 0xc3, + 0x6a, 0xb6, 0xd6, 0x2d, 0x78, 0x00, 0x0a, 0x48, 0xca, 0x64, 0x13, 0x00, + 0xf1, 0xa9, 0x06, 0xc3, 0x6a, 0xec, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0x89, + 0xcc, 0x51, 0x28, 0x00, 0xec, 0x21, 0xc6, 0x01, 0x73, 0x05, 0x3a, 0x0b, + 0x03, 0x6a, 0xfe, 0x05, 0xc3, 0x6b, 0x04, 0xce, 0x38, 0xe6, 0x05, 0x3d, + 0x19, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0xc8, 0xc6, 0x60, 0xb1, 0x00, 0xf1, + 0x09, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x19, 0xc5, 0x31, 0xee, 0x00, 0x0f, + 0xa9, 0xc4, 0x01, 0x23, 0x00, 0x13, 0x01, 0x05, 0xc3, 0x6b, 0x10, 0xc5, + 0x1d, 0x88, 0x00, 0x08, 0xf9, 0xc9, 0x16, 0x14, 0x00, 0x09, 0x09, 0xce, + 0x38, 0xe6, 0x05, 0x3d, 0x09, 0xc6, 0x01, 0x73, 0x00, 0x0f, 0x28, 0x8b, + 0x05, 0x3d, 0xe9, 0x83, 0x05, 0x3d, 0xd9, 0x97, 0x05, 0x3d, 0xf9, 0xc4, + 0x00, 0xf0, 0x00, 0x12, 0x10, 0xca, 0x64, 0x13, 0x00, 0xf0, 0x39, 0x44, + 0x05, 0x18, 0xc3, 0x6b, 0x22, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x01, 0xcc, + 0x1e, 0xc1, 0x00, 0xeb, 0x61, 0xc8, 0xbe, 0x9a, 0x05, 0x3c, 0xb9, 0xc6, + 0x01, 0x73, 0x00, 0x0c, 0x01, 0xc6, 0xcf, 0xcb, 0x00, 0x0c, 0x19, 0xc4, + 0x01, 0x23, 0x00, 0x12, 0x98, 0xca, 0xa4, 0x9a, 0x05, 0x5a, 0x69, 0x45, + 0x7b, 0x4a, 0x43, 0x6b, 0x2e, 0x91, 0x05, 0x59, 0xeb, 0x03, 0x6b, 0x3c, + 0x87, 0x05, 0x59, 0x90, 0x05, 0xc3, 0x6b, 0x42, 0xc6, 0x01, 0x73, 0x00, + 0x12, 0x48, 0xc4, 0x01, 0x23, 0x00, 0x15, 0x03, 0x03, 0x6b, 0x54, 0xd8, + 0x25, 0xeb, 0x05, 0x3a, 0xb9, 0xcf, 0x3e, 0xad, 0x05, 0x3a, 0xc8, 0x8e, + 0x07, 0xd8, 0x21, 0x8b, 0x07, 0xd8, 0x18, 0xc6, 0x00, 0xd3, 0x00, 0xf7, + 0xb0, 0x43, 0x05, 0x19, 0xc3, 0x6b, 0x5a, 0xc8, 0x20, 0xa9, 0x00, 0x0b, + 0xc0, 0x98, 0x00, 0xf7, 0xe1, 0xc2, 0x02, 0xa7, 0x00, 0xf7, 0xd0, 0xc5, + 0x05, 0x02, 0x00, 0xf2, 0x11, 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0x00, 0x42, + 0x01, 0x23, 0xc3, 0x6b, 0x66, 0x06, 0xc3, 0x6b, 0x75, 0xc6, 0x60, 0xb1, + 0x00, 0x0b, 0x53, 0x03, 0x6b, 0x82, 0xc5, 0x1e, 0xc8, 0x00, 0x0b, 0x43, + 0x03, 0x6b, 
0x88, 0x05, 0xc3, 0x6b, 0x8c, 0xc5, 0x1f, 0x0c, 0x00, 0x06, + 0xc9, 0xc5, 0x31, 0xee, 0x00, 0x06, 0xd1, 0xc6, 0x01, 0x73, 0x05, 0x3d, + 0xc1, 0xc5, 0x1d, 0x88, 0x00, 0x0b, 0x61, 0xca, 0x9e, 0xe6, 0x00, 0x0b, + 0x71, 0xce, 0x1d, 0x93, 0x00, 0x10, 0xb1, 0xc6, 0xcc, 0x8f, 0x00, 0x0b, + 0x90, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x03, 0x03, 0x6b, 0x9b, 0xc8, 0x9e, + 0x5c, 0x00, 0xf6, 0x70, 0xc9, 0x08, 0xf7, 0x00, 0x06, 0xa3, 0x03, 0x6b, + 0xa1, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x88, 0x11, 0xc3, 0x6b, 0xa7, 0xc8, + 0x20, 0xa9, 0x00, 0x06, 0xb2, 0x03, 0x6b, 0xb3, 0xc5, 0x60, 0xb2, 0x00, + 0x0a, 0x63, 0x03, 0x6b, 0xb9, 0xcb, 0x1e, 0xc2, 0x00, 0x0c, 0xf8, 0x45, + 0x02, 0x9a, 0x43, 0x6b, 0xbf, 0xca, 0x9b, 0xda, 0x00, 0x0f, 0xf0, 0xd1, + 0x53, 0x76, 0x05, 0x3a, 0x51, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x60, 0xcb, + 0x98, 0x58, 0x00, 0x0f, 0x60, 0x11, 0xc3, 0x6b, 0xd1, 0xc8, 0x20, 0xa9, + 0x00, 0x06, 0x7a, 0x03, 0x6b, 0xdd, 0xc6, 0x05, 0x01, 0x00, 0xf1, 0x60, + 0xc9, 0x08, 0xf7, 0x00, 0x06, 0x71, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x78, + 0xc9, 0x08, 0xf7, 0x00, 0x06, 0x53, 0x03, 0x6b, 0xe3, 0xc6, 0xbd, 0xf4, + 0x00, 0x11, 0x43, 0x03, 0x6b, 0xe7, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0xd0, + 0xc6, 0x05, 0x01, 0x00, 0xf0, 0xd0, 0x11, 0xc3, 0x6b, 0xed, 0xc8, 0x20, + 0xa9, 0x00, 0x06, 0x58, 0x45, 0x02, 0x9a, 0x43, 0x6b, 0xf9, 0xc8, 0x0f, + 0xbd, 0x00, 0x0d, 0xc1, 0xca, 0x8e, 0x61, 0x00, 0x0f, 0x70, 0x45, 0x02, + 0x9a, 0x43, 0x6c, 0x05, 0xc9, 0x08, 0xf7, 0x00, 0x06, 0x13, 0x03, 0x6c, + 0x23, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x68, 0x11, 0xc3, 0x6c, 0x29, 0xc8, + 0x20, 0xa9, 0x00, 0x06, 0x22, 0x03, 0x6c, 0x35, 0xc5, 0x05, 0x02, 0x00, + 0xf0, 0x01, 0xc5, 0x00, 0xd4, 0x00, 0x06, 0x2a, 0x03, 0x6c, 0x3b, 0xc5, + 0x31, 0xee, 0x00, 0x0f, 0xe1, 0xc6, 0x60, 0xb1, 0x00, 0x0f, 0x10, 0xc5, + 0x05, 0x02, 0x00, 0xf3, 0x13, 0x03, 0x6c, 0x41, 0xc5, 0x00, 0xd4, 0x00, + 0xf3, 0x00, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0x03, 0x03, 0x6c, 0x47, 0xc9, + 0x08, 0xf7, 0x00, 0x0a, 0xe1, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0xf1, 0xca, + 0xa7, 0x1a, 0x00, 0x10, 0xc0, 0xce, 0x16, 0x0f, 0x00, 0xf3, 0x20, 0xd3, + 0x42, 0x2f, 0x05, 0x3e, 0x49, 0xc9, 0xb4, 0xeb, 0x01, 0x63, 0xf0, 0x43, + 0x05, 0x19, 0xc3, 0x6c, 0x4d, 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x58, 0xc9, + 0x08, 0xf7, 0x00, 0xf4, 0x81, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0xe8, 0xc5, + 0x01, 0x74, 0x00, 0x0d, 0xa9, 0xc9, 0xb4, 0xeb, 0x01, 0x63, 0xf8, 0x43, + 0x05, 0x19, 0xc3, 0x6c, 0x59, 0xc8, 0x20, 0xa9, 0x00, 0xf4, 0x20, 0xc8, + 0x0e, 0x6f, 0x00, 0xf3, 0xf1, 0xce, 0x3e, 0xae, 0x05, 0x3a, 0xf0, 0xcf, + 0x68, 0x82, 0x00, 0xf3, 0x81, 0xc6, 0xbd, 0xf4, 0x00, 0x0b, 0x11, 0xc4, + 0x65, 0xe2, 0x00, 0x0b, 0x21, 0xca, 0xa7, 0x1a, 0x00, 0x10, 0xd0, 0x43, + 0x05, 0x19, 0xc3, 0x6c, 0x65, 0xce, 0x3e, 0xae, 0x00, 0x11, 0xf0, 0xd2, + 0x25, 0xf1, 0x05, 0x3b, 0x30, 0xc4, 0xde, 0x3f, 0x01, 0x63, 0x80, 0xca, + 0x64, 0x13, 0x00, 0xf2, 0xf1, 0x42, 0x00, 0x58, 0xc3, 0x6c, 0x71, 0xce, + 0x38, 0xe6, 0x05, 0x3d, 0x31, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0xa1, 0x05, + 0xc3, 0x6c, 0x7d, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x91, 0xc6, 0x01, 0x73, + 0x00, 0x12, 0x61, 0xc4, 0x14, 0xa6, 0x01, 0x63, 0x20, 0xc5, 0x01, 0x74, + 0x01, 0x63, 0x1b, 0x03, 0x6c, 0x89, 0xcc, 0x89, 0x01, 0x05, 0x3a, 0xa0, + 0xcf, 0x68, 0x82, 0x00, 0xf2, 0x51, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0x29, + 0xc4, 0x65, 0xe2, 0x00, 0x0a, 0x38, 0xc9, 0x64, 0x14, 0x00, 0xf2, 0x41, + 0xc8, 0x6d, 0x46, 0x00, 0x0c, 0xe9, 0xcd, 0x7b, 0x08, 0x00, 0x11, 0x00, + 0x43, 0x05, 0x19, 0xc3, 0x6c, 0x8f, 0xc8, 0x25, 0xfb, 0x05, 0x3c, 0x80, + 0xcf, 0x68, 0x82, 0x00, 0xf1, 0xe1, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0xd9, + 0xc4, 0x65, 
0xe2, 0x00, 0x09, 0xe8, 0xc7, 0x0e, 0x70, 0x00, 0xf1, 0xb3, + 0x03, 0x6c, 0x9b, 0xc8, 0xa7, 0x26, 0x01, 0x63, 0x00, 0xc3, 0x02, 0xa3, + 0x00, 0x09, 0xf9, 0xc5, 0x1e, 0xc8, 0x01, 0x63, 0x10, 0xc5, 0x01, 0x74, + 0x00, 0x0a, 0x09, 0xcd, 0x6e, 0x05, 0x00, 0x0e, 0x40, 0xc2, 0x00, 0xb1, + 0x00, 0x11, 0xe9, 0xc3, 0x3a, 0xe6, 0x05, 0x3d, 0x68, 0xc8, 0x0e, 0x6f, + 0x00, 0xf1, 0x91, 0xce, 0x3e, 0xae, 0x05, 0x3a, 0x11, 0xc8, 0x25, 0xfb, + 0x01, 0x63, 0x48, 0xd4, 0x3e, 0xa8, 0x05, 0x3a, 0x20, 0xc6, 0xbd, 0xf4, + 0x00, 0x09, 0xb1, 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x40, 0xc6, 0xbd, 0xf4, + 0x00, 0xf1, 0x41, 0xc9, 0x08, 0xf7, 0x00, 0x09, 0x21, 0xc4, 0x65, 0xe2, + 0x00, 0x10, 0xf0, 0xc8, 0x20, 0xa9, 0x00, 0xf1, 0x31, 0x43, 0x05, 0x19, + 0xc3, 0x6c, 0xa1, 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x38, 0xc9, 0x08, 0xf7, + 0x00, 0x08, 0xe1, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0x11, 0xc4, 0x65, 0xe2, + 0x00, 0x0f, 0x30, 0xcf, 0x68, 0x82, 0x00, 0xf0, 0x91, 0xc6, 0xbd, 0xf4, + 0x00, 0xf0, 0x81, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0x70, 0xc5, 0x05, 0x02, + 0x00, 0xf0, 0x61, 0xc5, 0x00, 0xd4, 0x00, 0xf0, 0x50, 0xcd, 0x77, 0xa1, + 0x00, 0x0f, 0x93, 0x03, 0x6c, 0xad, 0xc5, 0x01, 0x74, 0x00, 0x08, 0x81, + 0xd3, 0x42, 0x2f, 0x05, 0x3e, 0x38, 0xc6, 0xbd, 0xf4, 0x00, 0x06, 0x3b, + 0x03, 0x6c, 0xb3, 0xc9, 0x08, 0xf7, 0x00, 0x08, 0x41, 0xc4, 0x65, 0xe2, + 0x00, 0x08, 0x60, 0xc5, 0x05, 0x02, 0x00, 0xf0, 0x21, 0xc5, 0x00, 0xd4, + 0x00, 0xf0, 0x10, 0xc9, 0x08, 0xf7, 0x00, 0x09, 0xa1, 0xcb, 0x4d, 0x16, + 0x05, 0x3d, 0x90, 0x45, 0x00, 0x8c, 0xc3, 0x6c, 0xb9, 0xc6, 0x10, 0x9d, + 0x01, 0x5b, 0x89, 0x4c, 0x14, 0x15, 0x43, 0x6c, 0xe3, 0xe0, 0x01, 0x47, + 0x01, 0x4b, 0x70, 0x46, 0x05, 0x39, 0x43, 0x6c, 0xe9, 0xc6, 0x44, 0x50, + 0x07, 0xd9, 0x59, 0xc7, 0x44, 0x4f, 0x07, 0xd9, 0x50, 0xc5, 0x64, 0xae, + 0x07, 0xd9, 0x81, 0xc5, 0x79, 0xbe, 0x07, 0xd9, 0x71, 0xc6, 0xcc, 0xe3, + 0x07, 0xd9, 0x78, 0xcc, 0x79, 0xeb, 0x05, 0x4b, 0x59, 0xc5, 0x8e, 0xdf, + 0x05, 0x4b, 0x21, 0xc6, 0xbb, 0xec, 0x05, 0x4b, 0x70, 0xc3, 0x39, 0x37, + 0x05, 0x4b, 0x61, 0x44, 0x3a, 0xbf, 0x43, 0x6c, 0xf5, 0xc6, 0xc1, 0x86, + 0x05, 0x4b, 0xc9, 0xc5, 0xc0, 0x7d, 0x00, 0x88, 0x20, 0xc6, 0xce, 0xb1, + 0x05, 0x4b, 0xc0, 0xc6, 0xd1, 0x57, 0x05, 0x4b, 0xa8, 0x0d, 0xc3, 0x6d, + 0x07, 0xc5, 0xd9, 0x61, 0x00, 0x89, 0x71, 0x16, 0xc3, 0x6d, 0x13, 0xc5, + 0xd6, 0x8c, 0x00, 0x89, 0x81, 0xc5, 0xda, 0xe7, 0x00, 0x89, 0x89, 0x12, + 0xc3, 0x6d, 0x1f, 0xc9, 0xad, 0x26, 0x00, 0x89, 0xa1, 0xc5, 0xb7, 0x9d, + 0x00, 0x89, 0xa9, 0x05, 0xc3, 0x6d, 0x2e, 0xc5, 0x90, 0xe4, 0x00, 0x89, + 0xd8, 0xc5, 0x90, 0xe4, 0x05, 0x4b, 0xd1, 0xc5, 0x79, 0xf2, 0x00, 0x8a, + 0xb0, 0xc5, 0x90, 0xe4, 0x05, 0x4b, 0xa1, 0x0d, 0xc3, 0x6d, 0x3a, 0x15, + 0xc3, 0x6d, 0x46, 0xc5, 0xd9, 0x61, 0x00, 0x88, 0xf9, 0x16, 0xc3, 0x6d, + 0x55, 0x05, 0xc3, 0x6d, 0x61, 0xc7, 0xba, 0x7b, 0x00, 0x89, 0x50, 0xc5, + 0xc0, 0x7d, 0x00, 0x8a, 0x11, 0xc6, 0xc1, 0x86, 0x00, 0x8a, 0x50, 0xc4, + 0x79, 0xf3, 0x00, 0x8a, 0x21, 0xc6, 0xca, 0x0e, 0x00, 0x8a, 0x31, 0xc6, + 0xba, 0x7c, 0x00, 0x8a, 0x58, 0xc4, 0xc6, 0x7a, 0x00, 0x8a, 0x41, 0xc6, + 0xc6, 0x79, 0x00, 0x8a, 0x48, 0xc5, 0xdb, 0xff, 0x05, 0x4b, 0x19, 0xc4, + 0xad, 0x2b, 0x05, 0x4b, 0x11, 0xc5, 0x79, 0xf2, 0x05, 0x4b, 0x09, 0xc5, + 0xda, 0xe7, 0x05, 0x4b, 0x01, 0xc6, 0x8e, 0xde, 0x00, 0x88, 0xb9, 0xc5, + 0xd6, 0x8c, 0x00, 0x8a, 0xf0, 0xc4, 0x79, 0xf3, 0x00, 0x89, 0x59, 0xc6, + 0xba, 0x7c, 0x00, 0x8a, 0xb8, 0x02, 0x43, 0x6d, 0x6d, 0x15, 0xc3, 0x6d, + 0x79, 0x05, 0x43, 0x6d, 0x85, 0xc3, 0x39, 0x37, 0x00, 0x89, 0xf1, 0x44, + 0x3a, 0xbf, 0x43, 0x6d, 0x91, 0xc4, 0xc6, 0x7a, 0x00, 0x8a, 0x81, 0xc6, + 0xc6, 0x79, 
0x00, 0x8a, 0xa8, 0x91, 0x00, 0x8b, 0xb1, 0x97, 0x00, 0x8b, + 0xb9, 0xc2, 0x2c, 0x43, 0x00, 0x8d, 0x18, 0x02, 0x43, 0x6d, 0x9d, 0x87, + 0x00, 0x8b, 0x21, 0x02, 0x43, 0x6d, 0xb0, 0x91, 0x00, 0x8b, 0x3a, 0x03, + 0x6d, 0xbe, 0x02, 0x43, 0x6d, 0xc2, 0x02, 0x43, 0x6d, 0xdb, 0xc2, 0x27, + 0x51, 0x00, 0x8c, 0xb8, 0x02, 0x43, 0x6d, 0xfe, 0x02, 0x43, 0x6e, 0x0c, + 0x87, 0x00, 0x8c, 0x03, 0x03, 0x6e, 0x1f, 0x1b, 0xc3, 0x6e, 0x23, 0x91, + 0x00, 0x8c, 0x13, 0x03, 0x6e, 0x31, 0x97, 0x00, 0x8c, 0x18, 0x87, 0x00, + 0x8b, 0x58, 0x91, 0x00, 0x8b, 0x78, 0x83, 0x00, 0x8c, 0x4b, 0x03, 0x6e, + 0x37, 0xc5, 0xd9, 0xa7, 0x00, 0x8c, 0x59, 0xc2, 0x0c, 0x43, 0x00, 0x8c, + 0x63, 0x03, 0x6e, 0x3b, 0x97, 0x00, 0x8c, 0x69, 0xc3, 0xe5, 0xc9, 0x06, + 0xbd, 0xb0, 0x83, 0x00, 0x8c, 0xc3, 0x03, 0x6e, 0x3f, 0x1b, 0xc3, 0x6e, + 0x45, 0x91, 0x00, 0x8c, 0xd3, 0x03, 0x6e, 0x5b, 0x97, 0x00, 0x8c, 0xd9, + 0xc2, 0x2c, 0x43, 0x00, 0x8c, 0xe1, 0x8b, 0x06, 0xbe, 0x20, 0x02, 0x43, + 0x6e, 0x61, 0xc5, 0xda, 0xe7, 0x00, 0x8f, 0x11, 0x12, 0xc3, 0x6e, 0x8e, + 0xc5, 0xd6, 0x8c, 0x06, 0xbe, 0xe8, 0xc6, 0x8e, 0xde, 0x00, 0x8d, 0x49, + 0xc4, 0xad, 0x2b, 0x00, 0x8d, 0xdb, 0x03, 0x6e, 0x9a, 0xc5, 0xd6, 0x8c, + 0x00, 0x8e, 0x83, 0x03, 0x6e, 0x9e, 0xc8, 0xb7, 0x9a, 0x00, 0x8f, 0x71, + 0xc5, 0xb7, 0x9d, 0x00, 0x8f, 0x71, 0xc5, 0xd9, 0x61, 0x00, 0x8f, 0xf9, + 0xc6, 0xc0, 0x7c, 0x06, 0xbe, 0x6b, 0x03, 0x6e, 0xa4, 0xc5, 0xda, 0xe7, + 0x06, 0xbf, 0x01, 0xc5, 0x79, 0xf2, 0x06, 0xbf, 0x31, 0xc5, 0xdb, 0xff, + 0x06, 0xbf, 0xc8, 0x02, 0x43, 0x6e, 0xaa, 0x05, 0xc3, 0x6e, 0xcc, 0xc5, + 0x90, 0xe4, 0x00, 0x8d, 0x69, 0xc6, 0x8e, 0xde, 0x00, 0x8e, 0x29, 0x16, + 0xc3, 0x6e, 0xd8, 0xc4, 0xad, 0x2b, 0x00, 0x8e, 0x39, 0xc7, 0xca, 0x0d, + 0x00, 0x8e, 0x41, 0xc5, 0xd6, 0x8c, 0x06, 0xbe, 0x58, 0x02, 0x43, 0x6e, + 0xe4, 0x0d, 0xc3, 0x6f, 0x09, 0xc5, 0xda, 0xe7, 0x00, 0x8d, 0x8b, 0x03, + 0x6f, 0x1e, 0x12, 0xc3, 0x6f, 0x22, 0x15, 0xc3, 0x6f, 0x37, 0x16, 0xc3, + 0x6f, 0x43, 0xc5, 0x90, 0xe4, 0x00, 0x8d, 0xb1, 0xc5, 0xd9, 0x61, 0x00, + 0x8e, 0x69, 0x42, 0x0c, 0x43, 0x43, 0x6f, 0x52, 0xc6, 0x8e, 0xde, 0x00, + 0x8e, 0xd1, 0xc5, 0xd6, 0x8c, 0x00, 0x8e, 0xd9, 0x12, 0xc3, 0x6f, 0x61, + 0x15, 0xc3, 0x6f, 0x70, 0x05, 0xc3, 0x6f, 0x7c, 0xc5, 0x90, 0xe4, 0x00, + 0x8f, 0x09, 0xc5, 0xd9, 0x61, 0x06, 0xbe, 0xf0, 0x02, 0x43, 0x6f, 0x88, + 0x02, 0x43, 0x6f, 0xb6, 0x02, 0x43, 0x6f, 0xc8, 0x0d, 0xc3, 0x6f, 0xd4, + 0xcb, 0x8e, 0xd9, 0x00, 0x8f, 0x68, 0x02, 0x43, 0x6f, 0xe0, 0xc5, 0xd9, + 0x61, 0x00, 0x8f, 0xa9, 0xc5, 0xd6, 0x8c, 0x00, 0x8f, 0xb1, 0xc5, 0xda, + 0xe7, 0x00, 0x8f, 0xb9, 0xc5, 0x79, 0xf2, 0x00, 0x8f, 0xc0, 0x02, 0x43, + 0x6f, 0xec, 0xc4, 0x79, 0xf3, 0x01, 0x8b, 0xc1, 0xc6, 0xba, 0x7c, 0x01, + 0x8c, 0x20, 0xc6, 0x8e, 0xde, 0x01, 0x8b, 0xd1, 0xc5, 0xd9, 0x61, 0x01, + 0x8b, 0xd9, 0xc6, 0xc0, 0x7c, 0x01, 0x8b, 0xe1, 0xc5, 0x79, 0xf2, 0x01, + 0x8b, 0xe9, 0xc5, 0xdb, 0xff, 0x01, 0x8b, 0xf0, 0xc5, 0xd9, 0xca, 0x01, + 0x8b, 0x48, 0xc4, 0x79, 0xf3, 0x01, 0x89, 0xe3, 0x03, 0x70, 0x06, 0xc6, + 0xba, 0x7c, 0x01, 0x89, 0xf9, 0xc6, 0xca, 0x0e, 0x01, 0x8b, 0x60, 0xc6, + 0xc1, 0x86, 0x01, 0x89, 0xf1, 0xc5, 0xc0, 0x7d, 0x01, 0x8b, 0x50, 0xc4, + 0x79, 0xf3, 0x01, 0x8b, 0x71, 0xc6, 0xca, 0x0e, 0x01, 0x8b, 0x80, 0xc4, + 0xad, 0x2b, 0x01, 0x8a, 0x23, 0x03, 0x70, 0x0c, 0xc6, 0x8e, 0xde, 0x01, + 0x8b, 0x91, 0x16, 0xc3, 0x70, 0x10, 0xc5, 0xdb, 0xff, 0x01, 0x8b, 0xb0, + 0xc8, 0x90, 0xe1, 0x01, 0x8c, 0x30, 0x02, 0x43, 0x70, 0x1c, 0xc2, 0x19, + 0x2c, 0x01, 0x8c, 0x3b, 0x03, 0x70, 0x28, 0x8b, 0x01, 0x8c, 0x48, 0xc2, + 0x0c, 0x43, 0x01, 0x8c, 0x5b, 0x03, 0x70, 0x2c, 0x8b, 0x01, 0x8c, 0x60, + 0x83, 0x07, 
0xfb, 0x61, 0x97, 0x07, 0xfb, 0x69, 0x91, 0x07, 0xfb, 0x70, + 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xd8, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x91, + 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x48, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xd0, + 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x89, 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x40, + 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xc8, 0x00, 0x43, 0x70, 0x30, 0xc9, 0x57, + 0x20, 0x0f, 0x64, 0xc0, 0x00, 0x43, 0x70, 0x3c, 0xc9, 0x57, 0x20, 0x0f, + 0x64, 0xb8, 0x00, 0x43, 0x70, 0x48, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xb0, + 0x00, 0x43, 0x70, 0x54, 0x19, 0xc3, 0x70, 0x60, 0x0a, 0xc3, 0x70, 0x68, + 0xc2, 0x00, 0xc4, 0x01, 0x9f, 0x48, 0xc3, 0x09, 0x9e, 0x01, 0x9f, 0x1b, + 0x03, 0x70, 0x74, 0x0b, 0x43, 0x70, 0x7a, 0xc2, 0x22, 0xcc, 0x01, 0x9f, + 0x2b, 0x03, 0x70, 0x86, 0xc4, 0x18, 0x10, 0x01, 0x9f, 0x32, 0x03, 0x70, + 0x8c, 0xc4, 0x00, 0x2d, 0x01, 0x9f, 0x3b, 0x03, 0x70, 0x92, 0xc5, 0x66, + 0xb1, 0x01, 0x9f, 0x50, 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x90, 0x91, 0x01, + 0x9a, 0xd1, 0x07, 0x43, 0x70, 0x98, 0xc3, 0x02, 0xdf, 0x01, 0x9a, 0xd9, + 0xc6, 0x52, 0xcd, 0x01, 0x9b, 0x28, 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x30, + 0xc2, 0x00, 0x5f, 0x01, 0x9a, 0xe9, 0xc5, 0x14, 0x08, 0x01, 0x9b, 0x38, + 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x40, 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x48, + 0xc3, 0x03, 0x26, 0x01, 0x9b, 0x50, 0xd2, 0x4a, 0x2d, 0x0f, 0xd0, 0x31, + 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x69, 0xdf, 0x0d, 0x00, 0x0f, 0xd0, 0xd9, + 0x16, 0x43, 0x70, 0xa7, 0xc5, 0xa8, 0xf7, 0x0f, 0xd2, 0x71, 0xc4, 0xde, + 0x83, 0x0f, 0xd2, 0x79, 0xc6, 0xca, 0xfd, 0x0f, 0xd2, 0x80, 0xce, 0x2a, + 0xfe, 0x0f, 0xd0, 0x49, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0x98, 0xc7, 0x02, + 0x54, 0x01, 0x34, 0x31, 0xc8, 0x3e, 0xe6, 0x01, 0x4f, 0x60, 0xce, 0x3d, + 0x7c, 0x01, 0x2f, 0xb9, 0xcd, 0x02, 0xb4, 0x01, 0x2f, 0xa0, 0xce, 0x3d, + 0x7c, 0x01, 0x2f, 0xb1, 0xcd, 0x02, 0xb4, 0x01, 0x2f, 0xa8, 0xce, 0x61, + 0x30, 0x01, 0x3f, 0x29, 0xce, 0x13, 0x5f, 0x01, 0x2d, 0x10, 0xcd, 0x6f, + 0x2b, 0x01, 0x3f, 0x21, 0x45, 0x00, 0x27, 0x43, 0x70, 0xb3, 0xce, 0x3d, + 0x7c, 0x01, 0x2f, 0x99, 0xcd, 0x02, 0xb4, 0x01, 0x2f, 0x80, 0x00, 0x43, + 0x70, 0xbf, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0xa8, 0xc9, 0x57, 0x20, 0x08, + 0x4f, 0xa0, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xc3, 0x03, 0x70, 0xcb, 0xc8, + 0x4b, 0x94, 0x08, 0x4f, 0x08, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x50, 0xc7, + 0x0d, 0x04, 0x08, 0x4e, 0xbb, 0x03, 0x70, 0xd1, 0xc8, 0x4b, 0x94, 0x08, + 0x4f, 0x00, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x48, 0x00, 0x43, 0x70, 0xd7, + 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x40, 0x00, 0x43, 0x70, 0xe6, 0xc9, 0x57, + 0x20, 0x08, 0x4f, 0x38, 0x00, 0x43, 0x70, 0xf5, 0xc9, 0x57, 0x20, 0x08, + 0x4f, 0x30, 0x00, 0x43, 0x71, 0x04, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x28, + 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x68, 0xc4, 0x03, 0xc8, 0x01, 0x4d, 0x79, + 0xc2, 0x02, 0xae, 0x01, 0x4d, 0x68, 0xc4, 0x03, 0xc8, 0x01, 0x4d, 0x71, + 0xc2, 0x02, 0xae, 0x01, 0x4d, 0x60, 0xc4, 0x00, 0x49, 0x01, 0x4d, 0x59, + 0xc5, 0x00, 0x2c, 0x01, 0x4d, 0x50, 0xc4, 0x00, 0x49, 0x01, 0x4d, 0x49, + 0xc5, 0x00, 0x2c, 0x01, 0x4d, 0x40, 0x83, 0x00, 0xc5, 0x29, 0xc2, 0x00, + 0xd0, 0x00, 0xc5, 0x20, 0xc2, 0x19, 0x2c, 0x00, 0xc5, 0x19, 0x83, 0x00, + 0xc4, 0xe0, 0xc2, 0x00, 0xd0, 0x00, 0xc5, 0x09, 0xc3, 0x40, 0xe2, 0x00, + 0xc4, 0xf8, 0x83, 0x00, 0xc5, 0x01, 0xc2, 0x01, 0x6f, 0x00, 0xc4, 0xf0, + 0xc5, 0x7c, 0x16, 0x00, 0xc5, 0x49, 0xc4, 0xe4, 0xa3, 0x00, 0xc4, 0x10, + 0xc2, 0x00, 0xd0, 0x00, 0xc4, 0x69, 0x83, 0x00, 0xc4, 0x60, 0xc3, 0xb4, + 0xa6, 0x00, 0xc4, 0xc9, 0xc2, 0x01, 0x6f, 0x00, 0xc4, 0xc0, 0x8e, 0x08, + 0xb0, 0x48, 0x94, 0x08, 0xb0, 0x38, 0xc4, 0x89, 0xfe, 0x00, 0xed, 0xf9, + 0x46, 0x45, 
0x87, 0xc3, 0x71, 0x13, 0x46, 0x00, 0x8b, 0xc3, 0x71, 0x45, + 0xc9, 0xad, 0x80, 0x00, 0xea, 0xa1, 0xd3, 0x45, 0x14, 0x08, 0x3d, 0x59, + 0xc9, 0xab, 0x40, 0x08, 0x3d, 0x63, 0x03, 0x71, 0x51, 0xcb, 0x8d, 0x37, + 0x08, 0x3d, 0x70, 0xc2, 0x25, 0xa1, 0x00, 0xed, 0xf1, 0xc2, 0x01, 0xe2, + 0x00, 0xed, 0xa1, 0xc2, 0x00, 0x8e, 0x00, 0xec, 0xf1, 0xc2, 0x00, 0x75, + 0x00, 0xea, 0x88, 0x46, 0x00, 0x8b, 0x43, 0x71, 0x57, 0x46, 0x00, 0x8b, + 0x43, 0x71, 0x63, 0x47, 0x0b, 0x18, 0xc3, 0x71, 0x6f, 0xca, 0x45, 0x1d, + 0x00, 0xec, 0xe9, 0xc2, 0x00, 0x0a, 0x00, 0xeb, 0x09, 0x46, 0x17, 0x8d, + 0x43, 0x71, 0xa8, 0xc6, 0x10, 0x3f, 0x00, 0xed, 0xb9, 0x00, 0x43, 0x71, + 0xb4, 0x46, 0x00, 0x8b, 0xc3, 0x71, 0xc0, 0x05, 0xc3, 0x71, 0xcc, 0xc9, + 0xa8, 0x94, 0x00, 0xea, 0xc8, 0xc2, 0x00, 0x0a, 0x00, 0xed, 0x90, 0xc7, + 0xc3, 0x76, 0x00, 0xed, 0x89, 0xc3, 0x04, 0x87, 0x00, 0xea, 0xe9, 0xcc, + 0x8b, 0x95, 0x00, 0xea, 0xa9, 0xca, 0x1f, 0x59, 0x08, 0x3c, 0x28, 0xce, + 0x01, 0x19, 0x00, 0xed, 0x79, 0xc9, 0x6d, 0x45, 0x00, 0xed, 0x70, 0xca, + 0x1f, 0x59, 0x00, 0xed, 0x60, 0x46, 0x00, 0x8b, 0xc3, 0x71, 0xd8, 0xca, + 0x9f, 0xcc, 0x05, 0x3f, 0xc9, 0xc9, 0xab, 0x40, 0x08, 0x3c, 0xc9, 0xc9, + 0xa8, 0x67, 0x08, 0x3c, 0xd1, 0xc3, 0xe6, 0x41, 0x08, 0x3c, 0xf2, 0x03, + 0x71, 0xf9, 0xd2, 0x4d, 0x0f, 0x00, 0xed, 0x40, 0xc3, 0x01, 0xbb, 0x00, + 0xed, 0x29, 0xcc, 0x23, 0x3f, 0x00, 0xed, 0x20, 0xd4, 0x3b, 0xc4, 0x00, + 0xed, 0x0b, 0x03, 0x71, 0xff, 0x07, 0xc3, 0x72, 0x05, 0x46, 0x00, 0x8b, + 0xc3, 0x72, 0x11, 0xc9, 0xa8, 0x67, 0x08, 0x3c, 0x3a, 0x03, 0x72, 0x20, + 0xcb, 0x92, 0x5f, 0x08, 0x3c, 0x80, 0x48, 0x10, 0x2f, 0xc3, 0x72, 0x26, + 0xc8, 0xb7, 0xda, 0x08, 0x3c, 0x89, 0x46, 0x00, 0x8b, 0x43, 0x72, 0x36, + 0x45, 0x29, 0xb4, 0xc3, 0x72, 0x42, 0xc4, 0x38, 0x2c, 0x00, 0x17, 0x01, + 0xca, 0x1f, 0x59, 0x08, 0x3c, 0x98, 0xc2, 0x00, 0x74, 0x00, 0xea, 0xe1, + 0xc4, 0xde, 0x3f, 0x00, 0xea, 0x29, 0x87, 0x08, 0x3c, 0x18, 0x44, 0x05, + 0x76, 0xc3, 0x72, 0x4e, 0xcc, 0x23, 0x3f, 0x08, 0x3d, 0x10, 0xc3, 0x0a, + 0xe3, 0x05, 0x5a, 0xe3, 0x03, 0x72, 0x56, 0x46, 0x00, 0x8b, 0x43, 0x72, + 0x5c, 0x48, 0x10, 0x2f, 0x43, 0x72, 0x68, 0x97, 0x00, 0xe9, 0xe8, 0xcc, + 0x23, 0x3f, 0x05, 0x3f, 0xc0, 0xc7, 0xc3, 0x84, 0x00, 0xe9, 0x78, 0x87, + 0x00, 0xe9, 0x68, 0xc4, 0x2a, 0xa0, 0x05, 0x38, 0x01, 0xc5, 0xdb, 0xcd, + 0x05, 0x38, 0x11, 0xc2, 0x00, 0xe3, 0x05, 0x38, 0x21, 0xc2, 0x17, 0x99, + 0x05, 0x38, 0x30, 0xc4, 0x2a, 0xa0, 0x05, 0x38, 0x09, 0xc5, 0xdb, 0xcd, + 0x05, 0x38, 0x19, 0xc2, 0x00, 0xe3, 0x05, 0x38, 0x29, 0xc2, 0x17, 0x99, + 0x05, 0x38, 0x38, 0xcc, 0x23, 0x33, 0x00, 0x16, 0x0b, 0x03, 0x72, 0x70, + 0xc5, 0x0a, 0x8a, 0x00, 0x15, 0xe8, 0xe0, 0x01, 0x07, 0x08, 0x3d, 0xc8, + 0xcd, 0x36, 0x86, 0x00, 0x16, 0x61, 0xc6, 0x60, 0xb1, 0x00, 0x16, 0x69, + 0xcc, 0x1f, 0x0c, 0x00, 0x16, 0x71, 0xcc, 0x83, 0x0d, 0x00, 0x16, 0x79, + 0x42, 0x00, 0x58, 0xc3, 0x72, 0x76, 0x44, 0x00, 0x49, 0xc3, 0x72, 0x82, + 0xd9, 0x1d, 0x6f, 0x05, 0x38, 0xf9, 0x16, 0xc3, 0x72, 0x91, 0xcc, 0x4d, + 0x8d, 0x00, 0x17, 0x81, 0x42, 0x00, 0x2c, 0xc3, 0x72, 0x9d, 0xd1, 0x08, + 0xf6, 0x05, 0x3c, 0x40, 0xc5, 0x18, 0x25, 0x00, 0x15, 0xd1, 0xca, 0x2d, + 0x84, 0x00, 0x17, 0x70, 0xc9, 0x03, 0xde, 0x00, 0x16, 0x29, 0xc4, 0x32, + 0xbc, 0x00, 0x16, 0xa8, 0xcc, 0x07, 0xbb, 0x05, 0x38, 0xb9, 0xc5, 0x03, + 0x02, 0x05, 0x38, 0xc1, 0xce, 0x0e, 0xf1, 0x05, 0x38, 0xc8, 0x00, 0xc3, + 0x72, 0xa9, 0x44, 0x04, 0xce, 0x43, 0x72, 0xbb, 0x47, 0x19, 0x7a, 0xc3, + 0x72, 0xc7, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x91, 0xc8, 0x4e, 0x93, 0x00, + 0x17, 0x28, 0x47, 0x19, 0x7a, 0xc3, 0x72, 0xd3, 0xd2, 0x4e, 0x89, 0x05, + 0x38, 0xb1, 
0xc8, 0x4e, 0x93, 0x00, 0x17, 0x48, 0xc8, 0x4e, 0x93, 0x05, + 0x38, 0x49, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x70, 0xc3, 0x11, 0x7e, 0x0e, + 0xb6, 0xd1, 0xc5, 0xd8, 0x8f, 0x0e, 0xb6, 0x80, 0xc3, 0x11, 0x7e, 0x0e, + 0xba, 0x71, 0xc5, 0xd8, 0x8f, 0x0e, 0xba, 0x20, 0xc3, 0x11, 0x7e, 0x0e, + 0xb9, 0xa1, 0xc5, 0xd8, 0x8f, 0x0e, 0xb9, 0x50, 0xc7, 0x00, 0x90, 0x0e, + 0xb9, 0x68, 0xc4, 0x18, 0x10, 0x0e, 0xbf, 0x99, 0xc2, 0x22, 0xcc, 0x0e, + 0xbf, 0x90, 0xc3, 0x0d, 0x14, 0x0e, 0xbf, 0x89, 0xc3, 0x09, 0x9e, 0x0e, + 0xbf, 0x80, 0xc4, 0x02, 0xde, 0x0e, 0xbf, 0x79, 0xc2, 0x02, 0xa0, 0x0e, + 0xbf, 0x70, 0xc8, 0x9c, 0x0e, 0x0e, 0xbe, 0x49, 0xc9, 0xaa, 0x9e, 0x0e, + 0xbe, 0x39, 0xd3, 0x43, 0x00, 0x0e, 0xbe, 0x18, 0x91, 0x0e, 0xb3, 0x23, + 0x03, 0x72, 0xdf, 0x92, 0x0e, 0xb3, 0x2b, 0x03, 0x72, 0xe3, 0x85, 0x0e, + 0xb2, 0xc3, 0x03, 0x72, 0xf3, 0x97, 0x0e, 0xb3, 0x53, 0x03, 0x72, 0xf9, + 0x96, 0x0e, 0xb3, 0x4b, 0x03, 0x72, 0xff, 0x95, 0x0e, 0xb3, 0x43, 0x03, + 0x73, 0x0b, 0x88, 0x0e, 0xb2, 0xdb, 0x03, 0x73, 0x11, 0x94, 0x0e, 0xb3, + 0x3b, 0x03, 0x73, 0x17, 0x9a, 0x0e, 0xb3, 0x6b, 0x03, 0x73, 0x1d, 0x90, + 0x0e, 0xb3, 0x1b, 0x03, 0x73, 0x21, 0x8f, 0x0e, 0xb3, 0x13, 0x03, 0x73, + 0x25, 0x8e, 0x0e, 0xb3, 0x0b, 0x03, 0x73, 0x29, 0x8d, 0x0e, 0xb3, 0x03, + 0x03, 0x73, 0x2f, 0x8b, 0x0e, 0xb2, 0xf3, 0x03, 0x73, 0x35, 0x87, 0x0e, + 0xb2, 0xd3, 0x03, 0x73, 0x3b, 0x9c, 0x0e, 0xb3, 0x7b, 0x03, 0x73, 0x47, + 0x86, 0x0e, 0xb2, 0xcb, 0x03, 0x73, 0x4d, 0x89, 0x0e, 0xb2, 0xe3, 0x03, + 0x73, 0x53, 0x84, 0x0e, 0xb2, 0xbb, 0x03, 0x73, 0x59, 0x83, 0x0e, 0xb2, + 0xb3, 0x03, 0x73, 0x5f, 0x9b, 0x0e, 0xb3, 0x71, 0x99, 0x0e, 0xb3, 0x61, + 0x98, 0x0e, 0xb3, 0x59, 0x93, 0x0e, 0xb3, 0x31, 0x8c, 0x0e, 0xb2, 0xf9, + 0x8a, 0x0e, 0xb2, 0xe8, 0x91, 0x0e, 0xb2, 0x53, 0x03, 0x73, 0x65, 0x92, + 0x0e, 0xb2, 0x5b, 0x03, 0x73, 0x69, 0x85, 0x0e, 0xb1, 0xf3, 0x03, 0x73, + 0x79, 0x97, 0x0e, 0xb2, 0x83, 0x03, 0x73, 0x7f, 0x96, 0x0e, 0xb2, 0x7b, + 0x03, 0x73, 0x85, 0x95, 0x0e, 0xb2, 0x73, 0x03, 0x73, 0x94, 0x94, 0x0e, + 0xb2, 0x6b, 0x03, 0x73, 0x9a, 0x9a, 0x0e, 0xb2, 0x9b, 0x03, 0x73, 0xa0, + 0x90, 0x0e, 0xb2, 0x4b, 0x03, 0x73, 0xa4, 0x8f, 0x0e, 0xb2, 0x43, 0x03, + 0x73, 0xa8, 0x8e, 0x0e, 0xb2, 0x3b, 0x03, 0x73, 0xac, 0x8d, 0x0e, 0xb2, + 0x33, 0x03, 0x73, 0xb2, 0x8b, 0x0e, 0xb2, 0x23, 0x03, 0x73, 0xb8, 0x87, + 0x0e, 0xb2, 0x03, 0x03, 0x73, 0xbe, 0x9c, 0x0e, 0xb2, 0xab, 0x03, 0x73, + 0xca, 0x86, 0x0e, 0xb1, 0xfb, 0x03, 0x73, 0xd0, 0x89, 0x0e, 0xb2, 0x13, + 0x03, 0x73, 0xd6, 0x84, 0x0e, 0xb1, 0xeb, 0x03, 0x73, 0xdc, 0x83, 0x0e, + 0xb1, 0xe3, 0x03, 0x73, 0xe2, 0x9b, 0x0e, 0xb2, 0xa1, 0x99, 0x0e, 0xb2, + 0x91, 0x98, 0x0e, 0xb2, 0x89, 0x93, 0x0e, 0xb2, 0x61, 0x8c, 0x0e, 0xb2, + 0x29, 0x8a, 0x0e, 0xb2, 0x19, 0x88, 0x0e, 0xb2, 0x08, 0x0f, 0x43, 0x73, + 0xe8, 0xc2, 0x00, 0xba, 0x0e, 0xbc, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbc, + 0x29, 0x8b, 0x0e, 0xbb, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbc, 0x30, 0xc6, + 0x10, 0x3f, 0x0e, 0xbc, 0x20, 0xc2, 0x20, 0xec, 0x0e, 0xbc, 0x19, 0xc4, + 0x89, 0xfe, 0x0e, 0xbb, 0xb8, 0xc4, 0x1a, 0x73, 0x0e, 0xbc, 0x10, 0xca, + 0x91, 0x2c, 0x0e, 0xbc, 0x08, 0xc2, 0x01, 0x23, 0x0e, 0xbc, 0x00, 0x8b, + 0x0e, 0xbb, 0xe8, 0x97, 0x0e, 0xbb, 0xe0, 0x97, 0x0e, 0xbb, 0xd8, 0xc4, + 0xdd, 0x9a, 0x0e, 0xbb, 0xd0, 0xc4, 0x8b, 0x66, 0x0e, 0xbb, 0xc8, 0xc3, + 0x01, 0xbb, 0x0e, 0xbb, 0xc0, 0xc2, 0x01, 0x6f, 0x0e, 0xbb, 0xb1, 0xc6, + 0x10, 0x3f, 0x0e, 0xbb, 0xa0, 0xc3, 0x04, 0x87, 0x0e, 0xbb, 0xa8, 0xc4, + 0xdb, 0x4c, 0x0e, 0xbb, 0x98, 0xc4, 0x38, 0x2c, 0x0e, 0xbb, 0x90, 0xc3, + 0x04, 0x87, 0x0e, 0xbb, 0x88, 0xc4, 0xde, 0x3f, 0x0e, 0xbb, 0x80, 0x0f, + 0x43, 0x73, 
0xf4, 0xc2, 0x00, 0xba, 0x0e, 0xbb, 0x69, 0xc2, 0x00, 0x0a, + 0x0e, 0xbb, 0x59, 0x8b, 0x0e, 0xbb, 0x28, 0xc2, 0x00, 0x0a, 0x0e, 0xbb, + 0x60, 0xc6, 0x10, 0x3f, 0x0e, 0xbb, 0x50, 0xc2, 0x20, 0xec, 0x0e, 0xbb, + 0x49, 0xc4, 0x89, 0xfe, 0x0e, 0xba, 0xea, 0x03, 0x74, 0x00, 0xc4, 0x1a, + 0x73, 0x0e, 0xbb, 0x40, 0xc2, 0x01, 0x23, 0x0e, 0xbb, 0x30, 0x8b, 0x0e, + 0xbb, 0x18, 0x97, 0x0e, 0xbb, 0x10, 0x97, 0x0e, 0xbb, 0x08, 0xc4, 0xdd, + 0x9a, 0x0e, 0xbb, 0x00, 0xc4, 0x8b, 0x66, 0x0e, 0xba, 0xf8, 0xc3, 0x01, + 0xbb, 0x0e, 0xba, 0xf0, 0xc2, 0x01, 0x6f, 0x0e, 0xba, 0xe1, 0xc6, 0x10, + 0x3f, 0x0e, 0xba, 0xd0, 0xc3, 0x04, 0x87, 0x0e, 0xba, 0xd8, 0xc4, 0xdb, + 0x4c, 0x0e, 0xba, 0xc8, 0xc4, 0x38, 0x2c, 0x0e, 0xba, 0xc0, 0xc3, 0x04, + 0x87, 0x0e, 0xba, 0xb8, 0xc4, 0xde, 0x3f, 0x0e, 0xba, 0xb0, 0x8e, 0x00, + 0x6b, 0xf2, 0x03, 0x74, 0x06, 0x90, 0x00, 0x6b, 0xd0, 0x08, 0xc3, 0x74, + 0x0a, 0x07, 0xc3, 0x74, 0x16, 0x52, 0x48, 0xa1, 0xc3, 0x74, 0x22, 0xc9, + 0xb2, 0xe1, 0x0e, 0x8f, 0x19, 0xca, 0xa6, 0x7a, 0x0e, 0x8f, 0x11, 0xcf, + 0x61, 0xc5, 0x0e, 0x8f, 0x09, 0xc6, 0xcb, 0x39, 0x0e, 0x8e, 0xf0, 0xc7, + 0xc8, 0xe7, 0x0e, 0x8e, 0xd8, 0x84, 0x0e, 0x8e, 0x91, 0x49, 0x32, 0x9d, + 0x43, 0x74, 0x2e, 0x42, 0x02, 0x2f, 0xc3, 0x74, 0x3a, 0xc3, 0x61, 0xff, + 0x0e, 0x88, 0x58, 0x1a, 0xc3, 0x74, 0x46, 0xcc, 0x82, 0x29, 0x0e, 0x88, + 0x00, 0x44, 0x28, 0xcb, 0xc3, 0x74, 0x4e, 0xcb, 0x96, 0x11, 0x0e, 0x88, + 0x28, 0xcc, 0x81, 0x69, 0x0e, 0x8e, 0xe9, 0x44, 0xa1, 0x2c, 0x43, 0x74, + 0x5a, 0xc7, 0xc7, 0xc8, 0x0e, 0x8e, 0xcb, 0x03, 0x74, 0x66, 0xc5, 0xda, + 0x4c, 0x0e, 0x8e, 0xa0, 0xca, 0xa5, 0xe4, 0x0e, 0x8e, 0xe0, 0x5b, 0x15, + 0x0f, 0xc3, 0x74, 0x6c, 0x59, 0x15, 0x11, 0x43, 0x74, 0x7b, 0x00, 0x43, + 0x74, 0x8a, 0x46, 0x01, 0x94, 0x43, 0x74, 0x96, 0x4c, 0x8b, 0xe9, 0xc3, + 0x74, 0xa2, 0xce, 0x74, 0x94, 0x0e, 0x88, 0xc0, 0x0b, 0xc3, 0x74, 0xae, + 0x4f, 0x60, 0x5d, 0x43, 0x74, 0xba, 0xc3, 0xe6, 0x2f, 0x0e, 0x8e, 0x79, + 0xc7, 0xb2, 0x1d, 0x0e, 0x8c, 0x90, 0x0f, 0xc3, 0x74, 0xc6, 0xc2, 0x0e, + 0x9a, 0x0e, 0x88, 0x60, 0x48, 0xbb, 0xc2, 0xc3, 0x74, 0xd2, 0x49, 0xb1, + 0x67, 0x43, 0x74, 0xde, 0xc4, 0x03, 0xc8, 0x0e, 0x8d, 0x91, 0xc2, 0x02, + 0xae, 0x0e, 0x8d, 0x88, 0x48, 0xb7, 0xd2, 0x43, 0x74, 0xea, 0x00, 0x43, + 0x74, 0xf6, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0x99, 0xc5, 0x01, 0xfc, 0x0e, + 0x8a, 0x90, 0xc5, 0x5e, 0x2d, 0x0e, 0x89, 0xd1, 0xd0, 0x5e, 0x22, 0x0e, + 0x89, 0x48, 0x07, 0xc3, 0x75, 0x02, 0x42, 0x00, 0x3a, 0x43, 0x75, 0x0c, + 0xc6, 0x2c, 0xfc, 0x0e, 0x8b, 0xc9, 0xc4, 0xdf, 0x3b, 0x0e, 0x8b, 0xb9, + 0xc3, 0x1e, 0x19, 0x0e, 0x8b, 0xa9, 0xc4, 0xd8, 0xf4, 0x0e, 0x8b, 0x98, + 0x00, 0x43, 0x75, 0x16, 0xc5, 0x02, 0xc2, 0x0e, 0x8e, 0x01, 0xc5, 0x01, + 0xfc, 0x0e, 0x8d, 0xf8, 0xc3, 0x08, 0x7c, 0x0e, 0x8c, 0x89, 0x43, 0xb1, + 0x5e, 0x43, 0x75, 0x22, 0x10, 0xc3, 0x75, 0x2e, 0xcd, 0x7d, 0x03, 0x0e, + 0x88, 0xd0, 0xc4, 0x03, 0xc8, 0x0e, 0x89, 0x69, 0xc2, 0x02, 0xae, 0x0e, + 0x89, 0x60, 0x48, 0xb7, 0xd2, 0x43, 0x75, 0x3a, 0xc6, 0x05, 0x01, 0x0e, + 0x88, 0x88, 0xc2, 0x15, 0x10, 0x0e, 0x8d, 0xa3, 0x03, 0x75, 0x46, 0xc5, + 0xd6, 0x5a, 0x0e, 0x88, 0x51, 0xc7, 0xc9, 0x65, 0x0e, 0x88, 0x49, 0xcc, + 0x81, 0xd5, 0x0e, 0x88, 0x20, 0xca, 0x9e, 0xa0, 0x0e, 0x8d, 0x49, 0xc9, + 0xb2, 0x1b, 0x0e, 0x8c, 0x98, 0xc4, 0x35, 0x36, 0x0e, 0x89, 0x59, 0xc5, + 0xa2, 0xba, 0x0e, 0x89, 0x51, 0xc7, 0x44, 0x3c, 0x0e, 0x88, 0x08, 0x9f, + 0x0e, 0x89, 0x31, 0x9e, 0x0e, 0x89, 0x28, 0xc4, 0x23, 0x2e, 0x0e, 0x8a, + 0xe9, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, 0xd8, 0xca, 0xa1, 0x2a, 0x0e, 0x8d, + 0x81, 0xc4, 0x23, 0x2e, 0x0e, 0x8a, 0xf1, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, + 0xe0, 0xc9, 
0xab, 0x13, 0x0e, 0x8d, 0x41, 0xc6, 0x2c, 0xfc, 0x0e, 0x8b, + 0xd1, 0xc4, 0xdf, 0x3b, 0x0e, 0x8b, 0xc1, 0xc3, 0x1e, 0x19, 0x0e, 0x8b, + 0xb1, 0xc4, 0xd8, 0xf4, 0x0e, 0x8b, 0xa0, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, + 0x01, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, 0xf0, 0xc4, 0x03, 0xc8, 0x0e, 0x89, + 0x79, 0xc2, 0x02, 0xae, 0x0e, 0x89, 0x70, 0x9e, 0x0e, 0x8c, 0xdb, 0x03, + 0x75, 0x4c, 0xa6, 0x0e, 0x8d, 0x19, 0xa5, 0x0e, 0x8d, 0x11, 0xa4, 0x0e, + 0x8d, 0x09, 0xa3, 0x0e, 0x8d, 0x01, 0xa2, 0x0e, 0x8c, 0xf9, 0xa1, 0x0e, + 0x8c, 0xf1, 0xa0, 0x0e, 0x8c, 0xe9, 0x9f, 0x0e, 0x8c, 0xe0, 0x57, 0x28, + 0xe4, 0xc3, 0x75, 0x54, 0xcb, 0x74, 0x97, 0x0e, 0x88, 0xb0, 0xc5, 0xd7, + 0x6d, 0x0e, 0x89, 0xb9, 0xc4, 0xe2, 0x4b, 0x0e, 0x89, 0xb0, 0xc9, 0xa8, + 0x79, 0x0e, 0x8c, 0x61, 0xcf, 0x61, 0xf2, 0x0e, 0x88, 0x38, 0x44, 0x61, + 0xf8, 0xc3, 0x75, 0x60, 0xd3, 0x44, 0x30, 0x0e, 0x88, 0x18, 0xc4, 0x23, + 0x2e, 0x0e, 0x8a, 0xf9, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, 0xe9, 0x45, 0x2b, + 0x5f, 0x43, 0x75, 0x6c, 0xc5, 0xd7, 0x6d, 0x0e, 0x89, 0xc9, 0xc4, 0xe2, + 0x4b, 0x0e, 0x89, 0xc0, 0xc8, 0x01, 0x92, 0x01, 0x51, 0xd9, 0xcd, 0x76, + 0x35, 0x01, 0x51, 0xb9, 0xd1, 0x51, 0x56, 0x01, 0x51, 0xa9, 0xd0, 0x5b, + 0x92, 0x01, 0x51, 0xa0, 0xc8, 0x52, 0x09, 0x01, 0x51, 0x89, 0xc9, 0x16, + 0x14, 0x01, 0x51, 0x80, 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x49, 0x83, 0x05, + 0x53, 0x40, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x71, 0x83, 0x05, 0x4f, 0x68, + 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x21, 0x83, 0x00, 0x83, 0xf8, 0xc2, 0x00, + 0xc1, 0x05, 0x4f, 0x19, 0xc2, 0x19, 0x2c, 0x00, 0x83, 0xd1, 0x83, 0x00, + 0x83, 0xe0, 0x83, 0x00, 0x83, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x83, 0xb0, + 0x83, 0x00, 0x83, 0xb9, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x00, 0x83, 0x00, + 0x83, 0xc1, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x08, 0xa5, 0x0d, 0x7f, 0xf1, + 0xa4, 0x0d, 0x7f, 0xe9, 0xa2, 0x0d, 0x7f, 0xd9, 0xa1, 0x0d, 0x7f, 0xd1, + 0xa0, 0x0d, 0x7f, 0xc9, 0x9f, 0x0d, 0x7f, 0xc1, 0x9e, 0x0d, 0x7f, 0xb8, + 0xa5, 0x0d, 0x7f, 0xb1, 0xa4, 0x0d, 0x7f, 0xa9, 0xa2, 0x0d, 0x7f, 0x99, + 0xa1, 0x0d, 0x7f, 0x91, 0xa0, 0x0d, 0x7f, 0x89, 0x9f, 0x0d, 0x7f, 0x80, + 0x94, 0x00, 0x67, 0x00, 0x8e, 0x00, 0x67, 0x08, 0xc5, 0xde, 0x4d, 0x01, + 0x79, 0xa1, 0xc4, 0xb6, 0xdb, 0x01, 0x7b, 0x40, 0xc5, 0x8c, 0xf0, 0x01, + 0x79, 0x99, 0xca, 0xa3, 0x14, 0x01, 0x7d, 0x58, 0xc4, 0x2a, 0xa0, 0x01, + 0x7c, 0x48, 0xc4, 0x03, 0x0b, 0x01, 0x79, 0x69, 0x86, 0x01, 0x7d, 0x48, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xf9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x78, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xe1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x60, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xf1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x70, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xe9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x68, + 0x44, 0xdf, 0x37, 0xc3, 0x75, 0x78, 0x43, 0x93, 0x74, 0x43, 0x75, 0x84, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xb9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x38, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xb1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x30, + 0x04, 0xc3, 0x75, 0x90, 0xc3, 0x71, 0xec, 0x00, 0xbf, 0xb9, 0xc4, 0xda, + 0x97, 0x00, 0xbf, 0xb0, 0x4b, 0x18, 0x04, 0xc3, 0x75, 0x9c, 0xdc, 0x13, + 0xf9, 0x0f, 0xd2, 0x38, 0xc9, 0x1f, 0x5a, 0x01, 0x49, 0x21, 0xd4, 0x3c, + 0x8c, 0x01, 0x49, 0x41, 0x49, 0x0d, 0x20, 0x43, 0x75, 0xa8, 0x43, 0x01, + 0x7b, 0xc3, 0x75, 0xb4, 0xc9, 0x1f, 0x5a, 0x01, 0x49, 0x19, 0xd4, 0x39, + 0xd0, 0x01, 0x49, 0x39, 0xd9, 0x20, 0x5d, 0x01, 0x49, 0x90, 0x87, 0x0f, + 0x3f, 0xc8, 0x87, 0x0f, 0x3f, 0xb0, 0x87, 0x0f, 0x3f, 0x88, 0x87, 0x05, + 0x59, 0x20, 0x83, 0x05, 0x59, 0x18, 0x83, 0x00, 0x96, 0x98, 0x87, 0x00, + 0x96, 0xa0, 0xc3, 0x11, 0x7e, 0x00, 0x1d, 0x4b, 0x03, 0x75, 0xc0, 0xc5, + 0xd8, 0x8f, 
0x00, 0x1c, 0xfa, 0x03, 0x75, 0xc6, 0xcb, 0x8f, 0x9f, 0x00, + 0xff, 0x60, 0x46, 0x00, 0x8b, 0x43, 0x75, 0xcc, 0x46, 0x00, 0x8b, 0x43, + 0x75, 0xe6, 0xc2, 0x01, 0x6f, 0x00, 0x1c, 0xbb, 0x03, 0x76, 0x09, 0xc6, + 0x10, 0x3f, 0x00, 0x1c, 0xaa, 0x03, 0x76, 0x0f, 0xc4, 0xde, 0x3f, 0x00, + 0x1c, 0x8b, 0x03, 0x76, 0x15, 0xcc, 0x87, 0xed, 0x00, 0x1b, 0x90, 0xd1, + 0x51, 0x45, 0x00, 0x1b, 0xb1, 0x8b, 0x00, 0x1d, 0x01, 0xc2, 0x00, 0x0a, + 0x00, 0x1d, 0x31, 0xc2, 0x00, 0xba, 0x00, 0x1d, 0x40, 0xc4, 0x89, 0xfe, + 0x00, 0x1c, 0xc1, 0xc2, 0x20, 0xec, 0x00, 0x1d, 0x20, 0xc4, 0x1a, 0x73, + 0x00, 0x1d, 0x19, 0xc2, 0x01, 0x23, 0x00, 0x1f, 0xb9, 0xc2, 0x00, 0xd1, + 0x00, 0x1f, 0xd0, 0xc3, 0x11, 0x7e, 0x00, 0x1e, 0x4b, 0x03, 0x76, 0x1b, + 0xc5, 0xd8, 0x8f, 0x00, 0x1d, 0xfa, 0x03, 0x76, 0x21, 0x46, 0x00, 0x8b, + 0x43, 0x76, 0x27, 0x46, 0x00, 0x8b, 0x43, 0x76, 0x45, 0x46, 0x00, 0x8b, + 0x43, 0x76, 0x51, 0xc2, 0x01, 0x6f, 0x00, 0x1d, 0xbb, 0x03, 0x76, 0x6f, + 0xc6, 0x10, 0x3f, 0x00, 0x1d, 0xaa, 0x03, 0x76, 0x75, 0xc4, 0xde, 0x3f, + 0x00, 0x1d, 0x8b, 0x03, 0x76, 0x7b, 0x47, 0x78, 0xc0, 0x43, 0x76, 0x81, + 0xc4, 0xdb, 0x4c, 0x00, 0x1d, 0xa1, 0xc6, 0x51, 0x50, 0x00, 0x1e, 0xe8, + 0xc4, 0x89, 0xfe, 0x00, 0x1d, 0xc1, 0xc2, 0x20, 0xec, 0x00, 0x1e, 0x20, + 0xc4, 0x8b, 0x66, 0x00, 0x1d, 0xd1, 0xc4, 0x78, 0xc8, 0x00, 0x1e, 0xf8, + 0x8b, 0x00, 0x1e, 0x01, 0xc2, 0x00, 0x0a, 0x00, 0x1e, 0x31, 0xc2, 0x00, + 0xba, 0x00, 0x1e, 0x41, 0xd1, 0x51, 0x45, 0x00, 0x1b, 0xb8, 0xc4, 0x1a, + 0x73, 0x00, 0x1e, 0x19, 0xc5, 0xd6, 0xe6, 0x00, 0x1e, 0xd9, 0xc2, 0x01, + 0x23, 0x00, 0x1f, 0xc1, 0x03, 0x43, 0x76, 0x8d, 0x12, 0xc3, 0x76, 0x97, + 0xc3, 0x79, 0xe7, 0x00, 0xe9, 0x49, 0xc5, 0xdd, 0x99, 0x00, 0xe9, 0x39, + 0xc5, 0x51, 0x51, 0x00, 0xe9, 0x31, 0xc5, 0x9b, 0xd5, 0x05, 0x5b, 0x28, + 0xc7, 0x08, 0x79, 0x08, 0x0a, 0x01, 0x0a, 0xc3, 0x76, 0xa1, 0xc7, 0x3e, + 0x00, 0x08, 0x0a, 0x11, 0x49, 0x57, 0x21, 0x43, 0x76, 0xad, 0xc2, 0x00, + 0x5f, 0x08, 0x0a, 0x1b, 0x03, 0x76, 0xb9, 0xc3, 0x45, 0x6b, 0x08, 0x0a, + 0x22, 0x03, 0x76, 0xbd, 0x16, 0xc3, 0x76, 0xc1, 0xc7, 0x67, 0xc7, 0x08, + 0x0a, 0x81, 0xc4, 0x45, 0x6f, 0x08, 0x0a, 0xb8, 0xc3, 0x05, 0x14, 0x08, + 0x0a, 0xd1, 0xc3, 0x09, 0x41, 0x08, 0x0b, 0x11, 0xc5, 0x45, 0x69, 0x08, + 0x0b, 0x40, 0xc3, 0x05, 0x14, 0x08, 0x0a, 0xcb, 0x03, 0x76, 0xcd, 0x16, + 0xc3, 0x76, 0xd1, 0x42, 0x02, 0x09, 0x43, 0x76, 0xe1, 0x42, 0x02, 0x09, + 0xc3, 0x76, 0xed, 0xc3, 0x09, 0x41, 0x08, 0x0b, 0x02, 0x03, 0x76, 0xff, + 0xc9, 0x3d, 0xff, 0x08, 0x0a, 0xf0, 0xc5, 0x00, 0x48, 0x01, 0x54, 0x20, + 0xc4, 0x0d, 0x0e, 0x08, 0x79, 0x21, 0xc3, 0x02, 0xdf, 0x08, 0x78, 0xf8, + 0xc4, 0x18, 0x12, 0x08, 0x79, 0x19, 0x91, 0x08, 0x78, 0xf0, 0xc3, 0xb5, + 0x3e, 0x08, 0x78, 0xdb, 0x03, 0x77, 0x05, 0xc5, 0xd9, 0xde, 0x08, 0x78, + 0xb3, 0x03, 0x77, 0x0b, 0xc3, 0x20, 0x18, 0x08, 0x78, 0x7b, 0x03, 0x77, + 0x11, 0xc2, 0x01, 0x7f, 0x08, 0x78, 0x31, 0xc4, 0xe3, 0x27, 0x08, 0x78, + 0x19, 0xc5, 0xa5, 0xfd, 0x08, 0x78, 0x08, 0xc3, 0x11, 0xef, 0x08, 0x78, + 0xc9, 0x03, 0x43, 0x77, 0x17, 0x0e, 0xc3, 0x77, 0x23, 0xc3, 0x16, 0x5a, + 0x08, 0x78, 0x90, 0xc2, 0x00, 0x8e, 0x08, 0x78, 0x48, 0xc3, 0x1e, 0x1b, + 0x08, 0x53, 0xe1, 0xc2, 0x39, 0x8b, 0x08, 0x53, 0xd8, 0xc4, 0x40, 0x9c, + 0x08, 0x53, 0xc9, 0xc3, 0x77, 0x79, 0x08, 0x53, 0x98, 0x96, 0x08, 0x53, + 0x51, 0xc3, 0x77, 0x79, 0x08, 0x53, 0x71, 0xc4, 0xdc, 0x2d, 0x08, 0x53, + 0x78, 0xcc, 0x89, 0xb5, 0x08, 0x67, 0x88, 0xcc, 0x89, 0xb5, 0x08, 0x65, + 0x88, 0x89, 0x08, 0x61, 0x70, 0xc9, 0xb1, 0x28, 0x08, 0x1e, 0x42, 0x03, + 0x77, 0x2f, 0x83, 0x08, 0x1d, 0x19, 0x97, 0x08, 0x1d, 0x20, 0x83, 0x08, + 0x1d, 0x29, 
0x97, 0x08, 0x1d, 0x30, 0x83, 0x08, 0x1d, 0x39, 0xcb, 0x95, + 0x09, 0x08, 0x1e, 0x58, 0x83, 0x08, 0x1d, 0x49, 0x8b, 0x08, 0x1d, 0x50, + 0x83, 0x08, 0x1d, 0x59, 0x97, 0x08, 0x1d, 0x61, 0xc2, 0x00, 0xd0, 0x08, + 0x1d, 0x80, 0x83, 0x08, 0x1d, 0x6b, 0x03, 0x77, 0x3b, 0x8b, 0x08, 0x1d, + 0x71, 0x97, 0x08, 0x1d, 0x78, 0x83, 0x08, 0x1d, 0x93, 0x03, 0x77, 0x44, + 0xc6, 0xcc, 0x11, 0x08, 0x1e, 0x78, 0x83, 0x08, 0x1d, 0xa1, 0x97, 0x08, + 0x1d, 0xa8, 0x83, 0x08, 0x1d, 0xb1, 0x8b, 0x08, 0x1d, 0xb9, 0x97, 0x08, + 0x1d, 0xc0, 0x83, 0x08, 0x1d, 0xd1, 0x8b, 0x08, 0x1d, 0xd8, 0x83, 0x08, + 0x1d, 0xe1, 0x97, 0x08, 0x1d, 0xe8, 0x83, 0x08, 0x1d, 0xf9, 0xc2, 0x00, + 0xd0, 0x08, 0x1e, 0x09, 0xc2, 0x0d, 0xf6, 0x08, 0x1e, 0x10, 0x19, 0xc3, + 0x77, 0x4a, 0xc2, 0x00, 0xc4, 0x08, 0x1e, 0x98, 0x00, 0x43, 0x77, 0x54, + 0xca, 0xa2, 0x6a, 0x0e, 0x7d, 0x30, 0x46, 0x00, 0x8b, 0x43, 0x77, 0x66, + 0xcc, 0x87, 0x39, 0x0e, 0x7c, 0xf8, 0x43, 0x94, 0x9b, 0x43, 0x77, 0x72, + 0xcb, 0x94, 0x9b, 0x0e, 0x7c, 0x50, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0xb1, + 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x50, 0x97, 0x00, 0xc7, 0x88, 0x91, 0x00, + 0xc7, 0x60, 0x91, 0x00, 0xc7, 0x58, 0xc5, 0x01, 0x6f, 0x00, 0xc7, 0xa9, + 0xc5, 0xdb, 0xa5, 0x00, 0xc7, 0x70, 0x87, 0x00, 0xb1, 0x58, 0x87, 0x00, + 0xb2, 0x58, 0x87, 0x00, 0xb0, 0xf8, 0x87, 0x00, 0xae, 0x38, 0x83, 0x00, + 0xb3, 0x61, 0x8b, 0x00, 0xb3, 0x59, 0x87, 0x00, 0xb3, 0x4b, 0x03, 0x77, + 0x7e, 0x91, 0x00, 0xb3, 0x41, 0x97, 0x00, 0xb3, 0x38, 0x87, 0x00, 0xaf, + 0x28, 0x87, 0x00, 0xb2, 0xf0, 0x87, 0x00, 0xae, 0xf8, 0x8b, 0x00, 0xb1, + 0xc1, 0x87, 0x00, 0xb1, 0xb3, 0x03, 0x77, 0x82, 0x91, 0x00, 0xb1, 0xa9, + 0x97, 0x00, 0xb1, 0xa1, 0x83, 0x00, 0xb1, 0xc8, 0x87, 0x00, 0xb1, 0xe8, + 0x87, 0x00, 0xaf, 0xf0, 0x87, 0x00, 0xaf, 0xc0, 0x87, 0x00, 0xae, 0xc8, + 0x87, 0x00, 0xb1, 0x88, 0x87, 0x00, 0xb2, 0xb8, 0x83, 0x00, 0xc7, 0x10, + 0x91, 0x00, 0xc7, 0x08, 0x87, 0x00, 0xa6, 0xe9, 0x8b, 0x00, 0xa6, 0xfb, + 0x03, 0x77, 0x86, 0x91, 0x00, 0xa7, 0x1b, 0x03, 0x77, 0x8a, 0x83, 0x00, + 0xa7, 0x3a, 0x03, 0x77, 0x8e, 0x8b, 0x00, 0xa2, 0xd3, 0x03, 0x77, 0x92, + 0x87, 0x00, 0xa2, 0xc1, 0x91, 0x00, 0xa2, 0xf3, 0x03, 0x77, 0x96, 0x83, + 0x00, 0xa3, 0x12, 0x03, 0x77, 0x9a, 0x83, 0x00, 0xa9, 0xd3, 0x03, 0x77, + 0x9e, 0x91, 0x00, 0xa9, 0xb3, 0x03, 0x77, 0xa2, 0x8b, 0x00, 0xa9, 0x93, + 0x03, 0x77, 0xa6, 0x87, 0x00, 0xa9, 0x80, 0x83, 0x00, 0xa9, 0x13, 0x03, + 0x77, 0xaa, 0x8b, 0x00, 0xa8, 0xd3, 0x03, 0x77, 0xae, 0x87, 0x00, 0xa8, + 0xc1, 0x91, 0x00, 0xa8, 0xf2, 0x03, 0x77, 0xb2, 0x83, 0x00, 0xa8, 0x0b, + 0x03, 0x77, 0xb6, 0x87, 0x00, 0xa7, 0xb9, 0x8b, 0x00, 0xa7, 0xcb, 0x03, + 0x77, 0xba, 0x91, 0x00, 0xa7, 0xea, 0x03, 0x77, 0xbe, 0x83, 0x00, 0xa2, + 0x2b, 0x03, 0x77, 0xc2, 0x91, 0x00, 0xa2, 0x0b, 0x03, 0x77, 0xc6, 0x8b, + 0x00, 0xa1, 0xeb, 0x03, 0x77, 0xca, 0x87, 0x00, 0xa1, 0xd8, 0x91, 0x00, + 0xa4, 0xd8, 0x8b, 0x00, 0xa4, 0xb8, 0x83, 0x00, 0xa4, 0xf8, 0x83, 0x00, + 0xa0, 0xd0, 0x91, 0x00, 0xa0, 0xa8, 0x8b, 0x00, 0xa0, 0x88, 0x83, 0x00, + 0xa4, 0x08, 0x8b, 0x00, 0xa3, 0xc8, 0x91, 0x00, 0xa3, 0xe8, 0x87, 0x00, + 0xa5, 0x69, 0x8b, 0x00, 0xa5, 0x7b, 0x03, 0x77, 0xce, 0x91, 0x00, 0xa5, + 0x9b, 0x03, 0x77, 0xd2, 0x83, 0x00, 0xa5, 0xba, 0x03, 0x77, 0xd6, 0x83, + 0x00, 0xa6, 0x70, 0x83, 0x00, 0xb3, 0xe3, 0x03, 0x77, 0xda, 0x91, 0x00, + 0xb3, 0xd3, 0x03, 0x77, 0xde, 0x8b, 0x00, 0xb3, 0xc3, 0x03, 0x77, 0xe2, + 0xc2, 0x02, 0xe0, 0x00, 0xb3, 0xb8, 0xc3, 0x0d, 0x14, 0x08, 0x9b, 0x59, + 0xc3, 0x09, 0x9e, 0x08, 0x9b, 0x50, 0xc4, 0x02, 0xde, 0x08, 0x9b, 0x49, + 0xc2, 0x02, 0xa0, 0x08, 0x9b, 0x40, 0xc6, 0x05, 0x01, 0x00, 0x18, 0xb0, + 0xc5, 0x05, 
0x02, 0x01, 0x07, 0x79, 0xc5, 0x00, 0xd4, 0x01, 0x06, 0xb8, + 0x03, 0xc3, 0x77, 0xe6, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0xa8, 0xc5, 0x05, + 0x02, 0x00, 0x19, 0xc9, 0xc5, 0x00, 0xd4, 0x00, 0x1a, 0xb8, 0xc5, 0x05, + 0x02, 0x01, 0x07, 0x71, 0xc5, 0x00, 0xd4, 0x01, 0x06, 0xb0, 0xc5, 0x00, + 0xd4, 0x00, 0xef, 0xf1, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0xa0, 0xc5, 0x00, + 0xd4, 0x00, 0x18, 0x71, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0x40, 0xc5, 0x05, + 0x02, 0x00, 0xd6, 0x51, 0xc5, 0x00, 0xd4, 0x00, 0xd6, 0x48, 0xc9, 0x0f, + 0x6e, 0x07, 0xf1, 0x11, 0xca, 0x09, 0xb7, 0x07, 0xf1, 0x18, 0xc4, 0x00, + 0x49, 0x00, 0xef, 0xc1, 0xc5, 0x00, 0x2c, 0x00, 0x1a, 0xc0, 0xc2, 0x06, + 0xdb, 0x01, 0x66, 0x29, 0xc3, 0x07, 0x4a, 0x01, 0x66, 0xd8, 0xc3, 0x01, + 0x69, 0x01, 0x66, 0x69, 0x83, 0x01, 0x66, 0x7b, 0x03, 0x77, 0xf2, 0xc2, + 0x06, 0xdb, 0x01, 0x66, 0x98, 0xc2, 0x04, 0x2b, 0x01, 0x66, 0xf9, 0xc2, + 0x16, 0x5a, 0x01, 0x67, 0x08, 0xc2, 0x06, 0xdb, 0x01, 0x66, 0x21, 0xc3, + 0x07, 0x4a, 0x01, 0x66, 0xd0, 0xc3, 0x01, 0x69, 0x01, 0x66, 0x61, 0x83, + 0x01, 0x66, 0x73, 0x03, 0x77, 0xf6, 0xc2, 0x06, 0xdb, 0x01, 0x66, 0x90, + 0xc2, 0x04, 0x2b, 0x01, 0x66, 0xf1, 0xc2, 0x16, 0x5a, 0x01, 0x67, 0x00, + 0xc8, 0x02, 0x9f, 0x0f, 0xc8, 0x09, 0xc9, 0x3b, 0x79, 0x0f, 0xc8, 0x00, + 0x42, 0x00, 0x45, 0xc3, 0x77, 0xfa, 0x16, 0xc3, 0x78, 0x04, 0x08, 0xc3, + 0x78, 0x10, 0x15, 0xc3, 0x78, 0x1c, 0xc5, 0x06, 0xdb, 0x01, 0x92, 0xc1, + 0xc4, 0x26, 0x78, 0x01, 0x92, 0xc8, 0x42, 0x00, 0x45, 0xc3, 0x78, 0x28, + 0x16, 0xc3, 0x78, 0x32, 0x08, 0xc3, 0x78, 0x3e, 0x15, 0xc3, 0x78, 0x4a, + 0xc5, 0x06, 0xdb, 0x01, 0x95, 0x99, 0xc4, 0x26, 0x78, 0x01, 0x95, 0xa0, + 0x42, 0x00, 0x45, 0xc3, 0x78, 0x56, 0x16, 0xc3, 0x78, 0x60, 0x08, 0xc3, + 0x78, 0x6c, 0x15, 0xc3, 0x78, 0x78, 0xc5, 0x06, 0xdb, 0x01, 0x95, 0xe9, + 0xc4, 0x26, 0x78, 0x01, 0x95, 0xf0, 0x96, 0x01, 0x95, 0x09, 0xc5, 0x53, + 0x93, 0x01, 0x95, 0x70, 0xa0, 0x09, 0x2a, 0x01, 0x8f, 0x09, 0x1a, 0x30, + 0x94, 0x09, 0x19, 0xf9, 0xc7, 0x5d, 0x9b, 0x09, 0x19, 0xf1, 0x8e, 0x09, + 0x19, 0xe8, 0x86, 0x09, 0x29, 0xe9, 0x9f, 0x09, 0x19, 0x8a, 0x03, 0x78, + 0x84, 0x8e, 0x09, 0x19, 0x71, 0x46, 0x25, 0xd4, 0x43, 0x78, 0x8a, 0xd9, + 0x1f, 0xe0, 0x09, 0x15, 0xe9, 0xd9, 0x1a, 0xe7, 0x09, 0x15, 0xe0, 0xc7, + 0x25, 0xd4, 0x09, 0x15, 0xb0, 0xc5, 0x39, 0xc7, 0x09, 0x16, 0x68, 0xc4, + 0x96, 0x9c, 0x09, 0x16, 0x49, 0xc2, 0x00, 0x65, 0x09, 0x16, 0x40, 0xc2, + 0x38, 0xb6, 0x09, 0x29, 0x81, 0x84, 0x09, 0x15, 0x08, 0x0a, 0xc3, 0x78, + 0x96, 0xc2, 0x00, 0x65, 0x09, 0x14, 0xf8, 0xc2, 0x01, 0xe2, 0x09, 0x15, + 0x31, 0x94, 0x09, 0x15, 0x29, 0x8f, 0x09, 0x15, 0x21, 0x84, 0x09, 0x15, + 0x19, 0x9f, 0x09, 0x15, 0x10, 0xc2, 0x00, 0x33, 0x09, 0x14, 0xd9, 0xc2, + 0x06, 0x4e, 0x09, 0x14, 0xd0, 0x84, 0x09, 0x14, 0xc0, 0xc4, 0xdc, 0xae, + 0x09, 0x29, 0x61, 0xc7, 0x65, 0xd1, 0x09, 0x29, 0x59, 0xc2, 0x01, 0xe2, + 0x09, 0x12, 0xf9, 0xca, 0xa0, 0xb2, 0x09, 0x12, 0xf0, 0xc3, 0x02, 0x2c, + 0x09, 0x29, 0x41, 0xd0, 0x5e, 0x12, 0x09, 0x12, 0xb8, 0x17, 0xc3, 0x78, + 0xa2, 0x8b, 0x09, 0x1c, 0x92, 0x03, 0x78, 0xaa, 0x47, 0x25, 0xd4, 0x43, + 0x78, 0xb0, 0xc2, 0x05, 0x1d, 0x09, 0x12, 0xc9, 0x87, 0x09, 0x12, 0xc0, + 0xc2, 0x01, 0xe2, 0x09, 0x12, 0xa3, 0x03, 0x78, 0xbf, 0x90, 0x09, 0x12, + 0x98, 0xc2, 0x02, 0xad, 0x09, 0x13, 0xc8, 0xc2, 0x5d, 0xd4, 0x09, 0x13, + 0xb9, 0xc5, 0xda, 0x7e, 0x09, 0x13, 0xb1, 0xc2, 0x02, 0x6f, 0x09, 0x13, + 0xa9, 0xc2, 0x00, 0xdb, 0x09, 0x13, 0xa1, 0xc4, 0xe1, 0x67, 0x09, 0x13, + 0x99, 0xc8, 0x6a, 0x1e, 0x09, 0x13, 0x91, 0xc3, 0x6c, 0x49, 0x09, 0x13, + 0x89, 0xc3, 0x84, 0x21, 0x09, 0x13, 0x81, 0xc2, 0x01, 0x2d, 0x09, 0x13, + 0x79, 0xc6, 
0xcb, 0x87, 0x09, 0x13, 0x70, 0xd9, 0x20, 0x12, 0x09, 0x13, + 0x38, 0xc3, 0x32, 0xbf, 0x09, 0x29, 0x09, 0xc2, 0x01, 0x30, 0x09, 0x29, + 0x01, 0xc9, 0xb1, 0x8b, 0x09, 0x11, 0xb8, 0xc2, 0x02, 0x1c, 0x09, 0x1c, + 0x69, 0xc2, 0x01, 0xdd, 0x09, 0x11, 0xe1, 0x83, 0x09, 0x11, 0xd2, 0x03, + 0x78, 0xc5, 0x16, 0xc3, 0x78, 0xcb, 0xc3, 0x0b, 0x64, 0x09, 0x28, 0xe3, + 0x03, 0x78, 0xd7, 0x0a, 0xc3, 0x78, 0xdd, 0xc4, 0x04, 0x59, 0x09, 0x28, + 0xd1, 0x15, 0xc3, 0x78, 0xe9, 0xc4, 0x73, 0x32, 0x09, 0x10, 0x03, 0x03, + 0x78, 0xf3, 0x10, 0xc3, 0x78, 0xf7, 0xca, 0xa7, 0xb0, 0x09, 0x10, 0x59, + 0x42, 0x00, 0xdb, 0xc3, 0x78, 0xff, 0x0d, 0xc3, 0x79, 0x0b, 0xc2, 0x03, + 0x4e, 0x09, 0x10, 0x21, 0xc9, 0x5d, 0x99, 0x09, 0x10, 0x11, 0xc3, 0x62, + 0x19, 0x09, 0x0f, 0xf9, 0xc2, 0x00, 0x65, 0x09, 0x0f, 0xf0, 0xca, 0x8d, + 0x2d, 0x09, 0x1c, 0x48, 0x17, 0xc3, 0x79, 0x15, 0xcd, 0x7b, 0x56, 0x09, + 0x28, 0xa1, 0xd5, 0x36, 0x5c, 0x09, 0x28, 0x99, 0xc2, 0x00, 0xec, 0x09, + 0x28, 0x91, 0xc3, 0x04, 0x2a, 0x09, 0x28, 0x83, 0x03, 0x79, 0x1f, 0xc2, + 0x01, 0x30, 0x09, 0x28, 0x79, 0xc3, 0xd5, 0x59, 0x09, 0x28, 0x70, 0x17, + 0xc3, 0x79, 0x25, 0x16, 0xc3, 0x79, 0x33, 0xc2, 0x00, 0xdb, 0x09, 0x28, + 0x31, 0xc3, 0xaa, 0xfe, 0x09, 0x28, 0x29, 0xce, 0x75, 0x04, 0x09, 0x28, + 0x21, 0xc3, 0x62, 0x19, 0x09, 0x28, 0x19, 0xc3, 0x02, 0x2c, 0x09, 0x28, + 0x10, 0x47, 0x03, 0x4c, 0x43, 0x79, 0x3d, 0xca, 0x9e, 0x00, 0x09, 0x26, + 0xa1, 0x09, 0xc3, 0x79, 0x55, 0x97, 0x09, 0x0f, 0x2b, 0x03, 0x79, 0x69, + 0x16, 0xc3, 0x79, 0x7f, 0x15, 0xc3, 0x79, 0x89, 0xc2, 0x02, 0x6f, 0x09, + 0x0e, 0xd9, 0x0f, 0xc3, 0x79, 0x93, 0x0e, 0xc3, 0x79, 0xa0, 0x0d, 0xc3, + 0x79, 0xb3, 0x0b, 0xc3, 0x79, 0xbe, 0x0a, 0xc3, 0x79, 0xcb, 0xc2, 0x00, + 0xc4, 0x09, 0x0e, 0x19, 0xc3, 0x14, 0x96, 0x09, 0x0e, 0x11, 0x04, 0xc3, + 0x79, 0xd8, 0x83, 0x09, 0x0d, 0xca, 0x03, 0x79, 0xe2, 0xd4, 0x39, 0xbc, + 0x09, 0x0f, 0x80, 0xc9, 0xa6, 0x17, 0x09, 0x0f, 0x70, 0x8e, 0x09, 0x1c, + 0x28, 0x00, 0x43, 0x79, 0xf6, 0xd1, 0x55, 0x1f, 0x09, 0x0b, 0x30, 0xc2, + 0x00, 0xac, 0x09, 0x0b, 0xb9, 0xc2, 0x04, 0x2b, 0x09, 0x0b, 0xb1, 0xc2, + 0x05, 0xc3, 0x09, 0x0b, 0xa8, 0xcf, 0x6a, 0x17, 0x09, 0x08, 0xd0, 0x45, + 0x03, 0x4e, 0xc3, 0x7a, 0x02, 0xc3, 0x58, 0xf6, 0x09, 0x08, 0xa8, 0x0a, + 0xc3, 0x7a, 0x14, 0xc2, 0x01, 0xdf, 0x09, 0x07, 0x41, 0x03, 0x43, 0x7a, + 0x1f, 0x87, 0x09, 0x26, 0x23, 0x03, 0x7a, 0x27, 0xc2, 0x05, 0x1d, 0x09, + 0x07, 0x02, 0x03, 0x7a, 0x2d, 0xc3, 0x5d, 0xd1, 0x09, 0x26, 0x19, 0x8b, + 0x09, 0x06, 0xf9, 0xc9, 0xa7, 0xb1, 0x09, 0x06, 0xf0, 0xc2, 0x53, 0x31, + 0x09, 0x26, 0x11, 0x83, 0x09, 0x06, 0xea, 0x03, 0x7a, 0x33, 0x17, 0xc3, + 0x7a, 0x3a, 0xc2, 0x02, 0xfb, 0x09, 0x06, 0xd3, 0x03, 0x7a, 0x46, 0x03, + 0x43, 0x7a, 0x4c, 0x03, 0xc3, 0x7a, 0x56, 0xc3, 0xc5, 0xa4, 0x09, 0x06, + 0xa9, 0xc9, 0xaa, 0x44, 0x09, 0x06, 0xa0, 0x83, 0x09, 0x25, 0xdb, 0x03, + 0x7a, 0x63, 0x8b, 0x09, 0x06, 0x6a, 0x03, 0x7a, 0x70, 0xc3, 0x1a, 0x52, + 0x09, 0x25, 0xd1, 0x90, 0x09, 0x06, 0x4b, 0x03, 0x7a, 0x7d, 0x8e, 0x09, + 0x06, 0x3a, 0x03, 0x7a, 0x83, 0x17, 0xc3, 0x7a, 0x89, 0x8b, 0x09, 0x06, + 0x23, 0x03, 0x7a, 0x93, 0x83, 0x09, 0x06, 0x18, 0x03, 0xc3, 0x7a, 0x99, + 0xc2, 0x00, 0x33, 0x09, 0x06, 0x0a, 0x03, 0x7a, 0xa9, 0xc2, 0x01, 0xe2, + 0x09, 0x05, 0xeb, 0x03, 0x7a, 0xaf, 0x90, 0x09, 0x05, 0xe3, 0x03, 0x7a, + 0xb6, 0xd0, 0x58, 0xf2, 0x09, 0x05, 0xd9, 0x46, 0x25, 0xd4, 0x43, 0x7a, + 0xbc, 0x86, 0x09, 0x07, 0x5a, 0x03, 0x7a, 0xce, 0xd3, 0x40, 0xa0, 0x09, + 0x06, 0xb9, 0xc7, 0x6a, 0x1f, 0x09, 0x06, 0xb0, 0xcb, 0x8c, 0xf5, 0x09, + 0x05, 0x80, 0xc8, 0x0b, 0x08, 0x09, 0x05, 0x68, 0xca, 0x8c, 0xf6, 0x09, + 0x05, 0x20, 
0x8f, 0x09, 0x24, 0xfb, 0x03, 0x7a, 0xd4, 0xc5, 0xdc, 0x36, + 0x09, 0x24, 0xf0, 0xc4, 0x5d, 0xd2, 0x09, 0x24, 0xe3, 0x03, 0x7a, 0xda, + 0x94, 0x09, 0x24, 0xd8, 0xc2, 0x01, 0xe2, 0x09, 0x24, 0xb1, 0xc7, 0xc4, + 0x4f, 0x09, 0x24, 0xa8, 0xc8, 0x10, 0x61, 0x09, 0x24, 0x78, 0x47, 0x5d, + 0xd5, 0xc3, 0x7a, 0xe0, 0xc2, 0x01, 0xe2, 0x09, 0x03, 0x68, 0x97, 0x09, + 0x03, 0x2b, 0x03, 0x7a, 0xec, 0x83, 0x09, 0x03, 0x20, 0xc8, 0x36, 0x68, + 0x09, 0x03, 0x10, 0xc2, 0x04, 0x3d, 0x09, 0x02, 0xf9, 0x8b, 0x09, 0x02, + 0xeb, 0x03, 0x7a, 0xf6, 0x83, 0x09, 0x02, 0xda, 0x03, 0x7a, 0xfc, 0x8b, + 0x09, 0x02, 0xd1, 0xc4, 0x4f, 0x68, 0x09, 0x02, 0xc8, 0xc3, 0x01, 0xc3, + 0x09, 0x02, 0xc1, 0xca, 0x97, 0xbe, 0x09, 0x02, 0xb8, 0xdf, 0x0d, 0x1f, + 0x09, 0x01, 0xe8, 0xe0, 0x0b, 0x47, 0x09, 0x01, 0xd8, 0xc2, 0x02, 0x1c, + 0x09, 0x14, 0x69, 0xc2, 0x04, 0x3d, 0x09, 0x14, 0x61, 0xc3, 0x45, 0xb0, + 0x09, 0x14, 0x58, 0xc8, 0x20, 0xa9, 0x00, 0x26, 0xe9, 0xc8, 0x25, 0xfb, + 0x00, 0x24, 0xb8, 0xc8, 0x20, 0xa9, 0x00, 0x26, 0xe1, 0xc8, 0x25, 0xfb, + 0x00, 0x24, 0xb0, 0xc7, 0xc7, 0xeb, 0x00, 0x6d, 0x41, 0xc6, 0x8e, 0x9c, + 0x00, 0x6d, 0x70, 0xc7, 0xc4, 0x25, 0x00, 0x6d, 0x51, 0xc6, 0x8e, 0x9c, + 0x00, 0x6d, 0x80, 0xc5, 0x20, 0xe5, 0x0e, 0xce, 0xa1, 0xc7, 0xb7, 0x3a, + 0x0e, 0xce, 0x28, 0xc5, 0x20, 0xe5, 0x0e, 0xce, 0x99, 0xc7, 0xb7, 0x3a, + 0x0e, 0xce, 0x20, 0xc5, 0x20, 0xe5, 0x0e, 0xce, 0x91, 0xc7, 0xb7, 0x3a, + 0x0e, 0xce, 0x18, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x99, 0xca, 0x9e, 0x8c, + 0x0e, 0xcd, 0x60, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x91, 0xca, 0x9e, 0x8c, + 0x0e, 0xcd, 0x58, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x89, 0xca, 0x9e, 0x8c, + 0x0e, 0xcd, 0x50, 0xc9, 0x51, 0x1a, 0x0e, 0xd3, 0x30, 0xc9, 0x51, 0x1a, + 0x0e, 0xd3, 0x20, 0xcb, 0x57, 0x45, 0x0e, 0xd1, 0x19, 0xc6, 0x00, 0x58, + 0x0e, 0xd1, 0x10, 0xcb, 0x57, 0x45, 0x0e, 0xd1, 0x31, 0xc6, 0x00, 0x58, + 0x0e, 0xd1, 0x28, 0xc4, 0x0e, 0x65, 0x0e, 0xc8, 0x21, 0xc5, 0x0e, 0xce, + 0x0e, 0xc7, 0xab, 0x03, 0x7b, 0x02, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x03, + 0x03, 0x7b, 0x06, 0x47, 0x04, 0xcb, 0xc3, 0x7b, 0x0a, 0x45, 0x00, 0x9d, + 0xc3, 0x7b, 0x2f, 0x47, 0x13, 0x95, 0xc3, 0x7b, 0x5c, 0xdb, 0x18, 0xdb, + 0x0e, 0xc2, 0x50, 0x46, 0xd1, 0x5d, 0xc3, 0x7b, 0x84, 0x46, 0x0e, 0xce, + 0xc3, 0x7b, 0x99, 0xc4, 0x0e, 0x65, 0x0e, 0xc2, 0xe3, 0x03, 0x7b, 0xab, + 0xd4, 0x3a, 0xac, 0x0e, 0xc2, 0xd9, 0x08, 0x43, 0x7b, 0xaf, 0x00, 0x43, + 0x7b, 0xc1, 0x00, 0x43, 0x7b, 0xd9, 0xc6, 0x13, 0x95, 0x0e, 0xc5, 0x99, + 0xdd, 0x11, 0x17, 0x0e, 0xc5, 0x68, 0xc5, 0x06, 0x82, 0x0e, 0xc5, 0x1b, + 0x03, 0x7b, 0xe5, 0xc2, 0x02, 0xae, 0x0e, 0xc4, 0xb0, 0xc5, 0x06, 0x82, + 0x0e, 0xc0, 0x23, 0x03, 0x7b, 0xee, 0xc6, 0x04, 0xcb, 0x0e, 0xc6, 0x2b, + 0x03, 0x7b, 0xf2, 0xc4, 0x00, 0x9d, 0x0e, 0xc5, 0x3b, 0x03, 0x7b, 0xf8, + 0xc6, 0x13, 0x95, 0x0e, 0xc4, 0x53, 0x03, 0x7b, 0xfe, 0x46, 0x0e, 0xce, + 0xc3, 0x7c, 0x02, 0xc8, 0xbc, 0x62, 0x0e, 0xc4, 0x11, 0xc4, 0x05, 0x75, + 0x0e, 0xc3, 0xdb, 0x03, 0x7c, 0x11, 0xc5, 0x03, 0x13, 0x0e, 0xc3, 0xf1, + 0x08, 0x43, 0x7c, 0x15, 0x47, 0x04, 0xcb, 0xc3, 0x7c, 0x21, 0x52, 0x3c, + 0x00, 0xc3, 0x7c, 0x30, 0xca, 0x4c, 0x69, 0x0e, 0xc5, 0xc9, 0xc8, 0xbc, + 0x5a, 0x0e, 0xc3, 0x50, 0x00, 0x43, 0x7c, 0x42, 0x00, 0x43, 0x7c, 0x6f, + 0xde, 0x0e, 0xc8, 0x0e, 0xc7, 0x49, 0xdc, 0x13, 0x89, 0x0e, 0xc6, 0xb3, + 0x03, 0x7c, 0x81, 0x46, 0x0e, 0xce, 0xc3, 0x7c, 0x87, 0xc8, 0xbc, 0x62, + 0x0e, 0xc3, 0x41, 0xd6, 0x18, 0xdb, 0x0e, 0xc2, 0x48, 0x47, 0x04, 0xcb, + 0xc3, 0x7c, 0x93, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x0b, 0x03, 0x7c, 0xa2, + 0xcb, 0x13, 0x90, 0x0e, 0xc5, 0x89, 0x47, 0x13, 0x95, 0x43, 0x7c, 0xa6, + 0xc7, 0x27, 
0xb2, 0x0e, 0xc3, 0xd1, 0xc4, 0x0e, 0xe2, 0x0e, 0xc3, 0xc0, + 0xc5, 0x0e, 0xd4, 0x0e, 0xd0, 0x29, 0xc8, 0x45, 0x27, 0x0e, 0xd0, 0x18, + 0xc5, 0x0e, 0xd4, 0x0e, 0xd0, 0x21, 0xc4, 0x00, 0x70, 0x0e, 0xd0, 0x11, + 0xc8, 0x45, 0x27, 0x0e, 0xd0, 0x08, 0xc4, 0x03, 0x14, 0x0e, 0xce, 0xe9, + 0xc4, 0xa2, 0x4c, 0x0e, 0xce, 0xe0, 0x46, 0x20, 0xe5, 0xc3, 0x7c, 0xb2, + 0x48, 0xb7, 0x3a, 0x43, 0x7c, 0xbe, 0xc5, 0x17, 0x14, 0x0e, 0xcb, 0x3b, + 0x03, 0x7c, 0xca, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0x31, 0xc5, 0x03, 0x13, + 0x0e, 0xcb, 0x28, 0x46, 0x17, 0x14, 0xc3, 0x7c, 0xd0, 0x46, 0x03, 0x13, + 0x43, 0x7c, 0xdc, 0x46, 0x17, 0x14, 0xc3, 0x7c, 0xe8, 0x46, 0x03, 0x13, + 0x43, 0x7c, 0xf4, 0x47, 0x2c, 0x2e, 0xc3, 0x7d, 0x00, 0xcc, 0x8a, 0x39, + 0x0e, 0xce, 0x49, 0xcc, 0x81, 0xe1, 0x0e, 0xce, 0x40, 0x46, 0x17, 0x14, + 0xc3, 0x7d, 0x0c, 0x46, 0x03, 0x13, 0x43, 0x7d, 0x18, 0xc2, 0x00, 0x15, + 0x0e, 0xce, 0xc0, 0x46, 0x20, 0xe5, 0xc3, 0x7d, 0x24, 0x48, 0xb7, 0x3a, + 0x43, 0x7d, 0x30, 0xc5, 0x17, 0x14, 0x0e, 0xcd, 0xb1, 0xc6, 0x01, 0xdb, + 0x0e, 0xcd, 0xa9, 0xc5, 0x03, 0x13, 0x0e, 0xcd, 0xa0, 0xc5, 0xdd, 0x17, + 0x0e, 0xcd, 0x81, 0xca, 0x9e, 0x8c, 0x0e, 0xcd, 0x48, 0x47, 0x2c, 0x2e, + 0xc3, 0x7d, 0x3c, 0x47, 0x00, 0x58, 0x43, 0x7d, 0x4e, 0x0a, 0xc3, 0x7d, + 0x60, 0x42, 0x00, 0x8e, 0xc3, 0x7d, 0x6c, 0x48, 0x15, 0x02, 0x43, 0x7d, + 0x78, 0xc6, 0x01, 0xdb, 0x0e, 0xcd, 0x09, 0xc5, 0x03, 0x13, 0x0e, 0xcd, + 0x00, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0x63, 0x03, 0x7d, 0x8d, 0xc6, 0x01, + 0xdb, 0x0e, 0xc9, 0x59, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0x50, 0xc2, 0x00, + 0x15, 0x0e, 0xcb, 0x20, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x00, 0xc5, 0x03, + 0x13, 0x0e, 0xc9, 0x31, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0x28, 0xd0, 0x59, + 0x02, 0x08, 0xae, 0x59, 0xd2, 0x48, 0x8f, 0x08, 0xae, 0x50, 0xc8, 0x0d, + 0x03, 0x01, 0x0b, 0xf0, 0x00, 0x43, 0x7d, 0x93, 0xdf, 0x0d, 0x3e, 0x01, + 0x4b, 0x79, 0x06, 0x43, 0x7d, 0xa5, 0xd2, 0x05, 0xd4, 0x0f, 0xc0, 0x19, + 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x98, 0xca, 0x03, 0x87, 0x01, 0x0d, 0x99, + 0xc9, 0x01, 0x88, 0x01, 0x0d, 0x90, 0xd6, 0x2e, 0x3e, 0x01, 0x1b, 0xe1, + 0xc3, 0x13, 0x1d, 0x01, 0x15, 0xf0, 0xc9, 0x33, 0xad, 0x01, 0x4c, 0x90, + 0x45, 0x00, 0x8c, 0xc3, 0x7d, 0xab, 0xc6, 0x10, 0x9d, 0x01, 0x5b, 0x91, + 0x44, 0x00, 0x9a, 0x43, 0x7d, 0xd5, 0xc3, 0x14, 0xa7, 0x01, 0x48, 0xb3, + 0x03, 0x7d, 0xdb, 0xd2, 0x05, 0xd5, 0x01, 0x5f, 0x70, 0xcf, 0x62, 0x3d, + 0x01, 0x4b, 0x69, 0x46, 0x00, 0xd4, 0xc3, 0x7d, 0xe1, 0xc6, 0x10, 0x9d, + 0x01, 0x4a, 0xb9, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xf8, 0x46, 0x00, 0xd4, + 0xc3, 0x7d, 0xe7, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xd9, 0xc6, 0x10, 0x9d, + 0x01, 0x4a, 0x98, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0xa1, 0xd6, 0x2d, 0x62, + 0x01, 0x48, 0xa8, 0xc2, 0x02, 0xfa, 0x00, 0x70, 0x11, 0xc3, 0x05, 0x21, + 0x00, 0x70, 0x19, 0xc3, 0x0c, 0x26, 0x00, 0x70, 0x21, 0xc2, 0x00, 0x45, + 0x00, 0x70, 0x28, 0xc3, 0x93, 0x9b, 0x00, 0x72, 0x19, 0xc4, 0xcb, 0x97, + 0x00, 0x72, 0x20, 0x87, 0x00, 0x71, 0xb8, 0x03, 0xc3, 0x7d, 0xef, 0xc3, + 0x38, 0x86, 0x00, 0x70, 0xb1, 0xc3, 0x08, 0x48, 0x00, 0x70, 0xc0, 0xc3, + 0x38, 0x86, 0x00, 0x70, 0xe1, 0xc2, 0x00, 0xd1, 0x00, 0x70, 0xf0, 0xc2, + 0x01, 0x23, 0x00, 0x72, 0x49, 0xc2, 0x00, 0x2c, 0x00, 0x72, 0x50, 0xcc, + 0x00, 0xfb, 0x07, 0xe0, 0xb1, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x40, 0x44, + 0x19, 0x6a, 0xc3, 0x7d, 0xf9, 0xce, 0x43, 0x77, 0x07, 0xed, 0x29, 0xd7, + 0x26, 0xea, 0x07, 0xed, 0x38, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xa9, 0xcb, + 0x10, 0xb5, 0x07, 0xe5, 0x38, 0xd7, 0x26, 0xea, 0x07, 0xed, 0x31, 0xce, + 0x43, 0x77, 0x07, 0xed, 0xf0, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xc1, 0xcb, + 0x10, 0xb5, 
0x07, 0xe5, 0x50, 0xce, 0x43, 0x77, 0x07, 0xea, 0xd1, 0xd7, + 0x26, 0xea, 0x07, 0xea, 0xd8, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xb9, 0xcb, + 0x10, 0xb5, 0x07, 0xe5, 0x48, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x91, 0xcb, + 0x10, 0xb5, 0x07, 0xe6, 0xc0, 0xd1, 0x30, 0xc1, 0x07, 0xec, 0x99, 0xd1, + 0x50, 0x13, 0x07, 0xec, 0xa0, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xf1, 0xca, + 0x26, 0xf7, 0x07, 0xe8, 0xd0, 0x43, 0x2b, 0xba, 0xc3, 0x7e, 0x05, 0x43, + 0x02, 0x98, 0x43, 0x7e, 0x11, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x49, 0xca, + 0x26, 0xf7, 0x07, 0xe9, 0x41, 0x0b, 0xc3, 0x7e, 0x27, 0x45, 0x00, 0x8c, + 0x43, 0x7e, 0x33, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xc9, 0xcd, 0x00, 0xfa, + 0x07, 0xe7, 0xe8, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x29, 0x0b, 0xc3, 0x7e, + 0x3f, 0xd3, 0x43, 0x72, 0x07, 0xeb, 0x49, 0xcb, 0x64, 0x7b, 0x07, 0xe9, + 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x39, 0x0b, 0xc3, 0x7e, 0x4b, 0xcb, + 0x64, 0x7b, 0x07, 0xe9, 0xc8, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x49, 0xcd, + 0x00, 0xfa, 0x07, 0xe8, 0x68, 0x00, 0xc3, 0x7e, 0x57, 0xd1, 0x56, 0x51, + 0x07, 0xe2, 0xf8, 0x00, 0xc3, 0x7e, 0x63, 0xd1, 0x56, 0x51, 0x07, 0xe2, + 0xf0, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x91, 0xcd, 0x00, 0xfa, 0x07, 0xe3, + 0x00, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xf9, 0xcb, 0x10, 0xb5, 0x07, 0xe5, + 0x80, 0x44, 0x19, 0x6a, 0xc3, 0x7e, 0x6f, 0xd1, 0x30, 0xc1, 0x07, 0xeb, + 0x09, 0x45, 0x19, 0x60, 0x43, 0x7e, 0x7b, 0xcc, 0x00, 0xfb, 0x07, 0xe0, + 0xf1, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x78, 0xd7, 0x26, 0xea, 0x07, 0xed, + 0x41, 0xce, 0x43, 0x77, 0x07, 0xee, 0x30, 0x0b, 0xc3, 0x7e, 0x87, 0xcb, + 0x64, 0x7b, 0x07, 0xe9, 0xa9, 0xd6, 0x30, 0xbc, 0x07, 0xea, 0xe0, 0xcc, + 0x10, 0xb4, 0x07, 0xe9, 0x89, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x40, 0xcc, + 0x00, 0xfb, 0x07, 0xe0, 0xe1, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x68, 0xd0, + 0x50, 0xf1, 0x07, 0xea, 0xe9, 0xd7, 0x26, 0xea, 0x07, 0xea, 0xf0, 0x0b, + 0xc3, 0x7e, 0x93, 0x4a, 0x74, 0x6e, 0x43, 0x7e, 0x9f, 0x0b, 0xc3, 0x7e, + 0xab, 0x45, 0x00, 0x8c, 0x43, 0x7e, 0xb7, 0xcd, 0x00, 0xfa, 0x07, 0xe8, + 0x79, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x58, 0xca, 0x26, 0xf7, 0x07, 0xe9, + 0x09, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x28, 0xca, 0x26, 0xf7, 0x07, 0xe9, + 0x11, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x30, 0x43, 0x12, 0xad, 0xc3, 0x7e, + 0xc3, 0x00, 0x43, 0x7e, 0xcd, 0xcd, 0x77, 0x53, 0x07, 0xee, 0x79, 0xcf, + 0x30, 0xd9, 0x07, 0xef, 0xa8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x51, 0xcb, + 0x10, 0xb5, 0x07, 0xe5, 0xd8, 0xce, 0x43, 0x77, 0x07, 0xed, 0xb1, 0x45, + 0x19, 0x60, 0xc3, 0x7e, 0xd9, 0xd7, 0x26, 0xea, 0x07, 0xeb, 0xc0, 0xcc, + 0x00, 0xfb, 0x07, 0xe1, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xd0, 0xca, + 0x26, 0xf7, 0x07, 0xeb, 0xa9, 0xcc, 0x10, 0xb4, 0x07, 0xee, 0x20, 0xcd, + 0x00, 0xfa, 0x07, 0xe2, 0xe9, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x80, 0xca, + 0x26, 0xf7, 0x07, 0xe9, 0xe1, 0xcd, 0x00, 0xfa, 0x07, 0xe9, 0xe8, 0x49, + 0x82, 0xa3, 0xc3, 0x7e, 0xe5, 0x0f, 0x43, 0x7e, 0xef, 0xcd, 0x00, 0xfa, + 0x07, 0xe7, 0xb1, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0x90, 0xcd, 0x00, 0xfa, + 0x07, 0xe7, 0xa9, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0x88, 0x0b, 0xc3, 0x7e, + 0xfb, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xd1, 0x45, 0x00, 0x8c, 0x43, 0x7f, + 0x07, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x31, 0xcb, 0x10, 0xb5, 0x07, 0xe5, + 0xc0, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xd9, 0xcd, 0x00, 0xfa, 0x07, 0xe0, + 0xa0, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xd1, 0xcd, 0x00, 0xfa, 0x07, 0xe0, + 0x98, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xc1, 0x0b, 0xc3, 0x7f, 0x19, 0xcb, + 0x64, 0x7b, 0x07, 0xe7, 0x28, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x71, 0xcb, + 0x10, 0xb5, 0x07, 0xe5, 0x20, 0xd1, 0x30, 0xc1, 0x07, 0xea, 0xa9, 0xd0, + 0x50, 0xf1, 
0x07, 0xea, 0xb1, 0xd1, 0x50, 0xf0, 0x07, 0xea, 0xb9, 0xce, + 0x43, 0x77, 0x07, 0xed, 0x19, 0xd7, 0x26, 0xea, 0x07, 0xed, 0x20, 0xcc, + 0x00, 0xfb, 0x07, 0xe0, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x18, 0xd1, + 0x50, 0x13, 0x07, 0xea, 0xa1, 0xce, 0x43, 0x77, 0x07, 0xed, 0x09, 0xd7, + 0x26, 0xea, 0x07, 0xed, 0x10, 0x0b, 0xc3, 0x7f, 0x25, 0x45, 0x00, 0x8c, + 0x43, 0x7f, 0x31, 0xcc, 0x10, 0xb4, 0x07, 0xe5, 0x29, 0xcb, 0x64, 0x7b, + 0x07, 0xe7, 0x20, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x59, 0xcb, 0x10, 0xb5, + 0x07, 0xe5, 0x08, 0xd1, 0x50, 0x13, 0x07, 0xea, 0x81, 0xce, 0x43, 0x77, + 0x07, 0xec, 0xf9, 0xd7, 0x26, 0xea, 0x07, 0xed, 0x00, 0x1b, 0xc3, 0x7f, + 0x3d, 0x03, 0xc3, 0x7f, 0x49, 0xcf, 0x60, 0x8a, 0x07, 0xe3, 0x39, 0x45, + 0x19, 0x60, 0xc3, 0x7f, 0x55, 0xcf, 0x69, 0x81, 0x07, 0xe3, 0x29, 0xce, + 0x72, 0xf0, 0x07, 0xe3, 0x21, 0x0a, 0xc3, 0x7f, 0x65, 0x46, 0x30, 0xc1, + 0xc3, 0x7f, 0x71, 0x42, 0x00, 0x5d, 0xc3, 0x7f, 0x7d, 0x43, 0x94, 0xf6, + 0xc3, 0x7f, 0x87, 0x42, 0x03, 0x53, 0xc3, 0x7f, 0x93, 0x44, 0xdf, 0x2b, + 0xc3, 0x7f, 0x9f, 0xd1, 0x50, 0xf0, 0x07, 0xe4, 0xc8, 0x0b, 0xc3, 0x7f, + 0xab, 0xd3, 0x43, 0x72, 0x07, 0xed, 0x70, 0xca, 0x26, 0xf7, 0x07, 0xec, + 0xe1, 0xcc, 0x10, 0xb4, 0x07, 0xec, 0xe8, 0xcc, 0x00, 0xfb, 0x07, 0xe2, + 0x61, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x98, 0xd1, 0x50, 0x13, 0x07, 0xec, + 0xa9, 0xd7, 0x26, 0xea, 0x07, 0xec, 0xb1, 0xce, 0x43, 0x77, 0x07, 0xed, + 0x98, 0xcc, 0x10, 0xb4, 0x07, 0xed, 0xc1, 0xca, 0x26, 0xf7, 0x07, 0xed, + 0xe8, 0xca, 0x26, 0xf7, 0x07, 0xec, 0xb9, 0xcc, 0x10, 0xb4, 0x07, 0xec, + 0xc0, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0xe1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, + 0x40, 0x45, 0x19, 0x60, 0xc3, 0x7f, 0xb7, 0xce, 0x43, 0x77, 0x07, 0xed, + 0xb8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0xd9, 0xcb, 0x10, 0xb5, 0x07, 0xe6, + 0x38, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xe1, + 0xe8, 0xcd, 0x00, 0xfa, 0x07, 0xf7, 0xa9, 0xca, 0x26, 0xf7, 0x07, 0xf7, + 0xb0, 0x46, 0x05, 0x34, 0xc3, 0x7f, 0xc3, 0x46, 0x00, 0xd4, 0x43, 0x7f, + 0xcf, 0xca, 0x26, 0xf7, 0x07, 0xec, 0x39, 0xcc, 0x10, 0xb4, 0x07, 0xec, + 0x40, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x01, 0xcb, 0x10, 0xb5, 0x07, 0xe6, + 0x50, 0x45, 0x19, 0x60, 0xc3, 0x7f, 0xdb, 0xce, 0x43, 0x77, 0x07, 0xec, + 0x09, 0xd7, 0x26, 0xea, 0x07, 0xec, 0x10, 0xca, 0x26, 0xf7, 0x07, 0xec, + 0x21, 0xcc, 0x10, 0xb4, 0x07, 0xec, 0x18, 0xcc, 0x10, 0xb4, 0x07, 0xed, + 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xed, 0xe0, 0xca, 0x26, 0xf7, 0x07, 0xe3, + 0xf9, 0xcd, 0x00, 0xfa, 0x07, 0xe1, 0xb0, 0xca, 0x26, 0xf7, 0x07, 0xe3, + 0xf1, 0xcd, 0x00, 0xfa, 0x07, 0xe1, 0xa8, 0x0b, 0xc3, 0x7f, 0xe7, 0x45, + 0x00, 0x8c, 0x43, 0x7f, 0xf3, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x99, 0xcb, + 0x10, 0xb5, 0x07, 0xe6, 0x10, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x41, 0xcb, + 0x10, 0xb5, 0x07, 0xe4, 0xf8, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x39, 0xcb, + 0x10, 0xb5, 0x07, 0xe4, 0xf0, 0x0b, 0xc3, 0x80, 0x05, 0xd3, 0x43, 0x72, + 0x07, 0xee, 0x10, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x11, 0xcc, 0x10, 0xb4, + 0x07, 0xe5, 0x00, 0x8f, 0x07, 0xea, 0x1b, 0x03, 0x80, 0x11, 0xc3, 0x3a, + 0x09, 0x07, 0xea, 0x28, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x41, 0xcb, 0x10, + 0xb5, 0x07, 0xe6, 0x88, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x39, 0xcb, 0x10, + 0xb5, 0x07, 0xe6, 0x80, 0xd1, 0x30, 0xc1, 0x07, 0xec, 0x71, 0xd1, 0x50, + 0x13, 0x07, 0xec, 0x79, 0xce, 0x43, 0x77, 0x07, 0xed, 0xc8, 0xcc, 0x00, + 0xfb, 0x07, 0xe2, 0x31, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x78, 0xd1, 0x30, + 0xc1, 0x07, 0xec, 0x49, 0xd1, 0x50, 0x13, 0x07, 0xec, 0x51, 0xce, 0x43, + 0x77, 0x07, 0xec, 0x58, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x29, 0xcb, 0x10, + 0xb5, 0x07, 
0xe6, 0x70, 0xd0, 0x50, 0xf1, 0x07, 0xec, 0x61, 0xd1, 0x50, + 0x13, 0x07, 0xec, 0x69, 0xce, 0x43, 0x77, 0x07, 0xee, 0x01, 0xd1, 0x50, + 0xf0, 0x07, 0xec, 0x90, 0xcb, 0x64, 0x7b, 0x07, 0xdf, 0xf9, 0x0b, 0xc3, + 0x80, 0x17, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xe9, 0x45, 0x00, 0x8c, 0x43, + 0x80, 0x23, 0x45, 0x00, 0x8c, 0xc3, 0x80, 0x33, 0x0b, 0xc3, 0x80, 0x3d, + 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x91, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0xa0, + 0x45, 0x00, 0x8c, 0xc3, 0x80, 0x49, 0x0b, 0xc3, 0x80, 0x55, 0xca, 0x26, + 0xf7, 0x07, 0xf6, 0x71, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0x80, 0x45, 0x00, + 0x8c, 0xc3, 0x80, 0x61, 0xcb, 0x64, 0x7b, 0x07, 0xdc, 0xa9, 0x0b, 0xc3, + 0x80, 0x71, 0xca, 0x26, 0xf7, 0x07, 0xdc, 0x98, 0xcb, 0x64, 0x7b, 0x07, + 0xdc, 0xc9, 0x0b, 0xc3, 0x80, 0x7d, 0xca, 0x26, 0xf7, 0x07, 0xdc, 0xb8, + 0x45, 0x00, 0x8c, 0xc3, 0x80, 0x89, 0x0b, 0xc3, 0x80, 0xa1, 0xca, 0x26, + 0xf7, 0x07, 0xf6, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0x00, 0x46, 0x02, + 0xd8, 0xc3, 0x80, 0xad, 0x0b, 0xc3, 0x80, 0xb9, 0xca, 0x26, 0xf7, 0x07, + 0xf4, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf5, 0x00, 0xca, 0x26, 0xf7, 0x07, + 0xdc, 0x59, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x50, 0xd6, 0x2c, 0xc8, 0x00, + 0x46, 0x20, 0x46, 0x02, 0xd8, 0xc3, 0x80, 0xc5, 0xcb, 0x64, 0x7b, 0x07, + 0xf6, 0x61, 0x0b, 0xc3, 0x80, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x50, + 0x19, 0xc3, 0x80, 0xdd, 0xc7, 0x06, 0x5f, 0x00, 0x32, 0x4b, 0x03, 0x80, + 0xec, 0xcd, 0x00, 0xfa, 0x07, 0xf4, 0x69, 0xca, 0x26, 0xf7, 0x07, 0xf4, + 0x70, 0x45, 0x00, 0x8c, 0xc3, 0x80, 0xf0, 0xcb, 0x64, 0x7b, 0x07, 0xdc, + 0x89, 0x0b, 0xc3, 0x81, 0x00, 0xca, 0x26, 0xf7, 0x07, 0xdc, 0x78, 0x00, + 0x43, 0x81, 0x0c, 0x00, 0x43, 0x81, 0x22, 0x00, 0x43, 0x81, 0x2e, 0x0b, + 0xc3, 0x81, 0x3a, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x31, 0xcb, 0x64, 0x7b, + 0x07, 0xf5, 0x40, 0x45, 0x00, 0x8c, 0xc3, 0x81, 0x46, 0xcb, 0x64, 0x7b, + 0x07, 0xdb, 0xe9, 0x0b, 0xc3, 0x81, 0x52, 0xca, 0x26, 0xf7, 0x07, 0xdb, + 0xd8, 0x00, 0x43, 0x81, 0x5e, 0xcc, 0x88, 0x05, 0x00, 0x46, 0x01, 0xcb, + 0x64, 0x7b, 0x07, 0xdb, 0x49, 0x0b, 0xc3, 0x81, 0x6e, 0xca, 0x26, 0xf7, + 0x07, 0xdb, 0x38, 0x00, 0x43, 0x81, 0x7a, 0x45, 0x00, 0x8c, 0xc3, 0x81, + 0x8a, 0x0f, 0xc3, 0x81, 0x9c, 0x0b, 0xc3, 0x81, 0xab, 0xca, 0x26, 0xf7, + 0x07, 0xf4, 0xb0, 0x00, 0x43, 0x81, 0xb7, 0x45, 0x00, 0x8c, 0xc3, 0x81, + 0xc7, 0x0b, 0xc3, 0x81, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x11, 0xcb, + 0x64, 0x7b, 0x07, 0xf6, 0x20, 0x00, 0x43, 0x81, 0xdd, 0x00, 0x43, 0x81, + 0xe9, 0x98, 0x00, 0x45, 0xf1, 0xca, 0xa6, 0xd4, 0x00, 0x45, 0xb8, 0xcb, + 0x10, 0xb5, 0x07, 0xda, 0xc1, 0xcc, 0x00, 0xfb, 0x07, 0xda, 0xb0, 0xcb, + 0x64, 0x7b, 0x07, 0xdb, 0x89, 0x0b, 0xc3, 0x81, 0xf9, 0xca, 0x26, 0xf7, + 0x07, 0xdb, 0x78, 0x45, 0x00, 0x8c, 0xc3, 0x82, 0x05, 0xc6, 0x17, 0xce, + 0x00, 0x36, 0x93, 0x03, 0x82, 0x18, 0x0b, 0xc3, 0x82, 0x1c, 0xca, 0x26, + 0xf7, 0x07, 0xf7, 0x91, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0xa0, 0xca, 0x26, + 0xf7, 0x07, 0xde, 0xe1, 0xcd, 0x00, 0xfa, 0x07, 0xde, 0xd8, 0x45, 0x00, + 0x8c, 0xc3, 0x82, 0x28, 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x69, 0xca, 0x26, + 0xf7, 0x07, 0xf5, 0x70, 0xcb, 0x64, 0x7b, 0x07, 0xdd, 0x19, 0x0b, 0xc3, + 0x82, 0x59, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x08, 0xca, 0x26, 0xf7, 0x07, + 0xdc, 0x69, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x60, 0x45, 0x00, 0x8c, 0xc3, + 0x82, 0x65, 0x0b, 0xc3, 0x82, 0x81, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x81, + 0xcb, 0x64, 0x7b, 0x07, 0xf4, 0x90, 0x00, 0x43, 0x82, 0x8d, 0xcb, 0x64, + 0x7b, 0x07, 0xda, 0xa9, 0x0b, 0xc3, 0x82, 0x9d, 0xca, 0x26, 0xf7, 0x07, + 0xda, 0x98, 0xcb, 0x64, 0x7b, 0x07, 0xdf, 0x49, 0xcc, 0x10, 0xb4, 0x07, + 0xdf, 0x40, 
0xce, 0x00, 0xf9, 0x07, 0xde, 0xe8, 0x44, 0x05, 0x18, 0xc3, + 0x82, 0xa9, 0xd0, 0x0e, 0x7c, 0x00, 0x35, 0x40, 0xcb, 0x10, 0xb5, 0x07, + 0xf6, 0xb9, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0xa8, 0xcb, 0x10, 0xb5, 0x07, + 0xdf, 0x31, 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0x20, 0xd5, 0x35, 0x75, 0x00, + 0x45, 0x91, 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x79, 0xca, 0x26, 0xf7, 0x07, + 0xf5, 0x80, 0x0b, 0xc3, 0x82, 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x31, + 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0x40, 0x46, 0x02, 0xd8, 0xc3, 0x82, 0xc4, + 0x0b, 0xc3, 0x82, 0xd0, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0xd1, 0xcb, 0x64, + 0x7b, 0x07, 0xf5, 0xe0, 0xce, 0x6d, 0xe8, 0x00, 0x37, 0xd1, 0x0b, 0xc3, + 0x82, 0xdc, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0xb1, 0xcb, 0x64, 0x7b, 0x07, + 0xf5, 0xc0, 0x45, 0x00, 0x8c, 0xc3, 0x82, 0xe8, 0x0b, 0xc3, 0x83, 0x0a, + 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x91, 0xcb, 0x64, 0x7b, 0x07, 0xf5, 0xa0, + 0x00, 0x43, 0x83, 0x16, 0x00, 0x43, 0x83, 0x28, 0x00, 0x43, 0x83, 0x34, + 0x00, 0x43, 0x83, 0x4a, 0x00, 0x43, 0x83, 0x56, 0xca, 0x26, 0xf7, 0x07, + 0xdc, 0x39, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x30, 0xcb, 0x64, 0x7b, 0x07, + 0xdb, 0xa9, 0x0b, 0xc3, 0x83, 0x62, 0xca, 0x26, 0xf7, 0x07, 0xdb, 0x98, + 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0x69, 0x0b, 0xc3, 0x83, 0x6e, 0xca, 0x26, + 0xf7, 0x07, 0xdb, 0x58, 0x44, 0x05, 0x18, 0xc3, 0x83, 0x7a, 0xce, 0x1e, + 0x29, 0x00, 0x36, 0x51, 0xc4, 0x00, 0x9d, 0x00, 0x36, 0x21, 0xcb, 0x08, + 0x09, 0x00, 0x31, 0x23, 0x03, 0x83, 0x86, 0x5d, 0x10, 0x12, 0x43, 0x83, + 0x8a, 0x45, 0x00, 0x8c, 0xc3, 0x83, 0x96, 0x0b, 0xc3, 0x83, 0xa2, 0xca, + 0x26, 0xf7, 0x07, 0xf7, 0x11, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0x20, 0xcb, + 0x64, 0x7b, 0x07, 0xde, 0xb1, 0x0b, 0xc3, 0x83, 0xae, 0xca, 0x26, 0xf7, + 0x07, 0xde, 0xa0, 0x00, 0x43, 0x83, 0xba, 0x45, 0x00, 0x8c, 0xc3, 0x83, + 0xca, 0xc6, 0x3a, 0x06, 0x00, 0x35, 0xd3, 0x03, 0x83, 0xe6, 0x0b, 0xc3, + 0x83, 0xea, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0x31, 0xcb, 0x64, 0x7b, 0x07, + 0xf7, 0x40, 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0xc9, 0x0b, 0xc3, 0x83, 0xf6, + 0xca, 0x26, 0xf7, 0x07, 0xdb, 0xb8, 0x00, 0x43, 0x84, 0x02, 0xce, 0x00, + 0xf9, 0x07, 0xf4, 0x00, 0xcb, 0x98, 0x6e, 0x00, 0x35, 0xf3, 0x03, 0x84, + 0x18, 0xc4, 0xe0, 0x63, 0x00, 0x36, 0x0b, 0x03, 0x84, 0x1c, 0x45, 0x00, + 0x8c, 0xc3, 0x84, 0x20, 0x0b, 0xc3, 0x84, 0x2f, 0xca, 0x26, 0xf7, 0x07, + 0xf7, 0x51, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0x60, 0xc3, 0x2b, 0xb9, 0x00, + 0x33, 0xc1, 0xc4, 0x06, 0x5a, 0x00, 0x33, 0xa9, 0xc3, 0x7e, 0x89, 0x00, + 0x33, 0xb0, 0xc2, 0x16, 0x1c, 0x0f, 0x75, 0xa9, 0xc2, 0x02, 0x98, 0x0f, + 0x75, 0x41, 0x0a, 0x43, 0x84, 0x3b, 0xc4, 0xdf, 0x93, 0x0f, 0x75, 0xa1, + 0xc2, 0x01, 0x9d, 0x0f, 0x75, 0x89, 0xc3, 0x03, 0x26, 0x0f, 0x75, 0x70, + 0xc2, 0x00, 0x74, 0x0f, 0x75, 0x31, 0x8a, 0x0f, 0x75, 0xd0, 0x8e, 0x0f, + 0x75, 0x19, 0x86, 0x0f, 0x75, 0xc8, 0xc3, 0x03, 0x26, 0x0f, 0x72, 0x71, + 0xc2, 0x01, 0x9d, 0x0f, 0x72, 0x89, 0xc4, 0xdf, 0x93, 0x0f, 0x72, 0xa0, + 0xc2, 0x01, 0x9d, 0x0f, 0x72, 0xc9, 0x47, 0x3b, 0xc4, 0x43, 0x84, 0x47, + 0xc2, 0x16, 0x1c, 0x0f, 0x74, 0xb1, 0xc2, 0x00, 0x65, 0x0f, 0x74, 0xc0, + 0xc3, 0x85, 0xf5, 0x0f, 0x73, 0xe1, 0xc3, 0xb1, 0x0d, 0x0f, 0x73, 0xf0, + 0xc3, 0x33, 0x5f, 0x00, 0x46, 0xe9, 0x8a, 0x00, 0x46, 0x60, 0xc6, 0xcb, + 0x9f, 0x00, 0x46, 0xe1, 0xc7, 0xc1, 0xc4, 0x00, 0x46, 0xd9, 0xcb, 0x92, + 0x49, 0x00, 0x46, 0xd1, 0xc5, 0xd6, 0x55, 0x00, 0x46, 0xa1, 0xc5, 0xde, + 0x57, 0x00, 0x44, 0xc0, 0xc5, 0xdb, 0xaf, 0x00, 0x44, 0xd1, 0xc6, 0xcb, + 0x6f, 0x00, 0x44, 0xc8, 0x4b, 0x13, 0xdd, 0xc3, 0x84, 0x53, 0xcc, 0x04, + 0xdb, 0x0f, 0xdd, 0x18, 0xdc, 0x13, 0xdd, 0x0f, 0xdd, 0x3b, 0x03, 0x84, + 0x59, 0xcc, 
0x04, 0xdb, 0x0f, 0xdd, 0x12, 0x03, 0x84, 0x5f, 0xc4, 0x00, + 0x49, 0x0f, 0xdd, 0x03, 0x03, 0x84, 0x65, 0xc5, 0x00, 0x2c, 0x0f, 0xdd, + 0x0a, 0x03, 0x84, 0x69, 0xca, 0x01, 0x68, 0x01, 0x29, 0x61, 0xc4, 0x00, + 0x49, 0x01, 0x28, 0x81, 0xc5, 0x00, 0x2c, 0x01, 0x28, 0x60, 0x16, 0xc3, + 0x84, 0x6d, 0xd2, 0x4a, 0x2d, 0x0f, 0xd0, 0x39, 0xce, 0x2a, 0xfe, 0x0f, + 0xd0, 0x99, 0xdf, 0x0d, 0x00, 0x0f, 0xd0, 0xe0, 0xc5, 0xa8, 0xf7, 0x0f, + 0xd2, 0x89, 0xc4, 0xde, 0x83, 0x0f, 0xd2, 0x91, 0xc6, 0xca, 0xfd, 0x0f, + 0xd2, 0x98, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x79, 0xdb, 0x18, 0x03, 0x0f, + 0xd1, 0xc8, 0x44, 0x1d, 0xba, 0xc3, 0x84, 0x79, 0xc5, 0xc0, 0x74, 0x0f, + 0xaf, 0x98, 0x17, 0xc3, 0x84, 0x85, 0x96, 0x0b, 0x4d, 0xd0, 0x9a, 0x0b, + 0x4f, 0x31, 0xc2, 0x10, 0x11, 0x0b, 0x4c, 0xd0, 0x83, 0x0b, 0x4b, 0x9b, + 0x03, 0x84, 0x93, 0x17, 0xc3, 0x84, 0x99, 0x42, 0x2c, 0x43, 0x43, 0x84, + 0xa1, 0x96, 0x0b, 0x4f, 0x88, 0x17, 0xc3, 0x84, 0xab, 0x07, 0x43, 0x84, + 0xbb, 0x93, 0x0b, 0x4c, 0x01, 0x92, 0x0b, 0x4b, 0xe8, 0x42, 0x01, 0x31, + 0xc3, 0x84, 0xca, 0x92, 0x0b, 0x4b, 0x30, 0xc2, 0x5c, 0x9b, 0x0b, 0x4d, + 0x81, 0x93, 0x0b, 0x4c, 0x70, 0xc2, 0x00, 0x11, 0x0b, 0x4b, 0x79, 0x87, + 0x0b, 0x4c, 0x08, 0x87, 0x0b, 0x4e, 0xa3, 0x03, 0x84, 0xd6, 0xc2, 0xd0, + 0x00, 0x0b, 0x4c, 0x18, 0x93, 0x0b, 0x4d, 0x08, 0x90, 0x0b, 0x4b, 0x38, + 0xc3, 0x8b, 0xaa, 0x0b, 0x4c, 0xe0, 0xc2, 0x10, 0x11, 0x0b, 0x4c, 0xc8, + 0x87, 0x0b, 0x4b, 0x89, 0x93, 0x0b, 0x4e, 0x50, 0x8f, 0x0b, 0x4b, 0xc0, + 0xc5, 0xdb, 0x32, 0x0b, 0x4e, 0xd1, 0xc5, 0xd9, 0xa2, 0x0b, 0x4e, 0x88, + 0x96, 0x0b, 0x4e, 0x69, 0xc2, 0x00, 0xe2, 0x0b, 0x4d, 0x88, 0x9a, 0x0b, + 0x4f, 0x39, 0x96, 0x0b, 0x4d, 0xe8, 0x93, 0x0b, 0x4f, 0xa0, 0x90, 0x0b, + 0x4b, 0x59, 0x96, 0x0b, 0x4c, 0x60, 0x8f, 0x0b, 0x4b, 0xf0, 0xc6, 0xcb, + 0x7b, 0x0b, 0x4f, 0xa9, 0xc4, 0x05, 0x2e, 0x0b, 0x4e, 0x91, 0x8b, 0x0b, + 0x4e, 0x40, 0x96, 0x0b, 0x4e, 0x20, 0x96, 0x0b, 0x4e, 0x78, 0xc3, 0xc5, + 0xd2, 0x0b, 0x4a, 0x29, 0x03, 0xc3, 0x84, 0xdc, 0xc3, 0xd7, 0xe2, 0x0b, + 0x49, 0xd9, 0xc4, 0xc2, 0x61, 0x0b, 0x49, 0x98, 0xc3, 0x8f, 0x8a, 0x0b, + 0x49, 0xe1, 0xc3, 0x17, 0x29, 0x0b, 0x48, 0x99, 0x42, 0x2c, 0x43, 0xc3, + 0x84, 0xe9, 0xc2, 0x00, 0xb6, 0x0b, 0x47, 0xf1, 0xc2, 0x05, 0x1d, 0x0b, + 0x47, 0xe0, 0xc2, 0x00, 0xa4, 0x0b, 0x4a, 0x31, 0xc2, 0x02, 0xe0, 0x0b, + 0x47, 0xc0, 0x96, 0x0b, 0x49, 0x59, 0x92, 0x0b, 0x48, 0xf8, 0xc2, 0x01, + 0xdf, 0x0b, 0x49, 0xc1, 0x87, 0x0b, 0x4a, 0xc8, 0x87, 0x0b, 0x48, 0xa9, + 0xc2, 0xd0, 0x00, 0x0b, 0x48, 0x48, 0xc3, 0x7c, 0x57, 0x0b, 0x48, 0x71, + 0x96, 0x0b, 0x47, 0xb8, 0xc2, 0x02, 0xe0, 0x0b, 0x47, 0xa8, 0x8f, 0x0b, + 0x4a, 0x21, 0xc3, 0x48, 0xc4, 0x0b, 0x48, 0xb8, 0x90, 0x0b, 0x49, 0xf1, + 0x96, 0x0b, 0x48, 0x58, 0xc6, 0x18, 0x0e, 0x0b, 0x4b, 0x18, 0xc2, 0x10, + 0x11, 0x0b, 0x49, 0x51, 0x96, 0x0b, 0x48, 0x40, 0x90, 0x0b, 0x47, 0xa0, + 0x90, 0x0b, 0x4a, 0x09, 0xc3, 0xb5, 0x1b, 0x0b, 0x49, 0x19, 0x96, 0x0b, + 0x48, 0x00, 0x92, 0x0b, 0x49, 0x61, 0x8f, 0x0b, 0x49, 0x31, 0xc8, 0xb7, + 0xba, 0x0b, 0x48, 0x79, 0xc7, 0xc3, 0x37, 0x0b, 0x47, 0xf8, 0x17, 0xc3, + 0x84, 0xf5, 0x87, 0x0b, 0x47, 0xe8, 0x92, 0x0b, 0x49, 0xb1, 0x8f, 0x0b, + 0x49, 0xa0, 0xc3, 0xc9, 0xd8, 0x0b, 0x47, 0x49, 0xc7, 0xc7, 0x66, 0x0b, + 0x47, 0x50, 0x8f, 0x0b, 0x47, 0x11, 0x15, 0xc3, 0x84, 0xff, 0xc3, 0xe6, + 0x08, 0x0b, 0x45, 0x08, 0x97, 0x0b, 0x46, 0x53, 0x03, 0x85, 0x0b, 0xc2, + 0x00, 0xc4, 0x0b, 0x44, 0x98, 0xc2, 0x5c, 0x9b, 0x0b, 0x44, 0xa9, 0xc9, + 0xb1, 0xdc, 0x0b, 0x44, 0x78, 0xc2, 0xd0, 0x00, 0x0b, 0x47, 0x29, 0xc3, + 0xd0, 0xd7, 0x0b, 0x46, 0x40, 0x8f, 0x0b, 0x46, 0x79, 0xc2, 0x00, 0x4f, + 0x0b, 0x46, 
0x20, 0x92, 0x0b, 0x46, 0xd1, 0x8f, 0x0b, 0x46, 0xb8, 0x96, + 0x0b, 0x45, 0xe9, 0xc5, 0xdb, 0x14, 0x0b, 0x44, 0xa0, 0x90, 0x0b, 0x46, + 0xb1, 0xc7, 0xc7, 0x43, 0x0b, 0x46, 0x38, 0x90, 0x0b, 0x46, 0xa1, 0xc5, + 0xda, 0x6f, 0x0b, 0x45, 0xc8, 0x42, 0x01, 0x31, 0xc3, 0x85, 0x21, 0xc3, + 0x16, 0x59, 0x0b, 0x46, 0xf8, 0x17, 0xc3, 0x85, 0x2d, 0xc3, 0x82, 0x78, + 0x0b, 0x46, 0x11, 0xc5, 0xd8, 0xee, 0x0b, 0x44, 0xb8, 0xc5, 0xd6, 0x87, + 0x0b, 0x45, 0xb9, 0x96, 0x0b, 0x45, 0x30, 0xc3, 0x7c, 0x57, 0x0b, 0x46, + 0x61, 0x87, 0x0b, 0x45, 0x20, 0xc3, 0x8e, 0x97, 0x0b, 0x46, 0xf1, 0xc2, + 0x00, 0xba, 0x0b, 0x46, 0x58, 0xc5, 0xda, 0x10, 0x0b, 0x46, 0xc1, 0xc7, + 0xc6, 0x71, 0x0b, 0x45, 0x98, 0xc6, 0xd1, 0x1b, 0x0b, 0x43, 0xa9, 0xc3, + 0x76, 0x32, 0x0b, 0x44, 0x51, 0xc3, 0x8f, 0x91, 0x0b, 0x43, 0xd2, 0x03, + 0x85, 0x35, 0xc3, 0xe5, 0x93, 0x0b, 0x44, 0x41, 0xc6, 0xce, 0xbd, 0x0b, + 0x44, 0x38, 0xc4, 0x9c, 0x80, 0x0b, 0x42, 0xf9, 0xc7, 0xca, 0x4c, 0x0b, + 0x42, 0xe0, 0xc3, 0x82, 0x78, 0x0b, 0x41, 0xf1, 0xca, 0xa5, 0x80, 0x0b, + 0x40, 0x40, 0x8f, 0x0b, 0x41, 0xb9, 0xc7, 0xc1, 0x38, 0x0b, 0x40, 0x28, + 0x8f, 0x0b, 0x42, 0x73, 0x03, 0x85, 0x3b, 0xc2, 0x00, 0xba, 0x0b, 0x42, + 0x31, 0xc3, 0x16, 0x59, 0x0b, 0x41, 0x91, 0xc4, 0x2c, 0x42, 0x0b, 0x40, + 0xd0, 0xc3, 0x4e, 0x64, 0x0b, 0x41, 0xb1, 0xc3, 0xe5, 0x5d, 0x0b, 0x41, + 0x30, 0xcc, 0x8b, 0xa1, 0x0b, 0x42, 0x08, 0xc5, 0xd9, 0xe3, 0x0b, 0x40, + 0xb1, 0xc5, 0xb7, 0xb5, 0x0b, 0x40, 0x00, 0x00, 0x43, 0x85, 0x4d, 0x8f, + 0x0b, 0x42, 0x61, 0xc3, 0x82, 0x78, 0x0b, 0x42, 0x10, 0xc2, 0x01, 0x5d, + 0x0b, 0x40, 0x51, 0xc5, 0xa9, 0x67, 0x0b, 0x40, 0x48, 0xc2, 0x01, 0x5d, + 0x0b, 0x40, 0x19, 0xc5, 0xa9, 0x67, 0x0b, 0x40, 0x10, 0xa2, 0x01, 0x40, + 0xfb, 0x03, 0x85, 0x59, 0xa3, 0x01, 0x41, 0x7b, 0x03, 0x85, 0x6b, 0xa5, + 0x01, 0x44, 0x79, 0xa4, 0x01, 0x42, 0x7a, 0x03, 0x85, 0x76, 0xa3, 0x01, + 0x41, 0xbb, 0x03, 0x85, 0x7a, 0xa5, 0x01, 0x44, 0xb9, 0xa4, 0x01, 0x42, + 0xba, 0x03, 0x85, 0x85, 0xa5, 0x01, 0x45, 0x39, 0xa4, 0x01, 0x43, 0x3a, + 0x03, 0x85, 0x89, 0xa5, 0x01, 0x46, 0x38, 0xa3, 0x01, 0x41, 0xdb, 0x03, + 0x85, 0x8d, 0xa5, 0x01, 0x44, 0xd9, 0xa4, 0x01, 0x42, 0xda, 0x03, 0x85, + 0x98, 0xa5, 0x01, 0x45, 0x59, 0xa4, 0x01, 0x43, 0x5a, 0x03, 0x85, 0x9c, + 0xa5, 0x01, 0x46, 0x58, 0xa5, 0x01, 0x45, 0x99, 0xa4, 0x01, 0x43, 0x9a, + 0x03, 0x85, 0xa0, 0xa5, 0x01, 0x46, 0x98, 0xa5, 0x01, 0x47, 0x18, 0xa3, + 0x01, 0x41, 0xeb, 0x03, 0x85, 0xa4, 0xa5, 0x01, 0x44, 0xe9, 0xa4, 0x01, + 0x42, 0xea, 0x03, 0x85, 0xaf, 0xa5, 0x01, 0x45, 0x69, 0xa4, 0x01, 0x43, + 0x6a, 0x03, 0x85, 0xb3, 0xa5, 0x01, 0x46, 0x68, 0xa5, 0x01, 0x45, 0xa9, + 0xa4, 0x01, 0x43, 0xaa, 0x03, 0x85, 0xb7, 0xa5, 0x01, 0x46, 0xa8, 0xa5, + 0x01, 0x47, 0x28, 0xa5, 0x01, 0x45, 0xc9, 0xa4, 0x01, 0x43, 0xca, 0x03, + 0x85, 0xbb, 0xa5, 0x01, 0x46, 0xc8, 0xa5, 0x01, 0x47, 0x48, 0xa5, 0x01, + 0x47, 0x88, 0xa3, 0x01, 0x41, 0xf3, 0x03, 0x85, 0xbf, 0xa5, 0x01, 0x44, + 0xf1, 0xa4, 0x01, 0x42, 0xf2, 0x03, 0x85, 0xca, 0xa5, 0x01, 0x45, 0x71, + 0xa4, 0x01, 0x43, 0x72, 0x03, 0x85, 0xce, 0xa5, 0x01, 0x46, 0x70, 0xa5, + 0x01, 0x45, 0xb1, 0xa4, 0x01, 0x43, 0xb2, 0x03, 0x85, 0xd2, 0xa5, 0x01, + 0x46, 0xb0, 0xa5, 0x01, 0x47, 0x30, 0xa5, 0x01, 0x45, 0xd1, 0xa4, 0x01, + 0x43, 0xd2, 0x03, 0x85, 0xd6, 0xa5, 0x01, 0x46, 0xd0, 0xa5, 0x01, 0x47, + 0x50, 0xa5, 0x01, 0x47, 0x90, 0xa5, 0x01, 0x45, 0xe1, 0xa4, 0x01, 0x43, + 0xe2, 0x03, 0x85, 0xda, 0xa5, 0x01, 0x46, 0xe0, 0xa5, 0x01, 0x47, 0x60, + 0xa5, 0x01, 0x47, 0xa0, 0xa5, 0x01, 0x47, 0xc0, 0xc6, 0x04, 0xe1, 0x0f, + 0xda, 0x01, 0xcc, 0x04, 0xcb, 0x0f, 0xda, 0x78, 0xcc, 0x04, 0xcb, 0x0f, + 0xda, 0x71, 
0xc5, 0x00, 0x2c, 0x0f, 0xda, 0x80, 0x45, 0x00, 0x8c, 0xc3, + 0x85, 0xde, 0xc6, 0x10, 0x9d, 0x01, 0x5b, 0x81, 0x45, 0x03, 0x55, 0x43, + 0x86, 0x08, 0xc3, 0x14, 0xa7, 0x01, 0x59, 0xdb, 0x03, 0x86, 0x0e, 0xd2, + 0x05, 0xd5, 0x01, 0x5f, 0x60, 0xcf, 0x2c, 0x35, 0x01, 0x59, 0xc9, 0xd6, + 0x2d, 0x62, 0x01, 0x59, 0xd0, 0xcf, 0x62, 0x3d, 0x01, 0x4b, 0x59, 0x47, + 0x92, 0xe3, 0xc3, 0x86, 0x14, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xf1, 0xc6, + 0x10, 0x9d, 0x01, 0x4a, 0xb0, 0x46, 0x00, 0xd4, 0xc3, 0x86, 0x1a, 0xc8, + 0xae, 0xbc, 0x01, 0x4a, 0xd1, 0xc6, 0x10, 0x9d, 0x01, 0x4a, 0x90, 0xc4, + 0xe1, 0xbf, 0x08, 0x3a, 0x61, 0xc4, 0xe2, 0xc7, 0x08, 0x3a, 0x59, 0xc4, + 0xe0, 0x7b, 0x08, 0x3a, 0x51, 0xc4, 0xe1, 0x2b, 0x08, 0x3a, 0x48, 0x88, + 0x08, 0x30, 0x81, 0x8f, 0x08, 0x30, 0x88, 0x88, 0x08, 0x30, 0x99, 0x8f, + 0x08, 0x30, 0xa0, 0x8f, 0x08, 0x30, 0xb0, 0xc5, 0xdc, 0x27, 0x08, 0x04, + 0x01, 0xc7, 0xc5, 0xa6, 0x08, 0x04, 0x09, 0xc6, 0xcf, 0x65, 0x08, 0x04, + 0x11, 0x23, 0xc3, 0x86, 0x24, 0x24, 0xc3, 0x86, 0x30, 0x25, 0xc3, 0x86, + 0x3c, 0x26, 0xc3, 0x86, 0x48, 0x22, 0x43, 0x86, 0x54, 0xc7, 0xc6, 0x1d, + 0x08, 0x04, 0x71, 0xc8, 0xb7, 0x12, 0x08, 0x04, 0x79, 0xc7, 0xc9, 0xc0, + 0x08, 0x04, 0x81, 0xc7, 0xc1, 0x23, 0x08, 0x04, 0x89, 0xc9, 0xa9, 0x1b, + 0x08, 0x04, 0x90, 0xc5, 0xdd, 0x03, 0x08, 0x04, 0xa9, 0xc6, 0xd1, 0xb1, + 0x08, 0x04, 0xb1, 0x9f, 0x08, 0x04, 0xb8, 0xc8, 0xba, 0xea, 0x08, 0x04, + 0xd1, 0xc6, 0xd2, 0x17, 0x08, 0x04, 0xd9, 0x9f, 0x08, 0x04, 0xe1, 0xc6, + 0xd2, 0x6b, 0x08, 0x04, 0xe9, 0xa3, 0x08, 0x04, 0xf0, 0x9d, 0x08, 0x04, + 0xf9, 0xc6, 0xd3, 0x01, 0x08, 0x05, 0x01, 0x9f, 0x08, 0x05, 0x09, 0xa0, + 0x08, 0x05, 0x11, 0xa1, 0x08, 0x05, 0x19, 0xa4, 0x08, 0x05, 0x29, 0xa5, + 0x08, 0x05, 0x31, 0xc7, 0xc5, 0x8a, 0x08, 0x05, 0x38, 0x9d, 0x08, 0x05, + 0x41, 0x9e, 0x08, 0x05, 0x49, 0xc9, 0xaf, 0x5d, 0x08, 0x05, 0x51, 0xc8, + 0xbe, 0x1a, 0x08, 0x05, 0x59, 0xa1, 0x08, 0x05, 0x61, 0xa2, 0x08, 0x05, + 0x69, 0xa3, 0x08, 0x05, 0x71, 0xa4, 0x08, 0x05, 0x79, 0xa5, 0x08, 0x05, + 0x81, 0xa6, 0x08, 0x05, 0x88, 0x9d, 0x08, 0x05, 0x91, 0x9f, 0x08, 0x05, + 0xa1, 0xc7, 0xc8, 0xa8, 0x08, 0x05, 0xa9, 0xa1, 0x08, 0x05, 0xb1, 0xa4, + 0x08, 0x05, 0xc1, 0xa5, 0x08, 0x05, 0xc9, 0xa6, 0x08, 0x05, 0xd1, 0x9e, + 0x08, 0x05, 0x99, 0xc6, 0xd0, 0xd3, 0x08, 0x05, 0xb8, 0x9d, 0x08, 0x05, + 0xd9, 0x9e, 0x08, 0x05, 0xe1, 0x9f, 0x08, 0x05, 0xe9, 0xa0, 0x08, 0x05, + 0xf1, 0xa1, 0x08, 0x05, 0xf9, 0xa2, 0x08, 0x06, 0x01, 0xa6, 0x08, 0x06, + 0x08, 0x9d, 0x08, 0x06, 0x11, 0xc8, 0xb7, 0xea, 0x08, 0x06, 0x18, 0xcb, + 0x8d, 0x00, 0x08, 0x06, 0x21, 0xc9, 0xaa, 0x32, 0x08, 0x06, 0x28, 0xc7, + 0xc6, 0x40, 0x08, 0x06, 0x31, 0xc7, 0xc7, 0x9e, 0x08, 0x06, 0x39, 0x9f, + 0x08, 0x06, 0x41, 0xc7, 0xc1, 0x2a, 0x08, 0x06, 0x49, 0xa1, 0x08, 0x06, + 0x51, 0xa3, 0x08, 0x06, 0x58, 0xc9, 0xad, 0x2f, 0x08, 0x06, 0x69, 0xcf, + 0x6b, 0x61, 0x08, 0x06, 0x71, 0xc7, 0xc2, 0x26, 0x08, 0x06, 0x79, 0xa2, + 0x08, 0x06, 0x81, 0xa3, 0x08, 0x06, 0x89, 0xa5, 0x08, 0x06, 0x99, 0xa6, + 0x08, 0x06, 0xa1, 0xd1, 0x52, 0x99, 0x08, 0x06, 0x60, 0x9e, 0x08, 0x06, + 0xa9, 0x9f, 0x08, 0x06, 0xb1, 0xa0, 0x08, 0x06, 0xb9, 0xc6, 0xcf, 0x5f, + 0x08, 0x06, 0xc1, 0xa2, 0x08, 0x06, 0xc9, 0xa3, 0x08, 0x06, 0xd1, 0xa4, + 0x08, 0x06, 0xd9, 0xa5, 0x08, 0x06, 0xe1, 0xa6, 0x08, 0x06, 0xe8, 0x9d, + 0x08, 0x06, 0xf9, 0x9e, 0x08, 0x07, 0x01, 0x9f, 0x08, 0x07, 0x09, 0xa0, + 0x08, 0x07, 0x11, 0xa1, 0x08, 0x07, 0x19, 0xa2, 0x08, 0x07, 0x21, 0xa4, + 0x08, 0x07, 0x31, 0xa5, 0x08, 0x07, 0x39, 0xa6, 0x08, 0x07, 0x41, 0xa3, + 0x08, 0x07, 0x28, 0x9d, 0x08, 0x07, 0x49, 0x9e, 0x08, 0x07, 0x51, 0x9f, + 0x08, 0x07, 
0x59, 0xa0, 0x08, 0x07, 0x61, 0xa1, 0x08, 0x07, 0x69, 0xa2, + 0x08, 0x07, 0x71, 0xa4, 0x08, 0x07, 0x81, 0xa3, 0x08, 0x07, 0x79, 0xa5, + 0x08, 0x07, 0x89, 0xa6, 0x08, 0x07, 0x90, 0x9e, 0x08, 0x07, 0x99, 0x9f, + 0x08, 0x07, 0xa1, 0xa3, 0x08, 0x07, 0xa9, 0xa4, 0x08, 0x07, 0xb1, 0xa5, + 0x08, 0x07, 0xb9, 0xa6, 0x08, 0x07, 0xc0, 0xc3, 0x00, 0x33, 0x0e, 0xf8, + 0xf1, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0x0b, 0x03, 0x86, 0x66, 0xc9, 0x08, + 0xf7, 0x00, 0x0a, 0xe9, 0xca, 0xa7, 0x1a, 0x00, 0x10, 0xc9, 0xc6, 0xbd, + 0xf4, 0x00, 0x0a, 0xf8, 0xc5, 0x05, 0x02, 0x00, 0xf3, 0x1b, 0x03, 0x86, + 0x6c, 0xc5, 0x00, 0xd4, 0x00, 0xf3, 0x08, 0xce, 0x16, 0x0f, 0x00, 0xf3, + 0x28, 0xd3, 0x42, 0x2f, 0x05, 0x3e, 0x51, 0xc9, 0xb4, 0xeb, 0x00, 0x11, + 0xf8, 0x46, 0x00, 0x8b, 0x43, 0x86, 0x72, 0x94, 0x05, 0x5a, 0x5b, 0x03, + 0x86, 0x7e, 0x89, 0x00, 0x13, 0x0a, 0x03, 0x86, 0x84, 0xc8, 0xb7, 0xc2, + 0x00, 0xe8, 0xf9, 0xcd, 0x7c, 0x26, 0x00, 0xe8, 0xf1, 0x97, 0x00, 0xe8, + 0xe9, 0x91, 0x00, 0xe8, 0x8a, 0x03, 0x86, 0x8a, 0xc6, 0xbd, 0xf4, 0x00, + 0x07, 0x3b, 0x03, 0x86, 0x96, 0xc9, 0x08, 0xf7, 0x00, 0x08, 0x49, 0xc4, + 0x65, 0xe2, 0x00, 0x08, 0x69, 0xc3, 0x00, 0x33, 0x00, 0x12, 0xa8, 0xca, + 0xa3, 0xaa, 0x05, 0x5a, 0xa9, 0xca, 0x4c, 0x63, 0x05, 0x5a, 0xa0, 0xc4, + 0x6d, 0xb5, 0x00, 0x13, 0xb9, 0xc5, 0x21, 0xa4, 0x00, 0x14, 0xd0, 0xce, + 0x01, 0x19, 0x0e, 0xf8, 0xe1, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xb8, 0x94, + 0x00, 0x13, 0xcb, 0x03, 0x86, 0x9c, 0x96, 0x00, 0x14, 0x3b, 0x03, 0x86, + 0xa2, 0x9b, 0x00, 0x14, 0x73, 0x03, 0x86, 0xa8, 0x89, 0x00, 0xeb, 0xb9, + 0x11, 0xc3, 0x86, 0xae, 0x8b, 0x00, 0xe8, 0x4b, 0x03, 0x86, 0xc4, 0x83, + 0x00, 0x12, 0x83, 0x03, 0x86, 0xca, 0xc2, 0x03, 0xd4, 0x05, 0x5a, 0x89, + 0x8a, 0x00, 0x13, 0x2b, 0x03, 0x86, 0xd4, 0x8f, 0x00, 0x13, 0x7b, 0x03, + 0x86, 0xdd, 0x98, 0x00, 0x14, 0x61, 0x99, 0x00, 0x14, 0x69, 0x8d, 0x00, + 0x14, 0xf1, 0x8e, 0x05, 0x3c, 0x09, 0xc5, 0xdb, 0x4b, 0x00, 0x0c, 0x69, + 0x87, 0x00, 0x0e, 0xe8, 0xd3, 0x42, 0xed, 0x0e, 0xf8, 0x48, 0x42, 0x01, + 0x94, 0xc3, 0x86, 0xe3, 0x43, 0x05, 0x19, 0x43, 0x86, 0xef, 0xcf, 0x68, + 0x82, 0x00, 0xf3, 0x89, 0xc6, 0xbd, 0xf4, 0x00, 0x0b, 0x19, 0xc4, 0x65, + 0xe2, 0x00, 0x0b, 0x29, 0xca, 0xa7, 0x1a, 0x00, 0x10, 0xd9, 0xc3, 0x00, + 0x33, 0x00, 0x11, 0xb0, 0xcc, 0x23, 0x3f, 0x05, 0x3b, 0x2a, 0x03, 0x86, + 0xfb, 0xc3, 0x22, 0xcb, 0x00, 0x0c, 0x29, 0xc3, 0x02, 0x9f, 0x00, 0x0d, + 0x41, 0xc4, 0x0d, 0x13, 0x00, 0x0d, 0xe8, 0xc2, 0x00, 0xc0, 0x00, 0x0d, + 0x0b, 0x03, 0x87, 0x01, 0xc8, 0x9e, 0x5c, 0x00, 0xf6, 0x78, 0xc9, 0x08, + 0xf7, 0x00, 0x07, 0xa3, 0x03, 0x87, 0x07, 0xc4, 0x65, 0xe2, 0x00, 0x0e, + 0x90, 0x11, 0xc3, 0x87, 0x0d, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0xb2, 0x03, + 0x87, 0x19, 0x45, 0x02, 0x9a, 0x43, 0x87, 0x1f, 0x45, 0x02, 0x9a, 0x43, + 0x87, 0x2b, 0xca, 0x9b, 0xda, 0x00, 0x0f, 0xf8, 0xd1, 0x53, 0x76, 0x05, + 0x3a, 0x59, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x69, 0xcd, 0x7d, 0xfa, 0x01, + 0x63, 0xd0, 0xcb, 0x98, 0x58, 0x00, 0x0f, 0x68, 0x46, 0x00, 0x8b, 0xc3, + 0x87, 0x3d, 0x87, 0x05, 0x5b, 0x10, 0xd4, 0x01, 0x13, 0x00, 0xec, 0x98, + 0xd3, 0x42, 0xed, 0x0e, 0xf8, 0xd0, 0x11, 0xc3, 0x87, 0x49, 0xc8, 0x20, + 0xa9, 0x00, 0x07, 0x7a, 0x03, 0x87, 0x55, 0xc6, 0x05, 0x01, 0x00, 0xf1, + 0x68, 0xc9, 0x08, 0xf7, 0x00, 0x07, 0x71, 0xc4, 0x65, 0xe2, 0x00, 0x0e, + 0x80, 0xcc, 0x23, 0x3f, 0x00, 0xeb, 0xe0, 0x89, 0x00, 0xeb, 0xc9, 0x88, + 0x05, 0x3b, 0xe1, 0x94, 0x05, 0x3c, 0x19, 0x95, 0x05, 0x3c, 0x29, 0x96, + 0x05, 0x3c, 0x39, 0x86, 0x05, 0x3b, 0xd0, 0xc5, 0xde, 0x3e, 0x05, 0x5b, + 0x21, 0xc2, 0x49, 0x0c, 0x05, 0x5a, 0x00, 0x46, 0x00, 0x8b, 0x43, 0x87, + 0x5b, 0xcf, 
0x68, 0x82, 0x00, 0xf0, 0x99, 0xc6, 0xbd, 0xf4, 0x00, 0xf0, + 0x89, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0x79, 0xc3, 0x00, 0x33, 0x00, 0x11, + 0x08, 0xc5, 0x05, 0x02, 0x00, 0xf0, 0x69, 0xc5, 0x00, 0xd4, 0x00, 0xf0, + 0x58, 0xc9, 0xa9, 0x2d, 0x00, 0xec, 0x88, 0xd3, 0x42, 0x2f, 0x05, 0x3e, + 0x41, 0xc5, 0x01, 0x74, 0x00, 0x08, 0x88, 0xc5, 0xcf, 0xcc, 0x00, 0x0c, + 0x61, 0xc3, 0x14, 0xa7, 0x00, 0x12, 0xb0, 0xc7, 0x45, 0x16, 0x00, 0x15, + 0x1b, 0x03, 0x87, 0x67, 0xca, 0x8e, 0x61, 0x00, 0x0e, 0x30, 0x94, 0x05, + 0x5a, 0x43, 0x03, 0x87, 0x6d, 0xc5, 0x42, 0xe8, 0x05, 0x3e, 0x99, 0xc4, + 0x95, 0x50, 0x05, 0x3e, 0xa8, 0x8c, 0x00, 0x11, 0xbb, 0x03, 0x87, 0x73, + 0x8b, 0x00, 0x09, 0x88, 0x45, 0x00, 0x8c, 0xc3, 0x87, 0x7c, 0xc8, 0x0f, + 0xbd, 0x00, 0x0d, 0xc8, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xa1, 0xcc, 0x4d, + 0x15, 0x05, 0x59, 0xe0, 0xca, 0xa7, 0x24, 0x0e, 0xf8, 0x5b, 0x03, 0x87, + 0x92, 0xce, 0x01, 0x19, 0x00, 0xec, 0xc1, 0xcc, 0x51, 0x28, 0x00, 0xec, + 0x59, 0xc4, 0x00, 0x32, 0x00, 0x14, 0x30, 0xc9, 0x08, 0xf7, 0x00, 0x07, + 0x53, 0x03, 0x87, 0x98, 0xc6, 0xbd, 0xf4, 0x00, 0x11, 0x4b, 0x03, 0x87, + 0x9c, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0xd8, 0xc6, 0x05, 0x01, 0x00, 0xf0, + 0xd8, 0x11, 0xc3, 0x87, 0xa2, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0x58, 0x45, + 0x02, 0x9a, 0x43, 0x87, 0xae, 0x00, 0xc3, 0x87, 0xba, 0xca, 0x4b, 0x1f, + 0x05, 0x5a, 0x38, 0xc2, 0x00, 0x75, 0x0e, 0xf8, 0x38, 0xc9, 0x33, 0xad, + 0x05, 0x39, 0xf8, 0x46, 0x00, 0x8b, 0x43, 0x87, 0xfb, 0xc3, 0x3a, 0xe6, + 0x00, 0x13, 0x63, 0x03, 0x88, 0x07, 0xc2, 0x00, 0xb1, 0x00, 0x0c, 0xd0, + 0xcf, 0x68, 0x82, 0x00, 0xf1, 0xe9, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0xe1, + 0xc4, 0x65, 0xe2, 0x00, 0x09, 0xf1, 0xc3, 0x00, 0x33, 0x00, 0x11, 0xa0, + 0xc7, 0x0e, 0x70, 0x00, 0xf1, 0xbb, 0x03, 0x88, 0x0d, 0x45, 0x00, 0x5a, + 0x43, 0x88, 0x13, 0xc4, 0x00, 0x9d, 0x05, 0x59, 0xc9, 0xc5, 0x1e, 0xc8, + 0x00, 0x13, 0x59, 0xc3, 0x02, 0xa3, 0x00, 0x0a, 0x00, 0xc9, 0xaa, 0xcb, + 0x05, 0x3c, 0x70, 0xd4, 0x01, 0x13, 0x0e, 0xf8, 0x28, 0xcb, 0x8f, 0xb5, + 0x00, 0xf4, 0xe9, 0x06, 0x43, 0x88, 0x1f, 0xc6, 0x00, 0xd3, 0x00, 0xf7, + 0xb8, 0x43, 0x05, 0x19, 0xc3, 0x88, 0x2b, 0xc8, 0x20, 0xa9, 0x00, 0x07, + 0xf8, 0xce, 0x36, 0x39, 0x05, 0x5a, 0xd1, 0xc5, 0x01, 0x74, 0x00, 0x12, + 0x78, 0x98, 0x00, 0xf7, 0xe9, 0xc2, 0x02, 0xa7, 0x00, 0xf7, 0xd8, 0xc5, + 0x05, 0x02, 0x00, 0xf2, 0x19, 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0x08, 0x42, + 0x01, 0x23, 0xc3, 0x88, 0x37, 0x06, 0xc3, 0x88, 0x46, 0xc6, 0x60, 0xb1, + 0x00, 0x0b, 0x5b, 0x03, 0x88, 0x53, 0xc5, 0x1e, 0xc8, 0x00, 0x0b, 0x4b, + 0x03, 0x88, 0x59, 0x05, 0xc3, 0x88, 0x5d, 0x14, 0xc3, 0x88, 0x6c, 0xc9, + 0x6d, 0x45, 0x05, 0x5a, 0x91, 0x15, 0xc3, 0x88, 0x78, 0xc5, 0x1f, 0x0c, + 0x00, 0x07, 0xc9, 0xc5, 0x31, 0xee, 0x00, 0x07, 0xd1, 0xc5, 0x1d, 0x88, + 0x00, 0x0b, 0x69, 0xc6, 0xcc, 0x8f, 0x00, 0x0b, 0x99, 0xce, 0x1d, 0x93, + 0x00, 0x10, 0xb8, 0xd5, 0x36, 0x32, 0x05, 0x5a, 0x78, 0xc5, 0x1d, 0x88, + 0x00, 0x08, 0x1b, 0x03, 0x88, 0x84, 0x05, 0xc3, 0x88, 0x8a, 0xca, 0x9e, + 0x5a, 0x00, 0xf5, 0x19, 0x06, 0xc3, 0x88, 0x99, 0x14, 0xc3, 0x88, 0xa6, + 0xce, 0x1d, 0x93, 0x00, 0x10, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x01, + 0xc5, 0x31, 0xee, 0x00, 0x07, 0x09, 0xc5, 0x1e, 0xc8, 0x00, 0x07, 0x19, + 0xc6, 0x60, 0xb1, 0x00, 0x08, 0x09, 0xc6, 0xcc, 0x8f, 0x00, 0x08, 0x29, + 0xc6, 0x01, 0x73, 0x01, 0x63, 0x28, 0xc5, 0x31, 0xee, 0x00, 0x0f, 0xe9, + 0xc6, 0x60, 0xb1, 0x00, 0x0f, 0x18, 0x43, 0x05, 0x19, 0xc3, 0x88, 0xb0, + 0xc8, 0x20, 0xa9, 0x00, 0xf4, 0x28, 0xc6, 0xbd, 0xf4, 0x00, 0xf1, 0x49, + 0xc9, 0x08, 0xf7, 0x00, 0x09, 0x29, 0xc4, 0x65, 0xe2, 0x00, 0x10, 0xf8, + 0xc8, 0x20, 
0xa9, 0x00, 0xf1, 0x39, 0x43, 0x05, 0x19, 0xc3, 0x88, 0xbc, + 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x40, 0x43, 0x05, 0x19, 0xc3, 0x88, 0xc8, + 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x60, 0xc9, 0x08, 0xf7, 0x00, 0xf4, 0x89, + 0xc3, 0x00, 0x33, 0x00, 0x14, 0x89, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0xf0, + 0xc5, 0x01, 0x74, 0x00, 0x0d, 0xb1, 0xc9, 0xb4, 0xeb, 0x00, 0x12, 0x00, + 0xc8, 0x20, 0xa9, 0x00, 0xf4, 0x69, 0xc8, 0x16, 0x15, 0x00, 0xf4, 0x58, + 0xcb, 0x95, 0xae, 0x05, 0x5a, 0xbb, 0x03, 0x88, 0xd4, 0xcc, 0x4c, 0x61, + 0x05, 0x5a, 0xb0, 0xc8, 0x0e, 0x6f, 0x00, 0xf3, 0xf9, 0xce, 0x3e, 0xae, + 0x05, 0x3a, 0xf8, 0xc5, 0x01, 0x74, 0x00, 0xeb, 0xeb, 0x03, 0x88, 0xd8, + 0xcc, 0x89, 0x01, 0x05, 0x3a, 0xa8, 0x05, 0xc3, 0x88, 0xde, 0x0e, 0xc3, + 0x88, 0xfc, 0x06, 0xc3, 0x89, 0x0e, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x39, + 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x81, 0xc5, 0x1f, 0x0c, 0x00, 0x0f, 0xc9, + 0xce, 0x01, 0x19, 0x00, 0x13, 0x99, 0xc5, 0x1e, 0xc8, 0x00, 0x07, 0x89, + 0xc5, 0x31, 0xee, 0x00, 0x0a, 0x21, 0xce, 0x38, 0xe6, 0x05, 0x3d, 0x28, + 0xc8, 0x0e, 0x6f, 0x00, 0xf1, 0x99, 0xce, 0x3e, 0xae, 0x05, 0x3a, 0x19, + 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x50, 0xd4, 0x3e, 0xa8, 0x05, 0x3a, 0x28, + 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0xb9, 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x48, + 0xc9, 0x08, 0xf7, 0x00, 0x08, 0xe9, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0x19, + 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x38, 0xc5, 0x05, 0x02, 0x00, 0xf0, 0x29, + 0xc5, 0x00, 0xd4, 0x00, 0xf0, 0x18, 0x87, 0x05, 0x59, 0x99, 0xc5, 0xde, + 0x3e, 0x05, 0x59, 0x81, 0x91, 0x00, 0x13, 0xa8, 0xcc, 0x23, 0x3f, 0x05, + 0x59, 0xf0, 0xcb, 0x4d, 0x16, 0x00, 0x14, 0xe9, 0xc9, 0x08, 0xf7, 0x00, + 0x09, 0xa9, 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x80, 0xc5, 0x41, 0x20, 0x00, + 0x12, 0x58, 0xc5, 0x05, 0x02, 0x00, 0xf7, 0xa1, 0xc5, 0x00, 0xd4, 0x00, + 0xf4, 0x70, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x7b, 0x03, 0x89, 0x1a, 0xc8, + 0x9e, 0x5c, 0x00, 0xf7, 0x30, 0x11, 0xc3, 0x89, 0x20, 0xc8, 0x20, 0xa9, + 0x00, 0x06, 0xe2, 0x03, 0x89, 0x2c, 0xce, 0x74, 0xe8, 0x00, 0xf3, 0xd0, + 0x00, 0x43, 0x89, 0x30, 0xc9, 0x08, 0xf7, 0x00, 0x06, 0xdb, 0x03, 0x89, + 0x3c, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x98, 0x45, 0x02, 0x9a, 0x43, 0x89, + 0x42, 0x45, 0x02, 0x9a, 0x43, 0x89, 0x60, 0x42, 0x00, 0x30, 0xc3, 0x89, + 0x7e, 0x45, 0x00, 0x5a, 0x43, 0x89, 0x8d, 0xcb, 0x98, 0x58, 0x00, 0x11, + 0x50, 0x45, 0x02, 0x9a, 0x43, 0x89, 0x99, 0xc9, 0x20, 0xa8, 0x00, 0xf2, + 0x71, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0x61, 0xc6, 0x60, 0xb1, 0x00, 0x11, + 0x60, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xa5, 0xca, 0x1f, 0x07, 0x00, 0x10, + 0x40, 0xca, 0x9b, 0xda, 0x00, 0xf1, 0x70, 0x00, 0x43, 0x89, 0xb1, 0xca, + 0x9b, 0x80, 0x00, 0xf0, 0xe0, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xbd, 0xca, + 0x1f, 0x07, 0x00, 0x10, 0x20, 0xc5, 0x31, 0xee, 0x00, 0xf0, 0xb1, 0xc5, + 0x1f, 0x0c, 0x00, 0xf0, 0xa0, 0xc9, 0x0e, 0x6e, 0x00, 0xf5, 0xb1, 0xc5, + 0x1e, 0xc8, 0x00, 0xf5, 0xa1, 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x91, 0xc5, + 0x1f, 0x0c, 0x00, 0xf5, 0x81, 0xc5, 0x31, 0xee, 0x00, 0xf5, 0x70, 0x45, + 0x02, 0x9a, 0x43, 0x89, 0xc9, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xe7, 0xca, + 0x1f, 0x07, 0x00, 0x10, 0x00, 0xcb, 0x98, 0x58, 0x00, 0x0e, 0xf0, 0xca, + 0x9b, 0xda, 0x00, 0x0f, 0xd0, 0xce, 0x16, 0x0f, 0x00, 0xf3, 0x40, 0xce, + 0x16, 0x0f, 0x00, 0xf3, 0x30, 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x91, 0xc5, + 0x00, 0xd4, 0x00, 0x0b, 0xd8, 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x41, 0xc5, + 0x00, 0xd4, 0x00, 0xf4, 0x30, 0xc5, 0x05, 0x02, 0x00, 0xf3, 0x61, 0xc5, + 0x00, 0xd4, 0x00, 0xf3, 0x50, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xf6, 0xca, + 0x1f, 0x07, 0x00, 0x10, 0x80, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0xb1, 0xc4, + 0x65, 0xe2, 
0x00, 0x0a, 0xc0, 0xd2, 0x25, 0xf1, 0x05, 0x3a, 0x80, 0xc5, + 0x05, 0x02, 0x00, 0xf2, 0x31, 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0x20, 0xcb, + 0x98, 0x58, 0x00, 0xf1, 0xc0, 0xc5, 0x05, 0x02, 0x00, 0xf1, 0x21, 0xc5, + 0x00, 0xd4, 0x00, 0xf1, 0x10, 0xcb, 0x8e, 0x60, 0x00, 0x0e, 0x28, 0xca, + 0x9b, 0xda, 0x00, 0xf0, 0x40, 0xd0, 0x57, 0xc2, 0x0f, 0xc1, 0x89, 0xcb, + 0x57, 0xc7, 0x0f, 0xc1, 0x69, 0xca, 0xa0, 0x08, 0x0f, 0xc1, 0x49, 0x49, + 0xa8, 0xdc, 0xc3, 0x8a, 0x05, 0xd8, 0x24, 0xb3, 0x01, 0x5b, 0xd9, 0xcc, + 0x84, 0x09, 0x0f, 0xc1, 0x09, 0xcc, 0x82, 0x1d, 0x0f, 0xc1, 0x28, 0xe0, + 0x03, 0xe7, 0x01, 0x5c, 0x08, 0xc6, 0x44, 0x50, 0x07, 0xd9, 0x69, 0xc7, + 0x44, 0x4f, 0x07, 0xd9, 0x60, 0xc5, 0x79, 0xf2, 0x05, 0x4b, 0x51, 0xc6, + 0xc0, 0x7c, 0x05, 0x4b, 0x39, 0xc6, 0x8e, 0xde, 0x05, 0x4b, 0x28, 0xc5, + 0x8e, 0xdf, 0x00, 0x89, 0x69, 0xc6, 0xbb, 0xec, 0x00, 0x89, 0xc0, 0xc5, + 0xc0, 0x7d, 0x00, 0x89, 0x79, 0xc6, 0xc1, 0x86, 0x00, 0x89, 0xc8, 0xc4, + 0x79, 0xf3, 0x00, 0x89, 0x93, 0x03, 0x8a, 0x11, 0xc6, 0xba, 0x7c, 0x00, + 0x89, 0xd0, 0xc4, 0xc6, 0x7a, 0x00, 0x89, 0xb1, 0xc6, 0xc6, 0x79, 0x00, + 0x89, 0xb8, 0xc6, 0xbb, 0xec, 0x05, 0x4b, 0x99, 0xc5, 0x8e, 0xdf, 0x00, + 0x88, 0xf0, 0xc3, 0x39, 0x37, 0x00, 0x89, 0x0b, 0x03, 0x8a, 0x17, 0xc8, + 0xad, 0x27, 0x00, 0x89, 0x28, 0xc5, 0xc0, 0x7d, 0x00, 0x89, 0x01, 0xc6, + 0xc1, 0x86, 0x00, 0x89, 0x48, 0xc4, 0xc6, 0x7a, 0x00, 0x89, 0x39, 0xc6, + 0xc6, 0x79, 0x00, 0x89, 0x40, 0xc7, 0xbb, 0xeb, 0x00, 0x8a, 0x91, 0xc5, + 0x90, 0xe4, 0x00, 0x8a, 0x98, 0xc3, 0x39, 0x37, 0x00, 0x89, 0xe1, 0x44, + 0x3a, 0xbf, 0x43, 0x8a, 0x1b, 0xc4, 0xc6, 0x7a, 0x00, 0x8a, 0x71, 0xc6, + 0xc6, 0x79, 0x00, 0x8a, 0xa0, 0xc4, 0xad, 0x2b, 0x00, 0x89, 0xf9, 0xc5, + 0xdb, 0xff, 0x00, 0x8a, 0x88, 0x87, 0x06, 0xbe, 0x33, 0x03, 0x8a, 0x27, + 0x97, 0x00, 0x8d, 0x01, 0x8b, 0x00, 0x8d, 0x09, 0x83, 0x06, 0xbe, 0x28, + 0x91, 0x00, 0x8b, 0xc1, 0xc2, 0x42, 0xcd, 0x00, 0x8b, 0xc9, 0x97, 0x00, + 0x8d, 0x20, 0x02, 0x43, 0x8a, 0x2b, 0x1b, 0xc3, 0x8a, 0x39, 0x91, 0x00, + 0x8c, 0x39, 0x8b, 0x00, 0x8c, 0x41, 0x83, 0x06, 0xbd, 0x93, 0x03, 0x8a, + 0x46, 0xc2, 0x02, 0x66, 0x06, 0xbd, 0xa0, 0x83, 0x00, 0x8c, 0x73, 0x03, + 0x8a, 0x4a, 0x87, 0x00, 0x8c, 0x83, 0x03, 0x8a, 0x4e, 0xc2, 0x0c, 0x43, + 0x00, 0x8c, 0x93, 0x03, 0x8a, 0x52, 0x97, 0x00, 0x8c, 0x99, 0x8b, 0x00, + 0x8c, 0xa1, 0x91, 0x06, 0xbd, 0xc0, 0x91, 0x00, 0x8b, 0xd1, 0x97, 0x00, + 0x8b, 0xd9, 0xc2, 0x2c, 0x43, 0x00, 0x8b, 0xe0, 0x97, 0x00, 0x8c, 0xa9, + 0x87, 0x06, 0xbd, 0xdb, 0x03, 0x8a, 0x56, 0x83, 0x06, 0xbd, 0xc9, 0x91, + 0x06, 0xbd, 0xe0, 0x91, 0x00, 0x8b, 0xf8, 0x87, 0x00, 0x8c, 0x0b, 0x03, + 0x8a, 0x5e, 0x83, 0x00, 0x8d, 0x32, 0x03, 0x8a, 0x62, 0xc2, 0x09, 0x90, + 0x06, 0xbd, 0x88, 0x87, 0x00, 0x8c, 0x50, 0x91, 0x06, 0xbd, 0xa8, 0xc4, + 0xa6, 0x08, 0x00, 0x8c, 0xe8, 0x83, 0x00, 0x8c, 0xcb, 0x03, 0x8a, 0x66, + 0x87, 0x06, 0xbe, 0x03, 0x03, 0x8a, 0x70, 0x91, 0x06, 0xbe, 0x11, 0x97, + 0x06, 0xbe, 0x18, 0xc2, 0x09, 0x90, 0x06, 0xbe, 0x08, 0xc4, 0xad, 0x2b, + 0x00, 0x8d, 0x53, 0x03, 0x8a, 0x74, 0xc5, 0xd6, 0x8c, 0x00, 0x8e, 0x19, + 0xc5, 0xd9, 0x61, 0x00, 0x8f, 0xd1, 0xc5, 0x79, 0xf2, 0x00, 0x8f, 0xd9, + 0xc7, 0xc6, 0x78, 0x00, 0x8f, 0xe1, 0xc7, 0xbb, 0xeb, 0x00, 0x8f, 0xe9, + 0xc5, 0x90, 0xe4, 0x00, 0x8f, 0xf0, 0xc4, 0x79, 0xf3, 0x00, 0x8f, 0x31, + 0xc6, 0xba, 0x7c, 0x00, 0x8f, 0xa0, 0x02, 0x43, 0x8a, 0x7a, 0xc8, 0xbb, + 0xea, 0x06, 0xbe, 0xb8, 0xc6, 0xd1, 0x57, 0x06, 0xbe, 0x70, 0x0d, 0xc3, + 0x8a, 0x86, 0x16, 0xc3, 0x8a, 0x92, 0xc5, 0xd6, 0x8c, 0x00, 0x8f, 0x49, + 0x12, 0xc3, 0x8a, 0x9e, 0xc5, 0xda, 0xe7, 0x06, 0xbf, 0x51, 0x05, 0xc3, + 0x8a, 0xaa, 
0xc5, 0x90, 0xe4, 0x06, 0xbf, 0x90, 0xc4, 0xc6, 0x7a, 0x00, + 0x8d, 0x61, 0xc6, 0xc6, 0x79, 0x06, 0xbe, 0x60, 0xc5, 0xc0, 0x7d, 0x00, + 0x8e, 0x31, 0xc6, 0xc1, 0x86, 0x00, 0x8e, 0x48, 0xc6, 0x8e, 0xde, 0x00, + 0x8e, 0x51, 0xc5, 0xd6, 0x8c, 0x00, 0x8e, 0x59, 0xc5, 0x79, 0xf2, 0x06, + 0xbe, 0x79, 0xc4, 0xad, 0x2b, 0x06, 0xbe, 0x83, 0x03, 0x8a, 0xb6, 0x05, + 0xc3, 0x8a, 0xbc, 0xc7, 0xc1, 0x85, 0x06, 0xbe, 0xa0, 0xc5, 0x8e, 0xdf, + 0x00, 0x8d, 0x83, 0x03, 0x8a, 0xc8, 0xcc, 0x79, 0xeb, 0x00, 0x8e, 0xa9, + 0xc6, 0xbb, 0xec, 0x00, 0x8e, 0xc0, 0x02, 0x43, 0x8a, 0xcc, 0xc4, 0x79, + 0xf3, 0x00, 0x8d, 0x93, 0x03, 0x8a, 0xde, 0xc6, 0xba, 0x7c, 0x00, 0x8d, + 0xa9, 0xc6, 0xca, 0x0e, 0x00, 0x8e, 0xb8, 0xc3, 0x39, 0x37, 0x00, 0x8d, + 0x99, 0x44, 0x3a, 0xbf, 0x43, 0x8a, 0xe2, 0xc6, 0xc1, 0x86, 0x00, 0x8d, + 0xa1, 0xc5, 0xc0, 0x7d, 0x00, 0x8e, 0x72, 0x03, 0x8a, 0xee, 0xc9, 0x90, + 0xe0, 0x00, 0x8e, 0xcb, 0x03, 0x8a, 0xf4, 0xc6, 0xb7, 0x9c, 0x06, 0xbe, + 0xd8, 0xc4, 0x79, 0xf3, 0x00, 0x8e, 0xe3, 0x03, 0x8a, 0xfa, 0xc6, 0xca, + 0x0e, 0x00, 0x8e, 0xf8, 0xc3, 0x39, 0x37, 0x00, 0x8e, 0xe9, 0x44, 0x3a, + 0xbf, 0x43, 0x8b, 0x00, 0xc6, 0xc6, 0x79, 0x00, 0x8f, 0x01, 0xc4, 0xc6, + 0x7a, 0x06, 0xbf, 0x10, 0xc4, 0xad, 0x2b, 0x00, 0x8d, 0xcb, 0x03, 0x8b, + 0x0c, 0xc5, 0xd6, 0x8c, 0x00, 0x8f, 0x1b, 0x03, 0x8b, 0x12, 0xc7, 0xba, + 0x7b, 0x00, 0x8f, 0x21, 0xc5, 0x90, 0xe4, 0x00, 0x8f, 0x29, 0xc6, 0xc0, + 0x7c, 0x06, 0xbf, 0x19, 0xc5, 0xda, 0xe7, 0x06, 0xbf, 0x29, 0x05, 0x43, + 0x8b, 0x18, 0xc5, 0x79, 0xf2, 0x00, 0x8f, 0x99, 0xc4, 0xad, 0x2b, 0x06, + 0xbf, 0xd1, 0xc7, 0xc1, 0x85, 0x06, 0xbf, 0xd8, 0xc5, 0x79, 0xf2, 0x06, + 0xbf, 0x99, 0xcd, 0x79, 0xea, 0x06, 0xbf, 0xa0, 0xc5, 0x8e, 0xdf, 0x00, + 0x8f, 0x61, 0xc6, 0xbb, 0xec, 0x00, 0x8f, 0x78, 0xc5, 0x79, 0xf2, 0x06, + 0xbf, 0xb9, 0xca, 0xa7, 0x2e, 0x06, 0xbf, 0xc0, 0x0d, 0xc3, 0x8b, 0x24, + 0x15, 0xc3, 0x8b, 0x30, 0xc7, 0xca, 0x0d, 0x00, 0x8f, 0x91, 0xc5, 0xda, + 0xe7, 0x06, 0xbf, 0xa9, 0xc5, 0x90, 0xe4, 0x06, 0xbf, 0xb0, 0xc5, 0xd9, + 0xca, 0x01, 0x8b, 0x58, 0x02, 0x43, 0x8b, 0x3c, 0xc5, 0xc0, 0x7d, 0x01, + 0x8b, 0x99, 0xc6, 0xc1, 0x86, 0x01, 0x8b, 0xb8, 0xc4, 0xad, 0x2b, 0x01, + 0x8c, 0x11, 0xc7, 0xca, 0x0d, 0x01, 0x8c, 0x18, 0x87, 0x01, 0x8c, 0x40, + 0x91, 0x01, 0x8c, 0x50, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x81, 0xc7, 0x0d, + 0x04, 0x0f, 0x64, 0x38, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x79, 0xc7, 0x0d, + 0x04, 0x0f, 0x64, 0x30, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x71, 0xc7, 0x0d, + 0x04, 0x0f, 0x64, 0x28, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x69, 0xc7, 0x0d, + 0x04, 0x0f, 0x64, 0x20, 0x91, 0x01, 0x9f, 0x09, 0x07, 0x43, 0x8b, 0x48, + 0xc3, 0x02, 0xdf, 0x01, 0x9f, 0x11, 0x43, 0x0d, 0x0e, 0x43, 0x8b, 0x57, + 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x68, 0xc2, 0x00, 0x5f, 0x01, 0x9f, 0x21, + 0xc5, 0x14, 0x08, 0x01, 0x9f, 0x70, 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x78, + 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x80, 0xc3, 0x03, 0x26, 0x01, 0x9f, 0x88, + 0xc3, 0x22, 0x45, 0x01, 0x9b, 0x21, 0xc3, 0x18, 0x13, 0x01, 0x9b, 0x62, + 0x03, 0x8b, 0x64, 0x4b, 0x18, 0x04, 0xc3, 0x8b, 0x68, 0xdc, 0x13, 0xf9, + 0x0f, 0xd2, 0x28, 0xce, 0x3d, 0x7c, 0x01, 0x2f, 0x91, 0xcd, 0x02, 0xb4, + 0x01, 0x2f, 0x88, 0xce, 0x6c, 0x0c, 0x0f, 0xb1, 0x81, 0xc8, 0xba, 0xaa, + 0x0f, 0xc9, 0x70, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x98, 0xc9, 0x57, 0x20, + 0x08, 0x4f, 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xb3, 0x03, 0x8b, 0x74, + 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xf8, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xab, + 0x03, 0x8b, 0x7a, 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xf0, 0xc7, 0x0d, 0x04, + 0x08, 0x4e, 0xa3, 0x03, 0x8b, 0x80, 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xe8, + 0xc7, 0x0d, 
0x04, 0x08, 0x4e, 0x9b, 0x03, 0x8b, 0x86, 0xc8, 0x4b, 0x94, + 0x08, 0x4e, 0xe0, 0x98, 0x00, 0xed, 0xd1, 0x8f, 0x00, 0xea, 0xd3, 0x03, + 0x8b, 0x8c, 0x8a, 0x00, 0xed, 0x19, 0x83, 0x00, 0xea, 0x23, 0x03, 0x8b, + 0x92, 0x8b, 0x00, 0xea, 0x71, 0xc6, 0x21, 0xa3, 0x00, 0xea, 0x61, 0x99, + 0x05, 0x5b, 0x49, 0x94, 0x00, 0x15, 0xa3, 0x03, 0x8b, 0x9c, 0x9b, 0x08, + 0x3d, 0x02, 0x03, 0x8b, 0xa2, 0xcc, 0x51, 0x28, 0x00, 0xed, 0xa9, 0xce, + 0x01, 0x19, 0x08, 0x3d, 0x78, 0xd4, 0x01, 0x13, 0x08, 0x3d, 0x68, 0xc4, + 0x00, 0x32, 0x00, 0xed, 0xe9, 0xce, 0x01, 0x19, 0x00, 0xed, 0xe0, 0xc4, + 0x01, 0x23, 0x00, 0xed, 0xc9, 0xca, 0x9f, 0x4a, 0x08, 0x3d, 0x80, 0x97, + 0x00, 0xed, 0xc1, 0x90, 0x00, 0xed, 0x81, 0x8e, 0x00, 0xed, 0x5b, 0x03, + 0x8b, 0xa8, 0x8b, 0x00, 0xed, 0x33, 0x03, 0x8b, 0xae, 0x84, 0x08, 0x3c, + 0x21, 0xc2, 0x04, 0xc6, 0x08, 0x3c, 0x01, 0x9b, 0x08, 0x3d, 0x91, 0x89, + 0x08, 0x3c, 0x93, 0x03, 0x8b, 0xba, 0x8a, 0x08, 0x3c, 0xb1, 0xc2, 0x49, + 0x0c, 0x08, 0x3d, 0x19, 0x94, 0x08, 0x3d, 0x50, 0xcf, 0x61, 0xe3, 0x08, + 0x3c, 0x79, 0xc5, 0x9b, 0xd5, 0x08, 0x3d, 0x20, 0xc3, 0x01, 0x5d, 0x00, + 0xed, 0xb1, 0xce, 0x6d, 0x40, 0x05, 0x5a, 0xf8, 0xc4, 0x01, 0x23, 0x00, + 0xed, 0x99, 0xc4, 0x00, 0x32, 0x08, 0x3d, 0xd0, 0xc6, 0xbb, 0x8c, 0x00, + 0xed, 0x11, 0xc3, 0x74, 0x83, 0x00, 0xea, 0x50, 0xcc, 0x51, 0x28, 0x00, + 0xed, 0x51, 0xce, 0x01, 0x19, 0x00, 0xed, 0x4b, 0x03, 0x8b, 0xc0, 0xcc, + 0x1e, 0xc1, 0x05, 0x5a, 0xf1, 0xcf, 0x68, 0x64, 0x05, 0x5a, 0xe9, 0xc4, + 0xa8, 0x1a, 0x08, 0x3c, 0xd8, 0xd4, 0x01, 0x13, 0x08, 0x3c, 0xf8, 0xc9, + 0x20, 0xb1, 0x08, 0x3c, 0xc0, 0xc3, 0x80, 0x9f, 0x00, 0xea, 0xf9, 0xca, + 0x9a, 0x86, 0x08, 0x3c, 0x50, 0xc4, 0x01, 0x23, 0x08, 0x3c, 0x63, 0x03, + 0x8b, 0xc6, 0xc4, 0x14, 0xa6, 0x08, 0x3c, 0x58, 0x46, 0x00, 0x8b, 0x43, + 0x8b, 0xcc, 0xc6, 0x21, 0xa3, 0x00, 0xec, 0xf9, 0x87, 0x08, 0x3c, 0x71, + 0xcc, 0x23, 0x33, 0x00, 0x17, 0x20, 0xc4, 0x14, 0xa6, 0x08, 0x3d, 0x41, + 0xc8, 0x61, 0x72, 0x08, 0x3d, 0x48, 0xc3, 0x1c, 0x8d, 0x00, 0xeb, 0x01, + 0xc5, 0x51, 0x51, 0x00, 0xea, 0xf0, 0x91, 0x00, 0xea, 0x99, 0x87, 0x00, + 0xea, 0x58, 0xca, 0x1f, 0x59, 0x08, 0x3c, 0xb8, 0xc4, 0x01, 0x23, 0x00, + 0x15, 0x89, 0xc6, 0x01, 0x73, 0x08, 0x3c, 0xa8, 0x90, 0x00, 0xe9, 0xd9, + 0x87, 0x00, 0xe9, 0x90, 0xcc, 0x23, 0x3f, 0x08, 0x3d, 0xa0, 0x45, 0x19, + 0x7c, 0xc3, 0x8b, 0xd8, 0xcc, 0x3e, 0xe6, 0x00, 0x17, 0x78, 0xce, 0x4e, + 0x8d, 0x05, 0x38, 0xa9, 0xc6, 0x01, 0xa1, 0x00, 0x17, 0xfa, 0x03, 0x8b, + 0xe4, 0xc7, 0x4e, 0x94, 0x00, 0x17, 0x41, 0xc4, 0x1e, 0xc9, 0x00, 0x17, + 0xb8, 0xcd, 0x2f, 0xa1, 0x00, 0x17, 0x91, 0xc2, 0x00, 0x75, 0x00, 0x17, + 0x98, 0x47, 0x19, 0x7a, 0xc3, 0x8b, 0xea, 0xd2, 0x4e, 0x89, 0x05, 0x38, + 0xa1, 0xc8, 0x4e, 0x93, 0x00, 0x17, 0x38, 0xcc, 0x1f, 0x0c, 0x00, 0x17, + 0xa1, 0x47, 0x00, 0x58, 0x43, 0x8b, 0xf6, 0xc8, 0x4e, 0x93, 0x05, 0x38, + 0x41, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x68, 0xc8, 0x4e, 0x93, 0x05, 0x38, + 0x61, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x88, 0x0f, 0x43, 0x8c, 0x02, 0xc2, + 0x00, 0xba, 0x0e, 0xbe, 0x09, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0xf9, 0x8b, + 0x0e, 0xbd, 0xc8, 0xc2, 0x00, 0x0a, 0x0e, 0xbe, 0x00, 0xc6, 0x10, 0x3f, + 0x0e, 0xbd, 0xf0, 0xc2, 0x20, 0xec, 0x0e, 0xbd, 0xe9, 0xc4, 0x89, 0xfe, + 0x0e, 0xbd, 0x88, 0xc4, 0x1a, 0x73, 0x0e, 0xbd, 0xe0, 0xca, 0x91, 0x2c, + 0x0e, 0xbd, 0xd8, 0xc2, 0x01, 0x23, 0x0e, 0xbd, 0xd0, 0x8b, 0x0e, 0xbd, + 0xb8, 0x97, 0x0e, 0xbd, 0xb0, 0x97, 0x0e, 0xbd, 0xa8, 0xc4, 0xdd, 0x9a, + 0x0e, 0xbd, 0xa0, 0xc4, 0x8b, 0x66, 0x0e, 0xbd, 0x98, 0xc3, 0x01, 0xbb, + 0x0e, 0xbd, 0x90, 0xc2, 0x01, 0x6f, 0x0e, 0xbd, 0x81, 0xc6, 0x10, 0x3f, + 0x0e, 0xbd, 
0x70, 0xc3, 0x04, 0x87, 0x0e, 0xbd, 0x78, 0xc4, 0xdb, 0x4c, + 0x0e, 0xbd, 0x68, 0xc4, 0x38, 0x2c, 0x0e, 0xbd, 0x60, 0xc3, 0x04, 0x87, + 0x0e, 0xbd, 0x58, 0xc4, 0xde, 0x3f, 0x0e, 0xbd, 0x50, 0x0f, 0x43, 0x8c, + 0x0e, 0xc2, 0x00, 0xba, 0x0e, 0xbd, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, + 0x29, 0x8b, 0x0e, 0xbc, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0x30, 0xc6, + 0x10, 0x3f, 0x0e, 0xbd, 0x20, 0xc2, 0x20, 0xec, 0x0e, 0xbd, 0x19, 0xc4, + 0x89, 0xfe, 0x0e, 0xbc, 0xba, 0x03, 0x8c, 0x1a, 0xc4, 0x1a, 0x73, 0x0e, + 0xbd, 0x10, 0xc2, 0x01, 0x23, 0x0e, 0xbd, 0x00, 0x8b, 0x0e, 0xbc, 0xe8, + 0x97, 0x0e, 0xbc, 0xe0, 0x97, 0x0e, 0xbc, 0xd8, 0xc4, 0xdd, 0x9a, 0x0e, + 0xbc, 0xd0, 0xc4, 0x8b, 0x66, 0x0e, 0xbc, 0xc8, 0xc3, 0x01, 0xbb, 0x0e, + 0xbc, 0xc0, 0xc2, 0x01, 0x6f, 0x0e, 0xbc, 0xb1, 0xc6, 0x10, 0x3f, 0x0e, + 0xbc, 0xa0, 0xc3, 0x04, 0x87, 0x0e, 0xbc, 0xa8, 0xc4, 0xdb, 0x4c, 0x0e, + 0xbc, 0x98, 0xc4, 0x38, 0x2c, 0x0e, 0xbc, 0x90, 0xc3, 0x04, 0x87, 0x0e, + 0xbc, 0x88, 0xc4, 0xde, 0x3f, 0x0e, 0xbc, 0x80, 0xc3, 0x11, 0x7e, 0x0e, + 0xbc, 0x41, 0xc5, 0xd8, 0x8f, 0x0e, 0xbb, 0xf0, 0xc3, 0x11, 0x7e, 0x0e, + 0xbb, 0x71, 0xc5, 0xd8, 0x8f, 0x0e, 0xbb, 0x20, 0xc7, 0x00, 0x90, 0x0e, + 0xbb, 0x38, 0x8e, 0x00, 0x6a, 0xb0, 0xc8, 0xb3, 0xb1, 0x0e, 0x8f, 0x41, + 0xc9, 0xaf, 0xae, 0x0e, 0x8f, 0x00, 0x50, 0x59, 0xd2, 0xc3, 0x8c, 0x20, + 0xcb, 0x94, 0xdd, 0x0e, 0x8e, 0xf8, 0xc2, 0x02, 0xae, 0x0e, 0x8f, 0x29, + 0xc4, 0x03, 0xc8, 0x0e, 0x8f, 0x20, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0x39, + 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x30, 0x47, 0xc3, 0x53, 0xc3, 0x8c, 0x2c, + 0x47, 0xc6, 0x94, 0x43, 0x8c, 0x3e, 0x16, 0xc3, 0x8c, 0x50, 0x02, 0x43, + 0x8c, 0x5c, 0xc4, 0x03, 0xc8, 0x0e, 0x89, 0x89, 0xc2, 0x02, 0xae, 0x0e, + 0x89, 0x80, 0xc7, 0xc4, 0x9c, 0x0e, 0x8d, 0x79, 0xc4, 0x01, 0xc3, 0x0e, + 0x8d, 0x70, 0xc7, 0xc8, 0xe7, 0x0e, 0x8e, 0xd0, 0xca, 0x68, 0x19, 0x0e, + 0x8e, 0x5b, 0x03, 0x8c, 0x68, 0xc8, 0x68, 0x1b, 0x0e, 0x8e, 0x50, 0xc8, + 0x68, 0x1b, 0x0e, 0x8e, 0x3b, 0x03, 0x8c, 0x6e, 0xca, 0x68, 0x19, 0x0e, + 0x8e, 0x40, 0xc2, 0x02, 0xae, 0x0e, 0x8c, 0xd1, 0xc5, 0x03, 0x02, 0x0e, + 0x8c, 0xc8, 0x55, 0x32, 0x96, 0xc3, 0x8c, 0x74, 0x4a, 0x32, 0x9c, 0x43, + 0x8c, 0x80, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x11, 0xc4, 0x2c, 0x0d, 0x0e, + 0x8a, 0x00, 0xc5, 0xdb, 0xeb, 0x0e, 0x8e, 0xb9, 0xc3, 0x30, 0xf3, 0x0e, + 0x8e, 0xa8, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0xd9, 0xc5, 0x01, 0xfc, 0x0e, + 0x8a, 0xd0, 0x47, 0x1d, 0xd4, 0xc3, 0x8c, 0x98, 0xc8, 0xb9, 0x62, 0x0e, + 0x89, 0xa0, 0xc6, 0xd1, 0xe1, 0x0e, 0x8e, 0x89, 0xc6, 0xcb, 0x39, 0x0e, + 0x8e, 0x80, 0xc8, 0xbc, 0x72, 0x0e, 0x8c, 0xa9, 0xc5, 0x03, 0x02, 0x0e, + 0x8c, 0xa0, 0xc5, 0xd7, 0x6d, 0x0e, 0x89, 0x01, 0xc4, 0xe2, 0x4b, 0x0e, + 0x88, 0xf8, 0xc4, 0x2c, 0x0d, 0x0e, 0x8e, 0x29, 0xc5, 0x02, 0xc2, 0x0e, + 0x8d, 0xe0, 0x18, 0xc3, 0x8c, 0xd7, 0xc8, 0xbe, 0x42, 0x0e, 0x88, 0x90, + 0xc3, 0x00, 0x3c, 0x0e, 0x88, 0xa9, 0x87, 0x0e, 0x88, 0xa0, 0xcf, 0x68, + 0x19, 0x0e, 0x8e, 0x11, 0xcd, 0x68, 0x1b, 0x0e, 0x8e, 0x08, 0xd0, 0x5b, + 0x02, 0x0e, 0x88, 0xe9, 0xca, 0x74, 0x98, 0x0e, 0x88, 0xc8, 0x4e, 0x6d, + 0xbe, 0xc3, 0x8c, 0xe4, 0xca, 0x44, 0x39, 0x0e, 0x88, 0x10, 0xc5, 0xd7, + 0x6d, 0x0e, 0x89, 0x21, 0xc4, 0xe2, 0x4b, 0x0e, 0x89, 0x18, 0xc4, 0x63, + 0xf2, 0x0e, 0x8d, 0xa8, 0x9e, 0x0e, 0x8d, 0x29, 0x9d, 0x0e, 0x8d, 0x20, + 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x21, 0xc4, 0x2c, 0x0d, 0x0e, 0x8a, 0x10, + 0x4a, 0xa1, 0xde, 0xc3, 0x8c, 0xf0, 0xc5, 0x02, 0xa2, 0x0e, 0x88, 0x40, + 0xc4, 0x35, 0x36, 0x0e, 0x89, 0x99, 0xc5, 0xa2, 0xba, 0x0e, 0x89, 0x90, + 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xc9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x48, + 0xc3, 0x38, 
0x5b, 0x00, 0xcf, 0xc1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x40, + 0xc3, 0xdf, 0x37, 0x00, 0xbf, 0xc9, 0xc2, 0x06, 0xdb, 0x00, 0xbf, 0xc0, + 0xd3, 0x45, 0x4d, 0x0f, 0xd1, 0x91, 0xcf, 0x18, 0x0f, 0x0f, 0xd2, 0x18, + 0xd0, 0x3c, 0x90, 0x01, 0x49, 0x71, 0xd0, 0x3c, 0x2c, 0x01, 0x49, 0x88, + 0xc6, 0x13, 0x66, 0x01, 0x0f, 0x89, 0xc8, 0xb8, 0xca, 0x01, 0x0d, 0xc0, + 0x46, 0x00, 0x8b, 0x43, 0x8c, 0xfc, 0x46, 0x00, 0x8b, 0x43, 0x8d, 0x1b, + 0xc4, 0xe3, 0xab, 0x00, 0xff, 0x59, 0x18, 0xc3, 0x8d, 0x3f, 0xc6, 0x60, + 0xb1, 0x00, 0xff, 0x49, 0x06, 0xc3, 0x8d, 0x4b, 0xc5, 0x63, 0xdc, 0x00, + 0x1c, 0x70, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0xd9, 0x18, 0xc3, 0x8d, 0x5a, + 0xc6, 0x60, 0xb1, 0x00, 0xfe, 0xc9, 0x06, 0xc3, 0x8d, 0x66, 0xc5, 0xd8, + 0xc1, 0x00, 0xf9, 0xc3, 0x03, 0x8d, 0x75, 0xc5, 0x63, 0xdc, 0x00, 0x1c, + 0x50, 0x46, 0x00, 0x8b, 0x43, 0x8d, 0x7b, 0x46, 0x00, 0x8b, 0x43, 0x8d, + 0x9a, 0x46, 0x00, 0x8b, 0x43, 0x8d, 0xbe, 0x46, 0x00, 0x8b, 0x43, 0x8d, + 0xe1, 0x46, 0x00, 0x8b, 0x43, 0x8e, 0x0c, 0x06, 0xc3, 0x8e, 0x30, 0x12, + 0xc3, 0x8e, 0x42, 0xc6, 0x60, 0xb1, 0x00, 0xff, 0x09, 0x18, 0xc3, 0x8e, + 0x51, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xd9, 0xc5, 0x63, 0xdc, 0x00, 0x1e, + 0x68, 0xc5, 0x6c, 0xa6, 0x00, 0xff, 0x29, 0xc5, 0xd8, 0xc1, 0x00, 0xff, + 0x20, 0x06, 0xc3, 0x8e, 0x5d, 0x12, 0xc3, 0x8e, 0x6f, 0xc6, 0x60, 0xb1, + 0x00, 0xfe, 0x89, 0x18, 0xc3, 0x8e, 0x7e, 0xc4, 0xe3, 0xab, 0x00, 0xfb, + 0xb9, 0xc5, 0x63, 0xdc, 0x00, 0x1d, 0x78, 0x46, 0x00, 0x8b, 0x43, 0x8e, + 0x8a, 0x46, 0x00, 0x8b, 0x43, 0x8e, 0xb5, 0x46, 0x00, 0x8b, 0x43, 0x8e, + 0xd9, 0xc5, 0x78, 0xc7, 0x00, 0x1e, 0xc9, 0xc5, 0x87, 0xf4, 0x00, 0x1b, + 0x98, 0x90, 0x00, 0x1f, 0xd9, 0xc3, 0x87, 0xf6, 0x00, 0x1f, 0x08, 0xc2, + 0x00, 0xba, 0x00, 0xe9, 0x51, 0x8b, 0x00, 0xe9, 0x40, 0xc3, 0x01, 0xcf, + 0x08, 0x0a, 0x09, 0x47, 0x0d, 0x05, 0x43, 0x8f, 0x05, 0xc7, 0xb9, 0xdb, + 0x08, 0x0a, 0x69, 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0xa0, 0x00, 0x43, 0x8f, + 0x11, 0x00, 0x43, 0x8f, 0x24, 0xc6, 0xb9, 0xdc, 0x08, 0x0a, 0x49, 0xcf, + 0x67, 0xbf, 0x08, 0x0a, 0xa8, 0x00, 0x43, 0x8f, 0x2e, 0xc2, 0x02, 0xa0, + 0x08, 0x0a, 0xe1, 0xc2, 0x00, 0xc4, 0x08, 0x0b, 0x21, 0x0a, 0x43, 0x8f, + 0x3a, 0xc3, 0x45, 0x6b, 0x08, 0x0b, 0x49, 0x43, 0x00, 0xc7, 0x43, 0x8f, + 0x46, 0xc2, 0x00, 0x5f, 0x08, 0x0a, 0xfb, 0x03, 0x8f, 0x52, 0xc3, 0x45, + 0x6b, 0x08, 0x0b, 0x32, 0x03, 0x8f, 0x58, 0xcf, 0x6b, 0x25, 0x08, 0x0b, + 0x08, 0xd3, 0x41, 0x12, 0x08, 0x78, 0xe0, 0xd3, 0x41, 0x12, 0x08, 0x78, + 0xb8, 0xd3, 0x41, 0x12, 0x08, 0x78, 0x80, 0xc3, 0x77, 0x79, 0x08, 0x78, + 0xa9, 0xc4, 0xdc, 0x2d, 0x08, 0x78, 0x88, 0xcc, 0x85, 0xdd, 0x08, 0x78, + 0x99, 0xc3, 0x36, 0xb6, 0x08, 0x78, 0x00, 0xc2, 0xe5, 0xfd, 0x08, 0x1e, + 0x49, 0xc2, 0x00, 0xd0, 0x08, 0x1e, 0x50, 0xc7, 0xc1, 0x8c, 0x08, 0x1e, + 0x62, 0x03, 0x8f, 0x5e, 0xc2, 0x01, 0x30, 0x08, 0x1e, 0x70, 0x91, 0x08, + 0x1e, 0x91, 0xc4, 0x18, 0x12, 0x08, 0x1e, 0xa0, 0xc7, 0xca, 0x06, 0x0e, + 0x7d, 0xf1, 0x44, 0xe0, 0x6b, 0xc3, 0x8f, 0x64, 0xc9, 0x92, 0x8d, 0x0e, + 0x7d, 0xb0, 0xd0, 0x58, 0xe2, 0x0e, 0x7d, 0x21, 0xd0, 0x2d, 0x10, 0x0e, + 0x7d, 0x08, 0xcb, 0x93, 0xeb, 0x0e, 0x7c, 0x79, 0xc7, 0x78, 0xdb, 0x0e, + 0x7c, 0x48, 0x87, 0x00, 0xb3, 0x50, 0x87, 0x00, 0xb1, 0xb8, 0x8b, 0x00, + 0xa7, 0x08, 0x91, 0x00, 0xa7, 0x28, 0x83, 0x00, 0xa7, 0x48, 0x8b, 0x00, + 0xa2, 0xe0, 0x91, 0x00, 0xa3, 0x00, 0x83, 0x00, 0xa3, 0x20, 0x83, 0x00, + 0xa9, 0xe0, 0x91, 0x00, 0xa9, 0xc0, 0x8b, 0x00, 0xa9, 0xa0, 0x83, 0x00, + 0xa9, 0x20, 0x8b, 0x00, 0xa8, 0xe0, 0x91, 0x00, 0xa9, 0x00, 0x83, 0x00, + 0xa8, 0x18, 0x8b, 0x00, 0xa7, 0xd8, 0x91, 0x00, 0xa7, 0xf8, 0x83, 0x00, + 0xa2, 0x38, 
0x91, 0x00, 0xa2, 0x18, 0x8b, 0x00, 0xa1, 0xf8, 0x8b, 0x00, + 0xa5, 0x88, 0x91, 0x00, 0xa5, 0xa8, 0x83, 0x00, 0xa5, 0xc8, 0x83, 0x00, + 0xb3, 0xe8, 0x91, 0x00, 0xb3, 0xd8, 0x8b, 0x00, 0xb3, 0xc8, 0x43, 0x02, + 0x9c, 0xc3, 0x8f, 0x71, 0xc4, 0x00, 0xd5, 0x00, 0x1a, 0x80, 0x96, 0x01, + 0x66, 0xa8, 0x96, 0x01, 0x66, 0xa0, 0xcd, 0x0d, 0xad, 0x01, 0x92, 0x49, + 0x87, 0x01, 0x92, 0x88, 0xc2, 0x02, 0xa0, 0x01, 0x92, 0x91, 0xc4, 0x02, + 0xde, 0x01, 0x92, 0x98, 0xc3, 0x09, 0x9e, 0x01, 0x92, 0xa1, 0xc3, 0x0d, + 0x14, 0x01, 0x92, 0xa8, 0xc2, 0x22, 0xcc, 0x01, 0x92, 0xb1, 0xc4, 0x18, + 0x10, 0x01, 0x92, 0xb8, 0xcd, 0x0d, 0xad, 0x01, 0x92, 0x51, 0x87, 0x01, + 0x92, 0xd8, 0xc2, 0x02, 0xa0, 0x01, 0x92, 0xe1, 0xc4, 0x02, 0xde, 0x01, + 0x92, 0xe8, 0xc3, 0x09, 0x9e, 0x01, 0x92, 0xf1, 0xc3, 0x0d, 0x14, 0x01, + 0x92, 0xf8, 0xc2, 0x22, 0xcc, 0x01, 0x95, 0x89, 0xc4, 0x18, 0x10, 0x01, + 0x95, 0x90, 0xcd, 0x0d, 0xad, 0x01, 0x92, 0x59, 0x87, 0x01, 0x95, 0xb0, + 0xc2, 0x02, 0xa0, 0x01, 0x95, 0xb9, 0xc4, 0x02, 0xde, 0x01, 0x95, 0xc0, + 0xc3, 0x09, 0x9e, 0x01, 0x95, 0xc9, 0xc3, 0x0d, 0x14, 0x01, 0x95, 0xd0, + 0xc2, 0x22, 0xcc, 0x01, 0x95, 0xd9, 0xc4, 0x18, 0x10, 0x01, 0x95, 0xe0, + 0x46, 0x25, 0xd4, 0x43, 0x8f, 0x7d, 0xc2, 0x00, 0xc1, 0x09, 0x19, 0x69, + 0xc2, 0x00, 0xd0, 0x09, 0x19, 0x60, 0xc9, 0xb4, 0x6d, 0x09, 0x29, 0x79, + 0xc2, 0x02, 0xfb, 0x09, 0x15, 0x00, 0x8e, 0x09, 0x29, 0x21, 0x86, 0x09, + 0x12, 0xb0, 0xc2, 0x01, 0xe2, 0x09, 0x29, 0x18, 0xc2, 0x01, 0xe2, 0x09, + 0x12, 0xe3, 0x03, 0x8f, 0x89, 0xc3, 0x01, 0xb2, 0x09, 0x12, 0xd8, 0xc9, + 0x40, 0xaa, 0x09, 0x12, 0xa8, 0xc8, 0xb5, 0x8a, 0x09, 0x11, 0xd8, 0xc3, + 0x38, 0xb5, 0x09, 0x28, 0xf1, 0xc3, 0x0b, 0x47, 0x09, 0x10, 0x80, 0xd2, + 0x36, 0x5f, 0x09, 0x28, 0xe8, 0xc2, 0x00, 0x65, 0x09, 0x28, 0xd9, 0xcb, + 0x8d, 0x2c, 0x09, 0x10, 0x18, 0xc2, 0x06, 0x47, 0x09, 0x1c, 0x59, 0x0b, + 0x43, 0x8f, 0x8f, 0x00, 0x43, 0x8f, 0x9b, 0x97, 0x09, 0x10, 0x69, 0x87, + 0x09, 0x10, 0x60, 0xc3, 0x03, 0x49, 0x09, 0x10, 0x51, 0xc9, 0x40, 0xaa, + 0x09, 0x10, 0x48, 0x8b, 0x09, 0x10, 0x41, 0x42, 0x01, 0x9d, 0x43, 0x8f, + 0xa7, 0xc4, 0xdc, 0xae, 0x09, 0x28, 0xb1, 0x86, 0x09, 0x28, 0xa8, 0xc5, + 0x39, 0xc7, 0x09, 0x28, 0x88, 0xc4, 0xdc, 0xae, 0x09, 0x28, 0x59, 0x86, + 0x09, 0x28, 0x51, 0x9f, 0x09, 0x28, 0x48, 0x87, 0x09, 0x28, 0x41, 0xc2, + 0x00, 0xb1, 0x09, 0x28, 0x38, 0xca, 0xa6, 0xfc, 0x09, 0x27, 0xb1, 0x49, + 0x36, 0x5c, 0xc3, 0x8f, 0xb2, 0xc3, 0x04, 0x2a, 0x09, 0x27, 0x99, 0xc2, + 0x08, 0x6d, 0x09, 0x27, 0x90, 0x8b, 0x09, 0x1c, 0x41, 0xc2, 0x04, 0x3d, + 0x09, 0x0e, 0x33, 0x03, 0x8f, 0xbe, 0x83, 0x09, 0x0e, 0x22, 0x03, 0x8f, + 0xc4, 0xc2, 0x01, 0xe2, 0x09, 0x0f, 0x51, 0x86, 0x09, 0x0f, 0x49, 0xca, + 0xa0, 0xb2, 0x09, 0x0f, 0x41, 0x46, 0x25, 0xd4, 0x43, 0x8f, 0xc8, 0xd8, + 0x25, 0xd3, 0x09, 0x0f, 0x21, 0x03, 0x43, 0x8f, 0xd2, 0xc2, 0x01, 0xdf, + 0x09, 0x0f, 0x09, 0x0a, 0x43, 0x8f, 0xdc, 0xc3, 0x5d, 0xd1, 0x09, 0x0e, + 0xd1, 0x87, 0x09, 0x0e, 0xc2, 0x03, 0x8f, 0xf1, 0x97, 0x09, 0x0e, 0xb3, + 0x03, 0x8f, 0xf7, 0xc3, 0x04, 0x5a, 0x09, 0x0e, 0xa9, 0xc4, 0x03, 0x48, + 0x09, 0x0e, 0xa0, 0x17, 0xc3, 0x8f, 0xfb, 0x8b, 0x09, 0x0e, 0x7a, 0x03, + 0x90, 0x06, 0x8f, 0x09, 0x0e, 0x63, 0x03, 0x90, 0x0a, 0xc7, 0x6a, 0x1f, + 0x09, 0x0e, 0x58, 0xcb, 0x8d, 0x21, 0x09, 0x0e, 0x51, 0x83, 0x09, 0x0e, + 0x42, 0x03, 0x90, 0x10, 0x8b, 0x09, 0x0e, 0x09, 0xc2, 0x01, 0x9d, 0x09, + 0x0e, 0x00, 0xcc, 0x83, 0xa9, 0x09, 0x0d, 0xf9, 0x90, 0x09, 0x0d, 0xf1, + 0x8e, 0x09, 0x0d, 0xe9, 0x46, 0x25, 0xd4, 0x43, 0x90, 0x14, 0xcd, 0x47, + 0xaa, 0x09, 0x0b, 0x51, 0xc8, 0x54, 0x29, 0x09, 0x0b, 0x48, 0xd2, 0x47, + 0xa5, 0x09, 
0x26, 0x59, 0xc4, 0x38, 0xb4, 0x09, 0x08, 0xa1, 0xc3, 0x62, + 0x19, 0x09, 0x08, 0x98, 0x0b, 0xc3, 0x90, 0x26, 0x87, 0x09, 0x07, 0x2a, + 0x03, 0x90, 0x2e, 0x94, 0x09, 0x07, 0x21, 0x8e, 0x09, 0x07, 0x18, 0x46, + 0x25, 0xd4, 0x43, 0x90, 0x34, 0xc9, 0x20, 0x12, 0x09, 0x07, 0x08, 0x8f, + 0x09, 0x26, 0x02, 0x03, 0x90, 0x40, 0xd0, 0x5d, 0xd2, 0x09, 0x25, 0xf9, + 0xc9, 0xaa, 0x17, 0x09, 0x06, 0xe0, 0xc9, 0xaa, 0xef, 0x09, 0x06, 0xd8, + 0xc4, 0x45, 0xaf, 0x09, 0x06, 0xc9, 0x8d, 0x09, 0x06, 0xc0, 0x46, 0x25, + 0xd4, 0xc3, 0x90, 0x46, 0x8e, 0x09, 0x06, 0x92, 0x03, 0x90, 0x50, 0x94, + 0x09, 0x06, 0x63, 0x03, 0x90, 0x56, 0xc7, 0x5d, 0x9b, 0x09, 0x06, 0x58, + 0xca, 0x9c, 0x66, 0x09, 0x06, 0x81, 0xa1, 0x09, 0x06, 0x72, 0x03, 0x90, + 0x5c, 0xd0, 0x5d, 0x92, 0x09, 0x06, 0x50, 0xc8, 0xaa, 0xef, 0x09, 0x06, + 0x40, 0x48, 0x6c, 0xd6, 0xc3, 0x90, 0x62, 0x84, 0x09, 0x06, 0x30, 0x42, + 0x00, 0x47, 0x43, 0x90, 0x6e, 0xc4, 0x38, 0x68, 0x09, 0x25, 0xb1, 0xc9, + 0xaa, 0x5f, 0x09, 0x06, 0x01, 0x86, 0x09, 0x05, 0xf8, 0xc8, 0xaa, 0x60, + 0x09, 0x06, 0x10, 0x9f, 0x09, 0x1b, 0xd2, 0x03, 0x90, 0x7a, 0xd0, 0x5b, + 0x42, 0x09, 0x1b, 0xc8, 0xc3, 0x04, 0x2a, 0x09, 0x05, 0xd1, 0xc2, 0x00, + 0xd0, 0x09, 0x05, 0xc9, 0xca, 0xa4, 0x4a, 0x09, 0x05, 0xc0, 0xc8, 0xb5, + 0x92, 0x09, 0x07, 0x60, 0xca, 0x51, 0xd4, 0x09, 0x25, 0x00, 0xcc, 0x5d, + 0xd6, 0x09, 0x24, 0xe8, 0xc4, 0x4a, 0x0f, 0x09, 0x1b, 0x99, 0xc4, 0xe0, + 0x5f, 0x09, 0x03, 0x60, 0x8f, 0x09, 0x03, 0x39, 0xcb, 0x97, 0xbe, 0x09, + 0x03, 0x30, 0xc2, 0x38, 0x6a, 0x09, 0x02, 0xf0, 0xca, 0x97, 0xbe, 0x09, + 0x02, 0xe0, 0x00, 0x43, 0x90, 0x80, 0x00, 0x43, 0x90, 0xa4, 0x14, 0xc3, + 0x90, 0xd8, 0xc6, 0x13, 0x95, 0x0e, 0xc6, 0x61, 0x46, 0x0e, 0xce, 0xc3, + 0x90, 0xe4, 0xc2, 0x02, 0xae, 0x0e, 0xc6, 0x33, 0x03, 0x90, 0xfa, 0xc4, + 0x03, 0xc8, 0x0e, 0xc6, 0x21, 0xcf, 0x62, 0x2e, 0x0e, 0xc0, 0xe0, 0xc5, + 0x0e, 0xce, 0x0e, 0xc5, 0xc1, 0xc5, 0x06, 0x82, 0x0e, 0xc5, 0xb9, 0xc6, + 0x04, 0xcb, 0x0e, 0xc5, 0xa3, 0x03, 0x91, 0x00, 0xc6, 0x13, 0x95, 0x0e, + 0xc5, 0x81, 0xce, 0x3a, 0x9d, 0x0e, 0xc5, 0x79, 0xc2, 0x02, 0xae, 0x0e, + 0xc5, 0x71, 0xc4, 0x03, 0xc8, 0x0e, 0xc5, 0x58, 0xc5, 0x06, 0x82, 0x0e, + 0xc5, 0x03, 0x03, 0x91, 0x04, 0x16, 0xc3, 0x91, 0x0a, 0xc4, 0x18, 0xf2, + 0x0e, 0xc4, 0xc1, 0xce, 0x3a, 0x9d, 0x0e, 0xc4, 0xb9, 0xc2, 0x02, 0xae, + 0x0e, 0xc4, 0x91, 0xc4, 0x03, 0xc8, 0x0e, 0xc4, 0x72, 0x03, 0x91, 0x16, + 0xc6, 0x13, 0x95, 0x0e, 0xc3, 0x29, 0xc6, 0x04, 0xe1, 0x0e, 0xc3, 0x13, + 0x03, 0x91, 0x1a, 0xd0, 0x5a, 0x02, 0x0e, 0xc3, 0x08, 0xc7, 0x27, 0xb2, + 0x0e, 0xc3, 0x01, 0xc4, 0x18, 0xf2, 0x0e, 0xc2, 0xf9, 0xc4, 0x0e, 0xe2, + 0x0e, 0xc2, 0xe8, 0x00, 0x43, 0x91, 0x23, 0xd2, 0x26, 0x32, 0x0e, 0xc2, + 0x63, 0x03, 0x91, 0x32, 0xcb, 0x18, 0xdc, 0x0e, 0xc2, 0x22, 0x03, 0x91, + 0x36, 0xc5, 0x0e, 0xce, 0x0e, 0xc7, 0xa3, 0x03, 0x91, 0x3a, 0xcb, 0x13, + 0x90, 0x0e, 0xc6, 0x1b, 0x03, 0x91, 0x3e, 0x47, 0x04, 0xcb, 0x43, 0x91, + 0x44, 0xc2, 0x00, 0x74, 0x0e, 0xc6, 0x99, 0xc3, 0x00, 0xa3, 0x0e, 0xc6, + 0x90, 0xd2, 0x4c, 0x6d, 0x0e, 0xc4, 0xfa, 0x03, 0x91, 0x50, 0x00, 0x43, + 0x91, 0x56, 0xcc, 0x13, 0x8f, 0x0e, 0xc6, 0x88, 0xdd, 0x11, 0xa8, 0x0e, + 0xc5, 0x60, 0x00, 0x43, 0x91, 0x71, 0xd3, 0x40, 0xff, 0x0e, 0xc4, 0x21, + 0xc4, 0x0e, 0xe2, 0x0e, 0xc4, 0x02, 0x03, 0x91, 0x80, 0x00, 0x43, 0x91, + 0x86, 0xd7, 0x26, 0x32, 0x0e, 0xc2, 0xa9, 0xd5, 0x18, 0xdc, 0x0e, 0xc2, + 0x58, 0xd5, 0x13, 0x90, 0x0e, 0xc6, 0xd3, 0x03, 0x91, 0x92, 0xc5, 0x0e, + 0xce, 0x0e, 0xc6, 0x50, 0xc5, 0x16, 0xca, 0x0e, 0xc5, 0xf9, 0xc2, 0x00, + 0x74, 0x0e, 0xc5, 0xf1, 0xc3, 0x00, 0xa3, 0x0e, 0xc5, 0xe8, 0xc5, 0x06, + 0x82, 0x0e, 
0xc0, 0x13, 0x03, 0x91, 0x96, 0xd2, 0x13, 0x89, 0x0e, 0xc6, + 0x81, 0x46, 0x0e, 0xce, 0xc3, 0x91, 0x9a, 0xc4, 0x05, 0x75, 0x0e, 0xc3, + 0x63, 0x03, 0x91, 0xa6, 0xc8, 0xbc, 0x62, 0x0e, 0xc3, 0x89, 0xd3, 0x46, + 0x57, 0x0e, 0xc2, 0x9a, 0x03, 0x91, 0xaa, 0xd5, 0x37, 0x04, 0x0e, 0xc6, + 0x79, 0xd4, 0x3c, 0x00, 0x0e, 0xc5, 0xe1, 0xc4, 0x05, 0x75, 0x0e, 0xc3, + 0xa0, 0xc5, 0x37, 0x20, 0x0e, 0xc6, 0xb8, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, + 0x49, 0xc4, 0x0e, 0xe2, 0x0e, 0xc3, 0x38, 0xcb, 0x13, 0x90, 0x0e, 0xc6, + 0x73, 0x03, 0x91, 0xb0, 0xc2, 0x02, 0xae, 0x0e, 0xc6, 0x38, 0x00, 0x43, + 0x91, 0xb6, 0xc5, 0x06, 0x82, 0x0e, 0xc5, 0x09, 0xc2, 0x02, 0xae, 0x0e, + 0xc4, 0xa0, 0xc5, 0x17, 0x14, 0x0e, 0xce, 0x89, 0xc5, 0x03, 0x13, 0x0e, + 0xce, 0x80, 0xc5, 0x17, 0x14, 0x0e, 0xce, 0x11, 0xc5, 0x03, 0x13, 0x0e, + 0xce, 0x08, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x40, 0xc6, 0x00, 0x58, 0x0e, + 0xce, 0x79, 0xc6, 0x24, 0x3b, 0x0e, 0xce, 0x68, 0xc6, 0x00, 0x58, 0x0e, + 0xce, 0x71, 0xc6, 0x24, 0x3b, 0x0e, 0xce, 0x60, 0xc6, 0x00, 0x58, 0x0e, + 0xce, 0x01, 0xc6, 0x24, 0x3b, 0x0e, 0xcd, 0xf0, 0xc6, 0x00, 0x58, 0x0e, + 0xcd, 0xf9, 0xc6, 0x24, 0x3b, 0x0e, 0xcd, 0xe8, 0xcc, 0x8a, 0xf9, 0x0e, + 0xce, 0x59, 0xcc, 0x82, 0x89, 0x0e, 0xce, 0x50, 0xc6, 0x2c, 0x2e, 0x0e, + 0xcd, 0xe1, 0xc6, 0x00, 0x58, 0x0e, 0xcd, 0xd0, 0xc6, 0x2c, 0x2e, 0x0e, + 0xcd, 0xd9, 0xc6, 0x00, 0x58, 0x0e, 0xcd, 0xc8, 0xc5, 0x17, 0x14, 0x0e, + 0xce, 0x39, 0xc5, 0x03, 0x13, 0x0e, 0xce, 0x30, 0xc5, 0x17, 0x14, 0x0e, + 0xcd, 0xc1, 0xc5, 0x03, 0x13, 0x0e, 0xcd, 0xb8, 0xc5, 0x17, 0x14, 0x0e, + 0xcc, 0xf1, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0xe9, 0xc5, 0x03, 0x13, 0x0e, + 0xcc, 0xe0, 0xc5, 0x17, 0x14, 0x0e, 0xcc, 0xd9, 0xc6, 0x01, 0xdb, 0x0e, + 0xcc, 0xd1, 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0xc8, 0x47, 0x20, 0x38, 0xc3, + 0x91, 0xd1, 0x4b, 0x27, 0x7b, 0x43, 0x91, 0xdd, 0xcb, 0x93, 0x1a, 0x0e, + 0xcc, 0xf9, 0x53, 0x41, 0xd0, 0x43, 0x91, 0xf2, 0xc5, 0x17, 0x14, 0x0e, + 0xcc, 0x53, 0x03, 0x91, 0xfe, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0x49, 0xc5, + 0x03, 0x13, 0x0e, 0xcc, 0x40, 0xc2, 0x00, 0x15, 0x0e, 0xc9, 0x68, 0x45, + 0x00, 0x8c, 0xc3, 0x92, 0x04, 0xc6, 0x10, 0x9d, 0x01, 0x5b, 0x99, 0x4a, + 0x01, 0x88, 0x43, 0x92, 0x2e, 0xe0, 0x01, 0x47, 0x01, 0x4b, 0x28, 0xd0, + 0x57, 0xc2, 0x0f, 0xc1, 0x91, 0xcb, 0x57, 0xc7, 0x0f, 0xc1, 0x71, 0xca, + 0xa0, 0x08, 0x0f, 0xc1, 0x51, 0x47, 0x00, 0x58, 0xc3, 0x92, 0x34, 0x49, + 0xa8, 0xdc, 0xc3, 0x92, 0x40, 0xcc, 0x84, 0x09, 0x0f, 0xc1, 0x11, 0xcc, + 0x82, 0x1d, 0x0f, 0xc1, 0x30, 0xe0, 0x01, 0x87, 0x01, 0x5c, 0x10, 0x46, + 0x00, 0x8b, 0x43, 0x92, 0x4c, 0xe0, 0x09, 0x67, 0x01, 0x4b, 0x48, 0x0e, + 0xc3, 0x92, 0x58, 0x14, 0x43, 0x92, 0x64, 0x90, 0x00, 0x70, 0x81, 0xc3, + 0x00, 0xd0, 0x00, 0x70, 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xea, 0xc1, 0xcc, + 0x10, 0xb4, 0x07, 0xea, 0xc8, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x51, 0xcc, + 0x10, 0xb4, 0x07, 0xe9, 0x90, 0x0b, 0xc3, 0x92, 0x6a, 0xca, 0x26, 0xf7, + 0x07, 0xe9, 0x31, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xc1, 0x45, 0x00, 0x8c, + 0x43, 0x92, 0x76, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x81, 0xcc, 0x00, 0xfb, + 0x07, 0xe8, 0x60, 0x45, 0x50, 0xf0, 0xc3, 0x92, 0x82, 0x45, 0x19, 0x60, + 0x43, 0x92, 0x8e, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x69, 0xcc, 0x00, 0xfb, + 0x07, 0xe8, 0x48, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x79, 0xcc, 0x00, 0xfb, + 0x07, 0xe8, 0x58, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0xa1, 0xcd, 0x00, 0xfa, + 0x07, 0xe3, 0x10, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x99, 0xcd, 0x00, 0xfa, + 0x07, 0xe3, 0x08, 0xca, 0x26, 0xf7, 0x07, 0xea, 0xf9, 0xcc, 0x10, 0xb4, + 0x07, 0xeb, 0x00, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x11, 0xcc, 0x10, 0xb4, + 0x07, 0xeb, 
0x18, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xe9, 0xcb, 0x10, 0xb5, + 0x07, 0xe5, 0x70, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x09, 0xcb, 0x10, 0xb5, + 0x07, 0xe5, 0x98, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x31, 0xcc, 0x10, 0xb4, + 0x07, 0xee, 0x28, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x01, 0xcb, 0x10, 0xb5, + 0x07, 0xe5, 0x88, 0x44, 0x19, 0x6a, 0xc3, 0x92, 0x9a, 0xce, 0x43, 0x77, + 0x07, 0xed, 0x48, 0xd3, 0x40, 0x41, 0x07, 0xea, 0x31, 0x0a, 0x43, 0x92, + 0xa6, 0x47, 0xa6, 0xcd, 0xc3, 0x92, 0xb2, 0xcd, 0x00, 0xfa, 0x07, 0xef, + 0xc8, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0xb1, 0xcc, 0x10, 0xb4, 0x07, 0xeb, + 0xb8, 0x8f, 0x07, 0xea, 0x39, 0xcd, 0x76, 0x28, 0x07, 0xea, 0x50, 0xca, + 0x82, 0xa3, 0x07, 0xea, 0x41, 0xcc, 0x82, 0xa1, 0x07, 0xea, 0x48, 0xcc, + 0x00, 0xfb, 0x07, 0xe1, 0x39, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x98, 0x44, + 0x19, 0x6a, 0xc3, 0x92, 0xb8, 0xd1, 0x50, 0x13, 0x07, 0xeb, 0x99, 0xce, + 0x43, 0x77, 0x07, 0xeb, 0xa0, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x91, 0xcb, + 0x10, 0xb5, 0x07, 0xe5, 0x30, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x61, 0xcb, + 0x10, 0xb5, 0x07, 0xe5, 0x10, 0x45, 0x30, 0xc1, 0xc3, 0x92, 0xc4, 0xd1, + 0x50, 0x13, 0x07, 0xea, 0x98, 0x43, 0x2b, 0xba, 0xc3, 0x92, 0xd0, 0x42, + 0x03, 0x53, 0x43, 0x92, 0xdc, 0x44, 0x06, 0x5b, 0xc3, 0x92, 0xe8, 0x42, + 0x00, 0x5d, 0x43, 0x92, 0xfa, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0x31, 0x0b, + 0xc3, 0x93, 0x06, 0xcb, 0x64, 0x7b, 0x07, 0xe6, 0xf8, 0x44, 0x50, 0xf2, + 0xc3, 0x93, 0x12, 0x43, 0x2b, 0xba, 0x43, 0x93, 0x1e, 0xcc, 0x00, 0xfb, + 0x07, 0xe0, 0x01, 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xb8, 0x0b, 0xc3, 0x93, + 0x2a, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xdf, + 0x99, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x90, 0xca, 0x26, 0xf7, 0x07, 0xdf, + 0x89, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x80, 0xca, 0x26, 0xf7, 0x07, 0xdf, + 0x79, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x70, 0xcc, 0x00, 0xfb, 0x07, 0xe2, + 0xb1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0xd8, 0xca, 0x26, 0xf7, 0x07, 0xed, + 0xd9, 0xcc, 0x10, 0xb4, 0x07, 0xee, 0x18, 0xcd, 0x00, 0xfa, 0x07, 0xf7, + 0xc9, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0xd0, 0xcd, 0x00, 0xfa, 0x07, 0xf7, + 0xb9, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0xc0, 0xca, 0x26, 0xf7, 0x07, 0xec, + 0x01, 0xcc, 0x10, 0xb4, 0x07, 0xed, 0xa8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, + 0xa1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x18, 0x44, 0x19, 0x6a, 0xc3, 0x93, + 0x36, 0xcf, 0x67, 0x65, 0x07, 0xeb, 0xf9, 0xce, 0x43, 0x77, 0x07, 0xed, + 0x90, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x31, 0xcb, 0x10, 0xb5, 0x07, 0xe4, + 0xe8, 0xc2, 0x04, 0xc6, 0x07, 0xea, 0x20, 0xcb, 0x10, 0xb5, 0x07, 0xdf, + 0xf1, 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0xe0, 0x16, 0xc3, 0x93, 0x42, 0xca, + 0x35, 0x7a, 0x00, 0x31, 0xe9, 0x5c, 0x10, 0x12, 0x43, 0x93, 0x4e, 0x44, + 0x05, 0x18, 0xc3, 0x93, 0x58, 0x16, 0x43, 0x93, 0x67, 0xcc, 0x00, 0xfb, + 0x07, 0xf6, 0x89, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0x98, 0xd0, 0x0e, 0x7c, + 0x00, 0x46, 0x19, 0xc9, 0x0e, 0x6e, 0x00, 0x37, 0xe0, 0xcc, 0x00, 0xfb, + 0x07, 0xf6, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0x78, 0xcf, 0x67, 0xb0, + 0x00, 0x45, 0x81, 0x16, 0xc3, 0x93, 0x73, 0xc4, 0x00, 0x9d, 0x00, 0x35, + 0x80, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0xa1, 0xcc, 0x00, 0xfb, 0x07, 0xdc, + 0x90, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0xc1, 0xcc, 0x00, 0xfb, 0x07, 0xdc, + 0xb0, 0x46, 0x03, 0x13, 0xc3, 0x93, 0x7f, 0x42, 0x00, 0x58, 0xc3, 0x93, + 0x89, 0x4b, 0x0e, 0x7c, 0xc3, 0x93, 0x95, 0xc3, 0x01, 0x5d, 0x00, 0x3b, + 0x50, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0xe9, 0xcb, 0x10, 0xb5, 0x07, 0xf6, + 0xf8, 0x4a, 0x0e, 0x7d, 0xc3, 0x93, 0xa1, 0xcd, 0x04, 0xe7, 0x00, 0x45, + 0x10, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0xe9, 0xcb, 0x10, 0xb5, 0x07, 0xf4, + 0xf8, 0x4a, 
0x0e, 0x7d, 0xc3, 0x93, 0xad, 0x48, 0x04, 0xe7, 0x43, 0x93, + 0xbf, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xf6, + 0x58, 0x44, 0x00, 0x8d, 0xc3, 0x93, 0xcb, 0xc4, 0x3e, 0x06, 0x00, 0x33, + 0x8a, 0x03, 0x94, 0x01, 0x00, 0x43, 0x94, 0x05, 0xc7, 0x31, 0x5f, 0x00, + 0x46, 0x11, 0x16, 0xc3, 0x94, 0x11, 0xc9, 0x16, 0x14, 0x00, 0x3b, 0x10, + 0xcc, 0x00, 0xfb, 0x07, 0xdc, 0x71, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0x80, + 0x45, 0x00, 0x8c, 0xc3, 0x94, 0x1d, 0x0b, 0xc3, 0x94, 0x2d, 0xcb, 0x64, + 0x7b, 0x07, 0xf6, 0xe1, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0xd0, 0xca, 0x26, + 0xf7, 0x07, 0xdf, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x10, 0xca, 0x26, + 0xf7, 0x07, 0xdf, 0x09, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x00, 0xcc, 0x00, + 0xfb, 0x07, 0xf5, 0x29, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0x38, 0xc7, 0x31, + 0x5f, 0x00, 0x46, 0x09, 0xc9, 0x16, 0x14, 0x00, 0x35, 0xf8, 0xcb, 0x10, + 0xb5, 0x07, 0xdb, 0xe1, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0xd0, 0xcb, 0x64, + 0x7b, 0x07, 0xdc, 0x09, 0x0b, 0xc3, 0x94, 0x39, 0xca, 0x26, 0xf7, 0x07, + 0xdb, 0xf8, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0x41, 0xcc, 0x00, 0xfb, 0x07, + 0xdb, 0x30, 0x0b, 0xc3, 0x94, 0x45, 0xca, 0x26, 0xf7, 0x07, 0xda, 0xf9, + 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0x08, 0x46, 0x03, 0x13, 0xc3, 0x94, 0x51, + 0xc4, 0x00, 0x9d, 0x00, 0x33, 0xe1, 0xda, 0x1b, 0x4e, 0x00, 0x33, 0xe8, + 0xc6, 0xcb, 0x51, 0x00, 0x31, 0x4b, 0x03, 0x94, 0x5b, 0xca, 0x64, 0x7c, + 0x07, 0xf4, 0xc0, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0xa9, 0xcb, 0x10, 0xb5, + 0x07, 0xf4, 0xb8, 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0x29, 0x0b, 0xc3, 0x94, + 0x5f, 0xca, 0x26, 0xf7, 0x07, 0xdb, 0x18, 0x16, 0xc3, 0x94, 0x6b, 0xc9, + 0x0e, 0x6e, 0x00, 0x44, 0x58, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0x09, 0xcb, + 0x10, 0xb5, 0x07, 0xf6, 0x18, 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x59, 0xca, + 0x26, 0xf7, 0x07, 0xf5, 0x60, 0x0b, 0xc3, 0x94, 0x77, 0xca, 0x26, 0xf7, + 0x07, 0xf4, 0xd1, 0xcb, 0x64, 0x7b, 0x07, 0xf4, 0xe0, 0xcb, 0x10, 0xb5, + 0x07, 0xdb, 0x81, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0x70, 0x16, 0xc3, 0x94, + 0x83, 0xc7, 0x31, 0x5f, 0x00, 0x36, 0x71, 0xcb, 0x08, 0x09, 0x00, 0x31, + 0x32, 0x03, 0x94, 0x95, 0x00, 0x43, 0x94, 0x99, 0xcc, 0x00, 0xfb, 0x07, + 0xf7, 0x89, 0xcb, 0x10, 0xb5, 0x07, 0xf7, 0x98, 0x15, 0xc3, 0x94, 0xab, + 0xc4, 0xb0, 0x8b, 0x00, 0x45, 0x51, 0xca, 0x35, 0x7a, 0x00, 0x37, 0x79, + 0xcf, 0x3b, 0x79, 0x00, 0x34, 0xc9, 0x49, 0x04, 0xf9, 0xc3, 0x94, 0xb7, + 0xc9, 0x0e, 0x6e, 0x00, 0x34, 0xa3, 0x03, 0x94, 0xc3, 0xc4, 0x00, 0x9d, + 0x00, 0x34, 0x99, 0xcb, 0x08, 0x09, 0x00, 0x3b, 0x60, 0xcc, 0x00, 0xfb, + 0x07, 0xdd, 0x01, 0xcb, 0x10, 0xb5, 0x07, 0xdd, 0x10, 0x46, 0x03, 0x13, + 0xc3, 0x94, 0xc9, 0xcb, 0x08, 0x09, 0x00, 0x45, 0x09, 0xd6, 0x31, 0x56, + 0x00, 0x3a, 0xa9, 0x16, 0xc3, 0x94, 0xd6, 0xde, 0x0e, 0x6e, 0x00, 0x3a, + 0x88, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0x79, 0xcb, 0x10, 0xb5, 0x07, 0xf4, + 0x88, 0xcb, 0x64, 0x7b, 0x07, 0xda, 0xe9, 0x0b, 0xc3, 0x94, 0xe2, 0xca, + 0x26, 0xf7, 0x07, 0xda, 0xd8, 0xcb, 0x10, 0xb5, 0x07, 0xda, 0xa1, 0xcc, + 0x00, 0xfb, 0x07, 0xda, 0x90, 0xc5, 0x05, 0x02, 0x00, 0x45, 0x2b, 0x03, + 0x94, 0xee, 0xc5, 0x00, 0xd4, 0x00, 0x35, 0x38, 0xcc, 0x00, 0xfb, 0x07, + 0xf6, 0x29, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0x38, 0x4a, 0x0e, 0x7d, 0xc3, + 0x94, 0xf4, 0xcd, 0x04, 0xfa, 0x00, 0x34, 0xe8, 0xcc, 0x00, 0xfb, 0x07, + 0xf5, 0xc9, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0xd8, 0xcc, 0x00, 0xfb, 0x07, + 0xf5, 0xa9, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0xb8, 0x16, 0xc3, 0x95, 0x00, + 0xd7, 0x29, 0x57, 0x00, 0x34, 0xd1, 0xca, 0x35, 0x7a, 0x00, 0x3b, 0xf1, + 0x46, 0x09, 0x3f, 0xc3, 0x95, 0x0f, 0xcf, 0x3b, 0x79, 0x00, 0x3a, 0xe1, + 0x44, 0x03, 
0x13, 0x43, 0x95, 0x15, 0xcc, 0x00, 0xfb, 0x07, 0xf5, 0x89, + 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0x98, 0x45, 0x00, 0x8c, 0xc3, 0x95, 0x1b, + 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x49, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x50, + 0xca, 0x26, 0xf7, 0x07, 0xdc, 0x29, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x20, + 0xce, 0x6d, 0xe8, 0x00, 0x37, 0xd9, 0x0b, 0xc3, 0x95, 0x3a, 0xca, 0x26, + 0xf7, 0x07, 0xf5, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0x00, 0xca, 0x26, + 0xf7, 0x07, 0xdc, 0x49, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x40, 0xca, 0x26, + 0xf7, 0x07, 0xdc, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x10, 0xcb, 0x10, + 0xb5, 0x07, 0xdb, 0xa1, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0x90, 0xcb, 0x10, + 0xb5, 0x07, 0xdb, 0x61, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0x50, 0xc6, 0x1b, + 0xd1, 0x00, 0x45, 0x59, 0xc5, 0x00, 0xd4, 0x00, 0x36, 0x78, 0x00, 0x43, + 0x95, 0x46, 0xc8, 0xbf, 0x42, 0x00, 0x3b, 0xc1, 0xca, 0x9f, 0x72, 0x00, + 0x3b, 0xc8, 0xd0, 0x0e, 0x7c, 0x00, 0x45, 0x39, 0x44, 0x05, 0x18, 0x43, + 0x95, 0x52, 0xcc, 0x00, 0xfb, 0x07, 0xf7, 0x09, 0xcb, 0x10, 0xb5, 0x07, + 0xf7, 0x18, 0xcb, 0x10, 0xb5, 0x07, 0xde, 0xa9, 0xcc, 0x00, 0xfb, 0x07, + 0xde, 0x98, 0xcb, 0x64, 0x7b, 0x07, 0xdc, 0xe9, 0x0b, 0xc3, 0x95, 0x5e, + 0xca, 0x26, 0xf7, 0x07, 0xdc, 0xd8, 0xd0, 0x31, 0x56, 0x00, 0x44, 0x49, + 0x16, 0xc3, 0x95, 0x6a, 0xc4, 0x00, 0x9d, 0x00, 0x35, 0xe1, 0xc9, 0x0e, + 0x6e, 0x00, 0x35, 0xc9, 0x46, 0x03, 0x13, 0x43, 0x95, 0x76, 0x00, 0x43, + 0x95, 0x80, 0xcc, 0x00, 0xfb, 0x07, 0xf7, 0x29, 0xcb, 0x10, 0xb5, 0x07, + 0xf7, 0x38, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0xc1, 0xcc, 0x00, 0xfb, 0x07, + 0xdb, 0xb0, 0x45, 0x00, 0x8c, 0xc3, 0x95, 0x8c, 0x0b, 0xc3, 0x95, 0xa8, + 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x11, 0xcb, 0x64, 0x7b, 0x07, 0xf5, 0x20, + 0x00, 0x43, 0x95, 0xb4, 0x00, 0x43, 0x95, 0xc4, 0xc9, 0xab, 0xeb, 0x00, + 0x36, 0x03, 0x03, 0x95, 0xda, 0xca, 0x35, 0x7a, 0x00, 0x37, 0xf8, 0xcc, + 0x00, 0xfb, 0x07, 0xf7, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xf7, 0x58, 0xc2, + 0x16, 0x1c, 0x0f, 0x75, 0xb1, 0xc2, 0x00, 0x65, 0x0f, 0x75, 0xc0, 0xc4, + 0x3a, 0x01, 0x0f, 0x72, 0xe9, 0xc3, 0x0f, 0x9a, 0x0f, 0x72, 0xf8, 0xe0, + 0x0a, 0x47, 0x0f, 0xdd, 0x68, 0xd0, 0x04, 0xd7, 0x0f, 0xdd, 0x60, 0xd0, + 0x13, 0xe9, 0x0f, 0xdd, 0x30, 0x00, 0x43, 0x95, 0xde, 0x00, 0x43, 0x95, + 0xed, 0x4b, 0x18, 0x04, 0xc3, 0x95, 0xfc, 0xdc, 0x13, 0xf9, 0x0f, 0xd2, + 0x30, 0xc5, 0x6b, 0x02, 0x0f, 0xaf, 0xc9, 0xc8, 0x8e, 0xa5, 0x0f, 0xaf, + 0xb8, 0xc2, 0x10, 0x11, 0x0b, 0x4e, 0x39, 0x90, 0x0b, 0x4c, 0xa9, 0x9a, + 0x0b, 0x4c, 0x40, 0xc3, 0x14, 0x83, 0x0b, 0x4d, 0xc8, 0x8f, 0x0b, 0x4e, + 0x59, 0x92, 0x0b, 0x4d, 0xb0, 0xc3, 0x7c, 0x57, 0x0b, 0x4c, 0x49, 0x9a, + 0x0b, 0x4b, 0xf8, 0x92, 0x0b, 0x4e, 0x81, 0xcb, 0x99, 0x3f, 0x0b, 0x4c, + 0x99, 0xc3, 0x82, 0x78, 0x0b, 0x4c, 0x30, 0xc3, 0x8b, 0xa9, 0x0b, 0x4d, + 0xfb, 0x03, 0x96, 0x08, 0xc3, 0xd0, 0xd7, 0x0b, 0x4c, 0x68, 0xc8, 0xb9, + 0xd2, 0x0b, 0x4e, 0xe9, 0xc8, 0xbb, 0x72, 0x0b, 0x4c, 0x90, 0xc6, 0xcc, + 0xa7, 0x0b, 0x4f, 0x40, 0x92, 0x0b, 0x4a, 0x19, 0xc2, 0x00, 0xc2, 0x0b, + 0x49, 0x8a, 0x03, 0x96, 0x0c, 0xc3, 0x8b, 0xaa, 0x0b, 0x49, 0x49, 0xc2, + 0x00, 0x2c, 0x0b, 0x48, 0x80, 0x9a, 0x0b, 0x4a, 0xa9, 0xc2, 0x10, 0x11, + 0x0b, 0x48, 0x08, 0xc3, 0xd7, 0xe2, 0x0b, 0x47, 0x01, 0xc6, 0xd2, 0x83, + 0x0b, 0x44, 0xf8, 0xc3, 0x49, 0x27, 0x0b, 0x46, 0x91, 0x8f, 0x0b, 0x45, + 0xd9, 0xc2, 0x00, 0x45, 0x0b, 0x45, 0xa9, 0xc8, 0xb9, 0x5a, 0x0b, 0x45, + 0x80, 0xc6, 0xce, 0x15, 0x0b, 0x47, 0x19, 0xcc, 0x8b, 0xb9, 0x0b, 0x44, + 0xf0, 0x9a, 0x0b, 0x47, 0x09, 0x8f, 0x0b, 0x44, 0xd8, 0xc6, 0x17, 0x13, + 0x0b, 0x43, 0xd8, 0xc4, 0x61, 0x79, 0x0b, 0x41, 0x59, 0xc4, 0xde, 0xc7, + 0x0b, 0x40, 
0x71, 0xc6, 0xcd, 0x43, 0x0b, 0x40, 0x58, 0xc4, 0xe4, 0x7b, + 0x0b, 0x41, 0x11, 0xc4, 0xe4, 0x9b, 0x0b, 0x40, 0xc8, 0xa3, 0x01, 0x41, + 0xfb, 0x03, 0x96, 0x12, 0xa5, 0x01, 0x44, 0xf9, 0xa4, 0x01, 0x42, 0xfa, + 0x03, 0x96, 0x1d, 0xa5, 0x01, 0x45, 0x79, 0xa4, 0x01, 0x43, 0x7a, 0x03, + 0x96, 0x21, 0xa5, 0x01, 0x46, 0x78, 0xa5, 0x01, 0x45, 0xb9, 0xa4, 0x01, + 0x43, 0xba, 0x03, 0x96, 0x25, 0xa5, 0x01, 0x46, 0xb8, 0xa5, 0x01, 0x47, + 0x38, 0xa5, 0x01, 0x45, 0xd9, 0xa4, 0x01, 0x43, 0xda, 0x03, 0x96, 0x29, + 0xa5, 0x01, 0x46, 0xd8, 0xa5, 0x01, 0x47, 0x58, 0xa5, 0x01, 0x47, 0x98, + 0xa5, 0x01, 0x45, 0xe9, 0xa4, 0x01, 0x43, 0xea, 0x03, 0x96, 0x2d, 0xa5, + 0x01, 0x46, 0xe8, 0xa5, 0x01, 0x47, 0x68, 0xa5, 0x01, 0x47, 0xa8, 0xa5, + 0x01, 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf1, 0xa4, 0x01, 0x43, 0xf2, 0x03, + 0x96, 0x31, 0xa5, 0x01, 0x46, 0xf0, 0xa5, 0x01, 0x47, 0x70, 0xa5, 0x01, + 0x47, 0xb0, 0xa5, 0x01, 0x47, 0xd0, 0xa5, 0x01, 0x47, 0xe0, 0xd0, 0x57, + 0xc2, 0x0f, 0xc1, 0x81, 0xcb, 0x57, 0xc7, 0x0f, 0xc1, 0x61, 0x49, 0xa8, + 0xdc, 0xc3, 0x96, 0x35, 0x47, 0x00, 0x58, 0xc3, 0x96, 0x41, 0xcc, 0x84, + 0x09, 0x0f, 0xc1, 0x01, 0xcc, 0x82, 0x1d, 0x0f, 0xc1, 0x21, 0xca, 0xa0, + 0x08, 0x0f, 0xc1, 0x40, 0xe0, 0x03, 0x87, 0x01, 0x5c, 0x00, 0x46, 0x00, + 0x8b, 0x43, 0x96, 0x4d, 0xe0, 0x06, 0xe7, 0x01, 0x4b, 0x38, 0x0e, 0xc3, + 0x96, 0x59, 0xdf, 0x0c, 0xc2, 0x01, 0x4b, 0x30, 0xc5, 0xdb, 0xbe, 0x08, + 0x04, 0x39, 0xc5, 0xdc, 0x81, 0x08, 0x04, 0x30, 0xca, 0x9d, 0x88, 0x08, + 0x04, 0x41, 0xc9, 0xa9, 0xb4, 0x08, 0x04, 0x48, 0xc5, 0xdc, 0x77, 0x08, + 0x04, 0x51, 0xc6, 0xd3, 0x67, 0x08, 0x04, 0x58, 0xc5, 0xdc, 0x04, 0x08, + 0x04, 0x61, 0xc6, 0xd3, 0x6d, 0x08, 0x04, 0x68, 0xc6, 0xcc, 0xb3, 0x08, + 0x04, 0x19, 0xc6, 0xd2, 0x0b, 0x08, 0x04, 0x21, 0xca, 0xa7, 0x38, 0x08, + 0x04, 0x28, 0xce, 0x16, 0x0f, 0x00, 0xf3, 0x38, 0xce, 0x16, 0x0f, 0x00, + 0xf3, 0x48, 0xce, 0x01, 0x19, 0x00, 0xec, 0xa9, 0xc4, 0x01, 0x23, 0x00, + 0x12, 0xd0, 0xca, 0xa2, 0xb0, 0x05, 0x5a, 0x60, 0xd2, 0x4d, 0x0f, 0x05, + 0x59, 0xb0, 0xcc, 0x23, 0x3f, 0x00, 0xe8, 0x99, 0xc5, 0xd4, 0x9d, 0x00, + 0xe8, 0x90, 0xca, 0x9b, 0xda, 0x00, 0xf0, 0x48, 0x46, 0x00, 0x8b, 0x43, + 0x96, 0x65, 0xca, 0x45, 0x1d, 0x0e, 0xf8, 0x68, 0xca, 0xa8, 0x14, 0x0e, + 0xf8, 0x30, 0x87, 0x00, 0xe8, 0xa3, 0x03, 0x96, 0x86, 0xc5, 0x21, 0xa4, + 0x00, 0xe8, 0x41, 0xc7, 0xc5, 0xc9, 0x05, 0x5a, 0x1a, 0x03, 0x96, 0x8c, + 0xc8, 0x67, 0x21, 0x05, 0x3b, 0xf8, 0x87, 0x00, 0xe8, 0x11, 0xc4, 0xde, + 0x3f, 0x00, 0x12, 0x90, 0xce, 0x61, 0x6c, 0x00, 0x15, 0x72, 0x03, 0x96, + 0x92, 0xce, 0x74, 0x86, 0x00, 0x13, 0x80, 0xc6, 0x20, 0xab, 0x00, 0xf4, + 0xb9, 0xcc, 0x3e, 0xb0, 0x01, 0x63, 0x30, 0xc5, 0x05, 0x02, 0x00, 0xf3, + 0x69, 0xc5, 0x00, 0xd4, 0x00, 0xf3, 0x58, 0xd2, 0x25, 0xf1, 0x05, 0x3b, + 0x38, 0x45, 0x02, 0x9a, 0x43, 0x96, 0x98, 0x45, 0x02, 0x9a, 0x43, 0x96, + 0xb6, 0x42, 0x00, 0x30, 0xc3, 0x96, 0xd4, 0x45, 0x00, 0x5a, 0x43, 0x96, + 0xe3, 0xcb, 0x98, 0x58, 0x00, 0x11, 0x58, 0xc5, 0x31, 0xee, 0x00, 0xf2, + 0x99, 0xc5, 0x1f, 0x0c, 0x00, 0xf2, 0x88, 0xc9, 0x20, 0xa8, 0x00, 0xf2, + 0x79, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0x69, 0xc6, 0x60, 0xb1, 0x00, 0x11, + 0x68, 0xce, 0x01, 0x19, 0x00, 0xec, 0xb9, 0xc6, 0x01, 0x73, 0x05, 0x59, + 0xf8, 0xc7, 0x0e, 0x70, 0x00, 0xf6, 0x59, 0xca, 0x1f, 0x07, 0x00, 0x10, + 0x48, 0xca, 0x9b, 0xda, 0x00, 0xf1, 0x78, 0xce, 0x01, 0x19, 0x0e, 0xf8, + 0xc9, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0x90, 0x46, 0x00, 0x8b, 0x43, 0x96, + 0xef, 0xd2, 0x4d, 0x0f, 0x05, 0x5a, 0x50, 0xcc, 0x23, 0x3f, 0x00, 0x12, + 0xfa, 0x03, 0x96, 0xfb, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xc1, 0xce, 0x01, + 0x19, 0x00, 
0xec, 0xd1, 0x05, 0xc3, 0x97, 0x01, 0xc4, 0x14, 0xa6, 0x00, + 0x0d, 0xd0, 0xc9, 0xaa, 0x95, 0x0e, 0xf8, 0x60, 0x00, 0x43, 0x97, 0x0d, + 0xca, 0x9b, 0x80, 0x00, 0xf0, 0xe8, 0x42, 0x00, 0x30, 0xc3, 0x97, 0x19, + 0xca, 0x1f, 0x07, 0x00, 0x10, 0x28, 0xc5, 0x31, 0xee, 0x00, 0xf0, 0xb9, + 0xc5, 0x1f, 0x0c, 0x00, 0xf0, 0xa8, 0xc8, 0x61, 0x72, 0x00, 0x13, 0xf3, + 0x03, 0x97, 0x25, 0x0e, 0xc3, 0x97, 0x2b, 0x42, 0x00, 0x58, 0xc3, 0x97, + 0x37, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x49, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, + 0x91, 0x05, 0xc3, 0x97, 0x43, 0xc4, 0x14, 0xa6, 0x00, 0x13, 0xe9, 0xce, + 0x38, 0xe6, 0x05, 0x3d, 0x39, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0xa9, 0xce, + 0x1d, 0x93, 0x00, 0x10, 0x99, 0xc6, 0x01, 0x73, 0x00, 0x12, 0x68, 0xce, + 0x01, 0x19, 0x00, 0xec, 0xa1, 0xc4, 0x01, 0x23, 0x00, 0x12, 0xe8, 0xd1, + 0x51, 0x23, 0x0e, 0xf8, 0x98, 0xcb, 0x98, 0x58, 0x00, 0xf1, 0xc8, 0xcc, + 0x1e, 0xc1, 0x05, 0x59, 0xc1, 0xc3, 0x01, 0x5d, 0x01, 0x63, 0x08, 0xce, + 0x3e, 0xae, 0x00, 0xf4, 0xe1, 0xc8, 0x16, 0x15, 0x00, 0xf4, 0xd8, 0xc5, + 0x05, 0x02, 0x00, 0xf7, 0xa9, 0xc5, 0x00, 0xd4, 0x00, 0xf4, 0x78, 0xc2, + 0x00, 0xc0, 0x00, 0x0d, 0x83, 0x03, 0x97, 0x55, 0xc8, 0x9e, 0x5c, 0x00, + 0xf7, 0x38, 0x11, 0xc3, 0x97, 0x5b, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0xe2, + 0x03, 0x97, 0x67, 0xce, 0x74, 0xe8, 0x00, 0xf3, 0xd8, 0x00, 0x43, 0x97, + 0x6b, 0xc9, 0x08, 0xf7, 0x00, 0x07, 0xdb, 0x03, 0x97, 0x77, 0xc4, 0x65, + 0xe2, 0x00, 0x0e, 0xa0, 0xcd, 0x01, 0x1a, 0x00, 0xec, 0xc9, 0xc9, 0x9e, + 0xe7, 0x00, 0x0b, 0x78, 0xce, 0x36, 0x39, 0x05, 0x5a, 0x71, 0xc5, 0x01, + 0x74, 0x05, 0x3d, 0xc8, 0x45, 0x02, 0x9a, 0x43, 0x97, 0x7d, 0xc9, 0x08, + 0xf7, 0x00, 0x07, 0x13, 0x03, 0x97, 0x9b, 0xc4, 0x65, 0xe2, 0x00, 0x0e, + 0x70, 0x11, 0xc3, 0x97, 0xa1, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0x22, 0x03, + 0x97, 0xad, 0x0b, 0xc3, 0x97, 0xb3, 0xcd, 0x01, 0x1a, 0x00, 0xec, 0x78, + 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x49, 0xc5, 0x00, 0xd4, 0x00, 0xf4, 0x38, + 0xc5, 0x05, 0x02, 0x00, 0xf1, 0x29, 0xc5, 0x00, 0xd4, 0x00, 0xf1, 0x18, + 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x99, 0xc5, 0x00, 0xd4, 0x00, 0x0b, 0xe0, + 0x00, 0x43, 0x97, 0xbf, 0xd2, 0x25, 0xf1, 0x05, 0x3a, 0x88, 0xcf, 0x68, + 0x82, 0x00, 0xf2, 0x59, 0xcb, 0x4d, 0x16, 0x05, 0x59, 0xd9, 0xc6, 0xbd, + 0xf4, 0x00, 0x0a, 0x31, 0xc4, 0x65, 0xe2, 0x00, 0x0a, 0x41, 0xc3, 0x00, + 0x33, 0x00, 0x11, 0xa8, 0xc9, 0x64, 0x14, 0x00, 0xf2, 0x49, 0xc8, 0x6d, + 0x46, 0x00, 0x13, 0x91, 0xcd, 0x7b, 0x08, 0x00, 0x0c, 0xf0, 0x43, 0x05, + 0x19, 0xc3, 0x97, 0xcb, 0xc8, 0x25, 0xfb, 0x05, 0x3c, 0x88, 0x45, 0x02, + 0x9a, 0x43, 0x97, 0xd7, 0xc7, 0x0e, 0x70, 0x00, 0xf7, 0x21, 0x45, 0x00, + 0x5a, 0x43, 0x97, 0xf5, 0x00, 0x43, 0x98, 0x01, 0xc9, 0x9b, 0xdb, 0x00, + 0xf3, 0xc1, 0xc5, 0x05, 0x02, 0x00, 0xf3, 0xa0, 0xc6, 0x05, 0x01, 0x00, + 0xf3, 0xb0, 0xc9, 0x0e, 0x6e, 0x00, 0xf7, 0x11, 0xc5, 0x1e, 0xc8, 0x00, + 0xf7, 0x01, 0xca, 0x9e, 0x5a, 0x00, 0xf6, 0xf1, 0xc5, 0x1f, 0x0c, 0x00, + 0xf6, 0xe1, 0xc5, 0x31, 0xee, 0x00, 0xf6, 0xd0, 0xc9, 0x0e, 0x6e, 0x00, + 0xf6, 0xc1, 0xc5, 0x1e, 0xc8, 0x00, 0xf6, 0xb1, 0xca, 0x9e, 0x5a, 0x00, + 0xf6, 0xa1, 0xc5, 0x1f, 0x0c, 0x00, 0xf6, 0x91, 0xc5, 0x31, 0xee, 0x00, + 0xf6, 0x80, 0xc5, 0x05, 0x02, 0x00, 0xf6, 0x61, 0xc5, 0x00, 0xd4, 0x00, + 0x11, 0x72, 0x03, 0x98, 0x0d, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0x81, 0xc5, + 0x1f, 0x0c, 0x00, 0x10, 0x60, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0x91, 0xc5, + 0x1f, 0x0c, 0x00, 0xf2, 0x80, 0xc5, 0x05, 0x02, 0x00, 0xf6, 0x51, 0xc5, + 0x00, 0xd4, 0x00, 0x09, 0x80, 0x44, 0x02, 0x9b, 0xc3, 0x98, 0x13, 0xc5, + 0x05, 0x02, 0x00, 0xf0, 0xc0, 0xc5, 0x05, 0x02, 0x00, 0xf5, 0xc1, 0xc5, + 0x00, 0xd4, 
0x00, 0x08, 0xb0, 0xc9, 0x0e, 0x6e, 0x00, 0xf5, 0x61, 0xc5, + 0x1e, 0xc8, 0x00, 0xf5, 0x51, 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x41, 0xc5, + 0x1f, 0x0c, 0x00, 0xf5, 0x31, 0xc5, 0x31, 0xee, 0x00, 0xf5, 0x20, 0xc5, + 0x05, 0x02, 0x00, 0xf5, 0x01, 0xc5, 0x00, 0xd4, 0x00, 0x11, 0x32, 0x03, + 0x98, 0x31, 0xc5, 0x05, 0x02, 0x00, 0xf2, 0xd3, 0x03, 0x98, 0x37, 0xc5, + 0x00, 0xd4, 0x00, 0xf2, 0xc0, 0xca, 0x03, 0x87, 0x01, 0x5d, 0x19, 0xc9, + 0x01, 0x88, 0x01, 0x5d, 0x10, 0xc7, 0xc2, 0x03, 0x00, 0x89, 0x98, 0x02, + 0x43, 0x98, 0x3d, 0xc4, 0xad, 0x2b, 0x00, 0x89, 0xe9, 0xc5, 0xdb, 0xff, + 0x00, 0x8a, 0x78, 0x91, 0x00, 0x8c, 0xf8, 0x91, 0x00, 0x8b, 0xe9, 0x97, + 0x00, 0x8b, 0xf1, 0xc2, 0x19, 0x2c, 0x00, 0x8d, 0x28, 0x83, 0x00, 0x8c, + 0x23, 0x03, 0x98, 0x53, 0xc2, 0x02, 0x66, 0x00, 0x8c, 0x30, 0x87, 0x06, + 0xbd, 0x98, 0x87, 0x06, 0xbd, 0xb8, 0x91, 0x00, 0x8c, 0x78, 0x91, 0x00, + 0x8c, 0x88, 0x97, 0x00, 0x8c, 0xb1, 0x91, 0x06, 0xbd, 0xd0, 0x91, 0x06, + 0xbd, 0x80, 0x87, 0x00, 0x8d, 0x38, 0xc2, 0x37, 0xea, 0x06, 0xbd, 0xe9, + 0x87, 0x06, 0xbd, 0xf0, 0x91, 0x06, 0xbd, 0xf8, 0xc7, 0xc2, 0x03, 0x00, + 0x8e, 0x20, 0xc6, 0x8e, 0xde, 0x06, 0xbf, 0x61, 0xc6, 0xc0, 0x7c, 0x06, + 0xbf, 0x68, 0xc5, 0x8e, 0xdf, 0x00, 0x8f, 0x39, 0xcc, 0x79, 0xeb, 0x06, + 0xbf, 0x58, 0xc5, 0xc0, 0x7d, 0x00, 0x8f, 0x41, 0xc6, 0xc1, 0x86, 0x06, + 0xbf, 0x88, 0xc4, 0x79, 0xf3, 0x00, 0x8f, 0x51, 0xc6, 0xca, 0x0e, 0x06, + 0xbf, 0x70, 0xc4, 0xc6, 0x7a, 0x06, 0xbf, 0x79, 0xc6, 0xc6, 0x79, 0x06, + 0xbf, 0x80, 0xc7, 0xc2, 0x03, 0x06, 0xbe, 0x88, 0xc4, 0xc6, 0x7a, 0x06, + 0xbe, 0x91, 0xc6, 0xc6, 0x79, 0x06, 0xbe, 0x98, 0x02, 0x43, 0x98, 0x57, + 0xc6, 0x8e, 0xde, 0x00, 0x8e, 0x89, 0xc4, 0xad, 0x2b, 0x00, 0x8e, 0x91, + 0xc5, 0x90, 0xe4, 0x06, 0xbe, 0xc0, 0x02, 0x43, 0x98, 0x63, 0xc4, 0xad, + 0x2b, 0x00, 0x8e, 0xb1, 0xc6, 0x8e, 0xde, 0x06, 0xbe, 0xa8, 0xc6, 0xce, + 0xb1, 0x00, 0x8e, 0x78, 0xc6, 0xce, 0xb1, 0x06, 0xbe, 0xe0, 0xc5, 0xd9, + 0xca, 0x06, 0xbf, 0x08, 0xc4, 0xad, 0x2b, 0x00, 0x8e, 0xf1, 0xc5, 0xd9, + 0x61, 0x06, 0xbe, 0xf8, 0xc7, 0xc0, 0x7b, 0x06, 0xbf, 0x38, 0xc8, 0xba, + 0x7a, 0x06, 0xbf, 0x20, 0xc4, 0xc6, 0x7a, 0x06, 0xbf, 0x41, 0xc6, 0xc6, + 0x79, 0x06, 0xbf, 0x48, 0xc5, 0x8e, 0xdf, 0x00, 0x8f, 0x61, 0xc6, 0xbb, + 0xec, 0x00, 0x8f, 0x78, 0xca, 0x8e, 0xda, 0x00, 0x8f, 0x69, 0xc3, 0x39, + 0x37, 0x00, 0x8f, 0x88, 0xc6, 0x8e, 0xde, 0x01, 0x8b, 0xa1, 0xc6, 0xc0, + 0x7c, 0x01, 0x8b, 0xa8, 0xc3, 0x22, 0x45, 0x01, 0x9f, 0x59, 0xc3, 0x18, + 0x13, 0x01, 0x9f, 0x9a, 0x03, 0x98, 0x7b, 0xc3, 0x03, 0x26, 0x01, 0x9f, + 0x61, 0x9b, 0x01, 0x9f, 0xea, 0x03, 0x98, 0x7f, 0x02, 0x43, 0x98, 0x85, + 0xd3, 0x45, 0x4d, 0x0f, 0xd1, 0x81, 0xcf, 0x18, 0x0f, 0x0f, 0xd1, 0xb8, + 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x88, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x80, + 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x78, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x70, + 0xce, 0x74, 0x86, 0x00, 0xed, 0x68, 0xc4, 0xde, 0x3f, 0x00, 0xec, 0xd9, + 0x87, 0x00, 0xea, 0x30, 0x46, 0x00, 0x8b, 0x43, 0x98, 0x95, 0xca, 0xa8, + 0x14, 0x08, 0x3d, 0x08, 0xca, 0xa8, 0x14, 0x08, 0x3c, 0xe0, 0xcc, 0x23, + 0x3f, 0x00, 0xed, 0x39, 0xc9, 0xab, 0xb5, 0x00, 0x15, 0xb0, 0xca, 0x1f, + 0x59, 0x08, 0x3c, 0xa0, 0xc9, 0xaa, 0xcb, 0x08, 0x3c, 0xe8, 0xc9, 0xa9, + 0x2d, 0x08, 0x3c, 0x68, 0xc4, 0x00, 0x32, 0x08, 0x3c, 0x49, 0xce, 0x01, + 0x19, 0x08, 0x3c, 0x40, 0xc8, 0x4e, 0x93, 0x05, 0x38, 0x59, 0xd2, 0x4e, + 0x89, 0x05, 0x38, 0x80, 0xc4, 0x01, 0x9b, 0x00, 0x17, 0x88, 0xc8, 0x4e, + 0x93, 0x05, 0x38, 0x51, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x78, 0xcc, 0x1f, + 0x0c, 0x00, 0x17, 0xa9, 0xcc, 0x83, 0x0d, 0x00, 0x17, 0xb0, 0xc3, 0x11, + 0x7e, 0x0e, 
0xbe, 0x11, 0xc5, 0xd8, 0x8f, 0x0e, 0xbd, 0xc0, 0xc3, 0x11, + 0x7e, 0x0e, 0xbd, 0x41, 0xc5, 0xd8, 0x8f, 0x0e, 0xbc, 0xf0, 0xc7, 0x00, + 0x90, 0x0e, 0xbd, 0x08, 0xc2, 0x02, 0xae, 0x0e, 0x8f, 0x39, 0xc4, 0x03, + 0xc8, 0x0e, 0x8f, 0x30, 0xc4, 0x2c, 0x0d, 0x0e, 0x8e, 0x31, 0xc5, 0x02, + 0xc2, 0x0e, 0x8d, 0xf1, 0xc5, 0x01, 0xfc, 0x0e, 0x8d, 0xe8, 0xc4, 0x2c, + 0x0d, 0x0e, 0x8e, 0x21, 0xc5, 0x02, 0xc2, 0x0e, 0x8d, 0xd1, 0xc5, 0x01, + 0xfc, 0x0e, 0x8d, 0xc8, 0x49, 0xaf, 0xd2, 0xc3, 0x98, 0xa4, 0x46, 0x67, + 0x3c, 0x43, 0x98, 0xb0, 0xd0, 0x5b, 0x02, 0x0e, 0x88, 0xe1, 0xca, 0x74, + 0x98, 0x0e, 0x88, 0xd8, 0x4c, 0x7e, 0x07, 0x43, 0x98, 0xbc, 0xcd, 0x7e, + 0x07, 0x0e, 0x8e, 0x48, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0xa9, 0xc5, 0x01, + 0xfc, 0x0e, 0x8a, 0xa0, 0x43, 0x11, 0x49, 0xc3, 0x98, 0xc8, 0x45, 0x11, + 0x17, 0xc3, 0x98, 0xda, 0x46, 0x00, 0x2c, 0xc3, 0x98, 0xe6, 0x45, 0x00, + 0x49, 0x43, 0x98, 0xf2, 0x15, 0xc3, 0x98, 0xfe, 0xc8, 0xbe, 0xfa, 0x0e, + 0x8d, 0x61, 0xc6, 0xcd, 0x9d, 0x0e, 0x8d, 0x59, 0x42, 0x00, 0x58, 0xc3, + 0x99, 0x14, 0x16, 0xc3, 0x99, 0x26, 0xc4, 0x93, 0xd1, 0x0e, 0x8c, 0x49, + 0x42, 0x01, 0x09, 0xc3, 0x99, 0x30, 0xc3, 0x07, 0x30, 0x0e, 0x8c, 0x31, + 0xc5, 0xdb, 0x69, 0x0e, 0x8c, 0x11, 0x03, 0xc3, 0x99, 0x3c, 0xc7, 0xc2, + 0x73, 0x0e, 0x8b, 0xfa, 0x03, 0x99, 0x4b, 0xc2, 0x00, 0xfa, 0x0e, 0x8d, + 0xc3, 0x03, 0x99, 0x51, 0x87, 0x0e, 0x8a, 0xe0, 0xa0, 0x0e, 0x8b, 0x61, + 0x9f, 0x0e, 0x8b, 0x59, 0x9e, 0x0e, 0x8b, 0x50, 0xa0, 0x0e, 0x88, 0x79, + 0x9f, 0x0e, 0x88, 0x71, 0x9e, 0x0e, 0x88, 0x68, 0x12, 0xc3, 0x99, 0x57, + 0xc4, 0xe3, 0xab, 0x00, 0xff, 0xd9, 0xc5, 0x28, 0x47, 0x00, 0xff, 0xd1, + 0xc5, 0x6c, 0xa6, 0x00, 0xfb, 0x4b, 0x03, 0x99, 0x66, 0xc5, 0x63, 0xdc, + 0x00, 0x1c, 0x78, 0xc4, 0xe3, 0xab, 0x00, 0xff, 0xc9, 0xc5, 0x28, 0x47, + 0x00, 0xff, 0xc1, 0xc5, 0x6c, 0xa6, 0x00, 0xfa, 0x4b, 0x03, 0x99, 0x6c, + 0xc5, 0xd8, 0xc1, 0x00, 0xfa, 0x43, 0x03, 0x99, 0x72, 0xc5, 0x63, 0xdc, + 0x00, 0x1c, 0x60, 0xc4, 0x28, 0x48, 0x00, 0xff, 0x51, 0xc5, 0xd6, 0x41, + 0x00, 0xff, 0x40, 0xc4, 0x59, 0x13, 0x00, 0xfa, 0xcb, 0x03, 0x99, 0x78, + 0xc8, 0x63, 0xd3, 0x00, 0x1d, 0x58, 0xc4, 0x28, 0x48, 0x00, 0xfe, 0xd1, + 0xc5, 0xd6, 0x41, 0x00, 0xfe, 0xc0, 0xc4, 0x59, 0x13, 0x00, 0xf9, 0xcb, + 0x03, 0x99, 0x7e, 0xc8, 0x63, 0xd3, 0x00, 0x1d, 0x50, 0x45, 0x02, 0x9a, + 0x43, 0x99, 0x84, 0x12, 0xc3, 0x99, 0x96, 0xc4, 0xe3, 0xab, 0x00, 0xfe, + 0x59, 0xc5, 0x28, 0x47, 0x00, 0xfe, 0x51, 0xc5, 0x6c, 0xa6, 0x00, 0xf9, + 0x4b, 0x03, 0x99, 0xa5, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x48, 0xc4, 0xe3, + 0xab, 0x00, 0xfe, 0x49, 0xc5, 0x28, 0x47, 0x00, 0xfe, 0x41, 0xc5, 0x6c, + 0xa6, 0x00, 0xf8, 0xcb, 0x03, 0x99, 0xab, 0xc5, 0xd8, 0xc1, 0x00, 0xf8, + 0xc3, 0x03, 0x99, 0xb1, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x40, 0x12, 0xc3, + 0x99, 0xb7, 0xc4, 0xe3, 0xab, 0x00, 0xfd, 0xd9, 0x18, 0xc3, 0x99, 0xc6, + 0xc6, 0x60, 0xb1, 0x00, 0xfd, 0xc9, 0xc5, 0x6c, 0xa6, 0x00, 0xf8, 0x4b, + 0x03, 0x99, 0xd2, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x30, 0x12, 0xc3, 0x99, + 0xd8, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xeb, 0x03, 0x99, 0xea, 0xcd, 0x4a, + 0x68, 0x00, 0xff, 0x99, 0xc5, 0x28, 0x47, 0x00, 0xfb, 0xe3, 0x03, 0x99, + 0xf0, 0xc5, 0x6c, 0xa6, 0x00, 0xfb, 0x0b, 0x03, 0x99, 0xf6, 0xc5, 0x63, + 0xdc, 0x00, 0x1e, 0x70, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xc9, 0xc5, 0x28, + 0x47, 0x00, 0xfb, 0xc1, 0xc5, 0x6c, 0xa6, 0x00, 0xfa, 0x0b, 0x03, 0x99, + 0xfc, 0xc5, 0xd8, 0xc1, 0x00, 0xfa, 0x03, 0x03, 0x9a, 0x02, 0xc5, 0x63, + 0xdc, 0x00, 0x1e, 0x60, 0xc8, 0x63, 0xd3, 0x00, 0x1e, 0x5b, 0x03, 0x9a, + 0x08, 0xc4, 0x59, 0x13, 0x00, 0xfa, 0x8a, 0x03, 0x9a, 0x0e, 0xca, 0x94, + 0x91, 0x00, 
0xff, 0x31, 0xc4, 0x7a, 0x04, 0x00, 0xfa, 0x82, 0x03, 0x9a, + 0x14, 0xc5, 0xd6, 0x41, 0x00, 0xff, 0x01, 0xc4, 0x28, 0x48, 0x00, 0xfb, + 0xd0, 0xc8, 0x63, 0xd3, 0x00, 0x1e, 0x53, 0x03, 0x9a, 0x1a, 0xc4, 0x59, + 0x13, 0x00, 0xf9, 0x8a, 0x03, 0x9a, 0x20, 0xca, 0x94, 0x91, 0x00, 0xfe, + 0xb1, 0xc4, 0x7a, 0x04, 0x00, 0xf9, 0x82, 0x03, 0x9a, 0x26, 0xc5, 0xd6, + 0x41, 0x00, 0xfe, 0x81, 0xc4, 0x28, 0x48, 0x00, 0xfb, 0xb0, 0x12, 0xc3, + 0x9a, 0x2c, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xab, 0x03, 0x9a, 0x3e, 0xcd, + 0x4a, 0x68, 0x00, 0xfe, 0x19, 0xc5, 0x28, 0x47, 0x00, 0xfb, 0xa3, 0x03, + 0x9a, 0x44, 0xc5, 0x6c, 0xa6, 0x00, 0xf9, 0x0b, 0x03, 0x9a, 0x4a, 0xc5, + 0x63, 0xdc, 0x00, 0x1d, 0x70, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x99, 0xc5, + 0x28, 0x47, 0x00, 0xfb, 0x91, 0xc5, 0x6c, 0xa6, 0x00, 0xf8, 0x8b, 0x03, + 0x9a, 0x50, 0xc5, 0xd8, 0xc1, 0x00, 0xf8, 0x83, 0x03, 0x9a, 0x56, 0xc5, + 0x63, 0xdc, 0x00, 0x1d, 0x68, 0x12, 0xc3, 0x9a, 0x5c, 0xc4, 0xe3, 0xab, + 0x00, 0xfb, 0x8b, 0x03, 0x9a, 0x6e, 0xcd, 0x4a, 0x68, 0x00, 0xfd, 0x99, + 0x18, 0xc3, 0x9a, 0x74, 0xc6, 0x60, 0xb1, 0x00, 0xfd, 0x89, 0xc5, 0x6c, + 0xa6, 0x00, 0xf8, 0x0b, 0x03, 0x9a, 0x83, 0xc5, 0x63, 0xdc, 0x00, 0x1d, + 0x60, 0xc7, 0xb9, 0xdb, 0x08, 0x0a, 0x59, 0xc7, 0x67, 0xc7, 0x08, 0x0a, + 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x0a, 0x2b, 0x03, 0x9a, 0x89, 0x16, 0xc3, + 0x9a, 0x8d, 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0x78, 0x16, 0xc3, 0x9a, 0x9c, + 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0x88, 0xc7, 0x0d, 0x04, 0x08, 0x0b, 0x51, + 0xc8, 0x4b, 0x94, 0x08, 0x0b, 0x88, 0xc4, 0x0d, 0x0e, 0x08, 0x0b, 0x29, + 0xcb, 0x13, 0xfa, 0x08, 0x0b, 0x58, 0xc8, 0x4b, 0x94, 0x08, 0x0b, 0x91, + 0xc7, 0x0d, 0x04, 0x08, 0x0b, 0x70, 0xc8, 0x0d, 0x03, 0x08, 0x0b, 0x68, + 0xcf, 0x6b, 0x25, 0x08, 0x0b, 0x38, 0xc2, 0xe5, 0xfd, 0x08, 0x1e, 0x68, + 0x11, 0xc3, 0x9a, 0xab, 0xc4, 0x69, 0xaa, 0x0e, 0x7d, 0xca, 0x03, 0x9a, + 0xbd, 0xd4, 0x3e, 0xe4, 0x00, 0xef, 0xf9, 0xd2, 0x4d, 0x8d, 0x00, 0x1a, + 0xb0, 0xc2, 0x01, 0x2d, 0x09, 0x19, 0x99, 0xc3, 0x02, 0x2c, 0x09, 0x19, + 0x90, 0xc9, 0x40, 0xaa, 0x09, 0x12, 0xe8, 0xca, 0x9c, 0x98, 0x09, 0x10, + 0x79, 0xc9, 0x40, 0xaa, 0x09, 0x10, 0x70, 0xc8, 0xaa, 0xf0, 0x09, 0x1c, + 0x51, 0xc4, 0x58, 0xf5, 0x09, 0x10, 0x08, 0xa0, 0x09, 0x10, 0x33, 0x03, + 0x9a, 0xc3, 0x9f, 0x09, 0x10, 0x28, 0xcc, 0x36, 0x65, 0x09, 0x27, 0xa9, + 0xc3, 0x36, 0x6e, 0x09, 0x27, 0xa0, 0xc9, 0xab, 0x25, 0x09, 0x0e, 0x38, + 0x94, 0x09, 0x0e, 0x28, 0xc8, 0x65, 0xd0, 0x09, 0x0f, 0x39, 0x83, 0x09, + 0x0f, 0x30, 0xc2, 0x38, 0xb6, 0x09, 0x0f, 0x19, 0x89, 0x09, 0x0f, 0x10, + 0xc2, 0x5d, 0xd4, 0x09, 0x0e, 0xfb, 0x03, 0x9a, 0xc9, 0x4e, 0x72, 0x8e, + 0xc3, 0x9a, 0xcf, 0xca, 0xa6, 0x16, 0x09, 0x0e, 0xe0, 0xc8, 0xa7, 0xb2, + 0x09, 0x0e, 0xc8, 0x8e, 0x09, 0x0e, 0xb8, 0x8e, 0x09, 0x0e, 0x93, 0x03, + 0x9a, 0xdb, 0xa0, 0x09, 0x0e, 0x88, 0x90, 0x09, 0x0e, 0x80, 0x46, 0x25, + 0xd4, 0x43, 0x9a, 0xe1, 0x8e, 0x09, 0x0e, 0x48, 0xc3, 0x1d, 0x23, 0x09, + 0x0d, 0xe1, 0xc3, 0x1a, 0xf4, 0x09, 0x0d, 0xd9, 0xca, 0xa4, 0x4a, 0x09, + 0x0d, 0xd0, 0x8f, 0x09, 0x26, 0x39, 0x86, 0x09, 0x07, 0x38, 0xc9, 0xab, + 0xd0, 0x09, 0x07, 0x30, 0xc2, 0x04, 0x2b, 0x09, 0x26, 0x31, 0xc2, 0x8d, + 0xc6, 0x09, 0x26, 0x28, 0xca, 0x51, 0xd4, 0x09, 0x26, 0x08, 0x83, 0x09, + 0x25, 0xf1, 0xcc, 0x81, 0x15, 0x09, 0x06, 0x88, 0xc8, 0xaa, 0xef, 0x09, + 0x06, 0x98, 0x46, 0x25, 0xd4, 0x43, 0x9a, 0xed, 0xc7, 0x25, 0xd4, 0x09, + 0x06, 0x78, 0xc6, 0x45, 0xad, 0x09, 0x25, 0xc9, 0xc8, 0x6a, 0x1e, 0x09, + 0x25, 0xc0, 0xc4, 0x39, 0xc8, 0x09, 0x25, 0xb9, 0xc9, 0xa6, 0x49, 0x09, + 0x06, 0x28, 0xc9, 0xab, 0x37, 0x09, 0x05, 0xf0, 0x45, 0x03, 0x55, 0xc3, + 0x9a, 0xf9, 
0x46, 0x1f, 0x67, 0xc3, 0x9b, 0x05, 0x48, 0x0b, 0xc8, 0xc3, + 0x9b, 0x1b, 0xc7, 0x27, 0xb2, 0x0e, 0xc7, 0xd1, 0x45, 0x13, 0x6f, 0xc3, + 0x9b, 0x30, 0xc4, 0x0e, 0x65, 0x0e, 0xc7, 0xb0, 0x46, 0x0e, 0xce, 0xc3, + 0x9b, 0x42, 0x14, 0xc3, 0x9b, 0x64, 0xc6, 0x04, 0xcb, 0x0e, 0xc0, 0x73, + 0x03, 0x9b, 0x70, 0xc6, 0x58, 0xac, 0x0e, 0xc0, 0x5b, 0x03, 0x9b, 0x74, + 0xd0, 0x58, 0xa2, 0x0e, 0xc0, 0x9b, 0x03, 0x9b, 0x78, 0xc4, 0x18, 0xf2, + 0x0e, 0xc0, 0x33, 0x03, 0x9b, 0x7e, 0xc6, 0xcc, 0x41, 0x0e, 0xc0, 0x50, + 0xca, 0x13, 0x91, 0x0e, 0xc6, 0x69, 0xcd, 0x3a, 0x9e, 0x0e, 0xc6, 0x40, + 0xc6, 0x13, 0x95, 0x0e, 0xc6, 0x59, 0x47, 0xc6, 0xcc, 0xc3, 0x9b, 0x84, + 0x05, 0xc3, 0x9b, 0x90, 0xcf, 0x64, 0xb3, 0x0e, 0xc1, 0x80, 0xcb, 0x4d, + 0x82, 0x0e, 0xc6, 0x48, 0x00, 0x43, 0x9b, 0x9c, 0xc6, 0x0e, 0xcd, 0x0e, + 0xc4, 0xe0, 0xc4, 0x0e, 0xcf, 0x0e, 0xc4, 0xd1, 0xcc, 0x86, 0xe5, 0x0e, + 0xc4, 0xc8, 0x00, 0x43, 0x9b, 0xa8, 0xcb, 0x4d, 0x82, 0x0e, 0xc3, 0x1a, + 0x03, 0x9b, 0xb4, 0xca, 0x4d, 0x83, 0x0e, 0xc2, 0xf1, 0xd3, 0x46, 0x57, + 0x0e, 0xc2, 0x6a, 0x03, 0x9b, 0xba, 0x00, 0x43, 0x9b, 0xbe, 0x00, 0x43, + 0x9b, 0xd9, 0x00, 0x43, 0x9b, 0xee, 0xc4, 0x0c, 0x4d, 0x0e, 0xc6, 0x10, + 0xc6, 0x13, 0x95, 0x0e, 0xc5, 0x41, 0xc4, 0x00, 0x9d, 0x0e, 0xc4, 0x48, + 0xc4, 0x0c, 0x4d, 0x0e, 0xc4, 0xf0, 0xc5, 0x0e, 0xce, 0x0e, 0xc7, 0x83, + 0x03, 0x9b, 0xfa, 0xc6, 0x58, 0xac, 0x0e, 0xc6, 0xd9, 0xcb, 0x13, 0x90, + 0x0e, 0xc6, 0x09, 0x47, 0x04, 0xcb, 0x43, 0x9b, 0xfe, 0xc5, 0x06, 0x82, + 0x0e, 0xc5, 0x13, 0x03, 0x9c, 0x0d, 0xc5, 0x0e, 0xce, 0x0e, 0xc4, 0xd8, + 0xcf, 0x69, 0xdb, 0x0e, 0xc4, 0x18, 0xc8, 0xbc, 0x62, 0x0e, 0xc4, 0x09, + 0x46, 0x0e, 0xce, 0x43, 0x9c, 0x13, 0x00, 0x43, 0x9c, 0x1f, 0x00, 0x43, + 0x9c, 0x2b, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0x99, 0xc4, 0x0e, 0xe2, 0x0e, + 0xc3, 0x78, 0x00, 0x43, 0x9c, 0x3a, 0xc5, 0x05, 0x74, 0x0e, 0xc2, 0xa0, + 0xc5, 0x18, 0xf1, 0x0e, 0xc6, 0xa8, 0xcb, 0x13, 0x90, 0x0e, 0xc5, 0xd9, + 0xc6, 0x04, 0xcb, 0x0e, 0xc0, 0x7b, 0x03, 0x9c, 0x46, 0xc5, 0x58, 0xac, + 0x0e, 0xc0, 0x69, 0xc4, 0x18, 0xf2, 0x0e, 0xc0, 0x38, 0xc5, 0xdd, 0x17, + 0x0e, 0xcd, 0x69, 0xca, 0x9e, 0x8c, 0x0e, 0xcd, 0x30, 0xc5, 0x17, 0x14, + 0x0e, 0xcc, 0x73, 0x03, 0x9c, 0x4a, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0x69, + 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0x60, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0x89, + 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0x80, 0xc2, 0x00, 0x15, 0x0e, 0xcc, 0x58, + 0xcb, 0x57, 0xc7, 0x0f, 0xc1, 0x79, 0xca, 0xa0, 0x08, 0x0f, 0xc1, 0x59, + 0x49, 0xa8, 0xdc, 0xc3, 0x9c, 0x50, 0xd8, 0x24, 0xb3, 0x01, 0x5b, 0xe9, + 0xcc, 0x84, 0x09, 0x0f, 0xc1, 0x19, 0xcc, 0x82, 0x1d, 0x0f, 0xc1, 0x39, + 0xd0, 0x57, 0xc2, 0x0f, 0xc1, 0x98, 0xe0, 0x09, 0x47, 0x01, 0x5c, 0x18, + 0xcf, 0x2c, 0x35, 0x01, 0x5b, 0xe1, 0xd1, 0x01, 0x68, 0x01, 0x5b, 0xe0, + 0xc7, 0x09, 0x0d, 0x01, 0x5d, 0x29, 0xc9, 0x03, 0xc8, 0x01, 0x5d, 0x38, + 0xcf, 0x2c, 0x35, 0x01, 0x48, 0xb9, 0xd6, 0x2d, 0x62, 0x01, 0x48, 0xc0, + 0xc8, 0x62, 0x44, 0x01, 0x4b, 0x61, 0xdd, 0x10, 0xdd, 0x01, 0x4b, 0x40, + 0xe0, 0x06, 0xe7, 0x01, 0x4b, 0x20, 0xcc, 0x00, 0xfb, 0x07, 0xe8, 0x51, + 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x70, 0x45, 0x19, 0x60, 0xc3, 0x9c, 0x5c, + 0xce, 0x43, 0x77, 0x07, 0xed, 0x50, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x59, + 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x50, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x61, + 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x68, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x29, + 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x20, 0xdc, 0x14, 0x69, 0x07, 0xea, 0x61, + 0xd2, 0x49, 0x9d, 0x07, 0xef, 0xd0, 0xe0, 0x00, 0xe7, 0x07, 0xef, 0x80, + 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x89, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x90, + 0xca, 0x26, 
0xf7, 0x07, 0xea, 0x89, 0xcc, 0x10, 0xb4, 0x07, 0xea, 0x90, + 0xca, 0x26, 0xf7, 0x07, 0xe3, 0x49, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x20, + 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xa9, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0xa0, + 0x48, 0x06, 0x5f, 0xc3, 0x9c, 0x68, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0x59, + 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x50, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0x69, + 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x60, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x11, + 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xd0, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x09, + 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xc0, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x01, + 0xcc, 0x10, 0xb4, 0x07, 0xe4, 0xd8, 0xcb, 0x10, 0xb5, 0x07, 0xdf, 0xc1, + 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0xb0, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0xf1, + 0xcc, 0x10, 0xb4, 0x07, 0xed, 0xa0, 0xcf, 0x0e, 0x7d, 0x00, 0x31, 0xf9, + 0xcd, 0x04, 0xe7, 0x00, 0x31, 0xf0, 0xca, 0x09, 0x9d, 0x00, 0x3b, 0xb9, + 0x16, 0x43, 0x9c, 0x74, 0xc5, 0x05, 0x02, 0x00, 0x35, 0x1b, 0x03, 0x9c, + 0x80, 0xcb, 0x98, 0xb0, 0x00, 0x35, 0x10, 0x4a, 0x0e, 0x7d, 0xc3, 0x9c, + 0x86, 0xcd, 0x04, 0xfa, 0x00, 0x3b, 0x00, 0xcf, 0x0e, 0x7d, 0x00, 0x35, + 0xa1, 0xcd, 0x04, 0xfa, 0x00, 0x35, 0x90, 0xd7, 0x2b, 0x3a, 0x00, 0x46, + 0x39, 0x98, 0x00, 0x35, 0xa8, 0xc8, 0xa7, 0x26, 0x00, 0x45, 0x31, 0xc7, + 0x16, 0x16, 0x00, 0x35, 0xb0, 0xc5, 0x05, 0x02, 0x00, 0x35, 0xc1, 0xc5, + 0x00, 0xd4, 0x00, 0x35, 0xb8, 0xc5, 0x05, 0x02, 0x00, 0x46, 0x31, 0xc5, + 0x00, 0xd4, 0x00, 0x46, 0x28, 0xc5, 0x05, 0x02, 0x00, 0x45, 0x99, 0xc5, + 0x00, 0xd4, 0x00, 0x35, 0x01, 0xd8, 0x26, 0x03, 0x00, 0x3a, 0xf0, 0xc5, + 0x00, 0xd4, 0x00, 0x3a, 0xe9, 0xd0, 0x25, 0x7b, 0x00, 0x3a, 0xf8, 0x49, + 0xb2, 0xab, 0xc3, 0x9c, 0x92, 0xd3, 0x45, 0x3a, 0x00, 0x43, 0x93, 0x03, + 0x9c, 0xba, 0xc9, 0x16, 0x14, 0x00, 0x43, 0xd1, 0xd2, 0x4e, 0x53, 0x00, + 0x43, 0x99, 0x4b, 0x5e, 0x02, 0xc3, 0x9c, 0xc0, 0x46, 0x08, 0x09, 0xc3, + 0x9c, 0xcc, 0xcb, 0x82, 0x59, 0x00, 0x31, 0x13, 0x03, 0x9c, 0xde, 0x5d, + 0x10, 0x12, 0x43, 0x9c, 0xe2, 0x00, 0x43, 0x9c, 0xee, 0xcd, 0x00, 0xfa, + 0x07, 0xf7, 0x79, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0x80, 0x48, 0x04, 0xe7, + 0xc3, 0x9c, 0xfa, 0x4a, 0x0e, 0x7d, 0x43, 0x9d, 0x06, 0x44, 0x05, 0x18, + 0xc3, 0x9d, 0x18, 0x16, 0xc3, 0x9d, 0x24, 0xc4, 0x00, 0x9d, 0x00, 0x35, + 0x58, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0xd9, 0xcc, 0x00, 0xfb, 0x07, 0xf6, + 0xc8, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0x01, 0xcc, 0x00, 0xfb, 0x07, 0xdb, + 0xf0, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0x01, 0xcc, 0x00, 0xfb, 0x07, 0xda, + 0xf0, 0x98, 0x00, 0x45, 0xf9, 0xc9, 0xad, 0xda, 0x00, 0x45, 0xc0, 0x00, + 0x43, 0x9d, 0x30, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0x21, 0xcc, 0x00, 0xfb, + 0x07, 0xdb, 0x10, 0xcd, 0x04, 0xe7, 0x00, 0x45, 0x19, 0x4a, 0x0e, 0x7d, + 0x43, 0x9d, 0x42, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0xc9, 0xcb, 0x10, 0xb5, + 0x07, 0xf4, 0xd8, 0x52, 0x16, 0x02, 0xc3, 0x9d, 0x4e, 0xcf, 0x67, 0xce, + 0x00, 0x36, 0x89, 0xc3, 0x14, 0xa7, 0x00, 0x36, 0x68, 0x00, 0x43, 0x9d, + 0x60, 0x45, 0x00, 0x8c, 0xc3, 0x9d, 0x70, 0xca, 0x26, 0xf7, 0x07, 0xdd, + 0x79, 0xcd, 0x00, 0xfa, 0x07, 0xdd, 0x70, 0x45, 0x03, 0x14, 0xc3, 0x9d, + 0x7f, 0xc5, 0x01, 0x74, 0x00, 0x3a, 0xd8, 0xc5, 0x00, 0xd4, 0x00, 0x34, + 0xb9, 0xd0, 0x25, 0x7b, 0x00, 0x3b, 0x58, 0xce, 0x16, 0x0f, 0x00, 0x34, + 0xb0, 0xca, 0xa6, 0xd4, 0x00, 0x45, 0xb1, 0x98, 0x00, 0x3a, 0xb2, 0x03, + 0x9d, 0x92, 0xdb, 0x16, 0x02, 0x00, 0x3a, 0xa1, 0x4a, 0x0e, 0x7d, 0x43, + 0x9d, 0x98, 0xcb, 0x10, 0xb5, 0x07, 0xda, 0xe1, 0xcc, 0x00, 0xfb, 0x07, + 0xda, 0xd0, 0xd2, 0x49, 0xc1, 0x00, 0x45, 0xa8, 0xc5, 0x05, 0x02, 0x00, + 0x45, 0x79, 0xc5, 0x00, 0xd4, 0x00, 0x34, 0xf0, 0xcf, 0x67, 0xce, 0x00, + 0x34, 0xdb, 
0x03, 0x9d, 0xa7, 0xd8, 0x25, 0x73, 0x00, 0x3b, 0x68, 0xe0, + 0x0a, 0x67, 0x00, 0x3b, 0xe8, 0xe0, 0x02, 0x87, 0x00, 0x3b, 0x80, 0x16, + 0xc3, 0x9d, 0xad, 0x49, 0x1d, 0x6f, 0xc3, 0x9d, 0xb9, 0xcf, 0x3b, 0x79, + 0x00, 0x34, 0x81, 0xc9, 0x0e, 0x6e, 0x00, 0x34, 0x53, 0x03, 0x9d, 0xc3, + 0xc4, 0x00, 0x9d, 0x00, 0x34, 0x48, 0xcc, 0x00, 0xfb, 0x07, 0xf5, 0xe9, + 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0xf8, 0xcd, 0x00, 0xfa, 0x07, 0xf4, 0x29, + 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x30, 0xc5, 0x00, 0xd4, 0x00, 0x3b, 0x29, + 0xc5, 0x05, 0x02, 0x00, 0x3b, 0x30, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0xe1, + 0xcc, 0x00, 0xfb, 0x07, 0xdc, 0xd0, 0xcf, 0x0e, 0x7d, 0x00, 0x35, 0xe9, + 0xcd, 0x04, 0xfa, 0x00, 0x3b, 0x38, 0xc3, 0x02, 0x97, 0x00, 0x3b, 0x41, + 0x98, 0x00, 0x3b, 0x48, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0xf1, 0xca, 0x26, + 0xf7, 0x07, 0xdc, 0xf8, 0xd6, 0x31, 0x56, 0x00, 0x44, 0x51, 0x16, 0xc3, + 0x9d, 0xc9, 0xcb, 0x08, 0x09, 0x00, 0x34, 0x09, 0x46, 0x09, 0x3f, 0xc3, + 0x9d, 0xd5, 0x58, 0x24, 0x9b, 0x43, 0x9d, 0xdb, 0xcc, 0x00, 0xfb, 0x07, + 0xf5, 0x09, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0x18, 0xcb, 0x64, 0x7b, 0x07, + 0xdd, 0x69, 0x0b, 0xc3, 0x9d, 0xe5, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x58, + 0xcb, 0x64, 0x7b, 0x07, 0xdd, 0x49, 0x0b, 0xc3, 0x9d, 0xf1, 0xca, 0x26, + 0xf7, 0x07, 0xdd, 0x39, 0xd0, 0x5f, 0x02, 0x00, 0x36, 0x10, 0x00, 0x43, + 0x9d, 0xfd, 0xcf, 0x04, 0xd8, 0x0f, 0xdd, 0x23, 0x03, 0x9e, 0x09, 0xe0, + 0x04, 0xc7, 0x0f, 0xdd, 0x40, 0xcf, 0x04, 0xd8, 0x0f, 0xdd, 0x2b, 0x03, + 0x9e, 0x0f, 0xdf, 0x0c, 0xe1, 0x0f, 0xdd, 0x48, 0xd3, 0x45, 0x4d, 0x0f, + 0xd1, 0x89, 0xcf, 0x18, 0x0f, 0x0f, 0xd1, 0xe8, 0x96, 0x0b, 0x4b, 0xb8, + 0xc2, 0x10, 0x11, 0x0b, 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf9, 0xa4, 0x01, + 0x43, 0xfa, 0x03, 0x9e, 0x15, 0xa5, 0x01, 0x46, 0xf8, 0xa5, 0x01, 0x47, + 0x78, 0xa5, 0x01, 0x47, 0xb8, 0xa5, 0x01, 0x47, 0xd8, 0xa5, 0x01, 0x47, + 0xe8, 0xa5, 0x01, 0x47, 0xf0, 0xc7, 0x09, 0x0d, 0x01, 0x5d, 0x21, 0xc9, + 0x03, 0xc8, 0x01, 0x5d, 0x30, 0xcf, 0x2c, 0x35, 0x01, 0x5b, 0xd1, 0xd1, + 0x01, 0x68, 0x01, 0x5b, 0xd0, 0xcf, 0x2c, 0x35, 0x01, 0x59, 0xe1, 0xd6, + 0x2d, 0x62, 0x01, 0x59, 0xe8, 0xc8, 0x62, 0x44, 0x01, 0x4b, 0x51, 0xdf, + 0x09, 0x68, 0x01, 0x4b, 0x10, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xa9, 0xc8, + 0x74, 0x8c, 0x00, 0x13, 0xd3, 0x03, 0x9e, 0x19, 0xcc, 0x1e, 0xc1, 0x05, + 0x5b, 0x41, 0xc4, 0x01, 0x23, 0x00, 0x13, 0xd9, 0xc4, 0x14, 0xa6, 0x01, + 0x63, 0xc8, 0x46, 0x00, 0x8b, 0x43, 0x9e, 0x1f, 0xcc, 0x23, 0x3f, 0x05, + 0x5a, 0x20, 0xc9, 0xa9, 0x2d, 0x00, 0x15, 0x78, 0xc9, 0x0e, 0x6e, 0x00, + 0xf7, 0x19, 0xc5, 0x1e, 0xc8, 0x00, 0xf7, 0x09, 0xca, 0x9e, 0x5a, 0x00, + 0xf6, 0xf9, 0xc5, 0x1f, 0x0c, 0x00, 0xf6, 0xe9, 0xc5, 0x31, 0xee, 0x00, + 0xf6, 0xd8, 0xc9, 0x0e, 0x6e, 0x00, 0xf6, 0xc9, 0xc5, 0x1e, 0xc8, 0x00, + 0xf6, 0xb9, 0xca, 0x9e, 0x5a, 0x00, 0xf6, 0xa9, 0xc5, 0x1f, 0x0c, 0x00, + 0xf6, 0x99, 0xc5, 0x31, 0xee, 0x00, 0xf6, 0x88, 0xc5, 0x05, 0x02, 0x00, + 0xf6, 0x69, 0xc5, 0x00, 0xd4, 0x00, 0x11, 0x7a, 0x03, 0x9e, 0x2b, 0xc5, + 0x31, 0xee, 0x00, 0x0a, 0x89, 0xc5, 0x1f, 0x0c, 0x00, 0x10, 0x68, 0xce, + 0x01, 0x19, 0x05, 0x5b, 0x31, 0xc4, 0x00, 0x32, 0x00, 0x15, 0x28, 0xc9, + 0x20, 0xb1, 0x00, 0x14, 0x20, 0xc3, 0x00, 0x33, 0x00, 0x14, 0x99, 0xc4, + 0x65, 0xe2, 0x00, 0x0f, 0x78, 0x44, 0x02, 0x9b, 0xc3, 0x9e, 0x31, 0xc5, + 0x05, 0x02, 0x00, 0xf0, 0xc8, 0xc5, 0x05, 0x02, 0x00, 0xf5, 0xc9, 0xc5, + 0x00, 0xd4, 0x00, 0x08, 0xb8, 0x45, 0x02, 0x9a, 0x43, 0x9e, 0x4f, 0xc9, + 0x64, 0x14, 0x00, 0xf2, 0xf9, 0xc7, 0x74, 0x8d, 0x00, 0x13, 0xe0, 0x42, + 0x00, 0x30, 0xc3, 0x9e, 0x5b, 0xca, 0x1f, 0x07, 0x00, 0x10, 0x88, 0xcb, + 0x4d, 0x16, 
0x05, 0x5a, 0x49, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0xb9, 0xc4, + 0x65, 0xe2, 0x00, 0x0a, 0xc8, 0x45, 0x02, 0x9a, 0x43, 0x9e, 0x6a, 0xc7, + 0x0e, 0x70, 0x00, 0xf7, 0x29, 0x45, 0x00, 0x5a, 0x43, 0x9e, 0x88, 0x00, + 0x43, 0x9e, 0x94, 0xc9, 0x9b, 0xdb, 0x00, 0xf3, 0xc9, 0xc5, 0x05, 0x02, + 0x00, 0xf3, 0xa8, 0xc6, 0x05, 0x01, 0x00, 0xf3, 0xb8, 0xc9, 0x0e, 0x6e, + 0x00, 0xf5, 0xb9, 0xc5, 0x1e, 0xc8, 0x00, 0xf5, 0xa9, 0xca, 0x9e, 0x5a, + 0x00, 0xf5, 0x99, 0xc5, 0x1f, 0x0c, 0x00, 0xf5, 0x89, 0xc5, 0x31, 0xee, + 0x00, 0xf5, 0x78, 0x45, 0x02, 0x9a, 0x43, 0x9e, 0xa0, 0x42, 0x00, 0x30, + 0xc3, 0x9e, 0xbe, 0xca, 0x1f, 0x07, 0x00, 0x10, 0x08, 0xcb, 0x98, 0x58, + 0x00, 0x0e, 0xf8, 0xcd, 0x61, 0x8b, 0x00, 0xf4, 0xd1, 0x43, 0x00, 0x75, + 0x43, 0x9e, 0xcd, 0xca, 0x25, 0x08, 0x05, 0x5a, 0xc9, 0xd2, 0x4c, 0x5b, + 0x05, 0x5a, 0xc0, 0xc5, 0x05, 0x02, 0x00, 0xf2, 0x39, 0xc5, 0x00, 0xd4, + 0x00, 0xf2, 0x28, 0xc9, 0x0e, 0x6e, 0x00, 0xf7, 0x81, 0xc5, 0x1e, 0xc8, + 0x00, 0xf7, 0x71, 0xca, 0x9e, 0x5a, 0x00, 0xf7, 0x61, 0xc5, 0x1f, 0x0c, + 0x00, 0xf7, 0x51, 0xc5, 0x31, 0xee, 0x00, 0xf7, 0x40, 0xc5, 0x31, 0xee, + 0x00, 0x0b, 0x81, 0xc5, 0x1f, 0x0c, 0x00, 0x10, 0xa0, 0xc5, 0x05, 0x02, + 0x00, 0xf3, 0x91, 0x44, 0x02, 0x9b, 0x43, 0x9e, 0xdc, 0xcb, 0x98, 0x58, + 0x00, 0x11, 0x80, 0xc9, 0x0e, 0x6e, 0x00, 0xf6, 0x31, 0xc5, 0x1e, 0xc8, + 0x00, 0xf6, 0x21, 0xca, 0x9e, 0x5a, 0x00, 0xf6, 0x11, 0xc5, 0x1f, 0x0c, + 0x00, 0xf6, 0x01, 0xc5, 0x31, 0xee, 0x00, 0xf5, 0xf0, 0xcb, 0x98, 0x58, + 0x00, 0x0f, 0x00, 0xcb, 0x98, 0x58, 0x00, 0xf2, 0xe0, 0x16, 0xc3, 0x9e, + 0xf4, 0xc6, 0x8e, 0xde, 0x00, 0x89, 0x11, 0xc5, 0x79, 0xf2, 0x00, 0x89, + 0x21, 0xc5, 0xdb, 0xff, 0x00, 0x89, 0x30, 0x87, 0x00, 0x8c, 0x28, 0xc4, + 0xad, 0x2b, 0x00, 0x8e, 0x61, 0xc5, 0x90, 0xe4, 0x06, 0xbe, 0xb0, 0xc4, + 0xad, 0x2b, 0x00, 0x8e, 0x99, 0xc5, 0x90, 0xe4, 0x00, 0x8e, 0xa1, 0xc6, + 0xc0, 0x7c, 0x06, 0xbe, 0xc9, 0xc7, 0xba, 0x7b, 0x06, 0xbe, 0xd0, 0x02, + 0x43, 0x9f, 0x00, 0xc4, 0xe4, 0x57, 0x01, 0x9f, 0xf0, 0xc3, 0x05, 0x14, + 0x01, 0x9b, 0x69, 0x16, 0xc3, 0x9f, 0x1e, 0xc4, 0x09, 0x9d, 0x01, 0x9b, + 0x80, 0xc4, 0x01, 0x23, 0x00, 0x15, 0xa9, 0xc8, 0x74, 0x8c, 0x08, 0x3d, + 0x32, 0x03, 0x9f, 0x2a, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x19, 0xc4, 0x2c, + 0x0d, 0x0e, 0x8a, 0x08, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x09, 0xc4, 0x2c, + 0x0d, 0x0e, 0x89, 0xf8, 0xa0, 0x0e, 0x8e, 0x71, 0x9f, 0x0e, 0x8e, 0x69, + 0x9e, 0x0e, 0x8e, 0x60, 0x46, 0x00, 0x2c, 0xc3, 0x9f, 0x30, 0xc5, 0x02, + 0xc2, 0x0e, 0x8a, 0x49, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x40, 0xc5, 0x02, + 0xc2, 0x0e, 0x8a, 0x79, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x70, 0xc5, 0x02, + 0xc2, 0x0e, 0x8a, 0x69, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x60, 0xc5, 0x02, + 0xc2, 0x0e, 0x8a, 0x59, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x50, 0xcd, 0x7f, + 0x18, 0x0e, 0x8d, 0x69, 0xc4, 0xe4, 0xd3, 0x0e, 0x8c, 0x41, 0x16, 0xc3, + 0x9f, 0x3c, 0xd0, 0x5f, 0x42, 0x0e, 0x8b, 0x30, 0xc6, 0xd1, 0xc3, 0x0e, + 0x8d, 0x51, 0xcb, 0x91, 0x62, 0x0e, 0x8c, 0x51, 0xc2, 0x00, 0x8d, 0x0e, + 0x8c, 0x28, 0x14, 0xc3, 0x9f, 0x48, 0xc5, 0xd9, 0xac, 0x0e, 0x8b, 0xe8, + 0xc2, 0x00, 0x7e, 0x0e, 0x8c, 0x39, 0x43, 0xe5, 0x96, 0x43, 0x9f, 0x54, + 0xc5, 0x09, 0x02, 0x0e, 0x8b, 0xdb, 0x03, 0x9f, 0x68, 0xcf, 0x65, 0x67, + 0x0e, 0x8b, 0x68, 0xc9, 0xab, 0x1c, 0x0e, 0x8c, 0x00, 0xc5, 0x5e, 0x2d, + 0x0e, 0x8e, 0x18, 0xcd, 0x42, 0x94, 0x00, 0xff, 0xe1, 0xc4, 0x7a, 0x04, + 0x00, 0xfb, 0x42, 0x03, 0x9f, 0x6e, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0x74, + 0x45, 0x02, 0x9a, 0x43, 0x9f, 0x8a, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0x96, + 0x45, 0x02, 0x9a, 0x43, 0x9f, 0xa2, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0xb4, + 0xcb, 0x94, 
0x90, 0x00, 0xf9, 0xf1, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0xe1, + 0xc5, 0x28, 0x47, 0x00, 0xf9, 0xd0, 0xcd, 0x42, 0x94, 0x00, 0xfe, 0x61, + 0xc4, 0x7a, 0x04, 0x00, 0xf9, 0x42, 0x03, 0x9f, 0xc6, 0x45, 0x02, 0x9a, + 0x43, 0x9f, 0xcc, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0xe2, 0x45, 0x02, 0x9a, + 0x43, 0x9f, 0xee, 0xcd, 0x42, 0x94, 0x00, 0xfd, 0xe1, 0xc4, 0x7a, 0x04, + 0x00, 0xf8, 0x42, 0x03, 0x9f, 0xfa, 0xc4, 0x28, 0x48, 0x00, 0xfd, 0xd1, + 0xc5, 0xd6, 0x41, 0x00, 0xfd, 0xc0, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x00, + 0xca, 0x94, 0x91, 0x00, 0xff, 0xb3, 0x03, 0xa0, 0x16, 0xc4, 0x7a, 0x04, + 0x00, 0xfb, 0x02, 0x03, 0xa0, 0x1c, 0xd2, 0x4a, 0x63, 0x00, 0xff, 0xa0, + 0xd2, 0x4a, 0x63, 0x00, 0xff, 0x90, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x22, + 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x43, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x4f, + 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x5b, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x73, + 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x85, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x97, + 0x45, 0x02, 0x9a, 0x43, 0xa0, 0xaf, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0xc1, + 0xca, 0x94, 0x91, 0x00, 0xfe, 0x33, 0x03, 0xa0, 0xd3, 0xc4, 0x7a, 0x04, + 0x00, 0xf9, 0x02, 0x03, 0xa0, 0xd9, 0xd2, 0x4a, 0x63, 0x00, 0xfe, 0x20, + 0xd2, 0x4a, 0x63, 0x00, 0xfe, 0x10, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0xdf, + 0x45, 0x02, 0x9a, 0x43, 0xa1, 0x00, 0x45, 0x02, 0x9a, 0x43, 0xa1, 0x0c, + 0xca, 0x94, 0x91, 0x00, 0xfd, 0xb3, 0x03, 0xa1, 0x18, 0xc4, 0x7a, 0x04, + 0x00, 0xf8, 0x02, 0x03, 0xa1, 0x1e, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0xa0, + 0xc4, 0x28, 0x48, 0x00, 0xfb, 0x83, 0x03, 0xa1, 0x24, 0xc5, 0xd6, 0x41, + 0x00, 0xfd, 0x80, 0x45, 0x02, 0x9a, 0x43, 0xa1, 0x2a, 0x00, 0x43, 0xa1, + 0x4b, 0xc7, 0x33, 0xe6, 0x08, 0x0a, 0x33, 0x03, 0xa1, 0x57, 0xc6, 0xb9, + 0xdc, 0x08, 0x0a, 0x40, 0xc7, 0x33, 0xe6, 0x08, 0x0a, 0x3b, 0x03, 0xa1, + 0x5d, 0xc6, 0xb9, 0xdc, 0x08, 0x0a, 0x50, 0xca, 0xa6, 0x70, 0x0e, 0x7d, + 0xe3, 0x03, 0xa1, 0x63, 0xc9, 0x92, 0x8d, 0x0e, 0x7d, 0xd2, 0x03, 0xa1, + 0x69, 0xd6, 0x2d, 0x0a, 0x0e, 0x7d, 0xb8, 0xc9, 0x40, 0xaa, 0x09, 0x10, + 0x38, 0xca, 0xa3, 0x1e, 0x09, 0x0f, 0x00, 0xc4, 0x58, 0xf5, 0x09, 0x0e, + 0xf1, 0xca, 0x9e, 0x46, 0x09, 0x0e, 0xe8, 0xcf, 0x6a, 0xbc, 0x09, 0x0e, + 0x98, 0xc2, 0x10, 0x37, 0x09, 0x0e, 0x71, 0xc2, 0x00, 0xd0, 0x09, 0x0e, + 0x68, 0xc2, 0x02, 0x6f, 0x09, 0x25, 0xe9, 0xc2, 0x01, 0xdd, 0x09, 0x25, + 0xe0, 0xd4, 0x3a, 0x98, 0x0e, 0xc8, 0x11, 0xcb, 0x92, 0xa1, 0x0e, 0xc7, + 0xf8, 0xcc, 0x18, 0xdb, 0x0e, 0xc8, 0x09, 0x16, 0xc3, 0xa1, 0x6f, 0xc9, + 0xad, 0x9b, 0x0e, 0xc4, 0x99, 0xca, 0xa1, 0x70, 0x0e, 0xc0, 0x40, 0xcb, + 0x13, 0x90, 0x0e, 0xc7, 0xe9, 0xcb, 0x13, 0x89, 0x0e, 0xc7, 0xe1, 0xcc, + 0x85, 0x95, 0x0e, 0xc7, 0xda, 0x03, 0xa1, 0x7b, 0xc4, 0x18, 0xf2, 0x0e, + 0xc7, 0xc9, 0xc9, 0x13, 0x9c, 0x0e, 0xc7, 0xc1, 0xc8, 0x1e, 0x56, 0x0e, + 0xc7, 0xb8, 0x05, 0xc3, 0xa1, 0x81, 0xc4, 0x01, 0x23, 0x0e, 0xc7, 0x33, + 0x03, 0xa1, 0x8e, 0x4e, 0x6b, 0xd4, 0xc3, 0xa1, 0x94, 0xc4, 0x0e, 0xe2, + 0x0e, 0xc6, 0xe3, 0x03, 0xa1, 0xa0, 0x47, 0xc6, 0xcc, 0x43, 0xa1, 0xa4, + 0xca, 0x13, 0x91, 0x0e, 0xc5, 0xd1, 0xcd, 0x3a, 0x9e, 0x0e, 0xc0, 0x48, + 0x00, 0x43, 0xa1, 0xb0, 0x00, 0x43, 0xa1, 0xe5, 0x47, 0x0e, 0xcd, 0x43, + 0xa1, 0xf4, 0xcc, 0x8a, 0x5d, 0x0e, 0xc0, 0xe8, 0xc8, 0x64, 0xba, 0x0e, + 0xc2, 0x11, 0x4a, 0x9b, 0x1c, 0x43, 0xa2, 0x00, 0x4d, 0x76, 0xc4, 0xc3, + 0xa2, 0x0c, 0xce, 0x70, 0xdc, 0x0e, 0xc1, 0xb0, 0xcf, 0x3a, 0x9d, 0x0e, + 0xc5, 0xb1, 0xc9, 0x13, 0x9c, 0x0e, 0xc5, 0xa8, 0xce, 0x70, 0xea, 0x0e, + 0xc4, 0x89, 0x47, 0xc6, 0x63, 0x43, 0xa2, 0x18, 0xc5, 0x18, 0xf1, 0x0e, + 0xc3, 0x20, 0x00, 0x43, 0xa2, 0x24, 0xc6, 0x58, 0xac, 0x0e, 0xc2, 0xbb, + 0x03, 0xa2, 
0x30, 0xcd, 0x27, 0xac, 0x0e, 0xc2, 0x91, 0xc4, 0x18, 0xf2, + 0x0e, 0xc2, 0x81, 0xc9, 0xb4, 0x40, 0x0e, 0xc2, 0x70, 0xc9, 0x13, 0x9c, + 0x0e, 0xc2, 0x3b, 0x03, 0xa2, 0x34, 0xc6, 0x58, 0xac, 0x0e, 0xc2, 0x31, + 0xc4, 0x18, 0xf2, 0x0e, 0xc2, 0x28, 0xc2, 0x00, 0x74, 0x0e, 0xc7, 0x99, + 0xc3, 0x00, 0xa3, 0x0e, 0xc7, 0x90, 0x00, 0x43, 0xa2, 0x3a, 0xc6, 0x13, + 0x95, 0x0e, 0xc5, 0x31, 0xc4, 0x00, 0x9d, 0x0e, 0xc4, 0x42, 0x03, 0xa2, + 0x4a, 0xc6, 0x0e, 0xcd, 0x0e, 0xc4, 0xe8, 0xc4, 0x0e, 0xe2, 0x0e, 0xc3, + 0xf9, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0xe0, 0xc2, 0x00, 0x74, 0x0e, 0xc6, + 0xc9, 0xc3, 0x00, 0xa3, 0x0e, 0xc6, 0xc0, 0xc5, 0x0e, 0xce, 0x0e, 0xc7, + 0x63, 0x03, 0xa2, 0x50, 0xcb, 0x13, 0x90, 0x0e, 0xc6, 0x00, 0x46, 0x0e, + 0xce, 0xc3, 0xa2, 0x56, 0xc8, 0xbc, 0x62, 0x0e, 0xc3, 0x80, 0x00, 0x43, + 0xa2, 0x62, 0xc2, 0x00, 0x15, 0x0e, 0xcc, 0x78, 0xca, 0x03, 0x87, 0x01, + 0x5d, 0x09, 0xc9, 0x01, 0x88, 0x01, 0x5d, 0x00, 0xcc, 0x10, 0xb4, 0x07, + 0xeb, 0x41, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x38, 0xca, 0x26, 0xf7, 0x07, + 0xe3, 0x41, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x18, 0xca, 0x9f, 0x72, 0x00, + 0x3b, 0xb1, 0xc8, 0xbf, 0x42, 0x00, 0x3b, 0xa8, 0xd5, 0x0e, 0x77, 0x00, + 0x45, 0x20, 0xc5, 0x05, 0x02, 0x00, 0x35, 0x29, 0xd6, 0x2d, 0x8e, 0x00, + 0x3b, 0x08, 0x45, 0x00, 0x49, 0xc3, 0xa2, 0x7a, 0x14, 0xc3, 0xa2, 0x86, + 0xd2, 0x4d, 0x33, 0x00, 0x43, 0xab, 0x03, 0xa2, 0x92, 0xcf, 0x63, 0x69, + 0x00, 0x43, 0x8b, 0x03, 0xa2, 0x98, 0xc5, 0x4d, 0x40, 0x00, 0x43, 0xa1, + 0xc5, 0x63, 0x73, 0x00, 0x43, 0x80, 0x45, 0x02, 0x9a, 0x43, 0xa2, 0x9e, + 0xc5, 0x05, 0x02, 0x00, 0x33, 0x99, 0xc5, 0x00, 0xd4, 0x00, 0x33, 0x90, + 0xc5, 0x05, 0x02, 0x00, 0x31, 0x2b, 0x03, 0xa2, 0xaa, 0xc5, 0x00, 0xd4, + 0x00, 0x31, 0x1a, 0x03, 0xa2, 0xae, 0x00, 0x43, 0xa2, 0xb2, 0xc8, 0xbf, + 0x42, 0x00, 0x3b, 0x99, 0xca, 0x9f, 0x72, 0x00, 0x3b, 0xa0, 0xca, 0x26, + 0xf7, 0x07, 0xda, 0x89, 0xcd, 0x00, 0xfa, 0x07, 0xda, 0x80, 0xd0, 0x05, + 0x29, 0x00, 0x44, 0x69, 0xc5, 0x00, 0xd4, 0x00, 0x31, 0xd8, 0xc5, 0x05, + 0x02, 0x00, 0x31, 0xe1, 0xc5, 0x00, 0xd4, 0x00, 0x3b, 0x19, 0xd6, 0x2d, + 0x8e, 0x00, 0x3b, 0x20, 0xc5, 0x05, 0x02, 0x00, 0x45, 0xa1, 0xc5, 0x00, + 0xd4, 0x00, 0x35, 0x60, 0xcf, 0x67, 0xce, 0x00, 0x35, 0x71, 0xcd, 0x04, + 0xe7, 0x00, 0x3b, 0xf8, 0xc4, 0xe0, 0x63, 0x00, 0x36, 0x19, 0xcd, 0x00, + 0xfa, 0x07, 0xf4, 0x99, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0xa0, 0xc5, 0x05, + 0x02, 0x00, 0x44, 0x61, 0xc5, 0x00, 0xd4, 0x00, 0x34, 0xf8, 0xd0, 0x59, + 0xa2, 0x00, 0x45, 0xd1, 0xc9, 0x16, 0x14, 0x00, 0x45, 0x49, 0xcb, 0x08, + 0x09, 0x00, 0x45, 0x40, 0x0b, 0xc3, 0xa2, 0xbe, 0xca, 0x26, 0xf7, 0x07, + 0xf4, 0x51, 0xcb, 0x64, 0x7b, 0x07, 0xf4, 0x60, 0xcb, 0x08, 0x09, 0x00, + 0x36, 0x9b, 0x03, 0xa2, 0xca, 0x5d, 0x10, 0x12, 0x43, 0xa2, 0xce, 0xca, + 0x59, 0xa8, 0x00, 0x45, 0xc9, 0x98, 0x00, 0x34, 0x93, 0x03, 0xa2, 0xda, + 0xde, 0x02, 0x89, 0x00, 0x3b, 0x88, 0xc6, 0x05, 0x01, 0x00, 0x45, 0x00, + 0xd6, 0x2d, 0x8e, 0x00, 0x3a, 0x93, 0x03, 0xa2, 0xe0, 0xd2, 0x4b, 0xcb, + 0x00, 0x3a, 0x80, 0xd5, 0x0e, 0x77, 0x00, 0x34, 0xe0, 0x4a, 0x0e, 0x7d, + 0xc3, 0xa2, 0xe6, 0x46, 0x02, 0xa0, 0x43, 0xa2, 0xf2, 0x98, 0x00, 0x37, + 0x71, 0xcd, 0x31, 0x5f, 0x00, 0x3a, 0xd0, 0xce, 0x05, 0x39, 0x00, 0x34, + 0x58, 0x4a, 0x0e, 0x7d, 0xc3, 0xa2, 0xf8, 0x48, 0x04, 0xe7, 0x43, 0xa3, + 0x04, 0xe0, 0x09, 0x87, 0x00, 0x3b, 0xe0, 0xc5, 0x05, 0x02, 0x00, 0x3b, + 0x71, 0x03, 0x43, 0xa3, 0x10, 0xcb, 0x10, 0xb5, 0x07, 0xdd, 0x61, 0xcc, + 0x00, 0xfb, 0x07, 0xdd, 0x50, 0xcb, 0x10, 0xb5, 0x07, 0xdd, 0x41, 0xcc, + 0x00, 0xfb, 0x07, 0xdd, 0x30, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x29, 0xcd, + 0x00, 0xfa, 
0x07, 0xdd, 0x20, 0xd0, 0x13, 0xe9, 0x0f, 0xdd, 0x58, 0xcf, + 0x0a, 0x48, 0x0f, 0xdd, 0x50, 0xa5, 0x01, 0x47, 0xf8, 0xd3, 0x42, 0xed, + 0x0e, 0xf8, 0x40, 0xd1, 0x01, 0x68, 0x05, 0x5a, 0x11, 0xc6, 0x01, 0x73, + 0x05, 0x5a, 0x08, 0xcb, 0x98, 0x58, 0x00, 0x11, 0x88, 0xc9, 0x0e, 0x6e, + 0x00, 0xf6, 0x39, 0xc5, 0x1e, 0xc8, 0x00, 0xf6, 0x29, 0xca, 0x9e, 0x5a, + 0x00, 0xf6, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0xf6, 0x09, 0xc5, 0x31, 0xee, + 0x00, 0xf5, 0xf8, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xb1, 0xcc, 0x1e, 0xc1, + 0x00, 0xeb, 0x98, 0xc5, 0x05, 0x02, 0x00, 0xf2, 0xdb, 0x03, 0xa3, 0x1c, + 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0xc8, 0xc9, 0x0e, 0x6e, 0x00, 0xf7, 0x89, + 0xc5, 0x1e, 0xc8, 0x00, 0xf7, 0x79, 0xca, 0x9e, 0x5a, 0x00, 0xf7, 0x69, + 0xc5, 0x1f, 0x0c, 0x00, 0xf7, 0x59, 0xc5, 0x31, 0xee, 0x00, 0xf7, 0x48, + 0xc5, 0x31, 0xee, 0x00, 0x0b, 0x89, 0xc5, 0x1f, 0x0c, 0x00, 0x10, 0xa8, + 0xc5, 0x05, 0x02, 0x00, 0xf3, 0x99, 0x44, 0x02, 0x9b, 0x43, 0xa3, 0x22, + 0xc9, 0x0e, 0x6e, 0x00, 0xf5, 0x69, 0xc5, 0x1e, 0xc8, 0x00, 0xf5, 0x59, + 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x49, 0xc5, 0x1f, 0x0c, 0x00, 0xf5, 0x39, + 0xc5, 0x31, 0xee, 0x00, 0xf5, 0x28, 0xc5, 0x05, 0x02, 0x00, 0xf5, 0x09, + 0xc5, 0x00, 0xd4, 0x00, 0x11, 0x3a, 0x03, 0xa3, 0x3a, 0xc5, 0x05, 0x02, + 0x00, 0xf0, 0x09, 0xc5, 0x00, 0xd4, 0x00, 0x07, 0x2a, 0x03, 0xa3, 0x40, + 0xc6, 0x60, 0xb1, 0x00, 0x0e, 0xa9, 0xc5, 0x31, 0xee, 0x00, 0x0e, 0xb9, + 0xc5, 0x8e, 0x66, 0x00, 0x0e, 0xc9, 0xc5, 0x1f, 0x0c, 0x00, 0x0e, 0xd8, + 0xc6, 0xc1, 0x86, 0x05, 0x4b, 0x91, 0xc5, 0xc0, 0x7d, 0x00, 0x89, 0x18, + 0xc3, 0x05, 0x14, 0x01, 0x9f, 0xa1, 0x16, 0xc3, 0xa3, 0x46, 0x08, 0xc3, + 0xa3, 0x52, 0x15, 0xc3, 0xa3, 0x5e, 0xc5, 0x06, 0xdb, 0x01, 0x9f, 0xd9, + 0xc4, 0x26, 0x78, 0x01, 0x9f, 0xe0, 0xc2, 0x02, 0xa0, 0x01, 0x9b, 0x71, + 0xc4, 0x02, 0xde, 0x01, 0x9b, 0x78, 0xd3, 0x42, 0xed, 0x08, 0x3d, 0x38, + 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0x89, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x80, + 0x45, 0xab, 0x1f, 0xc3, 0xa3, 0x6a, 0xc2, 0x00, 0x4f, 0x0e, 0x8b, 0x28, + 0xcb, 0x90, 0xf4, 0x0e, 0x8c, 0x59, 0x46, 0x6d, 0xc6, 0x43, 0xa3, 0x74, + 0xa2, 0x0e, 0x8b, 0x91, 0xa1, 0x0e, 0x8b, 0x89, 0xa0, 0x0e, 0x8b, 0x81, + 0x9f, 0x0e, 0x8b, 0x79, 0x9e, 0x0e, 0x8b, 0x70, 0xc9, 0xab, 0x1c, 0x0e, + 0x8c, 0x08, 0x45, 0x02, 0x9a, 0x43, 0xa3, 0x80, 0x12, 0xc3, 0xa3, 0x96, + 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x6b, 0x03, 0xa3, 0xa5, 0xc5, 0x28, 0x47, + 0x00, 0xfb, 0x5a, 0x03, 0xa3, 0xab, 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0x69, + 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x58, 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0x61, + 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x50, 0xcb, 0x94, 0x90, 0x00, 0xfa, 0xf9, + 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0xe9, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0xd8, + 0xcb, 0x94, 0x90, 0x00, 0xf9, 0xf9, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0xe9, + 0xc5, 0x28, 0x47, 0x00, 0xf9, 0xd8, 0x45, 0x02, 0x9a, 0x43, 0xa3, 0xb1, + 0x12, 0xc3, 0xa3, 0xc7, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0x6b, 0x03, 0xa3, + 0xd6, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x5a, 0x03, 0xa3, 0xdc, 0xc4, 0xe3, + 0xab, 0x00, 0xf8, 0xe9, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0xd8, 0xc4, 0xe3, + 0xab, 0x00, 0xf8, 0xe1, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0xd0, 0x45, 0x02, + 0x9a, 0x43, 0xa3, 0xe2, 0x12, 0xc3, 0xa3, 0xf8, 0xc4, 0xe3, 0xab, 0x00, + 0xf8, 0x6b, 0x03, 0xa4, 0x07, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x5a, 0x03, + 0xa4, 0x0d, 0xd2, 0x4a, 0x63, 0x00, 0xff, 0xb8, 0x45, 0x02, 0x9a, 0x43, + 0xa4, 0x13, 0xcb, 0x94, 0x90, 0x00, 0xfb, 0x3b, 0x03, 0xa4, 0x34, 0xc4, + 0xe3, 0xab, 0x00, 0xfb, 0x2b, 0x03, 0xa4, 0x3a, 0xc5, 0x28, 0x47, 0x00, + 0xfb, 0x1b, 0x03, 0xa4, 0x40, 0xcd, 0x4a, 0x68, 0x00, 0xfd, 0x08, 0xc4, + 0xe3, 0xab, 
0x00, 0xfa, 0x29, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x18, 0xc4, + 0xe3, 0xab, 0x00, 0xfa, 0x21, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x10, 0xcb, + 0x94, 0x90, 0x00, 0xff, 0x39, 0xc4, 0xe3, 0xab, 0x00, 0xff, 0x19, 0xc5, + 0x28, 0x47, 0x00, 0xff, 0x11, 0xc5, 0x63, 0xdc, 0x00, 0x1d, 0x80, 0xcb, + 0x94, 0x90, 0x00, 0xfa, 0xb9, 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0xa9, 0xc5, + 0x28, 0x47, 0x00, 0xfa, 0x98, 0xcb, 0x94, 0x90, 0x00, 0xfa, 0xb1, 0xc4, + 0xe3, 0xab, 0x00, 0xfa, 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x90, 0xcb, + 0x94, 0x90, 0x00, 0xfe, 0xb9, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0x99, 0xc5, + 0x28, 0x47, 0x00, 0xfe, 0x91, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x80, 0xcb, + 0x94, 0x90, 0x00, 0xf9, 0xb9, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0xa9, 0xc5, + 0x28, 0x47, 0x00, 0xf9, 0x98, 0xcb, 0x94, 0x90, 0x00, 0xf9, 0xb1, 0xc4, + 0xe3, 0xab, 0x00, 0xf9, 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x90, 0xd2, + 0x4a, 0x63, 0x00, 0xfe, 0x38, 0x45, 0x02, 0x9a, 0x43, 0xa4, 0x46, 0xcb, + 0x94, 0x90, 0x00, 0xf9, 0x3b, 0x03, 0xa4, 0x67, 0xc4, 0xe3, 0xab, 0x00, + 0xf9, 0x2b, 0x03, 0xa4, 0x6d, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x1b, 0x03, + 0xa4, 0x73, 0xcd, 0x4a, 0x68, 0x00, 0xfc, 0x88, 0xc4, 0xe3, 0xab, 0x00, + 0xf8, 0xa9, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x98, 0xc4, 0xe3, 0xab, 0x00, + 0xf8, 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x90, 0xd2, 0x4a, 0x63, 0x00, + 0xfd, 0xb8, 0x45, 0x02, 0x9a, 0x43, 0xa4, 0x79, 0xd2, 0x4a, 0x63, 0x00, + 0xfd, 0x90, 0xcb, 0x94, 0x90, 0x00, 0xf8, 0x3b, 0x03, 0xa4, 0x9a, 0xc4, + 0xe3, 0xab, 0x00, 0xf8, 0x2b, 0x03, 0xa4, 0xa0, 0xc5, 0x28, 0x47, 0x00, + 0xf8, 0x1b, 0x03, 0xa4, 0xa6, 0xcd, 0x4a, 0x68, 0x00, 0xfc, 0x08, 0xc7, + 0xb9, 0xdb, 0x08, 0x0a, 0x61, 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0x98, 0xc8, + 0xb9, 0xda, 0x08, 0x0a, 0x70, 0xc8, 0x67, 0xc6, 0x08, 0x0a, 0xb0, 0xca, + 0xa2, 0x6a, 0x0e, 0x7d, 0xe8, 0x46, 0x00, 0x8b, 0x43, 0xa4, 0xac, 0xcc, + 0x89, 0x61, 0x0e, 0xc8, 0x01, 0xca, 0x92, 0xa2, 0x0e, 0xc7, 0xf0, 0xc9, + 0x67, 0x79, 0x0e, 0xc1, 0x60, 0xc5, 0x02, 0xd2, 0x0e, 0xc7, 0x5b, 0x03, + 0xa4, 0xb8, 0x17, 0x43, 0xa4, 0xbe, 0x4a, 0x6d, 0x50, 0x43, 0xa4, 0xc8, + 0xc4, 0x18, 0xf2, 0x0e, 0xc7, 0x29, 0xc8, 0x45, 0x27, 0x0e, 0xc7, 0x20, + 0x00, 0x43, 0xa4, 0xd4, 0xcc, 0x85, 0x41, 0x0e, 0xc1, 0xd9, 0xcd, 0x7e, + 0x96, 0x0e, 0xc1, 0xd0, 0x05, 0xc3, 0xa4, 0xe6, 0xc6, 0x13, 0x95, 0x0e, + 0xc5, 0x21, 0x14, 0xc3, 0xa4, 0xf5, 0xc5, 0x0e, 0xce, 0x0e, 0xc0, 0xf3, + 0x03, 0xa5, 0x04, 0xd7, 0x27, 0xa2, 0x0e, 0xc1, 0x39, 0xc6, 0x58, 0xac, + 0x0e, 0xc0, 0x93, 0x03, 0xa5, 0x08, 0xc4, 0x18, 0xf2, 0x0e, 0xc0, 0x83, + 0x03, 0xa5, 0x0e, 0xd3, 0x45, 0x27, 0x0e, 0xc1, 0x00, 0xc9, 0x6d, 0x53, + 0x0e, 0xc0, 0xa3, 0x03, 0xa5, 0x14, 0xc3, 0x01, 0xc8, 0x0e, 0xc0, 0x60, + 0xc9, 0x13, 0x9c, 0x0e, 0xc1, 0x29, 0xc4, 0x0e, 0xe2, 0x0e, 0xc1, 0x20, + 0xc7, 0x1a, 0xc5, 0x0e, 0xc2, 0x09, 0xc2, 0x02, 0xae, 0x0e, 0xc2, 0x00, + 0xc6, 0x58, 0xac, 0x0e, 0xc1, 0xc9, 0xc2, 0x02, 0xae, 0x0e, 0xc1, 0xc0, + 0xc6, 0x3b, 0x9c, 0x0e, 0xc4, 0x81, 0xc8, 0x45, 0x27, 0x0e, 0xc4, 0x78, + 0xc4, 0x18, 0xf2, 0x0e, 0xc2, 0x89, 0xc9, 0xb4, 0x40, 0x0e, 0xc2, 0x78, + 0x00, 0x43, 0xa5, 0x1a, 0xc6, 0xcd, 0x73, 0x0e, 0xc2, 0x40, 0x15, 0xc3, + 0xa5, 0x26, 0xc5, 0x17, 0x14, 0x0e, 0xc7, 0x79, 0xc4, 0x05, 0x75, 0x0e, + 0xc7, 0x70, 0xca, 0x13, 0x9b, 0x0e, 0xc4, 0x68, 0xc5, 0x05, 0x74, 0x0e, + 0xc7, 0x68, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0x91, 0xc4, 0x0e, 0xe2, 0x0e, + 0xc3, 0x70, 0x45, 0x0d, 0x20, 0xc3, 0xa5, 0x32, 0xc6, 0x13, 0x95, 0x0e, + 0xc5, 0x29, 0xc4, 0x00, 0x9d, 0x0e, 0xc4, 0x39, 0xc5, 0x0e, 0xce, 0x0e, + 0xc0, 0xf8, 0xc5, 0x08, 0x09, 0x00, 0x44, 0x11, 0xc9, 0x4d, 0xde, 0x00, + 0x43, 0xc0, 
0x45, 0x00, 0x2d, 0xc3, 0xa5, 0x3e, 0x49, 0x9a, 0xeb, 0x43, + 0xa5, 0x4a, 0x45, 0x02, 0x9a, 0x43, 0xa5, 0x56, 0x45, 0x02, 0x9a, 0x43, + 0xa5, 0x62, 0xc9, 0xaf, 0x6f, 0x00, 0x43, 0xf9, 0xc9, 0x16, 0x14, 0x00, + 0x43, 0xe0, 0x00, 0x43, 0xa5, 0x6e, 0x00, 0x43, 0xa5, 0x7a, 0xcd, 0x00, + 0xfa, 0x07, 0xf4, 0x09, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x10, 0xcc, 0x00, + 0xfb, 0x07, 0xf4, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xf4, 0x58, 0x00, 0x43, + 0xa5, 0x86, 0xca, 0x9f, 0x72, 0x00, 0x3b, 0xd9, 0xc8, 0xbf, 0x42, 0x00, + 0x3b, 0xd0, 0xc6, 0x05, 0x01, 0x00, 0x34, 0xa8, 0xd3, 0x1e, 0x24, 0x00, + 0x3a, 0x98, 0xc5, 0x05, 0x02, 0x00, 0x45, 0x71, 0xcf, 0x1b, 0x59, 0x00, + 0x34, 0x78, 0xe0, 0x05, 0x27, 0x00, 0x3a, 0xc8, 0xc5, 0x00, 0xd4, 0x00, + 0x34, 0x29, 0xd6, 0x2d, 0x8e, 0x00, 0x3a, 0xc0, 0xce, 0x73, 0x6e, 0x00, + 0x34, 0x11, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0xb8, 0xcb, 0x02, 0x9c, 0x00, + 0x3b, 0x79, 0xc4, 0x00, 0xd5, 0x00, 0x3b, 0x90, 0xcb, 0x98, 0x58, 0x00, + 0xf2, 0xe8, 0xc6, 0x60, 0xb1, 0x00, 0x0e, 0xb1, 0xc5, 0x31, 0xee, 0x00, + 0x0e, 0xc1, 0xc5, 0x8e, 0x66, 0x00, 0x0e, 0xd1, 0xc5, 0x1f, 0x0c, 0x00, + 0x0e, 0xe0, 0xcb, 0x98, 0x58, 0x00, 0x0f, 0x08, 0xca, 0x9b, 0xda, 0x00, + 0x0f, 0xd8, 0xc2, 0x02, 0xa0, 0x01, 0x9f, 0xa9, 0xc4, 0x02, 0xde, 0x01, + 0x9f, 0xb0, 0xc3, 0x09, 0x9e, 0x01, 0x9f, 0xb9, 0xc3, 0x0d, 0x14, 0x01, + 0x9f, 0xc0, 0xc2, 0x22, 0xcc, 0x01, 0x9f, 0xc9, 0xc4, 0x18, 0x10, 0x01, + 0x9f, 0xd0, 0xc6, 0xd2, 0xb9, 0x0e, 0x8b, 0xf1, 0x91, 0x0e, 0x8b, 0xe0, + 0xa0, 0x0e, 0x8b, 0x49, 0x9f, 0x0e, 0x8b, 0x41, 0x9e, 0x0e, 0x8b, 0x38, + 0x12, 0xc3, 0xa5, 0x92, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x63, 0x03, 0xa5, + 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xfb, 0x52, 0x03, 0xa5, 0xa7, 0xca, 0x94, + 0x91, 0x00, 0xfb, 0x7b, 0x03, 0xa5, 0xad, 0xcd, 0x42, 0x94, 0x00, 0xfd, + 0x48, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x68, 0xd3, 0x42, 0x8e, 0x00, 0xfd, + 0x58, 0x12, 0xc3, 0xa5, 0xb3, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0x63, 0x03, + 0xa5, 0xc2, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x52, 0x03, 0xa5, 0xc8, 0xca, + 0x94, 0x91, 0x00, 0xf9, 0x7b, 0x03, 0xa5, 0xce, 0xcd, 0x42, 0x94, 0x00, + 0xfc, 0xc8, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0xe8, 0xd3, 0x42, 0x8e, 0x00, + 0xfc, 0xd8, 0x12, 0xc3, 0xa5, 0xd4, 0xc4, 0xe3, 0xab, 0x00, 0xf8, 0x63, + 0x03, 0xa5, 0xe3, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x52, 0x03, 0xa5, 0xe9, + 0xca, 0x94, 0x91, 0x00, 0xf8, 0x7b, 0x03, 0xa5, 0xef, 0xcd, 0x42, 0x94, + 0x00, 0xfc, 0x48, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0x68, 0xd3, 0x42, 0x8e, + 0x00, 0xfc, 0x58, 0xcb, 0x94, 0x90, 0x00, 0xfb, 0x33, 0x03, 0xa5, 0xf5, + 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x23, 0x03, 0xa5, 0xfb, 0xc5, 0x28, 0x47, + 0x00, 0xfb, 0x13, 0x03, 0xa6, 0x01, 0xcd, 0x4a, 0x68, 0x00, 0xfd, 0x00, + 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x38, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x28, + 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x18, 0xcb, 0x94, 0x90, 0x00, 0xf9, 0x33, + 0x03, 0xa6, 0x07, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0x23, 0x03, 0xa6, 0x0d, + 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x13, 0x03, 0xa6, 0x13, 0xcd, 0x4a, 0x68, + 0x00, 0xfc, 0x80, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0xb8, 0xd2, 0x4a, 0x63, + 0x00, 0xfc, 0xa8, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x98, 0xcb, 0x94, 0x90, + 0x00, 0xf8, 0x33, 0x03, 0xa6, 0x19, 0xc4, 0xe3, 0xab, 0x00, 0xf8, 0x23, + 0x03, 0xa6, 0x1f, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x13, 0x03, 0xa6, 0x25, + 0xcd, 0x4a, 0x68, 0x00, 0xfc, 0x00, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x38, + 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x28, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x18, + 0xd0, 0x58, 0xe2, 0x0e, 0x7d, 0xd9, 0xd0, 0x2d, 0x10, 0x0e, 0x7d, 0xc0, + 0xcb, 0x6d, 0x51, 0x0e, 0xc1, 0xe0, 0x14, 0xc3, 0xa6, 0x2b, 0xce, 0x6d, + 0x4e, 0x0e, 
0xc1, 0xb8, 0xc6, 0x58, 0xac, 0x0e, 0xc2, 0x19, 0xc2, 0x02, + 0xae, 0x0e, 0xc1, 0x88, 0x46, 0x06, 0x82, 0xc3, 0xa6, 0x37, 0xc9, 0xb3, + 0xc2, 0x0e, 0xc7, 0x11, 0x46, 0x0e, 0xce, 0x43, 0xa6, 0x43, 0x44, 0x0d, + 0x21, 0xc3, 0xa6, 0x55, 0xc8, 0x13, 0x9d, 0x0e, 0xc0, 0xaa, 0x03, 0xa6, + 0x64, 0xc3, 0x00, 0x74, 0x0e, 0xc4, 0x33, 0x03, 0xa6, 0x68, 0xce, 0x3a, + 0x9e, 0x0e, 0xc0, 0x88, 0x00, 0x43, 0xa6, 0x6c, 0xd2, 0x4d, 0x7b, 0x0e, + 0xc1, 0x18, 0xcf, 0x69, 0xea, 0x0e, 0xc1, 0x08, 0xcb, 0x4d, 0x82, 0x0e, + 0xc1, 0x30, 0xc8, 0xbc, 0x62, 0x0e, 0xc2, 0xc9, 0xca, 0x4d, 0x83, 0x0e, + 0xc2, 0xc0, 0xc4, 0x03, 0x14, 0x0e, 0xc7, 0x89, 0xc3, 0x06, 0xa7, 0x0e, + 0xc6, 0xe8, 0xc7, 0x13, 0x94, 0x0e, 0xc5, 0x51, 0xc2, 0x00, 0xa8, 0x0e, + 0xc0, 0xd8, 0xc5, 0x08, 0x09, 0x00, 0x44, 0x09, 0xc9, 0x4d, 0xde, 0x00, + 0x43, 0xb8, 0xc5, 0x05, 0x02, 0x00, 0x43, 0xc9, 0xc5, 0x00, 0xd4, 0x00, + 0x43, 0xb0, 0xc9, 0xaf, 0x6f, 0x00, 0x44, 0x01, 0xc9, 0x16, 0x14, 0x00, + 0x43, 0xe8, 0xc9, 0xaf, 0x6f, 0x00, 0x43, 0xf1, 0xc9, 0x16, 0x14, 0x00, + 0x43, 0xd8, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x41, 0xcd, 0x00, 0xfa, 0x07, + 0xf4, 0x38, 0xcd, 0x00, 0xfa, 0x07, 0xf4, 0x19, 0xca, 0x26, 0xf7, 0x07, + 0xf4, 0x20, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x89, 0xcd, 0x00, 0xfa, 0x07, + 0xdd, 0x80, 0xca, 0x94, 0x91, 0x00, 0xfb, 0x73, 0x03, 0xa6, 0x83, 0xcd, + 0x42, 0x94, 0x00, 0xfd, 0x40, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x60, 0xd3, + 0x42, 0x8e, 0x00, 0xfd, 0x50, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x78, 0xca, + 0x94, 0x91, 0x00, 0xf9, 0x73, 0x03, 0xa6, 0x89, 0xcd, 0x42, 0x94, 0x00, + 0xfc, 0xc0, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0xe0, 0xd3, 0x42, 0x8e, 0x00, + 0xfc, 0xd0, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0xf8, 0xca, 0x94, 0x91, 0x00, + 0xf8, 0x73, 0x03, 0xa6, 0x8f, 0xcd, 0x42, 0x94, 0x00, 0xfc, 0x40, 0xd3, + 0x42, 0x8e, 0x00, 0xfc, 0x60, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0x50, 0xd3, + 0x42, 0x8e, 0x00, 0xfc, 0x78, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x30, 0xd2, + 0x4a, 0x63, 0x00, 0xfd, 0x20, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x10, 0xd2, + 0x4a, 0x63, 0x00, 0xfc, 0xb0, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0xa0, 0xd2, + 0x4a, 0x63, 0x00, 0xfc, 0x90, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x30, 0xd2, + 0x4a, 0x63, 0x00, 0xfc, 0x20, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x10, 0x49, + 0x0e, 0xd7, 0xc3, 0xa6, 0x95, 0xc5, 0xbc, 0x5d, 0x0e, 0xc7, 0x38, 0xc5, + 0x58, 0xac, 0x0e, 0xc7, 0x19, 0xc4, 0x18, 0xf2, 0x0e, 0xc7, 0x08, 0xc4, + 0x18, 0xf2, 0x0e, 0xc7, 0x01, 0xc9, 0x13, 0x9c, 0x0e, 0xc6, 0xf9, 0xc8, + 0x1e, 0x56, 0x0e, 0xc6, 0xf0, 0xc7, 0x13, 0x94, 0x0e, 0xc5, 0x49, 0xc2, + 0x00, 0xa8, 0x0e, 0xc0, 0xd2, 0x03, 0xa6, 0xa1, 0x00, 0x43, 0xa6, 0xa7, + 0x00, 0x43, 0xa6, 0xcb, 0xc6, 0x77, 0x82, 0x0e, 0xc1, 0xfb, 0x03, 0xa6, + 0xd7, 0x05, 0xc3, 0xa6, 0xdd, 0x0a, 0xc3, 0xa6, 0xef, 0xc4, 0x18, 0xf2, + 0x0e, 0xc1, 0x10, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x70, 0xd3, 0x42, 0x8e, + 0x00, 0xfc, 0xf0, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0x70, 0xc5, 0x16, 0xca, + 0x0e, 0xc7, 0x51, 0xc6, 0x0e, 0xe0, 0x0e, 0xc7, 0x40, 0xcb, 0x4d, 0x82, + 0x0e, 0xc1, 0x98, 0xc6, 0xcc, 0x41, 0x0e, 0xc0, 0xc3, 0x03, 0xa6, 0xfb, + 0x46, 0x0e, 0xce, 0xc3, 0xa7, 0x01, 0xc6, 0x58, 0xac, 0x0e, 0xc0, 0xcb, + 0x03, 0xa7, 0x10, 0xcb, 0x99, 0xad, 0x0e, 0xc0, 0xb9, 0xca, 0xa1, 0x70, + 0x0e, 0xc0, 0xb0, 0xc9, 0x13, 0x9c, 0x0e, 0xc4, 0x61, 0xc4, 0x18, 0xf2, + 0x0e, 0xc4, 0x58, 0xc4, 0x0c, 0x4d, 0x0e, 0xc1, 0xf0, 0xcf, 0x62, 0xd3, + 0x0e, 0xc1, 0xe9, 0xc6, 0x20, 0x7d, 0x0e, 0xc1, 0x49, 0xc5, 0x70, 0xdc, + 0x0e, 0xc1, 0x40, 0xc5, 0x58, 0xad, 0x0e, 0xc1, 0x59, 0xc5, 0x64, 0xb4, + 0x0e, 0xc1, 0x50, 0xce, 0x27, 0xab, 0x0e, 0xc1, 0xa8, 0xc7, 0x27, 0xb2, + 0x0e, 0xc1, 
0xa1, 0xc4, 0x0e, 0xe2, 0x0e, 0xc1, 0x6a, 0x03, 0xa7, 0x16,
+ 0xcb, 0x4d, 0x82, 0x0e, 0xc1, 0x90, 0x00, 0x43, 0xa7, 0x1a, 0xc4, 0x18,
+ 0xf2, 0x0e, 0xc1, 0x79, 0xc9, 0x13, 0x9c, 0x0e, 0xc1, 0x70, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0};
+const uint8_t *UnicodeNameToCodepointIndex = UnicodeNameToCodepointIndex_;
+const std::size_t UnicodeNameToCodepointIndexSize = 239405;
+const std::size_t UnicodeNameToCodepointLargestNameSize = 74;
+
+} // namespace unicode
+} // namespace sys
+} // namespace llvm
diff --git a/llvm/lib/Support/Unix/COM.inc b/llvm/lib/Support/Unix/COM.inc
index 03a690ac3766..d97b59ac02cf 100644
--- a/llvm/lib/Support/Unix/COM.inc
+++ b/llvm/lib/Support/Unix/COM.inc
@@ -21,6 +21,6 @@ namespace sys {
 InitializeCOMRAII::InitializeCOMRAII(COMThreadingMode Threading,
                                      bool SpeedOverMemory) {}
 
-InitializeCOMRAII::~InitializeCOMRAII() {}
+InitializeCOMRAII::~InitializeCOMRAII() = default;
 }
 }
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index b83477e0e4cc..5e008069dd98 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -16,6 +16,7 @@
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/Valgrind.h"
 
 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 788460d657fe..2ae7c6dc47e0 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -194,7 +194,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
 
 /// GetMainExecutable - Return the path to the main executable, given the
 /// value of argv[0] from program startup.
-std::string getMainExecutable(const char *argv0, void *MainAddr) {
+std::string getMainExecutableImpl(const char *argv0, void *MainAddr) {
 #if defined(__APPLE__)
   // On OS X the executable path is saved to the stack by dyld. Reading it
   // from there is much faster than calling dladdr, especially for large
@@ -874,12 +874,14 @@ void mapped_file_region::unmapImpl() {
 
 void mapped_file_region::dontNeedImpl() {
   assert(Mode == mapped_file_region::readonly);
+  if (!Mapping)
+    return;
 #if defined(__MVS__) || defined(_AIX)
   // If we don't have madvise, or it isn't beneficial, treat this as a no-op.
-  return;
+#elif defined(POSIX_MADV_DONTNEED)
+  ::posix_madvise(Mapping, Size, POSIX_MADV_DONTNEED);
 #else
-  if (Mapping)
-    ::madvise(Mapping, Size, MADV_DONTNEED);
+  ::madvise(Mapping, Size, MADV_DONTNEED);
 #endif
 }
 
@@ -948,7 +950,15 @@ ErrorOr<basic_file_status> directory_entry::status() const {
   return s;
 }
 
-#if !defined(F_GETPATH)
+//
+// FreeBSD optionally provides /proc/self/fd, but it is incompatible with
+// Linux. The thing to use is realpath.
+//
+#if !defined(__FreeBSD__)
+#define TRY_PROC_SELF_FD
+#endif
+
+#if !defined(F_GETPATH) && defined(TRY_PROC_SELF_FD)
 static bool hasProcSelfFD() {
   // If we have a /proc filesystem mounted, we can quickly establish the
   // real name of the file with readlink
@@ -1135,6 +1145,7 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
     RealPath->append(Buffer, Buffer + strlen(Buffer));
 #else
   char Buffer[PATH_MAX];
+#if defined(TRY_PROC_SELF_FD)
   if (hasProcSelfFD()) {
     char ProcPath[64];
     snprintf(ProcPath, sizeof(ProcPath), "/proc/self/fd/%d", ResultFD);
@@ -1142,13 +1153,16 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
     if (CharCount > 0)
       RealPath->append(Buffer, Buffer + CharCount);
   } else {
+#endif
     SmallString<128> Storage;
     StringRef P = Name.toNullTerminatedStringRef(Storage);
 
     // Use ::realpath to get the real path name
     if (::realpath(P.begin(), Buffer) != nullptr)
       RealPath->append(Buffer, Buffer + strlen(Buffer));
+#if defined(TRY_PROC_SELF_FD)
   }
+#endif
 #endif
   return std::error_code();
 }
diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc
index d3d9fb7d7187..3c2d118977c5 100644
--- a/llvm/lib/Support/Unix/Process.inc
+++ b/llvm/lib/Support/Unix/Process.inc
@@ -331,6 +331,23 @@ extern "C" int tigetnum(char *capname);
 static ManagedStatic<sys::Mutex> TermColorMutex;
 #endif
 
+bool checkTerminalEnvironmentForColors() {
+  if (const char *TermStr = std::getenv("TERM")) {
+    return StringSwitch<bool>(TermStr)
+        .Case("ansi", true)
+        .Case("cygwin", true)
+        .Case("linux", true)
+        .StartsWith("screen", true)
+        .StartsWith("xterm", true)
+        .StartsWith("vt100", true)
+        .StartsWith("rxvt", true)
+        .EndsWith("color", true)
+        .Default(false);
+  }
+
+  return false;
+}
+
 static bool terminalHasColors(int fd) {
 #ifdef LLVM_ENABLE_TERMINFO
   // First, acquire a global lock because these C routines are thread hostile.
@@ -356,7 +373,8 @@ static bool terminalHasColors(int fd) {
   //
   // The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if
   // the terminfo says that no colors are supported.
-  bool HasColors = tigetnum(const_cast<char *>("colors")) > 0;
+  int colors_ti = tigetnum(const_cast<char *>("colors"));
+  bool HasColors = colors_ti >= 0 ? colors_ti : checkTerminalEnvironmentForColors();
 
   // Now extract the structure allocated by setupterm and free its memory
   // through a really silly dance.
@@ -364,27 +382,12 @@ static bool terminalHasColors(int fd) {
   (void)del_curterm(termp); // Drop any errors here.
 
   // Return true if we found a color capabilities for the current terminal.
-  if (HasColors)
-    return true;
+  return HasColors;
 #else
   // When the terminfo database is not available, check if the current terminal
   // is one of terminals that are known to support ANSI color escape codes.
-  if (const char *TermStr = std::getenv("TERM")) {
-    return StringSwitch<bool>(TermStr)
-        .Case("ansi", true)
-        .Case("cygwin", true)
-        .Case("linux", true)
-        .StartsWith("screen", true)
-        .StartsWith("xterm", true)
-        .StartsWith("vt100", true)
-        .StartsWith("rxvt", true)
-        .EndsWith("color", true)
-        .Default(false);
-  }
+  return checkTerminalEnvironmentForColors();
 #endif
-
-  // Otherwise, be conservative.
-  return false;
 }
 
 bool Process::FileDescriptorHasColors(int fd) {
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 575e2aab1eab..bf145bffe8bf 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -79,8 +79,8 @@ using namespace llvm;
 
-static RETSIGTYPE SignalHandler(int Sig); // defined below.
-static RETSIGTYPE InfoSignalHandler(int Sig); // defined below.
+static void SignalHandler(int Sig);     // defined below.
+static void InfoSignalHandler(int Sig); // defined below.
 
 using SignalHandlerFunctionType = void (*)();
 /// The function to call if ctrl-c is pressed.
@@ -362,7 +362,7 @@ void sys::CleanupOnSignal(uintptr_t Context) {
 }
 
 // The signal handler that runs.
-static RETSIGTYPE SignalHandler(int Sig) {
+static void SignalHandler(int Sig) {
   // Restore the signal behavior to default, so that the program actually
   // crashes when we return and the signal reissues.  This also ensures that if
   // we crash in our signal handler that the program will terminate immediately
@@ -406,7 +406,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
 #endif
 }
 
-static RETSIGTYPE InfoSignalHandler(int Sig) {
+static void InfoSignalHandler(int Sig) {
   SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno);
   if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction)
     CurrentInfoFunction();
@@ -432,6 +432,10 @@ void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
 }
 
 void llvm::sys::DefaultOneShotPipeSignalHandler() {
+  // UNIX03 conformance requires a non-zero exit code and an error message
+  // to stderr when writing to a closed stdout fails.
+  errs() << "error: write on a pipe with no reader\n";
+
   // Send a special return code that drivers can check for, from sysexits.h.
   exit(EX_IOERR);
 }
diff --git a/llvm/lib/Support/Unix/ThreadLocal.inc b/llvm/lib/Support/Unix/ThreadLocal.inc
index a402ae980424..0a958a2b952f 100644
--- a/llvm/lib/Support/Unix/ThreadLocal.inc
+++ b/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -17,8 +17,6 @@
 
 #include "llvm/Config/config.h"
 
-#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
-
 #include <cassert>
 #include <pthread.h>
 #include <stdlib.h>
@@ -58,13 +56,3 @@ void ThreadLocalImpl::removeInstance() {
 }
 
 }
-#else
-namespace llvm {
-using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() : data() { }
-ThreadLocalImpl::~ThreadLocalImpl() { }
-void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void *>(d);}
-void *ThreadLocalImpl::getInstance() { return data; }
-void ThreadLocalImpl::removeInstance() { setInstance(0); }
-}
-#endif
diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc
index 5de1cf071ba9..99f64b4f553d 100644
--- a/llvm/lib/Support/Unix/Threading.inc
+++ b/llvm/lib/Support/Unix/Threading.inc
@@ -18,6 +18,7 @@
 #if defined(__APPLE__)
 #include <mach/mach_init.h>
 #include <mach/mach_port.h>
+#include <pthread/qos.h>
 #endif
 
 #include <pthread.h>
@@ -258,27 +259,29 @@ SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
   //   SCHED_OTHER   the standard round-robin time-sharing policy;
   return !pthread_setschedparam(
              pthread_self(),
-             Priority == ThreadPriority::Background ? SCHED_IDLE : SCHED_OTHER,
+             // FIXME: consider SCHED_BATCH for Low
+             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
              &priority)
              ? SetThreadPriorityResult::SUCCESS
              : SetThreadPriorityResult::FAILURE;
 #elif defined(__APPLE__)
-  // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/getpriority.2.html
-  // When setting a thread into background state the scheduling priority is set
-  // to lowest value, disk and network IO are throttled. Network IO will be
-  // throttled for any sockets the thread opens after going into background
-  // state. Any previously opened sockets are not affected.
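// Editor's note: the Darwin hunk below replaces the old
// setpriority(PRIO_DARWIN_THREAD, ...) call with QoS classes, which is why
// <pthread/qos.h> is now included above. A minimal sketch of the underlying
// call, assuming only the Apple pthread QoS API:

#include <pthread/qos.h>

// Ask the scheduler to treat the calling thread as background-class work;
// the second argument is a relative priority within the QoS band (0 keeps
// the default). Returns true on success (the call returns 0).
static bool demoteSelfToBackground() {
  return pthread_set_qos_class_self_np(QOS_CLASS_BACKGROUND, 0) == 0;
}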
- - // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/getiopolicy_np.3.html - // I/Os with THROTTLE policy are called THROTTLE I/Os. If a THROTTLE I/O - // request occurs within a small time window (usually a fraction of a second) - // of another NORMAL I/O request, the thread that issues the THROTTLE I/O is - // forced to sleep for a certain interval. This slows down the thread that - // issues the THROTTLE I/O so that NORMAL I/Os can utilize most of the disk - // I/O bandwidth. - return !setpriority(PRIO_DARWIN_THREAD, 0, - Priority == ThreadPriority::Background ? PRIO_DARWIN_BG - : 0) + // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon + // + // Background - Applies to work that isn’t visible to the user and may take significant + // time to complete. Examples include indexing, backing up, or synchronizing data. This + // class emphasizes energy efficiency. + // + // Utility - Applies to work that takes anywhere from a few seconds to a few minutes to + // complete. Examples include downloading a document or importing data. This class + // offers a balance between responsiveness, performance, and energy efficiency. + const auto qosClass = [&](){ + switch (Priority) { + case ThreadPriority::Background: return QOS_CLASS_BACKGROUND; + case ThreadPriority::Low: return QOS_CLASS_UTILITY; + case ThreadPriority::Default: return QOS_CLASS_DEFAULT; + } + }(); + return !pthread_set_qos_class_self_np(qosClass, 0) ? SetThreadPriorityResult::SUCCESS : SetThreadPriorityResult::FAILURE; #endif diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index f15e301874c4..21f0c39bfd6e 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -151,6 +151,10 @@ bool FileSystem::exists(const Twine &Path) { return Status && Status->exists(); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void FileSystem::dump() const { print(dbgs(), PrintType::RecursiveContents); } +#endif + #ifndef NDEBUG static bool isTraversalComponent(StringRef Component) { return Component.equals("..") || Component.equals("."); @@ -273,6 +277,10 @@ public: std::error_code getRealPath(const Twine &Path, SmallVectorImpl &Output) const override; +protected: + void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const override; + private: // If this FS has its own working dir, use it to make Path absolute. // The returned twine is safe to use as long as both Storage and Path live. 
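// Editor's note: the printImpl() overrides being added to this file give
// every filesystem a self-description for debugging, with dump() above
// printing the recursive form. A sketch, assuming the public print() entry
// point that dump() forwards to has this shape:

#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"

void describeFS(llvm::vfs::FileSystem &FS) {
  // For the FS returned by vfs::getRealFileSystem() this prints
  // "RealFileSystem using process CWD"; overlays recurse, indenting one
  // level per nested filesystem.
  FS.print(llvm::outs(), llvm::vfs::FileSystem::PrintType::RecursiveContents);
}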
@@ -354,6 +362,17 @@ RealFileSystem::getRealPath(const Twine &Path, return llvm::sys::fs::real_path(adjustPath(Path, Storage), Output); } +void RealFileSystem::printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "RealFileSystem using "; + if (WD) + OS << "own"; + else + OS << "process"; + OS << " CWD\n"; +} + IntrusiveRefCntPtr vfs::getRealFileSystem() { static IntrusiveRefCntPtr FS(new RealFileSystem(true)); return FS; @@ -459,6 +478,19 @@ OverlayFileSystem::getRealPath(const Twine &Path, return errc::no_such_file_or_directory; } +void OverlayFileSystem::printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "OverlayFileSystem\n"; + if (Type == PrintType::Summary) + return; + + if (Type == PrintType::Contents) + Type = PrintType::Summary; + for (auto FS : overlays_range()) + FS->print(OS, Type, IndentLevel + 1); +} + llvm::vfs::detail::DirIterImpl::~DirIterImpl() = default; namespace { @@ -467,28 +499,25 @@ namespace { class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl { using FileSystemPtr = llvm::IntrusiveRefCntPtr; - /// File systems to check for entries in. Processed in reverse order. - SmallVector FSList; - /// The directory iterator for the current filesystem. + /// Iterators to combine, processed in reverse order. + SmallVector IterList; + /// The iterator currently being traversed. directory_iterator CurrentDirIter; - /// The path of the directory to iterate the entries of. - std::string DirPath; /// The set of names already returned as entries. llvm::StringSet<> SeenNames; - /// Sets \c CurrentDirIter to an iterator of \c DirPath in the next file - /// system in the list, or leaves it as is (at its end position) if we've - /// already gone through them all. - std::error_code incrementFS() { - while (!FSList.empty()) { - std::error_code EC; - CurrentDirIter = FSList.back()->dir_begin(DirPath, EC); - FSList.pop_back(); - if (EC && EC != errc::no_such_file_or_directory) - return EC; + /// Sets \c CurrentDirIter to the next iterator in the list, or leaves it as + /// is (at its end position) if we've already gone through them all. 
+ std::error_code incrementIter(bool IsFirstTime) { + while (!IterList.empty()) { + CurrentDirIter = IterList.back(); + IterList.pop_back(); if (CurrentDirIter != directory_iterator()) break; // found } + + if (IsFirstTime && CurrentDirIter == directory_iterator()) + return errc::no_such_file_or_directory; return {}; } @@ -499,7 +528,7 @@ class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl { if (!IsFirstTime) CurrentDirIter.increment(EC); if (!EC && CurrentDirIter == directory_iterator()) - EC = incrementFS(); + EC = incrementIter(IsFirstTime); return EC; } @@ -520,23 +549,24 @@ class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl { public: CombiningDirIterImpl(ArrayRef FileSystems, std::string Dir, - std::error_code &EC) - : FSList(FileSystems.begin(), FileSystems.end()), - DirPath(std::move(Dir)) { - if (!FSList.empty()) { - CurrentDirIter = FSList.back()->dir_begin(DirPath, EC); - FSList.pop_back(); - if (!EC || EC == errc::no_such_file_or_directory) - EC = incrementImpl(true); + std::error_code &EC) { + for (auto FS : FileSystems) { + std::error_code FEC; + directory_iterator Iter = FS->dir_begin(Dir, FEC); + if (FEC && FEC != errc::no_such_file_or_directory) { + EC = FEC; + return; + } + if (!FEC) + IterList.push_back(Iter); } + EC = incrementImpl(true); } - CombiningDirIterImpl(directory_iterator FirstIter, FileSystemPtr Fallback, - std::string FallbackDir, std::error_code &EC) - : FSList({Fallback}), CurrentDirIter(FirstIter), - DirPath(std::move(FallbackDir)) { - if (!EC || EC == errc::no_such_file_or_directory) - EC = incrementImpl(true); + CombiningDirIterImpl(ArrayRef DirIters, + std::error_code &EC) + : IterList(DirIters.begin(), DirIters.end()) { + EC = incrementImpl(true); } std::error_code increment() override { return incrementImpl(false); } @@ -546,8 +576,11 @@ public: directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir, std::error_code &EC) { - return directory_iterator( + directory_iterator Combined = directory_iterator( std::make_shared(FSList, Dir.str(), EC)); + if (EC) + return {}; + return Combined; } void ProxyFileSystem::anchor() {} @@ -557,10 +590,15 @@ namespace vfs { namespace detail { -enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink }; +enum InMemoryNodeKind { + IME_File, + IME_Directory, + IME_HardLink, + IME_SymbolicLink, +}; /// The in memory file system is a tree of Nodes. Every node can either be a -/// file , hardlink or a directory. +/// file, symlink, hardlink or a directory. class InMemoryNode { InMemoryNodeKind Kind; std::string FileName; @@ -629,6 +667,30 @@ public: } }; +class InMemorySymbolicLink : public InMemoryNode { + std::string TargetPath; + Status Stat; + +public: + InMemorySymbolicLink(StringRef Path, StringRef TargetPath, Status Stat) + : InMemoryNode(Path, IME_SymbolicLink), TargetPath(std::move(TargetPath)), + Stat(Stat) {} + + std::string toString(unsigned Indent) const override { + return std::string(Indent, ' ') + "SymbolicLink to -> " + TargetPath; + } + + Status getStatus(const Twine &RequestedName) const override { + return Status::copyWithNewName(Stat, RequestedName); + } + + StringRef getTargetPath() const { return TargetPath; } + + static bool classof(const InMemoryNode *N) { + return N->getKind() == IME_SymbolicLink; + } +}; + /// Adapt a InMemoryFile for VFS' File interface. The goal is to make /// \p InMemoryFileAdaptor mimic as much as possible the behavior of /// \p RealFile. 
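// Editor's note: with InMemorySymbolicLink above, the in-memory VFS can model
// symlinks, including chains and links that cross directories. A small usage
// sketch of the new API (paths hypothetical; addSymbolicLink() is defined
// later in this file):

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
using namespace llvm;

void demoInMemorySymlink() {
  IntrusiveRefCntPtr<vfs::InMemoryFileSystem> FS(new vfs::InMemoryFileSystem);
  FS->addFile("/real/a.txt", /*ModificationTime=*/0,
              MemoryBuffer::getMemBuffer("hello"));
  FS->addSymbolicLink("/link.txt", "/real/a.txt", /*ModificationTime=*/0,
                      /*User=*/None, /*Group=*/None, /*Perms=*/None);
  // status() and openFileForRead() follow the final symlink, so this
  // describes /real/a.txt; cyclic chains give up after MaxSymlinkDepth.
  ErrorOr<vfs::Status> St = FS->status("/link.txt");
  (void)St;
}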
@@ -677,7 +739,7 @@ public: UniqueID getUniqueID() const { return Stat.getUniqueID(); } - InMemoryNode *getChild(StringRef Name) { + InMemoryNode *getChild(StringRef Name) const { auto I = Entries.find(Name); if (I != Entries.end()) return I->second.get(); @@ -773,10 +835,10 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, detail::InMemoryDirectory *Dir = Root.get(); auto I = llvm::sys::path::begin(Path), E = sys::path::end(Path); - const auto ResolvedUser = User.getValueOr(0); - const auto ResolvedGroup = Group.getValueOr(0); - const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file); - const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all); + const auto ResolvedUser = User.value_or(0); + const auto ResolvedGroup = Group.value_or(0); + const auto ResolvedType = Type.value_or(sys::fs::file_type::regular_file); + const auto ResolvedPerms = Perms.value_or(sys::fs::all_all); // Any intermediate directories we create should be accessible by // the owner, even if Perms says otherwise for the final path. const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all; @@ -864,22 +926,23 @@ bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime, }); } -static ErrorOr -lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir, - const Twine &P) { +detail::NamedNodeOrError +InMemoryFileSystem::lookupNode(const Twine &P, bool FollowFinalSymlink, + size_t SymlinkDepth) const { SmallString<128> Path; P.toVector(Path); // Fix up relative paths. This just prepends the current working directory. - std::error_code EC = FS.makeAbsolute(Path); + std::error_code EC = makeAbsolute(Path); assert(!EC); (void)EC; - if (FS.useNormalizedPaths()) + if (useNormalizedPaths()) llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + const detail::InMemoryDirectory *Dir = Root.get(); if (Path.empty()) - return Dir; + return detail::NamedNodeOrError(Path, Dir); auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path); while (true) { @@ -888,43 +951,99 @@ lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir, if (!Node) return errc::no_such_file_or_directory; + if (auto Symlink = dyn_cast(Node)) { + // If we're at the end of the path, and we're not following through + // terminal symlinks, then we're done. + if (I == E && !FollowFinalSymlink) + return detail::NamedNodeOrError(Path, Symlink); + + if (SymlinkDepth > InMemoryFileSystem::MaxSymlinkDepth) + return errc::no_such_file_or_directory; + + SmallString<128> TargetPath = Symlink->getTargetPath(); + if (std::error_code EC = makeAbsolute(TargetPath)) + return EC; + + // Keep going with the target. We always want to follow symlinks here + // because we're either at the end of a path that we want to follow, or + // not at the end of a path, in which case we need to follow the symlink + // regardless. + auto Target = + lookupNode(TargetPath, /*FollowFinalSymlink=*/true, SymlinkDepth + 1); + if (!Target || I == E) + return Target; + + if (!isa(*Target)) + return errc::no_such_file_or_directory; + + // Otherwise, continue on the search in the symlinked directory. + Dir = cast(*Target); + continue; + } + // Return the file if it's at the end of the path. if (auto File = dyn_cast(Node)) { if (I == E) - return File; + return detail::NamedNodeOrError(Path, File); return errc::no_such_file_or_directory; } // If Node is HardLink then return the resolved file. 
if (auto File = dyn_cast(Node)) { if (I == E) - return &File->getResolvedFile(); + return detail::NamedNodeOrError(Path, &File->getResolvedFile()); return errc::no_such_file_or_directory; } // Traverse directories. Dir = cast(Node); if (I == E) - return Dir; + return detail::NamedNodeOrError(Path, Dir); } } -bool InMemoryFileSystem::addHardLink(const Twine &FromPath, - const Twine &ToPath) { - auto FromNode = lookupInMemoryNode(*this, Root.get(), FromPath); - auto ToNode = lookupInMemoryNode(*this, Root.get(), ToPath); +bool InMemoryFileSystem::addHardLink(const Twine &NewLink, + const Twine &Target) { + auto NewLinkNode = lookupNode(NewLink, /*FollowFinalSymlink=*/false); + // Whether symlinks in the hardlink target are followed is + // implementation-defined in POSIX. + // We're following symlinks here to be consistent with macOS. + auto TargetNode = lookupNode(Target, /*FollowFinalSymlink=*/true); // FromPath must not have been added before. ToPath must have been added // before. Resolved ToPath must be a File. - if (!ToNode || FromNode || !isa(*ToNode)) + if (!TargetNode || NewLinkNode || !isa(*TargetNode)) return false; - return addFile(FromPath, 0, nullptr, None, None, None, None, + return addFile(NewLink, 0, nullptr, None, None, None, None, [&](detail::NewInMemoryNodeInfo NNI) { return std::make_unique( - NNI.Path.str(), *cast(*ToNode)); + NNI.Path.str(), + *cast(*TargetNode)); + }); +} + +bool InMemoryFileSystem::addSymbolicLink(const Twine &NewLink, + const Twine &Target, + time_t ModificationTime, + Optional User, + Optional Group, + Optional Perms) { + auto NewLinkNode = lookupNode(NewLink, /*FollowFinalSymlink=*/false); + if (NewLinkNode) + return false; + + SmallString<128> NewLinkStr, TargetStr; + NewLink.toVector(NewLinkStr); + Target.toVector(TargetStr); + + return addFile(NewLinkStr, ModificationTime, nullptr, User, Group, + sys::fs::file_type::symlink_file, Perms, + [&](detail::NewInMemoryNodeInfo NNI) { + return std::make_unique( + NewLinkStr, TargetStr, NNI.makeStatus()); }); } llvm::ErrorOr InMemoryFileSystem::status(const Twine &Path) { - auto Node = lookupInMemoryNode(*this, Root.get(), Path); + auto Node = lookupNode(Path, /*FollowFinalSymlink=*/true); if (Node) return (*Node)->getStatus(Path); return Node.getError(); @@ -932,7 +1051,7 @@ llvm::ErrorOr InMemoryFileSystem::status(const Twine &Path) { llvm::ErrorOr> InMemoryFileSystem::openFileForRead(const Twine &Path) { - auto Node = lookupInMemoryNode(*this, Root.get(), Path); + auto Node = lookupNode(Path,/*FollowFinalSymlink=*/true); if (!Node) return Node.getError(); @@ -946,10 +1065,9 @@ InMemoryFileSystem::openFileForRead(const Twine &Path) { return make_error_code(llvm::errc::invalid_argument); } -namespace { - /// Adaptor from InMemoryDir::iterator to directory_iterator. 
-class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl { +class InMemoryFileSystem::DirIterator : public llvm::vfs::detail::DirIterImpl { + const InMemoryFileSystem *FS; detail::InMemoryDirectory::const_iterator I; detail::InMemoryDirectory::const_iterator E; std::string RequestedDirName; @@ -967,6 +1085,13 @@ class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl { case detail::IME_Directory: Type = sys::fs::file_type::directory_file; break; + case detail::IME_SymbolicLink: + if (auto SymlinkTarget = + FS->lookupNode(Path, /*FollowFinalSymlink=*/true)) { + Path = SymlinkTarget.getName(); + Type = (*SymlinkTarget)->getStatus(Path).getType(); + } + break; } CurrentEntry = directory_entry(std::string(Path.str()), Type); } else { @@ -977,11 +1102,12 @@ class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl { } public: - InMemoryDirIterator() = default; + DirIterator() = default; - explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir, - std::string RequestedDirName) - : I(Dir.begin()), E(Dir.end()), + DirIterator(const InMemoryFileSystem *FS, + const detail::InMemoryDirectory &Dir, + std::string RequestedDirName) + : FS(FS), I(Dir.begin()), E(Dir.end()), RequestedDirName(std::move(RequestedDirName)) { setCurrentEntry(); } @@ -993,22 +1119,20 @@ public: } }; -} // namespace - directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir, std::error_code &EC) { - auto Node = lookupInMemoryNode(*this, Root.get(), Dir); + auto Node = lookupNode(Dir, /*FollowFinalSymlink=*/true); if (!Node) { EC = Node.getError(); - return directory_iterator(std::make_shared()); + return directory_iterator(std::make_shared()); } if (auto *DirNode = dyn_cast(*Node)) return directory_iterator( - std::make_shared(*DirNode, Dir.str())); + std::make_shared(this, *DirNode, Dir.str())); EC = make_error_code(llvm::errc::not_a_directory); - return directory_iterator(std::make_shared()); + return directory_iterator(std::make_shared()); } std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) { @@ -1046,6 +1170,12 @@ std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) { return {}; } +void InMemoryFileSystem::printImpl(raw_ostream &OS, PrintType PrintContents, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "InMemoryFileSystem\n"; +} + } // namespace vfs } // namespace llvm @@ -1079,6 +1209,14 @@ static llvm::SmallString<256> canonicalize(llvm::StringRef Path) { return result; } +/// Whether the error and entry specify a file/directory that was not found. +static bool isFileNotFound(std::error_code EC, + RedirectingFileSystem::Entry *E = nullptr) { + if (E && !isa(E)) + return false; + return EC == llvm::errc::no_such_file_or_directory; +} + } // anonymous namespace @@ -1255,49 +1393,93 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, ErrorOr Result = lookupPath(Path); if (!Result) { - EC = Result.getError(); - if (shouldFallBackToExternalFS(EC)) + if (Redirection != RedirectKind::RedirectOnly && + isFileNotFound(Result.getError())) return ExternalFS->dir_begin(Path, EC); + + EC = Result.getError(); return {}; } // Use status to make sure the path exists and refers to a directory. 
ErrorOr S = status(Path, Dir, *Result); if (!S) { - if (shouldFallBackToExternalFS(S.getError(), Result->E)) + if (Redirection != RedirectKind::RedirectOnly && + isFileNotFound(S.getError(), Result->E)) return ExternalFS->dir_begin(Dir, EC); + EC = S.getError(); return {}; } + if (!S->isDirectory()) { - EC = std::error_code(static_cast(errc::not_a_directory), - std::system_category()); + EC = errc::not_a_directory; return {}; } // Create the appropriate directory iterator based on whether we found a // DirectoryRemapEntry or DirectoryEntry. - directory_iterator DirIter; + directory_iterator RedirectIter; + std::error_code RedirectEC; if (auto ExtRedirect = Result->getExternalRedirect()) { auto RE = cast(Result->E); - DirIter = ExternalFS->dir_begin(*ExtRedirect, EC); + RedirectIter = ExternalFS->dir_begin(*ExtRedirect, RedirectEC); if (!RE->useExternalName(UseExternalNames)) { // Update the paths in the results to use the virtual directory's path. - DirIter = + RedirectIter = directory_iterator(std::make_shared( - std::string(Path), DirIter)); + std::string(Path), RedirectIter)); } } else { auto DE = cast(Result->E); - DirIter = directory_iterator(std::make_shared( - Path, DE->contents_begin(), DE->contents_end(), EC)); + RedirectIter = + directory_iterator(std::make_shared( + Path, DE->contents_begin(), DE->contents_end(), RedirectEC)); + } + + if (RedirectEC) { + if (RedirectEC != errc::no_such_file_or_directory) { + EC = RedirectEC; + return {}; + } + RedirectIter = {}; } - if (!shouldUseExternalFS()) - return DirIter; - return directory_iterator(std::make_shared( - DirIter, ExternalFS, std::string(Path), EC)); + if (Redirection == RedirectKind::RedirectOnly) { + EC = RedirectEC; + return RedirectIter; + } + + std::error_code ExternalEC; + directory_iterator ExternalIter = ExternalFS->dir_begin(Path, ExternalEC); + if (ExternalEC) { + if (ExternalEC != errc::no_such_file_or_directory) { + EC = ExternalEC; + return {}; + } + ExternalIter = {}; + } + + SmallVector Iters; + switch (Redirection) { + case RedirectKind::Fallthrough: + Iters.push_back(ExternalIter); + Iters.push_back(RedirectIter); + break; + case RedirectKind::Fallback: + Iters.push_back(RedirectIter); + Iters.push_back(ExternalIter); + break; + default: + llvm_unreachable("unhandled RedirectKind"); + } + + directory_iterator Combined{ + std::make_shared(Iters, EC)}; + if (EC) + return {}; + return Combined; } void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) { @@ -1309,7 +1491,16 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { } void RedirectingFileSystem::setFallthrough(bool Fallthrough) { - IsFallthrough = Fallthrough; + if (Fallthrough) { + Redirection = RedirectingFileSystem::RedirectKind::Fallthrough; + } else { + Redirection = RedirectingFileSystem::RedirectKind::RedirectOnly; + } +} + +void RedirectingFileSystem::setRedirection( + RedirectingFileSystem::RedirectKind Kind) { + Redirection = Kind; } std::vector RedirectingFileSystem::getRoots() const { @@ -1319,34 +1510,59 @@ std::vector RedirectingFileSystem::getRoots() const { return R; } -void RedirectingFileSystem::dump(raw_ostream &OS) const { +void RedirectingFileSystem::printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "RedirectingFileSystem (UseExternalNames: " + << (UseExternalNames ? 
"true" : "false") << ")\n"; + if (Type == PrintType::Summary) + return; + for (const auto &Root : Roots) - dumpEntry(OS, Root.get()); + printEntry(OS, Root.get(), IndentLevel); + + printIndent(OS, IndentLevel); + OS << "ExternalFS:\n"; + ExternalFS->print(OS, Type == PrintType::Contents ? PrintType::Summary : Type, + IndentLevel + 1); } -void RedirectingFileSystem::dumpEntry(raw_ostream &OS, - RedirectingFileSystem::Entry *E, - int NumSpaces) const { - StringRef Name = E->getName(); - for (int i = 0, e = NumSpaces; i < e; ++i) - OS << " "; - OS << "'" << Name.str().c_str() << "'" - << "\n"; +void RedirectingFileSystem::printEntry(raw_ostream &OS, + RedirectingFileSystem::Entry *E, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "'" << E->getName() << "'"; - if (E->getKind() == RedirectingFileSystem::EK_Directory) { - auto *DE = dyn_cast(E); - assert(DE && "Should be a directory"); + switch (E->getKind()) { + case EK_Directory: { + auto *DE = cast(E); + OS << "\n"; for (std::unique_ptr &SubEntry : llvm::make_range(DE->contents_begin(), DE->contents_end())) - dumpEntry(OS, SubEntry.get(), NumSpaces + 2); + printEntry(OS, SubEntry.get(), IndentLevel + 1); + break; + } + case EK_DirectoryRemap: + case EK_File: { + auto *RE = cast(E); + OS << " -> '" << RE->getExternalContentsPath() << "'"; + switch (RE->getUseName()) { + case NK_NotSet: + break; + case NK_External: + OS << " (UseExternalName: true)"; + break; + case NK_Virtual: + OS << " (UseExternalName: false)"; + break; + } + OS << "\n"; + break; + } } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { dump(dbgs()); } -#endif - /// A helper class to hold the common YAML parsing state. class llvm::vfs::RedirectingFileSystemParser { yaml::Stream &Stream; @@ -1388,6 +1604,23 @@ class llvm::vfs::RedirectingFileSystemParser { return false; } + Optional + parseRedirectKind(yaml::Node *N) { + SmallString<12> Storage; + StringRef Value; + if (!parseScalarString(N, Value, Storage)) + return None; + + if (Value.equals_insensitive("fallthrough")) { + return RedirectingFileSystem::RedirectKind::Fallthrough; + } else if (Value.equals_insensitive("fallback")) { + return RedirectingFileSystem::RedirectKind::Fallback; + } else if (Value.equals_insensitive("redirect-only")) { + return RedirectingFileSystem::RedirectKind::RedirectOnly; + } + return None; + } + struct KeyStatus { bool Required; bool Seen = false; @@ -1731,6 +1964,7 @@ public: KeyStatusPair("use-external-names", false), KeyStatusPair("overlay-relative", false), KeyStatusPair("fallthrough", false), + KeyStatusPair("redirecting-with", false), KeyStatusPair("roots", true), }; @@ -1789,8 +2023,34 @@ public: if (!parseScalarBool(I.getValue(), FS->UseExternalNames)) return false; } else if (Key == "fallthrough") { - if (!parseScalarBool(I.getValue(), FS->IsFallthrough)) + if (Keys["redirecting-with"].Seen) { + error(I.getValue(), + "'fallthrough' and 'redirecting-with' are mutually exclusive"); + return false; + } + + bool ShouldFallthrough = false; + if (!parseScalarBool(I.getValue(), ShouldFallthrough)) + return false; + + if (ShouldFallthrough) { + FS->Redirection = RedirectingFileSystem::RedirectKind::Fallthrough; + } else { + FS->Redirection = RedirectingFileSystem::RedirectKind::RedirectOnly; + } + } else if (Key == "redirecting-with") { + if (Keys["fallthrough"].Seen) { + error(I.getValue(), + "'fallthrough' and 'redirecting-with' are mutually exclusive"); + return false; + } + + if (auto Kind = 
parseRedirectKind(I.getValue())) { + FS->Redirection = *Kind; + } else { + error(I.getValue(), "expected valid redirect kind"); return false; + } } else { llvm_unreachable("key missing from Keys"); } @@ -1923,13 +2183,6 @@ RedirectingFileSystem::LookupResult::LookupResult( } } -bool RedirectingFileSystem::shouldFallBackToExternalFS( - std::error_code EC, RedirectingFileSystem::Entry *E) const { - if (E && !isa(E)) - return false; - return shouldUseExternalFS() && EC == llvm::errc::no_such_file_or_directory; -} - std::error_code RedirectingFileSystem::makeCanonical(SmallVectorImpl &Path) const { if (std::error_code EC = makeAbsolute(Path)) @@ -2001,9 +2254,16 @@ RedirectingFileSystem::lookupPathImpl( static Status getRedirectedFileStatus(const Twine &OriginalPath, bool UseExternalNames, Status ExternalStatus) { + // The path has been mapped by some nested VFS and exposes an external path, + // don't override it with the original path. + if (ExternalStatus.ExposesExternalVFSPath) + return ExternalStatus; + Status S = ExternalStatus; if (!UseExternalNames) S = Status::copyWithNewName(S, OriginalPath); + else + S.ExposesExternalVFSPath = true; S.IsVFSMapped = true; return S; } @@ -2032,11 +2292,13 @@ ErrorOr RedirectingFileSystem::status( ErrorOr RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath, const Twine &OriginalPath) const { - if (auto Result = ExternalFS->status(CanonicalPath)) { - return Result.get().copyWithNewName(Result.get(), OriginalPath); - } else { - return Result.getError(); - } + auto Result = ExternalFS->status(CanonicalPath); + + // The path has been mapped by some nested VFS, don't override it with the + // original path. + if (!Result || Result->ExposesExternalVFSPath) + return Result; + return Status::copyWithNewName(Result.get(), OriginalPath); } ErrorOr RedirectingFileSystem::status(const Twine &OriginalPath) { @@ -2046,17 +2308,31 @@ ErrorOr RedirectingFileSystem::status(const Twine &OriginalPath) { if (std::error_code EC = makeCanonical(CanonicalPath)) return EC; + if (Redirection == RedirectKind::Fallback) { + // Attempt to find the original file first, only falling back to the + // mapped file if that fails. + ErrorOr S = getExternalStatus(CanonicalPath, OriginalPath); + if (S) + return S; + } + ErrorOr Result = lookupPath(CanonicalPath); if (!Result) { - if (shouldFallBackToExternalFS(Result.getError())) { + // Was not able to map file, fallthrough to using the original path if + // that was the specified redirection type. + if (Redirection == RedirectKind::Fallthrough && + isFileNotFound(Result.getError())) return getExternalStatus(CanonicalPath, OriginalPath); - } return Result.getError(); } ErrorOr S = status(CanonicalPath, OriginalPath, *Result); - if (!S && shouldFallBackToExternalFS(S.getError(), Result->E)) { + if (!S && Redirection == RedirectKind::Fallthrough && + isFileNotFound(S.getError(), Result->E)) { + // Mapped the file but it wasn't found in the underlying filesystem, + // fallthrough to using the original path if that was the specified + // redirection type. return getExternalStatus(CanonicalPath, OriginalPath); } @@ -2092,7 +2368,9 @@ public: ErrorOr> File::getWithPath(ErrorOr> Result, const Twine &P) { - if (!Result) + // See \c getRedirectedFileStatus - don't update path if it's exposing an + // external path. 
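// Editor's note: the 'redirecting-with' key parsed above makes all three
// RedirectKinds spellable in overlay YAML, and is mutually exclusive with
// the older boolean 'fallthrough' key. A minimal overlay using it (all
// paths hypothetical):
//
//   {
//     'version': 0,
//     'redirecting-with': 'fallback',
//     'roots': [
//       { 'name': '/vfs/include', 'type': 'directory-remap',
//         'external-contents': '/real/include' }
//     ]
//   }
//
// With 'fallback' the original (external) path is tried first and the
// mapping is consulted only if that lookup fails; 'fallthrough' keeps the
// historical order; 'redirect-only' never consults the external FS.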
+ if (!Result || (*Result)->status()->ExposesExternalVFSPath) return Result; ErrorOr> F = std::move(*Result); @@ -2110,13 +2388,24 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) { if (std::error_code EC = makeCanonical(CanonicalPath)) return EC; + if (Redirection == RedirectKind::Fallback) { + // Attempt to find the original file first, only falling back to the + // mapped file if that fails. + auto F = File::getWithPath(ExternalFS->openFileForRead(CanonicalPath), + OriginalPath); + if (F) + return F; + } + ErrorOr Result = lookupPath(CanonicalPath); if (!Result) { - if (shouldFallBackToExternalFS(Result.getError())) + // Was not able to map file, fallthrough to using the original path if + // that was the specified redirection type. + if (Redirection == RedirectKind::Fallthrough && + isFileNotFound(Result.getError())) return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath), OriginalPath); - return Result.getError(); } @@ -2133,9 +2422,14 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) { auto ExternalFile = File::getWithPath( ExternalFS->openFileForRead(CanonicalRemappedPath), ExtRedirect); if (!ExternalFile) { - if (shouldFallBackToExternalFS(ExternalFile.getError(), Result->E)) + if (Redirection == RedirectKind::Fallthrough && + isFileNotFound(ExternalFile.getError(), Result->E)) { + // Mapped the file but it wasn't found in the underlying filesystem, + // fallthrough to using the original path if that was the specified + // redirection type. return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath), OriginalPath); + } return ExternalFile; } @@ -2143,7 +2437,8 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) { if (!ExternalStatus) return ExternalStatus.getError(); - // FIXME: Update the status with the name and VFSMapped. + // Otherwise, the file was successfully remapped. Mark it as such. Also + // replace the underlying path if the external name is being used. Status S = getRedirectedFileStatus( OriginalPath, RE->useExternalName(UseExternalNames), *ExternalStatus); return std::unique_ptr( @@ -2151,18 +2446,30 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) { } std::error_code -RedirectingFileSystem::getRealPath(const Twine &Path_, +RedirectingFileSystem::getRealPath(const Twine &OriginalPath, SmallVectorImpl &Output) const { - SmallString<256> Path; - Path_.toVector(Path); + SmallString<256> CanonicalPath; + OriginalPath.toVector(CanonicalPath); - if (std::error_code EC = makeCanonical(Path)) + if (std::error_code EC = makeCanonical(CanonicalPath)) return EC; - ErrorOr Result = lookupPath(Path); + if (Redirection == RedirectKind::Fallback) { + // Attempt to find the original file first, only falling back to the + // mapped file if that fails. + std::error_code EC = ExternalFS->getRealPath(CanonicalPath, Output); + if (!EC) + return EC; + } + + ErrorOr Result = + lookupPath(CanonicalPath); if (!Result) { - if (shouldFallBackToExternalFS(Result.getError())) - return ExternalFS->getRealPath(Path, Output); + // Was not able to map file, fallthrough to using the original path if + // that was the specified redirection type. + if (Redirection == RedirectKind::Fallthrough && + isFileNotFound(Result.getError())) + return ExternalFS->getRealPath(CanonicalPath, Output); return Result.getError(); } @@ -2170,16 +2477,21 @@ RedirectingFileSystem::getRealPath(const Twine &Path_, // path in the external file system. 
if (auto ExtRedirect = Result->getExternalRedirect()) { auto P = ExternalFS->getRealPath(*ExtRedirect, Output); - if (!P && shouldFallBackToExternalFS(P, Result->E)) { - return ExternalFS->getRealPath(Path, Output); + if (P && Redirection == RedirectKind::Fallthrough && + isFileNotFound(P, Result->E)) { + // Mapped the file but it wasn't found in the underlying filesystem, + // fallthrough to using the original path if that was the specified + // redirection type. + return ExternalFS->getRealPath(CanonicalPath, Output); } return P; } - // If we found a DirectoryEntry, still fall back to ExternalFS if allowed, - // because directories don't have a single external contents path. - return shouldUseExternalFS() ? ExternalFS->getRealPath(Path, Output) - : llvm::errc::invalid_argument; + // If we found a DirectoryEntry, still fallthrough to the original path if + // allowed, because directories don't have a single external contents path. + if (Redirection == RedirectKind::Fallthrough) + return ExternalFS->getRealPath(CanonicalPath, Output); + return llvm::errc::invalid_argument; } std::unique_ptr @@ -2355,14 +2667,14 @@ void JSONWriter::write(ArrayRef Entries, OS << "{\n" " 'version': 0,\n"; - if (IsCaseSensitive.hasValue()) + if (IsCaseSensitive) OS << " 'case-sensitive': '" << (IsCaseSensitive.getValue() ? "true" : "false") << "',\n"; - if (UseExternalNames.hasValue()) + if (UseExternalNames) OS << " 'use-external-names': '" << (UseExternalNames.getValue() ? "true" : "false") << "',\n"; bool UseOverlayRelative = false; - if (IsOverlayRelative.hasValue()) { + if (IsOverlayRelative) { UseOverlayRelative = IsOverlayRelative.getValue(); OS << " 'overlay-relative': '" << (UseOverlayRelative ? "true" : "false") << "',\n"; diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 5f1a364ea1a8..433c62900a3f 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -130,7 +130,7 @@ namespace fs { const file_t kInvalidFile = INVALID_HANDLE_VALUE; -std::string getMainExecutable(const char *argv0, void *MainExecAddr) { +std::string getMainExecutableImpl(const char *argv0, void *MainExecAddr) { SmallVector PathName; PathName.resize_for_overwrite(PathName.capacity()); DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.size()); diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc index dfaab1613de1..b0c55a77bc93 100644 --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -156,9 +156,10 @@ static std::error_code WildcardExpand(StringRef Arg, // Don't expand Arg if it does not contain any wildcard characters. This is // the common case. Also don't wildcard expand /?. Always treat it as an - // option. + // option. Paths that start with \\?\ are absolute paths, and aren't + // expected to be used with wildcard expressions. if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" || - Arg == "-?") { + Arg == "-?" 
|| Arg.startswith("\\\\?\\")) { Args.push_back(Arg.data()); return EC; } @@ -247,7 +248,7 @@ windows::GetCommandLineArguments(SmallVectorImpl &Args, SmallVector TmpArgs; StringSaver Saver(Alloc); - cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false); + cl::TokenizeWindowsCommandLineFull(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false); for (const char *Arg : TmpArgs) { EC = WildcardExpand(Arg, Args, Saver); @@ -255,6 +256,9 @@ windows::GetCommandLineArguments(SmallVectorImpl &Args, return EC; } + if (Args.size() == 0) + return std::make_error_code(std::errc::invalid_argument); + SmallVector Arg0(Args[0], Args[0] + strlen(Args[0])); SmallVector Filename; sys::path::remove_filename(Arg0); diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc index ee633411584f..58de140a60d1 100644 --- a/llvm/lib/Support/Windows/Program.inc +++ b/llvm/lib/Support/Windows/Program.inc @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Errc.h" @@ -18,12 +19,12 @@ #include "llvm/Support/Windows/WindowsSupport.h" #include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include #include #include +#include //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index 32186bbe5160..32477de5184b 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -159,6 +159,10 @@ static fpSymInitialize fSymInitialize; typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID); static fpEnumerateLoadedModules fEnumerateLoadedModules; +static bool isDebugHelpInitialized() { + return fStackWalk64 && fSymInitialize && fSymSetOptions && fMiniDumpWriteDump; +} + static bool load64BitDebugHelp(void) { HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); if (hLib) { @@ -181,7 +185,7 @@ static bool load64BitDebugHelp(void) { fEnumerateLoadedModules = (fpEnumerateLoadedModules) ::GetProcAddress(hLib, "EnumerateLoadedModules64"); } - return fStackWalk64 && fSymInitialize && fSymSetOptions && fMiniDumpWriteDump; + return isDebugHelpInitialized(); } using namespace llvm; @@ -296,6 +300,12 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth, static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, HANDLE hThread, STACKFRAME64 &StackFrame, CONTEXT *Context) { + // It's possible that DbgHelp.dll hasn't been loaded yet (e.g. if this + // function is called before the main program called `llvm::InitLLVM`). + // In this case just return, not stacktrace will be printed. + if (!isDebugHelpInitialized()) + return; + // Initialize the symbol handler. fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); fSymInitialize(hProcess, NULL, TRUE); @@ -327,24 +337,24 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, OS << format("0x%08lX", static_cast(PC)); #endif -// Print the parameters. Assume there are four. 
-#if defined(_M_X64) || defined(_M_ARM64) - OS << format(" (0x%016llX 0x%016llX 0x%016llX 0x%016llX)", - StackFrame.Params[0], StackFrame.Params[1], StackFrame.Params[2], - StackFrame.Params[3]); -#elif defined(_M_IX86) || defined(_M_ARM) - OS << format(" (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", - static_cast(StackFrame.Params[0]), - static_cast(StackFrame.Params[1]), - static_cast(StackFrame.Params[2]), - static_cast(StackFrame.Params[3])); -#endif // Verify the PC belongs to a module in this process. if (!fSymGetModuleBase64(hProcess, PC)) { OS << " \n"; continue; } + IMAGEHLP_MODULE64 M; + memset(&M, 0, sizeof(IMAGEHLP_MODULE64)); + M.SizeOfStruct = sizeof(IMAGEHLP_MODULE64); + if (fSymGetModuleInfo64(hProcess, fSymGetModuleBase64(hProcess, PC), &M)) { + DWORD64 const disp = PC - M.BaseOfImage; + OS << format(", %s(0x%016llX) + 0x%llX byte(s)", + static_cast(M.ImageName), M.BaseOfImage, + static_cast(disp)); + } else { + OS << ", "; + } + // Print the symbol name. char buffer[512]; IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast(buffer); @@ -359,20 +369,16 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, } buffer[511] = 0; - if (dwDisp > 0) - OS << format(", %s() + 0x%llX bytes(s)", (const char*)symbol->Name, - dwDisp); - else - OS << format(", %s", (const char*)symbol->Name); + OS << format(", %s() + 0x%llX byte(s)", static_cast(symbol->Name), + static_cast(dwDisp)); // Print the source file and line number information. IMAGEHLP_LINE64 line = {}; DWORD dwLineDisp; line.SizeOfStruct = sizeof(line); if (fSymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) { - OS << format(", %s, line %lu", line.FileName, line.LineNumber); - if (dwLineDisp > 0) - OS << format(" + 0x%lX byte(s)", dwLineDisp); + OS << format(", %s, line %lu + 0x%lX byte(s)", line.FileName, + line.LineNumber, dwLineDisp); } OS << '\n'; @@ -811,6 +817,12 @@ void sys::CleanupOnSignal(uintptr_t Context) { static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { Cleanup(true); + // Write out the exception code. + if (ep && ep->ExceptionRecord) + llvm::errs() << format("Exception Code: 0x%08X", + ep->ExceptionRecord->ExceptionCode) + << "\n"; + // We'll automatically write a Minidump file here to help diagnose // the nasty sorts of crashes that aren't 100% reproducible from a set of // inputs (or in the event that the user is unable or unwilling to provide a diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc index 7b48ca8fb1fb..11f34817dbbf 100644 --- a/llvm/lib/Support/Windows/Threading.inc +++ b/llvm/lib/Support/Windows/Threading.inc @@ -27,8 +27,8 @@ namespace llvm { HANDLE llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg, llvm::Optional StackSizeInBytes) { - HANDLE hThread = (HANDLE)::_beginthreadex( - NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL); + HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0), + ThreadFunc, Arg, 0, NULL); if (!hThread) { ReportLastErrorFatal("_beginthreadex failed"); @@ -120,8 +120,10 @@ SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { // End background processing mode. The system restores the resource scheduling // priorities of the thread as they were before the thread entered background // processing mode. + // + // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low return SetThreadPriority(GetCurrentThread(), - Priority == ThreadPriority::Background + Priority != ThreadPriority::Default ? 
THREAD_MODE_BACKGROUND_BEGIN : THREAD_MODE_BACKGROUND_END) ? SetThreadPriorityResult::SUCCESS diff --git a/llvm/lib/Support/WithColor.cpp b/llvm/lib/Support/WithColor.cpp index b1aa709862d8..abc9fb3e5d60 100644 --- a/llvm/lib/Support/WithColor.cpp +++ b/llvm/lib/Support/WithColor.cpp @@ -33,6 +33,14 @@ struct CreateUseColor { static ManagedStatic, CreateUseColor> UseColor; void llvm::initWithColorOptions() { *UseColor; } +static bool DefaultAutoDetectFunction(const raw_ostream &OS) { + return *UseColor == cl::BOU_UNSET ? OS.has_colors() + : *UseColor == cl::BOU_TRUE; +} + +WithColor::AutoDetectFunctionType WithColor::AutoDetectFunction = + DefaultAutoDetectFunction; + WithColor::WithColor(raw_ostream &OS, HighlightColor Color, ColorMode Mode) : OS(OS), Mode(Mode) { // Detect color from terminal type unless the user passed the --color option. @@ -127,8 +135,7 @@ bool WithColor::colorsEnabled() { case ColorMode::Disable: return false; case ColorMode::Auto: - return *UseColor == cl::BOU_UNSET ? OS.has_colors() - : *UseColor == cl::BOU_TRUE; + return AutoDetectFunction(OS); } llvm_unreachable("All cases handled above."); } @@ -159,3 +166,12 @@ void WithColor::defaultWarningHandler(Error Warning) { WithColor::warning() << Info.message() << '\n'; }); } + +WithColor::AutoDetectFunctionType WithColor::defaultAutoDetectFunction() { + return DefaultAutoDetectFunction; +} + +void WithColor::setAutoDetectFunction( + AutoDetectFunctionType NewAutoDetectFunction) { + AutoDetectFunction = NewAutoDetectFunction; +} diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index 200261d3ed5c..578ce228079b 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -392,6 +392,9 @@ private: /// Pos is whitespace or a new line bool isBlankOrBreak(StringRef::iterator Position); + /// Return true if the line is a line break, false otherwise. + bool isLineEmpty(StringRef Line); + /// Consume a single b-break[28] if it's present at the current position. /// /// Return false if the code unit at the current position isn't a line break. @@ -470,6 +473,18 @@ private: /// Scan a block scalar starting with | or >. bool scanBlockScalar(bool IsLiteral); + /// Scan a block scalar style indicator and header. + /// + /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that + /// YAML does not consider the style indicator to be a part of the header. + /// + /// Return false if an error occurred. + bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator, + unsigned &IndentIndicator, bool &IsDone); + + /// Scan a style indicator in a block scalar header. + char scanBlockStyleIndicator(); + /// Scan a chomping indicator in a block scalar header. 
char scanBlockChompingIndicator(); @@ -1034,6 +1049,13 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) { *Position == '\n'; } +bool Scanner::isLineEmpty(StringRef Line) { + for (const auto *Position = Line.begin(); Position != Line.end(); ++Position) + if (!isBlankOrBreak(Position)) + return false; + return true; +} + bool Scanner::consumeLineBreakIfPresent() { auto Next = skip_b_break(Current); if (Next == Current) @@ -1516,6 +1538,25 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) { return true; } +bool Scanner::scanBlockScalarIndicators(char &StyleIndicator, + char &ChompingIndicator, + unsigned &IndentIndicator, + bool &IsDone) { + StyleIndicator = scanBlockStyleIndicator(); + if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone)) + return false; + return true; +} + +char Scanner::scanBlockStyleIndicator() { + char Indicator = ' '; + if (Current != End && (*Current == '>' || *Current == '|')) { + Indicator = *Current; + skip(1); + } + return Indicator; +} + char Scanner::scanBlockChompingIndicator() { char Indicator = ' '; if (Current != End && (*Current == '+' || *Current == '-')) { @@ -1654,19 +1695,19 @@ bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, } bool Scanner::scanBlockScalar(bool IsLiteral) { - // Eat '|' or '>' assert(*Current == '|' || *Current == '>'); - skip(1); - + char StyleIndicator; char ChompingIndicator; unsigned BlockIndent; bool IsDone = false; - if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) + if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent, + IsDone)) return false; if (IsDone) return true; + bool IsFolded = StyleIndicator == '>'; - auto Start = Current; + const auto *Start = Current; unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; unsigned LineBreaks = 0; if (BlockIndent == 0) { @@ -1687,6 +1728,22 @@ bool Scanner::scanBlockScalar(bool IsLiteral) { auto LineStart = Current; advanceWhile(&Scanner::skip_nb_char); if (LineStart != Current) { + if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) { + // The folded style "folds" any single line break between content into a + // single space, except when that content is "empty" (only contains + // whitespace) in which case the line break is left as-is. + if (LineBreaks == 1) { + Str.append(LineBreaks, + isLineEmpty(StringRef(LineStart, Current - LineStart)) + ? '\n' + : ' '); + } + // If we saw a single line break, we are completely replacing it and so + // want `LineBreaks == 0`. Otherwise this decrement accounts for the + // fact that the first line break is "trimmed", only being used to + // signal a sequence of line breaks which should not be folded. 
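// Editor's note: the folding logic added here means a '>' block scalar joins
// adjacent non-empty lines with a space, while a blank line still produces a
// newline. A sketch against the yaml::Stream API, assuming BlockScalarNode
// exposes the decoded value via getValue():

#include "llvm/Support/Casting.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
using namespace llvm;

void demoFoldedScalar() {
  SourceMgr SM;
  yaml::Stream Stream("--- >\n  one\n  two\n\n  three\n", SM);
  yaml::Document &Doc = *Stream.begin();
  if (auto *BS = dyn_cast_or_null<yaml::BlockScalarNode>(Doc.getRoot())) {
    // Expected folded result: "one two\nthree\n" -- the single break becomes
    // a space, the blank line survives as a newline, and default (clip)
    // chomping keeps one trailing '\n'.
    StringRef Folded = BS->getValue();
    (void)Folded;
  }
}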
+ LineBreaks--; + } Str.append(LineBreaks, '\n'); Str.append(StringRef(LineStart, Current - LineStart)); LineBreaks = 0; @@ -1840,11 +1897,11 @@ bool Scanner::fetchMoreTokens() { Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, std::error_code *EC) - : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} + : scanner(new Scanner(Input, SM, ShowColors, EC)) {} Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, std::error_code *EC) - : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} + : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {} Stream::~Stream() = default; diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp index 9485536d1312..b49d8d2afbb3 100644 --- a/llvm/lib/Support/Z3Solver.cpp +++ b/llvm/lib/Support/Z3Solver.cpp @@ -6,16 +6,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" #include "llvm/Support/SMTAPI.h" -#include using namespace llvm; #if LLVM_WITH_Z3 +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" + +#include + #include namespace { diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 69d4fe96bee8..98ceea3c3c7a 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -408,7 +408,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) { const size_t Size = Bytes.size(); HexPrintStyle HPS = FB.Upper ? HexPrintStyle::Upper : HexPrintStyle::Lower; uint64_t OffsetWidth = 0; - if (FB.FirstByteOffset.hasValue()) { + if (FB.FirstByteOffset) { // Figure out how many nibbles are needed to print the largest offset // represented by this data set, so that we can align the offset field // to the right width. @@ -428,7 +428,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) { while (!Bytes.empty()) { indent(FB.IndentLevel); - if (FB.FirstByteOffset.hasValue()) { + if (FB.FirstByteOffset) { uint64_t Offset = FB.FirstByteOffset.getValue(); llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth); *this << ": "; diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c index ee2a1d87a267..24d01121820b 100644 --- a/llvm/lib/Support/regcomp.c +++ b/llvm/lib/Support/regcomp.c @@ -249,10 +249,10 @@ static char nuls[10]; /* place to point scanner in event of error */ */ #define PEEK() (*p->next) #define PEEK2() (*(p->next+1)) -#define MORE() (p->next < p->end) -#define MORE2() (p->next+1 < p->end) +#define MORE() (p->end - p->next > 0) +#define MORE2() (p->end - p->next > 1) #define SEE(c) (MORE() && PEEK() == (c)) -#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b)) #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) #define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) #define NEXT() (p->next++) @@ -800,15 +800,17 @@ p_bracket(struct parse *p) int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ - if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { - EMIT(OBOW, 0); - NEXTn(6); - return; - } - if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { - EMIT(OEOW, 0); - NEXTn(6); - return; + if (p->end - p->next > 5) { + if (strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } } if ((cs = allocset(p)) == NULL) { diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc index 41787aff1242..02680e23ddb8 100644 --- a/llvm/lib/Support/regengine.inc +++ b/llvm/lib/Support/regengine.inc @@ -53,6 +53,7 @@ #define at sat #define match smat #define nope snope +#define step_back sstep_back #endif #ifdef LNAMES #define matcher lmatcher @@ -65,6 +66,7 @@ #define at lat #define match lmat #define nope lnope +#define step_back lstep_back #endif /* another structure passed up and down to avoid zillions of parameters */ @@ -288,6 +290,38 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, return(0); } +/* Step back from "stop" to a position where the strip startst..stopst might + * match. This can always conservatively return "stop - 1", but may return an + * earlier position if matches at later positions are impossible. */ +static const char * +step_back(struct re_guts *g, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + /* Always step back at least one character. */ + assert(stop > start); + const char *res = stop - 1; + + /* Check whether the strip startst..stropst starts with a fixed character, + * ignoring any closing parentheses. If not, return a conservative result. */ + for (;;) { + if (startst >= stopst) + return res; + if (OP(g->strip[startst]) != ORPAREN) + break; + startst++; + } + if (OP(g->strip[startst]) != OCHAR) + return res; + + /* Find the character that starts the following match. */ + char ch = OPND(g->strip[startst]); + for (; res != start; --res) { + if (*res == ch) + break; + } + return res; +} + /* - dissect - figure out what matched what, no back references */ @@ -358,7 +392,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst, if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ - stp = rest - 1; + stp = step_back(m->g, sp, rest, es, stopst); assert(stp >= sp); /* it did work */ } ssub = ss + 1; @@ -383,7 +417,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst, if (tail == stop) break; /* yes! 
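// Editor's note: the new TableGenParseFile() entry point above lets library
// clients run the TableGen parser over an existing SourceMgr without going
// through TableGenMain(). A hedged usage sketch (buffer name and contents
// hypothetical):

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Parser.h"
#include "llvm/TableGen/Record.h"
using namespace llvm;

static bool parseTableGenBuffer(StringRef Text, RecordKeeper &Records) {
  SourceMgr SrcMgr;
  SrcMgr.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(Text, "demo.td"),
                            SMLoc());
  // Returns true on error, mirroring TGParser::ParseFile(); on success the
  // parsed classes and defs are available through Records, and the source
  // buffers are handed back to SrcMgr as the comment above describes.
  return TableGenParseFile(SrcMgr, Records);
}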
*/ /* no -- try a shorter match for this one */ - stp = rest - 1; + stp = step_back(m->g, sp, rest, es, stopst); assert(stp >= sp); /* it did work */ } ssub = ss + 1; @@ -1032,3 +1066,4 @@ pchar(int ch) #undef at #undef match #undef nope +#undef step_back diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp index e9dceed2c4ae..9a3f5faa336b 100644 --- a/llvm/lib/Support/xxhash.cpp +++ b/llvm/lib/Support/xxhash.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/Endian.h" #include -#include using namespace llvm; using namespace support; diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp index 6104573b4b25..ebe9129ebaeb 100644 --- a/llvm/lib/TableGen/Error.cpp +++ b/llvm/lib/TableGen/Error.cpp @@ -157,8 +157,8 @@ void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { // Check an assertion: Obtain the condition value and be sure it is true. // If not, print a nonfatal error along with the message. void CheckAssert(SMLoc Loc, Init *Condition, Init *Message) { - auto *CondValue = dyn_cast_or_null( - Condition->convertInitializerTo(IntRecTy::get())); + auto *CondValue = dyn_cast_or_null(Condition->convertInitializerTo( + IntRecTy::get(Condition->getRecordKeeper()))); if (!CondValue) PrintError(Loc, "assert condition must of type bit, bits, or int."); else if (!CondValue->getValue()) { diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp new file mode 100644 index 000000000000..818ded19432b --- /dev/null +++ b/llvm/lib/TableGen/Parser.cpp @@ -0,0 +1,39 @@ +//===- Parser.cpp - Top-Level TableGen Parser implementation --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/Parser.h" +#include "TGParser.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TableGen/Record.h" + +using namespace llvm; + +bool llvm::TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records) { + // Initialize the global TableGen source manager by temporarily taking control + // of the input buffer in `SrcMgr`. This is kind of a hack, but allows for + // preserving TableGen's current awkward diagnostic behavior. If we can remove + // this reliance, we could drop all of this. + SrcMgr = SourceMgr(); + SrcMgr.takeSourceBuffersFrom(InputSrcMgr); + SrcMgr.setIncludeDirs(InputSrcMgr.getIncludeDirs()); + SrcMgr.setDiagHandler(InputSrcMgr.getDiagHandler(), + InputSrcMgr.getDiagContext()); + + // Setup the record keeper and try to parse the file. + auto *MainFileBuffer = SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID()); + Records.saveInputFilename(MainFileBuffer->getBufferIdentifier().str()); + + TGParser Parser(SrcMgr, /*Macros=*/None, Records); + bool ParseResult = Parser.ParseFile(); + + // After parsing, reclaim the source manager buffers from TableGen's global + // manager. 
+  InputSrcMgr.takeSourceBuffersFrom(SrcMgr);
+  SrcMgr = SourceMgr();
+  return ParseResult;
+}
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 58d8c9936896..6c205104d569 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
@@ -46,14 +45,17 @@ using namespace llvm;
 namespace llvm {
 namespace detail {
-/// This class contains all of the contextual static state of the Record
-/// classes. This allows for better lifetime management and control of the used
-/// static data.
-struct RecordContext {
-  RecordContext()
-      : AnyRecord(0), TrueBitInit(true, &SharedBitRecTy),
+/// This class represents the internal implementation of the RecordKeeper.
+/// It contains all of the contextual static state of the Record classes. It is
+/// kept out-of-line to simplify dependencies, and also to make it easier for
+/// internal classes to access the uniquer state of the keeper.
+struct RecordKeeperImpl {
+  RecordKeeperImpl(RecordKeeper &RK)
+      : SharedBitRecTy(RK), SharedIntRecTy(RK), SharedStringRecTy(RK),
+        SharedDagRecTy(RK), AnyRecord(RK, 0), TheUnsetInit(RK),
+        TrueBitInit(true, &SharedBitRecTy),
         FalseBitInit(false, &SharedBitRecTy), StringInitStringPool(Allocator),
-        StringInitCodePool(Allocator), LastRecordID(0) {}
+        StringInitCodePool(Allocator), AnonCounter(0), LastRecordID(0) {}
 
   BumpPtrAllocator Allocator;
   std::vector<BitsRecTy *> SharedBitsRecTys;
@@ -77,6 +79,7 @@ struct RecordContext {
   FoldingSet<TernOpInit> TheTernOpInitPool;
   FoldingSet<FoldOpInit> TheFoldOpInitPool;
   FoldingSet<IsAOpInit> TheIsAOpInitPool;
+  FoldingSet<ExistsOpInit> TheExistsOpInitPool;
   DenseMap<std::pair<RecTy *, Init *>, VarInit *> TheVarInitPool;
   DenseMap<std::pair<TypedInit *, unsigned>, VarBitInit *> TheVarBitInitPool;
   DenseMap<std::pair<TypedInit *, unsigned>, VarListElementInit *>
@@ -85,14 +88,14 @@ struct RecordContext {
   DenseMap<std::pair<Init *, StringInit *>, FieldInit *> TheFieldInitPool;
   FoldingSet<CondOpInit> TheCondOpInitPool;
   FoldingSet<DagInit> TheDagInitPool;
+  FoldingSet<RecordRecTy> RecordTypePool;
 
+  unsigned AnonCounter;
   unsigned LastRecordID;
 };
 } // namespace detail
 } // namespace llvm
 
-ManagedStatic<detail::RecordContext> Context;
-
 //===----------------------------------------------------------------------===//
 // Type implementations
 //===----------------------------------------------------------------------===//
@@ -103,7 +106,7 @@ LLVM_DUMP_METHOD void RecTy::dump() const { print(errs()); }
 
 ListRecTy *RecTy::getListTy() {
   if (!ListTy)
-    ListTy = new(Context->Allocator) ListRecTy(this);
+    ListTy = new (RK.getImpl().Allocator) ListRecTy(this);
   return ListTy;
 }
 
@@ -114,7 +117,9 @@ bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const {
 
 bool RecTy::typeIsA(const RecTy *RHS) const { return this == RHS; }
 
-BitRecTy *BitRecTy::get() { return &Context->SharedBitRecTy; }
+BitRecTy *BitRecTy::get(RecordKeeper &RK) {
+  return &RK.getImpl().SharedBitRecTy;
+}
 
 bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
   if (RecTy::typeIsConvertibleTo(RHS) || RHS->getRecTyKind() == IntRecTyKind)
@@ -124,12 +129,13 @@ bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
   return false;
 }
 
-BitsRecTy *BitsRecTy::get(unsigned Sz) {
-  if (Sz >= Context->SharedBitsRecTys.size())
-    Context->SharedBitsRecTys.resize(Sz + 1);
-  BitsRecTy *&Ty = Context->SharedBitsRecTys[Sz];
+BitsRecTy *BitsRecTy::get(RecordKeeper &RK, unsigned Sz) {
+  detail::RecordKeeperImpl &RKImpl = RK.getImpl();
+  if (Sz >= RKImpl.SharedBitsRecTys.size())
+
+    RKImpl.SharedBitsRecTys.resize(Sz + 1);
+  BitsRecTy *&Ty = RKImpl.SharedBitsRecTys[Sz];
   if (!Ty)
-    Ty = new (Context->Allocator) BitsRecTy(Sz);
+    Ty = new (RKImpl.Allocator) BitsRecTy(RK, Sz);
   return Ty;
 }
 
@@ -150,14 +156,18 @@ bool BitsRecTy::typeIsA(const RecTy *RHS) const {
   return false;
 }
 
-IntRecTy *IntRecTy::get() { return &Context->SharedIntRecTy; }
+IntRecTy *IntRecTy::get(RecordKeeper &RK) {
+  return &RK.getImpl().SharedIntRecTy;
+}
 
 bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
   RecTyKind kind = RHS->getRecTyKind();
   return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
 }
 
-StringRecTy *StringRecTy::get() { return &Context->SharedStringRecTy; }
+StringRecTy *StringRecTy::get(RecordKeeper &RK) {
+  return &RK.getImpl().SharedStringRecTy;
+}
 
 std::string StringRecTy::getAsString() const {
   return "string";
@@ -184,7 +194,9 @@ bool ListRecTy::typeIsA(const RecTy *RHS) const {
   return false;
 }
 
-DagRecTy *DagRecTy::get() { return &Context->SharedDagRecTy; }
+DagRecTy *DagRecTy::get(RecordKeeper &RK) {
+  return &RK.getImpl().SharedDagRecTy;
+}
 
 std::string DagRecTy::getAsString() const {
   return "dag";
@@ -197,12 +209,13 @@ static void ProfileRecordRecTy(FoldingSetNodeID &ID,
   ID.AddPointer(R);
 }
 
-RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
+RecordRecTy *RecordRecTy::get(RecordKeeper &RK,
+                              ArrayRef<Record *> UnsortedClasses) {
+  detail::RecordKeeperImpl &RKImpl = RK.getImpl();
   if (UnsortedClasses.empty())
-    return &Context->AnyRecord;
+    return &RKImpl.AnyRecord;
 
-  FoldingSet<RecordRecTy> &ThePool =
-      UnsortedClasses[0]->getRecords().RecordTypePool;
+  FoldingSet<RecordRecTy> &ThePool = RKImpl.RecordTypePool;
 
   SmallVector<Record *, 4> Classes(UnsortedClasses.begin(),
                                    UnsortedClasses.end());
@@ -227,14 +240,18 @@ RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
   }
 #endif
 
-  void *Mem = Context->Allocator.Allocate(
+  void *Mem = RKImpl.Allocator.Allocate(
       totalSizeToAlloc<Record *>(Classes.size()), alignof(RecordRecTy));
-  RecordRecTy *Ty = new(Mem) RecordRecTy(Classes.size());
+  RecordRecTy *Ty = new (Mem) RecordRecTy(RK, Classes.size());
   std::uninitialized_copy(Classes.begin(), Classes.end(),
                           Ty->getTrailingObjects<Record *>());
   ThePool.InsertNode(Ty, IP);
   return Ty;
 }
+RecordRecTy *RecordRecTy::get(Record *Class) {
+  assert(Class && "unexpected null class");
+  return get(Class->getRecords(), Class);
+}
 
 void RecordRecTy::Profile(FoldingSetNodeID &ID) const {
   ProfileRecordRecTy(ID, getClasses());
@@ -294,7 +311,7 @@ static RecordRecTy *resolveRecordTypes(RecordRecTy *T1, RecordRecTy *T2) {
     }
   }
 
-  return RecordRecTy::get(CommonSuperClasses);
+  return RecordRecTy::get(T1->getRecordKeeper(), CommonSuperClasses);
 }
 
 RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
@@ -333,7 +350,15 @@ void Init::anchor() {}
 LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
 #endif
 
-UnsetInit *UnsetInit::get() { return &Context->TheUnsetInit; }
+RecordKeeper &Init::getRecordKeeper() const {
+  if (auto *TyInit = dyn_cast<TypedInit>(this))
+    return TyInit->getType()->getRecordKeeper();
+  return cast<UnsetInit>(this)->getRecordKeeper();
+}
+
+UnsetInit *UnsetInit::get(RecordKeeper &RK) {
+  return &RK.getImpl().TheUnsetInit;
+}
 
 Init *UnsetInit::getCastTo(RecTy *Ty) const {
   return const_cast<UnsetInit *>(this);
 }
 
@@ -343,8 +368,8 @@ Init *UnsetInit::convertInitializerTo(RecTy *Ty) const {
   return const_cast<UnsetInit *>(this);
 }
 
-BitInit *BitInit::get(bool V) {
-  return V ? &Context->TrueBitInit : &Context->FalseBitInit;
+BitInit *BitInit::get(RecordKeeper &RK, bool V) {
+  return V ?
&RK.getImpl().TrueBitInit : &RK.getImpl().FalseBitInit; } Init *BitInit::convertInitializerTo(RecTy *Ty) const { @@ -352,12 +377,12 @@ Init *BitInit::convertInitializerTo(RecTy *Ty) const { return const_cast(this); if (isa(Ty)) - return IntInit::get(getValue()); + return IntInit::get(getRecordKeeper(), getValue()); if (auto *BRT = dyn_cast(Ty)) { // Can only convert single bit. if (BRT->getNumBits() == 1) - return BitsInit::get(const_cast(this)); + return BitsInit::get(getRecordKeeper(), const_cast(this)); } return nullptr; @@ -371,20 +396,21 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef Range) { ID.AddPointer(I); } -BitsInit *BitsInit::get(ArrayRef Range) { +BitsInit *BitsInit::get(RecordKeeper &RK, ArrayRef Range) { FoldingSetNodeID ID; ProfileBitsInit(ID, Range); + detail::RecordKeeperImpl &RKImpl = RK.getImpl(); void *IP = nullptr; - if (BitsInit *I = Context->TheBitsInitPool.FindNodeOrInsertPos(ID, IP)) + if (BitsInit *I = RKImpl.TheBitsInitPool.FindNodeOrInsertPos(ID, IP)) return I; - void *Mem = Context->Allocator.Allocate( - totalSizeToAlloc(Range.size()), alignof(BitsInit)); - BitsInit *I = new(Mem) BitsInit(Range.size()); + void *Mem = RKImpl.Allocator.Allocate(totalSizeToAlloc(Range.size()), + alignof(BitsInit)); + BitsInit *I = new (Mem) BitsInit(RK, Range.size()); std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); - Context->TheBitsInitPool.InsertNode(I, IP); + RKImpl.TheBitsInitPool.InsertNode(I, IP); return I; } @@ -412,7 +438,7 @@ Init *BitsInit::convertInitializerTo(RecTy *Ty) const { Result |= static_cast(Bit->getValue()) << i; else return nullptr; - return IntInit::get(Result); + return IntInit::get(getRecordKeeper(), Result); } return nullptr; @@ -427,7 +453,7 @@ BitsInit::convertInitializerBitRange(ArrayRef Bits) const { return nullptr; NewBits[i] = getBit(Bits[i]); } - return BitsInit::get(NewBits); + return BitsInit::get(getRecordKeeper(), NewBits); } bool BitsInit::isConcrete() const { @@ -482,15 +508,15 @@ Init *BitsInit::resolveReferences(Resolver &R) const { } if (Changed) - return BitsInit::get(NewBits); + return BitsInit::get(getRecordKeeper(), NewBits); return const_cast(this); } -IntInit *IntInit::get(int64_t V) { - IntInit *&I = Context->TheIntInitPool[V]; +IntInit *IntInit::get(RecordKeeper &RK, int64_t V) { + IntInit *&I = RK.getImpl().TheIntInitPool[V]; if (!I) - I = new (Context->Allocator) IntInit(V); + I = new (RK.getImpl().Allocator) IntInit(RK, V); return I; } @@ -511,7 +537,7 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const { if (isa(Ty)) { int64_t Val = getValue(); if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit! - return BitInit::get(Val != 0); + return BitInit::get(getRecordKeeper(), Val != 0); } if (auto *BRT = dyn_cast(Ty)) { @@ -522,9 +548,10 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const { SmallVector NewBits(BRT->getNumBits()); for (unsigned i = 0; i != BRT->getNumBits(); ++i) - NewBits[i] = BitInit::get(Value & ((i < 64) ? (1LL << i) : 0)); + NewBits[i] = + BitInit::get(getRecordKeeper(), Value & ((i < 64) ? 
(1LL << i) : 0)); - return BitsInit::get(NewBits); + return BitsInit::get(getRecordKeeper(), NewBits); } return nullptr; @@ -538,17 +565,18 @@ IntInit::convertInitializerBitRange(ArrayRef Bits) const { if (Bits[i] >= 64) return nullptr; - NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i])); + NewBits[i] = + BitInit::get(getRecordKeeper(), Value & (INT64_C(1) << Bits[i])); } - return BitsInit::get(NewBits); + return BitsInit::get(getRecordKeeper(), NewBits); } -AnonymousNameInit *AnonymousNameInit::get(unsigned V) { - return new (Context->Allocator) AnonymousNameInit(V); +AnonymousNameInit *AnonymousNameInit::get(RecordKeeper &RK, unsigned V) { + return new (RK.getImpl().Allocator) AnonymousNameInit(RK, V); } StringInit *AnonymousNameInit::getNameInit() const { - return StringInit::get(getAsString()); + return StringInit::get(getRecordKeeper(), getAsString()); } std::string AnonymousNameInit::getAsString() const { @@ -565,12 +593,13 @@ Init *AnonymousNameInit::resolveReferences(Resolver &R) const { return New; } -StringInit *StringInit::get(StringRef V, StringFormat Fmt) { - auto &InitMap = Fmt == SF_String ? Context->StringInitStringPool - : Context->StringInitCodePool; +StringInit *StringInit::get(RecordKeeper &RK, StringRef V, StringFormat Fmt) { + detail::RecordKeeperImpl &RKImpl = RK.getImpl(); + auto &InitMap = Fmt == SF_String ? RKImpl.StringInitStringPool + : RKImpl.StringInitCodePool; auto &Entry = *InitMap.insert(std::make_pair(V, nullptr)).first; if (!Entry.second) - Entry.second = new (Context->Allocator) StringInit(Entry.getKey(), Fmt); + Entry.second = new (RKImpl.Allocator) StringInit(RK, Entry.getKey(), Fmt); return Entry.second; } @@ -595,19 +624,20 @@ ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { FoldingSetNodeID ID; ProfileListInit(ID, Range, EltTy); + detail::RecordKeeperImpl &RK = EltTy->getRecordKeeper().getImpl(); void *IP = nullptr; - if (ListInit *I = Context->TheListInitPool.FindNodeOrInsertPos(ID, IP)) + if (ListInit *I = RK.TheListInitPool.FindNodeOrInsertPos(ID, IP)) return I; assert(Range.empty() || !isa(Range[0]) || cast(Range[0])->getType()->typeIsConvertibleTo(EltTy)); - void *Mem = Context->Allocator.Allocate( - totalSizeToAlloc(Range.size()), alignof(ListInit)); + void *Mem = RK.Allocator.Allocate(totalSizeToAlloc(Range.size()), + alignof(ListInit)); ListInit *I = new (Mem) ListInit(Range.size(), EltTy); std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); - Context->TheListInitPool.InsertNode(I, IP); + RK.TheListInitPool.InsertNode(I, IP); return I; } @@ -714,7 +744,7 @@ std::string ListInit::getAsString() const { } Init *OpInit::getBit(unsigned Bit) const { - if (getType() == BitRecTy::get()) + if (getType() == BitRecTy::get(getRecordKeeper())) return const_cast(this); return VarBitInit::get(const_cast(this), Bit); } @@ -730,12 +760,13 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { FoldingSetNodeID ID; ProfileUnOpInit(ID, Opc, LHS, Type); + detail::RecordKeeperImpl &RK = Type->getRecordKeeper().getImpl(); void *IP = nullptr; - if (UnOpInit *I = Context->TheUnOpInitPool.FindNodeOrInsertPos(ID, IP)) + if (UnOpInit *I = RK.TheUnOpInitPool.FindNodeOrInsertPos(ID, IP)) return I; - UnOpInit *I = new (Context->Allocator) UnOpInit(Opc, LHS, Type); - Context->TheUnOpInitPool.InsertNode(I, IP); + UnOpInit *I = new (RK.Allocator) UnOpInit(Opc, LHS, Type); + RK.TheUnOpInitPool.InsertNode(I, IP); return I; } @@ -744,6 +775,7 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const { } Init 
*UnOpInit::Fold(Record *CurRec, bool IsFinal) const { + RecordKeeper &RK = getRecordKeeper(); switch (getOpcode()) { case CAST: if (isa(getType())) { @@ -751,11 +783,11 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { return LHSs; if (DefInit *LHSd = dyn_cast(LHS)) - return StringInit::get(LHSd->getAsString()); + return StringInit::get(RK, LHSd->getAsString()); - if (IntInit *LHSi = - dyn_cast_or_null(LHS->convertInitializerTo(IntRecTy::get()))) - return StringInit::get(LHSi->getAsString()); + if (IntInit *LHSi = dyn_cast_or_null( + LHS->convertInitializerTo(IntRecTy::get(RK)))) + return StringInit::get(RK, LHSi->getAsString()); } else if (isa(getType())) { if (StringInit *Name = dyn_cast(LHS)) { @@ -800,9 +832,9 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { break; case NOT: - if (IntInit *LHSi = - dyn_cast_or_null(LHS->convertInitializerTo(IntRecTy::get()))) - return IntInit::get(LHSi->getValue() ? 0 : 1); + if (IntInit *LHSi = dyn_cast_or_null( + LHS->convertInitializerTo(IntRecTy::get(RK)))) + return IntInit::get(RK, LHSi->getValue() ? 0 : 1); break; case HEAD: @@ -823,20 +855,20 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { case SIZE: if (ListInit *LHSl = dyn_cast(LHS)) - return IntInit::get(LHSl->size()); + return IntInit::get(RK, LHSl->size()); if (DagInit *LHSd = dyn_cast(LHS)) - return IntInit::get(LHSd->arg_size()); + return IntInit::get(RK, LHSd->arg_size()); if (StringInit *LHSs = dyn_cast(LHS)) - return IntInit::get(LHSs->getValue().size()); + return IntInit::get(RK, LHSs->getValue().size()); break; case EMPTY: if (ListInit *LHSl = dyn_cast(LHS)) - return IntInit::get(LHSl->empty()); + return IntInit::get(RK, LHSl->empty()); if (DagInit *LHSd = dyn_cast(LHS)) - return IntInit::get(LHSd->arg_empty()); + return IntInit::get(RK, LHSd->arg_empty()); if (StringInit *LHSs = dyn_cast(LHS)) - return IntInit::get(LHSs->getValue().empty()); + return IntInit::get(RK, LHSs->getValue().empty()); break; case GETDAGOP: @@ -893,12 +925,13 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) { FoldingSetNodeID ID; ProfileBinOpInit(ID, Opc, LHS, RHS, Type); + detail::RecordKeeperImpl &RK = LHS->getRecordKeeper().getImpl(); void *IP = nullptr; - if (BinOpInit *I = Context->TheBinOpInitPool.FindNodeOrInsertPos(ID, IP)) + if (BinOpInit *I = RK.TheBinOpInitPool.FindNodeOrInsertPos(ID, IP)) return I; - BinOpInit *I = new (Context->Allocator) BinOpInit(Opc, LHS, RHS, Type); - Context->TheBinOpInitPool.InsertNode(I, IP); + BinOpInit *I = new (RK.Allocator) BinOpInit(Opc, LHS, RHS, Type); + RK.TheBinOpInitPool.InsertNode(I, IP); return I; } @@ -910,15 +943,15 @@ static StringInit *ConcatStringInits(const StringInit *I0, const StringInit *I1) { SmallString<80> Concat(I0->getValue()); Concat.append(I1->getValue()); - return StringInit::get(Concat, - StringInit::determineFormat(I0->getFormat(), - I1->getFormat())); + return StringInit::get( + I0->getRecordKeeper(), Concat, + StringInit::determineFormat(I0->getFormat(), I1->getFormat())); } static StringInit *interleaveStringList(const ListInit *List, const StringInit *Delim) { if (List->size() == 0) - return StringInit::get(""); + return StringInit::get(List->getRecordKeeper(), ""); StringInit *Element = dyn_cast(List->getElement(0)); if (!Element) return nullptr; @@ -933,30 +966,29 @@ static StringInit *interleaveStringList(const ListInit *List, Result.append(Element->getValue()); Fmt = StringInit::determineFormat(Fmt, Element->getFormat()); } - return StringInit::get(Result, Fmt); + 
return StringInit::get(List->getRecordKeeper(), Result, Fmt); } static StringInit *interleaveIntList(const ListInit *List, const StringInit *Delim) { + RecordKeeper &RK = List->getRecordKeeper(); if (List->size() == 0) - return StringInit::get(""); - IntInit *Element = - dyn_cast_or_null(List->getElement(0) - ->convertInitializerTo(IntRecTy::get())); + return StringInit::get(RK, ""); + IntInit *Element = dyn_cast_or_null( + List->getElement(0)->convertInitializerTo(IntRecTy::get(RK))); if (!Element) return nullptr; SmallString<80> Result(Element->getAsString()); for (unsigned I = 1, E = List->size(); I < E; ++I) { Result.append(Delim->getValue()); - IntInit *Element = - dyn_cast_or_null(List->getElement(I) - ->convertInitializerTo(IntRecTy::get())); + IntInit *Element = dyn_cast_or_null( + List->getElement(I)->convertInitializerTo(IntRecTy::get(RK))); if (!Element) return nullptr; Result.append(Element->getAsString()); } - return StringInit::get(Result); + return StringInit::get(RK, Result); } Init *BinOpInit::getStrConcat(Init *I0, Init *I1) { @@ -964,7 +996,8 @@ Init *BinOpInit::getStrConcat(Init *I0, Init *I1) { if (const StringInit *I0s = dyn_cast(I0)) if (const StringInit *I1s = dyn_cast(I1)) return ConcatStringInits(I0s, I1s); - return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1, StringRecTy::get()); + return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1, + StringRecTy::get(I0->getRecordKeeper())); } static ListInit *ConcatListInits(const ListInit *LHS, @@ -1003,7 +1036,7 @@ Init *BinOpInit::Fold(Record *CurRec) const { } Init *Op = LOp ? LOp : ROp; if (!Op) - Op = UnsetInit::get(); + Op = UnsetInit::get(getRecordKeeper()); SmallVector Args; SmallVector ArgNames; @@ -1067,10 +1100,10 @@ Init *BinOpInit::Fold(Record *CurRec) const { case GE: case GT: { // First see if we have two bit, bits, or int. - IntInit *LHSi = - dyn_cast_or_null(LHS->convertInitializerTo(IntRecTy::get())); - IntInit *RHSi = - dyn_cast_or_null(RHS->convertInitializerTo(IntRecTy::get())); + IntInit *LHSi = dyn_cast_or_null( + LHS->convertInitializerTo(IntRecTy::get(getRecordKeeper()))); + IntInit *RHSi = dyn_cast_or_null( + RHS->convertInitializerTo(IntRecTy::get(getRecordKeeper()))); if (LHSi && RHSi) { bool Result; @@ -1083,7 +1116,7 @@ Init *BinOpInit::Fold(Record *CurRec) const { case GT: Result = LHSi->getValue() > RHSi->getValue(); break; default: llvm_unreachable("unhandled comparison"); } - return BitInit::get(Result); + return BitInit::get(getRecordKeeper(), Result); } // Next try strings. @@ -1101,7 +1134,7 @@ Init *BinOpInit::Fold(Record *CurRec) const { case GT: Result = LHSs->getValue() > RHSs->getValue(); break; default: llvm_unreachable("unhandled comparison"); } - return BitInit::get(Result); + return BitInit::get(getRecordKeeper(), Result); } // Finally, !eq and !ne can be used with records. @@ -1109,8 +1142,8 @@ Init *BinOpInit::Fold(Record *CurRec) const { DefInit *LHSd = dyn_cast(LHS); DefInit *RHSd = dyn_cast(RHS); if (LHSd && RHSd) - return BitInit::get((getOpcode() == EQ) ? LHSd == RHSd - : LHSd != RHSd); + return BitInit::get(getRecordKeeper(), + (getOpcode() == EQ) ? 
LHSd == RHSd : LHSd != RHSd); } break; @@ -1138,10 +1171,10 @@ Init *BinOpInit::Fold(Record *CurRec) const { case SHL: case SRA: case SRL: { - IntInit *LHSi = - dyn_cast_or_null(LHS->convertInitializerTo(IntRecTy::get())); - IntInit *RHSi = - dyn_cast_or_null(RHS->convertInitializerTo(IntRecTy::get())); + IntInit *LHSi = dyn_cast_or_null( + LHS->convertInitializerTo(IntRecTy::get(getRecordKeeper()))); + IntInit *RHSi = dyn_cast_or_null( + RHS->convertInitializerTo(IntRecTy::get(getRecordKeeper()))); if (LHSi && RHSi) { int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue(); int64_t Result; @@ -1157,7 +1190,7 @@ Init *BinOpInit::Fold(Record *CurRec) const { case SRA: Result = LHSv >> RHSv; break; case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break; } - return IntInit::get(Result); + return IntInit::get(getRecordKeeper(), Result); } break; } @@ -1218,12 +1251,13 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, FoldingSetNodeID ID; ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type); + detail::RecordKeeperImpl &RK = LHS->getRecordKeeper().getImpl(); void *IP = nullptr; - if (TernOpInit *I = Context->TheTernOpInitPool.FindNodeOrInsertPos(ID, IP)) + if (TernOpInit *I = RK.TheTernOpInitPool.FindNodeOrInsertPos(ID, IP)) return I; - TernOpInit *I = new (Context->Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type); - Context->TheTernOpInitPool.InsertNode(I, IP); + TernOpInit *I = new (RK.Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type); + RK.TheTernOpInitPool.InsertNode(I, IP); return I; } @@ -1296,8 +1330,9 @@ static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Init *Include = ItemApply(LHS, Item, RHS, CurRec); if (!Include) return nullptr; - if (IntInit *IncludeInt = dyn_cast_or_null( - Include->convertInitializerTo(IntRecTy::get()))) { + if (IntInit *IncludeInt = + dyn_cast_or_null(Include->convertInitializerTo( + IntRecTy::get(LHS->getRecordKeeper())))) { if (IncludeInt->getValue()) NewList.push_back(Item); } else { @@ -1311,6 +1346,7 @@ static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, } Init *TernOpInit::Fold(Record *CurRec) const { + RecordKeeper &RK = getRecordKeeper(); switch (getOpcode()) { case SUBST: { DefInit *LHSd = dyn_cast(LHS); @@ -1351,7 +1387,7 @@ Init *TernOpInit::Fold(Record *CurRec) const { idx = found + MHSs->getValue().size(); } - return StringInit::get(Val); + return StringInit::get(RK, Val); } break; } @@ -1370,7 +1406,7 @@ Init *TernOpInit::Fold(Record *CurRec) const { case IF: { if (IntInit *LHSi = dyn_cast_or_null( - LHS->convertInitializerTo(IntRecTy::get()))) { + LHS->convertInitializerTo(IntRecTy::get(RK)))) { if (LHSi->getValue()) return MHS; return RHS; @@ -1391,8 +1427,8 @@ Init *TernOpInit::Fold(Record *CurRec) const { SmallVector, 8> Children; unsigned Size = MHSl ? MHSl->size() : RHSl->size(); for (unsigned i = 0; i != Size; ++i) { - Init *Node = MHSl ? MHSl->getElement(i) : UnsetInit::get(); - Init *Name = RHSl ? RHSl->getElement(i) : UnsetInit::get(); + Init *Node = MHSl ? MHSl->getElement(i) : UnsetInit::get(RK); + Init *Name = RHSl ? 
RHSl->getElement(i) : UnsetInit::get(RK); if (!isa(Name) && !isa(Name)) return const_cast(this); Children.emplace_back(Node, dyn_cast(Name)); @@ -1417,7 +1453,7 @@ Init *TernOpInit::Fold(Record *CurRec) const { std::to_string(Start)); if (Length < 0) PrintError(CurRec->getLoc(), "!substr length must be nonnegative"); - return StringInit::get(LHSs->getValue().substr(Start, Length), + return StringInit::get(RK, LHSs->getValue().substr(Start, Length), LHSs->getFormat()); } break; @@ -1437,8 +1473,8 @@ Init *TernOpInit::Fold(Record *CurRec) const { std::to_string(Start)); auto I = LHSs->getValue().find(MHSs->getValue(), Start); if (I == std::string::npos) - return IntInit::get(-1); - return IntInit::get(I); + return IntInit::get(RK, -1); + return IntInit::get(RK, I); } break; } @@ -1452,7 +1488,7 @@ Init *TernOpInit::resolveReferences(Resolver &R) const { if (getOpcode() == IF && lhs != LHS) { if (IntInit *Value = dyn_cast_or_null( - lhs->convertInitializerTo(IntRecTy::get()))) { + lhs->convertInitializerTo(IntRecTy::get(getRecordKeeper())))) { // Short-circuit if (Value->getValue()) return MHS->resolveReferences(R); @@ -1506,17 +1542,16 @@ static void ProfileFoldOpInit(FoldingSetNodeID &ID, Init *Start, Init *List, FoldOpInit *FoldOpInit::get(Init *Start, Init *List, Init *A, Init *B, Init *Expr, RecTy *Type) { - FoldingSetNodeID ID; ProfileFoldOpInit(ID, Start, List, A, B, Expr, Type); + detail::RecordKeeperImpl &RK = Start->getRecordKeeper().getImpl(); void *IP = nullptr; - if (FoldOpInit *I = Context->TheFoldOpInitPool.FindNodeOrInsertPos(ID, IP)) + if (FoldOpInit *I = RK.TheFoldOpInitPool.FindNodeOrInsertPos(ID, IP)) return I; - FoldOpInit *I = - new (Context->Allocator) FoldOpInit(Start, List, A, B, Expr, Type); - Context->TheFoldOpInitPool.InsertNode(I, IP); + FoldOpInit *I = new (RK.Allocator) FoldOpInit(Start, List, A, B, Expr, Type); + RK.TheFoldOpInitPool.InsertNode(I, IP); return I; } @@ -1575,12 +1610,13 @@ IsAOpInit *IsAOpInit::get(RecTy *CheckType, Init *Expr) { FoldingSetNodeID ID; ProfileIsAOpInit(ID, CheckType, Expr); + detail::RecordKeeperImpl &RK = Expr->getRecordKeeper().getImpl(); void *IP = nullptr; - if (IsAOpInit *I = Context->TheIsAOpInitPool.FindNodeOrInsertPos(ID, IP)) + if (IsAOpInit *I = RK.TheIsAOpInitPool.FindNodeOrInsertPos(ID, IP)) return I; - IsAOpInit *I = new (Context->Allocator) IsAOpInit(CheckType, Expr); - Context->TheIsAOpInitPool.InsertNode(I, IP); + IsAOpInit *I = new (RK.Allocator) IsAOpInit(CheckType, Expr); + RK.TheIsAOpInitPool.InsertNode(I, IP); return I; } @@ -1592,17 +1628,17 @@ Init *IsAOpInit::Fold() const { if (TypedInit *TI = dyn_cast(Expr)) { // Is the expression type known to be (a subclass of) the desired type? if (TI->getType()->typeIsConvertibleTo(CheckType)) - return IntInit::get(1); + return IntInit::get(getRecordKeeper(), 1); if (isa(CheckType)) { // If the target type is not a subclass of the expression type, or if // the expression has fully resolved to a record, we know that it can't // be of the required type. if (!CheckType->typeIsConvertibleTo(TI->getType()) || isa(Expr)) - return IntInit::get(0); + return IntInit::get(getRecordKeeper(), 0); } else { // We treat non-record types as not castable. 
-      return IntInit::get(0);
+      return IntInit::get(getRecordKeeper(), 0);
     }
   }
   return const_cast<IsAOpInit *>(this);
@@ -1625,6 +1661,81 @@ std::string IsAOpInit::getAsString() const {
       .str();
 }
 
+static void ProfileExistsOpInit(FoldingSetNodeID &ID, RecTy *CheckType,
+                                Init *Expr) {
+  ID.AddPointer(CheckType);
+  ID.AddPointer(Expr);
+}
+
+ExistsOpInit *ExistsOpInit::get(RecTy *CheckType, Init *Expr) {
+  FoldingSetNodeID ID;
+  ProfileExistsOpInit(ID, CheckType, Expr);
+
+  detail::RecordKeeperImpl &RK = Expr->getRecordKeeper().getImpl();
+  void *IP = nullptr;
+  if (ExistsOpInit *I = RK.TheExistsOpInitPool.FindNodeOrInsertPos(ID, IP))
+    return I;
+
+  ExistsOpInit *I = new (RK.Allocator) ExistsOpInit(CheckType, Expr);
+  RK.TheExistsOpInitPool.InsertNode(I, IP);
+  return I;
+}
+
+void ExistsOpInit::Profile(FoldingSetNodeID &ID) const {
+  ProfileExistsOpInit(ID, CheckType, Expr);
+}
+
+Init *ExistsOpInit::Fold(Record *CurRec, bool IsFinal) const {
+  if (StringInit *Name = dyn_cast<StringInit>(Expr)) {
+    if (!CurRec && !IsFinal)
+      return const_cast<ExistsOpInit *>(this);
+
+    // Self-references are allowed, but their resolution is delayed until
+    // the final resolve to ensure that we get the correct type for them.
+    auto *Anonymous = dyn_cast<AnonymousNameInit>(CurRec->getNameInit());
+    if (Name == CurRec->getNameInit() ||
+        (Anonymous && Name == Anonymous->getNameInit())) {
+      if (!IsFinal)
+        return const_cast<ExistsOpInit *>(this);
+
+      // No doubt that there exists a record, so we should check if types are
+      // compatible.
+      return IntInit::get(getRecordKeeper(),
+                          CurRec->getType()->typeIsA(CheckType));
+    }
+
+    // Look up all defined records to see if we can find one.
+    Record *D = CheckType->getRecordKeeper().getDef(Name->getValue());
+    if (!D) {
+      if (IsFinal)
+        return IntInit::get(getRecordKeeper(), 0);
+      return const_cast<ExistsOpInit *>(this);
+    }
+
+    // Check if types are compatible.
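+    // typeIsA() requires the found def's type to be (a subtype of) the
+    // requested check type, so !exists folds to 0 for defs of unrelated
+    // classes rather than reporting an error.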
+ return IntInit::get(getRecordKeeper(), + DefInit::get(D)->getType()->typeIsA(CheckType)); + } + return const_cast(this); +} + +Init *ExistsOpInit::resolveReferences(Resolver &R) const { + Init *NewExpr = Expr->resolveReferences(R); + if (Expr != NewExpr || R.isFinal()) + return get(CheckType, NewExpr)->Fold(R.getCurrentRecord(), R.isFinal()); + return const_cast(this); +} + +Init *ExistsOpInit::getBit(unsigned Bit) const { + return VarBitInit::get(const_cast(this), Bit); +} + +std::string ExistsOpInit::getAsString() const { + return (Twine("!exists<") + CheckType->getAsString() + ">(" + + Expr->getAsString() + ")") + .str(); +} + RecTy *TypedInit::getFieldType(StringInit *FieldName) const { if (RecordRecTy *RecordType = dyn_cast(getType())) { for (Record *Rec : RecordType->getClasses()) { @@ -1642,7 +1753,7 @@ TypedInit::convertInitializerTo(RecTy *Ty) const { if (isa(getType()) && isa(Ty) && cast(Ty)->getNumBits() == 1) - return BitsInit::get({const_cast(this)}); + return BitsInit::get(getRecordKeeper(), {const_cast(this)}); return nullptr; } @@ -1660,7 +1771,7 @@ Init *TypedInit::convertInitializerBitRange(ArrayRef Bits) const { NewBits.push_back(VarBitInit::get(const_cast(this), Bit)); } - return BitsInit::get(NewBits); + return BitsInit::get(getRecordKeeper(), NewBits); } Init *TypedInit::getCastTo(RecTy *Ty) const { @@ -1698,14 +1809,15 @@ Init *TypedInit::convertInitListSlice(ArrayRef Elements) const { VarInit *VarInit::get(StringRef VN, RecTy *T) { - Init *Value = StringInit::get(VN); + Init *Value = StringInit::get(T->getRecordKeeper(), VN); return VarInit::get(Value, T); } VarInit *VarInit::get(Init *VN, RecTy *T) { - VarInit *&I = Context->TheVarInitPool[std::make_pair(T, VN)]; + detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl(); + VarInit *&I = RK.TheVarInitPool[std::make_pair(T, VN)]; if (!I) - I = new (Context->Allocator) VarInit(VN, T); + I = new (RK.Allocator) VarInit(VN, T); return I; } @@ -1715,7 +1827,7 @@ StringRef VarInit::getName() const { } Init *VarInit::getBit(unsigned Bit) const { - if (getType() == BitRecTy::get()) + if (getType() == BitRecTy::get(getRecordKeeper())) return const_cast(this); return VarBitInit::get(const_cast(this), Bit); } @@ -1727,9 +1839,10 @@ Init *VarInit::resolveReferences(Resolver &R) const { } VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) { - VarBitInit *&I = Context->TheVarBitInitPool[std::make_pair(T, B)]; + detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl(); + VarBitInit *&I = RK.TheVarBitInitPool[std::make_pair(T, B)]; if (!I) - I = new(Context->Allocator) VarBitInit(T, B); + I = new (RK.Allocator) VarBitInit(T, B); return I; } @@ -1746,10 +1859,10 @@ Init *VarBitInit::resolveReferences(Resolver &R) const { } VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) { - VarListElementInit *&I = - Context->TheVarListElementInitPool[std::make_pair(T, E)]; + detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl(); + VarListElementInit *&I = RK.TheVarListElementInitPool[std::make_pair(T, E)]; if (!I) - I = new (Context->Allocator) VarListElementInit(T, E); + I = new (RK.Allocator) VarListElementInit(T, E); return I; } @@ -1771,7 +1884,7 @@ Init *VarListElementInit::resolveReferences(Resolver &R) const { } Init *VarListElementInit::getBit(unsigned Bit) const { - if (getType() == BitRecTy::get()) + if (getType() == BitRecTy::get(getRecordKeeper())) return const_cast(this); return VarBitInit::get(const_cast(this), Bit); } @@ -1808,20 +1921,25 @@ static void ProfileVarDefInit(FoldingSetNodeID 
&ID, ID.AddPointer(I); } +VarDefInit::VarDefInit(Record *Class, unsigned N) + : TypedInit(IK_VarDefInit, RecordRecTy::get(Class)), Class(Class), + NumArgs(N) {} + VarDefInit *VarDefInit::get(Record *Class, ArrayRef Args) { FoldingSetNodeID ID; ProfileVarDefInit(ID, Class, Args); + detail::RecordKeeperImpl &RK = Class->getRecords().getImpl(); void *IP = nullptr; - if (VarDefInit *I = Context->TheVarDefInitPool.FindNodeOrInsertPos(ID, IP)) + if (VarDefInit *I = RK.TheVarDefInitPool.FindNodeOrInsertPos(ID, IP)) return I; - void *Mem = Context->Allocator.Allocate(totalSizeToAlloc(Args.size()), - alignof(VarDefInit)); + void *Mem = RK.Allocator.Allocate(totalSizeToAlloc(Args.size()), + alignof(VarDefInit)); VarDefInit *I = new (Mem) VarDefInit(Class, Args.size()); std::uninitialized_copy(Args.begin(), Args.end(), I->getTrailingObjects()); - Context->TheVarDefInitPool.InsertNode(I, IP); + RK.TheVarDefInitPool.InsertNode(I, IP); return I; } @@ -1927,14 +2045,15 @@ std::string VarDefInit::getAsString() const { } FieldInit *FieldInit::get(Init *R, StringInit *FN) { - FieldInit *&I = Context->TheFieldInitPool[std::make_pair(R, FN)]; + detail::RecordKeeperImpl &RK = R->getRecordKeeper().getImpl(); + FieldInit *&I = RK.TheFieldInitPool[std::make_pair(R, FN)]; if (!I) - I = new (Context->Allocator) FieldInit(R, FN); + I = new (RK.Allocator) FieldInit(R, FN); return I; } Init *FieldInit::getBit(unsigned Bit) const { - if (getType() == BitRecTy::get()) + if (getType() == BitRecTy::get(getRecordKeeper())) return const_cast(this); return VarBitInit::get(const_cast(this), Bit); } @@ -1992,20 +2111,20 @@ void CondOpInit::Profile(FoldingSetNodeID &ID) const { ValType); } -CondOpInit * -CondOpInit::get(ArrayRef CondRange, - ArrayRef ValRange, RecTy *Ty) { +CondOpInit *CondOpInit::get(ArrayRef CondRange, + ArrayRef ValRange, RecTy *Ty) { assert(CondRange.size() == ValRange.size() && "Number of conditions and values must match!"); FoldingSetNodeID ID; ProfileCondOpInit(ID, CondRange, ValRange, Ty); + detail::RecordKeeperImpl &RK = Ty->getRecordKeeper().getImpl(); void *IP = nullptr; - if (CondOpInit *I = Context->TheCondOpInitPool.FindNodeOrInsertPos(ID, IP)) + if (CondOpInit *I = RK.TheCondOpInitPool.FindNodeOrInsertPos(ID, IP)) return I; - void *Mem = Context->Allocator.Allocate( + void *Mem = RK.Allocator.Allocate( totalSizeToAlloc(2 * CondRange.size()), alignof(BitsInit)); CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty); @@ -2013,7 +2132,7 @@ CondOpInit::get(ArrayRef CondRange, I->getTrailingObjects()); std::uninitialized_copy(ValRange.begin(), ValRange.end(), I->getTrailingObjects()+CondRange.size()); - Context->TheCondOpInitPool.InsertNode(I, IP); + RK.TheCondOpInitPool.InsertNode(I, IP); return I; } @@ -2041,16 +2160,18 @@ Init *CondOpInit::resolveReferences(Resolver &R) const { } Init *CondOpInit::Fold(Record *CurRec) const { + RecordKeeper &RK = getRecordKeeper(); for ( unsigned i = 0; i < NumConds; ++i) { Init *Cond = getCond(i); Init *Val = getVal(i); if (IntInit *CondI = dyn_cast_or_null( - Cond->convertInitializerTo(IntRecTy::get()))) { + Cond->convertInitializerTo(IntRecTy::get(RK)))) { if (CondI->getValue()) return Val->convertInitializerTo(getValType()); - } else - return const_cast(this); + } else { + return const_cast(this); + } } PrintFatalError(CurRec->getLoc(), @@ -2120,11 +2241,12 @@ DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, FoldingSetNodeID ID; ProfileDagInit(ID, V, VN, ArgRange, NameRange); + detail::RecordKeeperImpl &RK = V->getRecordKeeper().getImpl(); 
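+  // Dag inits are now uniqued in the pool owned by the RecordKeeper of the
+  // dag operator V, replacing the lookup in the old global context.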
void *IP = nullptr; - if (DagInit *I = Context->TheDagInitPool.FindNodeOrInsertPos(ID, IP)) + if (DagInit *I = RK.TheDagInitPool.FindNodeOrInsertPos(ID, IP)) return I; - void *Mem = Context->Allocator.Allocate( + void *Mem = RK.Allocator.Allocate( totalSizeToAlloc(ArgRange.size(), NameRange.size()), alignof(BitsInit)); DagInit *I = new (Mem) DagInit(V, VN, ArgRange.size(), NameRange.size()); @@ -2132,7 +2254,7 @@ DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, I->getTrailingObjects()); std::uninitialized_copy(NameRange.begin(), NameRange.end(), I->getTrailingObjects()); - Context->TheDagInitPool.InsertNode(I, IP); + RK.TheDagInitPool.InsertNode(I, IP); return I; } @@ -2209,7 +2331,7 @@ std::string DagInit::getAsString() const { RecordVal::RecordVal(Init *N, RecTy *T, FieldKind K) : Name(N), TyAndKind(T, K) { - setValue(UnsetInit::get()); + setValue(UnsetInit::get(N->getRecordKeeper())); assert(Value && "Cannot create unset value for current type!"); } @@ -2217,7 +2339,7 @@ RecordVal::RecordVal(Init *N, RecTy *T, FieldKind K) // a source location. RecordVal::RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K) : Name(N), Loc(Loc), TyAndKind(T, K) { - setValue(UnsetInit::get()); + setValue(UnsetInit::get(N->getRecordKeeper())); assert(Value && "Cannot create unset value for current type!"); } @@ -2226,7 +2348,7 @@ StringRef RecordVal::getName() const { } std::string RecordVal::getPrintType() const { - if (getType() == StringRecTy::get()) { + if (getType() == StringRecTy::get(getRecordKeeper())) { if (auto *StrInit = dyn_cast(Value)) { if (StrInit->hasCodeFormat()) return "code"; @@ -2252,7 +2374,7 @@ bool RecordVal::setValue(Init *V) { Bits.reserve(BTy->getNumBits()); for (unsigned I = 0, E = BTy->getNumBits(); I < E; ++I) Bits.push_back(Value->getBit(I)); - Value = BitsInit::get(Bits); + Value = BitsInit::get(V->getRecordKeeper(), Bits); } } } @@ -2277,7 +2399,7 @@ bool RecordVal::setValue(Init *V, SMLoc NewLoc) { Bits.reserve(BTy->getNumBits()); for (unsigned I = 0, E = BTy->getNumBits(); I < E; ++I) Bits.push_back(Value->getBit(I)); - Value = BitsInit::get(Bits); + Value = BitsInit::get(getRecordKeeper(), Bits); } } } @@ -2313,16 +2435,20 @@ void Record::checkName() { RecordRecTy *Record::getType() { SmallVector DirectSCs; getDirectSuperClasses(DirectSCs); - return RecordRecTy::get(DirectSCs); + return RecordRecTy::get(TrackedRecords, DirectSCs); } DefInit *Record::getDefInit() { - if (!CorrespondingDefInit) - CorrespondingDefInit = new (Context->Allocator) DefInit(this); + if (!CorrespondingDefInit) { + CorrespondingDefInit = + new (TrackedRecords.getImpl().Allocator) DefInit(this); + } return CorrespondingDefInit; } -unsigned Record::getNewUID() { return Context->LastRecordID++; } +unsigned Record::getNewUID(RecordKeeper &RK) { + return RK.getImpl().LastRecordID++; +} void Record::setName(Init *NewName) { Name = NewName; @@ -2472,7 +2598,7 @@ Init *Record::getValueInit(StringRef FieldName) const { StringRef Record::getValueAsString(StringRef FieldName) const { llvm::Optional S = getValueAsOptionalString(FieldName); - if (!S.hasValue()) + if (!S) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); return S.getValue(); @@ -2671,6 +2797,10 @@ void Record::checkUnusedTemplateArgs() { } } +RecordKeeper::RecordKeeper() + : Impl(std::make_unique(*this)) {} +RecordKeeper::~RecordKeeper() = default; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; } 
#endif @@ -2689,7 +2819,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) { /// GetNewAnonymousName - Generate a unique anonymous name that can be used as /// an identifier. Init *RecordKeeper::getNewAnonymousName() { - return AnonymousNameInit::get(AnonCounter++); + return AnonymousNameInit::get(*this, getImpl().AnonCounter++); } // These functions implement the phase timing facility. Starting a timer @@ -2733,11 +2863,10 @@ void RecordKeeper::stopBackendTimer() { } } -// We cache the record vectors for single classes. Many backends request -// the same vectors multiple times. -std::vector RecordKeeper::getAllDerivedDefinitions( - StringRef ClassName) const { - +std::vector +RecordKeeper::getAllDerivedDefinitions(StringRef ClassName) const { + // We cache the record vectors for single classes. Many backends request + // the same vectors multiple times. auto Pair = ClassRecordsMap.try_emplace(ClassName); if (Pair.second) Pair.first->second = getAllDerivedDefinitions(makeArrayRef(ClassName)); @@ -2768,6 +2897,12 @@ std::vector RecordKeeper::getAllDerivedDefinitions( return Defs; } +std::vector +RecordKeeper::getAllDerivedDefinitionsIfDefined(StringRef ClassName) const { + return getClass(ClassName) ? getAllDerivedDefinitions(ClassName) + : std::vector(); +} + Init *MapResolver::resolve(Init *VarName) { auto It = Map.find(VarName); if (It == Map.end()) diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 25079fe33edb..2a4ee4473b56 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -55,10 +55,8 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef Macros) : SrcMgr(SM) { std::make_unique>()); // Put all macros defined in the command line into the DefinedMacros set. - std::for_each(Macros.begin(), Macros.end(), - [this](const std::string &MacroName) { - DefinedMacros.insert(MacroName); - }); + for (const std::string &MacroName : Macros) + DefinedMacros.insert(MacroName); } SMLoc TGLexer::getLoc() const { @@ -586,6 +584,7 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("find", tgtok::XFind) .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. + .Case("exists", tgtok::XExists) .Default(tgtok::Error); return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 857ba09782e8..459ba0f4af64 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -56,6 +56,7 @@ namespace tgtok { XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XFind, XCast, XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, + XExists, // Boolean literals. TrueVal, FalseVal, @@ -337,7 +338,7 @@ private: // // The method returns true upon reaching the first non-whitespace symbol // or EOF, CurPtr is set to point to this symbol. The method returns false, - // if an error occured during skipping of a C-style comment. + // if an error occurred during skipping of a C-style comment. bool prepSkipLineBegin(); // Skip any whitespaces or comments after a preprocessing directive. @@ -345,7 +346,7 @@ private: // or end of the file. If there is a multiline C-style comment // after the preprocessing directive, the method skips // the comment, so the final CurPtr may point to one of the next lines. 
- // The method returns false, if an error occured during skipping + // The method returns false, if an error occurred during skipping // C- or C++-style comment, or a non-whitespace symbol appears // after the preprocessing directive. // diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 90646a0c642d..acf93dc3d792 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -112,14 +112,15 @@ static void checkConcrete(Record &R) { /// Return an Init with a qualifier prefix referring /// to CurRec's name. -static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, - Init *Name, StringRef Scoper) { - Init *NewName = - BinOpInit::getStrConcat(CurRec.getNameInit(), StringInit::get(Scoper)); +static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name, + StringRef Scoper) { + RecordKeeper &RK = CurRec.getRecords(); + Init *NewName = BinOpInit::getStrConcat(CurRec.getNameInit(), + StringInit::get(RK, Scoper)); NewName = BinOpInit::getStrConcat(NewName, Name); if (CurMultiClass && Scoper != "::") { Init *Prefix = BinOpInit::getStrConcat(CurMultiClass->Rec.getNameInit(), - StringInit::get("::")); + StringInit::get(RK, "::")); NewName = BinOpInit::getStrConcat(Prefix, NewName); } @@ -131,7 +132,8 @@ static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, /// Return the qualified version of the implicit 'NAME' template argument. static Init *QualifiedNameOfImplicitName(Record &Rec, MultiClass *MC = nullptr) { - return QualifyName(Rec, MC, StringInit::get("NAME"), MC ? "::" : ":"); + return QualifyName(Rec, MC, StringInit::get(Rec.getRecords(), "NAME"), + MC ? "::" : ":"); } static Init *QualifiedNameOfImplicitName(MultiClass *MC) { @@ -187,7 +189,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, "' is not a bits type"); // Convert the incoming value to a bits type of the appropriate size... - Init *BI = V->getCastTo(BitsRecTy::get(BitList.size())); + Init *BI = V->getCastTo(BitsRecTy::get(Records, BitList.size())); if (!BI) return Error(Loc, "Initializer is not compatible with bit range"); @@ -206,7 +208,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, if (!NewBits[i]) NewBits[i] = CurVal->getBit(i); - V = BitsInit::get(NewBits); + V = BitsInit::get(Records, NewBits); } if (RV->setValue(V, Loc)) { @@ -262,8 +264,8 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { Init *Name; if (CurRec->isClass()) - Name = - VarInit::get(QualifiedNameOfImplicitName(*CurRec), StringRecTy::get()); + Name = VarInit::get(QualifiedNameOfImplicitName(*CurRec), + StringRecTy::get(Records)); else Name = CurRec->getNameInit(); R.set(QualifiedNameOfImplicitName(*SC), Name); @@ -333,9 +335,9 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, } } - TemplateArgs.emplace_back( - QualifiedNameOfImplicitName(SMC), - VarInit::get(QualifiedNameOfImplicitName(CurMC), StringRecTy::get())); + TemplateArgs.emplace_back(QualifiedNameOfImplicitName(SMC), + VarInit::get(QualifiedNameOfImplicitName(CurMC), + StringRecTy::get(Records))); // Add all of the defs in the subclass into the current multiclass. return resolve(SMC->Entries, TemplateArgs, false, &CurMC->Entries); @@ -540,7 +542,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { // These are all of the tokens that can begin an object body. // Some of these can also begin values but we disallow those cases // because they are unlikely to be useful. 
- return UnsetInit::get(); + return UnsetInit::get(Records); default: break; } @@ -549,7 +551,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { if (CurMultiClass) CurRec = &CurMultiClass->Rec; - Init *Name = ParseValue(CurRec, StringRecTy::get(), ParseNameMode); + Init *Name = ParseValue(CurRec, StringRecTy::get(Records), ParseNameMode); if (!Name) return nullptr; @@ -558,8 +560,8 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { HasReferenceResolver R(NameStr); Name->resolveReferences(R); if (!R.found()) - Name = BinOpInit::getStrConcat(VarInit::get(NameStr, StringRecTy::get()), - Name); + Name = BinOpInit::getStrConcat( + VarInit::get(NameStr, StringRecTy::get(Records)), Name); } return Name; @@ -812,12 +814,21 @@ RecTy *TGParser::ParseType() { switch (Lex.getCode()) { default: TokError("Unknown token when expecting a type"); return nullptr; case tgtok::String: - case tgtok::Code: Lex.Lex(); return StringRecTy::get(); - case tgtok::Bit: Lex.Lex(); return BitRecTy::get(); - case tgtok::Int: Lex.Lex(); return IntRecTy::get(); - case tgtok::Dag: Lex.Lex(); return DagRecTy::get(); + case tgtok::Code: + Lex.Lex(); + return StringRecTy::get(Records); + case tgtok::Bit: + Lex.Lex(); + return BitRecTy::get(Records); + case tgtok::Int: + Lex.Lex(); + return IntRecTy::get(Records); + case tgtok::Dag: + Lex.Lex(); + return DagRecTy::get(Records); case tgtok::Id: - if (Record *R = ParseClassID()) return RecordRecTy::get(R); + if (Record *R = ParseClassID()) + return RecordRecTy::get(R); TokError("unknown class name"); return nullptr; case tgtok::Bits: { @@ -835,7 +846,7 @@ RecTy *TGParser::ParseType() { return nullptr; } Lex.Lex(); // Eat '>' - return BitsRecTy::get(Val); + return BitsRecTy::get(Records, Val); } case tgtok::List: { if (Lex.Lex() != tgtok::less) { // Eat 'bits' @@ -878,7 +889,7 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc, RV->setUsed(true); return VarInit::get(TemplateArgName, RV->getType()); } else if (Name->getValue() == "NAME") { - return VarInit::get(TemplateArgName, StringRecTy::get()); + return VarInit::get(TemplateArgName, StringRecTy::get(Records)); } } @@ -947,7 +958,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case tgtok::XNOT: Lex.Lex(); // eat the operation Code = UnOpInit::NOT; - Type = IntRecTy::get(); + Type = IntRecTy::get(Records); break; case tgtok::XHead: Lex.Lex(); // eat the operation @@ -960,12 +971,12 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case tgtok::XSize: Lex.Lex(); Code = UnOpInit::SIZE; - Type = IntRecTy::get(); + Type = IntRecTy::get(Records); break; case tgtok::XEmpty: Lex.Lex(); // eat the operation Code = UnOpInit::EMPTY; - Type = IntRecTy::get(); + Type = IntRecTy::get(Records); break; case tgtok::XGetDagOp: Lex.Lex(); // eat the operation @@ -985,7 +996,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { // but keep parsing, to consume the operand } } else { - Type = RecordRecTy::get({}); + Type = RecordRecTy::get(Records, {}); } Code = UnOpInit::GETDAGOP; break; @@ -1085,6 +1096,52 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return (IsAOpInit::get(Type, LHS))->Fold(); } + case tgtok::XExists: { + // Value ::= !exists '<' Type '>' '(' Value ')' + Lex.Lex(); // eat the operation + + RecTy *Type = ParseOperatorType(); + if (!Type) + return nullptr; + + if (!consume(tgtok::l_paren)) { + TokError("expected '(' after type of !exists"); + return nullptr; + } + + SMLoc ExprLoc = Lex.getLoc(); + 
Init *Expr = ParseValue(CurRec); + if (!Expr) + return nullptr; + + TypedInit *ExprType = dyn_cast(Expr); + if (!ExprType) { + Error(ExprLoc, "expected string type argument in !exists operator"); + return nullptr; + } + + RecordRecTy *RecType = dyn_cast(ExprType->getType()); + if (RecType) { + Error(ExprLoc, + "expected string type argument in !exists operator, please " + "use !isa instead"); + return nullptr; + } + + StringRecTy *SType = dyn_cast(ExprType->getType()); + if (!SType) { + Error(ExprLoc, "expected string type argument in !exists operator"); + return nullptr; + } + + if (!consume(tgtok::r_paren)) { + TokError("expected ')' in !exists"); + return nullptr; + } + + return (ExistsOpInit::get(Type, Expr))->Fold(CurRec); + } + case tgtok::XConcat: case tgtok::XADD: case tgtok::XSUB: @@ -1143,8 +1200,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { llvm_unreachable("Unhandled code!"); case tgtok::XConcat: case tgtok::XSetDagOp: - Type = DagRecTy::get(); - ArgType = DagRecTy::get(); + Type = DagRecTy::get(Records); + ArgType = DagRecTy::get(Records); break; case tgtok::XAND: case tgtok::XOR: @@ -1155,8 +1212,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case tgtok::XADD: case tgtok::XSUB: case tgtok::XMUL: - Type = IntRecTy::get(); - ArgType = IntRecTy::get(); + Type = IntRecTy::get(Records); + ArgType = IntRecTy::get(Records); break; case tgtok::XEq: case tgtok::XNe: @@ -1164,7 +1221,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case tgtok::XLt: case tgtok::XGe: case tgtok::XGt: - Type = BitRecTy::get(); + Type = BitRecTy::get(Records); // ArgType for the comparison operators is not yet known. break; case tgtok::XListConcat: @@ -1175,11 +1232,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { // Can't do any typechecking until we parse the first argument. break; case tgtok::XStrConcat: - Type = StringRecTy::get(); - ArgType = StringRecTy::get(); + Type = StringRecTy::get(Records); + ArgType = StringRecTy::get(Records); break; case tgtok::XInterleave: - Type = StringRecTy::get(); + Type = StringRecTy::get(Records); // The first argument type is not yet known. } @@ -1253,9 +1310,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { break; case BinOpInit::EQ: case BinOpInit::NE: - if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) && - !ArgType->typeIsConvertibleTo(StringRecTy::get()) && - !ArgType->typeIsConvertibleTo(RecordRecTy::get({}))) { + if (!ArgType->typeIsConvertibleTo(IntRecTy::get(Records)) && + !ArgType->typeIsConvertibleTo(StringRecTy::get(Records)) && + !ArgType->typeIsConvertibleTo(RecordRecTy::get(Records, {}))) { Error(InitLoc, Twine("expected bit, bits, int, string, or record; " "got value of type '") + ArgType->getAsString() + "'"); @@ -1266,8 +1323,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case BinOpInit::LT: case BinOpInit::GE: case BinOpInit::GT: - if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) && - !ArgType->typeIsConvertibleTo(StringRecTy::get())) { + if (!ArgType->typeIsConvertibleTo(IntRecTy::get(Records)) && + !ArgType->typeIsConvertibleTo(StringRecTy::get(Records))) { Error(InitLoc, Twine("expected bit, bits, int, or string; " "got value of type '") + ArgType->getAsString() + "'"); @@ -1277,8 +1334,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { case BinOpInit::INTERLEAVE: switch (InitList.size()) { case 1: // First argument must be a list of strings or integers. 
-        if (ArgType != StringRecTy::get()->getListTy() &&
-            !ArgType->typeIsConvertibleTo(IntRecTy::get()->getListTy())) {
+        if (ArgType != StringRecTy::get(Records)->getListTy() &&
+            !ArgType->typeIsConvertibleTo(
+                IntRecTy::get(Records)->getListTy())) {
           Error(InitLoc, Twine("expected list of string, int, bits, or bit; "
                                "got value of type '") +
                              ArgType->getAsString() + "'");
@@ -1323,7 +1381,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     case BinOpInit::SETDAGOP:
       // After parsing the first dag argument, switch to expecting
       // a record, with no restriction on its superclasses.
-      ArgType = RecordRecTy::get({});
+      ArgType = RecordRecTy::get(Records, {});
       break;
     default:
       break;
@@ -1383,7 +1441,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
   default: llvm_unreachable("Unhandled code!");
   case tgtok::XDag:
     Code = TernOpInit::DAG;
-    Type = DagRecTy::get();
+    Type = DagRecTy::get(Records);
     ItemType = nullptr;
     break;
   case tgtok::XIf:
@@ -1445,7 +1503,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
       Error(RHSLoc, "could not determine type of the name list in !dag");
       return nullptr;
     }
-    if (RHSt && StringRecTy::get()->getListTy() != RHSt->getType()) {
+    if (RHSt && StringRecTy::get(Records)->getListTy() != RHSt->getType()) {
       Error(RHSLoc, Twine("expected list, got type '") +
                         RHSt->getType()->getAsString() + "'");
       return nullptr;
    }
@@ -1465,16 +1523,16 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
   if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS))
     MHSTy = MHSt->getType();
   if (BitsInit *MHSbits = dyn_cast<BitsInit>(MHS))
-    MHSTy = BitsRecTy::get(MHSbits->getNumBits());
+    MHSTy = BitsRecTy::get(Records, MHSbits->getNumBits());
   if (isa<BitInit>(MHS))
-    MHSTy = BitRecTy::get();
+    MHSTy = BitRecTy::get(Records);
 
   if (TypedInit *RHSt = dyn_cast<TypedInit>(RHS))
     RHSTy = RHSt->getType();
   if (BitsInit *RHSbits = dyn_cast<BitsInit>(RHS))
-    RHSTy = BitsRecTy::get(RHSbits->getNumBits());
+    RHSTy = BitsRecTy::get(Records, RHSbits->getNumBits());
   if (isa<BitInit>(RHS))
-    RHSTy = BitRecTy::get();
+    RHSTy = BitRecTy::get(Records);
 
   // For UnsetInit, the type is inferred from the other operand.
if (isa(MHS)) @@ -1569,7 +1627,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return nullptr; } - Init *A = StringInit::get(Lex.getCurStrVal()); + Init *A = StringInit::get(Records, Lex.getCurStrVal()); if (CurRec && CurRec->getValue(A)) { TokError((Twine("left !foldl variable '") + A->getAsString() + "' already defined") @@ -1587,7 +1645,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return nullptr; } - Init *B = StringInit::get(Lex.getCurStrVal()); + Init *B = StringInit::get(Records, Lex.getCurStrVal()); if (CurRec && CurRec->getValue(B)) { TokError((Twine("right !foldl variable '") + B->getAsString() + "' already defined") @@ -1679,7 +1737,7 @@ RecTy *TGParser::ParseOperatorType() { /// Substr ::= !substr(string, start-int [, length-int]) => string Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) { TernOpInit::TernaryOp Code = TernOpInit::SUBSTR; - RecTy *Type = StringRecTy::get(); + RecTy *Type = StringRecTy::get(Records); Lex.Lex(); // eat the operation @@ -1710,7 +1768,7 @@ Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) { if (!RHS) return nullptr; } else { - RHS = IntInit::get(std::numeric_limits::max()); + RHS = IntInit::get(Records, std::numeric_limits::max()); } if (!consume(tgtok::r_paren)) { @@ -1767,7 +1825,7 @@ Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) { /// Substr ::= !find(string, string [, start-int]) => int Init *TGParser::ParseOperationFind(Record *CurRec, RecTy *ItemType) { TernOpInit::TernaryOp Code = TernOpInit::FIND; - RecTy *Type = IntRecTy::get(); + RecTy *Type = IntRecTy::get(Records); Lex.Lex(); // eat the operation @@ -1798,7 +1856,7 @@ Init *TGParser::ParseOperationFind(Record *CurRec, RecTy *ItemType) { if (!RHS) return nullptr; } else { - RHS = IntInit::get(0); + RHS = IntInit::get(Records, 0); } if (!consume(tgtok::r_paren)) { @@ -1868,7 +1926,7 @@ Init *TGParser::ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType) { return nullptr; } - Init *LHS = StringInit::get(Lex.getCurStrVal()); + Init *LHS = StringInit::get(Records, Lex.getCurStrVal()); Lex.Lex(); // eat the ID. if (CurRec && CurRec->getValue(LHS)) { @@ -1908,7 +1966,7 @@ Init *TGParser::ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType) { if (ListRecTy *OutListTy = dyn_cast(ItemType)) { ExprEltType = (Operation == tgtok::XForEach) ? 
OutListTy->getElementType() - : IntRecTy::get(); + : IntRecTy::get(Records); } else { Error(OpLoc, "expected value of type '" + @@ -2028,9 +2086,9 @@ Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) { if (TypedInit *Vt = dyn_cast<TypedInit>(V)) VTy = Vt->getType(); if (BitsInit *Vbits = dyn_cast<BitsInit>(V)) - VTy = BitsRecTy::get(Vbits->getNumBits()); + VTy = BitsRecTy::get(Records, Vbits->getNumBits()); if (isa<BitInit>(V)) - VTy = BitRecTy::get(); + VTy = BitRecTy::get(Records); if (Type == nullptr) { if (!isa<UnsetInit>(V)) @@ -2084,23 +2142,23 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, default: TokError("Unknown or reserved token when parsing a value"); break; case tgtok::TrueVal: - R = IntInit::get(1); + R = IntInit::get(Records, 1); Lex.Lex(); break; case tgtok::FalseVal: - R = IntInit::get(0); + R = IntInit::get(Records, 0); Lex.Lex(); break; case tgtok::IntVal: - R = IntInit::get(Lex.getCurIntVal()); + R = IntInit::get(Records, Lex.getCurIntVal()); Lex.Lex(); break; case tgtok::BinaryIntVal: { auto BinaryVal = Lex.getCurBinaryIntVal(); SmallVector<Init*, 16> Bits(BinaryVal.second); for (unsigned i = 0, e = BinaryVal.second; i != e; ++i) - Bits[i] = BitInit::get(BinaryVal.first & (1LL << i)); - R = BitsInit::get(Bits); + Bits[i] = BitInit::get(Records, BinaryVal.first & (1LL << i)); + R = BitsInit::get(Records, Bits); Lex.Lex(); break; } @@ -2114,20 +2172,20 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, Lex.Lex(); } - R = StringInit::get(Val); + R = StringInit::get(Records, Val); break; } case tgtok::CodeFragment: - R = StringInit::get(Lex.getCurStrVal(), StringInit::SF_Code); + R = StringInit::get(Records, Lex.getCurStrVal(), StringInit::SF_Code); Lex.Lex(); break; case tgtok::question: - R = UnsetInit::get(); + R = UnsetInit::get(Records); Lex.Lex(); break; case tgtok::Id: { SMLoc NameLoc = Lex.getLoc(); - StringInit *Name = StringInit::get(Lex.getCurStrVal()); + StringInit *Name = StringInit::get(Records, Lex.getCurStrVal()); if (Lex.Lex() != tgtok::less) // consume the Id. return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue @@ -2202,7 +2260,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, // Fallthrough to try convert this to a bit. } // All other values must be convertible to just a single bit. - Init *Bit = Vals[i]->getCastTo(BitRecTy::get()); + Init *Bit = Vals[i]->getCastTo(BitRecTy::get(Records)); if (!Bit) { Error(BraceLoc, "Element #" + Twine(i) + " (" + Vals[i]->getAsString() + ") is not convertible to a bit"); @@ -2211,7 +2269,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, NewBits.push_back(Bit); } std::reverse(NewBits.begin(), NewBits.end()); - return BitsInit::get(NewBits); + return BitsInit::get(Records, NewBits); } case tgtok::l_square: { // Value ::= '[' ValueList ']' Lex.Lex(); // eat the '[' @@ -2322,7 +2380,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, TokError("expected variable name in dag operator"); return nullptr; } - OperatorName = StringInit::get(Lex.getCurStrVal()); + OperatorName = StringInit::get(Records, Lex.getCurStrVal()); Lex.Lex(); // eat the VarName.
} @@ -2346,6 +2404,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XEmpty: case tgtok::XCast: case tgtok::XGetDagOp: // Value ::= !unop '(' Value ')' + case tgtok::XExists: case tgtok::XIsA: case tgtok::XConcat: case tgtok::XDag: @@ -2451,7 +2510,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { TokError("expected field identifier after '.'"); return nullptr; } - StringInit *FieldName = StringInit::get(Lex.getCurStrVal()); + StringInit *FieldName = StringInit::get(Records, Lex.getCurStrVal()); if (!Result->getFieldType(FieldName)) { TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" + Result->getAsString() + "'"); @@ -2494,9 +2553,9 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { // Create a !strconcat() operation, first casting each operand to // a string if necessary. - if (LHS->getType() != StringRecTy::get()) { + if (LHS->getType() != StringRecTy::get(Records)) { auto CastLHS = dyn_cast<TypedInit>( - UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get()) + UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get(Records)) ->Fold(CurRec)); if (!CastLHS) { Error(PasteLoc, @@ -2518,7 +2577,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { // because they are unlikely to be useful. // Trailing paste, concat with an empty string. - RHS = StringInit::get(""); + RHS = StringInit::get(Records, ""); break; default: @@ -2531,9 +2590,9 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { return nullptr; } - if (RHS->getType() != StringRecTy::get()) { + if (RHS->getType() != StringRecTy::get(Records)) { auto CastRHS = dyn_cast<TypedInit>( - UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get()) + UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get(Records)) ->Fold(CurRec)); if (!CastRHS) { Error(PasteLoc, @@ -2566,8 +2625,8 @@ void TGParser::ParseDagArgList( // DagArg ::= VARNAME if (Lex.getCode() == tgtok::VarName) { // A missing value is treated like '?'. - StringInit *VarName = StringInit::get(Lex.getCurStrVal()); - Result.emplace_back(UnsetInit::get(), VarName); + StringInit *VarName = StringInit::get(Records, Lex.getCurStrVal()); + Result.emplace_back(UnsetInit::get(Records), VarName); Lex.Lex(); } else { // DagArg ::= Value (':' VARNAME)? @@ -2585,7 +2644,7 @@ void TGParser::ParseDagArgList( Result.clear(); return; } - VarName = StringInit::get(Lex.getCurStrVal()); + VarName = StringInit::get(Records, Lex.getCurStrVal()); Lex.Lex(); // eat the VarName. } @@ -2692,7 +2751,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec, } SMLoc IdLoc = Lex.getLoc(); - Init *DeclName = StringInit::get(Str); + Init *DeclName = StringInit::get(Records, Str); Lex.Lex(); bool BadField; @@ -2745,7 +2804,7 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) { return nullptr; } - Init *DeclName = StringInit::get(Lex.getCurStrVal()); + Init *DeclName = StringInit::get(Records, Lex.getCurStrVal()); Lex.Lex(); // If a value is present, parse it.
@@ -2799,10 +2858,10 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) { if (!Ranges.empty()) { assert(!IterType && "Type already initialized?"); - IterType = IntRecTy::get(); + IterType = IntRecTy::get(Records); std::vector<Init *> Values; for (unsigned R : Ranges) - Values.push_back(IntInit::get(R)); + Values.push_back(IntInit::get(Records, R)); ForeachListValue = ListInit::get(Values, IterType); } @@ -2879,7 +2938,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) { return TokError("expected field identifier after let"); SMLoc IdLoc = Lex.getLoc(); - StringInit *FieldName = StringInit::get(Lex.getCurStrVal()); + StringInit *FieldName = StringInit::get(Records, Lex.getCurStrVal()); Lex.Lex(); // eat the field name. SmallVector<unsigned, 16> BitList; @@ -2898,7 +2957,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) { if (!BitList.empty() && isa<BitsRecTy>(Type)) { // When assigning to a subset of a 'bits' object, expect the RHS to have // the type of that subset instead of the type of the whole object. - Type = BitsRecTy::get(BitList.size()); + Type = BitsRecTy::get(Records, BitList.size()); } Init *Val = ParseValue(CurRec, Type); @@ -3056,7 +3115,7 @@ bool TGParser::ParseDefset() { if (Lex.getCode() != tgtok::Id) return TokError("expected identifier"); - StringInit *DeclName = StringInit::get(Lex.getCurStrVal()); + StringInit *DeclName = StringInit::get(Records, Lex.getCurStrVal()); if (Records.getGlobal(DeclName->getValue())) return TokError("def or global variable of this name already exists"); @@ -3093,7 +3152,7 @@ bool TGParser::ParseDefvar() { if (Lex.getCode() != tgtok::Id) return TokError("expected identifier"); - StringInit *DeclName = StringInit::get(Lex.getCurStrVal()); + StringInit *DeclName = StringInit::get(Records, Lex.getCurStrVal()); if (CurLocalScope) { if (CurLocalScope->varAlreadyDefined(DeclName->getValue())) return TokError("local variable of this name already exists"); @@ -3201,10 +3260,10 @@ bool TGParser::ParseIf(MultiClass *CurMultiClass) { // loop, over a list of length 0 or 1 depending on the condition, and with no // iteration variable being assigned. - ListInit *EmptyList = ListInit::get({}, BitRecTy::get()); + ListInit *EmptyList = ListInit::get({}, BitRecTy::get(Records)); ListInit *SingletonList = - ListInit::get({BitInit::get(true)}, BitRecTy::get()); - RecTy *BitListTy = ListRecTy::get(BitRecTy::get()); + ListInit::get({BitInit::get(Records, true)}, BitRecTy::get(Records)); + RecTy *BitListTy = ListRecTy::get(BitRecTy::get(Records)); // The foreach containing the then-clause selects SingletonList if // the condition is true. @@ -3369,7 +3428,7 @@ void TGParser::ParseLetList(SmallVectorImpl<LetRecord> &Result) { return; } - StringInit *Name = StringInit::get(Lex.getCurStrVal()); + StringInit *Name = StringInit::get(Records, Lex.getCurStrVal()); SMLoc NameLoc = Lex.getLoc(); Lex.Lex(); // Eat the identifier.
@@ -3570,7 +3629,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { if (CurMultiClass) DefmName = BinOpInit::getStrConcat( VarInit::get(QualifiedNameOfImplicitName(CurMultiClass), - StringRecTy::get()), + StringRecTy::get(Records)), DefmName); } diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index 00883c858d58..d4b928c62fd7 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -45,7 +45,7 @@ namespace llvm { void dump() const; - RecordsEntry() {} + RecordsEntry() = default; RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {} RecordsEntry(std::unique_ptr<ForeachLoop> Loop) : Loop(std::move(Loop)) {} diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index 4d1464901777..a6065d4ed9ec 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -16,6 +16,8 @@ #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "Utils/AArch64BaseInfo.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" @@ -71,6 +73,7 @@ void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64BranchTargetsPass(PassRegistry&); +void initializeAArch64CFIFixupPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); void initializeAArch64CondBrTuningPass(PassRegistry &); void initializeAArch64CompressJumpTablesPass(PassRegistry&); diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 9a04b28a8b8f..f092c039b58e 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -64,6 +64,10 @@ def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true", "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">; +def FeatureLDAPR : SubtargetFeature<"ldapr", "HasLDAPR", "true", + "Use LDAPR to lower atomic loads; experimental until we " + "have more testing/a formal correctness proof">; + def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", "Enable out of line atomics to support LSE instructions">; @@ -154,6 +158,10 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", "Has zero-cycle zeroing instructions for generic registers">; +// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0", +// as movi is more efficient across all cores. Newer cores can eliminate +// fmovs early and there is no difference with movi, but this is not true for +// all implementations.
def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", "Has no zero-cycle zeroing instructions for FP registers">; @@ -168,7 +176,7 @@ def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", "The zero-cycle floating-point zeroing instruction has a bug">; def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", + "RequiresStrictAlign", "true", "Disallow all unaligned memory " "access">; @@ -190,11 +198,11 @@ def FeaturePredictableSelectIsExpensive : SubtargetFeature< "Prefer likely predicted branches over selects">; def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", - "CustomAsCheapAsMove", "true", + "HasCustomCheapAsMoveHandling", "true", "Use custom handling of cheap instructions">; def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", - "ExynosAsCheapAsMove", "true", + "HasExynosCheapAsMoveHandling", "true", "Use Exynos specific handling of cheap instructions", [FeatureCustomCheapAsMoveHandling]>; @@ -202,12 +210,16 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", - "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; + "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">; def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", - "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; + "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; + +def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", + "IsStoreAddressAscend", "false", + "Schedule vector stores by ascending address">; -def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow", +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", "true", "STR of Q register with register offset is slow">; def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< @@ -246,6 +258,10 @@ def FeatureFuseCryptoEOR : SubtargetFeature< "fuse-crypto-eor", "HasFuseCryptoEOR", "true", "CPU fuses AES/PMULL and EOR operations">; +def FeatureFuseAdrpAdd : SubtargetFeature< + "fuse-adrp-add", "HasFuseAdrpAdd", "true", + "CPU fuses adrp+add operations">; + def FeatureFuseLiterals : SubtargetFeature< "fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; @@ -438,13 +454,8 @@ def FeatureEnhancedCounterVirtualization : def FeatureRME : SubtargetFeature<"rme", "HasRME", "true", "Enable Realm Management Extension">; -// A subset of SVE(2) instructions are legal in Streaming SVE execution mode -// defined by SME. 
-def FeatureStreamingSVE : SubtargetFeature<"streaming-sve", - "HasStreamingSVE", "true", - "Enable subset of SVE(2) instructions for Streaming SVE execution mode">; def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", - "Enable Scalable Matrix Extension (SME)", [FeatureStreamingSVE, FeatureBF16]>; + "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureUseScalarIncVL]>; def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true", "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; @@ -464,6 +475,11 @@ def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; +def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", + "NoBTIAtReturnTwice", "true", + "Don't place a BTI instruction " + "after a return-twice">; + //===----------------------------------------------------------------------===// // Architectures. // @@ -534,7 +550,18 @@ def HasV8_0rOps : SubtargetFeature< FeaturePAuth, FeatureRCPC, //v8.4 FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI, - FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>; + FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + // Not mandatory in v8.0-R, but included here on the grounds that it + // only enables names of system registers + FeatureSpecRestrict + ]>; + +// Only intended to be used by disassemblers. +def FeatureAll + : SubtargetFeature<"all", "IsAll", "true", "Enable all instructions", []>; + +class AssemblerPredicateWithAll + : AssemblerPredicate<(any_of FeatureAll, cond), name>; //===----------------------------------------------------------------------===// // Register File Description @@ -552,6 +579,7 @@ include "AArch64Schedule.td" include "AArch64InstrInfo.td" include "AArch64SchedPredicates.td" include "AArch64SchedPredExynos.td" +include "AArch64SchedPredAmpere.td" include "AArch64Combine.td" def AArch64InstrInfo : InstrInfo; @@ -596,7 +624,7 @@ class AArch64Unsupported { list F; } def SVEUnsupported : AArch64Unsupported { let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, - HasSVE2BitPerm, HasSVEorStreamingSVE, HasSVE2orStreamingSVE]; + HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME]; } def PAUnsupported : AArch64Unsupported { @@ -621,6 +649,7 @@ include "AArch64SchedThunderX2T99.td" include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" include "AArch64SchedTSV110.td" +include "AArch64SchedAmpere1.td" def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors">; @@ -649,6 +678,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureFuseAES, FeatureBalanceFPOps, FeatureCustomCheapAsMoveHandling, + FeatureFuseAdrpAdd, FeatureFuseLiterals, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; @@ -657,11 +687,13 @@ def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", "Cortex-A65 ARM processors", [ FeatureFuseAES, FeatureFuseAddress, + FeatureFuseAdrpAdd, FeatureFuseLiterals]>; def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureFuseLiterals]>; def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", @@ -802,6 +834,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", FeatureFuseArithmeticLogic, FeatureFuseCCSelect, FeatureFuseCryptoEOR, + FeatureFuseAdrpAdd, FeatureFuseLiterals, 
FeatureZCRegMove, FeatureZCZeroing]>; @@ -813,13 +846,15 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseAddress, FeatureFuseAES, FeatureFuseCCSelect, + FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureLSLFast, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; -def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", - "Samsung Exynos-M3 processors", +// Re-uses some scheduling and tunings from the ExynosM3 proc family. +def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M4 processors", [FeatureArithmeticBccFusion, FeatureArithmeticCbzFusion, FeatureExynosCheapAsMoveHandling, @@ -828,6 +863,7 @@ def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseAES, FeatureFuseArithmeticLogic, FeatureFuseCCSelect, + FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureLSLFast, FeaturePostRAScheduler, @@ -934,6 +970,16 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", FeatureFuseAES, FeaturePostRAScheduler]>; +def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureLSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals]>; def ProcessorFeatures { list A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, @@ -947,13 +993,14 @@ def ProcessorFeatures { FeatureFP16FML]; list A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS, FeatureRAS]; + FeatureRCPC, FeatureSSBS, FeatureRAS, + FeaturePerfMon]; list A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC, FeatureSSBS]; + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; list A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC]; + FeatureRCPC, FeaturePerfMon, FeatureSSBS]; list A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, FeatureRCPC, FeaturePerfMon, FeatureSPE, @@ -968,14 +1015,15 @@ def ProcessorFeatures { FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSSBS, FeaturePredRes, - FeatureSB, FeatureSpecRestrict]; + FeatureSB]; list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, - FeatureSPE, FeatureFullFP16, FeatureDotProd]; + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeatureSSBS]; list X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureDotProd, - FeaturePAuth]; + FeaturePAuth, FeatureSSBS]; list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, @@ -1012,13 +1060,15 @@ def ProcessorFeatures { FeatureRDM]; list NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureNEON, - FeatureRCPC, FeatureSSBS]; + FeatureRCPC, FeatureSSBS, FeaturePerfMon]; list NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureNEON, - FeatureRCPC, FeatureSPE, FeatureSSBS]; + FeatureRCPC, FeatureSPE, FeatureSSBS, + FeaturePerfMon]; list NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE, FeatureMatMulInt8, FeatureMTE, FeatureSVE2, - 
FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto]; + FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto, + FeaturePerfMon]; list Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, @@ -1041,17 +1091,20 @@ def ProcessorFeatures { list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; + list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS]; // ETE and TRBE are future architecture extensions. We temporarily enable them // by default for users targeting generic AArch64. The extensions do not // affect code generated by the compiler and can be used only by explicitly // mentioning the new system register names in assembly. - list Generic = [FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureETE]; + list Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE]; } - +// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging +// optimizations. def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic, - [FeatureFuseAES, FeaturePostRAScheduler]>; + [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler]>; def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, [TuneA35]>; def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, @@ -1178,6 +1231,10 @@ def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, [TuneCarmel]>; +// Ampere Computing +def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, + [TuneAmpere1]>; + //===----------------------------------------------------------------------===// // Assembly parser //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp index 4cdf5f144437..37a65b64a885 100644 --- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp +++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp @@ -223,6 +223,7 @@ AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) { if (isFirstInstructionInSequence(PrevInstr) && isSecondInstructionInSequence(CurrInstr)) { LLVM_DEBUG(dbgs() << " ** pattern found at Idx " << Idx << "!\n"); + (void) Idx; Sequences.push_back(CurrInstr); } } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index b54a0eaba7d1..ef4860979dd3 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -132,7 +132,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override { AArch64FI = MF.getInfo(); - STI = static_cast(&MF.getSubtarget()); + STI = &MF.getSubtarget(); SetupMachineFunction(MF); @@ -143,10 +143,10 @@ public: int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; - OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer->EmitCOFFSymbolStorageClass(Scl); - OutStreamer->EmitCOFFSymbolType(Type); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(CurrentFnSym); + OutStreamer->emitCOFFSymbolStorageClass(Scl); + OutStreamer->emitCOFFSymbolType(Type); + OutStreamer->endCOFFSymbolDef(); } // Emit the rest of the function body. @@ -204,10 +204,10 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { // Emit an absolute @feat.00 symbol. 
This appears to be some kind of // compiler features bitfield read by link.exe. MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00")); - OutStreamer->BeginCOFFSymbolDef(S); - OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(S); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); + OutStreamer->endCOFFSymbolDef(); int64_t Feat00Flags = 0; if (M.getModuleFlag("cfguard")) { @@ -251,7 +251,7 @@ void AArch64AsmPrinter::emitFunctionHeaderComment() { const AArch64FunctionInfo *FI = MF->getInfo(); Optional OutlinerString = FI->getOutliningStyle(); if (OutlinerString != None) - OutStreamer->GetCommentOS() << ' ' << OutlinerString; + OutStreamer->getCommentOS() << ' ' << OutlinerString; } void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) @@ -378,10 +378,10 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) { bool CompileKernel = (AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1; - OutStreamer->SwitchSection(OutContext.getELFSection( + OutStreamer->switchSection(OutContext.getELFSection( ".text.hot", ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Sym->getName(), /*IsComdat=*/true)); + ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Sym->getName(), + /*IsComdat=*/true)); OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction); OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak); @@ -827,7 +827,7 @@ void AArch64AsmPrinter::emitJumpTableInfo() { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM); - OutStreamer->SwitchSection(ReadOnlySec); + OutStreamer->switchSection(ReadOnlySec); auto AFI = MF->getInfo(); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { @@ -865,7 +865,7 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() { if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall || MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall || - STI->getRegisterInfo()->hasSVEArgsOrReturn(MF)) { + MF->getInfo()->isSVECC()) { auto *TS = static_cast(OutStreamer->getTargetStreamer()); TS->emitDirectiveVariantPCS(CurrentFnSym); @@ -1129,7 +1129,8 @@ void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) { void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) { Register DestReg = MI.getOperand(0).getReg(); - if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) { + if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround() && + STI->hasNEON()) { // Convert H/S register to corresponding D register if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) DestReg = AArch64::D0 + (DestReg - AArch64::H0); @@ -1262,7 +1263,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { break; case AArch64::DBG_VALUE: - case AArch64::DBG_VALUE_LIST: { + case AArch64::DBG_VALUE_LIST: if (isVerbose() && OutStreamer->hasRawTextSupport()) { SmallString<128> TmpStr; raw_svector_ostream OS(TmpStr); @@ -1282,8 +1283,18 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { OutStreamer->emitCFIBKeyFrame(); return; - } - } + } + + case AArch64::EMITMTETAGGED: { + ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); + if (ExceptionHandlingType != 
ExceptionHandling::DwarfCFI && + ExceptionHandlingType != ExceptionHandling::ARM) + return; + + if (getFunctionCFISectionType(*MF) != CFISection::None) + OutStreamer->emitCFIMTETaggedFrame(); + return; + } // Tail calls use pseudo instructions so they have the proper code-gen // attributes (isCall, isReturn, etc.). We lower them to the real diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index f26151536a58..c0da242a26de 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -82,9 +82,9 @@ def CC_AArch64_AAPCS : CallingConv<[ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], CCPassIndirect>, - CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], CCAssignToReg<[P0, P1, P2, P3]>>, - CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], CCPassIndirect>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, @@ -149,7 +149,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, - CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], CCAssignToReg<[P0, P1, P2, P3]>> ]>; diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp index ac243347b24d..d12689970dc5 100644 --- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -528,10 +528,8 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { // count as MultiUser or block optimization. This is especially important on // arm64_32, where any memory operation is likely to be an explicit use of // xN and an implicit use of wN (the base address register). - if (!UsesSeen.count(Idx)) { + if (UsesSeen.insert(Idx).second) handleUse(MI, MO, LOHInfos[Idx]); - UsesSeen.insert(Idx); - } } } @@ -559,7 +557,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { // Walk the basic block backwards and update the per register state machine // in the process. for (const MachineInstr &MI : - instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { + instructionsWithoutDebug(MBB.instr_rbegin(), MBB.instr_rend())) { unsigned Opcode = MI.getOpcode(); switch (Opcode) { case AArch64::ADDXri: diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 1994e0eb7fb9..18c111255e53 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -217,7 +217,7 @@ def AArch64PostLegalizerLoweringHelper // Post-legalization combines which are primarily optimizations. 
def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", - [copy_prop, erase_undef_store, combines_for_extload, + [copy_prop, combines_for_extload, sext_trunc_sextload, mutate_anyext_to_zext, hoist_logic_op_with_same_opcode_hands, redundant_and, xor_of_and_with_same_reg, @@ -228,6 +228,6 @@ def AArch64PostLegalizerCombinerHelper select_combines, fold_merge_to_zext, constant_fold, identity_combines, ptr_add_immed_chain, overlapping_and, - split_store_zero_128]> { + split_store_zero_128, undef_combines]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 82e8df3b73f9..343f888b7552 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -247,8 +247,8 @@ void SSACCmpConv::updateTailPHIs() { for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) { // PHI operands are (Reg, MBB) at (oi-2, oi-1). if (I.getOperand(oi - 1).getMBB() == CmpBB) { - I.RemoveOperand(oi - 1); - I.RemoveOperand(oi - 2); + I.removeOperand(oi - 1); + I.removeOperand(oi - 2); } } } diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index b0f739cc26e6..910f8cdede75 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -86,6 +86,7 @@ private: unsigned N); bool expandCALL_RVMARKER(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); }; @@ -759,6 +760,37 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER( return true; } +bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + // Expand CALL_BTI pseudo to: + // - a branch to the call target + // - a BTI instruction + // Mark the sequence as a bundle, to avoid passes moving other code in + // between. + + MachineInstr &MI = *MBBI; + MachineOperand &CallTarget = MI.getOperand(0); + assert((CallTarget.isGlobal() || CallTarget.isReg()) && + "invalid operand for regular call"); + unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; + MachineInstr *Call = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); + Call->addOperand(CallTarget); + + MachineInstr *BTI = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT)) + // BTI J so that setjmp can BR to this.
+ .addImm(36) + .getInstr(); + + if (MI.shouldUpdateCallSiteInfo()) + MBB.getParent()->moveCallSiteInfo(&MI, Call); + + MI.eraseFromParent(); + finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator())); + return true; +} + bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { Register CtxReg = MBBI->getOperand(0).getReg(); @@ -1238,6 +1270,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); case AArch64::BLR_RVMARKER: return expandCALL_RVMARKER(MBB, MBBI); + case AArch64::BLR_BTI: + return expandCALL_BTI(MBB, MBBI); case AArch64::StoreSwiftAsyncContext: return expandStoreSwiftAsyncContext(MBB, MBBI); } diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 793663ef97d7..6de374125466 100644 --- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -813,7 +813,7 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { } bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { - auto &ST = static_cast(Fn.getSubtarget()); + auto &ST = Fn.getSubtarget(); if (ST.getProcFamily() != AArch64Subtarget::Falkor) return false; diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index c67fa62c7a92..49fffa01a974 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -14,6 +14,7 @@ #include "AArch64.h" #include "AArch64CallingConvention.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -282,8 +283,7 @@ public: explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { - Subtarget = - &static_cast(FuncInfo.MF->getSubtarget()); + Subtarget = &FuncInfo.MF->getSubtarget(); Context = &FuncInfo.Fn->getContext(); } @@ -3127,6 +3127,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (!Callee && !Symbol) return false; + // Allow SelectionDAG isel to handle calls to functions like setjmp that need + // a bti instruction following the call. + if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && + !Subtarget->noBTIAtReturnTwice() && + MF->getInfo()->branchTargetEnforcement()) + return false; + // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) return false; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index a4d20735e2b1..78babdf9f1f0 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -117,6 +117,72 @@ // // FIXME: also explain the redzone concept. // +// An example of the prologue: +// +// .globl __foo +// .align 2 +// __foo: +// Ltmp0: +// .cfi_startproc +// .cfi_personality 155, ___gxx_personality_v0 +// Leh_func_begin: +// .cfi_lsda 16, Lexception33 +// +// stp xa,bx, [sp, -#offset]! +// ... +// stp x28, x27, [sp, #offset-32] +// stp fp, lr, [sp, #offset-16] +// add fp, sp, #offset - 16 +// sub sp, sp, #1360 +// +// The Stack: +// +-------------------------------------------+ +// 10000 | ........ | ........ | ........ | ........ | +// 10004 | ........ | ........ | ........ | ........ 
| +// +-------------------------------------------+ +// 10008 | ........ | ........ | ........ | ........ | +// 1000c | ........ | ........ | ........ | ........ | +// +===========================================+ +// 10010 | X28 Register | +// 10014 | X28 Register | +// +-------------------------------------------+ +// 10018 | X27 Register | +// 1001c | X27 Register | +// +===========================================+ +// 10020 | Frame Pointer | +// 10024 | Frame Pointer | +// +-------------------------------------------+ +// 10028 | Link Register | +// 1002c | Link Register | +// +===========================================+ +// 10030 | ........ | ........ | ........ | ........ | +// 10034 | ........ | ........ | ........ | ........ | +// +-------------------------------------------+ +// 10038 | ........ | ........ | ........ | ........ | +// 1003c | ........ | ........ | ........ | ........ | +// +-------------------------------------------+ +// +// [sp] = 10030 :: >>initial value<< +// sp = 10020 :: stp fp, lr, [sp, #-16]! +// fp = sp == 10020 :: mov fp, sp +// [sp] == 10020 :: stp x28, x27, [sp, #-16]! +// sp == 10010 :: >>final value<< +// +// The frame pointer (w29) points to address 10020. If we use an offset of +// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 +// for w27, and -32 for w28: +// +// Ltmp1: +// .cfi_def_cfa w29, 16 +// Ltmp2: +// .cfi_offset w30, -8 +// Ltmp3: +// .cfi_offset w29, -16 +// Ltmp4: +// .cfi_offset w27, -24 +// Ltmp5: +// .cfi_offset w28, -32 +// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" @@ -126,6 +192,7 @@ #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -154,7 +221,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -187,7 +253,7 @@ static cl::opt OrderFrameObjects("aarch64-order-frame-objects", cl::init(true), cl::Hidden); cl::opt EnableHomogeneousPrologEpilog( - "homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden, + "homogeneous-prolog-epilog", cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)")); @@ -233,6 +299,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); static StackOffset getSVEStackSize(const MachineFunction &MF); +static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF); /// Returns true if a homogeneous prolog or epilog code can be emitted /// for the size optimization. If possible, a frame helper call is injected. 
@@ -440,137 +507,309 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } -// Convenience function to create a DWARF expression for -// Expr + NumBytes + NumVGScaledBytes * AArch64::VG -static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, - int NumBytes, int NumVGScaledBytes, unsigned VG, - llvm::raw_string_ostream &Comment) { - uint8_t buffer[16]; +void AArch64FrameLowering::emitCalleeSavedGPRLocations( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); - if (NumBytes) { - Expr.push_back(dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer)); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); - Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes); - } + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + if (CSI.empty()) + return; - if (NumVGScaledBytes) { - Expr.push_back((uint8_t)dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer)); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + DebugLoc DL = MBB.findDebugLoc(MBBI); - Expr.push_back((uint8_t)dwarf::DW_OP_bregx); - Expr.append(buffer, buffer + encodeULEB128(VG, buffer)); - Expr.push_back(0); + for (const auto &Info : CSI) { + if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) + continue; - Expr.push_back((uint8_t)dwarf::DW_OP_mul); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); + assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); + unsigned DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true); - Comment << (NumVGScaledBytes < 0 ? " - " : " + ") - << std::abs(NumVGScaledBytes) << " * VG"; + int64_t Offset = + MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea(); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } -// Creates an MCCFIInstruction: -// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr } -MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( - const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const { - int64_t NumBytes, NumVGScaledBytes; - AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes, - NumVGScaledBytes); +void AArch64FrameLowering::emitCalleeSavedSVELocations( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + if (CSI.empty()) + return; + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + DebugLoc DL = MBB.findDebugLoc(MBBI); + AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); + + for (const auto &Info : CSI) { + if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)) + continue; + + // Not all unwinders may know about SVE registers, so assume the lowest + // common denominator.
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); + unsigned Reg = Info.getReg(); + if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg)) + continue; + + StackOffset Offset = + StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - + StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI)); - std::string CommentBuffer = "sp"; - llvm::raw_string_ostream Comment(CommentBuffer); + unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } +} - // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG) - SmallString<64> Expr; - Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31)); - Expr.push_back(0); - appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, - TRI.getDwarfRegNum(AArch64::VG, true), Comment); +void AArch64FrameLowering::emitCalleeSavedFrameMoves( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + emitCalleeSavedGPRLocations(MBB, MBBI); + emitCalleeSavedSVELocations(MBB, MBBI); +} - // Wrap this into DW_CFA_def_cfa. - SmallString<64> DefCfaExpr; - DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); - uint8_t buffer[16]; - DefCfaExpr.append(buffer, - buffer + encodeULEB128(Expr.size(), buffer)); - DefCfaExpr.append(Expr.str()); - return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), - Comment.str()); +static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, + unsigned DwarfReg) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg)); + BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex); } -MCCFIInstruction AArch64FrameLowering::createCfaOffset( - const TargetRegisterInfo &TRI, unsigned Reg, - const StackOffset &OffsetFromDefCFA) const { - int64_t NumBytes, NumVGScaledBytes; - AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( - OffsetFromDefCFA, NumBytes, NumVGScaledBytes); +void AArch64FrameLowering::resetCFIToInitialState( + MachineBasicBlock &MBB) const { - unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + MachineFunction &MF = *MBB.getParent(); + const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + const auto &TRI = + static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo()); + const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); - // Non-scalable offsets can use DW_CFA_offset directly. - if (!NumVGScaledBytes) - return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes); + const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION); + DebugLoc DL; - std::string CommentBuffer; - llvm::raw_string_ostream Comment(CommentBuffer); - Comment << printReg(Reg, &TRI) << " @ cfa"; + // Reset the CFA to `SP + 0`. + MachineBasicBlock::iterator InsertPt = MBB.begin(); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0)); + BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); - // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG) - SmallString<64> OffsetExpr; - appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes, - TRI.getDwarfRegNum(AArch64::VG, true), Comment); + // Flip the RA sign state.
+ if (MFI.shouldSignReturnAddress()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); + } - // Wrap this into DW_CFA_expression - SmallString<64> CfaExpr; - CfaExpr.push_back(dwarf::DW_CFA_expression); - uint8_t buffer[16]; - CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); - CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer)); - CfaExpr.append(OffsetExpr.str()); + // Shadow call stack uses X18, reset it. + if (needsShadowCallStackPrologueEpilogue(MF)) + insertCFISameValue(CFIDesc, MF, MBB, InsertPt, + TRI.getDwarfRegNum(AArch64::X18, true)); - return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str()); + // Emit .cfi_same_value for callee-saved registers. + const std::vector<CalleeSavedInfo> &CSI = + MF.getFrameInfo().getCalleeSavedInfo(); + for (const auto &Info : CSI) { + unsigned Reg = Info.getReg(); + if (!TRI.regNeedsCFI(Reg, Reg)) + continue; + insertCFISameValue(CFIDesc, MF, MBB, InsertPt, + TRI.getDwarfRegNum(Reg, true)); + } } -void AArch64FrameLowering::emitCalleeSavedFrameMoves( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { +static void emitCalleeSavedRestores(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool SVE) { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const TargetInstrInfo *TII = STI.getInstrInfo(); - DebugLoc DL = MBB.findDebugLoc(MBBI); - // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); if (CSI.empty()) return; + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + DebugLoc DL = MBB.findDebugLoc(MBBI); + for (const auto &Info : CSI) { - Register Reg = Info.getReg(); + if (SVE != + (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)) + continue; - // Not all unwinders may know about SVE registers, so assume the lowest - // common demoninator.
- unsigned NewReg; - if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg)) - Reg = NewReg; - else + unsigned Reg = Info.getReg(); + if (SVE && + !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg)) continue; - StackOffset Offset; - if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) { - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - Offset = - StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - - StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); - } else { - Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea()); - } - unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore( + nullptr, TRI.getDwarfRegNum(Info.getReg(), true))); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); + .setMIFlags(MachineInstr::FrameDestroy); + } +} + +void AArch64FrameLowering::emitCalleeSavedGPRRestores( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + emitCalleeSavedRestores(MBB, MBBI, false); +} + +void AArch64FrameLowering::emitCalleeSavedSVERestores( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + emitCalleeSavedRestores(MBB, MBBI, true); +} + +static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { + switch (Reg.id()) { + default: + // The called routine is expected to preserve r19-r28; + // r29 and r30 are used as the frame pointer and link register, respectively. + return 0; + + // GPRs +#define CASE(n) \ + case AArch64::W##n: \ + case AArch64::X##n: \ + return AArch64::X##n + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + CASE(17); + CASE(18); +#undef CASE + + // FPRs +#define CASE(n) \ + case AArch64::B##n: \ + case AArch64::H##n: \ + case AArch64::S##n: \ + case AArch64::D##n: \ + case AArch64::Q##n: \ + return HasSVE ? AArch64::Z##n : AArch64::Q##n + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + CASE(17); + CASE(18); + CASE(19); + CASE(20); + CASE(21); + CASE(22); + CASE(23); + CASE(24); + CASE(25); + CASE(26); + CASE(27); + CASE(28); + CASE(29); + CASE(30); + CASE(31); +#undef CASE + } +} + +void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const { + // Insertion point. + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + + // Fake a debug loc. + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + const MachineFunction &MF = *MBB.getParent(); + const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); + const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); + + BitVector GPRsToZero(TRI.getNumRegs()); + BitVector FPRsToZero(TRI.getNumRegs()); + bool HasSVE = STI.hasSVE(); + for (MCRegister Reg : RegsToZero.set_bits()) { + if (TRI.isGeneralPurposeRegister(MF, Reg)) { + // For GPRs, we only care to clear out the 64-bit register.
+ if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE)) + GPRsToZero.set(XReg); + } else if (AArch64::FPR128RegClass.contains(Reg) || + AArch64::FPR64RegClass.contains(Reg) || + AArch64::FPR32RegClass.contains(Reg) || + AArch64::FPR16RegClass.contains(Reg) || + AArch64::FPR8RegClass.contains(Reg)) { + // For FPRs, zero the full Q register (or the Z register when SVE is available). + if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE)) + FPRsToZero.set(XReg); + } + } + + const AArch64InstrInfo &TII = *STI.getInstrInfo(); + + // Zero out GPRs. + for (MCRegister Reg : GPRsToZero.set_bits()) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0); + + // Zero out FP/vector registers. + for (MCRegister Reg : FPRsToZero.set_bits()) + if (HasSVE) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::DUP_ZI_D), Reg) + .addImm(0) + .addImm(0); + else + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVIv2d_ns), Reg).addImm(0); + + if (HasSVE) { + for (MCRegister PReg : + {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4, + AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9, + AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14, + AArch64::P15}) { + if (RegsToZero[PReg]) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg); + } } } @@ -881,16 +1120,9 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, - bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) { - // Ignore instructions that do not operate on SP, i.e. shadow call stack - // instructions and associated CFI instruction. - while (MBBI->getOpcode() == AArch64::STRXpost || - MBBI->getOpcode() == AArch64::LDRXpre || - MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) { - if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION) - assert(MBBI->getOperand(0).getReg() != AArch64::SP); - ++MBBI; - } + bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, + MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup, + int CFAOffset = 0) { unsigned NewOpc; switch (MBBI->getOpcode()) { default: @@ -949,12 +1181,14 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // If the first store isn't right where we want SP then we can't fold the // update in so create a normal arithmetic instruction instead. + MachineFunction &MF = *MBB.getParent(); if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 || CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(CSStackSizeInc), TII, - InProlog ? MachineInstr::FrameSetup - : MachineInstr::FrameDestroy); + StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag, + false, false, nullptr, EmitCFI, + StackOffset::getFixed(CFAOffset)); + return std::prev(MBBI); } @@ -981,8 +1215,15 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // Generate a new SEH code that corresponds to the new instruction. if (NeedsWinCFI) { *HasWinCFI = true; - InsertSEH(*MIB, *TII, - InProlog ?
MachineInstr::FrameSetup : MachineInstr::FrameDestroy); + InsertSEH(*MIB, *TII, FrameFlag); + } + + if (EmitCFI) { + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(FrameFlag); } return std::prev(MBB.erase(MBBI)); @@ -998,16 +1239,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, return; unsigned Opc = MI.getOpcode(); - - // Ignore instructions that do not operate on SP, i.e. shadow call stack - // instructions and associated CFI instruction. - if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre || - Opc == AArch64::CFI_INSTRUCTION) { - if (Opc != AArch64::CFI_INSTRUCTION) - assert(MI.getOperand(0).getReg() != AArch64::SP); - return; - } - unsigned Scale; switch (Opc) { case AArch64::STPXi: @@ -1049,38 +1280,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, } } -static void adaptForLdStOpt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator FirstSPPopI, - MachineBasicBlock::iterator LastPopI) { - // Sometimes (when we restore in the same order as we save), we can end up - // with code like this: - // - // ldp x26, x25, [sp] - // ldp x24, x23, [sp, #16] - // ldp x22, x21, [sp, #32] - // ldp x20, x19, [sp, #48] - // add sp, sp, #64 - // - // In this case, it is always better to put the first ldp at the end, so - // that the load-store optimizer can run and merge the ldp and the add into - // a post-index ldp. - // If we managed to grab the first pop instruction, move it to the end. - if (ReverseCSRRestoreSeq) - MBB.splice(FirstSPPopI, &MBB, LastPopI); - // We should end up with something like this now: - // - // ldp x24, x23, [sp, #16] - // ldp x22, x21, [sp, #32] - // ldp x20, x19, [sp, #48] - // ldp x26, x25, [sp] - // add sp, sp, #64 - // - // and the load-store optimizer can merge the last two instructions into: - // - // ldp x26, x25, [sp], #64 - // -} - static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } @@ -1099,6 +1298,80 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { } } +static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) { + if (!(llvm::any_of( + MF.getFrameInfo().getCalleeSavedInfo(), + [](const auto &Info) { return Info.getReg() == AArch64::LR; }) && + MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack))) + return false; + + if (!MF.getSubtarget().isXRegisterReserved(18)) + report_fatal_error("Must reserve x18 to use shadow call stack"); + + return true; +} + +static void emitShadowCallStackPrologue(const TargetInstrInfo &TII, + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, bool NeedsWinCFI, + bool NeedsUnwindInfo) { + // Shadow call stack prolog: str x30, [x18], #8 + BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost)) + .addReg(AArch64::X18, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::X18) + .addImm(8) + .setMIFlag(MachineInstr::FrameSetup); + + // This instruction also makes x18 live-in to the entry block. + MBB.addLiveIn(AArch64::X18); + + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); + + if (NeedsUnwindInfo) { + // Emit a CFI instruction that causes 8 to be subtracted from the value of + // x18 when unwinding past this frame. 
+    static const char CFIInst[] = {
+        dwarf::DW_CFA_val_expression,
+        18, // register
+        2,  // length
+        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+        static_cast<char>(-8) & 0x7f, // addend (sleb128)
+    };
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
+    BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+}
+
+static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
+                                        MachineFunction &MF,
+                                        MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI,
+                                        const DebugLoc &DL) {
+  // Shadow call stack epilog: ldr x30, [x18, #-8]!
+  BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
+      .addReg(AArch64::X18, RegState::Define)
+      .addReg(AArch64::LR, RegState::Define)
+      .addReg(AArch64::X18)
+      .addImm(-8)
+      .setMIFlag(MachineInstr::FrameDestroy);
+
+  if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo()) {
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
+    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameDestroy);
+  }
+}
+
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1109,8 +1382,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  bool needsFrameMoves =
-      MF.needsFrameMoves() && !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+  bool EmitCFI = AFI->needsDwarfUnwindInfo();
   bool HasFP = hasFP(MF);
   bool NeedsWinCFI = needsWinCFI(MF);
   bool HasWinCFI = false;
@@ -1128,8 +1400,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   DebugLoc DL;
 
   const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
-  if (MFnI.shouldSignReturnAddress()) {
+  if (needsShadowCallStackPrologueEpilogue(MF))
+    emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
+                                MFnI.needsDwarfUnwindInfo());
 
+  if (MFnI.shouldSignReturnAddress()) {
     unsigned PACI;
     if (MFnI.shouldSignWithBKey()) {
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
@@ -1145,12 +1420,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
         .addReg(AArch64::LR)
         .addReg(AArch64::SP, RegState::InternalRead);
     MI.setMIFlag(MachineInstr::FrameSetup);
-
-    unsigned CFIIndex =
-        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
-    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex)
-        .setMIFlags(MachineInstr::FrameSetup);
+    if (EmitCFI) {
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
+  }
+  if (EmitCFI && MFnI.isMTETagged()) {
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
+        .setMIFlag(MachineInstr::FrameSetup);
   }
 
   // We signal the presence of a Swift extended frame to external tools by
@@ -1227,7 +1507,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                       StackOffset::getFixed(-NumBytes), TII,
                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
-    if (needsFrameMoves) {
+    if (EmitCFI) {
       // Label used to tie together the PROLOG_LABEL and the MachineMoves.
       MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
       // Encode the stack size of the leaf function.
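// [Editor's aside, not part of the patch] The CFIInst escape in
// emitShadowCallStackPrologue above hand-assembles the DWARF expression
// "x18 = x18 - 8". A minimal stand-alone sketch (illustrative names only) of
// why `static_cast<char>(-8) & 0x7f` is the complete SLEB128 encoding of the
// addend: values in [-64, 63] fit in a single 7-bit group with the
// continuation bit (bit 7) clear.
#include <cassert>
#include <cstdint>

constexpr uint8_t sleb128SingleByte(int8_t V) {
  // Low 7 bits of the two's-complement value; bit 6 doubles as the sign bit.
  return static_cast<uint8_t>(V) & 0x7f;
}

int main() {
  assert(sleb128SingleByte(-8) == 0x78); // 0b0111'1000: sign bit set, no cont.
  return 0;
}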
@@ -1261,14 +1541,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(-NumBytes), TII,
-                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+                    EmitCFI);
     NumBytes = 0;
   } else if (HomPrologEpilog) {
     // Stack has been already adjusted.
     NumBytes -= PrologueSaveSize;
   } else if (PrologueSaveSize != 0) {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
+        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
+        EmitCFI);
     NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -1322,8 +1604,27 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                       StackOffset::getFixed(FPOffset), TII,
                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
     }
+    if (EmitCFI) {
+      // Define the current CFA rule to use the provided FP.
+      const int OffsetToFirstCalleeSaveFromFP =
+          AFI->getCalleeSaveBaseToFrameRecordOffset() -
+          AFI->getCalleeSavedStackSize();
+      Register FramePtr = RegInfo->getFrameRegister(MF);
+      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+          nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
   }
 
+  // Now emit the moves for whatever callee saved regs we have (including FP,
+  // LR if those are saved). Frame instructions for SVE registers are emitted
+  // later, after the instructions which actually save the SVE regs.
+  if (EmitCFI)
+    emitCalleeSavedGPRLocations(MBB, MBBI);
+
   if (windowsRequiresStackProbe(MF, NumBytes)) {
     uint64_t NumWords = NumBytes >> 4;
     if (NeedsWinCFI) {
@@ -1436,14 +1737,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Allocate space for the callee saves (if any).
-  emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
-                  -AllocateBefore, TII,
-                  MachineInstr::FrameSetup);
+  emitFrameOffset(
+      MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
+      MachineInstr::FrameSetup, false, false, nullptr,
+      EmitCFI && !HasFP && AllocateBefore,
+      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+
+  if (EmitCFI)
+    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
 
   // Finally allocate remaining SVE stack space.
   emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-                  -AllocateAfter, TII,
-                  MachineInstr::FrameSetup);
+                  -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
+                  nullptr, EmitCFI && !HasFP && AllocateAfter,
+                  AllocateBefore + StackOffset::getFixed(
+                                       (int64_t)MFI.getStackSize() - NumBytes));
 
   // Allocate space for the rest of the frame.
   if (NumBytes) {
@@ -1458,14 +1766,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     }
 
     // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF))
+    if (!canUseRedZone(MF)) {
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
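// [Editor's aside, not part of the patch] A worked example of the
// "FixedObject - OffsetToFirstCalleeSaveFromFP" arithmetic in the EmitCFI
// block above, under assumed values: a plain frame with no fixed objects
// (FixedObject == 0) whose only callee saves are fp and lr
// (CalleeSavedStackSize == 16), with the frame record at the callee-save base
// (CalleeSaveBaseToFrameRecordOffset == 0):
#include <cassert>

int main() {
  const int CalleeSaveBaseToFrameRecordOffset = 0; // assumed layout
  const int CalleeSavedStackSize = 16;             // fp + lr
  const int FixedObject = 0;                       // no Win64 varargs area
  const int OffsetToFirstCalleeSaveFromFP =
      CalleeSaveBaseToFrameRecordOffset - CalleeSavedStackSize; // -16
  // Yields the classic ".cfi_def_cfa w29, 16" for an fp/lr-only frame record.
  assert(FixedObject - OffsetToFirstCalleeSaveFromFP == 16);
  return 0;
}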
- emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); - + emitFrameOffset( + MBB, MBBI, DL, scratchSPReg, AArch64::SP, + StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup, + false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, + SVEStackSize + + StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); + } if (NeedsRealignment) { const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); assert(NrBitsToZero > 1); @@ -1532,109 +1843,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MBB.addLiveIn(AArch64::X1); } } - - if (needsFrameMoves) { - // An example of the prologue: - // - // .globl __foo - // .align 2 - // __foo: - // Ltmp0: - // .cfi_startproc - // .cfi_personality 155, ___gxx_personality_v0 - // Leh_func_begin: - // .cfi_lsda 16, Lexception33 - // - // stp xa,bx, [sp, -#offset]! - // ... - // stp x28, x27, [sp, #offset-32] - // stp fp, lr, [sp, #offset-16] - // add fp, sp, #offset - 16 - // sub sp, sp, #1360 - // - // The Stack: - // +-------------------------------------------+ - // 10000 | ........ | ........ | ........ | ........ | - // 10004 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10008 | ........ | ........ | ........ | ........ | - // 1000c | ........ | ........ | ........ | ........ | - // +===========================================+ - // 10010 | X28 Register | - // 10014 | X28 Register | - // +-------------------------------------------+ - // 10018 | X27 Register | - // 1001c | X27 Register | - // +===========================================+ - // 10020 | Frame Pointer | - // 10024 | Frame Pointer | - // +-------------------------------------------+ - // 10028 | Link Register | - // 1002c | Link Register | - // +===========================================+ - // 10030 | ........ | ........ | ........ | ........ | - // 10034 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10038 | ........ | ........ | ........ | ........ | - // 1003c | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // - // [sp] = 10030 :: >>initial value<< - // sp = 10020 :: stp fp, lr, [sp, #-16]! - // fp = sp == 10020 :: mov fp, sp - // [sp] == 10020 :: stp x28, x27, [sp, #-16]! - // sp == 10010 :: >>final value<< - // - // The frame pointer (w29) points to address 10020. If we use an offset of - // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 - // for w27, and -32 for w28: - // - // Ltmp1: - // .cfi_def_cfa w29, 16 - // Ltmp2: - // .cfi_offset w30, -8 - // Ltmp3: - // .cfi_offset w29, -16 - // Ltmp4: - // .cfi_offset w27, -24 - // Ltmp5: - // .cfi_offset w28, -32 - - if (HasFP) { - const int OffsetToFirstCalleeSaveFromFP = - AFI->getCalleeSaveBaseToFrameRecordOffset() - - AFI->getCalleeSavedStackSize(); - Register FramePtr = RegInfo->getFrameRegister(MF); - - // Define the current CFA rule to use the provided FP. 
-      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
-      unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
-    } else {
-      unsigned CFIIndex;
-      if (SVEStackSize) {
-        const TargetSubtargetInfo &STI = MF.getSubtarget();
-        const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
-        StackOffset TotalSize =
-            SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
-        CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
-      } else {
-        // Encode the stack size of the leaf function.
-        CFIIndex = MF.addFrameInst(
-            MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
-      }
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
-    }
-
-    // Now emit the moves for whatever callee saved regs we have (including FP,
-    // LR if those are saved).
-    emitCalleeSavedFrameMoves(MBB, MBBI);
-  }
 }
 
 static void InsertReturnAddressAuth(MachineFunction &MF,
@@ -1653,7 +1861,8 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
   // The AUTIASP instruction assembles to a hint instruction before v8.3a so
   // this instruction can safely be used for any v8a architecture.
   // From v8.3a onwards there are optimised authenticate LR and return
-  // instructions, namely RETA{A,B}, that can be used instead.
+  // instructions, namely RETA{A,B}, that can be used instead. In this case the
+  // DW_CFA_AARCH64_negate_ra_state can't be emitted.
   if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
       MBBI->getOpcode() == AArch64::RET_ReallyLR) {
     BuildMI(MBB, MBBI, DL,
@@ -1665,6 +1874,12 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
         MBB, MBBI, DL,
         TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
         .setMIFlag(MachineInstr::FrameDestroy);
+
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameDestroy);
   }
 }
 
@@ -1686,6 +1901,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL;
   bool NeedsWinCFI = needsWinCFI(MF);
+  bool EmitCFI = MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo();
   bool HasWinCFI = false;
   bool IsFunclet = false;
   auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
@@ -1695,6 +1911,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     IsFunclet = isFuncletReturnInstr(*MBBI);
   }
 
+  auto FinishingTouches = make_scope_exit([&]() {
+    InsertReturnAddressAuth(MF, MBB);
+    if (needsShadowCallStackPrologueEpilogue(MF))
+      emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
+    if (EmitCFI)
+      emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
+  });
+
   int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
                                : MFI.getStackSize();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -1707,36 +1931,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
   // How much of the stack used by incoming arguments this function is expected
   // to restore in this particular epilogue.
   int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
-
-  // The stack frame should be like below,
-  //
-  //      ----------------------                     ---
-  //      |                    |                      |
-  //      | BytesInStackArgArea|              CalleeArgStackSize
-  //      | (NumReusableBytes) |                (of tail call)
-  //      |                    |                     ---
-  //      |                    |                      |
-  //      ---------------------|        ---           |
-  //      |                    |         |            |
-  //      |   CalleeSavedReg   |         |            |
-  //      | (CalleeSavedStackSize)|      |            |
-  //      |                    |         |            |
-  //      ---------------------|         |         NumBytes
-  //      |                    |     StackSize  (StackAdjustUp)
-  //      |   LocalStackSize   |         |            |
-  //      | (covering callee   |         |            |
-  //      |       args)        |         |            |
-  //      |                    |         |            |
-  //      ----------------------        ---          ---
-  //
-  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
-  //             = StackSize + ArgumentPopSize
-  //
-  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
-  // it as the 2nd argument of AArch64ISD::TC_RETURN.
-
-  auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
-
   bool IsWin64 =
       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
 
@@ -1771,9 +1965,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
   // Assume we can't combine the last pop with the sp restore.
+  bool CombineAfterCSRBump = false;
   if (!CombineSPBump && PrologueSaveSize != 0) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
-    while (AArch64InstrInfo::isSEHInstruction(*Pop))
+    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
+           AArch64InstrInfo::isSEHInstruction(*Pop))
       Pop = std::prev(Pop);
     // Converting the last ldp to a post-index ldp is valid only if the last
     // ldp's offset is 0.
     const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
     // If the offset is 0 and the AfterCSR pop is not actually trying to
     // allocate more stack for arguments (in space that an untimely interrupt
     // may clobber), convert it to a post-index ldp.
-    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0)
+    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
       convertCalleeSaveRestoreToSPPrePostIncDec(
-          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
-    else {
+          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
+          MachineInstr::FrameDestroy, PrologueSaveSize);
+    } else {
       // If not, make sure to emit an add after the last ldp.
       // We're doing this by transferring the size to be restored from the
       // adjustment *before* the CSR pops to the adjustment *after* the CSR
      // pops.
       AfterCSRPopSize += PrologueSaveSize;
+      CombineAfterCSRBump = true;
     }
   }
 
@@ -1822,15 +2020,27 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
-    // We need to reset FP to its untagged state on return. Bit 60 is currently
-    // used to show the presence of an extended frame.
-
-    // BIC x29, x29, #0x1000_0000_0000_0000
-    BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
-            AArch64::FP)
-        .addUse(AArch64::FP)
-        .addImm(0x10fe)
-        .setMIFlag(MachineInstr::FrameDestroy);
+    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+    case SwiftAsyncFramePointerMode::DeploymentBased:
+      // Avoid the reload as it is GOT relative, and instead fall back to the
+      // hardcoded value below. This allows a mismatch between the OS and
+      // application without immediately terminating on the difference.
+      LLVM_FALLTHROUGH;
+    case SwiftAsyncFramePointerMode::Always:
+      // We need to reset FP to its untagged state on return. Bit 60 is
+      // currently used to show the presence of an extended frame.
+
+      // BIC x29, x29, #0x1000_0000_0000_0000
+      BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
+              AArch64::FP)
+          .addUse(AArch64::FP)
+          .addImm(0x10fe)
+          .setMIFlag(MachineInstr::FrameDestroy);
+      break;
+
+    case SwiftAsyncFramePointerMode::Never:
+      break;
+    }
   }
 
   const StackOffset &SVEStackSize = getSVEStackSize(MF);
 
@@ -1838,10 +2048,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+
+    // When we are about to restore the CSRs, the CFA register is SP again.
+    if (EmitCFI && hasFP(MF)) {
+      const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
+      BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameDestroy);
+    }
+
     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
-                    &HasWinCFI);
+                    &HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
     if (HasWinCFI)
       BuildMI(MBB, MBB.getFirstTerminator(), DL,
               TII->get(AArch64::SEH_EpilogEnd))
@@ -1873,30 +2095,44 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
   // Deallocate the SVE area.
   if (SVEStackSize) {
-    if (AFI->isStackRealigned()) {
-      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
+    // If we have stack realignment or variable sized objects on the stack,
+    // restore the stack pointer from the frame pointer prior to SVE CSR
+    // restoration.
+    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
+      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
         // Set SP to start of SVE callee-save area from which they can
         // be reloaded. The code below will deallocate the stack space
        // by moving FP -> SP.
         emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
                         StackOffset::getScalable(-CalleeSavedSize), TII,
                         MachineInstr::FrameDestroy);
+      }
     } else {
       if (AFI->getSVECalleeSavedStackSize()) {
         // Deallocate the non-SVE locals first before we can deallocate (and
        // restore callee saves) from the SVE area.
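// [Editor's aside, not part of the patch] The ANDXri in the Always case above
// is the alias "BIC x29, x29, #0x1000_0000_0000_0000": it clears bit 60 of
// the frame pointer, the bit used to advertise a Swift extended frame
// (0x10fe is the AArch64 logical-immediate encoding carried by the
// instruction, not the mask itself). The mask arithmetic in plain C++, with a
// hypothetical tagged frame-pointer value:
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t ExtendedFrameBit = 1ULL << 60;
  uint64_t FP = 0x1000000000ULL | ExtendedFrameBit; // assumed tagged fp value
  FP &= ~ExtendedFrameBit;                          // what the single BIC does
  assert(FP == 0x1000000000ULL);                    // bit 60 cleared
  return 0;
}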
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes), TII, - MachineInstr::FrameDestroy); + emitFrameOffset( + MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy, + false, false, nullptr, EmitCFI && !hasFP(MF), + SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize)); NumBytes = 0; } emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - DeallocateBefore, TII, MachineInstr::FrameDestroy); + DeallocateBefore, TII, MachineInstr::FrameDestroy, false, + false, nullptr, EmitCFI && !hasFP(MF), + SVEStackSize + + StackOffset::getFixed(NumBytes + PrologueSaveSize)); emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - DeallocateAfter, TII, MachineInstr::FrameDestroy); + DeallocateAfter, TII, MachineInstr::FrameDestroy, false, + false, nullptr, EmitCFI && !hasFP(MF), + DeallocateAfter + + StackOffset::getFixed(NumBytes + PrologueSaveSize)); } + if (EmitCFI) + emitCalleeSavedSVERestores(MBB, RestoreEnd); } if (!hasFP(MF)) { @@ -1906,23 +2142,24 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (RedZone && AfterCSRPopSize == 0) return; + // Pop the local variables off the stack. If there are no callee-saved + // registers, it means we are actually positioned at the terminator and can + // combine stack increment for the locals and the stack increment for + // callee-popped arguments into (possibly) a single instruction and be done. bool NoCalleeSaveRestore = PrologueSaveSize == 0; int64_t StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) StackRestoreBytes += AfterCSRPopSize; + emitFrameOffset( + MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(StackRestoreBytes), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI, + StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize)); + // If we were able to combine the local stack pop with the argument pop, // then we're done. - bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0; - - // If we're done after this, make sure to help the load store optimizer. - if (Done) - adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); - - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(StackRestoreBytes), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - if (Done) { + if (NoCalleeSaveRestore || AfterCSRPopSize == 0) { if (HasWinCFI) { BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -1948,29 +2185,29 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); + // When we are about to restore the CSRs, the CFA register is SP again. + if (EmitCFI && hasFP(MF)) { + const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo(); + unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize)); + BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameDestroy); + } + // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. 
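// [Editor's aside, not part of the patch] Bookkeeping behind the epilogue CFI
// above: once the CFA rule is switched back to SP at offset PrologueSaveSize,
// folding the final callee-save pop into a post-index ldp
// (convertCalleeSaveRestoreToSPPrePostIncDec) emits
// ".cfi_def_cfa_offset CFAOffset - CSStackSizeInc". A tiny check with an
// assumed 16-byte fp/lr pair:
#include <cassert>

int main() {
  const int PrologueSaveSize = 16;             // hypothetical: one ldp of fp, lr
  const int CFAOffset = PrologueSaveSize;      // CFA rule before the pop
  const int CSStackSizeInc = PrologueSaveSize; // SP grows by this in epilogue
  assert(CFAOffset - CSStackSizeInc == 0);     // back to .cfi_def_cfa_offset 0
  return 0;
}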
if (AfterCSRPopSize) { assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an " "interrupt may have clobbered"); - // Find an insertion point for the first ldp so that it goes before the - // shadow call stack epilog instruction. This ensures that the restore of - // lr from x18 is placed after the restore from sp. - auto FirstSPPopI = MBB.getFirstTerminator(); - while (FirstSPPopI != Begin) { - auto Prev = std::prev(FirstSPPopI); - if (Prev->getOpcode() != AArch64::LDRXpre || - Prev->getOperand(0).getReg() == AArch64::SP) - break; - FirstSPPopI = Prev; - } - adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); - - emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(AfterCSRPopSize), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset( + MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI, &HasWinCFI, EmitCFI, + StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0)); } if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -2061,8 +2298,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame() && !isSVE) { - // We shouldn't prefer using the FP when there is an SVE area - // in between the FP and the non-SVE locals/spills. + // We shouldn't prefer using the FP to access fixed-sized stack objects when + // there are scalable (SVE) objects in between the FP and the fixed-sized + // objects. PreferFP &= !SVEStackSize; // Note: Keeping the following as multiple 'if' statements rather than @@ -2083,7 +2321,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // offsets is smaller than for positive ones. If an offset is available // via the FP and the SP, use whichever is closest. bool FPOffsetFits = !ForSimm || FPOffset >= -256; - PreferFP |= Offset > -FPOffset; + PreferFP |= Offset > -FPOffset && !SVEStackSize; if (MFI.hasVarSizedObjects()) { // If we have variable sized objects, we can use either FP or BP, as the @@ -2270,7 +2508,7 @@ struct RegPairInfo { static void computeCalleeSaveRegisterPairs( MachineFunction &MF, ArrayRef CSI, const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs, - bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) { + bool NeedsFrameRecord) { if (CSI.empty()) return; @@ -2349,15 +2587,6 @@ static void computeCalleeSaveRegisterPairs( } } - // If either of the registers to be saved is the lr register, it means that - // we also need to save lr in the shadow call stack. - if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) && - MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { - if (!MF.getSubtarget().isXRegisterReserved(18)) - report_fatal_error("Must reserve x18 to use shadow call stack"); - NeedShadowCallStackProlog = true; - } - // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. 
@@ -2476,43 +2705,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( DebugLoc DL; SmallVector RegPairs; - bool NeedShadowCallStackProlog = false; - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog, hasFP(MF)); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - if (NeedShadowCallStackProlog) { - // Shadow call stack prolog: str x30, [x18], #8 - BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost)) - .addReg(AArch64::X18, RegState::Define) - .addReg(AArch64::LR) - .addReg(AArch64::X18) - .addImm(8) - .setMIFlag(MachineInstr::FrameSetup); - - if (NeedsWinCFI) - BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - - // Emit a CFI instruction that causes 8 to be subtracted from the value of - // x18 when unwinding past this frame. - static const char CFIInst[] = { - dwarf::DW_CFA_val_expression, - 18, // register - 2, // length - static_cast(unsigned(dwarf::DW_OP_breg18)), - static_cast(-8) & 0x7f, // addend (sleb128) - }; - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape( - nullptr, StringRef(CFIInst, sizeof(CFIInst)))); - BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); - - // This instruction also makes x18 live-in to the entry block. - MBB.addLiveIn(AArch64::X18); - } + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (homogeneousPrologEpilog(MF)) { auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog)) .setMIFlag(MachineInstr::FrameSetup); @@ -2622,7 +2817,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( } bool AArch64FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -2630,14 +2825,12 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( SmallVector RegPairs; bool NeedsWinCFI = needsWinCFI(MF); - if (MI != MBB.end()) - DL = MI->getDebugLoc(); + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); - bool NeedShadowCallStackProlog = false; - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog, hasFP(MF)); + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); - auto EmitMI = [&](const RegPairInfo &RPI) { + auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator { unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; @@ -2694,7 +2887,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( std::swap(Reg1, Reg2); std::swap(FrameIdxReg1, FrameIdxReg2); } - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc)); if (RPI.isPaired()) { MIB.addReg(Reg2, getDefRegState(true)); MIB.addMemOperand(MF.getMachineMemOperand( @@ -2711,6 +2904,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineMemOperand::MOLoad, Size, Alignment)); if (NeedsWinCFI) InsertSEH(MIB, TII, MachineInstr::FrameDestroy); + + return MIB->getIterator(); }; // SVE objects are always restored in reverse order. 
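// [Editor's aside, not part of the patch] In the ReverseCSRRestoreSeq path of
// the next hunk, the restores are emitted in reverse order and the first
// instruction emitted is then spliced to the end of the sequence. That
// appears to recreate what the removed adaptForLdStOpt did: the ldp that
// reloads from [sp] ends up adjacent to the SP increment, so the load/store
// optimizer can merge the pair into a post-index ldp. A std::list analogy of
// the splice:
#include <cassert>
#include <iterator>
#include <list>
#include <string>

int main() {
  std::list<std::string> MBB = {"ldp x26, x25, [sp]", "ldp x24, x23, [sp, #16]",
                                "add sp, sp, #32"};
  auto First = MBB.begin();              // first restore emitted
  auto InsertPt = std::prev(MBB.end());  // position of the SP update
  MBB.splice(InsertPt, MBB, First);      // move it next to "add sp"
  assert((MBB == std::list<std::string>{"ldp x24, x23, [sp, #16]",
                                        "ldp x26, x25, [sp]",
                                        "add sp, sp, #32"}));
  return 0;
}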
@@ -2718,31 +2913,33 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( if (RPI.isScalable()) EmitMI(RPI); - if (ReverseCSRRestoreSeq) { - for (const RegPairInfo &RPI : reverse(RegPairs)) - if (!RPI.isScalable()) - EmitMI(RPI); - } else if (homogeneousPrologEpilog(MF, &MBB)) { - auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog)) + if (homogeneousPrologEpilog(MF, &MBB)) { + auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) .setMIFlag(MachineInstr::FrameDestroy); for (auto &RPI : RegPairs) { MIB.addReg(RPI.Reg1, RegState::Define); MIB.addReg(RPI.Reg2, RegState::Define); } return true; - } else - for (const RegPairInfo &RPI : RegPairs) - if (!RPI.isScalable()) - EmitMI(RPI); - - if (NeedShadowCallStackProlog) { - // Shadow call stack epilog: ldr x30, [x18, #-8]! - BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre)) - .addReg(AArch64::X18, RegState::Define) - .addReg(AArch64::LR, RegState::Define) - .addReg(AArch64::X18) - .addImm(-8) - .setMIFlag(MachineInstr::FrameDestroy); + } + + if (ReverseCSRRestoreSeq) { + MachineBasicBlock::iterator First = MBB.end(); + for (const RegPairInfo &RPI : reverse(RegPairs)) { + if (RPI.isScalable()) + continue; + MachineBasicBlock::iterator It = EmitMI(RPI); + if (First == MBB.end()) + First = It; + } + if (First != MBB.end()) + MBB.splice(MBBI, &MBB, First); + } else { + for (const RegPairInfo &RPI : RegPairs) { + if (RPI.isScalable()) + continue; + (void)EmitMI(RPI); + } } return true; @@ -2941,6 +3138,15 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( // stack slots for them. MachineFrameInfo &MFI = MF.getFrameInfo(); auto *AFI = MF.getInfo(); + + bool UsesWinAAPCS = isTargetWindows(MF); + if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) { + int FrameIdx = MFI.CreateStackObject(8, Align(16), true); + AFI->setSwiftAsyncContextFrameIdx(FrameIdx); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } + for (auto &CS : CSI) { Register Reg = CS.getReg(); const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); @@ -2954,7 +3160,8 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; // Grab 8 bytes below FP for the extended asynchronous frame info. - if (hasFP(MF) && AFI->hasSwiftAsyncContext() && Reg == AArch64::FP) { + if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS && + Reg == AArch64::FP) { FrameIdx = MFI.CreateStackObject(8, Alignment, true); AFI->setSwiftAsyncContextFrameIdx(FrameIdx); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; @@ -3190,7 +3397,7 @@ public: // instructions. May skip if the replacement is not profitable. May invalidate // the input iterator and replace it with a valid one. 
void emitCode(MachineBasicBlock::iterator &InsertI, - const AArch64FrameLowering *TFI, bool IsLast); + const AArch64FrameLowering *TFI, bool TryMergeSPUpdate); }; void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) { @@ -3329,7 +3536,8 @@ void mergeMemRefs(const SmallVectorImpl &TSE, } void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, - const AArch64FrameLowering *TFI, bool IsLast) { + const AArch64FrameLowering *TFI, + bool TryMergeSPUpdate) { if (TagStores.empty()) return; TagStoreInstr &FirstTagStore = TagStores[0]; @@ -3359,8 +3567,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, emitUnrolled(InsertI); } else { MachineInstr *UpdateInstr = nullptr; - int64_t TotalOffset; - if (IsLast) { + int64_t TotalOffset = 0; + if (TryMergeSPUpdate) { // See if we can merge base register update into the STGloop. // This is done in AArch64LoadStoreOptimizer for "normal" stores, // but STGloop is way too unusual for that, and also it only @@ -3505,7 +3713,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, for (auto &Instr : Instrs) { if (EndOffset && *EndOffset != Instr.Offset) { // Found a gap. - TSE.emitCode(InsertI, TFI, /*IsLast = */ false); + TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false); TSE.clear(); } @@ -3513,7 +3721,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, EndOffset = Instr.Offset + Instr.Size; } - TSE.emitCode(InsertI, TFI, /*IsLast = */ true); + // Multiple FP/SP updates in a loop cannot be described by CFI instructions. + TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ + !MBB->getParent() + ->getInfo() + ->needsAsyncDwarfUnwindInfo()); return InsertI; } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 31f57cbc49f2..f59860a24d9b 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -29,6 +29,8 @@ public: void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; + void resetCFIToInitialState(MachineBasicBlock &MBB) const override; + MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; @@ -141,13 +143,20 @@ private: int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF, int &MinCSFrameIndex, int &MaxCSFrameIndex) const; - MCCFIInstruction - createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI, - const StackOffset &OffsetFromSP) const; - MCCFIInstruction createCfaOffset(const TargetRegisterInfo &MRI, unsigned DwarfReg, - const StackOffset &OffsetFromDefCFA) const; bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB, unsigned StackBumpBytes) const; + void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const; + void emitCalleeSavedSVELocations(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const; + void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const; + void emitCalleeSavedSVERestores(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const; + + /// Emit target zero call-used regs. 
+ void emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 899f069abdd4..82fe5772c99d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -159,6 +159,22 @@ public: return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); } + bool SelectExtractHigh(SDValue N, SDValue &Res) { + if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || + !isa(N->getOperand(1))) + return false; + EVT VT = N->getValueType(0); + EVT LVT = N->getOperand(0).getValueType(); + unsigned Index = N->getConstantOperandVal(1); + if (!VT.is64BitVector() || !LVT.is128BitVector() || + Index != VT.getVectorNumElements()) + return false; + Res = N->getOperand(0); + return true; + } + bool SelectDupZeroOrUndef(SDValue N) { switch(N->getOpcode()) { case ISD::UNDEF: @@ -204,6 +220,11 @@ public: return SelectSVEAddSubImm(N, VT, Imm, Shift); } + template + bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { + return SelectSVECpyDupImm(N, VT, Imm, Shift); + } + template bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { return SelectSVELogicalImm(N, VT, Imm, Invert); @@ -219,6 +240,16 @@ public: return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); } + bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { + if (N->getOpcode() != ISD::SPLAT_VECTOR) + return false; + + EVT EltVT = N->getValueType(0).getVectorElementType(); + return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, + /* High */ EltVT.getFixedSizeInBits(), + /* AllowSaturation */ true, Imm); + } + // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. template bool SelectCntImm(SDValue N, SDValue &Imm) { @@ -257,6 +288,15 @@ public: return false; } + template bool ImmToTile(SDValue N, SDValue &Imm) { + if (auto *CI = dyn_cast(N)) { + uint64_t C = CI->getZExtValue(); + Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); + return true; + } + return false; + } + /// Form sequences of consecutive 64/128-bit registers for use in NEON /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have /// between 1 and 4 elements. 
If it contains a single element that is returned @@ -300,6 +340,11 @@ public: return SelectSVERegRegAddrMode(N, Scale, Base, Offset); } + template + bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { + return SelectSMETileSlice(N, Scale, Vector, Offset); + } + void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); @@ -357,10 +402,8 @@ private: bool SelectCMP_SWAP(SDNode *N); - bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift); - bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); - + bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); @@ -370,6 +413,8 @@ private: bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, SDValue &Offset); + bool SelectSMETileSlice(SDValue N, unsigned Scale, SDValue &Vector, + SDValue &Offset); bool SelectAllActivePredicate(SDValue N); }; @@ -822,9 +867,17 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, Reg = N.getOperand(0); - // Don't match if free 32-bit -> 64-bit zext can be used instead. - if (Ext == AArch64_AM::UXTW && - Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) + // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the + // isDef32 as a heuristic for when the operand is likely to be a 32bit def. + auto isDef32 = [](SDValue N) { + unsigned Opc = N.getOpcode(); + return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && + Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && + Opc != ISD::AssertZext && Opc != ISD::AssertAlign && + Opc != ISD::FREEZE; + }; + if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && + isDef32(Reg)) return false; } @@ -1852,6 +1905,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, VT = Opd0->getValueType(0); } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { Opd0 = Op0->getOperand(0); + ClampMSB = (VT == MVT::i32); } else if (BiggerPattern) { // Let's pretend a 0 shift right has been performed. // The resulting code will be at least as good as the original one @@ -2710,8 +2764,16 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, // shift the needed bits into place. SDLoc DL(N); unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; + uint64_t LsrImm = LSB; + if (Src->hasOneUse() && + isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && + (LsrImm + LSB) < BitWidth) { + Src = Src->getOperand(0); + LsrImm += LSB; + } + SDNode *LSR = CurDAG->getMachineNode( - ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), + ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); // BFXIL is an alias of BFM, so translate to BFM operands. @@ -2827,15 +2889,15 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { SDValue Add1 = ShiftAmt->getOperand(1); uint64_t Add0Imm; uint64_t Add1Imm; - // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X - // to avoid the ADD/SUB. 
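// [Editor's aside, not part of the patch] The rewrites in tryShiftAmountMod
// below are sound because AArch64 variable shifts only consume the shift
// amount modulo the operation size, so for 64-bit shifts:
//   (N - X) == -X (mod 64) when N ==  0 (mod 64)  -> emit a NEG
//   (N - X) == ~X (mod 64) when N == -1 (mod 64)  -> emit a NOT (~X == -X - 1)
// A quick exhaustive check of both identities:
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 1024; ++X) {
    assert(((64 - X) & 63) == ((0 - X) & 63)); // N == 0 (mod 64)
    assert(((63 - X) & 63) == (~X & 63));      // N == -1 (mod 64)
  }
  return 0;
}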
- if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) + if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { + // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X + // to avoid the ADD/SUB. NewShiftAmt = Add0; - // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to - // generate a NEG instead of a SUB of a constant. - else if (ShiftAmt->getOpcode() == ISD::SUB && - isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && - (Add0Imm % Size == 0)) { + } else if (ShiftAmt->getOpcode() == ISD::SUB && + isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && + (Add0Imm % Size == 0)) { + // If we are shifting by N-X where N == 0 mod Size, then just shift by -X + // to generate a NEG instead of a SUB from a constant. unsigned NegOpc; unsigned ZeroReg; EVT SubVT = ShiftAmt->getValueType(0); @@ -2852,6 +2914,26 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); NewShiftAmt = SDValue(Neg, 0); + } else if (ShiftAmt->getOpcode() == ISD::SUB && + isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { + // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X + // to generate a NOT instead of a SUB from a constant. + unsigned NotOpc; + unsigned ZeroReg; + EVT SubVT = ShiftAmt->getValueType(0); + if (SubVT == MVT::i32) { + NotOpc = AArch64::ORNWrr; + ZeroReg = AArch64::WZR; + } else { + assert(SubVT == MVT::i64); + NotOpc = AArch64::ORNXrr; + ZeroReg = AArch64::XZR; + } + SDValue Zero = + CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); + MachineSDNode *Not = + CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); + NewShiftAmt = SDValue(Not, 0); } else return false; } else { @@ -3108,72 +3190,81 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { return true; } -bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, - SDValue &Offset) { - auto C = dyn_cast(N); - if (!C) +bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, + SDValue &Shift) { + if (!isa(N)) return false; - auto Ty = N->getValueType(0); - - int64_t Imm = C->getSExtValue(); SDLoc DL(N); - - if ((Imm >= -128) && (Imm <= 127)) { - Base = CurDAG->getTargetConstant(Imm, DL, Ty); - Offset = CurDAG->getTargetConstant(0, DL, Ty); - return true; - } - - if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { - Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); - Offset = CurDAG->getTargetConstant(8, DL, Ty); + uint64_t Val = cast(N) + ->getAPIntValue() + .trunc(VT.getFixedSizeInBits()) + .getZExtValue(); + + switch (VT.SimpleTy) { + case MVT::i8: + // All immediates are supported. + Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); return true; + case MVT::i16: + case MVT::i32: + case MVT::i64: + // Support 8bit unsigned immediates. + if (Val <= 255) { + Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); + return true; + } + // Support 16bit unsigned immediates that are a multiple of 256. 
+ if (Val <= 65280 && Val % 256 == 0) { + Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); + return true; + } + break; + default: + break; } return false; } -bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { - if (auto CNode = dyn_cast(N)) { - const int64_t ImmVal = CNode->getSExtValue(); - SDLoc DL(N); +bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, + SDValue &Shift) { + if (!isa(N)) + return false; - switch (VT.SimpleTy) { - case MVT::i8: - // Can always select i8s, no shift, mask the immediate value to - // deal with sign-extended value from lowering. + SDLoc DL(N); + int64_t Val = cast(N) + ->getAPIntValue() + .trunc(VT.getFixedSizeInBits()) + .getSExtValue(); + + switch (VT.SimpleTy) { + case MVT::i8: + // All immediates are supported. + Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); + return true; + case MVT::i16: + case MVT::i32: + case MVT::i64: + // Support 8bit signed immediates. + if (Val >= -128 && Val <= 127) { Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); - Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); + return true; + } + // Support 16bit signed immediates that are a multiple of 256. + if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { + Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); + Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); return true; - case MVT::i16: - // i16 values get sign-extended to 32-bits during lowering. - if ((ImmVal & 0xFF) == ImmVal) { - Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); - Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); - return true; - } else if ((ImmVal & 0xFF) == 0) { - Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); - Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32); - return true; - } - break; - case MVT::i32: - case MVT::i64: - // Range of immediate won't trigger signedness problems for 32/64b. 
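// [Editor's aside, not part of the patch] SelectSVEAddSubImm and
// SelectSVECpyDupImm both split a constant into an 8-bit payload plus a left
// shift of 0 or 8, matching the "#imm, lsl #sh" immediate forms. A
// hypothetical stand-alone version of the unsigned (ADD/SUB) rule; the signed
// (CPY/DUP) rule is analogous over [-128, 127] and [-32768, 32512]:
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

std::optional<std::pair<uint8_t, unsigned>> splitAddSubImm(uint64_t Val) {
  if (Val <= 255) // fits the 8-bit field directly, no shift
    return std::pair<uint8_t, unsigned>{static_cast<uint8_t>(Val), 0u};
  if (Val <= 65280 && Val % 256 == 0) // 8-bit value shifted left by 8
    return std::pair<uint8_t, unsigned>{static_cast<uint8_t>(Val >> 8), 8u};
  return std::nullopt; // not encodable in this immediate form
}

int main() {
  assert((splitAddSubImm(42).value() == std::pair<uint8_t, unsigned>{42, 0u}));
  assert((splitAddSubImm(0x1200).value() ==
          std::pair<uint8_t, unsigned>{0x12, 8u}));
  assert(!splitAddSubImm(257).has_value());
  return 0;
}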
- if ((ImmVal & 0xFF) == ImmVal) { - Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); - Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); - return true; - } else if ((ImmVal & 0xFF00) == ImmVal) { - Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); - Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); - return true; - } - break; - default: - break; } + break; + default: + break; } return false; @@ -3901,7 +3992,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { true); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, true); return; @@ -3922,7 +4013,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { true); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, true); return; @@ -3943,7 +4034,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { true); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, true); return; @@ -4267,7 +4358,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { @@ -4284,7 +4375,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { @@ -4301,7 +4392,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { @@ -4911,7 +5002,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { @@ -4928,7 +5019,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { @@ -4945,7 +5036,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); return; } else if (VT == MVT::nxv8i16 || VT == 
MVT::nxv8f16 || - (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { + VT == MVT::nxv8bf16) { SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { @@ -5033,6 +5124,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { const unsigned IntNo = cast(Root->getOperand(1))->getZExtValue(); + if (IntNo == Intrinsic::aarch64_sme_ldr || + IntNo == Intrinsic::aarch64_sme_str) + return MVT::nxv16i8; + if (IntNo != Intrinsic::aarch64_sve_prf) return EVT(); @@ -5051,12 +5146,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &OffImm) { const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); const DataLayout &DL = CurDAG->getDataLayout(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); - return true; + // We can only encode VL scaled offsets, so only fold in frame indexes + // referencing SVE objects. + if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) { + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); + return true; + } + + return false; } if (MemVT == EVT()) @@ -5083,7 +5185,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + // We can only encode VL scaled offsets, so only fold in frame indexes + // referencing SVE objects. + if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); @@ -5149,3 +5254,30 @@ bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) { return TLI->isAllActivePredicate(*CurDAG, N); } + +bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned Scale, + SDValue &Base, SDValue &Offset) { + if (N.getOpcode() != ISD::ADD) { + Base = N; + Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); + return true; + } + + // Process an ADD node. 
+ const SDValue LHS = N.getOperand(0); + const SDValue RHS = N.getOperand(1); + + if (auto C = dyn_cast(RHS)) { + int64_t ImmOff = C->getSExtValue(); + unsigned MaxSize = (1 << Scale) - 1; + + if (ImmOff < 0 || ImmOff > MaxSize) + return false; + + Base = LHS; + Offset = CurDAG->getTargetConstant(ImmOff, SDLoc(N), MVT::i64); + return true; + } + + return false; +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c539c8617d99..abfe2d507111 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -208,6 +208,7 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::BSWAP_MERGE_PASSTHRU: case AArch64ISD::REVH_MERGE_PASSTHRU: case AArch64ISD::REVW_MERGE_PASSTHRU: + case AArch64ISD::REVD_MERGE_PASSTHRU: case AArch64ISD::CTLZ_MERGE_PASSTHRU: case AArch64ISD::CTPOP_MERGE_PASSTHRU: case AArch64ISD::DUP_MERGE_PASSTHRU: @@ -289,8 +290,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addQRTypeForNEON(MVT::v8bf16); } - if (Subtarget->hasSVE()) { + if (Subtarget->hasSVE() || Subtarget->hasSME()) { // Add legal sve predicate types + addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass); addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); @@ -324,50 +326,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (useSVEForFixedLengthVectorVT(VT)) addRegisterClass(VT, &AArch64::ZPRRegClass); } - - for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) { - setOperationAction(ISD::SADDSAT, VT, Legal); - setOperationAction(ISD::UADDSAT, VT, Legal); - setOperationAction(ISD::SSUBSAT, VT, Legal); - setOperationAction(ISD::USUBSAT, VT, Legal); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::SDIVREM, VT, Expand); - setOperationAction(ISD::UDIVREM, VT, Expand); - } - - for (auto VT : - { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, - MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) - setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); - - for (auto VT : - { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, - MVT::nxv2f64 }) { - setCondCodeAction(ISD::SETO, VT, Expand); - setCondCodeAction(ISD::SETOLT, VT, Expand); - setCondCodeAction(ISD::SETLT, VT, Expand); - setCondCodeAction(ISD::SETOLE, VT, Expand); - setCondCodeAction(ISD::SETLE, VT, Expand); - setCondCodeAction(ISD::SETULT, VT, Expand); - setCondCodeAction(ISD::SETULE, VT, Expand); - setCondCodeAction(ISD::SETUGE, VT, Expand); - setCondCodeAction(ISD::SETUGT, VT, Expand); - setCondCodeAction(ISD::SETUEQ, VT, Expand); - setCondCodeAction(ISD::SETUNE, VT, Expand); - - setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FPOW, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); - setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FSIN, VT, Expand); - setOperationAction(ISD::FSINCOS, VT, Expand); - setOperationAction(ISD::FEXP, VT, Expand); - setOperationAction(ISD::FEXP2, VT, Expand); - setOperationAction(ISD::FLOG, VT, Expand); - setOperationAction(ISD::FLOG2, VT, Expand); - setOperationAction(ISD::FLOG10, VT, Expand); - } } // Compute derived properties from the register classes @@ -389,7 +347,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, 
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_CC, MVT::f16, Custom); @@ -448,6 +406,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f128, Custom); setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + // FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently + // aren't handled. // Lowering for many of the conversions is actually specified by the non-f128 // type. The LowerXXX function will be trivial when f128 isn't involved. @@ -508,16 +468,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // BlockAddress setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences. - setOperationAction(ISD::ADDC, MVT::i32, Custom); - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBC, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - setOperationAction(ISD::ADDC, MVT::i64, Custom); - setOperationAction(ISD::ADDE, MVT::i64, Custom); - setOperationAction(ISD::SUBC, MVT::i64, Custom); - setOperationAction(ISD::SUBE, MVT::i64, Custom); - // AArch64 lacks both left-rotate and popcount instructions. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -568,6 +518,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMULO, MVT::i32, Custom); setOperationAction(ISD::UMULO, MVT::i64, Custom); + setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); + setOperationAction(ISD::ADDCARRY, MVT::i64, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i64, Custom); + setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom); + setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom); + setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom); + setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom); + setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); @@ -581,64 +540,41 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, else setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::v4f16, Expand); - setOperationAction(ISD::FREM, MVT::v8f16, Expand); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::v4f16, Expand); - setOperationAction(ISD::FPOW, MVT::v8f16, Expand); - setOperationAction(ISD::FPOWI, MVT::f16, Promote); - setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); - setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::v4f16, Expand); - setOperationAction(ISD::FCOS, MVT::v8f16, Expand); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::v4f16, Expand); - setOperationAction(ISD::FSIN, MVT::v8f16, Expand); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - 
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+ ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
+ ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
+ ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::v4f16, Expand);
+ setOperationAction(Op, MVT::v8f16, Expand);
+ }

if (!Subtarget->hasFullFP16()) {
- setOperationAction(ISD::SELECT, MVT::f16, Promote);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
- setOperationAction(ISD::SETCC, MVT::f16, Promote);
- setOperationAction(ISD::BR_CC, MVT::f16, Promote);
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMA, MVT::f16, Promote);
- setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FABS, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FSQRT, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
- setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
+ for (auto Op :
+ {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
+ ISD::BR_CC, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FDIV, ISD::FMA,
+ ISD::FNEG, ISD::FABS, ISD::FCEIL,
+ ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
+ ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
+ ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
+ ISD::STRICT_FMAXIMUM})
+ setOperationAction(Op, MVT::f16, Promote);
+
+ // Round-to-integer ops need custom lowering for fp16, as Promote doesn't
+ // work because the result type is integer.
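// (A sketch of the Custom expansion these nodes get, assuming the same
// widen-to-f32 strategy used elsewhere in this patch; the SSA names are
// illustrative only:
//   %ext = STRICT_FP_EXTEND {f32, ch} (%chain, %x:f16)
//   %res = STRICT_LROUND   {i64, ch} (%ext:ch, %ext)
// The integer result is produced directly, which Promote cannot express.)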
+ for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT, + ISD::STRICT_LLRINT}) + setOperationAction(Op, MVT::f16, Custom); // promote v4f16 to v4f32 when that is known to be safe. setOperationAction(ISD::FADD, MVT::v4f16, Promote); @@ -691,37 +627,35 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } // AArch64 has implementations of a lot of rounding-like FP operations. - for (MVT Ty : {MVT::f32, MVT::f64}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - setOperationAction(ISD::FMINNUM, Ty, Legal); - setOperationAction(ISD::FMAXNUM, Ty, Legal); - setOperationAction(ISD::FMINIMUM, Ty, Legal); - setOperationAction(ISD::FMAXIMUM, Ty, Legal); - setOperationAction(ISD::LROUND, Ty, Legal); - setOperationAction(ISD::LLROUND, Ty, Legal); - setOperationAction(ISD::LRINT, Ty, Legal); - setOperationAction(ISD::LLRINT, Ty, Legal); - } - - if (Subtarget->hasFullFP16()) { - setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); - setOperationAction(ISD::FFLOOR, MVT::f16, Legal); - setOperationAction(ISD::FCEIL, MVT::f16, Legal); - setOperationAction(ISD::FRINT, MVT::f16, Legal); - setOperationAction(ISD::FTRUNC, MVT::f16, Legal); - setOperationAction(ISD::FROUND, MVT::f16, Legal); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); - setOperationAction(ISD::FMINNUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); - setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); - } + for (auto Op : + {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, + ISD::FRINT, ISD::FTRUNC, ISD::FROUND, + ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND, + ISD::LLROUND, ISD::LRINT, ISD::LLRINT, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND, + ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) { + for (MVT Ty : {MVT::f32, MVT::f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + setOperationAction(Op, MVT::f16, Legal); + } + + // Basic strict FP operations are legal + for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) { + for (MVT Ty : {MVT::f32, MVT::f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + setOperationAction(Op, MVT::f16, Legal); + } + + // Strict conversion to a larger type is legal + for (auto VT : {MVT::f32, MVT::f64}) + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -891,47 +825,33 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Vector add and sub nodes may conceal a high-half opportunity. // Also, try to fold ADD into CSINC/CSINV.. 
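// (The brace-list registrations below rely on the ArrayRef overload of
// setTargetDAGCombine; a minimal sketch of that batching pattern, with
// simplified types, purely for illustration:
//   void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
//     for (ISD::NodeType NT : NTs)
//       setTargetDAGCombine(NT);  // same per-opcode effect, one call site
//   }
// The behaviour of the registered combines is unchanged.)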
- setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::ABS); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::XOR); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - - setTargetDAGCombine(ISD::FP_TO_SINT); - setTargetDAGCombine(ISD::FP_TO_UINT); - setTargetDAGCombine(ISD::FP_TO_SINT_SAT); - setTargetDAGCombine(ISD::FP_TO_UINT_SAT); - setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP, + ISD::UINT_TO_FP}); + + setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT, + ISD::FP_TO_UINT_SAT, ISD::FDIV}); // Try and combine setcc with csel setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::VECTOR_SPLICE); - setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::CONCAT_VECTORS); - setTargetDAGCombine(ISD::INSERT_SUBVECTOR); - setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND, + ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG, + ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR, + ISD::INSERT_SUBVECTOR, ISD::STORE}); if (Subtarget->supportsAddressTopByteIgnored()) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine({ISD::SELECT, ISD::VSELECT}); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::VECREDUCE_ADD); - setTargetDAGCombine(ISD::STEP_VECTOR); + setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, + ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, + ISD::VECREDUCE_ADD, ISD::STEP_VECTOR}); + + setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER}); setTargetDAGCombine(ISD::FP_EXTEND); @@ -980,43 +900,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: - setOperationAction(ISD::FABS, MVT::v1f64, Expand); - setOperationAction(ISD::FADD, MVT::v1f64, Expand); - setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); - setOperationAction(ISD::FCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FDIV, MVT::v1f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); - setOperationAction(ISD::FMA, MVT::v1f64, Expand); - setOperationAction(ISD::FMUL, MVT::v1f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); - setOperationAction(ISD::FNEG, MVT::v1f64, Expand); - setOperationAction(ISD::FPOW, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FROUND, MVT::v1f64, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand); - setOperationAction(ISD::FRINT, MVT::v1f64, Expand); - setOperationAction(ISD::FSIN, MVT::v1f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); - setOperationAction(ISD::FSUB, MVT::v1f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); - setOperationAction(ISD::SETCC, MVT::v1f64, Expand); - setOperationAction(ISD::BR_CC, MVT::v1f64, 
Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand); - - setOperationAction(ISD::MUL, MVT::v1i64, Expand); + for (auto Op : + {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC, + ISD::BR_CC, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FDIV, ISD::FMA, + ISD::FNEG, ISD::FABS, ISD::FCEIL, + ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, + ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, + ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, + ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, + ISD::STRICT_FMAXIMUM}) + setOperationAction(Op, MVT::v1f64, Expand); + + for (auto Op : + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP, + ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT, + ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND}) + setOperationAction(Op, MVT::v1i64, Expand); // AArch64 doesn't have a direct vector ->f32 conversion instructions for // elements smaller than i32, so promote the input to i32 first. @@ -1024,14 +930,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); // Similarly, there is no direct i32 -> f64 vector conversion instruction. - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); // Or, direct i32 -> f16 vector conversion. Set it so custom, so the // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP}) + for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32}) + setOperationAction(Op, VT, Custom); if (Subtarget->hasFullFP16()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); @@ -1088,6 +992,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32}) { + setOperationAction(ISD::AVGFLOORS, VT, Legal); + setOperationAction(ISD::AVGFLOORU, VT, Legal); + setOperationAction(ISD::AVGCEILS, VT, Legal); + setOperationAction(ISD::AVGCEILU, VT, Legal); setOperationAction(ISD::ABDS, VT, Legal); setOperationAction(ISD::ABDU, VT, Legal); } @@ -1141,31 +1049,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } // AArch64 has implementations of a lot of rounding-like FP operations. 
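// (For reference, the standard ISD-to-instruction mappings behind this:
//   FFLOOR -> FRINTM, FCEIL -> FRINTP, FTRUNC -> FRINTZ, FROUND -> FRINTA,
//   FROUNDEVEN -> FRINTN, FNEARBYINT -> FRINTI, FRINT -> FRINTX.)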
- for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - } - - if (Subtarget->hasFullFP16()) { - for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - } + for (auto Op : + {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR, + ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, + ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) { + for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + for (MVT Ty : {MVT::v4f16, MVT::v8f16}) + setOperationAction(Op, Ty, Legal); } - if (Subtarget->hasSVE()) - setOperationAction(ISD::VSCALE, MVT::i32, Custom); - setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); @@ -1174,6 +1069,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + + // ADDP custom lowering + for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + setOperationAction(ISD::ADD, VT, Custom); + // FADDP custom lowering + for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 }) + setOperationAction(ISD::FADD, VT, Custom); + } + + if (Subtarget->hasSME()) { + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); } if (Subtarget->hasSVE()) { @@ -1194,7 +1100,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::MULHS, VT, Custom); setOperationAction(ISD::MULHU, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); @@ -1224,6 +1130,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); + + setOperationAction(ISD::SADDSAT, VT, Legal); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); } // Illegal unpacked integer vector types. @@ -1234,10 +1149,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Legalize unpacked bitcasts to REINTERPRET_CAST. 
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16, - MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) + MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) setOperationAction(ISD::BITCAST, VT, Custom); - for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { + for (auto VT : + { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, + MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); + + for (auto VT : + {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) { setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); @@ -1269,18 +1190,33 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MSCATTER, VT, Custom); } - for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { - for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) { - // Avoid marking truncating FP stores as legal to prevent the - // DAGCombiner from creating unsupported truncating stores. + // Firstly, exclude all scalable vector extending loads/truncating stores, + // include both integer and floating scalable vector. + for (MVT VT : MVT::scalable_vector_valuetypes()) { + for (MVT InnerVT : MVT::scalable_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); - // SVE does not have floating-point extending loads. setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + // Then, selectively enable those which we directly support. + setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i8, Legal); + setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i16, Legal); + setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i32, Legal); + setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i8, Legal); + setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i16, Legal); + setTruncStoreAction(MVT::nxv8i16, MVT::nxv8i8, Legal); + for (auto Op : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) { + setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i8, Legal); + setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i16, Legal); + setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i32, Legal); + setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i8, Legal); + setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i16, Legal); + setLoadExtAction(Op, MVT::nxv8i16, MVT::nxv8i8, Legal); + } + // SVE supports truncating stores of 64 and 128-bit vectors setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom); setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom); @@ -1295,7 +1231,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::FADD, VT, Custom); setOperationAction(ISD::FCOPYSIGN, VT, Custom); @@ -1326,6 +1262,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, 
Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + + setCondCodeAction(ISD::SETO, VT, Expand); + setCondCodeAction(ISD::SETOLT, VT, Expand); + setCondCodeAction(ISD::SETLT, VT, Expand); + setCondCodeAction(ISD::SETOLE, VT, Expand); + setCondCodeAction(ISD::SETLE, VT, Expand); + setCondCodeAction(ISD::SETULT, VT, Expand); + setCondCodeAction(ISD::SETULE, VT, Expand); + setCondCodeAction(ISD::SETUGE, VT, Expand); + setCondCodeAction(ISD::SETUGT, VT, Expand); + setCondCodeAction(ISD::SETUEQ, VT, Expand); + setCondCodeAction(ISD::SETONE, VT, Expand); } for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { @@ -1334,13 +1293,23 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MSCATTER, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); } - setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); + // NEON doesn't support integer divides, but SVE does + for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32, + MVT::v4i32, MVT::v1i64, MVT::v2i64}) { + setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::UDIV, VT, Custom); + } + + // NEON doesn't support 64-bit vector integer muls, but SVE does. + setOperationAction(ISD::MUL, MVT::v1i64, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); + // NOTE: Currently this has to happen after computeRegisterProperties rather // than the preferred option of combining it with the addRegisterClass call. 
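// (Roughly what Custom means for the divides above, assuming the usual
// fixed-to-scalable wrapping done by this file's helpers; a sketch only:
//   nxv4i32 %wl = convertToScalableVector(v4i32 %lhs)
//   nxv4i32 %wr = convertToScalableVector(v4i32 %rhs)
//   nxv4i32 %wd = SDIV_PRED(%pg, %wl, %wr)   ; predicated SVE SDIV
//   v4i32   %d  = convertFromScalableVector(%wd))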
if (Subtarget->useSVEForFixedLengthVectors()) { @@ -1367,32 +1336,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); - setOperationAction(ISD::MUL, MVT::v1i64, Custom); - setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::MULHS, MVT::v1i64, Custom); setOperationAction(ISD::MULHS, MVT::v2i64, Custom); setOperationAction(ISD::MULHU, MVT::v1i64, Custom); setOperationAction(ISD::MULHU, MVT::v2i64, Custom); - setOperationAction(ISD::SDIV, MVT::v8i8, Custom); - setOperationAction(ISD::SDIV, MVT::v16i8, Custom); - setOperationAction(ISD::SDIV, MVT::v4i16, Custom); - setOperationAction(ISD::SDIV, MVT::v8i16, Custom); - setOperationAction(ISD::SDIV, MVT::v2i32, Custom); - setOperationAction(ISD::SDIV, MVT::v4i32, Custom); - setOperationAction(ISD::SDIV, MVT::v1i64, Custom); - setOperationAction(ISD::SDIV, MVT::v2i64, Custom); setOperationAction(ISD::SMAX, MVT::v1i64, Custom); setOperationAction(ISD::SMAX, MVT::v2i64, Custom); setOperationAction(ISD::SMIN, MVT::v1i64, Custom); setOperationAction(ISD::SMIN, MVT::v2i64, Custom); - setOperationAction(ISD::UDIV, MVT::v8i8, Custom); - setOperationAction(ISD::UDIV, MVT::v16i8, Custom); - setOperationAction(ISD::UDIV, MVT::v4i16, Custom); - setOperationAction(ISD::UDIV, MVT::v8i16, Custom); - setOperationAction(ISD::UDIV, MVT::v2i32, Custom); - setOperationAction(ISD::UDIV, MVT::v4i32, Custom); - setOperationAction(ISD::UDIV, MVT::v1i64, Custom); - setOperationAction(ISD::UDIV, MVT::v2i64, Custom); setOperationAction(ISD::UMAX, MVT::v1i64, Custom); setOperationAction(ISD::UMAX, MVT::v2i64, Custom); setOperationAction(ISD::UMIN, MVT::v1i64, Custom); @@ -1426,6 +1377,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32); setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16); setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); + + setOperationAction(ISD::VSCALE, MVT::i32, Custom); } if (Subtarget->hasMOPS() && Subtarget->hasMTE()) { @@ -1434,6 +1387,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); + + IsStrictFPEnabled = true; } void AArch64TargetLowering::addTypeForNEON(MVT VT) { @@ -1490,10 +1445,10 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) { setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); - setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); + for (unsigned Opcode : + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT, + ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) + setOperationAction(Opcode, VT, Custom); if (!VT.isFloatingPoint()) setOperationAction(ISD::ABS, VT, Legal); @@ -1503,14 +1458,39 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) { for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); - // F[MIN|MAX][NUM|NAN] are available for all FP NEON types. + // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP + // NEON types. 
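// (Reminder of the semantics being kept apart here: fminnum(2.0, NaN)
// returns 2.0, quietly ignoring the NaN, while fminimum(2.0, NaN) returns
// NaN, propagating it; AArch64 has native instructions for both flavours.)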
if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
- {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
+ {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
+ ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
+ ISD::STRICT_FSQRT})
setOperationAction(Opcode, VT, Legal);

+ // Strict fp extend and trunc are legal
+ if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16)
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
+ if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64)
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
+
+ // FIXME: We could potentially make use of the vector comparison instructions
+ // for STRICT_FSETCC and STRICT_FSETCCS, but there's a number of
+ // complications:
+ // * FCMPEQ/NE are quiet comparisons, the rest are signalling comparisons,
+ // so we would need to expand when the condition code doesn't match the
+ // kind of comparison.
+ // * Some kinds of comparison require more than one FCMXY instruction so
+ // would need to be expanded instead.
+ // * The lowering of the non-strict versions involves target-specific ISD
+ // nodes so we would likely need to add strict versions of all of them and
+ // handle them appropriately.
+ setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
+
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -1526,9 +1506,11 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
if (!Subtarget->hasSVE())
return true;

- // We can only support legal predicate result types.
+ // We can only support legal predicate result types. We can use the SVE
+ // whilelo instruction for generating fixed-width predicates too.
if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
- ResVT != MVT::nxv16i1)
+ ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
+ ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
return true;

// The whilelo instruction only works with i32 or i64 scalar inputs.
@@ -1559,7 +1541,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
- setCondCodeAction(ISD::SETUNE, VT, Expand);
+ setCondCodeAction(ISD::SETONE, VT, Expand);
}

// Mark integer truncating stores/extending loads as having custom lowering
@@ -1830,11 +1812,21 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
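/// For example, for the AArch64ISD::DUP case below, every lane of the result
/// carries the known bits of the splatted scalar, truncated when the scalar
/// is wider than the vector element.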
void AArch64TargetLowering::computeKnownBitsForTargetNode( - const SDValue Op, KnownBits &Known, - const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { + const SDValue Op, KnownBits &Known, const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth) const { switch (Op.getOpcode()) { default: break; + case AArch64ISD::DUP: { + SDValue SrcOp = Op.getOperand(0); + Known = DAG.computeKnownBits(SrcOp, Depth + 1); + if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) { + assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() && + "Expected DUP implicit truncation"); + Known = Known.trunc(Op.getScalarValueSizeInBits()); + } + break; + } case AArch64ISD::CSEL: { KnownBits Known2; Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); @@ -2006,7 +1998,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) MAKE_CASE(AArch64ISD::ABDS_PRED) MAKE_CASE(AArch64ISD::ABDU_PRED) - MAKE_CASE(AArch64ISD::ADD_PRED) MAKE_CASE(AArch64ISD::MUL_PRED) MAKE_CASE(AArch64ISD::MULHS_PRED) MAKE_CASE(AArch64ISD::MULHU_PRED) @@ -2016,7 +2007,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::SMIN_PRED) MAKE_CASE(AArch64ISD::SRA_PRED) MAKE_CASE(AArch64ISD::SRL_PRED) - MAKE_CASE(AArch64ISD::SUB_PRED) MAKE_CASE(AArch64ISD::UDIV_PRED) MAKE_CASE(AArch64ISD::UMAX_PRED) MAKE_CASE(AArch64ISD::UMIN_PRED) @@ -2061,6 +2051,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::DUPLANE16) MAKE_CASE(AArch64ISD::DUPLANE32) MAKE_CASE(AArch64ISD::DUPLANE64) + MAKE_CASE(AArch64ISD::DUPLANE128) MAKE_CASE(AArch64ISD::MOVI) MAKE_CASE(AArch64ISD::MOVIshift) MAKE_CASE(AArch64ISD::MOVIedit) @@ -2108,10 +2099,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FCMLTz) MAKE_CASE(AArch64ISD::SADDV) MAKE_CASE(AArch64ISD::UADDV) - MAKE_CASE(AArch64ISD::SRHADD) - MAKE_CASE(AArch64ISD::URHADD) - MAKE_CASE(AArch64ISD::SHADD) - MAKE_CASE(AArch64ISD::UHADD) MAKE_CASE(AArch64ISD::SDOT) MAKE_CASE(AArch64ISD::UDOT) MAKE_CASE(AArch64ISD::SMINV) @@ -2150,6 +2137,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FMINNMV_PRED) MAKE_CASE(AArch64ISD::FMUL_PRED) MAKE_CASE(AArch64ISD::FSUB_PRED) + MAKE_CASE(AArch64ISD::RDSVL) MAKE_CASE(AArch64ISD::BIC) MAKE_CASE(AArch64ISD::BIT) MAKE_CASE(AArch64ISD::CBZ) @@ -2267,10 +2255,13 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::INDEX_VECTOR) + MAKE_CASE(AArch64ISD::ADDP) + MAKE_CASE(AArch64ISD::SADDLP) MAKE_CASE(AArch64ISD::UADDLP) MAKE_CASE(AArch64ISD::CALL_RVMARKER) MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) @@ -2278,6 +2269,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING) MAKE_CASE(AArch64ISD::MOPS_MEMCOPY) MAKE_CASE(AArch64ISD::MOPS_MEMMOVE) + MAKE_CASE(AArch64ISD::CALL_BTI) } #undef MAKE_CASE return nullptr; @@ -2351,6 +2343,92 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( return BB; } +MachineBasicBlock * 
+AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + + MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); + MIB.add(MI.getOperand(1)); // slice index register + MIB.add(MI.getOperand(2)); // slice index offset + MIB.add(MI.getOperand(3)); // pg + MIB.add(MI.getOperand(4)); // base + MIB.add(MI.getOperand(5)); // offset + + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA)); + + MIB.addReg(AArch64::ZA, RegState::Define); + MIB.add(MI.getOperand(0)); // Vector select register + MIB.add(MI.getOperand(1)); // Vector select offset + MIB.add(MI.getOperand(2)); // Base + MIB.add(MI.getOperand(1)); // Offset, same as vector select offset + + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + + MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); + MIB.addReg(BaseReg + MI.getOperand(0).getImm()); + MIB.add(MI.getOperand(1)); // pn + MIB.add(MI.getOperand(2)); // pm + MIB.add(MI.getOperand(3)); // zn + MIB.add(MI.getOperand(4)); // zm + + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + + MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); + MIB.addReg(BaseReg + MI.getOperand(0).getImm()); + MIB.add(MI.getOperand(1)); // Slice index register + MIB.add(MI.getOperand(2)); // Slice index offset + MIB.add(MI.getOperand(3)); // pg + MIB.add(MI.getOperand(4)); // zn + + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M)); + MIB.add(MI.getOperand(0)); // Mask + + unsigned Mask = MI.getOperand(0).getImm(); + for (unsigned I = 0; I < 8; I++) { + if (Mask & (1 << I)) + MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine); + } + + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { @@ -2366,9 +2444,14 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case TargetOpcode::STATEPOINT: // STATEPOINT is a pseudo instruction which has no implicit defs/uses // while bl call instruction (where statepoint will be lowered at the end) - // has implicit def. Add this implicit dead def here as a workaround. 
- MI.addOperand(*MI.getMF(), MachineOperand::CreateReg(AArch64::LR, true, - true, false, true)); + // has implicit def. This def is early-clobber as it will be set at + // the moment of the call and earlier than any use is read. + // Add this implicit dead def here as a workaround. + MI.addOperand(*MI.getMF(), + MachineOperand::CreateReg( + AArch64::LR, /*isDef*/ true, + /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, + /*isUndef*/ false, /*isEarlyClobber*/ true)); LLVM_FALLTHROUGH; case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: @@ -2376,6 +2459,108 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case AArch64::CATCHRET: return EmitLoweredCatchRet(MI, BB); + case AArch64::LD1_MXIPXX_H_PSEUDO_B: + return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB); + case AArch64::LD1_MXIPXX_H_PSEUDO_H: + return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB); + case AArch64::LD1_MXIPXX_H_PSEUDO_S: + return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB); + case AArch64::LD1_MXIPXX_H_PSEUDO_D: + return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB); + case AArch64::LD1_MXIPXX_H_PSEUDO_Q: + return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB); + case AArch64::LD1_MXIPXX_V_PSEUDO_B: + return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB); + case AArch64::LD1_MXIPXX_V_PSEUDO_H: + return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB); + case AArch64::LD1_MXIPXX_V_PSEUDO_S: + return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB); + case AArch64::LD1_MXIPXX_V_PSEUDO_D: + return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB); + case AArch64::LD1_MXIPXX_V_PSEUDO_Q: + return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB); + case AArch64::LDR_ZA_PSEUDO: + return EmitFill(MI, BB); + case AArch64::BFMOPA_MPPZZ_PSEUDO: + return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB); + case AArch64::BFMOPS_MPPZZ_PSEUDO: + return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB); + case AArch64::FMOPAL_MPPZZ_PSEUDO: + return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB); + case AArch64::FMOPSL_MPPZZ_PSEUDO: + return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB); + case AArch64::FMOPA_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::FMOPS_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::FMOPA_MPPZZ_D_PSEUDO: + return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB); + case AArch64::FMOPS_MPPZZ_D_PSEUDO: + return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB); + case AArch64::SMOPA_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::SMOPS_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::UMOPA_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::UMOPS_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::SUMOPA_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::SUMOPS_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::USMOPA_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); + case AArch64::USMOPS_MPPZZ_S_PSEUDO: + return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); + case 
AArch64::SMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::UMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::UMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::USMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::USMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_H:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_H, AArch64::ZAH0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_S:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_S, AArch64::ZAS0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_D:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_D, AArch64::ZAD0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_Q:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_Q, AArch64::ZAQ0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_B:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_B, AArch64::ZAB0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_H:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_H, AArch64::ZAH0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_S:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_S, AArch64::ZAS0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_D:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_D, AArch64::ZAD0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
+ BB);
+ case AArch64::ZERO_M_PSEUDO:
+ return EmitZero(MI, BB);
}
}

@@ -2596,7 +2781,17 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
- assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
+
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
+
+ if (VT == MVT::f16 && !FullFP16) {
+ LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {Chain, LHS});
+ RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {LHS.getValue(1), RHS});
+ Chain = RHS.getValue(1);
+ VT = MVT::f32;
+ }

unsigned Opcode = IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}

@@ -2605,8 +2800,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();

if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
@@ -2714,8 +2908,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();

if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
@@ -3282,40 +3475,68 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
return Op;
}

-static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+// If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
+// bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
+// sets 'C' bit to 0.
+static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
+ SDLoc DL(Value);
+ EVT VT = Value.getValueType();
+ SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
+ SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
+ SDValue Cmp =
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
+ return Cmp.getValue(1);
+}

- // Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
+// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
+static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
+ bool Invert) {
+ assert(Flag.getResNo() == 1);
+ SDLoc DL(Flag);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ unsigned Cond = Invert ?
AArch64CC::LO : AArch64CC::HS; + SDValue CC = DAG.getConstant(Cond, DL, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag); +} + +// Value is 1 if 'V' bit of NZCV is 1, else 0 +static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) { + assert(Flag.getResNo() == 1); + SDLoc DL(Flag); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag); +} + +// This lowering is inefficient, but it will get cleaned up by +// `foldOverflowCheck` +static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode, + bool IsSigned) { + EVT VT0 = Op.getValue(0).getValueType(); + EVT VT1 = Op.getValue(1).getValueType(); + + if (VT0 != MVT::i32 && VT0 != MVT::i64) return SDValue(); - SDVTList VTs = DAG.getVTList(VT, MVT::i32); + bool InvertCarry = Opcode == AArch64ISD::SBCS; + SDValue OpLHS = Op.getOperand(0); + SDValue OpRHS = Op.getOperand(1); + SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry); - unsigned Opc; - bool ExtraOp = false; - switch (Op.getOpcode()) { - default: - llvm_unreachable("Invalid code"); - case ISD::ADDC: - Opc = AArch64ISD::ADDS; - break; - case ISD::SUBC: - Opc = AArch64ISD::SUBS; - break; - case ISD::ADDE: - Opc = AArch64ISD::ADCS; - ExtraOp = true; - break; - case ISD::SUBE: - Opc = AArch64ISD::SBCS; - ExtraOp = true; - break; - } + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(VT0, VT1); + + SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS, + OpRHS, OpCarryIn); + + SDValue OutFlag = + IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG) + : carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry); - if (!ExtraOp) - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag); } static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { @@ -3417,7 +3638,8 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. - EVT InVT = Op.getOperand(0).getValueType(); + bool IsStrict = Op->isStrictFPOpcode(); + EVT InVT = Op.getOperand(IsStrict ? 
1 : 0).getValueType(); EVT VT = Op.getValueType(); if (VT.isScalableVector()) { @@ -3437,6 +3659,12 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, !Subtarget->hasFullFP16()) { MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); SDLoc dl(Op); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); @@ -3446,6 +3674,13 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, uint64_t InVTSize = InVT.getFixedSizeInBits(); if (VTSize < InVTSize) { SDLoc dl(Op); + if (IsStrict) { + InVT = InVT.changeVectorElementTypeToInteger(); + SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); + return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl); + } SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); @@ -3457,10 +3692,30 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, MVT ExtVT = MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), VT.getVectorNumElements()); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } + // Use a scalar operation for conversions between single-element vectors of + // the same size. + if (NumElts == 1) { + SDLoc dl(Op); + SDValue Extract = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), + Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64)); + EVT ScalarVT = VT.getScalarType(); + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, + {Op.getOperand(0), Extract}); + return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract); + } + // Type changing conversions are illegal. return Op; } @@ -3475,8 +3730,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, // f16 conversions are promoted to f32 when full fp16 is not supported. if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { - assert(!IsStrict && "Lowering of strict fp16 not yet implemented"); SDLoc dl(Op); + if (IsStrict) { + SDValue Ext = + DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other}, + {Op.getOperand(0), SrcVal}); + return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); @@ -3507,7 +3768,7 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op, "Saturation width cannot exceed result width"); // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT. - // Currently, the `llvm.fpto[su]i.sat.*` instrinsics don't accept scalable + // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable // types, so this is hard to reach. 
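// (The non-native saturating cases below reduce to a conversion at the
// native width plus integer clamping; e.g. for fptosi.sat.i8 from f32,
// with the i8 bounds shown:
//   %cvt = fp_to_sint i32 %x
//   %lo  = smin %cvt, 127
//   %res = smax %lo, -128)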
if (DstVT.isScalableVector()) return SDValue(); @@ -3545,17 +3806,14 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op, SDValue Sat; if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) { SDValue MinC = DAG.getConstant( - APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL, - IntVT); + APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT); SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC); SDValue MaxC = DAG.getConstant( - APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL, - IntVT); + APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT); Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC); } else { SDValue MinC = DAG.getConstant( - APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL, - IntVT); + APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT); Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC); } @@ -3604,14 +3862,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SDValue Sat; if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) { SDValue MinC = DAG.getConstant( - APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT); + APInt::getSignedMaxValue(SatWidth).sext(DstWidth), DL, DstVT); SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC); SDValue MaxC = DAG.getConstant( - APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT); + APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT); Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC); } else { SDValue MinC = DAG.getConstant( - APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT); + APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT); Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC); } @@ -3623,9 +3881,10 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. + bool IsStrict = Op->isStrictFPOpcode(); EVT VT = Op.getValueType(); SDLoc dl(Op); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 1 : 0); EVT InVT = In.getValueType(); unsigned Opc = Op.getOpcode(); bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; @@ -3653,6 +3912,13 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, MVT CastVT = MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); + if (IsStrict) { + In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, + {Op.getOperand(0), In}); + return DAG.getNode( + ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, + {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)}); + } In = DAG.getNode(Opc, dl, CastVT, In); return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); } @@ -3661,9 +3927,24 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; EVT CastVT = VT.changeVectorElementTypeToInteger(); In = DAG.getNode(CastOpc, dl, CastVT, In); + if (IsStrict) + return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In}); return DAG.getNode(Opc, dl, VT, In); } + // Use a scalar operation for conversions between single-element vectors of + // the same size. 
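// e.g. (v1f64 (sint_to_fp (v1i64 %v))) becomes an EXTRACT_VECTOR_ELT of
// lane 0 followed by a scalar SCVTF, instead of an unsupported v1i64
// vector conversion.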
+ if (VT.getVectorNumElements() == 1) { + SDValue Extract = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), + In, DAG.getConstant(0, dl, MVT::i64)); + EVT ScalarVT = VT.getScalarType(); + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, + {Op.getOperand(0), Extract}); + return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract); + } + return Op; } @@ -3676,10 +3957,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); // f16 conversions are promoted to f32 when full fp16 is not supported. - if (Op.getValueType() == MVT::f16 && - !Subtarget->hasFullFP16()) { - assert(!IsStrict && "Lowering of strict fp16 not yet implemented"); + if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { SDLoc dl(Op); + if (IsStrict) { + SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other}, + {Op.getOperand(0), SrcVal}); + return DAG.getNode( + ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other}, + {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)}); + } return DAG.getNode( ISD::FP_ROUND, dl, MVT::f16, DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), @@ -3742,6 +4028,14 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op, return LowerFixedLengthBitcastToSVE(Op, DAG); if (OpVT.isScalableVector()) { + // Bitcasting between unpacked vector types of different element counts is + // not a NOP because the live elements are laid out differently. + // 01234567 + // e.g. nxv2i32 = XX??XX?? + // nxv4f16 = X?X?X?X? + if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount()) + return SDValue(); + if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) { assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() && "Expected int->fp bitcast!"); @@ -3964,7 +4258,7 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64; if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON)) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED); // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. @@ -4059,10 +4353,26 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) { case AArch64ISD::SETCC_MERGE_ZERO: return Reinterpret; case ISD::INTRINSIC_WO_CHAIN: - if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue) + switch (InOp.getConstantOperandVal(0)) { + case Intrinsic::aarch64_sve_ptrue: + case Intrinsic::aarch64_sve_cmpeq_wide: + case Intrinsic::aarch64_sve_cmpne_wide: + case Intrinsic::aarch64_sve_cmpge_wide: + case Intrinsic::aarch64_sve_cmpgt_wide: + case Intrinsic::aarch64_sve_cmplt_wide: + case Intrinsic::aarch64_sve_cmple_wide: + case Intrinsic::aarch64_sve_cmphs_wide: + case Intrinsic::aarch64_sve_cmphi_wide: + case Intrinsic::aarch64_sve_cmplo_wide: + case Intrinsic::aarch64_sve_cmpls_wide: return Reinterpret; + } } + // Splat vectors of one will generate ptrue instructions + if (ISD::isConstantSplatVectorAllOnes(InOp.getNode())) + return Reinterpret; + // Otherwise, zero the newly introduced lanes. 
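// e.g. reinterpreting nxv4i1 as nxv16i1 leaves three out of every four
// lanes unspecified, so the result is ANDed with a reinterpreted PTRUE of
// the narrower predicate width to force those lanes to zero.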
SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all); SDValue MaskReinterpret = @@ -4073,12 +4383,12 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = Op.getConstantOperandVal(1); + SDLoc DL(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::aarch64_mops_memset_tag: { auto Node = cast(Op.getNode()); - SDLoc DL(Op); SDValue Chain = Node->getChain(); SDValue Dst = Op.getOperand(2); SDValue Val = Op.getOperand(3); @@ -4100,6 +4410,15 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, // changed. return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL); } + case Intrinsic::aarch64_sme_get_pstatesm: { + SDValue Chain = Op.getOperand(0); + SDValue MRS = DAG.getNode( + AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other), + Chain, DAG.getConstant(AArch64SysReg::SVCR, DL, MVT::i64)); + SDValue Mask = DAG.getConstant(/* PSTATE.SM */ 1, DL, MVT::i64); + SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, MRS, Mask); + return DAG.getMergeValues({And, Chain}, DL); + } } } @@ -4196,6 +4515,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_clz: return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sme_cntsb: + return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), + DAG.getConstant(1, dl, MVT::i32)); + case Intrinsic::aarch64_sme_cntsh: { + SDValue One = DAG.getConstant(1, dl, MVT::i32); + SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One); + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One); + } + case Intrinsic::aarch64_sme_cntsw: { + SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), + DAG.getConstant(1, dl, MVT::i32)); + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, + DAG.getConstant(2, dl, MVT::i32)); + } + case Intrinsic::aarch64_sme_cntsd: { + SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), + DAG.getConstant(1, dl, MVT::i32)); + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, + DAG.getConstant(3, dl, MVT::i32)); + } case Intrinsic::aarch64_sve_cnt: { SDValue Data = Op.getOperand(3); // CTPOP only supports integer operands. 
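A quick illustration of the cntsb/cntsh/cntsw/cntsd lowering above: RDSVL #1 returns the streaming vector length (SVL) in bytes, and the element counts are simple right shifts of that value, mirroring the ISD::SRL nodes. A minimal standalone sketch; the 512-bit SVL is an assumed example value, not something the patch fixes:

#include <cassert>
#include <cstdint>

// RDSVL #1 yields the streaming vector length in bytes; each count is that
// value shifted right by log2(element size in bytes), exactly as the
// ISD::SRL nodes in the lowering compute.
static uint64_t cntsb(uint64_t SVLBytes) { return SVLBytes; }
static uint64_t cntsh(uint64_t SVLBytes) { return SVLBytes >> 1; }
static uint64_t cntsw(uint64_t SVLBytes) { return SVLBytes >> 2; }
static uint64_t cntsd(uint64_t SVLBytes) { return SVLBytes >> 3; }

int main() {
  const uint64_t SVLBytes = 64; // assumed 512-bit streaming vector length
  assert(cntsb(SVLBytes) == 64 && cntsh(SVLBytes) == 32);
  assert(cntsw(SVLBytes) == 16 && cntsd(SVLBytes) == 8);
  return 0;
}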
@@ -4300,6 +4639,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_revw: return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_revd: + return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_sxtb: return DAG.getNode( AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), @@ -4336,7 +4678,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(2), Op.getOperand(3), DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), Op.getOperand(1)); - case Intrinsic::localaddress: { const auto &MF = DAG.getMachineFunction(); const auto *RegInfo = Subtarget->getRegisterInfo(); @@ -4382,9 +4723,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, IntNo == Intrinsic::aarch64_neon_shadd); bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || IntNo == Intrinsic::aarch64_neon_urhadd); - unsigned Opcode = - IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD) - : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD); + unsigned Opcode = IsSignedAdd + ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS) + : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU); return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } @@ -4395,8 +4736,11 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::aarch64_neon_saddlp: case Intrinsic::aarch64_neon_uaddlp: { - unsigned Opcode = AArch64ISD::UADDLP; + unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp + ? AArch64ISD::UADDLP + : AArch64ISD::SADDLP; return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1)); } case Intrinsic::aarch64_neon_sdot: @@ -4428,19 +4772,26 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } -bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { - if (VT.getVectorElementType() == MVT::i32 && - VT.getVectorElementCount().getKnownMinValue() >= 4 && - !VT.isFixedLengthVector()) - return true; +bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, + EVT DataVT) const { + // SVE only supports implicit extension of 32-bit indices. + if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32) + return false; - return false; + // Indices cannot be smaller than the main data type. + if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits()) + return false; + + // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit + // element container type, which would violate the previous clause. 
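+  // (Illustrative.) e.g. nxv2f32 data: the elements occupy 64-bit containers,
+  // so an i32 index would be narrower than the effective element size and the
+  // extend must be kept; nxv4f32 (and any fixed-length type) is fine.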
+  return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2;
 }
 
 bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   return ExtVal.getValueType().isScalableVector() ||
-         useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
-                                      /*OverrideNEON=*/true);
+         useSVEForFixedLengthVectorVT(
+             ExtVal.getValueType(),
+             /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
 }
 
 unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -4466,29 +4817,6 @@ unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
   return AddrModes.find(Key)->second;
 }
 
-unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
-  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
-      {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
-       AArch64ISD::SST1_PRED},
-      {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
-       AArch64ISD::SST1_UXTW_PRED},
-      {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
-       AArch64ISD::SST1_PRED},
-      {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
-       AArch64ISD::SST1_SXTW_PRED},
-      {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
-       AArch64ISD::SST1_SCALED_PRED},
-      {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
-       AArch64ISD::SST1_UXTW_SCALED_PRED},
-      {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
-       AArch64ISD::SST1_SCALED_PRED},
-      {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
-       AArch64ISD::SST1_SXTW_SCALED_PRED},
-  };
-  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
-  return AddrModes.find(Key)->second;
-}
-
 unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
@@ -4511,267 +4839,184 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
   }
 }
 
-bool getGatherScatterIndexIsExtended(SDValue Index) {
-  unsigned Opcode = Index.getOpcode();
-  if (Opcode == ISD::SIGN_EXTEND_INREG)
-    return true;
+SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
 
-  if (Opcode == ISD::AND) {
-    SDValue Splat = Index.getOperand(1);
-    if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
-      return false;
-    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
-    if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
-      return false;
-    return true;
+  SDLoc DL(Op);
+  SDValue Chain = MGT->getChain();
+  SDValue PassThru = MGT->getPassThru();
+  SDValue Mask = MGT->getMask();
+  SDValue BasePtr = MGT->getBasePtr();
+  SDValue Index = MGT->getIndex();
+  SDValue Scale = MGT->getScale();
+  EVT VT = Op.getValueType();
+  EVT MemVT = MGT->getMemoryVT();
+  ISD::LoadExtType ExtType = MGT->getExtensionType();
+  ISD::MemIndexType IndexType = MGT->getIndexType();
+
+  // SVE supports zero (and so undef) passthrough values only; everything else
+  // must be handled manually by an explicit select on the load's output.
+  if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
+    SDValue Ops[] = {Chain, DAG.getUNDEF(VT), Mask, BasePtr, Index, Scale};
+    SDValue Load =
+        DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
+                            MGT->getMemOperand(), IndexType, ExtType);
+    SDValue Select = DAG.getSelect(DL, VT, Mask, Load, PassThru);
+    return DAG.getMergeValues({Select, Load.getValue(1)}, DL);
+  }
+
+  bool IsScaled = MGT->isIndexScaled();
+  bool IsSigned = MGT->isIndexSigned();
+
+  // SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
+  // must be calculated beforehand.
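+  // (Illustrative.) e.g. an i32 gather (scalar store size 4) asked to scale
+  // by 8: the index is pre-shifted left by log2(8) below and the gather is
+  // re-emitted with Scale == 1.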
+  uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
+  if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
+    assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
+    EVT IndexVT = Index.getValueType();
+    Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
+                        DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
+    Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
+
+    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+    return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
+                               MGT->getMemOperand(), IndexType, ExtType);
+  }
+
+  // Lower fixed length gather to a scalable equivalent.
+  if (VT.isFixedLengthVector()) {
+    assert(Subtarget->useSVEForFixedLengthVectors() &&
+           "Cannot lower when not using SVE for fixed vectors!");
+
+    // NOTE: Handle floating-point as if integer then bitcast the result.
+    EVT DataVT = VT.changeVectorElementTypeToInteger();
+    MemVT = MemVT.changeVectorElementTypeToInteger();
+
+    // Find the smallest integer fixed length vector we can use for the gather.
+    EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
+    if (DataVT.getVectorElementType() == MVT::i64 ||
+        Index.getValueType().getVectorElementType() == MVT::i64 ||
+        Mask.getValueType().getVectorElementType() == MVT::i64)
+      PromotedVT = VT.changeVectorElementType(MVT::i64);
+
+    // Promote vector operands except for passthrough, which we know is either
+    // undef or zero, and thus best constructed directly.
+    unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
+    Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
+
+    // A promoted result type forces the need for an extending load.
+    if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD)
+      ExtType = ISD::EXTLOAD;
+
+    EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
+
+    // Convert fixed length vector operands to scalable.
+    MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
+    Index = convertToScalableVector(DAG, ContainerVT, Index);
+    Mask = convertFixedMaskToScalableVector(Mask, DAG);
+    PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
                                   : DAG.getConstant(0, DL, ContainerVT);
+
+    // Emit equivalent scalable vector gather.
+    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+    SDValue Load =
+        DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL,
+                            Ops, MGT->getMemOperand(), IndexType, ExtType);
+
+    // Extract fixed length data then convert to the required result type.
+    SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load);
+    Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result);
+    if (VT.isFloatingPoint())
+      Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+
+    return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
+  }
 
-  return false;
+  // Everything else is legal.
+  return Op;
 }
 
-// If the base pointer of a masked gather or scatter is null, we
-// may be able to swap BasePtr & Index and use the vector + register
-// or vector + immediate addressing mode, e.g.
-// VECTOR + REGISTER: -// getelementptr nullptr, (splat(%offset)) + %indices) -// -> getelementptr %offset, %indices -// VECTOR + IMMEDIATE: -// getelementptr nullptr, (splat(#x)) + %indices) -// -> getelementptr #x, %indices -void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, - unsigned &Opcode, bool IsGather, - SelectionDAG &DAG) { - if (!isNullConstant(BasePtr)) - return; - - // FIXME: This will not match for fixed vector type codegen as the nodes in - // question will have fixed<->scalable conversions around them. This should be - // moved to a DAG combine or complex pattern so that is executes after all of - // the fixed vector insert and extracts have been removed. This deficiency - // will result in a sub-optimal addressing mode being used, i.e. an ADD not - // being folded into the scatter/gather. - ConstantSDNode *Offset = nullptr; - if (Index.getOpcode() == ISD::ADD) - if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) { - if (isa(SplatVal)) - Offset = cast(SplatVal); - else { - BasePtr = SplatVal; - Index = Index->getOperand(0); - return; - } - } - - unsigned NewOp = - IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED; - - if (!Offset) { - std::swap(BasePtr, Index); - Opcode = NewOp; - return; - } - - uint64_t OffsetVal = Offset->getZExtValue(); - unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8; - auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64); - - if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) { - // Index is out of range for the immediate addressing mode - BasePtr = ConstOffset; - Index = Index->getOperand(0); - return; - } - - // Immediate is in range - Opcode = NewOp; - BasePtr = Index->getOperand(0); - Index = ConstOffset; -} +SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, + SelectionDAG &DAG) const { + MaskedScatterSDNode *MSC = cast(Op); -SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, - SelectionDAG &DAG) const { SDLoc DL(Op); - MaskedGatherSDNode *MGT = cast(Op); - assert(MGT && "Can only custom lower gather load nodes"); - - bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector(); - - SDValue Index = MGT->getIndex(); - SDValue Chain = MGT->getChain(); - SDValue PassThru = MGT->getPassThru(); - SDValue Mask = MGT->getMask(); - SDValue BasePtr = MGT->getBasePtr(); - ISD::LoadExtType ExtTy = MGT->getExtensionType(); + SDValue Chain = MSC->getChain(); + SDValue StoreVal = MSC->getValue(); + SDValue Mask = MSC->getMask(); + SDValue BasePtr = MSC->getBasePtr(); + SDValue Index = MSC->getIndex(); + SDValue Scale = MSC->getScale(); + EVT VT = StoreVal.getValueType(); + EVT MemVT = MSC->getMemoryVT(); + ISD::MemIndexType IndexType = MSC->getIndexType(); + bool Truncating = MSC->isTruncatingStore(); - ISD::MemIndexType IndexType = MGT->getIndexType(); - bool IsScaled = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; - bool IsSigned = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; - bool IdxNeedsExtend = - getGatherScatterIndexIsExtended(Index) || - Index.getSimpleValueType().getVectorElementType() == MVT::i32; - bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD; - - EVT VT = PassThru.getSimpleValueType(); - EVT IndexVT = Index.getSimpleValueType(); - EVT MemVT = MGT->getMemoryVT(); - SDValue InputVT = DAG.getValueType(MemVT); + bool IsScaled = MSC->isIndexScaled(); + bool IsSigned = MSC->isIndexSigned(); - if (VT.getVectorElementType() == MVT::bf16 && - 
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
-    return SDValue();
 
+  // SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
+  // must be calculated beforehand.
+  uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
+  if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
+    assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
+    EVT IndexVT = Index.getValueType();
+    Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
+                        DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
+    Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
 
-  if (IsFixedLength) {
-    assert(Subtarget->useSVEForFixedLengthVectors() &&
-           "Cannot lower when not using SVE for fixed vectors");
-    if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
-      IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
-      MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
-    } else {
-      MemVT = getContainerForFixedLengthVector(DAG, MemVT);
-      IndexVT = MemVT.changeTypeToInteger();
-    }
-    InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
-    Mask = DAG.getNode(
-        ISD::SIGN_EXTEND, DL,
-        VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
+    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+    return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
+                                MSC->getMemOperand(), IndexType, Truncating);
   }
 
-  if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
-    PassThru = SDValue();
+  // Lower fixed length scatter to a scalable equivalent.
+  if (VT.isFixedLengthVector()) {
+    assert(Subtarget->useSVEForFixedLengthVectors() &&
+           "Cannot lower when not using SVE for fixed vectors!");
 
-  if (VT.isFloatingPoint() && !IsFixedLength) {
-    // Handle FP data by using an integer gather and casting the result.
-    if (PassThru) {
-      EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
-      PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
+    // Once bitcast we treat floating-point scatters as if integer.
+    if (VT.isFloatingPoint()) {
+      VT = VT.changeVectorElementTypeToInteger();
+      MemVT = MemVT.changeVectorElementTypeToInteger();
+      StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal);
     }
-    InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
-  }
 
-  SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
+    // Find the smallest integer fixed length vector we can use for the scatter.
+    EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
+    if (VT.getVectorElementType() == MVT::i64 ||
+        Index.getValueType().getVectorElementType() == MVT::i64 ||
+        Mask.getValueType().getVectorElementType() == MVT::i64)
+      PromotedVT = VT.changeVectorElementType(MVT::i64);
 
-  if (getGatherScatterIndexIsExtended(Index))
-    Index = Index.getOperand(0);
+    // Promote vector operands.
+    unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
+    Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
+    StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal);
 
-  unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
-  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
-                              /*isGather=*/true, DAG);
+    // A promoted value type forces the need for a truncating store.
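+    // (Illustrative.) e.g. v8i8 data promoted to a v8i32 working type must
+    // still be written out as i8 elements, i.e. a truncating scatter.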
+ if (PromotedVT != VT) + Truncating = true; - if (ResNeedsSignExtend) - Opcode = getSignExtendedGatherOpcode(Opcode); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT); - if (IsFixedLength) { - if (Index.getSimpleValueType().isFixedLengthVector()) - Index = convertToScalableVector(DAG, IndexVT, Index); - if (BasePtr.getSimpleValueType().isFixedLengthVector()) - BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr); + // Convert fixed length vector operands to scalable. + MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType()); + Index = convertToScalableVector(DAG, ContainerVT, Index); Mask = convertFixedMaskToScalableVector(Mask, DAG); - } - - SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT}; - SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops); - Chain = Result.getValue(1); - - if (IsFixedLength) { - Result = convertFromScalableVector( - DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()), - Result); - Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result); - Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); - - if (PassThru) - Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru); - } else { - if (PassThru) - Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru); - - if (VT.isFloatingPoint()) - Result = getSVESafeBitCast(VT, Result, DAG); - } - - return DAG.getMergeValues({Result, Chain}, DL); -} - -SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - MaskedScatterSDNode *MSC = cast(Op); - assert(MSC && "Can only custom lower scatter store nodes"); - - bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector(); - - SDValue Index = MSC->getIndex(); - SDValue Chain = MSC->getChain(); - SDValue StoreVal = MSC->getValue(); - SDValue Mask = MSC->getMask(); - SDValue BasePtr = MSC->getBasePtr(); - - ISD::MemIndexType IndexType = MSC->getIndexType(); - bool IsScaled = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; - bool IsSigned = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; - bool NeedsExtend = - getGatherScatterIndexIsExtended(Index) || - Index.getSimpleValueType().getVectorElementType() == MVT::i32; - - EVT VT = StoreVal.getSimpleValueType(); - EVT IndexVT = Index.getSimpleValueType(); - SDVTList VTs = DAG.getVTList(MVT::Other); - EVT MemVT = MSC->getMemoryVT(); - SDValue InputVT = DAG.getValueType(MemVT); - - if (VT.getVectorElementType() == MVT::bf16 && - !static_cast(DAG.getSubtarget()).hasBF16()) - return SDValue(); + StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal); - if (IsFixedLength) { - assert(Subtarget->useSVEForFixedLengthVectors() && - "Cannot lower when not using SVE for fixed vectors"); - if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); - } else { - MemVT = getContainerForFixedLengthVector(DAG, MemVT); - IndexVT = MemVT.changeTypeToInteger(); - } - InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); - - StoreVal = - DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal); - StoreVal = DAG.getNode( - ISD::ANY_EXTEND, DL, - VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal); - StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal); - Mask = DAG.getNode( - ISD::SIGN_EXTEND, DL, - VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); - } else if 
(VT.isFloatingPoint()) { - // Handle FP data by casting the data so an integer scatter can be used. - EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); - StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG); - InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); - } - - if (getGatherScatterIndexIsExtended(Index)) - Index = Index.getOperand(0); - - unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend); - selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, - /*isGather=*/false, DAG); - - if (IsFixedLength) { - if (Index.getSimpleValueType().isFixedLengthVector()) - Index = convertToScalableVector(DAG, IndexVT, Index); - if (BasePtr.getSimpleValueType().isFixedLengthVector()) - BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr); - Mask = convertFixedMaskToScalableVector(Mask, DAG); + // Emit equivalent scalable vector scatter. + SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops, + MSC->getMemOperand(), IndexType, Truncating); } - SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT}; - return DAG.getNode(Opcode, DL, VTs, Ops); + // Everything else is legal. + return Op; } SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const { @@ -4780,7 +5025,9 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const { assert(LoadNode && "Expected custom lowering of a masked load node"); EVT VT = Op->getValueType(0); - if (useSVEForFixedLengthVectorVT(VT, true)) + if (useSVEForFixedLengthVectorVT( + VT, + /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) return LowerFixedLengthVectorMLoadToSVE(Op, DAG); SDValue PassThru = LoadNode->getPassThru(); @@ -4847,7 +5094,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, EVT MemVT = StoreNode->getMemoryVT(); if (VT.isVector()) { - if (useSVEForFixedLengthVectorVT(VT, true)) + if (useSVEForFixedLengthVectorVT( + VT, + /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) return LowerFixedLengthVectorStoreToSVE(Op, DAG); unsigned AS = StoreNode->getAddressSpace(); @@ -5007,6 +5256,22 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { Cmp.getValue(1)); } +static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) { + SDValue Chain = Op.getOperand(0); + SDValue Cond = Op.getOperand(1); + SDValue Dest = Op.getOperand(2); + + AArch64CC::CondCode CC; + if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) { + SDLoc dl(Op); + SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, + Cmp); + } + + return SDValue(); +} + SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Custom lowering: "); @@ -5026,6 +5291,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG); + case ISD::BRCOND: + return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT: @@ -5046,11 +5313,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerVACOPY(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: - return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADDCARRY: + return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/); + case ISD::SUBCARRY: + return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false 
/*unsigned*/); + case ISD::SADDO_CARRY: + return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/); + case ISD::SSUBO_CARRY: + return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: @@ -5165,11 +5435,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::MUL: return LowerMUL(Op, DAG); case ISD::MULHS: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED); case ISD::MULHU: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED); case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: @@ -5234,11 +5502,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerFixedLengthVectorLoadToSVE(Op, DAG); return LowerLOAD(Op, DAG); case ISD::ADD: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); case ISD::AND: - return LowerToScalableOp(Op, DAG); case ISD::SUB: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED); + return LowerToScalableOp(Op, DAG); case ISD::FMAXIMUM: return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED); case ISD::FMAXNUM: @@ -5260,12 +5526,23 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::BSWAP: return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU); case ISD::CTLZ: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU); case ISD::CTTZ: return LowerCTTZ(Op, DAG); case ISD::VECTOR_SPLICE: return LowerVECTOR_SPLICE(Op, DAG); + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: { + assert(Op.getOperand(1).getValueType() == MVT::f16 && + "Expected custom lowering of rounding operations only for f16"); + SDLoc DL(Op); + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } } } @@ -5275,10 +5552,7 @@ bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const { bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( EVT VT, bool OverrideNEON) const { - if (!Subtarget->useSVEForFixedLengthVectors()) - return false; - - if (!VT.isFixedLengthVector()) + if (!VT.isFixedLengthVector() || !VT.isSimple()) return false; // Don't use SVE for vectors we cannot scalarize if required. @@ -5300,12 +5574,16 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( // All SVE implementations support NEON sized vectors. if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector())) - return true; + return Subtarget->hasSVE(); // Ensure NEON MVTs only belong to a single register class. if (VT.getFixedSizeInBits() <= 128) return false; + // Ensure wider than NEON code generation is enabled. + if (!Subtarget->useSVEForFixedLengthVectors()) + return false; + // Don't use SVE for types that don't fit. 
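   // (Illustrative.) e.g. with a minimum configured SVE width of 256 bits, a
   // 512-bit v16i32 fails the check below and is left to generic splitting.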
   if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
     return false;
 
@@ -5322,6 +5600,36 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
 // Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
+static unsigned getIntrinsicID(const SDNode *N) {
+  unsigned Opcode = N->getOpcode();
+  switch (Opcode) {
+  default:
+    return Intrinsic::not_intrinsic;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+    if (IID < Intrinsic::num_intrinsics)
+      return IID;
+    return Intrinsic::not_intrinsic;
+  }
+  }
+}
+
+bool AArch64TargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+                                                SDValue N1) const {
+  if (!N0.hasOneUse())
+    return false;
+
+  unsigned IID = getIntrinsicID(N1.getNode());
+  // Avoid reassociating expressions that can be lowered to smlal/umlal.
+  if (IID == Intrinsic::aarch64_neon_umull ||
+      N1.getOpcode() == AArch64ISD::UMULL ||
+      IID == Intrinsic::aarch64_neon_smull ||
+      N1.getOpcode() == AArch64ISD::SMULL)
+    return N0.getOpcode() != ISD::ADD;
+
+  return true;
+}
+
 /// Selects the correct CCAssignFn for a given CallingConvention value.
 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                      bool IsVarArg) const {
@@ -5368,8 +5676,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
+  const Function &F = MF.getFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+
+  SmallVector<ISD::OutputArg, 4> Outs;
+  GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
+                DAG.getTargetLoweringInfo(), MF.getDataLayout());
+  if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); }))
+    FuncInfo->setIsSVECC(true);
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -5383,7 +5699,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   // we use a special version of AnalyzeFormalArguments to pass in ValVT and
   // LocVT.
   unsigned NumArgs = Ins.size();
-  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
+  Function::const_arg_iterator CurOrigArg = F.arg_begin();
   unsigned CurArgIdx = 0;
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT ValVT = Ins[i].VT;
@@ -5454,11 +5770,13 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
       else if (RegVT == MVT::f128 || RegVT.is128BitVector())
         RC = &AArch64::FPR128RegClass;
       else if (RegVT.isScalableVector() &&
-               RegVT.getVectorElementType() == MVT::i1)
+               RegVT.getVectorElementType() == MVT::i1) {
+        FuncInfo->setIsSVECC(true);
         RC = &AArch64::PPRRegClass;
-      else if (RegVT.isScalableVector())
+      } else if (RegVT.isScalableVector()) {
+        FuncInfo->setIsSVECC(true);
        RC = &AArch64::ZPRRegClass;
-      else
+      } else
        llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
       // Transform the arguments in physical registers into virtual ones.
@@ -5580,7 +5898,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
       // i1 arguments are zero-extended to i8 by the caller. Emit a
       // hint to reflect this.
       if (Ins[i].isOrigArg()) {
-        Argument *OrigArg = MF.getFunction().getArg(Ins[i].getOrigArgIndex());
+        Argument *OrigArg = F.getArg(Ins[i].getOrigArgIndex());
         if (OrigArg->getType()->isIntegerTy(1)) {
           if (!Ins[i].Flags.isZExt()) {
             ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
@@ -5595,7 +5913,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
 
   // varargs
-  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   if (isVarArg) {
     if (!Subtarget->isTargetDarwin() || IsWin64) {
       // The AAPCS variadic function ABI is identical to the non-variadic
@@ -5843,14 +6160,62 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   }
 }
 
+static void analyzeCallOperands(const AArch64TargetLowering &TLI,
+                                const AArch64Subtarget *Subtarget,
+                                const TargetLowering::CallLoweringInfo &CLI,
+                                CCState &CCInfo) {
+  const SelectionDAG &DAG = CLI.DAG;
+  CallingConv::ID CalleeCC = CLI.CallConv;
+  bool IsVarArg = CLI.IsVarArg;
+  const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+
+  unsigned NumArgs = Outs.size();
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    MVT ArgVT = Outs[i].VT;
+    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+
+    bool UseVarArgCC = false;
+    if (IsVarArg) {
+      // On Windows, the fixed arguments in a vararg call are passed in GPRs
+      // too, so use the vararg CC to force them to integer registers.
+      if (IsCalleeWin64) {
+        UseVarArgCC = true;
+      } else {
+        UseVarArgCC = !Outs[i].IsFixed;
+      }
+    } else {
+      // Get type of the original argument.
+      EVT ActualVT =
+          TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
+                           /*AllowUnknown*/ true);
+      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
+      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+        ArgVT = MVT::i8;
+      else if (ActualMVT == MVT::i16)
+        ArgVT = MVT::i16;
+    }
+
+    CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
+    bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+    assert(!Res && "Call operand has unhandled type");
+    (void)Res;
+  }
+}
+
 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
-    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-    const SmallVectorImpl<ISD::OutputArg> &Outs,
-    const SmallVectorImpl<SDValue> &OutVals,
-    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+    const CallLoweringInfo &CLI) const {
+  CallingConv::ID CalleeCC = CLI.CallConv;
   if (!mayTailCallThisCC(CalleeCC))
     return false;
 
+  SDValue Callee = CLI.Callee;
+  bool IsVarArg = CLI.IsVarArg;
+  const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  const SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+  const SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+  const SelectionDAG &DAG = CLI.DAG;
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
@@ -5860,7 +6225,7 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   // The check for matching callee-saved regs will determine whether it is
   // eligible for TCO.
   if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
-      AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
+      MF.getInfo<AArch64FunctionInfo>()->isSVECC())
     CallerCC = CallingConv::AArch64_SVE_VectorCall;
 
   bool CCMatch = CallerCC == CalleeCC;
@@ -5915,30 +6280,14 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
-  assert((!isVarArg || CalleeCC == CallingConv::C) &&
+  assert((!IsVarArg || CalleeCC == CallingConv::C) &&
          "Unexpected variadic calling convention");
 
   LLVMContext &C = *DAG.getContext();
-  if (isVarArg && !Outs.empty()) {
-    // At least two cases here: if caller is fastcc then we can't have any
-    // memory arguments (we'd be expected to clean up the stack afterwards). If
-    // caller is C then we could potentially use its argument area.
-
-    // FIXME: for now we take the most conservative of these in both cases:
-    // disallow all variadic memory operands.
-    SmallVector<CCValAssign, 16> ArgLocs;
-    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
-
-    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
-    for (const CCValAssign &ArgLoc : ArgLocs)
-      if (!ArgLoc.isRegLoc())
-        return false;
-  }
-
   // Check that the call results are passed in the same way.
   if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
-                                  CCAssignFnForCall(CalleeCC, isVarArg),
-                                  CCAssignFnForCall(CallerCC, isVarArg)))
+                                  CCAssignFnForCall(CalleeCC, IsVarArg),
+                                  CCAssignFnForCall(CallerCC, IsVarArg)))
     return false;
   // The callee has to preserve all registers the caller needs to preserve.
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -5958,9 +6307,22 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
     return true;
 
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
+  CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
+
+  analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
+
+  if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
+    // When we are musttail, additional checks have already been done and we
+    // can safely skip this check.
+    // At least two cases here: if caller is fastcc then we can't have any
+    // memory arguments (we'd be expected to clean up the stack afterwards). If
+    // caller is C then we could potentially use its argument area.
 
-  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
+    // FIXME: for now we take the most conservative of these in both cases:
+    // disallow all variadic memory operands.
+    for (const CCValAssign &ArgLoc : ArgLocs)
+      if (!ArgLoc.isRegLoc())
+        return false;
+  }
 
   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 
@@ -6051,7 +6413,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   SDValue Chain = CLI.Chain;
   SDValue Callee = CLI.Callee;
   bool &IsTailCall = CLI.IsTailCall;
-  CallingConv::ID CallConv = CLI.CallConv;
+  CallingConv::ID &CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
   bool IsSibCall = false;
-  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
+  bool GuardWithBTI = false;
+
+  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+      !Subtarget->noBTIAtReturnTwice()) {
+    GuardWithBTI = FuncInfo->branchTargetEnforcement();
+  }
 
   // Check callee args/returns for SVE registers and set calling convention
   // accordingly.
@@ -6079,8 +6446,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
- IsTailCall = isEligibleForTailCallOptimization( - Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG); + IsTailCall = isEligibleForTailCallOptimization(CLI); // A sibling call is one where we're under the usual C ABI and not planning // to change that but can still do a tail call: @@ -6101,56 +6467,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); if (IsVarArg) { - // Handle fixed and variable vector arguments differently. - // Variable vector arguments always go into memory. unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = Outs[i].VT; - if (!Outs[i].IsFixed && ArgVT.isScalableVector()) + if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector()) report_fatal_error("Passing SVE types to variadic functions is " "currently not supported"); - - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - bool UseVarArgCC = !Outs[i].IsFixed; - // On Windows, the fixed arguments in a vararg call are passed in GPRs - // too, so use the vararg CC to force them to integer registers. - if (IsCalleeWin64) - UseVarArgCC = true; - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC); - bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - } else { - // At this point, Outs[].VT may already be promoted to i32. To correctly - // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. - // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here - // we use a special version of AnalyzeCallOperands to pass in ValVT and - // LocVT. - unsigned NumArgs = Outs.size(); - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ValVT = Outs[i].VT; - // Get type of the original argument. - EVT ActualVT = getValueType(DAG.getDataLayout(), - CLI.getArgs()[Outs[i].OrigArgIndex].Ty, - /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - ValVT = MVT::i8; - else if (ActualMVT == MVT::i16) - ValVT = MVT::i16; - - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; } } + analyzeCallOperands(*this, Subtarget, CLI, CCInfo); + // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -6536,7 +6863,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB); auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT); Ops.insert(Ops.begin() + 1, GA); - } + } else if (GuardWithBTI) + CallOpc = AArch64ISD::CALL_BTI; // Returns a chain and a flag for retval copy to use. 
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); @@ -7313,103 +7641,88 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { + if (!Subtarget->hasNEON()) + return SDValue(); + EVT VT = Op.getValueType(); + EVT IntVT = VT.changeTypeToInteger(); SDLoc DL(Op); SDValue In1 = Op.getOperand(0); SDValue In2 = Op.getOperand(1); EVT SrcVT = In2.getValueType(); - if (VT.isScalableVector()) { - if (VT != SrcVT) - return SDValue(); + if (SrcVT.bitsLT(VT)) + In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); + else if (SrcVT.bitsGT(VT)) + In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); - // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK) - // - // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU; - // maybe useful for copysign operations with mismatched VTs. - // - // IntVT here is chosen so it's a legal type with the same element width - // as the input. - EVT IntVT = + if (VT.isScalableVector()) + IntVT = getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger()); - unsigned NumBits = VT.getScalarSizeInBits(); - SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT); - SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT); - SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask, - getSVESafeBitCast(IntVT, In2, DAG)); - SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask, - getSVESafeBitCast(IntVT, In1, DAG)); - SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude); - return getSVESafeBitCast(VT, IntResult, DAG); - } - if (!Subtarget->hasNEON()) + if (VT != In2.getValueType()) return SDValue(); - if (SrcVT.bitsLT(VT)) - In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); - else if (SrcVT.bitsGT(VT)) - In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); + auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) { + if (VT.isScalableVector()) + return getSVESafeBitCast(VT, Op, DAG); - EVT VecVT; - uint64_t EltMask; - SDValue VecVal1, VecVal2; + return DAG.getBitcast(VT, Op); + }; - auto setVecVal = [&] (int Idx) { + SDValue VecVal1, VecVal2; + EVT VecVT; + auto SetVecVal = [&](int Idx = -1) { if (!VT.isVector()) { - VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT, - DAG.getUNDEF(VecVT), In1); - VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT, - DAG.getUNDEF(VecVT), In2); + VecVal1 = + DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In1); + VecVal2 = + DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { - VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); - VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); + VecVal1 = BitCast(VecVT, In1, DAG); + VecVal2 = BitCast(VecVT, In2, DAG); } }; - - if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { - VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32); - EltMask = 0x80000000ULL; - setVecVal(AArch64::ssub); - } else if (VT == MVT::f64 || VT == MVT::v2f64) { + if (VT.isVector()) { + VecVT = IntVT; + SetVecVal(); + } else if (VT == MVT::f64) { VecVT = MVT::v2i64; - - // We want to materialize a mask with the high bit set, but the AdvSIMD - // immediate moves cannot materialize that in a single instruction for - // 64-bit elements. Instead, materialize zero and then negate it. - EltMask = 0; - - setVecVal(AArch64::dsub); - } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) { - VecVT = (VT == MVT::v4f16 ? 
MVT::v4i16 : MVT::v8i16); - EltMask = 0x8000ULL; - setVecVal(AArch64::hsub); + SetVecVal(AArch64::dsub); + } else if (VT == MVT::f32) { + VecVT = MVT::v4i32; + SetVecVal(AArch64::ssub); + } else if (VT == MVT::f16) { + VecVT = MVT::v8i16; + SetVecVal(AArch64::hsub); } else { llvm_unreachable("Invalid type for copysign!"); } - SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT); + unsigned BitWidth = In1.getScalarValueSizeInBits(); + SDValue SignMaskV = DAG.getConstant(~APInt::getSignMask(BitWidth), DL, VecVT); - // If we couldn't materialize the mask above, then the mask vector will be - // the zero vector, and we need to negate it here. + // We want to materialize a mask with every bit but the high bit set, but the + // AdvSIMD immediate moves cannot materialize that in a single instruction for + // 64-bit elements. Instead, materialize all bits set and then negate that. if (VT == MVT::f64 || VT == MVT::v2f64) { - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); + SignMaskV = DAG.getConstant(APInt::getAllOnes(BitWidth), DL, VecVT); + SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, SignMaskV); + SignMaskV = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, SignMaskV); + SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, SignMaskV); } - SDValue Sel = - DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); - + SDValue BSP = + DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2); if (VT == MVT::f16) - return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel); + return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP); if (VT == MVT::f32) - return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); - else if (VT == MVT::f64) - return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel); - else - return DAG.getNode(ISD::BITCAST, DL, VT, Sel); + return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP); + if (VT == MVT::f64) + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, BSP); + + return BitCast(VT, BSP, DAG); } SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { @@ -7485,7 +7798,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); assert(VT.isScalableVector() || - useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)); + useSVEForFixedLengthVectorVT( + VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())); SDLoc DL(Op); SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0)); @@ -7517,22 +7831,19 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op, } if (VT.isScalableVector() || - useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) { + useSVEForFixedLengthVectorVT( + VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) { switch (Opcode) { default: llvm_unreachable("Wrong instruction"); case ISD::SMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED); case ISD::SMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED); case ISD::UMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED); case 
ISD::UMIN:
-      return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
-                                 /*OverrideNEON=*/true);
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
     }
   }
 
@@ -7547,9 +7858,9 @@ SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
   EVT VT = Op.getValueType();
 
   if (VT.isScalableVector() ||
-      useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
-                               true);
+      useSVEForFixedLengthVectorVT(
+          VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
 
   SDLoc DL(Op);
   SDValue REVB;
@@ -8990,12 +9301,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     if (V.isUndef())
       continue;
     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-             !isa<ConstantSDNode>(V.getOperand(1))) {
+             !isa<ConstantSDNode>(V.getOperand(1)) ||
+             V.getOperand(0).getValueType().isScalableVector()) {
       LLVM_DEBUG(
          dbgs() << "Reshuffle failed: "
                    "a shuffle can only come from building a vector from "
-                   "various elements of other vectors, provided their "
-                   "indices are constant\n");
+                   "various elements of other fixed-width vectors, provided "
+                   "their indices are constant\n");
       return SDValue();
     }
 
@@ -9011,10 +9323,72 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     Source->MaxElt = std::max(Source->MaxElt, EltNo);
   }
 
+  // If we have 3 or 4 sources, try to generate a TBL, which will at least be
+  // better than moving to/from gpr registers for larger vectors.
+  if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
+    // Construct a mask for the tbl. We may need to adjust the index for types
+    // larger than i8.
+    SmallVector<int, 16> Mask;
+    unsigned OutputFactor = VT.getScalarSizeInBits() / 8;
+    for (unsigned I = 0; I < NumElts; ++I) {
+      SDValue V = Op.getOperand(I);
+      if (V.isUndef()) {
+        for (unsigned OF = 0; OF < OutputFactor; OF++)
+          Mask.push_back(-1);
+        continue;
+      }
+      // Set the Mask lanes adjusted for the size of the input and output
+      // lanes. The Mask is always i8, so it will set OutputFactor lanes per
+      // output element, adjusted in their positions per input and output types.
+      unsigned Lane = V.getConstantOperandVal(1);
+      for (unsigned S = 0; S < Sources.size(); S++) {
+        if (V.getOperand(0) == Sources[S].Vec) {
+          unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
+          unsigned InputBase = 16 * S + Lane * InputSize / 8;
+          for (unsigned OF = 0; OF < OutputFactor; OF++)
+            Mask.push_back(InputBase + OF);
+          break;
+        }
+      }
+    }
+
+    // Construct the tbl3/tbl4 out of an intrinsic, the sources converted to
+    // v16i8, and the TBLMask.
+    SmallVector<SDValue, 16> TBLOperands;
+    TBLOperands.push_back(DAG.getConstant(Sources.size() == 3
+                                              ? Intrinsic::aarch64_neon_tbl3
+                                              : Intrinsic::aarch64_neon_tbl4,
+                                          dl, MVT::i32));
+    for (unsigned i = 0; i < Sources.size(); i++) {
+      SDValue Src = Sources[i].Vec;
+      EVT SrcVT = Src.getValueType();
+      Src = DAG.getBitcast(SrcVT.is64BitVector() ? MVT::v8i8 : MVT::v16i8, Src);
+      assert((SrcVT.is64BitVector() || SrcVT.is128BitVector()) &&
+             "Expected a legally typed vector");
+      if (SrcVT.is64BitVector())
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, Src,
+                          DAG.getUNDEF(MVT::v8i8));
+      TBLOperands.push_back(Src);
+    }
+
+    SmallVector<SDValue, 16> TBLMask;
+    for (unsigned i = 0; i < Mask.size(); i++)
+      TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
+    assert((Mask.size() == 8 || Mask.size() == 16) &&
+           "Expected a v8i8 or v16i8 Mask");
+    TBLOperands.push_back(DAG.getBuildVector(
+        Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
+
+    SDValue Shuffle =
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
+                    Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
+    return DAG.getBitcast(VT, Shuffle);
+  }
+
   if (Sources.size() > 2) {
-    LLVM_DEBUG(
-        dbgs() << "Reshuffle failed: currently only do something sane when at "
-                  "most two source vectors are involved\n");
+    LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
                      << "sensible when at most two source vectors are "
                      << "involved\n");
     return SDValue();
  }
 
@@ -9039,8 +9413,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
 
   for (auto &Src : Sources) {
     EVT SrcVT = Src.ShuffleVec.getValueType();
-    uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
-    if (SrcVTSize == VTSize)
+    TypeSize SrcVTSize = SrcVT.getSizeInBits();
+    if (SrcVTSize == TypeSize::Fixed(VTSize))
       continue;
 
     // This stage of the search produces a source with the same element type as
@@ -9049,7 +9423,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
     EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
 
-    if (SrcVTSize < VTSize) {
+    if (SrcVTSize.getFixedValue() < VTSize) {
       assert(2 * SrcVTSize == VTSize);
       // We can pad out the smaller vector for free, so if it's part of a
       // shuffle...
@@ -9059,7 +9433,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       continue;
     }
 
-    if (SrcVTSize != 2 * VTSize) {
+    if (SrcVTSize.getFixedValue() != 2 * VTSize) {
      LLVM_DEBUG(
          dbgs() << "Reshuffle failed: result vector too small to extract\n");
      return SDValue();
@@ -9205,6 +9579,56 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
   return true;
 }
 
+// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
+// v4i32s. This is really a truncate, which we can construct out of (legal)
+// concats and truncate nodes.
+static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
+  if (V.getValueType() != MVT::v16i8)
+    return SDValue();
+  assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
+
+  for (unsigned X = 0; X < 4; X++) {
+    // Check the first item in each group is an extract from lane 0 of a v4i32
+    // or v4i16.
+    SDValue BaseExt = V.getOperand(X * 4);
+    if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        (BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
+         BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
+        !isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
+        BaseExt.getConstantOperandVal(1) != 0)
+      return SDValue();
+    SDValue Base = BaseExt.getOperand(0);
+    // And check the other items are extracts from the same vector.
+    for (unsigned Y = 1; Y < 4; Y++) {
+      SDValue Ext = V.getOperand(X * 4 + Y);
+      if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+          Ext.getOperand(0) != Base ||
+          !isa<ConstantSDNode>(Ext.getOperand(1)) ||
+          Ext.getConstantOperandVal(1) != Y)
+        return SDValue();
+    }
+  }
+
+  // Turn the buildvector into a series of truncates and concats, which will
+  // become uzp1's. Any v4i32s we found get truncated to v4i16, which are
+  // concatenated together to produce 2 v8i16. These are both truncated and
+  // concatenated together.
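+  // (Illustrative.) For v4i32 inputs a,b,c,d the result is
+  //   t0 = concat(trunc(a):v4i16, trunc(b):v4i16)   ; v8i16
+  //   t1 = concat(trunc(c):v4i16, trunc(d):v4i16)   ; v8i16
+  //   r  = concat(trunc(t0):v8i8, trunc(t1):v8i8)   ; v16i8
+  // where each trunc keeps the low half of every lane, i.e. a uzp1.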
+  SDLoc DL(V);
+  SDValue Trunc[4] = {
+      V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
+      V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
+  for (int I = 0; I < 4; I++)
+    if (Trunc[I].getValueType() == MVT::v4i32)
+      Trunc[I] = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, Trunc[I]);
+  SDValue Concat0 =
+      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]);
+  SDValue Concat1 =
+      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]);
+  SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0);
+  SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1);
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1);
+}
+
 /// Check if a vector shuffle corresponds to a DUP instruction with a larger
 /// element width than the vector lane type. If that is the case the function
 /// returns true and writes the value of the DUP instruction lane operand into
@@ -9534,8 +9958,12 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
 }
 
 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
-/// the specified operations to build the shuffle.
-static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+/// the specified operations to build the shuffle. ID is the perfect-shuffle
+/// ID, V1 and V2 are the original shuffle inputs. PFEntry is the perfect-shuffle
+/// table entry and LHS/RHS are the immediate inputs for this stage of the
+/// shuffle.
+static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
+                                      SDValue V2, unsigned PFEntry, SDValue LHS,
                                       SDValue RHS, SelectionDAG &DAG,
                                       const SDLoc &dl) {
   unsigned OpNum = (PFEntry >> 26) & 0x0F;
@@ -9552,12 +9980,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
     OP_VEXT1,
     OP_VEXT2,
     OP_VEXT3,
-    OP_VUZPL, // VUZP, left result
-    OP_VUZPR, // VUZP, right result
-    OP_VZIPL, // VZIP, left result
-    OP_VZIPR, // VZIP, right result
-    OP_VTRNL, // VTRN, left result
-    OP_VTRNR  // VTRN, right result
+    OP_VUZPL,  // VUZP, left result
+    OP_VUZPR,  // VUZP, right result
+    OP_VZIPL,  // VZIP, left result
+    OP_VZIPR,  // VZIP, right result
+    OP_VTRNL,  // VTRN, left result
+    OP_VTRNR,  // VTRN, right result
+    OP_MOVLANE // Move lane. RHSID is the lane to move into
   };
 
   if (OpNum == OP_COPY) {
@@ -9567,9 +9996,71 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
     return RHS;
   }
 
+  if (OpNum == OP_MOVLANE) {
+    // Decompose a PerfectShuffle ID to get the Mask for lane Elt
+    auto getPFIDLane = [](unsigned ID, int Elt) -> int {
+      assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
+      Elt = 3 - Elt;
+      while (Elt > 0) {
+        ID /= 9;
+        Elt--;
+      }
+      return (ID % 9 == 8) ? -1 : ID % 9;
+    };
+
+    // For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
+    // get the lane to move from via the PFID, which is always from the
+    // original vectors (V1 or V2).
+    SDValue OpLHS = GeneratePerfectShuffle(
+        LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+    EVT VT = OpLHS.getValueType();
+    assert(RHSID < 8 && "Expected a lane index for RHSID!");
+    unsigned ExtLane = 0;
+    SDValue Input;
+
+    // OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
+    // convert into a higher type.
+    if (RHSID & 0x4) {
+      int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
+      if (MaskElt == -1)
+        MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
+      assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
+      ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
MaskElt : (MaskElt - 2); + Input = MaskElt < 2 ? V1 : V2; + if (VT.getScalarSizeInBits() == 16) { + Input = DAG.getBitcast(MVT::v2f32, Input); + OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS); + } else { + assert(VT.getScalarSizeInBits() == 32 && + "Expected 16 or 32 bit shuffle elemements"); + Input = DAG.getBitcast(MVT::v2f64, Input); + OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS); + } + } else { + int MaskElt = getPFIDLane(ID, RHSID); + assert(MaskElt >= 0 && "Didn't expect an undef movlane index!"); + ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4); + Input = MaskElt < 4 ? V1 : V2; + // Be careful about creating illegal types. Use f16 instead of i16. + if (VT == MVT::v4i16) { + Input = DAG.getBitcast(MVT::v4f16, Input); + OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS); + } + } + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + Input.getValueType().getVectorElementType(), + Input, DAG.getVectorIdxConstant(ExtLane, dl)); + SDValue Ins = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS, + Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl)); + return DAG.getBitcast(VT, Ins); + } + SDValue OpLHS, OpRHS; - OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); - OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, + RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS, + RHS, DAG, dl); EVT VT = OpLHS.getValueType(); switch (OpNum) { @@ -9648,14 +10139,16 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, EVT EltVT = Op.getValueType().getVectorElementType(); unsigned BytesPerElt = EltVT.getSizeInBits() / 8; - SmallVector TBLMask; - for (int Val : ShuffleMask) { - for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { - unsigned Offset = Byte + Val * BytesPerElt; - TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32)); - } + bool Swap = false; + if (V1.isUndef() || isZerosVector(V1.getNode())) { + std::swap(V1, V2); + Swap = true; } + // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill + // out of range values with 0s. We do need to make sure that any out-of-range + // values are really out-of-range for a v16i8 vector. + bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode()); MVT IndexVT = MVT::v8i8; unsigned IndexLen = 8; if (Op.getValueSizeInBits() == 128) { @@ -9663,11 +10156,23 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, IndexLen = 16; } + SmallVector TBLMask; + for (int Val : ShuffleMask) { + for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { + unsigned Offset = Byte + Val * BytesPerElt; + if (Swap) + Offset = Offset < IndexLen ? 
Offset + IndexLen : Offset - IndexLen; + if (IsUndefOrZero && Offset >= IndexLen) + Offset = 255; + TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32)); + } + } + SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1); SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2); SDValue Shuffle; - if (V2.getNode()->isUndef()) { + if (IsUndefOrZero) { if (IndexLen == 8) V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); Shuffle = DAG.getNode( @@ -9732,6 +10237,10 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, if (ExtIdxInBits % CastedEltBitWidth != 0) return false; + // Can't handle cases where vector size is not 128-bit + if (!Extract.getOperand(0).getValueType().is128BitVector()) + return false; + // Update the lane value by offsetting with the scaled extract index. LaneC += ExtIdxInBits / CastedEltBitWidth; @@ -10014,10 +10523,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + PFIndexes[2] * 9 + PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); + return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG, + dl); } return GenerateTBL(Op, ShuffleMask, DAG); @@ -10025,56 +10532,33 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); EVT VT = Op.getValueType(); - EVT ElemVT = VT.getScalarType(); - SDValue SplatVal = Op.getOperand(0); if (useSVEForFixedLengthVectorVT(VT)) return LowerToScalableOp(Op, DAG); - // Extend input splat value where needed to fit into a GPR (32b or 64b only) - // FPRs don't have this restriction. - switch (ElemVT.getSimpleVT().SimpleTy) { - case MVT::i1: { - // The only legal i1 vectors are SVE vectors, so we can use SVE-specific - // lowering code. - if (auto *ConstVal = dyn_cast(SplatVal)) { - // We can hande the zero case during isel. - if (ConstVal->isZero()) - return Op; - if (ConstVal->isOne()) - return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all); - } - // The general case of i1. There isn't any natural way to do this, - // so we use some trickery with whilelo. - SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64); - SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal, - DAG.getValueType(MVT::i1)); - SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, - MVT::i64); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, - DAG.getConstant(0, dl, MVT::i64), SplatVal); - } - case MVT::i8: - case MVT::i16: - case MVT::i32: - SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32); - break; - case MVT::i64: - SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64); - break; - case MVT::f16: - case MVT::bf16: - case MVT::f32: - case MVT::f64: - // Fine as is - break; - default: - report_fatal_error("Unsupported SPLAT_VECTOR input operand type"); - } + assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 && + "Unexpected vector type!"); + + // We can handle the constant cases during isel. + if (isa(Op.getOperand(0))) + return Op; - return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal); + // There isn't a natural way to handle the general i1 case, so we use some + // trickery with whilelo. 
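Why the whilelo trick below yields a correct i1 splat can be modelled with plain integers. This sketch is an illustration only and assumes the usual WHILELO semantics, namely that lane i is active iff base + i < limit as an unsigned compare; the helper name is invented for this note. Sign-extending the single i1 bit to 64 bits produces either 0 or all-ones, which makes every lane, or no lane, active:

#include <cassert>
#include <cstdint>
#include <vector>

// Model of WHILELO: lane I is active iff Base + I < Limit (unsigned).
static std::vector<bool> whilelo(uint64_t Base, uint64_t Limit,
                                 unsigned NumLanes) {
  std::vector<bool> Pred(NumLanes);
  for (unsigned I = 0; I < NumLanes; ++I)
    Pred[I] = Base + I < Limit;
  return Pred;
}

int main() {
  const unsigned Lanes = 8; // e.g. nxv8i1 with vscale == 1
  for (bool Splat : {false, true}) {
    // Sign-extend the i1 splat value: 0 -> 0, 1 -> all-ones.
    uint64_t V = Splat ? ~UINT64_C(0) : 0;
    std::vector<bool> P = whilelo(0, V, Lanes);
    for (bool B : P)
      assert(B == Splat); // every lane equals the splatted bit
  }
  return 0;
}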
+  SDLoc DL(Op);
+  SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
+  SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
+                         DAG.getValueType(MVT::i1));
+  SDValue ID =
+      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  if (VT == MVT::nxv1i1)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::nxv1i1,
+                       DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv2i1, ID,
+                                   Zero, SplatVal),
+                       Zero);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Zero, SplatVal);
 }

 SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
@@ -10090,18 +10574,17 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
     return SDValue();

   // The DUPQ operation is independent of element type so normalise to i64s.
-  SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
   SDValue Idx128 = Op.getOperand(2);

   // DUPQ can be used when idx is in range.
   auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
   if (CIdx && (CIdx->getZExtValue() <= 3)) {
     SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
-    SDNode *DUPQ =
-        DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
-    return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
+    return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
   }

+  SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
+
   // The ACLE says this must produce the same result as:
   //   svtbl(data, svadd_x(svptrue_b64(),
   //                       svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
@@ -10358,20 +10841,6 @@ static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
   return true;
 }

-static unsigned getIntrinsicID(const SDNode *N) {
-  unsigned Opcode = N->getOpcode();
-  switch (Opcode) {
-  default:
-    return Intrinsic::not_intrinsic;
-  case ISD::INTRINSIC_WO_CHAIN: {
-    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
-    if (IID < Intrinsic::num_intrinsics)
-      return IID;
-    return Intrinsic::not_intrinsic;
-  }
-  }
-}
-
 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
 // BUILD_VECTOR with constant element C1, C2 is a constant, and:
@@ -10822,6 +11291,12 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
     return SDValue();
   }

+  // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
+  // v4i32s. This is really a truncate, which we can construct out of (legal)
+  // concats and truncate nodes.
+  if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG))
+    return M;
+
   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
   if (NumElts >= 4) {
     if (SDValue shuffle = ReconstructShuffle(Op, DAG))
@@ -11121,29 +11596,36 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
   if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
     return SDValue();

-  EVT WideVT;
-  SDValue ExtVec;
+  // Here narrow and wide refer to the vector element types. After "casting"
+  // both vectors must have the same bit length, and so, because the subvector
+  // has fewer elements, those elements need to be bigger.
+  EVT NarrowVT = getPackedSVEVectorVT(VT.getVectorElementCount());
+  EVT WideVT = getPackedSVEVectorVT(InVT.getVectorElementCount());

+  // NOP cast operands to the largest legal vector of the same element count.
   if (VT.isFloatingPoint()) {
-    // The InVT type should be legal. We can safely cast the unpacked
-    // subvector from InVT -> VT.
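The UUNPK/UZP1 sequence built in the INSERT_SUBVECTOR hunk that continues below can be modelled at the value level. In the sketch that follows (an illustration under the stated assumptions, not LLVM code; all helper names are invented) uunpklo/uunpkhi widen one half of a vector, and uzp1 keeps the even-indexed narrow elements of the concatenation, which for little-endian lanes amounts to a truncating concatenation. Replacing either half of a vector then falls out directly:

#include <cassert>
#include <cstdint>
#include <vector>

using V16 = std::vector<uint16_t>;
using V32 = std::vector<uint32_t>;

// Models of the SVE unpacks: widen the low or high half of the vector.
static V32 uunpklo(const V16 &V) {
  return V32(V.begin(), V.begin() + V.size() / 2);
}
static V32 uunpkhi(const V16 &V) {
  return V32(V.begin() + V.size() / 2, V.end());
}
// UZP1 keeps the even-indexed narrow elements of the concatenation; viewed on
// wide lanes (little endian) that is a truncating concatenation.
static V16 uzp1(const V32 &A, const V32 &B) {
  V16 R;
  for (uint32_t E : A)
    R.push_back(static_cast<uint16_t>(E));
  for (uint32_t E : B)
    R.push_back(static_cast<uint16_t>(E));
  return R;
}

int main() {
  V16 Vec0 = {0, 1, 2, 3, 4, 5, 6, 7}; // vector being updated
  V32 Vec1 = {100, 101, 102, 103};     // subvector, already widened

  // Insert at index 0: the lower half is replaced, the upper half preserved.
  V16 InsertLo = uzp1(Vec1, uunpkhi(Vec0));
  assert((InsertLo == V16{100, 101, 102, 103, 4, 5, 6, 7}));

  // Insert at the midpoint: lower half preserved, upper half replaced.
  V16 InsertHi = uzp1(uunpklo(Vec0), Vec1);
  assert((InsertHi == V16{0, 1, 2, 3, 100, 101, 102, 103}));
  return 0;
}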
- WideVT = VT; - ExtVec = getSVESafeBitCast(VT, Vec1, DAG); + Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG); + Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG); } else { - // Extend elements of smaller vector... - WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext())); - ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); + // Legal integer vectors are already their largest so Vec0 is fine as is. + Vec1 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); } + // To replace the top/bottom half of vector V with vector SubV we widen the + // preserved half of V, concatenate this to SubV (the order depending on the + // half being replaced) and then narrow the result. + SDValue Narrow; if (Idx == 0) { SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0); - return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0); - } else if (Idx == InVT.getVectorMinNumElements()) { + Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, Vec1, HiVec0); + } else { + assert(Idx == InVT.getVectorMinNumElements() && + "Invalid subvector index!"); SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0); - return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec); + Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, LoVec0, Vec1); } - return SDValue(); + return getSVESafeBitCast(VT, Narrow, DAG); } if (Idx == 0 && isPackedVectorType(VT, DAG)) { @@ -11249,21 +11731,8 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (M[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = M[i]; - } - - // Compute the index in the perfect shuffle table. - unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) + unsigned Cost = getPerfectShuffleCost(M); + if (Cost <= 1) return true; } @@ -11360,9 +11829,6 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, unsigned EltSize = VT.getScalarSizeInBits(); switch (Op.getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); - case ISD::SHL: if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED); @@ -11405,7 +11871,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, return NegShiftLeft; } - return SDValue(); + llvm_unreachable("unexpected shift opcode"); } static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, @@ -11525,8 +11991,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); } - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); // Make v4f16 (only) fcmp operations utilise vector instructions // v8f16 support will be a litle more complicated @@ -11594,7 +12059,8 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, (Op.getOpcode() != ISD::VECREDUCE_ADD && SrcVT.getVectorElementType() == MVT::i64); if (SrcVT.isScalableVector() || - useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { + useSVEForFixedLengthVectorVT( + SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) { if (SrcVT.getVectorElementType() == MVT::i1) return LowerPredReductionToSVE(Op, DAG); @@ -11659,7 +12125,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue 
Op, SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const { - auto &Subtarget = static_cast(DAG.getSubtarget()); + auto &Subtarget = DAG.getSubtarget(); if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); @@ -11676,7 +12142,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { - auto &Subtarget = static_cast(DAG.getSubtarget()); + auto &Subtarget = DAG.getSubtarget(); if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); @@ -11772,8 +12238,8 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, SDLoc DL(Op); APInt MulImm = cast(Op.getOperand(0))->getAPIntValue(); - return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)), - DL, VT); + return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL, + VT); } /// Set the IntrinsicInfo for the `aarch64_sve_st` intrinsics. @@ -11867,23 +12333,23 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::aarch64_ldaxr: case Intrinsic::aarch64_ldxr: { - PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); + Type *ValTy = I.getParamElementType(0); Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::getVT(ValTy); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ValTy); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { - PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); + Type *ValTy = I.getParamElementType(1); Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::getVT(ValTy); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ValTy); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } @@ -11906,22 +12372,23 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; case Intrinsic::aarch64_sve_ldnt1: { - PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); + Type *ElTy = cast(I.getType())->getElementType(); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(I.getType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ElTy); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal; return true; } case Intrinsic::aarch64_sve_stnt1: { - PointerType *PtrTy = cast(I.getArgOperand(2)->getType()); + Type *ElTy = + cast(I.getArgOperand(0)->getType())->getElementType(); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(I.getOperand(0)->getType()); Info.ptrVal = I.getArgOperand(2); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ElTy); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal; return true; } @@ -12007,8 +12474,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { Instruction *User = I->user_back(); - if (User && - !(User->getOpcode() == Instruction::FSub || + if (!(User->getOpcode() == 
Instruction::FSub || User->getOpcode() == Instruction::FAdd)) return true; @@ -12194,9 +12660,6 @@ static bool isSplatShuffle(Value *V) { /// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2). bool AArch64TargetLowering::shouldSinkOperands( Instruction *I, SmallVectorImpl &Ops) const { - if (!I->getType()->isVectorTy()) - return false; - if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { case Intrinsic::aarch64_neon_smull: @@ -12208,6 +12671,12 @@ bool AArch64TargetLowering::shouldSinkOperands( } LLVM_FALLTHROUGH; + case Intrinsic::fma: + if (isa(I->getType()) && + cast(I->getType())->getElementType()->isHalfTy() && + !Subtarget->hasFullFP16()) + return false; + LLVM_FALLTHROUGH; case Intrinsic::aarch64_neon_sqdmull: case Intrinsic::aarch64_neon_sqdmulh: case Intrinsic::aarch64_neon_sqrdmulh: @@ -12217,7 +12686,52 @@ bool AArch64TargetLowering::shouldSinkOperands( if (isSplatShuffle(II->getOperand(1))) Ops.push_back(&II->getOperandUse(1)); return !Ops.empty(); - + case Intrinsic::aarch64_sme_write_horiz: + case Intrinsic::aarch64_sme_write_vert: + case Intrinsic::aarch64_sme_writeq_horiz: + case Intrinsic::aarch64_sme_writeq_vert: { + auto *Idx = dyn_cast(II->getOperand(1)); + if (!Idx || Idx->getOpcode() != Instruction::Add) + return false; + Ops.push_back(&II->getOperandUse(1)); + return true; + } + case Intrinsic::aarch64_sme_read_horiz: + case Intrinsic::aarch64_sme_read_vert: + case Intrinsic::aarch64_sme_readq_horiz: + case Intrinsic::aarch64_sme_readq_vert: + case Intrinsic::aarch64_sme_ld1b_vert: + case Intrinsic::aarch64_sme_ld1h_vert: + case Intrinsic::aarch64_sme_ld1w_vert: + case Intrinsic::aarch64_sme_ld1d_vert: + case Intrinsic::aarch64_sme_ld1q_vert: + case Intrinsic::aarch64_sme_st1b_vert: + case Intrinsic::aarch64_sme_st1h_vert: + case Intrinsic::aarch64_sme_st1w_vert: + case Intrinsic::aarch64_sme_st1d_vert: + case Intrinsic::aarch64_sme_st1q_vert: + case Intrinsic::aarch64_sme_ld1b_horiz: + case Intrinsic::aarch64_sme_ld1h_horiz: + case Intrinsic::aarch64_sme_ld1w_horiz: + case Intrinsic::aarch64_sme_ld1d_horiz: + case Intrinsic::aarch64_sme_ld1q_horiz: + case Intrinsic::aarch64_sme_st1b_horiz: + case Intrinsic::aarch64_sme_st1h_horiz: + case Intrinsic::aarch64_sme_st1w_horiz: + case Intrinsic::aarch64_sme_st1d_horiz: + case Intrinsic::aarch64_sme_st1q_horiz: { + auto *Idx = dyn_cast(II->getOperand(3)); + if (!Idx || Idx->getOpcode() != Instruction::Add) + return false; + Ops.push_back(&II->getOperandUse(3)); + return true; + } + case Intrinsic::aarch64_neon_pmull: + if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) + return false; + Ops.push_back(&II->getOperandUse(0)); + Ops.push_back(&II->getOperandUse(1)); + return true; case Intrinsic::aarch64_neon_pmull64: if (!areOperandsOfVmullHighP64(II->getArgOperand(0), II->getArgOperand(1))) @@ -12225,12 +12739,14 @@ bool AArch64TargetLowering::shouldSinkOperands( Ops.push_back(&II->getArgOperandUse(0)); Ops.push_back(&II->getArgOperandUse(1)); return true; - default: return false; } } + if (!I->getType()->isVectorTy()) + return false; + switch (I->getOpcode()) { case Instruction::Sub: case Instruction::Add: { @@ -12745,12 +13261,15 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic, assert(VT.isScalableVector() && "Can only lower scalable vectors"); unsigned N, Opcode; - static std::map> IntrinsicMap = { - {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}}, - {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}}, - 
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
-
-  std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
+  static const std::pair<unsigned, std::pair<unsigned, unsigned>>
+      IntrinsicMap[] = {
+          {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
+          {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
+          {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
+
+  std::tie(N, Opcode) = llvm::find_if(IntrinsicMap, [&](auto P) {
+                          return P.first == Intrinsic;
+                        })->second;
   assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
          "invalid tuple vector type!");
@@ -12850,7 +13369,7 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
 // if the folding leads to worse code.
 bool AArch64TargetLowering::isMulAddWithConstProfitable(
-    const SDValue &AddNode, const SDValue &ConstNode) const {
+    SDValue AddNode, SDValue ConstNode) const {
   // Let the DAGCombiner decide for vector types and large types.
   const EVT VT = AddNode.getValueType();
   if (VT.isVector() || VT.getScalarSizeInBits() > 64)
@@ -13025,6 +13544,28 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
   return true;
 }

+bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
+    const SDNode *N, CombineLevel Level) const {
+  assert(((N->getOpcode() == ISD::SHL &&
+           N->getOperand(0).getOpcode() == ISD::SRL) ||
+          (N->getOpcode() == ISD::SRL &&
+           N->getOperand(0).getOpcode() == ISD::SHL)) &&
+         "Expected shift-shift mask");
+  // Don't allow multiuse shift folding with the same shift amount.
+  if (!N->getOperand(0)->hasOneUse())
+    return false;
+
+  // Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
+  EVT VT = N->getValueType(0);
+  if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
+    auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
+  }
+
+  return true;
+}
+
 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                               Type *Ty) const {
   assert(Ty->isIntegerTy());
@@ -13221,6 +13762,61 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
 }

+// Given an (integer) vecreduce, we know the order of the inputs does not
+// matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
+// into UADDV(UADDLP(x)). This can also happen through an extra add, where we
+// transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+  auto DetectAddExtract = [&](SDValue A) {
+    // Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
+    // UADDLP(x) if found.
+    if (A.getOpcode() != ISD::ADD)
+      return SDValue();
+    EVT VT = A.getValueType();
+    SDValue Op0 = A.getOperand(0);
+    SDValue Op1 = A.getOperand(1);
+    if (Op0.getOpcode() != Op1.getOpcode() ||
+        (Op0.getOpcode() != ISD::ZERO_EXTEND &&
+         Op0.getOpcode() != ISD::SIGN_EXTEND))
+      return SDValue();
+    SDValue Ext0 = Op0.getOperand(0);
+    SDValue Ext1 = Op1.getOperand(0);
+    if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+        Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+        Ext0.getOperand(0) != Ext1.getOperand(0))
+      return SDValue();
+    // Check that the type is twice the add types, and the extracts are from
+    // upper/lower parts of the same source.
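The soundness of the UADDLP rewrite sketched in the comment above rests on the reduction being order-blind: add(zext(lo), zext(hi)) and UADDLP(x) place pair sums in different lanes, but the total over all lanes is identical, and UADDV only consumes that total. A small standalone check (values invented for this note, unsigned flavour only):

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  std::vector<uint16_t> X = {1, 2, 3, 40000, 5, 60000, 7, 8}; // a v8i16 input

  // add(zext(extract_lo(x)), zext(extract_hi(x))): lane i is lo[i] + hi[i].
  std::vector<uint32_t> AddExt(4);
  for (int I = 0; I < 4; ++I)
    AddExt[I] = uint32_t(X[I]) + uint32_t(X[I + 4]);

  // UADDLP(x): lane i is x[2i] + x[2i+1].
  std::vector<uint32_t> Uaddlp(4);
  for (int I = 0; I < 4; ++I)
    Uaddlp[I] = uint32_t(X[2 * I]) + uint32_t(X[2 * I + 1]);

  // The lanes differ, but the reduction consumed by UADDV matches.
  uint32_t Sum1 = std::accumulate(AddExt.begin(), AddExt.end(), 0u);
  uint32_t Sum2 = std::accumulate(Uaddlp.begin(), Uaddlp.end(), 0u);
  assert(Sum1 == Sum2);
  return 0;
}

The signed variant with SIGN_EXTEND and SADDLP follows the same argument.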
+ if (Ext0.getOperand(0).getValueType().getVectorNumElements() != + VT.getVectorNumElements() * 2) + return SDValue(); + if ((Ext0.getConstantOperandVal(1) != 0 && + Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) && + (Ext1.getConstantOperandVal(1) != 0 && + Ext0.getConstantOperandVal(1) != VT.getVectorNumElements())) + return SDValue(); + unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP + : AArch64ISD::SADDLP; + return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0)); + }; + + SDValue A = N->getOperand(0); + if (SDValue R = DetectAddExtract(A)) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R); + if (A.getOpcode() == ISD::ADD) { + if (SDValue R = DetectAddExtract(A.getOperand(0))) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R, + A.getOperand(1))); + if (SDValue R = DetectAddExtract(A.getOperand(1))) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R, + A.getOperand(0))); + } + return SDValue(); +} + + static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -13279,6 +13875,60 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } +SDValue +AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N, 0); // Lower SREM as SREM + + EVT VT = N->getValueType(0); + + // For scalable and fixed types, mark them as cheap so we can handle it much + // later. This allows us to handle larger than legal types. + if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors()) + return SDValue(N, 0); + + // fold (srem X, pow2) + if ((VT != MVT::i32 && VT != MVT::i64) || + !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2())) + return SDValue(); + + unsigned Lg2 = Divisor.countTrailingZeros(); + if (Lg2 == 0) + return SDValue(); + + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue CCVal, CSNeg; + if (Lg2 == 1) { + SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETGE, CCVal, DAG, DL); + SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne); + CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp); + + Created.push_back(Cmp.getNode()); + Created.push_back(And.getNode()); + } else { + SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0); + SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne); + SDValue AndNeg = DAG.getNode(ISD::AND, DL, VT, Negs, Pow2MinusOne); + CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal, + Negs.getValue(1)); + + Created.push_back(Negs.getNode()); + Created.push_back(AndPos.getNode()); + Created.push_back(AndNeg.getNode()); + } + + return CSNeg; +} + static bool IsSVECntIntrinsic(SDValue S) { switch(getIntrinsicID(S.getNode())) { default: @@ -13300,11 +13950,10 @@ static bool IsSVECntIntrinsic(SDValue S) { /// operations need a bit more inspection to get this information. 
/// /// \param Extend The SDNode from the DAG that represents the extend operation -/// \param DAG The SelectionDAG hosting the \p Extend node /// /// \returns The type representing the \p Extend source type, or \p MVT::Other /// if no valid type can be determined -static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) { +static EVT calculatePreExtendType(SDValue Extend) { switch (Extend.getOpcode()) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -13337,102 +13986,90 @@ static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) { default: return MVT::Other; } - - llvm_unreachable("Code path unhandled in calculatePreExtendType!"); } -/// Combines a dup(sext/zext) node pattern into sext/zext(dup) -/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt -static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle, - SelectionDAG &DAG) { - - ShuffleVectorSDNode *ShuffleNode = - dyn_cast(VectorShuffle.getNode()); - if (!ShuffleNode) - return SDValue(); - - // Ensuring the mask is zero before continuing - if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0) - return SDValue(); - - SDValue InsertVectorElt = VectorShuffle.getOperand(0); - - if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT) - return SDValue(); - - SDValue InsertLane = InsertVectorElt.getOperand(2); - ConstantSDNode *Constant = dyn_cast(InsertLane.getNode()); - // Ensures the insert is inserting into lane 0 - if (!Constant || Constant->getZExtValue() != 0) +/// Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern +/// into sext/zext(buildvector) or sext/zext(shuffle) making use of the vector +/// SExt/ZExt rather than the scalar SExt/ZExt +static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) { + EVT VT = BV.getValueType(); + if (BV.getOpcode() != ISD::BUILD_VECTOR && + BV.getOpcode() != ISD::VECTOR_SHUFFLE) return SDValue(); - SDValue Extend = InsertVectorElt.getOperand(1); + // Use the first item in the buildvector/shuffle to get the size of the + // extend, and make sure it looks valid. + SDValue Extend = BV->getOperand(0); unsigned ExtendOpcode = Extend.getOpcode(); - bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND || ExtendOpcode == ISD::SIGN_EXTEND_INREG || ExtendOpcode == ISD::AssertSext; if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND && ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND) return SDValue(); - - EVT TargetType = VectorShuffle.getValueType(); - EVT PreExtendType = calculatePreExtendType(Extend, DAG); - - if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 && - TargetType != MVT::v2i64) || - (PreExtendType == MVT::Other)) + // Shuffle inputs are vector, limit to SIGN_EXTEND and ZERO_EXTEND to ensure + // calculatePreExtendType will work without issue. 
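The underlying identity for the combine defined in this hunk is that a scalar extend commutes with a splat or shuffle of already-extended values: splat(sext(x)) equals sext(splat(x)) lane for lane, which is what later lets a mul of two such operands become a widening multiply (smull/umull). A minimal standalone check (values invented for this note, not LLVM code):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  int8_t X = -7;
  const int Lanes = 8;

  // dup(sext(x)): extend first, then broadcast.
  std::vector<int16_t> A(Lanes, int16_t(X));

  // sext(dup(x)): broadcast the narrow value, then extend each lane.
  std::vector<int8_t> Narrow(Lanes, X);
  std::vector<int16_t> B(Lanes);
  for (int I = 0; I < Lanes; ++I)
    B[I] = int16_t(Narrow[I]);

  assert(A == B); // identical vectors, so the two DAG shapes are interchangeable
  return 0;
}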
+ if (BV.getOpcode() == ISD::VECTOR_SHUFFLE && + ExtendOpcode != ISD::SIGN_EXTEND && ExtendOpcode != ISD::ZERO_EXTEND) return SDValue(); // Restrict valid pre-extend data type - if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 && - PreExtendType != MVT::i32) - return SDValue(); - - EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType); - - if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount()) + EVT PreExtendType = calculatePreExtendType(Extend); + if (PreExtendType == MVT::Other || + PreExtendType.getScalarSizeInBits() != VT.getScalarSizeInBits() / 2) return SDValue(); - if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2) - return SDValue(); - - SDLoc DL(VectorShuffle); - - SDValue InsertVectorNode = DAG.getNode( - InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT), - DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType), - DAG.getConstant(0, DL, MVT::i64)); - - std::vector ShuffleMask(TargetType.getVectorNumElements()); - - SDValue VectorShuffleNode = - DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode, - DAG.getUNDEF(PreExtendVT), ShuffleMask); - - SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - DL, TargetType, VectorShuffleNode); + // Make sure all other operands are equally extended + for (SDValue Op : drop_begin(BV->ops())) { + if (Op.isUndef()) + continue; + unsigned Opc = Op.getOpcode(); + bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG || + Opc == ISD::AssertSext; + if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType) + return SDValue(); + } - return ExtendNode; + SDValue NBV; + SDLoc DL(BV); + if (BV.getOpcode() == ISD::BUILD_VECTOR) { + EVT PreExtendVT = VT.changeVectorElementType(PreExtendType); + EVT PreExtendLegalType = + PreExtendType.getScalarSizeInBits() < 32 ? MVT::i32 : PreExtendType; + SmallVector NewOps; + for (SDValue Op : BV->ops()) + NewOps.push_back(Op.isUndef() ? DAG.getUNDEF(PreExtendLegalType) + : DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, + PreExtendLegalType)); + NBV = DAG.getNode(ISD::BUILD_VECTOR, DL, PreExtendVT, NewOps); + } else { // BV.getOpcode() == ISD::VECTOR_SHUFFLE + EVT PreExtendVT = VT.changeVectorElementType(PreExtendType.getScalarType()); + NBV = DAG.getVectorShuffle(PreExtendVT, DL, BV.getOperand(0).getOperand(0), + BV.getOperand(1).isUndef() + ? DAG.getUNDEF(PreExtendVT) + : BV.getOperand(1).getOperand(0), + cast(BV)->getMask()); + } + return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV); } /// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) /// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) { // If the value type isn't a vector, none of the operands are going to be dups - if (!Mul->getValueType(0).isVector()) + EVT VT = Mul->getValueType(0); + if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64) return SDValue(); - SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG); - SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG); + SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG); + SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG); // Neither operands have been changed, don't make any further changes if (!Op0 && !Op1) return SDValue(); SDLoc DL(Mul); - return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0), - Op0 ? 
Op0 : Mul->getOperand(0), + return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0), Op1 ? Op1 : Mul->getOperand(1)); } @@ -13649,7 +14286,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getAlignment(), + LN0->getPointerInfo(), LN0->getAlign(), LN0->getMemOperand()->getFlags()); // Make sure successors of the original load stay after it by updating them @@ -13676,8 +14313,10 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue Op = N->getOperand(0); - if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || - Op.getOpcode() != ISD::FMUL) + if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL) + return SDValue(); + + if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector()) return SDValue(); SDValue ConstVec = Op->getOperand(1); @@ -13713,7 +14352,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == ISD::FP_TO_SINT_SAT || N->getOpcode() == ISD::FP_TO_UINT_SAT) { EVT SatVT = cast(N->getOperand(1))->getVT(); - if (SatVT.getScalarSizeInBits() != IntBits) + if (SatVT.getScalarSizeInBits() != IntBits || IntBits != FloatBits) return SDValue(); } @@ -13956,15 +14595,85 @@ static SDValue tryCombineToBSL(SDNode *N, return SDValue(); } +// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to +// convert to csel(ccmp(.., cc0)), depending on cc1: + +// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1))) +// => +// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0)) +// +// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1))) +// => +// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0)) +static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + SDValue CSel0 = N->getOperand(0); + SDValue CSel1 = N->getOperand(1); + + if (CSel0.getOpcode() != AArch64ISD::CSEL || + CSel1.getOpcode() != AArch64ISD::CSEL) + return SDValue(); + + if (!CSel0->hasOneUse() || !CSel1->hasOneUse()) + return SDValue(); + + if (!isNullConstant(CSel0.getOperand(0)) || + !isOneConstant(CSel0.getOperand(1)) || + !isNullConstant(CSel1.getOperand(0)) || + !isOneConstant(CSel1.getOperand(1))) + return SDValue(); + + SDValue Cmp0 = CSel0.getOperand(3); + SDValue Cmp1 = CSel1.getOperand(3); + AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2); + AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2); + if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse()) + return SDValue(); + if (Cmp1.getOpcode() != AArch64ISD::SUBS && + Cmp0.getOpcode() == AArch64ISD::SUBS) { + std::swap(Cmp0, Cmp1); + std::swap(CC0, CC1); + } + + if (Cmp1.getOpcode() != AArch64ISD::SUBS) + return SDValue(); + + SDLoc DL(N); + SDValue CCmp; + + if (N->getOpcode() == ISD::AND) { + AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0); + SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1); + SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0), + Cmp1.getOperand(1), NZCVOp, Condition, Cmp0); + } else { + SDLoc DL(N); + AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1); + SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1); + SDValue NZCVOp = 
DAG.getConstant(NZCV, DL, MVT::i32); + CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0), + Cmp1.getOperand(1), NZCVOp, Condition, Cmp0); + } + return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0), + CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32), + CCmp); +} + static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { - // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); + if (SDValue R = performANDORCSELCombine(N, DAG)) + return R; + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); + // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) if (SDValue Res = tryCombineToEXTR(N, DCI)) return Res; @@ -14015,7 +14724,7 @@ static SDValue performSVEAndCombine(SDNode *N, SDValue UnpkOp = Src->getOperand(0); SDValue Dup = N->getOperand(1); - if (Dup.getOpcode() != AArch64ISD::DUP) + if (Dup.getOpcode() != ISD::SPLAT_VECTOR) return SDValue(); SDLoc DL(N); @@ -14038,8 +14747,7 @@ static SDValue performSVEAndCombine(SDNode *N, // Otherwise, make sure we propagate the AND to the operand // of the unpack - Dup = DAG.getNode(AArch64ISD::DUP, DL, - UnpkOp->getValueType(0), + Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0), DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32)); SDValue And = DAG.getNode(ISD::AND, DL, @@ -14097,20 +14805,34 @@ static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); EVT VT = N->getValueType(0); - if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT)) + + if (SDValue R = performANDORCSELCombine(N, DAG)) + return R; + + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); + // Although NEON has no EORV instruction, when only the least significant bit + // is required the operation is synonymous with ADDV. + if (LHS.getOpcode() == ISD::VECREDUCE_XOR && isOneConstant(RHS) && + LHS.getOperand(0).getValueType().isFixedLengthVector() && + LHS.hasOneUse()) { + SDLoc DL(N); + SDValue ADDV = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, LHS.getOperand(0)); + return DAG.getNode(ISD::AND, DL, VT, ADDV, RHS); + } + if (VT.isScalableVector()) return performSVEAndCombine(N, DCI); // The combining code below works only for NEON vectors. In particular, it // does not work for SVE when dealing with vectors wider than 128 bits. - if (!(VT.is64BitVector() || VT.is128BitVector())) + if (!VT.is64BitVector() && !VT.is128BitVector()) return SDValue(); - BuildVectorSDNode *BVN = - dyn_cast(N->getOperand(1).getNode()); + BuildVectorSDNode *BVN = dyn_cast(RHS.getNode()); if (!BVN) return SDValue(); @@ -14141,107 +14863,125 @@ static SDValue performANDCombine(SDNode *N, return SDValue(); } -// Attempt to form urhadd(OpA, OpB) from -// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)) -// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)). -// The original form of the first expression is -// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the -// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)). -// Before this function is called the srl will have been lowered to -// AArch64ISD::VLSHR. 
-// This pass can also recognize signed variants of the patterns that use sign -// extension instead of zero extension and form a srhadd(OpA, OpB) or a -// shadd(OpA, OpB) from them. -static SDValue -performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - EVT VT = N->getValueType(0); +static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) { + switch (Opcode) { + case ISD::STRICT_FADD: + case ISD::FADD: + return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64; + case ISD::ADD: + return VT == MVT::i64; + default: + return false; + } +} - // Since we are looking for a right shift by a constant value of 1 and we are - // operating on types at least 16 bits in length (sign/zero extended OpA and - // OpB, which are at least 8 bits), it follows that the truncate will always - // discard the shifted-in bit and therefore the right shift will be logical - // regardless of the signedness of OpA and OpB. - SDValue Shift = N->getOperand(0); - if (Shift.getOpcode() != AArch64ISD::VLSHR) +static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, + AArch64CC::CondCode Cond); + +static bool isPredicateCCSettingOp(SDValue N) { + if ((N.getOpcode() == ISD::SETCC) || + (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels || + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt || + // get_active_lane_mask is lowered to a whilelo instruction. + N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask))) + return true; + + return false; +} + +// Materialize : i1 = extract_vector_elt t37, Constant:i64<0> +// ... into: "ptrue p, all" + PTEST +static SDValue +performFirstTrueTestVectorCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT); + // Make sure PTEST can be legalised with illegal types. + if (!Subtarget->hasSVE() || DCI.isBeforeLegalize()) return SDValue(); - // Is the right shift using an immediate value of 1? - uint64_t ShiftAmount = Shift.getConstantOperandVal(1); - if (ShiftAmount != 1) + SDValue N0 = N->getOperand(0); + EVT VT = N0.getValueType(); + + if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1 || + !isNullConstant(N->getOperand(1))) return SDValue(); - SDValue ExtendOpA, ExtendOpB; - SDValue ShiftOp0 = Shift.getOperand(0); - unsigned ShiftOp0Opc = ShiftOp0.getOpcode(); - if (ShiftOp0Opc == ISD::SUB) { + // Restricted the DAG combine to only cases where we're extracting from a + // flag-setting operation. + if (!isPredicateCCSettingOp(N0)) + return SDValue(); - SDValue Xor = ShiftOp0.getOperand(1); - if (Xor.getOpcode() != ISD::XOR) - return SDValue(); + // Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0 + SelectionDAG &DAG = DCI.DAG; + SDValue Pg = getPTrue(DAG, SDLoc(N), VT, AArch64SVEPredPattern::all); + return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE); +} - // Is the XOR using a constant amount of all ones in the right hand side? 
- uint64_t C; - if (!isAllConstantBuildVector(Xor.getOperand(1), C)) - return SDValue(); +// Materialize : Idx = (add (mul vscale, NumEls), -1) +// i1 = extract_vector_elt t37, Constant:i64 +// ... into: "ptrue p, all" + PTEST +static SDValue +performLastTrueTestVectorCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT); + // Make sure PTEST is legal types. + if (!Subtarget->hasSVE() || DCI.isBeforeLegalize()) + return SDValue(); - unsigned ElemSizeInBits = VT.getScalarSizeInBits(); - APInt CAsAPInt(ElemSizeInBits, C); - if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits)) - return SDValue(); + SDValue N0 = N->getOperand(0); + EVT OpVT = N0.getValueType(); - ExtendOpA = Xor.getOperand(0); - ExtendOpB = ShiftOp0.getOperand(0); - } else if (ShiftOp0Opc == ISD::ADD) { - ExtendOpA = ShiftOp0.getOperand(0); - ExtendOpB = ShiftOp0.getOperand(1); - } else + if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1) return SDValue(); - unsigned ExtendOpAOpc = ExtendOpA.getOpcode(); - unsigned ExtendOpBOpc = ExtendOpB.getOpcode(); - if (!(ExtendOpAOpc == ExtendOpBOpc && - (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND))) + // Idx == (add (mul vscale, NumEls), -1) + SDValue Idx = N->getOperand(1); + if (Idx.getOpcode() != ISD::ADD || !isAllOnesConstant(Idx.getOperand(1))) return SDValue(); - // Is the result of the right shift being truncated to the same value type as - // the original operands, OpA and OpB? - SDValue OpA = ExtendOpA.getOperand(0); - SDValue OpB = ExtendOpB.getOperand(0); - EVT OpAVT = OpA.getValueType(); - assert(ExtendOpA.getValueType() == ExtendOpB.getValueType()); - if (!(VT == OpAVT && OpAVT == OpB.getValueType())) + SDValue VS = Idx.getOperand(0); + if (VS.getOpcode() != ISD::VSCALE) return SDValue(); - SDLoc DL(N); - bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND; - bool IsRHADD = ShiftOp0Opc == ISD::SUB; - unsigned HADDOpc = IsSignExtend - ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD) - : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD); - SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB); + unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue(); + if (VS.getConstantOperandVal(0) != NumEls) + return SDValue(); - return ResultHADD; + // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 
1 : 0 + SelectionDAG &DAG = DCI.DAG; + SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all); + return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE); } -static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) { - switch (Opcode) { - case ISD::FADD: - return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64; - case ISD::ADD: - return VT == MVT::i64; - default: - return false; - } -} +static SDValue +performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT); + if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget)) + return Res; + if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget)) + return Res; -static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) { + SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantSDNode *ConstantN1 = dyn_cast(N1); EVT VT = N->getValueType(0); - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); + bool IsStrict = N0->isStrictFPOpcode(); + + // extract(dup x) -> x + if (N0.getOpcode() == AArch64ISD::DUP) + return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); // Rewrite for pairwise fadd pattern // (f32 (extract_vector_elt @@ -14250,11 +14990,14 @@ static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) { // -> // (f32 (fadd (extract_vector_elt (vXf32 Other) 0) // (extract_vector_elt (vXf32 Other) 1)) + // For strict_fadd we need to make sure the old strict_fadd can be deleted, so + // we can only do this when it's used only by the extract_vector_elt. if (ConstantN1 && ConstantN1->getZExtValue() == 0 && - hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) { + hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) && + (!IsStrict || N0.hasOneUse())) { SDLoc DL(N0); - SDValue N00 = N0->getOperand(0); - SDValue N01 = N0->getOperand(1); + SDValue N00 = N0->getOperand(IsStrict ? 1 : 0); + SDValue N01 = N0->getOperand(IsStrict ? 2 : 1); ShuffleVectorSDNode *Shuffle = dyn_cast(N01); SDValue Other = N00; @@ -14267,11 +15010,23 @@ static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) { if (Shuffle && Shuffle->getMaskElt(0) == 1 && Other == Shuffle->getOperand(0)) { - return DAG.getNode(N0->getOpcode(), DL, VT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, - DAG.getConstant(0, DL, MVT::i64)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, - DAG.getConstant(1, DL, MVT::i64))); + SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, + DAG.getConstant(0, DL, MVT::i64)); + SDValue Extract2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, + DAG.getConstant(1, DL, MVT::i64)); + if (!IsStrict) + return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2); + + // For strict_fadd we need uses of the final extract_vector to be replaced + // with the strict_fadd, but we also need uses of the chain output of the + // original strict_fadd to use the chain output of the new strict_fadd as + // otherwise it may not be deleted. 
+      SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
+                                {VT, MVT::Other},
+                                {N0->getOperand(0), Extract1, Extract2});
+      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret);
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
+      return SDValue(N, 0);
     }
   }
@@ -14321,25 +15076,61 @@ static SDValue performConcatVectorsCombine(SDNode *N,
     }
   }

+  if (N->getOperand(0).getValueType() == MVT::v4i8) {
+    // If we have a concat of v4i8 loads, convert them to a buildvector of f32
+    // loads to prevent having to go through the v4i8 load legalization that
+    // needs to extend each element into a larger type.
+    if (N->getNumOperands() % 2 == 0 && all_of(N->op_values(), [](SDValue V) {
+          if (V.getValueType() != MVT::v4i8)
+            return false;
+          if (V.isUndef())
+            return true;
+          LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
+          return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
+                 LD->getExtensionType() == ISD::NON_EXTLOAD;
+        })) {
+      EVT NVT =
+          EVT::getVectorVT(*DAG.getContext(), MVT::f32, N->getNumOperands());
+      SmallVector<SDValue> Ops;
+
+      for (unsigned i = 0; i < N->getNumOperands(); i++) {
+        SDValue V = N->getOperand(i);
+        if (V.isUndef())
+          Ops.push_back(DAG.getUNDEF(MVT::f32));
+        else {
+          LoadSDNode *LD = cast<LoadSDNode>(V);
+          SDValue NewLoad =
+              DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
+                          LD->getMemOperand());
+          DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
+          Ops.push_back(NewLoad);
+        }
+      }
+      return DAG.getBitcast(N->getValueType(0),
+                            DAG.getBuildVector(NVT, dl, Ops));
+    }
+  }
+
+
   // Wait 'til after everything is legalized to try this. That way we have
   // legal vector types and such.
   if (DCI.isBeforeLegalizeOps())
     return SDValue();

-  // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
-  // subvectors from the same original vectors. Combine these into a single
-  // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
-  // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
-  //                                       extract_subvector (v16i8 OpB,
-  //                                       <0>))),
-  //                        (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
-  //                                       extract_subvector (v16i8 OpB,
-  //                                       <8>)))))
+  // Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use
+  // extracted subvectors from the same original vectors. Combine these into a
+  // single avg that operates on the two original vectors.
+  // avgceil is the target independent name for rhadd, avgfloor is a hadd.
+ // Example: + // (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>), + // extract_subvector (v16i8 OpB, <0>))), + // (v8i8 (avgceils (extract_subvector (v16i8 OpA, <8>), + // extract_subvector (v16i8 OpB, <8>))))) // -> - // (v16i8(urhadd(v16i8 OpA, v16i8 OpB))) + // (v16i8(avgceils(v16i8 OpA, v16i8 OpB))) if (N->getNumOperands() == 2 && N0Opc == N1Opc && - (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD || - N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) { + (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS || + N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS)) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); SDValue N10 = N1->getOperand(0); @@ -14410,6 +15201,29 @@ static SDValue performConcatVectorsCombine(SDNode *N, RHS)); } +static SDValue +performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + EVT VT = N->getValueType(0); + if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1) + return SDValue(); + + SDValue V = N->getOperand(0); + + // NOTE: This combine exists in DAGCombiner, but that version's legality check + // blocks this combine because the non-const case requires custom lowering. + // + // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const) + if (V.getOpcode() == ISD::SPLAT_VECTOR) + if (isa(V.getOperand(0))) + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0)); + + return SDValue(); +} + static SDValue performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -14470,33 +15284,34 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, // Check the operand and see if it originates from a lane extract. SDValue Op1 = N->getOperand(1); - if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - // Yep, no additional predication needed. Perform the transform. - SDValue IID = N->getOperand(0); - SDValue Shift = N->getOperand(2); - SDValue Vec = Op1.getOperand(0); - SDValue Lane = Op1.getOperand(1); - EVT ResTy = N->getValueType(0); - EVT VecResTy; - SDLoc DL(N); + if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); - // The vector width should be 128 bits by the time we get here, even - // if it started as 64 bits (the extract_vector handling will have - // done so). - assert(Vec.getValueSizeInBits() == 128 && - "unexpected vector size on extract_vector_elt!"); - if (Vec.getValueType() == MVT::v4i32) - VecResTy = MVT::v4f32; - else if (Vec.getValueType() == MVT::v2i64) - VecResTy = MVT::v2f64; - else - llvm_unreachable("unexpected vector type!"); + // Yep, no additional predication needed. Perform the transform. + SDValue IID = N->getOperand(0); + SDValue Shift = N->getOperand(2); + SDValue Vec = Op1.getOperand(0); + SDValue Lane = Op1.getOperand(1); + EVT ResTy = N->getValueType(0); + EVT VecResTy; + SDLoc DL(N); - SDValue Convert = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane); - } - return SDValue(); + // The vector width should be 128 bits by the time we get here, even + // if it started as 64 bits (the extract_vector handling will have + // done so). Bail if it is not. 
+ if (Vec.getValueSizeInBits() != 128) + return SDValue(); + + if (Vec.getValueType() == MVT::v4i32) + VecResTy = MVT::v4f32; + else if (Vec.getValueType() == MVT::v2i64) + VecResTy = MVT::v2f64; + else + return SDValue(); + + SDValue Convert = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane); } // AArch64 high-vector "long" operations are formed by performing the non-high @@ -14515,6 +15330,11 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold // similarly here. static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { + MVT VT = N.getSimpleValueType(); + if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N.getConstantOperandVal(1) == 0) + N = N.getOperand(0); + switch (N.getOpcode()) { case AArch64ISD::DUP: case AArch64ISD::DUPLANE8: @@ -14535,18 +15355,19 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { return SDValue(); } - MVT NarrowTy = N.getSimpleValueType(); - if (!NarrowTy.is64BitVector()) + if (!VT.is64BitVector()) return SDValue(); - MVT ElementTy = NarrowTy.getVectorElementType(); - unsigned NumElems = NarrowTy.getVectorNumElements(); - MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2); + SDLoc DL(N); + unsigned NumElems = VT.getVectorNumElements(); + if (N.getValueType().is64BitVector()) { + MVT ElementTy = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2); + N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops()); + } - SDLoc dl(N); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, - DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()), - DAG.getConstant(NumElems, dl, MVT::i64)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N, + DAG.getConstant(NumElems, DL, MVT::i64)); } static bool isEssentiallyExtractHighSubvector(SDValue N) { @@ -14696,7 +15517,7 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { } // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b) -static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); // Only scalar integer and vector types. 
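The ADD(UADDV a, UADDV b) --> UADDV(ADD a, b) fold introduced here is valid because two horizontal sums followed by a scalar add equal one horizontal sum of the element-wise add, including under modular wraparound, so only one cross-lane reduction is needed. A standalone model (values invented for this note):

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  std::vector<uint32_t> A = {1, 2, 3, 4};
  std::vector<uint32_t> B = {10, 20, 30, 40};

  // UADDV(a) + UADDV(b): two reductions, then a scalar add.
  uint32_t Separate = std::accumulate(A.begin(), A.end(), 0u) +
                      std::accumulate(B.begin(), B.end(), 0u);

  // UADDV(ADD(a, b)): one element-wise add, then a single reduction.
  std::vector<uint32_t> Sum(4);
  for (int I = 0; I < 4; ++I)
    Sum[I] = A[I] + B[I]; // modular add, matching the vector ADD

  uint32_t Fused = std::accumulate(Sum.begin(), Sum.end(), 0u);
  assert(Separate == Fused);
  return 0;
}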
   if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
@@ -14708,28 +15529,103 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
       RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
     return SDValue();
 
-  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
-  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
-  if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
-    return SDValue();
+  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
+  if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
+    return SDValue();
+
+  SDValue Op1 = LHS->getOperand(0);
+  SDValue Op2 = RHS->getOperand(0);
+  EVT OpVT1 = Op1.getValueType();
+  EVT OpVT2 = Op2.getValueType();
+  if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
+      Op2.getOpcode() != AArch64ISD::UADDV ||
+      OpVT1.getVectorElementType() != VT)
+    return SDValue();
+
+  SDValue Val1 = Op1.getOperand(0);
+  SDValue Val2 = Op2.getOperand(0);
+  EVT ValVT = Val1->getValueType(0);
+  SDLoc DL(N);
+  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                     DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
+                     DAG.getConstant(0, DL, MVT::i64));
+}
+
+/// Perform the scalar expression combine in the form of:
+///   CSEL(c, 1, cc) + b  => CSINC(b+c, b, cc)
+///   CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
+static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Handle commutativity.
+  if (LHS.getOpcode() != AArch64ISD::CSEL &&
+      LHS.getOpcode() != AArch64ISD::CSNEG) {
+    std::swap(LHS, RHS);
+    if (LHS.getOpcode() != AArch64ISD::CSEL &&
+        LHS.getOpcode() != AArch64ISD::CSNEG) {
+      return SDValue();
+    }
+  }
+
+  if (!LHS.hasOneUse())
+    return SDValue();
+
+  AArch64CC::CondCode AArch64CC =
+      static_cast<AArch64CC::CondCode>(LHS.getConstantOperandVal(2));
+
+  // The CSEL must include a constant one operand, and the CSNEG must include
+  // a one or negative-one operand.
+  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(LHS.getOperand(0));
+  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+  if (!CTVal || !CFVal)
+    return SDValue();
+
+  if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
+        (CTVal->isOne() || CFVal->isOne())) &&
+      !(LHS.getOpcode() == AArch64ISD::CSNEG &&
+        (CTVal->isOne() || CFVal->isAllOnes())))
+    return SDValue();
+
+  // Switch CSEL(1, c, cc) to CSEL(c, 1, !cc)
+  if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
+      !CFVal->isOne()) {
+    std::swap(CTVal, CFVal);
+    AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+  }
+
+  SDLoc DL(N);
+  // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
+  if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
+      !CFVal->isAllOnes()) {
+    APInt C = -1 * CFVal->getAPIntValue();
+    CTVal = cast<ConstantSDNode>(DAG.getConstant(C, DL, VT));
+    CFVal = cast<ConstantSDNode>(DAG.getAllOnesConstant(DL, VT));
+    AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+  }
+
+  // The fold might be neutral for larger constants, as the immediate needs to
+  // be materialized in a register.
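The scalar identity performAddCSelIntoCSinc relies on can be verified exhaustively over small values. A standalone C++ model of the CSEL/CSNEG/CSINC semantics — an illustration, not LLVM code:

    #include <cassert>
    #include <cstdint>

    // Models of the AArch64 conditional-select nodes.
    static int64_t csel(int64_t t, int64_t f, bool cc) { return cc ? t : f; }
    static int64_t csinc(int64_t t, int64_t f, bool cc) { return cc ? t : f + 1; }
    static int64_t csneg(int64_t t, int64_t f, bool cc) { return cc ? t : -f; }

    int main() {
      for (int64_t c = -4; c <= 4; ++c)
        for (int64_t b = -4; b <= 4; ++b)
          for (bool cc : {false, true}) {
            // CSEL(c, 1, cc) + b == CSINC(b+c, b, cc)
            assert(csel(c, 1, cc) + b == csinc(b + c, b, cc));
            // CSNEG(c, -1, cc) + b == CSINC(b+c, b, cc)
            assert(csneg(c, -1, cc) + b == csinc(b + c, b, cc));
          }
    }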
+  APInt ADDC = CTVal->getAPIntValue();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isLegalAddImmediate(ADDC.getSExtValue()))
+    return SDValue();
+
+  assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
+          (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
+         "Unexpected constant value");
 
-  SDValue Op1 = LHS->getOperand(0);
-  SDValue Op2 = RHS->getOperand(0);
-  EVT OpVT1 = Op1.getValueType();
-  EVT OpVT2 = Op2.getValueType();
-  if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
-      Op2.getOpcode() != AArch64ISD::UADDV ||
-      OpVT1.getVectorElementType() != VT)
-    return SDValue();
+  SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
+  SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
+  SDValue Cmp = LHS.getOperand(3);
 
-  SDValue Val1 = Op1.getOperand(0);
-  SDValue Val2 = Op2.getOperand(0);
-  EVT ValVT = Val1->getValueType(0);
-  SDLoc DL(N);
-  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
-                     DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
-                     DAG.getConstant(0, DL, MVT::i64));
+  return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
 }
 
 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
@@ -14755,6 +15651,49 @@ static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
                      Dot.getOperand(2));
 }
 
+static bool isNegatedInteger(SDValue Op) {
+  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, Op);
+}
+
+// Try to fold
+//
+// (neg (csel X, Y)) -> (csel (neg X), (neg Y))
+//
+// The fold lets a csel be matched as a csneg without generating a redundant
+// neg instruction; this includes the negation of the csel expansion of an abs
+// node lowered by lowerABS.
+static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
+  if (!isNegatedInteger(SDValue(N, 0)))
+    return SDValue();
+
+  SDValue CSel = N->getOperand(1);
+  if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
+    return SDValue();
+
+  SDValue N0 = CSel.getOperand(0);
+  SDValue N1 = CSel.getOperand(1);
+
+  // If neither operand is already a negation, the fold is not worthwhile: it
+  // would introduce two additional negations while removing only one.
+  if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+    return SDValue();
+
+  SDValue N0N = getNegatedInteger(N0, DAG);
+  SDValue N1N = getNegatedInteger(N1, DAG);
+
+  SDLoc DL(N);
+  EVT VT = CSel.getValueType();
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
+                     CSel.getOperand(3));
+}
+
 // The basic add/sub long vector instructions have variants with "2" on the end
 // which act on the high-half of their inputs.
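performNegCSelCombine above distributes a negation over both select arms; as a quick standalone sanity check (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    static int64_t csel(int64_t t, int64_t f, bool cc) { return cc ? t : f; }

    int main() {
      for (int64_t x = -3; x <= 3; ++x)
        for (int64_t y = -3; y <= 3; ++y)
          for (bool cc : {false, true})
            // (neg (csel X, Y)) == (csel (neg X), (neg Y))
            assert(-csel(x, y, cc) == csel(-x, -y, cc));
    }

The identity always holds; the fold only pays off when at least one arm is already a negation, hence the early bail-out in the code.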
They are normally matched by
 // patterns like:
@@ -14808,14 +15747,120 @@ static SDValue performAddSubLongCombine(SDNode *N,
   return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
 }
 
+static bool isCMP(SDValue Op) {
+  return Op.getOpcode() == AArch64ISD::SUBS &&
+         !Op.getNode()->hasAnyUseOfValue(0);
+}
+
+// (CSEL 1 0 CC Cond) => CC
+// (CSEL 0 1 CC Cond) => !CC
+static Optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
+  if (Op.getOpcode() != AArch64ISD::CSEL)
+    return None;
+  auto CC = static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
+  if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+    return None;
+  SDValue OpLHS = Op.getOperand(0);
+  SDValue OpRHS = Op.getOperand(1);
+  if (isOneConstant(OpLHS) && isNullConstant(OpRHS))
+    return CC;
+  if (isNullConstant(OpLHS) && isOneConstant(OpRHS))
+    return getInvertedCondCode(CC);
+
+  return None;
+}
+
+// (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry)
+// (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)
+static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd) {
+  SDValue CmpOp = Op->getOperand(2);
+  if (!isCMP(CmpOp))
+    return SDValue();
+
+  if (IsAdd) {
+    if (!isOneConstant(CmpOp.getOperand(1)))
+      return SDValue();
+  } else {
+    if (!isNullConstant(CmpOp.getOperand(0)))
+      return SDValue();
+  }
+
+  SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
+  auto CC = getCSETCondCode(CsetOp);
+  if (CC != (IsAdd ? AArch64CC::HS : AArch64CC::LO))
+    return SDValue();
+
+  return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
+                     Op->getOperand(0), Op->getOperand(1),
+                     CsetOp.getOperand(3));
+}
+
+// (ADC x 0 cond) => (CINC x HS cond)
+static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Cond = N->getOperand(2);
+
+  if (!isNullConstant(RHS))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // (CINC x cc cond) <=> (CSINC x x !cc cond)
+  SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32);
+  return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond);
+}
+
+// Transform vector add(zext i8 to i32, zext i8 to i32)
+//  into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32)
+// This allows extra uses of saddl/uaddl at the lower vector widths, and fewer
+// extends.
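The narrowing performed by performVectorAddSubExtCombine (defined just below) is sound because two extended i8 values summed at i16 can never overflow i16, and the non-negative i16 result sign-extends to the same i32 value. A standalone check for the unsigned case — illustrative C++ only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          int32_t wide = int32_t(a) + int32_t(b); // add(zext to i32, zext to i32)
          int16_t narrow = int16_t(a + b);        // add(zext to i16, zext to i16); max 510 fits
          assert(int32_t(narrow) == wide);        // sext(i16 sum) == i32 sum
        }
    }

The signed case works the same way: two sign-extended i8 values sum to at most [-256, 254], which also fits i16.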
+static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 ||
+      (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
+      (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
+      N->getOperand(0).getOperand(0).getValueType() !=
+          N->getOperand(1).getOperand(0).getValueType())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0).getOperand(0);
+  SDValue N1 = N->getOperand(1).getOperand(0);
+  EVT InVT = N0.getValueType();
+
+  EVT S1 = InVT.getScalarType();
+  EVT S2 = VT.getScalarType();
+  if ((S2 == MVT::i32 && S1 == MVT::i8) ||
+      (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
+    SDLoc DL(N);
+    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
+                                  S2.getHalfSizedIntegerVT(*DAG.getContext()),
+                                  VT.getVectorElementCount());
+    SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
+    SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
+    SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewOp);
+  }
+  return SDValue();
+}
+
 static SDValue performAddSubCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
   // Try to change sum of two reductions.
-  if (SDValue Val = performUADDVCombine(N, DAG))
+  if (SDValue Val = performAddUADDVCombine(N, DAG))
     return Val;
   if (SDValue Val = performAddDotCombine(N, DAG))
     return Val;
+  if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
+    return Val;
+  if (SDValue Val = performNegCSelCombine(N, DAG))
+    return Val;
+  if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
+    return Val;
 
   return performAddSubLongCombine(N, DCI, DAG);
 }
@@ -15176,6 +16221,9 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
       return false;
   }
 
+  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
+    return true;
+
   // "ptrue p.<Ty>, all" can be considered all active when <Ty> is the same size
   // or smaller than the implicit element type represented by N.
   // NOTE: A larger element count implies a smaller element type.
@@ -15186,8 +16234,7 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
   // If we're compiling for a specific vector-length, we can check if the
   // pattern's VL equals that of the scalable vector at runtime.
   if (N.getOpcode() == AArch64ISD::PTRUE) {
-    const auto &Subtarget =
-        static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
     unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
     unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
     if (MaxSVESize && MinSVESize == MaxSVESize) {
@@ -15233,6 +16280,39 @@ static SDValue performIntrinsicCombine(SDNode *N,
   switch (IID) {
   default:
     break;
+  case Intrinsic::get_active_lane_mask: {
+    SDValue Res = SDValue();
+    EVT VT = N->getValueType(0);
+    if (VT.isFixedLengthVector()) {
+      // We can use the SVE whilelo instruction to lower this intrinsic by
+      // creating the appropriate sequence of scalable vector operations and
+      // then extracting a fixed-width subvector from the scalable vector.
+
+      SDLoc DL(N);
+      SDValue ID =
+          DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
+
+      EVT WhileVT = EVT::getVectorVT(
+          *DAG.getContext(), MVT::i1,
+          ElementCount::getScalable(VT.getVectorNumElements()));
+
+      // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
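Stepping back from the code for a moment: the semantics that this whilelo-based lowering of llvm.get.active.lane.mask implements are simply "lane i is active while Base + i < N". A standalone scalar model — illustrative C++ only, not LLVM code:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static std::vector<bool> activeLaneMask(uint64_t base, uint64_t n,
                                            unsigned lanes) {
      std::vector<bool> mask(lanes);
      for (unsigned i = 0; i < lanes; ++i)
        mask[i] = base + i < n; // what SVE's WHILELO computes per lane
      return mask;
    }

    int main() {
      auto m = activeLaneMask(/*base=*/6, /*n=*/10, /*lanes=*/8);
      for (unsigned i = 0; i < 8; ++i)
        assert(m[i] == (i < 4)); // lanes 0..3 cover elements 6..9; rest inactive
    }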
+ EVT PromVT = getPromotedVTForPredicate(WhileVT); + + // Get the fixed-width equivalent of PromVT for extraction. + EVT ExtVT = + EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(), + VT.getVectorElementCount()); + + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID, + N->getOperand(1), N->getOperand(2)); + Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res, + DAG.getConstant(0, DL, MVT::i64)); + Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res); + } + return Res; + } case Intrinsic::aarch64_neon_vcvtfxs2fp: case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); @@ -15261,7 +16341,11 @@ static SDValue performIntrinsicCombine(SDNode *N, return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_smull: + return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_umull: + return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_pmull: case Intrinsic::aarch64_neon_sqdmull: return tryCombineLongOpWithDup(IID, N, DCI, DAG); @@ -15350,6 +16434,10 @@ static SDValue performIntrinsicCombine(SDNode *N, return convertMergedOpToPredOp(N, ISD::XOR, DAG, true); case Intrinsic::aarch64_sve_orr: return convertMergedOpToPredOp(N, ISD::OR, DAG, true); + case Intrinsic::aarch64_sve_sabd: + return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true); + case Intrinsic::aarch64_sve_uabd: + return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true); case Intrinsic::aarch64_sve_sqadd: return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true); case Intrinsic::aarch64_sve_sqsub: @@ -15538,7 +16626,7 @@ static SDValue performExtendCombine(SDNode *N, static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts) { assert(!St.isTruncatingStore() && "cannot split truncating vector store"); - unsigned OrigAlignment = St.getAlignment(); + Align OrigAlignment = St.getAlign(); unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8; // Create scalar stores. 
This is at least as good as the code sequence for a @@ -15563,7 +16651,7 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, unsigned Offset = EltOffset; while (--NumVecElts) { - unsigned Alignment = MinAlign(OrigAlignment, Offset); + Align Alignment = commonAlignment(OrigAlignment, Offset); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(BaseOffset + Offset, DL, MVT::i64)); @@ -15636,10 +16724,6 @@ static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT PtrTy = N->getOperand(3).getValueType(); - if (VT == MVT::nxv8bf16 && - !static_cast(DAG.getSubtarget()).hasBF16()) - return SDValue(); - EVT LoadVT = VT; if (VT.isFloatingPoint()) LoadVT = VT.changeTypeToInteger(); @@ -15667,9 +16751,6 @@ static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) { "Unsupported opcode."); SDLoc DL(N); EVT VT = N->getValueType(0); - if (VT == MVT::nxv8bf16 && - !static_cast(DAG.getSubtarget()).hasBF16()) - return SDValue(); EVT LoadVT = VT; if (VT.isFloatingPoint()) @@ -15692,10 +16773,6 @@ static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) { EVT HwSrcVt = getSVEContainerType(DataVT); SDValue InputVT = DAG.getValueType(DataVT); - if (DataVT == MVT::nxv8bf16 && - !static_cast(DAG.getSubtarget()).hasBF16()) - return SDValue(); - if (DataVT.isFloatingPoint()) InputVT = DAG.getValueType(HwSrcVt); @@ -15722,10 +16799,6 @@ static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) { EVT DataVT = Data.getValueType(); EVT PtrTy = N->getOperand(4).getValueType(); - if (DataVT == MVT::nxv8bf16 && - !static_cast(DAG.getSubtarget()).hasBF16()) - return SDValue(); - if (DataVT.isFloatingPoint()) Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data); @@ -15912,8 +16985,8 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, // extensions can use this to mark that it does not want splitting to happen // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of // eliminating alignment hazards is only 1 in 8 for alignment of 2. 
- if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 || - S->getAlignment() <= 2) + if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) || + S->getAlign() <= Align(2)) return SDValue(); // If we get a splat of a scalar convert this vector store to a store of @@ -15934,11 +17007,11 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SDValue BasePtr = S->getBasePtr(); SDValue NewST1 = DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(), - S->getAlignment(), S->getMemOperand()->getFlags()); + S->getAlign(), S->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(8, DL, MVT::i64)); return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr, - S->getPointerInfo(), S->getAlignment(), + S->getPointerInfo(), S->getAlign(), S->getMemOperand()->getFlags()); } @@ -15970,6 +17043,33 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op1 = N->getOperand(1); EVT ResVT = N->getValueType(0); + // uzp1(x, undef) -> concat(truncate(x), undef) + if (Op1.getOpcode() == ISD::UNDEF) { + EVT BCVT = MVT::Other, HalfVT = MVT::Other; + switch (ResVT.getSimpleVT().SimpleTy) { + default: + break; + case MVT::v16i8: + BCVT = MVT::v8i16; + HalfVT = MVT::v8i8; + break; + case MVT::v8i16: + BCVT = MVT::v4i32; + HalfVT = MVT::v4i16; + break; + case MVT::v4i32: + BCVT = MVT::v2i64; + HalfVT = MVT::v2i32; + break; + } + if (BCVT != MVT::Other) { + SDValue BC = DAG.getBitcast(BCVT, Op0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc, + DAG.getUNDEF(HalfVT)); + } + } + // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z) if (Op0.getOpcode() == AArch64ISD::UUNPKLO) { if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) { @@ -16267,6 +17367,152 @@ static SDValue performSTORECombine(SDNode *N, return SDValue(); } +/// \return true if part of the index was folded into the Base. +static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale, + SDLoc DL, SelectionDAG &DAG) { + // This function assumes a vector of i64 indices. + EVT IndexVT = Index.getValueType(); + if (!IndexVT.isVector() || IndexVT.getVectorElementType() != MVT::i64) + return false; + + // Simplify: + // BasePtr = Ptr + // Index = X + splat(Offset) + // -> + // BasePtr = Ptr + Offset * scale. 
+  //   Index = X
+  if (Index.getOpcode() == ISD::ADD) {
+    if (auto Offset = DAG.getSplatValue(Index.getOperand(1))) {
+      Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
+      BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
+      Index = Index.getOperand(0);
+      return true;
+    }
+  }
+
+  // Simplify:
+  //   BasePtr = Ptr
+  //   Index = (X + splat(Offset)) << splat(Shift)
+  // ->
+  //   BasePtr = Ptr + (Offset << Shift) * scale
+  //   Index = X << splat(Shift)
+  if (Index.getOpcode() == ISD::SHL &&
+      Index.getOperand(0).getOpcode() == ISD::ADD) {
+    SDValue Add = Index.getOperand(0);
+    SDValue ShiftOp = Index.getOperand(1);
+    SDValue OffsetOp = Add.getOperand(1);
+    if (auto Shift = DAG.getSplatValue(ShiftOp))
+      if (auto Offset = DAG.getSplatValue(OffsetOp)) {
+        Offset = DAG.getNode(ISD::SHL, DL, MVT::i64, Offset, Shift);
+        Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
+        BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
+        Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(),
+                            Add.getOperand(0), ShiftOp);
+        return true;
+      }
+  }
+
+  return false;
+}
+
+// Analyse the specified address returning true if a more optimal addressing
+// mode is available. When returning true all parameters are updated to reflect
+// their recommended values.
+static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
+                                     SDValue &BasePtr, SDValue &Index,
+                                     SelectionDAG &DAG) {
+  // Try to iteratively fold parts of the index into the base pointer to
+  // simplify the index as much as possible.
+  bool Changed = false;
+  while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
+    Changed = true;
+
+  // Only consider element types that are pointer sized as smaller types can
+  // be easily promoted.
+  EVT IndexVT = Index.getValueType();
+  if (IndexVT.getVectorElementType() != MVT::i64 || IndexVT == MVT::nxv2i64)
+    return Changed;
+
+  // Match:
+  //   Index = step(const)
+  int64_t Stride = 0;
+  if (Index.getOpcode() == ISD::STEP_VECTOR)
+    Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
+
+  // Match:
+  //   Index = step(const) << shift(const)
+  else if (Index.getOpcode() == ISD::SHL &&
+           Index.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
+    SDValue RHS = Index.getOperand(1);
+    if (auto *Shift =
+            dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(RHS))) {
+      int64_t Step = (int64_t)Index.getOperand(0).getConstantOperandVal(1);
+      Stride = Step << Shift->getZExtValue();
+    }
+  }
+
+  // Return early because no supported pattern is found.
+  if (Stride == 0)
+    return Changed;
+
+  if (Stride < std::numeric_limits<int32_t>::min() ||
+      Stride > std::numeric_limits<int32_t>::max())
+    return Changed;
+
+  const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+  unsigned MaxVScale =
+      Subtarget.getMaxSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
+  int64_t LastElementOffset =
+      IndexVT.getVectorMinNumElements() * Stride * MaxVScale;
+
+  if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
+      LastElementOffset > std::numeric_limits<int32_t>::max())
+    return Changed;
+
+  EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
+  // Stride does not scale explicitly by 'Scale', because it happens in
+  // the gather/scatter addressing mode.
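Both foldIndexIntoBase rewrites above are plain distributivity over the gather/scatter address computation Base + Index[i] * Scale. A standalone numeric check — illustrative C++ only:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t base = 0x1000, scale = 8, off = 3, shift = 2;
      for (int64_t x = 0; x <= 16; ++x) {
        // Base + (X + Off) * Scale == (Base + Off * Scale) + X * Scale
        assert(base + (x + off) * scale == (base + off * scale) + x * scale);
        // Base + ((X + Off) << Shift) * Scale
        //   == (Base + ((Off << Shift) * Scale)) + (X << Shift) * Scale
        assert(base + ((x + off) << shift) * scale ==
               (base + ((off << shift) * scale)) + (x << shift) * scale);
      }
    }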
+  Index = DAG.getNode(ISD::STEP_VECTOR, SDLoc(N), NewIndexVT,
+                      DAG.getTargetConstant(Stride, SDLoc(N), MVT::i32));
+  return true;
+}
+
+static SDValue performMaskedGatherScatterCombine(
+    SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) {
+  MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
+  assert(MGS && "Can only combine gather load or scatter store nodes");
+
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  SDLoc DL(MGS);
+  SDValue Chain = MGS->getChain();
+  SDValue Scale = MGS->getScale();
+  SDValue Index = MGS->getIndex();
+  SDValue Mask = MGS->getMask();
+  SDValue BasePtr = MGS->getBasePtr();
+  ISD::MemIndexType IndexType = MGS->getIndexType();
+
+  if (!findMoreOptimalIndexType(MGS, BasePtr, Index, DAG))
+    return SDValue();
+
+  // Here we catch such cases early and change MGATHER's IndexType to allow
+  // the use of an Index that's more legalisation friendly.
+  if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
+    SDValue PassThru = MGT->getPassThru();
+    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+    return DAG.getMaskedGather(
+        DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+        Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
+  }
+  auto *MSC = cast<MaskedScatterSDNode>(MGS);
+  SDValue Data = MSC->getValue();
+  SDValue Ops[] = {Chain, Data, Mask, BasePtr, Index, Scale};
+  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
+                              Ops, MSC->getMemOperand(), IndexType,
+                              MSC->isTruncatingStore());
+}
+
 /// Target-specific DAG combine function for NEON load/store intrinsics
 /// to merge base address updates.
 static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -16723,6 +17969,47 @@ static SDValue performBRCONDCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
+  unsigned CC = N->getConstantOperandVal(2);
+  SDValue SUBS = N->getOperand(3);
+  SDValue Zero, CTTZ;
+
+  if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
+    Zero = N->getOperand(0);
+    CTTZ = N->getOperand(1);
+  } else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
+    Zero = N->getOperand(1);
+    CTTZ = N->getOperand(0);
+  } else
+    return SDValue();
+
+  if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) ||
+      (CTTZ.getOpcode() == ISD::TRUNCATE &&
+       CTTZ.getOperand(0).getOpcode() != ISD::CTTZ))
+    return SDValue();
+
+  assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
+         "Illegal type in CTTZ folding");
+
+  if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1)))
+    return SDValue();
+
+  SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
+                  ? CTTZ.getOperand(0).getOperand(0)
+                  : CTTZ.getOperand(0);
+
+  if (X != SUBS.getOperand(0))
+    return SDValue();
+
+  unsigned BitWidth = CTTZ.getOpcode() == ISD::TRUNCATE
+                          ? CTTZ.getOperand(0).getValueSizeInBits()
+                          : CTTZ.getValueSizeInBits();
+  SDValue BitWidthMinusOne =
+      DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
+  return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
+                     BitWidthMinusOne);
+}
+
 // Optimize CSEL instructions
 static SDValue performCSELCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
@@ -16731,6 +18018,11 @@ static SDValue performCSELCombine(SDNode *N,
   if (N->getOperand(0) == N->getOperand(1))
     return N->getOperand(0);
 
+  // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
+  // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
+  if (SDValue Folded = foldCSELofCTTZ(N, DAG))
+    return Folded;
+
   return performCONDCombine(N, DCI, DAG, 2, 3);
 }
 
@@ -16739,14 +18031,14 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
 
   // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
   if (Cond == ISD::SETNE && isOneConstant(RHS) &&
       LHS->getOpcode() == AArch64ISD::CSEL &&
       isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
       LHS->hasOneUse()) {
-    SDLoc DL(N);
-
     // Invert CSEL's condition.
     auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
     auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
@@ -16757,9 +18049,48 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
         DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
                     LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
                     LHS.getOperand(3));
-    return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
+    return DAG.getZExtOrTrunc(CSEL, DL, VT);
+  }
+
+  // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
+  if (Cond == ISD::SETNE && isNullConstant(RHS) &&
+      LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
+      LHS->hasOneUse()) {
+    EVT TstVT = LHS->getValueType(0);
+    if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
+      // This pattern will be optimised better in emitComparison.
+      uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
+      SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
+                                DAG.getConstant(TstImm, DL, TstVT));
+      return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
+    }
+  }
+
+  return SDValue();
+}
+
+// Replace a flag-setting operator (eg ANDS) with the generic version
+// (eg AND) if the flag is unused.
+static SDValue performFlagSettingCombine(SDNode *N,
                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         unsigned GenericOpcode) {
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  // If the flag result isn't used, convert back to a generic opcode.
+  if (!N->hasAnyUseOfValue(1)) {
+    SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
+    return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
+                                  DL);
   }
 
+  // Combine identical generic nodes into this node, re-using the result.
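The foldCSELofCTTZ rewrite above relies on AArch64's CTTZ (RBIT + CLZ) returning the full bit width for a zero input, so masking with bitwidth-1 reproduces the select-zero behaviour. A standalone check using C++20's std::countr_zero, which follows the same convention — illustrative only:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 2u, 0x80000000u, 0xdeadbeefu}) {
        // CSEL 0, cttz(X), eq(X, 0)  ==  cttz(X) & (bitwidth - 1)
        uint32_t selected = (x == 0) ? 0u : uint32_t(std::countr_zero(x));
        uint32_t masked = uint32_t(std::countr_zero(x)) & 31u;
        assert(selected == masked); // countr_zero(0) == 32, and 32 & 31 == 0
      }
    }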
+ if (SDNode *Generic = DCI.DAG.getNodeIfExists( + GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS})) + DCI.CombineTo(Generic, SDValue(N, 0)); + return SDValue(); } @@ -16801,27 +18132,46 @@ static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue +performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && "Unexpected opcode!"); + SelectionDAG &DAG = DCI.DAG; SDValue Pred = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); ISD::CondCode Cond = cast(N->getOperand(3))->get(); - // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne - // => inner setcc_merge_zero - if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) && - LHS->getOpcode() == ISD::SIGN_EXTEND && - LHS->getOperand(0)->getValueType(0) == N->getValueType(0) && - LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && - LHS->getOperand(0)->getOperand(0) == Pred) - return LHS->getOperand(0); - if (SDValue V = performSetCCPunpkCombine(N, DAG)) return V; + if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) && + LHS->getOpcode() == ISD::SIGN_EXTEND && + LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) { + // setcc_merge_zero( + // pred, extend(setcc_merge_zero(pred, ...)), != splat(0)) + // => setcc_merge_zero(pred, ...) + if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && + LHS->getOperand(0)->getOperand(0) == Pred) + return LHS->getOperand(0); + + // setcc_merge_zero( + // all_active, extend(nxvNi1 ...), != splat(0)) + // -> nxvNi1 ... + if (isAllActivePredicate(DAG, Pred)) + return LHS->getOperand(0); + + // setcc_merge_zero( + // pred, extend(nxvNi1 ...), != splat(0)) + // -> nxvNi1 and(pred, ...) + if (DCI.isAfterLegalizeDAG()) + // Do this after legalization to allow more folds on setcc_merge_zero + // to be recognized. + return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), + LHS->getOperand(0), Pred); + } + return SDValue(); } @@ -16928,12 +18278,53 @@ static SDValue performTBZCombine(SDNode *N, DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3)); } +// Swap vselect operands where it may allow a predicated operation to achieve +// the `sel`. 
+// +// (vselect (setcc ( condcode) (_) (_)) (a) (op (a) (b))) +// => (vselect (setcc (!condcode) (_) (_)) (op (a) (b)) (a)) +static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG) { + auto SelectA = N->getOperand(1); + auto SelectB = N->getOperand(2); + auto NTy = N->getValueType(0); + + if (!NTy.isScalableVector()) + return SDValue(); + SDValue SetCC = N->getOperand(0); + if (SetCC.getOpcode() != ISD::SETCC || !SetCC.hasOneUse()) + return SDValue(); + + switch (SelectB.getOpcode()) { + default: + return SDValue(); + case ISD::FMUL: + case ISD::FSUB: + case ISD::FADD: + break; + } + if (SelectA != SelectB.getOperand(0)) + return SDValue(); + + ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); + ISD::CondCode InverseCC = + ISD::getSetCCInverse(CC, SetCC.getOperand(0).getValueType()); + auto InverseSetCC = + DAG.getSetCC(SDLoc(SetCC), SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), InverseCC); + + return DAG.getNode(ISD::VSELECT, SDLoc(N), NTy, + {InverseSetCC, SelectB, SelectA}); +} + // vselect (v1i1 setcc) -> // vselect (v1iXX setcc) (XX is the size of the compared operand type) // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine // such VSELECT. static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) { + if (auto SwapResult = trySwapVSelectOperands(N, DAG)) + return SwapResult; + SDValue N0 = N->getOperand(0); EVT CCVT = N0.getValueType(); @@ -17064,6 +18455,24 @@ static SDValue performSelectCombine(SDNode *N, return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } +static SDValue performDUPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + // If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the + // 128bit vector version. + if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) { + EVT LVT = VT.getDoubleNumVectorElementsVT(*DCI.DAG.getContext()); + if (SDNode *LN = DCI.DAG.getNodeIfExists( + N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) { + SDLoc DL(N); + return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0), + DCI.DAG.getConstant(0, DL, MVT::i64)); + } + } + + return performPostLD1Combine(N, DCI, false); +} + /// Get rid of unnecessary NVCASTs (that don't change the type). static SDValue performNVCASTCombine(SDNode *N) { if (N->getValueType(0) == N->getOperand(0).getValueType()) @@ -17104,13 +18513,14 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, // Check whether folding this offset is legal. It must not go out of bounds of // the referenced object to avoid violating the code model, and must be - // smaller than 2^21 because this is the largest offset expressible in all - // object formats. + // smaller than 2^20 because this is the largest offset expressible in all + // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF + // stores an immediate signed 21 bit offset.) // // This check also prevents us from folding negative offsets, which will end // up being treated in the same way as large positive ones. They could also // cause code model violations, and aren't really common enough to matter. 
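The bound used above is just the positive half of a signed 21-bit immediate field; a one-off check — illustrative C++ only:

    #include <cassert>
    #include <cstdint>

    int main() {
      // A signed 21-bit field encodes [-2^20, 2^20 - 1], so the largest
      // universally encodable positive offset is (1 << 20) - 1; the combine
      // therefore rejects Offset >= (1 << 20), and negative offsets with it.
      int64_t hi = (int64_t(1) << 20) - 1;
      int64_t lo = -(int64_t(1) << 20);
      assert(hi == 1048575 && lo == -1048576);
    }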
- if (Offset >= (1 << 21)) + if (Offset >= (1 << 20)) return SDValue(); const GlobalValue *GV = GN->getGlobal(); @@ -17621,7 +19031,7 @@ performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return performPostLD1Combine(N, DCI, true); } -SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) { EVT Ty = N->getValueType(0); if (Ty.isInteger()) return SDValue(); @@ -17643,9 +19053,9 @@ SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getBitcast(Ty, Trunc); } -SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { +static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -17675,6 +19085,31 @@ SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *Subtarget, + bool fixedSVEVectorVT) { + EVT VT = N->getValueType(0); + + // Don't expand for SVE2 + if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME()) + return SDValue(); + + // Don't expand for NEON + if (VT.isFixedLengthVector() && !fixedSVEVectorVT) + return SDValue(); + + SDLoc DL(N); + + SDValue Mask = N->getOperand(0); + SDValue In1 = N->getOperand(1); + SDValue In2 = N->getOperand(2); + + SDValue InvMask = DAG.getNOT(DL, Mask, VT); + SDValue Sel = DAG.getNode(ISD::AND, DL, VT, Mask, In1); + SDValue SelInv = DAG.getNode(ISD::AND, DL, VT, InvMask, In2); + return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -17685,6 +19120,22 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::ADD: case ISD::SUB: return performAddSubCombine(N, DCI, DAG); + case AArch64ISD::ANDS: + return performFlagSettingCombine(N, DCI, ISD::AND); + case AArch64ISD::ADC: + if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true)) + return R; + return foldADCToCINC(N, DAG); + case AArch64ISD::SBC: + return foldOverflowCheck(N, DAG, /* IsAdd */ false); + case AArch64ISD::ADCS: + if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true)) + return R; + return performFlagSettingCombine(N, DCI, AArch64ISD::ADC); + case AArch64ISD::SBCS: + if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false)) + return R; + return performFlagSettingCombine(N, DCI, AArch64ISD::SBC); case ISD::XOR: return performXorCombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -17711,10 +19162,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performExtendCombine(N, DCI, DAG); case ISD::SIGN_EXTEND_INREG: return performSignExtendInRegCombine(N, DCI, DAG); - case ISD::TRUNCATE: - return performVectorTruncateCombine(N, DCI, DAG); case ISD::CONCAT_VECTORS: return performConcatVectorsCombine(N, DCI, DAG); + case ISD::EXTRACT_SUBVECTOR: + return performExtractSubvectorCombine(N, DCI, DAG); case ISD::INSERT_SUBVECTOR: return performInsertSubvectorCombine(N, DCI, DAG); case ISD::SELECT: @@ -17729,6 +19180,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::STORE: return performSTORECombine(N, DCI, DAG, Subtarget); + case ISD::MGATHER: + case ISD::MSCATTER: + return performMaskedGatherScatterCombine(N, DCI, DAG); case 
ISD::VECTOR_SPLICE:
     return performSVESpliceCombine(N, DAG);
   case ISD::FP_EXTEND:
@@ -17741,7 +19195,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case AArch64ISD::CSEL:
     return performCSELCombine(N, DCI, DAG);
   case AArch64ISD::DUP:
-    return performPostLD1Combine(N, DCI, false);
+    return performDUPCombine(N, DCI);
   case AArch64ISD::NVCAST:
     return performNVCASTCombine(N);
   case AArch64ISD::SPLICE:
@@ -17752,7 +19206,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case AArch64ISD::UZP1:
     return performUzpCombine(N, DAG);
   case AArch64ISD::SETCC_MERGE_ZERO:
-    return performSetccMergeZeroCombine(N, DAG);
+    return performSetccMergeZeroCombine(N, DCI);
   case AArch64ISD::GLD1_MERGE_ZERO:
   case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
   case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
@@ -17773,12 +19227,20 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performVectorShiftCombine(N, *this, DCI);
   case AArch64ISD::SUNPKLO:
     return performSunpkloCombine(N, DAG);
+  case AArch64ISD::BSP:
+    return performBSPExpandForSVE(
+        N, DAG, Subtarget, useSVEForFixedLengthVectorVT(N->getValueType(0)));
   case ISD::INSERT_VECTOR_ELT:
     return performInsertVectorEltCombine(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT:
-    return performExtractVectorEltCombine(N, DAG);
+    return performExtractVectorEltCombine(N, DCI, Subtarget);
   case ISD::VECREDUCE_ADD:
     return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
+  case AArch64ISD::UADDV:
+    return performUADDVCombine(N, DAG);
+  case AArch64ISD::SMULL:
+  case AArch64ISD::UMULL:
+    return tryCombineLongOpWithDup(Intrinsic::not_intrinsic, N, DCI, DAG);
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN:
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -18152,6 +19614,15 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
   if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
     assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
            "Expected fp->int bitcast!");
+
+    // Bitcasting between unpacked vector types of different element counts is
+    // not a NOP because the live elements are laid out differently.
+    //                01234567
+    // e.g. nxv2i32 = XX??XX??
+    //      nxv4f16 = X?X?X?X?
+    if (VT.getVectorElementCount() != SrcVT.getVectorElementCount())
+      return;
+
     SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
     return;
@@ -18169,6 +19640,53 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
 }
 
+static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                               SelectionDAG &DAG,
+                               const AArch64Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.is256BitVector() ||
+      (VT.getScalarType().isFloatingPoint() &&
+       !N->getFlags().hasAllowReassociation()) ||
+      (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
+    return;
+
+  SDValue X = N->getOperand(0);
+  auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
+  if (!Shuf) {
+    Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
+    X = N->getOperand(1);
+    if (!Shuf)
+      return;
+  }
+
+  if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
+    return;
+
+  // Check the mask is 1,0,3,2,5,4,...
+  ArrayRef<int> Mask = Shuf->getMask();
+  for (int I = 0, E = Mask.size(); I < E; I++)
+    if (Mask[I] != (I % 2 == 0 ?
+                       I + 1 : I - 1))
+      return;
+
+  SDLoc DL(N);
+  auto LoHi = DAG.SplitVector(X, DL);
+  assert(LoHi.first.getValueType() == LoHi.second.getValueType());
+  SDValue Addp = DAG.getNode(AArch64ISD::ADDP, N, LoHi.first.getValueType(),
+                             LoHi.first, LoHi.second);
+
+  // Shuffle the elements back into order.
+  SmallVector<int> NMask;
+  for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I < E; I++) {
+    NMask.push_back(I);
+    NMask.push_back(I);
+  }
+  Results.push_back(
+      DAG.getVectorShuffle(VT, DL,
+                           DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Addp,
                                        DAG.getUNDEF(LoHi.first.getValueType())),
+                           DAG.getUNDEF(VT), NMask));
+}
+
 static void ReplaceReductionResults(SDNode *N,
                                     SmallVectorImpl<SDValue> &Results,
                                     SelectionDAG &DAG, unsigned InterOp,
@@ -18346,6 +19864,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
   case ISD::VECREDUCE_UMIN:
     Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
     return;
+  case ISD::ADD:
+  case ISD::FADD:
+    ReplaceAddWithADDP(N, Results, DAG, Subtarget);
+    return;
   case ISD::CTPOP:
     if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
@@ -18406,8 +19928,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
     ReplaceExtractSubVectorResults(N, Results, DAG);
     return;
   case ISD::INSERT_SUBVECTOR:
-    // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
-    // to common code for result type legalisation
+  case ISD::CONCAT_VECTORS:
+    // Custom lowering has been requested for INSERT_SUBVECTOR and
+    // CONCAT_VECTORS -- but delegate to common code for result type
+    // legalisation
     return;
   case ISD::INTRINSIC_WO_CHAIN: {
     EVT VT = N->getValueType(0);
@@ -18485,11 +20009,11 @@ bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
 
   if (auto LI = dyn_cast<LoadInst>(I))
     return LI->getType()->getPrimitiveSizeInBits() == 128 &&
-           LI->getAlignment() >= 16;
+           LI->getAlign() >= Align(16);
 
   if (auto SI = dyn_cast<StoreInst>(I))
     return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
-           SI->getAlignment() >= 16;
+           SI->getAlign() >= Align(16);
 
   return false;
 }
@@ -18502,12 +20026,12 @@ bool AArch64TargetLowering::shouldInsertFencesForAtomic(
 // Loads and stores less than 128-bits are already atomic; ones above that
 // are doomed anyway, so defer to the default libcall and blame the OS when
 // things go wrong.
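ReplaceAddWithADDP above matches add(x, shuffle(x, <1,0,3,2,...>)): every pair of adjacent lanes ends up holding the same pairwise sum, which is one ADDP result with each element duplicated. A standalone scalar model — illustrative C++ only:

    #include <cassert>

    int main() {
      int x[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      int sum[8], addp[4];
      for (int i = 0; i < 8; ++i)
        sum[i] = x[i] + x[i ^ 1];          // x + shuffle(x, <1,0,3,2,5,4,7,6>)
      for (int i = 0; i < 4; ++i)
        addp[i] = x[2 * i] + x[2 * i + 1]; // pairwise add (ADDP)
      for (int i = 0; i < 8; ++i)
        assert(sum[i] == addp[i / 2]);     // each ADDP lane appears twice
    }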
-bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { +TargetLoweringBase::AtomicExpansionKind +AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - if (Size != 128) - return false; - - return !isOpSuitableForLDPSTP(SI); + if (Size != 128 || isOpSuitableForLDPSTP(SI)) + return AtomicExpansionKind::None; + return AtomicExpansionKind::Expand; } // Loads and stores less than 128-bits are already atomic; ones above that @@ -18627,7 +20151,10 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder, const DataLayout &DL = M->getDataLayout(); IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy)); - Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy); + CallInst *CI = Builder.CreateCall(Ldxr, Addr); + CI->addParamAttr( + 0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy)); + Value *Trunc = Builder.CreateTrunc(CI, IntEltTy); return Builder.CreateBitCast(Trunc, ValueTy); } @@ -18668,10 +20195,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder, IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType())); Val = Builder.CreateBitCast(Val, IntValTy); - return Builder.CreateCall(Stxr, - {Builder.CreateZExtOrBitCast( - Val, Stxr->getFunctionType()->getParamType(0)), - Addr}); + CallInst *CI = Builder.CreateCall( + Stxr, {Builder.CreateZExtOrBitCast( + Val, Stxr->getFunctionType()->getParamType(0)), + Addr}); + CI->addParamAttr(1, Attribute::get(Builder.getContext(), + Attribute::ElementType, Val->getType())); + return CI; } bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( @@ -18993,8 +20523,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use // AArch64SVEPredPattern::all, which can enable the use of unpredicated // variants of instructions when available. 
- const auto &Subtarget = - static_cast(DAG.getSubtarget()); + const auto &Subtarget = DAG.getSubtarget(); unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits(); unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits(); if (MaxSVESize && MinSVESize == MaxSVESize && @@ -19080,22 +20609,23 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE( MemVT = MemVT.changeTypeToInteger(); } - auto NewLoad = DAG.getMaskedLoad( + SDValue NewLoad = DAG.getMaskedLoad( LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg, DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(), Load->getAddressingMode(), Load->getExtensionType()); + SDValue Result = NewLoad; if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) { EVT ExtendVT = ContainerVT.changeVectorElementType( Load->getMemoryVT().getVectorElementType()); - NewLoad = getSVESafeBitCast(ExtendVT, NewLoad, DAG); - NewLoad = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, - Pg, NewLoad, DAG.getUNDEF(ContainerVT)); + Result = getSVESafeBitCast(ExtendVT, Result, DAG); + Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, + Pg, Result, DAG.getUNDEF(ContainerVT)); } - auto Result = convertFromScalableVector(DAG, VT, NewLoad); - SDValue MergedValues[2] = {Result, Load->getChain()}; + Result = convertFromScalableVector(DAG, VT, Result); + SDValue MergedValues[2] = {Result, NewLoad.getValue(1)}; return DAG.getMergeValues(MergedValues, DL); } @@ -19143,19 +20673,20 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE( IsPassThruZeroOrUndef = true; } - auto NewLoad = DAG.getMaskedLoad( + SDValue NewLoad = DAG.getMaskedLoad( ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(), Load->getExtensionType()); + SDValue Result = NewLoad; if (!IsPassThruZeroOrUndef) { SDValue OldPassThru = convertToScalableVector(DAG, ContainerVT, Load->getPassThru()); - NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru); + Result = DAG.getSelect(DL, ContainerVT, Mask, Result, OldPassThru); } - auto Result = convertFromScalableVector(DAG, VT, NewLoad); - SDValue MergedValues[2] = {Result, Load->getChain()}; + Result = convertFromScalableVector(DAG, VT, Result); + SDValue MergedValues[2] = {Result, NewLoad.getValue(1)}; return DAG.getMergeValues(MergedValues, DL); } @@ -19232,7 +20763,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( // Scalable vector i32/i64 DIV is supported. if (EltVT == MVT::i32 || EltVT == MVT::i64) - return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, PredOpcode); // Scalable vector i8/i16 DIV is not supported. Promote it to i32. EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); @@ -19387,13 +20918,13 @@ SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt( // NOTE: The results for inactive lanes are undefined. SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, - unsigned NewOp, - bool OverrideNEON) const { + unsigned NewOp) const { EVT VT = Op.getValueType(); SDLoc DL(Op); auto Pg = getPredicateForVector(DAG, DL, VT); - if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) { + if (VT.isFixedLengthVector()) { + assert(isTypeLegal(VT) && "Expected only legal fixed-width types"); EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); // Create list of operands by converting existing ones to scalable types. 
@@ -19411,8 +20942,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, continue; } - assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) && - "Only fixed length vectors are supported!"); + assert(isTypeLegal(V.getValueType()) && + "Expected only legal fixed-width types"); Operands.push_back(convertToScalableVector(DAG, ContainerVT, V)); } @@ -19543,7 +21074,9 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode, SDValue VecOp = ScalarOp.getOperand(0); EVT SrcVT = VecOp.getValueType(); - if (useSVEForFixedLengthVectorVT(SrcVT, true)) { + if (useSVEForFixedLengthVectorVT( + SrcVT, + /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) { EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT); VecOp = convertToScalableVector(DAG, ContainerVT, VecOp); } @@ -19950,6 +21483,17 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op, EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType()); EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType()); + // Safe bitcasting between unpacked vector types of different element counts + // is currently unsupported because the following is missing the necessary + // work to ensure the result's elements live where they're supposed to within + // an SVE register. + // 01234567 + // e.g. nxv2i32 = XX??XX?? + // nxv4f16 = X?X?X?X? + assert((VT.getVectorElementCount() == InVT.getVectorElementCount() || + VT == PackedVT || InVT == PackedInVT) && + "Unexpected bitcast!"); + // Pack input if required. if (InVT != PackedInVT) Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op); @@ -20016,6 +21560,13 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode( Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); } +bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const { + return Op.getOpcode() == AArch64ISD::DUP || + (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && + Op.getOperand(0).getOpcode() == AArch64ISD::DUP) || + TargetLowering::isTargetCanonicalConstantNode(Op); +} + bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal( unsigned Opc, LLT Ty1, LLT Ty2) const { return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 2138c0ffe70a..06ea918ea32e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -55,6 +55,8 @@ enum NodeType : unsigned { // x29, x29` marker instruction. CALL_RVMARKER, + CALL_BTI, // Function call followed by a BTI instruction. + // Produces the full sequence of instructions for getting the thread pointer // offset of a variable into X0, using the TLSDesc model. TLSDESC_CALLSEQ, @@ -79,7 +81,6 @@ enum NodeType : unsigned { // Predicated instructions where inactive lanes produce undefined results. 
ABDS_PRED, ABDU_PRED, - ADD_PRED, FADD_PRED, FDIV_PRED, FMA_PRED, @@ -98,7 +99,6 @@ enum NodeType : unsigned { SMIN_PRED, SRA_PRED, SRL_PRED, - SUB_PRED, UDIV_PRED, UMAX_PRED, UMIN_PRED, @@ -158,6 +158,7 @@ enum NodeType : unsigned { DUPLANE16, DUPLANE32, DUPLANE64, + DUPLANE128, // Vector immedate moves MOVI, @@ -232,15 +233,10 @@ enum NodeType : unsigned { SADDV, UADDV, - // Vector halving addition - SHADD, - UHADD, - - // Vector rounding halving addition - SRHADD, - URHADD, - - // Unsigned Add Long Pairwise + // Add Pairwise of two vectors + ADDP, + // Add Long Pairwise + SADDLP, UADDLP, // udot/sdot instructions @@ -411,6 +407,10 @@ enum NodeType : unsigned { SSTNT1_PRED, SSTNT1_INDEX_PRED, + // SME + RDSVL, + REVD_MERGE_PASSTHRU, + // Asserts that a function argument (i32) is zero-extended to i8 by // the caller ASSERT_ZEXT_BOOL, @@ -462,23 +462,6 @@ enum NodeType : unsigned { } // end namespace AArch64ISD -namespace { - -// Any instruction that defines a 32-bit result zeros out the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. But any other 32-bit operation will zero-extend -// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper -// 32 bits, they're probably just qualifying a CopyFromReg. -static inline bool isDef32(const SDNode &N) { - unsigned Opc = N.getOpcode(); - return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && - Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && - Opc != ISD::AssertZext && Opc != ISD::AssertAlign && - Opc != ISD::FREEZE; -} - -} // end anonymous namespace - namespace AArch64 { /// Possible values of current rounding mode, which is specified in bits /// 23:22 of FPCR. @@ -501,6 +484,11 @@ public: explicit AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI); + /// Control the following reassociation of operands: (op (op x, c1), y) -> (op + /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. + bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, + SDValue N1) const override; + /// Selects the correct CCAssignFn for a given CallingConvention value. CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; @@ -573,6 +561,17 @@ public: MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -610,8 +609,8 @@ public: bool isLegalAddImmediate(int64_t) const override; bool isLegalICmpImmediate(int64_t) const override; - bool isMulAddWithConstProfitable(const SDValue &AddNode, - const SDValue &ConstNode) const override; + bool isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const override; bool shouldConsiderGEPOffsetSplit() const override; @@ -651,6 +650,10 @@ public: bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; + /// Return true if it is profitable to fold a pair of shifts into a mask. 
+  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+                                         CombineLevel Level) const override;
+
   /// Returns true if it is beneficial to convert a load of a constant
   /// to just the constant itself.
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -680,7 +683,8 @@ public:
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
-  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
@@ -898,11 +902,8 @@ private:
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
 
-  bool isEligibleForTailCallOptimization(
-      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-      const SmallVectorImpl<ISD::OutputArg> &Outs,
-      const SmallVectorImpl<SDValue> &OutVals,
-      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
+  bool
+  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
 
   /// Finds the incoming stack arguments which overlap the given fixed stack
   /// object and incorporates their load into the current chain. This prevents
@@ -980,8 +981,8 @@ private:
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
-                              bool OverrideNEON = false) const;
+  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
+                              unsigned NewOp) const;
   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -1052,6 +1053,8 @@ private:
   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                         SmallVectorImpl<SDNode *> &Created) const override;
+  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        SmallVectorImpl<SDNode *> &Created) const override;
   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps, bool &UseOneConst,
                           bool Reciprocal) const override;
@@ -1093,7 +1096,7 @@ private:
   }
 
   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
-  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+  bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -1129,6 +1132,8 @@ private:
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const override;
 
+  bool isTargetCanonicalConstantNode(SDValue Op) const override;
+
   // Normally SVE is only used for byte size vectors that do not fit within a
   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
   // used for 64bit and 128bit vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index b220929514f9..c477a44b13b2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -27,22 +27,43 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
 // supported, but when they're relaxed and anything can be used, all the
 // standard modes would be valid and may give efficiency gains.
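For context on the pattern split introduced in the next hunks: the three atomic-load orderings being distinguished map naturally onto AArch64's load flavours. The following standalone C++ illustrates how a compiler typically lowers them (LDAPR requires the RCPC extension; without it, acquire loads also use LDAR):

    #include <atomic>
    #include <cstdint>

    uint64_t load_relaxed(const std::atomic<uint64_t> &a) {
      return a.load(std::memory_order_relaxed); // plain LDR
    }
    uint64_t load_acquire(const std::atomic<uint64_t> &a) {
      return a.load(std::memory_order_acquire); // LDAPR with RCPC, else LDAR
    }
    uint64_t load_seq_cst(const std::atomic<uint64_t> &a) {
      return a.load(std::memory_order_seq_cst); // always LDAR
    }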
+// An atomic load operation that does not need either acquire or release +// semantics. +class relaxed_load + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 0; +} + // An atomic load operation that actually needs acquire semantics. class acquiring_load : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; - let IsAtomicOrderingAcquireOrStronger = 1; + let IsAtomicOrderingAcquire = 1; } -// An atomic load operation that does not need either acquire or release -// semantics. -class relaxed_load +// An atomic load operation that needs sequential consistency. +class seq_cst_load : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; - let IsAtomicOrderingAcquireOrStronger = 0; + let IsAtomicOrderingSequentiallyConsistent = 1; +} + +// RCPC extension, currently opt-in under a separate feature. +let Predicates = [HasLDAPR] in { + // v8.3 Release Consistent Processor Consistent support, optional in v8.2. + // 8-bit loads + def : Pat<(acquiring_load GPR64sp:$ptr), (LDAPRB GPR64sp:$ptr)>; + // 16-bit loads + def : Pat<(acquiring_load GPR64sp:$ptr), (LDAPRH GPR64sp:$ptr)>; + // 32-bit loads + def : Pat<(acquiring_load GPR64sp:$ptr), (LDAPRW GPR64sp:$ptr)>; + // 64-bit loads + def : Pat<(acquiring_load GPR64sp:$ptr), (LDAPRX GPR64sp:$ptr)>; } // 8-bit loads +def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; def : Pat<(relaxed_load (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)), @@ -58,6 +79,7 @@ def : Pat<(relaxed_load (LDURBBi GPR64sp:$Rn, simm9:$offset)>; // 16-bit loads +def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; def : Pat<(acquiring_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; def : Pat<(relaxed_load (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)), @@ -73,6 +95,7 @@ def : Pat<(relaxed_load (LDURHHi GPR64sp:$Rn, simm9:$offset)>; // 32-bit loads +def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; def : Pat<(acquiring_load GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; def : Pat<(relaxed_load (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)), @@ -88,6 +111,7 @@ def : Pat<(relaxed_load (LDURWi GPR64sp:$Rn, simm9:$offset)>; // 64-bit loads +def : Pat<(seq_cst_load GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; def : Pat<(acquiring_load GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; def : Pat<(relaxed_load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)), @@ -490,7 +514,8 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch), let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch", mayLoad = 1, mayStore = 1 in { -class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch), +class cmp_swap_128 : Pseudo<(outs GPR64common:$RdLo, GPR64common:$RdHi, + GPR32common:$scratch), (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, GPR64:$newLo, GPR64:$newHi), []>, Sched<[WriteAtomic]>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 4c1e41b7efee..78bc1b8c6f02 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -109,15 +109,19 @@ class TriOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag : PatFrag<(ops node:$LHS), res>; -// Helper fragment for an extract of the high portion of a 128-bit vector.
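The cmp_swap_128 pseudo above (its outputs now constrained to GPR64common) backs 16-byte compare-and-swap. At the source level that is a double-word CAS like the sketch below; whether it is lock-free, and whether it lowers to CASP or an LDXP/STXP loop, depends on LSE availability, and some toolchains need -latomic:

    #include <atomic>
    #include <cstdint>

    struct alignas(16) Pair { uint64_t Lo, Hi; }; // 16 bytes, 16-byte aligned

    bool cas128(std::atomic<Pair> &A, Pair &Expected, Pair Desired) {
      return A.compare_exchange_strong(Expected, Desired);
    }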
+// Helper fragment for an extract of the high portion of a 128-bit vector. The +// ComplexPattern match both extract_subvector and bitcast(extract_subvector(..)). def extract_high_v16i8 : - UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; + ComplexPattern; def extract_high_v8i16 : - UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; + ComplexPattern; def extract_high_v4i32 : - UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; -def extract_high_v2i64 : - UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; + ComplexPattern; + +def extract_high_dup_v8i16 : + BinOpFrag<(extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 4))>; +def extract_high_dup_v4i32 : + BinOpFrag<(extract_subvector (v4i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS)), (i64 2))>; //===----------------------------------------------------------------------===// // Asm Operand Classes. @@ -1178,6 +1182,13 @@ def fpimm32XForm : SDNodeXFormgetTargetConstant(enc, SDLoc(N), MVT::i32); }]>; +def fpimm32SIMDModImmType4XForm : SDNodeXFormgetValueAPF() + .bitcastToAPInt() + .getZExtValue()); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>; + def fpimm64XForm : SDNodeXFormgetValueAPF(); uint32_t enc = AArch64_AM::getFP64Imm(InVal); @@ -1199,6 +1210,13 @@ def fpimm32 : Operand, let ParserMatchClass = FPImmOperand; let PrintMethod = "printFPImmOperand"; } + +def fpimm32SIMDModImmType4 : FPImmLeaf { +} + def fpimm64 : Operand, FPImmLeaf, GISDNodeXFormEquiv; def gi_fpimm64 : GICustomOperandRenderer<"renderFPImm64">, GISDNodeXFormEquiv; +def gi_fpimm32SIMDModImmType4 : + GICustomOperandRenderer<"renderFPImm32SIMDModImmType4">, + GISDNodeXFormEquiv; // Vector lane operands class AsmVectorIndex : AsmOperandClass { @@ -1261,8 +1282,12 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>; def VectorIndexSOperand : AsmVectorIndex<0, 3>; def VectorIndexDOperand : AsmVectorIndex<0, 1>; -defm VectorIndex0 : VectorIndex; + } +} defm VectorIndex1 : VectorIndex; defm VectorIndexB : VectorIndex, ImmLeaf { let ParserMatchClass = Imm0_0Operand; let PrintMethod = "printMatrixIndex"; + let OperandNamespace = "AArch64"; + let OperandType = "OPERAND_IMPLICIT_IMM_0"; } def sme_elm_idx0_1 : Operand, ImmLeaf opc1, string insn> { //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class ExceptionGeneration op1, bits<2> ll, string asm> - : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", []>, +class ExceptionGeneration op1, bits<2> ll, string asm, + list pattern = []> + : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", pattern>, Sched<[WriteSys]> { bits<16> imm; let Inst{31-24} = 0b11010100; @@ -4542,6 +4570,7 @@ let Predicates = [HasFPARMv8] in { // Floating point to integer conversion //--- +let mayRaiseFPException = 1 in class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, RegisterClass srcType, RegisterClass dstType, string asm, list pattern> @@ -4561,7 +4590,7 @@ class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, let Inst{4-0} = Rd; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, RegisterClass srcType, RegisterClass dstType, Operand immType, string asm, list pattern> @@ -4683,7 +4712,7 @@ multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, // Integer to floating point conversion //--- -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +let mayStore = 0, mayLoad = 0, 
hasSideEffects = 0, mayRaiseFPException = 1 in class BaseIntegerToFP pattern> @@ -4701,6 +4730,7 @@ class BaseIntegerToFP @@ -4937,6 +4967,7 @@ multiclass UnscaledConversion { // Floating point conversion //--- +let mayRaiseFPException = 1 in class BaseFPConversion type, bits<2> opcode, RegisterClass dstType, RegisterClass srcType, string asm, list pattern> : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, @@ -4963,15 +4994,15 @@ multiclass FPConversion { // Half-precision to Double-precision def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, - [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR64:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Half-precision to Single-precision def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, - [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR32:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Single-precision to Double-precision def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; + [(set FPR64:$Rd, (any_fpextend FPR32:$Rn))]>; // Single-precision to Half-precision def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, @@ -4999,8 +5030,9 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, } multiclass SingleOperandFPData opcode, string asm, - SDPatternOperator node = null_frag> { - + SDPatternOperator node = null_frag, + int fpexceptions = 1> { + let mayRaiseFPException = fpexceptions in { def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> { let Inst{23-22} = 0b11; // 16-bit size flag let Predicates = [HasFullFP16]; @@ -5013,8 +5045,14 @@ multiclass SingleOperandFPData opcode, string asm, def Dr : BaseSingleOperandFPData<{0b00,opcode}, FPR64, f64, asm, node> { let Inst{23-22} = 0b01; // 64-bit size flag } + } } +multiclass SingleOperandFPDataNoException opcode, string asm, + SDPatternOperator node = null_frag> + : SingleOperandFPData; + +let mayRaiseFPException = 1 in multiclass SingleOperandFPNo16 opcode, string asm, SDPatternOperator node = null_frag>{ @@ -5035,7 +5073,7 @@ multiclass FRIntNNT opcode, string asm, SDPatternOperator node = null_fr // Two operand floating point data processing //--- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseTwoOperandFPData opcode, RegisterClass regtype, string asm, list pat> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), @@ -5075,7 +5113,8 @@ multiclass TwoOperandFPData opcode, string asm, } } -multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { +multiclass TwoOperandFPDataNeg opcode, string asm, + SDPatternOperator node> { def Hrr : BaseTwoOperandFPData { let Inst{23-22} = 0b11; // 16-bit size flag @@ -5098,6 +5137,7 @@ multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { // Three operand floating point data processing //--- +let mayRaiseFPException = 1 in class BaseThreeOperandFPData pat> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), @@ -5142,7 +5182,7 @@ multiclass ThreeOperandFPData pat> @@ -5161,7 +5201,7 @@ class BaseOneOperandFPComparison pat> : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, @@ -5218,7 +5258,7 @@ multiclass FPComparison pat> : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), @@ -5544,6 +5584,7 @@ multiclass SIMDThreeSameVectorB opc, string asm, } // As above, but only floating point elements supported. 
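Much of this hunk threads mayRaiseFPException through the FP instruction classes and switches fpextend patterns to any_fpextend so they also cover the strict (constrained) variants. The flag matters because under strict FP semantics the FPSR side effects of FCVT-class instructions are observable, so they must not be reordered, speculated, or deleted. A user-level illustration in C++ (assumes a platform that honours FENV_ACCESS; not every compiler supports the pragma):

    #include <cfenv>
    #include <cstdio>

    #pragma STDC FENV_ACCESS ON

    int main() {
      std::feclearexcept(FE_ALL_EXCEPT);
      volatile double Big = 1e300;
      volatile float Narrowed = static_cast<float>(Big); // FCVT overflows float
      if (std::fetestexcept(FE_OVERFLOW))
        std::puts("fcvt raised FE_OVERFLOW");
      (void)Narrowed;
    }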
+let mayRaiseFPException = 1 in multiclass SIMDThreeSameVectorFP opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -5565,6 +5606,7 @@ multiclass SIMDThreeSameVectorFP opc, [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } +let mayRaiseFPException = 1 in multiclass SIMDThreeSameVectorFPCmp opc, string asm, SDPatternOperator OpNode> { @@ -5587,6 +5629,7 @@ multiclass SIMDThreeSameVectorFPCmp opc, [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } +let mayRaiseFPException = 1 in multiclass SIMDThreeSameVectorFPTied opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -5614,6 +5657,7 @@ multiclass SIMDThreeSameVectorFPTied opc, } // As above, but D and B sized elements unsupported. +let mayRaiseFPException = 1 in multiclass SIMDThreeSameVectorHS opc, string asm, SDPatternOperator OpNode> { def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, @@ -5718,6 +5762,7 @@ multiclass SIMDThreeSameVectorDot size, string asm, string kind1, string kind2, RegisterOperand RegType, ValueType AccumType, ValueType InputType, @@ -5986,7 +6031,9 @@ multiclass SIMDTwoVectorBH opc, string asm, // Supports H, S and D element sizes, uses high bit of the size field // as an extra opcode bit. multiclass SIMDTwoVectorFP opc, string asm, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, + int fpexceptions = 1> { + let mayRaiseFPException = fpexceptions in { let Predicates = [HasNEON, HasFullFP16] in { def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, asm, ".4h", ".4h", @@ -6004,9 +6051,15 @@ multiclass SIMDTwoVectorFP opc, string asm, def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; + } } +multiclass SIMDTwoVectorFPNoException opc, string asm, + SDPatternOperator OpNode> + : SIMDTwoVectorFP; + // Supports only S and D element sizes +let mayRaiseFPException = 1 in multiclass SIMDTwoVectorSD opc, string asm, SDPatternOperator OpNode = null_frag> { @@ -6036,7 +6089,7 @@ multiclass SIMDTwoVectorS opc, string asm, [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } - +let mayRaiseFPException = 1 in multiclass SIMDTwoVectorFPToInt opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -6058,6 +6111,7 @@ multiclass SIMDTwoVectorFPToInt opc, string asm, [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } +let mayRaiseFPException = 1 in multiclass SIMDTwoVectorIntToFP opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -6209,6 +6263,7 @@ multiclass SIMDCmpTwoVector opc, string asm, multiclass SIMDFPCmpTwoVector opc, string asm, SDNode OpNode> { + let mayRaiseFPException = 1 in { let Predicates = [HasNEON, HasFullFP16] in { def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, asm, ".4h", "0.0", @@ -6226,6 +6281,7 @@ multiclass SIMDFPCmpTwoVector opc, def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, asm, ".2d", "0.0", v2i64, v2f64, OpNode>; + } let Predicates = [HasNEON, HasFullFP16] in { def : InstAlias opc, (!cast(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseSIMDFPCvtTwoVector size, bits<5> opcode, RegisterOperand outtype, RegisterOperand intype, string asm, string VdTy, string VnTy, @@ -6275,7 +6331,7 @@ class BaseSIMDFPCvtTwoVector size, 
bits<5> opcode, let Inst{4-0} = Rd; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseSIMDFPCvtTwoVectorTied size, bits<5> opcode, RegisterOperand outtype, RegisterOperand intype, string asm, string VdTy, string VnTy, @@ -6457,8 +6513,8 @@ multiclass SIMDDifferentThreeVectorBD opc, string asm, asm#"2", ".1q", ".2d", ".2d", []>; } - def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), - (v8i8 (extract_high_v16i8 V128:$Rm)))), + def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))), + (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))), (!cast(NAME#"v16i8") V128:$Rn, V128:$Rm)>; } @@ -6471,8 +6527,8 @@ multiclass SIMDLongThreeVectorHS opc, string asm, def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_v8i16 (v8i16 V128:$Rm))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVector opc, string asm, def v4i32_v2i64 : BaseSIMDDifferentThreeVector; + [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_v4i32 (v4i32 V128:$Rm))))]>; } multiclass SIMDLongThreeVectorBHSabdl opc, string asm, @@ -6495,8 +6551,8 @@ multiclass SIMDLongThreeVectorBHSabdl opc, string asm, V128, V128, V128, asm#"2", ".8h", ".16b", ".16b", [(set (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))))]>; + (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)), + (extract_high_v16i8 (v16i8 V128:$Rm))))))]>; def v4i16_v4i32 : BaseSIMDDifferentThreeVector opc, string asm, V128, V128, V128, asm#"2", ".4s", ".8h", ".8h", [(set (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))))]>; + (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_v8i16 (v8i16 V128:$Rm))))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVector opc, string asm, V128, V128, V128, asm#"2", ".2d", ".4s", ".4s", [(set (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))))]>; + (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_v4i32 (v4i32 V128:$Rm))))))]>; } multiclass SIMDLongThreeVectorTiedBHSabal opc, @@ -6535,8 +6591,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal opc, asm#"2", ".8h", ".16b", ".16b", [(set (v8i16 V128:$dst), (add (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm))))))]>; + (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)), + (extract_high_v16i8 (v16i8 V128:$Rm)))))))]>; def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied opc, asm#"2", ".4s", ".8h", ".8h", [(set (v4i32 V128:$dst), (add (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm))))))]>; + (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_v8i16 (v8i16 V128:$Rm)))))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied opc, asm#"2", ".2d", ".4s", ".4s", [(set (v2i64 V128:$dst), (add (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm))))))]>; + (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_v4i32 (v4i32 V128:$Rm)))))))]>; } multiclass SIMDLongThreeVectorBHS opc, string asm, @@ -6574,8 +6630,8 @@ multiclass SIMDLongThreeVectorBHS opc, string asm, def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + [(set (v8i16 V128:$Rd), (OpNode 
(extract_high_v16i8 (v16i8 V128:$Rn)), + (extract_high_v16i8 (v16i8 V128:$Rm))))]>; def v4i16_v4i32 : BaseSIMDDifferentThreeVector opc, string asm, def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_v8i16 (v8i16 V128:$Rm))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVector opc, string asm, def v4i32_v2i64 : BaseSIMDDifferentThreeVector; + [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_v4i32 (v4i32 V128:$Rm))))]>; } multiclass SIMDLongThreeVectorTiedBHS opc, @@ -6609,8 +6665,8 @@ multiclass SIMDLongThreeVectorTiedBHS opc, asm#"2", ".8h", ".16b", ".16b", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), - (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; + (extract_high_v16i8 (v16i8 V128:$Rn)), + (extract_high_v16i8 (v16i8 V128:$Rm))))]>; def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied opc, asm#"2", ".4s", ".8h", ".8h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; + (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_v8i16 (v8i16 V128:$Rm))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied opc, asm#"2", ".2d", ".4s", ".4s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; + (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_v4i32 (v4i32 V128:$Rm))))]>; } multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, @@ -6651,8 +6707,8 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, asm#"2", ".4s", ".8h", ".8h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))))]>; + (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_v8i16 (v8i16 V128:$Rm))))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied opc, string asm, asm#"2", ".2d", ".4s", ".4s", [(set (v2i64 V128:$dst), (Accum (v2i64 V128:$Rd), - (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))))]>; + (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_v4i32 (v4i32 V128:$Rm))))))]>; } multiclass SIMDWideThreeVectorBHS opc, string asm, @@ -6679,7 +6735,7 @@ multiclass SIMDWideThreeVectorBHS opc, string asm, V128, V128, V128, asm#"2", ".8h", ".8h", ".16b", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; + (extract_high_v16i8 (v16i8 V128:$Rm))))]>; def v4i16_v4i32 : BaseSIMDDifferentThreeVector opc, string asm, V128, V128, V128, asm#"2", ".4s", ".4s", ".8h", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; + (extract_high_v8i16 (v8i16 V128:$Rm))))]>; def v2i32_v2i64 : BaseSIMDDifferentThreeVector opc, string asm, V128, V128, V128, asm#"2", ".2d", ".2d", ".4s", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; + (extract_high_v4i32 (v4i32 V128:$Rm))))]>; } //---------------------------------------------------------------------------- @@ -6876,7 +6932,7 @@ multiclass SIMDThreeScalarHSTied opc, string asm> { multiclass SIMDFPThreeScalar opc, string asm, SDPatternOperator OpNode = null_frag, Predicate pred = HasNEON> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in { let Predicates = [pred] in { def NAME#64 : 
BaseSIMDThreeScalar; @@ -6895,7 +6951,7 @@ multiclass SIMDFPThreeScalar opc, string asm, multiclass SIMDThreeScalarFPCmp opc, string asm, SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in { def NAME#64 : BaseSIMDThreeScalar; def NAME#32 : BaseSIMDThreeScalar size, bits<2> size2, bits<5> opcode, let Inst{4-0} = Rd; } +let mayRaiseFPException = 1 in class SIMDInexactCvtTwoScalar opcode, string asm> : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, @@ -7048,11 +7105,13 @@ multiclass SIMDCmpTwoScalarD opc, string asm, multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { + let mayRaiseFPException = 1 in { def v1i64rz : BaseSIMDCmpTwoScalar; def v1i32rz : BaseSIMDCmpTwoScalar; let Predicates = [HasNEON, HasFullFP16] in { def v1i16rz : BaseSIMDCmpTwoScalar; } + } def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; @@ -7076,6 +7135,7 @@ multiclass SIMDTwoScalarD opc, string asm, (!cast(NAME # "v1i64") FPR64:$Rn)>; } +let mayRaiseFPException = 1 in multiclass SIMDFPTwoScalar opc, string asm, Predicate pred = HasNEON> { let Predicates = [pred] in { @@ -7087,6 +7147,7 @@ multiclass SIMDFPTwoScalar opc, string asm, } } +let mayRaiseFPException = 1 in multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDTwoScalar opc, string asm> { asm, ".2d">; } +let mayRaiseFPException = 1 in multiclass SIMDFPPairwiseScalar opc, string asm> { let Predicates = [HasNEON, HasFullFP16] in { def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, @@ -7232,6 +7294,7 @@ multiclass SIMDAcrossLanesHSD opcode, string asm> { asm, ".4s", []>; } +let mayRaiseFPException = 1 in multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, Intrinsic intOp> { let Predicates = [HasNEON, HasFullFP16] in { @@ -7351,7 +7414,7 @@ class SIMDMovAlias { let Inst{20-16} = 0b00001; } @@ -7398,7 +7461,7 @@ multiclass SMov { multiclass UMov { // UMOV with vector index of 0 are legal in Scalable Matrix Extension (SME) // streaming mode. 
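For orientation, two source-level anchors for the element-access machinery being reworked in this file, written with ACLE intrinsics from arm_neon.h (AArch64 targets only; the instruction comments show the expected, not guaranteed, selection):

    #include <arm_neon.h>

    // UMov multiclass: lane extraction to a general register. Index 0 stays
    // legal in SME streaming mode, hence the HasNEONorSME predicate below.
    unsigned lane0(uint8x16_t V) { return vgetq_lane_u8(V, 0); } // UMOV Wd, Vn.B[0]

    // extract_high_* fragments: the "2" forms of widening ops read the high
    // half of a 128-bit register directly, with no separate extract step.
    int32x4_t mulHigh(int16x8_t A, int16x8_t B) {
      return vmull_high_s16(A, B); // SMULL2 Vd.4S, Vn.8H, Vm.8H
    }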
- let Predicates = [HasNEONorStreamingSVE] in { + let Predicates = [HasNEONorSME] in { def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> { let Inst{20-16} = 0b00001; } @@ -8048,6 +8111,7 @@ multiclass SIMDThreeSameVectorBF16DotI { ".2h", V128, v4f32, v8bf16>; } +let mayRaiseFPException = 1 in class SIMDBF16MLAL : BaseSIMDThreeSameVectorTied let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}"); } +let mayRaiseFPException = 1 in class SIMDBF16MLALIndex : I<(outs V128:$dst), (ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm, @@ -8095,18 +8160,21 @@ class SIMDThreeSameVectorBF16MatrixMul ", $Rm", ".8h", "}"); } +let mayRaiseFPException = 1 in class SIMD_BFCVTN : BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128, "bfcvtn", ".4h", ".4s", [(set (v8bf16 V128:$Rd), (int_aarch64_neon_bfcvtn (v4f32 V128:$Rn)))]>; +let mayRaiseFPException = 1 in class SIMD_BFCVTN2 : BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128, "bfcvtn2", ".8h", ".4s", [(set (v8bf16 V128:$dst), (int_aarch64_neon_bfcvtn2 (v8bf16 V128:$Rd), (v4f32 V128:$Rn)))]>; +let mayRaiseFPException = 1 in class BF16ToSinglePrecision : I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", [(set (bf16 FPR16:$Rd), (int_aarch64_neon_bfcvt (f32 FPR32:$Rn)))]>, @@ -8160,6 +8228,7 @@ multiclass SIMDThreeSameVectorDotIndex size, string as } // ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) +let mayRaiseFPException = 1 in class BaseSIMDThreeSameVectorFMLIndex opc, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, @@ -8187,6 +8256,7 @@ multiclass SIMDThreeSameVectorFMLIndex opc, string asm, V128, v4f32, v8f16, OpNode>; } +let mayRaiseFPException = 1 in multiclass SIMDFPIndexed opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -8369,6 +8439,7 @@ multiclass SIMDFPIndexedTiedPatterns { V128:$Rm, VectorIndexD:$idx)>; } +let mayRaiseFPException = 1 in multiclass SIMDFPIndexedTied opc, string asm> { let Predicates = [HasNEON, HasFullFP16] in { def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, @@ -8701,9 +8772,8 @@ multiclass SIMDIndexedLongSD opc, string asm, V128_lo, VectorIndexH, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { + (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -8728,9 +8798,8 @@ multiclass SIMDIndexedLongSD opc, string asm, V128, VectorIndexS, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { + (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -8793,10 +8862,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), (v4i32 (int_aarch64_neon_sqdmull - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 - (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { + (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -8825,10 +8892,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, 
[(set (v2i64 V128:$dst), (Accum (v2i64 V128:$Rd), (v2i64 (int_aarch64_neon_sqdmull - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 - (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { + (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -8881,9 +8946,8 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, V128_lo, VectorIndexH, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { + (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -8908,9 +8972,8 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, V128, VectorIndexS, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { + (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -8940,9 +9003,8 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { + (extract_high_v8i16 (v8i16 V128:$Rn)), + (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -8967,9 +9029,8 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { + (extract_high_v4i32 (v4i32 V128:$Rn)), + (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -9654,7 +9715,7 @@ multiclass SIMDVectorLShiftLongBHSD opc, string asm, V128, V128, vecshiftL8, asm#"2", ".8h", ".16b", [(set (v8i16 V128:$Rd), - (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { + (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)), vecshiftL8:$imm))]> { bits<3> imm; let Inst{18-16} = imm; } @@ -9670,7 +9731,7 @@ multiclass SIMDVectorLShiftLongBHSD opc, string asm, V128, V128, vecshiftL16, asm#"2", ".4s", ".8h", [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { + (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), vecshiftL16:$imm))]> { bits<4> imm; let Inst{19-16} = imm; @@ -9687,7 +9748,7 @@ multiclass SIMDVectorLShiftLongBHSD opc, string asm, V128, V128, vecshiftL32, asm#"2", ".2d", ".4s", [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { + (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), vecshiftL32:$imm))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10671,7 +10732,7 @@ def complexrotateopodd : Operand, TImmLeaf= 0 && Imm < let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; let PrintMethod = "printComplexRotationOp<180, 90>"; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseSIMDThreeSameVectorComplex size, bits<3> opcode, RegisterOperand 
regtype, Operand rottype, string asm, string kind, list pattern> @@ -10742,7 +10803,7 @@ multiclass SIMDThreeSameVectorComplexHSD opcode, Operand rottype, } } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseSIMDThreeSameVectorTiedComplex size, bits<3> opcode, RegisterOperand regtype, @@ -10814,7 +10875,7 @@ multiclass SIMDThreeSameVectorTiedComplexHSD opcode, } } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in class BaseSIMDIndexedTiedComplex size, bit opc1, bit opc2, RegisterOperand dst_reg, RegisterOperand lhs_reg, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index a9191924129c..835a7b6cc81d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -1094,7 +1095,10 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, return true; default:; } - return isSEHInstruction(MI); + if (isSEHInstruction(MI)) + return true; + auto Next = std::next(MI.getIterator()); + return Next != MBB->end() && Next->isCFIInstruction(); } /// analyzeCompare - For a comparison instruction, return the source registers @@ -1435,7 +1439,7 @@ bool AArch64InstrInfo::optimizeCompareInstr( return false; const MCInstrDesc &MCID = get(NewOpc); CmpInstr.setDesc(MCID); - CmpInstr.RemoveOperand(DeadNZCVIdx); + CmpInstr.removeOperand(DeadNZCVIdx); bool succeeded = UpdateOperandRegClass(CmpInstr); (void)succeeded; assert(succeeded && "Some operands reg class are incompatible!"); @@ -1547,27 +1551,6 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) { } } -namespace { - -struct UsedNZCV { - bool N = false; - bool Z = false; - bool C = false; - bool V = false; - - UsedNZCV() = default; - - UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { - this->N |= UsedFlags.N; - this->Z |= UsedFlags.Z; - this->C |= UsedFlags.C; - this->V |= UsedFlags.V; - return *this; - } -}; - -} // end anonymous namespace - /// Find a condition code used by the instruction. /// Returns AArch64CC::Invalid if either the instruction does not use condition /// codes or we don't optimize CmpInstr in the presence of such instructions. @@ -1622,15 +1605,15 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { return UsedFlags; } -/// \returns Conditions flags used after \p CmpInstr in its MachineBB if they -/// are not containing C or V flags and NZCV flags are not alive in successors -/// of the same \p CmpInstr and \p MI parent. \returns None otherwise. +/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV +/// flags are not alive in successors of the same \p CmpInstr and \p MI parent. +/// \returns None otherwise. /// /// Collect instructions using those flags in \p CCUseInstrs if provided.
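With the C/V screening hoisted out of examineCFlagsUse, each caller now rejects uses of C or V itself (both canInstrSubstituteCmpInstr and canCmpInstrBeRemoved below check NZCVUsed->C || NZCVUsed->V). Those two flags are special because a flag-setting SUBS can stand in for a CMP only for consumers that read N or Z. A miniature model of the AArch64 flag computation (illustrative C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    struct NZCV { bool N, Z, C, V; };

    static NZCV subs32(uint32_t A, uint32_t B) {
      uint32_t R = A - B;
      bool N = R >> 31;                    // result negative
      bool Z = R == 0;                     // result zero
      bool C = A >= B;                     // no borrow occurred
      bool V = ((A ^ B) & (A ^ R)) >> 31;  // signed overflow
      return {N, Z, C, V};
    }

    int main() {
      assert(subs32(5, 5).Z);              // EQ/NE consult only Z
      NZCV F = subs32(0x80000000u, 1);     // INT_MIN - 1 overflows
      assert(F.V && !F.N);                 // signed LT is N != V, so V matters
    }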
-static Optional -examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, - const TargetRegisterInfo &TRI, - SmallVectorImpl *CCUseInstrs = nullptr) { +Optional +llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, + const TargetRegisterInfo &TRI, + SmallVectorImpl *CCUseInstrs) { MachineBasicBlock *CmpParent = CmpInstr.getParent(); if (MI.getParent() != CmpParent) return None; @@ -1652,8 +1635,6 @@ examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, if (Instr.modifiesRegister(AArch64::NZCV, &TRI)) break; } - if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V) - return None; return NZCVUsedAfterCmp; } @@ -1684,7 +1665,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode)) return false; - if (!examineCFlagsUse(MI, CmpInstr, TRI)) + Optional NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI); + if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V) return false; AccessKind AccessToCheck = AK_Write; @@ -1773,7 +1755,7 @@ static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs); // Condition flags are not used in CmpInstr basic block successors and only // Z or N flags allowed to be used after CmpInstr within its basic block - if (!NZCVUsedAfterCmp) + if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V) return false; // Z or N flag used after CmpInstr must correspond to the flag used in MI if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) || @@ -2270,6 +2252,19 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::LD1SW_D_IMM: case AArch64::LD1D_IMM: + case AArch64::LD2B_IMM: + case AArch64::LD2H_IMM: + case AArch64::LD2W_IMM: + case AArch64::LD2D_IMM: + case AArch64::LD3B_IMM: + case AArch64::LD3H_IMM: + case AArch64::LD3W_IMM: + case AArch64::LD3D_IMM: + case AArch64::LD4B_IMM: + case AArch64::LD4H_IMM: + case AArch64::LD4W_IMM: + case AArch64::LD4D_IMM: + case AArch64::ST1B_IMM: case AArch64::ST1B_H_IMM: case AArch64::ST1B_S_IMM: @@ -2281,6 +2276,19 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::ST1W_D_IMM: case AArch64::ST1D_IMM: + case AArch64::ST2B_IMM: + case AArch64::ST2H_IMM: + case AArch64::ST2W_IMM: + case AArch64::ST2D_IMM: + case AArch64::ST3B_IMM: + case AArch64::ST3H_IMM: + case AArch64::ST3W_IMM: + case AArch64::ST3D_IMM: + case AArch64::ST4B_IMM: + case AArch64::ST4H_IMM: + case AArch64::ST4W_IMM: + case AArch64::ST4D_IMM: + case AArch64::LD1RB_IMM: case AArch64::LD1RB_H_IMM: case AArch64::LD1RB_S_IMM: @@ -2897,6 +2905,45 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -8; MaxOffset = 7; break; + case AArch64::LD2B_IMM: + case AArch64::LD2H_IMM: + case AArch64::LD2W_IMM: + case AArch64::LD2D_IMM: + case AArch64::ST2B_IMM: + case AArch64::ST2H_IMM: + case AArch64::ST2W_IMM: + case AArch64::ST2D_IMM: + Scale = TypeSize::Scalable(32); + Width = SVEMaxBytesPerVector * 2; + MinOffset = -8; + MaxOffset = 7; + break; + case AArch64::LD3B_IMM: + case AArch64::LD3H_IMM: + case AArch64::LD3W_IMM: + case AArch64::LD3D_IMM: + case AArch64::ST3B_IMM: + case AArch64::ST3H_IMM: + case AArch64::ST3W_IMM: + case AArch64::ST3D_IMM: + Scale = TypeSize::Scalable(48); + Width = SVEMaxBytesPerVector * 3; + MinOffset = -8; + MaxOffset = 7; + break; + case AArch64::LD4B_IMM: + case AArch64::LD4H_IMM: + case AArch64::LD4W_IMM: + case AArch64::LD4D_IMM: + case AArch64::ST4B_IMM: + case AArch64::ST4H_IMM: + case AArch64::ST4W_IMM: + case 
AArch64::ST4D_IMM: + Scale = TypeSize::Scalable(64); + Width = SVEMaxBytesPerVector * 4; + MinOffset = -8; + MaxOffset = 7; + break; case AArch64::LD1B_H_IMM: case AArch64::LD1SB_H_IMM: case AArch64::LD1H_S_IMM: @@ -3105,6 +3152,86 @@ bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) { return isPreLd(MI) || isPreSt(MI); } +bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPWi: + case AArch64::LDPXi: + case AArch64::STPSi: + case AArch64::STPDi: + case AArch64::STPQi: + case AArch64::STPWi: + case AArch64::STPXi: + case AArch64::STGPi: + return true; + } +} + +const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) { + unsigned Idx = + AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 + : 1; + return MI.getOperand(Idx); +} + +const MachineOperand & +AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) { + unsigned Idx = + AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 + : 2; + return MI.getOperand(Idx); +} + +static const TargetRegisterClass *getRegClass(const MachineInstr &MI, + Register Reg) { + if (MI.getParent() == nullptr) + return nullptr; + const MachineFunction *MF = MI.getParent()->getParent(); + return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr; +} + +bool AArch64InstrInfo::isQForm(const MachineInstr &MI) { + auto IsQFPR = [&](const MachineOperand &Op) { + if (!Op.isReg()) + return false; + auto Reg = Op.getReg(); + if (Reg.isPhysical()) + return AArch64::FPR128RegClass.contains(Reg); + const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); + return TRC == &AArch64::FPR128RegClass || + TRC == &AArch64::FPR128_loRegClass; + }; + return llvm::any_of(MI.operands(), IsQFPR); +} + +bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) { + auto IsFPR = [&](const MachineOperand &Op) { + if (!Op.isReg()) + return false; + auto Reg = Op.getReg(); + if (Reg.isPhysical()) + return AArch64::FPR128RegClass.contains(Reg) || + AArch64::FPR64RegClass.contains(Reg) || + AArch64::FPR32RegClass.contains(Reg) || + AArch64::FPR16RegClass.contains(Reg) || + AArch64::FPR8RegClass.contains(Reg); + + const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); + return TRC == &AArch64::FPR128RegClass || + TRC == &AArch64::FPR128_loRegClass || + TRC == &AArch64::FPR64RegClass || + TRC == &AArch64::FPR64_loRegClass || + TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass || + TRC == &AArch64::FPR8RegClass; + }; + return llvm::any_of(MI.operands(), IsFPR); +} + // Scale the unscaled offsets. Returns false if the unscaled offset can't be // scaled. static bool scaleOffset(unsigned Opc, int64_t &Offset) { @@ -3370,7 +3497,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Predicate register by ORRing with itself. if (AArch64::PPRRegClass.contains(DestReg) && AArch64::PPRRegClass.contains(SrcReg)) { - assert(Subtarget.hasSVE() && "Unexpected SVE register."); + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && + "Unexpected SVE register."); BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) .addReg(SrcReg) // Pg .addReg(SrcReg) @@ -3381,7 +3509,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register by ORRing with itself. 
if (AArch64::ZPRRegClass.contains(DestReg) && AArch64::ZPRRegClass.contains(SrcReg)) { - assert(Subtarget.hasSVE() && "Unexpected SVE register."); + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && + "Unexpected SVE register."); BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) .addReg(SrcReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -3391,6 +3520,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register pair by copying the individual sub-registers. if (AArch64::ZPR2RegClass.contains(DestReg) && AArch64::ZPR2RegClass.contains(SrcReg)) { + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && + "Unexpected SVE register."); static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, Indices); @@ -3400,6 +3531,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register triple by copying the individual sub-registers. if (AArch64::ZPR3RegClass.contains(DestReg) && AArch64::ZPR3RegClass.contains(SrcReg)) { + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && + "Unexpected SVE register."); static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, AArch64::zsub2}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, @@ -3410,6 +3543,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register quad by copying the individual sub-registers. if (AArch64::ZPR4RegClass.contains(DestReg) && AArch64::ZPR4RegClass.contains(SrcReg)) { + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && + "Unexpected SVE register."); static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, AArch64::zsub2, AArch64::zsub3}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, @@ -3979,6 +4114,119 @@ void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( } } +// Convenience function to create a DWARF expression for +// Expr + NumBytes + NumVGScaledBytes * AArch64::VG +static void appendVGScaledOffsetExpr(SmallVectorImpl &Expr, int NumBytes, + int NumVGScaledBytes, unsigned VG, + llvm::raw_string_ostream &Comment) { + uint8_t buffer[16]; + + if (NumBytes) { + Expr.push_back(dwarf::DW_OP_consts); + Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer)); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes); + } + + if (NumVGScaledBytes) { + Expr.push_back((uint8_t)dwarf::DW_OP_consts); + Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer)); + + Expr.push_back((uint8_t)dwarf::DW_OP_bregx); + Expr.append(buffer, buffer + encodeULEB128(VG, buffer)); + Expr.push_back(0); + + Expr.push_back((uint8_t)dwarf::DW_OP_mul); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + + Comment << (NumVGScaledBytes < 0 ? 
" - " : " + ") + << std::abs(NumVGScaledBytes) << " * VG"; + } +} + +// Creates an MCCFIInstruction: +// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr } +static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, + unsigned Reg, + const StackOffset &Offset) { + int64_t NumBytes, NumVGScaledBytes; + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes, + NumVGScaledBytes); + std::string CommentBuffer; + llvm::raw_string_ostream Comment(CommentBuffer); + + if (Reg == AArch64::SP) + Comment << "sp"; + else if (Reg == AArch64::FP) + Comment << "fp"; + else + Comment << printReg(Reg, &TRI); + + // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG) + SmallString<64> Expr; + unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); + Expr.push_back(0); + appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, + TRI.getDwarfRegNum(AArch64::VG, true), Comment); + + // Wrap this into DW_CFA_def_cfa. + SmallString<64> DefCfaExpr; + DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); + uint8_t buffer[16]; + DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer)); + DefCfaExpr.append(Expr.str()); + return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), + Comment.str()); +} + +MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI, + unsigned FrameReg, unsigned Reg, + const StackOffset &Offset, + bool LastAdjustmentWasScalable) { + if (Offset.getScalable()) + return createDefCFAExpression(TRI, Reg, Offset); + + if (FrameReg == Reg && !LastAdjustmentWasScalable) + return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed())); + + unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed()); +} + +MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI, + unsigned Reg, + const StackOffset &OffsetFromDefCFA) { + int64_t NumBytes, NumVGScaledBytes; + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( + OffsetFromDefCFA, NumBytes, NumVGScaledBytes); + + unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + + // Non-scalable offsets can use DW_CFA_offset directly. + if (!NumVGScaledBytes) + return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes); + + std::string CommentBuffer; + llvm::raw_string_ostream Comment(CommentBuffer); + Comment << printReg(Reg, &TRI) << " @ cfa"; + + // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG) + SmallString<64> OffsetExpr; + appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes, + TRI.getDwarfRegNum(AArch64::VG, true), Comment); + + // Wrap this into DW_CFA_expression + SmallString<64> CfaExpr; + CfaExpr.push_back(dwarf::DW_CFA_expression); + uint8_t buffer[16]; + CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); + CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer)); + CfaExpr.append(OffsetExpr.str()); + + return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str()); +} + // Helper function to emit a frame offset adjustment from a given // pointer (SrcReg), stored into DestReg. This function is explicit // in that it requires the opcode. 
@@ -3988,7 +4236,8 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, - bool *HasWinCFI) { + bool *HasWinCFI, bool EmitCFAOffset, + StackOffset CFAOffset, unsigned FrameReg) { int Sign = 1; unsigned MaxEncoding, ShiftSize; switch (Opc) { @@ -4013,6 +4262,13 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB, llvm_unreachable("Unsupported opcode"); } + // `Offset` can be in bytes or in "scalable bytes". + int VScale = 1; + if (Opc == AArch64::ADDVL_XXI) + VScale = 16; + else if (Opc == AArch64::ADDPL_XXI) + VScale = 2; + // FIXME: If the offset won't fit in 24-bits, compute the offset into a // scratch register. If DestReg is a virtual register, use it as the // scratch register; otherwise, create a new virtual register (to be @@ -4050,6 +4306,26 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB, AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); MBI = MBI.setMIFlag(Flag); + auto Change = + VScale == 1 + ? StackOffset::getFixed(ThisVal << LocalShiftSize) + : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize)); + if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri) + CFAOffset += Change; + else + CFAOffset -= Change; + if (EmitCFAOffset && DestReg == TmpReg) { + MachineFunction &MF = *MBB.getParent(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + + unsigned CFIIndex = MF.addFrameInst( + createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(Flag); + } + if (NeedsWinCFI) { assert(Sign == 1 && "SEH directives should always have a positive sign"); int Imm = (int)(ThisVal << LocalShiftSize); @@ -4086,7 +4362,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool SetNZCV, - bool NeedsWinCFI, bool *HasWinCFI) { + bool NeedsWinCFI, bool *HasWinCFI, + bool EmitCFAOffset, StackOffset CFAOffset, + unsigned FrameReg) { int64_t Bytes, NumPredicateVectors, NumDataVectors; AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( Offset, Bytes, NumPredicateVectors, NumDataVectors); @@ -4101,8 +4379,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri; } emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, - NeedsWinCFI, HasWinCFI); + NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset, + FrameReg); + CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri) + ? 
StackOffset::getFixed(-Bytes) + : StackOffset::getFixed(Bytes); SrcReg = DestReg; + FrameReg = DestReg; } assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && @@ -4112,14 +4395,17 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, if (NumDataVectors) { emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, - AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); + AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr, + EmitCFAOffset, CFAOffset, FrameReg); + CFAOffset += StackOffset::getScalable(-NumDataVectors * 16); SrcReg = DestReg; } if (NumPredicateVectors) { assert(DestReg != AArch64::SP && "Unaligned access to SP"); emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, - AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); + AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr, + EmitCFAOffset, CFAOffset, FrameReg); } } @@ -4151,6 +4437,9 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); return nullptr; } + // Nothing can be folded with copy from/to NZCV. + if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV) + return nullptr; } // Handle the case where a copy is being spilled or filled but the source @@ -4577,6 +4866,10 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, return false; } + if (isCombineInstrSettingFlag(CombineOpc) && + MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) + return false; + return true; } @@ -4919,6 +5212,10 @@ static bool getFMULPatterns(MachineInstr &Root, MachineInstr *MI = nullptr; if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) MI = MRI.getUniqueVRegDef(MO.getReg()); + // Ignore No-op COPYs in FMUL(COPY(DUP(..))) + if (MI && MI->getOpcode() == TargetOpcode::COPY && + MI->getOperand(1).getReg().isVirtual()) + MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg()); if (MI && MI->getOpcode() == Opcode) { Patterns.push_back(Pattern); return true; } @@ -5073,6 +5370,42 @@ bool AArch64InstrInfo::isThroughputPattern( } // end switch (Pattern) return false; } + +/// Find other MI combine patterns. +static bool getMiscPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns) +{ + // A - (B + C) ==> (A - B) - C or (A - C) - B + unsigned Opc = Root.getOpcode(); + MachineBasicBlock &MBB = *Root.getParent(); + + switch (Opc) { + case AArch64::SUBWrr: + case AArch64::SUBSWrr: + case AArch64::SUBXrr: + case AArch64::SUBSXrr: + // Found candidate root. + break; + default: + return false; + } + + if (isCombineInstrSettingFlag(Opc) && + Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) + return false; + + if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) || + canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) || + canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) || + canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) { + Patterns.push_back(MachineCombinerPattern::SUBADD_OP1); + Patterns.push_back(MachineCombinerPattern::SUBADD_OP2); + return true; + } + + return false; +} + /// Return true when there is potentially a faster code sequence for an /// instruction chain ending in \p Root. All potential patterns are listed in /// the \p Pattern vector.
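The SUBADD_OP1/OP2 patterns recognized by getMiscPatterns above rest on an exact identity of wrapping machine integers, so the rewrite is unconditionally sound (the flag-setting SUBS forms are only considered when NZCV is dead, per the findRegisterDefOperandIdx check); the machine combiner then keeps whichever shape its depth and resource model prefers. In plain C++:

    #include <cassert>

    int main() {
      unsigned A = 100, B = 37, C = 5;
      assert(A - (B + C) == (A - B) - C); // SUBADD_OP1
      assert(A - (B + C) == (A - C) - B); // SUBADD_OP2
      // Holds for all values, including wrap-around:
      assert(0u - (0x80000000u + 0x80000001u) ==
             (0u - 0x80000000u) - 0x80000001u);
    }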
Pattern should be sorted in priority order since the @@ -5090,6 +5423,10 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( if (getFMAPatterns(Root, Patterns)) return true; + // Other patterns + if (getMiscPatterns(Root, Patterns)) + return true; + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, DoRegPressureReduce); } @@ -5190,6 +5527,9 @@ genIndexedMultiply(MachineInstr &Root, MachineInstr *Dup = MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg()); + if (Dup->getOpcode() == TargetOpcode::COPY) + Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg()); + Register DupSrcReg = Dup->getOperand(1).getReg(); MRI.clearKillFlags(DupSrcReg); MRI.constrainRegClass(DupSrcReg, RC); @@ -5337,6 +5677,53 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, return MUL; } +/// Do the following transformation +/// A - (B + C) ==> (A - B) - C +/// A - (B + C) ==> (A - C) - B +static void +genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, MachineInstr &Root, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + unsigned IdxOpd1, + DenseMap &InstrIdxForVirtReg) { + assert(IdxOpd1 == 1 || IdxOpd1 == 2); + unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1; + MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + + Register ResultReg = Root.getOperand(0).getReg(); + Register RegA = Root.getOperand(1).getReg(); + bool RegAIsKill = Root.getOperand(1).isKill(); + Register RegB = AddMI->getOperand(IdxOpd1).getReg(); + bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill(); + Register RegC = AddMI->getOperand(IdxOtherOpd).getReg(); + bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill(); + Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA)); + + unsigned Opcode = Root.getOpcode(); + if (Opcode == AArch64::SUBSWrr) + Opcode = AArch64::SUBWrr; + else if (Opcode == AArch64::SUBSXrr) + Opcode = AArch64::SUBXrr; + else + assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) && + "Unexpected instruction opcode."); + + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegA, getKillRegState(RegAIsKill)) + .addReg(RegB, getKillRegState(RegBIsKill)); + MachineInstrBuilder MIB2 = + BuildMI(MF, Root.getDebugLoc(), TII->get(Opcode), ResultReg) + .addReg(NewVR, getKillRegState(true)) + .addReg(RegC, getKillRegState(RegCIsKill)); + + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + InsInstrs.push_back(MIB1); + InsInstrs.push_back(MIB2); + DelInstrs.push_back(AddMI); +} + /// When getMachineCombinerPatterns() finds potential patterns, /// this function generates the instructions that could replace the /// original code sequence @@ -5359,6 +5746,18 @@ void AArch64InstrInfo::genAlternativeCodeSequence( TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; + case MachineCombinerPattern::SUBADD_OP1: + // A - (B + C) + // ==> (A - B) - C + genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1, + InstrIdxForVirtReg); + break; + case MachineCombinerPattern::SUBADD_OP2: + // A - (B + C) + // ==> (A - C) - B + genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2, + InstrIdxForVirtReg); + break; case MachineCombinerPattern::MULADDW_OP1: case MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 @@ -6214,6 +6613,14 @@ void AArch64InstrInfo::genAlternativeCodeSequence( if (MUL) DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); + + // Set the flags on the inserted 
instructions to be the merged flags of the + // instructions that we have combined. + uint16_t Flags = Root.getFlags(); + if (MUL) + Flags = Root.mergeFlagsWith(*MUL); + for (auto *MI : InsInstrs) + MI->setFlags(Flags); } /// Replace csincr-branch sequence by simple conditional branch @@ -6526,13 +6933,12 @@ enum MachineOutlinerMBBFlags { UnsafeRegsDead = 0x8 }; -unsigned -AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { - assert(C.LRUWasSet && "LRU wasn't set?"); +Register +AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { MachineFunction *MF = C.getMF(); - const AArch64RegisterInfo *ARI = static_cast( - MF->getSubtarget().getRegisterInfo()); - + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + const AArch64RegisterInfo *ARI = + static_cast(&TRI); // Check if there is an available register across the sequence that we can // use. for (unsigned Reg : AArch64::GPR64RegClass) { @@ -6540,12 +6946,11 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { Reg != AArch64::LR && // LR is not reserved, but don't use it. Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. Reg != AArch64::X17 && // Ditto for X17. - C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && + C.isAvailableInsideSeq(Reg, TRI)) return Reg; } - - // No suitable register. Return 0. - return 0u; + return Register(); } static bool @@ -6691,10 +7096,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( unsigned FlagsSetInAll = 0xF; // Compute liveness information for each candidate, and set FlagsSetInAll. - std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), - [&FlagsSetInAll](outliner::Candidate &C) { - FlagsSetInAll &= C.Flags; - }); + for (outliner::Candidate &C : RepeatedSequenceLocs) + FlagsSetInAll &= C.Flags; // According to the AArch64 Procedure Call Standard, the following are // undefined on entry/exit from a function call: @@ -6712,10 +7115,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // to compute liveness here. if (C.Flags & UnsafeRegsDead) return false; - C.initLRU(TRI); - LiveRegUnits LRU = C.LRU; - return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) || - !LRU.available(AArch64::NZCV)); + return C.isAnyUnavailableAcrossOrOutOfSeq( + {AArch64::W16, AArch64::W17, AArch64::NZCV}, TRI); }; // Are there any candidates where those registers are live? @@ -6752,12 +7153,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // We check to see if CFI Instructions are present, and if they are // we find the number of CFI Instructions in the candidates. unsigned CFICount = 0; - MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front(); - for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx(); - Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) { - if (MBBI->isCFIInstruction()) + for (auto &I : make_range(RepeatedSequenceLocs[0].front(), + std::next(RepeatedSequenceLocs[0].back()))) { + if (I.isCFIInstruction()) CFICount++; - MBBI++; } // We compare the number of found CFI Instructions to the number of CFI @@ -6860,8 +7259,6 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Check if we have to save LR. for (outliner::Candidate &C : RepeatedSequenceLocs) { - C.initLRU(TRI); - // If we have a noreturn caller, then we're going to be conservative and // say that we have to save LR. 
@@ -6860,8 +7259,6 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Check if we have to save LR. for (outliner::Candidate &C : RepeatedSequenceLocs) { - C.initLRU(TRI); - // If we have a noreturn caller, then we're going to be conservative and // say that we have to save LR. If we don't have a ret at the end of the // block, then we can't reason about liveness accurately. @@ -6872,7 +7269,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn); // Is LR available? If so, we don't need a save. - if (C.LRU.available(AArch64::LR) && !IsNoReturn) { + if (C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) && !IsNoReturn) { NumBytesNoStackCalls += 4; C.setCallInfo(MachineOutlinerNoLRSave, 4); CandidatesWithoutStackFixups.push_back(C); @@ -6888,7 +7285,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Is SP used in the sequence at all? If not, we don't have to modify // the stack, so we are guaranteed to get the same frame. - else if (C.UsedInSequence.available(AArch64::SP)) { + else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) { NumBytesNoStackCalls += 12; C.setCallInfo(MachineOutlinerDefault, 12); CandidatesWithoutStackFixups.push_back(C); @@ -6957,11 +7354,12 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // LR to (ie one extra stack save/restore). // if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { - erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { + erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) { return (std::any_of( C.front(), std::next(C.back()), [](const MachineInstr &MI) { return MI.isCall(); })) && - (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); + (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) || + !findRegisterToSaveLRTo(C)); }); } } @@ -7032,7 +7430,7 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( // modify the stack. Check if hasRedZone is true or unknown; if yes, don't // outline from it. AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - if (!AFI || AFI->hasRedZone().getValueOr(true)) + if (!AFI || AFI->hasRedZone().value_or(true)) return false; // FIXME: Teach the outliner to generate/handle Windows unwind info. @@ -7053,8 +7451,8 @@ bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, "Suitable Machine Function for outlining must track liveness"); LiveRegUnits LRU(getRegisterInfo()); - std::for_each(MBB.rbegin(), MBB.rend(), - [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + for (MachineInstr &MI : llvm::reverse(MBB)) + LRU.accumulate(MI); // Check if each of the unsafe registers are available... bool W16AvailableInBlock = LRU.available(AArch64::W16);
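The getValueOr -> value_or rename in the hasRedZone() check above tracks llvm::Optional converging on std::optional spellings; what matters for the outliner is the conservative default when the red-zone state is unknown. A minimal sketch of those semantics, with std::optional standing in for llvm::Optional:

#include <cassert>
#include <optional>

int main() {
  std::optional<bool> HasRedZone;     // unset: red-zone state unknown
  assert(HasRedZone.value_or(true));  // unknown => assume a red zone, skip MF
  HasRedZone = false;                 // proven red-zone free
  assert(!HasRedZone.value_or(true)); // now outlining may proceed
  return 0;
}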
@@ -7333,14 +7731,17 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, .addReg(AArch64::SP, RegState::InternalRead); MI.setMIFlag(MachineInstr::FrameSetup); - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); - BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); + if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo()) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } // If v8.3a features are available we can replace a RET instruction by - // RETAA or RETAB and omit the AUT instructions + // RETAA or RETAB and omit the AUT instructions. In this case the + // DW_CFA_AARCH64_negate_ra_state can't be emitted. if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && MBBAUT->getOpcode() == AArch64::RET) { BuildMI(MBB, MBBAUT, DL, @@ -7353,6 +7754,11 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP : AArch64::AUTIBSP)) .setMIFlag(MachineInstr::FrameDestroy); + unsigned CFIIndexAuth = + MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndexAuth) + .setMIFlags(MachineInstr::FrameDestroy); } } } @@ -7424,24 +7830,26 @@ void AArch64InstrInfo::buildOutlinedFrame( .addImm(-16); It = MBB.insert(It, STRXpre); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const MCRegisterInfo *MRI = STI.getRegisterInfo(); - unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); - - // Add a CFI saying the stack was moved 16 B down. - int64_t StackPosEntry = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16)); - BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) - .addCFIIndex(StackPosEntry) - .setMIFlags(MachineInstr::FrameSetup); - - // Add a CFI saying that the LR that we want to find is now 16 B higher than - // before. - int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16)); - BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameSetup); + if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo()) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const MCRegisterInfo *MRI = STI.getRegisterInfo(); + unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); + + // Add a CFI saying the stack was moved 16 B down. + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16)); + BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Add a CFI saying that the LR that we want to find is now 16 B higher + // than before. + int64_t LRPosEntry = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfReg, -16)); + BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + } // Insert a restore before the terminator for the function. MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) @@ -7495,7 +7903,7 @@ void AArch64InstrInfo::buildOutlinedFrame( MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF, const outliner::Candidate &C) const { + MachineFunction &MF, outliner::Candidate &C) const { // Are we tail calling? if (C.CallConstructionID == MachineOutlinerTailCall) { @@ -7526,8 +7934,8 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( if (C.CallConstructionID == MachineOutlinerRegSave) { // FIXME: This logic should be sunk into a target-specific interface so that // we don't have to recompute the register. - unsigned Reg = findRegisterToSaveLRTo(C); - assert(Reg != 0 && "No callee-saved register available?"); + Register Reg = findRegisterToSaveLRTo(C); + assert(Reg && "No callee-saved register available?"); // LR has to be a live in so that we can save it. if (!MBB.isLiveIn(AArch64::LR))
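With findRegisterToSaveLRTo now returning Register rather than unsigned, failure is signalled by a default-constructed (invalid) register, which is what assert(Reg && ...) in insertOutlinedCall checks. A simplified stand-in (not the real llvm::Register, whose boolean test goes through a conversion to unsigned) showing the assumed behaviour:

#include <cassert>

class Register {
  unsigned Reg = 0; // 0 doubles as "no register", matching LLVM's convention
public:
  Register() = default;
  explicit Register(unsigned R) : Reg(R) {}
  explicit operator bool() const { return Reg != 0; }
};

int main() {
  Register NoReg;     // what the search returns when nothing is free
  Register Found(20); // placeholder encoding for a usable GPR
  assert(!NoReg && Found);
  return 0;
}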
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 1054bea40e68..b7a6ac301cdc 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -103,6 +103,21 @@ public: /// Returns whether the instruction is a pre-indexed load/store. static bool isPreLdSt(const MachineInstr &MI); + /// Returns whether the instruction is a paired load/store. + static bool isPairedLdSt(const MachineInstr &MI); + + /// Returns the base register operand of a load/store. + static const MachineOperand &getLdStBaseOp(const MachineInstr &MI); + + /// Returns the immediate offset operand of a load/store. + static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI); + + /// Returns whether the instruction is FP or NEON. + static bool isFpOrNEON(const MachineInstr &MI); + + /// Returns whether the instruction is in Q form (128-bit operands). + static bool isQForm(const MachineInstr &MI); + /// Returns the index for the immediate for a given instruction. static unsigned getLoadStoreImmIdx(unsigned Opc); @@ -283,7 +298,7 @@ public: MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::Candidate &C) const override; + outliner::Candidate &C) const override; bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; /// Returns the vector element size (B, H, S or D) of an SVE opcode. uint64_t getElementSizeForOpcode(unsigned Opc) const; @@ -347,7 +362,7 @@ private: /// Returns an unused general-purpose register which can be used for /// constructing an outlined call if one exists. Returns 0 otherwise. - unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + Register findRegisterToSaveLRTo(outliner::Candidate &C) const; /// Remove a ptest of a predicate-generating operation that already sets, or /// can be made to set, the condition codes in an identical manner @@ -356,12 +371,45 @@ private: const MachineRegisterInfo *MRI) const; }; +struct UsedNZCV { + bool N = false; + bool Z = false; + bool C = false; + bool V = false; + + UsedNZCV() = default; + + UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { + this->N |= UsedFlags.N; + this->Z |= UsedFlags.Z; + this->C |= UsedFlags.C; + this->V |= UsedFlags.V; + return *this; + } +}; + +/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV +/// flags are not alive in successors of the same \p CmpInstr and \p MI parent. +/// \returns None otherwise. +/// +/// Collect instructions using those flags in \p CCUseInstrs if provided. +Optional<UsedNZCV> +examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, + const TargetRegisterInfo &TRI, + SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr); + /// Return true if there is an instruction /after/ \p DefMI and before \p UseMI /// which either reads or clobbers NZCV. bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI); +MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, + unsigned Reg, const StackOffset &Offset, + bool LastAdjustmentWasScalable = true); +MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, + const StackOffset &OffsetFromDefCFA); + /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg /// plus Offset.
This is intended to be used from within the prolog/epilog /// insertion (PEI) pass, where a virtual scratch register may be allocated @@ -371,7 +419,9 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false, bool NeedsWinCFI = false, - bool *HasWinCFI = nullptr); + bool *HasWinCFI = nullptr, bool EmitCFAOffset = false, + StackOffset InitialOffset = {}, + unsigned FrameReg = AArch64::SP); /// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the /// FP. Return false if the offset could not be handled directly in MI, and diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 83bf89ff97c5..3802a45ad6c1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -14,196 +14,196 @@ // ARM Instruction Predicate Definitions. // def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, - AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">; + AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">; def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, - AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">; + AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">; def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, - AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">; + AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">; def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, - AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">; + AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">; def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, - AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">; + AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">; def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, - AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">; + AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">; def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, - AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">; + AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">; def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">, - AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">; + AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">; def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">, - AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">; + AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">; def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">, - AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">; + AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">; def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">, - AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">; + AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">; def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, - AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">; + AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">; def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">, - AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">; + AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">; def HasEL3 : Predicate<"Subtarget->hasEL3()">, - AssemblerPredicate<(all_of FeatureEL3), "el3">; + AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">; def HasVH : Predicate<"Subtarget->hasVH()">, - AssemblerPredicate<(all_of FeatureVH), "vh">; + AssemblerPredicateWithAll<(all_of FeatureVH), "vh">; def 
HasLOR : Predicate<"Subtarget->hasLOR()">, - AssemblerPredicate<(all_of FeatureLOR), "lor">; + AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">; def HasPAuth : Predicate<"Subtarget->hasPAuth()">, - AssemblerPredicate<(all_of FeaturePAuth), "pauth">; + AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">; def HasJS : Predicate<"Subtarget->hasJS()">, - AssemblerPredicate<(all_of FeatureJS), "jsconv">; + AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">; def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">, - AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">; + AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">; def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, - AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">; + AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">; def HasNV : Predicate<"Subtarget->hasNV()">, - AssemblerPredicate<(all_of FeatureNV), "nv">; + AssemblerPredicateWithAll<(all_of FeatureNV), "nv">; def HasMPAM : Predicate<"Subtarget->hasMPAM()">, - AssemblerPredicate<(all_of FeatureMPAM), "mpam">; + AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">; def HasDIT : Predicate<"Subtarget->hasDIT()">, - AssemblerPredicate<(all_of FeatureDIT), "dit">; + AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">; def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, - AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">; + AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">; def HasAM : Predicate<"Subtarget->hasAM()">, - AssemblerPredicate<(all_of FeatureAM), "am">; + AssemblerPredicateWithAll<(all_of FeatureAM), "am">; def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, - AssemblerPredicate<(all_of FeatureSEL2), "sel2">; + AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">; def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, - AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">; + AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">; def HasFlagM : Predicate<"Subtarget->hasFlagM()">, - AssemblerPredicate<(all_of FeatureFlagM), "flagm">; + AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">; def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">, - AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">; + AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">; + AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, - AssemblerPredicate<(all_of FeatureNEON), "neon">; + AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">; def HasCrypto : Predicate<"Subtarget->hasCrypto()">, - AssemblerPredicate<(all_of FeatureCrypto), "crypto">; + AssemblerPredicateWithAll<(all_of FeatureCrypto), "crypto">; def HasSM4 : Predicate<"Subtarget->hasSM4()">, - AssemblerPredicate<(all_of FeatureSM4), "sm4">; + AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">; def HasSHA3 : Predicate<"Subtarget->hasSHA3()">, - AssemblerPredicate<(all_of FeatureSHA3), "sha3">; + AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">; def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, - AssemblerPredicate<(all_of FeatureSHA2), "sha2">; + AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">; def HasAES : Predicate<"Subtarget->hasAES()">, - AssemblerPredicate<(all_of FeatureAES), "aes">; + AssemblerPredicateWithAll<(all_of FeatureAES), "aes">; def HasDotProd : Predicate<"Subtarget->hasDotProd()">, - AssemblerPredicate<(all_of 
FeatureDotProd), "dotprod">; + AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">; def HasCRC : Predicate<"Subtarget->hasCRC()">, - AssemblerPredicate<(all_of FeatureCRC), "crc">; + AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">; def HasLSE : Predicate<"Subtarget->hasLSE()">, - AssemblerPredicate<(all_of FeatureLSE), "lse">; + AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">; def HasNoLSE : Predicate<"!Subtarget->hasLSE()">; def HasRAS : Predicate<"Subtarget->hasRAS()">, - AssemblerPredicate<(all_of FeatureRAS), "ras">; + AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">; def HasRDM : Predicate<"Subtarget->hasRDM()">, - AssemblerPredicate<(all_of FeatureRDM), "rdm">; + AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">; def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, - AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">; + AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">; def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">, - AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">; + AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">; def HasSPE : Predicate<"Subtarget->hasSPE()">, - AssemblerPredicate<(all_of FeatureSPE), "spe">; + AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">; def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, - AssemblerPredicate<(all_of FeatureFuseAES), + AssemblerPredicateWithAll<(all_of FeatureFuseAES), "fuse-aes">; def HasSVE : Predicate<"Subtarget->hasSVE()">, - AssemblerPredicate<(all_of FeatureSVE), "sve">; + AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">; def HasSVE2 : Predicate<"Subtarget->hasSVE2()">, - AssemblerPredicate<(all_of FeatureSVE2), "sve2">; + AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">; def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">, - AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">; + AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">; def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, - AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">; + AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">; def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, - AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">; + AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, - AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; + AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; def HasSME : Predicate<"Subtarget->hasSME()">, - AssemblerPredicate<(all_of FeatureSME), "sme">; + AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">, - AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">; + AssemblerPredicateWithAll<(all_of FeatureSMEF64), "sme-f64">; def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">, - AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">; -def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(all_of FeatureStreamingSVE), "streaming-sve">; + AssemblerPredicateWithAll<(all_of FeatureSMEI64), "sme-i64">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. 
-def HasSVEorStreamingSVE - : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE), - "streaming-sve or sve">; -def HasSVE2orStreamingSVE - : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE), - "streaming-sve or sve2">; +def HasSVEorSME + : Predicate<"Subtarget->hasSVE() || Subtarget->hasSME()">, + AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), + "sve or sme">; +def HasSVE2orSME + : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">, + AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), + "sve2 or sme">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. -def HasNEONorStreamingSVE - : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE), - "streaming-sve or neon">; +def HasNEONorSME + : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">, + AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME), + "neon or sme">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, - AssemblerPredicate<(all_of FeatureRCPC), "rcpc">; + AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; +def HasLDAPR : Predicate<"Subtarget->hasLDAPR()">, + AssemblerPredicateWithAll<(all_of FeatureLDAPR), "ldapr">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, - AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">; + AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">; def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">, - AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">; + AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">; def HasSB : Predicate<"Subtarget->hasSB()">, - AssemblerPredicate<(all_of FeatureSB), "sb">; + AssemblerPredicateWithAll<(all_of FeatureSB), "sb">; def HasPredRes : Predicate<"Subtarget->hasPredRes()">, - AssemblerPredicate<(all_of FeaturePredRes), "predres">; + AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">; def HasCCDP : Predicate<"Subtarget->hasCCDP()">, - AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">; + AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">; def HasBTI : Predicate<"Subtarget->hasBTI()">, - AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">; + AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">; def HasMTE : Predicate<"Subtarget->hasMTE()">, - AssemblerPredicate<(all_of FeatureMTE), "mte">; + AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">; def HasTME : Predicate<"Subtarget->hasTME()">, - AssemblerPredicate<(all_of FeatureTME), "tme">; + AssemblerPredicateWithAll<(all_of FeatureTME), "tme">; def HasETE : Predicate<"Subtarget->hasETE()">, - AssemblerPredicate<(all_of FeatureETE), "ete">; + AssemblerPredicateWithAll<(all_of FeatureETE), "ete">; def HasTRBE : Predicate<"Subtarget->hasTRBE()">, - AssemblerPredicate<(all_of FeatureTRBE), "trbe">; + AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">; def HasBF16 : Predicate<"Subtarget->hasBF16()">, - AssemblerPredicate<(all_of FeatureBF16), "bf16">; + AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">; def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">, - AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">; + AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">; def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">, - 
AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">; + AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">; def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">, - AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">; + AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">; def HasXS : Predicate<"Subtarget->hasXS()">, - AssemblerPredicate<(all_of FeatureXS), "xs">; + AssemblerPredicateWithAll<(all_of FeatureXS), "xs">; def HasWFxT : Predicate<"Subtarget->hasWFxT()">, - AssemblerPredicate<(all_of FeatureWFxT), "wfxt">; + AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">; def HasLS64 : Predicate<"Subtarget->hasLS64()">, - AssemblerPredicate<(all_of FeatureLS64), "ls64">; + AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">; def HasBRBE : Predicate<"Subtarget->hasBRBE()">, - AssemblerPredicate<(all_of FeatureBRBE), "brbe">; + AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">; def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, - AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">; + AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">; def HasHBC : Predicate<"Subtarget->hasHBC()">, - AssemblerPredicate<(all_of FeatureHBC), "hbc">; + AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">; def HasMOPS : Predicate<"Subtarget->hasMOPS()">, - AssemblerPredicate<(all_of FeatureMOPS), "mops">; + AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -350,49 +350,49 @@ def nonext_masked_load : cast(N)->isUnindexed() && !cast(N)->isNonTemporal(); }]>; -// sign extending masked load fragments. -def asext_masked_load : +// Any/Zero extending masked load fragments. +def azext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ return (cast(N)->getExtensionType() == ISD::EXTLOAD || - cast(N)->getExtensionType() == ISD::SEXTLOAD) && + cast(N)->getExtensionType() == ISD::ZEXTLOAD) && cast(N)->isUnindexed(); }]>; -def asext_masked_load_i8 : +def azext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (asext_masked_load node:$ptr, node:$pred, node:$def), [{ + (azext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def asext_masked_load_i16 : +def azext_masked_load_i16 : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (asext_masked_load node:$ptr, node:$pred, node:$def), [{ + (azext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i16; }]>; -def asext_masked_load_i32 : +def azext_masked_load_i32 : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (asext_masked_load node:$ptr, node:$pred, node:$def), [{ + (azext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; -// zero extending masked load fragments. -def zext_masked_load : +// Sign extending masked load fragments. 
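The azext fragments above accept both ISD::EXTLOAD and ISD::ZEXTLOAD: an any-extending load leaves the high bits unspecified, so an instruction that zero-extends satisfies both contracts, while the sign-extending fragments that follow must match ISD::SEXTLOAD exactly. A standalone model of that classification, with a plain enum standing in for ISD::LoadExtType:

#include <cassert>

enum LoadExtType { NON_EXTLOAD, EXTLOAD, SEXTLOAD, ZEXTLOAD };

// The azext_masked_load predicate shape: any- or zero-extending.
bool matchesAZExt(LoadExtType ET) { return ET == EXTLOAD || ET == ZEXTLOAD; }
// The sext_masked_load predicate shape: sign-extending only.
bool matchesSExt(LoadExtType ET) { return ET == SEXTLOAD; }

int main() {
  assert(matchesAZExt(EXTLOAD) && matchesAZExt(ZEXTLOAD));
  assert(!matchesAZExt(SEXTLOAD) && matchesSExt(SEXTLOAD));
  return 0;
}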
+def sext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ - return cast(N)->getExtensionType() == ISD::ZEXTLOAD && + return cast(N)->getExtensionType() == ISD::SEXTLOAD && cast(N)->isUnindexed(); }]>; -def zext_masked_load_i8 : +def sext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (zext_masked_load node:$ptr, node:$pred, node:$def), [{ + (sext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def zext_masked_load_i16 : +def sext_masked_load_i16 : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (zext_masked_load node:$ptr, node:$pred, node:$def), [{ + (sext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i16; }]>; -def zext_masked_load_i32 : +def sext_masked_load_i32 : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (zext_masked_load node:$ptr, node:$pred, node:$def), [{ + (sext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; @@ -443,6 +443,58 @@ def non_temporal_store : cast(N)->isNonTemporal(); }]>; +multiclass masked_gather_scatter { + // offsets = (signed)Index << sizeof(elt) + def NAME#_signed_scaled : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ + auto MGS = cast(N); + bool Signed = MGS->isIndexSigned() || + MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; + return Signed && MGS->isIndexScaled(); + }]>; + // offsets = (signed)Index + def NAME#_signed_unscaled : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ + auto MGS = cast(N); + bool Signed = MGS->isIndexSigned() || + MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; + return Signed && !MGS->isIndexScaled(); + }]>; + // offsets = (unsigned)Index << sizeof(elt) + def NAME#_unsigned_scaled : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ + auto MGS = cast(N); + bool Signed = MGS->isIndexSigned() || + MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; + return !Signed && MGS->isIndexScaled(); + }]>; + // offsets = (unsigned)Index + def NAME#_unsigned_unscaled : + PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), + (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ + auto MGS = cast(N); + bool Signed = MGS->isIndexSigned() || + MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; + return !Signed && !MGS->isIndexScaled(); + }]>; +} + +defm nonext_masked_gather : masked_gather_scatter; +defm azext_masked_gather_i8 : masked_gather_scatter; +defm azext_masked_gather_i16 : masked_gather_scatter; +defm azext_masked_gather_i32 : masked_gather_scatter; +defm sext_masked_gather_i8 : masked_gather_scatter; +defm sext_masked_gather_i16 : masked_gather_scatter; +defm sext_masked_gather_i32 : masked_gather_scatter; + +defm nontrunc_masked_scatter : masked_gather_scatter; +defm trunc_masked_scatter_i8 : masked_gather_scatter; +defm trunc_masked_scatter_i16 : masked_gather_scatter; +defm trunc_masked_scatter_i32 : masked_gather_scatter; + // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise def top16Zero: PatLeaf<(i32 GPR32:$src), [{ return SDValue(N,0)->getValueType(0) == MVT::i32 && @@ -473,6 +525,11 @@ def AArch64call 
: SDNode<"AArch64ISD::CALL", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, @@ -526,6 +583,7 @@ def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; +def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; @@ -612,8 +670,10 @@ def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>; -def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; -def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; +def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, + [SDNPCommutative]>; +def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, + [SDNPCommutative]>; def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; @@ -630,11 +690,6 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; -def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>; -def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>; -def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>; -def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>; - def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), [(abdu node:$lhs, node:$rhs), (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; @@ -642,10 +697,21 @@ def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), [(abds node:$lhs, node:$rhs), (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; +def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>; def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; +def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>; +def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm), + [(AArch64addp_n node:$Rn, node:$Rm), + (int_aarch64_neon_addp node:$Rn, node:$Rm)]>; def AArch64uaddlp : PatFrags<(ops node:$src), [(AArch64uaddlp_n node:$src), (int_aarch64_neon_uaddlp node:$src)]>; +def AArch64saddlp : PatFrags<(ops node:$src), + [(AArch64saddlp_n node:$src), + (int_aarch64_neon_saddlp node:$src)]>; +def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm), + [(AArch64addp_n node:$Rn, node:$Rm), + (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>; def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -669,6 +735,22 @@ def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; def AArch64mrs : SDNode<"AArch64ISD::MRS", SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, [SDNPHasChain, SDNPOutGlue]>; + +// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands +// have no common bits. 
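The add_and_or_is_add PatFrags defined next relies on the identity that CurDAG->haveNoCommonBitsSet guards: when the operands share no set bits, the OR generates no carries and equals the sum, so an 'or' can be selected as if it were an 'add'. A quick standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0xF0, B = 0x0F; // disjoint bit patterns
  assert((A & B) == 0);        // no common bits set
  assert((A | B) == A + B);    // hence or == add (0xFF either way)
  return 0;
}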
+def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), + [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ + if (N->getOpcode() == ISD::ADD) + return true; + return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); +}]> { + let GISelPredicateCode = [{ + // Only handle G_ADD for now. FIXME. build capability to compute whether + // operands of G_OR have common bits set or not. + return MI.getOpcode() == TargetOpcode::G_ADD; + }]; +} + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -939,7 +1021,7 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot VectorIndexS:$idx)>; } -let Predicates = [HasNEONorStreamingSVE, HasBF16] in { +let Predicates = [HasNEONorSME, HasBF16] in { def BFCVT : BF16ToSinglePrecision<"bfcvt">; } @@ -1025,6 +1107,15 @@ def : EOR3_pattern; def : EOR3_pattern; def : EOR3_pattern; +class BCAX_pattern + : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))), + (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; + +def : BCAX_pattern; +def : BCAX_pattern; +def : BCAX_pattern; +def : BCAX_pattern; + def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; @@ -2073,6 +2164,10 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>; def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>; +def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)), + (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))), + (REV16Xr GPR64:$Rn)>; + //===----------------------------------------------------------------------===// // Bitfield immediate extraction instruction. //===----------------------------------------------------------------------===// @@ -2320,6 +2415,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in { PseudoInstExpansion<(BLR GPR64:$Rn)>; def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, Sched<[WriteBrReg]>; + def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>, + Sched<[WriteBrReg]>; } // isCall def : Pat<(AArch64call GPR64:$Rn), @@ -2333,6 +2430,10 @@ def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn), (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>, Requires<[NoSLSBLRMitigation]>; +def : Pat<(AArch64call_bti GPR64:$Rn), + (BLR_BTI GPR64:$Rn)>, + Requires<[NoSLSBLRMitigation]>; + let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; } // isBranch, isTerminator, isBarrier, isIndirectBranch @@ -2359,6 +2460,10 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { // augmentation string. def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {} +// Pseudo instruction to tell the streamer to emit a 'G' character into the +// augmentation string. +def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {} + // FIXME: maybe the scratch register used shouldn't be fixed to X1? // FIXME: can "hasSideEffects be dropped? // This gets lowered to an instruction sequence which takes 16 bytes @@ -2409,7 +2514,8 @@ def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; // Exception generation instructions. 
//===----------------------------------------------------------------------===// let isTrap = 1 in { -def BRK : ExceptionGeneration<0b001, 0b00, "brk">; +def BRK : ExceptionGeneration<0b001, 0b00, "brk", + [(int_aarch64_break timm32_0_65535:$imm)]>; } def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; @@ -3891,24 +3997,24 @@ defm : FPToIntegerPats; let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lround f16:$Rn)), + def : Pat<(i32 (any_lround f16:$Rn)), (!cast(FCVTASUWHr) f16:$Rn)>; - def : Pat<(i64 (lround f16:$Rn)), + def : Pat<(i64 (any_lround f16:$Rn)), (!cast(FCVTASUXHr) f16:$Rn)>; - def : Pat<(i64 (llround f16:$Rn)), + def : Pat<(i64 (any_llround f16:$Rn)), (!cast(FCVTASUXHr) f16:$Rn)>; } -def : Pat<(i32 (lround f32:$Rn)), +def : Pat<(i32 (any_lround f32:$Rn)), (!cast(FCVTASUWSr) f32:$Rn)>; -def : Pat<(i32 (lround f64:$Rn)), +def : Pat<(i32 (any_lround f64:$Rn)), (!cast(FCVTASUWDr) f64:$Rn)>; -def : Pat<(i64 (lround f32:$Rn)), +def : Pat<(i64 (any_lround f32:$Rn)), (!cast(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (lround f64:$Rn)), +def : Pat<(i64 (any_lround f64:$Rn)), (!cast(FCVTASUXDr) f64:$Rn)>; -def : Pat<(i64 (llround f32:$Rn)), +def : Pat<(i64 (any_llround f32:$Rn)), (!cast(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (llround f64:$Rn)), +def : Pat<(i64 (any_llround f64:$Rn)), (!cast(FCVTASUXDr) f64:$Rn)>; //===----------------------------------------------------------------------===// @@ -3949,20 +4055,20 @@ defm FCVT : FPConversion<"fcvt">; // Floating point single operand instructions. //===----------------------------------------------------------------------===// -defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; -defm FMOV : SingleOperandFPData<0b0000, "fmov">; -defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; -defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; -defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>; -defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; +defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; +defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; +defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; -defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; +defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; +defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; let SchedRW = [WriteFDiv] in { -defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; +defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; } let Predicates = [HasFRInt3264] in { @@ -3972,44 +4078,48 @@ let Predicates = [HasFRInt3264] in { defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; } // HasFRInt3264 +// Emitting strict_lrint as two instructions is valid as any exceptions that +// occur will happen in exactly one of the instructions (e.g. 
if the input is +// not an integer the inexact exception will happen in the FRINTX but not then +// in the FCVTZS as the output of FRINTX is an integer). let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lrint f16:$Rn)), + def : Pat<(i32 (any_lrint f16:$Rn)), (FCVTZSUWHr (!cast(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (lrint f16:$Rn)), + def : Pat<(i64 (any_lrint f16:$Rn)), (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (llrint f16:$Rn)), + def : Pat<(i64 (any_llrint f16:$Rn)), (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>; } -def : Pat<(i32 (lrint f32:$Rn)), +def : Pat<(i32 (any_lrint f32:$Rn)), (FCVTZSUWSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i32 (lrint f64:$Rn)), +def : Pat<(i32 (any_lrint f64:$Rn)), (FCVTZSUWDr (!cast(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (lrint f32:$Rn)), +def : Pat<(i64 (any_lrint f32:$Rn)), (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (lrint f64:$Rn)), +def : Pat<(i64 (any_lrint f64:$Rn)), (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (llrint f32:$Rn)), +def : Pat<(i64 (any_llrint f32:$Rn)), (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (llrint f64:$Rn)), +def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>; //===----------------------------------------------------------------------===// // Floating point two operand instructions. //===----------------------------------------------------------------------===// -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; +defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; +defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; } -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; +defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; @@ -4024,13 +4134,13 @@ def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), // Floating point three operand instructions. 
//===----------------------------------------------------------------------===// -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; +defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; + TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; + TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; + TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; // The following def pats catch the case where the LHS of an FMA is negated. // The TriOpFrag above catches the case where the middle operand is negated. @@ -4159,25 +4269,25 @@ def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), (zext (v8i8 V64:$opB))), (AArch64vashr v8i16:$src, (i32 15))))), (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; -def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)), - (zext (extract_high_v16i8 V128:$opB))))), +def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), + (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), - (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), - (zext (extract_high_v16i8 V128:$opB))), + (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), + (zext (extract_high_v16i8 (v16i8 V128:$opB)))), (AArch64vashr v8i16:$src, (i32 15))))), (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), (zext (v4i16 V64:$opB))))), (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; -def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)), - (zext (extract_high_v8i16 V128:$opB))))), +def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), + (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), (zext (v2i32 V64:$opB))))), (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; -def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)), - (zext (extract_high_v4i32 V128:$opB))))), +def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), + (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; @@ -4189,7 +4299,7 @@ defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; -defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; +defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), (CMLTv8i8rz V64:$Rn)>; @@ -4219,9 +4329,9 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; +def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; defm FCVTMS : 
SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; @@ -4233,16 +4343,16 @@ def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), def : Pat<(concat_vectors V64:$Rd, (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), +def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; +def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", int_aarch64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; +defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass SIMDTwoVectorFPToIntSatPats { @@ -4272,15 +4382,15 @@ def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; -defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; +defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; +defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; +defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; +defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; +defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; +defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; let Predicates = [HasFRInt3264] in { defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; @@ -4290,7 +4400,7 @@ let Predicates = [HasFRInt3264] in { } // HasFRInt3264 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; +defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; @@ -4312,9 
+4422,9 @@ defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", - BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; -defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; + BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >; +defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>; +defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; @@ -4324,7 +4434,7 @@ defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; +defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; @@ -4348,15 +4458,15 @@ def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; multiclass SIMDVectorLShiftLongBySizeBHSPats { def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), (SHLLv8i8 V64:$Rn)>; - def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), + def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)), (SHLLv16i8 V128:$Rn)>; def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), (SHLLv4i16 V64:$Rn)>; - def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), + def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)), (SHLLv8i16 V128:$Rn)>; def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), (SHLLv2i32 V64:$Rn)>; - def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), + def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)), (SHLLv4i32 V128:$Rn)>; } @@ -4426,7 +4536,7 @@ def : Pat<(v8i16 (concat_vectors //===----------------------------------------------------------------------===// defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; -defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>; +defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>; defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; @@ -4447,33 +4557,33 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast("FABD"#VT) VT:$Rn, V } defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>; +defm 
FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; // MLA and MLS are generated in MachineCombine defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; @@ -4484,7 +4594,7 @@ defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>; +defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>; defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; @@ -4496,14 +4606,14 @@ defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrd defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>; +defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>; defm SRSHL : 
SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>; +defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>; defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; @@ -4513,7 +4623,7 @@ defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>; +defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", @@ -4753,11 +4863,13 @@ defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; -let Predicates = [HasFullFP16] in { +let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; } +let Predicates = [HasNEON] in { def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; +} defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>; defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", @@ -4765,9 +4877,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>; -defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>; -defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; @@ -4862,9 +4974,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; defm FCVTZU : 
SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>; -defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>; -defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>; +defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>; +defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>; +defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; @@ -4980,23 +5092,21 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. let Predicates = [HasNEON] in { -def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))), +def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; -def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))), +def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>; -def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))), +def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>; -def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))), +def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; let Predicates = [HasFullFP16] in { -def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))), +def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; -def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))), +def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>; } -} - // If an integer is about to be converted to a floating point value, // just load it on the floating point unit. // Here are the patterns for 8 and 16-bits to float. @@ -5083,6 +5193,7 @@ def : Pat <(f64 (uint_to_fp (i32 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. +} // let Predicates = [HasNEON] //===----------------------------------------------------------------------===// // Advanced SIMD three different-sized vector instructions. 
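The round-trip patterns above (any_sint_to_fp of any_fp_to_sint, plus the unsigned pair) are what let a float -> int -> float truncation stay entirely inside the FP/SIMD register file. A minimal C++ sketch of source code expected to exercise them, assuming an AArch64 clang at -O2 (illustrative only; the exact assembly depends on compiler version and flags):

    // Expected to select "fcvtzs d0, d0; scvtf d0, d0" via the
    // FCVTZSv1i64/SCVTFv1i64 patterns above, avoiding a GPR round trip.
    double truncate_to_integer(double X) {
      return static_cast<double>(static_cast<long long>(X));
    }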
@@ -5102,10 +5213,10 @@ defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; + TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>; defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", @@ -5123,10 +5234,10 @@ defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; + TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>; defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", @@ -5161,74 +5272,15 @@ multiclass Neon_mul_acc_widen_patterns; } -defm : Neon_mul_acc_widen_patterns; -defm : Neon_mul_acc_widen_patterns; -defm : Neon_mul_acc_widen_patterns; -defm : Neon_mul_acc_widen_patterns; -// Additional patterns for SMULL and UMULL -multiclass Neon_mul_widen_patterns { - def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))), - (INST8B V64:$Rn, V64:$Rm)>; - def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))), - (INST4H V64:$Rn, V64:$Rm)>; - def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))), - (INST2S V64:$Rn, V64:$Rm)>; -} - -defm : Neon_mul_widen_patterns; -defm : Neon_mul_widen_patterns; - -// Patterns for smull2/umull2. 
-multiclass Neon_mul_high_patterns { - def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm))), - (INST8B V128:$Rn, V128:$Rm)>; - def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm))), - (INST4H V128:$Rn, V128:$Rm)>; - def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm))), - (INST2S V128:$Rn, V128:$Rm)>; -} - -defm : Neon_mul_high_patterns; -defm : Neon_mul_high_patterns; - -// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL -multiclass Neon_mulacc_widen_patterns { - def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))), - (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>; - def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))), - (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>; - def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))), - (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>; -} - -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, - SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, - UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, - SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; -defm : Neon_mulacc_widen_patterns< - TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, - UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; - // Patterns for 64-bit pmull def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), (PMULLv1i64 V64:$Rn, V64:$Rm)>; @@ -5392,19 +5444,22 @@ defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; +// Only the lower half of the result of the inner FADDP is used in the patterns +// below, so the second operand does not matter. Re-use the first input +// operand, so no additional dependencies need to be introduced. 
let Predicates = [HasFullFP16] in { def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), (FADDPv2i16p (EXTRACT_SUBREG - (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))), + (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn), dsub))>; def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), - (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>; + (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>; } def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), (FADDPv2i32p (EXTRACT_SUBREG - (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))), + (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; @@ -5856,24 +5911,28 @@ defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; -// Patterns for uaddv(uaddlp(x)) ==> uaddlv -def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, - (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))), - (i64 0))), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (UADDLVv8i8v V64:$op), hsub), ssub)>; -def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp - (v16i8 V128:$op))))), (i64 0))), - (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (UADDLVv16i8v V128:$op), hsub), ssub)>; -def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>; - -// Patterns for addp(uaddlp(x))) ==> uaddlv -def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>; -def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>; +multiclass SIMDAcrossLaneLongPairIntrinsic { + // Patterns for addv(addlp(x)) ==> addlv + def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, + (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))), + (i64 0))), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast(Opc#"v8i8v") V64:$op), hsub), ssub)>; + def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast(Opc#"v16i8v") V128:$op), hsub), ssub)>; + def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast(Opc#"v8i16v") V128:$op), ssub)>; + + // Patterns for addp(addlp(x))) ==> addlv + def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))), + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast(Opc#"v4i16v") V64:$op), ssub)>; + def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast(Opc#"v4i32v") V128:$op), dsub)>; +} + +defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>; +defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>; // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. 
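The SIMDAcrossLaneLongPairIntrinsic multiclass above folds a pairwise widening add followed by an across-vector reduction into a single long reduction, and the refactoring extends the fold from the unsigned form (UADDLV) to the signed one (SADDLV). A small ACLE-level sketch of the shape being matched, assuming <arm_neon.h> and an AArch64 compiler (illustrative demo code, not from the patch):

    #include <arm_neon.h>

    // vpaddl_u8 corresponds to AArch64uaddlp and vaddv_u16 to the across-vector
    // add; with the patterns above the pair should collapse into a single
    // "uaddlv h0, v0.8b", i.e. the same result as vaddlv_u8(V).
    uint16_t sum_bytes(uint8x8_t V) {
      return vaddv_u16(vpaddl_u8(V));
    }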
@@ -6185,6 +6244,14 @@ def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; +let Predicates = [HasNEON] in { + // Using the MOVI to materialize fp constants. + def : Pat<(f32 fpimm32SIMDModImmType4:$in), + (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), + (i32 24)), + ssub)>; +} + def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; @@ -6273,18 +6340,18 @@ let hasSideEffects = 0 in { // On the other hand, there are quite a few valid combinatorial options due to // the commutativity of multiplication and the fact that (-x) * y = x * (-y). defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; + TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; + TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; multiclass FMLSIndexedAfterNegPatterns { // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit @@ -6363,22 +6430,22 @@ multiclass FMLSIndexedAfterNegPatterns { } defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>; +defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; -def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), +def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv2i32_indexed V64:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), +def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv4i32_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), +def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), (FMULv2i64_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), (i64 0))>; @@ -6397,11 +6464,10 @@ defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, 
node:$RHS))>>; defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_aarch64_neon_smull>; + TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>; defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", @@ -6412,11 +6478,10 @@ defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", int_aarch64_neon_sqrdmlsh>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_aarch64_neon_umull>; + TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>; // A scalar sqdmull with the second operand being a vector lane can be // handled directly with the indexed instruction encoding. @@ -6425,22 +6490,6 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), VectorIndexS:$idx)), (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; -// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands -// have no common bits. -def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), - [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ - if (N->getOpcode() == ISD::ADD) - return true; - return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); -}]> { - let GISelPredicateCode = [{ - // Only handle G_ADD for now. FIXME. build capability to compute whether - // operands of G_OR have common bits set or not. - return MI.getOpcode() == TargetOpcode::G_ADD; - }]; -} - - //---------------------------------------------------------------------------- // AdvSIMD scalar shift instructions //---------------------------------------------------------------------------- @@ -6480,7 +6529,7 @@ def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -// Patterns for FP16 Instrinsics - requires reg copy to/from as i16s not supported. +// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported. 
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)), (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; @@ -6787,7 +6836,7 @@ class SExtLoadi8CVTf32Pat dsub)), 0), ssub)))>, - Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>; + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; @@ -6807,7 +6856,8 @@ class SExtLoadi16CVTf32Pat INST, hsub), 0), - ssub)))>, Requires<[NotForCodeSize]>; + ssub)))>, + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; @@ -6841,7 +6891,7 @@ class SExtLoadi16CVTf64Pat dsub)), 0), dsub)))>, - Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>; + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; @@ -6860,7 +6910,8 @@ class SExtLoadi32CVTf64Pat INST, ssub), 0), - dsub)))>, Requires<[NotForCodeSize]>; + dsub)))>, + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; @@ -7216,14 +7267,6 @@ def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0 //---------------------------------------------------------------------------- // FIXME: Like for X86, these should go in their own separate .td file. -def def32 : PatLeaf<(i32 GPR32:$src), [{ - return isDef32(*N); -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; - // For an anyext, we don't care what the high bits are, so we can perform an // INSERT_SUBREF into an IMPLICIT_DEF. 
def : Pat<(i64 (anyext GPR32:$src)), @@ -7387,99 +7430,16 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)), // // Natural vector casts (64 bit) -def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>; - -def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; +foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in + foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in + def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), + (VT FPR64:$src)>; // Natural vector casts (128 bit) -def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2f64 
(AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; -def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; +foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in + foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in + def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), + (VT FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; @@ -8093,17 
+8053,17 @@ defm : InsertSubvectorUndef; def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; -def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), - (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), +def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), + (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, // so we match on v4f32 here, not v2f32. This will also catch adding // the low two lanes of a true v4f32 vector. -def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), - (vector_extract (v4f32 FPR128:$Rn), (i64 1))), +def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), + (vector_extract (v4f32 FPR128:$Rn), (i64 1))), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; -def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), - (vector_extract (v8f16 FPR128:$Rn), (i64 1))), +def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), + (vector_extract (v8f16 FPR128:$Rn), (i64 1))), (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Scalar 64-bit shifts in FPR64 registers. diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 6aefc1fdb599..eaf39fc0dbb1 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -9,6 +9,12 @@ // This file contains a pass that performs load / store related peephole // optimizations. This pass should be run after register allocation. // +// The pass runs after the PrologEpilogInserter where we emit the CFI +// instructions. In order to preserve the correctness of the unwind information, +// the pass should not change the order of any two instructions, one of which +// has the FrameSetup/FrameDestroy flag or, alternatively, it should apply an ad-hoc fix +// to unwind information. +// //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" @@ -31,6 +37,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -549,26 +556,6 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { } } -static bool isPairedLdSt(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - return false; - case AArch64::LDPSi: - case AArch64::LDPSWi: - case AArch64::LDPDi: - case AArch64::LDPQi: - case AArch64::LDPWi: - case AArch64::LDPXi: - case AArch64::STPSi: - case AArch64::STPDi: - case AArch64::STPQi: - case AArch64::STPWi: - case AArch64::STPXi: - case AArch64::STGPi: - return true; - } -} - static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) { unsigned OpcA = FirstMI.getOpcode(); @@ -603,7 +590,7 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) { // Returns the scale and offset range of pre/post indexed variants of MI. static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset) { - bool IsPaired = isPairedLdSt(MI); + bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI); bool IsTagStore = isTagStore(MI); // ST*G and all paired ldst have the same scale in pre/post-indexed variants // as in the "unsigned offset" variant. 
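To make the new file-header comment concrete: once PrologEpilogInserter has emitted CFI directives, moving a frame-related instruction past another instruction can leave the unwind tables describing the wrong machine state. A hypothetical helper (not part of the patch) sketching the conservative rule the comment states:

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Two instructions may be freely reordered only when neither carries a
    // frame flag; otherwise the pass must either give up or patch the unwind
    // information itself (as mergeUpdateInsn later does via maybeMoveCFI).
    static bool mayReorderWithoutCFIFixup(const MachineInstr &A,
                                          const MachineInstr &B) {
      auto IsFrameRelated = [](const MachineInstr &MI) {
        return MI.getFlag(MachineInstr::FrameSetup) ||
               MI.getFlag(MachineInstr::FrameDestroy);
      };
      return !IsFrameRelated(A) && !IsFrameRelated(B);
    }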
@@ -625,17 +612,8 @@ static MachineOperand &getLdStRegOp(MachineInstr &MI, bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI); if (IsPreLdSt) PairedRegOp += 1; - unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0; - return MI.getOperand(Idx); -} - -static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) { - unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1; - return MI.getOperand(Idx); -} - -static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) { - unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2; + unsigned Idx = + AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0; return MI.getOperand(Idx); } @@ -645,12 +623,14 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); int LoadSize = TII->getMemScale(LoadInst); int StoreSize = TII->getMemScale(StoreInst); - int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst) - ? getLdStOffsetOp(StoreInst).getImm() - : getLdStOffsetOp(StoreInst).getImm() * StoreSize; - int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst) - ? getLdStOffsetOp(LoadInst).getImm() - : getLdStOffsetOp(LoadInst).getImm() * LoadSize; + int UnscaledStOffset = + TII->hasUnscaledLdStOffset(StoreInst) + ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() + : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize; + int UnscaledLdOffset = + TII->hasUnscaledLdStOffset(LoadInst) + ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() + : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize; return (UnscaledStOffset <= UnscaledLdOffset) && (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); } @@ -729,7 +709,7 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { case AArch64::STPWi: case AArch64::STPXi: // Make sure this is a reg+imm (as opposed to an address reloc). - if (!getLdStOffsetOp(MI).isImm()) + if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) return false; return true; @@ -763,17 +743,18 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. const MachineOperand &BaseRegOp = - MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I); + MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI) + : AArch64InstrInfo::getLdStBaseOp(*I); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI; - if (getLdStOffsetOp(*I).getImm() == - getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) + if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() == + AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) RtMI = &*MergeMI; else RtMI = &*I; - int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); + int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm(); // Change the scaled offset from small to large type. 
if (IsScaled) { assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); @@ -923,6 +904,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, assert(all_of(MI.operands(), [this, &RenameReg](const MachineOperand &MOP) { return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() || + MOP.isUndef() || !TRI->regsOverlap(MOP.getReg(), *RenameReg); }) && "Rename register used between paired instruction, trashing the " @@ -936,10 +918,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. const MachineOperand &BaseRegOp = - MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I); + MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired) + : AArch64InstrInfo::getLdStBaseOp(*I); - int Offset = getLdStOffsetOp(*I).getImm(); - int PairedOffset = getLdStOffsetOp(*Paired).getImm(); + int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm(); + int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm(); bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode()); if (IsUnscaled != PairedIsUnscaled) { // We're trying to pair instructions that differ in how they are scaled. If @@ -974,7 +957,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, RtMI = &*I; Rt2MI = &*Paired; } - int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); + int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm(); // Scale the immediate offset, if necessary. if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) { assert(!(OffsetImm % TII->getMemScale(*RtMI)) && @@ -1132,12 +1115,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) && "Unsupported ld/st match"); assert(LoadSize <= StoreSize && "Invalid load size"); - int UnscaledLdOffset = IsUnscaled - ? getLdStOffsetOp(*LoadI).getImm() - : getLdStOffsetOp(*LoadI).getImm() * LoadSize; - int UnscaledStOffset = IsUnscaled - ? getLdStOffsetOp(*StoreI).getImm() - : getLdStOffsetOp(*StoreI).getImm() * StoreSize; + int UnscaledLdOffset = + IsUnscaled + ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() + : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize; + int UnscaledStOffset = + IsUnscaled + ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() + : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize; int Width = LoadSize * 8; Register DestReg = IsStoreXReg ? Register(TRI->getMatchingSuperReg( @@ -1235,7 +1220,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator MBBI = I; MachineInstr &LoadMI = *I; - Register BaseReg = getLdStBaseOp(LoadMI).getReg(); + Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg(); // If the load is the first instruction in the block, there's obviously // not any matching store. @@ -1264,7 +1249,8 @@ bool AArch64LoadStoreOpt::findMatchingStore( // Also we can't handle stores without an immediate offset operand, // while the operand might be the address for a global variable. 
if (MI.mayStore() && isMatchingStore(LoadMI, MI) && - BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() && + BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() && + AArch64InstrInfo::getLdStOffsetOp(MI).isImm() && isLdOffsetInRangeOfSt(LoadMI, MI, TII) && ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) { StoreI = MBBI; @@ -1467,18 +1453,19 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, return true; } -// Check if we can find a physical register for renaming. This register must: -// * not be defined up to FirstMI (checking DefinedInBB) -// * not used between the MI and the defining instruction of the register to -// rename (checked using UsedInBetween). +// Check if we can find a physical register for renaming \p Reg. This register +// must: +// * not be defined already in \p DefinedInBB; DefinedInBB must contain all +// defined registers up to the point where the renamed register will be used, +// * not used in \p UsedInBetween; UsedInBetween must contain all accessed +// registers in the range the rename register will be used, // * is available in all used register classes (checked using RequiredClasses). static Optional tryToFindRegisterToRename( - MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB, + const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl &RequiredClasses, const TargetRegisterInfo *TRI) { - auto &MF = *FirstMI.getParent()->getParent(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Checks if any sub- or super-register of PR is callee saved. auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) { @@ -1499,7 +1486,7 @@ static Optional tryToFindRegisterToRename( }); }; - auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); + auto *RegClass = TRI->getMinimalPhysRegClass(Reg); for (const MCPhysReg &PR : *RegClass) { if (DefinedInBB.available(PR) && UsedInBetween.available(PR) && !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) && @@ -1530,8 +1517,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool MayLoad = FirstMI.mayLoad(); bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI); Register Reg = getLdStRegOp(FirstMI).getReg(); - Register BaseReg = getLdStBaseOp(FirstMI).getReg(); - int Offset = getLdStOffsetOp(FirstMI).getImm(); + Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg(); + int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm(); int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); @@ -1566,7 +1553,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, Flags.setSExtIdx(-1); if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) && - getLdStOffsetOp(MI).isImm()) { + AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) { assert(MI.mayLoadOrStore() && "Expected memory operation."); // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. @@ -1574,8 +1561,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. 
- Register MIBaseReg = getLdStBaseOp(MI).getReg(); - int MIOffset = getLdStOffsetOp(MI).getImm(); + Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg(); + int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm(); bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI); if (IsUnscaled != MIIsUnscaled) { // We're trying to pair instructions that differ in how they are scaled. @@ -1606,15 +1593,16 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // can't be paired: bail and keep looking. if (IsPreLdSt) { bool IsOutOfBounds = MIOffset != TII->getMemScale(MI); - bool IsBaseRegUsed = - !UsedRegUnits.available(getLdStBaseOp(MI).getReg()); - bool IsBaseRegModified = - !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg()); + bool IsBaseRegUsed = !UsedRegUnits.available( + AArch64InstrInfo::getLdStBaseOp(MI).getReg()); + bool IsBaseRegModified = !ModifiedRegUnits.available( + AArch64InstrInfo::getLdStBaseOp(MI).getReg()); // If the stored value and the address of the second instruction is // the same, it needs to be using the updated register and therefore // it must not be folded. - bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(), - getLdStBaseOp(MI).getReg()); + bool IsMIRegTheSame = + TRI->regsOverlap(getLdStRegOp(MI).getReg(), + AArch64InstrInfo::getLdStBaseOp(MI).getReg()); if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified || IsMIRegTheSame) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, @@ -1722,8 +1710,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, if (*MaybeCanRename) { Optional MaybeRenameReg = tryToFindRegisterToRename( - FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, - TRI); + *FirstMI.getParent()->getParent(), Reg, DefinedInBB, + UsedInBetween, RequiredClasses, TRI); if (MaybeRenameReg) { Flags.setRenameReg(*MaybeRenameReg); Flags.setMergeForward(true); @@ -1760,6 +1748,28 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, return E; } +static MachineBasicBlock::iterator +maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) { + auto End = MI.getParent()->end(); + if (MaybeCFI == End || + MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION || + !(MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy)) || + AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP) + return End; + + const MachineFunction &MF = *MI.getParent()->getParent(); + unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex]; + switch (CFI.getOperation()) { + case MCCFIInstruction::OpDefCfa: + case MCCFIInstruction::OpDefCfaOffset: + return MaybeCFI; + default: + return End; + } +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, @@ -1769,6 +1779,12 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator NextI = next_nodbg(I, E); + + // If updating the SP and the following instruction is CFA offset related CFI + // instruction move it after the merged instruction. + MachineBasicBlock::iterator CFI = + IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E; + // Return the instruction following the merged instruction, which is // the instruction following our unmerged load. 
Unless that's the add/sub // instruction we're merging, in which case it's the one after that. @@ -1786,12 +1802,12 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, MachineInstrBuilder MIB; int Scale, MinOffset, MaxOffset; getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset); - if (!isPairedLdSt(*I)) { + if (!AArch64InstrInfo::isPairedLdSt(*I)) { // Non-paired instruction. MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .add(getLdStRegOp(*Update)) .add(getLdStRegOp(*I)) - .add(getLdStBaseOp(*I)) + .add(AArch64InstrInfo::getLdStBaseOp(*I)) .addImm(Value / Scale) .setMemRefs(I->memoperands()) .setMIFlags(I->mergeFlagsWith(*Update)); @@ -1801,12 +1817,15 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, .add(getLdStRegOp(*Update)) .add(getLdStRegOp(*I, 0)) .add(getLdStRegOp(*I, 1)) - .add(getLdStBaseOp(*I)) + .add(AArch64InstrInfo::getLdStBaseOp(*I)) .addImm(Value / Scale) .setMemRefs(I->memoperands()) .setMIFlags(I->mergeFlagsWith(*Update)); } - (void)MIB; + if (CFI != E) { + MachineBasicBlock *MBB = I->getParent(); + MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI); + } if (IsPreIdx) { ++NumPreFolded; @@ -1888,8 +1907,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; - Register BaseReg = getLdStBaseOp(MemMI).getReg(); - int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI); + Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); + int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() * + TII->getMemScale(MemMI); // Scan forward looking for post-index opportunities. Updating instructions // can't be formed if the memory instruction doesn't have the offset we're @@ -1904,7 +1924,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( // behavior in this case unlike normal stores, and always performs writeback // after reading the source register value. if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) { - bool IsPairedInsn = isPairedLdSt(MemMI); + bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI); for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { Register DestReg = getLdStRegOp(MemMI, i).getReg(); if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) @@ -1965,8 +1985,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( MachineBasicBlock::iterator MBBI = I; MachineFunction &MF = *MemMI.getMF(); - Register BaseReg = getLdStBaseOp(MemMI).getReg(); - int Offset = getLdStOffsetOp(MemMI).getImm(); + Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); + int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm(); // If the load/store is the first instruction in the block, there's obviously // not any matching update. Ditto if the memory offset isn't zero. @@ -1975,7 +1995,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( // If the base register overlaps a destination register, we can't // merge the update. if (!isTagStore(MemMI)) { - bool IsPairedInsn = isPairedLdSt(MemMI); + bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI); for (unsigned i = 0, e = IsPairedInsn ? 
2 : 1; i != e; ++i) { Register DestReg = getLdStRegOp(MemMI, i).getReg(); if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) @@ -2045,7 +2065,7 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( // Make sure this is a reg+imm. // FIXME: It is possible to extend it to handle reg+reg cases. - if (!getLdStOffsetOp(MI).isImm()) + if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) return false; // Look backward up to LdStLimit instructions. @@ -2099,7 +2119,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { // range, plus allow an extra one in case we find a later insn that matches // with Offset-1) bool IsUnscaled = TII->hasUnscaledLdStOffset(MI); - int Offset = getLdStOffsetOp(MI).getImm(); + int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1; // Allow one more for offset. if (Offset > 0) @@ -2166,7 +2186,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate // The immediate in the load/store is scaled by the size of the memory // operation. The immediate in the add we're looking for, // however, is not, so adjust here. - int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI); + int UnscaledOffset = + AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI); // Look forward to try to find a pre-index instruction. For example, // ldr x1, [x0, #64] @@ -2268,7 +2289,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; - Subtarget = &static_cast(Fn.getSubtarget()); + Subtarget = &Fn.getSubtarget(); TII = static_cast(Subtarget->getInstrInfo()); TRI = Subtarget->getRegisterInfo(); AA = &getAnalysis().getAAResults(); diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 1fc5617b49f6..5c7fb0deecd0 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { MachineLoopInfo *MLI; MachineRegisterInfo *MRI; + using OpcodePair = std::pair; template using SplitAndOpcFunc = - std::function(T, unsigned, T &, T &)>; + std::function(T, unsigned, T &, T &)>; using BuildMIFunc = - std::function; + std::function; /// For instructions where an immediate operand could be split into two /// separate immediate instructions, use the splitTwoPartImm two handle the @@ -83,20 +84,19 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { /// %dst = ri %tmp (encode half IMM) [...] 
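The doc comment above describes splitting one wide immediate into two 12-bit halves so that a MOVi32imm feeding an add/sub can be rewritten as two immediate-form instructions. A standalone sketch of that arithmetic, using a hypothetical helper name (the in-tree splitAddSubImm is the authoritative version):

    #include <cstdint>
    #include <optional>
    #include <utility>

    // Returns {Imm0, Imm1} such that Imm == (Imm0 << 12) + Imm1 with both
    // halves encodable as 12-bit add/sub immediates; nullopt when a single
    // immediate instruction suffices or the value needs a full MOV sequence.
    std::optional<std::pair<uint64_t, uint64_t>> splitImm24(uint64_t Imm) {
      if ((Imm >> 24) != 0)
        return std::nullopt;           // too wide for two shifted immediates
      uint64_t Imm1 = Imm & 0xfffULL;  // low half, plain #imm
      uint64_t Imm0 = Imm >> 12;       // high half, #imm with LSL #12
      if (Imm0 == 0 || Imm1 == 0)
        return std::nullopt;           // one immediate instruction handles it
      return std::make_pair(Imm0, Imm1);
    }

For example, adding 0x123456 to a register then becomes "add x0, x0, #0x123, lsl #12" followed by "add x0, x0, #0x456".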
template bool splitTwoPartImm(MachineInstr &MI, - SmallSetVector &ToBeRemoved, SplitAndOpcFunc SplitAndOpc, BuildMIFunc BuildInstr); bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, MachineInstr *&SubregToRegMI); template - bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, - SmallSetVector &ToBeRemoved); + bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI); template - bool visitAND(unsigned Opc, MachineInstr &MI, - SmallSetVector &ToBeRemoved); - bool visitORR(MachineInstr &MI, - SmallSetVector &ToBeRemoved); + bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI); + + template + bool visitAND(unsigned Opc, MachineInstr &MI); + bool visitORR(MachineInstr &MI); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -157,8 +157,7 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { template bool AArch64MIPeepholeOpt::visitAND( - unsigned Opc, MachineInstr &MI, - SmallSetVector &ToBeRemoved) { + unsigned Opc, MachineInstr &MI) { // Try below transformation. // // MOVi32imm + ANDWrr ==> ANDWri + ANDWri @@ -170,28 +169,27 @@ bool AArch64MIPeepholeOpt::visitAND( // mov + and instructions. return splitTwoPartImm( - MI, ToBeRemoved, - [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional { + MI, + [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional { if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) - return Opc; + return std::make_pair(Opc, Opc); return None; }, - [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0, + [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, unsigned Imm1, Register SrcReg, Register NewTmpReg, Register NewDstReg) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) .addReg(SrcReg) .addImm(Imm0); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) .addReg(NewTmpReg) .addImm(Imm1); }); } -bool AArch64MIPeepholeOpt::visitORR( - MachineInstr &MI, SmallSetVector &ToBeRemoved) { +bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { // Check this ORR comes from below zero-extend pattern. // // def : Pat<(i64 (zext GPR32:$src)), @@ -216,19 +214,38 @@ bool AArch64MIPeepholeOpt::visitORR( // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is // real AArch64 instruction and if it is not, do not process the opcode // conservatively. - if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) + if (SrcMI->getOpcode() == TargetOpcode::COPY && + SrcMI->getOperand(1).getReg().isVirtual()) { + const TargetRegisterClass *RC = + MRI->getRegClass(SrcMI->getOperand(1).getReg()); + + // A COPY from an FPR will become a FMOVSWr, so do so now so that we know + // that the upper bits are zero. 
+ if (RC != &AArch64::FPR32RegClass && + ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) || + SrcMI->getOperand(1).getSubReg() != AArch64::ssub)) + return false; + Register CpySrc = SrcMI->getOperand(1).getReg(); + if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) { + CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass); + BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), CpySrc) + .add(SrcMI->getOperand(1)); + } + BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), + TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg()) + .addReg(CpySrc); + SrcMI->eraseFromParent(); + } + else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) return false; Register DefReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(2).getReg(); MRI->replaceRegWith(DefReg, SrcReg); MRI->clearKillFlags(SrcReg); - // replaceRegWith changes MI's definition register. Keep it for SSA form until - // deleting MI. - MI.getOperand(0).setReg(DefReg); - ToBeRemoved.insert(&MI); - LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); + MI.eraseFromParent(); return true; } @@ -255,8 +272,7 @@ static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { template bool AArch64MIPeepholeOpt::visitADDSUB( - unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, - SmallSetVector &ToBeRemoved) { + unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) { // Try below transformation. // // MOVi32imm + ADDWrr ==> ADDWri + ADDWri @@ -271,25 +287,65 @@ bool AArch64MIPeepholeOpt::visitADDSUB( // multiple `mov` + `and/sub` instructions. return splitTwoPartImm( - MI, ToBeRemoved, + MI, [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, - T &Imm1) -> Optional { + T &Imm1) -> Optional { if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) - return PosOpc; + return std::make_pair(PosOpc, PosOpc); if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) - return NegOpc; + return std::make_pair(NegOpc, NegOpc); return None; }, - [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0, + [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, unsigned Imm1, Register SrcReg, Register NewTmpReg, Register NewDstReg) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) .addReg(SrcReg) .addImm(Imm0) .addImm(12); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) + .addReg(NewTmpReg) + .addImm(Imm1) + .addImm(0); + }); +} + +template +bool AArch64MIPeepholeOpt::visitADDSSUBS( + OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) { + // Try the same transformation as ADDSUB but with additional requirement + // that the condition code usages are only for Equal and Not Equal + return splitTwoPartImm( + MI, + [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI]( + T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional { + OpcodePair OP; + if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) + OP = PosOpcs; + else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) + OP = NegOpcs; + else + return None; + // Check conditional uses last since it is expensive for scanning + // proceeding instructions + MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); + Optional NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI); + if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V) + return None; + return OP; + }, + [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, + unsigned 
@@ -255,8 +272,7 @@ static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
 
 template <typename T>
 bool AArch64MIPeepholeOpt::visitADDSUB(
-    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
-    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
   // Try the below transformation.
   //
   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
   //
@@ -271,25 +287,65 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
   // multiple `mov` + `and/sub` instructions.
 
   return splitTwoPartImm<T>(
-      MI, ToBeRemoved,
+      MI,
       [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
-                       T &Imm1) -> Optional<unsigned> {
+                       T &Imm1) -> Optional<OpcodePair> {
         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
-          return PosOpc;
+          return std::make_pair(PosOpc, PosOpc);
         if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
-          return NegOpc;
+          return std::make_pair(NegOpc, NegOpc);
         return None;
       },
-      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
                    Register NewDstReg) {
         DebugLoc DL = MI.getDebugLoc();
         MachineBasicBlock *MBB = MI.getParent();
-        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
             .addReg(SrcReg)
             .addImm(Imm0)
             .addImm(12);
-        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
+            .addReg(NewTmpReg)
+            .addImm(Imm1)
+            .addImm(0);
+      });
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSSUBS(
+    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
+  // Try the same transformation as ADDSUB, but with the additional requirement
+  // that the condition code is only used for Equal and Not Equal comparisons.
+  return splitTwoPartImm<T>(
+      MI,
+      [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
+          T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
+        OpcodePair OP;
+        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
+          OP = PosOpcs;
+        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
+          OP = NegOpcs;
+        else
+          return None;
+        // Check the conditional uses last, since scanning the instructions in
+        // between is expensive.
+        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
+        Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
+        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
+          return None;
+        return OP;
+      },
+      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
+                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
+                   Register NewDstReg) {
+        DebugLoc DL = MI.getDebugLoc();
+        MachineBasicBlock *MBB = MI.getParent();
+        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
+            .addReg(SrcReg)
+            .addImm(Imm0)
+            .addImm(12);
+        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
             .addReg(NewTmpReg)
             .addImm(Imm1)
             .addImm(0);
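Both visitADDSUB and visitADDSSUBS rely on the same decomposition: the immediate must split as (Imm0 << 12) + Imm1 with two non-zero 12-bit halves, matching the shifted-immediate forms of add/sub. A minimal stand-alone sketch of that check (it leaves out the pass's extra test that the constant cannot already be materialized by a single mov):

#include <cstdint>
#include <cstdio>

// True if Imm == (Imm0 << 12) + Imm1 with both halves non-zero 12-bit values,
// i.e. it can be folded into two add/sub shifted-immediate instructions.
static bool splitTwoPartImm(uint64_t Imm, uint64_t &Imm0, uint64_t &Imm1) {
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~0xffffffULL) != 0)
    return false;
  Imm0 = (Imm >> 12) & 0xfff; // becomes "add ..., #Imm0, lsl #12"
  Imm1 = Imm & 0xfff;         // becomes "add ..., #Imm1"
  return true;
}

int main() {
  uint64_t Hi, Lo;
  if (splitTwoPartImm(0x123456, Hi, Lo))
    printf("add x0, x1, #0x%llx, lsl #12 ; add x0, x0, #0x%llx\n",
           (unsigned long long)Hi, (unsigned long long)Lo);
  return 0;
}

For the flag-setting variants only the second instruction may set flags, and splitting the operation changes the carry and overflow that a single ADDS/SUBS of the full immediate would have produced, while Z (and N) of the final instruction remain meaningful. That is why the first lambda above rejects any consumer of C or V and keeps the transform only for Equal/Not-Equal style uses.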
@@ -338,7 +394,7 @@ bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
 
 template <typename T>
 bool AArch64MIPeepholeOpt::splitTwoPartImm(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    MachineInstr &MI,
     SplitAndOpcFunc SplitAndOpc, BuildMIFunc BuildInstr) {
   unsigned RegSize = sizeof(T) * 8;
   assert((RegSize == 32 || RegSize == 64) &&
@@ -357,39 +413,63 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
   // number since it was sign extended when we assign to the 64-bit Imm.
   if (SubregToRegMI)
     Imm &= 0xFFFFFFFF;
-  unsigned Opcode;
+  OpcodePair Opcode;
   if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
-    Opcode = R.getValue();
+    Opcode = *R;
   else
     return false;
 
-  // Create new ADD/SUB MIs.
+  // Create new MIs using the first and second opcodes. The opcodes might
+  // differ for flag-setting operations that should set flags only on the
+  // second instruction:
+  //   NewTmpReg = Opcode.first  SrcReg    Imm0
+  //   NewDstReg = Opcode.second NewTmpReg Imm1
+
+  // Determine the register classes for the destinations and register operands.
   MachineFunction *MF = MI.getMF();
-  const TargetRegisterClass *RC =
-      TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
-  const TargetRegisterClass *ORC =
-      TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
+  const TargetRegisterClass *FirstInstrDstRC =
+      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
+  const TargetRegisterClass *FirstInstrOperandRC =
+      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
+  const TargetRegisterClass *SecondInstrDstRC =
+      (Opcode.first == Opcode.second)
+          ? FirstInstrDstRC
+          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
+  const TargetRegisterClass *SecondInstrOperandRC =
+      (Opcode.first == Opcode.second)
+          ? FirstInstrOperandRC
+          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
+
+  // Get the old destination register and create the new ones.
   Register DstReg = MI.getOperand(0).getReg();
   Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(RC);
-  Register NewDstReg = MRI->createVirtualRegister(RC);
-
-  MRI->constrainRegClass(SrcReg, RC);
-  MRI->constrainRegClass(NewTmpReg, ORC);
-  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-
+  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
+  // If DstReg is not virtual (likely WZR or XZR), reuse that same destination
+  // register.
+  Register NewDstReg = DstReg.isVirtual()
+                           ? MRI->createVirtualRegister(SecondInstrDstRC)
+                           : DstReg;
+
+  // Constrain the registers based on their new uses.
+  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
+  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
+  if (DstReg != NewDstReg)
+    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+
+  // Call the delegating operation to build the instructions.
   BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
 
-  MRI->replaceRegWith(DstReg, NewDstReg);
   // replaceRegWith changes MI's definition register. Keep it for SSA form until
-  // deleting MI.
-  MI.getOperand(0).setReg(DstReg);
+  // deleting MI, but only if we made a new destination register.
+  if (DstReg != NewDstReg) {
+    MRI->replaceRegWith(DstReg, NewDstReg);
+    MI.getOperand(0).setReg(DstReg);
+  }
 
-  // Record the MIs that need to be removed.
-  ToBeRemoved.insert(&MI);
+  // Remove the MIs that are now dead.
+  MI.eraseFromParent();
   if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
+    SubregToRegMI->eraseFromParent();
+  MovMI->eraseFromParent();
 
   return true;
 }
 
@@ -407,45 +487,57 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   assert(MRI->isSSA() && "Expected to be run on SSA form!");
 
   bool Changed = false;
-  SmallSetVector<MachineInstr *, 8> ToBeRemoved;
 
   for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
       switch (MI.getOpcode()) {
       default:
        break;
      case AArch64::ANDWrr:
-        Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
+        Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
-        Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
+        Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      case AArch64::ORRWrs:
-        Changed = visitORR(MI, ToBeRemoved);
+        Changed = visitORR(MI);
        break;
      case AArch64::ADDWrr:
-        Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
-                                        ToBeRemoved);
+        Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
-        Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
-                                        ToBeRemoved);
+        Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
-        Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
-                                        ToBeRemoved);
+        Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
-        Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
-                                        ToBeRemoved);
+        Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
+        break;
+      case AArch64::ADDSWrr:
+        Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
+                                          {AArch64::SUBWri, AArch64::SUBSWri},
+                                          MI);
+        break;
+      case AArch64::SUBSWrr:
+        Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
+                                          {AArch64::ADDWri, AArch64::ADDSWri},
+                                          MI);
+        break;
+      case AArch64::ADDSXrr:
+        Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
+                                          {AArch64::SUBXri, AArch64::SUBSXri},
+                                          MI);
+        break;
+      case AArch64::SUBSXrr:
+        Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
+                                          {AArch64::ADDXri, AArch64::ADDSXri},
+                                          MI);
        break;
      }
    }
  }
 
-  for (MachineInstr *MI : ToBeRemoved)
-    MI->eraseFromParent();
-
   return Changed;
 }
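Erasing instructions from inside this loop is only safe because make_early_inc_range advances the iterator before handing each element to the loop body. A plain C++ sketch of the same idiom over a std::list (not LLVM's implementation):

#include <cstdio>
#include <list>

int main() {
  std::list<int> Block = {1, 2, 3, 4, 5};
  for (auto It = Block.begin(); It != Block.end();) {
    auto Cur = It++;    // advance first, as llvm::make_early_inc_range does
    if (*Cur % 2 == 0)
      Block.erase(Cur); // erasing the visited element leaves It valid
  }
  for (int V : Block)
    printf("%d ", V);   // prints: 1 3 5
  printf("\n");
  return 0;
}

The visited instruction, or an earlier one such as the feeding mov, may be erased freely; erasing the not-yet-visited next instruction would still invalidate the prefetched iterator.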
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 6950675c5d53..a2ab2b855d80 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -15,8 +15,11 @@
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64InstrInfo.h"
-#include <llvm/IR/Metadata.h>
-#include <llvm/IR/Module.h>
+#include "AArch64Subtarget.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
 
 using namespace llvm;
 
@@ -30,7 +33,7 @@ void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
 
 void AArch64FunctionInfo::initializeBaseYamlFields(
     const yaml::AArch64FunctionInfo &YamlMFI) {
-  if (YamlMFI.HasRedZone.hasValue())
+  if (YamlMFI.HasRedZone)
     HasRedZone = YamlMFI.HasRedZone;
 }
 
@@ -77,15 +80,17 @@ static bool ShouldSignWithBKey(const Function &F) {
   return Key.equals_insensitive("b_key");
 }
 
-AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
+AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF_) : MF(&MF_) {
   // If we already know that the function doesn't have a redzone, set
   // HasRedZone here.
-  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
+  if (MF->getFunction().hasFnAttribute(Attribute::NoRedZone))
     HasRedZone = false;
 
-  const Function &F = MF.getFunction();
+  const Function &F = MF->getFunction();
   std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F);
   SignWithBKey = ShouldSignWithBKey(F);
+  // TODO: skip functions that have no instrumented allocas for optimization
+  IsMTETagged = F.hasFnAttribute(Attribute::SanitizeMemTag);
 
   if (!F.hasFnAttribute("branch-target-enforcement")) {
     if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
@@ -101,6 +106,15 @@ AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
   BranchTargetEnforcement = BTIEnable.equals_insensitive("true");
 }
 
+MachineFunctionInfo *AArch64FunctionInfo::clone(
+    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+    const {
+  AArch64FunctionInfo *InfoClone = DestMF.cloneInfo<AArch64FunctionInfo>(*this);
+  InfoClone->MF = &DestMF;
+  return InfoClone;
+}
+
 bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
   if (!SignReturnAddress)
     return false;
@@ -111,6 +125,27 @@
 
 bool AArch64FunctionInfo::shouldSignReturnAddress() const {
   return shouldSignReturnAddress(llvm::any_of(
-      MF.getFrameInfo().getCalleeSavedInfo(),
+      MF->getFrameInfo().getCalleeSavedInfo(),
       [](const auto &Info) { return Info.getReg() == AArch64::LR; }));
 }
+
+bool AArch64FunctionInfo::needsDwarfUnwindInfo() const {
+  if (!NeedsDwarfUnwindInfo)
+    NeedsDwarfUnwindInfo = MF->needsFrameMoves() &&
+                           !MF->getTarget().getMCAsmInfo()->usesWindowsCFI();
+
+  return *NeedsDwarfUnwindInfo;
+}
+
+bool AArch64FunctionInfo::needsAsyncDwarfUnwindInfo() const {
+  if (!NeedsAsyncDwarfUnwindInfo) {
+    const Function &F = MF->getFunction();
+    // The check for "minsize" is because epilogue unwind info is not emitted
+    // (yet) for homogeneous epilogues, outlined functions, and functions
+    // that code was outlined from.
+    NeedsAsyncDwarfUnwindInfo = needsDwarfUnwindInfo() &&
+                                F.getUWTableKind() == UWTableKind::Async &&
+                                !F.hasMinSize();
+  }
+  return *NeedsAsyncDwarfUnwindInfo;
+}
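needsDwarfUnwindInfo and needsAsyncDwarfUnwindInfo follow the same lazily computed, memoized pattern: an Optional<bool> starts out empty, is filled on the first query, and answers from the cache afterwards, which is why the fields are mutable inside const methods. A stand-alone sketch of the pattern with std::optional and a stubbed-out computation (hypothetical names, not the LLVM classes):

#include <cstdio>
#include <optional>

struct FuncInfo {
  mutable std::optional<bool> NeedsUnwindInfo; // empty until first query

  bool computeExpensively() const { return true; } // stand-in for real checks

  bool needsUnwindInfo() const {
    if (!NeedsUnwindInfo)                  // first call: compute and cache
      NeedsUnwindInfo = computeExpensively();
    return *NeedsUnwindInfo;               // later calls: cached answer
  }
};

int main() {
  FuncInfo FI;
  printf("%d\n", FI.needsUnwindInfo());
  return 0;
}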
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e5e08e6c00d6..f070f989a5b7 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
@@ -36,7 +37,7 @@ class MachineInstr;
 /// contains private AArch64-specific information for each MachineFunction.
 class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// Backreference to the machine function.
-  MachineFunction &MF;
+  MachineFunction *MF;
 
   /// Number of bytes of arguments this function has on the stack. If the callee
   /// is expected to restore the argument stack this should be a multiple of 16,
@@ -115,7 +116,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// SRetReturnReg - sret lowering includes returning the value of the
   /// returned struct in a register. This field holds the virtual register into
   /// which the sret argument is passed.
-  unsigned SRetReturnReg = 0;
+  Register SRetReturnReg;
+
   /// SVE stack sizes (for predicates and data vectors) are maintained here
   /// rather than in FrameInfo, as the placement and Stack IDs are target
   /// specific.
@@ -173,9 +175,29 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// The stack slot where the Swift asynchronous context is stored.
   int SwiftAsyncContextFrameIdx = std::numeric_limits<int>::max();
 
+  bool IsMTETagged = false;
+
+  /// True if the function has a Scalable Vector or Scalable Predicate
+  /// register argument or return type.
+  bool IsSVECC = false;
+
+  /// True if the function needs unwind information.
+  mutable Optional<bool> NeedsDwarfUnwindInfo;
+
+  /// True if the function needs asynchronous unwind information.
+  mutable Optional<bool> NeedsAsyncDwarfUnwindInfo;
+
 public:
   explicit AArch64FunctionInfo(MachineFunction &MF);
 
+  MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const override;
+
+  bool isSVECC() const { return IsSVECC; }
+  void setIsSVECC(bool s) { IsSVECC = s; }
+
   void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
 
   unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
@@ -395,6 +417,7 @@ public:
   bool shouldSignReturnAddress(bool SpillsLR) const;
 
   bool shouldSignWithBKey() const { return SignWithBKey; }
+  bool isMTETagged() const { return IsMTETagged; }
 
   bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
 
@@ -408,6 +431,9 @@ public:
   }
   int getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; }
 
+  bool needsDwarfUnwindInfo() const;
+  bool needsAsyncDwarfUnwindInfo() const;
+
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
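The header change above turns the MF backreference into a pointer precisely so that the new clone() can retarget it at the destination function while member-wise copying every cached field. A minimal sketch of that shape (hypothetical types, not the LLVM classes):

#include <cstdio>

struct Function {};

struct Info {
  Function *F; // a pointer, so a clone can be re-pointed after copying
  explicit Info(Function &Fn) : F(&Fn) {}

  Info cloneFor(Function &Dest) const {
    Info Copy = *this; // member-wise copy keeps all cached fields
    Copy.F = &Dest;    // retarget the backreference, as clone() does above
    return Copy;
  }
};

int main() {
  Function A, B;
  Info IA(A);
  Info IB = IA.cloneFor(B);
  printf("%d\n", IB.F == &B); // 1
  return 0;
}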
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
new file mode 100644
index 000000000000..6c8845ee8598
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
@@ -0,0 +1,82 @@
+//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineScheduler.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+
+using namespace llvm;
+
+static bool needReorderStoreMI(const MachineInstr *MI) {
+  if (!MI)
+    return false;
+
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case AArch64::STURQi:
+  case AArch64::STRQui:
+    if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
+      return false;
+    LLVM_FALLTHROUGH;
+  case AArch64::STPQi:
+    return AArch64InstrInfo::getLdStOffsetOp(*MI).isImm();
+  }
+
+  return false;
+}
+
+// Return true if two stores with the same base address may overlap writes.
+static bool mayOverlapWrite(const MachineInstr &MI0, const MachineInstr &MI1,
+                            int64_t &Off0, int64_t &Off1) {
+  const MachineOperand &Base0 = AArch64InstrInfo::getLdStBaseOp(MI0);
+  const MachineOperand &Base1 = AArch64InstrInfo::getLdStBaseOp(MI1);
+
+  // The writes may overlap if the two stores do not share the same base.
+  if (!Base0.isIdenticalTo(Base1))
+    return true;
+
+  int StoreSize0 = AArch64InstrInfo::getMemScale(MI0);
+  int StoreSize1 = AArch64InstrInfo::getMemScale(MI1);
+  Off0 = AArch64InstrInfo::hasUnscaledLdStOffset(MI0.getOpcode())
+             ? AArch64InstrInfo::getLdStOffsetOp(MI0).getImm()
+             : AArch64InstrInfo::getLdStOffsetOp(MI0).getImm() * StoreSize0;
+  Off1 = AArch64InstrInfo::hasUnscaledLdStOffset(MI1.getOpcode())
+             ? AArch64InstrInfo::getLdStOffsetOp(MI1).getImm()
+             : AArch64InstrInfo::getLdStOffsetOp(MI1).getImm() * StoreSize1;
+
+  const MachineInstr &MI = (Off0 < Off1) ? MI0 : MI1;
+  int Multiples = AArch64InstrInfo::isPairedLdSt(MI) ? 2 : 1;
+  int StoreSize = AArch64InstrInfo::getMemScale(MI) * Multiples;
+
+  return llabs(Off0 - Off1) < StoreSize;
+}
+
+bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                              SchedCandidate &TryCand) {
+  bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
+
+  if (Cand.isValid()) {
+    MachineInstr *Instr0 = TryCand.SU->getInstr();
+    MachineInstr *Instr1 = Cand.SU->getInstr();
+
+    if (!needReorderStoreMI(Instr0) || !needReorderStoreMI(Instr1))
+      return OriginalResult;
+
+    int64_t Off0, Off1;
+    // With the same base address and non-overlapping writes,
+    if (!mayOverlapWrite(*Instr0, *Instr1, Off0, Off1)) {
+      TryCand.Reason = NodeOrder;
+      // order them by ascending offsets.
+      return Off0 < Off1;
+    }
+  }
+
+  return OriginalResult;
+}
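mayOverlapWrite boils down to interval arithmetic on byte offsets: unscaled opcodes already carry byte offsets, scaled ones are multiplied by the access size, and the distance between the two offsets is compared against the width of the lower-addressed store (doubled for store-pair instructions). A simplified stand-alone sketch that uses one width for both stores (assumed values in main):

#include <cstdio>

// Two stores at byte offsets Off0/Off1 from the same base overlap iff their
// distance is smaller than the width of the lower-addressed store.
static bool mayOverlap(long Off0, long Off1, long StoreSize) {
  long Dist = Off0 < Off1 ? Off1 - Off0 : Off0 - Off1;
  return Dist < StoreSize;
}

int main() {
  printf("%d\n", mayOverlap(0, 16, 16)); // 0: two 16-byte q-stores, adjacent
  printf("%d\n", mayOverlap(0, 8, 16));  // 1: second starts inside the first
  return 0;
}

When the writes are disjoint, tryCandidate overrides the generic post-RA order and prefers the store with the lower offset, so store addresses ascend.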
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.h b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
new file mode 100644
index 000000000000..23df015986d1
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
@@ -0,0 +1,33 @@
+//===- AArch64MachineScheduler.h - Custom AArch64 MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom AArch64 MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for AArch64 post RA scheduling.
+class AArch64PostRASchedStrategy : public PostGenericScheduler {
+public:
+  AArch64PostRASchedStrategy(const MachineSchedContext *C) :
+    PostGenericScheduler(C) {}
+
+protected:
+  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index e8217eaf6ed5..c7657f37d16d 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -157,16 +157,19 @@ static bool isCryptoEORPair(const MachineInstr *FirstMI,
   return false;
 }
 
-/// Literal generation.
-static bool isLiteralsPair(const MachineInstr *FirstMI,
-                           const MachineInstr &SecondMI) {
+static bool isAdrpAddPair(const MachineInstr *FirstMI,
+                          const MachineInstr &SecondMI) {
   // Assume the 1st instr to be a wildcard if it is unspecified.
-
-  // PC relative address.
   if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
       SecondMI.getOpcode() == AArch64::ADDXri)
     return true;
+  return false;
+}
 
+/// Literal generation.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+                           const MachineInstr &SecondMI) {
+  // Assume the 1st instr to be a wildcard if it is unspecified.
   // 32 bit immediate.
   if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
       (SecondMI.getOpcode() == AArch64::MOVKWi &&
@@ -397,6 +400,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
     return true;
   if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
     return true;
+  if (ST.hasFuseAdrpAdd() && isAdrpAddPair(FirstMI, SecondMI))
+    return true;
   if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
     return true;
   if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index f443cd03935c..4555f1a3ebb0 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -14,6577 +14,6608 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
 
+#include "llvm/ADT/ArrayRef.h"
+
 // 31 entries have cost 0
-// 242 entries have cost 1
-// 1447 entries have cost 2
-// 3602 entries have cost 3
-// 1237 entries have cost 4
-// 2 entries have cost 5
+// 756 entries have cost 1
+// 3690 entries have cost 2
+// 2084 entries have cost 3
 
 // This table is 6561*4 = 26244 bytes in size.
-static const unsigned PerfectShuffleTable[6561+1] = { - 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS - 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS - 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> - 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS - 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> - 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> - 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS - 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> - 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS - 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> - 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> - 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> - 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> - 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> - 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS - 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> - 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> - 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS - 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> - 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> - 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> - 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> - 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS - 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> - 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> - 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> - 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> - 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> - 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> - 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> - 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> - 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> - 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> - 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> - 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS - 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> - 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> - 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> - 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> - 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> - 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> - 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> - 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> - 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> - 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> - 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS - 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> - 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> - 
3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS - 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> - 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> - 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> - 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> - 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> - 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> - 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> - 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> - 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> - 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> - 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> - 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS - 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS - 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS - 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> - 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS - 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> - 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS - 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> - 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> - 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> - 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> - 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> - 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> - 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> - 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS - 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> - 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> - 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> - 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> - 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS - 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> - 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> - 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> - 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS - 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS - 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> - 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> - 835584U, // <0,1,2,3>: Cost 0 copy LHS - 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS - 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> - 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> - 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> - 835584U, // <0,1,2,u>: Cost 0 copy LHS - 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> - 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> - 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> - 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> - 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS - 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> - 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> - 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> - 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> - 2568159334U, // <0,1,4,0>: Cost 3 vext1 
<3,0,1,4>, LHS - 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> - 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> - 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> - 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS - 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> - 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS - 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> - 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> - 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> - 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> - 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> - 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> - 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> - 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> - 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> - 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS - 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> - 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> - 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> - 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS - 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> - 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> - 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> - 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> - 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> - 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> - 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> - 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> - 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> - 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> - 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> - 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> - 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> - 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS - 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS - 835584U, // <0,1,u,3>: Cost 0 copy LHS - 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS - 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> - 835584U, // <0,1,u,u>: Cost 0 copy LHS - 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> - 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS - 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> - 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> - 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> - 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> - 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> - 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS - 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> - 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> - 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> - 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS - 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> - 
2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> - 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> - 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> - 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> - 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> - 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> - 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS - 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> - 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> - 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> - 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS - 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> - 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> - 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> - 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> - 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> - 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS - 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> - 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> - 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> - 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS - 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS - 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS - 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> - 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> - 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> - 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> - 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> - 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> - 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> - 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS - 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> - 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> - 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> - 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> - 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> - 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> - 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> - 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> - 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> - 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> - 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> - 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> - 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> - 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> - 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2> - 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> - 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> - 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678563118U, // <0,2,u,2>: Cost 2 
vuzpl LHS, LHS - 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS - 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS - 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS - 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> - 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> - 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> - 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> - 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS - 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> - 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> - 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> - 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS - 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> - 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> - 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> - 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> - 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> - 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> - 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> - 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> - 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> - 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS - 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> - 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> - 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS - 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598154746U, // <0,3,2,6>: Cost 3 vext1 , <6,2,7,3> - 2598155258U, // <0,3,2,7>: Cost 3 vext1 , <7,0,1,2> - 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS - 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> - 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> - 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> - 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> - 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> - 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> - 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> - 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> - 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> - 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> - 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> - 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> - 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> - 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> - 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> - 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS - 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> - 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> - 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS - 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> - 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> - 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> - 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> - 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> - 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> - 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> - 
2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> - 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> - 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> - 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> - 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> - 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> - 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> - 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> - 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> - 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> - 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> - 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> - 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> - 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> - 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> - 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> - 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> - 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> - 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> - 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS - 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> - 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> - 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS - 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598203898U, // <0,3,u,6>: Cost 3 vext1 , <6,2,7,3> - 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> - 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS - 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> - 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> - 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> - 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> - 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> - 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> - 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS - 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS - 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> - 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> - 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> - 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS - 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS - 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS - 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS - 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS - 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> - 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> - 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> - 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS - 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS - 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS - 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> - 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> - 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> - 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> - 3692349954U, 
// <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> - 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> - 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS - 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> - 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> - 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> - 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> - 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> - 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> - 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> - 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS - 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS - 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> - 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS - 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS - 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> - 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> - 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> - 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS - 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> - 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> - 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> - 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> - 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> - 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> - 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> - 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> - 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> - 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> - 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> - 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS - 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> - 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> - 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> - 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS - 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> - 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> - 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS - 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS - 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> - 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS - 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS - 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS - 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> - 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS - 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> - 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS - 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> - 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5> - 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS - 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> - 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> - 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS - 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, 
LHS - 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS - 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> - 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> - 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> - 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> - 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> - 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> - 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> - 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS - 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS - 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> - 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> - 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> - 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS - 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> - 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> - 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS - 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS - 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> - 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> - 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> - 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> - 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> - 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> - 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> - 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> - 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> - 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> - 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> - 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> - 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> - 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> - 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS - 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> - 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> - 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS - 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> - 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> - 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> - 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> - 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> - 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> - 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> - 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> - 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> - 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS - 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> - 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> - 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> - 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS - 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> - 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> - 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> - 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> - 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS - 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> - 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> - 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> - 2562510134U, // 
<0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS - 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> - 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> - 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> - 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS - 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS - 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS - 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> - 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> - 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> - 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS - 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> - 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> - 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> - 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS - 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> - 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> - 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> - 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> - 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> - 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS - 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS - 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS - 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> - 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> - 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> - 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS - 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> - 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> - 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS - 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS - 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> - 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> - 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> - 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> - 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> - 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> - 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS - 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS - 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> - 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> - 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> - 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> - 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> - 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> - 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> - 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> - 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> - 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS - 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> - 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> - 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2> - 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS - 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> - 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS - 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS - 
2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> - 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> - 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> - 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> - 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> - 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> - 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS - 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> - 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> - 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> - 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> - 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> - 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> - 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> - 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> - 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> - 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> - 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> - 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> - 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> - 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> - 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> - 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> - 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> - 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> - 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS - 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> - 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> - 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> - 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> - 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS - 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS - 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> - 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS - 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> - 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> - 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> - 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> - 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> - 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> - 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS - 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> - 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> - 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> - 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> - 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> - 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> - 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> - 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> - 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> - 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS - 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> - 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> - 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> - 2586504502U, // <0,7,2,4>: Cost 
3 vext1 <6,0,7,2>, RHS - 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> - 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> - 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> - 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> - 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> - 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3> - 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> - 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> - 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> - 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> - 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> - 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> - 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS - 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> - 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> - 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> - 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> - 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS - 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> - 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> - 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS - 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> - 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> - 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> - 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> - 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> - 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> - 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> - 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> - 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> - 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> - 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> - 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> - 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> - 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS - 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> - 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> - 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> - 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> - 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> - 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> - 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> - 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> - 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS - 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> - 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> - 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> - 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> - 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> - 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> - 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> - 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> - 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u> - 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS - 2669066421U, // <0,7,u,6>: Cost 3 vext2 , - 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> - 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> - 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS - 1544896614U, // <0,u,0,1>: Cost 2 
vext2 <0,2,0,u>, LHS - 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS - 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, - 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS - 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, - 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0> - 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS - 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> - 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS - 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS - 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS - 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, - 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS - 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS - 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> - 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS - 835584U, // <0,u,2,3>: Cost 0 copy LHS - 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS - 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, - 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS - 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> - 835584U, // <0,u,2,u>: Cost 0 copy LHS - 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> - 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> - 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> - 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> - 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS - 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> - 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS - 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS - 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, - 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS - 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS - 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> - 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, - 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, - 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS - 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> - 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS - 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS - 2262496983U, // <0,u,6,1>: Cost 3 vrev - 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> - 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, - 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS - 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> - 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> - 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> - 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u> - 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS - 2562728854U, // <0,u,7,1>: Cost 3 vext1 
<2,0,u,7>, <1,2,3,0> - 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> - 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> - 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS - 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, - 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> - 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> - 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS - 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS - 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS - 835584U, // <0,u,u,3>: Cost 0 copy LHS - 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS - 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS - 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> - 835584U, // <0,u,u,u>: Cost 0 copy LHS - 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> - 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> - 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> - 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> - 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> - 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> - 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> - 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> - 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> - 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS - 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> - 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> - 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS - 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> - 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> - 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> - 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS - 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> - 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> - 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> - 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> - 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> - 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> - 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> - 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> - 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> - 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> - 67944550U, // <1,0,3,2>: Cost 1 vrev LHS - 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> - 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS - 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> - 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> - 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> - 68386972U, // <1,0,3,u>: Cost 1 vrev LHS - 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> - 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> - 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> - 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> - 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> - 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS - 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> - 3666383940U, // <1,0,4,7>: 
Cost 4 vext1 <7,1,0,4>, <7,1,0,4> - 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS - 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> - 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS - 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> - 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> - 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> - 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> - 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS - 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS - 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> - 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> - 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> - 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> - 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> - 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> - 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> - 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> - 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> - 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> - 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> - 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> - 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> - 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> - 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> - 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> - 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> - 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> - 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> - 67985515U, // <1,0,u,2>: Cost 1 vrev LHS - 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> - 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> - 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS - 2669082807U, // <1,0,u,6>: Cost 3 vext2 , - 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> - 68427937U, // <1,0,u,u>: Cost 1 vrev LHS - 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> - 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS - 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> - 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> - 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> - 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> - 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> - 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> - 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> - 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS - 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> - 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> - 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> - 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> - 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS - 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> - 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> - 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> - 2622637734U, // <1,1,2,3>: Cost 3 vext2 
<0,u,1,1>, <2,3,0,1> - 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS - 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> - 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> - 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0> - 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> - 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> - 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> - 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> - 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS - 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> - 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> - 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> - 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> - 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS - 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS - 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> - 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> - 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> - 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS - 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS - 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> - 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS - 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> - 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> - 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> - 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> - 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> - 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> - 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> - 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> - 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> - 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> - 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> - 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> - 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> - 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> - 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> - 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> - 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> - 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> - 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> - 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS - 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> - 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> - 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> - 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> - 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS - 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, - 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS - 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS - 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, - 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS - 2635251712U, // <1,2,0,0>: 
Cost 3 vext2 <3,0,1,2>, <0,0,0,0> - 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS - 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> - 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> - 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> - 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> - 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> - 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> - 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS - 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> - 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> - 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> - 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS - 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS - 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> - 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> - 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> - 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS - 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> - 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> - 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> - 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> - 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> - 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> - 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> - 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> - 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> - 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS - 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> - 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS - 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> - 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS - 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> - 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> - 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> - 2598759198U, // <1,2,4,3>: Cost 3 vext1 , <3,u,1,2> - 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> - 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS - 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> - 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> - 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS - 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS - 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> - 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> - 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS - 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> - 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0> - 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> - 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS - 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> - 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> - 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> - 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> - 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> - 3763488716U, // <1,2,6,5>: 
Cost 4 vext3 <0,u,1,1>, <2,6,5,7> - 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> - 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> - 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7> - 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> - 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> - 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> - 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> - 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> - 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> - 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> - 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> - 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> - 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS - 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> - 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS - 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS - 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS - 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> - 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS - 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> - 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> - 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> - 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> - 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> - 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> - 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS - 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> - 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> - 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> - 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS - 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS - 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> - 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> - 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> - 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS - 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> - 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> - 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> - 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS - 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> - 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2> - 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> - 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS - 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> - 2598826490U, // <1,3,3,6>: Cost 3 vext1 , <6,2,7,3> - 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> - 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS - 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS - 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> - 3832761290U, // <1,3,4,2>: 
Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> - 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> - 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS - 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> - 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> - 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS - 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS - 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> - 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> - 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> - 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS - 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> - 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> - 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS - 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS - 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> - 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> - 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> - 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> - 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> - 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, - 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> - 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> - 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> - 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> - 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> - 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> - 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> - 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> - 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> - 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> - 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS - 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> - 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> - 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS - 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS - 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> - 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS - 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS - 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> - 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS - 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> - 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> - 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> - 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> - 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> - 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1> - 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> - 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> - 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> - 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> - 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1> - 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS - 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> - 2892402217U, // 
<1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS - 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> - 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> - 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> - 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> - 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> - 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> - 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> - 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS - 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS - 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> - 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> - 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> - 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> - 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> - 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> - 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> - 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> - 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> - 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> - 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> - 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> - 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> - 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS - 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> - 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> - 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS - 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS - 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> - 2598913774U, // <1,4,5,2>: Cost 3 vext1 , <2,3,u,1> - 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> - 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS - 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS - 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS - 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> - 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> - 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> - 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS - 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> - 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> - 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS - 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> - 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> - 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> - 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> - 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS - 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> - 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> - 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> - 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> - 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS - 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS - 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> - 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> - 
2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, - 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> - 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> - 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> - 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> - 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> - 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> - 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> - 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS - 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> - 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> - 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> - 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> - 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> - 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> - 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> - 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS - 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> - 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> - 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> - 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> - 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> - 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> - 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> - 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> - 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS - 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> - 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> - 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> - 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> - 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> - 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> - 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> - 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> - 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS - 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS - 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> - 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> - 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> - 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> - 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS - 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> - 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS - 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS - 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> - 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> - 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> - 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS - 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> - 2667794530U, // <1,5,5,6>: Cost 3 vext2 , <5,6,7,0> - 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> - 2557187886U, // 
<1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS - 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> - 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5> - 2667794938U, // <1,5,6,2>: Cost 3 vext2 , <6,2,7,3> - 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> - 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> - 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> - 2667795256U, // <1,5,6,6>: Cost 3 vext2 , <6,6,6,6> - 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> - 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> - 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS - 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> - 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> - 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> - 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS - 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> - 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> - 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> - 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS - 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, - 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, - 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, - 1594054682U, // <1,5,u,4>: Cost 2 vext2 , - 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, - 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS - 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS - 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> - 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS - 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> - 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> - 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> - 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> - 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> - 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS - 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS - 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> - 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> - 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> - 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> - 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> - 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> - 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> - 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS - 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> - 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> - 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> - 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1> - 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS - 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> - 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> - 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS - 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS - 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> - 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> - 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> - 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS - 
4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, - 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> - 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> - 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> - 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> - 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> - 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> - 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> - 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> - 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS - 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS - 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS - 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS - 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> - 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> - 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> - 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> - 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> - 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> - 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> - 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS - 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS - 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> - 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> - 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> - 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> - 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS - 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> - 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> - 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> - 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> - 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> - 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> - 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> - 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> - 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> - 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> - 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> - 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS - 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> - 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> - 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS - 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> - 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, - 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> - 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS - 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, - 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> - 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> - 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> - 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS - 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> - 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1> - 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> - 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> - 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> - 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> - 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS - 2593128550U, // <1,7,1,0>: Cost 3 
vext1 <7,1,7,1>, LHS - 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> - 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> - 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> - 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS - 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> - 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> - 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> - 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> - 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS - 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> - 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> - 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> - 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS - 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> - 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> - 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> - 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS - 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS - 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> - 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> - 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> - 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS - 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> - 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> - 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> - 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS - 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> - 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> - 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> - 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> - 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> - 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS - 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> - 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> - 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS - 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS - 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> - 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> - 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> - 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS - 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> - 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> - 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS - 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS - 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> - 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> - 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> - 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6> - 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS - 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> - 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> - 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> - 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> - 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> - 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> - 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> - 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> - 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS - 
2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7> - 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> - 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> - 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> - 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS - 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS - 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> - 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> - 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS - 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> - 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> - 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> - 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS - 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> - 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS - 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> - 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> - 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> - 1658771190U, // <1,u,0,5>: Cost 2 vext3 , - 2736789248U, // <1,u,0,6>: Cost 3 vext3 , - 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> - 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS - 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS - 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS - 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> - 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS - 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS - 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS - 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 115726126U, // <1,u,3,2>: Cost 1 vrev LHS - 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS - 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS - 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> - 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS - 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS - 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> - 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> - 2263217967U, // <1,u,4,2>: Cost 3 vrev - 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> - 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS - 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS - 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> - 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, - 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS - 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS - 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> - 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2551434550U, // <1,u,5,4>: 
Cost 3 vext1 <0,1,u,5>, RHS - 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS - 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS - 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> - 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, - 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> - 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, - 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> - 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, - 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> - 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> - 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> - 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> - 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> - 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS - 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> - 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, - 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> - 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> - 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS - 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS - 115767091U, // <1,u,u,2>: Cost 1 vrev LHS - 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS - 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS - 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS - 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS - 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS - 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> - 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> - 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> - 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> - 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS - 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> - 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> - 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> - 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> - 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> - 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> - 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS - 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS - 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> - 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> - 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2> - 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS - 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS - 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> - 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> - 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2> - 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS - 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> - 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> - 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> - 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS - 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> - 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> - 2953625764U, // 
<2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> - 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> - 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS - 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> - 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> - 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> - 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> - 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS - 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> - 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> - 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> - 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS - 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> - 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> - 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS - 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> - 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> - 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> - 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> - 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> - 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> - 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> - 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> - 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS - 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> - 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> - 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> - 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> - 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> - 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> - 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> - 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> - 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> - 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> - 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> - 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> - 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> - 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> - 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS - 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> - 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> - 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS - 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS - 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS - 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS - 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> - 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> - 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS - 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> - 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0> - 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> - 
1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> - 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS - 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> - 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> - 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> - 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS - 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> - 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> - 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> - 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> - 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS - 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> - 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> - 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> - 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS - 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> - 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> - 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> - 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> - 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS - 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> - 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> - 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS - 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> - 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> - 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> - 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> - 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> - 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> - 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS - 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS - 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS - 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> - 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> - 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS - 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> - 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> - 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> - 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS - 3740913668U, // <2,1,5,5>: Cost 4 vext2 , <5,5,5,5> - 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> - 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS - 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> - 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS - 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> - 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> - 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS - 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS - 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> - 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> - 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> - 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS - 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> - 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> - 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0> - 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> - 3740915046U, 
// <2,1,7,4>: Cost 4 vext2 , <7,4,5,6>
-  3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
-  3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
-  3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
[... roughly 1,200 further deleted PerfectShuffleTable entries, covering shuffle masks <2,1,7,u> through <3,7,6,2>; every deleted line has the same machine-generated shape as the samples above and below, a 32-bit table value followed by a "// <mask>: Cost N <op>, <operands>" comment ...]
-  2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> - 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> - 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> - 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0> - 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7> - 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS - 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7> - 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7> - 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7> - 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS - 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7> - 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> - 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> - 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, - 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS - 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, - 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS - 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, - 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS - 1595545808U, // <3,7,u,6>: Cost 2 vext2 , - 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS - 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> - 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, - 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, - 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, - 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, - 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, - 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, - 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS - 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, - 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> - 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> - 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS - 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, - 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS - 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u> - 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u> - 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, - 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS - 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3> - 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> - 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, - 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, - 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> - 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, - 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, - 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, - 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3> - 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, - 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS - 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, - 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, - 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, - 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS - 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS - 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS - 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, - 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, - 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, - 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4> - 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, - 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, - 
1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, - 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, - 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS - 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, - 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5> - 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, - 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS - 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5> - 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS - 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, - 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS - 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, - 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6> - 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, - 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, - 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6> - 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, - 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, - 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, - 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS - 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> - 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2> - 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS - 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS - 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7> - 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, - 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS - 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, - 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, - 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS - 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS - 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, - 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, - 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS - 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, - 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS - 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0> - 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1> - 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2> - 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4> - 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4> - 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0> - 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0> - 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0> - 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1> - 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS - 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4> - 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1> - 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS - 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4> - 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1> - 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1> - 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> - 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4> - 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4> - 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0> - 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> - 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7> - 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> - 3668358453U, // <4,0,2,7>: Cost 4 
vext1 <7,4,0,2>, <7,4,0,2> - 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4> - 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS - 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> - 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4> - 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4> - 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS - 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5> - 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7> - 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0> - 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> - 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS - 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5> - 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6> - 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2> - 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS - 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS - 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2> - 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4> - 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS - 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS - 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS - 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2> - 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5> - 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS - 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7> - 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7> - 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5> - 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS - 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS - 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1> - 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS - 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6> - 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS - 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6> - 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6> - 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0> - 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS - 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2> - 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS - 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS - 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0> - 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5> - 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5> - 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0> - 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7> - 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS - 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS - 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS - 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u> - 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS - 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS - 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> - 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u> - 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1> - 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS - 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6> - 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2> - 2622857554U, // 
<4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> - 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4> - 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1> - 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4> - 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS - 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2> - 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4> - 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> - 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3> - 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5> - 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1> - 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7> - 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1> - 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3> - 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS - 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4> - 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2> - 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4> - 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS - 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3> - 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7> - 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2> - 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4> - 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS - 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> - 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> - 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4> - 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS - 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7> - 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2> - 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3> - 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1> - 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0> - 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5> - 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS - 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5> - 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS - 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4> - 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0> - 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS - 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS - 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2> - 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5> - 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2> - 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS - 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7> - 2600686074U, // <4,1,5,6>: Cost 3 vext1 , <6,2,7,3> - 2600686586U, // <4,1,5,7>: Cost 3 vext1 , <7,0,1,2> - 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS - 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS - 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1> - 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2> - 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS - 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS - 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6> - 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7> - 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1> - 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS 
- 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> - 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1> - 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1> - 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4> - 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6> - 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4> - 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7> - 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7> - 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> - 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS - 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2> - 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2> - 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4> - 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS - 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS - 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 2600686586U, // <4,1,u,7>: Cost 3 vext1 , <7,0,1,2> - 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS - 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2> - 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS - 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6> - 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2> - 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> - 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7> - 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> - 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2> - 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS - 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2> - 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1> - 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0> - 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS - 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> - 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7> - 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3> - 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3> - 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> - 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4> - 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3> - 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2> - 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3> - 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0> - 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7> - 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6> - 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2> - 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3> - 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1> - 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1> - 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2> - 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4> - 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5> - 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4> - 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4> - 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4> - 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1> - 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS - 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> - 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4> - 2699208469U, // 
<4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4> - 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS - 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS - 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4> - 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0> - 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS - 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS - 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0> - 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2> - 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS - 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS - 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7> - 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7> - 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS - 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS - 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS - 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2> - 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2> - 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS - 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS - 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6> - 2600767994U, // <4,2,6,6>: Cost 3 vext1 , <6,2,7,3> - 2600768506U, // <4,2,6,7>: Cost 3 vext1 , <7,0,1,2> - 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS - 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> - 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2> - 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2> - 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4> - 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4> - 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7> - 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4> - 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7> - 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> - 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS - 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2> - 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2> - 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS - 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS - 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS - 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u> - 2600784890U, // <4,2,u,7>: Cost 3 vext1 , <7,0,1,2> - 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS - 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0> - 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2> - 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4> - 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3> - 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1> - 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> - 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0> - 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0> - 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2> - 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> - 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1> - 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> - 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4> - 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0> - 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3> - 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1> - 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3> - 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4> - 3626770534U, // <4,3,2,0>: 
Cost 4 vext1 <0,4,3,2>, LHS - 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> - 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2> - 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4> - 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS - 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4> - 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3> - 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3> - 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3> - 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1> - 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1> - 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3> - 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> - 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4> - 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7> - 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7> - 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7> - 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> - 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> - 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> - 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4> - 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0> - 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5> - 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6> - 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4> - 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2> - 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1> - 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS - 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5> - 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5> - 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3> - 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS - 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5> - 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5> - 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4> - 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS - 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS - 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6> - 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6> - 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3> - 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS - 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> - 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6> - 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4> - 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6> - 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1> - 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5> - 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7> - 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7> - 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5> - 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7> - 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7> - 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4> - 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1> - 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS - 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2> - 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u> - 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4> - 2559053110U, // 
<4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS - 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> - 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u> - 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4> - 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u> - 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4> - 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS - 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS - 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1> - 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0> - 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1> - 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2> - 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0> - 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS - 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2> - 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1> - 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> - 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3> - 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3> - 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4> - 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3> - 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3> - 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3> - 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4> - 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4> - 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2> - 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4> - 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4> - 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7> - 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4> - 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4> - 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4> - 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2> - 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4> - 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3> - 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> - 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4> - 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5> - 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4> - 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4> - 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> - 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS - 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4> - 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2> - 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4> - 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS - 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS - 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS - 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS - 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS - 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4> - 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5> - 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5> - 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS - 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS - 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS - 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5> - 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS - 
2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS - 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2> - 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5> - 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6> - 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS - 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6> - 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS - 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4> - 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS - 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2> - 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4> - 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7> - 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4> - 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> - 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4> - 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4> - 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7> - 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> - 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS - 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS - 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS - 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u> - 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS - 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS - 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS - 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u> - 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS - 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0> - 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS - 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5> - 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0> - 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5> - 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7> - 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5> - 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0> - 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS - 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2> - 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5> - 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0> - 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS - 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4> - 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5> - 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6> - 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3> - 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS - 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS - 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3> - 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5> - 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5> - 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5> - 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7> - 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7> - 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS - 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5> - 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2> - 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1> - 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4> - 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3> - 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, 
<3,4,5,0> - 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5> - 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4> - 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5> - 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2> - 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS - 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4> - 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5> - 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4> - 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS - 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5> - 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6> - 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS - 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS - 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3> - 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4> - 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2> - 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5> - 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5> - 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0> - 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS - 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS - 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS - 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6> - 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6> - 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6> - 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS - 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5> - 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6> - 27705344U, // <4,5,6,7>: Cost 0 copy RHS - 27705344U, // <4,5,6,u>: Cost 0 copy RHS - 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS - 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4> - 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> - 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> - 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS - 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> - 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> - 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> - 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS - 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS - 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS - 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, - 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> - 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS - 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, - 27705344U, // <4,5,u,7>: Cost 0 copy RHS - 27705344U, // <4,5,u,u>: Cost 0 copy RHS - 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> - 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> - 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> - 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> - 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> - 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> - 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS - 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS - 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> - 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> - 2618917782U, 
// <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> - 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> - 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> - 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> - 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> - 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS - 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> - 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> - 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> - 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> - 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> - 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> - 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> - 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> - 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> - 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> - 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> - 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> - 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> - 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> - 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> - 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> - 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> - 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> - 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> - 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS - 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3> - 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> - 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> - 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS - 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS - 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> - 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS - 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS - 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> - 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> - 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> - 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> - 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> - 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> - 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS - 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS - 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS - 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> - 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> - 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> - 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> - 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> - 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> - 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS - 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS - 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> - 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> - 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> - 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> - 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> - 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 3852915914U, // <4,6,7,6>: Cost 4 
vuzpl RHS, <7,2,6,3> - 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> - 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS - 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS - 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, - 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> - 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS - 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS - 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS - 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> - 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> - 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> - 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> - 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> - 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> - 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> - 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS - 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> - 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> - 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> - 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> - 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS - 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> - 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> - 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3> - 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> - 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS - 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> - 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> - 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> - 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> - 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> - 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> - 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> - 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> - 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2> - 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> - 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> - 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> - 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> - 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> - 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> - 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> - 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> - 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> - 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> - 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> - 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> - 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> - 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS - 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> - 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> - 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS - 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> - 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> - 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, 
<2,3,4,5> - 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> - 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> - 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> - 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> - 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> - 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> - 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS - 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> - 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> - 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> - 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS - 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> - 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> - 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS - 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS - 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> - 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> - 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> - 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> - 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> - 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> - 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> - 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> - 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS - 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> - 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> - 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS - 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> - 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> - 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS - 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> - 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> - 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, - 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> - 2265397305U, // <4,u,0,5>: Cost 3 vrev - 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> - 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> - 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS - 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> - 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> - 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> - 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, - 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> - 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> - 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, - 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS - 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> - 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> - 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> - 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> - 2733864859U, // <4,u,2,5>: Cost 3 vext3 , - 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> - 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, - 1561118822U, // 
[Elided: a long run of machine-generated perfect-shuffle cost-table entries
removed by this hunk (PerfectShuffleTable data used by LLVM's ARM/AArch64
vector-shuffle lowering). In the original patch each entry sits on its own
"-"-prefixed line of the form

-  2618935446U,	// <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>

i.e. a 32-bit table value followed by a comment giving the four-element
shuffle mask, the instruction-count cost, and the NEON operation (copy, vrev,
vdup, vext1-3, vuzp, vzip, vtrn) with its one or two operand masks ("LHS" and
"RHS" name the unmodified input vectors). The run shown here covers the mask
groups <4,u,...> through <6,5,5,...>; a decoding sketch follows.]
2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5> - 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6> - 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7> - 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7> - 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1> - 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4> - 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4> - 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4> - 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5> - 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5> - 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6> - 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0> - 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1> - 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS - 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7> - 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> - 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2> - 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS - 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5> - 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6> - 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS - 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS - 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS - 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u> - 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u> - 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, - 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS - 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS - 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6> - 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u> - 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u> - 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS - 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> - 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> - 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> - 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> - 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> - 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS - 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS - 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> - 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> - 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> - 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> - 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> - 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> - 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> - 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS - 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> - 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> - 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> - 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> - 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> - 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> - 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> - 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> - 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> - 2646935702U, // <6,6,3,0>: Cost 3 vext2 
<4,u,6,6>, <3,0,1,2> - 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> - 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> - 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> - 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> - 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> - 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> - 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS - 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> - 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS - 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> - 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> - 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> - 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS - 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> - 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> - 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> - 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS - 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3> - 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> - 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> - 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> - 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5> - 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> - 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS - 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS - 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS - 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> - 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> - 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2> - 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS - 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> - 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS - 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> - 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS - 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS - 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> - 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> - 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> - 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS - 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> - 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> - 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS - 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS - 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS - 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> - 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS - 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS - 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS - 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS - 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS - 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS - 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> - 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> - 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> 
- 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> - 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS - 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> - 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> - 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> - 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> - 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> - 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> - 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> - 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> - 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> - 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> - 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> - 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS - 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> - 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS - 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> - 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> - 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> - 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> - 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> - 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> - 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> - 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> - 
1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> - 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> - 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> - 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, - 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS - 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, - 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, - 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, - 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS - 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, - 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, - 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS - 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS - 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, - 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> - 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 1193130221U, // <6,u,0,7>: Cost 2 vrev - 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS - 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS - 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, - 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS - 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS - 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> - 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, - 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> - 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, - 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, - 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 1661245476U, // <6,u,4,2>: Cost 2 vext3 , - 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, - 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS - 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1661614161U, // <6,u,4,7>: Cost 2 vext3 , - 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS - 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS - 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> - 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, - 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> 
- 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS - 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, - 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS - 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> - 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS - 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS - 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS - 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> - 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> - 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS - 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS - 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> - 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> - 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS - 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS - 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS - 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS - 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, - 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS - 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS - 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS - 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS - 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS - 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> - 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> - 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> - 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> - 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> - 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> - 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> - 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> - 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS - 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> - 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS - 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> - 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS - 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> - 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> - 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> - 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> - 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> - 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> - 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> - 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> - 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> - 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> - 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> - 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> - 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> - 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> - 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> - 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> - 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> - 
3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> - 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> - 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> - 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> - 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> - 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> - 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> - 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS - 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> - 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> - 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> - 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS - 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> - 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> - 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> - 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> - 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5> - 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> - 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> - 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> - 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7> - 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> - 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7> - 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> - 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7> - 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> - 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> - 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1> - 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7> - 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2> - 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS - 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS - 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7> - 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> - 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> - 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> - 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7> - 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2> - 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> - 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> - 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS - 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u> - 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> - 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS - 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> - 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, - 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS - 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS - 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS - 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS - 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> - 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS - 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> - 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0> - 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1> - 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> - 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1> - 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6> - 1638318920U, // <7,1,1,3>: 
Cost 2 vext3 RHS, <1,1,3,3> - 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> - 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> - 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> - 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5> - 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> - 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1> - 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3> - 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2> - 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0> - 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5> - 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3> - 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2> - 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0> - 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0> - 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0> - 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> - 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7> - 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5> - 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> - 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> - 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3> - 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> - 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> - 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> - 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> - 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5> - 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5> - 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS - 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1> - 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0> - 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS - 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS - 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> - 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6> - 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> - 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS - 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7> - 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1> - 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1> - 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> - 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7> - 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> - 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> - 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7> - 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7> - 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> - 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7> - 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1> - 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7> - 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> - 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1> - 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3> - 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS - 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS - 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7> - 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0> - 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7> - 3121938539U, // <7,1,7,u>: 
Cost 3 vtrnr <5,7,5,7>, LHS - 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS - 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> - 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0> - 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7> - 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS - 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> - 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7> - 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS - 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7> - 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> - 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> - 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0> - 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0> - 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6> - 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7> - 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> - 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2> - 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7> - 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> - 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0> - 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0> - 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1> - 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> - 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0> - 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> - 2602718850U, // <7,2,1,7>: Cost 3 vext1 , <7,u,1,2> - 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1> - 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1> - 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> - 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> - 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5> - 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> - 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6> - 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5> - 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> - 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> - 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> - 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6> - 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7> - 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> - 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> - 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6> - 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0> - 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> - 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6> - 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> - 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> - 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5> - 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> - 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> - 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0> - 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0> - 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7> - 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> - 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3> - 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> - 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> - 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> - 3766486907U, // <7,2,5,5>: Cost 4 vext3 
<1,3,5,7>, <2,5,5,7> - 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7> - 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0> - 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7> - 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS - 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> - 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> - 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> - 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS - 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> - 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> - 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7> - 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> - 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1> - 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0> - 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5> - 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS - 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS - 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7> - 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6> - 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7> - 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS - 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> - 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> - 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> - 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> - 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> - 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7> - 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0> - 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> - 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> - 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> - 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> - 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2> - 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> - 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> - 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> - 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0> - 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> - 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> - 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> - 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3> - 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5> - 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> - 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> - 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1> - 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> - 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5> - 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> - 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> - 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> - 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0> - 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5> - 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> - 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> - 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> - 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0> - 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1> - 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> - 2637031791U, 
// <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> - 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> - 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> - 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3> - 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7> - 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> - 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> - 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> - 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4> - 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> - 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5> - 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> - 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6> - 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4> - 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> - 2602819686U, // <7,3,5,0>: Cost 3 vext1 , LHS - 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3> - 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> - 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> - 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> - 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> - 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0> - 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0> - 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3> - 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> - 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3> - 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> - 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> - 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> - 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7> - 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> - 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7> - 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> - 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> - 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> - 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> - 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7> - 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> - 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7> - 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> - 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7> - 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1> - 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> - 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> - 1593153452U, // <7,3,u,2>: Cost 2 vext2 , - 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3> - 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> - 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> - 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3> - 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0> - 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> - 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0> - 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS - 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> - 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1> - 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5> - 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> - 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> - 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, 
<4,0,7,1> - 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1> - 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> - 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> - 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3> - 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> - 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3> - 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0> - 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3> - 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3> - 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0> - 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1> - 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3> - 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> - 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1> - 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7> - 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> - 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0> - 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5> - 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0> - 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> - 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5> - 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1> - 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> - 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6> - 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> - 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5> - 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7> - 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4> - 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2> - 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1> - 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4> - 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3> - 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> - 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> - 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7> - 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5> - 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS - 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7> - 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3> - 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5> - 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS - 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7> - 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS - 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7> - 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS - 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1> - 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5> - 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3> - 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4> - 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> - 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7> - 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> - 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> - 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> - 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2> - 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5> - 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6> - 2651641140U, // <7,4,7,4>: Cost 
3 vext2 <5,6,7,4>, <7,4,0,1> - 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> - 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1> - 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7> - 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7> - 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS - 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS - 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3> - 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u> - 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> - 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> - 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS - 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5> - 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS - 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS - 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS - 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS - 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0> - 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> - 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1> - 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1> - 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2> - 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS - 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> - 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1> - 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0> - 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7> - 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> - 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3> - 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> - 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> - 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> - 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1> - 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0> - 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7> - 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4> - 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7> - 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> - 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3> - 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3> - 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4> - 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS - 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5> - 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5> - 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3> - 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS - 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5> - 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5> - 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0> - 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0> - 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS - 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7> - 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5> - 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4> - 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS - 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS - 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5> - 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> - 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> - 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> - 2652974800U, // <7,5,5,1>: Cost 3 
vext2 <5,u,7,5>, <5,1,7,3> - 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3> - 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3> - 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4> - 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> - 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> - 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> - 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> - 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1> - 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7> - 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2> - 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4> - 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5> - 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> - 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> - 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0> - 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0> - 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS - 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3> - 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2> - 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> - 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS - 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> - 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> - 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1> - 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> - 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS - 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS - 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0> - 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u> - 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS - 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> - 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7> - 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0> - 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3> - 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> - 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS - 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4> - 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> - 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> - 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0> - 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0> - 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2> - 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS - 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> - 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> - 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0> - 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3> - 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS - 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> - 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> - 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3> - 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> - 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> - 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> - 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2> - 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1> - 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> - 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> - 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7> - 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> - 1638470147U, // <7,6,2,u>: Cost 2 vext3 
RHS, <6,2,u,3> - 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> - 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3> - 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3> - 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> - 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> - 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7> - 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6> - 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0> - 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0> - 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> - 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> - 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5> - 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> - 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6> - 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS - 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0> - 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5> - 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS - 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS - 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> - 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> - 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4> - 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> - 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> - 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0> - 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7> - 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5> - 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> - 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> - 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3> - 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4> - 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4> - 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> - 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> - 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> - 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> - 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> - 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> - 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2> - 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> - 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> - 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> - 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6> - 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7> - 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> - 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> - 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS - 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2> - 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0> - 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> - 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS - 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6> - 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> - 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> - 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> - 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> - 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> - 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> - 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> - 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> - 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> - 
2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> - 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> - 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> - 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> - 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> - 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> - 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS - 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> - 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> - 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> - 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> - 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> - 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> - 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> - 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> - 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5> - 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> - 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> - 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> - 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> - 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> - 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> - 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> - 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> - 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> - 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3> - 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> - 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> - 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> - 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> - 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> - 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> - 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> - 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> - 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> - 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> - 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> - 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> - 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS - 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> - 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> - 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> - 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> - 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> - 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> - 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> - 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> - 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS - 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> - 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> - 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> - 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS - 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> - 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> - 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> - 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> - 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS - 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> - 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> - 
2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> - 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS - 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> - 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> - 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS - 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS - 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> - 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> - 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> - 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS - 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> - 1595840756U, // <7,7,u,6>: Cost 2 vext2 , - 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS - 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, - 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, - 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, - 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, - 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, - 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, - 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS - 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, - 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS - 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS - 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, - 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS - 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, - 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> - 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, - 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, - 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, - 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, - 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, - 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, - 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, - 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, - 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, - 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, - 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, - 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, - 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, - 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, - 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, - 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, - 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, - 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, - 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, - 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, - 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, - 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, - 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS - 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> - 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, - 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, - 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> - 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> - 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS - 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, - 564582572U, // <7,u,5,u>: 
Cost 1 vext3 RHS, RHS - 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, - 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, - 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> - 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, - 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, - 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> - 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, - 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, - 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, - 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, - 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, - 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> - 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, - 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> - 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, - 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS - 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, - 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, - 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS - 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, - 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, - 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, - 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS - 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS - 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS - 135053414U, // : Cost 1 vdup0 LHS - 1611489290U, // : Cost 2 vext3 LHS, <0,0,1,1> - 1611489300U, // : Cost 2 vext3 LHS, <0,0,2,2> - 2568054923U, // : Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1481706806U, // : Cost 2 vext1 <0,u,0,0>, RHS - 2555449040U, // : Cost 3 vext1 <0,u,0,0>, <5,1,7,3> - 2591282078U, // : Cost 3 vext1 <6,u,0,0>, <6,u,0,0> - 2591945711U, // : Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // : Cost 1 vdup0 LHS - 1493655654U, // : Cost 2 vext1 <2,u,0,1>, LHS - 1860550758U, // : Cost 2 vzipl LHS, LHS - 537747563U, // : Cost 1 vext3 LHS, LHS - 2625135576U, // : Cost 3 vext2 <1,2,u,0>, <1,3,1,3> - 1493658934U, // : Cost 2 vext1 <2,u,0,1>, RHS - 2625135760U, // : Cost 3 vext2 <1,2,u,0>, <1,5,3,7> - 1517548447U, // : Cost 2 vext1 <6,u,0,1>, <6,u,0,1> - 2591290362U, // : Cost 3 vext1 <6,u,0,1>, <7,0,1,2> - 537747612U, // : Cost 1 vext3 LHS, LHS - 1611489444U, // : Cost 2 vext3 LHS, <0,2,0,2> - 2685231276U, // : Cost 3 vext3 LHS, <0,2,1,1> - 1994768486U, // : Cost 2 vtrnl LHS, LHS - 2685231294U, // : Cost 3 vext3 LHS, <0,2,3,1> - 1611489484U, // : Cost 2 vext3 LHS, <0,2,4,6> - 2712068310U, // : Cost 3 vext3 RHS, <0,2,5,7> - 2625136570U, // : Cost 3 vext2 <1,2,u,0>, <2,6,3,7> - 2591962097U, // : Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1611489516U, // : Cost 2 vext3 LHS, <0,2,u,2> - 2954067968U, // : Cost 3 vzipr LHS, <0,0,0,0> - 2685231356U, // : Cost 3 vext3 LHS, <0,3,1,0> - 72589981U, // : Cost 1 vrev LHS - 2625137052U, // : Cost 3 vext2 <1,2,u,0>, <3,3,3,3> - 2625137154U, // : Cost 3 vext2 <1,2,u,0>, <3,4,5,6> - 2639071848U, // : Cost 3 vext2 <3,5,u,0>, <3,5,u,0> - 2639735481U, // : Cost 3 vext2 <3,6,u,0>, <3,6,u,0> - 2597279354U, // : Cost 3 vext1 <7,u,0,3>, <7,u,0,3> - 73032403U, // : Cost 1 vrev LHS - 2687074636U, // : Cost 3 vext3 <0,4,0,u>, <0,4,0,u> - 1611489618U, // : Cost 2 vext3 LHS, <0,4,1,5> - 1611489628U, // : Cost 2 vext3 LHS, <0,4,2,6> - 3629222038U, // : Cost 4 vext1 <0,u,0,4>, <3,0,1,2> - 2555481398U, // : Cost 3 vext1 <0,u,0,4>, RHS - 1551396150U, // : Cost 2 vext2 <1,2,u,0>, RHS - 2651680116U, // : Cost 3 vext2 <5,6,u,0>, <4,6,4,6> - 2646150600U, // : Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1611932050U, // : Cost 2 
vext3 LHS, <0,4,u,6> - 2561458278U, // : Cost 3 vext1 <1,u,0,5>, LHS - 1863532646U, // : Cost 2 vzipl RHS, LHS - 2712068526U, // : Cost 3 vext3 RHS, <0,5,2,7> - 2649689976U, // : Cost 3 vext2 <5,3,u,0>, <5,3,u,0> - 2220237489U, // : Cost 3 vrev <0,u,4,5> - 2651680772U, // : Cost 3 vext2 <5,6,u,0>, <5,5,5,5> - 1577939051U, // : Cost 2 vext2 <5,6,u,0>, <5,6,u,0> - 2830077238U, // : Cost 3 vuzpr <1,u,3,0>, RHS - 1579266317U, // : Cost 2 vext2 <5,u,u,0>, <5,u,u,0> - 2555494502U, // : Cost 3 vext1 <0,u,0,6>, LHS - 2712068598U, // : Cost 3 vext3 RHS, <0,6,1,7> - 1997750374U, // : Cost 2 vtrnl RHS, LHS - 2655662673U, // : Cost 3 vext2 <6,3,u,0>, <6,3,u,0> - 2555497782U, // : Cost 3 vext1 <0,u,0,6>, RHS - 2651681459U, // : Cost 3 vext2 <5,6,u,0>, <6,5,0,u> - 2651681592U, // : Cost 3 vext2 <5,6,u,0>, <6,6,6,6> - 2651681614U, // : Cost 3 vext2 <5,6,u,0>, <6,7,0,1> - 1997750428U, // : Cost 2 vtrnl RHS, LHS - 2567446630U, // : Cost 3 vext1 <2,u,0,7>, LHS - 2567447446U, // : Cost 3 vext1 <2,u,0,7>, <1,2,3,0> - 2567448641U, // : Cost 3 vext1 <2,u,0,7>, <2,u,0,7> - 2573421338U, // : Cost 3 vext1 <3,u,0,7>, <3,u,0,7> - 2567449910U, // : Cost 3 vext1 <2,u,0,7>, RHS - 2651682242U, // : Cost 3 vext2 <5,6,u,0>, <7,5,6,u> - 2591339429U, // : Cost 3 vext1 <6,u,0,7>, <6,u,0,7> - 2651682412U, // : Cost 3 vext2 <5,6,u,0>, <7,7,7,7> - 2567452462U, // : Cost 3 vext1 <2,u,0,7>, LHS - 135053414U, // : Cost 1 vdup0 LHS - 1611489938U, // : Cost 2 vext3 LHS, <0,u,1,1> - 537748125U, // : Cost 1 vext3 LHS, LHS - 2685674148U, // : Cost 3 vext3 LHS, <0,u,3,1> - 1611932338U, // : Cost 2 vext3 LHS, <0,u,4,6> - 1551399066U, // : Cost 2 vext2 <1,2,u,0>, RHS - 1517605798U, // : Cost 2 vext1 <6,u,0,u>, <6,u,0,u> - 2830077481U, // : Cost 3 vuzpr <1,u,3,0>, RHS - 537748179U, // : Cost 1 vext3 LHS, LHS - 1544101961U, // : Cost 2 vext2 <0,0,u,1>, <0,0,u,1> - 1558036582U, // : Cost 2 vext2 <2,3,u,1>, LHS - 2619171051U, // : Cost 3 vext2 <0,2,u,1>, <0,2,u,1> - 1611490038U, // : Cost 2 vext3 LHS, <1,0,3,2> - 2555522358U, // : Cost 3 vext1 <0,u,1,0>, RHS - 2712068871U, // : Cost 3 vext3 RHS, <1,0,5,1> - 2591355815U, // : Cost 3 vext1 <6,u,1,0>, <6,u,1,0> - 2597328512U, // : Cost 3 vext1 <7,u,1,0>, <7,u,1,0> - 1611490083U, // : Cost 2 vext3 LHS, <1,0,u,2> - 1481785446U, // : Cost 2 vext1 <0,u,1,1>, LHS - 202162278U, // : Cost 1 vdup1 LHS - 2555528808U, // : Cost 3 vext1 <0,u,1,1>, <2,2,2,2> - 1611490120U, // : Cost 2 vext3 LHS, <1,1,3,3> - 1481788726U, // : Cost 2 vext1 <0,u,1,1>, RHS - 2689876828U, // : Cost 3 vext3 LHS, <1,1,5,5> - 2591364008U, // : Cost 3 vext1 <6,u,1,1>, <6,u,1,1> - 2592691274U, // : Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // : Cost 1 vdup1 LHS - 1499709542U, // : Cost 2 vext1 <3,u,1,2>, LHS - 2689876871U, // : Cost 3 vext3 LHS, <1,2,1,3> - 2631116445U, // : Cost 3 vext2 <2,2,u,1>, <2,2,u,1> - 835584U, // : Cost 0 copy LHS - 1499712822U, // : Cost 2 vext1 <3,u,1,2>, RHS - 2689876907U, // : Cost 3 vext3 LHS, <1,2,5,3> - 2631780282U, // : Cost 3 vext2 <2,3,u,1>, <2,6,3,7> - 1523603074U, // : Cost 2 vext1 <7,u,1,2>, <7,u,1,2> - 835584U, // : Cost 0 copy LHS - 1487773798U, // : Cost 2 vext1 <1,u,1,3>, LHS - 1611490264U, // : Cost 2 vext3 LHS, <1,3,1,3> - 2685232094U, // : Cost 3 vext3 LHS, <1,3,2,0> - 2018746470U, // : Cost 2 vtrnr LHS, LHS - 1487777078U, // : Cost 2 vext1 <1,u,1,3>, RHS - 1611490304U, // : Cost 2 vext3 LHS, <1,3,5,7> - 2685674505U, // : Cost 3 vext3 LHS, <1,3,6,7> - 2640407307U, // : Cost 3 vext2 <3,7,u,1>, <3,7,u,1> - 1611490327U, // : Cost 2 vext3 LHS, <1,3,u,3> - 1567992749U, // : Cost 2 vext2 
<4,0,u,1>, <4,0,u,1> - 2693121070U, // : Cost 3 vext3 <1,4,1,u>, <1,4,1,u> - 2693194807U, // : Cost 3 vext3 <1,4,2,u>, <1,4,2,u> - 1152386432U, // : Cost 2 vrev <1,u,3,4> - 2555555126U, // : Cost 3 vext1 <0,u,1,4>, RHS - 1558039862U, // : Cost 2 vext2 <2,3,u,1>, RHS - 2645716371U, // : Cost 3 vext2 <4,6,u,1>, <4,6,u,1> - 2597361284U, // : Cost 3 vext1 <7,u,1,4>, <7,u,1,4> - 1152755117U, // : Cost 2 vrev <1,u,u,4> - 1481818214U, // : Cost 2 vext1 <0,u,1,5>, LHS - 2555560694U, // : Cost 3 vext1 <0,u,1,5>, <1,0,3,2> - 2555561576U, // : Cost 3 vext1 <0,u,1,5>, <2,2,2,2> - 1611490448U, // : Cost 2 vext3 LHS, <1,5,3,7> - 1481821494U, // : Cost 2 vext1 <0,u,1,5>, RHS - 2651025435U, // : Cost 3 vext2 <5,5,u,1>, <5,5,u,1> - 2651689068U, // : Cost 3 vext2 <5,6,u,1>, <5,6,u,1> - 2823966006U, // : Cost 3 vuzpr <0,u,1,1>, RHS - 1611932861U, // : Cost 2 vext3 LHS, <1,5,u,7> - 2555568230U, // : Cost 3 vext1 <0,u,1,6>, LHS - 2689877199U, // : Cost 3 vext3 LHS, <1,6,1,7> - 2712069336U, // : Cost 3 vext3 RHS, <1,6,2,7> - 2685232353U, // : Cost 3 vext3 LHS, <1,6,3,7> - 2555571510U, // : Cost 3 vext1 <0,u,1,6>, RHS - 2689877235U, // : Cost 3 vext3 LHS, <1,6,5,7> - 2657661765U, // : Cost 3 vext2 <6,6,u,1>, <6,6,u,1> - 1584583574U, // : Cost 2 vext2 <6,7,u,1>, <6,7,u,1> - 1585247207U, // : Cost 2 vext2 <6,u,u,1>, <6,u,u,1> - 2561548390U, // : Cost 3 vext1 <1,u,1,7>, LHS - 2561549681U, // : Cost 3 vext1 <1,u,1,7>, <1,u,1,7> - 2573493926U, // : Cost 3 vext1 <3,u,1,7>, <2,3,0,1> - 2042962022U, // : Cost 2 vtrnr RHS, LHS - 2561551670U, // : Cost 3 vext1 <1,u,1,7>, RHS - 2226300309U, // : Cost 3 vrev <1,u,5,7> - 2658325990U, // : Cost 3 vext2 <6,7,u,1>, <7,6,1,u> - 2658326124U, // : Cost 3 vext2 <6,7,u,1>, <7,7,7,7> - 2042962027U, // : Cost 2 vtrnr RHS, LHS - 1481842790U, // : Cost 2 vext1 <0,u,1,u>, LHS - 202162278U, // : Cost 1 vdup1 LHS - 2685674867U, // : Cost 3 vext3 LHS, <1,u,2,0> - 835584U, // : Cost 0 copy LHS - 1481846070U, // : Cost 2 vext1 <0,u,1,u>, RHS - 1611933077U, // : Cost 2 vext3 LHS, <1,u,5,7> - 2685674910U, // : Cost 3 vext3 LHS, <1,u,6,7> - 1523652232U, // : Cost 2 vext1 <7,u,1,u>, <7,u,1,u> - 835584U, // : Cost 0 copy LHS - 1544110154U, // : Cost 2 vext2 <0,0,u,2>, <0,0,u,2> - 1545437286U, // : Cost 2 vext2 <0,2,u,2>, LHS - 1545437420U, // : Cost 2 vext2 <0,2,u,2>, <0,2,u,2> - 2685232589U, // : Cost 3 vext3 LHS, <2,0,3,0> - 2619179346U, // : Cost 3 vext2 <0,2,u,2>, <0,4,1,5> - 2712069606U, // : Cost 3 vext3 RHS, <2,0,5,7> - 2689877484U, // : Cost 3 vext3 LHS, <2,0,6,4> - 2659656273U, // : Cost 3 vext2 <7,0,u,2>, <0,7,2,u> - 1545437853U, // : Cost 2 vext2 <0,2,u,2>, LHS - 1550082851U, // : Cost 2 vext2 <1,0,u,2>, <1,0,u,2> - 2619179828U, // : Cost 3 vext2 <0,2,u,2>, <1,1,1,1> - 2619179926U, // : Cost 3 vext2 <0,2,u,2>, <1,2,3,0> - 2685232671U, // : Cost 3 vext3 LHS, <2,1,3,1> - 2555604278U, // : Cost 3 vext1 <0,u,2,1>, RHS - 2619180176U, // : Cost 3 vext2 <0,2,u,2>, <1,5,3,7> - 2689877564U, // : Cost 3 vext3 LHS, <2,1,6,3> - 2602718850U, // : Cost 3 vext1 , <7,u,1,2> - 1158703235U, // : Cost 2 vrev <2,u,u,1> - 1481867366U, // : Cost 2 vext1 <0,u,2,2>, LHS - 2555609846U, // : Cost 3 vext1 <0,u,2,2>, <1,0,3,2> - 269271142U, // : Cost 1 vdup2 LHS - 1611490930U, // : Cost 2 vext3 LHS, <2,2,3,3> - 1481870646U, // : Cost 2 vext1 <0,u,2,2>, RHS - 2689877640U, // : Cost 3 vext3 LHS, <2,2,5,7> - 2619180986U, // : Cost 3 vext2 <0,2,u,2>, <2,6,3,7> - 2593436837U, // : Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // : Cost 1 vdup2 LHS - 408134301U, // : Cost 1 vext1 LHS, LHS - 1481876214U, // : Cost 2 
vext1 LHS, <1,0,3,2> - 1481877096U, // : Cost 2 vext1 LHS, <2,2,2,2> - 1880326246U, // : Cost 2 vzipr LHS, LHS - 408137014U, // : Cost 1 vext1 LHS, RHS - 1529654992U, // : Cost 2 vext1 LHS, <5,1,7,3> - 1529655802U, // : Cost 2 vext1 LHS, <6,2,7,3> - 1529656314U, // : Cost 2 vext1 LHS, <7,0,1,2> - 408139566U, // : Cost 1 vext1 LHS, LHS - 1567853468U, // : Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 2561598362U, // : Cost 3 vext1 <1,u,2,4>, <1,2,3,4> - 2555627214U, // : Cost 3 vext1 <0,u,2,4>, <2,3,4,5> - 2685232918U, // : Cost 3 vext3 LHS, <2,4,3,5> - 2555628854U, // : Cost 3 vext1 <0,u,2,4>, RHS - 1545440566U, // : Cost 2 vext2 <0,2,u,2>, RHS - 1571982740U, // : Cost 2 vext2 <4,6,u,2>, <4,6,u,2> - 2592125957U, // : Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1545440809U, // : Cost 2 vext2 <0,2,u,2>, RHS - 2555633766U, // : Cost 3 vext1 <0,u,2,5>, LHS - 2561606550U, // : Cost 3 vext1 <1,u,2,5>, <1,2,3,0> - 2689877856U, // : Cost 3 vext3 LHS, <2,5,2,7> - 2685233000U, // : Cost 3 vext3 LHS, <2,5,3,6> - 1158441059U, // : Cost 2 vrev <2,u,4,5> - 2645725188U, // : Cost 3 vext2 <4,6,u,2>, <5,5,5,5> - 2689877892U, // : Cost 3 vext3 LHS, <2,5,6,7> - 2823900470U, // : Cost 3 vuzpr <0,u,0,2>, RHS - 1158736007U, // : Cost 2 vrev <2,u,u,5> - 1481900134U, // : Cost 2 vext1 <0,u,2,6>, LHS - 2555642614U, // : Cost 3 vext1 <0,u,2,6>, <1,0,3,2> - 2555643496U, // : Cost 3 vext1 <0,u,2,6>, <2,2,2,2> - 1611491258U, // : Cost 2 vext3 LHS, <2,6,3,7> - 1481903414U, // : Cost 2 vext1 <0,u,2,6>, RHS - 2689877964U, // : Cost 3 vext3 LHS, <2,6,5,7> - 2689877973U, // : Cost 3 vext3 LHS, <2,6,6,7> - 2645726030U, // : Cost 3 vext2 <4,6,u,2>, <6,7,0,1> - 1611933671U, // : Cost 2 vext3 LHS, <2,6,u,7> - 1585919033U, // : Cost 2 vext2 <7,0,u,2>, <7,0,u,2> - 2573566710U, // : Cost 3 vext1 <3,u,2,7>, <1,0,3,2> - 2567596115U, // : Cost 3 vext1 <2,u,2,7>, <2,u,2,7> - 1906901094U, // : Cost 2 vzipr RHS, LHS - 2555653430U, // : Cost 3 vext1 <0,u,2,7>, RHS - 2800080230U, // : Cost 3 vuzpl LHS, <7,4,5,6> - 2980643164U, // : Cost 3 vzipr RHS, <0,4,2,6> - 2645726828U, // : Cost 3 vext2 <4,6,u,2>, <7,7,7,7> - 1906901099U, // : Cost 2 vzipr RHS, LHS - 408175266U, // : Cost 1 vext1 LHS, LHS - 1545443118U, // : Cost 2 vext2 <0,2,u,2>, LHS - 269271142U, // : Cost 1 vdup2 LHS - 1611491416U, // : Cost 2 vext3 LHS, <2,u,3,3> - 408177974U, // : Cost 1 vext1 LHS, RHS - 1545443482U, // : Cost 2 vext2 <0,2,u,2>, RHS - 1726339226U, // : Cost 2 vuzpl LHS, RHS - 1529697274U, // : Cost 2 vext1 LHS, <7,0,1,2> - 408180526U, // : Cost 1 vext1 LHS, LHS - 1544781824U, // : Cost 2 vext2 LHS, <0,0,0,0> - 471040156U, // : Cost 1 vext2 LHS, LHS - 1544781988U, // : Cost 2 vext2 LHS, <0,2,0,2> - 2618523900U, // : Cost 3 vext2 LHS, <0,3,1,0> - 1544782162U, // : Cost 2 vext2 LHS, <0,4,1,5> - 2238188352U, // : Cost 3 vrev <3,u,5,0> - 2623169023U, // : Cost 3 vext2 LHS, <0,6,2,7> - 2238335826U, // : Cost 3 vrev <3,u,7,0> - 471040669U, // : Cost 1 vext2 LHS, LHS - 1544782582U, // : Cost 2 vext2 LHS, <1,0,3,2> - 1544782644U, // : Cost 2 vext2 LHS, <1,1,1,1> - 1544782742U, // : Cost 2 vext2 LHS, <1,2,3,0> - 1544782808U, // : Cost 2 vext2 LHS, <1,3,1,3> - 2618524733U, // : Cost 3 vext2 LHS, <1,4,3,5> - 1544782992U, // : Cost 2 vext2 LHS, <1,5,3,7> - 2618524897U, // : Cost 3 vext2 LHS, <1,6,3,7> - 2703517987U, // : Cost 3 vext3 <3,1,7,u>, <3,1,7,u> - 1544783213U, // : Cost 2 vext2 LHS, <1,u,1,3> - 1529716838U, // : Cost 2 vext1 , LHS - 1164167966U, // : Cost 2 vrev <3,u,1,2> - 1544783464U, // : Cost 2 vext2 LHS, <2,2,2,2> - 1544783526U, // : Cost 2 vext2 LHS, <2,3,0,1> - 1529720118U, 
// : Cost 2 vext1 , RHS - 2618525544U, // : Cost 3 vext2 LHS, <2,5,3,6> - 1544783802U, // : Cost 2 vext2 LHS, <2,6,3,7> - 2704181620U, // : Cost 3 vext3 <3,2,7,u>, <3,2,7,u> - 1544783931U, // : Cost 2 vext2 LHS, <2,u,0,1> - 1544784022U, // : Cost 2 vext2 LHS, <3,0,1,2> - 1487922559U, // : Cost 2 vext1 <1,u,3,3>, <1,u,3,3> - 1493895256U, // : Cost 2 vext1 <2,u,3,3>, <2,u,3,3> - 336380006U, // : Cost 1 vdup3 LHS - 1544784386U, // : Cost 2 vext2 LHS, <3,4,5,6> - 2824054478U, // : Cost 3 vuzpr LHS, <2,3,4,5> - 2238286668U, // : Cost 3 vrev <3,u,6,3> - 2954069136U, // : Cost 3 vzipr LHS, <1,5,3,7> - 336380006U, // : Cost 1 vdup3 LHS - 1487929446U, // : Cost 2 vext1 <1,u,3,4>, LHS - 1487930752U, // : Cost 2 vext1 <1,u,3,4>, <1,u,3,4> - 2623171644U, // : Cost 3 vext2 LHS, <4,2,6,0> - 2561673366U, // : Cost 3 vext1 <1,u,3,4>, <3,0,1,2> - 1487932726U, // : Cost 2 vext1 <1,u,3,4>, RHS - 471043382U, // : Cost 1 vext2 LHS, RHS - 1592561012U, // : Cost 2 vext2 LHS, <4,6,4,6> - 2238368598U, // : Cost 3 vrev <3,u,7,4> - 471043625U, // : Cost 1 vext2 LHS, RHS - 2555707494U, // : Cost 3 vext1 <0,u,3,5>, LHS - 1574645465U, // : Cost 2 vext2 <5,1,u,3>, <5,1,u,3> - 2567653106U, // : Cost 3 vext1 <2,u,3,5>, <2,3,u,5> - 2555709954U, // : Cost 3 vext1 <0,u,3,5>, <3,4,5,6> - 1592561606U, // : Cost 2 vext2 LHS, <5,4,7,6> - 1592561668U, // : Cost 2 vext2 LHS, <5,5,5,5> - 1592561762U, // : Cost 2 vext2 LHS, <5,6,7,0> - 1750314294U, // : Cost 2 vuzpr LHS, RHS - 1750314295U, // : Cost 2 vuzpr LHS, RHS - 2623172897U, // : Cost 3 vext2 LHS, <6,0,1,2> - 2561688962U, // : Cost 3 vext1 <1,u,3,6>, <1,u,3,6> - 1581281795U, // : Cost 2 vext2 <6,2,u,3>, <6,2,u,3> - 2706541204U, // : Cost 3 vext3 <3,6,3,u>, <3,6,3,u> - 2623173261U, // : Cost 3 vext2 LHS, <6,4,5,6> - 1164495686U, // : Cost 2 vrev <3,u,5,6> - 1592562488U, // : Cost 2 vext2 LHS, <6,6,6,6> - 1592562510U, // : Cost 2 vext2 LHS, <6,7,0,1> - 1164716897U, // : Cost 2 vrev <3,u,u,6> - 1487954022U, // : Cost 2 vext1 <1,u,3,7>, LHS - 1487955331U, // : Cost 2 vext1 <1,u,3,7>, <1,u,3,7> - 1493928028U, // : Cost 2 vext1 <2,u,3,7>, <2,u,3,7> - 2561697942U, // : Cost 3 vext1 <1,u,3,7>, <3,0,1,2> - 1487957302U, // : Cost 2 vext1 <1,u,3,7>, RHS - 2707352311U, // : Cost 3 vext3 <3,7,5,u>, <3,7,5,u> - 2655024623U, // : Cost 3 vext2 <6,2,u,3>, <7,6,2,u> - 1592563308U, // : Cost 2 vext2 LHS, <7,7,7,7> - 1487959854U, // : Cost 2 vext1 <1,u,3,7>, LHS - 1544787667U, // : Cost 2 vext2 LHS, - 471045934U, // : Cost 1 vext2 LHS, LHS - 1549432709U, // : Cost 2 vext2 LHS, - 336380006U, // : Cost 1 vdup3 LHS - 1544788031U, // : Cost 2 vext2 LHS, - 471046298U, // : Cost 1 vext2 LHS, RHS - 1549433040U, // : Cost 2 vext2 LHS, - 1750314537U, // : Cost 2 vuzpr LHS, RHS - 471046501U, // : Cost 1 vext2 LHS, LHS - 2625167360U, // : Cost 3 vext2 <1,2,u,4>, <0,0,0,0> - 1551425638U, // : Cost 2 vext2 <1,2,u,4>, LHS - 2619195630U, // : Cost 3 vext2 <0,2,u,4>, <0,2,u,4> - 2619343104U, // : Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2625167698U, // : Cost 3 vext2 <1,2,u,4>, <0,4,1,5> - 1638329234U, // : Cost 2 vext3 RHS, <4,0,5,1> - 1638329244U, // : Cost 2 vext3 RHS, <4,0,6,2> - 3787803556U, // : Cost 4 vext3 RHS, <4,0,7,1> - 1551426205U, // : Cost 2 vext2 <1,2,u,4>, LHS - 2555748454U, // : Cost 3 vext1 <0,u,4,1>, LHS - 2625168180U, // : Cost 3 vext2 <1,2,u,4>, <1,1,1,1> - 1551426503U, // : Cost 2 vext2 <1,2,u,4>, <1,2,u,4> - 2625168344U, // : Cost 3 vext2 <1,2,u,4>, <1,3,1,3> - 2555751734U, // : Cost 3 vext1 <0,u,4,1>, RHS - 1860554038U, // : Cost 2 vzipl LHS, RHS - 2689879022U, // : Cost 3 vext3 LHS, <4,1,6,3> 
- 2592248852U, // : Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1555408301U, // : Cost 2 vext2 <1,u,u,4>, <1,u,u,4> - 2555756646U, // : Cost 3 vext1 <0,u,4,2>, LHS - 2625168943U, // : Cost 3 vext2 <1,2,u,4>, <2,1,4,u> - 2625169000U, // : Cost 3 vext2 <1,2,u,4>, <2,2,2,2> - 2619197134U, // : Cost 3 vext2 <0,2,u,4>, <2,3,4,5> - 2555759926U, // : Cost 3 vext1 <0,u,4,2>, RHS - 2712071222U, // : Cost 3 vext3 RHS, <4,2,5,3> - 1994771766U, // : Cost 2 vtrnl LHS, RHS - 2592257045U, // : Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1994771784U, // : Cost 2 vtrnl LHS, RHS - 2625169558U, // : Cost 3 vext2 <1,2,u,4>, <3,0,1,2> - 2567709594U, // : Cost 3 vext1 <2,u,4,3>, <1,2,3,4> - 2567710817U, // : Cost 3 vext1 <2,u,4,3>, <2,u,4,3> - 2625169820U, // : Cost 3 vext2 <1,2,u,4>, <3,3,3,3> - 2625169922U, // : Cost 3 vext2 <1,2,u,4>, <3,4,5,6> - 2954069710U, // : Cost 3 vzipr LHS, <2,3,4,5> - 2954068172U, // : Cost 3 vzipr LHS, <0,2,4,6> - 3903849472U, // : Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> - 2954068174U, // : Cost 3 vzipr LHS, <0,2,4,u> - 1505919078U, // : Cost 2 vext1 <4,u,4,4>, LHS - 2567717831U, // : Cost 3 vext1 <2,u,4,4>, <1,2,u,4> - 2567719010U, // : Cost 3 vext1 <2,u,4,4>, <2,u,4,4> - 2570373542U, // : Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 161926454U, // : Cost 1 vdup0 RHS - 1551428918U, // : Cost 2 vext2 <1,2,u,4>, RHS - 1638329572U, // : Cost 2 vext3 RHS, <4,4,6,6> - 2594927963U, // : Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // : Cost 1 vdup0 RHS - 1493983334U, // : Cost 2 vext1 <2,u,4,5>, LHS - 2689879301U, // : Cost 3 vext3 LHS, <4,5,1,3> - 1493985379U, // : Cost 2 vext1 <2,u,4,5>, <2,u,4,5> - 2567727254U, // : Cost 3 vext1 <2,u,4,5>, <3,0,1,2> - 1493986614U, // : Cost 2 vext1 <2,u,4,5>, RHS - 1863535926U, // : Cost 2 vzipl RHS, RHS - 537750838U, // : Cost 1 vext3 LHS, RHS - 2830110006U, // : Cost 3 vuzpr <1,u,3,4>, RHS - 537750856U, // : Cost 1 vext3 LHS, RHS - 1482047590U, // : Cost 2 vext1 <0,u,4,6>, LHS - 2555790070U, // : Cost 3 vext1 <0,u,4,6>, <1,0,3,2> - 2555790952U, // : Cost 3 vext1 <0,u,4,6>, <2,2,2,2> - 2555791510U, // : Cost 3 vext1 <0,u,4,6>, <3,0,1,2> - 1482050870U, // : Cost 2 vext1 <0,u,4,6>, RHS - 2689879422U, // : Cost 3 vext3 LHS, <4,6,5,7> - 1997753654U, // : Cost 2 vtrnl RHS, RHS - 2712071562U, // : Cost 3 vext3 RHS, <4,6,7,1> - 1482053422U, // : Cost 2 vext1 <0,u,4,6>, LHS - 2567741542U, // : Cost 3 vext1 <2,u,4,7>, LHS - 2567742362U, // : Cost 3 vext1 <2,u,4,7>, <1,2,3,4> - 2567743589U, // : Cost 3 vext1 <2,u,4,7>, <2,u,4,7> - 2573716286U, // : Cost 3 vext1 <3,u,4,7>, <3,u,4,7> - 2567744822U, // : Cost 3 vext1 <2,u,4,7>, RHS - 2712071624U, // : Cost 3 vext3 RHS, <4,7,5,0> - 96808489U, // : Cost 1 vrev RHS - 2651715180U, // : Cost 3 vext2 <5,6,u,4>, <7,7,7,7> - 96955963U, // : Cost 1 vrev RHS - 1482063974U, // : Cost 2 vext1 <0,u,4,u>, LHS - 1551431470U, // : Cost 2 vext2 <1,2,u,4>, LHS - 1494009958U, // : Cost 2 vext1 <2,u,4,u>, <2,u,4,u> - 2555807894U, // : Cost 3 vext1 <0,u,4,u>, <3,0,1,2> - 161926454U, // : Cost 1 vdup0 RHS - 1551431834U, // : Cost 2 vext2 <1,2,u,4>, RHS - 537751081U, // : Cost 1 vext3 LHS, RHS - 2830110249U, // : Cost 3 vuzpr <1,u,3,4>, RHS - 537751099U, // : Cost 1 vext3 LHS, RHS - 2631811072U, // : Cost 3 vext2 <2,3,u,5>, <0,0,0,0> - 1558069350U, // : Cost 2 vext2 <2,3,u,5>, LHS - 2619203823U, // : Cost 3 vext2 <0,2,u,5>, <0,2,u,5> - 2619867456U, // : Cost 3 vext2 <0,3,u,5>, <0,3,u,5> - 1546273106U, // : Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2733010539U, // : Cost 3 vext3 LHS, <5,0,5,1> - 2597622682U, // : Cost 3 vext1 <7,u,5,0>, <6,7,u,5> - 1176539396U, // : 
Cost 2 vrev <5,u,7,0> - 1558069917U, // : Cost 2 vext2 <2,3,u,5>, LHS - 1505968230U, // : Cost 2 vext1 <4,u,5,1>, LHS - 2624512887U, // : Cost 3 vext2 <1,1,u,5>, <1,1,u,5> - 2631811990U, // : Cost 3 vext2 <2,3,u,5>, <1,2,3,0> - 2618541056U, // : Cost 3 vext2 <0,1,u,5>, <1,3,5,7> - 1505971510U, // : Cost 2 vext1 <4,u,5,1>, RHS - 2627167419U, // : Cost 3 vext2 <1,5,u,5>, <1,5,u,5> - 2579714554U, // : Cost 3 vext1 <4,u,5,1>, <6,2,7,3> - 1638330064U, // : Cost 2 vext3 RHS, <5,1,7,3> - 1638477529U, // : Cost 2 vext3 RHS, <5,1,u,3> - 2561802342U, // : Cost 3 vext1 <1,u,5,2>, LHS - 2561803264U, // : Cost 3 vext1 <1,u,5,2>, <1,3,5,7> - 2631149217U, // : Cost 3 vext2 <2,2,u,5>, <2,2,u,5> - 1558071026U, // : Cost 2 vext2 <2,3,u,5>, <2,3,u,5> - 2561805622U, // : Cost 3 vext1 <1,u,5,2>, RHS - 2714062607U, // : Cost 3 vext3 RHS, <5,2,5,3> - 2631813050U, // : Cost 3 vext2 <2,3,u,5>, <2,6,3,7> - 3092335926U, // : Cost 3 vtrnr <0,u,0,2>, RHS - 1561389191U, // : Cost 2 vext2 <2,u,u,5>, <2,u,u,5> - 2561810534U, // : Cost 3 vext1 <1,u,5,3>, LHS - 2561811857U, // : Cost 3 vext1 <1,u,5,3>, <1,u,5,3> - 2631813474U, // : Cost 3 vext2 <2,3,u,5>, <3,2,5,u> - 2631813532U, // : Cost 3 vext2 <2,3,u,5>, <3,3,3,3> - 2619869698U, // : Cost 3 vext2 <0,3,u,5>, <3,4,5,6> - 3001847002U, // : Cost 3 vzipr LHS, <4,4,5,5> - 2954070530U, // : Cost 3 vzipr LHS, <3,4,5,6> - 2018749750U, // : Cost 2 vtrnr LHS, RHS - 2018749751U, // : Cost 2 vtrnr LHS, RHS - 2573762662U, // : Cost 3 vext1 <3,u,5,4>, LHS - 2620017634U, // : Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 2573764338U, // : Cost 3 vext1 <3,u,5,4>, <2,3,u,5> - 2573765444U, // : Cost 3 vext1 <3,u,5,4>, <3,u,5,4> - 1570680053U, // : Cost 2 vext2 <4,4,u,5>, <4,4,u,5> - 1558072630U, // : Cost 2 vext2 <2,3,u,5>, RHS - 2645749143U, // : Cost 3 vext2 <4,6,u,5>, <4,6,u,5> - 1638330310U, // : Cost 2 vext3 RHS, <5,4,7,6> - 1558072873U, // : Cost 2 vext2 <2,3,u,5>, RHS - 1506000998U, // : Cost 2 vext1 <4,u,5,5>, LHS - 2561827984U, // : Cost 3 vext1 <1,u,5,5>, <1,5,3,7> - 2579744360U, // : Cost 3 vext1 <4,u,5,5>, <2,2,2,2> - 2579744918U, // : Cost 3 vext1 <4,u,5,5>, <3,0,1,2> - 1506004278U, // : Cost 2 vext1 <4,u,5,5>, RHS - 229035318U, // : Cost 1 vdup1 RHS - 2712072206U, // : Cost 3 vext3 RHS, <5,5,6,6> - 1638330392U, // : Cost 2 vext3 RHS, <5,5,7,7> - 229035318U, // : Cost 1 vdup1 RHS - 1500037222U, // : Cost 2 vext1 <3,u,5,6>, LHS - 2561836436U, // : Cost 3 vext1 <1,u,5,6>, <1,u,5,6> - 2567809133U, // : Cost 3 vext1 <2,u,5,6>, <2,u,5,6> - 1500040006U, // : Cost 2 vext1 <3,u,5,6>, <3,u,5,6> - 1500040502U, // : Cost 2 vext1 <3,u,5,6>, RHS - 2714062935U, // : Cost 3 vext3 RHS, <5,6,5,7> - 2712072288U, // : Cost 3 vext3 RHS, <5,6,6,7> - 27705344U, // : Cost 0 copy RHS - 27705344U, // : Cost 0 copy RHS - 1488101478U, // : Cost 2 vext1 <1,u,5,7>, LHS - 1488102805U, // : Cost 2 vext1 <1,u,5,7>, <1,u,5,7> - 2561844840U, // : Cost 3 vext1 <1,u,5,7>, <2,2,2,2> - 2561845398U, // : Cost 3 vext1 <1,u,5,7>, <3,0,1,2> - 1488104758U, // : Cost 2 vext1 <1,u,5,7>, RHS - 1638330536U, // : Cost 2 vext3 RHS, <5,7,5,7> - 2712072362U, // : Cost 3 vext3 RHS, <5,7,6,0> - 2042965302U, // : Cost 2 vtrnr RHS, RHS - 1488107310U, // : Cost 2 vext1 <1,u,5,7>, LHS - 1488109670U, // : Cost 2 vext1 <1,u,5,u>, LHS - 1488110998U, // : Cost 2 vext1 <1,u,5,u>, <1,u,5,u> - 2561853032U, // : Cost 3 vext1 <1,u,5,u>, <2,2,2,2> - 1500056392U, // : Cost 2 vext1 <3,u,5,u>, <3,u,5,u> - 1488112950U, // : Cost 2 vext1 <1,u,5,u>, RHS - 229035318U, // : Cost 1 vdup1 RHS - 2954111490U, // : Cost 3 vzipr LHS, <3,4,5,6> - 27705344U, // : 
Cost 0 copy RHS - 27705344U, // : Cost 0 copy RHS - 2619211776U, // : Cost 3 vext2 <0,2,u,6>, <0,0,0,0> - 1545470054U, // : Cost 2 vext2 <0,2,u,6>, LHS - 1545470192U, // : Cost 2 vext2 <0,2,u,6>, <0,2,u,6> - 2255958969U, // : Cost 3 vrev <6,u,3,0> - 1546797458U, // : Cost 2 vext2 <0,4,u,6>, <0,4,u,6> - 2720624971U, // : Cost 3 vext3 <6,0,5,u>, <6,0,5,u> - 2256180180U, // : Cost 3 vrev <6,u,6,0> - 2960682294U, // : Cost 3 vzipr <1,2,u,0>, RHS - 1545470621U, // : Cost 2 vext2 <0,2,u,6>, LHS - 1182004127U, // : Cost 2 vrev <6,u,0,1> - 2619212596U, // : Cost 3 vext2 <0,2,u,6>, <1,1,1,1> - 2619212694U, // : Cost 3 vext2 <0,2,u,6>, <1,2,3,0> - 2619212760U, // : Cost 3 vext2 <0,2,u,6>, <1,3,1,3> - 2626511979U, // : Cost 3 vext2 <1,4,u,6>, <1,4,u,6> - 2619212944U, // : Cost 3 vext2 <0,2,u,6>, <1,5,3,7> - 2714063264U, // : Cost 3 vext3 RHS, <6,1,6,3> - 2967326006U, // : Cost 3 vzipr <2,3,u,1>, RHS - 1182594023U, // : Cost 2 vrev <6,u,u,1> - 1506050150U, // : Cost 2 vext1 <4,u,6,2>, LHS - 2579792630U, // : Cost 3 vext1 <4,u,6,2>, <1,0,3,2> - 2619213416U, // : Cost 3 vext2 <0,2,u,6>, <2,2,2,2> - 2619213478U, // : Cost 3 vext2 <0,2,u,6>, <2,3,0,1> - 1506053430U, // : Cost 2 vext1 <4,u,6,2>, RHS - 2633148309U, // : Cost 3 vext2 <2,5,u,6>, <2,5,u,6> - 2619213754U, // : Cost 3 vext2 <0,2,u,6>, <2,6,3,7> - 1638330874U, // : Cost 2 vext3 RHS, <6,2,7,3> - 1638478339U, // : Cost 2 vext3 RHS, <6,2,u,3> - 2619213974U, // : Cost 3 vext2 <0,2,u,6>, <3,0,1,2> - 2255836074U, // : Cost 3 vrev <6,u,1,3> - 2255909811U, // : Cost 3 vrev <6,u,2,3> - 2619214236U, // : Cost 3 vext2 <0,2,u,6>, <3,3,3,3> - 1564715549U, // : Cost 2 vext2 <3,4,u,6>, <3,4,u,6> - 2639121006U, // : Cost 3 vext2 <3,5,u,6>, <3,5,u,6> - 3001847012U, // : Cost 3 vzipr LHS, <4,4,6,6> - 1880329526U, // : Cost 2 vzipr LHS, RHS - 1880329527U, // : Cost 2 vzipr LHS, RHS - 2567864422U, // : Cost 3 vext1 <2,u,6,4>, LHS - 2733011558U, // : Cost 3 vext3 LHS, <6,4,1,3> - 2567866484U, // : Cost 3 vext1 <2,u,6,4>, <2,u,6,4> - 2638458005U, // : Cost 3 vext2 <3,4,u,6>, <4,3,6,u> - 1570540772U, // : Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1545473334U, // : Cost 2 vext2 <0,2,u,6>, RHS - 1572015512U, // : Cost 2 vext2 <4,6,u,6>, <4,6,u,6> - 2960715062U, // : Cost 3 vzipr <1,2,u,4>, RHS - 1545473577U, // : Cost 2 vext2 <0,2,u,6>, RHS - 2567872614U, // : Cost 3 vext1 <2,u,6,5>, LHS - 2645757648U, // : Cost 3 vext2 <4,6,u,6>, <5,1,7,3> - 2567874490U, // : Cost 3 vext1 <2,u,6,5>, <2,6,3,7> - 2576501250U, // : Cost 3 vext1 <4,3,6,5>, <3,4,5,6> - 1576660943U, // : Cost 2 vext2 <5,4,u,6>, <5,4,u,6> - 2645757956U, // : Cost 3 vext2 <4,6,u,6>, <5,5,5,5> - 2645758050U, // : Cost 3 vext2 <4,6,u,6>, <5,6,7,0> - 2824080694U, // : Cost 3 vuzpr <0,u,2,6>, RHS - 1182626795U, // : Cost 2 vrev <6,u,u,5> - 1506082918U, // : Cost 2 vext1 <4,u,6,6>, LHS - 2579825398U, // : Cost 3 vext1 <4,u,6,6>, <1,0,3,2> - 2645758458U, // : Cost 3 vext2 <4,6,u,6>, <6,2,7,3> - 2579826838U, // : Cost 3 vext1 <4,u,6,6>, <3,0,1,2> - 1506086198U, // : Cost 2 vext1 <4,u,6,6>, RHS - 2579828432U, // : Cost 3 vext1 <4,u,6,6>, <5,1,7,3> - 296144182U, // : Cost 1 vdup2 RHS - 1638331202U, // : Cost 2 vext3 RHS, <6,6,7,7> - 296144182U, // : Cost 1 vdup2 RHS - 432349286U, // : Cost 1 vext1 RHS, LHS - 1506091766U, // : Cost 2 vext1 RHS, <1,0,3,2> - 1506092648U, // : Cost 2 vext1 RHS, <2,2,2,2> - 1506093206U, // : Cost 2 vext1 RHS, <3,0,1,2> - 432352809U, // : Cost 1 vext1 RHS, RHS - 1506094800U, // : Cost 2 vext1 RHS, <5,1,7,3> - 1506095610U, // : Cost 2 vext1 RHS, <6,2,7,3> - 1906904374U, // : Cost 2 vzipr RHS, RHS - 
432355118U, // : Cost 1 vext1 RHS, LHS - 432357478U, // : Cost 1 vext1 RHS, LHS - 1545475886U, // : Cost 2 vext2 <0,2,u,6>, LHS - 1506100840U, // : Cost 2 vext1 RHS, <2,2,2,2> - 1506101398U, // : Cost 2 vext1 RHS, <3,0,1,2> - 432361002U, // : Cost 1 vext1 RHS, RHS - 1545476250U, // : Cost 2 vext2 <0,2,u,6>, RHS - 296144182U, // : Cost 1 vdup2 RHS - 1880370486U, // : Cost 2 vzipr LHS, RHS - 432363310U, // : Cost 1 vext1 RHS, LHS - 1571356672U, // : Cost 2 vext2 RHS, <0,0,0,0> - 497614950U, // : Cost 1 vext2 RHS, LHS - 1571356836U, // : Cost 2 vext2 RHS, <0,2,0,2> - 2573880146U, // : Cost 3 vext1 <3,u,7,0>, <3,u,7,0> - 1571357010U, // : Cost 2 vext2 RHS, <0,4,1,5> - 1512083716U, // : Cost 2 vext1 <5,u,7,0>, <5,u,7,0> - 2621874741U, // : Cost 3 vext2 <0,6,u,7>, <0,6,u,7> - 2585826298U, // : Cost 3 vext1 <5,u,7,0>, <7,0,1,2> - 497615517U, // : Cost 1 vext2 RHS, LHS - 1571357430U, // : Cost 2 vext2 RHS, <1,0,3,2> - 1571357492U, // : Cost 2 vext2 RHS, <1,1,1,1> - 1571357590U, // : Cost 2 vext2 RHS, <1,2,3,0> - 1552114715U, // : Cost 2 vext2 <1,3,u,7>, <1,3,u,7> - 2573888822U, // : Cost 3 vext1 <3,u,7,1>, RHS - 1553441981U, // : Cost 2 vext2 <1,5,u,7>, <1,5,u,7> - 2627847438U, // : Cost 3 vext2 <1,6,u,7>, <1,6,u,7> - 2727408775U, // : Cost 3 vext3 <7,1,7,u>, <7,1,7,u> - 1555432880U, // : Cost 2 vext2 <1,u,u,7>, <1,u,u,7> - 2629838337U, // : Cost 3 vext2 <2,0,u,7>, <2,0,u,7> - 1188058754U, // : Cost 2 vrev <7,u,1,2> - 1571358312U, // : Cost 2 vext2 RHS, <2,2,2,2> - 1571358374U, // : Cost 2 vext2 RHS, <2,3,0,1> - 2632492869U, // : Cost 3 vext2 <2,4,u,7>, <2,4,u,7> - 2633156502U, // : Cost 3 vext2 <2,5,u,7>, <2,5,u,7> - 1560078311U, // : Cost 2 vext2 <2,6,u,7>, <2,6,u,7> - 2728072408U, // : Cost 3 vext3 <7,2,7,u>, <7,2,7,u> - 1561405577U, // : Cost 2 vext2 <2,u,u,7>, <2,u,u,7> - 1571358870U, // : Cost 2 vext2 RHS, <3,0,1,2> - 2627184913U, // : Cost 3 vext2 <1,5,u,7>, <3,1,5,u> - 2633820523U, // : Cost 3 vext2 <2,6,u,7>, <3,2,6,u> - 1571359132U, // : Cost 2 vext2 RHS, <3,3,3,3> - 1571359234U, // : Cost 2 vext2 RHS, <3,4,5,6> - 1512108295U, // : Cost 2 vext1 <5,u,7,3>, <5,u,7,3> - 1518080992U, // : Cost 2 vext1 <6,u,7,3>, <6,u,7,3> - 2640456465U, // : Cost 3 vext2 <3,7,u,7>, <3,7,u,7> - 1571359518U, // : Cost 2 vext2 RHS, <3,u,1,2> - 1571359634U, // : Cost 2 vext2 RHS, <4,0,5,1> - 2573911067U, // : Cost 3 vext1 <3,u,7,4>, <1,3,u,7> - 2645101622U, // : Cost 3 vext2 RHS, <4,2,5,3> - 2573912918U, // : Cost 3 vext1 <3,u,7,4>, <3,u,7,4> - 1571359952U, // : Cost 2 vext2 RHS, <4,4,4,4> - 497618248U, // : Cost 1 vext2 RHS, RHS - 1571360116U, // : Cost 2 vext2 RHS, <4,6,4,6> - 2645102024U, // : Cost 3 vext2 RHS, <4,7,5,0> - 497618473U, // : Cost 1 vext2 RHS, RHS - 2645102152U, // : Cost 3 vext2 RHS, <5,0,1,2> - 1571360464U, // : Cost 2 vext2 RHS, <5,1,7,3> - 2645102334U, // : Cost 3 vext2 RHS, <5,2,3,4> - 2645102447U, // : Cost 3 vext2 RHS, <5,3,7,0> - 1571360710U, // : Cost 2 vext2 RHS, <5,4,7,6> - 1571360772U, // : Cost 2 vext2 RHS, <5,5,5,5> - 1571360866U, // : Cost 2 vext2 RHS, <5,6,7,0> - 1571360936U, // : Cost 2 vext2 RHS, <5,7,5,7> - 1571361017U, // : Cost 2 vext2 RHS, <5,u,5,7> - 1530044518U, // : Cost 2 vext1 , LHS - 2645103016U, // : Cost 3 vext2 RHS, <6,1,7,2> - 1571361274U, // : Cost 2 vext2 RHS, <6,2,7,3> - 2645103154U, // : Cost 3 vext2 RHS, <6,3,4,5> - 1530047798U, // : Cost 2 vext1 , RHS - 1188386474U, // : Cost 2 vrev <7,u,5,6> - 1571361592U, // : Cost 2 vext2 RHS, <6,6,6,6> - 1571361614U, // : Cost 2 vext2 RHS, <6,7,0,1> - 1571361695U, // : Cost 2 vext2 RHS, <6,u,0,1> - 1571361786U, // : Cost 
2 vext2 RHS, <7,0,1,2> - 2573935616U, // : Cost 3 vext1 <3,u,7,7>, <1,3,5,7> - 2645103781U, // : Cost 3 vext2 RHS, <7,2,2,2> - 2573937497U, // : Cost 3 vext1 <3,u,7,7>, <3,u,7,7> - 1571362150U, // : Cost 2 vext2 RHS, <7,4,5,6> - 1512141067U, // : Cost 2 vext1 <5,u,7,7>, <5,u,7,7> - 1518113764U, // : Cost 2 vext1 <6,u,7,7>, <6,u,7,7> - 363253046U, // : Cost 1 vdup3 RHS - 363253046U, // : Cost 1 vdup3 RHS - 1571362515U, // : Cost 2 vext2 RHS, - 497620782U, // : Cost 1 vext2 RHS, LHS - 1571362693U, // : Cost 2 vext2 RHS, - 1571362748U, // : Cost 2 vext2 RHS, - 1571362879U, // : Cost 2 vext2 RHS, - 497621146U, // : Cost 1 vext2 RHS, RHS - 1571363024U, // : Cost 2 vext2 RHS, - 363253046U, // : Cost 1 vdup3 RHS - 497621349U, // : Cost 1 vext2 RHS, LHS - 135053414U, // : Cost 1 vdup0 LHS - 471081121U, // : Cost 1 vext2 LHS, LHS - 1544822948U, // : Cost 2 vext2 LHS, <0,2,0,2> - 1616140005U, // : Cost 2 vext3 LHS, - 1544823122U, // : Cost 2 vext2 LHS, <0,4,1,5> - 1512157453U, // : Cost 2 vext1 <5,u,u,0>, <5,u,u,0> - 1662220032U, // : Cost 2 vext3 RHS, - 1194457487U, // : Cost 2 vrev - 471081629U, // : Cost 1 vext2 LHS, LHS - 1544823542U, // : Cost 2 vext2 LHS, <1,0,3,2> - 202162278U, // : Cost 1 vdup1 LHS - 537753390U, // : Cost 1 vext3 LHS, LHS - 1544823768U, // : Cost 2 vext2 LHS, <1,3,1,3> - 1494248758U, // : Cost 2 vext1 <2,u,u,1>, RHS - 1544823952U, // : Cost 2 vext2 LHS, <1,5,3,7> - 1518138343U, // : Cost 2 vext1 <6,u,u,1>, <6,u,u,1> - 1640322907U, // : Cost 2 vext3 RHS, - 537753444U, // : Cost 1 vext3 LHS, LHS - 1482309734U, // : Cost 2 vext1 <0,u,u,2>, LHS - 1194031451U, // : Cost 2 vrev - 269271142U, // : Cost 1 vdup2 LHS - 835584U, // : Cost 0 copy LHS - 1482313014U, // : Cost 2 vext1 <0,u,u,2>, RHS - 2618566504U, // : Cost 3 vext2 LHS, <2,5,3,6> - 1544824762U, // : Cost 2 vext2 LHS, <2,6,3,7> - 1638479788U, // : Cost 2 vext3 RHS, - 835584U, // : Cost 0 copy LHS - 408576723U, // : Cost 1 vext1 LHS, LHS - 1482318582U, // : Cost 2 vext1 LHS, <1,0,3,2> - 120371557U, // : Cost 1 vrev LHS - 336380006U, // : Cost 1 vdup3 LHS - 408579382U, // : Cost 1 vext1 LHS, RHS - 1616140271U, // : Cost 2 vext3 LHS, - 1530098170U, // : Cost 2 vext1 LHS, <6,2,7,3> - 1880329544U, // : Cost 2 vzipr LHS, RHS - 408581934U, // : Cost 1 vext1 LHS, LHS - 1488298086U, // : Cost 2 vext1 <1,u,u,4>, LHS - 1488299437U, // : Cost 2 vext1 <1,u,u,4>, <1,u,u,4> - 1659271204U, // : Cost 2 vext3 LHS, - 1194195311U, // : Cost 2 vrev - 161926454U, // : Cost 1 vdup0 RHS - 471084342U, // : Cost 1 vext2 LHS, RHS - 1571368308U, // : Cost 2 vext2 RHS, <4,6,4,6> - 1640323153U, // : Cost 2 vext3 RHS, - 471084585U, // : Cost 1 vext2 LHS, RHS - 1494278246U, // : Cost 2 vext1 <2,u,u,5>, LHS - 1571368656U, // : Cost 2 vext2 RHS, <5,1,7,3> - 1494280327U, // : Cost 2 vext1 <2,u,u,5>, <2,u,u,5> - 1616140415U, // : Cost 2 vext3 LHS, - 1494281526U, // : Cost 2 vext1 <2,u,u,5>, RHS - 229035318U, // : Cost 1 vdup1 RHS - 537753754U, // : Cost 1 vext3 LHS, RHS - 1750355254U, // : Cost 2 vuzpr LHS, RHS - 537753772U, // : Cost 1 vext3 LHS, RHS - 1482342502U, // : Cost 2 vext1 <0,u,u,6>, LHS - 2556084982U, // : Cost 3 vext1 <0,u,u,6>, <1,0,3,2> - 1571369466U, // : Cost 2 vext2 RHS, <6,2,7,3> - 1611938000U, // : Cost 2 vext3 LHS, - 1482345782U, // : Cost 2 vext1 <0,u,u,6>, RHS - 1194359171U, // : Cost 2 vrev - 296144182U, // : Cost 1 vdup2 RHS - 27705344U, // : Cost 0 copy RHS - 27705344U, // : Cost 0 copy RHS - 432496742U, // : Cost 1 vext1 RHS, LHS - 1488324016U, // : Cost 2 vext1 <1,u,u,7>, <1,u,u,7> - 1494296713U, // : Cost 2 vext1 <2,u,u,7>, 
<2,u,u,7> - 1906901148U, // : Cost 2 vzipr RHS, LHS - 432500283U, // : Cost 1 vext1 RHS, RHS - 1506242256U, // : Cost 2 vext1 RHS, <5,1,7,3> - 120699277U, // : Cost 1 vrev RHS - 363253046U, // : Cost 1 vdup3 RHS - 432502574U, // : Cost 1 vext1 RHS, LHS - 408617688U, // : Cost 1 vext1 LHS, LHS - 471086894U, // : Cost 1 vext2 LHS, LHS - 537753957U, // : Cost 1 vext3 LHS, LHS - 835584U, // : Cost 0 copy LHS - 408620342U, // : Cost 1 vext1 LHS, RHS - 471087258U, // : Cost 1 vext2 LHS, RHS - 537753997U, // : Cost 1 vext3 LHS, RHS - 27705344U, // : Cost 0 copy RHS - 835584U, // : Cost 0 copy LHS - 0 -}; +static const unsigned PerfectShuffleTable[6561 + 1] = { + 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS + 2080972802U, // <0,0,0,1>: Cost 2 ins <0,0,u,1>, lane 2 + 1679065190U, // <0,0,0,2>: Cost 2 vuzpl <0,2,0,2>, LHS + 2085707777U, // <0,0,0,3>: Cost 2 ins <0,u,0,3>, lane 1 + 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS + 2080440323U, // <0,0,0,5>: Cost 2 ins <0,0,0,u>, lane 3 + 2080440323U, // <0,0,0,6>: Cost 2 ins <0,0,0,u>, lane 3 + 2080440323U, // <0,0,0,7>: Cost 2 ins <0,0,0,u>, lane 3 + 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS + 1812774912U, // <0,0,1,0>: Cost 2 vzipl LHS, <0,0,0,0> + 739033190U, // <0,0,1,1>: Cost 1 vzipl LHS, LHS + 1812775076U, // <0,0,1,2>: Cost 2 vzipl LHS, <0,2,0,2> + 2080514051U, // <0,0,1,3>: Cost 2 ins <0,0,1,u>, lane 3 + 1812816210U, // <0,0,1,4>: Cost 2 vzipl LHS, <0,4,1,5> + 2085797889U, // <0,0,1,5>: Cost 2 ins <0,u,1,5>, lane 1 + 2080514051U, // <0,0,1,6>: Cost 2 ins <0,0,1,u>, lane 3 + 2080514051U, // <0,0,1,7>: Cost 2 ins <0,0,1,u>, lane 3 + 739033757U, // <0,0,1,u>: Cost 1 vzipl LHS, LHS + 1946992640U, // <0,0,2,0>: Cost 2 vtrnl LHS, <0,0,0,0> + 1946992650U, // <0,0,2,1>: Cost 2 vtrnl LHS, <0,0,1,1> + 873250918U, // <0,0,2,2>: Cost 1 vtrnl LHS, LHS + 1012113409U, // <0,0,2,3>: Cost 1 ins LHS, lane 1 + 1946992844U, // <0,0,2,4>: Cost 2 vtrnl LHS, <0,2,4,6> + 2080587779U, // <0,0,2,5>: Cost 2 ins <0,0,2,u>, lane 3 + 2085879809U, // <0,0,2,6>: Cost 2 ins <0,u,2,6>, lane 1 + 2080587779U, // <0,0,2,7>: Cost 2 ins <0,0,2,u>, lane 3 + 873250972U, // <0,0,2,u>: Cost 1 vtrnl LHS, LHS + 2080964610U, // <0,0,3,0>: Cost 2 ins <0,0,u,0>, lane 2 + 2080972802U, // <0,0,3,1>: Cost 2 ins <0,0,u,1>, lane 2 + 2128388096U, // <0,0,3,2>: Cost 2 ins , lane 0 + 2013437973U, // <0,0,3,3>: Cost 2 vtrnr <0,0,2,3>, <0,0,2,3> + 3154739202U, // <0,0,3,4>: Cost 3 ins <0,0,u,4>, lane 2 + 2752809474U, // <0,0,3,5>: Cost 3 vuzpl <0,2,0,2>, <3,4,5,6> + 3154755586U, // <0,0,3,6>: Cost 3 ins <0,0,u,6>, lane 2 + 2818573312U, // <0,0,3,7>: Cost 3 vuzpr <0,0,0,0>, <1,3,5,7> + 2080972802U, // <0,0,3,u>: Cost 2 ins <0,0,u,1>, lane 2 + 2080964610U, // <0,0,4,0>: Cost 2 ins <0,0,u,0>, lane 2 + 1814708326U, // <0,0,4,1>: Cost 2 vzipl <0,4,1,5>, LHS + 1947828326U, // <0,0,4,2>: Cost 2 vtrnl <0,2,4,6>, LHS + 2086002689U, // <0,0,4,3>: Cost 2 ins <0,u,4,3>, lane 1 + 1947828428U, // <0,0,4,4>: Cost 2 vtrnl <0,2,4,6>, <0,2,4,6> + 2081030149U, // <0,0,4,5>: Cost 2 ins <0,0,u,u>, lane 5 + 1679068470U, // <0,0,4,6>: Cost 2 vuzpl <0,2,0,2>, RHS + 3154477059U, // <0,0,4,7>: Cost 3 ins <0,0,4,u>, lane 3 + 1679068488U, // <0,0,4,u>: Cost 2 vuzpl <0,2,0,2>, RHS + 2080964610U, // <0,0,5,0>: Cost 2 ins <0,0,u,0>, lane 2 + 2128527360U, // <0,0,5,1>: Cost 2 ins , lane 0 + 2080980994U, // <0,0,5,2>: Cost 2 ins <0,0,u,2>, lane 2 + 2086076417U, // <0,0,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 3202293760U, // <0,0,5,4>: Cost 3 ins , lane 0 + 1947213953U, // <0,0,5,5>: Cost 2 vtrnl <0,1,5,3>, <0,1,5,3> + 
2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> + 1744833846U, // <0,0,5,7>: Cost 2 vuzpr <0,0,0,0>, RHS + 2128527360U, // <0,0,5,u>: Cost 2 ins , lane 0 + 2080964610U, // <0,0,6,0>: Cost 2 ins <0,0,u,0>, lane 2 + 2080972802U, // <0,0,6,1>: Cost 2 ins <0,0,u,1>, lane 2 + 2128609280U, // <0,0,6,2>: Cost 2 ins , lane 0 + 2086150145U, // <0,0,6,3>: Cost 2 ins <0,u,6,3>, lane 1 + 3202367488U, // <0,0,6,4>: Cost 3 ins , lane 0 + 2617250536U, // <0,0,6,5>: Cost 3 vext2 <0,0,0,0>, <6,5,6,7> + 1947287690U, // <0,0,6,6>: Cost 2 vtrnl <0,1,6,3>, <0,1,6,3> + 2081030149U, // <0,0,6,7>: Cost 2 ins <0,0,u,u>, lane 5 + 2080972802U, // <0,0,6,u>: Cost 2 ins <0,0,u,1>, lane 2 + 2080964610U, // <0,0,7,0>: Cost 2 ins <0,0,u,0>, lane 2 + 2080972802U, // <0,0,7,1>: Cost 2 ins <0,0,u,1>, lane 2 + 2080980994U, // <0,0,7,2>: Cost 2 ins <0,0,u,2>, lane 2 + 2086223873U, // <0,0,7,3>: Cost 2 ins <0,u,7,3>, lane 1 + 3154739202U, // <0,0,7,4>: Cost 3 ins <0,0,u,4>, lane 2 + 2617251265U, // <0,0,7,5>: Cost 3 vext2 <0,0,0,0>, <7,5,6,7> + 3154755586U, // <0,0,7,6>: Cost 3 ins <0,0,u,6>, lane 2 + 1947361427U, // <0,0,7,7>: Cost 2 vtrnl <0,1,7,3>, <0,1,7,3> + 2080972802U, // <0,0,7,u>: Cost 2 ins <0,0,u,1>, lane 2 + 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS + 743678054U, // <0,0,u,1>: Cost 1 vzipl LHS, LHS + 873693286U, // <0,0,u,2>: Cost 1 vtrnl LHS, LHS + 1012113409U, // <0,0,u,3>: Cost 1 ins LHS, lane 1 + 1947435212U, // <0,0,u,4>: Cost 2 vtrnl LHS, <0,2,4,6> + 2085797889U, // <0,0,u,5>: Cost 2 ins <0,u,1,5>, lane 1 + 1679071386U, // <0,0,u,6>: Cost 2 vuzpl <0,2,0,2>, RHS + 2080514051U, // <0,0,u,7>: Cost 2 ins <0,0,1,u>, lane 3 + 873693340U, // <0,0,u,u>: Cost 1 vtrnl LHS, LHS + 2085683201U, // <0,1,0,0>: Cost 2 ins <0,u,0,0>, lane 1 + 1007951877U, // <0,1,0,1>: Cost 1 ins LHS, lane 5 + 1680490598U, // <0,1,0,2>: Cost 2 vuzpl <0,4,1,5>, LHS + 1007910914U, // <0,1,0,3>: Cost 1 ins LHS, lane 2 + 2081660930U, // <0,1,0,4>: Cost 2 ins <0,1,u,4>, lane 2 + 2081669122U, // <0,1,0,5>: Cost 2 ins <0,1,u,5>, lane 2 + 2081677314U, // <0,1,0,6>: Cost 2 ins <0,1,u,6>, lane 2 + 2081685506U, // <0,1,0,7>: Cost 2 ins <0,1,u,7>, lane 2 + 1007951877U, // <0,1,0,u>: Cost 1 ins LHS, lane 5 + 1812775670U, // <0,1,1,0>: Cost 2 vzipl LHS, <1,0,3,2> + 1812775732U, // <0,1,1,1>: Cost 2 vzipl LHS, <1,1,1,1> + 1812775830U, // <0,1,1,2>: Cost 2 vzipl LHS, <1,2,3,0> + 1007910914U, // <0,1,1,3>: Cost 1 ins LHS, lane 2 + 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS + 1812817040U, // <0,1,1,5>: Cost 2 vzipl LHS, <1,5,3,7> + 2081677314U, // <0,1,1,6>: Cost 2 ins <0,1,u,6>, lane 2 + 2081685506U, // <0,1,1,7>: Cost 2 ins <0,1,u,7>, lane 2 + 1007910914U, // <0,1,1,u>: Cost 1 ins LHS, lane 2 + 1007509507U, // <0,1,2,0>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,2,1>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,2,2>: Cost 1 ins LHS, lane 3 + 835584U, // <0,1,2,3>: Cost 0 copy LHS + 1007509507U, // <0,1,2,4>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,2,5>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,2,6>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,2,7>: Cost 1 ins LHS, lane 3 + 835584U, // <0,1,2,u>: Cost 0 copy LHS + 2133680132U, // <0,1,3,0>: Cost 2 ins , lane 4 + 2081636354U, // <0,1,3,1>: Cost 2 ins <0,1,u,1>, lane 2 + 2133696516U, // <0,1,3,2>: Cost 2 ins , lane 4 + 1007910914U, // <0,1,3,3>: Cost 1 ins LHS, lane 2 + 2133712900U, // <0,1,3,4>: Cost 2 ins , lane 4 + 2081669122U, // <0,1,3,5>: Cost 2 ins <0,1,u,5>, lane 2 + 2081677314U, // <0,1,3,6>: Cost 2 ins <0,1,u,6>, lane 2 + 2133737476U, // <0,1,3,7>: Cost 2 
ins , lane 4 + 1007910914U, // <0,1,3,u>: Cost 1 ins LHS, lane 2 + 2081628162U, // <0,1,4,0>: Cost 2 ins <0,1,u,0>, lane 2 + 2081636354U, // <0,1,4,1>: Cost 2 ins <0,1,u,1>, lane 2 + 2081644546U, // <0,1,4,2>: Cost 2 ins <0,1,u,2>, lane 2 + 1007910914U, // <0,1,4,3>: Cost 1 ins LHS, lane 2 + 2081660930U, // <0,1,4,4>: Cost 2 ins <0,1,u,4>, lane 2 + 1007951877U, // <0,1,4,5>: Cost 1 ins LHS, lane 5 + 1680493878U, // <0,1,4,6>: Cost 2 vuzpl <0,4,1,5>, RHS + 2081685506U, // <0,1,4,7>: Cost 2 ins <0,1,u,7>, lane 2 + 1007910914U, // <0,1,4,u>: Cost 1 ins LHS, lane 2 + 2081628162U, // <0,1,5,0>: Cost 2 ins <0,1,u,0>, lane 2 + 2133835780U, // <0,1,5,1>: Cost 2 ins , lane 4 + 2081644546U, // <0,1,5,2>: Cost 2 ins <0,1,u,2>, lane 2 + 1007910914U, // <0,1,5,3>: Cost 1 ins LHS, lane 2 + 2081660930U, // <0,1,5,4>: Cost 2 ins <0,1,u,4>, lane 2 + 2133868548U, // <0,1,5,5>: Cost 2 ins , lane 4 + 2133876740U, // <0,1,5,6>: Cost 2 ins , lane 4 + 2133884932U, // <0,1,5,7>: Cost 2 ins , lane 4 + 1007910914U, // <0,1,5,u>: Cost 1 ins LHS, lane 2 + 2081628162U, // <0,1,6,0>: Cost 2 ins <0,1,u,0>, lane 2 + 2081636354U, // <0,1,6,1>: Cost 2 ins <0,1,u,1>, lane 2 + 2133917700U, // <0,1,6,2>: Cost 2 ins , lane 4 + 1007910914U, // <0,1,6,3>: Cost 1 ins LHS, lane 2 + 2081660930U, // <0,1,6,4>: Cost 2 ins <0,1,u,4>, lane 2 + 2081669122U, // <0,1,6,5>: Cost 2 ins <0,1,u,5>, lane 2 + 2133950468U, // <0,1,6,6>: Cost 2 ins , lane 4 + 1060216836U, // <0,1,6,7>: Cost 1 ins RHS, lane 4 + 1007910914U, // <0,1,6,u>: Cost 1 ins LHS, lane 2 + 2133975044U, // <0,1,7,0>: Cost 2 ins , lane 4 + 2081636354U, // <0,1,7,1>: Cost 2 ins <0,1,u,1>, lane 2 + 2081644546U, // <0,1,7,2>: Cost 2 ins <0,1,u,2>, lane 2 + 1007910914U, // <0,1,7,3>: Cost 1 ins LHS, lane 2 + 2134007812U, // <0,1,7,4>: Cost 2 ins , lane 4 + 2081669122U, // <0,1,7,5>: Cost 2 ins <0,1,u,5>, lane 2 + 2134024196U, // <0,1,7,6>: Cost 2 ins , lane 4 + 2134032388U, // <0,1,7,7>: Cost 2 ins , lane 4 + 1007910914U, // <0,1,7,u>: Cost 1 ins LHS, lane 2 + 1007509507U, // <0,1,u,0>: Cost 1 ins LHS, lane 3 + 1007951877U, // <0,1,u,1>: Cost 1 ins LHS, lane 5 + 1007509507U, // <0,1,u,2>: Cost 1 ins LHS, lane 3 + 835584U, // <0,1,u,3>: Cost 0 copy LHS + 1007509507U, // <0,1,u,4>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,u,5>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,u,6>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,1,u,7>: Cost 1 ins LHS, lane 3 + 835584U, // <0,1,u,u>: Cost 0 copy LHS + 1678557184U, // <0,2,0,0>: Cost 2 vuzpl LHS, <0,0,0,0> + 1678598154U, // <0,2,0,1>: Cost 2 vuzpl LHS, <0,0,1,1> + 604815462U, // <0,2,0,2>: Cost 1 vuzpl LHS, LHS + 2081767427U, // <0,2,0,3>: Cost 2 ins <0,2,0,u>, lane 3 + 1678598348U, // <0,2,0,4>: Cost 2 vuzpl LHS, <0,2,4,6> + 2081767427U, // <0,2,0,5>: Cost 2 ins <0,2,0,u>, lane 3 + 2082340866U, // <0,2,0,6>: Cost 2 ins <0,2,u,6>, lane 2 + 2081767427U, // <0,2,0,7>: Cost 2 ins <0,2,0,u>, lane 3 + 604815516U, // <0,2,0,u>: Cost 1 vuzpl LHS, LHS + 2752340940U, // <0,2,1,0>: Cost 3 vuzpl LHS, <1,3,0,0> + 1678558004U, // <0,2,1,1>: Cost 2 vuzpl LHS, <1,1,1,1> + 1812776552U, // <0,2,1,2>: Cost 2 vzipl LHS, <2,2,2,2> + 1678557942U, // <0,2,1,3>: Cost 2 vuzpl LHS, <1,0,3,2> + 2752340982U, // <0,2,1,4>: Cost 3 vuzpl LHS, <1,3,4,6> + 1678599168U, // <0,2,1,5>: Cost 2 vuzpl LHS, <1,3,5,7> + 1812817850U, // <0,2,1,6>: Cost 2 vzipl LHS, <2,6,3,7> + 2860466282U, // <0,2,1,7>: Cost 3 vuzpr <7,0,1,2>, <0,1,2,7> + 1678598947U, // <0,2,1,u>: Cost 2 vuzpl LHS, <1,0,u,2> + 1678558886U, // <0,2,2,0>: Cost 2 vuzpl LHS, <2,3,0,1> + 2085838849U, // 
<0,2,2,1>: Cost 2 ins <0,u,2,1>, lane 1 + 1678558824U, // <0,2,2,2>: Cost 2 vuzpl LHS, <2,2,2,2> + 1012113409U, // <0,2,2,3>: Cost 1 ins LHS, lane 1 + 1678558926U, // <0,2,2,4>: Cost 2 vuzpl LHS, <2,3,4,5> + 2085871617U, // <0,2,2,5>: Cost 2 ins <0,u,2,5>, lane 1 + 2085879809U, // <0,2,2,6>: Cost 2 ins <0,u,2,6>, lane 1 + 2085888001U, // <0,2,2,7>: Cost 2 ins <0,u,2,7>, lane 1 + 1012113409U, // <0,2,2,u>: Cost 1 ins LHS, lane 1 + 2129698816U, // <0,2,3,0>: Cost 2 ins , lane 0 + 1678559382U, // <0,2,3,1>: Cost 2 vuzpl LHS, <3,0,1,2> + 2082308098U, // <0,2,3,2>: Cost 2 ins <0,2,u,2>, lane 2 + 1678559644U, // <0,2,3,3>: Cost 2 vuzpl LHS, <3,3,3,3> + 2129731584U, // <0,2,3,4>: Cost 2 ins , lane 0 + 1678559746U, // <0,2,3,5>: Cost 2 vuzpl LHS, <3,4,5,6> + 2082340866U, // <0,2,3,6>: Cost 2 ins <0,2,u,6>, lane 2 + 2824782848U, // <0,2,3,7>: Cost 3 vuzpr <1,0,3,2>, <1,3,5,7> + 1678559445U, // <0,2,3,u>: Cost 2 vuzpl LHS, <3,0,u,2> + 2082062339U, // <0,2,4,0>: Cost 2 ins <0,2,4,u>, lane 3 + 2082062339U, // <0,2,4,1>: Cost 2 ins <0,2,4,u>, lane 3 + 2082308098U, // <0,2,4,2>: Cost 2 ins <0,2,u,2>, lane 2 + 2082062339U, // <0,2,4,3>: Cost 2 ins <0,2,4,u>, lane 3 + 2082062339U, // <0,2,4,4>: Cost 2 ins <0,2,4,u>, lane 3 + 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS + 604818742U, // <0,2,4,6>: Cost 1 vuzpl LHS, RHS + 2082062339U, // <0,2,4,7>: Cost 2 ins <0,2,4,u>, lane 3 + 604818760U, // <0,2,4,u>: Cost 1 vuzpl LHS, RHS + 3105260438U, // <0,2,5,0>: Cost 3 vtrnr <3,0,4,5>, <1,2,3,0> + 1678561408U, // <0,2,5,1>: Cost 2 vuzpl LHS, <5,7,1,3> + 2082308098U, // <0,2,5,2>: Cost 2 ins <0,2,u,2>, lane 2 + 2086076417U, // <0,2,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 2756947554U, // <0,2,5,4>: Cost 3 vuzpl LHS, <5,0,4,1> + 1678561284U, // <0,2,5,5>: Cost 2 vuzpl LHS, <5,5,5,5> + 2082340866U, // <0,2,5,6>: Cost 2 ins <0,2,u,6>, lane 2 + 1751043382U, // <0,2,5,7>: Cost 2 vuzpr <1,0,3,2>, RHS + 1751043383U, // <0,2,5,u>: Cost 2 vuzpr <1,0,3,2>, RHS + 1678562126U, // <0,2,6,0>: Cost 2 vuzpl LHS, <6,7,0,1> + 2756948257U, // <0,2,6,1>: Cost 3 vuzpl LHS, <6,0,1,2> + 2082308098U, // <0,2,6,2>: Cost 2 ins <0,2,u,2>, lane 2 + 2086150145U, // <0,2,6,3>: Cost 2 ins <0,u,6,3>, lane 1 + 1678562166U, // <0,2,6,4>: Cost 2 vuzpl LHS, <6,7,4,5> + 2756948621U, // <0,2,6,5>: Cost 3 vuzpl LHS, <6,4,5,6> + 2082340866U, // <0,2,6,6>: Cost 2 ins <0,2,u,6>, lane 2 + 2082357253U, // <0,2,6,7>: Cost 2 ins <0,2,u,u>, lane 5 + 2082308098U, // <0,2,6,u>: Cost 2 ins <0,2,u,2>, lane 2 + 3099378582U, // <0,2,7,0>: Cost 3 vtrnr <2,0,5,7>, <1,2,3,0> + 1678562298U, // <0,2,7,1>: Cost 2 vuzpl LHS, <7,0,1,2> + 2082308098U, // <0,2,7,2>: Cost 2 ins <0,2,u,2>, lane 2 + 2130018304U, // <0,2,7,3>: Cost 2 ins , lane 0 + 2645136742U, // <0,2,7,4>: Cost 3 vext2 <4,6,0,2>, <7,4,5,6> + 1678562662U, // <0,2,7,5>: Cost 2 vuzpl LHS, <7,4,5,6> + 2082340866U, // <0,2,7,6>: Cost 2 ins <0,2,u,6>, lane 2 + 1678562924U, // <0,2,7,7>: Cost 2 vuzpl LHS, <7,7,7,7> + 2082308098U, // <0,2,7,u>: Cost 2 ins <0,2,u,2>, lane 2 + 1947436710U, // <0,2,u,0>: Cost 2 vtrnl LHS, <2,3,0,1> + 1678603987U, // <0,2,u,1>: Cost 2 vuzpl LHS, + 604821294U, // <0,2,u,2>: Cost 1 vuzpl LHS, LHS + 1012113409U, // <0,2,u,3>: Cost 1 ins LHS, lane 1 + 1947436750U, // <0,2,u,4>: Cost 2 vtrnl LHS, <2,3,4,5> + 1678604351U, // <0,2,u,5>: Cost 2 vuzpl LHS, + 604821658U, // <0,2,u,6>: Cost 1 vuzpl LHS, RHS + 1751043625U, // <0,2,u,7>: Cost 2 vuzpr <1,0,3,2>, RHS + 604821348U, // <0,2,u,u>: Cost 1 vuzpl LHS, LHS + 2085683201U, // <0,3,0,0>: Cost 2 ins <0,u,0,0>, lane 1 + 2130149376U, // 
<0,3,0,1>: Cost 2 ins , lane 0 + 2085699585U, // <0,3,0,2>: Cost 2 ins <0,u,0,2>, lane 1 + 1745002517U, // <0,3,0,3>: Cost 2 vuzpr <0,0,2,3>, <0,0,2,3> + 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS + 3021244930U, // <0,3,0,5>: Cost 3 vtrnl <0,2,0,2>, <3,4,5,6> + 3159474177U, // <0,3,0,6>: Cost 3 ins <0,u,0,6>, lane 1 + 2952791184U, // <0,3,0,7>: Cost 3 vzipr <0,0,0,0>, <1,5,3,7> + 2130149376U, // <0,3,0,u>: Cost 2 ins , lane 0 + 1812777110U, // <0,3,1,0>: Cost 2 vzipl LHS, <3,0,1,2> + 2085765121U, // <0,3,1,1>: Cost 2 ins <0,u,1,1>, lane 1 + 2886519105U, // <0,3,1,2>: Cost 3 vzipl LHS, <3,2,2,2> + 1812777372U, // <0,3,1,3>: Cost 2 vzipl LHS, <3,3,3,3> + 1812777474U, // <0,3,1,4>: Cost 2 vzipl LHS, <3,4,5,6> + 2085797889U, // <0,3,1,5>: Cost 2 ins <0,u,1,5>, lane 1 + 3159547905U, // <0,3,1,6>: Cost 3 ins <0,u,1,6>, lane 1 + 2966733968U, // <0,3,1,7>: Cost 3 vzipr <2,3,0,1>, <1,5,3,7> + 1812777758U, // <0,3,1,u>: Cost 2 vzipl LHS, <3,u,1,2> + 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS + 1946994838U, // <0,3,2,1>: Cost 2 vtrnl LHS, <3,0,1,2> + 2085847041U, // <0,3,2,2>: Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // <0,3,2,3>: Cost 1 ins LHS, lane 1 + 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS + 1946995202U, // <0,3,2,5>: Cost 2 vtrnl LHS, <3,4,5,6> + 2085879809U, // <0,3,2,6>: Cost 2 ins <0,u,2,6>, lane 1 + 2085888001U, // <0,3,2,7>: Cost 2 ins <0,u,2,7>, lane 1 + 1012113409U, // <0,3,2,u>: Cost 1 ins LHS, lane 1 + 2887747734U, // <0,3,3,0>: Cost 3 vzipl <0,3,1,0>, <3,0,1,2> + 2753022102U, // <0,3,3,1>: Cost 3 vuzpl <0,2,3,1>, <3,0,1,2> + 2965422838U, // <0,3,3,2>: Cost 3 vzipr <2,1,0,3>, <1,0,3,2> + 2130386944U, // <0,3,3,3>: Cost 2 ins , lane 0 + 2887780866U, // <0,3,3,4>: Cost 3 vzipl <0,3,1,4>, <3,4,5,6> + 2753055234U, // <0,3,3,5>: Cost 3 vuzpl <0,2,3,5>, <3,4,5,6> + 2752375389U, // <0,3,3,6>: Cost 3 vuzpl <0,1,3,3>, <3,5,6,7> + 3204161536U, // <0,3,3,7>: Cost 3 ins , lane 0 + 2130386944U, // <0,3,3,u>: Cost 2 ins , lane 0 + 2888452246U, // <0,3,4,0>: Cost 3 vzipl <0,4,1,5>, <3,0,1,2> + 3021572246U, // <0,3,4,1>: Cost 3 vtrnl <0,2,4,6>, <3,0,1,2> + 3021572257U, // <0,3,4,2>: Cost 3 vtrnl <0,2,4,6>, <3,0,2,4> + 2086002689U, // <0,3,4,3>: Cost 2 ins <0,u,4,3>, lane 1 + 2888452610U, // <0,3,4,4>: Cost 3 vzipl <0,4,1,5>, <3,4,5,6> + 2130477056U, // <0,3,4,5>: Cost 2 ins , lane 0 + 2086027265U, // <0,3,4,6>: Cost 2 ins <0,u,4,6>, lane 1 + 2818747621U, // <0,3,4,7>: Cost 3 vuzpr <0,0,2,3>, <4,4,6,7> + 2130477056U, // <0,3,4,u>: Cost 2 ins , lane 0 + 3204251648U, // <0,3,5,0>: Cost 3 ins , lane 0 + 3204259840U, // <0,3,5,1>: Cost 3 ins , lane 0 + 2961457910U, // <0,3,5,2>: Cost 3 vzipr <1,4,0,5>, <1,0,3,2> + 2086076417U, // <0,3,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> + 3204292608U, // <0,3,5,5>: Cost 3 ins , lane 0 + 2653769826U, // <0,3,5,6>: Cost 3 vext2 <6,1,0,3>, <5,6,7,0> + 2130567168U, // <0,3,5,7>: Cost 2 ins , lane 0 + 2130567168U, // <0,3,5,u>: Cost 2 ins , lane 0 + 2854506594U, // <0,3,6,0>: Cost 3 vuzpr <6,0,1,3>, <5,6,7,0> + 2653770090U, // <0,3,6,1>: Cost 3 vext2 <6,1,0,3>, <6,1,0,3> + 3204341760U, // <0,3,6,2>: Cost 3 ins , lane 0 + 2086150145U, // <0,3,6,3>: Cost 2 ins <0,u,6,3>, lane 1 + 3204358144U, // <0,3,6,4>: Cost 3 ins , lane 0 + 3204366336U, // <0,3,6,5>: Cost 3 ins , lane 0 + 3204374528U, // <0,3,6,6>: Cost 3 ins , lane 0 + 2130640896U, // <0,3,6,7>: Cost 2 ins , lane 0 + 2086150145U, // <0,3,6,u>: Cost 2 ins <0,u,6,3>, lane 1 + 2968109974U, // <0,3,7,0>: Cost 3 vzipr <2,5,0,7>, <1,2,3,0> 
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> + 2660406420U, // <0,3,7,2>: Cost 3 vext2 <7,2,0,3>, <7,2,0,3> + 2086223873U, // <0,3,7,3>: Cost 2 ins <0,u,7,3>, lane 1 + 3204431872U, // <0,3,7,4>: Cost 3 ins , lane 0 + 3204440064U, // <0,3,7,5>: Cost 3 ins , lane 0 + 2752378305U, // <0,3,7,6>: Cost 3 vuzpl <0,1,3,3>, <7,5,6,7> + 3204456448U, // <0,3,7,7>: Cost 3 ins , lane 0 + 2086223873U, // <0,3,7,u>: Cost 2 ins <0,u,7,3>, lane 1 + 1817421974U, // <0,3,u,0>: Cost 2 vzipl LHS, <3,0,1,2> + 1947437206U, // <0,3,u,1>: Cost 2 vtrnl LHS, <3,0,1,2> + 2085699585U, // <0,3,u,2>: Cost 2 ins <0,u,0,2>, lane 1 + 1012113409U, // <0,3,u,3>: Cost 1 ins LHS, lane 1 + 1817422338U, // <0,3,u,4>: Cost 2 vzipl LHS, <3,4,5,6> + 1947437570U, // <0,3,u,5>: Cost 2 vtrnl LHS, <3,4,5,6> + 2085879809U, // <0,3,u,6>: Cost 2 ins <0,u,2,6>, lane 1 + 2130567168U, // <0,3,u,7>: Cost 2 ins , lane 0 + 1012113409U, // <0,3,u,u>: Cost 1 ins LHS, lane 1 + 2085683201U, // <0,4,0,0>: Cost 2 ins <0,u,0,0>, lane 1 + 2083684357U, // <0,4,0,1>: Cost 2 ins <0,4,u,u>, lane 5 + 1679392870U, // <0,4,0,2>: Cost 2 vuzpl <0,2,4,6>, LHS + 2085707777U, // <0,4,0,3>: Cost 2 ins <0,u,0,3>, lane 1 + 1679392972U, // <0,4,0,4>: Cost 2 vuzpl <0,2,4,6>, <0,2,4,6> + 2083659778U, // <0,4,0,5>: Cost 2 ins <0,4,u,5>, lane 2 + 1947503926U, // <0,4,0,6>: Cost 2 vtrnl <0,2,0,2>, RHS + 3156836355U, // <0,4,0,7>: Cost 3 ins <0,4,0,u>, lane 3 + 1947503944U, // <0,4,0,u>: Cost 2 vtrnl <0,2,0,2>, RHS + 2083168259U, // <0,4,1,0>: Cost 2 ins <0,4,1,u>, lane 3 + 2085765121U, // <0,4,1,1>: Cost 2 ins <0,u,1,1>, lane 1 + 2083168259U, // <0,4,1,2>: Cost 2 ins <0,4,1,u>, lane 3 + 2083168259U, // <0,4,1,3>: Cost 2 ins <0,4,1,u>, lane 3 + 2083168259U, // <0,4,1,4>: Cost 2 ins <0,4,1,u>, lane 3 + 739036470U, // <0,4,1,5>: Cost 1 vzipl LHS, RHS + 1948929334U, // <0,4,1,6>: Cost 2 vtrnl <0,4,1,5>, RHS + 2083168259U, // <0,4,1,7>: Cost 2 ins <0,4,1,u>, lane 3 + 739036713U, // <0,4,1,u>: Cost 1 vzipl LHS, RHS + 2083241987U, // <0,4,2,0>: Cost 2 ins <0,4,2,u>, lane 3 + 2083241987U, // <0,4,2,1>: Cost 2 ins <0,4,2,u>, lane 3 + 2085847041U, // <0,4,2,2>: Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // <0,4,2,3>: Cost 1 ins LHS, lane 1 + 2083241987U, // <0,4,2,4>: Cost 2 ins <0,4,2,u>, lane 3 + 1813286198U, // <0,4,2,5>: Cost 2 vzipl <0,2,0,2>, RHS + 873254198U, // <0,4,2,6>: Cost 1 vtrnl LHS, RHS + 2083241987U, // <0,4,2,7>: Cost 2 ins <0,4,2,u>, lane 3 + 873254216U, // <0,4,2,u>: Cost 1 vtrnl LHS, RHS + 3020811514U, // <0,4,3,0>: Cost 3 vtrnl <0,1,3,3>, <4,5,0,1> + 2753136790U, // <0,4,3,1>: Cost 3 vuzpl <0,2,4,6>, <3,0,1,2> + 2753136801U, // <0,4,3,2>: Cost 3 vuzpl <0,2,4,6>, <3,0,2,4> + 2085928961U, // <0,4,3,3>: Cost 2 ins <0,u,3,3>, lane 1 + 3204800512U, // <0,4,3,4>: Cost 3 ins , lane 0 + 2083659778U, // <0,4,3,5>: Cost 2 ins <0,4,u,5>, lane 2 + 2083667970U, // <0,4,3,6>: Cost 2 ins <0,4,u,6>, lane 2 + 3087183077U, // <0,4,3,7>: Cost 3 vtrnr <0,0,2,3>, <4,4,6,7> + 2083659778U, // <0,4,3,u>: Cost 2 ins <0,4,u,5>, lane 2 + 2753137995U, // <0,4,4,0>: Cost 3 vuzpl <0,2,4,6>, <4,6,0,1> + 2888453090U, // <0,4,4,1>: Cost 3 vzipl <0,4,1,5>, <4,1,5,0> + 2888535100U, // <0,4,4,2>: Cost 3 vzipl <0,4,2,6>, <4,2,6,0> + 2086002689U, // <0,4,4,3>: Cost 2 ins <0,u,4,3>, lane 1 + 2131132416U, // <0,4,4,4>: Cost 2 ins , lane 0 + 1814711606U, // <0,4,4,5>: Cost 2 vzipl <0,4,1,5>, RHS + 1679396150U, // <0,4,4,6>: Cost 2 vuzpl <0,2,4,6>, RHS + 3157131267U, // <0,4,4,7>: Cost 3 ins <0,4,4,u>, lane 3 + 1679396168U, // <0,4,4,u>: Cost 2 vuzpl <0,2,4,6>, RHS + 2568388710U, // 
<0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS + 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> + 3204931584U, // <0,4,5,2>: Cost 3 ins , lane 0 + 2086076417U, // <0,4,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS + 2131214336U, // <0,4,5,5>: Cost 2 ins , lane 0 + 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2830699830U, // <0,4,5,7>: Cost 3 vuzpr <2,0,2,4>, RHS + 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2712227146U, // <0,4,6,0>: Cost 3 vext3 <4,6,0,0>, <4,6,0,0> + 2753138977U, // <0,4,6,1>: Cost 3 vuzpl <0,2,4,6>, <6,0,1,2> + 2753138988U, // <0,4,6,2>: Cost 3 vuzpl <0,2,4,6>, <6,0,2,4> + 2086150145U, // <0,4,6,3>: Cost 2 ins <0,u,6,3>, lane 1 + 2712522094U, // <0,4,6,4>: Cost 3 vext3 <4,6,4,0>, <4,6,4,0> + 2083659778U, // <0,4,6,5>: Cost 2 ins <0,4,u,5>, lane 2 + 2131296256U, // <0,4,6,6>: Cost 2 ins , lane 0 + 2083684357U, // <0,4,6,7>: Cost 2 ins <0,4,u,u>, lane 5 + 2083659778U, // <0,4,6,u>: Cost 2 ins <0,4,u,5>, lane 2 + 3021106426U, // <0,4,7,0>: Cost 3 vtrnl <0,1,7,3>, <4,5,0,1> + 2860487502U, // <0,4,7,1>: Cost 3 vuzpr <7,0,1,4>, <6,7,0,1> + 3157377026U, // <0,4,7,2>: Cost 3 ins <0,4,u,2>, lane 2 + 2086223873U, // <0,4,7,3>: Cost 2 ins <0,u,7,3>, lane 1 + 3205095424U, // <0,4,7,4>: Cost 3 ins , lane 0 + 2083659778U, // <0,4,7,5>: Cost 2 ins <0,4,u,5>, lane 2 + 2131369984U, // <0,4,7,6>: Cost 2 ins , lane 0 + 2752452204U, // <0,4,7,7>: Cost 3 vuzpl <0,1,4,3>, <7,7,7,7> + 2083659778U, // <0,4,7,u>: Cost 2 ins <0,4,u,5>, lane 2 + 2083168259U, // <0,4,u,0>: Cost 2 ins <0,4,1,u>, lane 3 + 2083684357U, // <0,4,u,1>: Cost 2 ins <0,4,u,u>, lane 5 + 1679398702U, // <0,4,u,2>: Cost 2 vuzpl <0,2,4,6>, LHS + 1012113409U, // <0,4,u,3>: Cost 1 ins LHS, lane 1 + 1679392972U, // <0,4,u,4>: Cost 2 vuzpl <0,2,4,6>, <0,2,4,6> + 743681334U, // <0,4,u,5>: Cost 1 vzipl LHS, RHS + 873696566U, // <0,4,u,6>: Cost 1 vtrnl LHS, RHS + 2083168259U, // <0,4,u,7>: Cost 2 ins <0,4,1,u>, lane 3 + 873696584U, // <0,4,u,u>: Cost 1 vtrnl LHS, RHS + 2085683201U, // <0,5,0,0>: Cost 2 ins <0,u,0,0>, lane 1 + 2131476480U, // <0,5,0,1>: Cost 2 ins , lane 0 + 2085699585U, // <0,5,0,2>: Cost 2 ins <0,u,0,2>, lane 1 + 2085707777U, // <0,5,0,3>: Cost 2 ins <0,u,0,3>, lane 1 + 3159457793U, // <0,5,0,4>: Cost 3 ins <0,u,0,4>, lane 1 + 1678778497U, // <0,5,0,5>: Cost 2 vuzpl <0,1,5,3>, <0,1,5,3> + 3159474177U, // <0,5,0,6>: Cost 3 ins <0,u,0,6>, lane 1 + 2013269302U, // <0,5,0,7>: Cost 2 vtrnr <0,0,0,0>, RHS + 2085699585U, // <0,5,0,u>: Cost 2 ins <0,u,0,2>, lane 1 + 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS + 2085765121U, // <0,5,1,1>: Cost 2 ins <0,u,1,1>, lane 1 + 3159515137U, // <0,5,1,2>: Cost 3 ins <0,u,1,2>, lane 1 + 2085781505U, // <0,5,1,3>: Cost 2 ins <0,u,1,3>, lane 1 + 1812778950U, // <0,5,1,4>: Cost 2 vzipl LHS, <5,4,7,6> + 2085797889U, // <0,5,1,5>: Cost 2 ins <0,u,1,5>, lane 1 + 1812779106U, // <0,5,1,6>: Cost 2 vzipl LHS, <5,6,7,0> + 2013351222U, // <0,5,1,7>: Cost 2 vtrnr <0,0,1,1>, RHS + 2085765121U, // <0,5,1,u>: Cost 2 ins <0,u,1,1>, lane 1 + 2085830657U, // <0,5,2,0>: Cost 2 ins <0,u,2,0>, lane 1 + 1946996864U, // <0,5,2,1>: Cost 2 vtrnl LHS, <5,7,1,3> + 2085847041U, // <0,5,2,2>: Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // <0,5,2,3>: Cost 1 ins LHS, lane 1 + 2085863425U, // <0,5,2,4>: Cost 2 ins <0,u,2,4>, lane 1 + 1946996740U, // <0,5,2,5>: Cost 2 vtrnl LHS, <5,5,5,5> + 2085879809U, // <0,5,2,6>: Cost 2 ins <0,u,2,6>, lane 1 + 2019478838U, // <0,5,2,7>: Cost 2 vtrnr <1,0,3,2>, RHS + 1012113409U, // <0,5,2,u>: 
Cost 1 ins LHS, lane 1 + 2637858966U, // <0,5,3,0>: Cost 3 vext2 <3,4,0,5>, <3,0,1,2> + 3205439488U, // <0,5,3,1>: Cost 3 ins , lane 0 + 3087183153U, // <0,5,3,2>: Cost 3 vtrnr <0,0,2,3>, <4,5,6,2> + 2085928961U, // <0,5,3,3>: Cost 2 ins <0,u,3,3>, lane 1 + 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> + 3205472256U, // <0,5,3,5>: Cost 3 ins , lane 0 + 3205480448U, // <0,5,3,6>: Cost 3 ins , lane 0 + 2131746816U, // <0,5,3,7>: Cost 2 ins , lane 0 + 2131746816U, // <0,5,3,u>: Cost 2 ins , lane 0 + 2888453704U, // <0,5,4,0>: Cost 3 vzipl <0,4,1,5>, <5,0,1,2> + 3159728129U, // <0,5,4,1>: Cost 3 ins <0,u,4,1>, lane 1 + 3159736321U, // <0,5,4,2>: Cost 3 ins <0,u,4,2>, lane 1 + 2086002689U, // <0,5,4,3>: Cost 2 ins <0,u,4,3>, lane 1 + 2888454068U, // <0,5,4,4>: Cost 3 vzipl <0,4,1,5>, <5,4,5,6> + 2131804160U, // <0,5,4,5>: Cost 2 ins , lane 0 + 2086027265U, // <0,5,4,6>: Cost 2 ins <0,u,4,6>, lane 1 + 2131820544U, // <0,5,4,7>: Cost 2 ins , lane 0 + 2086027265U, // <0,5,4,u>: Cost 2 ins <0,u,4,6>, lane 1 + 3205578752U, // <0,5,5,0>: Cost 3 ins , lane 0 + 2997291922U, // <0,5,5,1>: Cost 3 vzipr <7,4,0,5>, <4,0,5,1> + 2752523939U, // <0,5,5,2>: Cost 3 vuzpl <0,1,5,3>, <5,1,2,3> + 2086076417U, // <0,5,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 3205611520U, // <0,5,5,4>: Cost 3 ins , lane 0 + 2131877888U, // <0,5,5,5>: Cost 2 ins , lane 0 + 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> + 2131894272U, // <0,5,5,7>: Cost 2 ins , lane 0 + 2086076417U, // <0,5,5,u>: Cost 2 ins <0,u,5,3>, lane 1 + 2131910656U, // <0,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <0,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <0,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // <0,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <0,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <0,5,6,5>: Cost 2 ins , lane 0 + 2131959808U, // <0,5,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <0,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <0,5,6,u>: Cost 1 ins RHS, lane 0 + 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS + 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> + 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> + 2086223873U, // <0,5,7,3>: Cost 2 ins <0,u,7,3>, lane 1 + 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS + 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> + 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> + 2132041728U, // <0,5,7,7>: Cost 2 ins , lane 0 + 2132041728U, // <0,5,7,u>: Cost 2 ins , lane 0 + 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS + 2085765121U, // <0,5,u,1>: Cost 2 ins <0,u,1,1>, lane 1 + 2085699585U, // <0,5,u,2>: Cost 2 ins <0,u,0,2>, lane 1 + 1012113409U, // <0,5,u,3>: Cost 1 ins LHS, lane 1 + 1817423814U, // <0,5,u,4>: Cost 2 vzipl LHS, <5,4,7,6> + 2085797889U, // <0,5,u,5>: Cost 2 ins <0,u,1,5>, lane 1 + 2085879809U, // <0,5,u,6>: Cost 2 ins <0,u,2,6>, lane 1 + 1058226176U, // <0,5,u,7>: Cost 1 ins RHS, lane 0 + 1012113409U, // <0,5,u,u>: Cost 1 ins LHS, lane 1 + 2085683201U, // <0,6,0,0>: Cost 2 ins <0,u,0,0>, lane 1 + 2085691393U, // <0,6,0,1>: Cost 2 ins <0,u,0,1>, lane 1 + 2132148224U, // <0,6,0,2>: Cost 2 ins , lane 0 + 2085707777U, // <0,6,0,3>: Cost 2 ins <0,u,0,3>, lane 1 + 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> + 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> + 1678852234U, // <0,6,0,6>: Cost 2 vuzpl <0,1,6,3>, <0,1,6,3> + 1879051574U, // <0,6,0,7>: Cost 2 vzipr <0,0,0,0>, RHS + 2132148224U, // <0,6,0,u>: Cost 2 ins , lane 0 + 2993278336U, // <0,6,1,0>: Cost 3 
vzipr <6,7,0,1>, <4,6,6,0> + 2085765121U, // <0,6,1,1>: Cost 2 ins <0,u,1,1>, lane 1 + 1812779514U, // <0,6,1,2>: Cost 2 vzipl LHS, <6,2,7,3> + 2085781505U, // <0,6,1,3>: Cost 2 ins <0,u,1,3>, lane 1 + 3159531521U, // <0,6,1,4>: Cost 3 ins <0,u,1,4>, lane 1 + 2085797889U, // <0,6,1,5>: Cost 2 ins <0,u,1,5>, lane 1 + 1812779832U, // <0,6,1,6>: Cost 2 vzipl LHS, <6,6,6,6> + 1892994358U, // <0,6,1,7>: Cost 2 vzipr <2,3,0,1>, RHS + 1892994359U, // <0,6,1,u>: Cost 2 vzipr <2,3,0,1>, RHS + 1946997582U, // <0,6,2,0>: Cost 2 vtrnl LHS, <6,7,0,1> + 2085838849U, // <0,6,2,1>: Cost 2 ins <0,u,2,1>, lane 1 + 2085847041U, // <0,6,2,2>: Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // <0,6,2,3>: Cost 1 ins LHS, lane 1 + 1946997622U, // <0,6,2,4>: Cost 2 vtrnl LHS, <6,7,4,5> + 2085871617U, // <0,6,2,5>: Cost 2 ins <0,u,2,5>, lane 1 + 2085879809U, // <0,6,2,6>: Cost 2 ins <0,u,2,6>, lane 1 + 1880395062U, // <0,6,2,7>: Cost 2 vzipr <0,2,0,2>, RHS + 1012113409U, // <0,6,2,u>: Cost 1 ins LHS, lane 1 + 3122942050U, // <0,6,3,0>: Cost 3 vtrnr <6,0,1,3>, <5,6,7,0> + 2250527010U, // <0,6,3,1>: Cost 3 vrev <6,0,1,3> + 3206111232U, // <0,6,3,2>: Cost 3 ins , lane 0 + 2085928961U, // <0,6,3,3>: Cost 2 ins <0,u,3,3>, lane 1 + 3206127616U, // <0,6,3,4>: Cost 3 ins , lane 0 + 3206135808U, // <0,6,3,5>: Cost 3 ins , lane 0 + 3206144000U, // <0,6,3,6>: Cost 3 ins , lane 0 + 2132410368U, // <0,6,3,7>: Cost 2 ins , lane 0 + 2132410368U, // <0,6,3,u>: Cost 2 ins , lane 0 + 2888536380U, // <0,6,4,0>: Cost 3 vzipl <0,4,2,6>, <6,0,4,2> + 3021574433U, // <0,6,4,1>: Cost 3 vtrnl <0,2,4,6>, <6,0,1,2> + 3021574444U, // <0,6,4,2>: Cost 3 vtrnl <0,2,4,6>, <6,0,2,4> + 2086002689U, // <0,6,4,3>: Cost 2 ins <0,u,4,3>, lane 1 + 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS + 2086019073U, // <0,6,4,5>: Cost 2 ins <0,u,4,5>, lane 1 + 2132475904U, // <0,6,4,6>: Cost 2 ins , lane 0 + 2954153270U, // <0,6,4,7>: Cost 3 vzipr <0,2,0,4>, RHS + 2132475904U, // <0,6,4,u>: Cost 2 ins , lane 0 + 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3206250496U, // <0,6,5,1>: Cost 3 ins , lane 0 + 3206258688U, // <0,6,5,2>: Cost 3 ins , lane 0 + 2086076417U, // <0,6,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 3206275072U, // <0,6,5,4>: Cost 3 ins , lane 0 + 3206283264U, // <0,6,5,5>: Cost 3 ins , lane 0 + 3206291456U, // <0,6,5,6>: Cost 3 ins , lane 0 + 2961460534U, // <0,6,5,7>: Cost 3 vzipr <1,4,0,5>, RHS + 2086076417U, // <0,6,5,u>: Cost 2 ins <0,u,5,3>, lane 1 + 2724172540U, // <0,6,6,0>: Cost 3 vext3 <6,6,0,0>, <6,6,0,0> + 2889838972U, // <0,6,6,1>: Cost 3 vzipl <0,6,2,3>, <6,1,2,3> + 2997300124U, // <0,6,6,2>: Cost 3 vzipr <7,4,0,6>, <4,0,6,2> + 2086150145U, // <0,6,6,3>: Cost 2 ins <0,u,6,3>, lane 1 + 3206348800U, // <0,6,6,4>: Cost 3 ins , lane 0 + 2889839336U, // <0,6,6,5>: Cost 3 vzipl <0,6,2,3>, <6,5,6,7> + 2132623360U, // <0,6,6,6>: Cost 2 ins , lane 0 + 2132631552U, // <0,6,6,7>: Cost 2 ins , lane 0 + 2086150145U, // <0,6,6,u>: Cost 2 ins <0,u,6,3>, lane 1 + 2132647936U, // <0,6,7,0>: Cost 2 ins , lane 0 + 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> + 3206406144U, // <0,6,7,2>: Cost 3 ins , lane 0 + 2086223873U, // <0,6,7,3>: Cost 2 ins <0,u,7,3>, lane 1 + 2132680704U, // <0,6,7,4>: Cost 2 ins , lane 0 + 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> + 3206438912U, // <0,6,7,6>: Cost 3 ins , lane 0 + 2132705280U, // <0,6,7,7>: Cost 2 ins , lane 0 + 2132647936U, // <0,6,7,u>: Cost 2 ins , lane 0 + 2132647936U, // <0,6,u,0>: Cost 2 ins , lane 0 + 2085765121U, // <0,6,u,1>: Cost 2 ins <0,u,1,1>, 
lane 1 + 2132148224U, // <0,6,u,2>: Cost 2 ins , lane 0 + 1012113409U, // <0,6,u,3>: Cost 1 ins LHS, lane 1 + 2132680704U, // <0,6,u,4>: Cost 2 ins , lane 0 + 2085797889U, // <0,6,u,5>: Cost 2 ins <0,u,1,5>, lane 1 + 2085879809U, // <0,6,u,6>: Cost 2 ins <0,u,2,6>, lane 1 + 1880444214U, // <0,6,u,7>: Cost 2 vzipr <0,2,0,u>, RHS + 1012113409U, // <0,6,u,u>: Cost 1 ins LHS, lane 1 + 2085683201U, // <0,7,0,0>: Cost 2 ins <0,u,0,0>, lane 1 + 2132803584U, // <0,7,0,1>: Cost 2 ins , lane 0 + 2085699585U, // <0,7,0,2>: Cost 2 ins <0,u,0,2>, lane 1 + 2085707777U, // <0,7,0,3>: Cost 2 ins <0,u,0,3>, lane 1 + 2580516150U, // <0,7,0,4>: Cost 3 vext1 <5,0,7,0>, RHS + 2580516476U, // <0,7,0,5>: Cost 3 vext1 <5,0,7,0>, <5,0,7,0> + 2586489173U, // <0,7,0,6>: Cost 3 vext1 <6,0,7,0>, <6,0,7,0> + 1678925971U, // <0,7,0,7>: Cost 2 vuzpl <0,1,7,3>, <0,1,7,3> + 2132803584U, // <0,7,0,u>: Cost 2 ins , lane 0 + 1812780026U, // <0,7,1,0>: Cost 2 vzipl LHS, <7,0,1,2> + 2085765121U, // <0,7,1,1>: Cost 2 ins <0,u,1,1>, lane 1 + 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> + 2132893696U, // <0,7,1,3>: Cost 2 ins , lane 0 + 1812780390U, // <0,7,1,4>: Cost 2 vzipl LHS, <7,4,5,6> + 2085797889U, // <0,7,1,5>: Cost 2 ins <0,u,1,5>, lane 1 + 2586497366U, // <0,7,1,6>: Cost 3 vext1 <6,0,7,1>, <6,0,7,1> + 1812780652U, // <0,7,1,7>: Cost 2 vzipl LHS, <7,7,7,7> + 2085765121U, // <0,7,1,u>: Cost 2 ins <0,u,1,1>, lane 1 + 2085830657U, // <0,7,2,0>: Cost 2 ins <0,u,2,0>, lane 1 + 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> + 2085847041U, // <0,7,2,2>: Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // <0,7,2,3>: Cost 1 ins LHS, lane 1 + 2085863425U, // <0,7,2,4>: Cost 2 ins <0,u,2,4>, lane 1 + 1946998118U, // <0,7,2,5>: Cost 2 vtrnl LHS, <7,4,5,6> + 2085879809U, // <0,7,2,6>: Cost 2 ins <0,u,2,6>, lane 1 + 1946998380U, // <0,7,2,7>: Cost 2 vtrnl LHS, <7,7,7,7> + 1012113409U, // <0,7,2,u>: Cost 1 ins LHS, lane 1 + 2989314146U, // <0,7,3,0>: Cost 3 vzipr <6,1,0,3>, <5,6,7,0> + 3206766592U, // <0,7,3,1>: Cost 3 ins , lane 0 + 3020813397U, // <0,7,3,2>: Cost 3 vtrnl <0,1,3,3>, <7,1,2,3> + 2085928961U, // <0,7,3,3>: Cost 2 ins <0,u,3,3>, lane 1 + 3206791168U, // <0,7,3,4>: Cost 3 ins , lane 0 + 3206799360U, // <0,7,3,5>: Cost 3 ins , lane 0 + 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 3206815744U, // <0,7,3,7>: Cost 3 ins , lane 0 + 2085928961U, // <0,7,3,u>: Cost 2 ins <0,u,3,3>, lane 1 + 3206832128U, // <0,7,4,0>: Cost 3 ins , lane 0 + 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> + 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> + 2086002689U, // <0,7,4,3>: Cost 2 ins <0,u,4,3>, lane 1 + 3206864896U, // <0,7,4,4>: Cost 3 ins , lane 0 + 2133131264U, // <0,7,4,5>: Cost 2 ins , lane 0 + 2086027265U, // <0,7,4,6>: Cost 2 ins <0,u,4,6>, lane 1 + 3020887660U, // <0,7,4,7>: Cost 3 vtrnl <0,1,4,3>, <7,7,7,7> + 2133131264U, // <0,7,4,u>: Cost 2 ins , lane 0 + 2993311842U, // <0,7,5,0>: Cost 3 vzipr <6,7,0,5>, <5,6,7,0> + 3206914048U, // <0,7,5,1>: Cost 3 ins , lane 0 + 3020960853U, // <0,7,5,2>: Cost 3 vtrnl <0,1,5,3>, <7,1,2,3> + 2086076417U, // <0,7,5,3>: Cost 2 ins <0,u,5,3>, lane 1 + 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> + 3206946816U, // <0,7,5,5>: Cost 3 ins , lane 0 + 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> + 2133221376U, // <0,7,5,7>: Cost 2 ins , lane 0 + 2133221376U, // <0,7,5,u>: Cost 2 ins , lane 0 + 2854834274U, // <0,7,6,0>: Cost 3 vuzpr <6,0,5,7>, <5,6,7,0> + 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> + 3206995968U, // <0,7,6,2>: Cost 3 ins , lane 
0 + 2086150145U, // <0,7,6,3>: Cost 2 ins <0,u,6,3>, lane 1 + 3207012352U, // <0,7,6,4>: Cost 3 ins , lane 0 + 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> + 3207028736U, // <0,7,6,6>: Cost 3 ins , lane 0 + 2133295104U, // <0,7,6,7>: Cost 2 ins , lane 0 + 2086150145U, // <0,7,6,u>: Cost 2 ins <0,u,6,3>, lane 1 + 2992001122U, // <0,7,7,0>: Cost 3 vzipr <6,5,0,7>, <5,6,7,0> + 3207061504U, // <0,7,7,1>: Cost 3 ins , lane 0 + 2752672853U, // <0,7,7,2>: Cost 3 vuzpl <0,1,7,3>, <7,1,2,3> + 2086223873U, // <0,7,7,3>: Cost 2 ins <0,u,7,3>, lane 1 + 3207086080U, // <0,7,7,4>: Cost 3 ins , lane 0 + 3207094272U, // <0,7,7,5>: Cost 3 ins , lane 0 + 2663093724U, // <0,7,7,6>: Cost 3 vext2 <7,6,0,7>, <7,6,0,7> + 2133368832U, // <0,7,7,7>: Cost 2 ins , lane 0 + 2086223873U, // <0,7,7,u>: Cost 2 ins <0,u,7,3>, lane 1 + 1817424890U, // <0,7,u,0>: Cost 2 vzipl LHS, <7,0,1,2> + 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> + 2085699585U, // <0,7,u,2>: Cost 2 ins <0,u,0,2>, lane 1 + 1012113409U, // <0,7,u,3>: Cost 1 ins LHS, lane 1 + 1817425254U, // <0,7,u,4>: Cost 2 vzipl LHS, <7,4,5,6> + 2085797889U, // <0,7,u,5>: Cost 2 ins <0,u,1,5>, lane 1 + 2085879809U, // <0,7,u,6>: Cost 2 ins <0,u,2,6>, lane 1 + 2133221376U, // <0,7,u,7>: Cost 2 ins , lane 0 + 1012113409U, // <0,7,u,u>: Cost 1 ins LHS, lane 1 + 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS + 1007951877U, // <0,u,0,1>: Cost 1 ins LHS, lane 5 + 605257830U, // <0,u,0,2>: Cost 1 vuzpl LHS, LHS + 1007910914U, // <0,u,0,3>: Cost 1 ins LHS, lane 2 + 1678999756U, // <0,u,0,4>: Cost 2 vuzpl LHS, <0,2,4,6> + 2081767427U, // <0,u,0,5>: Cost 2 ins <0,2,0,u>, lane 3 + 1947506842U, // <0,u,0,6>: Cost 2 vtrnl <0,2,0,2>, RHS + 2081767427U, // <0,u,0,7>: Cost 2 ins <0,2,0,u>, lane 3 + 605257884U, // <0,u,0,u>: Cost 1 vuzpl LHS, LHS + 1812821715U, // <0,u,1,0>: Cost 2 vzipl LHS, + 739039022U, // <0,u,1,1>: Cost 1 vzipl LHS, LHS + 1813264264U, // <0,u,1,2>: Cost 2 vzipl LHS, + 1007910914U, // <0,u,1,3>: Cost 1 ins LHS, lane 2 + 1812822079U, // <0,u,1,4>: Cost 2 vzipl LHS, + 739039386U, // <0,u,1,5>: Cost 1 vzipl LHS, RHS + 1813264592U, // <0,u,1,6>: Cost 2 vzipl LHS, + 1892994376U, // <0,u,1,7>: Cost 2 vzipr <2,3,0,1>, RHS + 739039589U, // <0,u,1,u>: Cost 1 vzipl LHS, LHS + 1007509507U, // <0,u,2,0>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,u,2,1>: Cost 1 ins LHS, lane 3 + 873256750U, // <0,u,2,2>: Cost 1 vtrnl LHS, LHS + 835584U, // <0,u,2,3>: Cost 0 copy LHS + 1007509507U, // <0,u,2,4>: Cost 1 ins LHS, lane 3 + 1007509507U, // <0,u,2,5>: Cost 1 ins LHS, lane 3 + 873257114U, // <0,u,2,6>: Cost 1 vtrnl LHS, RHS + 1007509507U, // <0,u,2,7>: Cost 1 ins LHS, lane 3 + 835584U, // <0,u,2,u>: Cost 0 copy LHS + 2133680132U, // <0,u,3,0>: Cost 2 ins , lane 4 + 1679001750U, // <0,u,3,1>: Cost 2 vuzpl LHS, <3,0,1,2> + 2128388096U, // <0,u,3,2>: Cost 2 ins , lane 0 + 1007910914U, // <0,u,3,3>: Cost 1 ins LHS, lane 2 + 2133712900U, // <0,u,3,4>: Cost 2 ins , lane 4 + 1679002114U, // <0,u,3,5>: Cost 2 vuzpl LHS, <3,4,5,6> + 2082340866U, // <0,u,3,6>: Cost 2 ins <0,2,u,6>, lane 2 + 2133737476U, // <0,u,3,7>: Cost 2 ins , lane 4 + 1007910914U, // <0,u,3,u>: Cost 1 ins LHS, lane 2 + 2082062339U, // <0,u,4,0>: Cost 2 ins <0,2,4,u>, lane 3 + 1814714158U, // <0,u,4,1>: Cost 2 vzipl <0,4,1,5>, LHS + 1947834158U, // <0,u,4,2>: Cost 2 vtrnl <0,2,4,6>, LHS + 1007910914U, // <0,u,4,3>: Cost 1 ins LHS, lane 2 + 1947828428U, // <0,u,4,4>: Cost 2 vtrnl <0,2,4,6>, <0,2,4,6> + 1007951877U, // <0,u,4,5>: Cost 1 ins LHS, lane 5 + 605261110U, // <0,u,4,6>: Cost 1 vuzpl LHS, RHS + 
2082062339U, // <0,u,4,7>: Cost 2 ins <0,2,4,u>, lane 3 + 605261128U, // <0,u,4,u>: Cost 1 vuzpl LHS, RHS + 2080964610U, // <0,u,5,0>: Cost 2 ins <0,0,u,0>, lane 2 + 2128527360U, // <0,u,5,1>: Cost 2 ins , lane 0 + 2080980994U, // <0,u,5,2>: Cost 2 ins <0,0,u,2>, lane 2 + 1007910914U, // <0,u,5,3>: Cost 1 ins LHS, lane 2 + 2081660930U, // <0,u,5,4>: Cost 2 ins <0,1,u,4>, lane 2 + 2133868548U, // <0,u,5,5>: Cost 2 ins , lane 4 + 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 1751092534U, // <0,u,5,7>: Cost 2 vuzpr <1,0,3,u>, RHS + 1007910914U, // <0,u,5,u>: Cost 1 ins LHS, lane 2 + 1679004494U, // <0,u,6,0>: Cost 2 vuzpl LHS, <6,7,0,1> + 2080972802U, // <0,u,6,1>: Cost 2 ins <0,0,u,1>, lane 2 + 2128609280U, // <0,u,6,2>: Cost 2 ins , lane 0 + 1007910914U, // <0,u,6,3>: Cost 1 ins LHS, lane 2 + 1679004534U, // <0,u,6,4>: Cost 2 vuzpl LHS, <6,7,4,5> + 2083659778U, // <0,u,6,5>: Cost 2 ins <0,4,u,5>, lane 2 + 2133950468U, // <0,u,6,6>: Cost 2 ins , lane 4 + 1060216836U, // <0,u,6,7>: Cost 1 ins RHS, lane 4 + 1007910914U, // <0,u,6,u>: Cost 1 ins LHS, lane 2 + 2133975044U, // <0,u,7,0>: Cost 2 ins , lane 4 + 2080972802U, // <0,u,7,1>: Cost 2 ins <0,0,u,1>, lane 2 + 2080980994U, // <0,u,7,2>: Cost 2 ins <0,0,u,2>, lane 2 + 1007910914U, // <0,u,7,3>: Cost 1 ins LHS, lane 2 + 2134007812U, // <0,u,7,4>: Cost 2 ins , lane 4 + 2083659778U, // <0,u,7,5>: Cost 2 ins <0,4,u,5>, lane 2 + 2134024196U, // <0,u,7,6>: Cost 2 ins , lane 4 + 2134032388U, // <0,u,7,7>: Cost 2 ins , lane 4 + 1007910914U, // <0,u,7,u>: Cost 1 ins LHS, lane 2 + 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS + 743683886U, // <0,u,u,1>: Cost 1 vzipl LHS, LHS + 605263662U, // <0,u,u,2>: Cost 1 vuzpl LHS, LHS + 835584U, // <0,u,u,3>: Cost 0 copy LHS + 1007509507U, // <0,u,u,4>: Cost 1 ins LHS, lane 3 + 743684250U, // <0,u,u,5>: Cost 1 vzipl LHS, RHS + 605264026U, // <0,u,u,6>: Cost 1 vuzpl LHS, RHS + 1007509507U, // <0,u,u,7>: Cost 1 ins LHS, lane 3 + 835584U, // <0,u,u,u>: Cost 0 copy LHS + 2128150528U, // <1,0,0,0>: Cost 2 ins , lane 0 + 1818148966U, // <1,0,0,1>: Cost 2 vzipl <1,0,3,2>, LHS + 2086952962U, // <1,0,0,2>: Cost 2 ins <1,0,u,2>, lane 2 + 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> + 2891891026U, // <1,0,0,4>: Cost 3 vzipl <1,0,3,2>, <0,4,1,5> + 3165437953U, // <1,0,0,5>: Cost 3 ins <1,u,0,5>, lane 1 + 3160154115U, // <1,0,0,6>: Cost 3 ins <1,0,0,u>, lane 3 + 3160154115U, // <1,0,0,7>: Cost 3 ins <1,0,0,u>, lane 3 + 1818149533U, // <1,0,0,u>: Cost 2 vzipl <1,0,3,2>, LHS + 1141522514U, // <1,0,1,0>: Cost 2 vrev <0,1,0,1> + 1818656870U, // <1,0,1,1>: Cost 2 vzipl <1,1,1,1>, LHS + 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 2091753473U, // <1,0,1,3>: Cost 2 ins <1,u,1,3>, lane 1 + 1477070134U, // <1,0,1,4>: Cost 2 vext1 <0,1,0,1>, RHS + 2760770560U, // <1,0,1,5>: Cost 3 vuzpl <1,5,0,2>, <1,3,5,7> + 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> + 3165528065U, // <1,0,1,7>: Cost 3 ins <1,u,1,7>, lane 1 + 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS + 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> + 1819459686U, // <1,0,2,1>: Cost 2 vzipl <1,2,3,0>, LHS + 2128314368U, // <1,0,2,2>: Cost 2 ins , lane 0 + 2087002117U, // <1,0,2,3>: Cost 2 ins <1,0,u,u>, lane 5 + 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> + 2970798548U, // <1,0,2,5>: Cost 3 vzipr <3,0,1,2>, <3,4,0,5> + 3165593601U, // <1,0,2,6>: Cost 3 ins <1,u,2,6>, lane 1 + 2592625730U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,1,0,2> + 1819460253U, // <1,0,2,u>: Cost 2 vzipl <1,2,3,0>, 
LHS + 2014101504U, // <1,0,3,0>: Cost 2 vtrnr LHS, <0,0,0,0> + 2014101514U, // <1,0,3,1>: Cost 2 vtrnr LHS, <0,0,1,1> + 67944550U, // <1,0,3,2>: Cost 1 vrev LHS + 2091900929U, // <1,0,3,3>: Cost 2 ins <1,u,3,3>, lane 1 + 2091909121U, // <1,0,3,4>: Cost 2 ins <1,u,3,4>, lane 1 + 2086633475U, // <1,0,3,5>: Cost 2 ins <1,0,3,u>, lane 3 + 2086633475U, // <1,0,3,6>: Cost 2 ins <1,0,3,u>, lane 3 + 2091933697U, // <1,0,3,7>: Cost 2 ins <1,u,3,7>, lane 1 + 68386972U, // <1,0,3,u>: Cost 1 vrev LHS + 2667752338U, // <1,0,4,0>: Cost 3 vext2 , <4,0,5,1> + 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> + 2086952962U, // <1,0,4,2>: Cost 2 ins <1,0,u,2>, lane 2 + 2819383641U, // <1,0,4,3>: Cost 3 vuzpr <0,1,2,0>, <0,4,2,3> + 2894569810U, // <1,0,4,4>: Cost 3 vzipl <1,4,3,5>, <0,4,1,5> + 2087002117U, // <1,0,4,5>: Cost 2 ins <1,0,u,u>, lane 5 + 2758102326U, // <1,0,4,6>: Cost 3 vuzpl <1,1,0,0>, RHS + 2819386597U, // <1,0,4,7>: Cost 3 vuzpr <0,1,2,0>, <4,4,6,7> + 2086952962U, // <1,0,4,u>: Cost 2 ins <1,0,u,2>, lane 2 + 2955558912U, // <1,0,5,0>: Cost 3 vzipr <0,4,1,5>, <0,0,0,0> + 1821507686U, // <1,0,5,1>: Cost 2 vzipl <1,5,3,7>, LHS + 1954545766U, // <1,0,5,2>: Cost 2 vtrnl <1,3,5,7>, LHS + 3165790209U, // <1,0,5,3>: Cost 3 ins <1,u,5,3>, lane 1 + 1141850234U, // <1,0,5,4>: Cost 2 vrev <0,1,4,5> + 3165806593U, // <1,0,5,5>: Cost 3 ins <1,u,5,5>, lane 1 + 3202310144U, // <1,0,5,6>: Cost 3 ins , lane 0 + 2092081153U, // <1,0,5,7>: Cost 2 ins <1,u,5,7>, lane 1 + 1954545820U, // <1,0,5,u>: Cost 2 vtrnl <1,3,5,7>, LHS + 3202334720U, // <1,0,6,0>: Cost 3 ins , lane 0 + 2895765606U, // <1,0,6,1>: Cost 3 vzipl <1,6,1,7>, LHS + 2128609280U, // <1,0,6,2>: Cost 2 ins , lane 0 + 2819383803U, // <1,0,6,3>: Cost 3 vuzpr <0,1,2,0>, <0,6,2,3> + 2896060754U, // <1,0,6,4>: Cost 3 vzipl <1,6,5,7>, <0,4,1,5> + 2215673988U, // <1,0,6,5>: Cost 3 vrev <0,1,5,6> + 3165888513U, // <1,0,6,6>: Cost 3 ins <1,u,6,6>, lane 1 + 2087002117U, // <1,0,6,7>: Cost 2 ins <1,0,u,u>, lane 5 + 2128609280U, // <1,0,6,u>: Cost 2 ins , lane 0 + 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> + 2974156454U, // <1,0,7,1>: Cost 3 vzipr <3,5,1,7>, <2,3,0,1> + 2086952962U, // <1,0,7,2>: Cost 2 ins <1,0,u,2>, lane 2 + 2861265024U, // <1,0,7,3>: Cost 3 vuzpr <7,1,3,0>, <5,7,1,3> + 3202441216U, // <1,0,7,4>: Cost 3 ins , lane 0 + 3165954049U, // <1,0,7,5>: Cost 3 ins <1,u,7,5>, lane 1 + 1142014094U, // <1,0,7,6>: Cost 2 vrev <0,1,6,7> + 3165970433U, // <1,0,7,7>: Cost 3 ins <1,u,7,7>, lane 1 + 2086952962U, // <1,0,7,u>: Cost 2 ins <1,0,u,2>, lane 2 + 2014142464U, // <1,0,u,0>: Cost 2 vtrnr LHS, <0,0,0,0> + 2014142474U, // <1,0,u,1>: Cost 2 vtrnr LHS, <0,0,1,1> + 67985515U, // <1,0,u,2>: Cost 1 vrev LHS + 2091753473U, // <1,0,u,3>: Cost 2 ins <1,u,1,3>, lane 1 + 2091909121U, // <1,0,u,4>: Cost 2 ins <1,u,3,4>, lane 1 + 2086633475U, // <1,0,u,5>: Cost 2 ins <1,0,3,u>, lane 3 + 2086633475U, // <1,0,u,6>: Cost 2 ins <1,0,3,u>, lane 3 + 2091933697U, // <1,0,u,7>: Cost 2 ins <1,u,3,7>, lane 1 + 68427937U, // <1,0,u,u>: Cost 1 vrev LHS + 1818149622U, // <1,1,0,0>: Cost 2 vzipl <1,0,3,2>, <1,0,3,2> + 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS + 1684439142U, // <1,1,0,2>: Cost 2 vuzpl <1,1,1,1>, LHS + 2087624706U, // <1,1,0,3>: Cost 2 ins <1,1,u,3>, lane 2 + 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> + 2891891856U, // <1,1,0,5>: Cost 3 vzipl <1,0,3,2>, <1,5,3,7> + 3161391106U, // <1,1,0,6>: Cost 3 ins <1,1,u,6>, lane 2 + 3161399298U, // <1,1,0,7>: Cost 3 ins <1,1,u,7>, lane 2 + 1548894866U, // <1,1,0,u>: 
Cost 2 vext2 <0,u,1,1>, <0,u,1,1> + 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS + 2087149571U, // <1,1,1,2>: Cost 2 ins <1,1,1,u>, lane 3 + 1751548006U, // <1,1,1,3>: Cost 2 vuzpr <1,1,1,1>, LHS + 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2087149571U, // <1,1,1,5>: Cost 2 ins <1,1,1,u>, lane 3 + 2087149571U, // <1,1,1,6>: Cost 2 ins <1,1,1,u>, lane 3 + 2087149571U, // <1,1,1,7>: Cost 2 ins <1,1,1,u>, lane 3 + 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS + 2128961536U, // <1,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <1,1,2,1>: Cost 2 ins , lane 0 + 1819460502U, // <1,1,2,2>: Cost 2 vzipl <1,2,3,0>, <1,2,3,0> + 1055244288U, // <1,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <1,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <1,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <1,1,2,6>: Cost 2 ins , lane 0 + 2129018880U, // <1,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <1,1,2,u>: Cost 1 ins LHS, lane 0 + 2091876353U, // <1,1,3,0>: Cost 2 ins <1,u,3,0>, lane 1 + 2014102324U, // <1,1,3,1>: Cost 2 vtrnr LHS, <1,1,1,1> + 2091892737U, // <1,1,3,2>: Cost 2 ins <1,u,3,2>, lane 1 + 940359782U, // <1,1,3,3>: Cost 1 vtrnr LHS, LHS + 2091909121U, // <1,1,3,4>: Cost 2 ins <1,u,3,4>, lane 1 + 2087297027U, // <1,1,3,5>: Cost 2 ins <1,1,3,u>, lane 3 + 2087297027U, // <1,1,3,6>: Cost 2 ins <1,1,3,u>, lane 3 + 2091933697U, // <1,1,3,7>: Cost 2 ins <1,u,3,7>, lane 1 + 940359787U, // <1,1,3,u>: Cost 1 vtrnr LHS, LHS + 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS + 2087608322U, // <1,1,4,1>: Cost 2 ins <1,1,u,1>, lane 2 + 2894496662U, // <1,1,4,2>: Cost 3 vzipl <1,4,2,5>, <1,2,3,0> + 2087624706U, // <1,1,4,3>: Cost 2 ins <1,1,u,3>, lane 2 + 2014109799U, // <1,1,4,4>: Cost 2 vtrnr <0,1,2,4>, <0,1,2,4> + 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS + 1684442422U, // <1,1,4,6>: Cost 2 vuzpl <1,1,1,1>, RHS + 3161399298U, // <1,1,4,7>: Cost 3 ins <1,1,u,7>, lane 2 + 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS + 3028288624U, // <1,1,5,0>: Cost 3 vtrnl <1,3,5,7>, <1,5,0,2> + 2087608322U, // <1,1,5,1>: Cost 2 ins <1,1,u,1>, lane 2 + 2955561110U, // <1,1,5,2>: Cost 3 vzipr <0,4,1,5>, <3,0,1,2> + 2087624706U, // <1,1,5,3>: Cost 2 ins <1,1,u,3>, lane 2 + 2955558925U, // <1,1,5,4>: Cost 3 vzipr <0,4,1,5>, <0,0,1,4> + 1881817426U, // <1,1,5,5>: Cost 2 vzipr <0,4,1,5>, <0,4,1,5> + 2670415970U, // <1,1,5,6>: Cost 3 vext2 , <5,6,7,0> + 1751551286U, // <1,1,5,7>: Cost 2 vuzpr <1,1,1,1>, RHS + 1751551287U, // <1,1,5,u>: Cost 2 vuzpr <1,1,1,1>, RHS + 3165839361U, // <1,1,6,0>: Cost 3 ins <1,u,6,0>, lane 1 + 2087608322U, // <1,1,6,1>: Cost 2 ins <1,1,u,1>, lane 2 + 2973485206U, // <1,1,6,2>: Cost 3 vzipr <3,4,1,6>, <3,0,1,2> + 2087624706U, // <1,1,6,3>: Cost 2 ins <1,1,u,3>, lane 2 + 2221572948U, // <1,1,6,4>: Cost 3 vrev <1,1,4,6> + 2955567442U, // <1,1,6,5>: Cost 3 vzipr <0,4,1,6>, <0,4,1,5> + 2014126185U, // <1,1,6,6>: Cost 2 vtrnr <0,1,2,6>, <0,1,2,6> + 2087665669U, // <1,1,6,7>: Cost 2 ins <1,1,u,u>, lane 5 + 2087624706U, // <1,1,6,u>: Cost 2 ins <1,1,u,3>, lane 2 + 2670416890U, // <1,1,7,0>: Cost 3 vext2 , <7,0,1,2> + 2087608322U, // <1,1,7,1>: Cost 2 ins <1,1,u,1>, lane 2 + 3203088384U, // <1,1,7,2>: Cost 3 ins , lane 0 + 2129354752U, // <1,1,7,3>: Cost 2 ins , lane 0 + 2670417254U, // <1,1,7,4>: Cost 3 vext2 , <7,4,5,6> + 2221654878U, // <1,1,7,5>: Cost 3 vrev <1,1,5,7> + 3161391106U, // <1,1,7,6>: Cost 3 ins <1,1,u,6>, lane 2 + 2014134378U, // <1,1,7,7>: Cost 2 vtrnr <0,1,2,7>, <0,1,2,7> + 2129354752U, // <1,1,7,u>: 
Cost 2 ins , lane 0 + 1818149622U, // <1,1,u,0>: Cost 2 vzipl <1,0,3,2>, <1,0,3,2> + 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS + 1684444974U, // <1,1,u,2>: Cost 2 vuzpl <1,1,1,1>, LHS + 940400742U, // <1,1,u,3>: Cost 1 vtrnr LHS, LHS + 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS + 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS + 1684445338U, // <1,1,u,6>: Cost 2 vuzpl <1,1,1,1>, RHS + 1751551529U, // <1,1,u,7>: Cost 2 vuzpr <1,1,1,1>, RHS + 940400747U, // <1,1,u,u>: Cost 1 vtrnr LHS, LHS + 2088263682U, // <1,2,0,0>: Cost 2 ins <1,2,u,0>, lane 2 + 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS + 2129494016U, // <1,2,0,2>: Cost 2 ins , lane 0 + 2954854502U, // <1,2,0,3>: Cost 3 vzipr <0,3,1,0>, LHS + 2088296450U, // <1,2,0,4>: Cost 2 ins <1,2,u,4>, lane 2 + 3165437953U, // <1,2,0,5>: Cost 3 ins <1,u,0,5>, lane 1 + 2891892666U, // <1,2,0,6>: Cost 3 vzipl <1,0,3,2>, <2,6,3,7> + 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> + 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS + 2088263682U, // <1,2,1,0>: Cost 2 ins <1,2,u,0>, lane 2 + 2091737089U, // <1,2,1,1>: Cost 2 ins <1,u,1,1>, lane 1 + 1745657957U, // <1,2,1,2>: Cost 2 vuzpr <0,1,2,2>, <0,1,2,2> + 1884438630U, // <1,2,1,3>: Cost 2 vzipr <0,u,1,1>, LHS + 2088296450U, // <1,2,1,4>: Cost 2 ins <1,2,u,4>, lane 2 + 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> + 2958180700U, // <1,2,1,6>: Cost 3 vzipr <0,u,1,1>, <0,4,2,6> + 3165528065U, // <1,2,1,7>: Cost 3 ins <1,u,1,7>, lane 1 + 1884438635U, // <1,2,1,u>: Cost 2 vzipr <0,u,1,1>, LHS + 2088263682U, // <1,2,2,0>: Cost 2 ins <1,2,u,0>, lane 2 + 2893235754U, // <1,2,2,1>: Cost 3 vzipl <1,2,3,4>, <2,1,4,3> + 2129641472U, // <1,2,2,2>: Cost 2 ins , lane 0 + 1897054310U, // <1,2,2,3>: Cost 2 vzipr <3,0,1,2>, LHS + 2088296450U, // <1,2,2,4>: Cost 2 ins <1,2,u,4>, lane 2 + 3165585409U, // <1,2,2,5>: Cost 3 ins <1,u,2,5>, lane 1 + 2893203386U, // <1,2,2,6>: Cost 3 vzipl <1,2,3,0>, <2,6,3,7> + 2994684010U, // <1,2,2,7>: Cost 3 vzipr <7,0,1,2>, <0,1,2,7> + 1897054315U, // <1,2,2,u>: Cost 2 vzipr <3,0,1,2>, LHS + 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS + 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> + 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS + 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> + 2014101708U, // <1,2,3,6>: Cost 2 vtrnr LHS, <0,2,4,6> + 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS + 2088263682U, // <1,2,4,0>: Cost 2 ins <1,2,u,0>, lane 2 + 3162013698U, // <1,2,4,1>: Cost 3 ins <1,2,u,1>, lane 2 + 3162021890U, // <1,2,4,2>: Cost 3 ins <1,2,u,2>, lane 2 + 2954887270U, // <1,2,4,3>: Cost 3 vzipr <0,3,1,4>, LHS + 2088296450U, // <1,2,4,4>: Cost 2 ins <1,2,u,4>, lane 2 + 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS + 2129821696U, // <1,2,4,6>: Cost 2 ins , lane 0 + 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> + 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS + 2088263682U, // <1,2,5,0>: Cost 2 ins <1,2,u,0>, lane 2 + 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> + 2955558932U, // <1,2,5,2>: Cost 3 vzipr <0,4,1,5>, <0,0,2,2> + 1881817190U, // <1,2,5,3>: Cost 2 vzipr <0,4,1,5>, LHS + 2088296450U, // <1,2,5,4>: Cost 2 ins <1,2,u,4>, lane 2 + 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> + 2955559260U, // <1,2,5,6>: Cost 3 vzipr <0,4,1,5>, <0,4,2,6> + 2092081153U, // <1,2,5,7>: 
Cost 2 ins <1,u,5,7>, lane 1 + 1881817195U, // <1,2,5,u>: Cost 2 vzipr <0,4,1,5>, LHS + 2088263682U, // <1,2,6,0>: Cost 2 ins <1,2,u,0>, lane 2 + 3162013698U, // <1,2,6,1>: Cost 3 ins <1,2,u,1>, lane 2 + 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> + 2954240102U, // <1,2,6,3>: Cost 3 vzipr <0,2,1,6>, LHS + 2088296450U, // <1,2,6,4>: Cost 2 ins <1,2,u,4>, lane 2 + 3162046466U, // <1,2,6,5>: Cost 3 ins <1,2,u,5>, lane 2 + 2895914938U, // <1,2,6,6>: Cost 3 vzipl <1,6,3,7>, <2,6,3,7> + 2088329221U, // <1,2,6,7>: Cost 2 ins <1,2,u,u>, lane 5 + 2088263682U, // <1,2,6,u>: Cost 2 ins <1,2,u,0>, lane 2 + 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> + 3203743744U, // <1,2,7,1>: Cost 3 ins , lane 0 + 3203751936U, // <1,2,7,2>: Cost 3 ins , lane 0 + 2130018304U, // <1,2,7,3>: Cost 2 ins , lane 0 + 2088296450U, // <1,2,7,4>: Cost 2 ins <1,2,u,4>, lane 2 + 3203776512U, // <1,2,7,5>: Cost 3 ins , lane 0 + 3203784704U, // <1,2,7,6>: Cost 3 ins , lane 0 + 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> + 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> + 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS + 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> + 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS + 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS + 2014142668U, // <1,2,u,6>: Cost 2 vtrnr LHS, <0,2,4,6> + 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS + 1745666048U, // <1,3,0,0>: Cost 2 vuzpr LHS, <0,0,0,0> + 1746108426U, // <1,3,0,1>: Cost 2 vuzpr LHS, <0,0,1,1> + 1745666806U, // <1,3,0,2>: Cost 2 vuzpr LHS, <1,0,3,2> + 2088951810U, // <1,3,0,3>: Cost 2 ins <1,3,u,3>, lane 2 + 2819850253U, // <1,3,0,4>: Cost 3 vuzpr LHS, <0,0,1,4> + 2758984055U, // <1,3,0,5>: Cost 3 vuzpl <1,2,3,0>, <0,4,5,6> + 2867183658U, // <1,3,0,6>: Cost 3 vuzpr LHS, <0,0,4,6> + 2088984578U, // <1,3,0,7>: Cost 2 ins <1,3,u,7>, lane 2 + 1745668252U, // <1,3,0,u>: Cost 2 vuzpr LHS, <3,0,1,u> + 2088476675U, // <1,3,1,0>: Cost 2 ins <1,3,1,u>, lane 3 + 1745666868U, // <1,3,1,1>: Cost 2 vuzpr LHS, <1,1,1,1> + 2088476675U, // <1,3,1,2>: Cost 2 ins <1,3,1,u>, lane 3 + 671924326U, // <1,3,1,3>: Cost 1 vuzpr LHS, LHS + 2088476675U, // <1,3,1,4>: Cost 2 ins <1,3,1,u>, lane 3 + 2088476675U, // <1,3,1,5>: Cost 2 ins <1,3,1,u>, lane 3 + 2088476675U, // <1,3,1,6>: Cost 2 ins <1,3,1,u>, lane 3 + 2088984578U, // <1,3,1,7>: Cost 2 ins <1,3,u,7>, lane 2 + 671924331U, // <1,3,1,u>: Cost 1 vuzpr LHS, LHS + 1745666966U, // <1,3,2,0>: Cost 2 vuzpr LHS, <1,2,3,0> + 2819408044U, // <1,3,2,1>: Cost 3 vuzpr LHS, <0,2,1,1> + 1745666212U, // <1,3,2,2>: Cost 2 vuzpr LHS, <0,2,0,2> + 1746110066U, // <1,3,2,3>: Cost 2 vuzpr LHS, <2,2,3,3> + 1745666970U, // <1,3,2,4>: Cost 2 vuzpr LHS, <1,2,3,4> + 2819408066U, // <1,3,2,5>: Cost 3 vuzpr LHS, <0,2,3,5> + 1745666252U, // <1,3,2,6>: Cost 2 vuzpr LHS, <0,2,4,6> + 2088984578U, // <1,3,2,7>: Cost 2 ins <1,3,u,7>, lane 2 + 1745666218U, // <1,3,2,u>: Cost 2 vuzpr LHS, <0,2,0,u> + 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS + 1745667750U, // <1,3,3,1>: Cost 2 vuzpr LHS, <2,3,0,1> + 2091892737U, // <1,3,3,2>: Cost 2 ins <1,u,3,2>, lane 1 + 1745667032U, // <1,3,3,3>: Cost 2 vuzpr LHS, <1,3,1,3> + 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS + 1745667790U, // <1,3,3,5>: Cost 2 vuzpr LHS, <2,3,4,5> + 2819408868U, // <1,3,3,6>: Cost 3 vuzpr LHS, <1,3,2,6> + 
2014102528U, // <1,3,3,7>: Cost 2 vtrnr LHS, <1,3,5,7> + 1745667037U, // <1,3,3,u>: Cost 2 vuzpr LHS, <1,3,1,u> + 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS + 2759019375U, // <1,3,4,1>: Cost 3 vuzpl <1,2,3,4>, <4,0,1,2> + 2759019466U, // <1,3,4,2>: Cost 3 vuzpl <1,2,3,4>, <4,1,2,3> + 2088951810U, // <1,3,4,3>: Cost 2 ins <1,3,u,3>, lane 2 + 1793445072U, // <1,3,4,4>: Cost 2 vuzpr LHS, <4,4,4,4> + 1746108754U, // <1,3,4,5>: Cost 2 vuzpr LHS, <0,4,1,5> + 1745668610U, // <1,3,4,6>: Cost 2 vuzpr LHS, <3,4,5,6> + 2088984578U, // <1,3,4,7>: Cost 2 ins <1,3,u,7>, lane 2 + 1745668612U, // <1,3,4,u>: Cost 2 vuzpr LHS, <3,4,5,u> + 2088771587U, // <1,3,5,0>: Cost 2 ins <1,3,5,u>, lane 3 + 2088771587U, // <1,3,5,1>: Cost 2 ins <1,3,5,u>, lane 3 + 2088771587U, // <1,3,5,2>: Cost 2 ins <1,3,5,u>, lane 3 + 2088951810U, // <1,3,5,3>: Cost 2 ins <1,3,u,3>, lane 2 + 2088771587U, // <1,3,5,4>: Cost 2 ins <1,3,5,u>, lane 3 + 1793445892U, // <1,3,5,5>: Cost 2 vuzpr LHS, <5,5,5,5> + 2088771587U, // <1,3,5,6>: Cost 2 ins <1,3,5,u>, lane 3 + 671927606U, // <1,3,5,7>: Cost 1 vuzpr LHS, RHS + 671927607U, // <1,3,5,u>: Cost 1 vuzpr LHS, RHS + 1793445986U, // <1,3,6,0>: Cost 2 vuzpr LHS, <5,6,7,0> + 2867185561U, // <1,3,6,1>: Cost 3 vuzpr LHS, <2,6,0,1> + 1793445196U, // <1,3,6,2>: Cost 2 vuzpr LHS, <4,6,0,2> + 2088951810U, // <1,3,6,3>: Cost 2 ins <1,3,u,3>, lane 2 + 1793445990U, // <1,3,6,4>: Cost 2 vuzpr LHS, <5,6,7,4> + 2849642738U, // <1,3,6,5>: Cost 3 vuzpr <5,1,7,3>, + 1793445236U, // <1,3,6,6>: Cost 2 vuzpr LHS, <4,6,4,6> + 1746110394U, // <1,3,6,7>: Cost 2 vuzpr LHS, <2,6,3,7> + 1746110395U, // <1,3,6,u>: Cost 2 vuzpr LHS, <2,6,3,u> + 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> + 1793446734U, // <1,3,7,1>: Cost 2 vuzpr LHS, <6,7,0,1> + 2867187830U, // <1,3,7,2>: Cost 3 vuzpr LHS, <5,7,0,2> + 1793446016U, // <1,3,7,3>: Cost 2 vuzpr LHS, <5,7,1,3> + 2849637679U, // <1,3,7,4>: Cost 3 vuzpr <5,1,7,3>, <1,7,3,4> + 1793446774U, // <1,3,7,5>: Cost 2 vuzpr LHS, <6,7,4,5> + 2867185674U, // <1,3,7,6>: Cost 3 vuzpr LHS, <2,7,3,6> + 1793446056U, // <1,3,7,7>: Cost 2 vuzpr LHS, <5,7,5,7> + 1793446021U, // <1,3,7,u>: Cost 2 vuzpr LHS, <5,7,1,u> + 1746109820U, // <1,3,u,0>: Cost 2 vuzpr LHS, <1,u,3,0> + 2014144166U, // <1,3,u,1>: Cost 2 vtrnr LHS, <2,3,0,1> + 1745668894U, // <1,3,u,2>: Cost 2 vuzpr LHS, <3,u,1,2> + 671924893U, // <1,3,u,3>: Cost 1 vuzpr LHS, LHS + 1746109824U, // <1,3,u,4>: Cost 2 vuzpr LHS, <1,u,3,4> + 2014144206U, // <1,3,u,5>: Cost 2 vtrnr LHS, <2,3,4,5> + 1745668934U, // <1,3,u,6>: Cost 2 vuzpr LHS, <3,u,5,6> + 671927849U, // <1,3,u,7>: Cost 1 vuzpr LHS, RHS + 671924898U, // <1,3,u,u>: Cost 1 vuzpr LHS, LHS + 3165396993U, // <1,4,0,0>: Cost 3 ins <1,u,0,0>, lane 1 + 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS + 2758434918U, // <1,4,0,2>: Cost 3 vuzpl <1,1,4,5>, LHS + 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 3165429761U, // <1,4,0,4>: Cost 3 ins <1,u,0,4>, lane 1 + 1818152246U, // <1,4,0,5>: Cost 2 vzipl <1,0,3,2>, RHS + 3026537782U, // <1,4,0,6>: Cost 3 vtrnl <1,1,0,0>, RHS + 3162808323U, // <1,4,0,7>: Cost 3 ins <1,4,0,u>, lane 3 + 1818152489U, // <1,4,0,u>: Cost 2 vzipl <1,0,3,2>, RHS + 3204620288U, // <1,4,1,0>: Cost 3 ins , lane 0 + 2091737089U, // <1,4,1,1>: Cost 2 ins <1,u,1,1>, lane 1 + 3204636672U, // <1,4,1,2>: Cost 3 ins , lane 0 + 2091753473U, // <1,4,1,3>: Cost 2 ins <1,u,1,3>, lane 1 + 1745674343U, // <1,4,1,4>: Cost 2 vuzpr <0,1,2,4>, <0,1,2,4> + 1818660150U, // <1,4,1,5>: Cost 2 vzipl <1,1,1,1>, RHS + 1952877878U, // 
<1,4,1,6>: Cost 2 vtrnl <1,1,1,1>, RHS + 3165528065U, // <1,4,1,7>: Cost 3 ins <1,u,1,7>, lane 1 + 1818660393U, // <1,4,1,u>: Cost 2 vzipl <1,1,1,1>, RHS + 2893237103U, // <1,4,2,0>: Cost 3 vzipl <1,2,3,4>, <4,0,1,2> + 2893237194U, // <1,4,2,1>: Cost 3 vzipl <1,2,3,4>, <4,1,2,3> + 3165560833U, // <1,4,2,2>: Cost 3 ins <1,u,2,2>, lane 1 + 2130976768U, // <1,4,2,3>: Cost 2 ins , lane 0 + 2893237467U, // <1,4,2,4>: Cost 3 vzipl <1,2,3,4>, <4,4,5,6> + 1819462966U, // <1,4,2,5>: Cost 2 vzipl <1,2,3,0>, RHS + 2131001344U, // <1,4,2,6>: Cost 2 ins , lane 0 + 3165601793U, // <1,4,2,7>: Cost 3 ins <1,u,2,7>, lane 1 + 1819463209U, // <1,4,2,u>: Cost 2 vzipl <1,2,3,0>, RHS + 2091876353U, // <1,4,3,0>: Cost 2 ins <1,u,3,0>, lane 1 + 3027454831U, // <1,4,3,1>: Cost 3 vtrnl <1,2,3,4>, <4,0,1,2> + 2091892737U, // <1,4,3,2>: Cost 2 ins <1,u,3,2>, lane 1 + 2091900929U, // <1,4,3,3>: Cost 2 ins <1,u,3,3>, lane 1 + 2061880528U, // <1,4,3,4>: Cost 2 vtrnr LHS, <4,4,4,4> + 2014101842U, // <1,4,3,5>: Cost 2 vtrnr LHS, <0,4,1,5> + 2014101852U, // <1,4,3,6>: Cost 2 vtrnr LHS, <0,4,2,6> + 2091933697U, // <1,4,3,7>: Cost 2 ins <1,u,3,7>, lane 1 + 2014101845U, // <1,4,3,u>: Cost 2 vtrnr LHS, <0,4,1,u> + 2557100134U, // <1,4,4,0>: Cost 3 vext1 <1,1,4,4>, LHS + 2557100882U, // <1,4,4,1>: Cost 3 vext1 <1,1,4,4>, <1,1,4,4> + 3165708289U, // <1,4,4,2>: Cost 3 ins <1,u,4,2>, lane 1 + 2819416409U, // <1,4,4,3>: Cost 3 vuzpr <0,1,2,4>, <0,4,2,3> + 2131132416U, // <1,4,4,4>: Cost 2 ins , lane 0 + 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS + 2758438198U, // <1,4,4,6>: Cost 3 vuzpl <1,1,4,5>, RHS + 2819419365U, // <1,4,4,7>: Cost 3 vuzpr <0,1,2,4>, <4,4,6,7> + 2131132416U, // <1,4,4,u>: Cost 2 ins , lane 0 + 1477394554U, // <1,4,5,0>: Cost 2 vext1 <0,1,4,5>, <0,1,4,5> + 2955558949U, // <1,4,5,1>: Cost 3 vzipr <0,4,1,5>, <0,0,4,1> + 3204931584U, // <1,4,5,2>: Cost 3 ins , lane 0 + 3165790209U, // <1,4,5,3>: Cost 3 ins <1,u,5,3>, lane 1 + 1477397814U, // <1,4,5,4>: Cost 2 vext1 <0,1,4,5>, RHS + 1821510966U, // <1,4,5,5>: Cost 2 vzipl <1,5,3,7>, RHS + 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 2092081153U, // <1,4,5,7>: Cost 2 ins <1,u,5,7>, lane 1 + 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS + 2557117268U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,4,6> + 3165855745U, // <1,4,6,2>: Cost 3 ins <1,u,6,2>, lane 1 + 2569062662U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,1,4,6> + 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS + 2895768886U, // <1,4,6,5>: Cost 3 vzipl <1,6,1,7>, RHS + 2131296256U, // <1,4,6,6>: Cost 2 ins , lane 0 + 2131304448U, // <1,4,6,7>: Cost 2 ins , lane 0 + 2131296256U, // <1,4,6,u>: Cost 2 ins , lane 0 + 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> + 3165921281U, // <1,4,7,1>: Cost 3 ins <1,u,7,1>, lane 1 + 3205079040U, // <1,4,7,2>: Cost 3 ins , lane 0 + 2861297792U, // <1,4,7,3>: Cost 3 vuzpr <7,1,3,4>, <5,7,1,3> + 2669778278U, // <1,4,7,4>: Cost 3 vext2 , <7,4,5,6> + 3205103616U, // <1,4,7,5>: Cost 3 ins , lane 0 + 2131369984U, // <1,4,7,6>: Cost 2 ins , lane 0 + 3165970433U, // <1,4,7,7>: Cost 3 ins <1,u,7,7>, lane 1 + 2131369984U, // <1,4,7,u>: Cost 2 ins , lane 0 + 2091876353U, // <1,4,u,0>: Cost 2 ins <1,u,3,0>, lane 1 + 2091737089U, // <1,4,u,1>: Cost 2 ins <1,u,1,1>, lane 1 + 2091892737U, // <1,4,u,2>: Cost 2 ins <1,u,3,2>, lane 1 + 2091753473U, // <1,4,u,3>: Cost 2 ins <1,u,1,3>, lane 1 + 2061921488U, // <1,4,u,4>: Cost 2 vtrnr LHS, <4,4,4,4> + 2014142802U, // <1,4,u,5>: Cost 2 vtrnr 
LHS, <0,4,1,5> + 2014142812U, // <1,4,u,6>: Cost 2 vtrnr LHS, <0,4,2,6> + 2091933697U, // <1,4,u,7>: Cost 2 ins <1,u,3,7>, lane 1 + 2014142805U, // <1,4,u,u>: Cost 2 vtrnr LHS, <0,4,1,u> + 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> + 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS + 1686110310U, // <1,5,0,2>: Cost 2 vuzpl <1,3,5,7>, LHS + 3163471875U, // <1,5,0,3>: Cost 3 ins <1,5,0,u>, lane 3 + 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 3165437953U, // <1,5,0,5>: Cost 3 ins <1,u,0,5>, lane 1 + 3164045314U, // <1,5,0,6>: Cost 3 ins <1,5,u,6>, lane 2 + 2090311682U, // <1,5,0,7>: Cost 2 ins <1,5,u,7>, lane 2 + 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS + 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> + 2091737089U, // <1,5,1,1>: Cost 2 ins <1,u,1,1>, lane 1 + 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> + 2091753473U, // <1,5,1,3>: Cost 2 ins <1,u,1,3>, lane 1 + 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> + 1686111232U, // <1,5,1,5>: Cost 2 vuzpl <1,3,5,7>, <1,3,5,7> + 2958181456U, // <1,5,1,6>: Cost 3 vzipr <0,u,1,1>, <1,4,5,6> + 2019986742U, // <1,5,1,7>: Cost 2 vtrnr <1,1,1,1>, RHS + 2019986743U, // <1,5,1,u>: Cost 2 vtrnr <1,1,1,1>, RHS + 2759853734U, // <1,5,2,0>: Cost 3 vuzpl <1,3,5,7>, <2,3,0,1> + 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> + 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> + 2090319877U, // <1,5,2,3>: Cost 2 ins <1,5,u,u>, lane 5 + 2759853774U, // <1,5,2,4>: Cost 3 vuzpl <1,3,5,7>, <2,3,4,5> + 2994687194U, // <1,5,2,5>: Cost 3 vzipr <7,0,1,2>, <4,4,5,5> + 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> + 2090311682U, // <1,5,2,7>: Cost 2 ins <1,5,u,7>, lane 2 + 2090319877U, // <1,5,2,u>: Cost 2 ins <1,5,u,u>, lane 5 + 2091876353U, // <1,5,3,0>: Cost 2 ins <1,u,3,0>, lane 1 + 2089951235U, // <1,5,3,1>: Cost 2 ins <1,5,3,u>, lane 3 + 2091892737U, // <1,5,3,2>: Cost 2 ins <1,u,3,2>, lane 1 + 2091900929U, // <1,5,3,3>: Cost 2 ins <1,u,3,3>, lane 1 + 2091909121U, // <1,5,3,4>: Cost 2 ins <1,u,3,4>, lane 1 + 2061881348U, // <1,5,3,5>: Cost 2 vtrnr LHS, <5,5,5,5> + 2089951235U, // <1,5,3,6>: Cost 2 ins <1,5,3,u>, lane 3 + 940363062U, // <1,5,3,7>: Cost 1 vtrnr LHS, RHS + 940363063U, // <1,5,3,u>: Cost 1 vtrnr LHS, RHS + 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> + 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 3164012546U, // <1,5,4,2>: Cost 3 ins <1,5,u,2>, lane 2 + 3163766787U, // <1,5,4,3>: Cost 3 ins <1,5,4,u>, lane 3 + 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> + 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS + 1686113590U, // <1,5,4,6>: Cost 2 vuzpl <1,3,5,7>, RHS + 2090311682U, // <1,5,4,7>: Cost 2 ins <1,5,u,7>, lane 2 + 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS + 2955561954U, // <1,5,5,0>: Cost 3 vzipr <0,4,1,5>, <4,1,5,0> + 2955561874U, // <1,5,5,1>: Cost 3 vzipr <0,4,1,5>, <4,0,5,1> + 3165782017U, // <1,5,5,2>: Cost 3 ins <1,u,5,2>, lane 1 + 2955559851U, // <1,5,5,3>: Cost 3 vzipr <0,4,1,5>, <1,2,5,3> + 2955561958U, // <1,5,5,4>: Cost 3 vzipr <0,4,1,5>, <4,1,5,4> + 2131877888U, // <1,5,5,5>: Cost 2 ins , lane 0 + 2955561474U, // <1,5,5,6>: Cost 3 vzipr <0,4,1,5>, <3,4,5,6> + 2092081153U, // <1,5,5,7>: Cost 2 ins <1,u,5,7>, lane 1 + 2092081153U, // <1,5,5,u>: Cost 2 ins <1,u,5,7>, lane 1 + 2131910656U, // <1,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <1,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <1,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // 
<1,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <1,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <1,5,6,5>: Cost 2 ins , lane 0 + 2131959808U, // <1,5,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <1,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <1,5,6,u>: Cost 1 ins RHS, lane 0 + 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS + 2557199198U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,5,7> + 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> + 2759857248U, // <1,5,7,3>: Cost 3 vuzpl <1,3,5,7>, <7,1,3,5> + 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS + 2759857510U, // <1,5,7,5>: Cost 3 vuzpl <1,3,5,7>, <7,4,5,6> + 2593035086U, // <1,5,7,6>: Cost 3 vext1 <7,1,5,7>, <6,7,0,1> + 2132041728U, // <1,5,7,7>: Cost 2 ins , lane 0 + 2132041728U, // <1,5,7,u>: Cost 2 ins , lane 0 + 2091876353U, // <1,5,u,0>: Cost 2 ins <1,u,3,0>, lane 1 + 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS + 1686116142U, // <1,5,u,2>: Cost 2 vuzpl <1,3,5,7>, LHS + 2091753473U, // <1,5,u,3>: Cost 2 ins <1,u,1,3>, lane 1 + 1594054682U, // <1,5,u,4>: Cost 2 vext2 , + 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS + 1686116506U, // <1,5,u,6>: Cost 2 vuzpl <1,3,5,7>, RHS + 940404022U, // <1,5,u,7>: Cost 1 vtrnr LHS, RHS + 940404023U, // <1,5,u,u>: Cost 1 vtrnr LHS, RHS + 3205873664U, // <1,6,0,0>: Cost 3 ins , lane 0 + 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS + 2132148224U, // <1,6,0,2>: Cost 2 ins , lane 0 + 3087819259U, // <1,6,0,3>: Cost 3 vtrnr <0,1,2,0>, <0,6,2,3> + 2620023123U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,6> + 3165437953U, // <1,6,0,5>: Cost 3 ins <1,u,0,5>, lane 1 + 3164708866U, // <1,6,0,6>: Cost 3 ins <1,6,u,6>, lane 2 + 2954857782U, // <1,6,0,7>: Cost 3 vzipr <0,3,1,0>, RHS + 2132148224U, // <1,6,0,u>: Cost 2 ins , lane 0 + 3205947392U, // <1,6,1,0>: Cost 3 ins , lane 0 + 2091737089U, // <1,6,1,1>: Cost 2 ins <1,u,1,1>, lane 1 + 3005959068U, // <1,6,1,2>: Cost 3 vzipr , <4,0,6,2> + 2091753473U, // <1,6,1,3>: Cost 2 ins <1,u,1,3>, lane 1 + 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> + 3205988352U, // <1,6,1,5>: Cost 3 ins , lane 0 + 1745690729U, // <1,6,1,6>: Cost 2 vuzpr <0,1,2,6>, <0,1,2,6> + 1884441910U, // <1,6,1,7>: Cost 2 vzipr <0,u,1,1>, RHS + 1884441911U, // <1,6,1,u>: Cost 2 vzipr <0,u,1,1>, RHS + 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> + 2994687442U, // <1,6,2,1>: Cost 3 vzipr <7,0,1,2>, <4,7,6,1> + 2994686876U, // <1,6,2,2>: Cost 3 vzipr <7,0,1,2>, <4,0,6,2> + 2132303872U, // <1,6,2,3>: Cost 2 ins , lane 0 + 3206053888U, // <1,6,2,4>: Cost 3 ins , lane 0 + 3165585409U, // <1,6,2,5>: Cost 3 ins <1,u,2,5>, lane 1 + 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> + 1897057590U, // <1,6,2,7>: Cost 2 vzipr <3,0,1,2>, RHS + 1897057591U, // <1,6,2,u>: Cost 2 vzipr <3,0,1,2>, RHS + 2061881442U, // <1,6,3,0>: Cost 2 vtrnr LHS, <5,6,7,0> + 2987396400U, // <1,6,3,1>: Cost 3 vzipr <5,7,1,3>, <4,5,6,1> + 2061880652U, // <1,6,3,2>: Cost 2 vtrnr LHS, <4,6,0,2> + 2091900929U, // <1,6,3,3>: Cost 2 ins <1,u,3,3>, lane 1 + 2061881446U, // <1,6,3,4>: Cost 2 vtrnr LHS, <5,6,7,4> + 3118078194U, // <1,6,3,5>: Cost 3 vtrnr <5,1,7,3>, + 2061880692U, // <1,6,3,6>: Cost 2 vtrnr LHS, <4,6,4,6> + 2014103482U, // <1,6,3,7>: Cost 2 vtrnr LHS, <2,6,3,7> + 2014103483U, // <1,6,3,u>: Cost 2 vtrnr LHS, <2,6,3,u> + 3206168576U, // <1,6,4,0>: Cost 3 ins , lane 0 + 2761256201U, // <1,6,4,1>: Cost 3 vuzpl <1,5,6,7>, <4,5,1,7> + 3164676098U, // <1,6,4,2>: Cost 3 ins <1,6,u,2>, lane 2 + 3087852027U, // <1,6,4,3>: Cost 3 
vtrnr <0,1,2,4>, <0,6,2,3> + 3206201344U, // <1,6,4,4>: Cost 3 ins , lane 0 + 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS + 2132475904U, // <1,6,4,6>: Cost 2 ins , lane 0 + 2954890550U, // <1,6,4,7>: Cost 3 vzipr <0,3,1,4>, RHS + 2132475904U, // <1,6,4,u>: Cost 2 ins , lane 0 + 3164659714U, // <1,6,5,0>: Cost 3 ins <1,6,u,0>, lane 2 + 3206250496U, // <1,6,5,1>: Cost 3 ins , lane 0 + 3003337628U, // <1,6,5,2>: Cost 3 vzipr , <4,0,6,2> + 3165790209U, // <1,6,5,3>: Cost 3 ins <1,u,5,3>, lane 1 + 3206275072U, // <1,6,5,4>: Cost 3 ins , lane 0 + 3206283264U, // <1,6,5,5>: Cost 3 ins , lane 0 + 3003337956U, // <1,6,5,6>: Cost 3 vzipr , <4,4,6,6> + 1881820470U, // <1,6,5,7>: Cost 2 vzipr <0,4,1,5>, RHS + 1881820471U, // <1,6,5,u>: Cost 2 vzipr <0,4,1,5>, RHS + 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> + 2557264742U, // <1,6,6,1>: Cost 3 vext1 <1,1,6,6>, <1,1,6,6> + 3165855745U, // <1,6,6,2>: Cost 3 ins <1,u,6,2>, lane 1 + 2819432955U, // <1,6,6,3>: Cost 3 vuzpr <0,1,2,6>, <0,6,2,3> + 3206348800U, // <1,6,6,4>: Cost 3 ins , lane 0 + 3206356992U, // <1,6,6,5>: Cost 3 ins , lane 0 + 2132623360U, // <1,6,6,6>: Cost 2 ins , lane 0 + 2132631552U, // <1,6,6,7>: Cost 2 ins , lane 0 + 2132623360U, // <1,6,6,u>: Cost 2 ins , lane 0 + 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> + 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> + 3206406144U, // <1,6,7,2>: Cost 3 ins , lane 0 + 3206414336U, // <1,6,7,3>: Cost 3 ins , lane 0 + 2132680704U, // <1,6,7,4>: Cost 2 ins , lane 0 + 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> + 2725507979U, // <1,6,7,6>: Cost 3 vext3 <6,u,0,1>, <6,7,6,u> + 2132705280U, // <1,6,7,7>: Cost 2 ins , lane 0 + 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> + 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> + 2091737089U, // <1,6,u,1>: Cost 2 ins <1,u,1,1>, lane 1 + 2061921612U, // <1,6,u,2>: Cost 2 vtrnr LHS, <4,6,0,2> + 2091753473U, // <1,6,u,3>: Cost 2 ins <1,u,1,3>, lane 1 + 2061922406U, // <1,6,u,4>: Cost 2 vtrnr LHS, <5,6,7,4> + 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS + 2061921652U, // <1,6,u,6>: Cost 2 vtrnr LHS, <4,6,4,6> + 2014144442U, // <1,6,u,7>: Cost 2 vtrnr LHS, <2,6,3,7> + 2014144443U, // <1,6,u,u>: Cost 2 vtrnr LHS, <2,6,3,u> + 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> + 2132803584U, // <1,7,0,1>: Cost 2 ins , lane 0 + 3206553600U, // <1,7,0,2>: Cost 3 ins , lane 0 + 2257286235U, // <1,7,0,3>: Cost 3 vrev <7,1,3,0> + 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> + 3206578176U, // <1,7,0,5>: Cost 3 ins , lane 0 + 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> + 3165380610U, // <1,7,0,7>: Cost 3 ins <1,7,u,7>, lane 2 + 2132803584U, // <1,7,0,u>: Cost 2 ins , lane 0 + 2581184614U, // <1,7,1,0>: Cost 3 vext1 <5,1,7,1>, LHS + 2091737089U, // <1,7,1,1>: Cost 2 ins <1,u,1,1>, lane 1 + 3206627328U, // <1,7,1,2>: Cost 3 ins , lane 0 + 2132893696U, // <1,7,1,3>: Cost 2 ins , lane 0 + 2581187894U, // <1,7,1,4>: Cost 3 vext1 <5,1,7,1>, RHS + 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> + 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> + 1745698922U, // <1,7,1,7>: Cost 2 vuzpr <0,1,2,7>, <0,1,2,7> + 2132893696U, // <1,7,1,u>: Cost 2 ins , lane 0 + 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS + 2994687370U, // <1,7,2,1>: Cost 3 vzipr <7,0,1,2>, <4,6,7,1> + 3206701056U, // <1,7,2,2>: Cost 3 ins , lane 0 + 2132967424U, // <1,7,2,3>: Cost 2 ins , lane 0 + 2587168054U, // <1,7,2,4>: Cost 3 
vext1 <6,1,7,2>, RHS + 3206725632U, // <1,7,2,5>: Cost 3 ins , lane 0 + 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> + 2994688024U, // <1,7,2,7>: Cost 3 vzipr <7,0,1,2>, <5,5,7,7> + 2132967424U, // <1,7,2,u>: Cost 2 ins , lane 0 + 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS + 2061882190U, // <1,7,3,1>: Cost 2 vtrnr LHS, <6,7,0,1> + 2091892737U, // <1,7,3,2>: Cost 2 ins <1,u,3,2>, lane 1 + 2061881472U, // <1,7,3,3>: Cost 2 vtrnr LHS, <5,7,1,3> + 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS + 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> + 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> + 2061881512U, // <1,7,3,7>: Cost 2 vtrnr LHS, <5,7,5,7> + 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS + 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> + 3165331458U, // <1,7,4,1>: Cost 3 ins <1,7,u,1>, lane 2 + 2644585539U, // <1,7,4,2>: Cost 3 vext2 <4,5,1,7>, <4,2,6,7> + 2257319007U, // <1,7,4,3>: Cost 3 vrev <7,1,3,4> + 3206864896U, // <1,7,4,4>: Cost 3 ins , lane 0 + 2133131264U, // <1,7,4,5>: Cost 2 ins , lane 0 + 3206881280U, // <1,7,4,6>: Cost 3 ins , lane 0 + 3165380610U, // <1,7,4,7>: Cost 3 ins <1,7,u,7>, lane 2 + 2133131264U, // <1,7,4,u>: Cost 2 ins , lane 0 + 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS + 3028292602U, // <1,7,5,1>: Cost 3 vtrnl <1,3,5,7>, <7,0,1,2> + 3165782017U, // <1,7,5,2>: Cost 3 ins <1,u,5,2>, lane 1 + 3028292704U, // <1,7,5,3>: Cost 3 vtrnl <1,3,5,7>, <7,1,3,5> + 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS + 3028292966U, // <1,7,5,5>: Cost 3 vtrnl <1,3,5,7>, <7,4,5,6> + 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> + 2133221376U, // <1,7,5,7>: Cost 2 ins , lane 0 + 2133221376U, // <1,7,5,u>: Cost 2 ins , lane 0 + 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> + 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> + 3206995968U, // <1,7,6,2>: Cost 3 ins , lane 0 + 3165347842U, // <1,7,6,3>: Cost 3 ins <1,7,u,3>, lane 2 + 2257409130U, // <1,7,6,4>: Cost 3 vrev <7,1,4,6> + 3207020544U, // <1,7,6,5>: Cost 3 ins , lane 0 + 3207028736U, // <1,7,6,6>: Cost 3 ins , lane 0 + 2133295104U, // <1,7,6,7>: Cost 2 ins , lane 0 + 2133295104U, // <1,7,6,u>: Cost 2 ins , lane 0 + 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> + 2861470542U, // <1,7,7,1>: Cost 3 vuzpr <7,1,5,7>, <6,7,0,1> + 3165929473U, // <1,7,7,2>: Cost 3 ins <1,u,7,2>, lane 1 + 2998046416U, // <1,7,7,3>: Cost 3 vzipr <7,5,1,7>, <5,1,7,3> + 3207086080U, // <1,7,7,4>: Cost 3 ins , lane 0 + 2257491060U, // <1,7,7,5>: Cost 3 vrev <7,1,5,7> + 3207102464U, // <1,7,7,6>: Cost 3 ins , lane 0 + 2133368832U, // <1,7,7,7>: Cost 2 ins , lane 0 + 2133368832U, // <1,7,7,u>: Cost 2 ins , lane 0 + 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS + 2061923150U, // <1,7,u,1>: Cost 2 vtrnr LHS, <6,7,0,1> + 2091892737U, // <1,7,u,2>: Cost 2 ins <1,u,3,2>, lane 1 + 2061922432U, // <1,7,u,3>: Cost 2 vtrnr LHS, <5,7,1,3> + 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS + 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> + 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> + 2061922472U, // <1,7,u,7>: Cost 2 vtrnr LHS, <5,7,5,7> + 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS + 1745707008U, // <1,u,0,0>: Cost 2 vuzpr LHS, <0,0,0,0> + 1745707018U, // <1,u,0,1>: Cost 2 vuzpr LHS, <0,0,1,1> + 1745707028U, // <1,u,0,2>: Cost 2 vuzpr LHS, <0,0,2,2> + 2087624706U, // <1,u,0,3>: Cost 2 ins <1,1,u,3>, lane 2 + 1546297685U, // <1,u,0,4>: Cost 2 vext2 
<0,4,1,u>, <0,4,1,u> + 1818155162U, // <1,u,0,5>: Cost 2 vzipl <1,0,3,2>, RHS + 2891897040U, // <1,u,0,6>: Cost 3 vzipl <1,0,3,2>, + 2088984578U, // <1,u,0,7>: Cost 2 ins <1,3,u,7>, lane 2 + 1745707025U, // <1,u,0,u>: Cost 2 vuzpr LHS, <0,0,1,u> + 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS + 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 671965286U, // <1,u,1,3>: Cost 1 vuzpr LHS, LHS + 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 1818663066U, // <1,u,1,5>: Cost 2 vzipl <1,1,1,1>, RHS + 1952880794U, // <1,u,1,6>: Cost 2 vtrnl <1,1,1,1>, RHS + 1884441928U, // <1,u,1,7>: Cost 2 vzipr <0,u,1,1>, RHS + 671965291U, // <1,u,1,u>: Cost 1 vuzpr LHS, LHS + 1745707926U, // <1,u,2,0>: Cost 2 vuzpr LHS, <1,2,3,0> + 1819465518U, // <1,u,2,1>: Cost 2 vzipl <1,2,3,0>, LHS + 1745707172U, // <1,u,2,2>: Cost 2 vuzpr LHS, <0,2,0,2> + 1055244288U, // <1,u,2,3>: Cost 1 ins LHS, lane 0 + 1745707930U, // <1,u,2,4>: Cost 2 vuzpr LHS, <1,2,3,4> + 1819465882U, // <1,u,2,5>: Cost 2 vzipl <1,2,3,0>, RHS + 1745707212U, // <1,u,2,6>: Cost 2 vuzpr LHS, <0,2,4,6> + 1897057608U, // <1,u,2,7>: Cost 2 vzipr <3,0,1,2>, RHS + 1055244288U, // <1,u,2,u>: Cost 1 ins LHS, lane 0 + 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS + 2014102162U, // <1,u,3,1>: Cost 2 vtrnr LHS, <0,u,1,1> + 115726126U, // <1,u,3,2>: Cost 1 vrev LHS + 940360349U, // <1,u,3,3>: Cost 1 vtrnr LHS, LHS + 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS + 2014102166U, // <1,u,3,5>: Cost 2 vtrnr LHS, <0,u,1,5> + 2014102176U, // <1,u,3,6>: Cost 2 vtrnr LHS, <0,u,2,6> + 940363305U, // <1,u,3,7>: Cost 1 vtrnr LHS, RHS + 940360354U, // <1,u,3,u>: Cost 1 vtrnr LHS, LHS + 2088263682U, // <1,u,4,0>: Cost 2 ins <1,2,u,0>, lane 2 + 2087608322U, // <1,u,4,1>: Cost 2 ins <1,1,u,1>, lane 2 + 2086952962U, // <1,u,4,2>: Cost 2 ins <1,0,u,2>, lane 2 + 2087624706U, // <1,u,4,3>: Cost 2 ins <1,1,u,3>, lane 2 + 1793486032U, // <1,u,4,4>: Cost 2 vuzpr LHS, <4,4,4,4> + 1745707346U, // <1,u,4,5>: Cost 2 vuzpr LHS, <0,4,1,5> + 1745707356U, // <1,u,4,6>: Cost 2 vuzpr LHS, <0,4,2,6> + 2088984578U, // <1,u,4,7>: Cost 2 ins <1,3,u,7>, lane 2 + 1745707349U, // <1,u,4,u>: Cost 2 vuzpr LHS, <0,4,1,u> + 2088263682U, // <1,u,5,0>: Cost 2 ins <1,2,u,0>, lane 2 + 1821513518U, // <1,u,5,1>: Cost 2 vzipl <1,5,3,7>, LHS + 1954551598U, // <1,u,5,2>: Cost 2 vtrnl <1,3,5,7>, LHS + 1881817244U, // <1,u,5,3>: Cost 2 vzipr <0,4,1,5>, LHS + 2088296450U, // <1,u,5,4>: Cost 2 ins <1,2,u,4>, lane 2 + 1821513882U, // <1,u,5,5>: Cost 2 vzipl <1,5,3,7>, RHS + 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 671968566U, // <1,u,5,7>: Cost 1 vuzpr LHS, RHS + 671968567U, // <1,u,5,u>: Cost 1 vuzpr LHS, RHS + 1793486946U, // <1,u,6,0>: Cost 2 vuzpr LHS, <5,6,7,0> + 2087608322U, // <1,u,6,1>: Cost 2 ins <1,1,u,1>, lane 2 + 1793486156U, // <1,u,6,2>: Cost 2 vuzpr LHS, <4,6,0,2> + 2087624706U, // <1,u,6,3>: Cost 2 ins <1,1,u,3>, lane 2 + 1793486950U, // <1,u,6,4>: Cost 2 vuzpr LHS, <5,6,7,4> + 2131951616U, // <1,u,6,5>: Cost 2 ins , lane 0 + 1793486196U, // <1,u,6,6>: Cost 2 vuzpr LHS, <4,6,4,6> + 1058226176U, // <1,u,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <1,u,6,u>: Cost 1 ins RHS, lane 0 + 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> + 1793487694U, // <1,u,7,1>: Cost 2 vuzpr LHS, <6,7,0,1> + 2086952962U, // <1,u,7,2>: Cost 2 ins <1,0,u,2>, lane 2 + 1793486976U, // <1,u,7,3>: Cost 2 vuzpr LHS, <5,7,1,3> + 2088296450U, // <1,u,7,4>: Cost 2 ins <1,2,u,4>, lane 2 + 1793487734U, // <1,u,7,5>: Cost 2 vuzpr 
LHS, <6,7,4,5> + 2131369984U, // <1,u,7,6>: Cost 2 ins , lane 0 + 1793487016U, // <1,u,7,7>: Cost 2 vuzpr LHS, <5,7,5,7> + 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> + 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS + 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS + 115767091U, // <1,u,u,2>: Cost 1 vrev LHS + 671965853U, // <1,u,u,3>: Cost 1 vuzpr LHS, LHS + 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS + 1745707670U, // <1,u,u,5>: Cost 2 vuzpr LHS, <0,u,1,5> + 1745707680U, // <1,u,u,6>: Cost 2 vuzpr LHS, <0,u,2,6> + 671968809U, // <1,u,u,7>: Cost 1 vuzpr LHS, RHS + 671965858U, // <1,u,u,u>: Cost 1 vuzpr LHS, LHS + 2128150528U, // <2,0,0,0>: Cost 2 ins , lane 0 + 2097635329U, // <2,0,0,1>: Cost 2 ins <2,u,0,1>, lane 1 + 1691664486U, // <2,0,0,2>: Cost 2 vuzpl <2,3,0,1>, LHS + 2826094014U, // <2,0,0,3>: Cost 3 vuzpr <1,2,3,0>, <2,0,1,3> + 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS + 2826094772U, // <2,0,0,5>: Cost 3 vuzpr <1,2,3,0>, <3,0,4,5> + 3171418113U, // <2,0,0,6>: Cost 3 ins <2,u,0,6>, lane 1 + 3094529510U, // <2,0,0,7>: Cost 3 vtrnr <1,2,3,0>, <2,0,5,7> + 1691664540U, // <2,0,0,u>: Cost 2 vuzpl <2,3,0,1>, LHS + 2215927971U, // <2,0,1,0>: Cost 3 vrev <0,2,0,1> + 2128232448U, // <2,0,1,1>: Cost 2 ins , lane 0 + 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS + 1752350822U, // <2,0,1,3>: Cost 2 vuzpr <1,2,3,0>, LHS + 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS + 2765407232U, // <2,0,1,5>: Cost 3 vuzpl <2,3,0,1>, <1,3,5,7> + 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> + 3166707714U, // <2,0,1,7>: Cost 3 ins <2,0,u,7>, lane 2 + 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS + 1142194340U, // <2,0,2,0>: Cost 2 vrev <0,2,0,2> + 1825374310U, // <2,0,2,1>: Cost 2 vzipl <2,2,2,2>, LHS + 1959592038U, // <2,0,2,2>: Cost 2 vtrnl <2,2,2,2>, LHS + 2128322560U, // <2,0,2,3>: Cost 2 ins , lane 0 + 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS + 2599259856U, // <2,0,2,5>: Cost 3 vext1 , <5,1,7,3> + 3088351274U, // <2,0,2,6>: Cost 3 vtrnr <0,2,0,2>, <0,0,4,6> + 2599261178U, // <2,0,2,7>: Cost 3 vext1 , <7,0,1,2> + 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS + 1879883776U, // <2,0,3,0>: Cost 2 vzipr LHS, <0,0,0,0> + 1879885478U, // <2,0,3,1>: Cost 2 vzipr LHS, <2,3,0,1> + 1879883940U, // <2,0,3,2>: Cost 2 vzipr LHS, <0,2,0,2> + 2097872897U, // <2,0,3,3>: Cost 2 ins <2,u,3,3>, lane 1 + 2958270630U, // <2,0,3,4>: Cost 3 vzipr LHS, <0,2,0,4> + 2826094286U, // <2,0,3,5>: Cost 3 vuzpr <1,2,3,0>, <2,3,4,5> + 2958270794U, // <2,0,3,6>: Cost 3 vzipr LHS, <0,4,0,6> + 2097905665U, // <2,0,3,7>: Cost 2 ins <2,u,3,7>, lane 1 + 1879883946U, // <2,0,3,u>: Cost 2 vzipr LHS, <0,2,0,u> + 2215952550U, // <2,0,4,0>: Cost 3 vrev <0,2,0,4> + 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> + 1960427622U, // <2,0,4,2>: Cost 2 vtrnl <2,3,4,5>, LHS + 3171688449U, // <2,0,4,3>: Cost 3 ins <2,u,4,3>, lane 1 + 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS + 2097963009U, // <2,0,4,5>: Cost 2 ins <2,u,4,5>, lane 1 + 1691667766U, // <2,0,4,6>: Cost 2 vuzpl <2,3,0,1>, RHS + 3171721217U, // <2,0,4,7>: Cost 3 ins <2,u,4,7>, lane 1 + 1691667784U, // <2,0,4,u>: Cost 2 vuzpl <2,3,0,1>, RHS + 3033596068U, // <2,0,5,0>: Cost 3 vtrnl <2,2,5,7>, <0,2,0,2> + 2128527360U, // <2,0,5,1>: Cost 2 ins , lane 0 + 2955632804U, // <2,0,5,2>: Cost 3 vzipr <0,4,2,5>, <0,2,0,2> + 2216181954U, // <2,0,5,3>: Cost 3 vrev <0,2,3,5> + 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> + 2867900420U, // <2,0,5,5>: Cost 3 vuzpr , <5,5,5,5> + 
3202310144U, // <2,0,5,6>: Cost 3 ins , lane 0 + 1752354102U, // <2,0,5,7>: Cost 2 vuzpr <1,2,3,0>, RHS + 1752354103U, // <2,0,5,u>: Cost 2 vuzpr <1,2,3,0>, RHS + 3088678912U, // <2,0,6,0>: Cost 3 vtrnr <0,2,4,6>, <0,0,0,0> + 1828143206U, // <2,0,6,1>: Cost 2 vzipl <2,6,3,7>, LHS + 2128609280U, // <2,0,6,2>: Cost 2 ins , lane 0 + 3171835905U, // <2,0,6,3>: Cost 3 ins <2,u,6,3>, lane 1 + 1142522060U, // <2,0,6,4>: Cost 2 vrev <0,2,4,6> + 3171852289U, // <2,0,6,5>: Cost 3 ins <2,u,6,5>, lane 1 + 2867899764U, // <2,0,6,6>: Cost 3 vuzpr , <4,6,4,6> + 2128650240U, // <2,0,6,7>: Cost 2 ins , lane 0 + 1142817008U, // <2,0,6,u>: Cost 2 vrev <0,2,u,6> + 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 2867901262U, // <2,0,7,1>: Cost 3 vuzpr , <6,7,0,1> + 2956976292U, // <2,0,7,2>: Cost 3 vzipr <0,6,2,7>, <0,2,0,2> + 2867900544U, // <2,0,7,3>: Cost 3 vuzpr , <5,7,1,3> + 3171917825U, // <2,0,7,4>: Cost 3 ins <2,u,7,4>, lane 1 + 2867901302U, // <2,0,7,5>: Cost 3 vuzpr , <6,7,4,5> + 3166699522U, // <2,0,7,6>: Cost 3 ins <2,0,u,6>, lane 2 + 2867900584U, // <2,0,7,7>: Cost 3 vuzpr , <5,7,5,7> + 2867900549U, // <2,0,7,u>: Cost 3 vuzpr , <5,7,1,u> + 1879924736U, // <2,0,u,0>: Cost 2 vzipr LHS, <0,0,0,0> + 1879926438U, // <2,0,u,1>: Cost 2 vzipr LHS, <2,3,0,1> + 1879924900U, // <2,0,u,2>: Cost 2 vzipr LHS, <0,2,0,2> + 1752351389U, // <2,0,u,3>: Cost 2 vuzpr <1,2,3,0>, LHS + 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS + 2097963009U, // <2,0,u,5>: Cost 2 ins <2,u,4,5>, lane 1 + 1691670682U, // <2,0,u,6>: Cost 2 vuzpl <2,3,0,1>, RHS + 1752354345U, // <2,0,u,7>: Cost 2 vuzpr <1,2,3,0>, RHS + 1879924906U, // <2,0,u,u>: Cost 2 vzipr LHS, <0,2,0,u> + 2763497636U, // <2,1,0,0>: Cost 3 vuzpl <2,0,1,2>, <0,2,0,2> + 2097635329U, // <2,1,0,1>: Cost 2 ins <2,u,0,1>, lane 1 + 2820130966U, // <2,1,0,2>: Cost 3 vuzpr <0,2,3,1>, <3,0,1,2> + 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> + 2767487180U, // <2,1,0,4>: Cost 3 vuzpl <2,6,1,3>, <0,2,4,6> + 3033842688U, // <2,1,0,5>: Cost 3 vtrnl <2,3,0,1>, <1,3,5,7> + 3171418113U, // <2,1,0,6>: Cost 3 ins <2,u,0,6>, lane 1 + 3171426305U, // <2,1,0,7>: Cost 3 ins <2,u,0,7>, lane 1 + 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> + 2551546028U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, <0,2,1,1> + 2128896000U, // <2,1,1,1>: Cost 2 ins , lane 0 + 2954938518U, // <2,1,1,2>: Cost 3 vzipr <0,3,2,1>, <3,0,1,2> + 2128912384U, // <2,1,1,3>: Cost 2 ins , lane 0 + 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS + 3202670592U, // <2,1,1,5>: Cost 3 ins , lane 0 + 3202678784U, // <2,1,1,6>: Cost 3 ins , lane 0 + 2953612553U, // <2,1,1,7>: Cost 3 vzipr <0,1,2,1>, <4,5,1,7> + 2128896000U, // <2,1,1,u>: Cost 2 ins , lane 0 + 2128961536U, // <2,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <2,1,2,1>: Cost 2 ins , lane 0 + 2128977920U, // <2,1,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <2,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <2,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <2,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <2,1,2,6>: Cost 2 ins , lane 0 + 2129018880U, // <2,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <2,1,2,u>: Cost 1 ins LHS, lane 0 + 2953625609U, // <2,1,3,0>: Cost 3 vzipr LHS, <0,0,1,0> + 1879883786U, // <2,1,3,1>: Cost 2 vzipr LHS, <0,0,1,1> + 1879885974U, // <2,1,3,2>: Cost 2 vzipr LHS, <3,0,1,2> + 1879884760U, // <2,1,3,3>: Cost 2 vzipr LHS, <1,3,1,3> + 2953625856U, // <2,1,3,4>: Cost 3 vzipr LHS, <0,3,1,4> + 1879884114U, // <2,1,3,5>: Cost 2 vzipr LHS, <0,4,1,5> + 2958270641U, // <2,1,3,6>: Cost 3 vzipr 
LHS, <0,2,1,6> + 2097905665U, // <2,1,3,7>: Cost 2 ins <2,u,3,7>, lane 1 + 1879883793U, // <2,1,3,u>: Cost 2 vzipr LHS, <0,0,1,u> + 3171663873U, // <2,1,4,0>: Cost 3 ins <2,u,4,0>, lane 1 + 3094561588U, // <2,1,4,1>: Cost 3 vtrnr <1,2,3,4>, <1,1,1,1> + 2900378522U, // <2,1,4,2>: Cost 3 vzipl <2,4,1,3>, <1,2,3,4> + 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> + 3171696641U, // <2,1,4,4>: Cost 3 ins <2,u,4,4>, lane 1 + 2097963009U, // <2,1,4,5>: Cost 2 ins <2,u,4,5>, lane 1 + 2763500854U, // <2,1,4,6>: Cost 3 vuzpl <2,0,1,2>, RHS + 3171721217U, // <2,1,4,7>: Cost 3 ins <2,u,4,7>, lane 1 + 2020819051U, // <2,1,4,u>: Cost 2 vtrnr <1,2,3,4>, LHS + 2551578800U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, <0,2,1,5> + 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> + 2901001110U, // <2,1,5,2>: Cost 3 vzipl <2,5,0,7>, <1,2,3,0> + 2129207296U, // <2,1,5,3>: Cost 2 ins , lane 0 + 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS + 3202965504U, // <2,1,5,5>: Cost 3 ins , lane 0 + 3171786753U, // <2,1,5,6>: Cost 3 ins <2,u,5,6>, lane 1 + 2819910966U, // <2,1,5,7>: Cost 3 vuzpr <0,2,0,1>, RHS + 2129207296U, // <2,1,5,u>: Cost 2 ins , lane 0 + 2551586993U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, <0,2,1,6> + 3088679732U, // <2,1,6,1>: Cost 3 vtrnr <0,2,4,6>, <1,1,1,1> + 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> + 2014937190U, // <2,1,6,3>: Cost 2 vtrnr <0,2,4,6>, LHS + 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS + 2955641170U, // <2,1,6,5>: Cost 3 vzipr <0,4,2,6>, <0,4,1,5> + 2901886177U, // <2,1,6,6>: Cost 3 vzipl <2,6,3,7>, <1,6,3,7> + 2129313792U, // <2,1,6,7>: Cost 2 ins , lane 0 + 2014937195U, // <2,1,6,u>: Cost 2 vtrnr <0,2,4,6>, LHS + 3171885057U, // <2,1,7,0>: Cost 3 ins <2,u,7,0>, lane 1 + 3203080192U, // <2,1,7,1>: Cost 3 ins , lane 0 + 3001439874U, // <2,1,7,2>: Cost 3 vzipr , <7,u,1,2> + 2129354752U, // <2,1,7,3>: Cost 2 ins , lane 0 + 3171917825U, // <2,1,7,4>: Cost 3 ins <2,u,7,4>, lane 1 + 3203112960U, // <2,1,7,5>: Cost 3 ins , lane 0 + 2222392248U, // <2,1,7,6>: Cost 3 vrev <1,2,6,7> + 3171942401U, // <2,1,7,7>: Cost 3 ins <2,u,7,7>, lane 1 + 2129354752U, // <2,1,7,u>: Cost 2 ins , lane 0 + 2128961536U, // <2,1,u,0>: Cost 2 ins , lane 0 + 1879924746U, // <2,1,u,1>: Cost 2 vzipr LHS, <0,0,1,1> + 1879926934U, // <2,1,u,2>: Cost 2 vzipr LHS, <3,0,1,2> + 1055244288U, // <2,1,u,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <2,1,u,4>: Cost 2 ins , lane 0 + 1879925074U, // <2,1,u,5>: Cost 2 vzipr LHS, <0,4,1,5> + 2129010688U, // <2,1,u,6>: Cost 2 ins , lane 0 + 2097905665U, // <2,1,u,7>: Cost 2 ins <2,u,3,7>, lane 1 + 1055244288U, // <2,1,u,u>: Cost 1 ins LHS, lane 0 + 2020787094U, // <2,2,0,0>: Cost 2 vtrnr <1,2,3,0>, <1,2,3,0> + 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS + 1691156582U, // <2,2,0,2>: Cost 2 vuzpl <2,2,2,2>, LHS + 2094260226U, // <2,2,0,3>: Cost 2 ins <2,2,u,3>, lane 2 + 2819917256U, // <2,2,0,4>: Cost 3 vuzpr <0,2,0,2>, <2,0,2,4> + 3168018434U, // <2,2,0,5>: Cost 3 ins <2,2,u,5>, lane 2 + 2819915818U, // <2,2,0,6>: Cost 3 vuzpr <0,2,0,2>, <0,0,4,6> + 3171426305U, // <2,2,0,7>: Cost 3 ins <2,u,0,7>, lane 1 + 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> + 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> + 1879867492U, // <2,2,1,1>: Cost 2 vzipr <0,1,2,1>, <0,1,2,1> + 2094252034U, // <2,2,1,2>: Cost 2 ins <2,2,u,2>, lane 2 + 1746174054U, // <2,2,1,3>: Cost 2 vuzpr <0,2,0,2>, LHS + 3167526915U, // <2,2,1,4>: Cost 3 ins <2,2,1,u>, lane 3 + 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> + 
3203342336U, // <2,2,1,6>: Cost 3 ins , lane 0 + 3168034818U, // <2,2,1,7>: Cost 3 ins <2,2,u,7>, lane 2 + 1746174059U, // <2,2,1,u>: Cost 2 vuzpr <0,2,0,2>, LHS + 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS + 2093858819U, // <2,2,2,1>: Cost 2 ins <2,2,2,u>, lane 3 + 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS + 1884520550U, // <2,2,2,3>: Cost 2 vzipr <0,u,2,2>, LHS + 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS + 2093858819U, // <2,2,2,5>: Cost 2 ins <2,2,2,u>, lane 3 + 2093858819U, // <2,2,2,6>: Cost 2 ins <2,2,2,u>, lane 3 + 2093858819U, // <2,2,2,7>: Cost 2 ins <2,2,2,u>, lane 3 + 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS + 2129698816U, // <2,2,3,0>: Cost 2 ins , lane 0 + 2093932547U, // <2,2,3,1>: Cost 2 ins <2,2,3,u>, lane 3 + 1879885416U, // <2,2,3,2>: Cost 2 vzipr LHS, <2,2,2,2> + 806142054U, // <2,2,3,3>: Cost 1 vzipr LHS, LHS + 2129731584U, // <2,2,3,4>: Cost 2 ins , lane 0 + 2093932547U, // <2,2,3,5>: Cost 2 ins <2,2,3,u>, lane 3 + 1884528988U, // <2,2,3,6>: Cost 2 vzipr LHS, <0,4,2,6> + 2097905665U, // <2,2,3,7>: Cost 2 ins <2,u,3,7>, lane 1 + 806142059U, // <2,2,3,u>: Cost 1 vzipr LHS, LHS + 2551644344U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, <0,2,2,4> + 3171672065U, // <2,2,4,1>: Cost 3 ins <2,u,4,1>, lane 1 + 2094252034U, // <2,2,4,2>: Cost 2 ins <2,2,u,2>, lane 2 + 2094260226U, // <2,2,4,3>: Cost 2 ins <2,2,u,3>, lane 2 + 2020819866U, // <2,2,4,4>: Cost 2 vtrnr <1,2,3,4>, <1,2,3,4> + 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS + 1691159862U, // <2,2,4,6>: Cost 2 vuzpl <2,2,2,2>, RHS + 3171721217U, // <2,2,4,7>: Cost 3 ins <2,u,4,7>, lane 1 + 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS + 3167821827U, // <2,2,5,0>: Cost 3 ins <2,2,5,u>, lane 3 + 2670497488U, // <2,2,5,1>: Cost 3 vext2 , <5,1,7,3> + 2094252034U, // <2,2,5,2>: Cost 2 ins <2,2,u,2>, lane 2 + 2094260226U, // <2,2,5,3>: Cost 2 ins <2,2,u,3>, lane 2 + 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> + 1879900264U, // <2,2,5,5>: Cost 2 vzipr <0,1,2,5>, <0,1,2,5> + 2670497890U, // <2,2,5,6>: Cost 3 vext2 , <5,6,7,0> + 1746177334U, // <2,2,5,7>: Cost 2 vuzpr <0,2,0,2>, RHS + 1746177335U, // <2,2,5,u>: Cost 2 vuzpr <0,2,0,2>, RHS + 3088679830U, // <2,2,6,0>: Cost 3 vtrnr <0,2,4,6>, <1,2,3,0> + 3171819521U, // <2,2,6,1>: Cost 3 ins <2,u,6,1>, lane 1 + 2094252034U, // <2,2,6,2>: Cost 2 ins <2,2,u,2>, lane 2 + 1881899110U, // <2,2,6,3>: Cost 2 vzipr <0,4,2,6>, LHS + 3088679078U, // <2,2,6,4>: Cost 3 vtrnr <0,2,4,6>, <0,2,0,4> + 3171852289U, // <2,2,6,5>: Cost 3 ins <2,u,6,5>, lane 1 + 2014937292U, // <2,2,6,6>: Cost 2 vtrnr <0,2,4,6>, <0,2,4,6> + 2094301189U, // <2,2,6,7>: Cost 2 ins <2,2,u,u>, lane 5 + 1881899115U, // <2,2,6,u>: Cost 2 vzipr <0,4,2,6>, LHS + 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> + 2867696462U, // <2,2,7,1>: Cost 3 vuzpr , <6,7,0,1> + 2094252034U, // <2,2,7,2>: Cost 2 ins <2,2,u,2>, lane 2 + 2130018304U, // <2,2,7,3>: Cost 2 ins , lane 0 + 2670499174U, // <2,2,7,4>: Cost 3 vext2 , <7,4,5,6> + 2228291208U, // <2,2,7,5>: Cost 3 vrev <2,2,5,7> + 3203784704U, // <2,2,7,6>: Cost 3 ins , lane 0 + 1879916650U, // <2,2,7,7>: Cost 2 vzipr <0,1,2,7>, <0,1,2,7> + 2130018304U, // <2,2,7,u>: Cost 2 ins , lane 0 + 2020787094U, // <2,2,u,0>: Cost 2 vtrnr <1,2,3,0>, <1,2,3,0> + 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS + 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS + 806183014U, // <2,2,u,3>: Cost 1 vzipr LHS, LHS + 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS + 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS + 1879925084U, // 
<2,2,u,6>: Cost 2 vzipr LHS, <0,4,2,6> + 1746177577U, // <2,2,u,7>: Cost 2 vuzpr <0,2,0,2>, RHS + 806183019U, // <2,2,u,u>: Cost 1 vzipr LHS, LHS + 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS + 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 2094374915U, // <2,3,0,3>: Cost 2 ins <2,3,0,u>, lane 3 + 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2094940162U, // <2,3,0,5>: Cost 2 ins <2,3,u,5>, lane 2 + 2094374915U, // <2,3,0,6>: Cost 2 ins <2,3,0,u>, lane 3 + 2094374915U, // <2,3,0,7>: Cost 2 ins <2,3,0,u>, lane 3 + 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS + 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS + 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> + 2094956546U, // <2,3,1,7>: Cost 2 ins <2,3,u,7>, lane 2 + 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> + 2094522371U, // <2,3,2,0>: Cost 2 ins <2,3,2,u>, lane 3 + 2094907394U, // <2,3,2,1>: Cost 2 ins <2,3,u,1>, lane 2 + 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> + 1059889156U, // <2,3,2,3>: Cost 1 ins LHS, lane 4 + 2094522371U, // <2,3,2,4>: Cost 2 ins <2,3,2,u>, lane 3 + 2094940162U, // <2,3,2,5>: Cost 2 ins <2,3,u,5>, lane 2 + 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 2094956546U, // <2,3,2,7>: Cost 2 ins <2,3,u,7>, lane 2 + 1059889156U, // <2,3,2,u>: Cost 1 ins LHS, lane 4 + 1879884694U, // <2,3,3,0>: Cost 2 vzipr LHS, <1,2,3,0> + 2094907394U, // <2,3,3,1>: Cost 2 ins <2,3,u,1>, lane 2 + 1879884534U, // <2,3,3,2>: Cost 2 vzipr LHS, <1,0,3,2> + 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> + 1879884698U, // <2,3,3,4>: Cost 2 vzipr LHS, <1,2,3,4> + 2094940162U, // <2,3,3,5>: Cost 2 ins <2,3,u,5>, lane 2 + 2953627415U, // <2,3,3,6>: Cost 3 vzipr LHS, <2,4,3,6> + 1884529808U, // <2,3,3,7>: Cost 2 vzipr LHS, <1,5,3,7> + 1879884702U, // <2,3,3,u>: Cost 2 vzipr LHS, <1,2,3,u> + 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS + 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> + 2094669827U, // <2,3,4,2>: Cost 2 ins <2,3,4,u>, lane 3 + 2094669827U, // <2,3,4,3>: Cost 2 ins <2,3,4,u>, lane 3 + 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS + 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS + 1691241782U, // <2,3,4,6>: Cost 2 vuzpl <2,2,3,3>, RHS + 2094669827U, // <2,3,4,7>: Cost 2 ins <2,3,4,u>, lane 3 + 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS + 2551726274U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, <0,2,3,5> + 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2665860843U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,1,3> + 2094923778U, // <2,3,5,3>: Cost 2 ins <2,3,u,3>, lane 2 + 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> + 1758350646U, // <2,3,5,7>: Cost 2 vuzpr <2,2,3,3>, RHS + 1758350647U, // <2,3,5,u>: Cost 2 vuzpr <2,2,3,3>, RHS + 2094817283U, // <2,3,6,0>: Cost 2 ins <2,3,6,u>, lane 3 + 2094907394U, // <2,3,6,1>: Cost 2 ins <2,3,u,1>, lane 2 + 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2094923778U, // <2,3,6,3>: Cost 2 ins <2,3,u,3>, lane 2 + 2094817283U, // <2,3,6,4>: Cost 2 ins <2,3,6,u>, lane 3 + 2094940162U, // <2,3,6,5>: Cost 2 ins <2,3,u,5>, 
lane 2 + 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1060216836U, // <2,3,6,7>: Cost 1 ins RHS, lane 4 + 1060216836U, // <2,3,6,u>: Cost 1 ins RHS, lane 4 + 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2094907394U, // <2,3,7,1>: Cost 2 ins <2,3,u,1>, lane 2 + 2974892790U, // <2,3,7,2>: Cost 3 vzipr <3,6,2,7>, <1,0,3,2> + 2133999620U, // <2,3,7,3>: Cost 2 ins , lane 4 + 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2094940162U, // <2,3,7,5>: Cost 2 ins <2,3,u,5>, lane 2 + 2134024196U, // <2,3,7,6>: Cost 2 ins , lane 4 + 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1879925654U, // <2,3,u,0>: Cost 2 vzipr LHS, <1,2,3,0> + 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS + 1879925494U, // <2,3,u,2>: Cost 2 vzipr LHS, <1,0,3,2> + 1059889156U, // <2,3,u,3>: Cost 1 ins LHS, lane 4 + 1879925658U, // <2,3,u,4>: Cost 2 vzipr LHS, <1,2,3,4> + 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS + 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, + 1060216836U, // <2,3,u,7>: Cost 1 ins RHS, lane 4 + 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS + 2826125312U, // <2,4,0,0>: Cost 3 vuzpr <1,2,3,4>, <0,0,0,0> + 2097635329U, // <2,4,0,1>: Cost 2 ins <2,u,0,1>, lane 1 + 1691992166U, // <2,4,0,2>: Cost 2 vuzpl <2,3,4,5>, LHS + 3171393537U, // <2,4,0,3>: Cost 3 ins <2,u,0,3>, lane 1 + 2765734092U, // <2,4,0,4>: Cost 3 vuzpl <2,3,4,5>, <0,2,4,6> + 3094528338U, // <2,4,0,5>: Cost 3 vtrnr <1,2,3,0>, <0,4,1,5> + 1960103222U, // <2,4,0,6>: Cost 2 vtrnl <2,3,0,1>, RHS + 3171426305U, // <2,4,0,7>: Cost 3 ins <2,u,0,7>, lane 1 + 1960103240U, // <2,4,0,u>: Cost 2 vtrnl <2,3,0,1>, RHS + 3204620288U, // <2,4,1,0>: Cost 3 ins , lane 0 + 2826126132U, // <2,4,1,1>: Cost 3 vuzpr <1,2,3,4>, <1,1,1,1> + 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> + 1752383590U, // <2,4,1,3>: Cost 2 vuzpr <1,2,3,4>, LHS + 3204653056U, // <2,4,1,4>: Cost 3 ins , lane 0 + 2130919424U, // <2,4,1,5>: Cost 2 ins , lane 0 + 3031936310U, // <2,4,1,6>: Cost 3 vtrnl <2,0,1,2>, RHS + 3169361922U, // <2,4,1,7>: Cost 3 ins <2,4,u,7>, lane 2 + 1752383595U, // <2,4,1,u>: Cost 2 vuzpr <1,2,3,4>, LHS + 2826126230U, // <2,4,2,0>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,0> + 3171524609U, // <2,4,2,1>: Cost 3 ins <2,u,2,1>, lane 1 + 2097790977U, // <2,4,2,2>: Cost 2 ins <2,u,2,2>, lane 1 + 2130976768U, // <2,4,2,3>: Cost 2 ins , lane 0 + 1752384410U, // <2,4,2,4>: Cost 2 vuzpr <1,2,3,4>, <1,2,3,4> + 1825377590U, // <2,4,2,5>: Cost 2 vzipl <2,2,2,2>, RHS + 1959595318U, // <2,4,2,6>: Cost 2 vtrnl <2,2,2,2>, RHS + 3171573761U, // <2,4,2,7>: Cost 3 ins <2,u,2,7>, lane 1 + 1825377833U, // <2,4,2,u>: Cost 2 vzipl <2,2,2,2>, RHS + 2826127049U, // <2,4,3,0>: Cost 3 vuzpr <1,2,3,4>, <2,3,4,0> + 2958270501U, // <2,4,3,1>: Cost 3 vzipr LHS, <0,0,4,1> + 2958270502U, // <2,4,3,2>: Cost 3 vzipr LHS, <0,0,4,2> + 2097872897U, // <2,4,3,3>: Cost 2 ins <2,u,3,3>, lane 1 + 1927662800U, // <2,4,3,4>: Cost 2 vzipr LHS, <4,4,4,4> + 1879885518U, // <2,4,3,5>: Cost 2 vzipr LHS, <2,3,4,5> + 1879883980U, // <2,4,3,6>: Cost 2 vzipr LHS, <0,2,4,6> + 2097905665U, // <2,4,3,7>: Cost 2 ins <2,u,3,7>, lane 1 + 1879883982U, // <2,4,3,u>: Cost 2 vzipr LHS, <0,2,4,u> + 2563735654U, // <2,4,4,0>: Cost 3 vext1 <2,2,4,4>, LHS + 2826127824U, // <2,4,4,1>: Cost 3 vuzpr <1,2,3,4>, <3,4,0,1> + 2826127834U, // <2,4,4,2>: Cost 3 vuzpr <1,2,3,4>, <3,4,1,2> + 2826127106U, // <2,4,4,3>: Cost 3 vuzpr <1,2,3,4>, <2,4,1,3> + 2131132416U, // <2,4,4,4>: Cost 2 ins , lane 0 + 2097963009U, // <2,4,4,5>: Cost 2 
ins <2,u,4,5>, lane 1 + 1691995446U, // <2,4,4,6>: Cost 2 vuzpl <2,3,4,5>, RHS + 3094562602U, // <2,4,4,7>: Cost 3 vtrnr <1,2,3,4>, <2,4,5,7> + 1691995464U, // <2,4,4,u>: Cost 2 vuzpl <2,3,4,5>, RHS + 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> + 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> + 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> + 2765737726U, // <2,4,5,3>: Cost 3 vuzpl <2,3,4,5>, <5,2,3,4> + 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS + 2131214336U, // <2,4,5,5>: Cost 2 ins , lane 0 + 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1752386870U, // <2,4,5,7>: Cost 2 vuzpr <1,2,3,4>, RHS + 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478066380U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, <0,2,4,6> + 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> + 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> + 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> + 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS + 1828146486U, // <2,4,6,5>: Cost 2 vzipl <2,6,3,7>, RHS + 2131296256U, // <2,4,6,6>: Cost 2 ins , lane 0 + 2131304448U, // <2,4,6,7>: Cost 2 ins , lane 0 + 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS + 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> + 2867934030U, // <2,4,7,1>: Cost 3 vuzpr , <6,7,0,1> + 3169320962U, // <2,4,7,2>: Cost 3 ins <2,4,u,2>, lane 2 + 2867933312U, // <2,4,7,3>: Cost 3 vuzpr , <5,7,1,3> + 3205095424U, // <2,4,7,4>: Cost 3 ins , lane 0 + 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> + 2131369984U, // <2,4,7,6>: Cost 2 ins , lane 0 + 2867933352U, // <2,4,7,7>: Cost 3 vuzpr , <5,7,5,7> + 2131369984U, // <2,4,7,u>: Cost 2 ins , lane 0 + 1478082766U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, <0,2,4,u> + 2097635329U, // <2,4,u,1>: Cost 2 ins <2,u,0,1>, lane 1 + 1691997998U, // <2,4,u,2>: Cost 2 vuzpl <2,3,4,5>, LHS + 1752384157U, // <2,4,u,3>: Cost 2 vuzpr <1,2,3,4>, LHS + 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS + 1879926478U, // <2,4,u,5>: Cost 2 vzipr LHS, <2,3,4,5> + 1879924940U, // <2,4,u,6>: Cost 2 vzipr LHS, <0,2,4,6> + 1752387113U, // <2,4,u,7>: Cost 2 vuzpr <1,2,3,4>, RHS + 1879924942U, // <2,4,u,u>: Cost 2 vzipr LHS, <0,2,4,u> + 2765160612U, // <2,5,0,0>: Cost 3 vuzpl <2,2,5,7>, <0,2,0,2> + 2097635329U, // <2,5,0,1>: Cost 2 ins <2,u,0,1>, lane 1 + 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> + 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> + 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> + 3136335876U, // <2,5,0,5>: Cost 3 vtrnr , <5,5,5,5> + 3171418113U, // <2,5,0,6>: Cost 3 ins <2,u,0,6>, lane 1 + 2020789558U, // <2,5,0,7>: Cost 2 vtrnr <1,2,3,0>, RHS + 2020789559U, // <2,5,0,u>: Cost 2 vtrnr <1,2,3,0>, RHS + 2599616614U, // <2,5,1,0>: Cost 3 vext1 , LHS + 3205292032U, // <2,5,1,1>: Cost 3 ins , lane 0 + 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> + 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> + 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> + 2599620736U, // <2,5,1,5>: Cost 3 vext1 , <5,7,1,3> + 3205332992U, // <2,5,1,6>: Cost 3 ins , lane 0 + 2131599360U, // <2,5,1,7>: Cost 2 ins , lane 0 + 2131599360U, // <2,5,1,u>: Cost 2 ins , lane 0 + 3171516417U, // <2,5,2,0>: Cost 3 ins <2,u,2,0>, lane 1 + 3006040978U, // <2,5,2,1>: Cost 3 vzipr , <4,0,5,1> + 2097790977U, // <2,5,2,2>: Cost 2 ins <2,u,2,2>, lane 1 + 2131640320U, // <2,5,2,3>: Cost 2 ins , lane 0 + 2632034061U, // <2,5,2,4>: Cost 3 vext2 
<2,4,2,5>, <2,4,2,5> + 2820014256U, // <2,5,2,5>: Cost 3 vuzpr <0,2,1,5>, <0,2,1,5> + 2958264834U, // <2,5,2,6>: Cost 3 vzipr <0,u,2,2>, <3,4,5,6> + 2014612790U, // <2,5,2,7>: Cost 2 vtrnr <0,2,0,2>, RHS + 2014612791U, // <2,5,2,u>: Cost 2 vtrnr <0,2,0,2>, RHS + 2958273506U, // <2,5,3,0>: Cost 3 vzipr LHS, <4,1,5,0> + 1927662482U, // <2,5,3,1>: Cost 2 vzipr LHS, <4,0,5,1> + 2899955454U, // <2,5,3,2>: Cost 3 vzipl <2,3,4,5>, <5,2,3,4> + 2097872897U, // <2,5,3,3>: Cost 2 ins <2,u,3,3>, lane 1 + 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> + 1927662810U, // <2,5,3,5>: Cost 2 vzipr LHS, <4,4,5,5> + 1879886338U, // <2,5,3,6>: Cost 2 vzipr LHS, <3,4,5,6> + 1879884800U, // <2,5,3,7>: Cost 2 vzipr LHS, <1,3,5,7> + 1879884801U, // <2,5,3,u>: Cost 2 vzipr LHS, <1,3,5,u> + 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS + 3171672065U, // <2,5,4,1>: Cost 3 ins <2,u,4,1>, lane 1 + 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> + 3034173182U, // <2,5,4,3>: Cost 3 vtrnl <2,3,4,5>, <5,2,3,4> + 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS + 2097963009U, // <2,5,4,5>: Cost 2 ins <2,u,4,5>, lane 1 + 2820164098U, // <2,5,4,6>: Cost 3 vuzpr <0,2,3,5>, <3,4,5,6> + 2020822326U, // <2,5,4,7>: Cost 2 vtrnr <1,2,3,4>, RHS + 2020822327U, // <2,5,4,u>: Cost 2 vtrnr <1,2,3,4>, RHS + 2599649382U, // <2,5,5,0>: Cost 3 vext1 , LHS + 3003411346U, // <2,5,5,1>: Cost 3 vzipr , <4,0,5,1> + 2563819142U, // <2,5,5,2>: Cost 3 vext1 <2,2,5,5>, <2,2,5,5> + 2953642113U, // <2,5,5,3>: Cost 3 vzipr <0,1,2,5>, <0,1,5,3> + 2599652662U, // <2,5,5,4>: Cost 3 vext1 , RHS + 2131877888U, // <2,5,5,5>: Cost 2 ins , lane 0 + 2954971650U, // <2,5,5,6>: Cost 3 vzipr <0,3,2,5>, <3,4,5,6> + 2131894272U, // <2,5,5,7>: Cost 2 ins , lane 0 + 2131877888U, // <2,5,5,u>: Cost 2 ins , lane 0 + 2131910656U, // <2,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <2,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <2,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // <2,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <2,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <2,5,6,5>: Cost 2 ins , lane 0 + 2131959808U, // <2,5,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <2,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <2,5,6,u>: Cost 1 ins RHS, lane 0 + 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS + 2712244352U, // <2,5,7,1>: Cost 3 vext3 <4,6,0,2>, <5,7,1,3> + 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> + 2953658497U, // <2,5,7,3>: Cost 3 vzipr <0,1,2,7>, <0,1,5,3> + 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS + 2712244392U, // <2,5,7,5>: Cost 3 vext3 <4,6,0,2>, <5,7,5,7> + 2712244396U, // <2,5,7,6>: Cost 3 vext3 <4,6,0,2>, <5,7,6,2> + 2132041728U, // <2,5,7,7>: Cost 2 ins , lane 0 + 2132041728U, // <2,5,7,u>: Cost 2 ins , lane 0 + 2131910656U, // <2,5,u,0>: Cost 2 ins , lane 0 + 1927703442U, // <2,5,u,1>: Cost 2 vzipr LHS, <4,0,5,1> + 2097790977U, // <2,5,u,2>: Cost 2 ins <2,u,2,2>, lane 1 + 2097872897U, // <2,5,u,3>: Cost 2 ins <2,u,3,3>, lane 1 + 2131943424U, // <2,5,u,4>: Cost 2 ins , lane 0 + 1927703770U, // <2,5,u,5>: Cost 2 vzipr LHS, <4,4,5,5> + 1879927298U, // <2,5,u,6>: Cost 2 vzipr LHS, <3,4,5,6> + 1058226176U, // <2,5,u,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <2,5,u,u>: Cost 1 ins RHS, lane 0 + 2820243456U, // <2,6,0,0>: Cost 3 vuzpr <0,2,4,6>, <0,0,0,0> + 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2132148224U, // <2,6,0,2>: Cost 2 ins , lane 0 + 3171393537U, // <2,6,0,3>: Cost 3 ins <2,u,0,3>, lane 1 + 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, 
<0,4,2,6> + 3170672642U, // <2,6,0,5>: Cost 3 ins <2,6,u,5>, lane 2 + 3136335220U, // <2,6,0,6>: Cost 3 vtrnr , <4,6,4,6> + 2096947202U, // <2,6,0,7>: Cost 2 ins <2,6,u,7>, lane 2 + 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS + 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> + 2820244276U, // <2,6,1,1>: Cost 3 vuzpr <0,2,4,6>, <1,1,1,1> + 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> + 1746501734U, // <2,6,1,3>: Cost 2 vuzpr <0,2,4,6>, LHS + 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> + 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> + 3205996544U, // <2,6,1,6>: Cost 3 ins , lane 0 + 2096947202U, // <2,6,1,7>: Cost 2 ins <2,6,u,7>, lane 2 + 1746501739U, // <2,6,1,u>: Cost 2 vuzpr <0,2,4,6>, LHS + 2820244374U, // <2,6,2,0>: Cost 3 vuzpr <0,2,4,6>, <1,2,3,0> + 3171524609U, // <2,6,2,1>: Cost 3 ins <2,u,2,1>, lane 1 + 2097790977U, // <2,6,2,2>: Cost 2 ins <2,u,2,2>, lane 1 + 2096955397U, // <2,6,2,3>: Cost 2 ins <2,6,u,u>, lane 5 + 2820243622U, // <2,6,2,4>: Cost 3 vuzpr <0,2,4,6>, <0,2,0,4> + 3171557377U, // <2,6,2,5>: Cost 3 ins <2,u,2,5>, lane 1 + 1746501836U, // <2,6,2,6>: Cost 2 vuzpr <0,2,4,6>, <0,2,4,6> + 1884523830U, // <2,6,2,7>: Cost 2 vzipr <0,u,2,2>, RHS + 1884523831U, // <2,6,2,u>: Cost 2 vzipr <0,u,2,2>, RHS + 2096586755U, // <2,6,3,0>: Cost 2 ins <2,6,3,u>, lane 3 + 2096586755U, // <2,6,3,1>: Cost 2 ins <2,6,3,u>, lane 3 + 1927662492U, // <2,6,3,2>: Cost 2 vzipr LHS, <4,0,6,2> + 2097872897U, // <2,6,3,3>: Cost 2 ins <2,u,3,3>, lane 1 + 2096586755U, // <2,6,3,4>: Cost 2 ins <2,6,3,u>, lane 3 + 2096586755U, // <2,6,3,5>: Cost 2 ins <2,6,3,u>, lane 3 + 1927662820U, // <2,6,3,6>: Cost 2 vzipr LHS, <4,4,6,6> + 806145334U, // <2,6,3,7>: Cost 1 vzipr LHS, RHS + 806145335U, // <2,6,3,u>: Cost 1 vzipr LHS, RHS + 2820245292U, // <2,6,4,0>: Cost 3 vuzpr <0,2,4,6>, <2,4,6,0> + 3171672065U, // <2,6,4,1>: Cost 3 ins <2,u,4,1>, lane 1 + 2820243782U, // <2,6,4,2>: Cost 3 vuzpr <0,2,4,6>, <0,4,0,2> + 3171688449U, // <2,6,4,3>: Cost 3 ins <2,u,4,3>, lane 1 + 2820243784U, // <2,6,4,4>: Cost 3 vuzpr <0,2,4,6>, <0,4,0,4> + 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2132475904U, // <2,6,4,6>: Cost 2 ins , lane 0 + 2096947202U, // <2,6,4,7>: Cost 2 ins <2,6,u,7>, lane 2 + 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS + 3170476035U, // <2,6,5,0>: Cost 3 ins <2,6,5,u>, lane 3 + 2667876048U, // <2,6,5,1>: Cost 3 vext2 , <5,1,7,3> + 3206258688U, // <2,6,5,2>: Cost 3 ins , lane 0 + 3170656258U, // <2,6,5,3>: Cost 3 ins <2,6,u,3>, lane 2 + 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> + 2868023300U, // <2,6,5,5>: Cost 3 vuzpr , <5,5,5,5> + 2667876450U, // <2,6,5,6>: Cost 3 vext2 , <5,6,7,0> + 1746505014U, // <2,6,5,7>: Cost 2 vuzpr <0,2,4,6>, RHS + 1746505015U, // <2,6,5,u>: Cost 2 vuzpr <0,2,4,6>, RHS + 2955643964U, // <2,6,6,0>: Cost 3 vzipr <0,4,2,6>, <4,2,6,0> + 2820246859U, // <2,6,6,1>: Cost 3 vuzpr <0,2,4,6>, <4,6,0,1> + 2820246860U, // <2,6,6,2>: Cost 3 vuzpr <0,2,4,6>, <4,6,0,2> + 2820245412U, // <2,6,6,3>: Cost 3 vuzpr <0,2,4,6>, <2,6,1,3> + 2955643968U, // <2,6,6,4>: Cost 3 vzipr <0,4,2,6>, <4,2,6,4> + 2820246899U, // <2,6,6,5>: Cost 3 vuzpr <0,2,4,6>, <4,6,4,5> + 2132623360U, // <2,6,6,6>: Cost 2 ins , lane 0 + 1881902390U, // <2,6,6,7>: Cost 2 vzipr <0,4,2,6>, RHS + 1881902391U, // <2,6,6,u>: Cost 2 vzipr <0,4,2,6>, RHS + 2132647936U, // <2,6,7,0>: Cost 2 ins , lane 0 + 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> + 3124596044U, // <2,6,7,2>: Cost 3 vtrnr <6,2,5,7>, <4,6,0,2> + 
2868023424U, // <2,6,7,3>: Cost 3 vuzpr , <5,7,1,3> + 2132680704U, // <2,6,7,4>: Cost 2 ins , lane 0 + 2252181996U, // <2,6,7,5>: Cost 3 vrev <6,2,5,7> + 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> + 2132705280U, // <2,6,7,7>: Cost 2 ins , lane 0 + 2132647936U, // <2,6,7,u>: Cost 2 ins , lane 0 + 2096586755U, // <2,6,u,0>: Cost 2 ins <2,6,3,u>, lane 3 + 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS + 1927703452U, // <2,6,u,2>: Cost 2 vzipr LHS, <4,0,6,2> + 1746502301U, // <2,6,u,3>: Cost 2 vuzpr <0,2,4,6>, LHS + 1594136612U, // <2,6,u,4>: Cost 2 vext2 , + 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS + 1927703780U, // <2,6,u,6>: Cost 2 vzipr LHS, <4,4,6,6> + 806186294U, // <2,6,u,7>: Cost 1 vzipr LHS, RHS + 806186295U, // <2,6,u,u>: Cost 1 vzipr LHS, RHS + 2581839974U, // <2,7,0,0>: Cost 3 vext1 <5,2,7,0>, LHS + 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> + 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> + 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> + 2581843254U, // <2,7,0,4>: Cost 3 vext1 <5,2,7,0>, RHS + 2581843742U, // <2,7,0,5>: Cost 3 vext1 <5,2,7,0>, <5,2,7,0> + 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> + 3136336040U, // <2,7,0,7>: Cost 3 vtrnr , <5,7,5,7> + 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> + 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3206619136U, // <2,7,1,1>: Cost 3 ins , lane 0 + 3206627328U, // <2,7,1,2>: Cost 3 ins , lane 0 + 2132893696U, // <2,7,1,3>: Cost 2 ins , lane 0 + 2599767350U, // <2,7,1,4>: Cost 3 vext1 , RHS + 3206651904U, // <2,7,1,5>: Cost 3 ins , lane 0 + 3171344386U, // <2,7,1,6>: Cost 3 ins <2,7,u,6>, lane 2 + 2599769082U, // <2,7,1,7>: Cost 3 vext1 , <7,0,1,2> + 2132893696U, // <2,7,1,u>: Cost 2 ins , lane 0 + 2581856358U, // <2,7,2,0>: Cost 3 vext1 <5,2,7,2>, LHS + 3136131918U, // <2,7,2,1>: Cost 3 vtrnr , <6,7,0,1> + 2097790977U, // <2,7,2,2>: Cost 2 ins <2,u,2,2>, lane 1 + 2132967424U, // <2,7,2,3>: Cost 2 ins , lane 0 + 2581859638U, // <2,7,2,4>: Cost 3 vext1 <5,2,7,2>, RHS + 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> + 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> + 1770548291U, // <2,7,2,7>: Cost 2 vuzpr <4,2,6,7>, <4,2,6,7> + 2097790977U, // <2,7,2,u>: Cost 2 ins <2,u,2,2>, lane 1 + 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS + 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> + 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> + 1927663312U, // <2,7,3,3>: Cost 2 vzipr LHS, <5,1,7,3> + 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS + 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> + 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> + 1927663640U, // <2,7,3,7>: Cost 2 vzipr LHS, <5,5,7,7> + 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS + 2581872742U, // <2,7,4,0>: Cost 3 vext1 <5,2,7,4>, LHS + 2581873562U, // <2,7,4,1>: Cost 3 vext1 <5,2,7,4>, <1,2,3,4> + 3171680257U, // <2,7,4,2>: Cost 3 ins <2,u,4,2>, lane 1 + 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> + 2581876022U, // <2,7,4,4>: Cost 3 vext1 <5,2,7,4>, RHS + 2133131264U, // <2,7,4,5>: Cost 2 ins , lane 0 + 2712245609U, // <2,7,4,6>: Cost 3 vext3 <4,6,0,2>, <7,4,6,0> + 3136368808U, // <2,7,4,7>: Cost 3 vtrnr , <5,7,5,7> + 2133131264U, // <2,7,4,u>: Cost 2 ins , lane 0 + 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> + 3206914048U, // <2,7,5,1>: Cost 3 ins , lane 0 + 2844290353U, // <2,7,5,2>: Cost 3 vuzpr <4,2,6,7>, <4,5,6,2> + 2991469050U, // 
<2,7,5,3>: Cost 3 vzipr <6,4,2,5>, <6,2,7,3> + 2599800118U, // <2,7,5,4>: Cost 3 vext1 , RHS + 3206946816U, // <2,7,5,5>: Cost 3 ins , lane 0 + 3206955008U, // <2,7,5,6>: Cost 3 ins , lane 0 + 2133221376U, // <2,7,5,7>: Cost 2 ins , lane 0 + 2133221376U, // <2,7,5,u>: Cost 2 ins , lane 0 + 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS + 3136459598U, // <2,7,6,1>: Cost 3 vtrnr , <6,7,0,1> + 2901890250U, // <2,7,6,2>: Cost 3 vzipl <2,6,3,7>, <7,2,6,3> + 3136458880U, // <2,7,6,3>: Cost 3 vtrnr , <5,7,1,3> + 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS + 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> + 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> + 2133295104U, // <2,7,6,7>: Cost 2 ins , lane 0 + 2133295104U, // <2,7,6,u>: Cost 2 ins , lane 0 + 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> + 3207061504U, // <2,7,7,1>: Cost 3 ins , lane 0 + 2563983002U, // <2,7,7,2>: Cost 3 vext1 <2,2,7,7>, <2,2,7,7> + 2998784506U, // <2,7,7,3>: Cost 3 vzipr <7,6,2,7>, <6,2,7,3> + 2599816502U, // <2,7,7,4>: Cost 3 vext1 , RHS + 3207094272U, // <2,7,7,5>: Cost 3 ins , lane 0 + 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> + 2133368832U, // <2,7,7,7>: Cost 2 ins , lane 0 + 2133368832U, // <2,7,7,u>: Cost 2 ins , lane 0 + 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS + 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> + 2097790977U, // <2,7,u,2>: Cost 2 ins <2,u,2,2>, lane 1 + 1927704272U, // <2,7,u,3>: Cost 2 vzipr LHS, <5,1,7,3> + 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS + 2133131264U, // <2,7,u,5>: Cost 2 ins , lane 0 + 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> + 1927704600U, // <2,7,u,7>: Cost 2 vzipr LHS, <5,5,7,7> + 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS + 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS + 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 2020786845U, // <2,u,0,3>: Cost 2 vtrnr <1,2,3,0>, LHS + 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2094940162U, // <2,u,0,5>: Cost 2 ins <2,3,u,5>, lane 2 + 1960106138U, // <2,u,0,6>: Cost 2 vtrnl <2,3,0,1>, RHS + 2020789801U, // <2,u,0,7>: Cost 2 vtrnr <1,2,3,0>, RHS + 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS + 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> + 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> + 2096947202U, // <2,u,1,7>: Cost 2 ins <2,6,u,7>, lane 2 + 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> + 1478328556U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, <0,2,u,2> + 1825380142U, // <2,u,2,1>: Cost 2 vzipl <2,2,2,2>, LHS + 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS + 1055244288U, // <2,u,2,3>: Cost 1 ins LHS, lane 0 + 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS + 1825380506U, // <2,u,2,5>: Cost 2 vzipl <2,2,2,2>, RHS + 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 2014613033U, // <2,u,2,7>: Cost 2 vtrnr <0,2,0,2>, RHS + 1055244288U, // <2,u,2,u>: Cost 1 ins LHS, lane 0 + 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 1879885550U, // <2,u,3,1>: Cost 2 vzipr LHS, <2,3,u,1> + 1879884012U, // <2,u,3,2>: Cost 2 vzipr LHS, <0,2,u,2> + 806142108U, // <2,u,3,3>: Cost 1 vzipr LHS, LHS + 
1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 1879885554U, // <2,u,3,5>: Cost 2 vzipr LHS, <2,3,u,5> + 1879884016U, // <2,u,3,6>: Cost 2 vzipr LHS, <0,2,u,6> + 806145352U, // <2,u,3,7>: Cost 1 vzipr LHS, RHS + 806142113U, // <2,u,3,u>: Cost 1 vzipr LHS, LHS + 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS + 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> + 1960433454U, // <2,u,4,2>: Cost 2 vtrnl <2,3,4,5>, LHS + 2020819613U, // <2,u,4,3>: Cost 2 vtrnr <1,2,3,4>, LHS + 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS + 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS + 1691610422U, // <2,u,4,6>: Cost 2 vuzpl <2,2,u,3>, RHS + 2020822569U, // <2,u,4,7>: Cost 2 vtrnr <1,2,3,4>, RHS + 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS + 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> + 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2094252034U, // <2,u,5,2>: Cost 2 ins <2,2,u,2>, lane 2 + 2094260226U, // <2,u,5,3>: Cost 2 ins <2,2,u,3>, lane 2 + 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1746226486U, // <2,u,5,7>: Cost 2 vuzpr <0,2,0,u>, RHS + 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478361328U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, <0,2,u,6> + 1828149038U, // <2,u,6,1>: Cost 2 vzipl <2,6,3,7>, LHS + 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2014937757U, // <2,u,6,3>: Cost 2 vtrnr <0,2,4,6>, LHS + 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS + 1828149402U, // <2,u,6,5>: Cost 2 vzipl <2,6,3,7>, RHS + 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1060216836U, // <2,u,6,7>: Cost 1 ins RHS, lane 4 + 1060216836U, // <2,u,6,u>: Cost 1 ins RHS, lane 4 + 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2094907394U, // <2,u,7,1>: Cost 2 ins <2,3,u,1>, lane 2 + 2094252034U, // <2,u,7,2>: Cost 2 ins <2,2,u,2>, lane 2 + 2129354752U, // <2,u,7,3>: Cost 2 ins , lane 0 + 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2094940162U, // <2,u,7,5>: Cost 2 ins <2,3,u,5>, lane 2 + 2134024196U, // <2,u,7,6>: Cost 2 ins , lane 4 + 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1879925699U, // <2,u,u,0>: Cost 2 vzipr LHS, <1,2,u,0> + 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS + 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS + 806183068U, // <2,u,u,3>: Cost 1 vzipr LHS, LHS + 1879925703U, // <2,u,u,4>: Cost 2 vzipr LHS, <1,2,u,4> + 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS + 1879924976U, // <2,u,u,6>: Cost 2 vzipr LHS, <0,2,u,6> + 806186312U, // <2,u,u,7>: Cost 1 vzipr LHS, RHS + 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS + 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> + 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> + 2960312624U, // <3,0,0,3>: Cost 3 vzipr <1,2,3,0>, <3,2,0,3> + 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> + 3177381889U, // <3,0,0,5>: Cost 3 ins <3,u,0,5>, lane 1 + 3177390081U, // <3,0,0,6>: Cost 3 ins <3,u,0,6>, lane 1 + 3177398273U, // <3,0,0,7>: Cost 3 ins <3,u,0,7>, lane 1 + 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> + 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS + 2128232448U, // <3,0,1,1>: Cost 2 ins , lane 0 + 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS + 2098429955U, // <3,0,1,3>: Cost 2 ins <3,0,1,u>, lane 3 + 1490341174U, // <3,0,1,4>: Cost 2 vext1 
<2,3,0,1>, RHS + 2098429955U, // <3,0,1,5>: Cost 2 ins <3,0,1,u>, lane 3 + 2098429955U, // <3,0,1,6>: Cost 2 ins <3,0,1,u>, lane 3 + 2098429955U, // <3,0,1,7>: Cost 2 ins <3,0,1,u>, lane 3 + 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> + 2128314368U, // <3,0,2,2>: Cost 2 ins , lane 0 + 2098946053U, // <3,0,2,3>: Cost 2 ins <3,0,u,u>, lane 5 + 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2959000610U, // <3,0,2,5>: Cost 3 vzipr <1,0,3,2>, <1,4,0,5> + 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> + 3177545729U, // <3,0,2,7>: Cost 3 ins <3,u,2,7>, lane 1 + 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> + 2820636924U, // <3,0,3,0>: Cost 3 vuzpr <0,3,1,0>, <0,3,1,0> + 1832091750U, // <3,0,3,1>: Cost 2 vzipl <3,3,3,3>, LHS + 1966309478U, // <3,0,3,2>: Cost 2 vtrnl <3,3,3,3>, LHS + 2103844865U, // <3,0,3,3>: Cost 2 ins <3,u,3,3>, lane 1 + 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> + 2772716034U, // <3,0,3,5>: Cost 3 vuzpl <3,5,0,2>, <3,4,5,6> + 3177611265U, // <3,0,3,6>: Cost 3 ins <3,u,3,6>, lane 1 + 3177619457U, // <3,0,3,7>: Cost 3 ins <3,u,3,7>, lane 1 + 1832092317U, // <3,0,3,u>: Cost 2 vzipl <3,3,3,3>, LHS + 2689835334U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,2> + 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> + 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> + 2906669312U, // <3,0,4,3>: Cost 3 vzipl <3,4,5,6>, <0,3,1,4> + 2689835373U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,5> + 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2769382710U, // <3,0,4,6>: Cost 3 vuzpl <3,0,0,0>, RHS + 3177693185U, // <3,0,4,7>: Cost 3 ins <3,u,4,7>, lane 1 + 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> + 3101278208U, // <3,0,5,0>: Cost 3 vtrnr <2,3,4,5>, <0,0,0,0> + 2128527360U, // <3,0,5,1>: Cost 2 ins , lane 0 + 1967145062U, // <3,0,5,2>: Cost 2 vtrnl <3,4,5,6>, LHS + 3040886978U, // <3,0,5,3>: Cost 3 vtrnl <3,4,5,6>, <0,2,3,5> + 3040886988U, // <3,0,5,4>: Cost 3 vtrnl <3,4,5,6>, <0,2,4,6> + 2666573828U, // <3,0,5,5>: Cost 3 vext2 , <5,5,5,5> + 2104016897U, // <3,0,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 2820640054U, // <3,0,5,7>: Cost 3 vuzpr <0,3,1,0>, RHS + 1967145116U, // <3,0,5,u>: Cost 2 vtrnl <3,4,5,6>, LHS + 3202334720U, // <3,0,6,0>: Cost 3 ins , lane 0 + 2907635814U, // <3,0,6,1>: Cost 3 vzipl <3,6,0,7>, LHS + 2128609280U, // <3,0,6,2>: Cost 2 ins , lane 0 + 3177807873U, // <3,0,6,3>: Cost 3 ins <3,u,6,3>, lane 1 + 3202367488U, // <3,0,6,4>: Cost 3 ins , lane 0 + 3172663298U, // <3,0,6,5>: Cost 3 ins <3,0,u,5>, lane 2 + 2666574648U, // <3,0,6,6>: Cost 3 vext2 , <6,6,6,6> + 2098946053U, // <3,0,6,7>: Cost 2 ins <3,0,u,u>, lane 5 + 2128609280U, // <3,0,6,u>: Cost 2 ins , lane 0 + 3095396352U, // <3,0,7,0>: Cost 3 vtrnr <1,3,5,7>, <0,0,0,0> + 3095396362U, // <3,0,7,1>: Cost 3 vtrnr <1,3,5,7>, <0,0,1,1> + 2098896898U, // <3,0,7,2>: Cost 2 ins <3,0,u,2>, lane 2 + 3177881601U, // <3,0,7,3>: Cost 3 ins <3,u,7,3>, lane 1 + 2666575206U, // <3,0,7,4>: Cost 3 vext2 , <7,4,5,6> + 3177897985U, // <3,0,7,5>: Cost 3 ins <3,u,7,5>, lane 1 + 3202457600U, // <3,0,7,6>: Cost 3 ins , lane 0 + 2666575468U, // <3,0,7,7>: Cost 3 vext2 , <7,7,7,7> + 2098896898U, // <3,0,7,u>: Cost 2 ins <3,0,u,2>, lane 2 + 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> + 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> + 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS + 2098429955U, // <3,0,u,3>: Cost 2 ins <3,0,1,u>, lane 3 + 
1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> + 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2098429955U, // <3,0,u,6>: Cost 2 ins <3,0,1,u>, lane 3 + 2098429955U, // <3,0,u,7>: Cost 2 ins <3,0,1,u>, lane 3 + 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS + 2552201468U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, <0,3,1,0> + 2128822272U, // <3,1,0,1>: Cost 2 ins , lane 0 + 1695727718U, // <3,1,0,2>: Cost 2 vuzpl <3,0,1,2>, LHS + 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> + 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS + 2960310610U, // <3,1,0,5>: Cost 3 vzipr <1,2,3,0>, <0,4,1,5> + 2832516572U, // <3,1,0,6>: Cost 3 vuzpr <2,3,0,1>, <2,0,4,6> + 3177398273U, // <3,1,0,7>: Cost 3 ins <3,u,0,7>, lane 1 + 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> + 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> + 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> + 2103689217U, // <3,1,1,2>: Cost 2 ins <3,u,1,2>, lane 1 + 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> + 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> + 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> + 3177463809U, // <3,1,1,6>: Cost 3 ins <3,u,1,6>, lane 1 + 3100952848U, // <3,1,1,7>: Cost 3 vtrnr <2,3,0,1>, <3,1,5,7> + 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> + 2128961536U, // <3,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <3,1,2,1>: Cost 2 ins , lane 0 + 2128977920U, // <3,1,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <3,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <3,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <3,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <3,1,2,6>: Cost 2 ins , lane 0 + 2129018880U, // <3,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <3,1,2,u>: Cost 1 ins LHS, lane 0 + 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS + 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> + 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> + 2021326950U, // <3,1,3,3>: Cost 2 vtrnr <1,3,1,3>, LHS + 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS + 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> + 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> + 2832516096U, // <3,1,3,7>: Cost 3 vuzpr <2,3,0,1>, <1,3,5,7> + 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> + 2552234240U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, <0,3,1,4> + 2960343050U, // <3,1,4,1>: Cost 3 vzipr <1,2,3,4>, <0,0,1,1> + 2960345238U, // <3,1,4,2>: Cost 3 vzipr <1,2,3,4>, <3,0,1,2> + 2129133568U, // <3,1,4,3>: Cost 2 ins , lane 0 + 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS + 2129149952U, // <3,1,4,5>: Cost 2 ins , lane 0 + 1695730998U, // <3,1,4,6>: Cost 2 vuzpl <3,0,1,2>, RHS + 3177693185U, // <3,1,4,7>: Cost 3 ins <3,u,4,7>, lane 1 + 1695731016U, // <3,1,4,u>: Cost 2 vuzpl <3,0,1,2>, RHS + 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> + 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> + 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> + 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> + 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> + 2961678674U, // <3,1,5,5>: Cost 3 vzipr <1,4,3,5>, <0,4,1,5> + 2104016897U, // <3,1,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 1758776630U, // <3,1,5,7>: Cost 2 vuzpr <2,3,0,1>, RHS + 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> + 2907783926U, // <3,1,6,0>: Cost 3 vzipl <3,6,2,7>, <1,0,3,2> + 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> + 2222752740U, // <3,1,6,2>: Cost 3 vrev <1,3,2,6> + 2129281024U, // <3,1,6,3>: Cost 2 ins , lane 
0 + 2222900214U, // <3,1,6,4>: Cost 3 vrev <1,3,4,6> + 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> + 2868350324U, // <3,1,6,6>: Cost 3 vuzpr , <4,6,4,6> + 2129313792U, // <3,1,6,7>: Cost 2 ins , lane 0 + 2129281024U, // <3,1,6,u>: Cost 2 ins , lane 0 + 3177857025U, // <3,1,7,0>: Cost 3 ins <3,u,7,0>, lane 1 + 3095397172U, // <3,1,7,1>: Cost 3 vtrnr <1,3,5,7>, <1,1,1,1> + 2962360470U, // <3,1,7,2>: Cost 3 vzipr <1,5,3,7>, <3,0,1,2> + 2021654630U, // <3,1,7,3>: Cost 2 vtrnr <1,3,5,7>, LHS + 3177889793U, // <3,1,7,4>: Cost 3 ins <3,u,7,4>, lane 1 + 1149240320U, // <3,1,7,5>: Cost 2 vrev <1,3,5,7> + 2223055881U, // <3,1,7,6>: Cost 3 vrev <1,3,6,7> + 2868351144U, // <3,1,7,7>: Cost 3 vuzpr , <5,7,5,7> + 2021654635U, // <3,1,7,u>: Cost 2 vtrnr <1,3,5,7>, LHS + 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS + 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> + 1695733550U, // <3,1,u,2>: Cost 2 vuzpl <3,0,1,2>, LHS + 1055244288U, // <3,1,u,3>: Cost 1 ins LHS, lane 0 + 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS + 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> + 1695733914U, // <3,1,u,6>: Cost 2 vuzpl <3,0,1,2>, RHS + 1758776873U, // <3,1,u,7>: Cost 2 vuzpr <2,3,0,1>, RHS + 1055244288U, // <3,1,u,u>: Cost 1 ins LHS, lane 0 + 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> + 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS + 2129494016U, // <3,2,0,2>: Cost 2 ins , lane 0 + 1886568550U, // <3,2,0,3>: Cost 2 vzipr <1,2,3,0>, LHS + 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> + 2960311348U, // <3,2,0,5>: Cost 3 vzipr <1,2,3,0>, <1,4,2,5> + 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> + 3177398273U, // <3,2,0,7>: Cost 3 ins <3,u,0,7>, lane 1 + 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS + 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> + 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> + 2103689217U, // <3,2,1,2>: Cost 2 ins <3,u,1,2>, lane 1 + 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> + 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS + 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> + 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> + 3177472001U, // <3,2,1,7>: Cost 3 ins <3,u,1,7>, lane 1 + 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> + 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> + 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> + 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> + 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5> + 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> + 2689836685U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,3> + 3177545729U, // <3,2,2,7>: Cost 3 ins <3,u,2,7>, lane 1 + 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> + 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> + 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> + 1611450042U, // <3,2,3,2>: Cost 2 vext3 LHS, <2,3,2,3> + 1885929574U, // <3,2,3,3>: Cost 2 vzipr <1,1,3,3>, LHS + 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> + 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> + 1611450082U, // <3,2,3,6>: Cost 2 vext3 LHS, <2,3,6,7> + 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0> + 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> + 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS + 2558280674U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,3,2,4> + 2960343060U, // <3,2,4,2>: Cost 3 vzipr <1,2,3,4>, <0,0,2,2> + 1886601318U, 
// <3,2,4,3>: Cost 2 vzipr <1,2,3,4>, LHS + 2960344034U, // <3,2,4,4>: Cost 3 vzipr <1,2,3,4>, <1,3,2,4> + 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2129821696U, // <3,2,4,6>: Cost 2 ins , lane 0 + 3177693185U, // <3,2,4,7>: Cost 3 ins <3,u,4,7>, lane 1 + 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS + 2552316170U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, <0,3,2,5> + 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> + 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> + 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> + 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> + 2665263108U, // <3,2,5,5>: Cost 3 vext2 , <5,5,5,5> + 2104016897U, // <3,2,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 2826554678U, // <3,2,5,7>: Cost 3 vuzpr <1,3,0,2>, RHS + 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> + 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> + 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> + 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> + 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> + 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> + 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> + 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> + 2129977344U, // <3,2,6,7>: Cost 2 ins , lane 0 + 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> + 3095397270U, // <3,2,7,0>: Cost 3 vtrnr <1,3,5,7>, <1,2,3,0> + 3203743744U, // <3,2,7,1>: Cost 3 ins , lane 0 + 3095396516U, // <3,2,7,2>: Cost 3 vtrnr <1,3,5,7>, <0,2,0,2> + 1888616550U, // <3,2,7,3>: Cost 2 vzipr <1,5,3,7>, LHS + 3095397274U, // <3,2,7,4>: Cost 3 vtrnr <1,3,5,7>, <1,2,3,4> + 3095396528U, // <3,2,7,5>: Cost 3 vtrnr <1,3,5,7>, <0,2,1,5> + 1155286754U, // <3,2,7,6>: Cost 2 vrev <2,3,6,7> + 2665264748U, // <3,2,7,7>: Cost 3 vext2 , <7,7,7,7> + 1888616555U, // <3,2,7,u>: Cost 2 vzipr <1,5,3,7>, LHS + 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> + 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS + 2129494016U, // <3,2,u,2>: Cost 2 ins , lane 0 + 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> + 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> + 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2129821696U, // <3,2,u,6>: Cost 2 ins , lane 0 + 2129977344U, // <3,2,u,7>: Cost 2 ins , lane 0 + 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> + 1886569366U, // <3,3,0,0>: Cost 2 vzipr <1,2,3,0>, <1,2,3,0> + 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> + 1697874022U, // <3,3,0,2>: Cost 2 vuzpl <3,3,3,3>, LHS + 2100895746U, // <3,3,0,3>: Cost 2 ins <3,3,u,3>, lane 2 + 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> + 3041151490U, // <3,3,0,5>: Cost 3 vtrnl <3,5,0,2>, <3,4,5,6> + 3177390081U, // <3,3,0,6>: Cost 3 ins <3,u,0,6>, lane 1 + 2960311440U, // <3,3,0,7>: Cost 3 vzipr <1,2,3,0>, <1,5,3,7> + 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> + 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> + 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> + 2103689217U, // <3,3,1,2>: Cost 2 ins <3,u,1,2>, lane 1 + 1752891494U, // <3,3,1,3>: Cost 2 vuzpr <1,3,1,3>, LHS + 2826635515U, // <3,3,1,4>: Cost 3 vuzpr <1,3,1,3>, <3,1,3,4> + 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> + 3177463809U, // <3,3,1,6>: Cost 3 ins <3,u,1,6>, lane 1 + 3100951552U, // <3,3,1,7>: Cost 3 vtrnr <2,3,0,1>, <1,3,5,7> + 1752891499U, // <3,3,1,u>: Cost 2 vuzpr <1,3,1,3>, LHS + 2959000470U, // <3,3,2,0>: Cost 3 vzipr <1,0,3,2>, <1,2,3,0> + 2959000471U, // <3,3,2,1>: Cost 3 vzipr <1,0,3,2>, <1,2,3,1> + 1885258486U, // 
<3,3,2,2>: Cost 2 vzipr <1,0,3,2>, <1,0,3,2> + 2130313216U, // <3,3,2,3>: Cost 2 ins , lane 0 + 2959000474U, // <3,3,2,4>: Cost 3 vzipr <1,0,3,2>, <1,2,3,4> + 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> + 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3> + 2959000720U, // <3,3,2,7>: Cost 3 vzipr <1,0,3,2>, <1,5,3,7> + 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> + 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS + 2100568067U, // <3,3,3,1>: Cost 2 ins <3,3,3,u>, lane 3 + 2100568067U, // <3,3,3,2>: Cost 2 ins <3,3,3,u>, lane 3 + 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS + 2100568067U, // <3,3,3,5>: Cost 2 ins <3,3,3,u>, lane 3 + 2100568067U, // <3,3,3,6>: Cost 2 ins <3,3,3,u>, lane 3 + 2100568067U, // <3,3,3,7>: Cost 2 ins <3,3,3,u>, lane 3 + 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS + 2960343958U, // <3,3,4,0>: Cost 3 vzipr <1,2,3,4>, <1,2,3,0> + 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> + 2960343798U, // <3,3,4,2>: Cost 3 vzipr <1,2,3,4>, <1,0,3,2> + 2100895746U, // <3,3,4,3>: Cost 2 ins <3,3,u,3>, lane 2 + 1886602138U, // <3,3,4,4>: Cost 2 vzipr <1,2,3,4>, <1,2,3,4> + 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> + 1697877302U, // <3,3,4,6>: Cost 2 vuzpl <3,3,3,3>, RHS + 2960344208U, // <3,3,4,7>: Cost 3 vzipr <1,2,3,4>, <1,5,3,7> + 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> + 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS + 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> + 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> + 2100895746U, // <3,3,5,3>: Cost 2 ins <3,3,u,3>, lane 2 + 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS + 2027538126U, // <3,3,5,5>: Cost 2 vtrnr <2,3,4,5>, <2,3,4,5> + 2104016897U, // <3,3,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 1752894774U, // <3,3,5,7>: Cost 2 vuzpr <1,3,1,3>, RHS + 1752894775U, // <3,3,5,u>: Cost 2 vuzpr <1,3,1,3>, RHS + 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> + 3204333568U, // <3,3,6,1>: Cost 3 ins , lane 0 + 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> + 2100895746U, // <3,3,6,3>: Cost 2 ins <3,3,u,3>, lane 2 + 2234845608U, // <3,3,6,4>: Cost 3 vrev <3,3,4,6> + 3204366336U, // <3,3,6,5>: Cost 3 ins , lane 0 + 1967893085U, // <3,3,6,6>: Cost 2 vtrnl <3,5,6,7>, <3,5,6,7> + 2130640896U, // <3,3,6,7>: Cost 2 ins , lane 0 + 2100895746U, // <3,3,6,u>: Cost 2 ins <3,3,u,3>, lane 2 + 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS + 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> + 2962359030U, // <3,3,7,2>: Cost 3 vzipr <1,5,3,7>, <1,0,3,2> + 2100895746U, // <3,3,7,3>: Cost 2 ins <3,3,u,3>, lane 2 + 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS + 3095398094U, // <3,3,7,5>: Cost 3 vtrnr <1,3,5,7>, <2,3,4,5> + 3174662146U, // <3,3,7,6>: Cost 3 ins <3,3,u,6>, lane 2 + 2021655552U, // <3,3,7,7>: Cost 2 vtrnr <1,3,5,7>, <1,3,5,7> + 2021655552U, // <3,3,7,u>: Cost 2 vtrnr <1,3,5,7>, <1,3,5,7> + 1886569366U, // <3,3,u,0>: Cost 2 vzipr <1,2,3,0>, <1,2,3,0> + 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> + 1697879854U, // <3,3,u,2>: Cost 2 vuzpl <3,3,3,3>, LHS + 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS + 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> + 1697880218U, // <3,3,u,6>: Cost 2 vuzpl <3,3,3,3>, RHS + 1752895017U, // <3,3,u,7>: Cost 2 vuzpr <1,3,1,3>, RHS + 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS + 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, 
<0,0,0,0> + 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS + 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> + 3177365505U, // <3,4,0,3>: Cost 3 ins <3,u,0,3>, lane 1 + 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5> + 1829948726U, // <3,4,0,5>: Cost 2 vzipl <3,0,1,2>, RHS + 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> + 3177398273U, // <3,4,0,7>: Cost 3 ins <3,u,0,7>, lane 1 + 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS + 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> + 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> + 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> + 2820669542U, // <3,4,1,3>: Cost 3 vuzpr <0,3,1,4>, LHS + 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS + 2130919424U, // <3,4,1,5>: Cost 2 ins , lane 0 + 1964166454U, // <3,4,1,6>: Cost 2 vtrnl <3,0,1,2>, RHS + 3177472001U, // <3,4,1,7>: Cost 3 ins <3,u,1,7>, lane 1 + 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> + 3204694016U, // <3,4,2,0>: Cost 3 ins , lane 0 + 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> + 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> + 2101600261U, // <3,4,2,3>: Cost 2 ins <3,4,u,u>, lane 5 + 2826716058U, // <3,4,2,4>: Cost 3 vuzpr <1,3,2,4>, <1,2,3,4> + 2959001294U, // <3,4,2,5>: Cost 3 vzipr <1,0,3,2>, <2,3,4,5> + 2131001344U, // <3,4,2,6>: Cost 2 ins , lane 0 + 3177545729U, // <3,4,2,7>: Cost 3 ins <3,u,2,7>, lane 1 + 2101600261U, // <3,4,2,u>: Cost 2 ins <3,4,u,u>, lane 5 + 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> + 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> + 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> + 2103844865U, // <3,4,3,3>: Cost 2 ins <3,u,3,3>, lane 1 + 2820669696U, // <3,4,3,4>: Cost 3 vuzpr <0,3,1,4>, <0,3,1,4> + 1832095030U, // <3,4,3,5>: Cost 2 vzipl <3,3,3,3>, RHS + 1966312758U, // <3,4,3,6>: Cost 2 vtrnl <3,3,3,3>, RHS + 3177619457U, // <3,4,3,7>: Cost 3 ins <3,u,3,7>, lane 1 + 1832095273U, // <3,4,3,u>: Cost 2 vzipl <3,3,3,3>, RHS + 2960344777U, // <3,4,4,0>: Cost 3 vzipr <1,2,3,4>, <2,3,4,0> + 2960344778U, // <3,4,4,1>: Cost 3 vzipr <1,2,3,4>, <2,3,4,1> + 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> + 2960344618U, // <3,4,4,3>: Cost 3 vzipr <1,2,3,4>, <2,1,4,3> + 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS + 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> + 3177693185U, // <3,4,4,7>: Cost 3 ins <3,u,4,7>, lane 1 + 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS + 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS + 2101379075U, // <3,4,5,1>: Cost 2 ins <3,4,5,u>, lane 3 + 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> + 2101379075U, // <3,4,5,3>: Cost 2 ins <3,4,5,u>, lane 3 + 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS + 2131214336U, // <3,4,5,5>: Cost 2 ins , lane 0 + 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS + 2101379075U, // <3,4,5,7>: Cost 2 ins <3,4,5,u>, lane 3 + 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS + 1659227468U, // <3,4,6,0>: Cost 2 vext3 LHS, <4,6,0,2> + 2689838422U, // <3,4,6,1>: Cost 3 vext3 LHS, <4,6,1,3> + 2564417231U, // <3,4,6,2>: Cost 3 vext1 <2,3,4,6>, <2,3,4,6> + 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6> + 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> + 2131296256U, // <3,4,6,6>: Cost 2 ins , lane 0 + 2101600261U, // <3,4,6,7>: Cost 2 ins 
<3,4,u,u>, lane 5 + 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> + 2666607610U, // <3,4,7,0>: Cost 3 vext2 , <7,0,1,2> + 2659972191U, // <3,4,7,1>: Cost 3 vext2 <7,1,3,4>, <7,1,3,4> + 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> + 3177881601U, // <3,4,7,3>: Cost 3 ins <3,u,7,3>, lane 1 + 2666607974U, // <3,4,7,4>: Cost 3 vext2 , <7,4,5,6> + 3095396690U, // <3,4,7,5>: Cost 3 vtrnr <1,3,5,7>, <0,4,1,5> + 2131369984U, // <3,4,7,6>: Cost 2 ins , lane 0 + 2666608236U, // <3,4,7,7>: Cost 3 vext2 , <7,7,7,7> + 2131369984U, // <3,4,7,u>: Cost 2 ins , lane 0 + 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS + 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS + 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> + 2101600261U, // <3,4,u,3>: Cost 2 ins <3,4,u,u>, lane 5 + 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS + 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS + 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS + 2101379075U, // <3,4,u,7>: Cost 2 ins <3,4,5,u>, lane 3 + 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS + 2832842752U, // <3,5,0,0>: Cost 3 vuzpr <2,3,4,5>, <0,0,0,0> + 2131476480U, // <3,5,0,1>: Cost 2 ins , lane 0 + 1698709606U, // <3,5,0,2>: Cost 2 vuzpl <3,4,5,6>, LHS + 2772451522U, // <3,5,0,3>: Cost 3 vuzpl <3,4,5,6>, <0,2,3,5> + 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> + 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> + 2960310647U, // <3,5,0,6>: Cost 3 vzipr <1,2,3,0>, <0,4,5,6> + 2131525632U, // <3,5,0,7>: Cost 2 ins , lane 0 + 1698709660U, // <3,5,0,u>: Cost 2 vuzpl <3,4,5,6>, LHS + 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS + 2832843572U, // <3,5,1,1>: Cost 3 vuzpr <2,3,4,5>, <1,1,1,1> + 2103689217U, // <3,5,1,2>: Cost 2 ins <3,u,1,2>, lane 1 + 1759101030U, // <3,5,1,3>: Cost 2 vuzpr <2,3,4,5>, LHS + 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> + 2772452352U, // <3,5,1,5>: Cost 3 vuzpl <3,4,5,6>, <1,3,5,7> + 3205332992U, // <3,5,1,6>: Cost 3 ins , lane 0 + 2027212086U, // <3,5,1,7>: Cost 2 vtrnr <2,3,0,1>, RHS + 2027212087U, // <3,5,1,u>: Cost 2 vtrnr <2,3,0,1>, RHS + 2832843670U, // <3,5,2,0>: Cost 3 vuzpr <2,3,4,5>, <1,2,3,0> + 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> + 2832842916U, // <3,5,2,2>: Cost 3 vuzpr <2,3,4,5>, <0,2,0,2> + 2131640320U, // <3,5,2,3>: Cost 2 ins , lane 0 + 2832842936U, // <3,5,2,4>: Cost 3 vuzpr <2,3,4,5>, <0,2,2,4> + 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> + 2959002114U, // <3,5,2,6>: Cost 3 vzipr <1,0,3,2>, <3,4,5,6> + 2131673088U, // <3,5,2,7>: Cost 2 ins , lane 0 + 2131640320U, // <3,5,2,u>: Cost 2 ins , lane 0 + 2772453922U, // <3,5,3,0>: Cost 3 vuzpl <3,4,5,6>, <3,5,0,2> + 2832844454U, // <3,5,3,1>: Cost 3 vuzpr <2,3,4,5>, <2,3,0,1> + 3177578497U, // <3,5,3,2>: Cost 3 ins <3,u,3,2>, lane 1 + 2103844865U, // <3,5,3,3>: Cost 2 ins <3,u,3,3>, lane 1 + 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> + 1759102670U, // <3,5,3,5>: Cost 2 vuzpr <2,3,4,5>, <2,3,4,5> + 2959673858U, // <3,5,3,6>: Cost 3 vzipr <1,1,3,3>, <3,4,5,6> + 2021330230U, // <3,5,3,7>: Cost 2 vtrnr <1,3,1,3>, RHS + 2021330231U, // <3,5,3,u>: Cost 2 vtrnr <1,3,1,3>, RHS + 2832845308U, // <3,5,4,0>: Cost 3 vuzpr <2,3,4,5>, <3,4,5,0> + 2732969871U, // <3,5,4,1>: Cost 3 vext3 LHS, <5,4,1,5> + 2832844536U, // <3,5,4,2>: Cost 3 vuzpr <2,3,4,5>, <2,4,0,2> + 3177660417U, // <3,5,4,3>: Cost 3 ins <3,u,4,3>, lane 1 + 2832845312U, // <3,5,4,4>: Cost 3 vuzpr <2,3,4,5>, <3,4,5,4> + 2131804160U, // <3,5,4,5>: Cost 2 ins , lane 0 + 
1698712886U, // <3,5,4,6>: Cost 2 vuzpl <3,4,5,6>, RHS + 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6> + 1698712904U, // <3,5,4,u>: Cost 2 vuzpl <3,4,5,6>, RHS + 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS + 2832846074U, // <3,5,5,1>: Cost 3 vuzpr <2,3,4,5>, <4,5,0,1> + 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5> + 2832845356U, // <3,5,5,3>: Cost 3 vuzpr <2,3,4,5>, <3,5,1,3> + 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS + 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 2104016897U, // <3,5,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 1759104310U, // <3,5,5,7>: Cost 2 vuzpr <2,3,4,5>, RHS + 1759104311U, // <3,5,5,u>: Cost 2 vuzpr <2,3,4,5>, RHS + 2131910656U, // <3,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <3,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <3,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // <3,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <3,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <3,5,6,5>: Cost 2 ins , lane 0 + 2131959808U, // <3,5,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <3,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <3,5,6,u>: Cost 1 ins RHS, lane 0 + 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS + 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7> + 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2> + 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2> + 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS + 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 3095397528U, // <3,5,7,6>: Cost 3 vtrnr <1,3,5,7>, <1,5,4,6> + 2021657910U, // <3,5,7,7>: Cost 2 vtrnr <1,3,5,7>, RHS + 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS + 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS + 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u> + 1698715438U, // <3,5,u,2>: Cost 2 vuzpl <3,4,5,6>, LHS + 1759101597U, // <3,5,u,3>: Cost 2 vuzpr <2,3,4,5>, LHS + 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS + 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7> + 1698715802U, // <3,5,u,6>: Cost 2 vuzpl <3,4,5,6>, RHS + 1058226176U, // <3,5,u,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <3,5,u,u>: Cost 1 ins RHS, lane 0 + 2732970264U, // <3,6,0,0>: Cost 3 vext3 LHS, <6,0,0,2> + 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2> + 2132148224U, // <3,6,0,2>: Cost 2 ins , lane 0 + 3177365505U, // <3,6,0,3>: Cost 3 ins <3,u,0,3>, lane 1 + 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2> + 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7> + 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0> + 1886571830U, // <3,6,0,7>: Cost 2 vzipr <1,2,3,0>, RHS + 1886571831U, // <3,6,0,u>: Cost 2 vzipr <1,2,3,0>, RHS + 2720878954U, // <3,6,1,0>: Cost 3 vext3 <6,1,0,3>, <6,1,0,3> + 3205955584U, // <3,6,1,1>: Cost 3 ins , lane 0 + 2103689217U, // <3,6,1,2>: Cost 2 ins <3,u,1,2>, lane 1 + 2826731622U, // <3,6,1,3>: Cost 3 vuzpr <1,3,2,6>, LHS + 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6> + 3205988352U, // <3,6,1,5>: Cost 3 ins , lane 0 + 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3> + 2954349878U, // <3,6,1,7>: Cost 3 vzipr <0,2,3,1>, RHS + 2103689217U, // <3,6,1,u>: Cost 2 ins <3,u,1,2>, lane 1 + 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS + 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3> + 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6> + 2132303872U, // <3,6,2,3>: Cost 2 ins , lane 0 + 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6> + 2632779624U, // <3,6,2,5>: Cost 3 vext2 
<2,5,3,6>, <2,5,3,6> + 2826731724U, // <3,6,2,6>: Cost 3 vuzpr <1,3,2,6>, <0,2,4,6> + 1885261110U, // <3,6,2,7>: Cost 2 vzipr <1,0,3,2>, RHS + 1885261111U, // <3,6,2,u>: Cost 2 vzipr <1,0,3,2>, RHS + 3136876642U, // <3,6,3,0>: Cost 3 vtrnr , <5,6,7,0> + 3206103040U, // <3,6,3,1>: Cost 3 ins , lane 0 + 3001478044U, // <3,6,3,2>: Cost 3 vzipr , <4,0,6,2> + 2103844865U, // <3,6,3,3>: Cost 2 ins <3,u,3,3>, lane 1 + 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6> + 3206135808U, // <3,6,3,5>: Cost 3 ins , lane 0 + 1699457629U, // <3,6,3,6>: Cost 2 vuzpl <3,5,6,7>, <3,5,6,7> + 1885932854U, // <3,6,3,7>: Cost 2 vzipr <1,1,3,3>, RHS + 1885932855U, // <3,6,3,u>: Cost 2 vzipr <1,1,3,3>, RHS + 2732970588U, // <3,6,4,0>: Cost 3 vext3 LHS, <6,4,0,2> + 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3> + 2732970604U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,0> + 2906673714U, // <3,6,4,3>: Cost 3 vzipl <3,4,5,6>, <6,3,4,5> + 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6> + 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6> + 2132475904U, // <3,6,4,6>: Cost 2 ins , lane 0 + 1886604598U, // <3,6,4,7>: Cost 2 vzipr <1,2,3,4>, RHS + 1886604599U, // <3,6,4,u>: Cost 2 vzipr <1,2,3,4>, RHS + 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS + 3206250496U, // <3,6,5,1>: Cost 3 ins , lane 0 + 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7> + 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 3040891442U, // <3,6,5,4>: Cost 3 vtrnl <3,4,5,6>, <6,3,4,5> + 3206283264U, // <3,6,5,5>: Cost 3 ins , lane 0 + 2104016897U, // <3,6,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 2954382646U, // <3,6,5,7>: Cost 3 vzipr <0,2,3,5>, RHS + 2104016897U, // <3,6,5,u>: Cost 2 ins <3,u,5,6>, lane 1 + 2732970748U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,0> + 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3> + 2732970768U, // <3,6,6,2>: Cost 3 vext3 LHS, <6,6,2,2> + 3177807873U, // <3,6,6,3>: Cost 3 ins <3,u,6,3>, lane 1 + 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4> + 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7> + 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7> + 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7> + 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1> + 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7> + 1611453282U, // <3,6,7,2>: Cost 2 vext3 LHS, <6,7,2,3> + 2968996198U, // <3,6,7,3>: Cost 3 vzipr <2,6,3,7>, <3,2,6,3> + 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5> + 2968995633U, // <3,6,7,5>: Cost 3 vzipr <2,6,3,7>, <2,4,6,5> + 1611453322U, // <3,6,7,6>: Cost 2 vext3 LHS, <6,7,6,7> + 1888619830U, // <3,6,7,7>: Cost 2 vzipr <1,5,3,7>, RHS + 1888619831U, // <3,6,7,u>: Cost 2 vzipr <1,5,3,7>, RHS + 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1> + 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2> + 2132148224U, // <3,6,u,2>: Cost 2 ins , lane 0 + 2132303872U, // <3,6,u,3>: Cost 2 ins , lane 0 + 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5> + 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6> + 2132475904U, // <3,6,u,6>: Cost 2 ins , lane 0 + 1885310262U, // <3,6,u,7>: Cost 2 vzipr <1,0,3,u>, RHS + 1885310263U, // <3,6,u,u>: Cost 2 vzipr <1,0,3,u>, RHS + 2826960896U, // <3,7,0,0>: Cost 3 vuzpr <1,3,5,7>, <0,0,0,0> + 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2826960916U, // <3,7,0,2>: Cost 3 vuzpr <1,3,5,7>, <0,0,2,2> + 3002117840U, // <3,7,0,3>: Cost 3 vzipr , <5,1,7,3> + 2626814290U, // <3,7,0,4>: Cost 3 vext2 
<1,5,3,7>, <0,4,1,5> + 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0> + 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0> + 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1> + 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2> + 2826961716U, // <3,7,1,1>: Cost 3 vuzpr <1,3,5,7>, <1,1,1,1> + 2103689217U, // <3,7,1,2>: Cost 2 ins <3,u,1,2>, lane 1 + 1753219174U, // <3,7,1,3>: Cost 2 vuzpr <1,3,5,7>, LHS + 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS + 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7> + 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7> + 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3> + 1753219179U, // <3,7,1,u>: Cost 2 vuzpr <1,3,5,7>, LHS + 2826961814U, // <3,7,2,0>: Cost 3 vuzpr <1,3,5,7>, <1,2,3,0> + 3206692864U, // <3,7,2,1>: Cost 3 ins , lane 0 + 2826961060U, // <3,7,2,2>: Cost 3 vuzpr <1,3,5,7>, <0,2,0,2> + 2132967424U, // <3,7,2,3>: Cost 2 ins , lane 0 + 2826961818U, // <3,7,2,4>: Cost 3 vuzpr <1,3,5,7>, <1,2,3,4> + 2826961072U, // <3,7,2,5>: Cost 3 vuzpr <1,3,5,7>, <0,2,1,5> + 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7> + 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3> + 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7> + 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2> + 2826962598U, // <3,7,3,1>: Cost 3 vuzpr <1,3,5,7>, <2,3,0,1> + 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3> + 2103844865U, // <3,7,3,3>: Cost 2 ins <3,u,3,3>, lane 1 + 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6> + 2826962638U, // <3,7,3,5>: Cost 3 vuzpr <1,3,5,7>, <2,3,4,5> + 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7> + 1753220096U, // <3,7,3,7>: Cost 2 vuzpr <1,3,5,7>, <1,3,5,7> + 1753220096U, // <3,7,3,u>: Cost 2 vuzpr <1,3,5,7>, <1,3,5,7> + 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS + 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7> + 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7> + 3002150608U, // <3,7,4,3>: Cost 3 vzipr , <5,1,7,3> + 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS + 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS + 2826961244U, // <3,7,4,6>: Cost 3 vuzpr <1,3,5,7>, <0,4,2,6> + 2732971383U, // <3,7,4,7>: Cost 3 vext3 LHS, <7,4,7,5> + 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS + 2826963551U, // <3,7,5,0>: Cost 3 vuzpr <1,3,5,7>, <3,5,7,0> + 2826963552U, // <3,7,5,1>: Cost 3 vuzpr <1,3,5,7>, <3,5,7,1> + 2826962032U, // <3,7,5,2>: Cost 3 vuzpr <1,3,5,7>, <1,5,0,2> + 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0> + 2826963555U, // <3,7,5,4>: Cost 3 vuzpr <1,3,5,7>, <3,5,7,4> + 2826962044U, // <3,7,5,5>: Cost 3 vuzpr <1,3,5,7>, <1,5,1,5> + 2104016897U, // <3,7,5,6>: Cost 2 ins <3,u,5,6>, lane 1 + 1753222454U, // <3,7,5,7>: Cost 2 vuzpr <1,3,5,7>, RHS + 1753222455U, // <3,7,5,u>: Cost 2 vuzpr <1,3,5,7>, RHS + 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1> + 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0> + 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3> + 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0> + 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> + 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> + 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> + 2133295104U, // <3,7,6,7>: Cost 2 ins , lane 0 + 2133295104U, // <3,7,6,u>: Cost 2 ins , lane 0 + 2962362223U, // <3,7,7,0>: Cost 3 vzipr <1,5,3,7>, 
<5,3,7,0> + 2826965109U, // <3,7,7,1>: Cost 3 vuzpr <1,3,5,7>, <5,7,0,1> + 2968998474U, // <3,7,7,2>: Cost 3 vzipr <2,6,3,7>, <6,3,7,2> + 2826963662U, // <3,7,7,3>: Cost 3 vuzpr <1,3,5,7>, <3,7,1,3> + 2962362227U, // <3,7,7,4>: Cost 3 vzipr <1,5,3,7>, <5,3,7,4> + 2826965149U, // <3,7,7,5>: Cost 3 vuzpr <1,3,5,7>, <5,7,4,5> + 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> + 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> + 2826962300U, // <3,7,u,0>: Cost 3 vuzpr <1,3,5,7>, <1,u,3,0> + 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2103689217U, // <3,7,u,2>: Cost 2 ins <3,u,1,2>, lane 1 + 1753219741U, // <3,7,u,3>: Cost 2 vuzpr <1,3,5,7>, LHS + 2826962304U, // <3,7,u,4>: Cost 3 vuzpr <1,3,5,7>, <1,u,3,4> + 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS + 1595545808U, // <3,7,u,6>: Cost 2 vext2 , + 1753222697U, // <3,7,u,7>: Cost 2 vuzpr <1,3,5,7>, RHS + 1753219746U, // <3,7,u,u>: Cost 2 vuzpr <1,3,5,7>, LHS + 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, + 1696243814U, // <3,u,0,2>: Cost 2 vuzpl <3,0,u,2>, LHS + 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, + 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, + 1829951642U, // <3,u,0,5>: Cost 2 vzipl <3,0,1,2>, RHS + 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, + 1886571848U, // <3,u,0,7>: Cost 2 vzipr <1,2,3,0>, RHS + 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, + 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> + 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> + 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS + 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, + 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS + 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u> + 1964169370U, // <3,u,1,6>: Cost 2 vtrnl <3,0,1,2>, RHS + 2027212329U, // <3,u,1,7>: Cost 2 vtrnr <2,3,0,1>, RHS + 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS + 1659672428U, // <3,u,2,0>: Cost 2 vext3 LHS, + 2128969728U, // <3,u,2,1>: Cost 2 ins , lane 0 + 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> + 1055244288U, // <3,u,2,3>: Cost 1 ins LHS, lane 0 + 1659672468U, // <3,u,2,4>: Cost 2 vext3 LHS, + 2129002496U, // <3,u,2,5>: Cost 2 ins , lane 0 + 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> + 1885261128U, // <3,u,2,7>: Cost 2 vzipr <1,0,3,2>, RHS + 1055244288U, // <3,u,2,u>: Cost 1 ins LHS, lane 0 + 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, + 1616541639U, // <3,u,3,1>: Cost 2 vext3 LHS, + 1966315310U, // <3,u,3,2>: Cost 2 vtrnl <3,3,3,3>, LHS + 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS + 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, + 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, + 1966315674U, // <3,u,3,6>: Cost 2 vtrnl <3,3,3,3>, RHS + 1885932872U, // <3,u,3,7>: Cost 2 vzipr <1,1,3,3>, RHS + 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS + 2960344003U, // <3,u,4,0>: Cost 3 vzipr <1,2,3,4>, <1,2,u,0> + 1832933166U, // <3,u,4,1>: Cost 2 vzipl <3,4,5,6>, LHS + 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, + 1886601372U, // <3,u,4,3>: Cost 2 vzipr <1,2,3,4>, LHS + 1886602138U, // <3,u,4,4>: Cost 2 vzipr <1,2,3,4>, <1,2,3,4> + 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, + 1696247094U, // <3,u,4,6>: Cost 2 vuzpl <3,0,u,2>, RHS + 1886604616U, // <3,u,4,7>: Cost 2 vzipr <1,2,3,4>, RHS + 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, + 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS + 2128527360U, // <3,u,5,1>: Cost 2 ins , lane 0 + 1490962162U, // <3,u,5,2>: Cost 2 
vext1 <2,3,u,5>, <2,3,u,5> + 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, + 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS + 2027538126U, // <3,u,5,5>: Cost 2 vtrnr <2,3,4,5>, <2,3,4,5> + 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS + 1752935734U, // <3,u,5,7>: Cost 2 vuzpr <1,3,1,u>, RHS + 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS + 1663875248U, // <3,u,6,0>: Cost 2 vext3 LHS, + 2131918848U, // <3,u,6,1>: Cost 2 ins , lane 0 + 2128609280U, // <3,u,6,2>: Cost 2 ins , lane 0 + 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, + 1663875288U, // <3,u,6,4>: Cost 2 vext3 LHS, + 2131951616U, // <3,u,6,5>: Cost 2 ins , lane 0 + 2131296256U, // <3,u,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <3,u,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <3,u,6,u>: Cost 1 ins RHS, lane 0 + 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS + 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> + 2098896898U, // <3,u,7,2>: Cost 2 ins <3,0,u,2>, lane 2 + 2021655197U, // <3,u,7,3>: Cost 2 vtrnr <1,3,5,7>, LHS + 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS + 1659230515U, // <3,u,7,5>: Cost 2 vext3 LHS, + 2131369984U, // <3,u,7,6>: Cost 2 ins , lane 0 + 2021658153U, // <3,u,7,7>: Cost 2 vtrnr <1,3,5,7>, RHS + 2021655202U, // <3,u,7,u>: Cost 2 vtrnr <1,3,5,7>, LHS + 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, + 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, + 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS + 1055244288U, // <3,u,u,3>: Cost 1 ins LHS, lane 0 + 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, + 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, + 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS + 1058226176U, // <3,u,u,7>: Cost 1 ins RHS, lane 0 + 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS + 2128150528U, // <4,0,0,0>: Cost 2 ins , lane 0 + 2104860674U, // <4,0,0,1>: Cost 2 ins <4,0,u,1>, lane 2 + 1705607270U, // <4,0,0,2>: Cost 2 vuzpl <4,6,0,2>, LHS + 3178070019U, // <4,0,0,3>: Cost 3 ins <4,0,0,u>, lane 3 + 2909946194U, // <4,0,0,4>: Cost 3 vzipl <4,0,5,1>, <0,4,1,5> + 3178070019U, // <4,0,0,5>: Cost 3 ins <4,0,0,u>, lane 3 + 3183362049U, // <4,0,0,6>: Cost 3 ins <4,u,0,6>, lane 1 + 2109628417U, // <4,0,0,7>: Cost 2 ins <4,u,0,7>, lane 1 + 1705607324U, // <4,0,0,u>: Cost 2 vuzpl <4,6,0,2>, LHS + 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS + 2128232448U, // <4,0,1,1>: Cost 2 ins , lane 0 + 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2833612902U, // <4,0,1,3>: Cost 3 vuzpr <2,4,6,0>, LHS + 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS + 2779350016U, // <4,0,1,5>: Cost 3 vuzpl <4,6,0,2>, <1,3,5,7> + 3202015232U, // <4,0,1,6>: Cost 3 ins , lane 0 + 2109702145U, // <4,0,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> + 2104860674U, // <4,0,2,1>: Cost 2 ins <4,0,u,1>, lane 2 + 2128314368U, // <4,0,2,2>: Cost 2 ins , lane 0 + 2104918021U, // <4,0,2,3>: Cost 2 ins <4,0,u,u>, lane 5 + 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> + 3044622465U, // <4,0,2,5>: Cost 3 vtrnl <4,1,2,3>, <0,1,5,3> + 2833613004U, // <4,0,2,6>: Cost 3 vuzpr <2,4,6,0>, <0,2,4,6> + 2109775873U, // <4,0,2,7>: Cost 2 ins <4,u,2,7>, lane 1 + 2104860674U, // <4,0,2,u>: Cost 2 ins <4,0,u,1>, lane 2 + 3202113536U, // <4,0,3,0>: Cost 3 ins , lane 0 + 2104860674U, // <4,0,3,1>: Cost 2 ins <4,0,u,1>, lane 2 + 2128388096U, // <4,0,3,2>: Cost 2 ins , lane 0 + 2779351452U, // <4,0,3,3>: Cost 3 vuzpl <4,6,0,2>, <3,3,3,3> + 3178627074U, // <4,0,3,4>: Cost 3 ins 
<4,0,u,4>, lane 2 + 2839512782U, // <4,0,3,5>: Cost 3 vuzpr <3,4,5,0>, <2,3,4,5> + 3178643458U, // <4,0,3,6>: Cost 3 ins <4,0,u,6>, lane 2 + 2109849601U, // <4,0,3,7>: Cost 2 ins <4,u,3,7>, lane 1 + 2104860674U, // <4,0,3,u>: Cost 2 ins <4,0,u,1>, lane 2 + 1705610572U, // <4,0,4,0>: Cost 2 vuzpl <4,6,0,2>, <4,6,0,2> + 2104860674U, // <4,0,4,1>: Cost 2 ins <4,0,u,1>, lane 2 + 1974370406U, // <4,0,4,2>: Cost 2 vtrnl <4,6,4,6>, LHS + 3178364931U, // <4,0,4,3>: Cost 3 ins <4,0,4,u>, lane 3 + 2109898753U, // <4,0,4,4>: Cost 2 ins <4,u,4,4>, lane 1 + 2104918021U, // <4,0,4,5>: Cost 2 ins <4,0,u,u>, lane 5 + 1705610550U, // <4,0,4,6>: Cost 2 vuzpl <4,6,0,2>, RHS + 2109923329U, // <4,0,4,7>: Cost 2 ins <4,u,4,7>, lane 1 + 1705610568U, // <4,0,4,u>: Cost 2 vuzpl <4,6,0,2>, RHS + 1839644672U, // <4,0,5,0>: Cost 2 vzipl RHS, <0,0,0,0> + 765902950U, // <4,0,5,1>: Cost 1 vzipl RHS, LHS + 1839644836U, // <4,0,5,2>: Cost 2 vzipl RHS, <0,2,0,2> + 2104696835U, // <4,0,5,3>: Cost 2 ins <4,0,5,u>, lane 3 + 1839645010U, // <4,0,5,4>: Cost 2 vzipl RHS, <0,4,1,5> + 2109980673U, // <4,0,5,5>: Cost 2 ins <4,u,5,5>, lane 1 + 2104696835U, // <4,0,5,6>: Cost 2 ins <4,0,5,u>, lane 3 + 2104696835U, // <4,0,5,7>: Cost 2 ins <4,0,5,u>, lane 3 + 765903517U, // <4,0,5,u>: Cost 1 vzipl RHS, LHS + 1973862400U, // <4,0,6,0>: Cost 2 vtrnl RHS, <0,0,0,0> + 1973862410U, // <4,0,6,1>: Cost 2 vtrnl RHS, <0,0,1,1> + 900120678U, // <4,0,6,2>: Cost 1 vtrnl RHS, LHS + 2104770563U, // <4,0,6,3>: Cost 2 ins <4,0,6,u>, lane 3 + 1973862604U, // <4,0,6,4>: Cost 2 vtrnl RHS, <0,2,4,6> + 2104770563U, // <4,0,6,5>: Cost 2 ins <4,0,6,u>, lane 3 + 2110062593U, // <4,0,6,6>: Cost 2 ins <4,u,6,6>, lane 1 + 1036328961U, // <4,0,6,7>: Cost 1 ins RHS, lane 1 + 900120732U, // <4,0,6,u>: Cost 1 vtrnl RHS, LHS + 3202408448U, // <4,0,7,0>: Cost 3 ins , lane 0 + 2104860674U, // <4,0,7,1>: Cost 2 ins <4,0,u,1>, lane 2 + 2104868866U, // <4,0,7,2>: Cost 2 ins <4,0,u,2>, lane 2 + 3114049557U, // <4,0,7,3>: Cost 3 vtrnr <4,4,6,7>, <0,0,2,3> + 3178627074U, // <4,0,7,4>: Cost 3 ins <4,0,u,4>, lane 2 + 2779354470U, // <4,0,7,5>: Cost 3 vuzpl <4,6,0,2>, <7,4,5,6> + 2779354473U, // <4,0,7,6>: Cost 3 vuzpl <4,6,0,2>, <7,4,6,0> + 2110144513U, // <4,0,7,7>: Cost 2 ins <4,u,7,7>, lane 1 + 2104860674U, // <4,0,7,u>: Cost 2 ins <4,0,u,1>, lane 2 + 1974009856U, // <4,0,u,0>: Cost 2 vtrnl RHS, <0,0,0,0> + 767893606U, // <4,0,u,1>: Cost 1 vzipl RHS, LHS + 900268134U, // <4,0,u,2>: Cost 1 vtrnl RHS, LHS + 2104918021U, // <4,0,u,3>: Cost 2 ins <4,0,u,u>, lane 5 + 1974010060U, // <4,0,u,4>: Cost 2 vtrnl RHS, <0,2,4,6> + 2104918021U, // <4,0,u,5>: Cost 2 ins <4,0,u,u>, lane 5 + 1705613466U, // <4,0,u,6>: Cost 2 vuzpl <4,6,0,2>, RHS + 1036328961U, // <4,0,u,7>: Cost 1 ins RHS, lane 1 + 900268188U, // <4,0,u,u>: Cost 1 vtrnl RHS, LHS + 2600640614U, // <4,1,0,0>: Cost 3 vext1 , LHS + 2128822272U, // <4,1,0,1>: Cost 2 ins , lane 0 + 2109587457U, // <4,1,0,2>: Cost 2 ins <4,u,0,2>, lane 1 + 2128838656U, // <4,1,0,3>: Cost 2 ins , lane 0 + 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> + 3047785472U, // <4,1,0,5>: Cost 3 vtrnl <4,6,0,2>, <1,3,5,7> + 3183362049U, // <4,1,0,6>: Cost 3 ins <4,u,0,6>, lane 1 + 2109628417U, // <4,1,0,7>: Cost 2 ins <4,u,0,7>, lane 1 + 2109587457U, // <4,1,0,u>: Cost 2 ins <4,u,0,2>, lane 1 + 3202629632U, // <4,1,1,0>: Cost 3 ins , lane 0 + 2128896000U, // <4,1,1,1>: Cost 2 ins , lane 0 + 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> + 2128912384U, // <4,1,1,3>: Cost 2 ins , lane 0 + 3202662400U, // <4,1,1,4>: Cost 3 ins , 
lane 0 + 2958401874U, // <4,1,1,5>: Cost 3 vzipr <0,u,4,1>, <0,4,1,5> + 2778801323U, // <4,1,1,6>: Cost 3 vuzpl <4,5,1,7>, <1,5,6,7> + 2109702145U, // <4,1,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 2128896000U, // <4,1,1,u>: Cost 2 ins , lane 0 + 2128961536U, // <4,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <4,1,2,1>: Cost 2 ins , lane 0 + 2128977920U, // <4,1,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <4,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <4,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <4,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <4,1,2,6>: Cost 2 ins , lane 0 + 2129018880U, // <4,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <4,1,2,u>: Cost 1 ins LHS, lane 0 + 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS + 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> + 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> + 2129059840U, // <4,1,3,3>: Cost 2 ins , lane 0 + 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS + 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7> + 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 2109849601U, // <4,1,3,7>: Cost 2 ins <4,u,3,7>, lane 1 + 2129059840U, // <4,1,3,u>: Cost 2 ins , lane 0 + 2600673382U, // <4,1,4,0>: Cost 3 vext1 , LHS + 1705061641U, // <4,1,4,1>: Cost 2 vuzpl <4,5,1,7>, <4,5,1,7> + 2912641946U, // <4,1,4,2>: Cost 3 vzipl <4,4,5,6>, <1,2,3,4> + 2040135782U, // <4,1,4,3>: Cost 2 vtrnr <4,4,4,4>, LHS + 2109898753U, // <4,1,4,4>: Cost 2 ins <4,u,4,4>, lane 1 + 2129149952U, // <4,1,4,5>: Cost 2 ins , lane 0 + 2109915137U, // <4,1,4,6>: Cost 2 ins <4,u,4,6>, lane 1 + 2109923329U, // <4,1,4,7>: Cost 2 ins <4,u,4,7>, lane 1 + 2109915137U, // <4,1,4,u>: Cost 2 ins <4,u,4,6>, lane 1 + 1479164242U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, <0,4,1,5> + 1839645492U, // <4,1,5,1>: Cost 2 vzipl RHS, <1,1,1,1> + 1839645590U, // <4,1,5,2>: Cost 2 vzipl RHS, <1,2,3,0> + 2016034918U, // <4,1,5,3>: Cost 2 vtrnr <0,4,1,5>, LHS + 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS + 1839645840U, // <4,1,5,5>: Cost 2 vzipl RHS, <1,5,3,7> + 3089776763U, // <4,1,5,6>: Cost 3 vtrnr <0,4,1,5>, <0,1,4,6> + 2109997057U, // <4,1,5,7>: Cost 2 ins <4,u,5,7>, lane 1 + 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS + 2110013441U, // <4,1,6,0>: Cost 2 ins <4,u,6,0>, lane 1 + 1973863220U, // <4,1,6,1>: Cost 2 vtrnl RHS, <1,1,1,1> + 2110029825U, // <4,1,6,2>: Cost 2 ins <4,u,6,2>, lane 1 + 2016116838U, // <4,1,6,3>: Cost 2 vtrnr <0,4,2,6>, LHS + 2110046209U, // <4,1,6,4>: Cost 2 ins <4,u,6,4>, lane 1 + 1973863424U, // <4,1,6,5>: Cost 2 vtrnl RHS, <1,3,5,7> + 2110062593U, // <4,1,6,6>: Cost 2 ins <4,u,6,6>, lane 1 + 1036328961U, // <4,1,6,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,1,6,u>: Cost 1 ins RHS, lane 1 + 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 3203080192U, // <4,1,7,1>: Cost 3 ins , lane 0 + 3203088384U, // <4,1,7,2>: Cost 3 ins , lane 0 + 2129354752U, // <4,1,7,3>: Cost 2 ins , lane 0 + 2664666470U, // <4,1,7,4>: Cost 3 vext2 <7,u,4,1>, <7,4,5,6> + 3203112960U, // <4,1,7,5>: Cost 3 ins , lane 0 + 3114049641U, // <4,1,7,6>: Cost 3 vtrnr <4,4,6,7>, <0,1,2,6> + 2110144513U, // <4,1,7,7>: Cost 2 ins <4,u,7,7>, lane 1 + 2129354752U, // <4,1,7,u>: Cost 2 ins , lane 0 + 1479188821U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, <0,4,1,u> + 1974010676U, // <4,1,u,1>: Cost 2 vtrnl RHS, <1,1,1,1> + 1841636246U, // <4,1,u,2>: Cost 2 vzipl RHS, <1,2,3,0> + 1055244288U, // <4,1,u,3>: Cost 1 ins LHS, lane 0 + 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS + 1974010880U, // 
<4,1,u,5>: Cost 2 vtrnl RHS, <1,3,5,7> + 2109915137U, // <4,1,u,6>: Cost 2 ins <4,u,4,6>, lane 1 + 1036328961U, // <4,1,u,7>: Cost 1 ins RHS, lane 1 + 1055244288U, // <4,1,u,u>: Cost 1 ins LHS, lane 0 + 3047786150U, // <4,2,0,0>: Cost 3 vtrnl <4,6,0,2>, <2,3,0,1> + 2109579265U, // <4,2,0,1>: Cost 2 ins <4,u,0,1>, lane 1 + 2129494016U, // <4,2,0,2>: Cost 2 ins , lane 0 + 2967019622U, // <4,2,0,3>: Cost 3 vzipr <2,3,4,0>, LHS + 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> + 2909947747U, // <4,2,0,5>: Cost 3 vzipl <4,0,5,1>, <2,5,3,1> + 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> + 2109628417U, // <4,2,0,7>: Cost 2 ins <4,u,0,7>, lane 1 + 2129494016U, // <4,2,0,u>: Cost 2 ins , lane 0 + 3203293184U, // <4,2,1,0>: Cost 3 ins , lane 0 + 3203301376U, // <4,2,1,1>: Cost 3 ins , lane 0 + 3203309568U, // <4,2,1,2>: Cost 3 ins , lane 0 + 2821242982U, // <4,2,1,3>: Cost 3 vuzpr <0,4,0,2>, LHS + 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3203334144U, // <4,2,1,5>: Cost 3 ins , lane 0 + 3203342336U, // <4,2,1,6>: Cost 3 ins , lane 0 + 2109702145U, // <4,2,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 2109702145U, // <4,2,1,u>: Cost 2 ins <4,u,1,7>, lane 1 + 2229208824U, // <4,2,2,0>: Cost 3 vrev <2,4,0,2> + 2911397400U, // <4,2,2,1>: Cost 3 vzipl <4,2,6,7>, <2,1,2,3> + 2129641472U, // <4,2,2,2>: Cost 2 ins , lane 0 + 2129649664U, // <4,2,2,3>: Cost 2 ins , lane 0 + 2697954940U, // <4,2,2,4>: Cost 3 vext3 <2,2,4,4>, <2,2,4,4> + 2911397764U, // <4,2,2,5>: Cost 3 vzipl <4,2,6,7>, <2,5,6,7> + 2821243084U, // <4,2,2,6>: Cost 3 vuzpr <0,4,0,2>, <0,2,4,6> + 2109775873U, // <4,2,2,7>: Cost 2 ins <4,u,2,7>, lane 1 + 2129641472U, // <4,2,2,u>: Cost 2 ins , lane 0 + 2129698816U, // <4,2,3,0>: Cost 2 ins , lane 0 + 2229290754U, // <4,2,3,1>: Cost 3 vrev <2,4,1,3> + 3203457024U, // <4,2,3,2>: Cost 3 ins , lane 0 + 2129723392U, // <4,2,3,3>: Cost 2 ins , lane 0 + 2129731584U, // <4,2,3,4>: Cost 2 ins , lane 0 + 2833188558U, // <4,2,3,5>: Cost 3 vuzpr <2,4,0,2>, <2,3,4,5> + 3203489792U, // <4,2,3,6>: Cost 3 ins , lane 0 + 2109849601U, // <4,2,3,7>: Cost 2 ins <4,u,3,7>, lane 1 + 2129698816U, // <4,2,3,u>: Cost 2 ins , lane 0 + 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS + 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> + 1702448074U, // <4,2,4,2>: Cost 2 vuzpl <4,1,2,3>, <4,1,2,3> + 1905918054U, // <4,2,4,3>: Cost 2 vzipr <4,4,4,4>, LHS + 2109898753U, // <4,2,4,4>: Cost 2 ins <4,u,4,4>, lane 1 + 2109906945U, // <4,2,4,5>: Cost 2 ins <4,u,4,5>, lane 1 + 2129821696U, // <4,2,4,6>: Cost 2 ins , lane 0 + 2109923329U, // <4,2,4,7>: Cost 2 ins <4,u,4,7>, lane 1 + 2129821696U, // <4,2,4,u>: Cost 2 ins , lane 0 + 3089777558U, // <4,2,5,0>: Cost 3 vtrnr <0,4,1,5>, <1,2,3,0> + 2109947905U, // <4,2,5,1>: Cost 2 ins <4,u,5,1>, lane 1 + 1839646312U, // <4,2,5,2>: Cost 2 vzipl RHS, <2,2,2,2> + 1893318758U, // <4,2,5,3>: Cost 2 vzipr <2,3,4,5>, LHS + 3089777562U, // <4,2,5,4>: Cost 3 vtrnr <0,4,1,5>, <1,2,3,4> + 2109980673U, // <4,2,5,5>: Cost 2 ins <4,u,5,5>, lane 1 + 1839646650U, // <4,2,5,6>: Cost 2 vzipl RHS, <2,6,3,7> + 2109997057U, // <4,2,5,7>: Cost 2 ins <4,u,5,7>, lane 1 + 1893318763U, // <4,2,5,u>: Cost 2 vzipr <2,3,4,5>, LHS + 1479246172U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, <0,4,2,6> + 2110021633U, // <4,2,6,1>: Cost 2 ins <4,u,6,1>, lane 1 + 1973864040U, // <4,2,6,2>: Cost 2 vtrnl RHS, <2,2,2,2> + 1880719462U, // <4,2,6,3>: Cost 2 vzipr <0,2,4,6>, LHS + 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS + 2110054401U, // <4,2,6,5>: Cost 2 ins 
<4,u,6,5>, lane 1 + 2110062593U, // <4,2,6,6>: Cost 2 ins <4,u,6,6>, lane 1 + 1036328961U, // <4,2,6,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,2,6,u>: Cost 1 ins RHS, lane 1 + 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 3203743744U, // <4,2,7,1>: Cost 3 ins , lane 0 + 3203751936U, // <4,2,7,2>: Cost 3 ins , lane 0 + 2130018304U, // <4,2,7,3>: Cost 2 ins , lane 0 + 3102032794U, // <4,2,7,4>: Cost 3 vtrnr <2,4,5,7>, <1,2,3,4> + 2229618474U, // <4,2,7,5>: Cost 3 vrev <2,4,5,7> + 3203784704U, // <4,2,7,6>: Cost 3 ins , lane 0 + 2110144513U, // <4,2,7,7>: Cost 2 ins <4,u,7,7>, lane 1 + 2130018304U, // <4,2,7,u>: Cost 2 ins , lane 0 + 1479262558U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, <0,4,2,u> + 2109947905U, // <4,2,u,1>: Cost 2 ins <4,u,5,1>, lane 1 + 1974011496U, // <4,2,u,2>: Cost 2 vtrnl RHS, <2,2,2,2> + 1880735846U, // <4,2,u,3>: Cost 2 vzipr <0,2,4,u>, LHS + 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS + 2109980673U, // <4,2,u,5>: Cost 2 ins <4,u,5,5>, lane 1 + 1841637306U, // <4,2,u,6>: Cost 2 vzipl RHS, <2,6,3,7> + 1036328961U, // <4,2,u,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,2,u,u>: Cost 1 ins RHS, lane 1 + 3203883008U, // <4,3,0,0>: Cost 3 ins , lane 0 + 2130149376U, // <4,3,0,1>: Cost 2 ins , lane 0 + 2109587457U, // <4,3,0,2>: Cost 2 ins <4,u,0,2>, lane 1 + 3047786908U, // <4,3,0,3>: Cost 3 vtrnl <4,6,0,2>, <3,3,3,3> + 2967020442U, // <4,3,0,4>: Cost 3 vzipr <2,3,4,0>, <1,2,3,4> + 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> + 3183362049U, // <4,3,0,6>: Cost 3 ins <4,u,0,6>, lane 1 + 2109628417U, // <4,3,0,7>: Cost 2 ins <4,u,0,7>, lane 1 + 2130149376U, // <4,3,0,u>: Cost 2 ins , lane 0 + 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> + 3203964928U, // <4,3,1,1>: Cost 3 ins , lane 0 + 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> + 2130239488U, // <4,3,1,3>: Cost 2 ins , lane 0 + 2967028634U, // <4,3,1,4>: Cost 3 vzipr <2,3,4,1>, <1,2,3,4> + 3203997696U, // <4,3,1,5>: Cost 3 ins , lane 0 + 2821398633U, // <4,3,1,6>: Cost 3 vuzpr <0,4,2,3>, <0,1,2,6> + 2109702145U, // <4,3,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 2130239488U, // <4,3,1,u>: Cost 2 ins , lane 0 + 3204030464U, // <4,3,2,0>: Cost 3 ins , lane 0 + 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> + 3204046848U, // <4,3,2,2>: Cost 3 ins , lane 0 + 2130313216U, // <4,3,2,3>: Cost 2 ins , lane 0 + 2833269658U, // <4,3,2,4>: Cost 3 vuzpr <2,4,1,3>, <1,2,3,4> + 3101624014U, // <4,3,2,5>: Cost 3 vtrnr <2,4,0,2>, <2,3,4,5> + 3204079616U, // <4,3,2,6>: Cost 3 ins , lane 0 + 2109775873U, // <4,3,2,7>: Cost 2 ins <4,u,2,7>, lane 1 + 2130313216U, // <4,3,2,u>: Cost 2 ins , lane 0 + 3204104192U, // <4,3,3,0>: Cost 3 ins , lane 0 + 2779564182U, // <4,3,3,1>: Cost 3 vuzpl <4,6,3,1>, <3,0,1,2> + 2636810580U, // <4,3,3,2>: Cost 3 vext2 <3,2,4,3>, <3,2,4,3> + 2130386944U, // <4,3,3,3>: Cost 2 ins , lane 0 + 2965717914U, // <4,3,3,4>: Cost 3 vzipr <2,1,4,3>, <1,2,3,4> + 2779597314U, // <4,3,3,5>: Cost 3 vuzpl <4,6,3,5>, <3,4,5,6> + 2778950237U, // <4,3,3,6>: Cost 3 vuzpl <4,5,3,7>, <3,5,6,7> + 2109849601U, // <4,3,3,7>: Cost 2 ins <4,u,3,7>, lane 1 + 2130386944U, // <4,3,3,u>: Cost 2 ins , lane 0 + 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> + 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> + 3183624193U, // <4,3,4,2>: Cost 3 ins <4,u,4,2>, lane 1 + 1747657049U, // <4,3,4,3>: Cost 2 vuzpr <0,4,2,3>, <0,4,2,3> + 2109898753U, // <4,3,4,4>: Cost 2 ins <4,u,4,4>, lane 1 + 2130477056U, // <4,3,4,5>: Cost 2 ins , lane 0 + 2109915137U, // 
<4,3,4,6>: Cost 2 ins <4,u,4,6>, lane 1 + 2109923329U, // <4,3,4,7>: Cost 2 ins <4,u,4,7>, lane 1 + 2130477056U, // <4,3,4,u>: Cost 2 ins , lane 0 + 1839646870U, // <4,3,5,0>: Cost 2 vzipl RHS, <3,0,1,2> + 2109947905U, // <4,3,5,1>: Cost 2 ins <4,u,5,1>, lane 1 + 2967061238U, // <4,3,5,2>: Cost 3 vzipr <2,3,4,5>, <1,0,3,2> + 1839647132U, // <4,3,5,3>: Cost 2 vzipl RHS, <3,3,3,3> + 1839647234U, // <4,3,5,4>: Cost 2 vzipl RHS, <3,4,5,6> + 2109980673U, // <4,3,5,5>: Cost 2 ins <4,u,5,5>, lane 1 + 2913389176U, // <4,3,5,6>: Cost 3 vzipl RHS, <3,6,0,7> + 2130567168U, // <4,3,5,7>: Cost 2 ins , lane 0 + 1839647518U, // <4,3,5,u>: Cost 2 vzipl RHS, <3,u,1,2> + 2110013441U, // <4,3,6,0>: Cost 2 ins <4,u,6,0>, lane 1 + 1973864598U, // <4,3,6,1>: Cost 2 vtrnl RHS, <3,0,1,2> + 2110029825U, // <4,3,6,2>: Cost 2 ins <4,u,6,2>, lane 1 + 1973864860U, // <4,3,6,3>: Cost 2 vtrnl RHS, <3,3,3,3> + 2110046209U, // <4,3,6,4>: Cost 2 ins <4,u,6,4>, lane 1 + 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> + 2110062593U, // <4,3,6,6>: Cost 2 ins <4,u,6,6>, lane 1 + 1036328961U, // <4,3,6,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,3,6,u>: Cost 1 ins RHS, lane 1 + 3204399104U, // <4,3,7,0>: Cost 3 ins , lane 0 + 3204407296U, // <4,3,7,1>: Cost 3 ins , lane 0 + 2660701368U, // <4,3,7,2>: Cost 3 vext2 <7,2,4,3>, <7,2,4,3> + 3204423680U, // <4,3,7,3>: Cost 3 ins , lane 0 + 2968404890U, // <4,3,7,4>: Cost 3 vzipr <2,5,4,7>, <1,2,3,4> + 3204440064U, // <4,3,7,5>: Cost 3 ins , lane 0 + 2235664908U, // <4,3,7,6>: Cost 3 vrev <3,4,6,7> + 2110144513U, // <4,3,7,7>: Cost 2 ins <4,u,7,7>, lane 1 + 2110144513U, // <4,3,7,u>: Cost 2 ins <4,u,7,7>, lane 1 + 1841637526U, // <4,3,u,0>: Cost 2 vzipl RHS, <3,0,1,2> + 1974012054U, // <4,3,u,1>: Cost 2 vtrnl RHS, <3,0,1,2> + 2109587457U, // <4,3,u,2>: Cost 2 ins <4,u,0,2>, lane 1 + 1974012316U, // <4,3,u,3>: Cost 2 vtrnl RHS, <3,3,3,3> + 1841637890U, // <4,3,u,4>: Cost 2 vzipl RHS, <3,4,5,6> + 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> + 2109915137U, // <4,3,u,6>: Cost 2 ins <4,u,4,6>, lane 1 + 1036328961U, // <4,3,u,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,3,u,u>: Cost 1 ins RHS, lane 1 + 1974046028U, // <4,4,0,0>: Cost 2 vtrnl <4,6,0,2>, <4,6,0,2> + 2107572229U, // <4,4,0,1>: Cost 2 ins <4,4,u,u>, lane 5 + 1705934950U, // <4,4,0,2>: Cost 2 vuzpl <4,6,4,6>, LHS + 3180724227U, // <4,4,0,3>: Cost 3 ins <4,4,0,u>, lane 3 + 2107539458U, // <4,4,0,4>: Cost 2 ins <4,4,u,4>, lane 2 + 2107547650U, // <4,4,0,5>: Cost 2 ins <4,4,u,5>, lane 2 + 1974046006U, // <4,4,0,6>: Cost 2 vtrnl <4,6,0,2>, RHS + 2109628417U, // <4,4,0,7>: Cost 2 ins <4,u,0,7>, lane 1 + 1974046024U, // <4,4,0,u>: Cost 2 vtrnl <4,6,0,2>, RHS + 3204620288U, // <4,4,1,0>: Cost 3 ins , lane 0 + 1836665802U, // <4,4,1,1>: Cost 2 vzipl <4,1,2,3>, <4,1,2,3> + 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> + 1771700326U, // <4,4,1,3>: Cost 2 vuzpr <4,4,4,4>, LHS + 2107539458U, // <4,4,1,4>: Cost 2 ins <4,4,u,4>, lane 2 + 2130919424U, // <4,4,1,5>: Cost 2 ins , lane 0 + 2107555842U, // <4,4,1,6>: Cost 2 ins <4,4,u,6>, lane 2 + 2109702145U, // <4,4,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 2130919424U, // <4,4,1,u>: Cost 2 ins , lane 0 + 2779678374U, // <4,4,2,0>: Cost 3 vuzpl <4,6,4,6>, <2,3,0,1> + 3044625673U, // <4,4,2,1>: Cost 3 vtrnl <4,1,2,3>, <4,5,1,7> + 1970883530U, // <4,4,2,2>: Cost 2 vtrnl <4,1,2,3>, <4,1,2,3> + 2107572229U, // <4,4,2,3>: Cost 2 ins <4,4,u,u>, lane 5 + 2107539458U, // <4,4,2,4>: Cost 2 ins <4,4,u,4>, lane 2 + 2107547650U, // <4,4,2,5>: Cost 2 ins <4,4,u,5>, lane 2 + 2131001344U, 
// <4,4,2,6>: Cost 2 ins , lane 0 + 2109775873U, // <4,4,2,7>: Cost 2 ins <4,u,2,7>, lane 1 + 2107572229U, // <4,4,2,u>: Cost 2 ins <4,4,u,u>, lane 5 + 3181248514U, // <4,4,3,0>: Cost 3 ins <4,4,u,0>, lane 2 + 2779678870U, // <4,4,3,1>: Cost 3 vuzpl <4,6,4,6>, <3,0,1,2> + 3181264898U, // <4,4,3,2>: Cost 3 ins <4,4,u,2>, lane 2 + 1880031352U, // <4,4,3,3>: Cost 2 vzipr <0,1,4,3>, <0,1,4,3> + 2107539458U, // <4,4,3,4>: Cost 2 ins <4,4,u,4>, lane 2 + 2107547650U, // <4,4,3,5>: Cost 2 ins <4,4,u,5>, lane 2 + 2107555842U, // <4,4,3,6>: Cost 2 ins <4,4,u,6>, lane 2 + 2109849601U, // <4,4,3,7>: Cost 2 ins <4,u,3,7>, lane 1 + 2107547650U, // <4,4,3,u>: Cost 2 ins <4,4,u,5>, lane 2 + 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS + 2107277315U, // <4,4,4,1>: Cost 2 ins <4,4,4,u>, lane 3 + 2107277315U, // <4,4,4,2>: Cost 2 ins <4,4,4,u>, lane 3 + 2107277315U, // <4,4,4,3>: Cost 2 ins <4,4,4,u>, lane 3 + 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS + 2107547650U, // <4,4,4,5>: Cost 2 ins <4,4,u,5>, lane 2 + 1705938230U, // <4,4,4,6>: Cost 2 vuzpl <4,6,4,6>, RHS + 2109923329U, // <4,4,4,7>: Cost 2 ins <4,u,4,7>, lane 1 + 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS + 1839647634U, // <4,4,5,0>: Cost 2 vzipl RHS, <4,0,5,1> + 2109947905U, // <4,4,5,1>: Cost 2 ins <4,u,5,1>, lane 1 + 2107351043U, // <4,4,5,2>: Cost 2 ins <4,4,5,u>, lane 3 + 2107351043U, // <4,4,5,3>: Cost 2 ins <4,4,5,u>, lane 3 + 1839647952U, // <4,4,5,4>: Cost 2 vzipl RHS, <4,4,4,4> + 765906230U, // <4,4,5,5>: Cost 1 vzipl RHS, RHS + 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2107351043U, // <4,4,5,7>: Cost 2 ins <4,4,5,u>, lane 3 + 765906473U, // <4,4,5,u>: Cost 1 vzipl RHS, RHS + 1973865804U, // <4,4,6,0>: Cost 2 vtrnl RHS, <4,6,0,2> + 2107424771U, // <4,4,6,1>: Cost 2 ins <4,4,6,u>, lane 3 + 2110029825U, // <4,4,6,2>: Cost 2 ins <4,u,6,2>, lane 1 + 2107424771U, // <4,4,6,3>: Cost 2 ins <4,4,6,u>, lane 3 + 1973865680U, // <4,4,6,4>: Cost 2 vtrnl RHS, <4,4,4,4> + 1973865362U, // <4,4,6,5>: Cost 2 vtrnl RHS, <4,0,5,1> + 900123958U, // <4,4,6,6>: Cost 1 vtrnl RHS, RHS + 1036328961U, // <4,4,6,7>: Cost 1 ins RHS, lane 1 + 900123976U, // <4,4,6,u>: Cost 1 vtrnl RHS, RHS + 3181248514U, // <4,4,7,0>: Cost 3 ins <4,4,u,0>, lane 2 + 2779681786U, // <4,4,7,1>: Cost 3 vuzpl <4,6,4,6>, <7,0,1,2> + 3181264898U, // <4,4,7,2>: Cost 3 ins <4,4,u,2>, lane 2 + 2845442636U, // <4,4,7,3>: Cost 3 vuzpr <4,4,4,4>, <0,7,2,3> + 2107539458U, // <4,4,7,4>: Cost 2 ins <4,4,u,4>, lane 2 + 2107547650U, // <4,4,7,5>: Cost 2 ins <4,4,u,5>, lane 2 + 2131369984U, // <4,4,7,6>: Cost 2 ins , lane 0 + 2040311013U, // <4,4,7,7>: Cost 2 vtrnr <4,4,6,7>, <4,4,6,7> + 2107547650U, // <4,4,7,u>: Cost 2 ins <4,4,u,5>, lane 2 + 1974013260U, // <4,4,u,0>: Cost 2 vtrnl RHS, <4,6,0,2> + 2107572229U, // <4,4,u,1>: Cost 2 ins <4,4,u,u>, lane 5 + 1705940782U, // <4,4,u,2>: Cost 2 vuzpl <4,6,4,6>, LHS + 2107572229U, // <4,4,u,3>: Cost 2 ins <4,4,u,u>, lane 5 + 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS + 767896886U, // <4,4,u,5>: Cost 1 vzipl RHS, RHS + 900271414U, // <4,4,u,6>: Cost 1 vtrnl RHS, RHS + 1036328961U, // <4,4,u,7>: Cost 1 ins RHS, lane 1 + 900271432U, // <4,4,u,u>: Cost 1 vtrnl RHS, RHS + 2108170242U, // <4,5,0,0>: Cost 2 ins <4,5,u,0>, lane 2 + 1034493957U, // <4,5,0,1>: Cost 1 ins RHS, lane 5 + 1707294822U, // <4,5,0,2>: Cost 2 vuzpl <4,u,5,1>, LHS + 2108194818U, // <4,5,0,3>: Cost 2 ins <4,5,u,3>, lane 2 + 2108203010U, // <4,5,0,4>: Cost 2 ins <4,5,u,4>, lane 2 + 2108211202U, // <4,5,0,5>: Cost 2 ins <4,5,u,5>, lane 2 + 2108219394U, // <4,5,0,6>: 
Cost 2 ins <4,5,u,6>, lane 2 + 1034485762U, // <4,5,0,7>: Cost 1 ins RHS, lane 2 + 1034493957U, // <4,5,0,u>: Cost 1 ins RHS, lane 5 + 2108170242U, // <4,5,1,0>: Cost 2 ins <4,5,u,0>, lane 2 + 2133540868U, // <4,5,1,1>: Cost 2 ins , lane 4 + 2133549060U, // <4,5,1,2>: Cost 2 ins , lane 4 + 1747599462U, // <4,5,1,3>: Cost 2 vuzpr <0,4,1,5>, LHS + 2108203010U, // <4,5,1,4>: Cost 2 ins <4,5,u,4>, lane 2 + 2133573636U, // <4,5,1,5>: Cost 2 ins , lane 4 + 2108219394U, // <4,5,1,6>: Cost 2 ins <4,5,u,6>, lane 2 + 1034485762U, // <4,5,1,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,5,1,u>: Cost 1 ins RHS, lane 2 + 2108170242U, // <4,5,2,0>: Cost 2 ins <4,5,u,0>, lane 2 + 2108178434U, // <4,5,2,1>: Cost 2 ins <4,5,u,1>, lane 2 + 2133622788U, // <4,5,2,2>: Cost 2 ins , lane 4 + 1059889156U, // <4,5,2,3>: Cost 1 ins LHS, lane 4 + 2108203010U, // <4,5,2,4>: Cost 2 ins <4,5,u,4>, lane 2 + 2108211202U, // <4,5,2,5>: Cost 2 ins <4,5,u,5>, lane 2 + 2133655556U, // <4,5,2,6>: Cost 2 ins , lane 4 + 1034485762U, // <4,5,2,7>: Cost 1 ins RHS, lane 2 + 1059889156U, // <4,5,2,u>: Cost 1 ins LHS, lane 4 + 2133680132U, // <4,5,3,0>: Cost 2 ins , lane 4 + 2108178434U, // <4,5,3,1>: Cost 2 ins <4,5,u,1>, lane 2 + 2133696516U, // <4,5,3,2>: Cost 2 ins , lane 4 + 2133704708U, // <4,5,3,3>: Cost 2 ins , lane 4 + 2133712900U, // <4,5,3,4>: Cost 2 ins , lane 4 + 2108211202U, // <4,5,3,5>: Cost 2 ins <4,5,u,5>, lane 2 + 2108219394U, // <4,5,3,6>: Cost 2 ins <4,5,u,6>, lane 2 + 1034485762U, // <4,5,3,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,5,3,u>: Cost 1 ins RHS, lane 2 + 2108170242U, // <4,5,4,0>: Cost 2 ins <4,5,u,0>, lane 2 + 2108178434U, // <4,5,4,1>: Cost 2 ins <4,5,u,1>, lane 2 + 2108186626U, // <4,5,4,2>: Cost 2 ins <4,5,u,2>, lane 2 + 2108194818U, // <4,5,4,3>: Cost 2 ins <4,5,u,3>, lane 2 + 2109898753U, // <4,5,4,4>: Cost 2 ins <4,u,4,4>, lane 1 + 1034493957U, // <4,5,4,5>: Cost 1 ins RHS, lane 5 + 1707298102U, // <4,5,4,6>: Cost 2 vuzpl <4,u,5,1>, RHS + 1034485762U, // <4,5,4,7>: Cost 1 ins RHS, lane 2 + 1034493957U, // <4,5,4,u>: Cost 1 ins RHS, lane 5 + 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS + 1839656656U, // <4,5,5,1>: Cost 2 vzipl RHS, <5,1,7,3> + 2108186626U, // <4,5,5,2>: Cost 2 ins <4,5,u,2>, lane 2 + 2108194818U, // <4,5,5,3>: Cost 2 ins <4,5,u,3>, lane 2 + 1839648710U, // <4,5,5,4>: Cost 2 vzipl RHS, <5,4,7,6> + 1839648772U, // <4,5,5,5>: Cost 2 vzipl RHS, <5,5,5,5> + 1839648866U, // <4,5,5,6>: Cost 2 vzipl RHS, <5,6,7,0> + 1034485762U, // <4,5,5,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,5,5,u>: Cost 1 ins RHS, lane 2 + 1034346499U, // <4,5,6,0>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,5,6,1>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,5,6,2>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,5,6,3>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,5,6,4>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,5,6,5>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,5,6,6>: Cost 1 ins RHS, lane 3 + 27705344U, // <4,5,6,7>: Cost 0 copy RHS + 27705344U, // <4,5,6,u>: Cost 0 copy RHS + 2133975044U, // <4,5,7,0>: Cost 2 ins , lane 4 + 2108178434U, // <4,5,7,1>: Cost 2 ins <4,5,u,1>, lane 2 + 2108186626U, // <4,5,7,2>: Cost 2 ins <4,5,u,2>, lane 2 + 2133999620U, // <4,5,7,3>: Cost 2 ins , lane 4 + 2134007812U, // <4,5,7,4>: Cost 2 ins , lane 4 + 2108211202U, // <4,5,7,5>: Cost 2 ins <4,5,u,5>, lane 2 + 2134024196U, // <4,5,7,6>: Cost 2 ins , lane 4 + 1034485762U, // <4,5,7,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,5,7,u>: Cost 1 ins RHS, lane 2 + 1034346499U, // <4,5,u,0>: Cost 1 ins RHS, 
lane 3 + 1034493957U, // <4,5,u,1>: Cost 1 ins RHS, lane 5 + 1034346499U, // <4,5,u,2>: Cost 1 ins RHS, lane 3 + 1059889156U, // <4,5,u,3>: Cost 1 ins LHS, lane 4 + 1034346499U, // <4,5,u,4>: Cost 1 ins RHS, lane 3 + 1034493957U, // <4,5,u,5>: Cost 1 ins RHS, lane 5 + 1034346499U, // <4,5,u,6>: Cost 1 ins RHS, lane 3 + 27705344U, // <4,5,u,7>: Cost 0 copy RHS + 27705344U, // <4,5,u,u>: Cost 0 copy RHS + 1705426944U, // <4,6,0,0>: Cost 2 vuzpl RHS, <0,0,0,0> + 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS + 631685222U, // <4,6,0,2>: Cost 1 vuzpl RHS, LHS + 2108309507U, // <4,6,0,3>: Cost 2 ins <4,6,0,u>, lane 3 + 1705427148U, // <4,6,0,4>: Cost 2 vuzpl RHS, <0,2,4,6> + 2108309507U, // <4,6,0,5>: Cost 2 ins <4,6,0,u>, lane 3 + 2108882946U, // <4,6,0,6>: Cost 2 ins <4,6,u,6>, lane 2 + 2108309507U, // <4,6,0,7>: Cost 2 ins <4,6,0,u>, lane 3 + 631685276U, // <4,6,0,u>: Cost 1 vuzpl RHS, LHS + 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> + 1705427764U, // <4,6,1,1>: Cost 2 vuzpl RHS, <1,1,1,1> + 2108850178U, // <4,6,1,2>: Cost 2 ins <4,6,u,2>, lane 2 + 1747681382U, // <4,6,1,3>: Cost 2 vuzpr <0,4,2,6>, LHS + 2779169619U, // <4,6,1,4>: Cost 3 vuzpl RHS, <1,1,4,5> + 1705427968U, // <4,6,1,5>: Cost 2 vuzpl RHS, <1,3,5,7> + 2108882946U, // <4,6,1,6>: Cost 2 ins <4,6,u,6>, lane 2 + 2109702145U, // <4,6,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 1747681387U, // <4,6,1,u>: Cost 2 vuzpr <0,4,2,6>, LHS + 1705428646U, // <4,6,2,0>: Cost 2 vuzpl RHS, <2,3,0,1> + 2779170237U, // <4,6,2,1>: Cost 3 vuzpl RHS, <2,0,1,2> + 1705428584U, // <4,6,2,2>: Cost 2 vuzpl RHS, <2,2,2,2> + 1705428594U, // <4,6,2,3>: Cost 2 vuzpl RHS, <2,2,3,3> + 1705428686U, // <4,6,2,4>: Cost 2 vuzpl RHS, <2,3,4,5> + 2839560386U, // <4,6,2,5>: Cost 3 vuzpr <3,4,5,6>, <0,2,3,5> + 2108882946U, // <4,6,2,6>: Cost 2 ins <4,6,u,6>, lane 2 + 2109775873U, // <4,6,2,7>: Cost 2 ins <4,u,2,7>, lane 1 + 1705428639U, // <4,6,2,u>: Cost 2 vuzpl RHS, <2,2,u,3> + 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> + 1705429142U, // <4,6,3,1>: Cost 2 vuzpl RHS, <3,0,1,2> + 2108850178U, // <4,6,3,2>: Cost 2 ins <4,6,u,2>, lane 2 + 1705429404U, // <4,6,3,3>: Cost 2 vuzpl RHS, <3,3,3,3> + 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> + 1705429506U, // <4,6,3,5>: Cost 2 vuzpl RHS, <3,4,5,6> + 2108882946U, // <4,6,3,6>: Cost 2 ins <4,6,u,6>, lane 2 + 2132410368U, // <4,6,3,7>: Cost 2 ins , lane 0 + 1705429205U, // <4,6,3,u>: Cost 2 vuzpl RHS, <3,0,u,2> + 1705430348U, // <4,6,4,0>: Cost 2 vuzpl RHS, <4,6,0,2> + 2108604419U, // <4,6,4,1>: Cost 2 ins <4,6,4,u>, lane 3 + 2108850178U, // <4,6,4,2>: Cost 2 ins <4,6,u,2>, lane 2 + 2108604419U, // <4,6,4,3>: Cost 2 ins <4,6,4,u>, lane 3 + 1705430224U, // <4,6,4,4>: Cost 2 vuzpl RHS, <4,4,4,4> + 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS + 631688502U, // <4,6,4,6>: Cost 1 vuzpl RHS, RHS + 2108604419U, // <4,6,4,7>: Cost 2 ins <4,6,4,u>, lane 3 + 631688520U, // <4,6,4,u>: Cost 1 vuzpl RHS, RHS + 2839563567U, // <4,6,5,0>: Cost 3 vuzpr <3,4,5,6>, <4,5,6,0> + 1705439360U, // <4,6,5,1>: Cost 2 vuzpl RHS, <5,7,1,3> + 1839657466U, // <4,6,5,2>: Cost 2 vzipl RHS, <6,2,7,3> + 2839563570U, // <4,6,5,3>: Cost 3 vuzpr <3,4,5,6>, <4,5,6,3> + 2839563571U, // <4,6,5,4>: Cost 3 vuzpr <3,4,5,6>, <4,5,6,4> + 1705431044U, // <4,6,5,5>: Cost 2 vuzpl RHS, <5,5,5,5> + 1839649592U, // <4,6,5,6>: Cost 2 vzipl RHS, <6,6,6,6> + 1747684662U, // <4,6,5,7>: Cost 2 vuzpr <0,4,2,6>, RHS + 1747684663U, // <4,6,5,u>: Cost 2 vuzpr <0,4,2,6>, RHS + 1705431886U, // <4,6,6,0>: Cost 2 vuzpl RHS, 
<6,7,0,1> + 2110021633U, // <4,6,6,1>: Cost 2 ins <4,u,6,1>, lane 1 + 2110029825U, // <4,6,6,2>: Cost 2 ins <4,u,6,2>, lane 1 + 2110038017U, // <4,6,6,3>: Cost 2 ins <4,u,6,3>, lane 1 + 1705431926U, // <4,6,6,4>: Cost 2 vuzpl RHS, <6,7,4,5> + 2110054401U, // <4,6,6,5>: Cost 2 ins <4,u,6,5>, lane 1 + 1705431864U, // <4,6,6,6>: Cost 2 vuzpl RHS, <6,6,6,6> + 1036328961U, // <4,6,6,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,6,6,u>: Cost 1 ins RHS, lane 1 + 2132647936U, // <4,6,7,0>: Cost 2 ins , lane 0 + 1705432058U, // <4,6,7,1>: Cost 2 vuzpl RHS, <7,0,1,2> + 2108850178U, // <4,6,7,2>: Cost 2 ins <4,6,u,2>, lane 2 + 2779173980U, // <4,6,7,3>: Cost 3 vuzpl RHS, <7,1,3,1> + 2132680704U, // <4,6,7,4>: Cost 2 ins , lane 0 + 1705432422U, // <4,6,7,5>: Cost 2 vuzpl RHS, <7,4,5,6> + 2108882946U, // <4,6,7,6>: Cost 2 ins <4,6,u,6>, lane 2 + 1705432684U, // <4,6,7,7>: Cost 2 vuzpl RHS, <7,7,7,7> + 1705432121U, // <4,6,7,u>: Cost 2 vuzpl RHS, <7,0,u,2> + 1705433020U, // <4,6,u,0>: Cost 2 vuzpl RHS, + 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS + 631691054U, // <4,6,u,2>: Cost 1 vuzpl RHS, LHS + 1747681949U, // <4,6,u,3>: Cost 2 vuzpr <0,4,2,6>, LHS + 1705433060U, // <4,6,u,4>: Cost 2 vuzpl RHS, + 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS + 631691418U, // <4,6,u,6>: Cost 1 vuzpl RHS, RHS + 1036328961U, // <4,6,u,7>: Cost 1 ins RHS, lane 1 + 631691108U, // <4,6,u,u>: Cost 1 vuzpl RHS, LHS + 3206537216U, // <4,7,0,0>: Cost 3 ins , lane 0 + 2132803584U, // <4,7,0,1>: Cost 2 ins , lane 0 + 2109587457U, // <4,7,0,2>: Cost 2 ins <4,u,0,2>, lane 1 + 2845614101U, // <4,7,0,3>: Cost 3 vuzpr <4,4,6,7>, <0,0,2,3> + 3206569984U, // <4,7,0,4>: Cost 3 ins , lane 0 + 3047789926U, // <4,7,0,5>: Cost 3 vtrnl <4,6,0,2>, <7,4,5,6> + 3047789929U, // <4,7,0,6>: Cost 3 vtrnl <4,6,0,2>, <7,4,6,0> + 2109628417U, // <4,7,0,7>: Cost 2 ins <4,u,0,7>, lane 1 + 2132803584U, // <4,7,0,u>: Cost 2 ins , lane 0 + 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> + 3206619136U, // <4,7,1,1>: Cost 3 ins , lane 0 + 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> + 2132893696U, // <4,7,1,3>: Cost 2 ins , lane 0 + 3206643712U, // <4,7,1,4>: Cost 3 ins , lane 0 + 3206651904U, // <4,7,1,5>: Cost 3 ins , lane 0 + 2988265414U, // <4,7,1,6>: Cost 3 vzipr <5,u,4,1>, <5,4,7,6> + 2109702145U, // <4,7,1,7>: Cost 2 ins <4,u,1,7>, lane 1 + 2132893696U, // <4,7,1,u>: Cost 2 ins , lane 0 + 3206684672U, // <4,7,2,0>: Cost 3 ins , lane 0 + 3206692864U, // <4,7,2,1>: Cost 3 ins , lane 0 + 3206701056U, // <4,7,2,2>: Cost 3 ins , lane 0 + 2132967424U, // <4,7,2,3>: Cost 2 ins , lane 0 + 2833597338U, // <4,7,2,4>: Cost 3 vuzpr <2,4,5,7>, <1,2,3,4> + 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> + 3206733824U, // <4,7,2,6>: Cost 3 ins , lane 0 + 2109775873U, // <4,7,2,7>: Cost 2 ins <4,u,2,7>, lane 1 + 2132967424U, // <4,7,2,u>: Cost 2 ins , lane 0 + 3206758400U, // <4,7,3,0>: Cost 3 ins , lane 0 + 3206766592U, // <4,7,3,1>: Cost 3 ins , lane 0 + 3047388245U, // <4,7,3,2>: Cost 3 vtrnl <4,5,3,7>, <7,1,2,3> + 3206782976U, // <4,7,3,3>: Cost 3 ins , lane 0 + 2989609062U, // <4,7,3,4>: Cost 3 vzipr <6,1,4,3>, <5,6,7,4> + 3206799360U, // <4,7,3,5>: Cost 3 ins , lane 0 + 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> + 2109849601U, // <4,7,3,7>: Cost 2 ins <4,u,3,7>, lane 1 + 2109849601U, // <4,7,3,u>: Cost 2 ins <4,u,3,7>, lane 1 + 2583199846U, // <4,7,4,0>: Cost 3 vext1 <5,4,7,4>, LHS + 3048117242U, // <4,7,4,1>: Cost 3 vtrnl <4,6,4,6>, <7,0,1,2> + 3183624193U, // <4,7,4,2>: Cost 3 ins 
<4,u,4,2>, lane 1 + 2979659923U, // <4,7,4,3>: Cost 3 vzipr <4,4,4,4>, <0,1,7,3> + 2109898753U, // <4,7,4,4>: Cost 2 ins <4,u,4,4>, lane 1 + 2133131264U, // <4,7,4,5>: Cost 2 ins , lane 0 + 2109915137U, // <4,7,4,6>: Cost 2 ins <4,u,4,6>, lane 1 + 1771875557U, // <4,7,4,7>: Cost 2 vuzpr <4,4,6,7>, <4,4,6,7> + 2133131264U, // <4,7,4,u>: Cost 2 ins , lane 0 + 1839649786U, // <4,7,5,0>: Cost 2 vzipl RHS, <7,0,1,2> + 2109947905U, // <4,7,5,1>: Cost 2 ins <4,u,5,1>, lane 1 + 2913391781U, // <4,7,5,2>: Cost 3 vzipl RHS, <7,2,2,2> + 2913391843U, // <4,7,5,3>: Cost 3 vzipl RHS, <7,3,0,1> + 1839650150U, // <4,7,5,4>: Cost 2 vzipl RHS, <7,4,5,6> + 2109980673U, // <4,7,5,5>: Cost 2 ins <4,u,5,5>, lane 1 + 2913392145U, // <4,7,5,6>: Cost 3 vzipl RHS, <7,6,6,6> + 1839650412U, // <4,7,5,7>: Cost 2 vzipl RHS, <7,7,7,7> + 1839650434U, // <4,7,5,u>: Cost 2 vzipl RHS, <7,u,1,2> + 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS + 1973867514U, // <4,7,6,1>: Cost 2 vtrnl RHS, <7,0,1,2> + 2110029825U, // <4,7,6,2>: Cost 2 ins <4,u,6,2>, lane 1 + 2110038017U, // <4,7,6,3>: Cost 2 ins <4,u,6,3>, lane 1 + 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS + 1973867878U, // <4,7,6,5>: Cost 2 vtrnl RHS, <7,4,5,6> + 2110062593U, // <4,7,6,6>: Cost 2 ins <4,u,6,6>, lane 1 + 1036328961U, // <4,7,6,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,7,6,u>: Cost 1 ins RHS, lane 1 + 2914587642U, // <4,7,7,0>: Cost 3 vzipl <4,7,5,0>, <7,0,1,2> + 2779862010U, // <4,7,7,1>: Cost 3 vuzpl <4,6,7,1>, <7,0,1,2> + 2779247701U, // <4,7,7,2>: Cost 3 vuzpl <4,5,7,7>, <7,1,2,3> + 3207077888U, // <4,7,7,3>: Cost 3 ins , lane 0 + 2914620774U, // <4,7,7,4>: Cost 3 vzipl <4,7,5,4>, <7,4,5,6> + 2779895142U, // <4,7,7,5>: Cost 3 vuzpl <4,6,7,5>, <7,4,5,6> + 2992295878U, // <4,7,7,6>: Cost 3 vzipr <6,5,4,7>, <5,4,7,6> + 2133368832U, // <4,7,7,7>: Cost 2 ins , lane 0 + 2133368832U, // <4,7,7,u>: Cost 2 ins , lane 0 + 1841640442U, // <4,7,u,0>: Cost 2 vzipl RHS, <7,0,1,2> + 1974014970U, // <4,7,u,1>: Cost 2 vtrnl RHS, <7,0,1,2> + 2109587457U, // <4,7,u,2>: Cost 2 ins <4,u,0,2>, lane 1 + 2132893696U, // <4,7,u,3>: Cost 2 ins , lane 0 + 1841640806U, // <4,7,u,4>: Cost 2 vzipl RHS, <7,4,5,6> + 1974015334U, // <4,7,u,5>: Cost 2 vtrnl RHS, <7,4,5,6> + 2109915137U, // <4,7,u,6>: Cost 2 ins <4,u,4,6>, lane 1 + 1036328961U, // <4,7,u,7>: Cost 1 ins RHS, lane 1 + 1036328961U, // <4,7,u,u>: Cost 1 ins RHS, lane 1 + 1705574400U, // <4,u,0,0>: Cost 2 vuzpl RHS, <0,0,0,0> + 1034493957U, // <4,u,0,1>: Cost 1 ins RHS, lane 5 + 631832678U, // <4,u,0,2>: Cost 1 vuzpl RHS, LHS + 2108309507U, // <4,u,0,3>: Cost 2 ins <4,6,0,u>, lane 3 + 1705574604U, // <4,u,0,4>: Cost 2 vuzpl RHS, <0,2,4,6> + 2107547650U, // <4,u,0,5>: Cost 2 ins <4,4,u,5>, lane 2 + 1974048922U, // <4,u,0,6>: Cost 2 vtrnl <4,6,0,2>, RHS + 1034485762U, // <4,u,0,7>: Cost 1 ins RHS, lane 2 + 631832732U, // <4,u,0,u>: Cost 1 vuzpl RHS, LHS + 2108170242U, // <4,u,1,0>: Cost 2 ins <4,5,u,0>, lane 2 + 1705575220U, // <4,u,1,1>: Cost 2 vuzpl RHS, <1,1,1,1> + 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 1747624038U, // <4,u,1,3>: Cost 2 vuzpr <0,4,1,u>, LHS + 2107539458U, // <4,u,1,4>: Cost 2 ins <4,4,u,4>, lane 2 + 1705575424U, // <4,u,1,5>: Cost 2 vuzpl RHS, <1,3,5,7> + 2107555842U, // <4,u,1,6>: Cost 2 ins <4,4,u,6>, lane 2 + 1034485762U, // <4,u,1,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,u,1,u>: Cost 1 ins RHS, lane 2 + 1705576102U, // <4,u,2,0>: Cost 2 vuzpl RHS, <2,3,0,1> + 2104860674U, // <4,u,2,1>: Cost 2 ins <4,0,u,1>, lane 2 + 1705576040U, // <4,u,2,2>: Cost 2 vuzpl 
RHS, <2,2,2,2> + 1055244288U, // <4,u,2,3>: Cost 1 ins LHS, lane 0 + 1705576142U, // <4,u,2,4>: Cost 2 vuzpl RHS, <2,3,4,5> + 2107547650U, // <4,u,2,5>: Cost 2 ins <4,4,u,5>, lane 2 + 2131001344U, // <4,u,2,6>: Cost 2 ins , lane 0 + 1034485762U, // <4,u,2,7>: Cost 1 ins RHS, lane 2 + 1055244288U, // <4,u,2,u>: Cost 1 ins LHS, lane 0 + 2129698816U, // <4,u,3,0>: Cost 2 ins , lane 0 + 1705576598U, // <4,u,3,1>: Cost 2 vuzpl RHS, <3,0,1,2> + 2128388096U, // <4,u,3,2>: Cost 2 ins , lane 0 + 1705576860U, // <4,u,3,3>: Cost 2 vuzpl RHS, <3,3,3,3> + 2129731584U, // <4,u,3,4>: Cost 2 ins , lane 0 + 1705576962U, // <4,u,3,5>: Cost 2 vuzpl RHS, <3,4,5,6> + 2107555842U, // <4,u,3,6>: Cost 2 ins <4,4,u,6>, lane 2 + 1034485762U, // <4,u,3,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,u,3,u>: Cost 1 ins RHS, lane 2 + 1705577804U, // <4,u,4,0>: Cost 2 vuzpl RHS, <4,6,0,2> + 2104860674U, // <4,u,4,1>: Cost 2 ins <4,0,u,1>, lane 2 + 1974376238U, // <4,u,4,2>: Cost 2 vtrnl <4,6,4,6>, LHS + 2108604419U, // <4,u,4,3>: Cost 2 ins <4,6,4,u>, lane 3 + 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS + 1034493957U, // <4,u,4,5>: Cost 1 ins RHS, lane 5 + 631835958U, // <4,u,4,6>: Cost 1 vuzpl RHS, RHS + 1034485762U, // <4,u,4,7>: Cost 1 ins RHS, lane 2 + 631835976U, // <4,u,4,u>: Cost 1 vuzpl RHS, RHS + 1839650515U, // <4,u,5,0>: Cost 2 vzipl RHS, + 765908782U, // <4,u,5,1>: Cost 1 vzipl RHS, LHS + 1839650693U, // <4,u,5,2>: Cost 2 vzipl RHS, + 2016035485U, // <4,u,5,3>: Cost 2 vtrnr <0,4,1,5>, LHS + 1839650879U, // <4,u,5,4>: Cost 2 vzipl RHS, + 765909146U, // <4,u,5,5>: Cost 1 vzipl RHS, RHS + 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 1034485762U, // <4,u,5,7>: Cost 1 ins RHS, lane 2 + 765909349U, // <4,u,5,u>: Cost 1 vzipl RHS, LHS + 1034346499U, // <4,u,6,0>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,u,6,1>: Cost 1 ins RHS, lane 3 + 900126510U, // <4,u,6,2>: Cost 1 vtrnl RHS, LHS + 1034346499U, // <4,u,6,3>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,u,6,4>: Cost 1 ins RHS, lane 3 + 1034346499U, // <4,u,6,5>: Cost 1 ins RHS, lane 3 + 900126874U, // <4,u,6,6>: Cost 1 vtrnl RHS, RHS + 27705344U, // <4,u,6,7>: Cost 0 copy RHS + 27705344U, // <4,u,6,u>: Cost 0 copy RHS + 2133975044U, // <4,u,7,0>: Cost 2 ins , lane 4 + 1705579514U, // <4,u,7,1>: Cost 2 vuzpl RHS, <7,0,1,2> + 2104868866U, // <4,u,7,2>: Cost 2 ins <4,0,u,2>, lane 2 + 2129354752U, // <4,u,7,3>: Cost 2 ins , lane 0 + 2134007812U, // <4,u,7,4>: Cost 2 ins , lane 4 + 1705579878U, // <4,u,7,5>: Cost 2 vuzpl RHS, <7,4,5,6> + 2131369984U, // <4,u,7,6>: Cost 2 ins , lane 0 + 1034485762U, // <4,u,7,7>: Cost 1 ins RHS, lane 2 + 1034485762U, // <4,u,7,u>: Cost 1 ins RHS, lane 2 + 1034346499U, // <4,u,u,0>: Cost 1 ins RHS, lane 3 + 767899438U, // <4,u,u,1>: Cost 1 vzipl RHS, LHS + 631838510U, // <4,u,u,2>: Cost 1 vuzpl RHS, LHS + 1055244288U, // <4,u,u,3>: Cost 1 ins LHS, lane 0 + 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS + 767899802U, // <4,u,u,5>: Cost 1 vzipl RHS, RHS + 631838874U, // <4,u,u,6>: Cost 1 vuzpl RHS, RHS + 27705344U, // <4,u,u,7>: Cost 0 copy RHS + 27705344U, // <4,u,u,u>: Cost 0 copy RHS + 2128150528U, // <5,0,0,0>: Cost 2 ins , lane 0 + 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> + 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> + 2846220309U, // <5,0,0,3>: Cost 3 vuzpr <4,5,6,0>, <0,0,2,3> + 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> + 2583318482U, // <5,0,0,5>: Cost 3 vext1 <5,5,0,0>, <5,5,0,0> + 3189334017U, // <5,0,0,6>: Cost 3 ins <5,u,0,6>, lane 1 + 2846223265U, // <5,0,0,7>: 
Cost 3 vuzpr <4,5,6,0>, <4,0,6,7> + 2128150528U, // <5,0,0,u>: Cost 2 ins , lane 0 + 1503608934U, // <5,0,1,0>: Cost 2 vext1 <4,5,0,1>, LHS + 1843003494U, // <5,0,1,1>: Cost 2 vzipl <5,1,7,3>, LHS + 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS + 2115641345U, // <5,0,1,3>: Cost 2 ins <5,u,1,3>, lane 1 + 1611612282U, // <5,0,1,4>: Cost 2 vext3 <0,1,4,5>, <0,1,4,5> + 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> + 3202015232U, // <5,0,1,6>: Cost 3 ins , lane 0 + 3189415937U, // <5,0,1,7>: Cost 3 ins <5,u,1,7>, lane 1 + 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS + 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> + 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> + 2128314368U, // <5,0,2,2>: Cost 2 ins , lane 0 + 2128322560U, // <5,0,2,3>: Cost 2 ins , lane 0 + 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> + 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> + 3189481473U, // <5,0,2,6>: Cost 3 ins <5,u,2,6>, lane 1 + 2595280262U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,5,0,2> + 2128314368U, // <5,0,2,u>: Cost 2 ins , lane 0 + 3202113536U, // <5,0,3,0>: Cost 3 ins , lane 0 + 2918047846U, // <5,0,3,1>: Cost 3 vzipl <5,3,7,0>, LHS + 2128388096U, // <5,0,3,2>: Cost 2 ins , lane 0 + 3189530625U, // <5,0,3,3>: Cost 3 ins <5,u,3,3>, lane 1 + 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> + 2785315330U, // <5,0,3,5>: Cost 3 vuzpl <5,6,0,1>, <3,4,5,6> + 3202162688U, // <5,0,3,6>: Cost 3 ins , lane 0 + 2840323072U, // <5,0,3,7>: Cost 3 vuzpr <3,5,7,0>, <1,3,5,7> + 2128388096U, // <5,0,3,u>: Cost 2 ins , lane 0 + 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS + 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> + 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> + 3184336899U, // <5,0,4,3>: Cost 3 ins <5,0,4,u>, lane 3 + 2687345005U, // <5,0,4,4>: Cost 3 vext3 <0,4,4,5>, <0,4,4,5> + 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS + 2846222850U, // <5,0,4,6>: Cost 3 vuzpr <4,5,6,0>, <3,4,5,6> + 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> + 1845019293U, // <5,0,4,u>: Cost 2 vzipl <5,4,7,6>, LHS + 1772481839U, // <5,0,5,0>: Cost 2 vuzpr <4,5,6,0>, <4,5,6,0> + 1845526630U, // <5,0,5,1>: Cost 2 vzipl <5,5,5,5>, LHS + 1979744358U, // <5,0,5,2>: Cost 2 vtrnl <5,5,5,5>, LHS + 3189678081U, // <5,0,5,3>: Cost 3 ins <5,u,5,3>, lane 1 + 2919268690U, // <5,0,5,4>: Cost 3 vzipl <5,5,5,5>, <0,4,1,5> + 2115952641U, // <5,0,5,5>: Cost 2 ins <5,u,5,5>, lane 1 + 3202310144U, // <5,0,5,6>: Cost 3 ins , lane 0 + 2115969025U, // <5,0,5,7>: Cost 2 ins <5,u,5,7>, lane 1 + 1845527197U, // <5,0,5,u>: Cost 2 vzipl <5,5,5,5>, LHS + 2973777920U, // <5,0,6,0>: Cost 3 vzipr <3,4,5,6>, <0,0,0,0> + 1846296678U, // <5,0,6,1>: Cost 2 vzipl <5,6,7,0>, LHS + 2128609280U, // <5,0,6,2>: Cost 2 ins , lane 0 + 3189751809U, // <5,0,6,3>: Cost 3 ins <5,u,6,3>, lane 1 + 2920038738U, // <5,0,6,4>: Cost 3 vzipl <5,6,7,0>, <0,4,1,5> + 2920038866U, // <5,0,6,5>: Cost 3 vzipl <5,6,7,0>, <0,5,6,7> + 3189776385U, // <5,0,6,6>: Cost 3 ins <5,u,6,6>, lane 1 + 2128650240U, // <5,0,6,7>: Cost 2 ins , lane 0 + 1846297245U, // <5,0,6,u>: Cost 2 vzipl <5,6,7,0>, LHS + 2040971264U, // <5,0,7,0>: Cost 2 vtrnr RHS, <0,0,0,0> + 2040971274U, // <5,0,7,1>: Cost 2 vtrnr RHS, <0,0,1,1> + 2040971284U, // <5,0,7,2>: Cost 2 vtrnr RHS, <0,0,2,2> + 2116083713U, // <5,0,7,3>: Cost 2 ins <5,u,7,3>, lane 1 + 2116091905U, // <5,0,7,4>: Cost 2 ins <5,u,7,4>, lane 1 + 3114715316U, // <5,0,7,5>: Cost 3 vtrnr RHS, <3,0,4,5> + 
2116108289U, // <5,0,7,6>: Cost 2 ins <5,u,7,6>, lane 1 + 2116116481U, // <5,0,7,7>: Cost 2 ins <5,u,7,7>, lane 1 + 2040971281U, // <5,0,7,u>: Cost 2 vtrnr RHS, <0,0,1,u> + 2040979456U, // <5,0,u,0>: Cost 2 vtrnr RHS, <0,0,0,0> + 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5> + 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS + 2115641345U, // <5,0,u,3>: Cost 2 ins <5,u,1,3>, lane 1 + 2116091905U, // <5,0,u,4>: Cost 2 ins <5,u,7,4>, lane 1 + 2115952641U, // <5,0,u,5>: Cost 2 ins <5,u,5,5>, lane 1 + 2116108289U, // <5,0,u,6>: Cost 2 ins <5,u,7,6>, lane 1 + 2115969025U, // <5,0,u,7>: Cost 2 ins <5,u,5,7>, lane 1 + 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS + 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0> + 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS + 1712324710U, // <5,1,0,2>: Cost 2 vuzpl <5,7,1,3>, LHS + 2111512578U, // <5,1,0,3>: Cost 2 ins <5,1,u,3>, lane 2 + 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5> + 2977710418U, // <5,1,0,5>: Cost 3 vzipr <4,1,5,0>, <0,4,1,5> + 3185278978U, // <5,1,0,6>: Cost 3 ins <5,1,u,6>, lane 2 + 3184705539U, // <5,1,0,7>: Cost 3 ins <5,1,0,u>, lane 3 + 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS + 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1> + 2128896000U, // <5,1,1,1>: Cost 2 ins , lane 0 + 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0> + 2115641345U, // <5,1,1,3>: Cost 2 ins <5,u,1,3>, lane 1 + 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5> + 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5> + 3189407745U, // <5,1,1,6>: Cost 3 ins <5,u,1,6>, lane 1 + 2982367283U, // <5,1,1,7>: Cost 3 vzipr <4,u,5,1>, <5,6,1,7> + 2115641345U, // <5,1,1,u>: Cost 2 ins <5,u,1,3>, lane 1 + 2128961536U, // <5,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <5,1,2,1>: Cost 2 ins , lane 0 + 2128977920U, // <5,1,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <5,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <5,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <5,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <5,1,2,6>: Cost 2 ins , lane 0 + 2129018880U, // <5,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <5,1,2,u>: Cost 1 ins LHS, lane 0 + 2571468902U, // <5,1,3,0>: Cost 3 vext1 <3,5,1,3>, LHS + 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3> + 2571470542U, // <5,1,3,2>: Cost 3 vext1 <3,5,1,3>, <2,3,4,5> + 2129059840U, // <5,1,3,3>: Cost 2 ins , lane 0 + 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5> + 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7> + 2595361654U, // <5,1,3,6>: Cost 3 vext1 <7,5,1,3>, <6,7,4,5> + 2840331264U, // <5,1,3,7>: Cost 3 vuzpr <3,5,7,1>, <1,3,5,7> + 2129059840U, // <5,1,3,u>: Cost 2 ins , lane 0 + 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1> + 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5> + 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5> + 2111512578U, // <5,1,4,3>: Cost 2 ins <5,1,u,3>, lane 2 + 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4> + 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS + 1712327990U, // <5,1,4,6>: Cost 2 vuzpl <5,7,1,3>, RHS + 3185000451U, // <5,1,4,7>: Cost 3 ins <5,1,4,u>, lane 3 + 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1> + 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1> + 1712328832U, // <5,1,5,1>: Cost 2 vuzpl <5,7,1,3>, <5,7,1,3> + 2982398102U, // <5,1,5,2>: Cost 3 vzipr <4,u,5,5>, <3,0,1,2> + 2046853222U, // <5,1,5,3>: Cost 2 vtrnr <5,5,5,5>, LHS + 2687124631U, // <5,1,5,4>: 
Cost 3 vext3 <0,4,1,5>, <1,5,4,5> + 2115952641U, // <5,1,5,5>: Cost 2 ins <5,u,5,5>, lane 1 + 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0> + 2115969025U, // <5,1,5,7>: Cost 2 ins <5,u,5,7>, lane 1 + 2046853227U, // <5,1,5,u>: Cost 2 vtrnr <5,5,5,5>, LHS + 2920039158U, // <5,1,6,0>: Cost 3 vzipl <5,6,7,0>, <1,0,3,2> + 2961834642U, // <5,1,6,1>: Cost 3 vzipr <1,4,5,6>, <0,u,1,1> + 2973780118U, // <5,1,6,2>: Cost 3 vzipr <3,4,5,6>, <3,0,1,2> + 2111512578U, // <5,1,6,3>: Cost 2 ins <5,1,u,3>, lane 2 + 2224227480U, // <5,1,6,4>: Cost 3 vrev <1,5,4,6> + 2973778258U, // <5,1,6,5>: Cost 3 vzipr <3,4,5,6>, <0,4,1,5> + 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6> + 2111553541U, // <5,1,6,7>: Cost 2 ins <5,1,u,u>, lane 5 + 2111512578U, // <5,1,6,u>: Cost 2 ins <5,1,u,3>, lane 2 + 2116059137U, // <5,1,7,0>: Cost 2 ins <5,u,7,0>, lane 1 + 2040972084U, // <5,1,7,1>: Cost 2 vtrnr RHS, <1,1,1,1> + 2111479811U, // <5,1,7,2>: Cost 2 ins <5,1,7,u>, lane 3 + 967229542U, // <5,1,7,3>: Cost 1 vtrnr RHS, LHS + 2116091905U, // <5,1,7,4>: Cost 2 ins <5,u,7,4>, lane 1 + 2111479811U, // <5,1,7,5>: Cost 2 ins <5,1,7,u>, lane 3 + 2116108289U, // <5,1,7,6>: Cost 2 ins <5,u,7,6>, lane 1 + 2116116481U, // <5,1,7,7>: Cost 2 ins <5,u,7,7>, lane 1 + 967229547U, // <5,1,7,u>: Cost 1 vtrnr RHS, LHS + 2116059137U, // <5,1,u,0>: Cost 2 ins <5,u,7,0>, lane 1 + 2040980276U, // <5,1,u,1>: Cost 2 vtrnr RHS, <1,1,1,1> + 1712330542U, // <5,1,u,2>: Cost 2 vuzpl <5,7,1,3>, LHS + 967237734U, // <5,1,u,3>: Cost 1 vtrnr RHS, LHS + 2116091905U, // <5,1,u,4>: Cost 2 ins <5,u,7,4>, lane 1 + 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS + 1712330906U, // <5,1,u,6>: Cost 2 vuzpl <5,7,1,3>, RHS + 2115969025U, // <5,1,u,7>: Cost 2 ins <5,u,5,7>, lane 1 + 967237739U, // <5,1,u,u>: Cost 1 vtrnr RHS, LHS + 2786132132U, // <5,2,0,0>: Cost 3 vuzpl <5,7,2,2>, <0,2,0,2> + 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS + 2129494016U, // <5,2,0,2>: Cost 2 ins , lane 0 + 2973728870U, // <5,2,0,3>: Cost 3 vzipr <3,4,5,0>, LHS + 2786164940U, // <5,2,0,4>: Cost 3 vuzpl <5,7,2,6>, <0,2,4,6> + 2782158977U, // <5,2,0,5>: Cost 3 vuzpl <5,1,2,3>, <0,1,5,3> + 3185942530U, // <5,2,0,6>: Cost 3 ins <5,2,u,6>, lane 2 + 3114658883U, // <5,2,0,7>: Cost 3 vtrnr <4,5,6,0>, <4,2,6,7> + 2129494016U, // <5,2,0,u>: Cost 2 ins , lane 0 + 3054503590U, // <5,2,1,0>: Cost 3 vtrnl <5,7,1,3>, <2,3,0,1> + 3203301376U, // <5,2,1,1>: Cost 3 ins , lane 0 + 2982363156U, // <5,2,1,2>: Cost 3 vzipr <4,u,5,1>, <0,0,2,2> + 1908621414U, // <5,2,1,3>: Cost 2 vzipr <4,u,5,1>, LHS + 3054503630U, // <5,2,1,4>: Cost 3 vtrnl <5,7,1,3>, <2,3,4,5> + 2601390208U, // <5,2,1,5>: Cost 3 vext1 , <5,7,1,3> + 2982363484U, // <5,2,1,6>: Cost 3 vzipr <4,u,5,1>, <0,4,2,6> + 3189415937U, // <5,2,1,7>: Cost 3 ins <5,u,1,7>, lane 1 + 1908621419U, // <5,2,1,u>: Cost 2 vzipr <4,u,5,1>, LHS + 3203366912U, // <5,2,2,0>: Cost 3 ins , lane 0 + 3203375104U, // <5,2,2,1>: Cost 3 ins , lane 0 + 2129641472U, // <5,2,2,2>: Cost 2 ins , lane 0 + 2129649664U, // <5,2,2,3>: Cost 2 ins , lane 0 + 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5> + 2698036870U, // <5,2,2,5>: Cost 3 vext3 <2,2,5,5>, <2,2,5,5> + 3189481473U, // <5,2,2,6>: Cost 3 ins <5,u,2,6>, lane 1 + 2846239811U, // <5,2,2,7>: Cost 3 vuzpr <4,5,6,2>, <4,2,6,7> + 2129641472U, // <5,2,2,u>: Cost 2 ins , lane 0 + 2129698816U, // <5,2,3,0>: Cost 2 ins , lane 0 + 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5> + 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5> + 2129723392U, // 
<5,2,3,3>: Cost 2 ins , lane 0 + 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5> + 2717943511U, // <5,2,3,5>: Cost 3 vext3 <5,5,5,5>, <2,3,5,5> + 3203489792U, // <5,2,3,6>: Cost 3 ins , lane 0 + 2827879424U, // <5,2,3,7>: Cost 3 vuzpr <1,5,0,2>, <1,3,5,7> + 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5> + 3203514368U, // <5,2,4,0>: Cost 3 ins , lane 0 + 3189587969U, // <5,2,4,1>: Cost 3 ins <5,u,4,1>, lane 1 + 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5> + 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5> + 3203547136U, // <5,2,4,4>: Cost 3 ins , lane 0 + 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS + 2129821696U, // <5,2,4,6>: Cost 2 ins , lane 0 + 2846239973U, // <5,2,4,7>: Cost 3 vuzpr <4,5,6,2>, <4,4,6,7> + 2129821696U, // <5,2,4,u>: Cost 2 ins , lane 0 + 3053487782U, // <5,2,5,0>: Cost 3 vtrnl <5,5,5,5>, <2,3,0,1> + 3203596288U, // <5,2,5,1>: Cost 3 ins , lane 0 + 1772498225U, // <5,2,5,2>: Cost 2 vuzpr <4,5,6,2>, <4,5,6,2> + 1908654182U, // <5,2,5,3>: Cost 2 vzipr <4,u,5,5>, LHS + 3053487822U, // <5,2,5,4>: Cost 3 vtrnl <5,5,5,5>, <2,3,4,5> + 2115952641U, // <5,2,5,5>: Cost 2 ins <5,u,5,5>, lane 1 + 2982396252U, // <5,2,5,6>: Cost 3 vzipr <4,u,5,5>, <0,4,2,6> + 2115969025U, // <5,2,5,7>: Cost 2 ins <5,u,5,7>, lane 1 + 1908654187U, // <5,2,5,u>: Cost 2 vzipr <4,u,5,5>, LHS + 3203661824U, // <5,2,6,0>: Cost 3 ins , lane 0 + 3189735425U, // <5,2,6,1>: Cost 3 ins <5,u,6,1>, lane 1 + 2973777940U, // <5,2,6,2>: Cost 3 vzipr <3,4,5,6>, <0,0,2,2> + 1900036198U, // <5,2,6,3>: Cost 2 vzipr <3,4,5,6>, LHS + 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5> + 2973778186U, // <5,2,6,5>: Cost 3 vzipr <3,4,5,6>, <0,3,2,5> + 2973778268U, // <5,2,6,6>: Cost 3 vzipr <3,4,5,6>, <0,4,2,6> + 2129977344U, // <5,2,6,7>: Cost 2 ins , lane 0 + 1900036203U, // <5,2,6,u>: Cost 2 vzipr <3,4,5,6>, LHS + 2040972182U, // <5,2,7,0>: Cost 2 vtrnr RHS, <1,2,3,0> + 3114713251U, // <5,2,7,1>: Cost 3 vtrnr RHS, <0,2,0,1> + 2040971428U, // <5,2,7,2>: Cost 2 vtrnr RHS, <0,2,0,2> + 1887436902U, // <5,2,7,3>: Cost 2 vzipr <1,3,5,7>, LHS + 2040972186U, // <5,2,7,4>: Cost 2 vtrnr RHS, <1,2,3,4> + 2961178728U, // <5,2,7,5>: Cost 3 vzipr <1,3,5,7>, <0,1,2,5> + 2040971468U, // <5,2,7,6>: Cost 2 vtrnr RHS, <0,2,4,6> + 2116116481U, // <5,2,7,7>: Cost 2 ins <5,u,7,7>, lane 1 + 1887436907U, // <5,2,7,u>: Cost 2 vzipr <1,3,5,7>, LHS + 2040980374U, // <5,2,u,0>: Cost 2 vtrnr RHS, <1,2,3,0> + 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS + 2040979620U, // <5,2,u,2>: Cost 2 vtrnr RHS, <0,2,0,2> + 1887445094U, // <5,2,u,3>: Cost 2 vzipr <1,3,5,u>, LHS + 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5> + 2115952641U, // <5,2,u,5>: Cost 2 ins <5,u,5,5>, lane 1 + 2040979660U, // <5,2,u,6>: Cost 2 vtrnr RHS, <0,2,4,6> + 2115969025U, // <5,2,u,7>: Cost 2 ins <5,u,5,7>, lane 1 + 1887445099U, // <5,2,u,u>: Cost 2 vzipr <1,3,5,u>, LHS + 3203883008U, // <5,3,0,0>: Cost 3 ins , lane 0 + 2130149376U, // <5,3,0,1>: Cost 2 ins , lane 0 + 2782904422U, // <5,3,0,2>: Cost 3 vuzpl <5,2,3,4>, LHS + 3186581506U, // <5,3,0,3>: Cost 3 ins <5,3,u,3>, lane 2 + 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1> + 3053750786U, // <5,3,0,5>: Cost 3 vtrnl <5,6,0,1>, <3,4,5,6> + 2618302971U, // <5,3,0,6>: Cost 3 vext2 <0,1,5,3>, <0,6,2,3> + 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0> + 2130149376U, // <5,3,0,u>: Cost 2 ins , lane 0 + 2982364054U, // <5,3,1,0>: Cost 3 vzipr <4,u,5,1>, <1,2,3,0> + 3054504086U, // <5,3,1,1>: Cost 3 vtrnl <5,7,1,3>, <3,0,1,2> 
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3> + 2130239488U, // <5,3,1,3>: Cost 2 ins , lane 0 + 2982364058U, // <5,3,1,4>: Cost 3 vzipr <4,u,5,1>, <1,2,3,4> + 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7> + 3189407745U, // <5,3,1,6>: Cost 3 ins <5,u,1,6>, lane 1 + 2964448400U, // <5,3,1,7>: Cost 3 vzipr <1,u,5,1>, <1,5,3,7> + 2130239488U, // <5,3,1,u>: Cost 2 ins , lane 0 + 2235845154U, // <5,3,2,0>: Cost 3 vrev <3,5,0,2> + 3204038656U, // <5,3,2,1>: Cost 3 ins , lane 0 + 3204046848U, // <5,3,2,2>: Cost 3 ins , lane 0 + 2130313216U, // <5,3,2,3>: Cost 2 ins , lane 0 + 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5> + 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4> + 3204079616U, // <5,3,2,6>: Cost 3 ins , lane 0 + 3096314880U, // <5,3,2,7>: Cost 3 vtrnr <1,5,0,2>, <1,3,5,7> + 2130313216U, // <5,3,2,u>: Cost 2 ins , lane 0 + 3204104192U, // <5,3,3,0>: Cost 3 ins , lane 0 + 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3> + 3204120576U, // <5,3,3,2>: Cost 3 ins , lane 0 + 2130386944U, // <5,3,3,3>: Cost 2 ins , lane 0 + 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5> + 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5> + 3189555201U, // <5,3,3,6>: Cost 3 ins <5,u,3,6>, lane 1 + 2971763856U, // <5,3,3,7>: Cost 3 vzipr <3,1,5,3>, <1,5,3,7> + 2130386944U, // <5,3,3,u>: Cost 2 ins , lane 0 + 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5> + 2642193381U, // <5,3,4,1>: Cost 3 vext2 <4,1,5,3>, <4,1,5,3> + 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3> + 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5> + 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5> + 2130477056U, // <5,3,4,5>: Cost 2 ins , lane 0 + 2846247426U, // <5,3,4,6>: Cost 3 vuzpr <4,5,6,3>, <3,4,5,6> + 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4> + 2130477056U, // <5,3,4,u>: Cost 2 ins , lane 0 + 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS + 3053488278U, // <5,3,5,1>: Cost 3 vtrnl <5,5,5,5>, <3,0,1,2> + 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5> + 1748320682U, // <5,3,5,3>: Cost 2 vuzpr <0,5,2,3>, <0,5,2,3> + 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS + 2115952641U, // <5,3,5,5>: Cost 2 ins <5,u,5,5>, lane 1 + 3204300800U, // <5,3,5,6>: Cost 3 ins , lane 0 + 2130567168U, // <5,3,5,7>: Cost 2 ins , lane 0 + 2130567168U, // <5,3,5,u>: Cost 2 ins , lane 0 + 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS + 3204333568U, // <5,3,6,1>: Cost 3 ins , lane 0 + 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6> + 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6> + 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS + 2973778114U, // <5,3,6,5>: Cost 3 vzipr <3,4,5,6>, <0,2,3,5> + 2973779816U, // <5,3,6,6>: Cost 3 vzipr <3,4,5,6>, <2,5,3,6> + 2130640896U, // <5,3,6,7>: Cost 2 ins , lane 0 + 2130640896U, // <5,3,6,u>: Cost 2 ins , lane 0 + 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS + 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7> + 2961179382U, // <5,3,7,2>: Cost 3 vzipr <1,3,5,7>, <1,0,3,2> + 2040972248U, // <5,3,7,3>: Cost 2 vtrnr RHS, <1,3,1,3> + 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS + 2040973006U, // <5,3,7,5>: Cost 2 vtrnr RHS, <2,3,4,5> + 2116108289U, // <5,3,7,6>: Cost 2 ins <5,u,7,6>, lane 1 + 2040972288U, // <5,3,7,7>: Cost 2 vtrnr RHS, <1,3,5,7> + 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS + 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS + 1485972625U, // 
<5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u> + 2961187574U, // <5,3,u,2>: Cost 3 vzipr <1,3,5,u>, <1,0,3,2> + 2040980440U, // <5,3,u,3>: Cost 2 vtrnr RHS, <1,3,1,3> + 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS + 2040981198U, // <5,3,u,5>: Cost 2 vtrnr RHS, <2,3,4,5> + 2116108289U, // <5,3,u,6>: Cost 2 ins <5,u,7,6>, lane 1 + 2040980480U, // <5,3,u,7>: Cost 2 vtrnr RHS, <1,3,5,7> + 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS + 3189284865U, // <5,4,0,0>: Cost 3 ins <5,u,0,0>, lane 1 + 2113544197U, // <5,4,0,1>: Cost 2 ins <5,4,u,u>, lane 5 + 2781626470U, // <5,4,0,2>: Cost 3 vuzpl <5,0,4,1>, LHS + 2242022676U, // <5,4,0,3>: Cost 3 vrev <4,5,3,0> + 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5> + 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1> + 2113527810U, // <5,4,0,6>: Cost 2 ins <5,4,u,6>, lane 2 + 3114659045U, // <5,4,0,7>: Cost 3 vtrnr <4,5,6,0>, <4,4,6,7> + 2113544197U, // <5,4,0,u>: Cost 2 ins <5,4,u,u>, lane 5 + 1168067834U, // <5,4,1,0>: Cost 2 vrev <4,5,0,1> + 3189366785U, // <5,4,1,1>: Cost 3 ins <5,u,1,1>, lane 1 + 3204636672U, // <5,4,1,2>: Cost 3 ins , lane 0 + 2115641345U, // <5,4,1,3>: Cost 2 ins <5,u,1,3>, lane 1 + 2982366416U, // <5,4,1,4>: Cost 3 vzipr <4,u,5,1>, <4,4,4,4> + 1843006774U, // <5,4,1,5>: Cost 2 vzipl <5,1,7,3>, RHS + 1980763446U, // <5,4,1,6>: Cost 2 vtrnl <5,7,1,3>, RHS + 3189415937U, // <5,4,1,7>: Cost 3 ins <5,u,1,7>, lane 1 + 1843007017U, // <5,4,1,u>: Cost 2 vzipl <5,1,7,3>, RHS + 3204694016U, // <5,4,2,0>: Cost 3 ins , lane 0 + 2241891588U, // <5,4,2,1>: Cost 3 vrev <4,5,1,2> + 3189448705U, // <5,4,2,2>: Cost 3 ins <5,u,2,2>, lane 1 + 2113544197U, // <5,4,2,3>: Cost 2 ins <5,4,u,u>, lane 5 + 3204726784U, // <5,4,2,4>: Cost 3 ins , lane 0 + 2973746894U, // <5,4,2,5>: Cost 3 vzipr <3,4,5,2>, <2,3,4,5> + 2131001344U, // <5,4,2,6>: Cost 2 ins , lane 0 + 3114675429U, // <5,4,2,7>: Cost 3 vtrnr <4,5,6,2>, <4,4,6,7> + 2113544197U, // <5,4,2,u>: Cost 2 ins <5,4,u,u>, lane 5 + 3204767744U, // <5,4,3,0>: Cost 3 ins , lane 0 + 2241899781U, // <5,4,3,1>: Cost 3 vrev <4,5,1,3> + 1168231694U, // <5,4,3,2>: Cost 2 vrev <4,5,2,3> + 3189530625U, // <5,4,3,3>: Cost 3 ins <5,u,3,3>, lane 1 + 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4> + 2978399950U, // <5,4,3,5>: Cost 3 vzipr <4,2,5,3>, <2,3,4,5> + 2113527810U, // <5,4,3,6>: Cost 2 ins <5,4,u,6>, lane 2 + 2840355840U, // <5,4,3,7>: Cost 3 vuzpr <3,5,7,4>, <1,3,5,7> + 2113527810U, // <5,4,3,u>: Cost 2 ins <5,4,u,6>, lane 2 + 2918763410U, // <5,4,4,0>: Cost 3 vzipl <5,4,7,6>, <4,0,5,1> + 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4> + 3186991107U, // <5,4,4,2>: Cost 3 ins <5,4,4,u>, lane 3 + 3186991107U, // <5,4,4,3>: Cost 3 ins <5,4,4,u>, lane 3 + 2131132416U, // <5,4,4,4>: Cost 2 ins , lane 0 + 1845022006U, // <5,4,4,5>: Cost 2 vzipl <5,4,7,6>, RHS + 2113527810U, // <5,4,4,6>: Cost 2 ins <5,4,u,6>, lane 2 + 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4> + 1845022249U, // <5,4,4,u>: Cost 2 vzipl <5,4,7,6>, RHS + 1503936614U, // <5,4,5,0>: Cost 2 vext1 <4,5,4,5>, LHS + 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5> + 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3> + 3189678081U, // <5,4,5,3>: Cost 3 ins <5,u,5,3>, lane 1 + 1168395554U, // <5,4,5,4>: Cost 2 vrev <4,5,4,5> + 1845529910U, // <5,4,5,5>: Cost 2 vzipl <5,5,5,5>, RHS + 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS + 2115969025U, // <5,4,5,7>: Cost 2 ins <5,u,5,7>, lane 1 + 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS + 
2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS + 2559771800U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,5,4,6> + 3189743617U, // <5,4,6,2>: Cost 3 ins <5,u,6,2>, lane 1 + 2571717194U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,5,4,6> + 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS + 1846299958U, // <5,4,6,5>: Cost 2 vzipl <5,6,7,0>, RHS + 2131296256U, // <5,4,6,6>: Cost 2 ins , lane 0 + 2113544197U, // <5,4,6,7>: Cost 2 ins <5,4,u,u>, lane 5 + 1846300201U, // <5,4,6,u>: Cost 2 vzipl <5,6,7,0>, RHS + 2116059137U, // <5,4,7,0>: Cost 2 ins <5,u,7,0>, lane 1 + 2113470467U, // <5,4,7,1>: Cost 2 ins <5,4,7,u>, lane 3 + 2113470467U, // <5,4,7,2>: Cost 2 ins <5,4,7,u>, lane 3 + 2116083713U, // <5,4,7,3>: Cost 2 ins <5,u,7,3>, lane 1 + 2040974544U, // <5,4,7,4>: Cost 2 vtrnr RHS, <4,4,4,4> + 2040971602U, // <5,4,7,5>: Cost 2 vtrnr RHS, <0,4,1,5> + 94817590U, // <5,4,7,6>: Cost 1 vrev RHS + 2116116481U, // <5,4,7,7>: Cost 2 ins <5,u,7,7>, lane 1 + 94965064U, // <5,4,7,u>: Cost 1 vrev RHS + 2116059137U, // <5,4,u,0>: Cost 2 ins <5,u,7,0>, lane 1 + 2113544197U, // <5,4,u,1>: Cost 2 ins <5,4,u,u>, lane 5 + 2113470467U, // <5,4,u,2>: Cost 2 ins <5,4,7,u>, lane 3 + 2115641345U, // <5,4,u,3>: Cost 2 ins <5,u,1,3>, lane 1 + 2040982736U, // <5,4,u,4>: Cost 2 vtrnr RHS, <4,4,4,4> + 2040979794U, // <5,4,u,5>: Cost 2 vtrnr RHS, <0,4,1,5> + 94825783U, // <5,4,u,6>: Cost 1 vrev RHS + 2115969025U, // <5,4,u,7>: Cost 2 ins <5,u,5,7>, lane 1 + 94973257U, // <5,4,u,u>: Cost 1 vrev RHS + 2040917295U, // <5,5,0,0>: Cost 2 vtrnr <4,5,6,0>, <4,5,6,0> + 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS + 1711308902U, // <5,5,0,2>: Cost 2 vuzpl <5,5,5,5>, LHS + 3187908610U, // <5,5,0,3>: Cost 3 ins <5,5,u,3>, lane 2 + 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1> + 2114183170U, // <5,5,0,5>: Cost 2 ins <5,5,u,5>, lane 2 + 3187933186U, // <5,5,0,6>: Cost 3 ins <5,5,u,6>, lane 2 + 2114199554U, // <5,5,0,7>: Cost 2 ins <5,5,u,7>, lane 2 + 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS + 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2> + 1908624922U, // <5,5,1,1>: Cost 2 vzipr <4,u,5,1>, <4,u,5,1> + 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0> + 1778417766U, // <5,5,1,3>: Cost 2 vuzpr <5,5,5,5>, LHS + 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5> + 2114183170U, // <5,5,1,5>: Cost 2 ins <5,5,u,5>, lane 2 + 2982365698U, // <5,5,1,6>: Cost 3 vzipr <4,u,5,1>, <3,4,5,6> + 2114199554U, // <5,5,1,7>: Cost 2 ins <5,5,u,7>, lane 2 + 1778417771U, // <5,5,1,u>: Cost 2 vuzpr <5,5,5,5>, LHS + 2785052326U, // <5,5,2,0>: Cost 3 vuzpl <5,5,5,5>, <2,3,0,1> + 3205365760U, // <5,5,2,1>: Cost 3 ins , lane 0 + 2040933681U, // <5,5,2,2>: Cost 2 vtrnr <4,5,6,2>, <4,5,6,2> + 2114207749U, // <5,5,2,3>: Cost 2 ins <5,5,u,u>, lane 5 + 2785052366U, // <5,5,2,4>: Cost 3 vuzpl <5,5,5,5>, <2,3,4,5> + 2114183170U, // <5,5,2,5>: Cost 2 ins <5,5,u,5>, lane 2 + 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7> + 2114199554U, // <5,5,2,7>: Cost 2 ins <5,5,u,7>, lane 2 + 2114207749U, // <5,5,2,u>: Cost 2 ins <5,5,u,u>, lane 5 + 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2> + 2785052822U, // <5,5,3,1>: Cost 3 vuzpl <5,5,5,5>, <3,0,1,2> + 3187900418U, // <5,5,3,2>: Cost 3 ins <5,5,u,2>, lane 2 + 1880105089U, // <5,5,3,3>: Cost 2 vzipr <0,1,5,3>, <0,1,5,3> + 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6> + 2114183170U, // <5,5,3,5>: Cost 2 ins <5,5,u,5>, lane 2 + 3205480448U, // <5,5,3,6>: Cost 3 ins , lane 0 + 2131746816U, // <5,5,3,7>: Cost 
2 ins , lane 0 + 2131746816U, // <5,5,3,u>: Cost 2 ins , lane 0 + 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1> + 2716987279U, // <5,5,4,1>: Cost 3 vext3 <5,4,1,5>, <5,4,1,5> + 3187900418U, // <5,5,4,2>: Cost 3 ins <5,5,u,2>, lane 2 + 3187908610U, // <5,5,4,3>: Cost 3 ins <5,5,u,3>, lane 2 + 1845022662U, // <5,5,4,4>: Cost 2 vzipl <5,4,7,6>, <5,4,7,6> + 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS + 1711312182U, // <5,5,4,6>: Cost 2 vuzpl <5,5,5,5>, RHS + 2114199554U, // <5,5,4,7>: Cost 2 ins <5,5,u,7>, lane 2 + 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5> + 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS + 2113986563U, // <5,5,5,1>: Cost 2 ins <5,5,5,u>, lane 3 + 2113986563U, // <5,5,5,2>: Cost 2 ins <5,5,5,u>, lane 3 + 2113986563U, // <5,5,5,3>: Cost 2 ins <5,5,5,u>, lane 3 + 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS + 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS + 2113986563U, // <5,5,5,6>: Cost 2 ins <5,5,5,u>, lane 3 + 1778421046U, // <5,5,5,7>: Cost 2 vuzpr <5,5,5,5>, RHS + 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS + 2131910656U, // <5,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <5,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <5,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // <5,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <5,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <5,5,6,5>: Cost 2 ins , lane 0 + 1900038658U, // <5,5,6,6>: Cost 2 vzipr <3,4,5,6>, <3,4,5,6> + 1058226176U, // <5,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <5,5,6,u>: Cost 1 ins RHS, lane 0 + 2116059137U, // <5,5,7,0>: Cost 2 ins <5,u,7,0>, lane 1 + 2114134019U, // <5,5,7,1>: Cost 2 ins <5,5,7,u>, lane 3 + 2114134019U, // <5,5,7,2>: Cost 2 ins <5,5,7,u>, lane 3 + 2116083713U, // <5,5,7,3>: Cost 2 ins <5,u,7,3>, lane 1 + 2116091905U, // <5,5,7,4>: Cost 2 ins <5,u,7,4>, lane 1 + 2040975364U, // <5,5,7,5>: Cost 2 vtrnr RHS, <5,5,5,5> + 2116108289U, // <5,5,7,6>: Cost 2 ins <5,u,7,6>, lane 1 + 967232822U, // <5,5,7,7>: Cost 1 vtrnr RHS, RHS + 967232823U, // <5,5,7,u>: Cost 1 vtrnr RHS, RHS + 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS + 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS + 1711314734U, // <5,5,u,2>: Cost 2 vuzpl <5,5,5,5>, LHS + 1778418333U, // <5,5,u,3>: Cost 2 vuzpr <5,5,5,5>, LHS + 1845022662U, // <5,5,u,4>: Cost 2 vzipl <5,4,7,6>, <5,4,7,6> + 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS + 1711315098U, // <5,5,u,6>: Cost 2 vuzpl <5,5,5,5>, RHS + 967241014U, // <5,5,u,7>: Cost 1 vtrnr RHS, RHS + 967241015U, // <5,5,u,u>: Cost 1 vtrnr RHS, RHS + 2114805762U, // <5,6,0,0>: Cost 2 ins <5,6,u,0>, lane 2 + 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS + 2132148224U, // <5,6,0,2>: Cost 2 ins , lane 0 + 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4> + 2114838530U, // <5,6,0,4>: Cost 2 ins <5,6,u,4>, lane 2 + 3188588546U, // <5,6,0,5>: Cost 3 ins <5,6,u,5>, lane 2 + 3188596738U, // <5,6,0,6>: Cost 3 ins <5,6,u,6>, lane 2 + 2973732150U, // <5,6,0,7>: Cost 3 vzipr <3,4,5,0>, RHS + 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS + 2114805762U, // <5,6,1,0>: Cost 2 ins <5,6,u,0>, lane 2 + 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1> + 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0> + 2115641345U, // <5,6,1,3>: Cost 2 ins <5,u,1,3>, lane 1 + 2114838530U, // <5,6,1,4>: Cost 2 ins <5,6,u,4>, lane 2 + 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7> + 2982366436U, // <5,6,1,6>: Cost 3 vzipr <4,u,5,1>, <4,4,6,6> + 1908624694U, // <5,6,1,7>: Cost 2 vzipr <4,u,5,1>, RHS 
+ 1908624695U, // <5,6,1,u>: Cost 2 vzipr <4,u,5,1>, RHS + 2114805762U, // <5,6,2,0>: Cost 2 ins <5,6,u,0>, lane 2 + 3188555778U, // <5,6,2,1>: Cost 3 ins <5,6,u,1>, lane 2 + 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2> + 2114871301U, // <5,6,2,3>: Cost 2 ins <5,6,u,u>, lane 5 + 2114838530U, // <5,6,2,4>: Cost 2 ins <5,6,u,4>, lane 2 + 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6> + 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7> + 2964458806U, // <5,6,2,7>: Cost 3 vzipr <1,u,5,2>, RHS + 2114805762U, // <5,6,2,u>: Cost 2 ins <5,6,u,0>, lane 2 + 2114805762U, // <5,6,3,0>: Cost 2 ins <5,6,u,0>, lane 2 + 3206103040U, // <5,6,3,1>: Cost 3 ins , lane 0 + 3206111232U, // <5,6,3,2>: Cost 3 ins , lane 0 + 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3> + 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6> + 2783119874U, // <5,6,3,5>: Cost 3 vuzpl <5,2,6,3>, <3,4,5,6> + 3206144000U, // <5,6,3,6>: Cost 3 ins , lane 0 + 2132410368U, // <5,6,3,7>: Cost 2 ins , lane 0 + 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6> + 2114805762U, // <5,6,4,0>: Cost 2 ins <5,6,u,0>, lane 2 + 3189587969U, // <5,6,4,1>: Cost 3 ins <5,u,4,1>, lane 1 + 2918765050U, // <5,6,4,2>: Cost 3 vzipl <5,4,7,6>, <6,2,7,3> + 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5> + 2114838530U, // <5,6,4,4>: Cost 2 ins <5,6,u,4>, lane 2 + 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS + 2132475904U, // <5,6,4,6>: Cost 2 ins , lane 0 + 2972437814U, // <5,6,4,7>: Cost 3 vzipr <3,2,5,4>, RHS + 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS + 2114805762U, // <5,6,5,0>: Cost 2 ins <5,6,u,0>, lane 2 + 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3> + 2982398876U, // <5,6,5,2>: Cost 3 vzipr <4,u,5,5>, <4,0,6,2> + 3189678081U, // <5,6,5,3>: Cost 3 ins <5,u,5,3>, lane 1 + 2114838530U, // <5,6,5,4>: Cost 2 ins <5,6,u,4>, lane 2 + 2115952641U, // <5,6,5,5>: Cost 2 ins <5,u,5,5>, lane 1 + 1772530997U, // <5,6,5,6>: Cost 2 vuzpr <4,5,6,6>, <4,5,6,6> + 1908657462U, // <5,6,5,7>: Cost 2 vzipr <4,u,5,5>, RHS + 1908657463U, // <5,6,5,u>: Cost 2 vzipr <4,u,5,5>, RHS + 2114805762U, // <5,6,6,0>: Cost 2 ins <5,6,u,0>, lane 2 + 3189735425U, // <5,6,6,1>: Cost 3 ins <5,u,6,1>, lane 1 + 2920043002U, // <5,6,6,2>: Cost 3 vzipl <5,6,7,0>, <6,2,7,3> + 2973781298U, // <5,6,6,3>: Cost 3 vzipr <3,4,5,6>, <4,5,6,3> + 2114838530U, // <5,6,6,4>: Cost 2 ins <5,6,u,4>, lane 2 + 2973781138U, // <5,6,6,5>: Cost 3 vzipr <3,4,5,6>, <4,3,6,5> + 2132623360U, // <5,6,6,6>: Cost 2 ins , lane 0 + 1900039478U, // <5,6,6,7>: Cost 2 vzipr <3,4,5,6>, RHS + 1900039479U, // <5,6,6,u>: Cost 2 vzipr <3,4,5,6>, RHS + 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS + 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> + 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> + 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> + 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS + 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6> + 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6> + 1887440182U, // <5,6,7,7>: Cost 2 vzipr <1,3,5,7>, RHS + 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS + 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS + 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS + 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> + 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> + 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS + 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS + 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, 
<6,2,7,3> + 1887448374U, // <5,6,u,7>: Cost 2 vzipr <1,3,5,u>, RHS + 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS + 1772535808U, // <5,7,0,0>: Cost 2 vuzpr RHS, <0,0,0,0> + 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS + 1772535828U, // <5,7,0,2>: Cost 2 vuzpr RHS, <0,0,2,2> + 2115493890U, // <5,7,0,3>: Cost 2 ins <5,7,u,3>, lane 2 + 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5> + 2846279860U, // <5,7,0,5>: Cost 3 vuzpr RHS, <3,0,4,5> + 2846277674U, // <5,7,0,6>: Cost 3 vuzpr RHS, <0,0,4,6> + 2115526658U, // <5,7,0,7>: Cost 2 ins <5,7,u,7>, lane 2 + 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS + 2115018755U, // <5,7,1,0>: Cost 2 ins <5,7,1,u>, lane 3 + 1772536628U, // <5,7,1,1>: Cost 2 vuzpr RHS, <1,1,1,1> + 2115018755U, // <5,7,1,2>: Cost 2 ins <5,7,1,u>, lane 3 + 698794086U, // <5,7,1,3>: Cost 1 vuzpr RHS, LHS + 2115018755U, // <5,7,1,4>: Cost 2 ins <5,7,1,u>, lane 3 + 2115018755U, // <5,7,1,5>: Cost 2 ins <5,7,1,u>, lane 3 + 2115018755U, // <5,7,1,6>: Cost 2 ins <5,7,1,u>, lane 3 + 2115526658U, // <5,7,1,7>: Cost 2 ins <5,7,u,7>, lane 2 + 698794091U, // <5,7,1,u>: Cost 1 vuzpr RHS, LHS + 1772536726U, // <5,7,2,0>: Cost 2 vuzpr RHS, <1,2,3,0> + 2846277795U, // <5,7,2,1>: Cost 3 vuzpr RHS, <0,2,0,1> + 1772535972U, // <5,7,2,2>: Cost 2 vuzpr RHS, <0,2,0,2> + 1772537458U, // <5,7,2,3>: Cost 2 vuzpr RHS, <2,2,3,3> + 1772536730U, // <5,7,2,4>: Cost 2 vuzpr RHS, <1,2,3,4> + 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7> + 1772536012U, // <5,7,2,6>: Cost 2 vuzpr RHS, <0,2,4,6> + 2115526658U, // <5,7,2,7>: Cost 2 ins <5,7,u,7>, lane 2 + 1772535978U, // <5,7,2,u>: Cost 2 vuzpr RHS, <0,2,0,u> + 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2> + 1772537510U, // <5,7,3,1>: Cost 2 vuzpr RHS, <2,3,0,1> + 2846278606U, // <5,7,3,2>: Cost 3 vuzpr RHS, <1,3,0,2> + 1772536792U, // <5,7,3,3>: Cost 2 vuzpr RHS, <1,3,1,3> + 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6> + 1772537550U, // <5,7,3,5>: Cost 2 vuzpr RHS, <2,3,4,5> + 2846278628U, // <5,7,3,6>: Cost 3 vuzpr RHS, <1,3,2,6> + 1772536832U, // <5,7,3,7>: Cost 2 vuzpr RHS, <1,3,5,7> + 1772536797U, // <5,7,3,u>: Cost 2 vuzpr RHS, <1,3,1,u> + 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS + 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7> + 2846277958U, // <5,7,4,2>: Cost 3 vuzpr RHS, <0,4,0,2> + 2115493890U, // <5,7,4,3>: Cost 2 ins <5,7,u,3>, lane 2 + 1772539088U, // <5,7,4,4>: Cost 2 vuzpr RHS, <4,4,4,4> + 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS + 1772536156U, // <5,7,4,6>: Cost 2 vuzpr RHS, <0,4,2,6> + 2115526658U, // <5,7,4,7>: Cost 2 ins <5,7,u,7>, lane 2 + 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS + 2115313667U, // <5,7,5,0>: Cost 2 ins <5,7,5,u>, lane 3 + 2115313667U, // <5,7,5,1>: Cost 2 ins <5,7,5,u>, lane 3 + 2115313667U, // <5,7,5,2>: Cost 2 ins <5,7,5,u>, lane 3 + 2115493890U, // <5,7,5,3>: Cost 2 ins <5,7,u,3>, lane 2 + 2115313667U, // <5,7,5,4>: Cost 2 ins <5,7,5,u>, lane 3 + 1772539908U, // <5,7,5,5>: Cost 2 vuzpr RHS, <5,5,5,5> + 2115313667U, // <5,7,5,6>: Cost 2 ins <5,7,5,u>, lane 3 + 698797366U, // <5,7,5,7>: Cost 1 vuzpr RHS, RHS + 698797367U, // <5,7,5,u>: Cost 1 vuzpr RHS, RHS + 1772540002U, // <5,7,6,0>: Cost 2 vuzpr RHS, <5,6,7,0> + 2846279577U, // <5,7,6,1>: Cost 3 vuzpr RHS, <2,6,0,1> + 1772539212U, // <5,7,6,2>: Cost 2 vuzpr RHS, <4,6,0,2> + 2115493890U, // <5,7,6,3>: Cost 2 ins <5,7,u,3>, lane 2 + 1772540006U, // <5,7,6,4>: Cost 2 vuzpr RHS, <5,6,7,4> + 2846279617U, // <5,7,6,5>: Cost 3 vuzpr RHS, 
<2,6,4,5> + 1772539252U, // <5,7,6,6>: Cost 2 vuzpr RHS, <4,6,4,6> + 1772537786U, // <5,7,6,7>: Cost 2 vuzpr RHS, <2,6,3,7> + 1772537787U, // <5,7,6,u>: Cost 2 vuzpr RHS, <2,6,3,u> + 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS + 1772540750U, // <5,7,7,1>: Cost 2 vuzpr RHS, <6,7,0,1> + 2846281846U, // <5,7,7,2>: Cost 3 vuzpr RHS, <5,7,0,2> + 1772540032U, // <5,7,7,3>: Cost 2 vuzpr RHS, <5,7,1,3> + 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS + 1772540790U, // <5,7,7,5>: Cost 2 vuzpr RHS, <6,7,4,5> + 2116108289U, // <5,7,7,6>: Cost 2 ins <5,u,7,6>, lane 1 + 1772540072U, // <5,7,7,7>: Cost 2 vuzpr RHS, <5,7,5,7> + 1772540037U, // <5,7,7,u>: Cost 2 vuzpr RHS, <5,7,1,u> + 1772537212U, // <5,7,u,0>: Cost 2 vuzpr RHS, <1,u,3,0> + 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS + 1772536458U, // <5,7,u,2>: Cost 2 vuzpr RHS, <0,u,0,2> + 698794653U, // <5,7,u,3>: Cost 1 vuzpr RHS, LHS + 1772537216U, // <5,7,u,4>: Cost 2 vuzpr RHS, <1,u,3,4> + 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS + 1772536480U, // <5,7,u,6>: Cost 2 vuzpr RHS, <0,u,2,6> + 698797609U, // <5,7,u,7>: Cost 1 vuzpr RHS, RHS + 698794658U, // <5,7,u,u>: Cost 1 vuzpr RHS, LHS + 1772544000U, // <5,u,0,0>: Cost 2 vuzpr RHS, <0,0,0,0> + 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS + 1772544020U, // <5,u,0,2>: Cost 2 vuzpr RHS, <0,0,2,2> + 2111512578U, // <5,u,0,3>: Cost 2 ins <5,1,u,3>, lane 2 + 2114838530U, // <5,u,0,4>: Cost 2 ins <5,6,u,4>, lane 2 + 2114183170U, // <5,u,0,5>: Cost 2 ins <5,5,u,5>, lane 2 + 2113527810U, // <5,u,0,6>: Cost 2 ins <5,4,u,6>, lane 2 + 2114199554U, // <5,u,0,7>: Cost 2 ins <5,5,u,7>, lane 2 + 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS + 2114805762U, // <5,u,1,0>: Cost 2 ins <5,6,u,0>, lane 2 + 1772544820U, // <5,u,1,1>: Cost 2 vuzpr RHS, <1,1,1,1> + 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS + 698802278U, // <5,u,1,3>: Cost 1 vuzpr RHS, LHS + 2114838530U, // <5,u,1,4>: Cost 2 ins <5,6,u,4>, lane 2 + 1843009690U, // <5,u,1,5>: Cost 2 vzipl <5,1,7,3>, RHS + 1980766362U, // <5,u,1,6>: Cost 2 vtrnl <5,7,1,3>, RHS + 1908624712U, // <5,u,1,7>: Cost 2 vzipr <4,u,5,1>, RHS + 698802283U, // <5,u,1,u>: Cost 1 vuzpr RHS, LHS + 1772544918U, // <5,u,2,0>: Cost 2 vuzpr RHS, <1,2,3,0> + 2128969728U, // <5,u,2,1>: Cost 2 ins , lane 0 + 1772544164U, // <5,u,2,2>: Cost 2 vuzpr RHS, <0,2,0,2> + 1055244288U, // <5,u,2,3>: Cost 1 ins LHS, lane 0 + 1772544922U, // <5,u,2,4>: Cost 2 vuzpr RHS, <1,2,3,4> + 2129002496U, // <5,u,2,5>: Cost 2 ins , lane 0 + 1772544204U, // <5,u,2,6>: Cost 2 vuzpr RHS, <0,2,4,6> + 2114199554U, // <5,u,2,7>: Cost 2 ins <5,5,u,7>, lane 2 + 1055244288U, // <5,u,2,u>: Cost 1 ins LHS, lane 0 + 2129698816U, // <5,u,3,0>: Cost 2 ins , lane 0 + 1772545702U, // <5,u,3,1>: Cost 2 vuzpr RHS, <2,3,0,1> + 2128388096U, // <5,u,3,2>: Cost 2 ins , lane 0 + 1772544984U, // <5,u,3,3>: Cost 2 vuzpr RHS, <1,3,1,3> + 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u> + 1772545742U, // <5,u,3,5>: Cost 2 vuzpr RHS, <2,3,4,5> + 2113527810U, // <5,u,3,6>: Cost 2 ins <5,4,u,6>, lane 2 + 1772545024U, // <5,u,3,7>: Cost 2 vuzpr RHS, <1,3,5,7> + 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u> + 2114805762U, // <5,u,4,0>: Cost 2 ins <5,6,u,0>, lane 2 + 1845024558U, // <5,u,4,1>: Cost 2 vzipl <5,4,7,6>, LHS + 2642897979U, // <5,u,4,2>: Cost 3 vext2 <4,2,5,u>, <4,2,5,u> + 2111512578U, // <5,u,4,3>: Cost 2 ins <5,1,u,3>, lane 2 + 1772547280U, // <5,u,4,4>: Cost 2 vuzpr RHS, <4,4,4,4> + 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS 
+ 1772544348U, // <5,u,4,6>: Cost 2 vuzpr RHS, <0,4,2,6> + 2114199554U, // <5,u,4,7>: Cost 2 ins <5,5,u,7>, lane 2 + 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS + 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS + 1845532462U, // <5,u,5,1>: Cost 2 vzipl <5,5,5,5>, LHS + 1979750190U, // <5,u,5,2>: Cost 2 vtrnl <5,5,5,5>, LHS + 1908654236U, // <5,u,5,3>: Cost 2 vzipr <4,u,5,5>, LHS + 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS + 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS + 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS + 698805558U, // <5,u,5,7>: Cost 1 vuzpr RHS, RHS + 698805559U, // <5,u,5,u>: Cost 1 vuzpr RHS, RHS + 1772548194U, // <5,u,6,0>: Cost 2 vuzpr RHS, <5,6,7,0> + 1846302510U, // <5,u,6,1>: Cost 2 vzipl <5,6,7,0>, LHS + 1772547404U, // <5,u,6,2>: Cost 2 vuzpr RHS, <4,6,0,2> + 1900036252U, // <5,u,6,3>: Cost 2 vzipr <3,4,5,6>, LHS + 1772548198U, // <5,u,6,4>: Cost 2 vuzpr RHS, <5,6,7,4> + 1846302874U, // <5,u,6,5>: Cost 2 vzipl <5,6,7,0>, RHS + 1772547444U, // <5,u,6,6>: Cost 2 vuzpr RHS, <4,6,4,6> + 1058226176U, // <5,u,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <5,u,6,u>: Cost 1 ins RHS, lane 0 + 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS + 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7> + 2040971914U, // <5,u,7,2>: Cost 2 vtrnr RHS, <0,u,0,2> + 967230109U, // <5,u,7,3>: Cost 1 vtrnr RHS, LHS + 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS + 2040971926U, // <5,u,7,5>: Cost 2 vtrnr RHS, <0,u,1,5> + 118708378U, // <5,u,7,6>: Cost 1 vrev RHS + 967233065U, // <5,u,7,7>: Cost 1 vtrnr RHS, RHS + 967230114U, // <5,u,7,u>: Cost 1 vtrnr RHS, LHS + 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS + 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS + 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS + 698802845U, // <5,u,u,3>: Cost 1 vuzpr RHS, LHS + 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS + 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS + 118716571U, // <5,u,u,6>: Cost 1 vrev RHS + 698805801U, // <5,u,u,7>: Cost 1 vuzpr RHS, RHS + 698802850U, // <5,u,u,u>: Cost 1 vuzpr RHS, LHS + 2128150528U, // <6,0,0,0>: Cost 2 ins , lane 0 + 2121523201U, // <6,0,0,1>: Cost 2 ins <6,u,0,1>, lane 1 + 1718206566U, // <6,0,0,2>: Cost 2 vuzpl <6,7,0,1>, LHS + 2852933922U, // <6,0,0,3>: Cost 3 vuzpr <5,6,7,0>, <6,0,1,3> + 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6> + 2852934680U, // <6,0,0,5>: Cost 3 vuzpr <5,6,7,0>, <7,0,4,5> + 2852934690U, // <6,0,0,6>: Cost 3 vuzpr <5,6,7,0>, <7,0,5,6> + 2852933962U, // <6,0,0,7>: Cost 3 vuzpr <5,6,7,0>, <6,0,5,7> + 1718206620U, // <6,0,0,u>: Cost 2 vuzpl <6,7,0,1>, LHS + 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS + 2128232448U, // <6,0,1,1>: Cost 2 ins , lane 0 + 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1779187814U, // <6,0,1,3>: Cost 2 vuzpr <5,6,7,0>, LHS + 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS + 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1> + 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1> + 2791949566U, // <6,0,1,7>: Cost 3 vuzpl <6,7,0,1>, <1,6,7,0> + 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS + 1504280678U, // <6,0,2,0>: Cost 2 vext1 <4,6,0,2>, LHS + 1849639014U, // <6,0,2,1>: Cost 2 vzipl <6,2,7,3>, LHS + 2128314368U, // <6,0,2,2>: Cost 2 ins , lane 0 + 2128322560U, // <6,0,2,3>: Cost 2 ins , lane 0 + 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6> + 2578026192U, // <6,0,2,5>: Cost 3 vext1 <4,6,0,2>, <5,1,7,3> + 2578026792U, // <6,0,2,6>: Cost 3 vext1 <4,6,0,2>, <6,0,2,0> + 
2578027514U, // <6,0,2,7>: Cost 3 vext1 <4,6,0,2>, <7,0,1,2> + 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6> + 3202113536U, // <6,0,3,0>: Cost 3 ins , lane 0 + 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4> + 2128388096U, // <6,0,3,2>: Cost 2 ins , lane 0 + 2852930520U, // <6,0,3,3>: Cost 3 vuzpr <5,6,7,0>, <1,3,1,3> + 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6> + 2852931278U, // <6,0,3,5>: Cost 3 vuzpr <5,6,7,0>, <2,3,4,5> + 3190587394U, // <6,0,3,6>: Cost 3 ins <6,0,u,6>, lane 2 + 2852930560U, // <6,0,3,7>: Cost 3 vuzpr <5,6,7,0>, <1,3,5,7> + 2128388096U, // <6,0,3,u>: Cost 2 ins , lane 0 + 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6> + 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5> + 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6> + 3195576321U, // <6,0,4,3>: Cost 3 ins <6,u,4,3>, lane 1 + 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6> + 2121850881U, // <6,0,4,5>: Cost 2 ins <6,u,4,5>, lane 1 + 1718209846U, // <6,0,4,6>: Cost 2 vuzpl <6,7,0,1>, RHS + 3195609089U, // <6,0,4,7>: Cost 3 ins <6,u,4,7>, lane 1 + 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6> + 3202260992U, // <6,0,5,0>: Cost 3 ins , lane 0 + 2128527360U, // <6,0,5,1>: Cost 2 ins , lane 0 + 3056156774U, // <6,0,5,2>: Cost 3 vtrnl <6,0,5,7>, LHS + 3190562818U, // <6,0,5,3>: Cost 3 ins <6,0,u,3>, lane 2 + 3058802892U, // <6,0,5,4>: Cost 3 vtrnl <6,4,5,6>, <0,2,4,6> + 2852933636U, // <6,0,5,5>: Cost 3 vuzpr <5,6,7,0>, <5,5,5,5> + 2852932908U, // <6,0,5,6>: Cost 3 vuzpr <5,6,7,0>, <4,5,5,6> + 1779191094U, // <6,0,5,7>: Cost 2 vuzpr <5,6,7,0>, RHS + 1779191095U, // <6,0,5,u>: Cost 2 vuzpr <5,6,7,0>, RHS + 1779191906U, // <6,0,6,0>: Cost 2 vuzpr <5,6,7,0>, <5,6,7,0> + 1852244070U, // <6,0,6,1>: Cost 2 vzipl <6,6,6,6>, LHS + 1986461798U, // <6,0,6,2>: Cost 2 vtrnl <6,6,6,6>, LHS + 3195723777U, // <6,0,6,3>: Cost 3 ins <6,u,6,3>, lane 1 + 2852933734U, // <6,0,6,4>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,4> + 3195740161U, // <6,0,6,5>: Cost 3 ins <6,u,6,5>, lane 1 + 2122006529U, // <6,0,6,6>: Cost 2 ins <6,u,6,6>, lane 1 + 2128650240U, // <6,0,6,7>: Cost 2 ins , lane 0 + 1852244637U, // <6,0,6,u>: Cost 2 vzipl <6,6,6,6>, LHS + 1906753536U, // <6,0,7,0>: Cost 2 vzipr RHS, <0,0,0,0> + 1906755238U, // <6,0,7,1>: Cost 2 vzipr RHS, <2,3,0,1> + 1906753700U, // <6,0,7,2>: Cost 2 vzipr RHS, <0,2,0,2> + 2122055681U, // <6,0,7,3>: Cost 2 ins <6,u,7,3>, lane 1 + 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS + 2980496418U, // <6,0,7,5>: Cost 3 vzipr RHS, <1,4,0,5> + 2980495690U, // <6,0,7,6>: Cost 3 vzipr RHS, <0,4,0,6> + 2122088449U, // <6,0,7,7>: Cost 2 ins <6,u,7,7>, lane 1 + 1906753706U, // <6,0,7,u>: Cost 2 vzipr RHS, <0,2,0,u> + 1906761728U, // <6,0,u,0>: Cost 2 vzipr RHS, <0,0,0,0> + 1906763430U, // <6,0,u,1>: Cost 2 vzipr RHS, <2,3,0,1> + 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1779188381U, // <6,0,u,3>: Cost 2 vuzpr <5,6,7,0>, LHS + 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6> + 2121850881U, // <6,0,u,5>: Cost 2 ins <6,u,4,5>, lane 1 + 1718212762U, // <6,0,u,6>: Cost 2 vuzpl <6,7,0,1>, RHS + 1779191337U, // <6,0,u,7>: Cost 2 vuzpr <5,6,7,0>, RHS + 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS + 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS + 2121523201U, // <6,1,0,1>: Cost 2 ins <6,u,0,1>, lane 1 + 2846673046U, // <6,1,0,2>: Cost 3 vuzpr <4,6,3,1>, <3,0,1,2> + 2047623270U, // <6,1,0,3>: Cost 2 vtrnr <5,6,7,0>, LHS + 2787385548U, // <6,1,0,4>: Cost 3 vuzpl <6,0,1,2>, 
<0,2,4,6> + 3060384768U, // <6,1,0,5>: Cost 3 vtrnl <6,7,0,1>, <1,3,5,7> + 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1> + 3060385022U, // <6,1,0,7>: Cost 3 vtrnl <6,7,0,1>, <1,6,7,0> + 2047623275U, // <6,1,0,u>: Cost 2 vtrnr <5,6,7,0>, LHS + 2578088038U, // <6,1,1,0>: Cost 3 vext1 <4,6,1,1>, LHS + 2128896000U, // <6,1,1,1>: Cost 2 ins , lane 0 + 2981778426U, // <6,1,1,2>: Cost 3 vzipr <4,7,6,1>, <7,0,1,2> + 2128912384U, // <6,1,1,3>: Cost 2 ins , lane 0 + 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6> + 3202670592U, // <6,1,1,5>: Cost 3 ins , lane 0 + 2691482470U, // <6,1,1,6>: Cost 3 vext3 <1,1,6,6>, <1,1,6,6> + 2980449545U, // <6,1,1,7>: Cost 3 vzipr <4,5,6,1>, <4,5,1,7> + 2128896000U, // <6,1,1,u>: Cost 2 ins , lane 0 + 2128961536U, // <6,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <6,1,2,1>: Cost 2 ins , lane 0 + 2128977920U, // <6,1,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <6,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <6,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <6,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <6,1,2,6>: Cost 2 ins , lane 0 + 2129018880U, // <6,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <6,1,2,u>: Cost 1 ins LHS, lane 0 + 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS + 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3> + 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6> + 2129059840U, // <6,1,3,3>: Cost 2 ins , lane 0 + 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6> + 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7> + 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3> + 2953923849U, // <6,1,3,7>: Cost 3 vzipr <0,1,6,3>, <4,5,1,7> + 2129059840U, // <6,1,3,u>: Cost 2 ins , lane 0 + 2788724044U, // <6,1,4,0>: Cost 3 vuzpl <6,2,1,3>, <4,6,0,2> + 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6> + 3195568129U, // <6,1,4,2>: Cost 3 ins <6,u,4,2>, lane 1 + 2047656038U, // <6,1,4,3>: Cost 2 vtrnr <5,6,7,4>, LHS + 2791378292U, // <6,1,4,4>: Cost 3 vuzpl <6,6,1,3>, <4,6,4,6> + 2121850881U, // <6,1,4,5>: Cost 2 ins <6,u,4,5>, lane 1 + 2834506076U, // <6,1,4,6>: Cost 3 vuzpr <2,6,0,1>, <0,4,2,6> + 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1> + 2047656043U, // <6,1,4,u>: Cost 2 vtrnr <5,6,7,4>, LHS + 2578120806U, // <6,1,5,0>: Cost 3 vext1 <4,6,1,5>, LHS + 2578121728U, // <6,1,5,1>: Cost 3 vext1 <4,6,1,5>, <1,3,5,7> + 3202940928U, // <6,1,5,2>: Cost 3 ins , lane 0 + 2129207296U, // <6,1,5,3>: Cost 2 ins , lane 0 + 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6> + 3202965504U, // <6,1,5,5>: Cost 3 ins , lane 0 + 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0> + 2834509110U, // <6,1,5,7>: Cost 3 vuzpr <2,6,0,1>, RHS + 2129207296U, // <6,1,5,u>: Cost 2 ins , lane 0 + 2925986550U, // <6,1,6,0>: Cost 3 vzipl <6,6,6,6>, <1,0,3,2> + 2834507673U, // <6,1,6,1>: Cost 3 vuzpr <2,6,0,1>, <2,6,0,1> + 2982480022U, // <6,1,6,2>: Cost 3 vzipr <4,u,6,6>, <3,0,1,2> + 2041479270U, // <6,1,6,3>: Cost 2 vtrnr <4,6,4,6>, LHS + 2602020150U, // <6,1,6,4>: Cost 3 vext1 , RHS + 2982478162U, // <6,1,6,5>: Cost 3 vzipr <4,u,6,6>, <0,4,1,5> + 2122006529U, // <6,1,6,6>: Cost 2 ins <6,u,6,6>, lane 1 + 2129313792U, // <6,1,6,7>: Cost 2 ins , lane 0 + 2041479275U, // <6,1,6,u>: Cost 2 vtrnr <4,6,4,6>, LHS + 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS + 1906753546U, // <6,1,7,1>: Cost 2 vzipr RHS, <0,0,1,1> + 1906755734U, // <6,1,7,2>: Cost 2 vzipr RHS, <3,0,1,2> + 2029469798U, // <6,1,7,3>: Cost 2 vtrnr <2,6,3,7>, LHS + 2560224566U, // 
<6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS + 1906753874U, // <6,1,7,5>: Cost 2 vzipr RHS, <0,4,1,5> + 2980495537U, // <6,1,7,6>: Cost 3 vzipr RHS, <0,2,1,6> + 2122088449U, // <6,1,7,7>: Cost 2 ins <6,u,7,7>, lane 1 + 2029469803U, // <6,1,7,u>: Cost 2 vtrnr <2,6,3,7>, LHS + 2128961536U, // <6,1,u,0>: Cost 2 ins , lane 0 + 1906761738U, // <6,1,u,1>: Cost 2 vzipr RHS, <0,0,1,1> + 1906763926U, // <6,1,u,2>: Cost 2 vzipr RHS, <3,0,1,2> + 1055244288U, // <6,1,u,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <6,1,u,4>: Cost 2 ins , lane 0 + 1906762066U, // <6,1,u,5>: Cost 2 vzipr RHS, <0,4,1,5> + 2129010688U, // <6,1,u,6>: Cost 2 ins , lane 0 + 2122088449U, // <6,1,u,7>: Cost 2 ins <6,u,7,7>, lane 1 + 1055244288U, // <6,1,u,u>: Cost 1 ins LHS, lane 0 + 2846457856U, // <6,2,0,0>: Cost 3 vuzpr <4,6,0,2>, <0,0,0,0> + 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS + 2129494016U, // <6,2,0,2>: Cost 2 ins , lane 0 + 2118148098U, // <6,2,0,3>: Cost 2 ins <6,2,u,3>, lane 2 + 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6> + 3195297793U, // <6,2,0,5>: Cost 3 ins <6,u,0,5>, lane 1 + 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4> + 3195314177U, // <6,2,0,7>: Cost 3 ins <6,u,0,7>, lane 1 + 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS + 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1> + 2846458676U, // <6,2,1,1>: Cost 3 vuzpr <4,6,0,2>, <1,1,1,1> + 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0> + 1772716134U, // <6,2,1,3>: Cost 2 vuzpr <4,6,0,2>, LHS + 3191414787U, // <6,2,1,4>: Cost 3 ins <6,2,1,u>, lane 3 + 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7> + 3114885324U, // <6,2,1,6>: Cost 3 vtrnr <4,6,0,1>, <0,2,4,6> + 3191922690U, // <6,2,1,7>: Cost 3 ins <6,2,u,7>, lane 2 + 1772716139U, // <6,2,1,u>: Cost 2 vuzpr <4,6,0,2>, LHS + 2846458774U, // <6,2,2,0>: Cost 3 vuzpr <4,6,0,2>, <1,2,3,0> + 3195412481U, // <6,2,2,1>: Cost 3 ins <6,u,2,1>, lane 1 + 2129641472U, // <6,2,2,2>: Cost 2 ins , lane 0 + 1908703334U, // <6,2,2,3>: Cost 2 vzipr <4,u,6,2>, LHS + 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6> + 3195445249U, // <6,2,2,5>: Cost 3 ins <6,u,2,5>, lane 1 + 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6> + 2846462444U, // <6,2,2,7>: Cost 3 vuzpr <4,6,0,2>, <6,2,5,7> + 1908703339U, // <6,2,2,u>: Cost 2 vzipr <4,u,6,2>, LHS + 2129698816U, // <6,2,3,0>: Cost 2 ins , lane 0 + 2230618020U, // <6,2,3,1>: Cost 3 vrev <2,6,1,3> + 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6> + 2129723392U, // <6,2,3,3>: Cost 2 ins , lane 0 + 2129731584U, // <6,2,3,4>: Cost 2 ins , lane 0 + 2846459598U, // <6,2,3,5>: Cost 3 vuzpr <4,6,0,2>, <2,3,4,5> + 2966528348U, // <6,2,3,6>: Cost 3 vzipr <2,2,6,3>, <0,4,2,6> + 2846458880U, // <6,2,3,7>: Cost 3 vuzpr <4,6,0,2>, <1,3,5,7> + 2129698816U, // <6,2,3,u>: Cost 2 ins , lane 0 + 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> + 3191873538U, // <6,2,4,1>: Cost 3 ins <6,2,u,1>, lane 2 + 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6> + 2118148098U, // <6,2,4,3>: Cost 2 ins <6,2,u,3>, lane 2 + 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6> + 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS + 2129821696U, // <6,2,4,6>: Cost 2 ins , lane 0 + 3195609089U, // <6,2,4,7>: Cost 3 ins <6,u,4,7>, lane 1 + 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2> + 3191709699U, // <6,2,5,0>: Cost 3 ins <6,2,5,u>, lane 3 + 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3> + 3203604480U, // <6,2,5,2>: Cost 3 ins , lane 0 + 2118148098U, 
// <6,2,5,3>: Cost 2 ins <6,2,u,3>, lane 2 + 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5> + 2846461956U, // <6,2,5,5>: Cost 3 vuzpr <4,6,0,2>, <5,5,5,5> + 3115213004U, // <6,2,5,6>: Cost 3 vtrnr <4,6,4,5>, <0,2,4,6> + 1772719414U, // <6,2,5,7>: Cost 2 vuzpr <4,6,0,2>, RHS + 1772719415U, // <6,2,5,u>: Cost 2 vuzpr <4,6,0,2>, RHS + 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1> + 3195707393U, // <6,2,6,1>: Cost 3 ins <6,u,6,1>, lane 1 + 1772719436U, // <6,2,6,2>: Cost 2 vuzpr <4,6,0,2>, <4,6,0,2> + 1908736102U, // <6,2,6,3>: Cost 2 vzipr <4,u,6,6>, LHS + 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5> + 3195740161U, // <6,2,6,5>: Cost 3 ins <6,u,6,5>, lane 1 + 2122006529U, // <6,2,6,6>: Cost 2 ins <6,u,6,6>, lane 1 + 2118189061U, // <6,2,6,7>: Cost 2 ins <6,2,u,u>, lane 5 + 1908736107U, // <6,2,6,u>: Cost 2 vzipr <4,u,6,6>, LHS + 2118115331U, // <6,2,7,0>: Cost 2 ins <6,2,7,u>, lane 3 + 2118115331U, // <6,2,7,1>: Cost 2 ins <6,2,7,u>, lane 3 + 1906753556U, // <6,2,7,2>: Cost 2 vzipr RHS, <0,0,2,2> + 833011814U, // <6,2,7,3>: Cost 1 vzipr RHS, LHS + 2118115331U, // <6,2,7,4>: Cost 2 ins <6,2,7,u>, lane 3 + 2118115331U, // <6,2,7,5>: Cost 2 ins <6,2,7,u>, lane 3 + 1906753884U, // <6,2,7,6>: Cost 2 vzipr RHS, <0,4,2,6> + 2122088449U, // <6,2,7,7>: Cost 2 ins <6,u,7,7>, lane 1 + 833011819U, // <6,2,7,u>: Cost 1 vzipr RHS, LHS + 2129698816U, // <6,2,u,0>: Cost 2 ins , lane 0 + 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS + 1906761748U, // <6,2,u,2>: Cost 2 vzipr RHS, <0,0,2,2> + 833020006U, // <6,2,u,3>: Cost 1 vzipr RHS, LHS + 2129731584U, // <6,2,u,4>: Cost 2 ins , lane 0 + 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS + 1906762076U, // <6,2,u,6>: Cost 2 vzipr RHS, <0,4,2,6> + 1772719657U, // <6,2,u,7>: Cost 2 vuzpr <4,6,0,2>, RHS + 833020011U, // <6,2,u,u>: Cost 1 vzipr RHS, LHS + 3203883008U, // <6,3,0,0>: Cost 3 ins , lane 0 + 2130149376U, // <6,3,0,1>: Cost 2 ins , lane 0 + 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4> + 3121365976U, // <6,3,0,3>: Cost 3 vtrnr <5,6,7,0>, <1,3,1,3> + 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2> + 3121366734U, // <6,3,0,5>: Cost 3 vtrnr <5,6,7,0>, <2,3,4,5> + 3195305985U, // <6,3,0,6>: Cost 3 ins <6,u,0,6>, lane 1 + 3121366016U, // <6,3,0,7>: Cost 3 vtrnr <5,6,7,0>, <1,3,5,7> + 2130149376U, // <6,3,0,u>: Cost 2 ins , lane 0 + 2578235494U, // <6,3,1,0>: Cost 3 vext1 <4,6,3,1>, LHS + 3203964928U, // <6,3,1,1>: Cost 3 ins , lane 0 + 3203973120U, // <6,3,1,2>: Cost 3 ins , lane 0 + 2130239488U, // <6,3,1,3>: Cost 2 ins , lane 0 + 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> + 3203997696U, // <6,3,1,5>: Cost 3 ins , lane 0 + 2822725737U, // <6,3,1,6>: Cost 3 vuzpr <0,6,2,3>, <0,1,2,6> + 2970494906U, // <6,3,1,7>: Cost 3 vzipr <2,u,6,1>, <2,6,3,7> + 2130239488U, // <6,3,1,u>: Cost 2 ins , lane 0 + 2982445974U, // <6,3,2,0>: Cost 3 vzipr <4,u,6,2>, <1,2,3,0> + 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> + 2630985357U, // <6,3,2,2>: Cost 3 vext2 <2,2,6,3>, <2,2,6,3> + 2130313216U, // <6,3,2,3>: Cost 2 ins , lane 0 + 2982445978U, // <6,3,2,4>: Cost 3 vzipr <4,u,6,2>, <1,2,3,4> + 3114895054U, // <6,3,2,5>: Cost 3 vtrnr <4,6,0,2>, <2,3,4,5> + 2834596044U, // <6,3,2,6>: Cost 3 vuzpr <2,6,1,3>, <0,2,4,6> + 3114894336U, // <6,3,2,7>: Cost 3 vtrnr <4,6,0,2>, <1,3,5,7> + 2130313216U, // <6,3,2,u>: Cost 2 ins , lane 0 + 2578251878U, // <6,3,3,0>: Cost 3 vext1 <4,6,3,3>, LHS + 2792163478U, // <6,3,3,1>: Cost 3 vuzpl <6,7,3,0>, <3,0,1,2> + 2636958054U, // <6,3,3,2>: 
Cost 3 vext2 <3,2,6,3>, <3,2,6,3> + 2130386944U, // <6,3,3,3>: Cost 2 ins , lane 0 + 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6> + 2792196610U, // <6,3,3,5>: Cost 3 vuzpl <6,7,3,4>, <3,4,5,6> + 2590200602U, // <6,3,3,6>: Cost 3 vext1 <6,6,3,3>, <6,6,3,3> + 2972501946U, // <6,3,3,7>: Cost 3 vzipr <3,2,6,3>, <2,6,3,7> + 2130386944U, // <6,3,3,u>: Cost 2 ins , lane 0 + 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS + 2705050078U, // <6,3,4,1>: Cost 3 vext3 <3,4,1,6>, <3,4,1,6> + 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3> + 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6> + 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS + 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6> + 2846540124U, // <6,3,4,6>: Cost 3 vuzpr <4,6,1,3>, <0,4,2,6> + 3121398784U, // <6,3,4,7>: Cost 3 vtrnr <5,6,7,4>, <1,3,5,7> + 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6> + 2578268262U, // <6,3,5,0>: Cost 3 vext1 <4,6,3,5>, LHS + 3204259840U, // <6,3,5,1>: Cost 3 ins , lane 0 + 2648903448U, // <6,3,5,2>: Cost 3 vext2 <5,2,6,3>, <5,2,6,3> + 2578270722U, // <6,3,5,3>: Cost 3 vext1 <4,6,3,5>, <3,4,5,6> + 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6> + 3204292608U, // <6,3,5,5>: Cost 3 ins , lane 0 + 3204300800U, // <6,3,5,6>: Cost 3 ins , lane 0 + 2130567168U, // <6,3,5,7>: Cost 2 ins , lane 0 + 2130567168U, // <6,3,5,u>: Cost 2 ins , lane 0 + 2982478742U, // <6,3,6,0>: Cost 3 vzipr <4,u,6,6>, <1,2,3,0> + 3115222694U, // <6,3,6,1>: Cost 3 vtrnr <4,6,4,6>, <2,3,0,1> + 2982478582U, // <6,3,6,2>: Cost 3 vzipr <4,u,6,6>, <1,0,3,2> + 1748984315U, // <6,3,6,3>: Cost 2 vuzpr <0,6,2,3>, <0,6,2,3> + 2982478746U, // <6,3,6,4>: Cost 3 vzipr <4,u,6,6>, <1,2,3,4> + 3115222734U, // <6,3,6,5>: Cost 3 vtrnr <4,6,4,6>, <2,3,4,5> + 2122006529U, // <6,3,6,6>: Cost 2 ins <6,u,6,6>, lane 1 + 2130640896U, // <6,3,6,7>: Cost 2 ins , lane 0 + 1748984315U, // <6,3,6,u>: Cost 2 vuzpr <0,6,2,3>, <0,6,2,3> + 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS + 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7> + 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7> + 1906754376U, // <6,3,7,3>: Cost 2 vzipr RHS, <1,1,3,3> + 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS + 3103213262U, // <6,3,7,5>: Cost 3 vtrnr <2,6,3,7>, <2,3,4,5> + 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3> + 1906754704U, // <6,3,7,7>: Cost 2 vzipr RHS, <1,5,3,7> + 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS + 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS + 2130149376U, // <6,3,u,1>: Cost 2 ins , lane 0 + 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u> + 1906762568U, // <6,3,u,3>: Cost 2 vzipr RHS, <1,1,3,3> + 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS + 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6> + 2122006529U, // <6,3,u,6>: Cost 2 ins <6,u,6,6>, lane 1 + 1906762896U, // <6,3,u,7>: Cost 2 vzipr RHS, <1,5,3,7> + 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS + 2242465098U, // <6,4,0,0>: Cost 3 vrev <4,6,0,0> + 2121523201U, // <6,4,0,1>: Cost 2 ins <6,u,0,1>, lane 1 + 1718534246U, // <6,4,0,2>: Cost 2 vuzpl <6,7,4,5>, LHS + 3195281409U, // <6,4,0,3>: Cost 3 ins <6,u,0,3>, lane 1 + 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6> + 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1> + 1986645302U, // <6,4,0,6>: Cost 2 vtrnl <6,7,0,1>, RHS + 3195314177U, // <6,4,0,7>: Cost 3 ins <6,u,0,7>, lane 1 + 1986645320U, // <6,4,0,u>: Cost 2 vtrnl <6,7,0,1>, RHS 
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1> + 2242547028U, // <6,4,1,1>: Cost 3 vrev <4,6,1,1> + 3204636672U, // <6,4,1,2>: Cost 3 ins , lane 0 + 1779220582U, // <6,4,1,3>: Cost 2 vuzpr <5,6,7,4>, LHS + 3059813748U, // <6,4,1,4>: Cost 3 vtrnl <6,6,1,3>, <4,6,4,6> + 2130919424U, // <6,4,1,5>: Cost 2 ins , lane 0 + 3102941532U, // <6,4,1,6>: Cost 3 vtrnr <2,6,0,1>, <0,4,2,6> + 2242989450U, // <6,4,1,7>: Cost 3 vrev <4,6,7,1> + 1779220587U, // <6,4,1,u>: Cost 2 vuzpr <5,6,7,4>, LHS + 1168739660U, // <6,4,2,0>: Cost 2 vrev <4,6,0,2> + 3195412481U, // <6,4,2,1>: Cost 3 ins <6,u,2,1>, lane 1 + 2242628958U, // <6,4,2,2>: Cost 3 vrev <4,6,2,2> + 2130976768U, // <6,4,2,3>: Cost 2 ins , lane 0 + 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4> + 1849642294U, // <6,4,2,5>: Cost 2 vzipl <6,2,7,3>, RHS + 2131001344U, // <6,4,2,6>: Cost 2 ins , lane 0 + 3195461633U, // <6,4,2,7>: Cost 3 ins <6,u,2,7>, lane 1 + 1169329556U, // <6,4,2,u>: Cost 2 vrev <4,6,u,2> + 3195478017U, // <6,4,3,0>: Cost 3 ins <6,u,3,0>, lane 1 + 2242563414U, // <6,4,3,1>: Cost 3 vrev <4,6,1,3> + 2242637151U, // <6,4,3,2>: Cost 3 vrev <4,6,2,3> + 2242710888U, // <6,4,3,3>: Cost 3 vrev <4,6,3,3> + 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6> + 2846623438U, // <6,4,3,5>: Cost 3 vuzpr <4,6,2,4>, <2,3,4,5> + 2965864652U, // <6,4,3,6>: Cost 3 vzipr <2,1,6,3>, <0,2,4,6> + 2852963328U, // <6,4,3,7>: Cost 3 vuzpr <5,6,7,4>, <1,3,5,7> + 2243079573U, // <6,4,3,u>: Cost 3 vrev <4,6,u,3> + 2242497870U, // <6,4,4,0>: Cost 3 vrev <4,6,0,4> + 2852967732U, // <6,4,4,1>: Cost 3 vuzpr <5,6,7,4>, <7,4,0,1> + 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4> + 2852967014U, // <6,4,4,3>: Cost 3 vuzpr <5,6,7,4>, <6,4,1,3> + 2131132416U, // <6,4,4,4>: Cost 2 ins , lane 0 + 2121850881U, // <6,4,4,5>: Cost 2 ins <6,u,4,5>, lane 1 + 1718537526U, // <6,4,4,6>: Cost 2 vuzpl <6,7,4,5>, RHS + 2852967054U, // <6,4,4,7>: Cost 3 vuzpr <5,6,7,4>, <6,4,5,7> + 1718537544U, // <6,4,4,u>: Cost 2 vuzpl <6,7,4,5>, RHS + 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS + 2242579800U, // <6,4,5,1>: Cost 3 vrev <4,6,1,5> + 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5> + 2242727274U, // <6,4,5,3>: Cost 3 vrev <4,6,3,5> + 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS + 2131214336U, // <6,4,5,5>: Cost 2 ins , lane 0 + 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 1779223862U, // <6,4,5,7>: Cost 2 vuzpr <5,6,7,4>, RHS + 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS + 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2> + 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2> + 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2> + 1169067380U, // <6,4,6,4>: Cost 2 vrev <4,6,4,6> + 1852247350U, // <6,4,6,5>: Cost 2 vzipl <6,6,6,6>, RHS + 1986465078U, // <6,4,6,6>: Cost 2 vtrnl <6,6,6,6>, RHS + 2131304448U, // <6,4,6,7>: Cost 2 ins , lane 0 + 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS + 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS + 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4> + 2980495398U, // <6,4,7,2>: Cost 3 vzipr RHS, <0,0,4,2> + 2122055681U, // <6,4,7,3>: Cost 2 ins <6,u,7,3>, lane 1 + 1906756816U, // <6,4,7,4>: Cost 2 vzipr RHS, <4,4,4,4> + 1906755278U, // <6,4,7,5>: Cost 2 vzipr RHS, <2,3,4,5> + 1906753740U, // <6,4,7,6>: Cost 2 vzipr RHS, <0,2,4,6> + 2122088449U, // <6,4,7,7>: Cost 2 ins <6,u,7,7>, lane 1 + 1906753742U, // <6,4,7,u>: Cost 2 vzipr RHS, <0,2,4,u> + 
1168788818U, // <6,4,u,0>: Cost 2 vrev <4,6,0,u> + 2121523201U, // <6,4,u,1>: Cost 2 ins <6,u,0,1>, lane 1 + 1718540078U, // <6,4,u,2>: Cost 2 vuzpl <6,7,4,5>, LHS + 1779221149U, // <6,4,u,3>: Cost 2 vuzpr <5,6,7,4>, LHS + 1906765008U, // <6,4,u,4>: Cost 2 vzipr RHS, <4,4,4,4> + 1906763470U, // <6,4,u,5>: Cost 2 vzipr RHS, <2,3,4,5> + 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS + 1779224105U, // <6,4,u,7>: Cost 2 vuzpr <5,6,7,4>, RHS + 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS + 3195256833U, // <6,5,0,0>: Cost 3 ins <6,u,0,0>, lane 1 + 2121523201U, // <6,5,0,1>: Cost 2 ins <6,u,0,1>, lane 1 + 2787721318U, // <6,5,0,2>: Cost 3 vuzpl <6,0,5,7>, LHS + 3195281409U, // <6,5,0,3>: Cost 3 ins <6,u,0,3>, lane 1 + 2790367436U, // <6,5,0,4>: Cost 3 vuzpl <6,4,5,6>, <0,2,4,6> + 3121369092U, // <6,5,0,5>: Cost 3 vtrnr <5,6,7,0>, <5,5,5,5> + 2980440578U, // <6,5,0,6>: Cost 3 vzipr <4,5,6,0>, <3,4,5,6> + 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0> + 2047626551U, // <6,5,0,u>: Cost 2 vtrnr <5,6,7,0>, RHS + 2578382950U, // <6,5,1,0>: Cost 3 vext1 <4,6,5,1>, LHS + 3205292032U, // <6,5,1,1>: Cost 3 ins , lane 0 + 3195346945U, // <6,5,1,2>: Cost 3 ins <6,u,1,2>, lane 1 + 2834833510U, // <6,5,1,3>: Cost 3 vuzpr <2,6,4,5>, LHS + 2578386296U, // <6,5,1,4>: Cost 3 vext1 <4,6,5,1>, <4,6,5,1> + 2578387072U, // <6,5,1,5>: Cost 3 vext1 <4,6,5,1>, <5,7,1,3> + 2922205282U, // <6,5,1,6>: Cost 3 vzipl <6,1,0,3>, <5,6,7,0> + 2131599360U, // <6,5,1,7>: Cost 2 ins , lane 0 + 2131599360U, // <6,5,1,u>: Cost 2 ins , lane 0 + 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS + 2982448018U, // <6,5,2,1>: Cost 3 vzipr <4,u,6,2>, <4,0,5,1> + 3195420673U, // <6,5,2,2>: Cost 3 ins <6,u,2,2>, lane 1 + 2131640320U, // <6,5,2,3>: Cost 2 ins , lane 0 + 2578394489U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, <4,6,5,2> + 3114897412U, // <6,5,2,5>: Cost 3 vtrnr <4,6,0,2>, <5,5,5,5> + 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7> + 2041154870U, // <6,5,2,7>: Cost 2 vtrnr <4,6,0,2>, RHS + 2041154871U, // <6,5,2,u>: Cost 2 vtrnr <4,6,0,2>, RHS + 3195478017U, // <6,5,3,0>: Cost 3 ins <6,u,3,0>, lane 1 + 3205439488U, // <6,5,3,1>: Cost 3 ins , lane 0 + 3091164465U, // <6,5,3,2>: Cost 3 vtrnr <0,6,2,3>, <4,5,6,2> + 3195502593U, // <6,5,3,3>: Cost 3 ins <6,u,3,3>, lane 1 + 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6> + 3205472256U, // <6,5,3,5>: Cost 3 ins , lane 0 + 2980465154U, // <6,5,3,6>: Cost 3 vzipr <4,5,6,3>, <3,4,5,6> + 2131746816U, // <6,5,3,7>: Cost 2 ins , lane 0 + 2131746816U, // <6,5,3,u>: Cost 2 ins , lane 0 + 2789051724U, // <6,5,4,0>: Cost 3 vuzpl <6,2,5,7>, <4,6,0,2> + 3060715648U, // <6,5,4,1>: Cost 3 vtrnl <6,7,4,5>, <5,7,1,3> + 3195568129U, // <6,5,4,2>: Cost 3 ins <6,u,4,2>, lane 1 + 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5> + 2791705972U, // <6,5,4,4>: Cost 3 vuzpl <6,6,5,7>, <4,6,4,6> + 2121850881U, // <6,5,4,5>: Cost 2 ins <6,u,4,5>, lane 1 + 2834833756U, // <6,5,4,6>: Cost 3 vuzpr <2,6,4,5>, <0,4,2,6> + 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6> + 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6> + 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS + 3006363382U, // <6,5,5,1>: Cost 3 vzipr , + 3205595136U, // <6,5,5,2>: Cost 3 ins , lane 0 + 2980479105U, // <6,5,5,3>: Cost 3 vzipr <4,5,6,5>, <0,1,5,3> + 2578419068U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, <4,6,5,5> + 2131877888U, // <6,5,5,5>: Cost 2 ins , lane 0 + 2979154434U, // <6,5,5,6>: Cost 3 vzipr <4,3,6,5>, <3,4,5,6> + 2131894272U, // <6,5,5,7>: Cost 2 
ins , lane 0 + 2131877888U, // <6,5,5,u>: Cost 2 ins , lane 0 + 2131910656U, // <6,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <6,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <6,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // <6,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <6,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <6,5,6,5>: Cost 2 ins , lane 0 + 2131959808U, // <6,5,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <6,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <6,5,6,u>: Cost 1 ins RHS, lane 0 + 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS + 1906756498U, // <6,5,7,1>: Cost 2 vzipr RHS, <4,0,5,1> + 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> + 2122055681U, // <6,5,7,3>: Cost 2 ins <6,u,7,3>, lane 1 + 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS + 1906756826U, // <6,5,7,5>: Cost 2 vzipr RHS, <4,4,5,5> + 1906756098U, // <6,5,7,6>: Cost 2 vzipr RHS, <3,4,5,6> + 2029473078U, // <6,5,7,7>: Cost 2 vtrnr <2,6,3,7>, RHS + 2029473079U, // <6,5,7,u>: Cost 2 vtrnr <2,6,3,7>, RHS + 2131910656U, // <6,5,u,0>: Cost 2 ins , lane 0 + 1906764690U, // <6,5,u,1>: Cost 2 vzipr RHS, <4,0,5,1> + 2131927040U, // <6,5,u,2>: Cost 2 ins , lane 0 + 2122055681U, // <6,5,u,3>: Cost 2 ins <6,u,7,3>, lane 1 + 2131943424U, // <6,5,u,4>: Cost 2 ins , lane 0 + 1906765018U, // <6,5,u,5>: Cost 2 vzipr RHS, <4,4,5,5> + 1906764290U, // <6,5,u,6>: Cost 2 vzipr RHS, <3,4,5,6> + 1058226176U, // <6,5,u,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <6,5,u,u>: Cost 1 ins RHS, lane 0 + 2047627362U, // <6,6,0,0>: Cost 2 vtrnr <5,6,7,0>, <5,6,7,0> + 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS + 1718026342U, // <6,6,0,2>: Cost 2 vuzpl <6,6,6,6>, LHS + 3195281409U, // <6,6,0,3>: Cost 3 ins <6,u,0,3>, lane 1 + 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> + 3195297793U, // <6,6,0,5>: Cost 3 ins <6,u,0,5>, lane 1 + 2120826882U, // <6,6,0,6>: Cost 2 ins <6,6,u,6>, lane 2 + 2120835074U, // <6,6,0,7>: Cost 2 ins <6,6,u,7>, lane 2 + 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS + 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> + 1906707760U, // <6,6,1,1>: Cost 2 vzipr <4,5,6,1>, <4,5,6,1> + 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> + 1773043814U, // <6,6,1,3>: Cost 2 vuzpr <4,6,4,6>, LHS + 3194068995U, // <6,6,1,4>: Cost 3 ins <6,6,1,u>, lane 3 + 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> + 2120826882U, // <6,6,1,6>: Cost 2 ins <6,6,u,6>, lane 2 + 2120835074U, // <6,6,1,7>: Cost 2 ins <6,6,u,7>, lane 2 + 1773043819U, // <6,6,1,u>: Cost 2 vuzpr <4,6,4,6>, LHS + 3114896750U, // <6,6,2,0>: Cost 3 vtrnr <4,6,0,2>, <4,6,4,0> + 3195412481U, // <6,6,2,1>: Cost 3 ins <6,u,2,1>, lane 1 + 2041154892U, // <6,6,2,2>: Cost 2 vtrnr <4,6,0,2>, <4,6,0,2> + 2120843269U, // <6,6,2,3>: Cost 2 ins <6,6,u,u>, lane 5 + 3114897510U, // <6,6,2,4>: Cost 3 vtrnr <4,6,0,2>, <5,6,7,4> + 3195445249U, // <6,6,2,5>: Cost 3 ins <6,u,2,5>, lane 1 + 2120826882U, // <6,6,2,6>: Cost 2 ins <6,6,u,6>, lane 2 + 1908706614U, // <6,6,2,7>: Cost 2 vzipr <4,u,6,2>, RHS + 1908706615U, // <6,6,2,u>: Cost 2 vzipr <4,u,6,2>, RHS + 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2> + 2846787238U, // <6,6,3,1>: Cost 3 vuzpr <4,6,4,6>, <2,3,0,1> + 3206111232U, // <6,6,3,2>: Cost 3 ins , lane 0 + 1880178826U, // <6,6,3,3>: Cost 2 vzipr <0,1,6,3>, <0,1,6,3> + 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> + 2846787278U, // <6,6,3,5>: Cost 3 vuzpr <4,6,4,6>, <2,3,4,5> + 2120826882U, // <6,6,3,6>: Cost 2 ins <6,6,u,6>, lane 2 + 2132410368U, // 
<6,6,3,7>: Cost 2 ins , lane 0 + 2132410368U, // <6,6,3,u>: Cost 2 ins , lane 0 + 2846790288U, // <6,6,4,0>: Cost 3 vuzpr <4,6,4,6>, <6,4,6,0> + 3194527746U, // <6,6,4,1>: Cost 3 ins <6,6,u,1>, lane 2 + 2846788778U, // <6,6,4,2>: Cost 3 vuzpr <4,6,4,6>, <4,4,0,2> + 3195576321U, // <6,6,4,3>: Cost 3 ins <6,u,4,3>, lane 1 + 2047660134U, // <6,6,4,4>: Cost 2 vtrnr <5,6,7,4>, <5,6,7,4> + 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS + 1718029622U, // <6,6,4,6>: Cost 2 vuzpl <6,6,6,6>, RHS + 2120835074U, // <6,6,4,7>: Cost 2 ins <6,6,u,7>, lane 2 + 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> + 3194363907U, // <6,6,5,0>: Cost 3 ins <6,6,5,u>, lane 3 + 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3> + 3206258688U, // <6,6,5,2>: Cost 3 ins , lane 0 + 3194544130U, // <6,6,5,3>: Cost 3 ins <6,6,u,3>, lane 2 + 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> + 1906740532U, // <6,6,5,5>: Cost 2 vzipr <4,5,6,5>, <4,5,6,5> + 2120826882U, // <6,6,5,6>: Cost 2 ins <6,6,u,6>, lane 2 + 1773047094U, // <6,6,5,7>: Cost 2 vuzpr <4,6,4,6>, RHS + 1773047095U, // <6,6,5,u>: Cost 2 vuzpr <4,6,4,6>, RHS + 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS + 2120695811U, // <6,6,6,1>: Cost 2 ins <6,6,6,u>, lane 3 + 2120695811U, // <6,6,6,2>: Cost 2 ins <6,6,6,u>, lane 3 + 2120695811U, // <6,6,6,3>: Cost 2 ins <6,6,6,u>, lane 3 + 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS + 2120695811U, // <6,6,6,5>: Cost 2 ins <6,6,6,u>, lane 3 + 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS + 1908739382U, // <6,6,6,7>: Cost 2 vzipr <4,u,6,6>, RHS + 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS + 2132647936U, // <6,6,7,0>: Cost 2 ins , lane 0 + 2120769539U, // <6,6,7,1>: Cost 2 ins <6,6,7,u>, lane 3 + 1908747164U, // <6,6,7,2>: Cost 2 vzipr RHS, <4,0,6,2> + 2122055681U, // <6,6,7,3>: Cost 2 ins <6,u,7,3>, lane 1 + 2132680704U, // <6,6,7,4>: Cost 2 ins , lane 0 + 2120769539U, // <6,6,7,5>: Cost 2 ins <6,6,7,u>, lane 3 + 1906758456U, // <6,6,7,6>: Cost 2 vzipr RHS, <6,6,6,6> + 833015094U, // <6,6,7,7>: Cost 1 vzipr RHS, RHS + 833015095U, // <6,6,7,u>: Cost 1 vzipr RHS, RHS + 2047627362U, // <6,6,u,0>: Cost 2 vtrnr <5,6,7,0>, <5,6,7,0> + 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS + 1906764700U, // <6,6,u,2>: Cost 2 vzipr RHS, <4,0,6,2> + 1773044381U, // <6,6,u,3>: Cost 2 vuzpr <4,6,4,6>, LHS + 2047660134U, // <6,6,u,4>: Cost 2 vtrnr <5,6,7,4>, <5,6,7,4> + 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS + 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS + 833023286U, // <6,6,u,7>: Cost 1 vzipr RHS, RHS + 833023287U, // <6,6,u,u>: Cost 1 vzipr RHS, RHS + 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS + 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2120916995U, // <6,7,0,3>: Cost 2 ins <6,7,0,u>, lane 3 + 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> + 2120916995U, // <6,7,0,6>: Cost 2 ins <6,7,0,u>, lane 3 + 2120916995U, // <6,7,0,7>: Cost 2 ins <6,7,0,u>, lane 3 + 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS + 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> + 1761034342U, // <6,7,1,3>: Cost 2 vuzpr <2,6,3,7>, LHS + 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> + 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2121498626U, // 
<6,7,1,7>: Cost 2 ins <6,7,u,7>, lane 2 + 1761034347U, // <6,7,1,u>: Cost 2 vuzpr <2,6,3,7>, LHS + 2121064451U, // <6,7,2,0>: Cost 2 ins <6,7,2,u>, lane 3 + 2121449474U, // <6,7,2,1>: Cost 2 ins <6,7,u,1>, lane 2 + 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1059889156U, // <6,7,2,3>: Cost 1 ins LHS, lane 4 + 2121064451U, // <6,7,2,4>: Cost 2 ins <6,7,2,u>, lane 3 + 2121482242U, // <6,7,2,5>: Cost 2 ins <6,7,u,5>, lane 2 + 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2121498626U, // <6,7,2,7>: Cost 2 ins <6,7,u,7>, lane 2 + 1059889156U, // <6,7,2,u>: Cost 1 ins LHS, lane 4 + 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2121449474U, // <6,7,3,1>: Cost 2 ins <6,7,u,1>, lane 2 + 2133696516U, // <6,7,3,2>: Cost 2 ins , lane 4 + 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2121482242U, // <6,7,3,5>: Cost 2 ins <6,7,u,5>, lane 2 + 2834777789U, // <6,7,3,6>: Cost 3 vuzpr <2,6,3,7>, <2,3,2,6> + 2133737476U, // <6,7,3,7>: Cost 2 ins , lane 4 + 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2121449474U, // <6,7,4,1>: Cost 2 ins <6,7,u,1>, lane 2 + 2121211907U, // <6,7,4,2>: Cost 2 ins <6,7,4,u>, lane 3 + 2121211907U, // <6,7,4,3>: Cost 2 ins <6,7,4,u>, lane 3 + 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS + 1573203276U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,0,2> + 2121211907U, // <6,7,4,7>: Cost 2 ins <6,7,4,u>, lane 3 + 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS + 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> + 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> + 2121465858U, // <6,7,5,3>: Cost 2 ins <6,7,u,3>, lane 2 + 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> + 1761037622U, // <6,7,5,7>: Cost 2 vuzpr <2,6,3,7>, RHS + 1761037623U, // <6,7,5,u>: Cost 2 vuzpr <2,6,3,7>, RHS + 2121359363U, // <6,7,6,0>: Cost 2 ins <6,7,6,u>, lane 3 + 2121449474U, // <6,7,6,1>: Cost 2 ins <6,7,u,1>, lane 2 + 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2121465858U, // <6,7,6,3>: Cost 2 ins <6,7,u,3>, lane 2 + 2121359363U, // <6,7,6,4>: Cost 2 ins <6,7,6,u>, lane 3 + 2121482242U, // <6,7,6,5>: Cost 2 ins <6,7,u,5>, lane 2 + 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> + 1060216836U, // <6,7,6,7>: Cost 1 ins RHS, lane 4 + 1060216836U, // <6,7,6,u>: Cost 1 ins RHS, lane 4 + 1906757730U, // <6,7,7,0>: Cost 2 vzipr RHS, <5,6,7,0> + 2121449474U, // <6,7,7,1>: Cost 2 ins <6,7,u,1>, lane 2 + 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> + 1906758138U, // <6,7,7,3>: Cost 2 vzipr RHS, <6,2,7,3> + 1906757734U, // <6,7,7,4>: Cost 2 vzipr RHS, <5,6,7,4> + 2121482242U, // <6,7,7,5>: Cost 2 ins <6,7,u,5>, lane 2 + 1906757574U, // <6,7,7,6>: Cost 2 vzipr RHS, <5,4,7,6> + 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> + 1906757738U, // <6,7,7,u>: Cost 2 vzipr RHS, <5,6,7,u> + 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, + 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS + 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, + 1059889156U, // <6,7,u,3>: Cost 1 ins LHS, lane 4 + 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, + 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS + 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, + 1060216836U, // <6,7,u,7>: Cost 1 ins RHS, lane 4 + 
497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS + 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS + 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2047623837U, // <6,u,0,3>: Cost 2 vtrnr <5,6,7,0>, LHS + 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> + 1986648218U, // <6,u,0,6>: Cost 2 vtrnl <6,7,0,1>, RHS + 2047626793U, // <6,u,0,7>: Cost 2 vtrnr <5,6,7,0>, RHS + 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS + 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1761042534U, // <6,u,1,3>: Cost 2 vuzpr <2,6,3,u>, LHS + 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS + 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2120826882U, // <6,u,1,6>: Cost 2 ins <6,6,u,6>, lane 2 + 2120835074U, // <6,u,1,7>: Cost 2 ins <6,6,u,7>, lane 2 + 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS + 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS + 1849644846U, // <6,u,2,1>: Cost 2 vzipl <6,2,7,3>, LHS + 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1055244288U, // <6,u,2,3>: Cost 1 ins LHS, lane 0 + 1504873876U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, <4,6,u,2> + 1849645210U, // <6,u,2,5>: Cost 2 vzipl <6,2,7,3>, RHS + 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2041155113U, // <6,u,2,7>: Cost 2 vtrnr <4,6,0,2>, RHS + 1055244288U, // <6,u,2,u>: Cost 1 ins LHS, lane 0 + 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2121449474U, // <6,u,3,1>: Cost 2 ins <6,7,u,1>, lane 2 + 2128388096U, // <6,u,3,2>: Cost 2 ins , lane 0 + 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2121482242U, // <6,u,3,5>: Cost 2 ins <6,7,u,5>, lane 2 + 2120826882U, // <6,u,3,6>: Cost 2 ins <6,6,u,6>, lane 2 + 2131746816U, // <6,u,3,7>: Cost 2 ins , lane 0 + 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2121449474U, // <6,u,4,1>: Cost 2 ins <6,7,u,1>, lane 2 + 1986975534U, // <6,u,4,2>: Cost 2 vtrnl <6,7,4,5>, LHS + 2047656605U, // <6,u,4,3>: Cost 2 vtrnr <5,6,7,4>, LHS + 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS + 1571220812U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,0,2> + 2047659561U, // <6,u,4,7>: Cost 2 vtrnr <5,6,7,4>, RHS + 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS + 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS + 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> + 2118148098U, // <6,u,5,3>: Cost 2 ins <6,2,u,3>, lane 2 + 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 1761045814U, // <6,u,5,7>: Cost 2 vuzpr <2,6,3,u>, RHS + 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS + 1852249902U, // <6,u,6,1>: Cost 2 vzipl <6,6,6,6>, LHS + 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2041479837U, // <6,u,6,3>: Cost 2 vtrnr <4,6,4,6>, LHS + 1504906648U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, <4,6,u,6> + 1852250266U, // <6,u,6,5>: Cost 2 vzipl <6,6,6,6>, RHS + 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS + 1058226176U, // <6,u,6,7>: Cost 1 ins RHS, lane 0 + 
1058226176U, // <6,u,6,u>: Cost 1 ins RHS, lane 0 + 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS + 1906753609U, // <6,u,7,1>: Cost 2 vzipr RHS, <0,0,u,1> + 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> + 833011868U, // <6,u,7,3>: Cost 1 vzipr RHS, LHS + 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS + 1906753937U, // <6,u,7,5>: Cost 2 vzipr RHS, <0,4,u,5> + 1906753776U, // <6,u,7,6>: Cost 2 vzipr RHS, <0,2,u,6> + 833015112U, // <6,u,7,7>: Cost 1 vzipr RHS, RHS + 833011873U, // <6,u,7,u>: Cost 1 vzipr RHS, LHS + 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS + 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS + 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS + 833020060U, // <6,u,u,3>: Cost 1 vzipr RHS, LHS + 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS + 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS + 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS + 833023304U, // <6,u,u,7>: Cost 1 vzipr RHS, RHS + 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS + 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> + 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> + 2987152532U, // <7,0,0,3>: Cost 3 vzipr <5,6,7,0>, <7,2,0,3> + 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> + 2987152210U, // <7,0,0,5>: Cost 3 vzipr <5,6,7,0>, <6,7,0,5> + 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> + 2987152050U, // <7,0,0,7>: Cost 3 vzipr <5,6,7,0>, <6,5,0,7> + 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> + 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS + 2128232448U, // <7,0,1,1>: Cost 2 ins , lane 0 + 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS + 2122317827U, // <7,0,1,3>: Cost 2 ins <7,0,1,u>, lane 3 + 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS + 2122317827U, // <7,0,1,5>: Cost 2 ins <7,0,1,u>, lane 3 + 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> + 2122317827U, // <7,0,1,7>: Cost 2 ins <7,0,1,u>, lane 3 + 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> + 2128314368U, // <7,0,2,2>: Cost 2 ins , lane 0 + 2122833925U, // <7,0,2,3>: Cost 2 ins <7,0,u,u>, lane 5 + 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> + 2712060126U, // <7,0,2,6>: Cost 3 vext3 RHS, <0,2,6,6> + 3201433601U, // <7,0,2,7>: Cost 3 ins <7,u,2,7>, lane 1 + 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> + 2983854080U, // <7,0,3,0>: Cost 3 vzipr <5,1,7,3>, <0,0,0,0> + 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> + 2128388096U, // <7,0,3,2>: Cost 2 ins , lane 0 + 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> + 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> + 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> + 3196559362U, // <7,0,3,6>: Cost 3 ins <7,0,u,6>, lane 2 + 3201507329U, // <7,0,3,7>: Cost 3 ins <7,u,3,7>, lane 1 + 2128388096U, // <7,0,3,u>: Cost 2 ins , lane 0 + 2712060230U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,2> + 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> + 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> + 3201548289U, // <7,0,4,3>: Cost 3 ins <7,u,4,3>, lane 1 + 2712060269U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,5> + 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS + 2651606348U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,0,2> + 3201581057U, // <7,0,4,7>: Cost 3 ins <7,u,4,7>, lane 1 + 
1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> + 2647625340U, // <7,0,5,0>: Cost 3 vext2 <5,0,7,0>, <5,0,7,0> + 2128527360U, // <7,0,5,1>: Cost 2 ins , lane 0 + 1991032934U, // <7,0,5,2>: Cost 2 vtrnl <7,4,5,6>, LHS + 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> + 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> + 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5> + 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> + 2847477046U, // <7,0,5,7>: Cost 3 vuzpr <4,7,5,0>, RHS + 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> + 2985869312U, // <7,0,6,0>: Cost 3 vzipr <5,4,7,6>, <0,0,0,0> + 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> + 2128609280U, // <7,0,6,2>: Cost 2 ins , lane 0 + 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> + 3202367488U, // <7,0,6,4>: Cost 3 ins , lane 0 + 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> + 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> + 2122833925U, // <7,0,6,7>: Cost 2 ins <7,0,u,u>, lane 5 + 2128609280U, // <7,0,6,u>: Cost 2 ins , lane 0 + 2847477192U, // <7,0,7,0>: Cost 3 vuzpr <4,7,5,0>, <4,7,5,0> + 1858961510U, // <7,0,7,1>: Cost 2 vzipl <7,7,7,7>, LHS + 1993179238U, // <7,0,7,2>: Cost 2 vtrnl <7,7,7,7>, LHS + 3201769473U, // <7,0,7,3>: Cost 3 ins <7,u,7,3>, lane 1 + 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> + 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> + 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> + 2128060417U, // <7,0,7,7>: Cost 2 ins <7,u,7,7>, lane 1 + 1858962077U, // <7,0,7,u>: Cost 2 vzipl <7,7,7,7>, LHS + 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> + 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> + 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS + 2122317827U, // <7,0,u,3>: Cost 2 ins <7,0,1,u>, lane 3 + 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> + 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS + 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> + 2122317827U, // <7,0,u,7>: Cost 2 ins <7,0,1,u>, lane 3 + 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS + 2712060634U, // <7,1,0,0>: Cost 3 vext3 RHS, <1,0,0,1> + 2128822272U, // <7,1,0,1>: Cost 2 ins , lane 0 + 1719615590U, // <7,1,0,2>: Cost 2 vuzpl <7,0,1,2>, LHS + 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> + 2859062268U, // <7,1,0,4>: Cost 3 vuzpr <6,7,0,1>, <7,0,1,4> + 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> + 2859061568U, // <7,1,0,6>: Cost 3 vuzpr <6,7,0,1>, <6,0,4,6> + 3201286145U, // <7,1,0,7>: Cost 3 ins <7,u,0,7>, lane 1 + 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> + 2712060714U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,0> + 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 2127577089U, // <7,1,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3> + 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> + 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> + 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> + 2859057294U, // <7,1,1,7>: Cost 3 vuzpr <6,7,0,1>, <0,1,6,7> + 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> + 2128961536U, // <7,1,2,0>: Cost 2 ins , lane 0 + 2128969728U, // <7,1,2,1>: Cost 2 ins , lane 0 + 2128977920U, // <7,1,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <7,1,2,3>: Cost 1 ins LHS, lane 0 + 2128994304U, // <7,1,2,4>: Cost 2 ins , lane 0 + 2129002496U, // <7,1,2,5>: Cost 2 ins , lane 0 + 2129010688U, // <7,1,2,6>: Cost 2 ins , lane 0 + 
2129018880U, // <7,1,2,7>: Cost 2 ins , lane 0 + 1055244288U, // <7,1,2,u>: Cost 1 ins LHS, lane 0 + 1510998118U, // <7,1,3,0>: Cost 2 vext1 <5,7,1,3>, LHS + 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> + 2047869030U, // <7,1,3,3>: Cost 2 vtrnr <5,7,1,3>, LHS + 1511001398U, // <7,1,3,4>: Cost 2 vext1 <5,7,1,3>, RHS + 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> + 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> + 2983859604U, // <7,1,3,7>: Cost 3 vzipr <5,1,7,3>, <7,5,1,7> + 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> + 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> + 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> + 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> + 2129133568U, // <7,1,4,3>: Cost 2 ins , lane 0 + 2859060432U, // <7,1,4,4>: Cost 3 vuzpr <6,7,0,1>, <4,4,4,4> + 2129149952U, // <7,1,4,5>: Cost 2 ins , lane 0 + 1719618870U, // <7,1,4,6>: Cost 2 vuzpl <7,0,1,2>, RHS + 2793360778U, // <7,1,4,7>: Cost 3 vuzpl <7,0,1,2>, <4,6,7,1> + 1719618888U, // <7,1,4,u>: Cost 2 vuzpl <7,0,1,2>, RHS + 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS + 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> + 3202940928U, // <7,1,5,2>: Cost 3 ins , lane 0 + 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> + 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS + 2985861458U, // <7,1,5,5>: Cost 3 vzipr <5,4,7,5>, <0,4,1,5> + 2127904769U, // <7,1,5,6>: Cost 2 ins <7,u,5,6>, lane 1 + 1785318710U, // <7,1,5,7>: Cost 2 vuzpr <6,7,0,1>, RHS + 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> + 2653606230U, // <7,1,6,0>: Cost 3 vext2 <6,0,7,1>, <6,0,7,1> + 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> + 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> + 2129281024U, // <7,1,6,3>: Cost 2 ins , lane 0 + 2859061350U, // <7,1,6,4>: Cost 3 vuzpr <6,7,0,1>, <5,6,7,4> + 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> + 2859060596U, // <7,1,6,6>: Cost 3 vuzpr <6,7,0,1>, <4,6,4,6> + 2129313792U, // <7,1,6,7>: Cost 2 ins , lane 0 + 2129281024U, // <7,1,6,u>: Cost 2 ins , lane 0 + 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> + 1785320270U, // <7,1,7,1>: Cost 2 vuzpr <6,7,0,1>, <6,7,0,1> + 2986543254U, // <7,1,7,2>: Cost 3 vzipr <5,5,7,7>, <3,0,1,2> + 2048196710U, // <7,1,7,3>: Cost 2 vtrnr <5,7,5,7>, LHS + 2793362538U, // <7,1,7,4>: Cost 3 vuzpl <7,0,1,2>, <7,1,4,6> + 2986541394U, // <7,1,7,5>: Cost 3 vzipr <5,5,7,7>, <0,4,1,5> + 3201794049U, // <7,1,7,6>: Cost 3 ins <7,u,7,6>, lane 1 + 2128060417U, // <7,1,7,7>: Cost 2 ins <7,u,7,7>, lane 1 + 2048196715U, // <7,1,7,u>: Cost 2 vtrnr <5,7,5,7>, LHS + 1511039078U, // <7,1,u,0>: Cost 2 vext1 <5,7,1,u>, LHS + 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> + 1719621422U, // <7,1,u,2>: Cost 2 vuzpl <7,0,1,2>, LHS + 1055244288U, // <7,1,u,3>: Cost 1 ins LHS, lane 0 + 1511042358U, // <7,1,u,4>: Cost 2 vext1 <5,7,1,u>, RHS + 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> + 1719621786U, // <7,1,u,6>: Cost 2 vuzpl <7,0,1,2>, RHS + 1785318953U, // <7,1,u,7>: Cost 2 vuzpr <6,7,0,1>, RHS + 1055244288U, // <7,1,u,u>: Cost 1 ins LHS, lane 0 + 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> + 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> + 2129494016U, // <7,2,0,2>: Cost 2 ins , lane 0 + 1913405542U, // <7,2,0,3>: Cost 2 vzipr <5,6,7,0>, LHS + 2712061400U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,2> + 2696725990U, // <7,2,0,5>: 
Cost 3 vext3 <2,0,5,7>, <2,0,5,7> + 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> + 2927577066U, // <7,2,0,7>: Cost 3 vzipl <7,0,1,2>, <2,7,0,1> + 1913405547U, // <7,2,0,u>: Cost 2 vzipr <5,6,7,0>, LHS + 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> + 3203301376U, // <7,2,1,1>: Cost 3 ins , lane 0 + 2127577089U, // <7,2,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 2974548070U, // <7,2,1,3>: Cost 3 vzipr <3,5,7,1>, LHS + 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> + 3203334144U, // <7,2,1,5>: Cost 3 ins , lane 0 + 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> + 2602718850U, // <7,2,1,7>: Cost 3 vext1 , <7,u,1,2> + 2127577089U, // <7,2,1,u>: Cost 2 ins <7,u,1,2>, lane 1 + 2712061524U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,0> + 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> + 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> + 2712061564U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,4> + 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> + 2712061581U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,3> + 3201433601U, // <7,2,2,7>: Cost 3 ins <7,u,2,7>, lane 1 + 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> + 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> + 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> + 1638319802U, // <7,2,3,2>: Cost 2 vext3 RHS, <2,3,2,3> + 1910112358U, // <7,2,3,3>: Cost 2 vzipr <5,1,7,3>, LHS + 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> + 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> + 1625048802U, // <7,2,3,6>: Cost 2 vext3 <2,3,6,7>, <2,3,6,7> + 2990495214U, // <7,2,3,7>: Cost 3 vzipr <6,2,7,3>, <7,6,2,7> + 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> + 2712061688U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,2> + 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> + 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> + 1913438310U, // <7,2,4,3>: Cost 2 vzipr <5,6,7,4>, LHS + 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> + 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> + 2129821696U, // <7,2,4,6>: Cost 2 ins , lane 0 + 3201581057U, // <7,2,4,7>: Cost 3 ins <7,u,4,7>, lane 1 + 1913438315U, // <7,2,4,u>: Cost 2 vzipr <5,6,7,4>, LHS + 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> + 3203596288U, // <7,2,5,1>: Cost 3 ins , lane 0 + 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> + 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> + 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> + 3203629056U, // <7,2,5,5>: Cost 3 ins , lane 0 + 2127904769U, // <7,2,5,6>: Cost 2 ins <7,u,5,6>, lane 1 + 2853096758U, // <7,2,5,7>: Cost 3 vuzpr <5,7,0,2>, RHS + 2127904769U, // <7,2,5,u>: Cost 2 ins <7,u,5,6>, lane 1 + 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS + 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> + 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> + 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> + 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS + 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> + 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> + 2129977344U, // <7,2,6,7>: Cost 2 ins , lane 0 + 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> + 3121939350U, // <7,2,7,0>: Cost 3 vtrnr <5,7,5,7>, <1,2,3,0> + 3203743744U, // <7,2,7,1>: Cost 3 ins , lane 0 + 1720366165U, // <7,2,7,2>: Cost 2 vuzpl <7,1,2,3>, <7,1,2,3> + 1912799334U, // <7,2,7,3>: Cost 2 vzipr 
<5,5,7,7>, LHS + 3121939354U, // <7,2,7,4>: Cost 3 vtrnr <5,7,5,7>, <1,2,3,4> + 3203776512U, // <7,2,7,5>: Cost 3 ins , lane 0 + 2986541404U, // <7,2,7,6>: Cost 3 vzipr <5,5,7,7>, <0,4,2,6> + 2128060417U, // <7,2,7,7>: Cost 2 ins <7,u,7,7>, lane 1 + 1912799339U, // <7,2,7,u>: Cost 2 vzipr <5,5,7,7>, LHS + 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> + 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> + 2129494016U, // <7,2,u,2>: Cost 2 ins , lane 0 + 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> + 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> + 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> + 2129821696U, // <7,2,u,6>: Cost 2 ins , lane 0 + 2129977344U, // <7,2,u,7>: Cost 2 ins , lane 0 + 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> + 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> + 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> + 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> + 2712062119U, // <7,3,0,3>: Cost 3 vext3 RHS, <3,0,3,1> + 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> + 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> + 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> + 2985157776U, // <7,3,0,7>: Cost 3 vzipr <5,3,7,0>, <1,5,3,7> + 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> + 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> + 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> + 2127577089U, // <7,3,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 1779433574U, // <7,3,1,3>: Cost 2 vuzpr <5,7,1,3>, LHS + 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> + 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> + 2853179064U, // <7,3,1,6>: Cost 3 vuzpr <5,7,1,3>, <5,1,4,6> + 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> + 1779433579U, // <7,3,1,u>: Cost 2 vuzpr <5,7,1,3>, LHS + 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> + 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> + 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> + 2130313216U, // <7,3,2,3>: Cost 2 ins , lane 0 + 2712062292U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,3> + 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> + 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> + 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> + 2130313216U, // <7,3,2,u>: Cost 2 ins , lane 0 + 2712062334U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,0> + 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> + 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> + 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> + 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> + 2990491658U, // <7,3,3,6>: Cost 3 vzipr <6,2,7,3>, <2,7,3,6> + 2972574864U, // <7,3,3,7>: Cost 3 vzipr <3,2,7,3>, <1,5,3,7> + 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> + 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> + 2987180790U, // <7,3,4,2>: Cost 3 vzipr <5,6,7,4>, <1,0,3,2> + 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> + 2712062455U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,4> + 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> + 2648313164U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,0,2> + 2985190544U, // <7,3,4,7>: Cost 3 vzipr <5,3,7,4>, <1,5,3,7> + 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> + 2712062498U, // <7,3,5,0>: Cost 3 vext3 RHS, <3,5,0,2> + 1574571728U, // <7,3,5,1>: Cost 2 vext2 
<5,1,7,3>, <5,1,7,3> + 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> + 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> + 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> + 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> + 2127904769U, // <7,3,5,6>: Cost 2 ins <7,u,5,6>, lane 1 + 1779436854U, // <7,3,5,7>: Cost 2 vuzpr <5,7,1,3>, RHS + 1779436855U, // <7,3,5,u>: Cost 2 vuzpr <5,7,1,3>, RHS + 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> + 2853178744U, // <7,3,6,1>: Cost 3 vuzpr <5,7,1,3>, <4,6,5,1> + 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> + 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> + 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> + 3204366336U, // <7,3,6,5>: Cost 3 ins , lane 0 + 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> + 2130640896U, // <7,3,6,7>: Cost 2 ins , lane 0 + 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> + 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> + 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> + 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> + 1779437696U, // <7,3,7,3>: Cost 2 vuzpr <5,7,1,3>, <5,7,1,3> + 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> + 2237582070U, // <7,3,7,5>: Cost 3 vrev <3,7,5,7> + 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> + 2128060417U, // <7,3,7,7>: Cost 2 ins <7,u,7,7>, lane 1 + 1779437696U, // <7,3,7,u>: Cost 2 vuzpr <5,7,1,3>, <5,7,1,3> + 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> + 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> + 1593153452U, // <7,3,u,2>: Cost 2 vext2 , + 1779434141U, // <7,3,u,3>: Cost 2 vuzpr <5,7,1,3>, LHS + 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> + 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> + 2127904769U, // <7,3,u,6>: Cost 2 ins <7,u,5,6>, lane 1 + 1779437097U, // <7,3,u,7>: Cost 2 vuzpr <5,7,1,3>, RHS + 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> + 2714053478U, // <7,4,0,0>: Cost 3 vext3 RHS, <4,0,0,2> + 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> + 3201253377U, // <7,4,0,3>: Cost 3 ins <7,u,0,3>, lane 1 + 2714053512U, // <7,4,0,4>: Cost 3 vext3 RHS, <4,0,4,0> + 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> + 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> + 2927578568U, // <7,4,0,7>: Cost 3 vzipl <7,0,1,2>, <4,7,5,0> + 1640311726U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,2> + 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> + 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> + 2127577089U, // <7,4,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> + 3127495888U, // <7,4,1,4>: Cost 3 vtrnr <6,7,0,1>, <4,4,4,4> + 2130919424U, // <7,4,1,5>: Cost 2 ins , lane 0 + 1988054326U, // <7,4,1,6>: Cost 2 vtrnl <7,0,1,2>, RHS + 3061796234U, // <7,4,1,7>: Cost 3 vtrnl <7,0,1,2>, <4,6,7,1> + 1988054344U, // <7,4,1,u>: Cost 2 vtrnl <7,0,1,2>, RHS + 3204694016U, // <7,4,2,0>: Cost 3 ins , lane 0 + 3199172610U, // <7,4,2,1>: Cost 3 ins <7,4,u,1>, lane 2 + 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> + 2125488133U, // <7,4,2,3>: Cost 2 ins <7,4,u,u>, lane 5 + 2853258138U, // <7,4,2,4>: Cost 3 vuzpr <5,7,2,4>, <1,2,3,4> + 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> + 2131001344U, // <7,4,2,6>: Cost 2 ins , lane 0 + 3201433601U, // <7,4,2,7>: Cost 3 ins <7,u,2,7>, lane 1 + 2125488133U, // <7,4,2,u>: 
Cost 2 ins <7,4,u,u>, lane 5 + 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> + 3201458177U, // <7,4,3,1>: Cost 3 ins <7,u,3,1>, lane 1 + 3204784128U, // <7,4,3,2>: Cost 3 ins , lane 0 + 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> + 2983857360U, // <7,4,3,4>: Cost 3 vzipr <5,1,7,3>, <4,4,4,4> + 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> + 2125471746U, // <7,4,3,6>: Cost 2 ins <7,4,u,6>, lane 2 + 3201507329U, // <7,4,3,7>: Cost 3 ins <7,u,3,7>, lane 1 + 2125471746U, // <7,4,3,u>: Cost 2 ins <7,4,u,6>, lane 2 + 2714053800U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,0> + 3201531905U, // <7,4,4,1>: Cost 3 ins <7,u,4,1>, lane 1 + 3201540097U, // <7,4,4,2>: Cost 3 ins <7,u,4,2>, lane 1 + 2987185336U, // <7,4,4,3>: Cost 3 vzipr <5,6,7,4>, <7,2,4,3> + 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> + 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> + 2987185664U, // <7,4,4,7>: Cost 3 vzipr <5,6,7,4>, <7,6,4,7> + 1640312054U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,6> + 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS + 2125266947U, // <7,4,5,1>: Cost 2 ins <7,4,5,u>, lane 3 + 2125266947U, // <7,4,5,2>: Cost 2 ins <7,4,5,u>, lane 3 + 2125266947U, // <7,4,5,3>: Cost 2 ins <7,4,5,u>, lane 3 + 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS + 2131214336U, // <7,4,5,5>: Cost 2 ins , lane 0 + 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS + 2125266947U, // <7,4,5,7>: Cost 2 ins <7,4,5,u>, lane 3 + 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS + 1638468940U, // <7,4,6,0>: Cost 2 vext3 RHS, <4,6,0,2> + 2712063318U, // <7,4,6,1>: Cost 3 vext3 RHS, <4,6,1,3> + 2712210780U, // <7,4,6,2>: Cost 3 vext3 RHS, <4,6,2,0> + 2712210790U, // <7,4,6,3>: Cost 3 vext3 RHS, <4,6,3,1> + 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> + 2131296256U, // <7,4,6,6>: Cost 2 ins , lane 0 + 2125488133U, // <7,4,6,7>: Cost 2 ins <7,4,u,u>, lane 5 + 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> + 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> + 2794279930U, // <7,4,7,1>: Cost 3 vuzpl <7,1,4,6>, <7,0,1,2> + 3201761281U, // <7,4,7,2>: Cost 3 ins <7,u,7,2>, lane 1 + 3201769473U, // <7,4,7,3>: Cost 3 ins <7,u,7,3>, lane 1 + 2847509964U, // <7,4,7,4>: Cost 3 vuzpr <4,7,5,4>, <4,7,5,4> + 1858964790U, // <7,4,7,5>: Cost 2 vzipl <7,7,7,7>, RHS + 1993182518U, // <7,4,7,6>: Cost 2 vtrnl <7,7,7,7>, RHS + 2128060417U, // <7,4,7,7>: Cost 2 ins <7,u,7,7>, lane 1 + 1858965033U, // <7,4,7,u>: Cost 2 vzipl <7,7,7,7>, RHS + 1640312302U, // <7,4,u,0>: Cost 2 vext3 RHS, <4,u,0,2> + 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2127577089U, // <7,4,u,2>: Cost 2 ins <7,u,1,2>, lane 1 + 2125488133U, // <7,4,u,3>: Cost 2 ins <7,4,u,u>, lane 5 + 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> + 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> + 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS + 2125266947U, // <7,4,u,7>: Cost 2 ins <7,4,5,u>, lane 3 + 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS + 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS + 2131476480U, // <7,5,0,1>: Cost 2 ins , lane 0 + 1722597478U, // <7,5,0,2>: Cost 2 vuzpl <7,4,5,6>, LHS + 3201253377U, // <7,5,0,3>: Cost 3 ins <7,u,0,3>, lane 1 + 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> + 2987150554U, // <7,5,0,5>: Cost 3 vzipr <5,6,7,0>, <4,4,5,5> + 2987149826U, // <7,5,0,6>: Cost 3 vzipr <5,6,7,0>, <3,4,5,6> + 2131525632U, // 
<7,5,0,7>: Cost 2 ins , lane 0 + 1722597532U, // <7,5,0,u>: Cost 2 vuzpl <7,4,5,6>, LHS + 2714054287U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> + 2249183358U, // <7,5,1,1>: Cost 3 vrev <5,7,1,1> + 2127577089U, // <7,5,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 1785643110U, // <7,5,1,3>: Cost 2 vuzpr <6,7,4,5>, LHS + 2714054327U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> + 3127496708U, // <7,5,1,5>: Cost 3 vtrnr <6,7,0,1>, <5,5,5,5> + 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> + 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> + 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> + 2249117814U, // <7,5,2,0>: Cost 3 vrev <5,7,0,2> + 2714054379U, // <7,5,2,1>: Cost 3 vext3 RHS, <5,2,1,3> + 2249265288U, // <7,5,2,2>: Cost 3 vrev <5,7,2,2> + 2131640320U, // <7,5,2,3>: Cost 2 ins , lane 0 + 2859385754U, // <7,5,2,4>: Cost 3 vuzpr <6,7,4,5>, <1,2,3,4> + 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> + 2712063768U, // <7,5,2,6>: Cost 3 vext3 RHS, <5,2,6,3> + 2131673088U, // <7,5,2,7>: Cost 2 ins , lane 0 + 2131640320U, // <7,5,2,u>: Cost 2 ins , lane 0 + 3201449985U, // <7,5,3,0>: Cost 3 ins <7,u,3,0>, lane 1 + 1175457920U, // <7,5,3,1>: Cost 2 vrev <5,7,1,3> + 2249273481U, // <7,5,3,2>: Cost 3 vrev <5,7,2,3> + 2249347218U, // <7,5,3,3>: Cost 3 vrev <5,7,3,3> + 3201482753U, // <7,5,3,4>: Cost 3 ins <7,u,3,4>, lane 1 + 2983857370U, // <7,5,3,5>: Cost 3 vzipr <5,1,7,3>, <4,4,5,5> + 2983856642U, // <7,5,3,6>: Cost 3 vzipr <5,1,7,3>, <3,4,5,6> + 2047872310U, // <7,5,3,7>: Cost 2 vtrnr <5,7,1,3>, RHS + 2047872311U, // <7,5,3,u>: Cost 2 vtrnr <5,7,1,3>, RHS + 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS + 2987182994U, // <7,5,4,1>: Cost 3 vzipr <5,6,7,4>, <4,0,5,1> + 2249281674U, // <7,5,4,2>: Cost 3 vrev <5,7,2,4> + 3201548289U, // <7,5,4,3>: Cost 3 ins <7,u,4,3>, lane 1 + 2579074508U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, <4,7,5,4> + 2131804160U, // <7,5,4,5>: Cost 2 ins , lane 0 + 1722600758U, // <7,5,4,6>: Cost 2 vuzpl <7,4,5,6>, RHS + 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> + 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> + 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> + 2714054620U, // <7,5,5,1>: Cost 3 vext3 RHS, <5,5,1,1> + 3201613825U, // <7,5,5,2>: Cost 3 ins <7,u,5,2>, lane 1 + 2649657204U, // <7,5,5,3>: Cost 3 vext2 <5,3,7,5>, <5,3,7,5> + 2714054651U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,5> + 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> + 2127904769U, // <7,5,5,6>: Cost 2 ins <7,u,5,6>, lane 1 + 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> + 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> + 2131910656U, // <7,5,6,0>: Cost 2 ins , lane 0 + 2131918848U, // <7,5,6,1>: Cost 2 ins , lane 0 + 2131927040U, // <7,5,6,2>: Cost 2 ins , lane 0 + 2131935232U, // <7,5,6,3>: Cost 2 ins , lane 0 + 2131943424U, // <7,5,6,4>: Cost 2 ins , lane 0 + 2131951616U, // <7,5,6,5>: Cost 2 ins , lane 0 + 2131959808U, // <7,5,6,6>: Cost 2 ins , lane 0 + 1058226176U, // <7,5,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <7,5,6,u>: Cost 1 ins RHS, lane 0 + 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS + 1638469760U, // <7,5,7,1>: Cost 2 vext3 RHS, <5,7,1,3> + 2712211590U, // <7,5,7,2>: Cost 3 vext3 RHS, <5,7,2,0> + 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> + 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS + 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> + 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> + 2048199990U, // <7,5,7,7>: Cost 2 vtrnr <5,7,5,7>, RHS + 
1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> + 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS + 1638469841U, // <7,5,u,1>: Cost 2 vext3 RHS, <5,u,1,3> + 1722603310U, // <7,5,u,2>: Cost 2 vuzpl <7,4,5,6>, LHS + 1785643677U, // <7,5,u,3>: Cost 2 vuzpr <6,7,4,5>, LHS + 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS + 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> + 1722603674U, // <7,5,u,6>: Cost 2 vuzpl <7,4,5,6>, RHS + 1058226176U, // <7,5,u,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <7,5,u,u>: Cost 1 ins RHS, lane 0 + 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> + 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2132148224U, // <7,6,0,2>: Cost 2 ins , lane 0 + 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> + 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> + 2987151292U, // <7,6,0,5>: Cost 3 vzipr <5,6,7,0>, <5,4,6,5> + 2987150564U, // <7,6,0,6>: Cost 3 vzipr <5,6,7,0>, <4,4,6,6> + 1913408822U, // <7,6,0,7>: Cost 2 vzipr <5,6,7,0>, RHS + 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS + 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> + 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> + 2127577089U, // <7,6,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 2841329766U, // <7,6,1,3>: Cost 3 vuzpr <3,7,2,6>, LHS + 2579123666U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, <4,7,6,1> + 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> + 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> + 2974551350U, // <7,6,1,7>: Cost 3 vzipr <3,5,7,1>, RHS + 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> + 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> + 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> + 2714055117U, // <7,6,2,2>: Cost 3 vext3 RHS, <6,2,2,3> + 2132303872U, // <7,6,2,3>: Cost 2 ins , lane 0 + 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> + 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> + 2714055152U, // <7,6,2,6>: Cost 3 vext3 RHS, <6,2,6,2> + 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> + 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> + 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> + 3121614200U, // <7,6,3,1>: Cost 3 vtrnr <5,7,1,3>, <4,6,5,1> + 1181504354U, // <7,6,3,2>: Cost 2 vrev <6,7,2,3> + 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> + 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> + 3206135808U, // <7,6,3,5>: Cost 3 ins , lane 0 + 2983857380U, // <7,6,3,6>: Cost 3 vzipr <5,1,7,3>, <4,4,6,6> + 1910115638U, // <7,6,3,7>: Cost 2 vzipr <5,1,7,3>, RHS + 1910115639U, // <7,6,3,u>: Cost 2 vzipr <5,1,7,3>, RHS + 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> + 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> + 2714055276U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,0> + 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> + 2650328272U, // <7,6,4,4>: Cost 3 vext2 <5,4,7,6>, <4,4,4,4> + 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS + 2132475904U, // <7,6,4,6>: Cost 2 ins , lane 0 + 1913441590U, // <7,6,4,7>: Cost 2 vzipr <5,6,7,4>, RHS + 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS + 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS + 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> + 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> + 3201622017U, // <7,6,5,3>: Cost 3 ins <7,u,5,3>, lane 1 + 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> + 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> + 2127904769U, // <7,6,5,6>: Cost 2 ins <7,u,5,6>, lane 1 + 
2971929910U, // <7,6,5,7>: Cost 3 vzipr <3,1,7,5>, RHS + 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5> + 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> + 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> + 2712212245U, // <7,6,6,2>: Cost 3 vext3 RHS, <6,6,2,7> + 3201695745U, // <7,6,6,3>: Cost 3 ins <7,u,6,3>, lane 1 + 2714055461U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,5> + 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> + 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> + 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> + 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> + 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> + 1638323042U, // <7,6,7,2>: Cost 2 vext3 RHS, <6,7,2,3> + 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> + 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> + 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> + 1638323082U, // <7,6,7,6>: Cost 2 vext3 RHS, <6,7,6,7> + 1912802614U, // <7,6,7,7>: Cost 2 vzipr <5,5,7,7>, RHS + 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> + 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> + 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2132148224U, // <7,6,u,2>: Cost 2 ins , lane 0 + 2132303872U, // <7,6,u,3>: Cost 2 ins , lane 0 + 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> + 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS + 2132475904U, // <7,6,u,6>: Cost 2 ins , lane 0 + 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> + 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> + 1913409634U, // <7,7,0,0>: Cost 2 vzipr <5,6,7,0>, <5,6,7,0> + 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> + 1724743782U, // <7,7,0,2>: Cost 2 vuzpl <7,7,7,7>, LHS + 2987151056U, // <7,7,0,3>: Cost 3 vzipr <5,6,7,0>, <5,1,7,3> + 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> + 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> + 2987151302U, // <7,7,0,6>: Cost 3 vzipr <5,6,7,0>, <5,4,7,6> + 2127470594U, // <7,7,0,7>: Cost 2 ins <7,7,u,7>, lane 2 + 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> + 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> + 2053755726U, // <7,7,1,1>: Cost 2 vtrnr <6,7,0,1>, <6,7,0,1> + 2127577089U, // <7,7,1,2>: Cost 2 ins <7,u,1,2>, lane 1 + 1779761254U, // <7,7,1,3>: Cost 2 vuzpr <5,7,5,7>, LHS + 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS + 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> + 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> + 2127470594U, // <7,7,1,7>: Cost 2 ins <7,7,u,7>, lane 2 + 1779761259U, // <7,7,1,u>: Cost 2 vuzpr <5,7,5,7>, LHS + 2853503894U, // <7,7,2,0>: Cost 3 vuzpr <5,7,5,7>, <1,2,3,0> + 3206692864U, // <7,7,2,1>: Cost 3 ins , lane 0 + 1988801621U, // <7,7,2,2>: Cost 2 vtrnl <7,1,2,3>, <7,1,2,3> + 2132967424U, // <7,7,2,3>: Cost 2 ins , lane 0 + 2853503898U, // <7,7,2,4>: Cost 3 vuzpr <5,7,5,7>, <1,2,3,4> + 3206725632U, // <7,7,2,5>: Cost 3 ins , lane 0 + 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> + 2127470594U, // <7,7,2,7>: Cost 2 ins <7,7,u,7>, lane 2 + 1988801621U, // <7,7,2,u>: Cost 2 vtrnl <7,1,2,3>, <7,1,2,3> + 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> + 3121615694U, // <7,7,3,1>: Cost 3 vtrnr <5,7,1,3>, <6,7,0,1> + 3201171458U, // <7,7,3,2>: Cost 3 ins <7,7,u,2>, lane 2 + 1910116048U, // <7,7,3,3>: Cost 2 vzipr <5,1,7,3>, <5,1,7,3> + 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> + 2639055462U, // <7,7,3,5>: Cost 3 vext2 
<3,5,7,7>, <3,5,7,7> + 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> + 2127470594U, // <7,7,3,7>: Cost 2 ins <7,7,u,7>, lane 2 + 1910116048U, // <7,7,3,u>: Cost 2 vzipr <5,1,7,3>, <5,1,7,3> + 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> + 3062715386U, // <7,7,4,1>: Cost 3 vtrnl <7,1,4,6>, <7,0,1,2> + 3201540097U, // <7,7,4,2>: Cost 3 ins <7,u,4,2>, lane 1 + 2987183824U, // <7,7,4,3>: Cost 3 vzipr <5,6,7,4>, <5,1,7,3> + 1913442406U, // <7,7,4,4>: Cost 2 vzipr <5,6,7,4>, <5,6,7,4> + 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> + 1724747062U, // <7,7,4,6>: Cost 2 vuzpl <7,7,7,7>, RHS + 2127470594U, // <7,7,4,7>: Cost 2 ins <7,7,u,7>, lane 2 + 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> + 2853508547U, // <7,7,5,0>: Cost 3 vuzpr <5,7,5,7>, <7,5,7,0> + 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> + 3201613825U, // <7,7,5,2>: Cost 3 ins <7,u,5,2>, lane 1 + 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> + 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> + 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> + 2127904769U, // <7,7,5,6>: Cost 2 ins <7,u,5,6>, lane 1 + 1779764534U, // <7,7,5,7>: Cost 2 vuzpr <5,7,5,7>, RHS + 1779764535U, // <7,7,5,u>: Cost 2 vuzpr <5,7,5,7>, RHS + 2985873506U, // <7,7,6,0>: Cost 3 vzipr <5,4,7,6>, <5,6,7,0> + 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> + 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> + 2985873104U, // <7,7,6,3>: Cost 3 vzipr <5,4,7,6>, <5,1,7,3> + 2985873510U, // <7,7,6,4>: Cost 3 vzipr <5,4,7,6>, <5,6,7,4> + 2985873511U, // <7,7,6,5>: Cost 3 vzipr <5,4,7,6>, <5,6,7,5> + 1912131526U, // <7,7,6,6>: Cost 2 vzipr <5,4,7,6>, <5,4,7,6> + 2133295104U, // <7,7,6,7>: Cost 2 ins , lane 0 + 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> + 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS + 2127405059U, // <7,7,7,1>: Cost 2 ins <7,7,7,u>, lane 3 + 2127405059U, // <7,7,7,2>: Cost 2 ins <7,7,7,u>, lane 3 + 2127405059U, // <7,7,7,3>: Cost 2 ins <7,7,7,u>, lane 3 + 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS + 2127405059U, // <7,7,7,5>: Cost 2 ins <7,7,7,u>, lane 3 + 2127405059U, // <7,7,7,6>: Cost 2 ins <7,7,7,u>, lane 3 + 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS + 1913409634U, // <7,7,u,0>: Cost 2 vzipr <5,6,7,0>, <5,6,7,0> + 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> + 1724749614U, // <7,7,u,2>: Cost 2 vuzpl <7,7,7,7>, LHS + 1779761821U, // <7,7,u,3>: Cost 2 vuzpr <5,7,5,7>, LHS + 1913442406U, // <7,7,u,4>: Cost 2 vzipr <5,6,7,4>, <5,6,7,4> + 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> + 1724749978U, // <7,7,u,6>: Cost 2 vuzpl <7,7,7,7>, RHS + 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS + 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, + 1720131686U, // <7,u,0,2>: Cost 2 vuzpl <7,0,u,2>, LHS + 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, + 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, + 1853839514U, // <7,u,0,5>: Cost 2 vzipl <7,0,1,2>, RHS + 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, + 1913408840U, // <7,u,0,7>: Cost 2 vzipr <5,6,7,0>, RHS + 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, + 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS + 2128232448U, // <7,u,1,1>: Cost 2 ins , lane 0 + 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS + 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, + 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS + 2122317827U, // 
<7,u,1,5>: Cost 2 ins <7,0,1,u>, lane 3 + 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> + 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, + 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS + 1662211948U, // <7,u,2,0>: Cost 2 vext3 RHS, + 2128969728U, // <7,u,2,1>: Cost 2 ins , lane 0 + 2128314368U, // <7,u,2,2>: Cost 2 ins , lane 0 + 1055244288U, // <7,u,2,3>: Cost 1 ins LHS, lane 0 + 1662211988U, // <7,u,2,4>: Cost 2 vext3 RHS, + 2129002496U, // <7,u,2,5>: Cost 2 ins , lane 0 + 2131001344U, // <7,u,2,6>: Cost 2 ins , lane 0 + 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, + 1055244288U, // <7,u,2,u>: Cost 1 ins LHS, lane 0 + 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, + 1638324167U, // <7,u,3,1>: Cost 2 vext3 RHS, + 2128388096U, // <7,u,3,2>: Cost 2 ins , lane 0 + 1910112412U, // <7,u,3,3>: Cost 2 vzipr <5,1,7,3>, LHS + 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, + 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, + 2125471746U, // <7,u,3,6>: Cost 2 ins <7,4,u,6>, lane 2 + 1910115656U, // <7,u,3,7>: Cost 2 vzipr <5,1,7,3>, RHS + 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, + 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, + 1856821038U, // <7,u,4,1>: Cost 2 vzipl <7,4,5,6>, LHS + 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, + 1913438364U, // <7,u,4,3>: Cost 2 vzipr <5,6,7,4>, LHS + 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, + 1720134966U, // <7,u,4,6>: Cost 2 vuzpl <7,0,u,2>, RHS + 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, + 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, + 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS + 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> + 1991038766U, // <7,u,5,2>: Cost 2 vtrnl <7,4,5,6>, LHS + 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, + 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> + 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> + 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS + 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, + 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS + 1662359728U, // <7,u,6,0>: Cost 2 vext3 RHS, + 2131918848U, // <7,u,6,1>: Cost 2 ins , lane 0 + 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> + 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, + 1662359768U, // <7,u,6,4>: Cost 2 vext3 RHS, + 2131951616U, // <7,u,6,5>: Cost 2 ins , lane 0 + 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> + 1058226176U, // <7,u,6,7>: Cost 1 ins RHS, lane 0 + 1058226176U, // <7,u,6,u>: Cost 1 ins RHS, lane 0 + 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, + 1640462603U, // <7,u,7,1>: Cost 2 vext3 RHS, + 1993185070U, // <7,u,7,2>: Cost 2 vtrnl <7,7,7,7>, LHS + 1912799388U, // <7,u,7,3>: Cost 2 vzipr <5,5,7,7>, LHS + 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, + 1640462643U, // <7,u,7,5>: Cost 2 vext3 RHS, + 1993185434U, // <7,u,7,6>: Cost 2 vtrnl <7,7,7,7>, RHS + 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS + 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, + 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, + 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS + 1055244288U, // <7,u,u,3>: Cost 1 ins LHS, lane 0 + 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, + 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, + 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS + 1058226176U, // <7,u,u,7>: Cost 1 ins RHS, lane 0 + 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS + 135053414U, // : Cost 1 vdup0 LHS + 1611489290U, // : Cost 2 vext3 LHS, <0,0,1,1> + 1611489300U, // : Cost 2 vext3 LHS, <0,0,2,2> + 
2085707777U, // : Cost 2 ins <0,u,0,3>, lane 1 + 1481706806U, // : Cost 2 vext1 <0,u,0,0>, RHS + 2080440323U, // : Cost 2 ins <0,0,0,u>, lane 3 + 2080440323U, // : Cost 2 ins <0,0,0,u>, lane 3 + 2080440323U, // : Cost 2 ins <0,0,0,u>, lane 3 + 135053414U, // : Cost 1 vdup0 LHS + 1493655654U, // : Cost 2 vext1 <2,u,0,1>, LHS + 786808934U, // : Cost 1 vzipl LHS, LHS + 537747563U, // : Cost 1 vext3 LHS, LHS + 1756332134U, // : Cost 2 vuzpr <1,u,3,0>, LHS + 1493658934U, // : Cost 2 vext1 <2,u,0,1>, RHS + 2085797889U, // : Cost 2 ins <0,u,1,5>, lane 1 + 1517548447U, // : Cost 2 vext1 <6,u,0,1>, <6,u,0,1> + 2080514051U, // : Cost 2 ins <0,0,1,u>, lane 3 + 537747612U, // : Cost 1 vext3 LHS, LHS + 1611489444U, // : Cost 2 vext3 LHS, <0,2,0,2> + 1994768394U, // : Cost 2 vtrnl LHS, <0,0,1,1> + 921026662U, // : Cost 1 vtrnl LHS, LHS + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1611489484U, // : Cost 2 vext3 LHS, <0,2,4,6> + 2080587779U, // : Cost 2 ins <0,0,2,u>, lane 3 + 2085879809U, // : Cost 2 ins <0,u,2,6>, lane 1 + 2080587779U, // : Cost 2 ins <0,0,2,u>, lane 3 + 921026716U, // : Cost 1 vtrnl LHS, LHS + 1880326144U, // : Cost 2 vzipr LHS, <0,0,0,0> + 1880327846U, // : Cost 2 vzipr LHS, <2,3,0,1> + 72589981U, // : Cost 1 vrev LHS + 2091900929U, // : Cost 2 ins <1,u,3,3>, lane 1 + 2091909121U, // : Cost 2 ins <1,u,3,4>, lane 1 + 2086633475U, // : Cost 2 ins <1,0,3,u>, lane 3 + 2086633475U, // : Cost 2 ins <1,0,3,u>, lane 3 + 2091933697U, // : Cost 2 ins <1,u,3,7>, lane 1 + 73032403U, // : Cost 1 vrev LHS + 1705610572U, // : Cost 2 vuzpl <4,6,0,2>, <4,6,0,2> + 1611489618U, // : Cost 2 vext3 LHS, <0,4,1,5> + 1611489628U, // : Cost 2 vext3 LHS, <0,4,2,6> + 2086002689U, // : Cost 2 ins <0,u,4,3>, lane 1 + 1947828428U, // : Cost 2 vtrnl <0,2,4,6>, <0,2,4,6> + 1551396150U, // : Cost 2 vext2 <1,2,u,0>, RHS + 1726844214U, // : Cost 2 vuzpl , RHS + 2109923329U, // : Cost 2 ins <4,u,4,7>, lane 1 + 1611932050U, // : Cost 2 vext3 LHS, <0,4,u,6> + 1863532544U, // : Cost 2 vzipl RHS, <0,0,0,0> + 789790822U, // : Cost 1 vzipl RHS, LHS + 1996349542U, // : Cost 2 vtrnl , LHS + 2104696835U, // : Cost 2 ins <4,0,5,u>, lane 3 + 1863532882U, // : Cost 2 vzipl RHS, <0,4,1,5> + 2109980673U, // : Cost 2 ins <4,u,5,5>, lane 1 + 1577939051U, // : Cost 2 vext2 <5,6,u,0>, <5,6,u,0> + 1756335414U, // : Cost 2 vuzpr <1,u,3,0>, RHS + 789791389U, // : Cost 1 vzipl RHS, LHS + 1997750272U, // : Cost 2 vtrnl RHS, <0,0,0,0> + 1997750282U, // : Cost 2 vtrnl RHS, <0,0,1,1> + 924008550U, // : Cost 1 vtrnl RHS, LHS + 2104770563U, // : Cost 2 ins <4,0,6,u>, lane 3 + 1146503858U, // : Cost 2 vrev <0,u,4,6> + 2104770563U, // : Cost 2 ins <4,0,6,u>, lane 3 + 2110062593U, // : Cost 2 ins <4,u,6,6>, lane 1 + 1036328961U, // : Cost 1 ins RHS, lane 1 + 924008604U, // : Cost 1 vtrnl RHS, LHS + 1906900992U, // : Cost 2 vzipr RHS, <0,0,0,0> + 1906902694U, // : Cost 2 vzipr RHS, <2,3,0,1> + 1906901156U, // : Cost 2 vzipr RHS, <0,2,0,2> + 2116083713U, // : Cost 2 ins <5,u,7,3>, lane 1 + 2116091905U, // : Cost 2 ins <5,u,7,4>, lane 1 + 2980643874U, // : Cost 3 vzipr RHS, <1,4,0,5> + 2116108289U, // : Cost 2 ins <5,u,7,6>, lane 1 + 2116116481U, // : Cost 2 ins <5,u,7,7>, lane 1 + 1906901162U, // : Cost 2 vzipr RHS, <0,2,0,u> + 135053414U, // : Cost 1 vdup0 LHS + 791453798U, // : Cost 1 vzipl LHS, LHS + 537748125U, // : Cost 1 vext3 LHS, LHS + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1611932338U, // : Cost 2 vext3 LHS, <0,u,4,6> + 1551399066U, // : Cost 2 vext2 <1,2,u,0>, RHS + 1517605798U, // : Cost 2 vext1 <6,u,0,u>, <6,u,0,u> + 1036328961U, // : 
Cost 1 ins RHS, lane 1 + 537748179U, // : Cost 1 vext3 LHS, LHS + 1818149622U, // : Cost 2 vzipl <1,0,3,2>, <1,0,3,2> + 1007951877U, // : Cost 1 ins LHS, lane 5 + 1725587558U, // : Cost 2 vuzpl , LHS + 1007910914U, // : Cost 1 ins LHS, lane 2 + 2081660930U, // : Cost 2 ins <0,1,u,4>, lane 2 + 2081669122U, // : Cost 2 ins <0,1,u,5>, lane 2 + 2081677314U, // : Cost 2 ins <0,1,u,6>, lane 2 + 2081685506U, // : Cost 2 ins <0,1,u,7>, lane 2 + 1007951877U, // : Cost 1 ins LHS, lane 5 + 1481786002U, // : Cost 2 vext1 <0,u,1,1>, <0,u,1,1> + 202162278U, // : Cost 1 vdup1 LHS + 1860551574U, // : Cost 2 vzipl LHS, <1,2,3,0> + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1481788726U, // : Cost 2 vext1 <0,u,1,1>, RHS + 1860551824U, // : Cost 2 vzipl LHS, <1,5,3,7> + 2081677314U, // : Cost 2 ins <0,1,u,6>, lane 2 + 2081685506U, // : Cost 2 ins <0,1,u,7>, lane 2 + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 835584U, // : Cost 0 copy LHS + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 835584U, // : Cost 0 copy LHS + 1487773798U, // : Cost 2 vext1 <1,u,1,3>, LHS + 1611490264U, // : Cost 2 vext3 LHS, <1,3,1,3> + 1880328342U, // : Cost 2 vzipr LHS, <3,0,1,2> + 945004646U, // : Cost 1 vtrnr LHS, LHS + 1487777078U, // : Cost 2 vext1 <1,u,1,3>, RHS + 1611490304U, // : Cost 2 vext3 LHS, <1,3,5,7> + 2087297027U, // : Cost 2 ins <1,1,3,u>, lane 3 + 2133737476U, // : Cost 2 ins , lane 4 + 945004651U, // : Cost 1 vtrnr LHS, LHS + 1567992749U, // : Cost 2 vext2 <4,0,u,1>, <4,0,u,1> + 2081636354U, // : Cost 2 ins <0,1,u,1>, lane 2 + 2081644546U, // : Cost 2 ins <0,1,u,2>, lane 2 + 1007910914U, // : Cost 1 ins LHS, lane 2 + 2081660930U, // : Cost 2 ins <0,1,u,4>, lane 2 + 1007951877U, // : Cost 1 ins LHS, lane 5 + 1725590838U, // : Cost 2 vuzpl , RHS + 2081685506U, // : Cost 2 ins <0,1,u,7>, lane 2 + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1481818774U, // : Cost 2 vext1 <0,u,1,5>, <0,u,1,5> + 1863533364U, // : Cost 2 vzipl RHS, <1,1,1,1> + 1863533462U, // : Cost 2 vzipl RHS, <1,2,3,0> + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1481821494U, // : Cost 2 vext1 <0,u,1,5>, RHS + 1863533712U, // : Cost 2 vzipl RHS, <1,5,3,7> + 2133876740U, // : Cost 2 ins , lane 4 + 1750224182U, // : Cost 2 vuzpr <0,u,1,1>, RHS + 1007910914U, // : Cost 1 ins LHS, lane 2 + 2081628162U, // : Cost 2 ins <0,1,u,0>, lane 2 + 1997751092U, // : Cost 2 vtrnl RHS, <1,1,1,1> + 2133917700U, // : Cost 2 ins , lane 4 + 1007910914U, // : Cost 1 ins LHS, lane 2 + 2081660930U, // : Cost 2 ins <0,1,u,4>, lane 2 + 1997751296U, // : Cost 2 vtrnl RHS, <1,3,5,7> + 2133950468U, // : Cost 2 ins , lane 4 + 1060216836U, // : Cost 1 ins RHS, lane 4 + 1007910914U, // : Cost 1 ins LHS, lane 2 + 2133975044U, // : Cost 2 ins , lane 4 + 1906901002U, // : Cost 2 vzipr RHS, <0,0,1,1> + 1906903190U, // : Cost 2 vzipr RHS, <3,0,1,2> + 969220198U, // : Cost 1 vtrnr RHS, LHS + 2134007812U, // : Cost 2 ins , lane 4 + 1152558485U, // : Cost 2 vrev <1,u,5,7> + 2134024196U, // : Cost 2 ins , lane 4 + 2134032388U, // : Cost 2 ins , lane 4 + 969220203U, // : Cost 1 vtrnr RHS, LHS + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007951877U, // : Cost 1 ins LHS, lane 5 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 835584U, // : Cost 0 copy LHS + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 
1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 835584U, // : Cost 0 copy LHS + 1726332928U, // : Cost 2 vuzpl LHS, <0,0,0,0> + 1545437286U, // : Cost 2 vext2 <0,2,u,2>, LHS + 652591206U, // : Cost 1 vuzpl LHS, LHS + 1886937190U, // : Cost 2 vzipr <1,2,u,0>, LHS + 1726333132U, // : Cost 2 vuzpl LHS, <0,2,4,6> + 2081767427U, // : Cost 2 ins <0,2,0,u>, lane 3 + 2082340866U, // : Cost 2 ins <0,2,u,6>, lane 2 + 2081767427U, // : Cost 2 ins <0,2,0,u>, lane 3 + 652591260U, // : Cost 1 vuzpl LHS, LHS + 1550082851U, // : Cost 2 vext2 <1,0,u,2>, <1,0,u,2> + 1726333748U, // : Cost 2 vuzpl LHS, <1,1,1,1> + 1860552296U, // : Cost 2 vzipl LHS, <2,2,2,2> + 1750155366U, // : Cost 2 vuzpr <0,u,0,2>, LHS + 2088296450U, // : Cost 2 ins <1,2,u,4>, lane 2 + 1726333952U, // : Cost 2 vuzpl LHS, <1,3,5,7> + 1860552634U, // : Cost 2 vzipl LHS, <2,6,3,7> + 2109702145U, // : Cost 2 ins <4,u,1,7>, lane 1 + 1750155371U, // : Cost 2 vuzpr <0,u,0,2>, LHS + 1481867932U, // : Cost 2 vext1 <0,u,2,2>, <0,u,2,2> + 2085838849U, // : Cost 2 ins <0,u,2,1>, lane 1 + 269271142U, // : Cost 1 vdup2 LHS + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1481870646U, // : Cost 2 vext1 <0,u,2,2>, RHS + 2085871617U, // : Cost 2 ins <0,u,2,5>, lane 1 + 2085879809U, // : Cost 2 ins <0,u,2,6>, lane 1 + 2085888001U, // : Cost 2 ins <0,u,2,7>, lane 1 + 1012113409U, // : Cost 1 ins LHS, lane 1 + 408134301U, // : Cost 1 vext1 LHS, LHS + 1481876214U, // : Cost 2 vext1 LHS, <1,0,3,2> + 1880326164U, // : Cost 2 vzipr LHS, <0,0,2,2> + 806584422U, // : Cost 1 vzipr LHS, LHS + 408137014U, // : Cost 1 vext1 LHS, RHS + 1726335490U, // : Cost 2 vuzpl LHS, <3,4,5,6> + 1880326492U, // : Cost 2 vzipr LHS, <0,4,2,6> + 1529656314U, // : Cost 2 vext1 LHS, <7,0,1,2> + 806584427U, // : Cost 1 vzipr LHS, LHS + 1726336332U, // : Cost 2 vuzpl LHS, <4,6,0,2> + 2082062339U, // : Cost 2 ins <0,2,4,u>, lane 3 + 2082308098U, // : Cost 2 ins <0,2,u,2>, lane 2 + 1886969958U, // : Cost 2 vzipr <1,2,u,4>, LHS + 1726336208U, // : Cost 2 vuzpl LHS, <4,4,4,4> + 1545440566U, // : Cost 2 vext2 <0,2,u,2>, RHS + 652594486U, // : Cost 1 vuzpl LHS, RHS + 2082062339U, // : Cost 2 ins <0,2,4,u>, lane 3 + 652594504U, // : Cost 1 vuzpl LHS, RHS + 2088263682U, // : Cost 2 ins <1,2,u,0>, lane 2 + 1726337152U, // : Cost 2 vuzpl LHS, <5,7,1,3> + 1863534184U, // : Cost 2 vzipl RHS, <2,2,2,2> + 1884987494U, // : Cost 2 vzipr <0,u,u,5>, LHS + 1158441059U, // : Cost 2 vrev <2,u,4,5> + 1726337028U, // : Cost 2 vuzpl LHS, <5,5,5,5> + 1863534522U, // : Cost 2 vzipl RHS, <2,6,3,7> + 1750158646U, // : Cost 2 vuzpr <0,u,0,2>, RHS + 1750158647U, // : Cost 2 vuzpr <0,u,0,2>, RHS + 1481900704U, // : Cost 2 vext1 <0,u,2,6>, <0,u,2,6> + 2110021633U, // : Cost 2 ins <4,u,6,1>, lane 1 + 1997751912U, // : Cost 2 vtrnl RHS, <2,2,2,2> + 1611491258U, // : Cost 2 vext3 LHS, <2,6,3,7> + 1481903414U, // : Cost 2 vext1 <0,u,2,6>, RHS + 2110054401U, // : Cost 2 ins <4,u,6,5>, lane 1 + 1726337848U, // : Cost 2 vuzpl LHS, <6,6,6,6> + 1036328961U, // : Cost 1 ins RHS, lane 1 + 1036328961U, // : Cost 1 ins RHS, lane 1 + 2042962838U, // : Cost 2 vtrnr RHS, <1,2,3,0> + 1726338042U, // : Cost 2 vuzpl LHS, <7,0,1,2> + 1906901012U, // : Cost 2 vzipr RHS, <0,0,2,2> + 833159270U, // : Cost 1 vzipr RHS, LHS + 2042962842U, // : Cost 2 vtrnr RHS, <1,2,3,4> + 1726338406U, // : Cost 2 vuzpl LHS, <7,4,5,6> + 1906901340U, // : Cost 2 vzipr RHS, <0,4,2,6> + 1726338668U, // : Cost 2 vuzpl LHS, <7,7,7,7> + 833159275U, // : Cost 1 vzipr RHS, LHS + 408175266U, // : Cost 1 vext1 LHS, LHS + 1545443118U, 
// : Cost 2 vext2 <0,2,u,2>, LHS + 652597038U, // : Cost 1 vuzpl LHS, LHS + 806625382U, // : Cost 1 vzipr LHS, LHS + 408177974U, // : Cost 1 vext1 LHS, RHS + 1545443482U, // : Cost 2 vext2 <0,2,u,2>, RHS + 652597402U, // : Cost 1 vuzpl LHS, RHS + 1036328961U, // : Cost 1 ins RHS, lane 1 + 806625387U, // : Cost 1 vzipr LHS, LHS + 1544781824U, // : Cost 2 vext2 LHS, <0,0,0,0> + 471040156U, // : Cost 1 vext2 LHS, LHS + 1544781988U, // : Cost 2 vext2 LHS, <0,2,0,2> + 2088951810U, // : Cost 2 ins <1,3,u,3>, lane 2 + 1544782162U, // : Cost 2 vext2 LHS, <0,4,1,5> + 2094940162U, // : Cost 2 ins <2,3,u,5>, lane 2 + 2094374915U, // : Cost 2 ins <2,3,0,u>, lane 3 + 2088984578U, // : Cost 2 ins <1,3,u,7>, lane 2 + 471040669U, // : Cost 1 vext2 LHS, LHS + 1544782582U, // : Cost 2 vext2 LHS, <1,0,3,2> + 1544782644U, // : Cost 2 vext2 LHS, <1,1,1,1> + 1544782742U, // : Cost 2 vext2 LHS, <1,2,3,0> + 676569190U, // : Cost 1 vuzpr LHS, LHS + 1860553218U, // : Cost 2 vzipl LHS, <3,4,5,6> + 1544782992U, // : Cost 2 vext2 LHS, <1,5,3,7> + 2088476675U, // : Cost 2 ins <1,3,1,u>, lane 3 + 2088984578U, // : Cost 2 ins <1,3,u,7>, lane 2 + 676569195U, // : Cost 1 vuzpr LHS, LHS + 1750311830U, // : Cost 2 vuzpr LHS, <1,2,3,0> + 1164167966U, // : Cost 2 vrev <3,u,1,2> + 1544783464U, // : Cost 2 vext2 LHS, <2,2,2,2> + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1750311834U, // : Cost 2 vuzpr LHS, <1,2,3,4> + 1994770946U, // : Cost 2 vtrnl LHS, <3,4,5,6> + 1544783802U, // : Cost 2 vext2 LHS, <2,6,3,7> + 2088984578U, // : Cost 2 ins <1,3,u,7>, lane 2 + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1544784022U, // : Cost 2 vext2 LHS, <3,0,1,2> + 1750312614U, // : Cost 2 vuzpr LHS, <2,3,0,1> + 1880326902U, // : Cost 2 vzipr LHS, <1,0,3,2> + 336380006U, // : Cost 1 vdup3 LHS + 1544784386U, // : Cost 2 vext2 LHS, <3,4,5,6> + 1750312654U, // : Cost 2 vuzpr LHS, <2,3,4,5> + 2100568067U, // : Cost 2 ins <3,3,3,u>, lane 3 + 1880327312U, // : Cost 2 vzipr LHS, <1,5,3,7> + 336380006U, // : Cost 1 vdup3 LHS + 1487929446U, // : Cost 2 vext1 <1,u,3,4>, LHS + 1487930752U, // : Cost 2 vext1 <1,u,3,4>, <1,u,3,4> + 2094669827U, // : Cost 2 ins <2,3,4,u>, lane 3 + 2088951810U, // : Cost 2 ins <1,3,u,3>, lane 2 + 1487932726U, // : Cost 2 vext1 <1,u,3,4>, RHS + 471043382U, // : Cost 1 vext2 LHS, RHS + 1750311260U, // : Cost 2 vuzpr LHS, <0,4,2,6> + 2088984578U, // : Cost 2 ins <1,3,u,7>, lane 2 + 471043625U, // : Cost 1 vext2 LHS, RHS + 1863534742U, // : Cost 2 vzipl RHS, <3,0,1,2> + 1574645465U, // : Cost 2 vext2 <5,1,u,3>, <5,1,u,3> + 2088771587U, // : Cost 2 ins <1,3,5,u>, lane 3 + 1863535004U, // : Cost 2 vzipl RHS, <3,3,3,3> + 1592561606U, // : Cost 2 vext2 LHS, <5,4,7,6> + 1592561668U, // : Cost 2 vext2 LHS, <5,5,5,5> + 1592561762U, // : Cost 2 vext2 LHS, <5,6,7,0> + 676572470U, // : Cost 1 vuzpr LHS, RHS + 676572471U, // : Cost 1 vuzpr LHS, RHS + 1798090850U, // : Cost 2 vuzpr LHS, <5,6,7,0> + 1997752470U, // : Cost 2 vtrnl RHS, <3,0,1,2> + 1581281795U, // : Cost 2 vext2 <6,2,u,3>, <6,2,u,3> + 1997752732U, // : Cost 2 vtrnl RHS, <3,3,3,3> + 1798090854U, // : Cost 2 vuzpr LHS, <5,6,7,4> + 1164495686U, // : Cost 2 vrev <3,u,5,6> + 1592562488U, // : Cost 2 vext2 LHS, <6,6,6,6> + 1060216836U, // : Cost 1 ins RHS, lane 4 + 1060216836U, // : Cost 1 ins RHS, lane 4 + 1487954022U, // : Cost 2 vext1 <1,u,3,7>, LHS + 1487955331U, // : Cost 2 vext1 <1,u,3,7>, <1,u,3,7> + 1493928028U, // : Cost 2 vext1 <2,u,3,7>, <2,u,3,7> + 1906901832U, // : Cost 2 vzipr RHS, <1,1,3,3> + 1487957302U, // : Cost 2 vext1 <1,u,3,7>, RHS + 2042963662U, // : Cost 2 
vtrnr RHS, <2,3,4,5> + 2134024196U, // : Cost 2 ins , lane 4 + 1906902160U, // : Cost 2 vzipr RHS, <1,5,3,7> + 1487959854U, // : Cost 2 vext1 <1,u,3,7>, LHS + 1544787667U, // : Cost 2 vext2 LHS, + 471045934U, // : Cost 1 vext2 LHS, LHS + 1880367862U, // : Cost 2 vzipr LHS, <1,0,3,2> + 676569757U, // : Cost 1 vuzpr LHS, LHS + 1544788031U, // : Cost 2 vext2 LHS, + 471046298U, // : Cost 1 vext2 LHS, RHS + 1750311584U, // : Cost 2 vuzpr LHS, <0,u,2,6> + 676572713U, // : Cost 1 vuzpr LHS, RHS + 471046501U, // : Cost 1 vext2 LHS, LHS + 1974046028U, // : Cost 2 vtrnl <4,6,0,2>, <4,6,0,2> + 1551425638U, // : Cost 2 vext2 <1,2,u,4>, LHS + 1727168614U, // : Cost 2 vuzpl , LHS + 2085707777U, // : Cost 2 ins <0,u,0,3>, lane 1 + 1679392972U, // : Cost 2 vuzpl <0,2,4,6>, <0,2,4,6> + 1638329234U, // : Cost 2 vext3 RHS, <4,0,5,1> + 1638329244U, // : Cost 2 vext3 RHS, <4,0,6,2> + 2109628417U, // : Cost 2 ins <4,u,0,7>, lane 1 + 1551426205U, // : Cost 2 vext2 <1,2,u,4>, LHS + 1860553618U, // : Cost 2 vzipl LHS, <4,0,5,1> + 2085765121U, // : Cost 2 ins <0,u,1,1>, lane 1 + 1551426503U, // : Cost 2 vext2 <1,2,u,4>, <1,2,u,4> + 1756364902U, // : Cost 2 vuzpr <1,u,3,4>, LHS + 1860553936U, // : Cost 2 vzipl LHS, <4,4,4,4> + 786812214U, // : Cost 1 vzipl LHS, RHS + 1994026294U, // : Cost 2 vtrnl , RHS + 2083168259U, // : Cost 2 ins <0,4,1,u>, lane 3 + 786812457U, // : Cost 1 vzipl LHS, RHS + 1170066926U, // : Cost 2 vrev <4,u,0,2> + 2083241987U, // : Cost 2 ins <0,4,2,u>, lane 3 + 2085847041U, // : Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1994771664U, // : Cost 2 vtrnl LHS, <4,4,4,4> + 1994771346U, // : Cost 2 vtrnl LHS, <4,0,5,1> + 921029942U, // : Cost 1 vtrnl LHS, RHS + 2083241987U, // : Cost 2 ins <0,4,2,u>, lane 3 + 921029960U, // : Cost 1 vtrnl LHS, RHS + 2091876353U, // : Cost 2 ins <1,u,3,0>, lane 1 + 2954070192U, // : Cost 3 vzipr LHS, <3,0,4,1> + 2091892737U, // : Cost 2 ins <1,u,3,2>, lane 1 + 2091900929U, // : Cost 2 ins <1,u,3,3>, lane 1 + 1928105168U, // : Cost 2 vzipr LHS, <4,4,4,4> + 1880327886U, // : Cost 2 vzipr LHS, <2,3,4,5> + 1880326348U, // : Cost 2 vzipr LHS, <0,2,4,6> + 2091933697U, // : Cost 2 ins <1,u,3,7>, lane 1 + 1880326350U, // : Cost 2 vzipr LHS, <0,2,4,u> + 1505919078U, // : Cost 2 vext1 <4,u,4,4>, LHS + 2107277315U, // : Cost 2 ins <4,4,4,u>, lane 3 + 2107277315U, // : Cost 2 ins <4,4,4,u>, lane 3 + 2086002689U, // : Cost 2 ins <0,u,4,3>, lane 1 + 161926454U, // : Cost 1 vdup0 RHS + 1551428918U, // : Cost 2 vext2 <1,2,u,4>, RHS + 1638329572U, // : Cost 2 vext3 RHS, <4,4,6,6> + 2109923329U, // : Cost 2 ins <4,u,4,7>, lane 1 + 161926454U, // : Cost 1 vdup0 RHS + 1493983334U, // : Cost 2 vext1 <2,u,4,5>, LHS + 2101379075U, // : Cost 2 ins <3,4,5,u>, lane 3 + 1493985379U, // : Cost 2 vext1 <2,u,4,5>, <2,u,4,5> + 2101379075U, // : Cost 2 ins <3,4,5,u>, lane 3 + 1493986614U, // : Cost 2 vext1 <2,u,4,5>, RHS + 789794102U, // : Cost 1 vzipl RHS, RHS + 537750838U, // : Cost 1 vext3 LHS, RHS + 1756368182U, // : Cost 2 vuzpr <1,u,3,4>, RHS + 537750856U, // : Cost 1 vext3 LHS, RHS + 1482048178U, // : Cost 2 vext1 <0,u,4,6>, <0,u,4,6> + 2107424771U, // : Cost 2 ins <4,4,6,u>, lane 3 + 2110029825U, // : Cost 2 ins <4,u,6,2>, lane 1 + 2107424771U, // : Cost 2 ins <4,4,6,u>, lane 3 + 1482050870U, // : Cost 2 vext1 <0,u,4,6>, RHS + 1997753234U, // : Cost 2 vtrnl RHS, <4,0,5,1> + 924011830U, // : Cost 1 vtrnl RHS, RHS + 1036328961U, // : Cost 1 ins RHS, lane 1 + 924011848U, // : Cost 1 vtrnl RHS, RHS + 2116059137U, // : Cost 2 ins <5,u,7,0>, lane 1 + 2113470467U, // 
: Cost 2 ins <5,4,7,u>, lane 3 + 2113470467U, // : Cost 2 ins <5,4,7,u>, lane 3 + 2116083713U, // : Cost 2 ins <5,u,7,3>, lane 1 + 1906904272U, // : Cost 2 vzipr RHS, <4,4,4,4> + 1906902734U, // : Cost 2 vzipr RHS, <2,3,4,5> + 96808489U, // : Cost 1 vrev RHS + 2116116481U, // : Cost 2 ins <5,u,7,7>, lane 1 + 96955963U, // : Cost 1 vrev RHS + 1482064564U, // : Cost 2 vext1 <0,u,4,u>, <0,u,4,u> + 1551431470U, // : Cost 2 vext2 <1,2,u,4>, LHS + 1494009958U, // : Cost 2 vext1 <2,u,4,u>, <2,u,4,u> + 1012113409U, // : Cost 1 ins LHS, lane 1 + 161926454U, // : Cost 1 vdup0 RHS + 791457078U, // : Cost 1 vzipl LHS, RHS + 537751081U, // : Cost 1 vext3 LHS, RHS + 1036328961U, // : Cost 1 ins RHS, lane 1 + 537751099U, // : Cost 1 vext3 LHS, RHS + 2085683201U, // : Cost 2 ins <0,u,0,0>, lane 1 + 1034493957U, // : Cost 1 ins RHS, lane 5 + 1727914086U, // : Cost 2 vuzpl , LHS + 2085707777U, // : Cost 2 ins <0,u,0,3>, lane 1 + 1546273106U, // : Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 1678778497U, // : Cost 2 vuzpl <0,1,5,3>, <0,1,5,3> + 2108219394U, // : Cost 2 ins <4,5,u,6>, lane 2 + 1034485762U, // : Cost 1 ins RHS, lane 2 + 1034493957U, // : Cost 1 ins RHS, lane 5 + 1505968230U, // : Cost 2 vext1 <4,u,5,1>, LHS + 1860554448U, // : Cost 2 vzipl LHS, <5,1,7,3> + 2103689217U, // : Cost 2 ins <3,u,1,2>, lane 1 + 1750253670U, // : Cost 2 vuzpr <0,u,1,5>, LHS + 1505971738U, // : Cost 2 vext1 <4,u,5,1>, <4,u,5,1> + 1860554756U, // : Cost 2 vzipl LHS, <5,5,5,5> + 1860554850U, // : Cost 2 vzipl LHS, <5,6,7,0> + 1034485762U, // : Cost 1 ins RHS, lane 2 + 1034485762U, // : Cost 1 ins RHS, lane 2 + 2085830657U, // : Cost 2 ins <0,u,2,0>, lane 1 + 1994772608U, // : Cost 2 vtrnl LHS, <5,7,1,3> + 2085847041U, // : Cost 2 ins <0,u,2,2>, lane 1 + 1012113409U, // : Cost 1 ins LHS, lane 1 + 2085863425U, // : Cost 2 ins <0,u,2,4>, lane 1 + 1994772484U, // : Cost 2 vtrnl LHS, <5,5,5,5> + 2085879809U, // : Cost 2 ins <0,u,2,6>, lane 1 + 1034485762U, // : Cost 1 ins RHS, lane 2 + 1012113409U, // : Cost 1 ins LHS, lane 1 + 2091876353U, // : Cost 2 ins <1,u,3,0>, lane 1 + 1176121553U, // : Cost 2 vrev <5,u,1,3> + 2091892737U, // : Cost 2 ins <1,u,3,2>, lane 1 + 2091900929U, // : Cost 2 ins <1,u,3,3>, lane 1 + 2091909121U, // : Cost 2 ins <1,u,3,4>, lane 1 + 1928105178U, // : Cost 2 vzipr LHS, <4,4,5,5> + 1880328706U, // : Cost 2 vzipr LHS, <3,4,5,6> + 945007926U, // : Cost 1 vtrnr LHS, RHS + 945007927U, // : Cost 1 vtrnr LHS, RHS + 2108170242U, // : Cost 2 ins <4,5,u,0>, lane 2 + 2108178434U, // : Cost 2 ins <4,5,u,1>, lane 2 + 2108186626U, // : Cost 2 ins <4,5,u,2>, lane 2 + 2086002689U, // : Cost 2 ins <0,u,4,3>, lane 1 + 1845022662U, // : Cost 2 vzipl <5,4,7,6>, <5,4,7,6> + 1034493957U, // : Cost 1 ins RHS, lane 5 + 1727917366U, // : Cost 2 vuzpl , RHS + 1034485762U, // : Cost 1 ins RHS, lane 2 + 1034493957U, // : Cost 1 ins RHS, lane 5 + 1506000998U, // : Cost 2 vext1 <4,u,5,5>, LHS + 1863536336U, // : Cost 2 vzipl RHS, <5,1,7,3> + 2108186626U, // : Cost 2 ins <4,5,u,2>, lane 2 + 2086076417U, // : Cost 2 ins <0,u,5,3>, lane 1 + 1506004510U, // : Cost 2 vext1 <4,u,5,5>, <4,u,5,5> + 229035318U, // : Cost 1 vdup1 RHS + 1863536738U, // : Cost 2 vzipl RHS, <5,6,7,0> + 1034485762U, // : Cost 1 ins RHS, lane 2 + 1034485762U, // : Cost 1 ins RHS, lane 2 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 
ins RHS, lane 3 + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 1488101478U, // : Cost 2 vext1 <1,u,5,7>, LHS + 1488102805U, // : Cost 2 vext1 <1,u,5,7>, <1,u,5,7> + 2114134019U, // : Cost 2 ins <5,5,7,u>, lane 3 + 2133999620U, // : Cost 2 ins , lane 4 + 1488104758U, // : Cost 2 vext1 <1,u,5,7>, RHS + 1638330536U, // : Cost 2 vext3 RHS, <5,7,5,7> + 1906903554U, // : Cost 2 vzipr RHS, <3,4,5,6> + 969223478U, // : Cost 1 vtrnr RHS, RHS + 969223479U, // : Cost 1 vtrnr RHS, RHS + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034493957U, // : Cost 1 ins RHS, lane 5 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034493957U, // : Cost 1 ins RHS, lane 5 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 1729314816U, // : Cost 2 vuzpl RHS, <0,0,0,0> + 1545470054U, // : Cost 2 vext2 <0,2,u,6>, LHS + 655573094U, // : Cost 1 vuzpl RHS, LHS + 2108309507U, // : Cost 2 ins <4,6,0,u>, lane 3 + 1546797458U, // : Cost 2 vext2 <0,4,u,6>, <0,4,u,6> + 2108309507U, // : Cost 2 ins <4,6,0,u>, lane 3 + 2108882946U, // : Cost 2 ins <4,6,u,6>, lane 2 + 1886940470U, // : Cost 2 vzipr <1,2,u,0>, RHS + 655573148U, // : Cost 1 vuzpl RHS, LHS + 1182004127U, // : Cost 2 vrev <6,u,0,1> + 1729315636U, // : Cost 2 vuzpl RHS, <1,1,1,1> + 1860555258U, // : Cost 2 vzipl LHS, <6,2,7,3> + 1750335590U, // : Cost 2 vuzpr <0,u,2,6>, LHS + 2114838530U, // : Cost 2 ins <5,6,u,4>, lane 2 + 1729315840U, // : Cost 2 vuzpl RHS, <1,3,5,7> + 1860555576U, // : Cost 2 vzipl LHS, <6,6,6,6> + 1884958006U, // : Cost 2 vzipr <0,u,u,1>, RHS + 1750335595U, // : Cost 2 vuzpr <0,u,2,6>, LHS + 1506050150U, // : Cost 2 vext1 <4,u,6,2>, LHS + 2085838849U, // : Cost 2 ins <0,u,2,1>, lane 1 + 1729316456U, // : Cost 2 vuzpl RHS, <2,2,2,2> + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1506053668U, // : Cost 2 vext1 <4,u,6,2>, <4,u,6,2> + 2085871617U, // : Cost 2 ins <0,u,2,5>, lane 1 + 1994773304U, // : Cost 2 vtrnl LHS, <6,6,6,6> + 1880984886U, // : Cost 2 vzipr <0,2,u,2>, RHS + 1012113409U, // : Cost 1 ins LHS, lane 1 + 2066526306U, // : Cost 2 vtrnr LHS, <5,6,7,0> + 1729317014U, // : Cost 2 vuzpl RHS, <3,0,1,2> + 1928104860U, // : Cost 2 vzipr LHS, <4,0,6,2> + 1729317276U, // : Cost 2 vuzpl RHS, <3,3,3,3> + 1564715549U, // : Cost 2 vext2 <3,4,u,6>, <3,4,u,6> + 1729317378U, // : Cost 2 vuzpl RHS, <3,4,5,6> + 1928105188U, // : Cost 2 vzipr LHS, <4,4,6,6> + 806587702U, // : Cost 1 vzipr LHS, RHS + 806587703U, // : Cost 1 vzipr LHS, RHS + 1729318220U, // : Cost 2 vuzpl RHS, <4,6,0,2> + 2108604419U, // : Cost 2 ins <4,6,4,u>, lane 3 + 2108850178U, // : Cost 2 ins <4,6,u,2>, lane 2 + 2108604419U, // : Cost 2 ins <4,6,4,u>, lane 3 + 1729318096U, // : Cost 2 vuzpl RHS, <4,4,4,4> + 1545473334U, // : Cost 2 vext2 <0,2,u,6>, RHS + 655576374U, // : Cost 1 vuzpl RHS, RHS + 1886973238U, // : Cost 2 vzipr <1,2,u,4>, RHS + 655576392U, // : Cost 1 vuzpl RHS, RHS + 2114805762U, // : Cost 2 ins <5,6,u,0>, lane 2 + 1729319040U, // : Cost 2 vuzpl RHS, <5,7,1,3> + 1863537146U, // : Cost 2 vzipl RHS, <6,2,7,3> + 2086076417U, // : Cost 2 ins <0,u,5,3>, lane 1 + 1576660943U, // : Cost 2 vext2 <5,4,u,6>, <5,4,u,6> + 1729318916U, // : Cost 2 vuzpl RHS, <5,5,5,5> + 1863537464U, // : Cost 2 vzipl RHS, <6,6,6,6> + 1750338870U, // : Cost 2 vuzpr <0,u,2,6>, RHS + 1750338871U, // : Cost 2 vuzpr <0,u,2,6>, RHS + 1506082918U, // : Cost 2 vext1 <4,u,6,6>, LHS + 2110021633U, // : Cost 2 ins <4,u,6,1>, lane 1 + 2110029825U, // 
: Cost 2 ins <4,u,6,2>, lane 1 + 2086150145U, // : Cost 2 ins <0,u,6,3>, lane 1 + 1506086440U, // : Cost 2 vext1 <4,u,6,6>, <4,u,6,6> + 2110054401U, // : Cost 2 ins <4,u,6,5>, lane 1 + 296144182U, // : Cost 1 vdup2 RHS + 1036328961U, // : Cost 1 ins RHS, lane 1 + 1036328961U, // : Cost 1 ins RHS, lane 1 + 432349286U, // : Cost 1 vext1 RHS, LHS + 1506091766U, // : Cost 2 vext1 RHS, <1,0,3,2> + 1906903964U, // : Cost 2 vzipr RHS, <4,0,6,2> + 1506093206U, // : Cost 2 vext1 RHS, <3,0,1,2> + 432352809U, // : Cost 1 vext1 RHS, RHS + 1506094800U, // : Cost 2 vext1 RHS, <5,1,7,3> + 1906904292U, // : Cost 2 vzipr RHS, <4,4,6,6> + 833162550U, // : Cost 1 vzipr RHS, RHS + 833162551U, // : Cost 1 vzipr RHS, RHS + 432357478U, // : Cost 1 vext1 RHS, LHS + 1545475886U, // : Cost 2 vext2 <0,2,u,6>, LHS + 655578926U, // : Cost 1 vuzpl RHS, LHS + 1012113409U, // : Cost 1 ins LHS, lane 1 + 432361002U, // : Cost 1 vext1 RHS, RHS + 1545476250U, // : Cost 2 vext2 <0,2,u,6>, RHS + 655579290U, // : Cost 1 vuzpl RHS, RHS + 806628662U, // : Cost 1 vzipr LHS, RHS + 806628663U, // : Cost 1 vzipr LHS, RHS + 1571356672U, // : Cost 2 vext2 RHS, <0,0,0,0> + 497614950U, // : Cost 1 vext2 RHS, LHS + 1571356836U, // : Cost 2 vext2 RHS, <0,2,0,2> + 2115493890U, // : Cost 2 ins <5,7,u,3>, lane 2 + 1571357010U, // : Cost 2 vext2 RHS, <0,4,1,5> + 1512083716U, // : Cost 2 vext1 <5,u,7,0>, <5,u,7,0> + 2120916995U, // : Cost 2 ins <6,7,0,u>, lane 3 + 2115526658U, // : Cost 2 ins <5,7,u,7>, lane 2 + 497615517U, // : Cost 1 vext2 RHS, LHS + 1571357430U, // : Cost 2 vext2 RHS, <1,0,3,2> + 1571357492U, // : Cost 2 vext2 RHS, <1,1,1,1> + 1571357590U, // : Cost 2 vext2 RHS, <1,2,3,0> + 700784742U, // : Cost 1 vuzpr RHS, LHS + 1860556134U, // : Cost 2 vzipl LHS, <7,4,5,6> + 1553441981U, // : Cost 2 vext2 <1,5,u,7>, <1,5,u,7> + 2115018755U, // : Cost 2 ins <5,7,1,u>, lane 3 + 1860556396U, // : Cost 2 vzipl LHS, <7,7,7,7> + 700784747U, // : Cost 1 vuzpr RHS, LHS + 1774527382U, // : Cost 2 vuzpr RHS, <1,2,3,0> + 1188058754U, // : Cost 2 vrev <7,u,1,2> + 1571358312U, // : Cost 2 vext2 RHS, <2,2,2,2> + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1774527386U, // : Cost 2 vuzpr RHS, <1,2,3,4> + 1994773862U, // : Cost 2 vtrnl LHS, <7,4,5,6> + 1560078311U, // : Cost 2 vext2 <2,6,u,7>, <2,6,u,7> + 1994774124U, // : Cost 2 vtrnl LHS, <7,7,7,7> + 1012113409U, // : Cost 1 ins LHS, lane 1 + 1571358870U, // : Cost 2 vext2 RHS, <3,0,1,2> + 1774528166U, // : Cost 2 vuzpr RHS, <2,3,0,1> + 2091892737U, // : Cost 2 ins <1,u,3,2>, lane 1 + 1571359132U, // : Cost 2 vext2 RHS, <3,3,3,3> + 1571359234U, // : Cost 2 vext2 RHS, <3,4,5,6> + 1774528206U, // : Cost 2 vuzpr RHS, <2,3,4,5> + 1518080992U, // : Cost 2 vext1 <6,u,7,3>, <6,u,7,3> + 1774527488U, // : Cost 2 vuzpr RHS, <1,3,5,7> + 1571359518U, // : Cost 2 vext2 RHS, <3,u,1,2> + 1571359634U, // : Cost 2 vext2 RHS, <4,0,5,1> + 2121449474U, // : Cost 2 ins <6,7,u,1>, lane 2 + 2121211907U, // : Cost 2 ins <6,7,4,u>, lane 3 + 2115493890U, // : Cost 2 ins <5,7,u,3>, lane 2 + 1571359952U, // : Cost 2 vext2 RHS, <4,4,4,4> + 497618248U, // : Cost 1 vext2 RHS, RHS + 1571360076U, // : Cost 2 vext2 RHS, <4,6,0,2> + 2115526658U, // : Cost 2 ins <5,7,u,7>, lane 2 + 497618473U, // : Cost 1 vext2 RHS, RHS + 1863537658U, // : Cost 2 vzipl RHS, <7,0,1,2> + 1571360464U, // : Cost 2 vext2 RHS, <5,1,7,3> + 2115313667U, // : Cost 2 ins <5,7,5,u>, lane 3 + 2115493890U, // : Cost 2 ins <5,7,u,3>, lane 2 + 1571360710U, // : Cost 2 vext2 RHS, <5,4,7,6> + 1571360772U, // : Cost 2 vext2 RHS, <5,5,5,5> + 1571360866U, // : Cost 2 vext2 
RHS, <5,6,7,0> + 700788022U, // : Cost 1 vuzpr RHS, RHS + 700788023U, // : Cost 1 vuzpr RHS, RHS + 1774530658U, // : Cost 2 vuzpr RHS, <5,6,7,0> + 1997755386U, // : Cost 2 vtrnl RHS, <7,0,1,2> + 1571361274U, // : Cost 2 vext2 RHS, <6,2,7,3> + 2115493890U, // : Cost 2 ins <5,7,u,3>, lane 2 + 1774530662U, // : Cost 2 vuzpr RHS, <5,6,7,4> + 1188386474U, // : Cost 2 vrev <7,u,5,6> + 1571361592U, // : Cost 2 vext2 RHS, <6,6,6,6> + 1036328961U, // : Cost 1 ins RHS, lane 1 + 1036328961U, // : Cost 1 ins RHS, lane 1 + 1571361786U, // : Cost 2 vext2 RHS, <7,0,1,2> + 1774531406U, // : Cost 2 vuzpr RHS, <6,7,0,1> + 2127405059U, // : Cost 2 ins <7,7,7,u>, lane 3 + 1906904784U, // : Cost 2 vzipr RHS, <5,1,7,3> + 1571362150U, // : Cost 2 vext2 RHS, <7,4,5,6> + 1774531446U, // : Cost 2 vuzpr RHS, <6,7,4,5> + 1906905030U, // : Cost 2 vzipr RHS, <5,4,7,6> + 363253046U, // : Cost 1 vdup3 RHS + 363253046U, // : Cost 1 vdup3 RHS + 1571362515U, // : Cost 2 vext2 RHS, + 497620782U, // : Cost 1 vext2 RHS, LHS + 1571362693U, // : Cost 2 vext2 RHS, + 700785309U, // : Cost 1 vuzpr RHS, LHS + 1571362879U, // : Cost 2 vext2 RHS, + 497621146U, // : Cost 1 vext2 RHS, RHS + 1571363024U, // : Cost 2 vext2 RHS, + 700788265U, // : Cost 1 vuzpr RHS, RHS + 497621349U, // : Cost 1 vext2 RHS, LHS + 135053414U, // : Cost 1 vdup0 LHS + 471081121U, // : Cost 1 vext2 LHS, LHS + 653033574U, // : Cost 1 vuzpl LHS, LHS + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1544823122U, // : Cost 2 vext2 LHS, <0,4,1,5> + 1512157453U, // : Cost 2 vext1 <5,u,u,0>, <5,u,u,0> + 1995282586U, // : Cost 2 vtrnl , RHS + 1034485762U, // : Cost 1 ins RHS, lane 2 + 471081629U, // : Cost 1 vext2 LHS, LHS + 1544823542U, // : Cost 2 vext2 LHS, <1,0,3,2> + 786814766U, // : Cost 1 vzipl LHS, LHS + 537753390U, // : Cost 1 vext3 LHS, LHS + 676610150U, // : Cost 1 vuzpr LHS, LHS + 1482304822U, // : Cost 2 vext1 <0,u,u,1>, RHS + 786815130U, // : Cost 1 vzipl LHS, RHS + 1518138343U, // : Cost 2 vext1 <6,u,u,1>, <6,u,u,1> + 1034485762U, // : Cost 1 ins RHS, lane 2 + 537753444U, // : Cost 1 vext3 LHS, LHS + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 921032494U, // : Cost 1 vtrnl LHS, LHS + 835584U, // : Cost 0 copy LHS + 1007509507U, // : Cost 1 ins LHS, lane 3 + 1007509507U, // : Cost 1 ins LHS, lane 3 + 921032858U, // : Cost 1 vtrnl LHS, RHS + 1007509507U, // : Cost 1 ins LHS, lane 3 + 835584U, // : Cost 0 copy LHS + 408576723U, // : Cost 1 vext1 LHS, LHS + 1880327918U, // : Cost 2 vzipr LHS, <2,3,u,1> + 120371557U, // : Cost 1 vrev LHS + 806584476U, // : Cost 1 vzipr LHS, LHS + 408579382U, // : Cost 1 vext1 LHS, RHS + 1880327922U, // : Cost 2 vzipr LHS, <2,3,u,5> + 1880326384U, // : Cost 2 vzipr LHS, <0,2,u,6> + 806587720U, // : Cost 1 vzipr LHS, RHS + 806584481U, // : Cost 1 vzipr LHS, LHS + 1488298086U, // : Cost 2 vext1 <1,u,u,4>, LHS + 1488299437U, // : Cost 2 vext1 <1,u,u,4>, <1,u,u,4> + 1659271204U, // : Cost 2 vext3 LHS, + 1007910914U, // : Cost 1 ins LHS, lane 2 + 161926454U, // : Cost 1 vdup0 RHS + 471084342U, // : Cost 1 vext2 LHS, RHS + 653036854U, // : Cost 1 vuzpl LHS, RHS + 1034485762U, // : Cost 1 ins RHS, lane 2 + 471084585U, // : Cost 1 vext2 LHS, RHS + 1482334933U, // : Cost 2 vext1 <0,u,u,5>, <0,u,u,5> + 789796654U, // : Cost 1 vzipl RHS, LHS + 1494280327U, // : Cost 2 vext1 <2,u,u,5>, <2,u,u,5> + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1482337590U, // : Cost 2 vext1 <0,u,u,5>, RHS + 789797018U, // : Cost 1 vzipl RHS, RHS + 537753754U, // : Cost 1 vext3 LHS, RHS + 676613430U, // : Cost 1 vuzpr LHS, 
RHS + 537753772U, // : Cost 1 vext3 LHS, RHS + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 924014382U, // : Cost 1 vtrnl RHS, LHS + 1007910914U, // : Cost 1 ins LHS, lane 2 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 1034346499U, // : Cost 1 ins RHS, lane 3 + 924014746U, // : Cost 1 vtrnl RHS, RHS + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 432496742U, // : Cost 1 vext1 RHS, LHS + 1488324016U, // : Cost 2 vext1 <1,u,u,7>, <1,u,u,7> + 1494296713U, // : Cost 2 vext1 <2,u,u,7>, <2,u,u,7> + 833159324U, // : Cost 1 vzipr RHS, LHS + 432500283U, // : Cost 1 vext1 RHS, RHS + 1906901393U, // : Cost 2 vzipr RHS, <0,4,u,5> + 120699277U, // : Cost 1 vrev RHS + 833162568U, // : Cost 1 vzipr RHS, RHS + 833159329U, // : Cost 1 vzipr RHS, LHS + 408617688U, // : Cost 1 vext1 LHS, LHS + 471086894U, // : Cost 1 vext2 LHS, LHS + 537753957U, // : Cost 1 vext3 LHS, LHS + 835584U, // : Cost 0 copy LHS + 408620342U, // : Cost 1 vext1 LHS, RHS + 471087258U, // : Cost 1 vext2 LHS, RHS + 537753997U, // : Cost 1 vext3 LHS, RHS + 27705344U, // : Cost 0 copy RHS + 835584U, // : Cost 0 copy LHS + 0}; + +static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) { + assert(M.size() == 4 && "Expected a 4 entry perfect shuffle"); + + // Special case zero-cost nop copies, from either LHS or RHS. + if (llvm::all_of(llvm::enumerate(M), [](auto &E) { + return E.value() < 0 || E.value() == (int)E.index(); + })) + return 0; + if (llvm::all_of(llvm::enumerate(M), [](auto &E) { + return E.value() < 0 || E.value() == (int)E.index() + 4; + })) + return 0; + + // Get the four mask elements from the 2 inputs. Perfect shuffles encode undef + // elements with value 8. + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + assert(M[i] < 8 && "Expected a maximum entry of 8 for shuffle mask"); + if (M[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = M[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + + PFIndexes[2] * 9 + PFIndexes[3]; + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + // And extract the cost from the upper bits. The cost is encoded as Cost-1.
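+ // For example, the table entry 1638469841U above has bits 31:30 equal to + // 0b01 and so decodes to a cost of 1 + 1 = 2, matching the "Cost 2" text in + // its comment; entries annotated "Cost 1" carry 0b00 in those bits.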
+ return (PFEntry >> 30) + 1; +} #endif diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index d1b901e58d27..f7c06b9fb71b 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -19,6 +19,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -32,6 +33,8 @@ using namespace llvm; +#define GET_CC_REGISTER_LISTS +#include "AArch64GenCallingConv.inc" #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" @@ -63,14 +66,6 @@ bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg, return true; } -bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) { - const Function &F = MF->getFunction(); - return isa<ScalableVectorType>(F.getReturnType()) || - any_of(F.args(), [](const Argument &Arg) { - return isa<ScalableVectorType>(Arg.getType()); - }); -} - const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); @@ -108,7 +103,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // This is for OSes other than Windows; Windows is a separate case further // above. return CSR_AArch64_AAPCS_X18_SaveList; - if (hasSVEArgsOrReturn(MF)) + if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) return CSR_AArch64_SVE_AAPCS_SaveList; return CSR_AArch64_AAPCS_SaveList; } @@ -335,6 +330,13 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) markSuperRegs(Reserved, AArch64::W16); + // SME tiles are not allocatable.
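+ // Every tile view of the accumulator (ZAB0, ZAH0-ZAH1, ZAS0-ZAS3, ZAD0-ZAD7 + // and ZAQ0-ZAQ15) is a subregister of ZA, so a single walk of the + // subregister iterator below reserves them all.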
+ if (MF.getSubtarget<AArch64Subtarget>().hasSME()) { + for (MCSubRegIterator SubReg(AArch64::ZA, this, /*self=*/true); + SubReg.isValid(); ++SubReg) + Reserved.set(*SubReg); + } + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } @@ -417,6 +419,68 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + CallingConv::ID CC = MF.getFunction().getCallingConv(); + const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); + bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv()); + + auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) { + return llvm::any_of(RegList, + [Reg](const MCRegister R) { return R == Reg; }); + }; + + switch (CC) { + default: + report_fatal_error("Unsupported calling convention."); + case CallingConv::WebKit_JS: + return HasReg(CC_AArch64_WebKit_JS_ArgRegs, Reg); + case CallingConv::GHC: + return HasReg(CC_AArch64_GHC_ArgRegs, Reg); + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::PreserveMost: + case CallingConv::CXX_FAST_TLS: + case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: + if (STI.isTargetWindows() && IsVarArg) + return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg); + if (!STI.isTargetDarwin()) { + switch (CC) { + default: + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg); + case CallingConv::Swift: + case CallingConv::SwiftTail: + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) || + HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg); + } + } + if (!IsVarArg) { + switch (CC) { + default: + return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg); + case CallingConv::Swift: + case CallingConv::SwiftTail: + return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) || + HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg); + } + } + if (STI.isTargetILP32()) + return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg); + return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg); + case CallingConv::Win64: + if (IsVarArg) + return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg); + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg); + case CallingConv::CFGuard_Check: + return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg); + case CallingConv::AArch64_VectorCall: + case CallingConv::AArch64_SVE_VectorCall: + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg); + } +} + Register AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const AArch64FrameLowering *TFI = getFrameLowering(MF); @@ -588,23 +652,31 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, // Create a scratch register for the frame index elimination in an instruction. // This function has special handling of stack tagging loop pseudos, in which -// case it can also change the instruction opcode (but not the operands). +// case it can also change the instruction opcode. static Register -createScratchRegisterForInstruction(MachineInstr &MI, +createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum, const AArch64InstrInfo *TII) { // ST*Gloop have a reserved scratch register in operand 1. Use it, and also // replace the instruction with the writeback variant because it will now // satisfy the operand constraints for it.
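// The frame index operand is then rewritten to the scratch register and tied // to the writeback def, so the register allocator must assign both to the // same physical register.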
- if (MI.getOpcode() == AArch64::STGloop) { - MI.setDesc(TII->get(AArch64::STGloop_wback)); - return MI.getOperand(1).getReg(); - } else if (MI.getOpcode() == AArch64::STZGloop) { - MI.setDesc(TII->get(AArch64::STZGloop_wback)); - return MI.getOperand(1).getReg(); + Register ScratchReg; + if (MI.getOpcode() == AArch64::STGloop || + MI.getOpcode() == AArch64::STZGloop) { + assert(FIOperandNum == 3 && + "Wrong frame index operand for STGloop/STZGloop"); + unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback + : AArch64::STZGloop_wback; + ScratchReg = MI.getOperand(1).getReg(); + MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true); + MI.setDesc(TII->get(Op)); + MI.tieOperands(1, 3); } else { - return MI.getMF()->getRegInfo().createVirtualRegister( - &AArch64::GPR64RegClass); + ScratchReg = + MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + MI.getOperand(FIOperandNum) + .ChangeToRegister(ScratchReg, false, false, true); } + return ScratchReg; } void AArch64RegisterInfo::getOffsetOpcodes( @@ -721,9 +793,9 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above. Handle the rest, providing a register that is // SP+LargeImm. - Register ScratchReg = createScratchRegisterForInstruction(MI, TII); + Register ScratchReg = + createScratchRegisterForInstruction(MI, FIOperandNum, TII); emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); - MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); } unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 0c871ac089a7..12dd70fa4aa8 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -42,8 +42,6 @@ public: void UpdateCustomCallPreservedMask(MachineFunction &MF, const uint32_t **Mask) const; - static bool hasSVEArgsOrReturn(const MachineFunction *MF); - /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const; @@ -120,6 +118,9 @@ public: bool hasBasePointer(const MachineFunction &MF) const; unsigned getBaseRegister() const; + bool isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const override; + // Debug information queries. 
Register getFrameRegister(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 70daf5abf81d..7a2b165570cb 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -871,7 +871,7 @@ class ZPRRegOp class PPRClass<int lastreg> : RegisterClass< "AArch64", - [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16, + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, (sequence "P%u", 0, lastreg)> { let Size = 16; } @@ -1212,26 +1212,28 @@ let SubRegIndices = [zasubb] in { // SME Register Classes -// Accumulator array -def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> { - let Size = 2048; -} +let isAllocatable = 0 in { + // Accumulator array + def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> { + let Size = 2048; + } -// Accumulator array as single tiles -def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> { - let Size = 2048; -} -def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> { - let Size = 1024; -} -def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> { - let Size = 512; -} -def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> { - let Size = 256; -} -def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> { - let Size = 128; + // Accumulator array as single tiles + def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> { + let Size = 2048; + } + def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> { + let Size = 1024; + } + def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> { + let Size = 512; + } + def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> { + let Size = 256; + } + def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> { + let Size = 128; + } } // SME Register Operands @@ -1385,3 +1387,12 @@ def svcr_op : Operand<i32> { return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr; }]; } + +//===----------------------------------------------------------------------===// +// Register categories.
+// + +def GeneralPurposeRegisters : RegisterCategory<[GPR64, GPR32]>; + +def FIXED_REGS : RegisterClass<"AArch64", [i64], 64, (add FP, SP, VG, FFR)>; +def FixedRegisters : RegisterCategory<[CCR, FIXED_REGS]>; diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp index c4965e7146ff..364ce687fd55 100644 --- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp +++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp @@ -360,8 +360,8 @@ AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB, assert(ImpSPOpIdx != -1); int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); - BL->RemoveOperand(FirstOpIdxToRemove); - BL->RemoveOperand(SecondOpIdxToRemove); + BL->removeOperand(FirstOpIdxToRemove); + BL->removeOperand(SecondOpIdxToRemove); // Now copy over the implicit operands from the original BLR BL->copyImplicitOps(MF, BLR); MF.moveCallSiteInfo(&BLR, BL); diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index aacace64e998..e595d20c8d4e 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -14,9 +14,18 @@ // Add vector elements horizontally or vertically to ZA tile. //===----------------------------------------------------------------------===// +def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>; +def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>; + let Predicates = [HasSME] in { +def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>; +def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>; +def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>; + def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">; def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">; + +def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>; } let Predicates = [HasSMEI64] in { @@ -29,41 +38,41 @@ let Predicates = [HasSME] in { // Outer products //===----------------------------------------------------------------------===// -defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">; -defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">; +defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa", int_aarch64_sme_mopa_wide>; +defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops", int_aarch64_sme_mops_wide>; -def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">; -def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">; +defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>; +defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>; } let Predicates = [HasSMEF64] in { -def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">; -def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">; +defm FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa", int_aarch64_sme_mopa>; +defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>; } let Predicates = [HasSME] in { -defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">; -defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">; - -def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">; -def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">; -def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">; -def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">; -def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, 
"sumopa">; -def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">; -def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">; -def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">; +defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa", int_aarch64_sme_mopa_wide>; +defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops", int_aarch64_sme_mops_wide>; + +defm SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa", int_aarch64_sme_smopa_wide>; +defm SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops", int_aarch64_sme_smops_wide>; +defm UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa", int_aarch64_sme_umopa_wide>; +defm UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops", int_aarch64_sme_umops_wide>; +defm SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa", int_aarch64_sme_sumopa_wide>; +defm SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops", int_aarch64_sme_sumops_wide>; +defm USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa", int_aarch64_sme_usmopa_wide>; +defm USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops", int_aarch64_sme_usmops_wide>; } let Predicates = [HasSMEI64] in { -def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">; -def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">; -def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">; -def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">; -def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">; -def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">; -def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">; -def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">; +defm SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa", int_aarch64_sme_smopa_wide>; +defm SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops", int_aarch64_sme_smops_wide>; +defm UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa", int_aarch64_sme_umopa_wide>; +defm UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops", int_aarch64_sme_umops_wide>; +defm SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa", int_aarch64_sme_sumopa_wide>; +defm SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops", int_aarch64_sme_sumops_wide>; +defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>; +defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>; } let Predicates = [HasSME] in { @@ -129,15 +138,21 @@ def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>; def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>; def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>; +// Read and write TPIDR2_EL0 +def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val), + (MSR 0xde85, GPR64:$val)>; +def : Pat<(i64 (int_aarch64_sme_get_tpidr2)), + (MRS 0xde85)>; + //===----------------------------------------------------------------------===// // SVE2 instructions //===----------------------------------------------------------------------===// -def REVD_ZPmZ : sve2_int_perm_revd<"revd">; +defm REVD_ZPmZ : sve2_int_perm_revd<"revd", AArch64revd_mt>; -defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>; -defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>; +defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, int_aarch64_sve_sclamp>; +defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, int_aarch64_sve_uclamp>; -defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">; +defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>; } // End let Predicates = [HasSME] diff --git 
a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 1d162610de9c..68ff1b78e84b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -165,8 +165,8 @@ def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; def SDT_AArch64Arith : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3> + SDTCisVec<0>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, + SDTCisSameAs<2,3>, SDTCisSameNumEltsAs<0,1> ]>; def SDT_AArch64FMA : SDTypeProfile<1, 4, [ @@ -175,7 +175,6 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [ ]>; // Predicated operations with the result of inactive lanes being unspecified. -def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; @@ -194,7 +193,6 @@ def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>; -def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>; def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>; def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; @@ -235,6 +233,7 @@ def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64revd_mt : SDNode<"AArch64ISD::REVD_MERGE_PASSTHRU", SDT_AArch64Arith>; // These are like the above but we don't yet have need for ISD nodes. They allow // a single pattern to match intrinsic and ISD operand layouts. 
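// A minimal sketch of the idiom described above, with hypothetical names
// (int_aarch64_sve_example / AArch64example_node are illustrative only and
// not defined in this patch): listing both operand layouts in one PatFrags
// lets a single instruction pattern match whichever form appears in the DAG.
def AArch64example_mt : PatFrags<(ops node:$pg, node:$op, node:$pt),
                                 [(int_aarch64_sve_example node:$pt, node:$pg, node:$op),
                                  (AArch64example_node node:$pg, node:$op, node:$pt)]>;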
@@ -242,6 +241,26 @@ def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_ def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>; def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>; +def AArch64fmul_m1 : EitherVSelectOrPassthruPatFrags; +def AArch64fadd_m1 : EitherVSelectOrPassthruPatFrags; +def AArch64fsub_m1 : EitherVSelectOrPassthruPatFrags; + +def AArch64saba : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(int_aarch64_sve_saba node:$op1, node:$op2, node:$op3), + (add node:$op1, (AArch64sabd_p (SVEAllActive), node:$op2, node:$op3))]>; + +def AArch64uaba : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(int_aarch64_sve_uaba node:$op1, node:$op2, node:$op3), + (add node:$op1, (AArch64uabd_p (SVEAllActive), node:$op2, node:$op3))]>; + +def AArch64usra : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(int_aarch64_sve_usra node:$op1, node:$op2, node:$op3), + (add node:$op1, (AArch64lsr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>; + +def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3), + (add node:$op1, (AArch64asr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>; + def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCVecEltisVT<1,i1> @@ -282,6 +301,14 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2), (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>; +// FMAs with a negated multiplication operand can be commuted. 
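// In other words fma(-a, b, c) == fma(b, -a, c), which is why the PatFrags
// below lists both operand orders; either placement of the fneg selects to
// the same FMLS pattern.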
+def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), + [(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3), + (AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>; + +def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2), + (AArch64fsub_p node:$pg, node:$op2, node:$op1)>; + def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt), (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{ return N->getFlags().hasNoSignedZeros(); @@ -295,11 +322,14 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [ def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>; def AArch64bic : PatFrags<(ops node:$op1, node:$op2), - [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))), - (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))), + [(and node:$op1, (xor node:$op2, (splat_vector (i32 -1)))), + (and node:$op1, (xor node:$op2, (splat_vector (i64 -1)))), (and node:$op1, (xor node:$op2, (SVEAllActive))), (AArch64bic_node node:$op1, node:$op2)]>; +def AArch64subr : PatFrag<(ops node:$op1, node:$op2), + (sub node:$op2, node:$op1)>; + let Predicates = [HasSVE] in { defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; @@ -308,7 +338,7 @@ let Predicates = [HasSVE] in { def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>; defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>; defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>; @@ -325,25 +355,27 @@ let Predicates = [HasSVEorStreamingSVE] in { defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">; defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>; - defm ADD_ZPZZ : sve_int_bin_pred_bhsd; - defm SUB_ZPZZ : sve_int_bin_pred_bhsd; -} // End HasSVEorStreamingSVE + defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", int_aarch64_sve_orr, DestructiveBinaryComm>; + defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", int_aarch64_sve_eor, DestructiveBinaryComm>; + defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", int_aarch64_sve_and, DestructiveBinaryComm>; + defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", "BIC_ZPZZ", int_aarch64_sve_bic, DestructiveBinary>; +} // End HasSVEorSME -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos -let Predicates = [HasSVEorStreamingSVE] in { - defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>; - defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>; - defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>; - defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>; + defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm BIC_ZPZZ : 
sve_int_bin_pred_zeroing_bhsd; +} // End HasSVEorSME, UseExperimentalZeroingPseudos +let Predicates = [HasSVEorSME] in { defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>; defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>; - defm SUBR_ZI : sve_int_arith_imm0_subr<0b011, "subr", sub>; + defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr", AArch64subr>; defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>; defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>; defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>; @@ -440,11 +472,11 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>; defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>; defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>; - + defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd; defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd; defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd; - defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd; defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd; defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd; defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd; @@ -461,9 +493,9 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; } - defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>; - defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; - defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>; + defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>; + defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; + defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>; defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>; defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>; defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>; @@ -484,9 +516,9 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FMIN_ZPZZ : sve_fp_bin_pred_hfd; defm FABD_ZPZZ : sve_fp_bin_pred_hfd; defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; @@ -499,28 +531,28 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVEorSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; 
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>; defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>; defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>; @@ -545,7 +577,7 @@ let Predicates = [HasSVEorStreamingSVE] in { (!cast("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; // Zd = Za + -Zn * Zm - def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, Ty:$Za)), + def : Pat<(Ty (AArch64fmls_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)), (!cast("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>; // Zd = -Za + Zn * Zm @@ -576,26 +608,26 @@ let Predicates = [HasSVEorStreamingSVE] in { defm : fma; defm : fma; defm : fma; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>; defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>; defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // SVE floating point reductions. 
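// FADDA folds the elements into the scalar strictly in order, so it cannot
// reassociate; unlike the "fast" reductions that follow (FADDV etc.) it is
// also not legal in streaming mode, which is why it stays under HasSVE alone.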
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>; defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>; defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>; @@ -613,7 +645,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; // Splat scalar register (unpredicated, GPR or vector + element index) - defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>; + defm DUP_ZR : sve_int_perm_dup_r<"dup", splat_vector>; defm DUP_ZZI : sve_int_perm_dup_i<"dup">; // Splat scalar register (predicated) @@ -621,61 +653,67 @@ let Predicates = [HasSVEorStreamingSVE] in { defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>; // Duplicate FP scalar into all vector elements - def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))), + def : Pat<(nxv8f16 (splat_vector (f16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))), + def : Pat<(nxv4f16 (splat_vector (f16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))), + def : Pat<(nxv2f16 (splat_vector (f16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; - def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))), + def : Pat<(nxv4f32 (splat_vector (f32 FPR32:$src))), (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; - def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))), + def : Pat<(nxv2f32 (splat_vector (f32 FPR32:$src))), (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; - def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))), + def : Pat<(nxv2f64 (splat_vector (f64 FPR64:$src))), (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))), + def : Pat<(nxv8bf16 (splat_vector (bf16 FPR16:$src))), + (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv4bf16 (splat_vector (bf16 FPR16:$src))), + (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv2bf16 (splat_vector (bf16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; // Duplicate +0.0 into all vector elements - def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; - def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv2f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>; + def : Pat<(nxv2f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>; + def : Pat<(nxv2f64 (splat_vector (f64 fpimm0))), (DUP_ZI_D 0, 0)>; + def : Pat<(nxv8bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv2bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; // Duplicate Int 
immediate into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), + def : Pat<(nxv16i8 (splat_vector (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))), (DUP_ZI_B $a, $b)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), + def : Pat<(nxv8i16 (splat_vector (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)))), (DUP_ZI_H $a, $b)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), + def : Pat<(nxv4i32 (splat_vector (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)))), (DUP_ZI_S $a, $b)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm64 i32:$a, i32:$b)))), + def : Pat<(nxv2i64 (splat_vector (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))), (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. - def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))), + def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))), + def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))), + def : Pat<(nxv2f64 (splat_vector (f64 fpimm:$val))), (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; // Duplicate FP immediate into all vector elements let AddedComplexity = 2 in { - def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)), + def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv4f16 (AArch64dup fpimm16:$imm8)), + def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv2f16 (AArch64dup fpimm16:$imm8)), + def : Pat<(nxv2f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv4f32 (AArch64dup fpimm32:$imm8)), + def : Pat<(nxv4f32 (splat_vector fpimm32:$imm8)), (FDUP_ZI_S fpimm32:$imm8)>; - def : Pat<(nxv2f32 (AArch64dup fpimm32:$imm8)), + def : Pat<(nxv2f32 (splat_vector fpimm32:$imm8)), (FDUP_ZI_S fpimm32:$imm8)>; - def : Pat<(nxv2f64 (AArch64dup fpimm64:$imm8)), + def : Pat<(nxv2f64 (splat_vector fpimm64:$imm8)), (FDUP_ZI_D fpimm64:$imm8)>; } @@ -683,13 +721,13 @@ let Predicates = [HasSVEorStreamingSVE] in { defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; @@ -710,16 +748,21 @@ let Predicates = [HasSVEorStreamingSVE] in { defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>; defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>; + // Define pattern for `nxv1i1 splat_vector(1)`. + // We do this here instead of in ISelLowering such that PatFrag's can still + // recognize a splat. 
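// (PTRUE_D 31) materializes an all-true nxv2i1 (predicate pattern 31 means
// ALL), and punpklo keeps its low half, which is exactly an all-true nxv1i1.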
+ def : Pat<(nxv1i1 immAllOnesV), (PUNPKLO_PP (PTRUE_D 31))>; + defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>; defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>; defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>; @@ -831,7 +874,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // non-faulting continuous load with reg+immediate @@ -871,7 +914,7 @@ let Predicates = [HasSVE] in { defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // LD(2|3|4) structured loads with reg+immediate defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>; defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>; @@ -899,7 +942,7 @@ let Predicates = [HasSVEorStreamingSVE] in { def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>; def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>; def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // Gathers using unscaled 32-bit offsets, e.g. 
@@ -1013,9 +1056,95 @@ let Predicates = [HasSVE] in { defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; + + multiclass sve_masked_gather_x2_scaled { + // base + vector of scaled offsets + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs))), + (!cast(Inst # _SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of signed 32bit scaled offsets + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32))), + (!cast(Inst # _SXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of unsigned 32bit scaled offsets + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), + (!cast(Inst # _UXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; + } + + multiclass sve_masked_gather_x2_unscaled { + // vector of pointers + immediate offset (includes zero) + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), (i64 ImmTy:$imm), (nxv2i64 ZPR:$ptrs))), + (!cast(Inst # _IMM) PPR:$gp, ZPR:$ptrs, ImmTy:$imm)>; + // base + vector of offsets + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs))), + (!cast(Inst) PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of signed 32bit offsets + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32))), + (!cast(Inst # _SXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of unsigned 32bit offsets + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), + (!cast(Inst # _UXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>; + } + + multiclass sve_masked_gather_x4 { + def : Pat<(Ty (Load (SVEDup0Undef), (nxv4i1 PPR:$gp), GPR64:$base, (nxv4i32 ZPR:$offs))), + (Inst PPR:$gp, GPR64:$base, ZPR:$offs)>; + } + + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + defm : sve_masked_gather_x2_scaled; + + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + defm : sve_masked_gather_x2_unscaled; + + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : 
sve_masked_gather_x4; + defm : sve_masked_gather_x4; + + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; + defm : sve_masked_gather_x4; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // Non-temporal contiguous loads (register + immediate) defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>; @@ -1051,7 +1180,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // Scatters using unpacked, unscaled 32-bit offsets, e.g. @@ -1100,12 +1229,87 @@ let Predicates = [HasSVE] in { // Scatters using scaled 64-bit offsets, e.g. // st1h z0.d, p0, [x0, z0.d, lsl #1] - defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>; - defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>; - defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; + defm SST1H_D : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>; + defm SST1W_D : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>; + defm SST1D : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; + + multiclass sve_masked_scatter_x2_scaled { + // base + vector of scaled offsets + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs)), + (!cast(Inst # _SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of signed 32bit scaled offsets + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32)), + (!cast(Inst # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of unsigned 32bit scaled offsets + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))), + (!cast(Inst # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + } + + multiclass sve_masked_scatter_x2_unscaled { + // vector of pointers + immediate offset (includes zero) + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), (i64 ImmTy:$imm), (nxv2i64 ZPR:$ptrs)), + (!cast(Inst # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, ImmTy:$imm)>; + // base + vector of offsets + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs)), + (!cast(Inst) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of signed 32bit offsets + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32)), + (!cast(Inst # _SXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + // base + vector of unsigned 32bit offsets + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and 
(nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))), + (!cast(Inst # _UXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + } + + multiclass sve_masked_scatter_x4 { + def : Pat<(Store (Ty ZPR:$data), (nxv4i1 PPR:$gp), GPR64:$base, (nxv4i32 ZPR:$offs)), + (Inst ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; + } + + defm : sve_masked_scatter_x2_scaled; + defm : sve_masked_scatter_x2_scaled; + defm : sve_masked_scatter_x2_scaled; + defm : sve_masked_scatter_x2_scaled; + defm : sve_masked_scatter_x2_scaled; + defm : sve_masked_scatter_x2_scaled; + defm : sve_masked_scatter_x2_scaled; + + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + defm : sve_masked_scatter_x2_unscaled; + + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; + defm : sve_masked_scatter_x4; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // ST(2|3|4) structured stores (register + immediate) defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>; defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>; @@ -1161,7 +1365,7 @@ let Predicates = [HasSVEorStreamingSVE] in { // Contiguous prefetch (register + register) def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>; def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>; - def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; + def PRFW_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>; multiclass sve_prefetch { @@ -1184,9 +1388,9 @@ let Predicates = [HasSVEorStreamingSVE] in { defm : sve_prefetch; defm : sve_prefetch; - defm : sve_prefetch; + defm : sve_prefetch; defm : sve_prefetch; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // Gather prefetch using scaled 32-bit offsets, e.g. @@ -1249,7 +1453,7 @@ let Predicates = [HasSVE] in { // Patterns to generate adr instruction. 
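// ADR forms per-lane addresses: a base vector plus an (optionally extended
// and shifted) offset vector, so the explicit add/and/shl trees matched
// below collapse into a single instruction, e.g.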
// adr z0.d, [z0.d, z0.d, uxtw] def : Pat<(add nxv2i64:$Op1, - (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))), + (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), (ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>; // adr z0.d, [z0.d, z0.d, sxtw] def : Pat<(add nxv2i64:$Op1, @@ -1262,7 +1466,7 @@ let Predicates = [HasSVE] in { def : Pat<(add Ty:$Op1, (Ty (AArch64lsl_p (PredTy (SVEAllActive)), Ty:$Op2, - (Ty (AArch64dup (ShiftTy ShiftAmt)))))), + (Ty (splat_vector (ShiftTy ShiftAmt)))))), (DestAdrIns $Op1, $Op2)>; } defm : adrShiftPat; @@ -1277,14 +1481,14 @@ let Predicates = [HasSVE] in { multiclass adrXtwShiftPat { def : Pat<(add Ty:$Op1, (Ty (AArch64lsl_p (PredTy (SVEAllActive)), - (Ty (and Ty:$Op2, (Ty (AArch64dup (i64 0xFFFFFFFF))))), - (Ty (AArch64dup (i64 ShiftAmt)))))), + (Ty (and Ty:$Op2, (Ty (splat_vector (i64 0xFFFFFFFF))))), + (Ty (splat_vector (i64 ShiftAmt)))))), (!cast("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; def : Pat<(add Ty:$Op1, (Ty (AArch64lsl_p (PredTy (SVEAllActive)), (Ty (sext_inreg Ty:$Op2, nxv2i32)), - (Ty (AArch64dup (i64 ShiftAmt)))))), + (Ty (splat_vector (i64 ShiftAmt)))))), (!cast("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; } defm : adrXtwShiftPat; @@ -1292,7 +1496,7 @@ let Predicates = [HasSVE] in { defm : adrXtwShiftPat; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>; defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>; @@ -1310,6 +1514,10 @@ let Predicates = [HasSVEorStreamingSVE] in { defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; // Extract lo/hi halves of legal predicate types. + def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 0))), + (PUNPKLO_PP PPR:$Ps)>; + def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 1))), + (PUNPKHI_PP PPR:$Ps)>; def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))), (PUNPKLO_PP PPR:$Ps)>; def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))), @@ -1400,6 +1608,8 @@ let Predicates = [HasSVEorStreamingSVE] in { (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; // Concatenate two predicates. 
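// Unpacked predicates keep each lane's flag in the low bit of a wider
// granule, so a uzp1 at the next-narrower element size picks up exactly
// those granules from its two operands, in effect concatenating them.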
+ def : Pat<(nxv2i1 (concat_vectors nxv1i1:$p1, nxv1i1:$p2)), + (UZP1_PPP_D $p1, $p2)>; def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), (UZP1_PPP_S $p1, $p2)>; def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)), @@ -1475,7 +1685,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>; + defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETUNE, SETNE, SETUNE, SETNE>; defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>; defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>; defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>; @@ -1485,7 +1695,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>; defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>; defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>; + defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>; defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>; defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>; @@ -1522,7 +1732,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>; defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>; -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>; defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>; defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>; @@ -1619,16 +1829,16 @@ let Predicates = [HasSVEorStreamingSVE] in { defm ASR_ZPZI : sve_int_shift_pred_bhsd; defm LSR_ZPZI : sve_int_shift_pred_bhsd; defm LSL_ZPZI : sve_int_shift_pred_bhsd; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVEorSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; @@ -1679,60 +1889,61 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 
0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), - (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + //These patterns exist to improve the code quality of conversions on unpacked types. + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. // This is ignored by the pattern below where it is matched by (i64 timm0_1) - def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), - (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - // Floating-point -> signed integer - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + // Signed integer -> Floating-point + def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg), + def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg), (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))), - (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))), - (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - // Floating-point -> unsigned integer - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + // Unsigned integer -> Floating-point + def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (nxv2i64 (splat_vector (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), + (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), + (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg), + def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg), (and (nxv4i32 ZPR:$Zs), - (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), - 
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (nxv4i32 (splat_vector (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), + (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), - (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), + (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), - (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), + (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; @@ -1743,27 +1954,27 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>; defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>; defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasBF16, HasSVEorStreamingSVE] in { +let Predicates = [HasBF16, HasSVEorSME] in { defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>; defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>; -} // End HasBF16, HasSVEorStreamingSVE +} // End HasBF16, HasSVEorSME let Predicates = [HasBF16, HasSVE] in { defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>; } // End HasBF16, HasSVE -let Predicates = [HasBF16, HasSVEorStreamingSVE] in { - defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>; - defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>; - defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>; - defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>; +let Predicates = [HasBF16, HasSVEorSME] in { + defm BFMLALB_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>; + defm BFMLALT_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>; + defm BFMLALB_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>; + defm BFMLALT_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>; defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>; defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>; -} // End HasBF16, HasSVEorStreamingSVE +} // End HasBF16, HasSVEorSME -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // InstAliases def : InstAlias<"mov $Zd, $Zn", (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; @@ -1875,7 +2086,7 @@ let Predicates = [HasSVEorStreamingSVE] in { let AddedComplexity = 1 in { class LD1RPat : - Pat<(vt (AArch64dup (index_vt (operator (CP GPR64:$base, immtype:$offset))))), + Pat<(vt (splat_vector (index_vt (operator (CP GPR64:$base, immtype:$offset))))), (load (ptrue 31), GPR64:$base, $offset)>; } @@ -1963,22 +2174,22 @@ let Predicates = [HasSVEorStreamingSVE] in { 
GPR32:$op, sub_32), $imm), sub_32))>; - def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), + def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), (INCH_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), + def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), (INCW_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))), + def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))), (INCD_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), + def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), (DECH_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), + def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), (DECW_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))), + def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))), (DECD_ZPiI ZPR:$op, 31, $imm)>; } - let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL], AddedComplexity = 5 in { + let Predicates = [HasSVEorSME, UseScalarIncVL], AddedComplexity = 5 in { def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))), (INCH_XPiI GPR64:$op, 31, $imm)>; def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))), @@ -2098,15 +2309,23 @@ let Predicates = [HasSVEorStreamingSVE] in { def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv16i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv8i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv4i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv2i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv1i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv1i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS 
PPR:$src, PPR)>; + def : Pat<(nxv1i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv1i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; // These allow casting from/to unpacked floating-point types. def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; @@ -2145,12 +2364,12 @@ let Predicates = [HasSVEorStreamingSVE] in { } // 2-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; defm : pred_load; defm : pred_load; defm : pred_load; @@ -2158,18 +2377,18 @@ let Predicates = [HasSVEorStreamingSVE] in { defm : pred_load; // 4-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; defm : pred_load; defm : pred_load; defm : pred_load; defm : pred_load; // 8-element contiguous loads - defm : pred_load; - defm : pred_load; + defm : pred_load; + defm : pred_load; defm : pred_load; defm : pred_load; defm : pred_load; @@ -2397,7 +2616,7 @@ let Predicates = [HasSVEorStreamingSVE] in { // 16-element contiguous loads defm : ld1; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { multiclass ldnf1 { @@ -2482,7 +2701,7 @@ let Predicates = [HasSVE] in { defm : ldff1; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { multiclass st1 { // reg + reg @@ -2716,7 +2935,7 @@ let Predicates = [HasSVEorStreamingSVE] in { def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; } -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE, HasMatMulInt8] in { defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>; @@ -2724,11 +2943,11 @@ let Predicates = [HasSVE, HasMatMulInt8] in { defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>; } // End HasSVE, HasMatMulInt8 -let Predicates = [HasSVEorStreamingSVE, HasMatMulInt8] in { +let Predicates = [HasSVEorSME, HasMatMulInt8] in { defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>; defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>; defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>; -} // End HasSVEorStreamingSVE, HasMatMulInt8 +} // End HasSVEorSME, HasMatMulInt8 let Predicates = [HasSVE, HasMatMulFP32] in { defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>; @@ -2746,16 +2965,16 @@ let Predicates = [HasSVE, HasMatMulFP64] in { defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>; } // End HasSVE, HasMatMulFP64 -let Predicates = [HasSVEorStreamingSVE, HasMatMulFP64] in { +let Predicates = [HasSVEorSME, HasMatMulFP64] in { defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>; defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>; defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>; defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>; defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>; defm 
TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>; -} // End HasSVEorStreamingSVE, HasMatMulFP64 +} // End HasSVEorSME, HasMatMulFP64 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 integer multiply-add (indexed) defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>; defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>; @@ -2903,17 +3122,17 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME -let Predicates = [HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVE2orSME, UseExperimentalZeroingPseudos] in { defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; -} // End HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVE2orSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 predicated shifts defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>; defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>; @@ -2960,18 +3179,18 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>; - defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>; - defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>; - defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", AArch64ssra>; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", AArch64usra>; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra, int_aarch64_sve_srshr>; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra, int_aarch64_sve_urshr>; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>; defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>; // SVE2 integer absolute difference and accumulate - defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>; - defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>; + defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", AArch64saba>; + defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", AArch64uaba>; // SVE2 integer absolute difference and accumulate long defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>; @@ -3026,7 +3245,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>; defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>; defm SQXTUNT_ZZ : 
sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 character match @@ -3034,7 +3253,7 @@ let Predicates = [HasSVE2] in { defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 bitwise exclusive-or interleaved defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>; defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>; @@ -3049,7 +3268,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>; defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>; defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 histogram generation (segment) @@ -3059,7 +3278,7 @@ let Predicates = [HasSVE2] in { defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 floating-point base 2 logarithm as integer defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>; @@ -3091,7 +3310,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { // SVE2 bitwise ternary operations defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", int_aarch64_sve_eor3>; defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>; - defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl>; + defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>; defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>; defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>; defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>; @@ -3101,7 +3320,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 non-temporal gather loads @@ -3120,10 +3339,10 @@ let Predicates = [HasSVE2] in { defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 non-temporal scatter stores @@ -3137,7 +3356,7 @@ let Predicates = [HasSVE2] in { defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 table lookup (three sources) defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>; defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>; @@ -3156,7 +3375,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">; defm WHILERW_PXX 
  : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
 
 let Predicates = [HasSVE2AES] in {
   // SVE2 crypto destructive binary operations
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 009219ce3c54..c6b112d0d2f1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -6,7 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the machine model for the ARM Cortex-A55 processors.
+// This file defines the machine model for the ARM Cortex-A55 processors. Note
+// that this schedule is currently used as the default for -mcpu=generic. As a
+// result, some of the modelling decisions made do not precisely model the
+// Cortex-A55, instead aiming to be a good compromise between different CPUs.
 //
 //===----------------------------------------------------------------------===//
 
@@ -149,8 +152,31 @@ def : WriteRes { let Latency = 3; }
 def : WriteRes { let Latency = 4; }
 def : WriteRes { let Latency = 3; }
 def : WriteRes { let Latency = 3; }
-def : WriteRes { let Latency = 4; }
-def : WriteRes { let Latency = 4; let BeginGroup = 1; }
+
+// NEON
+class CortexA55WriteVd<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
+  let Latency = n;
+}
+class CortexA55WriteVq<int n, ProcResourceKind res> : SchedWriteRes<[res, res]> {
+  let Latency = n;
+  let BeginGroup = 1;
+}
+def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteDotVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteDotVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaLVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaIxVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_3 : CortexA55WriteVd<3, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_3 : CortexA55WriteVq<3, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_2 : CortexA55WriteVd<2, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_2 : CortexA55WriteVq<2, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_1 : CortexA55WriteVd<1, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_1 : CortexA55WriteVq<1, CortexA55UnitFPALU>;
+def : SchedAlias<WriteVd, CortexA55WriteVd<4, CortexA55UnitFPALU>>;
+def : SchedAlias<WriteVq, CortexA55WriteVq<4, CortexA55UnitFPALU>>;
 
 // FP ALU specific new schedwrite definitions
 def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
@@ -358,4 +384,99 @@ def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
 def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
 def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
 
+// 4.15. Advanced SIMD integer instructions
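+//
+// (Illustrative note, not from the vendored source: the CortexA55WriteVd and
+// CortexA55WriteVq classes above encode the 64-bit and 128-bit vector forms.
+// For example, CortexA55WriteAluVq_3 is shorthand for
+//   SchedWriteRes<[CortexA55UnitFPALU, CortexA55UnitFPALU]>
+// with Latency = 3 and BeginGroup = 1: a Q-form ALU operation occupies both
+// 64-bit FPALU slots and must begin a new issue group, while the Vd forms
+// occupy a single slot.)
+//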
+// ASIMD absolute diff
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
+// ASIMD absolute diff accum
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ABAL?v")>;
+// ASIMD absolute diff long
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDLv")>;
+// ASIMD arith #1
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
+           "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
+           "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
+// ASIMD arith #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
+           "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
+           "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
+           "ADDPv(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
+           "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
+           "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
+           "ADDPv(16i8|2i64|4i32|8i16)$")>;
+// ASIMD arith #3
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "SADDLv", "UADDLv", "SADDWv",
+           "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
+// ASIMD arith #5
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "RADDHNv", "RSUBHNv")>;
+// ASIMD arith, reduce
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
+// ASIMD compare #1
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
+// ASIMD compare #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>;
+// ASIMD logical #1
+def : InstRW<[CortexA55WriteAluVd_1], (instregex "(AND|EOR|NOT|ORN)v8i8",
+           "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>;
+def : InstRW<[CortexA55WriteAluVq_1], (instregex "(AND|EOR|NOT|ORN)v16i8",
+           "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
+// ASIMD max/min, basic
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU](MIN|MAX)P?v(16i8|4i32|8i16)")>;
+// ASIMD max/min, reduce
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU](MAX|MIN)Vv")>;
+// ASIMD multiply, by element
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
+           "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply
+def : InstRW<[CortexA55WriteAluVd_3], (instrs PMULv8i8)>;
+def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULv16i8)>;
+// ASIMD multiply accumulate
+def : InstRW<[CortexA55WriteMlaVd_4], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA55WriteMlaVq_4], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
+def : InstRW<[CortexA55WriteMlaIxVq_4], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply accumulate half
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQRDML[AS]H[vi]")>;
+// ASIMD multiply accumulate long
+def : InstRW<[CortexA55WriteMlaLVq_4], (instregex "[SU]ML[AS]Lv")>;
+// ASIMD multiply accumulate long #2
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQDML[AS]L[iv]")>;
+// ASIMD dot product
+def :
InstRW<[CortexA55WriteDotVd_4], (instregex "[SU]DOTv8i8")>; +def : InstRW<[CortexA55WriteDotVq_4], (instregex "[SU]DOTv16i8")>; +// ASIMD dot product, by scalar +def : InstRW<[CortexA55WriteDotScVq_4], (instregex "[SU]DOTlanev")>; +// ASIMD multiply long +def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]MULLv", "SQDMULL[iv]")>; +// ASIMD polynomial (8x8) multiply long +def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULLv8i8, PMULLv16i8)>; +// ASIMD pairwise add and accumulate +def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ADALPv")>; +// ASIMD shift accumulate +def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>; +// ASIMD shift accumulate #2 +def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]RSRA[vd]")>; +// ASIMD shift by immed +def : InstRW<[CortexA55WriteAluVd_2], (instregex "SHLd$", "SHLv", + "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>; +// ASIMD shift by immed +// SXTL and UXTL are aliases for SHLL +def : InstRW<[CortexA55WriteAluVq_2], (instregex "[US]?SHLLv")>; +// ASIMD shift by immed #2 +def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)", + "RSHRNv(2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHRv(16i8|2i64|4i32|8i16)", + "RSHRNv(16i8|4i32|8i16)")>; +// ASIMD shift by register +def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>; +// ASIMD shift by register #2 +def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>; + } diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index fa10d056b7f7..6b053f1969b4 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -22,7 +22,7 @@ def A64FXModel : SchedMachineModel { list UnsupportedFeatures = [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth, - HasSVE2orStreamingSVE]; + HasSVE2orSME]; let FullInstRWOverlapCheck = 0; } @@ -3348,7 +3348,7 @@ def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>; def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>; // [351] "prfw $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRR)>; // [352] "prfw $prfop, $Pg, [$Rn, $Zm]"; def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>; @@ -3554,7 +3554,7 @@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>; // [421] "st1b $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>; // [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>; @@ -3566,7 +3566,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>; // [425] "st1d $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, 
SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D, SST1D_SCALED, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
 
 // [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
 def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>;
@@ -3578,7 +3578,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>;
 def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>;
 
 // [429] "st1h $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D, SST1H_D_SCALED, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
 
 // [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
 def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>;
@@ -3590,7 +3590,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>;
 def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>;
 
 // [433] "st1w $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D, SST1W_D_SCALED, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
 
 // [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
 def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
new file mode 100644
index 000000000000..32f7299fbf87
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -0,0 +1,1136 @@
+//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Ampere Computing Ampere-1 to
+// support instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+// The Ampere-1 core is an out-of-order micro-architecture. The front
+// end has branch prediction, with a 10-cycle recovery time from a
+// mispredicted branch. Instructions coming out of the front end are
+// decoded into internal micro-ops (uops).
+
+def Ampere1Model : SchedMachineModel {
+  let IssueWidth            =   4;  // 4-way decode and dispatch
+  let MicroOpBufferSize     = 174;  // micro-op re-order buffer size
+  let LoadLatency           =   4;  // Optimistic load latency
+  let MispredictPenalty     =  10;  // Branch mispredict penalty
+  let LoopMicroOpBufferSize =  32;  // Instruction queue size
+  let CompleteModel = 1;
+
+  list<SubtargetFeature> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+                                                           SMEUnsupported.F);
+}
+
+let SchedModel = Ampere1Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Ampere-1.
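+//
+// (Illustrative note, not from the vendored source: each ProcResource below
+// models an issue port, and a ProcResGroup lets a write consume whichever
+// member unit is free. A hypothetical two-uop write needing one FP/vector
+// pipe from the X/Y group plus one store-address pipe could be declared as:
+//   def ExampleWrite_2cyc_1XY_1S : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS]> {
+//     let Latency = 2;     // result ready after two cycles
+//     let NumMicroOps = 2; // one XY uop plus one S uop
+//   }
+// ExampleWrite_2cyc_1XY_1S is an invented name; the real defs below follow
+// the same pattern.)
+//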
+// Ampere-1 has 12 pipelines that 8 independent scheduler (4 integer, 2 FP, +// and 2 memory) issue into. The integer and FP schedulers can each issue +// one uop per cycle, while the memory schedulers can each issue one load +// and one store address calculation per cycle. + +def Ampere1UnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w +def Ampere1UnitB : ProcResource<2>; // integer single-cycle, and complex shifts +def Ampere1UnitBS : ProcResource<1>; // integer multi-cycle +def Ampere1UnitL : ProcResource<2>; // load +def Ampere1UnitS : ProcResource<2>; // store address calculation +def Ampere1UnitX : ProcResource<1>; // FP and vector operations, and flag write +def Ampere1UnitY : ProcResource<1>; // FP and vector operations, and crypto +def Ampere1UnitZ : ProcResource<1>; // FP store data and FP-to-integer moves + +def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>; +def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>; + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Ampere-1. + +def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS, + Ampere1UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS, + Ampere1UnitZ]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1Write_3cyc_1B_1S_1AB : 
SchedWriteRes<[Ampere1UnitB, Ampere1UnitS, + Ampere1UnitAB]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 2; + let NumMicroOps = 4; +} + +def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 5; + let NumMicroOps = 6; +} + +def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 6; + let NumMicroOps = 6; +} + +def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitS, Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 6; + let NumMicroOps = 9; +} + +def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1Write_6cyc_1XY : 
SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 6; + let NumMicroOps = 1; +} + +def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitL, Ampere1UnitL]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { + let Latency = 7; + let NumMicroOps = 1; +} + +def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 7; + let NumMicroOps = 4; +} + +def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 7; + let NumMicroOps = 12; +} + +def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA, + Ampere1UnitA]> { + let Latency = 8; + let NumMicroOps = 3; +} + +def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 8; + let NumMicroOps = 6; +} + +def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 8; + let NumMicroOps = 8; +} + +def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 9; + let NumMicroOps = 6; +} + +def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 9; + let NumMicroOps = 8; +} + +def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 9; + let NumMicroOps = 5; +} + +def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitS, Ampere1UnitS, 
+ Ampere1UnitZ, Ampere1UnitZ, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 9; + let NumMicroOps = 14; +} + +def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitS, Ampere1UnitS, + Ampere1UnitZ, Ampere1UnitZ, + Ampere1UnitZ, Ampere1UnitZ]> { + let Latency = 9; + let NumMicroOps = 16; +} + +def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 10; + let NumMicroOps = 6; +} + +def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> { + let Latency = 11; + let NumMicroOps = 2; +} + +def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 11; + let NumMicroOps = 12; +} + +def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, + Ampere1UnitL, Ampere1UnitL, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 12; + let NumMicroOps = 12; +} + +def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 12; + let NumMicroOps = 3; +} + +def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, + Ampere1UnitXY, Ampere1UnitXY]> { + let Latency = 12; + let NumMicroOps = 4; +} + +def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { + let Latency = 18; + let NumMicroOps = 1; +} + +def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 19; + let NumMicroOps = 1; +} + +def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 25; + let NumMicroOps = 1; +} + +def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 32; + let NumMicroOps = 1; +} + +def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> { + let Latency = 34; + let NumMicroOps = 1; +} + +def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 34; + let NumMicroOps = 1; +} + +def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 39; + let NumMicroOps = 1; +} + +def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> { + let Latency = 62; + let NumMicroOps = 1; +} + +// For basic arithmetic, we have more flexibility for 
short shifts (LSL shift <= 4), +// which are a single uop, and for extended registers, which have full flexibility +// across Unit A or B for both uops. +def Ampere1Write_Arith : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar]>; + +def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar]>; + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latencies for Ampere-1. +// This provides a coarse model, which is then specialised below. + +def : WriteRes; // MOVN, MOVZ +def : WriteRes; // ALU +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Shifted-Reg +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Extended-Reg +def : WriteRes; // EXTR shifts a reg pair +def : WriteRes; // Shift/Scale +def : WriteRes { + let Latency = 18; +} // 32-bit Divide +def : WriteRes { + let Latency = 34; +} // 64-bit Divide +def : WriteRes { + let Latency = 3; +} // 32-bit Multiply +def : WriteRes { + let Latency = 3; +} // 32-bit Multiply +def : WriteRes; +def : WriteRes; +def : WriteRes { + let Latency = 4; +} // Load from base addr plus immediate offset +def : WriteRes { + let Latency = 1; +} // Store to base addr plus immediate offset +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} // Store a register pair. +def : WriteRes; +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} // Load from a register index (maybe scaled). +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} // Store to a register index (maybe scaled). +def : WriteRes { + let Latency = 2; +} // General floating-point ops. +def : WriteRes { + let Latency = 5; +} // Floating-point compare. +def : WriteRes { + let Latency = 6; +} // Float conversion. +def : WriteRes { +} // Float-int register copy. +def : WriteRes { + let Latency = 2; +} // Float-int register copy. +def : WriteRes { + let Latency = 5; +} // Floating-point multiply. +def : WriteRes { + let Latency = 34; +} // Floating-point division. +def : WriteRes { + let Latency = 3; +} // 64bit Vector D ops. +def : WriteRes { + let Latency = 3; +} // 128bit Vector Q ops. +def : WriteRes { + let Latency = 5; +} // Vector loads. +def : WriteRes { + let Latency = 2; +} // Vector stores. + +def : WriteRes { let Unsupported = 1; } + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { + let Latency = 4; +} // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP + +// Forwarding logic. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Specialising the scheduling model further for Ampere-1. 
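+//
+// (Illustrative note, not from the vendored source: the SchedWriteVariant
+// defs above choose a write per instruction instance via MCSchedPredicate;
+// the SchedVar arms are assumed here to map the cheap-LSL case to the
+// single-uop Ampere1Write_1cyc_1AB and the general shifted case to a two-uop
+// write. Under that assumption, with AmpereCheapLSL from
+// AArch64SchedPredAmpere.td:
+//   add x0, x1, x2           // no shift    -> single uop, 1-cycle latency
+//   add x0, x1, x2, lsl #3   // LSL by 1-4  -> still a single uop
+//   add x0, x1, x2, lsl #5   // LSL by 5    -> two-uop, higher-latency variant
+// which matches the "short shifts (LSL shift <= 4) are a single uop" note.)
+//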
+ +def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>; + +// Branch instructions +def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>; +def : InstRW<[Ampere1Write_1cyc_1A], + (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; +def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>; + +// Cryptography instructions +// -- AES encryption/decryption +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>; +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>; +// -- Polynomial multiplication +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>; +// -- SHA-256 hash +def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>; +// -- SHA-256 schedule update +def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>; +// -- SHA-3 instructions +def : InstRW<[Ampere1Write_2cyc_1XY], + (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>; +// -- SHA-512 hash +def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>; +// -- SHA-512 schedule update +def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>; +// -- SHA1 choose/majority/parity +def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>; +// -- SHA1 hash/schedule update +def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>; +def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>; + +// FP and vector load instructions +// -- Load 1-element structure to one/all lanes +// ---- all lanes +def : InstRW<[Ampere1Write_7cyc_1L_1XY], + (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// ---- one lane +def : InstRW<[Ampere1Write_7cyc_1L_1XY], + (instregex "^LD1i(8|16|32|64)")>; +// -- Load 1-element structure to one/all lanes, 1D size +def : InstRW<[Ampere1Write_5cyc_1L], + (instregex "^LD1Rv1d")>; +// -- Load 1-element structures to 1 register +def : InstRW<[Ampere1Write_5cyc_1L], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 2 registers +def : InstRW<[Ampere1Write_5cyc_2L], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 3 registers +def : InstRW<[Ampere1Write_6cyc_3L], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 4 registers +def : InstRW<[Ampere1Write_6cyc_4L], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 2-element structure to all lanes of 2 registers, 1D size +def : InstRW<[Ampere1Write_5cyc_2L], + (instregex "^LD2Rv1d")>; +// -- Load 2-element structure to all lanes of 2 registers, other sizes +def : InstRW<[Ampere1Write_7cyc_2L_2XY], + (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 2-element structure to one lane of 2 registers +def : InstRW<[Ampere1Write_7cyc_2L_2XY], + (instregex "^LD2i(8|16|32|64)")>; +// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size +def : InstRW<[Ampere1Write_7cyc_2L_2XY], + (instregex "^LD2Twov(16b|8h|4s|2d)")>; +// -- Load 2-element structures to 2 registers, 8B/4H/2S size +def : InstRW<[Ampere1Write_9cyc_2L_3XY], + (instregex "^LD2Twov(8b|4h|2s)")>; +// -- Load 3-element structure to all lanes of 3 registers, 1D size +def : InstRW<[Ampere1Write_6cyc_3L], + (instregex "^LD3Rv1d")>; +// -- Load 3-element structure to all lanes of 3 registers, other sizes +def : InstRW<[Ampere1Write_8cyc_3L_3XY], + (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 3-element structure to one lane of 3 registers +def : InstRW<[Ampere1Write_8cyc_3L_3XY], + (instregex "^LD3i(8|16|32|64)")>; +// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes +def : 
InstRW<[Ampere1Write_9cyc_3L_3XY], + (instregex "^LD3Threev(16b|8h|4s)")>; +// -- Load 3-element structures to 3 registers, 2D size +def : InstRW<[Ampere1Write_8cyc_3L_3XY], + (instregex "^LD3Threev2d")>; +// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1Write_10cyc_3L_3XY], + (instregex "^LD3Threev(8b|4h|2s)")>; +// -- Load 4-element structure to all lanes of 4 registers, 1D size +def : InstRW<[Ampere1Write_6cyc_4L], + (instregex "^LD4Rv1d")>; +// -- Load 4-element structure to all lanes of 4 registers, other sizes +def : InstRW<[Ampere1Write_8cyc_4L_4XY], + (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 4-element structure to one lane of 4 registers +def : InstRW<[Ampere1Write_6cyc_4L], + (instregex "^LD4i(8|16|32|64)")>; +// -- Load 4-element structures to 4 registers, 2D size +def : InstRW<[Ampere1Write_9cyc_4L_4XY], + (instregex "^LD4Fourv2d")>; +// -- Load 4-element structures to 4 registers, 2S size +def : InstRW<[Ampere1Write_12cyc_4L_8XY], + (instregex "^LD4Fourv2s")>; +// -- Load 4-element structures to 4 registers, other sizes +def : InstRW<[Ampere1Write_11cyc_4L_8XY], + (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>; +// -- Load pair, Q-form +def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>; +// -- Load pair, S/D-form +def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>; +// -- Load register +def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>; +// -- Load register, sign-extended register +def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>; + +// FP and vector store instructions +// -- Store 1-element structure from one lane of 1 register +def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z], + (instregex "^ST1i(8|16|32|64)")>; +// -- Store 1-element structures from 1 register +def : InstRW<[Ampere1Write_2cyc_1S_1Z], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 2 registers +def : InstRW<[Ampere1Write_3cyc_2S_2Z], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 3 registers +def : InstRW<[Ampere1Write_4cyc_3S_3Z], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 4 registers +def : InstRW<[Ampere1Write_5cyc_4S_4Z], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 2-element structure from one lane of 2 registers +def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z], + (instregex "^ST2i(8|16|32|64)")>; +// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes +def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z], + (instregex "^ST2Twov(16b|8h|4s|2d)")>; +// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z], + (instregex "^ST2Twov(8b|4h|2s)")>; +// -- Store 3-element structure from one lane of 3 registers +def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z], + (instregex "^ST3i(8|16|32|64)")>; +// -- Store 3-element structures from 3 registers +def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z], + (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 4-element structure from one lane of 4 registers +def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z], + (instregex "^ST4i(8|16|32|64)")>; +// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes +def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z], + (instregex "^ST4Fourv(16b|8h|4s)")>; +// -- Store 4-element structures from 4 registers, 2D sizes +def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z], + (instregex "^ST4Fourv2d")>; +// -- Store 4-element structures 
from 4 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z], + (instregex "^ST4Fourv(8b|4h|2s)")>; +// -- Store pair, Q-form +def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>; +// -- Store pair, S/D-form +def : InstRW<[Ampere1Write_3cyc_1S_2Z], (instregex "^STN?P[SD]")>; +// -- Store register +def : InstRW<[Ampere1Write_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>; +// -- Store register, sign-extended register offset +def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>; + +// FP data processing, bfloat16 format +def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>; +def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>; +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>; +def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>; +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>; + +// FP data processing, scalar/vector, half precision +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], + (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>; +def : InstRW<[Ampere1Write_4cyc_1X], + (instregex "^FCMPE?H")>; +def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X], + (instregex "^FCCMPE?H")>; +def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY], + (instregex "^FCSELH")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>; +def : InstRW<[Ampere1Write_25cyc_1XY], (instregex "^FDIVv.[if]16", "FDIVH")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>; +def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>; +def : InstRW<[Ampere1Write_12cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>; +def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>; +def : InstRW<[Ampere1Write_39cyc_1XY], (instregex "^FSQRTv.f16", "^FSQRTHr")>; + +// FP data processing, scalar/vector, single/double precision +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>; +def : InstRW<[Ampere1Write_5cyc_1XY], + (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>; +def : InstRW<[Ampere1Write_5cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>; +def : InstRW<[Ampere1Write_5cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>; +def : InstRW<[Ampere1Write_5cyc_1X], + (instregex "^FCMPE?(S|D)")>; +def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X], + (instregex "^FCCMPE?(S|D)")>; +def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY], + (instregex "^FCSEL(S|D)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^[SU]CVTFv.[fi](32|64)")>; +def : InstRW<[Ampere1Write_34cyc_1XY], (instregex "^FDIVv.[if](64)", "FDIVD")>; +def : InstRW<[Ampere1Write_19cyc_1XY], (instregex "^FDIVv.[if](32)", "FDIVS")>; +def : 
InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>; +def : InstRW<[Ampere1Write_10cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>; +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>; +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>; +def : InstRW<[Ampere1Write_62cyc_1XY], (instregex "^FSQRTv.f64", "^FSQRTDr")>; +def : InstRW<[Ampere1Write_32cyc_1XY], (instregex "^FSQRTv.f32", "^FSQRTSr")>; + +// FP miscellaneous instructions +def : InstRW<[Ampere1Write_10cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>; +def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>; +def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>; +def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>; +def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>; +def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>; +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>; +def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>; +def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>; + +// Integer arithmetic and logical instructions +def : InstRW<[Ampere1Write_1cyc_1A], + (instregex "ADC(W|X)r", "SBC(W|X)r")>; +def : InstRW<[Ampere1Write_Arith], + (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>; +def : InstRW<[Ampere1Write_ArithFlagsetting], + (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>; +def : InstRW<[Ampere1Write_1cyc_1A], + (instregex "(ADC|SBC)S(W|X)r")>; +def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>; +def : InstRW<[Ampere1Write_1cyc_1A], + (instregex "(CCMN|CCMP)(X|W)")>; +def : InstRW<[Ampere1Write_1cyc_1A], + (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>; +def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>; +def : InstRW<[Ampere1Write_3cyc_1BS], + (instregex "(S|U)MULHr")>; +def : InstRW<[Ampere1Write_4cyc_1BS], + (instregex "(S|U)?M(ADD|SUB)L?r")>; + +// Integer load instructions +def : InstRW<[Ampere1Write_4cyc_2L], + (instregex "(LDNP|LDP|LDPSW)(X|W)")>; +def : InstRW<[Ampere1Write_4cyc_1L], + (instregex "LDR(B|D|H|Q|S)ui")>; +def : InstRW<[Ampere1Write_4cyc_1L], + (instregex "LDR(D|Q|W|X)l")>; +def : InstRW<[Ampere1Write_4cyc_1L], + (instregex "LDTR(B|H|W|X)i")>; +def : InstRW<[Ampere1Write_4cyc_1L], + (instregex "LDTRS(BW|BX|HW|HX|W)i")>; +def : InstRW<[Ampere1Write_4cyc_1L], + (instregex "LDUR(BB|HH|X|W)i")>; +def : InstRW<[Ampere1Write_4cyc_1L], + (instregex "LDURS(BW|BX|HW|HX|W)i")>; +def : InstRW<[Ampere1Write_5cyc_1AB_1L], + (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>; +def : InstRW<[Ampere1Write_1cyc_1L], + (instrs PRFMl, PRFUMi, PRFUMi)>; +def : InstRW<[Ampere1Write_2cyc_1AB_1L], + (instrs PRFMroW, PRFMroX)>; + +// Integer miscellaneous instructions +def : InstRW<[Ampere1Write_1cyc_1A], (instrs ADR, ADRP)>; +def : InstRW<[Ampere1Write_1cyc_1B], (instregex 
"EXTR(W|X)")>; +def : InstRW<[Ampere1Write_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>; +def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>; +def : InstRW<[Ampere1Write_1cyc_1B], (instregex "CLS(W|X)")>; +def : InstRW<[Ampere1Write_1cyc_1A], (instrs SETF8, SETF16)>; +def : InstRW<[Ampere1Write_1cyc_1AB], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; +def : InstRW<[Ampere1Write_1cyc_1B], + (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>; +def : InstRW<[Ampere1Write_1cyc_1B], + (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>; + +// Integer store instructions +def : InstRW<[Ampere1Write_1cyc_2S], (instregex "STNP(X|W)i")>; +def : InstRW<[Ampere1Write_2cyc_1B_1S], + (instrs STPWi, STPXi)>; +def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB], + (instregex "STP(W|X)(pre|post)")>; +def : InstRW<[Ampere1Write_1cyc_1S], + (instrs STTRBi, STTRHi, STTRWi, STTRXi)>; +def : InstRW<[Ampere1Write_1cyc_1S], + (instregex "STUR(BB|HH|X|W)i", + "STR(X|W)ui", + "STUR(BB|HH|X|W)i")>; +def : InstRW<[Ampere1Write_1cyc_2S], (instrs STRWroX, STRXroX)>; +def : InstRW<[Ampere1Write_2cyc_1AB_2S], (instrs STRWroW, STRXroW)>; + +// Pointer authentication +//def : InstRW<[Ampere1Write_7cyc_1BS], +// (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>; +def : InstRW<[Ampere1Write_8cyc_1BS_1A], + (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>; +def : InstRW<[Ampere1Write_8cyc_1BS_2A], + (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; +//def : InstRW<[Ampere1Write_7cyc_1BS], +// (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>; +def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>; +def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>; + +// Vector integer instructions +// -- absolute difference +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv", + "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>; +// -- arithmetic +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD", + "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW", + "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>; +// -- arithmetic, horizontal, 16B +def : InstRW<[Ampere1Write_12cyc_4XY], + (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>; +def : InstRW<[Ampere1Write_12cyc_4XY], + (instregex "^[SU](MIN|MAX)Vv16i8v")>; +// -- arithmetic, horizontal, 4H/4S +def : InstRW<[Ampere1Write_6cyc_2XY], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>; +def : InstRW<[Ampere1Write_6cyc_2XY], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>; +// -- arithmetic, horizontal, 8B/8H +def : InstRW<[Ampere1Write_9cyc_3XY], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>; +def : InstRW<[Ampere1Write_9cyc_3XY], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>; +// -- arithmetic, narrowing +def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>; +// -- arithmetic, pairwise +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>; +// -- arithmetic, saturating +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>; +// -- bit count +def : InstRW<[Ampere1Write_2cyc_1XY], + (instregex "^(CLS|CLZ|CNT)v")>; +// -- compare +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv", + "^CMHIv", "^CMHSv")>; +// -- compare non-zero +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>; +// -- 
dot product +def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>; +// -- fp reciprocal estimate +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>; +// -- integer reciprocal estimate +def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>; +// -- logical +def : InstRW<[Ampere1Write_2cyc_1XY], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; +// -- logical, narrowing +def : InstRW<[Ampere1Write_5cyc_2XY], + (instregex "RSHRNv", + "SHRNv", "SQSHRNv", "SQSHRUNv", + "UQXTNv")>; +// -- matrix multiply +def : InstRW<[Ampere1Write_6cyc_2XY], + (instrs SMMLA, UMMLA, USMMLA)>; +// -- max/min +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; +// -- move immediate +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>; +// -- multiply +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>; +// -- multiply accumulate +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>; +// -- negation, saturating +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>; +// -- reverse bits/bytes +def : InstRW<[Ampere1Write_2cyc_1XY], + (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>; +// -- shift +def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// -- shift and accumulate +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>; +// -- shift, saturating +def : InstRW<[Ampere1Write_3cyc_1XY], + (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU", + "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL", + "^UQSHL")>; + +// Vector miscellaneous instructions +// -- duplicate element +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>; +// -- duplicate from GPR +def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>; +// -- extract narrow +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>; +// -- insert/extract element +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>; +// -- move FP immediate +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>; +// -- move element to GPR +def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>; +// -- move from GPR to any element +def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>; +// -- table lookup +def : InstRW<[Ampere1Write_2cyc_1XY], + (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; +def : InstRW<[Ampere1Write_4cyc_2XY], + (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; +def : InstRW<[Ampere1Write_6cyc_3XY], + (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; +def : InstRW<[Ampere1Write_8cyc_4XY], + (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; +// -- transpose +def : InstRW<[Ampere1Write_2cyc_1XY], + (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; +// -- zip/unzip +def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>; + +} // SchedModel = Ampere1Model diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td new file mode 100644 index 000000000000..8552c07bda56 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td @@ -0,0 +1,25 @@ +//===- AArch64SchedPredAmpere.td - AArch64 
Sched Preds -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines scheduling predicate definitions that are used by the +// AArch64 Ampere Computing processors. +// +//===----------------------------------------------------------------------===// + +// Auxiliary predicates. + +// Check for a LSL shift <= 4 +def AmpereCheapLSL : MCSchedPredicate< + CheckAny<[CheckShiftBy0, + CheckAll< + [CheckShiftLSL, + CheckAny< + [CheckShiftBy1, + CheckShiftBy2, + CheckShiftBy3, + CheckShiftBy4]>]>]>>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td index fcda2394bacf..ee7cc1f5095b 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td +++ b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td @@ -109,10 +109,7 @@ def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr", def ExynosScaledIdxPred : MCSchedPredicate; // Identify FP instructions. -def ExynosFPPred : MCSchedPredicate>; +def ExynosFPPred : MCSchedPredicate; // Identify 128-bit NEON instructions. def ExynosQFormPred : MCSchedPredicate; diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td index fc13b23b4cf8..4473f3a53845 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td +++ b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td @@ -53,152 +53,23 @@ let FunctionMapper = "AArch64_AM::getShiftType" in { } // Check for shifting in arithmetic and logic instructions. -foreach I = {0-3, 8} in { +foreach I = {0-4, 8} in { let FunctionMapper = "AArch64_AM::getShiftValue" in def CheckShiftBy#I : CheckImmOperand<3, I>; } // Generic predicates. - -// Identify whether an instruction is the 16-bit NEON form based on its result. -def CheckHForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, H0>, - CheckRegOperand<0, H1>, - CheckRegOperand<0, H2>, - CheckRegOperand<0, H3>, - CheckRegOperand<0, H4>, - CheckRegOperand<0, H5>, - CheckRegOperand<0, H6>, - CheckRegOperand<0, H7>, - CheckRegOperand<0, H8>, - CheckRegOperand<0, H9>, - CheckRegOperand<0, H10>, - CheckRegOperand<0, H11>, - CheckRegOperand<0, H12>, - CheckRegOperand<0, H13>, - CheckRegOperand<0, H14>, - CheckRegOperand<0, H15>, - CheckRegOperand<0, H16>, - CheckRegOperand<0, H17>, - CheckRegOperand<0, H18>, - CheckRegOperand<0, H19>, - CheckRegOperand<0, H20>, - CheckRegOperand<0, H21>, - CheckRegOperand<0, H22>, - CheckRegOperand<0, H23>, - CheckRegOperand<0, H24>, - CheckRegOperand<0, H25>, - CheckRegOperand<0, H26>, - CheckRegOperand<0, H27>, - CheckRegOperand<0, H28>, - CheckRegOperand<0, H29>, - CheckRegOperand<0, H30>, - CheckRegOperand<0, H31>]>]>; - -// Identify whether an instruction is the 32-bit NEON form based on its result. 
-def CheckSForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, S0>, - CheckRegOperand<0, S1>, - CheckRegOperand<0, S2>, - CheckRegOperand<0, S3>, - CheckRegOperand<0, S4>, - CheckRegOperand<0, S5>, - CheckRegOperand<0, S6>, - CheckRegOperand<0, S7>, - CheckRegOperand<0, S8>, - CheckRegOperand<0, S9>, - CheckRegOperand<0, S10>, - CheckRegOperand<0, S11>, - CheckRegOperand<0, S12>, - CheckRegOperand<0, S13>, - CheckRegOperand<0, S14>, - CheckRegOperand<0, S15>, - CheckRegOperand<0, S16>, - CheckRegOperand<0, S17>, - CheckRegOperand<0, S18>, - CheckRegOperand<0, S19>, - CheckRegOperand<0, S20>, - CheckRegOperand<0, S21>, - CheckRegOperand<0, S22>, - CheckRegOperand<0, S23>, - CheckRegOperand<0, S24>, - CheckRegOperand<0, S25>, - CheckRegOperand<0, S26>, - CheckRegOperand<0, S27>, - CheckRegOperand<0, S28>, - CheckRegOperand<0, S29>, - CheckRegOperand<0, S30>, - CheckRegOperand<0, S31>]>]>; - -// Identify whether an instruction is the 64-bit NEON form based on its result. -def CheckDForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, D0>, - CheckRegOperand<0, D1>, - CheckRegOperand<0, D2>, - CheckRegOperand<0, D3>, - CheckRegOperand<0, D4>, - CheckRegOperand<0, D5>, - CheckRegOperand<0, D6>, - CheckRegOperand<0, D7>, - CheckRegOperand<0, D8>, - CheckRegOperand<0, D9>, - CheckRegOperand<0, D10>, - CheckRegOperand<0, D11>, - CheckRegOperand<0, D12>, - CheckRegOperand<0, D13>, - CheckRegOperand<0, D14>, - CheckRegOperand<0, D15>, - CheckRegOperand<0, D16>, - CheckRegOperand<0, D17>, - CheckRegOperand<0, D18>, - CheckRegOperand<0, D19>, - CheckRegOperand<0, D20>, - CheckRegOperand<0, D21>, - CheckRegOperand<0, D22>, - CheckRegOperand<0, D23>, - CheckRegOperand<0, D24>, - CheckRegOperand<0, D25>, - CheckRegOperand<0, D26>, - CheckRegOperand<0, D27>, - CheckRegOperand<0, D28>, - CheckRegOperand<0, D29>, - CheckRegOperand<0, D30>, - CheckRegOperand<0, D31>]>]>; +// Identify whether an instruction is NEON or floating point +def CheckFpOrNEON : CheckFunctionPredicateWithTII< + "AArch64_MC::isFpOrNEON", + "AArch64InstrInfo::isFpOrNEON" +>; // Identify whether an instruction is the 128-bit NEON form based on its result. -def CheckQForm : CheckAll<[CheckIsRegOperand<0>, - CheckAny<[CheckRegOperand<0, Q0>, - CheckRegOperand<0, Q1>, - CheckRegOperand<0, Q2>, - CheckRegOperand<0, Q3>, - CheckRegOperand<0, Q4>, - CheckRegOperand<0, Q5>, - CheckRegOperand<0, Q6>, - CheckRegOperand<0, Q7>, - CheckRegOperand<0, Q8>, - CheckRegOperand<0, Q9>, - CheckRegOperand<0, Q10>, - CheckRegOperand<0, Q11>, - CheckRegOperand<0, Q12>, - CheckRegOperand<0, Q13>, - CheckRegOperand<0, Q14>, - CheckRegOperand<0, Q15>, - CheckRegOperand<0, Q16>, - CheckRegOperand<0, Q17>, - CheckRegOperand<0, Q18>, - CheckRegOperand<0, Q19>, - CheckRegOperand<0, Q20>, - CheckRegOperand<0, Q21>, - CheckRegOperand<0, Q22>, - CheckRegOperand<0, Q23>, - CheckRegOperand<0, Q24>, - CheckRegOperand<0, Q25>, - CheckRegOperand<0, Q26>, - CheckRegOperand<0, Q27>, - CheckRegOperand<0, Q28>, - CheckRegOperand<0, Q29>, - CheckRegOperand<0, Q30>, - CheckRegOperand<0, Q31>]>]>; +def CheckQForm : CheckFunctionPredicateWithTII< + "AArch64_MC::isQForm", + "AArch64InstrInfo::isQForm" +>; // Identify arithmetic instructions with extend. 
def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 77fca22a5f55..6ecfc97a4273 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -25,7 +25,8 @@ def TSV110Model : SchedMachineModel {
   let CompleteModel = 1;

   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
-                                                    PAUnsupported.F);
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }

 // Define each kind of processor resource and number available on the TSV110,
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 893269c1a7ef..677797a6797b 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -91,7 +91,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(

 SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
-    SDValue Size, Align Alignment, bool isVolatile,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
     MachinePointerInfo DstPtrInfo) const {
   const AArch64Subtarget &STI =
       DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
@@ -100,38 +100,6 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
     return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
                     Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
   }
-
-  // Check to see if there is a specialized entry-point for memory zeroing.
-  ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-  ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
-  const char *bzeroName =
-      (V && V->isZero())
-          ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
-          : nullptr;
-  // For small size (< 256), it is not beneficial to use bzero
-  // instead of memset.
-  if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
-    const AArch64TargetLowering &TLI = *STI.getTargetLowering();
-
-    EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
-    Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext());
-    TargetLowering::ArgListTy Args;
-    TargetLowering::ArgListEntry Entry;
-    Entry.Node = Dst;
-    Entry.Ty = IntPtrTy;
-    Args.push_back(Entry);
-    Entry.Node = Size;
-    Args.push_back(Entry);
-    TargetLowering::CallLoweringInfo CLI(DAG);
-    CLI.setDebugLoc(dl)
-        .setChain(Chain)
-        .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-                      DAG.getExternalSymbol(bzeroName, IntPtr),
-                      std::move(Args))
-        .setDiscardResult();
-    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-    return CallResult.second;
-  }
   return SDValue();
 }

diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 47fe3bf7dcf5..73f93724d6fc 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -34,7 +34,7 @@ public:
   SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, Align Alignment,
-                                  bool isVolatile,
+                                  bool isVolatile, bool AlwaysInline,
                                   MachinePointerInfo DstPtrInfo) const override;
   SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl,
                                    SDValue Chain,
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 566c7a16db23..24816bc9e9bd 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -42,20 +42,23 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
 #include <cassert>
 #include <iterator>
+#include <memory>
 #include <utility>

 using namespace llvm;
@@ -63,12 +66,12 @@ using namespace llvm;
 #define DEBUG_TYPE "aarch64-stack-tagging"

 static cl::opt<bool> ClMergeInit(
-    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+    "stack-tagging-merge-init", cl::Hidden, cl::init(true),
     cl::desc("merge stack variable initializers with tagging when possible"));

 static cl::opt<bool>
     ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
-                     cl::init(true), cl::ZeroOrMore,
+                     cl::init(true),
                      cl::desc("Use Stack Safety analysis results"));

 static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
@@ -78,6 +81,12 @@ static cl::opt<unsigned>
     ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                          cl::Hidden);

+static cl::opt<size_t> ClMaxLifetimes(
+    "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
+    cl::ReallyHidden,
+    cl::desc("How many lifetime ends to handle for a single alloca."),
+    cl::Optional);
+
 static const Align kTagGranuleSize = Align(16);

 namespace {
@@ -283,15 +292,6 @@ public:
 };

 class AArch64StackTagging : public FunctionPass {
-  struct AllocaInfo {
-    AllocaInst *AI;
-    TrackingVH<Instruction> OldAI; // Track through RAUW to replace debug uses.
-    SmallVector<IntrinsicInst *, 2> LifetimeStart;
-    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
-    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
-    int Tag; // -1 for non-tagged allocations
-  };
-
   const bool MergeInit;
   const bool UseStackSafety;

@@ -307,7 +307,6 @@ public:
   }

   bool isInterestingAlloca(const AllocaInst &AI);
-  void alignAndPadAlloca(AllocaInfo &Info);

   void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                  uint64_t Size);
@@ -316,9 +315,9 @@ public:
   Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                    uint64_t Size, InitializerBuilder &IB);

-  Instruction *
-  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
-                          const DominatorTree *DT);
+  Instruction *insertBaseTaggedPointer(
+      const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
+      const DominatorTree *DT);
   bool runOnFunction(Function &F) override;

   StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
@@ -419,7 +418,7 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
   bool IsInteresting =
       AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
       // alloca() may be called with 0 size, ignore it.
-      AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
+      *AI.getAllocationSizeInBits(*DL) > 0 &&
       // inalloca allocas are not treated as static, and we don't want
       // dynamic alloca instrumentation for them as well.
       !AI.isUsedWithInAlloca() &&
@@ -460,15 +459,13 @@ void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
 }

 Instruction *AArch64StackTagging::insertBaseTaggedPointer(
-    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+    const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
     const DominatorTree *DT) {
   BasicBlock *PrologueBB = nullptr;
   // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
-  for (auto &I : Allocas) {
-    const AllocaInfo &Info = I.second;
+  for (auto &I : AllocasToInstrument) {
+    const memtag::AllocaInfo &Info = I.second;
     AllocaInst *AI = Info.AI;
-    if (Info.Tag < 0)
-      continue;
     if (!PrologueBB) {
       PrologueBB = AI->getParent();
       continue;
     }
@@ -486,40 +483,6 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
   return Base;
 }

-void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
-  const Align NewAlignment =
-      max(MaybeAlign(Info.AI->getAlign()), kTagGranuleSize);
-  Info.AI->setAlignment(NewAlignment);
-
-  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
-  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
-  if (Size == AlignedSize)
-    return;
-
-  // Add padding to the alloca.
-  Type *AllocatedType =
-      Info.AI->isArrayAllocation()
-          ?
ArrayType::get( - Info.AI->getAllocatedType(), - cast(Info.AI->getArraySize())->getZExtValue()) - : Info.AI->getAllocatedType(); - Type *PaddingType = - ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size); - Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType); - auto *NewAI = new AllocaInst( - TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI); - NewAI->takeName(Info.AI); - NewAI->setAlignment(Info.AI->getAlign()); - NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca()); - NewAI->setSwiftError(Info.AI->isSwiftError()); - NewAI->copyMetadata(*Info.AI); - - auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI); - Info.AI->replaceAllUsesWith(NewPtr); - Info.AI->eraseFromParent(); - Info.AI = NewAI; -} - // FIXME: check for MTE extension bool AArch64StackTagging::runOnFunction(Function &Fn) { if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag)) @@ -532,76 +495,21 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { if (MergeInit) AA = &getAnalysis().getAAResults(); - MapVector Allocas; // need stable iteration order - SmallVector RetVec; - SmallVector UnrecognizedLifetimes; - - for (auto &BB : *F) { - for (Instruction &I : BB) { - if (auto *AI = dyn_cast(&I)) { - Allocas[AI].AI = AI; - Allocas[AI].OldAI = AI; - continue; - } - - if (auto *DVI = dyn_cast(&I)) { - for (Value *V : DVI->location_ops()) - if (auto *AI = dyn_cast_or_null(V)) - if (Allocas[AI].DbgVariableIntrinsics.empty() || - Allocas[AI].DbgVariableIntrinsics.back() != DVI) - Allocas[AI].DbgVariableIntrinsics.push_back(DVI); - continue; - } - - auto *II = dyn_cast(&I); - if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end)) { - AllocaInst *AI = findAllocaForValue(II->getArgOperand(1)); - if (!AI) { - UnrecognizedLifetimes.push_back(&I); - continue; - } - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - Allocas[AI].LifetimeStart.push_back(II); - else - Allocas[AI].LifetimeEnd.push_back(II); - } - - if (isa(&I)) - RetVec.push_back(&I); - } - } + memtag::StackInfoBuilder SIB( + [this](const AllocaInst &AI) { return isInterestingAlloca(AI); }); + for (Instruction &I : instructions(F)) + SIB.visit(I); + memtag::StackInfo &SInfo = SIB.get(); - if (Allocas.empty()) + if (SInfo.AllocasToInstrument.empty()) return false; - int NextTag = 0; - int NumInterestingAllocas = 0; - for (auto &I : Allocas) { - AllocaInfo &Info = I.second; - assert(Info.AI); - - if (!isInterestingAlloca(*Info.AI)) { - Info.Tag = -1; - continue; - } - - alignAndPadAlloca(Info); - NumInterestingAllocas++; - Info.Tag = NextTag; - NextTag = (NextTag + 1) % 16; - } - - if (NumInterestingAllocas == 0) - return true; - std::unique_ptr DeleteDT; DominatorTree *DT = nullptr; if (auto *P = getAnalysisIfAvailable()) DT = &P->getDomTree(); - if (DT == nullptr && (NumInterestingAllocas > 1 || - !F->hasFnAttribute(Attribute::OptimizeNone))) { + if (DT == nullptr) { DeleteDT = std::make_unique(*F); DT = DeleteDT.get(); } @@ -611,38 +519,57 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { if (auto *P = getAnalysisIfAvailable()) PDT = &P->getPostDomTree(); - if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) { + if (PDT == nullptr) { DeletePDT = std::make_unique(*F); PDT = DeletePDT.get(); } + std::unique_ptr DeleteLI; + LoopInfo *LI = nullptr; + if (auto *LIWP = getAnalysisIfAvailable()) { + LI = &LIWP->getLoopInfo(); + } else { + DeleteLI = std::make_unique(*DT); + LI = DeleteLI.get(); + } + SetTagFunc = 
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag); - Instruction *Base = insertBaseTaggedPointer(Allocas, DT); + Instruction *Base = insertBaseTaggedPointer(SInfo.AllocasToInstrument, DT); - for (auto &I : Allocas) { - const AllocaInfo &Info = I.second; + int NextTag = 0; + for (auto &I : SInfo.AllocasToInstrument) { + memtag::AllocaInfo &Info = I.second; + assert(Info.AI && isInterestingAlloca(*Info.AI)); + TrackingVH OldAI = Info.AI; + memtag::alignAndPadAlloca(Info, kTagGranuleSize); AllocaInst *AI = Info.AI; - if (Info.Tag < 0) - continue; - + int Tag = NextTag; + NextTag = (NextTag + 1) % 16; // Replace alloca with tagp(alloca). IRBuilder<> IRB(Info.AI->getNextNode()); Function *TagP = Intrinsic::getDeclaration( F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()}); Instruction *TagPCall = IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base, - ConstantInt::get(IRB.getInt64Ty(), Info.Tag)}); + ConstantInt::get(IRB.getInt64Ty(), Tag)}); if (Info.AI->hasName()) TagPCall->setName(Info.AI->getName() + ".tag"); Info.AI->replaceAllUsesWith(TagPCall); TagPCall->setOperand(0, Info.AI); - if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 && - Info.LifetimeEnd.size() == 1) { + // Calls to functions that may return twice (e.g. setjmp) confuse the + // postdominator analysis, and will leave us to keep memory tagged after + // function return. Work around this by always untagging at every return + // statement if return_twice functions are called. + bool StandardLifetime = + SInfo.UnrecognizedLifetimes.empty() && + memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI, + ClMaxLifetimes) && + !SInfo.CallsReturnTwice; + if (StandardLifetime) { IntrinsicInst *Start = Info.LifetimeStart[0]; - IntrinsicInst *End = Info.LifetimeEnd[0]; uint64_t Size = cast(Start->getArgOperand(0))->getZExtValue(); Size = alignTo(Size, kTagGranuleSize); @@ -650,14 +577,16 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); }; if (!DT || !PDT || - !forAllReachableExits(*DT, *PDT, Start, Info.LifetimeEnd, RetVec, - TagEnd)) - End->eraseFromParent(); + !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd, + SInfo.RetVec, TagEnd)) { + for (auto *End : Info.LifetimeEnd) + End->eraseFromParent(); + } } else { - uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8; + uint64_t Size = *Info.AI->getAllocationSizeInBits(*DL) / 8; Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy()); tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size); - for (auto &RI : RetVec) { + for (auto &RI : SInfo.RetVec) { untagAlloca(AI, RI, Size); } // We may have inserted tag/untag outside of any lifetime interval. @@ -670,12 +599,12 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { // Fixup debug intrinsics to point to the new alloca. for (auto DVI : Info.DbgVariableIntrinsics) - DVI->replaceVariableLocationOp(Info.OldAI, Info.AI); + DVI->replaceVariableLocationOp(OldAI, Info.AI); } // If we have instrumented at least one alloca, all unrecognized lifetime - // instrinsics have to go. - for (auto &I : UnrecognizedLifetimes) + // intrinsics have to go. 
+ for (auto &I : SInfo.UnrecognizedLifetimes) I->eraseFromParent(); return true; diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp index cae6d65bed2d..7e91dc1b6385 100644 --- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp @@ -50,7 +50,6 @@ cl::opt ClUncheckedLdSt( static cl::opt ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), - cl::ZeroOrMore, cl::desc("Apply first slot optimization for stack tagging " "(eliminate ADDG Rt, Rn, 0, 0).")); diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 8a7e20237271..15005304383d 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -21,6 +21,7 @@ #include "GISel/AArch64RegisterBankInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/AArch64TargetParser.h" @@ -51,6 +52,16 @@ static cl::opt static cl::opt UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen.")); +static cl::opt OverrideVectorInsertExtractBaseCost( + "aarch64-insert-extract-base-cost", + cl::desc("Base cost of vector insert/extract element"), cl::Hidden); + +unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { + if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) + return OverrideVectorInsertExtractBaseCost; + return VectorInsertExtractBaseCost; +} + AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( StringRef FS, StringRef CPUString, StringRef TuneCPUString) { // Determine default and user-specified characteristics @@ -78,14 +89,17 @@ void AArch64Subtarget::initializeProperties() { CacheLineSize = 64; break; case CortexA35: - break; case CortexA53: case CortexA55: PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; break; case CortexA57: MaxInterleaveFactor = 4; PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; break; case CortexA65: PrefFunctionLogAlignment = 3; @@ -93,6 +107,10 @@ void AArch64Subtarget::initializeProperties() { case CortexA72: case CortexA73: case CortexA75: + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; + break; case CortexA76: case CortexA77: case CortexA78: @@ -101,12 +119,21 @@ void AArch64Subtarget::initializeProperties() { case CortexX1: case CortexX1C: PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 5; + MaxBytesForLoopAlignment = 16; break; case CortexA510: + PrefFunctionLogAlignment = 4; + VScaleForTuning = 1; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; + break; case CortexA710: case CortexX2: PrefFunctionLogAlignment = 4; VScaleForTuning = 1; + PrefLoopLogAlignment = 5; + MaxBytesForLoopAlignment = 16; break; case A64FX: CacheLineSize = 256; @@ -221,6 +248,12 @@ void AArch64Subtarget::initializeProperties() { // FIXME: remove this to enable 64-bit SLP if performance looks good. 
MinVectorRegisterBitWidth = 128; break; + case Ampere1: + CacheLineSize = 64; + PrefFunctionLogAlignment = 6; + PrefLoopLogAlignment = 6; + MaxInterleaveFactor = 4; + break; } } @@ -352,6 +385,8 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const { if (!UseAddressTopByteIgnored) return false; + if (TargetTriple.isDriverKit()) + return true; if (TargetTriple.isiOS()) { return TargetTriple.getiOSVersion() >= VersionTuple(8); } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 7b2bbad30f85..c92e3e44de31 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -22,7 +22,7 @@ #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include @@ -40,6 +40,7 @@ public: enum ARMProcFamilyEnum : uint8_t { Others, A64FX, + Ampere1, AppleA7, AppleA10, AppleA11, @@ -87,191 +88,14 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily = Others; - bool HasV8_0aOps = false; - bool HasV8_1aOps = false; - bool HasV8_2aOps = false; - bool HasV8_3aOps = false; - bool HasV8_4aOps = false; - bool HasV8_5aOps = false; - bool HasV8_6aOps = false; - bool HasV8_7aOps = false; - bool HasV8_8aOps = false; - bool HasV9_0aOps = false; - bool HasV9_1aOps = false; - bool HasV9_2aOps = false; - bool HasV9_3aOps = false; - bool HasV8_0rOps = false; - - bool HasCONTEXTIDREL2 = false; - bool HasEL2VMSA = false; - bool HasEL3 = false; - bool HasFPARMv8 = false; - bool HasNEON = false; - bool HasCrypto = false; - bool HasDotProd = false; - bool HasCRC = false; - bool HasLSE = false; - bool HasLSE2 = false; - bool HasRAS = false; - bool HasRDM = false; - bool HasPerfMon = false; - bool HasFullFP16 = false; - bool HasFP16FML = false; - bool HasSPE = false; - - bool FixCortexA53_835769 = false; - - // ARMv8.1 extensions - bool HasVH = false; - bool HasPAN = false; - bool HasLOR = false; - - // ARMv8.2 extensions - bool HasPsUAO = false; - bool HasPAN_RWV = false; - bool HasCCPP = false; - - // SVE extensions - bool HasSVE = false; - bool UseExperimentalZeroingPseudos = false; - bool UseScalarIncVL = false; - - // Armv8.2 Crypto extensions - bool HasSM4 = false; - bool HasSHA3 = false; - bool HasSHA2 = false; - bool HasAES = false; - - // ARMv8.3 extensions - bool HasPAuth = false; - bool HasJS = false; - bool HasCCIDX = false; - bool HasComplxNum = false; - - // ARMv8.4 extensions - bool HasNV = false; - bool HasMPAM = false; - bool HasDIT = false; - bool HasTRACEV8_4 = false; - bool HasAM = false; - bool HasSEL2 = false; - bool HasTLB_RMI = false; - bool HasFlagM = false; - bool HasRCPC_IMMO = false; - - bool HasLSLFast = false; - bool HasRCPC = false; - bool HasAggressiveFMA = false; - - // Armv8.5-A Extensions - bool HasAlternativeNZCV = false; - bool HasFRInt3264 = false; - bool HasSpecRestrict = false; - bool HasSSBS = false; - bool HasSB = false; - bool HasPredRes = false; - bool HasCCDP = false; - bool HasBTI = false; - bool HasRandGen = false; - bool HasMTE = false; - bool HasTME = false; - - // Armv8.6-A Extensions - bool HasBF16 = false; - bool HasMatMulInt8 = false; - bool HasMatMulFP32 = false; - bool HasMatMulFP64 = false; - bool HasAMVS = false; - bool 
HasFineGrainedTraps = false; - bool HasEnhancedCounterVirtualization = false; - - // Armv8.7-A Extensions - bool HasXS = false; - bool HasWFxT = false; - bool HasHCX = false; - bool HasLS64 = false; - - // Armv8.8-A Extensions - bool HasHBC = false; - bool HasMOPS = false; - - // Arm SVE2 extensions - bool HasSVE2 = false; - bool HasSVE2AES = false; - bool HasSVE2SM4 = false; - bool HasSVE2SHA3 = false; - bool HasSVE2BitPerm = false; - - // Armv9-A Extensions - bool HasRME = false; - - // Arm Scalable Matrix Extension (SME) - bool HasSME = false; - bool HasSMEF64 = false; - bool HasSMEI64 = false; - bool HasStreamingSVE = false; - - // AppleA7 system register. - bool HasAppleA7SysReg = false; - - // Future architecture extensions. - bool HasETE = false; - bool HasTRBE = false; - bool HasBRBE = false; - bool HasSPE_EEF = false; - - // HasZeroCycleRegMove - Has zero-cycle register mov instructions. - bool HasZeroCycleRegMove = false; - - // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. - bool HasZeroCycleZeroing = false; - bool HasZeroCycleZeroingGP = false; - bool HasZeroCycleZeroingFPWorkaround = false; - - // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". - // as movi is more efficient across all cores. Newer cores can eliminate - // fmovs early and there is no difference with movi, but this not true for - // all implementations. - bool HasZeroCycleZeroingFP = true; - - // StrictAlign - Disallow unaligned memory accesses. - bool StrictAlign = false; - - // NegativeImmediates - transform instructions with negative immediates - bool NegativeImmediates = true; - // Enable 64-bit vectorization in SLP. unsigned MinVectorRegisterBitWidth = 64; - bool OutlineAtomics = false; - bool PredictableSelectIsExpensive = false; - bool BalanceFPOps = false; - bool CustomAsCheapAsMove = false; - bool ExynosAsCheapAsMove = false; - bool UsePostRAScheduler = false; - bool Misaligned128StoreIsSlow = false; - bool Paired128IsSlow = false; - bool STRQroIsSlow = false; - bool UseAlternateSExtLoadCVTF32Pattern = false; - bool HasArithmeticBccFusion = false; - bool HasArithmeticCbzFusion = false; - bool HasCmpBccFusion = false; - bool HasFuseAddress = false; - bool HasFuseAES = false; - bool HasFuseArithmeticLogic = false; - bool HasFuseCCSelect = false; - bool HasFuseCryptoEOR = false; - bool HasFuseLiterals = false; - bool DisableLatencySchedHeuristic = false; - bool UseRSqrt = false; - bool Force32BitJumpTables = false; - bool UseEL1ForTP = false; - bool UseEL2ForTP = false; - bool UseEL3ForTP = false; - bool AllowTaggedGlobals = false; - bool HardenSlsRetBr = false; - bool HardenSlsBlr = false; - bool HardenSlsNoComdat = false; +// Bool members corresponding to the SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "AArch64GenSubtargetInfo.inc" + uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; uint16_t CacheLineSize = 0; @@ -282,7 +106,6 @@ protected: unsigned PrefLoopLogAlignment = 0; unsigned MaxBytesForLoopAlignment = 0; unsigned MaxJumpTableSize = 0; - unsigned WideningBaseCost = 0; // ReserveXRegister[i] - X#i is not available as a general purpose register. 
BitVector ReserveXRegister; @@ -331,6 +154,11 @@ public: unsigned MinSVEVectorSizeInBitsOverride = 0, unsigned MaxSVEVectorSizeInBitsOverride = 0); +// Getters for SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "AArch64GenSubtargetInfo.inc" + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } @@ -351,9 +179,7 @@ public: const RegisterBankInfo *getRegBankInfo() const override; const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } - bool enablePostRAScheduler() const override { - return UsePostRAScheduler; - } + bool enablePostRAScheduler() const override { return usePostRAScheduler(); } /// Returns ARM processor family. /// Avoid this function! CPU specifics should be kept local to this class @@ -363,30 +189,6 @@ public: return ARMProcFamily; } - bool hasV8_0aOps() const { return HasV8_0aOps; } - bool hasV8_1aOps() const { return HasV8_1aOps; } - bool hasV8_2aOps() const { return HasV8_2aOps; } - bool hasV8_3aOps() const { return HasV8_3aOps; } - bool hasV8_4aOps() const { return HasV8_4aOps; } - bool hasV8_5aOps() const { return HasV8_5aOps; } - bool hasV9_0aOps() const { return HasV9_0aOps; } - bool hasV9_1aOps() const { return HasV9_1aOps; } - bool hasV9_2aOps() const { return HasV9_2aOps; } - bool hasV9_3aOps() const { return HasV9_3aOps; } - bool hasV8_0rOps() const { return HasV8_0rOps; } - - bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } - - bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; } - - bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; } - - bool hasZeroCycleZeroingFPWorkaround() const { - return HasZeroCycleZeroingFPWorkaround; - } - - bool requiresStrictAlign() const { return StrictAlign; } - bool isXRaySupported() const override { return true; } unsigned getMinVectorRegisterBitWidth() const { @@ -399,63 +201,16 @@ public: return CustomCallSavedXRegs[i]; } bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } - bool hasFPARMv8() const { return HasFPARMv8; } - bool hasNEON() const { return HasNEON; } - bool hasCrypto() const { return HasCrypto; } - bool hasDotProd() const { return HasDotProd; } - bool hasCRC() const { return HasCRC; } - bool hasLSE() const { return HasLSE; } - bool hasLSE2() const { return HasLSE2; } - bool hasRAS() const { return HasRAS; } - bool hasRDM() const { return HasRDM; } - bool hasSM4() const { return HasSM4; } - bool hasSHA3() const { return HasSHA3; } - bool hasSHA2() const { return HasSHA2; } - bool hasAES() const { return HasAES; } - bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; } - bool balanceFPOps() const { return BalanceFPOps; } - bool predictableSelectIsExpensive() const { - return PredictableSelectIsExpensive; - } - bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } - bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; } - bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } - bool isPaired128Slow() const { return Paired128IsSlow; } - bool isSTRQroSlow() const { return STRQroIsSlow; } - bool useAlternateSExtLoadCVTF32Pattern() const { - return UseAlternateSExtLoadCVTF32Pattern; - } - bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } - bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } - bool hasCmpBccFusion() const { return HasCmpBccFusion; 
} - bool hasFuseAddress() const { return HasFuseAddress; } - bool hasFuseAES() const { return HasFuseAES; } - bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } - bool hasFuseCCSelect() const { return HasFuseCCSelect; } - bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; } - bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. bool hasFusion() const { return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || - hasFuseAES() || hasFuseArithmeticLogic() || - hasFuseCCSelect() || hasFuseLiterals(); + hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || + hasFuseAdrpAdd() || hasFuseLiterals(); } - bool hardenSlsRetBr() const { return HardenSlsRetBr; } - bool hardenSlsBlr() const { return HardenSlsBlr; } - bool hardenSlsNoComdat() const { return HardenSlsNoComdat; } - - bool useEL1ForTP() const { return UseEL1ForTP; } - bool useEL2ForTP() const { return UseEL2ForTP; } - bool useEL3ForTP() const { return UseEL3ForTP; } - - bool useRSqrt() const { return UseRSqrt; } - bool force32BitJumpTables() const { return Force32BitJumpTables; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } - unsigned getVectorInsertExtractBaseCost() const { - return VectorInsertExtractBaseCost; - } + unsigned getVectorInsertExtractBaseCost() const; unsigned getCacheLineSize() const override { return CacheLineSize; } unsigned getPrefetchDistance() const override { return PrefetchDistance; } unsigned getMinPrefetchStride(unsigned NumMemAccesses, @@ -478,60 +233,10 @@ public: unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } - unsigned getWideningBaseCost() const { return WideningBaseCost; } - - bool useExperimentalZeroingPseudos() const { - return UseExperimentalZeroingPseudos; - } - - bool useScalarIncVL() const { return UseScalarIncVL; } - /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. 
bool supportsAddressTopByteIgnored() const; - bool hasPerfMon() const { return HasPerfMon; } - bool hasFullFP16() const { return HasFullFP16; } - bool hasFP16FML() const { return HasFP16FML; } - bool hasSPE() const { return HasSPE; } - bool hasLSLFast() const { return HasLSLFast; } - bool hasSVE() const { return HasSVE; } - bool hasSVE2() const { return HasSVE2; } - bool hasRCPC() const { return HasRCPC; } - bool hasAggressiveFMA() const { return HasAggressiveFMA; } - bool hasAlternativeNZCV() const { return HasAlternativeNZCV; } - bool hasFRInt3264() const { return HasFRInt3264; } - bool hasSpecRestrict() const { return HasSpecRestrict; } - bool hasSSBS() const { return HasSSBS; } - bool hasSB() const { return HasSB; } - bool hasPredRes() const { return HasPredRes; } - bool hasCCDP() const { return HasCCDP; } - bool hasBTI() const { return HasBTI; } - bool hasRandGen() const { return HasRandGen; } - bool hasMTE() const { return HasMTE; } - bool hasTME() const { return HasTME; } - // Arm SVE2 extensions - bool hasSVE2AES() const { return HasSVE2AES; } - bool hasSVE2SM4() const { return HasSVE2SM4; } - bool hasSVE2SHA3() const { return HasSVE2SHA3; } - bool hasSVE2BitPerm() const { return HasSVE2BitPerm; } - bool hasMatMulInt8() const { return HasMatMulInt8; } - bool hasMatMulFP32() const { return HasMatMulFP32; } - bool hasMatMulFP64() const { return HasMatMulFP64; } - - // Armv8.6-A Extensions - bool hasBF16() const { return HasBF16; } - bool hasFineGrainedTraps() const { return HasFineGrainedTraps; } - bool hasEnhancedCounterVirtualization() const { - return HasEnhancedCounterVirtualization; - } - - // Arm Scalable Matrix Extension (SME) - bool hasSME() const { return HasSME; } - bool hasSMEF64() const { return HasSMEF64; } - bool hasSMEI64() const { return HasSMEI64; } - bool hasStreamingSVE() const { return HasStreamingSVE; } - bool isLittleEndian() const { return IsLittle; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -552,42 +257,6 @@ public: bool useAA() const override; - bool outlineAtomics() const { return OutlineAtomics; } - - bool hasVH() const { return HasVH; } - bool hasPAN() const { return HasPAN; } - bool hasLOR() const { return HasLOR; } - - bool hasPsUAO() const { return HasPsUAO; } - bool hasPAN_RWV() const { return HasPAN_RWV; } - bool hasCCPP() const { return HasCCPP; } - - bool hasPAuth() const { return HasPAuth; } - bool hasJS() const { return HasJS; } - bool hasCCIDX() const { return HasCCIDX; } - bool hasComplxNum() const { return HasComplxNum; } - - bool hasNV() const { return HasNV; } - bool hasMPAM() const { return HasMPAM; } - bool hasDIT() const { return HasDIT; } - bool hasTRACEV8_4() const { return HasTRACEV8_4; } - bool hasAM() const { return HasAM; } - bool hasAMVS() const { return HasAMVS; } - bool hasXS() const { return HasXS; } - bool hasWFxT() const { return HasWFxT; } - bool hasHCX() const { return HasHCX; } - bool hasLS64() const { return HasLS64; } - bool hasSEL2() const { return HasSEL2; } - bool hasTLB_RMI() const { return HasTLB_RMI; } - bool hasFlagM() const { return HasFlagM; } - bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } - bool hasEL2VMSA() const { return HasEL2VMSA; } - bool hasEL3() const { return HasEL3; } - bool hasHBC() const { return HasHBC; } - bool hasMOPS() const { return HasMOPS; } - - bool fixCortexA53_835769() const { return FixCortexA53_835769; } - bool addrSinkUsingGEPs() const override { // Keeping GEPs inbounds is important for exploiting AArch64 // addressing-modes in ILP32 mode. 
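// Illustrative sketch, not part of this patch: how the GET_SUBTARGETINFO_MACRO
// X-macro replaces the deleted hand-written members and getters above. The
// feature name below is only an example; the real entries are emitted by
// TableGen into AArch64GenSubtargetInfo.inc, one invocation per
// SubtargetFeature, along the lines of
//
//   GET_SUBTARGETINFO_MACRO(HasNEON, false, hasNEON)
//
// so that the two #include sites expand to, respectively,
//
//   bool HasNEON = false;                      // member definition
//   bool hasNEON() const { return HasNEON; }   // getter
//
// which is why the long runs of "bool HasFoo = false;" members and
// "bool hasFoo() const" getters can be deleted wholesale in this diff.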
@@ -623,8 +292,6 @@ public: bool enableEarlyIfConversion() const override; - bool enableAdvancedRASplitCost() const override { return false; } - std::unique_ptr getCustomPBQPConstraints() const override; bool isCallingConvWin64(CallingConv::ID CC) const { diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index cce5813fe6e9..f3788175c48d 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -18,23 +18,23 @@ include "llvm/TableGen/SearchableTable.td" //===----------------------------------------------------------------------===// def HasCCPP : Predicate<"Subtarget->hasCCPP()">, - AssemblerPredicate<(all_of FeatureCCPP), "ccpp">; + AssemblerPredicateWithAll<(all_of FeatureCCPP), "ccpp">; def HasPAN : Predicate<"Subtarget->hasPAN()">, - AssemblerPredicate<(all_of FeaturePAN), + AssemblerPredicateWithAll<(all_of FeaturePAN), "ARM v8.1 Privileged Access-Never extension">; def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">, - AssemblerPredicate<(all_of FeaturePsUAO), + AssemblerPredicateWithAll<(all_of FeaturePsUAO), "ARM v8.2 UAO PState extension (psuao)">; def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">, - AssemblerPredicate<(all_of FeaturePAN_RWV), + AssemblerPredicateWithAll<(all_of FeaturePAN_RWV), "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">; def HasCONTEXTIDREL2 : Predicate<"Subtarget->hasCONTEXTIDREL2()">, - AssemblerPredicate<(all_of FeatureCONTEXTIDREL2), + AssemblerPredicateWithAll<(all_of FeatureCONTEXTIDREL2), "Target contains CONTEXTIDR_EL2 RW operand">; //===----------------------------------------------------------------------===// @@ -631,6 +631,7 @@ def : ROSysReg<"OSLSR_EL1", 0b10, 0b000, 0b0001, 0b0001, 0b100>; def : ROSysReg<"DBGAUTHSTATUS_EL1", 0b10, 0b000, 0b0111, 0b1110, 0b110>; def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>; def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>; +def : ROSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>; def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>; def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>; @@ -977,7 +978,6 @@ def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>; def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>; def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>; def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>; -def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>; def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>; def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>; def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 4af28fc070dd..3f9795f5198b 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -12,6 +12,7 @@ #include "AArch64TargetMachine.h" #include "AArch64.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64MachineScheduler.h" #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" #include "AArch64TargetObjectFile.h" @@ -21,7 +22,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/CFIFixup.h" #include "llvm/CodeGen/CSEConfigBase.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include 
"llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -31,6 +34,7 @@ #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -59,6 +63,11 @@ static cl::opt cl::desc("Enable the conditional branch tuning pass"), cl::init(true), cl::Hidden); +static cl::opt EnableAArch64CopyPropagation( + "aarch64-enable-copy-propagation", + cl::desc("Enable the copy propagation with AArch64 copy instr"), + cl::init(true), cl::Hidden); + static cl::opt EnableMCR("aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); @@ -265,7 +274,7 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT, // On ELF platforms the default static relocation model has a smart enough // linker to cope with referencing external symbols defined in a shared // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC) + if (!RM || *RM == Reloc::DynamicNoPIC) return Reloc::Static; return *RM; } @@ -354,6 +363,10 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, // AArch64 supports the debug entry values. setSupportsDebugEntryValues(true); + + // AArch64 supports fixing up the DWARF unwind information. + if (!getMCAsmInfo()->usesWindowsCFI()) + setCFIFixup(true); } AArch64TargetMachine::~AArch64TargetMachine() = default; @@ -379,7 +392,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { if (VScaleRangeAttr.isValid()) { Optional VScaleMax = VScaleRangeAttr.getVScaleRangeMax(); MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128; - MaxSVEVectorSize = VScaleMax ? VScaleMax.getValue() * 128 : 0; + MaxSVEVectorSize = VScaleMax ? *VScaleMax * 128 : 0; } else { MinSVEVectorSize = SVEVectorBitsMinOpt; MaxSVEVectorSize = SVEVectorBitsMaxOpt; @@ -468,15 +481,17 @@ public: ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); + ScheduleDAGMI *DAG = + new ScheduleDAGMI(C, std::make_unique(C), + /* RemoveKillFlags=*/true); if (ST.hasFusion()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). - ScheduleDAGMI *DAG = createGenericSchedPostRA(C); DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } - return nullptr; + return DAG; } void addIRPasses() override; @@ -504,7 +519,7 @@ public: } // end anonymous namespace TargetTransformInfo -AArch64TargetMachine::getTargetTransformInfo(const Function &F) { +AArch64TargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(AArch64TTIImpl(this, F)); } @@ -531,6 +546,7 @@ void AArch64PassConfig::addIRPasses() { if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) @@ -574,6 +590,9 @@ void AArch64PassConfig::addIRPasses() { // Add Control Flow Guard checks. 
if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); + + if (TM->Options.JMCInstrument) + addPass(createJMCInstrumenterPass()); } // Pass Pipeline Configuration @@ -759,6 +778,10 @@ void AArch64PassConfig::addPreEmitPass() { if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt) addPass(createAArch64LoadStoreOptimizationPass()); + if (TM->getOptLevel() >= CodeGenOpt::Aggressive && + EnableAArch64CopyPropagation) + addPass(createMachineCopyPropagationPass(true)); + addPass(createAArch64A53Fix835769()); if (EnableBranchTargets) @@ -804,8 +827,7 @@ AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { bool AArch64TargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { - const auto &YamlMFI = - reinterpret_cast(MFI); + const auto &YamlMFI = static_cast(MFI); MachineFunction &MF = PFS.MF; MF.getInfo()->initializeBaseYamlFields(YamlMFI); return false; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index 7d314bce99b1..beb109502ff9 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -41,7 +41,7 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; TargetLoweringObjectFile* getObjFileLowering() const override { return TLOF.get(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b2ffdf949d8b..41c7a8c5042f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -8,6 +8,7 @@ #include "AArch64TargetTransformInfo.h" #include "AArch64ExpandImm.h" +#include "AArch64PerfectShuffle.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" @@ -15,8 +16,8 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" @@ -50,6 +51,12 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } +bool AArch64TTIImpl::shouldMaximizeVectorBandwidth( + TargetTransformInfo::RegisterKind K) const { + assert(K != TargetTransformInfo::RGK_Scalar); + return K == TargetTransformInfo::RGK_FixedWidthVector; +} + /// Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. 
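// Illustrative sketch, not part of this patch: why a fraction of a larger
// immediate can legitimately cost zero. AArch64 materializes a 64-bit
// constant with one MOVZ plus up to three MOVKs, one per non-zero 16-bit
// chunk, so a simplified per-fragment model (hypothetical helper, assumed
// behavior only) is:
//
//   unsigned approxImmCost(uint64_t Val) {
//     unsigned Cost = 0;
//     for (unsigned Shift = 0; Shift < 64; Shift += 16)
//       if ((Val >> Shift) & 0xFFFF) // this chunk needs its own MOVZ/MOVK
//         ++Cost;
//     return Cost;                   // an all-zero fragment costs 0
//   }
//
// The in-tree helper instead asks AArch64_IMM::expandMOVImm for the actual
// instruction sequence and counts its length.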
@@ -370,6 +377,49 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return Entry->Cost; break; } + case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: { + if (ICA.getArgTypes().empty()) + break; + bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat; + auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]); + EVT MTy = TLI->getValueType(DL, RetTy); + // Check for the legal types, which are where the size of the input and the + // output are the same, or we are using cvt f64->i32 or f32->i64. + if ((LT.second == MVT::f32 || LT.second == MVT::f64 || + LT.second == MVT::v2f32 || LT.second == MVT::v4f32 || + LT.second == MVT::v2f64) && + (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() || + (LT.second == MVT::f64 && MTy == MVT::i32) || + (LT.second == MVT::f32 && MTy == MVT::i64))) + return LT.first; + // Similarly for fp16 sizes + if (ST->hasFullFP16() && + ((LT.second == MVT::f16 && MTy == MVT::i32) || + ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) && + (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())))) + return LT.first; + + // Otherwise we use a legal convert followed by a min+max + if ((LT.second.getScalarType() == MVT::f32 || + LT.second.getScalarType() == MVT::f64 || + (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) && + LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) { + Type *LegalTy = + Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits()); + if (LT.second.isVector()) + LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount()); + InstructionCost Cost = 1; + IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin : Intrinsic::umin, + LegalTy, {LegalTy, LegalTy}); + Cost += getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(IsSigned ? 
Intrinsic::smax : Intrinsic::umax, + LegalTy, {LegalTy, LegalTy}); + Cost += getIntrinsicInstrCost(Attrs2, CostKind); + return LT.first * Cost; + } + break; + } default: break; } @@ -525,6 +575,14 @@ static Optional instCombineConvertFromSVBool(InstCombiner &IC, return IC.replaceInstUsesWith(II, EarliestReplacement); } +static Optional instCombineSVESel(InstCombiner &IC, + IntrinsicInst &II) { + IRBuilder<> Builder(&II); + auto Select = Builder.CreateSelect(II.getOperand(0), II.getOperand(1), + II.getOperand(2)); + return IC.replaceInstUsesWith(II, Select); +} + static Optional instCombineSVEDup(InstCombiner &IC, IntrinsicInst &II) { IntrinsicInst *Pg = dyn_cast(II.getArgOperand(1)); @@ -594,8 +652,7 @@ static Optional instCombineSVECmpNE(InstCombiner &IC, return None; auto *VecIns = dyn_cast(DupQLane->getArgOperand(0)); - if (!VecIns || - VecIns->getIntrinsicID() != Intrinsic::experimental_vector_insert) + if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert) return None; // Where the vector insert is a fixed constant vector insert into undef at @@ -862,12 +919,14 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { if (isAllActivePredicate(Pred)) { LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr); + Load->copyMetadata(II); return IC.replaceInstUsesWith(II, Load); } CallInst *MaskedLoad = Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL), Pred, ConstantAggregateZero::get(VecTy)); + MaskedLoad->copyMetadata(II); return IC.replaceInstUsesWith(II, MaskedLoad); } @@ -883,12 +942,14 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo()); if (isAllActivePredicate(Pred)) { - Builder.CreateStore(VecOp, VecPtr); + StoreInst *Store = Builder.CreateStore(VecOp, VecPtr); + Store->copyMetadata(II); return IC.eraseInstFromFunction(II); } - Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL), - Pred); + CallInst *MaskedStore = Builder.CreateMaskedStore( + VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred); + MaskedStore->copyMetadata(II); return IC.eraseInstFromFunction(II); } @@ -1069,7 +1130,6 @@ static Optional instCombineLD1GatherIndex(InstCombiner &IC, Value *BasePtr = II.getOperand(1); Value *Index = II.getOperand(2); Type *Ty = II.getType(); - Type *BasePtrTy = BasePtr->getType(); Value *PassThru = ConstantAggregateZero::get(Ty); // Contiguous gather => masked load. @@ -1085,8 +1145,8 @@ static Optional instCombineLD1GatherIndex(InstCombiner &IC, BasePtr->getPointerAlignment(II.getModule()->getDataLayout()); Type *VecPtrTy = PointerType::getUnqual(Ty); - Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr, - IndexBase); + Value *Ptr = Builder.CreateGEP( + cast(Ty)->getElementType(), BasePtr, IndexBase); Ptr = Builder.CreateBitCast(Ptr, VecPtrTy); CallInst *MaskedLoad = Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru); @@ -1104,10 +1164,9 @@ static Optional instCombineST1ScatterIndex(InstCombiner &IC, Value *BasePtr = II.getOperand(2); Value *Index = II.getOperand(3); Type *Ty = Val->getType(); - Type *BasePtrTy = BasePtr->getType(); // Contiguous scatter => masked store. 
- // (sve.ld1.scatter.index Value Mask BasePtr (sve.index IndexBase 1)) + // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1)) // => (masked.store Value (gep BasePtr IndexBase) Align Mask) Value *IndexBase; if (match(Index, m_Intrinsic( @@ -1118,8 +1177,8 @@ static Optional instCombineST1ScatterIndex(InstCombiner &IC, Align Alignment = BasePtr->getPointerAlignment(II.getModule()->getDataLayout()); - Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr, - IndexBase); + Value *Ptr = Builder.CreateGEP( + cast(Ty)->getElementType(), BasePtr, IndexBase); Type *VecPtrTy = PointerType::getUnqual(Ty); Ptr = Builder.CreateBitCast(Ptr, VecPtrTy); @@ -1165,6 +1224,52 @@ static Optional instCombineSVESDIV(InstCombiner &IC, return None; } +static Optional instCombineMaxMinNM(InstCombiner &IC, + IntrinsicInst &II) { + Value *A = II.getArgOperand(0); + Value *B = II.getArgOperand(1); + if (A == B) + return IC.replaceInstUsesWith(II, A); + + return None; +} + +static Optional instCombineSVESrshl(InstCombiner &IC, + IntrinsicInst &II) { + IRBuilder<> Builder(&II); + Value *Pred = II.getOperand(0); + Value *Vec = II.getOperand(1); + Value *Shift = II.getOperand(2); + + // Convert SRSHL into the simpler LSL intrinsic when fed by an ABS intrinsic. + Value *AbsPred, *MergedValue; + if (!match(Vec, m_Intrinsic( + m_Value(MergedValue), m_Value(AbsPred), m_Value())) && + !match(Vec, m_Intrinsic( + m_Value(MergedValue), m_Value(AbsPred), m_Value()))) + + return None; + + // Transform is valid if any of the following are true: + // * The ABS merge value is an undef or non-negative + // * The ABS predicate is all active + // * The ABS predicate and the SRSHL predicates are the same + if (!isa(MergedValue) && + !match(MergedValue, m_NonNegative()) && + AbsPred != Pred && !isAllActivePredicate(AbsPred)) + return None; + + // Only valid when the shift amount is non-negative, otherwise the rounding + // behaviour of SRSHL cannot be ignored. + if (!match(Shift, m_NonNegative())) + return None; + + auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()}, + {Pred, Vec, Shift}); + + return IC.replaceInstUsesWith(II, LSL); +} + Optional AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { @@ -1172,6 +1277,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, switch (IID) { default: break; + case Intrinsic::aarch64_neon_fmaxnm: + case Intrinsic::aarch64_neon_fminnm: + return instCombineMaxMinNM(IC, II); case Intrinsic::aarch64_sve_convert_from_svbool: return instCombineConvertFromSVBool(IC, II); case Intrinsic::aarch64_sve_dup: @@ -1227,6 +1335,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, return instCombineSVEST1(IC, II, DL); case Intrinsic::aarch64_sve_sdiv: return instCombineSVESDIV(IC, II); + case Intrinsic::aarch64_sve_sel: + return instCombineSVESel(IC, II); + case Intrinsic::aarch64_sve_srshl: + return instCombineSVESrshl(IC, II); } return None; @@ -1262,7 +1374,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, ArrayRef Args) { // A helper that returns a vector type from the given type. The number of - // elements in type Ty determine the vector width. + // elements in type Ty determines the vector width. 
auto toVectorTy = [&](Type *ArgTy) { return VectorType::get(ArgTy->getScalarType(), cast(DstTy)->getElementCount()); @@ -1277,26 +1389,32 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the // instructions. // - // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we + // TODO: Add additional widening operations (e.g., shl, etc.) once we // verify that their extending operands are eliminated during code // generation. switch (Opcode) { case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). + case Instruction::Mul: // SMULL(2), UMULL(2) break; default: return false; } // To be a widening instruction (either the "wide" or "long" versions), the - // second operand must be a sign- or zero extend having a single user. We - // only consider extends having a single user because they may otherwise not - // be eliminated. + // second operand must be a sign- or zero extend. if (Args.size() != 2 || - (!isa(Args[1]) && !isa(Args[1])) || - !Args[1]->hasOneUse()) + (!isa(Args[1]) && !isa(Args[1]))) return false; auto *Extend = cast(Args[1]); + auto *Arg0 = dyn_cast(Args[0]); + + // A mul only has a mull version (not like addw). Both operands need to be + // extending and the same type. + if (Opcode == Instruction::Mul && + (!Arg0 || Arg0->getOpcode() != Extend->getOpcode() || + Arg0->getOperand(0)->getType() != Extend->getOperand(0)->getType())) + return false; // Legalize the destination type and ensure it can be used in a widening // operation. @@ -1334,7 +1452,7 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // If the cast is observable, and it is used by a widening instruction (e.g., // uaddl, saddw, etc.), it may be free. 
- if (I && I->hasOneUse()) { + if (I && I->hasOneUser()) { auto *SingleUser = cast(*I->user_begin()); SmallVector Operands(SingleUser->operand_values()); if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) { @@ -1606,6 +1724,36 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, SrcTy.getSimpleVT())) return AdjustCost(Entry->Cost); + static const TypeConversionCostTblEntry FP16Tbl[] = { + {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f16, 1}, // fcvtzs + {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f16, 1}, + {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f16, 1}, // fcvtzs + {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f16, 1}, + {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2}, // fcvtl+fcvtzs + {ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f16, 2}, + {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2}, // fcvtzs+xtn + {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f16, 2}, + {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f16, 1}, // fcvtzs + {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f16, 1}, + {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4}, // 2*fcvtl+2*fcvtzs + {ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f16, 4}, + {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3}, // 2*fcvtzs+xtn + {ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f16, 3}, + {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2}, // 2*fcvtzs + {ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f16, 2}, + {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8}, // 4*fcvtl+4*fcvtzs + {ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f16, 8}, + {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2}, // ushll + ucvtf + {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2}, // sshll + scvtf + {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4}, // 2 * ushl(2) + 2 * ucvtf + {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4}, // 2 * sshl(2) + 2 * scvtf + }; + + if (ST->hasFullFP16()) + if (const auto *Entry = ConvertCostTableLookup( + FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) + return AdjustCost(Entry->Cost); + return AdjustCost( BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } @@ -1723,24 +1871,12 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - - // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), - // add in the widening overhead specified by the sub-target. Since the - // extends feeding widening instructions are performed automatically, they - // aren't present in the generated code and have a zero cost. By adding a - // widening overhead here, we attach the total cost of the combined operation - // to the widening instruction. - InstructionCost Cost = 0; - if (isWideningInstruction(Ty, Opcode, Args)) - Cost += ST->getWideningBaseCost(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); switch (ISD) { default: - return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, - Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::SDIV: if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue && Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) { @@ -1748,26 +1884,22 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence ADD + CMP + SELECT + SRA. // The OperandValue properties many not be same as that of previous // operation; conservatively assume OP_None. 
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, - Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, - Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, + InstructionCost Cost = getArithmeticInstrCost( + Instruction::Add, Ty, CostKind, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); + Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Opd1Info, + Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind, - Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, - Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, + Cost += getArithmeticInstrCost( + Instruction::Select, Ty, CostKind, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); + Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Opd1Info, + Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return Cost; } LLVM_FALLTHROUGH; - case ISD::UDIV: + case ISD::UDIV: { if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) { auto VT = TLI->getValueType(DL, Ty); if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) { @@ -1787,9 +1919,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( } } - Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, - Opd2Info, - Opd1PropInfo, Opd2PropInfo); + InstructionCost Cost = BaseT::getArithmeticInstrCost( + Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); if (Ty->isVectorTy()) { // On AArch64, vector divisions are not supported natively and are // expanded into scalar divisions of each pair of elements. @@ -1804,27 +1935,31 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( Cost += Cost; } return Cost; - + } case ISD::MUL: - if (LT.second != MVT::v2i64) - return (Cost + 1) * LT.first; // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive // as elements are extracted from the vectors and the muls scalarized. // As getScalarizationOverhead is a bit too pessimistic, we estimate the // cost for a i64 vector directly here, which is: - // - four i64 extracts, - // - two i64 inserts, and - // - two muls. - // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with - // LT.first = 2 the cost is 16. - return LT.first * 8; + // - four 2-cost i64 extracts, + // - two 2-cost i64 inserts, and + // - two 1-cost muls. + // So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with + // LT.first = 2 the cost is 28. If both operands are extensions it will not + // need to scalarize so the cost can be cheaper (smull or umull). + if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args)) + return LT.first; + return LT.first * 14; case ISD::ADD: case ISD::XOR: case ISD::OR: case ISD::AND: + case ISD::SRL: + case ISD::SRA: + case ISD::SHL: // These nodes are marked as 'custom' for combining purposes only. // We know that they are legal. See LowerAdd in ISelLowering. - return (Cost + 1) * LT.first; + return LT.first; case ISD::FADD: case ISD::FSUB: @@ -1834,11 +1969,10 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( // These nodes are marked as 'custom' just to lower them to SVE. // We know said lowering will incur no additional cost. 
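The revised v2i64 mul cost is pure scalarization arithmetic: four lane extracts and two lane inserts at an assumed cost of 2 each, plus two scalar multiplies at cost 1, for every legalized <2 x i64> piece. A small sketch of that arithmetic under exactly those per-operation costs (the constants come from the comment in the hunk, not from querying a cost model):

  #include <cassert>

  // Cost of scalarizing <2 x i64> multiplies, per the breakdown in the
  // comment: 4 extracts (cost 2 each) + 2 inserts (cost 2 each) + 2 muls
  // (cost 1 each) = 14 per piece. LTFirst is the legalization factor, i.e.
  // how many legal <2 x i64> pieces the original vector splits into.
  unsigned v2i64MulCost(unsigned LTFirst) {
    const unsigned Extracts = 4 * 2;
    const unsigned Inserts = 2 * 2;
    const unsigned Muls = 2 * 1;
    return LTFirst * (Extracts + Inserts + Muls);
  }

  int main() {
    assert(v2i64MulCost(1) == 14); // v2i64
    assert(v2i64MulCost(2) == 28); // v4i64 legalized into two v2i64
  }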
if (!Ty->getScalarType()->isFP128Ty()) - return (Cost + 2) * LT.first; + return 2 * LT.first; - return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, - Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } } @@ -1946,6 +2080,10 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { return Options; } +bool AArch64TTIImpl::prefersVectorizedAddressing() const { + return ST->hasSVE(); +} + InstructionCost AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -2559,11 +2697,97 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp) { + VectorType *SubTp, + ArrayRef Args) { + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + // If we have a Mask, and the LT is being legalized somehow, split the Mask + // into smaller vectors and sum the cost of each shuffle. + if (!Mask.empty() && isa(Tp) && LT.second.isVector() && + Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() && + cast(Tp)->getNumElements() > + LT.second.getVectorNumElements() && + !Index && !SubTp) { + unsigned TpNumElts = cast(Tp)->getNumElements(); + assert(Mask.size() == TpNumElts && "Expected Mask and Tp size to match!"); + unsigned LTNumElts = LT.second.getVectorNumElements(); + unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts; + VectorType *NTp = + VectorType::get(Tp->getScalarType(), LT.second.getVectorElementCount()); + InstructionCost Cost; + for (unsigned N = 0; N < NumVecs; N++) { + SmallVector NMask; + // Split the existing mask into chunks of size LTNumElts. Track the source + // sub-vectors to ensure the result has at most 2 inputs. + unsigned Source1, Source2; + unsigned NumSources = 0; + for (unsigned E = 0; E < LTNumElts; E++) { + int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E] + : UndefMaskElem; + if (MaskElt < 0) { + NMask.push_back(UndefMaskElem); + continue; + } + + // Calculate which source from the input this comes from and whether it + // is new to us. + unsigned Source = MaskElt / LTNumElts; + if (NumSources == 0) { + Source1 = Source; + NumSources = 1; + } else if (NumSources == 1 && Source != Source1) { + Source2 = Source; + NumSources = 2; + } else if (NumSources >= 2 && Source != Source1 && Source != Source2) { + NumSources++; + } + + // Add to the new mask. For the NumSources>2 case these are not correct, + // but are only used for the modular lane number. + if (Source == Source1) + NMask.push_back(MaskElt % LTNumElts); + else if (Source == Source2) + NMask.push_back(MaskElt % LTNumElts + LTNumElts); + else + NMask.push_back(MaskElt % LTNumElts); + } + // If the sub-mask has at most 2 input sub-vectors then re-cost it using + // getShuffleCost. If not then cost it using the worst case. + if (NumSources <= 2) + Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc + : TTI::SK_PermuteTwoSrc, + NTp, NMask, 0, nullptr, Args); + else if (any_of(enumerate(NMask), [&](const auto &ME) { + return ME.value() % LTNumElts == ME.index(); + })) + Cost += LTNumElts - 1; + else + Cost += LTNumElts; + } + return Cost; + } + Kind = improveShuffleKindFromMask(Kind, Mask); + + // Check for broadcast loads. 
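The mask-splitting loop above decomposes a shuffle whose type legalizes to several vectors into legal-width chunks, then re-costs each chunk by how many distinct source sub-vectors it reads: one or two sources can be re-costed as real shuffles, more fall back to a per-element worst case. A compact stand-alone model of the source-counting step (countChunkSources is an illustrative name; the real code also rewrites the lane indices into the sub-mask):

  #include <cstdio>
  #include <set>
  #include <vector>

  // For one legal-width chunk of a wide shuffle mask, count how many distinct
  // source sub-vectors of width ChunkElts the chunk reads from. Chunks with
  // <= 2 sources can be re-costed as single-source/two-source shuffles; more
  // sources fall back to a worst-case per-element estimate.
  unsigned countChunkSources(const std::vector<int> &Mask, unsigned Begin,
                             unsigned ChunkElts) {
    std::set<unsigned> Sources;
    for (unsigned E = 0; E < ChunkElts && Begin + E < Mask.size(); ++E) {
      int Elt = Mask[Begin + E];
      if (Elt < 0)
        continue; // undef lane, no source
      Sources.insert(static_cast<unsigned>(Elt) / ChunkElts);
    }
    return Sources.size();
  }

  int main() {
    // A v8 mask legalized as two v4 chunks: the first chunk reads sub-vectors
    // 0 and 1 (two-source shuffle), the second reads only sub-vector 1.
    std::vector<int> Mask = {0, 4, 1, 5, 6, 7, 6, 7};
    std::printf("%u %u\n", countChunkSources(Mask, 0, 4),
                countChunkSources(Mask, 4, 4)); // prints "2 1"
  }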
+ if (Kind == TTI::SK_Broadcast) { + bool IsLoad = !Args.empty() && isa(Args[0]); + if (IsLoad && LT.second.isVector() && + isLegalBroadcastLoad(Tp->getElementType(), + LT.second.getVectorElementCount())) + return 0; // broadcast is handled by ld1r + } + + // If we have 4 elements for the shuffle and a Mask, get the cost straight + // from the perfect shuffle tables. + if (Mask.size() == 4 && Tp->getElementCount() == ElementCount::getFixed(4) && + (Tp->getScalarSizeInBits() == 16 || Tp->getScalarSizeInBits() == 32) && + all_of(Mask, [](int E) { return E < 8; })) + return getPerfectShuffleCost(Mask); + if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_Reverse) { + static const CostTblEntry ShuffleTbl[] = { // Broadcast shuffle kinds can be performed with 'dup'. { TTI::SK_Broadcast, MVT::v8i8, 1 }, @@ -2618,6 +2842,12 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, { TTI::SK_Reverse, MVT::v2f32, 1 }, // mov. { TTI::SK_Reverse, MVT::v4f32, 2 }, // REV64; EXT { TTI::SK_Reverse, MVT::v2f64, 1 }, // mov. + { TTI::SK_Reverse, MVT::v8f16, 2 }, // REV64; EXT + { TTI::SK_Reverse, MVT::v8i16, 2 }, // REV64; EXT + { TTI::SK_Reverse, MVT::v16i8, 2 }, // REV64; EXT + { TTI::SK_Reverse, MVT::v4f16, 1 }, // REV64 + { TTI::SK_Reverse, MVT::v4i16, 1 }, // REV64 + { TTI::SK_Reverse, MVT::v8i8, 1 }, // REV64 // Broadcast shuffle kinds for scalable vectors { TTI::SK_Broadcast, MVT::nxv16i8, 1 }, { TTI::SK_Broadcast, MVT::nxv8i16, 1 }, @@ -2655,11 +2885,26 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, { TTI::SK_Reverse, MVT::nxv4i1, 1 }, { TTI::SK_Reverse, MVT::nxv2i1, 1 }, }; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; } + if (Kind == TTI::SK_Splice && isa(Tp)) return getSpliceCost(Tp, Index); + + // Inserting a subvector can often be done with either a D, S or H register + // move, so long as the inserted vector is "aligned". + if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() && + LT.second.getSizeInBits() <= 128 && SubTp) { + std::pair SubLT = + TLI->getTypeLegalizationCost(DL, SubTp); + if (SubLT.second.isVector()) { + int NumElts = LT.second.getVectorNumElements(); + int NumSubElts = SubLT.second.getVectorNumElements(); + if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) + return SubLT.first; + } + } + return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index a6029b9f2445..d0aacb457a39 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -135,6 +135,8 @@ public: return ST->getVScaleForTuning(); } + bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; + /// Try to return an estimate cost factor that can be used as a multiplier /// when scalarizing an operation for a vector with ElementCount \p VF. 
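The zero-cost broadcast above is gated by the isLegalBroadcastLoad hook added to the header later in this patch: ld1r covers fixed-length NEON vectors with 8/16/32/64-bit elements whose total size is at least 64 bits. A plain-C++ restatement of that predicate, with NEON availability and scalability folded into booleans for the sketch (ld1rLegal is an illustrative name):

  #include <cassert>

  // Restates the ld1r legality rule: NEON only, no scalable vectors, element
  // width in {8,16,32,64}, and the whole vector at least 64 bits wide.
  bool ld1rLegal(bool HasNEON, bool Scalable, unsigned ElementBits,
                 unsigned NumElements) {
    if (!HasNEON || Scalable)
      return false;
    switch (ElementBits) {
    case 8: case 16: case 32: case 64:
      return NumElements * ElementBits >= 64;
    default:
      return false;
    }
  }

  int main() {
    assert(ld1rLegal(true, false, 8, 8));  // v8i8 is 64 bits: ok
    assert(!ld1rLegal(true, false, 8, 4)); // v4i8 is 32 bits: too narrow
    assert(!ld1rLegal(true, true, 32, 4)); // scalable vectors are rejected
  }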
/// For scalable vectors this currently takes the most pessimistic view based @@ -148,6 +150,8 @@ public: unsigned getMaxInterleaveFactor(unsigned VF); + bool prefersVectorizedAddressing() const; + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind); @@ -278,6 +282,23 @@ public: return isLegalMaskedGatherScatter(DataType); } + bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const { + // Return true if we can generate a `ld1r` splat load instruction. + if (!ST->hasNEON() || NumElements.isScalable()) + return false; + switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) { + case 8: + case 16: + case 32: + case 64: { + // We accept bit-widths >= 64bits and elements {8,16,32,64} bits. + unsigned VectorBits = NumElements.getFixedValue() * ElementBits; + return VectorBits >= 64; + } + } + return false; + } + bool isLegalNTStore(Type *DataType, Align Alignment) { // NOTE: The logic below is mostly geared towards LV, which calls it with // vectors with 2 elements. We might want to improve that, if other @@ -330,7 +351,8 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp); + VectorType *SubTp, + ArrayRef Args = None); /// @} }; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 33ed7ae9780e..ade23f643538 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -127,7 +127,7 @@ private: return Prefix; } - PrefixInfo() : Active(false), Predicated(false) {} + PrefixInfo() = default; bool isActive() const { return Active; } bool isPredicated() const { return Predicated; } unsigned getElementSize() const { @@ -141,8 +141,8 @@ private: } private: - bool Active; - bool Predicated; + bool Active = false; + bool Predicated = false; unsigned ElementSize; unsigned Dst; unsigned Pg; @@ -157,7 +157,8 @@ private: bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S); - AArch64CC::CondCode parseCondCodeString(StringRef Cond); + AArch64CC::CondCode parseCondCodeString(StringRef Cond, + std::string &Suggestion); bool parseCondCode(OperandVector &Operands, bool invertCondCode); unsigned matchRegisterNameAlias(StringRef Name, RegKind Kind); bool parseRegister(OperandVector &Operands); @@ -189,6 +190,7 @@ private: bool parseDirectiveUnreq(SMLoc L); bool parseDirectiveCFINegateRAState(); bool parseDirectiveCFIBKeyFrame(); + bool parseDirectiveCFIMTETaggedFrame(); bool parseDirectiveVariantPCS(SMLoc L); @@ -2425,7 +2427,7 @@ static Optional> parseVectorKind(StringRef Suffix, } static bool isValidVectorKind(StringRef Suffix, RegKind VectorKind) { - return parseVectorKind(Suffix, VectorKind).hasValue(); + return parseVectorKind(Suffix, VectorKind).has_value(); } static unsigned matchSVEDataVectorRegName(StringRef Name) { @@ -2758,8 +2760,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { } auto PRFM = LookupByEncoding(MCE->getValue()); - Operands.push_back(AArch64Operand::CreatePrefetch( - prfop, PRFM.getValueOr(""), S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, PRFM.value_or(""), + S, getContext())); return MatchOperand_Success; } @@ -3029,8 +3031,10 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) { return 
MatchOperand_Success; } -/// parseCondCodeString - Parse a Condition Code string. -AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) { +/// parseCondCodeString - Parse a Condition Code string, optionally returning a +/// suggestion to help common typos. +AArch64CC::CondCode +AArch64AsmParser::parseCondCodeString(StringRef Cond, std::string &Suggestion) { AArch64CC::CondCode CC = StringSwitch(Cond.lower()) .Case("eq", AArch64CC::EQ) .Case("ne", AArch64CC::NE) @@ -3053,7 +3057,7 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) { .Default(AArch64CC::Invalid); if (CC == AArch64CC::Invalid && - getSTI().getFeatureBits()[AArch64::FeatureSVE]) + getSTI().getFeatureBits()[AArch64::FeatureSVE]) { CC = StringSwitch(Cond.lower()) .Case("none", AArch64CC::EQ) .Case("any", AArch64CC::NE) @@ -3067,6 +3071,9 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) { .Case("tstop", AArch64CC::LT) .Default(AArch64CC::Invalid); + if (CC == AArch64CC::Invalid && Cond.lower() == "nfirst") + Suggestion = "nfrst"; + } return CC; } @@ -3078,9 +3085,14 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands, assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); StringRef Cond = Tok.getString(); - AArch64CC::CondCode CC = parseCondCodeString(Cond); - if (CC == AArch64CC::Invalid) - return TokError("invalid condition code"); + std::string Suggestion; + AArch64CC::CondCode CC = parseCondCodeString(Cond, Suggestion); + if (CC == AArch64CC::Invalid) { + std::string Msg = "invalid condition code"; + if (!Suggestion.empty()) + Msg += ", did you mean " + Suggestion + "?"; + return TokError(Msg); + } Lex(); // Eat identifier token. if (invertCondCode) { @@ -3910,7 +3922,6 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) { const MCRegisterInfo *RI = getContext().getRegisterInfo(); unsigned PrevReg = FirstReg; - unsigned Count = 1; SmallSet DRegs; AArch64Operand::ComputeRegsForAlias(FirstReg, DRegs, ElementWidth); @@ -3942,7 +3953,6 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) { } PrevReg = Reg; - ++Count; } if (parseToken(AsmToken::RCurly, "'}' expected")) @@ -4545,9 +4555,14 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + (Head.data() - Name.data())); - AArch64CC::CondCode CC = parseCondCodeString(Head); - if (CC == AArch64CC::Invalid) - return Error(SuffixLoc, "invalid condition code"); + std::string Suggestion; + AArch64CC::CondCode CC = parseCondCodeString(Head, Suggestion); + if (CC == AArch64CC::Invalid) { + std::string Msg = "invalid condition code"; + if (!Suggestion.empty()) + Msg += ", did you mean " + Suggestion + "?"; + return Error(SuffixLoc, Msg); + } Operands.push_back(AArch64Operand::CreateToken(".", SuffixLoc, getContext(), /*IsSuffix=*/true)); Operands.push_back( @@ -6024,6 +6039,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveCFINegateRAState(); else if (IDVal == ".cfi_b_key_frame") parseDirectiveCFIBKeyFrame(); + else if (IDVal == ".cfi_mte_tagged_frame") + parseDirectiveCFIMTETaggedFrame(); else if (IDVal == ".arch_extension") parseDirectiveArchExtension(Loc); else if (IDVal == ".variant_pcs") @@ -6198,12 +6215,11 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) { if (Extension.Features.none()) report_fatal_error("unsupported architectural extension: " + Name); - FeatureBitset ToggleFeatures = EnableFeature - ? 
(~Features & Extension.Features) - : ( Features & Extension.Features); - FeatureBitset Features = - ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); - setAvailableFeatures(Features); + FeatureBitset ToggleFeatures = + EnableFeature + ? STI.SetFeatureBitsTransitively(~Features & Extension.Features) + : STI.ToggleFeature(Features & Extension.Features); + setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures)); break; } } @@ -6217,8 +6233,7 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) { StringRef Name = getParser().parseStringToEndOfStatement().trim(); - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.arch_extension' directive")) + if (parseEOL()) return true; bool EnableFeature = true; @@ -6236,12 +6251,11 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) { if (Extension.Features.none()) return Error(ExtLoc, "unsupported architectural extension: " + Name); - FeatureBitset ToggleFeatures = EnableFeature - ? (~Features & Extension.Features) - : (Features & Extension.Features); - FeatureBitset Features = - ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); - setAvailableFeatures(Features); + FeatureBitset ToggleFeatures = + EnableFeature + ? STI.SetFeatureBitsTransitively(~Features & Extension.Features) + : STI.ToggleFeature(Features & Extension.Features); + setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures)); return false; } @@ -6281,7 +6295,6 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { ExpandCryptoAEK(llvm::AArch64::getCPUArchKind(CPU), RequestedExtensions); - FeatureBitset Features = STI.getFeatureBits(); for (auto Name : RequestedExtensions) { // Advance source location past '+'. CurLoc = incrementLoc(CurLoc, 1); @@ -6301,12 +6314,12 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { if (Extension.Features.none()) report_fatal_error("unsupported architectural extension: " + Name); - FeatureBitset ToggleFeatures = EnableFeature - ? (~Features & Extension.Features) - : ( Features & Extension.Features); - FeatureBitset Features = - ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); - setAvailableFeatures(Features); + FeatureBitset Features = STI.getFeatureBits(); + FeatureBitset ToggleFeatures = + EnableFeature + ? STI.SetFeatureBitsTransitively(~Features & Extension.Features) + : STI.ToggleFeature(Features & Extension.Features); + setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures)); FoundExtension = true; break; @@ -6401,12 +6414,10 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { if (Idx + 1 == NbArgs) break; - if (parseToken(AsmToken::Comma, - "unexpected token in '" + Twine(IDVal) + "' directive")) + if (parseComma()) return true; } - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '" + Twine(IDVal) + "' directive")) + if (parseEOL()) return true; getStreamer().emitLOHDirective((MCLOHType)Kind, Args); @@ -6416,7 +6427,7 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { /// parseDirectiveLtorg /// ::= .ltorg | .pool bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + if (parseEOL()) return true; getTargetStreamer().emitCurrentConstantPool(); return false; @@ -6474,8 +6485,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { return Error(SRegLoc, "register name or alias expected"); // Shouldn't be anything else. 
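In the .cpu handler, the FeatureBitset is now re-read from the subtarget on every loop iteration; the earlier shape captured it once before the loop, so a second '+ext' was computed against stale bits. A toy model of why the re-read matters, with std::bitset standing in for FeatureBitset and plain XOR toggling standing in for SetFeatureBitsTransitively/ToggleFeature (implied-feature handling is deliberately ignored in this sketch):

  #include <bitset>
  #include <cassert>

  using Features = std::bitset<8>;

  // Toy subtarget: toggling applies the given delta to the current state.
  struct Subtarget {
    Features Bits;
    Features toggle(Features Delta) { Bits ^= Delta; return Bits; }
  };

  int main() {
    Subtarget STI;
    Features ExtA("00000011"), ExtB("00000110"); // overlapping extensions

    // Correct: recompute the missing bits against the *current* state for
    // each extension, as the patched loop does.
    for (Features Ext : {ExtA, ExtB})
      STI.toggle(~STI.Bits & Ext); // enable only the not-yet-set bits
    assert(STI.Bits == (ExtA | ExtB));

    // Buggy variant (the pre-patch shape): a snapshot taken before the loop
    // makes the second toggle flip a bit ExtA already enabled.
    Subtarget Stale;
    Features Snapshot = Stale.Bits;
    for (Features Ext : {ExtA, ExtB})
      Stale.toggle(~Snapshot & Ext);
    assert(Stale.Bits != (ExtA | ExtB)); // bit 1 toggled twice: back off
  }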
- if (parseToken(AsmToken::EndOfStatement, - "unexpected input in .req directive")) + if (parseEOL()) return true; auto pair = std::make_pair(RegisterKind, (unsigned) RegNum); @@ -6496,7 +6506,7 @@ bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) { } bool AArch64AsmParser::parseDirectiveCFINegateRAState() { - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + if (parseEOL()) return true; getStreamer().emitCFINegateRAState(); return false; @@ -6505,31 +6515,31 @@ bool AArch64AsmParser::parseDirectiveCFINegateRAState() { /// parseDirectiveCFIBKeyFrame /// ::= .cfi_b_key bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() { - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.cfi_b_key_frame'")) + if (parseEOL()) return true; getStreamer().emitCFIBKeyFrame(); return false; } +/// parseDirectiveCFIMTETaggedFrame +/// ::= .cfi_mte_tagged_frame +bool AArch64AsmParser::parseDirectiveCFIMTETaggedFrame() { + if (parseEOL()) + return true; + getStreamer().emitCFIMTETaggedFrame(); + return false; +} + /// parseDirectiveVariantPCS /// ::= .variant_pcs symbolname bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) { - const AsmToken &Tok = getTok(); - if (Tok.isNot(AsmToken::Identifier)) + StringRef Name; + if (getParser().parseIdentifier(Name)) return TokError("expected symbol name"); - - StringRef SymbolName = Tok.getIdentifier(); - - MCSymbol *Sym = getContext().lookupSymbol(SymbolName); - if (!Sym) - return TokError("unknown symbol"); - - Lex(); // Eat the symbol - if (parseEOL()) return true; - getTargetStreamer().emitDirectiveVariantPCS(Sym); + getTargetStreamer().emitDirectiveVariantPCS( + getContext().getOrCreateSymbol(Name)); return false; } @@ -6880,7 +6890,7 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, // as a literal token. if (Op.isTokenEqual("za")) return Match_Success; - break; + return Match_InvalidOperand; } if (!Op.isImm()) return Match_InvalidOperand; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 9ce00f76d9c7..1b65589416c3 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -16,9 +16,10 @@ #include "TargetInfo/AArch64TargetInfo.h" #include "Utils/AArch64BaseInfo.h" #include "llvm-c/Disassembler.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" @@ -37,213 +38,226 @@ using DecodeStatus = MCDisassembler::DecodeStatus; // Forward declare these because the autogenerated code will reference them. // Definitions are further down. 
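The .variant_pcs rewrite above also changes symbol semantics: lookupSymbol rejected a name the assembler had not seen yet, while getOrCreateSymbol makes a forward reference legal. A minimal map-based sketch of that difference (SymbolTable, lookup and getOrCreate are illustrative stand-ins for MCContext):

  #include <cassert>
  #include <map>
  #include <string>

  struct Symbol { std::string Name; };

  struct SymbolTable {
    std::map<std::string, Symbol> Table;

    // Lookup-only: nullptr when the assembler has not seen the name yet,
    // which is what made ".variant_pcs f" before "f:" an error.
    Symbol *lookup(const std::string &Name) {
      auto It = Table.find(Name);
      return It == Table.end() ? nullptr : &It->second;
    }

    // Get-or-create: forward references become legal, matching the patched
    // directive handler.
    Symbol *getOrCreate(const std::string &Name) {
      return &Table.try_emplace(Name, Symbol{Name}).first->second;
    }
  };

  int main() {
    SymbolTable Ctx;
    assert(Ctx.lookup("f") == nullptr);      // pre-patch behaviour: reject
    assert(Ctx.getOrCreate("f") != nullptr); // post-patch: create and attach
  }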
-static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPR64x8ClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - 
const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeZPR_4bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst &Inst, - unsigned RegMask, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, - uint64_t Address, 
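The long run of prototype updates in this file replaces the opaque const void *Decoder parameter with a typed const MCDisassembler *, which deletes the static_cast boilerplate at every use site (visible where tryAddingSymbolicOperand is called further down). A before/after sketch of the pattern with stand-in types (Disassembler and tryAddSymbolicOperand are illustrative, not the MC API):

  // Stand-in for the disassembler type involved.
  struct Disassembler {
    bool tryAddSymbolicOperand(long) const { return false; }
  };

  // Pre-patch shape: every decoder received an untyped pointer and had to
  // cast it back before use.
  static bool decodeOld(unsigned Imm, const void *Decoder) {
    const auto *Dis = static_cast<const Disassembler *>(Decoder);
    return Dis->tryAddSymbolicOperand(Imm);
  }

  // Post-patch shape: the table-generated callers pass the typed pointer, so
  // the cast (and the chance of casting to the wrong type) disappears.
  static bool decodeNew(unsigned Imm, const Disassembler *Decoder) {
    return Decoder->tryAddSymbolicOperand(Imm);
  }

  int main() {
    Disassembler D;
    decodeOld(4, &D);
    decodeNew(4, &D);
  }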
- const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeFMOVLaneInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR64Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR64ImmNarrow(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR32Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR32ImmNarrow(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR16Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR16ImmNarrow(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftR8Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftL64Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftL32Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); 
static DecodeStatus DecodeVecShiftL16Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeVecShiftL8Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeWSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeXSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Address, + const MCDisassembler *Decoder); template static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template -static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder); + const MCDisassembler *Decoder); static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -270,7 +284,8 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { static MCDisassembler *createAArch64Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AArch64Disassembler(STI, Ctx); + + return new AArch64Disassembler(STI, Ctx, T.createMCInstrInfo()); } DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, @@ -295,67 +310,37 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, DecodeStatus Result = decodeInstruction(Table, MI, Insn, Address, this, STI); - switch (MI.getOpcode()) { - default: - break; + const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); + // For Scalable Matrix Extension (SME) instructions that have an implicit - // operand for the accumulator (ZA) which isn't encoded, manually insert - // operand. - case AArch64::LDR_ZA: - case AArch64::STR_ZA: { - MI.insert(MI.begin(), MCOperand::createReg(AArch64::ZA)); - // Spill and fill instructions have a single immediate used for both the - // vector select offset and optional memory offset. Replicate the decoded - // immediate. + // operand for the accumulator (ZA) or implicit immediate zero which isn't + // encoded, manually insert operand. 
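The replacement strategy, in the loop immediately below, walks the instruction's operand descriptions instead of switching over dozens of opcodes: any operand that the table marks as an implicit, unencoded register or zero immediate is materialized at its declared position. A simplified stand-alone model of that loop (OperandDesc and Implicit are illustrative stand-ins for the MCOI operand info):

  #include <cassert>
  #include <vector>

  // Illustrative operand description: which implicit value, if any, must be
  // materialized because the encoding carries no bits for it.
  enum class Implicit { None, RegZA, Imm0 };
  struct OperandDesc { Implicit Kind; };

  using Operand = int; // toy operand; real code uses MCOperand
  const Operand RegZAOp = -1;

  // Insert implicit operands at their declared positions, in order, mirroring
  // the data-driven loop: positions shift as we insert, which is why the loop
  // indexes the description and inserts at MI.begin() + i.
  void insertImplicitOps(std::vector<Operand> &MI,
                         const std::vector<OperandDesc> &Desc) {
    for (unsigned I = 0; I < Desc.size(); ++I) {
      if (Desc[I].Kind == Implicit::RegZA)
        MI.insert(MI.begin() + I, RegZAOp);
      else if (Desc[I].Kind == Implicit::Imm0)
        MI.insert(MI.begin() + I, 0);
    }
  }

  int main() {
    // Decoded operands {7, 9}; the description says operand 0 is an implicit
    // ZA register and operand 3 an implicit zero immediate.
    std::vector<Operand> MI = {7, 9};
    insertImplicitOps(MI, {{Implicit::RegZA},
                           {Implicit::None},
                           {Implicit::None},
                           {Implicit::Imm0}});
    assert((MI == std::vector<Operand>{RegZAOp, 7, 9, 0}));
  }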
+ for (unsigned i = 0; i < Desc.getNumOperands(); i++) { + if (Desc.OpInfo[i].OperandType == MCOI::OPERAND_REGISTER) { + switch (Desc.OpInfo[i].RegClass) { + default: + break; + case AArch64::MPRRegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZA)); + break; + case AArch64::MPR8RegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0)); + break; + } + } else if (Desc.OpInfo[i].OperandType == + AArch64::OPERAND_IMPLICIT_IMM_0) { + MI.insert(MI.begin() + i, MCOperand::createImm(0)); + } + } + + if (MI.getOpcode() == AArch64::LDR_ZA || + MI.getOpcode() == AArch64::STR_ZA) { + // Spill and fill instructions have a single immediate used for both + // the vector select offset and optional memory offset. Replicate + // the decoded immediate. const MCOperand &Imm4Op = MI.getOperand(2); assert(Imm4Op.isImm() && "Unexpected operand type!"); MI.addOperand(Imm4Op); - break; - } - case AArch64::LD1_MXIPXX_H_B: - case AArch64::LD1_MXIPXX_V_B: - case AArch64::ST1_MXIPXX_H_B: - case AArch64::ST1_MXIPXX_V_B: - case AArch64::INSERT_MXIPZ_H_B: - case AArch64::INSERT_MXIPZ_V_B: - // e.g. - // MOVA ZA0.B[, ], /M, .B - // ^ insert implicit 8-bit element tile - MI.insert(MI.begin(), MCOperand::createReg(AArch64::ZAB0)); - break; - case AArch64::EXTRACT_ZPMXI_H_B: - case AArch64::EXTRACT_ZPMXI_V_B: - // MOVA .B, /M, ZA0.B[, ] - // ^ insert implicit 8-bit element tile - MI.insert(MI.begin()+2, MCOperand::createReg(AArch64::ZAB0)); - break; - case AArch64::LD1_MXIPXX_H_Q: - case AArch64::LD1_MXIPXX_V_Q: - case AArch64::ST1_MXIPXX_H_Q: - case AArch64::ST1_MXIPXX_V_Q: - // 128-bit load/store have implicit zero vector index. - MI.insert(MI.begin()+2, MCOperand::createImm(0)); - break; - // 128-bit mova have implicit zero vector index. - case AArch64::INSERT_MXIPZ_H_Q: - case AArch64::INSERT_MXIPZ_V_Q: - MI.insert(MI.begin()+2, MCOperand::createImm(0)); - break; - case AArch64::EXTRACT_ZPMXI_H_Q: - case AArch64::EXTRACT_ZPMXI_V_Q: - MI.addOperand(MCOperand::createImm(0)); - break; - case AArch64::SMOVvi8to32_idx0: - case AArch64::SMOVvi8to64_idx0: - case AArch64::SMOVvi16to32_idx0: - case AArch64::SMOVvi16to64_idx0: - case AArch64::SMOVvi32to64_idx0: - case AArch64::UMOVvi8_idx0: - case AArch64::UMOVvi16_idx0: - case AArch64::UMOVvi32_idx0: - case AArch64::UMOVvi64_idx0: - MI.addOperand(MCOperand::createImm(0)); - break; } if (Result != MCDisassembler::Fail) @@ -400,7 +385,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Disassembler() { static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -410,9 +395,9 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, return Success; } -static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 15) return Fail; return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder); @@ -420,7 +405,7 @@ static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -432,7 +417,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus 
DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -444,7 +429,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -456,7 +441,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -466,9 +451,9 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, return Success; } -static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 30) return Fail; @@ -481,7 +466,7 @@ static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -491,10 +476,9 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, return Success; } -static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeGPR64x8ClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 22) return Fail; if (RegNo & 1) @@ -509,7 +493,7 @@ static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -518,10 +502,10 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, return Success; } -static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 3) return Fail; @@ -534,7 +518,7 @@ static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -546,7 +530,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -558,7 +542,7 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void* Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; @@ -570,7 +554,7 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPR_4bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void 
*Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 15) return Fail; return DecodeZPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -578,7 +562,7 @@ static DecodeStatus DecodeZPR_4bRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 7) return Fail; return DecodeZPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -586,7 +570,7 @@ static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void* Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -597,7 +581,7 @@ static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void* Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -608,7 +592,7 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void* Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -617,10 +601,10 @@ static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo, return Success; } -static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst &Inst, - unsigned RegMask, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask, + uint64_t Address, + const MCDisassembler *Decoder) { if (RegMask > 0xFF) return Fail; Inst.addOperand(MCOperand::createImm(RegMask)); @@ -641,7 +625,8 @@ static const SmallVector, 5> template static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned LastReg = (1 << NumBitsForTile) - 1; if (RegNo > LastReg) return Fail; @@ -651,7 +636,8 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 15) return Fail; @@ -663,7 +649,7 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void* Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 7) return Fail; @@ -672,7 +658,8 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -682,7 +669,8 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -693,7 +681,7 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) 
{ if (RegNo > 31) return Fail; unsigned Register = @@ -703,7 +691,8 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -713,7 +702,8 @@ static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -724,7 +714,7 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return Fail; unsigned Register = @@ -735,7 +725,7 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { // scale{5} is asserted as 1 in tblgen. Imm |= 0x20; Inst.addOperand(MCOperand::createImm(64 - Imm)); @@ -744,29 +734,29 @@ static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(64 - Imm)); return Success; } static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { int64_t ImmVal = Imm; - const AArch64Disassembler *Dis = - static_cast(Decoder); // Sign-extend 19-bit immediate. if (ImmVal & (1 << (19 - 1))) ImmVal |= ~((1LL << 19) - 1); - if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr, - Inst.getOpcode() != AArch64::LDRXl, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand( + Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(ImmVal)); return Success; } static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm((Imm >> 1) & 1)); Inst.addOperand(MCOperand::createImm(Imm & 1)); return Success; @@ -774,7 +764,7 @@ static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(Imm)); // Every system register in the encoding space is valid with the syntax @@ -784,7 +774,7 @@ static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(Imm)); return Success; @@ -792,7 +782,7 @@ static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm, static DecodeStatus DecodeFMOVLaneInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // This decoder exists to add the dummy Lane operand to the MCInst, which must // be 1 in assembly but has no other real manifestation. 
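DecodePCRelLabel19 above, like the adr, test-and-branch and unconditional-branch decoders later in the file, sign-extends an N-bit field with the same two-line idiom. A generic restatement with a couple of checks (signExtend is an illustrative helper, not an LLVM API):

  #include <cassert>
  #include <cstdint>

  // Sign-extend the low N bits of Value: if the sign bit (bit N-1) is set,
  // fill every bit from N upward with ones. This is exactly the
  //   if (imm & (1 << (N - 1))) imm |= ~((1LL << N) - 1);
  // pattern used by the decoders.
  template <unsigned N> int64_t signExtend(int64_t Value) {
    static_assert(N > 0 && N < 64, "field width must fit in int64_t");
    if (Value & (1LL << (N - 1)))
      Value |= ~((1LL << N) - 1);
    return Value;
  }

  int main() {
    assert(signExtend<19>(0x7FFFF) == -1);      // all ones -> -1
    assert(signExtend<19>(0x3FFFF) == 0x3FFFF); // sign bit clear -> unchanged
    assert(signExtend<26>(1LL << 25) == -(1LL << 25)); // most negative value
  }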
unsigned Rd = fieldFromInstruction(Insn, 0, 5); @@ -826,66 +816,74 @@ static DecodeStatus DecodeVecShiftLImm(MCInst &Inst, unsigned Imm, } static DecodeStatus DecodeVecShiftR64Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm, 64); } static DecodeStatus DecodeVecShiftR64ImmNarrow(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm | 0x20, 64); } static DecodeStatus DecodeVecShiftR32Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm, 32); } static DecodeStatus DecodeVecShiftR32ImmNarrow(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm | 0x10, 32); } static DecodeStatus DecodeVecShiftR16Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm, 16); } static DecodeStatus DecodeVecShiftR16ImmNarrow(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm | 0x8, 16); } static DecodeStatus DecodeVecShiftR8Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftRImm(Inst, Imm, 8); } static DecodeStatus DecodeVecShiftL64Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftLImm(Inst, Imm, 64); } static DecodeStatus DecodeVecShiftL32Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftLImm(Inst, Imm, 32); } static DecodeStatus DecodeVecShiftL16Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftLImm(Inst, Imm, 16); } static DecodeStatus DecodeVecShiftL8Imm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { return DecodeVecShiftLImm(Inst, Imm, 8); } -static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned Rm = fieldFromInstruction(insn, 16, 5); @@ -947,7 +945,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(insn, 0, 5); unsigned imm = fieldFromInstruction(insn, 5, 16); unsigned shift = fieldFromInstruction(insn, 21, 2); @@ -978,14 +976,12 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn, return Success; } -static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, + const MCDisassembler *Decoder) { unsigned Rt 
= fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned offset = fieldFromInstruction(insn, 10, 12); - const AArch64Disassembler *Dis = - static_cast(Decoder); switch (Inst.getOpcode()) { default: @@ -1034,14 +1030,14 @@ static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(offset)); return Success; } static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rt = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); int64_t offset = fieldFromInstruction(insn, 12, 9); @@ -1237,9 +1233,9 @@ static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn, return Success; } -static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, + const MCDisassembler *Decoder) { unsigned Rt = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned Rt2 = fieldFromInstruction(insn, 10, 5); @@ -1322,7 +1318,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rt = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned Rt2 = fieldFromInstruction(insn, 10, 5); @@ -1456,7 +1452,7 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rt = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); uint64_t offset = fieldFromInstruction(insn, 22, 1) << 9 | @@ -1489,7 +1485,7 @@ static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned Rm = fieldFromInstruction(insn, 16, 5); @@ -1546,7 +1542,7 @@ static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned Datasize = fieldFromInstruction(insn, 31, 1); @@ -1577,7 +1573,7 @@ static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(insn, 0, 5); unsigned cmode = fieldFromInstruction(insn, 12, 4); unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; @@ -1616,7 +1612,7 @@ static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn, static DecodeStatus 
 DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
-                            const void *Decoder) {
+                            const MCDisassembler *Decoder) {
   unsigned Rd = fieldFromInstruction(insn, 0, 5);
   unsigned cmode = fieldFromInstruction(insn, 12, 4);
   unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
@@ -1633,26 +1629,26 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
 }
 
 static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
-                                         uint64_t Addr, const void *Decoder) {
+                                         uint64_t Addr,
+                                         const MCDisassembler *Decoder) {
   unsigned Rd = fieldFromInstruction(insn, 0, 5);
   int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
   imm |= fieldFromInstruction(insn, 29, 2);
-  const AArch64Disassembler *Dis =
-      static_cast<const AArch64Disassembler *>(Decoder);
 
   // Sign-extend the 21-bit immediate.
   if (imm & (1 << (21 - 1)))
     imm |= ~((1LL << 21) - 1);
 
   DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
-  if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(imm));
 
   return Success;
 }
 
 static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
-                                         uint64_t Addr, const void *Decoder) {
+                                         uint64_t Addr,
+                                         const MCDisassembler *Decoder) {
   unsigned Rd = fieldFromInstruction(insn, 0, 5);
   unsigned Rn = fieldFromInstruction(insn, 5, 5);
   unsigned Imm = fieldFromInstruction(insn, 10, 14);
@@ -1661,8 +1657,6 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
   unsigned ShifterVal = (Imm >> 12) & 3;
   unsigned ImmVal = Imm & 0xFFF;
-  const AArch64Disassembler *Dis =
-      static_cast<const AArch64Disassembler *>(Decoder);
 
   if (ShifterVal != 0 && ShifterVal != 1)
     return Fail;
@@ -1681,7 +1675,7 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
     DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
   }
 
-  if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(ImmVal));
   Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
   return Success;
@@ -1689,24 +1683,22 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
 
 static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
                                               uint64_t Addr,
-                                              const void *Decoder) {
+                                              const MCDisassembler *Decoder) {
   int64_t imm = fieldFromInstruction(insn, 0, 26);
-  const AArch64Disassembler *Dis =
-      static_cast<const AArch64Disassembler *>(Decoder);
 
   // Sign-extend the 26-bit immediate.
   if (imm & (1 << (26 - 1)))
     imm |= ~((1LL << 26) - 1);
 
-  if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(imm));
 
   return Success;
 }
 
-static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
-                                                  uint64_t Addr,
-                                                  const void *Decoder) {
+static DecodeStatus
+DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+                              const MCDisassembler *Decoder) {
   uint64_t op1 = fieldFromInstruction(insn, 16, 3);
   uint64_t op2 = fieldFromInstruction(insn, 5, 3);
   uint64_t crm = fieldFromInstruction(insn, 8, 4);
@@ -1726,22 +1718,20 @@ static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
   Inst.addOperand(MCOperand::createImm(pstate_field));
   Inst.addOperand(MCOperand::createImm(crm));
 
-  const AArch64Disassembler *Dis =
-      static_cast<const AArch64Disassembler *>(Decoder);
   auto PState = AArch64PState::lookupPStateByEncoding(pstate_field);
-  if (PState && PState->haveFeatures(Dis->getSubtargetInfo().getFeatureBits()))
+  if (PState &&
+      PState->haveFeatures(Decoder->getSubtargetInfo().getFeatureBits()))
     return Success;
   return Fail;
 }
 
 static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
-                                        uint64_t Addr, const void *Decoder) {
+                                        uint64_t Addr,
+                                        const MCDisassembler *Decoder) {
   uint64_t Rt = fieldFromInstruction(insn, 0, 5);
   uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
   bit |= fieldFromInstruction(insn, 19, 5);
   int64_t dst = fieldFromInstruction(insn, 5, 14);
-  const AArch64Disassembler *Dis =
-      static_cast<const AArch64Disassembler *>(Decoder);
 
   // Sign-extend 14-bit immediate.
   if (dst & (1 << (14 - 1)))
@@ -1752,17 +1742,16 @@ static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
   else
     DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
   Inst.addOperand(MCOperand::createImm(bit));
-  if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(dst));
 
   return Success;
 }
 
-static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
-                                                        unsigned RegClassID,
-                                                        unsigned RegNo,
-                                                        uint64_t Addr,
-                                                        const void *Decoder) {
+static DecodeStatus
+DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegClassID,
+                                    unsigned RegNo, uint64_t Addr,
+                                    const MCDisassembler *Decoder) {
   // Register number must be even (see CASP instruction)
   if (RegNo & 0x1)
     return Fail;
@@ -1772,27 +1761,25 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
   return Success;
 }
 
-static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst,
-                                                      unsigned RegNo,
-                                                      uint64_t Addr,
-                                                      const void *Decoder) {
+static DecodeStatus
+DecodeWSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+                                  const MCDisassembler *Decoder) {
   return DecodeGPRSeqPairsClassRegisterClass(Inst,
                                              AArch64::WSeqPairsClassRegClassID,
                                              RegNo, Addr, Decoder);
 }
 
-static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
-                                                      unsigned RegNo,
-                                                      uint64_t Addr,
-                                                      const void *Decoder) {
+static DecodeStatus
+DecodeXSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+                                  const MCDisassembler *Decoder) {
   return DecodeGPRSeqPairsClassRegisterClass(Inst,
                                              AArch64::XSeqPairsClassRegClassID,
                                              RegNo, Addr, Decoder);
 }
 
-static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
-                                                   uint64_t Addr,
-                                                   const void *Decoder) {
+static DecodeStatus
+DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+                               const MCDisassembler *Decoder) {
   unsigned Zdn = fieldFromInstruction(insn, 0, 5);
   unsigned imm = fieldFromInstruction(insn, 5, 13);
   if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 64))
@@ -1808,7 +1795,7 @@ static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
 
 template <int Bits>
 static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
-                               const void *Decoder) {
+                               const MCDisassembler *Decoder) {
   if (Imm & ~((1LL << Bits) - 1))
     return Fail;
 
@@ -1822,8 +1809,8 @@ static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
 
 // Decode 8-bit signed/unsigned immediate for a given element width.
 template <int ElementWidth>
-static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
-                                     uint64_t Addr, const void *Decoder) {
+static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,
+                                     const MCDisassembler *Decoder) {
   unsigned Val = (uint8_t)Imm;
   unsigned Shift = (Imm & 0x100) ? 8 : 0;
   if (ElementWidth == 8 && Shift)
@@ -1835,13 +1822,14 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
 
 // Decode uimm4 ranged from 1-16.
 static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
-                                       uint64_t Addr, const void *Decoder) {
+                                       uint64_t Addr,
+                                       const MCDisassembler *Decoder) {
   Inst.addOperand(MCOperand::createImm(Imm + 1));
   return Success;
 }
 
 static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
-                                 const void *Decoder) {
+                                 const MCDisassembler *Decoder) {
   if (AArch64SVCR::lookupSVCRByEncoding(Imm)) {
     Inst.addOperand(MCOperand::createImm(Imm));
     return Success;
@@ -1851,7 +1839,7 @@ static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
 
 static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
                                               uint64_t Addr,
-                                              const void *Decoder) {
+                                              const MCDisassembler *Decoder) {
   unsigned Rd = fieldFromInstruction(insn, 0, 5);
   unsigned Rs = fieldFromInstruction(insn, 16, 5);
   unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1876,7 +1864,7 @@ static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
 
 static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn,
                                               uint64_t Addr,
-                                              const void *Decoder) {
+                                              const MCDisassembler *Decoder) {
   unsigned Rd = fieldFromInstruction(insn, 0, 5);
   unsigned Rm = fieldFromInstruction(insn, 16, 5);
   unsigned Rn = fieldFromInstruction(insn, 5, 5);
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 374a89edcb74..6761d449a7f4 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -13,13 +13,17 @@
 #define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
 
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInstrInfo.h"
 
 namespace llvm {
 
 class AArch64Disassembler : public MCDisassembler {
+  std::unique_ptr<MCInstrInfo const> const MCII;
+
 public:
-  AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
-      : MCDisassembler(STI, Ctx) {}
+  AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+                      MCInstrInfo const *MCII)
+      : MCDisassembler(STI, Ctx), MCII(MCII) {}
 
   ~AArch64Disassembler() override = default;
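The hunks above are the mechanical heart of the disassembler refactor: every decoder callback now receives the const MCDisassembler * it belongs to instead of an opaque const void *, so the hand-written decoders can drop their static_cast boilerplate and query feature bits or the symbolizer through the base class. A minimal sketch of a decoder in the new style follows; the opcode-free body, the field layout, and the FeatureSVE check are invented for illustration, while Success/Fail, fieldFromInstruction, and the tryAddingSymbolicOperand signature are the ones visible in the hunks above:

  // Hypothetical decoder in the post-refactor shape (not from the patch).
  static DecodeStatus DecodeExampleImm(MCInst &Inst, uint32_t insn,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
    unsigned imm = fieldFromInstruction(insn, 5, 16); // illustrative field
    // Subtarget feature bits are reachable straight off the base class now:
    if (!Decoder->getSubtargetInfo().getFeatureBits()[AArch64::FeatureSVE])
      return Fail;
    // So is the symbolizer hook; note the new OpSize argument (0 = unknown).
    if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch=*/false,
                                           /*Offset=*/0, /*OpSize=*/0,
                                           /*InstSize=*/4))
      Inst.addOperand(MCOperand::createImm(imm));
    return Success;
  }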
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 5b6f06f8dbb4..11964b2075e5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -60,7 +60,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
 /// an operand to the MCInst and Fail otherwise.
 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
-    bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+    bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
   if (!SymbolLookUp)
     return false;
   // FIXME: This method shares a lot of code with
@@ -73,8 +73,8 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
   SymbolicOp.Value = Value;
   uint64_t ReferenceType;
   const char *ReferenceName;
-  if (!GetOpInfo ||
-      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+  if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
+                               1, &SymbolicOp)) {
     if (IsBranch) {
       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index dc72331660cc..ca677db49739 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -29,7 +29,8 @@ public:
   bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
                                 int64_t Value, uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) override;
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override;
 };
 
 } // namespace llvm
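For out-of-tree symbolizers, the visible API change is the widened hook: tryAddingSymbolicOperand gains a uint64_t OpSize parameter between Offset and InstSize, forwarded to the GetOpInfo callback so a client can tell how wide the operand field being symbolized is. A sketch of how a custom MCSymbolizer subclass would now override it; MySymbolizer is hypothetical, the ctor is inherited, and a real subclass must also implement the pure virtual tryAddingPcLoadReferenceComment:

  // Hypothetical client symbolizer, abbreviated to the changed hook.
  class MySymbolizer : public MCSymbolizer {
  public:
    using MCSymbolizer::MCSymbolizer;
    bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
                                  int64_t Value, uint64_t Address,
                                  bool IsBranch, uint64_t Offset,
                                  uint64_t OpSize, uint64_t InstSize) override {
      // OpSize is the new parameter; callers pass 0 when it is unknown.
      return false; // defer to the default immediate-operand handling
    }
  };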
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 097b93e4fcca..89e1d85a6085 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -18,6 +18,7 @@
 #include "AArch64Subtarget.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -1058,10 +1059,10 @@ bool AArch64CallLowering::lowerTailCall(
 
   // If Callee is a reg, since it is used by a target specific instruction,
   // it must have a register class matching the constraint of that instruction.
-  if (Info.Callee.isReg())
+  if (MIB->getOperand(0).isReg())
     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                              *MF.getSubtarget().getRegBankInfo(), *MIB,
-                             MIB->getDesc(), Info.Callee, 0);
+                             MIB->getDesc(), MIB->getOperand(0), 0);
 
   MF.getFrameInfo().setHasTailCall();
   Info.LoweredTailCall = true;
@@ -1127,14 +1128,39 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
-  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  unsigned Opc = 0;
+  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
+  // be expanded to the call, directly followed by a special marker sequence and
+  // a call to an ObjC library function.
+  if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
+    Opc = AArch64::BLR_RVMARKER;
+  // A call to a returns twice function like setjmp must be followed by a bti
+  // instruction.
+  else if (Info.CB &&
+           Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+           !Subtarget.noBTIAtReturnTwice() &&
+           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+    Opc = AArch64::BLR_BTI;
+  else
+    Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
 
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
+  unsigned CalleeOpNo = 0;
+
+  if (Opc == AArch64::BLR_RVMARKER) {
+    // Add a target global address for the retainRV/claimRV runtime function
+    // just before the call target.
+    Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
+    MIB.addGlobalAddress(ARCFn);
+    ++CalleeOpNo;
+  }
+
   MIB.add(Info.Callee);
 
   // Tell the call which registers are clobbered.
   const uint32_t *Mask;
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   const auto *TRI = Subtarget.getRegisterInfo();
 
   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
@@ -1160,10 +1186,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // If Callee is a reg, since it is used by a target specific
   // instruction, it must have a register class matching the
   // constraint of that instruction.
-  if (Info.Callee.isReg())
+  if (MIB->getOperand(CalleeOpNo).isReg())
     constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                              *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
-                             Info.Callee, 0);
+                             MIB->getOperand(CalleeOpNo), CalleeOpNo);
 
   // Finally we can copy the returned value back into its virtual-register. In
   // symmetry with the arguments, the physical register must be an
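GlobalISel's lowerCall now mirrors what SelectionDAG has long done for ObjC attachedcall bundles and for returns_twice callees under branch-target enforcement. The decision reads as a three-way ladder; restated out of context below for review purposes (the opcodes, helpers, and attributes are the ones in the hunk above, but the free-function wrapper itself is just exposition, not code from the patch):

  // Exposition-only restatement of the new opcode choice in lowerCall.
  static unsigned pickCallOpcode(const CallLowering::CallLoweringInfo &Info,
                                 const AArch64Subtarget &Subtarget,
                                 MachineFunction &MF) {
    // ObjC retainRV/claimRV: a pseudo keeps the marker sequence and the
    // runtime call glued to the call site until late expansion.
    if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
      return AArch64::BLR_RVMARKER;
    // setjmp-like callees: the second "return" lands right after the call,
    // so that point must be a valid BTI landing pad.
    if (Info.CB &&
        Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
        !Subtarget.noBTIAtReturnTwice() &&
        MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
      return AArch64::BLR_BTI;
    return getCallOpcode(MF, Info.Callee.isReg(), /*IsTailCall=*/false);
  }

Note also that once BLR_RVMARKER prepends the runtime function as operand 0, the callee is no longer operand 0, which is why the register-class constraining in the hunk switches from Info.Callee to MIB->getOperand(CalleeOpNo).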
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 703e356f016d..9a65687735fe 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -21,13 +21,16 @@
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -38,9 +41,9 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
-#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -62,6 +65,7 @@ namespace {
 
 #include "AArch64GenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATE_BITSET
+
 class AArch64InstructionSelector : public InstructionSelector {
 public:
   AArch64InstructionSelector(const AArch64TargetMachine &TM,
@@ -293,6 +297,20 @@ private:
   emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
 
+  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
+  /// In some cases this is even possible with OR operations in the expression.
+  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
+                                MachineIRBuilder &MIB) const;
+  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
+                                          CmpInst::Predicate CC,
+                                          AArch64CC::CondCode Predicate,
+                                          AArch64CC::CondCode OutCC,
+                                          MachineIRBuilder &MIB) const;
+  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
+                                   bool Negate, Register CCOp,
+                                   AArch64CC::CondCode Predicate,
+                                   MachineIRBuilder &MIB) const;
+
   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
   /// \p IsNegative is true if the test should be "not zero".
   /// This will also optimize the test bit instruction when possible.
@@ -419,12 +437,16 @@ private:
                       int OpIdx = -1) const;
   void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
+  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
+                                    const MachineInstr &MI,
+                                    int OpIdx = -1) const;
 
   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
   void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
 
   // Optimization methods.
-  bool tryOptSelect(MachineInstr &MI);
+  bool tryOptSelect(GSelect &Sel);
+  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                       MachineOperand &Predicate,
                                       MachineIRBuilder &MIRBuilder) const;
@@ -485,9 +507,11 @@ AArch64InstructionSelector::AArch64InstructionSelector(
 
 // FIXME: This should be target-independent, inferred from the types declared
 // for each class in the bank.
+//
+/// Given a register bank, and a type, return the smallest register class that
+/// can represent that combination.
 static const TargetRegisterClass *
 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
-                         const RegisterBankInfo &RBI,
                          bool GetAllRegSet = false) {
   if (RB.getID() == AArch64::GPRRegBankID) {
     if (Ty.getSizeInBits() <= 32)
@@ -828,39 +852,6 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
   return GenericOpc;
 }
 
-#ifndef NDEBUG
-/// Helper function that verifies that we have a valid copy at the end of
-/// selectCopy. Verifies that the source and dest have the expected sizes and
-/// then returns true.
-static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
-                        const MachineRegisterInfo &MRI,
-                        const TargetRegisterInfo &TRI,
-                        const RegisterBankInfo &RBI) {
-  const Register DstReg = I.getOperand(0).getReg();
-  const Register SrcReg = I.getOperand(1).getReg();
-  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
-
-  // Make sure the size of the source and dest line up.
-  assert(
-      (DstSize == SrcSize ||
-       // Copies are a mean to setup initial types, the number of
-       // bits may not exactly match.
-       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
-       // Copies are a mean to copy bits around, as long as we are
-       // on the same register class, that's fine. Otherwise, that
-       // means we need some SUBREG_TO_REG or AND & co.
-       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
-      "Copy with different width?!");
-
-  // Check the size of the destination.
-  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
-         "GPRs cannot get more than 64-bit width values");
-
-  return true;
-}
-#endif
-
 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
 /// to \p *To.
 ///
@@ -935,31 +926,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
     return false;
   }
 
-  // A couple helpers below, for making sure that the copy we produce is valid.
-
-  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
-  // to verify that the src and dst are the same size, since that's handled by
-  // the SUBREG_TO_REG.
-  bool KnownValid = false;
-
-  // Returns true, or asserts if something we don't expect happens. Instead of
-  // returning true, we return isValidCopy() to ensure that we verify the
-  // result.
-  auto CheckCopy = [&]() {
-    // If we have a bitcast or something, we can't have physical registers.
-    assert((I.isCopy() ||
-            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
-             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
-           "No phys reg on generic operator!");
-    bool ValidCopy = true;
-#ifndef NDEBUG
-    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
-    assert(ValidCopy && "Invalid copy.");
-#endif
-    (void)KnownValid;
-    return ValidCopy;
-  };
-
   // Is this a copy? If so, then we may need to insert a subregister copy.
   if (I.isCopy()) {
     // Yes. Check if there's anything to fix up.
@@ -1004,15 +970,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
           .addImm(SubReg);
       MachineOperand &RegOp = I.getOperand(1);
       RegOp.setReg(PromoteReg);
-
-      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
-      KnownValid = true;
     }
 
     // If the destination is a physical register, then there's nothing to
     // change, so we're done.
     if (Register::isPhysicalRegister(DstReg))
-      return CheckCopy();
+      return true;
   }
 
   // No need to constrain SrcReg. It will get constrained when we hit another
@@ -1032,7 +995,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   }
 
   I.setDesc(TII.get(AArch64::COPY));
-  return CheckCopy();
+  return true;
 }
 
 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
@@ -1309,6 +1272,90 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   }
 }
 
+/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
+static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
+                                    AArch64CC::CondCode &CondCode,
+                                    AArch64CC::CondCode &CondCode2) {
+  CondCode2 = AArch64CC::AL;
+  switch (CC) {
+  default:
+    llvm_unreachable("Unknown FP condition!");
+  case CmpInst::FCMP_OEQ:
+    CondCode = AArch64CC::EQ;
+    break;
+  case CmpInst::FCMP_OGT:
+    CondCode = AArch64CC::GT;
+    break;
+  case CmpInst::FCMP_OGE:
+    CondCode = AArch64CC::GE;
+    break;
+  case CmpInst::FCMP_OLT:
+    CondCode = AArch64CC::MI;
+    break;
+  case CmpInst::FCMP_OLE:
+    CondCode = AArch64CC::LS;
+    break;
+  case CmpInst::FCMP_ONE:
+    CondCode = AArch64CC::MI;
+    CondCode2 = AArch64CC::GT;
+    break;
+  case CmpInst::FCMP_ORD:
+    CondCode = AArch64CC::VC;
+    break;
+  case CmpInst::FCMP_UNO:
+    CondCode = AArch64CC::VS;
+    break;
+  case CmpInst::FCMP_UEQ:
+    CondCode = AArch64CC::EQ;
+    CondCode2 = AArch64CC::VS;
+    break;
+  case CmpInst::FCMP_UGT:
+    CondCode = AArch64CC::HI;
+    break;
+  case CmpInst::FCMP_UGE:
+    CondCode = AArch64CC::PL;
+    break;
+  case CmpInst::FCMP_ULT:
+    CondCode = AArch64CC::LT;
+    break;
+  case CmpInst::FCMP_ULE:
+    CondCode = AArch64CC::LE;
+    break;
+  case CmpInst::FCMP_UNE:
+    CondCode = AArch64CC::NE;
+    break;
+  }
+}
+
+/// Convert an IR fp condition code to an AArch64 CC.
+/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
+/// should be AND'ed instead of OR'ed.
+static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
+                                     AArch64CC::CondCode &CondCode,
+                                     AArch64CC::CondCode &CondCode2) {
+  CondCode2 = AArch64CC::AL;
+  switch (CC) {
+  default:
+    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
+    assert(CondCode2 == AArch64CC::AL);
+    break;
+  case CmpInst::FCMP_ONE:
+    // (a one b)
+    // == ((a olt b) || (a ogt b))
+    // == ((a ord b) && (a une b))
+    CondCode = AArch64CC::VC;
+    CondCode2 = AArch64CC::NE;
+    break;
+  case CmpInst::FCMP_UEQ:
+    // (a ueq b)
+    // == ((a uno b) || (a oeq b))
+    // == ((a ule b) && (a uge b))
+    CondCode = AArch64CC::PL;
+    CondCode2 = AArch64CC::LE;
+    break;
+  }
+}
+
 /// Return a register which can be used as a bit to test in a TB(N)Z.
 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                               MachineRegisterInfo &MRI) {
@@ -1703,7 +1750,6 @@
 static Optional<int64_t> getVectorShiftImm(Register Reg,
                                            MachineRegisterInfo &MRI) {
   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
   MachineInstr *OpMI = MRI.getVRegDef(Reg);
-  assert(OpMI && "Expected to find a vreg def for vector shift operand");
   return getAArch64VectorSplatScalar(*OpMI, MRI);
 }
 
@@ -1810,7 +1856,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
   unsigned Opc = 0;
   unsigned NegOpc = 0;
   const TargetRegisterClass *RC =
-      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
   if (Ty == LLT::fixed_vector(2, 64)) {
     Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
     NegOpc = AArch64::NEGv2i64;
@@ -2266,6 +2312,16 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     I.eraseFromParent();
     return true;
   }
+  case TargetOpcode::G_FENCE: {
+    if (I.getOperand(1).getImm() == 0)
+      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CompilerBarrier))
+          .addImm(I.getOperand(0).getImm());
+    else
+      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::DMB))
+          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
+    I.eraseFromParent();
+    return true;
+  }
   default:
     return false;
   }
@@ -2279,8 +2335,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   MachineFunction &MF = *MBB.getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  const AArch64Subtarget *Subtarget =
-      &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
+  const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
   if (Subtarget->requiresStrictAlign()) {
     // We don't support this feature yet.
     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
@@ -2312,7 +2367,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       return false;
     }
     const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
-    DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
+    DefRC = getRegClassForTypeOnBank(DefTy, RB);
     if (!DefRC) {
       LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
       return false;
@@ -2488,7 +2543,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
 
     // The case when we have 0.0 is covered by tablegen. Reject it here so we
     // can be sure tablegen works correctly and isn't rescued by this code.
-    // 0.0 is not covered by tablegen for FP128. So we will handle this 
+    // 0.0 is not covered by tablegen for FP128. So we will handle this
     // scenario in the code here.
     if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
       return false;
@@ -2510,7 +2565,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }
 
     if (isFP) {
-      const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
+      const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
       // For 16, 64, and 128b values, emit a constant pool load.
       switch (DefSize) {
       default:
@@ -2735,12 +2790,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
         return false;
 
       if (isa<GLoad>(LdSt)) {
-        static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
-                                     AArch64::LDARW, AArch64::LDARX};
+        static constexpr unsigned LDAPROpcodes[] = {
+            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
+        static constexpr unsigned LDAROpcodes[] = {
+            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
+        ArrayRef<unsigned> Opcodes =
+            STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
+                ? LDAPROpcodes
+                : LDAROpcodes;
         I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
       } else {
-        static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
-                                     AArch64::STLRW, AArch64::STLRX};
+        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
+                                               AArch64::STLRW, AArch64::STLRX};
         Register ValReg = LdSt.getReg(0);
         if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
           // Emit a subreg copy of 32 bits.
@@ -2774,7 +2835,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
       unsigned SubReg;
       LLT MemTy = LdSt.getMMO().getMemoryType();
-      auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
       if (!getSubRegForClass(RC, TRI, SubReg))
         return false;
 
@@ -2790,7 +2851,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     if (RB.getID() == AArch64::FPRRegBankID) {
       unsigned SubReg;
       LLT MemTy = LdSt.getMMO().getMemoryType();
-      auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
       if (!getSubRegForClass(RC, TRI, SubReg))
         return false;
       Register OldDst = LdSt.getReg(0);
@@ -2804,7 +2865,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
           .addImm(0)
           .addUse(NewDst)
           .addImm(SubReg);
-      auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+      auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
       RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
       MIB.setInstr(LdSt);
     }
@@ -2934,8 +2995,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
         ShiftTy.getSizeInBits() == 64) {
       assert(!ShiftTy.isVector() && "unexpected vector shift ty");
-      assert(MRI.getVRegDef(ShiftReg) &&
-             "could not find a vreg definition for shift amount");
       // Insert a subregister copy to implement a 64->32 trunc
       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                        .addReg(ShiftReg, 0, AArch64::sub_32);
@@ -2944,10 +3003,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       }
     }
     LLVM_FALLTHROUGH;
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FDIV:
   case TargetOpcode::G_OR: {
     // Reject the various things we don't support yet.
     if (unsupportedBinOp(I, RBI, MRI, TRI))
       return false;
@@ -3026,13 +3081,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }
 
     if (DstRB.getID() == AArch64::GPRRegBankID) {
-      const TargetRegisterClass *DstRC =
-          getRegClassForTypeOnBank(DstTy, DstRB, RBI);
+      const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
       if (!DstRC)
         return false;
 
-      const TargetRegisterClass *SrcRC =
-          getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
+      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
       if (!SrcRC)
         return false;
 
@@ -3270,6 +3323,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
 
     I.setDesc(TII.get(NewOpc));
     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    I.setFlags(MachineInstr::NoFPExcept);
     return true;
   }
 
@@ -3291,17 +3345,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     return selectCopy(I, TII, MRI, TRI, RBI);
 
   case TargetOpcode::G_SELECT: {
-    if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
+    auto &Sel = cast<GSelect>(I);
+    if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
                         << ", expected: " << LLT::scalar(1) << '\n');
       return false;
     }
 
-    const Register CondReg = I.getOperand(1).getReg();
-    const Register TReg = I.getOperand(2).getReg();
-    const Register FReg = I.getOperand(3).getReg();
+    const Register CondReg = Sel.getCondReg();
+    const Register TReg = Sel.getTrueReg();
+    const Register FReg = Sel.getFalseReg();
 
-    if (tryOptSelect(I))
+    if (tryOptSelect(Sel))
       return true;
 
     // Make sure to use an unused vreg instead of wzr, so that the peephole
@@ -3310,9 +3365,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
     constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
-    if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
       return false;
-    I.eraseFromParent();
+    Sel.eraseFromParent();
    return true;
   }
   case TargetOpcode::G_ICMP: {
@@ -3357,8 +3412,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
     const Register DstReg = I.getOperand(0).getReg();
     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
-    const TargetRegisterClass *DstRC =
-        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
+    const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
     return true;
   }
@@ -3871,7 +3925,7 @@ bool AArch64InstructionSelector::selectVectorICmp(
 
   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
   const TargetRegisterClass *SrcRC =
-      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+      getRegClassForTypeOnBank(SrcTy, VecRB, true);
   if (!SrcRC) {
     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
     return false;
@@ -4037,7 +4091,7 @@ MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
   }
 
   const TargetRegisterClass *DstRC =
-      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
+      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
   if (!DstRC) {
     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
     return nullptr;
@@ -4046,7 +4100,7 @@ MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
   const LLT &VecTy = MRI.getType(VecReg);
   const TargetRegisterClass *VecRC =
-      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
+      getRegClassForTypeOnBank(VecTy, VecRB, true);
   if (!VecRC) {
     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
     return nullptr;
@@ -4205,9 +4259,9 @@ bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
   } else {
     // No. We have to perform subregister inserts. For each insert, create an
     // implicit def and a subregister insert, and save the register we create.
-    const TargetRegisterClass *RC =
-        getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
-                              WideTy.getScalarSizeInBits() * NumElts);
+    const TargetRegisterClass *RC = getRegClassForTypeOnBank(
+        LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
+        *RBI.getRegBank(SrcReg, MRI, TRI));
     unsigned SubReg = 0;
     bool Found = getSubRegForClass(RC, TRI, SubReg);
     (void)Found;
@@ -4594,6 +4648,7 @@ AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
   // Partially build the compare. Decide if we need to add a use for the
   // third operand based off whether or not we're comparing against 0.0.
   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
+  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
   if (!ShouldUseImm)
     CmpMI.addUse(RHS);
   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
@@ -4632,7 +4687,7 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
   const TargetRegisterClass *DstRC =
-      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
+      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
 
   MachineInstr *WidenedOp1 =
       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
@@ -4701,7 +4756,256 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
   }
 }
 
+/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
+/// expressed as a conjunction.
+/// \param CanNegate Set to true if we can negate the whole sub-tree just by
+///                  changing the conditions on the CMP tests.
+///                  (this means we can call emitConjunctionRec() with
+///                   Negate==true on this sub-tree)
+/// \param MustBeFirst Set to true if this subtree needs to be negated and we
+///                    cannot do the negation naturally. We are required to
+///                    emit the subtree first in this case.
+/// \param WillNegate Is true if are called when the result of this
+///                   subexpression must be negated. This happens when the
+///                   outer expression is an OR. We can use this fact to know
+///                   that we have a double negation (or (or ...) ...) that
+///                   can be implemented for free.
+static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
+                               bool WillNegate, MachineRegisterInfo &MRI,
+                               unsigned Depth = 0) {
+  if (!MRI.hasOneNonDBGUse(Val))
+    return false;
+  MachineInstr *ValDef = MRI.getVRegDef(Val);
+  unsigned Opcode = ValDef->getOpcode();
+  if (Opcode == TargetOpcode::G_TRUNC) {
+    // Look through a trunc.
+    Val = ValDef->getOperand(1).getReg();
+    ValDef = MRI.getVRegDef(Val);
+    Opcode = ValDef->getOpcode();
+  }
+  if (isa<GAnyCmp>(ValDef)) {
+    CanNegate = true;
+    MustBeFirst = false;
+    return true;
+  }
+  // Protect against exponential runtime and stack overflow.
+  if (Depth > 6)
+    return false;
+  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
+    bool IsOR = Opcode == TargetOpcode::G_OR;
+    Register O0 = ValDef->getOperand(1).getReg();
+    Register O1 = ValDef->getOperand(2).getReg();
+    bool CanNegateL;
+    bool MustBeFirstL;
+    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
+      return false;
+    bool CanNegateR;
+    bool MustBeFirstR;
+    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
+      return false;
+
+    if (MustBeFirstL && MustBeFirstR)
+      return false;
+
+    if (IsOR) {
+      // For an OR expression we need to be able to naturally negate at least
+      // one side or we cannot do the transformation at all.
+      if (!CanNegateL && !CanNegateR)
+        return false;
+      // If we the result of the OR will be negated and we can naturally negate
+      // the leaves, then this sub-tree as a whole negates naturally.
+      CanNegate = WillNegate && CanNegateL && CanNegateR;
+      // If we cannot naturally negate the whole sub-tree, then this must be
+      // emitted first.
+      MustBeFirst = !CanNegate;
+    } else {
+      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
+      // We cannot naturally negate an AND operation.
+      CanNegate = false;
+      MustBeFirst = MustBeFirstL || MustBeFirstR;
+    }
+    return true;
+  }
+  return false;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
+    Register LHS, Register RHS, CmpInst::Predicate CC,
+    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
+    MachineIRBuilder &MIB) const {
+  // TODO: emit CMN as an optimization.
+  auto &MRI = *MIB.getMRI();
+  LLT OpTy = MRI.getType(LHS);
+  assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
+  unsigned CCmpOpc;
+  if (CmpInst::isIntPredicate(CC)) {
+    CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
+  } else {
+    switch (OpTy.getSizeInBits()) {
+    case 16:
+      CCmpOpc = AArch64::FCCMPHrr;
+      break;
+    case 32:
+      CCmpOpc = AArch64::FCCMPSrr;
+      break;
+    case 64:
+      CCmpOpc = AArch64::FCCMPDrr;
+      break;
+    default:
+      return nullptr;
+    }
+  }
+  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+  auto CCmp =
+      MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
+  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
+  return &*CCmp;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
+    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
+    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
+  // We're at a tree leaf, produce a conditional comparison operation.
+  auto &MRI = *MIB.getMRI();
+  MachineInstr *ValDef = MRI.getVRegDef(Val);
+  unsigned Opcode = ValDef->getOpcode();
+  if (Opcode == TargetOpcode::G_TRUNC) {
+    // Look through a trunc.
+    Val = ValDef->getOperand(1).getReg();
+    ValDef = MRI.getVRegDef(Val);
+    Opcode = ValDef->getOpcode();
+  }
+  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
+    Register LHS = Cmp->getLHSReg();
+    Register RHS = Cmp->getRHSReg();
+    CmpInst::Predicate CC = Cmp->getCond();
+    if (Negate)
+      CC = CmpInst::getInversePredicate(CC);
+    if (isa<GICmp>(Cmp)) {
+      OutCC = changeICMPPredToAArch64CC(CC);
+    } else {
+      // Handle special FP cases.
+      AArch64CC::CondCode ExtraCC;
+      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
+      // Some floating point conditions can't be tested with a single condition
+      // code. Construct an additional comparison in this case.
+      if (ExtraCC != AArch64CC::AL) {
+        MachineInstr *ExtraCmp;
+        if (!CCOp)
+          ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
+        else
+          ExtraCmp =
+              emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
+        CCOp = ExtraCmp->getOperand(0).getReg();
+        Predicate = ExtraCC;
+      }
+    }
+
+    // Produce a normal comparison if we are first in the chain
+    if (!CCOp) {
+      auto Dst = MRI.cloneVirtualRegister(LHS);
+      if (isa<GICmp>(Cmp))
+        return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
+      return emitFPCompare(Cmp->getOperand(2).getReg(),
+                           Cmp->getOperand(3).getReg(), MIB);
+    }
+    // Otherwise produce a ccmp.
+    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
+  }
+  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
+
+  bool IsOR = Opcode == TargetOpcode::G_OR;
+
+  Register LHS = ValDef->getOperand(1).getReg();
+  bool CanNegateL;
+  bool MustBeFirstL;
+  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
+  assert(ValidL && "Valid conjunction/disjunction tree");
+  (void)ValidL;
+
+  Register RHS = ValDef->getOperand(2).getReg();
+  bool CanNegateR;
+  bool MustBeFirstR;
+  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
+  assert(ValidR && "Valid conjunction/disjunction tree");
+  (void)ValidR;
+
+  // Swap sub-tree that must come first to the right side.
+  if (MustBeFirstL) {
+    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
+    std::swap(LHS, RHS);
+    std::swap(CanNegateL, CanNegateR);
+    std::swap(MustBeFirstL, MustBeFirstR);
+  }
+
+  bool NegateR;
+  bool NegateAfterR;
+  bool NegateL;
+  bool NegateAfterAll;
+  if (Opcode == TargetOpcode::G_OR) {
+    // Swap the sub-tree that we can negate naturally to the left.
+    if (!CanNegateL) {
+      assert(CanNegateR && "at least one side must be negatable");
+      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
+      assert(!Negate);
+      std::swap(LHS, RHS);
+      NegateR = false;
+      NegateAfterR = true;
+    } else {
+      // Negate the left sub-tree if possible, otherwise negate the result.
+      NegateR = CanNegateR;
+      NegateAfterR = !CanNegateR;
+    }
+    NegateL = true;
+    NegateAfterAll = !Negate;
+  } else {
+    assert(Opcode == TargetOpcode::G_AND &&
+           "Valid conjunction/disjunction tree");
+    assert(!Negate && "Valid conjunction/disjunction tree");
+
+    NegateL = false;
+    NegateR = false;
+    NegateAfterR = false;
+    NegateAfterAll = false;
+  }
+
+  // Emit sub-trees.
+  AArch64CC::CondCode RHSCC;
+  MachineInstr *CmpR =
+      emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
+  if (NegateAfterR)
+    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+  MachineInstr *CmpL = emitConjunctionRec(
+      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
+  if (NegateAfterAll)
+    OutCC = AArch64CC::getInvertedCondCode(OutCC);
+  return CmpL;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConjunction(
+    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
+  bool DummyCanNegate;
+  bool DummyMustBeFirst;
+  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
+                          *MIB.getMRI()))
+    return nullptr;
+  return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
+}
+
+bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
+                                                         MachineInstr &CondMI) {
+  AArch64CC::CondCode AArch64CC;
+  MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
+  if (!ConjMI)
+    return false;
+
+  emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
+  SelI.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
   MachineRegisterInfo &MRI = *MIB.getMRI();
   // We want to recognize this pattern:
   //
@@ -4750,12 +5054,12 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
   }
 
   // Is the condition defined by a compare?
-  if (!CondDef)
-    return false;
-
   unsigned CondOpc = CondDef->getOpcode();
-  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
+  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
+    if (tryOptSelectConjunction(I, *CondDef))
+      return true;
     return false;
+  }
 
   AArch64CC::CondCode CondCode;
   if (CondOpc == TargetOpcode::G_ICMP) {
@@ -5081,7 +5385,7 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
   // the original size to get the result we want.
   Register DemoteVec = InsMI->getOperand(0).getReg();
   const TargetRegisterClass *RC =
-      getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
+      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
   if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
     LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
     return false;
@@ -5198,12 +5502,11 @@ bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
       }))
     return false;
   unsigned SubReg;
-  const TargetRegisterClass *EltRC =
-      getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+  const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
   if (!EltRC)
     return false;
   const TargetRegisterClass *DstRC =
-      getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+      getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
   if (!DstRC)
     return false;
   if (!getSubRegForClass(EltRC, TRI, SubReg))
@@ -5261,7 +5564,7 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
   if (DstSize < 128) {
     // Force this to be FPR using the destination vector.
     const TargetRegisterClass *RC =
-        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
+        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
     if (!RC)
       return false;
     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
@@ -5528,7 +5831,7 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
     uint64_t Key = I.getOperand(3).getImm();
     Register DiscReg = I.getOperand(4).getReg();
     auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
-    bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
+    bool IsDiscZero = DiscVal && DiscVal->isNullValue();
 
     if (Key > 3)
       return false;
@@ -5777,8 +6080,6 @@ AArch64InstructionSelector::selectExtendedSHL(
   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
 
   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
-  if (!OffsetInst)
-    return None;
 
   unsigned OffsetOpc = OffsetInst->getOpcode();
   bool LookedThroughZExt = false;
@@ -5932,7 +6233,7 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
 
   // We need a GEP.
   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
-  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
+  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
     return None;
 
   // If this is used more than once, let's not bother folding.
@@ -6112,14 +6413,12 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
     return None;
 
   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
-  if (!RootDef)
-    return None;
 
   MachineOperand &OffImm = RootDef->getOperand(2);
   if (!OffImm.isReg())
     return None;
   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
-  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
+  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
     return None;
   int64_t RHSC;
   MachineOperand &RHSOp1 = RHS->getOperand(1);
@@ -6187,9 +6486,6 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
     return None;
 
   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
-  if (!RootDef)
-    return None;
-
   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
     return {{
         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
@@ -6210,27 +6506,26 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
     MachineOperand &RHS = RootDef->getOperand(2);
     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
-    if (LHSDef && RHSDef) {
-      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
-      unsigned Scale = Log2_32(Size);
-      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
-        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
-          return {{
-              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
-              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
-          }};
+    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
+    unsigned Scale = Log2_32(Size);
+    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
         return {{
-            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
         }};
-      }
+
+      return {{
+          [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+          [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
+      }};
     }
   }
 
   // Before falling back to our general case, check if the unscaled
   // instructions can handle this. If so, that's preferable.
-  if (selectAddrModeUnscaled(Root, Size).hasValue())
+  if (selectAddrModeUnscaled(Root, Size))
     return None;
 
   return {{
@@ -6269,8 +6564,6 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
   // Check if the operand is defined by an instruction which corresponds to
   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
-  if (!ShiftInst)
-    return None;
 
   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
   if (ShType == AArch64_AM::InvalidShiftExtend)
     return None;
@@ -6425,7 +6718,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
     // to.
     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
-      if (ExtInst && isDef32(*ExtInst))
+      if (isDef32(*ExtInst))
        return None;
     }
   }
@@ -6450,7 +6743,7 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
   Optional<int64_t> CstVal =
       getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
   assert(CstVal && "Expected constant value");
-  MIB.addImm(CstVal.getValue());
+  MIB.addImm(*CstVal);
 }
 
 void AArch64InstructionSelector::renderLogicalImm32(
@@ -6498,6 +6791,17 @@ void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
       AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
 }
 
+void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
+    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+         "Expected G_FCONSTANT");
+  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
+                                                      .getFPImm()
+                                                      ->getValueAPF()
+                                                      .bitcastToAPInt()
+                                                      .getZExtValue()));
+}
+
 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
     const MachineInstr &MI, unsigned NumBytes) const {
   if (!MI.mayLoadOrStore())
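The emitConjunction machinery added in this file ports the long-standing SelectionDAG CCMP emission to GlobalISel: a single-use G_AND/G_OR tree of compares feeding a select becomes one flag-setting compare followed by conditional compares, where each CCMP's NZCV immediate is chosen so that the inverted out-condition holds on the "predicate failed" path. A rough illustration, plus a self-contained check of that NZCV trick (the register choices are invented, and the asm is what one would expect for a 32-bit "a == b && c < d" select, not output quoted from the patch):

  // Expected shape for: select((a == b) && (c < d)), t, f
  //   subs wzr, w0, w1        ; flags <- (a == b)
  //   ccmp w2, w3, #0, eq     ; if eq: flags <- (c < d); else NZCV <- 0000
  //   csel w0, w4, w5, lt
  #include <cassert>
  int main() {
    struct { bool N, Z, C, V; } Failed = {false, false, false, false}; // #0
    bool LT = Failed.N != Failed.V; // AArch64 "lt"
    bool GE = Failed.N == Failed.V; // the inverted code, per getInvertedCondCode
    assert(!LT && GE);              // a failed chain cleanly selects "f"
    return 0;
  }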
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e9df7e001d38..74ec9373ce9e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -169,7 +169,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .scalarize(0);
 
   getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
-      .lowerFor({s1, s8, s16, s32, s64, v2s64, v4s32, v2s32})
+      .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
       .widenScalarOrEltToNextPow2(0)
      .clampScalarOrElt(0, s32, s64)
      .clampNumElements(0, v2s32, v4s32)
@@ -180,7 +180,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder({G_SMULO, G_UMULO})
       .widenScalarToNextPow2(0, /*Min = */ 32)
       .clampScalar(0, s32, s64)
-      .lowerIf(typeIs(1, s1));
+      .lower();
 
   getActionDefinitionsBuilder({G_SMULH, G_UMULH})
       .legalFor({s64, v8s16, v16s8, v4s32})
@@ -308,7 +308,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       // These extends are also legal
       .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
       .widenScalarToNextPow2(0, /* MinSize = */8)
-      .lowerIfMemSizeNotPow2()
+      .lowerIfMemSizeNotByteSizePow2()
       .clampScalar(0, s8, s64)
       .narrowScalarIf([=](const LegalityQuery &Query) {
        // Clamp extending load results to 32-bits.
@@ -317,10 +317,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                        Query.Types[0].getSizeInBits() > 32;
                },
                changeTo(0, s32))
-      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
-      .lowerIf([=](const LegalityQuery &Query) {
-        return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
-      })
       .clampMaxNumElements(0, s8, 16)
       .clampMaxNumElements(0, s16, 8)
       .clampMaxNumElements(0, s32, 4)
@@ -536,7 +532,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
       .lowerIf(
-          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0)));
+          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
 
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
       .customIf([](const LegalityQuery &Query) {
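Dropping s1 from the G_SREM/G_UREM lowerFor list and switching G_SMULO/G_UMULO to a bare .lower() relies on the surrounding widen/clamp steps to normalize the types before the lowering rule fires; the lowering itself is unchanged. For reference, the generic LegalizerHelper expands a rem in terms of div/mul/sub, which rests on the usual truncating-division identity. A plain C++ stand-in for the emitted G_SDIV/G_MUL/G_SUB sequence, checkable in isolation (this is a sketch of the generic lowering's arithmetic, not code from this patch):

  #include <cassert>
  int loweredSRem(int a, int b) {
    int q = a / b;   // G_SDIV: truncates toward zero, matching G_SREM's quotient
    int m = q * b;   // G_MUL
    return a - m;    // G_SUB: a % b == a - (a / b) * b
  }
  int main() {
    assert(loweredSRem(7, 3) == 7 % 3);
    assert(loweredSRem(-7, 3) == -7 % 3);
    assert(loweredSRem(7, -3) == 7 % -3);
    return 0;
  }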
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 3dec980a819a..ba206bac68d1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,11 +20,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -133,7 +135,7 @@ bool matchAArch64MulConstCombine(
   if (!Const)
     return false;
 
-  const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits());
+  APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
   // The following code is ported from AArch64ISelLowering.
   // Multiplication of a power of two plus/minus one can be done more
   // cheaply as as shift+add/sub. For now, this is true unilaterally. If
@@ -258,7 +260,7 @@ void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
   //   %d(s64) = G_ZEXT %a(s32)
   Observer.changingInstr(MI);
   MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
-  MI.RemoveOperand(2);
+  MI.removeOperand(2);
   Observer.changedInstr(MI);
 }
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 3ff67d188822..d7959a82c484 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -58,7 +58,7 @@ struct ShuffleVectorPseudo {
   ShuffleVectorPseudo(unsigned Opc, Register Dst,
                       std::initializer_list<SrcOp> SrcOps)
       : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
-  ShuffleVectorPseudo() {}
+  ShuffleVectorPseudo() = default;
 };
 
 /// Check if a vector shuffle corresponds to a REV instruction with the
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index cc45c6642ac5..ce6f15a799b7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -149,7 +149,7 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
                           "op in fcmp range: "
                        << II);
       II.setDesc(TII->get(NewOpc));
-      II.RemoveOperand(DeadNZCVIdx);
+      II.removeOperand(DeadNZCVIdx);
       // Changing the opcode can result in differing regclass requirements,
       // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
       // Constrain the regclasses, possibly introducing a copy.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index d3f4130d2ba1..275949c5ee64 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -13,6 +13,7 @@
 
 #include "AArch64GlobalISelUtils.h"
 #include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -162,13 +163,14 @@ static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
 
   // Check whether folding this offset is legal. It must not go out of bounds of
   // the referenced object to avoid violating the code model, and must be
-  // smaller than 2^21 because this is the largest offset expressible in all
-  // object formats.
+  // smaller than 2^20 because this is the largest offset expressible in all
+  // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
+  // stores an immediate signed 21 bit offset.)
   //
   // This check also prevents us from folding negative offsets, which will end
   // up being treated in the same way as large positive ones. They could also
   // cause code model violations, and aren't really common enough to matter.
- if (NewOffset >= (1 << 21)) + if (NewOffset >= (1 << 20)) return false; Type *T = GV->getValueType(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 515a5c63a559..f0b311289c41 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -12,20 +12,19 @@ //===----------------------------------------------------------------------===// #include "AArch64RegisterBankInfo.h" -#include "AArch64InstrInfo.h" #include "AArch64RegisterInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -42,8 +41,8 @@ using namespace llvm; -AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) - : AArch64GenRegisterBankInfo() { +AArch64RegisterBankInfo::AArch64RegisterBankInfo( + const TargetRegisterInfo &TRI) { static llvm::once_flag InitializeRegisterBankFlag; static auto InitializeRegisterBankOnce = [&]() { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 2d76e48d7df2..01ef0bd92d50 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H #define LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "AArch64GenRegisterBank.inc" diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index dbb8e85713cb..e4b547e17f64 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -22,10 +22,10 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -470,7 +470,7 @@ bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, // We are properly aligned, so write NOPs as requested. 
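// Note: 0xd503201f is the A64 NOP encoding, and "\x1f\x20\x03\xd5" is that
// 32-bit word spelled out in little-endian byte order. The replacement below
// assumes (correctly for A64) that instruction encodings are stored
// little-endian even on big-endian data configurations such as aarch64_be,
// which is why a fixed byte string can stand in for the endian-aware write.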
   Count /= 4;
   for (uint64_t i = 0; i != Count; ++i)
-    support::endian::write<uint32_t>(OS, 0xd503201f, Endian);
+    OS.write("\x1f\x20\x03\xd5", 4);
   return true;
 }
@@ -592,17 +592,18 @@ public:
       if (XReg != AArch64::FP)
         return CU::UNWIND_ARM64_MODE_DWARF;
-      assert(XReg == AArch64::FP && "Invalid frame pointer!");
-      assert(i + 2 < e && "Insufficient CFI instructions to define a frame!");
+      if (i + 2 >= e)
+        return CU::UNWIND_ARM64_MODE_DWARF;
       const MCCFIInstruction &LRPush = Instrs[++i];
-      assert(LRPush.getOperation() == MCCFIInstruction::OpOffset &&
-             "Link register not pushed!");
+      if (LRPush.getOperation() != MCCFIInstruction::OpOffset)
+        return CU::UNWIND_ARM64_MODE_DWARF;
       const MCCFIInstruction &FPPush = Instrs[++i];
-      assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
-             "Frame pointer not pushed!");
+      if (FPPush.getOperation() != MCCFIInstruction::OpOffset)
+        return CU::UNWIND_ARM64_MODE_DWARF;
 
-      assert(FPPush.getOffset() + 8 == LRPush.getOffset());
+      if (FPPush.getOffset() + 8 != LRPush.getOffset())
+        return CU::UNWIND_ARM64_MODE_DWARF;
       CurOffset = FPPush.getOffset();
 
       unsigned LRReg = *MRI.getLLVMRegNum(LRPush.getRegister(), true);
@@ -611,8 +612,8 @@ public:
       LRReg = getXRegFromWReg(LRReg);
       FPReg = getXRegFromWReg(FPReg);
 
-      assert(LRReg == AArch64::LR && FPReg == AArch64::FP &&
-             "Pushing invalid registers for frame!");
+      if (LRReg != AArch64::LR || FPReg != AArch64::FP)
+        return CU::UNWIND_ARM64_MODE_DWARF;
 
       // Indicate that the function has a frame.
       CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME;
@@ -620,7 +621,8 @@ public:
       break;
     }
     case MCCFIInstruction::OpDefCfaOffset: {
-      assert(StackSize == 0 && "We already have the CFA offset!");
+      if (StackSize != 0)
+        return CU::UNWIND_ARM64_MODE_DWARF;
       StackSize = std::abs(Inst.getOffset());
       break;
     }
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 78c0e90b1384..46edb12959d2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -254,6 +254,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
 }
 
 void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) {
+  getStreamer().getAssembler().registerSymbol(*Symbol);
   cast<MCSymbolELF>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
 }
 
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ee0870d9ef7a..5d2ba7ef02c0 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1340,11 +1340,6 @@ void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI,
   O << getRegisterName(Even) << ", " << getRegisterName(Odd);
 }
 
-static const unsigned MatrixZADRegisterTable[] = {
-  AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
-  AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7
-};
-
 void AArch64InstPrinter::printMatrixTileList(const MCInst *MI, unsigned OpNum,
                                              const MCSubtargetInfo &STI,
                                              raw_ostream &O) {
@@ -1362,7 +1357,7 @@ void AArch64InstPrinter::printMatrixTileList(const MCInst *MI, unsigned OpNum,
     unsigned Reg = RegMask & (1 << I);
     if (Reg == 0)
       continue;
-    O << getRegisterName(MatrixZADRegisterTable[I]);
+    O << getRegisterName(AArch64::ZAD0 + I);
     if (Printed + 1 != NumRegs)
       O << ", ";
     ++Printed;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index ad97071434df..2901e5c0fe4d 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -16,6 +16,7 @@ #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" @@ -677,7 +678,6 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison( #include "AArch64GenMCCodeEmitter.inc" MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new AArch64MCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 844bd6bbada9..cb39c2a11487 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index c1186ae804d2..34e3b2cf58e4 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -52,21 +52,14 @@ static MCSubtargetInfo * createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty()) { CPU = "generic"; + if (FS.empty()) + FS = "+v8a"; if (TT.isArm64e()) CPU = "apple-a12"; } - // Most of the NEON instruction set isn't supported in streaming mode on SME - // targets, disable NEON unless explicitly requested. 
- bool RequestedNEON = FS.contains("neon"); - bool RequestedStreamingSVE = FS.contains("streaming-sve"); - MCSubtargetInfo *STI = - createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); - if (RequestedStreamingSVE && !RequestedNEON && - STI->hasFeature(AArch64::FeatureNEON)) - STI->ToggleFeature(AArch64::FeatureNEON); - return STI; + return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { @@ -243,6 +236,31 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { MRI->mapLLVMRegToCVReg(I.Reg, static_cast(I.CVReg)); } +bool AArch64_MC::isQForm(const MCInst &MI, const MCInstrInfo *MCII) { + const auto &FPR128 = AArch64MCRegisterClasses[AArch64::FPR128RegClassID]; + return llvm::any_of(MI, [&](const MCOperand &Op) { + return Op.isReg() && FPR128.contains(Op.getReg()); + }); +} + +bool AArch64_MC::isFpOrNEON(const MCInst &MI, const MCInstrInfo *MCII) { + const auto &FPR128 = AArch64MCRegisterClasses[AArch64::FPR128RegClassID]; + const auto &FPR64 = AArch64MCRegisterClasses[AArch64::FPR64RegClassID]; + const auto &FPR32 = AArch64MCRegisterClasses[AArch64::FPR32RegClassID]; + const auto &FPR16 = AArch64MCRegisterClasses[AArch64::FPR16RegClassID]; + const auto &FPR8 = AArch64MCRegisterClasses[AArch64::FPR8RegClassID]; + + auto IsFPR = [&](const MCOperand &Op) { + if (!Op.isReg()) + return false; + auto Reg = Op.getReg(); + return FPR128.contains(Reg) || FPR64.contains(Reg) || FPR32.contains(Reg) || + FPR16.contains(Reg) || FPR8.contains(Reg); + }; + + return llvm::any_of(MI, IsFPR); +} + static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitAArch64MCRegisterInfo(X, AArch64::LR); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 66cb7a37a958..049c49796dc6 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" #include @@ -22,6 +23,7 @@ class formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; class MCContext; +class MCInst; class MCInstrInfo; class MCInstPrinter; class MCRegisterInfo; @@ -33,7 +35,6 @@ class MCTargetStreamer; class Target; MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createAArch64leAsmBackend(const Target &T, const MCSubtargetInfo &STI, @@ -60,8 +61,16 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, namespace AArch64_MC { void initLLVMToCVRegMapping(MCRegisterInfo *MRI); +bool isQForm(const MCInst &MI, const MCInstrInfo *MCII); +bool isFpOrNEON(const MCInst &MI, const MCInstrInfo *MCII); } +namespace AArch64 { +enum OperandType { + OPERAND_IMPLICIT_IMM_0 = MCOI::OPERAND_FIRST_TARGET, +}; +} // namespace AArch64 + } // End llvm namespace // Defines symbolic names for AArch64 registers. 
This defines a mapping from diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index 92552c3d41d5..1a8071ac1b33 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -76,7 +76,7 @@ void AArch64TargetStreamer::emitNoteSection(unsigned Flags) { return; } MCSection *Cur = OutStreamer.getCurrentSectionOnly(); - OutStreamer.SwitchSection(Nt); + OutStreamer.switchSection(Nt); // Emit the note header. OutStreamer.emitValueToAlignment(Align(8).value()); @@ -92,7 +92,7 @@ void AArch64TargetStreamer::emitNoteSection(unsigned Flags) { OutStreamer.emitIntValue(0, 4); // pad OutStreamer.endSection(Nt); - OutStreamer.SwitchSection(Cur); + OutStreamer.switchSection(Cur); } void AArch64TargetStreamer::emitInst(uint32_t Inst) { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index 0072af4cc16e..46ffa50b3e6e 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index b688165d3a7b..820d940c1ed2 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -8,6 +8,7 @@ #include "AArch64WinCOFFStreamer.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCWin64EH.h" @@ -26,14 +27,14 @@ public: std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} - void EmitWinEHHandlerData(SMLoc Loc) override; - void EmitWindowsUnwindTables() override; - void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void emitWinEHHandlerData(SMLoc Loc) override; + void emitWindowsUnwindTables() override; + void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; void finishImpl() override; }; -void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { - MCStreamer::EmitWinEHHandlerData(Loc); +void AArch64WinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive // actually switches to the .xdata section! 
@@ -41,11 +42,11 @@ void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { /* HandlerData = */ true); } -void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { +void AArch64WinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) { EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); } -void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() { +void AArch64WinCOFFStreamer::emitWindowsUnwindTables() { if (!getNumWinFrameInfos()) return; EHStreamer.Emit(*this); @@ -53,7 +54,7 @@ void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() { void AArch64WinCOFFStreamer::finishImpl() { emitFrames(nullptr); - EmitWindowsUnwindTables(); + emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); } @@ -71,10 +72,9 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinUnwindCode(unsigned UnwindCode, WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); if (!CurFrame) return; - MCSymbol *Label = S.emitCFILabel(); - auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset); + auto Inst = WinEH::Instruction(UnwindCode, /*Label=*/nullptr, Reg, Offset); if (InEpilogCFI) - CurFrame->EpilogMap[CurrentEpilog].push_back(Inst); + CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst); else CurFrame->Instructions.push_back(Inst); } @@ -176,7 +176,8 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFIPrologEnd() { MCSymbol *Label = S.emitCFILabel(); CurFrame->PrologEnd = Label; - WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0); + WinEH::Instruction Inst = + WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0); auto it = CurFrame->Instructions.begin(); CurFrame->Instructions.insert(it, Inst); } @@ -198,9 +199,9 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFIEpilogEnd() { return; InEpilogCFI = false; - MCSymbol *Label = S.emitCFILabel(); - WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0); - CurFrame->EpilogMap[CurrentEpilog].push_back(Inst); + WinEH::Instruction Inst = + WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0); + CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst); CurrentEpilog = nullptr; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 41f2cead4cf8..2744e81f99f1 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -10,14 +10,36 @@ // //===----------------------------------------------------------------------===// +def imm_to_tile8 : ComplexPattern", []>; +def imm_to_tile16 : ComplexPattern", []>; +def imm_to_tile32 : ComplexPattern", []>; +def imm_to_tile64 : ComplexPattern", []>; +def imm_to_tile128 : ComplexPattern", []>; + +def tileslice8 : ComplexPattern", []>; +def tileslice16 : ComplexPattern", []>; +def tileslice32 : ComplexPattern", []>; +def tileslice64 : ComplexPattern", []>; +def tileslice128 : ComplexPattern", []>; // nop + +def am_sme_indexed_b4 :ComplexPattern", [], [SDNPWantRoot]>; + //===----------------------------------------------------------------------===// // SME Outer Products //===----------------------------------------------------------------------===// +class sme_outer_product_pseudo + : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm, + zpr_ty:$zn, zpr_ty:$zm), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let usesCustomInserter = 1; +} + class sme_fp_outer_product_inst : I<(outs za_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, 
zpr_ty:$Zn, zpr_ty:$Zm), + (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { @@ -34,26 +56,42 @@ class sme_fp_outer_product_inst - : sme_fp_outer_product_inst { - bits<2> ZAda; - let Inst{1-0} = ZAda; - let Inst{2} = 0b0; +multiclass sme_outer_product_fp32 { + def NAME : sme_fp_outer_product_inst { + bits<2> ZAda; + let Inst{1-0} = ZAda; + let Inst{2} = 0b0; + } + + def NAME # _PSEUDO : sme_outer_product_pseudo; + + def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), + (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)), + (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; } -class sme_outer_product_fp64 - : sme_fp_outer_product_inst { - bits<3> ZAda; - let Inst{2-0} = ZAda; +multiclass sme_outer_product_fp64 { + def NAME : sme_fp_outer_product_inst { + bits<3> ZAda; + let Inst{2-0} = ZAda; + } + + def NAME # _PSEUDO : sme_outer_product_pseudo; + + def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), + (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)), + (!cast(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>; } class sme_int_outer_product_inst : I<(outs za_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), + (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { @@ -72,26 +110,44 @@ class sme_int_outer_product_inst opc, string mnemonic> - : sme_int_outer_product_inst { - bits<2> ZAda; - let Inst{1-0} = ZAda; - let Inst{2} = 0b0; +multiclass sme_int_outer_product_i32 opc, string mnemonic, + SDPatternOperator op> { + def NAME : sme_int_outer_product_inst { + bits<2> ZAda; + let Inst{1-0} = ZAda; + let Inst{2} = 0b0; + } + + def NAME # _PSEUDO : sme_outer_product_pseudo; + + def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm), + (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)), + (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; } -class sme_int_outer_product_i64 opc, string mnemonic> - : sme_int_outer_product_inst { - bits<3> ZAda; - let Inst{2-0} = ZAda; +multiclass sme_int_outer_product_i64 opc, string mnemonic, + SDPatternOperator op> { + def NAME : sme_int_outer_product_inst { + bits<3> ZAda; + let Inst{2-0} = ZAda; + } + + def NAME # _PSEUDO : sme_outer_product_pseudo; + + def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), + (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)), + (!cast(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>; } class sme_outer_product_widening_inst : I<(outs TileOp32:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm), + (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { @@ -109,14 +165,28 @@ class sme_outer_product_widening_inst let Inst{4} = S; let Inst{3-2} = 0b00; let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; } -multiclass sme_bf16_outer_product { - def : sme_outer_product_widening_inst<0b0, S, mnemonic>; +multiclass sme_bf16_outer_product { + def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>; + + def NAME # _PSEUDO : sme_outer_product_pseudo; + + def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), + (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)), + (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; } -multiclass sme_f16_outer_product { - def : sme_outer_product_widening_inst<0b1, S, mnemonic>; +multiclass sme_f16_outer_product { + def NAME : 
sme_outer_product_widening_inst<0b1, S, mnemonic>; + + def NAME # _PSEUDO : sme_outer_product_pseudo; + + def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), + (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)), + (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; } //===----------------------------------------------------------------------===// @@ -126,7 +196,7 @@ multiclass sme_f16_outer_product { class sme_add_vector_to_tile_inst : I<(outs tile_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), + (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn", "", []>, Sched<[]> { bits<3> Pm; @@ -140,6 +210,8 @@ class sme_add_vector_to_tile_inst @@ -225,6 +297,33 @@ multiclass sme_mem_ld_ss_aliases { defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">; } +multiclass sme_mem_ld_ss_patterns { + // base, tileslice + def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile, + (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), + (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>; + + // reg + reg, tileslice + let AddedComplexity = 1 in { + def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset), + tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, + offset_ty:$imm))), + (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>; + } +} + +class sme_load_pseudo + : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx, + i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + let mayLoad = 1; +} + multiclass sme_mem_ld_v_ss { def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b", !if(is_col, TileVectorOpV8, TileVectorOpH8), @@ -264,6 +363,40 @@ multiclass sme_mem_ld_v_ss { } defm : sme_mem_ld_ss_aliases; + + // Pseudo instructions for lowering intrinsics, using immediates instead of + // tile registers. 
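The _PSEUDO definitions that follow carry the tile as a plain immediate and set usesCustomInserter; per the comments in this patch, AArch64ISelLowering.cpp later rewrites each pseudo into the real instruction with a concrete tile register. A simplified sketch of the immediate-to-register step, assuming contiguous register enum values as the AArch64::ZAD0 + I printer change earlier in this patch does (names illustrative, not LLVM API):

// With contiguous enum values (Base, Base+1, ...), a tile-index immediate
// selects the concrete tile register by simple addition.
unsigned selectTileReg(unsigned BaseTileReg, unsigned TileImm) {
  return BaseTileReg + TileImm;
}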
+ def _PSEUDO_B : sme_load_pseudo; + def _PSEUDO_H : sme_load_pseudo; + def _PSEUDO_S : sme_load_pseudo; + def _PSEUDO_D : sme_load_pseudo; + def _PSEUDO_Q : sme_load_pseudo; + + defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_B), + !if(is_col, int_aarch64_sme_ld1b_vert, + int_aarch64_sme_ld1b_horiz), + sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0, + tileslice8>; + defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_H), + !if(is_col, int_aarch64_sme_ld1h_vert, + int_aarch64_sme_ld1h_horiz), + imm0_1, imm0_7, am_sve_regreg_lsl1, + tileslice16>; + defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_S), + !if(is_col, int_aarch64_sme_ld1w_vert, + int_aarch64_sme_ld1w_horiz), + imm0_3, imm0_3, am_sve_regreg_lsl2, + tileslice32>; + defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_D), + !if(is_col, int_aarch64_sme_ld1d_vert, + int_aarch64_sme_ld1d_horiz), + imm0_7, imm0_1, am_sve_regreg_lsl3, + tileslice64>; + defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_Q), + !if(is_col, int_aarch64_sme_ld1q_vert, + int_aarch64_sme_ld1q_horiz), + imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4, + tileslice128>; } multiclass sme_mem_ld_ss { @@ -310,6 +443,25 @@ multiclass sme_mem_st_ss_aliases { defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>; } +multiclass sme_mem_st_ss_patterns { + // base, tileslice + def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile), + (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), + (Inst $tile, $idx, $imm, $pg, $base, XZR)>; + + // reg + reg, tileslice + let AddedComplexity = 1 in { + def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset), + (imm2tile untyped:$tile), + (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), + (Inst $tile, $idx, $imm, $pg, $base, $offset)>; + } +} + multiclass sme_mem_st_v_ss { def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b", !if(is_col, TileVectorOpV8, TileVectorOpH8), @@ -349,6 +501,32 @@ multiclass sme_mem_st_v_ss { } defm : sme_mem_st_ss_aliases; + + defm : sme_mem_st_ss_patterns(NAME # _B), + !if(is_col, int_aarch64_sme_st1b_vert, + int_aarch64_sme_st1b_horiz), + imm0_15, imm_to_tile8, am_sve_regreg_lsl0, + tileslice8>; + defm : sme_mem_st_ss_patterns(NAME # _H), + !if(is_col, int_aarch64_sme_st1h_vert, + int_aarch64_sme_st1h_horiz), + imm0_7, imm_to_tile16, am_sve_regreg_lsl1, + tileslice16>; + defm : sme_mem_st_ss_patterns(NAME # _S), + !if(is_col, int_aarch64_sme_st1w_vert, + int_aarch64_sme_st1w_horiz), + imm0_3, imm_to_tile32, am_sve_regreg_lsl2, + tileslice32>; + defm : sme_mem_st_ss_patterns(NAME # _D), + !if(is_col, int_aarch64_sme_st1d_vert, + int_aarch64_sme_st1d_horiz), + imm0_1, imm_to_tile64, am_sve_regreg_lsl3, + tileslice64>; + defm : sme_mem_st_ss_patterns(NAME # _Q), + !if(is_col, int_aarch64_sme_st1q_vert, + int_aarch64_sme_st1q_horiz), + sme_elm_idx0_0, imm_to_tile128, + am_sve_regreg_lsl4, tileslice128>; } multiclass sme_mem_st_ss { @@ -360,7 +538,7 @@ multiclass sme_mem_st_ss { // SME Save and Restore Array //===----------------------------------------------------------------------===// -class sme_spill_fill_inst +class sme_spill_fill_base : I, Sched<[]> { @@ -375,33 +553,61 @@ class sme_spill_fill_inst let Inst{9-5} = Rn; let Inst{4} = 0b0; let Inst{3-0} = imm4; - - let mayLoad = !not(isStore); - let mayStore = isStore; } -multiclass sme_spill_fill { - def NAME : sme_spill_fill_inst; - +let mayStore = 1 in +class sme_spill_inst + : sme_spill_fill_base<0b1, (outs), + (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, + sme_elm_idx0_15:$imm4, GPR64sp:$Rn, + imm0_15:$offset), + opcodestr>; 
+let mayLoad = 1 in +class sme_fill_inst + : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt), + (ins MatrixIndexGPR32Op12_15:$Rv, + sme_elm_idx0_15:$imm4, GPR64sp:$Rn, + imm0_15:$offset), + opcodestr>; +multiclass sme_spill { + def NAME : sme_spill_inst; def : InstAlias(NAME) MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; -} - -multiclass sme_spill { - defm NAME : sme_spill_fill<0b1, (outs), - (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, - sme_elm_idx0_15:$imm4, GPR64sp:$Rn, - imm0_15:$offset), - opcodestr>; + // base + def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base), + (!cast(NAME) ZA, $idx, 0, $base, 0)>; + // scalar + immediate (mul vl) + let AddedComplexity = 2 in { + def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, + (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)), + (!cast(NAME) ZA, $idx, 0, $base, $imm4)>; + } } multiclass sme_fill { - defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt), - (ins MatrixIndexGPR32Op12_15:$Rv, - sme_elm_idx0_15:$imm4, GPR64sp:$Rn, - imm0_15:$offset), - opcodestr>; + def NAME : sme_fill_inst; + def : InstAlias(NAME) MatrixOp:$ZAt, + MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; + def NAME # _PSEUDO + : Pseudo<(outs), + (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4, + GPR64sp:$base), []>, + Sched<[]> { + // Translated to actual instruction in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + let mayLoad = 1; + } + // base + def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base), + (!cast(NAME # _PSEUDO) $idx, 0, $base)>; + // scalar + immediate (mul vl) + let AddedComplexity = 2 in { + def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, + (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)), + (!cast(NAME # _PSEUDO) $idx, $imm4, $base)>; + } } //===----------------------------------------------------------------------===// @@ -429,8 +635,12 @@ class sme_vector_to_tile_inst sz, MatrixTileVectorOperand tile_ty bit is_col, Operand imm_ty, ZPRRegOp zpr_ty, string mnemonic> : sme_vector_to_tile_base; + (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), + mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{ + + let Constraints = "$ZAd = $_ZAd"; +} + multiclass sme_vector_to_tile_aliases; } +multiclass sme_vector_to_tile_patterns { + def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx, + (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), + (inst imm_ty:$tile, $idx, 0, $pg, $zn)>; + let AddedComplexity = 1 in { + def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, + offset_ty:$imm)), + (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), + (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>; + } +} + +class sme_mova_insert_pseudo + : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx, + i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let usesCustomInserter = 1; +} + multiclass sme_vector_v_to_tile { def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8, TileVectorOpH8), @@ -478,6 +712,14 @@ multiclass sme_vector_v_to_tile { let Inst{3-0} = ZAd; } + // Pseudo instructions for lowering intrinsics, using immediates instead of + // tile registers. 
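Several definitions above gain a tied operand (Constraints = "$ZAda = $_ZAda" and the like) because writing a single tile slice is a read-modify-write: the untouched slices must be treated as an input to the instruction. A scalar model of that behaviour, using a fixed 4x4 tile purely for illustration (real SME tiles are scalable):

#include <cstdint>

// Stand-in for a ZA tile.
struct Tile { uint32_t Row[4][4]; };

// Writing one row leaves every other row unchanged, which is exactly what
// tying the output tile to an input operand expresses to the compiler.
static Tile writeRow(Tile ZA, unsigned R, const uint32_t (&Zn)[4]) {
  for (unsigned I = 0; I != 4; ++I)
    ZA.Row[R][I] = Zn[I];
  return ZA;
}

int main() {
  Tile ZA = {};
  const uint32_t Zn[4] = {1, 2, 3, 4};
  ZA = writeRow(ZA, 2, Zn); // rows 0, 1 and 3 still hold their old contents
  return ZA.Row[2][0] == 1 ? 0 : 1;
}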
+ def _PSEUDO_B : sme_mova_insert_pseudo; + def _PSEUDO_H : sme_mova_insert_pseudo; + def _PSEUDO_S : sme_mova_insert_pseudo; + def _PSEUDO_D : sme_mova_insert_pseudo; + def _PSEUDO_Q : sme_mova_insert_pseudo; + defm : sme_vector_to_tile_aliases(NAME # _B), !if(is_col, TileVectorOpV8, TileVectorOpH8), @@ -498,6 +740,62 @@ multiclass sme_vector_v_to_tile { !if(is_col, TileVectorOpV128, TileVectorOpH128), ZPR128, sme_elm_idx0_0>; + + defvar op = !if(is_col, int_aarch64_sme_write_vert, + int_aarch64_sme_write_horiz); + + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_B), + nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15, + op, tileslice8>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), + nxv8i16, nxv8i1, sme_elm_idx0_1, imm0_7, + op, tileslice16>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), + nxv8f16, nxv8i1, sme_elm_idx0_1, imm0_7, + op, tileslice16>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), + nxv8bf16, nxv8i1, sme_elm_idx0_1, imm0_7, + op, tileslice16>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_S), + nxv4i32, nxv4i1, sme_elm_idx0_3, imm0_3, + op, tileslice32>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_S), + nxv4f32, nxv4i1, sme_elm_idx0_3, imm0_3, + op, tileslice32>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_D), + nxv2i64, nxv2i1, sme_elm_idx0_7, imm0_1, + op, tileslice64>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_D), + nxv2f64, nxv2i1, sme_elm_idx0_7, imm0_1, + op, tileslice64>; + + defvar opq = !if(is_col, int_aarch64_sme_writeq_vert, + int_aarch64_sme_writeq_horiz); + + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv16i8, nxv16i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv8i16, nxv8i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv8f16, nxv8i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv8bf16, nxv8i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv4i32, nxv4i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv4f32, nxv4i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv2i64, nxv2i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; + defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), + nxv2f64, nxv2i1, sme_elm_idx0_15, + sme_elm_idx0_0, opq, tileslice128>; } multiclass sme_vector_to_tile { @@ -526,8 +824,11 @@ class sme_tile_to_vector_inst sz, ZPRRegOp zpr_ty, MatrixTileVectorOperand tile_ty, bit is_col, Operand imm_ty, string mnemonic> : sme_tile_to_vector_base; + (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), + mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> { + + let Constraints = "$Zd = $_Zd"; +} multiclass sme_tile_to_vector_aliases; } +multiclass sme_tile_to_vector_patterns { + def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), + (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)), + (inst $passthru, $pg, $tile, $idx, 0)>; + let AddedComplexity = 1 in { + def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), + (imm2tile untyped:$tile), + (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, + offset_ty:$imm)))), + (inst $passthru, $pg, $tile, $idx, $imm)>; + } +} + multiclass sme_tile_to_vector_v { def 
_B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8, TileVectorOpH8), @@ -589,6 +907,62 @@ multiclass sme_tile_to_vector_v { defm : sme_tile_to_vector_aliases(NAME # _Q), ZPR128, !if(is_col, TileVectorOpV128, TileVectorOpH128), sme_elm_idx0_0>; + + defvar op = !if(is_col, int_aarch64_sme_read_vert, + int_aarch64_sme_read_horiz); + + defm : sme_tile_to_vector_patterns(NAME # _B), + nxv16i8, nxv16i1, imm0_15, + imm_to_tile8, tileslice8, op>; + defm : sme_tile_to_vector_patterns(NAME # _H), + nxv8i16, nxv8i1, imm0_7, + imm_to_tile16, tileslice16, op>; + defm : sme_tile_to_vector_patterns(NAME # _H), + nxv8f16, nxv8i1, imm0_7, + imm_to_tile16, tileslice16, op>; + defm : sme_tile_to_vector_patterns(NAME # _H), + nxv8bf16, nxv8i1, imm0_7, + imm_to_tile16, tileslice16, op>; + defm : sme_tile_to_vector_patterns(NAME # _S), + nxv4i32, nxv4i1, imm0_3, + imm_to_tile32, tileslice32, op>; + defm : sme_tile_to_vector_patterns(NAME # _S), + nxv4f32, nxv4i1, imm0_3, + imm_to_tile32, tileslice32, op>; + defm : sme_tile_to_vector_patterns(NAME # _D), + nxv2i64, nxv2i1, imm0_1, + imm_to_tile64, tileslice64, op>; + defm : sme_tile_to_vector_patterns(NAME # _D), + nxv2f64, nxv2i1, imm0_1, + imm_to_tile64, tileslice64, op>; + + defvar opq = !if(is_col, int_aarch64_sme_readq_vert, + int_aarch64_sme_readq_horiz); + + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv16i8, nxv16i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv8i16, nxv8i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv8f16, nxv8i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv8bf16, nxv8i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv4i32, nxv4i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv4f32, nxv4i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv2i64, nxv2i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; + defm : sme_tile_to_vector_patterns(NAME # _Q), + nxv2f64, nxv2i1, sme_elm_idx0_0, + imm_to_tile128, tileslice128, opq>; } multiclass sme_tile_to_vector { @@ -600,8 +974,11 @@ multiclass sme_tile_to_vector { // SME Zero //===----------------------------------------------------------------------===// +// NOTE: This definition isn't really correct because there are outputs, i.e. +// the tile registers being zeroed. We fix this up in a custom inserter that +// marks the appropriate registers as being implicitly defined. 
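The class that follows encodes the tile list as an 8-bit immediate mask over the eight 64-bit tiles, as the 0b10111011 alias for {za0.s, za1.s, za3.s} further down illustrates. A sketch of how such a mask can be decoded into the implicitly defined registers the NOTE describes (assumed mechanics; BaseZAD0 stands in for AArch64::ZAD0):

#include <vector>

// Each set bit I in the mask selects 64-bit tile ZAD0 + I; a custom inserter
// can mark these as implicit defs, since the instruction itself no longer
// declares any outputs.
static std::vector<unsigned> tilesFromMask(unsigned BaseZAD0, unsigned Mask) {
  std::vector<unsigned> Defs;
  for (unsigned I = 0; I != 8; ++I)
    if (Mask & (1u << I))
      Defs.push_back(BaseZAD0 + I);
  return Defs;
}

// 0xBB == 0b10111011 selects six 64-bit tiles ({za0.s, za1.s, za3.s}).
int main() { return tilesFromMask(0, 0xBB).size() == 6 ? 0 : 1; }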
class sme_zero_inst - : I<(outs MatrixTileList:$imm), (ins), + : I<(outs), (ins MatrixTileList:$imm), mnemonic, "\t$imm", "", []>, Sched<[]> { bits<8> imm; let Inst{31-8} = 0b110000000000100000000000; @@ -626,6 +1003,15 @@ multiclass sme_zero { def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast(NAME) 0b10111011), 1>; def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast(NAME) 0b11011101), 1>; def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast(NAME) 0b11101110), 1>; + + def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + } + + def : Pat<(int_aarch64_sme_zero imm:$imm), + (!cast(NAME # _PSEUDO) imm:$imm)>; } //===----------------------------------------------------------------------===// @@ -651,6 +1037,15 @@ class sve2_int_perm_revd let ElementSize = ZPR128.ElementSize; } +multiclass sve2_int_perm_revd { + def NAME : sve2_int_perm_revd; + + def : SVE_1_Op_Passthru_Pat(NAME)>; + def : SVE_1_Op_Passthru_Pat(NAME)>; + def : SVE_1_Op_Passthru_Pat(NAME)>; + def : SVE_1_Op_Passthru_Pat(NAME)>; +} + class sve2_clamp sz, bit U, ZPRRegOp zpr_ty> : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd), asm, "\t$Zd, $Zn, $Zm", "", []>, @@ -672,11 +1067,16 @@ class sve2_clamp sz, bit U, ZPRRegOp zpr_ty> let ElementSize = zpr_ty.ElementSize; } -multiclass sve2_clamp { +multiclass sve2_clamp { def _B : sve2_clamp; def _H : sve2_clamp; def _S : sve2_clamp; def _D : sve2_clamp; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } class sve2_int_perm_sel_p @@ -699,7 +1099,7 @@ class sve2_int_perm_sel_p let Inst{3-0} = Pd; } -multiclass sve2_int_perm_sel_p { +multiclass sve2_int_perm_sel_p { def _B : sve2_int_perm_sel_p { bits<4> imm; let Inst{23-22} = imm{3-2}; @@ -723,4 +1123,32 @@ multiclass sve2_int_perm_sel_p { let Inst{22} = 0b1; let Inst{20-18} = 0b000; } + + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm), + MatrixIndexGPR32Op12_15:$idx)), + (!cast(NAME # _B) $Pn, $Pm, $idx, 0)>; + def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm), + MatrixIndexGPR32Op12_15:$idx)), + (!cast(NAME # _H) $Pn, $Pm, $idx, 0)>; + def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm), + MatrixIndexGPR32Op12_15:$idx)), + (!cast(NAME # _S) $Pn, $Pm, $idx, 0)>; + def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm), + MatrixIndexGPR32Op12_15:$idx)), + (!cast(NAME # _D) $Pn, $Pm, $idx, 0)>; + + let AddedComplexity = 1 in { + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm), + (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))), + (!cast(NAME # _B) $Pn, $Pm, $idx, $imm)>; + def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm), + (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))), + (!cast(NAME # _H) $Pn, $Pm, $idx, $imm)>; + def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm), + (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))), + (!cast(NAME # _S) $Pn, $Pm, $idx, $imm)>; + def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm), + (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))), + (!cast(NAME # _D) $Pn, $Pm, $idx, $imm)>; + } } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9d4bdbe5d053..3631536a32b9 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -199,6 +199,11 @@ def SVEAddSubImm16Pat : ComplexPattern", [ def SVEAddSubImm32Pat : ComplexPattern", []>; def SVEAddSubImm64Pat : ComplexPattern", []>; +def SVECpyDupImm8Pat : ComplexPattern", []>; +def SVECpyDupImm16Pat : ComplexPattern", []>; +def SVECpyDupImm32Pat : ComplexPattern", []>; +def SVECpyDupImm64Pat : ComplexPattern", []>; + def SVELogicalImm8Pat : ComplexPattern", []>; def SVELogicalImm16Pat : ComplexPattern", []>; def SVELogicalImm32Pat : ComplexPattern", []>; @@ -209,14 +214,6 @@ def SVELogicalImm16NotPat : ComplexPattern", []>; def SVELogicalImm64NotPat : ComplexPattern", []>; -def SVE8BitLslImm32 : ComplexPattern; -def SVE8BitLslImm64 : ComplexPattern; -class SVE8BitLslImm { - ComplexPattern Pat = !cond( - !eq(ty, i32): SVE8BitLslImm32, - !eq(ty, i64): SVE8BitLslImm64); -} - def SVEArithUImm8Pat : ComplexPattern", []>; def SVEArithUImm16Pat : ComplexPattern", []>; def SVEArithUImm32Pat : ComplexPattern", []>; @@ -234,6 +231,8 @@ def SVEShiftImmR16 : ComplexPattern", [] def SVEShiftImmR32 : ComplexPattern", []>; def SVEShiftImmR64 : ComplexPattern", []>; +def SVEShiftSplatImmR : ComplexPattern; + def SVEAllActive : ComplexPattern; class SVEExactFPImm : AsmOperandClass { @@ -335,9 +334,14 @@ multiclass sve_int_ptrue opc, string asm, SDPatternOperator op> { def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>; -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>; defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>; + + def : Pat<(nxv16i1 immAllOnesV), (PTRUE_B 31)>; + def : Pat<(nxv8i1 immAllOnesV), (PTRUE_H 31)>; + def : Pat<(nxv4i1 immAllOnesV), (PTRUE_S 31)>; + def : Pat<(nxv2i1 immAllOnesV), (PTRUE_D 31)>; } //===----------------------------------------------------------------------===// @@ -370,24 +374,27 @@ class SVE_1_Op_Passthru_Round_Pat; -class SVE_1_Op_Imm_OptLsl_Reverse_Pat - : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), - (inst $Op1, i32:$imm, i32:$shift)>; +multiclass SVE_1_Op_PassthruUndef_Round_Pat{ + def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))), + (inst (IMPLICIT_DEF), $Op1, $Op2)>; + def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; +} class SVE_1_Op_Imm_OptLsl_Pat - : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))), + : Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm, i32:$shift)))))), (inst $Op1, i32:$imm, i32:$shift)>; class SVE_1_Op_Imm_Arith_All_Active - : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), + : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))), (inst $Op1, i32:$imm)>; class SVE_1_Op_Imm_Log_Pat - : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))), + : Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i64:$imm)))))), (inst $Op1, i64:$imm)>; class SVE_2_Op_Pat -: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), +: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))), (inst $Pg, $Rn, i32:$imm)>; class SVE_Shift_DupImm_All_Active_Pat -: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), +: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (splat_vector (it (cast 
i32:$imm)))))), (inst $Rn, i32:$imm)>; class SVE_2_Op_Fp_Imm_Pat -: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))), +: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (splat_vector (it immL))))), (inst $Pg, $Zs1, imm)>; class SVE_2_Op_Fp_Imm_Pat_Zero : Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)), - (vt (AArch64dup (it immL))))), + (vt (splat_vector (it immL))))), (inst $Pg, $Zs1, imm)>; +// Used to re-order the operands of BSP when lowering to BSL. BSP has the order: +// mask, in1, in2 whereas BSL for SVE2 has them ordered in1, in2, mask +class SVE_3_Op_BSP_Pat +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), + (inst $Op2, $Op3, $Op1)>; + +class SVE_Shift_Add_All_Active_Pat +: Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))), + (inst $Op1, $Op2, $Op3)>; + +//===----------------------------------------------------------------------===// +// SVE pattern match helpers. +//===----------------------------------------------------------------------===// + +// Matches either an intrinsic, or a predicated operation with an all active predicate +class EitherVSelectOrPassthruPatFrags +: PatFrags<(ops node:$Pg, node:$Op1, node:$Op2), [ + (intrinsic node:$Pg, node:$Op1, node:$Op2), + (vselect node:$Pg, (sdnode (SVEAllActive), node:$Op1, node:$Op2), node:$Op1), + ]>; + // // Pseudo -> Instruction mappings // @@ -612,10 +643,11 @@ class sve_int_pfalse opc, string asm> multiclass sve_int_pfalse opc, string asm> { def NAME : sve_int_pfalse; - def : Pat<(nxv16i1 (splat_vector (i32 0))), (!cast(NAME))>; - def : Pat<(nxv8i1 (splat_vector (i32 0))), (!cast(NAME))>; - def : Pat<(nxv4i1 (splat_vector (i32 0))), (!cast(NAME))>; - def : Pat<(nxv2i1 (splat_vector (i32 0))), (!cast(NAME))>; + def : Pat<(nxv16i1 immAllZerosV), (!cast(NAME))>; + def : Pat<(nxv8i1 immAllZerosV), (!cast(NAME))>; + def : Pat<(nxv4i1 immAllZerosV), (!cast(NAME))>; + def : Pat<(nxv2i1 immAllZerosV), (!cast(NAME))>; + def : Pat<(nxv1i1 immAllZerosV), (!cast(NAME))>; } class sve_int_ptest opc, string asm> @@ -885,6 +917,8 @@ class sve_int_count opc, string asm> let Inst{10} = opc{0}; let Inst{9-5} = pattern; let Inst{4-0} = Rd; + + let isReMaterializable = 1; } multiclass sve_int_count opc, string asm, SDPatternOperator op> { @@ -965,7 +999,7 @@ class sve_int_pred_pattern_a opc, string asm> multiclass sve_int_pred_pattern_a opc, string asm, SDPatternOperator op, SDPatternOperator opcnt> { - let Predicates = [HasSVEorStreamingSVE] in { + let Predicates = [HasSVEorSME] in { def NAME : sve_int_pred_pattern_a; def : InstAlias opc, string asm, (!cast(NAME) GPR64:$Rdn, 0b11111, 1), 2>; } - let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in { + let Predicates = [HasSVEorSME, UseScalarIncVL] in { def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))), (!cast(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>; @@ -1170,28 +1204,45 @@ multiclass sve_int_perm_dup_i { (!cast(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; // Duplicate extracted element of vector into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))), + def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))), (!cast(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), + def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), (!cast(NAME # 
_H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), + def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv8f16 (AArch64dup (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), + def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), + def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv4f16 (AArch64dup (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), + def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2f16 (AArch64dup (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv4f32 (AArch64dup (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), + def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2f32 (AArch64dup (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv2f64 (AArch64dup (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; + + def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv8i16 (AArch64duplane128 nxv8i16:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv4i32 (AArch64duplane128 nxv4i32:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv2i64 (AArch64duplane128 nxv2i64:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv8f16 (AArch64duplane128 nxv8f16:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv4f32 (AArch64duplane128 nxv4f32:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv2f64 (AArch64duplane128 nxv2f64:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv8bf16 (AArch64duplane128 nxv8bf16:$Op1, i64:$imm)), + (!cast(NAME # _Q) $Op1, $imm)>; } class sve_int_perm_tbl sz8_64, bits<2> opc, string asm, ZPRRegOp zprty, @@ -1631,6 +1682,7 @@ multiclass sve_int_pred_log opc, string asm, SDPatternOperator op, def : SVE_3_Op_Pat(NAME)>; def : SVE_3_Op_Pat(NAME)>; def : 
SVE_3_Op_Pat(NAME)>; + def : SVE_3_Op_Pat(NAME)>; def : SVE_2_Op_AllActive_Pat(NAME), PTRUE_B>; def : SVE_2_Op_AllActive_Pat { def : InstAlias<"mov $Zd, $imm", (!cast(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; - def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))), + def : Pat<(nxv2i64 (splat_vector (i64 logical_imm64:$imm))), (!cast(NAME) logical_imm64:$imm)>; } @@ -2478,7 +2530,7 @@ multiclass sve2_fp_mla_long opc, string asm, SDPatternOperator op> { // SVE Stack Allocation Group //===----------------------------------------------------------------------===// -class sve_int_arith_vl +class sve_int_arith_vl : I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6), asm, "\t$Rd, $Rn, $imm6", "", @@ -2490,12 +2542,13 @@ class sve_int_arith_vl let Inst{22} = opc; let Inst{21} = 0b1; let Inst{20-16} = Rn; - let Inst{15-11} = 0b01010; + let Inst{15-12} = 0b0101; + let Inst{11} = streaming_sve; let Inst{10-5} = imm6; let Inst{4-0} = Rd; } -class sve_int_read_vl_a opc2, string asm> +class sve_int_read_vl_a opc2, string asm, bit streaming_sve = 0b0> : I<(outs GPR64:$Rd), (ins simm6_32b:$imm6), asm, "\t$Rd, $imm6", "", @@ -2506,9 +2559,12 @@ class sve_int_read_vl_a opc2, string asm> let Inst{22} = op; let Inst{21} = 0b1; let Inst{20-16} = opc2{4-0}; - let Inst{15-11} = 0b01010; + let Inst{15-12} = 0b0101; + let Inst{11} = streaming_sve; let Inst{10-5} = imm6; let Inst{4-0} = Rd; + + let isReMaterializable = 1; } //===----------------------------------------------------------------------===// @@ -2589,8 +2645,8 @@ multiclass sve_fp_2op_p_zd opc, string asm, SDPatternOperator int_op, SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd; - + def NAME : sve_fp_2op_p_zd, + SVEPseudo2Instr; // convert vt1 to a packed type for the intrinsic patterns defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, !eq(!cast(vt1), "nxv4f16"): nxv8f16, @@ -2604,8 +2660,11 @@ multiclass sve_fp_2op_p_zd opc, string asm, 1 : vt3); def : SVE_3_Op_Pat(NAME)>; - def : SVE_1_Op_Passthru_Pat(NAME)>; + + def _UNDEF : PredOneOpPassthruPseudo(i_zprtype)>; + + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF)>; } multiclass sve_fp_2op_p_zdr opc, string asm, @@ -2614,7 +2673,8 @@ multiclass sve_fp_2op_p_zdr opc, string asm, SDPatternOperator int_op, SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd; + def NAME : sve_fp_2op_p_zd, + SVEPseudo2Instr; // convert vt1 to a packed type for the intrinsic patterns defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, @@ -2623,8 +2683,11 @@ multiclass sve_fp_2op_p_zdr opc, string asm, 1 : vt1); def : SVE_3_Op_Pat(NAME)>; - def : SVE_1_Op_Passthru_Round_Pat(NAME)>; + + def _UNDEF : PredOneOpPassthruPseudo(i_zprtype)>; + + defm : SVE_1_Op_PassthruUndef_Round_Pat(NAME # _UNDEF)>; } multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { @@ -2726,11 +2789,19 @@ class sve_int_bin_pred_arit_log sz8_64, bits<2> fmt, bits<3> opc, let ElementSize = zprty.ElementSize; } -multiclass sve_int_bin_pred_log opc, string asm, SDPatternOperator op> { - def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>; - def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>; - def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>; - def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>; +multiclass sve_int_bin_pred_log opc, string asm, string Ps, + SDPatternOperator op, + DestructiveInstTypeEnum 
flags> { + let DestructiveInstType = flags in { + def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>, + SVEPseudo2Instr; + def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>, + SVEPseudo2Instr; + } def : SVE_3_Op_Pat(NAME # _B)>; def : SVE_3_Op_Pat(NAME # _H)>; @@ -3756,7 +3827,8 @@ class sve2_int_bin_accum_shift_imm tsz8_64, bits<2> opc, string asm, } multiclass sve2_int_bin_accum_shift_imm_right opc, string asm, - SDPatternOperator op> { + SDPatternOperator op, + SDPatternOperator shift_op = null_frag> { def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; @@ -3773,6 +3845,11 @@ multiclass sve2_int_bin_accum_shift_imm_right opc, string asm, def : SVE_3_Op_Imm_Pat(NAME # _H)>; def : SVE_3_Op_Imm_Pat(NAME # _S)>; def : SVE_3_Op_Imm_Pat(NAME # _D)>; + + def : SVE_Shift_Add_All_Active_Pat(NAME # _B)>; + def : SVE_Shift_Add_All_Active_Pat(NAME # _H)>; + def : SVE_Shift_Add_All_Active_Pat(NAME # _S)>; + def : SVE_Shift_Add_All_Active_Pat(NAME # _D)>; } class sve2_int_cadd sz, bit opc, string asm, ZPRRegOp zprty> @@ -4331,18 +4408,6 @@ multiclass sve_int_arith_imm0 opc, string asm, SDPatternOperator op> { def : SVE_1_Op_Imm_OptLsl_Pat(NAME # _D)>; } -multiclass sve_int_arith_imm0_subr opc, string asm, SDPatternOperator op> { - def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>; - def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>; - def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>; - def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>; - - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_OptLsl_Reverse_Pat(NAME # _D)>; -} - class sve_int_arith_imm sz8_64, bits<6> opc, string asm, ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), @@ -4458,7 +4523,8 @@ class sve2_int_bitwise_ternary_op_d opc, string asm> let ElementSize = ElementSizeNone; } -multiclass sve2_int_bitwise_ternary_op opc, string asm, SDPatternOperator op> { +multiclass sve2_int_bitwise_ternary_op opc, string asm, SDPatternOperator op, + SDPatternOperator ir_op = null_frag> { def NAME : sve2_int_bitwise_ternary_op_d; def : InstAlias opc, string asm, SDPatternOperato def : SVE_3_Op_Pat(NAME)>; def : SVE_3_Op_Pat(NAME)>; def : SVE_3_Op_Pat(NAME)>; + + + def : SVE_3_Op_BSP_Pat(NAME)>; + def : SVE_3_Op_BSP_Pat(NAME)>; + def : SVE_3_Op_BSP_Pat(NAME)>; + def : SVE_3_Op_BSP_Pat(NAME)>; } class sve2_int_rotate_right_imm tsz8_64, string asm, @@ -4578,29 +4650,28 @@ class sve_int_dup_imm_pred sz8_64, bit m, string asm, } multiclass sve_int_dup_imm_pred_merge_inst< - bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, - ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { + bits<2> sz8_64, string asm, ZPRRegOp zprty, imm8_opt_lsl cpyimm, + ValueType intty, ValueType predty, ValueType scalarty, ComplexPattern cpx> { let Constraints = "$Zd = $_Zd" in def NAME : sve_int_dup_imm_pred; def : InstAlias<"mov $Zd, $Pg/m, $imm", (!cast(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>; - def : Pat<(intty - (vselect predty:$Pg, - (intty (AArch64dup (scalarty 
(SVE8BitLslImm.Pat i32:$imm, i32:$shift)))), - intty:$Zd)), - (!cast(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>; + def : Pat<(vselect predty:$Pg, + (intty (splat_vector (scalarty (cpx i32:$imm, i32:$shift)))), + ZPR:$Zd), + (!cast(NAME) $Zd, $Pg, $imm, $shift)>; } multiclass sve_int_dup_imm_pred_merge { - defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, - i32, cpy_imm8_opt_lsl_i8>; - defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, - i32, cpy_imm8_opt_lsl_i16>; - defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, - i32, cpy_imm8_opt_lsl_i32>; - defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, - i64, cpy_imm8_opt_lsl_i64>; + defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8, + nxv16i8, nxv16i1, i32, SVECpyDupImm8Pat>; + defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16, + nxv8i16, nxv8i1, i32, SVECpyDupImm16Pat>; + defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32, + nxv4i32, nxv4i1, i32, SVECpyDupImm32Pat>; + defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64, + nxv2i64, nxv2i1, i64, SVECpyDupImm64Pat>; def : InstAlias<"fmov $Zd, $Pg/m, #0.0", (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>; @@ -4608,11 +4679,24 @@ multiclass sve_int_dup_imm_pred_merge { (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, 0, 0), 0>; def : InstAlias<"fmov $Zd, $Pg/m, #0.0", (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>; + + def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv8f16 ZPR:$Zd)), + (!cast(NAME # _H) $Zd, $Pg, 0, 0)>; + def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv4f16 ZPR:$Zd)), + (!cast(NAME # _S) $Zd, $Pg, 0, 0)>; + def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv2f16 ZPR:$Zd)), + (!cast(NAME # _D) $Zd, $Pg, 0, 0)>; + def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv4f32 ZPR:$Zd)), + (!cast(NAME # _S) $Zd, $Pg, 0, 0)>; + def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv2f32 ZPR:$Zd)), + (!cast(NAME # _D) $Zd, $Pg, 0, 0)>; + def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv2f64 ZPR:$Zd)), + (!cast(NAME # _D) $Zd, $Pg, 0, 0)>; } multiclass sve_int_dup_imm_pred_zero_inst< - bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, - ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { + bits<2> sz8_64, string asm, ZPRRegOp zprty, imm8_opt_lsl cpyimm, + ValueType intty, ValueType predty, ValueType scalarty, ComplexPattern cpx> { def NAME : sve_int_dup_imm_pred; def : InstAlias<"mov $Zd, $Pg/z, $imm", @@ -4623,22 +4707,21 @@ multiclass sve_int_dup_imm_pred_zero_inst< (!cast(NAME) PPRAny:$Ps1, -1, 0)>; def : Pat<(intty (anyext (predty PPRAny:$Ps1))), (!cast(NAME) PPRAny:$Ps1, 1, 0)>; - def : Pat<(intty - (vselect predty:$Pg, - (intty (AArch64dup (scalarty (SVE8BitLslImm.Pat i32:$imm, i32:$shift)))), - (intty (AArch64dup (scalarty 0))))), - (!cast(NAME) $Pg, i32:$imm, i32:$shift)>; + def : Pat<(vselect predty:$Pg, + (intty (splat_vector (scalarty (cpx i32:$imm, i32:$shift)))), + (intty (splat_vector (scalarty 0)))), + (!cast(NAME) $Pg, $imm, $shift)>; } multiclass sve_int_dup_imm_pred_zero { - defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, - i32, cpy_imm8_opt_lsl_i8>; - defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, - i32, cpy_imm8_opt_lsl_i16>; - defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, - i32, cpy_imm8_opt_lsl_i32>; - defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, 
nxv2i1, - i64, cpy_imm8_opt_lsl_i64>; + defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8, + nxv16i8, nxv16i1, i32, SVECpyDupImm8Pat>; + defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16, + nxv8i16, nxv8i1, i32, SVECpyDupImm16Pat>; + defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32, + nxv4i32, nxv4i1, i32, SVECpyDupImm32Pat>; + defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64, + nxv2i64, nxv2i1, i64, SVECpyDupImm64Pat>; } //===----------------------------------------------------------------------===// @@ -4690,6 +4773,10 @@ multiclass SVE_SETCC_Pat_With_Zero; def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)), (cmp $Op1, $Op2)>; + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))), + (cmp $Pg, $Op1)>; + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))), + (cmp $Pg, $Op1)>; } multiclass sve_int_cmp_0 opc, string asm, CondCode cc, CondCode invcc> { @@ -4761,14 +4848,26 @@ multiclass SVE_SETCC_Imm_Pat { def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), - (intvt ZPR:$Zs1), - (intvt (AArch64dup (immtype:$imm))), - cc)), + (intvt ZPR:$Zs1), + (intvt (splat_vector (immtype:$imm))), + cc)), (cmp $Pg, $Zs1, immtype:$imm)>; def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), - (intvt (AArch64dup (immtype:$imm))), - (intvt ZPR:$Zs1), - commuted_cc)), + (intvt (splat_vector (immtype:$imm))), + (intvt ZPR:$Zs1), + commuted_cc)), + (cmp $Pg, $Zs1, immtype:$imm)>; + def : Pat<(predvt (and predvt:$Pg, + (AArch64setcc_z (predvt (AArch64ptrue 31)), + (intvt ZPR:$Zs1), + (intvt (splat_vector (immtype:$imm))), + cc))), + (cmp $Pg, $Zs1, immtype:$imm)>; + def : Pat<(predvt (and predvt:$Pg, + (AArch64setcc_z (predvt (AArch64ptrue 31)), + (intvt (splat_vector (immtype:$imm))), + (intvt ZPR:$Zs1), + commuted_cc))), (cmp $Pg, $Zs1, immtype:$imm)>; } @@ -5148,6 +5247,8 @@ class sve_int_index_ii sz8_64, string asm, ZPRRegOp zprty, let Inst{15-10} = 0b010000; let Inst{9-5} = imm5; let Inst{4-0} = Zd; + + let isReMaterializable = 1; } multiclass sve_int_index_ii { @@ -5166,13 +5267,13 @@ multiclass sve_int_index_ii { (!cast(NAME # "_D") (i64 0), simm5_64b:$imm5b)>; // add(step_vector(step), dup(X)) -> index(X, step). 
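The patterns that follow implement this fold. As a scalar sanity check of the equivalence (a standalone sketch, not LLVM code): lane i of add(step_vector(step), splat_vector(base)) is base + i*step, which is exactly what the SVE INDEX instruction produces for lane i.

#include <cassert>
#include <cstdint>

int64_t indexLane(int64_t base, int64_t step, unsigned i) {
  return base + static_cast<int64_t>(i) * step; // INDEX Zd, #base, #step
}

int main() {
  const int64_t base = 3, step = -2; // both fit the simm5 immediate forms
  for (unsigned i = 0; i < 8; ++i) {
    int64_t stepVecLane = static_cast<int64_t>(i) * step; // step_vector(step)
    assert(stepVecLane + base == indexLane(base, step, i)); // add(..., splat)
  }
}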
- def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))), (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("trunc_imm") $imm5b))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))), (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("trunc_imm") $imm5b))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>; } @@ -5211,35 +5312,35 @@ multiclass sve_int_index_ir(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))), (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))), (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, (!cast("MOVi32imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, (!cast("MOVi64imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // mul(step_vector(1), dup(Y)) -> index(0, Y). 
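Why the patterns below require (AArch64ptrue 31): a merging predicated multiply only equals a plain lane-wise multiply when every lane is active, so the fold to INDEX(0, Y) is valid only under an all-true predicate. A scalar model of that reasoning (illustrative, not LLVM code):

#include <cassert>

long predicatedMul(bool laneActive, long inactiveVal, long a, long b) {
  return laneActive ? a * b : inactiveVal; // merging predication per lane
}

int main() {
  const long y = 5;
  for (long i = 0; i < 4; ++i) {
    // All-active lane: mul(step_vector(1), splat(y)) lane i is i*y, which is
    // lane i of INDEX(0, y).
    assert(predicatedMul(true, -1, i * 1, y) == 0 + i * y);
    // An inactive lane would keep inactiveVal instead, breaking the fold.
    assert(predicatedMul(false, -1, i * 1, y) == -1);
  }
}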
- def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_B") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_H") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_S") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (!cast(NAME # "_D") (i64 0), GPR64:$Rm)>; // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). - def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(simm5_8b:$imm5)))), (!cast(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (nxv8i16 (splat_vector(simm5_16b:$imm5)))), (!cast(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (nxv4i32 (splat_vector(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>; } @@ -5267,13 +5368,13 @@ multiclass sve_int_index_ri { def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; // add(step_vector(step), dup(X)) -> index(X, step). 
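The B/H patterns that follow wrap the step in trunc_imm because step_vector's literal is wider than the narrow element type; the immediate operands must also fit INDEX's signed 5-bit field (simm5, -16..15). A sketch of that legality check (illustrative only):

#include <cassert>
#include <cstdint>

bool isSImm5(int64_t v) { return v >= -16 && v <= 15; }

int main() {
  assert(isSImm5(15) && isSImm5(-16));   // representable as simm5
  assert(!isSImm5(16) && !isSImm5(-17)); // would need the register form
  // Truncating a wide step literal to the element width, as trunc_imm does:
  int64_t wideStep = 7;
  int8_t truncated = static_cast<int8_t>(wideStep);
  assert(truncated == 7);
}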
- def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_B") GPR32:$Rm, (!cast("trunc_imm") $imm5))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_H") GPR32:$Rm, (!cast("trunc_imm") $imm5))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (!cast(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>; } @@ -5301,25 +5402,25 @@ multiclass sve_int_index_rr { def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_B") GPR32:$Rn, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_H") GPR32:$Rn, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_S") GPR32:$Rn, (!cast("MOVi32imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (splat_vector(i64 GPR64:$Rn)))), (!cast(NAME # "_D") GPR64:$Rn, (!cast("MOVi64imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (splat_vector(i64 GPR64:$Rn)))), (!cast(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). 
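A note on the *_oneuse operators in the patterns around here: they restrict the fold to intermediate values with a single consumer, since a mul or step_vector with other users must be materialized anyway, and folding it into INDEX would then save nothing. A toy one-use check over a minimal node type (illustrative, not the SelectionDAG API):

#include <cassert>
#include <vector>

struct Node {
  std::vector<Node *> users;
};

bool hasOneUse(const Node &n) { return n.users.size() == 1; }

int main() {
  Node add1, add2, mul;
  mul.users = {&add1};
  assert(hasOneUse(mul)); // safe to fold mul into INDEX
  mul.users = {&add1, &add2};
  assert(!hasOneUse(mul)); // keep mul; folding would duplicate work
}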
- def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),(nxv8i16 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),(nxv4i32 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),(nxv2i64 (splat_vector(i64 GPR64:$Rn)))), (!cast(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>; } @@ -5972,25 +6073,25 @@ multiclass sve_mem_sst_sv_64_scaled msz, string asm, SDPatternOperator op, RegisterOperand zprext, ValueType vt> { - def _SCALED_REAL : sve_mem_sst_sv2; + def _SCALED : sve_mem_sst_sv2; def : InstAlias(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; + (!cast(NAME # _SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt), - (!cast(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>; + (!cast(NAME # _SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>; } multiclass sve_mem_sst_sv_64_unscaled msz, string asm, SDPatternOperator op, ValueType vt> { - def _REAL : sve_mem_sst_sv2; + def NAME : sve_mem_sst_sv2; def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; + (!cast(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; + (!cast(NAME) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; } class sve_mem_sst_vi opc, string asm, ZPRRegOp zprty, @@ -8433,6 +8534,7 @@ def am_sve_regreg_lsl0 : ComplexPattern", [ def am_sve_regreg_lsl1 : ComplexPattern", []>; def am_sve_regreg_lsl2 : ComplexPattern", []>; def am_sve_regreg_lsl3 : ComplexPattern", []>; +def am_sve_regreg_lsl4 : ComplexPattern", []>; // Predicated pseudo floating point two operand instructions. multiclass sve_fp_bin_pred_hfd { diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp index 4a24162540a5..ccb34f367338 100644 --- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp +++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -305,8 +305,7 @@ bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) { // ..where the value stored comes from a vector extract.. 
auto *IntrI = dyn_cast(Store->getOperand(0)); - if (!IntrI || - IntrI->getIntrinsicID() != Intrinsic::experimental_vector_extract) + if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract) return false; // ..that is extracting from index 0.. @@ -365,8 +364,7 @@ bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) { // ..whose operand is a vector_insert.. auto *IntrI = dyn_cast(BitCast->getOperand(0)); - if (!IntrI || - IntrI->getIntrinsicID() != Intrinsic::experimental_vector_insert) + if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert) return false; // ..that is inserting into index zero of an undef vector.. @@ -451,8 +449,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) { continue; switch (F.getIntrinsicID()) { - case Intrinsic::experimental_vector_extract: - case Intrinsic::experimental_vector_insert: + case Intrinsic::vector_extract: + case Intrinsic::vector_insert: case Intrinsic::aarch64_sve_ptrue: for (User *U : F.users()) Functions.insert(cast(U)->getFunction()); diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 5906a5d6b50b..71303611265c 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -634,7 +634,8 @@ namespace AArch64SysReg { FeatureBitset FeaturesRequired; bool haveFeatures(FeatureBitset ActiveFeatures) const { - return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; + return ActiveFeatures[llvm::AArch64::FeatureAll] || + (FeaturesRequired & ActiveFeatures) == FeaturesRequired; } }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 11cc1a01d248..c4680cbedadf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -91,10 +91,6 @@ ModulePass *createAMDGPULowerIntrinsicsPass(); void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); extern char &AMDGPULowerIntrinsicsID; -ModulePass *createAMDGPUFixFunctionBitcastsPass(); -void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); -extern char &AMDGPUFixFunctionBitcastsID; - ModulePass *createAMDGPUCtorDtorLoweringPass(); void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &); extern char &AMDGPUCtorDtorLoweringID; @@ -303,6 +299,12 @@ extern char &SIMemoryLegalizerID; void initializeSIModeRegisterPass(PassRegistry&); extern char &SIModeRegisterID; +void initializeAMDGPUReleaseVGPRsPass(PassRegistry &); +extern char &AMDGPUReleaseVGPRsID; + +void initializeAMDGPUInsertDelayAluPass(PassRegistry &); +extern char &AMDGPUInsertDelayAluID; + void initializeSIInsertHardClausesPass(PassRegistry &); extern char &SIInsertHardClausesID; @@ -335,6 +337,9 @@ extern char &GCNNSAReassignID; void initializeGCNPreRAOptimizationsPass(PassRegistry &); extern char &GCNPreRAOptimizationsID; +FunctionPass *createAMDGPUSetWavePriorityPass(); +void initializeAMDGPUSetWavePriorityPass(PassRegistry &); + namespace AMDGPU { enum TargetIndex { TI_CONSTDATA_START, diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 806c0b18637a..48b5814cd482 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -86,6 +86,12 @@ def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts" "Have s_scratch_* flat memory instructions" >; +def FeatureEnableFlatScratch : SubtargetFeature<"enable-flat-scratch", + "EnableFlatScratch", + "true", + "Use scratch_* flat memory instructions to access scratch" +>; + def FeatureAddNoCarryInsts : 
SubtargetFeature<"add-no-carry-insts", "AddNoCarryInsts", "true", @@ -171,6 +177,12 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", "VI SGPR initialization bug requiring a fixed SGPR allocation size" >; +def FeatureUserSGPRInit16Bug : SubtargetFeature<"user-sgpr-init16-bug", + "UserSGPRInit16Bug", + "true", + "Bug requiring at least 16 user+system SGPRs to be enabled" +>; + def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug", "LDSMisalignedBug", "true", @@ -307,12 +319,24 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", "Additional instructions for GFX90A+" >; +def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", + "GFX940Insts", + "true", + "Additional instructions for GFX940+" +>; + def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", "GFX10Insts", "true", "Additional instructions for GFX10+" >; +def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts", + "GFX11Insts", + "true", + "Additional instructions for GFX11+" +>; + def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts", "GFX10_3Insts", "true", @@ -343,6 +367,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts", "Has i16/f16 instructions" >; +def FeatureTrue16BitInsts : SubtargetFeature<"true16", + "HasTrue16BitInsts", + "true", + "True 16-bit operand instructions" +>; + def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true", @@ -458,6 +488,12 @@ def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding", "Support NSA encoding for image instructions" >; +def FeatureImageInsts : SubtargetFeature<"image-insts", + "HasImageInsts", + "true", + "Support image instructions" +>; + def FeatureExtendedImageInsts : SubtargetFeature<"extended-image-insts", "HasExtendedImageInsts", "true", @@ -536,6 +572,13 @@ def FeatureDot7Insts : SubtargetFeature<"dot7-insts", "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" >; +def FeatureDot8Insts : SubtargetFeature<"dot8-insts", + "HasDot8Insts", + "true", + "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16, " + "v_dot4_i32_iu8, v_dot8_i32_iu4 instructions" +>; + def FeatureMAIInsts : SubtargetFeature<"mai-insts", "HasMAIInsts", "true", @@ -548,11 +591,28 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; -def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts", - "HasAtomicFaddInsts", +def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts", + "HasAtomicFaddRtnInsts", "true", - "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, " - "global_atomic_pk_add_f16 instructions", + "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that " + "return original value", + [FeatureFlatGlobalInsts] +>; + +def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts", + "HasAtomicFaddNoRtnInsts", + "true", + "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that " + "don't return original value", + [FeatureFlatGlobalInsts] +>; + +def FeatureAtomicPkFaddNoRtnInsts + : SubtargetFeature<"atomic-pk-fadd-no-rtn-insts", + "HasAtomicPkFaddNoRtnInsts", + "true", + "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that " + "don't return original value", [FeatureFlatGlobalInsts] >; @@ -632,6 +692,12 @@ class SubtargetFeatureNSAMaxSize : SubtargetFeature < def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>; def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>; +def FeatureVOPD : SubtargetFeature<"vopd", + "HasVOPDInsts", + "true", + "Has VOPD dual issue wave32 
instructions" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -762,7 +828,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel, - FeatureTrigReducedRange, FeatureExtendedImageInsts + FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts ] >; @@ -772,7 +838,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, - FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess + FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess, + FeatureImageInsts ] >; @@ -787,7 +854,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, - FeatureUnalignedBufferAccess + FeatureUnalignedBufferAccess, FeatureImageInsts ] >; @@ -824,6 +891,25 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts + ] +>; + +def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", + "gfx11", + [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, + FeatureFlatAddressSpace, Feature16BitInsts, + FeatureInv2PiInlineImm, FeatureApertureRegs, + FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts, + FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts, + FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD, FeatureTrue16BitInsts, + FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, + FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, + FeatureAddNoCarryInsts, FeatureFmaMixInsts, + FeatureNoSdstCMPX, FeatureVscnt, + FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, + FeatureNoDataDepHazard, FeaturePkFmacF16Inst, + FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess ] >; @@ -910,6 +996,7 @@ def FeatureISAVersion9_0_0 : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureImageGather4D16Bug]>; @@ -919,6 +1006,7 @@ def FeatureISAVersion9_0_2 : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureImageGather4D16Bug]>; @@ -927,6 +1015,7 @@ def FeatureISAVersion9_0_4 : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureFmaMixInsts, FeatureImageGather4D16Bug]>; @@ -938,6 +1027,7 @@ def FeatureISAVersion9_0_6 : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureDLInsts, FeatureDot1Insts, @@ -953,6 +1043,7 @@ 
def FeatureISAVersion9_0_8 : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureDLInsts, FeatureDot1Insts, @@ -964,7 +1055,8 @@ def FeatureISAVersion9_0_8 : FeatureSet< FeatureDot7Insts, FeatureMAIInsts, FeaturePkFmacF16Inst, - FeatureAtomicFaddInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicPkFaddNoRtnInsts, FeatureSupportsSRAMECC, FeatureMFMAInlineLiteralBug, FeatureImageGather4D16Bug]>; @@ -975,6 +1067,7 @@ def FeatureISAVersion9_0_9 : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureImageGather4D16Bug]>; @@ -995,7 +1088,10 @@ def FeatureISAVersion9_0_A : FeatureSet< FeaturePackedFP32Ops, FeatureMAIInsts, FeaturePkFmacF16Inst, - FeatureAtomicFaddInsts, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicPkFaddNoRtnInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureSupportsSRAMECC, FeaturePackedTID, @@ -1007,9 +1103,36 @@ def FeatureISAVersion9_0_C : FeatureSet< FeatureLDSBankCount32, FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureImageInsts, FeatureMadMacF32Insts, FeatureImageGather4D16Bug]>; +def FeatureISAVersion9_4_0 : FeatureSet< + [FeatureGFX9, + FeatureGFX90AInsts, + FeatureGFX940Insts, + FeatureFmaMixInsts, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot3Insts, + FeatureDot4Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureDot7Insts, + Feature64BitDPP, + FeaturePackedFP32Ops, + FeatureMAIInsts, + FeaturePkFmacF16Inst, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicPkFaddNoRtnInsts, + FeatureSupportsSRAMECC, + FeaturePackedTID, + FeatureArchitectedFlatScratch, + FullRate64Ops]>; + // TODO: Organize more features into groups. def FeatureGroup { // Bugs present on gfx10.1. 
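The per-ISA-version FeatureSet defs above (and the GFX11 sets below, built with !listconcat) are essentially named unions of subtarget feature bits. A rough C++ analogue of composing a common list plus version-specific bug bits (names are illustrative, not LLVM's):

#include <bitset>
#include <cassert>

enum Feature { GFX11Insts, Dot8Insts, UserSGPRInit16Bug, NumFeatures };
using FeatureBits = std::bitset<NumFeatures>;

FeatureBits makeCommon() {
  FeatureBits B;
  B.set(GFX11Insts).set(Dot8Insts); // shared across the family
  return B;
}

int main() {
  // Like the !listconcat of FeatureISAVersion11_Common with the bug feature:
  FeatureBits V11_0 = makeCommon();
  V11_0.set(UserSGPRInit16Bug);
  assert(V11_0.test(GFX11Insts) && V11_0.test(UserSGPRInit16Bug));
}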
@@ -1124,6 +1247,33 @@ def FeatureISAVersion10_3_0 : FeatureSet< FeatureWavefrontSize32, FeatureShaderCyclesRegister]>; +def FeatureISAVersion11_Common : FeatureSet< + [FeatureGFX11, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot5Insts, + FeatureDot7Insts, + FeatureDot8Insts, + FeatureNSAEncoding, + FeatureNSAMaxSize5, + FeatureWavefrontSize32, + FeatureShaderCyclesRegister, + FeatureArchitectedFlatScratch, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureImageInsts, + FeaturePackedTID, + FeatureVcmpxPermlaneHazard]>; + +// Features for GFX 11.0.0 and 11.0.1 +def FeatureISAVersion11_0 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [FeatureUserSGPRInit16Bug])>; + +def FeatureISAVersion11_0_2 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [FeatureUserSGPRInit16Bug])>; + //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { @@ -1152,8 +1302,10 @@ def AMDGPUAsmVariants { int SDWA9_ID = 3; string DPP = "DPP"; int DPP_ID = 4; + string VOP3_DPP = "VOP3_DPP"; + int VOP3_DPP_ID = 5; string Disable = "Disable"; - int Disable_ID = 5; + int Disable_ID = 6; } def DefaultAMDGPUAsmParserVariant : AsmParserVariant { @@ -1176,12 +1328,16 @@ def SDWA9AsmParserVariant : AsmParserVariant { let Name = AMDGPUAsmVariants.SDWA9; } - def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; } +def VOP3_DPPAsmParserVariant : AsmParserVariant { + let Variant = AMDGPUAsmVariants.VOP3_DPP_ID; + let Name = AMDGPUAsmVariants.VOP3_DPP; +} + def AMDGPU : Target { // Pull in Instruction Info: let InstructionSet = AMDGPUInstrInfo; @@ -1190,7 +1346,8 @@ def AMDGPU : Target { VOP3AsmParserVariant, SDWAAsmParserVariant, SDWA9AsmParserVariant, - DPPAsmParserVariant]; + DPPAsmParserVariant, + VOP3_DPPAsmParserVariant]; let AssemblyWriters = [AMDGPUAsmWriter]; let AllowRegisterRenaming = 1; } @@ -1216,6 +1373,12 @@ def isGFX6GFX7GFX10 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>; + +def isGFX6GFX7GFX10Plus : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>; def isGFX7Only : @@ -1225,6 +1388,12 @@ def isGFX7Only : def isGFX7GFX10 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>; + +def isGFX7GFX10GFX11 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>; def isGFX7GFX8GFX9 : @@ -1248,6 +1417,21 @@ def isGFX6GFX7GFX8GFX9NotGFX90A : " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>; +def isGFX6GFX7GFX8GFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == 
AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of (not FeatureGFX11Insts))>; + +def isGFX7GFX8GFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>; + def isGFX7Plus : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate<(all_of FeatureCIInsts)>; @@ -1287,18 +1471,37 @@ def isGFX8GFX9NotGFX90A : AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; def isGFX90AOnly : - Predicate<"Subtarget->hasGFX90AInsts()">, - AssemblerPredicate<(all_of FeatureGFX90AInsts)>; + Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; def isGFX908orGFX90A : - Predicate<"Subtarget->hasMAIInsts()">, - AssemblerPredicate<(all_of FeatureMAIInsts)>; + Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>; + +def isGFX940Plus : + Predicate<"Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of FeatureGFX940Insts)>; + +def isGFX940GFX11Plus : + Predicate<"Subtarget->hasGFX940Insts() ||" + "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, + AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; + +def isGFX8GFX9NotGFX940 : + Predicate<"!Subtarget->hasGFX940Insts() &&" + "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, + AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>; def isGFX8GFX9 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>; +def isGFX10Only : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>; + def isGFX10Plus : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, AssemblerPredicate<(all_of FeatureGFX10Insts)>; @@ -1308,6 +1511,25 @@ def isGFX10Before1030 : "!Subtarget->hasGFX10_3Insts()">, AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>; +def isGFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>; + +def isGFX8GFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>; + +def isGFX11Only : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, + AssemblerPredicate<(all_of FeatureGFX11Insts)>; + +def isGFX11Plus : + Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, + AssemblerPredicate<(all_of FeatureGFX11Insts)>; + def 
HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; @@ -1321,7 +1543,9 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, AssemblerPredicate<(all_of FeatureGFX9Insts)>; def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, - AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; + AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>; +def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, + AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; @@ -1354,6 +1578,11 @@ def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">; def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<(all_of Feature16BitInsts)>; + +def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">, + AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; +def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">; + def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<(all_of FeatureVOP3P)>; @@ -1385,7 +1614,10 @@ def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">, def HasFmaakFmamkF32Insts : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, - AssemblerPredicate<(any_of FeatureGFX10Insts)>; + AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>; + +def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">, + AssemblerPredicate<(all_of FeatureImageInsts)>; def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">, AssemblerPredicate<(all_of FeatureExtendedImageInsts)>; @@ -1454,6 +1686,9 @@ def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">, AssemblerPredicate<(all_of FeatureDot7Insts)>; +def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">, + AssemblerPredicate<(all_of FeatureDot8Insts)>; + def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">, AssemblerPredicate<(all_of FeatureGetWaveIdInst)>; @@ -1478,8 +1713,13 @@ def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">, def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">, AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; -def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">, - AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>; +def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">, + AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>; +def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">, + AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>; +def HasAtomicPkFaddNoRtnInsts + : Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">, + AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>; def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">, AssemblerPredicate<(all_of FeatureDsSrc2Insts)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index bebf032b5535..74be0336851c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -14,12 +14,11 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/SmallSet.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/Analysis/AliasAnalysis.h" #include 
"llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/InstVisitor.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/InitializePasses.h" #define DEBUG_TYPE "amdgpu-annotate-uniform" @@ -33,8 +32,18 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, LegacyDivergenceAnalysis *DA; MemorySSA *MSSA; AliasAnalysis *AA; - DenseMap noClobberClones; bool isEntryFunc; + bool Changed; + + void setUniformMetadata(Instruction *I) { + I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); + Changed = true; + } + + void setNoClobberMetadata(Instruction *I) { + I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); + Changed = true; + } public: static char ID; @@ -54,7 +63,6 @@ public: void visitBranchInst(BranchInst &I); void visitLoadInst(LoadInst &I); - bool isClobberedInFunction(LoadInst * Load); }; } // End anonymous namespace @@ -69,88 +77,6 @@ INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, char AMDGPUAnnotateUniformValues::ID = 0; -static void setUniformMetadata(Instruction *I) { - I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); -} -static void setNoClobberMetadata(Instruction *I) { - I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); -} - -bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) { - MemorySSAWalker *Walker = MSSA->getWalker(); - SmallVector WorkList{Walker->getClobberingMemoryAccess(Load)}; - SmallSet Visited; - MemoryLocation Loc(MemoryLocation::get(Load)); - - const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool { - Instruction *DefInst = Def->getMemoryInst(); - LLVM_DEBUG(dbgs() << " Def: " << *DefInst << '\n'); - - if (isa(DefInst)) - return false; - - if (const IntrinsicInst *II = dyn_cast(DefInst)) { - switch (II->getIntrinsicID()) { - case Intrinsic::amdgcn_s_barrier: - case Intrinsic::amdgcn_wave_barrier: - return false; - default: - break; - } - } - - // Ignore atomics not aliasing with the original load, any atomic is a - // universal MemoryDef from MSSA's point of view too, just like a fence. - const auto checkNoAlias = [this, Load](auto I) -> bool { - return I && AA->isNoAlias(I->getPointerOperand(), - Load->getPointerOperand()); - }; - - if (checkNoAlias(dyn_cast(DefInst)) || - checkNoAlias(dyn_cast(DefInst))) - return false; - - return true; - }; - - LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n'); - - // Start with a nearest dominating clobbering access, it will be either - // live on entry (nothing to do, load is not clobbered), MemoryDef, or - // MemoryPhi if several MemoryDefs can define this memory state. In that - // case add all Defs to WorkList and continue going up and checking all - // the definitions of this memory location until the root. When all the - // defs are exhausted and came to the entry state we have no clobber. - // Along the scan ignore barriers and fences which are considered clobbers - // by the MemorySSA, but not really writing anything into the memory. 
- while (!WorkList.empty()) { - MemoryAccess *MA = WorkList.pop_back_val(); - if (!Visited.insert(MA).second) - continue; - - if (MSSA->isLiveOnEntryDef(MA)) - continue; - - if (MemoryDef *Def = dyn_cast(MA)) { - if (isReallyAClobber(Def)) { - LLVM_DEBUG(dbgs() << " -> load is clobbered\n"); - return true; - } - - WorkList.push_back( - Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc)); - continue; - } - - const MemoryPhi *Phi = cast(MA); - for (auto &Use : Phi->incoming_values()) - WorkList.push_back(cast(&Use)); - } - - LLVM_DEBUG(dbgs() << " -> no clobber\n"); - return false; -} - void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { if (DA->isUniform(&I)) setUniformMetadata(&I); @@ -160,46 +86,18 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { Value *Ptr = I.getPointerOperand(); if (!DA->isUniform(Ptr)) return; + Instruction *PtrI = dyn_cast(Ptr); + if (PtrI) + setUniformMetadata(PtrI); + // We're tracking up to the Function boundaries, and cannot go beyond because // of FunctionPass restrictions. We can ensure that is memory not clobbered // for memory operations that are live in to entry points only. - Instruction *PtrI = dyn_cast(Ptr); - - if (!isEntryFunc) { - if (PtrI) - setUniformMetadata(PtrI); + if (!isEntryFunc) return; - } - - bool NotClobbered = false; bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; - if (PtrI) - NotClobbered = GlobalLoad && !isClobberedInFunction(&I); - else if (isa(Ptr) || isa(Ptr)) { - if (GlobalLoad && !isClobberedInFunction(&I)) { - NotClobbered = true; - // Lookup for the existing GEP - if (noClobberClones.count(Ptr)) { - PtrI = noClobberClones[Ptr]; - } else { - // Create GEP of the Value - Function *F = I.getParent()->getParent(); - Value *Idx = Constant::getIntegerValue( - Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); - // Insert GEP at the entry to make it dominate all uses - PtrI = GetElementPtrInst::Create(I.getType(), Ptr, - ArrayRef(Idx), Twine(""), - F->getEntryBlock().getFirstNonPHI()); - } - I.replaceUsesOfWith(Ptr, PtrI); - } - } - - if (PtrI) { - setUniformMetadata(PtrI); - if (NotClobbered) - setNoClobberMetadata(PtrI); - } + if (GlobalLoad && !AMDGPU::isClobberedInFunction(&I, MSSA, AA)) + setNoClobberMetadata(&I); } bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { @@ -215,9 +113,9 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { AA = &getAnalysis().getAAResults(); isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv()); + Changed = false; visit(F); - noClobberClones.clear(); - return true; + return Changed; } FunctionPass * diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 6e2984f2a04f..57a4660bc1eb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -27,6 +27,8 @@ #include "SIMachineFunctionInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -34,6 +36,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/AMDHSAKernelDescriptor.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -111,6 +114,12 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const { } void 
AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) { + IsTargetStreamerInitialized = false; +} + +void AMDGPUAsmPrinter::initTargetStreamer(Module &M) { + IsTargetStreamerInitialized = true; + // TODO: Which one is called first, emitStartOfAsmFile or // emitFunctionBodyStart? if (getTargetStreamer() && !getTargetStreamer()->getTargetID()) @@ -143,6 +152,10 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) { } void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { + // Init target streamer if it has not yet happened + if (!IsTargetStreamerInitialized) + initTargetStreamer(M); + // Following code requires TargetStreamer to be present. if (!getTargetStreamer()) return; @@ -234,8 +247,8 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { auto &ObjectFileInfo = *Context.getObjectFileInfo(); auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection(); - Streamer.PushSection(); - Streamer.SwitchSection(&ReadOnlySection); + Streamer.pushSection(); + Streamer.switchSection(&ReadOnlySection); // CP microcode requires the kernel descriptor to be allocated on 64 byte // alignment. @@ -256,7 +269,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { CurrentProgramInfo.FlatUsed), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed); - Streamer.PopSection(); + Streamer.popSection(); } void AMDGPUAsmPrinter::emitFunctionEntryLabel() { @@ -319,7 +332,7 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { const DataLayout &DL = GV->getParent()->getDataLayout(); uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); - Align Alignment = GV->getAlign().getValueOr(Align(4)); + Align Alignment = GV->getAlign().value_or(Align(4)); emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); emitLinkage(GV, GVSym); @@ -339,7 +352,7 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) && (STI.getTargetTriple().getOS() == Triple::AMDHSA || STI.getTargetTriple().getOS() == Triple::AMDPAL)) { - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + OutStreamer->switchSection(getObjFileLowering().getTextSection()); getTargetStreamer()->EmitCodeEnd(STI); } @@ -381,7 +394,7 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; } - if (MFI.hasQueuePtr()) { + if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; } @@ -437,6 +450,11 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( } bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + // Init target streamer lazily on the first function so that previous passes + // can set metadata. 
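The lazy-init pattern introduced here, in isolation: do nothing up front, then initialize on the first function or from the end-of-file hook, whichever runs first, so earlier passes can still set module metadata. A standalone sketch (the class is a stand-in, not the real AsmPrinter):

#include <cassert>

struct Printer {
  bool Initialized = false;
  int InitCount = 0;

  void initStreamer() {
    Initialized = true;
    ++InitCount;
  }
  void runOnFunction() {
    if (!Initialized) // first function triggers init
      initStreamer();
  }
  void emitEndOfFile() {
    if (!Initialized) // a module with no functions still gets init
      initStreamer();
  }
};

int main() {
  Printer p;
  p.runOnFunction();
  p.runOnFunction();
  p.emitEndOfFile();
  assert(p.InitCount == 1); // init happens exactly once
}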
+ if (!IsTargetStreamerInitialized) + initTargetStreamer(*MF.getFunction().getParent()); + ResourceUsage = &getAnalysis(); CurrentProgramInfo = SIProgramInfo(); @@ -454,7 +472,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) { MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(ConfigSection); + OutStreamer->switchSection(ConfigSection); } if (MFI->isModuleEntryFunction()) { @@ -491,7 +509,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (isVerbose()) { MCSectionELF *CommentSection = Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(CommentSection); + OutStreamer->switchSection(CommentSection); if (!MFI->isEntryFunction()) { OutStreamer->emitRawComment(" Function info:", false); @@ -590,7 +608,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (DumpCodeInstEmitter) { - OutStreamer->SwitchSection( + OutStreamer->switchSection( Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0)); for (size_t i = 0; i < DisasmLines.size(); ++i) { @@ -677,7 +695,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion; const uint64_t MaxScratchPerWorkitem = - GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize(); + STM.getMaxWaveScratchSize() / STM.getWavefrontSize(); if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) { DiagnosticInfoStackSize DiagStackSize(MF.getFunction(), ProgInfo.ScratchSize, @@ -857,22 +875,18 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, LDSAlignShift = 9; } - unsigned LDSSpillSize = - MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize(); - - ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize; + ProgInfo.LDSSize = MFI->getLDSSize(); ProgInfo.LDSBlocks = alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift; - // Scratch is allocated in 256 dword blocks. - unsigned ScratchAlignShift = 10; + // Scratch is allocated in 64-dword or 256-dword blocks. + unsigned ScratchAlignShift = + STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10; // We need to program the hardware with the amount of scratch memory that // is used by the entire wave. ProgInfo.ScratchSize is the amount of // scratch memory used per thread. - ProgInfo.ScratchBlocks = - alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(), - 1ULL << ScratchAlignShift) >> - ScratchAlignShift; + ProgInfo.ScratchBlocks = divideCeil( + ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift); if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) { ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1; @@ -886,8 +900,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, else if (MFI->hasWorkItemIDY()) TIDIGCompCnt = 1; + // The private segment wave byte offset is the last of the system SGPRs. We + // initially assumed it was allocated, and may have used it. It shouldn't harm + // anything to disable it if we know the stack isn't used here. We may still + // have emitted code reading it to initialize scratch, but if that's unused + // reading garbage should be OK. 
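(An aside on the scratch arithmetic above: per-wave scratch bytes are now rounded up with divideCeil, using 256-byte blocks, i.e. 64 dwords, on GFX11 via shift 8, and 1024-byte blocks, i.e. 256 dwords, before it via shift 10. A standalone check of that form, a sketch only:)

#include <cassert>
#include <cstdint>

uint64_t divideCeil(uint64_t n, uint64_t d) { return (n + d - 1) / d; }

int main() {
  const uint64_t scratchPerThread = 100, waveSize = 32;
  const uint64_t waveBytes = scratchPerThread * waveSize;  // 3200 bytes
  assert(divideCeil(waveBytes, 1ULL << 8) == 13);  // GFX11: 256-byte blocks
  assert(divideCeil(waveBytes, 1ULL << 10) == 4);  // pre-GFX11: 1024-byte blocks
}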
+ const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0; ProgInfo.ComputePGMRSrc2 = - S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) | + S_00B84C_SCRATCH_EN(EnablePrivateSegment) | S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) | // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP. S_00B84C_TRAP_HANDLER(STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled()) | @@ -931,6 +951,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) { const SIMachineFunctionInfo *MFI = MF.getInfo(); + const GCNSubtarget &STM = MF.getSubtarget(); unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv()); if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { @@ -942,7 +963,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2); OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE); - OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks)); + OutStreamer->emitInt32( + STM.getGeneration() >= AMDGPUSubtarget::GFX11 + ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks) + : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks)); // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 = // 0" comment but I don't see a corresponding field in the register spec. @@ -951,14 +975,18 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE); - OutStreamer->emitIntValue( - S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); + OutStreamer->emitInt32( + STM.getGeneration() >= AMDGPUSubtarget::GFX11 + ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks) + : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks)); } if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS); - OutStreamer->emitInt32( - S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks)); + unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11 + ? divideCeil(CurrentProgramInfo.LDSBlocks, 2) + : CurrentProgramInfo.LDSBlocks; + OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize)); OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA); OutStreamer->emitInt32(MFI->getPSInputEnable()); OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR); @@ -984,6 +1012,13 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, MD->setEntryPoint(CC, MF.getFunction().getName()); MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU); + + // Only set AGPRs for supported devices + const GCNSubtarget &STM = MF.getSubtarget(); + if (STM.hasMAIInsts()) { + MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR); + } + MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU); MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC)); if (AMDGPU::isCompute(CC)) { @@ -995,12 +1030,14 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, // ScratchSize is in bytes, 16 aligned. MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16)); if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { - MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks)); + unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11 + ? 
divideCeil(CurrentProgramInfo.LDSBlocks, 2) + : CurrentProgramInfo.LDSBlocks; + MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize)); MD->setSpiPsInputEna(MFI->getPSInputEnable()); MD->setSpiPsInputAddr(MFI->getPSInputAddr()); } - const GCNSubtarget &STM = MF.getSubtarget(); if (STM.isWave32()) MD->setWave32(MF.getFunction().getCallingConv()); } @@ -1067,7 +1104,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (MFI->hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (MFI->hasQueuePtr()) + if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; if (MFI->hasKernargSegmentPtr()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index d5c60aa3be7d..ddda2cf107b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -77,6 +77,8 @@ private: const MachineFunction &MF, const SIProgramInfo &PI) const; + void initTargetStreamer(Module &M); + public: explicit AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); @@ -132,6 +134,7 @@ protected: std::vector DisasmLines, HexLines; size_t DisasmLineMaxLen; + bool IsTargetStreamerInitialized; }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 1e2cf3890d0a..3ccfd9dde269 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -311,6 +311,12 @@ Value *AMDGPUAtomicOptimizer::buildReduction(IRBuilder<> &B, if (ST->isWave32()) return V; + if (ST->hasPermLane64()) { + // Reduce across the upper and lower 32 lanes. + return buildNonAtomicBinOp( + B, Op, V, B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V)); + } + // Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and // combine them with a scalar operation. Function *ReadLane = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def new file mode 100644 index 000000000000..0a2cf3874245 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -0,0 +1,31 @@ +//===--- AMDGPUAttributes.def ---------------------------------*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains descriptions of the various function attributes +// that indicate *absence* of the corresponding implicit kernel +// arguments. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! 
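// (This .def deliberately has no include guard: it is an X-macro, re-included
// under a different AMDGPU_ATTRIBUTE definition by each consumer. A sketch of
// a typical expansion, mirroring the AMDGPUAttributor.cpp use further below:
//
//   #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
//   enum ImplicitArgumentPositions {
//   #include "AMDGPUAttributes.def"   // DISPATCH_PTR_POS, QUEUE_PTR_POS, ...
//     LAST_ARG_POS
//   };
//
// The attribute entries themselves follow.)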
+ +AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr") +AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr") +AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id") +AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr") +AMDGPU_ATTRIBUTE(MULTIGRID_SYNC_ARG, "amdgpu-no-multigrid-sync-arg") +AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr") +AMDGPU_ATTRIBUTE(HEAP_PTR, "amdgpu-no-heap-ptr") +AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x") +AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y") +AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z") +AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x") +AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y") +AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z") + +#undef AMDGPU_ATTRIBUTE diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index b4ebc7d7d75f..8de0d7e6bff1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -12,6 +12,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -22,37 +23,25 @@ using namespace llvm; +#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS, + +enum ImplicitArgumentPositions { + #include "AMDGPUAttributes.def" + LAST_ARG_POS +}; + +#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, + enum ImplicitArgumentMask { NOT_IMPLICIT_INPUT = 0, - - // SGPRs - DISPATCH_PTR = 1 << 0, - QUEUE_PTR = 1 << 1, - DISPATCH_ID = 1 << 2, - IMPLICIT_ARG_PTR = 1 << 3, - WORKGROUP_ID_X = 1 << 4, - WORKGROUP_ID_Y = 1 << 5, - WORKGROUP_ID_Z = 1 << 6, - - // VGPRS: - WORKITEM_ID_X = 1 << 7, - WORKITEM_ID_Y = 1 << 8, - WORKITEM_ID_Z = 1 << 9, - ALL_ARGUMENT_MASK = (1 << 10) - 1 + #include "AMDGPUAttributes.def" + ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 }; +#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, static constexpr std::pair ImplicitAttrs[] = { - {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"}, - {QUEUE_PTR, "amdgpu-no-queue-ptr"}, - {DISPATCH_ID, "amdgpu-no-dispatch-id"}, - {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"}, - {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"}, - {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"}, - {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}, - {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"}, - {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"}, - {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"} + #include "AMDGPUAttributes.def" }; // We do not need to note the x workitem or workgroup id because they are always @@ -61,7 +50,9 @@ static constexpr std::pair= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR; + NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5. return QUEUE_PTR; default: return NOT_IMPLICIT_INPUT; @@ -114,7 +115,7 @@ static bool isDSAddress(const Constant *C) { /// Returns true if the function requires the implicit argument be passed /// regardless of the function contents. -static bool funcRequiresImplicitArgPtr(const Function &F) { +static bool funcRequiresHostcallPtr(const Function &F) { // Sanitizers require the hostcall buffer passed in the implicit arguments. return F.hasFnAttribute(Attribute::SanitizeAddress) || F.hasFnAttribute(Attribute::SanitizeThread) || @@ -140,6 +141,12 @@ public: return ST.hasApertureRegs(); } + /// Check if the subtarget supports GetDoorbellID.
+ bool supportsGetDoorbellID(Function &F) { + const GCNSubtarget &ST = TM.getSubtarget(F); + return ST.supportsGetDoorbellID(); + } + std::pair getFlatWorkGroupSizes(const Function &F) { const GCNSubtarget &ST = TM.getSubtarget(F); return ST.getFlatWorkGroupSizes(F); @@ -152,7 +159,7 @@ public: } private: - /// Check if the ConstantExpr \p CE requires the queue ptr attribute. + /// Check if the ConstantExpr \p CE requires the queue pointer. static bool visitConstExpr(const ConstantExpr *CE) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); @@ -186,7 +193,7 @@ private: } public: - /// Returns true if \p Fn needs a queue ptr attribute because of \p C. + /// Returns true if \p Fn needs the queue pointer because of \p C. bool needsQueuePtr(const Constant *C, Function &Fn) { bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); bool HasAperture = hasApertureRegs(Fn); @@ -205,7 +212,7 @@ public: } private: - /// Used to determine if the Constant needs a queue ptr attribute. + /// Used to determine if the Constant needs the queue pointer. DenseMap ConstantStatus; }; @@ -353,12 +360,15 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // If the function requires the implicit arg pointer due to sanitizers, // assume it's needed even if explicitly marked as not requiring it. - const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F); - if (NeedsImplicit) + const bool NeedsHostcall = funcRequiresHostcallPtr(*F); + if (NeedsHostcall) { removeAssumedBits(IMPLICIT_ARG_PTR); + removeAssumedBits(HOSTCALL_PTR); + } for (auto Attr : ImplicitAttrs) { - if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR) + if (NeedsHostcall && + (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) continue; if (F->hasFnAttribute(Attr.second)) @@ -388,9 +398,11 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { return indicatePessimisticFixpoint(); bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); - auto &InfoCache = static_cast(A.getInfoCache()); - bool NeedsQueuePtr = false; + bool NeedsImplicit = false; + auto &InfoCache = static_cast(A.getInfoCache()); + bool HasApertureRegs = InfoCache.hasApertureRegs(*F); + bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F); for (Function *Callee : AAEdges.getOptimisticEdges()) { Intrinsic::ID IID = Callee->getIntrinsicID(); @@ -403,20 +415,87 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { bool NonKernelOnly = false; ImplicitArgumentMask AttrMask = - intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr); + intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, + HasApertureRegs, SupportsGetDoorbellID); if (AttrMask != NOT_IMPLICIT_INPUT) { if ((IsNonEntryFunc || !NonKernelOnly)) removeAssumedBits(AttrMask); } } - // If we found that we need amdgpu-queue-ptr, nothing else to do. - if (NeedsQueuePtr) { + // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base. + if (NeedsImplicit) + removeAssumedBits(IMPLICIT_ARG_PTR); + + if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) { + // Under V5, we need implicitarg_ptr + offsets to access private_base or + // shared_base. We do not actually need queue_ptr.
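// (In short: on code object V5 the aperture bases sit at fixed offsets from
// implicitarg_ptr, so an aperture query keeps IMPLICIT_ARG_PTR assumed-needed
// rather than QUEUE_PTR; before V5, and without aperture registers, the queue
// pointer itself is still required.)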
+ if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) + removeAssumedBits(IMPLICIT_ARG_PTR); + else + removeAssumedBits(QUEUE_PTR); + } + + if (funcRetrievesMultigridSyncArg(A)) { + assert(!isAssumed(IMPLICIT_ARG_PTR) && + "multigrid_sync_arg needs implicitarg_ptr"); + removeAssumedBits(MULTIGRID_SYNC_ARG); + } + + if (funcRetrievesHostcallPtr(A)) { + assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr"); + removeAssumedBits(HOSTCALL_PTR); + } + + if (funcRetrievesHeapPtr(A)) { + assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr"); + removeAssumedBits(HEAP_PTR); + } + + if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) { + assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr"); removeAssumedBits(QUEUE_PTR); - return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : - ChangeStatus::UNCHANGED; } + return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; + } + + ChangeStatus manifest(Attributor &A) override { + SmallVector AttrList; + LLVMContext &Ctx = getAssociatedFunction()->getContext(); + + for (auto Attr : ImplicitAttrs) { + if (isKnown(Attr.first)) + AttrList.push_back(Attribute::get(Ctx, Attr.second)); + } + + return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, + /* ForceReplace */ true); + } + + const std::string getAsStr() const override { + std::string Str; + raw_string_ostream OS(Str); + OS << "AMDInfo["; + for (auto Attr : ImplicitAttrs) + OS << ' ' << Attr.second; + OS << " ]"; + return OS.str(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} + +private: + bool checkForQueuePtr(Attributor &A) { + Function *F = getAssociatedFunction(); + bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); + + auto &InfoCache = static_cast(A.getInfoCache()); + + bool NeedsQueuePtr = false; + auto CheckAddrSpaceCasts = [&](Instruction &I) { unsigned SrcAS = static_cast(I).getSrcAddressSpace(); if (castRequiresQueuePtr(SrcAS)) { @@ -431,7 +510,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // `checkForAllInstructions` is much more cheaper than going through all // instructions, try it first. - // amdgpu-queue-ptr is not needed if aperture regs is present. + // The queue pointer is not needed if aperture regs is present. if (!HasApertureRegs) { bool UsedAssumedInformation = false; A.checkForAllInstructions(CheckAddrSpaceCasts, *this, @@ -439,61 +518,79 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { UsedAssumedInformation); } - // If we found that we need amdgpu-queue-ptr, nothing else to do. - if (NeedsQueuePtr) { - removeAssumedBits(QUEUE_PTR); - return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : - ChangeStatus::UNCHANGED; - } + // If we found that we need the queue pointer, nothing else to do. + if (NeedsQueuePtr) + return true; - if (!IsNonEntryFunc && HasApertureRegs) { - return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : - ChangeStatus::UNCHANGED; - } + if (!IsNonEntryFunc && HasApertureRegs) + return false; for (BasicBlock &BB : *F) { for (Instruction &I : BB) { for (const Use &U : I.operands()) { if (const auto *C = dyn_cast(U)) { - if (InfoCache.needsQueuePtr(C, *F)) { - removeAssumedBits(QUEUE_PTR); - return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : - ChangeStatus::UNCHANGED; - } + if (InfoCache.needsQueuePtr(C, *F)) + return true; } } } } - return getAssumed() != OrigAssumed ? 
ChangeStatus::CHANGED : - ChangeStatus::UNCHANGED; + return false; } - ChangeStatus manifest(Attributor &A) override { - SmallVector AttrList; - LLVMContext &Ctx = getAssociatedFunction()->getContext(); + bool funcRetrievesMultigridSyncArg(Attributor &A) { + auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(); + AAPointerInfo::OffsetAndSize OAS(Pos, 8); + return funcRetrievesImplicitKernelArg(A, OAS); + } - for (auto Attr : ImplicitAttrs) { - if (isKnown(Attr.first)) - AttrList.push_back(Attribute::get(Ctx, Attr.second)); - } + bool funcRetrievesHostcallPtr(Attributor &A) { + auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(); + AAPointerInfo::OffsetAndSize OAS(Pos, 8); + return funcRetrievesImplicitKernelArg(A, OAS); + } - return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, - /* ForceReplace */ true); + bool funcRetrievesHeapPtr(Attributor &A) { + if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) + return false; + AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8); + return funcRetrievesImplicitKernelArg(A, OAS); } - const std::string getAsStr() const override { - std::string Str; - raw_string_ostream OS(Str); - OS << "AMDInfo["; - for (auto Attr : ImplicitAttrs) - OS << ' ' << Attr.second; - OS << " ]"; - return OS.str(); + bool funcRetrievesQueuePtr(Attributor &A) { + if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) + return false; + AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8); + return funcRetrievesImplicitKernelArg(A, OAS); } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} + bool funcRetrievesImplicitKernelArg(Attributor &A, + AAPointerInfo::OffsetAndSize OAS) { + // Check if this is a call to the implicitarg_ptr builtin and it + // is used to retrieve the hostcall pointer. The implicit arg for + // hostcall is not used only if every use of the implicitarg_ptr + // is a load that clearly does not retrieve any byte of the + // hostcall pointer. We check this by tracing all the uses of the + // initial call to the implicitarg_ptr intrinsic. 
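// (A hedged sketch of the byte-range test this tracing reduces to; ByteRange
// and overlaps() are illustrative helpers, not the AAPointerInfo API:
//
//   struct ByteRange { uint64_t Offset, Size; };
//   static bool overlaps(ByteRange A, ByteRange B) {
//     return A.Offset < B.Offset + B.Size && B.Offset < A.Offset + A.Size;
//   }
//
// An 8-byte load at implicitarg_ptr+Off only implicates a hidden argument at
// position Pos when [Off, Off+8) overlaps [Pos, Pos+8).)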
+ auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) { + auto &Call = cast(I); + if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr) + return true; + + const auto &PointerInfoAA = A.getAAFor( + *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); + + return PointerInfoAA.forallInterferingAccesses( + OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) { + return Acc.getRemoteInst()->isDroppable(); + }); + }; + + bool UsedAssumedInformation = false; + return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this, + UsedAssumedInformation); + } }; AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, @@ -646,9 +743,14 @@ public: AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); DenseSet Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, - &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID}); + &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID}); + + AttributorConfig AC(CGUpdater); + AC.Allowed = &Allowed; + AC.IsModulePass = true; + AC.DefaultInitializeLiveInternals = false; - Attributor A(Functions, InfoCache, CGUpdater, &Allowed); + Attributor A(Functions, InfoCache, AC); for (Function &F : M) { if (!F.isIntrinsic()) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index cd084fd5440a..fd812eb676ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #define DEBUG_TYPE "amdgpu-call-lowering" @@ -349,7 +350,6 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, FunctionLoweringInfo &FLI) const { MachineFunction &MF = B.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *MFI = MF.getInfo(); MFI->setIfReturnsVoid(!Val); @@ -365,40 +365,15 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, return true; } - auto const &ST = MF.getSubtarget(); - - unsigned ReturnOpc = 0; - if (IsShader) - ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG; - else if (CC == CallingConv::AMDGPU_Gfx) - ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx; - else - ReturnOpc = AMDGPU::S_SETPC_B64_return; - + unsigned ReturnOpc = + IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN; auto Ret = B.buildInstrNoInsert(ReturnOpc); - Register ReturnAddrVReg; - if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { - ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass); - Ret.addUse(ReturnAddrVReg); - } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) { - ReturnAddrVReg = - MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass); - Ret.addUse(ReturnAddrVReg); - } if (!FLI.CanLowerReturn) insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister); else if (!lowerReturnVal(B, Val, VRegs, Ret)) return false; - if (ReturnOpc == AMDGPU::S_SETPC_B64_return || - ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) { - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF), - &AMDGPU::SGPR_64RegClass); - B.buildCopy(ReturnAddrVReg, LiveInReturn); - } - // TODO: Handle CalleeSavedRegsViaCopy. 
B.insertInstr(Ret); @@ -479,7 +454,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(DispatchPtrReg); } - if (Info.hasQueuePtr()) { + if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); @@ -523,7 +498,7 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel( const SITargetLowering &TLI = *getTLI(); const DataLayout &DL = F.getParent()->getDataLayout(); - Info->allocateModuleLDSGlobal(F.getParent()); + Info->allocateModuleLDSGlobal(F); SmallVector ArgLocs; CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); @@ -543,9 +518,8 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel( if (AllocSize == 0) continue; - MaybeAlign ABIAlign = IsByRef ? Arg.getParamAlign() : None; - if (!ABIAlign) - ABIAlign = DL.getABITypeAlign(ArgTy); + MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None; + Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy); uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset; ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize; @@ -608,19 +582,11 @@ bool AMDGPUCallLowering::lowerFormalArguments( const SIRegisterInfo *TRI = Subtarget.getRegisterInfo(); const DataLayout &DL = F.getParent()->getDataLayout(); - Info->allocateModuleLDSGlobal(F.getParent()); + Info->allocateModuleLDSGlobal(F); SmallVector ArgLocs; CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext()); - if (!IsEntryFunc) { - Register ReturnAddrReg = TRI->getReturnAddressReg(MF); - Register LiveInReturn = MF.addLiveIn(ReturnAddrReg, - &AMDGPU::SGPR_64RegClass); - MBB.addLiveIn(ReturnAddrReg); - B.buildCopy(LiveInReturn, ReturnAddrReg); - } - if (Info->hasImplicitBufferPtr()) { Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 1682d43ae671..b6c66077675f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -148,53 +148,32 @@ def CSR_AMDGPU_VGPRs : CalleeSavedRegs< (sequence "VGPR%u", 248, 255)) >; -def CSR_AMDGPU_AGPRs_32_255 : CalleeSavedRegs< +def CSR_AMDGPU_AGPRs : CalleeSavedRegs< (sequence "AGPR%u", 32, 255) >; -def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs< - (sequence "SGPR%u", 32, 105) +def CSR_AMDGPU_SGPRs : CalleeSavedRegs< + (sequence "SGPR%u", 30, 105) >; -def CSR_AMDGPU_SI_Gfx_SGPRs_4_29 : CalleeSavedRegs< - (sequence "SGPR%u", 4, 29) +def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs< + (add (sequence "SGPR%u", 4, 31), (sequence "SGPR%u", 64, 105)) >; -def CSR_AMDGPU_SI_Gfx_SGPRs_64_105 : CalleeSavedRegs< - (sequence "SGPR%u", 64, 105) +def CSR_AMDGPU : CalleeSavedRegs< + (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs) >; -// Just to get the regmask, not for calling convention purposes. -def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs< - (sequence "VGPR%u", 0, 255) ->; - -def CSR_AMDGPU_AllAGPRs : CalleeSavedRegs< - (sequence "AGPR%u", 0, 255) ->; -def CSR_AMDGPU_AllVectorRegs : CalleeSavedRegs< - (add CSR_AMDGPU_AllVGPRs, CSR_AMDGPU_AllAGPRs) ->; - -// Just to get the regmask, not for calling convention purposes. 
-def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs< - (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI) ->; - -def CSR_AMDGPU_HighRegs : CalleeSavedRegs< - (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105) ->; - -def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs< - (add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255) +def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs< + (add CSR_AMDGPU, CSR_AMDGPU_AGPRs) >; def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs< - (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105) + (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs) >; -def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs< - (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs_32_255) +def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs< + (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs) >; def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>; @@ -233,3 +212,24 @@ def CC_AMDGPU : CallingConv<[ "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C", CCDelegateTo> ]>; + +// Trivial class to denote when a def is used only to get a RegMask, i.e. +// SaveList is ignored and the def is not used as part of any calling +// convention. +class RegMask : CalleeSavedRegs; + +def AMDGPU_AllVGPRs : RegMask< + (sequence "VGPR%u", 0, 255) +>; + +def AMDGPU_AllAGPRs : RegMask< + (sequence "AGPR%u", 0, 255) +>; + +def AMDGPU_AllVectorRegs : RegMask< + (add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs) +>; + +def AMDGPU_AllAllocatableSRegs : RegMask< + (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI) +>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 1920684d8f1f..94d7844e8a32 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -877,7 +877,7 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { return getMul64(Builder, LHS, RHS).second; } -/// Figure out how many bits are really needed for this ddivision. \p AtLeast is +/// Figure out how many bits are really needed for this division. \p AtLeast is /// an optimization hint to bypass the second ComputeNumSignBits call if we the /// first one is insufficient. Returns -1 on failure. 
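/// (For example, a 32-bit udiv whose operands each need at most 24 bits can
/// be expanded through the faster 24-bit multiply-based path; the \p AtLeast
/// threshold lets the caller skip the second known-bits query once the first
/// operand has already exceeded it.)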
int AMDGPUCodeGenPrepare::getDivNumBits(BinaryOperator &I, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index e79ff9b597c9..c16d8ee51a7a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -373,7 +373,8 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI, replaceRegWith(MRI, Dst, NegatedMatchInfo); // Recreate non negated value for other uses of old MatchInfoDst - Builder.setInstrAndDebugLoc(MI); + auto NextInst = ++MatchInfo->getIterator(); + Builder.setInstrAndDebugLoc(*NextInst); Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp index 04bf623bfa46..8fcf669041b9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp @@ -50,7 +50,7 @@ public: } bool createInitOrFiniKernel(Module &M, GlobalVariable *GV, bool IsCtor) { - if (!GV) + if (!GV || !GV->hasInitializer()) return false; ConstantArray *GA = dyn_cast(GV->getInitializer()); if (!GA || GA->getNumOperands() == 0) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp index bed0707f3aa7..8236ff609f85 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp @@ -22,7 +22,7 @@ namespace { class ExportClustering : public ScheduleDAGMutation { public: - ExportClustering() {} + ExportClustering() = default; void apply(ScheduleDAGInstrs *DAG) override; }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp deleted file mode 100644 index ea6c6d0fd212..000000000000 --- a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp +++ /dev/null @@ -1,64 +0,0 @@ -//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Promote indirect (bitcast) calls to direct calls when they are statically -/// known to be direct. Required when InstCombine is not run (e.g. at OptNone) -/// because AMDGPU does not support indirect calls. 
-/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/CallPromotionUtils.h" - -using namespace llvm; - -#define DEBUG_TYPE "amdgpu-fix-function-bitcasts" - -namespace { -class AMDGPUFixFunctionBitcasts final - : public ModulePass, - public InstVisitor { - - bool runOnModule(Module &M) override; - - bool Modified; - -public: - void visitCallBase(CallBase &CB) { - if (CB.getCalledFunction()) - return; - auto *Callee = - dyn_cast(CB.getCalledOperand()->stripPointerCasts()); - if (Callee && isLegalToPromote(CB, Callee)) { - promoteCall(CB, Callee); - Modified = true; - } - } - - static char ID; - AMDGPUFixFunctionBitcasts() : ModulePass(ID) {} -}; -} // End anonymous namespace - -char AMDGPUFixFunctionBitcasts::ID = 0; -char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID; -INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE, - "Fix function bitcasts for AMDGPU", false, false) - -ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() { - return new AMDGPUFixFunctionBitcasts(); -} - -bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) { - Modified = false; - visit(M); - return Modified; -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 7fd94a977be7..5747fc0ca8e6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -47,10 +47,30 @@ def gi_vop3pmods : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3pmodsdot : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_dotiuvop3pmods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_wmmaopselvop3pmods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_vop3opselmods : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vinterpmods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_vinterpmods_hi : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // FIXME: Why do we have both VOP3OpSel and VOP3OpSelMods? def gi_vop3opsel : GIComplexOperandMatcher, @@ -93,6 +113,10 @@ def gi_flat_scratch_saddr : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_scratch_svaddr : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_ds_1addr_1offset : GIComplexOperandMatcher, GIComplexPatternEquiv; @@ -123,7 +147,7 @@ def gi_smrd_buffer_imm32 : // Separate load nodes are defined to glue m0 initialization in // SelectionDAG. The GISel selector can just insert m0 initialization -// directly before before selecting a glue-less load, so hide this +// directly before selecting a glue-less load, so hide this // distinction. 
def : GINodeEquiv { @@ -222,6 +246,9 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + class GISelSop2Pat < SDPatternOperator node, Instruction inst, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp index cabdc6998011..1bbdc39a7a5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPUGlobalISelUtils.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/IR/Constants.h" +#include "llvm/Support/LowLevelTypeImpl.h" using namespace llvm; using namespace MIPatternMatch; @@ -66,3 +68,12 @@ bool AMDGPU::isLegalVOP3PShuffleMask(ArrayRef Mask) { return true; return (Mask[0] & 2) == (Mask[1] & 2); } + +bool AMDGPU::hasAtomicFaddRtnForTy(const GCNSubtarget &Subtarget, + const LLT &Ty) { + if (Ty == LLT::scalar(32)) + return Subtarget.hasAtomicFaddRtnInsts(); + if (Ty == LLT::fixed_vector(2, 16) || Ty == LLT::scalar(64)) + return Subtarget.hasGFX90AInsts(); + return false; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h index 14d3a3fb7997..5c600d059b7a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h @@ -16,6 +16,8 @@ namespace llvm { class MachineRegisterInfo; +class GCNSubtarget; +class LLT; namespace AMDGPU { @@ -24,7 +26,7 @@ std::pair getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg); bool isLegalVOP3PShuffleMask(ArrayRef Mask); - +bool hasAtomicFaddRtnForTy(const GCNSubtarget &Subtarget, const LLT &Ty); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index f5018e3a19ac..6fa44ffcbfaa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -400,17 +400,15 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func, auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, otherwise emit dummy - // "none" argument. if (HiddenArgNumBytes >= 32) { + // We forbid the use of features requiring hostcall when compiling OpenCL + // before code object V5, which makes the mutual exclusion between the + // "printf buffer" and "hostcall buffer" here sound. if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); - else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); + else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); } @@ -427,8 +425,12 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func, } // Emit the pointer argument for multi-grid object. 
- if (HiddenArgNumBytes >= 56) - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); + if (HiddenArgNumBytes >= 56) { + if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) + emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); + else + emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); + } } bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { @@ -803,6 +805,8 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF, auto &DL = M->getDataLayout(); auto Int64Ty = Type::getInt64Ty(Func.getContext()); + Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); + if (HiddenArgNumBytes >= 8) emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); @@ -816,19 +820,17 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF, auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, emit "hostcall buffer" - // if "hostcall" module flag is set, otherwise emit dummy "none" argument. if (HiddenArgNumBytes >= 32) { + // We forbid the use of features requiring hostcall when compiling OpenCL + // before code object V5, which makes the mutual exclusion between the + // "printf buffer" and "hostcall buffer" here sound. if (M->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - else if (M->getModuleFlag("amdgpu_hostcall")) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!M->getNamedMetadata("llvm.printf.fmts")); + else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); } @@ -847,9 +849,14 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF, } // Emit the pointer argument for multi-grid object. - if (HiddenArgNumBytes >= 56) - emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, - Args); + if (HiddenArgNumBytes >= 56) { + if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, + Args); + } else { + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); + } + } } msgpack::MapDocNode @@ -876,6 +883,12 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, Kern.getDocument()->getNode(STM.getWavefrontSize()); Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); + + // Only add AGPR count to metadata for supported devices + if (STM.hasMAIInsts()) { + Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR); + } + Kern[".max_flat_workgroup_size"] = Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); Kern[".sgpr_spill_count"] = @@ -971,13 +984,20 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF, msgpack::ArrayDocNode Args) { auto &Func = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); + + // No implicit kernel argument is used. 
+ if (ST.getImplicitArgNumBytes(Func) == 0) + return; + const Module *M = Func.getParent(); auto &DL = M->getDataLayout(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); auto Int64Ty = Type::getInt64Ty(Func.getContext()); auto Int32Ty = Type::getInt32Ty(Func.getContext()); auto Int16Ty = Type::getInt16Ty(Func.getContext()); + Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); @@ -1008,40 +1028,49 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF, if (M->getNamedMetadata("llvm.printf.fmts")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - } else + } else { Offset += 8; // Skipped. + } - if (M->getModuleFlag("amdgpu_hostcall")) { + if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); - } else + } else { Offset += 8; // Skipped. + } - emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, + if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, Args); + } else { + Offset += 8; // Skipped. + } - // Ignore temporarily until it is implemented. - // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); - Offset += 8; + if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); + else + Offset += 8; // Skipped. if (Func.hasFnAttribute("calls-enqueue-kernel")) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, Args); emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, Args); - } else + } else { Offset += 16; // Skipped. + } Offset += 72; // Reserved. - // hidden_private_base and hidden_shared_base are only used by GFX8. - if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // hidden_private_base and hidden_shared_base are only used when the subtarget + // does not have ApertureRegs. + if (!ST.hasApertureRegs()) { emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); - } else + } else { Offset += 8; // Skipped. + } - const SIMachineFunctionInfo &MFI = *MF.getInfo(); if (MFI.hasQueuePtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index bcf7fc449094..9b22d1f4d1b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -42,7 +42,7 @@ namespace HSAMD { class MetadataStreamer { public: - virtual ~MetadataStreamer(){}; + virtual ~MetadataStreamer() = default; virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp new file mode 100644 index 000000000000..5c507ef70a8c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -0,0 +1,439 @@ +//===--- AMDGPUIGroupLP.cpp - AMDGPU IGroupLP ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file This file defines a set of schedule DAG mutations that can be used to +// override default scheduler behavior to enforce specific scheduling patterns. +// They should be used in cases where runtime performance considerations, such +// as inter-wavefront interactions, mean that compile-time heuristics cannot +// predict the optimal instruction ordering, or in kernels where optimum +// instruction scheduling is important enough to warrant manual intervention. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUIGroupLP.h" +#include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +using namespace llvm; + +#define DEBUG_TYPE "machine-scheduler" + +namespace { + +static cl::opt + EnableIGroupLP("amdgpu-igrouplp", + cl::desc("Enable construction of Instruction Groups and " + "their ordering for scheduling"), + cl::init(false)); + +static cl::opt> + VMEMGroupMaxSize("amdgpu-igrouplp-vmem-group-size", cl::init(None), + cl::Hidden, + cl::desc("The maximum number of instructions to include " + "in VMEM group.")); + +static cl::opt> + MFMAGroupMaxSize("amdgpu-igrouplp-mfma-group-size", cl::init(None), + cl::Hidden, + cl::desc("The maximum number of instructions to include " + "in MFMA group.")); + +static cl::opt> + LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None), + cl::Hidden, + cl::desc("The maximum number of instructions to include " + "in lds/gds read group.")); + +static cl::opt> + LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None), + cl::Hidden, + cl::desc("The maximum number of instructions to include " + "in lds/gds write group.")); + +typedef function_ref + CanAddMIFn; + +// Classify instructions into groups to enable fine-tuned control over the +// scheduler. These groups may be more specific than current SchedModel +// instruction classes. +class SchedGroup { +private: + // Function that returns true if a non-bundle MI may be inserted into this + // group. + const CanAddMIFn canAddMI; + + // Maximum number of SUnits that can be added to this group. + Optional MaxSize; + + // Collection of SUnits that are classified as members of this group. + SmallVector Collection; + + ScheduleDAGInstrs *DAG; + + void tryAddEdge(SUnit *A, SUnit *B) { + if (A != B && DAG->canAddEdge(B, A)) { + DAG->addEdge(B, SDep(A, SDep::Artificial)); + LLVM_DEBUG(dbgs() << "Adding edge...\n" + << "from: SU(" << A->NodeNum << ") " << *A->getInstr() + << "to: SU(" << B->NodeNum << ") " << *B->getInstr()); + } + } + +public: + // Add DAG dependencies from all SUnits in this SchedGroup and this SU. If + // MakePred is true, SU will be a predecessor of the SUnits in this + // SchedGroup, otherwise SU will be a successor. + void link(SUnit &SU, bool MakePred = false) { + for (auto A : Collection) { + SUnit *B = &SU; + if (MakePred) + std::swap(A, B); + + tryAddEdge(A, B); + } + } + + // Add DAG dependencies from all SUnits in this SchedGroup and this SU. Use + // the predicate to determine whether SU should be a predecessor (P = true) + // or a successor (P = false) of this SchedGroup.
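  // (Illustrative use of the predicate form, as the SCHED_BARRIER mutation
  // below does with DAG positions:
  //
  //   SG.link(SU, [](const SUnit *A, const SUnit *B) {
  //     return A->NodeNum > B->NodeNum; // swap so the earlier node is the source
  //   });
  //
  // i.e. the predicate decides, per pair, which node becomes the predecessor.)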
+ void link(SUnit &SU, function_ref P) { + for (auto A : Collection) { + SUnit *B = &SU; + if (P(A, B)) + std::swap(A, B); + + tryAddEdge(A, B); + } + } + + // Add DAG dependencies such that SUnits in this group shall be ordered + // before SUnits in OtherGroup. + void link(SchedGroup &OtherGroup) { + for (auto B : OtherGroup.Collection) + link(*B); + } + + // Returns true if no more instructions may be added to this group. + bool isFull() { return MaxSize && Collection.size() >= *MaxSize; } + + // Returns true if SU can be added to this SchedGroup. + bool canAddSU(SUnit &SU, const SIInstrInfo *TII) { + if (isFull()) + return false; + + MachineInstr &MI = *SU.getInstr(); + if (MI.getOpcode() != TargetOpcode::BUNDLE) + return canAddMI(MI, TII); + + // Special case for bundled MIs. + const MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B; + while (E != MBB->end() && E->isBundledWithPred()) + ++E; + + // Return true if all of the bundled MIs can be added to this group. + return std::all_of( + B, E, [this, TII](MachineInstr &MI) { return canAddMI(MI, TII); }); + } + + void add(SUnit &SU) { Collection.push_back(&SU); } + + SchedGroup(CanAddMIFn canAddMI, Optional MaxSize, + ScheduleDAGInstrs *DAG) + : canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {} +}; + +bool isMFMASGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return TII->isMFMA(MI); +} + +bool isVALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return TII->isVALU(MI) && !TII->isMFMA(MI); +} + +bool isSALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return TII->isSALU(MI); +} + +bool isVMEMSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)); +} + +bool isVMEMReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return MI.mayLoad() && + (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))); +} + +bool isVMEMWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return MI.mayStore() && + (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))); +} + +bool isDSWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return MI.mayStore() && TII->isDS(MI); +} + +bool isDSReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) { + return MI.mayLoad() && TII->isDS(MI); +} + +class IGroupLPDAGMutation : public ScheduleDAGMutation { +public: + const SIInstrInfo *TII; + ScheduleDAGMI *DAG; + + IGroupLPDAGMutation() = default; + void apply(ScheduleDAGInstrs *DAGInstrs) override; +}; + +// DAG mutation that coordinates with the SCHED_BARRIER instruction and +// corresponding builtin. The mutation adds edges from specific instruction +// classes determined by the SCHED_BARRIER mask so that they cannot be +// scheduled around the SCHED_BARRIER. +class SchedBarrierDAGMutation : public ScheduleDAGMutation { +private: + const SIInstrInfo *TII; + + ScheduleDAGMI *DAG; + + // Components of the mask that determines which instructions may not be + // scheduled across the SCHED_BARRIER. + enum class SchedBarrierMasks { + NONE = 0u, + ALU = 1u << 0, + VALU = 1u << 1, + SALU = 1u << 2, + MFMA = 1u << 3, + VMEM = 1u << 4, + VMEM_READ = 1u << 5, + VMEM_WRITE = 1u << 6, + DS = 1u << 7, + DS_READ = 1u << 8, + DS_WRITE = 1u << 9, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ DS_WRITE) + }; + + // Cache SchedGroups of each type if we have multiple SCHED_BARRIERs in a + // region. 
+ // + std::unique_ptr MFMASchedGroup = nullptr; + std::unique_ptr VALUSchedGroup = nullptr; + std::unique_ptr SALUSchedGroup = nullptr; + std::unique_ptr VMEMReadSchedGroup = nullptr; + std::unique_ptr VMEMWriteSchedGroup = nullptr; + std::unique_ptr DSWriteSchedGroup = nullptr; + std::unique_ptr DSReadSchedGroup = nullptr; + + // Use a SCHED_BARRIER's mask to identify instruction SchedGroups that should + // not be reordered across the SCHED_BARRIER. + void getSchedGroupsFromMask(int32_t Mask, + SmallVectorImpl &SchedGroups); + + // Add DAG edges that enforce SCHED_BARRIER ordering. + void addSchedBarrierEdges(SUnit &SU); + + // Classify instructions and add them to the SchedGroup. + void initSchedGroup(SchedGroup *SG); + + // Remove all existing edges from a SCHED_BARRIER. + void resetSchedBarrierEdges(SUnit &SU); + +public: + void apply(ScheduleDAGInstrs *DAGInstrs) override; + + SchedBarrierDAGMutation() = default; +}; + +void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) { + const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget(); + TII = ST.getInstrInfo(); + DAG = static_cast(DAGInstrs); + const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); + if (!TSchedModel || DAG->SUnits.empty()) + return; + + LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n"); + + // The order of InstructionGroups in this vector defines the + // order in which edges will be added. In other words, given the + // present ordering, we will try to make each VMEMRead instruction + // a predecessor of each DSRead instruction, and so on. + SmallVector PipelineOrderGroups = { + SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG), + SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG), + SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG), + SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)}; + + for (SUnit &SU : DAG->SUnits) { + LLVM_DEBUG(dbgs() << "Checking Node"; DAG->dumpNode(SU)); + for (auto &SG : PipelineOrderGroups) + if (SG.canAddSU(SU, TII)) + SG.add(SU); + } + + for (unsigned i = 0; i < PipelineOrderGroups.size() - 1; i++) { + auto &GroupA = PipelineOrderGroups[i]; + for (unsigned j = i + 1; j < PipelineOrderGroups.size(); j++) { + auto &GroupB = PipelineOrderGroups[j]; + GroupA.link(GroupB); + } + } +} + +void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) { + const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); + if (!TSchedModel || DAGInstrs->SUnits.empty()) + return; + + LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n"); + + const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget(); + TII = ST.getInstrInfo(); + DAG = static_cast(DAGInstrs); + for (auto &SU : DAG->SUnits) + if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER) + addSchedBarrierEdges(SU); +} + +void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) { + MachineInstr &MI = *SchedBarrier.getInstr(); + assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER); + // Remove all existing edges from the SCHED_BARRIER that were added due to the + // instruction having side effects.
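  // (Worked example of the mask handling in getSchedGroupsFromMask() below: a
  // set bit means that class may move across the barrier, so a SchedGroup is
  // built only for classes whose own bit and covering ALU/VMEM/DS bit are both
  // clear. E.g. Mask = ALU | VMEM_READ = 0b100001 exempts the ALU classes and
  // VMEM reads, while VMEM writes, DS reads and DS writes all get edges.)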
+ resetSchedBarrierEdges(SchedBarrier); + SmallVector SchedGroups; + int32_t Mask = MI.getOperand(0).getImm(); + getSchedGroupsFromMask(Mask, SchedGroups); + for (auto SG : SchedGroups) + SG->link( + SchedBarrier, (function_ref)[]( + const SUnit *A, const SUnit *B) { + return A->NodeNum > B->NodeNum; + }); +} + +void SchedBarrierDAGMutation::getSchedGroupsFromMask( + int32_t Mask, SmallVectorImpl &SchedGroups) { + SchedBarrierMasks SBMask = (SchedBarrierMasks)Mask; + // See IntrinsicsAMDGPU.td for an explanation of these masks and their + // mappings. + // + if ((SBMask & SchedBarrierMasks::VALU) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) { + if (!VALUSchedGroup) { + VALUSchedGroup = std::make_unique(isVALUSGMember, None, DAG); + initSchedGroup(VALUSchedGroup.get()); + } + + SchedGroups.push_back(VALUSchedGroup.get()); + } + + if ((SBMask & SchedBarrierMasks::SALU) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) { + if (!SALUSchedGroup) { + SALUSchedGroup = std::make_unique(isSALUSGMember, None, DAG); + initSchedGroup(SALUSchedGroup.get()); + } + + SchedGroups.push_back(SALUSchedGroup.get()); + } + + if ((SBMask & SchedBarrierMasks::MFMA) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) { + if (!MFMASchedGroup) { + MFMASchedGroup = std::make_unique(isMFMASGMember, None, DAG); + initSchedGroup(MFMASchedGroup.get()); + } + + SchedGroups.push_back(MFMASchedGroup.get()); + } + + if ((SBMask & SchedBarrierMasks::VMEM_READ) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) { + if (!VMEMReadSchedGroup) { + VMEMReadSchedGroup = + std::make_unique(isVMEMReadSGMember, None, DAG); + initSchedGroup(VMEMReadSchedGroup.get()); + } + + SchedGroups.push_back(VMEMReadSchedGroup.get()); + } + + if ((SBMask & SchedBarrierMasks::VMEM_WRITE) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) { + if (!VMEMWriteSchedGroup) { + VMEMWriteSchedGroup = + std::make_unique(isVMEMWriteSGMember, None, DAG); + initSchedGroup(VMEMWriteSchedGroup.get()); + } + + SchedGroups.push_back(VMEMWriteSchedGroup.get()); + } + + if ((SBMask & SchedBarrierMasks::DS_READ) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) { + if (!DSReadSchedGroup) { + DSReadSchedGroup = + std::make_unique(isDSReadSGMember, None, DAG); + initSchedGroup(DSReadSchedGroup.get()); + } + + SchedGroups.push_back(DSReadSchedGroup.get()); + } + + if ((SBMask & SchedBarrierMasks::DS_WRITE) == SchedBarrierMasks::NONE && + (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) { + if (!DSWriteSchedGroup) { + DSWriteSchedGroup = + std::make_unique(isDSWriteSGMember, None, DAG); + initSchedGroup(DSWriteSchedGroup.get()); + } + + SchedGroups.push_back(DSWriteSchedGroup.get()); + } +} + +void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) { + assert(SG); + for (auto &SU : DAG->SUnits) + if (SG->canAddSU(SU, TII)) + SG->add(SU); +} + +void SchedBarrierDAGMutation::resetSchedBarrierEdges(SUnit &SU) { + assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER); + for (auto &P : SU.Preds) + SU.removePred(P); + + for (auto &S : SU.Succs) { + for (auto &SP : S.getSUnit()->Preds) { + if (SP.getSUnit() == &SU) { + S.getSUnit()->removePred(SP); + } + } + } +} + +} // namespace + +namespace llvm { + +std::unique_ptr createIGroupLPDAGMutation() { + return EnableIGroupLP ? 
std::make_unique() : nullptr; +} + +std::unique_ptr createSchedBarrierDAGMutation() { + return std::make_unique(); +} + +} // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h new file mode 100644 index 000000000000..aeb1bbad3705 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h @@ -0,0 +1,22 @@ +//===- AMDGPUMFMAIGroupLP.h - AMDGPU MFMA IGroupLP --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMFMAIGROUPLP_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMFMAIGROUPLP_H + +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include + +namespace llvm { + +std::unique_ptr createIGroupLPDAGMutation(); +std::unique_ptr createSchedBarrierDAGMutation(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMFMAIGROUPLP_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8236e6672247..b00df27f5fd3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,7 +13,9 @@ #include "AMDGPUISelDAGToDAG.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/R600MCTargetDesc.h" #include "R600RegisterInfo.h" #include "SIMachineFunctionInfo.h" @@ -679,9 +681,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::FMA: SelectFMAD_FMA(N); return; - case AMDGPUISD::ATOMIC_CMP_SWAP: - SelectATOMIC_CMP_SWAP(N); - return; case AMDGPUISD::CVT_PKRTZ_F16_F32: case AMDGPUISD::CVT_PKNORM_I16_F32: case AMDGPUISD::CVT_PKNORM_U16_F32: @@ -1008,7 +1007,12 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { SDLoc SL(N); bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; - unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; + unsigned Opc; + if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11) + Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64 + : AMDGPU::V_MAD_U64_U32_gfx11_e64; + else + Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), @@ -1021,7 +1025,12 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) { SDLoc SL(N); bool Signed = N->getOpcode() == ISD::SMUL_LOHI; - unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; + unsigned Opc; + if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11) + Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64 + : AMDGPU::V_MAD_U64_U32_gfx11_e64; + else + Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64); SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); @@ -1798,6 +1807,82 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr, return true; } +// Check whether the flat scratch SVS swizzle bug affects this access. 
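// (The check is deliberately conservative, working from known-bits maxima: a
// carry out of bit 1 cannot be ruled out whenever the low two bits of the
// voffset maximum and of the (soffset + inst_offset) maximum can sum to 4 or
// more. For example, VMax & 3 == 3 and SMax & 3 == 1 gives 3 + 1 >= 4, so the
// access is treated as affected.)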
+bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug( + SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const { + if (!Subtarget->hasFlatScratchSVSSwizzleBug()) + return false; + + // The bug affects the swizzling of SVS accesses if there is any carry out + // from the two low order bits (i.e. from bit 1 into bit 2) when adding + // voffset to (soffset + inst_offset). + KnownBits VKnown = CurDAG->computeKnownBits(VAddr); + KnownBits SKnown = KnownBits::computeForAddSub( + true, false, CurDAG->computeKnownBits(SAddr), + KnownBits::makeConstant(APInt(32, ImmOffset))); + uint64_t VMax = VKnown.getMaxValue().getZExtValue(); + uint64_t SMax = SKnown.getMaxValue().getZExtValue(); + return (VMax & 3) + (SMax & 3) >= 4; +} + +bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, + SDValue &VAddr, SDValue &SAddr, + SDValue &Offset) const { + int64_t ImmOffset = 0; + + SDValue LHS, RHS; + if (isBaseWithConstantOffset64(Addr, LHS, RHS)) { + int64_t COffsetVal = cast(RHS)->getSExtValue(); + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + + if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) { + Addr = LHS; + ImmOffset = COffsetVal; + } else if (!LHS->isDivergent() && COffsetVal > 0) { + SDLoc SL(N); + // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) + + // (large_offset & MaxOffset); + int64_t SplitImmOffset, RemainderOffset; + std::tie(SplitImmOffset, RemainderOffset) + = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true); + + if (isUInt<32>(RemainderOffset)) { + SDNode *VMov = CurDAG->getMachineNode( + AMDGPU::V_MOV_B32_e32, SL, MVT::i32, + CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32)); + VAddr = SDValue(VMov, 0); + SAddr = LHS; + if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset)) + return false; + Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16); + return true; + } + } + } + + if (Addr.getOpcode() != ISD::ADD) + return false; + + LHS = Addr.getOperand(0); + RHS = Addr.getOperand(1); + + if (!LHS->isDivergent() && RHS->isDivergent()) { + SAddr = LHS; + VAddr = RHS; + } else if (!RHS->isDivergent() && LHS->isDivergent()) { + SAddr = RHS; + VAddr = LHS; + } else { + return false; + } + + if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset)) + return false; + SAddr = SelectSAddrFI(CurDAG, SAddr); + Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16); + return true; +} + bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const { ConstantSDNode *C = dyn_cast(ByteOffsetNode); @@ -2224,70 +2309,6 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) { } } -// This is here because there isn't a way to use the generated sub0_sub1 as the -// subreg index to EXTRACT_SUBREG in tablegen. -void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { - MemSDNode *Mem = cast(N); - unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUAS::FLAT_ADDRESS) { - SelectCode(N); - return; - } - - MVT VT = N->getSimpleValueType(0); - bool Is32 = (VT == MVT::i32); - SDLoc SL(N); - - MachineSDNode *CmpSwap = nullptr; - if (Subtarget->hasAddr64()) { - SDValue SRsrc, VAddr, SOffset, Offset; - - if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) { - unsigned Opcode = Is32 ? 
AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN; - SDValue CmpVal = Mem->getOperand(2); - SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32); - - // XXX - Do we care about glue operands? - - SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol, - Mem->getChain()}; - - CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); - } - } - - if (!CmpSwap) { - SDValue SRsrc, SOffset, Offset; - if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) { - unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN; - - SDValue CmpVal = Mem->getOperand(2); - SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32); - SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()}; - - CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); - } - } - - if (!CmpSwap) { - SelectCode(N); - return; - } - - MachineMemOperand *MMO = Mem->getMemOperand(); - CurDAG->setNodeMemRefs(CmpSwap, {MMO}); - - unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; - SDValue Extract - = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); - - ReplaceUses(SDValue(N, 0), Extract); - ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); - CurDAG->RemoveDeadNode(N); -} - void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { // The address is assumed to be uniform, so if it ends up in a VGPR, it will // be copied to an SGPR with readfirstlane. @@ -2587,6 +2608,30 @@ bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { return true; } +bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src, + SDValue &SrcMods, + bool OpSel) const { + unsigned Mods; + if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) { + if (OpSel) + Mods |= SISrcMods::OP_SEL_0; + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; + } + + return false; +} + +bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false); +} + +bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { @@ -2619,7 +2664,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, } bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, - SDValue &SrcMods) const { + SDValue &SrcMods, bool IsDOT) const { unsigned Mods = 0; Src = In; @@ -2628,7 +2673,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, Src = Src.getOperand(0); } - if (Src.getOpcode() == ISD::BUILD_VECTOR) { + if (Src.getOpcode() == ISD::BUILD_VECTOR && + (!IsDOT || !Subtarget->hasDOTOpSelHazard())) { unsigned VecMods = Mods; SDValue Lo = stripBitcast(Src.getOperand(0)); @@ -2716,6 +2762,40 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + return SelectVOP3PMods(In, Src, SrcMods, true); +} + +bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const { + const ConstantSDNode *C = cast(In); + // Literal i1 value set in intrinsic, represents SrcMods for the next operand. 
+ // 1 promotes packed values to signed, 0 treats them as unsigned. + assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); + + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned SrcSign = C->getAPIntValue().getZExtValue(); + if (SrcSign == 1) + Mods ^= SISrcMods::NEG; + + Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In, + SDValue &Src) const { + const ConstantSDNode *C = cast(In); + assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); + + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned SrcVal = C->getAPIntValue().getZExtValue(); + if (SrcVal == 1) + Mods |= SISrcMods::OP_SEL_0; + + Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const { Src = In; @@ -2840,7 +2920,7 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { } } } - // If "AllUsesAcceptSReg == false" so far we haven't suceeded + // If "AllUsesAcceptSReg == false" so far we haven't succeeded // commuting current user. This means have at least one use // that strictly require VGPR. Thus, we will not attempt to commute // other user instructions. @@ -2854,26 +2934,15 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const { auto Ld = cast(N); - return Ld->getAlignment() >= 4 && - ( - ( - ( - Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || - Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT - ) - && - !N->isDivergent() - ) - || - ( - Subtarget->getScalarizeGlobalBehavior() && - Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && - Ld->isSimple() && - !N->isDivergent() && - static_cast( - getTargetLowering())->isMemOpHasNoClobberedMemOperand(N) - ) - ); + return Ld->getAlign() >= Align(4) && + (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || + Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) && + !N->isDivergent()) || + (Subtarget->getScalarizeGlobalBehavior() && + Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Ld->isSimple() && !N->isDivergent() && + static_cast(getTargetLowering()) + ->isMemOpHasNoClobberedMemOperand(N))); } void AMDGPUDAGToDAGISel::PostprocessISelDAG() { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index d638d9877a9b..862be9dc5568 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -188,6 +188,10 @@ private: SDValue &VOffset, SDValue &Offset) const; bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, SDValue &Offset) const; + bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr, + uint64_t ImmOffset) const; + bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, + SDValue &SAddr, SDValue &Offset) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; @@ -214,10 +218,20 @@ private: bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; + bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods, + bool OpSel) const; + bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp, SDValue &Omod) const; - bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue 
&SrcMods) const; + bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods, + bool IsDOT = false) const; + bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const; + + bool SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const; + bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const; bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; @@ -245,7 +259,6 @@ private: bool isCBranchSCC(const SDNode *N) const; void SelectBRCOND(SDNode *N); void SelectFMAD_FMA(SDNode *N); - void SelectATOMIC_CMP_SWAP(SDNode *N); void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectInterpP1F16(SDNode *N); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b9d0655feef7..ef7929012597 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -19,6 +19,7 @@ #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Support/CommandLine.h" @@ -127,49 +128,27 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, // There are no 64-bit extloads. These should be done as a 32-bit extload and // an extension to 64-bit. - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand); - } + for (MVT VT : MVT::integer_valuetypes()) + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT, + Expand); for (MVT VT : MVT::integer_valuetypes()) { if (VT == MVT::i64) continue; - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); - - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); - - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) { + setLoadExtAction(Op, VT, MVT::i1, Promote); + setLoadExtAction(Op, VT, MVT::i8, Legal); + setLoadExtAction(Op, VT, MVT::i16, Legal); + setLoadExtAction(Op, VT, MVT::i32, Expand); + } } - for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand); - 
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); - } + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) + for (auto MemVT : + {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16}) + setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT, + Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); @@ -304,229 +283,125 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand); - setOperationAction(ISD::Constant, MVT::i32, Legal); - setOperationAction(ISD::Constant, MVT::i64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal); + setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand); // This is totally unsupported, just custom lower to produce an error. setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); // Library functions. These default to Expand, but we have instructions // for them. - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FEXP2, MVT::f32, Legal); - setOperationAction(ISD::FPOW, MVT::f32, Legal); - setOperationAction(ISD::FLOG2, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - setOperationAction(ISD::FMINNUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction({ISD::FCEIL, ISD::FEXP2, ISD::FPOW, ISD::FLOG2, ISD::FABS, + ISD::FFLOOR, ISD::FRINT, ISD::FTRUNC, ISD::FMINNUM, + ISD::FMAXNUM}, + MVT::f32, Legal); - setOperationAction(ISD::FROUND, MVT::f32, Custom); - setOperationAction(ISD::FROUND, MVT::f64, Custom); + setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom); - setOperationAction(ISD::FLOG, MVT::f32, Custom); - setOperationAction(ISD::FLOG10, MVT::f32, Custom); - setOperationAction(ISD::FEXP, MVT::f32, Custom); + setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP}, MVT::f32, Custom); + setOperationAction(ISD::FNEARBYINT, {MVT::f32, MVT::f64}, Custom); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); - - setOperationAction(ISD::FREM, MVT::f16, Custom); - setOperationAction(ISD::FREM, MVT::f32, Custom); - setOperationAction(ISD::FREM, MVT::f64, Custom); + setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); // Expand to fneg + fadd. 
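A note on the pattern running through these AMDGPUISelLowering.cpp hunks (including the FSUB expansion right after this aside): repeated one-op-at-a-time calls are collapsed into the ArrayRef-taking overloads of setOperationAction/setLoadExtAction, which simply loop over the list. A self-contained toy model of the two styles (toy enums and table, not LLVM's real signatures):

    #include <cstdio>
    #include <initializer_list>

    enum Op { SDIV, UDIV, SREM, UREM };
    enum Action { Legal = 0, Expand = 1 };

    static Action Table[4];

    void setOperationAction(Op O, Action A) { Table[O] = A; }
    // The list form is sugar for the per-op loop.
    void setOperationAction(std::initializer_list<Op> Ops, Action A) {
      for (Op O : Ops)
        setOperationAction(O, A);
    }

    int main() {
      setOperationAction({SDIV, UDIV, SREM, UREM}, Expand);
      std::printf("SREM action: %d\n", Table[SREM]); // prints 1 (Expand)
    }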
setOperationAction(ISD::FSUB, MVT::f64, Expand); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v6i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v6f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v7i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v7f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, + {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32, + MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32, + MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32}, + Custom); + setOperationAction( + ISD::EXTRACT_SUBVECTOR, + {MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32, + MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32, + MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32, + MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v16f16, MVT::v16i16, + MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64, + MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, MVT::v4i64, 
+ MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64}, + Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); - setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); - setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); + setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { // These should use [SU]DIVREM, so set them to expand - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT, + Expand); // GPU does not have divrem function for signed or unsigned. - setOperationAction(ISD::SDIVREM, VT, Custom); - setOperationAction(ISD::UDIVREM, VT, Custom); + setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom); // GPU does not have [S|U]MUL_LOHI functions as a single instruction. - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); - setOperationAction(ISD::BSWAP, VT, Expand); - setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand); // AMDGPU uses ADDC/SUBC/ADDE/SUBE - setOperationAction(ISD::ADDC, VT, Legal); - setOperationAction(ISD::SUBC, VT, Legal); - setOperationAction(ISD::ADDE, VT, Legal); - setOperationAction(ISD::SUBE, VT, Legal); + setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal); } // The hardware supports 32-bit FSHR, but not FSHL. setOperationAction(ISD::FSHR, MVT::i32, Legal); // The hardware supports 32-bit ROTR, but not ROTL. 
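The comment closing the chunk above explains the asymmetry the next hunk encodes: marking ROTL Expand while ROTR stays available lets legalization rewrite a left rotate as a right rotate by the complementary amount. A standalone sketch of that identity (ordinary C++, not the in-tree lowering):

    #include <cassert>
    #include <cstdint>

    uint32_t rotr32(uint32_t X, unsigned N) {
      N &= 31;
      return (X >> N) | (X << ((32 - N) & 31));
    }

    // rotl(x, n) == rotr(x, (32 - n) mod 32)
    uint32_t rotl32(uint32_t X, unsigned N) {
      return rotr32(X, (32 - (N & 31)) & 31);
    }

    int main() {
      assert(rotl32(0x80000001u, 1) == 0x00000003u);
      return 0;
    }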
- setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i16, Expand); - setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand); - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand); + setOperationAction( + {ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, + MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - setOperationAction(ISD::SMIN, MVT::i32, Legal); - setOperationAction(ISD::UMIN, MVT::i32, Legal); - setOperationAction(ISD::SMAX, MVT::i32, Legal); - setOperationAction(ISD::UMAX, MVT::i32, Legal); + setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32, + Legal); - setOperationAction(ISD::CTTZ, MVT::i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); - setOperationAction(ISD::CTLZ, MVT::i64, Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); + setOperationAction( + {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, + MVT::i64, Custom); static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32}; for (MVT VT : VectorIntTypes) { // Expand the following operations for the current type by default. 
- setOperationAction(ISD::ADD, VT, Expand); - setOperationAction(ISD::AND, VT, Expand); - setOperationAction(ISD::FP_TO_SINT, VT, Expand); - setOperationAction(ISD::FP_TO_UINT, VT, Expand); - setOperationAction(ISD::MUL, VT, Expand); - setOperationAction(ISD::MULHU, VT, Expand); - setOperationAction(ISD::MULHS, VT, Expand); - setOperationAction(ISD::OR, VT, Expand); - setOperationAction(ISD::SHL, VT, Expand); - setOperationAction(ISD::SRA, VT, Expand); - setOperationAction(ISD::SRL, VT, Expand); - setOperationAction(ISD::ROTL, VT, Expand); - setOperationAction(ISD::ROTR, VT, Expand); - setOperationAction(ISD::SUB, VT, Expand); - setOperationAction(ISD::SINT_TO_FP, VT, Expand); - setOperationAction(ISD::UINT_TO_FP, VT, Expand); - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - setOperationAction(ISD::SDIVREM, VT, Expand); - setOperationAction(ISD::UDIVREM, VT, Expand); - setOperationAction(ISD::SELECT, VT, Expand); - setOperationAction(ISD::VSELECT, VT, Expand); - setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction(ISD::XOR, VT, Expand); - setOperationAction(ISD::BSWAP, VT, Expand); - setOperationAction(ISD::CTPOP, VT, Expand); - setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTLZ, VT, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); - setOperationAction(ISD::SETCC, VT, Expand); + setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT, + ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU, + ISD::MULHS, ISD::OR, ISD::SHL, + ISD::SRA, ISD::SRL, ISD::ROTL, + ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP, + ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::SMUL_LOHI, + ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM, + ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC, + ISD::XOR, ISD::BSWAP, ISD::CTPOP, + ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE, + ISD::SETCC}, + VT, Expand); } static const MVT::SimpleValueType FloatVectorTypes[] = { MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32}; for (MVT VT : FloatVectorTypes) { - setOperationAction(ISD::FABS, VT, Expand); - setOperationAction(ISD::FMINNUM, VT, Expand); - setOperationAction(ISD::FMAXNUM, VT, Expand); - setOperationAction(ISD::FADD, VT, Expand); - setOperationAction(ISD::FCEIL, VT, Expand); - setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FDIV, VT, Expand); - setOperationAction(ISD::FEXP2, VT, Expand); - setOperationAction(ISD::FEXP, VT, Expand); - setOperationAction(ISD::FLOG2, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FLOG, VT, Expand); - setOperationAction(ISD::FLOG10, VT, Expand); - setOperationAction(ISD::FPOW, VT, Expand); - setOperationAction(ISD::FFLOOR, VT, Expand); - setOperationAction(ISD::FTRUNC, VT, Expand); - setOperationAction(ISD::FMUL, VT, Expand); - setOperationAction(ISD::FMA, VT, Expand); - setOperationAction(ISD::FRINT, VT, Expand); - setOperationAction(ISD::FNEARBYINT, VT, Expand); - setOperationAction(ISD::FSQRT, VT, Expand); - setOperationAction(ISD::FSIN, VT, Expand); - setOperationAction(ISD::FSUB, VT, Expand); - setOperationAction(ISD::FNEG, VT, Expand); - setOperationAction(ISD::VSELECT, VT, Expand); - setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction(ISD::FCOPYSIGN, VT, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); - 
setOperationAction(ISD::SETCC, VT, Expand); - setOperationAction(ISD::FCANONICALIZE, VT, Expand); + setOperationAction( + {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, + ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2, + ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG, + ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC, + ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, + ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG, + ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, + ISD::SETCC, ISD::FCANONICALIZE}, + VT, Expand); } // This causes using an unrolled select operation rather than expansion with @@ -590,26 +465,16 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, if (AMDGPUBypassSlowDiv) addBypassSlowDiv(64, 32); - setTargetDAGCombine(ISD::BITCAST); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::SMUL_LOHI); - setTargetDAGCombine(ISD::UMUL_LOHI); - setTargetDAGCombine(ISD::MULHU); - setTargetDAGCombine(ISD::MULHS); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::SELECT_CC); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::FADD); - setTargetDAGCombine(ISD::FSUB); - setTargetDAGCombine(ISD::FNEG); - setTargetDAGCombine(ISD::FABS); - setTargetDAGCombine(ISD::AssertZext); - setTargetDAGCombine(ISD::AssertSext); - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine({ISD::BITCAST, ISD::SHL, + ISD::SRA, ISD::SRL, + ISD::TRUNCATE, ISD::MUL, + ISD::SMUL_LOHI, ISD::UMUL_LOHI, + ISD::MULHU, ISD::MULHS, + ISD::SELECT, ISD::SELECT_CC, + ISD::STORE, ISD::FADD, + ISD::FSUB, ISD::FNEG, + ISD::FABS, ISD::AssertZext, + ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN}); } bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { @@ -785,11 +650,11 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N, unsigned AS = MN->getAddressSpace(); // Do not shrink an aligned scalar load to sub-dword. // Scalar engine cannot do sub-dword loads. - if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 && + if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) && (AS == AMDGPUAS::CONSTANT_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || - (isa(N) && - AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) && + (isa(N) && AS == AMDGPUAS::GLOBAL_ADDRESS && + MN->isInvariant())) && AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand())) return false; @@ -855,6 +720,8 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; return false; + case AMDGPUISD::SETCC: // ballot-style instruction + return true; } return false; } @@ -1072,10 +939,9 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( const bool IsByRef = Arg.hasByRefAttr(); Type *BaseArgTy = Arg.getType(); Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy; - MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None; - if (!Alignment) - Alignment = DL.getABITypeAlign(MemArgTy); - MaxAlign = max(Alignment, MaxAlign); + Align Alignment = DL.getValueOrABITypeAlignment( + IsByRef ? 
Arg.getParamAlign() : None, MemArgTy); + MaxAlign = std::max(Alignment, MaxAlign); uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy); uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset; @@ -1415,6 +1281,11 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, (Start == 0 || Start == 4)) return Op; + if (((SrcVT == MVT::v16f16 && VT == MVT::v8f16) || + (SrcVT == MVT::v16i16 && VT == MVT::v8i16)) && + (Start == 0 || Start == 8)) + return Op; + DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, VT.getVectorNumElements()); @@ -1589,8 +1460,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG); unsigned Size = LoMemVT.getStoreSize(); - unsigned BaseAlign = Load->getAlignment(); - unsigned HiAlign = MinAlign(BaseAlign, Size); + Align BaseAlign = Load->getAlign(); + Align HiAlign = commonAlignment(BaseAlign, Size); SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, @@ -1628,13 +1499,13 @@ SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op, EVT MemVT = Load->getMemoryVT(); SDLoc SL(Op); const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); - unsigned BaseAlign = Load->getAlignment(); + Align BaseAlign = Load->getAlign(); unsigned NumElements = MemVT.getVectorNumElements(); // Widen from vec3 to vec4 when the load is at least 8-byte aligned // or 16-byte fully dereferenceable. Otherwise, split the vector load. if (NumElements != 3 || - (BaseAlign < 8 && + (BaseAlign < Align(8) && !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout()))) return SplitVectorLoad(Op, DAG); @@ -1681,9 +1552,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize()); const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); - unsigned BaseAlign = Store->getAlignment(); + Align BaseAlign = Store->getAlign(); unsigned Size = LoMemVT.getStoreSize(); - unsigned HiAlign = MinAlign(BaseAlign, Size); + Align HiAlign = commonAlignment(BaseAlign, Size); SDValue LoStore = DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, @@ -3003,12 +2874,12 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, // the bytes again are not eliminated in the case of an unaligned copy. if (!allowsMisalignedMemoryAccesses( VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) { - SDValue Ops[2]; - if (VT.isVector()) - std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LN, DAG); - else - std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG); + if (VT.isVector()) + return SplitVectorLoad(SDValue(LN, 0), DAG); + + SDValue Ops[2]; + std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG); return DAG.getMergeValues(Ops, SDLoc(N)); } @@ -3059,7 +2929,7 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, if (!allowsMisalignedMemoryAccesses( VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) - return scalarizeVectorStore(SN, DAG); + return SplitVectorStore(SDValue(SN, 0), DAG); return expandUnalignedStore(SN, DAG); } @@ -3281,8 +3151,9 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, // this improves the ability to match BFE patterns in isel.
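The getAlign/commonAlignment hunks above all compute the same thing for the high half of a split memory access: the largest power of two that divides both the base alignment and the split offset. A small standalone model of that computation (helper name is illustrative):

    #include <cassert>
    #include <cstdint>

    // Largest power of two dividing both BaseAlign and Offset: the lowest
    // set bit of (BaseAlign | Offset). Matches MinAlign/commonAlignment
    // semantics for power-of-two alignments.
    uint64_t hiHalfAlign(uint64_t BaseAlign, uint64_t Offset) {
      uint64_t Bits = BaseAlign | Offset;
      return Bits & (~Bits + 1);
    }

    int main() {
      assert(hiHalfAlign(16, 8) == 8); // 16-byte base, hi half at +8
      assert(hiHalfAlign(4, 16) == 4); // never better than the base
      return 0;
    }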
if (LHS.getOpcode() == ISD::AND) { if (auto *Mask = dyn_cast(LHS.getOperand(1))) { - if (Mask->getAPIntValue().isShiftedMask() && - Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) { + unsigned MaskIdx, MaskLen; + if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) && + MaskIdx == ShiftAmt) { return DAG.getNode( ISD::AND, SL, VT, DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)), @@ -4380,10 +4251,14 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + ExplicitArgOffset; switch (Param) { - case GRID_DIM: + case FIRST_IMPLICIT: return ArgOffset; - case GRID_OFFSET: - return ArgOffset + 4; + case PRIVATE_BASE: + return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET; + case SHARED_BASE: + return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET; + case QUEUE_PTR: + return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET; } llvm_unreachable("unexpected implicit parameter type"); } @@ -4405,7 +4280,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) - NODE_NAME_CASE(RET_GFX_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) NODE_NAME_CASE(DWORDADDR) @@ -4485,6 +4359,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(LDS) + NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD) + NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(LOAD_D16_HI) @@ -4580,6 +4456,19 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, return SDValue(); } +static unsigned workitemIntrinsicDim(unsigned ID) { + switch (ID) { + case Intrinsic::amdgcn_workitem_id_x: + return 0; + case Intrinsic::amdgcn_workitem_id_y: + return 1; + case Intrinsic::amdgcn_workitem_id_z: + return 2; + default: + llvm_unreachable("not a workitem intrinsic"); + } +} + void AMDGPUTargetLowering::computeKnownBitsForTargetNode( const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { @@ -4716,6 +4605,14 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); break; } + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: { + unsigned MaxValue = Subtarget->getMaxWorkitemID( + DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID)); + Known.Zero.setHighBits(countLeadingZeros(MaxValue)); + break; + } default: break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index b41506157b68..73081483f1c3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -320,8 +320,9 @@ public: enum ImplicitParameter { FIRST_IMPLICIT, - GRID_DIM = FIRST_IMPLICIT, - GRID_OFFSET, + PRIVATE_BASE, + SHARED_BASE, + QUEUE_PTR, }; /// Helper function that returns the byte offset of the given @@ -367,9 +368,6 @@ enum NodeType : unsigned { // Return with values from a non-entry function. RET_FLAG, - // Return with values from a non-entry function (AMDGPU_Gfx CC). 
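Among the hunks above, the computeKnownBitsForTargetNode change for the workitem-id intrinsics deserves a worked example: if a dimension can never exceed MaxValue work items, every bit above MaxValue's highest set bit is provably zero in the id. A standalone sketch of that counting step (helper name is hypothetical):

    #include <cassert>
    #include <cstdint>

    // High bits known zero for any id in [0, MaxValue].
    unsigned knownZeroHighBits(uint32_t MaxValue) {
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; Bit != 0 && (MaxValue & Bit) == 0;
           Bit >>= 1)
        ++N;
      return N;
    }

    int main() {
      assert(knownZeroHighBits(1023) == 22); // ids fit in 10 bits
      assert(knownZeroHighBits(0) == 32);    // only id 0 is possible
      return 0;
    }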
- RET_GFX_FLAG, - DWORDADDR, FRACT, @@ -483,6 +481,9 @@ enum NodeType : unsigned { CONST_DATA_PTR, PC_ADD_REL_OFFSET, LDS, + FPTRUNC_ROUND_UPWARD, + FPTRUNC_ROUND_DOWNWARD, + DUMMY_CHAIN, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, LOAD_D16_HI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp new file mode 100644 index 000000000000..c9cdbc89f3a4 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp @@ -0,0 +1,457 @@ +//===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Insert s_delay_alu instructions to avoid stalls on GFX11+. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/SetVector.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-insert-delay-alu" + +namespace { + +class AMDGPUInsertDelayAlu : public MachineFunctionPass { +public: + static char ID; + + const SIInstrInfo *SII; + const TargetRegisterInfo *TRI; + + TargetSchedModel SchedModel; + + AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + // Return true if MI waits for all outstanding VALU instructions to complete. + static bool instructionWaitsForVALU(const MachineInstr &MI) { + // These instruction types wait for VA_VDST==0 before issuing. + const uint64_t VA_VDST_0 = SIInstrFlags::DS | SIInstrFlags::EXP | + SIInstrFlags::FLAT | SIInstrFlags::MIMG | + SIInstrFlags::MTBUF | SIInstrFlags::MUBUF; + if (MI.getDesc().TSFlags & VA_VDST_0) + return true; + if (MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B32 || + MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64) + return true; + if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && + (MI.getOperand(0).getImm() & 0xf000) == 0) + return true; + return false; + } + + // Types of delay that can be encoded in an s_delay_alu instruction. + enum DelayType { VALU, TRANS, SALU, OTHER }; + + // Get the delay type for an instruction with the specified TSFlags. + static DelayType getDelayType(uint64_t TSFlags) { + if (TSFlags & SIInstrFlags::TRANS) + return TRANS; + if (TSFlags & SIInstrFlags::VALU) + return VALU; + if (TSFlags & SIInstrFlags::SALU) + return SALU; + return OTHER; + } + + // Information about the last instruction(s) that wrote to a particular + // regunit. In straight-line code there will only be one such instruction, but + // when control flow converges we merge the delay information from each path + // to represent the union of the worst-case delays of each type. + struct DelayInfo { + // One larger than the maximum number of (non-TRANS) VALU instructions we + // can encode in an s_delay_alu instruction. + static const unsigned VALU_MAX = 5; + + // One larger than the maximum number of TRANS instructions we can encode in + // an s_delay_alu instruction. 
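Before the remaining DelayInfo fields (the TRANS_MAX constant the comment above introduces follows right after this aside), it is worth spelling out the merge rule the struct's comments describe: at a control-flow join, outstanding cycle counts merge with max (assume the slowest producer) and instructions-since-issue counters merge with min (assume the closest producer), giving the union of worst cases. A reduced sketch with two fields instead of six:

    #include <algorithm>
    #include <cassert>

    struct Delay {
      unsigned CyclesLeft; // max-merge: the slower writer wins
      unsigned SinceIssue; // min-merge: the closer writer wins

      void merge(const Delay &R) {
        CyclesLeft = std::max(CyclesLeft, R.CyclesLeft);
        SinceIssue = std::min(SinceIssue, R.SinceIssue);
      }
    };

    int main() {
      Delay A{4, 3}, B{2, 1};
      A.merge(B);
      assert(A.CyclesLeft == 4 && A.SinceIssue == 1);
      return 0;
    }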
+ static const unsigned TRANS_MAX = 4; + + // If it was written by a (non-TRANS) VALU, remember how many clock cycles + // are left until it completes, and how many other (non-TRANS) VALU we have + // seen since it was issued. + uint8_t VALUCycles = 0; + uint8_t VALUNum = VALU_MAX; + + // If it was written by a TRANS, remember how many clock cycles are left + // until it completes, and how many other TRANS we have seen since it was + // issued. + uint8_t TRANSCycles = 0; + uint8_t TRANSNum = TRANS_MAX; + // Also remember how many other (non-TRANS) VALU we have seen since it was + // issued. When an instruction depends on both a prior TRANS and a prior + // non-TRANS VALU, this is used to decide whether to encode a wait for just + // one or both of them. + uint8_t TRANSNumVALU = VALU_MAX; + + // If it was written by an SALU, remember how many clock cycles are left + // until it completes. + uint8_t SALUCycles = 0; + + DelayInfo() = default; + + DelayInfo(DelayType Type, unsigned Cycles) { + switch (Type) { + default: + llvm_unreachable("unexpected type"); + case VALU: + VALUCycles = Cycles; + VALUNum = 0; + break; + case TRANS: + TRANSCycles = Cycles; + TRANSNum = 0; + TRANSNumVALU = 0; + break; + case SALU: + SALUCycles = Cycles; + break; + } + } + + bool operator==(const DelayInfo &RHS) const { + return VALUCycles == RHS.VALUCycles && VALUNum == RHS.VALUNum && + TRANSCycles == RHS.TRANSCycles && TRANSNum == RHS.TRANSNum && + TRANSNumVALU == RHS.TRANSNumVALU && SALUCycles == RHS.SALUCycles; + } + + bool operator!=(const DelayInfo &RHS) const { return !(*this == RHS); } + + // Merge another DelayInfo into this one, to represent the union of the + // worst-case delays of each type. + void merge(const DelayInfo &RHS) { + VALUCycles = std::max(VALUCycles, RHS.VALUCycles); + VALUNum = std::min(VALUNum, RHS.VALUNum); + TRANSCycles = std::max(TRANSCycles, RHS.TRANSCycles); + TRANSNum = std::min(TRANSNum, RHS.TRANSNum); + TRANSNumVALU = std::min(TRANSNumVALU, RHS.TRANSNumVALU); + SALUCycles = std::max(SALUCycles, RHS.SALUCycles); + } + + // Update this DelayInfo after issuing an instruction. IsVALU should be 1 + // when issuing a (non-TRANS) VALU, else 0. IsTRANS should be 1 when issuing + // a TRANS, else 0. Cycles is the number of cycles it takes to issue the + // instruction. Return true if there is no longer any useful delay info. + bool advance(DelayType Type, unsigned Cycles) { + bool Erase = true; + + VALUNum += (Type == VALU); + if (VALUNum >= VALU_MAX || VALUCycles <= Cycles) { + // Forget about the VALU instruction. It was too far back or has + // definitely completed by now. + VALUNum = VALU_MAX; + VALUCycles = 0; + } else { + VALUCycles -= Cycles; + Erase = false; + } + + TRANSNum += (Type == TRANS); + TRANSNumVALU += (Type == VALU); + if (TRANSNum >= TRANS_MAX || TRANSCycles <= Cycles) { + // Forget about any TRANS instruction. It was too far back or has + // definitely completed by now. + TRANSNum = TRANS_MAX; + TRANSNumVALU = VALU_MAX; + TRANSCycles = 0; + } else { + TRANSCycles -= Cycles; + Erase = false; + } + + if (SALUCycles <= Cycles) { + // Forget about any SALU instruction. It has definitely completed by + // now. 
+ SALUCycles = 0; + } else { + SALUCycles -= Cycles; + Erase = false; + } + + return Erase; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const { + if (VALUCycles) + dbgs() << " VALUCycles=" << (int)VALUCycles; + if (VALUNum < VALU_MAX) + dbgs() << " VALUNum=" << (int)VALUNum; + if (TRANSCycles) + dbgs() << " TRANSCycles=" << (int)TRANSCycles; + if (TRANSNum < TRANS_MAX) + dbgs() << " TRANSNum=" << (int)TRANSNum; + if (TRANSNumVALU < VALU_MAX) + dbgs() << " TRANSNumVALU=" << (int)TRANSNumVALU; + if (SALUCycles) + dbgs() << " SALUCycles=" << (int)SALUCycles; + } +#endif + }; + + // A map from regunits to the delay info for that regunit. + struct DelayState : DenseMap<unsigned, DelayInfo> { + // Merge another DelayState into this one by merging the delay info for each + // regunit. + void merge(const DelayState &RHS) { + for (const auto &KV : RHS) { + iterator It; + bool Inserted; + std::tie(It, Inserted) = insert(KV); + if (!Inserted) + It->second.merge(KV.second); + } + } + + // Advance the delay info for each regunit, erasing any that are no longer + // useful. + void advance(DelayType Type, unsigned Cycles) { + iterator Next; + for (auto I = begin(), E = end(); I != E; I = Next) { + Next = std::next(I); + if (I->second.advance(Type, Cycles)) + erase(I); + } + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump(const TargetRegisterInfo *TRI) const { + if (empty()) { + dbgs() << " empty\n"; + return; + } + + // Dump DelayInfo for each RegUnit in numerical order. + SmallVector<const_iterator> Order; + Order.reserve(size()); + for (const_iterator I = begin(), E = end(); I != E; ++I) + Order.push_back(I); + llvm::sort(Order, [](const const_iterator &A, const const_iterator &B) { + return A->first < B->first; + }); + for (const_iterator I : Order) { + dbgs() << " " << printRegUnit(I->first, TRI); + I->second.dump(); + dbgs() << "\n"; + } + } +#endif + }; + + // The saved delay state at the end of each basic block. + DenseMap<MachineBasicBlock *, DelayState> BlockState; + + // Emit an s_delay_alu instruction if necessary before MI. + MachineInstr *emitDelayAlu(MachineInstr &MI, DelayInfo Delay, + MachineInstr *LastDelayAlu) { + unsigned Imm = 0; + + // Wait for a TRANS instruction. + if (Delay.TRANSNum < DelayInfo::TRANS_MAX) + Imm |= 4 + Delay.TRANSNum; + + // Wait for a VALU instruction (if it's more recent than any TRANS + // instruction that we're also waiting for). + if (Delay.VALUNum < DelayInfo::VALU_MAX && + Delay.VALUNum <= Delay.TRANSNumVALU) { + if (Imm & 0xf) + Imm |= Delay.VALUNum << 7; + else + Imm |= Delay.VALUNum; + } + + // Wait for an SALU instruction. + if (Delay.SALUCycles) { + if (Imm & 0x780) { + // We have already encoded a VALU and a TRANS delay. There's no room in + // the encoding for an SALU delay as well, so just drop it. + } else if (Imm & 0xf) { + Imm |= (Delay.SALUCycles + 8) << 7; + } else { + Imm |= Delay.SALUCycles + 8; + } + } + + // Don't emit the s_delay_alu instruction if there's nothing to wait for. + if (!Imm) + return LastDelayAlu; + + // If we only need to wait for one instruction, try encoding it in the last + // s_delay_alu that we emitted.
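For reference while reading emitDelayAlu above: the 0xf and 0x780 masks correspond to an immediate with a first dependency code in bits 3:0 (instid0), a skip distance in bits 6:4, and a second dependency code in bits 10:7 (instid1); the field names follow the GFX11 ISA, but the layout itself is exactly what the masks imply. A minimal packing sketch:

    #include <cassert>

    // Pack two dependency codes and a skip count into the layout implied
    // by the 0xf / 0x780 masks: [3:0] instid0, [6:4] skip, [10:7] instid1.
    unsigned packDelayImm(unsigned InstId0, unsigned Skip, unsigned InstId1) {
      return (InstId0 & 0xf) | ((Skip & 0x7) << 4) | ((InstId1 & 0xf) << 7);
    }

    int main() {
      assert(packDelayImm(2, 1, 9) == (2u | (1u << 4) | (9u << 7)));
      return 0;
    }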
+ if (!(Imm & 0x780) && LastDelayAlu) { + unsigned Skip = 0; + for (auto I = MachineBasicBlock::instr_iterator(LastDelayAlu), + E = MachineBasicBlock::instr_iterator(MI); + ++I != E;) { + if (!I->isBundle() && !I->isMetaInstruction()) + ++Skip; + } + if (Skip < 6) { + MachineOperand &Op = LastDelayAlu->getOperand(0); + unsigned LastImm = Op.getImm(); + assert((LastImm & ~0xf) == 0 && + "Remembered an s_delay_alu with no room for another delay!"); + LastImm |= Imm << 7 | Skip << 4; + Op.setImm(LastImm); + return nullptr; + } + } + + auto &MBB = *MI.getParent(); + MachineInstr *DelayAlu = + BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm); + // Remember the s_delay_alu for next time if there is still room in it to + // encode another delay. + return (Imm & 0x780) ? nullptr : DelayAlu; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) { + DelayState State; + for (auto *Pred : MBB.predecessors()) + State.merge(BlockState[Pred]); + + LLVM_DEBUG(dbgs() << " State at start of " << printMBBReference(MBB) + << "\n"; + State.dump(TRI);); + + bool Changed = false; + MachineInstr *LastDelayAlu = nullptr; + + // Iterate over the contents of bundles, but don't emit any instructions + // inside a bundle. + for (auto &MI : MBB.instrs()) { + if (MI.isBundle() || MI.isMetaInstruction()) + continue; + + // Ignore some more instructions that do not generate any code. + switch (MI.getOpcode()) { + case AMDGPU::SI_RETURN_TO_EPILOG: + continue; + } + + DelayType Type = getDelayType(MI.getDesc().TSFlags); + + if (instructionWaitsForVALU(MI)) { + // Forget about all outstanding VALU delays. + State = DelayState(); + } else if (Type != OTHER) { + DelayInfo Delay; + // TODO: Scan implicit uses too? + for (const auto &Op : MI.explicit_uses()) { + if (Op.isReg()) { + // One of the operands of the writelane is also the output operand. + // This creates the insertion of redundant delays. Hence, we have to + // ignore this operand. + if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied()) + continue; + for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI) { + auto It = State.find(*UI); + if (It != State.end()) { + Delay.merge(It->second); + State.erase(*UI); + } + } + } + } + if (Emit && !MI.isBundledWithPred()) { + // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or + // just ignore them? + LastDelayAlu = emitDelayAlu(MI, Delay, LastDelayAlu); + } + } + + if (Type != OTHER) { + // TODO: Scan implicit defs too? + for (const auto &Op : MI.defs()) { + unsigned Latency = SchedModel.computeOperandLatency( + &MI, MI.getOperandNo(&Op), nullptr, 0); + for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI) + State[*UI] = DelayInfo(Type, Latency); + } + } + + // Advance by the number of cycles it takes to issue this instruction. + // TODO: Use a more advanced model that accounts for instructions that + // take multiple cycles to issue on a particular pipeline. + unsigned Cycles = SIInstrInfo::getNumWaitStates(MI); + // TODO: In wave64 mode, double the number of cycles for VALU and VMEM + // instructions on the assumption that they will usually have to be issued + // twice? 
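At the State.advance call that follows this aside, every outstanding record is aged by the cycles just issued; a record dies either because its producer has certainly finished or because too many instructions of its class have gone by to encode the dependency. A reduced model of that aging rule (the real DelayInfo::advance above tracks three pipelines at once):

    #include <cassert>

    struct Delay {
      unsigned CyclesLeft;
      unsigned SinceIssue;
    };

    // Age a record by Cycles; true means it carries no useful info anymore.
    bool age(Delay &D, unsigned Cycles, unsigned MaxTracked) {
      ++D.SinceIssue;
      if (D.SinceIssue >= MaxTracked || D.CyclesLeft <= Cycles)
        return true; // producer too far back, or already complete
      D.CyclesLeft -= Cycles;
      return false;
    }

    int main() {
      Delay D{6, 0};
      assert(!age(D, 2, 5) && D.CyclesLeft == 4);
      assert(age(D, 4, 5)); // completes once 4 <= 4
      return 0;
    }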
+ State.advance(Type, Cycles); + + LLVM_DEBUG(dbgs() << " State after " << MI; State.dump(TRI);); + } + + if (Emit) { + assert(State == BlockState[&MBB] && + "Basic block state should not have changed on final pass!"); + } else if (State != BlockState[&MBB]) { + BlockState[&MBB] = std::move(State); + Changed = true; + } + return Changed; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName() + << "\n"); + + const GCNSubtarget &ST = MF.getSubtarget(); + if (!ST.hasDelayAlu()) + return false; + + SII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + + SchedModel.init(&ST); + + // Calculate the delay state for each basic block, iterating until we reach + // a fixed point. + SetVector WorkList; + for (auto &MBB : reverse(MF)) + WorkList.insert(&MBB); + while (!WorkList.empty()) { + auto &MBB = *WorkList.pop_back_val(); + bool Changed = runOnMachineBasicBlock(MBB, false); + if (Changed) + WorkList.insert(MBB.succ_begin(), MBB.succ_end()); + } + + LLVM_DEBUG(dbgs() << "Final pass over all BBs\n"); + + // Make one last pass over all basic blocks to emit s_delay_alu + // instructions. + bool Changed = false; + for (auto &MBB : MF) + Changed |= runOnMachineBasicBlock(MBB, true); + return Changed; + } +}; + +} // namespace + +char AMDGPUInsertDelayAlu::ID = 0; + +char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID; + +INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU", + false, false) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 4f1d700bcd84..695093322a01 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -110,33 +110,42 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) { llvm_unreachable("Should never be called!"); } -/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with -/// the modified arguments. +/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with +/// modified arguments (based on OldIntr) and replaces InstToReplace with +/// this newly created intrinsic call. 
static Optional modifyIntrinsicCall( - IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC, + IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, + InstCombiner &IC, std::function &, SmallVectorImpl &)> Func) { SmallVector ArgTys; - if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys)) + if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys)) return None; - SmallVector Args(II.args()); + SmallVector Args(OldIntr.args()); // Modify arguments and types Func(Args, ArgTys); - Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys); + Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys); CallInst *NewCall = IC.Builder.CreateCall(I, Args); - NewCall->takeName(&II); - NewCall->copyMetadata(II); + NewCall->takeName(&OldIntr); + NewCall->copyMetadata(OldIntr); if (isa(NewCall)) - NewCall->copyFastMathFlags(&II); + NewCall->copyFastMathFlags(&OldIntr); // Erase and replace uses - if (!II.getType()->isVoidTy()) - IC.replaceInstUsesWith(II, NewCall); - return IC.eraseInstFromFunction(II); + if (!InstToReplace.getType()->isVoidTy()) + IC.replaceInstUsesWith(InstToReplace, NewCall); + + bool RemoveOldIntr = &OldIntr != &InstToReplace; + + auto RetValue = IC.eraseInstFromFunction(InstToReplace); + if (RemoveOldIntr) + IC.eraseInstFromFunction(OldIntr); + + return RetValue; } static Optional @@ -153,7 +162,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, ImageDimIntr->Dim); return modifyIntrinsicCall( - II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { Args.erase(Args.begin() + ImageDimIntr->LodIndex); }); } @@ -170,7 +179,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP, ImageDimIntr->Dim); return modifyIntrinsicCall( - II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { Args.erase(Args.begin() + ImageDimIntr->MipIndex); }); } @@ -187,7 +196,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias, ImageDimIntr->Dim); return modifyIntrinsicCall( - II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { Args.erase(Args.begin() + ImageDimIntr->BiasIndex); ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg); }); @@ -205,13 +214,41 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, AMDGPU::getImageDimIntrinsicByBaseOpcode( OffsetMappingInfo->NoOffset, ImageDimIntr->Dim); return modifyIntrinsicCall( - II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { Args.erase(Args.begin() + ImageDimIntr->OffsetIndex); }); } } } + // Try to use D16 + if (ST->hasD16Images()) { + + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode); + + if (BaseOpcode->HasD16) { + + // If the only use of image intrinsic is a fptrunc (with conversion to + // half) then both fptrunc and image intrinsic will be replaced with image + // intrinsic with D16 flag. 
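Condensed, the guard implemented by the lines that follow: the D16 rewrite fires only when the image intrinsic has exactly one user and that user is an fptrunc to half (per element for vectors). A standalone restatement using the LLVM C++ API (a sketch; the in-tree code inlines this test rather than calling a helper like this):

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // II is assumed to be an image intrinsic whose base opcode has a D16
    // variant; true if its sole use truncates the result to half.
    static bool onlyUseIsFPTruncToHalf(const IntrinsicInst &II) {
      if (!II.hasOneUse())
        return false;
      const Instruction *User = II.user_back();
      return User->getOpcode() == Instruction::FPTrunc &&
             User->getType()->getScalarType()->isHalfTy();
    }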
+ if (II.hasOneUse()) { + Instruction *User = II.user_back(); + + if (User->getOpcode() == Instruction::FPTrunc && + User->getType()->getScalarType()->isHalfTy()) { + + return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC, + [&](auto &Args, auto &ArgTys) { + // Change return type of image intrinsic. + // Set it to return type of fptrunc. + ArgTys[0] = User->getType(); + }); + } + } + } + } + // Try to use A16 or G16 if (!ST->hasA16() && !ST->hasG16()) return None; @@ -263,7 +300,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, : Type::getInt16Ty(II.getContext()); return modifyIntrinsicCall( - II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) { + II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) { ArgTys[ImageDimIntr->GradientTyArg] = CoordType; if (!OnlyDerivatives) { ArgTys[ImageDimIntr->CoordTyArg] = CoordType; @@ -584,6 +621,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return IC.replaceInstUsesWith(II, RightShift); } case Intrinsic::amdgcn_exp: + case Intrinsic::amdgcn_exp_row: case Intrinsic::amdgcn_exp_compr: { ConstantInt *En = cast(II.getArgOperand(1)); unsigned EnBits = En->getZExtValue(); @@ -882,6 +920,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType())); } + case Intrinsic::amdgcn_permlane64: + // A constant value is trivially uniform. + if (Constant *C = dyn_cast(II.getArgOperand(0))) { + return IC.replaceInstUsesWith(II, C); + } + break; case Intrinsic::amdgcn_readfirstlane: case Intrinsic::amdgcn_readlane: { // A constant value is trivially uniform. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 391dc8428539..23b8fcf75f16 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -355,11 +355,7 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone, def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] ->; - -def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, +def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index b7d0f0580cda..3f242fdb6d8e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -80,8 +81,11 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg, RegClassOrBank.dyn_cast(); if (RC) { const LLT Ty = MRI.getType(Reg); - return RC->hasSuperClassEq(TRI.getBoolRC()) && - Ty.isValid() && Ty.getSizeInBits() == 1; + if (!Ty.isValid() || Ty.getSizeInBits() != 1) + return false; + // G_TRUNC s1 result is never vcc. 
+ return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC && + RC->hasSuperClassEq(TRI.getBoolRC()); } const RegisterBank *RB = RegClassOrBank.get(); @@ -91,7 +95,7 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg, bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI, unsigned NewOpc) const { MI.setDesc(TII.get(NewOpc)); - MI.RemoveOperand(1); // Remove intrinsic ID. + MI.removeOperand(1); // Remove intrinsic ID. MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); MachineOperand &Dst = MI.getOperand(0); @@ -216,7 +220,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { } const RegisterBank &RB = *RegClassOrBank.get(); - DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI); + DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB); if (!DefRC) { LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); return false; @@ -454,6 +458,24 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE( return true; } +bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32( + MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32; + + unsigned Opc; + if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11) + Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64 + : AMDGPU::V_MAD_I64_I32_gfx11_e64; + else + Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64; + I.setDesc(TII.get(Opc)); + I.addOperand(*MF, MachineOperand::CreateImm(0)); + I.addImplicitDefUseOperands(*MF); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + // TODO: We should probably legalize these to only using 32-bit results. bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -481,7 +503,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); const TargetRegisterClass *SrcRC = - TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI); + TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank); if (!SrcRC) return false; unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32, @@ -514,7 +536,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); const unsigned DstSize = DstTy.getSizeInBits(); const TargetRegisterClass *DstRC = - TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + TRI.getRegClassForSizeOnBank(DstSize, *DstBank); if (!DstRC) return false; @@ -556,7 +578,7 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); const TargetRegisterClass *SrcRC = - TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI); + TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank); if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) return false; @@ -630,7 +652,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI); if (Src1Def && Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) { MI.setDesc(TII.get(AMDGPU::COPY)); - MI.RemoveOperand(2); + MI.removeOperand(2); return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI) && RBI.constrainGenericRegister(Src0, AMDGPU::SReg_32RegClass, *MRI); } @@ -643,6 +665,8 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( // // (build_vector_trunc 
(lshr_oneuse $src0, 16), (lshr_oneuse $src1, 16) // => (S_PACK_HH_B32_B16 $src0, $src1) + // (build_vector_trunc (lshr_oneuse SReg_32:$src0, 16), $src1) + // => (S_PACK_HL_B32_B16 $src0, $src1) // (build_vector_trunc $src0, (lshr_oneuse SReg_32:$src1, 16)) // => (S_PACK_LH_B32_B16 $src0, $src1) // (build_vector_trunc $src0, $src1) @@ -662,14 +686,20 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( } else if (Shift1) { Opc = AMDGPU::S_PACK_LH_B32_B16; MI.getOperand(2).setReg(ShiftSrc1); - } else if (Shift0 && ConstSrc1 && ConstSrc1->Value == 0) { - // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16 - auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst) - .addReg(ShiftSrc0) - .addImm(16); + } else if (Shift0) { + if (ConstSrc1 && ConstSrc1->Value == 0) { + // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16 + auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst) + .addReg(ShiftSrc0) + .addImm(16); - MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + } + if (STI.hasSPackHL()) { + Opc = AMDGPU::S_PACK_HL_B32_B16; + MI.getOperand(1).setReg(ShiftSrc0); + } } MI.setDesc(TII.get(Opc)); @@ -722,16 +752,16 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); const TargetRegisterClass *DstRC = - TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + TRI.getRegClassForSizeOnBank(DstSize, *DstBank); if (!DstRC) return false; const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI); const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI); const TargetRegisterClass *Src0RC = - TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI); + TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank); const TargetRegisterClass *Src1RC = - TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI); + TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank); // Deal with weird cases where the class only partially supports the subreg // index. @@ -970,6 +1000,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { return selectGroupStaticSize(I); case Intrinsic::returnaddress: return selectReturnAddress(I); + case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16: + case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: + case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: + case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: + return selectSMFMACIntrin(I); default: return selectImpl(I, *CoverageInfo); } @@ -1142,7 +1179,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { Optional Arg = getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI); - if (Arg.hasValue()) { + if (Arg) { const int64_t Value = Arg.getValue().Value.getSExtValue(); if (Value == 0) { unsigned Opcode = Is64 ? 
AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; @@ -1164,8 +1201,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const { Register DstReg = I.getOperand(0).getReg(); const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); - const TargetRegisterClass *DstRC = - TRI.getRegClassForSizeOnBank(32, *DstBank, *MRI); + const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank); if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) return false; @@ -1300,12 +1336,14 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic( unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF); unsigned Offset0 = OrderedCountIndex << 2; - unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) | - (Instruction << 4); + unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4); if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) Offset1 |= (CountDw - 1) << 6; + if (STI.getGeneration() < AMDGPUSubtarget::GFX11) + Offset1 |= ShaderType << 2; + unsigned Offset = Offset0 | (Offset1 << 8); Register M0Val = MI.getOperand(2).getReg(); @@ -1424,23 +1462,7 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, if (HasVSrc) { Register VSrc = MI.getOperand(1).getReg(); - - if (STI.needsAlignedVGPRs()) { - // Add implicit aligned super-reg to force alignment on the data operand. - Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); - Register NewVR = - MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass); - BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), NewVR) - .addReg(VSrc, 0, MI.getOperand(1).getSubReg()) - .addImm(AMDGPU::sub0) - .addReg(Undef) - .addImm(AMDGPU::sub1); - MIB.addReg(NewVR, 0, AMDGPU::sub0); - MIB.addReg(NewVR, RegState::Implicit); - } else { - MIB.addReg(VSrc); - } + MIB.addReg(VSrc); if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI)) return false; @@ -1449,6 +1471,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, MIB.addImm(ImmOffset) .cloneMemRefs(MI); + TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0); + MI.eraseFromParent(); return true; } @@ -1523,6 +1547,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); unsigned IntrOpcode = Intr->BaseOpcode; const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI); + const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI); const unsigned ArgOffset = MI.getNumExplicitDefs() + 1; @@ -1627,7 +1652,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( } // The legalizer preprocessed the intrinsic arguments. If we aren't using - // NSA, these should have beeen packed into a single value in the first + // NSA, these should have been packed into a single value in the first // address register const bool UseNSA = NumVAddrRegs != 1 && NumVAddrDwords == NumVAddrRegs; if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) { @@ -1639,13 +1664,29 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( ++NumVDataDwords; int Opcode = -1; - if (IsGFX10Plus) { + if (IsGFX11Plus) { + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, + UseNSA ? AMDGPU::MIMGEncGfx11NSA + : AMDGPU::MIMGEncGfx11Default, + NumVDataDwords, NumVAddrDwords); + } else if (IsGFX10Plus) { Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, UseNSA ? 
AMDGPU::MIMGEncGfx10NSA : AMDGPU::MIMGEncGfx10Default, NumVDataDwords, NumVAddrDwords); } else { - if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (Subtarget->hasGFX90AInsts()) { + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a, + NumVDataDwords, NumVAddrDwords); + if (Opcode == -1) { + LLVM_DEBUG( + dbgs() + << "requested image instruction is not supported on this GPU\n"); + return false; + } + } + if (Opcode == -1 && + STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8, NumVDataDwords, NumVAddrDwords); if (Opcode == -1) @@ -1703,7 +1744,13 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( if (IsGFX10Plus) MIB.addImm(IsA16 ? -1 : 0); - MIB.addImm(TFE); // tfe + if (!Subtarget->hasGFX90AInsts()) { + MIB.addImm(TFE); // tfe + } else if (TFE) { + LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n"); + return false; + } + MIB.addImm(LWE); // lwe if (!IsGFX10Plus) MIB.addImm(DimInfo->DA ? -1 : 0); @@ -1743,7 +1790,9 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( } MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr); + return true; } bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( @@ -1770,10 +1819,22 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( return selectSBarrier(I); case Intrinsic::amdgcn_global_atomic_fadd: return selectGlobalAtomicFadd(I, I.getOperand(2), I.getOperand(3)); - default: { - return selectImpl(I, *CoverageInfo); - } + case Intrinsic::amdgcn_raw_buffer_load_lds: + case Intrinsic::amdgcn_struct_buffer_load_lds: + return selectBufferLoadLds(I); + case Intrinsic::amdgcn_global_load_lds: + return selectGlobalLoadLds(I); + case Intrinsic::amdgcn_exp_compr: + if (!STI.hasCompressedExport()) { + Function &F = I.getMF()->getFunction(); + DiagnosticInfoUnsupported NoFpRet( + F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error); + F.getContext().diagnose(NoFpRet); + return false; + } + break; } + return selectImpl(I, *CoverageInfo); } bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { @@ -1872,10 +1933,10 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { unsigned DstSize = DstTy.getSizeInBits(); unsigned SrcSize = SrcTy.getSizeInBits(); - const TargetRegisterClass *SrcRC - = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI); - const TargetRegisterClass *DstRC - = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI); + const TargetRegisterClass *SrcRC = + TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstRB); if (!SrcRC || !DstRC) return false; @@ -2014,10 +2075,10 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { return selectCOPY(I); const TargetRegisterClass *SrcRC = - TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank, *MRI); + TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank); const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); const TargetRegisterClass *DstRC = - TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + TRI.getRegClassForSizeOnBank(DstSize, *DstBank); Register UndefReg = MRI->createVirtualRegister(SrcRC); BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); @@ -2384,65 +2445,6 @@ bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW( return selectImpl(I, 
*CoverageInfo); } -// TODO: No rtn optimization. -bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG( - MachineInstr &MI) const { - Register PtrReg = MI.getOperand(1).getReg(); - const LLT PtrTy = MRI->getType(PtrReg); - if (PtrTy.getAddressSpace() == AMDGPUAS::FLAT_ADDRESS || - STI.useFlatForGlobal()) - return selectImpl(MI, *CoverageInfo); - - Register DstReg = MI.getOperand(0).getReg(); - const LLT Ty = MRI->getType(DstReg); - const bool Is64 = Ty.getSizeInBits() == 64; - const unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0; - Register TmpReg = MRI->createVirtualRegister( - Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass); - - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock *BB = MI.getParent(); - - Register VAddr, RSrcReg, SOffset; - int64_t Offset = 0; - - unsigned Opcode; - if (selectMUBUFOffsetImpl(MI.getOperand(1), RSrcReg, SOffset, Offset)) { - Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN; - } else if (selectMUBUFAddr64Impl(MI.getOperand(1), VAddr, - RSrcReg, SOffset, Offset)) { - Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN; - } else - return selectImpl(MI, *CoverageInfo); - - auto MIB = BuildMI(*BB, &MI, DL, TII.get(Opcode), TmpReg) - .addReg(MI.getOperand(2).getReg()); - - if (VAddr) - MIB.addReg(VAddr); - - MIB.addReg(RSrcReg); - if (SOffset) - MIB.addReg(SOffset); - else - MIB.addImm(0); - - MIB.addImm(Offset); - MIB.addImm(AMDGPU::CPol::GLC); - MIB.cloneMemRefs(MI); - - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg) - .addReg(TmpReg, RegState::Kill, SubReg); - - MI.eraseFromParent(); - - MRI->setRegClass( - DstReg, Is64 ? &AMDGPU::VReg_64RegClass : &AMDGPU::VGPR_32RegClass); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); -} - static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) { if (Reg.isPhysical()) return false; @@ -2551,7 +2553,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { // Try to avoid emitting a bit operation when we only need to touch half of // the 64-bit pointer. - APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64); + APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zext(64); const APInt MaskHi32 = APInt::getHighBitsSet(64, 32); const APInt MaskLo32 = APInt::getLowBitsSet(64, 32); @@ -2571,12 +2573,10 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { const TargetRegisterClass &RegRC = IsVGPR ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; - const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB, - *MRI); - const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB, - *MRI); + const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB); + const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB); const TargetRegisterClass *MaskRC = - TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB, *MRI); + TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB); if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || @@ -2689,10 +2689,10 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( if (IdxRB->getID() != AMDGPU::SGPRRegBankID) return false; - const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB, - *MRI); - const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB, - *MRI); + const TargetRegisterClass *SrcRC = + TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB); + const TargetRegisterClass *DstRC = + TRI.getRegClassForTypeOnBank(DstTy, *DstRB); if (!SrcRC || !DstRC) return false; if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || @@ -2771,10 +2771,10 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT( if (IdxRB->getID() != AMDGPU::SGPRRegBankID) return false; - const TargetRegisterClass *VecRC = TRI.getRegClassForTypeOnBank(VecTy, *VecRB, - *MRI); - const TargetRegisterClass *ValRC = TRI.getRegClassForTypeOnBank(ValTy, *ValRB, - *MRI); + const TargetRegisterClass *VecRC = + TRI.getRegClassForTypeOnBank(VecTy, *VecRB); + const TargetRegisterClass *ValRC = + TRI.getRegClassForTypeOnBank(ValTy, *ValRB); if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) || !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) || @@ -2867,7 +2867,6 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR( return false; assert(ShufMask.size() == 2); - assert(STI.hasSDWA() && "no target has VOP3P but not SDWA"); MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); @@ -2924,17 +2923,28 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR( } } else if (Mask[0] == 0 && Mask[1] == 0) { if (IsVALU) { - // Write low half of the register into the high half. - MachineInstr *MovSDWA = - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) - .addImm(0) // $src0_modifiers - .addReg(SrcVec) // $src0 - .addImm(0) // $clamp - .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel - .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused - .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel - .addReg(SrcVec, RegState::Implicit); - MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + if (STI.hasSDWA()) { + // Write low half of the register into the high half. 
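+        // Assuming the usual SDWA semantics, the V_MOV_B32_sdwa built here
+        // acts roughly as:
+        //   dst.hi16 = src.lo16;  // dst_sel = WORD_1, src0_sel = WORD_0
+        //   dst.lo16 = src.lo16;  // UNUSED_PRESERVE keeps the tied SrcVec half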
+ MachineInstr *MovSDWA = + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) + .addImm(0) // $src0_modifiers + .addReg(SrcVec) // $src0 + .addImm(0) // $clamp + .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel + .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused + .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel + .addReg(SrcVec, RegState::Implicit); + MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + } else { + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg) + .addImm(0xFFFF) + .addReg(SrcVec); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg) + .addReg(TmpReg) + .addImm(16) + .addReg(TmpReg); + } } else { BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) .addReg(SrcVec) @@ -2942,17 +2952,28 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR( } } else if (Mask[0] == 1 && Mask[1] == 1) { if (IsVALU) { - // Write high half of the register into the low half. - MachineInstr *MovSDWA = - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) - .addImm(0) // $src0_modifiers - .addReg(SrcVec) // $src0 - .addImm(0) // $clamp - .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel - .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused - .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel - .addReg(SrcVec, RegState::Implicit); - MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + if (STI.hasSDWA()) { + // Write high half of the register into the low half. + MachineInstr *MovSDWA = + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) + .addImm(0) // $src0_modifiers + .addReg(SrcVec) // $src0 + .addImm(0) // $clamp + .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel + .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused + .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel + .addReg(SrcVec, RegState::Implicit); + MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + } else { + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) + .addImm(16) + .addReg(SrcVec); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg) + .addReg(TmpReg) + .addImm(16) + .addReg(TmpReg); + } } else { BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg) .addReg(SrcVec) @@ -2965,13 +2986,19 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR( .addReg(SrcVec) .addImm(16); } else { - Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg) - .addReg(SrcVec) - .addImm(16); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) - .addReg(TmpReg) - .addReg(SrcVec); + if (STI.hasSPackHL()) { + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HL_B32_B16), DstReg) + .addReg(SrcVec) + .addReg(SrcVec); + } else { + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg) + .addReg(SrcVec) + .addImm(16); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) + .addReg(TmpReg) + .addReg(SrcVec); + } } } else llvm_unreachable("all shuffle masks should be handled"); @@ -2982,13 +3009,15 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR( bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD( MachineInstr &MI) const { - if (STI.hasGFX90AInsts()) + const Register DefReg = MI.getOperand(0).getReg(); + LLT DefTy = MRI->getType(DefReg); + if (AMDGPU::hasAtomicFaddRtnForTy(STI, DefTy)) return selectImpl(MI, 
*CoverageInfo); MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - if (!MRI->use_nodbg_empty(MI.getOperand(0).getReg())) { + if (!MRI->use_nodbg_empty(DefReg)) { Function &F = MBB->getParent()->getFunction(); DiagnosticInfoUnsupported NoFpRet(F, "return versions of fp atomics not supported", @@ -3105,9 +3134,236 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFadd( return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } +bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { + unsigned Opc; + unsigned Size = MI.getOperand(3).getImm(); + + // The struct intrinsic variants add one additional operand over raw. + const bool HasVIndex = MI.getNumOperands() == 9; + Register VIndex; + int OpOffset = 0; + if (HasVIndex) { + VIndex = MI.getOperand(4).getReg(); + OpOffset = 1; + } + + Register VOffset = MI.getOperand(4 + OpOffset).getReg(); + Optional MaybeVOffset = + getIConstantVRegValWithLookThrough(VOffset, *MRI); + const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue(); + + switch (Size) { + default: + return false; + case 1: + Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET; + break; + case 2: + Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET; + break; + case 4: + Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; + break; + } + + MachineBasicBlock *MBB = MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) + .add(MI.getOperand(2)); + + auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc)); + + if (HasVIndex && HasVOffset) { + Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class()); + BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg) + .addReg(VIndex) + .addImm(AMDGPU::sub0) + .addReg(VOffset) + .addImm(AMDGPU::sub1); + + MIB.addReg(IdxReg); + } else if (HasVIndex) { + MIB.addReg(VIndex); + } else if (HasVOffset) { + MIB.addReg(VOffset); + } + + MIB.add(MI.getOperand(1)); // rsrc + MIB.add(MI.getOperand(5 + OpOffset)); // soffset + MIB.add(MI.getOperand(6 + OpOffset)); // imm offset + unsigned Aux = MI.getOperand(7 + OpOffset).getImm(); + MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol + MIB.addImm((Aux >> 3) & 1); // swz + + MachineMemOperand *LoadMMO = *MI.memoperands_begin(); + MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); + LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm(); + MachinePointerInfo StorePtrI = LoadPtrI; + StorePtrI.V = nullptr; + StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; + + auto F = LoadMMO->getFlags() & + ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad); + LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, + Size, LoadMMO->getBaseAlign()); + + MachineMemOperand *StoreMMO = + MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore, + sizeof(int32_t), LoadMMO->getBaseAlign()); + + MIB.setMemRefs({LoadMMO, StoreMMO}); + + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + +/// Match a zero extend from a 32-bit value to 64-bits. 
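+/// Matches either of these MIR shapes (register names illustrative):
+///   %ext:_(s64) = G_ZEXT %src:_(s32)
+///   %ext:_(s64) = G_MERGE_VALUES %src:_(s32), %c0:_(s32)   ; %c0 == 0
+/// and returns %src; anything else returns an invalid Register.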
+static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { + Register ZExtSrc; + if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc)))) + return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register(); + + // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0) + const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); + if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) + return false; + + if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { + return Def->getOperand(1).getReg(); + } + + return Register(); +} + +bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ + unsigned Opc; + unsigned Size = MI.getOperand(3).getImm(); + + switch (Size) { + default: + return false; + case 1: + Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE; + break; + case 2: + Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT; + break; + case 4: + Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; + break; + } + + MachineBasicBlock *MBB = MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) + .add(MI.getOperand(2)); + + Register Addr = MI.getOperand(1).getReg(); + Register VOffset; + // Try to split SAddr and VOffset. Global and LDS pointers share the same + // immediate offset, so we cannot use a regular SelectGlobalSAddr(). + if (!isSGPR(Addr)) { + auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); + if (isSGPR(AddrDef->Reg)) { + Addr = AddrDef->Reg; + } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) { + Register SAddr = + getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI); + if (SAddr && isSGPR(SAddr)) { + Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg(); + if (Register Off = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) { + Addr = SAddr; + VOffset = Off; + } + } + } + } + + if (isSGPR(Addr)) { + Opc = AMDGPU::getGlobalSaddrOp(Opc); + if (!VOffset) { + VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset) + .addImm(0); + } + } + + auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc)) + .addReg(Addr); + + if (isSGPR(Addr)) + MIB.addReg(VOffset); + + MIB.add(MI.getOperand(4)) // offset + .add(MI.getOperand(5)); // cpol + + MachineMemOperand *LoadMMO = *MI.memoperands_begin(); + MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); + LoadPtrI.Offset = MI.getOperand(4).getImm(); + MachinePointerInfo StorePtrI = LoadPtrI; + LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; + StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; + auto F = LoadMMO->getFlags() & + ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad); + LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, + Size, LoadMMO->getBaseAlign()); + MachineMemOperand *StoreMMO = + MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore, + sizeof(int32_t), Align(4)); + + MIB.setMemRefs({LoadMMO, StoreMMO}); + + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{ MI.setDesc(TII.get(MI.getOperand(1).getImm())); - MI.RemoveOperand(1); + MI.removeOperand(1); + MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); + return true; +} + +bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { + unsigned Opc; + switch (MI.getIntrinsicID()) { + case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16: + Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16: + Opc = 
AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: + Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: + Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64; + break; + case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: + Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64; + break; + case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: + Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64; + break; + default: + llvm_unreachable("unhandled smfmac intrinsic"); + } + + auto VDst_In = MI.getOperand(4); + + MI.setDesc(TII.get(Opc)); + MI.removeOperand(4); // VDst_In + MI.removeOperand(1); // Intrinsic ID + MI.addOperand(VDst_In); // Readd VDst_In to the end MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); return true; } @@ -3166,6 +3422,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_UADDE: case TargetOpcode::G_USUBE: return selectG_UADDO_USUBO_UADDE_USUBE(I); + case AMDGPU::G_AMDGPU_MAD_U64_U32: + case AMDGPU::G_AMDGPU_MAD_I64_I32: + return selectG_AMDGPU_MAD_64_32(I); case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_BITCAST: case TargetOpcode::G_PTRTOINT: @@ -3226,8 +3485,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case AMDGPU::G_AMDGPU_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_ATOMIC_FMAX: return selectG_LOAD_STORE_ATOMICRMW(I); - case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: - return selectG_AMDGPU_ATOMIC_CMPXCHG(I); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_TRUNC: @@ -3286,9 +3543,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { } -std::pair -AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root, - bool AllowAbs) const { +std::pair AMDGPUInstructionSelector::selectVOP3ModsImpl( + MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { Register Src = Root.getReg(); Register OrigSrc = Src; unsigned Mods = 0; @@ -3305,7 +3561,10 @@ AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root, Mods |= SISrcMods::ABS; } - if (Mods != 0 && + if (OpSel) + Mods |= SISrcMods::OP_SEL_0; + + if ((Mods != 0 || ForceVGPR) && RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) { MachineInstr *UseMI = Root.getParent(); @@ -3407,7 +3666,7 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const { std::pair AMDGPUInstructionSelector::selectVOP3PModsImpl( - Register Src, const MachineRegisterInfo &MRI) const { + Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const { unsigned Mods = 0; MachineInstr *MI = MRI.getVRegDef(Src); @@ -3421,6 +3680,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl( } // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector. + (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard() // Packed instructions do not have abs modifiers. 
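   // For reference, OP_SEL_1 here is the op_sel_hi bit: with op_sel = 0 it
   // routes src.hi16 -> dst.hi16 and src.lo16 -> dst.lo16, i.e. the identity
   // permute that packed (v2x16) operands default to.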
Mods |= SISrcMods::OP_SEL_1; @@ -3443,6 +3703,50 @@ AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const { }}; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const { + MachineRegisterInfo &MRI + = Root.getParent()->getParent()->getParent()->getRegInfo(); + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDotIUVOP3PMods(MachineOperand &Root) const { + // Literal i1 value set in intrinsic, represents SrcMods for the next operand. + // Value is in Imm operand as i1 sign extended to int64_t. + // 1(-1) promotes packed values to signed, 0 treats them as unsigned. + assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) && + "expected i1 value"); + unsigned Mods = SISrcMods::OP_SEL_1; + if (Root.getImm() == -1) + Mods ^= SISrcMods::NEG; + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods( + MachineOperand &Root) const { + assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) && + "expected i1 value"); + unsigned Mods = SISrcMods::OP_SEL_1; + if (Root.getImm() != 0) + Mods |= SISrcMods::OP_SEL_0; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const { Register Src; @@ -3466,6 +3770,36 @@ AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { }}; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const { + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root, + /* AllowAbs */ false, + /* OpSel */ false, + /* ForceVGPR */ true); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const { + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root, + /* AllowAbs */ false, + /* OpSel */ true, + /* ForceVGPR */ true); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { SmallVector AddrInfo; @@ -3594,24 +3928,6 @@ AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const { }}; } -/// Match a zero extend from a 32-bit value to 64-bits. -static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { - Register ZExtSrc; - if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc)))) - return MRI.getType(ZExtSrc) == LLT::scalar(32) ? 
ZExtSrc : Register(); - - // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0) - const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); - if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) - return false; - - if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { - return Def->getOperand(1).getReg(); - } - - return Register(); -} - // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset) InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const { @@ -3631,9 +3947,6 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const { ImmOffset = ConstOffset; } else { auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI); - if (!PtrBaseDef) - return None; - if (isSGPR(PtrBaseDef->Reg)) { if (ConstOffset > 0) { // Offset is too large. @@ -3679,11 +3992,8 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const { } } - auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); - if (!AddrDef) - return None; - // Match the variable offset. + auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) { // Look through the SGPR->VGPR copy. Register SAddr = @@ -3749,9 +4059,6 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const { } auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); - if (!AddrDef) - return None; - if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) { int FI = AddrDef->MI->getOperand(1).getIndex(); return {{ @@ -3768,8 +4075,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const { auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI); auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI); - if (LHSDef && RHSDef && - LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX && + if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX && isSGPR(RHSDef->Reg)) { int FI = LHSDef->MI->getOperand(1).getIndex(); MachineInstr &I = *Root.getParent(); @@ -3792,6 +4098,74 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const { }}; } +// Check whether the flat scratch SVS swizzle bug affects this access. +bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug( + Register VAddr, Register SAddr, uint64_t ImmOffset) const { + if (!Subtarget->hasFlatScratchSVSSwizzleBug()) + return false; + + // The bug affects the swizzling of SVS accesses if there is any carry out + // from the two low order bits (i.e. from bit 1 into bit 2) when adding + // voffset to (soffset + inst_offset). + auto VKnown = KnownBits->getKnownBits(VAddr); + auto SKnown = KnownBits::computeForAddSub( + true, false, KnownBits->getKnownBits(SAddr), + KnownBits::makeConstant(APInt(32, ImmOffset))); + uint64_t VMax = VKnown.getMaxValue().getZExtValue(); + uint64_t SMax = SKnown.getMaxValue().getZExtValue(); + return (VMax & 3) + (SMax & 3) >= 4; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const { + Register Addr = Root.getReg(); + Register PtrBase; + int64_t ConstOffset; + int64_t ImmOffset = 0; + + // Match the immediate offset first, which canonically is moved as low as + // possible. 
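+  // For example, matching (names illustrative):
+  //   %addr = G_PTR_ADD (G_PTR_ADD %fi, %voff), G_CONSTANT 16
+  // should yield vaddr = %voff, saddr = %fi, offset = 16 (the SVS form).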
+ std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI); + + if (ConstOffset != 0 && + TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) { + Addr = PtrBase; + ImmOffset = ConstOffset; + } + + auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); + if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD) + return None; + + Register RHS = AddrDef->MI->getOperand(2).getReg(); + if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) + return None; + + Register LHS = AddrDef->MI->getOperand(1).getReg(); + auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI); + + if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset)) + return None; + + if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) { + int FI = LHSDef->MI->getOperand(1).getIndex(); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr + [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr + [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset + }}; + } + + if (!isSGPR(LHS)) + return None; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr + [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr + [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); @@ -3856,7 +4230,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { MIB.addReg(Info->getScratchRSrcReg()); }, [=](MachineInstrBuilder &MIB) { // vaddr - if (FI.hasValue()) + if (FI) MIB.addFrameIndex(FI.getValue()); else MIB.addReg(VAddr); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 42095332d11a..22672ba59e76 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -97,6 +97,7 @@ private: bool selectG_AND_OR_XOR(MachineInstr &I) const; bool selectG_ADD_SUB(MachineInstr &I) const; bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const; + bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const; bool selectG_EXTRACT(MachineInstr &I) const; bool selectG_MERGE_VALUES(MachineInstr &I) const; bool selectG_UNMERGE_VALUES(MachineInstr &I) const; @@ -133,7 +134,6 @@ private: void initM0(MachineInstr &I) const; bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const; - bool selectG_AMDGPU_ATOMIC_CMPXCHG(MachineInstr &I) const; bool selectG_SELECT(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; bool selectG_GLOBAL_VALUE(MachineInstr &I) const; @@ -144,11 +144,15 @@ private: bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const; bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp, MachineOperand &DataOp) const; + bool selectBufferLoadLds(MachineInstr &MI) const; + bool selectGlobalLoadLds(MachineInstr &MI) const; bool selectBVHIntrinsic(MachineInstr &I) const; + bool selectSMFMACIntrin(MachineInstr &I) const; bool selectWaveAddress(MachineInstr &I) const; - std::pair selectVOP3ModsImpl(MachineOperand &Root, - bool AllowAbs = true) const; + std::pair + selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true, + bool OpSel = false, bool ForceVGPR = false) const; InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; @@ -173,14 +177,29 @@ private: selectVOP3Mods_nnan(MachineOperand &Root) const; std::pair - selectVOP3PModsImpl(Register Src, const 
MachineRegisterInfo &MRI) const; + selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI, + bool IsDOT = false) const; InstructionSelector::ComplexRendererFns selectVOP3PMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3PModsDOT(MachineOperand &Root) const; + + InstructionSelector::ComplexRendererFns + selectDotIUVOP3PMods(MachineOperand &Root) const; + + InstructionSelector::ComplexRendererFns + selectWMMAOpSelVOP3PMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVOP3OpSelMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVINTERPMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVINTERPModsHi(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns @@ -203,6 +222,10 @@ private: InstructionSelector::ComplexRendererFns selectScratchSAddr(MachineOperand &Root) const; + bool checkFlatScratchSVSSwizzleBug(Register VAddr, Register SAddr, + uint64_t ImmOffset) const; + InstructionSelector::ComplexRendererFns + selectScratchSVAddr(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectMUBUFScratchOffen(MachineOperand &Root) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 7d3dbfd7e851..31012915457b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -40,7 +40,7 @@ class AMDGPUInst SoftFail = 0; + field bits<96> SoftFail = 0; let DecoderNamespace = Namespace; @@ -87,6 +87,17 @@ class PredConcat lst, Predicate pred> { !listconcat([pred], !filter(item, lst, !ne(item, pred))); } +// Add a Register to the list if does not already exist +class RegAppend lst, Register reg> { + list ret = + !listconcat([reg], !filter(item, lst, !ne(item, reg))); +} +// Get the union of two Register lists +class RegListUnion lstA, list lstB> { + list ret = + !foldl(lstA, lstB, temp, item, RegAppend.ret); +} + class PredicateControl { Predicate SubtargetPredicate = TruePredicate; Predicate AssemblerPredicate = TruePredicate; @@ -444,34 +455,28 @@ def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { let IsNonExtLoad = 1; } -def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> { +def extloadi8_#as : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> { let IsLoad = 1; - let MemoryVT = i8; } -def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> { +def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> { let IsLoad = 1; - let MemoryVT = i16; } -def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { +def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> { let IsLoad = 1; - let MemoryVT = i8; } -def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { +def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> { let IsLoad = 1; - let MemoryVT = i16; } -def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { +def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> { let IsLoad = 1; - let MemoryVT = i8; } -def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { +def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> { let IsLoad = 1; - let MemoryVT = i16; } def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> { @@ -498,17 +503,15 @@ def 
atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> { foreach as = [ "global", "flat", "local", "private", "region" ] in { -let AddressSpaces = !cast("StoreAddress_"#as).AddrSpaces in { +let IsStore = 1, AddressSpaces = !cast("StoreAddress_"#as).AddrSpaces in { def store_#as : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr)> { - let IsStore = 1; let IsTruncStore = 0; } // truncstore fragments. def truncstore_#as : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr)> { - let IsStore = 1; let IsTruncStore = 1; } @@ -517,90 +520,133 @@ def truncstore_#as : PatFrag<(ops node:$val, node:$ptr), // unnecessary check that the memory size is less than the value type // in the generated matcher table. def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr)> { - let IsStore = 1; - let MemoryVT = i8; -} - + (truncstorei8 node:$val, node:$ptr)>; def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr)> { - let IsStore = 1; - let MemoryVT = i16; -} + (truncstorei16 node:$val, node:$ptr)>; def store_hi16_#as : StoreHi16 ; def truncstorei8_hi16_#as : StoreHi16; def truncstorei16_hi16_#as : StoreHi16; -defm atomic_store_#as : binary_atomic_op; +} // End let IsStore = 1, AddressSpaces = ... -} // End let AddressSpaces +let IsAtomic = 1, AddressSpaces = !cast("StoreAddress_"#as).AddrSpaces in { +def atomic_store_8_#as : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_8 node:$ptr, node:$val)>; +def atomic_store_16_#as : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_16 node:$ptr, node:$val)>; +def atomic_store_32_#as : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_32 node:$ptr, node:$val)>; +def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_64 node:$ptr, node:$val)>; +} } // End foreach as +// TODO: Add GISelPredicateCode for the ret and noret PatFrags once +// GlobalISelEmitter allows pattern matches where src and dst def count +// mismatch. 
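+// As an illustration, ret_noret_op applied to int_amdgcn_flat_atomic_fadd
+// yields two otherwise identical fragments:
+//   int_amdgcn_flat_atomic_fadd_ret    - matches only when the result is used
+//   int_amdgcn_flat_atomic_fadd_noret  - matches only when the result is dead
+// (for GlobalISel the predicate is a constant, per the TODO above).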
+ +multiclass ret_noret_op { + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }], + GISelPredicateCode = [{ return true; }] in { + def "_ret" : PatFrag<(ops node:$ptr, node:$data), + (!cast(NAME) node:$ptr, node:$data)>; + } + + let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], + GISelPredicateCode = [{ return false; }] in { + def "_noret" : PatFrag<(ops node:$ptr, node:$data), + (!cast(NAME) node:$ptr, node:$data)>; + } +} + +defm int_amdgcn_flat_atomic_fadd : ret_noret_op; +defm int_amdgcn_flat_atomic_fadd_v2bf16 : ret_noret_op; +defm int_amdgcn_flat_atomic_fmin : ret_noret_op; +defm int_amdgcn_flat_atomic_fmax : ret_noret_op; +defm int_amdgcn_global_atomic_fadd : ret_noret_op; +defm int_amdgcn_global_atomic_fadd_v2bf16 : ret_noret_op; +defm int_amdgcn_global_atomic_fmin : ret_noret_op; +defm int_amdgcn_global_atomic_fmax : ret_noret_op; +defm int_amdgcn_ds_fadd_v2bf16 : ret_noret_op; multiclass ret_noret_binary_atomic_op { + let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], + GISelPredicateCode = [{ return false; }] in { + defm "_noret" : binary_atomic_op; + } + + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }], + GISelPredicateCode = [{ return true; }] in { + defm "_ret" : binary_atomic_op; + } +} + +multiclass ret_noret_ternary_atomic_op { + let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], + GISelPredicateCode = [{ return false; }] in { + defm "_noret" : ternary_atomic_op; + } + + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }], + GISelPredicateCode = [{ return true; }] in { + defm "_ret" : ternary_atomic_op; + } +} + +multiclass binary_atomic_op_all_as { foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { defm "_"#as : binary_atomic_op; - - let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in { - defm "_"#as#"_noret" : binary_atomic_op; - } - - let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in { - defm "_"#as#"_ret" : binary_atomic_op; - } + defm "_"#as : ret_noret_binary_atomic_op; } } } -defm atomic_swap : ret_noret_binary_atomic_op; -defm atomic_load_add : ret_noret_binary_atomic_op; -defm atomic_load_and : ret_noret_binary_atomic_op; -defm atomic_load_max : ret_noret_binary_atomic_op; -defm atomic_load_min : ret_noret_binary_atomic_op; -defm atomic_load_or : ret_noret_binary_atomic_op; -defm atomic_load_sub : ret_noret_binary_atomic_op; -defm atomic_load_umax : ret_noret_binary_atomic_op; -defm atomic_load_umin : ret_noret_binary_atomic_op; -defm atomic_load_xor : ret_noret_binary_atomic_op; -defm atomic_load_fadd : ret_noret_binary_atomic_op; +defm atomic_swap : binary_atomic_op_all_as; +defm atomic_load_add : binary_atomic_op_all_as; +defm atomic_load_and : binary_atomic_op_all_as; +defm atomic_load_max : binary_atomic_op_all_as; +defm atomic_load_min : binary_atomic_op_all_as; +defm atomic_load_or : binary_atomic_op_all_as; +defm atomic_load_sub : binary_atomic_op_all_as; +defm atomic_load_umax : binary_atomic_op_all_as; +defm atomic_load_umin : binary_atomic_op_all_as; +defm atomic_load_xor : binary_atomic_op_all_as; +defm atomic_load_fadd : binary_atomic_op_all_as; let MemoryVT = v2f16 in -defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op; -defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op; +defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as; +defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as; def load_align8_local : PatFrag<(ops node:$ptr), (load_local 
node:$ptr)>, - Aligned<8> { + Aligned<8> { let IsLoad = 1; - let IsNonExtLoad = 1; } def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<16> { let IsLoad = 1; - let IsNonExtLoad = 1; } def store_align8_local: PatFrag<(ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)>, Aligned<8> { let IsStore = 1; - let IsTruncStore = 0; } def store_align16_local: PatFrag<(ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)>, Aligned<16> { let IsStore = 1; - let IsTruncStore = 0; } let AddressSpaces = StoreAddress_local.AddrSpaces in { defm atomic_cmp_swap_local : ternary_atomic_op; -defm atomic_cmp_swap_local_m0 : ternary_atomic_op; +defm atomic_cmp_swap_local : ret_noret_ternary_atomic_op; +defm atomic_cmp_swap_local_m0 : ret_noret_ternary_atomic_op; } let AddressSpaces = StoreAddress_region.AddrSpaces in { -defm atomic_cmp_swap_region : ternary_atomic_op; -defm atomic_cmp_swap_region_m0 : ternary_atomic_op; +defm atomic_cmp_swap_region : ret_noret_ternary_atomic_op; +defm atomic_cmp_swap_region_m0 : ret_noret_ternary_atomic_op; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 645d05aa9238..01a3e78ea48c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsR600.h" #define DEBUG_TYPE "amdgpu-legalinfo" @@ -134,7 +135,6 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { static LLT getBitcastRegisterType(const LLT Ty) { const unsigned Size = Ty.getSizeInBits(); - LLT CoercedTy; if (Size <= 32) { // <2 x s8> -> s16 // <4 x s8> -> s32 @@ -530,13 +530,22 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) { // Full set of gfx9 features. - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32, S16, V2S16}) + .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .maxScalar(0, S32); + + getActionDefinitionsBuilder(G_MUL) + .legalFor({S32, S16, V2S16}) .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) + .minScalar(0, S16) .widenScalarToNextMultipleOf(0, 32) - .maxScalar(0, S32) - .scalarize(0); + .custom(); + assert(ST.hasMad64_32()); getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) .legalFor({S32, S16, V2S16}) // Clamp modifier @@ -546,13 +555,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .widenScalarToNextPow2(0, 32) .lower(); } else if (ST.has16BitInsts()) { - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32, S16}) .minScalar(0, S16) .widenScalarToNextMultipleOf(0, 32) .maxScalar(0, S32) .scalarize(0); + getActionDefinitionsBuilder(G_MUL) + .legalFor({S32, S16}) + .scalarize(0) + .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .custom(); + assert(ST.hasMad64_32()); + // Technically the saturating operations require clamp bit support, but this // was introduced at the same time as 16-bit operations. 
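   // (In selection these map onto the VALU adds with the clamp bit, e.g.
   //  G_UADDSAT x, y -> V_ADD_U32_e64 x, y, clamp, so no explicit compare
   //  and select sequence is needed.)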
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) @@ -569,12 +586,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .lower(); } else { - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32}) .widenScalarToNextMultipleOf(0, 32) .clampScalar(0, S32, S32) .scalarize(0); + auto &Mul = getActionDefinitionsBuilder(G_MUL) + .legalFor({S32}) + .scalarize(0) + .minScalar(0, S32) + .widenScalarToNextMultipleOf(0, 32); + + if (ST.hasMad64_32()) + Mul.custom(); + else + Mul.maxScalar(0, S32); + if (ST.hasIntClamp()) { getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) .legalFor({S32}) // Clamp modifier. @@ -632,7 +660,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, G_UADDE, G_SADDE, G_USUBE, G_SSUBE}) .legalFor({{S32, S1}, {S32, S32}}) .minScalar(0, S32) - // TODO: .scalarize(0) + .scalarize(0) .lower(); getActionDefinitionsBuilder(G_BITCAST) @@ -767,13 +795,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .narrowScalarFor({{S64, S16}}, changeTo(0, S32)) .scalarize(0); - getActionDefinitionsBuilder(G_FSUB) + auto &FSubActions = getActionDefinitionsBuilder(G_FSUB); + if (ST.has16BitInsts()) { + FSubActions + // Use actual fsub instruction + .legalFor({S32, S16}) + // Must use fadd + fneg + .lowerFor({S64, V2S16}); + } else { + FSubActions // Use actual fsub instruction .legalFor({S32}) // Must use fadd + fneg - .lowerFor({S64, S16, V2S16}) - .scalarize(0) - .clampScalar(0, S32, S64); + .lowerFor({S64, S16, V2S16}); + } + + FSubActions + .scalarize(0) + .clampScalar(0, S32, S64); // Whether this is legal depends on the floating point mode for the function. auto &FMad = getActionDefinitionsBuilder(G_FMAD); @@ -839,6 +878,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .lower(); + getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND) + .customFor({S16, S32}) + .scalarize(0) + .lower(); + // Lower roundeven into G_FRINT getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN}) .scalarize(0) @@ -1292,6 +1336,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}}); if (ST.hasGFX90AInsts()) Atomic.legalFor({{S64, LocalPtr}}); + if (ST.hasGFX940Insts()) + Atomic.legalFor({{V2S16, LocalPtr}}); } if (ST.hasAtomicFaddInsts()) Atomic.legalFor({{S32, GlobalPtr}}); @@ -1505,7 +1551,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .clampMaxNumElements(1, S16, 2) // TODO: Make 4? .clampMaxNumElements(0, S16, 64); - // TODO: Don't fully scalarize v2s16 pieces? Or combine out thosse + // TODO: Don't fully scalarize v2s16 pieces? Or combine out those // pre-legalize. if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) @@ -1756,9 +1802,13 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeFFloor(MI, MRI, B); case TargetOpcode::G_BUILD_VECTOR: return legalizeBuildVector(MI, MRI, B); + case TargetOpcode::G_MUL: + return legalizeMul(Helper, MI); case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: return legalizeCTLZ_CTTZ(MI, MRI, B); + case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND: + return legalizeFPTruncRound(MI, B); default: return false; } @@ -1801,6 +1851,39 @@ Register AMDGPULegalizerInfo::getSegmentAperture( return B.buildShl(S32, GetReg, ShiftAmt).getReg(0); } + // TODO: can we be smarter about machine pointer info? 
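+  // Illustrative shape of the MIR built below for code object v5:
+  //   %kernarg:_(p4)   = (kernarg segment pointer via loadInputValue)
+  //   %addr:_(p4)      = G_PTR_ADD %kernarg, (implicit-arg offset of the base)
+  //   %aperture:_(s32) = G_LOAD %addr   ; invariant, constant address space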
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); + Register LoadAddr = MRI.createGenericVirtualRegister( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); + // For code object version 5, private_base and shared_base are passed through + // implicit kernargs. + if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + AMDGPUTargetLowering::ImplicitParameter Param = + AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE + : AMDGPUTargetLowering::PRIVATE_BASE; + uint64_t Offset = + ST.getTargetLowering()->getImplicitParameterOffset(B.getMF(), Param); + + Register KernargPtrReg = MRI.createGenericVirtualRegister( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); + + if (!loadInputValue(KernargPtrReg, B, + AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR)) + return Register(); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + LLT::scalar(32), commonAlignment(Align(64), Offset)); + + // Pointer address + B.buildPtrAdd(LoadAddr, KernargPtrReg, + B.buildConstant(LLT::scalar(64), Offset).getReg(0)); + // Load address + return B.buildLoad(S32, LoadAddr, *MMO).getReg(0); + } + Register QueuePtr = MRI.createGenericVirtualRegister( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); @@ -1811,17 +1894,14 @@ Register AMDGPULegalizerInfo::getSegmentAperture( // private_segment_aperture_base_hi. uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44; - // TODO: can we be smarter about machine pointer info? - MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, LLT::scalar(32), commonAlignment(Align(64), StructOffset)); - Register LoadAddr; - - B.materializePtrAdd(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset); + B.buildPtrAdd(LoadAddr, QueuePtr, + B.buildConstant(LLT::scalar(64), StructOffset).getReg(0)); return B.buildLoad(S32, LoadAddr, *MMO).getReg(0); } @@ -1872,31 +1952,9 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( return true; } - if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { - // Truncate. - B.buildExtract(Dst, Src, 0); - MI.eraseFromParent(); - return true; - } - - if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { - const SIMachineFunctionInfo *Info = MF.getInfo(); - uint32_t AddrHiVal = Info->get32BitAddressHighBits(); - - // FIXME: This is a bit ugly due to creating a merge of 2 pointers to - // another. Merge operands are required to be the same type, but creating an - // extra ptrtoint would be kind of pointless. - auto HighAddr = B.buildConstant( - LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal); - B.buildMerge(Dst, {Src, HighAddr}); - MI.eraseFromParent(); - return true; - } - - if (SrcAS == AMDGPUAS::FLAT_ADDRESS) { - assert(DestAS == AMDGPUAS::LOCAL_ADDRESS || - DestAS == AMDGPUAS::PRIVATE_ADDRESS); - + if (SrcAS == AMDGPUAS::FLAT_ADDRESS && + (DestAS == AMDGPUAS::LOCAL_ADDRESS || + DestAS == AMDGPUAS::PRIVATE_ADDRESS)) { if (isKnownNonNull(Src, MRI, TM, SrcAS)) { // Extract low 32-bits of the pointer. 
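     // A flat pointer into LDS/scratch carries the aperture base in its high
     // 32 bits and the segment offset in the low 32 bits, so for a known
     // non-null source the cast is just:
     //   %dst:_(p3 or p5) = G_EXTRACT %src:_(p0), 0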
B.buildExtract(Dst, Src, 0); @@ -1920,37 +1978,70 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( return true; } - if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS) - return false; + if (DestAS == AMDGPUAS::FLAT_ADDRESS && + (SrcAS == AMDGPUAS::LOCAL_ADDRESS || + SrcAS == AMDGPUAS::PRIVATE_ADDRESS)) { + if (!ST.hasFlatAddressSpace()) + return false; - if (!ST.hasFlatAddressSpace()) - return false; + Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); + if (!ApertureReg.isValid()) + return false; - Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); - if (!ApertureReg.isValid()) - return false; + // Coerce the type of the low half of the result so we can use merge_values. + Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0); + + // TODO: Should we allow mismatched types but matching sizes in merges to + // avoid the ptrtoint? + auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg}); + + if (isKnownNonNull(Src, MRI, TM, SrcAS)) { + B.buildCopy(Dst, BuildPtr); + MI.eraseFromParent(); + return true; + } + + auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); + auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); - // Coerce the type of the low half of the result so we can use merge_values. - Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0); + auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, + SegmentNull.getReg(0)); - // TODO: Should we allow mismatched types but matching sizes in merges to - // avoid the ptrtoint? - auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg}); + B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull); + + MI.eraseFromParent(); + return true; + } - if (isKnownNonNull(Src, MRI, TM, SrcAS)) { - B.buildCopy(Dst, BuildPtr); + if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT && + SrcTy.getSizeInBits() == 64) { + // Truncate. + B.buildExtract(Dst, Src, 0); MI.eraseFromParent(); return true; } - auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); - auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); + if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT && + DstTy.getSizeInBits() == 64) { + const SIMachineFunctionInfo *Info = MF.getInfo(); + uint32_t AddrHiVal = Info->get32BitAddressHighBits(); - auto CmpRes = - B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0)); + // FIXME: This is a bit ugly due to creating a merge of 2 pointers to + // another. Merge operands are required to be the same type, but creating an + // extra ptrtoint would be kind of pointless. + auto HighAddr = B.buildConstant( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal); + B.buildMerge(Dst, {Src, HighAddr}); + MI.eraseFromParent(); + return true; + } - B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull); + DiagnosticInfoUnsupported InvalidAddrSpaceCast( + MF.getFunction(), "invalid addrspacecast", B.getDebugLoc()); + LLVMContext &Ctx = MF.getFunction().getContext(); + Ctx.diagnose(InvalidAddrSpaceCast); + B.buildUndef(Dst); MI.eraseFromParent(); return true; } @@ -2811,6 +2902,298 @@ bool AMDGPULegalizerInfo::legalizeBuildVector( return true; } +// Build a big integer multiply or multiply-add using MAD_64_32 instructions. +// +// Source and accumulation registers must all be 32-bits. +// +// TODO: When the multiply is uniform, we should produce a code sequence +// that is better suited to instruction selection on the SALU. 
Instead of +// the outer loop going over parts of the result, the outer loop should go +// over parts of one of the factors. This should result in instruction +// selection that makes full use of S_ADDC_U32 instructions. +void AMDGPULegalizerInfo::buildMultiply( + LegalizerHelper &Helper, MutableArrayRef Accum, + ArrayRef Src0, ArrayRef Src1, + bool UsePartialMad64_32, bool SeparateOddAlignedProducts) const { + // Use (possibly empty) vectors of S1 registers to represent the set of + // carries from one pair of positions to the next. + using Carry = SmallVector; + + MachineIRBuilder &B = Helper.MIRBuilder; + + const LLT S1 = LLT::scalar(1); + const LLT S32 = LLT::scalar(32); + const LLT S64 = LLT::scalar(64); + + Register Zero32; + Register Zero64; + + auto getZero32 = [&]() -> Register { + if (!Zero32) + Zero32 = B.buildConstant(S32, 0).getReg(0); + return Zero32; + }; + auto getZero64 = [&]() -> Register { + if (!Zero64) + Zero64 = B.buildConstant(S64, 0).getReg(0); + return Zero64; + }; + + // Merge the given carries into the 32-bit LocalAccum, which is modified + // in-place. + // + // Returns the carry-out, which is a single S1 register or null. + auto mergeCarry = + [&](Register &LocalAccum, const Carry &CarryIn) -> Register { + if (CarryIn.empty()) + return Register(); + + bool HaveCarryOut = true; + Register CarryAccum; + if (CarryIn.size() == 1) { + if (!LocalAccum) { + LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); + return Register(); + } + + CarryAccum = getZero32(); + } else { + CarryAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); + for (unsigned i = 1; i + 1 < CarryIn.size(); ++i) { + CarryAccum = + B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i]) + .getReg(0); + } + + if (!LocalAccum) { + LocalAccum = getZero32(); + HaveCarryOut = false; + } + } + + auto Add = + B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back()); + LocalAccum = Add.getReg(0); + return HaveCarryOut ? Add.getReg(1) : Register(); + }; + + // Build a multiply-add chain to compute + // + // LocalAccum + (partial products at DstIndex) + // + (opportunistic subset of CarryIn) + // + // LocalAccum is an array of one or two 32-bit registers that are updated + // in-place. The incoming registers may be null. + // + // In some edge cases, carry-ins can be consumed "for free". In that case, + // the consumed carry bits are removed from CarryIn in-place. + auto buildMadChain = + [&](MutableArrayRef LocalAccum, unsigned DstIndex, Carry &CarryIn) + -> Carry { + assert((DstIndex + 1 < Accum.size() && LocalAccum.size() == 2) || + (DstIndex + 1 >= Accum.size() && LocalAccum.size() == 1)); + + Carry CarryOut; + unsigned j0 = 0; + + // Use plain 32-bit multiplication for the most significant part of the + // result by default. + if (LocalAccum.size() == 1 && + (!UsePartialMad64_32 || !CarryIn.empty())) { + do { + unsigned j1 = DstIndex - j0; + auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]); + if (!LocalAccum[0]) { + LocalAccum[0] = Mul.getReg(0); + } else { + if (CarryIn.empty()) { + LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0); + } else { + LocalAccum[0] = + B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back()) + .getReg(0); + CarryIn.pop_back(); + } + } + ++j0; + } while (j0 <= DstIndex && (!UsePartialMad64_32 || !CarryIn.empty())); + } + + // Build full 64-bit multiplies. 
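+    // Each G_AMDGPU_MAD_U64_U32 in the chain computes
+    // Src0[j0] * Src1[j1] + Tmp as a full 64-bit value with a 1-bit
+    // carry-out. For example, a 64 x 64 -> 64 multiply split into 32-bit
+    // parts (a1:a0, b1:b0) evaluates
+    //   Accum[1]:Accum[0] = a0*b0 + ((a0*b1 + a1*b0) << 32)  (mod 2^64),
+    // folding each partial product into the accumulator with one MAD.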
+ if (j0 <= DstIndex) { + bool HaveSmallAccum = false; + Register Tmp; + + if (LocalAccum[0]) { + if (LocalAccum.size() == 1) { + Tmp = B.buildAnyExt(S64, LocalAccum[0]).getReg(0); + HaveSmallAccum = true; + } else if (LocalAccum[1]) { + Tmp = B.buildMerge(S64, LocalAccum).getReg(0); + HaveSmallAccum = false; + } else { + Tmp = B.buildZExt(S64, LocalAccum[0]).getReg(0); + HaveSmallAccum = true; + } + } else { + assert(LocalAccum.size() == 1 || !LocalAccum[1]); + Tmp = getZero64(); + HaveSmallAccum = true; + } + + do { + unsigned j1 = DstIndex - j0; + auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {S64, S1}, + {Src0[j0], Src1[j1], Tmp}); + Tmp = Mad.getReg(0); + if (!HaveSmallAccum) + CarryOut.push_back(Mad.getReg(1)); + HaveSmallAccum = false; + ++j0; + } while (j0 <= DstIndex); + + auto Unmerge = B.buildUnmerge(S32, Tmp); + LocalAccum[0] = Unmerge.getReg(0); + if (LocalAccum.size() > 1) + LocalAccum[1] = Unmerge.getReg(1); + } + + return CarryOut; + }; + + // Outer multiply loop, iterating over destination parts from least + // significant to most significant parts. + // + // The columns of the following diagram correspond to the destination parts + // affected by one iteration of the outer loop (ignoring boundary + // conditions). + // + // Dest index relative to 2 * i: 1 0 -1 + // ------ + // Carries from previous iteration: e o + // Even-aligned partial product sum: E E . + // Odd-aligned partial product sum: O O + // + // 'o' is OddCarry, 'e' is EvenCarry. + // EE and OO are computed from partial products via buildMadChain and use + // accumulation where possible and appropriate. + // + Register SeparateOddCarry; + Carry EvenCarry; + Carry OddCarry; + + for (unsigned i = 0; i <= Accum.size() / 2; ++i) { + Carry OddCarryIn = std::move(OddCarry); + Carry EvenCarryIn = std::move(EvenCarry); + OddCarry.clear(); + EvenCarry.clear(); + + // Partial products at offset 2 * i. + if (2 * i < Accum.size()) { + auto LocalAccum = Accum.drop_front(2 * i).take_front(2); + EvenCarry = buildMadChain(LocalAccum, 2 * i, EvenCarryIn); + } + + // Partial products at offset 2 * i - 1. + if (i > 0) { + if (!SeparateOddAlignedProducts) { + auto LocalAccum = Accum.drop_front(2 * i - 1).take_front(2); + OddCarry = buildMadChain(LocalAccum, 2 * i - 1, OddCarryIn); + } else { + bool IsHighest = 2 * i >= Accum.size(); + Register SeparateOddOut[2]; + auto LocalAccum = makeMutableArrayRef(SeparateOddOut) + .take_front(IsHighest ? 1 : 2); + OddCarry = buildMadChain(LocalAccum, 2 * i - 1, OddCarryIn); + + MachineInstr *Lo; + + if (i == 1) { + if (!IsHighest) + Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]); + else + Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]); + } else { + Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0], + SeparateOddCarry); + } + Accum[2 * i - 1] = Lo->getOperand(0).getReg(); + + if (!IsHighest) { + auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1], + Lo->getOperand(1).getReg()); + Accum[2 * i] = Hi.getReg(0); + SeparateOddCarry = Hi.getReg(1); + } + } + } + + // Add in the carries from the previous iteration + if (i > 0) { + if (Register CarryOut = mergeCarry(Accum[2 * i - 1], OddCarryIn)) + EvenCarryIn.push_back(CarryOut); + + if (2 * i < Accum.size()) { + if (Register CarryOut = mergeCarry(Accum[2 * i], EvenCarryIn)) + OddCarry.push_back(CarryOut); + } + } + } +} + +// Custom narrowing of wide multiplies using wide multiply-add instructions. 
+// +// TODO: If the multiply is followed by an addition, we should attempt to +// integrate it to make better use of V_MAD_U64_U32's multiply-add capabilities. +bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper, + MachineInstr &MI) const { + assert(ST.hasMad64_32()); + assert(MI.getOpcode() == TargetOpcode::G_MUL); + + MachineIRBuilder &B = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *B.getMRI(); + + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + + LLT Ty = MRI.getType(DstReg); + assert(Ty.isScalar()); + + unsigned Size = Ty.getSizeInBits(); + unsigned NumParts = Size / 32; + assert((Size % 32) == 0); + assert(NumParts >= 2); + + // Whether to use MAD_64_32 for partial products whose high half is + // discarded. This avoids some ADD instructions but risks false dependency + // stalls on some subtargets in some cases. + const bool UsePartialMad64_32 = ST.getGeneration() < AMDGPUSubtarget::GFX10; + + // Whether to compute odd-aligned partial products separately. This is + // advisable on subtargets where the accumulator of MAD_64_32 must be placed + // in an even-aligned VGPR. + const bool SeparateOddAlignedProducts = ST.hasFullRate64Ops(); + + LLT S32 = LLT::scalar(32); + SmallVector Src0Parts, Src1Parts; + for (unsigned i = 0; i < NumParts; ++i) { + Src0Parts.push_back(MRI.createGenericVirtualRegister(S32)); + Src1Parts.push_back(MRI.createGenericVirtualRegister(S32)); + } + B.buildUnmerge(Src0Parts, Src0); + B.buildUnmerge(Src1Parts, Src1); + + SmallVector AccumRegs(NumParts); + buildMultiply(Helper, AccumRegs, Src0Parts, Src1Parts, UsePartialMad64_32, + SeparateOddAlignedProducts); + + B.buildMerge(DstReg, AccumRegs); + MI.eraseFromParent(); + return true; + +} + // Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to // ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input // case with a single min instruction instead of a compare+select. @@ -2954,6 +3337,89 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return true; } +static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, + int64_t C) { + B.buildConstant(MI.getOperand(0).getReg(), C); + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { + unsigned MaxID = ST.getMaxWorkitemID(B.getMF().getFunction(), Dim); + if (MaxID == 0) + return replaceWithConstant(B, MI, 0); + + const SIMachineFunctionInfo *MFI = B.getMF().getInfo(); + const ArgDescriptor *Arg; + const TargetRegisterClass *ArgRC; + LLT ArgTy; + std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType); + + Register DstReg = MI.getOperand(0).getReg(); + if (!Arg) { + // It's undefined behavior if a function marked with the amdgpu-no-* + // attributes uses the corresponding intrinsic. + B.buildUndef(DstReg); + MI.eraseFromParent(); + return true; + } + + if (Arg->isMasked()) { + // Don't bother inserting AssertZext for packed IDs since we're emitting the + // masking operations anyway. + // + // TODO: We could assert the top bit is 0 for the source copy. 
+ if (!loadInputValue(DstReg, B, ArgType)) + return false; + } else { + Register TmpReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); + if (!loadInputValue(TmpReg, B, ArgType)) + return false; + B.buildAssertZExt(DstReg, TmpReg, 32 - countLeadingZeros(MaxID)); + } + + MI.eraseFromParent(); + return true; +} + +Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B, + int64_t Offset) const { + LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + Register KernArgReg = B.getMRI()->createGenericVirtualRegister(PtrTy); + + // TODO: If we passed in the base kernel offset we could have a better + // alignment than 4, but we don't really need it. + if (!loadInputValue(KernArgReg, B, + AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR)) + llvm_unreachable("failed to find kernarg segment ptr"); + + auto COffset = B.buildConstant(LLT::scalar(64), Offset); + // TODO: Should get nuw + return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0); +} + +/// Legalize a value that's loaded from kernel arguments. This is only used by +/// legacy intrinsics. +bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI, + MachineIRBuilder &B, + uint64_t Offset, + Align Alignment) const { + Register DstReg = MI.getOperand(0).getReg(); + + assert(B.getMRI()->getType(DstReg) == LLT::scalar(32) && + "unexpected kernarg parameter type"); + + Register Ptr = getKernargParameterPtr(B, Offset); + MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); + B.buildLoad(DstReg, Ptr, PtrInfo, Align(4), + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -3688,9 +4154,9 @@ bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, // The remaining operands were used to set fields in the MemOperand on // construction. for (int I = 6; I > 3; --I) - MI.RemoveOperand(I); + MI.removeOperand(I); - MI.RemoveOperand(1); // Remove the intrinsic ID. + MI.removeOperand(1); // Remove the intrinsic ID. Observer.changedInstr(MI); return true; } @@ -4359,7 +4825,7 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI, /// /// We don't want to directly select image instructions just yet, but also want /// to exposes all register repacking to the legalizer/combiners. We also don't -/// want a selected instrution entering RegBankSelect. In order to avoid +/// want a selected instruction entering RegBankSelect. In order to avoid /// defining a multitude of intermediate image instructions, directly hack on /// the intrinsic's arguments. In cases like a16 addresses, this requires /// padding now unnecessary arguments with $noreg. @@ -4508,6 +4974,10 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // // SIShrinkInstructions will convert NSA encodings to non-NSA after register // allocation when possible. + // + // TODO: we can actually allow partial NSA where the final register is a + // contiguous set of the remaining addresses. + // This could help where there are more addresses than supported. const bool UseNSA = ST.hasNSAEncoding() && CorrectedNumVAddrs >= 3 && CorrectedNumVAddrs <= ST.getNSAMaxSize(); @@ -4607,7 +5077,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( return false; // TODO: Make sure the TFE operand bit is set. - MI.RemoveOperand(1); + MI.removeOperand(1); // Handle the easy case that requires no repack instructions. 
if (Ty == S32) { @@ -4737,7 +5207,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( // should be fixed to have a memory operand. Since it's readnone, we're not // allowed to add one. MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_S_BUFFER_LOAD)); - MI.RemoveOperand(1); // Remove intrinsic ID + MI.removeOperand(1); // Remove intrinsic ID // FIXME: When intrinsic definition is fixed, this should have an MMO already. // TODO: Should this use datalayout alignment? @@ -4797,6 +5267,47 @@ bool AMDGPULegalizerInfo::legalizeTrapEndpgm( bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { + MachineFunction &MF = B.getMF(); + const LLT S64 = LLT::scalar(64); + + Register SGPR01(AMDGPU::SGPR0_SGPR1); + // For code object version 5, queue_ptr is passed through implicit kernarg. + if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + AMDGPUTargetLowering::ImplicitParameter Param = + AMDGPUTargetLowering::QUEUE_PTR; + uint64_t Offset = + ST.getTargetLowering()->getImplicitParameterOffset(B.getMF(), Param); + + Register KernargPtrReg = MRI.createGenericVirtualRegister( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); + + if (!loadInputValue(KernargPtrReg, B, + AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR)) + return false; + + // TODO: can we be smarter about machine pointer info? + MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + LLT::scalar(64), commonAlignment(Align(64), Offset)); + + // Pointer address + Register LoadAddr = MRI.createGenericVirtualRegister( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); + B.buildPtrAdd(LoadAddr, KernargPtrReg, + B.buildConstant(LLT::scalar(64), Offset).getReg(0)); + // Load address + Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0); + B.buildCopy(SGPR01, Temp); + B.buildInstr(AMDGPU::S_TRAP) + .addImm(static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap)) + .addReg(SGPR01, RegState::Implicit); + MI.eraseFromParent(); + return true; + } + // Pass queue pointer to trap handler as input, and insert trap instruction // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi Register LiveIn = @@ -4804,7 +5315,6 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR)) return false; - Register SGPR01(AMDGPU::SGPR0_SGPR1); B.buildCopy(SGPR01, LiveIn); B.buildInstr(AMDGPU::S_TRAP) .addImm(static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap)) @@ -4848,6 +5358,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI = *B.getMRI(); const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); + const LLT V2S16 = LLT::fixed_vector(2, 16); + const LLT V3S32 = LLT::fixed_vector(3, 32); Register DstReg = MI.getOperand(0).getReg(); Register NodePtr = MI.getOperand(2).getReg(); @@ -4865,61 +5377,98 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI, return false; } + const bool IsGFX11Plus = AMDGPU::isGFX11Plus(ST); const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16; const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64; const unsigned NumVDataDwords = 4; const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); - const bool UseNSA = - ST.hasNSAEncoding() && NumVAddrDwords <= ST.getNSAMaxSize(); + const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 
4 : 5) : NumVAddrDwords; + const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize(); const unsigned BaseOpcodes[2][2] = { {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16}, {AMDGPU::IMAGE_BVH64_INTERSECT_RAY, AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}}; int Opcode; if (UseNSA) { - Opcode = - AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10NSA, - NumVDataDwords, NumVAddrDwords); - } else { Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], - AMDGPU::MIMGEncGfx10Default, NumVDataDwords, - PowerOf2Ceil(NumVAddrDwords)); + IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA + : AMDGPU::MIMGEncGfx10NSA, + NumVDataDwords, NumVAddrDwords); + } else { + Opcode = AMDGPU::getMIMGOpcode( + BaseOpcodes[Is64][IsA16], + IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default, + NumVDataDwords, PowerOf2Ceil(NumVAddrDwords)); } assert(Opcode != -1); SmallVector Ops; - if (Is64) { - auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr); - Ops.push_back(Unmerge.getReg(0)); - Ops.push_back(Unmerge.getReg(1)); - } else { + if (UseNSA && IsGFX11Plus) { + auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) { + auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); + auto Merged = B.buildMerge( + V3S32, {Unmerge.getReg(0), Unmerge.getReg(1), Unmerge.getReg(2)}); + Ops.push_back(Merged.getReg(0)); + }; + Ops.push_back(NodePtr); - } - Ops.push_back(RayExtent); + Ops.push_back(RayExtent); + packLanes(RayOrigin); + + if (IsA16) { + auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); + auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir); + auto MergedDir = B.buildMerge( + V3S32, + {B.buildBitcast(S32, B.buildMerge(V2S16, {UnmergeRayInvDir.getReg(0), + UnmergeRayDir.getReg(0)})) + .getReg(0), + B.buildBitcast(S32, B.buildMerge(V2S16, {UnmergeRayInvDir.getReg(1), + UnmergeRayDir.getReg(1)})) + .getReg(0), + B.buildBitcast(S32, B.buildMerge(V2S16, {UnmergeRayInvDir.getReg(2), + UnmergeRayDir.getReg(2)})) + .getReg(0)}); + Ops.push_back(MergedDir.getReg(0)); + } else { + packLanes(RayDir); + packLanes(RayInvDir); + } + } else { + if (Is64) { + auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr); + Ops.push_back(Unmerge.getReg(0)); + Ops.push_back(Unmerge.getReg(1)); + } else { + Ops.push_back(NodePtr); + } + Ops.push_back(RayExtent); - auto packLanes = [&Ops, &S32, &B](Register Src) { - auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); - Ops.push_back(Unmerge.getReg(0)); - Ops.push_back(Unmerge.getReg(1)); - Ops.push_back(Unmerge.getReg(2)); - }; + auto packLanes = [&Ops, &S32, &B](Register Src) { + auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); + Ops.push_back(Unmerge.getReg(0)); + Ops.push_back(Unmerge.getReg(1)); + Ops.push_back(Unmerge.getReg(2)); + }; - packLanes(RayOrigin); - if (IsA16) { - auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); - auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir); - Register R1 = MRI.createGenericVirtualRegister(S32); - Register R2 = MRI.createGenericVirtualRegister(S32); - Register R3 = MRI.createGenericVirtualRegister(S32); - B.buildMerge(R1, {UnmergeRayDir.getReg(0), UnmergeRayDir.getReg(1)}); - B.buildMerge(R2, {UnmergeRayDir.getReg(2), UnmergeRayInvDir.getReg(0)}); - B.buildMerge(R3, {UnmergeRayInvDir.getReg(1), UnmergeRayInvDir.getReg(2)}); - Ops.push_back(R1); - Ops.push_back(R2); - Ops.push_back(R3); - } else { - packLanes(RayDir); - packLanes(RayInvDir); + packLanes(RayOrigin); + if (IsA16) { + auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); + 
auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir); + Register R1 = MRI.createGenericVirtualRegister(S32); + Register R2 = MRI.createGenericVirtualRegister(S32); + Register R3 = MRI.createGenericVirtualRegister(S32); + B.buildMerge(R1, {UnmergeRayDir.getReg(0), UnmergeRayDir.getReg(1)}); + B.buildMerge(R2, {UnmergeRayDir.getReg(2), UnmergeRayInvDir.getReg(0)}); + B.buildMerge(R3, + {UnmergeRayInvDir.getReg(1), UnmergeRayInvDir.getReg(2)}); + Ops.push_back(R1); + Ops.push_back(R2); + Ops.push_back(R3); + } else { + packLanes(RayDir); + packLanes(RayInvDir); + } } if (!UseNSA) { @@ -4946,9 +5495,24 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI, return true; } -static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, int64_t C) { - B.buildConstant(MI.getOperand(0).getReg(), C); +bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI, + MachineIRBuilder &B) const { + unsigned Opc; + int RoundMode = MI.getOperand(2).getImm(); + + if (RoundMode == (int)RoundingMode::TowardPositive) + Opc = AMDGPU::G_FPTRUNC_ROUND_UPWARD; + else if (RoundMode == (int)RoundingMode::TowardNegative) + Opc = AMDGPU::G_FPTRUNC_ROUND_DOWNWARD; + else + return false; + + B.buildInstr(Opc) + .addDef(MI.getOperand(0).getReg()) + .addUse(MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; } @@ -5055,22 +5619,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_implicitarg_ptr: return legalizeImplicitArgPtr(MI, MRI, B); case Intrinsic::amdgcn_workitem_id_x: - if (ST.getMaxWorkitemID(B.getMF().getFunction(), 0) == 0) - return replaceWithConstant(B, MI, 0); - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKITEM_ID_X); + return legalizeWorkitemIDIntrinsic(MI, MRI, B, 0, + AMDGPUFunctionArgInfo::WORKITEM_ID_X); case Intrinsic::amdgcn_workitem_id_y: - if (ST.getMaxWorkitemID(B.getMF().getFunction(), 1) == 0) - return replaceWithConstant(B, MI, 0); - - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKITEM_ID_Y); + return legalizeWorkitemIDIntrinsic(MI, MRI, B, 1, + AMDGPUFunctionArgInfo::WORKITEM_ID_Y); case Intrinsic::amdgcn_workitem_id_z: - if (ST.getMaxWorkitemID(B.getMF().getFunction(), 2) == 0) - return replaceWithConstant(B, MI, 0); - - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKITEM_ID_Z); + return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2, + AMDGPUFunctionArgInfo::WORKITEM_ID_Z); case Intrinsic::amdgcn_workgroup_id_x: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_X); @@ -5092,6 +5648,31 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_dispatch_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); + case Intrinsic::r600_read_ngroups_x: + // TODO: Emit error for hsa + return legalizeKernargMemParameter(MI, B, + SI::KernelInputOffsets::NGROUPS_X); + case Intrinsic::r600_read_ngroups_y: + return legalizeKernargMemParameter(MI, B, + SI::KernelInputOffsets::NGROUPS_Y); + case Intrinsic::r600_read_ngroups_z: + return legalizeKernargMemParameter(MI, B, + SI::KernelInputOffsets::NGROUPS_Z); + case Intrinsic::r600_read_local_size_x: + // TODO: Could insert G_ASSERT_ZEXT from s16 + return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::LOCAL_SIZE_X); + case Intrinsic::r600_read_local_size_y: + // TODO: Could insert G_ASSERT_ZEXT from s16 + return legalizeKernargMemParameter(MI, B, 
SI::KernelInputOffsets::LOCAL_SIZE_Y); + // TODO: Could insert G_ASSERT_ZEXT from s16 + case Intrinsic::r600_read_local_size_z: + return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::LOCAL_SIZE_Z); + case Intrinsic::r600_read_global_size_x: + return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::GLOBAL_SIZE_X); + case Intrinsic::r600_read_global_size_y: + return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::GLOBAL_SIZE_Y); + case Intrinsic::r600_read_global_size_z: + return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::GLOBAL_SIZE_Z); case Intrinsic::amdgcn_fdiv_fast: return legalizeFDIVFastIntrin(MI, MRI, B); case Intrinsic::amdgcn_is_shared: @@ -5157,7 +5738,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_raw_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_buffer_atomic_fadd: { Register DstReg = MI.getOperand(0).getReg(); - if (!MRI.use_empty(DstReg) && !ST.hasGFX90AInsts()) { + if (!MRI.use_empty(DstReg) && + !AMDGPU::hasAtomicFaddRtnForTy(ST, MRI.getType(DstReg))) { Function &F = B.getMF().getFunction(); DiagnosticInfoUnsupported NoFpRet( F, "return versions of fp atomics not supported", B.getDebugLoc(), diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 964a41d3d740..cee533aa34ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -88,6 +88,12 @@ public: bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + void buildMultiply(LegalizerHelper &Helper, MutableArrayRef Accum, + ArrayRef Src0, ArrayRef Src1, + bool UsePartialMad64_32, + bool SeparateOddAlignedProducts) const; + bool legalizeMul(LegalizerHelper &Helper, MachineInstr &MI) const; bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; @@ -96,9 +102,18 @@ public: const TargetRegisterClass *ArgRC, LLT ArgTy) const; bool loadInputValue(Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + bool legalizePreloadedArgIntrin( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + bool legalizeWorkitemIDIntrinsic( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + + Register getKernargParameterPtr(MachineIRBuilder &B, int64_t Offset) const; + bool legalizeKernargMemParameter(MachineInstr &MI, MachineIRBuilder &B, + uint64_t Offset, + Align Alignment = Align(4)) const; bool legalizeUnsignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; @@ -169,6 +184,8 @@ public: bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index bbbadfdfd444..78e092b2e872 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1593,8 +1593,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { // max vector size is 16, and sincos will generate two results. 
double DVal0[16], DVal1[16]; + int FuncVecSize = getVecSize(FInfo); bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); - if (getVecSize(FInfo) == 1) { + if (FuncVecSize == 1) { if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1, copr2)) { return false; @@ -1603,7 +1604,7 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { ConstantDataVector *CDV0 = dyn_cast_or_null(copr0); ConstantDataVector *CDV1 = dyn_cast_or_null(copr1); ConstantDataVector *CDV2 = dyn_cast_or_null(copr2); - for (int i=0; i < getVecSize(FInfo); ++i) { + for (int i = 0; i < FuncVecSize; ++i) { Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr; @@ -1616,19 +1617,19 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { LLVMContext &context = CI->getParent()->getParent()->getContext(); Constant *nval0, *nval1; - if (getVecSize(FInfo) == 1) { + if (FuncVecSize == 1) { nval0 = ConstantFP::get(CI->getType(), DVal0[0]); if (hasTwoResults) nval1 = ConstantFP::get(CI->getType(), DVal1[0]); } else { if (getArgType(FInfo) == AMDGPULibFunc::F32) { SmallVector FVal0, FVal1; - for (int i=0; i < getVecSize(FInfo); ++i) + for (int i = 0; i < FuncVecSize; ++i) FVal0.push_back((float)DVal0[i]); ArrayRef tmp0(FVal0); nval0 = ConstantDataVector::get(context, tmp0); if (hasTwoResults) { - for (int i=0; i < getVecSize(FInfo); ++i) + for (int i = 0; i < FuncVecSize; ++i) FVal1.push_back((float)DVal1[i]); ArrayRef tmp1(FVal1); nval1 = ConstantDataVector::get(context, tmp1); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h index dc0ac72016f3..bf0fda25b2c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h @@ -324,8 +324,8 @@ public: class AMDGPULibFuncImpl : public AMDGPULibFuncBase { public: - AMDGPULibFuncImpl() {} - virtual ~AMDGPULibFuncImpl() {} + AMDGPULibFuncImpl() = default; + virtual ~AMDGPULibFuncImpl() = default; /// Get unmangled name for mangled library function and name for unmangled /// library function. 
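The evaluateCall changes above only hoist the loop-invariant getVecSize(FInfo) lookup into FuncVecSize; the folding itself is unchanged. As a rough, self-contained sketch of that per-lane folding, with std::vector standing in for the ConstantDataVector lanes and sincos chosen because it is the one case that yields two results:

    #include <cmath>
    #include <vector>

    // Hypothetical simplification of the per-lane constant folding in
    // evaluateCall: each lane of the constant vector operand is evaluated
    // on the host, and the scalar results are collected so they can be
    // reassembled into constant vectors afterwards.
    struct Folded {
      std::vector<double> Val0; // primary result per lane
      std::vector<double> Val1; // second result, sincos only
    };

    static Folded foldSincosLanes(const std::vector<double> &Lanes) {
      Folded R;
      for (double L : Lanes) {         // one host evaluation per lane,
        R.Val0.push_back(std::sin(L)); // mirroring evaluateScalarMathFunc
        R.Val1.push_back(std::cos(L));
      }
      return R;
    }

Called with the lanes of a constant vector operand, this yields the two per-lane result arrays that the real code then materializes as constant vectors.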
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index b700dd5aa301..93d1eed2cf63 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -13,7 +13,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -156,11 +155,8 @@ bool AMDGPULowerIntrinsics::runOnModule(Module &M) { Changed = true; break; - case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::r600_read_tidig_x: - case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::r600_read_tidig_y: - case Intrinsic::amdgcn_workitem_id_z: case Intrinsic::r600_read_tidig_z: case Intrinsic::r600_read_local_size_x: case Intrinsic::r600_read_local_size_y: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index c34c12ab9fec..2e5c35f1f571 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -73,7 +73,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F); Align MaxAlign; - // FIXME: Alignment is broken broken with explicit arg offset.; + // FIXME: Alignment is broken with explicit arg offset.; const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign); if (TotalKernArgSize == 0) return false; @@ -92,9 +92,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { for (Argument &Arg : F.args()) { const bool IsByRef = Arg.hasByRefAttr(); Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType(); - MaybeAlign ABITypeAlign = IsByRef ? Arg.getParamAlign() : None; - if (!ABITypeAlign) - ABITypeAlign = DL.getABITypeAlign(ArgTy); + MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None; + Align ABITypeAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy); uint64_t Size = DL.getTypeSizeInBits(ArgTy); uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index 08a1b970648d..f5903b3afb81 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -163,39 +163,29 @@ static bool processUse(CallInst *CI) { if (!GroupSize || !GridSize) continue; + using namespace llvm::PatternMatch; + auto GroupIDIntrin = + I == 0 ? m_Intrinsic() + : (I == 1 ? m_Intrinsic() + : m_Intrinsic()); + for (User *U : GroupSize->users()) { auto *ZextGroupSize = dyn_cast(U); if (!ZextGroupSize) continue; - for (User *ZextUser : ZextGroupSize->users()) { - auto *SI = dyn_cast(ZextUser); - if (!SI) - continue; - - using namespace llvm::PatternMatch; - auto GroupIDIntrin = I == 0 ? - m_Intrinsic() : - (I == 1 ? 
m_Intrinsic() : - m_Intrinsic()); - - auto SubExpr = m_Sub(m_Specific(GridSize), - m_Mul(GroupIDIntrin, m_Specific(ZextGroupSize))); - - ICmpInst::Predicate Pred; - if (match(SI, - m_Select(m_ICmp(Pred, SubExpr, m_Specific(ZextGroupSize)), - SubExpr, - m_Specific(ZextGroupSize))) && - Pred == ICmpInst::ICMP_ULT) { + for (User *UMin : ZextGroupSize->users()) { + if (match(UMin, + m_UMin(m_Sub(m_Specific(GridSize), + m_Mul(GroupIDIntrin, m_Specific(ZextGroupSize))), + m_Specific(ZextGroupSize)))) { if (HasReqdWorkGroupSize) { ConstantInt *KnownSize = mdconst::extract(MD->getOperand(I)); - SI->replaceAllUsesWith(ConstantExpr::getIntegerCast(KnownSize, - SI->getType(), - false)); + UMin->replaceAllUsesWith(ConstantExpr::getIntegerCast( + KnownSize, UMin->getType(), false)); } else { - SI->replaceAllUsesWith(ZextGroupSize); + UMin->replaceAllUsesWith(ZextGroupSize); } MadeChange = true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 6e2b5dc471bc..35922341de26 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -14,7 +14,7 @@ // known address. AMDGPUMachineFunction allocates the LDS global. // // Local variables with constant annotation or non-undef initializer are passed -// through unchanged for simplication or error diagnostics in later passes. +// through unchanged for simplification or error diagnostics in later passes. // // To reduce the memory overhead variables that are only used by kernels are // excluded from this transform. The analysis to determine whether a variable @@ -28,8 +28,9 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" -#include "Utils/AMDGPULDSUtils.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -163,9 +164,10 @@ public: } bool runOnModule(Module &M) override { + CallGraph CG = CallGraph(M); UsedList = getUsedList(M); bool Changed = superAlignLDSGlobals(M); - Changed |= processUsedLDS(M); + Changed |= processUsedLDS(CG, M); for (Function &F : M.functions()) { if (F.isDeclaration()) @@ -174,7 +176,7 @@ public: // Only lower compute kernels' LDS. if (!AMDGPU::isKernel(F.getCallingConv())) continue; - Changed |= processUsedLDS(M, &F); + Changed |= processUsedLDS(CG, M, &F); } UsedList.clear(); @@ -226,7 +228,7 @@ private: return Changed; } - bool processUsedLDS(Module &M, Function *F = nullptr) { + bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); @@ -374,7 +376,20 @@ private: IRBuilder<> Builder(Ctx); for (Function &Func : M.functions()) { if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) { - markUsedByKernel(Builder, &Func, SGV); + const CallGraphNode *N = CG[&Func]; + const bool CalleesRequireModuleLDS = N->size() > 0; + + if (CalleesRequireModuleLDS) { + // If a function this kernel might call requires module LDS, + // annotate the kernel to let later passes know it will allocate + // this structure, even if not apparent from the IR. + markUsedByKernel(Builder, &Func, SGV); + } else { + // However if we are certain this kernel cannot call a function that + // requires module LDS, annotate the kernel so the backend can elide + // the allocation without repeating callgraph walks. 
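+          // For example, a leaf kernel, where CG[&Func]->size() == 0, takes
+          // this path and the module LDS struct is never allocated for it.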
+ Func.addFnAttr("amdgpu-elide-module-lds"); + } } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 3fad7e192195..ed6ddbf426fd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -120,8 +120,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We // need to select it to the subtarget specific version, and there's no way to // do that with a single pseudo source operation. - if (Opcode == AMDGPU::S_SETPC_B64_return || - Opcode == AMDGPU::S_SETPC_B64_return_gfx) + if (Opcode == AMDGPU::S_SETPC_B64_return) Opcode = AMDGPU::S_SETPC_B64; else if (Opcode == AMDGPU::SI_CALL) { // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the @@ -208,6 +207,16 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { return; } + if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) { + if (isVerbose()) { + std::string HexString; + raw_string_ostream HexStream(HexString); + HexStream << format_hex(MI->getOperand(0).getImm(), 10, true); + OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")"); + } + return; + } + if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) { if (isVerbose()) OutStreamer->emitRawComment(" divergent unreachable"); @@ -240,7 +249,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { raw_svector_ostream CodeStream(CodeBytes); std::unique_ptr InstEmitter(createSIMCCodeEmitter( - *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext)); + *STI.getInstrInfo(), OutContext)); InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI); assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h index 0e43b4fe9461..5c656f158e71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h @@ -1,4 +1,4 @@ -//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===// +//===- AMDGPUMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
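For reference, the SCHED_BARRIER comment emission above boils down to rendering the mask operand with format_hex and wrapping it in an assembly comment. A standalone sketch, with schedBarrierComment as a hypothetical helper name:

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // format_hex(Mask, 10, true) renders a 32-bit mask as e.g. "0x0000000F";
    // the width of 10 counts the "0x" prefix, and 'true' selects uppercase
    // hex digits.
    static std::string schedBarrierComment(int64_t Mask) {
      std::string HexString;
      llvm::raw_string_ostream HexStream(HexString);
      HexStream << llvm::format_hex(Mask, 10, /*Upper=*/true);
      return " sched_barrier mask(" + HexStream.str() + ")";
    }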
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index c3441f81a78e..0712466a0e88 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -21,17 +21,18 @@ bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue( StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const { SIMachineFunctionInfo *MFI = MF.getInfo(); - const SIInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const AMDGPUTargetMachine &TM = + static_cast(MF.getTarget()); if (Src == "BufferResource") { - PSV = MFI->getBufferPSV(TII); + PSV = MFI->getBufferPSV(TM); return false; } if (Src == "ImageResource") { - PSV = MFI->getImagePSV(TII); + PSV = MFI->getImagePSV(TM); return false; } if (Src == "GWSResource") { - PSV = MFI->getGWSPSV(TII); + PSV = MFI->getGWSPSV(TM); return false; } llvm_unreachable("unknown MIR custom pseudo source value"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h index 47faa6c72481..753f7edc9385 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h @@ -25,7 +25,7 @@ struct PerFunctionMIParsingState; class AMDGPUMIRFormatter final : public MIRFormatter { public: - AMDGPUMIRFormatter() {} + AMDGPUMIRFormatter() = default; virtual ~AMDGPUMIRFormatter() = default; /// Implement target specific parsing of target custom pseudo source value. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 4e2f98d2a5db..d837f8cb2f60 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -1295,7 +1295,7 @@ static void fixRegionTerminator(RegionMRT *Region) { } } -// If a region region is just a sequence of regions (and the exit +// If a region is just a sequence of regions (and the exit // block in the case of the top level region), we can simply skip // linearizing it, because it is already linear bool regionIsSequence(RegionMRT *Region) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 593388a4d819..b461c3c4bfdc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMachineFunction.h" +#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -32,6 +33,15 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); WaveLimiter = WaveLimitAttr.getValueAsBool(); + // FIXME: How is this attribute supposed to interact with statically known + // global sizes? + StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); + if (!S.empty()) + S.consumeInteger(0, GDSSize); + + // Assume the attribute allocates before any known GDS globals. 
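+  // For example, "amdgpu-gds-size"="1024" reserves GDS bytes [0, 1024),
+  // and GDS globals allocated in allocateLDSGlobal are placed after it.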
+ StaticGDSSize = GDSSize; + CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); @@ -46,25 +56,43 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, Align Alignment = DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); - /// TODO: We should sort these to minimize wasted space due to alignment - /// padding. Currently the padding is decided by the first encountered use - /// during lowering. - unsigned Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); + unsigned Offset; + if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + /// TODO: We should sort these to minimize wasted space due to alignment + /// padding. Currently the padding is decided by the first encountered use + /// during lowering. + Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); - Entry.first->second = Offset; - StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); + StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); - // Update the LDS size considering the padding to align the dynamic shared - // memory. - LDSSize = alignTo(StaticLDSSize, DynLDSAlign); + // Update the LDS size considering the padding to align the dynamic shared + // memory. + LDSSize = alignTo(StaticLDSSize, DynLDSAlign); + } else { + assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && + "expected region address space"); + Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); + StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); + + // FIXME: Apply alignment of dynamic GDS + GDSSize = StaticGDSSize; + } + + Entry.first->second = Offset; return Offset; } -void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Module *M) { +// This kernel calls no functions that require the module lds struct +static bool canElideModuleLDS(const Function &F) { + return F.hasFnAttribute("amdgpu-elide-module-lds"); +} + +void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { + const Module *M = F.getParent(); if (isModuleEntryFunction()) { const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds"); - if (GV) { + if (GV && !canElideModuleLDS(F)) { unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV); (void)Offset; assert(Offset == 0 && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 48cf46b5f871..df62c2314617 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -12,6 +12,10 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Function.h" namespace llvm { @@ -25,11 +29,13 @@ protected: Align MaxKernArgAlign; // Cache for this. /// Number of bytes in the LDS that are being used. - unsigned LDSSize = 0; + uint32_t LDSSize = 0; + uint32_t GDSSize = 0; /// Number of bytes in the LDS allocated statically. This field is only used /// in the instruction selector and not part of the machine function info. - unsigned StaticLDSSize = 0; + uint32_t StaticLDSSize = 0; + uint32_t StaticGDSSize = 0; /// Align for dynamic shared memory if any. Dynamic shared memory is /// allocated directly after the static one, i.e., LDSSize. 
Need to pad @@ -63,12 +69,16 @@ public: return ExplicitKernArgSize; } - unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); } + Align getMaxKernArgAlign() const { return MaxKernArgAlign; } - unsigned getLDSSize() const { + uint32_t getLDSSize() const { return LDSSize; } + uint32_t getGDSSize() const { + return GDSSize; + } + AMDGPU::SIModeRegisterDefaults getMode() const { return Mode; } @@ -92,7 +102,7 @@ public: } unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV); - void allocateModuleLDSGlobal(const Module *M); + void allocateModuleLDSGlobal(const Function &F); Align getDynLDSAlign() const { return DynLDSAlign; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp index 6646cce8186b..2d48be9ea542 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMachineModuleInfo.h" +#include "llvm/MC/MCSymbol.h" namespace llvm { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index 5a5a5d213a1a..fb7709d66c76 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -34,6 +34,7 @@ #include "AMDGPU.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -71,7 +72,7 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() { return new AMDGPUOpenCLEnqueuedBlockLowering(); } -/// Collect direct or indrect callers of \p F and save them +/// Collect direct or indirect callers of \p F and save them /// to \p Callers. 
static void collectCallers(Function *F, DenseSet &Callers) { for (auto U : F->users()) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 8ad344816ad2..09dbd2150db6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -116,7 +116,6 @@ private: bool isGlobalAddr(const Value *V) const; bool isLocalAddr(const Value *V) const; - bool isConstantAddr(const Value *V) const; }; static std::pair getMemoryInstrPtrAndType( @@ -153,7 +152,7 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const { if (auto LD = dyn_cast(V)) { auto M = LD->getPointerOperand(); - if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) { + if (isGlobalAddr(M)) { LLVM_DEBUG(dbgs() << " is IA\n"); return true; } @@ -267,19 +266,23 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) { << " LSMInst cost: " << Info->LSMInstCost << '\n' << " TotalInst cost: " << Info->InstCost << '\n'); + bool Changed = false; + if (isMemBound(*Info)) { LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n"); NumMemBound++; F.addFnAttr("amdgpu-memory-bound", "true"); + Changed = true; } if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) { LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n"); NumLimitWave++; F.addFnAttr("amdgpu-wave-limiter", "true"); + Changed = true; } - return true; + return Changed; } bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) { @@ -332,15 +335,6 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const { return MAI; } -bool AMDGPUPerfHint::isConstantAddr(const Value *V) const { - if (auto PT = dyn_cast(V->getType())) { - unsigned As = PT->getAddressSpace(); - return As == AMDGPUAS::CONSTANT_ADDRESS || - As == AMDGPUAS::CONSTANT_ADDRESS_32BIT; - } - return false; -} - bool AMDGPUPerfHint::MemAccessInfo::isLargeStride( MemAccessInfo &Reference) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index c029046ab65f..bfe2e9b66ed4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -16,6 +16,7 @@ #include "AMDGPULegalizerInfo.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" @@ -125,7 +126,6 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( LLT::scalar(64)); const LLT S32 = LLT::scalar(32); - B.setMBB(*MI.getParent()); B.setInstrAndDebugLoc(MI); auto Unmerge = B.buildUnmerge(S32, Src); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index f91f31508ad2..1db7c18e4598 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" @@ -66,7 +67,7 @@ private: Value *simplify(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return SimplifyInstruction(I, {*TD, TLI, DT}); + return simplifyInstruction(I, 
{*TD, TLI, DT}); } const DataLayout *TD; @@ -562,15 +563,6 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) { if (Printfs.empty()) return false; - if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { - for (auto &U : HostcallFunction->uses()) { - if (auto *CI = dyn_cast(U.getUser())) { - M.getContext().emitError( - CI, "Cannot use both printf and hostcall in the same module"); - } - } - } - TD = &M.getDataLayout(); return lowerPrintfForGpu(M); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 99b7ffb33884..5a4426ba8113 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -334,86 +334,49 @@ static FixedVectorType *arrayTypeToVecType(ArrayType *ArrayTy) { ArrayTy->getNumElements()); } -static Value *stripBitcasts(Value *V) { - while (Instruction *I = dyn_cast(V)) { - if (I->getOpcode() != Instruction::BitCast) - break; - V = I->getOperand(0); - } - return V; -} - static Value * calculateVectorIndex(Value *Ptr, const std::map &GEPIdx) { - GetElementPtrInst *GEP = dyn_cast(stripBitcasts(Ptr)); + auto *GEP = dyn_cast(Ptr->stripPointerCasts()); if (!GEP) - return nullptr; + return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext())); auto I = GEPIdx.find(GEP); - return I == GEPIdx.end() ? nullptr : I->second; + assert(I != GEPIdx.end() && "Must have entry for GEP!"); + return I->second; } -static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { - // FIXME we only support simple cases - if (GEP->getNumOperands() != 3) +static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, + Type *VecElemTy, const DataLayout &DL) { + // TODO: Extracting a "multiple of X" from a GEP might be a useful generic + // helper. + unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType()); + MapVector VarOffsets; + APInt ConstOffset(BW, 0); + if (GEP->getPointerOperand()->stripPointerCasts() != Alloca || + !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) return nullptr; - ConstantInt *I0 = dyn_cast(GEP->getOperand(1)); - if (!I0 || !I0->isZero()) + unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy); + if (VarOffsets.size() > 1) return nullptr; - return GEP->getOperand(2); -} - -// Not an instruction handled below to turn into a vector. -// -// TODO: Check isTriviallyVectorizable for calls and handle other -// instructions. -static bool canVectorizeInst(Instruction *Inst, User *User, - const DataLayout &DL) { - switch (Inst->getOpcode()) { - case Instruction::Load: { - // Currently only handle the case where the Pointer Operand is a GEP. - // Also we could not vectorize volatile or atomic loads. - LoadInst *LI = cast(Inst); - if (isa(User) && - LI->getPointerOperandType() == User->getType() && - isa(LI->getType())) - return true; - - Instruction *PtrInst = dyn_cast(LI->getPointerOperand()); - if (!PtrInst) - return false; - - return (PtrInst->getOpcode() == Instruction::GetElementPtr || - PtrInst->getOpcode() == Instruction::BitCast) && - LI->isSimple(); + if (VarOffsets.size() == 1) { + // Only handle cases where we don't need to insert extra arithmetic + // instructions. + const auto &VarOffset = VarOffsets.front(); + if (!ConstOffset.isZero() || VarOffset.second != VecElemSize) + return nullptr; + return VarOffset.first; } - case Instruction::BitCast: - return true; - case Instruction::Store: { - // Must be the stored pointer operand, not a stored value, plus - // since it should be canonical form, the User should be a GEP. 
- // Also we could not vectorize volatile or atomic stores. - StoreInst *SI = cast<StoreInst>(Inst); - if (isa<AllocaInst>(User) && - SI->getPointerOperandType() == User->getType() && - isa<VectorType>(SI->getValueOperand()->getType())) - return true; - - Instruction *UserInst = dyn_cast<Instruction>(User); - if (!UserInst) - return false; - return (SI->getPointerOperand() == User) && - (UserInst->getOpcode() == Instruction::GetElementPtr || - UserInst->getOpcode() == Instruction::BitCast) && - SI->isSimple(); - } - default: - return false; - } + APInt Quot; + uint64_t Rem; + APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem); + if (Rem != 0) + return nullptr; + + return ConstantInt::get(GEP->getContext(), Quot); } static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL, @@ -455,73 +418,87 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL, } std::map<GetElementPtrInst *, Value *> GEPVectorIdx; - std::vector<Value *> WorkList; - SmallVector<User *, 8> Users(Alloca->users()); - SmallVector<User *, 8> UseUsers(Users.size(), Alloca); + SmallVector<Instruction *> WorkList; + SmallVector<Use *, 8> Uses; + for (Use &U : Alloca->uses()) + Uses.push_back(&U); + Type *VecEltTy = VectorTy->getElementType(); - while (!Users.empty()) { - User *AllocaUser = Users.pop_back_val(); - User *UseUser = UseUsers.pop_back_val(); - Instruction *Inst = dyn_cast<Instruction>(AllocaUser); - - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser); - if (!GEP) { - if (!canVectorizeInst(Inst, UseUser, DL)) + while (!Uses.empty()) { + Use *U = Uses.pop_back_val(); + Instruction *Inst = dyn_cast<Instruction>(U->getUser()); + + if (Value *Ptr = getLoadStorePointerOperand(Inst)) { + // This is a store of the pointer, not to the pointer. + if (isa<StoreInst>(Inst) && + U->getOperandNo() != StoreInst::getPointerOperandIndex()) return false; - if (Inst->getOpcode() == Instruction::BitCast) { - Type *FromTy = Inst->getOperand(0)->getType()->getPointerElementType(); - Type *ToTy = Inst->getType()->getPointerElementType(); - if (FromTy->isAggregateType() || ToTy->isAggregateType() || - DL.getTypeSizeInBits(FromTy) != DL.getTypeSizeInBits(ToTy)) - continue; - - for (User *CastUser : Inst->users()) { - if (isAssumeLikeIntrinsic(cast<Instruction>(CastUser))) - continue; - Users.push_back(CastUser); - UseUsers.push_back(Inst); - } + Type *AccessTy = getLoadStoreType(Inst); + Ptr = Ptr->stripPointerCasts(); + // Alloca already accessed as vector, leave alone. + if (Ptr == Alloca && DL.getTypeStoreSize(Alloca->getAllocatedType()) == + DL.getTypeStoreSize(AccessTy)) continue; - } - WorkList.push_back(AllocaUser); + // Check that this is a simple access of a vector element. + bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple() : cast<StoreInst>(Inst)->isSimple(); + if (!IsSimple || + !CastInst::isBitOrNoopPointerCastable(VecEltTy, AccessTy, DL)) + return false; + + WorkList.push_back(Inst); continue; } - Value *Index = GEPToVectorIndex(GEP); + if (isa<BitCastInst>(Inst)) { + // Look through bitcasts. + for (Use &U : Inst->uses()) + Uses.push_back(&U); + continue; + } - // If we can't compute a vector index from this GEP, then we can't - // promote this alloca to vector. - if (!Index) { - LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP - << '\n'); - return false; + if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + // If we can't compute a vector index from this GEP, then we can't + // promote this alloca to vector.
+ Value *Index = GEPToVectorIndex(GEP, Alloca, VecEltTy, DL); + if (!Index) { + LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP + << '\n'); + return false; + } + + GEPVectorIdx[GEP] = Index; + for (Use &U : Inst->uses()) + Uses.push_back(&U); + continue; } - GEPVectorIdx[GEP] = Index; - Users.append(GEP->user_begin(), GEP->user_end()); - UseUsers.append(GEP->getNumUses(), GEP); + // Ignore assume-like intrinsics and comparisons used in assumes. + if (isAssumeLikeIntrinsic(Inst)) + continue; + + if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) { + return isAssumeLikeIntrinsic(cast<Instruction>(U)); + })) + continue; + + // Unknown user. + return false; } LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> " << *VectorTy << '\n'); - for (Value *V : WorkList) { - Instruction *Inst = cast<Instruction>(V); + for (Instruction *Inst : WorkList) { IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { - if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy()) - break; - Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); - if (!Index) - break; - - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace()); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); @@ -533,16 +510,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL, } case Instruction::Store: { StoreInst *SI = cast<StoreInst>(Inst); - if (SI->getValueOperand()->getType() == AllocaTy || - SI->getValueOperand()->getType()->isVectorTy()) - break; - Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); - if (!Index) - break; - - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace()); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *Elt = SI->getValueOperand(); @@ -808,10 +778,7 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) { // // FIXME: We should really do something to fix the addresses to a more optimal // value instead - llvm::sort(AllocatedSizes, [](std::pair<GlobalVariable *, Align> LHS, - std::pair<GlobalVariable *, Align> RHS) { - return LHS.second < RHS.second; - }); + llvm::sort(AllocatedSizes, llvm::less_second()); // Check how much local memory is being used by global objects CurrentLocalMemUsage = 0; @@ -917,7 +884,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) { // usage order. // // FIXME: It is also possible that if we're allowed to use all of the memory - // could could end up using more than the maximum due to alignment padding. + // could end up using more than the maximum due to alignment padding.
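+ // Illustrative numbers (not from the source): with CurrentLocalMemUsage =
+ // 700 and Alignment = 256, the alignTo below yields 768, i.e. 68 bytes of
+ // padding counted against the local-memory budget.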
uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment); uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp index 01d03d17ec47..ed450f59e4b3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp @@ -16,7 +16,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" @@ -30,6 +32,8 @@ namespace { class AMDGPUPromoteKernelArguments : public FunctionPass { MemorySSA *MSSA; + AliasAnalysis *AA; + Instruction *ArgCastInsertPt; SmallVector<Value *> Ptrs; @@ -38,16 +42,19 @@ class AMDGPUPromoteKernelArguments : public FunctionPass { bool promotePointer(Value *Ptr); + bool promoteLoad(LoadInst *LI); + public: static char ID; AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} - bool run(Function &F, MemorySSA &MSSA); + bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA); bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MemorySSAWrapperPass>(); AU.setPreservesAll(); } @@ -68,17 +75,10 @@ void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { break; case Instruction::Load: { LoadInst *LD = cast<LoadInst>(U); - PointerType *PT = dyn_cast<PointerType>(LD->getType()); - if (!PT || - (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS && - PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS && - PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) || - LD->getPointerOperand()->stripInBoundsOffsets() != Ptr) - break; - const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD); - // TODO: This load poprobably can be promoted to constant address space. - if (MSSA->isLiveOnEntryDef(MA)) + if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr && + !AMDGPU::isClobberedInFunction(LD, MSSA, AA)) Ptrs.push_back(LD); + break; } case Instruction::GetElementPtr: @@ -92,15 +92,26 @@ void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { } bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) { - enqueueUsers(Ptr); + bool Changed = false; + + LoadInst *LI = dyn_cast<LoadInst>(Ptr); + if (LI) + Changed |= promoteLoad(LI); + + PointerType *PT = dyn_cast<PointerType>(Ptr->getType()); + if (!PT) + return Changed; + + if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS || + PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) + enqueueUsers(Ptr); - PointerType *PT = cast<PointerType>(Ptr->getType()); if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) - return false; + return Changed; - bool IsArg = isa<Argument>(Ptr); - IRBuilder<> B(IsArg ? ArgCastInsertPt - : &*std::next(cast<Instruction>(Ptr)->getIterator())); + IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator()) : ArgCastInsertPt); // Cast pointer to global address space and back to flat and let // Infer Address Spaces pass to do all necessary rewriting.
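The cast-and-cast-back idiom described in that comment is the heart of this pass, so a sketch may help. This is a minimal illustration only, assuming AMDGPU's address-space numbering (FLAT_ADDRESS = 0, GLOBAL_ADDRESS = 1) and a hypothetical helper name; it is not the pass's exact code:

  // Cast a flat pointer to the global address space and immediately back.
  // The cast pair is a no-op at runtime, but InferAddressSpaces can look
  // through it and rewrite downstream users to global addressing.
  static Value *hintGlobalAddressSpace(IRBuilder<> &B, Value *FlatPtr) {
    auto *FlatTy = cast<PointerType>(FlatPtr->getType());
    auto *GlobalTy = PointerType::getWithSamePointeeType(FlatTy, /*AddrSpace=*/1);
    Value *Global = B.CreateAddrSpaceCast(FlatPtr, GlobalTy);
    return B.CreateAddrSpaceCast(Global, FlatTy);
  }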
@@ -116,6 +127,14 @@ bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) { return true; } +bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) { + if (!LI->isSimple()) + return false; + + LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {})); + return true; +} + // skip allocas static BasicBlock::iterator getInsertPt(BasicBlock &BB) { BasicBlock::iterator InsPt = BB.getFirstInsertionPt(); @@ -131,7 +150,8 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) { return InsPt; } -bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { +bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA, + AliasAnalysis &AA) { if (skipFunction(F)) return false; @@ -141,6 +161,7 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { ArgCastInsertPt = &*getInsertPt(*F.begin()); this->MSSA = &MSSA; + this->AA = &AA; for (Argument &Arg : F.args()) { if (Arg.use_empty()) @@ -166,11 +187,13 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) { MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); - return run(F, MSSA); + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + return run(F, MSSA, AA); } INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, "AMDGPU Promote Kernel Arguments", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE, "AMDGPU Promote Kernel Arguments", false, false) @@ -185,7 +208,8 @@ PreservedAnalyses AMDGPUPromoteKernelArgumentsPass::run(Function &F, FunctionAnalysisManager &AM) { MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); - if (AMDGPUPromoteKernelArguments().run(F, MSSA)) { + AliasAnalysis &AA = AM.getResult<AAManager>(F); + if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) { PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); PA.preserve<MemorySSAAnalysis>(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index de2dccef804a..0830cbd919a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -76,10 +76,11 @@ #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/RegisterBank.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #define GET_TARGET_REGBANK_IMPL @@ -193,9 +194,7 @@ public: } AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) - : AMDGPUGenRegisterBankInfo(), - Subtarget(ST), - TRI(Subtarget.getRegisterInfo()), + : Subtarget(ST), TRI(Subtarget.getRegisterInfo()), TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. @@ -428,11 +427,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } } -static bool memOpHasNoClobbered(const MachineMemOperand *MMO) { - const Instruction *I = dyn_cast_or_null<Instruction>(MMO->getValue()); - return I && I->getMetadata("amdgpu.noclobber"); -} - // FIXME: Returns uniform if there's no source value information. This is // probably wrong. static bool isScalarLoadLegal(const MachineInstr &MI) { @@ -451,7 +445,7 @@ // spaces.
(IsConst || !MMO->isVolatile()) && // Memory must be known constant, or not written before this load. - (IsConst || MMO->isInvariant() || memOpHasNoClobbered(MMO)) && + (IsConst || MMO->isInvariant() || (MMO->getFlags() & MONoClobber)) && AMDGPUInstrInfo::isUniformMMO(MMO); } @@ -684,6 +678,62 @@ static LLT getHalfSizedType(LLT Ty) { return LLT::scalar(Ty.getScalarSizeInBits() / 2); } +// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector +// source value into a scalar register. +Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Src) const { + LLT Ty = MRI.getType(Src); + const RegisterBank *Bank = getRegBank(Src, MRI, *TRI); + + if (Bank == &AMDGPU::SGPRRegBank) + return Src; + + unsigned Bits = Ty.getSizeInBits(); + assert(Bits % 32 == 0); + + if (Bank != &AMDGPU::VGPRRegBank) { + // We need to copy from AGPR to VGPR + Src = B.buildCopy(Ty, Src).getReg(0); + MRI.setRegBank(Src, AMDGPU::VGPRRegBank); + } + + LLT S32 = LLT::scalar(32); + unsigned NumParts = Bits / 32; + SmallVector SrcParts; + SmallVector DstParts; + + if (Bits == 32) { + SrcParts.push_back(Src); + } else { + auto Unmerge = B.buildUnmerge(S32, Src); + for (unsigned i = 0; i < NumParts; ++i) + SrcParts.push_back(Unmerge.getReg(i)); + } + + for (unsigned i = 0; i < NumParts; ++i) { + Register SrcPart = SrcParts[i]; + Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + MRI.setType(DstPart, NumParts == 1 ? Ty : S32); + + const TargetRegisterClass *Constrained = + constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI); + (void)Constrained; + assert(Constrained && "Failed to constrain readfirstlane src reg"); + + B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart}); + + DstParts.push_back(DstPart); + } + + if (Bits == 32) + return DstParts[0]; + + Register Dst = B.buildMerge(Ty, DstParts).getReg(0); + MRI.setRegBank(Dst, AMDGPU::SGPRRegBank); + return Dst; +} + /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If /// any of the required SGPR operands are VGPRs, perform a waterfall loop to /// execute the instruction for each unique combination of values in all lanes @@ -716,8 +766,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineFunction *MF = &B.getMF(); const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); - const unsigned WaveAndOpc = Subtarget.isWave32() ? - AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; const unsigned MovExecOpc = Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const unsigned MovExecTermOpc = @@ -747,16 +795,19 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( // To insert the loop we need to split the block. Move everything before this // point to a new block, and insert a new empty block before this instruction. MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(); + MachineBasicBlock *BodyBB = MF->CreateMachineBasicBlock(); MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock(); MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock(); MachineFunction::iterator MBBI(MBB); ++MBBI; MF->insert(MBBI, LoopBB); + MF->insert(MBBI, BodyBB); MF->insert(MBBI, RestoreExecBB); MF->insert(MBBI, RemainderBB); - LoopBB->addSuccessor(RestoreExecBB); - LoopBB->addSuccessor(LoopBB); + LoopBB->addSuccessor(BodyBB); + BodyBB->addSuccessor(RestoreExecBB); + BodyBB->addSuccessor(LoopBB); // Move the rest of the block into a new block. 
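+ // (CFG sketch for the new structure: MBB -> LoopBB -> BodyBB; BodyBB loops
+ // back to LoopBB while unprocessed lane values remain, otherwise it falls
+ // through to RestoreExecBB, which continues into RemainderBB.)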
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); @@ -768,27 +819,27 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setInsertPt(*LoopBB, LoopBB->end()); B.buildInstr(TargetOpcode::PHI) - .addDef(PhiExec) - .addReg(InitSaveExecReg) - .addMBB(&MBB) - .addReg(NewExec) - .addMBB(LoopBB); + .addDef(PhiExec) + .addReg(InitSaveExecReg) + .addMBB(&MBB) + .addReg(NewExec) + .addMBB(BodyBB); const DebugLoc &DL = B.getDL(); MachineInstr &FirstInst = *Range.begin(); - // Move the instruction into the loop. Note we moved everything after + // Move the instruction into the loop body. Note we moved everything after // Range.end() already into a new block, so Range.end() is no longer valid. - LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end()); + BodyBB->splice(BodyBB->end(), &MBB, Range.begin(), MBB.end()); // Figure out the iterator range after splicing the instructions. MachineBasicBlock::iterator NewBegin = FirstInst.getIterator(); - auto NewEnd = LoopBB->end(); + auto NewEnd = BodyBB->end(); - MachineBasicBlock::iterator I = Range.begin(); - B.setInsertPt(*LoopBB, I); + B.setMBB(*LoopBB); + LLT S1 = LLT::scalar(1); Register CondReg; assert(std::distance(NewBegin, NewEnd) == OrigRangeSize); @@ -819,164 +870,62 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setMBB(MBB); OpReg = B.buildCopy(OpTy, OpReg).getReg(0); MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank); - B.setInstr(*I); + B.setMBB(*LoopBB); } - unsigned OpSize = OpTy.getSizeInBits(); + Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg); - // Can only do a readlane of 32-bit pieces. - if (OpSize == 32) { - // Avoid extra copies in the simple case of one 32-bit register. - Register CurrentLaneOpReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - MRI.setType(CurrentLaneOpReg, OpTy); - - constrainGenericRegister(OpReg, AMDGPU::VGPR_32RegClass, MRI); - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(OpReg); - - Register NewCondReg = MRI.createVirtualRegister(WaveRC); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - // Compare the just read M0 value to all possible Idx values. - B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(OpReg); - Op.setReg(CurrentLaneOpReg); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(WaveRC); - - // If there are multiple operands to consider, and the conditions. - B.buildInstr(WaveAndOpc) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } + // Build the comparison(s). + unsigned OpSize = OpTy.getSizeInBits(); + bool Is64 = OpSize % 64 == 0; + unsigned PartSize = Is64 ? 64 : 32; + LLT PartTy = LLT::scalar(PartSize); + unsigned NumParts = OpSize / PartSize; + SmallVector OpParts; + SmallVector CurrentLaneParts; + + if (NumParts == 1) { + OpParts.push_back(OpReg); + CurrentLaneParts.push_back(CurrentLaneReg); } else { - LLT S32 = LLT::scalar(32); - SmallVector ReadlanePieces; - - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. - - bool Is64 = OpSize % 64 == 0; - - unsigned UnmergeTySize = Is64 ? 64 : 32; - unsigned CmpOp = - Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64; - - // Insert the unmerge before the loop. 
- - B.setMBB(MBB); - unsigned NumPieces = OpSize / UnmergeTySize; - SmallVector UnmergePieces; - if (NumPieces == 1) { - UnmergePieces.push_back(OpReg); - } else { - LLT UnmergeTy = LLT::scalar(UnmergeTySize); - MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg); - for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) - UnmergePieces.push_back(Unmerge.getReg(PieceIdx)); + auto UnmergeOp = B.buildUnmerge(PartTy, OpReg); + auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg); + for (unsigned i = 0; i < NumParts; ++i) { + OpParts.push_back(UnmergeOp.getReg(i)); + CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i)); + MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank); + MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank); } - B.setInstr(*I); - - for (Register UnmergePiece : UnmergePieces) { - Register CurrentLaneOpReg; - if (Is64) { - Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); - Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); - - MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); - MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); - MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegLo) - .addReg(UnmergePiece, 0, AMDGPU::sub0); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegHi) - .addReg(UnmergePiece, 0, AMDGPU::sub1); - - CurrentLaneOpReg = - B.buildMerge(LLT::scalar(64), - {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) - .getReg(0); - - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); - - if (OpTy.getScalarSizeInBits() == 64) { - // If we need to produce a 64-bit element vector, so use the - // merged pieces - ReadlanePieces.push_back(CurrentLaneOpReg); - } else { - // 32-bit element type. - ReadlanePieces.push_back(CurrentLaneOpRegLo); - ReadlanePieces.push_back(CurrentLaneOpRegHi); - } - } else { - CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32); - MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(UnmergePiece); - ReadlanePieces.push_back(CurrentLaneOpReg); - } + } - Register NewCondReg = MRI.createVirtualRegister(WaveRC); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - B.buildInstr(CmpOp) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(UnmergePiece); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(WaveRC); - - // If there are multiple operands to consider, and the conditions. 
- B.buildInstr(WaveAndOpc) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } - } + for (unsigned i = 0; i < NumParts; ++i) { + auto CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i], + OpParts[i]).getReg(0); + MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank); - // FIXME: Build merge seems to switch to CONCAT_VECTORS but not - // BUILD_VECTOR - if (OpTy.isVector()) { - auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); - } else if (ReadlanePieces.size() > 1) { - auto Merge = B.buildMerge(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); + if (!CondReg) { + CondReg = CmpReg; } else { - Op.setReg(ReadlanePieces[0]); + CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0); + MRI.setRegBank(CondReg, AMDGPU::VCCRegBank); } } + Op.setReg(CurrentLaneReg); + // Make sure we don't re-process this register again. WaterfalledRegMap.insert(std::make_pair(OldReg, Op.getReg())); } } + // The ballot becomes a no-op during instruction selection. + CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot, + {LLT::scalar(Subtarget.isWave32() ? 32 : 64)}, + false) + .addReg(CondReg) + .getReg(0); + MRI.setRegClass(CondReg, WaveRC); + // Update EXEC, save the original EXEC value to VCC. B.buildInstr(AndSaveExecOpc) .addDef(NewExec) @@ -984,7 +933,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MRI.setSimpleHint(NewExec, CondReg); - B.setInsertPt(*LoopBB, LoopBB->end()); + B.setInsertPt(*BodyBB, BodyBB->end()); // Update EXEC, switch all done bits to 0 and all todo bits to 1. B.buildInstr(XorTermOpc) @@ -1064,28 +1013,10 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( if (Bank == &AMDGPU::SGPRRegBank) return; - LLT Ty = MRI.getType(Reg); MachineIRBuilder B(MI); - if (Bank != &AMDGPU::VGPRRegBank) { - // We need to copy from AGPR to VGPR - Reg = B.buildCopy(Ty, Reg).getReg(0); - MRI.setRegBank(Reg, AMDGPU::VGPRRegBank); - } - - Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - B.buildInstr(AMDGPU::V_READFIRSTLANE_B32) - .addDef(SGPR) - .addReg(Reg); - - MRI.setType(SGPR, Ty); - - const TargetRegisterClass *Constrained = - constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI); - (void)Constrained; - assert(Constrained && "Failed to constrain readfirstlane src reg"); - - MI.getOperand(OpIdx).setReg(SGPR); + Reg = buildReadFirstLane(B, MRI, Reg); + MI.getOperand(OpIdx).setReg(Reg); } /// Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the @@ -1624,6 +1555,157 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper, return true; } +bool AMDGPURegisterBankInfo::applyMappingMAD_64_32( + const OperandsMapper &OpdMapper) const { + MachineInstr &MI = OpdMapper.getMI(); + MachineRegisterInfo &MRI = OpdMapper.getMRI(); + + // Insert basic copies. 
+ applyDefaultMapping(OpdMapper); + + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register Src0 = MI.getOperand(2).getReg(); + Register Src1 = MI.getOperand(3).getReg(); + Register Src2 = MI.getOperand(4).getReg(); + + if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank) + return true; + + bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32; + LLT S1 = LLT::scalar(1); + LLT S32 = LLT::scalar(32); + + bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank; + bool Accumulate = true; + + if (!DstOnValu) { + if (mi_match(Src2, MRI, m_ZeroInt())) + Accumulate = false; + } + + // Keep the multiplication on the SALU. + MachineIRBuilder B(MI); + + Register DstHi; + Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0); + bool MulHiInVgpr = false; + + MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank); + + if (Subtarget.hasSMulHi()) { + DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0) + : B.buildSMulH(S32, Src0, Src1).getReg(0); + MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank); + } else { + Register VSrc0 = B.buildCopy(S32, Src0).getReg(0); + Register VSrc1 = B.buildCopy(S32, Src1).getReg(0); + + MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank); + MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank); + + DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0) + : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0); + MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank); + + if (!DstOnValu) { + DstHi = buildReadFirstLane(B, MRI, DstHi); + } else { + MulHiInVgpr = true; + } + } + + // Accumulate and produce the "carry-out" bit. + // + // The "carry-out" is defined as bit 64 of the result when computed as a + // big integer. For unsigned multiply-add, this matches the usual definition + // of carry-out. For signed multiply-add, bit 64 is the sign bit of the + // result, which is determined as: + // sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add + LLT CarryType = DstOnValu ? S1 : S32; + const RegisterBank &CarryBank = + DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank; + const RegisterBank &DstBank = + DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank; + Register Carry; + Register Zero; + + if (!IsUnsigned) { + Zero = B.buildConstant(S32, 0).getReg(0); + MRI.setRegBank(Zero, + MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank); + + Carry = B.buildICmp(CmpInst::ICMP_SLT, MulHiInVgpr ? S1 : S32, DstHi, Zero) + .getReg(0); + MRI.setRegBank(Carry, MulHiInVgpr ? 
AMDGPU::VCCRegBank + : AMDGPU::SGPRRegBank); + + if (DstOnValu && !MulHiInVgpr) { + Carry = B.buildTrunc(S1, Carry).getReg(0); + MRI.setRegBank(Carry, AMDGPU::VCCRegBank); + } + } + + if (Accumulate) { + if (DstOnValu) { + DstLo = B.buildCopy(S32, DstLo).getReg(0); + DstHi = B.buildCopy(S32, DstHi).getReg(0); + MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank); + MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank); + } + + auto Unmerge = B.buildUnmerge(S32, Src2); + Register Src2Lo = Unmerge.getReg(0); + Register Src2Hi = Unmerge.getReg(1); + MRI.setRegBank(Src2Lo, DstBank); + MRI.setRegBank(Src2Hi, DstBank); + + if (!IsUnsigned) { + auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero); + MRI.setRegBank(Src2Sign.getReg(0), CarryBank); + + Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0); + MRI.setRegBank(Carry, CarryBank); + } + + auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo); + DstLo = AddLo.getReg(0); + Register CarryLo = AddLo.getReg(1); + MRI.setRegBank(DstLo, DstBank); + MRI.setRegBank(CarryLo, CarryBank); + + auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo); + DstHi = AddHi.getReg(0); + MRI.setRegBank(DstHi, DstBank); + + Register CarryHi = AddHi.getReg(1); + MRI.setRegBank(CarryHi, CarryBank); + + if (IsUnsigned) { + Carry = CarryHi; + } else { + Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0); + MRI.setRegBank(Carry, CarryBank); + } + } else { + if (IsUnsigned) { + Carry = B.buildConstant(CarryType, 0).getReg(0); + MRI.setRegBank(Carry, CarryBank); + } + } + + B.buildMerge(Dst0, {DstLo, DstHi}); + + if (DstOnValu) { + B.buildCopy(Dst1, Carry); + } else { + B.buildTrunc(Dst1, Carry); + } + + MI.eraseFromParent(); + return true; +} + // Return a suitable opcode for extending the operands of Opc when widening. static unsigned getExtendOp(unsigned Opc) { switch (Opc) { @@ -1794,7 +1876,7 @@ bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg, } /// Utility function for pushing dynamic vector indexes with a constant offset -/// into waterwall loops. +/// into waterfall loops. static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, @@ -1857,7 +1939,7 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect( unsigned NumElem = VecTy.getNumElements(); if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem, - IsDivergentIdx)) + IsDivergentIdx, &Subtarget)) return false; MachineIRBuilder B(MI); @@ -1955,7 +2037,7 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect( unsigned NumElem = VecTy.getNumElements(); if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem, - IsDivergentIdx)) + IsDivergentIdx, &Subtarget)) return false; MachineIRBuilder B(MI); @@ -2926,7 +3008,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: case Intrinsic::amdgcn_interp_p1_f16: - case Intrinsic::amdgcn_interp_p2_f16: { + case Intrinsic::amdgcn_interp_p2_f16: + case Intrinsic::amdgcn_lds_param_load: { applyDefaultMapping(OpdMapper); // Readlane for m0 value, which is always the last operand. 
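For context on the readfirstlane plumbing in these hunks: the new buildReadFirstLane helper scalarizes a VGPR value in 32-bit pieces (unmerge, one V_READFIRSTLANE_B32 per piece, merge), and constrainOpWithReadfirstlane above is now a thin wrapper over it. A rough usage sketch, assuming a MachineIRBuilder B and MachineRegisterInfo MRI in scope at a mapping-apply site (illustrative, not a verbatim excerpt):

  // Force one operand of MI into an SGPR by reading it from the first lane.
  Register Vgpr = MI.getOperand(OpIdx).getReg();    // value may live in a VGPR
  Register Sgpr = buildReadFirstLane(B, MRI, Vgpr); // unmerge + readfirstlane + merge
  MI.getOperand(OpIdx).setReg(Sgpr);                // operand is now wave-uniform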
@@ -2934,6 +3017,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index return; } + case Intrinsic::amdgcn_interp_inreg_p10: + case Intrinsic::amdgcn_interp_inreg_p2: + case Intrinsic::amdgcn_interp_inreg_p10_f16: + case Intrinsic::amdgcn_interp_inreg_p2_f16: + applyDefaultMapping(OpdMapper); + return; case Intrinsic::amdgcn_permlane16: case Intrinsic::amdgcn_permlanex16: { // Doing a waterfall loop over these wouldn't make any sense. @@ -3015,6 +3104,35 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 2); return; } + case Intrinsic::amdgcn_raw_buffer_load_lds: { + applyDefaultMapping(OpdMapper); + constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc + constrainOpWithReadfirstlane(MI, MRI, 2); // M0 + constrainOpWithReadfirstlane(MI, MRI, 5); // soffset + return; + } + case Intrinsic::amdgcn_struct_buffer_load_lds: { + applyDefaultMapping(OpdMapper); + constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc + constrainOpWithReadfirstlane(MI, MRI, 2); // M0 + constrainOpWithReadfirstlane(MI, MRI, 6); // soffset + return; + } + case Intrinsic::amdgcn_global_load_lds: { + applyDefaultMapping(OpdMapper); + constrainOpWithReadfirstlane(MI, MRI, 2); + return; + } + case Intrinsic::amdgcn_lds_direct_load: { + applyDefaultMapping(OpdMapper); + // Readlane for m0 value, which is always the last operand. + constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index + return; + } + case Intrinsic::amdgcn_exp_row: + applyDefaultMapping(OpdMapper); + constrainOpWithReadfirstlane(MI, MRI, 8); // M0 + return; default: { if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID)) { @@ -3143,6 +3261,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_UBFX: applyMappingBFE(OpdMapper, /*Signed*/ false); return; + case AMDGPU::G_AMDGPU_MAD_U64_U32: + case AMDGPU::G_AMDGPU_MAD_I64_I32: + applyMappingMAD_64_32(OpdMapper); + return; default: break; } @@ -3668,6 +3790,48 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getDefaultMappingSOP(MI); return getDefaultMappingVOP(MI); } + case AMDGPU::G_AMDGPU_MAD_U64_U32: + case AMDGPU::G_AMDGPU_MAD_I64_I32: { + // Three possible mappings: + // + // - Default SOP + // - Default VOP + // - Scalar multiply: src0 and src1 are SGPRs, the rest is VOP. + // + // This allows instruction selection to keep the multiplication part of the + // instruction on the SALU. + bool AllSalu = true; + bool MulSalu = true; + for (unsigned i = 0; i < 5; ++i) { + Register Reg = MI.getOperand(i).getReg(); + if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { + if (Bank->getID() != AMDGPU::SGPRRegBankID) { + AllSalu = false; + if (i == 2 || i == 3) { + MulSalu = false; + break; + } + } + } + } + + if (AllSalu) + return getDefaultMappingSOP(MI); + + // If the multiply-add is full-rate in VALU, use that even if the + // multiplication part is scalar. Accumulating separately on the VALU would + // take two instructions. + if (!MulSalu || Subtarget.hasFullRate64Ops()) + return getDefaultMappingVOP(MI); + + // Keep the multiplication on the SALU, then accumulate on the VALU. 
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); + OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); + break; + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); @@ -3828,10 +3992,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_FCMP: { unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); - unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); OpdsMapping[1] = nullptr; // Predicate Operand. - OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); break; } @@ -4102,6 +4265,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_udot4: case Intrinsic::amdgcn_sdot8: case Intrinsic::amdgcn_udot8: + case Intrinsic::amdgcn_fdot2_bf16_bf16: + case Intrinsic::amdgcn_fdot2_f16_f16: + case Intrinsic::amdgcn_fdot2_f32_bf16: + case Intrinsic::amdgcn_sudot4: + case Intrinsic::amdgcn_sudot8: + case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16: + case Intrinsic::amdgcn_wmma_f16_16x16x16_f16: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16: + case Intrinsic::amdgcn_wmma_f32_16x16x16_f16: + case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4: + case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8: return getDefaultMappingVOP(MI); case Intrinsic::amdgcn_sbfe: case Intrinsic::amdgcn_ubfe: @@ -4120,6 +4294,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_wqm: case Intrinsic::amdgcn_softwqm: case Intrinsic::amdgcn_set_inactive: + case Intrinsic::amdgcn_permlane64: return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_kernarg_segment_ptr: case Intrinsic::amdgcn_s_getpc: @@ -4247,24 +4422,50 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k: case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k: case Intrinsic::amdgcn_mfma_f64_16x16x4f64: - case Intrinsic::amdgcn_mfma_f64_4x4x4f64: { + case Intrinsic::amdgcn_mfma_f64_4x4x4f64: + case Intrinsic::amdgcn_mfma_i32_16x16x32_i8: + case Intrinsic::amdgcn_mfma_i32_32x32x16_i8: + case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32: + case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: { // Default for MAI intrinsics. // srcC can also be an immediate which can be folded later. // FIXME: Should we eventually add an alternative mapping with AGPR src // for srcA/srcB? // // vdst, srcA, srcB, srcC + const SIMachineFunctionInfo *Info = MF.getInfo(); + OpdsMapping[0] = + Info->mayNeedAGPRs() + ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = + Info->mayNeedAGPRs() + ? 
getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16: + case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: + case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: + case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: { + // vdst, srcA, srcB, srcC, idx OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_interp_p1: case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: case Intrinsic::amdgcn_interp_p1_f16: - case Intrinsic::amdgcn_interp_p2_f16: { + case Intrinsic::amdgcn_interp_p2_f16: + case Intrinsic::amdgcn_lds_param_load: { const int M0Idx = MI.getNumOperands() - 1; Register M0Reg = MI.getOperand(M0Idx).getReg(); unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID); @@ -4279,6 +4480,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32); break; } + case Intrinsic::amdgcn_interp_inreg_p10: + case Intrinsic::amdgcn_interp_inreg_p2: + case Intrinsic::amdgcn_interp_inreg_p10_f16: + case Intrinsic::amdgcn_interp_inreg_p2_f16: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + break; + } case Intrinsic::amdgcn_ballot: { unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); @@ -4314,8 +4526,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); } else { // NSA form - for (unsigned I = 2; I < N; ++I) - OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + for (unsigned I = 2; I < N; ++I) { + unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits(); + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + } } break; } @@ -4325,7 +4539,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memrealtime: - case Intrinsic::amdgcn_s_get_waveid_in_workgroup: { + case Intrinsic::amdgcn_s_get_waveid_in_workgroup: + case Intrinsic::amdgcn_s_sendmsg_rtn: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; @@ -4337,6 +4552,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_atomic_fadd: case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax: + case Intrinsic::amdgcn_global_atomic_fadd_v2bf16: + case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_ds_ordered_add: 
case Intrinsic::amdgcn_ds_ordered_swap: { @@ -4366,6 +4583,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); break; + case Intrinsic::amdgcn_exp_row: + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + OpdsMapping[8] = getSGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI); + break; case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { // This must be an SGPR, but accept a VGPR. @@ -4412,6 +4636,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } + case Intrinsic::amdgcn_raw_buffer_load_lds: { + OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } case Intrinsic::amdgcn_raw_buffer_store: case Intrinsic::amdgcn_raw_buffer_store_format: case Intrinsic::amdgcn_raw_tbuffer_store: { @@ -4430,6 +4661,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); break; } + case Intrinsic::amdgcn_struct_buffer_load_lds: { + OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + OpdsMapping[6] = getSGPROpMapping(MI.getOperand(6).getReg(), MRI, *TRI); + break; + } case Intrinsic::amdgcn_struct_buffer_store: case Intrinsic::amdgcn_struct_tbuffer_store: { OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); @@ -4464,6 +4703,31 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); break; } + case Intrinsic::amdgcn_global_load_lds: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_lds_direct_load: { + const int M0Idx = MI.getNumOperands() - 1; + Register M0Reg = MI.getOperand(M0Idx).getReg(); + unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID); + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); + for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I) + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + + // Must be SGPR, but we must take whatever the original bank is and fix it + // later. 
+ OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32); + break; + } + case Intrinsic::amdgcn_ds_add_gs_reg_rtn: + case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + break; default: return getInvalidInstructionMapping(); } @@ -4568,6 +4832,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1); break; } + case AMDGPU::G_FPTRUNC_ROUND_UPWARD: + case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD: + return getDefaultMappingVOP(MI); } return getInstructionMapping(/*ID*/1, /*Cost*/1, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 2b9d0923ab49..c9741c2202e6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Register.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "AMDGPUGenRegisterBank.inc" @@ -59,6 +59,9 @@ public: SmallSet<Register, 4> &SGPROperandRegs, MachineRegisterInfo &MRI) const; + Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Src) const; + bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, @@ -83,6 +86,8 @@ public: bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const; + bool applyMappingMAD_64_32(const OperandsMapper &OpdMapper) const; + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp new file mode 100644 index 000000000000..a86871a4a653 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp @@ -0,0 +1,140 @@ +//===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Insert S_SENDMSG instructions to release vgprs on GFX11+. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineOperand.h" +using namespace llvm; + +#define DEBUG_TYPE "release-vgprs" + +namespace { + +class AMDGPUReleaseVGPRs : public MachineFunctionPass { +public: + static char ID; + + const SIInstrInfo *SII; + const SIRegisterInfo *TRI; + + AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + // Used to cache the result of isLastInstructionVMEMStore for each block + using BlockVMEMStoreType = DenseMap<MachineBasicBlock *, bool>; + BlockVMEMStoreType BlockVMEMStore; + + // Return true if the last instruction referencing a vgpr in this MBB + // is a VMEM store, otherwise return false.
// Visit previous basic blocks to find this last instruction if needed. + // Because this pass is late in the pipeline, it is expected that the + // last vgpr use will likely be one of vmem store, ds, exp. + // Loads and other vgpr operations would have been + // deleted by this point, except for complex control flow involving loops. + // This is why we are just testing the type of instructions rather + // than the operands. + bool isLastVGPRUseVMEMStore(MachineBasicBlock &MBB) { + // Use the cache to break infinite loop and save some time. Initialize to + // false in case we have a cycle. + BlockVMEMStoreType::iterator It; + bool Inserted; + std::tie(It, Inserted) = BlockVMEMStore.insert({&MBB, false}); + bool &CacheEntry = It->second; + if (!Inserted) + return CacheEntry; + + for (auto &MI : reverse(MBB.instrs())) { + // If it's a VMEM store, a vgpr will be used, return true. + if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && MI.mayStore()) + return CacheEntry = true; + + // If it's referencing a VGPR but is not a VMEM store, return false. + if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) || + SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) || + SIInstrInfo::isVALU(MI)) + return CacheEntry = false; + } + + // Recursive call into parent blocks. Look into predecessors if there is no + // vgpr used in this block. + return CacheEntry = llvm::any_of(MBB.predecessors(), + [this](MachineBasicBlock *Parent) { + return isLastVGPRUseVMEMStore(*Parent); + }); + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB) { + + bool Changed = false; + + for (auto &MI : MBB.terminators()) { + // Look for S_ENDPGM instructions + if (MI.getOpcode() == AMDGPU::S_ENDPGM || + MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { + // If the last instruction using a VGPR in the block is a VMEM store, + // release VGPRs.
The VGPR release will be placed just before ending // the program + if (isLastVGPRUseVMEMStore(MBB)) { + BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG)) + .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus); + Changed = true; + } + } + } + + return Changed; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + Function &F = MF.getFunction(); + if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) + return false; + + // This pass only runs on GFX11+ + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + if (ST.getGeneration() < AMDGPUSubtarget::GFX11) + return false; + + LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName() + << "\n"); + + SII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + + bool Changed = false; + for (auto &MBB : MF) { + Changed |= runOnMachineBasicBlock(MBB); + } + + BlockVMEMStore.clear(); + + return Changed; + } +}; + +} // namespace + +char AMDGPUReleaseVGPRs::ID = 0; + +char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID; + +INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp index 2475b44b42a3..4d7a3f4028e8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -83,7 +83,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" -#include "Utils/AMDGPULDSUtils.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" @@ -442,7 +442,7 @@ class CollectReachableCallees { continue; for (const auto &GI : *CGN) { - auto *RCB = cast<CallBase>(GI.first.getValue()); + auto *RCB = cast<CallBase>(*GI.first); auto *RCGN = GI.second; if (auto *DCallee = RCGN->getFunction()) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index cb511e5e3483..f7f93c75c870 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -27,7 +27,9 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" @@ -87,9 +89,7 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs( int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs( const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const { - if (ST.hasGFX90AInsts() && ArgNumAGPR) - return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; - return std::max(ArgNumVGPR, ArgNumAGPR); + return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR); } int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs( @@ -97,28 +97,31 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs( return getTotalNumVGPRs(ST, NumAGPR, NumVGPR); } -bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) { +bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) { auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); if (!TPC) return false; + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); const TargetMachine &TM = TPC->getTM<TargetMachine>(); bool HasIndirectCall = false; - for (CallGraphNode *I : SCC) { -
Function *F = I->getFunction(); + CallGraph CG = CallGraph(M); + auto End = po_end(&CG); + + for (auto IT = po_begin(&CG); IT != End; ++IT) { + Function *F = IT->getFunction(); if (!F || F->isDeclaration()) continue; - MachineModuleInfo &MMI = - getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + MachineFunction *MF = MMI.getMachineFunction(*F); + assert(MF && "function must have been generated already"); auto CI = CallGraphResourceInfo.insert( - std::make_pair(&MF.getFunction(), SIFunctionResourceInfo())); + std::make_pair(F, SIFunctionResourceInfo())); SIFunctionResourceInfo &Info = CI.first->second; assert(CI.second && "should only be called once per function"); - Info = analyzeResourceUsage(MF, TM); + Info = analyzeResourceUsage(*MF, TM); HasIndirectCall |= Info.HasIndirectCall; } @@ -246,6 +249,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage( case AMDGPU::SRC_PRIVATE_BASE: case AMDGPU::SRC_PRIVATE_LIMIT: case AMDGPU::SGPR_NULL: + case AMDGPU::SGPR_NULL64: case AMDGPU::MODE: continue; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h index b0a2d3bffc62..df0789e471c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h @@ -24,7 +24,7 @@ class GCNSubtarget; class MachineFunction; class TargetMachine; -struct AMDGPUResourceUsageAnalysis : public CallGraphSCCPass { +struct AMDGPUResourceUsageAnalysis : public ModulePass { static char ID; public: @@ -50,15 +50,15 @@ public: int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; - AMDGPUResourceUsageAnalysis() : CallGraphSCCPass(ID) {} + AMDGPUResourceUsageAnalysis() : ModulePass(ID) {} - bool runOnSCC(CallGraphSCC &SCC) override; - - bool doInitialization(CallGraph &CG) override { + bool doInitialization(Module &M) override { CallGraphResourceInfo.clear(); - return CallGraphSCCPass::doInitialization(CG); + return ModulePass::doInitialization(M); } + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineModuleInfoWrapperPass>(); AU.setPreservesAll(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp index 1c6c63dd5b25..4f8a61a77097 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp @@ -83,12 +83,8 @@ private: const DataLayout *DL = nullptr; MemoryDependenceResults *MDA = nullptr; - bool checkArgumentUses(Value &Arg) const; - bool isOutArgumentCandidate(Argument &Arg) const; - -#ifndef NDEBUG - bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const; -#endif + Type *getStoredType(Value &Arg) const; + Type *getOutArgumentType(Argument &Arg) const; public: static char ID; @@ -114,72 +110,61 @@ INITIALIZE_PASS_END(AMDGPURewriteOutArguments, DEBUG_TYPE, char AMDGPURewriteOutArguments::ID = 0; -bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const { +Type *AMDGPURewriteOutArguments::getStoredType(Value &Arg) const { const int MaxUses = 10; int UseCount = 0; - for (Use &U : Arg.uses()) { - StoreInst *SI = dyn_cast<StoreInst>(U.getUser()); - if (UseCount > MaxUses) - return false; + SmallVector<Use *> Worklist; + for (Use &U : Arg.uses()) + Worklist.push_back(&U); - if (!SI) { - auto *BCI = dyn_cast<BitCastInst>(U.getUser()); - if (!BCI || !BCI->hasOneUse()) - return false; - - // We don't handle multiple stores currently, so stores to aggregate - // pointers aren't worth the trouble since they are
canonically split up. - Type *DestEltTy = BCI->getType()->getPointerElementType(); - if (DestEltTy->isAggregateType()) - return false; - - // We could handle these if we had a convenient way to bitcast between - // them. - Type *SrcEltTy = Arg.getType()->getPointerElementType(); - if (SrcEltTy->isArrayTy()) - return false; - - // Special case handle structs with single members. It is useful to handle - // some casts between structs and non-structs, but we can't bitcast - // directly between them. Blender uses some casts that look like - // { <3 x float> }* to <4 x float>* - if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1))) - return false; - - // Clang emits OpenCL 3-vector type accesses with a bitcast to the - // equivalent 4-element vector and accesses that, and we're looking for - // this pointer cast. - if (DL->getTypeAllocSize(SrcEltTy) != DL->getTypeAllocSize(DestEltTy)) - return false; - - return checkArgumentUses(*BCI); + Type *StoredType = nullptr; + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + + if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) { + for (Use &U : BCI->uses()) + Worklist.push_back(&U); + continue; } - if (!SI->isSimple() || - U.getOperandNo() != StoreInst::getPointerOperandIndex()) - return false; + if (auto *SI = dyn_cast<StoreInst>(U->getUser())) { + if (UseCount++ > MaxUses) + return nullptr; + + if (!SI->isSimple() || + U->getOperandNo() != StoreInst::getPointerOperandIndex()) + return nullptr; - ++UseCount; + if (StoredType && StoredType != SI->getValueOperand()->getType()) + return nullptr; // More than one type. + StoredType = SI->getValueOperand()->getType(); + continue; + } + + // Unsupported user. + return nullptr; } - // Skip unused arguments. - return UseCount > 0; + return StoredType; } -bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const { +Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const { const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs; PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType()); // TODO: It might be useful for any out arguments, not just privates.
if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() && !AnyAddressSpace) || - Arg.hasByValAttr() || Arg.hasStructRetAttr() || - DL->getTypeStoreSize(ArgTy->getPointerElementType()) > MaxOutArgSizeBytes) { - return false; + Arg.hasByValAttr() || Arg.hasStructRetAttr()) { + return nullptr; } - return checkArgumentUses(Arg); + Type *StoredType = getStoredType(Arg); + if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes) + return nullptr; + + return StoredType; } bool AMDGPURewriteOutArguments::doInitialization(Module &M) { @@ -187,22 +172,6 @@ bool AMDGPURewriteOutArguments::doInitialization(Module &M) { return false; } -#ifndef NDEBUG -bool AMDGPURewriteOutArguments::isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const { - auto *VT0 = dyn_cast<FixedVectorType>(Ty0); - auto *VT1 = dyn_cast<FixedVectorType>(Ty1); - if (!VT0 || !VT1) - return false; - - if (VT0->getNumElements() != 3 || - VT1->getNumElements() != 4) - return false; - - return DL->getTypeSizeInBits(VT0->getElementType()) == - DL->getTypeSizeInBits(VT1->getElementType()); -} -#endif - bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -215,7 +184,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); unsigned ReturnNumRegs = 0; - SmallSet<unsigned, 4> OutArgIndexes; + SmallDenseMap<unsigned, Type *, 4> OutArgIndexes; SmallVector<Type *, 4> ReturnTypes; Type *RetTy = F.getReturnType(); if (!RetTy->isVoidTy()) { @@ -227,12 +196,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { ReturnTypes.push_back(RetTy); } - SmallVector<Argument *, 4> OutArgs; + SmallVector<std::pair<Argument *, Type *>, 4> OutArgs; for (Argument &Arg : F.args()) { - if (isOutArgumentCandidate(Arg)) { + if (Type *Ty = getOutArgumentType(Arg)) { LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg << " in function " << F.getName() << '\n'); - OutArgs.push_back(&Arg); + OutArgs.push_back({&Arg, Ty}); } } @@ -264,11 +233,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { // first. On the second iteration we've removed that out clobbering argument // (by effectively moving it into another function) and will find the second // argument is OK to move. - for (Argument *OutArg : OutArgs) { + for (const auto &Pair : OutArgs) { bool ThisReplaceable = true; SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores; - Type *ArgTy = OutArg->getType()->getPointerElementType(); + Argument *OutArg = Pair.first; + Type *ArgTy = Pair.second; // Skip this argument if converting it will push us over the register // count to return limit.
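Taken together, getStoredType and getOutArgumentType decide whether a pointer argument can be turned into an extra return value. A source-level illustration of the rewrite this pass performs, with hypothetical names (the actual transformation happens on LLVM IR and packs the original return value plus each rewritten argument into an aggregate return):

// Before: the callee publishes a result through a private out pointer.
int calleeBefore(int X, float *Out) {
  *Out = X * 2.0f;
  return X + 1;
}

// After, conceptually: the store becomes an extra member of an aggregate
// return, so no scratch pointer has to cross the call boundary.
struct CalleeRet { int R; float Out; };
CalleeRet calleeAfter(int X) {
  return {X + 1, X * 2.0f};
}

The MaxOutArgSizeBytes and return-register checks in the surrounding hunks exist because every rewritten argument consumes part of the limited return-register budget.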
@@ -324,7 +294,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (ThisReplaceable) { ReturnTypes.push_back(ArgTy); - OutArgIndexes.insert(OutArg->getArgNo()); + OutArgIndexes.insert({OutArg->getArgNo(), ArgTy}); ++NumOutArgumentsReplaced; Changing = true; } @@ -376,32 +346,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (RetVal) NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++); - for (std::pair ReturnPoint : Replacement.second) { - Argument *Arg = ReturnPoint.first; - Value *Val = ReturnPoint.second; - Type *EltTy = Arg->getType()->getPointerElementType(); - if (Val->getType() != EltTy) { - Type *EffectiveEltTy = EltTy; - if (StructType *CT = dyn_cast(EltTy)) { - assert(CT->getNumElements() == 1); - EffectiveEltTy = CT->getElementType(0); - } - - if (DL->getTypeSizeInBits(EffectiveEltTy) != - DL->getTypeSizeInBits(Val->getType())) { - assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType())); - Val = B.CreateShuffleVector(Val, ArrayRef{0, 1, 2}); - } - - Val = B.CreateBitCast(Val, EffectiveEltTy); - - // Re-create single element composite. - if (EltTy != EffectiveEltTy) - Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0); - } - - NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++); - } + for (std::pair ReturnPoint : Replacement.second) + NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++); if (RetVal) RI->setOperand(0, NewRetVal); @@ -433,7 +379,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { PointerType *ArgType = cast(Arg.getType()); - auto *EltTy = ArgType->getPointerElementType(); + Type *EltTy = OutArgIndexes[Arg.getArgNo()]; const auto Align = DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index afe016731395..8297635d7bb2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -39,7 +39,8 @@ class GcnBufferFormatBase f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bi } class Gfx9BufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase; -class Gfx10PlusBufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase; +class Gfx10BufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase; +class Gfx11PlusBufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase; class GcnBufferFormatTable : GenericTable { let CppTypeName = "GcnBufferFormatInfo"; @@ -51,17 +52,25 @@ def Gfx9BufferFormat : GcnBufferFormatTable { let FilterClass = "Gfx9BufferFormat"; let PrimaryKeyName = "getGfx9BufferFormatInfo"; } -def Gfx10PlusBufferFormat : GcnBufferFormatTable { - let FilterClass = "Gfx10PlusBufferFormat"; - let PrimaryKeyName = "getGfx10PlusBufferFormatInfo"; +def Gfx10BufferFormat : GcnBufferFormatTable { + let FilterClass = "Gfx10BufferFormat"; + let PrimaryKeyName = "getGfx10BufferFormatInfo"; +} +def Gfx11PlusBufferFormat : GcnBufferFormatTable { + let FilterClass = "Gfx11PlusBufferFormat"; + let PrimaryKeyName = "getGfx11PlusBufferFormatInfo"; } def getGfx9BufferFormatInfo : SearchIndex { let Table = Gfx9BufferFormat; let Key = ["Format"]; } -def getGfx10PlusBufferFormatInfo : SearchIndex { - let Table = Gfx10PlusBufferFormat; +def getGfx10BufferFormatInfo : SearchIndex { + let Table = Gfx10BufferFormat; + let Key = ["Format"]; +} +def getGfx11PlusBufferFormatInfo : 
SearchIndex { + let Table = Gfx11PlusBufferFormat; let Key = ["Format"]; } @@ -119,57 +128,87 @@ def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x5E, 32, 4, /*NUM_FORMA def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x7E, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>; // Buffer formats with equal component sizes (GFX10 and later) -def : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, 
/*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>; -def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>; -def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>; +multiclass Gfx10PlusBufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> { + def : Gfx10BufferFormat; + def : Gfx11PlusBufferFormat; +} +defm : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, 
/*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>; +defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>; +defm : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>; +defm : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>; +defm : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>; +defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>; + +// Buffer formats with equal component sizes (GFX10 only) +def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10BufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx10BufferFormat< 
/*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx10BufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10BufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx10BufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx10BufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx10BufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx10BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx10BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>; + +// Buffer formats with equal component sizes (GFX11 and later) +def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x2A, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x2B, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x2C, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x2D, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x2E, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x2F, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x30, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x31, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x32, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x33, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x34, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x35, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x36, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x37, 16, 4, /*NUM_FORMAT_UINT*/ 
4, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x38, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x39, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x3A, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x3B, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x3C, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x3D, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x3E, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x3F, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>; class SourceOfDivergence { Intrinsic Intr = intr; @@ -191,6 +230,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -205,9 +246,12 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -292,6 +336,16 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; // The dummy boolean output is divergent from the IR's perspective, // but the mask results are uniform. These produce a divergent and diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp new file mode 100644 index 000000000000..34702ee6623b --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp @@ -0,0 +1,166 @@ +//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Pass to temporarily raise the wave priority beginning the start of +/// the shader function until its last VMEM instructions to allow younger +/// waves to issue their VMEM instructions as well. 
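The new pass added below decides where to drop the priority again by first marking every block from which a VMEM load is still reachable. A minimal standalone sketch of that backward fixed-point computation, with assumed names (the pass itself runs over MachineBasicBlocks and keeps the flag in an MBBInfoSet):

#include <unordered_set>
#include <vector>

struct Block {
  std::vector<Block *> Preds; // predecessor blocks in the CFG
  bool HasVMEMLoad = false;   // does this block itself contain a VMEM load?
};

// Returns the set of blocks from which control may still reach a VMEM load.
std::unordered_set<const Block *>
computeMayReachVMEMLoad(const std::vector<Block *> &Blocks) {
  std::unordered_set<const Block *> MayReach;
  std::vector<const Block *> Worklist;
  // Seed with every block that contains a VMEM load itself.
  for (const Block *B : Blocks)
    if (B->HasVMEMLoad)
      Worklist.push_back(B);
  while (!Worklist.empty()) {
    const Block *B = Worklist.back();
    Worklist.pop_back();
    // insert() returns false for an already-marked block, which is what
    // guarantees termination on cyclic control flow.
    if (!MayReach.insert(B).second)
      continue;
    for (const Block *P : B->Preds)
      Worklist.push_back(P);
  }
  return MayReach;
}

The priority is then raised once at the entry block and lowered exactly on the frontier where control leaves this set, which is what the PriorityLoweringBlocks logic in the pass computes.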
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Allocator.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-set-wave-priority" + +namespace { + +struct MBBInfo { + MBBInfo() = default; + bool MayReachVMEMLoad = false; +}; + +using MBBInfoSet = DenseMap; + +class AMDGPUSetWavePriority : public MachineFunctionPass { +public: + static char ID; + + AMDGPUSetWavePriority() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "Set wave priority"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + MachineInstr *BuildSetprioMI(MachineFunction &MF, unsigned priority) const; + + const SIInstrInfo *TII; +}; + +} // End anonymous namespace. + +INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false, + false) + +char AMDGPUSetWavePriority::ID = 0; + +FunctionPass *llvm::createAMDGPUSetWavePriorityPass() { + return new AMDGPUSetWavePriority(); +} + +MachineInstr *AMDGPUSetWavePriority::BuildSetprioMI(MachineFunction &MF, + unsigned priority) const { + return BuildMI(MF, DebugLoc(), TII->get(AMDGPU::S_SETPRIO)).addImm(priority); +} + +// Checks that for every predecessor Pred that can reach a VMEM load, +// none of Pred's successors can reach a VMEM load. +static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, + MBBInfoSet &MBBInfos) { + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + if (!MBBInfos[Pred].MayReachVMEMLoad) + continue; + for (const MachineBasicBlock *Succ : Pred->successors()) { + if (MBBInfos[Succ].MayReachVMEMLoad) + return false; + } + } + return true; +} + +static bool isVMEMLoad(const MachineInstr &MI) { + return SIInstrInfo::isVMEM(MI) && MI.mayLoad(); +} + +bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) { + const unsigned HighPriority = 3; + const unsigned LowPriority = 0; + + Function &F = MF.getFunction(); + if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) + return false; + + const GCNSubtarget &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + + MBBInfoSet MBBInfos; + SmallVector Worklist; + for (MachineBasicBlock &MBB : MF) { + if (any_of(MBB, isVMEMLoad)) + Worklist.push_back(&MBB); + } + + // Mark blocks from which control may reach VMEM loads. + while (!Worklist.empty()) { + const MachineBasicBlock *MBB = Worklist.pop_back_val(); + MBBInfo &Info = MBBInfos[MBB]; + if (!Info.MayReachVMEMLoad) { + Info.MayReachVMEMLoad = true; + Worklist.append(MBB->pred_begin(), MBB->pred_end()); + } + } + + MachineBasicBlock &Entry = MF.front(); + if (!MBBInfos[&Entry].MayReachVMEMLoad) + return false; + + // Raise the priority at the beginning of the shader. + MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end(); + while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator()) + ++I; + Entry.insert(I, BuildSetprioMI(MF, HighPriority)); + + // Lower the priority on edges where control leaves blocks from which + // VMEM loads are reachable. 
+ SmallSet PriorityLoweringBlocks; + for (MachineBasicBlock &MBB : MF) { + if (MBBInfos[&MBB].MayReachVMEMLoad) { + if (MBB.succ_empty()) + PriorityLoweringBlocks.insert(&MBB); + continue; + } + + if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) { + for (MachineBasicBlock *Pred : MBB.predecessors()) { + if (MBBInfos[Pred].MayReachVMEMLoad) + PriorityLoweringBlocks.insert(Pred); + } + continue; + } + + // Where lowering the priority in predecessors is not possible, the + // block receiving control either was not part of a loop in the first + // place or the loop simplification/canonicalization pass should have + // already tried to split the edge and insert a preheader, and if for + // whatever reason it failed to do so, then this leaves us with the + // only option of lowering the priority within the loop. + PriorityLoweringBlocks.insert(&MBB); + } + + for (MachineBasicBlock *MBB : PriorityLoweringBlocks) { + MachineBasicBlock::iterator I = MBB->end(), B = MBB->begin(); + while (I != B) { + if (isVMEMLoad(*--I)) { + ++I; + break; + } + } + MBB->insert(I, BuildSetprioMI(MF, LowPriority)); + } + + return true; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index e82f9232b114..77816a783630 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -50,11 +50,6 @@ static cl::opt EnableVGPRIndexMode( cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false)); -static cl::opt EnableFlatScratch( - "amdgpu-enable-flat-scratch", - cl::desc("Use flat scratch instructions"), - cl::init(false)); - static cl::opt UseAA("amdgpu-use-aa-in-codegen", cl::desc("Enable the use of AA during codegen."), cl::init(true)); @@ -159,26 +154,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, return *this; } -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : - TargetTriple(TT), - GCN3Encoding(false), - Has16BitInsts(false), - HasMadMixInsts(false), - HasMadMacF32Insts(false), - HasDsSrc2Insts(false), - HasSDWA(false), - HasVOP3PInsts(false), - HasMulI24(true), - HasMulU24(true), - HasSMulHi(false), - HasInv2PiInlineImm(false), - HasFminFmaxLegacy(true), - EnablePromoteAlloca(false), - HasTrigReducedRange(false), - MaxWavesPerEU(10), - LocalMemorySize(0), - WavefrontSizeLog2(0) - { } +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {} GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM) @@ -187,120 +163,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, AMDGPUSubtarget(TT), TargetTriple(TT), TargetID(*this), - Gen(INVALID), InstrItins(getInstrItineraryForCPU(GPU)), - LDSBankCount(0), - MaxPrivateElementSize(0), - - FastFMAF32(false), - FastDenormalF32(false), - HalfRate64Ops(false), - FullRate64Ops(false), - - FlatForGlobal(false), - AutoWaitcntBeforeBarrier(false), - UnalignedScratchAccess(false), - UnalignedAccessMode(false), - - HasApertureRegs(false), - SupportsXNACK(false), - EnableXNACK(false), - EnableTgSplit(false), - EnableCuMode(false), - TrapHandler(false), - - EnableLoadStoreOpt(false), - EnableUnsafeDSOffsetFolding(false), - EnableSIScheduler(false), - EnableDS128(false), - EnablePRTStrictNull(false), - DumpCode(false), - - FP64(false), - CIInsts(false), - GFX8Insts(false), - GFX9Insts(false), - GFX90AInsts(false), - GFX10Insts(false), - GFX10_3Insts(false), - GFX7GFX8GFX9Insts(false), - SGPRInitBug(false), - NegativeScratchOffsetBug(false), - 
NegativeUnalignedScratchOffsetBug(false), - HasSMemRealTime(false), - HasIntClamp(false), - HasFmaMixInsts(false), - HasMovrel(false), - HasVGPRIndexMode(false), - HasScalarStores(false), - HasScalarAtomics(false), - HasSDWAOmod(false), - HasSDWAScalar(false), - HasSDWASdst(false), - HasSDWAMac(false), - HasSDWAOutModsVOPC(false), - HasDPP(false), - HasDPP8(false), - Has64BitDPP(false), - HasPackedFP32Ops(false), - HasExtendedImageInsts(false), - HasR128A16(false), - HasGFX10A16(false), - HasG16(false), - HasNSAEncoding(false), - NSAMaxSize(0), - GFX10_AEncoding(false), - GFX10_BEncoding(false), - HasDLInsts(false), - HasDot1Insts(false), - HasDot2Insts(false), - HasDot3Insts(false), - HasDot4Insts(false), - HasDot5Insts(false), - HasDot6Insts(false), - HasDot7Insts(false), - HasMAIInsts(false), - HasPkFmacF16Inst(false), - HasAtomicFaddInsts(false), - SupportsSRAMECC(false), - EnableSRAMECC(false), - HasNoSdstCMPX(false), - HasVscnt(false), - HasGetWaveIdInst(false), - HasSMemTimeInst(false), - HasShaderCyclesRegister(false), - HasVOP3Literal(false), - HasNoDataDepHazard(false), - FlatAddressSpace(false), - FlatInstOffsets(false), - FlatGlobalInsts(false), - FlatScratchInsts(false), - ScalarFlatScratchInsts(false), - HasArchitectedFlatScratch(false), - AddNoCarryInsts(false), - HasUnpackedD16VMem(false), - LDSMisalignedBug(false), - HasMFMAInlineLiteralBug(false), - UnalignedBufferAccess(false), - UnalignedDSAccess(false), - HasPackedTID(false), - - ScalarizeGlobal(false), - - HasVcmpxPermlaneHazard(false), - HasVMEMtoScalarWriteHazard(false), - HasSMEMtoVectorWriteHazard(false), - HasInstFwdPrefetchBug(false), - HasVcmpxExecWARHazard(false), - HasLdsBranchVmemWARHazard(false), - HasNSAtoVMEMBug(false), - HasNSAClauseBug(false), - HasOffset3fBug(false), - HasFlatSegmentOffsetBug(false), - HasImageStoreD16Bug(false), - HasImageGather4D16Bug(false), - - FeatureDisable(false), InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), TLInfo(TM, *this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { @@ -314,11 +177,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, *this, *static_cast(RegBankInfo.get()), TM)); } -bool GCNSubtarget::enableFlatScratch() const { - return flatScratchIsArchitected() || - (EnableFlatScratch && hasFlatScratchInsts()); -} - unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const { if (getGeneration() < GFX10) return 1; @@ -326,12 +184,15 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const { switch (Opcode) { case AMDGPU::V_LSHLREV_B64_e64: case AMDGPU::V_LSHLREV_B64_gfx10: + case AMDGPU::V_LSHLREV_B64_e64_gfx11: case AMDGPU::V_LSHL_B64_e64: case AMDGPU::V_LSHRREV_B64_e64: case AMDGPU::V_LSHRREV_B64_gfx10: + case AMDGPU::V_LSHRREV_B64_e64_gfx11: case AMDGPU::V_LSHR_B64_e64: case AMDGPU::V_ASHRREV_I64_e64: case AMDGPU::V_ASHRREV_I64_gfx10: + case AMDGPU::V_ASHRREV_I64_e64_gfx11: case AMDGPU::V_ASHR_I64_e64: return 1; } @@ -658,7 +519,8 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const { return 16; // Assume all implicit inputs are used by default - return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56); + unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 
256 : 56; + return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes); } uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F, @@ -673,13 +535,11 @@ uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F, for (const Argument &Arg : F.args()) { const bool IsByRef = Arg.hasByRefAttr(); Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType(); - MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None; - if (!Alignment) - Alignment = DL.getABITypeAlign(ArgTy); - + Align Alignment = DL.getValueOrABITypeAlignment( + IsByRef ? Arg.getParamAlign() : None, ArgTy); uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize; - MaxAlign = max(MaxAlign, Alignment); + MaxAlign = std::max(MaxAlign, Alignment); } return ExplicitArgBytes; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 7f1b94be4ffe..7400c81effd0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -38,30 +38,32 @@ public: SEA_ISLANDS = 6, VOLCANIC_ISLANDS = 7, GFX9 = 8, - GFX10 = 9 + GFX10 = 9, + GFX11 = 10 }; private: Triple TargetTriple; protected: - bool GCN3Encoding; - bool Has16BitInsts; - bool HasMadMixInsts; - bool HasMadMacF32Insts; - bool HasDsSrc2Insts; - bool HasSDWA; - bool HasVOP3PInsts; - bool HasMulI24; - bool HasMulU24; - bool HasSMulHi; - bool HasInv2PiInlineImm; - bool HasFminFmaxLegacy; - bool EnablePromoteAlloca; - bool HasTrigReducedRange; - unsigned MaxWavesPerEU; - unsigned LocalMemorySize; - char WavefrontSizeLog2; + bool GCN3Encoding = false; + bool Has16BitInsts = false; + bool HasTrue16BitInsts = false; + bool HasMadMixInsts = false; + bool HasMadMacF32Insts = false; + bool HasDsSrc2Insts = false; + bool HasSDWA = false; + bool HasVOP3PInsts = false; + bool HasMulI24 = true; + bool HasMulU24 = true; + bool HasSMulHi = false; + bool HasInv2PiInlineImm = false; + bool HasFminFmaxLegacy = true; + bool EnablePromoteAlloca = false; + bool HasTrigReducedRange = false; + unsigned MaxWavesPerEU = 10; + unsigned LocalMemorySize = 0; + char WavefrontSizeLog2 = 0; public: AMDGPUSubtarget(const Triple &TT); @@ -145,6 +147,8 @@ public: return Has16BitInsts; } + bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } + bool hasMadMixInsts() const { return HasMadMixInsts; } @@ -267,7 +271,7 @@ public: /// \p WavefrontSize. 
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; - virtual ~AMDGPUSubtarget() {} + virtual ~AMDGPUSubtarget() = default; }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a2c61f9da8da..1c6b9d35695a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -16,6 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" #include "AMDGPUExportClustering.h" +#include "AMDGPUIGroupLP.h" #include "AMDGPUMacroFusion.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" @@ -27,6 +28,7 @@ #include "SIMachineScheduler.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -56,6 +58,7 @@ #include "llvm/Transforms/Vectorize.h" using namespace llvm; +using namespace llvm::PatternMatch; namespace { class SGPRRegisterRegAlloc : public RegisterRegAllocBase { @@ -269,12 +272,22 @@ static cl::opt EnableSIModeRegisterPass( cl::init(true), cl::Hidden); +// Enable GFX11+ s_delay_alu insertion +static cl::opt + EnableInsertDelayAlu("amdgpu-enable-delay-alu", + cl::desc("Enable s_delay_alu insertion"), + cl::init(true), cl::Hidden); + // Option is used in lit tests to prevent deadcoding of patterns inspected. static cl::opt EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")); +static cl::opt EnableSetWavePriority("amdgpu-set-wave-priority", + cl::desc("Adjust wave priority"), + cl::init(false), cl::Hidden); + static cl::opt EnableScalarIRPasses( "amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), @@ -330,7 +343,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeVGPRLiveRangePass(*PR); initializeSILoadStoreOptimizerPass(*PR); - initializeAMDGPUFixFunctionBitcastsPass(*PR); initializeAMDGPUCtorDtorLoweringPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUAttributorPass(*PR); @@ -357,6 +369,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURewriteOutArgumentsPass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); initializeSIAnnotateControlFlowPass(*PR); + initializeAMDGPUReleaseVGPRsPass(*PR); + initializeAMDGPUInsertDelayAluPass(*PR); initializeSIInsertHardClausesPass(*PR); initializeSIInsertWaitcntsPass(*PR); initializeSIModeRegisterPass(*PR); @@ -390,9 +404,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { + const GCNSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = new GCNScheduleDAGMILive(C, std::make_unique(C)); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createIGroupLPDAGMutation()); + DAG->addMutation(createSchedBarrierDAGMutation()); DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); DAG->addMutation(createAMDGPUExportClusteringDAGMutation()); return DAG; @@ -400,9 +419,12 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * 
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { + const GCNSubtarget &ST = C->MF->getSubtarget(); auto DAG = new GCNIterativeScheduler(C, GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -413,9 +435,12 @@ static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C) { + const GCNSubtarget &ST = C->MF->getSubtarget(); auto DAG = new GCNIterativeScheduler(C, GCNIterativeScheduler::SCHEDULE_ILP); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); return DAG; } @@ -801,6 +826,23 @@ AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const { return std::make_pair(nullptr, -1); } +unsigned +AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const { + switch (Kind) { + case PseudoSourceValue::Stack: + case PseudoSourceValue::FixedStack: + return AMDGPUAS::PRIVATE_ADDRESS; + case PseudoSourceValue::ConstantPool: + case PseudoSourceValue::GOT: + case PseudoSourceValue::JumpTable: + case PseudoSourceValue::GlobalValueCallEntry: + case PseudoSourceValue::ExternalSymbolCallEntry: + case PseudoSourceValue::TargetCustom: + return AMDGPUAS::CONSTANT_ADDRESS; + } + return AMDGPUAS::FLAT_ADDRESS; +} + //===----------------------------------------------------------------------===// // GCN Target Machine (SI+) //===----------------------------------------------------------------------===// @@ -836,7 +878,7 @@ GCNTargetMachine::getSubtargetImpl(const Function &F) const { } TargetTransformInfo -GCNTargetMachine::getTargetTransformInfo(const Function &F) { +GCNTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(GCNTTIImpl(this, F)); } @@ -873,7 +915,11 @@ public: ScheduleDAGMI *DAG = createGenericSchedPostRA(C); const GCNSubtarget &ST = C->MF->getSubtarget(); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII)); + DAG->addMutation(createIGroupLPDAGMutation()); + DAG->addMutation(createSchedBarrierDAGMutation()); return DAG; } @@ -953,10 +999,6 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPUPrintfRuntimeBinding()); addPass(createAMDGPUCtorDtorLoweringPass()); - // This must occur before inlining, as the inliner will not look through - // bitcast calls. - addPass(createAMDGPUFixFunctionBitcastsPass()); - // A call to propagate attributes pass in the backend in case opt was not run. addPass(createAMDGPUPropagateAttributesEarlyPass(&TM)); @@ -967,7 +1009,7 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAlwaysInlinerLegacyPass()); // We need to add the barrier noop pass, otherwise adding the function // inlining pass will cause all of the PassConfigs passes to be run - // one function at a time, which means if we have a nodule with two + // one function at a time, which means if we have a module with two // functions, then we will generate code for the first function // without ever running any passes on the second. 
addPass(createBarrierNoopPass()); @@ -1079,8 +1121,11 @@ bool AMDGPUPassConfig::addGCPasses() { llvm::ScheduleDAGInstrs * AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const { + const GCNSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -1363,6 +1408,8 @@ void GCNPassConfig::addPreEmitPass() { addPass(&SIInsertHardClausesID); addPass(&SILateBranchLoweringPassID); + if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less)) + addPass(createAMDGPUSetWavePriorityPass()); if (getOptLevel() > CodeGenOpt::None) addPass(&SIPreEmitPeepholeID); // The hazard recognizer that runs as part of the post-ra scheduler does not @@ -1374,6 +1421,13 @@ void GCNPassConfig::addPreEmitPass() { // Here we add a stand-alone hazard recognizer pass which can handle all // cases. addPass(&PostRAHazardRecognizerID); + + if (getOptLevel() > CodeGenOpt::Less) + addPass(&AMDGPUReleaseVGPRsID); + + if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less)) + addPass(&AMDGPUInsertDelayAluID); + addPass(&BranchRelaxationPassID); } @@ -1396,7 +1450,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { const yaml::SIMachineFunctionInfo &YamlMFI = - reinterpret_cast(MFI_); + static_cast(MFI_); MachineFunction &MF = PFS.MF; SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -1420,6 +1474,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return false; }; + auto parseOptionalRegister = [&](const yaml::StringValue &RegName, + Register &RegVal) { + return !RegName.Value.empty() && parseRegister(RegName, RegVal); + }; + + if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy)) + return true; + auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) { // Create a diagnostic for a the register string literal. const MemoryBuffer &Buffer = @@ -1452,6 +1514,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg); } + for (const auto &YamlReg : YamlMFI.WWMReservedRegs) { + Register ParsedReg; + if (parseRegister(YamlReg, ParsedReg)) + return true; + + MFI->reserveWWMRegister(ParsedReg); + } + auto parseAndCheckArgument = [&](const Optional &A, const TargetRegisterClass &RC, ArgDescriptor &Arg, unsigned UserSGPRs, @@ -1473,7 +1543,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( Arg = ArgDescriptor::createStack(A->StackOffset); // Check and apply the optional mask. if (A->Mask) - Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue()); + Arg = ArgDescriptor::createArg(Arg, *A->Mask); MFI->NumUserSGPRs += UserSGPRs; MFI->NumSystemSGPRs += SystemSGPRs; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index dd3676f3b707..567cc9d610d2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// The AMDGPU TargetMachine interface definition for hw codgen targets. +/// The AMDGPU TargetMachine interface definition for hw codegen targets. 
// //===----------------------------------------------------------------------===// @@ -64,6 +64,8 @@ public: std::pair getPredicatedAddrSpace(const Value *V) const override; + + unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override; }; //===----------------------------------------------------------------------===// @@ -84,7 +86,7 @@ public: const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool useIPRA() const override { return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index a8df7789c8a1..a79cd2e9499e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -288,33 +288,21 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(static_cast(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), CommonTTI(TM, F), - IsGraphics(AMDGPU::isGraphics(F.getCallingConv())), - MaxVGPRs(ST->getMaxNumVGPRs( - std::max(ST->getWavesPerEU(F).first, - ST->getWavesPerEUForWorkGroup( - ST->getFlatWorkGroupSizes(F).second)))) { + IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) { AMDGPU::SIModeRegisterDefaults Mode(F); HasFP32Denormals = Mode.allFP32Denormals(); HasFP64FP16Denormals = Mode.allFP64FP16Denormals(); } -unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { - // The concept of vector registers doesn't really exist. Some packed vector - // operations operate on the normal 32-bit registers. - return MaxVGPRs; -} +unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const { + // NB: RCID is not an RCID. In fact it is 0 or 1 for scalar or vector + // registers. See getRegisterClassForType for the implementation. + // In this case vector registers are not vector in terms of + // VGPRs, but those which can hold multiple values. -unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const { // This is really the number of registers to fill when vectorizing / // interleaving loops, so we lie to avoid trying to use all registers. - return getHardwareNumberOfRegisters(Vec) >> 3; -} - -unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const { - const SIRegisterInfo *TRI = ST->getRegisterInfo(); - const TargetRegisterClass *RC = TRI->getRegClass(RCID); - unsigned NumVGPRs = (TRI->getRegSizeInBits(*RC) + 31) / 32; - return getHardwareNumberOfRegisters(false) / NumVGPRs; + return 4; } TypeSize @@ -410,11 +398,14 @@ bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, // unaligned access is legal? // // FIXME: This could use fine tuning and microbenchmarks. 
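The hunk below threads a new Optional AtomicElementSize parameter through getMemcpyLoopLoweringType: for element-wise atomic memcpy the loop must use an integer type of exactly that many bytes, while the ordinary path keeps choosing a wide type from the smaller of the two alignments. A simplified sketch of that selection rule (illustrative only; the real code returns llvm::Type values such as Type::getIntNTy(Context, *AtomicElementSize * 8)):

#include <cassert>
#include <cstdint>
#include <optional>

enum class CopyTy { I8, I16, I32, V4I32 };

CopyTy pickMemcpyLoopType(std::optional<uint32_t> AtomicElementSize,
                          uint32_t SrcAlign, uint32_t DestAlign) {
  if (AtomicElementSize) {
    // Atomic element copies must match the element width exactly,
    // mirroring Type::getIntNTy(Context, *AtomicElementSize * 8).
    switch (*AtomicElementSize) {
    case 1: return CopyTy::I8;
    case 2: return CopyTy::I16;
    case 4: return CopyTy::I32;
    default: assert(false && "unsupported atomic element size"); return CopyTy::I8;
    }
  }
  // Non-atomic case (simplified): use a 16-byte vector copy only when both
  // sides are at least dword aligned, otherwise fall back to byte copies.
  uint32_t MinAlign = SrcAlign < DestAlign ? SrcAlign : DestAlign;
  return MinAlign >= 4 ? CopyTy::V4I32 : CopyTy::I8;
}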
-Type *GCNTTIImpl::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, - unsigned DestAddrSpace, - unsigned SrcAlign, - unsigned DestAlign) const { +Type *GCNTTIImpl::getMemcpyLoopLoweringType( + LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize) const { + + if (AtomicElementSize) + return Type::getIntNTy(Context, *AtomicElementSize * 8); + unsigned MinAlign = std::min(SrcAlign, DestAlign); // A (multi-)dword access at an address == 2 (mod 4) will be decomposed by the @@ -439,11 +430,17 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, } void GCNTTIImpl::getMemcpyLoopResidualLoweringType( - SmallVectorImpl &OpsOut, LLVMContext &Context, - unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const { + SmallVectorImpl &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize) const { assert(RemainingBytes < 16); + if (AtomicCpySize) + BaseT::getMemcpyLoopResidualLoweringType( + OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign, + DestAlign, AtomicCpySize); + unsigned MinAlign = std::min(SrcAlign, DestAlign); if (MinAlign != 2) { @@ -1042,7 +1039,8 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT, ArrayRef Mask, - int Index, VectorType *SubTp) { + int Index, VectorType *SubTp, + ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); if (ST->hasVOP3PInsts()) { if (cast(VT)->getNumElements() == 2 && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index e901b5c5747d..f2260c31e678 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -68,7 +68,6 @@ class GCNTTIImpl final : public BasicTTIImplBase { bool IsGraphics; bool HasFP32Denormals; bool HasFP64FP16Denormals; - unsigned MaxVGPRs; static const FeatureBitset InlineFeatureIgnoreList; @@ -113,8 +112,6 @@ public: return TTI::PSK_FastHardware; } - unsigned getHardwareNumberOfRegisters(bool Vector) const; - unsigned getNumberOfRegisters(bool Vector) const; unsigned getNumberOfRegisters(unsigned RCID) const; TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const; unsigned getMinVectorRegisterBitWidth() const; @@ -135,15 +132,14 @@ public: unsigned AddrSpace) const; Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign) const; - - void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, - LLVMContext &Context, - unsigned RemainingBytes, - unsigned SrcAddrSpace, - unsigned DestAddrSpace, - unsigned SrcAlign, - unsigned DestAlign) const; + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicElementSize) const; + + void getMemcpyLoopResidualLoweringType( + SmallVectorImpl &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign, + Optional AtomicCpySize) const; unsigned getMaxInterleaveFactor(unsigned VF); bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; @@ -201,7 +197,8 @@ public: InstructionCost 
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp); + VectorType *SubTp, + ArrayRef Args = None); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp deleted file mode 100644 index 1736c078eb83..000000000000 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ /dev/null @@ -1,1638 +0,0 @@ -//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//==-----------------------------------------------------------------------===// - -#include "MCTargetDesc/R600MCTargetDesc.h" -#include "R600.h" -#include "R600RegisterInfo.h" -#include "R600Subtarget.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/InitializePasses.h" - -using namespace llvm; - -#define DEBUG_TYPE "structcfg" - -#define DEFAULT_VEC_SLOTS 8 - -// TODO: move-begin. - -//===----------------------------------------------------------------------===// -// -// Statistics for CFGStructurizer. -// -//===----------------------------------------------------------------------===// - -STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " - "matched"); -STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " - "matched"); -STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); -STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); - -namespace llvm { - -void initializeAMDGPUCFGStructurizerPass(PassRegistry &); - -} // end namespace llvm - -namespace { - -//===----------------------------------------------------------------------===// -// -// Miscellaneous utility for CFGStructurizer. 
-// -//===----------------------------------------------------------------------===// - -#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); - -#define SHOWNEWBLK(b, msg) \ - LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - dbgs() << "\n";); - -#define SHOWBLK_DETAIL(b, msg) \ - LLVM_DEBUG(if (b) { \ - dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - b->print(dbgs()); \ - dbgs() << "\n"; \ - }); - -#define INVALIDSCCNUM -1 - -//===----------------------------------------------------------------------===// -// -// supporting data structure for CFGStructurizer -// -//===----------------------------------------------------------------------===// - -class BlockInformation { -public: - bool IsRetired = false; - int SccNum = INVALIDSCCNUM; - - BlockInformation() = default; -}; - -//===----------------------------------------------------------------------===// -// -// CFGStructurizer -// -//===----------------------------------------------------------------------===// - -class AMDGPUCFGStructurizer : public MachineFunctionPass { -public: - using MBBVector = SmallVector; - using MBBInfoMap = std::map; - using LoopLandInfoMap = std::map; - - enum PathToKind { - Not_SinglePath = 0, - SinglePath_InPath = 1, - SinglePath_NotInPath = 2 - }; - - static char ID; - - AMDGPUCFGStructurizer() : MachineFunctionPass(ID) { - initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "AMDGPU Control Flow Graph structurizer Pass"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// Perform the CFG structurization - bool run(); - - /// Perform the CFG preparation - /// This step will remove every unconditionnal/dead jump instructions and make - /// sure all loops have an exit block - bool prepare(); - - bool runOnMachineFunction(MachineFunction &MF) override { - // FIXME: This pass causes verification failures. - MF.getProperties().set( - MachineFunctionProperties::Property::FailsVerification); - - TII = MF.getSubtarget().getInstrInfo(); - TRI = &TII->getRegisterInfo(); - LLVM_DEBUG(MF.dump();); - OrderedBlks.clear(); - Visited.clear(); - FuncRep = &MF; - MLI = &getAnalysis(); - LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); - MDT = &getAnalysis(); - LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr);); - PDT = &getAnalysis(); - LLVM_DEBUG(PDT->print(dbgs());); - prepare(); - run(); - LLVM_DEBUG(MF.dump();); - return true; - } - -protected: - MachineDominatorTree *MDT; - MachinePostDominatorTree *PDT; - MachineLoopInfo *MLI; - const R600InstrInfo *TII = nullptr; - const R600RegisterInfo *TRI = nullptr; - - // PRINT FUNCTIONS - /// Print the ordered Blocks. 
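The PathToKind values returned by singlePathTo (defined further down) are easiest to read off a small example: in a chain A -> B -> C, singlePathTo(A, C) is SinglePath_InPath; when the single-successor walk ends at a block with no successors without reaching the destination, the result is SinglePath_NotInPath; and it is Not_SinglePath as soon as a multi-successor block interrupts the chain or, with AllowSideEntry false, an intermediate block such as B has a second predecessor.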
- void printOrderedBlocks() const { - size_t i = 0; - for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), - iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { - dbgs() << "BB" << (*iterBlk)->getNumber(); - dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; - if (i != 0 && i % 10 == 0) { - dbgs() << "\n"; - } else { - dbgs() << " "; - } - } - } - - static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { - for (const MachineLoop *L : LoopInfo) - L->print(dbgs()); - } - - // UTILITY FUNCTIONS - int getSCCNum(MachineBasicBlock *MBB) const; - MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; - bool hasBackEdge(MachineBasicBlock *MBB) const; - bool isRetiredBlock(MachineBasicBlock *MBB) const; - bool isActiveLoophead(MachineBasicBlock *MBB) const; - PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, - bool AllowSideEntry = true) const; - int countActiveBlock(MBBVector::const_iterator It, - MBBVector::const_iterator E) const; - bool needMigrateBlock(MachineBasicBlock *MBB) const; - - // Utility Functions - void reversePredicateSetter(MachineBasicBlock::iterator I, - MachineBasicBlock &MBB); - /// Compute the reversed DFS post order of Blocks - void orderBlocks(MachineFunction *MF); - - // Function originally from CFGStructTraits - void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, - const DebugLoc &DL = DebugLoc()); - MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, - const DebugLoc &DL = DebugLoc()); - MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); - void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, - const DebugLoc &DL); - void insertCondBranchBefore(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, int NewOpcode, - int RegNum, const DebugLoc &DL); - - static int getBranchNzeroOpcode(int OldOpcode); - static int getBranchZeroOpcode(int OldOpcode); - static int getContinueNzeroOpcode(int OldOpcode); - static int getContinueZeroOpcode(int OldOpcode); - static MachineBasicBlock *getTrueBranch(MachineInstr *MI); - static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); - static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, - MachineInstr *MI); - static bool isCondBranch(MachineInstr *MI); - static bool isUncondBranch(MachineInstr *MI); - static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); - static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); - - /// The correct naming for this is getPossibleLoopendBlockBranchInstr. - /// - /// BB with backward-edge could have move instructions after the branch - /// instruction. Such move instruction "belong to" the loop backward-edge. 
- MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); - - static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); - static bool isReturnBlock(MachineBasicBlock *MBB); - static void cloneSuccessorList(MachineBasicBlock *DstMBB, - MachineBasicBlock *SrcMBB); - static MachineBasicBlock *clone(MachineBasicBlock *MBB); - - /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose - /// because the AMDGPU instruction is not recognized as terminator fix this - /// and retire this routine - void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, - MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); - - static void wrapup(MachineBasicBlock *MBB); - - int patternMatch(MachineBasicBlock *MBB); - int patternMatchGroup(MachineBasicBlock *MBB); - int serialPatternMatch(MachineBasicBlock *MBB); - int ifPatternMatch(MachineBasicBlock *MBB); - int loopendPatternMatch(); - int mergeLoop(MachineLoop *LoopRep); - - /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in - /// the same loop with LoopLandInfo without explicitly keeping track of - /// loopContBlks and loopBreakBlks, this is a method to get the information. - bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, - MachineBasicBlock *Src2MBB); - int handleJumpintoIf(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); - int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); - int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, - MachineBasicBlock **LandMBBPtr); - void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, - MachineBasicBlock *LandMBB, bool Detail = false); - int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, - MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); - void mergeSerialBlock(MachineBasicBlock *DstMBB, - MachineBasicBlock *SrcMBB); - - void mergeIfthenelseBlock(MachineInstr *BranchMI, - MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, - MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); - void mergeLooplandBlock(MachineBasicBlock *DstMBB, - MachineBasicBlock *LandMBB); - void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, - MachineBasicBlock *LandMBB); - void settleLoopcontBlock(MachineBasicBlock *ContingMBB, - MachineBasicBlock *ContMBB); - - /// normalizeInfiniteLoopExit change - /// B1: - /// uncond_br LoopHeader - /// - /// to - /// B1: - /// cond_br 1 LoopHeader dummyExit - /// and return the newly added dummy exit block - MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); - void removeUnconditionalBranch(MachineBasicBlock *MBB); - - /// Remove duplicate branches instructions in a block. 
- /// For instance - /// B0: - /// cond_br X B1 B2 - /// cond_br X B1 B2 - /// is transformed to - /// B0: - /// cond_br X B1 B2 - void removeRedundantConditionalBranch(MachineBasicBlock *MBB); - - void addDummyExitBlock(SmallVectorImpl &RetMBB); - void removeSuccessor(MachineBasicBlock *MBB); - MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, - MachineBasicBlock *PredMBB); - void migrateInstruction(MachineBasicBlock *SrcMBB, - MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); - void recordSccnum(MachineBasicBlock *MBB, int SCCNum); - void retireBlock(MachineBasicBlock *MBB); - -private: - MBBInfoMap BlockInfoMap; - LoopLandInfoMap LLInfoMap; - std::map Visited; - MachineFunction *FuncRep; - SmallVector OrderedBlks; -}; - -} // end anonymous namespace - -char AMDGPUCFGStructurizer::ID = 0; - -int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { - MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); - if (It == BlockInfoMap.end()) - return INVALIDSCCNUM; - return (*It).second->SccNum; -} - -MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) - const { - LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); - if (It == LLInfoMap.end()) - return nullptr; - return (*It).second; -} - -bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { - MachineLoop *LoopRep = MLI->getLoopFor(MBB); - if (!LoopRep) - return false; - MachineBasicBlock *LoopHeader = LoopRep->getHeader(); - return MBB->isSuccessor(LoopHeader); -} - -bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { - MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); - if (It == BlockInfoMap.end()) - return false; - return (*It).second->IsRetired; -} - -bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { - MachineLoop *LoopRep = MLI->getLoopFor(MBB); - while (LoopRep && LoopRep->getHeader() == MBB) { - MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); - if(!LoopLand) - return true; - if (!isRetiredBlock(LoopLand)) - return true; - LoopRep = LoopRep->getParentLoop(); - } - return false; -} - -AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( - MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, - bool AllowSideEntry) const { - assert(DstMBB); - if (SrcMBB == DstMBB) - return SinglePath_InPath; - while (SrcMBB && SrcMBB->succ_size() == 1) { - SrcMBB = *SrcMBB->succ_begin(); - if (SrcMBB == DstMBB) - return SinglePath_InPath; - if (!AllowSideEntry && SrcMBB->pred_size() > 1) - return Not_SinglePath; - } - if (SrcMBB && SrcMBB->succ_size()==0) - return SinglePath_NotInPath; - return Not_SinglePath; -} - -int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, - MBBVector::const_iterator E) const { - int Count = 0; - while (It != E) { - if (!isRetiredBlock(*It)) - ++Count; - ++It; - } - return Count; -} - -bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { - unsigned BlockSizeThreshold = 30; - unsigned CloneInstrThreshold = 100; - bool MultiplePreds = MBB && (MBB->pred_size() > 1); - - if(!MultiplePreds) - return false; - unsigned BlkSize = MBB->size(); - return ((BlkSize > BlockSizeThreshold) && - (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); -} - -void AMDGPUCFGStructurizer::reversePredicateSetter( - MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { - assert(I.isValid() && "Expected valid iterator"); - for (;; --I) { - if (I == MBB.end()) - continue; - if (I->getOpcode() == R600::PRED_X) { - switch 
(I->getOperand(2).getImm()) { - case R600::PRED_SETE_INT: - I->getOperand(2).setImm(R600::PRED_SETNE_INT); - return; - case R600::PRED_SETNE_INT: - I->getOperand(2).setImm(R600::PRED_SETE_INT); - return; - case R600::PRED_SETE: - I->getOperand(2).setImm(R600::PRED_SETNE); - return; - case R600::PRED_SETNE: - I->getOperand(2).setImm(R600::PRED_SETE); - return; - default: - llvm_unreachable("PRED_X Opcode invalid!"); - } - } - } -} - -void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, - int NewOpcode, const DebugLoc &DL) { - MachineInstr *MI = - MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); - MBB->push_back(MI); - //assume the instruction doesn't take any reg operand ... - SHOWNEWINSTR(MI); -} - -MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, - int NewOpcode, - const DebugLoc &DL) { - MachineInstr *MI = - MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); - if (!MBB->empty()) - MBB->insert(MBB->begin(), MI); - else - MBB->push_back(MI); - SHOWNEWINSTR(MI); - return MI; -} - -MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( - MachineBasicBlock::iterator I, int NewOpcode) { - MachineInstr *OldMI = &(*I); - MachineBasicBlock *MBB = OldMI->getParent(); - MachineInstr *NewMBB = - MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); - MBB->insert(I, NewMBB); - //assume the instruction doesn't take any reg operand ... - SHOWNEWINSTR(NewMBB); - return NewMBB; -} - -void AMDGPUCFGStructurizer::insertCondBranchBefore( - MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { - MachineInstr *OldMI = &(*I); - MachineBasicBlock *MBB = OldMI->getParent(); - MachineFunction *MF = MBB->getParent(); - MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); - MBB->insert(I, NewMI); - MachineInstrBuilder MIB(*MF, NewMI); - MIB.addReg(OldMI->getOperand(1).getReg(), false); - SHOWNEWINSTR(NewMI); - //erase later oldInstr->eraseFromParent(); -} - -void AMDGPUCFGStructurizer::insertCondBranchBefore( - MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, - int RegNum, const DebugLoc &DL) { - MachineFunction *MF = blk->getParent(); - MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); - //insert before - blk->insert(I, NewInstr); - MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); - SHOWNEWINSTR(NewInstr); -} - -int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { - switch(OldOpcode) { - case R600::JUMP_COND: - case R600::JUMP: return R600::IF_PREDICATE_SET; - case R600::BRANCH_COND_i32: - case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; - default: llvm_unreachable("internal error"); - } - return -1; -} - -int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { - switch(OldOpcode) { - case R600::JUMP_COND: - case R600::JUMP: return R600::IF_PREDICATE_SET; - case R600::BRANCH_COND_i32: - case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; - default: llvm_unreachable("internal error"); - } - return -1; -} - -int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { - switch(OldOpcode) { - case R600::JUMP_COND: - case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; - default: llvm_unreachable("internal error"); - } - return -1; -} - -int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { - switch(OldOpcode) { - case R600::JUMP_COND: - case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; - default: llvm_unreachable("internal error"); - } - return -1; -} - -MachineBasicBlock 
*AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { - return MI->getOperand(0).getMBB(); -} - -void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, - MachineBasicBlock *MBB) { - MI->getOperand(0).setMBB(MBB); -} - -MachineBasicBlock * -AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, - MachineInstr *MI) { - assert(MBB->succ_size() == 2); - MachineBasicBlock *TrueBranch = getTrueBranch(MI); - MachineBasicBlock::succ_iterator It = MBB->succ_begin(); - MachineBasicBlock::succ_iterator Next = It; - ++Next; - return (*It == TrueBranch) ? *Next : *It; -} - -bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { - switch (MI->getOpcode()) { - case R600::JUMP_COND: - case R600::BRANCH_COND_i32: - case R600::BRANCH_COND_f32: return true; - default: - return false; - } - return false; -} - -bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { - switch (MI->getOpcode()) { - case R600::JUMP: - case R600::BRANCH: - return true; - default: - return false; - } - return false; -} - -DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { - //get DebugLoc from the first MachineBasicBlock instruction with debug info - DebugLoc DL; - for (MachineInstr &MI : *MBB) - if (MI.getDebugLoc()) - DL = MI.getDebugLoc(); - return DL; -} - -MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( - MachineBasicBlock *MBB) { - MachineBasicBlock::reverse_iterator It = MBB->rbegin(); - MachineInstr *MI = &*It; - if (MI && (isCondBranch(MI) || isUncondBranch(MI))) - return MI; - return nullptr; -} - -MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( - MachineBasicBlock *MBB) { - for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); - It != E; ++It) { - // FIXME: Simplify - MachineInstr *MI = &*It; - if (MI) { - if (isCondBranch(MI) || isUncondBranch(MI)) - return MI; - else if (!TII->isMov(MI->getOpcode())) - break; - } - } - return nullptr; -} - -MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { - MachineBasicBlock::reverse_iterator It = MBB->rbegin(); - if (It != MBB->rend()) { - MachineInstr *instr = &(*It); - if (instr->getOpcode() == R600::RETURN) - return instr; - } - return nullptr; -} - -bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { - MachineInstr *MI = getReturnInstr(MBB); - bool IsReturn = MBB->succ_empty(); - if (MI) - assert(IsReturn); - else if (IsReturn) - LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() - << " is return block without RETURN instr\n";); - return IsReturn; -} - -void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, - MachineBasicBlock *SrcMBB) { - for (MachineBasicBlock *Succ : SrcMBB->successors()) - DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of -} - -MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { - MachineFunction *Func = MBB->getParent(); - MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); - Func->push_back(NewMBB); //insert to function - for (const MachineInstr &It : *MBB) - NewMBB->push_back(Func->CloneMachineInstr(&It)); - return NewMBB; -} - -void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( - MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, - MachineBasicBlock *NewBlk) { - MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); - if (BranchMI && isCondBranch(BranchMI) && - getTrueBranch(BranchMI) == OldMBB) - setTrueBranch(BranchMI, NewBlk); -} - -void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { - 
assert((!MBB->getParent()->getJumpTableInfo() - || MBB->getParent()->getJumpTableInfo()->isEmpty()) - && "found a jump table"); - - //collect continue right before endloop - SmallVector ContInstr; - MachineBasicBlock::iterator Pre = MBB->begin(); - MachineBasicBlock::iterator E = MBB->end(); - MachineBasicBlock::iterator It = Pre; - while (It != E) { - if (Pre->getOpcode() == R600::CONTINUE - && It->getOpcode() == R600::ENDLOOP) - ContInstr.push_back(&*Pre); - Pre = It; - ++It; - } - - //delete continue right before endloop - for (unsigned i = 0; i < ContInstr.size(); ++i) - ContInstr[i]->eraseFromParent(); - - // TODO to fix up jump table so later phase won't be confused. if - // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but - // there isn't such an interface yet. alternatively, replace all the other - // blocks in the jump table with the entryBlk //} -} - -bool AMDGPUCFGStructurizer::prepare() { - bool Changed = false; - - //FIXME: if not reducible flow graph, make it so ??? - - LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); - - orderBlocks(FuncRep); - - SmallVector RetBlks; - - // Add an ExitBlk to loop that don't have one - for (MachineLoop *LoopRep : *MLI) { - MBBVector ExitingMBBs; - LoopRep->getExitingBlocks(ExitingMBBs); - - if (ExitingMBBs.size() == 0) { - MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); - if (DummyExitBlk) - RetBlks.push_back(DummyExitBlk); - } - } - - // Remove unconditional branch instr. - // Add dummy exit block iff there are multiple returns. - for (MachineBasicBlock *MBB : OrderedBlks) { - removeUnconditionalBranch(MBB); - removeRedundantConditionalBranch(MBB); - if (isReturnBlock(MBB)) { - RetBlks.push_back(MBB); - } - assert(MBB->succ_size() <= 2); - } - - if (RetBlks.size() >= 2) { - addDummyExitBlock(RetBlks); - Changed = true; - } - - return Changed; -} - -bool AMDGPUCFGStructurizer::run() { - //Assume reducible CFG... - LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); - -#ifdef STRESSTEST - //Use the worse block ordering to test the algorithm. - ReverseVector(orderedBlks); -#endif - - LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); - int NumIter = 0; - bool Finish = false; - MachineBasicBlock *MBB; - bool MakeProgress = false; - int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), - OrderedBlks.end()); - - do { - ++NumIter; - LLVM_DEBUG(dbgs() << "numIter = " << NumIter - << ", numRemaintedBlk = " << NumRemainedBlk << "\n";); - - SmallVectorImpl::const_iterator It = - OrderedBlks.begin(); - SmallVectorImpl::const_iterator E = - OrderedBlks.end(); - - SmallVectorImpl::const_iterator SccBeginIter = - It; - MachineBasicBlock *SccBeginMBB = nullptr; - int SccNumBlk = 0; // The number of active blocks, init to a - // maximum possible number. - int SccNumIter; // Number of iteration in this SCC. - - while (It != E) { - MBB = *It; - - if (!SccBeginMBB) { - SccBeginIter = It; - SccBeginMBB = MBB; - SccNumIter = 0; - SccNumBlk = NumRemainedBlk; // Init to maximum possible number. - LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); - dbgs() << "\n";); - } - - if (!isRetiredBlock(MBB)) - patternMatch(MBB); - - ++It; - - bool ContNextScc = true; - if (It == E - || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { - // Just finish one scc. 
- ++SccNumIter; - int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); - if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { - LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) - << ", sccNumIter = " << SccNumIter; - dbgs() << "doesn't make any progress\n";); - ContNextScc = true; - } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { - SccNumBlk = sccRemainedNumBlk; - It = SccBeginIter; - ContNextScc = false; - LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) - << "sccNumIter = " << SccNumIter << '\n';); - } else { - // Finish the current scc. - ContNextScc = true; - } - } else { - // Continue on next component in the current scc. - ContNextScc = false; - } - - if (ContNextScc) - SccBeginMBB = nullptr; - } //while, "one iteration" over the function. - - MachineBasicBlock *EntryMBB = - *GraphTraits::nodes_begin(FuncRep); - if (EntryMBB->succ_empty()) { - Finish = true; - LLVM_DEBUG(dbgs() << "Reduce to one block\n";); - } else { - int NewnumRemainedBlk - = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); - // consider cloned blocks ?? - if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { - MakeProgress = true; - NumRemainedBlk = NewnumRemainedBlk; - } else { - MakeProgress = false; - LLVM_DEBUG(dbgs() << "No progress\n";); - } - } - } while (!Finish && MakeProgress); - - // Misc wrap up to maintain the consistency of the Function representation. - wrapup(*GraphTraits::nodes_begin(FuncRep)); - - // Detach retired Block, release memory. - for (auto &It : BlockInfoMap) { - if (It.second && It.second->IsRetired) { - assert((It.first)->getNumber() != -1); - LLVM_DEBUG(dbgs() << "Erase BB" << (It.first)->getNumber() << "\n";); - It.first->eraseFromParent(); // Remove from the parent Function. 
- } - delete It.second; - } - BlockInfoMap.clear(); - LLInfoMap.clear(); - - if (!Finish) { - LLVM_DEBUG(FuncRep->viewCFG()); - report_fatal_error("IRREDUCIBLE_CFG"); - } - - return true; -} - -void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { - int SccNum = 0; - for (scc_iterator It = scc_begin(MF); !It.isAtEnd(); - ++It, ++SccNum) { - const std::vector &SccNext = *It; - for (MachineBasicBlock *MBB : SccNext) { - OrderedBlks.push_back(MBB); - recordSccnum(MBB, SccNum); - } - } - - // walk through all the block in func to check for unreachable - for (auto *MBB : nodes(MF)) { - SccNum = getSCCNum(MBB); - if (SccNum == INVALIDSCCNUM) - dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; - } -} - -int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { - int NumMatch = 0; - int CurMatch; - - LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); - - while ((CurMatch = patternMatchGroup(MBB)) > 0) - NumMatch += CurMatch; - - LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() - << ", numMatch = " << NumMatch << "\n";); - - return NumMatch; -} - -int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { - int NumMatch = 0; - NumMatch += loopendPatternMatch(); - NumMatch += serialPatternMatch(MBB); - NumMatch += ifPatternMatch(MBB); - return NumMatch; -} - -int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { - if (MBB->succ_size() != 1) - return 0; - - MachineBasicBlock *childBlk = *MBB->succ_begin(); - if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) - return 0; - - mergeSerialBlock(MBB, childBlk); - ++numSerialPatternMatch; - return 1; -} - -int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { - //two edges - if (MBB->succ_size() != 2) - return 0; - if (hasBackEdge(MBB)) - return 0; - MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); - if (!BranchMI) - return 0; - - assert(isCondBranch(BranchMI)); - int NumMatch = 0; - - MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); - NumMatch += serialPatternMatch(TrueMBB); - NumMatch += ifPatternMatch(TrueMBB); - MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); - NumMatch += serialPatternMatch(FalseMBB); - NumMatch += ifPatternMatch(FalseMBB); - MachineBasicBlock *LandBlk; - int Cloned = 0; - - assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); - // TODO: Simplify - if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 - && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { - // Diamond pattern - LandBlk = *TrueMBB->succ_begin(); - } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { - // Triangle pattern, false is empty - LandBlk = FalseMBB; - FalseMBB = nullptr; - } else if (FalseMBB->succ_size() == 1 - && *FalseMBB->succ_begin() == TrueMBB) { - // Triangle pattern, true is empty - // We reverse the predicate to make a triangle, empty false pattern; - std::swap(TrueMBB, FalseMBB); - reversePredicateSetter(MBB->end(), *MBB); - LandBlk = FalseMBB; - FalseMBB = nullptr; - } else if (FalseMBB->succ_size() == 1 - && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { - LandBlk = *FalseMBB->succ_begin(); - } else if (TrueMBB->succ_size() == 1 - && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { - LandBlk = *TrueMBB->succ_begin(); - } else { - return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); - } - - // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the - // new BB created for landBlk==NULL may introduce new challenge to the - // reduction 
process. - if (LandBlk && - ((TrueMBB && TrueMBB->pred_size() > 1) - || (FalseMBB && FalseMBB->pred_size() > 1))) { - Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); - } - - if (TrueMBB && TrueMBB->pred_size() > 1) { - TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); - ++Cloned; - } - - if (FalseMBB && FalseMBB->pred_size() > 1) { - FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); - ++Cloned; - } - - mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); - - ++numIfPatternMatch; - - numClonedBlock += Cloned; - - return 1 + Cloned + NumMatch; -} - -int AMDGPUCFGStructurizer::loopendPatternMatch() { - std::deque NestedLoops; - for (auto &It: *MLI) - for (MachineLoop *ML : depth_first(It)) - NestedLoops.push_front(ML); - - if (NestedLoops.empty()) - return 0; - - // Process nested loop outside->inside (we did push_front), - // so "continue" to a outside loop won't be mistaken as "break" - // of the current loop. - int Num = 0; - for (MachineLoop *ExaminedLoop : NestedLoops) { - if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) - continue; - LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); - int NumBreak = mergeLoop(ExaminedLoop); - if (NumBreak == -1) - break; - Num += NumBreak; - } - return Num; -} - -int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { - MachineBasicBlock *LoopHeader = LoopRep->getHeader(); - MBBVector ExitingMBBs; - LoopRep->getExitingBlocks(ExitingMBBs); - assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); - LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() - << " exiting blocks\n";); - // We assume a single ExitBlk - MBBVector ExitBlks; - LoopRep->getExitBlocks(ExitBlks); - SmallPtrSet ExitBlkSet; - for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) - ExitBlkSet.insert(ExitBlks[i]); - assert(ExitBlkSet.size() == 1); - MachineBasicBlock *ExitBlk = *ExitBlks.begin(); - assert(ExitBlk && "Loop has several exit block"); - MBBVector LatchBlks; - for (auto *LB : inverse_children(LoopHeader)) - if (LoopRep->contains(LB)) - LatchBlks.push_back(LB); - - for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) - mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); - for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) - settleLoopcontBlock(LatchBlks[i], LoopHeader); - int Match = 0; - do { - Match = 0; - Match += serialPatternMatch(LoopHeader); - Match += ifPatternMatch(LoopHeader); - } while (Match > 0); - mergeLooplandBlock(LoopHeader, ExitBlk); - MachineLoop *ParentLoop = LoopRep->getParentLoop(); - if (ParentLoop) - MLI->changeLoopFor(LoopHeader, ParentLoop); - else - MLI->removeBlock(LoopHeader); - Visited[LoopRep] = true; - return 1; -} - -bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( - MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { - if (Src1MBB->succ_empty()) { - MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); - if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { - MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; - if (TheEntry) { - LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" - << Src1MBB->getNumber() << " src2 = BB" - << Src2MBB->getNumber() << "\n";); - return true; - } - } - } - return false; -} - -int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { - int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); - if (Num == 0) { - LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" - << "\n";); - Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); - } - 
return Num; -} - -int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { - int Num = 0; - MachineBasicBlock *DownBlk; - - //trueBlk could be the common post dominator - DownBlk = TrueMBB; - - LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() - << " true = BB" << TrueMBB->getNumber() - << ", numSucc=" << TrueMBB->succ_size() << " false = BB" - << FalseMBB->getNumber() << "\n";); - - while (DownBlk) { - LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); - - if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { - LLVM_DEBUG(dbgs() << " working\n";); - - Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); - Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); - - numClonedBlock += Num; - Num += serialPatternMatch(*HeadMBB->succ_begin()); - Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); - Num += ifPatternMatch(HeadMBB); - assert(Num > 0); - - break; - } - LLVM_DEBUG(dbgs() << " not working\n";); - DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; - } // walk down the postDomTree - - return Num; -} - -#ifndef NDEBUG -void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( - MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, - MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { - dbgs() << "head = BB" << HeadMBB->getNumber() - << " size = " << HeadMBB->size(); - if (Detail) { - dbgs() << "\n"; - HeadMBB->print(dbgs()); - dbgs() << "\n"; - } - - if (TrueMBB) { - dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " - << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); - if (Detail) { - dbgs() << "\n"; - TrueMBB->print(dbgs()); - dbgs() << "\n"; - } - } - if (FalseMBB) { - dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " - << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); - if (Detail) { - dbgs() << "\n"; - FalseMBB->print(dbgs()); - dbgs() << "\n"; - } - } - if (LandMBB) { - dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " - << LandMBB->size() << " numPred = " << LandMBB->pred_size(); - if (Detail) { - dbgs() << "\n"; - LandMBB->print(dbgs()); - dbgs() << "\n"; - } - } - - dbgs() << "\n"; -} -#endif - -int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, - MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, - MachineBasicBlock **LandMBBPtr) { - bool MigrateTrue = false; - bool MigrateFalse = false; - - MachineBasicBlock *LandBlk = *LandMBBPtr; - - assert((!TrueMBB || TrueMBB->succ_size() <= 1) - && (!FalseMBB || FalseMBB->succ_size() <= 1)); - - if (TrueMBB == FalseMBB) - return 0; - - MigrateTrue = needMigrateBlock(TrueMBB); - MigrateFalse = needMigrateBlock(FalseMBB); - - if (!MigrateTrue && !MigrateFalse) - return 0; - - // If we need to migrate either trueBlk and falseBlk, migrate the rest that - // have more than one predecessors. without doing this, its predecessor - // rather than headBlk will have undefined value in initReg. 
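A hedged C++ analogy for the comment above, with armTrue/armFalse/armOther as hypothetical stand-ins for the three arms: initReg is assigned only on the edges leaving headBlk, so a surviving side entry into trueBlk or falseBlk would reach the landing-block test along a path that never defines it; migrating such blocks, or guarding with the sentinel value 2 as the comments below describe for extra landBlk predecessors, keeps every path defined.

    int armTrue(), armFalse(), armOther(); // hypothetical stand-ins

    int selectArm(bool FromHead, bool Cond) {
      int InitReg = 2;            // sentinel: "entered from elsewhere"
      if (FromHead)
        InitReg = Cond ? 1 : 0;   // defined only on headBlk's two edges
      if (InitReg != 2)           // landBlk's reconstructed if-else
        return InitReg == 1 ? armTrue() : armFalse();
      return armOther();
    }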
- if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) - MigrateTrue = true; - if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) - MigrateFalse = true; - - LLVM_DEBUG( - dbgs() << "before improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); - - // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk - // - // new: headBlk => if () {initReg = 1; org trueBlk branch} else - // {initReg = 0; org falseBlk branch } - // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} - // => org landBlk - // if landBlk->pred_size() > 2, put the about if-else inside - // if (initReg !=2) {...} - // - // add initReg = initVal to headBlk - - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (!MigrateTrue || !MigrateFalse) { - // XXX: We have an opportunity here to optimize the "branch into if" case - // here. Branch into if looks like this: - // entry - // / | - // diamond_head branch_from - // / \ | - // diamond_false diamond_true - // \ / - // done - // - // The diamond_head block begins the "if" and the diamond_true block - // is the block being "branched into". - // - // If MigrateTrue is true, then TrueBB is the block being "branched into" - // and if MigrateFalse is true, then FalseBB is the block being - // "branched into" - // - // Here is the pseudo code for how I think the optimization should work: - // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. - // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. - // 3. Move the branch instruction from diamond_head into its own basic - // block (new_block). - // 4. Add an unconditional branch from diamond_head to new_block - // 5. Replace the branch instruction in branch_from with an unconditional - // branch to new_block. If branch_from has multiple predecessors, then - // we need to replace the True/False block in the branch - // instruction instead of replacing it. - // 6. Change the condition of the branch instruction in new_block from - // COND to (COND || GPR0) - // - // In order insert these MOV instruction, we will need to use the - // RegisterScavenger. Usually liveness stops being tracked during - // the late machine optimization passes, however if we implement - // bool TargetRegisterInfo::requiresRegisterScavenging( - // const MachineFunction &MF) - // and have it return true, liveness will be tracked correctly - // by generic optimization passes. We will also need to make sure that - // all of our target-specific passes that run after regalloc and before - // the CFGStructurizer track liveness and we will need to modify this pass - // to correctly track liveness. - // - // After the above changes, the new CFG should look like this: - // entry - // / | - // diamond_head branch_from - // \ / - // new_block - // / | - // diamond_false diamond_true - // \ / - // done - // - // Without this optimization, we are forced to duplicate the diamond_true - // block and we will end up with a CFG like this: - // - // entry - // / | - // diamond_head branch_from - // / \ | - // diamond_false diamond_true diamond_true (duplicate) - // \ / | - // done --------------------| - // - // Duplicating diamond_true can be very costly especially if it has a - // lot of instructions. 
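The six numbered steps compress into a small scalar model (a sketch only; diamondTrue/diamondFalse are hypothetical stand-ins for the two arms): a boolean flag plays the role of GPR0, and the branch that would live in new_block tests the original condition OR'd with that flag, so the branched-into block is reached from both paths without being cloned.

    bool diamondTrue(), diamondFalse(); // hypothetical stand-ins

    bool runDiamond(bool FromBranchFrom, bool Cond) {
      bool Gpr0 = false;        // step 1: MOV GPR0, 0 in diamond_head
      if (FromBranchFrom)
        Gpr0 = true;            // step 2: MOV GPR0, 1 in branch_from
      if (Cond || Gpr0)         // steps 3-6: new_block branches on COND || GPR0
        return diamondTrue();   // single copy of the branched-into block
      return diamondFalse();
    }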
- return 0; - } - - int NumNewBlk = 0; - - bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); - - //insert R600::ENDIF to avoid special case "input landBlk == NULL" - MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); - - if (LandBlkHasOtherPred) { - report_fatal_error("Extra register needed to handle CFG"); - Register CmpResReg = - HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); - report_fatal_error("Extra compare instruction needed to handle CFG"); - insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, - CmpResReg, DebugLoc()); - } - - // XXX: We are running this after RA, so creating virtual registers will - // cause an assertion failure in the PostRA scheduling pass. - Register InitReg = - HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); - insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, - DebugLoc()); - - if (MigrateTrue) { - migrateInstruction(TrueMBB, LandBlk, I); - // need to uncondionally insert the assignment to ensure a path from its - // predecessor rather than headBlk has valid value in initReg if - // (initVal != 1). - report_fatal_error("Extra register needed to handle CFG"); - } - insertInstrBefore(I, R600::ELSE); - - if (MigrateFalse) { - migrateInstruction(FalseMBB, LandBlk, I); - // need to uncondionally insert the assignment to ensure a path from its - // predecessor rather than headBlk has valid value in initReg if - // (initVal != 0) - report_fatal_error("Extra register needed to handle CFG"); - } - - if (LandBlkHasOtherPred) { - // add endif - insertInstrBefore(I, R600::ENDIF); - - // put initReg = 2 to other predecessors of landBlk - for (MachineBasicBlock *MBB : LandBlk->predecessors()) - if (MBB != TrueMBB && MBB != FalseMBB) - report_fatal_error("Extra register needed to handle CFG"); - } - LLVM_DEBUG( - dbgs() << "result from improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); - - // update landBlk - *LandMBBPtr = LandBlk; - - return NumNewBlk; -} - -void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, - MachineBasicBlock *SrcMBB) { - LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" - << SrcMBB->getNumber() << "\n";); - DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); - - DstMBB->removeSuccessor(SrcMBB, true); - cloneSuccessorList(DstMBB, SrcMBB); - - removeSuccessor(SrcMBB); - MLI->removeBlock(SrcMBB); - retireBlock(SrcMBB); -} - -void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, - MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, - MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { - assert (TrueMBB); - LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; - if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() - << " } else "; - dbgs() << "{ "; if (FalseMBB) { - dbgs() << "BB" << FalseMBB->getNumber(); - } dbgs() << " }\n "; - dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else { - dbgs() << "BB" << LandMBB->getNumber(); - } dbgs() << "\n";); - - int OldOpcode = BranchMI->getOpcode(); - DebugLoc BranchDL = BranchMI->getDebugLoc(); - -// transform to -// if cond -// trueBlk -// else -// falseBlk -// endif -// landBlk - - MachineBasicBlock::iterator I = BranchMI; - insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), - BranchDL); - - if (TrueMBB) { - MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); - MBB->removeSuccessor(TrueMBB, true); - if (LandMBB && 
TrueMBB->succ_size()!=0) - TrueMBB->removeSuccessor(LandMBB, true); - retireBlock(TrueMBB); - MLI->removeBlock(TrueMBB); - } - - if (FalseMBB) { - insertInstrBefore(I, R600::ELSE); - MBB->splice(I, FalseMBB, FalseMBB->begin(), - FalseMBB->end()); - MBB->removeSuccessor(FalseMBB, true); - if (LandMBB && !FalseMBB->succ_empty()) - FalseMBB->removeSuccessor(LandMBB, true); - retireBlock(FalseMBB); - MLI->removeBlock(FalseMBB); - } - insertInstrBefore(I, R600::ENDIF); - - BranchMI->eraseFromParent(); - - if (LandMBB && TrueMBB && FalseMBB) - MBB->addSuccessor(LandMBB); -} - -void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, - MachineBasicBlock *LandMBB) { - LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() - << " land = BB" << LandMBB->getNumber() << "\n";); - - insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); - insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); - DstBlk->replaceSuccessor(DstBlk, LandMBB); -} - -void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, - MachineBasicBlock *LandMBB) { - LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" - << ExitingMBB->getNumber() << " land = BB" - << LandMBB->getNumber() << "\n";); - MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); - assert(BranchMI && isCondBranch(BranchMI)); - DebugLoc DL = BranchMI->getDebugLoc(); - MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); - MachineBasicBlock::iterator I = BranchMI; - if (TrueBranch != LandMBB) - reversePredicateSetter(I, *I->getParent()); - insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); - insertInstrBefore(I, R600::BREAK); - insertInstrBefore(I, R600::ENDIF); - //now branchInst can be erase safely - BranchMI->eraseFromParent(); - //now take care of successors, retire blocks - ExitingMBB->removeSuccessor(LandMBB, true); -} - -void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, - MachineBasicBlock *ContMBB) { - LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" - << ContingMBB->getNumber() << ", cont = BB" - << ContMBB->getNumber() << "\n";); - - MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); - if (MI) { - assert(isCondBranch(MI)); - MachineBasicBlock::iterator I = MI; - MachineBasicBlock *TrueBranch = getTrueBranch(MI); - int OldOpcode = MI->getOpcode(); - DebugLoc DL = MI->getDebugLoc(); - - bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); - - if (!UseContinueLogical) { - int BranchOpcode = - TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : - getBranchZeroOpcode(OldOpcode); - insertCondBranchBefore(I, BranchOpcode, DL); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - insertInstrEnd(ContingMBB, R600::CONTINUE, DL); - insertInstrEnd(ContingMBB, R600::ENDIF, DL); - } else { - int BranchOpcode = - TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : - getContinueZeroOpcode(OldOpcode); - insertCondBranchBefore(I, BranchOpcode, DL); - } - - MI->eraseFromParent(); - } else { - // if we've arrived here then we've already erased the branch instruction - // travel back up the basic block to see the last reference of our debug - // location we've just inserted that reference here so it should be - // representative insertEnd to ensure phi-moves, if exist, go before the - // continue-instr. 
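In source terms, settleLoopcontBlock rewrites the latch's branch-back-to-header as a structured continue, fused into a single CONTINUE_LOGICAL* when the branch is the block's last instruction and wrapped in IF/CONTINUE/ENDIF otherwise. A loose C++ analogy (loopActive/step/predicate are hypothetical):

    bool loopActive(), predicate();
    void step();

    void latchShape() {
      while (loopActive()) {
        step();
        if (predicate())   // TrueBranch == ContMBB: branch-on-nonzero form
          continue;        // CONTINUE, fused or inside IF ... ENDIF
        break;             // fall-through path, handled by the break pattern
      }
    }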
- insertInstrEnd(ContingMBB, R600::CONTINUE, - getLastDebugLocInBB(ContingMBB)); - } -} - -int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, - MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { - int Cloned = 0; - assert(PreMBB->isSuccessor(SrcMBB)); - while (SrcMBB && SrcMBB != DstMBB) { - assert(SrcMBB->succ_size() == 1); - if (SrcMBB->pred_size() > 1) { - SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); - ++Cloned; - } - - PreMBB = SrcMBB; - SrcMBB = *SrcMBB->succ_begin(); - } - - return Cloned; -} - -MachineBasicBlock * -AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, - MachineBasicBlock *PredMBB) { - assert(PredMBB->isSuccessor(MBB) && - "succBlk is not a prececessor of curBlk"); - - MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions - replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); - //srcBlk, oldBlk, newBlk - - PredMBB->replaceSuccessor(MBB, CloneMBB); - - // add all successor to cloneBlk - cloneSuccessorList(CloneMBB, MBB); - - numClonedInstr += MBB->size(); - - LLVM_DEBUG(dbgs() << "Cloned block: " - << "BB" << MBB->getNumber() << "size " << MBB->size() - << "\n";); - - SHOWNEWBLK(CloneMBB, "result of Cloned block: "); - - return CloneMBB; -} - -void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, - MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { - MachineBasicBlock::iterator SpliceEnd; - //look for the input branchinstr, not the AMDGPU branchinstr - MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); - if (!BranchMI) { - LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";); - SpliceEnd = SrcMBB->end(); - } else { - LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); - SpliceEnd = BranchMI; - } - LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " - << DstMBB->size() << "srcSize = " << SrcMBB->size() - << "\n";); - - //splice insert before insertPos - DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); - - LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " - << DstMBB->size() << "srcSize = " << SrcMBB->size() - << '\n';); -} - -MachineBasicBlock * -AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { - MachineBasicBlock *LoopHeader = LoopRep->getHeader(); - MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); - - if (!LoopHeader || !LoopLatch) - return nullptr; - MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); - // Is LoopRep an infinite loop ? - if (!BranchMI || !isUncondBranch(BranchMI)) - return nullptr; - - MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); - FuncRep->push_back(DummyExitBlk); //insert to function - SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); - LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); - LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); - Ctx.emitError("Extra register needed to handle CFG"); - return nullptr; -} - -void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { - MachineInstr *BranchMI; - - // I saw two unconditional branch in one basic block in example - // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 
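The rewrite documented at normalizeInfiniteLoopExit's declaration gives an infinite loop a never-taken exit edge, so every loop the structurizer sees has an exit block; in source terms (body is a hypothetical stand-in):

    void body();

    void normalized() {
      while (true) {   // was: uncond_br LoopHeader
        body();
        if (false)     // now: cond_br 1, LoopHeader, DummyExitBlk
          break;       // DummyExitBlk, never reached at run time
      }
    }

In this snapshot the R600 path can no longer materialize the constant-1 predicate register, which is apparently why the function above emits an error and returns nullptr instead of performing the rewrite.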
- while ((BranchMI = getLoopendBlockBranchInstr(MBB)) - && isUncondBranch(BranchMI)) { - LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); - BranchMI->eraseFromParent(); - } -} - -void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( - MachineBasicBlock *MBB) { - if (MBB->succ_size() != 2) - return; - MachineBasicBlock *MBB1 = *MBB->succ_begin(); - MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); - if (MBB1 != MBB2) - return; - - MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); - assert(BranchMI && isCondBranch(BranchMI)); - LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); - BranchMI->eraseFromParent(); - SHOWNEWBLK(MBB1, "Removing redundant successor"); - MBB->removeSuccessor(MBB1, true); -} - -void AMDGPUCFGStructurizer::addDummyExitBlock( - SmallVectorImpl &RetMBB) { - MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); - FuncRep->push_back(DummyExitBlk); //insert to function - insertInstrEnd(DummyExitBlk, R600::RETURN); - - for (MachineBasicBlock *MBB : RetMBB) { - if (MachineInstr *MI = getReturnInstr(MBB)) - MI->eraseFromParent(); - MBB->addSuccessor(DummyExitBlk); - LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() - << " successors\n";); - } - SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); -} - -void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { - while (MBB->succ_size()) - MBB->removeSuccessor(*MBB->succ_begin()); -} - -void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, - int SccNum) { - BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; - if (!srcBlkInfo) - srcBlkInfo = new BlockInformation(); - srcBlkInfo->SccNum = SccNum; -} - -void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { - LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); - - BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; - - if (!SrcBlkInfo) - SrcBlkInfo = new BlockInformation(); - - SrcBlkInfo->IsRetired = true; - assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet"); -} - -INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", - "AMDGPU CFG Structurizer", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", - "AMDGPU CFG Structurizer", false, false) - -FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { - return new AMDGPUCFGStructurizer(); -} diff --git a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h index 654153ea5151..8e5f966b7c6c 100644 --- a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h +++ b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h @@ -142,7 +142,7 @@ enum amd_code_property_mask_t { /// is provided to the finalizer when it is invoked and is recorded /// here. The hardware will interleave the memory requests of each /// lane of a wavefront by this element size to ensure each - /// work-item gets a distinct memory memory location. Therefore, the + /// work-item gets a distinct memory location. Therefore, the /// finalizer ensures that all load and store operations done to /// private memory do not exceed this size. 
For example, if the /// element size is 4 (32-bits or dword) and a 64-bit value must be diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index ffe626513d47..e12d0ffef35c 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -20,10 +20,13 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" @@ -33,6 +36,7 @@ #include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetParser.h" using namespace llvm; @@ -120,12 +124,6 @@ public: ImmTyD16, ImmTyClampSI, ImmTyOModSI, - ImmTyDPP8, - ImmTyDppCtrl, - ImmTyDppRowMask, - ImmTyDppBankMask, - ImmTyDppBoundCtrl, - ImmTyDppFi, ImmTySdwaDstSel, ImmTySdwaSrc0Sel, ImmTySdwaSrc1Sel, @@ -151,6 +149,12 @@ public: ImmTyOpSelHi, ImmTyNegLo, ImmTyNegHi, + ImmTyDPP8, + ImmTyDppCtrl, + ImmTyDppRowMask, + ImmTyDppBankMask, + ImmTyDppBoundCtrl, + ImmTyDppFi, ImmTySwizzle, ImmTyGprIdxMode, ImmTyHigh, @@ -158,6 +162,8 @@ public: ImmTyCBSZ, ImmTyABID, ImmTyEndpgm, + ImmTyWaitVDST, + ImmTyWaitEXP, }; enum ImmKindTy { @@ -262,6 +268,14 @@ public: return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); } + bool isRegOrInlineImmWithInt16InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); + } + + bool isRegOrInlineImmWithInt32InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); + } + bool isRegOrImmWithInt64InputMods() const { return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); } @@ -278,6 +292,15 @@ public: return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); } + bool isRegOrInlineImmWithFP16InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); + } + + bool isRegOrInlineImmWithFP32InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); + } + + bool isVReg() const { return isRegClass(AMDGPU::VGPR_32RegClassID) || isRegClass(AMDGPU::VReg_64RegClassID) || @@ -815,6 +838,8 @@ public: } bool isSWaitCnt() const; + bool isDepCtr() const; + bool isSDelayAlu() const; bool isHwreg() const; bool isSendMsg() const; bool isSwizzle() const; @@ -830,6 +855,8 @@ public: bool isS16Imm() const; bool isU16Imm() const; bool isEndpgm() const; + bool isWaitVDST() const; + bool isWaitEXP() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -1037,6 +1064,8 @@ public: case ImmTyCBSZ: OS << "CBSZ"; break; case ImmTyABID: OS << "ABID"; break; case ImmTyEndpgm: OS << "Endpgm"; break; + case ImmTyWaitVDST: OS << "WaitVDST"; break; + case ImmTyWaitEXP: OS << "WaitEXP"; break; } } @@ -1123,7 +1152,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { class KernelScopeInfo { int SgprIndexUnusedMin = -1; int VgprIndexUnusedMin = -1; + int AgprIndexUnusedMin = -1; MCContext *Ctx = nullptr; + MCSubtargetInfo const *MSTI = nullptr; void usesSgprAt(int i) { if (i >= SgprIndexUnusedMin) { @@ -1142,7 +1173,31 @@ class KernelScopeInfo { if (Ctx) { 
MCSymbol* const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); - Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); + int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, + VgprIndexUnusedMin); + Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); + } + } + } + + void usesAgprAt(int i) { + // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction + if (!hasMAIInsts(*MSTI)) + return; + + if (i >= AgprIndexUnusedMin) { + AgprIndexUnusedMin = ++i; + if (Ctx) { + MCSymbol* const Sym = + Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); + Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); + + // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) + MCSymbol* const vSym = + Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); + int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, + VgprIndexUnusedMin); + vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); } } } @@ -1152,16 +1207,29 @@ public: void initialize(MCContext &Context) { Ctx = &Context; + MSTI = Ctx->getSubtargetInfo(); + usesSgprAt(SgprIndexUnusedMin = -1); usesVgprAt(VgprIndexUnusedMin = -1); + if (hasMAIInsts(*MSTI)) { + usesAgprAt(AgprIndexUnusedMin = -1); + } } - void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { + void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, + unsigned RegWidth) { switch (RegKind) { - case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; - case IS_AGPR: // fall through - case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; - default: break; + case IS_SGPR: + usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); + break; + case IS_AGPR: + usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); + break; + case IS_VGPR: + usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); + break; + default: + break; } } }; @@ -1353,10 +1421,15 @@ public: return AMDGPU::isGFX9(getSTI()); } + // TODO: isGFX90A is also true for GFX940. We need to clean it. 
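With register widths now tracked in bits rather than dword counts, the last used dword index falls out of divideCeil. A quick check with a hypothetical operand v[4:7], i.e. DwordRegIndex = 4 and RegWidth = 128:

    #include "llvm/Support/MathExtras.h"

    unsigned LastIdx = 4 + llvm::divideCeil(128, 32) - 1; // = 7 -> usesVgprAt(7)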
bool isGFX90A() const { return AMDGPU::isGFX90A(getSTI()); } + bool isGFX940() const { + return AMDGPU::isGFX940(getSTI()); + } + bool isGFX9Plus() const { return AMDGPU::isGFX9Plus(getSTI()); } @@ -1367,6 +1440,14 @@ public: bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } + bool isGFX11() const { + return AMDGPU::isGFX11(getSTI()); + } + + bool isGFX11Plus() const { + return AMDGPU::isGFX11Plus(getSTI()); + } + bool isGFX10_BEncoding() const { return AMDGPU::isGFX10_BEncoding(getSTI()); } @@ -1496,6 +1577,14 @@ public: bool parseCnt(int64_t &IntVal); OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); + + bool parseDepCtr(int64_t &IntVal, unsigned &Mask); + void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); + OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); + + bool parseDelay(int64_t &Delay); + OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); + OperandMatchResultTy parseHwreg(OperandVector &Operands); private: @@ -1522,6 +1611,7 @@ private: SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; + SMLoc getBLGPLoc(const OperandVector &Operands) const; SMLoc getOperandLoc(std::function Test, const OperandVector &Operands) const; @@ -1540,7 +1630,7 @@ private: bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); - bool validateMIMGDataSize(const MCInst &Inst); + Optional validateMIMGDataSize(const MCInst &Inst); bool validateMIMGAddrSize(const MCInst &Inst); bool validateMIMGD16(const MCInst &Inst); bool validateMIMGDim(const MCInst &Inst); @@ -1553,10 +1643,14 @@ private: bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); bool validateAGPRLdSt(const MCInst &Inst) const; bool validateVGPRAlign(const MCInst &Inst) const; + bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); bool validateGWS(const MCInst &Inst, const OperandVector &Operands); bool validateDivScale(const MCInst &Inst); bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc); + bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, + const SMLoc &IDLoc); + bool validateExeczVcczOperands(const OperandVector &Operands); Optional validateLdsDirect(const MCInst &Inst); unsigned getConstantBusLimit(unsigned Opcode) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); @@ -1586,7 +1680,7 @@ private: bool parseExpr(int64_t &Imm, StringRef Expected = ""); bool parseExpr(OperandVector &Operands); StringRef getTokenStr() const; - AsmToken peekToken(); + AsmToken peekToken(bool ShouldSkipSpace = true); AsmToken getToken() const; SMLoc getLoc() const; void lex(); @@ -1644,10 +1738,12 @@ public: void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); + void cvtVOPD(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); + void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); void cvtMIMG(MCInst &Inst, const OperandVector &Operands, bool IsAtomic = false); @@ -1668,7 +1764,24 @@ public: AMDGPUOperand::Ptr defaultBoundCtrl() const; AMDGPUOperand::Ptr defaultFI() const; void cvtDPP(MCInst &Inst, const 
OperandVector &Operands, bool IsDPP8 = false); - void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } + void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { + cvtDPP(Inst, Operands, true); + } + void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, + bool IsDPP8 = false); + void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { + cvtVOPCNoDstDPP(Inst, Operands, true); + } + void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, + bool IsDPP8 = false); + void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { + cvtVOP3DPP(Inst, Operands, true); + } + void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, + bool IsDPP8 = false); + void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { + cvtVOPC64NoDstDPP(Inst, Operands, true); + } OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, AMDGPUOperand::ImmTy Type); @@ -1689,6 +1802,10 @@ public: OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; + + AMDGPUOperand::Ptr defaultWaitVDST() const; + AMDGPUOperand::Ptr defaultWaitEXP() const; + OperandMatchResultTy parseVOPD(OperandVector &Operands); }; struct OptionalOperand { @@ -1897,7 +2014,7 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const { // We allow fp literals with f16x2 operands assuming that the specified // literal goes into the lower half and the upper half is zero. We also - // require that the literal may be losslesly converted to f16. + // require that the literal may be losslessly converted to f16. MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : (type == MVT::v2i16)? MVT::i16 : (type == MVT::v2f32)? MVT::f32 : type; @@ -2211,52 +2328,86 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { if (Is == IS_VGPR) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::VGPR_32RegClassID; - case 2: return AMDGPU::VReg_64RegClassID; - case 3: return AMDGPU::VReg_96RegClassID; - case 4: return AMDGPU::VReg_128RegClassID; - case 5: return AMDGPU::VReg_160RegClassID; - case 6: return AMDGPU::VReg_192RegClassID; - case 7: return AMDGPU::VReg_224RegClassID; - case 8: return AMDGPU::VReg_256RegClassID; - case 16: return AMDGPU::VReg_512RegClassID; - case 32: return AMDGPU::VReg_1024RegClassID; + case 32: + return AMDGPU::VGPR_32RegClassID; + case 64: + return AMDGPU::VReg_64RegClassID; + case 96: + return AMDGPU::VReg_96RegClassID; + case 128: + return AMDGPU::VReg_128RegClassID; + case 160: + return AMDGPU::VReg_160RegClassID; + case 192: + return AMDGPU::VReg_192RegClassID; + case 224: + return AMDGPU::VReg_224RegClassID; + case 256: + return AMDGPU::VReg_256RegClassID; + case 512: + return AMDGPU::VReg_512RegClassID; + case 1024: + return AMDGPU::VReg_1024RegClassID; } } else if (Is == IS_TTMP) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::TTMP_32RegClassID; - case 2: return AMDGPU::TTMP_64RegClassID; - case 4: return AMDGPU::TTMP_128RegClassID; - case 8: return AMDGPU::TTMP_256RegClassID; - case 16: return AMDGPU::TTMP_512RegClassID; + case 32: + return AMDGPU::TTMP_32RegClassID; + case 64: + return AMDGPU::TTMP_64RegClassID; + case 128: + return AMDGPU::TTMP_128RegClassID; + case 256: + return AMDGPU::TTMP_256RegClassID; + case 512: + return AMDGPU::TTMP_512RegClassID; } } else if (Is == IS_SGPR) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::SGPR_32RegClassID; - case 2: return AMDGPU::SGPR_64RegClassID; - case 3: return 
AMDGPU::SGPR_96RegClassID; - case 4: return AMDGPU::SGPR_128RegClassID; - case 5: return AMDGPU::SGPR_160RegClassID; - case 6: return AMDGPU::SGPR_192RegClassID; - case 7: return AMDGPU::SGPR_224RegClassID; - case 8: return AMDGPU::SGPR_256RegClassID; - case 16: return AMDGPU::SGPR_512RegClassID; + case 32: + return AMDGPU::SGPR_32RegClassID; + case 64: + return AMDGPU::SGPR_64RegClassID; + case 96: + return AMDGPU::SGPR_96RegClassID; + case 128: + return AMDGPU::SGPR_128RegClassID; + case 160: + return AMDGPU::SGPR_160RegClassID; + case 192: + return AMDGPU::SGPR_192RegClassID; + case 224: + return AMDGPU::SGPR_224RegClassID; + case 256: + return AMDGPU::SGPR_256RegClassID; + case 512: + return AMDGPU::SGPR_512RegClassID; } } else if (Is == IS_AGPR) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::AGPR_32RegClassID; - case 2: return AMDGPU::AReg_64RegClassID; - case 3: return AMDGPU::AReg_96RegClassID; - case 4: return AMDGPU::AReg_128RegClassID; - case 5: return AMDGPU::AReg_160RegClassID; - case 6: return AMDGPU::AReg_192RegClassID; - case 7: return AMDGPU::AReg_224RegClassID; - case 8: return AMDGPU::AReg_256RegClassID; - case 16: return AMDGPU::AReg_512RegClassID; - case 32: return AMDGPU::AReg_1024RegClassID; + case 32: + return AMDGPU::AGPR_32RegClassID; + case 64: + return AMDGPU::AReg_64RegClassID; + case 96: + return AMDGPU::AReg_96RegClassID; + case 128: + return AMDGPU::AReg_128RegClassID; + case 160: + return AMDGPU::AReg_160RegClassID; + case 192: + return AMDGPU::AReg_192RegClassID; + case 224: + return AMDGPU::AReg_224RegClassID; + case 256: + return AMDGPU::AReg_256RegClassID; + case 512: + return AMDGPU::AReg_512RegClassID; + case 1024: + return AMDGPU::AReg_1024RegClassID; } } return -1; @@ -2343,32 +2494,32 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, case IS_SPECIAL: if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { Reg = AMDGPU::XNACK_MASK; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; - RegWidth = 2; + RegWidth = 64; return true; } Error(Loc, "register does not fit in the list"); @@ -2377,11 +2528,11 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, case IS_SGPR: case IS_AGPR: case IS_TTMP: - if (Reg1 != Reg + RegWidth) { + if (Reg1 != Reg + RegWidth / 32) { Error(Loc, "registers in a list must have consecutive indices"); return false; } - RegWidth++; + RegWidth += 32; return true; default: llvm_unreachable("unexpected register kind"); @@ -2470,7 +2621,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, if (RegKind == IS_SGPR || RegKind == IS_TTMP) { // SGPR and TTMP registers must be aligned. // Max required alignment is 4 dwords. 
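The register-class hunks above change how the assembler tracks register widths: RegWidth is now measured in bits (32, 64, 96, ...) instead of in dword counts (1, 2, 3, ...), and every consumer is rescaled to match (RegWidth += 32, Reg1 != Reg + RegWidth / 32, divideCeil(RegWidth, 32), and so on). A standalone sketch of the invariant, not LLVM code, using a hypothetical dwordsToBits helper:

    #include <cassert>

    // Pre-patch RegWidth counted dwords; post-patch it counts bits.
    static unsigned dwordsToBits(unsigned NumDwords) { return NumDwords * 32; }

    int main() {
      // s[4:7] spans 4 dwords, i.e. 128 bits, matching the new
      // "case 128: return AMDGPU::SGPR_128RegClassID" label above.
      unsigned OldWidth = 4;               // dwords (old unit)
      unsigned NewWidth = dwordsToBits(4); // bits (new unit)
      assert(NewWidth == 128);
      // The consecutive-index check rescales the same way:
      //   old: Reg1 == Reg + OldWidth
      //   new: Reg1 == Reg + NewWidth / 32
      assert(OldWidth == NewWidth / 32);
      return 0;
    }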
- AlignSize = std::min(RegWidth, 4u); + AlignSize = std::min(RegWidth / 32, 4u); } if (RegNum % AlignSize != 0) { @@ -2495,8 +2646,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, return RC.getRegister(RegIdx); } -bool -AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { +bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { int64_t RegLo, RegHi; if (!skipToken(AsmToken::LBrac, "missing register index")) return false; @@ -2534,7 +2684,7 @@ AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { } Num = static_cast<unsigned>(RegLo); - Width = (RegHi - RegLo) + 1; + RegWidth = 32 * ((RegHi - RegLo) + 1); return true; } @@ -2545,7 +2695,7 @@ unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, unsigned Reg = getSpecialRegForName(getTokenStr()); if (Reg) { RegNum = 0; - RegWidth = 1; + RegWidth = 32; RegKind = IS_SPECIAL; Tokens.push_back(getToken()); lex(); // skip register name @@ -2577,7 +2727,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, Error(Loc, "invalid register index"); return AMDGPU::NoRegister; } - RegWidth = 1; + RegWidth = 32; } else { // Range of registers: v[XX:YY]. ":YY" is optional. if (!ParseRegRange(RegNum, RegWidth)) @@ -2603,7 +2753,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, auto Loc = getLoc(); if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) return AMDGPU::NoRegister; - if (RegWidth != 1) { + if (RegWidth != 32) { Error(Loc, "expected a single 32-bit register"); return AMDGPU::NoRegister; } @@ -2618,7 +2768,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, Tokens)) { return AMDGPU::NoRegister; } - if (NextRegWidth != 1) { + if (NextRegWidth != 32) { Error(Loc, "expected a single 32-bit register"); return AMDGPU::NoRegister; } @@ -2721,7 +2871,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, return true; MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); - int64_t NewMax = DwordRegIndex + RegWidth - 1; + int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; int64_t OldCount; if (!Sym->isVariable()) @@ -2761,7 +2911,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { // TODO: add syntactic sugar for 1/(2*PI) - assert(!isRegister()); + if (isRegister()) + return MatchOperand_NoMatch; assert(!isModifier()); const auto& Tok = getToken(); @@ -2927,7 +3078,7 @@ AMDGPUAsmParser::isModifier() { // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 // Negative fp literals with preceding "-" are -// handled likewise for unifomtity +// handled likewise for uniformity // bool AMDGPUAsmParser::parseSP3NegModifier() { @@ -3110,7 +3261,8 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { static ArrayRef<unsigned> getAllVariants() { static const unsigned Variants[] = { AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, + AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP }; return makeArrayRef(Variants); @@ -3118,6 +3270,10 @@ static ArrayRef<unsigned> getAllVariants() { // What asm variants we should check ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { + if (isForcedDPP() && isForcedVOP3()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; + return makeArrayRef(Variants); + } if (getForcedEncodingSize() == 32) { static const unsigned
Variants[] = {AMDGPUAsmVariants::DEFAULT}; return makeArrayRef(Variants); @@ -3143,6 +3299,9 @@ ArrayRef AMDGPUAsmParser::getMatchedVariants() const { } StringRef AMDGPUAsmParser::getMatchedVariantName() const { + if (isForcedDPP() && isForcedVOP3()) + return "e64_dpp"; + if (getForcedEncodingSize() == 32) return "e32"; @@ -3231,10 +3390,13 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { // 64-bit shift instructions can use only one scalar value input case AMDGPU::V_LSHLREV_B64_e64: case AMDGPU::V_LSHLREV_B64_gfx10: + case AMDGPU::V_LSHLREV_B64_e64_gfx11: case AMDGPU::V_LSHRREV_B64_e64: case AMDGPU::V_LSHRREV_B64_gfx10: + case AMDGPU::V_LSHRREV_B64_e64_gfx11: case AMDGPU::V_ASHRREV_I64_e64: case AMDGPU::V_ASHRREV_I64_gfx10: + case AMDGPU::V_ASHRREV_I64_e64_gfx11: case AMDGPU::V_LSHL_B64_e64: case AMDGPU::V_LSHR_B64_e64: case AMDGPU::V_ASHR_I64_e64: @@ -3305,8 +3467,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, // flat_scratch_lo, flat_scratch_hi // are theoretically valid but they are disabled anyway. // Note that this code mimics SIInstrInfo::verifyInstruction - if (!SGPRsUsed.count(LastSGPR)) { - SGPRsUsed.insert(LastSGPR); + if (SGPRsUsed.insert(LastSGPR).second) { ++ConstantBusUseCount; } } else { // Expression or a literal @@ -3369,7 +3530,6 @@ AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, assert(DstIdx != -1); const MCOperand &Dst = Inst.getOperand(DstIdx); assert(Dst.isReg()); - const unsigned DstReg = mc2PseudoReg(Dst.getReg()); const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; @@ -3377,8 +3537,8 @@ AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, if (SrcIdx == -1) break; const MCOperand &Src = Inst.getOperand(SrcIdx); if (Src.isReg()) { - const unsigned SrcReg = mc2PseudoReg(Src.getReg()); - if (isRegIntersect(DstReg, SrcReg, TRI)) { + if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { + const unsigned SrcReg = mc2PseudoReg(Src.getReg()); Error(getRegLoc(SrcReg, Operands), "destination must be different than all sources"); return false; @@ -3403,13 +3563,13 @@ bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { return true; } -bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { +Optional AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opc); if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) - return true; + return None; int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); @@ -3418,7 +3578,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { assert(VDataIdx != -1); if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray - return true; + return None; unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; @@ -3426,15 +3586,22 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { if (DMask == 0) DMask = 1; + bool isPackedD16 = false; unsigned DataSize = (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); if (hasPackedD16()) { int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); - if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) + isPackedD16 = D16Idx >= 0; + if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) DataSize = (DataSize + 1) / 2; } - return (VDataSize / 4) == DataSize + TFESize; + if ((VDataSize / 4) == DataSize + TFESize) + return None; + + return StringRef(isPackedD16 + ? "image data size does not match dmask, d16 and tfe" + : "image data size does not match dmask and tfe"); } bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { @@ -3607,7 +3774,7 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, auto Reg = mc2PseudoReg(Src0.getReg()); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (isSGPR(Reg, TRI)) { + if (!isGFX90A() && isSGPR(Reg, TRI)) { Error(getRegLoc(Reg, Operands), "source operand must be either a VGPR or an inline constant"); return false; @@ -3641,7 +3808,7 @@ bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) return true; - if (isRegIntersect(Src2Reg, DstReg, TRI)) { + if (TRI->regsOverlap(Src2Reg, DstReg)) { Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), "source 2 operand must not partially overlap with dst"); return false; @@ -3861,7 +4028,7 @@ Optional AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { const auto &Src = Inst.getOperand(SrcIdx); if (Src.isReg() && Src.getReg() == LDS_DIRECT) { - if (isGFX90A()) + if (isGFX90A() || isGFX11Plus()) return StringRef("lds_direct is not supported on this GPU"); if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) @@ -4009,6 +4176,20 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { if (OpSel & ~3) return false; } + + if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx != -1) { + if (Inst.getOperand(OpSelIdx).getImm() != 0) + return false; + } + int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); + if (OpSelHiIdx != -1) { + if (Inst.getOperand(OpSelHiIdx).getImm() != -1) + return false; + } + } + return true; } @@ -4179,6 +4360,47 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { return true; } +SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (Op.isBLGP()) + return Op.getStartLoc(); + } + return SMLoc(); +} + +bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, + const OperandVector &Operands) { + unsigned Opc = Inst.getOpcode(); + int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); + if (BlgpIdx == -1) + return true; + SMLoc BLGPLoc = getBLGPLoc(Operands); + if (!BLGPLoc.isValid()) + return true; + bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); + auto FB = getFeatureBits(); + bool UsesNeg = false; + if (FB[AMDGPU::FeatureGFX940Insts]) { + switch (Opc) { + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: + UsesNeg = true; + } + } + + if (IsNeg == UsesNeg) + return true; + + Error(BLGPLoc, + UsesNeg ? 
"invalid modifier: blgp is not supported" + : "invalid modifier: neg is not supported"); + + return false; +} + // gfx90a has an undocumented limitation: // DS_GWS opcodes must use even aligned registers. bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, @@ -4218,13 +4440,19 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, unsigned CPol = Inst.getOperand(CPolPos).getImm(); uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; - if ((TSFlags & (SIInstrFlags::SMRD)) && - (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { - Error(IDLoc, "invalid cache policy for SMRD instruction"); - return false; + if (TSFlags & SIInstrFlags::SMRD) { + if (CPol && (isSI() || isCI())) { + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); + Error(S, "cache policy is not supported for SMRD instructions"); + return false; + } + if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { + Error(IDLoc, "invalid cache policy for SMEM instruction"); + return false; + } } - if (isGFX90A() && (CPol & CPol::SCC)) { + if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); @@ -4237,15 +4465,18 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, if (TSFlags & SIInstrFlags::IsAtomicRet) { if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { - Error(IDLoc, "instruction must use glc"); + Error(IDLoc, isGFX940() ? "instruction must use sc0" + : "instruction must use glc"); return false; } } else { if (CPol & CPol::GLC) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); - S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); - Error(S, "instruction must not use glc"); + S = SMLoc::getFromPointer( + &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); + Error(S, isGFX940() ? "instruction must not use sc0" + : "instruction must not use glc"); return false; } } @@ -4253,6 +4484,47 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, return true; } +bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, + const OperandVector &Operands, + const SMLoc &IDLoc) { + if (isGFX940()) + return true; + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != + (SIInstrFlags::VALU | SIInstrFlags::FLAT)) + return true; + // This is FLAT LDS DMA. + + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); + StringRef CStr(S.getPointer()); + if (!CStr.startswith("lds")) { + // This is incorrectly selected LDS DMA version of a FLAT load opcode. + // And LDS version should have 'lds' modifier, but it follows optional + // operands so its absense is ignored by the matcher. 
+ Error(IDLoc, "invalid operands for instruction"); + return false; + } + + return true; +} + +bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { + if (!isGFX11Plus()) + return true; + for (auto &Operand : Operands) { + if (!Operand->isReg()) + continue; + unsigned Reg = Operand->getReg(); + if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { + Error(getRegLoc(Reg, Operands), + "execz and vccz are not supported on this GPU"); + return false; + } + } + return true; +} + bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands) { @@ -4302,9 +4574,8 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "invalid dim; must be MSAA type"); return false; } - if (!validateMIMGDataSize(Inst)) { - Error(IDLoc, - "image data size does not match dmask and tfe"); + if (auto ErrMsg = validateMIMGDataSize(Inst)) { + Error(IDLoc, *ErrMsg); return false; } if (!validateMIMGAddrSize(Inst)) { @@ -4357,6 +4628,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, return false; } + if (!validateBLGP(Inst, Operands)) { + return false; + } + if (!validateDivScale(Inst)) { Error(IDLoc, "ABS not allowed in VOP3B instructions"); return false; @@ -4364,6 +4639,13 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateCoherencyBits(Inst, Operands, IDLoc)) { return false; } + if (!validateExeczVcczOperands(Operands)) { + return false; + } + + if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { + return false; + } return true; } @@ -4606,6 +4888,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { SMRange VGPRRange; uint64_t NextFreeVGPR = 0; uint64_t AccumOffset = 0; + uint64_t SharedVGPRCount = 0; SMRange SGPRRange; uint64_t NextFreeSGPR = 0; @@ -4630,9 +4913,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (ID == ".end_amdhsa_kernel") break; - if (Seen.find(ID) != Seen.end()) + if (!Seen.insert(ID).second) return TokError(".amdhsa_ directives cannot be repeated"); - Seen.insert(ID); SMLoc ValStart = getLoc(); int64_t IVal; @@ -4833,6 +5115,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { return Error(IDRange.Start, "directive requires gfx10+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, ValRange); + } else if (ID == ".amdhsa_shared_vgpr_count") { + if (IVersion.Major < 10) + return Error(IDRange.Start, "directive requires gfx10+", IDRange); + SharedVGPRCount = Val; + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, + COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, + ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, @@ -4922,6 +5211,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { (AccumOffset / 4 - 1)); } + if (IVersion.Major == 10) { + // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS + if (SharedVGPRCount && EnableWavefrontSize32) { + return TokError("shared_vgpr_count directive not valid on " + "wavefront size 32"); + } + if (SharedVGPRCount * 2 + VGPRBlocks > 63) { + return TokError("shared_vgpr_count*2 + " + "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " + "exceed 63\n"); + } + } + getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, ReserveFlatScr); @@ -5253,8 +5555,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { return Error(AlignLoc, "alignment is too large"); } - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.amdgpu_lds' directive")) + if (parseEOL()) 
return true; Symbol->redefineIfPossible(); @@ -5313,26 +5614,21 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) { - for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); - R.isValid(); ++R) { - if (*R == RegNo) - return isGFX9Plus(); - } + if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) + return isGFX9Plus(); - // GFX10 has 2 more SGPRs 104 and 105. - for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); - R.isValid(); ++R) { - if (*R == RegNo) - return hasSGPR104_SGPR105(); - } + // GFX10+ has 2 more SGPRs 104 and 105. + if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) + return hasSGPR104_SGPR105(); switch (RegNo) { case AMDGPU::SRC_SHARED_BASE: case AMDGPU::SRC_SHARED_LIMIT: case AMDGPU::SRC_PRIVATE_BASE: case AMDGPU::SRC_PRIVATE_LIMIT: - case AMDGPU::SRC_POPS_EXITING_WAVE_ID: return isGFX9Plus(); + case AMDGPU::SRC_POPS_EXITING_WAVE_ID: + return isGFX9Plus() && !isGFX11Plus(); case AMDGPU::TBA: case AMDGPU::TBA_LO: case AMDGPU::TBA_HI: @@ -5355,7 +5651,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, if (isSI() || isGFX10Plus()) { // No flat_scr on SI. - // On GFX10 flat scratch is not a valid register operand and can only be + // On GFX10Plus flat scratch is not a valid register operand and can only be // accessed with s_setreg/s_getreg. switch (RegNo) { case AMDGPU::FLAT_SCR: @@ -5369,11 +5665,8 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that // SI/CI have. - for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); - R.isValid(); ++R) { - if (*R == RegNo) - return hasSGPR102_SGPR103(); - } + if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) + return hasSGPR102_SGPR103(); return true; } @@ -5381,8 +5674,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, OperandMode Mode) { + OperandMatchResultTy ResTy = parseVOPD(Operands); + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + isToken(AsmToken::EndOfStatement)) + return ResTy; + // Try to parse with a custom parser - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + ResTy = MatchOperandParserImpl(Operands, Mnemonic); // If we successfully parsed the operand or if there as an error parsing, // we are done. @@ -5435,7 +5733,11 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { setForcedDPP(false); setForcedSDWA(false); - if (Name.endswith("_e64")) { + if (Name.endswith("_e64_dpp")) { + setForcedDPP(true); + setForcedEncodingSize(64); + return Name.substr(0, Name.size() - 8); + } else if (Name.endswith("_e64")) { setForcedEncodingSize(64); return Name.substr(0, Name.size() - 4); } else if (Name.endswith("_e32")) { @@ -5451,11 +5753,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { return Name; } +static void applyMnemonicAliases(StringRef &Mnemonic, + const FeatureBitset &Features, + unsigned VariantID); + bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // Add the instruction mnemonic Name = parseMnemonicSuffix(Name); + + // If the target architecture uses MnemonicAlias, call it here to parse + // operands correctly. 
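The parseMnemonicSuffix() hunk above adds a "_e64_dpp" case for the new VOP3-DPP encoding. A minimal standalone sketch of the suffix stripping, not the LLVM API (the real routine also handles "_e32" and "_sdwa"); the "_e64_dpp" test must run before the "_dpp" test, since any name ending in "_e64_dpp" also ends in "_dpp" and only the longer suffix forces both the 64-bit encoding and DPP:

    #include <string>

    struct Forced { bool DPP = false; unsigned EncodingSize = 0; };

    static bool endsWith(const std::string &S, const std::string &Suf) {
      return S.size() >= Suf.size() &&
             S.compare(S.size() - Suf.size(), Suf.size(), Suf) == 0;
    }

    Forced stripMnemonicSuffix(std::string &Name) {
      Forced F;
      if (endsWith(Name, "_e64_dpp")) {        // checked first: superset suffix
        F.DPP = true;
        F.EncodingSize = 64;
        Name.resize(Name.size() - 8);
      } else if (endsWith(Name, "_e64")) {
        F.EncodingSize = 64;
        Name.resize(Name.size() - 4);
      } else if (endsWith(Name, "_dpp")) {
        F.DPP = true;
        Name.resize(Name.size() - 4);
      }
      return F;
    }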
+ applyMnemonicAliases(Name, getAvailableFeatures(), 0); + Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); bool IsMIMG = Name.startswith("image_"); @@ -5603,7 +5914,24 @@ AMDGPUAsmParser::parseCPol(OperandVector &Operands) { unsigned CPolOff = 0; SMLoc S = getLoc(); - if (trySkipId("glc")) + StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); + if (isGFX940() && !Mnemo.startswith("s_")) { + if (trySkipId("sc0")) + CPolOn = AMDGPU::CPol::SC0; + else if (trySkipId("nosc0")) + CPolOff = AMDGPU::CPol::SC0; + else if (trySkipId("nt")) + CPolOn = AMDGPU::CPol::NT; + else if (trySkipId("nont")) + CPolOff = AMDGPU::CPol::NT; + else if (trySkipId("sc1")) + CPolOn = AMDGPU::CPol::SC1; + else if (trySkipId("nosc1")) + CPolOff = AMDGPU::CPol::SC1; + else + return MatchOperand_NoMatch; + } + else if (trySkipId("glc")) CPolOn = AMDGPU::CPol::GLC; else if (trySkipId("noglc")) CPolOff = AMDGPU::CPol::GLC; @@ -5809,7 +6137,7 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; if (isGFX10Plus()) { - auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); + auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); if (Ufmt == UFMT_UNDEF) { Error(FormatLoc, "unsupported format"); return MatchOperand_ParseFail; @@ -5828,7 +6156,7 @@ AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, int64_t &Format) { using namespace llvm::AMDGPU::MTBUFFormat; - auto Id = getUnifiedFormat(FormatStr); + auto Id = getUnifiedFormat(FormatStr, getSTI()); if (Id == UFMT_UNDEF) return MatchOperand_NoMatch; @@ -5969,6 +6297,7 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, bool IsGdsHardcoded) { OptionalImmIndexMap OptionalIdx; + AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -5986,13 +6315,10 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, // Handle optional arguments OptionalIdx[Op.getImmTy()] = i; - } - AMDGPUOperand::ImmTy OffsetType = - (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || - Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || - Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : - AMDGPUOperand::ImmTyOffset; + if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) + OffsetType = AMDGPUOperand::ImmTySwizzle; + } addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); @@ -6034,7 +6360,7 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { continue; } - if (Op.isToken() && Op.getToken() == "done") + if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) continue; // Handle optional arguments @@ -6157,11 +6483,179 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { return MatchOperand_Success; } +bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { + SMLoc FieldLoc = getLoc(); + StringRef FieldName = getTokenStr(); + if (!skipToken(AsmToken::Identifier, "expected a field name") || + !skipToken(AsmToken::LParen, "expected a left parenthesis")) + return false; + + SMLoc ValueLoc = getLoc(); + StringRef ValueName = getTokenStr(); + if (!skipToken(AsmToken::Identifier, "expected a value name") || + !skipToken(AsmToken::RParen, "expected a right parenthesis")) + return false; + + unsigned Shift; + if (FieldName == "instid0") { + Shift = 0; + } else if (FieldName == "instskip") { + Shift = 4; + } else if (FieldName == "instid1") { + Shift = 7; + } else { + Error(FieldLoc, "invalid field name " + FieldName); + return false; + } + + int Value; + if (Shift == 4) { + // Parse values for instskip. + Value = StringSwitch<int>(ValueName) + .Case("SAME", 0) + .Case("NEXT", 1) + .Case("SKIP_1", 2) + .Case("SKIP_2", 3) + .Case("SKIP_3", 4) + .Case("SKIP_4", 5) + .Default(-1); + } else { + // Parse values for instid0 and instid1. + Value = StringSwitch<int>(ValueName) + .Case("NO_DEP", 0) + .Case("VALU_DEP_1", 1) + .Case("VALU_DEP_2", 2) + .Case("VALU_DEP_3", 3) + .Case("VALU_DEP_4", 4) + .Case("TRANS32_DEP_1", 5) + .Case("TRANS32_DEP_2", 6) + .Case("TRANS32_DEP_3", 7) + .Case("FMA_ACCUM_CYCLE_1", 8) + .Case("SALU_CYCLE_1", 9) + .Case("SALU_CYCLE_2", 10) + .Case("SALU_CYCLE_3", 11) + .Default(-1); + } + if (Value < 0) { + Error(ValueLoc, "invalid value name " + ValueName); + return false; + } + + Delay |= Value << Shift; + return true; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { + int64_t Delay = 0; + SMLoc S = getLoc(); + + if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { + do { + if (!parseDelay(Delay)) + return MatchOperand_ParseFail; + } while (trySkipToken(AsmToken::Pipe)); + } else { + if (!parseExpr(Delay)) + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); + return MatchOperand_Success; +} + bool AMDGPUOperand::isSWaitCnt() const { return isImm(); } +bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } + +//===----------------------------------------------------------------------===// +// DepCtr +//===----------------------------------------------------------------------===// + +void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, + StringRef DepCtrName) { + switch (ErrorId) { + case OPR_ID_UNKNOWN: + Error(Loc, Twine("invalid counter name ", DepCtrName)); + return; + case OPR_ID_UNSUPPORTED: + Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); + return; + case OPR_ID_DUPLICATE: + Error(Loc, Twine("duplicate counter name ", DepCtrName)); + return; + case OPR_VAL_INVALID: + Error(Loc, Twine("invalid value for ", DepCtrName)); + return; + default: + assert(false); + } +} + +bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { + +
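+  // Illustration (not part of the patch) of the s_delay_alu encoding that
+  // parseDelay() above implements: instid0 occupies bits [3:0], instskip
+  // bits [6:4], and instid1 bits [10:7], with the field values taken from
+  // the StringSwitch tables. For example
+  //   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+  // packs as (1 << 0) | (1 << 4) | (9 << 7) == 0x491.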
using namespace llvm::AMDGPU::DepCtr; + + SMLoc DepCtrLoc = getLoc(); + StringRef DepCtrName = getTokenStr(); + + if (!skipToken(AsmToken::Identifier, "expected a counter name") || + !skipToken(AsmToken::LParen, "expected a left parenthesis")) + return false; + + int64_t ExprVal; + if (!parseExpr(ExprVal)) + return false; + + unsigned PrevOprMask = UsedOprMask; + int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); + + if (CntVal < 0) { + depCtrError(DepCtrLoc, CntVal, DepCtrName); + return false; + } + + if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) + return false; + + if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { + if (isToken(AsmToken::EndOfStatement)) { + Error(getLoc(), "expected a counter name"); + return false; + } + } + + unsigned CntValMask = PrevOprMask ^ UsedOprMask; + DepCtr = (DepCtr & ~CntValMask) | CntVal; + return true; +} + +OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { + using namespace llvm::AMDGPU::DepCtr; + + int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); + SMLoc Loc = getLoc(); + + if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { + unsigned UsedOprMask = 0; + while (!isToken(AsmToken::EndOfStatement)) { + if (!parseDepCtr(DepCtr, UsedOprMask)) + return MatchOperand_ParseFail; + } + } else { + if (!parseExpr(DepCtr)) + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } + //===----------------------------------------------------------------------===// // hwreg //===----------------------------------------------------------------------===// @@ -6175,7 +6669,7 @@ AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, // The register may be specified by name or using a numeric code HwReg.Loc = getLoc(); if (isToken(AsmToken::Identifier) && - (HwReg.Id = getHwregId(getTokenStr())) >= 0) { + (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { HwReg.IsSymbolic = true; lex(); // skip register name } else if (!parseExpr(HwReg.Id, "a register name")) { @@ -6208,15 +6702,18 @@ AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, using namespace llvm::AMDGPU::Hwreg; - if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { - Error(HwReg.Loc, - "specified hardware register is not supported on this GPU"); - return false; - } - if (!isValidHwreg(HwReg.Id)) { - Error(HwReg.Loc, - "invalid code of hardware register: only 6-bit values are legal"); - return false; + if (HwReg.IsSymbolic) { + if (HwReg.Id == OPR_ID_UNSUPPORTED) { + Error(HwReg.Loc, + "specified hardware register is not supported on this GPU"); + return false; + } + } else { + if (!isValidHwreg(HwReg.Id)) { + Error(HwReg.Loc, + "invalid code of hardware register: only 6-bit values are legal"); + return false; + } } if (!isValidHwregOffset(Offset.Id)) { Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); @@ -6238,7 +6735,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { SMLoc Loc = getLoc(); if (trySkipId("hwreg", AsmToken::LParen)) { - OperandInfoTy HwReg(ID_UNKNOWN_); + OperandInfoTy HwReg(OPR_ID_UNKNOWN); OperandInfoTy Offset(OFFSET_DEFAULT_); OperandInfoTy Width(WIDTH_DEFAULT_); if (parseHwregBody(HwReg, Offset, Width) && @@ -6275,7 +6772,8 @@ AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, using namespace llvm::AMDGPU::SendMsg; Msg.Loc = getLoc(); - if (isToken(AsmToken::Identifier) && 
(Msg.Id = getMsgId(getTokenStr())) >= 0) { + if (isToken(AsmToken::Identifier) && + (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { Msg.IsSymbolic = true; lex(); // skip message name } else if (!parseExpr(Msg.Id, "a message name")) { @@ -6310,15 +6808,22 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, using namespace llvm::AMDGPU::SendMsg; // Validation strictness depends on whether message is specified - // in a symbolc or in a numeric form. In the latter case + // in a symbolic or in a numeric form. In the latter case // only encoding possibility is checked. bool Strict = Msg.IsSymbolic; - if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { - Error(Msg.Loc, "invalid message id"); - return false; + if (Strict) { + if (Msg.Id == OPR_ID_UNSUPPORTED) { + Error(Msg.Loc, "specified message id is not supported on this GPU"); + return false; + } + } else { + if (!isValidMsgId(Msg.Id, getSTI())) { + Error(Msg.Loc, "invalid message id"); + return false; + } } - if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { + if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { if (Op.IsDefined) { Error(Op.Loc, "message does not support operations"); } else { @@ -6330,7 +6835,8 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, Error(Op.Loc, "invalid operation id"); return false; } - if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { + if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && + Stream.IsDefined) { Error(Stream.Loc, "message operation does not support streams"); return false; } @@ -6349,7 +6855,7 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { SMLoc Loc = getLoc(); if (trySkipId("sendmsg", AsmToken::LParen)) { - OperandInfoTy Msg(ID_UNKNOWN_); + OperandInfoTy Msg(OPR_ID_UNKNOWN); OperandInfoTy Op(OP_NONE_); OperandInfoTy Stream(STREAM_ID_NONE_); if (parseSendMsgBody(Msg, Op, Stream) && @@ -6610,9 +7116,10 @@ AMDGPUAsmParser::getToken() const { return Parser.getTok(); } -AsmToken -AMDGPUAsmParser::peekToken() { - return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); +AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { + return isToken(AsmToken::EndOfStatement) + ? getToken() + : getLexer().peekTok(ShouldSkipSpace); } void @@ -7078,8 +7585,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsLds) { - bool IsLdsOpcode = IsLds; - bool HasLdsModifier = false; OptionalImmIndexMap OptionalIdx; unsigned FirstOperandIdx = 1; bool IsAtomicReturn = false; @@ -7123,8 +7628,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, continue; } - HasLdsModifier |= Op.isLDS(); - // Handle tokens like 'offen' which are sometimes hard-coded into the // asm string. There are no MCInst operands for these. if (Op.isToken()) { @@ -7136,25 +7639,10 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, OptionalIdx[Op.getImmTy()] = i; } - // This is a workaround for an llvm quirk which may result in an - // incorrect instruction selection. Lds and non-lds versions of - // MUBUF instructions are identical except that lds versions - // have mandatory 'lds' modifier. However this modifier follows - // optional modifiers and llvm asm matcher regards this 'lds' - // modifier as an optional one. As a result, an lds version - // of opcode may be selected even if it has no 'lds' modifier. - if (IsLdsOpcode && !HasLdsModifier) { - int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); - if (NoLdsOpcode != -1) { // Got lds version - correct it. 
- Inst.setOpcode(NoLdsOpcode); - IsLdsOpcode = false; - } - } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); - if (!IsLdsOpcode) { // tfe is not legal with lds opcodes + if (!IsLds) { // tfe is not legal with lds opcodes addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); @@ -7327,7 +7815,8 @@ bool AMDGPUOperand::isSMRDOffset8() const { } bool AMDGPUOperand::isSMEMOffset() const { - return isImm(); // Offset range is checked later by validator. + return isImmTy(ImmTyNone) || + isImmTy(ImmTyOffset); // Offset range is checked later by validator. } bool AMDGPUOperand::isSMRDLiteralOffset() const { @@ -7415,10 +7904,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, - {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, - {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, - {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, - {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, @@ -7429,9 +7914,17 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, + {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, + {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, + {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, + {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, + {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, + {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, - {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} + {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, + {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, + {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} }; void AMDGPUAsmParser::onBeginOfFile() { @@ -7497,8 +7990,17 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) res = parseDim(Operands); } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { res = parseCPol(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { + res = parseDPP8(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { + res = parseDPPCtrl(Operands); } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); + if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { + res = parseOperandArrayWithPrefix("neg", Operands, + AMDGPUOperand::ImmTyBLGP, + nullptr); + } } if (res != MatchOperand_NoMatch) { return res; @@ -7596,6 +8098,66 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) +{ + OptionalImmIndexMap OptionalIdx; + unsigned Opc = Inst.getOpcode(); + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < 
Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("unhandled operand type"); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx != -1) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); + + if (OpSelIdx == -1) + return; + + const int Ops[] = { AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 }; + const int ModOps[] = { AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers }; + + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); + if (OpIdx == -1) + break; + + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + if (ModOps[J] == AMDGPU::OpName::src0_modifiers && + (OpSel & (1 << 3)) != 0) + ModVal |= SISrcMods::DST_OP_SEL; + + Inst.getOperand(ModIdx).setImm(ModVal); + } +} + void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx) { unsigned Opc = Inst.getOpcode(); @@ -7652,9 +8214,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { + Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || + Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { auto it = Inst.begin(); std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 @@ -7731,6 +8296,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, if (OpIdx == -1) break; + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + + if (ModIdx == -1) + continue; + uint32_t ModVal = 0; if ((OpSel & (1 << J)) != 0) @@ -7745,8 +8315,6 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, if ((NegHi & (1 << J)) != 0) ModVal |= SISrcMods::NEG_HI; - int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); } } @@ -7757,6 +8325,118 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { cvtVOP3P(Inst, Operands, OptIdx); } +//===----------------------------------------------------------------------===// +// VOPD +//===----------------------------------------------------------------------===// + +OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { + if (!hasVOPD(getSTI())) + return MatchOperand_NoMatch; + + if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { + SMLoc S = getLoc(); + lex(); + lex(); + 
Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); + const MCExpr *Expr; + if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); + return MatchOperand_Success; + } + Error(S, "invalid VOPD :: usage"); + return MatchOperand_ParseFail; + } + return MatchOperand_NoMatch; +} + +// Create VOPD MCInst operands using parsed assembler operands. +// Parsed VOPD operands are ordered as follows: +// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' +// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] +// If both OpX and OpY have an imm, the first imm has a different name: +// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' +// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] +// MCInst operands have the following order: +// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] +void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { + auto addOp = [&](uint16_t i) { // NOLINT:function pointer + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + return; + } + if (Op.isImm()) { + Op.addImmOperands(Inst, 1); + return; + } + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. + if (Op.isToken()) { + return; + } + llvm_unreachable("Unhandled operand type in cvtVOPD"); + }; + + // Indices into MCInst.Operands + const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... + const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... + const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... + + unsigned Opc = Inst.getOpcode(); + bool HasVsrc1X = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; + bool HasImmX = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || + (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == + FmamkOpXImmMCIndex || + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == + FmaakOpXImmMCIndex)); + + bool HasVsrc1Y = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; + bool HasImmY = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= + MinOpYImmMCIndex + HasVsrc1X; + + // Indices of parsed operands relative to dst + const auto DstIdx = 0; + const auto Src0Idx = 1; + const auto Vsrc1OrImmIdx = 2; + + const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; + const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) + + // Offsets into parsed operands + const auto OpXFirstOperandOffset = 1; + const auto OpYFirstOperandOffset = + OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; + + // Order of addOp calls determines MC operand order + addOp(OpXFirstOperandOffset + DstIdx); // vdstX + addOp(OpYFirstOperandOffset + DstIdx); // vdstY + + addOp(OpXFirstOperandOffset + Src0Idx); // src0X + if (HasImmX) { + // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak + addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); + addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); + } else { + if (HasVsrc1X) // all except v_mov + addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X + } + + addOp(OpYFirstOperandOffset + Src0Idx); // src0Y + if (HasImmY) { + // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak + addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); + addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); + } else { 
+ if (HasVsrc1Y) // all except v_mov + addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y + } +} + //===----------------------------------------------------------------------===// // dpp //===----------------------------------------------------------------------===// @@ -8067,6 +8747,88 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); } +// Add dummy $old operand +void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, + const OperandVector &Operands, + bool IsDPP8) { + Inst.addOperand(MCOperand::createReg(0)); + cvtVOP3DPP(Inst, Operands, IsDPP8); +} + +void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { + OptionalImmIndexMap OptionalIdx; + unsigned Opc = Inst.getOpcode(); + bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + int Fi = 0; + for (unsigned E = Operands.size(); I != E; ++I) { + auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), + MCOI::TIED_TO); + if (TiedTo != -1) { + assert((unsigned)TiedTo < Inst.getNumOperands()); + // handle tied old or src2 for MAC instructions + Inst.addOperand(Inst.getOperand(TiedTo)); + } + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + // Add the register arguments + if (IsDPP8 && Op.isFI()) { + Fi = Op.getImm(); + } else if (HasModifiers && + isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + } else if (Op.isImm() && + Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { + assert(!HasModifiers && "Case should be unreachable with modifiers"); + assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); + Op.addImmOperands(Inst, 1); + } else if (Op.isImm()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("unhandled operand type"); + } + } + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + } + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + } + if (Desc.TSFlags & SIInstrFlags::VOP3P) + cvtVOP3P(Inst, Operands, OptionalIdx); + else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); + } + + if (IsDPP8) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); + using namespace llvm::AMDGPU::DPP; + Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); + } else { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); + } + } +} + +// Add dummy $old operand +void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, + const OperandVector &Operands, + bool IsDPP8) { + Inst.addOperand(MCOperand::createReg(0)); + cvtDPP(Inst, Operands, IsDPP8); +} + void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; @@ -8352,7 +9114,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { #define GET_MNEMONIC_CHECKER #include "AMDGPUGenAsmMatcher.inc" -// This fuction should be defined after auto-generated include so that we have +// This function should be defined after auto-generated include so that we have // MatchClassKind enum defined unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) { @@ -8431,3 +9193,27 @@ OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { } bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } + +//===----------------------------------------------------------------------===// +// LDSDIR +//===----------------------------------------------------------------------===// + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); +} + +bool AMDGPUOperand::isWaitVDST() const { + return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); +} + +//===----------------------------------------------------------------------===// +// VINTERP +//===----------------------------------------------------------------------===// + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); +} + +bool AMDGPUOperand::isWaitEXP() const { + return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); +} diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a535c8cc0918..a087323e5de7 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -35,11 +35,6 @@ class MUBUFAddr64Table { string OpName = Name; } -class MUBUFLdsTable { - bit IsLds = is_lds; - string OpName = Name; -} - class MTBUFAddr64Table { bit IsAddr64 = is_addr64; string OpName = Name; @@ -100,8 +95,8 @@ class MTBUF_Pseudo sccb_value = 0; } -class MTBUF_Real : - InstSI { +class MTBUF_Real : + InstSI { let isPseudo = 0; let isCodeGenOnly = 0; @@ -136,7 +131,7 @@ class MTBUF_Real : bits<3> nfmt = format{6-4}; // GFX90A+ only: instruction uses AccVGPR for data - // Bit superceedes tfe. + // Bit supersedes tfe. 
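// (Illustrative note, assuming vdata carries at least 10 encoded bits here:
// vdata{9} marks an AGPR data operand, and that flag is emitted in the
// encoding slot that previously held tfe, which is why acc supersedes tfe.)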
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); } @@ -320,7 +315,7 @@ class MUBUF_Pseudo idxen = 0; bits<1> addr64 = 0; bits<1> lds = 0; - bits<1> has_vdata = 1; + bits<1> has_vdata = !not(lds); bits<1> has_vaddr = 1; bits<1> has_glc = 1; bits<1> has_dlc = 1; @@ -337,8 +332,8 @@ class MUBUF_Pseudo IsBufferInv = 0; } -class MUBUF_Real : - InstSI { +class MUBUF_Real : + InstSI { let isPseudo = 0; let isCodeGenOnly = 0; @@ -360,6 +355,8 @@ class MUBUF_Real : let mayStore = ps.mayStore; let IsAtomicRet = ps.IsAtomicRet; let IsAtomicNoRet = ps.IsAtomicNoRet; + let VALU = ps.VALU; + let LGKM_CNT = ps.LGKM_CNT; bits<12> offset; bits<5> cpol; @@ -370,8 +367,8 @@ class MUBUF_Real : bits<8> soffset; // GFX90A+ only: instruction uses AccVGPR for data - // Bit superceedes tfe. - bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); + // Bit supersedes tfe. + bits<1> acc = !if(ps.has_vdata, vdata{9}, !if(ps.lds, ?, 0)); } @@ -486,16 +483,17 @@ class MUBUF_Load_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind, RegisterClass vdata_rc = getVregSrcForVT.ret, RegisterOperand vdata_op = getLdStRegisterOperand.ret> : MUBUF_Pseudo.ret, !if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))), - " $vdata, " # getMUBUFAsmOps.ret # "$cpol" # + !if(!or(isLds, isLdsOpc), " ", " $vdata, ") # getMUBUFAsmOps.ret # "$cpol" # !if(isLds, " lds", "$tfe") # "$swz", pattern>, MUBUF_SetupAddr { @@ -504,13 +502,16 @@ class MUBUF_Load_Pseudo .ret; + let VALU = isLds; } class MUBUF_Offset_Load_Pat : Pat < @@ -563,6 +564,20 @@ multiclass MUBUF_Pseudo_Loads_Lds { defm _LDS : MUBUF_Pseudo_Loads; } +multiclass MUBUF_Pseudo_Loads_LDSOpc { + + defvar legal_load_vt = !if(!eq(!cast(load_vt), !cast(v3f16)), v4f16, load_vt); + + def _OFFSET : MUBUF_Load_Pseudo ; + def _OFFEN : MUBUF_Load_Pseudo ; + def _IDXEN : MUBUF_Load_Pseudo ; + def _BOTHEN : MUBUF_Load_Pseudo ; +} + class MUBUF_Store_Pseudo (outs), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz), " $srsrc, $soffset$offset lds$cpol$swz"> { - let mayLoad = 0; + let LGKM_CNT = 1; + let mayLoad = 1; let mayStore = 1; let maybeAtomic = 1; @@ -623,6 +639,7 @@ class MUBUF_Pseudo_Store_Lds let has_vaddr = 0; let has_tfe = 0; let lds = 1; + let VALU = 1; let Uses = [EXEC, M0]; let AsmMatchConverter = "cvtMubufLds"; @@ -785,7 +802,7 @@ multiclass MUBUF_Pseudo_Atomics_RTN : + SDPatternOperator atomic = null_frag> : MUBUF_Pseudo_Atomics_NO_RTN, MUBUF_Pseudo_Atomics_RTN; @@ -898,6 +915,29 @@ defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < "buffer_load_dwordx4", v4i32 >; +defm BUFFER_LOAD_LDS_B32 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_b32", i32 +>; +defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_format_x", f32 +>; +defm BUFFER_LOAD_LDS_I8 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_i8", i32 +>; +defm BUFFER_LOAD_LDS_I16 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_i16", i32 +>; +defm BUFFER_LOAD_LDS_U8 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_u8", i32 +>; +defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_u16", i32 +>; + +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_16_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, 
zextloadi8_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>; @@ -909,21 +949,6 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; -// This is not described in AMD documentation, -// but 'lds' versions of these opcodes are available -// in at least GFX8+ chips. See Bug 37653. -let SubtargetPredicate = isGFX8GFX9 in { -defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", v2i32, 0, 1 ->; -defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", v3i32, 0, 1 ->; -defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", v4i32, 0, 1 ->; -} - defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < "buffer_store_byte", i32, truncstorei8_global >; @@ -943,82 +968,82 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < "buffer_store_dwordx4", v4i32, store_global >; defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32 + "buffer_atomic_swap", VGPR_32, i32 >; defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag + "buffer_atomic_cmpswap", VReg_64, v2i32 >; defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics < - "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32 + "buffer_atomic_add", VGPR_32, i32 >; defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32 + "buffer_atomic_sub", VGPR_32, i32 >; defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32 + "buffer_atomic_smin", VGPR_32, i32 >; defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32 + "buffer_atomic_umin", VGPR_32, i32 >; defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32 + "buffer_atomic_smax", VGPR_32, i32 >; defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32 + "buffer_atomic_umax", VGPR_32, i32 >; defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics < - "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32 + "buffer_atomic_and", VGPR_32, i32 >; defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics < - "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32 + "buffer_atomic_or", VGPR_32, i32 >; defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32 + "buffer_atomic_xor", VGPR_32, i32 >; defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32 + "buffer_atomic_inc", VGPR_32, i32 >; defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32 + "buffer_atomic_dec", VGPR_32, i32 >; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64 + "buffer_atomic_swap_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag + "buffer_atomic_cmpswap_x2", VReg_128, v2i64 >; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64 + "buffer_atomic_add_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub_x2", VReg_64, i64, 
atomic_load_sub_global_64 + "buffer_atomic_sub_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64 + "buffer_atomic_smin_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64 + "buffer_atomic_umin_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64 + "buffer_atomic_smax_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64 + "buffer_atomic_umax_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64 + "buffer_atomic_and_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64 + "buffer_atomic_or_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64 + "buffer_atomic_xor_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64 + "buffer_atomic_inc_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64 + "buffer_atomic_dec_x2", VReg_64, i64 >; let SubtargetPredicate = HasGFX10_BEncoding in @@ -1040,7 +1065,7 @@ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; } -let SubtargetPredicate = isGFX6GFX7GFX10 in { +let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics < "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag @@ -1051,6 +1076,11 @@ defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics < defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics < "buffer_atomic_fmax", VGPR_32, f32, null_frag >; + +} + +let SubtargetPredicate = isGFX6GFX7GFX10 in { + defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag >; @@ -1109,23 +1139,25 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>; -let SubtargetPredicate = HasAtomicFaddInsts in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN< "buffer_atomic_add_f32", VGPR_32, f32 >; + +let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16 >; -let OtherPredicates = [isGFX90APlus] in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN < +let OtherPredicates = [HasAtomicFaddRtnInsts] in +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN< "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32 >; + +let OtherPredicates = [isGFX90APlus] in defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32 >; -} -} // End SubtargetPredicate = HasAtomicFaddInsts //===----------------------------------------------------------------------===// // MTBUF Instructions @@ -1175,15 +1207,28 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", let SubtargetPredicate = isGFX90APlus in { def 
BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> { + let has_glc = 1; + let has_sccb = 1; + let InOperandList = (ins CPol_0:$cpol); + let AsmOperands = "$cpol"; } def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> { + let SubtargetPredicate = isGFX90AOnly; } - defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; - defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>; - defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; + defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; + defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; + defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus +def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { + let SubtargetPredicate = isGFX940Plus; + let has_glc = 1; + let has_sccb = 1; + let InOperandList = (ins CPol_0:$cpol); + let AsmOperands = "$cpol"; +} + let SubtargetPredicate = isGFX10Plus in { def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">; def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">; @@ -1364,75 +1409,169 @@ defm : MUBUF_StoreIntrinsicPat; // buffer_atomic patterns //===----------------------------------------------------------------------===// -multiclass BufferAtomicPatterns { +multiclass BufferAtomicPat { + foreach RtnMode = ["ret", "noret"] in { + + defvar Op = !cast(OpPrefix # "_" # RtnMode + # !if(isIntr, "", "_" # vt.Size)); + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + + def : GCNPat< + (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)), + (!cast(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT.ret:$vdata_in, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset) + >; + + def : GCNPat< + (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), + vt:$vdata_in)), + (!cast(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT.ret:$vdata_in, + VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset) + >; + + } // end foreach RtnMode +} + +multiclass BufferAtomicIntrPat { + defm : BufferAtomicPat; +} + +multiclass BufferAtomicCmpSwapPat { + foreach RtnMode = ["ret", "noret"] in { + + defvar Op = !cast("AMDGPUatomic_cmp_swap_global_" # RtnMode + # "_" # vt.Size); + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + + defvar OffsetResDag = (!cast(Inst # "_OFFSET" # InstSuffix) + getVregSrcForVT.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset); + def : GCNPat< + (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), data_vt:$vdata_in)), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS OffsetResDag, getVregSrcForVT.ret)), + !if(!eq(vt, i32), sub0, sub0_sub1)), + OffsetResDag) + >; + + defvar Addr64ResDag = (!cast(Inst # "_ADDR64" # InstSuffix) + getVregSrcForVT.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset); + def : GCNPat< + (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), + data_vt:$vdata_in)), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS Addr64ResDag, getVregSrcForVT.ret)), + !if(!eq(vt, i32), sub0, sub0_sub1)), + Addr64ResDag) + >; + + } // end foreach RtnMode +} + +foreach Ty = [i32, i64] in { + +defvar Suffix = !if(!eq(Ty, i64), "_X2", ""); + +defm : 
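// A sketch of what one instantiation of the BufferAtomicPat multiclass
// above expands to (paraphrased, not a verbatim dump): the "ret" mode
// matches the _ret_ PatFrag and selects the _RTN opcode, while "noret"
// matches _noret_ and selects the plain one.
//
//   defm : BufferAtomicPat<"atomic_swap_global", i32, "BUFFER_ATOMIC_SWAP">;
//   // -> GCNPat<(atomic_swap_global_ret_32 ...),   BUFFER_ATOMIC_SWAP_OFFSET_RTN ...>
//   // -> GCNPat<(atomic_swap_global_noret_32 ...), BUFFER_ATOMIC_SWAP_OFFSET ...>
//   //    plus the matching _ADDR64/_ADDR64_RTN forms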
BufferAtomicPat<"atomic_swap_global", Ty, "BUFFER_ATOMIC_SWAP" # Suffix>; +defm : BufferAtomicPat<"atomic_load_add_global", Ty, "BUFFER_ATOMIC_ADD" # Suffix>; +defm : BufferAtomicPat<"atomic_load_sub_global", Ty, "BUFFER_ATOMIC_SUB" # Suffix>; +defm : BufferAtomicPat<"atomic_load_min_global", Ty, "BUFFER_ATOMIC_SMIN" # Suffix>; +defm : BufferAtomicPat<"atomic_load_umin_global", Ty, "BUFFER_ATOMIC_UMIN" # Suffix>; +defm : BufferAtomicPat<"atomic_load_max_global", Ty, "BUFFER_ATOMIC_SMAX" # Suffix>; +defm : BufferAtomicPat<"atomic_load_umax_global", Ty, "BUFFER_ATOMIC_UMAX" # Suffix>; +defm : BufferAtomicPat<"atomic_load_and_global", Ty, "BUFFER_ATOMIC_AND" # Suffix>; +defm : BufferAtomicPat<"atomic_load_or_global", Ty, "BUFFER_ATOMIC_OR" # Suffix>; +defm : BufferAtomicPat<"atomic_load_xor_global", Ty, "BUFFER_ATOMIC_XOR" # Suffix>; +defm : BufferAtomicPat<"atomic_inc_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>; +defm : BufferAtomicPat<"atomic_dec_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>; + +} // end foreach Ty + +defm : BufferAtomicCmpSwapPat; +defm : BufferAtomicCmpSwapPat; + +multiclass SIBufferAtomicPat RtnModes = ["ret", "noret"]> { + foreach RtnMode = RtnModes in { + + defvar Op = !cast(!if(!eq(RtnMode, "none"), + OpPrefix, OpPrefix # "_" # RtnMode)); + defvar InstSuffix = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")), + "_RTN", ""); + defvar CachePolicy = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")), + (set_glc $cachepolicy), (timm:$cachepolicy)); + def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0)), - (!cast(opcode # _OFFSET_RTN) + (!cast(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (set_glc $cachepolicy)) + (as_i16timm $offset), CachePolicy) >; def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, timm)), - (!cast(opcode # _IDXEN_RTN) getVregSrcForVT.ret:$vdata_in, - VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (set_glc $cachepolicy)) + (!cast(Inst # "_IDXEN" # InstSuffix) + getVregSrcForVT.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, + SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, 0)), - (!cast(opcode # _OFFEN_RTN) getVregSrcForVT.ret:$vdata_in, - VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (set_glc $cachepolicy)) + (!cast(Inst # "_OFFEN" # InstSuffix) + getVregSrcForVT.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, + SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, timm)), - (!cast(opcode # _BOTHEN_RTN) + (!cast(Inst # "_BOTHEN" # InstSuffix) getVregSrcForVT.ret:$vdata_in, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)) + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; -} - -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : 
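// The set_glc transform used by the returning modes above is presumably an
// SDNodeXForm that ORs the GLC bit into the immediate cache policy - a
// sketch along these lines, not the verbatim upstream definition:
//
//   def set_glc : SDNodeXForm<timm, [{
//     return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC,
//                                      SDLoc(N), MVT::i32);
//   }]>;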
BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; -defm : BufferAtomicPatterns; + } // end foreach RtnMode +} + +defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i32, "BUFFER_ATOMIC_SWAP">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", f32, "BUFFER_ATOMIC_SWAP">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i32, "BUFFER_ATOMIC_ADD">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i32, "BUFFER_ATOMIC_SUB">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smin", i32, "BUFFER_ATOMIC_SMIN">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umin", i32, "BUFFER_ATOMIC_UMIN">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smax", i32, "BUFFER_ATOMIC_SMAX">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umax", i32, "BUFFER_ATOMIC_UMAX">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_and", i32, "BUFFER_ATOMIC_AND">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i32, "BUFFER_ATOMIC_OR">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i32, "BUFFER_ATOMIC_XOR">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i32, "BUFFER_ATOMIC_INC">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i32, "BUFFER_ATOMIC_DEC">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["none"]>; +defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i64, "BUFFER_ATOMIC_SWAP_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i64, "BUFFER_ATOMIC_ADD_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i64, "BUFFER_ATOMIC_SUB_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smin", i64, "BUFFER_ATOMIC_SMIN_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umin", i64, "BUFFER_ATOMIC_UMIN_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smax", i64, "BUFFER_ATOMIC_SMAX_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umax", i64, "BUFFER_ATOMIC_UMAX_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_and", i64, "BUFFER_ATOMIC_AND_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i64, "BUFFER_ATOMIC_OR_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">; + +let SubtargetPredicate = isGFX6GFX7GFX10Plus in { + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; +} let SubtargetPredicate = isGFX6GFX7GFX10 in { - defm : BufferAtomicPatterns; - defm : BufferAtomicPatterns; - defm : BufferAtomicPatterns; - defm : BufferAtomicPatterns; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">; } class NoUseBufferAtomic : PatFrag < @@ -1482,71 +1621,89 @@ multiclass BufferAtomicPatterns_NO_RTN; } -let SubtargetPredicate = HasAtomicFaddInsts in { +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in defm : BufferAtomicPatterns_NO_RTN; + +let 
SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in defm : BufferAtomicPatterns_NO_RTN; -} -let SubtargetPredicate = isGFX90APlus in { - defm : BufferAtomicPatterns; - defm : BufferAtomicPatterns; +let SubtargetPredicate = HasAtomicFaddRtnInsts in + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32">; - defm : BufferAtomicPatterns; - defm : BufferAtomicPatterns; - defm : BufferAtomicPatterns; +let SubtargetPredicate = isGFX90APlus in { + defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; + defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; + defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16">; + + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">; } // End SubtargetPredicate = isGFX90APlus +foreach RtnMode = ["ret", "noret"] in { + +defvar Op = !cast(SIbuffer_atomic_cmpswap # "_" # RtnMode); +defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); +defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), + (timm:$cachepolicy)); + +defvar OffsetResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_OFFSET" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0), + OffsetResDag) >; +defvar IdxenResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_IDXEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), - sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0), + IdxenResDag) >; +defvar OffenResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_OFFEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), - sub0) + !if(!eq(RtnMode, "ret"), + 
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0), + OffenResDag) >; +defvar BothenResDag = (!cast("BUFFER_ATOMIC_CMPSWAP_BOTHEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), - sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0), + BothenResDag) >; +} // end foreach RtnMode + class MUBUFLoad_PatternADDR64 : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, @@ -1682,8 +1839,12 @@ multiclass MUBUFStore_Atomic_Pattern ; } let SubtargetPredicate = isGFX6GFX7 in { -defm : MUBUFStore_Atomic_Pattern ; -defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; } // End Predicates = isGFX6GFX7 @@ -1731,7 +1892,7 @@ defm : MUBUFScratchStorePat ; -let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in { +let OtherPredicates = [HasD16LoadStore, DisableFlatScratch] in { // Hiding the extract high pattern in the PatFrag seems to not // automatically increase the complexity. let AddedComplexity = 1 in { @@ -1882,24 +2043,41 @@ let SubtargetPredicate = HasPackedD16VMem in { //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Base ENC_MUBUF for GFX6, GFX7, GFX10. +// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11. 
//===----------------------------------------------------------------------===// -class Base_MUBUF_Real_gfx6_gfx7_gfx10 op, MUBUF_Pseudo ps, int ef> : - MUBUF_Real, Enc64, SIMCInstr { +class Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11 : + MUBUF_Real, Enc64, SIMCInstr { let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{31-26} = 0x38; + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +class MUBUF_Real_gfx11 op, MUBUF_Pseudo ps, + string real_name = ps.Mnemonic> : + Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11 { + let Inst{12} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); + let Inst{25-18} = op; + let Inst{53} = !if(ps.has_tfe, tfe, ?); + let Inst{54} = ps.offen; + let Inst{55} = ps.idxen; +} + +class Base_MUBUF_Real_gfx6_gfx7_gfx10 op, MUBUF_Pseudo ps, int ef> : + Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11 { let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; let Inst{24-18} = op; - let Inst{31-26} = 0x38; - let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); - let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); - let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } class MUBUF_Real_gfx10 op, MUBUF_Pseudo ps> : @@ -1913,11 +2091,156 @@ class MUBUF_Real_gfx6_gfx7 op, MUBUF_Pseudo ps> : let Inst{15} = ps.addr64; } +//===----------------------------------------------------------------------===// +// MUBUF - GFX11. 
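// Where the shared MUBUF fields land in each generation, per the Inst{}
// assignments above (glc stays at Inst{14} throughout; the gfx6/7/10 lds
// bit at Inst{16} has no gfx11 counterpart, since LDS loads become separate
// BUFFER_LOAD_LDS_* opcodes there):
//
//            gfx6/7/10      gfx11
//   offen    Inst{12}       Inst{54}
//   idxen    Inst{13}       Inst{55}
//   slc      Inst{54}       Inst{12}
//   dlc      (not in base)  Inst{13}
//   op       Inst{24-18}    Inst{25-18}   // widened from 7 to 8 bits
//   tfe      Inst{55}       Inst{53}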
+//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in +multiclass MUBUF_Real_AllAddr_gfx11_Renamed_Impl op, string real_name> { + def _BOTHEN_gfx11 : + MUBUF_Real_gfx11(NAME#"_BOTHEN"), real_name>, + AtomicNoRet; + def _IDXEN_gfx11 : + MUBUF_Real_gfx11(NAME#"_IDXEN"), real_name>, + AtomicNoRet; + def _OFFEN_gfx11 : + MUBUF_Real_gfx11(NAME#"_OFFEN"), real_name>, + AtomicNoRet; + def _OFFSET_gfx11 : + MUBUF_Real_gfx11(NAME#"_OFFSET"), real_name>, + AtomicNoRet; +} + +multiclass MUBUF_Real_AllAddr_gfx11_Impl op, MUBUF_Pseudo ps> : + MUBUF_Real_AllAddr_gfx11_Renamed_Impl; +multiclass MUBUF_Real_AllAddr_gfx11 op> : + MUBUF_Real_AllAddr_gfx11_Impl(NAME#"_BOTHEN")>; + +class Pre_gfx11_MUBUF_Name : + MnemonicAlias, Requires<[isGFX11Plus]>; +multiclass MUBUF_Real_AllAddr_gfx11_Renamed op, string real_name> : + MUBUF_Real_AllAddr_gfx11_Renamed_Impl { + def : Pre_gfx11_MUBUF_Name(NAME#"_BOTHEN"), real_name>; +} + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in +multiclass MUBUF_Real_Atomics_RTN_gfx11_Renamed op, string real_name> { + def _BOTHEN_RTN_gfx11 : + MUBUF_Real_gfx11(NAME#"_BOTHEN_RTN"), real_name>, + AtomicNoRet; + def _IDXEN_RTN_gfx11 : + MUBUF_Real_gfx11(NAME#"_IDXEN_RTN"), real_name>, + AtomicNoRet; + def _OFFEN_RTN_gfx11 : + MUBUF_Real_gfx11(NAME#"_OFFEN_RTN"), real_name>, + AtomicNoRet; + def _OFFSET_RTN_gfx11 : + MUBUF_Real_gfx11(NAME#"_OFFSET_RTN"), real_name>, + AtomicNoRet; +} + +multiclass MUBUF_Real_Atomics_RTN_gfx11_impl op, MUBUF_Pseudo ps> : + MUBUF_Real_Atomics_RTN_gfx11_Renamed; +multiclass MUBUF_Real_Atomics_RTN_gfx11 op> : + MUBUF_Real_Atomics_RTN_gfx11_impl(NAME#"_BOTHEN")>; + +multiclass MUBUF_Real_Atomics_gfx11 op> : + MUBUF_Real_AllAddr_gfx11, + MUBUF_Real_Atomics_RTN_gfx11; + +multiclass MUBUF_Real_Atomics_gfx11_Renamed op, string real_name> : + MUBUF_Real_AllAddr_gfx11_Renamed, + MUBUF_Real_Atomics_RTN_gfx11_Renamed; + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { +def BUFFER_GL0_INV_gfx11 : MUBUF_Real_gfx11<0x02B, BUFFER_GL0_INV>; +def BUFFER_GL1_INV_gfx11 : MUBUF_Real_gfx11<0x02C, BUFFER_GL1_INV>; +} + +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x014, "buffer_load_b32">; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x015, "buffer_load_b64">; +defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x016, "buffer_load_b96">; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x017, "buffer_load_b128">; +defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x020, "buffer_load_d16_b16">; +defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x008, "buffer_load_d16_format_x">; +defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x009, "buffer_load_d16_format_xy">; +defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00a, "buffer_load_d16_format_xyz">; +defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00b, "buffer_load_d16_format_xyzw">; +defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x023, "buffer_load_d16_hi_b16">; +defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x026, "buffer_load_d16_hi_format_x">; +defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x022, "buffer_load_d16_hi_i8">; +defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x021, "buffer_load_d16_hi_u8">; +defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01f, 
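// For one renamed opcode, Pre_gfx11_MUBUF_Name above boils down to a plain
// mnemonic alias, so the pre-GFX11 spelling still assembles on GFX11+ (the
// same shape as the explicit buffer_atomic_csub alias below):
//
//   def : MnemonicAlias<"buffer_load_dword", "buffer_load_b32">,
//         Requires<[isGFX11Plus]>;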
"buffer_load_d16_i8">; +defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01e, "buffer_load_d16_u8">; +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x000>; +defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x001>; +defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x002>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x003>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x011, "buffer_load_i8">; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x013, "buffer_load_i16">; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x010, "buffer_load_u8">; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x012, "buffer_load_u16">; +defm BUFFER_LOAD_LDS_B32 : MUBUF_Real_AllAddr_gfx11<0x031>; +defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x032>; +defm BUFFER_LOAD_LDS_I8 : MUBUF_Real_AllAddr_gfx11<0x02e>; +defm BUFFER_LOAD_LDS_I16 : MUBUF_Real_AllAddr_gfx11<0x030>; +defm BUFFER_LOAD_LDS_U8 : MUBUF_Real_AllAddr_gfx11<0x02d>; +defm BUFFER_LOAD_LDS_U16 : MUBUF_Real_AllAddr_gfx11<0x02f>; +defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x018, "buffer_store_b8">; +defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x019, "buffer_store_b16">; +defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x01A, "buffer_store_b32">; +defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01B, "buffer_store_b64">; +defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01C, "buffer_store_b96">; +defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01D, "buffer_store_b128">; +defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x00C, "buffer_store_d16_format_x">; +defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x00D, "buffer_store_d16_format_xy">; +defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00E, "buffer_store_d16_format_xyz">; +defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00F, "buffer_store_d16_format_xyzw">; +defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x024, "buffer_store_d16_hi_b8">; +defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x025, "buffer_store_d16_hi_b16">; +defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x027, "buffer_store_d16_hi_format_x">; +defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x004>; +defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x005>; +defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x006>; +defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x007>; +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomics_gfx11<0x056>; +defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx11_Renamed<0x035, "buffer_atomic_add_u32">; +defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x043, "buffer_atomic_add_u64">; +defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx11_Renamed<0x03C, "buffer_atomic_and_b32">; +defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x049, "buffer_atomic_and_b64">; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x034, "buffer_atomic_cmpswap_b32">; +defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x042, "buffer_atomic_cmpswap_b64">; +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">; +defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx11_Renamed<0x037, "buffer_atomic_csub_u32">; +def : MnemonicAlias<"buffer_atomic_csub", "buffer_atomic_csub_u32">, Requires<[isGFX11Plus]>; +defm 
BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx11_Renamed<0x040, "buffer_atomic_dec_u32">; +defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04D, "buffer_atomic_dec_u64">; +defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx11_Renamed<0x03F, "buffer_atomic_inc_u32">; +defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04C, "buffer_atomic_inc_u64">; +defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x052, "buffer_atomic_max_f32">; +defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x03A, "buffer_atomic_max_i32">; +defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x047, "buffer_atomic_max_i64">; +defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x03B, "buffer_atomic_max_u32">; +defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x048, "buffer_atomic_max_u64">; +defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x051, "buffer_atomic_min_f32">; +defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x038, "buffer_atomic_min_i32">; +defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x045, "buffer_atomic_min_i64">; +defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x039, "buffer_atomic_min_u32">; +defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x046, "buffer_atomic_min_u64">; +defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx11_Renamed<0x03D, "buffer_atomic_or_b32">; +defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04A, "buffer_atomic_or_b64">; +defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx11_Renamed<0x036, "buffer_atomic_sub_u32">; +defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x044, "buffer_atomic_sub_u64">; +defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x033, "buffer_atomic_swap_b32">; +defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x041, "buffer_atomic_swap_b64">; +defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx11_Renamed<0x03E, "buffer_atomic_xor_b32">; +defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04B, "buffer_atomic_xor_b64">; + //===----------------------------------------------------------------------===// // MUBUF - GFX10. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass MUBUF_Real_AllAddr_gfx10 op> { def _BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_BOTHEN")>; @@ -1929,23 +2252,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { MUBUF_Real_gfx10(NAME#"_OFFSET")>; } multiclass MUBUF_Real_AllAddr_Lds_gfx10 op> { - def _OFFSET_gfx10 : MUBUF_Real_gfx10(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">; - def _OFFEN_gfx10 : MUBUF_Real_gfx10(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">; - def _IDXEN_gfx10 : MUBUF_Real_gfx10(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">; - def _BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">; - - def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">; - def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">; - def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">; - def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">; + def _OFFSET_gfx10 : MUBUF_Real_gfx10(NAME#"_OFFSET")>; + def _OFFEN_gfx10 : MUBUF_Real_gfx10(NAME#"_OFFEN")>; + def _IDXEN_gfx10 : MUBUF_Real_gfx10(NAME#"_IDXEN")>; + def _BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_BOTHEN")>; + + def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_OFFSET")>; + def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_BOTHEN")>; } multiclass MUBUF_Real_Atomics_RTN_gfx10 op> { def _BOTHEN_RTN_gfx10 : @@ -1976,7 +2291,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { MUBUF_Real_gfx10(NAME#"_OFFSET")>, AtomicNoRet; } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>; defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>; @@ -2033,27 +2348,17 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>; } multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7 op> { - def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">; - def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64")>, - MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">; - def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">; - def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">; - def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">; - - def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">; - def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_ADDR64")>, - MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">; - def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">; - def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">; - def _LDS_BOTHEN_gfx6_gfx7 : 
MUBUF_Real_gfx6_gfx7(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">; + def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>; + def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64")>; + def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN")>; + def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN")>; + def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN")>; + + def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_OFFSET")>; + def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_ADDR64")>; + def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_BOTHEN")>; } multiclass MUBUF_Real_Atomics_gfx6_gfx7 op> { def _ADDR64_gfx6_gfx7 : @@ -2167,25 +2472,88 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>; //===----------------------------------------------------------------------===// -// Base ENC_MTBUF for GFX6, GFX7, GFX10. +// Base ENC_MTBUF for GFX6, GFX7, GFX10, GFX11. //===----------------------------------------------------------------------===// -class Base_MTBUF_Real_gfx6_gfx7_gfx10 op, MTBUF_Pseudo ps, int ef> : - MTBUF_Real, Enc64, SIMCInstr { +class Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11 : + MTBUF_Real, Enc64, SIMCInstr { let Inst{11-0} = !if(ps.has_offset, offset, ?); - let Inst{12} = ps.offen; - let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); - let Inst{18-16} = op; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +class Base_MTBUF_Real_gfx11 op, MTBUF_Pseudo ps, + string real_name = ps.Mnemonic> : + Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11 { + let Inst{12} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); + let Inst{18-15} = op; + let Inst{25-19} = format; + let Inst{53} = !if(ps.has_tfe, tfe, ?); + let Inst{54} = ps.offen; + let Inst{55} = ps.idxen; +} + +class Base_MTBUF_Real_gfx6_gfx7_gfx10 op, MTBUF_Pseudo ps, int ef> : + Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11 { + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{18-16} = op; let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); - let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } +//===----------------------------------------------------------------------===// +// MTBUF - GFX11. 
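// MTBUF opcode/format placement, per the Inst{} assignments above: gfx6/7
// encode a 3-bit op at Inst{18-16}, gfx10 widens it with op{3} at Inst{53}
// (see MTBUF_Real_gfx10 below), and gfx11 instead packs a 4-bit op at
// Inst{18-15} next to a 7-bit format field at Inst{25-19} - i.e. the gfx11
// class head is presumably something like:
//
//   class Base_MTBUF_Real_gfx11 <bits<4> op, MTBUF_Pseudo ps,
//                                string real_name = ps.Mnemonic> : ...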
+//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in +multiclass MTBUF_Real_AllAddr_gfx11_Renamed_Impl op, string real_name> { + def _BOTHEN_gfx11 : + Base_MTBUF_Real_gfx11(NAME#"_BOTHEN"), real_name>; + def _IDXEN_gfx11 : + Base_MTBUF_Real_gfx11(NAME#"_IDXEN"), real_name>; + def _OFFEN_gfx11 : + Base_MTBUF_Real_gfx11(NAME#"_OFFEN"), real_name>; + def _OFFSET_gfx11 : + Base_MTBUF_Real_gfx11(NAME#"_OFFSET"), real_name>; +} + +multiclass MTBUF_Real_AllAddr_gfx11_Impl op, MTBUF_Pseudo ps> + : MTBUF_Real_AllAddr_gfx11_Renamed_Impl; +multiclass MTBUF_Real_AllAddr_gfx11 op> + : MTBUF_Real_AllAddr_gfx11_Impl(NAME#"_BOTHEN")>; + + +class Pre_gfx11_MTBUF_Name + : MnemonicAlias, Requires<[isGFX11Plus]>; +multiclass MTBUF_Real_AllAddr_gfx11_Renamed op, string real_name> + : MTBUF_Real_AllAddr_gfx11_Renamed_Impl { + def : Pre_gfx11_MTBUF_Name(NAME#"_BOTHEN"), real_name>; +} + +defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x008, "tbuffer_load_d16_format_x">; +defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x009, "tbuffer_load_d16_format_xy">; +defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00a, "tbuffer_load_d16_format_xyz">; +defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00b, "tbuffer_load_d16_format_xyzw">; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x000>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x001>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x002>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x003>; +defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x00c, "tbuffer_store_d16_format_x">; +defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x00d, "tbuffer_store_d16_format_xy">; +defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00e, "tbuffer_store_d16_format_xyz">; +defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00f, "tbuffer_store_d16_format_xyzw">; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x004>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x005>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x006>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x007>; + //===----------------------------------------------------------------------===// // MTBUF - GFX10. 
//===----------------------------------------------------------------------===// @@ -2197,7 +2565,7 @@ class MTBUF_Real_gfx10 op, MTBUF_Pseudo ps> : let Inst{53} = op{3}; } -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass MTBUF_Real_AllAddr_gfx10 op> { def _BOTHEN_gfx10 : MTBUF_Real_gfx10(NAME#"_BOTHEN")>; @@ -2208,7 +2576,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { def _OFFSET_gfx10 : MTBUF_Real_gfx10(NAME#"_OFFSET")>; } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>; defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>; @@ -2303,9 +2671,28 @@ class MUBUF_Real_gfx90a op, MUBUF_Pseudo ps, let Inst{55} = acc; } +class MUBUF_Real_gfx940 op, MUBUF_Pseudo ps> : + MUBUF_Real_Base_vi { + let AssemblerPredicate = isGFX940Plus; + let DecoderNamespace = "GFX9"; + let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands); + + let Inst{55} = acc; +} + multiclass MUBUF_Real_vi_gfx90a op, MUBUF_Pseudo ps> { def _vi : MUBUF_Real_vi; - def _gfx90a : MUBUF_Real_gfx90a; + + foreach _ = BoolToList.ret in + def _gfx90a : MUBUF_Real_gfx90a; + + foreach _ = BoolToList.ret in { + def _gfx90a : MUBUF_Real_gfx90a { + let SubtargetPredicate = isGFX90AOnly; + let AssemblerPredicate = isGFX90AOnly; + } + def _gfx940 : MUBUF_Real_gfx940; + } } multiclass MUBUF_Real_AllAddr_vi op> { @@ -2317,41 +2704,25 @@ multiclass MUBUF_Real_AllAddr_vi op> { multiclass MUBUF_Real_AllAddr_Lds_vi op> { - def _OFFSET_vi : MUBUF_Real_vi (NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_vi">; - def _OFFEN_vi : MUBUF_Real_vi (NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_vi">; - def _IDXEN_vi : MUBUF_Real_vi (NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_vi">; - def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_vi">; - - def _LDS_OFFSET_vi : MUBUF_Real_vi (NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_vi">; - def _LDS_OFFEN_vi : MUBUF_Real_vi (NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_vi">; - def _LDS_IDXEN_vi : MUBUF_Real_vi (NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_vi">; - def _LDS_BOTHEN_vi : MUBUF_Real_vi (NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_vi">; - - def _OFFSET_gfx90a : MUBUF_Real_gfx90a (NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_gfx90a">; - def _OFFEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_gfx90a">; - def _IDXEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_gfx90a">; - def _BOTHEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_gfx90a">; - - def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_gfx90a">; - def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_gfx90a">; - def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_gfx90a">; - def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_gfx90a">; + def _OFFSET_vi : MUBUF_Real_vi (NAME#"_OFFSET")>; + def _OFFEN_vi : MUBUF_Real_vi (NAME#"_OFFEN")>; + def _IDXEN_vi : MUBUF_Real_vi (NAME#"_IDXEN")>; + def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>; + + def _LDS_OFFSET_vi : 
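// The foreach-over-BoolToList idiom above is the pre-`if`-statement way of
// emitting a definition conditionally; assuming the usual helper,
//
//   class BoolToList<bit Val> {
//     list<int> ret = !if(Val, [1], []);
//   }
//
// the loop body is instantiated once when the predicate holds and zero
// times otherwise, which is what gates the _gfx90a and _gfx940 variants
// here.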
MUBUF_Real_vi (NAME#"_LDS_OFFSET")>; + def _LDS_OFFEN_vi : MUBUF_Real_vi (NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_vi : MUBUF_Real_vi (NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_vi : MUBUF_Real_vi (NAME#"_LDS_BOTHEN")>; + + def _OFFSET_gfx90a : MUBUF_Real_gfx90a (NAME#"_OFFSET")>; + def _OFFEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_OFFEN")>; + def _IDXEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_IDXEN")>; + def _BOTHEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_BOTHEN")>; + + def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_OFFSET")>; + def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a (NAME#"_LDS_BOTHEN")>; } class MUBUF_Real_gfx80 op, MUBUF_Pseudo ps> : @@ -2424,9 +2795,9 @@ defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_vi <0x11>; defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>; defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>; defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>; -defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_Lds_vi <0x15>; -defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_Lds_vi <0x16>; -defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_Lds_vi <0x17>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>; +defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>; defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>; defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x19>; defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>; @@ -2481,12 +2852,12 @@ def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>; def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; } // End AssemblerPredicate = isGFX8GFX9 -let SubtargetPredicate = HasAtomicFaddInsts in { +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in { defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; -} // End SubtargetPredicate = HasAtomicFaddInsts +} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts let SubtargetPredicate = isGFX90APlus in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>; @@ -2495,9 +2866,17 @@ let SubtargetPredicate = isGFX90APlus in { } // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> { + let AsmString = BUFFER_WBL2.Mnemonic; // drop flags + let AssemblerPredicate = isGFX90AOnly; + let SubtargetPredicate = isGFX90AOnly; } def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>; +let SubtargetPredicate = isGFX940Plus in { +def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>; +def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>; +} + class MTBUF_Real_Base_vi op, MTBUF_Pseudo ps, int Enc> : MTBUF_Real, Enc64, diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index c4043177b618..27b723875aa4 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -52,8 +52,8 @@ class DS_Pseudo patt let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]); } -class DS_Real : - InstSI , +class DS_Real : + InstSI , Enc64 { let isPseudo = 0; @@ -72,6 +72,9 @@ class DS_Real : let IsAtomicRet = ps.IsAtomicRet; let IsAtomicNoRet = ps.IsAtomicNoRet; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + // encoding fields bits<10> vdst; bits<1> gds; @@ -172,6 +175,22 @@ multiclass DS_1A2D_Off8_NORET_mc { } } +class 
DS_0A1D_RET_GDS.ret, + RegisterOperand src_op = getLdStRegisterOperand.ret> +: DS_Pseudo { + + let has_addr = 0; + let has_data1 = 0; + let has_gds = 0; + let gdsValue = 1; + let AsmMatchConverter = "cvtDSGds"; + let hasSideEffects = 1; +} + class DS_1A1D_RET .ret> : DS_Pseudo; } // End SubtargetPredicate = isGFX90APlus +let SubtargetPredicate = isGFX940Plus in { + defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">; + defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32, "ds_pk_add_f16">; + defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">; + defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32, "ds_pk_add_bf16">; +} // End SubtargetPredicate = isGFX940Plus + +defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">; +defm DS_CMPSTORE_F32 : DS_1A2D_NORET_mc<"ds_cmpstore_f32">; +defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>; +defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>; +defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32, "ds_cmpstore_b32">; +defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32, "ds_cmpstore_f32">; +defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64, "ds_cmpstore_b64">; +defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64, "ds_cmpstore_f64">; + defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">; defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">; defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">; @@ -619,6 +654,8 @@ def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">; def DS_CONSUME : DS_0A_RET<"ds_consume">; def DS_APPEND : DS_0A_RET<"ds_append">; + +let SubtargetPredicate = isNotGFX90APlus in def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">; //===----------------------------------------------------------------------===// @@ -667,6 +704,18 @@ let SubtargetPredicate = HasLDSFPAtomicAdd, OtherPredicates = [HasDsSrc2Insts] i def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; } + +//===----------------------------------------------------------------------===// +// Instruction definitions for GFX11 and newer. +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGFX11Plus in { + +def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>; +def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>; + +} // let SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // DS Patterns //===----------------------------------------------------------------------===// @@ -777,14 +826,14 @@ foreach vt = Reg32Types.types in { defm : DSWritePat_mc ; } -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; -let OtherPredicates = [D16PreservesUnusedBits] in { +let OtherPredicates = [HasD16LoadStore] in { def : DSWritePat ; def : DSWritePat ; } @@ -870,15 +919,30 @@ defm : DSWritePat_mc ; let SubtargetPredicate = HasUnalignedAccessMode in { -// FIXME: From performance point of view, is ds_read_b96/ds_write_b96 better choice -// for unaligned accesses? 
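// A rough example of what the patterns added below buy: with unaligned
// access mode enabled, an 8-byte LDS load at alignment < 4 now selects a
// single
//
//   ds_read_b64 v[0:1], v0
//
// rather than the ds_read2_b32 pair it would otherwise split into; the
// access is misaligned either way, but there is one of it instead of two.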
+// Select 64 bit loads and stores aligned less than 4 as a single ds_read_b64/ +// ds_write_b64 instruction as this is faster than ds_read2_b32/ds_write2_b32 +// which would be used otherwise. In this case a b32 access would still be +// misaligned, but we will have 2 of them. +foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc ; +defm : DSWritePat_mc ; +} + +// Selection will split most of the unaligned 3 dword accesses due to performance +// reasons when beneficial. Keep these two patterns for the rest of the cases. foreach vt = VReg_96.RegTypes in { defm : DSReadPat_mc ; defm : DSWritePat_mc ; } -// For performance reasons, *do not* select ds_read_b128/ds_write_b128 for unaligned -// accesses. +// Select 128 bit loads and stores aligned less than 4 as a single ds_read_b128/ +// ds_write_b128 instruction as this is faster than ds_read2_b64/ds_write2_b64 +// which would be used otherwise. In this case a b64 access would still be +// misaligned, but we will have 2 of them. +foreach vt = VReg_128.RegTypes in { +defm : DSReadPat_mc ; +defm : DSWritePat_mc ; +} } // End SubtargetPredicate = HasUnalignedAccessMode @@ -904,69 +968,143 @@ multiclass DSAtomicRetPat_mc { def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), 1>; } +multiclass DSAtomicRetNoRetPat_mc { + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSAtomicRetPat(frag#"_local_m0_ret_"#vt.Size)>; + def : DSAtomicRetPat(frag#"_local_m0_noret_"#vt.Size)>; + } + + let OtherPredicates = [NotLDSRequiresM0Init] in { + def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt, + !cast(frag#"_local_ret_"#vt.Size)>; + def : DSAtomicRetPat(!cast(noRetInst)#"_gfx9"), vt, + !cast(frag#"_local_noret_"#vt.Size)>; + } + def : DSAtomicRetPat(frag#"_region_m0_ret_"#vt.Size), 1>; + def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt.Size), 1>; +} -class DSAtomicCmpXChg : GCNPat < + + +let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { +// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode. +class DSAtomicCmpXChgSwapped : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap), (inst $ptr, getVregSrcForVT.ret:$cmp, getVregSrcForVT.ret:$swap, offset:$offset, (i1 gds)) >; -multiclass DSAtomicCmpXChg_mc { +multiclass DSAtomicCmpXChgSwapped_mc { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicCmpXChg(frag#"_local_m0_"#vt.Size)>; + def : DSAtomicCmpXChgSwapped(frag#"_local_m0_ret_"#vt.Size)>; + def : DSAtomicCmpXChgSwapped(frag#"_local_m0_noret_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { - def : DSAtomicCmpXChg(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local_"#vt.Size)>; + def : DSAtomicCmpXChgSwapped(!cast(inst)#"_gfx9"), vt, + !cast(frag#"_local_ret_"#vt.Size)>; + def : DSAtomicCmpXChgSwapped(!cast(noRetInst)#"_gfx9"), vt, + !cast(frag#"_local_noret_"#vt.Size)>; } - def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), 1>; + def : DSAtomicCmpXChgSwapped(frag#"_region_m0_ret_"#vt.Size), 1>; + def : DSAtomicCmpXChgSwapped(frag#"_region_m0_noret_"#vt.Size), 1>; } +} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 + +let SubtargetPredicate = isGFX11Plus in { +// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode. 
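// The swapped and unswapped classes differ only in the operand order they
// feed the instruction; for a cmpxchg fragment (ptr, cmp, swap):
//
//   gfx10 and earlier: (inst $ptr, $cmp,  $swap, offset, gds)  // DSAtomicCmpXChgSwapped
//   gfx11 onwards:     (inst $ptr, $swap, $cmp,  offset, gds)  // matches BUFFER_ATOMIC_CMPSWAP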
+class DSAtomicCmpXChg : GCNPat < + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap), + (inst $ptr, getVregSrcForVT.ret:$swap, getVregSrcForVT.ret:$cmp, offset:$offset, (i1 gds)) +>; +multiclass DSAtomicCmpXChg_mc { + def : DSAtomicCmpXChg(!cast(inst)#"_gfx9"), vt, + !cast(frag#"_local_ret_"#vt.Size)>; + def : DSAtomicCmpXChg(!cast(noRetInst)#"_gfx9"), vt, + !cast(frag#"_local_noret_"#vt.Size)>; + + def : DSAtomicCmpXChg(frag#"_region_m0_ret_"#vt.Size), 1>; + def : DSAtomicCmpXChg(frag#"_region_m0_noret_"#vt.Size), 1>; +} +} // End SubtargetPredicate = isGFX11Plus // 32-bit atomics. defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicCmpXChg_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; + +let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { +defm : DSAtomicCmpXChgSwapped_mc; +} + +let SubtargetPredicate = isGFX11Plus in { +defm : DSAtomicCmpXChg_mc; +} let SubtargetPredicate = HasLDSFPAtomicAdd in { -defm : DSAtomicRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; } // 64-bit atomics. defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; -defm : DSAtomicRetPat_mc; - -defm : DSAtomicCmpXChg_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; +defm : DSAtomicRetNoRetPat_mc; + +let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { +defm : DSAtomicCmpXChgSwapped_mc; +} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 + +let SubtargetPredicate = isGFX11Plus in { +defm : DSAtomicCmpXChg_mc; +} // End SubtargetPredicate = isGFX11Plus let SubtargetPredicate = isGFX90APlus in { -def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +} + +let SubtargetPredicate = isGFX940Plus in { +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : GCNPat < + (v2i16 (int_amdgcn_ds_fadd_v2bf16_ret i32:$ptr, v2i16:$src)), + (DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0) +>; +def : GCNPat < + (v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)), + (DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0) +>; } def : Pat < @@ -974,16 +1112,44 @@ def : Pat < (DS_ORDERED_COUNT $value, (as_i16imm $offset)) >; +def : GCNPat < + (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)), + (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)) +>; + +def : 
GCNPat < + (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)), + (EXTRACT_SUBREG + (i64 (COPY_TO_REGCLASS + (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)), + VReg_64)), + sub0) +>; + +def : GCNPat < + (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)), + (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)) +>; + +def : GCNPat < + (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)), + (EXTRACT_SUBREG + (i64 (COPY_TO_REGCLASS + (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)), + VReg_64)), + sub0) +>; + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Base ENC_DS for GFX6, GFX7, GFX10. +// Base ENC_DS for GFX6, GFX7, GFX10, GFX11. //===----------------------------------------------------------------------===// -class Base_DS_Real_gfx6_gfx7_gfx10 op, DS_Pseudo ps, int ef> : - DS_Real, SIMCInstr { +class Base_DS_Real_gfx6_gfx7_gfx10_gfx11 op, DS_Pseudo ps, int ef, string opName = ps.Mnemonic> : + DS_Real, SIMCInstr { let Inst{7-0} = !if(ps.has_offset0, offset0, 0); let Inst{15-8} = !if(ps.has_offset1, offset1, 0); @@ -996,20 +1162,90 @@ class Base_DS_Real_gfx6_gfx7_gfx10 op, DS_Pseudo ps, int ef> : let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0); } +//===----------------------------------------------------------------------===// +// GFX11. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in { + multiclass DS_Real_gfx11 op> { + def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11(NAME), + SIEncodingFamily.GFX11>; + } + + multiclass DS_Real_Renamed_gfx11 op, DS_Pseudo backing_pseudo, string real_name> { + def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; + } +} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" + +defm DS_STORE_B32 : DS_Real_Renamed_gfx11<0x00d, DS_WRITE_B32, "ds_store_b32">; +defm DS_STORE_2ADDR_B32 : DS_Real_Renamed_gfx11<0x00e, DS_WRITE2_B32, "ds_store_2addr_b32">; +defm DS_STORE_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11<0x00f, DS_WRITE2ST64_B32, "ds_store_2addr_stride64_b32">; +defm DS_STORE_B8 : DS_Real_Renamed_gfx11<0x01e, DS_WRITE_B8, "ds_store_b8">; +defm DS_STORE_B16 : DS_Real_Renamed_gfx11<0x01f, DS_WRITE_B16, "ds_store_b16">; +defm DS_STOREXCHG_RTN_B32 : DS_Real_Renamed_gfx11<0x02d, DS_WRXCHG_RTN_B32, "ds_storexchg_rtn_b32">; +defm DS_STOREXCHG_2ADDR_RTN_B32 : DS_Real_Renamed_gfx11<0x02e, DS_WRXCHG2_RTN_B32, "ds_storexchg_2addr_rtn_b32">; +defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 : DS_Real_Renamed_gfx11<0x02f, DS_WRXCHG2ST64_RTN_B32, "ds_storexchg_2addr_stride64_rtn_b32">; +defm DS_LOAD_B32 : DS_Real_Renamed_gfx11<0x036, DS_READ_B32, "ds_load_b32">; +defm DS_LOAD_2ADDR_B32 : DS_Real_Renamed_gfx11<0x037, DS_READ2_B32, "ds_load_2addr_b32">; +defm DS_LOAD_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11<0x038, DS_READ2ST64_B32, "ds_load_2addr_stride64_b32">; +defm DS_LOAD_I8 : DS_Real_Renamed_gfx11<0x039, DS_READ_I8, "ds_load_i8">; +defm DS_LOAD_U8 : DS_Real_Renamed_gfx11<0x03a, DS_READ_U8, "ds_load_u8">; +defm DS_LOAD_I16 : DS_Real_Renamed_gfx11<0x03b, DS_READ_I16, "ds_load_i16">; +defm DS_LOAD_U16 : DS_Real_Renamed_gfx11<0x03c, DS_READ_U16, "ds_load_u16">; +defm DS_STORE_B64 : DS_Real_Renamed_gfx11<0x04d, DS_WRITE_B64, 
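// Each renamed real in this list keeps the encoding and operand list of its
// backing pseudo but assembles and disassembles under the new GFX11
// store/load mnemonic; the generated MnemonicAlias (guarded by isGFX11Plus)
// lets the legacy ds_write*/ds_read* spelling still be accepted.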
"ds_store_b64">; +defm DS_STORE_2ADDR_B64 : DS_Real_Renamed_gfx11<0x04e, DS_WRITE2_B64, "ds_store_2addr_b64">; +defm DS_STORE_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11<0x04f, DS_WRITE2ST64_B64, "ds_store_2addr_stride64_b64">; +defm DS_STOREXCHG_RTN_B64 : DS_Real_Renamed_gfx11<0x06d, DS_WRXCHG_RTN_B64, "ds_storexchg_rtn_b64">; +defm DS_STOREXCHG_2ADDR_RTN_B64 : DS_Real_Renamed_gfx11<0x06e, DS_WRXCHG2_RTN_B64, "ds_storexchg_2addr_rtn_b64">; +defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 : DS_Real_Renamed_gfx11<0x06f, DS_WRXCHG2ST64_RTN_B64, "ds_storexchg_2addr_stride64_rtn_b64">; +defm DS_LOAD_B64 : DS_Real_Renamed_gfx11<0x076, DS_READ_B64, "ds_load_b64">; +defm DS_LOAD_2ADDR_B64 : DS_Real_Renamed_gfx11<0x077, DS_READ2_B64, "ds_load_2addr_b64">; +defm DS_LOAD_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11<0x078, DS_READ2ST64_B64, "ds_load_2addr_stride64_b64">; +defm DS_STORE_B8_D16_HI : DS_Real_Renamed_gfx11<0x0a0, DS_WRITE_B8_D16_HI, "ds_store_b8_d16_hi">; +defm DS_STORE_B16_D16_HI : DS_Real_Renamed_gfx11<0x0a1, DS_WRITE_B16_D16_HI, "ds_store_b16_d16_hi">; +defm DS_LOAD_U8_D16 : DS_Real_Renamed_gfx11<0x0a2, DS_READ_U8_D16, "ds_load_u8_d16">; +defm DS_LOAD_U8_D16_HI : DS_Real_Renamed_gfx11<0x0a3, DS_READ_U8_D16_HI, "ds_load_u8_d16_hi">; +defm DS_LOAD_I8_D16 : DS_Real_Renamed_gfx11<0x0a4, DS_READ_I8_D16, "ds_load_i8_d16">; +defm DS_LOAD_I8_D16_HI : DS_Real_Renamed_gfx11<0x0a5, DS_READ_I8_D16_HI, "ds_load_i8_d16_hi">; +defm DS_LOAD_U16_D16 : DS_Real_Renamed_gfx11<0x0a6, DS_READ_U16_D16, "ds_load_u16_d16">; +defm DS_LOAD_U16_D16_HI : DS_Real_Renamed_gfx11<0x0a7, DS_READ_U16_D16_HI, "ds_load_u16_d16_hi">; +defm DS_STORE_ADDTID_B32 : DS_Real_Renamed_gfx11<0x0b0, DS_WRITE_ADDTID_B32, "ds_store_addtid_b32">; +defm DS_LOAD_ADDTID_B32 : DS_Real_Renamed_gfx11<0x0b1, DS_READ_ADDTID_B32, "ds_load_addtid_b32">; +defm DS_STORE_B96 : DS_Real_Renamed_gfx11<0x0de, DS_WRITE_B96, "ds_store_b96">; +defm DS_STORE_B128 : DS_Real_Renamed_gfx11<0x0df, DS_WRITE_B128, "ds_store_b128">; +defm DS_LOAD_B96 : DS_Real_Renamed_gfx11<0x0fe, DS_READ_B96, "ds_load_b96">; +defm DS_LOAD_B128 : DS_Real_Renamed_gfx11<0x0ff, DS_READ_B128, "ds_load_b128">; + +// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, but also the data operands (src and cmp) are swapped +// comparing to pre-GFX11. +// Note: the mnemonic alias is not generated to avoid a potential ambiguity due to the semantics change. + +defm DS_CMPSTORE_B32 : DS_Real_gfx11<0x010>; +defm DS_CMPSTORE_F32 : DS_Real_gfx11<0x011>; +defm DS_CMPSTORE_RTN_B32 : DS_Real_gfx11<0x030>; +defm DS_CMPSTORE_RTN_F32 : DS_Real_gfx11<0x031>; +defm DS_CMPSTORE_B64 : DS_Real_gfx11<0x050>; +defm DS_CMPSTORE_F64 : DS_Real_gfx11<0x051>; +defm DS_CMPSTORE_RTN_B64 : DS_Real_gfx11<0x070>; +defm DS_CMPSTORE_RTN_F64 : DS_Real_gfx11<0x071>; + +defm DS_ADD_RTN_F32 : DS_Real_gfx11<0x079>; +defm DS_ADD_GS_REG_RTN : DS_Real_gfx11<0x07a>; +defm DS_SUB_GS_REG_RTN : DS_Real_gfx11<0x07b>; + //===----------------------------------------------------------------------===// // GFX10. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass DS_Real_gfx10 op> { - def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10(NAME), + def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11(NAME), SIEncodingFamily.GFX10>; } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" -defm DS_ADD_F32 : DS_Real_gfx10<0x015>; defm DS_ADD_RTN_F32 : DS_Real_gfx10<0x055>; -defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>; defm DS_WRITE_B8_D16_HI : DS_Real_gfx10<0x0a0>; defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>; defm DS_READ_U8_D16 : DS_Real_gfx10<0x0a2>; @@ -1020,95 +1256,118 @@ defm DS_READ_U16_D16 : DS_Real_gfx10<0x0a6>; defm DS_READ_U16_D16_HI : DS_Real_gfx10<0x0a7>; defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>; defm DS_READ_ADDTID_B32 : DS_Real_gfx10<0x0b1>; -defm DS_PERMUTE_B32 : DS_Real_gfx10<0x0b2>; -defm DS_BPERMUTE_B32 : DS_Real_gfx10<0x0b3>; //===----------------------------------------------------------------------===// -// GFX7, GFX10. +// GFX10, GFX11. +//===----------------------------------------------------------------------===// + +multiclass DS_Real_gfx10_gfx11 op> : + DS_Real_gfx10, DS_Real_gfx11; + +defm DS_ADD_F32 : DS_Real_gfx10_gfx11<0x015>; +defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>; +defm DS_PERMUTE_B32 : DS_Real_gfx10_gfx11<0x0b2>; +defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11<0x0b3>; + +//===----------------------------------------------------------------------===// +// GFX7, GFX10, GFX11. //===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { multiclass DS_Real_gfx7 op> { - def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10(NAME), + def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11(NAME), SIEncodingFamily.SI>; } } // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" +multiclass DS_Real_gfx7_gfx10_gfx11 op> : + DS_Real_gfx7, DS_Real_gfx10_gfx11; + multiclass DS_Real_gfx7_gfx10 op> : DS_Real_gfx7, DS_Real_gfx10; // FIXME-GFX7: Add tests when upstreaming this part. -defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10<0x018>; -defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10<0x034>; -defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10<0x07e>; +defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>; +defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10_gfx11<0x034>; +defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10_gfx11<0x07e>; defm DS_WRITE_B96 : DS_Real_gfx7_gfx10<0x0de>; defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>; defm DS_READ_B96 : DS_Real_gfx7_gfx10<0x0fe>; defm DS_READ_B128 : DS_Real_gfx7_gfx10<0x0ff>; //===----------------------------------------------------------------------===// -// GFX6, GFX7, GFX10. +// GFX6, GFX7, GFX10, GFX11. 
//===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { multiclass DS_Real_gfx6_gfx7 op> { - def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10(NAME), + def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11(NAME), SIEncodingFamily.SI>; } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" +multiclass DS_Real_gfx6_gfx7_gfx10_gfx11 op> : + DS_Real_gfx6_gfx7, DS_Real_gfx10_gfx11; + multiclass DS_Real_gfx6_gfx7_gfx10 op> : DS_Real_gfx6_gfx7, DS_Real_gfx10; -defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10<0x000>; -defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10<0x001>; -defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10<0x002>; -defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10<0x003>; -defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10<0x004>; -defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10<0x005>; -defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10<0x006>; -defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10<0x007>; -defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10<0x008>; -defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10<0x009>; -defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00a>; -defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00b>; -defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00c>; +defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x000>; +defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x001>; +defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x002>; +defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x003>; +defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x004>; +defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x005>; +defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x006>; +defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x007>; +defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x008>; +defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x009>; +defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00a>; +defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00b>; +defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00c>; + defm DS_WRITE_B32 : DS_Real_gfx6_gfx7_gfx10<0x00d>; defm DS_WRITE2_B32 : DS_Real_gfx6_gfx7_gfx10<0x00e>; defm DS_WRITE2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x00f>; defm DS_CMPST_B32 : DS_Real_gfx6_gfx7_gfx10<0x010>; defm DS_CMPST_F32 : DS_Real_gfx6_gfx7_gfx10<0x011>; -defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10<0x012>; -defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10<0x013>; -defm DS_NOP : DS_Real_gfx6_gfx7_gfx10<0x014>; -defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10<0x019>; -defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10<0x01a>; -defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10<0x01b>; -defm DS_GWS_SEMA_P : DS_Real_gfx6_gfx7_gfx10<0x01c>; -defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10<0x01d>; + +defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>; +defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>; +defm DS_NOP : DS_Real_gfx6_gfx7_gfx10_gfx11<0x014>; +defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>; +defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>; +defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>; +defm DS_GWS_SEMA_P : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>; +defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>; + defm DS_WRITE_B8 : DS_Real_gfx6_gfx7_gfx10<0x01e>; defm DS_WRITE_B16 : DS_Real_gfx6_gfx7_gfx10<0x01f>; -defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x020>; -defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x021>; -defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x022>; -defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x023>; -defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x024>; -defm DS_MIN_RTN_I32 : 
DS_Real_gfx6_gfx7_gfx10<0x025>; -defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10<0x026>; -defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x027>; -defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x028>; -defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x029>; -defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02a>; -defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02b>; -defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02c>; + +defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x020>; +defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x021>; +defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x022>; +defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x023>; +defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x024>; +defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x025>; +defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x026>; +defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x027>; +defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x028>; +defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x029>; +defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02a>; +defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02b>; +defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02c>; + defm DS_WRXCHG_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02d>; defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02e>; defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>; defm DS_CMPST_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x030>; defm DS_CMPST_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x031>; -defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x032>; -defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x033>; -defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10<0x035>; + +defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>; +defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>; +defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x035>; + defm DS_READ_B32 : DS_Real_gfx6_gfx7_gfx10<0x036>; defm DS_READ2_B32 : DS_Real_gfx6_gfx7_gfx10<0x037>; defm DS_READ2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x038>; @@ -1116,49 +1375,55 @@ defm DS_READ_I8 : DS_Real_gfx6_gfx7_gfx10<0x039>; defm DS_READ_U8 : DS_Real_gfx6_gfx7_gfx10<0x03a>; defm DS_READ_I16 : DS_Real_gfx6_gfx7_gfx10<0x03b>; defm DS_READ_U16 : DS_Real_gfx6_gfx7_gfx10<0x03c>; -defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10<0x03d>; -defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10<0x03e>; -defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10<0x03f>; -defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10<0x040>; -defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10<0x041>; -defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10<0x042>; -defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10<0x043>; -defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10<0x044>; -defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10<0x045>; -defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10<0x046>; -defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10<0x047>; -defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10<0x048>; -defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10<0x049>; -defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04a>; -defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04b>; -defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04c>; + +defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03d>; +defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03e>; +defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>; +defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x040>; +defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x041>; +defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x042>; +defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x043>; +defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x044>; +defm DS_MIN_I64 : 
DS_Real_gfx6_gfx7_gfx10_gfx11<0x045>; +defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x046>; +defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x047>; +defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x048>; +defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x049>; +defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04a>; +defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04b>; +defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04c>; + defm DS_WRITE_B64 : DS_Real_gfx6_gfx7_gfx10<0x04d>; defm DS_WRITE2_B64 : DS_Real_gfx6_gfx7_gfx10<0x04e>; defm DS_WRITE2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x04f>; defm DS_CMPST_B64 : DS_Real_gfx6_gfx7_gfx10<0x050>; defm DS_CMPST_F64 : DS_Real_gfx6_gfx7_gfx10<0x051>; -defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10<0x052>; -defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10<0x053>; -defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x060>; -defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x061>; -defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x062>; -defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x063>; -defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x064>; -defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10<0x065>; -defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10<0x066>; -defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x067>; -defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x068>; -defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x069>; -defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06a>; -defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06b>; -defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06c>; + +defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>; +defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>; +defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x060>; +defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x061>; +defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x062>; +defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x063>; +defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x064>; +defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x065>; +defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x066>; +defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x067>; +defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x068>; +defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x069>; +defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06a>; +defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06b>; +defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06c>; + defm DS_WRXCHG_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06d>; defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06e>; defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>; defm DS_CMPST_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x070>; defm DS_CMPST_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x071>; -defm DS_MIN_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x072>; -defm DS_MAX_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x073>; + +defm DS_MIN_RTN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>; +defm DS_MAX_RTN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>; + defm DS_READ_B64 : DS_Real_gfx6_gfx7_gfx10<0x076>; defm DS_READ2_B64 : DS_Real_gfx6_gfx7_gfx10<0x077>; defm DS_READ2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x078>; @@ -1381,3 +1646,10 @@ let SubtargetPredicate = isGFX90APlus in { def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>; def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>; } // End SubtargetPredicate = isGFX90APlus + +let SubtargetPredicate = isGFX940Plus in { + def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>; + def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>; + def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, 
DS_PK_ADD_BF16>; + def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>; +} // End SubtargetPredicate = isGFX940Plus diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index e2186d4d533e..ccaf646008b1 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -18,15 +18,20 @@ #include "Disassembler/AMDGPUDisassembler.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" +#include "SIRegisterInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm-c/DisassemblerTypes.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/TargetRegistry.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/AMDHSAKernelDescriptor.h" using namespace llvm; @@ -70,7 +75,8 @@ static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, } static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); // Our branches take a simm16, but we need two extra bits to account for the @@ -78,13 +84,13 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, APInt SignedOffset(18, Imm * 4, true); int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue(); - if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2)) + if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0)) return MCDisassembler::Success; return addOperand(Inst, MCOperand::createImm(Imm)); } -static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { +static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); int64_t Offset; if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets. 
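A note on the mechanical change that dominates the disassembler hunks below:
every custom operand-decoder callback migrates from an opaque
`const void *Decoder` parameter to a typed `const MCDisassembler *`, so the
generated decoder tables hand callbacks a usable pointer directly. A minimal
sketch of a callback in the new shape (the operand itself is hypothetical,
for illustration only):

    #include "llvm/MC/MCDisassembler/MCDisassembler.h"
    #include "llvm/MC/MCInst.h"

    using namespace llvm;

    // Hypothetical operand decoder: attaches a raw encoded field as a plain
    // immediate operand. Real AMDGPU callbacks first downcast Decoder to
    // AMDGPUDisassembler to reuse its decode helpers.
    static MCDisassembler::DecodeStatus
    decodeExampleImm(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                     const MCDisassembler *Decoder) {
      (void)Decoder; // Typed now; no const void * round-trip required.
      Inst.addOperand(MCOperand::createImm(Imm));
      return MCDisassembler::Success;
    }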
@@ -95,20 +101,19 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, return addOperand(Inst, MCOperand::createImm(Offset)); } -static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, - uint64_t Addr, const void *Decoder) { +static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeBoolReg(Val)); } -#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ -static DecodeStatus StaticDecoderName(MCInst &Inst, \ - unsigned Imm, \ - uint64_t /*Addr*/, \ - const void *Decoder) { \ - auto DAsm = static_cast(Decoder); \ - return addOperand(Inst, DAsm->DecoderName(Imm)); \ -} +#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ + static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \ + uint64_t /*Addr*/, \ + const MCDisassembler *Decoder) { \ + auto DAsm = static_cast(Decoder); \ + return addOperand(Inst, DAsm->DecoderName(Imm)); \ + } #define DECODE_OPERAND_REG(RegClass) \ DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) @@ -144,155 +149,151 @@ DECODE_OPERAND_REG(AReg_512) DECODE_OPERAND_REG(AReg_1024) DECODE_OPERAND_REG(AV_32) DECODE_OPERAND_REG(AV_64) +DECODE_OPERAND_REG(AV_128) +DECODE_OPERAND_REG(AVDst_128) +DECODE_OPERAND_REG(AVDst_512) -static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm)); } -static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, - unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } -static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm)); } -static DecodeStatus decodeOperand_VS_16(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm)); } -static DecodeStatus decodeOperand_VS_32(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm)); } -static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512)); } -static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, 
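// The "Imm | 512" in these AReg decoders restores a bank-select bit that is
// not part of the raw field: decodeSrcOp appears to use it to resolve the
// value in the accumulator (AGPR) range rather than the VGPR/SGPR ranges.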
DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512)); } -static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512)); } -static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512)); } -static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512)); } -static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm)); } -static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm)); } -static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm)); } -static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm)); } -static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm)); } static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { const auto *DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); } static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { + uint64_t Addr, + const MCDisassembler *Decoder) { const auto *DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); } -static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { const 
auto *DAsm = static_cast(Decoder); return addOperand( Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true)); } -static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { const auto *DAsm = static_cast(Decoder); return addOperand( Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true)); } +static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, + uint64_t Addr, const void *Decoder) { + const auto *DAsm = static_cast(Decoder); + return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val)); +} + static bool IsAGPROperand(const MCInst &Inst, int OpIdx, const MCRegisterInfo *MRI) { if (OpIdx < 0) @@ -307,10 +308,9 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx, return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; } -static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm, AMDGPUDisassembler::OpWidthTy Opw, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); if (!DAsm->isGFX90A()) { Imm &= 511; @@ -342,54 +342,41 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); } -static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, - unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW32, Decoder); } -static DecodeStatus DecodeAVLdSt_64RegisterClass(MCInst &Inst, - unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW64, Decoder); } -static DecodeStatus DecodeAVLdSt_96RegisterClass(MCInst &Inst, - unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW96, Decoder); } -static DecodeStatus DecodeAVLdSt_128RegisterClass(MCInst &Inst, - unsigned Imm, - uint64_t Addr, - const void *Decoder) { +static DecodeStatus +DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, + const MCDisassembler *Decoder) { return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW128, Decoder); } -static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, - unsigned Imm, +static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm, uint64_t Addr, - const void *Decoder) { + const MCDisassembler *Decoder) { auto DAsm = static_cast(Decoder); return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm)); } -static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst, - unsigned Imm, - uint64_t Addr, - const void *Decoder) { - auto DAsm = static_cast(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm)); -} - #define DECODE_SDWA(DecName) \ DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) @@ -410,21 +397,15 @@ template static inline T eatBytes(ArrayRef& Bytes) { return Res; } -DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table, - 
MCInst &MI, - uint64_t Inst, - uint64_t Address) const { - assert(MI.getOpcode() == 0); - assert(MI.getNumOperands() == 0); - MCInst TmpInst; - HasLiteral = false; - const auto SavedBytes = Bytes; - if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) { - MI = TmpInst; - return MCDisassembler::Success; - } - Bytes = SavedBytes; - return MCDisassembler::Fail; +static inline DecoderUInt128 eat12Bytes(ArrayRef &Bytes) { + assert(Bytes.size() >= 12); + uint64_t Lo = support::endian::read( + Bytes.data()); + Bytes = Bytes.slice(8); + uint64_t Hi = support::endian::read( + Bytes.data()); + Bytes = Bytes.slice(4); + return DecoderUInt128(Lo, Hi); } // The disassembler is greedy, so we need to check FI operand value to @@ -457,6 +438,29 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2 // encodings + if (isGFX11Plus() && Bytes.size() >= 12 ) { + DecoderUInt128 DecW = eat12Bytes(Bytes); + Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, + Address); + if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + break; + MI = MCInst(); // clear + Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, + Address); + if (Res) { + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) + convertVOP3PDPPInst(MI); + else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) + convertVOPCDPPInst(MI); + break; + } + Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address); + if (Res) + break; + } + // Reinitialize Bytes + Bytes = Bytes_.slice(0, MaxInstBytesNum); + if (Bytes.size() >= 8) { const uint64_t QW = eatBytes(Bytes); @@ -475,12 +479,23 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; + MI = MCInst(); // clear + Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address); + if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + break; MI = MCInst(); // clear Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address); if (Res) break; + Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address); + if (Res) { + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) + convertVOPCDPPInst(MI); + break; + } + Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); if (Res) { IsSDWA = true; break; } @@ -535,6 +550,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address); if (Res) break; + Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address); + if (Res) break; + if (Bytes.size() < 4) break; const uint64_t QW = ((uint64_t)eatBytes(Bytes) << 32) | DW; @@ -554,6 +572,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Res) break; Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address); + if (Res) break; + + Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address); + if (Res) + break; + + Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address); } while (false); if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || @@ -565,8 +590,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 || + MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx11 || MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || - MI.getOpcode() == 
AMDGPU::V_FMAC_F16_e64_gfx10)) { + MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || + MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 || + MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx11)) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); @@ -625,8 +653,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = MCDisassembler::Fail; } else { for (unsigned i = 0; i < NSAArgs; ++i) { - MI.insert(MI.begin() + VAddr0Idx + 1 + i, - decodeOperand_VGPR_32(Bytes[i])); + const unsigned VAddrIdx = VAddr0Idx + 1 + i; + auto VAddrRCID = MCII->get(MI.getOpcode()).OpInfo[VAddrIdx].RegClass; + MI.insert(MI.begin() + VAddrIdx, + createRegOperand(VAddrRCID, Bytes[i])); } Bytes = Bytes.slice(4 * NSAWords); } @@ -636,6 +666,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = convertMIMGInst(MI); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)) + Res = convertEXPInst(MI); + + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)) + Res = convertVINTERPInst(MI); + if (Res && IsSDWA) Res = convertSDWAInst(MI); @@ -667,6 +703,28 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Res; } +DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { + if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) { + // The MCInst still has these fields even though they are no longer encoded + // in the GFX11 instruction. + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr); + } + return MCDisassembler::Success; +} + +DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { + if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) { + // The MCInst has this field that is not directly encoded in the + // instruction. + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); + } + return MCDisassembler::Success; +} + DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { @@ -692,18 +750,23 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); - - // Insert dummy unused src modifiers. - if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(0), - AMDGPU::OpName::src0_modifiers); - - if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(0), - AMDGPU::OpName::src1_modifiers); - + if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) { + convertVOP3PDPPInst(MI); + } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || + AMDGPU::isVOPC64DPP(Opc)) { + convertVOPCDPPInst(MI); + } else { + // Insert dummy unused src modifiers. 
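// The decoder only materializes operands that are actually encoded, while
// printers and later passes index operands via the full MCInstrDesc operand
// list, so declared-but-unencoded modifier operands get zero placeholders.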
+ if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src0_modifiers); + + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src1_modifiers); + } return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail; } @@ -745,7 +808,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { bool IsNSA = false; unsigned AddrSize = Info->VAddrDwords; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { + if (isGFX10Plus()) { unsigned DimIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); int A16Idx = @@ -757,7 +820,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI)); - IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA; + IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA || + Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA; if (!IsNSA) { if (AddrSize > 8) AddrSize = 16; @@ -808,9 +872,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { } } + // If not using NSA on GFX10+, widen address register to correct size. unsigned NewVAddr0 = AMDGPU::NoRegister; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA && - AddrSize != Info->VAddrDwords) { + if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) { unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg(); unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0); VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0; @@ -844,11 +908,84 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { return MCDisassembler::Success; } +// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen +// decoder only adds to src_modifiers, so manually add the bits to the other +// operands. 
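// Worked example: if src0_modifiers carries OP_SEL_0 | NEG and src1_modifiers
// carries OP_SEL_1, the loop below yields OpSel = 0b001, OpSelHi = 0b010,
// NegLo = 0b001 and NegHi = 0b000, which are then appended as the standalone
// op_sel / op_sel_hi / neg_lo / neg_hi operands.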
+DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { + unsigned Opc = MI.getOpcode(); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in); + + const int ModOps[] = {AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}; + unsigned OpSel = 0; + unsigned OpSelHi = 0; + unsigned NegLo = 0; + unsigned NegHi = 0; + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + if (OpIdx == -1) + break; + unsigned Val = MI.getOperand(OpIdx).getImm(); + + OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J; + OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J; + NegLo |= !!(Val & SISrcMods::NEG) << J; + NegHi |= !!(Val & SISrcMods::NEG_HI) << J; + } + + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(OpSel), + AMDGPU::OpName::op_sel); + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(OpSelHi), + AMDGPU::OpName::op_sel_hi); + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(NegLo), + AMDGPU::OpName::neg_lo); + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(NegHi), + AMDGPU::OpName::neg_hi); + + return MCDisassembler::Success; +} + +// Create dummy old operand and insert optional operands +DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const { + unsigned Opc = MI.getOpcode(); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old) != -1) + insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old); + + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src0_modifiers); + + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src1_modifiers); + return MCDisassembler::Success; +} + DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const { assert(HasLiteral && "Should have decoded a literal"); const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); unsigned DescNumOps = Desc.getNumOperands(); + insertNamedMCOperand(MI, MCOperand::createImm(Literal), + AMDGPU::OpName::immDeferred); assert(DescNumOps == MI.getNumOperands()); for (unsigned I = 0; I < DescNumOps; ++I) { auto &Op = MI.getOperand(I); @@ -1001,6 +1138,22 @@ MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const { + return decodeSrcOp(OPW128, Val); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const { + using namespace AMDGPU::EncValues; + assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1. 
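// OR-ing IS_VGPR back in restores the implicit high bit of the operand
// encoding, so decodeSrcOp resolves the value in the vector-register range
// rather than the SGPR/inline-constant range.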
+ return decodeSrcOp(OPW128, Val | IS_VGPR); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const { + using namespace AMDGPU::EncValues; + assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1. + return decodeSrcOp(OPW512, Val | IS_VGPR); +} + MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { return createRegOperand(AMDGPU::VReg_64RegClassID, Val); } @@ -1075,6 +1228,9 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { if (HasLiteral) { + assert( + AMDGPU::hasVOPD(STI) && + "Should only decode multiple kimm with VOPD, check VSrc operand types"); if (Literal != Val) return errOperand(Val, "More than one unique literal is illegal"); } @@ -1367,6 +1523,20 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c llvm_unreachable("unknown dst register"); } +// Bit 0 of DstY isn't stored in the instruction, because it's always the +// opposite of bit 0 of DstX. +MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst, + unsigned Val) const { + int VDstXInd = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX); + assert(VDstXInd != -1); + assert(Inst.getOperand(VDstXInd).isReg()); + unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg()); + Val |= ~XDstReg & 1; + auto Width = llvm::AMDGPUDisassembler::OPW32; + return createRegOperand(getVgprClassId(Width), Val); +} + MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const { using namespace AMDGPU; @@ -1381,8 +1551,10 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const { case 109: return createRegOperand(TBA_HI); case 110: return createRegOperand(TMA_LO); case 111: return createRegOperand(TMA_HI); - case 124: return createRegOperand(M0); - case 125: return createRegOperand(SGPR_NULL); + case 124: + return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0); + case 125: + return isGFX11Plus() ? 
createRegOperand(M0) : createRegOperand(SGPR_NULL); case 126: return createRegOperand(EXEC_LO); case 127: return createRegOperand(EXEC_HI); case 235: return createRegOperand(SRC_SHARED_BASE); @@ -1408,7 +1580,14 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { case 106: return createRegOperand(VCC); case 108: return createRegOperand(TBA); case 110: return createRegOperand(TMA); - case 125: return createRegOperand(SGPR_NULL); + case 124: + if (isGFX11Plus()) + return createRegOperand(SGPR_NULL); + break; + case 125: + if (!isGFX11Plus()) + return createRegOperand(SGPR_NULL); + break; case 126: return createRegOperand(EXEC); case 235: return createRegOperand(SRC_SHARED_BASE); case 236: return createRegOperand(SRC_SHARED_LIMIT); @@ -1522,6 +1701,15 @@ bool AMDGPUDisassembler::isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI); } +bool AMDGPUDisassembler::isGFX11() const { + return STI.getFeatureBits()[AMDGPU::FeatureGFX11]; +} + +bool AMDGPUDisassembler::isGFX11Plus() const { + return AMDGPU::isGFX11Plus(STI); +} + + bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; } @@ -1888,10 +2076,10 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, //===----------------------------------------------------------------------===// // Try to find symbol name for specified label -bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst, - raw_ostream &/*cStream*/, int64_t Value, - uint64_t /*Address*/, bool IsBranch, - uint64_t /*Offset*/, uint64_t /*InstSize*/) { +bool AMDGPUSymbolizer::tryAddingSymbolicOperand( + MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value, + uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/, + uint64_t /*OpSize*/, uint64_t /*InstSize*/) { if (!IsBranch) { return false; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index eea6074d5281..31869f0917ae 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -15,8 +15,10 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H +#include "llvm/ADT/APInt.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/DataExtractor.h" #include @@ -27,6 +29,60 @@ class MCOperand; class MCSubtargetInfo; class Twine; +// Exposes an interface expected by autogenerated code in +// FixedLenDecoderEmitter +class DecoderUInt128 { +private: + uint64_t Lo = 0; + uint64_t Hi = 0; + +public: + DecoderUInt128() = default; + DecoderUInt128(uint64_t Lo, uint64_t Hi = 0) : Lo(Lo), Hi(Hi) {} + operator bool() const { return Lo || Hi; } + void insertBits(uint64_t SubBits, unsigned BitPosition, unsigned NumBits) { + assert(NumBits && NumBits <= 64); + assert(SubBits >> 1 >> (NumBits - 1) == 0); + assert(BitPosition < 128); + if (BitPosition < 64) { + Lo |= SubBits << BitPosition; + Hi |= SubBits >> 1 >> (63 - BitPosition); + } else { + Hi |= SubBits << (BitPosition - 64); + } + } + uint64_t extractBitsAsZExtValue(unsigned NumBits, + unsigned BitPosition) const { + assert(NumBits && NumBits <= 64); + assert(BitPosition < 128); + uint64_t Val; + if (BitPosition < 64) + Val = Lo >> BitPosition | Hi << 1 << (63 - BitPosition); + else + Val = Hi >> (BitPosition - 64); + return Val & ((uint64_t(2) << (NumBits - 
1)) - 1); + } + DecoderUInt128 operator&(const DecoderUInt128 &RHS) const { + return DecoderUInt128(Lo & RHS.Lo, Hi & RHS.Hi); + } + DecoderUInt128 operator&(const uint64_t &RHS) const { + return *this & DecoderUInt128(RHS); + } + DecoderUInt128 operator~() const { return DecoderUInt128(~Lo, ~Hi); } + bool operator==(const DecoderUInt128 &RHS) { + return Lo == RHS.Lo && Hi == RHS.Hi; + } + bool operator!=(const DecoderUInt128 &RHS) { + return Lo != RHS.Lo || Hi != RHS.Hi; + } + bool operator!=(const int &RHS) { + return *this != DecoderUInt128(RHS); + } + friend raw_ostream &operator<<(raw_ostream &OS, const DecoderUInt128 &RHS) { + return OS << APInt(128, {RHS.Lo, RHS.Hi}); + } +}; + //===----------------------------------------------------------------------===// // AMDGPUDisassembler //===----------------------------------------------------------------------===// @@ -57,8 +113,21 @@ public: MCOperand errOperand(unsigned V, const Twine& ErrMsg) const; - DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, - uint64_t Address) const; + template + DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, + uint64_t Address) const { + assert(MI.getOpcode() == 0); + assert(MI.getNumOperands() == 0); + MCInst TmpInst; + HasLiteral = false; + const auto SavedBytes = Bytes; + if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) { + MI = TmpInst; + return MCDisassembler::Success; + } + Bytes = SavedBytes; + return MCDisassembler::Fail; + } Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, @@ -87,10 +156,14 @@ public: DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const; + DecodeStatus convertEXPInst(MCInst &MI) const; + DecodeStatus convertVINTERPInst(MCInst &MI) const; DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; + DecodeStatus convertVOP3PDPPInst(MCInst &MI) const; + DecodeStatus convertVOPCDPPInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const; @@ -127,6 +200,9 @@ public: MCOperand decodeOperand_AReg_1024(unsigned Val) const; MCOperand decodeOperand_AV_32(unsigned Val) const; MCOperand decodeOperand_AV_64(unsigned Val) const; + MCOperand decodeOperand_AV_128(unsigned Val) const; + MCOperand decodeOperand_AVDst_128(unsigned Val) const; + MCOperand decodeOperand_AVDst_512(unsigned Val) const; enum OpWidthTy { OPW32, @@ -157,6 +233,7 @@ public: MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral = false) const; MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; @@ -177,6 +254,8 @@ public: bool isGFX9Plus() const; bool isGFX10() const; bool isGFX10Plus() const; + bool isGFX11() const; + bool isGFX11Plus() const; bool hasArchitectedFlatScratch() const; }; @@ -196,8 +275,8 @@ public: : MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {} bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, - int64_t Value, uint64_t Address, - bool IsBranch, uint64_t Offset, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override; void 
tryAddingPcLoadReferenceComment(raw_ostream &cStream, diff --git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td index b3b55ddd2c97..14ba01f0d67c 100644 --- a/llvm/lib/Target/AMDGPU/EXPInstructions.td +++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td @@ -10,7 +10,7 @@ // EXP classes //===----------------------------------------------------------------------===// -class EXPCommon : InstSI< +class EXPCommon : InstSI< (outs), (ins exp_tgt:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, @@ -21,21 +21,30 @@ class EXPCommon : InstSI< let mayLoad = done; let mayStore = 1; let UseNamedOperandTable = 1; - let Uses = [EXEC]; + let Uses = !if(row, [EXEC, M0], [EXEC]); let SchedRW = [WriteExport]; let DisableWQM = 1; } -class EXP_Pseudo : EXPCommon, - SIMCInstr { +class EXP_Pseudo + : EXPCommon, SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; } -class EXP_Real - : EXPCommon, - SIMCInstr { +// Real instruction with optional asm operands "compr" and "vm". +class EXP_Real_ComprVM + : EXPCommon<0, done, "exp$tgt $src0, $src1, $src2, $src3" + #!if(done, " done", "")#"$compr$vm">, + SIMCInstr { + let AsmMatchConverter = "cvtExp"; +} + +// Real instruction with optional asm operand "row_en". +class EXP_Real_Row + : EXPCommon, + SIMCInstr { let AsmMatchConverter = "cvtExp"; } @@ -43,17 +52,21 @@ class EXP_Real // EXP Instructions //===----------------------------------------------------------------------===// -// Split EXP instruction into EXP and EXP_DONE so we can set -// mayLoad for done=1. -def EXP : EXP_Pseudo<0>; -def EXP_DONE : EXP_Pseudo<1>; +// DONE variants have mayLoad = 1. +// ROW variants have an implicit use of M0. +let SubtargetPredicate = isNotGFX90APlus in { +def EXP : EXP_Pseudo<0, 0>; +def EXP_DONE : EXP_Pseudo<0, 1>; +def EXP_ROW : EXP_Pseudo<1, 0>; +def EXP_ROW_DONE : EXP_Pseudo<1, 1>; +} // let SubtargetPredicate = isNotGFX90APlus //===----------------------------------------------------------------------===// // SI //===----------------------------------------------------------------------===// class EXP_Real_si - : EXP_Real<_done, pseudo, SIEncodingFamily.SI>, EXPe { + : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.SI>, EXPe_ComprVM { let AssemblerPredicate = isGFX6GFX7; let DecoderNamespace = "GFX6GFX7"; let done = _done; @@ -67,8 +80,9 @@ def EXP_DONE_si : EXP_Real_si<1, "EXP_DONE">; //===----------------------------------------------------------------------===// class EXP_Real_vi - : EXP_Real<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi { + : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi { let AssemblerPredicate = isGFX8GFX9; + let SubtargetPredicate = isNotGFX90APlus; let DecoderNamespace = "GFX8"; let done = _done; } @@ -77,12 +91,12 @@ def EXP_vi : EXP_Real_vi<0, "EXP">; def EXP_DONE_vi : EXP_Real_vi<1, "EXP_DONE">; //===----------------------------------------------------------------------===// -// GFX10+ +// GFX10 //===----------------------------------------------------------------------===// class EXP_Real_gfx10 - : EXP_Real<_done, pseudo, SIEncodingFamily.GFX10>, EXPe { - let AssemblerPredicate = isGFX10Plus; + : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.GFX10>, EXPe_ComprVM { + let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let done = _done; } @@ -90,6 +104,23 @@ class EXP_Real_gfx10 def EXP_gfx10 : EXP_Real_gfx10<0, "EXP">; def EXP_DONE_gfx10 : EXP_Real_gfx10<1, "EXP_DONE">; +//===----------------------------------------------------------------------===// +// 
GFX11+ +//===----------------------------------------------------------------------===// + +class EXP_Real_gfx11 + : EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX11>, EXPe_Row { + let AssemblerPredicate = isGFX11Plus; + let DecoderNamespace = "GFX11"; + let row = _row; + let done = _done; +} + +def EXP_gfx11 : EXP_Real_gfx11<0, 0, "EXP">; +def EXP_DONE_gfx11 : EXP_Real_gfx11<0, 1, "EXP_DONE">; +def EXP_ROW_gfx11 : EXP_Real_gfx11<1, 0, "EXP_ROW">; +def EXP_ROW_DONE_gfx11 : EXP_Real_gfx11<1, 1, "EXP_ROW_DONE">; + //===----------------------------------------------------------------------===// // EXP Patterns //===----------------------------------------------------------------------===// @@ -103,6 +134,15 @@ class ExpPattern : GCNPat< ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en) >; +class ExpRowPattern : GCNPat< + (int_amdgcn_exp_row timm:$tgt, timm:$en, + (vt ExpSrc0:$src0), (vt ExpSrc1:$src1), + (vt ExpSrc2:$src2), (vt ExpSrc3:$src3), + done_val, M0), + (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, + ExpSrc2:$src2, ExpSrc3:$src3, 0, 0, timm:$en) +>; + class ExpComprPattern : GCNPat< (int_amdgcn_exp_compr timm:$tgt, timm:$en, (vt ExpSrc0:$src0), (vt ExpSrc1:$src1), @@ -119,6 +159,11 @@ def : ExpPattern; def : ExpPattern; def : ExpPattern; +def : ExpRowPattern; +def : ExpRowPattern; +def : ExpRowPattern; +def : ExpRowPattern; + def : ExpComprPattern; def : ExpComprPattern; def : ExpComprPattern; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index c530d3cb49f0..cb2822818549 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -12,6 +12,7 @@ def ScratchOffset : ComplexPattern; def ScratchSAddr : ComplexPattern; +def ScratchSVAddr : ComplexPattern; //===----------------------------------------------------------------------===// // FLAT classes @@ -56,6 +57,9 @@ class FLAT_Pseudo dlcValue = 0; bits<1> has_sccb = 1; bits<1> sccbValue = 0; + bits<1> has_sve = 0; // Scratch VGPR Enable + bits<1> lds = 0; + bits<1> sve = 0; let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); @@ -74,8 +78,8 @@ class FLAT_Pseudo op, FLAT_Pseudo ps> : - InstSI , +class FLAT_Real op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : + InstSI , Enc64 { let isPseudo = 0; @@ -96,6 +100,7 @@ class FLAT_Real op, FLAT_Pseudo ps> : let IsAtomicNoRet = ps.IsAtomicNoRet; let VM_CNT = ps.VM_CNT; let LGKM_CNT = ps.LGKM_CNT; + let VALU = ps.VALU; // encoding fields bits<8> vaddr; @@ -106,7 +111,7 @@ class FLAT_Real op, FLAT_Pseudo ps> : bits<5> cpol; // Only valid on gfx9 - bits<1> lds = 0; // XXX - What does this actually do? 
+ bits<1> lds = ps.lds; // LDS DMA for global and scratch // Segment, 00=flat, 01=scratch, 10=global, 11=reserved bits<2> seg = !if(ps.is_flat_global, 0b10, @@ -123,7 +128,7 @@ class FLAT_Real op, FLAT_Pseudo ps> : // Only valid on GFX9+ let Inst{12-0} = offset; - let Inst{13} = lds; + let Inst{13} = !if(ps.has_sve, ps.sve, lds); let Inst{15-14} = seg; let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); @@ -240,6 +245,35 @@ multiclass FLAT_Global_Store_Pseudo { } } +class FLAT_Global_Load_LDS_Pseudo : FLAT_Pseudo< + opName, + (outs ), + !con( + !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), + (ins flat_offset:$offset, CPol_0:$cpol)), + " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { + let LGKM_CNT = 1; + let is_flat_global = 1; + let lds = 1; + let has_data = 0; + let has_vdst = 0; + let mayLoad = 1; + let mayStore = 1; + let has_saddr = 1; + let enabled_saddr = EnableSaddr; + let VALU = 1; + let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); + let Uses = [M0, EXEC]; + let SchedRW = [WriteVMEM, WriteLDS]; +} + +multiclass FLAT_Global_Load_LDS_Pseudo { + def "" : FLAT_Global_Load_LDS_Pseudo, + GlobalSaddrTable<0, opName>; + def _SADDR : FLAT_Global_Load_LDS_Pseudo, + GlobalSaddrTable<1, opName>; +} + class FLAT_Global_Store_AddTid_Pseudo : FLAT_Pseudo< opName, @@ -273,16 +307,19 @@ class FlatScratchInst { class FLAT_Scratch_Load_Pseudo + bit EnableSVE = 0, + bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< opName, (outs getLdStRegisterOperand.ret:$vdst), !con( - !if(EnableSaddr, - (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), - !if(EnableVaddr, - (ins VGPR_32:$vaddr, flat_offset:$offset), - (ins flat_offset:$offset))), + !if(EnableSVE, + (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), + !if(EnableSaddr, + (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), + !if(EnableVaddr, + (ins VGPR_32:$vaddr, flat_offset:$offset), + (ins flat_offset:$offset)))), !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand.ret:$vdst_in), (ins CPol_0:$cpol))), " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { @@ -291,7 +328,9 @@ class FLAT_Scratch_Load_Pseudo .ret> : FLAT_Pseudo< opName, (outs), - !if(EnableSaddr, - (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), - !if(EnableVaddr, - (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), - (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))), + !if(EnableSVE, + (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), + !if(EnableSaddr, + (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), + !if(EnableVaddr, + (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), + (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))), " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let mayLoad = 0; let mayStore = 1; @@ -315,7 +357,9 @@ class FLAT_Scratch_Store_Pseudo , FlatScratchInst; + let SubtargetPredicate = HasFlatScratchSVSMode in + def _SVS : FLAT_Scratch_Load_Pseudo, + FlatScratchInst; + let SubtargetPredicate = HasFlatScratchSTMode in - def _ST : FLAT_Scratch_Load_Pseudo, + def _ST : FLAT_Scratch_Load_Pseudo, FlatScratchInst; } } @@ -339,12 +387,59 @@ multiclass FLAT_Scratch_Store_Pseudo { def _SADDR : FLAT_Scratch_Store_Pseudo, FlatScratchInst; + let SubtargetPredicate = HasFlatScratchSVSMode 
in + def _SVS : FLAT_Scratch_Store_Pseudo, + FlatScratchInst; + let SubtargetPredicate = HasFlatScratchSTMode in - def _ST : FLAT_Scratch_Store_Pseudo, + def _ST : FLAT_Scratch_Store_Pseudo, FlatScratchInst; } } +class FLAT_Scratch_Load_LDS_Pseudo : FLAT_Pseudo< + opName, + (outs ), + !if(EnableSVE, + (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), + !if(EnableSaddr, + (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), + !if(EnableVaddr, + (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), + (ins flat_offset:$offset, CPol:$cpol)))), + " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { + + let LGKM_CNT = 1; + let is_flat_scratch = 1; + let lds = 1; + let has_data = 0; + let has_vdst = 0; + let mayLoad = 1; + let mayStore = 1; + let has_saddr = 1; + let enabled_saddr = EnableSaddr; + let has_vaddr = EnableVaddr; + let has_sve = EnableSVE; + let sve = EnableVaddr; + let VALU = 1; + let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); + let Uses = [M0, EXEC]; + let SchedRW = [WriteVMEM, WriteLDS]; +} + +multiclass FLAT_Scratch_Load_LDS_Pseudo { + def "" : FLAT_Scratch_Load_LDS_Pseudo, + FlatScratchInst; + def _SADDR : FLAT_Scratch_Load_LDS_Pseudo, + FlatScratchInst; + def _SVS : FLAT_Scratch_Load_LDS_Pseudo, + FlatScratchInst; + def _ST : FLAT_Scratch_Load_LDS_Pseudo, + FlatScratchInst; +} + class FLAT_AtomicNoRet_Pseudo pattern = []> : FLAT_Pseudo { @@ -375,7 +470,6 @@ multiclass FLAT_Atomic_Pseudo< string opName, RegisterClass vdst_rc, ValueType vt, - SDPatternOperator atomic = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, bit isFP = isFloatType.ret, @@ -394,11 +488,9 @@ multiclass FLAT_Atomic_Pseudo< def _RTN : FLAT_AtomicRet_Pseudo .ret:$vdst), (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), - " $vdst, $vaddr, $vdata$offset$cpol", - [(set vt:$vdst, - (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>, - GlobalSaddrTable<0, opName#"_rtn">, - AtomicNoRet { + " $vdst, $vaddr, $vdata$offset$cpol">, + GlobalSaddrTable<0, opName#"_rtn">, + AtomicNoRet { let FPAtomic = isFP; let AddedComplexity = -1; // Prefer global atomics if available } @@ -441,7 +533,6 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN< string opName, RegisterClass vdst_rc, ValueType vt, - SDPatternOperator atomic = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, bit isFP = isFloatType.ret, @@ -451,11 +542,9 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN< def _RTN : FLAT_AtomicRet_Pseudo , - GlobalSaddrTable<0, opName#"_rtn">, - AtomicNoRet { + " $vdst, $vaddr, $vdata, off$offset$cpol">, + GlobalSaddrTable<0, opName#"_rtn">, + AtomicNoRet { let has_saddr = 1; let FPAtomic = isFP; } @@ -477,12 +566,11 @@ multiclass FLAT_Global_Atomic_Pseudo< string opName, RegisterClass vdst_rc, ValueType vt, - SDPatternOperator atomic_rtn = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc> { let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN; - defm "" : FLAT_Global_Atomic_Pseudo_RTN; + defm "" : FLAT_Global_Atomic_Pseudo_RTN; } } @@ -519,99 +607,88 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR } defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", - VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32, - v2i32, VReg_64>; + VGPR_32, i32, v2i32, VReg_64>; defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo 
<"flat_atomic_cmpswap_x2", - VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64, - v2i64, VReg_128>; + VReg_64, i64, v2i64, VReg_128>; defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", - VGPR_32, i32, atomic_swap_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", - VReg_64, i64, atomic_swap_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", - VGPR_32, i32, atomic_load_add_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", - VGPR_32, i32, atomic_load_sub_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", - VGPR_32, i32, atomic_load_min_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", - VGPR_32, i32, atomic_load_umin_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", - VGPR_32, i32, atomic_load_max_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", - VGPR_32, i32, atomic_load_umax_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", - VGPR_32, i32, atomic_load_and_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", - VGPR_32, i32, atomic_load_or_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", - VGPR_32, i32, atomic_load_xor_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", - VGPR_32, i32, atomic_inc_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", - VGPR_32, i32, atomic_dec_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", - VReg_64, i64, atomic_load_add_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", - VReg_64, i64, atomic_load_sub_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", - VReg_64, i64, atomic_load_min_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", - VReg_64, i64, atomic_load_umin_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", - VReg_64, i64, atomic_load_max_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", - VReg_64, i64, atomic_load_umax_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", - VReg_64, i64, atomic_load_and_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", - VReg_64, i64, atomic_load_or_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", - VReg_64, i64, atomic_load_xor_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", - VReg_64, i64, atomic_inc_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", - VReg_64, i64, atomic_dec_flat_64>; + VReg_64, i64>; // GFX7-, GFX10-only flat instructions. 
let SubtargetPredicate = isGFX7GFX10 in { -defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", - VGPR_32, f32, null_frag, v2f32, VReg_64>; - defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", - VReg_64, f64, null_frag, v2f64, VReg_128>; - -defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", - VGPR_32, f32>; - -defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", - VGPR_32, f32>; + VReg_64, f64, v2f64, VReg_128>; defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", VReg_64, f64>; @@ -622,14 +699,39 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", } // End SubtargetPredicate = isGFX7GFX10 let SubtargetPredicate = isGFX90APlus in { - defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64, int_amdgcn_flat_atomic_fadd>; - defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmin>; - defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmax>; - defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; - defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>; - defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; + defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>; + defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>; + defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>; + defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>; + defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>; + defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus +let SubtargetPredicate = isGFX940Plus in { + defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; + defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; + defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; +} // End SubtargetPredicate = isGFX940Plus + +// GFX7-, GFX10-, GFX11-only flat instructions. +let SubtargetPredicate = isGFX7GFX10GFX11 in { + +defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", + VGPR_32, f32, v2f32, VReg_64>; + +defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", + VGPR_32, f32>; + +defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", + VGPR_32, f32>; + +} // End SubtargetPredicate = isGFX7GFX10GFX11 + +// GFX940-, GFX11-only flat instructions. 
+let SubtargetPredicate = isGFX940GFX11Plus in { + defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; +} // End SubtargetPredicate = isGFX940GFX11Plus + defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; @@ -662,88 +764,93 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d let is_flat_global = 1 in { defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", - VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, - v2i32, VReg_64>; + VGPR_32, i32, v2i32, VReg_64>; defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", - VReg_64, i64, AMDGPUatomic_cmp_swap_global_64, - v2i64, VReg_128>; + VReg_64, i64, v2i64, VReg_128>; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", - VGPR_32, i32, atomic_swap_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", - VReg_64, i64, atomic_swap_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", - VGPR_32, i32, atomic_load_add_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", - VGPR_32, i32, atomic_load_sub_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", - VGPR_32, i32, atomic_load_min_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", - VGPR_32, i32, atomic_load_umin_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", - VGPR_32, i32, atomic_load_max_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", - VGPR_32, i32, atomic_load_umax_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", - VGPR_32, i32, atomic_load_and_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", - VGPR_32, i32, atomic_load_or_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", - VGPR_32, i32, atomic_load_xor_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", - VGPR_32, i32, atomic_inc_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", - VGPR_32, i32, atomic_dec_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", - VReg_64, i64, atomic_load_add_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", - VReg_64, i64, atomic_load_sub_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", - VReg_64, i64, atomic_load_min_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", - VReg_64, i64, atomic_load_umin_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", - VReg_64, i64, atomic_load_max_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", - VReg_64, i64, atomic_load_umax_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_AND_X2 : 
FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", - VReg_64, i64, atomic_load_and_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", - VReg_64, i64, atomic_load_or_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", - VReg_64, i64, atomic_load_xor_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", - VReg_64, i64, atomic_inc_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", - VReg_64, i64, atomic_dec_global_64>; + VReg_64, i64>; let SubtargetPredicate = HasGFX10_BEncoding in defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", - VGPR_32, i32, int_amdgcn_global_atomic_csub>; + VGPR_32, i32>; + +defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; +defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; +defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; +defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; +defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; + } // End is_flat_global = 1 @@ -775,41 +882,46 @@ defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; +defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; +defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; +defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; +defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; +defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; + } // End SubtargetPredicate = HasFlatScratchInsts let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { defm GLOBAL_ATOMIC_FCMPSWAP : - FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; defm GLOBAL_ATOMIC_FMIN : - FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; defm GLOBAL_ATOMIC_FMAX : - FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; defm GLOBAL_ATOMIC_FCMPSWAP_X2 : - FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; defm GLOBAL_ATOMIC_FMIN_X2 : - FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; defm GLOBAL_ATOMIC_FMAX_X2 : - FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64, int_amdgcn_global_atomic_fmax>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>; } // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 let is_flat_global = 1 in { -let OtherPredicates = [HasAtomicFaddInsts] in { +let OtherPredicates = 
[HasAtomicFaddNoRtnInsts] in defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_add_f32", VGPR_32, f32 >; +let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; -} // End OtherPredicates = [HasAtomicFaddInsts] - -let OtherPredicates = [isGFX90APlus] in { +let OtherPredicates = [HasAtomicFaddRtnInsts] in defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < - "global_atomic_add_f32", VGPR_32, f32, int_amdgcn_global_atomic_fadd + "global_atomic_add_f32", VGPR_32, f32 >; +let OtherPredicates = [isGFX90APlus] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < - "global_atomic_pk_add_f16", VGPR_32, v2f16, int_amdgcn_global_atomic_fadd + "global_atomic_pk_add_f16", VGPR_32, v2f16 >; -} // End OtherPredicates = [isGFX90APlus] } // End is_flat_global = 1 //===----------------------------------------------------------------------===// @@ -896,24 +1008,47 @@ class FlatStoreSignedAtomicPat .ret:$data, $offset) >; -class FlatAtomicPat : GCNPat < - (vt (node (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), - (inst $vaddr, $data, $offset) ->; - class FlatAtomicPatNoRtn : GCNPat < (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data), (inst VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset) >; +multiclass FlatAtomicPat { + defvar rtnNode = !cast(node#"_ret_"#vt.Size); + defvar noRtnNode = !cast(node#"_noret_"#vt.Size); + + def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), + (!cast(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset)>; + + def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), + (!cast(inst) VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset)>; +} + +multiclass FlatSignedAtomicPat { + defvar rtnNode = !cast(node # "_ret" # !if(isIntr, "", "_" # vt.Size)); + defvar noRtnNode = !cast(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); + + def : GCNPat <(vt (rtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), + (!cast(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset)>; + + def : GCNPat <(vt (noRtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), + (!cast(inst) VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset)>; +} + +multiclass FlatSignedAtomicIntrPat { + defm : FlatSignedAtomicPat; +} + class FlatSignedAtomicPatNoRtn : GCNPat < (node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data), (inst VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset) >; -class FlatSignedAtomicPat : GCNPat < +class FlatSignedAtomicPatRtn : GCNPat < (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), (inst VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset) >; @@ -949,8 +1084,28 @@ class ScratchStoreSaddrPat .ret:$data, $saddr, $offset) >; +class ScratchLoadSVaddrPat : GCNPat < + (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))), + (inst $vaddr, $saddr, $offset, 0) +>; + +class ScratchStoreSVaddrPat : GCNPat < + (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)), + (inst getVregSrcForVT.ret:$data, $vaddr, $saddr, $offset) +>; + +class ScratchLoadSVaddrPat_D16 : GCNPat < + (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), + (inst $vaddr, $saddr, $offset, 0, $in) +>; + let OtherPredicates = [HasFlatAddressSpace] in { +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; def : 
FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; @@ -986,44 +1141,52 @@ def : FlatLoadPat ; def : FlatStorePat ; } -def : FlatStoreAtomicPat ; -def : FlatStoreAtomicPat ; - -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; - -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; + +foreach as = [ "flat", "global" ] in { +defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; + +defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; +} // end foreach as def : FlatStorePat ; def : FlatStorePat ; -let OtherPredicates = [D16PreservesUnusedBits] in { +let OtherPredicates = [HasD16LoadStore] in { def : FlatStorePat ; def : FlatStorePat ; +} +let OtherPredicates = [D16PreservesUnusedBits] in { def : FlatLoadPat_D16 ; def : FlatLoadPat_D16 ; def : FlatLoadPat_D16 ; @@ -1084,9 +1247,9 @@ multiclass GlobalFLATAtomicStorePats { - def : FlatSignedAtomicPat (nortn_inst_name#"_RTN"), node, vt, data_vt> { +multiclass GlobalFLATAtomicPatsRtn { + def : FlatSignedAtomicPatRtn (nortn_inst_name#"_RTN"), node, vt, data_vt> { let AddedComplexity = 10; } @@ -1095,6 +1258,26 @@ multiclass 
GlobalFLATAtomicPats { + defvar rtnNode = !cast(node # "_ret" # !if(isIntr, "", "_" # vt.Size)); + defvar noRtnNode = !cast(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); + + let AddedComplexity = 10 in { + defm : FlatSignedAtomicPat ; + } + + let AddedComplexity = 11 in { + def : GlobalAtomicSaddrPat(inst#"_SADDR"), noRtnNode, vt, data_vt>; + def : GlobalAtomicSaddrPat(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; + } +} + +multiclass GlobalFLATAtomicIntrPats { + defm : GlobalFLATAtomicPats; +} + multiclass GlobalFLATNoRtnAtomicPats { def : FlatSignedAtomicPatNoRtn { @@ -1114,6 +1297,11 @@ multiclass ScratchFLATLoadPats(!cast(inst)#"_SADDR"), node, vt> { let AddedComplexity = 26; } + + def : ScratchLoadSVaddrPat(!cast(inst)#"_SVS"), node, vt> { + let SubtargetPredicate = HasFlatScratchSVSMode; + let AddedComplexity = 27; + } } multiclass ScratchFLATStorePats(!cast(inst)#"_SADDR"), node, vt> { let AddedComplexity = 26; } + + def : ScratchStoreSVaddrPat(!cast(inst)#"_SVS"), node, vt> { + let SubtargetPredicate = HasFlatScratchSVSMode; + let AddedComplexity = 27; + } } multiclass ScratchFLATLoadPats_D16 { @@ -1135,10 +1328,19 @@ multiclass ScratchFLATLoadPats_D16(!cast(inst)#"_SADDR"), node, vt> { let AddedComplexity = 26; } + + def : ScratchLoadSVaddrPat_D16 (!cast(inst)#"_SVS"), node, vt> { + let SubtargetPredicate = HasFlatScratchSVSMode; + let AddedComplexity = 27; + } } let OtherPredicates = [HasFlatGlobalInsts] in { +defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; @@ -1179,10 +1381,12 @@ defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; -let OtherPredicates = [D16PreservesUnusedBits] in { +let OtherPredicates = [HasD16LoadStore] in { defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; +} +let OtherPredicates = [D16PreservesUnusedBits] in { defm : GlobalFLATLoadPats_D16 ; defm : GlobalFLATLoadPats_D16 ; defm : GlobalFLATLoadPats_D16 ; @@ -1198,59 +1402,84 @@ defm : GlobalFLATLoadPats_D16 defm : GlobalFLATLoadPats_D16 ; } -defm : GlobalFLATAtomicStorePats ; -defm : GlobalFLATAtomicStorePats ; - -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", atomic_load_add_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", atomic_load_sub_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", atomic_inc_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", atomic_dec_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", atomic_load_and_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", atomic_load_max_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", atomic_load_umax_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", atomic_load_min_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", atomic_load_umin_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", atomic_load_or_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", atomic_swap_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", atomic_load_xor_global_32, i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>; - -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", atomic_load_add_global_64, i64>; -defm : GlobalFLATAtomicPats 
<"GLOBAL_ATOMIC_SUB_X2", atomic_load_sub_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", atomic_inc_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", atomic_dec_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", atomic_load_and_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", atomic_load_max_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", atomic_load_umax_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", atomic_load_min_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", atomic_load_umin_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", atomic_load_or_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>; +defm : GlobalFLATAtomicStorePats ; +defm : GlobalFLATAtomicStorePats ; +defm : GlobalFLATAtomicStorePats ; +defm : GlobalFLATAtomicStorePats ; +defm : GlobalFLATAtomicStorePats ; +defm : GlobalFLATAtomicStorePats ; + +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; +defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>; + +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_inc_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; let 
OtherPredicates = [isGFX10Plus] in { -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", atomic_load_fmin_global_32, f32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", atomic_load_fmax_global_32, f32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", atomic_load_fmin_global_64, f64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", atomic_load_fmax_global_64, f64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>; } -let OtherPredicates = [HasAtomicFaddInsts] in { +let OtherPredicates = [HasAtomicFaddNoRtnInsts] in defm : GlobalFLATNoRtnAtomicPats ; +let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in defm : GlobalFLATNoRtnAtomicPats ; -} let OtherPredicates = [isGFX90APlus] in { -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", atomic_load_fadd_global_32, f32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", atomic_load_fadd_v2f16_global_32, v2f16>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", atomic_load_fadd_global_64, f64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", atomic_load_fmin_global_64, f64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", atomic_load_fmax_global_64, f64>; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_global", v2f16>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", f32>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", f64>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", v2f16>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; +defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; +defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>; +defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; +} + +let OtherPredicates = [isGFX940Plus] in { +defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; +defm : 
FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_flat", v2f16>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>; +defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>; } } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 @@ -1291,10 +1520,12 @@ defm : ScratchFLATStorePats ; defm : ScratchFLATStorePats ; defm : ScratchFLATStorePats ; -let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { +let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { defm : ScratchFLATStorePats ; defm : ScratchFLATStorePats ; +} +let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { defm : ScratchFLATLoadPats_D16 ; defm : ScratchFLATLoadPats_D16 ; defm : ScratchFLATLoadPats_D16 ; @@ -1405,6 +1636,57 @@ multiclass FLAT_Real_AllAddr_vi op, def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb>; } +class FLAT_Real_gfx940 op, FLAT_Pseudo ps> : + FLAT_Real , + SIMCInstr { + let AssemblerPredicate = isGFX940Plus; + let DecoderNamespace = "GFX9"; + let Inst{13} = ps.sve; + let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); +} + +multiclass FLAT_Real_AllAddr_SVE_vi op> { + def _vi : FLAT_Real_vi(NAME)> { + let AssemblerPredicate = isGFX8GFX9NotGFX940; + let OtherPredicates = [isGFX8GFX9NotGFX940]; + } + def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR")> { + let DecoderNamespace = "GFX9"; + } + let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { + def _VE_gfx940 : FLAT_Real_gfx940(NAME)>; + def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; + def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; + } +} + +multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx940_op, + string pre_gfx940_name = !subst("_lds", "", !cast(NAME).PseudoInstr), + bit has_sccb = !cast(NAME).has_sccb> { + + let OtherPredicates = [isGFX8GFX9NotGFX940] in { + def _vi : FLAT_Real_vi(NAME), has_sccb> { + let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds"; + } + def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { + let AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; + } + } + + let SubtargetPredicate = isGFX940Plus in { + def _gfx940 : FLAT_Real_gfx940(NAME)>; + def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; + } +} + +multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx940_op> { + defm "" : FLAT_Real_AllAddr_LDS; + let SubtargetPredicate = isGFX940Plus in { + def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; + def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; + } +} + def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; @@ -1496,6 +1778,11 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; +defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>; +defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>; +defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; +defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; +defm 
GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; @@ -1524,32 +1811,39 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; -defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; -defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; -defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; -defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; -defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; -defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; -defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; -defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; -defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; -defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; -defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; -defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; -defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; -defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; -defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; -defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; -defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; -defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; -defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; -defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; -defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; -defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; - -let SubtargetPredicate = HasAtomicFaddInsts in { -defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; -defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; +defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>; +defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>; +defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>; +defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>; +defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>; + +defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>; +defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>; +defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>; +defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>; +defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>; +defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>; +defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>; +defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>; +defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>; +defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>; +defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>; +defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>; +defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>; +defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>; +defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>; +defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>; +defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>; +defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>; +defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>; +defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; +defm SCRATCH_STORE_DWORDX3 : 
FLAT_Real_AllAddr_SVE_vi <0x1e>; +defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; + +let SubtargetPredicate = isGFX8GFX9NotGFX940 in { + // These instructions are encoded differently on gfx90* and gfx940. + defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; + defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; } let SubtargetPredicate = isGFX90AOnly in { @@ -1561,13 +1855,46 @@ let SubtargetPredicate = isGFX90AOnly in { defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; } // End SubtargetPredicate = isGFX90AOnly +multiclass FLAT_Real_AllAddr_gfx940 op> { + def _gfx940 : FLAT_Real_gfx940(NAME)>; + def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; +} + +multiclass FLAT_Real_Atomics_gfx940 op, FLAT_Pseudo ps> { + def _gfx940 : FLAT_Real_gfx940(ps.PseudoInstr)>; + def _RTN_gfx940 : FLAT_Real_gfx940(ps.PseudoInstr # "_RTN")>; +} + +multiclass FLAT_Global_Real_Atomics_gfx940 op> : + FLAT_Real_AllAddr_gfx940 { + def _RTN_gfx940 : FLAT_Real_gfx940 (NAME#"_RTN")>; + def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 (NAME#"_SADDR_RTN")>; +} + +let SubtargetPredicate = isGFX940Plus in { + // These instructions are encoded differently on gfx90* and gfx940. + defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; + defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; + + defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>; + defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>; + defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>; + defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; + defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; + defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; + defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>; + defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>; + defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>; + defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; +} // End SubtargetPredicate = isGFX940Plus + //===----------------------------------------------------------------------===// // GFX10. //===----------------------------------------------------------------------===// class FLAT_Real_gfx10 op, FLAT_Pseudo ps> : FLAT_Real, SIMCInstr { - let AssemblerPredicate = isGFX10Plus; + let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let Inst{11-0} = offset{11-0}; @@ -1627,6 +1954,23 @@ multiclass FLAT_Real_ScratchAllAddr_gfx10 op> : FLAT_Real_SADDR_gfx10, FLAT_Real_ST_gfx10; +multiclass FLAT_Real_AllAddr_LDS_gfx10 op, + string opname = !subst("_lds", "", !cast(NAME).PseudoInstr)> { + let AsmString = opname # !cast(NAME).AsmOperands # " lds" in + defm "" : FLAT_Real_Base_gfx10; + + let AsmString = opname # !cast(NAME#"_SADDR").AsmOperands # " lds" in + defm "" : FLAT_Real_SADDR_gfx10; +} + +multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10 op, + string opname = !subst("_lds", "", !cast(NAME).PseudoInstr)> { + defm "" : FLAT_Real_AllAddr_LDS_gfx10; + + let AsmString = opname # !cast(NAME#"_ST").AsmOperands # " lds" in + defm "" : FLAT_Real_ST_gfx10; +} + // ENC_FLAT. 
defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; @@ -1743,6 +2087,12 @@ defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>; defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; +defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>; +defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>; +defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>; +defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>; +defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>; + // ENC_FLAT_SCRATCH. defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; @@ -1766,3 +2116,219 @@ defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>; defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; + +defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>; +defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; +defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; +defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; +defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +class FLAT_Real_gfx11 op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : + FLAT_Real , + SIMCInstr { + let AssemblerPredicate = isGFX11Plus; + let DecoderNamespace = "GFX11"; + + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); + let Inst{15} = cpol{CPolBit.SLC}; + let Inst{17-16} = seg; + let Inst{55} = ps.sve; +} + +multiclass FLAT_Real_Base_gfx11 op, string ps, string opName, int renamed = false> { + def _gfx11 : FLAT_Real_gfx11(ps), opName> { + let Inst{54-48} = !cast(SGPR_NULL_gfx11plus.HWEncoding); + } + if renamed then + def _renamed_gfx11 : MnemonicAlias(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>; +} + +multiclass FLAT_Real_RTN_gfx11 op, string ps, string opName> { + def _RTN_gfx11 : FLAT_Real_gfx11(ps#"_RTN"), opName> { + let Inst{54-48} = !cast(SGPR_NULL_gfx11plus.HWEncoding); + } +} + +multiclass FLAT_Real_SADDR_gfx11 op, string ps, string opName> { + def _SADDR_gfx11 : FLAT_Real_gfx11(ps#"_SADDR"), opName>; +} + +multiclass FLAT_Real_SADDR_RTN_gfx11 op, string ps, string opName> { + def _SADDR_RTN_gfx11 : FLAT_Real_gfx11(ps#"_SADDR_RTN"), opName>; +} + +multiclass FLAT_Real_ST_gfx11 op, string ps, string opName> { + def _ST_gfx11 : FLAT_Real_gfx11(ps#"_ST"), opName> { + let Inst{54-48} = !cast(SGPR_NULL_gfx11plus.HWEncoding); + let OtherPredicates = [HasFlatScratchSTMode]; + } +} + +multiclass FLAT_Real_SVS_gfx11 op, string ps, string opName> { + def _SVS_gfx11 : FLAT_Real_gfx11(ps#"_SVS"), opName> { + let OtherPredicates = [HasFlatScratchSVSMode]; + } +} + +multiclass FLAT_Real_AllAddr_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_Base_gfx11, + FLAT_Real_SADDR_gfx11; + +multiclass FLAT_Real_Atomics_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_Base_gfx11, + FLAT_Real_RTN_gfx11; + +multiclass 
FLAT_Real_GlblAtomics_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_AllAddr_gfx11, + FLAT_Real_RTN_gfx11, + FLAT_Real_SADDR_RTN_gfx11; + +multiclass FLAT_Real_GlblAtomics_RTN_gfx11 op, string ps, string opName> : + FLAT_Real_RTN_gfx11, + FLAT_Real_SADDR_RTN_gfx11; + +multiclass FLAT_Real_ScratchAllAddr_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_Base_gfx11, + FLAT_Real_SADDR_gfx11, + FLAT_Real_ST_gfx11, + FLAT_Real_SVS_gfx11; + +// ENC_FLAT. +defm FLAT_LOAD_U8 : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>; +defm FLAT_LOAD_I8 : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>; +defm FLAT_LOAD_U16 : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>; +defm FLAT_LOAD_I16 : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>; +defm FLAT_LOAD_B32 : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>; +defm FLAT_LOAD_B64 : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>; +defm FLAT_LOAD_B96 : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>; +defm FLAT_LOAD_B128 : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>; +defm FLAT_STORE_B8 : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>; +defm FLAT_STORE_B16 : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>; +defm FLAT_STORE_B32 : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>; +defm FLAT_STORE_B64 : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>; +defm FLAT_STORE_B96 : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>; +defm FLAT_STORE_B128 : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>; +defm FLAT_LOAD_D16_U8 : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">; +defm FLAT_LOAD_D16_I8 : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">; +defm FLAT_LOAD_D16_B16 : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">; +defm FLAT_LOAD_D16_HI_U8 : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">; +defm FLAT_LOAD_D16_HI_I8 : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">; +defm FLAT_LOAD_D16_HI_B16 : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">; +defm FLAT_STORE_D16_HI_B8 : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">; +defm FLAT_STORE_D16_HI_B16 : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">; +defm FLAT_ATOMIC_SWAP_B32 : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>; +defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>; +defm FLAT_ATOMIC_ADD_U32 : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>; +defm FLAT_ATOMIC_SUB_U32 : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>; +defm FLAT_ATOMIC_MIN_I32 : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>; +defm FLAT_ATOMIC_MIN_U32 : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>; +defm FLAT_ATOMIC_MAX_I32 : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>; +defm FLAT_ATOMIC_MAX_U32 : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>; +defm 
FLAT_ATOMIC_AND_B32 : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>; +defm FLAT_ATOMIC_OR_B32 : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>; +defm FLAT_ATOMIC_XOR_B32 : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>; +defm FLAT_ATOMIC_INC_U32 : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>; +defm FLAT_ATOMIC_DEC_U32 : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>; +defm FLAT_ATOMIC_SWAP_B64 : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>; +defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>; +defm FLAT_ATOMIC_ADD_U64 : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>; +defm FLAT_ATOMIC_SUB_U64 : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>; +defm FLAT_ATOMIC_MIN_I64 : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>; +defm FLAT_ATOMIC_MIN_U64 : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>; +defm FLAT_ATOMIC_MAX_I64 : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>; +defm FLAT_ATOMIC_MAX_U64 : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>; +defm FLAT_ATOMIC_AND_B64 : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>; +defm FLAT_ATOMIC_OR_B64 : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>; +defm FLAT_ATOMIC_XOR_B64 : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>; +defm FLAT_ATOMIC_INC_U64 : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>; +defm FLAT_ATOMIC_DEC_U64 : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>; +defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">; +defm FLAT_ATOMIC_MIN_F32 : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">; +defm FLAT_ATOMIC_MAX_F32 : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">; +defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">; + +// ENC_FLAT_GLBL. 
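+// Editor's note (illustration): entries below that pass `renamed = true` also
+// get a MnemonicAlias under isGFX11Plus, so the assembler keeps accepting the
+// pre-GFX11 spelling, e.g. "global_load_ubyte" for "global_load_u8".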
+defm GLOBAL_LOAD_U8 : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; +defm GLOBAL_LOAD_I8 : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; +defm GLOBAL_LOAD_U16 : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; +defm GLOBAL_LOAD_I16 : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; +defm GLOBAL_LOAD_B32 : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; +defm GLOBAL_LOAD_B64 : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; +defm GLOBAL_LOAD_B96 : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; +defm GLOBAL_LOAD_B128 : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; +defm GLOBAL_STORE_B8 : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; +defm GLOBAL_STORE_B16 : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; +defm GLOBAL_STORE_B32 : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; +defm GLOBAL_STORE_B64 : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; +defm GLOBAL_STORE_B96 : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; +defm GLOBAL_STORE_B128 : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; +defm GLOBAL_LOAD_D16_U8 : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">; +defm GLOBAL_LOAD_D16_I8 : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">; +defm GLOBAL_LOAD_D16_B16 : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">; +defm GLOBAL_LOAD_D16_HI_U8 : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">; +defm GLOBAL_LOAD_D16_HI_I8 : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">; +defm GLOBAL_LOAD_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">; +defm GLOBAL_STORE_D16_HI_B8 : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">; +defm GLOBAL_STORE_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">; +defm GLOBAL_LOAD_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">; +defm GLOBAL_STORE_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">; +defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; +defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; +defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; +defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; +defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32">; +defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; +defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; +defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", 
true>; +defm GLOBAL_ATOMIC_MAX_U32 : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; +defm GLOBAL_ATOMIC_AND_B32 : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; +defm GLOBAL_ATOMIC_OR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; +defm GLOBAL_ATOMIC_XOR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; +defm GLOBAL_ATOMIC_INC_U32 : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; +defm GLOBAL_ATOMIC_DEC_U32 : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; +defm GLOBAL_ATOMIC_SWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; +defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; +defm GLOBAL_ATOMIC_ADD_U64 : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; +defm GLOBAL_ATOMIC_SUB_U64 : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; +defm GLOBAL_ATOMIC_MIN_I64 : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; +defm GLOBAL_ATOMIC_MIN_U64 : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; +defm GLOBAL_ATOMIC_MAX_I64 : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; +defm GLOBAL_ATOMIC_MAX_U64 : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; +defm GLOBAL_ATOMIC_AND_B64 : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>; +defm GLOBAL_ATOMIC_OR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; +defm GLOBAL_ATOMIC_XOR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; +defm GLOBAL_ATOMIC_INC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; +defm GLOBAL_ATOMIC_DEC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; +defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">; +defm GLOBAL_ATOMIC_MIN_F32 : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">; +defm GLOBAL_ATOMIC_MAX_F32 : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">; +defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; + +// ENC_FLAT_SCRATCH. 
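+// Editor's note (illustration): each FLAT_Real_ScratchAllAddr_gfx11 below
+// emits four real encodings per pseudo: the plain VGPR-addressed form, a
+// _SADDR form, an _ST form gated by HasFlatScratchSTMode (no vaddr), and an
+// _SVS form gated by HasFlatScratchSVSMode (scalar base plus vector offset).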
+defm SCRATCH_LOAD_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; +defm SCRATCH_LOAD_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; +defm SCRATCH_LOAD_U16 : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; +defm SCRATCH_LOAD_I16 : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; +defm SCRATCH_LOAD_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; +defm SCRATCH_LOAD_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; +defm SCRATCH_LOAD_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; +defm SCRATCH_LOAD_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; +defm SCRATCH_STORE_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>; +defm SCRATCH_STORE_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; +defm SCRATCH_STORE_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; +defm SCRATCH_STORE_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; +defm SCRATCH_STORE_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; +defm SCRATCH_STORE_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; +defm SCRATCH_LOAD_D16_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">; +defm SCRATCH_LOAD_D16_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">; +defm SCRATCH_LOAD_D16_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">; +defm SCRATCH_LOAD_D16_HI_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">; +defm SCRATCH_LOAD_D16_HI_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">; +defm SCRATCH_LOAD_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">; +defm SCRATCH_STORE_D16_HI_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">; +defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index a8c85ec4e5ea..1cd880eaa48e 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -167,7 +167,9 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const { return nullptr; case AMDGPU::COPY: case AMDGPU::V_MOV_B32_e32: - case AMDGPU::V_MOV_B64_PSEUDO: { + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: { auto &Op1 = Def->getOperand(1); if (Op1.isImm()) return &Op1; @@ -183,6 +185,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, bool CombBCZ, bool IsShrinkable) const { assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp || + MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); auto OrigOp = OrigMI.getOpcode(); @@ -383,6 +386,7 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, 
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp || + MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI); @@ -399,7 +403,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { return false; } - if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) { + if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || + MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl); assert(DppCtrl && DppCtrl->isImm()); if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) { @@ -447,12 +452,6 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { return false; } - if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) { - LLVM_DEBUG(dbgs() << - " failed: old reg def and mov should be in the same BB\n"); - return false; - } - if (OldOpndValue->getImm() == 0) { if (MaskAllLanes) { assert(!BoundCtrlZero); // by check [1] @@ -616,7 +615,8 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; - } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) { + } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || + MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { if (ST->has64BitDPP() && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index c0592f6f3c7a..b6d16009e776 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -13,14 +13,38 @@ #include "GCNHazardRecognizer.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/TargetParser.h" using namespace llvm; +namespace { + +struct MFMAPaddingRatioParser : public cl::parser { + MFMAPaddingRatioParser(cl::Option &O) : cl::parser(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) { + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint argument!"); + + if (Value > 100) + return O.error("'" + Arg + "' value must be in the range [0, 100]!"); + + return false; + } +}; + +} // end anonymous namespace + +static cl::opt + MFMAPaddingRatio("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden, + cl::desc("Fill a percentage of the latency between " + "neighboring MFMA with s_nops.")); + //===----------------------------------------------------------------------===// -// Hazard Recoginizer Implementation +// Hazard Recognizer Implementation //===----------------------------------------------------------------------===// static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, @@ -92,12 +116,7 @@ static bool isSMovRel(unsigned Opcode) { } static bool isDGEMM(unsigned Opcode) { - return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || - Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 || - Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 || - Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64 || - Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64 || - Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64; + return AMDGPU::getMAIIsDGEMM(Opcode); } static bool isXDL(const GCNSubtarget &ST, const 
MachineInstr &MI) {
@@ -109,7 +128,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
       Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
     return false;
 
-  return true;
+  if (!ST.hasGFX940Insts())
+    return true;
+
+  return AMDGPU::getMAIIsGFX940XDL(Opcode);
 }
 
 static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
@@ -144,6 +166,11 @@ static bool isPermlane(const MachineInstr &MI) {
          Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
 }
 
+static bool isLdsDma(const MachineInstr &MI) {
+  return SIInstrInfo::isVALU(MI) &&
+         (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI));
+}
+
 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -204,12 +231,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     return HazardType;
 
-  if (ST.hasReadM0MovRelInterpHazard() &&
-      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
-      checkReadM0Hazards(MI) > 0)
-    return HazardType;
-
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
+  if (((ST.hasReadM0MovRelInterpHazard() &&
+        (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+       (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+       (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+       (ST.hasReadM0LdsDirectHazard() &&
+        MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
       checkReadM0Hazards(MI) > 0)
     return HazardType;
 
@@ -237,6 +264,14 @@ static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
   }
 }
 
+unsigned
+GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const {
+  const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI);
+  assert(TSchedModel.getWriteProcResBegin(SC) !=
+         TSchedModel.getWriteProcResEnd(SC));
+  return TSchedModel.getWriteProcResBegin(SC)->Cycles;
+}
+
 void GCNHazardRecognizer::processBundle() {
   MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
   MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
@@ -321,11 +356,11 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
   if (isRFE(MI->getOpcode()))
     return std::max(WaitStates, checkRFEHazards(MI));
 
-  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
-                                           isSMovRel(MI->getOpcode())))
-    return std::max(WaitStates, checkReadM0Hazards(MI));
-
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
+  if ((ST.hasReadM0MovRelInterpHazard() &&
+       (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+      (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+      (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+      (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
   if (SIInstrInfo::isMAI(*MI))
@@ -389,16 +424,61 @@ void GCNHazardRecognizer::RecedeCycle() {
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
+typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
+
 typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
+typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
+
+// Search for a hazard in a block and its predecessors.
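+//
+// Editor's sketch (illustrative, not part of the patch): unlike
+// getWaitStatesSince below, which accumulates a single wait-state count,
+// hasHazard threads a caller-defined StateT through the backwards walk, so a
+// predicate can track several positions at once. A minimal caller, assuming a
+// trivial state that only counts VALUs:
+//
+//   struct S { int VALUs = 0; };
+//   DenseSet<const MachineBasicBlock *> Visited;
+//   bool Found = hasHazard<S>(
+//       S{},
+//       [](S &St, const MachineInstr &I) {
+//         return St.VALUs > 4 ? HazardExpired : NoHazardFound;
+//       },
+//       [](S &St, const MachineInstr &I) {
+//         if (SIInstrInfo::isVALU(I))
+//           ++St.VALUs;
+//       },
+//       MI->getParent(), std::next(MI->getReverseIterator()), Visited);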
+template <typename StateT>
+static bool
+hasHazard(StateT State,
+          function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
+          function_ref<void(StateT &, const MachineInstr &)> UpdateState,
+          const MachineBasicBlock *MBB,
+          MachineBasicBlock::const_reverse_instr_iterator I,
+          DenseSet<const MachineBasicBlock *> &Visited) {
+  for (auto E = MBB->instr_rend(); I != E; ++I) {
+    // No need to look at parent BUNDLE instructions.
+    if (I->isBundle())
+      continue;
+
+    switch (IsHazard(State, *I)) {
+    case HazardFound:
+      return true;
+    case HazardExpired:
+      return false;
+    default:
+      // Continue search
+      break;
+    }
+
+    if (I->isInlineAsm() || I->isMetaInstruction())
+      continue;
+
+    UpdateState(State, *I);
+  }
+
+  for (MachineBasicBlock *Pred : MBB->predecessors()) {
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
+                  Visited))
+      return true;
+  }
+
+  return false;
+}
 
 // Returns a minimum wait states since \p I walking all predecessors.
 // Only scans until \p IsExpired does not return true.
 // Can only be run in a hazard recognizer mode.
-static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
-                              const MachineBasicBlock *MBB,
-                              MachineBasicBlock::const_reverse_instr_iterator I,
-                              int WaitStates, IsExpiredFn IsExpired,
-                              DenseSet<const MachineBasicBlock *> &Visited) {
+static int getWaitStatesSince(
+    GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
+    MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
+    IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
+    GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
   for (auto E = MBB->instr_rend(); I != E; ++I) {
     // Don't add WaitStates for parent BUNDLE instructions.
     if (I->isBundle())
@@ -410,7 +490,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
     if (I->isInlineAsm())
       continue;
 
-    WaitStates += SIInstrInfo::getNumWaitStates(*I);
+    WaitStates += GetNumWaitStates(*I);
 
     if (IsExpired(*I, WaitStates))
      return std::numeric_limits<int>::max();
@@ -421,8 +501,8 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
     if (!Visited.insert(Pred).second)
       continue;
 
-    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
-                               WaitStates, IsExpired, Visited);
+    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
+                               IsExpired, Visited, GetNumWaitStates);
 
     MinWaitStates = std::min(MinWaitStates, W);
   }
@@ -534,7 +614,7 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
   // In order to handle these situations correctly we need to make sure that
   // when a clause has more than one instruction, no instruction in the clause
   // writes to a register that is read by another instruction in the clause
-  // (including itself). If we encounter this situaion, we need to break the
+  // (including itself). If we encounter this situation, we need to break the
   // clause by inserting a non SMEM instruction.
 
   for (MachineInstr *MI : EmittedInstrs) {
@@ -764,7 +844,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
   // 8 bytes can have there store data over written by the next instruction.
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
-  const int VALUWaitStates = 1;
+  const int VALUWaitStates = ST.hasGFX940Insts() ?
2 : 1; int WaitStatesNeeded = 0; if (!TRI->isVectorRegister(MRI, Def.getReg())) @@ -783,13 +863,136 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, } int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { + int WaitStatesNeeded = 0; + + if (ST.hasTransForwardingHazard() && !SIInstrInfo::isTRANS(*VALU)) { + const int TransDefWaitstates = 1; + + auto IsTransDefFn = [this, VALU](const MachineInstr &MI) { + if (!SIInstrInfo::isTRANS(MI)) + return false; + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + Register Def = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)->getReg(); + + for (const MachineOperand &Use : VALU->explicit_uses()) { + if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) + return true; + } + + return false; + }; + + int WaitStatesNeededForDef = + TransDefWaitstates - + getWaitStatesSince(IsTransDefFn, TransDefWaitstates); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + + if (ST.hasDstSelForwardingHazard()) { + const int Shift16DefWaitstates = 1; + + auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) { + if (!SIInstrInfo::isVALU(MI)) + return false; + const SIInstrInfo *TII = ST.getInstrInfo(); + if (SIInstrInfo::isSDWA(MI)) { + if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel)) + if (DstSel->getImm() == AMDGPU::SDWA::DWORD) + return false; + } else { + if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::op_sel) == -1) || + !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers) + ->getImm() & + SISrcMods::DST_OP_SEL)) + return false; + } + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { + Register Def = Dst->getReg(); + + for (const MachineOperand &Use : VALU->explicit_uses()) { + if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) + return true; + } + } + + return false; + }; + + int WaitStatesNeededForDef = + Shift16DefWaitstates - + getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + + if (ST.hasVDecCoExecHazard()) { + const int VALUWriteSGPRVALUReadWaitstates = 2; + const int VALUWriteEXECRWLane = 4; + const int VALUWriteVGPRReadlaneRead = 1; + + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + Register UseReg; + auto IsVALUDefSGPRFn = [&UseReg, TRI](const MachineInstr &MI) { + if (!SIInstrInfo::isVALU(MI)) + return false; + return MI.modifiesRegister(UseReg, TRI); + }; + + for (const MachineOperand &Use : VALU->explicit_uses()) { + if (!Use.isReg()) + continue; + + UseReg = Use.getReg(); + if (TRI->isSGPRReg(MRI, UseReg)) { + int WaitStatesNeededForDef = + VALUWriteSGPRVALUReadWaitstates - + getWaitStatesSince(IsVALUDefSGPRFn, + VALUWriteSGPRVALUReadWaitstates); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + } + + if (VALU->readsRegister(AMDGPU::VCC, TRI)) { + UseReg = AMDGPU::VCC; + int WaitStatesNeededForDef = + VALUWriteSGPRVALUReadWaitstates - + getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteSGPRVALUReadWaitstates); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + + switch (VALU->getOpcode()) { + case AMDGPU::V_READLANE_B32: + case AMDGPU::V_READFIRSTLANE_B32: { + MachineOperand *Src = TII.getNamedOperand(*VALU, AMDGPU::OpName::src0); + UseReg = Src->getReg(); + int WaitStatesNeededForDef = + VALUWriteVGPRReadlaneRead - + 
getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteVGPRReadlaneRead); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + LLVM_FALLTHROUGH; + case AMDGPU::V_WRITELANE_B32: { + UseReg = AMDGPU::EXEC; + int WaitStatesNeededForDef = + VALUWriteEXECRWLane - + getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteEXECRWLane); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + break; + } + default: + break; + } + } + // This checks for the hazard where VMEM instructions that store more than // 8 bytes can have there store data over written by the next instruction. if (!ST.has12DWordStoreHazard()) - return 0; + return WaitStatesNeeded; const MachineRegisterInfo &MRI = MF.getRegInfo(); - int WaitStatesNeeded = 0; for (const MachineOperand &Def : VALU->defs()) { WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); @@ -861,10 +1064,10 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) { int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); - const int SMovRelWaitStates = 1; + const int ReadM0WaitStates = 1; auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); }; - return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, - SMovRelWaitStates); + return ReadM0WaitStates - + getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates); } void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { @@ -873,6 +1076,13 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixSMEMtoVectorWriteHazards(MI); fixVcmpxExecWARHazard(MI); fixLdsBranchVmemWARHazard(MI); + if (ST.hasLdsDirect()) { + fixLdsDirectVALUHazard(MI); + fixLdsDirectVMEMHazard(MI); + } + fixVALUPartialForwardingHazard(MI); + fixVALUTransUseHazard(MI); + fixWMMAHazards(MI); } bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { @@ -880,7 +1090,12 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { return false; const SIInstrInfo *TII = ST.getInstrInfo(); - auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVOPC(MI); }; + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + auto IsHazardFn = [TII, TRI](const MachineInstr &MI) { + return (TII->isVOPC(MI) || + ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) && + MI.modifiesRegister(AMDGPU::EXEC, TRI); + }; auto IsExpiredFn = [](const MachineInstr &MI, int) { unsigned Opc = MI.getOpcode(); @@ -893,7 +1108,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { return false; // V_NOP will be discarded by SQ. - // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* + // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* // which is always a VGPR and available. 
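   // Illustrative resulting sequence (editor's sketch, registers assumed):
   //   v_cmpx_le_f32 ...         ; compare that writes exec
   //   v_mov_b32 v1, v1          ; inserted self-move of the permlane's src0
   //   v_permlanex16_b32 ...     ; no longer hazards with the exec write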
auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); Register Reg = Src0->getReg(); @@ -1157,6 +1372,369 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { return true; } +bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) { + if (!SIInstrInfo::isLDSDIR(*MI)) + return false; + + const int NoHazardWaitStates = 15; + const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); + const Register VDSTReg = VDST->getReg(); + + bool VisitedTrans = false; + auto IsHazardFn = [this, VDSTReg, &VisitedTrans](const MachineInstr &I) { + if (!SIInstrInfo::isVALU(I)) + return false; + VisitedTrans = VisitedTrans || SIInstrInfo::isTRANS(I); + // Cover both WAR and WAW + return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); + }; + auto IsExpiredFn = [&](const MachineInstr &I, int WaitStates) { + if (WaitStates >= NoHazardWaitStates) + return true; + // Instructions which cause va_vdst==0 expire hazard + return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) || + SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I); + }; + auto GetWaitStatesFn = [](const MachineInstr &MI) { + return SIInstrInfo::isVALU(MI) ? 1 : 0; + }; + + DenseSet Visited; + auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(), + std::next(MI->getReverseIterator()), 0, + IsExpiredFn, Visited, GetWaitStatesFn); + + // Transcendentals can execute in parallel to other VALUs. + // This makes va_vdst count unusable with a mixture of VALU and TRANS. + if (VisitedTrans) + Count = 0; + + MachineOperand *WaitVdstOp = + TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst); + WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates)); + + return true; +} + +bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { + if (!SIInstrInfo::isLDSDIR(*MI)) + return false; + + const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); + const Register VDSTReg = VDST->getReg(); + + auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) { + if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) && + !SIInstrInfo::isDS(I)) + return false; + return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); + }; + auto IsExpiredFn = [](const MachineInstr &I, int) { + return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || + (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || + (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && + I.getOperand(0).getImm() == 0xffe3); + }; + + if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == + std::numeric_limits::max()) + return false; + + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0xffe3); + + return true; +} + +bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { + if (!ST.isWave64()) + return false; + if (!ST.hasVALUPartialForwardingHazard()) + return false; + if (!SIInstrInfo::isVALU(*MI)) + return false; + + SmallSetVector SrcVGPRs; + + for (const MachineOperand &Use : MI->explicit_uses()) { + if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg())) + SrcVGPRs.insert(Use.getReg()); + } + + // Only applies with >= 2 unique VGPR sources + if (SrcVGPRs.size() <= 1) + return false; + + // Look for the following pattern: + // Va <- VALU [PreExecPos] + // intv1 + // Exec <- SALU [ExecPos] + // intv2 + // Vb <- VALU [PostExecPos] + // intv3 + // MI Va, Vb (WaitState = 0) + // + // Where: + // intv1 + intv2 <= 2 VALUs + // intv3 <= 4 VALUs + // + // If found, insert an appropriate S_WAITCNT_DEPCTR before MI. 
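+  // Editor's illustration (assumed register numbers) of a matching sequence:
+  //   v_add_f32 v0, ...         ; Va written pre-exec-change
+  //   s_mov_b64 exec, s[0:1]    ; SALU exec write
+  //   v_mul_f32 v1, ...         ; Vb written post-exec-change
+  //   v_fma_f32 v2, v0, v1, v2  ; MI reads both v0 and v1 -> hazard, so
+  //                             ; s_waitcnt_depctr 0x0fff is inserted before it.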
+
+  const int Intv1plus2MaxVALUs = 2;
+  const int Intv3MaxVALUs = 4;
+  const int IntvMaxVALUs = 6;
+  const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
+
+  struct StateType {
+    SmallDenseMap<Register, int, 4> DefPos;
+    int ExecPos = std::numeric_limits<int>::max();
+    int VALUs = 0;
+  };
+
+  StateType State;
+
+  // This overloads expiry testing with all the hazard detection
+  auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+    // Too many VALU states have passed
+    if (State.VALUs > NoHazardVALUWaitStates)
+      return HazardExpired;
+
+    // Instructions which cause va_vdst==0 expire hazard
+    if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+        SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+         I.getOperand(0).getImm() == 0x0fff))
+      return HazardExpired;
+
+    // Track register writes
+    bool Changed = false;
+    if (SIInstrInfo::isVALU(I)) {
+      for (Register Src : SrcVGPRs) {
+        if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
+          State.DefPos[Src] = State.VALUs;
+          Changed = true;
+        }
+      }
+    } else if (SIInstrInfo::isSALU(I)) {
+      if (State.ExecPos == std::numeric_limits<int>::max()) {
+        if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
+          State.ExecPos = State.VALUs;
+          Changed = true;
+        }
+      }
+    }
+
+    // Early expiration: too many VALUs in intv3
+    if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
+      return HazardExpired;
+
+    // Only evaluate state if something changed
+    if (!Changed)
+      return NoHazardFound;
+
+    // Determine positions of VALUs pre/post exec change
+    if (State.ExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    int PreExecPos = std::numeric_limits<int>::max();
+    int PostExecPos = std::numeric_limits<int>::max();
+
+    for (auto Entry : State.DefPos) {
+      int DefVALUs = Entry.second;
+      if (DefVALUs != std::numeric_limits<int>::max()) {
+        if (DefVALUs >= State.ExecPos)
+          PreExecPos = std::min(PreExecPos, DefVALUs);
+        else if (DefVALUs < State.ExecPos)
+          PostExecPos = std::min(PostExecPos, DefVALUs);
+      }
+    }
+
+    // Need a VALU def post exec change
+    if (PostExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    // Too many VALUs in intv3?
+    int Intv3VALUs = PostExecPos;
+    if (Intv3VALUs > Intv3MaxVALUs)
+      return HazardExpired;
+
+    // Too many VALUs in intv2?
+    int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
+    if (Intv2VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    // Need a VALU def pre exec change
+    if (PreExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    // Too many VALUs in intv1?
+ int Intv1VALUs = PreExecPos - State.ExecPos; + if (Intv1VALUs > Intv1plus2MaxVALUs) + return HazardExpired; + + // Too many VALUs in intv1 + intv2 + if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs) + return HazardExpired; + + return HazardFound; + }; + auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) { + if (SIInstrInfo::isVALU(MI)) + State.VALUs += 1; + }; + + DenseSet Visited; + if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), + std::next(MI->getReverseIterator()), Visited)) + return false; + + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0x0fff); + + return true; +} + +bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { + if (!ST.hasVALUTransUseHazard()) + return false; + if (!SIInstrInfo::isVALU(*MI)) + return false; + + SmallSet SrcVGPRs; + + for (const MachineOperand &Use : MI->explicit_uses()) { + if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg())) + SrcVGPRs.insert(Use.getReg()); + } + + // Look for the following pattern: + // Va <- TRANS VALU + // intv + // MI Va (WaitState = 0) + // + // Where: + // intv <= 5 VALUs / 1 TRANS + // + // If found, insert an appropriate S_WAITCNT_DEPCTR before MI. + + const int IntvMaxVALUs = 5; + const int IntvMaxTRANS = 1; + + struct StateType { + int VALUs = 0; + int TRANS = 0; + }; + + StateType State; + + // This overloads expiry testing with all the hazard detection + auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) { + // Too many VALU states have passed + if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS) + return HazardExpired; + + // Instructions which cause va_vdst==0 expire hazard + if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) || + SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) || + (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && + I.getOperand(0).getImm() == 0x0fff)) + return HazardExpired; + + // Track registers writes + if (SIInstrInfo::isTRANS(I)) { + for (Register Src : SrcVGPRs) { + if (I.modifiesRegister(Src, &TRI)) { + return HazardFound; + } + } + } + + return NoHazardFound; + }; + auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) { + if (SIInstrInfo::isVALU(MI)) + State.VALUs += 1; + if (SIInstrInfo::isTRANS(MI)) + State.TRANS += 1; + }; + + DenseSet Visited; + if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), + std::next(MI->getReverseIterator()), Visited)) + return false; + + // Hazard is observed - insert a wait on va_dst counter to ensure hazard is + // avoided (mask 0x0fff achieves this). + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0x0fff); + + return true; +} + +bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) { + if (!SIInstrInfo::isWMMA(*MI)) + return false; + + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) { + if (!SIInstrInfo::isWMMA(I)) + return false; + + // Src0 or Src1 of the current wmma instruction overlaps with the dest of + // the previous wmma. 
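+    // E.g. (editor's sketch, registers assumed):
+    //   v_wmma_f32_16x16x16_f16 v[0:7], ...          ; previous wmma writes v[0:7]
+    //   v_wmma_f32_16x16x16_f16 v[8:15], v[0:7], ... ; reads v[0:7] as src0,
+    //                                                ; so a V_NOP must separate them.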
+ const Register CurSrc0Reg = + TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg(); + const Register CurSrc1Reg = + TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg(); + + const Register PrevDstReg = + TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg(); + + if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) || + TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) { + return true; + } + + // Src2 of the current wmma instruction overlaps with the dest of the + // previous wmma. + const MachineOperand *Src2 = + TII->getNamedOperand(*MI, AMDGPU::OpName::src2); + const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register(); + + if (CurSrc2Reg != AMDGPU::NoRegister && + TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) { + + const MachineOperand *Src2Mods = + TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers); + const bool NoSrc2Mods = + (Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0; + // Exception: there is no hazard if the wmma instructions are of the same + // type and there is no input modifier on src2 of the current instruction. + return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) == + TII->pseudoToMCOpcode(MI->getOpcode()))); + } + + return false; + }; + + auto IsExpiredFn = [](const MachineInstr &I, int) { + return SIInstrInfo::isVALU(I); + }; + + if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == + std::numeric_limits::max()) + return false; + + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32)); + + return true; +} + int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { int NSAtoVMEMWaitStates = 1; @@ -1223,6 +1801,36 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI); } +int GCNHazardRecognizer::checkMFMAPadding(MachineInstr *MI) { + // Early exit if no padding is requested. 
+ if (MFMAPaddingRatio == 0) + return 0; + + const SIMachineFunctionInfo *MFI = MF.getInfo(); + if (!SIInstrInfo::isMFMA(*MI) || MFI->getOccupancy() < 2) + return 0; + + int NeighborMFMALatency = 0; + auto IsNeighboringMFMA = [&NeighborMFMALatency, + this](const MachineInstr &MI) { + if (!SIInstrInfo::isMFMA(MI)) + return false; + + NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI); + return true; + }; + + const int MaxMFMAPipelineWaitStates = 16; + int WaitStatesSinceNeighborMFMA = + getWaitStatesSince(IsNeighboringMFMA, MaxMFMAPipelineWaitStates); + + int NeighborMFMAPaddingNeeded = + (NeighborMFMALatency * MFMAPaddingRatio / 100) - + WaitStatesSinceNeighborMFMA; + + return std::max(0, NeighborMFMAPaddingNeeded); +} + int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { int WaitStatesNeeded = 0; unsigned Opc = MI->getOpcode(); @@ -1257,12 +1865,6 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } } - auto IsMFMAFn = [](const MachineInstr &MI) { - return SIInstrInfo::isMAI(MI) && - MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && - MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; - }; - for (const MachineOperand &Op : MI->explicit_operands()) { if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg())) continue; @@ -1282,9 +1884,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { Register Reg = Op.getReg(); unsigned HazardDefLatency = 0; - auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, + auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency, this](const MachineInstr &MI) { - if (!IsMFMAFn(MI)) + if (!SIInstrInfo::isMFMA(MI)) return false; Register DstReg = MI.getOperand(0).getReg(); if (DstReg == Reg) @@ -1361,9 +1963,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { Register DstReg = MI->getOperand(0).getReg(); unsigned HazardDefLatency = 0; - auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, + auto IsSrcCMFMAFn = [DstReg, &HazardDefLatency, this](const MachineInstr &MI) { - if (!IsMFMAFn(MI)) + if (!SIInstrInfo::isMFMA(MI)) return false; Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg(); HazardDefLatency = @@ -1387,6 +1989,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); } + // Pad neighboring MFMA with noops for better inter-wave performance. 
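+  // Worked example (editor's illustration): with -amdgpu-mfma-padding-ratio=75,
+  // a neighboring 16-pass MFMA (16 wait states of latency) and 4 wait states
+  // already elapsed, checkMFMAPadding returns 16 * 75 / 100 - 4 = 8, i.e. 8
+  // extra wait states are filled with s_nop.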
+ WaitStatesNeeded = std::max(WaitStatesNeeded, checkMFMAPadding(MI)); + return WaitStatesNeeded; } @@ -1394,21 +1999,16 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { int WaitStatesNeeded = 0; unsigned Opc = MI->getOpcode(); - auto IsMFMAFn = [](const MachineInstr &MI) { - return SIInstrInfo::isMAI(MI) && - MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && - MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; + auto IsLegacyVALUFn = [](const MachineInstr &MI) { + return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI); }; - auto IsLegacyVALUFn = [&IsMFMAFn](const MachineInstr &MI) { - return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI); + auto IsLegacyVALUNotDotFn = [](const MachineInstr &MI) { + return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI) && + !SIInstrInfo::isDOT(MI); }; - auto IsLegacyVALUNotDotFn = [&IsMFMAFn](const MachineInstr &MI) { - return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI) && !SIInstrInfo::isDOT(MI); - }; - - if (!IsMFMAFn(*MI)) + if (!SIInstrInfo::isMFMA(*MI)) return WaitStatesNeeded; const int VALUWritesExecWaitStates = 4; @@ -1423,6 +2023,13 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { for (const MachineOperand &Use : MI->explicit_uses()) { const int LegacyVALUNotDotWritesVGPRWaitStates = 2; const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2; + const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3; + const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5; + const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4; + const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9; + const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8; + const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17; + const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8; const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3; @@ -1433,9 +2040,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; + const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4; + const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6; + const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10; + const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18; + const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5; + const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7; + const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11; + const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; + const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; if (!Use.isReg()) @@ -1444,9 +2060,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { bool FullReg; const MachineInstr *MI1; - auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1, + auto IsOverlappedMFMAFn = [Reg, &FullReg, &MI1, this](const MachineInstr &MI) { - if (!IsMFMAFn(MI)) + if (!SIInstrInfo::isMFMA(MI)) return false; Register DstReg = 
MI.getOperand(0).getReg(); FullReg = (DstReg == Reg); @@ -1467,7 +2083,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { unsigned Opc1 = MI1->getOpcode(); int NeedWaitStates = 0; if (OpNo == SrcCIdx) { - if (!isDGEMM(Opc) && isDGEMM(Opc1)) { + if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { NeedWaitStates = 0; } else if (FullReg) { if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || @@ -1475,6 +2091,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64)) NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates; + else if (ST.hasGFX940Insts() && + TSchedModel.computeInstrLatency(MI1) == 2) + NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates; } else { switch (Opc1) { case AMDGPU::V_MFMA_F64_16X16X4F64_e64: @@ -1490,22 +2109,42 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; break; default: + if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1)) + break; switch (TSchedModel.computeInstrLatency(MI1)) { case 2: - NeedWaitStates = isDGEMM(Opc) - ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates + : isDGEMM(Opc) + ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; + break; + case 4: + assert(ST.hasGFX940Insts()); + NeedWaitStates = isXDL(ST, *MI1) + ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates + : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates; break; case 8: - NeedWaitStates = isDGEMM(Opc) - ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates + : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates + : isDGEMM(Opc) + ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; break; case 16: LLVM_FALLTHROUGH; default: - NeedWaitStates = isDGEMM(Opc) - ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates + : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates + : isDGEMM(Opc) + ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; } } } @@ -1524,14 +2163,32 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { default: switch (TSchedModel.computeInstrLatency(MI1)) { case 2: - NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates + : SMFMA4x4WritesVGPROverlappedSrcABWaitStates; + break; + case 4: + assert(ST.hasGFX940Insts()); + NeedWaitStates = isXDL(ST, *MI1) + ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates; break; case 8: - NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? 
GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates + : SMFMA16x16WritesVGPROverlappedSrcABWaitStates; break; case 16: LLVM_FALLTHROUGH; default: - NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates + : SMFMA32x32WritesVGPROverlappedSrcABWaitStates; } } } @@ -1599,18 +2256,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { if (!ST.hasGFX90AInsts()) return 0; - auto IsMFMAFn = [](const MachineInstr &MI) -> bool { - return SIInstrInfo::isMAI(MI) && - MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && - MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; - }; - auto IsDGEMMFn = [](const MachineInstr &MI) -> bool { return isDGEMM(MI.getOpcode()); }; // This is checked in checkMAIHazards90A() - if (IsMFMAFn(*MI)) + if (SIInstrInfo::isMFMA(*MI)) return 0; int WaitStatesNeeded = 0; @@ -1623,8 +2274,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const MachineInstr *MFMA = nullptr; unsigned Reg; - auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) { - if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg)) + auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) { + if (!SIInstrInfo::isMFMA(MI) || + !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg)) return false; MFMA = &MI; return true; @@ -1646,6 +2298,14 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5; const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11; const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19; + const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4; + const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6; + const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10; + const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18; + const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5; + const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7; + const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11; + const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19; const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9; const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; @@ -1685,16 +2345,30 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { int NeedWaitStates = MaxWaitStates; switch (HazardDefLatency) { case 2: - NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates; + NeedWaitStates = + ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates + : SMFMA4x4WriteVgprVALUMemExpReadWaitStates; break; case 4: - assert(isDGEMM(MFMA->getOpcode())); + assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); NeedWaitStates = - IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates - : DMFMA4x4WriteVgprVALUReadWaitStates; + isDGEMM(MFMA->getOpcode()) + ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates + : DMFMA4x4WriteVgprVALUReadWaitStates + : isXDL(ST, *MFMA) + ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates; break; case 8: - NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates; + NeedWaitStates = + ST.hasGFX940Insts() + ? 
isXDL(ST, *MFMA) + ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates + : SMFMA16x16WriteVgprVALUMemExpReadWaitStates; break; case 16: LLVM_FALLTHROUGH; default: @@ -1702,7 +2376,11 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { isDGEMM(MFMA->getOpcode()) ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates : DMFMA16x16WriteVgprVALUReadWaitStates - : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; + : ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates + : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; break; } @@ -1732,7 +2410,16 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA4x4WriteVgprVALUWawWaitStates = 5; const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; + const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4; + const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6; + const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10; + const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18; + const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5; + const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7; + const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11; + const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19; const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; + const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; const int SMFMA32x32ReadVgprVALUWarWaitStates = 15; const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6; @@ -1757,19 +2444,35 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { int NeedWaitStates = MaxWaitStates; switch (TSchedModel.computeInstrLatency(MFMA)) { case 2: - NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL2PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates + : SMFMA4x4WriteVgprVALUWawWaitStates; break; case 4: - assert(isDGEMM(MFMA->getOpcode())); - NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; + assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); + NeedWaitStates = isDGEMM(MFMA->getOpcode()) + ? DMFMA4x4WriteVgprVALUWriteWaitStates + : isXDL(ST, *MFMA) + ? GFX940_XDL4PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates; break; case 8: - NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL8PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates + : SMFMA16x16WriteVgprVALUWawWaitStates; break; case 16: LLVM_FALLTHROUGH; default: NeedWaitStates = isDGEMM(MFMA->getOpcode()) ? DMFMA16x16WriteVgprVALUWriteWaitStates + : ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? 
GFX940_XDL16PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates : SMFMA32x32WriteVgprVALUWawWaitStates; break; } @@ -1781,12 +2484,14 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { break; } - auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA, - this](const MachineInstr &MI) { - if (!IsMFMAFn(MI) || isDGEMM(MI.getOpcode()) || + auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) { + if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) || !MI.readsRegister(Reg, &TRI)) return false; + if (ST.hasGFX940Insts() && !isXDL(ST, MI)) + return false; + const MachineOperand *SrcC = TII.getNamedOperand(MI, AMDGPU::OpName::src2); assert(SrcC); @@ -1808,6 +2513,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { switch (HazardDefLatency) { case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; break; + case 4: assert(ST.hasGFX940Insts()); + NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates; + break; case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; break; case 16: LLVM_FALLTHROUGH; @@ -1827,11 +2535,10 @@ bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) { return false; const MachineInstr *MAI = nullptr; + auto IsMFMAFn = [&MAI](const MachineInstr &MI) { MAI = nullptr; - if (SIInstrInfo::isMAI(MI) && - MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && - MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64) + if (SIInstrInfo::isMFMA(MI)) MAI = &MI; return MAI != nullptr; }; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 716bc027a894..57f5a04c6eda 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -62,6 +62,10 @@ private: void addClauseInst(const MachineInstr &MI); + /// \returns the number of wait states before another MFMA instruction can be + /// issued after \p MI. + unsigned getMFMAPipelineWaitStates(const MachineInstr &MI) const; + // Advance over a MachineInstr bundle. Look for hazards in the bundled // instructions. void processBundle(); @@ -92,10 +96,31 @@ private: bool fixSMEMtoVectorWriteHazards(MachineInstr *MI); bool fixVcmpxExecWARHazard(MachineInstr *MI); bool fixLdsBranchVmemWARHazard(MachineInstr *MI); + bool fixLdsDirectVALUHazard(MachineInstr *MI); + bool fixLdsDirectVMEMHazard(MachineInstr *MI); + bool fixVALUPartialForwardingHazard(MachineInstr *MI); + bool fixVALUTransUseHazard(MachineInstr *MI); + bool fixWMMAHazards(MachineInstr *MI); int checkMAIHazards(MachineInstr *MI); int checkMAIHazards908(MachineInstr *MI); int checkMAIHazards90A(MachineInstr *MI); + /// Pad the latency between neighboring MFMA instructions with s_nops. The + /// percentage of wait states to fill with s_nops is specified by the command + /// line option '-amdgpu-mfma-padding-ratio'. + /// + /// For example, with '-amdgpu-mfma-padding-ratio=100': + /// + /// 2 pass MFMA instructions have a latency of 2 wait states. Therefore, a + /// 'S_NOP 1' will be added between sequential MFMA instructions. 
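/// (An S_NOP immediate of N inserts N + 1 wait states, which is why a single
/// 'S_NOP 1' covers the two wait states in this example.)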
+ /// + /// V_MFMA_F32_4X4X1F32 + /// V_MFMA_F32_4X4X1F32 + ///--> + /// V_MFMA_F32_4X4X1F32 + /// S_NOP 1 + /// V_MFMA_F32_4X4X1F32 + int checkMFMAPadding(MachineInstr *MI); int checkMAIVALUHazards(MachineInstr *MI); int checkMAILdStHazards(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp index 9f98f9ada802..6f82148854c4 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -1,4 +1,4 @@ -//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===// +//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,7 +8,7 @@ // /// \file /// \brief Try to reassign registers on GFX10+ from non-sequential to sequential -/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA +/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA /// with sequential versions where possible. /// //===----------------------------------------------------------------------===// @@ -16,10 +16,12 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -159,15 +161,23 @@ GCNNSAReassign::scavengeRegs(SmallVectorImpl &Intervals) const { GCNNSAReassign::NSA_Status GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); - if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA) + if (!Info) return NSA_Status::NOT_NSA; + switch (Info->MIMGEncoding) { + case AMDGPU::MIMGEncGfx10NSA: + case AMDGPU::MIMGEncGfx11NSA: + break; + default: + return NSA_Status::NOT_NSA; + } + int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); unsigned VgprBase = 0; bool NSA = false; - for (unsigned I = 0; I < Info->VAddrDwords; ++I) { + for (unsigned I = 0; I < Info->VAddrOperands; ++I) { const MachineOperand &Op = MI.getOperand(VAddr0Idx + I); Register Reg = Op.getReg(); if (Reg.isPhysical() || !VRM->isAssignedReg(Reg)) @@ -179,15 +189,16 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { if (!PhysReg) return NSA_Status::FIXED; + // TODO: address the below limitation to handle GFX11 BVH instructions // Bail if address is not a VGPR32. That should be possible to extend the // optimization to work with subregs of a wider register tuples, but the // logic to find free registers will be much more complicated with much // less chances for success. That seems reasonable to assume that in most // cases a tuple is used because a vector variable contains different - // parts of an address and it is either already consequitive or cannot + // parts of an address and it is either already consecutive or cannot // be reassigned if not. If needed it is better to rely on register // coalescer to process such address tuples. 
- if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg()) + if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg()) return NSA_Status::FIXED; // InlineSpiller does not call LRM::assign() after an LI split leaving @@ -278,7 +289,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { SmallVector Intervals; SmallVector OrigRegs; SlotIndex MinInd, MaxInd; - for (unsigned I = 0; I < Info->VAddrDwords; ++I) { + for (unsigned I = 0; I < Info->VAddrOperands; ++I) { const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); Register Reg = Op.getReg(); LiveInterval *LI = &LIS->getInterval(Reg); @@ -331,11 +342,11 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { } if (!Success) { - for (unsigned I = 0; I < Info->VAddrDwords; ++I) + for (unsigned I = 0; I < Info->VAddrOperands; ++I) if (VRM->hasPhys(Intervals[I]->reg())) LRM->unassign(*Intervals[I]); - for (unsigned I = 0; I < Info->VAddrDwords; ++I) + for (unsigned I = 0; I < Info->VAddrOperands; ++I) LRM->assign(*Intervals[I], OrigRegs[I]); continue; diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 3a68ed1934e1..281474994bca 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -192,6 +192,10 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel, FeatureISAVersion9_0_C.Features >; +def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel, + FeatureISAVersion9_4_0.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX10. //===----------------------------------------------------------------------===// @@ -235,3 +239,27 @@ def : ProcessorModel<"gfx1034", GFX10SpeedModel, def : ProcessorModel<"gfx1035", GFX10SpeedModel, FeatureISAVersion10_3_0.Features >; + +def : ProcessorModel<"gfx1036", GFX10SpeedModel, + FeatureISAVersion10_3_0.Features +>; + +//===----------------------------------------------------------------------===// +// GCN GFX11. +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx1100", GFX11SpeedModel, + FeatureISAVersion11_0.Features +>; + +def : ProcessorModel<"gfx1101", GFX11SpeedModel, + FeatureISAVersion11_0.Features +>; + +def : ProcessorModel<"gfx1102", GFX11SpeedModel, + FeatureISAVersion11_0_2.Features +>; + +def : ProcessorModel<"gfx1103", GFX11SpeedModel, + FeatureISAVersion11_0_2.Features +>; diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 257561cb8430..c41548d19c8e 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -10,7 +10,7 @@ /// This file defines the GCNRegPressure class, which tracks registry pressure /// by bookkeeping number of SGPR/VGPRs used, weights for large SGPR/VGPRs. It /// also implements a compare function, which compares different register -/// pressures, and declares one with max occupance as winner. +/// pressures, and declares one with max occupancy as winner. 
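Looking back at the checkMAIVALUHazards() tables above: the GFX940 constants are regular enough to summarize with a formula. The following is a sketch derived from the listed values, not the actual implementation, which deliberately spells out every constant:

    // GFX940 wait states between an MFMA VGPR write and a dependent VALU read
    // or write of that VGPR: non-XDL (SMFMA) rows are Passes + 2, XDL rows
    // are Passes + 3 (so a 2-pass XDL needs 5, a 16-pass SMFMA needs 18).
    unsigned gfx940MfmaWriteVgprValuWaitStates(unsigned Passes, bool IsXDL) {
      return Passes + (IsXDL ? 3u : 2u);
    }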
/// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 75855a7a4f9c..100410bb7644 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -13,6 +13,7 @@ #include "GCNSchedStrategy.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #define DEBUG_TYPE "machine-scheduler" @@ -362,6 +363,9 @@ void GCNScheduleDAGMILive::schedule() { if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) { Pressure[RegionIdx] = PressureAfter; + RegionsWithMinOcc[RegionIdx] = + PressureAfter.getOccupancy(ST) == MinOccupancy; + LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); return; } @@ -378,6 +382,7 @@ void GCNScheduleDAGMILive::schedule() { // occupancy before was higher, or if the current schedule has register // pressure higher than the excess limits which could lead to more spilling. unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); + // Allow memory bound functions to drop to 4 waves if not limited by an // attribute. if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy && @@ -390,6 +395,7 @@ void GCNScheduleDAGMILive::schedule() { if (NewOccupancy < MinOccupancy) { MinOccupancy = NewOccupancy; MFI.limitOccupancy(MinOccupancy); + RegionsWithMinOcc.reset(); LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " << MinOccupancy << ".\n"); } @@ -416,6 +422,8 @@ void GCNScheduleDAGMILive::schedule() { PressureAfter.less(ST, PressureBefore) || !RescheduleRegions[RegionIdx]) { Pressure[RegionIdx] = PressureAfter; + RegionsWithMinOcc[RegionIdx] = + PressureAfter.getOccupancy(ST) == MinOccupancy; if (!RegionsWithClusters[RegionIdx] && (Stage + 1) == UnclusteredReschedule) RescheduleRegions[RegionIdx] = false; @@ -425,13 +433,18 @@ void GCNScheduleDAGMILive::schedule() { } } + RegionsWithMinOcc[RegionIdx] = + PressureBefore.getOccupancy(ST) == MinOccupancy; LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] || (Stage + 1) != UnclusteredReschedule; RegionEnd = RegionBegin; + int SkippedDebugInstr = 0; for (MachineInstr *MI : Unsched) { - if (MI->isDebugInstr()) + if (MI->isDebugInstr()) { + ++SkippedDebugInstr; continue; + } if (MI->getIterator() != RegionEnd) { BB->remove(MI); @@ -459,10 +472,31 @@ void GCNScheduleDAGMILive::schedule() { ++RegionEnd; LLVM_DEBUG(dbgs() << "Scheduling " << *MI); } + + // After reverting schedule, debug instrs will now be at the end of the block + // and RegionEnd will point to the first debug instr. Increment RegionEnd + // pass debug instrs to the actual end of the scheduling region. + while (SkippedDebugInstr-- > 0) + ++RegionEnd; + + // If Unsched.front() instruction is a debug instruction, this will actually + // shrink the region since we moved all debug instructions to the end of the + // block. Find the first instruction that is not a debug instruction. RegionBegin = Unsched.front()->getIterator(); - Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); + if (RegionBegin->isDebugInstr()) { + for (MachineInstr *MI : Unsched) { + if (MI->isDebugInstr()) + continue; + RegionBegin = MI->getIterator(); + break; + } + } + // Then move the debug instructions back into their correct place and set + // RegionBegin and RegionEnd if needed. 
placeDebugValues(); + + Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); } GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { @@ -493,14 +527,14 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { auto I = MBB->begin(); auto LiveInIt = MBBLiveIns.find(MBB); + auto &Rgn = Regions[CurRegion]; + auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second); if (LiveInIt != MBBLiveIns.end()) { auto LiveIn = std::move(LiveInIt->second); RPTracker.reset(*MBB->begin(), &LiveIn); MBBLiveIns.erase(LiveInIt); } else { - auto &Rgn = Regions[CurRegion]; I = Rgn.first; - auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second); auto LRS = BBLiveInMap.lookup(NonDbgMI); #ifdef EXPENSIVE_CHECKS assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS)); @@ -511,7 +545,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { for ( ; ; ) { I = RPTracker.getNext(); - if (Regions[CurRegion].first == I) { + if (Regions[CurRegion].first == I || NonDbgMI == I) { LiveIns[CurRegion] = RPTracker.getLiveRegs(); RPTracker.clearMaxPressure(); } @@ -561,9 +595,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() { RescheduleRegions.resize(Regions.size()); RegionsWithClusters.resize(Regions.size()); RegionsWithHighRP.resize(Regions.size()); + RegionsWithMinOcc.resize(Regions.size()); RescheduleRegions.set(); RegionsWithClusters.reset(); RegionsWithHighRP.reset(); + RegionsWithMinOcc.reset(); if (!Regions.empty()) BBLiveInMap = getBBLiveInMap(); @@ -600,13 +636,41 @@ void GCNScheduleDAGMILive::finalizeSchedule() { << "Retrying function scheduling with lowest recorded occupancy " << MinOccupancy << ".\n"); } + + if (Stage == PreRARematerialize) { + if (RegionsWithMinOcc.none() || Regions.size() == 1) + break; + + const GCNSubtarget &ST = MF.getSubtarget(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + // Check maximum occupancy + if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) == + MinOccupancy) + break; + + // FIXME: This pass will invalidate cached MBBLiveIns for regions + // inbetween the defs and region we sinked the def to. Cached pressure + // for regions where a def is sinked from will also be invalidated. Will + // need to be fixed if there is another pass after this pass. + static_assert(LastStage == PreRARematerialize, + "Passes after PreRARematerialize are not supported"); + + collectRematerializableInstructions(); + if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII)) + break; + + LLVM_DEBUG( + dbgs() << "Retrying function scheduling with improved occupancy of " + << MinOccupancy << " from rematerializing\n"); + } } if (Stage == UnclusteredReschedule) SavedMutations.swap(Mutations); for (auto Region : Regions) { - if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) || + if (((Stage == UnclusteredReschedule || Stage == PreRARematerialize) && + !RescheduleRegions[RegionIdx]) || (Stage == ClusteredLowOccupancyReschedule && !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) { @@ -631,6 +695,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() { // Skip empty scheduling regions (0 or 1 schedulable instructions). 
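// (begin() == end() is the zero-instruction case; begin() == std::prev(end())
// means the region holds exactly one schedulable instruction.)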
if (begin() == end() || begin() == std::prev(end())) { exitRegion(); + ++RegionIdx; continue; } @@ -653,3 +718,282 @@ void GCNScheduleDAGMILive::finalizeSchedule() { SavedMutations.swap(Mutations); } while (Stage != LastStage); } + +void GCNScheduleDAGMILive::collectRematerializableInstructions() { + const SIRegisterInfo *SRI = static_cast(TRI); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); + if (!LIS->hasInterval(Reg)) + continue; + + // TODO: Handle AGPR and SGPR rematerialization + if (!SRI->isVGPRClass(MRI.getRegClass(Reg)) || !MRI.hasOneDef(Reg) || + !MRI.hasOneNonDBGUse(Reg)) + continue; + + MachineOperand *Op = MRI.getOneDef(Reg); + MachineInstr *Def = Op->getParent(); + if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA)) + continue; + + MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg); + if (Def->getParent() == UseI->getParent()) + continue; + + // We are only collecting defs that are defined in another block and are + // live-through or used inside regions at MinOccupancy. This means that the + // register must be in the live-in set for the region. + bool AddedToRematList = false; + for (unsigned I = 0, E = Regions.size(); I != E; ++I) { + auto It = LiveIns[I].find(Reg); + if (It != LiveIns[I].end() && !It->second.none()) { + if (RegionsWithMinOcc[I]) { + RematerializableInsts[I][Def] = UseI; + AddedToRematList = true; + } + + // Collect regions with rematerializable reg as live-in to avoid + // searching later when updating RP. + RematDefToLiveInRegions[Def].push_back(I); + } + } + if (!AddedToRematList) + RematDefToLiveInRegions.erase(Def); + } +} + +bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST, + const TargetInstrInfo *TII) { + // Temporary copies of cached variables we will be modifying and replacing if + // sinking succeeds. + SmallVector< + std::pair, 32> + NewRegions; + DenseMap NewLiveIns; + DenseMap NewPressure; + BitVector NewRescheduleRegions; + + NewRegions.resize(Regions.size()); + NewRescheduleRegions.resize(Regions.size()); + + // Collect only regions that has a rematerializable def as a live-in. + SmallSet ImpactedRegions; + for (const auto &It : RematDefToLiveInRegions) + ImpactedRegions.insert(It.second.begin(), It.second.end()); + + // Make copies of register pressure and live-ins cache that will be updated + // as we rematerialize. + for (auto Idx : ImpactedRegions) { + NewPressure[Idx] = Pressure[Idx]; + NewLiveIns[Idx] = LiveIns[Idx]; + } + NewRegions = Regions; + NewRescheduleRegions.reset(); + + DenseMap InsertedMIToOldDef; + bool Improved = false; + for (auto I : ImpactedRegions) { + if (!RegionsWithMinOcc[I]) + continue; + + Improved = false; + int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts()); + int SGPRUsage = NewPressure[I].getSGPRNum(); + + // TODO: Handle occupancy drop due to AGPR and SGPR. + // Check if cause of occupancy drop is due to VGPR usage and not SGPR. + if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == MinOccupancy) + break; + + // The occupancy of this region could have been improved by a previous + // iteration's sinking of defs. + if (NewPressure[I].getOccupancy(ST) > MinOccupancy) { + NewRescheduleRegions[I] = true; + Improved = true; + continue; + } + + // First check if we have enough trivially rematerializable instructions to + // improve occupancy. Optimistically assume all instructions we are able to + // sink decreased RP. 
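// (Illustrative bound, assuming every candidate frees all of its covered
// registers: VGPRsAfterSink = VGPRUsage - TotalSinkableRegs, and sinking is
// abandoned early when even ST.getOccupancyWithNumVGPRs(VGPRsAfterSink)
// cannot beat MinOccupancy.)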
+ int TotalSinkableRegs = 0; + for (const auto &It : RematerializableInsts[I]) { + MachineInstr *Def = It.first; + Register DefReg = Def->getOperand(0).getReg(); + TotalSinkableRegs += + SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]); + } + int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs; + unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink); + // If in the most optimistic scenario, we cannot improve occupancy, then do + // not attempt to sink any instructions. + if (OptimisticOccupancy <= MinOccupancy) + break; + + unsigned ImproveOccupancy = 0; + SmallVector SinkedDefs; + for (auto &It : RematerializableInsts[I]) { + MachineInstr *Def = It.first; + MachineBasicBlock::iterator InsertPos = + MachineBasicBlock::iterator(It.second); + Register Reg = Def->getOperand(0).getReg(); + // Rematerialize MI to its use block. Since we are only rematerializing + // instructions that do not have any virtual reg uses, we do not need to + // call LiveRangeEdit::allUsesAvailableAt() and + // LiveRangeEdit::canRematerializeAt(). + TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, + Def->getOperand(0).getSubReg(), *Def, *TRI); + MachineInstr *NewMI = &*(--InsertPos); + LIS->InsertMachineInstrInMaps(*NewMI); + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + InsertedMIToOldDef[NewMI] = Def; + + // Update region boundaries in scheduling region we sinked from since we + // may sink an instruction that was at the beginning or end of its region + updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr, + /*Removing =*/true); + + // Update region boundaries in region we sinked to. + updateRegionBoundaries(NewRegions, InsertPos, NewMI); + + LaneBitmask PrevMask = NewLiveIns[I][Reg]; + // FIXME: Also update cached pressure for where the def was sinked from. + // Update RP for all regions that has this reg as a live-in and remove + // the reg from all regions as a live-in. + for (auto Idx : RematDefToLiveInRegions[Def]) { + NewLiveIns[Idx].erase(Reg); + if (InsertPos->getParent() != Regions[Idx].first->getParent()) { + // Def is live-through and not used in this block. + NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), MRI); + } else { + // Def is used and rematerialized into this block. + GCNDownwardRPTracker RPT(*LIS); + auto *NonDbgMI = &*skipDebugInstructionsForward( + NewRegions[Idx].first, NewRegions[Idx].second); + RPT.reset(*NonDbgMI, &NewLiveIns[Idx]); + RPT.advance(NewRegions[Idx].second); + NewPressure[Idx] = RPT.moveMaxPressure(); + } + } + + SinkedDefs.push_back(Def); + ImproveOccupancy = NewPressure[I].getOccupancy(ST); + if (ImproveOccupancy > MinOccupancy) + break; + } + + // Remove defs we just sinked from all regions' list of sinkable defs + for (auto &Def : SinkedDefs) + for (auto TrackedIdx : RematDefToLiveInRegions[Def]) + RematerializableInsts[TrackedIdx].erase(Def); + + if (ImproveOccupancy <= MinOccupancy) + break; + + NewRescheduleRegions[I] = true; + Improved = true; + } + + if (!Improved) { + // Occupancy was not improved for all regions that were at MinOccupancy. + // Undo sinking and remove newly rematerialized instructions. 
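// (Rollback sketch: InsertedMIToOldDef maps each rematerialized copy back to
// the def it was cloned from, so undoing is: erase the copy, clear any dead
// flag left on the original def's register, and recompute its live interval,
// exactly as the loop below does.)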
+ for (auto &Entry : InsertedMIToOldDef) { + MachineInstr *MI = Entry.first; + MachineInstr *OldMI = Entry.second; + Register Reg = MI->getOperand(0).getReg(); + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + OldMI->clearRegisterDeads(Reg); + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + } + return false; + } + + // Occupancy was improved for all regions. + for (auto &Entry : InsertedMIToOldDef) { + MachineInstr *MI = Entry.first; + MachineInstr *OldMI = Entry.second; + + // Remove OldMI from BBLiveInMap since we are sinking it from its MBB. + BBLiveInMap.erase(OldMI); + + // Remove OldMI and update LIS + Register Reg = MI->getOperand(0).getReg(); + LIS->RemoveMachineInstrFromMaps(*OldMI); + OldMI->eraseFromParent(); + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + } + + // Update live-ins, register pressure, and regions caches. + for (auto Idx : ImpactedRegions) { + LiveIns[Idx] = NewLiveIns[Idx]; + Pressure[Idx] = NewPressure[Idx]; + MBBLiveIns.erase(Regions[Idx].first->getParent()); + } + Regions = NewRegions; + RescheduleRegions = NewRescheduleRegions; + + SIMachineFunctionInfo &MFI = *MF.getInfo(); + MFI.increaseOccupancy(MF, ++MinOccupancy); + + return true; +} + +// Copied from MachineLICM +bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) { + if (!TII->isTriviallyReMaterializable(MI, AA)) + return false; + + for (const MachineOperand &MO : MI.operands()) + if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual()) + return false; + + return true; +} + +// When removing, we will have to check both beginning and ending of the region. +// When inserting, we will only have to check if we are inserting NewMI in front +// of a scheduling region and do not need to check the ending since we will only +// ever be inserting before an already existing MI. +void GCNScheduleDAGMILive::updateRegionBoundaries( + SmallVectorImpl> &RegionBoundaries, + MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) { + unsigned I = 0, E = RegionBoundaries.size(); + // Search for first region of the block where MI is located + while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent()) + ++I; + + for (; I != E; ++I) { + if (MI->getParent() != RegionBoundaries[I].first->getParent()) + return; + + if (Removing && MI == RegionBoundaries[I].first && + MI == RegionBoundaries[I].second) { + // MI is in a region with size 1, after removing, the region will be + // size 0, set RegionBegin and RegionEnd to pass end of block iterator. 
+ RegionBoundaries[I] = + std::make_pair(MI->getParent()->end(), MI->getParent()->end()); + return; + } + if (MI == RegionBoundaries[I].first) { + if (Removing) + RegionBoundaries[I] = + std::make_pair(std::next(MI), RegionBoundaries[I].second); + else + // Inserted NewMI in front of region, set new RegionBegin to NewMI + RegionBoundaries[I] = std::make_pair(MachineBasicBlock::iterator(NewMI), + RegionBoundaries[I].second); + return; + } + if (Removing && MI == RegionBoundaries[I].second) { + RegionBoundaries[I] = + std::make_pair(RegionBoundaries[I].first, std::prev(MI)); + return; + } + } +} diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index a6e42ad3dfca..97f94f69b70e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H #include "GCNRegPressure.h" +#include "llvm/ADT/MapVector.h" #include "llvm/CodeGen/MachineScheduler.h" namespace llvm { @@ -77,7 +78,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { InitialSchedule, UnclusteredReschedule, ClusteredLowOccupancyReschedule, - LastStage = ClusteredLowOccupancyReschedule + PreRARematerialize, + LastStage = PreRARematerialize }; const GCNSubtarget &ST; @@ -110,24 +112,56 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { // Record regions with high register pressure. BitVector RegionsWithHighRP; + // Regions that have the same occupancy as the latest MinOccupancy. + BitVector RegionsWithMinOcc; + // Region live-in cache. SmallVector LiveIns; // Region pressure cache. SmallVector Pressure; + // Each region at MinOccupancy will have its own list of trivially + // rematerializable instructions we can remat to reduce RP. The list maps an + // instruction to the position we should remat before, usually the MI using + // the rematerializable instruction. + MapVector> + RematerializableInsts; + + // Map a trivially rematerializable def to a list of regions at MinOccupancy + // that have the defined reg as a live-in. + DenseMap> RematDefToLiveInRegions; + // Temporary basic block live-in cache. DenseMap MBBLiveIns; DenseMap BBLiveInMap; DenseMap getBBLiveInMap() const; + // Collect all trivially rematerializable VGPR instructions with a single def + // and single use outside the defining block into RematerializableInsts. + void collectRematerializableInstructions(); + + bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA); + + // TODO: Should also attempt to reduce RP of SGPRs and AGPRs + // Attempt to reduce RP of VGPRs by sinking trivially rematerializable + // instructions. Returns true if we were able to sink instruction(s). + bool sinkTriviallyRematInsts(const GCNSubtarget &ST, + const TargetInstrInfo *TII); + // Return current region pressure. GCNRegPressure getRealRegPressure() const; // Compute and cache live-ins and pressure for all regions in block. void computeBlockPressure(const MachineBasicBlock *MBB); + // Update region boundaries when removing MI or inserting NewMI before MI. 
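// (Three cases, matching the implementation above: MI is both begin and end
// of a single-instruction region, MI is a region's begin, or MI is a
// region's end. Insertion only ever needs the begin case, since NewMI is
// always placed in front of an existing instruction.)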
+ void updateRegionBoundaries( + SmallVectorImpl> &RegionBoundaries, + MachineBasicBlock::iterator MI, MachineInstr *NewMI, + bool Removing = false); public: GCNScheduleDAGMILive(MachineSchedContext *C, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 0cd2cfa2f0e7..d269d0945f3b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -58,133 +58,142 @@ protected: // Basic subtarget description. Triple TargetTriple; AMDGPU::IsaInfo::AMDGPUTargetID TargetID; - unsigned Gen; + unsigned Gen = INVALID; InstrItineraryData InstrItins; - int LDSBankCount; - unsigned MaxPrivateElementSize; + int LDSBankCount = 0; + unsigned MaxPrivateElementSize = 0; // Possibly statically set by tablegen, but may want to be overridden. - bool FastFMAF32; - bool FastDenormalF32; - bool HalfRate64Ops; - bool FullRate64Ops; + bool FastFMAF32 = false; + bool FastDenormalF32 = false; + bool HalfRate64Ops = false; + bool FullRate64Ops = false; // Dynamically set bits that enable features. - bool FlatForGlobal; - bool AutoWaitcntBeforeBarrier; - bool UnalignedScratchAccess; - bool UnalignedAccessMode; - bool HasApertureRegs; - bool SupportsXNACK; + bool FlatForGlobal = false; + bool AutoWaitcntBeforeBarrier = false; + bool UnalignedScratchAccess = false; + bool UnalignedAccessMode = false; + bool HasApertureRegs = false; + bool SupportsXNACK = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for XNACK. - bool EnableXNACK; + bool EnableXNACK = false; - bool EnableTgSplit; - bool EnableCuMode; - bool TrapHandler; + bool EnableTgSplit = false; + bool EnableCuMode = false; + bool TrapHandler = false; // Used as options. - bool EnableLoadStoreOpt; - bool EnableUnsafeDSOffsetFolding; - bool EnableSIScheduler; - bool EnableDS128; - bool EnablePRTStrictNull; - bool DumpCode; + bool EnableLoadStoreOpt = false; + bool EnableUnsafeDSOffsetFolding = false; + bool EnableSIScheduler = false; + bool EnableDS128 = false; + bool EnablePRTStrictNull = false; + bool DumpCode = false; // Subtarget statically properties set by tablegen - bool FP64; - bool FMA; - bool MIMG_R128; - bool CIInsts; - bool GFX8Insts; - bool GFX9Insts; - bool GFX90AInsts; - bool GFX10Insts; - bool GFX10_3Insts; - bool GFX7GFX8GFX9Insts; - bool SGPRInitBug; - bool NegativeScratchOffsetBug; - bool NegativeUnalignedScratchOffsetBug; - bool HasSMemRealTime; - bool HasIntClamp; - bool HasFmaMixInsts; - bool HasMovrel; - bool HasVGPRIndexMode; - bool HasScalarStores; - bool HasScalarAtomics; - bool HasSDWAOmod; - bool HasSDWAScalar; - bool HasSDWASdst; - bool HasSDWAMac; - bool HasSDWAOutModsVOPC; - bool HasDPP; - bool HasDPP8; - bool Has64BitDPP; - bool HasPackedFP32Ops; - bool HasExtendedImageInsts; - bool HasR128A16; - bool HasGFX10A16; - bool HasG16; - bool HasNSAEncoding; - unsigned NSAMaxSize; - bool GFX10_AEncoding; - bool GFX10_BEncoding; - bool HasDLInsts; - bool HasDot1Insts; - bool HasDot2Insts; - bool HasDot3Insts; - bool HasDot4Insts; - bool HasDot5Insts; - bool HasDot6Insts; - bool HasDot7Insts; - bool HasMAIInsts; - bool HasPkFmacF16Inst; - bool HasAtomicFaddInsts; - bool SupportsSRAMECC; + bool FP64 = false; + bool FMA = false; + bool MIMG_R128 = false; + bool CIInsts = false; + bool GFX8Insts = false; + bool GFX9Insts = false; + bool GFX90AInsts = false; + bool GFX940Insts = false; + bool GFX10Insts = false; + bool GFX11Insts = false; + bool GFX10_3Insts = false; + bool GFX7GFX8GFX9Insts = false; + bool SGPRInitBug = false; + bool 
UserSGPRInit16Bug = false; + bool NegativeScratchOffsetBug = false; + bool NegativeUnalignedScratchOffsetBug = false; + bool HasSMemRealTime = false; + bool HasIntClamp = false; + bool HasFmaMixInsts = false; + bool HasMovrel = false; + bool HasVGPRIndexMode = false; + bool HasScalarStores = false; + bool HasScalarAtomics = false; + bool HasSDWAOmod = false; + bool HasSDWAScalar = false; + bool HasSDWASdst = false; + bool HasSDWAMac = false; + bool HasSDWAOutModsVOPC = false; + bool HasDPP = false; + bool HasDPP8 = false; + bool Has64BitDPP = false; + bool HasPackedFP32Ops = false; + bool HasImageInsts = false; + bool HasExtendedImageInsts = false; + bool HasR128A16 = false; + bool HasGFX10A16 = false; + bool HasG16 = false; + bool HasNSAEncoding = false; + unsigned NSAMaxSize = 0; + bool GFX10_AEncoding = false; + bool GFX10_BEncoding = false; + bool HasDLInsts = false; + bool HasDot1Insts = false; + bool HasDot2Insts = false; + bool HasDot3Insts = false; + bool HasDot4Insts = false; + bool HasDot5Insts = false; + bool HasDot6Insts = false; + bool HasDot7Insts = false; + bool HasDot8Insts = false; + bool HasMAIInsts = false; + bool HasPkFmacF16Inst = false; + bool HasAtomicFaddRtnInsts = false; + bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicPkFaddNoRtnInsts = false; + bool SupportsSRAMECC = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for SRAMECC. - bool EnableSRAMECC; - - bool HasNoSdstCMPX; - bool HasVscnt; - bool HasGetWaveIdInst; - bool HasSMemTimeInst; - bool HasShaderCyclesRegister; - bool HasVOP3Literal; - bool HasNoDataDepHazard; - bool FlatAddressSpace; - bool FlatInstOffsets; - bool FlatGlobalInsts; - bool FlatScratchInsts; - bool ScalarFlatScratchInsts; - bool HasArchitectedFlatScratch; - bool AddNoCarryInsts; - bool HasUnpackedD16VMem; - bool LDSMisalignedBug; - bool HasMFMAInlineLiteralBug; - bool UnalignedBufferAccess; - bool UnalignedDSAccess; - bool HasPackedTID; - bool ScalarizeGlobal; - - bool HasVcmpxPermlaneHazard; - bool HasVMEMtoScalarWriteHazard; - bool HasSMEMtoVectorWriteHazard; - bool HasInstFwdPrefetchBug; - bool HasVcmpxExecWARHazard; - bool HasLdsBranchVmemWARHazard; - bool HasNSAtoVMEMBug; - bool HasNSAClauseBug; - bool HasOffset3fBug; - bool HasFlatSegmentOffsetBug; - bool HasImageStoreD16Bug; - bool HasImageGather4D16Bug; + bool EnableSRAMECC = false; + + bool HasNoSdstCMPX = false; + bool HasVscnt = false; + bool HasGetWaveIdInst = false; + bool HasSMemTimeInst = false; + bool HasShaderCyclesRegister = false; + bool HasVOP3Literal = false; + bool HasNoDataDepHazard = false; + bool FlatAddressSpace = false; + bool FlatInstOffsets = false; + bool FlatGlobalInsts = false; + bool FlatScratchInsts = false; + bool ScalarFlatScratchInsts = false; + bool HasArchitectedFlatScratch = false; + bool EnableFlatScratch = false; + bool AddNoCarryInsts = false; + bool HasUnpackedD16VMem = false; + bool LDSMisalignedBug = false; + bool HasMFMAInlineLiteralBug = false; + bool UnalignedBufferAccess = false; + bool UnalignedDSAccess = false; + bool HasPackedTID = false; + bool ScalarizeGlobal = false; + + bool HasVcmpxPermlaneHazard = false; + bool HasVMEMtoScalarWriteHazard = false; + bool HasSMEMtoVectorWriteHazard = false; + bool HasInstFwdPrefetchBug = false; + bool HasVcmpxExecWARHazard = false; + bool HasLdsBranchVmemWARHazard = false; + bool HasNSAtoVMEMBug = false; + bool HasNSAClauseBug = false; + bool HasOffset3fBug = false; + bool HasFlatSegmentOffsetBug = false; + bool HasImageStoreD16Bug = false; + bool 
HasImageGather4D16Bug = false; + bool HasVOPDInsts = false; // Dummy feature to use for assembler in tablegen. - bool FeatureDisable; + bool FeatureDisable = false; SelectionDAGTargetInfo TSInfo; private: @@ -193,9 +202,6 @@ private: SIFrameLowering FrameLowering; public: - // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. - static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); - GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM); ~GCNSubtarget() override; @@ -258,9 +264,19 @@ public: return (Generation)Gen; } + unsigned getMaxWaveScratchSize() const { + // See COMPUTE_TMPRING_SIZE.WAVESIZE. + if (getGeneration() < GFX11) { + // 13-bit field in units of 256-dword. + return (256 * 4) * ((1 << 13) - 1); + } + // 15-bit field in units of 64-dword. + return (64 * 4) * ((1 << 15) - 1); + } + /// Return the number of high bits known to be zero for a frame index. unsigned getKnownHighZeroBitsForFrameIndex() const { - return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); + return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2(); } int getLDSBankCount() const { @@ -558,13 +574,20 @@ public: // The ST addressing mode means no registers are used, either VGPR or SGPR, // but only immediate offset is swizzled and added to the FLAT scratch base. bool hasFlatScratchSTMode() const { - return hasFlatScratchInsts() && hasGFX10_3Insts(); + return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); } + bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } + bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; } + bool enableFlatScratch() const { + return flatScratchIsArchitected() || + (EnableFlatScratch && hasFlatScratchInsts()); + } + bool hasGlobalAddTidInsts() const { return GFX10_BEncoding; } @@ -690,6 +713,10 @@ public: return HasDot7Insts; } + bool hasDot8Insts() const { + return HasDot8Insts; + } + bool hasMAIInsts() const { return HasMAIInsts; } @@ -699,9 +726,15 @@ public: } bool hasAtomicFaddInsts() const { - return HasAtomicFaddInsts; + return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } + bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; } + + bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } + + bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } + bool hasNoSdstCMPX() const { return HasNoSdstCMPX; } @@ -765,8 +798,6 @@ public: return true; } - bool enableFlatScratch() const; - void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; @@ -805,6 +836,9 @@ public: /// \returns true if the subtarget has the v_permlanex16_b32 instruction. bool hasPermLaneX16() const { return getGeneration() >= GFX10; } + /// \returns true if the subtarget has the v_permlane64_b32 instruction. 
+ bool hasPermLane64() const { return getGeneration() >= GFX11; } + bool hasDPP() const { return HasDPP; } @@ -830,7 +864,11 @@ public: } bool hasFmaakFmamkF32Insts() const { - return getGeneration() >= GFX10; + return getGeneration() >= GFX10 || hasGFX940Insts(); + } + + bool hasImageInsts() const { + return HasImageInsts; } bool hasExtendedImageInsts() const { @@ -875,6 +913,10 @@ public: bool hasMadF16() const; + bool hasMovB64() const { return GFX940Insts; } + + bool hasLshlAddB64() const { return GFX940Insts; } + bool enableSIScheduler() const { return EnableSIScheduler; } @@ -887,6 +929,10 @@ public: return SGPRInitBug; } + bool hasUserSGPRInit16Bug() const { + return UserSGPRInit16Bug; + } + bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } bool hasNegativeUnalignedScratchOffsetBug() const { @@ -915,6 +961,14 @@ public: getGeneration() <= AMDGPUSubtarget::GFX9; } + bool hasReadM0LdsDmaHazard() const { + return getGeneration() == AMDGPUSubtarget::GFX9; + } + + bool hasReadM0LdsDirectHazard() const { + return getGeneration() == AMDGPUSubtarget::GFX9; + } + bool hasVcmpxPermlaneHazard() const { return HasVcmpxPermlaneHazard; } @@ -943,6 +997,22 @@ public: return HasLdsBranchVmemWARHazard; } + // Has one cycle hazard on transcendental instruction feeding a + // non transcendental VALU. + bool hasTransForwardingHazard() const { return GFX940Insts; } + + // Has one cycle hazard on a VALU instruction partially writing dst with + // a shift of result bits feeding another VALU instruction. + bool hasDstSelForwardingHazard() const { return GFX940Insts; } + + // Cannot use op_sel with v_dot instructions. + bool hasDOTOpSelHazard() const { return GFX940Insts; } + + // Does not have HW interlocs for VALU writing and then reading SGPRs. + bool hasVDecCoExecHazard() const { + return GFX940Insts; + } + bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; } @@ -953,11 +1023,43 @@ public: bool hasGFX90AInsts() const { return GFX90AInsts; } + bool hasVOP3DPP() const { return getGeneration() >= GFX11; } + + bool hasLdsDirect() const { return getGeneration() >= GFX11; } + + bool hasVALUPartialForwardingHazard() const { + return getGeneration() >= GFX11; + } + + bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; } + /// Return if operations acting on VGPR tuples require even alignment. bool needsAlignedVGPRs() const { return GFX90AInsts; } + /// Return true if the target has the S_PACK_HL_B32_B16 instruction. + bool hasSPackHL() const { return GFX11Insts; } + + /// Return true if the target's EXP instruction has the COMPR flag, which + /// affects the meaning of the EN (enable) bits. + bool hasCompressedExport() const { return !GFX11Insts; } + + /// Return true if the target's EXP instruction supports the NULL export + /// target. + bool hasNullExportTarget() const { return !GFX11Insts; } + + bool hasVOPDInsts() const { return HasVOPDInsts; } + + bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; } + + /// Return true if the target has the S_DELAY_ALU instruction. + bool hasDelayAlu() const { return GFX11Insts; } + bool hasPackedTID() const { return HasPackedTID; } + // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that + // hasGFX90AInsts is also true. 
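// (This lets callers layer feature tests: check hasGFX940Insts() for the
// most specific behavior and fall back to a hasGFX90AInsts() branch without
// re-testing the GFX940 case.)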
+ bool hasGFX940Insts() const { return GFX940Insts; } + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs /// SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; @@ -989,6 +1091,9 @@ public: return getGeneration() >= GFX9; } + // \returns true if the target supports the pre-NGG legacy geometry path. + bool hasLegacyGeometry() const { return getGeneration() < GFX11; } + /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); @@ -1105,6 +1210,10 @@ public: /// unit requirement. unsigned getMaxNumVGPRs(const Function &F) const; + unsigned getMaxNumAGPRs(const Function &F) const { + return getMaxNumVGPRs(F); + } + /// \returns Maximum number of VGPRs that meets number of waves per execution /// unit requirement for function \p MF, or number of VGPRs explicitly /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. @@ -1165,6 +1274,10 @@ public: void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override; + + // \returns true if it's beneficial on this subtarget for the scheduler to + // cluster stores as well as loads. + bool shouldClusterStores() const { return getGeneration() >= GFX11; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td new file mode 100644 index 000000000000..1f65376890da --- /dev/null +++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td @@ -0,0 +1,116 @@ +//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LDSDIR encoding +//===----------------------------------------------------------------------===// + +class LDSDIRe op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23-22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +//===----------------------------------------------------------------------===// +// LDSDIR Classes +//===----------------------------------------------------------------------===// + +class LDSDIR_getIns { + dag ret = !if(direct, + (ins wait_vdst:$waitvdst), + (ins Attr:$attr, AttrChan:$attrchan, wait_vdst:$waitvdst) + ); +} + +class LDSDIR_Common : InstSI< + (outs VGPR_32:$vdst), + LDSDIR_getIns.ret, + asm> { + let LDSDIR = 1; + let EXP_CNT = 1; + + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 0; + + string Mnemonic = opName; + let UseNamedOperandTable = 1; + + let Uses = [M0, EXEC]; + let DisableWQM = 0; + let SchedRW = [WriteLDS]; + + bit is_direct; + let is_direct = direct; +} + +class LDSDIR_Pseudo : + LDSDIR_Common, + SIMCInstr { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +class LDSDIR_getAsm { + string ret = !if(direct, + " $vdst$waitvdst", + " $vdst, $attr$attrchan$waitvdst" + ); +} + +class LDSDIR_Real op, LDSDIR_Pseudo lds, int subtarget> : + LDSDIR_Common.ret, + lds.is_direct>, + SIMCInstr , + LDSDIRe { + let isPseudo = 0; + let isCodeGenOnly = 0; +} + +//===----------------------------------------------------------------------===// +// LDS Direct Instructions +//===----------------------------------------------------------------------===// + +def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>; +def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (LDS_DIRECT_LOAD 0) +>; + +def : GCNPat < + (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) +>; + +//===----------------------------------------------------------------------===// +// GFX11+ +//===----------------------------------------------------------------------===// + +multiclass LDSDIR_Real_gfx11 op, LDSDIR_Pseudo lds = !cast(NAME)> { + def _gfx11 : LDSDIR_Real { + let AssemblerPredicate = isGFX11Plus; + let DecoderNamespace = "GFX11"; + } +} + +defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>; +defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>; diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp index 912bcc792e4d..24c9cc2d7dd2 100644 --- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp +++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp @@ -239,9 +239,9 @@ void AMDGPUCustomBehaviour::generateWaitCntInfo() { AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU()); InstrWaitCntInfo.resize(SrcMgr.size()); - int Index = 0; - for (auto I = SrcMgr.begin(), E = SrcMgr.end(); I != E; ++I, ++Index) { - const std::unique_ptr &Inst = *I; + for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) { + const std::unique_ptr &Inst = EN.value(); + unsigned Index = EN.index(); 
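// (llvm::enumerate yields (index, value) pairs, replacing the manually
// maintained Index counter of the deleted loop header above.)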
unsigned Opcode = Inst->getOpcode(); const MCInstrDesc &MCID = MCII.get(Opcode); if ((MCID.TSFlags & SIInstrFlags::DS) && diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h index 56650515bd0a..7a0d454c3578 100644 --- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h +++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h @@ -31,7 +31,7 @@ public: AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) : InstrPostProcess(STI, MCII) {} - ~AMDGPUInstrPostProcess() {} + ~AMDGPUInstrPostProcess() = default; void postProcessInstruction(std::unique_ptr &Inst, const MCInst &MCI) override; @@ -86,7 +86,7 @@ public: AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII); - ~AMDGPUCustomBehaviour() {} + ~AMDGPUCustomBehaviour() = default; /// This method is used to determine if an instruction /// should be allowed to be dispatched. The return value is /// how many cycles until the instruction can be dispatched. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 50318a59225d..bda3c25e956b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -10,13 +10,16 @@ #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; using namespace llvm::AMDGPU; @@ -47,7 +50,10 @@ public: bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; + Optional getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; }; } //End anonymous namespace @@ -134,6 +140,9 @@ void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, MutableArrayRef Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const { + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return; + Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); if (!Value) return; // Doesn't change encoding. 
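The relocation changes in this file follow a standard MC backend pattern: fixup kinds at or above FirstLiteralRelocationKind encode a raw ELF relocation type rather than a target fixup, so applyFixup returns early for them and shouldForceRelocation reports them as always needing a relocation. A rough standalone model of the round trip (the constant's value here is illustrative; MC defines the real one):

    #include <cassert>

    // Toy model, simplified: getFixupKind maps a named ELF relocation's value
    // V to FirstLiteralRelocationKind + V, and the object writer's
    // getRelocType recovers V by subtracting the base back out.
    constexpr unsigned FirstLiteralRelocationKind = 0x10000; // illustrative

    unsigned literalFixupKind(unsigned ElfRelocValue) {
      return FirstLiteralRelocationKind + ElfRelocValue;
    }

    unsigned elfRelocType(unsigned FixupKind) {
      assert(FixupKind >= FirstLiteralRelocationKind && "not a literal kind");
      return FixupKind - FirstLiteralRelocationKind;
    }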
@@ -153,6 +162,15 @@ void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, Data[Offset + i] |= static_cast((Value >> (i * 8)) & 0xff); } +Optional AMDGPUAsmBackend::getFixupKind(StringRef Name) const { + return StringSwitch>(Name) +#define ELF_RELOC(Name, Value) \ + .Case(#Name, MCFixupKind(FirstLiteralRelocationKind + Value)) +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" +#undef ELF_RELOC + .Default(None); +} + const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( MCFixupKind Kind) const { const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = { @@ -160,12 +178,21 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, }; + if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); return Infos[Kind - FirstTargetFixupKind]; } +bool AMDGPUAsmBackend::shouldForceRelocation(const MCAssembler &, + const MCFixup &Fixup, + const MCValue &) { + return Fixup.getKind() >= FirstLiteralRelocationKind; +} + unsigned AMDGPUAsmBackend::getMinimumNopSize() const { return 4; } @@ -236,5 +263,5 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(), - getHsaAbiVersion(&STI).getValueOr(0)); + getHsaAbiVersion(&STI).value_or(0)); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index bb2c298c2850..066b36622a16 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -65,7 +65,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AMDGPU_REL64; } - switch (Fixup.getKind()) { + MCFixupKind Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return Kind - FirstLiteralRelocationKind; + switch (Kind) { default: break; case FK_PCRel_4: return ELF::R_AMDGPU_REL32; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 76663b563150..bd938d829953 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -120,14 +120,6 @@ void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo, printNamedBit(MI, OpNo, O, "addr64"); } -void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).getImm()) { - O << " offset:"; - printU16ImmDecOperand(MI, OpNo, O); - } -} - void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -152,7 +144,7 @@ void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo, if (IsFlatSeg) { // Unsigned offset printU16ImmDecOperand(MI, OpNo, O); } else { // Signed offset - if (AMDGPU::isGFX10Plus(STI)) { + if (AMDGPU::isGFX10(STI)) { O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm())); } else { O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); @@ -191,6 +183,13 @@ void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo, O << formatHex(MI->getOperand(OpNo).getImm()); } +void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " 
offset:"; + printSMEMOffset(MI, OpNo, STI, O); +} + void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -206,13 +205,15 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { auto Imm = MI->getOperand(OpNo).getImm(); if (Imm & CPol::GLC) - O << " glc"; + O << ((AMDGPU::isGFX940(STI) && + !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0" + : " glc"); if (Imm & CPol::SLC) - O << " slc"; + O << (AMDGPU::isGFX940(STI) ? " nt" : " slc"); if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI)) O << " dlc"; if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) - O << " scc"; + O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc"); if (Imm & ~CPol::ALL) O << " /* unexpected cache policy bit */"; } @@ -309,8 +310,8 @@ void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI, if (AMDGPU::isGFX10Plus(STI)) { if (Val == UFMT_DEFAULT) return; - if (isValidUnifiedFormat(Val)) { - O << " format:[" << getUnifiedFormatName(Val) << ']'; + if (isValidUnifiedFormat(Val, STI)) { + O << " format:[" << getUnifiedFormatName(Val, STI) << ']'; } else { O << " format:" << Val; } @@ -362,27 +363,26 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { auto Opcode = MI->getOpcode(); auto Flags = MII.get(Opcode).TSFlags; - if (OpNo == 0) { - if (Flags & SIInstrFlags::VOP3) { + if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP) + O << "_e64_dpp"; + else if (Flags & SIInstrFlags::VOP3) { if (!getVOP3IsSingle(Opcode)) O << "_e64"; - } else if (Flags & SIInstrFlags::DPP) { + } else if (Flags & SIInstrFlags::DPP) O << "_dpp"; - } else if (Flags & SIInstrFlags::SDWA) { + else if (Flags & SIInstrFlags::SDWA) O << "_sdwa"; - } else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) || - ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode))) { + else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) || + ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode))) O << "_e32"; - } O << " "; } - printOperand(MI, OpNo, STI, O); + printRegularOperand(MI, OpNo, STI, O); // Print default vcc/vcc_lo operand. 
switch (Opcode) { @@ -400,7 +400,16 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: - printDefaultVccOperand(1, STI, O); + case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11: + case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11: + case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11: + case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11: + case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11: + case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11: + case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11: + printDefaultVccOperand(false, STI, O); break; } } @@ -412,7 +421,7 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo, else O << "_e32 "; - printOperand(MI, OpNo, STI, O); + printRegularOperand(MI, OpNo, STI, O); } void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, @@ -533,7 +542,7 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) O << "0.15915494309189532"; else { - assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882); + assert(isUInt<32>(Imm) || isInt<32>(Imm)); // In rare situations, we will have a 32-bit literal in a 64-bit // operand. This is technically allowed for the encoding of s_mov_b64. @@ -548,6 +557,18 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo, if (!Imm) return; + if (AMDGPU::isGFX940(STI)) { + switch (MI->getOpcode()) { + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: + O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ',' + << ((Imm >> 2) & 1) << ']'; + return; + } + } + O << " blgp:" << Imm; } @@ -571,26 +592,73 @@ void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo, O << " abid:" << Imm; } -void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo, +void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand, const MCSubtargetInfo &STI, raw_ostream &O) { - if (OpNo > 0) + if (!FirstOperand) O << ", "; - printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? - AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI); - if (OpNo == 0) + printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] + ? AMDGPU::VCC + : AMDGPU::VCC_LO, + O, MRI); + if (FirstOperand) O << ", "; } +void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint8_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm != 0) { + O << " wait_vdst:"; + printU4ImmDecOperand(MI, OpNo, O); + } +} + +void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint8_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm != 0) { + O << " wait_exp:"; + printU4ImmDecOperand(MI, OpNo, O); + } +} + +bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc, + unsigned OpNo) const { + return OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && + (Desc.TSFlags & SIInstrFlags::VOPC) && + (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || + Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)); +} + +// Print default vcc/vcc_lo operand of VOPC. void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - // Print default vcc/vcc_lo operand of VOPC. 
- const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) && + unsigned Opc = MI->getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + // 0, 1 and 2 are the first printed operands in different cases + // If there are printed modifiers, printOperandAndFPInputMods or + // printOperandAndIntInputMods will be called instead + if ((OpNo == 0 || + (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP)) || + (OpNo == 2 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && + (Desc.TSFlags & SIInstrFlags::VOPC) && (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO))) - printDefaultVccOperand(OpNo, STI, O); + printDefaultVccOperand(true, STI, O); + + printRegularOperand(MI, OpNo, STI, O); +} + +// Print operands after vcc or modifier handling. +void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); if (OpNo >= MI->getNumOperands()) { O << "/*Missing OP" << OpNo << "*/"; @@ -710,12 +778,24 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: + case AMDGPU::V_CNDMASK_B32_e32_gfx11: + case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11: + case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11: + case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11: + case AMDGPU::V_CNDMASK_B32_dpp_gfx11: + case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11: + case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx11: + case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11: + case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11: case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7: case AMDGPU::V_CNDMASK_B32_e32_vi: if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1)) - printDefaultVccOperand(OpNo, STI, O); + printDefaultVccOperand(OpNo == 0, STI, O); break; } @@ -732,6 +812,10 @@ void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (needsImpliedVcc(Desc, OpNo)) + printDefaultVccOperand(true, STI, O); + unsigned InputModifiers = MI->getOperand(OpNo).getImm(); // Use 'neg(...)' instead of '-' to avoid ambiguity. 
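For reference, the FP source-modifier scheme used above prints a negated source as "neg(...)" rather than a bare '-' (which could be misread as part of a literal), and an absolute-value source as '|...|'. A minimal standalone C++ sketch of that wrapping, not taken from the patch; the NEG/ABS constants below are illustrative stand-ins for SISrcMods::NEG and SISrcMods::ABS:

#include <cstdio>

// Illustrative stand-ins for the SISrcMods::NEG / SISrcMods::ABS bits.
constexpr unsigned NEG = 1u << 0;
constexpr unsigned ABS = 1u << 1;

// Wrap an operand the way the printer above does: neg(...) and |...|.
static void printFPMods(unsigned Mods, const char *Op) {
  if (Mods & NEG)
    std::printf("neg(");
  if (Mods & ABS)
    std::printf("|");
  std::printf("%s", Op);
  if (Mods & ABS)
    std::printf("|");
  if (Mods & NEG)
    std::printf(")");
}

int main() {
  printFPMods(NEG | ABS, "v0"); // prints: neg(|v0|)
  return 0;
}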
@@ -754,7 +838,7 @@ void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI, if (InputModifiers & SISrcMods::ABS) O << '|'; - printOperand(MI, OpNo + 1, STI, O); + printRegularOperand(MI, OpNo + 1, STI, O); if (InputModifiers & SISrcMods::ABS) O << '|'; @@ -767,10 +851,14 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (needsImpliedVcc(Desc, OpNo)) + printDefaultVccOperand(true, STI, O); + unsigned InputModifiers = MI->getOperand(OpNo).getImm(); if (InputModifiers & SISrcMods::SEXT) O << "sext("; - printOperand(MI, OpNo + 1, STI, O); + printRegularOperand(MI, OpNo + 1, STI, O); if (InputModifiers & SISrcMods::SEXT) O << ')'; @@ -784,7 +872,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI, case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10: if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1)) - printDefaultVccOperand(OpNo, STI, O); + printDefaultVccOperand(OpNo == 0, STI, O); break; } } @@ -1203,9 +1291,9 @@ void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - printOperand(MI, OpNo, STI, O); + printRegularOperand(MI, OpNo, STI, O); O << ", "; - printOperand(MI, OpNo + 1, STI, O); + printRegularOperand(MI, OpNo + 1, STI, O); } void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo, @@ -1262,15 +1350,16 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, uint16_t MsgId; uint16_t OpId; uint16_t StreamId; - decodeMsg(Imm16, MsgId, OpId, StreamId); + decodeMsg(Imm16, MsgId, OpId, StreamId, STI); - if (isValidMsgId(MsgId, STI) && - isValidMsgOp(MsgId, OpId, STI) && + StringRef MsgName = getMsgName(MsgId, STI); + + if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) && isValidMsgStream(MsgId, OpId, StreamId, STI)) { - O << "sendmsg(" << getMsgName(MsgId); - if (msgRequiresOp(MsgId)) { - O << ", " << getMsgOpName(MsgId, OpId); - if (msgSupportsStream(MsgId, OpId)) { + O << "sendmsg(" << MsgName; + if (msgRequiresOp(MsgId, STI)) { + O << ", " << getMsgOpName(MsgId, OpId, STI); + if (msgSupportsStream(MsgId, OpId, STI)) { O << ", " << StreamId; } } @@ -1423,6 +1512,76 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + using namespace llvm::AMDGPU::DepCtr; + + uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff; + + bool HasNonDefaultVal = false; + if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) { + int Id = 0; + StringRef Name; + unsigned Val; + bool IsDefault; + bool NeedSpace = false; + while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) { + if (!IsDefault || !HasNonDefaultVal) { + if (NeedSpace) + O << ' '; + O << Name << '(' << Val << ')'; + NeedSpace = true; + } + } + } else { + O << formatHex(Imm16); + } +} + +void AMDGPUInstPrinter::printDelayFlag(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const char *BadInstId = "/* invalid instid value */"; + static const std::array<const char *, 12> InstIds = { + "NO_DEP", "VALU_DEP_1", "VALU_DEP_2", + "VALU_DEP_3", "VALU_DEP_4", "TRANS32_DEP_1", + "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1", + "SALU_CYCLE_1", "SALU_CYCLE_2", "SALU_CYCLE_3"}; + + const char *BadInstSkip =
"/* invalid instskip value */"; + static const std::array InstSkips = { + "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"}; + + unsigned SImm16 = MI->getOperand(OpNo).getImm(); + const char *Prefix = ""; + + unsigned Value = SImm16 & 0xF; + if (Value) { + const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId; + O << Prefix << "instid0(" << Name << ')'; + Prefix = " | "; + } + + Value = (SImm16 >> 4) & 7; + if (Value) { + const char *Name = + Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip; + O << Prefix << "instskip(" << Name << ')'; + Prefix = " | "; + } + + Value = (SImm16 >> 7) & 0xF; + if (Value) { + const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId; + O << Prefix << "instid1(" << Name << ')'; + Prefix = " | "; + } + + if (!*Prefix) + O << "0"; +} + void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Id; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 71db0beba0b6..202edeee3cb3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -15,6 +15,7 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { +class MCInstrDesc; class AMDGPUInstPrinter : public MCInstPrinter { public: @@ -50,7 +51,6 @@ private: void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printFlatOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -64,6 +64,8 @@ private: const MCSubtargetInfo &STI, raw_ostream &O); void printSMEMOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSMEMOffsetMod(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -116,6 +118,8 @@ private: raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printRegularOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { printOperand(MI, OpNum, STI, O); @@ -172,8 +176,13 @@ private: raw_ostream &O); void printABID(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI, + bool needsImpliedVcc(const MCInstrDesc &Desc, unsigned OpNo) const; + void printDefaultVccOperand(bool FirstOperand, const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVDST(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, unsigned N); @@ -234,6 +243,10 @@ protected: raw_ostream &O); void printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + 
void printDepCtr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printDelayFlag(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printEndpgm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 53c724f2211a..02c213f90f89 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H +#include "llvm/ADT/APInt.h" #include "llvm/MC/MCCodeEmitter.h" -#include namespace llvm { @@ -34,46 +34,34 @@ protected: AMDGPUMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} public: + void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, + APInt &Inst, APInt &Scratch, + const MCSubtargetInfo &STI) const; - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + virtual void getMachineOpValue(const MCInst &MI, const MCOperand &MO, + APInt &Op, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const = 0; - virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 0; - } + virtual void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const = 0; - virtual unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, + virtual void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 0; - } + const MCSubtargetInfo &STI) const = 0; - virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 0; - } + virtual void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const = 0; - virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + virtual void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + APInt &Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 0; - } - - virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 0; - } - - virtual unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 0; - } + const MCSubtargetInfo &STI) const = 0; + + virtual void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const = 0; protected: FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 1f917cd91b47..11fe3f9ef058 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -19,6 +19,7 @@ #include "R600InstPrinter.h" #include "R600MCTargetDesc.h" #include "TargetInfo/AMDGPUTargetInfo.h" +#include
"llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFStreamer.h" @@ -27,6 +28,7 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index e5cce6045c8c..060d4b660632 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -33,7 +33,6 @@ enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 }; MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createAMDGPUAsmBackend(const Target &T, @@ -51,7 +50,6 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM -#define GET_INSTRINFO_SCHED_ENUM #include "AMDGPUGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 7aa5f1abf65b..078133469549 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -17,12 +17,16 @@ #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/AMDHSAKernelDescriptor.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; using namespace llvm::AMDGPU; @@ -102,6 +106,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; @@ -112,6 +117,11 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; } @@ -165,6 +175,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A; case 
GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; + case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; @@ -175,6 +186,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033; case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034; case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035; + case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036; + case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100; + case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101; + case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102; + case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } @@ -285,7 +301,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { uint32_t Encoded_pad = Encoded_s_code_end; // Instruction cache line size in bytes. - const unsigned Log2CacheLineSize = 6; + const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6; const unsigned CacheLineSize = 1u << Log2CacheLineSize; // Extra padding amount in bytes to support prefetch mode 3. @@ -439,6 +455,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); + PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); } PRINT_FIELD( OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, @@ -515,8 +533,8 @@ void AMDGPUTargetELFStreamer::EmitNote( if (STI.getTargetTriple().getOS() == Triple::AMDHSA) NoteFlags = ELF::SHF_ALLOC; - S.PushSection(); - S.SwitchSection( + S.pushSection(); + S.switchSection( Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags)); S.emitInt32(NameSZ); // namesz S.emitValue(DescSZ, 4); // descz @@ -525,7 +543,7 @@ void AMDGPUTargetELFStreamer::EmitNote( S.emitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc S.emitValueToAlignment(4, 0, 1, 0); // padding 0 - S.PopSection(); + S.popSection(); } unsigned AMDGPUTargetELFStreamer::getEFlags() { @@ -691,7 +709,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major, OS.emitBytes(VendorName); OS.emitInt8(0); // NULL terminate VendorName OS.emitBytes(ArchName); - OS.emitInt8(0); // NULL terminte ArchName + OS.emitInt8(0); // NULL terminate ArchName }); } @@ -699,9 +717,9 @@ void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { MCStreamer &OS = getStreamer(); - OS.PushSection(); + OS.pushSection(); OS.emitBytes(StringRef((const char*)&Header, sizeof(Header))); - OS.PopSection(); + OS.popSection(); } void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, @@ -806,7 +824,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { uint32_t Encoded_pad = Encoded_s_code_end; // Instruction cache line size in bytes. - const unsigned Log2CacheLineSize = 6; + const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6; const unsigned CacheLineSize = 1u << Log2CacheLineSize; // Extra padding amount in bytes to support prefetch mode 3. 
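Both EmitCodeEnd implementations above (asm and ELF streamer) pad the end of the code section out to a whole instruction cache line, and the patch doubles the line size on gfx11. A standalone C++ sketch of just that size computation, not part of the patch:

#include <cstdio>
#include <initializer_list>

int main() {
  for (bool IsGFX11Plus : {false, true}) {
    // gfx11 and later use 128-byte instruction cache lines (log2 = 7);
    // earlier targets use 64-byte lines (log2 = 6), as in the hunks above.
    const unsigned Log2CacheLineSize = IsGFX11Plus ? 7 : 6;
    const unsigned CacheLineSize = 1u << Log2CacheLineSize;
    std::printf("gfx11plus=%d -> %u-byte cache line\n", (int)IsGFX11Plus,
                CacheLineSize);
  }
  return 0;
}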
@@ -818,11 +836,11 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { } MCStreamer &OS = getStreamer(); - OS.PushSection(); + OS.pushSection(); OS.emitValueToAlignment(CacheLineSize, Encoded_pad, 4); for (unsigned I = 0; I < FillSize; I += 4) OS.emitInt32(Encoded_pad); - OS.PopSection(); + OS.popSection(); return true; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 6fe192e95e72..78eb304fe84f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -20,6 +20,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/EndianStream.h" @@ -84,9 +85,8 @@ enum FCInstr { }; MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, MRI); + return new R600MCCodeEmitter(MCII, *Ctx.getRegisterInfo()); } void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h index fc52cb33824f..605ae851378d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h @@ -24,7 +24,6 @@ class MCInstrInfo; class MCRegisterInfo; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCInstrInfo *createR600MCInstrInfo(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 77f219aaa3ab..5e67fb5ec876 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -17,10 +17,15 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/APInt.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" using namespace llvm; @@ -34,9 +39,8 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { const MCSubtargetInfo &STI) const; public: - SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, - MCContext &ctx) - : AMDGPUMCCodeEmitter(mcii), MRI(mri) {} + SIMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : AMDGPUMCCodeEmitter(mcii), MRI(*ctx.getRegisterInfo()) {} SIMCCodeEmitter(const SIMCCodeEmitter &) = delete; SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete; @@ -46,42 +50,45 @@ public: const MCSubtargetInfo &STI) const override; /// \returns the encoding for an MCOperand. - uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; + void getMachineOpValue(const MCInst &MI, const MCOperand &MO, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; /// Use a fixup to encode the simm16 field for SOPP branch /// instructions.
- unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, + void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + + void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; - unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; + void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; - unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; - unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; - - unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; + void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; private: uint64_t getImplicitOpSelHiEncoding(int Opcode) const; + void getMachineOpValueCommon(const MCInst &MI, const MCOperand &MO, + unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; }; } // end anonymous namespace MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { - return new SIMCCodeEmitter(MCII, MRI, Ctx); + return new SIMCCodeEmitter(MCII, Ctx); } // Returns the encoding value to use if the given integer is an integer inline @@ -309,8 +316,9 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, computeAvailableFeatures(STI.getFeatureBits())); int Opcode = MI.getOpcode(); - uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI); - const MCInstrDesc &Desc = MCII.get(Opcode); + APInt Encoding, Scratch; + getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); unsigned bytes = Desc.getSize(); // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions. @@ -322,7 +330,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, } for (unsigned i = 0; i < bytes; i++) { - OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); + OS.write((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i)); } // NSA encoding.
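The emission loop above changed representation (an APInt instead of a uint64_t) but not byte order: bytes still go out least-significant first. A standalone C++ sketch of that little-endian walk, not part of the patch; the 32-bit example word is made up:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Encoding = 0x7E000280; // made-up 4-byte instruction word
  for (unsigned I = 0; I < 4; ++I)
    // Emit the least-significant byte first, as the loop above does.
    std::printf("%02x ", (unsigned)((Encoding >> (8 * I)) & 0xff));
  std::printf("\n"); // prints: 80 02 00 7e
  return 0;
}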
@@ -335,9 +343,11 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned NumExtraAddrs = srsrc - vaddr0 - 1; unsigned NumPadding = (-NumExtraAddrs) & 3; - for (unsigned i = 0; i < NumExtraAddrs; ++i) - OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), - Fixups, STI)); + for (unsigned i = 0; i < NumExtraAddrs; ++i) { + getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), Encoding, Fixups, + STI); + OS.write((uint8_t)Encoding.getLimitedValue()); + } for (unsigned i = 0; i < NumPadding; ++i) OS.write(0); } @@ -385,34 +395,36 @@ } } -unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +void SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, + APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br; Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); - return 0; + Op = APInt::getNullValue(96); + } else { + getMachineOpValue(MI, MO, Op, Fixups, STI); } - - return getMachineOpValue(MI, MO, Fixups, STI); } -unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +void SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { auto Offset = MI.getOperand(OpNo).getImm(); // VI only supports 20-bit unsigned offsets. assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset)); - return Offset; + Op = Offset; } -unsigned -SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +void SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { using namespace AMDGPU::SDWA; uint64_t RegEnc = 0; @@ -426,23 +438,24 @@ SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) { RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; } - return RegEnc; + Op = RegEnc; + return; } else { const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI); if (Enc != ~0U && Enc != 255) { - return Enc | SDWA9EncValues::SRC_SGPR_MASK; + Op = Enc | SDWA9EncValues::SRC_SGPR_MASK; + return; } } llvm_unreachable("Unsupported operand kind"); - return 0; } -unsigned -SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +void SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { using namespace AMDGPU::SDWA; uint64_t RegEnc = 0; @@ -455,13 +468,13 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK; RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK; } - return RegEnc; + Op = RegEnc; } -unsigned -SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +void SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo, + APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const
MCSubtargetInfo &STI) const { unsigned Reg = MI.getOperand(OpNo).getReg(); uint64_t Enc = MRI.getEncodingValue(Reg); @@ -476,10 +489,11 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo, MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg)) Enc |= 512; - return Enc; + Op = Enc; } static bool needsPCRel(const MCExpr *Expr) { @@ -505,12 +519,21 @@ static bool needsPCRel(const MCExpr *Expr) { llvm_unreachable("invalid kind"); } -uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, - const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) - return MRI.getEncodingValue(MO.getReg()); +void SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()){ + Op = MRI.getEncodingValue(MO.getReg()); + return; + } + unsigned OpNo = &MO - MI.begin(); + getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI); +} + +void SIMCCodeEmitter::getMachineOpValueCommon( + const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) { // FIXME: If this is expression is PCRel or not should not depend on what @@ -533,28 +556,22 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, uint32_t Offset = Desc.getSize(); assert(Offset == 4 || Offset == 8); - Fixups.push_back( - MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc())); - } - - // Figure out the operand number, needed for isSrcOperand check - unsigned OpNo = 0; - for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) { - if (&MO == &MI.getOperand(OpNo)) - break; + Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc())); } const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); if (AMDGPU::isSISrcOperand(Desc, OpNo)) { uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI); - if (Enc != ~0U) - return Enc; - - } else if (MO.isImm()) - return MO.getImm(); + if (Enc != ~0U) { + Op = Enc; + return; + } + } else if (MO.isImm()) { + Op = MO.getImm(); + return; + } llvm_unreachable("Encoding of this operand type is not supported yet."); - return 0; } #define ENABLE_INSTR_PREDICATE_VERIFIER diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index cf03fd682143..be1addf35012 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -14,6 +14,8 @@ // - MIMGEncGfx90a: encoding for gfx90a for atomics // - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding // - MIMGEncGfx10NSA: gfx10 NSA encoding +// - MIMGEncGfx11Default: gfx11 default (non-NSA) encoding +// - MIMGEncGfx11NSA: gfx11 NSA encoding class MIMGEncoding; def MIMGEncGfx6 : MIMGEncoding; @@ -21,6 +23,8 @@ def MIMGEncGfx8 : MIMGEncoding; def MIMGEncGfx90a : MIMGEncoding; def MIMGEncGfx10Default : MIMGEncoding; def MIMGEncGfx10NSA : MIMGEncoding; +def MIMGEncGfx11Default : MIMGEncoding; +def MIMGEncGfx11NSA : MIMGEncoding; def MIMGEncoding : GenericEnum { let FilterClass = "MIMGEncoding"; @@ -90,11 +94,13 @@ def MIMG { int NOP = -1; } -class mimgopc { - field bits<8> BASE = base; // Opcode for all but atomics +class mimgopc
{ + field bits<8> GFX11 = gfx11; + field bits<8> GFX10M = gfx10m; // GFX10minus for all but atomics field bits<8> VI = vi; // VI is only used for atomic instructions field bits<8> SI = si; // SI is only used for atomic instructions - bit HAS_BASE = !ne(base, MIMG.NOP); + bit HAS_GFX11 = !ne(gfx11, MIMG.NOP); + bit HAS_GFX10M = !ne(gfx10m, MIMG.NOP); bit HAS_VI = !ne(vi, MIMG.NOP); bit HAS_SI = !ne(si, MIMG.NOP); } @@ -207,12 +213,16 @@ class MIMG MIMGEncoding MIMGEncoding; bits<8> VDataDwords; bits<8> VAddrDwords; + + // If NSA is used this counts number of operands VAddrDwords is split into. + bits<8> VAddrOperands; } def MIMGInfoTable : GenericTable { let FilterClass = "MIMG"; let CppTypeName = "MIMGInfo"; - let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords", "VAddrDwords"]; + let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords", + "VAddrDwords", "VAddrOperands"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; string TypeOf_MIMGEncoding = "MIMGEncoding"; @@ -227,11 +237,12 @@ def getMIMGInfo : SearchIndex { // This class used to use !foldl to memoize the AddrAsmNames list. // It turned out that that was much slower than using !filter. -class MIMGNSAHelper { +class MIMGNSAHelper addr_types=!listsplat(VGPR_32, num_addrs)> { list AddrAsmNames = !foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], !lt(i, num_addrs)), "vaddr" # i); - dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames); + dag AddrIns = !dag(ins, addr_types, AddrAsmNames); string AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; int NSA = !if(!le(num_addrs, 1), ?, @@ -247,6 +258,7 @@ class MIMG_gfx6789 op, dag outs, string dns = ""> let AssemblerPredicate = isGFX6GFX7GFX8GFX9NotGFX90A; let MIMGEncoding = MIMGEncGfx6; + let VAddrOperands = 1; let d16 = !if(BaseOpcode.HasD16, ?, 0); } @@ -257,6 +269,7 @@ class MIMG_gfx90a op, dag outs, string dns = ""> let AssemblerPredicate = isGFX90APlus; let MIMGEncoding = MIMGEncGfx90a; + let VAddrOperands = 1; let d16 = !if(BaseOpcode.HasD16, ?, 0); } @@ -264,10 +277,11 @@ class MIMG_gfx90a op, dag outs, string dns = ""> // Base class of all non-NSA gfx10 MIMG instructions. class MIMG_gfx10 : MIMG, MIMGe_gfx10 { - let SubtargetPredicate = isGFX10Plus; - let AssemblerPredicate = isGFX10Plus; + let SubtargetPredicate = isGFX10Only; + let AssemblerPredicate = isGFX10Only; let MIMGEncoding = MIMGEncGfx10Default; + let VAddrOperands = 1; let d16 = !if(BaseOpcode.HasD16, ?, 0); let nsa = 0; @@ -277,10 +291,11 @@ class MIMG_gfx10 // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx10 : MIMG, MIMGe_gfx10 { - let SubtargetPredicate = isGFX10Plus; - let AssemblerPredicate = isGFX10Plus; + let SubtargetPredicate = isGFX10Only; + let AssemblerPredicate = isGFX10Only; let MIMGEncoding = MIMGEncGfx10NSA; + let VAddrOperands = num_addrs; MIMGNSAHelper nsah = MIMGNSAHelper; dag AddrIns = nsah.AddrIns; @@ -290,11 +305,45 @@ class MIMG_nsa_gfx10 let nsa = nsah.NSA; } +// Base class of all non-NSA gfx11 MIMG instructions. +class MIMG_gfx11 + : MIMG, MIMGe_gfx11 { + let SubtargetPredicate = isGFX11Plus; + let AssemblerPredicate = isGFX11Plus; + + let MIMGEncoding = MIMGEncGfx11Default; + let VAddrOperands = 1; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); + let nsa = 0; +} + +// Base class for all NSA MIMG instructions. +// Note that 1-dword addresses always use non-NSA variants. 
+class MIMG_nsa_gfx11 addr_types=[]> + : MIMG, MIMGe_gfx11 { + let SubtargetPredicate = isGFX11Plus; + let AssemblerPredicate = isGFX11Plus; + + let MIMGEncoding = MIMGEncGfx11NSA; + let VAddrOperands = num_addrs; + + MIMGNSAHelper nsah = !if(!empty(addr_types), + MIMGNSAHelper, + MIMGNSAHelper); + dag AddrIns = nsah.AddrIns; + string AddrAsm = nsah.AddrAsm; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); + let nsa = nsah.NSA; +} + class MIMG_NoSampler_Helper - : MIMG_gfx6789 { + : MIMG_gfx6789 { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -307,7 +356,7 @@ class MIMG_NoSampler_Helper_gfx90a - : MIMG_gfx90a .ret:$vdata), dns> { + : MIMG_gfx90a .ret:$vdata), dns> { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), @@ -319,7 +368,7 @@ class MIMG_NoSampler_Helper_gfx90a - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), @@ -331,7 +380,32 @@ class MIMG_NoSampler_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con(AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_NoSampler_gfx11 + : MIMG_gfx11 { + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_NoSampler_nsa_gfx11 + : MIMG_nsa_gfx11 { let InOperandList = !con(AddrIns, (ins SReg_256:$srsrc, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, @@ -347,7 +421,7 @@ multiclass MIMG_NoSampler_Src_Helper { let ssamp = 0 in { let VAddrDwords = 1 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V1 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then @@ -356,30 +430,42 @@ multiclass MIMG_NoSampler_Src_Helper ; } + if op.HAS_GFX11 then { + def _V1_gfx11 : MIMG_NoSampler_gfx11; + } } let VAddrDwords = 2 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V2 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a ; def _V2_gfx10 : MIMG_NoSampler_gfx10; def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } + if op.HAS_GFX11 then { + def _V2_gfx11 : MIMG_NoSampler_gfx11; + def _V2_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; + } } let VAddrDwords = 3 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V3 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a ; def _V3_gfx10 : MIMG_NoSampler_gfx10; def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } + if op.HAS_GFX11 then { + def _V3_gfx11 : MIMG_NoSampler_gfx11; + def _V3_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; + } } let VAddrDwords = 4 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V4 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a ; @@ -387,6 +473,11 @@ multiclass MIMG_NoSampler_Src_Helper ; 
} + if op.HAS_GFX11 then { + def _V4_gfx11 : MIMG_NoSampler_gfx11; + def _V4_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; + } } } } @@ -420,7 +511,7 @@ class MIMG_Store_Helper - : MIMG_gfx6789 { + : MIMG_gfx6789 { let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -433,7 +524,7 @@ class MIMG_Store_Helper_gfx90a - : MIMG_gfx90a { + : MIMG_gfx90a { let InOperandList = !con((ins getLdStRegisterOperand.ret:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, @@ -446,7 +537,7 @@ class MIMG_Store_Helper_gfx90a - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), @@ -458,7 +549,33 @@ class MIMG_Store_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con((ins DataRC:$vdata), + AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Store_gfx11 + : MIMG_gfx11 { + let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Store_nsa_gfx11 + : MIMG_nsa_gfx11 { let InOperandList = !con((ins DataRC:$vdata), AddrIns, (ins SReg_256:$srsrc, DMask:$dmask, @@ -475,39 +592,57 @@ multiclass MIMG_Store_Addr_Helper ; + let hasPostISelHook = 1 in def _V1_gfx90a : MIMG_Store_Helper_gfx90a ; def _V1_gfx10 : MIMG_Store_gfx10 ; } + if op.HAS_GFX11 then { + def _V1_gfx11 : MIMG_Store_gfx11 ; + } } let VAddrDwords = 2 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V2 : MIMG_Store_Helper ; def _V2_gfx90a : MIMG_Store_Helper_gfx90a ; def _V2_gfx10 : MIMG_Store_gfx10 ; def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V2_gfx11 : MIMG_Store_gfx11 ; + def _V2_nsa_gfx11 : MIMG_Store_nsa_gfx11 ; + } } let VAddrDwords = 3 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V3 : MIMG_Store_Helper ; def _V3_gfx90a : MIMG_Store_Helper_gfx90a ; def _V3_gfx10 : MIMG_Store_gfx10 ; def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V3_gfx11 : MIMG_Store_gfx11 ; + def _V3_nsa_gfx11 : MIMG_Store_nsa_gfx11 ; + } } let VAddrDwords = 4 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V4 : MIMG_Store_Helper ; def _V4_gfx90a : MIMG_Store_Helper_gfx90a ; def _V4_gfx10 : MIMG_Store_gfx10 ; def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V4_gfx11 : MIMG_Store_gfx11 ; + def _V4_nsa_gfx11 : MIMG_Store_nsa_gfx11 ; + } } } } @@ -582,7 +717,7 @@ class MIMG_Atomic_gfx90a - : MIMG_gfx10(op.BASE), (outs DataRC:$vdst), + : MIMG_gfx10(op.GFX10M), (outs DataRC:$vdst), !if(enableDisasm, "AMDGPU", "")> { let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; @@ -596,7 +731,37 @@ class MIMG_Atomic_gfx10 - : MIMG_nsa_gfx10(op.BASE), (outs DataRC:$vdst), num_addrs, + : MIMG_nsa_gfx10(op.GFX10M), (outs DataRC:$vdst), 
num_addrs, + !if(enableDisasm, "AMDGPU", "")> { + let Constraints = "$vdst = $vdata"; + let AsmMatchConverter = "cvtMIMGAtomic"; + + let InOperandList = !con((ins DataRC:$vdata), + AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe)); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"; +} + +class MIMG_Atomic_gfx11 + : MIMG_gfx11(op.GFX11), (outs DataRC:$vdst), + !if(enableDisasm, "AMDGPU", "")> { + let Constraints = "$vdst = $vdata"; + let AsmMatchConverter = "cvtMIMGAtomic"; + + let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe); + let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"; +} + +class MIMG_Atomic_nsa_gfx11 + : MIMG_nsa_gfx11(op.GFX11), (outs DataRC:$vdst), num_addrs, !if(enableDisasm, "AMDGPU", "")> { let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; @@ -622,11 +787,15 @@ multiclass MIMG_Atomic_Addr_Helper_m ; + let hasPostISelHook = 1 in def _V1_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V1_gfx10 : MIMG_Atomic_gfx10 ; } + if op.HAS_GFX11 then { + def _V1_gfx11 : MIMG_Atomic_gfx11 ; + } } let VAddrDwords = 2 in { if op.HAS_SI then { @@ -636,10 +805,14 @@ multiclass MIMG_Atomic_Addr_Helper_m ; def _V2_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V2_gfx10 : MIMG_Atomic_gfx10 ; def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V2_gfx11 : MIMG_Atomic_gfx11 ; + def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 ; + } } let VAddrDwords = 3 in { if op.HAS_SI then { @@ -649,10 +822,14 @@ multiclass MIMG_Atomic_Addr_Helper_m ; def _V3_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V3_gfx10 : MIMG_Atomic_gfx10 ; def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V3_gfx11 : MIMG_Atomic_gfx11 ; + def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 ; + } } let VAddrDwords = 4 in { if op.HAS_SI then { @@ -662,10 +839,14 @@ multiclass MIMG_Atomic_Addr_Helper_m ; def _V4_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V4_gfx10 : MIMG_Atomic_gfx10 ; def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V4_gfx11 : MIMG_Atomic_gfx11 ; + def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 ; + } } } } @@ -691,7 +872,7 @@ multiclass MIMG_Atomic class MIMG_Sampler_Helper - : MIMG_gfx6789 { + : MIMG_gfx6789 { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -702,7 +883,7 @@ class MIMG_Sampler_Helper - : MIMG_gfx90a.ret:$vdata), dns> { + : MIMG_gfx90a.ret:$vdata), dns> { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), @@ -714,7 +895,7 @@ class MIMG_Sampler_gfx90a - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), @@ -727,7 +908,34 @@ class MIMG_Sampler_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con(AddrIns, + (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, + Dim:$dim, 
UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm" + #"$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Sampler_gfx11 + : MIMG_gfx11 { + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm" + #"$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Sampler_nsa_gfx11 + : MIMG_nsa_gfx11 { let InOperandList = !con(AddrIns, (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, @@ -823,7 +1031,7 @@ multiclass MIMG_Sampler_Src_Helper { foreach addr = MIMG_Sampler_AddrSizes.MachineInstrs in { let VAddrDwords = addr.NumWords in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V # addr.NumWords : MIMG_Sampler_Helper ; @@ -835,16 +1043,26 @@ multiclass MIMG_Sampler_Src_Helper ; } + if op.HAS_GFX11 then { + def _V # addr.NumWords # _gfx11 + : MIMG_Sampler_gfx11 ; + } } } foreach addr = MIMG_Sampler_AddrSizes.NSAInstrs in { let VAddrDwords = addr.NumWords in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V # addr.NumWords # _nsa_gfx10 : MIMG_Sampler_nsa_gfx10; } + if !and(op.HAS_GFX11, !le(addr.NumWords, 5)) then { + def _V # addr.NumWords # _nsa_gfx11 + : MIMG_Sampler_nsa_gfx11; + } } } } @@ -911,10 +1129,17 @@ class MIMG_IntersectRay_Helper { // when we only need 9, 11 or 12 depending on A16 field and ptr size. RegisterClass RegClass = MIMGAddrSize.RegClass; int VAddrDwords = !srl(RegClass.Size, 5); + + int gfx11_nsa_addrs = !if(A16, 4, 5); + RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); + list gfx11_addr_types = + !if(A16, + [node_ptr_type, VGPR_32, VReg_96, VReg_96], + [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); } class MIMG_IntersectRay_gfx10 - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), !if(A16, (ins GFX10A16:$a16), (ins))); @@ -924,7 +1149,27 @@ class MIMG_IntersectRay_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con(nsah.AddrIns, + (ins SReg_128:$srsrc), + !if(A16, (ins GFX10A16:$a16), (ins))); + let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", ""); +} + +class MIMG_IntersectRay_gfx11 + : MIMG_gfx11 { + + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), + !if(A16, (ins GFX10A16:$a16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(A16, "$a16", ""); + + let nsa = 0; +} + +class MIMG_IntersectRay_nsa_gfx11 addr_types> + : MIMG_nsa_gfx11 { let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc), !if(A16, (ins GFX10A16:$a16), (ins))); @@ -936,9 +1181,7 @@ multiclass MIMG_IntersectRay { def "" : MIMGBaseOpcode { let BVH = 1; } - let SubtargetPredicate = HasGFX10_AEncoding, - AssemblerPredicate = HasGFX10_AEncoding, - AsmMatchConverter = !if(A16, "cvtIntersectRay", ""), + let AsmMatchConverter = !if(A16, "cvtIntersectRay", ""), dmask = 0xf, unorm = 1, d16 = 0, @@ -955,142 +1198,183 @@ multiclass MIMG_IntersectRay { def _sa_gfx10 : MIMG_IntersectRay_gfx10 { let VAddrDwords = info.VAddrDwords; } + def _sa_gfx11 : MIMG_IntersectRay_gfx11 { + let VAddrDwords = info.VAddrDwords; + } def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10 
{ let VAddrDwords = info.num_addrs; } + def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11 { + let VAddrDwords = info.num_addrs; + } + } +} + +multiclass MIMG_MSAA_Load { + def "" : MIMGBaseOpcode { + let HasD16 = 1; + let Gather4 = 1; /* for appropriate dmask handling */ + let MSAA = 1; + } + + let BaseOpcode = !cast(NAME), + Gather4 = 1, hasPostISelHook = 0, mayLoad = 1 in { + let VDataDwords = 2 in + defm _V2 : MIMG_NoSampler_Src_Helper; /* packed D16 */ + let VDataDwords = 3 in + defm _V3 : MIMG_NoSampler_Src_Helper; /* packed D16 + tfe */ + let VDataDwords = 4 in + defm _V4 : MIMG_NoSampler_Src_Helper; + let VDataDwords = 5 in + defm _V5 : MIMG_NoSampler_Src_Helper; } } //===----------------------------------------------------------------------===// // MIMG Instructions //===----------------------------------------------------------------------===// -defm IMAGE_LOAD : MIMG_NoSampler , "image_load", 1>; -defm IMAGE_LOAD_MIP : MIMG_NoSampler , "image_load_mip", 1, 1>; -defm IMAGE_LOAD_PCK : MIMG_NoSampler , "image_load_pck", 0>; -defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler , "image_load_pck_sgn", 0>; -defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler , "image_load_mip_pck", 0, 1>; -defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler , "image_load_mip_pck_sgn", 0, 1>; -defm IMAGE_STORE : MIMG_Store , "image_store", 1>; -defm IMAGE_STORE_MIP : MIMG_Store , "image_store_mip", 1, 1>; -defm IMAGE_STORE_PCK : MIMG_Store , "image_store_pck", 0>; -defm IMAGE_STORE_MIP_PCK : MIMG_Store , "image_store_mip_pck", 0, 1>; - -defm IMAGE_GET_RESINFO : MIMG_NoSampler , "image_get_resinfo", 0, 1, 1>; - -defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">; -defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", 1>; -defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; -defm IMAGE_ATOMIC_SUB : MIMG_Atomic , "image_atomic_sub">; -defm IMAGE_ATOMIC_RSUB : MIMG_Atomic , "image_atomic_rsub">; -defm IMAGE_ATOMIC_SMIN : MIMG_Atomic , "image_atomic_smin">; -defm IMAGE_ATOMIC_UMIN : MIMG_Atomic , "image_atomic_umin">; -defm IMAGE_ATOMIC_SMAX : MIMG_Atomic , "image_atomic_smax">; -defm IMAGE_ATOMIC_UMAX : MIMG_Atomic , "image_atomic_umax">; -defm IMAGE_ATOMIC_AND : MIMG_Atomic , "image_atomic_and">; -defm IMAGE_ATOMIC_OR : MIMG_Atomic , "image_atomic_or">; -defm IMAGE_ATOMIC_XOR : MIMG_Atomic , "image_atomic_xor">; -defm IMAGE_ATOMIC_INC : MIMG_Atomic , "image_atomic_inc">; -defm IMAGE_ATOMIC_DEC : MIMG_Atomic , "image_atomic_dec">; -defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic , "image_atomic_fcmpswap", 1, 1>; -defm IMAGE_ATOMIC_FMIN : MIMG_Atomic , "image_atomic_fmin", 0, 1>; -defm IMAGE_ATOMIC_FMAX : MIMG_Atomic , "image_atomic_fmax", 0, 1>; - -defm IMAGE_SAMPLE : MIMG_Sampler_WQM , AMDGPUSample>; +let OtherPredicates = [HasImageInsts] in { + +defm IMAGE_LOAD : MIMG_NoSampler , "image_load", 1>; +defm IMAGE_LOAD_MIP : MIMG_NoSampler , "image_load_mip", 1, 1>; +defm IMAGE_LOAD_PCK : MIMG_NoSampler , "image_load_pck", 0>; +defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler , "image_load_pck_sgn", 0>; +defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler , "image_load_mip_pck", 0, 1>; +defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler , "image_load_mip_pck_sgn", 0, 1>; +defm IMAGE_STORE : MIMG_Store , "image_store", 1>; +defm IMAGE_STORE_MIP : MIMG_Store , "image_store_mip", 1, 1>; +defm IMAGE_STORE_PCK : MIMG_Store , "image_store_pck", 0>; +defm IMAGE_STORE_MIP_PCK : MIMG_Store , "image_store_mip_pck", 0, 1>; + +defm IMAGE_GET_RESINFO : MIMG_NoSampler , "image_get_resinfo", 0, 1, 1>; + +defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , 
"image_atomic_swap">; +defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", 1>; +defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; +defm IMAGE_ATOMIC_SUB : MIMG_Atomic , "image_atomic_sub">; +defm IMAGE_ATOMIC_RSUB : MIMG_Atomic , "image_atomic_rsub">; +defm IMAGE_ATOMIC_SMIN : MIMG_Atomic , "image_atomic_smin">; +defm IMAGE_ATOMIC_UMIN : MIMG_Atomic , "image_atomic_umin">; +defm IMAGE_ATOMIC_SMAX : MIMG_Atomic , "image_atomic_smax">; +defm IMAGE_ATOMIC_UMAX : MIMG_Atomic , "image_atomic_umax">; +defm IMAGE_ATOMIC_AND : MIMG_Atomic , "image_atomic_and">; +defm IMAGE_ATOMIC_OR : MIMG_Atomic , "image_atomic_or">; +defm IMAGE_ATOMIC_XOR : MIMG_Atomic , "image_atomic_xor">; +defm IMAGE_ATOMIC_INC : MIMG_Atomic , "image_atomic_inc">; +defm IMAGE_ATOMIC_DEC : MIMG_Atomic , "image_atomic_dec">; +defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic , "image_atomic_fcmpswap", 1, 1>; +defm IMAGE_ATOMIC_FMIN : MIMG_Atomic , "image_atomic_fmin", 0, 1>; +defm IMAGE_ATOMIC_FMAX : MIMG_Atomic , "image_atomic_fmax", 0, 1>; + +defm IMAGE_SAMPLE : MIMG_Sampler_WQM , AMDGPUSample>; let OtherPredicates = [HasExtendedImageInsts] in { -defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM , AMDGPUSample_cl>; -defm IMAGE_SAMPLE_D : MIMG_Sampler , AMDGPUSample_d>; -defm IMAGE_SAMPLE_D_CL : MIMG_Sampler , AMDGPUSample_d_cl>; -defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler , AMDGPUSample_d, 0, 1>; -defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler , AMDGPUSample_d_cl, 0, 1>; -defm IMAGE_SAMPLE_L : MIMG_Sampler , AMDGPUSample_l>; -defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM , AMDGPUSample_b>; -defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM , AMDGPUSample_b_cl>; -defm IMAGE_SAMPLE_LZ : MIMG_Sampler , AMDGPUSample_lz>; -defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM , AMDGPUSample_c>; -defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM , AMDGPUSample_c_cl>; -defm IMAGE_SAMPLE_C_D : MIMG_Sampler , AMDGPUSample_c_d>; -defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler , AMDGPUSample_c_d_cl>; -defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler , AMDGPUSample_c_d, 0, 1>; -defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl, 0, 1>; -defm IMAGE_SAMPLE_C_L : MIMG_Sampler , AMDGPUSample_c_l>; -defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM , AMDGPUSample_c_b>; -defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl>; -defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler , AMDGPUSample_c_lz>; -defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM , AMDGPUSample_o>; -defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM , AMDGPUSample_cl_o>; -defm IMAGE_SAMPLE_D_O : MIMG_Sampler , AMDGPUSample_d_o>; -defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler , AMDGPUSample_d_cl_o>; -defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler , AMDGPUSample_d_o, 0, 1>; -defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_d_cl_o, 0, 1>; -defm IMAGE_SAMPLE_L_O : MIMG_Sampler , AMDGPUSample_l_o>; -defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM , AMDGPUSample_b_o>; -defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_b_cl_o>; -defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler , AMDGPUSample_lz_o>; -defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM , AMDGPUSample_c_o>; -defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_cl_o>; -defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler , AMDGPUSample_c_d_o>; -defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler , AMDGPUSample_c_d_cl_o>; -defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_o, 0, 1>; -defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl_o, 0, 1>; -defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler , AMDGPUSample_c_l_o>; -defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl_o>; -defm IMAGE_SAMPLE_C_B_O 
: MIMG_Sampler_WQM , AMDGPUSample_c_b_o>; -defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler , AMDGPUSample_c_lz_o>; -defm IMAGE_GATHER4 : MIMG_Gather_WQM , AMDGPUSample>; -defm IMAGE_GATHER4_CL : MIMG_Gather_WQM , AMDGPUSample_cl>; -defm IMAGE_GATHER4_L : MIMG_Gather , AMDGPUSample_l>; -defm IMAGE_GATHER4_B : MIMG_Gather_WQM , AMDGPUSample_b>; -defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM , AMDGPUSample_b_cl>; -defm IMAGE_GATHER4_LZ : MIMG_Gather , AMDGPUSample_lz>; -defm IMAGE_GATHER4_C : MIMG_Gather_WQM , AMDGPUSample_c>; -defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM , AMDGPUSample_c_cl>; -defm IMAGE_GATHER4_C_L : MIMG_Gather , AMDGPUSample_c_l>; -defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM , AMDGPUSample_c_b>; -defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM , AMDGPUSample_c_b_cl>; -defm IMAGE_GATHER4_C_LZ : MIMG_Gather , AMDGPUSample_c_lz>; -defm IMAGE_GATHER4_O : MIMG_Gather_WQM , AMDGPUSample_o>; -defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM , AMDGPUSample_cl_o>; -defm IMAGE_GATHER4_L_O : MIMG_Gather , AMDGPUSample_l_o>; -defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM , AMDGPUSample_b_o>; -defm IMAGE_GATHER4_B_CL_O : MIMG_Gather , AMDGPUSample_b_cl_o>; -defm IMAGE_GATHER4_LZ_O : MIMG_Gather , AMDGPUSample_lz_o>; -defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM , AMDGPUSample_c_o>; -defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_cl_o>; -defm IMAGE_GATHER4_C_L_O : MIMG_Gather , AMDGPUSample_c_l_o>; -defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM , AMDGPUSample_c_b_o>; -defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_b_cl_o>; -defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather , AMDGPUSample_c_lz_o>; -//defm IMAGE_GATHER4H : MIMG_Gather_WQM , ?>; - -defm IMAGE_GET_LOD : MIMG_Sampler , AMDGPUSample, 1, 0, 1, "image_get_lod">; - -defm IMAGE_SAMPLE_CD : MIMG_Sampler , AMDGPUSample_cd>; -defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler , AMDGPUSample_cd_cl>; -defm IMAGE_SAMPLE_C_CD : MIMG_Sampler , AMDGPUSample_c_cd>; -defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler , AMDGPUSample_c_cd_cl>; -defm IMAGE_SAMPLE_CD_O : MIMG_Sampler , AMDGPUSample_cd_o>; -defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler , AMDGPUSample_cd_cl_o>; -defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler , AMDGPUSample_c_cd_o>; -defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler , AMDGPUSample_c_cd_cl_o>; -defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler , AMDGPUSample_cd, 0, 1>; -defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_cd_cl, 0, 1>; -defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler , AMDGPUSample_c_cd, 0, 1>; -defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl, 0, 1>; -defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler , AMDGPUSample_cd_o, 0, 1>; -defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_cd_cl_o, 0, 1>; -defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_o, 0, 1>; -defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl_o, 0, 1>; +defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM , AMDGPUSample_cl>; +defm IMAGE_SAMPLE_D : MIMG_Sampler , AMDGPUSample_d>; +defm IMAGE_SAMPLE_D_CL : MIMG_Sampler , AMDGPUSample_d_cl>; +defm IMAGE_SAMPLE_L : MIMG_Sampler , AMDGPUSample_l>; +defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM , AMDGPUSample_b>; +defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM , AMDGPUSample_b_cl>; +defm IMAGE_SAMPLE_LZ : MIMG_Sampler , AMDGPUSample_lz>; +defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM , AMDGPUSample_c>; +defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM , AMDGPUSample_c_cl>; +defm IMAGE_SAMPLE_C_D : MIMG_Sampler , AMDGPUSample_c_d>; +defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler , AMDGPUSample_c_d_cl>; +defm IMAGE_SAMPLE_C_L : MIMG_Sampler , 
AMDGPUSample_c_l>; +defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM , AMDGPUSample_c_b>; +defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl>; +defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler , AMDGPUSample_c_lz>; +defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM , AMDGPUSample_o>; +defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM , AMDGPUSample_cl_o>; +defm IMAGE_SAMPLE_D_O : MIMG_Sampler , AMDGPUSample_d_o>; +defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler , AMDGPUSample_d_cl_o>; +defm IMAGE_SAMPLE_L_O : MIMG_Sampler , AMDGPUSample_l_o>; +defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM , AMDGPUSample_b_o>; +defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_b_cl_o>; +defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler , AMDGPUSample_lz_o>; +defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM , AMDGPUSample_c_o>; +defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_cl_o>; +defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler , AMDGPUSample_c_d_o>; +defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler , AMDGPUSample_c_d_cl_o>; +defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler , AMDGPUSample_c_l_o>; +defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl_o>; +defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM , AMDGPUSample_c_b_o>; +defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler , AMDGPUSample_c_lz_o>; +defm IMAGE_GATHER4 : MIMG_Gather_WQM , AMDGPUSample>; +defm IMAGE_GATHER4_CL : MIMG_Gather_WQM , AMDGPUSample_cl>; +defm IMAGE_GATHER4_L : MIMG_Gather , AMDGPUSample_l>; +defm IMAGE_GATHER4_B : MIMG_Gather_WQM , AMDGPUSample_b>; +defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM , AMDGPUSample_b_cl>; +defm IMAGE_GATHER4_LZ : MIMG_Gather , AMDGPUSample_lz>; +defm IMAGE_GATHER4_C : MIMG_Gather_WQM , AMDGPUSample_c>; +defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM , AMDGPUSample_c_cl>; +defm IMAGE_GATHER4_C_L : MIMG_Gather , AMDGPUSample_c_l>; +defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM , AMDGPUSample_c_b>; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM , AMDGPUSample_c_b_cl>; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather , AMDGPUSample_c_lz>; +defm IMAGE_GATHER4_O : MIMG_Gather_WQM , AMDGPUSample_o>; +defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM , AMDGPUSample_cl_o>; +defm IMAGE_GATHER4_L_O : MIMG_Gather , AMDGPUSample_l_o>; +defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM , AMDGPUSample_b_o>; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather , AMDGPUSample_b_cl_o>; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather , AMDGPUSample_lz_o>; +defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM , AMDGPUSample_c_o>; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_cl_o>; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather , AMDGPUSample_c_l_o>; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM , AMDGPUSample_c_b_o>; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_b_cl_o>; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather , AMDGPUSample_c_lz_o>; +//defm IMAGE_GATHER4H : MIMG_Gather_WQM , ?>; + +defm IMAGE_GET_LOD : MIMG_Sampler , AMDGPUSample, 1, 0, 1, "image_get_lod">; + +defm IMAGE_SAMPLE_CD : MIMG_Sampler , AMDGPUSample_cd>; +defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler , AMDGPUSample_cd_cl>; +defm IMAGE_SAMPLE_C_CD : MIMG_Sampler , AMDGPUSample_c_cd>; +defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler , AMDGPUSample_c_cd_cl>; +defm IMAGE_SAMPLE_CD_O : MIMG_Sampler , AMDGPUSample_cd_o>; +defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler , AMDGPUSample_cd_cl_o>; +defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler , AMDGPUSample_c_cd_o>; +defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler , AMDGPUSample_c_cd_cl_o>; } // End OtherPredicates = [HasExtendedImageInsts] -//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>; -//def IMAGE_SAMPLER : MIMG_NoPattern_ 
<"image_sampler", 0x0000007f>; - -let SubtargetPredicate = HasGFX10_AEncoding in -defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler , "image_msaa_load", 1, 0, 0, 1>; -defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 0>; -defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 1>; -defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 0>; -defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 1>; +let OtherPredicates = [HasExtendedImageInsts,HasG16] in { +defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler , AMDGPUSample_d, 0, 1>; +defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler , AMDGPUSample_d_cl, 0, 1>; +defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler , AMDGPUSample_c_d, 0, 1>; +defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl, 0, 1>; +defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler , AMDGPUSample_d_o, 0, 1>; +defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_d_cl_o, 0, 1>; +defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_o, 0, 1>; +defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl_o, 0, 1>; +defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler , AMDGPUSample_cd, 0, 1>; +defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_cd_cl, 0, 1>; +defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler , AMDGPUSample_c_cd, 0, 1>; +defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl, 0, 1>; +defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler , AMDGPUSample_cd_o, 0, 1>; +defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_cd_cl_o, 0, 1>; +defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_o, 0, 1>; +defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl_o, 0, 1>; +} // End OtherPredicates = [HasExtendedImageInsts,HasG16] + +//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", mimgopc<0x7e>>; +//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", mimgopc<0x7f>>; + +let SubtargetPredicate = isGFX10Only, OtherPredicates = [HasGFX10_AEncoding] in +defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler , "image_msaa_load", 1, 0, 0, 1>; + +let OtherPredicates = [HasGFX10_AEncoding] in +defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load , "image_msaa_load">; + +let OtherPredicates = [HasGFX10_AEncoding] in { +defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 0>; +defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 1>; +defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 0>; +defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 1>; +} // End OtherPredicates = [HasGFX10_AEncoding] + +} // End let OtherPredicates = [HasImageInsts] /********** ========================================= **********/ /********** Table of dimension-aware image intrinsics **********/ diff --git a/llvm/lib/Target/AMDGPU/R600.h b/llvm/lib/Target/AMDGPU/R600.h index 2b483ae63da9..5dfbf8f1ef95 100644 --- a/llvm/lib/Target/AMDGPU/R600.h +++ b/llvm/lib/Target/AMDGPU/R600.h @@ -26,7 +26,7 @@ FunctionPass *createR600EmitClauseMarkers(); FunctionPass *createR600ClauseMergePass(); FunctionPass *createR600Packetizer(); FunctionPass *createR600ControlFlowFinalizer(); -FunctionPass *createAMDGPUCFGStructurizerPass(); +FunctionPass *createR600MachineCFGStructurizerPass(); FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); ModulePass *createR600OpenCLImageTypeLoweringPass(); diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp 
b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp index c19e3c41485e..afcb6b4d65f8 100644 --- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -111,7 +111,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) { MCContext &Context = getObjFileLowering().getContext(); MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(ConfigSection); + OutStreamer->switchSection(ConfigSection); EmitProgramInfoR600(MF); @@ -120,7 +120,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (isVerbose()) { MCSectionELF *CommentSection = Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(CommentSection); + OutStreamer->switchSection(CommentSection); R600MachineFunctionInfo *MFI = MF.getInfo(); OutStreamer->emitRawComment( diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp index 715fd69fc7ae..2b85df8ac6cf 100644 --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. +/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner. /// This pass is merging consecutive CFAlus where applicable. /// It needs to be called after IfCvt for best results. //===----------------------------------------------------------------------===// @@ -15,6 +15,7 @@ #include "MCTargetDesc/R600MCTargetDesc.h" #include "R600.h" #include "R600Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 8a48a67b829c..4bf38a3c6ceb 100644 --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -16,6 +16,7 @@ #include "R600.h" #include "R600MachineFunctionInfo.h" #include "R600Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index b9ca7f928d56..ef67e5c937dc 100644 --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -17,6 +17,7 @@ #include "R600.h" #include "R600Defines.h" #include "R600Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -327,9 +328,9 @@ char R600EmitClauseMarkers::ID = 0; } // end anonymous namespace INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", - "R600 Emit Clause Markters", false, false) + "R600 Emit Clause Markers", false, false) INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", - "R600 Emit Clause Markters", false, false) + "R600 Emit Clause Markers", false, false) FunctionPass *llvm::createR600EmitClauseMarkers() { return new R600EmitClauseMarkers(); diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 194879fef53c..ef2d049f9175 100644 --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -17,6 +17,8 @@ #include "R600.h" #include "R600Defines.h" #include "R600Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" 
+#include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp index abd4086db62c..fd8cecab90da 100644 --- a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -8,6 +8,7 @@ #include "R600FrameLowering.h" #include "R600Subtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index bd757e9e3d70..bf52f7830ad7 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -42,39 +42,26 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, computeRegisterProperties(Subtarget->getRegisterInfo()); // Legalize loads and stores to the private address space. - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Custom); - setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom); // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address // spaces, so it is custom lowered to handle those where it isn't. - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); - - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); - - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); - } + for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(Op, VT, MVT::i1, Promote); + setLoadExtAction(Op, VT, MVT::i8, Custom); + setLoadExtAction(Op, VT, MVT::i16, Custom); + } // Workaround for LegalizeDAG asserting on expansion of i1 vector loads. 
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32, + MVT::v2i1, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32, + MVT::v4i1, Expand); - setOperationAction(ISD::STORE, MVT::i8, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Custom); - setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32}, + Custom); setTruncStoreAction(MVT::i32, MVT::i8, Custom); setTruncStoreAction(MVT::i32, MVT::i16, Custom); @@ -96,55 +83,34 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand); // Set condition code actions - setCondCodeAction(ISD::SETO, MVT::f32, Expand); - setCondCodeAction(ISD::SETUO, MVT::f32, Expand); - setCondCodeAction(ISD::SETLT, MVT::f32, Expand); - setCondCodeAction(ISD::SETLE, MVT::f32, Expand); - setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); - setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); - setCondCodeAction(ISD::SETONE, MVT::f32, Expand); - setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); - setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); - setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); - setCondCodeAction(ISD::SETULT, MVT::f32, Expand); - setCondCodeAction(ISD::SETULE, MVT::f32, Expand); - - setCondCodeAction(ISD::SETLE, MVT::i32, Expand); - setCondCodeAction(ISD::SETLT, MVT::i32, Expand); - setCondCodeAction(ISD::SETULE, MVT::i32, Expand); - setCondCodeAction(ISD::SETULT, MVT::i32, Expand); - - setOperationAction(ISD::FCOS, MVT::f32, Custom); - setOperationAction(ISD::FSIN, MVT::f32, Custom); - - setOperationAction(ISD::SETCC, MVT::v4i32, Expand); - setOperationAction(ISD::SETCC, MVT::v2i32, Expand); - - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT, + ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE, + ISD::SETUGT, ISD::SETULT, ISD::SETULE}, + MVT::f32, Expand); + + setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT}, + MVT::i32, Expand); + + setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom); + + setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand); + + setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::FSUB, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Custom); - setOperationAction(ISD::FTRUNC, MVT::f64, Custom); - setOperationAction(ISD::FRINT, MVT::f64, Custom); - setOperationAction(ISD::FFLOOR, MVT::f64, Custom); + setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR}, + MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom); - setOperationAction(ISD::SETCC, MVT::i32, Expand); - setOperationAction(ISD::SETCC, MVT::f32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); - 
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand); + setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64}, + Custom); - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::v2i32, Expand); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32}, + Expand); // ADD, SUB overflow. // TODO: turn these into Legal? @@ -158,56 +124,43 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand); if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand); if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, + {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, + {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom); // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 // to be Legal/Custom in order to avoid library calls. 
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, MVT::i32, + Custom); - if (!Subtarget->hasFMA()) { - setOperationAction(ISD::FMA, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - } + if (!Subtarget->hasFMA()) + setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand); // FIXME: May need no denormals check setOperationAction(ISD::FMAD, MVT::f32, Legal); - if (!Subtarget->hasBFI()) { + if (!Subtarget->hasBFI()) // fcopysign can be done in a single instruction with BFI. - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - } + setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand); if (!Subtarget->hasBCNT(32)) setOperationAction(ISD::CTPOP, MVT::i32, Expand); @@ -229,30 +182,22 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; - for (MVT VT : ScalarIntVTs) { - setOperationAction(ISD::ADDC, VT, Expand); - setOperationAction(ISD::SUBC, VT, Expand); - setOperationAction(ISD::ADDE, VT, Expand); - setOperationAction(ISD::SUBE, VT, Expand); - } + for (MVT VT : ScalarIntVTs) + setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, + Expand); // LLVM will expand these to atomic_cmp_swap(0) // and atomic_swap, respectively. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand); // We need to custom lower some of the intrinsics - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other, + Custom); setSchedulingPreference(Sched::Source); - setTargetDAGCombine(ISD::FP_ROUND); - setTargetDAGCombine(ISD::FP_TO_SINT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::SELECT_CC); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::LOAD); + setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT, + ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT, ISD::LOAD}); } static inline bool isEOP(MachineBasicBlock::iterator I) { @@ -995,7 +940,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const /// LLVM generates byte-addressed pointers. For indirect addressing, we need to /// convert these pointers to a register index. Each register holds /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the -/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used +/// \p StackWidth, which tells us how many of the 4 sub-registers will be used /// for indirect addressing. 
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, @@ -1100,7 +1045,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, DAG.getConstant(3, DL, MVT::i32)); - // TODO: Contrary to the name of the functiom, + // TODO: Contrary to the name of the function, // it also handles sub i32 non-truncating stores (like i1) SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Store->getValue()); @@ -1163,9 +1108,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); // TODO: can the chain be replaced without creating a new store? SDValue NewStore = DAG.getTruncStore( - NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), - MemVT, StoreNode->getAlignment(), - StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT, + StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(), + StoreNode->getAAInfo()); StoreNode = cast(NewStore); } @@ -1417,7 +1362,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); SDValue NewLoad = DAG.getExtLoad( ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT, - LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags()); + LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags()); SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad, DAG.getValueType(MemVT)); @@ -1610,7 +1555,7 @@ static SDValue CompactSwizzlableVector( if (NewBldVec[i].isUndef()) // We mask write here to teach later passes that the ith element of this // vector is undef. Thus we can use it to reduce 128 bits reg usage, - // break false dependencies and additionnaly make assembly easier to read. + // break false dependencies and additionally make assembly easier to read. 
RemapSwizzle[i] = 7; // SEL_MASK_WRITE if (ConstantFPSDNode *C = dyn_cast(NewBldVec[i])) { if (C->isZero()) { @@ -1714,7 +1659,7 @@ SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block, if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode)) return SDValue(); - if (LoadNode->getAlignment() < 4) + if (LoadNode->getAlign() < Align(4)) return SDValue(); int ConstantBlock = ConstantAddressBlock(Block); diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index aec8b1ae4837..d04ec6490aae 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -18,6 +18,7 @@ #include "R600Defines.h" #include "R600Subtarget.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineFrameInfo.h" using namespace llvm; @@ -1469,21 +1470,3 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand, FlagOp.setImm(InstFlags); } } - -unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind( - unsigned Kind) const { - switch (Kind) { - case PseudoSourceValue::Stack: - case PseudoSourceValue::FixedStack: - return AMDGPUAS::PRIVATE_ADDRESS; - case PseudoSourceValue::ConstantPool: - case PseudoSourceValue::GOT: - case PseudoSourceValue::JumpTable: - case PseudoSourceValue::GlobalValueCallEntry: - case PseudoSourceValue::ExternalSymbolCallEntry: - case PseudoSourceValue::TargetCustom: - return AMDGPUAS::CONSTANT_ADDRESS; - } - - llvm_unreachable("Invalid pseudo source kind"); -} diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index bc8a4786df77..f720e4656348 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -320,9 +320,6 @@ public: bool isRegisterLoad(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD; } - - unsigned getAddressSpaceForPseudoSourceKind( - unsigned Kind) const override; }; namespace R600 { diff --git a/llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp new file mode 100644 index 000000000000..0a96c643d9bd --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp @@ -0,0 +1,1640 @@ +//===- R600MachineCFGStructurizer.cpp - CFG Structurizer ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==-----------------------------------------------------------------------===// + +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" +#include "R600RegisterInfo.h" +#include "R600Subtarget.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "structcfg" + +#define DEFAULT_VEC_SLOTS 8 + +// TODO: move-begin. + +//===----------------------------------------------------------------------===// +// +// Statistics for CFGStructurizer. 
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+                                 "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+                             "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+namespace llvm {
+
+void initializeR600MachineCFGStructurizerPass(PassRegistry &);
+
+} // end namespace llvm
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
+
+#define SHOWNEWBLK(b, msg) \
+  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+             dbgs() << "\n";);
+
+#define SHOWBLK_DETAIL(b, msg) \
+  LLVM_DEBUG(if (b) { \
+    dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+    b->print(dbgs()); \
+    dbgs() << "\n"; \
+  });
+
+#define INVALIDSCCNUM -1
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+class BlockInformation {
+public:
+  bool IsRetired = false;
+  int SccNum = INVALIDSCCNUM;
+
+  BlockInformation() = default;
+};
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+class R600MachineCFGStructurizer : public MachineFunctionPass {
+public:
+  using MBBVector = SmallVector<MachineBasicBlock *, 32>;
+  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
+  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
+
+  enum PathToKind {
+    Not_SinglePath = 0,
+    SinglePath_InPath = 1,
+    SinglePath_NotInPath = 2
+  };
+
+  static char ID;
+
+  R600MachineCFGStructurizer() : MachineFunctionPass(ID) {
+    initializeR600MachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "AMDGPU Control Flow Graph structurizer Pass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachinePostDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// Perform the CFG structurization
+  bool run();
+
+  /// Perform the CFG preparation
+  /// This step will remove every unconditional/dead jump instruction and make
+  /// sure all loops have an exit block
+  bool prepare();
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    // FIXME: This pass causes verification failures.
+    MF.getProperties().set(
+        MachineFunctionProperties::Property::FailsVerification);
+
+    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
+    TRI = &TII->getRegisterInfo();
+    LLVM_DEBUG(MF.dump(););
+    OrderedBlks.clear();
+    Visited.clear();
+    FuncRep = &MF;
+    MLI = &getAnalysis<MachineLoopInfo>();
+    LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
+    MDT = &getAnalysis<MachineDominatorTree>();
+    LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
+    PDT = &getAnalysis<MachinePostDominatorTree>();
+    LLVM_DEBUG(PDT->print(dbgs()););
+    prepare();
+    run();
+    LLVM_DEBUG(MF.dump(););
+    return true;
+  }
+
+protected:
+  MachineDominatorTree *MDT;
+  MachinePostDominatorTree *PDT;
+  MachineLoopInfo *MLI;
+  const R600InstrInfo *TII = nullptr;
+  const R600RegisterInfo *TRI = nullptr;
+
+  // PRINT FUNCTIONS
+  /// Print the ordered Blocks.
+ void printOrderedBlocks() const { + size_t i = 0; + for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), + iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { + dbgs() << "BB" << (*iterBlk)->getNumber(); + dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; + if (i != 0 && i % 10 == 0) { + dbgs() << "\n"; + } else { + dbgs() << " "; + } + } + } + + static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { + for (const MachineLoop *L : LoopInfo) + L->print(dbgs()); + } + + // UTILITY FUNCTIONS + int getSCCNum(MachineBasicBlock *MBB) const; + MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; + bool hasBackEdge(MachineBasicBlock *MBB) const; + bool isRetiredBlock(MachineBasicBlock *MBB) const; + bool isActiveLoophead(MachineBasicBlock *MBB) const; + PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, + bool AllowSideEntry = true) const; + int countActiveBlock(MBBVector::const_iterator It, + MBBVector::const_iterator E) const; + bool needMigrateBlock(MachineBasicBlock *MBB) const; + + // Utility Functions + void reversePredicateSetter(MachineBasicBlock::iterator I, + MachineBasicBlock &MBB); + /// Compute the reversed DFS post order of Blocks + void orderBlocks(MachineFunction *MF); + + // Function originally from CFGStructTraits + void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, + const DebugLoc &DL = DebugLoc()); + MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, + const DebugLoc &DL = DebugLoc()); + MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); + void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, + const DebugLoc &DL); + void insertCondBranchBefore(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, int NewOpcode, + int RegNum, const DebugLoc &DL); + + static int getBranchNzeroOpcode(int OldOpcode); + static int getBranchZeroOpcode(int OldOpcode); + static int getContinueNzeroOpcode(int OldOpcode); + static int getContinueZeroOpcode(int OldOpcode); + static MachineBasicBlock *getTrueBranch(MachineInstr *MI); + static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); + static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, + MachineInstr *MI); + static bool isCondBranch(MachineInstr *MI); + static bool isUncondBranch(MachineInstr *MI); + static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); + static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); + + /// The correct naming for this is getPossibleLoopendBlockBranchInstr. + /// + /// BB with backward-edge could have move instructions after the branch + /// instruction. Such move instruction "belong to" the loop backward-edge. 
+  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
+
+  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
+  static bool isReturnBlock(MachineBasicBlock *MBB);
+  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
+                                 MachineBasicBlock *SrcMBB);
+  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
+
+  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
+  /// because the AMDGPU instruction is not recognized as a terminator;
+  /// fix this and retire this routine
+  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
+                                  MachineBasicBlock *OldMBB,
+                                  MachineBasicBlock *NewBlk);
+
+  static void wrapup(MachineBasicBlock *MBB);
+
+  int patternMatch(MachineBasicBlock *MBB);
+  int patternMatchGroup(MachineBasicBlock *MBB);
+  int serialPatternMatch(MachineBasicBlock *MBB);
+  int ifPatternMatch(MachineBasicBlock *MBB);
+  int loopendPatternMatch();
+  int mergeLoop(MachineLoop *LoopRep);
+
+  /// Return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in
+  /// the same loop with LoopLandInfo; without explicitly keeping track of
+  /// loopContBlks and loopBreakBlks, this is a method to get the information.
+  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
+                                   MachineBasicBlock *Src2MBB);
+  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
+                       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
+  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
+                          MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
+  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+                              MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+                              MachineBasicBlock **LandMBBPtr);
+  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+                                   MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+                                   MachineBasicBlock *LandMBB, bool Detail = false);
+  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+                         MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
+  void mergeSerialBlock(MachineBasicBlock *DstMBB,
+                        MachineBasicBlock *SrcMBB);
+
+  void mergeIfthenelseBlock(MachineInstr *BranchMI,
+                            MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
+                            MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
+  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
+                          MachineBasicBlock *LandMBB);
+  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+                           MachineBasicBlock *LandMBB);
+  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+                           MachineBasicBlock *ContMBB);
+
+  /// normalizeInfiniteLoopExit changes
+  /// B1:
+  ///   uncond_br LoopHeader
+  ///
+  /// to
+  /// B1:
+  ///   cond_br 1 LoopHeader dummyExit
+  /// and returns the newly added dummy exit block
+  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
+  void removeUnconditionalBranch(MachineBasicBlock *MBB);
+
+  /// Remove duplicate branch instructions in a block.
+ /// For instance + /// B0: + /// cond_br X B1 B2 + /// cond_br X B1 B2 + /// is transformed to + /// B0: + /// cond_br X B1 B2 + void removeRedundantConditionalBranch(MachineBasicBlock *MBB); + + void addDummyExitBlock(SmallVectorImpl &RetMBB); + void removeSuccessor(MachineBasicBlock *MBB); + MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, + MachineBasicBlock *PredMBB); + void migrateInstruction(MachineBasicBlock *SrcMBB, + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); + void recordSccnum(MachineBasicBlock *MBB, int SCCNum); + void retireBlock(MachineBasicBlock *MBB); + +private: + MBBInfoMap BlockInfoMap; + LoopLandInfoMap LLInfoMap; + std::map Visited; + MachineFunction *FuncRep; + SmallVector OrderedBlks; +}; + +} // end anonymous namespace + +char R600MachineCFGStructurizer::ID = 0; + +int R600MachineCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); + if (It == BlockInfoMap.end()) + return INVALIDSCCNUM; + return (*It).second->SccNum; +} + +MachineBasicBlock *R600MachineCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) + const { + LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); + if (It == LLInfoMap.end()) + return nullptr; + return (*It).second; +} + +bool R600MachineCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { + MachineLoop *LoopRep = MLI->getLoopFor(MBB); + if (!LoopRep) + return false; + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + return MBB->isSuccessor(LoopHeader); +} + +bool R600MachineCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); + if (It == BlockInfoMap.end()) + return false; + return (*It).second->IsRetired; +} + +bool R600MachineCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { + MachineLoop *LoopRep = MLI->getLoopFor(MBB); + while (LoopRep && LoopRep->getHeader() == MBB) { + MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); + if(!LoopLand) + return true; + if (!isRetiredBlock(LoopLand)) + return true; + LoopRep = LoopRep->getParentLoop(); + } + return false; +} + +R600MachineCFGStructurizer::PathToKind R600MachineCFGStructurizer::singlePathTo( + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, + bool AllowSideEntry) const { + assert(DstMBB); + if (SrcMBB == DstMBB) + return SinglePath_InPath; + while (SrcMBB && SrcMBB->succ_size() == 1) { + SrcMBB = *SrcMBB->succ_begin(); + if (SrcMBB == DstMBB) + return SinglePath_InPath; + if (!AllowSideEntry && SrcMBB->pred_size() > 1) + return Not_SinglePath; + } + if (SrcMBB && SrcMBB->succ_size()==0) + return SinglePath_NotInPath; + return Not_SinglePath; +} + +int R600MachineCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, + MBBVector::const_iterator E) const { + int Count = 0; + while (It != E) { + if (!isRetiredBlock(*It)) + ++Count; + ++It; + } + return Count; +} + +bool R600MachineCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { + unsigned BlockSizeThreshold = 30; + unsigned CloneInstrThreshold = 100; + bool MultiplePreds = MBB && (MBB->pred_size() > 1); + + if(!MultiplePreds) + return false; + unsigned BlkSize = MBB->size(); + return ((BlkSize > BlockSizeThreshold) && + (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); +} + +void R600MachineCFGStructurizer::reversePredicateSetter( + MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { + assert(I.isValid() && "Expected valid iterator"); + for (;; --I) { + if (I == MBB.end()) + continue; 
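+    // Scan backwards from I for the PRED_X that computes the branch
+    // predicate, then invert its condition code (SETE <-> SETNE) below.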
+ if (I->getOpcode() == R600::PRED_X) { + switch (I->getOperand(2).getImm()) { + case R600::PRED_SETE_INT: + I->getOperand(2).setImm(R600::PRED_SETNE_INT); + return; + case R600::PRED_SETNE_INT: + I->getOperand(2).setImm(R600::PRED_SETE_INT); + return; + case R600::PRED_SETE: + I->getOperand(2).setImm(R600::PRED_SETNE); + return; + case R600::PRED_SETNE: + I->getOperand(2).setImm(R600::PRED_SETE); + return; + default: + llvm_unreachable("PRED_X Opcode invalid!"); + } + } + } +} + +void R600MachineCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, + int NewOpcode, const DebugLoc &DL) { + MachineInstr *MI = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); + MBB->push_back(MI); + //assume the instruction doesn't take any reg operand ... + SHOWNEWINSTR(MI); +} + +MachineInstr *R600MachineCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, + int NewOpcode, + const DebugLoc &DL) { + MachineInstr *MI = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); + if (!MBB->empty()) + MBB->insert(MBB->begin(), MI); + else + MBB->push_back(MI); + SHOWNEWINSTR(MI); + return MI; +} + +MachineInstr *R600MachineCFGStructurizer::insertInstrBefore( + MachineBasicBlock::iterator I, int NewOpcode) { + MachineInstr *OldMI = &(*I); + MachineBasicBlock *MBB = OldMI->getParent(); + MachineInstr *NewMBB = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); + MBB->insert(I, NewMBB); + //assume the instruction doesn't take any reg operand ... + SHOWNEWINSTR(NewMBB); + return NewMBB; +} + +void R600MachineCFGStructurizer::insertCondBranchBefore( + MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { + MachineInstr *OldMI = &(*I); + MachineBasicBlock *MBB = OldMI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); + MBB->insert(I, NewMI); + MachineInstrBuilder MIB(*MF, NewMI); + MIB.addReg(OldMI->getOperand(1).getReg(), false); + SHOWNEWINSTR(NewMI); + //erase later oldInstr->eraseFromParent(); +} + +void R600MachineCFGStructurizer::insertCondBranchBefore( + MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, + int RegNum, const DebugLoc &DL) { + MachineFunction *MF = blk->getParent(); + MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); + //insert before + blk->insert(I, NewInstr); + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); + SHOWNEWINSTR(NewInstr); +} + +int R600MachineCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case R600::JUMP_COND: + case R600::JUMP: return R600::IF_PREDICATE_SET; + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; + default: llvm_unreachable("internal error"); + } + return -1; +} + +int R600MachineCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case R600::JUMP_COND: + case R600::JUMP: return R600::IF_PREDICATE_SET; + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; + default: llvm_unreachable("internal error"); + } + return -1; +} + +int R600MachineCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case R600::JUMP_COND: + case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; + default: llvm_unreachable("internal error"); + } + return -1; +} + +int R600MachineCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case R600::JUMP_COND: + case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; + default: 
llvm_unreachable("internal error"); + } + return -1; +} + +MachineBasicBlock *R600MachineCFGStructurizer::getTrueBranch(MachineInstr *MI) { + return MI->getOperand(0).getMBB(); +} + +void R600MachineCFGStructurizer::setTrueBranch(MachineInstr *MI, + MachineBasicBlock *MBB) { + MI->getOperand(0).setMBB(MBB); +} + +MachineBasicBlock * +R600MachineCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, + MachineInstr *MI) { + assert(MBB->succ_size() == 2); + MachineBasicBlock *TrueBranch = getTrueBranch(MI); + MachineBasicBlock::succ_iterator It = MBB->succ_begin(); + MachineBasicBlock::succ_iterator Next = It; + ++Next; + return (*It == TrueBranch) ? *Next : *It; +} + +bool R600MachineCFGStructurizer::isCondBranch(MachineInstr *MI) { + switch (MI->getOpcode()) { + case R600::JUMP_COND: + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return true; + default: + return false; + } + return false; +} + +bool R600MachineCFGStructurizer::isUncondBranch(MachineInstr *MI) { + switch (MI->getOpcode()) { + case R600::JUMP: + case R600::BRANCH: + return true; + default: + return false; + } + return false; +} + +DebugLoc R600MachineCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { + //get DebugLoc from the first MachineBasicBlock instruction with debug info + DebugLoc DL; + for (MachineInstr &MI : *MBB) + if (MI.getDebugLoc()) + DL = MI.getDebugLoc(); + return DL; +} + +MachineInstr *R600MachineCFGStructurizer::getNormalBlockBranchInstr( + MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + MachineInstr *MI = &*It; + if (MI && (isCondBranch(MI) || isUncondBranch(MI))) + return MI; + return nullptr; +} + +MachineInstr *R600MachineCFGStructurizer::getLoopendBlockBranchInstr( + MachineBasicBlock *MBB) { + for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); + It != E; ++It) { + // FIXME: Simplify + MachineInstr *MI = &*It; + if (MI) { + if (isCondBranch(MI) || isUncondBranch(MI)) + return MI; + else if (!TII->isMov(MI->getOpcode())) + break; + } + } + return nullptr; +} + +MachineInstr *R600MachineCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + if (It != MBB->rend()) { + MachineInstr *instr = &(*It); + if (instr->getOpcode() == R600::RETURN) + return instr; + } + return nullptr; +} + +bool R600MachineCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { + MachineInstr *MI = getReturnInstr(MBB); + bool IsReturn = MBB->succ_empty(); + if (MI) + assert(IsReturn); + else if (IsReturn) + LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() + << " is return block without RETURN instr\n";); + return IsReturn; +} + +void R600MachineCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) { + for (MachineBasicBlock *Succ : SrcMBB->successors()) + DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of +} + +MachineBasicBlock *R600MachineCFGStructurizer::clone(MachineBasicBlock *MBB) { + MachineFunction *Func = MBB->getParent(); + MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); + Func->push_back(NewMBB); //insert to function + for (const MachineInstr &It : *MBB) + NewMBB->push_back(Func->CloneMachineInstr(&It)); + return NewMBB; +} + +void R600MachineCFGStructurizer::replaceInstrUseOfBlockWith( + MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, + MachineBasicBlock *NewBlk) { + MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); + if (BranchMI && isCondBranch(BranchMI) && + 
getTrueBranch(BranchMI) == OldMBB)
+    setTrueBranch(BranchMI, NewBlk);
+}
+
+void R600MachineCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
+  assert((!MBB->getParent()->getJumpTableInfo()
+          || MBB->getParent()->getJumpTableInfo()->isEmpty())
+         && "found a jump table");
+
+  //collect continue right before endloop
+  SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
+  MachineBasicBlock::iterator Pre = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+  MachineBasicBlock::iterator It = Pre;
+  while (It != E) {
+    if (Pre->getOpcode() == R600::CONTINUE
+        && It->getOpcode() == R600::ENDLOOP)
+      ContInstr.push_back(&*Pre);
+    Pre = It;
+    ++It;
+  }
+
+  //delete continue right before endloop
+  for (unsigned i = 0; i < ContInstr.size(); ++i)
+    ContInstr[i]->eraseFromParent();
+
+  // TODO to fix up jump table so later phase won't be confused.  if
+  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+  // there isn't such an interface yet.  alternatively, replace all the other
+  // blocks in the jump table with the entryBlk //}
+}
+
+bool R600MachineCFGStructurizer::prepare() {
+  bool Changed = false;
+
+  //FIXME: if not reducible flow graph, make it so ???
+
+  LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::prepare\n";);
+
+  orderBlocks(FuncRep);
+
+  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
+
+  // Add an ExitBlk to loops that don't have one
+  for (MachineLoop *LoopRep : *MLI) {
+    MBBVector ExitingMBBs;
+    LoopRep->getExitingBlocks(ExitingMBBs);
+
+    if (ExitingMBBs.size() == 0) {
+      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
+      if (DummyExitBlk)
+        RetBlks.push_back(DummyExitBlk);
+    }
+  }
+
+  // Remove unconditional branch instr.
+  // Add dummy exit block iff there are multiple returns.
+  for (MachineBasicBlock *MBB : OrderedBlks) {
+    removeUnconditionalBranch(MBB);
+    removeRedundantConditionalBranch(MBB);
+    if (isReturnBlock(MBB)) {
+      RetBlks.push_back(MBB);
+    }
+    assert(MBB->succ_size() <= 2);
+  }
+
+  if (RetBlks.size() >= 2) {
+    addDummyExitBlock(RetBlks);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool R600MachineCFGStructurizer::run() {
+  //Assume reducible CFG...
+  LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::run\n");
+
+#ifdef STRESSTEST
+  //Use the worse block ordering to test the algorithm.
+  ReverseVector(orderedBlks);
+#endif
+
+  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
+  int NumIter = 0;
+  bool Finish = false;
+  MachineBasicBlock *MBB;
+  bool MakeProgress = false;
+  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
+                                        OrderedBlks.end());
+
+  do {
+    ++NumIter;
+    LLVM_DEBUG(dbgs() << "numIter = " << NumIter
+                      << ", numRemainedBlk = " << NumRemainedBlk << "\n";);
+    (void)NumIter;
+
+    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
+        OrderedBlks.begin();
+    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
+        OrderedBlks.end();
+
+    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
+        It;
+    MachineBasicBlock *SccBeginMBB = nullptr;
+    int SccNumBlk = 0;  // The number of active blocks, init to a
+                        // maximum possible number.
+    int SccNumIter;  // Number of iterations in this SCC.
+
+    while (It != E) {
+      MBB = *It;
+
+      if (!SccBeginMBB) {
+        SccBeginIter = It;
+        SccBeginMBB = MBB;
+        SccNumIter = 0;
+        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
+        LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
+                   dbgs() << "\n";);
+      }
+
+      if (!isRetiredBlock(MBB))
+        patternMatch(MBB);
+
+      ++It;
+
+      bool ContNextScc = true;
+      if (It == E
+          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
+        // Just finish one scc.
+ ++SccNumIter; + int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { + LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) + << ", sccNumIter = " << SccNumIter; + dbgs() << "doesn't make any progress\n";); + (void)SccNumIter; + ContNextScc = true; + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { + SccNumBlk = sccRemainedNumBlk; + It = SccBeginIter; + ContNextScc = false; + LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) + << "sccNumIter = " << SccNumIter << '\n';); + } else { + // Finish the current scc. + ContNextScc = true; + } + } else { + // Continue on next component in the current scc. + ContNextScc = false; + } + + if (ContNextScc) + SccBeginMBB = nullptr; + } //while, "one iteration" over the function. + + MachineBasicBlock *EntryMBB = + *GraphTraits::nodes_begin(FuncRep); + if (EntryMBB->succ_empty()) { + Finish = true; + LLVM_DEBUG(dbgs() << "Reduce to one block\n";); + } else { + int NewnumRemainedBlk + = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); + // consider cloned blocks ?? + if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { + MakeProgress = true; + NumRemainedBlk = NewnumRemainedBlk; + } else { + MakeProgress = false; + LLVM_DEBUG(dbgs() << "No progress\n";); + } + } + } while (!Finish && MakeProgress); + + // Misc wrap up to maintain the consistency of the Function representation. + wrapup(*GraphTraits::nodes_begin(FuncRep)); + + // Detach retired Block, release memory. + for (auto &It : BlockInfoMap) { + if (It.second && It.second->IsRetired) { + assert((It.first)->getNumber() != -1); + LLVM_DEBUG(dbgs() << "Erase BB" << (It.first)->getNumber() << "\n";); + It.first->eraseFromParent(); // Remove from the parent Function. 
+ } + delete It.second; + } + BlockInfoMap.clear(); + LLInfoMap.clear(); + + if (!Finish) { + LLVM_DEBUG(FuncRep->viewCFG()); + report_fatal_error("IRREDUCIBLE_CFG"); + } + + return true; +} + +void R600MachineCFGStructurizer::orderBlocks(MachineFunction *MF) { + int SccNum = 0; + for (scc_iterator It = scc_begin(MF); !It.isAtEnd(); + ++It, ++SccNum) { + const std::vector &SccNext = *It; + for (MachineBasicBlock *MBB : SccNext) { + OrderedBlks.push_back(MBB); + recordSccnum(MBB, SccNum); + } + } + + // walk through all the block in func to check for unreachable + for (auto *MBB : nodes(MF)) { + SccNum = getSCCNum(MBB); + if (SccNum == INVALIDSCCNUM) + dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; + } +} + +int R600MachineCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { + int NumMatch = 0; + int CurMatch; + + LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); + + while ((CurMatch = patternMatchGroup(MBB)) > 0) + NumMatch += CurMatch; + + LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() + << ", numMatch = " << NumMatch << "\n";); + + return NumMatch; +} + +int R600MachineCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { + int NumMatch = 0; + NumMatch += loopendPatternMatch(); + NumMatch += serialPatternMatch(MBB); + NumMatch += ifPatternMatch(MBB); + return NumMatch; +} + +int R600MachineCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { + if (MBB->succ_size() != 1) + return 0; + + MachineBasicBlock *childBlk = *MBB->succ_begin(); + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) + return 0; + + mergeSerialBlock(MBB, childBlk); + ++numSerialPatternMatch; + return 1; +} + +int R600MachineCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { + //two edges + if (MBB->succ_size() != 2) + return 0; + if (hasBackEdge(MBB)) + return 0; + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); + if (!BranchMI) + return 0; + + assert(isCondBranch(BranchMI)); + int NumMatch = 0; + + MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); + NumMatch += serialPatternMatch(TrueMBB); + NumMatch += ifPatternMatch(TrueMBB); + MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); + NumMatch += serialPatternMatch(FalseMBB); + NumMatch += ifPatternMatch(FalseMBB); + MachineBasicBlock *LandBlk; + int Cloned = 0; + + assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); + // TODO: Simplify + if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 + && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { + // Diamond pattern + LandBlk = *TrueMBB->succ_begin(); + } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { + // Triangle pattern, false is empty + LandBlk = FalseMBB; + FalseMBB = nullptr; + } else if (FalseMBB->succ_size() == 1 + && *FalseMBB->succ_begin() == TrueMBB) { + // Triangle pattern, true is empty + // We reverse the predicate to make a triangle, empty false pattern; + std::swap(TrueMBB, FalseMBB); + reversePredicateSetter(MBB->end(), *MBB); + LandBlk = FalseMBB; + FalseMBB = nullptr; + } else if (FalseMBB->succ_size() == 1 + && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { + LandBlk = *FalseMBB->succ_begin(); + } else if (TrueMBB->succ_size() == 1 + && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { + LandBlk = *TrueMBB->succ_begin(); + } else { + return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); + } + + // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the + // new BB created for landBlk==NULL may introduce new 
+ // reduction process.
+ if (LandBlk &&
+ ((TrueMBB && TrueMBB->pred_size() > 1)
+ || (FalseMBB && FalseMBB->pred_size() > 1))) {
+ Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
+ }
+
+ if (TrueMBB && TrueMBB->pred_size() > 1) {
+ TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
+ ++Cloned;
+ }
+
+ if (FalseMBB && FalseMBB->pred_size() > 1) {
+ FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
+ ++Cloned;
+ }
+
+ mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += Cloned;
+
+ return 1 + Cloned + NumMatch;
+}
+
+int R600MachineCFGStructurizer::loopendPatternMatch() {
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It : *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
+
+ if (NestedLoops.empty())
+ return 0;
+
+ // Process nested loops outside->inside (we did push_front),
+ // so a "continue" to an outside loop won't be mistaken as a "break"
+ // of the current loop.
+ int Num = 0;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
+ if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
+ continue;
+ LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
+ int NumBreak = mergeLoop(ExaminedLoop);
+ if (NumBreak == -1)
+ break;
+ Num += NumBreak;
+ }
+ return Num;
+}
+
+int R600MachineCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ MBBVector ExitingMBBs;
+ LoopRep->getExitingBlocks(ExitingMBBs);
+ assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
+ LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
+ << " exiting blocks\n";);
+ // We assume a single ExitBlk.
+ MBBVector ExitBlks;
+ LoopRep->getExitBlocks(ExitBlks);
+ SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
+ for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
+ ExitBlkSet.insert(ExitBlks[i]);
+ assert(ExitBlkSet.size() == 1);
+ MachineBasicBlock *ExitBlk = *ExitBlks.begin();
+ assert(ExitBlk && "Loop has several exit blocks");
+ MBBVector LatchBlks;
+ for (auto *LB : inverse_children<MachineBasicBlock *>(LoopHeader))
+ if (LoopRep->contains(LB))
+ LatchBlks.push_back(LB);
+
+ for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
+ mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
+ for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
+ settleLoopcontBlock(LatchBlks[i], LoopHeader);
+ int Match = 0;
+ do {
+ Match = 0;
+ Match += serialPatternMatch(LoopHeader);
+ Match += ifPatternMatch(LoopHeader);
+ } while (Match > 0);
+ mergeLooplandBlock(LoopHeader, ExitBlk);
+ MachineLoop *ParentLoop = LoopRep->getParentLoop();
+ if (ParentLoop)
+ MLI->changeLoopFor(LoopHeader, ParentLoop);
+ else
+ MLI->removeBlock(LoopHeader);
+ Visited[LoopRep] = true;
+ return 1;
+}
+
+bool R600MachineCFGStructurizer::isSameloopDetachedContbreak(
+ MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
+ if (Src1MBB->succ_empty()) {
+ MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
+ if (LoopRep && LoopRep == MLI->getLoopFor(Src2MBB)) {
+ MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
+ if (TheEntry) {
+ LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
+ << Src1MBB->getNumber() << " src2 = BB"
+ << Src2MBB->getNumber() << "\n";);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+int R600MachineCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
+ int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
+ if (Num == 0) {
+ LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
+ << "\n";);
+ Num =
handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); + } + return Num; +} + +int R600MachineCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { + int Num = 0; + MachineBasicBlock *DownBlk; + + //trueBlk could be the common post dominator + DownBlk = TrueMBB; + + LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() + << " true = BB" << TrueMBB->getNumber() + << ", numSucc=" << TrueMBB->succ_size() << " false = BB" + << FalseMBB->getNumber() << "\n";); + + while (DownBlk) { + LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); + + if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { + LLVM_DEBUG(dbgs() << " working\n";); + + Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); + Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); + + numClonedBlock += Num; + Num += serialPatternMatch(*HeadMBB->succ_begin()); + Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); + Num += ifPatternMatch(HeadMBB); + assert(Num > 0); + + break; + } + LLVM_DEBUG(dbgs() << " not working\n";); + DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; + } // walk down the postDomTree + + return Num; +} + +#ifndef NDEBUG +void R600MachineCFGStructurizer::showImproveSimpleJumpintoIf( + MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { + dbgs() << "head = BB" << HeadMBB->getNumber() + << " size = " << HeadMBB->size(); + if (Detail) { + dbgs() << "\n"; + HeadMBB->print(dbgs()); + dbgs() << "\n"; + } + + if (TrueMBB) { + dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " + << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); + if (Detail) { + dbgs() << "\n"; + TrueMBB->print(dbgs()); + dbgs() << "\n"; + } + } + if (FalseMBB) { + dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " + << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); + if (Detail) { + dbgs() << "\n"; + FalseMBB->print(dbgs()); + dbgs() << "\n"; + } + } + if (LandMBB) { + dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " + << LandMBB->size() << " numPred = " << LandMBB->pred_size(); + if (Detail) { + dbgs() << "\n"; + LandMBB->print(dbgs()); + dbgs() << "\n"; + } + } + + dbgs() << "\n"; +} +#endif + +int R600MachineCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock **LandMBBPtr) { + bool MigrateTrue = false; + bool MigrateFalse = false; + + MachineBasicBlock *LandBlk = *LandMBBPtr; + + assert((!TrueMBB || TrueMBB->succ_size() <= 1) + && (!FalseMBB || FalseMBB->succ_size() <= 1)); + + if (TrueMBB == FalseMBB) + return 0; + + MigrateTrue = needMigrateBlock(TrueMBB); + MigrateFalse = needMigrateBlock(FalseMBB); + + if (!MigrateTrue && !MigrateFalse) + return 0; + + // If we need to migrate either trueBlk and falseBlk, migrate the rest that + // have more than one predecessors. without doing this, its predecessor + // rather than headBlk will have undefined value in initReg. 
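The needMigrateBlock helper used above is not part of this hunk. For illustration only, a check of roughly this shape decides whether cloning a block for its extra predecessors is worth the cost; the thresholds and exact heuristic here are assumptions, not the pass's actual values.

#include "llvm/CodeGen/MachineBasicBlock.h"

// Sketch: migration is only interesting for blocks with several
// predecessors, and only when the duplication cost would be large.
static bool needMigrateBlockSketch(const llvm::MachineBasicBlock *MBB) {
  const unsigned BlockSizeThreshold = 30;   // assumed value
  const unsigned CloneInstrThreshold = 100; // assumed value
  if (!MBB || MBB->pred_size() <= 1)
    return false; // nothing to migrate with at most one predecessor
  unsigned BlkSize = MBB->size();
  // Cloning cost grows with block size times the number of extra preds.
  return BlkSize > BlockSizeThreshold &&
         BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold;
}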
+ if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
+ MigrateTrue = true;
+ if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
+ MigrateFalse = true;
+
+ LLVM_DEBUG(
+ dbgs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ //                       {initReg = 0; org falseBlk branch }
+ //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ //      => org landBlk
+ // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg != 2) {...}
+ //
+ // Add initReg = initVal to headBlk.
+
+ const TargetRegisterClass *I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (!MigrateTrue || !MigrateFalse) {
+ // XXX: We have an opportunity here to optimize the "branch into if"
+ // case. Branch into if looks like this:
+ //                      entry
+ //                     /     |
+ //          diamond_head   branch_from
+ //            /      \          |
+ //  diamond_false   diamond_true
+ //            \      /
+ //              done
+ //
+ // The diamond_head block begins the "if" and the diamond_true block
+ // is the block being "branched into".
+ //
+ // If MigrateTrue is true, then TrueBB is the block being "branched into"
+ // and if MigrateFalse is true, then FalseBB is the block being
+ // "branched into".
+ //
+ // Here is the pseudo code for how I think the optimization should work:
+ // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
+ // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
+ // 3. Move the branch instruction from diamond_head into its own basic
+ //    block (new_block).
+ // 4. Add an unconditional branch from diamond_head to new_block.
+ // 5. Replace the branch instruction in branch_from with an unconditional
+ //    branch to new_block. If branch_from has multiple predecessors, then
+ //    we need to replace the True/False block in the branch
+ //    instruction instead of replacing it.
+ // 6. Change the condition of the branch instruction in new_block from
+ //    COND to (COND || GPR0).
+ //
+ // In order to insert these MOV instructions, we will need to use the
+ // RegisterScavenger. Usually liveness stops being tracked during
+ // the late machine optimization passes, however if we implement
+ // bool TargetRegisterInfo::requiresRegisterScavenging(
+ //     const MachineFunction &MF)
+ // and have it return true, liveness will be tracked correctly
+ // by generic optimization passes. We will also need to make sure that
+ // all of our target-specific passes that run after regalloc and before
+ // the CFGStructurizer track liveness and we will need to modify this pass
+ // to correctly track liveness.
+ //
+ // After the above changes, the new CFG should look like this:
+ //                      entry
+ //                     /     |
+ //          diamond_head   branch_from
+ //                     \    /
+ //                    new_block
+ //                     /    |
+ //  diamond_false   diamond_true
+ //            \      /
+ //              done
+ //
+ // Without this optimization, we are forced to duplicate the diamond_true
+ // block and we will end up with a CFG like this:
+ //
+ //                      entry
+ //                     /     |
+ //          diamond_head   branch_from
+ //            /      \          |
+ //  diamond_false   diamond_true   diamond_true (duplicate)
+ //            \      /                  |
+ //              done -------------------|
+ //
+ // Duplicating diamond_true can be very costly especially if it has a
+ // lot of instructions.
+ return 0;
+ }
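For illustration only, a minimal sketch of steps 1 and 2 of the pseudo code above: materializing the 0/1 flag in the two predecessors, right before each block's terminator. The opcode and flag register are placeholders — a real implementation would take a target MOV opcode and a register obtained from the RegisterScavenger, as the comment explains.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Sketch of steps 1-2: Flag = 0 on the diamond_head path, Flag = 1 on the
// branch_from path, inserted right before each block's branch.
static void emitBranchIntoIfFlags(llvm::MachineBasicBlock &DiamondHead,
                                  llvm::MachineBasicBlock &BranchFrom,
                                  llvm::Register Flag, unsigned MovOpc,
                                  const llvm::TargetInstrInfo *TII) {
  llvm::BuildMI(DiamondHead, DiamondHead.getFirstTerminator(),
                llvm::DebugLoc(), TII->get(MovOpc), Flag)
      .addImm(0); // step 1
  llvm::BuildMI(BranchFrom, BranchFrom.getFirstTerminator(),
                llvm::DebugLoc(), TII->get(MovOpc), Flag)
      .addImm(1); // step 2
}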
+
+ int NumNewBlk = 0;
+
+ bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
+
+ // Insert R600::ENDIF to avoid the special case "input landBlk == NULL".
+ MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
+
+ if (LandBlkHasOtherPred) {
+ report_fatal_error("Extra register needed to handle CFG");
+ Register CmpResReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ report_fatal_error("Extra compare instruction needed to handle CFG");
+ insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
+ CmpResReg, DebugLoc());
+ }
+
+ // XXX: We are running this after RA, so creating virtual registers will
+ // cause an assertion failure in the PostRA scheduling pass.
+ Register InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
+ DebugLoc());
+
+ if (MigrateTrue) {
+ migrateInstruction(TrueMBB, LandBlk, I);
+ // Need to unconditionally insert the assignment to ensure that a path
+ // from a predecessor other than headBlk has a valid value in initReg if
+ // (initVal != 1).
+ report_fatal_error("Extra register needed to handle CFG");
+ }
+ insertInstrBefore(I, R600::ELSE);
+
+ if (MigrateFalse) {
+ migrateInstruction(FalseMBB, LandBlk, I);
+ // Need to unconditionally insert the assignment to ensure that a path
+ // from a predecessor other than headBlk has a valid value in initReg if
+ // (initVal != 0).
+ report_fatal_error("Extra register needed to handle CFG");
+ }
+
+ if (LandBlkHasOtherPred) {
+ // Add endif.
+ insertInstrBefore(I, R600::ENDIF);
+
+ // Put initReg = 2 in the other predecessors of landBlk.
+ for (MachineBasicBlock *MBB : LandBlk->predecessors())
+ if (MBB != TrueMBB && MBB != FalseMBB)
+ report_fatal_error("Extra register needed to handle CFG");
+ }
+ LLVM_DEBUG(
+ dbgs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
+
+ // Update landBlk.
+ *LandMBBPtr = LandBlk;
+
+ return NumNewBlk;
+}
+
+void R600MachineCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) {
+ LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
+ << SrcMBB->getNumber() << "\n";);
+ DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
+
+ DstMBB->removeSuccessor(SrcMBB, true);
+ cloneSuccessorList(DstMBB, SrcMBB);
+
+ removeSuccessor(SrcMBB);
+ MLI->removeBlock(SrcMBB);
+ retireBlock(SrcMBB);
+}
+
+void R600MachineCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
+ MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
+ assert(TrueMBB);
+ LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ ";
+ if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
+ << " } else ";
+ dbgs() << "{ "; if (FalseMBB) {
+ dbgs() << "BB" << FalseMBB->getNumber();
+ } dbgs() << " }\n ";
+ dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
+ dbgs() << "BB" << LandMBB->getNumber();
+ } dbgs() << "\n";);
+
+ int OldOpcode = BranchMI->getOpcode();
+ DebugLoc BranchDL = BranchMI->getDebugLoc();
+
+// Transform to:
+// if cond
+//   trueBlk
+// else
+//   falseBlk
+// endif
+// landBlk
+
+ MachineBasicBlock::iterator I = BranchMI;
+ insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
+ BranchDL);
+
+ if (TrueMBB) {
+ MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
+ MBB->removeSuccessor(TrueMBB, true);
+ if (LandMBB && TrueMBB->succ_size() != 0)
+ TrueMBB->removeSuccessor(LandMBB, true);
+ retireBlock(TrueMBB);
+ MLI->removeBlock(TrueMBB);
+ }
+
+ if (FalseMBB) {
+ insertInstrBefore(I, R600::ELSE);
+ MBB->splice(I, FalseMBB, FalseMBB->begin(),
+ FalseMBB->end());
+ MBB->removeSuccessor(FalseMBB, true);
+ if (LandMBB && !FalseMBB->succ_empty())
+ FalseMBB->removeSuccessor(LandMBB, true);
+ retireBlock(FalseMBB);
+ MLI->removeBlock(FalseMBB);
+ }
+ insertInstrBefore(I, R600::ENDIF);
+
+ BranchMI->eraseFromParent();
+
+ if (LandMBB && TrueMBB && FalseMBB)
+ MBB->addSuccessor(LandMBB);
+}
+
+void R600MachineCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
+ MachineBasicBlock *LandMBB) {
+ LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
+ << " land = BB" << LandMBB->getNumber() << "\n";);
+
+ insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
+ insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
+ DstBlk->replaceSuccessor(DstBlk, LandMBB);
+}
+
+void R600MachineCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+ MachineBasicBlock *LandMBB) {
+ LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
+ << ExitingMBB->getNumber() << " land = BB"
+ << LandMBB->getNumber() << "\n";);
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
+ assert(BranchMI && isCondBranch(BranchMI));
+ DebugLoc DL = BranchMI->getDebugLoc();
+ MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
+ MachineBasicBlock::iterator I = BranchMI;
+ if (TrueBranch != LandMBB)
+ reversePredicateSetter(I, *I->getParent());
+ insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
+ insertInstrBefore(I, R600::BREAK);
+ insertInstrBefore(I, R600::ENDIF);
+ // Now the branch instruction can be erased safely.
+ BranchMI->eraseFromParent();
+ // Now take care of successors; retire blocks.
+ ExitingMBB->removeSuccessor(LandMBB, true);
+}
+
+void R600MachineCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineBasicBlock *ContMBB) {
+ LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
+ << ContingMBB->getNumber() << ", cont = BB"
+ << ContMBB->getNumber() << "\n";);
+
+ MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
+ if (MI) {
+ assert(isCondBranch(MI));
+ MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock *TrueBranch = getTrueBranch(MI);
+ int OldOpcode = MI->getOpcode();
+ DebugLoc DL = MI->getDebugLoc();
+
+ bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
+
+ if (!UseContinueLogical) {
+ int BranchOpcode =
+ TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
+ getBranchZeroOpcode(OldOpcode);
+ insertCondBranchBefore(I, BranchOpcode, DL);
+ // insertEnd ensures phi-moves, if they exist, go before the continue-instr.
+ insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
+ insertInstrEnd(ContingMBB, R600::ENDIF, DL);
+ } else {
+ int BranchOpcode =
+ TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
+ getContinueZeroOpcode(OldOpcode);
+ insertCondBranchBefore(I, BranchOpcode, DL);
+ }
+
+ MI->eraseFromParent();
+ } else {
+ // If we've arrived here, then we've already erased the branch instruction.
+ // Travel back up the basic block to find the last reference of our debug
+ // location; we've just inserted that reference here, so it should be
+ // representative. insertEnd ensures phi-moves, if they exist, go before
+ // the continue-instr.
+ insertInstrEnd(ContingMBB, R600::CONTINUE,
+ getLastDebugLocInBB(ContingMBB));
+ }
+}
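The getLastDebugLocInBB helper called above is defined elsewhere in this file and not shown in this hunk. A minimal sketch of what such a helper might look like — scan the block and keep the last valid debug location seen:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/DebugLoc.h"

static llvm::DebugLoc lastDebugLocSketch(llvm::MachineBasicBlock *MBB) {
  llvm::DebugLoc DL;
  for (llvm::MachineInstr &MI : *MBB)
    if (MI.getDebugLoc())
      DL = MI.getDebugLoc(); // keep the last valid location seen
  return DL;
}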
+
+int R600MachineCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
+ int Cloned = 0;
+ assert(PreMBB->isSuccessor(SrcMBB));
+ while (SrcMBB && SrcMBB != DstMBB) {
+ assert(SrcMBB->succ_size() == 1);
+ if (SrcMBB->pred_size() > 1) {
+ SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
+ ++Cloned;
+ }
+
+ PreMBB = SrcMBB;
+ SrcMBB = *SrcMBB->succ_begin();
+ }
+
+ return Cloned;
+}
+
+MachineBasicBlock *
+R600MachineCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
+ MachineBasicBlock *PredMBB) {
+ assert(PredMBB->isSuccessor(MBB) && "succBlk is not a predecessor of curBlk");
+
+ MachineBasicBlock *CloneMBB = clone(MBB); // Clone instructions.
+ replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
+ // (srcBlk, oldBlk, newBlk)
+
+ PredMBB->replaceSuccessor(MBB, CloneMBB);
+
+ // Add all successors to CloneMBB.
+ cloneSuccessorList(CloneMBB, MBB);
+
+ numClonedInstr += MBB->size();
+
+ LLVM_DEBUG(dbgs() << "Cloned block: "
+ << "BB" << MBB->getNumber() << ", size " << MBB->size()
+ << "\n";);
+
+ SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
+
+ return CloneMBB;
+}
+
+void R600MachineCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator SpliceEnd;
+ // Look for the input branch instruction, not the AMDGPU branch instruction.
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
+ if (!BranchMI) {
+ LLVM_DEBUG(dbgs() << "migrateInstruction doesn't see a branch instr\n";);
+ SpliceEnd = SrcMBB->end();
+ } else {
+ LLVM_DEBUG(dbgs() << "migrateInstruction sees branch instr: " << *BranchMI);
+ SpliceEnd = BranchMI;
+ }
+ LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
+ << DstMBB->size() << ", srcSize = " << SrcMBB->size()
+ << "\n";);
+
+ // Splice inserts before the insert position I.
+ DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
+
+ LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
+ << DstMBB->size() << ", srcSize = " << SrcMBB->size()
+ << '\n';);
+}
+
+MachineBasicBlock *
+R600MachineCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop *LoopRep) {
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
+
+ if (!LoopHeader || !LoopLatch)
+ return nullptr;
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
+ // Is LoopRep an infinite loop?
+ if (!BranchMI || !isUncondBranch(BranchMI))
+ return nullptr;
+
+ MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
+ FuncRep->push_back(DummyExitBlk); // Insert into the function.
+ SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+ LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
+ LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
+ Ctx.emitError("Extra register needed to handle CFG");
+ return nullptr;
+}
+
+void R600MachineCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
+ MachineInstr *BranchMI;
+
+ // We have seen two unconditional branches in one basic block (in the example
+ // test_fc_do_while_or.c); fix this upstream so the loop below can be removed.
+ while ((BranchMI = getLoopendBlockBranchInstr(MBB)) + && isUncondBranch(BranchMI)) { + LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); + BranchMI->eraseFromParent(); + } +} + +void R600MachineCFGStructurizer::removeRedundantConditionalBranch( + MachineBasicBlock *MBB) { + if (MBB->succ_size() != 2) + return; + MachineBasicBlock *MBB1 = *MBB->succ_begin(); + MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); + if (MBB1 != MBB2) + return; + + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); + assert(BranchMI && isCondBranch(BranchMI)); + LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); + BranchMI->eraseFromParent(); + SHOWNEWBLK(MBB1, "Removing redundant successor"); + MBB->removeSuccessor(MBB1, true); +} + +void R600MachineCFGStructurizer::addDummyExitBlock( + SmallVectorImpl &RetMBB) { + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(DummyExitBlk); //insert to function + insertInstrEnd(DummyExitBlk, R600::RETURN); + + for (MachineBasicBlock *MBB : RetMBB) { + if (MachineInstr *MI = getReturnInstr(MBB)) + MI->eraseFromParent(); + MBB->addSuccessor(DummyExitBlk); + LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() + << " successors\n";); + } + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); +} + +void R600MachineCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { + while (MBB->succ_size()) + MBB->removeSuccessor(*MBB->succ_begin()); +} + +void R600MachineCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, + int SccNum) { + BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; + if (!srcBlkInfo) + srcBlkInfo = new BlockInformation(); + srcBlkInfo->SccNum = SccNum; +} + +void R600MachineCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); + + BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; + + if (!SrcBlkInfo) + SrcBlkInfo = new BlockInformation(); + + SrcBlkInfo->IsRetired = true; + assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet"); +} + +INITIALIZE_PASS_BEGIN(R600MachineCFGStructurizer, "amdgpustructurizer", + "AMDGPU CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(R600MachineCFGStructurizer, "amdgpustructurizer", + "AMDGPU CFG Structurizer", false, false) + +FunctionPass *llvm::createR600MachineCFGStructurizerPass() { + return new R600MachineCFGStructurizer(); +} diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp index fbe2a1cd9fba..59e274787590 100644 --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -207,7 +207,7 @@ public: return !ARDef || !ARUse; } - // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // isLegalToPruneDependencies - Is it legal to prune dependency between SUI // and SUJ. 
bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { return false; diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp index 20c1ce7266dd..d8f061054904 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp @@ -27,8 +27,6 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - FMA(false), CaymanISA(false), CFALUBug(false), HasVertexCache(false), - R600ALUInst(false), FP64(false), TexVTXClauseSize(0), Gen(R600), TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), InstrItins(getInstrItineraryForCPU(GPU)) {} diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index 92d559b1f8e6..c3d002f29272 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -31,14 +31,14 @@ class R600Subtarget final : public R600GenSubtargetInfo, private: R600InstrInfo InstrInfo; R600FrameLowering FrameLowering; - bool FMA; - bool CaymanISA; - bool CFALUBug; - bool HasVertexCache; - bool R600ALUInst; - bool FP64; - short TexVTXClauseSize; - Generation Gen; + bool FMA = false; + bool CaymanISA = false; + bool CFALUBug = false; + bool HasVertexCache = false; + bool R600ALUInst = false; + bool FP64 = false; + short TexVTXClauseSize = 0; + Generation Gen = R600; R600TargetLowering TLInfo; InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp index 39dad45425fc..76bb0f65ef69 100644 --- a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp @@ -83,7 +83,7 @@ R600TargetMachine::getSubtargetImpl(const Function &F) const { } TargetTransformInfo -R600TargetMachine::getTargetTransformInfo(const Function &F) { +R600TargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(R600TTIImpl(this, F)); } @@ -131,7 +131,7 @@ void R600PassConfig::addPreSched2() { } void R600PassConfig::addPreEmitPass() { - addPass(createAMDGPUCFGStructurizerPass()); + addPass(createR600MachineCFGStructurizerPass()); addPass(createR600ExpandSpecialInstrsPass()); addPass(&FinalizeMachineBundlesID); addPass(createR600Packetizer()); diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h index 0ccbca3c68b1..8d20841292b9 100644 --- a/llvm/lib/Target/AMDGPU/R600TargetMachine.h +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// The AMDGPU TargetMachine interface definition for hw codgen targets. +/// The AMDGPU TargetMachine interface definition for hw codegen targets. 
// //===----------------------------------------------------------------------===// @@ -38,7 +38,7 @@ public: const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool isMachineVerifierClean() const override { return false; } }; diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index b81fac36fc95..afd2a38b11ec 100644 --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -73,19 +73,19 @@ class SIAnnotateControlFlow : public FunctionPass { bool hasKill(const BasicBlock *BB); - void eraseIfUnused(PHINode *Phi); + bool eraseIfUnused(PHINode *Phi); - void openIf(BranchInst *Term); + bool openIf(BranchInst *Term); - void insertElse(BranchInst *Term); + bool insertElse(BranchInst *Term); Value * handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term); - void handleLoop(BranchInst *Term); + bool handleLoop(BranchInst *Term); - void closeControlFlow(BasicBlock *BB); + bool closeControlFlow(BasicBlock *BB); public: static char ID; @@ -193,31 +193,34 @@ bool SIAnnotateControlFlow::hasKill(const BasicBlock *BB) { return false; } -// Erase "Phi" if it is not used any more -void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { - if (RecursivelyDeleteDeadPHINode(Phi)) { +// Erase "Phi" if it is not used any more. Return true if any change was made. +bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { + bool Changed = RecursivelyDeleteDeadPHINode(Phi); + if (Changed) LLVM_DEBUG(dbgs() << "Erased unused condition phi\n"); - } + return Changed; } /// Open a new "If" block -void SIAnnotateControlFlow::openIf(BranchInst *Term) { +bool SIAnnotateControlFlow::openIf(BranchInst *Term) { if (isUniform(Term)) - return; + return false; Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term); Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); + return true; } /// Close the last "If" block and open a new "Else" block -void SIAnnotateControlFlow::insertElse(BranchInst *Term) { +bool SIAnnotateControlFlow::insertElse(BranchInst *Term) { if (isUniform(Term)) { - return; + return false; } Value *Ret = CallInst::Create(Else, popSaved(), "", Term); Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); + return true; } /// Recursively handle the condition leading to a loop @@ -255,14 +258,14 @@ Value *SIAnnotateControlFlow::handleLoopCondition( } /// Handle a back edge (loop) -void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { +bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) { if (isUniform(Term)) - return; + return false; BasicBlock *BB = Term->getParent(); llvm::Loop *L = LI->getLoopFor(BB); if (!L) - return; + return false; BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front()); @@ -286,10 +289,12 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); push(Term->getSuccessor(0), Arg); + + return true; } /// Close the last opened control flow -void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { +bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { 
llvm::Loop *L = LI->getLoopFor(BB); assert(Stack.back().first == BB); @@ -322,6 +327,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { } CallInst::Create(EndCf, Exec, "", FirstInsertionPt); } + + return true; } /// Annotate the control flow with intrinsics so the backend can @@ -333,6 +340,7 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { TargetPassConfig &TPC = getAnalysis(); const TargetMachine &TM = TPC.getTM(); + bool Changed = false; initialize(*F.getParent(), TM.getSubtarget(F)); for (df_iterator I = df_begin(&F.getEntryBlock()), E = df_end(&F.getEntryBlock()); I != E; ++I) { @@ -341,32 +349,32 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { if (!Term || Term->isUnconditional()) { if (isTopOfStack(BB)) - closeControlFlow(BB); + Changed |= closeControlFlow(BB); continue; } if (I.nodeVisited(Term->getSuccessor(1))) { if (isTopOfStack(BB)) - closeControlFlow(BB); + Changed |= closeControlFlow(BB); if (DT->dominates(Term->getSuccessor(1), BB)) - handleLoop(Term); + Changed |= handleLoop(Term); continue; } if (isTopOfStack(BB)) { PHINode *Phi = dyn_cast(Term->getCondition()); if (Phi && Phi->getParent() == BB && isElse(Phi) && !hasKill(BB)) { - insertElse(Term); - eraseIfUnused(Phi); + Changed |= insertElse(Term); + Changed |= eraseIfUnused(Phi); continue; } - closeControlFlow(BB); + Changed |= closeControlFlow(BB); } - openIf(Term); + Changed |= openIf(Term); } if (!Stack.empty()) { @@ -374,7 +382,7 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { report_fatal_error("failed to annotate CFG"); } - return true; + return Changed; } /// Create the annotation pass diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 107ee5ed5532..85930312352b 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -63,6 +63,12 @@ enum : uint64_t { VGPRSpill = 1 << 24, SGPRSpill = 1 << 25, + // LDSDIR instruction format. + LDSDIR = 1 << 26, + + // VINTERP instruction format. + VINTERP = 1 << 27, + // High bits - other information. VM_CNT = UINT64_C(1) << 32, EXP_CNT = UINT64_C(1) << 33, @@ -120,7 +126,10 @@ enum : uint64_t { IsAtomicNoRet = UINT64_C(1) << 57, // Atomic with return. - IsAtomicRet = UINT64_C(1) << 58 + IsAtomicRet = UINT64_C(1) << 58, + + // Is a WMMA instruction. + IsWMMA = UINT64_C(1) << 59, }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. @@ -258,9 +267,10 @@ namespace AMDGPUAsmVariants { VOP3 = 1, SDWA = 2, SDWA9 = 3, - DPP = 4 + DPP = 4, + VOP3_DPP = 5 }; -} +} // namespace AMDGPUAsmVariants namespace AMDGPU { namespace EncValues { // Encoding values of enum9/8/7 operands @@ -280,7 +290,8 @@ enum : unsigned { INLINE_FLOATING_C_MAX = 248, LITERAL_CONST = 255, VGPR_MIN = 256, - VGPR_MAX = 511 + VGPR_MAX = 511, + IS_VGPR = 256 // Indicates VGPR or AGPR }; } // namespace EncValues @@ -294,6 +305,9 @@ enum CPol { SLC = 2, DLC = 4, SCC = 16, + SC0 = GLC, + SC1 = SCC, + NT = SLC, ALL = GLC | SLC | DLC | SCC }; @@ -302,24 +316,33 @@ enum CPol { namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. enum Id { // Message ID, width(4) [3:0]. 
- ID_UNKNOWN_ = -1, ID_INTERRUPT = 1, - ID_GS = 2, - ID_GS_DONE = 3, - ID_SAVEWAVE = 4, // added in GFX8 + + ID_GS_PreGFX11 = 2, // replaced in GFX11 + ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11 + + ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11 + ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11 + + ID_SAVEWAVE = 4, // added in GFX8, removed in GFX11 ID_STALL_WAVE_GEN = 5, // added in GFX9 ID_HALT_WAVES = 6, // added in GFX9 ID_ORDERED_PS_DONE = 7, // added in GFX9 ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10 ID_GS_ALLOC_REQ = 9, // added in GFX9 - ID_GET_DOORBELL = 10, // added in GFX9 - ID_GET_DDID = 11, // added in GFX10 + ID_GET_DOORBELL = 10, // added in GFX9, removed in GFX11 + ID_GET_DDID = 11, // added in GFX10, removed in GFX11 ID_SYSMSG = 15, - ID_GAPS_LAST_, // Indicate that sequence has gaps. - ID_GAPS_FIRST_ = ID_INTERRUPT, - ID_SHIFT_ = 0, - ID_WIDTH_ = 4, - ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_) + + ID_RTN_GET_DOORBELL = 128, + ID_RTN_GET_DDID = 129, + ID_RTN_GET_TMA = 130, + ID_RTN_GET_REALTIME = 131, + ID_RTN_SAVE_WAVE = 132, + ID_RTN_GET_TBA = 133, + + ID_MASK_PreGFX11_ = 0xF, + ID_MASK_GFX11Plus_ = 0xFF }; enum Op { // Both GS and SYS operation IDs. @@ -360,8 +383,6 @@ enum StreamId : unsigned { // Stream ID, (2) [9:8]. namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns. enum Id { // HwRegCode, (6) [5:0] - ID_UNKNOWN_ = -1, - ID_SYMBOLIC_FIRST_ = 1, // There are corresponding symbolic names defined. ID_MODE = 1, ID_STATUS = 2, ID_TRAPSTS = 3, @@ -370,12 +391,15 @@ enum Id { // HwRegCode, (6) [5:0] ID_LDS_ALLOC = 6, ID_IB_STS = 7, ID_MEM_BASES = 15, - ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES, ID_TBA_LO = 16, - ID_SYMBOLIC_FIRST_GFX10_ = ID_TBA_LO, ID_TBA_HI = 17, ID_TMA_LO = 18, ID_TMA_HI = 19, + ID_XCC_ID = 20, + ID_SQ_PERF_SNAPSHOT_DATA = 21, + ID_SQ_PERF_SNAPSHOT_DATA1 = 22, + ID_SQ_PERF_SNAPSHOT_PC_LO = 23, + ID_SQ_PERF_SNAPSHOT_PC_HI = 24, ID_FLAT_SCR_LO = 20, ID_FLAT_SCR_HI = 21, ID_XNACK_MASK = 22, @@ -383,8 +407,7 @@ enum Id { // HwRegCode, (6) [5:0] ID_HW_ID2 = 24, ID_POPS_PACKER = 25, ID_SHADER_CYCLES = 29, - ID_SYMBOLIC_FIRST_GFX1030_ = ID_SHADER_CYCLES, - ID_SYMBOLIC_LAST_ = 30, + ID_SHIFT_ = 0, ID_WIDTH_ = 6, ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_) @@ -503,6 +526,15 @@ enum MergedFormat : int64_t { DFMT_NFMT_MAX = DFMT_NFMT_MASK }; +enum UnifiedFormatCommon : int64_t { + UFMT_MAX = 127, + UFMT_UNDEF = -1, + UFMT_DEFAULT = 1 +}; + +} // namespace MTBUFFormat + +namespace UfmtGFX10 { enum UnifiedFormat : int64_t { UFMT_INVALID = 0, @@ -598,14 +630,95 @@ enum UnifiedFormat : int64_t { UFMT_FIRST = UFMT_INVALID, UFMT_LAST = UFMT_32_32_32_32_FLOAT, +}; - UFMT_MAX = 127, +} // namespace UfmtGFX10 - UFMT_UNDEF = -1, - UFMT_DEFAULT = UFMT_8_UNORM +namespace UfmtGFX11 { +enum UnifiedFormat : int64_t { + UFMT_INVALID = 0, + + UFMT_8_UNORM, + UFMT_8_SNORM, + UFMT_8_USCALED, + UFMT_8_SSCALED, + UFMT_8_UINT, + UFMT_8_SINT, + + UFMT_16_UNORM, + UFMT_16_SNORM, + UFMT_16_USCALED, + UFMT_16_SSCALED, + UFMT_16_UINT, + UFMT_16_SINT, + UFMT_16_FLOAT, + + UFMT_8_8_UNORM, + UFMT_8_8_SNORM, + UFMT_8_8_USCALED, + UFMT_8_8_SSCALED, + UFMT_8_8_UINT, + UFMT_8_8_SINT, + + UFMT_32_UINT, + UFMT_32_SINT, + UFMT_32_FLOAT, + + UFMT_16_16_UNORM, + UFMT_16_16_SNORM, + UFMT_16_16_USCALED, + UFMT_16_16_SSCALED, + UFMT_16_16_UINT, + UFMT_16_16_SINT, + UFMT_16_16_FLOAT, + + UFMT_10_11_11_FLOAT, + + UFMT_11_11_10_FLOAT, + + UFMT_10_10_10_2_UNORM, + UFMT_10_10_10_2_SNORM, + UFMT_10_10_10_2_UINT, + UFMT_10_10_10_2_SINT, + + 
UFMT_2_10_10_10_UNORM, + UFMT_2_10_10_10_SNORM, + UFMT_2_10_10_10_USCALED, + UFMT_2_10_10_10_SSCALED, + UFMT_2_10_10_10_UINT, + UFMT_2_10_10_10_SINT, + + UFMT_8_8_8_8_UNORM, + UFMT_8_8_8_8_SNORM, + UFMT_8_8_8_8_USCALED, + UFMT_8_8_8_8_SSCALED, + UFMT_8_8_8_8_UINT, + UFMT_8_8_8_8_SINT, + + UFMT_32_32_UINT, + UFMT_32_32_SINT, + UFMT_32_32_FLOAT, + + UFMT_16_16_16_16_UNORM, + UFMT_16_16_16_16_SNORM, + UFMT_16_16_16_16_USCALED, + UFMT_16_16_16_16_SSCALED, + UFMT_16_16_16_16_UINT, + UFMT_16_16_16_16_SINT, + UFMT_16_16_16_16_FLOAT, + + UFMT_32_32_32_UINT, + UFMT_32_32_32_SINT, + UFMT_32_32_32_FLOAT, + UFMT_32_32_32_32_UINT, + UFMT_32_32_32_32_SINT, + UFMT_32_32_32_32_FLOAT, + + UFMT_FIRST = UFMT_INVALID, + UFMT_LAST = UFMT_32_32_32_32_FLOAT, }; -} // namespace MTBUFFormat +} // namespace UfmtGFX11 namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32. @@ -746,20 +859,23 @@ enum Target : unsigned { ET_MRT0 = 0, ET_MRT7 = 7, ET_MRTZ = 8, - ET_NULL = 9, + ET_NULL = 9, // Pre-GFX11 ET_POS0 = 12, ET_POS3 = 15, - ET_POS4 = 16, // GFX10+ - ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget - ET_PRIM = 20, // GFX10+ - ET_PARAM0 = 32, - ET_PARAM31 = 63, + ET_POS4 = 16, // GFX10+ + ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget + ET_PRIM = 20, // GFX10+ + ET_DUAL_SRC_BLEND0 = 21, // GFX11+ + ET_DUAL_SRC_BLEND1 = 22, // GFX11+ + ET_PARAM0 = 32, // Pre-GFX11 + ET_PARAM31 = 63, // Pre-GFX11 ET_NULL_MAX_IDX = 0, ET_MRTZ_MAX_IDX = 0, ET_PRIM_MAX_IDX = 0, ET_MRT_MAX_IDX = 7, ET_POS_MAX_IDX = 4, + ET_DUAL_SRC_BLEND_MAX_IDX = 1, ET_PARAM_MAX_IDX = 31, ET_INVALID = 255, @@ -777,6 +893,18 @@ enum OpSel : uint64_t { } // namespace VOP3PEncoding +namespace ImplicitArg { +// Implicit kernel argument offset for code object version 5. +enum Offset_COV5 : unsigned { + HOSTCALL_PTR_OFFSET = 80, + MULTIGRID_SYNC_ARG_OFFSET = 88, + HEAP_PTR_OFFSET = 96, + PRIVATE_BASE_OFFSET = 192, + SHARED_BASE_OFFSET = 196, + QUEUE_PTR_OFFSET = 200, +}; + +} // namespace ImplicitArg } // namespace AMDGPU #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 @@ -911,10 +1039,12 @@ enum OpSel : uint64_t { #define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6) #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860 -#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12) +#define S_00B860_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12) +#define S_00B860_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12) #define R_0286E8_SPI_TMPRING_SIZE 0x0286E8 -#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12) +#define S_0286E8_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12) +#define S_0286E8_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12) #define R_028B54_VGT_SHADER_STAGES_EN 0x028B54 #define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 33954e11d6c6..99aa8a60b04f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -92,7 +92,7 @@ public: bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; - void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const; + bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const; const MachineOperand *isClamp(const MachineInstr &MI) const; bool tryFoldClamp(MachineInstr &MI); @@ -146,30 +146,6 @@ static unsigned macToMad(unsigned Opc) { return AMDGPU::INSTRUCTION_LIST_END; } -// Wrapper around isInlineConstant that understands special cases when -// instruction types are replaced during operand folding. 
-static bool isInlineConstantIfFolded(const SIInstrInfo *TII, - const MachineInstr &UseMI, - unsigned OpNo, - const MachineOperand &OpToFold) { - if (TII->isInlineConstant(UseMI, OpNo, OpToFold)) - return true; - - unsigned Opc = UseMI.getOpcode(); - unsigned NewOpc = macToMad(Opc); - if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) { - // Special case for mac. Since this is replaced with mad when folded into - // src2, we need to check the legality for the final instruction. - int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); - if (static_cast(OpNo) == Src2Idx) { - const MCInstrDesc &MadDesc = TII->get(NewOpc); - return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); - } - } - - return false; -} - // TODO: Add heuristic that the frame index might not fit in the addressing mode // immediate offset to avoid materializing in loops. static bool frameIndexMayFold(const SIInstrInfo *TII, @@ -210,6 +186,8 @@ static bool updateOperand(FoldCandidate &Fold, if (Fold.isImm()) { if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked && !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) && + (!ST.hasDOTOpSelHazard() || + !(MI->getDesc().TSFlags & SIInstrFlags::IsDOT)) && AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, ST.hasInv2PiInlineImm())) { // Set op_sel/op_sel_hi on this operand or bail out if op_sel is @@ -289,7 +267,7 @@ static bool updateOperand(FoldCandidate &Fold, // when looking at a use. Dst0.setReg(NewReg0); for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) - MI->RemoveOperand(I); + MI->removeOperand(I); MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); if (Fold.isCommuted()) @@ -490,6 +468,8 @@ static bool isUseSafeToFold(const SIInstrInfo *TII, case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: // Do not fold into an indirect mov. 
return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0); } @@ -675,7 +655,9 @@ void SIFoldOperands::foldOperand( if (TII->isFLATScratch(*UseMI) && AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), - AMDGPU::OpName::vaddr) != -1) { + AMDGPU::OpName::vaddr) != -1 && + AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), + AMDGPU::OpName::saddr) == -1) { unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode()); UseMI->setDesc(TII->get(NewOpc)); } @@ -739,7 +721,7 @@ void SIFoldOperands::foldOperand( while (ImpOpI != ImpOpE) { MachineInstr::mop_iterator Tmp = ImpOpI; ImpOpI++; - UseMI->RemoveOperand(UseMI->getOperandNo(Tmp)); + UseMI->removeOperand(UseMI->getOperandNo(Tmp)); } CopiesToReplace.push_back(UseMI); } else { @@ -768,7 +750,7 @@ void SIFoldOperands::foldOperand( UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE)); for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I) - UseMI->RemoveOperand(I); + UseMI->removeOperand(I); MachineInstrBuilder B(*MBB.getParent(), UseMI); DenseMap VGPRCopies; @@ -871,7 +853,7 @@ void SIFoldOperands::foldOperand( UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm()); else UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex()); - UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) + UseMI->removeOperand(2); // Remove exec read (or src1 for readlane) return; } @@ -890,7 +872,7 @@ void SIFoldOperands::foldOperand( UseMI->getOperand(1).setReg(OpToFold.getReg()); UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); UseMI->getOperand(1).setIsKill(false); - UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) + UseMI->removeOperand(2); // Remove exec read (or src1 for readlane) return; } } @@ -906,6 +888,22 @@ void SIFoldOperands::foldOperand( } if (!FoldingImmLike) { + if (OpToFold.isReg() && ST->needsAlignedVGPRs()) { + // Don't fold if OpToFold doesn't hold an aligned register. + const TargetRegisterClass *RC = + TRI->getRegClassForReg(*MRI, OpToFold.getReg()); + if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) { + unsigned SubReg = OpToFold.getSubReg(); + const TargetRegisterClass *SubRC = TRI->getSubRegClass(RC, SubReg); + RC = TRI->getCompatibleSubRegClass(RC, SubRC, SubReg); + if (RC) + RC = SubRC; + } + + if (!RC || !TRI->isProperlyAlignedRC(*RC)) + return; + } + tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); // FIXME: We could try to change the instruction from 64-bit to 32-bit @@ -1025,7 +1023,7 @@ static void stripExtraCopyOperands(MachineInstr &MI) { Desc.getNumImplicitDefs(); for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I) - MI.RemoveOperand(I); + MI.removeOperand(I); } static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { @@ -1093,7 +1091,7 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, // Be careful to change the right operand, src0 may belong to a different // instruction. 
MI->getOperand(Src0Idx).ChangeToImmediate(NewImm); - MI->RemoveOperand(Src1Idx); + MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR))); return true; } @@ -1112,11 +1110,11 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, Opc == AMDGPU::S_OR_B32) { if (Src1Val == 0) { // y = or x, 0 => y = copy x - MI->RemoveOperand(Src1Idx); + MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); } else if (Src1Val == -1) { // y = or x, -1 => y = v_mov_b32 -1 - MI->RemoveOperand(Src1Idx); + MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32))); } else return false; @@ -1129,11 +1127,11 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, MI->getOpcode() == AMDGPU::S_AND_B32) { if (Src1Val == 0) { // y = and x, 0 => y = v_mov_b32 0 - MI->RemoveOperand(Src0Idx); + MI->removeOperand(Src0Idx); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32))); } else if (Src1Val == -1) { // y = and x, -1 => y = copy x - MI->RemoveOperand(Src1Idx); + MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); stripExtraCopyOperands(*MI); } else @@ -1147,7 +1145,7 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, MI->getOpcode() == AMDGPU::S_XOR_B32) { if (Src1Val == 0) { // y = xor x, 0 => y = copy x - MI->RemoveOperand(Src1Idx); + MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); return true; } @@ -1185,12 +1183,12 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const { TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false)); int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (Src2Idx != -1) - MI.RemoveOperand(Src2Idx); - MI.RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1)); + MI.removeOperand(Src2Idx); + MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1)); if (Src1ModIdx != -1) - MI.RemoveOperand(Src1ModIdx); + MI.removeOperand(Src1ModIdx); if (Src0ModIdx != -1) - MI.RemoveOperand(Src0ModIdx); + MI.removeOperand(Src0ModIdx); mutateCopyOp(MI, NewDesc); LLVM_DEBUG(dbgs() << MI); return true; @@ -1217,7 +1215,7 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const { return false; } -void SIFoldOperands::foldInstOperand(MachineInstr &MI, +bool SIFoldOperands::foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const { // We need mutate the operands of new mov instructions to add implicit // uses of EXEC, but adding them invalidates the use_iterator, so defer @@ -1225,6 +1223,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, SmallVector CopiesToReplace; SmallVector FoldList; MachineOperand &Dst = MI.getOperand(0); + bool Changed = false; if (OpToFold.isImm()) { for (auto &UseMI : @@ -1237,66 +1236,25 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, // We may also encounter cases where one or both operands are // immediates materialized into a register, which would ordinarily not // be folded due to multiple uses or operand constraints. 
- if (tryConstantFoldOp(*MRI, TII, &UseMI)) + if (tryConstantFoldOp(*MRI, TII, &UseMI)) { LLVM_DEBUG(dbgs() << "Constant folded " << UseMI); - } - } - - bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); - if (FoldingImm) { - unsigned NumLiteralUses = 0; - MachineOperand *NonInlineUse = nullptr; - int NonInlineUseOpNo = -1; - - for (auto &Use : - make_early_inc_range(MRI->use_nodbg_operands(Dst.getReg()))) { - MachineInstr *UseMI = Use.getParent(); - unsigned OpNo = UseMI->getOperandNo(&Use); - - // Try to fold any inline immediate uses, and then only fold other - // constants if they have one use. - // - // The legality of the inline immediate must be checked based on the use - // operand, not the defining instruction, because 32-bit instructions - // with 32-bit inline immediate sources may be used to materialize - // constants used in 16-bit operands. - // - // e.g. it is unsafe to fold: - // s_mov_b32 s0, 1.0 // materializes 0x3f800000 - // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00 - - // Folding immediates with more than one use will increase program size. - // FIXME: This will also reduce register usage, which may be better - // in some cases. A better heuristic is needed. - if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) { - foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace); - } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) { - foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace); - } else { - if (++NumLiteralUses == 1) { - NonInlineUse = &Use; - NonInlineUseOpNo = OpNo; - } + Changed = true; } } + } - if (NumLiteralUses == 1) { - MachineInstr *UseMI = NonInlineUse->getParent(); - foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace); - } - } else { - // Folding register. - SmallVector UsesToProcess; - for (auto &Use : MRI->use_nodbg_operands(Dst.getReg())) - UsesToProcess.push_back(&Use); - for (auto U : UsesToProcess) { - MachineInstr *UseMI = U->getParent(); - - foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U), - FoldList, CopiesToReplace); - } + SmallVector UsesToProcess; + for (auto &Use : MRI->use_nodbg_operands(Dst.getReg())) + UsesToProcess.push_back(&Use); + for (auto U : UsesToProcess) { + MachineInstr *UseMI = U->getParent(); + foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U), FoldList, + CopiesToReplace); } + if (CopiesToReplace.empty() && FoldList.empty()) + return Changed; + MachineFunction *MF = MI.getParent()->getParent(); // Make sure we add EXEC uses to any new v_mov instructions created. 
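As a side note, "adding EXEC uses" here means attaching an implicit use of the wave execution mask to each rewritten copy. The pass presumably does this through MachineInstr::addImplicitDefUseOperands; spelled out by hand, attaching a single implicit use would look roughly like this sketch (ExecReg stands in for AMDGPU::EXEC):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Attach an implicit (non-defining) use of a physical register, e.g. the
// exec mask, to an instruction.
static void addImplicitExecUse(llvm::MachineInstr &MI,
                               llvm::MCRegister ExecReg) {
  MI.addOperand(llvm::MachineOperand::CreateReg(ExecReg, /*isDef=*/false,
                                                /*isImp=*/true));
}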
for (MachineInstr *Copy : CopiesToReplace) @@ -1328,6 +1286,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, TII->commuteInstruction(*Fold.UseMI, false); } } + return true; } // Clamp patterns are canonically selected to v_max_* instructions, so only @@ -1593,8 +1552,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) { unsigned OpIdx = Op - &UseMI->getOperand(0); const MCInstrDesc &InstDesc = UseMI->getDesc(); - if (!TRI->isVectorSuperClass( - TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass))) + const TargetRegisterClass *OpRC = + TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF()); + if (!OpRC || !TRI->isVectorSuperClass(OpRC)) return false; const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)); @@ -1751,22 +1711,31 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { bool IsIEEEMode = MFI->getMode().IEEE; bool HasNSZ = MFI->hasNoSignedZerosFPMath(); + bool Changed = false; for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineOperand *CurrentKnownM0Val = nullptr; for (auto &MI : make_early_inc_range(*MBB)) { - tryFoldCndMask(MI); + Changed |= tryFoldCndMask(MI); - if (tryFoldZeroHighBits(MI)) + if (tryFoldZeroHighBits(MI)) { + Changed = true; continue; + } - if (MI.isRegSequence() && tryFoldRegSequence(MI)) + if (MI.isRegSequence() && tryFoldRegSequence(MI)) { + Changed = true; continue; + } - if (MI.isPHI() && tryFoldLCSSAPhi(MI)) + if (MI.isPHI() && tryFoldLCSSAPhi(MI)) { + Changed = true; continue; + } - if (MI.mayLoad() && tryFoldLoad(MI)) + if (MI.mayLoad() && tryFoldLoad(MI)) { + Changed = true; continue; + } if (!TII->isFoldableCopy(MI)) { // Saw an unknown clobber of m0, so we no longer know what it is. @@ -1777,7 +1746,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // instruction, and not the omod multiply. if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) - tryFoldClamp(MI); + Changed |= tryFoldClamp(MI); continue; } @@ -1788,6 +1757,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineOperand &NewM0Val = MI.getOperand(1); if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { MI.eraseFromParent(); + Changed = true; continue; } @@ -1817,7 +1787,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (!MI.getOperand(0).getReg().isVirtual()) continue; - foldInstOperand(MI, OpToFold); + Changed |= foldInstOperand(MI, OpToFold); // If we managed to fold all uses of this copy then we might as well // delete it now. @@ -1829,6 +1799,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { auto &SrcOp = InstToErase->getOperand(1); auto SrcReg = SrcOp.isReg() ? 
SrcOp.getReg() : Register(); InstToErase->eraseFromParent(); + Changed = true; InstToErase = nullptr; if (!SrcReg || SrcReg.isPhysical()) break; @@ -1837,9 +1808,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { break; } if (InstToErase && InstToErase->isRegSequence() && - MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) + MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { InstToErase->eraseFromParent(); + Changed = true; + } } } - return true; + return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index 80ee7a00252a..d7ca7f36284b 100644 --- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -241,7 +241,7 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, } // Check register def/use conflicts, occupancy limits and collect def/use maps. -// Return true if instruction can be bundled with previous. It it cannot +// Return true if instruction can be bundled with previous. If it cannot // def/use maps are not updated. bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 6078f4a0577a..a57e81eb4e4a 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -749,7 +749,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, return; } - const MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); @@ -789,19 +789,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, *Reg.FI); } - // VGPRs used for Whole Wave Mode - for (const auto &Reg : FuncInfo->WWMReservedRegs) { - auto VGPR = Reg.first; - auto FI = Reg.second; - if (!FI) - continue; - + for (auto ReservedWWM : FuncInfo->wwmAllocation()) { if (!ScratchExecCopy) ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, - *FI); + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); } if (ScratchExecCopy) { @@ -813,27 +807,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, LiveRegs.addReg(ScratchExecCopy); } - if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); - - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(FramePtrReg); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - FramePtrFI); - } - - if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); + auto SaveSGPRToMemory = [&](Register Reg, const int FI) { + assert(!MFI.isDeadObjectIndex(FI)); initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); @@ -843,44 +818,31 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, report_fatal_error("failed to find free scratch 
register"); BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(BasePtrReg); + .addReg(Reg); buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - BasePtrFI); - } + FI); + }; - // In this case, spill the FP to a reserved VGPR. - if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); + auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) { + assert(!MFI.isDeadObjectIndex(FI)); - assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(FramePtrFI); + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef Spill = + FuncInfo->getSGPRToVGPRSpills(FI); assert(Spill.size() == 1); - // Save FP before setting it up. BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(FramePtrReg) + .addReg(Reg) .addImm(Spill[0].Lane) .addReg(Spill[0].VGPR, RegState::Undef); - } + }; - // In this case, spill the BP to a reserved VGPR. - if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); - - assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(BasePtrFI); - assert(Spill.size() == 1); - - // Save BP before setting it up. - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(BasePtrReg) - .addImm(Spill[0].Lane) - .addReg(Spill[0].VGPR, RegState::Undef); + if (FPSaveIndex) { + if (spilledToMemory(MF, *FPSaveIndex)) + SaveSGPRToMemory(FramePtrReg, *FPSaveIndex); + else + SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex); } // Emit the copy if we need an FP, and are using a free SGPR to save it. @@ -891,6 +853,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + if (BPSaveIndex) { + if (spilledToMemory(MF, *BPSaveIndex)) + SaveSGPRToMemory(BasePtrReg, *BPSaveIndex); + else + SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex); + } + // Emit the copy if we need a BP, and are using a free SGPR to save it. 
if (FuncInfo->SGPRForBPSaveRestoreCopy) { BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), @@ -1034,56 +1003,44 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameDestroy); } + auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) { + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); + MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + if (!TmpVGPR) + report_fatal_error("failed to find free scratch register"); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, + FI); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .addReg(TmpVGPR, RegState::Kill); + }; + + auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) { + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef Spill = + FuncInfo->getSGPRToVGPRSpills(FI); + assert(Spill.size() == 1); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg) + .addReg(Spill[0].VGPR) + .addImm(Spill[0].Lane); + }; + if (FPSaveIndex) { const int FramePtrFI = *FPSaveIndex; assert(!MFI.isDeadObjectIndex(FramePtrFI)); - if (spilledToMemory(MF, FramePtrFI)) { - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, FramePtrFI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) - .addReg(TmpVGPR, RegState::Kill); - } else { - // Reload from VGPR spill. - assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(FramePtrFI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - } + if (spilledToMemory(MF, FramePtrFI)) + RestoreSGPRFromMemory(FramePtrReg, FramePtrFI); + else + RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI); } if (BPSaveIndex) { const int BasePtrFI = *BPSaveIndex; assert(!MFI.isDeadObjectIndex(BasePtrFI)); - if (spilledToMemory(MF, BasePtrFI)) { - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, BasePtrFI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) - .addReg(TmpVGPR, RegState::Kill); - } else { - // Reload from VGPR spill. 
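// The prologue and epilogue hunks above fold four near-identical FP/BP spill
// and reload sequences into SaveSGPRTo*/RestoreSGPRFrom* lambdas keyed only
// on (Reg, FI). A minimal standalone sketch of the same deduplication
// pattern; the types below are illustrative stand-ins, not the real
// Register/MachineFunction machinery:

#include <cstdio>

using Reg = unsigned; // stand-in for a physical SGPR number
using FrameIdx = int; // stand-in for a frame index

static void saveToMemory(Reg R, FrameIdx FI) {
  // Models: copy the SGPR through a scratch VGPR, then store to the slot.
  std::printf("save s%u to stack slot %d via scratch VGPR\n", R, FI);
}

static void saveToVGPRLane(Reg R, FrameIdx FI) {
  // Models: v_writelane into the lane reserved for this slot.
  std::printf("save s%u to a reserved VGPR lane for slot %d\n", R, FI);
}

static void saveSGPR(Reg R, FrameIdx FI, bool SpilledToMemory) {
  // One decision point replaces two copies of the same if/else body.
  if (SpilledToMemory)
    saveToMemory(R, FI);
  else
    saveToVGPRLane(R, FI);
}

int main() {
  saveSGPR(/*FramePtr=*/33, /*FI=*/0, /*SpilledToMemory=*/true);
  saveSGPR(/*BasePtr=*/34, /*FI=*/1, /*SpilledToMemory=*/false);
}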
- assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(BasePtrFI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - } + if (spilledToMemory(MF, BasePtrFI)) + RestoreSGPRFromMemory(BasePtrReg, BasePtrFI); + else + RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI); } Register ScratchExecCopy; @@ -1100,18 +1057,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameDestroy); } - for (const auto &Reg : FuncInfo->WWMReservedRegs) { - auto VGPR = Reg.first; - auto FI = Reg.second; - if (!FI) - continue; - + for (auto ReservedWWM : FuncInfo->wwmAllocation()) { if (!ScratchExecCopy) ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, - *FI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); } if (ScratchExecCopy) { @@ -1161,6 +1113,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + if (!FuncInfo->isEntryFunction()) { + // Spill VGPRs used for Whole Wave Mode + FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI); + } + const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR; @@ -1200,7 +1157,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } - // Stack slot coloring may assign different objets to the same stack slot. + // Stack slot coloring may assign different objects to the same stack slot. // If not, then the VGPR to AGPR spill slot is dead. for (unsigned FI : SpillFIs.set_bits()) if (!NonVGPRSpillFIs.test(FI)) @@ -1229,7 +1186,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } - FuncInfo->removeDeadFrameIndices(MFI); + // At this point we've already allocated all spilled SGPRs to VGPRs if we + // can. Any remaining SGPR spills will go to memory, so move them back to the + // default stack. + bool HaveSGPRToVMemSpill = + FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true); assert(allSGPRSpillsAreDead(MF) && "SGPR spill should have been removed in SILowerSGPRSpills"); @@ -1241,6 +1202,39 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // Add an emergency spill slot RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); + + // If we are spilling SGPRs to memory with a large frame, we may need a + // second VGPR emergency frame index. + if (HaveSGPRToVMemSpill && + allocateScavengingFrameIndexesNearIncomingSP(MF)) { + RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false)); + } + } +} + +void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced( + MachineFunction &MF, RegScavenger *RS) const { + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + + if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { + // On gfx908, we had initially reserved the highest available VGPR for AGPR + // copy. Now since we are done with RA, check if there exists an unused VGPR + // which is lower than the earlier reserved VGPR before RA. If one exists, + // use it for AGPR copy instead of one reserved before RA.
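// The standalone sketch below models the selection the code that follows
// performs in the new processFunctionBeforeFrameIndicesReplaced() hook: if
// register allocation left a VGPR with a lower hardware index unused, prefer
// it over the pessimistically reserved highest VGPR. Plain indices stand in
// for registers; this is an illustration, not the LLVM API.

#include <cstdio>
#include <vector>

unsigned pickAGPRCopyVGPR(unsigned ReservedIdx,
                          const std::vector<bool> &UsedAfterRA) {
  for (unsigned Idx = 0; Idx < UsedAfterRA.size(); ++Idx)
    if (!UsedAfterRA[Idx] && Idx < ReservedIdx)
      return Idx; // a lower unused VGPR frees the high one for allocation
  return ReservedIdx; // otherwise keep the pre-RA reservation
}

int main() {
  std::vector<bool> Used = {true, true, false, true}; // v2 ended up unused
  // Pre-RA, the highest register (modeled here as index 255) was reserved.
  std::printf("use v%u for AGPR copies\n", pickAGPRCopyVGPR(255, Used));
}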
+ Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy(); + Register UnusedLowVGPR = + TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); + if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) < + TRI->getHWRegIndex(VGPRForAGPRCopy))) { + // The call to setVGPRForAGPRCopy() should happen before calling + // freezeReservedRegs() so that getReservedRegs() can reserve this newly + // identified VGPR (for AGPR copy). + FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR); + MRI.freezeReservedRegs(MF); + } } @@ -1333,6 +1327,20 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, // FP will be specially managed like SP. if (WillHaveFP || hasFP(MF)) SavedRegs.reset(MFI->getFrameOffsetReg()); + + // Return address use with the return instruction is hidden through the SI_RETURN + // pseudo. Given that, and since IPRA computes actual register usage and + // does not use the CSR list, the clobbering of the return address by function calls + // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register + // usage collection. This ensures that save/restore of the return address happens + // in those scenarios. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + Register RetAddrReg = TRI->getReturnAddressReg(MF); + if (!MFI->isEntryFunction() && + (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) { + SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0)); + SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1)); + } } bool SIFrameLowering::assignCalleeSavedSpillSlots( diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 7949dcfa6632..79154d494e91 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -47,6 +47,9 @@ public: MachineFunction &MF, RegScavenger *RS = nullptr) const override; + void processFunctionBeforeFrameIndicesReplaced( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e2f4a0896bc3..094d5cd58673 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17,6 +17,7 @@ #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -25,6 +26,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/IR/DiagnosticInfo.h" @@ -136,6 +138,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass); addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass); + addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass); + addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass); } addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); @@ -151,27 +155,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrOneBooleanContent); // We need to custom lower vector stores from local memory -
setOperationAction(ISD::LOAD, MVT::v2i32, Custom); - setOperationAction(ISD::LOAD, MVT::v3i32, Custom); - setOperationAction(ISD::LOAD, MVT::v4i32, Custom); - setOperationAction(ISD::LOAD, MVT::v5i32, Custom); - setOperationAction(ISD::LOAD, MVT::v6i32, Custom); - setOperationAction(ISD::LOAD, MVT::v7i32, Custom); - setOperationAction(ISD::LOAD, MVT::v8i32, Custom); - setOperationAction(ISD::LOAD, MVT::v16i32, Custom); - setOperationAction(ISD::LOAD, MVT::i1, Custom); - setOperationAction(ISD::LOAD, MVT::v32i32, Custom); - - setOperationAction(ISD::STORE, MVT::v2i32, Custom); - setOperationAction(ISD::STORE, MVT::v3i32, Custom); - setOperationAction(ISD::STORE, MVT::v4i32, Custom); - setOperationAction(ISD::STORE, MVT::v5i32, Custom); - setOperationAction(ISD::STORE, MVT::v6i32, Custom); - setOperationAction(ISD::STORE, MVT::v7i32, Custom); - setOperationAction(ISD::STORE, MVT::v8i32, Custom); - setOperationAction(ISD::STORE, MVT::v16i32, Custom); - setOperationAction(ISD::STORE, MVT::i1, Custom); - setOperationAction(ISD::STORE, MVT::v32i32, Custom); + setOperationAction(ISD::LOAD, + {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, + MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1, + MVT::v32i32}, + Custom); + + setOperationAction(ISD::STORE, + {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, + MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1, + MVT::v32i32}, + Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand); @@ -198,81 +192,57 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v8i64, MVT::v8i32, Expand); setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom); setOperationAction(ISD::SELECT, MVT::i1, Promote); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f64, Promote); AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); + setOperationAction(ISD::SELECT_CC, + {MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand); setOperationAction(ISD::SETCC, MVT::i1, Promote); - setOperationAction(ISD::SETCC, MVT::v2i1, Expand); - setOperationAction(ISD::SETCC, MVT::v4i1, Expand); + setOperationAction(ISD::SETCC, {MVT::v2i1, MVT::v4i1}, Expand); AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); - setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v3i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v3f32, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v4i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v4f32, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v5i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v5f32, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v6i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v6f32, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v7i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v7f32, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v8i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v8f32, Expand); - 
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v16f32, Expand); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); + setOperationAction(ISD::TRUNCATE, + {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, + MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32}, + Expand); + setOperationAction(ISD::FP_ROUND, + {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, + MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32}, + Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, + {MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16, + MVT::v3i16, MVT::v4i16, MVT::Other}, + Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i64, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, + {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand); - setOperationAction(ISD::UADDO, MVT::i32, Legal); - setOperationAction(ISD::USUBO, MVT::i32, Legal); + setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal); - setOperationAction(ISD::ADDCARRY, MVT::i32, Legal); - setOperationAction(ISD::SUBCARRY, MVT::i32, Legal); + setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i32, Legal); - setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i64, + Expand); #if 0 - setOperationAction(ISD::ADDCARRY, MVT::i64, Legal); - setOperationAction(ISD::SUBCARRY, MVT::i64, Legal); + setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal); #endif // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. 
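// Most hunks in this file switch from one setOperationAction() call per
// (opcode, type) pair to overloads that take whole lists of opcodes and/or
// types. A minimal sketch of how such batching overloads can be layered over
// a single-entry API; the class and enum here are illustrative, not LLVM's
// actual TargetLoweringBase:

#include <initializer_list>
#include <map>
#include <utility>

enum class Action { Legal, Custom, Expand, Promote };

class LoweringTable {
  std::map<std::pair<unsigned, int>, Action> Table; // (opcode, VT) -> action

public:
  // Classic single-entry form.
  void setOperationAction(unsigned Op, int VT, Action A) {
    Table[{Op, VT}] = A;
  }
  // Batched form: apply the action to the whole cross product, so
  // setOperationAction({LOAD, STORE}, {V2I32, V4I32}, Custom) replaces
  // four separate calls.
  void setOperationAction(std::initializer_list<unsigned> Ops,
                          std::initializer_list<int> VTs, Action A) {
    for (unsigned Op : Ops)
      for (int VT : VTs)
        setOperationAction(Op, VT, A);
  }
};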
- for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, - MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, - MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32, - MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, - MVT::v8i16, MVT::v8f16, MVT::v16i64, MVT::v16f64, - MVT::v32i32, MVT::v32f32 }) { + for (MVT VT : + {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, + MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64, + MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64, + MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16, + MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: @@ -372,94 +342,63 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v32i32); } - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, + {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}, + Expand); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16}, Custom); // Avoid stack access for these. // TODO: Generalize to more vector types. - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Custom); - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i8, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i8, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i8, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i8, Custom); - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i16, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f16, Custom); + setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}, + {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8, + MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16}, + Custom); // Deal with vec3 vector operations when widened to vec4. - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3i32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3f32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, + {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32}, Custom); // Deal with vec5/6/7 vector operations when widened to vec8. 
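// "Widened to vec4" / "widened to vec8" above follows the usual legalization
// rule of rounding an odd element count up to the next power of two. A tiny
// model of that computation, detached from the MVT machinery:

#include <cstdio>

unsigned widenedNumElts(unsigned N) {
  unsigned W = 1;
  while (W < N)
    W *= 2;
  return W; // v3 -> v4, v5/v6/v7 -> v8
}

int main() {
  for (unsigned N : {3u, 5u, 6u, 7u})
    std::printf("v%u widens to v%u\n", N, widenedNumElts(N));
}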
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v6i32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v6f32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v7i32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v7f32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, + {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32, + MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32}, + Custom); // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, // and output demarshalling - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i32, MVT::i64}, Custom); // We can't return success/failure, only the old value, // let LLVM add the comparison - setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, {MVT::i32, MVT::i64}, + Expand); - if (Subtarget->hasFlatAddressSpace()) { - setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); - setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); - } + if (Subtarget->hasFlatAddressSpace()) + setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom); - setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); - setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + setOperationAction(ISD::BITREVERSE, {MVT::i32, MVT::i64}, Legal); // FIXME: This should be narrowed to i32, but that only happens if i64 is // illegal. // FIXME: Should lower sub-i32 bswaps to bit-ops without v_perm_b32. - setOperationAction(ISD::BSWAP, MVT::i64, Legal); - setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BSWAP, {MVT::i64, MVT::i32}, Legal); // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - setOperationAction(ISD::TRAP, MVT::Other, Custom); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); + setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom); if (Subtarget->has16BitInsts()) { - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FPOWI, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Custom); - setOperationAction(ISD::FEXP, MVT::f16, Custom); - setOperationAction(ISD::FLOG10, MVT::f16, Custom); + setOperationAction({ISD::FPOW, ISD::FPOWI}, MVT::f16, Promote); + setOperationAction({ISD::FLOG, ISD::FEXP, ISD::FLOG10}, MVT::f16, Custom); } if (Subtarget->hasMadMacF32Insts()) setOperationAction(ISD::FMAD, MVT::f32, Legal); - if (!Subtarget->hasBFI()) { + if (!Subtarget->hasBFI()) // fcopysign can be done in a single instruction with BFI. 
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - } + setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand); if (!Subtarget->hasBCNT(32)) setOperationAction(ISD::CTPOP, MVT::i32, Expand); @@ -467,15 +406,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (!Subtarget->hasBCNT(64)) setOperationAction(ISD::CTPOP, MVT::i64, Expand); - if (Subtarget->hasFFBH()) { - setOperationAction(ISD::CTLZ, MVT::i32, Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); - } + if (Subtarget->hasFFBH()) + setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); - if (Subtarget->hasFFBL()) { - setOperationAction(ISD::CTTZ, MVT::i32, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); - } + if (Subtarget->hasFFBL()) + setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); // We only really have 32-bit BFE instructions (and 16-bit on VI). // @@ -489,84 +424,48 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setHasExtractBitsInsn(true); // Clamp modifier on add/sub - if (Subtarget->hasIntClamp()) { - setOperationAction(ISD::UADDSAT, MVT::i32, Legal); - setOperationAction(ISD::USUBSAT, MVT::i32, Legal); - } + if (Subtarget->hasIntClamp()) + setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Legal); - if (Subtarget->hasAddNoCarry()) { - setOperationAction(ISD::SADDSAT, MVT::i16, Legal); - setOperationAction(ISD::SSUBSAT, MVT::i16, Legal); - setOperationAction(ISD::SADDSAT, MVT::i32, Legal); - setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); - } - - setOperationAction(ISD::FMINNUM, MVT::f32, Custom); - setOperationAction(ISD::FMAXNUM, MVT::f32, Custom); - setOperationAction(ISD::FMINNUM, MVT::f64, Custom); - setOperationAction(ISD::FMAXNUM, MVT::f64, Custom); + if (Subtarget->hasAddNoCarry()) + setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32}, + Legal); + setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64}, + Custom); // These are really only legal for ieee_mode functions. We should be avoiding // them for functions that don't have ieee_mode enabled, so just say they are // legal. 
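// The "really only legal for ieee_mode functions" remark above is about NaN
// handling: IEEE-754-2008 minNum/maxNum return the non-NaN operand when
// exactly one input is NaN, which is the contract the FMINNUM_IEEE and
// FMAXNUM_IEEE nodes assume the hardware honors. A scalar sketch of that
// contract:

#include <cmath>
#include <cstdio>

float minNumIEEE(float A, float B) {
  if (std::isnan(A))
    return B; // a single NaN input is dropped, not propagated
  if (std::isnan(B))
    return A;
  return A < B ? A : B;
}

int main() {
  std::printf("%g\n", minNumIEEE(NAN, 2.0f)); // prints 2, not nan
}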
- setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); - setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); - + setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE}, + {MVT::f32, MVT::f64}, Legal); - if (Subtarget->haveRoundOpsF64()) { - setOperationAction(ISD::FTRUNC, MVT::f64, Legal); - setOperationAction(ISD::FCEIL, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f64, Legal); - } else { - setOperationAction(ISD::FCEIL, MVT::f64, Custom); - setOperationAction(ISD::FTRUNC, MVT::f64, Custom); - setOperationAction(ISD::FRINT, MVT::f64, Custom); - setOperationAction(ISD::FFLOOR, MVT::f64, Custom); - } + if (Subtarget->haveRoundOpsF64()) + setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal); + else + setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR}, + MVT::f64, Custom); setOperationAction(ISD::FFLOOR, MVT::f64, Legal); - setOperationAction(ISD::FSIN, MVT::f32, Custom); - setOperationAction(ISD::FCOS, MVT::f32, Custom); - setOperationAction(ISD::FDIV, MVT::f32, Custom); + setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom); setOperationAction(ISD::FDIV, MVT::f64, Custom); if (Subtarget->has16BitInsts()) { - setOperationAction(ISD::Constant, MVT::i16, Legal); - - setOperationAction(ISD::SMIN, MVT::i16, Legal); - setOperationAction(ISD::SMAX, MVT::i16, Legal); - - setOperationAction(ISD::UMIN, MVT::i16, Legal); - setOperationAction(ISD::UMAX, MVT::i16, Legal); + setOperationAction({ISD::Constant, ISD::SMIN, ISD::SMAX, ISD::UMIN, + ISD::UMAX, ISD::UADDSAT, ISD::USUBSAT}, + MVT::i16, Legal); - setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote); AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32); - setOperationAction(ISD::ROTR, MVT::i16, Expand); - setOperationAction(ISD::ROTL, MVT::i16, Expand); - - setOperationAction(ISD::SDIV, MVT::i16, Promote); - setOperationAction(ISD::UDIV, MVT::i16, Promote); - setOperationAction(ISD::SREM, MVT::i16, Promote); - setOperationAction(ISD::UREM, MVT::i16, Promote); - setOperationAction(ISD::UADDSAT, MVT::i16, Legal); - setOperationAction(ISD::USUBSAT, MVT::i16, Legal); - - setOperationAction(ISD::BITREVERSE, MVT::i16, Promote); - - setOperationAction(ISD::CTTZ, MVT::i16, Promote); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote); - setOperationAction(ISD::CTLZ, MVT::i16, Promote); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote); - setOperationAction(ISD::CTPOP, MVT::i16, Promote); + setOperationAction({ISD::ROTR, ISD::ROTL, ISD::SELECT_CC, ISD::BR_CC}, + MVT::i16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); - - setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction({ISD::SIGN_EXTEND, ISD::SDIV, ISD::UDIV, ISD::SREM, + ISD::UREM, ISD::BITREVERSE, ISD::CTTZ, + ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, + ISD::CTPOP}, + MVT::i16, Promote); setOperationAction(ISD::LOAD, MVT::i16, Custom); @@ -577,8 +476,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote); AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32); - setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i16, Custom); + setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::i16, Custom); // F16 - Constant Actions. 
setOperationAction(ISD::ConstantFP, MVT::f16, Legal); @@ -590,22 +488,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16); // F16 - VOP1 Actions. - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); - setOperationAction(ISD::FCOS, MVT::f16, Custom); - setOperationAction(ISD::FSIN, MVT::f16, Custom); + setOperationAction( + {ISD::FP_ROUND, ISD::FCOS, ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND}, + MVT::f16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i16, Custom); + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::f16, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, Custom); + setOperationAction( + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP}, + MVT::f16, Promote); // F16 - VOP2 Actions. - setOperationAction(ISD::BR_CC, MVT::f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); + setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand); setOperationAction(ISD::FDIV, MVT::f16, Custom); @@ -615,7 +509,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAD, MVT::f16, Legal); for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16, - MVT::v8f16}) { + MVT::v8f16, MVT::v16i16, MVT::v16f16}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: @@ -639,16 +533,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } // v_perm_b32 can handle either of these. - setOperationAction(ISD::BSWAP, MVT::i16, Legal); - setOperationAction(ISD::BSWAP, MVT::v2i16, Legal); + setOperationAction(ISD::BSWAP, {MVT::i16, MVT::v2i16}, Legal); setOperationAction(ISD::BSWAP, MVT::v4i16, Custom); // XXX - Do these do anything? Vector constants turn into build_vector. 
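// "v_perm_b32 can handle either of these" refers to the BSWAP lines above:
// one byte-permute pattern covers both a scalar i16 swap and a packed v2i16
// swap. A toy model of a byte permute over a single 32-bit source, where
// selector byte i picks the source byte that lands in result byte i (byte 0
// is the least significant):

#include <cstdint>
#include <cstdio>

uint32_t permBytes(uint32_t Src, const uint8_t Sel[4]) {
  uint32_t R = 0;
  for (int I = 0; I < 4; ++I)
    R |= ((Src >> (8 * Sel[I])) & 0xffu) << (8 * I);
  return R;
}

int main() {
  const uint8_t SwapI16[4] = {1, 0, 2, 3};   // bswap an i16 in the low half
  const uint8_t SwapV2I16[4] = {1, 0, 3, 2}; // bswap both halves of a v2i16
  std::printf("%08x\n", permBytes(0x11223344u, SwapI16));   // 11224433
  std::printf("%08x\n", permBytes(0x11223344u, SwapV2I16)); // 22114433
}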
- setOperationAction(ISD::Constant, MVT::v2i16, Legal); - setOperationAction(ISD::ConstantFP, MVT::v2f16, Legal); + setOperationAction(ISD::Constant, {MVT::v2i16, MVT::v2f16}, Legal); - setOperationAction(ISD::UNDEF, MVT::v2i16, Legal); - setOperationAction(ISD::UNDEF, MVT::v2f16, Legal); + setOperationAction(ISD::UNDEF, {MVT::v2i16, MVT::v2f16}, Legal); setOperationAction(ISD::STORE, MVT::v2i16, Promote); AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32); @@ -692,140 +583,98 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v8f16, Promote); AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32); - setOperationAction(ISD::ANY_EXTEND, MVT::v2i32, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand); + setOperationAction(ISD::LOAD, MVT::v16i16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v16i16, MVT::v8i32); + setOperationAction(ISD::LOAD, MVT::v16f16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v16f16, MVT::v8i32); + + setOperationAction(ISD::STORE, MVT::v16i16, Promote); + AddPromotedToType(ISD::STORE, MVT::v16i16, MVT::v8i32); + setOperationAction(ISD::STORE, MVT::v16f16, Promote); + AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32); + + setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND}, + MVT::v2i32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Expand); + setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND}, + MVT::v4i32, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Expand); + setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND}, + MVT::v8i32, Expand); - if (!Subtarget->hasVOP3PInsts()) { - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); - } + if (!Subtarget->hasVOP3PInsts()) + setOperationAction(ISD::BUILD_VECTOR, {MVT::v2i16, MVT::v2f16}, Custom); setOperationAction(ISD::FNEG, MVT::v2f16, Legal); // This isn't really legal, but this avoids the legalizer unrolling it (and // allows matching fneg (fabs x) patterns) setOperationAction(ISD::FABS, MVT::v2f16, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f16, Custom); - setOperationAction(ISD::FMINNUM, MVT::f16, Custom); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::f16, Legal); - setOperationAction(ISD::FMINNUM_IEEE, MVT::f16, Legal); + setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom); + setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal); - setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom); - setOperationAction(ISD::FMINNUM_IEEE, MVT::v8f16, Custom); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::v8f16, Custom); + setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE}, + {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Custom); - setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand); - setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand); - setOperationAction(ISD::FMINNUM, MVT::v8f16, Expand); - setOperationAction(ISD::FMAXNUM, MVT::v8f16, Expand); + setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, + {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Expand); - for 
(MVT Vec16 : { MVT::v8i16, MVT::v8f16 }) { - setOperationAction(ISD::BUILD_VECTOR, Vec16, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec16, Custom); + for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) { + setOperationAction( + {ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR}, + Vec16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand); - setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Expand); } } if (Subtarget->hasVOP3PInsts()) { - setOperationAction(ISD::ADD, MVT::v2i16, Legal); - setOperationAction(ISD::SUB, MVT::v2i16, Legal); - setOperationAction(ISD::MUL, MVT::v2i16, Legal); - setOperationAction(ISD::SHL, MVT::v2i16, Legal); - setOperationAction(ISD::SRL, MVT::v2i16, Legal); - setOperationAction(ISD::SRA, MVT::v2i16, Legal); - setOperationAction(ISD::SMIN, MVT::v2i16, Legal); - setOperationAction(ISD::UMIN, MVT::v2i16, Legal); - setOperationAction(ISD::SMAX, MVT::v2i16, Legal); - setOperationAction(ISD::UMAX, MVT::v2i16, Legal); - - setOperationAction(ISD::UADDSAT, MVT::v2i16, Legal); - setOperationAction(ISD::USUBSAT, MVT::v2i16, Legal); - setOperationAction(ISD::SADDSAT, MVT::v2i16, Legal); - setOperationAction(ISD::SSUBSAT, MVT::v2i16, Legal); - - setOperationAction(ISD::FADD, MVT::v2f16, Legal); - setOperationAction(ISD::FMUL, MVT::v2f16, Legal); - setOperationAction(ISD::FMA, MVT::v2f16, Legal); - - setOperationAction(ISD::FMINNUM_IEEE, MVT::v2f16, Legal); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::v2f16, Legal); - - setOperationAction(ISD::FCANONICALIZE, MVT::v2f16, Legal); - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); - - for (MVT VT : { MVT::v4i16, MVT::v8i16 }) { - // Split vector operations. - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB, ISD::MUL, ISD::SHL, ISD::SRL, + ISD::SRA, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX, + ISD::UADDSAT, ISD::USUBSAT, ISD::SADDSAT, ISD::SSUBSAT}, + MVT::v2i16, Legal); - setOperationAction(ISD::SMIN, VT, Custom); - setOperationAction(ISD::SMAX, VT, Custom); - setOperationAction(ISD::UMIN, VT, Custom); - setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FMINNUM_IEEE, + ISD::FMAXNUM_IEEE, ISD::FCANONICALIZE}, + MVT::v2f16, Legal); - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); - } + setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2i16, MVT::v2f16}, + Custom); + + setOperationAction(ISD::VECTOR_SHUFFLE, + {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16, + MVT::v16f16, MVT::v16i16}, + Custom); - for (MVT VT : { MVT::v4f16, MVT::v8f16 }) { + for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16}) // Split vector operations. 
- setOperationAction(ISD::FADD, VT, Custom); - setOperationAction(ISD::FMUL, VT, Custom); - setOperationAction(ISD::FMA, VT, Custom); - setOperationAction(ISD::FCANONICALIZE, VT, Custom); - } + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL, ISD::ADD, ISD::SUB, + ISD::MUL, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, + ISD::UADDSAT, ISD::SADDSAT, ISD::USUBSAT, + ISD::SSUBSAT}, + VT, Custom); - setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom); - setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom); + for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16}) + // Split vector operations. + setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE}, + VT, Custom); - setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom); - setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom); + setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16}, + Custom); setOperationAction(ISD::FEXP, MVT::v2f16, Custom); - setOperationAction(ISD::SELECT, MVT::v4i16, Custom); - setOperationAction(ISD::SELECT, MVT::v4f16, Custom); + setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16}, Custom); if (Subtarget->hasPackedFP32Ops()) { - setOperationAction(ISD::FADD, MVT::v2f32, Legal); - setOperationAction(ISD::FMUL, MVT::v2f32, Legal); - setOperationAction(ISD::FMA, MVT::v2f32, Legal); - setOperationAction(ISD::FNEG, MVT::v2f32, Legal); - - for (MVT VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32 }) { - setOperationAction(ISD::FADD, VT, Custom); - setOperationAction(ISD::FMUL, VT, Custom); - setOperationAction(ISD::FMA, VT, Custom); - } + setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FNEG}, + MVT::v2f32, Legal); + setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA}, + {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32}, + Custom); } } - setOperationAction(ISD::FNEG, MVT::v4f16, Custom); - setOperationAction(ISD::FABS, MVT::v4f16, Custom); + setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v4f16, Custom); if (Subtarget->has16BitInsts()) { setOperationAction(ISD::SELECT, MVT::v2i16, Promote); @@ -834,107 +683,88 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, AddPromotedToType(ISD::SELECT, MVT::v2f16, MVT::i32); } else { // Legalization hack. 
- setOperationAction(ISD::SELECT, MVT::v2i16, Custom); - setOperationAction(ISD::SELECT, MVT::v2f16, Custom); - - setOperationAction(ISD::FNEG, MVT::v2f16, Custom); - setOperationAction(ISD::FABS, MVT::v2f16, Custom); - } - - for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8, - MVT::v8i16, MVT::v8f16 }) { - setOperationAction(ISD::SELECT, VT, Custom); - } - - setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); - - if (Subtarget->hasMad64_32()) { - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); - } - - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); - - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); - - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v3i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v3f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); - - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::ADDCARRY); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::SUBCARRY); - setTargetDAGCombine(ISD::FADD); - setTargetDAGCombine(ISD::FSUB); - setTargetDAGCombine(ISD::FMINNUM); - setTargetDAGCombine(ISD::FMAXNUM); - setTargetDAGCombine(ISD::FMINNUM_IEEE); - setTargetDAGCombine(ISD::FMAXNUM_IEEE); - setTargetDAGCombine(ISD::FMA); - setTargetDAGCombine(ISD::SMIN); - setTargetDAGCombine(ISD::SMAX); - setTargetDAGCombine(ISD::UMIN); - setTargetDAGCombine(ISD::UMAX); - setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - setTargetDAGCombine(ISD::FCANONICALIZE); - setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setOperationAction(ISD::SELECT, {MVT::v2i16, MVT::v2f16}, Custom); + + 
setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v2f16, Custom); + } + + setOperationAction(ISD::SELECT, + {MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8, + MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}, + Custom); + + setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom); + + if (Subtarget->hasMad64_32()) + setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, + {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16, + MVT::v2i16, MVT::v2f16}, + Custom); + + setOperationAction(ISD::INTRINSIC_W_CHAIN, + {MVT::v2f16, MVT::v2i16, MVT::v3f16, MVT::v3i16, + MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::Other, MVT::f16, + MVT::i16, MVT::i8}, + Custom); + + setOperationAction(ISD::INTRINSIC_VOID, + {MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v3i16, + MVT::v3f16, MVT::v4f16, MVT::v4i16, MVT::f16, MVT::i16, + MVT::i8}, + Custom); + + setTargetDAGCombine({ISD::ADD, + ISD::ADDCARRY, + ISD::SUB, + ISD::SUBCARRY, + ISD::FADD, + ISD::FSUB, + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::FMINNUM_IEEE, + ISD::FMAXNUM_IEEE, + ISD::FMA, + ISD::SMIN, + ISD::SMAX, + ISD::UMIN, + ISD::UMAX, + ISD::SETCC, + ISD::AND, + ISD::OR, + ISD::XOR, + ISD::SINT_TO_FP, + ISD::UINT_TO_FP, + ISD::FCANONICALIZE, + ISD::SCALAR_TO_VECTOR, + ISD::ZERO_EXTEND, + ISD::SIGN_EXTEND_INREG, + ISD::EXTRACT_VECTOR_ELT, + ISD::INSERT_VECTOR_ELT}); // All memory operations. Some folding on the pointer operand is done to help // matching the constant offsets in the addressing modes. - setTargetDAGCombine(ISD::LOAD); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::ATOMIC_LOAD); - setTargetDAGCombine(ISD::ATOMIC_STORE); - setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP); - setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); - setTargetDAGCombine(ISD::ATOMIC_SWAP); - setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD); - setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB); - setTargetDAGCombine(ISD::ATOMIC_LOAD_AND); - setTargetDAGCombine(ISD::ATOMIC_LOAD_OR); - setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR); - setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND); - setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN); - setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX); - setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN); - setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX); - setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); + setTargetDAGCombine({ISD::LOAD, + ISD::STORE, + ISD::ATOMIC_LOAD, + ISD::ATOMIC_STORE, + ISD::ATOMIC_CMP_SWAP, + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, + ISD::ATOMIC_SWAP, + ISD::ATOMIC_LOAD_ADD, + ISD::ATOMIC_LOAD_SUB, + ISD::ATOMIC_LOAD_AND, + ISD::ATOMIC_LOAD_OR, + ISD::ATOMIC_LOAD_XOR, + ISD::ATOMIC_LOAD_NAND, + ISD::ATOMIC_LOAD_MIN, + ISD::ATOMIC_LOAD_MAX, + ISD::ATOMIC_LOAD_UMIN, + ISD::ATOMIC_LOAD_UMAX, + ISD::ATOMIC_LOAD_FADD, + ISD::INTRINSIC_VOID, + ISD::INTRINSIC_W_CHAIN}); // FIXME: In other contexts we pretend this is a per-function property. 
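// "Some folding on the pointer operand is done to help matching the constant
// offsets in the addressing modes": the memory-operation combines registered
// above try to peel a constant off an address so it can live in the
// instruction's immediate-offset field. A toy model; the 12-bit unsigned
// field below is an assumed example width, not a statement about any
// particular AMDGPU addressing mode:

#include <cstdint>
#include <cstdio>

struct Addr {
  uint64_t Base;
  uint16_t ImmOff;
};

Addr foldOffset(uint64_t Base, int64_t Off) {
  if (Off >= 0 && Off < (1 << 12))
    return {Base, static_cast<uint16_t>(Off)}; // fold into the immediate
  return {Base + static_cast<uint64_t>(Off), 0}; // keep it in the base
}

int main() {
  Addr A = foldOffset(0x1000, 16);   // fits the immediate field
  Addr B = foldOffset(0x1000, 8192); // too large, stays in the base
  std::printf("%llx+%u %llx+%u\n", (unsigned long long)A.Base,
              (unsigned)A.ImmOff, (unsigned long long)B.Base,
              (unsigned)B.ImmOff);
}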
setStackPointerRegisterToSaveRestore(AMDGPU::SGPR32); @@ -1118,6 +948,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &CI, MachineFunction &MF, unsigned IntrID) const { + Info.flags = MachineMemOperand::MONone; + if (CI.hasMetadata(LLVMContext::MD_invariant_load)) + Info.flags |= MachineMemOperand::MOInvariant; + if (const AMDGPU::RsrcIntrinsic *RsrcIntr = AMDGPU::lookupRsrcIntrinsic(IntrID)) { AttributeList Attr = Intrinsic::getAttributes(CI.getContext(), @@ -1127,16 +961,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, SIMachineFunctionInfo *MFI = MF.getInfo(); + const GCNTargetMachine &TM = + static_cast(getTargetMachine()); + if (RsrcIntr->IsImage) { - Info.ptrVal = - MFI->getImagePSV(*MF.getSubtarget().getInstrInfo()); + Info.ptrVal = MFI->getImagePSV(TM); Info.align.reset(); } else { - Info.ptrVal = - MFI->getBufferPSV(*MF.getSubtarget().getInstrInfo()); + Info.ptrVal = MFI->getBufferPSV(TM); } - Info.flags = MachineMemOperand::MODereferenceable; + Info.flags |= MachineMemOperand::MODereferenceable; if (Attr.hasFnAttr(Attribute::ReadOnly)) { unsigned DMaskLanes = 4; @@ -1178,12 +1013,23 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; // XXX - Should this be volatile without known ordering? Info.flags |= MachineMemOperand::MOVolatile; + + switch (IntrID) { + default: + break; + case Intrinsic::amdgcn_raw_buffer_load_lds: + case Intrinsic::amdgcn_struct_buffer_load_lds: { + unsigned Width = cast(CI.getArgOperand(2))->getZExtValue(); + Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8); + return true; + } + } } return true; } @@ -1200,7 +1046,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast(CI.getOperand(4)); if (!Vol->isZero()) @@ -1211,12 +1057,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::amdgcn_buffer_atomic_fadd: { SIMachineFunctionInfo *MFI = MF.getInfo(); + const GCNTargetMachine &TM = + static_cast(getTargetMachine()); + Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getOperand(0)->getType()); - Info.ptrVal = - MFI->getBufferPSV(*MF.getSubtarget().getInstrInfo()); + Info.ptrVal = MFI->getBufferPSV(TM); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); if (!Vol || !Vol->isZero()) @@ -1230,7 +1078,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast(CI.getOperand(1)); if (!Vol->isZero()) @@ -1243,20 +1091,23 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo 
&Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MOVolatile; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; return true; } case Intrinsic::amdgcn_image_bvh_intersect_ray: { SIMachineFunctionInfo *MFI = MF.getInfo(); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT? - Info.ptrVal = - MFI->getImagePSV(*MF.getSubtarget().getInstrInfo()); + + const GCNTargetMachine &TM = + static_cast(getTargetMachine()); + + Info.ptrVal = MFI->getImagePSV(TM); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MODereferenceable; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; return true; } case Intrinsic::amdgcn_global_atomic_fadd: @@ -1264,15 +1115,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fadd: case Intrinsic::amdgcn_flat_atomic_fmin: - case Intrinsic::amdgcn_flat_atomic_fmax: { + case Intrinsic::amdgcn_flat_atomic_fmax: + case Intrinsic::amdgcn_global_atomic_fadd_v2bf16: + case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile; return true; } case Intrinsic::amdgcn_ds_gws_init: @@ -1283,18 +1136,29 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::amdgcn_ds_gws_sema_release_all: { Info.opc = ISD::INTRINSIC_VOID; + const GCNTargetMachine &TM = + static_cast(getTargetMachine()); + SIMachineFunctionInfo *MFI = MF.getInfo(); - Info.ptrVal = - MFI->getGWSPSV(*MF.getSubtarget().getInstrInfo()); + Info.ptrVal = MFI->getGWSPSV(TM); // This is an abstract access, but we need to specify a type and size. Info.memVT = MVT::i32; Info.size = 4; Info.align = Align(4); - Info.flags = MachineMemOperand::MOStore; if (IntrID == Intrinsic::amdgcn_ds_gws_barrier) - Info.flags = MachineMemOperand::MOLoad; + Info.flags |= MachineMemOperand::MOLoad; + else + Info.flags |= MachineMemOperand::MOStore; + return true; + } + case Intrinsic::amdgcn_global_load_lds: { + Info.opc = ISD::INTRINSIC_VOID; + unsigned Width = cast(CI.getArgOperand(2))->getZExtValue(); + Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8); + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; return true; } default: @@ -1319,6 +1183,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, case Intrinsic::amdgcn_flat_atomic_fadd: case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax: + case Intrinsic::amdgcn_global_atomic_fadd_v2bf16: + case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: case Intrinsic::amdgcn_global_atomic_csub: { Value *Ptr = II->getArgOperand(0); AccessTy = II->getType(); @@ -1506,47 +1372,96 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( AddrSpace == AMDGPUAS::REGION_ADDRESS) { // Check if alignment requirements for ds_read/write instructions are // disabled. 
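// A recurring change in the getTgtMemIntrinsic() hunks above: Info.flags is
// now seeded once (MONone, plus MOInvariant when the call site carries
// invariant-load metadata) and each case ORs its bits in with |= instead of
// overwriting with =. A toy flag set showing why the distinction matters:

#include <cstdio>

enum MemFlags : unsigned {
  MONone = 0,
  MOLoad = 1u << 0,
  MOStore = 1u << 1,
  MOInvariant = 1u << 2,
};

int main() {
  unsigned Flags = MONone;
  bool HasInvariantMD = true; // e.g. !invariant.load on the call
  if (HasInvariantMD)
    Flags |= MOInvariant;

  // Per-intrinsic code adds its own bits later. Plain '=' here would
  // silently drop MOInvariant; '|=' preserves it.
  Flags |= MOLoad | MOStore;

  std::printf("invariant kept: %s\n", (Flags & MOInvariant) ? "yes" : "no");
}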
- if (Subtarget->hasUnalignedDSAccessEnabled() && - !Subtarget->hasLDSMisalignedBug()) { - if (IsFast) - *IsFast = Alignment != Align(2); - return true; - } + if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4)) + return false; + + Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment. + if (Subtarget->hasLDSMisalignedBug() && Size > 32 && + Alignment < RequiredAlignment) + return false; // Either the alignment requirements are "enabled", or there is an // unaligned LDS access related hardware bug even though alignment requirements // are "disabled". In either case, we need to check for proper alignment // requirements. // - if (Size == 64) { + switch (Size) { + case 64: + // SI has a hardware bug in the LDS / GDS bounds checking: if the base + // address is negative, then the instruction is incorrectly treated as + // out-of-bounds even if base + offset is in bounds. Split vectorized + // loads here to avoid emitting ds_read2_b32. We may re-combine the + // load later in the SILoadStoreOptimizer. + if (!Subtarget->hasUsableDSOffset() && Alignment < Align(8)) + return false; + // 8 byte accessing via ds_read/write_b64 requires 8-byte alignment, but we // can do a 4 byte aligned, 8 byte access in a single operation using // ds_read2/write2_b32 with adjacent offsets. - bool AlignedBy4 = Alignment >= Align(4); - if (IsFast) - *IsFast = AlignedBy4; + RequiredAlignment = Align(4); + + if (Subtarget->hasUnalignedDSAccessEnabled()) { + // We will either select ds_read_b64/ds_write_b64 or ds_read2_b32/ + // ds_write2_b32 depending on the alignment. In either case with either + // alignment there is no faster way of doing this. + if (IsFast) + *IsFast = true; + return true; + } + + break; + case 96: + if (!Subtarget->hasDS96AndDS128()) + return false; - return AlignedBy4; - } - if (Size == 96) { // 12 byte accessing via ds_read/write_b96 requires 16-byte alignment on // gfx8 and older. - bool AlignedBy16 = Alignment >= Align(16); - if (IsFast) - *IsFast = AlignedBy16; - return AlignedBy16; - } - if (Size == 128) { + if (Subtarget->hasUnalignedDSAccessEnabled()) { + // Naturally aligned access is fastest. However, also report it as Fast + // if memory is aligned to less than a DWORD. A narrow load or store will + // be equally slow as a single ds_read_b96/ds_write_b96, but there will + // be more of them, so overall we will pay less penalty issuing a single + // instruction. + if (IsFast) + *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4); + return true; + } + + break; + case 128: + if (!Subtarget->hasDS96AndDS128() || !Subtarget->useDS128()) + return false; + // 16 byte accessing via ds_read/write_b128 requires 16-byte alignment on // gfx8 and older, but we can do an 8 byte aligned, 16 byte access in a // single operation using ds_read2/write2_b64. - bool AlignedBy8 = Alignment >= Align(8); - if (IsFast) - *IsFast = AlignedBy8; + RequiredAlignment = Align(8); + + if (Subtarget->hasUnalignedDSAccessEnabled()) { + // Naturally aligned access is fastest. However, also report it as Fast + // if memory is aligned to less than a DWORD. A narrow load or store will + // be equally slow as a single ds_read_b128/ds_write_b128, but there + // will be more of them, so overall we will pay less penalty issuing a + // single instruction.
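// The 64/96/128-bit DS cases above reduce to: the required LDS alignment is
// the natural alignment, relaxed to 4 bytes for b64 (ds_read2/write2_b32 can
// do the pair) and to 8 bytes for b128 (ds_read2/write2_b64), with anything
// permitted once unaligned DS access is enabled. A condensed sketch that
// deliberately drops the hasLDSMisalignedBug/hasUsableDSOffset special cases:

#include <cstdio>

bool dsAccessAllowed(unsigned SizeBits, unsigned AlignBytes,
                     bool UnalignedDSEnabled) {
  unsigned Required = SizeBits / 8; // natural alignment
  switch (SizeBits) {
  case 64:
    Required = 4; // 4-byte aligned pair via ds_read2_b32/ds_write2_b32
    break;
  case 96:
    Required = 16; // ds_read_b96/ds_write_b96 want 16 bytes on gfx8 and older
    break;
  case 128:
    Required = 8; // 8-byte aligned pair via ds_read2_b64/ds_write2_b64
    break;
  default:
    break;
  }
  return AlignBytes >= Required || UnalignedDSEnabled;
}

int main() {
  std::printf("b64@4: %d b96@8: %d b128@8: %d\n",
              dsAccessAllowed(64, 4, false), dsAccessAllowed(96, 8, false),
              dsAccessAllowed(128, 8, false)); // 1 0 1
}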
+ if (IsFast) + *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4); + return true; + } - return AlignedBy8; + break; + default: + if (Size > 32) + return false; + + break; + } + + if (IsFast) + *IsFast = Alignment >= RequiredAlignment; + + return Alignment >= RequiredAlignment || + Subtarget->hasUnalignedDSAccessEnabled(); } if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { @@ -1571,14 +1486,12 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( return AlignedBy4; } - if (Subtarget->hasUnalignedBufferAccessEnabled() && - !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS || - AddrSpace == AMDGPUAS::REGION_ADDRESS)) { - // If we have an uniform constant load, it still requires using a slow + if (Subtarget->hasUnalignedBufferAccessEnabled()) { + // If we have a uniform constant load, it still requires using a slow // buffer instruction if unaligned. if (IsFast) { // Accesses can really be issued as 1-byte aligned or 4-byte aligned, so - // 2-byte alignment is worse than 1 unless doing a 2-byte accesss. + // 2-byte alignment is worse than 1 unless doing a 2-byte access. *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS || AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ? Alignment >= Align(4) : Alignment != Align(2); @@ -1603,20 +1516,22 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( bool SITargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, bool *IsFast) const { - if (IsFast) - *IsFast = false; - - // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, - // which isn't a simple VT. - // Until MVT is extended to handle this, simply check for the size and - // rely on the condition below: allow accesses if the size is a multiple of 4. - if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && - VT.getStoreSize() > 16)) { - return false; + bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace, + Alignment, Flags, IsFast); + + if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() && + (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || + AddrSpace == AMDGPUAS::REGION_ADDRESS)) { + // Lie that it is fast if +unaligned-access-mode is passed so that DS accesses + // get vectorized. We could use ds_read2_b*/ds_write2_b* instructions on + // misaligned data, which is faster than a pair of ds_read_b*/ds_write_b* + // which would be equally misaligned. + // This is only used by the common passes; selection always calls the + // allowsMisalignedMemoryAccessesImpl version. + *IsFast = true; } - return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace, - Alignment, Flags, IsFast); + return Allow; } EVT SITargetLowering::getOptimalMemOpType( @@ -1639,9 +1554,7 @@ EVT SITargetLowering::getOptimalMemOpType( bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { const MemSDNode *MemNode = cast(N); - const Value *Ptr = MemNode->getMemOperand()->getValue(); - const Instruction *I = dyn_cast_or_null(Ptr); - return I && I->getMetadata("amdgpu.noclobber"); + return MemNode->getMemOperand()->getFlags() & MONoClobber; } bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) { @@ -1681,6 +1594,15 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } +bool SITargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const { + if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) + return false; + + // TODO: Add more cases that are cheap.
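// The isExtractSubvectorCheap() hook added here answers "is this extract
// free?": taking the subvector at Index == 0 is just a subregister read of
// the source, while any other offset would need lane moves or shuffles. A
// sketch of the same decision over plain element counts:

#include <cstdio>

bool isExtractSubvectorCheap(unsigned SrcElts, unsigned ResElts,
                             unsigned Index) {
  if (ResElts > SrcElts)
    return false; // not a subvector extraction at all
  return Index == 0; // the low part aliases the source registers
}

int main() {
  std::printf("v8->v4 @0: %d, v8->v4 @4: %d\n",
              isExtractSubvectorCheap(8, 4, 0),
              isExtractSubvectorCheap(8, 4, 4)); // 1, 0
}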
+ return Index == 0; +} + bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const { if (Subtarget->has16BitInsts() && VT == MVT::i16) { switch (Op) { @@ -2106,7 +2028,7 @@ void SITargetLowering::allocateSpecialInputSGPRs( if (Info.hasDispatchPtr()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr); - if (Info.hasQueuePtr()) + if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); // Implicit arg ptr takes the place of the kernarg segment pointer. This is a @@ -2153,7 +2075,7 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(DispatchPtrReg); } - if (Info.hasQueuePtr()) { + if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); @@ -2190,6 +2112,24 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const { + if (Subtarget->hasUserSGPRInit16Bug()) { + // Pad up the used user SGPRs with dead inputs. + unsigned CurrentUserSGPRs = Info.getNumUserSGPRs(); + + // Note we do not count the PrivateSegmentWaveByteOffset. We do not want to + // rely on it to reach 16 since if we end up having no stack usage, it will + // not really be added. + unsigned NumRequiredSystemSGPRs = Info.hasWorkGroupIDX() + + Info.hasWorkGroupIDY() + + Info.hasWorkGroupIDZ() + + Info.hasWorkGroupInfo(); + for (unsigned i = NumRequiredSystemSGPRs + CurrentUserSGPRs; i < 16; ++i) { + Register Reg = Info.addReservedUserSGPR(); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + } + if (Info.hasWorkGroupIDX()) { Register Reg = Info.addWorkGroupIDX(); MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); @@ -2234,6 +2174,8 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg); } + + assert(!Subtarget->hasUserSGPRInit16Bug() || Info.getNumPreloadedSGPRs() >= 16); } static void reservePrivateMemoryRegs(const TargetMachine &TM, @@ -2388,7 +2330,7 @@ SDValue SITargetLowering::LowerFormalArguments( return DAG.getEntryNode(); } - Info->allocateModuleLDSGlobal(Fn.getParent()); + Info->allocateModuleLDSGlobal(Fn); SmallVector Splits; SmallVector ArgLocs; @@ -2538,7 +2480,13 @@ SDValue SITargetLowering::LowerFormalArguments( assert(VA.isRegLoc() && "Parameter must be in a register!"); Register Reg = VA.getLocReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); + const TargetRegisterClass *RC = nullptr; + if (AMDGPU::VGPR_32RegClass.contains(Reg)) + RC = &AMDGPU::VGPR_32RegClass; + else if (AMDGPU::SGPR_32RegClass.contains(Reg)) + RC = &AMDGPU::SGPR_32RegClass; + else + llvm_unreachable("Unexpected register class in LowerFormalArguments!"); EVT ValVT = VA.getValVT(); Reg = MF.addLiveIn(Reg, RC); @@ -2657,24 +2605,6 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) - // Add return address for callable functions. 
- if (!Info->isEntryFunction()) { - const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); - SDValue ReturnAddrReg = CreateLiveInRegister( - DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); - - SDValue ReturnAddrVirtualReg = - DAG.getRegister(MF.getRegInfo().createVirtualRegister( - CallConv != CallingConv::AMDGPU_Gfx - ? &AMDGPU::CCR_SGPR_64RegClass - : &AMDGPU::Gfx_CCR_SGPR_64RegClass), - MVT::i64); - Chain = - DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag); - Flag = Chain.getValue(1); - RetOps.push_back(ReturnAddrVirtualReg); - } - // Copy the result values into the output registers. for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E; ++I, ++RealRVLocIdx) { @@ -2731,15 +2661,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(Flag); unsigned Opc = AMDGPUISD::ENDPGM; - if (!IsWaveEnd) { - if (IsShader) - Opc = AMDGPUISD::RETURN_TO_EPILOG; - else if (CallConv == CallingConv::AMDGPU_Gfx) - Opc = AMDGPUISD::RET_GFX_FLAG; - else - Opc = AMDGPUISD::RET_FLAG; - } - + if (!IsWaveEnd) + Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG; return DAG.getNode(Opc, DL, MVT::Other, RetOps); } @@ -3321,21 +3244,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } - SDValue PhysReturnAddrReg; - if (IsTailCall) { - // Since the return is being combined with the call, we need to pass on the - // return address. - - const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); - SDValue ReturnAddrReg = CreateLiveInRegister( - DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); - - PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF), - MVT::i64); - Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag); - InFlag = Chain.getValue(1); - } - // We don't usually want to end the call-sequence here because we would tidy // the frame up *after* the call, however in the ABI-changing tail-call case // we've carefully laid out the parameters so that when sp is reset they'll be @@ -3365,8 +3273,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, // this information must travel along with the operation for eventual // consumption by emitEpilogue. Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32)); - - Ops.push_back(PhysReturnAddrReg); } // Add argument registers to the end of the list so that they are known live @@ -4104,6 +4010,21 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO); + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src0 = MI.getOperand(1); + MachineOperand &Src1 = MI.getOperand(2); + + if (IsAdd && ST.hasLshlAddB64()) { + auto Add = BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_LSHL_ADD_U64_e64), + Dest.getReg()) + .add(Src0) + .addImm(0) + .add(Src1); + TII->legalizeOperands(*Add); + MI.eraseFromParent(); + return BB; + } + const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -4112,10 +4033,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( Register CarryReg = MRI.createVirtualRegister(CarryRC); Register DeadCarryReg = MRI.createVirtualRegister(CarryRC); - MachineOperand &Dest = MI.getOperand(0); - MachineOperand &Src0 = MI.getOperand(1); - MachineOperand &Src1 = MI.getOperand(2); - const TargetRegisterClass *Src0RC = Src0.isReg() ? 
MRI.getRegClass(Src0.getReg()) : &AMDGPU::VReg_64RegClass; @@ -4390,29 +4307,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::DS_GWS_INIT: case AMDGPU::DS_GWS_SEMA_BR: case AMDGPU::DS_GWS_BARRIER: - if (Subtarget->needsAlignedVGPRs()) { - // Add implicit aligned super-reg to force alignment on the data operand. - const DebugLoc &DL = MI.getDebugLoc(); - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); - MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::data0); - Register DataReg = Op->getReg(); - bool IsAGPR = TRI->isAGPR(MRI, DataReg); - Register Undef = MRI.createVirtualRegister( - IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), Undef); - Register NewVR = - MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass - : &AMDGPU::VReg_64_Align2RegClass); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), NewVR) - .addReg(DataReg, 0, Op->getSubReg()) - .addImm(AMDGPU::sub0) - .addReg(Undef) - .addImm(AMDGPU::sub1); - Op->setReg(NewVR); - Op->setSubReg(AMDGPU::sub0); - MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); - } + TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0); LLVM_FALLTHROUGH; case AMDGPU::DS_GWS_SEMA_V: case AMDGPU::DS_GWS_SEMA_P: @@ -4500,6 +4395,18 @@ bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const { return isTypeLegal(VT.getScalarType()); } +bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const { + switch (Op.getValue(0).getSimpleValueType().SimpleTy) { + case MVT::f32: + return Subtarget->hasAtomicFaddRtnInsts(); + case MVT::v2f16: + case MVT::f64: + return Subtarget->hasGFX90AInsts(); + default: + return false; + } +} + bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const { // This currently forces unfolding various combinations of fsub into fma with // free fneg'd operands. As long as we have fast FMA (controlled by @@ -4560,7 +4467,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, // Otherwise f32 mad is always full rate and returns the same result as // the separate operations so should be preferred over fma. - // However does not support denomals. + // However does not support denormals. 
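The new hasAtomicFaddRtnForTy hook above is a pure type-to-feature table. A minimal sketch of the mapping it encodes, with plain booleans standing in for the two subtarget queries (illustrative, not the LLVM API):

// Which types may use the value-returning buffer atomic fadd, mirroring the
// switch in hasAtomicFaddRtnForTy: f32 needs the returning-fadd instruction
// set, v2f16 and f64 need the gfx90a instruction set.
enum class FaddTy { F32, V2F16, F64, Other };

bool hasAtomicFaddRtn(FaddTy Ty, bool HasAtomicFaddRtnInsts,
                      bool HasGFX90AInsts) {
  switch (Ty) {
  case FaddTy::F32:
    return HasAtomicFaddRtnInsts;
  case FaddTy::V2F16:
  case FaddTy::F64:
    return HasGFX90AInsts;
  default:
    return false;
  }
}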
if (hasFP32Denormals(MF)) return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts(); @@ -4653,8 +4560,9 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op, unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 || - VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8f32 || - VT == MVT::v16f32 || VT == MVT::v32f32); + VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 || + VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || + VT == MVT::v32f32); SDValue Lo0, Hi0; std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0); @@ -4676,8 +4584,9 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 || - VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v8f32 || - VT == MVT::v16f32 || VT == MVT::v32f32); + VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 || + VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || + VT == MVT::v32f32); SDValue Lo0, Hi0; SDValue Op0 = Op.getOperand(0); @@ -4738,10 +4647,30 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return lowerSCALAR_TO_VECTOR(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); + case ISD::FPTRUNC_ROUND: { + unsigned Opc; + SDLoc DL(Op); + + if (Op.getOperand(0)->getValueType(0) != MVT::f32) + return SDValue(); + + // Get the rounding mode from the last operand + int RoundMode = cast(Op.getOperand(1))->getZExtValue(); + if (RoundMode == (int)RoundingMode::TowardPositive) + Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD; + else if (RoundMode == (int)RoundingMode::TowardNegative) + Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD; + else + return SDValue(); + + return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0)); + } case ISD::TRAP: return lowerTRAP(Op, DAG); case ISD::DEBUGTRAP: @@ -5356,7 +5285,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op, if (IsIEEEMode) return expandFMINNUM_FMAXNUM(Op.getNode(), DAG); - if (VT == MVT::v4f16 || VT == MVT::v8f16) + if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16) return splitBinaryVectorOp(Op, DAG); return Op; } @@ -5439,24 +5368,41 @@ SDValue SITargetLowering::lowerTrapEndpgm( return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); } +SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, + const SDLoc &DL, Align Alignment, ImplicitParameter Param) const { + MachineFunction &MF = DAG.getMachineFunction(); + uint64_t Offset = getImplicitParameterOffset(MF, Param); + SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset); + MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment, + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); +} + SDValue SITargetLowering::lowerTrapHsaQueuePtr( SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Chain = Op.getOperand(0); - MachineFunction &MF = DAG.getMachineFunction(); - SIMachineFunctionInfo *Info = MF.getInfo(); - Register UserSGPR = Info->getQueuePtrUserSGPR(); - SDValue QueuePtr; - if (UserSGPR == AMDGPU::NoRegister) { - // We probably are in a function incorrectly marked with - // 
amdgpu-no-queue-ptr. This is undefined. We don't want to delete the trap, - // so just use a null pointer. - QueuePtr = DAG.getConstant(0, SL, MVT::i64); + // For code object version 5, QueuePtr is passed through implicit kernarg. + if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + QueuePtr = + loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR); } else { - QueuePtr = CreateLiveInRegister( - DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *Info = MF.getInfo(); + Register UserSGPR = Info->getQueuePtrUserSGPR(); + + if (UserSGPR == AMDGPU::NoRegister) { + // We probably are in a function incorrectly marked with + // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the + // trap, so just use a null pointer. + QueuePtr = DAG.getConstant(0, SL, MVT::i64); + } else { + QueuePtr = CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, UserSGPR, + MVT::i64); + } } SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); @@ -5532,6 +5478,14 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount); } + // For code object version 5, private_base and shared_base are passed through + // implicit kernargs. + if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + ImplicitParameter Param = + (AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE; + return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param); + } + MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *Info = MF.getInfo(); Register UserSGPR = Info->getQueuePtrUserSGPR(); @@ -5691,14 +5645,11 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, EVT EltVT = VecVT.getVectorElementType(); unsigned VecSize = VecVT.getSizeInBits(); unsigned EltSize = EltVT.getSizeInBits(); + SDLoc SL(Op); - - assert(VecSize <= 64); - + // Specially handle the case of v4i16 with static indexing. unsigned NumElts = VecVT.getVectorNumElements(); - SDLoc SL(Op); auto KIdx = dyn_cast(Idx); - if (NumElts == 4 && EltSize == 16 && KIdx) { SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec); @@ -5726,35 +5677,41 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat); } + // Static indexing does not lower to stack access, and hence there is no need + // for special custom lowering to avoid stack access. if (isa(Idx)) return SDValue(); - MVT IntVT = MVT::getIntegerVT(VecSize); - - // Avoid stack access for dynamic indexing. + // Avoid stack access for dynamic indexing by custom lowering to // v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec - // Create a congruent vector with the target value in each element so that - // the required element can be masked and ORed into the target vector. - SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT, - DAG.getSplatBuildVector(VecVT, SL, InsVal)); + assert(VecSize <= 64 && "Expected target vector size to be <= 64 bits"); + + MVT IntVT = MVT::getIntegerVT(VecSize); + // Convert vector index to bit-index and get the required bit mask. assert(isPowerOf2_32(EltSize)); SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32); - - // Convert vector index to bit-index. SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor); - - SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec); SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT, DAG.getConstant(0xffff, SL, IntVT), ScaledIdx); + // 1. 
Create a congruent vector with the target value in each element.
+  SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
+                               DAG.getSplatBuildVector(VecVT, SL, InsVal));
+
+  // 2. Mask off all other indices except the required index within (1).
   SDValue LHS = DAG.getNode(ISD::AND, SL, IntVT, BFM, ExtVal);
+
+  // 3. Mask off the required index within the target vector.
+  SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
   SDValue RHS = DAG.getNode(ISD::AND, SL, IntVT,
                             DAG.getNOT(SL, BFM, IntVT), BCVec);
 
+  // 4. OR (2) and (3) to produce the updated target vector.
   SDValue BFI = DAG.getNode(ISD::OR, SL, IntVT, LHS, RHS);
+
   return DAG.getNode(ISD::BITCAST, SL, VecVT, BFI);
 }
 
@@ -5778,17 +5735,35 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
     return Combined;
 
-  if (VecSize == 128) {
+  if (VecSize == 128 || VecSize == 256) {
     SDValue Lo, Hi;
     EVT LoVT, HiVT;
-    SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
-    Lo =
-        DAG.getBitcast(LoVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
-                                         V2, DAG.getConstant(0, SL, MVT::i32)));
-    Hi =
-        DAG.getBitcast(HiVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
-                                         V2, DAG.getConstant(1, SL, MVT::i32)));
+
+    if (VecSize == 128) {
+      SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
+      Lo = DAG.getBitcast(LoVT,
+                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+                                      DAG.getConstant(0, SL, MVT::i32)));
+      Hi = DAG.getBitcast(HiVT,
+                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+                                      DAG.getConstant(1, SL, MVT::i32)));
+    } else {
+      assert(VecSize == 256);
+
+      SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
+      SDValue Parts[4];
+      for (unsigned P = 0; P < 4; ++P) {
+        Parts[P] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+                               DAG.getConstant(P, SL, MVT::i32));
+      }
+
+      Lo = DAG.getBitcast(LoVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
+                                            Parts[0], Parts[1]));
+      Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
+                                            Parts[2], Parts[3]));
+    }
+
     EVT IdxVT = Idx.getValueType();
     unsigned NElem = VecVT.getVectorNumElements();
     assert(isPowerOf2_32(NElem));
@@ -5800,10 +5775,19 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
 
   assert(VecSize <= 64);
 
+  MVT IntVT = MVT::getIntegerVT(VecSize);
+
+  // If Vec is just a SCALAR_TO_VECTOR, then use the scalar integer directly.
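The numbered BFM/BFI steps in lowerINSERT_VECTOR_ELT above are ordinary bit arithmetic once the vector is viewed as one integer. A self-contained check of the merge for a v4i16 held in a uint64_t (a model of what the selected code computes, not the DAG construction itself):

#include <cassert>
#include <cstdint>

// Insert a 16-bit element into a v4i16 (as uint64_t) at a dynamic index,
// following the same four steps as the lowering above.
uint64_t insertElt(uint64_t Vec, uint16_t Val, unsigned Idx) {
  unsigned ScaledIdx = Idx * 16;                // vector index -> bit index
  uint64_t BFM = 0xffffULL << ScaledIdx;        // mask selecting the slot
  uint64_t Splat = 0x0001000100010001ULL * Val; // 1. splat Val to all lanes
  uint64_t LHS = BFM & Splat;                   // 2. keep only the new lane
  uint64_t RHS = ~BFM & Vec;                    // 3. clear that lane in Vec
  return LHS | RHS;                             // 4. merge
}

int main() {
  uint64_t V = 0x4444333322221111ULL; // <0x1111, 0x2222, 0x3333, 0x4444>
  assert(insertElt(V, 0xbeef, 2) == 0x4444beef22221111ULL);
}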
+ SDValue VecBC = peekThroughBitcasts(Vec); + if (VecBC.getOpcode() == ISD::SCALAR_TO_VECTOR) { + SDValue Src = VecBC.getOperand(0); + Src = DAG.getBitcast(Src.getValueType().changeTypeToInteger(), Src); + Vec = DAG.getAnyExtOrTrunc(Src, SL, IntVT); + } + unsigned EltSize = EltVT.getSizeInBits(); assert(isPowerOf2_32(EltSize)); - MVT IntVT = MVT::getIntegerVT(VecSize); SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32); // Convert vector index to bit-index (* EltSize) @@ -5877,6 +5861,22 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces); } +SDValue SITargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDValue SVal = Op.getOperand(0); + EVT ResultVT = Op.getValueType(); + EVT SValVT = SVal.getValueType(); + SDValue UndefVal = DAG.getUNDEF(SValVT); + SDLoc SL(Op); + + SmallVector VElts; + VElts.push_back(SVal); + for (int I = 1, E = ResultVT.getVectorNumElements(); I < E; ++I) + VElts.push_back(UndefVal); + + return DAG.getBuildVector(ResultVT, SL, VElts); +} + SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); @@ -5906,6 +5906,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, VT, Blend); } + if (VT == MVT::v16i16 || VT == MVT::v16f16) { + EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), + VT.getVectorNumElements() / 4); + MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits()); + + SmallVector Parts[4]; + for (unsigned I = 0, E = VT.getVectorNumElements() / 4; I != E; ++I) { + for (unsigned P = 0; P < 4; ++P) + Parts[P].push_back(Op.getOperand(I + P * E)); + } + SDValue Casts[4]; + for (unsigned P = 0; P < 4; ++P) { + SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]); + Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec); + } + + SDValue Blend = + DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 4), SL, Casts); + return DAG.getNode(ISD::BITCAST, SL, VT, Blend); + } + assert(VT == MVT::v2f16 || VT == MVT::v2i16); assert(!Subtarget->hasVOP3PInsts() && "this should be legal"); @@ -6277,6 +6298,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); unsigned IntrOpcode = Intr->BaseOpcode; bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget); + bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget); SmallVector ResultTypes(Op->values()); SmallVector OrigResultTypes(Op->values()); @@ -6455,6 +6477,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op, // // SIShrinkInstructions will convert NSA encodings to non-NSA after register // allocation when possible. + // + // TODO: we can actually allow partial NSA where the final register is a + // contiguous set of the remaining addresses. + // This could help where there are more addresses than supported. bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3 && VAddrs.size() <= (unsigned)ST->getNSAMaxSize(); @@ -6561,7 +6587,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op, UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32; int Opcode = -1; - if (IsGFX10Plus) { + if (IsGFX11Plus) { + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, + UseNSA ? AMDGPU::MIMGEncGfx11NSA + : AMDGPU::MIMGEncGfx11Default, + NumVDataDwords, NumVAddrDwords); + } else if (IsGFX10Plus) { Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, UseNSA ? 
AMDGPU::MIMGEncGfx10NSA : AMDGPU::MIMGEncGfx10Default, @@ -6685,6 +6716,32 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, return Loads[0]; } +SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op, + unsigned Dim, + const ArgDescriptor &Arg) const { + SDLoc SL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned MaxID = Subtarget->getMaxWorkitemID(MF.getFunction(), Dim); + if (MaxID == 0) + return DAG.getConstant(0, SL, MVT::i32); + + SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, + SDLoc(DAG.getEntryNode()), Arg); + + // Don't bother inserting AssertZext for packed IDs since we're emitting the + // masking operations anyway. + // + // TODO: We could assert the top bit is 0 for the source copy. + if (Arg.isMasked()) + return Val; + + // Preserve the known bits after expansion to a copy. + EVT SmallVT = + EVT::getIntegerVT(*DAG.getContext(), 32 - countLeadingZeros(MaxID)); + return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Val, + DAG.getValueType(SmallVT)); +} + SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -6831,26 +6888,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); case Intrinsic::amdgcn_workitem_id_x: - if (Subtarget->getMaxWorkitemID(MF.getFunction(), 0) == 0) - return DAG.getConstant(0, DL, MVT::i32); - - return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, - SDLoc(DAG.getEntryNode()), - MFI->getArgInfo().WorkItemIDX); + return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: - if (Subtarget->getMaxWorkitemID(MF.getFunction(), 1) == 0) - return DAG.getConstant(0, DL, MVT::i32); - - return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, - SDLoc(DAG.getEntryNode()), - MFI->getArgInfo().WorkItemIDY); + return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY); case Intrinsic::amdgcn_workitem_id_z: - if (Subtarget->getMaxWorkitemID(MF.getFunction(), 2) == 0) - return DAG.getConstant(0, DL, MVT::i32); - - return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, - SDLoc(DAG.getEntryNode()), - MFI->getArgInfo().WorkItemIDZ); + return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ); case Intrinsic::amdgcn_wavefrontsize: return DAG.getConstant(MF.getSubtarget().getWavefrontSize(), SDLoc(Op), MVT::i32); @@ -7157,12 +7199,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction()); unsigned Offset0 = OrderedCountIndex << 2; - unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) | - (Instruction << 4); + unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4); if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) Offset1 |= (CountDw - 1) << 6; + if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11) + Offset1 |= ShaderType << 2; + unsigned Offset = Offset0 | (Offset1 << 8); SDValue Ops[] = { @@ -7441,7 +7485,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR; break; case Intrinsic::amdgcn_buffer_atomic_fadd: - if (!Op.getValue(0).use_empty() && !Subtarget->hasGFX90AInsts()) { + if (!Op.getValue(0).use_empty() && !hasAtomicFaddRtnForTy(Op)) { DiagnosticInfoUnsupported NoFpRet(DAG.getMachineFunction().getFunction(), "return versions of fp atomics not supported", 
@@ -7609,12 +7653,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return SDValue(); } + const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget); const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16; const bool Is64 = NodePtr.getValueType() == MVT::i64; const unsigned NumVDataDwords = 4; const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); - const bool UseNSA = Subtarget->hasNSAEncoding() && - NumVAddrDwords <= Subtarget->getNSAMaxSize(); + const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords; + const bool UseNSA = + Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize(); const unsigned BaseOpcodes[2][2] = { {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16}, {AMDGPU::IMAGE_BVH64_INTERSECT_RAY, @@ -7622,12 +7668,15 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, int Opcode; if (UseNSA) { Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], - AMDGPU::MIMGEncGfx10NSA, NumVDataDwords, - NumVAddrDwords); + IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA + : AMDGPU::MIMGEncGfx10NSA, + NumVDataDwords, NumVAddrDwords); } else { - Opcode = AMDGPU::getMIMGOpcode( - BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10Default, NumVDataDwords, - PowerOf2Ceil(NumVAddrDwords)); + Opcode = + AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], + IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default + : AMDGPU::MIMGEncGfx10Default, + NumVDataDwords, PowerOf2Ceil(NumVAddrDwords)); } assert(Opcode != -1); @@ -7660,15 +7709,36 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, } }; - if (Is64) - DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0, 2); - else + if (UseNSA && IsGFX11Plus) { Ops.push_back(NodePtr); + Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent)); + Ops.push_back(RayOrigin); + if (IsA16) { + SmallVector DirLanes, InvDirLanes, MergedLanes; + DAG.ExtractVectorElements(RayDir, DirLanes, 0, 3); + DAG.ExtractVectorElements(RayInvDir, InvDirLanes, 0, 3); + for (unsigned I = 0; I < 3; ++I) { + MergedLanes.push_back(DAG.getBitcast( + MVT::i32, DAG.getBuildVector(MVT::v2f16, DL, + {DirLanes[I], InvDirLanes[I]}))); + } + Ops.push_back(DAG.getBuildVector(MVT::v3i32, DL, MergedLanes)); + } else { + Ops.push_back(RayDir); + Ops.push_back(RayInvDir); + } + } else { + if (Is64) + DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0, + 2); + else + Ops.push_back(NodePtr); - Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent)); - packLanes(RayOrigin, true); - packLanes(RayDir, true); - packLanes(RayInvDir, false); + Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent)); + packLanes(RayOrigin, true); + packLanes(RayDir, true); + packLanes(RayInvDir, false); + } if (!UseNSA) { // Build a single vector containing all the operands so far prepared. @@ -7868,6 +7938,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_exp_compr: { + if (!Subtarget->hasCompressedExport()) { + DiagnosticInfoUnsupported BadIntrin( + DAG.getMachineFunction().getFunction(), + "intrinsic not supported on subtarget", DL.getDebugLoc()); + DAG.getContext()->diagnose(BadIntrin); + } SDValue Src0 = Op.getOperand(4); SDValue Src1 = Op.getOperand(5); // Hack around illegal type on SI by directly selecting it. 
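On the GFX11 NSA path above, each pair of f16 ray-direction / inverse-direction lanes is merged into one 32-bit register through a two-element v2f16 build vector, so three dwords carry all six half-precision lanes. In raw bits, assuming the usual lane order with element 0 in the low half of the dword:

#include <cassert>
#include <cstdint>

// Pack one f16 dir lane and the matching inv-dir lane (given as raw half
// bit patterns) into a single dword, as the merged v2f16 build above does.
uint32_t packLanePair(uint16_t DirBits, uint16_t InvDirBits) {
  return uint32_t(DirBits) | (uint32_t(InvDirBits) << 16);
}

int main() {
  // half 1.0 = 0x3c00, half -1.0 = 0xbc00
  assert(packLanePair(0x3c00, 0xbc00) == 0xbc003c00u);
}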
@@ -8110,6 +8186,160 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); } + case Intrinsic::amdgcn_raw_buffer_load_lds: + case Intrinsic::amdgcn_struct_buffer_load_lds: { + unsigned Opc; + bool HasVIndex = IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds; + unsigned OpOffset = HasVIndex ? 1 : 0; + SDValue VOffset = Op.getOperand(5 + OpOffset); + auto CVOffset = dyn_cast(VOffset); + bool HasVOffset = !CVOffset || !CVOffset->isZero(); + unsigned Size = Op->getConstantOperandVal(4); + + switch (Size) { + default: + return SDValue(); + case 1: + Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET; + break; + case 2: + Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET; + break; + case 4: + Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; + break; + } + + SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + + SmallVector Ops; + + if (HasVIndex && HasVOffset) + Ops.push_back(DAG.getBuildVector(MVT::v2i32, DL, + { Op.getOperand(5), // VIndex + VOffset })); + else if (HasVIndex) + Ops.push_back(Op.getOperand(5)); + else if (HasVOffset) + Ops.push_back(VOffset); + + Ops.push_back(Op.getOperand(2)); // rsrc + Ops.push_back(Op.getOperand(6 + OpOffset)); // soffset + Ops.push_back(Op.getOperand(7 + OpOffset)); // imm offset + unsigned Aux = Op.getConstantOperandVal(8 + OpOffset); + Ops.push_back( + DAG.getTargetConstant(Aux & AMDGPU::CPol::ALL, DL, MVT::i8)); // cpol + Ops.push_back( + DAG.getTargetConstant((Aux >> 3) & 1, DL, MVT::i8)); // swz + Ops.push_back(M0Val.getValue(0)); // Chain + Ops.push_back(M0Val.getValue(1)); // Glue + + auto *M = cast(Op); + MachineMemOperand *LoadMMO = M->getMemOperand(); + MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); + LoadPtrI.Offset = Op->getConstantOperandVal(7 + OpOffset); + MachinePointerInfo StorePtrI = LoadPtrI; + StorePtrI.V = nullptr; + StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; + + auto F = LoadMMO->getFlags() & + ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad); + LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, + Size, LoadMMO->getBaseAlign()); + + MachineMemOperand *StoreMMO = + MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore, + sizeof(int32_t), LoadMMO->getBaseAlign()); + + auto Load = DAG.getMachineNode(Opc, DL, M->getVTList(), Ops); + DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO}); + + return SDValue(Load, 0); + } + case Intrinsic::amdgcn_global_load_lds: { + unsigned Opc; + unsigned Size = Op->getConstantOperandVal(4); + switch (Size) { + default: + return SDValue(); + case 1: + Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE; + break; + case 2: + Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT; + break; + case 4: + Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; + break; + } + + auto *M = cast(Op); + SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + + SmallVector Ops; + + SDValue Addr = Op.getOperand(2); // Global ptr + SDValue VOffset; + // Try to split SAddr and VOffset. 
Global and LDS pointers share the same + // immediate offset, so we cannot use a regular SelectGlobalSAddr(). + if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) { + SDValue LHS = Addr.getOperand(0); + SDValue RHS = Addr.getOperand(1); + + if (LHS->isDivergent()) + std::swap(LHS, RHS); + + if (!LHS->isDivergent() && RHS.getOpcode() == ISD::ZERO_EXTEND && + RHS.getOperand(0).getValueType() == MVT::i32) { + // add (i64 sgpr), (zero_extend (i32 vgpr)) + Addr = LHS; + VOffset = RHS.getOperand(0); + } + } + + Ops.push_back(Addr); + if (!Addr->isDivergent()) { + Opc = AMDGPU::getGlobalSaddrOp(Opc); + if (!VOffset) + VOffset = SDValue( + DAG.getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, + DAG.getTargetConstant(0, DL, MVT::i32)), 0); + Ops.push_back(VOffset); + } + + Ops.push_back(Op.getOperand(5)); // Offset + Ops.push_back(Op.getOperand(6)); // CPol + Ops.push_back(M0Val.getValue(0)); // Chain + Ops.push_back(M0Val.getValue(1)); // Glue + + MachineMemOperand *LoadMMO = M->getMemOperand(); + MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); + LoadPtrI.Offset = Op->getConstantOperandVal(5); + MachinePointerInfo StorePtrI = LoadPtrI; + LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; + StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; + auto F = LoadMMO->getFlags() & + ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad); + LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, + Size, LoadMMO->getBaseAlign()); + MachineMemOperand *StoreMMO = + MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore, + sizeof(int32_t), Align(4)); + + auto Load = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops); + DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO}); + + return SDValue(Load, 0); + } case Intrinsic::amdgcn_end_cf: return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other, Op->getOperand(2), Chain), 0); @@ -8271,7 +8501,7 @@ static SDValue getLoadExtOrTrunc(SelectionDAG &DAG, SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - if (Ld->getAlignment() < 4 || Ld->isDivergent()) + if (Ld->getAlign() < Align(4) || Ld->isDivergent()) return SDValue(); // FIXME: Constant loads should all be marked invariant. @@ -8296,14 +8526,11 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const // TODO: Drop only high part of range. 
SDValue Ptr = Ld->getBasePtr(); - SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, - MVT::i32, SL, Ld->getChain(), Ptr, - Ld->getOffset(), - Ld->getPointerInfo(), MVT::i32, - Ld->getAlignment(), - Ld->getMemOperand()->getFlags(), - Ld->getAAInfo(), - nullptr); // Drop ranges + SDValue NewLoad = DAG.getLoad( + ISD::UNINDEXED, ISD::NON_EXTLOAD, MVT::i32, SL, Ld->getChain(), Ptr, + Ld->getOffset(), Ld->getPointerInfo(), MVT::i32, Ld->getAlign(), + Ld->getMemOperand()->getFlags(), Ld->getAAInfo(), + nullptr); // Drop ranges EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); if (MemVT.isFloatingPoint()) { @@ -8392,17 +8619,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - unsigned Alignment = Load->getAlignment(); + Align Alignment = Load->getAlign(); unsigned AS = Load->getAddressSpace(); - if (Subtarget->hasLDSMisalignedBug() && - AS == AMDGPUAS::FLAT_ADDRESS && - Alignment < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) { + if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS && + Alignment.value() < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) { return SplitVectorLoad(Op, DAG); } MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *MFI = MF.getInfo(); - // If there is a possibilty that flat instruction access scratch memory + // If there is a possibility that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. if (AS == AMDGPUAS::FLAT_ADDRESS && !Subtarget->hasMultiDwordFlatScratchAddressing()) @@ -8413,7 +8639,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (AS == AMDGPUAS::CONSTANT_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { - if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32) { + if (!Op->isDivergent() && Alignment >= Align(4) && NumElements < 32) { if (MemVT.isPow2VectorType()) return SDValue(); return WidenOrSplitVectorLoad(Op, DAG); @@ -8429,7 +8655,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { AS == AMDGPUAS::GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) && - Alignment >= 4 && NumElements < 32) { + Alignment >= Align(4) && NumElements < 32) { if (MemVT.isPow2VectorType()) return SDValue(); return WidenOrSplitVectorLoad(Op, DAG); @@ -8479,27 +8705,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("unsupported private_element_size"); } } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { - // Use ds_read_b128 or ds_read_b96 when possible. - if (Subtarget->hasDS96AndDS128() && - ((Subtarget->useDS128() && MemVT.getStoreSize() == 16) || - MemVT.getStoreSize() == 12) && - allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS, - Load->getAlign())) + bool Fast = false; + auto Flags = Load->getMemOperand()->getFlags(); + if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS, + Load->getAlign(), Flags, &Fast) && + Fast) return SDValue(); - if (NumElements > 2) + if (MemVT.isVector()) return SplitVectorLoad(Op, DAG); - - // SI has a hardware bug in the LDS / GDS boounds checking: if the base - // address is negative, then the instruction is incorrectly treated as - // out-of-bounds even if base + offsets is in bounds. 
Split vectorized - // loads here to avoid emitting ds_read2_b32. We may re-combine the - // load later in the SILoadStoreOptimizer. - if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && - NumElements == 2 && MemVT.getStoreSize() == 8 && - Load->getAlignment() < 8) { - return SplitVectorLoad(Op, DAG); - } } if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), @@ -8514,7 +8728,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - if (VT.getSizeInBits() == 128) + if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256) return splitTernaryVectorOp(Op, DAG); assert(VT.getSizeInBits() == 64); @@ -8946,13 +9160,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { unsigned AS = Store->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS && - Store->getAlignment() < VT.getStoreSize() && VT.getSizeInBits() > 32) { + Store->getAlign().value() < VT.getStoreSize() && VT.getSizeInBits() > 32) { return SplitVectorStore(Op, DAG); } MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *MFI = MF.getInfo(); - // If there is a possibilty that flat instruction access scratch memory + // If there is a possibility that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. if (AS == AMDGPUAS::FLAT_ADDRESS && !Subtarget->hasMultiDwordFlatScratchAddressing()) @@ -8990,39 +9204,21 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("unsupported private_element_size"); } } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { - // Use ds_write_b128 or ds_write_b96 when possible. - if (Subtarget->hasDS96AndDS128() && - ((Subtarget->useDS128() && VT.getStoreSize() == 16) || - (VT.getStoreSize() == 12)) && - allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS, - Store->getAlign())) + bool Fast = false; + auto Flags = Store->getMemOperand()->getFlags(); + if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS, + Store->getAlign(), Flags, &Fast) && + Fast) return SDValue(); - if (NumElements > 2) + if (VT.isVector()) return SplitVectorStore(Op, DAG); - // SI has a hardware bug in the LDS / GDS boounds checking: if the base - // address is negative, then the instruction is incorrectly treated as - // out-of-bounds even if base + offsets is in bounds. Split vectorized - // stores here to avoid emitting ds_write2_b32. We may re-combine the - // store later in the SILoadStoreOptimizer. - if (!Subtarget->hasUsableDSOffset() && - NumElements == 2 && VT.getStoreSize() == 8 && - Store->getAlignment() < 8) { - return SplitVectorStore(Op, DAG); - } - - if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - VT, *Store->getMemOperand())) { - if (VT.isVector()) - return SplitVectorStore(Op, DAG); - return expandUnalignedStore(Store, DAG); - } - - return SDValue(); - } else { - llvm_unreachable("unhandled address space"); + return expandUnalignedStore(Store, DAG); } + + // Probably an invalid store. If so we'll end up emitting a selection error. 
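With this change both LowerLOAD and LowerSTORE funnel the LDS case through one three-way decision instead of the old per-size special cases: keep the access when the misaligned-access hook reports it both legal and fast, split vectors, expand everything else. Sketched with illustrative names:

enum class Legalization { Keep, SplitVector, Expand };

// LDS path shared by LowerLOAD/LowerSTORE above: one query to the alignment
// hook replaces the former ds_read_b96/b128 and SI-bug special cases.
Legalization legalizeLdsAccess(bool AllowedAndFast, bool IsVector) {
  if (AllowedAndFast)
    return Legalization::Keep;        // select ds_read*/ds_write* directly
  if (IsVector)
    return Legalization::SplitVector; // retry with smaller pieces
  return Legalization::Expand;        // byte-wise unaligned expansion
}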
+ return SDValue(); } SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { @@ -10041,7 +10237,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( } } - // If one half is undef, and one is constant, perfer a splat vector rather + // If one half is undef, and one is constant, prefer a splat vector rather // than the normal qNaN. If it's a register, prefer 0.0 since that's // cheaper to use and may be free with a packed operation. if (NewElts[0].isUndef()) { @@ -10349,7 +10545,8 @@ SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N, // expanded into a set of cmp/select instructions. bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, - bool IsDivergentIdx) { + bool IsDivergentIdx, + const GCNSubtarget *Subtarget) { if (UseDivergentRegisterIndexing) return false; @@ -10371,10 +10568,18 @@ bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize, // Large vectors would yield too many compares and v_cndmask_b32 instructions. unsigned NumInsts = NumElem /* Number of compares */ + ((EltSize + 31) / 32) * NumElem /* Number of cndmasks */; - return NumInsts <= 16; + + // On some architectures (GFX9) movrel is not available and it's better + // to expand. + if (!Subtarget->hasMovrel()) + return NumInsts <= 16; + + // If movrel is available, use it instead of expanding for vector of 8 + // elements. + return NumInsts <= 15; } -static bool shouldExpandVectorDynExt(SDNode *N) { +bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const { SDValue Idx = N->getOperand(N->getNumOperands() - 1); if (isa(Idx)) return false; @@ -10385,8 +10590,8 @@ static bool shouldExpandVectorDynExt(SDNode *N) { unsigned EltSize = EltVT.getSizeInBits(); unsigned NumElem = VecVT.getVectorNumElements(); - return SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem, - Idx->isDivergent()); + return SITargetLowering::shouldExpandVectorDynExt( + EltSize, NumElem, Idx->isDivergent(), getSubtarget()); } SDValue SITargetLowering::performExtractVectorEltCombine( @@ -10450,7 +10655,7 @@ SDValue SITargetLowering::performExtractVectorEltCombine( unsigned EltSize = EltVT.getSizeInBits(); // EXTRACT_VECTOR_ELT (, var-idx) => n x select (e, const-idx) - if (::shouldExpandVectorDynExt(N)) { + if (shouldExpandVectorDynExt(N)) { SDLoc SL(N); SDValue Idx = N->getOperand(1); SDValue V; @@ -10513,7 +10718,7 @@ SITargetLowering::performInsertVectorEltCombine(SDNode *N, // INSERT_VECTOR_ELT (, var-idx) // => BUILD_VECTOR n x select (e, const-idx) - if (!::shouldExpandVectorDynExt(N)) + if (!shouldExpandVectorDynExt(N)) return SDValue(); SelectionDAG &DAG = DCI.DAG; @@ -10603,39 +10808,145 @@ static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad); } -SDValue SITargetLowering::performAddCombine(SDNode *N, +// Fold (add (mul x, y), z) --> (mad_[iu]64_[iu]32 x, y, z) plus high +// multiplies, if any. +// +// Full 64-bit multiplies that feed into an addition are lowered here instead +// of using the generic expansion. The generic expansion ends up with +// a tree of ADD nodes that prevents us from using the "add" part of the +// MAD instruction. The expansion produced here results in a chain of ADDs +// instead of a tree. 
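The cost check in shouldExpandVectorDynExt above prices the cmp/select expansion at one compare per element plus one v_cndmask_b32 per 32-bit chunk per element, and lowers the budget from 16 to 15 when movrel is available. The arithmetic, with the function's other early-outs (constant index, divergent-indexing option) omitted:

// NumElem compares + ceil(EltSize/32) * NumElem cndmasks, as above.
bool shouldExpandDynExt(unsigned EltSizeBits, unsigned NumElem,
                        bool HasMovrel) {
  unsigned NumInsts = NumElem + ((EltSizeBits + 31) / 32) * NumElem;
  return NumInsts <= (HasMovrel ? 15u : 16u);
}
// e.g. an 8 x 32-bit vector costs 8 + 8 = 16 instructions, so it is only
// expanded when movrel is unavailable.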
+SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const { + assert(N->getOpcode() == ISD::ADD); + SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); SDLoc SL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) - && Subtarget->hasMad64_32() && - !VT.isVector() && VT.getScalarSizeInBits() > 32 && - VT.getScalarSizeInBits() <= 64) { - if (LHS.getOpcode() != ISD::MUL) - std::swap(LHS, RHS); + if (VT.isVector()) + return SDValue(); - SDValue MulLHS = LHS.getOperand(0); - SDValue MulRHS = LHS.getOperand(1); - SDValue AddRHS = RHS; + // S_MUL_HI_[IU]32 was added in gfx9, which allows us to keep the overall + // result in scalar registers for uniform values. + if (!N->isDivergent() && Subtarget->hasSMulHi()) + return SDValue(); + + unsigned NumBits = VT.getScalarSizeInBits(); + if (NumBits <= 32 || NumBits > 64) + return SDValue(); + + if (LHS.getOpcode() != ISD::MUL) { + assert(RHS.getOpcode() == ISD::MUL); + std::swap(LHS, RHS); + } + + // Avoid the fold if it would unduly increase the number of multiplies due to + // multiple uses, except on hardware with full-rate multiply-add (which is + // part of full-rate 64-bit ops). + if (!Subtarget->hasFullRate64Ops()) { + unsigned NumUsers = 0; + for (SDNode *Use : LHS->uses()) { + // There is a use that does not feed into addition, so the multiply can't + // be removed. We prefer MUL + ADD + ADDC over MAD + MUL. + if (Use->getOpcode() != ISD::ADD) + return SDValue(); - // TODO: Maybe restrict if SGPR inputs. - if (numBitsUnsigned(MulLHS, DAG) <= 32 && - numBitsUnsigned(MulRHS, DAG) <= 32) { - MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32); - MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32); - AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64); - return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false); + // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer + // MUL + 3xADD + 3xADDC over 3xMAD. + ++NumUsers; + if (NumUsers >= 3) + return SDValue(); } + } + + SDValue MulLHS = LHS.getOperand(0); + SDValue MulRHS = LHS.getOperand(1); + SDValue AddRHS = RHS; + + // Always check whether operands are small unsigned values, since that + // knowledge is useful in more cases. Check for small signed values only if + // doing so can unlock a shorter code sequence. + bool MulLHSUnsigned32 = numBitsUnsigned(MulLHS, DAG) <= 32; + bool MulRHSUnsigned32 = numBitsUnsigned(MulRHS, DAG) <= 32; + + bool MulSignedLo = false; + if (!MulLHSUnsigned32 || !MulRHSUnsigned32) { + MulSignedLo = numBitsSigned(MulLHS, DAG) <= 32 && + numBitsSigned(MulRHS, DAG) <= 32; + } - if (numBitsSigned(MulLHS, DAG) <= 32 && numBitsSigned(MulRHS, DAG) <= 32) { - MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32); - MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32); - AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64); - return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true); + // The operands and final result all have the same number of bits. If + // operands need to be extended, they can be extended with garbage. The + // resulting garbage in the high bits of the mad_[iu]64_[iu]32 result is + // truncated away in the end. + if (VT != MVT::i64) { + MulLHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulLHS); + MulRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulRHS); + AddRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, AddRHS); + } + + // The basic code generated is conceptually straightforward. 
Pseudo code: + // + // accum = mad_64_32 lhs.lo, rhs.lo, accum + // accum.hi = add (mul lhs.hi, rhs.lo), accum.hi + // accum.hi = add (mul lhs.lo, rhs.hi), accum.hi + // + // The second and third lines are optional, depending on whether the factors + // are {sign,zero}-extended or not. + // + // The actual DAG is noisier than the pseudo code, but only due to + // instructions that disassemble values into low and high parts, and + // assemble the final result. + SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + SDValue One = DAG.getConstant(1, SL, MVT::i32); + + auto MulLHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulLHS); + auto MulRHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulRHS); + SDValue Accum = + getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo); + + if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) { + auto AccumLo = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, Zero); + auto AccumHi = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, One); + + if (!MulLHSUnsigned32) { + auto MulLHSHi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulLHS, One); + SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSHi, MulRHSLo); + AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi); + } + + if (!MulRHSUnsigned32) { + auto MulRHSHi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulRHS, One); + SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSLo, MulRHSHi); + AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi); + } + + Accum = DAG.getBuildVector(MVT::v2i32, SL, {AccumLo, AccumHi}); + Accum = DAG.getBitcast(MVT::i64, Accum); + } + + if (VT != MVT::i64) + Accum = DAG.getNode(ISD::TRUNCATE, SL, VT, Accum); + return Accum; +} + +SDValue SITargetLowering::performAddCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + SDLoc SL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + if (LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) { + if (Subtarget->hasMad64_32()) { + if (SDValue Folded = tryFoldToMad64_32(N, DCI)) + return Folded; } return SDValue(); @@ -10763,7 +11074,7 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N, SDValue RHS = N->getOperand(1); // These should really be instruction patterns, but writing patterns with - // source modiifiers is a pain. + // source modifiers is a pain. // fadd (fadd (a, a), b) -> mad 2.0, a, b if (LHS.getOpcode() == ISD::FADD) { @@ -10860,8 +11171,8 @@ SDValue SITargetLowering::performFMACombine(SDNode *N, return SDValue(); // fdot2_f32_f16 always flushes fp32 denormal operand and output to zero, - // regardless of the denorm mode setting. Therefore, unsafe-fp-math/fp-contract - // is sufficient to allow generaing fdot2. + // regardless of the denorm mode setting. Therefore, + // unsafe-fp-math/fp-contract is sufficient to allow generating fdot2. const TargetOptions &Options = DAG.getTarget().Options; if (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || (N->getFlags().hasAllowContract() && @@ -11562,7 +11873,7 @@ void SITargetLowering::AddIMGInit(MachineInstr &MI) const { if (DstSize < InitIdx) return; - // Create a register for the intialization value. + // Create a register for the initialization value. 
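The pseudo code above is the schoolbook decomposition of a 64x64-bit multiply-add: modulo 2^64 the two cross products only affect the high half, and the hi*hi term vanishes entirely. A quick correctness check in plain integer arithmetic:

#include <cassert>
#include <cstdint>

// accum = mad_u64_u32(lo(a), lo(b), c), then fold both cross products into
// the high half with 32-bit multiplies, as the pseudo code describes.
uint64_t mad64_32Chain(uint64_t A, uint64_t B, uint64_t C) {
  uint32_t ALo = uint32_t(A), AHi = uint32_t(A >> 32);
  uint32_t BLo = uint32_t(B), BHi = uint32_t(B >> 32);
  uint64_t Accum = uint64_t(ALo) * BLo + C;  // accum = mad_64_32 lo, lo, c
  uint32_t AccumHi = uint32_t(Accum >> 32);
  AccumHi += AHi * BLo;                      // accum.hi += mul lhs.hi, rhs.lo
  AccumHi += ALo * BHi;                      // accum.hi += mul lhs.lo, rhs.hi
  return (uint64_t(AccumHi) << 32) | uint32_t(Accum);
}

int main() {
  uint64_t A = 0x123456789abcdef0, B = 0xfedcba9876543210, C = 42;
  assert(mad64_32Chain(A, B, C) == A * B + C); // equal modulo 2^64
}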
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); unsigned NewDst = 0; // Final initialized value will be in here @@ -11608,7 +11919,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, TII->legalizeOperandsVOP3(MRI, MI); // Prefer VGPRs over AGPRs in mAI instructions where possible. - // This saves a chain-copy of registers and better ballance register + // This saves a chain-copy of registers and better balance register // use between vgpr and agpr as agpr tuples tend to be big. if (MI.getDesc().OpInfo) { unsigned Opc = MI.getOpcode(); @@ -11633,54 +11944,29 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // so no use checks are needed. MRI.setRegClass(Op.getReg(), NewRC); } - } - return; - } - - // Replace unused atomics with the no return version. - int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode()); - if (NoRetAtomicOp != -1) { - if (!Node->hasAnyUseOfValue(0)) { - int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::cpol); - if (CPolIdx != -1) { - MachineOperand &CPol = MI.getOperand(CPolIdx); - CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC); + // Resolve the rest of AV operands to AGPRs. + if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) { + if (Src2->isReg() && Src2->getReg().isVirtual()) { + auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg()); + if (TRI->isVectorSuperClass(RC)) { + auto *NewRC = TRI->getEquivalentAGPRClass(RC); + MRI.setRegClass(Src2->getReg(), NewRC); + if (Src2->isTied()) + MRI.setRegClass(MI.getOperand(0).getReg(), NewRC); + } + } } - MI.RemoveOperand(0); - MI.setDesc(TII->get(NoRetAtomicOp)); - return; } - // For mubuf_atomic_cmpswap, we need to have tablegen use an extract_subreg - // instruction, because the return type of these instructions is a vec2 of - // the memory type, so it can be tied to the input operand. - // This means these instructions always have a use, so we need to add a - // special case to check if the atomic has only one extract_subreg use, - // which itself has no uses. - if ((Node->hasNUsesOfValue(1, 0) && - Node->use_begin()->isMachineOpcode() && - Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG && - !Node->use_begin()->hasAnyUseOfValue(0))) { - Register Def = MI.getOperand(0).getReg(); - - // Change this into a noret atomic. - MI.setDesc(TII->get(NoRetAtomicOp)); - MI.RemoveOperand(0); - - // If we only remove the def operand from the atomic instruction, the - // extract_subreg will be left with a use of a vreg without a def. - // So we need to insert an implicit_def to avoid machine verifier - // errors. 
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), - TII->get(AMDGPU::IMPLICIT_DEF), Def); - } return; } - if (TII->isMIMG(MI) && !MI.mayStore()) - AddIMGInit(MI); + if (TII->isMIMG(MI)) { + if (!MI.mayStore()) + AddIMGInit(MI); + TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr); + } } static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, @@ -12243,13 +12529,17 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { MachineBasicBlock *Exit = ML->getExitBlock(); if (Pre && Exit) { - BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(1); // prefetch 2 lines behind PC + auto PreTerm = Pre->getFirstTerminator(); + if (PreTerm == Pre->begin() || + std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(1); // prefetch 2 lines behind PC - BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(2); // prefetch 1 line behind PC + auto ExitHead = Exit->getFirstNonDebugInstr(); + if (ExitHead == Exit->end() || + ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(2); // prefetch 1 line behind PC } return CacheLineAlign; @@ -12390,6 +12680,9 @@ static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) { TargetLowering::AtomicExpansionKind SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + unsigned AS = RMW->getPointerAddressSpace(); + if (AS == AMDGPUAS::PRIVATE_ADDRESS) + return AtomicExpansionKind::NotAtomic; auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) { OptimizationRemarkEmitter ORE(RMW->getFunction()); @@ -12421,10 +12714,11 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy())) return AtomicExpansionKind::CmpXChg; - unsigned AS = RMW->getPointerAddressSpace(); - if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) && - Subtarget->hasAtomicFaddInsts()) { + Subtarget->hasAtomicFaddNoRtnInsts()) { + if (Subtarget->hasGFX940Insts()) + return AtomicExpansionKind::None; + // The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe // floating point atomic instructions. May generate more efficient code, // but may not respect rounding and denormal modes, and may give incorrect @@ -12453,8 +12747,8 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { : AtomicExpansionKind::CmpXChg; } - // DS FP atomics do repect the denormal mode, but the rounding mode is fixed - // to round-to-nearest-even. + // DS FP atomics do respect the denormal mode, but the rounding mode is + // fixed to round-to-nearest-even. // The only exception is DS_ADD_F64 which never flushes regardless of mode. if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) { if (!Ty->isDoubleTy()) @@ -12479,6 +12773,27 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); } +TargetLowering::AtomicExpansionKind +SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? 
AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; +} + +TargetLowering::AtomicExpansionKind +SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; +} + +TargetLowering::AtomicExpansionKind +SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const { + return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; +} + const TargetRegisterClass * SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false); @@ -12500,7 +12815,7 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { // always uniform. static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited, unsigned WaveSize) { - // FIXME: We asssume we never cast the mask results of a control flow + // FIXME: We assume we never cast the mask results of a control flow // intrinsic. // Early exit if the type won't be consistent as a compile time hack. IntegerType *IT = dyn_cast<IntegerType>(V->getType()); @@ -12604,7 +12919,7 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const { if (!N0.hasOneUse()) return false; - // Take care of the oportunity to keep N0 uniform + // Take care of the opportunity to keep N0 uniform if (N0->isDivergent() || !N1->isDivergent()) return true; // Check if we have a good chance to form the memory access pattern with the @@ -12612,3 +12927,11 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0, return (DAG.isBaseWithConstantOffset(N0) && hasMemSDNodeUser(*N0->use_begin())); } + +MachineMemOperand::Flags +SITargetLowering::getTargetMMOFlags(const Instruction &I) const { + // Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
+ if (I.getMetadata("amdgpu.noclobber")) + return MONoClobber; + return MachineMemOperand::MONone; +} diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index bf81e082b478..4fbccf0c5850 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -53,6 +53,9 @@ private: uint64_t Offset, Align Alignment, bool Signed, const ISD::InputArg *Arg = nullptr) const; + SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL, + Align Alignment, + ImplicitParameter Param) const; SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, const SDLoc &SL, SDValue Chain, @@ -76,6 +79,9 @@ private: SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, unsigned NewOpcode) const; + SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, + const ArgDescriptor &ArgDesc) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; @@ -145,6 +151,7 @@ private: SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; @@ -191,6 +198,7 @@ private: SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const; unsigned getFusedOpcode(const SelectionDAG &DAG, const SDNode *N0, const SDNode *N1) const; + SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -227,7 +235,10 @@ public: /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (, var-idx) should be /// expanded into a set of cmp/select instructions. 
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, - bool IsDivergentIdx); + bool IsDivergentIdx, + const GCNSubtarget *Subtarget); + + bool shouldExpandVectorDynExt(SDNode *N) const; private: // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the @@ -310,6 +321,9 @@ public: bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const override; + bool isTypeDesirableForOp(unsigned Op, EVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; @@ -380,6 +394,7 @@ public: MachineBasicBlock *BB) const override; bool hasBitPreservingFPLogic(EVT VT) const override; + bool hasAtomicFaddRtnForTy(SDValue &Op) const; bool enableAggressiveFMAFusion(EVT VT) const override; bool enableAggressiveFMAFusion(LLT Ty) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, @@ -466,6 +481,10 @@ public: bool SNaN = false, unsigned Depth = 0) const override; AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind + shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override; @@ -505,6 +524,9 @@ public: std::pair getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const; + + MachineMemOperand::Flags + getTargetMMOFlags(const Instruction &I) const override; }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp index 125f006a1d1d..50f8ad4433c6 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -35,6 +35,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -42,11 +43,39 @@ using namespace llvm; namespace { +// A clause length of 64 instructions could be encoded in the s_clause +// instruction, but the hardware documentation (at least for GFX11) says that +// 63 is the maximum allowed. +constexpr unsigned MaxInstructionsInClause = 63; + enum HardClauseType { + // For GFX10: + // Texture, buffer, global or scratch memory instructions. HARDCLAUSE_VMEM, // Flat (not global or scratch) memory instructions. HARDCLAUSE_FLAT, + + // For GFX11: + + // Texture memory instructions. + HARDCLAUSE_MIMG_LOAD, + HARDCLAUSE_MIMG_STORE, + HARDCLAUSE_MIMG_ATOMIC, + HARDCLAUSE_MIMG_SAMPLE, + // Buffer, global or scratch memory instructions. + HARDCLAUSE_VMEM_LOAD, + HARDCLAUSE_VMEM_STORE, + HARDCLAUSE_VMEM_ATOMIC, + // Flat (not global or scratch) memory instructions. + HARDCLAUSE_FLAT_LOAD, + HARDCLAUSE_FLAT_STORE, + HARDCLAUSE_FLAT_ATOMIC, + // BVH instructions. + HARDCLAUSE_BVH, + + // Common: + // Instructions that access LDS. HARDCLAUSE_LDS, // Scalar memory instructions. @@ -78,19 +107,43 @@ public: } HardClauseType getHardClauseType(const MachineInstr &MI) { - - // On current architectures we only get a benefit from clausing loads. 
- if (MI.mayLoad()) { - if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { - if (ST->hasNSAClauseBug()) { + if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { + if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { + if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { + if (ST->hasNSAClauseBug()) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); + if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) + return HARDCLAUSE_ILLEGAL; + } + return HARDCLAUSE_VMEM; + } + if (SIInstrInfo::isFLAT(MI)) + return HARDCLAUSE_FLAT; + } else { + assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); + if (SIInstrInfo::isMIMG(MI)) { const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); - if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) - return HARDCLAUSE_ILLEGAL; + const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + if (BaseInfo->BVH) + return HARDCLAUSE_BVH; + if (BaseInfo->Sampler) + return HARDCLAUSE_MIMG_SAMPLE; + return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC + : HARDCLAUSE_MIMG_LOAD + : HARDCLAUSE_MIMG_STORE; + } + if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { + return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC + : HARDCLAUSE_VMEM_LOAD + : HARDCLAUSE_VMEM_STORE; + } + if (SIInstrInfo::isFLAT(MI)) { + return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC + : HARDCLAUSE_FLAT_LOAD + : HARDCLAUSE_FLAT_STORE; } - return HARDCLAUSE_VMEM; } - if (SIInstrInfo::isFLAT(MI)) - return HARDCLAUSE_FLAT; // TODO: LDS if (SIInstrInfo::isSMRD(MI)) return HARDCLAUSE_SMEM; @@ -129,7 +182,7 @@ public: bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { if (CI.First == CI.Last) return false; - assert(CI.Length <= 64 && "Hard clause is too long!"); + assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!"); auto &MBB = *CI.First->getParent(); auto ClauseMI = @@ -170,7 +223,7 @@ public: } } - if (CI.Length == 64 || + if (CI.Length == MaxInstructionsInClause || (CI.Length && Type != HARDCLAUSE_INTERNAL && Type != HARDCLAUSE_IGNORE && (Type != CI.Type || diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index f8a10bc8ef6f..349bcbf82195 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Sequence.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/InitializePasses.h" #include "llvm/Support/DebugCounter.h" @@ -87,29 +88,29 @@ struct RegisterEncoding { }; enum WaitEventType { - VMEM_ACCESS, // vector-memory read & write - VMEM_READ_ACCESS, // vector-memory read - VMEM_WRITE_ACCESS,// vector-memory write - LDS_ACCESS, // lds read & write - GDS_ACCESS, // gds read & write - SQ_MESSAGE, // send message - SMEM_ACCESS, // scalar-memory read & write - EXP_GPR_LOCK, // export holding on its data src - GDS_GPR_LOCK, // GDS holding on its data and addr src - EXP_POS_ACCESS, // write to export position - EXP_PARAM_ACCESS, // write to export parameter - VMW_GPR_LOCK, // vector-memory write holding on its data src + VMEM_ACCESS, // vector-memory read & write + VMEM_READ_ACCESS, // vector-memory read + VMEM_WRITE_ACCESS, // vector-memory write + LDS_ACCESS, // lds read & write + GDS_ACCESS, // gds read & write + SQ_MESSAGE, // 
send message + SMEM_ACCESS, // scalar-memory read & write + EXP_GPR_LOCK, // export holding on its data src + GDS_GPR_LOCK, // GDS holding on its data and addr src + EXP_POS_ACCESS, // write to export position + EXP_PARAM_ACCESS, // write to export parameter + VMW_GPR_LOCK, // vector-memory write holding on its data src + EXP_LDS_ACCESS, // read by ldsdir counting as export NUM_WAIT_EVENTS, }; static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = { - (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS), - (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) | - (1 << SQ_MESSAGE), - (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) | - (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS), - (1 << VMEM_WRITE_ACCESS) -}; + (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS), + (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) | + (1 << SQ_MESSAGE), + (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) | + (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) | (1 << EXP_LDS_ACCESS), + (1 << VMEM_WRITE_ACCESS)}; // The mapping is: // 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs @@ -119,10 +120,10 @@ static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = { // special tokens like SCMEM_LDS (needed for buffer load to LDS). enum RegisterMapping { SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets. - AGPR_OFFSET = 226, // Maximum programmable ArchVGPRs across all targets. + AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets. SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets. NUM_EXTRA_VGPRS = 1, // A reserved slot for DS. - EXTRA_VGPR_LDS = 0, // This is a placeholder the Shader algorithm uses. + EXTRA_VGPR_LDS = 0, // An artificial register to track LDS writes. NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts. 
}; @@ -355,6 +356,8 @@ private: DenseSet<MachineInstr *> TrackedWaitcntSet; DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses; + DenseMap<MachineBasicBlock *, bool> PreheadersToFlush; + MachineLoopInfo *MLI; MachinePostDominatorTree *PDT; struct BlockInfo { @@ -381,6 +384,9 @@ public: (void)ForceVMCounter; } + bool shouldFlushVmCnt(MachineLoop *ML, WaitcntBrackets &Brackets); + bool isPreheaderToFlush(MachineBasicBlock &MBB, + WaitcntBrackets &ScoreBrackets); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -389,6 +395,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachinePostDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -431,14 +438,23 @@ public: bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; bool generateWaitcntInstBefore(MachineInstr &MI, WaitcntBrackets &ScoreBrackets, - MachineInstr *OldWaitcntInstr); + MachineInstr *OldWaitcntInstr, + bool FlushVmCnt); + bool generateWaitcntBlockEnd(MachineBasicBlock &Block, + WaitcntBrackets &ScoreBrackets, + MachineInstr *OldWaitcntInstr); + bool generateWaitcnt(AMDGPU::Waitcnt Wait, + MachineBasicBlock::instr_iterator It, + MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets, + MachineInstr *OldWaitcntInstr); void updateEventWaitcntAfter(MachineInstr &Inst, WaitcntBrackets *ScoreBrackets); bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets); bool applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr, - AMDGPU::Waitcnt &Wait, const MachineInstr *MI); + AMDGPU::Waitcnt &Wait, + MachineBasicBlock::instr_iterator It); }; } // end anonymous namespace @@ -496,6 +512,14 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI, } } +// MUBUF and FLAT LDS DMA operations need a wait on vmcnt before LDS written +// can be accessed. A load from LDS to VMEM does not need a wait. +static bool mayWriteLDSThroughDMA(const MachineInstr &MI) { + return SIInstrInfo::isVALU(MI) && + (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI)) && + MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD; +} + void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, @@ -588,6 +612,12 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data), CurrScore); } + } else if (TII->isLDSDIR(Inst)) { + // LDSDIR instructions attach the score to the destination.
+ setExpScore( + &Inst, TII, TRI, MRI, + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst), + CurrScore); } else { if (TII->isEXP(Inst)) { // For export the destination registers are really temps that @@ -644,7 +674,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, setRegScore(RegNo, T, CurrScore); } } - if (TII->isDS(Inst) && Inst.mayStore()) { + if (Inst.mayStore() && (TII->isDS(Inst) || mayWriteLDSThroughDMA(Inst))) { setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore); } } @@ -784,6 +814,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const { INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false) @@ -796,53 +827,53 @@ FunctionPass *llvm::createSIInsertWaitcntsPass() { return new SIInsertWaitcnts(); } -/// Combine consecutive waitcnt instructions that precede \p MI and follow +/// Combine consecutive waitcnt instructions that precede \p It and follow /// \p OldWaitcntInstr and apply any extra wait from waitcnt that were added /// by previous passes. Currently this pass conservatively assumes that these /// preexisting waitcnt are required for correctness. -bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets, - MachineInstr &OldWaitcntInstr, - AMDGPU::Waitcnt &Wait, - const MachineInstr *MI) { +bool SIInsertWaitcnts::applyPreexistingWaitcnt( + WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr, + AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) { bool Modified = false; MachineInstr *WaitcntInstr = nullptr; MachineInstr *WaitcntVsCntInstr = nullptr; - for (auto II = OldWaitcntInstr.getIterator(), NextI = std::next(II); - &*II != MI; II = NextI, ++NextI) { - if (II->isMetaInstruction()) + + for (auto &II : + make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) { + if (II.isMetaInstruction()) continue; - if (II->getOpcode() == AMDGPU::S_WAITCNT) { + if (II.getOpcode() == AMDGPU::S_WAITCNT) { // Conservatively update required wait if this waitcnt was added in an // earlier pass. In this case it will not exist in the tracked waitcnt // set. - if (!TrackedWaitcntSet.count(&*II)) { - unsigned IEnc = II->getOperand(0).getImm(); + if (!TrackedWaitcntSet.count(&II)) { + unsigned IEnc = II.getOperand(0).getImm(); AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt(IV, IEnc); Wait = Wait.combined(OldWait); } // Merge consecutive waitcnt of the same type by erasing multiples. 
if (!WaitcntInstr) { - WaitcntInstr = &*II; + WaitcntInstr = &II; } else { - II->eraseFromParent(); + II.eraseFromParent(); Modified = true; } } else { - assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT); - assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL); - if (!TrackedWaitcntSet.count(&*II)) { + assert(II.getOpcode() == AMDGPU::S_WAITCNT_VSCNT); + assert(II.getOperand(0).getReg() == AMDGPU::SGPR_NULL); + if (!TrackedWaitcntSet.count(&II)) { unsigned OldVSCnt = - TII->getNamedOperand(*II, AMDGPU::OpName::simm16)->getImm(); + TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm(); Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt); } if (!WaitcntVsCntInstr) { - WaitcntVsCntInstr = &*II; + WaitcntVsCntInstr = &II; } else { - II->eraseFromParent(); + II.eraseFromParent(); Modified = true; } } @@ -862,9 +893,14 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets, Wait.LgkmCnt = ~0u; Wait.ExpCnt = ~0u; - LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << *MI << "New Instr: " << *WaitcntInstr - << '\n'); + LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() + ? dbgs() << "applyPreexistingWaitcnt\n" + << "New Instr at block end: " << *WaitcntInstr + << '\n' + : dbgs() << "applyPreexistingWaitcnt\n" + << "Old Instr: " << *It + << "New Instr: " << *WaitcntInstr << '\n'); + } else { WaitcntInstr->eraseFromParent(); Modified = true; @@ -885,9 +921,13 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets, ScoreBrackets.applyWaitcnt(Wait); Wait.VsCnt = ~0u; - LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << *MI - << "New Instr: " << *WaitcntVsCntInstr << '\n'); + LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() + ? dbgs() << "applyPreexistingWaitcnt\n" + << "New Instr at block end: " + << *WaitcntVsCntInstr << '\n' + : dbgs() << "applyPreexistingWaitcnt\n" + << "Old Instr: " << *It + << "New Instr: " << *WaitcntVsCntInstr << '\n'); } else { WaitcntVsCntInstr->eraseFromParent(); Modified = true; @@ -928,16 +968,18 @@ static bool callWaitsOnFunctionReturn(const MachineInstr &MI) { /// and if so what the value of each counter is. /// The "score bracket" is bound by the lower bound and upper bound /// scores (*_score_LB and *_score_ub respectively). -bool SIInsertWaitcnts::generateWaitcntInstBefore( - MachineInstr &MI, WaitcntBrackets &ScoreBrackets, - MachineInstr *OldWaitcntInstr) { +/// If FlushVmCnt is true, that means that we want to generate a s_waitcnt to +/// flush the vmcnt counter here. +bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, + WaitcntBrackets &ScoreBrackets, + MachineInstr *OldWaitcntInstr, + bool FlushVmCnt) { setForceEmitWaitcnt(); if (MI.isMetaInstruction()) return false; AMDGPU::Waitcnt Wait; - bool Modified = false; // FIXME: This should have already been handled by the memory legalizer. // Removing this currently doesn't affect any lit tests, but we need to @@ -955,16 +997,17 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || + MI.getOpcode() == AMDGPU::SI_RETURN || MI.getOpcode() == AMDGPU::S_SETPC_B64_return || - MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx || (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) { Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt())); } // Resolve vm waits before gs-done. 
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG || MI.getOpcode() == AMDGPU::S_SENDMSGHALT) && - ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) == - AMDGPU::SendMsg::ID_GS_DONE)) { + ST->hasLegacyGeometry() && + ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) == + AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) { Wait.VmCnt = 0; } #if 0 // TODO: the following blocks of logic when we have fence. @@ -1040,7 +1083,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( if (MI.isCall() && callWaitsOnFunctionEntry(MI)) { // The function is going to insert a wait on everything in its prolog. // This still needs to be careful if the call target is a load (e.g. a GOT - // load). We also need to check WAW depenancy with saved PC. + // load). We also need to check WAW dependency with saved PC. Wait = AMDGPU::Waitcnt(); int CallAddrOpIdx = @@ -1089,7 +1132,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( SLoadAddresses.erase(Ptr); } unsigned AS = Memop->getAddrSpace(); - if (AS != AMDGPUAS::LOCAL_ADDRESS) + if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::FLAT_ADDRESS) + continue; + // No need to wait before load from VMEM to LDS. + if (mayWriteLDSThroughDMA(MI)) continue; unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; // VM_CNT is only relevant to vgpr or LDS. @@ -1123,7 +1169,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); ScoreBrackets.clearVgprVmemTypes(RegNo); } - if (Op.isDef()) { + if (Op.isDef() || ScoreBrackets.hasPendingEvent(EXP_LDS_ACCESS)) { ScoreBrackets.determineWait( EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait); } @@ -1170,47 +1216,93 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( if (ForceEmitWaitcnt[VS_CNT]) Wait.VsCnt = 0; - if (OldWaitcntInstr) { + if (FlushVmCnt) { + unsigned UB = ScoreBrackets.getScoreUB(VM_CNT); + unsigned LB = ScoreBrackets.getScoreLB(VM_CNT); + if (UB - LB != 0) + Wait.VmCnt = 0; + } + + return generateWaitcnt(Wait, MI.getIterator(), *MI.getParent(), ScoreBrackets, + OldWaitcntInstr); +} + +// Add a waitcnt to flush the vmcnt counter at the end of the given block if +// needed. +bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block, + WaitcntBrackets &ScoreBrackets, + MachineInstr *OldWaitcntInstr) { + AMDGPU::Waitcnt Wait; + + unsigned UB = ScoreBrackets.getScoreUB(VM_CNT); + unsigned LB = ScoreBrackets.getScoreLB(VM_CNT); + if (UB - LB == 0) + return false; + + Wait.VmCnt = 0; + + return generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets, + OldWaitcntInstr); +} + +bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait, + MachineBasicBlock::instr_iterator It, + MachineBasicBlock &Block, + WaitcntBrackets &ScoreBrackets, + MachineInstr *OldWaitcntInstr) { + bool Modified = false; + const DebugLoc &DL = Block.findDebugLoc(It); + + if (OldWaitcntInstr) // Try to merge the required wait with preexisting waitcnt instructions. // Also erase redundant waitcnt. Modified = - applyPreexistingWaitcnt(ScoreBrackets, *OldWaitcntInstr, Wait, &MI); - } else { - // Update waitcnt brackets after determining the required wait. + applyPreexistingWaitcnt(ScoreBrackets, *OldWaitcntInstr, Wait, It); + else ScoreBrackets.applyWaitcnt(Wait); + + // ExpCnt can be merged into VINTERP. 
+ if (Wait.ExpCnt != ~0u && It != Block.instr_end() && + SIInstrInfo::isVINTERP(*It)) { + MachineOperand *WaitExp = + TII->getNamedOperand(*It, AMDGPU::OpName::waitexp); + if (Wait.ExpCnt < WaitExp->getImm()) { + WaitExp->setImm(Wait.ExpCnt); + Modified = true; + } + Wait.ExpCnt = ~0u; + + LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" + << "Update Instr: " << *It); } // Build new waitcnt instructions unless no wait is needed or the old waitcnt // instruction was modified to handle the required wait. if (Wait.hasWaitExceptVsCnt()) { unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait); - auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(), - MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(Enc); + auto SWaitInst = + BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc); TrackedWaitcntSet.insert(SWaitInst); Modified = true; - LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << MI - << "New Instr: " << *SWaitInst << '\n'); + LLVM_DEBUG(dbgs() << "generateWaitcnt\n"; + if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It; + dbgs() << "New Instr: " << *SWaitInst << '\n'); } if (Wait.hasWaitVsCnt()) { assert(ST->hasVscnt()); - auto SWaitInst = - BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), - TII->get(AMDGPU::S_WAITCNT_VSCNT)) - .addReg(AMDGPU::SGPR_NULL, RegState::Undef) - .addImm(Wait.VsCnt); + auto SWaitInst = BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT)) + .addReg(AMDGPU::SGPR_NULL, RegState::Undef) + .addImm(Wait.VsCnt); TrackedWaitcntSet.insert(SWaitInst); Modified = true; - LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << MI - << "New Instr: " << *SWaitInst << '\n'); + LLVM_DEBUG(dbgs() << "generateWaitcnt\n"; + if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It; + dbgs() << "New Instr: " << *SWaitInst << '\n'); } - return Modified; } @@ -1338,6 +1430,11 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, // May need to way wait for anything. ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt()); } + } else if (SIInstrInfo::isLDSDIR(Inst)) { + ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_LDS_ACCESS, Inst); + } else if (TII->isVINTERP(Inst)) { + int64_t Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::waitexp)->getImm(); + ScoreBrackets->applyWaitcnt(EXP_CNT, Imm); } else if (SIInstrInfo::isEXP(Inst)) { unsigned Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm(); if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31) @@ -1349,6 +1446,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, } else { switch (Inst.getOpcode()) { case AMDGPU::S_SENDMSG: + case AMDGPU::S_SENDMSG_RTN_B32: + case AMDGPU::S_SENDMSG_RTN_B64: case AMDGPU::S_SENDMSGHALT: ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst); break; @@ -1476,8 +1575,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, continue; } + bool FlushVmCnt = Block.getFirstTerminator() == Inst && + isPreheaderToFlush(Block, ScoreBrackets); + // Generate an s_waitcnt instruction to be placed before Inst, if needed. - Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr); + Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr, + FlushVmCnt); OldWaitcntInstr = nullptr; // Restore vccz if it's not known to be correct already. 
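A quick gloss on the merge rule used by applyPreexistingWaitcnt() above: an s_waitcnt immediate of N means "stall until at most N events of that kind are still outstanding", so folding two required waits together takes the minimum of each counter, with ~0u standing for "no wait needed". A minimal sketch of that rule follows; WaitcntSketch and its members are illustrative stand-ins for AMDGPU::Waitcnt, not the exact LLVM type:

  #include <algorithm>

  struct WaitcntSketch {
    // ~0u encodes "no wait required" for a counter, as in the pass above.
    unsigned VmCnt = ~0u, ExpCnt = ~0u, LgkmCnt = ~0u, VsCnt = ~0u;

    // Stricter-of-the-two merge: a smaller bound is a stronger wait.
    WaitcntSketch combined(const WaitcntSketch &Other) const {
      return {std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
              std::min(LgkmCnt, Other.LgkmCnt), std::min(VsCnt, Other.VsCnt)};
    }
  };

The same reasoning is why the pass folds a preexisting s_waitcnt_vscnt with Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt) rather than replacing it outright.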
@@ -1562,9 +1665,101 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, ++Iter; } + if (Block.getFirstTerminator() == Block.end() && + isPreheaderToFlush(Block, ScoreBrackets)) + Modified |= generateWaitcntBlockEnd(Block, ScoreBrackets, OldWaitcntInstr); + return Modified; } +// Return true if the given machine basic block is a preheader of a loop in +// which we want to flush the vmcnt counter, and false otherwise. +bool SIInsertWaitcnts::isPreheaderToFlush(MachineBasicBlock &MBB, + WaitcntBrackets &ScoreBrackets) { + if (PreheadersToFlush.count(&MBB)) + return PreheadersToFlush[&MBB]; + + auto UpdateCache = [&](bool val) { + PreheadersToFlush[&MBB] = val; + return val; + }; + + MachineBasicBlock *Succ = MBB.getSingleSuccessor(); + if (!Succ) + return UpdateCache(false); + + MachineLoop *Loop = MLI->getLoopFor(Succ); + if (!Loop) + return UpdateCache(false); + + if (Loop->getLoopPreheader() == &MBB && shouldFlushVmCnt(Loop, ScoreBrackets)) + return UpdateCache(true); + + return UpdateCache(false); +} + +// Return true if it is better to flush the vmcnt counter in the preheader of +// the given loop. We currently decide to flush in two situations: +// 1. The loop contains vmem store(s), no vmem load and at least one use of a +// vgpr containing a value that is loaded outside of the loop. (Only on +// targets with no vscnt counter). +// 2. The loop contains vmem load(s), but the loaded values are not used in the +// loop, and at least one use of a vgpr containing a value that is loaded +// outside of the loop. +bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, + WaitcntBrackets &Brackets) { + bool HasVMemLoad = false; + bool HasVMemStore = false; + bool UsesVgprLoadedOutside = false; + DenseSet VgprUse; + DenseSet VgprDef; + + for (MachineBasicBlock *MBB : ML->blocks()) { + for (MachineInstr &MI : *MBB) { + if (SIInstrInfo::isVMEM(MI)) { + if (MI.mayLoad()) + HasVMemLoad = true; + if (MI.mayStore()) + HasVMemStore = true; + } + for (unsigned I = 0; I < MI.getNumOperands(); I++) { + MachineOperand &Op = MI.getOperand(I); + if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg())) + continue; + RegInterval Interval = Brackets.getRegInterval(&MI, TII, MRI, TRI, I); + // Vgpr use + if (Op.isUse()) { + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { + // If we find a register that is loaded inside the loop, 1. and 2. + // are invalidated and we can exit. + if (VgprDef.contains(RegNo)) + return false; + VgprUse.insert(RegNo); + // If at least one of Op's registers is in the score brackets, the + // value is likely loaded outside of the loop. + if (Brackets.getRegScore(RegNo, VM_CNT) > 0) { + UsesVgprLoadedOutside = true; + break; + } + } + } + // VMem load vgpr def + else if (SIInstrInfo::isVMEM(MI) && MI.mayLoad() && Op.isDef()) + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { + // If we find a register that is loaded inside the loop, 1. and 2. + // are invalidated and we can exit. 
+ if (VgprUse.contains(RegNo)) + return false; + VgprDef.insert(RegNo); + } + } + } + } + if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside) + return true; + return HasVMemLoad && UsesVgprLoadedOutside; +} + bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { ST = &MF.getSubtarget(); TII = ST->getInstrInfo(); @@ -1572,6 +1767,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); IV = AMDGPU::getIsaVersion(ST->getCPU()); const SIMachineFunctionInfo *MFI = MF.getInfo(); + MLI = &getAnalysis(); PDT = &getAnalysis(); ForceEmitZeroWaitcnts = ForceEmitZeroFlag; diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index e39f52875f1f..b398e108bf62 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -48,6 +48,12 @@ class InstSI Inst; + int Size = 12; +} + def CPolBit { int GLC = 0; int SLC = 1; @@ -284,7 +303,7 @@ class VINTRPe op> : Enc32 { let Inst{31-26} = 0x32; // encoding } -class MIMGe : Enc64 { +class MIMGe_gfxpre11 : Enc64 { bits<10> vdata; bits<4> dmask; bits<1> unorm; @@ -309,7 +328,7 @@ class MIMGe : Enc64 { let Inst{63} = d16; } -class MIMGe_gfx6789 op> : MIMGe { +class MIMGe_gfx6789 op> : MIMGe_gfxpre11 { bits<8> vaddr; bits<1> da; @@ -321,7 +340,7 @@ class MIMGe_gfx6789 op> : MIMGe { let Inst{39-32} = vaddr; } -class MIMGe_gfx90a op> : MIMGe { +class MIMGe_gfx90a op> : MIMGe_gfxpre11 { bits<8> vaddr; bits<1> da; @@ -333,7 +352,7 @@ class MIMGe_gfx90a op> : MIMGe { let Inst{39-32} = vaddr; } -class MIMGe_gfx10 op> : MIMGe { +class MIMGe_gfx10 op> : MIMGe_gfxpre11 { bits<8> vaddr0; bits<3> dim; bits<2> nsa; @@ -349,12 +368,46 @@ class MIMGe_gfx10 op> : MIMGe { let Inst{62} = a16; } +class MIMGe_gfx11 op> : Enc64 { + bits<8> vdata; + bits<4> dmask; + bits<1> unorm; + bits<5> cpol; + bits<1> r128; + bits<1> tfe; + bits<1> lwe; + bits<7> srsrc; + bits<7> ssamp; + bit d16; + bits<1> a16; + bits<8> vaddr0; + bits<3> dim; + bits<1> nsa; + + let Inst{0} = nsa; + let Inst{4-2} = dim; + let Inst{7} = unorm; + let Inst{11-8} = dmask; + let Inst{12} = cpol{CPolBit.SLC}; + let Inst{13} = cpol{CPolBit.DLC}; + let Inst{14} = cpol{CPolBit.GLC}; + let Inst{15} = r128; + let Inst{16} = a16; + let Inst{17} = d16; + let Inst{25-18} = op; + let Inst{31-26} = 0x3c; + let Inst{39-32} = vaddr0; + let Inst{47-40} = vdata; + let Inst{52-48} = srsrc{6-2}; + let Inst{53} = tfe; + let Inst{54} = lwe; + let Inst{62-58} = ssamp{6-2}; +} + class EXPe : Enc64 { bits<4> en; bits<6> tgt; - bits<1> compr; bits<1> done; - bits<1> vm; bits<8> src0; bits<8> src1; bits<8> src2; @@ -362,9 +415,7 @@ class EXPe : Enc64 { let Inst{3-0} = en; let Inst{9-4} = tgt; - let Inst{10} = compr; let Inst{11} = done; - let Inst{12} = vm; let Inst{31-26} = 0x3e; let Inst{39-32} = src0; let Inst{47-40} = src1; @@ -372,6 +423,22 @@ class EXPe : Enc64 { let Inst{63-56} = src3; } +// Pre-GFX11 encoding has compr and vm bits. +class EXPe_ComprVM : EXPe { + bits<1> compr; + bits<1> vm; + + let Inst{10} = compr; + let Inst{12} = vm; +} + +// GFX11+ encoding has row bit. 
+class EXPe_Row : EXPe { + bits<1> row; + + let Inst{13} = row; +} + let Uses = [EXEC] in { class VINTRPCommon pattern> : diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0a2f9381e71f..814a7c446889 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -16,12 +16,12 @@ #include "AMDGPUInstrInfo.h" #include "GCNHazardRecognizer.h" #include "GCNSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -130,9 +130,31 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, return false; } -static bool readsExecAsData(const MachineInstr &MI) { - if (MI.isCompare()) - return true; +// Returns true if the scalar result of a VALU instruction depends on exec. +static bool resultDependsOnExec(const MachineInstr &MI) { + // Ignore comparisons which are only used masked with exec. + // This allows some hoisting/sinking of VALU comparisons. + if (MI.isCompare()) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + Register DstReg = MI.getOperand(0).getReg(); + if (!DstReg.isVirtual()) + return true; + for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) { + switch (Use.getOpcode()) { + case AMDGPU::S_AND_SAVEEXEC_B32: + case AMDGPU::S_AND_SAVEEXEC_B64: + break; + case AMDGPU::S_AND_B32: + case AMDGPU::S_AND_B64: + if (!Use.readsRegister(AMDGPU::EXEC)) + return true; + break; + default: + return true; + } + } + return false; + } switch (MI.getOpcode()) { default: @@ -147,7 +169,7 @@ static bool readsExecAsData(const MachineInstr &MI) { bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const { // Any implicit use of exec by VALU is not a real register read. return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() && - isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent()); + isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent()); } bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, @@ -181,7 +203,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (Offset0Idx == -1 || Offset1Idx == -1) return false; - // XXX - be careful of datalesss loads + // XXX - be careful of dataless loads // getNamedOperandIdx returns the index for MachineInstrs. Since they // include the output in the operand list, but SDNodes don't, we need to // subtract the index by one. 
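One remark on the resultDependsOnExec() change in the hunks above: a VALU compare writes one result bit per lane, and lanes disabled by exec carry stale bits, so the result normally does depend on exec. The exception the new code recognizes is a compare whose only consumers re-apply the exec mask (the s_and_saveexec idiom) before the bits can be observed. A stripped-down sketch of that user scan, assuming the usual MachineRegisterInfo API; onlyUsedUnderExecMask is a made-up name, and the opcode set is abbreviated (the real function also accepts S_AND_B32/B64 uses that read exec):

  #include "llvm/CodeGen/MachineRegisterInfo.h"

  // Sketch: true if every non-debug user of Reg masks it with exec first,
  // so stale bits from inactive lanes can never be observed.
  static bool onlyUsedUnderExecMask(const llvm::MachineRegisterInfo &MRI,
                                    llvm::Register Reg) {
    for (const llvm::MachineInstr &Use : MRI.use_nodbg_instructions(Reg)) {
      unsigned Opc = Use.getOpcode();
      // AMDGPU::S_AND_SAVEEXEC_B32/B64 come from the generated opcode enum.
      if (Opc != llvm::AMDGPU::S_AND_SAVEEXEC_B32 &&
          Opc != llvm::AMDGPU::S_AND_SAVEEXEC_B64)
        return false; // any other user may read the unmasked lane bits
    }
    return true;
  }

Compares that pass such a check can be hoisted or sunk across exec changes, which is the point of relaxing isIgnorableUse() here.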
@@ -362,6 +384,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth( DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); if (DataOpIdx == -1) DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); + if (DataOpIdx == -1) // LDS DMA + return false; Width = getOpSize(LdSt, DataOpIdx); return true; } @@ -410,6 +434,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth( DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); if (DataOpIdx == -1) DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); + if (DataOpIdx == -1) // LDS DMA + return false; Width = getOpSize(LdSt, DataOpIdx); return true; } @@ -464,7 +490,7 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef BaseOps1, return false; } - // In order to avoid regester pressure, on an average, the number of DWORDS + // In order to avoid register pressure, on an average, the number of DWORDS // loaded together by all clustered mem ops should not exceed 8. This is an // empirical value based on certain observations and performance related // experiments. @@ -517,8 +543,9 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -/// Handle copying from SGPR to AGPR, or from AGPR to AGPR. It is not possible -/// to directly copy, so an intermediate VGPR needs to be used. +/// Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908. It is not +/// possible to have a direct copy in these cases on GFX908, so an intermediate +/// VGPR copy is required. static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -527,10 +554,18 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII, RegScavenger &RS, Register ImpDefSuperReg = Register(), Register ImpUseSuperReg = Register()) { - const SIRegisterInfo &RI = TII.getRegisterInfo(); + assert((TII.getSubtarget().hasMAIInsts() && + !TII.getSubtarget().hasGFX90AInsts()) && + "Expected GFX908 subtarget."); - assert(AMDGPU::SReg_32RegClass.contains(SrcReg) || - AMDGPU::AGPR_32RegClass.contains(SrcReg)); + assert((AMDGPU::SReg_32RegClass.contains(SrcReg) || + AMDGPU::AGPR_32RegClass.contains(SrcReg)) && + "Source register of the copy should be either an SGPR or an AGPR."); + + assert(AMDGPU::AGPR_32RegClass.contains(DestReg) && + "Destination register of the copy should be an AGPR."); + + const SIRegisterInfo &RI = TII.getRegisterInfo(); // First try to find defining accvgpr_write to avoid temporary registers. for (auto Def = MI, E = MBB.begin(); Def != E; ) { @@ -581,23 +616,21 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII, // Registers in the sequence are allocated contiguously so we can just // use register number to pick one of three round-robin temps. - unsigned RegNo = DestReg % 3; - Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); - if (!Tmp) - report_fatal_error("Cannot scavenge VGPR to copy to AGPR"); - RS.setRegUsed(Tmp); - - if (!TII.getSubtarget().hasGFX90AInsts()) { - // Only loop through if there are any free registers left, otherwise - // scavenger may report a fatal error without emergency spill slot - // or spill with the slot. 
- while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { - Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); - if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) - break; - Tmp = Tmp2; - RS.setRegUsed(Tmp); - } + unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3; + Register Tmp = + MBB.getParent()->getInfo()->getVGPRForAGPRCopy(); + assert(MBB.getParent()->getRegInfo().isReserved(Tmp) && + "VGPR used for an intermediate copy should have been reserved."); + + // Only loop through if there are any free registers left, otherwise + // scavenger may report a fatal error without emergency spill slot + // or spill with the slot. + while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { + Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); + if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) + break; + Tmp = Tmp2; + RS.setRegUsed(Tmp); } // Insert copy to temporary VGPR. @@ -796,7 +829,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } if (RC == &AMDGPU::AGPR_32RegClass) { - if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) { + if (AMDGPU::VGPR_32RegClass.contains(SrcReg) || + (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) { BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; @@ -884,6 +918,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg); if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) { + if (ST.hasMovB64()) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } if (ST.hasPackedFP32Ops()) { BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg) .addImm(SISrcMods::OP_SEL_1) @@ -906,7 +945,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } - expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward); + const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); + expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC, + Forward); return; } @@ -915,7 +956,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (RI.isAGPRClass(RC)) { if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC)) Opcode = AMDGPU::V_ACCVGPR_MOV_B32; - else if (RI.hasVGPRs(SrcRC)) + else if (RI.hasVGPRs(SrcRC) || + (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC))) Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64; else Opcode = AMDGPU::INSTRUCTION_LIST_END; @@ -925,7 +967,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, (RI.isProperlyAlignedRC(*RC) && (SrcRC == RC || RI.isSGPRClass(SrcRC)))) { // TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov. - if (ST.hasPackedFP32Ops()) { + if (ST.hasMovB64()) { + Opcode = AMDGPU::V_MOV_B64_e32; + EltSize = 8; + } else if (ST.hasPackedFP32Ops()) { Opcode = AMDGPU::V_PK_MOV_B32; EltSize = 8; } @@ -1725,13 +1770,8 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) { case AMDGPU::S_NOP: return MI.getOperand(0).getImm() + 1; - - // FIXME: Any other pseudo instruction? // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The // hazard, even if one exist, won't really be visible. Should we handle it? 
- case AMDGPU::SI_MASKED_UNREACHABLE: - case AMDGPU::WAVE_BARRIER: - return 0; } } @@ -1807,6 +1847,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const MachineOperand &SrcOp = MI.getOperand(1); // FIXME: Will this work for 64-bit floating point immediates? assert(!SrcOp.isFPImm()); + if (ST.hasMovB64()) { + MI.setDesc(get(AMDGPU::V_MOV_B64_e32)); + if (!isLiteralConstant(MI, 1) || isUInt<32>(SrcOp.getImm())) + break; + } if (SrcOp.isImm()) { APInt Imm(64, SrcOp.getImm()); APInt Lo(32, Imm.getLoBits(32).getZExtValue()); @@ -1887,6 +1932,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::V_SET_INACTIVE_B32: { unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + // FIXME: We may possibly optimize the COPY once we find ways to make LLVM + // optimizations (mainly Register Coalescer) aware of WWM register liveness. + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) @@ -1899,11 +1948,15 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::V_SET_INACTIVE_B64: { unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); - FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), MI.getOperand(0).getReg()) - .add(MI.getOperand(2)); + .add(MI.getOperand(1)); + expandPostRAPseudo(*Copy); + auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); + FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten + Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), + MI.getOperand(0).getReg()) + .add(MI.getOperand(2)); expandPostRAPseudo(*Copy); BuildMI(MBB, MI, DL, get(NotOpc), Exec) .addReg(Exec); @@ -2085,6 +2138,23 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64)); break; } + case AMDGPU::SI_RETURN: { + const MachineFunction *MF = MBB.getParent(); + const GCNSubtarget &ST = MF->getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + // Hiding the return address use with SI_RETURN may lead to extra kills in + // the function and missing live-ins. We are fine in practice because callee + // saved register handling ensures the register value is restored before + // RET, but we need the undef flag here to appease the MachineVerifier + // liveness checks. 
+ MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return)) + .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef); + + MIB.copyImplicitOps(MI); + MI.eraseFromParent(); + break; + } } return true; } @@ -2093,6 +2163,13 @@ std::pair SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); + if (ST.hasMovB64() && + AMDGPU::isLegal64BitDPPControl( + getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) { + MI.setDesc(get(AMDGPU::V_MOV_B64_dpp)); + return std::make_pair(&MI, nullptr); + } + MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); MachineFunction *MF = MBB.getParent(); @@ -2789,6 +2866,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B64: case AMDGPU::COPY: @@ -2801,35 +2880,15 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { } } -unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind( - unsigned Kind) const { - switch(Kind) { - case PseudoSourceValue::Stack: - case PseudoSourceValue::FixedStack: - return AMDGPUAS::PRIVATE_ADDRESS; - case PseudoSourceValue::ConstantPool: - case PseudoSourceValue::GOT: - case PseudoSourceValue::JumpTable: - case PseudoSourceValue::GlobalValueCallEntry: - case PseudoSourceValue::ExternalSymbolCallEntry: - case PseudoSourceValue::TargetCustom: - return AMDGPUAS::CONSTANT_ADDRESS; - } - return AMDGPUAS::FLAT_ADDRESS; -} +static constexpr unsigned ModifierOpNames[] = { + AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp, + AMDGPU::OpName::omod}; -static void removeModOperands(MachineInstr &MI) { +void SIInstrInfo::removeModOperands(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); - int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::src0_modifiers); - int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::src1_modifiers); - int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::src2_modifiers); - - MI.RemoveOperand(Src2ModIdx); - MI.RemoveOperand(Src1ModIdx); - MI.RemoveOperand(Src0ModIdx); + for (unsigned Name : reverse(ModifierOpNames)) + MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, Name)); } bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, @@ -2841,7 +2900,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, default: return false; case AMDGPU::S_MOV_B64: - // TODO: We could fold 64-bit immediates, but this get compilicated + // TODO: We could fold 64-bit immediates, but this get complicated // when there are sub-registers. return false; @@ -2921,7 +2980,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); // Multiplied part is the constant: Use v_madmk_{f16, f32}. - // We should only expect these to be on src0 due to canonicalizations. + // We should only expect these to be on src0 due to canonicalization. if (Src0->isReg() && Src0->getReg() == Reg) { if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) return false; @@ -2942,12 +3001,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. 
- // Remove these first since they are at the end. - UseMI.RemoveOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI.RemoveOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); - Register Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); Src0->setReg(Src1Reg); @@ -2966,7 +3019,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); - bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + bool DeleteDef = MRI->use_nodbg_empty(Reg); if (DeleteDef) DefMI.eraseFromParent(); @@ -3025,12 +3078,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. - // Remove these first since they are at the end. - UseMI.RemoveOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI.RemoveOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); - if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || @@ -3049,7 +3096,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, // constant and SGPR are illegal. legalizeOperands(UseMI); - bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + bool DeleteDef = MRI->use_nodbg_empty(Reg); if (DeleteDef) DefMI.eraseFromParent(); @@ -3192,34 +3239,68 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const { + MachineBasicBlock &MBB = *MI.getParent(); unsigned Opc = MI.getOpcode(); - bool IsF16 = false; + + // Handle MFMA. + int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc); + if (NewMFMAOpc != -1) { + MachineInstrBuilder MIB = + BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc)); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) + MIB.add(MI.getOperand(I)); + updateLiveVariables(LV, MI, *MIB); + if (LIS) + LIS->ReplaceMachineInstrInMaps(MI, *MIB); + return MIB; + } + + if (SIInstrInfo::isWMMA(MI)) { + unsigned NewOpc = AMDGPU::mapWMMA2AddrTo3AddrOpcode(MI.getOpcode()); + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) + .setMIFlags(MI.getFlags()); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) + MIB->addOperand(MI.getOperand(I)); + + updateLiveVariables(LV, MI, *MIB); + if (LIS) + LIS->ReplaceMachineInstrInMaps(MI, *MIB); + + return MIB; + } + + // Handle MAC/FMAC. 
+ bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64; bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; - int NewMFMAOpc = -1; + bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 || + Opc == AMDGPU::V_MAC_LEGACY_F32_e64 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e64; + bool Src0Literal = false; switch (Opc) { default: - NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc); - if (NewMFMAOpc == -1) - return nullptr; - break; + return nullptr; case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F16_e64: - IsF16 = true; - LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: + case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_FMAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F64_e64: break; case AMDGPU::V_MAC_F16_e32: case AMDGPU::V_FMAC_F16_e32: - IsF16 = true; - LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e32: + case AMDGPU::V_MAC_LEGACY_F32_e32: case AMDGPU::V_FMAC_F32_e32: + case AMDGPU::V_FMAC_LEGACY_F32_e32: case AMDGPU::V_FMAC_F64_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -3228,25 +3309,13 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return nullptr; if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) - return nullptr; + Src0Literal = true; break; } } MachineInstrBuilder MIB; - MachineBasicBlock &MBB = *MI.getParent(); - - if (NewMFMAOpc != -1) { - MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc)); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) - MIB.add(MI.getOperand(I)); - updateLiveVariables(LV, MI, *MIB); - if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, *MIB); - return MIB; - } - const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src0Mods = @@ -3255,10 +3324,13 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, const MachineOperand *Src1Mods = getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); + const MachineOperand *Src2Mods = + getNamedOperand(MI, AMDGPU::OpName::src2_modifiers); const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); - if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 && + if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 && + !IsLegacy && // If we have an SGPR input, we will violate the constant bus restriction. (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { @@ -3271,11 +3343,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, // We cannot just remove the DefMI here, calling pass will crash. DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) - DefMI->RemoveOperand(I); + DefMI->removeOperand(I); }; int64_t Imm; - if (getFoldableImm(Src2, Imm, &DefMI)) { + if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { unsigned NewOpc = IsFMA ? (IsF16 ? 
AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); @@ -3295,7 +3367,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); - if (getFoldableImm(Src1, Imm, &DefMI)) { + if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) .add(*Dst) @@ -3309,7 +3381,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } } - if (getFoldableImm(Src0, Imm, &DefMI)) { + if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) { + if (Src0Literal) { + Imm = Src0->getImm(); + DefMI = nullptr; + } if (pseudoToMCOpcode(NewOpc) != -1 && isOperandLegal( MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), @@ -3322,16 +3398,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, updateLiveVariables(LV, MI, *MIB); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); - killDef(); + if (DefMI) + killDef(); return MIB; } } } - unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 - : IsF64 ? AMDGPU::V_FMA_F64_e64 - : AMDGPU::V_FMA_F32_e64) - : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64); + // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma + // because VOP3 does not allow a literal operand. + // TODO: Remove this restriction for GFX10. + if (Src0Literal) + return nullptr; + + unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 + : IsF64 ? AMDGPU::V_FMA_F64_e64 + : IsLegacy + ? AMDGPU::V_FMA_LEGACY_F32_e64 + : AMDGPU::V_FMA_F32_e64 + : IsF16 ? AMDGPU::V_MAD_F16_e64 + : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64 + : AMDGPU::V_MAD_F32_e64; if (pseudoToMCOpcode(NewOpc) == -1) return nullptr; @@ -3341,7 +3428,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(*Src0) .addImm(Src1Mods ? Src1Mods->getImm() : 0) .add(*Src1) - .addImm(0) // Src mods + .addImm(Src2Mods ? Src2Mods->getImm() : 0) .add(*Src2) .addImm(Clamp ? Clamp->getImm() : 0) .addImm(Omod ? Omod->getImm() : 0); @@ -3383,6 +3470,9 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) return true; + if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0) + return true; + // Target-independent instructions do not have an implicit-use of EXEC, even // when they operate on VGPRs. Treating EXEC modifications as scheduling // boundaries prevents incorrect movements of such instructions. 
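To make the literal folding above concrete: the VOP2 forms it targets each carry exactly one 32-bit literal K at a fixed position in d = a * b + c, so at most one shape can apply. v_madak_f32 d, a, b, K computes d = a * b + K, while v_madmk_f32 d, a, K, c computes d = a * K + c (v_fmaak/v_fmamk are the FMA counterparts). A compact, hypothetical restatement of the selection order used by convertToThreeAddress; pickMadFold is illustrative only, and the real code additionally checks pseudoToMCOpcode and operand legality before committing:

  // Which single-literal VOP2 form fits d = src0 * src1 + src2?
  enum class MadFold { AK, MK, None };

  static MadFold pickMadFold(bool Src0IsImm, bool Src1IsImm, bool Src2IsImm) {
    if (Src2IsImm)
      return MadFold::AK;  // literal addend -> v_madak / v_fmaak
    if (Src1IsImm || Src0IsImm)
      return MadFold::MK;  // literal multiplicand -> v_madmk / v_fmamk
    return MadFold::None;  // fall back to three-address v_mad / v_fma
  }

This also explains the new Src0Literal bail-outs above: once src0 already holds a literal, only the MK form (with src0 taking the literal slot) remains legal, because VOP3 mad/fma cannot encode a literal at all on these targets.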
@@ -3676,11 +3766,8 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, } bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { - return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || - hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || - hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) || - hasModifiersSet(MI, AMDGPU::OpName::clamp) || - hasModifiersSet(MI, AMDGPU::OpName::omod); + return any_of(ModifierOpNames, + [&](unsigned Name) { return hasModifiersSet(MI, Name); }); } bool SIInstrInfo::canShrink(const MachineInstr &MI, @@ -3754,18 +3841,19 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI, MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, unsigned Op32) const { - MachineBasicBlock *MBB = MI.getParent();; + MachineBasicBlock *MBB = MI.getParent(); MachineInstrBuilder Inst32 = BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32)) .setMIFlags(MI.getFlags()); // Add the dst operand if the 32-bit encoding also has an explicit $vdst. // For VOPC instructions, this is replaced by an implicit def of vcc. - int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst); - if (Op32DstIdx != -1) { + if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst) != -1) { // dst Inst32.add(MI.getOperand(0)); - } else { + } else if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::sdst) != -1) { + // VOPCX instructions won't be writing to an explicit dst, so this should + // not fail for these instructions. assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) && "Unexpected case"); @@ -3816,7 +3904,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); // Null is free - if (MO.getReg() == AMDGPU::SGPR_NULL) + if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64) return false; // SGPRs use the constant bus @@ -3951,6 +4039,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: + case AMDGPU::OPERAND_REG_IMM_V2FP32: break; case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: @@ -4031,9 +4120,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); - const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; - - for (int OpIdx: OpIndicies) { + for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) { if (OpIdx == -1) continue; const MachineOperand &MO = MI.getOperand(OpIdx); @@ -4150,24 +4237,25 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } // Verify VOP*. Ignore multiple sgpr operands on writelane. - if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32 - && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) { - // Only look at the true operands. Only a real operand can use the constant - // bus, and we don't want to check pseudo-operands like the source modifier - // flags. 
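The any_of rewrite of hasAnyModifiersSet above replaces five explicit checks with a loop over a name table. A self-contained sketch of the same pattern follows; ModifierOpNames is a stand-in for the real table in SIInstrInfo.cpp, and a plain bitmask stands in for the MachineInstr query.

    #include <algorithm>
    #include <array>
    #include <cstdio>

    enum OpName { src0_modifiers, src1_modifiers, src2_modifiers, clamp, omod };

    constexpr std::array<OpName, 5> ModifierOpNames = {
        src0_modifiers, src1_modifiers, src2_modifiers, clamp, omod};

    // Stand-in for hasModifiersSet(MI, Name): bit N set => modifier N present.
    bool hasModifiersSet(unsigned SetMask, OpName Name) {
      return SetMask & (1u << Name);
    }

    bool hasAnyModifiersSet(unsigned SetMask) {
      return std::any_of(ModifierOpNames.begin(), ModifierOpNames.end(),
                         [&](OpName Name) { return hasModifiersSet(SetMask, Name); });
    }

    int main() {
      printf("%d %d\n", hasAnyModifiersSet(0), hasAnyModifiersSet(1u << clamp)); // 0 1
    }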
- const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; - + if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) { unsigned ConstantBusCount = 0; bool UsesLiteral = false; const MachineOperand *LiteralVal = nullptr; - if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) + int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); + if (ImmIdx != -1) { ++ConstantBusCount; + UsesLiteral = true; + LiteralVal = &MI.getOperand(ImmIdx); + } SmallVector<Register, 2> SGPRsUsed; Register SGPRUsed; - for (int OpIdx : OpIndices) { + // Only look at the true operands. Only a real operand can use the constant + // bus, and we don't want to check pseudo-operands like the source modifier + // flags. + for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { if (OpIdx == -1) break; const MachineOperand &MO = MI.getOperand(OpIdx); @@ -4186,8 +4274,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, UsesLiteral = true; LiteralVal = &MO; } else if (!MO.isIdenticalTo(*LiteralVal)) { - assert(isVOP3(MI)); - ErrInfo = "VOP3 instruction uses more than one literal"; + assert(isVOP2(MI) || isVOP3(MI)); + ErrInfo = "VOP2/VOP3 instruction uses more than one literal"; return false; } } @@ -4196,7 +4284,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, SGPRUsed = findImplicitSGPRRead(MI); if (SGPRUsed != AMDGPU::NoRegister) { - // Implicit uses may safely overlap true overands + // Implicit uses may safely overlap true operands if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { return !RI.regsOverlap(SGPRUsed, SGPR); })) { @@ -4225,7 +4313,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, unsigned SGPRCount = 0; Register SGPRUsed = AMDGPU::NoRegister; - for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { + for (int OpIdx : {Src0Idx, Src1Idx}) { if (OpIdx == -1) break; @@ -4272,16 +4360,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, if (isSOP2(MI) || isSOPC(MI)) { const MachineOperand &Src0 = MI.getOperand(Src0Idx); const MachineOperand &Src1 = MI.getOperand(Src1Idx); - unsigned Immediates = 0; - if (!Src0.isReg() && - !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType)) - Immediates++; - if (!Src1.isReg() && - !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType)) - Immediates++; - - if (Immediates > 1) { + if (!Src0.isReg() && !Src1.isReg() && + !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType) && + !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType) && + !Src0.isIdenticalTo(Src1)) { ErrInfo = "SOP2/SOPC instruction requires too many immediate constants"; return false; } @@ -4364,10 +4447,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } if (isSMRD(MI)) { - if (MI.mayStore()) { + if (MI.mayStore() && + ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { // The register offset form of scalar stores may only use m0 as the // soffset register.
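The verifier hunks above enforce two budgets at once: every distinct SGPR or literal consumes the constant bus, and VOP2/VOP3 may encode at most one distinct literal value. A standalone model of that counting, under the simplifying assumption that each source is just an optional SGPR plus an optional literal:

    #include <cstdint>
    #include <cstdio>
    #include <optional>
    #include <set>
    #include <vector>

    struct Src { std::optional<unsigned> SGPR; std::optional<int64_t> Literal; };

    bool verify(const std::vector<Src> &Srcs, int BusLimit) {
      std::set<unsigned> SGPRsUsed;
      std::optional<int64_t> LiteralVal;
      int ConstantBusCount = 0;
      for (const Src &S : Srcs) {
        if (S.SGPR && SGPRsUsed.insert(*S.SGPR).second)
          ++ConstantBusCount;          // each distinct SGPR counts once
        if (S.Literal) {
          if (!LiteralVal) {
            LiteralVal = *S.Literal;   // the first literal also uses the bus
            ++ConstantBusCount;
          } else if (*LiteralVal != *S.Literal) {
            return false;              // more than one distinct literal
          }
        }
      }
      return ConstantBusCount <= BusLimit;
    }

    int main() {
      // One SGPR plus one (repeated) literal: legal with a bus limit of 2,
      // illegal with the pre-GFX10 limit of 1.
      std::vector<Src> Srcs = {{5u, {}}, {{}, 42}, {{}, 42}};
      printf("%d %d\n", verify(Srcs, 2), verify(Srcs, 1)); // 1 0
    }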
- const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); + const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soffset); if (Soff && Soff->getReg() != AMDGPU::M0) { ErrInfo = "scalar stores must use m0 as offset register"; return false; @@ -4477,7 +4561,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); - int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO && ((DstIdx >= 0 && @@ -4527,24 +4610,45 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } - if (ST.needsAlignedVGPRs() && - (MI.getOpcode() == AMDGPU::DS_GWS_INIT || - MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || - MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) { - const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0); - Register Reg = Op->getReg(); - bool Aligned = true; - if (Reg.isPhysical()) { - Aligned = !(RI.getHWRegIndex(Reg) & 1); - } else { + if (ST.needsAlignedVGPRs()) { + const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool { + const MachineOperand *Op = getNamedOperand(MI, OpName); + if (!Op) + return true; + Register Reg = Op->getReg(); + if (Reg.isPhysical()) + return !(RI.getHWRegIndex(Reg) & 1); const TargetRegisterClass &RC = *MRI.getRegClass(Reg); - Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && - !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); + return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && + !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); + }; + + if (MI.getOpcode() == AMDGPU::DS_GWS_INIT || + MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || + MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) { + + if (!isAlignedReg(AMDGPU::OpName::data0)) { + ErrInfo = "Subtarget requires even aligned vector registers " + "for DS_GWS instructions"; + return false; + } + } + + if (isMIMG(MI)) { + if (!isAlignedReg(AMDGPU::OpName::vaddr)) { + ErrInfo = "Subtarget requires even aligned vector registers " + "for vaddr operand of image instructions"; + return false; + } } + } - if (!Aligned) { - ErrInfo = "Subtarget requires even aligned vector registers " - "for DS_GWS instructions"; + if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && + !ST.hasGFX90AInsts()) { + const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0); + if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) { + ErrInfo = "Invalid register class: " + "v_accvgpr_write with an SGPR is not supported on this GPU"; return false; } } @@ -4641,26 +4745,40 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { "Unexpected scalar opcode without corresponding vector one!"); } -static unsigned adjustAllocatableRegClass(const GCNSubtarget &ST, - const MachineRegisterInfo &MRI, - const MCInstrDesc &TID, - unsigned RCID, - bool IsAllocatable) { +static const TargetRegisterClass * +adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, + const MachineRegisterInfo &MRI, + const MCInstrDesc &TID, unsigned RCID, + bool IsAllocatable) { if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && (((TID.mayLoad() || TID.mayStore()) && !(TID.TSFlags & SIInstrFlags::VGPRSpill)) || (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) { switch (RCID) { - case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID; - case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID; - case AMDGPU::AV_96RegClassID: return AMDGPU::VReg_96RegClassID; - case 
AMDGPU::AV_128RegClassID: return AMDGPU::VReg_128RegClassID; - case AMDGPU::AV_160RegClassID: return AMDGPU::VReg_160RegClassID; + case AMDGPU::AV_32RegClassID: + RCID = AMDGPU::VGPR_32RegClassID; + break; + case AMDGPU::AV_64RegClassID: + RCID = AMDGPU::VReg_64RegClassID; + break; + case AMDGPU::AV_96RegClassID: + RCID = AMDGPU::VReg_96RegClassID; + break; + case AMDGPU::AV_128RegClassID: + RCID = AMDGPU::VReg_128RegClassID; + break; + case AMDGPU::AV_160RegClassID: + RCID = AMDGPU::VReg_160RegClassID; + break; + case AMDGPU::AV_512RegClassID: + RCID = AMDGPU::VReg_512RegClassID; + break; default: break; } } - return RCID; + + return RI.getProperlyAlignedRC(RI.getRegClass(RCID)); } const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, @@ -4673,7 +4791,7 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, bool IsAllocatable = false; if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) { // vdst and vdata should be both VGPR or AGPR, same for the DS instructions - // with two data operands. Request register class constainted to VGPR only + // with two data operands. Request register class constrained to VGPR only // of both operands present as Machine Copy Propagation can not check this // constraint and possibly other passes too. // @@ -4690,9 +4808,8 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, AMDGPU::OpName::data1) != -1; } } - RegClass = adjustAllocatableRegClass(ST, MF.getRegInfo(), TID, RegClass, - IsAllocatable); - return RI.getRegClass(RegClass); + return adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass, + IsAllocatable); } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, @@ -4709,8 +4826,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, } unsigned RCID = Desc.OpInfo[OpNo].RegClass; - RCID = adjustAllocatableRegClass(ST, MRI, Desc, RCID, true); - return RI.getRegClass(RCID); + return adjustAllocatableRegClass(ST, RI, MRI, Desc, RCID, true); } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { @@ -4797,7 +4913,7 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm( void SIInstrInfo::swapOperands(MachineInstr &Inst) const { assert(Inst.getNumExplicitOperands() == 3); MachineOperand Op1 = Inst.getOperand(1); - Inst.RemoveOperand(1); + Inst.removeOperand(1); Inst.addOperand(Op1); } @@ -4851,9 +4967,9 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, MO = &MI.getOperand(OpIdx); int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); - int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; + int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 
1 : 0; if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { - if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--) + if (isLiteralConstantLike(*MO, OpInfo) && !LiteralLimit--) return false; SmallDenseSet<unsigned> SGPRsUsed; @@ -4872,12 +4988,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; SGPRsUsed.insert(SGPR); } - } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { - if (--ConstantBusLimit <= 0) - return false; - } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) && - isLiteralConstantLike(Op, InstDesc.OpInfo[i])) { - if (!VOP3LiteralLimit--) + } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32 || + (AMDGPU::isSISrcOperand(InstDesc, i) && + isLiteralConstantLike(Op, InstDesc.OpInfo[i]))) { + if (!LiteralLimit--) return false; if (--ConstantBusLimit <= 0) return false; @@ -4886,7 +5000,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, } if (MO->isReg()) { - assert(DefinedRC); + if (!DefinedRC) { + // This operand allows any register. + return true; + } if (!isLegalRegOperand(MRI, OpInfo, *MO)) return false; bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); @@ -4916,7 +5033,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) return false; } - if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && + if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() && (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && RI.isSGPRReg(MRI, MO->getReg())) return false; @@ -5186,7 +5303,7 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); SBase->setReg(SGPR); } - MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff); + MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soffset); if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); SOff->setReg(SGPR); @@ -5232,16 +5349,16 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { const MCInstrDesc &NewDesc = get(NewOpc); Inst.setDesc(NewDesc); - // Callers expect interator to be valid after this call, so modify the + // Callers expect iterator to be valid after this call, so modify the // instruction in place. if (OldVAddrIdx == NewVAddrIdx) { MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx); // Clear use list from the old vaddr holding a zero register. MRI.removeRegOperandFromUseList(&NewVAddr); MRI.moveOperands(&NewVAddr, &SAddr, 1); - Inst.RemoveOperand(OldSAddrIdx); + Inst.removeOperand(OldSAddrIdx); // Update the use list with the pointer we have just moved from vaddr to - // saddr poisition. Otherwise new vaddr will be missing from the use list. + // saddr position. Otherwise new vaddr will be missing from the use list. MRI.removeRegOperandFromUseList(&NewVAddr); MRI.addRegOperandToUseList(&NewVAddr); } else { @@ -5251,14 +5368,14 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in); - // RemoveOperand doesn't try to fixup tied operand indexes at it goes, so + // removeOperand doesn't try to fixup tied operand indexes as it goes, so // it asserts. Untie the operands for now and retie them afterwards.
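The LiteralLimit setup above collapses the old VOP3-only budget into a single value: non-VOP3 encodings always get one literal slot, while VOP3 gets one only on subtargets with the VOP3-literal feature. A tiny standalone sketch of the assumed semantics:

    #include <cstdio>

    int literalLimit(bool IsVOP3, bool HasVOP3Literal) {
      return !IsVOP3 || HasVOP3Literal ? 1 : 0;
    }

    int main() {
      printf("%d %d %d\n",
             literalLimit(false, false), // VOP1/VOP2: 1
             literalLimit(true, false),  // VOP3 without the feature: 0
             literalLimit(true, true));  // VOP3 with the feature: 1
    }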
if (NewVDstIn != -1) { int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); Inst.untieRegOperand(OldVDstIn); } - Inst.RemoveOperand(OldVAddrIdx); + Inst.removeOperand(OldVAddrIdx); if (NewVDstIn != -1) { int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); @@ -5340,7 +5457,8 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, static void emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, - const DebugLoc &DL, MachineOperand &Rsrc) { + MachineBasicBlock &BodyBB, const DebugLoc &DL, + MachineOperand &Rsrc) { MachineFunction &MF = *OrigBB.getParent(); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); @@ -5398,7 +5516,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, else Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2)); - // Combine the comparision results with AND. + // Combine the comparison results with AND. if (CondReg == AMDGPU::NoRegister) // First. CondReg = NewCondReg; else { // If not the first, we create an AND. @@ -5433,14 +5551,14 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, .addReg(CondReg, RegState::Kill); // The original instruction is here; we insert the terminators after it. - I = LoopBB.end(); + I = BodyBB.end(); // Update EXEC, switch all done bits to 0 and all todo bits to 1. - BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec) + BuildMI(BodyBB, I, DL, TII.get(XorTermOpc), Exec) .addReg(Exec) .addReg(SaveExec); - BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); + BuildMI(BodyBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); } // Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register @@ -5487,31 +5605,35 @@ loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, // To insert the loop we need to split the block. Move everything after this // point to a new block, and insert a new empty block between the two. MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); MachineFunction::iterator MBBI(MBB); ++MBBI; MF.insert(MBBI, LoopBB); + MF.insert(MBBI, BodyBB); MF.insert(MBBI, RemainderBB); - LoopBB->addSuccessor(LoopBB); - LoopBB->addSuccessor(RemainderBB); + LoopBB->addSuccessor(BodyBB); + BodyBB->addSuccessor(LoopBB); + BodyBB->addSuccessor(RemainderBB); - // Move Begin to MI to the LoopBB, and the remainder of the block to + // Move Begin to MI to the BodyBB, and the remainder of the block to // RemainderBB. RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end()); - LoopBB->splice(LoopBB->begin(), &MBB, Begin, MBB.end()); + BodyBB->splice(BodyBB->begin(), &MBB, Begin, MBB.end()); MBB.addSuccessor(LoopBB); // Update dominators. We know that MBB immediately dominates LoopBB, that - // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately - // dominates all of the successors transferred to it from MBB that MBB used - // to properly dominate. + // LoopBB immediately dominates BodyBB, and BodyBB immediately dominates + // RemainderBB. RemainderBB immediately dominates all of the successors + // transferred to it from MBB that MBB used to properly dominate.
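The control flow built above can be pictured with a toy model (plain structs, not MachineBasicBlock; illustrative only): the loop header re-reads and compares the resource lanes, the body holds the spliced original instruction plus the exec update, and SI_WATERFALL_LOOP branches from the body back to the header until every lane has been serviced.

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Block { std::string Name; std::vector<Block *> Succs; };

    int main() {
      Block MBB{"entry"}, Loop{"loop"}, Body{"body"}, Rem{"remainder"};
      MBB.Succs = {&Loop};        // fall through into the header
      Loop.Succs = {&Body};       // read/compare lanes, then run the body
      Body.Succs = {&Loop, &Rem}; // SI_WATERFALL_LOOP back edge, or done
      for (Block *B : {&MBB, &Loop, &Body, &Rem}) {
        printf("%s ->", B->Name.c_str());
        for (Block *S : B->Succs)
          printf(" %s", S->Name.c_str());
        printf("\n");
      }
    }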
if (MDT) { MDT->addNewBlock(LoopBB, &MBB); - MDT->addNewBlock(RemainderBB, LoopBB); + MDT->addNewBlock(BodyBB, LoopBB); + MDT->addNewBlock(RemainderBB, BodyBB); for (auto &Succ : RemainderBB->successors()) { if (MDT->properlyDominates(&MBB, Succ)) { MDT->changeImmediateDominator(Succ, RemainderBB); @@ -5519,12 +5641,12 @@ loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, } } - emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc); + emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, Rsrc); // Restore the EXEC mask MachineBasicBlock::iterator First = RemainderBB->begin(); BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec); - return LoopBB; + return BodyBB; } // Extract pointer from Rsrc and return a zero-value Rsrc replacement. @@ -5762,7 +5884,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI, if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()), RI.getRegClass(RsrcRC))) { // The operands are legal. - // FIXME: We may need to legalize operands besided srsrc. + // FIXME: We may need to legalize operands besides srsrc. return CreatedBB; } @@ -5836,7 +5958,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI, MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); - // Atomics rith return have have an additional tied operand and are + // Atomics with return have an additional tied operand and are // missing some of the special bits. MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); MachineInstr *Addr64; @@ -6050,7 +6172,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC) .addReg(EXEC) .addReg(IsSCC ? VCC : CondReg); - Inst.RemoveOperand(1); + Inst.removeOperand(1); } break; @@ -6060,6 +6182,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, case AMDGPU::S_PACK_LL_B32_B16: case AMDGPU::S_PACK_LH_B32_B16: + case AMDGPU::S_PACK_HL_B32_B16: case AMDGPU::S_PACK_HH_B32_B16: movePackToVALU(Worklist, MRI, Inst); Inst.eraseFromParent(); @@ -6217,7 +6340,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); if (Op.isUse()) addSCCDefsToVALUWorklist(Op, Worklist); - Inst.RemoveOperand(i); + Inst.removeOperand(i); } } @@ -6247,7 +6370,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. - Inst.RemoveOperand(2); // Remove old immediate. + Inst.removeOperand(2); // Remove old immediate. Inst.addOperand(MachineOperand::CreateImm(Offset)); Inst.addOperand(MachineOperand::CreateImm(BitWidth)); } @@ -6281,7 +6404,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, // these are deleted later, but at -O0 it would leave a suspicious // looking illegal copy of an undef register. 
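The S_BFE handling above splits one packed immediate into separate offset and width operands. A standalone worked example of that field extraction:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Imm = (8u << 16) | 4u;             // width 8, offset 4
      uint32_t Offset = Imm & 0x3f;               // extract bits [5:0]
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // extract bits [22:16]
      printf("offset=%u width=%u\n", Offset, BitWidth); // offset=4 width=8
    }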
for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) - Inst.RemoveOperand(I); + Inst.removeOperand(I); Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); continue; } @@ -6323,7 +6446,7 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); - Inst.RemoveOperand(3); + Inst.removeOperand(3); Inst.setDesc(get(NewOpc)); Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit @@ -6467,7 +6590,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can // invert either source and then perform the XOR. If either source is a // scalar register, then we can leave the inversion on the scalar unit to - // acheive a better distrubution of scalar and vector instructions. + // achieve a better distribution of scalar and vector instructions. bool Src0IsSGPR = Src0.isReg() && RI.isSGPRClass(MRI.getRegClass(Src0.getReg())); bool Src1IsSGPR = Src1.isReg() && @@ -6689,7 +6812,7 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist, legalizeOperands(*LoHalf, MDT); legalizeOperands(*HiHalf, MDT); - // Move all users of this moved vlaue. + // Move all users of this moved value. addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } @@ -6753,7 +6876,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist, Worklist.insert(&LoHalf); Worklist.insert(&HiHalf); - // Move all users of this moved vlaue. + // Move all users of this moved value. addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } @@ -6831,7 +6954,7 @@ void SIInstrInfo::splitScalar64BitBCNT( MRI.replaceRegWith(Dest.getReg(), ResultReg); - // We don't need to legalize operands here. src0 for etiher instruction can be + // We don't need to legalize operands here. src0 for either instruction can be // an SGPR, and the second input is unused or determined here. addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } @@ -6973,6 +7096,17 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, .add(Src1); break; } + case AMDGPU::S_PACK_HL_B32_B16: { + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) + .addImm(16) + .add(Src0); + BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg) + .add(Src1) + .addImm(16) + .addReg(TmpReg, RegState::Kill); + break; + } case AMDGPU::S_PACK_HH_B32_B16: { Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -7045,7 +7179,7 @@ void SIInstrInfo::addSCCDefsToVALUWorklist(MachineOperand &Op, assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isUse()); MachineInstr *SCCUseInst = Op.getParent(); - // Look for a preceeding instruction that either defines VCC or SCC. If VCC + // Look for a preceding instruction that either defines VCC or SCC. If VCC // then there is nothing to do because the defining instruction has been // converted to a VALU already. If SCC then that instruction needs to be // converted to a VALU. @@ -7191,7 +7325,10 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { - return (AMDGPU::MTBUFFormat::UFMT_32_FLOAT << 44) | + int64_t Format = ST.getGeneration() >= AMDGPUSubtarget::GFX11 ? 
+ AMDGPU::UfmtGFX11::UFMT_32_FLOAT : + AMDGPU::UfmtGFX10::UFMT_32_FLOAT; + return (Format << 44) | (1ULL << 56) | // RESOURCE_LEVEL = 1 (3ULL << 60); // OOB_SELECT = 3 } @@ -7332,7 +7469,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return DescSize; bool HasLiteral = false; for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { - if (isLiteralConstant(MI, I)) { + const MachineOperand &Op = MI.getOperand(I); + const MCOperandInfo &OpInfo = Desc.OpInfo[I]; + if (isLiteralConstantLike(Op, OpInfo)) { HasLiteral = true; break; } @@ -7513,6 +7652,16 @@ SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { return makeArrayRef(TargetFlags); } +ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> +SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { + static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = + { + {MONoClobber, "amdgpu-noclobber"}, + }; + + return makeArrayRef(TargetFlags); +} + bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); @@ -7690,6 +7839,7 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, } // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td +// and the columns of the getMCOpcodeGen table. enum SIEncodingFamily { SI = 0, VI = 1, @@ -7699,7 +7849,9 @@ enum SIEncodingFamily { GFX9 = 5, GFX10 = 6, SDWA10 = 7, - GFX90A = 8 + GFX90A = 8, + GFX940 = 9, + GFX11 = 10, }; static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) { @@ -7714,6 +7866,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) { return SIEncodingFamily::VI; case AMDGPUSubtarget::GFX10: return SIEncodingFamily::GFX10; + case AMDGPUSubtarget::GFX11: + return SIEncodingFamily::GFX11; } llvm_unreachable("Unknown subtarget generation!"); } @@ -7779,6 +7933,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { if (ST.hasGFX90AInsts()) { uint16_t NMCOp = (uint16_t)-1; + if (ST.hasGFX940Insts()) + NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940); + if (NMCOp == (uint16_t)-1) NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A); if (NMCOp == (uint16_t)-1) NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX9); @@ -7925,7 +8082,7 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, auto &UseInst = *Use.getParent(); // Don't bother searching between blocks, although it is possible this block // doesn't modify exec.
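The S_PACK_HL_B32_B16 lowering added to movePackToVALU above emits V_LSHRREV_B32 followed by V_LSHL_OR_B32. A standalone check of the resulting bit layout (assuming HL means the high half of src0 lands in the low half of the result and the low half of src1 in the high half, which is what the emitted sequence computes):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Src0 = 0xAAAA1111, Src1 = 0x2222BBBB;
      uint32_t Tmp = Src0 >> 16;            // V_LSHRREV_B32 16, src0
      uint32_t Result = (Src1 << 16) | Tmp; // V_LSHL_OR_B32 src1, 16, tmp
      printf("0x%08X\n", Result);           // 0xBBBBAAAA
    }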
- if (UseInst.getParent() != DefBB) + if (UseInst.getParent() != DefBB || UseInst.isPHI()) return true; if (++NumUse > MaxUseScan) @@ -8150,7 +8307,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, this](int64_t ExpectedValue, unsigned SrcSize, - bool IsReversable, bool IsSigned) -> bool { + bool IsReversible, bool IsSigned) -> bool { // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n // s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n @@ -8208,7 +8365,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, bool IsReversedCC = false; if (CmpValue != ExpectedValue) { - if (!IsReversable) + if (!IsReversible) return false; IsReversedCC = CmpValue == (ExpectedValue ^ Mask); if (!IsReversedCC) @@ -8284,3 +8441,37 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return false; } + +void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI, + unsigned OpName) const { + if (!ST.needsAlignedVGPRs()) + return; + + int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); + if (OpNo < 0) + return; + MachineOperand &Op = MI.getOperand(OpNo); + if (getOpSize(MI, OpNo) > 4) + return; + + // Add implicit aligned super-reg to force alignment on the data operand. + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock *BB = MI.getParent(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + Register DataReg = Op.getReg(); + bool IsAGPR = RI.isAGPR(MRI, DataReg); + Register Undef = MRI.createVirtualRegister( + IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); + BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef); + Register NewVR = + MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass + : &AMDGPU::VReg_64_Align2RegClass); + BuildMI(*BB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewVR) + .addReg(DataReg, 0, Op.getSubReg()) + .addImm(AMDGPU::sub0) + .addReg(Undef) + .addImm(AMDGPU::sub1); + Op.setReg(NewVR); + Op.setSubReg(AMDGPU::sub0); + MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); +} diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index e551d6c7223f..311f9f68e675 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H #include "AMDGPUMIRFormatter.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SetVector.h" @@ -35,6 +36,11 @@ class RegScavenger; class TargetRegisterClass; class ScheduleHazardRecognizer; +/// Mark the MMO of a uniform load if there are no potentially clobbering stores +/// on any path from the start of an entry function to this load. 
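The compare folding in optimizeCompareInstr above rests on a small arithmetic fact: for a single-bit mask, the SCC written by s_and_b32 (result nonzero) always agrees with the SCC of a following s_cmp_eq against the same mask, so the compare is redundant. A standalone model of that equivalence:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Mask = 1u << 4; // 1 << n with a single bit set
      for (uint32_t Src : {0u, 0x10u, 0xFFu}) {
        uint32_t And = Src & Mask;
        bool SCCFromAnd = And != 0;    // SCC as written by s_and_b32
        bool SCCFromCmp = And == Mask; // SCC as written by s_cmp_eq_u32
        printf("src=0x%02X: %d %d\n", Src, SCCFromAnd, SCCFromCmp); // equal
      }
    }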
+static const MachineMemOperand::Flags MONoClobber = + MachineMemOperand::MOTargetFlag1; + class SIInstrInfo final : public AMDGPUGenInstrInfo { private: const SIRegisterInfo RI; @@ -323,15 +329,14 @@ public: Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override; - unsigned getAddressSpaceForPseudoSourceKind( - unsigned Kind) const override; - bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override; static bool isFoldableCopy(const MachineInstr &MI); + void removeModOperands(MachineInstr &MI) const; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final; @@ -549,6 +554,14 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::EXP; } + static bool isDualSourceBlendEXP(const MachineInstr &MI) { + if (!isEXP(MI)) + return false; + unsigned Target = MI.getOperand(0).getImm(); + return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 || + Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1; + } + bool isEXP(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::EXP; } @@ -651,14 +664,43 @@ public: return get(Opcode).TSFlags & SIInstrFlags::IsMAI; } + static bool isMFMA(const MachineInstr &MI) { + return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && + MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; + } + static bool isDOT(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; } + static bool isWMMA(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA; + } + + bool isWMMA(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsWMMA; + } + bool isDOT(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsDOT; } + static bool isLDSDIR(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; + } + + bool isLDSDIR(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::LDSDIR; + } + + static bool isVINTERP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VINTERP; + } + + bool isVINTERP(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::VINTERP; + } + static bool isScalarUnit(const MachineInstr &MI) { return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); } @@ -1036,6 +1078,9 @@ public: ArrayRef<std::pair<unsigned, const char *>> getSerializableDirectMachineOperandTargetFlags() const override; + ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> + getSerializableMachineMemOperandTargetFlags() const override; + ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; @@ -1132,6 +1177,11 @@ public: static unsigned getDSShaderTypeValue(const MachineFunction &MF); const TargetSchedModel &getSchedModel() const { return SchedModel; } + + // Enforce operand's \p OpName even alignment if required by target. + // This is used if an operand is a 32 bit register but needs to be aligned + // regardless. + void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const; }; /// \brief Returns true if a reg:subreg pair P has a TRC class @@ -1209,9 +1259,6 @@ namespace AMDGPU { LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode); - LLVM_READONLY - int getMUBUFNoLdsInst(uint16_t Opcode); - LLVM_READONLY int getAtomicNoRetOp(uint16_t Opcode); @@ -1236,6 +1283,11 @@ namespace AMDGPU { LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode); + /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode + /// of an SVS (SADDR + VADDR) form.
+ LLVM_READONLY + int getFlatScratchInstSVfromSVS(uint16_t Opcode); + /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode /// of an SV (VADDR) form. LLVM_READONLY @@ -1250,6 +1302,10 @@ namespace AMDGPU { LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode); + /// \returns v_cmpx version of a v_cmp instruction. + LLVM_READONLY + int getVCMPXOpFromVCMP(uint16_t Opcode); + const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 713a08907e99..29ee9f12b12d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===// +//===-- SIInstrInfo.td -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -17,7 +17,8 @@ class GCNPredicateControl : PredicateControl { } // Except for the NONE field, this must be kept in sync with the -// SIEncodingFamily enum in AMDGPUInstrInfo.cpp +// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the +// getMCOpcodeGen table. def SIEncodingFamily { int NONE = -1; int SI = 0; @@ -29,6 +30,8 @@ def SIEncodingFamily { int GFX10 = 6; int SDWA10 = 7; int GFX90A = 8; + int GFX940 = 9; + int GFX11 = 10; } //===----------------------------------------------------------------------===// @@ -190,6 +193,44 @@ def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; +multiclass SDBufferAtomicRetNoRet { + def "_ret" : PatFrag< + (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, + node:$offset, node:$cachepolicy, node:$idxen), + (!cast(NAME) node:$vdata_in, node:$rsrc, node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }]; + let GISelPredicateCode = [{ return true; }]; + } + + def "_noret" : PatFrag< + (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, + node:$offset, node:$cachepolicy, node:$idxen), + (!cast(NAME) node:$vdata_in, node:$rsrc, node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; + let GISelPredicateCode = [{ return false; }]; + } +} + +defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet; + def 
SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", SDTypeProfile<1, 9, [SDTCisVT<0, i32>, // dst @@ -205,6 +246,26 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] >; +def SIbuffer_atomic_cmpswap_ret : PatFrag< + (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, + node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), + (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }]; + let GISelPredicateCode = [{ return true; }]; +} + +def SIbuffer_atomic_cmpswap_noret : PatFrag< + (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, + node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), + (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; + let GISelPredicateCode = [{ return false; }]; +} + class SDGlobalAtomicNoRtn : SDNode , // vaddr @@ -255,35 +316,57 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue] >; +def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD", + SDTFPRoundOp +>; + +def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD", + SDTFPRoundOp +>; + //===----------------------------------------------------------------------===// // ValueType helpers //===----------------------------------------------------------------------===// // Returns 1 if the source arguments have modifiers, 0 if they do not. -// XXX - do f16 instructions? class isFloatType { bit ret = !or(!eq(SrcVT.Value, f16.Value), !eq(SrcVT.Value, f32.Value), !eq(SrcVT.Value, f64.Value), !eq(SrcVT.Value, v2f16.Value), !eq(SrcVT.Value, v4f16.Value), + !eq(SrcVT.Value, v8f16.Value), + !eq(SrcVT.Value, v16f16.Value), !eq(SrcVT.Value, v2f32.Value), + !eq(SrcVT.Value, v4f32.Value), + !eq(SrcVT.Value, v8f32.Value), !eq(SrcVT.Value, v2f64.Value), !eq(SrcVT.Value, v4f64.Value)); } +// XXX - do v2i16 instructions? class isIntType { bit ret = !or(!eq(SrcVT.Value, i16.Value), !eq(SrcVT.Value, i32.Value), !eq(SrcVT.Value, i64.Value), - !eq(SrcVT.Value, v2i32.Value)); + !eq(SrcVT.Value, v4i16.Value), + !eq(SrcVT.Value, v8i16.Value), + !eq(SrcVT.Value, v16i16.Value), + !eq(SrcVT.Value, v2i32.Value), + !eq(SrcVT.Value, v4i32.Value), + !eq(SrcVT.Value, v8i32.Value)); } class isPackedType { bit ret = !or(!eq(SrcVT.Value, v2i16.Value), !eq(SrcVT.Value, v2f16.Value), !eq(SrcVT.Value, v4f16.Value), - !eq(SrcVT.Value, v2f32.Value)); + !eq(SrcVT.Value, v2i32.Value), + !eq(SrcVT.Value, v2f32.Value), + !eq(SrcVT.Value, v4i32.Value), + !eq(SrcVT.Value, v4f32.Value), + !eq(SrcVT.Value, v8i32.Value), + !eq(SrcVT.Value, v8f32.Value)); } @@ -291,19 +374,10 @@ class isPackedType { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { -let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { - - -defm atomic_inc_#as : binary_atomic_op; -defm atomic_dec_#as : binary_atomic_op; -defm atomic_load_fmin_#as : binary_atomic_op; -defm atomic_load_fmax_#as : binary_atomic_op; - - -} // End let AddressSpaces = ... 
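The _ret/_noret PatFrag pairs above match the same buffer-atomic node and differ only in a predicate on SDValue(N, 0).use_empty(). A C++ toy model of that selection rule (illustrative only; the real mechanism is the TableGen PredicateCode shown above):

    #include <cstdio>

    struct Node { int NumResultUses; };

    // Stand-in for the PredicateCode: pick the variant by result liveness.
    const char *selectVariant(const Node &N) {
      return N.NumResultUses == 0 ? "_noret" : "_ret";
    }

    int main() {
      Node Dead{0}, Live{2};
      printf("%s %s\n", selectVariant(Dead), selectVariant(Live)); // _noret _ret
    }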
-} // End foreach AddrSpace - +defm atomic_inc : binary_atomic_op_all_as; +defm atomic_dec : binary_atomic_op_all_as; +defm atomic_load_fmin : binary_atomic_op_all_as; +defm atomic_load_fmax : binary_atomic_op_all_as; //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. @@ -408,50 +482,36 @@ def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { let IsNonExtLoad = 1; } -let MemoryVT = i8 in { def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; -} -let MemoryVT = i16 in { def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; -} +} // End IsLoad = 1, , AddressSpaces = LoadAddress_local.AddrSpaces def load_align8_local_m0 : PatFrag<(ops node:$ptr), - (load_local_m0 node:$ptr)>, Aligned<8> { + (load_local_m0 node:$ptr)> { let IsLoad = 1; - let IsNonExtLoad = 1; + int MinAlignment = 8; } def load_align16_local_m0 : PatFrag<(ops node:$ptr), - (load_local_m0 node:$ptr)>, Aligned<16> { + (load_local_m0 node:$ptr)> { let IsLoad = 1; - let IsNonExtLoad = 1; + int MinAlignment = 16; } -} // End IsLoad = 1 - let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_8_glue node:$ptr)> { - let MemoryVT = i8; -} + (atomic_load_8_glue node:$ptr)>; def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_16_glue node:$ptr)> { - let MemoryVT = i16; -} + (atomic_load_16_glue node:$ptr)>; def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_32_glue node:$ptr)> { - let MemoryVT = i32; -} + (atomic_load_32_glue node:$ptr)>; def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_64_glue node:$ptr)> { - let MemoryVT = i64; -} - + (atomic_load_64_glue node:$ptr)>; } // End let AddressSpaces = LoadAddress_local.AddrSpaces @@ -485,75 +545,103 @@ def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr), (truncstore_glue node:$val, node:$ptr)> { let IsStore = 1; let MemoryVT = i8; + let IsTruncStore = 1; } def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), (truncstore_glue node:$val, node:$ptr)> { let IsStore = 1; let MemoryVT = i16; + let IsTruncStore = 1; } let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (store_glue node:$val, node:$ptr)> { - let IsStore = 1; - let IsTruncStore = 0; -} - + (store_glue node:$val, node:$ptr)>; def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr)> { - let IsStore = 1; - let MemoryVT = i8; -} - + (truncstorei8_glue node:$val, node:$ptr)>; def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr)> { - let IsStore = 1; - let MemoryVT = i16; -} + (truncstorei16_glue node:$val, node:$ptr)>; } def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr), (store_local_m0 node:$value, node:$ptr)>, Aligned<8> { let IsStore = 1; - let IsTruncStore = 0; } def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr), (store_local_m0 node:$value, node:$ptr)>, Aligned<16> { let IsStore = 1; +} + +let 
PredicateCode = [{return cast(N)->getAlignment() < 4;}], + GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}], + AddressSpaces = [ AddrSpaces.Local ] in { +def load_align_less_than_4_local : PatFrag<(ops node:$ptr), + (load_local node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; +} + +def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; +} + +def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr), + (store_local node:$value, node:$ptr)> { + let IsStore = 1; let IsTruncStore = 0; } -let AddressSpaces = StoreAddress_local.AddrSpaces in { +def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} +} -def atomic_store_local_8_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { +def atomic_store_8_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i8; } -def atomic_store_local_16_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + +def atomic_store_16_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i16; } -def atomic_store_local_32_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + +def atomic_store_32_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i32; } -def atomic_store_local_64_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + +def atomic_store_64_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i64; } -} // End let AddressSpaces = StoreAddress_local.AddrSpaces + +let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { +def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_8_glue node:$ptr, node:$val)>; +def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_16_glue node:$ptr, node:$val)>; +def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_32_glue node:$ptr, node:$val)>; +def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_64_glue node:$ptr, node:$val)>; +} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces def si_setcc_uniform : PatFrag < @@ -686,10 +774,14 @@ multiclass SIAtomicM0Glue2 (NAME#"_glue"), IsInt>; + defm _local_m0 : ret_noret_binary_atomic_op (NAME#"_glue"), + IsInt>; } let AddressSpaces = StoreAddress_region.AddrSpaces in { defm _region_m0 : binary_atomic_op (NAME#"_glue"), IsInt>; + defm _region_m0 : ret_noret_binary_atomic_op (NAME#"_glue"), + IsInt>; } } @@ -954,6 +1046,18 @@ def SWaitMatchClass : AsmOperandClass { let ParserMethod = "parseSWaitCntOps"; } +def DepCtrMatchClass : AsmOperandClass { + let Name = "DepCtr"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseDepCtrOps"; +} + +def SDelayMatchClass : AsmOperandClass { + let Name = "SDelayAlu"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseSDelayAluOps"; +} + def VReg32OrOffClass : AsmOperandClass { let Name = "VReg32OrOff"; let ParserMethod = "parseVReg32OrOff"; @@ -979,6 
+1083,16 @@ def WAIT_FLAG : Operand { let ParserMatchClass = SWaitMatchClass; let PrintMethod = "printWaitFlag"; } + +def DepCtrImm : Operand { + let ParserMatchClass = DepCtrMatchClass; + let PrintMethod = "printDepCtr"; +} + +def DELAY_FLAG : Operand { + let ParserMatchClass = SDelayMatchClass; + let PrintMethod = "printDelayFlag"; +} } // End OperandType = "OPERAND_IMMEDIATE" include "SIInstrFormats.td" @@ -1163,14 +1277,6 @@ def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>; def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>; -def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>; - -def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; -def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; -def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>; -def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>; -def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>; - def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>; def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>; def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>; @@ -1181,6 +1287,14 @@ def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>; def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>; def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>; +def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>; +def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; + +def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; +def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>; +def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>; +def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>; + def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>; def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>; def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>; @@ -1191,6 +1305,9 @@ def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { } +def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>; +def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>; + } // End OperandType = "OPERAND_IMMEDIATE" class KImmMatchClass : AsmOperandClass { @@ -1223,10 +1340,18 @@ class FPInputModsMatchClass : AsmOperandClass { let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods"; } +class FPVCSrcInputModsMatchClass : FPInputModsMatchClass { + let Name = "RegOrInlineImmWithFP"#opSize#"InputMods"; + let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods"; +} + def FP16InputModsMatchClass : FPInputModsMatchClass<16>; def FP32InputModsMatchClass : FPInputModsMatchClass<32>; def FP64InputModsMatchClass : FPInputModsMatchClass<64>; +def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>; +def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>; + class InputMods : Operand { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_INPUT_MODS"; @@ -1241,19 +1366,28 @@ def FP16InputMods : FPInputMods; def FP32InputMods : FPInputMods; def FP64InputMods : FPInputMods; +def FP16VCSrcInputMods : FPInputMods; +def FP32VCSrcInputMods : FPInputMods; + class IntInputModsMatchClass : AsmOperandClass { let Name = "RegOrImmWithInt"#opSize#"InputMods"; let ParserMethod = 
"parseRegOrImmWithIntInputMods"; let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods"; } +class IntVCSrcInputModsMatchClass : IntInputModsMatchClass { + let Name = "RegOrInlineImmWithInt"#opSize#"InputMods"; + let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods"; +} def Int32InputModsMatchClass : IntInputModsMatchClass<32>; def Int64InputModsMatchClass : IntInputModsMatchClass<64>; +def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>; class IntInputMods : InputMods { let PrintMethod = "printOperandAndIntInputMods"; } def Int32InputMods : IntInputMods; def Int64InputMods : IntInputMods; +def Int32VCSrcInputMods : IntInputMods; class OpSelModsMatchClass : AsmOperandClass { let Name = "OpSelMods"; @@ -1366,12 +1500,19 @@ def VOP3OMods : ComplexPattern; def VOP3PMods : ComplexPattern; +def VOP3PModsDOT : ComplexPattern; +def DotIUVOP3PMods : ComplexPattern; +def WMMAOpSelVOP3PMods : ComplexPattern; + def VOP3OpSel : ComplexPattern; def VOP3OpSelMods : ComplexPattern; def VOP3PMadMixMods : ComplexPattern; +def VINTERPMods : ComplexPattern; +def VINTERPModsHi : ComplexPattern; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// @@ -1575,6 +1716,19 @@ class getVOP3SrcForVT { ); } +// Src2 of VOP3 DPP instructions cannot be a literal +class getVOP3DPPSrcForVT { + bit isFP = isFloatType.ret; + RegisterOperand ret = + !if (!eq(VT.Value, i1.Value), SSrc_i1, + !if (isFP, + !if (!eq(VT.Value, f16.Value), VCSrc_f16, + !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)), + !if (!eq(VT.Value, i16.Value), VCSrc_b16, + !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16, + VCSrc_b32)))); +} + // Float or packed int class isModifierType { bit ret = !or(!eq(SrcVT.Value, f16.Value), @@ -1583,7 +1737,17 @@ class isModifierType { !eq(SrcVT.Value, v2f16.Value), !eq(SrcVT.Value, v2i16.Value), !eq(SrcVT.Value, v2f32.Value), - !eq(SrcVT.Value, v2i32.Value)); + !eq(SrcVT.Value, v2i32.Value), + !eq(SrcVT.Value, v4f16.Value), + !eq(SrcVT.Value, v4i16.Value), + !eq(SrcVT.Value, v4f32.Value), + !eq(SrcVT.Value, v4i32.Value), + !eq(SrcVT.Value, v8f16.Value), + !eq(SrcVT.Value, v8i16.Value), + !eq(SrcVT.Value, v8f32.Value), + !eq(SrcVT.Value, v8i32.Value), + !eq(SrcVT.Value, v16f16.Value), + !eq(SrcVT.Value, v16i16.Value)); } // Return type of input modifiers operand for specified input operand @@ -1611,6 +1775,17 @@ class getSrcModDPP { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +// Return type of input modifiers operand for specified input operand for DPP +class getSrcModVOP3DPP { + bit isFP = isFloatType.ret; + bit isPacked = isPackedType.ret; + Operand ret = + !if (isFP, + !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods, + FP32VCSrcInputMods), + !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods)); +} + // Return type of input modifiers operand specified input operand for SDWA class getSrcModSDWA { Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods, @@ -1620,7 +1795,7 @@ class getSrcModSDWA { } // Returns the input arguments for VOP[12C] instructions for the given SrcVT. 
-class getIns32 { +class getIns32 { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2 (ins))); @@ -1715,19 +1890,21 @@ class getInsVOP3Base.ret; dag opsel = (ins op_sel0:$op_sel); - dag vop3pFields = (ins op_sel_hi0:$op_sel_hi, neg_lo0:$neg_lo, neg_hi0:$neg_hi); + dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); + dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi)); + dag ret = !con(base, !if(HasOpSel, opsel,(ins)), !if(IsVOP3P, vop3pFields,(ins))); } class getInsVOP3P { dag ret = getInsVOP3Base.ret; + HasOpSel, 1/*IsVOP3P*/>.ret; } class getInsVOP3OpSel { + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { dag ret = !if (!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) @@ -1756,6 +1933,7 @@ class getInsDPPBase { - dag ret = !con(getInsDPPBase.ret, + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + dag ret = !con(getInsDPPBase.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } class getInsDPP16 { - dag ret = !con(getInsDPP.ret, + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + dag ret = !con(getInsDPP.ret, (ins FI:$fi)); } class getInsDPP8 { - dag ret = !con(getInsDPPBase.ret, + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + dag ret = !con(getInsDPPBase.ret, (ins dpp8:$dpp8, FI:$fi)); } +class getInsVOP3DPPBase { + dag old = ( ins OldRC:$old ); + dag base = VOP3Base; + dag ret = !con( + !if(!ne(NumSrcArgs, 0), old, (ins)), + base + ); +} + +class getInsVOP3DPP { + dag ret = !con(getInsVOP3DPPBase.ret, + (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); +} + +class getInsVOP3DPP16 { + dag ret = !con(getInsVOP3DPP.ret, + (ins FI:$fi)); +} + +class getInsVOP3DPP8 { + dag ret = !con(getInsVOP3DPPBase.ret, + (ins dpp8:$dpp8, FI:$fi)); +} // Ins for SDWA class getInsSDWA { !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } +class getAsmVOPDPart { + string dst = "$vdst" # XorY; + string src0 = ", $src0" # XorY; + string src1 = ", $vsrc1" # XorY; + string ret = dst # + !if(!ge(NumSrcArgs, 1), src0, "") # + !if(!ge(NumSrcArgs, 2), src1, ""); +} + // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. 
class getAsm64 { + bit HasClamp, bit HasOpSel> { string dst = "$vdst"; string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); string src1 = !if(!eq(NumSrcArgs, 1), "", @@ -1900,10 +2125,11 @@ class getAsmVOP3P { @@ -1955,15 +2181,63 @@ class getAsmDPP16 - : getAsmDPP { + : getAsmDPP{ let ret = dst#args#" $dpp8$fi"; } +class getAsmVOP3DPPBase { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", + "$vdst"), + ""); // use $sdst for VOPC + string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); + string isrc1 = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1", + " $src1,")); + string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); + + string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); + string fsrc1 = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1_modifiers", + " $src1_modifiers,")); + string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); + + string src0 = !if(Src0HasMods, fsrc0, isrc0); + string src1 = !if(Src1HasMods, fsrc1, isrc1); + string src2 = !if(Src2HasMods, fsrc2, isrc2); + string opsel = !if(HasOpSel, "$op_sel", ""); + string 3PMods = !if(IsVOP3P, + !if(HasOpSel, "$op_sel_hi", "") + #!if(HasModifiers, "$neg_lo$neg_hi", ""), + ""); + string clamp = !if(HasClamp, "$clamp", ""); + string omod = !if(HasOMod, "$omod", ""); + + string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod; + +} + +class getAsmVOP3DPP { + string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; +} + +class getAsmVOP3DPP16 { + string ret = getAsmVOP3DPP.ret # "$fi"; +} + +class getAsmVOP3DPP8 { + string ret = base # " $dpp8$fi"; +} + class getAsmSDWA { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), - " vcc", // use vcc token as dst for VOPC instructioins + " vcc", // use vcc token as dst for VOPC instructions "$vdst"), ""); string src0 = "$src0_modifiers"; @@ -2056,6 +2330,12 @@ class getHasDPP { 1); } +class getHasExt32BitDPP { + bit ret = !and(getHasDPP.ret, + !not(getHas64BitOps.ret)); +} + class getHasExt64BitDPP { bit ret = !and(getHasDPP.ret, @@ -2089,6 +2369,24 @@ class BitAnd { bit ret = !if(a, !if(b, 1, 0), 0); } +class getHasVOP3DPP { + bit ret = !if(!eq(DstVT.Size, 64), + 0, // 64-bit dst No DPP for 64-bit operands + !if(!eq(Src0VT.Size, 64), + 0, // 64-bit src0 + !if(!eq(Src1VT.Size, 64), + 0, // 64-bit src1 + !if(!eq(Src2VT.Size, 64), + 0, // 64-bit src2 + 1 + ) + ) + ) + ); +} + + def PatGenMode { int NoPattern = 0; int Pattern = 1; @@ -2106,15 +2404,20 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field ValueType Src1VT = ArgVT[2]; field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT.ret; + field RegisterOperand DstRC64 = DstRC; field RegisterOperand DstRCDPP = getVALUDstForVT.ret; field RegisterOperand DstRCSDWA = getSDWADstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; - field RegisterClass Src1RC32 = getVregSrcForVT.ret; + field RegisterOperand Src1RC32 = RegisterOperand.ret>; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; field RegisterClass Src0DPP = getVregSrcForVT.ret; field RegisterClass Src1DPP = getVregSrcForVT.ret; + field RegisterClass Src2DPP = getVregSrcForVT.ret; + field RegisterOperand Src0VOP3DPP = VGPRSrc_32; + field RegisterOperand Src1VOP3DPP = VGPRSrc_32; + field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT.ret; field RegisterOperand Src0SDWA = getSDWASrcForVT.ret; field RegisterOperand Src1SDWA = 
getSDWASrcForVT.ret; field Operand Src0Mod = getSrcMod.ret; @@ -2122,6 +2425,8 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field Operand Src2Mod = getSrcMod.ret; field Operand Src0ModDPP = getSrcModDPP.ret; field Operand Src1ModDPP = getSrcModDPP.ret; + field Operand Src2ModDPP = getSrcModDPP.ret; + field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret; field Operand Src0ModSDWA = getSrcModSDWA.ret; field Operand Src1ModSDWA = getSrcModSDWA.ret; @@ -2169,15 +2474,20 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); field bit HasExt = getHasExt.ret; - field bit HasExtDPP = getHasDPP.ret; + field bit HasExtVOP3DPP = getHasVOP3DPP.ret; + field bit HasExtDPP = !if(!or(getHasDPP.ret, + HasExtVOP3DPP), 1, 0); + field bit HasExt32BitDPP = getHasExt32BitDPP.ret; field bit HasExt64BitDPP = getHasExt64BitDPP.ret; field bit HasExtSDWA = getHasSDWA.ret; field bit HasExtSDWA9 = HasExtSDWA; field int NeedPatGen = PatGenMode.NoPattern; field bit IsMAI = 0; + field bit IsVOP3P = 0; field bit IsDOT = 0; field bit IsSingle = 0; + field bit IsWMMA = 0; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -2188,9 +2498,11 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, // VOP3b instructions are a special case with a second explicit // output. This is manually overridden for them. field dag Outs32 = Outs; - field dag Outs64 = Outs; + field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs)); field dag OutsDPP = getOutsDPP.ret; field dag OutsDPP8 = getOutsDPP.ret; + field dag OutsVOP3DPP = OutsDPP; + field dag OutsVOP3DPP8 = OutsDPP8; field dag OutsSDWA = getOutsSDWA.ret; field dag Ins32 = getIns32.ret; @@ -2198,7 +2510,7 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, HasIntClamp, HasModifiers, HasSrc2Mods, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; field dag InsVOP3P = getInsVOP3P.ret; field dag InsVOP3OpSel = getInsVOP3OpSel _ArgVT, bit _EnableF32SrcMods = 0, getOpSelMod.ret, getOpSelMod.ret>.ret; field dag InsDPP = !if(HasExtDPP, - getInsDPP.ret, + getInsDPP.ret, (ins)); - field dag InsDPP16 = getInsDPP16.ret; - field dag InsDPP8 = getInsDPP8.ret; + field dag InsDPP16 = getInsDPP16.ret; + field dag InsDPP8 = getInsDPP8.ret; + field dag InsVOP3Base = getInsVOP3Base.ret; + field dag InsVOP3DPP = getInsVOP3DPP.ret; + field dag InsVOP3DPP16 = getInsVOP3DPP16.ret; + field dag InsVOP3DPP8 = getInsVOP3DPP8.ret; field dag InsSDWA = getInsSDWA.ret; + field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); + // It is a slight misnomer to use the deferred f32 operand type for non-float + // operands, but this operand type will only be used if the other dual + // component is FMAAK or FMAMK + field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); + field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); + field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; - field string AsmVOP3P = getAsmVOP3P.ret; + field string AsmVOP3P = getAsmVOP3P.ret; field string AsmVOP3OpSel = getAsmVOP3OpSel _ArgVT, bit _EnableF32SrcMods = 0, // DPP8 encoding has no fields for modifiers, and it is enforced by setting // the asm operand name via this HasModifiers flag field string 
AsmDPP8 = getAsmDPP8.ret; + field string AsmVOP3DPPBase = getAsmVOP3DPPBase.ret; + field string AsmVOP3DPP = getAsmVOP3DPP.ret; + field string AsmVOP3DPP16 = getAsmVOP3DPP16.ret; + field string AsmVOP3DPP8 = getAsmVOP3DPP8.ret; field string AsmSDWA = getAsmSDWA.ret; field string AsmSDWA9 = getAsmSDWA9.ret; - + field string AsmVOPDX = getAsmVOPDPart.ret; + field string AsmVOPDY = getAsmVOPDPart.ret; field string TieRegDPP = "$old"; } -class VOP_NO_EXT : VOPProfile { + class VOP_NO_EXT : VOPProfile { let HasExt = 0; let HasExtDPP = 0; + let HasExtVOP3DPP = 0; + let HasExt32BitDPP = 0; let HasExt64BitDPP = 0; let HasExtSDWA = 0; let HasExtSDWA9 = 0; @@ -2249,10 +2584,10 @@ class VOP_NO_EXT : VOPProfile { class VOP_PAT_GEN : VOPProfile { let NeedPatGen = mode; } - def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; +def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; @@ -2264,6 +2599,7 @@ def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; +def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>; def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>; @@ -2274,6 +2610,10 @@ def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>; def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>; +def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>; +def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>; +def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>; + def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; @@ -2343,6 +2683,18 @@ def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>; def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>; def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>; +def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; +def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; +def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; +def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; + +def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; +def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; +def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; +def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; +def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; +def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>; + class Commutable_REV { string RevOp = revOp; bit IsOrig = isOrig; @@ -2394,10 +2746,11 @@ multiclass VINTRP_m op, dag outs, dag ins, string asm, def _vi : VINTRP_Real_vi ; - let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { + let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { def _gfx10 : VINTRP_Real_si; - } // End AssemblerPredicate 
= isGFX10Plus, DecoderNamespace = "GFX10" + } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" } + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// @@ -2470,6 +2823,7 @@ def getMCOpcodeGen : InstrMapping { let RowFields = ["PseudoInstr"]; let ColFields = ["Subtarget"]; let KeyCol = [!cast(SIEncodingFamily.NONE)]; + // These columns must be kept in sync with the SIEncodingFamily enumeration. let ValueCols = [[!cast(SIEncodingFamily.SI)], [!cast(SIEncodingFamily.VI)], [!cast(SIEncodingFamily.SDWA)], @@ -2482,7 +2836,9 @@ def getMCOpcodeGen : InstrMapping { [!cast(SIEncodingFamily.GFX9)], [!cast(SIEncodingFamily.GFX10)], [!cast(SIEncodingFamily.SDWA10)], - [!cast(SIEncodingFamily.GFX90A)]]; + [!cast(SIEncodingFamily.GFX90A)], + [!cast(SIEncodingFamily.GFX940)], + [!cast(SIEncodingFamily.GFX11)]]; } // Get equivalent SOPK instruction. @@ -2510,14 +2866,6 @@ def getIfAddr64Inst : InstrMapping { let ValueCols = [["1"]]; } -def getMUBUFNoLdsInst : InstrMapping { - let FilterClass = "MUBUFLdsTable"; - let RowFields = ["OpName"]; - let ColFields = ["IsLds"]; - let KeyCol = ["1"]; - let ValueCols = [["0"]]; -} - // Maps an atomic opcode to its returnless version. def getAtomicNoRetOp : InstrMapping { let FilterClass = "AtomicNoRet"; @@ -2580,6 +2928,14 @@ def getFlatScratchInstSSfromSV : InstrMapping { let ValueCols = [["SS"]]; } +def getFlatScratchInstSVfromSVS : InstrMapping { + let FilterClass = "FlatScratchInst"; + let RowFields = ["SVOp"]; + let ColFields = ["Mode"]; + let KeyCol = ["SVS"]; + let ValueCols = [["SV"]]; +} + def getFlatScratchInstSVfromSS : InstrMapping { let FilterClass = "FlatScratchInst"; let RowFields = ["SVOp"]; @@ -2596,6 +2952,15 @@ def getMFMAEarlyClobberOp : InstrMapping { let ValueCols = [["0"]]; } +// Maps an v_cmp instruction to its v_cmpx equivalent. +def getVCMPXOpFromVCMP : InstrMapping { + let FilterClass = "VCMPVCMPXTable"; + let RowFields = ["VCMPOp"]; + let ColFields = ["IsVCMPX"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + include "SIInstructions.td" include "DSInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7be63ae6964b..829669157893 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -14,12 +14,24 @@ class GCNPat : Pat, GCNPredicateContro } +class UniformSextInreg : PatFrag< + (ops node:$src), + (sext_inreg $src, VT), + [{ return !N->isDivergent(); }]>; + +class DivergentSextInreg : PatFrag< + (ops node:$src), + (sext_inreg $src, VT), + [{ return N->isDivergent(); }]>; + include "SOPInstructions.td" include "VOPInstructions.td" include "SMInstructions.td" include "FLATInstructions.td" include "BUFInstructions.td" include "EXPInstructions.td" +include "LDSDIRInstructions.td" +include "VINTERPInstructions.td" //===----------------------------------------------------------------------===// // VINTRP Instructions @@ -176,19 +188,33 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> { let mayStore = 0; } +// Pseudo instructions used for @llvm.fptrunc.round upward +// and @llvm.fptrunc.round downward. +// These intrinsics will be legalized to G_FPTRUNC_ROUND_UPWARD +// and G_FPTRUNC_ROUND_DOWNWARD before being lowered to +// FPTRUNC_UPWARD_PSEUDO and FPTRUNC_DOWNWARD_PSEUDO. +// The final codegen is done in the ModeRegister pass. 
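// Illustrative IR for the pseudos below (the intrinsic's documented form,
// not part of this patch):
//   %h = call half @llvm.fptrunc.round.f16.f32(float %x, metadata !"round.upward")
// is legalized to G_FPTRUNC_ROUND_UPWARD and then selected to
// FPTRUNC_UPWARD_PSEUDO.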
+let Uses = [MODE, EXEC] in { +def FPTRUNC_UPWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst), + (ins VGPR_32:$src0), + [(set f16:$vdst, (SIfptrunc_round_upward f32:$src0))]>; + +def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst), + (ins VGPR_32:$src0), + [(set f16:$vdst, (SIfptrunc_round_downward f32:$src0))]>; +} // End Uses = [MODE, EXEC] + // Invert the exec mask and overwrite the inactive lanes of dst with inactive, // restoring it after we're done. let Defs = [SCC] in { def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst), - (ins VGPR_32: $src, VSrc_b32:$inactive), + (ins VSrc_b32: $src, VSrc_b32:$inactive), [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> { - let Constraints = "$src = $vdst"; } def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst), - (ins VReg_64: $src, VSrc_b64:$inactive), + (ins VSrc_b64: $src, VSrc_b64:$inactive), [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> { - let Constraints = "$src = $vdst"; } } // End Defs = [SCC] @@ -287,6 +313,20 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins), let isConvergent = 1; let FixedSize = 1; let Size = 0; + let isMeta = 1; +} + +def SCHED_BARRIER : SPseudoInstSI<(outs), (ins i32imm:$mask), + [(int_amdgcn_sched_barrier (i32 timm:$mask))]> { + let SchedRW = []; + let hasNoSchedulingInfo = 1; + let hasSideEffects = 1; + let mayLoad = 0; + let mayStore = 0; + let isConvergent = 1; + let FixedSize = 1; + let Size = 0; + let isMeta = 1; } // SI pseudo instructions. These are used by the CFG structurizer pass @@ -424,6 +464,7 @@ def SI_MASKED_UNREACHABLE : SPseudoInstSI <(outs), (ins), let Size = 0; let hasNoSchedulingInfo = 1; let FixedSize = 1; + let isMeta = 1; } // Used as an isel pseudo to directly emit initialization with an @@ -459,11 +500,14 @@ def SI_RETURN_TO_EPILOG : SPseudoInstSI < let hasNoSchedulingInfo = 1; let DisableWQM = 1; let FixedSize = 1; + + // TODO: Should this be true? + let isMeta = 0; } // Return for returning function calls. 
def SI_RETURN : SPseudoInstSI < - (outs), (ins), [], + (outs), (ins), [(AMDGPUret_flag)], "; return"> { let isTerminator = 1; let isBarrier = 1; @@ -496,6 +540,7 @@ def : GCNPat< def SI_CALL : SPseudoInstSI < (outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> { let Size = 4; + let FixedSize = 1; let isCall = 1; let UseNamedOperandTable = 1; let SchedRW = [WriteBranch]; @@ -508,6 +553,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs), (ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff), [(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> { let Size = 4; + let FixedSize = 1; let isCall = 1; let isTerminator = 1; let isReturn = 1; @@ -1212,6 +1258,26 @@ def : Pat < (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3)) >; +def : Pat < + (extract_subvector v16i16:$vec, (i32 0)), + (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub0_sub1_sub2_sub3)) +>; + +def : Pat < + (extract_subvector v16i16:$vec, (i32 8)), + (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub4_sub5_sub6_sub7)) +>; + +def : Pat < + (extract_subvector v16f16:$vec, (i32 0)), + (v8f16 (EXTRACT_SUBREG v16f16:$vec, sub0_sub1_sub2_sub3)) +>; + +def : Pat < + (extract_subvector v16f16:$vec, (i32 8)), + (v8f16 (EXTRACT_SUBREG v16f16:$vec, sub4_sub5_sub6_sub7)) +>; + foreach Index = 0-31 in { def Extract_Element_v32i32_#Index : Extract_Element < i32, v32i32, Index, !cast(sub#Index) @@ -1371,7 +1437,18 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; - +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; // 512-bit bitcast def : BitConvert ; @@ -1941,12 +2018,6 @@ def : GCNPat < //===----------------------------------------------------------------------===// // Conversion Patterns //===----------------------------------------------------------------------===// - -class UniformSextInreg : PatFrag< - (ops node:$src), - (sext_inreg $src, VT), - [{ return !N->isDivergent(); }]>; - def : GCNPat<(i32 (UniformSextInreg i32:$src)), (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16 @@ -1981,23 +2052,28 @@ def : GCNPat < (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16 >; - -class DivergentSextInreg : PatFrag< - (ops node:$src), - (sext_inreg $src, VT), - [{ return N->isDivergent(); }]>; - -def : GCNPat<(i32 (DivergentSextInreg i32:$src)), +def : GCNPat< + (i32 (DivergentSextInreg i32:$src)), (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>; def : GCNPat < (i16 (DivergentSextInreg i16:$src)), - (V_BFE_I32_e64 $src, (i32 0), (i32 1)) // 0 | 1 << 16 + (V_BFE_I32_e64 $src, (i32 0), (i32 1)) >; def : GCNPat < (i16 (DivergentSextInreg i16:$src)), - (V_BFE_I32_e64 $src, (i32 0), (i32 8)) // 0 | 8 << 16 + (V_BFE_I32_e64 $src, (i32 0), (i32 8)) +>; + +def : GCNPat< + (i32 (DivergentSextInreg i32:$src)), + (V_BFE_I32_e64 i32:$src, (i32 0), (i32 8)) +>; + +def : GCNPat < + (i32 (DivergentSextInreg i32:$src)), + (V_BFE_I32_e64 $src, (i32 0), (i32 16)) >; def : GCNPat < @@ -2010,14 +2086,14 @@ def : GCNPat < def : GCNPat < (i64 (DivergentSextInreg i64:$src)), (REG_SEQUENCE VReg_64, - (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)/* 0 | 8 << 16 */), sub0, + (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)), sub0, (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8))), sub1) >; def : GCNPat < (i64 (DivergentSextInreg i64:$src)), (REG_SEQUENCE VReg_64, - 
(V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)/* 0 | 16 << 16 */), sub0, + (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)), sub0, (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16))), sub1) >; @@ -2053,11 +2129,17 @@ def : ZExt_i64_i1_Pat; // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple outputs. def : GCNPat < - (i64 (sext i32:$src)), + (i64 (UniformUnaryFrag i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1) >; +def : GCNPat < + (i64 (DivergentUnaryFrag i32:$src)), + (REG_SEQUENCE VReg_64, $src, sub0, + (i32 (COPY_TO_REGCLASS (V_ASHRREV_I32_e64 (i32 31), $src), VGPR_32)), sub1) +>; + def : GCNPat < (i64 (sext i1:$src)), (REG_SEQUENCE VReg_64, @@ -2234,6 +2316,30 @@ def : GCNPat < // certainty what the source behavior is without more context on how // the src is lowered. e.g. fptrunc + fma may be lowered to a // v_fma_mix* instruction which does not zero, or may not. +def : GCNPat< + (i32 (DivergentUnaryFrag i32:$src)), + (V_MAX_I32_e64 (V_SUB_CO_U32_e32 (i32 0), $src), $src)>; + +let AddedComplexity = 1 in { +def : GCNPat< + (i32 (DivergentUnaryFrag i32:$src)), + (V_MAX_I32_e64 (V_SUB_U32_e32 (i32 0), $src), $src)>{ + let SubtargetPredicate = HasAddNoCarryInsts; +} +} // AddedComplexity = 1 + +def : GCNPat< + (i32 (DivergentUnaryFrag i16:$src)), + (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src) +>; + +def : GCNPat< + (i64 (DivergentUnaryFrag i16:$src)), + (REG_SEQUENCE VReg_64, + (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src), sub0, + (S_MOV_B32 (i32 0)), sub1) +>; + def : GCNPat< (i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))), (COPY VSrc_b16:$src)>; @@ -2269,6 +2375,34 @@ def : GCNPat < (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; +def IMMBitSelConst : SDNodeXFormgetTargetConstant(1ULL << N->getZExtValue(), SDLoc(N), + MVT::i32); +}]>; + +// Matching separate SRL and TRUNC instructions +// with dependent operands (SRL dest is source of TRUNC) +// generates three instructions. However, by using bit shifts, +// the V_LSHRREV_B32_e64 result can be directly used in the +// operand of the V_AND_B32_e64 instruction: +// (trunc i32 (srl i32 $a, i32 $b)) -> +// v_and_b32_e64 $a, (1 << $b), $a +// v_cmp_ne_u32_e64 $a, 0, $a + +// Handle the VALU case. +def : GCNPat < + (i1 (DivergentUnaryFrag (i32 (srl i32:$a, (i32 imm:$b))))), + (V_CMP_NE_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a), + (i32 0)) +>; + +// Handle the scalar case. +def : GCNPat < + (i1 (UniformUnaryFrag (i32 (srl i32:$a, (i32 imm:$b))))), + (S_CMP_LG_U32 (S_AND_B32 (i32 (IMMBitSelConst $b)), $a), + (i32 0)) +>; + def : GCNPat < (i1 (DivergentUnaryFrag i64:$a)), (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), @@ -2350,6 +2484,11 @@ def : GCNPat < } +def : GCNPat< + (i64 (DivergentUnaryFrag i64:$a)), + (REG_SEQUENCE VReg_64, + (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1))), sub0, + (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>; // Prefer selecting to max when legal, but using mul is always valid. 
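// (For instance, fcanonicalize of an f32 can be selected as v_max_f32 x, x
// when that is legal for the current mode, while v_mul_f32 1.0, x is always
// valid; the AddedComplexity = -5 below deprioritizes the mul patterns so
// the max form wins wherever both match.)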
let AddedComplexity = -5 in { @@ -2508,12 +2647,12 @@ def : GCNPat < >; def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))), + (v2i16 (UniformBinFrag (i16 SReg_32:$src0), (i16 undef))), (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) >; def : GCNPat < - (v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))), + (v2i16 (DivergentBinFrag (i16 VGPR_32:$src0), (i16 undef))), (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32) >; @@ -2597,6 +2736,15 @@ def : GCNPat < >; } // End SubtargetPredicate = HasVOP3PInsts +// With multiple uses of the shift, this will duplicate the shift and +// increase register pressure. +let SubtargetPredicate = isGFX11Plus in +def : GCNPat < + (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), (i16 SReg_32:$src1))), + (v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1)) >; + + def : GCNPat < (v2f16 (scalar_to_vector f16:$src0)), (COPY $src0) >; @@ -2678,18 +2826,18 @@ def : GCNPat < // an inline immediate than -c. // TODO: Also do for 64-bit. def : GCNPat< - (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (UniformBinFrag i32:$src0, (i32 NegSubInlineConst32:$src1)), (S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1) >; def : GCNPat< - (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (DivergentBinFrag i32:$src0, (i32 NegSubInlineConst32:$src1)), (V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { let SubtargetPredicate = HasAddNoCarryInsts; } def : GCNPat< - (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (DivergentBinFrag i32:$src0, (i32 NegSubInlineConst32:$src1)), (V_SUB_CO_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { let SubtargetPredicate = NotHasAddNoCarryInsts; } @@ -2703,20 +2851,21 @@ def : GCNPat< (S_MOV_B32 SReg_32:$src) >; -multiclass BFMPatterns { +multiclass BFMPatterns { def : GCNPat < - (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), + (vt (SHL (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), (BFM $a, $b) >; def : GCNPat < - (vt (add (vt (shl 1, vt:$a)), -1)), - (BFM $a, (MOV (i32 0))) + (vt (ADD (vt (shl 1, vt:$a)), -1)), + (BFM $a, (i32 0)) >; } -defm : BFMPatterns ; -// FIXME: defm : BFMPatterns ; +defm : BFMPatterns , UniformBinFrag, S_BFM_B32>; +// FIXME: defm : BFMPatterns , UniformBinFrag, S_BFM_B64>; +defm : BFMPatterns , DivergentBinFrag, V_BFM_B32_e64>; // Bitfield extract patterns @@ -3007,6 +3156,19 @@ def G_AMDGPU_CLAMP : AMDGPUGenericInstruction { let hasSideEffects = 0; } +// Integer multiply-add: arg0 * arg1 + arg2. +// +// arg0 and arg1 are 32-bit integers (interpreted as signed or unsigned), +// arg2 is a 64-bit integer. Result is a 64-bit integer and a 1-bit carry-out. +class G_AMDGPU_MAD_64_32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst, type1:$carry_out); + let InOperandList = (ins type2:$arg0, type2:$arg1, type0:$arg2); + let hasSideEffects = 0; +} + +def G_AMDGPU_MAD_U64_U32 : G_AMDGPU_MAD_64_32; +def G_AMDGPU_MAD_I64_I32 : G_AMDGPU_MAD_64_32; + // Atomic cmpxchg. $cmpval and $newval are packed in a single vector // operand. Expects a MachineMemOperand in addition to explicit // operands.
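// Worked example for the G_AMDGPU_MAD_64_32 semantics defined above
// (illustrative values only): with arg0 = 0x80000000, arg1 = 2 and arg2 = 0,
// G_AMDGPU_MAD_U64_U32 zero-extends the 32-bit factors and produces
// dst = 0x100000000 with carry_out = 0, whereas G_AMDGPU_MAD_I64_I32
// sign-extends them and produces dst = 0xFFFFFFFF00000000.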
@@ -3130,3 +3292,15 @@ def G_SI_CALL : AMDGPUGenericInstruction { // TODO: Should really base this on the call target let isConvergent = 1; } + +def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$vdst); + let InOperandList = (ins type1:$src0); + let hasSideEffects = 0; +} + +def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$vdst); + let InOperandList = (ins type1:$src0); + let hasSideEffects = 0; +} diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp index 4fa8ec711134..47095ae22027 100644 --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -72,16 +72,22 @@ static void generateEndPgm(MachineBasicBlock &MBB, bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS; // Check if hardware has been configured to expect color or depth exports. - bool HasExports = - AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F); + bool HasColorExports = AMDGPU::getHasColorExport(F); + bool HasDepthExports = AMDGPU::getHasDepthExport(F); + bool HasExports = HasColorExports || HasDepthExports; // Prior to GFX10, hardware always expects at least one export for PS. bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget()); if (IsPS && (HasExports || MustExport)) { // Generate "null export" if hardware is expecting PS to export. + const GCNSubtarget &ST = MBB.getParent()->getSubtarget(); + int Target = + ST.hasNullExportTarget() + ? AMDGPU::Exp::ET_NULL + : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ); BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE)) - .addImm(AMDGPU::Exp::ET_NULL) + .addImm(Target) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 44bdbe37dec0..6d4e1d2c898b 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -79,6 +79,13 @@ enum InstClassEnum { MIMG, TBUFFER_LOAD, TBUFFER_STORE, + GLOBAL_LOAD_SADDR, + GLOBAL_STORE_SADDR, + FLAT_LOAD, + FLAT_STORE, + GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of + GLOBAL_STORE // any CombineInfo, they are only ever returned by + // getCommonInstClass. }; struct AddressRegs { @@ -86,6 +93,7 @@ struct AddressRegs { bool SBase = false; bool SRsrc = false; bool SOffset = false; + bool SAddr = false; bool VAddr = false; bool Addr = false; bool SSamp = false; @@ -160,6 +168,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass { } void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO); + + // Compare by pointer order. + bool operator<(const CombineInfo& Other) const { + return (InstClass == MIMG) ? 
DMask < Other.DMask : Offset < Other.Offset; + } }; struct BaseRegisters { @@ -185,6 +198,9 @@ private: AliasAnalysis *AA = nullptr; bool OptimizeAgain; + bool canSwapInstructions(const DenseSet &ARegDefs, + const DenseSet &ARegUses, + const MachineInstr &A, const MachineInstr &B) const; static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII, const CombineInfo &Paired); @@ -199,38 +215,43 @@ private: const CombineInfo &Paired); const TargetRegisterClass *getDataRegClass(const MachineInstr &MI) const; - bool checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired, - SmallVectorImpl &InstsToMove); + CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired); unsigned read2Opcode(unsigned EltSize) const; unsigned read2ST64Opcode(unsigned EltSize) const; - MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI, - CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator + mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore); unsigned write2Opcode(unsigned EltSize) const; unsigned write2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove); + MachineBasicBlock::iterator InsertBefore); + MachineBasicBlock::iterator + mergeFlatLoadPair(CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore); + MachineBasicBlock::iterator + mergeFlatStorePair(CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore); void updateBaseAndOffset(MachineInstr &I, Register NewBase, int32_t NewOffset) const; @@ -252,6 +273,12 @@ private: MemInfoMap &Visited, SmallPtrSet &AnchorList, std::list> &MergeableInsts) const; + static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI, + const CombineInfo &Paired); + + static InstClassEnum getCommonInstClass(const CombineInfo &CI, + const CombineInfo &Paired); + public: static char ID; @@ -298,10 +325,35 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { switch (Opc) { case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::GLOBAL_LOAD_DWORD: + case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_STORE_DWORD: return 1; case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::GLOBAL_LOAD_DWORDX2: + case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: + case 
AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX2: return 2; + case AMDGPU::GLOBAL_LOAD_DWORDX3: + case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX3: + return 3; case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + case AMDGPU::GLOBAL_LOAD_DWORDX4: + case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX4: + case AMDGPU::FLAT_STORE_DWORDX4: return 4; case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: return 8; @@ -386,11 +438,40 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::DS_WRITE_B64: case AMDGPU::DS_WRITE_B64_gfx9: return DS_WRITE; + case AMDGPU::GLOBAL_LOAD_DWORD: + case AMDGPU::GLOBAL_LOAD_DWORDX2: + case AMDGPU::GLOBAL_LOAD_DWORDX3: + case AMDGPU::GLOBAL_LOAD_DWORDX4: + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + return FLAT_LOAD; + case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: + return GLOBAL_LOAD_SADDR; + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX4: + case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX4: + return FLAT_STORE; + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + return GLOBAL_STORE_SADDR; } } /// Determines instruction subclass from opcode. Only instructions -/// of the same subclass can be merged together. +/// of the same subclass can be merged together. The merged instruction may have +/// a different subclass but must have the same class. 
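/// For example, per the switch below, GLOBAL_LOAD_DWORDX2 and FLAT_LOAD_DWORD
/// both map to the subclass key FLAT_LOAD_DWORD, so a segment-specific GLOBAL
/// load may merge with a generic FLAT load (the combined access is then FLAT),
/// whereas the _SADDR forms map to their own key and only merge with each
/// other.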
static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { switch (Opc) { default: @@ -418,9 +499,55 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: return AMDGPU::S_BUFFER_LOAD_DWORD_IMM; + case AMDGPU::GLOBAL_LOAD_DWORD: + case AMDGPU::GLOBAL_LOAD_DWORDX2: + case AMDGPU::GLOBAL_LOAD_DWORDX3: + case AMDGPU::GLOBAL_LOAD_DWORDX4: + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + return AMDGPU::FLAT_LOAD_DWORD; + case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: + return AMDGPU::GLOBAL_LOAD_DWORD_SADDR; + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX4: + case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX4: + return AMDGPU::FLAT_STORE_DWORD; + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + return AMDGPU::GLOBAL_STORE_DWORD_SADDR; } } +// GLOBAL loads and stores are classified as FLAT initially. If both combined +// instructions are FLAT GLOBAL adjust the class to GLOBAL_LOAD or GLOBAL_STORE. +// If either or both instructions are non segment specific FLAT the resulting +// combined operation will be FLAT, potentially promoting one of the GLOBAL +// operations to FLAT. +// For other instructions return the original unmodified class. +InstClassEnum +SILoadStoreOptimizer::getCommonInstClass(const CombineInfo &CI, + const CombineInfo &Paired) { + assert(CI.InstClass == Paired.InstClass); + + if ((CI.InstClass == FLAT_LOAD || CI.InstClass == FLAT_STORE) && + SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I)) + return (CI.InstClass == FLAT_STORE) ? 
GLOBAL_STORE : GLOBAL_LOAD; + + return CI.InstClass; +} + static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { AddressRegs Result; @@ -480,6 +607,34 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::DS_WRITE_B64_gfx9: Result.Addr = true; return Result; + case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: + case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + Result.SAddr = true; + LLVM_FALLTHROUGH; + case AMDGPU::GLOBAL_LOAD_DWORD: + case AMDGPU::GLOBAL_LOAD_DWORDX2: + case AMDGPU::GLOBAL_LOAD_DWORDX3: + case AMDGPU::GLOBAL_LOAD_DWORDX4: + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX4: + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX4: + Result.VAddr = true; + return Result; } } @@ -551,6 +706,9 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI, if (Regs.SOffset) AddrIdx[NumAddresses++] = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset); + if (Regs.SAddr) + AddrIdx[NumAddresses++] = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); if (Regs.VAddr) AddrIdx[NumAddresses++] = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); @@ -579,92 +737,58 @@ FunctionPass *llvm::createSILoadStoreOptimizerPass() { return new SILoadStoreOptimizer(); } -static void moveInstsAfter(MachineBasicBlock::iterator I, - ArrayRef InstsToMove) { - MachineBasicBlock *MBB = I->getParent(); - ++I; - for (MachineInstr *MI : InstsToMove) { - MI->removeFromParent(); - MBB->insert(I, MI); - } -} - static void addDefsUsesToList(const MachineInstr &MI, DenseSet &RegDefs, - DenseSet &PhysRegUses) { - for (const MachineOperand &Op : MI.operands()) { - if (Op.isReg()) { - if (Op.isDef()) - RegDefs.insert(Op.getReg()); - else if (Op.readsReg() && Op.getReg().isPhysical()) - PhysRegUses.insert(Op.getReg()); - } - } -} - -static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A, - MachineBasicBlock::iterator B, - AliasAnalysis *AA) { - // RAW or WAR - cannot reorder - // WAW - cannot reorder - // RAR - safe to reorder - return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true); -} - -// Add MI and its defs to the lists if MI reads one of the defs that are -// already in the list. Returns true in that case. -static bool addToListsIfDependent(MachineInstr &MI, DenseSet &RegDefs, - DenseSet &PhysRegUses, - SmallVectorImpl &Insts) { - for (MachineOperand &Use : MI.operands()) { - // If one of the defs is read, then there is a use of Def between I and the - // instruction that I will potentially be merged with. We will need to move - // this instruction after the merged instructions. - // - // Similarly, if there is a def which is read by an instruction that is to - // be moved for merging, then we need to move the def-instruction as well. - // This can only happen for physical registers such as M0; virtual - // registers are in SSA form. 
- if (Use.isReg() && ((Use.readsReg() && RegDefs.count(Use.getReg())) || - (Use.isDef() && RegDefs.count(Use.getReg())) || - (Use.isDef() && Use.getReg().isPhysical() && - PhysRegUses.count(Use.getReg())))) { - Insts.push_back(&MI); - addDefsUsesToList(MI, RegDefs, PhysRegUses); - return true; - } + DenseSet &RegUses) { + for (const auto &Op : MI.operands()) { + if (!Op.isReg()) + continue; + if (Op.isDef()) + RegDefs.insert(Op.getReg()); + if (Op.readsReg()) + RegUses.insert(Op.getReg()); } - - return false; } -static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp, - ArrayRef InstsToMove, - AliasAnalysis *AA) { - assert(MemOp.mayLoadOrStore()); - - for (MachineInstr *InstToMove : InstsToMove) { - if (!InstToMove->mayLoadOrStore()) +bool SILoadStoreOptimizer::canSwapInstructions( + const DenseSet &ARegDefs, const DenseSet &ARegUses, + const MachineInstr &A, const MachineInstr &B) const { + if (A.mayLoadOrStore() && B.mayLoadOrStore() && + (A.mayStore() || B.mayStore()) && A.mayAlias(AA, B, true)) + return false; + for (const auto &BOp : B.operands()) { + if (!BOp.isReg()) continue; - if (!memAccessesCanBeReordered(MemOp, *InstToMove, AA)) + if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg())) + return false; + if (BOp.isDef() && ARegUses.contains(BOp.getReg())) return false; } return true; } -// This function assumes that \p A and \p B have are identical except for -// size and offset, and they reference adjacent memory. -static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF, - const MachineMemOperand *A, - const MachineMemOperand *B) { - unsigned MinOffset = std::min(A->getOffset(), B->getOffset()); - unsigned Size = A->getSize() + B->getSize(); - // This function adds the offset parameter to the existing offset for A, - // so we pass 0 here as the offset and then manually set it to the correct - // value after the call. - MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size); - MMO->setOffset(MinOffset); - return MMO; +// Given that \p CI and \p Paired are adjacent memory operations produce a new +// MMO for the combined operation with a new access size. +MachineMemOperand * +SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI, + const CombineInfo &Paired) { + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); + + unsigned Size = MMOa->getSize() + MMOb->getSize(); + + // A base pointer for the combined operation is the same as the leading + // operation's pointer. + if (Paired < CI) + std::swap(MMOa, MMOb); + + MachinePointerInfo PtrInfo(MMOa->getPointerInfo()); + // If merging FLAT and GLOBAL set address space to FLAT. 
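// (The combined MMO is seeded from MMOa's PointerInfo after the swap above,
// so a leading FLAT access already yields a FLAT result; only a FLAT MMOb
// needs this explicit fixup.)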
+ if (MMOb->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) + PtrInfo.AddrSpace = AMDGPUAS::FLAT_ADDRESS; + + MachineFunction *MF = CI.I->getMF(); + return MF->getMachineMemOperand(MMOa, PtrInfo, Size); } bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, @@ -787,8 +911,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI, if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) { return (EltOffset0 + CI.Width == EltOffset1 || EltOffset1 + Paired.Width == EltOffset0) && - CI.CPol == Paired.CPol && - (CI.InstClass == S_BUFFER_LOAD_IMM || CI.CPol == Paired.CPol); + CI.CPol == Paired.CPol; } // If the offset in elements doesn't fit in 8-bits, we might be able to use @@ -889,111 +1012,59 @@ SILoadStoreOptimizer::getDataRegClass(const MachineInstr &MI) const { return nullptr; } -/// This function assumes that CI comes before Paired in a basic block. -bool SILoadStoreOptimizer::checkAndPrepareMerge( - CombineInfo &CI, CombineInfo &Paired, - SmallVectorImpl &InstsToMove) { +/// This function assumes that CI comes before Paired in a basic block. Return +/// an insertion point for the merged instruction or nullptr on failure. +SILoadStoreOptimizer::CombineInfo * +SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, + CombineInfo &Paired) { + // If another instruction has already been merged into CI, it may now be a + // type that we can't do any further merging into. + if (CI.InstClass == UNKNOWN || Paired.InstClass == UNKNOWN) + return nullptr; + assert(CI.InstClass == Paired.InstClass); + + if (getInstSubclass(CI.I->getOpcode(), *TII) != + getInstSubclass(Paired.I->getOpcode(), *TII)) + return nullptr; // Check both offsets (or masks for MIMG) can be combined and fit in the // reduced range. - if (CI.InstClass == MIMG && !dmasksCanBeCombined(CI, *TII, Paired)) - return false; - - if (CI.InstClass != MIMG && - (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired))) - return false; - - const unsigned Opc = CI.I->getOpcode(); - const InstClassEnum InstClass = getInstClass(Opc, *TII); - - if (InstClass == UNKNOWN) { - return false; + if (CI.InstClass == MIMG) { + if (!dmasksCanBeCombined(CI, *TII, Paired)) + return nullptr; + } else { + if (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired)) + return nullptr; } - const unsigned InstSubclass = getInstSubclass(Opc, *TII); - - DenseSet RegDefsToMove; - DenseSet PhysRegUsesToMove; - addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove); - - MachineBasicBlock::iterator E = std::next(Paired.I); - MachineBasicBlock::iterator MBBI = std::next(CI.I); - MachineBasicBlock::iterator MBBE = CI.I->getParent()->end(); - for (; MBBI != E; ++MBBI) { - - if (MBBI == MBBE) { - // CombineInfo::Order is a hint on the instruction ordering within the - // basic block. This hint suggests that CI precedes Paired, which is - // true most of the time. However, moveInstsAfter() processing a - // previous list may have changed this order in a situation when it - // moves an instruction which exists in some other merge list. - // In this case it must be dependent. - return false; - } - - if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) || - (getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) { - // This is not a matching instruction, but we can keep looking as - // long as one of these conditions are met: - // 1. It is safe to move I down past MBBI. - // 2. It is safe to move MBBI down past the instruction that I will - // be merged into. 
- - if (MBBI->mayLoadOrStore() && - (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) || - !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))) { - // We fail condition #1, but we may still be able to satisfy condition - // #2. Add this instruction to the move list and then we will check - // if condition #2 holds once we have selected the matching instruction. - InstsToMove.push_back(&*MBBI); - addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove); - continue; - } - // When we match I with another DS instruction we will be moving I down - // to the location of the matched instruction any uses of I will need to - // be moved down as well. - addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove, - InstsToMove); - continue; + DenseSet RegDefs; + DenseSet RegUses; + CombineInfo *Where; + if (CI.I->mayLoad()) { + // Try to hoist Paired up to CI. + addDefsUsesToList(*Paired.I, RegDefs, RegUses); + for (MachineBasicBlock::iterator MBBI = Paired.I; --MBBI != CI.I;) { + if (!canSwapInstructions(RegDefs, RegUses, *Paired.I, *MBBI)) + return nullptr; } - - // Handle a case like - // DS_WRITE_B32 addr, v, idx0 - // w = DS_READ_B32 addr, idx0 - // DS_WRITE_B32 addr, f(w), idx1 - // where the DS_READ_B32 ends up in InstsToMove and therefore prevents - // merging of the two writes. - if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove, - InstsToMove)) - continue; - - if (&*MBBI == &*Paired.I) { - // We need to go through the list of instructions that we plan to - // move and make sure they are all safe to move down past the merged - // instruction. - if (canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA)) { - - // Call offsetsCanBeCombined with modify = true so that the offsets are - // correct for the new instruction. This should return true, because - // this function should only be called on CombineInfo objects that - // have already been confirmed to be mergeable. - if (CI.InstClass != MIMG) - offsetsCanBeCombined(CI, *STM, Paired, true); - return true; - } - return false; + Where = &CI; + } else { + // Try to sink CI down to Paired. + addDefsUsesToList(*CI.I, RegDefs, RegUses); + for (MachineBasicBlock::iterator MBBI = CI.I; ++MBBI != Paired.I;) { + if (!canSwapInstructions(RegDefs, RegUses, *CI.I, *MBBI)) + return nullptr; } - - // We've found a load/store that we couldn't merge for some reason. - // We could potentially keep looking, but we'd need to make sure that - // it was safe to move I and also all the instruction in InstsToMove - // down past this instruction. - // check if we can move I across MBBI and if we can move all I's users - if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) || - !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA)) - break; + Where = &Paired; } - return false; + + // Call offsetsCanBeCombined with modify = true so that the offsets are + // correct for the new instruction. This should return true, because + // this function should only be called on CombineInfo objects that + // have already been confirmed to be mergeable. 
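// (Only the DS read2/write2 forms encode a pair of offsets that must be
// rewritten at this point; the other classes recompute the merged offset
// inside their merge helpers, e.g. as std::min(CI.Offset, Paired.Offset).)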
+ if (CI.InstClass == DS_READ || CI.InstClass == DS_WRITE) + offsetsCanBeCombined(CI, *STM, Paired, true); + return Where; } unsigned SILoadStoreOptimizer::read2Opcode(unsigned EltSize) const { @@ -1012,7 +1083,7 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const { MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); // Be careful, since the addresses could be subregisters themselves in weird @@ -1051,13 +1122,13 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, unsigned BaseRegFlags = 0; if (CI.BaseOff) { Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BaseRegFlags = RegState::Kill; - TII->getAddNoCarry(*MBB, Paired.I, DL, BaseReg) + TII->getAddNoCarry(*MBB, InsertBefore, DL, BaseReg) .addReg(ImmReg) .addReg(AddrReg->getReg(), 0, BaseSubReg) .addImm(0); // clamp bit @@ -1065,7 +1136,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, } MachineInstrBuilder Read2 = - BuildMI(*MBB, Paired.I, DL, Read2Desc, DestReg) + BuildMI(*MBB, InsertBefore, DL, Read2Desc, DestReg) .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 @@ -1077,14 +1148,12 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); // Copy to the old destination registers. - BuildMI(*MBB, Paired.I, DL, CopyDesc) + BuildMI(*MBB, InsertBefore, DL, CopyDesc) .add(*Dest0) // Copy to same destination including flags and sub reg. .addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc) - .add(*Dest1) - .addReg(DestReg, RegState::Kill, SubRegIdx1); - - moveInstsAfter(Copy1, InstsToMove); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1109,9 +1178,9 @@ unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const { : AMDGPU::DS_WRITE2ST64_B64_gfx9; } -MachineBasicBlock::iterator -SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( + CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); // Be sure to use .addOperand(), and not .addReg() with these. 
We want to be @@ -1145,13 +1214,13 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired, unsigned BaseRegFlags = 0; if (CI.BaseOff) { Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BaseRegFlags = RegState::Kill; - TII->getAddNoCarry(*MBB, Paired.I, DL, BaseReg) + TII->getAddNoCarry(*MBB, InsertBefore, DL, BaseReg) .addReg(ImmReg) .addReg(AddrReg->getReg(), 0, BaseSubReg) .addImm(0); // clamp bit @@ -1159,7 +1228,7 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired, } MachineInstrBuilder Write2 = - BuildMI(*MBB, Paired.I, DL, Write2Desc) + BuildMI(*MBB, InsertBefore, DL, Write2Desc) .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr .add(*Data0) // data0 .add(*Data1) // data1 @@ -1168,8 +1237,6 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired, .addImm(0) // gds .cloneMergedMemRefs({&*CI.I, &*Paired.I}); - moveInstsAfter(Write2, InstsToMove); - CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1179,7 +1246,7 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1191,7 +1258,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, unsigned DMaskIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask); - auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg); + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg); for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) { if (I == DMaskIdx) MIB.addImm(MergedDMask); @@ -1204,10 +1271,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - - MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); unsigned SubRegIdx0, SubRegIdx1; std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired); @@ -1217,14 +1281,12 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); - BuildMI(*MBB, Paired.I, DL, CopyDesc) + BuildMI(*MBB, InsertBefore, DL, CopyDesc) .add(*Dest0) // Copy to same destination including flags and sub reg. 
.addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc) - .add(*Dest1) - .addReg(DestReg, RegState::Kill, SubRegIdx1); - - moveInstsAfter(Copy1, InstsToMove); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1233,7 +1295,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair( CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1248,15 +1310,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = - BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) - .addImm(MergedOffset) // offset - .addImm(CI.CPol) // cpol - .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) + .addImm(MergedOffset) // offset + .addImm(CI.CPol) // cpol + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1267,14 +1326,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair( const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::sdst); - BuildMI(*MBB, Paired.I, DL, CopyDesc) + BuildMI(*MBB, InsertBefore, DL, CopyDesc) .add(*Dest0) // Copy to same destination including flags and sub reg. .addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc) - .add(*Dest1) - .addReg(DestReg, RegState::Kill, SubRegIdx1); - - moveInstsAfter(Copy1, InstsToMove); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1283,7 +1340,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair( MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); @@ -1295,7 +1352,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( Register DestReg = MRI->createVirtualRegister(SuperRC); unsigned MergedOffset = std::min(CI.Offset, Paired.Offset); - auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg); + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg); AddressRegs Regs = getRegs(Opcode, *TII); @@ -1307,9 +1364,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( // will return true if this is the case. 
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1317,7 +1371,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1328,14 +1382,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); - BuildMI(*MBB, Paired.I, DL, CopyDesc) + BuildMI(*MBB, InsertBefore, DL, CopyDesc) .add(*Dest0) // Copy to same destination including flags and sub reg. .addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc) - .add(*Dest1) - .addReg(DestReg, RegState::Kill, SubRegIdx1); - - moveInstsAfter(Copy1, InstsToMove); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1344,7 +1396,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); @@ -1356,7 +1408,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( Register DestReg = MRI->createVirtualRegister(SuperRC); unsigned MergedOffset = std::min(CI.Offset, Paired.Offset); - auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg); + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg); AddressRegs Regs = getRegs(Opcode, *TII); @@ -1371,9 +1423,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1382,8 +1431,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand( - combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1394,14 +1442,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); - BuildMI(*MBB, Paired.I, DL, CopyDesc) + BuildMI(*MBB, InsertBefore, DL, CopyDesc) .add(*Dest0) // Copy to same destination including flags and sub reg. 
.addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc) - .add(*Dest1) - .addReg(DestReg, RegState::Kill, SubRegIdx1); - - moveInstsAfter(Copy1, InstsToMove); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1410,7 +1456,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); @@ -1427,13 +1473,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); - BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) .add(*Src0) .addImm(SubRegIdx0) .add(*Src1) .addImm(SubRegIdx1); - auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode)) + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); AddressRegs Regs = getRegs(Opcode, *TII); @@ -1449,9 +1495,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1460,10 +1503,92 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand( - combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); + + CI.I->eraseFromParent(); + Paired.I->eraseFromParent(); + return New; +} + +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( + CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + + const unsigned Opcode = getNewOpcode(CI, Paired); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); + Register DestReg = MRI->createVirtualRegister(SuperRC); + + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg); + + if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr)) + MIB.add(*SAddr); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) + .addImm(std::min(CI.Offset, Paired.Offset)) + .addImm(CI.CPol) + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); + + std::pair SubRegIdx = getSubRegIdxs(CI, Paired); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. 
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst); + const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst); - moveInstsAfter(MIB, InstsToMove); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. + .addReg(DestReg, 0, SubRegIdx0); + BuildMI(*MBB, InsertBefore, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + CI.I->eraseFromParent(); + Paired.I->eraseFromParent(); + return New; +} + +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair( + CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + + const unsigned Opcode = getNewOpcode(CI, Paired); + + std::pair SubRegIdx = getSubRegIdxs(CI, Paired); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the new source register. + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); + Register SrcReg = MRI->createVirtualRegister(SuperRC); + + const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); + + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .add(*Src0) + .addImm(SubRegIdx0) + .add(*Src1) + .addImm(SubRegIdx1); + + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) + .addReg(SrcReg, RegState::Kill); + + if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr)) + MIB.add(*SAddr); + + MachineInstr *New = + MIB.addImm(std::min(CI.Offset, Paired.Offset)) + .addImm(CI.CPol) + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1474,7 +1599,7 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired) { const unsigned Width = CI.Width + Paired.Width; - switch (CI.InstClass) { + switch (getCommonInstClass(CI, Paired)) { default: assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE); // FIXME: Handle d16 correctly @@ -1498,6 +1623,72 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, case 8: return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM; } + case GLOBAL_LOAD: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::GLOBAL_LOAD_DWORDX2; + case 3: + return AMDGPU::GLOBAL_LOAD_DWORDX3; + case 4: + return AMDGPU::GLOBAL_LOAD_DWORDX4; + } + case GLOBAL_LOAD_SADDR: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR; + case 3: + return AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR; + case 4: + return AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR; + } + case GLOBAL_STORE: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::GLOBAL_STORE_DWORDX2; + case 3: + return AMDGPU::GLOBAL_STORE_DWORDX3; + case 4: + return AMDGPU::GLOBAL_STORE_DWORDX4; + } + case GLOBAL_STORE_SADDR: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::GLOBAL_STORE_DWORDX2_SADDR; + case 3: + return AMDGPU::GLOBAL_STORE_DWORDX3_SADDR; + case 4: + return AMDGPU::GLOBAL_STORE_DWORDX4_SADDR; + } + case FLAT_LOAD: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::FLAT_LOAD_DWORDX2; + case 3: + return AMDGPU::FLAT_LOAD_DWORDX3; + case 4: + return AMDGPU::FLAT_LOAD_DWORDX4; + } + 
case FLAT_STORE: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::FLAT_STORE_DWORDX2; + case 3: + return AMDGPU::FLAT_STORE_DWORDX3; + case 4: + return AMDGPU::FLAT_STORE_DWORDX4; + } case MIMG: assert((countPopulation(CI.DMask | Paired.DMask) == Width) && "No overlaps"); @@ -1508,15 +1699,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, std::pair SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, const CombineInfo &Paired) { - bool ReverseOrder; - if (CI.InstClass == MIMG) { - assert( - (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) && - "No overlaps"); - ReverseOrder = CI.DMask > Paired.DMask; - } else { - ReverseOrder = CI.Offset > Paired.Offset; - } + assert((CI.InstClass != MIMG || (countPopulation(CI.DMask | Paired.DMask) == + CI.Width + Paired.Width)) && + "No overlaps"); unsigned Idx0; unsigned Idx1; @@ -1532,7 +1717,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, assert(CI.Width >= 1 && CI.Width <= 4); assert(Paired.Width >= 1 && Paired.Width <= 4); - if (ReverseOrder) { + if (Paired < CI) { Idx1 = Idxs[0][Paired.Width - 1]; Idx0 = Idxs[Paired.Width][CI.Width - 1]; } else { @@ -1569,7 +1754,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI, MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( CombineInfo &CI, CombineInfo &Paired, - const SmallVectorImpl &InstsToMove) { + MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); @@ -1586,13 +1771,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); - BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) .add(*Src0) .addImm(SubRegIdx0) .add(*Src1) .addImm(SubRegIdx1); - auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode)) + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); AddressRegs Regs = getRegs(Opcode, *TII); @@ -1606,9 +1791,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1616,9 +1798,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); - - moveInstsAfter(MIB, InstsToMove); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1846,7 +2026,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( // from which (&a + 4096) has 13 bit distance. Both &a + 6144 and &a + 8192 // has 13bit distance from &a + 4096. The heuristic considers &a + 8192 // as the new-base(anchor) because of the maximum distance which can - // accomodate more intermediate bases presumeably. + // accommodate more intermediate bases presumably. // // Step3: move (&a + 8192) above load1. 
Compute and promote offsets from // (&a + 8192) for load1, load2, load4. @@ -2098,8 +2278,8 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( CombineInfo &CI = *First; CombineInfo &Paired = *Second; - SmallVector InstsToMove; - if (!checkAndPrepareMerge(CI, Paired, InstsToMove)) { + CombineInfo *Where = checkAndPrepareMerge(CI, Paired); + if (!Where) { ++I; continue; } @@ -2108,66 +2288,56 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( LLVM_DEBUG(dbgs() << "Merging: " << *CI.I << " with: " << *Paired.I); + MachineBasicBlock::iterator NewMI; switch (CI.InstClass) { default: llvm_unreachable("unknown InstClass"); break; - case DS_READ: { - MachineBasicBlock::iterator NewMI = - mergeRead2Pair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); + case DS_READ: + NewMI = mergeRead2Pair(CI, Paired, Where->I); break; - } - case DS_WRITE: { - MachineBasicBlock::iterator NewMI = - mergeWrite2Pair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); + case DS_WRITE: + NewMI = mergeWrite2Pair(CI, Paired, Where->I); break; - } - case S_BUFFER_LOAD_IMM: { - MachineBasicBlock::iterator NewMI = - mergeSBufferLoadImmPair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); - OptimizeListAgain |= (CI.Width + Paired.Width) < 8; + case S_BUFFER_LOAD_IMM: + NewMI = mergeSBufferLoadImmPair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 8; break; - } - case BUFFER_LOAD: { - MachineBasicBlock::iterator NewMI = - mergeBufferLoadPair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); - OptimizeListAgain |= (CI.Width + Paired.Width) < 4; + case BUFFER_LOAD: + NewMI = mergeBufferLoadPair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; break; - } - case BUFFER_STORE: { - MachineBasicBlock::iterator NewMI = - mergeBufferStorePair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); - OptimizeListAgain |= (CI.Width + Paired.Width) < 4; + case BUFFER_STORE: + NewMI = mergeBufferStorePair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; break; - } - case MIMG: { - MachineBasicBlock::iterator NewMI = - mergeImagePair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); - OptimizeListAgain |= (CI.Width + Paired.Width) < 4; + case MIMG: + NewMI = mergeImagePair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; break; - } - case TBUFFER_LOAD: { - MachineBasicBlock::iterator NewMI = - mergeTBufferLoadPair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); - OptimizeListAgain |= (CI.Width + Paired.Width) < 4; + case TBUFFER_LOAD: + NewMI = mergeTBufferLoadPair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; break; - } - case TBUFFER_STORE: { - MachineBasicBlock::iterator NewMI = - mergeTBufferStorePair(CI, Paired, InstsToMove); - CI.setMI(NewMI, *this); - OptimizeListAgain |= (CI.Width + Paired.Width) < 4; + case TBUFFER_STORE: + NewMI = mergeTBufferStorePair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; + break; + case FLAT_LOAD: + case GLOBAL_LOAD: + case GLOBAL_LOAD_SADDR: + NewMI = mergeFlatLoadPair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; + break; + case FLAT_STORE: + case GLOBAL_STORE: + case GLOBAL_STORE_SADDR: + NewMI = mergeFlatStorePair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; break; } - } - CI.Order = Paired.Order; + CI.setMI(NewMI, *this); + CI.Order = Where->Order; if (I == Second) I = Next; diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp 
b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index e1018bdfde46..607383ab8cde 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -509,8 +509,35 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) { BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec) .addReg(Exec) .add(MI.getOperand(0)); - if (LV) - LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *NewMI); + if (LV) { + LV->replaceKillInstruction(DataReg, MI, *NewMI); + + if (SplitBB != &MBB) { + // Track the set of registers defined in the split block so we don't + // accidentally add the original block to AliveBlocks. + DenseSet SplitDefs; + for (MachineInstr &X : *SplitBB) { + for (MachineOperand &Op : X.operands()) { + if (Op.isReg() && Op.isDef() && Op.getReg().isVirtual()) + SplitDefs.insert(Op.getReg()); + } + } + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + Register Reg = Register::index2VirtReg(i); + LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); + + if (VI.AliveBlocks.test(MBB.getNumber())) + VI.AliveBlocks.set(SplitBB->getNumber()); + else { + for (MachineInstr *Kill : VI.Kills) { + if (Kill->getParent() == SplitBB && !SplitDefs.contains(Reg)) + VI.AliveBlocks.set(MBB.getNumber()); + } + } + } + } + } LoweredEndCf.insert(NewMI); @@ -540,7 +567,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo, return; // Make sure we do not modify exec between def and use. - // A copy with implcitly defined exec inserted earlier is an exclusion, it + // A copy with implicitly defined exec inserted earlier is an exclusion, it // does not really modify exec. for (auto I = Def->getIterator(); I != MI.getIterator(); ++I) if (I->modifiesRegister(AMDGPU::EXEC, TRI) && @@ -573,14 +600,14 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) { else return; Register Reg = MI.getOperand(OpToReplace).getReg(); - MI.RemoveOperand(OpToReplace); + MI.removeOperand(OpToReplace); MI.addOperand(Ops[UniqueOpndIdx]); if (MRI->use_empty(Reg)) MRI->getUniqueVRegDef(Reg)->eraseFromParent(); } void SILowerControlFlow::optimizeEndCf() { - // If the only instruction immediately following this END_CF is an another + // If the only instruction immediately following this END_CF is another // END_CF in the only successor we can avoid emitting exec mask restore here. 
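The LiveVariables update in the emitEndCf hunk above implements a small invariant for a block split: a register live through the original block is also live through the split block, and a register killed in the split block without being redefined there must now be treated as live through the original block. A toy model of that rule, with plain containers instead of LiveVariables::VarInfo (the types and names are illustrative, not the LLVM API):

    #include <set>

    struct VarInfoModel {
      std::set<int> AliveBlocks; // numbers of blocks the register is live through
      std::set<int> KillBlocks;  // numbers of blocks containing a kill of it
    };

    void updateForBlockSplit(VarInfoModel &VI, int OrigBB, int SplitBB,
                             bool DefinedInSplit) {
      if (VI.AliveBlocks.count(OrigBB))
        VI.AliveBlocks.insert(SplitBB); // live-through extends to the new block
      else if (VI.KillBlocks.count(SplitBB) && !DefinedInSplit)
        VI.AliveBlocks.insert(OrigBB);  // killed below the split point
    }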
if (!EnableOptimizeEndCf) return; @@ -865,6 +892,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { } } + bool Changed = false; MachineFunction::iterator NextBB; for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); BI = NextBB) { @@ -886,6 +914,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_LOOP: case AMDGPU::SI_END_CF: SplitMBB = process(MI); + Changed = true; break; // FIXME: find a better place for this @@ -894,6 +923,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { lowerInitExec(MBB, MI); if (LIS) LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC); + Changed = true; break; default: @@ -913,5 +943,5 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { LoweredIf.clear(); KillBlocks.clear(); - return true; + return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index 672266f0c11e..5fb545b50228 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -79,9 +79,9 @@ public: } private: - void lowerCopiesFromI1(); - void lowerPhis(); - void lowerCopiesToI1(); + bool lowerCopiesFromI1(); + bool lowerPhis(); + bool lowerCopiesToI1(); bool isConstantLaneMask(Register Reg, bool &Val) const; void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, @@ -473,15 +473,17 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) { OrN2Op = AMDGPU::S_ORN2_B64; } - lowerCopiesFromI1(); - lowerPhis(); - lowerCopiesToI1(); + bool Changed = false; + Changed |= lowerCopiesFromI1(); + Changed |= lowerPhis(); + Changed |= lowerCopiesToI1(); + assert(Changed || ConstrainRegs.empty()); for (unsigned Reg : ConstrainRegs) MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass); ConstrainRegs.clear(); - return true; + return Changed; } #ifndef NDEBUG @@ -493,7 +495,8 @@ static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, } #endif -void SILowerI1Copies::lowerCopiesFromI1() { +bool SILowerI1Copies::lowerCopiesFromI1() { + bool Changed = false; SmallVector DeadCopies; for (MachineBasicBlock &MBB : *MF) { @@ -509,6 +512,8 @@ void SILowerI1Copies::lowerCopiesFromI1() { if (isLaneMaskReg(DstReg) || isVreg1(DstReg)) continue; + Changed = true; + // Copy into a 32-bit vector register. 
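The conversions in these hunks all have the same shape: each lowering helper now reports whether it changed anything, and runOnMachineFunction returns the OR of those results instead of an unconditional true. Schematically, with stub helpers standing in for the real ones:

    static bool lowerCopiesFromI1() { return false; } // stubs for illustration
    static bool lowerPhis() { return false; }
    static bool lowerCopiesToI1() { return false; }

    static bool runOnFunctionModel() {
      bool Changed = false;
      Changed |= lowerCopiesFromI1();
      Changed |= lowerPhis();
      Changed |= lowerCopiesToI1();
      return Changed; // previously the pass returned true unconditionally
    }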
LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI); DebugLoc DL = MI.getDebugLoc(); @@ -530,9 +535,10 @@ void SILowerI1Copies::lowerCopiesFromI1() { MI->eraseFromParent(); DeadCopies.clear(); } + return Changed; } -void SILowerI1Copies::lowerPhis() { +bool SILowerI1Copies::lowerPhis() { MachineSSAUpdater SSAUpdater(*MF); LoopFinder LF(*DT, *PDT); PhiIncomingAnalysis PIA(*PDT); @@ -550,6 +556,8 @@ void SILowerI1Copies::lowerPhis() { Vreg1Phis.push_back(&MI); } } + if (Vreg1Phis.empty()) + return false; MachineBasicBlock *PrevMBB = nullptr; for (MachineInstr *MI : Vreg1Phis) { @@ -662,9 +670,11 @@ void SILowerI1Copies::lowerPhis() { IncomingRegs.clear(); IncomingUpdated.clear(); } + return true; } -void SILowerI1Copies::lowerCopiesToI1() { +bool SILowerI1Copies::lowerCopiesToI1() { + bool Changed = false; MachineSSAUpdater SSAUpdater(*MF); LoopFinder LF(*DT, *PDT); SmallVector DeadCopies; @@ -681,6 +691,8 @@ void SILowerI1Copies::lowerCopiesToI1() { if (!isVreg1(DstReg)) continue; + Changed = true; + if (MRI->use_empty(DstReg)) { DeadCopies.push_back(&MI); continue; @@ -731,6 +743,7 @@ void SILowerI1Copies::lowerCopiesToI1() { MI->eraseFromParent(); DeadCopies.clear(); } + return Changed; } bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const { diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 0fbdbef6fcce..dd881ec42d53 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -20,6 +20,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/InitializePasses.h" @@ -79,6 +80,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *RI = ST.getRegisterInfo(); MachineBasicBlock::iterator I = SaveBlock.begin(); if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { @@ -89,8 +92,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, MCRegister Reg = CS.getReg(); MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, MVT::i32); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( + Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); // If this value was already livein, we probably have a direct use of the // incoming register value, so don't kill at the spill point. This happens @@ -119,7 +122,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *RI = ST.getRegisterInfo(); // Restore all registers immediately before the return and any // terminators that precede it. 
MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); @@ -128,8 +132,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { for (const CalleeSavedInfo &CI : reverse(CSI)) { Register Reg = CI.getReg(); - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, MVT::i32); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( + Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); assert(I != RestoreBlock.begin() && @@ -321,7 +325,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // free frame index ids by the later pass(es) like "stack slot coloring" // which in turn could mess up the bookkeeping of "frame index to VGPR // lane". - FuncInfo->removeDeadFrameIndices(MFI); + FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); MadeChange = true; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index cca8565c9ff9..0504c59ebd9e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -31,6 +31,9 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), + BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), + ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), + GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -48,8 +51,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitBufferPtr(false), ImplicitArgPtr(false), GITPtrHigh(0xffffffff), - HighBitsOf32BitAddress(0), - GDSSize(0) { + HighBitsOf32BitAddress(0) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const Function &F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); @@ -74,6 +76,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PSInputAddr = AMDGPU::getInitialPSInputAddr(F); } + MayNeedAGPRs = ST.hasMAIInsts(); + if (!isEntryFunction()) { if (CC != CallingConv::AMDGPU_Gfx) ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; @@ -97,6 +101,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitArgPtr = false; MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign); + + if (ST.hasGFX90AInsts() && + ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && + !mayUseAGPRs(MF)) + MayNeedAGPRs = false; // We will select all MAI with VGPR operands. } bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); @@ -177,9 +186,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (!S.empty()) S.consumeInteger(0, HighBitsOf32BitAddress); - S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); - if (!S.empty()) - S.consumeInteger(0, GDSSize); + // On GFX908, in order to guarantee copying between AGPRs, we need a scratch + // VGPR available at all times. For now, reserve the highest available VGPR. + // After RA, shift it to the lowest available unused VGPR if one exists.
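The reservation described by the comment above (and implemented by the hunk that follows) is simple index arithmetic: with MaxNumVGPRs registers VGPR0..VGPR(MaxNumVGPRs-1) available, GFX908 (MAI without the GFX90A instructions) pins the top one as the AGPR-copy scratch register. A one-line sketch, with an assumed register count:

    // Index of the highest addressable VGPR, e.g. 256 VGPRs -> VGPR255.
    constexpr unsigned highestVGPR(unsigned MaxNumVGPRs) {
      return MaxNumVGPRs - 1;
    }
    static_assert(highestVGPR(256) == 255, "GFX908 reserves the top VGPR");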
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { + VGPRForAGPRCopy = + AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1); + } +} + +MachineFunctionInfo *SIMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); } void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { @@ -265,7 +285,7 @@ bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF, /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, int FI) { - std::vector &SpillLanes = SGPRToVGPRSpills[FI]; + std::vector &SpillLanes = SGPRToVGPRSpills[FI]; // This has already been allocated. if (!SpillLanes.empty()) @@ -320,7 +340,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI)); - // Add this register as live-in to all blocks to avoid machine verifer + // Add this register as live-in to all blocks to avoid machine verifier // complaining about use of an undefined physical register. for (MachineBasicBlock &BB : MF) BB.addLiveIn(LaneVGPR); @@ -328,7 +348,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, LaneVGPR = SpillVGPRs.back().VGPR; } - SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex)); + SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex)); } return true; @@ -402,7 +422,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, return Spill.FullyAllocated; } -void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { +bool SIMachineFunctionInfo::removeDeadFrameIndices( + MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { // Remove dead frame indices from function frame, however keep FP & BP since // spills for them haven't been inserted yet. And also make sure to remove the // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could @@ -415,17 +436,42 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { } } - // All other SPGRs must be allocated on the default stack, so reset the stack - // ID. - for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; - ++i) - if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) - MFI.setStackID(i, TargetStackID::Default); + bool HaveSGPRToMemory = false; + + if (ResetSGPRSpillStackIDs) { + // All other SPGRs must be allocated on the default stack, so reset the + // stack ID. 
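removeDeadFrameIndices now resets SGPR-spill stack IDs only when the caller asks for it, and additionally reports whether any SGPR spill ended up going to memory. A small model of the loop that follows, using plain vectors and illustrative field names in place of MachineFrameInfo:

    #include <vector>

    enum class StackID { Default, SGPRSpill };

    struct FrameObject { StackID ID; bool IsFPOrBPSave; };

    // Returns true if at least one SGPR spill now has to go to memory.
    bool resetSGPRSpillStackIDs(std::vector<FrameObject> &Objects) {
      bool HaveSGPRToMemory = false;
      for (FrameObject &O : Objects) {
        if (!O.IsFPOrBPSave && O.ID == StackID::SGPRSpill) {
          O.ID = StackID::Default; // allocate on the default stack
          HaveSGPRToMemory = true;
        }
      }
      return HaveSGPRToMemory;
    }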
+ for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; + ++i) { + if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) { + if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { + MFI.setStackID(i, TargetStackID::Default); + HaveSGPRToMemory = true; + } + } + } + } for (auto &R : VGPRToAGPRSpills) { if (R.second.IsDead) MFI.RemoveStackObject(R.first); } + + return HaveSGPRToMemory; +} + +void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( + MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { + assert(WWMReservedFrameIndexes.empty()); + + WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); + + int I = 0; + for (Register VGPR : WWMReservedRegs) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); + WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( + TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); + } } int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, @@ -539,6 +585,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( const llvm::MachineFunction &MF) : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()), + GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()), NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), @@ -549,7 +596,14 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), + BytesInStackArgArea(MFI.getBytesInStackArgArea()), + ReturnsVoid(MFI.returnsVoid()), ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { + for (Register Reg : MFI.WWMReservedRegs) + WWMReservedRegs.push_back(regToString(Reg, TRI)); + + if (MFI.getVGPRForAGPRCopy()) + VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI); auto SFI = MFI.getOptionalScavengeFI(); if (SFI) ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo()); @@ -563,8 +617,9 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) { ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; - MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); + MaxKernArgAlign = YamlMFI.MaxKernArgAlign; LDSSize = YamlMFI.LDSSize; + GDSSize = YamlMFI.GDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; Occupancy = YamlMFI.Occupancy; @@ -574,6 +629,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( WaveLimiter = YamlMFI.WaveLimiter; HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs; HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs; + BytesInStackArgArea = YamlMFI.BytesInStackArgArea; + ReturnsVoid = YamlMFI.ReturnsVoid; if (YamlMFI.ScavengeFI) { auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); @@ -595,10 +652,47 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( return false; } +bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { + for (const BasicBlock &BB : MF.getFunction()) { + for (const Instruction &I : BB) { + const auto *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + + if (CB->isInlineAsm()) { + const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand()); + for (const auto &CI : IA->ParseConstraints()) { + for (StringRef Code : CI.Codes) { + Code.consume_front("{"); + if (Code.startswith("a")) + return true; + } + } + continue; + } + + const Function *Callee = + dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); + if (!Callee) + return true; + + if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic) + return true; + } + } + + return false; +} + bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const { if (UsesAGPRs) return *UsesAGPRs; + if (!mayNeedAGPRs()) { + UsesAGPRs = false; + return false; + } + if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) || MF.getFrameInfo().hasCalls()) { UsesAGPRs = true; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 8e821274bb77..bebb13cbf09f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -15,9 +15,10 @@ #include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUMachineFunction.h" +#include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/raw_ostream.h" @@ -39,8 +40,8 @@ public: }; protected: - AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII) - : PseudoSourceValue(Kind, TII) {} + AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM) + : PseudoSourceValue(Kind, TM) {} public: bool isConstant(const MachineFrameInfo *) const override { @@ -60,8 +61,8 @@ public: class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue { public: - explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) - : AMDGPUPseudoSourceValue(PSVBuffer, TII) {} + explicit AMDGPUBufferPseudoSourceValue(const AMDGPUTargetMachine &TM) + : AMDGPUPseudoSourceValue(PSVBuffer, TM) {} static bool classof(const PseudoSourceValue *V) { return V->kind() == PSVBuffer; @@ -73,8 +74,8 @@ public: class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue { public: // TODO: Is the img rsrc useful? - explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) - : AMDGPUPseudoSourceValue(PSVImage, TII) {} + explicit AMDGPUImagePseudoSourceValue(const AMDGPUTargetMachine &TM) + : AMDGPUPseudoSourceValue(PSVImage, TM) {} static bool classof(const PseudoSourceValue *V) { return V->kind() == PSVImage; @@ -85,8 +86,8 @@ public: class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue { public: - explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII) - : AMDGPUPseudoSourceValue(GWSResource, TII) {} + explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM) + : AMDGPUPseudoSourceValue(GWSResource, TM) {} static bool classof(const PseudoSourceValue *V) { return V->kind() == GWSResource; @@ -269,8 +270,9 @@ template <> struct MappingTraits<SIMode> { struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { uint64_t ExplicitKernArgSize = 0; - unsigned MaxKernArgAlign = 0; - unsigned LDSSize = 0; + Align MaxKernArgAlign; + uint32_t LDSSize = 0; + uint32_t GDSSize = 0; Align DynLDSAlign; bool IsEntryFunction = false; bool NoSignedZerosFPMath = false; @@ -283,13 +285,19 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { // TODO: 10 may be a better default since it's the maximum.
unsigned Occupancy = 0; + SmallVector WWMReservedRegs; + StringValue ScratchRSrcReg = "$private_rsrc_reg"; StringValue FrameOffsetReg = "$fp_reg"; StringValue StackPtrOffsetReg = "$sp_reg"; + unsigned BytesInStackArgArea = 0; + bool ReturnsVoid = true; + Optional ArgInfo; SIMode Mode; Optional ScavengeFI; + StringValue VGPRForAGPRCopy; SIMachineFunctionInfo() = default; SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, @@ -304,8 +312,9 @@ template <> struct MappingTraits { static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) { YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize, UINT64_C(0)); - YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u); + YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign); YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u); + YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u); YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align()); YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false); YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false); @@ -319,12 +328,17 @@ template <> struct MappingTraits { StringValue("$fp_reg")); YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, StringValue("$sp_reg")); + YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u); + YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true); YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); YamlIO.mapOptional("mode", MFI.Mode, SIMode()); YamlIO.mapOptional("highBitsOf32BitAddress", MFI.HighBitsOf32BitAddress, 0u); YamlIO.mapOptional("occupancy", MFI.Occupancy, 0); + YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs); YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI); + YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy, + StringValue()); // Don't print out when it's empty. } }; @@ -335,8 +349,6 @@ template <> struct MappingTraits { class SIMachineFunctionInfo final : public AMDGPUMachineFunction { friend class GCNTargetMachine; - Register TIDReg = AMDGPU::NoRegister; - // Registers that may be reserved for spilling purposes. These may be the same // as the input registers. Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; @@ -377,12 +389,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // unit. Minimum - first, maximum - second. std::pair WavesPerEU = {0, 0}; - std::unique_ptr BufferPSV; - std::unique_ptr ImagePSV; - std::unique_ptr GWSResourcePSV; + const AMDGPUBufferPseudoSourceValue BufferPSV; + const AMDGPUImagePseudoSourceValue ImagePSV; + const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV; private: - unsigned LDSWaveSpillSize = 0; unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; @@ -422,13 +433,14 @@ private: // user arguments. This is an offset from the KernargSegmentPtr. bool ImplicitArgPtr : 1; + bool MayNeedAGPRs : 1; + // The hard-wired high half of the address of the global information table // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since // current hardware only allows a 16 bit value. unsigned GITPtrHigh; unsigned HighBitsOf32BitAddress; - unsigned GDSSize; // Current recorded maximum possible occupancy. 
unsigned Occupancy; @@ -440,17 +452,6 @@ private: MCPhysReg getNextSystemSGPR() const; public: - struct SpilledReg { - Register VGPR; - int Lane = -1; - - SpilledReg() = default; - SpilledReg(Register R, int L) : VGPR (R), Lane (L) {} - - bool hasLane() { return Lane != -1;} - bool hasReg() { return VGPR != 0;} - }; - struct SGPRSpillVGPR { // VGPR used for SGPR spills Register VGPR; @@ -468,14 +469,28 @@ public: bool IsDead = false; }; - // Map WWM VGPR to a stack slot that is used to save/restore it in the - // prolog/epilog. - MapVector> WWMReservedRegs; + // Track VGPRs reserved for WWM. + SmallSetVector WWMReservedRegs; + + /// Track stack slots used for save/restore of reserved WWM VGPRs in the + /// prolog/epilog. + + /// FIXME: This is temporary state only needed in PrologEpilogInserter, and + /// doesn't really belong here. It does not require serialization + SmallVector WWMReservedFrameIndexes; + + void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, + const SIRegisterInfo &TRI); + + auto wwmAllocation() const { + assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size()); + return zip(WWMReservedRegs, WWMReservedFrameIndexes); + } private: // Track VGPR + wave index for each subregister of the SGPR spilled to // frameindex key. - DenseMap> SGPRToVGPRSpills; + DenseMap> SGPRToVGPRSpills; unsigned NumVGPRSpillLanes = 0; SmallVector SpillVGPRs; @@ -491,6 +506,18 @@ private: // frame, so save it here and add it to the RegScavenger later. Optional ScavengeFI; +private: + Register VGPRForAGPRCopy; + +public: + Register getVGPRForAGPRCopy() const { + return VGPRForAGPRCopy; + } + + void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) { + VGPRForAGPRCopy = NewVGPRForAGPRCopy; + } + public: // FIXME /// If this is set, an SGPR used for save/restore of the register used for the /// frame pointer. @@ -506,31 +533,32 @@ public: // FIXME public: SIMachineFunctionInfo(const MachineFunction &MF); + SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default; + + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange); - void reserveWWMRegister(Register Reg, Optional FI) { - WWMReservedRegs.insert(std::make_pair(Reg, FI)); + void reserveWWMRegister(Register Reg) { + WWMReservedRegs.insert(Reg); } - ArrayRef getSGPRToVGPRSpills(int FrameIndex) const { + ArrayRef + getSGPRToVGPRSpills(int FrameIndex) const { auto I = SGPRToVGPRSpills.find(FrameIndex); - return (I == SGPRToVGPRSpills.end()) ? - ArrayRef() : makeArrayRef(I->second); + return (I == SGPRToVGPRSpills.end()) + ? ArrayRef() + : makeArrayRef(I->second); } ArrayRef getSGPRSpillVGPRs() const { return SpillVGPRs; } - void setSGPRSpillVGPRs(Register NewVGPR, Optional newFI, int Index) { - SpillVGPRs[Index].VGPR = NewVGPR; - SpillVGPRs[Index].FI = newFI; - } - - bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF); - ArrayRef getAGPRSpillVGPRs() const { return SpillAGPR; } @@ -555,15 +583,15 @@ public: unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR); - void removeDeadFrameIndices(MachineFrameInfo &MFI); + + /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill + /// to the default stack. 
+ bool removeDeadFrameIndices(MachineFrameInfo &MFI, + bool ResetSGPRSpillStackIDs); int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI); Optional getOptionalScavengeFI() const { return ScavengeFI; } - bool hasCalculatedTID() const { return TIDReg != 0; }; - Register getTIDReg() const { return TIDReg; }; - void setTIDReg(Register Reg) { TIDReg = Reg; } - unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } @@ -581,6 +609,13 @@ public: Register addFlatScratchInit(const SIRegisterInfo &TRI); Register addImplicitBufferPtr(const SIRegisterInfo &TRI); + /// Increment user SGPRs used for padding the argument list only. + Register addReservedUserSGPR() { + Register Next = getNextUserSGPR(); + ++NumUserSGPRs; + return Next; + } + // Add system SGPRs. Register addWorkGroupIDX() { ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); @@ -722,10 +757,6 @@ public: return HighBitsOf32BitAddress; } - unsigned getGDSSize() const { - return GDSSize; - } - unsigned getNumUserSGPRs() const { return NumUserSGPRs; } @@ -903,31 +934,19 @@ public: llvm_unreachable("unexpected dimension"); } - unsigned getLDSWaveSpillSize() const { - return LDSWaveSpillSize; + const AMDGPUBufferPseudoSourceValue * + getBufferPSV(const AMDGPUTargetMachine &TM) { + return &BufferPSV; } - const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) { - if (!BufferPSV) - BufferPSV = std::make_unique(TII); - - return BufferPSV.get(); - } - - const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) { - if (!ImagePSV) - ImagePSV = std::make_unique(TII); - - return ImagePSV.get(); + const AMDGPUImagePseudoSourceValue * + getImagePSV(const AMDGPUTargetMachine &TM) { + return &ImagePSV; } - const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) { - if (!GWSResourcePSV) { - GWSResourcePSV = - std::make_unique(TII); - } - - return GWSResourcePSV.get(); + const AMDGPUGWSResourcePseudoSourceValue * + getGWSPSV(const AMDGPUTargetMachine &TM) { + return &GWSResourcePSV; } unsigned getOccupancy() const { @@ -953,6 +972,14 @@ public: limitOccupancy(MF); } + bool mayNeedAGPRs() const { + return MayNeedAGPRs; + } + + // \returns true if a function has a use of AGPRs via inline asm or + // has a call which may use it. + bool mayUseAGPRs(const MachineFunction &MF) const; + // \returns true if a function needs or may need AGPRs. bool usesAGPRs(const MachineFunction &MF) const; }; diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index 81db66a98ddf..e426e938b856 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -64,7 +64,7 @@ using namespace llvm; // First the instructions are put into blocks. // We want the blocks help control register usage and hide high latencies // later. To help control register usage, we typically want all local -// computations, when for example you create a result that can be comsummed +// computations, when for example you create a result that can be consumed // right away, to be contained in a block. Block inputs and outputs would // typically be important results that are needed in several locations of // the shader. 
Since we do want blocks to help hide high latencies, we want @@ -90,8 +90,8 @@ using namespace llvm; // Increasing the number of active wavefronts helps hide the former, but it // doesn't solve the latter, thus why even if wavefront count is high, we have // to try have as many instructions hiding high latencies as possible. -// The OpenCL doc says for example latency of 400 cycles for a global mem access, -// which is hidden by 10 instructions if the wavefront count is 10. +// The OpenCL doc says for example latency of 400 cycles for a global mem +// access, which is hidden by 10 instructions if the wavefront count is 10. // Some figures taken from AMD docs: // Both texture and constant L1 caches are 4-way associative with 64 bytes @@ -353,7 +353,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, // able to correctly handle 5 vs 6, 2 vs 3. // (Note: This is not sufficient for RPTracker to not do mistakes for case 4) // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7 - // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7 + // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7 // The use of findDefBetween removes the case 4. for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { Register Reg = RegMaskPair.RegUnit; @@ -402,7 +402,7 @@ void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock, nodeScheduled(SU); } - // TODO: compute InternalAdditionnalPressure. + // TODO: compute InternalAdditionalPressure. InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size()); // Check everything is right. @@ -696,7 +696,7 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() { bool HasSubGraph; std::vector SubGraph; // By construction (topological order), if SU and - // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary + // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary // in the parent graph of SU. #ifndef NDEBUG SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], @@ -1123,36 +1123,26 @@ void SIScheduleBlockCreator::colorExports() { for (unsigned SUNum : DAG->TopDownIndex2SU) { const SUnit &SU = DAG->SUnits[SUNum]; if (SIInstrInfo::isEXP(*SU.getInstr())) { - // Check the EXP can be added to the group safely, - // ie without needing any other instruction. - // The EXP is allowed to depend on other EXP - // (they will be in the same group). - for (unsigned j : ExpGroup) { - bool HasSubGraph; - std::vector SubGraph; - // By construction (topological order), if SU and - // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary - // in the parent graph of SU. -#ifndef NDEBUG - SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], - HasSubGraph); - assert(!HasSubGraph); -#endif - SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU, - HasSubGraph); - if (!HasSubGraph) - continue; // No dependencies between each other - - // SubGraph contains all the instructions required - // between EXP SUnits[j] and EXP SU. - for (unsigned k : SubGraph) { - if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr())) - // Other instructions than EXP would be required in the group. - // Abort the groupping. - return; + // SU is an export instruction. Check whether one of its successor + // dependencies is a non-export, in which case we skip export grouping. + for (const SDep &SuccDep : SU.Succs) { + const SUnit *SuccSU = SuccDep.getSUnit(); + if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) { + // Ignore these dependencies. 
+ continue; + } + assert(SuccSU->isInstr() && + "SUnit unexpectedly not representing an instruction!"); + + if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) { + // A non-export depends on us. Skip export grouping. + // Note that this is a bit pessimistic: We could still group all other + // exports that are not depended on by non-exports, directly or + // indirectly. Simply skipping this particular export but grouping all + // others would not account for indirect dependencies. + return; } } - ExpGroup.push_back(SUNum); } } diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index fff4f6729c99..8a66213931ff 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -19,6 +19,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/TargetParser.h" @@ -63,7 +64,7 @@ enum class SIAtomicScope { }; /// The distinct address spaces supported by the AMDGPU target for -/// atomic memory operation. Can be ORed toether. +/// atomic memory operation. Can be ORed together. enum class SIAtomicAddrSpace { NONE = 0u, GLOBAL = 1u << 0, @@ -459,6 +460,56 @@ public: Position Pos) const override; }; +class SIGfx940CacheControl : public SIGfx90ACacheControl { +protected: + + /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI + /// is modified, false otherwise. + bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const { + return enableNamedBit(MI, AMDGPU::CPol::SC0); + } + + /// Sets SC1 bit to "true" if present in \p MI. Returns true if \p MI + /// is modified, false otherwise. + bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const { + return enableNamedBit(MI, AMDGPU::CPol::SC1); + } + + /// Sets NT bit to "true" if present in \p MI. Returns true if \p MI + /// is modified, false otherwise. 
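Each enable*Bit helper declared here is a thin wrapper over one operation: OR a named cache-policy bit into the instruction's cpol immediate and report whether that changed anything. A sketch over a plain integer operand, with placeholder bit values rather than the real CPol encoding:

    #include <cstdint>

    // Set Bit in the cache-policy word; returns true iff the word changed,
    // mirroring the "returns true if MI is modified" contract above.
    bool enableBit(uint32_t &CPol, uint32_t Bit) {
      if (CPol & Bit)
        return false;
      CPol |= Bit;
      return true;
    }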
+ bool enableNTBit(const MachineBasicBlock::iterator &MI) const { + return enableNamedBit(MI, AMDGPU::CPol::NT); + } + +public: + + SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}; + + bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace) const override; + + bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace) const override; + + bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace) const override; + + bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI, + SIAtomicAddrSpace AddrSpace, SIMemOp Op, + bool IsVolatile, + bool IsNonTemporal) const override; + + bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, Position Pos) const override; + + bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, + Position Pos) const override; +}; + class SIGfx10CacheControl : public SIGfx7CacheControl { protected: @@ -494,6 +545,20 @@ public: Position Pos) const override; }; +class SIGfx11CacheControl : public SIGfx10CacheControl { +public: + SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {} + + bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace) const override; + + bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI, + SIAtomicAddrSpace AddrSpace, SIMemOp Op, + bool IsVolatile, + bool IsNonTemporal) const override; +}; + class SIMemoryLegalizer final : public MachineFunctionPass { private: @@ -649,7 +714,7 @@ Optional SIMemOpAccess::constructFromMIWithMMO( return None; } - SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID(); + SSID = *IsSyncScopeInclusion ? 
SSID : MMO->getSyncScopeID(); Ordering = getMergedAtomicOrdering(Ordering, OpOrdering); assert(MMO->getFailureOrdering() != AtomicOrdering::Release && MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease); @@ -668,7 +733,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO( return None; } std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) = - ScopeOrNone.getValue(); + *ScopeOrNone; if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) || ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) || ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) { @@ -730,7 +795,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo( SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE; bool IsCrossAddressSpaceOrdering = false; std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) = - ScopeOrNone.getValue(); + *ScopeOrNone; if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) || ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) { @@ -775,13 +840,17 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI, /* static */ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) { GCNSubtarget::Generation Generation = ST.getGeneration(); + if (ST.hasGFX940Insts()) + return std::make_unique<SIGfx940CacheControl>(ST); if (ST.hasGFX90AInsts()) return std::make_unique<SIGfx90ACacheControl>(ST); if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS) return std::make_unique<SIGfx6CacheControl>(ST); if (Generation < AMDGPUSubtarget::GFX10) return std::make_unique<SIGfx7CacheControl>(ST); - return std::make_unique<SIGfx10CacheControl>(ST); + if (Generation < AMDGPUSubtarget::GFX11) + return std::make_unique<SIGfx10CacheControl>(ST); + return std::make_unique<SIGfx11CacheControl>(ST); } bool SIGfx6CacheControl::enableLoadCacheBypass( @@ -943,7 +1012,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, case SIAtomicScope::WAVEFRONT: case SIAtomicScope::SINGLETHREAD: // The LDS keeps all memory operations in order for - // the same wavesfront. + // the same wavefront. break; default: llvm_unreachable("Unsupported synchronization scope"); @@ -1360,7 +1429,9 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, // to initiate writeback of any dirty cache lines of earlier writes by the // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the // writeback has completed. - BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) + // Set SC bits to indicate system scope. + .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1); // Followed by same as GFX7, which will ensure the necessary "S_WAITCNT // vmcnt(0)" needed by the "BUFFER_WBL2". Changed = true; @@ -1386,6 +1457,308 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, return Changed; } +bool SIGfx940CacheControl::enableLoadCacheBypass( + const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace) const { + assert(MI->mayLoad() && !MI->mayStore()); + bool Changed = false; + + if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + switch (Scope) { + case SIAtomicScope::SYSTEM: + // Set SC bits to indicate system scope. + Changed |= enableSC0Bit(MI); + Changed |= enableSC1Bit(MI); + break; + case SIAtomicScope::AGENT: + // Set SC bits to indicate agent scope. + Changed |= enableSC1Bit(MI); + break; + case SIAtomicScope::WORKGROUP: + // In threadgroup split mode the waves of a work-group can be executing + // on different CUs. Therefore we need to bypass the L1, which is per CU.
+      // Otherwise in non-threadgroup split mode all waves of a work-group are
+      // on the same CU, and so the L1 does not need to be bypassed. Setting SC
+      // bits to indicate work-group scope will do this automatically.
+      Changed |= enableSC0Bit(MI);
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // Leave SC bits unset to indicate wavefront scope.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  /// The scratch address space does not need the global memory caches
+  /// to be bypassed as all memory operations by the same thread are
+  /// sequentially consistent, and no other thread can access scratch
+  /// memory.
+
+  /// Other address spaces do not have a cache.
+
+  return Changed;
+}
+
+bool SIGfx940CacheControl::enableStoreCacheBypass(
+    const MachineBasicBlock::iterator &MI,
+    SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
+  assert(!MI->mayLoad() && MI->mayStore());
+  bool Changed = false;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+      // Set SC bits to indicate system scope.
+      Changed |= enableSC0Bit(MI);
+      Changed |= enableSC1Bit(MI);
+      break;
+    case SIAtomicScope::AGENT:
+      // Set SC bits to indicate agent scope.
+      Changed |= enableSC1Bit(MI);
+      break;
+    case SIAtomicScope::WORKGROUP:
+      // Set SC bits to indicate workgroup scope.
+      Changed |= enableSC0Bit(MI);
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // Leave SC bits unset to indicate wavefront scope.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  /// The scratch address space does not need the global memory caches
+  /// to be bypassed as all memory operations by the same thread are
+  /// sequentially consistent, and no other thread can access scratch
+  /// memory.
+
+  /// Other address spaces do not have a cache.
+
+  return Changed;
+}
+
+bool SIGfx940CacheControl::enableRMWCacheBypass(
+    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+    SIAtomicAddrSpace AddrSpace) const {
+  assert(MI->mayLoad() && MI->mayStore());
+  bool Changed = false;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+      // Set SC1 bit to indicate system scope.
+      Changed |= enableSC1Bit(MI);
+      break;
+    case SIAtomicScope::AGENT:
+    case SIAtomicScope::WORKGROUP:
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // RMW atomic operations implicitly bypass the L1 cache and only use SC1
+      // to indicate system or agent scope. The SC0 bit is used to indicate if
+      // they are return or no-return. Leave SC1 bit unset to indicate agent
+      // scope.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  return Changed;
+}
+
+bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
+    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+    bool IsVolatile, bool IsNonTemporal) const {
+  // Only handle load and store, not atomic read-modify-write instructions. The
+  // latter use glc to indicate if the atomic returns a result and so must not
+  // be used for cache control.
+  assert(MI->mayLoad() ^ MI->mayStore());
+
+  // Only update load and store, not LLVM IR atomic read-modify-write
+  // instructions. The latter are always marked as volatile, so we cannot
+  // sensibly handle them here without pessimizing all atomics. Also they do
+  // not support the nontemporal attribute.
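// [Editor's note: illustrative sketch, not part of the imported patch. The
// GFX940 SC0/SC1 cache-policy encoding applied by the three bypass hooks
// above can be summarized in one table; "scBitsForScope" is a hypothetical
// helper name used only for this sketch.]
//
//   static unsigned scBitsForScope(SIAtomicScope Scope) {
//     switch (Scope) {
//     case SIAtomicScope::SYSTEM:    // bypass caches up to system scope
//       return AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1;
//     case SIAtomicScope::AGENT:     // bypass caches up to agent scope
//       return AMDGPU::CPol::SC1;
//     case SIAtomicScope::WORKGROUP: // bypass the per-CU L1 (TgSplit mode)
//       return AMDGPU::CPol::SC0;
//     default:                       // wavefront/single thread: leave unset
//       return 0;
//     }
//   }
//
// RMW atomics are the exception: they only use SC1 (agent vs. system scope),
// since SC0 on them encodes whether the atomic returns a result.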
+  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+
+  bool Changed = false;
+
+  if (IsVolatile) {
+    // Set SC bits to indicate system scope.
+    Changed |= enableSC0Bit(MI);
+    Changed |= enableSC1Bit(MI);
+
+    // Ensure operation has completed at system scope to cause all volatile
+    // operations to be visible outside the program in a global order. Do not
+    // request cross address space as only the global address space can be
+    // observable outside the program, so no need to cause a waitcnt for LDS
+    // address space operations.
+    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
+                          Position::AFTER);
+
+    return Changed;
+  }
+
+  if (IsNonTemporal) {
+    Changed |= enableNTBit(MI);
+    return Changed;
+  }
+
+  return Changed;
+}
+
+bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+                                         SIAtomicScope Scope,
+                                         SIAtomicAddrSpace AddrSpace,
+                                         Position Pos) const {
+  if (!InsertCacheInv)
+    return false;
+
+  bool Changed = false;
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (Pos == Position::AFTER)
+    ++MI;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+      // Ensures that following loads will not see stale remote VMEM data or
+      // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
+      // CC will never be stale due to the local memory probes.
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
+          // Set SC bits to indicate system scope.
+          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
+      // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
+      // hardware does not reorder memory operations by the same wave with
+      // respect to a preceding "BUFFER_INV". The invalidate is guaranteed to
+      // remove any cache lines of earlier writes by the same wave and ensures
+      // later reads by the same wave will refetch the cache lines.
+      Changed = true;
+      break;
+    case SIAtomicScope::AGENT:
+      // Ensures that following loads will not see stale remote data or local
+      // MTYPE NC global data. Local MTYPE RW and CC memory will never be stale
+      // due to the memory probes.
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
+          // Set SC bits to indicate agent scope.
+          .addImm(AMDGPU::CPol::SC1);
+      // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
+      // does not reorder memory operations with respect to a preceding buffer
+      // invalidate. The invalidate is guaranteed to remove any cache lines of
+      // earlier writes and ensures later writes will refetch the cache lines.
+      Changed = true;
+      break;
+    case SIAtomicScope::WORKGROUP:
+      // In threadgroup split mode the waves of a work-group can be executing on
+      // different CUs. Therefore need to invalidate the L1 which is per CU.
+      // Otherwise in non-threadgroup split mode all waves of a work-group are
+      // on the same CU, and so the L1 does not need to be invalidated.
+      if (ST.isTgSplitEnabled()) {
+        // Ensures L1 is invalidated if in threadgroup split mode. In
+        // non-threadgroup split mode it is a NOP, but there is no point
+        // generating it in that case if we know we are not in that mode.
+        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
+            // Set SC bits to indicate work-group scope.
+            .addImm(AMDGPU::CPol::SC0);
+        // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
+        // does not reorder memory operations with respect to a preceding buffer
+        // invalidate.
The invalidate is guaranteed to remove any cache lines of + // earlier writes and ensures later writes will refetch the cache lines. + Changed = true; + } + break; + case SIAtomicScope::WAVEFRONT: + case SIAtomicScope::SINGLETHREAD: + // Could generate "BUFFER_INV" but it would do nothing as there are no + // caches to invalidate. + break; + default: + llvm_unreachable("Unsupported synchronization scope"); + } + } + + /// The scratch address space does not need the global memory cache + /// to be flushed as all memory operations by the same thread are + /// sequentially consistent, and no other thread can access scratch + /// memory. + + /// Other address spaces do not have a cache. + + if (Pos == Position::AFTER) + --MI; + + return Changed; +} + +bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, + bool IsCrossAddrSpaceOrdering, + Position Pos) const { + bool Changed = false; + + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + if (Pos == Position::AFTER) + ++MI; + + if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + switch (Scope) { + case SIAtomicScope::SYSTEM: + // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the + // hardware does not reorder memory operations by the same wave with + // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed + // to initiate writeback of any dirty cache lines of earlier writes by the + // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the + // writeback has completed. + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) + // Set SC bits to indicate system scope. + .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1); + // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is + // SIAtomicScope::SYSTEM, the following insertWait will generate the + // required "S_WAITCNT vmcnt(0)" needed by the "BUFFER_WBL2". + Changed = true; + break; + case SIAtomicScope::AGENT: + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) + // Set SC bits to indicate agent scope. + .addImm(AMDGPU::CPol::SC1); + + // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is + // SIAtomicScope::AGENT, the following insertWait will generate the + // required "S_WAITCNT vmcnt(0)". + Changed = true; + break; + case SIAtomicScope::WORKGROUP: + case SIAtomicScope::WAVEFRONT: + case SIAtomicScope::SINGLETHREAD: + // Do not generate "BUFFER_WBL2" as there are no caches it would + // writeback, and would require an otherwise unnecessary + // "S_WAITCNT vmcnt(0)". + break; + default: + llvm_unreachable("Unsupported synchronization scope"); + } + } + + if (Pos == Position::AFTER) + --MI; + + // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as other + // S_WAITCNT needed. + Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, + IsCrossAddrSpaceOrdering, Pos); + + return Changed; +} + bool SIGfx10CacheControl::enableLoadCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -1547,7 +1920,7 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI, case SIAtomicScope::WAVEFRONT: case SIAtomicScope::SINGLETHREAD: // The LDS keeps all memory operations in order for - // the same wavesfront. + // the same wavefront. 
       break;
     default:
       llvm_unreachable("Unsupported synchronization scope");
@@ -1655,6 +2028,101 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   return Changed;
 }
 
+bool SIGfx11CacheControl::enableLoadCacheBypass(
+    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+    SIAtomicAddrSpace AddrSpace) const {
+  assert(MI->mayLoad() && !MI->mayStore());
+  bool Changed = false;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+    case SIAtomicScope::AGENT:
+      // Set the L0 and L1 cache policies to MISS_EVICT.
+      // Note: there is no L2 cache coherent bypass control at the ISA level.
+      Changed |= enableGLCBit(MI);
+      break;
+    case SIAtomicScope::WORKGROUP:
+      // In WGP mode the waves of a work-group can be executing on either CU of
+      // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
+      // CU mode all waves of a work-group are on the same CU, and so the L0
+      // does not need to be bypassed.
+      if (!ST.isCuModeEnabled())
+        Changed |= enableGLCBit(MI);
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // No cache to bypass.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  /// The scratch address space does not need the global memory caches
+  /// to be bypassed as all memory operations by the same thread are
+  /// sequentially consistent, and no other thread can access scratch
+  /// memory.
+
+  /// Other address spaces do not have a cache.
+
+  return Changed;
+}
+
+bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
+    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+    bool IsVolatile, bool IsNonTemporal) const {
+
+  // Only handle load and store, not atomic read-modify-write instructions. The
+  // latter use glc to indicate if the atomic returns a result and so must not
+  // be used for cache control.
+  assert(MI->mayLoad() ^ MI->mayStore());
+
+  // Only update load and store, not LLVM IR atomic read-modify-write
+  // instructions. The latter are always marked as volatile, so we cannot
+  // sensibly handle them here without pessimizing all atomics. Also they do
+  // not support the nontemporal attribute.
+  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+
+  bool Changed = false;
+
+  if (IsVolatile) {
+    // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
+    // and MISS_LRU for store instructions.
+    // Note: there is no L2 cache coherent bypass control at the ISA level.
+    if (Op == SIMemOp::LOAD)
+      Changed |= enableGLCBit(MI);
+
+    // Set MALL NOALLOC for load and store instructions.
+    Changed |= enableDLCBit(MI);
+
+    // Ensure operation has completed at system scope to cause all volatile
+    // operations to be visible outside the program in a global order. Do not
+    // request cross address space as only the global address space can be
+    // observable outside the program, so no need to cause a waitcnt for LDS
+    // address space operations.
+    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
+                          Position::AFTER);
+    return Changed;
+  }
+
+  if (IsNonTemporal) {
+    // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
+    // and L2 cache policy to STREAM.
+    // For stores setting both GLC and SLC configures L0 and L1 cache policy
+    // to MISS_EVICT and the L2 cache policy to STREAM.
+    if (Op == SIMemOp::STORE)
+      Changed |= enableGLCBit(MI);
+    Changed |= enableSLCBit(MI);
+
+    // Set MALL NOALLOC for load and store instructions.
+    Changed |= enableDLCBit(MI);
+    return Changed;
+  }
+
+  return Changed;
+}
+
 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
   if (AtomicPseudoMIs.empty())
     return false;
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 24a8879b5684..a5816e2e8c73 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -17,6 +17,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include <queue>
 
 #define DEBUG_TYPE "si-mode-register"
@@ -162,7 +163,9 @@ FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
 // double precision setting.
 Status SIModeRegister::getInstructionMode(MachineInstr &MI,
                                           const SIInstrInfo *TII) {
-  if (TII->usesFPDPRounding(MI)) {
+  if (TII->usesFPDPRounding(MI) ||
+      MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
+      MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
     switch (MI.getOpcode()) {
     case AMDGPU::V_INTERP_P1LL_F16:
     case AMDGPU::V_INTERP_P1LV_F16:
@@ -170,6 +173,18 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
       // f16 interpolation instructions need double precision round to zero
       return Status(FP_ROUND_MODE_DP(3),
                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+    case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
+      // Replacing the pseudo by a real instruction
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3),
+                    FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
+    }
+    case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
+      // Replacing the pseudo by a real instruction
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3),
+                    FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
+    }
     default:
       return DefaultStatus;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index b9c839fe28ba..5215397d5936 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -9,6 +9,7 @@
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/InitializePasses.h"
 
@@ -292,6 +293,210 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
   return false;
 }
 
+// Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
+// the beginning of the BB is reached or Pred evaluates to true - which can be
+// an arbitrary condition based on the current MachineInstr, for instance a
+// target instruction. Breaks prematurely by returning nullptr if one of the
+// registers given in NonModifiableRegs is modified by the current instruction.
+static MachineInstr *
+findInstrBackwards(MachineInstr &Origin,
+                   std::function<bool(MachineInstr *)> Pred,
+                   ArrayRef<MCRegister> NonModifiableRegs,
+                   const SIRegisterInfo *TRI, unsigned MaxInstructions = 20) {
+  MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
+                                      E = Origin.getParent()->rend();
+  unsigned CurrentIteration = 0;
+
+  for (++A; CurrentIteration < MaxInstructions && A != E; ++A) {
+    if (A->isDebugInstr())
+      continue;
+
+    if (Pred(&*A))
+      return &*A;
+
+    for (MCRegister Reg : NonModifiableRegs) {
+      if (A->modifiesRegister(Reg, TRI))
+        return nullptr;
+    }
+
+    ++CurrentIteration;
+  }
+
+  return nullptr;
+}
+
+
+// Determine if a register Reg is not re-defined and still in use
+// in the range (Stop..Start].
+// It does so by backwards calculating liveness from the end of the BB until
+// either Stop or the beginning of the BB is reached.
+// After liveness is calculated, we can determine if Reg is still in use and not
+// defined in between the instructions.
+static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
+                                   MCRegister Reg, const SIRegisterInfo *TRI,
+                                   MachineRegisterInfo &MRI,
+                                   bool useLiveOuts = false,
+                                   bool ignoreStart = false) {
+  LivePhysRegs LR(*TRI);
+  if (useLiveOuts)
+    LR.addLiveOuts(*Stop.getParent());
+
+  MachineBasicBlock::reverse_iterator A(Start);
+  MachineBasicBlock::reverse_iterator E(Stop);
+
+  if (ignoreStart)
+    ++A;
+
+  for (; A != Stop.getParent()->rend() && A != Stop; ++A) {
+    LR.stepBackward(*A);
+  }
+
+  return !LR.available(MRI, Reg);
+}
+
+// Determine if a register Reg is not re-defined and still in use
+// in the range (Stop..BB.end].
+static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg,
+                                 const SIRegisterInfo *TRI,
+                                 MachineRegisterInfo &MRI) {
+  return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, TRI,
+                                MRI, true);
+}
+
+// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence
+// by looking at an instance of a s_and_saveexec instruction. Returns a pointer
+// to the v_cmp instruction if it is safe to replace the sequence (see the
+// conditions in the function body). This is after register allocation, so some
+// checks on operand dependencies need to be considered.
+static MachineInstr *findPossibleVCMPVCMPXOptimization(
+    MachineInstr &SaveExec, MCRegister Exec, const SIRegisterInfo *TRI,
+    const SIInstrInfo *TII, MachineRegisterInfo &MRI) {
+
+  MachineInstr *VCmp = nullptr;
+
+  Register SaveExecDest = SaveExec.getOperand(0).getReg();
+  if (!TRI->isSGPRReg(MRI, SaveExecDest))
+    return nullptr;
+
+  MachineOperand *SaveExecSrc0 =
+      TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0);
+  if (!SaveExecSrc0->isReg())
+    return nullptr;
+
+  // Try to find the last v_cmp instruction that defs the saveexec input
+  // operand without any write to Exec or the saveexec input operand in between.
+  VCmp = findInstrBackwards(
+      SaveExec,
+      [&](MachineInstr *Check) {
+        return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 &&
+               Check->modifiesRegister(SaveExecSrc0->getReg(), TRI);
+      },
+      {Exec, SaveExecSrc0->getReg()}, TRI);
+
+  if (!VCmp)
+    return nullptr;
+
+  MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst);
+  assert(VCmpDest && "Should have an sdst operand!");
+
+  // Check if any of the v_cmp source operands is written by the saveexec.
+  MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0);
+  if (Src0->isReg() && TRI->isSGPRReg(MRI, Src0->getReg()) &&
+      SaveExec.modifiesRegister(Src0->getReg(), TRI))
+    return nullptr;
+
+  MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1);
+  if (Src1->isReg() && TRI->isSGPRReg(MRI, Src1->getReg()) &&
+      SaveExec.modifiesRegister(Src1->getReg(), TRI))
+    return nullptr;
+
+  // Don't do the transformation if the destination operand is included in
+  // its MBB Live-outs, meaning it's used in any of its successors, leading
+  // to incorrect code if the v_cmp and therefore the def of
+  // the dest operand is removed.
+  if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg()))
+    return nullptr;
+
+  // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the
+  // s_and_saveexec, skip the optimization.
+  if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), TRI, MRI,
+                             false, true) ||
+      isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI))
+    return nullptr;
+
+  // Try to determine if there is a write to any of the VCmp
+  // operands between the saveexec and the vcmp.
+  // If yes, additional VGPR spilling might need to be inserted. In this case,
+  // it's not worth replacing the instruction sequence.
+  SmallVector<MCRegister, 2> NonDefRegs;
+  if (Src0->isReg())
+    NonDefRegs.push_back(Src0->getReg());
+
+  if (Src1->isReg())
+    NonDefRegs.push_back(Src1->getReg());
+
+  if (!findInstrBackwards(
+          SaveExec, [&](MachineInstr *Check) { return Check == VCmp; },
+          NonDefRegs, TRI))
+    return nullptr;
+
+  return VCmp;
+}
+
+// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the
+// operands extracted from a v_cmp ..., s_and_saveexec pattern.
+static bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
+                                         MachineInstr &VCmp, MCRegister Exec,
+                                         const SIInstrInfo *TII,
+                                         const SIRegisterInfo *TRI,
+                                         MachineRegisterInfo &MRI) {
+  const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode());
+
+  if (NewOpcode == -1)
+    return false;
+
+  MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0);
+  MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1);
+
+  Register MoveDest = SaveExecInstr.getOperand(0).getReg();
+
+  MachineBasicBlock::instr_iterator InsertPosIt = SaveExecInstr.getIterator();
+  if (!SaveExecInstr.uses().empty()) {
+    bool isSGPR32 = TRI->getRegSizeInBits(MoveDest, MRI) == 32;
+    unsigned MovOpcode = isSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+    BuildMI(*SaveExecInstr.getParent(), InsertPosIt,
+            SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest)
+        .addReg(Exec);
+  }
+
+  // Omit dst as V_CMPX is implicitly writing to EXEC.
+  // Add dummy src and clamp modifiers, if needed.
+  auto Builder = BuildMI(*VCmp.getParent(), std::next(InsertPosIt),
+                         VCmp.getDebugLoc(), TII->get(NewOpcode));
+
+  auto TryAddImmediateValueFromNamedOperand =
+      [&](unsigned OperandName) -> void {
+    if (auto *Mod = TII->getNamedOperand(VCmp, OperandName))
+      Builder.addImm(Mod->getImm());
+  };
+
+  TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers);
+  Builder.add(*Src0);
+
+  TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers);
+  Builder.add(*Src1);
+
+  TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp);
+
+  // The kill flags may no longer be correct.
+  if (Src0->isReg())
+    MRI.clearKillFlags(Src0->getReg());
+  if (Src1->isReg())
+    MRI.clearKillFlags(Src1->getReg());
+
+  return true;
+}
+
 bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -299,6 +504,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
   MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
 
   // Optimize sequences emitted for control flow lowering. They are originally
@@ -312,6 +518,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
   //     x = s_<op>_saveexec_b64 y
   //
 
+  bool Changed = false;
   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
     MachineBasicBlock::reverse_iterator E = MBB.rend();
@@ -351,6 +558,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
         LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
 
         CopyToExecInst->eraseFromParent();
+        Changed = true;
       }
 
       continue;
@@ -456,8 +664,49 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
       OtherInst->substituteRegister(CopyToExec, Exec,
                                     AMDGPU::NoSubRegister, *TRI);
     }
+
+    Changed = true;
   }
 
-  return true;
+  // After all s_op_saveexec instructions are inserted,
+  // replace (on GFX10.3 and later)
+  // v_cmp_* SGPR, IMM, VGPR
+  // s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR
+  // with
+  // s_mov_b32 EXEC_SGPR_DEST, exec_lo
+  // v_cmpx_* IMM, VGPR
+  // to reduce pipeline stalls.
+  if (ST.hasGFX10_3Insts()) {
+    DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
+    const unsigned AndSaveExecOpcode =
+        ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        // Record relevant v_cmp / s_and_saveexec instruction pairs for
+        // replacement.
+        if (MI.getOpcode() != AndSaveExecOpcode)
+          continue;
+
+        if (MachineInstr *VCmp =
+                findPossibleVCMPVCMPXOptimization(MI, Exec, TRI, TII, *MRI))
+          SaveExecVCmpMapping[&MI] = VCmp;
+      }
+    }
+
+    for (const auto &Entry : SaveExecVCmpMapping) {
+      MachineInstr *SaveExecInstr = Entry.getFirst();
+      MachineInstr *VCmpInstr = Entry.getSecond();
+
+      if (optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec, TII,
+                                       TRI, *MRI)) {
+        SaveExecInstr->eraseFromParent();
+        VCmpInstr->eraseFromParent();
+
+        Changed = true;
+      }
    }
+  }
+  return Changed;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 5f89f3826683..e5e65a8dbbf1 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -39,7 +39,7 @@ private:
   MCRegister CondReg;
   MCRegister ExecReg;
 
-  Register optimizeVcndVcmpPair(MachineBasicBlock &MBB);
+  bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
   bool optimizeElseBranch(MachineBasicBlock &MBB);
 
 public:
@@ -90,8 +90,8 @@ static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx,
 static bool isDefBetween(const SIRegisterInfo &TRI,
                          LiveIntervals *LIS, Register Reg,
                          const MachineInstr &Sel, const MachineInstr &And) {
-  SlotIndex AndIdx = LIS->getInstructionIndex(And);
-  SlotIndex SelIdx = LIS->getInstructionIndex(Sel);
+  SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot();
+  SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot();
   if (Reg.isVirtual())
     return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
 
@@ -119,21 +119,20 @@ static bool isDefBetween(const SIRegisterInfo &TRI,
 // required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
 // lanes.
 //
-// Returns %cc register on success.
-Register
-SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
+// Returns true on success.
+bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) { unsigned Opc = MI.getOpcode(); return Opc == AMDGPU::S_CBRANCH_VCCZ || Opc == AMDGPU::S_CBRANCH_VCCNZ; }); if (I == MBB.terminators().end()) - return Register(); + return false; auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS); if (!And || And->getOpcode() != AndOpc || !And->getOperand(1).isReg() || !And->getOperand(2).isReg()) - return Register(); + return false; MachineOperand *AndCC = &And->getOperand(1); Register CmpReg = AndCC->getReg(); @@ -143,49 +142,49 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { CmpReg = AndCC->getReg(); CmpSubReg = AndCC->getSubReg(); } else if (And->getOperand(2).getReg() != Register(ExecReg)) { - return Register(); + return false; } auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS); if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 || Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) || Cmp->getParent() != And->getParent()) - return Register(); + return false; MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0); MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1); if (Op1->isImm() && Op2->isReg()) std::swap(Op1, Op2); if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1) - return Register(); + return false; Register SelReg = Op1->getReg(); auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS); if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64) - return Register(); + return false; if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) || TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers)) - return Register(); + return false; Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0); Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1); MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2); if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() || Op1->getImm() != 0 || Op2->getImm() != 1) - return Register(); + return false; Register CCReg = CC->getReg(); // If there was a def between the select and the and, we would need to move it // to fold this. if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And)) - return Register(); + return false; + // TODO: Guard against implicit def operands? LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' << *And); - LIS->RemoveMachineInstrFromMaps(*And); MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc), And->getOperand(0).getReg()) @@ -196,34 +195,92 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { MachineOperand &Andn2SCC = Andn2->getOperand(3); assert(Andn2SCC.getReg() == AMDGPU::SCC); Andn2SCC.setIsDead(AndSCC.isDead()); + + SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2); And->eraseFromParent(); - LIS->InsertMachineInstrInMaps(*Andn2); LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n'); + SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); + SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); + + LiveInterval *CmpLI = + CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr; + LiveInterval *SelLI = + SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr; + + // Update live intervals for CCReg before potentially removing CmpReg/SelReg, + // and their associated liveness information. 
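// [Editor's note: context sketch, hedged; the precise MIR shape is spelled
// out in the pass comments above this function in the full source.
// Schematically the fold is:
//   %sel = V_CNDMASK_B32_e64 0, 1, %cc
//   %cmp = V_CMP_NE_U32 1, %sel
//   $vcc = S_AND_B64 $exec, %cmp
// =>
//   $vcc = S_ANDN2_B64 $exec, %cc
// The bookkeeping below keeps the LiveIntervals analysis exact so that %cmp
// and %sel can be erased without a full liveness recompute.]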
+  if (CCReg.isVirtual()) {
+    // Note: this ignores that SelLI might have multiple internal values
+    // or splits and simply extends the live range to cover all cases
+    // where the result of the v_cndmask_b32 was live (e.g. loops).
+    // This could yield worse register allocation in rare edge cases.
+    SlotIndex EndIdx = AndIdx.getRegSlot();
+    if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
+      EndIdx = SelLI->endIndex();
+
+    LiveInterval &CCLI = LIS->getInterval(CCReg);
+    auto CCQ = CCLI.Query(SelIdx.getRegSlot());
+    if (CCQ.valueIn()) {
+      CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
+                                         EndIdx, CCQ.valueIn()));
+    }
+
+    if (CC->getSubReg()) {
+      LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
+      BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+      CCLI.refineSubRanges(
+          Allocator, Mask,
+          [=](LiveInterval::SubRange &SR) {
+            auto CCQS = SR.Query(SelIdx.getRegSlot());
+            if (CCQS.valueIn()) {
+              SR.addSegment(LiveRange::Segment(
+                  SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
+            }
+          },
+          *LIS->getSlotIndexes(), *TRI);
+      CCLI.removeEmptySubRanges();
+
+      SmallVector<LiveInterval *> SplitLIs;
+      LIS->splitSeparateComponents(CCLI, SplitLIs);
+    }
+  } else
+    LIS->removeAllRegUnitsForPhysReg(CCReg);
+
   // Try to remove compare. Cmp value should not used in between of cmp
   // and s_and_b64 if VCC or just unused if any other register.
-  if ((CmpReg.isVirtual() && MRI->use_nodbg_empty(CmpReg)) ||
+  if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
       (CmpReg == Register(CondReg) &&
        std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
                     [&](const MachineInstr &MI) {
                       return MI.readsRegister(CondReg, TRI);
                     }))) {
     LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
-
+    if (CmpLI)
+      LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
     LIS->RemoveMachineInstrFromMaps(*Cmp);
     Cmp->eraseFromParent();
 
     // Try to remove v_cndmask_b32.
-    if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) {
-      LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+    if (SelLI) {
+      bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+      if (!CanRemoveSel) {
+        // Try to shrink the live interval and check for dead def instead.
+        LIS->shrinkToUses(SelLI, nullptr);
+        CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+      }
+      if (CanRemoveSel) {
+        LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
 
-      LIS->RemoveMachineInstrFromMaps(*Sel);
-      Sel->eraseFromParent();
+        LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
+        LIS->RemoveMachineInstrFromMaps(*Sel);
+        Sel->eraseFromParent();
+      }
     }
   }
 
-  return CCReg;
+  return true;
 }
 
 // Optimize sequence
@@ -330,8 +387,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
       Changed = true;
     }
 
-    if (Register Reg = optimizeVcndVcmpPair(MBB)) {
-      RecalcRegs.insert(Reg);
+    if (optimizeVcndVcmpPair(MBB)) {
       RecalcRegs.insert(AMDGPU::VCC_LO);
       RecalcRegs.insert(AMDGPU::VCC_HI);
      RecalcRegs.insert(AMDGPU::SCC);
@@ -402,7 +458,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
   }
 
   // If the only user of a logical operation is move to exec, fold it now
-  // to prevent forming of saveexec.
I.e.: // // %0:sreg_64 = COPY $exec // %1:sreg_64 = S_AND_B64 %0:sreg_64, %2:sreg_64 diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp index e13e33ed5457..2ae3157bab49 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp @@ -112,8 +112,10 @@ public: SmallVectorImpl &CandidateRegs) const; void collectWaterfallCandidateRegisters( - MachineBasicBlock *Loop, - SmallSetVector &CandidateRegs) const; + MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, + SmallSetVector &CandidateRegs, + SmallSetVector &Blocks, + SmallVectorImpl &Instructions) const; void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, SmallVectorImpl &Uses) const; @@ -131,7 +133,10 @@ public: MachineBasicBlock *Flow, MachineBasicBlock *Endif, SmallSetVector &ElseBlocks) const; - void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const; + void optimizeWaterfallLiveRange( + Register Reg, MachineBasicBlock *LoopHeader, + SmallSetVector &LoopBlocks, + SmallVectorImpl &Instructions) const; SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} @@ -323,12 +328,34 @@ void SIOptimizeVGPRLiveRange::collectCandidateRegisters( /// Collect the registers used in the waterfall loop block that are defined /// before. void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( - MachineBasicBlock *Loop, - SmallSetVector &CandidateRegs) const { + MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, + SmallSetVector &CandidateRegs, + SmallSetVector &Blocks, + SmallVectorImpl &Instructions) const { + + // Collect loop instructions, potentially spanning multiple blocks + auto *MBB = LoopHeader; + for (;;) { + Blocks.insert(MBB); + for (auto &MI : *MBB) { + if (MI.isDebugInstr()) + continue; + Instructions.push_back(&MI); + } + if (MBB == LoopEnd) + break; - for (auto &MI : Loop->instrs()) { - if (MI.isDebugInstr()) - continue; + if ((MBB != LoopHeader && MBB->pred_size() != 1) || + (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) { + LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n"); + return; + } + + MBB = *MBB->succ_begin(); + } + + for (auto *I : Instructions) { + auto &MI = *I; for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.getReg() || MO.isDef()) @@ -340,16 +367,17 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( continue; if (MO.readsReg()) { - const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); + MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); // Make sure the value is defined before the LOOP block - if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) { + if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) { // If the variable is used after the loop, the register coalescer will // merge the newly created register and remove the phi node again. // Just do nothing in that case. 
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); bool IsUsed = false; - for (auto *Succ : Loop->successors()) { - if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { + for (auto *Succ : LoopEnd->successors()) { + if (!Blocks.contains(Succ) && + OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { IsUsed = true; break; } @@ -513,7 +541,9 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange( } void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( - Register Reg, MachineBasicBlock *Loop) const { + Register Reg, MachineBasicBlock *LoopHeader, + SmallSetVector &Blocks, + SmallVectorImpl &Instructions) const { // Insert a new PHI, marking the value from the last loop iteration undef. LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); const auto *RC = MRI->getRegClass(Reg); @@ -525,15 +555,16 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { auto *UseMI = O.getParent(); auto *UseBlock = UseMI->getParent(); - // Replace uses in Loop block - if (UseBlock == Loop) + // Replace uses in Loop blocks + if (Blocks.contains(UseBlock)) O.setReg(NewReg); } - MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - for (auto *Pred : Loop->predecessors()) { - if (Pred == Loop) + MachineInstrBuilder PHI = + BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + for (auto *Pred : LoopHeader->predecessors()) { + if (Blocks.contains(Pred)) PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); else PHI.addReg(Reg).addMBB(Pred); @@ -542,21 +573,36 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); - // collectWaterfallCandidateRegisters only collects registers that are dead - // after the loop. So we know that the old reg is not live throughout the - // whole block anymore. - OldVarInfo.AliveBlocks.reset(Loop->getNumber()); - - // Mark the last use as kill - for (auto &MI : reverse(Loop->instrs())) { - if (MI.readsRegister(NewReg, TRI)) { - MI.addRegisterKilled(NewReg, TRI); - NewVarInfo.Kills.push_back(&MI); + // Find last use and mark as kill + MachineInstr *Kill = nullptr; + for (auto *MI : reverse(Instructions)) { + if (MI->readsRegister(NewReg, TRI)) { + MI->addRegisterKilled(NewReg, TRI); + NewVarInfo.Kills.push_back(MI); + Kill = MI; break; } } - assert(!NewVarInfo.Kills.empty() && - "Failed to find last usage of register in loop"); + assert(Kill && "Failed to find last usage of register in loop"); + + MachineBasicBlock *KillBlock = Kill->getParent(); + bool PostKillBlock = false; + for (auto *Block : Blocks) { + auto BBNum = Block->getNumber(); + + // collectWaterfallCandidateRegisters only collects registers that are dead + // after the loop. So we know that the old reg is no longer live throughout + // the waterfall loop. + OldVarInfo.AliveBlocks.reset(BBNum); + + // The new register is live up to (and including) the block that kills it. + PostKillBlock |= (Block == KillBlock); + if (PostKillBlock) { + NewVarInfo.AliveBlocks.reset(BBNum); + } else if (Block != LoopHeader) { + NewVarInfo.AliveBlocks.set(BBNum); + } + } } char SIOptimizeVGPRLiveRange::ID = 0; @@ -601,6 +647,10 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { if (!Endif) continue; + // Skip unexpected control flow. 
+ if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif)) + continue; + SmallSetVector ElseBlocks; SmallVector CandidateRegs; @@ -620,15 +670,22 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { for (auto Reg : CandidateRegs) optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { + auto *LoopHeader = MI.getOperand(0).getMBB(); + auto *LoopEnd = &MBB; + LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " - << printMBBReference(MBB) << '\n'); + << printMBBReference(*LoopHeader) << '\n'); SmallSetVector CandidateRegs; - collectWaterfallCandidateRegisters(&MBB, CandidateRegs); + SmallVector Instructions; + SmallSetVector Blocks; + + collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, + Blocks, Instructions); MadeChange |= !CandidateRegs.empty(); // Now we are safe to optimize. for (auto Reg : CandidateRegs) - optimizeWaterfallLiveRange(Reg, &MBB); + optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); } } } diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index da41a5e2478a..e768a2f3e1a5 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -316,7 +316,7 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, } if (Abs || Neg) { assert(!Sext && - "Float and integer src modifiers can't be set simulteniously"); + "Float and integer src modifiers can't be set simultaneously"); Mods |= Abs ? SISrcMods::ABS : 0u; Mods ^= Neg ? SISrcMods::NEG : 0u; } else if (Sext) { @@ -1131,16 +1131,16 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, bool Converted = false; for (auto &Operand : SDWAOperands) { LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand); - // There should be no intesection between SDWA operands and potential MIs + // There should be no intersection between SDWA operands and potential MIs // e.g.: // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0 // v_add_u32 v3, v4, v2 // - // In that example it is possible that we would fold 2nd instruction into 3rd - // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was - // already destroyed). So if SDWAOperand is also a potential MI then do not - // apply it. + // In that example it is possible that we would fold 2nd instruction into + // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that + // was already destroyed). So if SDWAOperand is also a potential MI then do + // not apply it. 
     if (PotentialMatches.count(Operand->getParentInst()) == 0)
       Converted |= Operand->convertToSDWA(*SDWAInst, TII);
   }
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index c2e2875ed6bf..4fab13bb44b1 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -18,7 +18,10 @@
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
@@ -85,9 +88,6 @@ FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
 }
 
 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
-  if (!MO.isReg())
-    return false;
-
   Register Reg = MO.getReg();
   if (Reg.isPhysical())
     return false;
@@ -111,7 +111,6 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
   }
 
   llvm_unreachable("physreg not found for WWM expression");
-  return false;
 }
 
 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
@@ -142,7 +141,6 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
   }
 
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 
   for (unsigned Reg : RegsToRewrite) {
     LIS->removeInterval(Reg);
@@ -150,18 +148,7 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
     const Register PhysReg = VRM->getPhys(Reg);
     assert(PhysReg != 0);
 
-    // Check if PhysReg is already reserved
-    if (!MFI->WWMReservedRegs.count(PhysReg)) {
-      Optional<int> FI;
-      if (!MFI->isEntryFunction()) {
-        // Create a stack object for a possible spill in the function prologue.
-        // Note: Non-CSR VGPR also need this as we may overwrite inactive lanes.
-        const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg);
-        FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC),
-                                              TRI->getSpillAlign(*RC));
-      }
-      MFI->reserveWWMRegister(PhysReg, FI);
-    }
+    MFI->reserveWWMRegister(PhysReg);
   }
 
   RegsToRewrite.clear();
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index b0e45dd3e3e3..8d33b8a1fd4b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -74,6 +74,15 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
   // We end up with this pattern sometimes after basic block placement.
   // It happens while combining a block which assigns -1 or 0 to a saved mask
   // and another block which consumes that saved mask and then a branch.
+  //
+  // While searching this also performs the following substitution:
+  // vcc = V_CMP
+  // vcc = S_AND exec, vcc
+  // S_CBRANCH_VCC[N]Z
+  // =>
+  // vcc = V_CMP
+  // S_CBRANCH_VCC[N]Z
+
   bool Changed = false;
   MachineBasicBlock &MBB = *MI.getParent();
   const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
@@ -121,19 +130,32 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
     SReg = Op2.getReg();
     auto M = std::next(A);
     bool ReadsSreg = false;
+    bool ModifiesExec = false;
     for (; M != E; ++M) {
       if (M->definesRegister(SReg, TRI))
         break;
       if (M->modifiesRegister(SReg, TRI))
         return Changed;
       ReadsSreg |= M->readsRegister(SReg, TRI);
+      ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
+    }
+    if (M == E)
+      return Changed;
+    // If SReg is VCC and the SReg definition is a VALU comparison, the
+    // S_AND with EXEC is not required.
+ // Erase the S_AND and return. + // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS + if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec && + TII->isVOPC(*M)) { + A->eraseFromParent(); + return true; } - if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() || + if (!M->isMoveImmediate() || !M->getOperand(1).isImm() || (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0)) return Changed; MaskValue = M->getOperand(1).getImm(); // First if sreg is only used in the AND instruction fold the immediate - // into into the AND. + // into the AND. if (!ReadsSreg && Op2.isKill()) { A->getOperand(2).ChangeToImmediate(MaskValue); M->eraseFromParent(); @@ -213,7 +235,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ)); } - MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI)); + MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI)); MI.addImplicitDefUseOperands(*MBB.getParent()); return true; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 21aed4ececb5..ad1455ed20fd 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -19,7 +19,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" using namespace llvm; @@ -182,6 +184,16 @@ struct SGPRSpillBuilder { TmpVGPRLive = true; } + if (TmpVGPRLive) { + // We need to inform the scavenger that this index is already in use until + // we're done with the custom emergency spill. + RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR); + } + + // We may end up recursively calling the scavenger, and don't want to re-use + // the same register. + RS->setRegUsed(TmpVGPR); + // Try to scavenge SGPRs to save exec assert(!SavedExecReg && "Exec is already saved, refuse to save again"); const TargetRegisterClass &RC = @@ -202,6 +214,12 @@ struct SGPRSpillBuilder { // Spill needed lanes TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); } else { + // The modify and restore of exec clobber SCC, which we would have to save + // and restore. FIXME: We probably would need to reserve a register for + // this. + if (RS->isRegUsed(AMDGPU::SCC)) + MI->emitError("unhandled SGPR spill to memory"); + // Spill active lanes if (TmpVGPRLive) TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false, @@ -251,6 +269,12 @@ struct SGPRSpillBuilder { if (TmpVGPRLive) TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true); } + + // Inform the scavenger where we're releasing our custom scavenged register. + if (TmpVGPRLive) { + MachineBasicBlock::iterator RestorePt = std::prev(MI); + RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt); + } } // Write TmpVGPR to memory or read TmpVGPR from memory. @@ -265,6 +289,12 @@ struct SGPRSpillBuilder { // Spill needed lanes TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); } else { + // The modify and restore of exec clobber SCC, which we would have to save + // and restore. FIXME: We probably would need to reserve a register for + // this. 
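// [Editor's note: illustrative, not part of the imported patch. The exec
// manipulation clobbers SCC because spilling the inactive lanes briefly
// flips exec with a sequence along the lines of:
//   s_not_b64 exec, exec    ; writes SCC (result is compared against zero)
//   buffer_store_dword ...  ; store the previously inactive lanes
//   s_not_b64 exec, exec    ; writes SCC again
// Any SCC value live across that window would be lost, which is what the
// emitError below diagnoses.]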
+ if (RS->isRegUsed(AMDGPU::SCC)) + MI->emitError("unhandled SGPR spill to memory"); + // Spill active lanes TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad, /*IsKill*/ false); @@ -329,7 +359,7 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) static auto InitializeSubRegFromChannelTableOnce = [this]() { for (auto &Row : SubRegFromChannelTable) Row.fill(AMDGPU::NoSubRegister); - for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) { + for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) { unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32; unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32; assert(Width < SubRegFromChannelTableWidthMap.size()); @@ -364,13 +394,11 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: - return MF->getSubtarget().hasGFX90AInsts() - ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList - : CSR_AMDGPU_HighRegs_SaveList; + return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList + : CSR_AMDGPU_SaveList; case CallingConv::AMDGPU_Gfx: - return MF->getSubtarget().hasGFX90AInsts() - ? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList - : CSR_AMDGPU_SI_Gfx_SaveList; + return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList + : CSR_AMDGPU_SI_Gfx_SaveList; default: { // Dummy to not crash RegisterClassInfo. static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister; @@ -390,13 +418,11 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: - return MF.getSubtarget().hasGFX90AInsts() - ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask - : CSR_AMDGPU_HighRegs_RegMask; + return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask + : CSR_AMDGPU_RegMask; case CallingConv::AMDGPU_Gfx: - return MF.getSubtarget().hasGFX90AInsts() - ? CSR_AMDGPU_SI_Gfx_With_AGPRs_RegMask - : CSR_AMDGPU_SI_Gfx_RegMask; + return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask + : CSR_AMDGPU_SI_Gfx_RegMask; default: return nullptr; } @@ -413,8 +439,7 @@ SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, // equivalent AV class. If used one, the verifier will crash after // RegBankSelect in the GISel flow. The aligned regclasses are not fully given // until Instruction selection. 
- if (MF.getSubtarget().hasMAIInsts() && - (isVGPRClass(RC) || isAGPRClass(RC))) { + if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) { if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass) return &AMDGPU::AV_32RegClass; if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass) @@ -463,8 +488,7 @@ SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const SIFrameLowering *TFI = - MF.getSubtarget().getFrameLowering(); + const SIFrameLowering *TFI = ST.getFrameLowering(); const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); // During ISel lowering we always reserve the stack pointer in entry // functions, but never actually want to reference it when accessing our own @@ -487,19 +511,19 @@ bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const { Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; } const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const { - return CSR_AMDGPU_AllVGPRs_RegMask; + return AMDGPU_AllVGPRs_RegMask; } const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const { - return CSR_AMDGPU_AllAGPRs_RegMask; + return AMDGPU_AllAGPRs_RegMask; } const uint32_t *SIRegisterInfo::getAllVectorRegMask() const { - return CSR_AMDGPU_AllVectorRegs_RegMask; + return AMDGPU_AllVectorRegs_RegMask; } const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const { - return CSR_AMDGPU_AllAllocatableSRegs_RegMask; + return AMDGPU_AllAllocatableSRegs_RegMask; } unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel, @@ -522,6 +546,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::MODE); + const SIMachineFunctionInfo *MFI = MF.getInfo(); + + // Reserve special purpose registers. + // // EXEC_LO and EXEC_HI could be allocated and used as regular register, but // this seems likely to result in bugs, so I'm marking them as reserved. reserveRegisterTuples(Reserved, AMDGPU::EXEC); @@ -563,7 +591,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15); // Reserve null register - it shall never be allocated - reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL); + reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64); // Disallow vcc_hi allocation in wave32. It may be allocated but most likely // will result in bugs. @@ -572,6 +600,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VCC_HI); } + // Reserve SGPRs. + // unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { @@ -579,39 +609,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, Reg); } - const SIMachineFunctionInfo *MFI = MF.getInfo(); - unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); - unsigned MaxNumAGPRs = MaxNumVGPRs; - unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); - - if (ST.hasGFX90AInsts()) { - // In an entry function without calls and AGPRs used it is possible to use - // the whole register budget for VGPRs. - - // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and - // split register file accordingly. 
- if (MFI->usesAGPRs(MF)) { - MaxNumVGPRs /= 2; - MaxNumAGPRs = MaxNumVGPRs; - } else { - if (MaxNumVGPRs > TotalNumVGPRs) { - MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; - MaxNumVGPRs = TotalNumVGPRs; - } else - MaxNumAGPRs = 0; - } - } - - for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { - unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); - reserveRegisterTuples(Reserved, Reg); - } - - for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) { - unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); - reserveRegisterTuples(Reserved, Reg); - } - for (auto Reg : AMDGPU::SReg_32RegClass) { Reserved.set(getSubReg(Reg, AMDGPU::hi16)); Register Low = getSubReg(Reg, AMDGPU::lo16); @@ -620,22 +617,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(Low); } - for (auto Reg : AMDGPU::AGPR_32RegClass) { - Reserved.set(getSubReg(Reg, AMDGPU::hi16)); - } - - // Reserve all the rest AGPRs if there are no instructions to use it. - if (!ST.hasMAIInsts()) { - for (unsigned i = 0; i < MaxNumVGPRs; ++i) { - unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); - reserveRegisterTuples(Reserved, Reg); - } - } - Register ScratchRSrcReg = MFI->getScratchRSrcReg(); if (ScratchRSrcReg != AMDGPU::NoRegister) { - // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need - // to spill. + // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we + // need to spill. // TODO: May need to reserve a VGPR if doing LDS spilling. reserveRegisterTuples(Reserved, ScratchRSrcReg); } @@ -644,7 +629,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // which is detected after the function is lowered. If we aren't really going // to need SP, don't bother reserving it. MCRegister StackPtrReg = MFI->getStackPtrOffsetReg(); - if (StackPtrReg) { reserveRegisterTuples(Reserved, StackPtrReg); assert(!isSubRegister(ScratchRSrcReg, StackPtrReg)); @@ -662,20 +646,63 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { assert(!isSubRegister(ScratchRSrcReg, BasePtrReg)); } - for (auto Reg : MFI->WWMReservedRegs) { - reserveRegisterTuples(Reserved, Reg.first); + // Reserve VGPRs/AGPRs. + // + unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); + unsigned MaxNumAGPRs = MaxNumVGPRs; + unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); + + // Reserve all the AGPRs if there are no instructions to use it. + if (!ST.hasMAIInsts()) { + for (unsigned i = 0; i < MaxNumAGPRs; ++i) { + unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); + } } - // Reserve VGPRs used for SGPR spilling. - // Note we treat freezeReservedRegs unusually because we run register - // allocation in two phases. It's OK to re-freeze with new registers for the - // second run. -#if 0 - for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) { - for (auto &SpilledVGPR : SpilledFI.second) - reserveRegisterTuples(Reserved, SpilledVGPR.VGPR); + for (auto Reg : AMDGPU::AGPR_32RegClass) { + Reserved.set(getSubReg(Reg, AMDGPU::hi16)); } -#endif + + // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically, + // a wave may have up to 512 total vector registers combining together both + // VGPRs and AGPRs. Hence, in an entry function without calls and without + // AGPRs used within it, it is possible to use the whole vector register + // budget for VGPRs. + // + // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split + // register file accordingly. 
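// [Editor's note: worked example of the split implemented just below,
// assuming ST.getMaxNumVGPRs(MF) == 512 and TotalNumVGPRs == 256:
//   AGPRs used:     MaxNumVGPRs = 512 / 2 = 256, MaxNumAGPRs = 256
//   AGPRs not used: MaxNumVGPRs = 256, MaxNumAGPRs = 512 - 256 = 256
// With a tighter occupancy budget of 256 registers:
//   AGPRs used:     128 VGPRs + 128 AGPRs
//   AGPRs not used: 256 VGPRs + 0 AGPRs
// Everything from MaxNumVGPRs/MaxNumAGPRs upwards is then reserved.]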
+ if (ST.hasGFX90AInsts()) { + if (MFI->usesAGPRs(MF)) { + MaxNumVGPRs /= 2; + MaxNumAGPRs = MaxNumVGPRs; + } else { + if (MaxNumVGPRs > TotalNumVGPRs) { + MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; + MaxNumVGPRs = TotalNumVGPRs; + } else + MaxNumAGPRs = 0; + } + } + + for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { + unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); + } + + for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) { + unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); + } + + // On GFX908, in order to guarantee copying between AGPRs, we need a scratch + // VGPR available at all times. + if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { + reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy()); + } + + for (Register Reg : MFI->WWMReservedRegs) + reserveRegisterTuples(Reserved, Reg); // FIXME: Stop using reserved registers for this. for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs()) @@ -690,6 +717,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF, + MCRegister PhysReg) const { + return !MF.getRegInfo().isReserved(PhysReg); +} + bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const { const SIMachineFunctionInfo *Info = MF.getInfo(); // On entry, the base address is 0, so it can't possibly need any more @@ -1010,6 +1042,8 @@ static int getOffsetMUBUFStore(unsigned Opc) { return AMDGPU::BUFFER_STORE_SHORT_OFFSET; case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN: return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET; + case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN: + return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET; case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN: return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET; case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN: @@ -1035,6 +1069,8 @@ static int getOffsetMUBUFLoad(unsigned Opc) { return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET; case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN: return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET; + case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN: + return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET; case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN: return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET; case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN: @@ -1054,6 +1090,64 @@ static int getOffsetMUBUFLoad(unsigned Opc) { } } +static int getOffenMUBUFStore(unsigned Opc) { + switch (Opc) { + case AMDGPU::BUFFER_STORE_DWORD_OFFSET: + return AMDGPU::BUFFER_STORE_DWORD_OFFEN; + case AMDGPU::BUFFER_STORE_BYTE_OFFSET: + return AMDGPU::BUFFER_STORE_BYTE_OFFEN; + case AMDGPU::BUFFER_STORE_SHORT_OFFSET: + return AMDGPU::BUFFER_STORE_SHORT_OFFEN; + case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET: + return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN; + case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET: + return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN; + case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET: + return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN; + case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET: + return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN; + case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET: + return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN; + default: + return -1; + } +} + +static int getOffenMUBUFLoad(unsigned Opc) { + switch (Opc) { + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: + return AMDGPU::BUFFER_LOAD_DWORD_OFFEN; + case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET: + return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN; + case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET: + return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN; + case AMDGPU::BUFFER_LOAD_USHORT_OFFSET: + return 
AMDGPU::BUFFER_LOAD_USHORT_OFFEN; + case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET: + return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN; + case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET: + return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN; + case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET: + return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN; + case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET: + return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN; + case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET: + return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN; + case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET: + return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN; + case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET: + return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN; + case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET: + return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN; + case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET: + return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN; + case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET: + return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN; + default: + return -1; + } +} + static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -1139,8 +1233,9 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize) { bool IsStore = TII->get(LoadStoreOp).mayStore(); + bool HasVAddr = AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) != -1; bool UseST = - AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 && + !HasVAddr && AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0; switch (EltSize) { @@ -1164,7 +1259,9 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, llvm_unreachable("Unexpected spill load/store size!"); } - if (UseST) + if (HasVAddr) + LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp); + else if (UseST) LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp); return LoadStoreOp; @@ -1186,6 +1283,7 @@ void SIRegisterInfo::buildSpillLoadStore( bool IsStore = Desc->mayStore(); bool IsFlat = TII->isFLATScratch(LoadStoreOp); + bool CanClobberSCC = false; bool Scavenged = false; MCRegister SOffset = ScratchOffsetReg; @@ -1202,6 +1300,8 @@ void SIRegisterInfo::buildSpillLoadStore( unsigned RemSize = RegWidth - Size; unsigned NumRemSubRegs = RemSize ? 1 : 0; int64_t Offset = InstOffset + MFI.getObjectOffset(Index); + int64_t MaterializedOffset = Offset; + int64_t MaxOffset = Offset + Size + RemSize - EltSize; int64_t ScratchOffsetRegDelta = 0; @@ -1216,6 +1316,42 @@ void SIRegisterInfo::buildSpillLoadStore( assert((IsFlat || ((Offset % EltSize) == 0)) && "unexpected VGPR spill offset"); + // Track a VGPR to use for a constant offset we need to materialize. + Register TmpOffsetVGPR; + + // Track a VGPR to use as an intermediate value. + Register TmpIntermediateVGPR; + bool UseVGPROffset = false; + + // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate + // combination. + auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR, + int64_t VOffset) { + // We are using a VGPR offset + if (IsFlat && SGPRBase) { + // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free + // SGPR, so perform the add as vector. + // We don't need a base SGPR in the kernel. 
+ + if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR) + .addReg(SGPRBase) + .addImm(VOffset) + .addImm(0); // clamp + } else { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) + .addReg(SGPRBase); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR) + .addImm(VOffset) + .addReg(TmpOffsetVGPR); + } + } else { + assert(TmpOffsetVGPR); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) + .addImm(VOffset); + } + }; + bool IsOffsetLegal = IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch) @@ -1223,17 +1359,17 @@ void SIRegisterInfo::buildSpillLoadStore( if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) { SOffset = MCRegister(); - // We currently only support spilling VGPRs to EltSize boundaries, meaning - // we can simplify the adjustment of Offset here to just scale with - // WavefrontSize. - if (!IsFlat) - Offset *= ST.getWavefrontSize(); - // We don't have access to the register scavenger if this function is called // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case. + // TODO: Clobbering SCC is not necessary for scratch instructions in the + // entry. if (RS) { SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false); + + // Piggy back on the liveness scan we just did see if SCC is dead. + CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC); } else if (LiveRegs) { + CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC); for (MCRegister Reg : AMDGPU::SGPR_32RegClass) { if (LiveRegs->available(MF->getRegInfo(), Reg)) { SOffset = Reg; @@ -1242,7 +1378,26 @@ void SIRegisterInfo::buildSpillLoadStore( } } + if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC) + SOffset = Register(); + if (!SOffset) { + UseVGPROffset = true; + + if (RS) { + TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); + } else { + assert(LiveRegs); + for (MCRegister Reg : AMDGPU::VGPR_32RegClass) { + if (LiveRegs->available(MF->getRegInfo(), Reg)) { + TmpOffsetVGPR = Reg; + break; + } + } + } + + assert(TmpOffsetVGPR); + } else if (!SOffset && CanClobberSCC) { // There are no free SGPRs, and since we are in the process of spilling // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true // on SI/CI and on VI it is true until we implement spilling using scalar @@ -1250,6 +1405,9 @@ void SIRegisterInfo::buildSpillLoadStore( // add the offset directly to the ScratchOffset or StackPtrOffset // register, and then subtract the offset after the spill to return the // register to it's original value. + + // TODO: If we don't have to do an emergency stack slot spill, converting + // to use the VGPR offset is fewer instructions. if (!ScratchOffsetReg) ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg(); SOffset = ScratchOffsetReg; @@ -1258,12 +1416,22 @@ void SIRegisterInfo::buildSpillLoadStore( Scavenged = true; } - if (!SOffset) + // We currently only support spilling VGPRs to EltSize boundaries, meaning + // we can simplify the adjustment of Offset here to just scale with + // WavefrontSize. 
+ if (!IsFlat && !UseVGPROffset) + Offset *= ST.getWavefrontSize(); + + if (!UseVGPROffset && !SOffset) report_fatal_error("could not scavenge SGPR to spill in entry function"); - if (ScratchOffsetReg == AMDGPU::NoRegister) { + if (UseVGPROffset) { + // We are using a VGPR offset + MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset); + } else if (ScratchOffsetReg == AMDGPU::NoRegister) { BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset); } else { + assert(Offset != 0); auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset) .addReg(ScratchOffsetReg) .addImm(Offset); @@ -1277,13 +1445,16 @@ void SIRegisterInfo::buildSpillLoadStore( assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 && "Unexpected vaddr for flat scratch with a FI operand"); - assert(ST.hasFlatScratchSTMode()); - LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp); + if (UseVGPROffset) { + LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp); + } else { + assert(ST.hasFlatScratchSTMode()); + LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp); + } + Desc = &TII->get(LoadStoreOp); } - Register TmpReg; - for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e; ++i, RegOffset += EltSize) { if (i == NumSubRegs) { @@ -1292,6 +1463,22 @@ void SIRegisterInfo::buildSpillLoadStore( } Desc = &TII->get(LoadStoreOp); + if (!IsFlat && UseVGPROffset) { + int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp) + : getOffenMUBUFLoad(LoadStoreOp); + Desc = &TII->get(NewLoadStoreOp); + } + + if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) { + // If we are spilling an AGPR beyond the range of the memory instruction + // offset and need to use a VGPR offset, we ideally have at least 2 + // scratch VGPRs. If we don't have a second free VGPR without spilling, + // recycle the VGPR used for the offset which requires resetting after + // each subregister. + + MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset); + } + unsigned NumRegs = EltSize / 4; Register SubReg = e == 1 ? ValueReg @@ -1300,7 +1487,8 @@ void SIRegisterInfo::buildSpillLoadStore( unsigned SOffsetRegState = 0; unsigned SrcDstRegState = getDefRegState(!IsStore); - if (i + 1 == e) { + const bool IsLastSubReg = i + 1 == e; + if (IsLastSubReg) { SOffsetRegState |= getKillRegState(Scavenged); // The last implicit use carries the "Kill" flag. 
     SrcDstRegState |= getKillRegState(IsKill);
@@ -1363,21 +1551,26 @@ void SIRegisterInfo::buildSpillLoadStore(
 
     if (IsAGPR) {
       assert(EltSize == 4);
-      if (!TmpReg) {
-        assert(RS && "Needs to have RegScavenger to spill an AGPR!");
-        // FIXME: change to scavengeRegisterBackwards()
-        TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-        RS->setRegUsed(TmpReg);
+      if (!TmpIntermediateVGPR) {
+        TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
+        assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
       }
       if (IsStore) {
         auto AccRead = BuildMI(MBB, MI, DL,
-                               TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
+                               TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
+                               TmpIntermediateVGPR)
                            .addReg(SubReg, getKillRegState(IsKill));
         if (NeedSuperRegDef)
           AccRead.addReg(ValueReg, RegState::ImplicitDefine);
         AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
       }
-      SubReg = TmpReg;
+      SubReg = TmpIntermediateVGPR;
+    } else if (UseVGPROffset) {
+      // FIXME: change to scavengeRegisterBackwards()
+      if (!TmpOffsetVGPR) {
+        TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+        RS->setRegUsed(TmpOffsetVGPR);
+      }
     }
 
     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
@@ -1388,12 +1581,26 @@
     auto MIB = BuildMI(MBB, MI, DL, *Desc)
                    .addReg(SubReg,
                            getDefRegState(!IsStore) | getKillRegState(IsKill));
+
+    if (UseVGPROffset) {
+      // For an AGPR spill, we reuse the same temp VGPR for the offset and the
+      // intermediate accvgpr_write.
+      MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
+    }
+
     if (!IsFlat)
       MIB.addReg(FuncInfo->getScratchRSrcReg());
 
     if (SOffset == AMDGPU::NoRegister) {
-      if (!IsFlat)
-        MIB.addImm(0);
+      if (!IsFlat) {
+        if (UseVGPROffset && ScratchOffsetReg) {
+          assert(!FuncInfo->isEntryFunction());
+          MIB.addReg(ScratchOffsetReg);
+        } else {
+          assert(FuncInfo->isEntryFunction());
+          MIB.addImm(0);
+        }
+      }
     } else {
       MIB.addReg(SOffset, SOffsetRegState);
     }
@@ -1407,10 +1614,10 @@
     if (!IsAGPR && NeedSuperRegDef)
       MIB.addReg(ValueReg, RegState::ImplicitDefine);
 
-    if (!IsStore && TmpReg != AMDGPU::NoRegister) {
+    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
       MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
                     FinalReg)
-                .addReg(TmpReg, RegState::Kill);
+                .addReg(TmpIntermediateVGPR, RegState::Kill);
       MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
     }
 
@@ -1466,8 +1673,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                                bool OnlyToVGPR) const {
   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
 
-  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
-      SB.MFI.getSGPRToVGPRSpills(Index);
+  ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index);
   bool SpillToVGPR = !VGPRSpills.empty();
   if (OnlyToVGPR && !SpillToVGPR)
     return false;
@@ -1485,7 +1691,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         SB.NumSubRegs == 1 ?
             SB.SuperReg : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
-    SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+    SpilledReg Spill = VGPRSpills[i];
 
     bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;
 
@@ -1586,8 +1792,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                  bool OnlyToVGPR) const {
   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
 
-  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
-      SB.MFI.getSGPRToVGPRSpills(Index);
+  ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index);
   bool SpillToVGPR = !VGPRSpills.empty();
   if (OnlyToVGPR && !SpillToVGPR)
     return false;
@@ -1599,7 +1804,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
               ? SB.SuperReg
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
-      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+      SpilledReg Spill = VGPRSpills[i];
       auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
                          SubReg)
                      .addReg(Spill.VGPR)
@@ -1937,18 +2142,23 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         Offset = 0;
       }
 
-      assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
-             "Unexpected vaddr for flat scratch with a FI operand");
-
-      // On GFX10 we have ST mode to use no registers for an address.
-      // Otherwise we need to materialize 0 into an SGPR.
-      if (!Offset && ST.hasFlatScratchSTMode()) {
+      if (!Offset) {
         unsigned Opc = MI->getOpcode();
-        unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
-        MI->RemoveOperand(
-            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-        MI->setDesc(TII->get(NewOpc));
-        return;
+        int NewOpc = -1;
+        if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1) {
+          NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+        } else if (ST.hasFlatScratchSTMode()) {
+          // On GFX10 we have ST mode to use no registers for an address.
+          // Otherwise we need to materialize 0 into an SGPR.
+          NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+        }
+
+        if (NewOpc != -1) {
+          MI->removeOperand(
+              AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+          MI->setDesc(TII->get(NewOpc));
+          return;
+        }
       }
     }
 
@@ -2026,57 +2236,78 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
     if (!IsMUBUF && !MFI->isEntryFunction()) {
       // Convert to a swizzled stack address by scaling by the wave size.
-      //
       // In an entry function/kernel the offset is already swizzled.
-
-      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
-      Register ResultReg =
-          IsCopy ? MI->getOperand(0).getReg()
-                 : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+      bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+      bool LiveSCC = RS->isRegUsed(AMDGPU::SCC);
+      const TargetRegisterClass *RC = IsSALU && !LiveSCC
+                                          ? &AMDGPU::SReg_32RegClass
+                                          : &AMDGPU::VGPR_32RegClass;
+      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
+      Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
+                                  : RS->scavengeRegister(RC, MI, 0);
 
       int64_t Offset = FrameInfo.getObjectOffset(Index);
       if (Offset == 0) {
+        unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
+                                             : AMDGPU::V_LSHRREV_B32_e64;
         // XXX - This never happens because of emergency scavenging slot at 0?
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg) - .addImm(ST.getWavefrontSizeLog2()) - .addReg(FrameReg); + auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg) + .addImm(ST.getWavefrontSizeLog2()) + .addReg(FrameReg); + if (IsSALU && !LiveSCC) + Shift.getInstr()->getOperand(3).setIsDead( + true); // Mark SCC as dead. + if (IsSALU && LiveSCC) { + Register NewDest = + RS->scavengeRegister(&AMDGPU::SReg_32RegClass, Shift, 0); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + NewDest) + .addReg(ResultReg); + ResultReg = NewDest; + } } else { - if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) { - // Reuse ResultReg in intermediate step. - Register ScaledReg = ResultReg; - - BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), - ScaledReg) - .addImm(ST.getWavefrontSizeLog2()) - .addReg(FrameReg); - - const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; - - // TODO: Fold if use instruction is another add of a constant. - if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { - // FIXME: This can fail - MIB.addImm(Offset); - MIB.addReg(ScaledReg, RegState::Kill); - if (!IsVOP2) + MachineInstrBuilder MIB; + if (!IsSALU) { + if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) != + nullptr) { + // Reuse ResultReg in intermediate step. + Register ScaledReg = ResultReg; + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + ScaledReg) + .addImm(ST.getWavefrontSizeLog2()) + .addReg(FrameReg); + + const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; + + // TODO: Fold if use instruction is another add of a constant. + if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { + // FIXME: This can fail + MIB.addImm(Offset); + MIB.addReg(ScaledReg, RegState::Kill); + if (!IsVOP2) + MIB.addImm(0); // clamp bit + } else { + assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 && + "Need to reuse carry out register"); + + // Use scavenged unused carry out as offset register. + Register ConstOffsetReg; + if (!isWave32) + ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); + else + ConstOffsetReg = MIB.getReg(1); + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) + .addImm(Offset); + MIB.addReg(ConstOffsetReg, RegState::Kill); + MIB.addReg(ScaledReg, RegState::Kill); MIB.addImm(0); // clamp bit - } else { - assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 && - "Need to reuse carry out register"); - - // Use scavenged unused carry out as offset register. - Register ConstOffsetReg; - if (!isWave32) - ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); - else - ConstOffsetReg = MIB.getReg(1); - - BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) - .addImm(Offset); - MIB.addReg(ConstOffsetReg, RegState::Kill); - MIB.addReg(ScaledReg, RegState::Kill); - MIB.addImm(0); // clamp bit + } } - } else { + } + if (!MIB || IsSALU) { // We have to produce a carry out, and there isn't a free SGPR pair // for it. We can keep the whole computation on the SALU to avoid // clobbering an additional register at the cost of an extra mov. @@ -2084,7 +2315,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // We may have 1 free scratch SGPR even though a carry out is // unavailable. Only one additional mov is needed. 
         Register TmpScaledReg =
-          RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+            RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
         Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
 
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
@@ -2093,14 +2324,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
             .addReg(ScaledReg, RegState::Kill)
             .addImm(Offset);
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
-            .addReg(ScaledReg, RegState::Kill);
+        if (!IsSALU)
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+              .addReg(ScaledReg, RegState::Kill);
+        else
+          ResultReg = ScaledReg;
 
         // If there were truly no free SGPRs, we need to undo everything.
         if (!TmpScaledReg.isValid()) {
           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
-            .addReg(ScaledReg, RegState::Kill)
-            .addImm(-Offset);
+              .addReg(ScaledReg, RegState::Kill)
+              .addImm(-Offset);
           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
             .addReg(FrameReg)
             .addImm(ST.getWavefrontSizeLog2());
@@ -2665,8 +2899,7 @@ MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const
 
 const TargetRegisterClass *
 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
-                                         const RegisterBank &RB,
-                                         const MachineRegisterInfo &MRI) const {
+                                         const RegisterBank &RB) const {
   switch (RB.getID()) {
   case AMDGPU::VGPRRegBankID:
     return getVGPRClassForBitWidth(std::max(32u, Size));
@@ -2688,7 +2921,7 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
                                          const MachineRegisterInfo &MRI) const {
   const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
   if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank *>())
-    return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
+    return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
 
   if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
     return getAllocatableClass(RC);
@@ -2808,9 +3041,29 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
   return true;
 }
 
+const TargetRegisterClass *
+SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
+  if (!RC || !ST.needsAlignedVGPRs())
+    return RC;
+
+  unsigned Size = getRegSizeInBits(*RC);
+  if (Size <= 32)
+    return RC;
+
+  if (isVGPRClass(RC))
+    return getAlignedVGPRClassForBitWidth(Size);
+  if (isAGPRClass(RC))
+    return getAlignedAGPRClassForBitWidth(Size);
+  if (isVectorSuperClass(RC))
+    return getAlignedVectorSuperClassForBitWidth(Size);
+
+  return RC;
+}
+
 bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
   switch (PhysReg) {
   case AMDGPU::SGPR_NULL:
+  case AMDGPU::SGPR_NULL64:
   case AMDGPU::SRC_SHARED_BASE:
   case AMDGPU::SRC_PRIVATE_BASE:
   case AMDGPU::SRC_SHARED_LIMIT:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index f1fe0a1d9329..9bfbc253410b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -51,6 +51,17 @@ private:
 public:
   SIRegisterInfo(const GCNSubtarget &ST);
 
+  struct SpilledReg {
+    Register VGPR;
+    int Lane = -1;
+
+    SpilledReg() = default;
+    SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
+
+    bool hasLane() { return Lane != -1; }
+    bool hasReg() { return VGPR != 0; }
+  };
+
   /// \returns the sub reg enum value for the given \p Channel
   /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
   static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
 
@@ -64,6 +75,8 @@ public:
   MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
+  bool isAsmClobberable(const MachineFunction &MF,
+                        MCRegister PhysReg) const override;
 
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
@@ -304,15 +317,11 @@ public:
   MCRegister getReturnAddressReg(const MachineFunction &MF) const;
 
   const TargetRegisterClass *
-  getRegClassForSizeOnBank(unsigned Size,
-                           const RegisterBank &Bank,
-                           const MachineRegisterInfo &MRI) const;
+  getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
 
   const TargetRegisterClass *
-  getRegClassForTypeOnBank(LLT Ty,
-                           const RegisterBank &Bank,
-                           const MachineRegisterInfo &MRI) const {
-    return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank, MRI);
+  getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
+    return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
   }
 
   const TargetRegisterClass *
@@ -377,6 +386,11 @@ public:
   // the subtarget.
   bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
 
+  // Given \p RC returns corresponding aligned register class if required
+  // by the subtarget.
+  const TargetRegisterClass *
+  getProperlyAlignedRC(const TargetRegisterClass *RC) const;
+
   /// Return all SGPR128 which satisfy the waves per execution unit requirement
   /// of the subtarget.
   ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index eb9452f4b85e..ffe8dce79816 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -97,7 +97,7 @@ class RegSeqNames {
   dag trunc_rc = (trunc RC,
@@ -189,7 +189,7 @@ def PC_REG : SIReg<"pc", 0>, DwarfRegNum<[16, 16]> {
 
 def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
-  let HWEncoding = 106;
+  let HWEncoding = VCC_LO.HWEncoding;
 }
 
 defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
@@ -198,7 +198,7 @@ defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
 def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
-  let HWEncoding = 126;
+  let HWEncoding = EXEC_LO.HWEncoding;
 }
 
 // 32-bit real registers, for MC only.
@@ -211,8 +211,23 @@ defm SRC_SCC : SIRegLoHi16<"src_scc", 253>;
 // Should never be emitted.
 def SCC : SIReg<"scc">;
 
-defm M0 : SIRegLoHi16 <"m0", 124>;
-defm SGPR_NULL : SIRegLoHi16 <"null", 125>;
+// Encoding changes between subtarget generations.
+// See also Utils/AMDGPUBaseInfo.cpp MAP_REG2REG.
+defm M0_gfxpre11 : SIRegLoHi16 <"m0", 124>; +defm M0_gfx11plus : SIRegLoHi16 <"m0", 125>; +defm M0 : SIRegLoHi16 <"m0", 0>; + +defm SGPR_NULL_gfxpre11 : SIRegLoHi16 <"null", 125>; +defm SGPR_NULL_gfx11plus : SIRegLoHi16 <"null", 124>; +defm SGPR_NULL : SIRegLoHi16 <"null", 0>; +defm SGPR_NULL_HI : SIRegLoHi16 <"", 0>; + +def SGPR_NULL64 : + RegisterWithSubRegs<"null", [SGPR_NULL, SGPR_NULL_HI]> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = SGPR_NULL.HWEncoding; +} defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>; defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>; @@ -237,7 +252,7 @@ def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; - let HWEncoding = 104; + let HWEncoding = XNACK_MASK_LO.HWEncoding; } // Trap handler registers @@ -247,7 +262,7 @@ defm TBA_HI : SIRegLoHi16<"tba_hi", 109>; def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; - let HWEncoding = 108; + let HWEncoding = TBA_LO.HWEncoding; } defm TMA_LO : SIRegLoHi16<"tma_lo", 110>; @@ -256,7 +271,7 @@ defm TMA_HI : SIRegLoHi16<"tma_hi", 111>; def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; - let HWEncoding = 110; + let HWEncoding = TMA_LO.HWEncoding; } foreach Index = 0...15 in { @@ -635,16 +650,16 @@ let GeneratePressureSet = 0, HasSGPR = 1 in { // See comments in SIInstructions.td for more info. def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, - SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, - SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, + SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, + SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, SRC_VCCZ, SRC_EXECZ, SRC_SCC)> { let AllocationPriority = 10; } def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16, - XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16, - TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16, + XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16, + TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16, SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> { let Size = 16; @@ -701,23 +716,6 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], let HasSGPR = 1; } -// CCR (call clobbered registers) SGPR 64-bit registers -def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, - (add (trunc SGPR_64, 16))> { - let CopyCost = SGPR_64.CopyCost; - let AllocationPriority = SGPR_64.AllocationPriority; - let HasSGPR = 1; -} - -// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC -def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, - (add (trunc (shl SGPR_64, 15), 1), // s[30:31] - (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63] - let CopyCost = SGPR_64.CopyCost; - let AllocationPriority = SGPR_64.AllocationPriority; - let HasSGPR = 1; -} - def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, 
v4f16], 32, (add TTMP_64Regs)> { let isAllocatable = 0; @@ -725,7 +723,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, } def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 13; let HasSGPR = 1; @@ -788,7 +786,7 @@ defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128R defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>; defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>; defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>; -defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256Regs>; +defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>; defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>; defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>; @@ -829,7 +827,7 @@ defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>; defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>; defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>; -defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64], (add VGPR_256)>; +defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>; defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>; defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>; @@ -856,21 +854,12 @@ defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024) } // End GeneratePressureSet = 0 -// This is not a real register. This is just to have a register to add -// to VReg_1 that does not alias any real register that would -// introduce inferred register classes. -def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> { - let isArtificial = 1; -} - let GeneratePressureSet = 0 in { -// FIXME: Should specify an empty set for this. No register should -// ever be allocated using VReg_1. This is a hack for SelectionDAG -// that should always be lowered by SILowerI1Copies. TableGen crashes -// on an empty register set, but also sorts register classes based on -// the number of registerss in them. Add only one register so this is +// No register should ever be allocated using VReg_1. This is a hack for +// SelectionDAG that should always be lowered by SILowerI1Copies. TableGen +// sorts register classes based on the number of registers in them so this is // sorted to the end and not preferred over VGPR_32. 
-def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
+def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)> {
   let Size = 1;
   let HasVGPR = 1;
 }
@@ -913,11 +902,11 @@ defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
 defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
 defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
 defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
-defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>;
-defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>;
-defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>;
-defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>;
-defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
+defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;
+defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;
+defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;
+defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
+defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
 
 //===----------------------------------------------------------------------===//
 //  Register operands
@@ -1087,6 +1076,27 @@ def VRegSrc_32 : RegisterOperand<VGPR_32> {
   let DecoderMethod = "DecodeVS_32RegisterClass";
 }
 
+def VRegSrc_64 : RegisterOperand<VReg_64> {
+  let DecoderMethod = "decodeOperand_VReg_64";
+}
+
+def VRegSrc_128 : RegisterOperand<VReg_128> {
+  let DecoderMethod = "decodeOperand_VReg_128";
+}
+
+def VRegSrc_256 : RegisterOperand<VReg_256> {
+  let DecoderMethod = "decodeOperand_VReg_256";
+}
+
+//===----------------------------------------------------------------------===//
+//  VGPRSrc_*
+//===----------------------------------------------------------------------===//
+
+// An 8-bit RegisterOperand wrapper for a VGPR
+def VGPRSrc_32 : RegisterOperand<VGPR_32> {
+  let DecoderMethod = "DecodeVGPR_32RegisterClass";
+}
+
 //===----------------------------------------------------------------------===//
 //  ASrc_* Operands with an AccVGPR
 //===----------------------------------------------------------------------===//
@@ -1116,7 +1126,7 @@ defm VISrc_512 : RegInlineOperandAC<"VReg", "VISrc_512", "_512">;
 defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
 
 //===----------------------------------------------------------------------===//
-//  AVSrc_* Operands with an AGPR or VGPR
+//  AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
 //===----------------------------------------------------------------------===//
 
 def AVSrc_32 : RegisterOperand<AV_32> {
@@ -1129,6 +1139,21 @@ def AVSrc_64 : RegisterOperand<AV_64> {
   let EncoderMethod = "getAVOperandEncoding";
 }
 
+def AVSrc_128 : RegisterOperand<AV_128> {
+  let DecoderMethod = "DecodeAV_128RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVDst_128 : RegisterOperand<AV_128> {
+  let DecoderMethod = "DecodeAVDst_128RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVDst_512 : RegisterOperand<AV_512> {
+  let DecoderMethod = "DecodeAVDst_512RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
 def AVLdSt_32 : RegisterOperand<AV_32> {
   let DecoderMethod = "DecodeAVLdSt_32RegisterClass";
   let EncoderMethod = "getAVOperandEncoding";
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 18d424a3bc9f..53441b5a4ced 100644
---
a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -59,6 +59,7 @@ def WriteIntMul : SchedWrite; // mAI multipass instructions. def Write2PassMAI : SchedWrite; +def Write4PassMAI : SchedWrite; def Write8PassMAI : SchedWrite; def Write16PassMAI : SchedWrite; def Write4PassDGEMM : SchedWrite; @@ -86,7 +87,9 @@ class SISchedMachineModel : SchedMachineModel { def SIFullSpeedModel : SISchedMachineModel; def SIQuarterSpeedModel : SISchedMachineModel; def SIDPFullSpeedModel : SISchedMachineModel; +def SIDPGFX940FullSpeedModel : SISchedMachineModel; def GFX10SpeedModel : SISchedMachineModel; +def GFX11SpeedModel : SISchedMachineModel; // XXX: Are the resource counts correct? def HWBranch : ProcResource<1> { @@ -156,6 +159,8 @@ multiclass SICommonWriteRes { let ResourceCycles = [2] in def : HWWriteRes; + let ResourceCycles = [4] in + def : HWWriteRes; let ResourceCycles = [8] in def : HWWriteRes; let ResourceCycles = [16] in @@ -244,6 +249,40 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; } // End SchedModel = SIDPFullSpeedModel +let SchedModel = SIDPGFX940FullSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; + +def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>; +def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>; + +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>; + +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>; +def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>; + +def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>; +def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; + +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; + +} // End SchedModel = SIDPGFX940FullSpeedModel + let SchedModel = GFX10SpeedModel in { // The latency values are 1 / (operations / cycle). 
@@ -273,3 +312,29 @@ def : HWWriteRes; def : InstRW<[WriteCopy], (instrs COPY)>; } // End SchedModel = GFX10SpeedModel + +let SchedModel = GFX11SpeedModel in { + +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; + +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; + +def : InstRW<[WriteCopy], (instrs COPY)>; + +} // End SchedModel = GFX11SpeedModel diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index c8f1daf26de9..05d2dd000162 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -26,15 +26,40 @@ using namespace llvm; namespace { class SIShrinkInstructions : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const GCNSubtarget *ST; + const SIInstrInfo *TII; + const SIRegisterInfo *TRI; + public: static char ID; - void shrinkMIMG(MachineInstr &MI); - public: SIShrinkInstructions() : MachineFunctionPass(ID) { } + bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const; + bool isKImmOperand(const MachineOperand &Src) const; + bool isKUImmOperand(const MachineOperand &Src) const; + bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const; + bool isReverseInlineImm(const MachineOperand &Src, int32_t &ReverseImm) const; + void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const; + void shrinkScalarCompare(MachineInstr &MI) const; + void shrinkMIMG(MachineInstr &MI) const; + void shrinkMadFma(MachineInstr &MI) const; + bool shrinkScalarLogicOp(MachineInstr &MI) const; + bool tryReplaceDeadSDST(MachineInstr &MI) const; + bool instAccessReg(iterator_range &&R, + Register Reg, unsigned SubReg) const; + bool instReadsReg(const MachineInstr *MI, unsigned Reg, + unsigned SubReg) const; + bool instModifiesReg(const MachineInstr *MI, unsigned Reg, + unsigned SubReg) const; + TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub, + unsigned I) const; + void dropInstructionKeepingImpDefs(MachineInstr &MI) const; + MachineInstr *matchSwap(MachineInstr &MovT) const; + bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "SI Shrink Instructions"; } @@ -59,8 +84,8 @@ FunctionPass *llvm::createSIShrinkInstructionsPass() { /// This function checks \p MI for operands defined by a move immediate /// instruction and then folds the literal constant into the instruction if it /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions. 
-static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, - MachineRegisterInfo &MRI, bool TryToCommute = true) { +bool SIShrinkInstructions::foldImmediates(MachineInstr &MI, + bool TryToCommute) const { assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI)); int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -69,8 +94,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineOperand &Src0 = MI.getOperand(Src0Idx); if (Src0.isReg()) { Register Reg = Src0.getReg(); - if (Reg.isVirtual() && MRI.hasOneUse(Reg)) { - MachineInstr *Def = MRI.getUniqueVRegDef(Reg); + if (Reg.isVirtual()) { + MachineInstr *Def = MRI->getUniqueVRegDef(Reg); if (Def && Def->isMoveImmediate()) { MachineOperand &MovSrc = Def->getOperand(1); bool ConstantFolded = false; @@ -91,8 +116,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, } if (ConstantFolded) { - assert(MRI.use_empty(Reg)); - Def->eraseFromParent(); + if (MRI->use_nodbg_empty(Reg)) + Def->eraseFromParent(); ++NumLiteralConstantsFolded; return true; } @@ -103,7 +128,7 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, // We have failed to fold src0, so commute the instruction and try again. if (TryToCommute && MI.isCommutable()) { if (TII->commuteInstruction(MI)) { - if (foldImmediates(MI, TII, MRI, false)) + if (foldImmediates(MI, false)) return true; // Commute back. @@ -114,21 +139,20 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, return false; } -static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) { +bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const { return isInt<16>(Src.getImm()) && !TII->isInlineConstant(*Src.getParent(), Src.getParent()->getOperandNo(&Src)); } -static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) { +bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const { return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(*Src.getParent(), Src.getParent()->getOperandNo(&Src)); } -static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, - const MachineOperand &Src, - bool &IsUnsigned) { +bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src, + bool &IsUnsigned) const { if (isInt<16>(Src.getImm())) { IsUnsigned = false; return !TII->isInlineConstant(Src); @@ -144,9 +168,8 @@ static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, /// \returns true if the constant in \p Src should be replaced with a bitreverse /// of an inline immediate. -static bool isReverseInlineImm(const SIInstrInfo *TII, - const MachineOperand &Src, - int32_t &ReverseImm) { +bool SIShrinkInstructions::isReverseInlineImm(const MachineOperand &Src, + int32_t &ReverseImm) const { if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src)) return false; @@ -156,8 +179,9 @@ static bool isReverseInlineImm(const SIInstrInfo *TII, /// Copy implicit register operands from specified instruction to this /// instruction that are not part of the instruction definition. 
-static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF, - const MachineInstr &MI) { +void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI, + MachineInstr &MI) const { + MachineFunction &MF = *MI.getMF(); for (unsigned i = MI.getDesc().getNumOperands() + MI.getDesc().getNumImplicitUses() + MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands(); @@ -168,7 +192,7 @@ static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF, } } -static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) { +void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { // cmpk instructions do scc = dst imm16, so commute the instruction to // get constants on the RHS. if (!MI.getOperand(0).isReg()) @@ -191,7 +215,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) { // and initially selected to the unsigned versions. if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) { bool HasUImm; - if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) { + if (isKImmOrKUImmOperand(Src1, HasUImm)) { if (!HasUImm) { SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ? AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32; @@ -205,22 +229,30 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) { const MCInstrDesc &NewDesc = TII->get(SOPKOpc); - if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) || - (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) { + if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) || + (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) { MI.setDesc(NewDesc); } } // Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding. -void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) { +void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const { const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); - if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA) + if (!Info) return; - MachineFunction *MF = MI.getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); - const SIInstrInfo *TII = ST.getInstrInfo(); - const SIRegisterInfo &TRI = TII->getRegisterInfo(); + uint8_t NewEncoding; + switch (Info->MIMGEncoding) { + case AMDGPU::MIMGEncGfx10NSA: + NewEncoding = AMDGPU::MIMGEncGfx10Default; + break; + case AMDGPU::MIMGEncGfx11NSA: + NewEncoding = AMDGPU::MIMGEncGfx11Default; + break; + default: + return; + } + int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); unsigned NewAddrDwords = Info->VAddrDwords; @@ -246,16 +278,23 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) { } unsigned VgprBase = 0; + unsigned NextVgpr = 0; bool IsUndef = true; bool IsKill = NewAddrDwords == Info->VAddrDwords; - for (unsigned i = 0; i < Info->VAddrDwords; ++i) { - const MachineOperand &Op = MI.getOperand(VAddr0Idx + i); - unsigned Vgpr = TRI.getHWRegIndex(Op.getReg()); + for (unsigned Idx = 0; Idx < Info->VAddrOperands; ++Idx) { + const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx); + unsigned Vgpr = TRI->getHWRegIndex(Op.getReg()); + unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32; + assert(Dwords > 0 && "Un-implemented for less than 32 bit regs"); - if (i == 0) { + if (Idx == 0) { VgprBase = Vgpr; - } else if (VgprBase + i != Vgpr) + NextVgpr = Vgpr + Dwords; + } else if (Vgpr == NextVgpr) { + NextVgpr = Vgpr + Dwords; + } else { return; + } if (!Op.isUndef()) IsUndef = false; @@ -288,21 +327,108 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) { } } - 
unsigned NewOpcode = - AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default, - Info->VDataDwords, NewAddrDwords); + unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding, + Info->VDataDwords, NewAddrDwords); MI.setDesc(TII->get(NewOpcode)); MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase)); MI.getOperand(VAddr0Idx).setIsUndef(IsUndef); MI.getOperand(VAddr0Idx).setIsKill(IsKill); - for (unsigned i = 1; i < Info->VAddrDwords; ++i) - MI.RemoveOperand(VAddr0Idx + 1); + for (int i = 1; i < Info->VAddrOperands; ++i) + MI.removeOperand(VAddr0Idx + 1); if (ToUntie >= 0) { MI.tieOperands( AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata), - ToUntie - (Info->VAddrDwords - 1)); + ToUntie - (Info->VAddrOperands - 1)); + } +} + +// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK. +void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { + if (!ST->hasVOP3Literal()) + return; + + if (TII->hasAnyModifiersSet(MI)) + return; + + const unsigned Opcode = MI.getOpcode(); + MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1); + MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2); + unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END; + + bool Swap; + + // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form. + if (Src2.isImm() && !TII->isInlineConstant(Src2)) { + if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg())) + Swap = false; + else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg())) + Swap = true; + else + return; + + switch (Opcode) { + default: + llvm_unreachable("Unexpected mad/fma opcode!"); + case AMDGPU::V_MAD_F32_e64: + NewOpcode = AMDGPU::V_MADAK_F32; + break; + case AMDGPU::V_FMA_F32_e64: + NewOpcode = AMDGPU::V_FMAAK_F32; + break; + case AMDGPU::V_MAD_F16_e64: + NewOpcode = AMDGPU::V_MADAK_F16; + break; + case AMDGPU::V_FMA_F16_e64: + NewOpcode = AMDGPU::V_FMAAK_F16; + break; + } + } + + // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form. + if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) { + if (Src1.isImm() && !TII->isInlineConstant(Src1)) + Swap = false; + else if (Src0.isImm() && !TII->isInlineConstant(Src0)) + Swap = true; + else + return; + + switch (Opcode) { + default: + llvm_unreachable("Unexpected mad/fma opcode!"); + case AMDGPU::V_MAD_F32_e64: + NewOpcode = AMDGPU::V_MADMK_F32; + break; + case AMDGPU::V_FMA_F32_e64: + NewOpcode = AMDGPU::V_FMAMK_F32; + break; + case AMDGPU::V_MAD_F16_e64: + NewOpcode = AMDGPU::V_MADMK_F16; + break; + case AMDGPU::V_FMA_F16_e64: + NewOpcode = AMDGPU::V_FMAMK_F16; + break; + } + } + + if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) + return; + + if (Swap) { + // Swap Src0 and Src1 by building a new instruction. + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode), + MI.getOperand(0).getReg()) + .add(Src1) + .add(Src0) + .add(Src2) + .setMIFlags(MI.getFlags()); + MI.eraseFromParent(); + } else { + TII->removeModOperands(MI); + MI.setDesc(TII->get(NewOpcode)); } } @@ -311,10 +437,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) { /// If the inverse of the immediate is legal, use ANDN2, ORN2 or /// XNOR (as a ^ b == ~(a ^ ~b)). 
/// \returns true if the caller should continue the machine function iterator
-static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
-                                MachineRegisterInfo &MRI,
-                                const SIInstrInfo *TII,
-                                MachineInstr &MI) {
+bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
   unsigned Opc = MI.getOpcode();
   const MachineOperand *Dest = &MI.getOperand(0);
   MachineOperand *Src0 = &MI.getOperand(1);
@@ -323,7 +446,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
   MachineOperand *SrcImm = Src1;
 
   if (!SrcImm->isImm() ||
-      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm()))
+      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
     return false;
 
   uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
@@ -333,7 +456,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
     if (isPowerOf2_32(~Imm)) {
       NewImm = countTrailingOnes(Imm);
       Opc = AMDGPU::S_BITSET0_B32;
-    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
       NewImm = ~Imm;
       Opc = AMDGPU::S_ANDN2_B32;
     }
@@ -341,12 +464,12 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
     if (isPowerOf2_32(Imm)) {
       NewImm = countTrailingZeros(Imm);
       Opc = AMDGPU::S_BITSET1_B32;
-    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
       NewImm = ~Imm;
       Opc = AMDGPU::S_ORN2_B32;
     }
   } else if (Opc == AMDGPU::S_XOR_B32) {
-    if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+    if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
       NewImm = ~Imm;
       Opc = AMDGPU::S_XNOR_B32;
     }
@@ -354,16 +477,10 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
     llvm_unreachable("unexpected opcode");
   }
 
-  if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
-      SrcImm == Src0) {
-    if (!TII->commuteInstruction(MI, false, 1, 2))
-      NewImm = 0;
-  }
-
   if (NewImm != 0) {
     if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
-      MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
-      MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+      MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+      MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
       return true;
     }
 
@@ -390,19 +507,19 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
 
 // This is the same as MachineInstr::readsRegister/modifiesRegister except
 // it takes subregs into account.
-static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
-                          Register Reg, unsigned SubReg,
-                          const SIRegisterInfo &TRI) {
+bool SIShrinkInstructions::instAccessReg(
+    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
+    unsigned SubReg) const {
   for (const MachineOperand &MO : R) {
     if (!MO.isReg())
       continue;
 
     if (Reg.isPhysical() && MO.getReg().isPhysical()) {
-      if (TRI.regsOverlap(Reg, MO.getReg()))
+      if (TRI->regsOverlap(Reg, MO.getReg()))
         return true;
     } else if (MO.getReg() == Reg && Reg.isVirtual()) {
-      LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
-                            TRI.getSubRegIndexLaneMask(MO.getSubReg());
+      LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
+                            TRI->getSubRegIndexLaneMask(MO.getSubReg());
       if (Overlap.any())
         return true;
     }
@@ -410,33 +527,31 @@ static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
   return false;
 }
 
-static bool instReadsReg(const MachineInstr *MI,
-                         unsigned Reg, unsigned SubReg,
-                         const SIRegisterInfo &TRI) {
-  return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
+                                        unsigned SubReg) const {
+  return instAccessReg(MI->uses(), Reg, SubReg);
 }
 
-static bool instModifiesReg(const MachineInstr *MI,
-                            unsigned Reg, unsigned SubReg,
-                            const SIRegisterInfo &TRI) {
-  return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
+                                           unsigned SubReg) const {
+  return instAccessReg(MI->defs(), Reg, SubReg);
 }
 
-static TargetInstrInfo::RegSubRegPair
-getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
-                  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
-  if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+TargetInstrInfo::RegSubRegPair
+SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
+                                        unsigned I) const {
+  if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
     if (Reg.isPhysical()) {
-      Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+      Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
     } else {
-      Sub = TRI.getSubRegFromChannel(I + TRI.getChannelFromSubReg(Sub));
+      Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
     }
   }
   return TargetInstrInfo::RegSubRegPair(Reg, Sub);
 }
 
-static void dropInstructionKeepingImpDefs(MachineInstr &MI,
-                                          const SIInstrInfo *TII) {
+void SIShrinkInstructions::dropInstructionKeepingImpDefs(
+    MachineInstr &MI) const {
   for (unsigned i = MI.getDesc().getNumOperands() +
                     MI.getDesc().getNumImplicitUses() +
                     MI.getDesc().getNumImplicitDefs(),
       e = MI.getNumOperands();
@@ -464,14 +579,13 @@ static void dropInstructionKeepingImpDefs(MachineInstr &MI,
 // Returns next valid instruction pointer if was able to create v_swap_b32.
 //
 // This shall not be done too early not to prevent possible folding which may
-// remove matched moves, and this should prefereably be done before RA to
+// remove matched moves, and this should preferably be done before RA to
 // release saved registers and also possibly after RA which can insert copies
 // too.
 //
-// This is really just a generic peephole that is not a canocical shrinking,
+// This is really just a generic peephole that is not a canonical shrinking,
 // although requirements match the pass placement and it reduces code size too.
-static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, - const SIInstrInfo *TII) { +MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const { assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 || MovT.getOpcode() == AMDGPU::COPY); @@ -486,8 +600,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, unsigned Size = TII->getOpSize(MovT, 0) / 4; - const SIRegisterInfo &TRI = TII->getRegisterInfo(); - if (!TRI.isVGPR(MRI, X)) + if (!TRI->isVGPR(*MRI, X)) return nullptr; if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0)) @@ -501,7 +614,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) { MachineInstr *MovY = &*Iter; - KilledT = MovY->killsRegister(T, &TRI); + KilledT = MovY->killsRegister(T, TRI); if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && MovY->getOpcode() != AMDGPU::COPY) || @@ -514,21 +627,20 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, Register Y = MovY->getOperand(0).getReg(); unsigned Ysub = MovY->getOperand(0).getSubReg(); - if (!TRI.isVGPR(MRI, Y)) + if (!TRI->isVGPR(*MRI, Y)) continue; MachineInstr *MovX = nullptr; for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator()); I != IY; ++I) { - if (instReadsReg(&*I, X, Xsub, TRI) || - instModifiesReg(&*I, Y, Ysub, TRI) || - instModifiesReg(&*I, T, Tsub, TRI) || - (MovX && instModifiesReg(&*I, X, Xsub, TRI))) { + if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) || + instModifiesReg(&*I, T, Tsub) || + (MovX && instModifiesReg(&*I, X, Xsub))) { MovX = nullptr; break; } - if (!instReadsReg(&*I, Y, Ysub, TRI)) { - if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) { + if (!instReadsReg(&*I, Y, Ysub)) { + if (!MovX && instModifiesReg(&*I, X, Xsub)) { MovX = nullptr; break; } @@ -559,8 +671,8 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, for (unsigned I = 0; I < Size; ++I) { TargetInstrInfo::RegSubRegPair X1, Y1; - X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI); - Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI); + X1 = getSubRegForIndex(X, Xsub, I); + Y1 = getSubRegForIndex(Y, Ysub, I); MachineBasicBlock &MBB = *MovT.getParent(); auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(), TII->get(AMDGPU::V_SWAP_B32)) @@ -570,23 +682,23 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, .addReg(X1.Reg, 0, X1.SubReg).getInstr(); if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { // Drop implicit EXEC. 
- MIB->RemoveOperand(MIB->getNumExplicitOperands()); + MIB->removeOperand(MIB->getNumExplicitOperands()); MIB->copyImplicitOps(*MBB.getParent(), *MovX); } } MovX->eraseFromParent(); - dropInstructionKeepingImpDefs(*MovY, TII); + dropInstructionKeepingImpDefs(*MovY); MachineInstr *Next = &*std::next(MovT.getIterator()); - if (T.isVirtual() && MRI.use_nodbg_empty(T)) { - dropInstructionKeepingImpDefs(MovT, TII); + if (T.isVirtual() && MRI->use_nodbg_empty(T)) { + dropInstructionKeepingImpDefs(MovT); } else { Xop.setIsKill(false); for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) { unsigned OpNo = MovT.getNumExplicitOperands() + I; const MachineOperand &Op = MovT.getOperand(OpNo); - if (Op.isKill() && TRI.regsOverlap(X, Op.getReg())) - MovT.RemoveOperand(OpNo); + if (Op.isKill() && TRI->regsOverlap(X, Op.getReg())) + MovT.removeOperand(OpNo); } } @@ -596,14 +708,32 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, return nullptr; } +// If an instruction has dead sdst replace it with NULL register on gfx1030+ +bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const { + if (!ST->hasGFX10_3Insts()) + return false; + + MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + if (!Op) + return false; + Register SDstReg = Op->getReg(); + if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg)) + return false; + + Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64); + return true; +} + bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - MachineRegisterInfo &MRI = MF.getRegInfo(); - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIInstrInfo *TII = ST.getInstrInfo(); - unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC; + MRI = &MF.getRegInfo(); + ST = &MF.getSubtarget<GCNSubtarget>(); + TII = ST->getInstrInfo(); + TRI = &TII->getRegisterInfo(); + + unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC; std::vector<unsigned> I1Defs; @@ -628,7 +758,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineOperand &Src = MI.getOperand(1); if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) { int32_t ReverseImm; - if (isReverseInlineImm(TII, Src, ReverseImm)) { + if (isReverseInlineImm(Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32)); Src.setImm(ReverseImm); continue; @@ -636,19 +766,15 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { } } - if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || - MI.getOpcode() == AMDGPU::COPY)) { - if (auto *NextMI = matchSwap(MI, MRI, TII)) { + if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || + MI.getOpcode() == AMDGPU::COPY)) { + if (auto *NextMI = matchSwap(MI)) { Next = NextMI->getIterator(); continue; } } - // FIXME: We also need to consider movs of constant operands since - // immediate operands are not folded if they have more than one use, and - // the operand folding pass is unaware if the immediate will be free since - // it won't know if the src == dest constraint will end up being - // satisfied. + // Try to use S_ADDK_I32 and S_MULK_I32. if (MI.getOpcode() == AMDGPU::S_ADD_I32 || MI.getOpcode() == AMDGPU::S_MUL_I32) { const MachineOperand *Dest = &MI.getOperand(0); @@ -664,13 +790,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // we have a vector add of a constant, we usually don't get the correct // allocation due to the subregister usage.
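// An illustrative payoff of the coalescing hint set just below (register
// and immediate assumed): once dst and src0 share an SGPR,
//   s_add_i32 s4, s4, 0x1234    ; SOP2, needs a literal word
// can be rewritten as
//   s_addk_i32 s4, 0x1234       ; SOPK, signed 16-bit inline immediate
// which is the S_ADDK_I32/S_MULK_I32 conversion performed further down.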
if (Dest->getReg().isVirtual() && Src0->isReg()) { - MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg()); - MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg()); + MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg()); + MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg()); continue; } if (Src0->isReg() && Src0->getReg() == Dest->getReg()) { - if (Src1->isImm() && isKImmOperand(TII, *Src1)) { + if (Src1->isImm() && isKImmOperand(*Src1)) { unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ? AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32; @@ -682,7 +808,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // Try to use s_cmpk_* if (MI.isCompare() && TII->isSOPC(MI)) { - shrinkScalarCompare(TII, MI); + shrinkScalarCompare(MI); continue; } @@ -693,9 +819,9 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (Src.isImm() && Dst.getReg().isPhysical()) { int32_t ReverseImm; - if (isKImmOperand(TII, Src)) + if (isKImmOperand(Src)) MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); - else if (isReverseInlineImm(TII, Src, ReverseImm)) { + else if (isReverseInlineImm(Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::S_BREV_B32)); Src.setImm(ReverseImm); } @@ -708,47 +834,70 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (MI.getOpcode() == AMDGPU::S_AND_B32 || MI.getOpcode() == AMDGPU::S_OR_B32 || MI.getOpcode() == AMDGPU::S_XOR_B32) { - if (shrinkScalarLogicOp(ST, MRI, TII, MI)) + if (shrinkScalarLogicOp(MI)) continue; } if (TII->isMIMG(MI.getOpcode()) && - ST.getGeneration() >= AMDGPUSubtarget::GFX10 && + ST->getGeneration() >= AMDGPUSubtarget::GFX10 && MF.getProperties().hasProperty( MachineFunctionProperties::Property::NoVRegs)) { shrinkMIMG(MI); continue; } - if (!TII->hasVALU32BitEncoding(MI.getOpcode())) + if (!TII->isVOP3(MI)) continue; - if (!TII->canShrink(MI, MRI)) { + if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 || + MI.getOpcode() == AMDGPU::V_FMA_F32_e64 || + MI.getOpcode() == AMDGPU::V_MAD_F16_e64 || + MI.getOpcode() == AMDGPU::V_FMA_F16_e64) { + shrinkMadFma(MI); + continue; + } + + if (!TII->hasVALU32BitEncoding(MI.getOpcode())) { + // If there is no chance we will shrink it and use VCC as sdst to get + // a 32 bit form try to replace dead sdst with NULL. + tryReplaceDeadSDST(MI); + continue; + } + + if (!TII->canShrink(MI, *MRI)) { // Try commuting the instruction and see if that enables us to shrink // it. if (!MI.isCommutable() || !TII->commuteInstruction(MI) || - !TII->canShrink(MI, MRI)) + !TII->canShrink(MI, *MRI)) { + tryReplaceDeadSDST(MI); continue; + } } int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); if (TII->isVOPC(Op32)) { - Register DstReg = MI.getOperand(0).getReg(); - if (DstReg.isVirtual()) { - // VOPC instructions can only write to the VCC register. We can't - // force them to use VCC here, because this is only one register and - // cannot deal with sequences which would require multiple copies of - // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...) - // - // So, instead of forcing the instruction to write to VCC, we provide - // a hint to the register allocator to use VCC and then we will run - // this pass again after RA and shrink it if it outputs to VCC. - MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg); - continue; + MachineOperand &Op0 = MI.getOperand(0); + if (Op0.isReg()) { + // Exclude VOPCX instructions as these don't explicitly write a + // dst. 
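// Illustrative shape of the VOPC shrink being prepared here (registers
// assumed):
//   v_cmp_lt_f32_e64 s[0:1], v0, v1    ; VOP3 form, any SGPR pair as dst
//   v_cmp_lt_f32_e32 vcc, v0, v1       ; 32-bit form, dst fixed to VCC
// The e32 encoding can only target VCC, hence the allocation hint below
// rather than a hard constraint.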
+ Register DstReg = Op0.getReg(); + if (DstReg.isVirtual()) { + // VOPC instructions can only write to the VCC register. We can't + // force them to use VCC here, because this is only one register and + // cannot deal with sequences which would require multiple copies of + // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...) + // + // So, instead of forcing the instruction to write to VCC, we + // provide a hint to the register allocator to use VCC and then we + // will run this pass again after RA and shrink it if it outputs to + // VCC. + MRI->setRegAllocationHint(DstReg, 0, VCCReg); + continue; + } + if (DstReg != VCCReg) + continue; } - if (DstReg != VCCReg) - continue; } if (Op32 == AMDGPU::V_CNDMASK_B32_e32) { @@ -760,7 +909,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { continue; Register SReg = Src2->getReg(); if (SReg.isVirtual()) { - MRI.setRegAllocationHint(SReg, 0, VCCReg); + MRI->setRegAllocationHint(SReg, 0, VCCReg); continue; } if (SReg != VCCReg) @@ -776,7 +925,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (SDst->getReg() != VCCReg) { if (SDst->getReg().isVirtual()) - MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg); + MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg); Next = true; } @@ -786,7 +935,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { AMDGPU::OpName::src2); if (Src2 && Src2->getReg() != VCCReg) { if (Src2->getReg().isVirtual()) - MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg); + MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg); Next = true; } @@ -801,14 +950,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { ++NumInstructionsShrunk; // Copy extra operands not present in the instruction definition. - copyExtraImplicitOps(*Inst32, MF, MI); + copyExtraImplicitOps(*Inst32, MI); // Copy deadness from the old explicit vcc def to the new implicit def. if (SDst && SDst->isDead()) Inst32->findRegisterDefOperand(VCCReg)->setIsDead(); MI.eraseFromParent(); - foldImmediates(*Inst32, TII, MRI); + foldImmediates(*Inst32); LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n'); } diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 46efb3c605c6..a5798afab595 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -349,8 +349,7 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR, const VNInfo *NextValue = nullptr; const VisitKey Key(Value, DefinedLanes); - if (!Visited.count(Key)) { - Visited.insert(Key); + if (Visited.insert(Key).second) { // On first visit to a phi then start processing first predecessor NextPredIdx = 0; } @@ -535,13 +534,36 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, GlobalFlags |= StateStrictWWM; LowerToMovInstrs.push_back(&MI); continue; - } else if (Opcode == AMDGPU::STRICT_WQM) { + } else if (Opcode == AMDGPU::STRICT_WQM || + TII->isDualSourceBlendEXP(MI)) { // STRICT_WQM is similar to STRICTWWM, but instead of enabling all // threads of the wave like STRICTWWM, STRICT_WQM enables all threads in // quads that have at least one active thread. 
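// A small worked example of the distinction, assuming 4-lane quads: with
// an EXEC mask of 0x10 (one live lane in the second quad), strict WWM
// enables every lane of the wave, while strict WQM enables only quads
// that already have a live lane, i.e. lanes 4-7, giving 0xF0.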
markInstructionUses(MI, StateStrictWQM, Worklist); GlobalFlags |= StateStrictWQM; - LowerToMovInstrs.push_back(&MI); + + if (Opcode == AMDGPU::STRICT_WQM) { + LowerToMovInstrs.push_back(&MI); + } else { + // Dual source blend export acts as implicit strict-wqm, its sources + // need to be shuffled in strict wqm, but the export itself needs to + // run in exact mode. + BBI.Needs |= StateExact; + if (!(BBI.InNeeds & StateExact)) { + BBI.InNeeds |= StateExact; + Worklist.push_back(MBB); + } + GlobalFlags |= StateExact; + III.Disabled = StateWQM | StateStrict; + } + continue; + } else if (Opcode == AMDGPU::LDS_PARAM_LOAD || + Opcode == AMDGPU::LDS_DIRECT_LOAD) { + // Mark these STRICTWQM, but only for the instruction, not its operands. + // This avoid unnecessarily marking M0 as requiring WQM. + InstrInfo &II = Instructions[&MI]; + II.Needs |= StateStrictWQM; + GlobalFlags |= StateStrictWQM; continue; } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 || Opcode == AMDGPU::V_SET_INACTIVE_B64) { @@ -969,7 +991,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB, MachineInstr *WQMMaskMI = nullptr; Register LiveMaskWQM; if (IsDemote) { - // Demotes deactive quads with only helper lanes + // Demote - deactivate quads with only helper lanes LiveMaskWQM = MRI->createVirtualRegister(TRI->getBoolRC()); WQMMaskMI = BuildMI(MBB, MI, DL, TII->get(WQMOpc), LiveMaskWQM).addReg(LiveMaskReg); @@ -977,7 +999,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB, .addReg(Exec) .addReg(LiveMaskWQM); } else { - // Kills deactivate lanes + // Kill - deactivate lanes no longer in live mask if (Op.isImm()) { unsigned MovOpc = ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; NewTerm = BuildMI(MBB, &MI, DL, TII->get(MovOpc), Exec).addImm(0); @@ -1453,7 +1475,7 @@ void SIWholeQuadMode::lowerCopyInstrs() { } int Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC); while (Index >= 0) { - MI->RemoveOperand(Index); + MI->removeOperand(Index); Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC); } MI->setDesc(TII->get(AMDGPU::COPY)); @@ -1468,7 +1490,7 @@ void SIWholeQuadMode::lowerCopyInstrs() { // an undef input so it is being replaced by a simple copy. // There should be a second undef source that we should remove. assert(MI->getOperand(2).isUndef()); - MI->RemoveOperand(2); + MI->removeOperand(2); MI->untieRegOperand(1); } else { assert(MI->getNumExplicitOperands() == 2); @@ -1588,11 +1610,11 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { // Physical registers like SCC aren't tracked by default anyway, so just // removing the ranges we computed is the simplest option for maintaining // the analysis results. 
- LIS->removeRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); + LIS->removeAllRegUnitsForPhysReg(AMDGPU::SCC); // If we performed any kills then recompute EXEC if (!KillInstrs.empty()) - LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI)); + LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC); return true; } diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 184c871db775..882d13402a19 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -11,13 +11,19 @@ def smrd_offset_8 : NamedOperandU32<"SMRDOffset8", let OperandType = "OPERAND_IMMEDIATE"; } -def smem_offset : NamedOperandU32<"SMEMOffset", - NamedMatchClass<"SMEMOffset">> { +class SMEMOffset : NamedOperandU32<"SMEMOffset", + NamedMatchClass<"SMEMOffset">> { let OperandType = "OPERAND_IMMEDIATE"; let EncoderMethod = "getSMEMOffsetEncoding"; let DecoderMethod = "decodeSMEMOffset"; } +def smem_offset : SMEMOffset; + +def smem_offset_mod : SMEMOffset { + let PrintMethod = "printSMEMOffsetMod"; +} + //===----------------------------------------------------------------------===// // Scalar Memory classes //===----------------------------------------------------------------------===// @@ -43,13 +49,13 @@ class SM_Pseudo patt bits<1> has_sdst = 1; bit has_glc = 0; bit has_dlc = 0; - bits<1> has_offset = 1; - bits<1> offset_is_imm = 0; + bit has_offset = 0; + bit has_soffset = 0; bit is_buffer = 0; } -class SM_Real - : InstSI { +class SM_Real + : InstSI { let isPseudo = 0; let isCodeGenOnly = 0; @@ -77,20 +83,40 @@ class SM_Real bits<7> sbase; bits<7> sdst; bits<32> offset; - bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); + bits<8> soffset; bits<5> cpol; } -class SM_Probe_Pseudo - : SM_Pseudo { +class OffsetMode { + bit HasOffset = hasOffset; + bit HasSOffset = hasSOffset; + string Variant = variant; + dag Ins = ins; + string Asm = asm; +} + +def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">; +def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">; +def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM", + (ins SReg_32:$soffset, smem_offset_mod:$offset), + "$soffset$offset">; + +class SM_Probe_Pseudo + : SM_Pseudo { let mayLoad = 0; let mayStore = 0; let has_glc = 0; let LGKM_CNT = 0; let ScalarStore = 0; let hasSideEffects = 1; - let offset_is_imm = isImm; - let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR"); + let has_offset = hasOffset; + let has_soffset = hasSOffset; + let PseudoInstr = opName # variant; } class SM_Load_Pseudo pattern=[]> @@ -102,10 +128,11 @@ class SM_Load_Pseudo let has_dlc = 1; } -class SM_Store_Pseudo pattern = []> - : SM_Pseudo { - RegisterClass BaseClass; - RegisterClass SrcClass; +class SM_Store_Pseudo + : SM_Pseudo { + RegisterClass BaseClass = baseClass; + RegisterClass SrcClass = srcClass; let mayLoad = 0; let mayStore = 1; let has_glc = 1; @@ -113,16 +140,19 @@ class SM_Store_Pseudo pattern let ScalarStore = 1; } -class SM_Discard_Pseudo - : SM_Pseudo { +class SM_Discard_Pseudo + : SM_Pseudo { let mayLoad = 0; let mayStore = 0; let has_glc = 0; let has_sdst = 0; let ScalarStore = 0; let hasSideEffects = 1; - let offset_is_imm = isImm; - let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR"); + let has_offset = hasOffset; + let has_soffset = hasSOffset; + let PseudoInstr = opName # variant; } multiclass SM_Pseudo_Loads { - let offset_is_imm = 1; + let has_offset = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; let 
has_glc = 1; @@ -141,39 +171,63 @@ multiclass SM_Pseudo_Loads { + (ins baseClass:$sbase, SReg_32:$soffset, CPol:$cpol), + " $sdst, $sbase, $soffset$cpol", []> { + let has_soffset = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; let has_glc = 1; let has_dlc = 1; } + + def _SGPR_IMM : SM_Load_Pseudo { + let has_offset = 1; + let has_soffset = 1; + let BaseClass = baseClass; + let PseudoInstr = opName # "_SGPR_IMM"; + let has_glc = 1; + let has_dlc = 1; + } } multiclass SM_Pseudo_Stores { - def _IMM : SM_Store_Pseudo { - let offset_is_imm = 1; - let BaseClass = baseClass; - let SrcClass = srcClass; + " $sdata, $sbase, $offset$cpol"> { + let has_offset = 1; let PseudoInstr = opName # "_IMM"; } - def _SGPR : SM_Store_Pseudo { - let BaseClass = baseClass; - let SrcClass = srcClass; + def _SGPR : SM_Store_Pseudo { + let has_soffset = 1; let PseudoInstr = opName # "_SGPR"; } + + def _SGPR_IMM : SM_Store_Pseudo { + let has_offset = 1; + let has_soffset = 1; + let PseudoInstr = opName # "_SGPR_IMM"; + } } multiclass SM_Pseudo_Discards { - def _IMM : SM_Discard_Pseudo ; - def _SGPR : SM_Discard_Pseudo ; + def _IMM : SM_Discard_Pseudo ; + def _SGPR : SM_Discard_Pseudo ; + def _SGPR_IMM : SM_Discard_Pseudo ; } class SM_Time_Pseudo : SM_Pseudo< @@ -184,21 +238,24 @@ class SM_Time_Pseudo : SM_Pse let mayStore = 0; let mayLoad = 0; let has_sbase = 0; - let has_offset = 0; } class SM_Inval_Pseudo : SM_Pseudo< opName, (outs), (ins), "", [(node)]> { let hasSideEffects = 1; + let mayLoad = 0; let mayStore = 0; let has_sdst = 0; let has_sbase = 0; - let has_offset = 0; } multiclass SM_Pseudo_Probe { - def _IMM : SM_Probe_Pseudo ; - def _SGPR : SM_Probe_Pseudo ; + def _IMM : SM_Probe_Pseudo ; + def _SGPR : SM_Probe_Pseudo ; + def _SGPR_IMM : SM_Probe_Pseudo ; } class SM_WaveId_Pseudo : SM_Pseudo< @@ -206,9 +263,8 @@ class SM_WaveId_Pseudo : SM_Pseudo< " $sdst", [(set i32:$sdst, (node))]> { let hasSideEffects = 1; let mayStore = 0; - let mayLoad = 1; + let mayLoad = 0; let has_sbase = 0; - let has_offset = 0; } //===----------------------------------------------------------------------===// @@ -225,6 +281,7 @@ class SM_Atomic_Pseudo : SM_Atomic_Pseudo, AtomicNoRet { - let offset_is_imm = isImm; + let has_offset = offsets.HasOffset; + let has_soffset = offsets.HasSOffset; let PseudoInstr = opNameWithSuffix; let Constraints = !if(isRet, "$sdst = $sdata", ""); @@ -264,10 +321,12 @@ class SM_Pseudo_Atomic { - def _IMM : SM_Pseudo_Atomic ; - def _SGPR : SM_Pseudo_Atomic ; - def _IMM_RTN : SM_Pseudo_Atomic ; - def _SGPR_RTN : SM_Pseudo_Atomic ; + def _IMM : SM_Pseudo_Atomic ; + def _SGPR : SM_Pseudo_Atomic ; + def _SGPR_IMM : SM_Pseudo_Atomic ; + def _IMM_RTN : SM_Pseudo_Atomic ; + def _SGPR_RTN : SM_Pseudo_Atomic ; + def _SGPR_IMM_RTN : SM_Pseudo_Atomic ; } //===----------------------------------------------------------------------===// @@ -452,16 +511,14 @@ class SMRD_Real_si op, SM_Pseudo ps> let AssemblerPredicate = isGFX6GFX7; let DecoderNamespace = "GFX6GFX7"; - let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?); - let Inst{8} = imm; + let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?)); + let Inst{8} = ps.has_offset; let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?); let Inst{26-22} = op; let Inst{31-27} = 0x18; //encoding } -// FIXME: Assembler should reject trying to use glc on SMRD -// instructions on SI. 
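// The three offset variants correspond roughly to these assembly forms
// (operands assumed; exact printing comes from printSMEMOffsetMod):
//   s_load_dword s0, s[2:3], 0x10              // _IMM
//   s_load_dword s0, s[2:3], s4                // _SGPR
//   s_load_dword s0, s[2:3], s4 offset:0x10    // _SGPR_IMM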
multiclass SM_Real_Loads_si op, string ps, SM_Load_Pseudo immPs = !cast(ps#_IMM), SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { @@ -470,10 +527,8 @@ multiclass SM_Real_Loads_si op, string ps, let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol); } - // FIXME: The operand name $offset is inconsistent with $soff used - // in the pseudo def _SGPR_si : SMRD_Real_si { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); } } @@ -494,42 +549,82 @@ def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>; //===----------------------------------------------------------------------===// -// VI +// VI and GFX9. //===----------------------------------------------------------------------===// class SMEM_Real_vi op, SM_Pseudo ps> : SM_Real , SIMCInstr , Enc64 { - let AssemblerPredicate = isGFX8GFX9; + field bit IsGFX9SpecificEncoding = false; + let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9); let DecoderNamespace = "GFX8"; let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); + // Note that for GFX9 instructions with immediate offsets, soffset_en + // must be defined, whereas in GFX8 it's undefined in all cases, + // meaning GFX9 is not perfectly backward-compatible with GFX8, despite + // documentation suggesting otherwise. + field bit SOffsetEn = !if(IsGFX9SpecificEncoding, + !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)), + ?); + let Inst{14} = SOffsetEn; + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); - let Inst{17} = imm; + + // imm + // TODO: Shall not be defined if the instruction has no offset nor + // soffset. + let Inst{17} = ps.has_offset; + let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed. // Offset value is corrected accordingly when offset is encoded/decoded. - let Inst{38-32} = !if(ps.has_offset, offset{6-0}, ?); - let Inst{52-39} = !if(ps.has_offset, !if(imm, offset{20-7}, ?), ?); + // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics. + field bits<21> Offset; + let Offset{6-0} = !if(ps.has_offset, offset{6-0}, + !if(ps.has_soffset, soffset{6-0}, ?)); + let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?); + let Inst{52-32} = Offset; + + // soffset + let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset), + soffset{6-0}, ?); } -multiclass SM_Real_Loads_vi op, string ps, - SM_Load_Pseudo immPs = !cast(ps#_IMM), - SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { - def _IMM_vi : SMEM_Real_vi { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } - def _SGPR_vi : SMEM_Real_vi { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); - } +class SMEM_Real_Load_vi op, string ps, dag offsets> + : SMEM_Real_vi(ps)> { + RegisterClass BaseClass = !cast(ps).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); } -class SMEM_Real_Store_vi op, SM_Pseudo ps> : SMEM_Real_vi { +// The alternative GFX9 SGPR encoding using soffset to encode the +// offset register. Not available in assembler and goes to the GFX9 +// encoding family to avoid conflicts with the primary SGPR variant. 
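// Sketch of the encoding difference, per the SMEM_Real_vi layout above:
// the primary _SGPR variant carries the offset register in the low
// offset bits, Inst{38-32}, with soffset_en (Inst{14}) clear, while this
// alternative carries it in the soffset field, Inst{63-57}, with
// soffset_en set; both decode to the same operation.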
+class SMEM_Real_SGPR_alt_gfx9 { + bit IsGFX9SpecificEncoding = true; + bit SOffsetEn = 1; + bit Offset = ?; + int Subtarget = SIEncodingFamily.GFX9; + string AsmVariantName = "NonParsable"; +} + +multiclass SM_Real_Loads_vi op, string ps> { + def _IMM_vi : SMEM_Real_Load_vi ; + def _SGPR_vi : SMEM_Real_Load_vi ; + def _SGPR_alt_gfx9 : SMEM_Real_Load_vi , + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi < + op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; +} + +class SMEM_Real_Store_Base_vi op, SM_Pseudo ps> : SMEM_Real_vi { // encoding bits<7> sdata; @@ -537,23 +632,34 @@ class SMEM_Real_Store_vi op, SM_Pseudo ps> : SMEM_Real_vi { let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); } -multiclass SM_Real_Stores_vi op, string ps, - SM_Store_Pseudo immPs = !cast(ps#_IMM), - SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { - // FIXME: The operand name $offset is inconsistent with $soff used - // in the pseudo - def _IMM_vi : SMEM_Real_Store_vi { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } +class SMEM_Real_Store_vi op, string ps, dag offsets> + : SMEM_Real_Store_Base_vi (ps)> { + RegisterClass SrcClass = !cast(ps).SrcClass; + RegisterClass BaseClass = !cast(ps).BaseClass; + let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase), + offsets, (ins CPol:$cpol)); +} - def _SGPR_vi : SMEM_Real_Store_vi { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); - } +multiclass SM_Real_Stores_vi op, string ps> { + def _IMM_vi : SMEM_Real_Store_vi ; + def _SGPR_vi : SMEM_Real_Store_vi ; + def _SGPR_alt_gfx9 : SMEM_Real_Store_vi , + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi < + op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; } multiclass SM_Real_Probe_vi op, string ps> { - def _IMM_vi : SMEM_Real_Store_vi (ps#_IMM)>; - def _SGPR_vi : SMEM_Real_Store_vi (ps#_SGPR)>; + def _IMM_vi : SMEM_Real_Store_Base_vi (ps#_IMM)>; + def _SGPR_vi : SMEM_Real_Store_Base_vi (ps#_SGPR)>; + def _SGPR_alt_gfx9 + : SMEM_Real_Store_Base_vi (ps#_SGPR)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_gfx9 + : SMEM_Real_Store_Base_vi (ps#_SGPR_IMM)>; } defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">; @@ -614,8 +720,20 @@ class SMEM_Atomic_Real_vi op, SM_Atomic_Pseudo ps> multiclass SM_Real_Atomics_vi op, string ps> { def _IMM_vi : SMEM_Atomic_Real_vi (ps#_IMM)>; def _SGPR_vi : SMEM_Atomic_Real_vi (ps#_SGPR)>; + def _SGPR_alt_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR_IMM)>; def _IMM_RTN_vi : SMEM_Atomic_Real_vi (ps#_IMM_RTN)>; def _SGPR_RTN_vi : SMEM_Atomic_Real_vi (ps#_SGPR_RTN)>; + def _SGPR_RTN_alt_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR_RTN)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_RTN_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR_IMM_RTN)>; } defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">; @@ -677,6 +795,10 @@ defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2"> multiclass SM_Real_Discard_vi op, string ps> { def _IMM_vi : SMEM_Real_vi (ps#_IMM)>; def _SGPR_vi : SMEM_Real_vi (ps#_SGPR)>; + def _SGPR_alt_gfx9 : SMEM_Real_vi (ps#_SGPR)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + 
def _SGPR_IMM_gfx9 : SMEM_Real_vi (ps#_SGPR_IMM)>; } defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">; @@ -727,8 +849,8 @@ class SMRD_Real_ci op, SM_Pseudo ps> let AssemblerPredicate = isGFX7Only; let DecoderNamespace = "GFX7"; - let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?); - let Inst{8} = imm; + let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?)); + let Inst{8} = ps.has_offset; let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?); let Inst{26-22} = op; @@ -876,20 +998,27 @@ def : GCNPat < // GFX10. //===----------------------------------------------------------------------===// -class SMEM_Real_gfx10 op, SM_Pseudo ps> : - SM_Real, SIMCInstr, Enc64 { - let AssemblerPredicate = isGFX10Plus; - let DecoderNamespace = "GFX10"; - +class SMEM_Real_10Plus_common op, SM_Pseudo ps, string opName, + int subtarget, RegisterWithSubRegs sgpr_null> : + SM_Real, SIMCInstr, Enc64 { let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); - let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?); - let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); let Inst{25-18} = op; let Inst{31-26} = 0x3d; - let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?); - let Inst{63-57} = !if(ps.offset_is_imm, !cast(SGPR_NULL.HWEncoding), - !if(ps.has_offset, offset{6-0}, ?)); + // There are SMEM instructions that do not employ any of the offset + // fields, in which case we need them to remain undefined. + let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?)); + let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, + !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?)); +} + +class SMEM_Real_gfx10 op, SM_Pseudo ps> + : SMEM_Real_10Plus_common { + let AssemblerPredicate = isGFX10Only; + let DecoderNamespace = "GFX10"; + let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?); + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); } multiclass SM_Real_Loads_gfx10 op, string ps, @@ -899,7 +1028,11 @@ multiclass SM_Real_Loads_gfx10 op, string ps, let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_gfx10 : SMEM_Real_gfx10 { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); + } + def _SGPR_IMM_gfx10 : SMEM_Real_gfx10(ps#_SGPR_IMM)> { + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, + smem_offset_mod:$offset, CPol:$cpol); } } @@ -913,14 +1046,17 @@ class SMEM_Real_Store_gfx10 op, SM_Pseudo ps> : SMEM_Real_gfx10 multiclass SM_Real_Stores_gfx10 op, string ps, SM_Store_Pseudo immPs = !cast(ps#_IMM), SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { - // FIXME: The operand name $offset is inconsistent with $soff used - // in the pseudo def _IMM_gfx10 : SMEM_Real_Store_gfx10 { let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_gfx10 : SMEM_Real_Store_gfx10 { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); + } + + def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 (ps#_SGPR_IMM)> { + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, + SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol); } } @@ -969,6 +1105,8 @@ def S_DCACHE_WB_gfx10 : 
SMEM_Real_gfx10<0x021, S_DCACHE_WB>; multiclass SM_Real_Probe_gfx10 op, string ps> { def _IMM_gfx10 : SMEM_Real_Store_gfx10 (ps#_IMM)>; def _SGPR_gfx10 : SMEM_Real_Store_gfx10 (ps#_SGPR)>; + def _SGPR_IMM_gfx10 + : SMEM_Real_Store_gfx10 (ps#_SGPR_IMM)>; } defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">; @@ -992,8 +1130,10 @@ class SMEM_Atomic_Real_gfx10 op, SM_Atomic_Pseudo ps> multiclass SM_Real_Atomics_gfx10 op, string ps> { def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM)>; def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR)>; + def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_IMM)>; def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM_RTN)>; def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_RTN)>; + def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_IMM_RTN)>; } let SubtargetPredicate = HasScalarAtomics in { @@ -1057,6 +1197,7 @@ defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X multiclass SM_Real_Discard_gfx10 op, string ps> { def _IMM_gfx10 : SMEM_Real_gfx10 (ps#_IMM)>; def _SGPR_gfx10 : SMEM_Real_gfx10 (ps#_SGPR)>; + def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 (ps#_SGPR_IMM)>; } defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">; @@ -1072,3 +1213,64 @@ def SMInfoTable : GenericTable { let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getSMEMOpcodeHelper"; } + +//===----------------------------------------------------------------------===// +// GFX11. +//===----------------------------------------------------------------------===// + +class SMEM_Real_gfx11 op, SM_Pseudo ps, string opName = ps.Mnemonic> : + SMEM_Real_10Plus_common { + let AssemblerPredicate = isGFX11Plus; + let DecoderNamespace = "GFX11"; + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); +} + +class SMEM_Real_Load_gfx11 op, string ps, string opName, dag offsets> : + SMEM_Real_gfx11(ps), opName> { + RegisterClass BaseClass = !cast(ps).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +} + +multiclass SM_Real_Loads_gfx11 op, string ps, string opName> { + def _IMM_gfx11 : SMEM_Real_Load_gfx11; + def _SGPR_gfx11 : SMEM_Real_Load_gfx11; + def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11< + op, ps#"_SGPR_IMM", opName, (ins SReg_32:$soffset, smem_offset_mod:$offset)>; + def : MnemonicAlias(ps#"_IMM").Mnemonic, opName>, + Requires<[isGFX11Plus]>; +} + +defm S_LOAD_B32 : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD", "s_load_b32">; +defm S_LOAD_B64 : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2", "s_load_b64">; +defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4", "s_load_b128">; +defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8", "s_load_b256">; +defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16", "s_load_b512">; + +defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD", "s_buffer_load_b32">; +defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2", "s_buffer_load_b64">; +defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4", "s_buffer_load_b128">; +defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8", "s_buffer_load_b256">; +defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16", "s_buffer_load_b512">; + +def S_GL1_INV_gfx11 : SMEM_Real_gfx11<0x020, S_GL1_INV>; +def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>; + +class SMEM_Real_Store_gfx11 op, SM_Pseudo ps> : SMEM_Real_gfx11 { + // encoding + bits<7> sdata; + + 
let sdst = ?; + let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); +} + +multiclass SM_Real_Probe_gfx11 op, string ps> { + def _IMM_gfx11 : SMEM_Real_Store_gfx11 (ps#_IMM)>; + def _SGPR_gfx11 : SMEM_Real_Store_gfx11 (ps#_SGPR)>; + def _SGPR_IMM_gfx11 + : SMEM_Real_Store_gfx11 (ps#_SGPR_IMM)>; +} + +defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22, "S_ATC_PROBE">; +defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23, "S_ATC_PROBE_BUFFER">; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 3f7837f7dbf1..37d20045adb5 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -152,8 +152,8 @@ class SOP1_64_0 pattern=[]> : SOP1_Pseudo < } // 64-bit input, no output -class SOP1_1 pattern=[]> : SOP1_Pseudo < - opName, (outs), (ins rc:$src0), "$src0", pattern> { +class SOP1_1 pattern=[]> : SOP1_Pseudo < + opName, (outs), (ins SReg_64:$src0), "$src0", pattern> { let has_sdst = 0; } @@ -235,10 +235,10 @@ def : GCNPat < let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def S_BREV_B32 : SOP1_32 <"s_brev_b32", - [(set i32:$sdst, (bitreverse i32:$src0))] + [(set i32:$sdst, (UniformUnaryFrag i32:$src0))] >; def S_BREV_B64 : SOP1_64 <"s_brev_b64", - [(set i64:$sdst, (bitreverse i64:$src0))] + [(set i64:$sdst, (UniformUnaryFrag i64:$src0))] >; } // End isReMaterializable = 1, isAsCheapAsAMove = 1 @@ -276,10 +276,10 @@ def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32", >; def S_FLBIT_I32_I64 : SOP1_32_64 <"s_flbit_i32_i64">; def S_SEXT_I32_I8 : SOP1_32 <"s_sext_i32_i8", - [(set i32:$sdst, (sext_inreg i32:$src0, i8))] + [(set i32:$sdst, (UniformSextInreg i32:$src0))] >; def S_SEXT_I32_I16 : SOP1_32 <"s_sext_i32_i16", - [(set i32:$sdst, (sext_inreg i32:$src0, i16))] + [(set i32:$sdst, (UniformSextInreg i32:$src0))] >; } // End isReMaterializable = 1 @@ -300,8 +300,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">; let isReturn = 1 in { // Define variant marked as return rather than branch. -def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>; -def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>; +def S_SETPC_B64_return : SOP1_1<"">; } } // End isTerminator = 1, isBarrier = 1 @@ -341,7 +340,7 @@ def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">; let Defs = [SCC] in { def S_ABS_I32 : SOP1_32 <"s_abs_i32", - [(set i32:$sdst, (abs i32:$src0))] + [(set i32:$sdst, (UniformUnaryFrag i32:$src0))] >; } // End Defs = [SCC] @@ -385,6 +384,21 @@ let SubtargetPredicate = isGFX10Plus in { } // End Uses = [M0] } // End SubtargetPredicate = isGFX10Plus +let SubtargetPredicate = isGFX11Plus in { + let hasSideEffects = 1 in { + // For s_sendmsg_rtn_* the src0 field encodes the message type directly; it + // is not an SGPR number. 
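// These are selected from the new intrinsic, so a use looks roughly like
// (message id assumed for illustration):
//   %v = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 128)
// with the immediate being the message code itself, per the comment
// above.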
+ def S_SENDMSG_RTN_B32 : SOP1_Pseudo< + "s_sendmsg_rtn_b32", (outs SReg_32:$sdst), (ins SendMsgImm:$src0), + "$sdst, $src0", [(set i32:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))] + >; + def S_SENDMSG_RTN_B64 : SOP1_Pseudo< + "s_sendmsg_rtn_b64", (outs SReg_64:$sdst), (ins SendMsgImm:$src0), + "$sdst, $src0", [(set i64:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))] + >; + } +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // SOP2 Instructions //===----------------------------------------------------------------------===// @@ -690,6 +704,10 @@ let SubtargetPredicate = isGFX9Plus in { } // End isCommutable = 1, isReMaterializable = 1 } // End SubtargetPredicate = isGFX9Plus +let SubtargetPredicate = isGFX11Plus in { + def S_PACK_HL_B32_B16 : SOP2_32<"s_pack_hl_b32_b16">; +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // SOPK Instructions //===----------------------------------------------------------------------===// @@ -855,9 +873,7 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo < "$sdst, $simm16" >; -let mayLoad = 1 in { -// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow -// its use in the readcyclecounter selection. +// This is hasSideEffects to allow its use in readcyclecounter selection. // FIXME: Need to truncate immediate to 16-bits. def S_GETREG_B32 : SOPK_Pseudo < "s_getreg_b32", @@ -867,7 +883,6 @@ def S_GETREG_B32 : SOPK_Pseudo < let SOPKZext = 1; let hasSideEffects = 1; } -} // End mayLoad = 1 let Defs = [MODE], Uses = [MODE] in { @@ -1169,12 +1184,12 @@ def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> { let isReturn = 1; } -let SubtargetPredicate = isGFX9Plus in { +let SubtargetPredicate = isGFX9GFX10 in { let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in { def S_ENDPGM_ORDERED_PS_DONE : SOPP_Pseudo<"s_endpgm_ordered_ps_done", (ins)>; } // End isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 -} // End SubtargetPredicate = isGFX9Plus +} // End SubtargetPredicate = isGFX9GFX10 let SubtargetPredicate = isGFX10Plus in { let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in { @@ -1279,15 +1294,21 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16), let hasSideEffects = 1; } -def S_SETPRIO : SOPP_Pseudo <"s_setprio" , (ins i16imm:$simm16), "$simm16">; +def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16", + [(int_amdgcn_s_setprio timm:$simm16)]> { + let hasSideEffects = 1; +} let Uses = [EXEC, M0] in { -// FIXME: Should this be mayLoad+mayStore? 
def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsgImm:$simm16), "$simm16", - [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>; + [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> { + let hasSideEffects = 1; +} def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsgImm:$simm16), "$simm16", - [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>; + [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]> { + let hasSideEffects = 1; +} } // End Uses = [EXEC, M0] @@ -1341,7 +1362,7 @@ let SubtargetPredicate = isGFX10Plus in { let fixed_imm = 1; } def S_WAITCNT_DEPCTR : - SOPP_Pseudo <"s_waitcnt_depctr" , (ins s16imm:$simm16), "$simm16">; + SOPP_Pseudo <"s_waitcnt_depctr" , (ins DepCtrImm:$simm16), "$simm16">; let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in { def S_ROUND_MODE : @@ -1355,6 +1376,13 @@ let SubtargetPredicate = isGFX10Plus in { SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">; } // End SubtargetPredicate = isGFX10Plus +let SubtargetPredicate = isGFX11Plus in { + def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16), + "$simm16">; + def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16), + "$simm16">; +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// @@ -1377,7 +1405,7 @@ def : GCNPat < >; def : GCNPat < - (i32 (smax i32:$x, (i32 (ineg i32:$x)))), + (i32 (UniformBinFrag i32:$x, (i32 (ineg i32:$x)))), (S_ABS_I32 SReg_32:$x) >; @@ -1408,7 +1436,7 @@ def : GCNPat < // REG_SEQUENCE patterns don't support instructions with multiple // outputs. def : GCNPat< - (i64 (zext i16:$src)), + (i64 (UniformUnaryFrag i16:$src)), (REG_SEQUENCE SReg_64, (i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (S_MOV_B32 (i32 0xffff))), SGPR_32)), sub0, (S_MOV_B32 (i32 0)), sub1) @@ -1421,7 +1449,7 @@ def : GCNPat < >; def : GCNPat< - (i32 (zext i16:$src)), + (i32 (UniformUnaryFrag i16:$src)), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src) >; @@ -1448,8 +1476,13 @@ def : ScalarNot2Pat; // Target-specific instruction encodings. //===----------------------------------------------------------------------===// +class Select_gfx11 : SIMCInstr { + Predicate AssemblerPredicate = isGFX11Only; + string DecoderNamespace = "GFX11"; +} + class Select_gfx10 : SIMCInstr { - Predicate AssemblerPredicate = isGFX10Plus; + Predicate AssemblerPredicate = isGFX10Only; string DecoderNamespace = "GFX10"; } @@ -1463,6 +1496,87 @@ class Select_gfx6_gfx7 : SIMCInstr { string DecoderNamespace = "GFX6GFX7"; } +//===----------------------------------------------------------------------===// +// GFX11. 
+//===----------------------------------------------------------------------===// + +multiclass SOP1_Real_gfx11 op> { + def _gfx11 : SOP1_Real(NAME)>, + Select_gfx11(NAME).Mnemonic>; +} + +multiclass SOP1_Real_Renamed_gfx11 op, SOP1_Pseudo backing_pseudo, string real_name> { + def _gfx11 : SOP1_Real, + Select_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; +} + +defm S_MOV_B32 : SOP1_Real_gfx11<0x000>; +defm S_MOV_B64 : SOP1_Real_gfx11<0x001>; +defm S_CMOV_B32 : SOP1_Real_gfx11<0x002>; +defm S_CMOV_B64 : SOP1_Real_gfx11<0x003>; +defm S_BREV_B32 : SOP1_Real_gfx11<0x004>; +defm S_BREV_B64 : SOP1_Real_gfx11<0x005>; +defm S_CTZ_I32_B32 : SOP1_Real_Renamed_gfx11<0x008, S_FF1_I32_B32, "s_ctz_i32_b32">; +defm S_CTZ_I32_B64 : SOP1_Real_Renamed_gfx11<0x009, S_FF1_I32_B64, "s_ctz_i32_b64">; +defm S_CLZ_I32_U32 : SOP1_Real_Renamed_gfx11<0x00a, S_FLBIT_I32_B32, "s_clz_i32_u32">; +defm S_CLZ_I32_U64 : SOP1_Real_Renamed_gfx11<0x00b, S_FLBIT_I32_B64, "s_clz_i32_u64">; +defm S_CLS_I32 : SOP1_Real_Renamed_gfx11<0x00c, S_FLBIT_I32, "s_cls_i32">; +defm S_CLS_I32_I64 : SOP1_Real_Renamed_gfx11<0x00d, S_FLBIT_I32_I64, "s_cls_i32_i64">; +defm S_SEXT_I32_I8 : SOP1_Real_gfx11<0x00e>; +defm S_SEXT_I32_I16 : SOP1_Real_gfx11<0x00f>; +defm S_BITSET0_B32 : SOP1_Real_gfx11<0x010>; +defm S_BITSET0_B64 : SOP1_Real_gfx11<0x011>; +defm S_BITSET1_B32 : SOP1_Real_gfx11<0x012>; +defm S_BITSET1_B64 : SOP1_Real_gfx11<0x013>; +defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx11<0x014>; +defm S_ABS_I32 : SOP1_Real_gfx11<0x015>; +defm S_BCNT0_I32_B32 : SOP1_Real_gfx11<0x016>; +defm S_BCNT0_I32_B64 : SOP1_Real_gfx11<0x017>; +defm S_BCNT1_I32_B32 : SOP1_Real_gfx11<0x018>; +defm S_BCNT1_I32_B64 : SOP1_Real_gfx11<0x019>; +defm S_QUADMASK_B32 : SOP1_Real_gfx11<0x01a>; +defm S_QUADMASK_B64 : SOP1_Real_gfx11<0x01b>; +defm S_WQM_B32 : SOP1_Real_gfx11<0x01c>; +defm S_WQM_B64 : SOP1_Real_gfx11<0x01d>; +defm S_NOT_B32 : SOP1_Real_gfx11<0x01e>; +defm S_NOT_B64 : SOP1_Real_gfx11<0x01f>; +defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx11<0x020>; +defm S_AND_SAVEEXEC_B64 : SOP1_Real_gfx11<0x021>; +defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x022>; +defm S_OR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x023>; +defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x024>; +defm S_XOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x025>; +defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx11<0x026>; +defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx11<0x027>; +defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x028>; +defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x029>; +defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x02a>; +/*defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x02b>; //same as older arch, handled there*/ +defm S_AND_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x02c, S_ANDN1_SAVEEXEC_B32, "s_and_not0_saveexec_b32">; +defm S_AND_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x02d, S_ANDN1_SAVEEXEC_B64, "s_and_not0_saveexec_b64">; +defm S_OR_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x02e, S_ORN1_SAVEEXEC_B32, "s_or_not0_saveexec_b32">; +defm S_OR_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x02f, S_ORN1_SAVEEXEC_B64, "s_or_not0_saveexec_b64">; +defm S_AND_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x030, S_ANDN2_SAVEEXEC_B32, "s_and_not1_saveexec_b32">; +defm S_AND_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x031, S_ANDN2_SAVEEXEC_B64, "s_and_not1_saveexec_b64">; +defm S_OR_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x032, S_ORN2_SAVEEXEC_B32, "s_or_not1_saveexec_b32">; +defm S_OR_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x033, S_ORN2_SAVEEXEC_B64, "s_or_not1_saveexec_b64">; +defm S_AND_NOT0_WREXEC_B32 : 
SOP1_Real_Renamed_gfx11<0x034, S_ANDN1_WREXEC_B32, "s_and_not0_wrexec_b32">; +defm S_AND_NOT0_WREXEC_B64 : SOP1_Real_Renamed_gfx11<0x035, S_ANDN1_WREXEC_B64, "s_and_not0_wrexec_b64">; +defm S_AND_NOT1_WREXEC_B32 : SOP1_Real_Renamed_gfx11<0x036, S_ANDN2_WREXEC_B32, "s_and_not1_wrexec_b32">; +defm S_AND_NOT1_WREXEC_B64 : SOP1_Real_Renamed_gfx11<0x037, S_ANDN2_WREXEC_B64, "s_and_not1_wrexec_b64">; +defm S_MOVRELS_B32 : SOP1_Real_gfx11<0x040>; +defm S_MOVRELS_B64 : SOP1_Real_gfx11<0x041>; +defm S_MOVRELD_B32 : SOP1_Real_gfx11<0x042>; +defm S_MOVRELD_B64 : SOP1_Real_gfx11<0x043>; +defm S_MOVRELSD_2_B32 : SOP1_Real_gfx11<0x044>; +defm S_GETPC_B64 : SOP1_Real_gfx11<0x047>; +defm S_SETPC_B64 : SOP1_Real_gfx11<0x048>; +defm S_SWAPPC_B64 : SOP1_Real_gfx11<0x049>; +defm S_RFE_B64 : SOP1_Real_gfx11<0x04a>; +defm S_SENDMSG_RTN_B32 : SOP1_Real_gfx11<0x04c>; +defm S_SENDMSG_RTN_B64 : SOP1_Real_gfx11<0x04d>; + //===----------------------------------------------------------------------===// // SOP1 - GFX10. //===----------------------------------------------------------------------===// @@ -1473,6 +1587,9 @@ multiclass SOP1_Real_gfx10 op> { Select_gfx10; } +multiclass SOP1_Real_gfx10_gfx11 op> : + SOP1_Real_gfx10, SOP1_Real_gfx11; + defm S_ANDN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x037>; defm S_ORN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x038>; defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>; @@ -1493,7 +1610,7 @@ defm S_ANDN2_WREXEC_B32 : SOP1_Real_gfx10<0x047>; defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>; //===----------------------------------------------------------------------===// -// SOP1 - GFX6, GFX7. +// SOP1 - GFX6, GFX7, GFX10, GFX11. //===----------------------------------------------------------------------===// @@ -1506,6 +1623,9 @@ multiclass SOP1_Real_gfx6_gfx7 op> { multiclass SOP1_Real_gfx6_gfx7_gfx10 op> : SOP1_Real_gfx6_gfx7, SOP1_Real_gfx10; +multiclass SOP1_Real_gfx6_gfx7_gfx10_gfx11 op> : + SOP1_Real_gfx6_gfx7, SOP1_Real_gfx10_gfx11; + defm S_CBRANCH_JOIN : SOP1_Real_gfx6_gfx7<0x032>; defm S_MOV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x003>; @@ -1547,7 +1667,7 @@ defm S_ANDN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x027>; defm S_ORN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x028>; defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x029>; defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02a>; -defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02b>; +defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10_gfx11<0x02b>; defm S_QUADMASK_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02c>; defm S_QUADMASK_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02d>; defm S_MOVRELS_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02e>; @@ -1556,6 +1676,65 @@ defm S_MOVRELD_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x030>; defm S_MOVRELD_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x031>; defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>; +//===----------------------------------------------------------------------===// +// SOP2 - GFX11. 
+//===----------------------------------------------------------------------===// + +multiclass SOP2_Real_gfx11 op> { + def _gfx11 : SOP2_Real(NAME)>, + Select_gfx11(NAME).Mnemonic>; +} + +multiclass SOP2_Real_Renamed_gfx11 op, SOP2_Pseudo backing_pseudo, string real_name> { + def _gfx11 : SOP2_Real, + Select_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; +} + +defm S_ABSDIFF_I32 : SOP2_Real_gfx11<0x006>; +defm S_LSHL_B32 : SOP2_Real_gfx11<0x008>; +defm S_LSHL_B64 : SOP2_Real_gfx11<0x009>; +defm S_LSHR_B32 : SOP2_Real_gfx11<0x00a>; +defm S_LSHR_B64 : SOP2_Real_gfx11<0x00b>; +defm S_ASHR_I32 : SOP2_Real_gfx11<0x00c>; +defm S_ASHR_I64 : SOP2_Real_gfx11<0x00d>; +defm S_LSHL1_ADD_U32 : SOP2_Real_gfx11<0x00e>; +defm S_LSHL2_ADD_U32 : SOP2_Real_gfx11<0x00f>; +defm S_LSHL3_ADD_U32 : SOP2_Real_gfx11<0x010>; +defm S_LSHL4_ADD_U32 : SOP2_Real_gfx11<0x011>; +defm S_MIN_I32 : SOP2_Real_gfx11<0x012>; +defm S_MIN_U32 : SOP2_Real_gfx11<0x013>; +defm S_MAX_I32 : SOP2_Real_gfx11<0x014>; +defm S_MAX_U32 : SOP2_Real_gfx11<0x015>; +defm S_AND_B32 : SOP2_Real_gfx11<0x016>; +defm S_AND_B64 : SOP2_Real_gfx11<0x017>; +defm S_OR_B32 : SOP2_Real_gfx11<0x018>; +defm S_OR_B64 : SOP2_Real_gfx11<0x019>; +defm S_XOR_B32 : SOP2_Real_gfx11<0x01a>; +defm S_XOR_B64 : SOP2_Real_gfx11<0x01b>; +defm S_NAND_B32 : SOP2_Real_gfx11<0x01c>; +defm S_NAND_B64 : SOP2_Real_gfx11<0x01d>; +defm S_NOR_B32 : SOP2_Real_gfx11<0x01e>; +defm S_NOR_B64 : SOP2_Real_gfx11<0x01f>; +defm S_XNOR_B32 : SOP2_Real_gfx11<0x020>; +defm S_XNOR_B64 : SOP2_Real_gfx11<0x021>; +defm S_AND_NOT1_B32 : SOP2_Real_Renamed_gfx11<0x022, S_ANDN2_B32, "s_and_not1_b32">; +defm S_AND_NOT1_B64 : SOP2_Real_Renamed_gfx11<0x023, S_ANDN2_B64, "s_and_not1_b64">; +defm S_OR_NOT1_B32 : SOP2_Real_Renamed_gfx11<0x024, S_ORN2_B32, "s_or_not1_b32">; +defm S_OR_NOT1_B64 : SOP2_Real_Renamed_gfx11<0x025, S_ORN2_B64, "s_or_not1_b64">; +defm S_BFE_U32 : SOP2_Real_gfx11<0x026>; +defm S_BFE_I32 : SOP2_Real_gfx11<0x027>; +defm S_BFE_U64 : SOP2_Real_gfx11<0x028>; +defm S_BFE_I64 : SOP2_Real_gfx11<0x029>; +defm S_BFM_B32 : SOP2_Real_gfx11<0x02a>; +defm S_BFM_B64 : SOP2_Real_gfx11<0x02b>; +defm S_MUL_I32 : SOP2_Real_gfx11<0x02c>; +defm S_MUL_HI_U32 : SOP2_Real_gfx11<0x02d>; +defm S_MUL_HI_I32 : SOP2_Real_gfx11<0x02e>; +defm S_CSELECT_B32 : SOP2_Real_gfx11<0x030>; +defm S_CSELECT_B64 : SOP2_Real_gfx11<0x031>; +defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11<0x035>; + //===----------------------------------------------------------------------===// // SOP2 - GFX10. 
//===----------------------------------------------------------------------===// @@ -1566,13 +1745,16 @@ multiclass SOP2_Real_gfx10 op> { Select_gfx10; } +multiclass SOP2_Real_gfx10_gfx11 op> : + SOP2_Real_gfx10, SOP2_Real_gfx11; + defm S_LSHL1_ADD_U32 : SOP2_Real_gfx10<0x02e>; defm S_LSHL2_ADD_U32 : SOP2_Real_gfx10<0x02f>; defm S_LSHL3_ADD_U32 : SOP2_Real_gfx10<0x030>; defm S_LSHL4_ADD_U32 : SOP2_Real_gfx10<0x031>; -defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10<0x032>; -defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10<0x033>; -defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10<0x034>; +defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10_gfx11<0x032>; +defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10_gfx11<0x033>; +defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10_gfx11<0x034>; defm S_MUL_HI_U32 : SOP2_Real_gfx10<0x035>; defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>; @@ -1589,14 +1771,17 @@ multiclass SOP2_Real_gfx6_gfx7 op> { multiclass SOP2_Real_gfx6_gfx7_gfx10 op> : SOP2_Real_gfx6_gfx7, SOP2_Real_gfx10; +multiclass SOP2_Real_gfx6_gfx7_gfx10_gfx11 op> : + SOP2_Real_gfx6_gfx7, SOP2_Real_gfx10_gfx11; + defm S_CBRANCH_G_FORK : SOP2_Real_gfx6_gfx7<0x02b>; -defm S_ADD_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x000>; -defm S_SUB_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x001>; -defm S_ADD_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x002>; -defm S_SUB_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x003>; -defm S_ADDC_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x004>; -defm S_SUBB_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x005>; +defm S_ADD_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x000>; +defm S_SUB_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x001>; +defm S_ADD_I32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x002>; +defm S_SUB_I32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>; +defm S_ADDC_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>; +defm S_SUBB_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>; defm S_MIN_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x006>; defm S_MIN_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x007>; defm S_MAX_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x008>; @@ -1634,6 +1819,31 @@ defm S_BFE_U64 : SOP2_Real_gfx6_gfx7_gfx10<0x029>; defm S_BFE_I64 : SOP2_Real_gfx6_gfx7_gfx10<0x02a>; defm S_ABSDIFF_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x02c>; +//===----------------------------------------------------------------------===// +// SOPK - GFX11. +//===----------------------------------------------------------------------===// + +multiclass SOPK_Real32_gfx11 op> { + def _gfx11 : SOPK_Real32(NAME)>, + Select_gfx11(NAME).Mnemonic>; +} + +multiclass SOPK_Real64_gfx11 op> { + def _gfx11 : SOPK_Real64(NAME)>, + Select_gfx11(NAME).Mnemonic>; +} + +defm S_GETREG_B32 : SOPK_Real32_gfx11<0x011>; +defm S_SETREG_B32 : SOPK_Real32_gfx11<0x012>; +defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11<0x013>; +defm S_CALL_B64 : SOPK_Real32_gfx11<0x014>; +defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>; +defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>; +defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>; +defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>; +defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>; +defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>; + //===----------------------------------------------------------------------===// // SOPK - GFX10. 
//===----------------------------------------------------------------------===// @@ -1650,7 +1860,10 @@ multiclass SOPK_Real64_gfx10 op> { Select_gfx10; } -defm S_VERSION : SOPK_Real32_gfx10<0x001>; +multiclass SOPK_Real32_gfx10_gfx11 op> : + SOPK_Real32_gfx10, SOPK_Real32_gfx11; + +defm S_VERSION : SOPK_Real32_gfx10_gfx11<0x001>; defm S_CALL_B64 : SOPK_Real32_gfx10<0x016>; defm S_WAITCNT_VSCNT : SOPK_Real32_gfx10<0x017>; defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>; @@ -1681,28 +1894,95 @@ multiclass SOPK_Real32_gfx6_gfx7_gfx10 op> : multiclass SOPK_Real64_gfx6_gfx7_gfx10 op> : SOPK_Real64_gfx6_gfx7, SOPK_Real64_gfx10; +multiclass SOPK_Real32_gfx6_gfx7_gfx10_gfx11 op> : + SOPK_Real32_gfx6_gfx7, SOPK_Real32_gfx10_gfx11; + defm S_CBRANCH_I_FORK : SOPK_Real32_gfx6_gfx7<0x011>; -defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x000>; -defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x002>; -defm S_CMPK_EQ_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x003>; -defm S_CMPK_LG_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x004>; -defm S_CMPK_GT_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x005>; -defm S_CMPK_GE_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x006>; -defm S_CMPK_LT_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x007>; -defm S_CMPK_LE_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x008>; -defm S_CMPK_EQ_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x009>; -defm S_CMPK_LG_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00a>; -defm S_CMPK_GT_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00b>; -defm S_CMPK_GE_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00c>; -defm S_CMPK_LT_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00d>; -defm S_CMPK_LE_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00e>; -defm S_ADDK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00f>; -defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x010>; +defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x000>; +defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x002>; +defm S_CMPK_EQ_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x003>; +defm S_CMPK_LG_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x004>; +defm S_CMPK_GT_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x005>; +defm S_CMPK_GE_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x006>; +defm S_CMPK_LT_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x007>; +defm S_CMPK_LE_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x008>; +defm S_CMPK_EQ_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x009>; +defm S_CMPK_LG_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00a>; +defm S_CMPK_GT_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00b>; +defm S_CMPK_GE_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00c>; +defm S_CMPK_LT_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00d>; +defm S_CMPK_LE_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00e>; +defm S_ADDK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00f>; +defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x010>; defm S_GETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x012>; defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>; defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>; +//===----------------------------------------------------------------------===// +// SOPP - GFX11 +//===----------------------------------------------------------------------===// + +multiclass SOPP_Real_32_gfx11 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _gfx11 : SOPP_Real_32(NAME), real_name>, + Select_gfx11(NAME).Mnemonic>, + SOPPRelaxTable<0, !cast(NAME).KeyName, "_gfx11">; +} + +multiclass SOPP_Real_64_gfx11 op, string real_name = !cast(NAME).Mnemonic # " "> { + def _gfx11 : SOPP_Real_64(NAME), real_name>, + Select_gfx11(NAME).Mnemonic>, + SOPPRelaxTable<1, !cast(NAME).KeyName, "_gfx11">; +} + +multiclass SOPP_Real_32_Renamed_gfx11 
+multiclass SOPP_Real_32_Renamed_gfx11<bits<7> op, SOPP_Pseudo backing_pseudo, string real_name> {
+  def _gfx11 : SOPP_Real_32<op, backing_pseudo, real_name>,
+    Select_gfx11<backing_pseudo.Mnemonic>,
+    MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+}
+
+multiclass SOPP_Real_With_Relaxation_gfx11<bits<7> op> {
+  defm "" : SOPP_Real_32_gfx11<op>;
+  defm _pad_s_nop : SOPP_Real_64_gfx11<op>;
+}
+
+defm S_SETKILL : SOPP_Real_32_gfx11<0x001>;
+defm S_SETHALT : SOPP_Real_32_gfx11<0x002>;
+defm S_SLEEP : SOPP_Real_32_gfx11<0x003>;
+defm S_SET_INST_PREFETCH_DISTANCE : SOPP_Real_32_Renamed_gfx11<0x004, S_INST_PREFETCH, "s_set_inst_prefetch_distance">;
+defm S_CLAUSE : SOPP_Real_32_gfx11<0x005>;
+defm S_DELAY_ALU : SOPP_Real_32_gfx11<0x007>;
+defm S_WAITCNT_DEPCTR : SOPP_Real_32_gfx11<0x008>;
+defm S_WAITCNT : SOPP_Real_32_gfx11<0x009>;
+defm S_WAIT_IDLE : SOPP_Real_32_gfx11<0x00a>;
+defm S_WAIT_EVENT : SOPP_Real_32_gfx11<0x00b>;
+defm S_TRAP : SOPP_Real_32_gfx11<0x010>;
+defm S_ROUND_MODE : SOPP_Real_32_gfx11<0x011>;
+defm S_DENORM_MODE : SOPP_Real_32_gfx11<0x012>;
+defm S_BRANCH : SOPP_Real_With_Relaxation_gfx11<0x020>;
+defm S_CBRANCH_SCC0 : SOPP_Real_With_Relaxation_gfx11<0x021>;
+defm S_CBRANCH_SCC1 : SOPP_Real_With_Relaxation_gfx11<0x022>;
+defm S_CBRANCH_VCCZ : SOPP_Real_With_Relaxation_gfx11<0x023>;
+defm S_CBRANCH_VCCNZ : SOPP_Real_With_Relaxation_gfx11<0x024>;
+defm S_CBRANCH_EXECZ : SOPP_Real_With_Relaxation_gfx11<0x025>;
+defm S_CBRANCH_EXECNZ : SOPP_Real_With_Relaxation_gfx11<0x026>;
+defm S_CBRANCH_CDBGSYS : SOPP_Real_With_Relaxation_gfx11<0x027>;
+defm S_CBRANCH_CDBGUSER : SOPP_Real_With_Relaxation_gfx11<0x028>;
+defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_Real_With_Relaxation_gfx11<0x029>;
+defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx11<0x02a>;
+defm S_ENDPGM : SOPP_Real_32_gfx11<0x030, "s_endpgm">;
+defm S_ENDPGM_SAVED : SOPP_Real_32_gfx11<0x031>;
+defm S_WAKEUP : SOPP_Real_32_gfx11<0x034>;
+defm S_SETPRIO : SOPP_Real_32_gfx11<0x035>;
+defm S_SENDMSG : SOPP_Real_32_gfx11<0x036>;
+defm S_SENDMSGHALT : SOPP_Real_32_gfx11<0x037>;
+defm S_INCPERFLEVEL : SOPP_Real_32_gfx11<0x038>;
+defm S_DECPERFLEVEL : SOPP_Real_32_gfx11<0x039>;
+defm S_TTRACEDATA : SOPP_Real_32_gfx11<0x03a>;
+defm S_TTRACEDATA_IMM : SOPP_Real_32_gfx11<0x03b>;
+defm S_ICACHE_INV : SOPP_Real_32_gfx11<0x03c>;
+defm S_BARRIER : SOPP_Real_32_gfx11<0x03d>;
+
 //===----------------------------------------------------------------------===//
 // SOPP - GFX6, GFX7, GFX8, GFX9, GFX10
 //===----------------------------------------------------------------------===//
@@ -1737,6 +2017,12 @@ multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
 multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
   SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<op, real_name>, SOPP_Real_32_gfx10<op, real_name>;
 
+multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+  SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op, real_name>, SOPP_Real_32_gfx11<op, real_name>;
+
+multiclass SOPP_Real_32_gfx10_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+  SOPP_Real_32_gfx10<op, real_name>, SOPP_Real_32_gfx11<op, real_name>;
+
 //64 bit encodings, for Relaxation
 multiclass SOPP_Real_64_gfx6_gfx7<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
   defvar ps = !cast<SOPP_Pseudo>(NAME);
@@ -1768,13 +2054,16 @@ multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
 multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
   SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<op, real_name>, SOPP_Real_64_gfx10<op, real_name>;
+multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+  SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op, real_name>, SOPP_Real_64_gfx11<op, real_name>;
+
 //relaxation for insts with no operands not implemented
 multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
   defm "" : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
   defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
 }
 
-defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x000>;
+defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<0x000>;
 defm S_ENDPGM : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x001, "s_endpgm">;
 defm S_WAKEUP : SOPP_Real_32_gfx8_gfx9_gfx10<0x003>;
 defm S_BARRIER : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00a>;
@@ -1794,7 +2083,7 @@ defm S_ENDPGM_SAVED : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x01B>;
 defm S_SET_GPR_IDX_OFF : SOPP_Real_32_gfx8_gfx9<0x01c>;
 defm S_SET_GPR_IDX_MODE : SOPP_Real_32_gfx8_gfx9<0x01d>;
 defm S_ENDPGM_ORDERED_PS_DONE : SOPP_Real_32_gfx8_gfx9_gfx10<0x01e>;
-defm S_CODE_END : SOPP_Real_32_gfx10<0x01f>;
+defm S_CODE_END : SOPP_Real_32_gfx10_gfx11<0x01f>;
 defm S_INST_PREFETCH : SOPP_Real_32_gfx10<0x020>;
 defm S_CLAUSE : SOPP_Real_32_gfx10<0x021>;
 defm S_WAIT_IDLE : SOPP_Real_32_gfx10<0x022>;
@@ -1817,6 +2106,34 @@ defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_
 defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<0x01A>;
 }
 
+//===----------------------------------------------------------------------===//
+// SOPC - GFX11
+//===----------------------------------------------------------------------===//
+
+multiclass SOPC_Real_gfx11<bits<7> op> {
+  def _gfx11 : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
+    Select_gfx11<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_CMP_EQ_I32 : SOPC_Real_gfx11<0x00>;
+defm S_CMP_LG_I32 : SOPC_Real_gfx11<0x01>;
+defm S_CMP_GT_I32 : SOPC_Real_gfx11<0x02>;
+defm S_CMP_GE_I32 : SOPC_Real_gfx11<0x03>;
+defm S_CMP_LT_I32 : SOPC_Real_gfx11<0x04>;
+defm S_CMP_LE_I32 : SOPC_Real_gfx11<0x05>;
+defm S_CMP_EQ_U32 : SOPC_Real_gfx11<0x06>;
+defm S_CMP_LG_U32 : SOPC_Real_gfx11<0x07>;
+defm S_CMP_GT_U32 : SOPC_Real_gfx11<0x08>;
+defm S_CMP_GE_U32 : SOPC_Real_gfx11<0x09>;
+defm S_CMP_LT_U32 : SOPC_Real_gfx11<0x0a>;
+defm S_CMP_LE_U32 : SOPC_Real_gfx11<0x0b>;
+defm S_BITCMP0_B32 : SOPC_Real_gfx11<0x0c>;
+defm S_BITCMP1_B32 : SOPC_Real_gfx11<0x0d>;
+defm S_BITCMP0_B64 : SOPC_Real_gfx11<0x0e>;
+defm S_BITCMP1_B64 : SOPC_Real_gfx11<0x0f>;
+defm S_CMP_EQ_U64 : SOPC_Real_gfx11<0x10>;
+defm S_CMP_LG_U64 : SOPC_Real_gfx11<0x11>;
+
 //===----------------------------------------------------------------------===//
 // SOPC - GFX6, GFX7, GFX8, GFX9, GFX10
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 18c348d1cf89..c0fd5bc69325 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -6,33 +6,64 @@
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUAsmUtils.h"
+#include "AMDGPUBaseInfo.h"
 #include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
-
 namespace llvm {
 namespace AMDGPU {
+
+namespace DepCtr {
+
+// NOLINTBEGIN
+const CustomOperandVal DepCtrInfo[] = {
+  // Name               max dflt offset width  constraint
+  {{"depctr_hold_cnt"},  1,   1,    7,    1,   isGFX10_BEncoding},
+  {{"depctr_sa_sdst"},   1,   1,    0,    1},
+  {{"depctr_va_vdst"},  15,  15,   12,    4},
+  {{"depctr_va_sdst"},   7,   7,    9,    3},
+  {{"depctr_va_ssrc"},   1,   1,    8,    1},
+  {{"depctr_va_vcc"},    1,   1,    1,    1},
+  {{"depctr_vm_vsrc"},   7,   7,    2,    3},
+};
+// NOLINTEND
+
+const int DEP_CTR_SIZE =
+    static_cast<int>(sizeof(DepCtrInfo) / sizeof(CustomOperandVal));
+
+} // namespace DepCtr
+
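
Each DepCtrInfo row above is a shift/width descriptor for one field of the s_waitcnt_depctr immediate. The decode/encode helpers that consume these rows live on CustomOperandVal in AMDGPUAsmUtils.h (shown later in this patch); they are plain bit-field accessors. A minimal self-contained sketch of the round-trip, hard-coding the depctr_vm_vsrc row (max 7, default 7, offset 2, width 3):

#include <cassert>

// Simplified mirror of CustomOperandVal's bit-field helpers; illustration only.
struct FieldDesc {
  unsigned Max;
  unsigned Default;
  unsigned Shift;
  unsigned Width;
  unsigned Mask = (1u << Width) - 1; // derived from Width at initialization

  unsigned decode(unsigned Code) const { return (Code >> Shift) & Mask; }
  unsigned encode(unsigned Val) const { return (Val & Mask) << Shift; }
};

int main() {
  const FieldDesc VmVsrc{7, 7, 2, 3}; // the depctr_vm_vsrc row above
  unsigned Code = VmVsrc.encode(5);   // (5 & 7) << 2 == 0x14
  assert(VmVsrc.decode(Code) == 5);   // round-trips
  return 0;
}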
 namespace SendMsg {
 
-// This must be in sync with llvm::AMDGPU::SendMsg::Id enum members, see SIDefines.h.
-const char *const IdSymbolic[ID_GAPS_LAST_] = {
-  nullptr,
-  "MSG_INTERRUPT",
-  "MSG_GS",
-  "MSG_GS_DONE",
-  "MSG_SAVEWAVE",
-  "MSG_STALL_WAVE_GEN",
-  "MSG_HALT_WAVES",
-  "MSG_ORDERED_PS_DONE",
-  "MSG_EARLY_PRIM_DEALLOC",
-  "MSG_GS_ALLOC_REQ",
-  "MSG_GET_DOORBELL",
-  "MSG_GET_DDID",
-  nullptr,
-  nullptr,
-  nullptr,
-  "MSG_SYSMSG"
+// Disable lint checking for this block since it makes the table unreadable.
+// NOLINTBEGIN
+const CustomOperand<const MCSubtargetInfo &> Msg[] = {
+  {{""}},
+  {{"MSG_INTERRUPT"}, ID_INTERRUPT},
+  {{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus},
+  {{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
+  {{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
+  {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus},
+  {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus},
+  {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9Plus},
+  {{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
+  {{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus},
+  {{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10},
+  {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10},
+  {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
+  {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
+  {{""}},
+  {{"MSG_SYSMSG"}, ID_SYSMSG},
+  {{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus},
+  {{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus},
+  {{"MSG_RTN_GET_TMA"}, ID_RTN_GET_TMA, isGFX11Plus},
+  {{"MSG_RTN_GET_REALTIME"}, ID_RTN_GET_REALTIME, isGFX11Plus},
+  {{"MSG_RTN_SAVE_WAVE"}, ID_RTN_SAVE_WAVE, isGFX11Plus},
+  {{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus},
 };
+// NOLINTEND
+
+const int MSG_SIZE = static_cast<int>(
+    sizeof(Msg) / sizeof(CustomOperand<const MCSubtargetInfo &>));
 
 // These two must be in sync with llvm::AMDGPU::SendMsg::Op enum members, see SIDefines.h.
 const char *const OpSysSymbolic[OP_SYS_LAST_] = {
@@ -54,39 +85,54 @@ const char *const OpGsSymbolic[OP_GS_LAST_] = {
 
 namespace Hwreg {
 
-// This must be in sync with llvm::AMDGPU::Hwreg::ID_SYMBOLIC_FIRST_/LAST_, see SIDefines.h.
-const char* const IdSymbolic[] = {
-  nullptr,
-  "HW_REG_MODE",
-  "HW_REG_STATUS",
-  "HW_REG_TRAPSTS",
-  "HW_REG_HW_ID",
-  "HW_REG_GPR_ALLOC",
-  "HW_REG_LDS_ALLOC",
-  "HW_REG_IB_STS",
-  nullptr,
-  nullptr,
-  nullptr,
-  nullptr,
-  nullptr,
-  nullptr,
-  nullptr,
-  "HW_REG_SH_MEM_BASES",
-  "HW_REG_TBA_LO",
-  "HW_REG_TBA_HI",
-  "HW_REG_TMA_LO",
-  "HW_REG_TMA_HI",
-  "HW_REG_FLAT_SCR_LO",
-  "HW_REG_FLAT_SCR_HI",
-  "HW_REG_XNACK_MASK",
-  "HW_REG_HW_ID1",
-  "HW_REG_HW_ID2",
-  "HW_REG_POPS_PACKER",
-  nullptr,
-  nullptr,
-  nullptr,
-  "HW_REG_SHADER_CYCLES"
+// Disable lint checking for this block since it makes the table unreadable.
+// NOLINTBEGIN +const CustomOperand Opr[] = { + {{""}}, + {{"HW_REG_MODE"}, ID_MODE}, + {{"HW_REG_STATUS"}, ID_STATUS}, + {{"HW_REG_TRAPSTS"}, ID_TRAPSTS}, + {{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus}, + {{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC}, + {{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC}, + {{"HW_REG_IB_STS"}, ID_IB_STS}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus}, + {{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10}, + {{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10}, + {{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10}, + {{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10}, + {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus}, + {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus}, + {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030}, + {{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus}, + {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus}, + {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10}, + {{""}}, + {{""}}, + {{""}}, + {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding}, + + // GFX940 specific registers + {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, + + // Aliases + {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, }; +// NOLINTEND + +const int OPR_SIZE = static_cast( + sizeof(Opr) / sizeof(CustomOperand)); } // namespace Hwreg @@ -144,7 +190,7 @@ StringLiteral const NfmtSymbolicVI[] = { // VI and GFX9 "BUF_NUM_FORMAT_FLOAT" }; -StringLiteral const UfmtSymbolic[] = { +StringLiteral const UfmtSymbolicGFX10[] = { "BUF_FMT_INVALID", "BUF_FMT_8_UNORM", @@ -238,7 +284,7 @@ StringLiteral const UfmtSymbolic[] = { "BUF_FMT_32_32_32_32_FLOAT" }; -unsigned const DfmtNfmt2UFmt[] = { +unsigned const DfmtNfmt2UFmtGFX10[] = { DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT), DFMT_8 | (NFMT_UNORM << NFMT_SHIFT), @@ -332,6 +378,166 @@ unsigned const DfmtNfmt2UFmt[] = { DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT) }; +StringLiteral const UfmtSymbolicGFX11[] = { + "BUF_FMT_INVALID", + + "BUF_FMT_8_UNORM", + "BUF_FMT_8_SNORM", + "BUF_FMT_8_USCALED", + "BUF_FMT_8_SSCALED", + "BUF_FMT_8_UINT", + "BUF_FMT_8_SINT", + + "BUF_FMT_16_UNORM", + "BUF_FMT_16_SNORM", + "BUF_FMT_16_USCALED", + "BUF_FMT_16_SSCALED", + "BUF_FMT_16_UINT", + "BUF_FMT_16_SINT", + "BUF_FMT_16_FLOAT", + + "BUF_FMT_8_8_UNORM", + "BUF_FMT_8_8_SNORM", + "BUF_FMT_8_8_USCALED", + "BUF_FMT_8_8_SSCALED", + "BUF_FMT_8_8_UINT", + "BUF_FMT_8_8_SINT", + + "BUF_FMT_32_UINT", + "BUF_FMT_32_SINT", + "BUF_FMT_32_FLOAT", + + "BUF_FMT_16_16_UNORM", + "BUF_FMT_16_16_SNORM", + "BUF_FMT_16_16_USCALED", + "BUF_FMT_16_16_SSCALED", + "BUF_FMT_16_16_UINT", + "BUF_FMT_16_16_SINT", + "BUF_FMT_16_16_FLOAT", + + "BUF_FMT_10_11_11_FLOAT", + + "BUF_FMT_11_11_10_FLOAT", + + "BUF_FMT_10_10_10_2_UNORM", + "BUF_FMT_10_10_10_2_SNORM", + "BUF_FMT_10_10_10_2_UINT", + "BUF_FMT_10_10_10_2_SINT", + + "BUF_FMT_2_10_10_10_UNORM", + "BUF_FMT_2_10_10_10_SNORM", + "BUF_FMT_2_10_10_10_USCALED", + "BUF_FMT_2_10_10_10_SSCALED", + "BUF_FMT_2_10_10_10_UINT", + "BUF_FMT_2_10_10_10_SINT", + + "BUF_FMT_8_8_8_8_UNORM", + "BUF_FMT_8_8_8_8_SNORM", + "BUF_FMT_8_8_8_8_USCALED", + "BUF_FMT_8_8_8_8_SSCALED", + "BUF_FMT_8_8_8_8_UINT", + "BUF_FMT_8_8_8_8_SINT", + + "BUF_FMT_32_32_UINT", + "BUF_FMT_32_32_SINT", + "BUF_FMT_32_32_FLOAT", + + 
"BUF_FMT_16_16_16_16_UNORM", + "BUF_FMT_16_16_16_16_SNORM", + "BUF_FMT_16_16_16_16_USCALED", + "BUF_FMT_16_16_16_16_SSCALED", + "BUF_FMT_16_16_16_16_UINT", + "BUF_FMT_16_16_16_16_SINT", + "BUF_FMT_16_16_16_16_FLOAT", + + "BUF_FMT_32_32_32_UINT", + "BUF_FMT_32_32_32_SINT", + "BUF_FMT_32_32_32_FLOAT", + "BUF_FMT_32_32_32_32_UINT", + "BUF_FMT_32_32_32_32_SINT", + "BUF_FMT_32_32_32_32_FLOAT" +}; + +unsigned const DfmtNfmt2UFmtGFX11[] = { + DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT), + + DFMT_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_8_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_16_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_10_11_11 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_11_11_10 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_10_10_10_2 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_UINT << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_2_10_10_10 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_UINT << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_8_8_8_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_16_16_16_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_32_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT) +}; + } // namespace MTBUFFormat namespace Swizzle { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h index d1deb570a938..054e35e90f2f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h +++ 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -11,15 +11,60 @@
 
 #include "SIDefines.h"
 
+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
 
 class StringLiteral;
+class MCSubtargetInfo;
 
 namespace AMDGPU {
 
+const int OPR_ID_UNKNOWN = -1;
+const int OPR_ID_UNSUPPORTED = -2;
+const int OPR_ID_DUPLICATE = -3;
+const int OPR_VAL_INVALID = -4;
+
+template <class T> struct CustomOperand {
+  StringLiteral Name;
+  int Encoding = 0;
+  bool (*Cond)(T Context) = nullptr;
+};
+
+struct CustomOperandVal {
+  StringLiteral Name;
+  unsigned Max;
+  unsigned Default;
+  unsigned Shift;
+  unsigned Width;
+  bool (*Cond)(const MCSubtargetInfo &STI) = nullptr;
+  unsigned Mask = (1 << Width) - 1;
+
+  unsigned decode(unsigned Code) const { return (Code >> Shift) & Mask; }
+
+  unsigned encode(unsigned Val) const { return (Val & Mask) << Shift; }
+
+  unsigned getMask() const { return Mask << Shift; }
+
+  bool isValid(unsigned Val) const { return Val <= Max; }
+
+  bool isSupported(const MCSubtargetInfo &STI) const {
+    return !Cond || Cond(STI);
+  }
+};
+
+namespace DepCtr {
+
+extern const CustomOperandVal DepCtrInfo[];
+extern const int DEP_CTR_SIZE;
+
+} // namespace DepCtr
+
 namespace SendMsg {
 
 // Symbolic names for the sendmsg(...) syntax.
-extern const char *const IdSymbolic[ID_GAPS_LAST_];
+extern const CustomOperand<const MCSubtargetInfo &> Msg[];
+extern const int MSG_SIZE;
 
 extern const char *const OpSysSymbolic[OP_SYS_LAST_];
 extern const char *const OpGsSymbolic[OP_GS_LAST_];
@@ -27,7 +72,8 @@ extern const char *const OpGsSymbolic[OP_GS_LAST_];
 namespace Hwreg {
 
 // Symbolic names for the hwreg(...) syntax.
-extern const char* const IdSymbolic[];
+extern const CustomOperand<const MCSubtargetInfo &> Opr[];
+extern const int OPR_SIZE;
 
 } // namespace Hwreg
 
@@ -37,8 +83,10 @@ extern StringLiteral const DfmtSymbolic[];
 extern StringLiteral const NfmtSymbolicGFX10[];
 extern StringLiteral const NfmtSymbolicSICI[];
 extern StringLiteral const NfmtSymbolicVI[];
-extern StringLiteral const UfmtSymbolic[];
-extern unsigned const DfmtNfmt2UFmt[];
+extern StringLiteral const UfmtSymbolicGFX10[];
+extern StringLiteral const UfmtSymbolicGFX11[];
+extern unsigned const DfmtNfmt2UFmtGFX10[];
+extern unsigned const DfmtNfmt2UFmtGFX11[];
 
 } // namespace MTBUFFormat
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 683be871ff82..e4ab72f1095b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -28,10 +28,15 @@
 #define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
 
-static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
-    "amdhsa-code-object-version", llvm::cl::Hidden,
-    llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
-    llvm::cl::ZeroOrMore);
+static llvm::cl::opt<unsigned>
+    AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
+                            llvm::cl::desc("AMDHSA Code Object Version"),
+                            llvm::cl::init(4));
+
+// TODO-GFX11: Remove this when full 16-bit codegen is implemented.
+static llvm::cl::opt<bool>
+    LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden,
+                    llvm::cl::desc("Never use more than 128 VGPRs"));
 
 namespace {
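
The hunk that follows rewrites packBits into a single mask-and-merge expression. A standalone re-statement of the new form with a worked value (this mirrors the patched helpers under stated assumptions; it is not the LLVM code itself):

#include <cassert>

static unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1) << Shift;
}

// Insert Src into the Width-bit field of Dst starting at bit Shift.
static unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift,
                         unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

static unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src >> Shift) & ((1u << Width) - 1);
}

int main() {
  // Write 5 into bits [6:4] of 0xFFFF: the field 111 becomes 101.
  unsigned Packed = packBits(5, 0xFFFF, 4, 3);
  assert(Packed == 0xFFDF);
  assert(unpackBits(Packed, 4, 3) == 5);
  return 0;
}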
@@ -44,9 +49,8 @@ unsigned getBitMask(unsigned Shift, unsigned Width) {
 ///
 /// \returns Packed \p Dst.
 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
-  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
-  Dst |= (Src << Shift) & getBitMask(Shift, Width);
-  return Dst;
+  unsigned Mask = getBitMask(Shift, Width);
+  return ((Src << Shift) & Mask) | (Dst & ~Mask);
 }
 
 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
@@ -57,30 +61,40 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
 }
 
 /// \returns Vmcnt bit shift (lower bits).
-unsigned getVmcntBitShiftLo() { return 0; }
+unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
+  return VersionMajor >= 11 ? 10 : 0;
+}
 
 /// \returns Vmcnt bit width (lower bits).
-unsigned getVmcntBitWidthLo() { return 4; }
+unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
+  return VersionMajor >= 11 ? 6 : 4;
+}
 
 /// \returns Expcnt bit shift.
-unsigned getExpcntBitShift() { return 4; }
+unsigned getExpcntBitShift(unsigned VersionMajor) {
+  return VersionMajor >= 11 ? 0 : 4;
+}
 
 /// \returns Expcnt bit width.
-unsigned getExpcntBitWidth() { return 3; }
+unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
 
 /// \returns Lgkmcnt bit shift.
-unsigned getLgkmcntBitShift() { return 8; }
+unsigned getLgkmcntBitShift(unsigned VersionMajor) {
+  return VersionMajor >= 11 ? 4 : 8;
+}
 
 /// \returns Lgkmcnt bit width.
 unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
-  return (VersionMajor >= 10) ? 6 : 4;
+  return VersionMajor >= 10 ? 6 : 4;
 }
 
 /// \returns Vmcnt bit shift (higher bits).
-unsigned getVmcntBitShiftHi() { return 14; }
+unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
 
 /// \returns Vmcnt bit width (higher bits).
-unsigned getVmcntBitWidthHi() { return 2; }
+unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
+  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
+}
 
 } // end namespace anonymous
 
@@ -136,6 +150,41 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
          isHsaAbiVersion5(STI);
 }
 
+unsigned getAmdhsaCodeObjectVersion() {
+  return AmdhsaCodeObjectVersion;
+}
+
+unsigned getMultigridSyncArgImplicitArgPosition() {
+  switch (AmdhsaCodeObjectVersion) {
+  case 2:
+  case 3:
+  case 4:
+    return 48;
+  case 5:
+    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
+  default:
+    llvm_unreachable("Unexpected code object version");
+    return 0;
+  }
+}
+
+
+// FIXME: All such magic numbers about the ABI should be in a
+// central TD file.
+unsigned getHostcallImplicitArgPosition() { + switch (AmdhsaCodeObjectVersion) { + case 2: + case 3: + case 4: + return 24; + case 5: + return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; + default: + llvm_unreachable("Unexpected code object version"); + return 0; + } +} + #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL @@ -144,6 +193,7 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) { #define GET_MIMGBiasMappingTable_IMPL #define GET_MIMGOffsetMappingTable_IMPL #define GET_MIMGG16MappingTable_IMPL +#define GET_MAIInstInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, @@ -223,6 +273,10 @@ struct VOPInfo { bool IsSingle; }; +struct VOPC64DPPInfo { + uint16_t Opcode; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL @@ -235,6 +289,14 @@ struct VOPInfo { #define GET_VOP2InfoTable_IMPL #define GET_VOP3InfoTable_DECL #define GET_VOP3InfoTable_IMPL +#define GET_VOPC64DPPTable_DECL +#define GET_VOPC64DPPTable_IMPL +#define GET_VOPC64DPP8Table_DECL +#define GET_VOPC64DPP8Table_IMPL +#define GET_WMMAOpcode2AddrMappingTable_DECL +#define GET_WMMAOpcode2AddrMappingTable_IMPL +#define GET_WMMAOpcode3AddrMappingTable_DECL +#define GET_WMMAOpcode3AddrMappingTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMTBUFBaseOpcode(unsigned Opc) { @@ -322,6 +384,30 @@ bool getVOP3IsSingle(unsigned Opc) { return Info ? Info->IsSingle : false; } +bool isVOPC64DPP(unsigned Opc) { + return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); +} + +bool getMAIIsDGEMM(unsigned Opc) { + const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); + return Info ? Info->is_dgemm : false; +} + +bool getMAIIsGFX940XDL(unsigned Opc) { + const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); + return Info ? Info->is_gfx940_xdl : false; +} + +unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { + const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); + return Info ? Info->Opcode3Addr : ~0u; +} + +unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { + const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc); + return Info ? Info->Opcode2Addr : ~0u; +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. @@ -740,6 +826,15 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { + if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs + : isGFX11Plus(*STI)) { + // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions + // such that values 128..255 no longer mean v128..v255, they mean + // v0.hi..v127.hi instead. Until the compiler understands this, it is not + // safe to use v128..v255. + // TODO-GFX11: Remove this when full 16-bit codegen is implemented. 
+ return 128; + } if (STI->getFeatureBits().test(FeatureGFX90AInsts)) return 512; return 256; @@ -904,16 +999,13 @@ std::pair getIntegerPairAttribute(const Function &F, } unsigned getVmcntBitMask(const IsaVersion &Version) { - unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; - if (Version.Major < 9) - return VmcntLo; - - unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo(); - return VmcntLo | VmcntHi; + return (1 << (getVmcntBitWidthLo(Version.Major) + + getVmcntBitWidthHi(Version.Major))) - + 1; } unsigned getExpcntBitMask(const IsaVersion &Version) { - return (1 << getExpcntBitWidth()) - 1; + return (1 << getExpcntBitWidth(Version.Major)) - 1; } unsigned getLgkmcntBitMask(const IsaVersion &Version) { @@ -921,36 +1013,32 @@ unsigned getLgkmcntBitMask(const IsaVersion &Version) { } unsigned getWaitcntBitMask(const IsaVersion &Version) { - unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); - unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); - unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), + unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major), + getVmcntBitWidthLo(Version.Major)); + unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major), + getExpcntBitWidth(Version.Major)); + unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major), getLgkmcntBitWidth(Version.Major)); - unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt; - if (Version.Major < 9) - return Waitcnt; - - unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi()); - return Waitcnt | VmcntHi; + unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major), + getVmcntBitWidthHi(Version.Major)); + return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; } unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { - unsigned VmcntLo = - unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); - if (Version.Major < 9) - return VmcntLo; - - unsigned VmcntHi = - unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); - VmcntHi <<= getVmcntBitWidthLo(); - return VmcntLo | VmcntHi; + unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major), + getVmcntBitWidthLo(Version.Major)); + unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major), + getVmcntBitWidthHi(Version.Major)); + return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major); } unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { - return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); + return unpackBits(Waitcnt, getExpcntBitShift(Version.Major), + getExpcntBitWidth(Version.Major)); } unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { - return unpackBits(Waitcnt, getLgkmcntBitShift(), + return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major), getLgkmcntBitWidth(Version.Major)); } @@ -971,24 +1059,23 @@ Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt) { - Waitcnt = - packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); - if (Version.Major < 9) - return Waitcnt; - - Vmcnt >>= getVmcntBitWidthLo(); - return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); + Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major), + getVmcntBitWidthLo(Version.Major)); + return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt, + getVmcntBitShiftHi(Version.Major), + getVmcntBitWidthHi(Version.Major)); } unsigned 
encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt) {
-  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
+                  getExpcntBitWidth(Version.Major));
 }
 
 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                        unsigned Lgkmcnt) {
-  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
-                  getLgkmcntBitWidth(Version.Major));
+  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
+                  getLgkmcntBitWidth(Version.Major));
 }
 
 unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -1005,43 +1092,184 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
 }
 
 //===----------------------------------------------------------------------===//
-// hwreg
+// Custom Operands.
+//
+// A table of custom operands shall describe "primary" operand names
+// first followed by aliases if any. It is not required but recommended
+// to arrange operands so that operand encoding match operand position
+// in the table. This will make disassembly a bit more efficient.
+// Unused slots in the table shall have an empty name.
+//
 //===----------------------------------------------------------------------===//
 
-namespace Hwreg {
-
-int64_t getHwregId(const StringRef Name) {
-  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
-    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
-      return Id;
+template <class T>
+static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
+                       T Context) {
+  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
+         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
+}
+
+template <class T>
+static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
+                     const CustomOperand<T> OpInfo[], int OpInfoSize,
+                     T Context) {
+  int InvalidIdx = OPR_ID_UNKNOWN;
+  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
+    if (Test(OpInfo[Idx])) {
+      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
+        return Idx;
+      InvalidIdx = OPR_ID_UNSUPPORTED;
+    }
   }
-  return ID_UNKNOWN_;
+  return InvalidIdx;
+}
+
+template <class T>
+static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
+                     int OpInfoSize, T Context) {
+  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
+  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
+}
+
+template <class T>
+static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
+                     T Context, bool QuickCheck = true) {
+  auto Test = [=](const CustomOperand<T> &Op) {
+    return Op.Encoding == Id && !Op.Name.empty();
+  };
+  // This is an optimization that should work in most cases.
+  // As a side effect, it may cause selection of an alias
+  // instead of a primary operand name in case of sparse tables.
+  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
+      OpInfo[Id].Encoding == Id) {
+    return Id;
+  }
+  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
+}
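
The getOprIdx overloads above implement the lookup discipline described in the "Custom Operands" comment: a linear scan in which the first supported match wins, OPR_ID_UNSUPPORTED records "the name exists but the subtarget rejects it", and OPR_ID_UNKNOWN means no row matched at all. A self-contained sketch of the same discipline over a toy table (names and the bool context are invented for illustration):

#include <cassert>
#include <string>

const int OPR_ID_UNKNOWN = -1;
const int OPR_ID_UNSUPPORTED = -2;

struct ToyOperand {
  const char *Name;
  int Encoding;
  bool (*Cond)(bool Ctx); // nullptr means "always supported"
};

static bool onlyNewTargets(bool IsNew) { return IsNew; }

// Primary names first; an alias for encoding 1 sits at the end.
static const ToyOperand Table[] = {
    {"reg_a", 0, nullptr},
    {"reg_b", 1, onlyNewTargets},
    {"reg_b_alias", 1, nullptr},
};

static int getOprIdx(const std::string &Name, bool Ctx) {
  int Invalid = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < 3; ++Idx) {
    if (Name == Table[Idx].Name) {
      if (!Table[Idx].Cond || Table[Idx].Cond(Ctx))
        return Idx;                 // first supported match wins
      Invalid = OPR_ID_UNSUPPORTED; // known name, wrong subtarget
    }
  }
  return Invalid;
}

int main() {
  assert(getOprIdx("reg_a", false) == 0);
  assert(getOprIdx("reg_b", false) == OPR_ID_UNSUPPORTED);
  assert(getOprIdx("reg_b", true) == 1);
  assert(getOprIdx("missing", true) == OPR_ID_UNKNOWN);
  return 0;
}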
 
-static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
-  if (isSI(STI) || isCI(STI) || isVI(STI))
-    return ID_SYMBOLIC_FIRST_GFX9_;
-  else if (isGFX9(STI))
-    return ID_SYMBOLIC_FIRST_GFX10_;
-  else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
-    return ID_SYMBOLIC_FIRST_GFX1030_;
-  else
-    return ID_SYMBOLIC_LAST_;
+//===----------------------------------------------------------------------===//
+// Custom Operand Values
+//===----------------------------------------------------------------------===//
+
+static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
+                                                int Size,
+                                                const MCSubtargetInfo &STI) {
+  unsigned Enc = 0;
+  for (int Idx = 0; Idx < Size; ++Idx) {
+    const auto &Op = Opr[Idx];
+    if (Op.isSupported(STI))
+      Enc |= Op.encode(Op.Default);
+  }
+  return Enc;
+}
+
+static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
+                                            int Size, unsigned Code,
+                                            bool &HasNonDefaultVal,
+                                            const MCSubtargetInfo &STI) {
+  unsigned UsedOprMask = 0;
+  HasNonDefaultVal = false;
+  for (int Idx = 0; Idx < Size; ++Idx) {
+    const auto &Op = Opr[Idx];
+    if (!Op.isSupported(STI))
+      continue;
+    UsedOprMask |= Op.getMask();
+    unsigned Val = Op.decode(Code);
+    if (!Op.isValid(Val))
+      return false;
+    HasNonDefaultVal |= (Val != Op.Default);
+  }
+  return (Code & ~UsedOprMask) == 0;
+}
+
+static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
+                                unsigned Code, int &Idx, StringRef &Name,
+                                unsigned &Val, bool &IsDefault,
+                                const MCSubtargetInfo &STI) {
+  while (Idx < Size) {
+    const auto &Op = Opr[Idx++];
+    if (Op.isSupported(STI)) {
+      Name = Op.Name;
+      Val = Op.decode(Code);
+      IsDefault = (Val == Op.Default);
+      return true;
+    }
+  }
+
+  return false;
 }
 
-bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
-  switch (Id) {
-  case ID_HW_ID:
-    return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
-  case ID_HW_ID1:
-  case ID_HW_ID2:
-    return isGFX10Plus(STI);
-  case ID_XNACK_MASK:
-    return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
-  default:
-    return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
-           IdSymbolic[Id];
+static int encodeCustomOperandVal(const CustomOperandVal &Op,
+                                  int64_t InputVal) {
+  if (InputVal < 0 || InputVal > Op.Max)
+    return OPR_VAL_INVALID;
+  return Op.encode(InputVal);
+}
+
+static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
+                               const StringRef Name, int64_t InputVal,
+                               unsigned &UsedOprMask,
+                               const MCSubtargetInfo &STI) {
+  int InvalidId = OPR_ID_UNKNOWN;
+  for (int Idx = 0; Idx < Size; ++Idx) {
+    const auto &Op = Opr[Idx];
+    if (Op.Name == Name) {
+      if (!Op.isSupported(STI)) {
+        InvalidId = OPR_ID_UNSUPPORTED;
+        continue;
+      }
+      auto OprMask = Op.getMask();
+      if (OprMask & UsedOprMask)
+        return OPR_ID_DUPLICATE;
+      UsedOprMask |= OprMask;
+      return encodeCustomOperandVal(Op, InputVal);
+    }
   }
+  return InvalidId;
+}
+
+//===----------------------------------------------------------------------===//
+// DepCtr
+//===----------------------------------------------------------------------===//
+
+namespace DepCtr {
+
+int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
+  static int Default = -1;
+  if (Default == -1)
+    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
+  return Default;
+}
+
+bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
+                              const MCSubtargetInfo &STI) {
+ return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, + HasNonDefaultVal, STI); +} + +bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, + bool &IsDefault, const MCSubtargetInfo &STI) { + return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, + IsDefault, STI); +} + +int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, + const MCSubtargetInfo &STI) { + return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, + STI); +} + +} // namespace DepCtr + +//===----------------------------------------------------------------------===// +// hwreg +//===----------------------------------------------------------------------===// + +namespace Hwreg { + +int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) { + int Idx = getOprIdx(Name, Opr, OPR_SIZE, STI); + return (Idx < 0) ? Idx : Opr[Idx].Encoding; } bool isValidHwreg(int64_t Id) { @@ -1063,7 +1291,8 @@ uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) { } StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { - return isValidHwreg(Id, STI) ? IdSymbolic[Id] : ""; + int Idx = getOprIdx(Id, Opr, OPR_SIZE, STI); + return (Idx < 0) ? "" : Opr[Idx].Name; } void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) { @@ -1087,12 +1316,13 @@ struct ExpTgt { }; static constexpr ExpTgt ExpTgtInfo[] = { - {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, - {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, - {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, - {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, - {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, - {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, + {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, + {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, + {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, + {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, + {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, + {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, + {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, }; bool getTgtName(unsigned Id, StringRef &Name, int &Index) { @@ -1130,7 +1360,20 @@ unsigned getTgtId(const StringRef Name) { } bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { - return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI); + switch (Id) { + case ET_NULL: + return !isGFX11Plus(STI); + case ET_POS4: + case ET_PRIM: + return isGFX10Plus(STI); + case ET_DUAL_SRC_BLEND0: + case ET_DUAL_SRC_BLEND1: + return isGFX11Plus(STI); + default: + if (Id >= ET_PARAM0 && Id <= ET_PARAM31) + return !isGFX11Plus(STI); + return true; + } } } // namespace Exp @@ -1196,27 +1439,44 @@ void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; } -int64_t getUnifiedFormat(const StringRef Name) { - for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) { - if (Name == UfmtSymbolic[Id]) - return Id; +int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { + if (isGFX11Plus(STI)) { + for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { + if (Name == UfmtSymbolicGFX11[Id]) + return Id; + } + } else { + for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { + if (Name == UfmtSymbolicGFX10[Id]) + return Id; + } } return UFMT_UNDEF; } -StringRef getUnifiedFormatName(unsigned Id) { - return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : ""; +StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { + if(isValidUnifiedFormat(Id, STI)) + return isGFX10(STI) ? 
UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
+  return "";
 }
 
-bool isValidUnifiedFormat(unsigned Id) {
-  return Id <= UFMT_LAST;
+bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
+  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
 }
 
-int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
+                             const MCSubtargetInfo &STI) {
   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
-  for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
-    if (Fmt == DfmtNfmt2UFmt[Id])
-      return Id;
+  if (isGFX11Plus(STI)) {
+    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
+      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
+        return Id;
+    }
+  } else {
+    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
+      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
+        return Id;
+    }
   }
   return UFMT_UNDEF;
 }
@@ -1239,40 +1499,22 @@ unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
 
 namespace SendMsg {
 
-int64_t getMsgId(const StringRef Name) {
-  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
-    if (IdSymbolic[i] && Name == IdSymbolic[i])
-      return i;
-  }
-  return ID_UNKNOWN_;
+static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
+  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
 }
 
-bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
-  if (Strict) {
-    switch (MsgId) {
-    case ID_SAVEWAVE:
-      return isVI(STI) || isGFX9Plus(STI);
-    case ID_STALL_WAVE_GEN:
-    case ID_HALT_WAVES:
-    case ID_ORDERED_PS_DONE:
-    case ID_GS_ALLOC_REQ:
-    case ID_GET_DOORBELL:
-      return isGFX9Plus(STI);
-    case ID_EARLY_PRIM_DEALLOC:
-      return isGFX9(STI);
-    case ID_GET_DDID:
-      return isGFX10Plus(STI);
-    default:
-      return 0 <= MsgId && MsgId < ID_GAPS_LAST_ && IdSymbolic[MsgId];
-    }
-  } else {
-    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
-  }
+int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
+  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
+  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
 }
 
-StringRef getMsgName(int64_t MsgId) {
-  assert(0 <= MsgId && MsgId < ID_GAPS_LAST_);
-  return IdSymbolic[MsgId];
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
+  return (MsgId & ~(getMsgIdMask(STI))) == 0;
+}
+
+StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
+  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
+  return (Idx < 0) ? "" : Msg[Idx].Name;
+}
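
With the table-driven scheme, getMsgId and getMsgName are inverse lookups over Msg[]: by name, returning the matching row's Encoding, and by encoding, returning the row's Name, both filtered through the row's subtarget predicate. Expected behavior, sketched from the table and predicates above (illustrative, not tool output):

//   getMsgId("MSG_GS", STI)          -> ID_GS_PreGFX11 on pre-GFX11 targets,
//                                       OPR_ID_UNSUPPORTED (-2) on GFX11+
//   getMsgId("MSG_RTN_GET_TBA", STI) -> ID_RTN_GET_TBA on GFX11+,
//                                       OPR_ID_UNSUPPORTED (-2) otherwise
//   getMsgName(getMsgId("MSG_SYSMSG", STI), STI) -> "MSG_SYSMSG"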
"" : Msg[Idx].Name; } int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { @@ -1289,26 +1531,27 @@ int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict) { - assert(isValidMsgId(MsgId, STI, Strict)); + assert(isValidMsgId(MsgId, STI)); if (!Strict) return 0 <= OpId && isUInt(OpId); - switch(MsgId) - { - case ID_GS: - return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; - case ID_GS_DONE: - return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; - case ID_SYSMSG: + if (MsgId == ID_SYSMSG) return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; - default: - return OpId == OP_NONE_; + if (!isGFX11Plus(STI)) { + switch (MsgId) { + case ID_GS_PreGFX11: + return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; + case ID_GS_DONE_PreGFX11: + return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; + } } + return OpId == OP_NONE_; } -StringRef getMsgOpName(int64_t MsgId, int64_t OpId) { - assert(msgRequiresOp(MsgId)); +StringRef getMsgOpName(int64_t MsgId, int64_t OpId, + const MCSubtargetInfo &STI) { + assert(msgRequiresOp(MsgId, STI)); return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId]; } @@ -1319,42 +1562,48 @@ bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, if (!Strict) return 0 <= StreamId && isUInt(StreamId); - switch(MsgId) - { - case ID_GS: - return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; - case ID_GS_DONE: - return (OpId == OP_GS_NOP)? - (StreamId == STREAM_ID_NONE_) : - (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); - default: - return StreamId == STREAM_ID_NONE_; + if (!isGFX11Plus(STI)) { + switch (MsgId) { + case ID_GS_PreGFX11: + return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; + case ID_GS_DONE_PreGFX11: + return (OpId == OP_GS_NOP) ? 
+ (StreamId == STREAM_ID_NONE_) : + (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); + } } + return StreamId == STREAM_ID_NONE_; } -bool msgRequiresOp(int64_t MsgId) { - return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG; +bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { + return MsgId == ID_SYSMSG || + (!isGFX11Plus(STI) && + (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); } -bool msgSupportsStream(int64_t MsgId, int64_t OpId) { - return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP; +bool msgSupportsStream(int64_t MsgId, int64_t OpId, + const MCSubtargetInfo &STI) { + return !isGFX11Plus(STI) && + (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && + OpId != OP_GS_NOP; } -void decodeMsg(unsigned Val, - uint16_t &MsgId, - uint16_t &OpId, - uint16_t &StreamId) { - MsgId = Val & ID_MASK_; - OpId = (Val & OP_MASK_) >> OP_SHIFT_; - StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; +void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, + uint16_t &StreamId, const MCSubtargetInfo &STI) { + MsgId = Val & getMsgIdMask(STI); + if (isGFX11Plus(STI)) { + OpId = 0; + StreamId = 0; + } else { + OpId = (Val & OP_MASK_) >> OP_SHIFT_; + StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; + } } uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) { - return (MsgId << ID_SHIFT_) | - (OpId << OP_SHIFT_) | - (StreamId << STREAM_ID_SHIFT_); + return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); } } // namespace SendMsg @@ -1427,6 +1676,10 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) { } } +bool isKernelCC(const Function *Func) { + return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); +} + bool hasXNACK(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } @@ -1448,7 +1701,8 @@ bool hasG16(const MCSubtargetInfo &STI) { } bool hasPackedD16(const MCSubtargetInfo &STI) { - return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]; + return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) && + !isSI(STI); } bool isSI(const MCSubtargetInfo &STI) { @@ -1467,6 +1721,18 @@ bool isGFX9(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; } +bool isGFX9_GFX10(const MCSubtargetInfo &STI) { + return isGFX9(STI) || isGFX10(STI); +} + +bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { + return isVI(STI) || isGFX9(STI) || isGFX10(STI); +} + +bool isGFX8Plus(const MCSubtargetInfo &STI) { + return isVI(STI) || isGFX9Plus(STI); +} + bool isGFX9Plus(const MCSubtargetInfo &STI) { return isGFX9(STI) || isGFX10Plus(STI); } @@ -1475,7 +1741,29 @@ bool isGFX10(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } -bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); } +bool isGFX10Plus(const MCSubtargetInfo &STI) { + return isGFX10(STI) || isGFX11Plus(STI); +} + +bool isGFX11(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX11]; +} + +bool isGFX11Plus(const MCSubtargetInfo &STI) { + return isGFX11(STI); +} + +bool isNotGFX11Plus(const MCSubtargetInfo &STI) { + return !isGFX11Plus(STI); +} + +bool isNotGFX10Plus(const MCSubtargetInfo &STI) { + return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); +} + +bool isGFX10Before1030(const MCSubtargetInfo &STI) { + return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); +} bool isGCN3Encoding(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]; @@ -1497,10 
+1785,29 @@ bool isGFX90A(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]; } +bool isGFX940(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]; +} + bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; } +bool hasMAIInsts(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts]; +} + +bool hasVOPD(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureVOPD]; +} + +int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, + int32_t ArgNumVGPR) { + if (has90AInsts && ArgNumAGPR) + return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; + return std::max(ArgNumVGPR, ArgNumAGPR); +} + bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); @@ -1508,13 +1815,6 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { Reg == AMDGPU::SCC; } -bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { - for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) { - if (*R == Reg1) return true; - } - return false; -} - #define MAP_REG2REG \ using namespace AMDGPU; \ switch(Reg) { \ @@ -1554,6 +1854,9 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_GFXPRE11_GFX11PLUS(M0) \ + CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ + CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ } #define CASE_CI_VI(node) \ @@ -1563,6 +1866,12 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { #define CASE_VI_GFX9PLUS(node) \ case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; +#define CASE_GFXPRE11_GFX11PLUS(node) \ + case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; + +#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ + case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; + unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { if (STI.getTargetTriple().getArch() == Triple::r600) return Reg; @@ -1571,9 +1880,13 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { #undef CASE_CI_VI #undef CASE_VI_GFX9PLUS +#undef CASE_GFXPRE11_GFX11PLUS +#undef CASE_GFXPRE11_GFX11PLUS_TO #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; +#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; +#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) unsigned mc2PseudoReg(unsigned Reg) { MAP_REG2REG @@ -1581,6 +1894,8 @@ unsigned mc2PseudoReg(unsigned Reg) { #undef CASE_CI_VI #undef CASE_VI_GFX9PLUS +#undef CASE_GFXPRE11_GFX11PLUS +#undef CASE_GFXPRE11_GFX11PLUS_TO #undef MAP_REG2REG bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { @@ -1934,7 +2249,7 @@ Optional getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, } unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) { - // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. + // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+. 
if (AMDGPU::isGFX10(ST)) return Signed ? 12 : 11; @@ -2029,7 +2344,8 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); #define GET_SourcesOfDivergence_IMPL #define GET_Gfx9BufferFormat_IMPL -#define GET_Gfx10PlusBufferFormat_IMPL +#define GET_Gfx10BufferFormat_IMPL +#define GET_Gfx11PlusBufferFormat_IMPL #include "AMDGPUGenSearchableTables.inc" } // end anonymous namespace @@ -2042,16 +2358,20 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI) { - return isGFX10Plus(STI) - ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents, + return isGFX11Plus(STI) + ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, NumFormat) - : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat); + : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, + NumComponents, NumFormat) + : getGfx9BufferFormatInfo(BitsPerComp, + NumComponents, NumFormat); } const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, const MCSubtargetInfo &STI) { - return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format) - : getGfx9BufferFormatInfo(Format); + return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) + : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) + : getGfx9BufferFormatInfo(Format); } } // namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 4516b511f3c8..dffeec10a14a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -50,10 +50,19 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 5, /// false otherwise. bool isHsaAbiVersion5(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 3 or 4, +/// \returns True if HSA OS ABI Version identification is 3 and above, /// false otherwise. bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); +/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr +unsigned getMultigridSyncArgImplicitArgPosition(); + +/// \returns The offset of the hostcall pointer argument from implicitarg_ptr +unsigned getHostcallImplicitArgPosition(); + +/// \returns Code object version. +unsigned getAmdhsaCodeObjectVersion(); + struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; @@ -62,12 +71,19 @@ struct GcnBufferFormatInfo { unsigned DataFormat; }; +struct MAIInstInfo { + uint16_t Opcode; + bool is_dgemm; + bool is_gfx940_xdl; +}; + #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL #define GET_MIMGLZMapping_DECL #define GET_MIMGMIPMapping_DECL #define GET_MIMGBiASMapping_DECL +#define GET_MAIInstInfoTable_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { @@ -352,6 +368,11 @@ struct MIMGG16MappingInfo { LLVM_READONLY const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); +struct WMMAOpcodeMappingInfo { + unsigned Opcode2Addr; + unsigned Opcode3Addr; +}; + LLVM_READONLY const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); @@ -382,6 +403,7 @@ struct MIMGInfo { uint8_t MIMGEncoding; uint8_t VDataDwords; uint8_t VAddrDwords; + uint8_t VAddrOperands; }; LLVM_READONLY @@ -438,6 +460,16 @@ bool getVOP2IsSingle(unsigned Opc); LLVM_READONLY bool getVOP3IsSingle(unsigned Opc); +LLVM_READONLY +bool isVOPC64DPP(unsigned Opc); + +/// Returns true if MAI operation is a double precision GEMM. 
+LLVM_READONLY +bool getMAIIsDGEMM(unsigned Opc); + +LLVM_READONLY +bool getMAIIsGFX940XDL(unsigned Opc); + LLVM_READONLY const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, @@ -450,6 +482,12 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); +LLVM_READONLY +unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); + +LLVM_READONLY +unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI); @@ -496,7 +534,7 @@ struct Waitcnt { unsigned LgkmCnt = ~0u; unsigned VsCnt = ~0u; - Waitcnt() {} + Waitcnt() = default; Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {} @@ -555,11 +593,14 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); /// \p Lgkmcnt respectively. /// /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: -/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only) -/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) -/// \p Expcnt = \p Waitcnt[6:4] -/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only) -/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only) +/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) +/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) +/// \p Vmcnt = \p Waitcnt[15:10] (gfx11+) +/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) +/// \p Expcnt = \p Waitcnt[2:0] (gfx11+) +/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) +/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) +/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11+) void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); @@ -581,12 +622,15 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, /// \p Version. /// /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: -/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only) -/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only) -/// Waitcnt[6:4] = \p Expcnt -/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only) -/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only) -/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only) +/// Waitcnt[2:0] = \p Expcnt (gfx11+) +/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) +/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) +/// Waitcnt[6:4] = \p Expcnt (pre-gfx11) +/// Waitcnt[9:4] = \p Lgkmcnt (gfx11+) +/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) +/// Waitcnt[13:8] = \p Lgkmcnt (gfx10) +/// Waitcnt[15:10] = \p Vmcnt (gfx11+) +/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. 
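
As a concrete instance of the gfx11+ layout documented above (shifts and widths hard-coded from the comment purely for illustration; the in-tree helpers derive them from IsaVersion):

#include <cassert>

int main() {
  // gfx11+: Expcnt -> Waitcnt[2:0], Lgkmcnt -> Waitcnt[9:4],
  //         Vmcnt -> Waitcnt[15:10]; bit 3 is unused.
  unsigned Vmcnt = 5, Expcnt = 3, Lgkmcnt = 9;
  unsigned Waitcnt = (Expcnt & 0x7)            // bits [2:0]
                     | ((Lgkmcnt & 0x3F) << 4) // bits [9:4]
                     | ((Vmcnt & 0x3F) << 10); // bits [15:10]
  assert(Waitcnt == ((5u << 10) | (9u << 4) | 3u));
  // Decoding reverses the masks and shifts.
  assert(((Waitcnt >> 10) & 0x3F) == Vmcnt);
  assert(((Waitcnt >> 4) & 0x3F) == Lgkmcnt);
  assert((Waitcnt & 0x7) == Expcnt);
  return 0;
}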
@@ -598,10 +642,7 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); namespace Hwreg { LLVM_READONLY -int64_t getHwregId(const StringRef Name); - -LLVM_READNONE -bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI); +int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI); LLVM_READNONE bool isValidHwreg(int64_t Id); @@ -622,6 +663,18 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width); } // namespace Hwreg +namespace DepCtr { + +int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI); +int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, + const MCSubtargetInfo &STI); +bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, + const MCSubtargetInfo &STI); +bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, + bool &IsDefault, const MCSubtargetInfo &STI); + +} // namespace DepCtr + namespace Exp { bool getTgtName(unsigned Id, StringRef &Name, int &Index); @@ -653,13 +706,14 @@ bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); -int64_t getUnifiedFormat(const StringRef Name); +int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI); -StringRef getUnifiedFormatName(unsigned Id); +StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI); -bool isValidUnifiedFormat(unsigned Val); +bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI); -int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt); +int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, + const MCSubtargetInfo &STI); bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); @@ -670,19 +724,19 @@ unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); namespace SendMsg { LLVM_READONLY -int64_t getMsgId(const StringRef Name); +int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI); LLVM_READONLY int64_t getMsgOpId(int64_t MsgId, const StringRef Name); LLVM_READNONE -StringRef getMsgName(int64_t MsgId); +StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI); LLVM_READNONE -StringRef getMsgOpName(int64_t MsgId, int64_t OpId); +StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); LLVM_READNONE -bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true); +bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI); LLVM_READNONE bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, @@ -693,15 +747,13 @@ bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict = true); LLVM_READNONE -bool msgRequiresOp(int64_t MsgId); +bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI); LLVM_READNONE -bool msgSupportsStream(int64_t MsgId, int64_t OpId); +bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); -void decodeMsg(unsigned Val, - uint16_t &MsgId, - uint16_t &OpId, - uint16_t &StreamId); +void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, + uint16_t &StreamId, const MCSubtargetInfo &STI); LLVM_READNONE uint64_t encodeMsg(uint64_t MsgId, @@ -738,6 +790,8 @@ bool isEntryFunctionCC(CallingConv::ID CC); LLVM_READNONE bool isModuleEntryFunctionCC(CallingConv::ID CC); +bool isKernelCC(const Function *Func); + // FIXME: Remove this when calling conventions cleaned up LLVM_READNONE inline bool isKernel(CallingConv::ID CC) { @@ -761,22 +815,31 @@ bool isSI(const MCSubtargetInfo &STI); bool 
isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); bool isGFX9(const MCSubtargetInfo &STI); +bool isGFX9_GFX10(const MCSubtargetInfo &STI); +bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); +bool isGFX8Plus(const MCSubtargetInfo &STI); bool isGFX9Plus(const MCSubtargetInfo &STI); bool isGFX10(const MCSubtargetInfo &STI); bool isGFX10Plus(const MCSubtargetInfo &STI); +bool isNotGFX10Plus(const MCSubtargetInfo &STI); +bool isGFX10Before1030(const MCSubtargetInfo &STI); +bool isGFX11(const MCSubtargetInfo &STI); +bool isGFX11Plus(const MCSubtargetInfo &STI); +bool isNotGFX11Plus(const MCSubtargetInfo &STI); bool isGCN3Encoding(const MCSubtargetInfo &STI); bool isGFX10_AEncoding(const MCSubtargetInfo &STI); bool isGFX10_BEncoding(const MCSubtargetInfo &STI); bool hasGFX10_3Insts(const MCSubtargetInfo &STI); bool isGFX90A(const MCSubtargetInfo &STI); +bool isGFX940(const MCSubtargetInfo &STI); bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); +bool hasMAIInsts(const MCSubtargetInfo &STI); +bool hasVOPD(const MCSubtargetInfo &STI); +int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); /// Is Reg - scalar register bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); -/// Is there any intersection between registers -bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI); - /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); @@ -931,7 +994,7 @@ inline bool isLegal64BitDPPControl(unsigned DC) { /// \returns true if the intrinsic is divergent bool isIntrinsicSourceOfDivergence(unsigned IntrID); -// Track defaults for fields in the MODE registser. +// Track defaults for fields in the MODE register. struct SIModeRegisterDefaults { /// Floating point opcodes that support exception flag gathering quiet and /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp deleted file mode 100644 index a83ff6667956..000000000000 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp +++ /dev/null @@ -1,144 +0,0 @@ -//===- AMDGPULDSUtils.cpp -------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AMDGPU LDS related helper utility functions. 
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPULDSUtils.h"
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/ReplaceConstant.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-namespace AMDGPU {
-
-bool isKernelCC(const Function *Func) {
-  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
-}
-
-Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
-  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
-                                       GV->getValueType());
-}
-
-static void collectFunctionUses(User *U, const Function *F,
-                                SetVector<Instruction *> &InstUsers) {
-  SmallVector<User *> Stack{U};
-
-  while (!Stack.empty()) {
-    U = Stack.pop_back_val();
-
-    if (auto *I = dyn_cast<Instruction>(U)) {
-      if (I->getFunction() == F)
-        InstUsers.insert(I);
-      continue;
-    }
-
-    if (!isa<ConstantExpr>(U))
-      continue;
-
-    append_range(Stack, U->users());
-  }
-}
-
-void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
-  SetVector<Instruction *> InstUsers;
-
-  collectFunctionUses(C, F, InstUsers);
-  for (Instruction *I : InstUsers) {
-    convertConstantExprsToInstructions(I, C);
-  }
-}
-
-static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
-                                   const Function *F) {
-  // We are not interested in kernel LDS lowering for module LDS itself.
-  if (F && GV.getName() == "llvm.amdgcn.module.lds")
-    return false;
-
-  bool Ret = false;
-  SmallPtrSet<const User *, 8> Visited;
-  SmallVector<const User *, 16> Stack(GV.users());
-
-  assert(!F || isKernelCC(F));
-
-  while (!Stack.empty()) {
-    const User *V = Stack.pop_back_val();
-    Visited.insert(V);
-
-    if (isa<GlobalValue>(V)) {
-      // This use of the LDS variable is the initializer of a global variable.
-      // This is ill formed. The address of an LDS variable is kernel dependent
-      // and unknown until runtime. It can't be written to a global variable.
-      continue;
-    }
-
-    if (auto *I = dyn_cast<Instruction>(V)) {
-      const Function *UF = I->getFunction();
-      if (UF == F) {
-        // Used from this kernel, we want to put it into the structure.
-        Ret = true;
-      } else if (!F) {
-        // For module LDS lowering, lowering is required if the user instruction
-        // is from non-kernel function.
-        Ret |= !isKernelCC(UF);
-      }
-      continue;
-    }
-
-    // User V should be a constant, recursively visit users of V.
-    assert(isa<Constant>(V) && "Expected a constant.");
-    append_range(Stack, V->users());
-  }
-
-  return Ret;
-}
-
-std::vector<GlobalVariable *> findVariablesToLower(Module &M,
-                                                   const Function *F) {
-  std::vector<GlobalVariable *> LocalVars;
-  for (auto &GV : M.globals()) {
-    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
-      continue;
-    }
-    if (!GV.hasInitializer()) {
-      // addrspace(3) without initializer implies cuda/hip extern __shared__
-      // the semantics for such a variable appears to be that all extern
-      // __shared__ variables alias one another, in which case this transform
-      // is not required
-      continue;
-    }
-    if (!isa<UndefValue>(GV.getInitializer())) {
-      // Initializers are unimplemented for LDS address space.
-      // Leave such variables in place for consistent error reporting.
-      continue;
-    }
-    if (GV.isConstant()) {
-      // A constant undef variable can't be written to, and any load is
-      // undef, so it should be eliminated by the optimizer. It could be
-      // dropped by the back end if not. This pass skips over it.
-      continue;
-    }
-    if (!shouldLowerLDSToStruct(GV, F)) {
-      continue;
-    }
-    LocalVars.push_back(&GV);
-  }
-  return LocalVars;
-}
-
-} // end namespace AMDGPU
-
-} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
deleted file mode 100644
index 83ef68cc3f60..000000000000
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// AMDGPU LDS related helper utility functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/Constants.h"
-
-namespace llvm {
-
-class ConstantExpr;
-
-namespace AMDGPU {
-
-bool isKernelCC(const Function *Func);
-
-Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
-
-std::vector<GlobalVariable *> findVariablesToLower(Module &M,
-                                                   const Function *F = nullptr);
-
-/// Replace all uses of constant \p C with instructions in \p F.
-void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
-} // end namespace AMDGPU
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
new file mode 100644
index 000000000000..83d7cbdb183c
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -0,0 +1,220 @@
+//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMemoryUtils.h"
+#include "AMDGPU.h"
+#include "AMDGPUBaseInfo.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/ReplaceConstant.h"
+
+#define DEBUG_TYPE "amdgpu-memory-utils"
+
+using namespace llvm;
+
+namespace llvm {
+
+namespace AMDGPU {
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
+  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
+                                       GV->getValueType());
+}
+
+static void collectFunctionUses(User *U, const Function *F,
+                                SetVector<Instruction *> &InstUsers) {
+  SmallVector<User *> Stack{U};
+
+  while (!Stack.empty()) {
+    U = Stack.pop_back_val();
+
+    if (auto *I = dyn_cast<Instruction>(U)) {
+      if (I->getFunction() == F)
+        InstUsers.insert(I);
+      continue;
+    }
+
+    if (!isa<ConstantExpr>(U))
+      continue;
+
+    append_range(Stack, U->users());
+  }
+}
+
+void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
+  SetVector<Instruction *> InstUsers;
+
+  collectFunctionUses(C, F, InstUsers);
+  for (Instruction *I : InstUsers) {
+    convertConstantExprsToInstructions(I, C);
+  }
+}
+
+static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
+                                   const Function *F) {
+  // We are not interested in kernel LDS lowering for module LDS itself.
+  if (F && GV.getName() == "llvm.amdgcn.module.lds")
+    return false;
+
+  bool Ret = false;
+  SmallPtrSet<const User *, 8> Visited;
+  SmallVector<const User *, 16> Stack(GV.users());
+
+  assert(!F || isKernelCC(F));
+
+  while (!Stack.empty()) {
+    const User *V = Stack.pop_back_val();
+    Visited.insert(V);
+
+    if (isa<GlobalValue>(V)) {
+      // This use of the LDS variable is the initializer of a global variable.
+      // This is ill-formed. The address of an LDS variable is kernel dependent
+      // and unknown until runtime. It can't be written to a global variable.
+      continue;
+    }
+
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      const Function *UF = I->getFunction();
+      if (UF == F) {
+        // Used from this kernel, we want to put it into the structure.
+        Ret = true;
+      } else if (!F) {
+        // For module LDS lowering, lowering is required if the user instruction
+        // is from a non-kernel function.
+        Ret |= !isKernelCC(UF);
+      }
+      continue;
+    }
+
+    // User V should be a constant, recursively visit users of V.
+    assert(isa<Constant>(V) && "Expected a constant.");
+    append_range(Stack, V->users());
+  }
+
+  return Ret;
+}
+
+std::vector<GlobalVariable *> findVariablesToLower(Module &M,
+                                                   const Function *F) {
+  std::vector<GlobalVariable *> LocalVars;
+  for (auto &GV : M.globals()) {
+    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
+      continue;
+    }
+    if (!GV.hasInitializer()) {
+      // addrspace(3) without initializer implies cuda/hip extern __shared__;
+      // the semantics for such a variable appear to be that all extern
+      // __shared__ variables alias one another, in which case this transform
+      // is not required.
+      continue;
+    }
+    if (!isa<UndefValue>(GV.getInitializer())) {
+      // Initializers are unimplemented for LDS address space.
+      // Leave such variables in place for consistent error reporting.
+      continue;
+    }
+    if (GV.isConstant()) {
+      // A constant undef variable can't be written to, and any load is
+      // undef, so it should be eliminated by the optimizer. It could be
+      // dropped by the back end if not. This pass skips over it.
+      continue;
+    }
+    if (!shouldLowerLDSToStruct(GV, F)) {
+      continue;
+    }
+    LocalVars.push_back(&GV);
+  }
+  return LocalVars;
+}
+
+bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
+  Instruction *DefInst = Def->getMemoryInst();
+
+  if (isa<FenceInst>(DefInst))
+    return false;
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::amdgcn_s_barrier:
+    case Intrinsic::amdgcn_wave_barrier:
+    case Intrinsic::amdgcn_sched_barrier:
+      return false;
+    default:
+      break;
+    }
+  }
+
+  // Ignore atomics that do not alias the original load; from MSSA's point of
+  // view any atomic is a universal MemoryDef, just like a fence.
+  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
+    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
+  };
+
+  if (checkNoAlias(dyn_cast<LoadInst>(DefInst)) ||
+      checkNoAlias(dyn_cast<StoreInst>(DefInst)))
+    return false;
+
+  return true;
+}
+
+bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
+                           AAResults *AA) {
+  MemorySSAWalker *Walker = MSSA->getWalker();
+  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
+  SmallSet<const MemoryAccess *, 8> Visited;
+  MemoryLocation Loc(MemoryLocation::get(Load));
+
+  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
+
+  // Start with the nearest dominating clobbering access; it will be either
+  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
+  // a MemoryPhi if several MemoryDefs can define this memory state. In that
+  // case add all the Defs to the WorkList and continue going up, checking all
+  // the definitions of this memory location until the root. When all the
+  // defs are exhausted and we have come to the entry state, there is no
+  // clobber. Along the scan, ignore barriers and fences, which MemorySSA
+  // considers clobbers but which do not really write anything into memory.
+  while (!WorkList.empty()) {
+    MemoryAccess *MA = WorkList.pop_back_val();
+    if (!Visited.insert(MA).second)
+      continue;
+
+    if (MSSA->isLiveOnEntryDef(MA))
+      continue;
+
+    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
+      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');
+
+      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
+        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
+        return true;
+      }
+
+      WorkList.push_back(
+          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
+      continue;
+    }
+
+    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
+    for (auto &Use : Phi->incoming_values())
+      WorkList.push_back(cast<MemoryAccess>(&Use));
+  }
+
+  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
+  return false;
+}
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
new file mode 100644
index 000000000000..65ed02ca62de
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -0,0 +1,51 @@
+//===- AMDGPUMemoryUtils.h - Memory related helper functions -*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+
+#include <vector>
+
+namespace llvm {
+
+struct Align;
+class AAResults;
+class ConstantExpr;
+class DataLayout;
+class Function;
+class GlobalVariable;
+class LoadInst;
+class MemoryDef;
+class MemorySSA;
+class Module;
+class Value;
+
+namespace AMDGPU {
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
+
+std::vector<GlobalVariable *> findVariablesToLower(Module &M,
+                                                   const Function *F = nullptr);
+
+/// Replace all uses of constant \p C with instructions in \p F.
+void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
+
+/// Given a \p Def clobbering a load from \p Ptr according to the MSSA, check
+/// if this is actually a memory update or an artificial clobber to facilitate
+/// ordering constraints.
+bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA);
+
+/// Check if a \p Load is clobbered in its function.
+bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
+                           AAResults *AA);
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index f6b5975f1934..4ad93f7b0b68 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -209,6 +209,11 @@ void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
   getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
 }
 
+// Set the number of used agprs in the metadata.
+void AMDGPUPALMetadata::setNumUsedAgprs(CallingConv::ID CC, unsigned Val) {
+  getHwStage(CC)[".agpr_count"] = Val;
+}
+
 // Set the number of used sgprs in the metadata. This is an optional advisory
 // record for logging etc; wave dispatch actually uses the rsrc1 register for
 // the shader stage to determine the number of sgprs to allocate.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 7fdd9a8429c1..a45a799e38a9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -69,6 +69,10 @@ public:
   // the shader stage to determine the number of vgprs to allocate.
   void setNumUsedVgprs(unsigned CC, unsigned Val);
 
+  // Set the number of used agprs in the metadata. This is an optional advisory
+  // record for logging etc.
+  void setNumUsedAgprs(unsigned CC, unsigned Val);
+
   // Set the number of used sgprs in the metadata. This is an optional advisory
   // record for logging etc; wave dispatch actually uses the rsrc1 register for
   // the shader stage to determine the number of sgprs to allocate.
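To show how the new AMDGPUMemoryUtils helpers above are meant to be consumed, here is a hypothetical function pass driving AMDGPU::isClobberedInFunction. The pass and its counting logic are invented for illustration; only the helper's signature and the standard MemorySSAAnalysis/AAManager analyses are assumed:

// Sketch of a client of AMDGPU::isClobberedInFunction. The pass itself is
// made up; a real client (e.g. an annotator marking loads that are safe to
// speculate across barriers) would act on the result instead of counting.
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct CountUnclobberedLoads : PassInfoMixin<CountUnclobberedLoads> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
    AAResults &AA = FAM.getResult<AAManager>(F);
    unsigned Unclobbered = 0;
    for (Instruction &I : instructions(F))
      if (auto *LI = dyn_cast<LoadInst>(&I))
        // Loads with no real clobber in the function are candidates for
        // hoisting or speculation; barriers and fences are ignored by the
        // walk, per isReallyAClobber above.
        if (!AMDGPU::isClobberedInFunction(LI, &MSSA, &AA))
          ++Unclobbered;
    (void)Unclobbered; // analysis only; nothing is modified
    return PreservedAnalyses::all();
  }
};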
diff --git a/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/llvm/lib/Target/AMDGPU/VIInstrFormats.td
index bd65a495fa72..7393ef6c2a2d 100644
--- a/llvm/lib/Target/AMDGPU/VIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/VIInstrFormats.td
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-class EXPe_vi : EXPe {
+class EXPe_vi : EXPe_ComprVM {
   let Inst{31-26} = 0x31; //encoding
 }
 
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
new file mode 100644
index 000000000000..c63fbbc241d9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -0,0 +1,180 @@
+//===-- VINTERPInstructions.td - VINTERP Instruction Definitions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VINTERP encoding
+//===----------------------------------------------------------------------===//
+
+class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : Enc64 {
+  bits<8> vdst;
+  bits<4> src0_modifiers;
+  bits<9> src0;
+  bits<3> src1_modifiers;
+  bits<9> src1;
+  bits<3> src2_modifiers;
+  bits<9> src2;
+  bits<1> clamp;
+  bits<3> waitexp;
+
+  let Inst{31-26} = 0x33; // VOP3P encoding
+  let Inst{25-24} = 0x1; // VINTERP sub-encoding
+  let Inst{23} = 0; // reserved
+
+  let Inst{7-0} = vdst;
+  let Inst{10-8} = waitexp;
+  let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
+  let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
+  let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
+  let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
+  let Inst{15} = clamp;
+  let Inst{22-16} = op;
+  let Inst{40-32} = src0;
+  let Inst{49-41} = src1;
+  let Inst{58-50} = src2;
+  let Inst{61} = src0_modifiers{0}; // neg(0)
+  let Inst{62} = src1_modifiers{0}; // neg(1)
+  let Inst{63} = src2_modifiers{0}; // neg(2)
+}
+
+//===----------------------------------------------------------------------===//
+// VOP3 VINTERP
+//===----------------------------------------------------------------------===//
+
+class VINTERP_Pseudo <string OpName, VOPProfile P, list<dag> pattern = []> :
+  VOP3_Pseudo<OpName, P, pattern> {
+  let AsmMatchConverter = "cvtVINTERP";
+  let mayRaiseFPException = 0;
+
+  let VOP3_OPSEL = 1;
+  let VINTERP = 1;
+}
+
+class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily> :
+  VOP3_Real <ps, EncodingFamily> {
+  let VINTERP = 1;
+}
+
+def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
+  let HasOpSel = 0;
+  let HasModifiers = 1;
+
+  let Outs64 = (outs VGPR_32:$vdst);
+  let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
+                   Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
+                   Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
+                   clampmod:$clamp,
+                   wait_exp:$waitexp);
+
+  let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp";
+}
+
+class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
+  let HasOpSel = 1;
+  let HasModifiers = 1;
+
+  let Outs64 = (outs VGPR_32:$vdst);
+  let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
+                   Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
+                   Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
+                   clampmod:$clamp, op_sel0:$op_sel,
+                   wait_exp:$waitexp);
+
+  let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
+}
+
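As a cross-check of the VINTERPe_gfx11 layout above, here is a small hypothetical encoder that packs a few of the declared fields into the 64-bit instruction word. Only the bit positions are taken from the class; the function itself is not part of the backend:

// Packs vdst (Inst{7-0}), waitexp (Inst{10-8}), opcode (Inst{22-16}),
// the fixed VOP3P/VINTERP tag (Inst{31-24}), and src0 (Inst{40-32}).
// Modifiers, clamp, op_sel, and src1/src2 are omitted for brevity.
#include <cstdint>
#include <cstdio>

static uint64_t encodeVINTERP(uint8_t Vdst, uint8_t WaitExp, uint8_t Op,
                              uint16_t Src0) {
  uint64_t Inst = 0;
  Inst |= (uint64_t)0x33 << 26;             // VOP3P encoding, Inst{31-26}
  Inst |= (uint64_t)0x1 << 24;              // VINTERP sub-encoding, Inst{25-24}
  Inst |= (uint64_t)Vdst;                   // Inst{7-0}
  Inst |= (uint64_t)(WaitExp & 0x7) << 8;   // Inst{10-8}
  Inst |= (uint64_t)(Op & 0x7f) << 16;      // Inst{22-16}
  Inst |= (uint64_t)(Src0 & 0x1ff) << 32;   // Inst{40-32}
  return Inst;
}

int main() {
  // v_interp_p10_f32 is opcode 0x000 per the Real definitions further down.
  printf("%016llx\n", (unsigned long long)encodeVINTERP(1, 7, 0x000, 0x100));
  return 0;
}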
+//===----------------------------------------------------------------------===// +// VINTERP Pseudo Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGFX11Plus in { + +let Uses = [M0, EXEC, MODE] in { +def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>; +def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>; +def V_INTERP_P10_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>; +def V_INTERP_P2_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>; +} // Uses = [M0, EXEC, MODE] + +let Uses = [M0, EXEC] in { +def V_INTERP_P10_RTZ_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>; +def V_INTERP_P2_RTZ_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>; +} // Uses = [M0, EXEC] + +} // SubtargetPredicate = isGFX11Plus + +class VInterpF32Pat : GCNPat < + (f32 (op + (VINTERPMods f32:$src0, i32:$src0_modifiers), + (VINTERPMods f32:$src1, i32:$src1_modifiers), + (VINTERPMods f32:$src2, i32:$src2_modifiers))), + (inst $src0_modifiers, $src0, + $src1_modifiers, $src1, + $src2_modifiers, $src2, + 0, /* clamp */ + 7) /* wait_exp */ +>; + +def VINTERP_OPSEL { + int LOW = 0; + int HIGH = 0xa; +} + +class VInterpF16Pat pat> : GCNPat < + (dst_type (op + (pat[0] f32:$src0, i32:$src0_modifiers), + (pat[1] f32:$src1, i32:$src1_modifiers), + (pat[2] f32:$src2, i32:$src2_modifiers), + !if(high, (i1 -1), (i1 0)))), + (inst $src0_modifiers, $src0, + $src1_modifiers, $src1, + $src2_modifiers, $src2, + 0, /* clamp */ + /* op_sel = 0 */ + 7) /* wait_exp */ +>; + +multiclass VInterpF16Pat high_pat> { + def : VInterpF16Pat; + def : VInterpF16Pat; +} + +def : VInterpF32Pat; +def : VInterpF32Pat; +defm : VInterpF16Pat; +defm : VInterpF16Pat; + +//===----------------------------------------------------------------------===// +// VINTERP Real Instructions +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in { + multiclass VINTERP_Real_gfx11 op> { + def _gfx11 : + VINTERP_Real(NAME), SIEncodingFamily.GFX11>, + VINTERPe_gfx11(NAME).Pfl>; + } +} + +defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11<0x000>; +defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11<0x001>; +defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11<0x002>; +defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11<0x003>; +defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x004>; +defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x005>; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 48548d8b6722..1d374a9f90ba 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -59,9 +59,9 @@ class VOP1_Pseudo pattern=[], bit VOP1On let AsmVariantName = AMDGPUAsmVariants.Default; } -class VOP1_Real : +class VOP1_Real : VOP_Real , - InstSI , + InstSI , SIMCInstr { let VALU = 1; @@ -110,13 +110,18 @@ class getVOP1Pat64 : LetDummies { } multiclass VOP1Inst { + SDPatternOperator node = null_frag, int VOPDOp = -1> { // We only want to set this on the basic, non-SDWA or DPP forms. 
- defvar should_mov_imm = !eq(opName, "v_mov_b32"); + defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"), + !eq(opName, "v_mov_b64")); let isMoveImm = should_mov_imm in { - def _e32 : VOP1_Pseudo ; - def _e64 : VOP3_Pseudo .ret>; + if !eq(VOPDOp, -1) then + def _e32 : VOP1_Pseudo ; + else + // Only for V_MOV_B32 + def _e32 : VOP1_Pseudo , VOPD_Component; + def _e64 : VOP3InstBase ; } foreach _ = BoolToList.ret in @@ -125,6 +130,11 @@ multiclass VOP1Inst .ret in def _dpp : VOP1_DPP_Pseudo ; + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList.ret in + def _e64_dpp : VOP3_DPP_Pseudo ; + } // End SubtargetPredicate = isGFX11Plus + def : MnemonicAlias, LetDummies; def : MnemonicAlias, LetDummies; @@ -141,7 +151,9 @@ class VOPProfileI2F : VOPProfile<[dstVt, srcVt, untyped, untyped]> { let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); + let InsVOP3Base = (ins Src0DPP:$src0, clampmod:$clamp, omod:$omod); let Asm64 = "$vdst, $src0$clamp$omod"; + let AsmVOP3DPPBase = Asm64; let HasModifiers = 0; let HasClamp = 1; @@ -151,6 +163,12 @@ def VOP1_F64_I32 : VOPProfileI2F ; def VOP1_F32_I32 : VOPProfileI2F ; def VOP1_F16_I16 : VOPProfileI2F ; +def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{ + let HasExtVOP3DPP = 0; +} + +// OMod clears exceptions when set. OMod was always an operand, but its +// now explicitly set. class VOP_SPECIAL_OMOD_PROF : VOPProfile<[dstVt, srcVt, untyped, untyped]> { @@ -165,11 +183,21 @@ def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; //===----------------------------------------------------------------------===// let VOPAsmPrefer32Bit = 1 in { -defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>; +defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>; +} + +def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { + let InsVOPDX = (ins Src0RC32:$src0X); + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X); + let InsVOPDY = (ins Src0RC32:$src0Y); + let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y); } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; +defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; + +let SubtargetPredicate = isGFX940Plus in +defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; } // End isMoveImm = 1 // FIXME: Specify SchedRW for READFIRSTLANE_B32 @@ -282,7 +310,7 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; -defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; +defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag>; defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; @@ -472,7 +500,7 @@ let SubtargetPredicate = isGFX9Only in { } // End SubtargetPredicate = isGFX9Only let SubtargetPredicate = isGFX10Plus in { - defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>; + defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT>; let Uses = [M0] in { defm V_MOVRELSD_2_B32 : @@ -498,6 +526,17 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1 let isAsCheapAsAMove = 1; } +let SubtargetPredicate = isGFX11Plus in { + // Restrict src0 to be VGPR + def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, + getVOP1Pat64.ret, + /*VOP1Only=*/ 1>; + defm V_NOT_B16 : 
VOP1Inst<"v_not_b16", VOP_I16_I16>; + defm V_CVT_I32_I16 : VOP1Inst<"v_cvt_i32_i16", VOP_I32_I16>; + defm V_CVT_U32_U16 : VOP1Inst<"v_cvt_u32_u16", VOP_I16_I16>; +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// @@ -517,9 +556,9 @@ class VOP1_DPP op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1 let Inst{31-25} = 0x3f; } -class VOP1_DPP16 op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> : +class VOP1_DPP16 op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> : VOP1_DPP, - SIMCInstr { + SIMCInstr { let AssemblerPredicate = HasDPP16; let SubtargetPredicate = HasDPP16; } @@ -538,11 +577,113 @@ class VOP1_DPP8 op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : let Inst{31-25} = 0x3f; } +//===----------------------------------------------------------------------===// +// GFX11. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { + multiclass VOP1Only_Real_gfx11 op> { + let IsSingle = 1 in + def _gfx11 : + VOP1_Real(NAME), SIEncodingFamily.GFX11>, + VOP1e(NAME).Pfl>; + } + multiclass VOP1_Real_e32_gfx11 op, string opName = NAME> { + defvar ps = !cast(opName#"_e32"); + def _e32_gfx11 : + VOP1_Real, + VOP1e; + } + multiclass VOP1_Real_e32_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e32"); + let AsmString = asmName # ps.AsmOperands in { + defm NAME : VOP1_Real_e32_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; + } + } + multiclass VOP1_Real_e64_gfx11 op> { + def _e64_gfx11 : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX11>, + VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + } + multiclass VOP1_Real_dpp_gfx11 op, string opName = NAME> { + defvar ps = !cast(opName#"_e32"); + def _dpp_gfx11 : VOP1_DPP16(opName#"_dpp"), SIEncodingFamily.GFX11> { + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP1_Real_dpp_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e32"); + let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in { + defm NAME : VOP1_Real_dpp_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; + } + } + multiclass VOP1_Real_dpp8_gfx11 op, string opName = NAME> { + defvar ps = !cast(opName#"_e32"); + def _dpp8_gfx11 : VOP1_DPP8 { + let DecoderNamespace = "DPP8GFX11"; + } + } + multiclass VOP1_Real_dpp8_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e32"); + let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in { + defm NAME : VOP1_Real_dpp8_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; + } + } +} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" + +multiclass VOP1_Realtriple_e64_gfx11 op> { + defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>; +} +multiclass VOP1_Realtriple_e64_with_name_gfx11 op, string opName, + string asmName> { + defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName, + asmName>; +} + +multiclass VOP1_Real_FULL_gfx11 op> : + VOP1_Real_e32_gfx11, VOP1_Realtriple_e64_gfx11, + VOP1_Real_dpp_gfx11, VOP1_Real_dpp8_gfx11; + +multiclass VOP1_Real_NO_VOP3_with_name_gfx11 op, string opName, + string asmName> : + VOP1_Real_e32_with_name_gfx11, + VOP1_Real_dpp_with_name_gfx11, + VOP1_Real_dpp8_with_name_gfx11; + +multiclass 
VOP1_Real_FULL_with_name_gfx11 op, string opName, + string asmName> : + VOP1_Real_NO_VOP3_with_name_gfx11, + VOP1_Realtriple_e64_with_name_gfx11; + +multiclass VOP1_Real_NO_DPP_gfx11 op> : + VOP1_Real_e32_gfx11, VOP1_Real_e64_gfx11; + +defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c, + "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; +defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d, + "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">; +defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039, + "V_FFBH_U32", "v_clz_i32_u32">; +defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a, + "V_FFBL_B32", "v_ctz_i32_b32">; +defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b, + "V_FFBH_I32", "v_cls_i32">; +defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>; +defm V_NOT_B16 : VOP1_Real_FULL_gfx11<0x069>; +defm V_CVT_I32_I16 : VOP1_Real_FULL_gfx11<0x06a>; +defm V_CVT_U32_U16 : VOP1_Real_FULL_gfx11<0x06b>; + //===----------------------------------------------------------------------===// // GFX10. //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass VOP1Only_Real_gfx10 op> { def _gfx10 : VOP1_Real(NAME), SIEncodingFamily.GFX10>, @@ -567,50 +708,59 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP1_Real_dpp_gfx10 op> { - foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in - def _dpp_gfx10 : VOP1_DPP16(NAME#"_dpp")> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in + def _dpp_gfx10 : VOP1_DPP16(NAME#"_dpp"), SIEncodingFamily.GFX10> { let DecoderNamespace = "SDWA10"; } } multiclass VOP1_Real_dpp8_gfx10 op> { - foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_gfx10 : VOP1_DPP8(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" multiclass VOP1_Real_gfx10 op> : VOP1_Real_e32_gfx10, VOP1_Real_e64_gfx10, VOP1_Real_sdwa_gfx10, VOP1_Real_dpp_gfx10, VOP1_Real_dpp8_gfx10; -defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>; -defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>; -defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; -defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; -defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; -defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; -defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; -defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; -defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; -defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; -defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; -defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; -defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; -defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; -defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; -defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; -defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; -defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; -defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; -defm V_COS_F16 : VOP1_Real_gfx10<0x061>; -defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>; -defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; -defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; - -defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>; -defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>; +multiclass VOP1_Real_gfx10_FULL_gfx11 op> : + VOP1_Real_gfx10, VOP1_Real_FULL_gfx11; + +multiclass VOP1_Real_gfx10_NO_DPP_gfx11 
op> : + VOP1_Real_gfx10, VOP1_Real_NO_DPP_gfx11; + +multiclass VOP1Only_Real_gfx10_gfx11 op> : + VOP1Only_Real_gfx10, VOP1Only_Real_gfx11; + +defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>; +defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>; +defm V_CVT_F16_U16 : VOP1_Real_gfx10_FULL_gfx11<0x050>; +defm V_CVT_F16_I16 : VOP1_Real_gfx10_FULL_gfx11<0x051>; +defm V_CVT_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x052>; +defm V_CVT_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x053>; +defm V_RCP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x054>; +defm V_SQRT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x055>; +defm V_RSQ_F16 : VOP1_Real_gfx10_FULL_gfx11<0x056>; +defm V_LOG_F16 : VOP1_Real_gfx10_FULL_gfx11<0x057>; +defm V_EXP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x058>; +defm V_FREXP_MANT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x059>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05a>; +defm V_FLOOR_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05b>; +defm V_CEIL_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05c>; +defm V_TRUNC_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05d>; +defm V_RNDNE_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05e>; +defm V_FRACT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05f>; +defm V_SIN_F16 : VOP1_Real_gfx10_FULL_gfx11<0x060>; +defm V_COS_F16 : VOP1_Real_gfx10_FULL_gfx11<0x061>; +defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10_FULL_gfx11<0x062>; +defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x063>; +defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x064>; + +defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>; +defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>; //===----------------------------------------------------------------------===// // GFX7, GFX10. @@ -635,16 +785,19 @@ multiclass VOP1_Real_gfx7 op> : multiclass VOP1_Real_gfx7_gfx10 op> : VOP1_Real_gfx7, VOP1_Real_gfx10; +multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11 op> : + VOP1_Real_gfx7_gfx10, VOP1_Real_NO_DPP_gfx11; + defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; -defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>; -defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>; -defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>; -defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>; +defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>; +defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>; +defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>; +defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>; //===----------------------------------------------------------------------===// -// GFX6, GFX7, GFX10. +// GFX6, GFX7, GFX10, GFX11. 
//===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { @@ -666,65 +819,71 @@ multiclass VOP1_Real_gfx6_gfx7 op> : multiclass VOP1_Real_gfx6_gfx7_gfx10 op> : VOP1_Real_gfx6_gfx7, VOP1_Real_gfx10; -defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; -defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; -defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; -defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; -defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; -defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; -defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; - -defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>; -defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>; -defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>; -defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>; -defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>; -defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>; -defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>; -defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>; -defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; -defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; +multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11 op> : + VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_FULL_gfx11; + +multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11 op> : + VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_NO_DPP_gfx11; + +defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; +defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; +defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; +defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; +defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; +defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; +defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; + +defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>; +defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>; +defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>; +defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>; +defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>; +defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>; +defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>; +defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>; +defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00a>; +defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; -defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>; -defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>; -defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>; -defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>; -defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>; -defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>; -defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>; -defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>; -defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>; -defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>; -defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>; -defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>; -defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>; -defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>; -defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>; -defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>; -defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>; -defm V_RCP_IFLAG_F32 : 
VOP1_Real_gfx6_gfx7_gfx10<0x02b>; -defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>; -defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>; -defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>; -defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>; -defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>; -defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>; -defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>; -defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>; -defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>; +defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>; +defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>; +defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>; +defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>; +defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>; +defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>; +defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>; +defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>; +defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>; +defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>; +defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>; +defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>; +defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>; +defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>; +defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>; +defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>; +defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>; +defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>; +defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>; +defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>; +defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>; +defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>; defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>; defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>; defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>; -defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>; -defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>; -defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>; -defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>; -defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>; +defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>; +defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>; +defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>; defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; -defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>; -defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>; -defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>; +defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>; +defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>; +defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>; 
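The defm lines above reuse one 7-bit VOP1 opcode space from gfx6 through gfx11, with each _Real_* multiclass controlling which encodings (e32, e64, dpp, dpp8) exist per generation. Below is a toy model of the resulting pseudo-to-MC mapping, in the spirit of the getMCOpcode(uint16_t Opcode, unsigned Gen) helper declared in AMDGPUBaseInfo earlier in this patch; all names and table values here are illustrative, not the generated tables:

// Toy model: one pseudo opcode fans out to per-encoding-family MC opcodes,
// selected at lowering time. -1 would mark "not available in this family".
#include <cstdint>
#include <cstdio>

enum EncodingFamily { SI = 0, GFX10 = 1, GFX11 = 2, NumFamilies = 3 };
enum PseudoOp { V_MOV_B32, V_BFREV_B32, NumPseudos };

static const int16_t MCOpcodeTable[NumPseudos][NumFamilies] = {
    /* V_MOV_B32   */ {0x001, 0x001, 0x001},
    /* V_BFREV_B32 */ {0x038, 0x038, 0x038},
};

static int16_t getMCOpcodeToy(PseudoOp Op, EncodingFamily Fam) {
  return MCOpcodeTable[Op][Fam];
}

int main() {
  printf("v_mov_b32 on gfx11: 0x%03x\n", getMCOpcodeToy(V_MOV_B32, GFX11));
  return 0;
}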
//===----------------------------------------------------------------------===// // GFX8, GFX9 (VI). @@ -949,14 +1108,29 @@ multiclass VOP1_Real_gfx9 op> { defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; +let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in +defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; + //===----------------------------------------------------------------------===// // GFX10 //===----------------------------------------------------------------------===// -let OtherPredicates = [isGFX10Plus] in { +let OtherPredicates = [isGFX10Only] in { def : GCNPat < (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0)) >; -} // End OtherPredicates = [isGFX10Plus] +} // End OtherPredicates = [isGFX10Only] + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +let OtherPredicates = [isGFX11Only] in { +def : GCNPat < + (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), + (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src, + (as_i32timm $dpp8), (i32 DPP8Mode.FI_0)) +>; +} // End OtherPredicates = [isGFX11Only] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index b9ff814a4dc5..1485a1e63129 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -80,9 +80,9 @@ class VOP2_Pseudo pattern=[], string suf let AsmVariantName = AMDGPUAsmVariants.Default; } -class VOP2_Real : +class VOP2_Real : VOP_Real , - InstSI , + InstSI , SIMCInstr { let VALU = 1; @@ -140,15 +140,26 @@ multiclass VOP2Inst_e32; } // End renamedInGFX9 = GFX9Renamed } - +multiclass + VOP2Inst_e32_VOPD VOPDOp, + string VOPDName, SDPatternOperator node = null_frag, + string revOp = opName, bit GFX9Renamed = 0> { + defm NAME : VOP2Inst_e32, + VOPD_Component; +} multiclass VOP2Inst_e64 { let renamedInGFX9 = GFX9Renamed in { - def _e64 : VOP3_Pseudo .ret>, + def _e64 : VOP3InstBase , Commutable_REV; + + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList.ret in + def _e64_dpp : VOP3_DPP_Pseudo ; + } // End SubtargetPredicate = isGFX11Plus } // End renamedInGFX9 = GFX9Renamed } @@ -175,6 +186,22 @@ multiclass VOP2Inst VOPDOp, + string VOPDName, + SDPatternOperator node = null_frag, + string revOp = opName, + bit GFX9Renamed = 0> : + VOP2Inst_e32_VOPD, + VOP2Inst_e64, + VOP2Inst_sdwa { + let renamedInGFX9 = GFX9Renamed in { + foreach _ = BoolToList.ret in + def _dpp : VOP2_DPP_Pseudo ; + } +} + multiclass VOP2bInst .ret in def _dpp : VOP2_DPP_Pseudo ; - } + } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] - def _e64 : VOP3_Pseudo .ret>, + def _e64 : VOP3InstBase , Commutable_REV; + + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList.ret in + def _e64_dpp : VOP3_DPP_Pseudo ; + } // End SubtargetPredicate = isGFX11Plus } } } @@ -220,16 +252,19 @@ multiclass VOP2bInstAliases { } } -multiclass VOP2eInst { +multiclass + VOP2eInst_Base VOPDOp, string VOPDName, + SDPatternOperator node, string revOp, bit useSGPRInput> { let SchedRW = [Write32Bit] in { let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { - def _e32 : VOP2_Pseudo , - Commutable_REV; + if !eq(VOPDOp, -1) then + def _e32 : VOP2_Pseudo , + Commutable_REV; + else + def _e32 : VOP2_Pseudo , + Commutable_REV, + VOPD_Component; foreach _ = BoolToList.ret in def _sdwa : VOP2_SDWA_Pseudo { @@ -240,13 +275,29 
@@ multiclass VOP2eInst ; } - def _e64 : VOP3_Pseudo .ret>, + def _e64 : VOP3InstBase , Commutable_REV { let isReMaterializable = 1; } + + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList.ret in + def _e64_dpp : VOP3_DPP_Pseudo ; + } // End SubtargetPredicate = isGFX11Plus } } +multiclass + VOP2eInst + : VOP2eInst_Base; + +multiclass + VOP2eInst_VOPD VOPDOp, string VOPDName, + SDPatternOperator node = null_frag, string revOp = opName, + bit useSGPRInput = !eq(P.NumSrcArgs, 3)> + : VOP2eInst_Base; + class VOP2eInstAlias : InstAlias { } } -class VOP_MADAK : VOPProfile <[vt, vt, vt, vt]> { +class VOP_MADK_Base : VOPProfile <[vt, vt, vt, vt]> { + string AsmVOPDXDeferred = ?; +} + +class VOP_MADAK : VOP_MADK_Base { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = !if(!eq(vt.Size, 32), (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm), (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm)); + field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm); + // Note that both src0X and imm are deferred + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred); + field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm); + field string Asm32 = "$vdst, $src0, $src1, $imm"; + field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm"; + let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred"; + field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm"; field bit HasExt = 0; let IsSingle = 1; } @@ -280,10 +343,17 @@ class VOP_MADAK : VOPProfile <[vt, vt, vt, vt]> { def VOP_MADAK_F16 : VOP_MADAK ; def VOP_MADAK_F32 : VOP_MADAK ; -class VOP_MADMK : VOPProfile <[vt, vt, vt, vt]> { +class VOP_MADMK : VOP_MADK_Base { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); + field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X); + field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); + field string Asm32 = "$vdst, $src0, $imm, $src1"; + field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X"; + let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X"; + field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y"; field bit HasExt = 0; let IsSingle = 1; } @@ -308,6 +378,10 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); + let InsVOP3Base = getIns64, 3, + 0, HasModifiers, HasModifiers, HasOMod, + Src0Mod, Src1Mod, Src2Mod>.ret; + let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, getVregSrcForVT.ret:$src2, // stub argument @@ -330,6 +404,7 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v let HasExt = 1; let HasExtDPP = 1; + let HasExt32BitDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 0; let TieRegDPP = "$src2"; @@ -337,9 +412,9 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v def VOP_MAC_F16 : VOP_MAC ; def VOP_MAC_F32 : VOP_MAC ; -let HasExtDPP = 0 in +let HasExtDPP = 0, HasExt32BitDPP = 0 in def VOP_MAC_LEGACY_F32 : VOP_MAC ; -let HasExtSDWA = 0, HasExt64BitDPP = 1 in +let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in def VOP_MAC_F64 : VOP_MAC ; class VOP_DOT_ACC : VOP_MAC { @@ -355,6 +430,7 @@ def 
VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC { } def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC { + let HasExtVOP3DPP = 0; let HasSrc0Mods = 1; let HasSrc1Mods = 1; } @@ -368,13 +444,27 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; + let AsmVOP3DPPBase = Asm64; + let InsDPP = (ins DstRCDPP:$old, + Src0DPP:$src0, + Src1DPP:$src1, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP16 = !con(InsDPP, (ins FI:$fi)); + let InsDPP8 = (ins DstRCDPP:$old, + Src0DPP:$src0, + Src1DPP:$src1, + dpp8:$dpp8, FI:$fi); let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); + let OutsVOP3DPP = Outs64; + let OutsVOP3DPP8 = Outs64; } // Write out to vcc or arbitrary SGPR and read in from vcc or // arbitrary SGPR. def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> { + let HasSrc2Mods = 0; let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; @@ -384,6 +474,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=* let AsmDPP16 = AsmDPP#"$fi"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); + let AsmVOP3DPPBase = Asm64; + let OutsVOP3DPP = Outs64; + let OutsVOP3DPP8 = Outs64; // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. @@ -401,15 +494,20 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=* dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); + let InsDPP8 = (ins DstRCDPP:$old, + Src0DPP:$src0, + Src1DPP:$src1, + dpp8:$dpp8, FI:$fi); let HasExt = 1; let HasExtDPP = 1; + let HasExt32BitDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; } // Read in from vcc or arbitrary SGPR. 
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> { +class VOP2e_SGPR ArgVT> : VOPProfile { let Asm32 = "$vdst, $src0, $src1"; let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2"; let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; @@ -417,6 +515,7 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/ let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; + let AsmVOP3DPPBase = Asm64; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst); @@ -437,14 +536,22 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); + let InsDPP8 = (ins DstRCDPP:$old, + Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + dpp8:$dpp8, FI:$fi); let HasExt = 1; let HasExtDPP = 1; + let HasExt32BitDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; } -def VOP_READLANE : VOPProfile<[i32, i32, i32]> { +def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; +def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; + +def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> { let Outs32 = (outs SReg_32:$vdst); let Outs64 = Outs32; let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1); @@ -454,6 +561,7 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let HasExt = 0; let HasExtDPP = 0; + let HasExt32BitDPP = 0; let HasExt64BitDPP = 0; let HasExtSDWA = 0; let HasExtSDWA9 = 0; @@ -471,6 +579,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { let HasExt = 0; let HasExtDPP = 0; + let HasExt32BitDPP = 0; let HasExt64BitDPP = 0; let HasExtSDWA = 0; let HasExtSDWA9 = 0; @@ -480,31 +589,33 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { // VOP2 Instructions //===----------------------------------------------------------------------===// -defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>; +let SubtargetPredicate = isGFX11Plus in +defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>; +defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; let isCommutable = 1 in { let isReMaterializable = 1 in { -defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>; -defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>; -defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">; -defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; -defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>; +defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>; +defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>; +defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">; +defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>; +defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>; defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, 
AMDGPUmul_i24>; defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; -defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; -defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; +defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>; +defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>; defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN, smin>; defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN, smax>; defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN, umax>; defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">; defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">; -defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">; -defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN, and>; +defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">; +defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN, 0x12, "v_and_b32", and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; } // End isReMaterializable = 1 @@ -536,7 +647,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { -defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>; +defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>; defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; } @@ -555,20 +666,20 @@ def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, } // End isConvergent = 1 let isReMaterializable = 1 in { -defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT>; -defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT, add_ctpop>; -defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_lo>; -defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_hi>; -defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT, AMDGPUldexp>; +defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>; +defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; +defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; +defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; let ReadsModeReg = 0, mayRaiseFPException = 0 in { -defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT, AMDGPUpknorm_i16_f32>; -defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT, AMDGPUpknorm_u16_f32>; +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; } -defm 
V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT, AMDGPUpkrtz_f16_f32>; -defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT, AMDGPUpk_u16_u32>; -defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT, AMDGPUpk_i16_i32>; +defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>; +defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>; +defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>; let SubtargetPredicate = isGFX6GFX7 in { @@ -641,8 +752,9 @@ def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; -let SubtargetPredicate = Has16BitInsts in { +let SubtargetPredicate = Has16BitInsts in { +let isReMaterializable = 1 in { let FPDPRounding = 1 in { def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; @@ -664,9 +776,7 @@ def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; } } // End FPDPRounding = 1 -defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; -defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; -defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; + defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; @@ -675,12 +785,19 @@ defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; -let Constraints = "$vdst = $src2", DisableEncoding="$src2", - isConvertibleToThreeAddress = 1 in { -defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; +let SubtargetPredicate = isGFX8GFX9 in { + defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; + defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; + defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; } } // End isCommutable = 1 +} // End isReMaterializable = 1 +// FIXME: Missing FPDPRounding +let Constraints = "$vdst = $src2", DisableEncoding="$src2", + isConvertibleToThreeAddress = 1, isCommutable = 1 in { +defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; +} } // End SubtargetPredicate = Has16BitInsts let SubtargetPredicate = HasDLInsts in { @@ -722,7 +839,7 @@ let Constraints = "$vdst = $src2", DisableEncoding = "$src2", isConvertibleToThreeAddress = 1, isCommutable = 1 in -defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; +defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; } // End SubtargetPredicate = HasDLInsts @@ -750,7 +867,7 @@ let Constraints = "$vdst = $src2", isCommutable = 1, IsDOT = 1 in { let SubtargetPredicate = HasDot5Insts in - defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">; let SubtargetPredicate = HasDot6Insts in defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; @@ -788,20 +905,20 @@ let AddedComplexity = 30 in { } // End AddedComplexity = 30 let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in { -def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">; +def V_FMAMK_F32 : 
VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; let isCommutable = 1 in -def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">; +def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; } let SubtargetPredicate = isGFX10Plus in { -let FPDPRounding = 1 in { +let FPDPRounding = 1, isReMaterializable = 1 in { def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; let isCommutable = 1 in def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; -} // End FPDPRounding = 1 +} // End FPDPRounding = 1, isReMaterializable = 1 let Constraints = "$vdst = $src2", DisableEncoding="$src2", @@ -857,7 +974,7 @@ def : GCNPat < >; } -let Predicates = [Has16BitInsts] in { +let Predicates = [Has16BitInsts, isGFX8GFX9] in { // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. @@ -867,9 +984,6 @@ def : GCNPat< (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) >; - -let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { - def : GCNPat< (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))), (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) @@ -885,7 +999,10 @@ defm : Arithmetic_i16_0Hi_Pats; defm : Arithmetic_i16_0Hi_Pats; defm : Arithmetic_i16_0Hi_Pats; defm : Arithmetic_i16_0Hi_Pats; -} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9] + +} // End Predicates = [Has16BitInsts, isGFX8GFX9] + +let Predicates = [Has16BitInsts] in { def : ZExt_i16_i1_Pat; def : ZExt_i16_i1_Pat; @@ -917,8 +1034,16 @@ def : VOPBinOpClampPat; def : VOPBinOpClampPat; } +let SubtargetPredicate = isGFX11Plus in { + let isCommutable = 1 in { + defm V_AND_B16 : VOP2Inst <"v_and_b16", VOP_I16_I16_I16, and>; + defm V_OR_B16 : VOP2Inst <"v_or_b16", VOP_I16_I16_I16, or>; + defm V_XOR_B16 : VOP2Inst <"v_xor_b16", VOP_I16_I16_I16, xor>; + } // End isCommutable = 1 +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// -// Target-specific instruction encodings. +// DPP Encodings //===----------------------------------------------------------------------===// class VOP2_DPP op, VOP2_DPP_Pseudo ps, @@ -947,10 +1072,10 @@ class Base_VOP2_DPP16 op, VOP2_DPP_Pseudo ps, let OtherPredicates = ps.OtherPredicates; } -class VOP2_DPP16 op, VOP2_DPP_Pseudo ps, +class VOP2_DPP16 op, VOP2_DPP_Pseudo ps, int subtarget, string opName = ps.OpName, VOPProfile p = ps.Pfl> : Base_VOP2_DPP16, - SIMCInstr ; + SIMCInstr ; class VOP2_DPP8 op, VOP2_Pseudo ps, VOPProfile p = ps.Pfl> : @@ -972,11 +1097,254 @@ class VOP2_DPP8 op, VOP2_Pseudo ps, let OtherPredicates = ps.OtherPredicates; } +//===----------------------------------------------------------------------===// +// GFX11. 
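The VOPD_Component mixins attached to V_FMAMK_F32/V_FMAAK_F32 above register these FMAs as components of the GFX11 dual-issue VOPD encoding. For the mk/ak naming, a small sketch under the usual madmk/madak operand layout (illustrative helper names, not LLVM API): K is a 32-bit literal carried in the instruction stream, multiplied in by fmamk and added in by fmaak.

#include <cmath>

// v_fmamk_f32: d = s0 * K + s1, with K an inline 32-bit literal.
float fmamkF32(float s0, float k, float s1) { return std::fma(s0, k, s1); }

// v_fmaak_f32: d = s0 * s1 + K.
float fmaakF32(float s0, float s1, float k) { return std::fma(s0, s1, k); }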
+//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { + //===------------------------------- VOP2 -------------------------------===// + multiclass VOP2Only_Real_MADK_gfx11 op> { + def _gfx11 : + VOP2_Real(NAME), SIEncodingFamily.GFX11>, + VOP2_MADKe(NAME).Pfl>; + } + multiclass VOP2_Real_e32_gfx11 op> { + def _e32_gfx11 : + VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX11>, + VOP2e(NAME#"_e32").Pfl>; + } + multiclass VOP2Only_Real_e32_gfx11 op> { + let IsSingle = 1 in + defm NAME: VOP2_Real_e32_gfx11; + } + multiclass VOP2_Real_e64_gfx11 op> { + def _e64_gfx11 : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX11>, + VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; + } + multiclass VOP2_Real_dpp_gfx11 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx11 : VOP2_DPP16(NAME#"_dpp"), SIEncodingFamily.GFX11> { + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP2_Real_dpp8_gfx11 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_gfx11 : VOP2_DPP8(NAME#"_e32")> { + let DecoderNamespace = "DPP8GFX11"; + } + } + + //===------------------------- VOP2 (with name) -------------------------===// + multiclass VOP2_Real_e32_with_name_gfx11 op, string opName, + string asmName, bit single = 0> { + defvar ps = !cast(opName#"_e32"); + def _e32_gfx11 : + VOP2_Real, + VOP2e, + MnemonicAlias, Requires<[isGFX11Plus]> { + let AsmString = asmName # ps.AsmOperands; + let IsSingle = single; + } + } + multiclass VOP2_Real_e64_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e64"); + def _e64_gfx11 : + VOP3_Real, + VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl>, + MnemonicAlias, Requires<[isGFX11Plus]> { + let AsmString = asmName # ps.AsmOperands; + } + } + + multiclass VOP2_Real_dpp_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e32"); + foreach _ = BoolToList.ret in + def _dpp_gfx11 : VOP2_DPP16(opName#"_dpp"), + SIEncodingFamily.GFX11> { + let AsmString = asmName # ps.Pfl.AsmDPP16; + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP2_Real_dpp8_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e32"); + foreach _ = BoolToList.ret in + def _dpp8_gfx11 : VOP2_DPP8 { + let AsmString = asmName # ps.Pfl.AsmDPP8; + let DecoderNamespace = "DPP8GFX11"; + } + } + + //===------------------------------ VOP2be ------------------------------===// + multiclass VOP2be_Real_e32_gfx11 op, string opName, string asmName> { + defvar ps = !cast(opName#"_e32"); + def _e32_gfx11 : + VOP2_Real, + VOP2e { + let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands); + } + } + multiclass VOP2be_Real_dpp_gfx11 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx11 : + VOP2_DPP16(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst(", vcc", "", AsmDPP); + let DecoderNamespace = "DPPGFX11"; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w32_gfx11 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w64_gfx11 : + Base_VOP2_DPP16(opName#"_dpp"), 
asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp8_gfx11 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_gfx11 : + VOP2_DPP8(opName#"_e32")> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst(", vcc", "", AsmDPP8); + let DecoderNamespace = "DPP8GFX11"; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w32_gfx11 : + VOP2_DPP8(opName#"_e32")> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w64_gfx11 : + VOP2_DPP8(opName#"_e32")> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + +} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" + +// We don't want to override separate decoderNamespaces within these +multiclass VOP2_Realtriple_e64_gfx11 op> { + defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ; +} +multiclass VOP2_Realtriple_e64_with_name_gfx11 op, string opName, + string asmName> { + defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName> ; +} + +multiclass VOP2be_Real_gfx11 op, string opName, string asmName> : + VOP2be_Real_e32_gfx11, + VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>, + VOP2be_Real_dpp_gfx11, + VOP2be_Real_dpp8_gfx11; + +// Only for CNDMASK +multiclass VOP2e_Real_gfx11 op, string opName, string asmName> : + VOP2_Real_e32_gfx11, + VOP2_Realtriple_e64_gfx11, + VOP2be_Real_dpp_gfx11, + VOP2be_Real_dpp8_gfx11; + +multiclass VOP2Only_Real_gfx11 op> : + VOP2Only_Real_e32_gfx11, + VOP2_Real_dpp_gfx11, + VOP2_Real_dpp8_gfx11; + +multiclass VOP2_Real_NO_VOP3_gfx11 op> : + VOP2_Real_e32_gfx11, VOP2_Real_dpp_gfx11, VOP2_Real_dpp8_gfx11; + +multiclass VOP2_Real_FULL_gfx11 op> : + VOP2_Realtriple_e64_gfx11, VOP2_Real_NO_VOP3_gfx11; + +multiclass VOP2_Real_NO_VOP3_with_name_gfx11 op, string opName, + string asmName, bit isSingle = 0> : + VOP2_Real_e32_with_name_gfx11, + VOP2_Real_dpp_with_name_gfx11, + VOP2_Real_dpp8_with_name_gfx11; + +multiclass VOP2_Real_FULL_with_name_gfx11 op, string opName, + string asmName> : + VOP2_Realtriple_e64_with_name_gfx11, + VOP2_Real_NO_VOP3_with_name_gfx11; + +multiclass VOP2_Real_NO_DPP_gfx11 op> : + VOP2_Real_e32_gfx11, VOP2_Real_e64_gfx11; + +multiclass VOP2_Real_NO_DPP_with_name_gfx11 op, string opName, + string asmName> : + VOP2_Real_e32_with_name_gfx11, + VOP2_Real_e64_with_name_gfx11; + +defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", + "v_cndmask_b32">; +defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002, + "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>; +defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006, + "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">; +defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11<0x007, + "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">; +defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11<0x018>; +defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11<0x019>; +defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11<0x01a>; +defm V_ADD_CO_CI_U32 : + VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">; +defm 
V_SUB_CO_CI_U32 : + VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">; +defm V_SUBREV_CO_CI_U32 : + VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; + +defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f, + "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">; +defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>; + +// VOP3 only. +defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>; +defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>; +defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>; +defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>; +defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>; +defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>; +defm V_CVT_PKNORM_I16_F32 : VOP3Only_Realtriple_gfx11<0x321>; +defm V_CVT_PKNORM_U16_F32 : VOP3Only_Realtriple_gfx11<0x322>; +defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>; +defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>; +defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>; +defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>; +defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>; + +let SubtargetPredicate = isGFX11Plus in { + defm : VOP2eInstAliases; + + defm : VOP2bInstAliases< + V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">; + defm : VOP2bInstAliases< + V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">; + defm : VOP2bInstAliases< + V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">; +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // GFX10. //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { //===------------------------------- VOP2 -------------------------------===// multiclass VOP2Only_Real_MADK_gfx10 op> { def _gfx10 : @@ -1011,13 +1379,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP2_Real_dpp_gfx10 op> { - foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in - def _dpp_gfx10 : VOP2_DPP16(NAME#"_dpp")> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in + def _dpp_gfx10 : VOP2_DPP16(NAME#"_dpp"), SIEncodingFamily.GFX10> { let DecoderNamespace = "SDWA10"; } } multiclass VOP2_Real_dpp8_gfx10 op> { - foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } @@ -1056,15 +1424,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } multiclass VOP2_Real_dpp_gfx10_with_name op, string opName, string asmName> { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in - def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp")> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in + def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp"), SIEncodingFamily.GFX10> { VOP2_Pseudo ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP16; } } multiclass VOP2_Real_dpp8_gfx10_with_name op, string opName, string asmName> { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32")> { VOP2_Pseudo ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP8; @@ -1122,14 +1490,14 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = 
"GFX10" in { } } multiclass VOP2be_Real_dpp_gfx10 op, string opName, string asmName> { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp_gfx10 : - VOP2_DPP16(opName#"_dpp"), asmName> { + VOP2_DPP16(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> { string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; let AsmString = asmName # !subst(", vcc", "", AsmDPP); let DecoderNamespace = "SDWA10"; } - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp_w32_gfx10 : Base_VOP2_DPP16(opName#"_dpp"), asmName> { string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; @@ -1137,7 +1505,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let isAsmParserOnly = 1; let WaveSizePredicate = isWave32; } - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp_w64_gfx10 : Base_VOP2_DPP16(opName#"_dpp"), asmName> { string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; @@ -1147,14 +1515,14 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP2be_Real_dpp8_gfx10 op, string opName, string asmName> { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32")> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); let DecoderNamespace = "DPP8"; } - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_w32_gfx10 : VOP2_DPP8(opName#"_e32")> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; @@ -1162,7 +1530,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let isAsmParserOnly = 1; let WaveSizePredicate = isWave32; } - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList(opName#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_w64_gfx10 : VOP2_DPP8(opName#"_e32")> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; @@ -1189,7 +1557,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let IsSingle = 1; } } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" + +multiclass VOP2Only_Real_MADK_gfx10_gfx11 op> : + VOP2Only_Real_MADK_gfx10, VOP2Only_Real_MADK_gfx11; multiclass VOP2be_Real_gfx10 op, string opName, string asmName> : VOP2be_Real_e32_gfx10, @@ -1209,7 +1580,10 @@ multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, VOP2_Real_sdwa_gfx10, VOP2_Real_dpp_gfx10, VOP2_Real_dpp8_gfx10; -multiclass VOP2_Real_gfx10_with_name op, string opName, +multiclass VOP2_Real_gfx10_gfx11 op> : + VOP2_Real_gfx10, VOP2_Real_FULL_gfx11; + +multiclass VOP2_Real_with_name_gfx10 op, string opName, string asmName> : VOP2_Real_e32_gfx10_with_name, VOP2_Real_e64_gfx10_with_name, @@ -1217,36 +1591,41 @@ multiclass VOP2_Real_gfx10_with_name op, string opName, VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; +multiclass VOP2_Real_with_name_gfx10_gfx11 op, string opName, + string asmName> : + VOP2_Real_with_name_gfx10, + VOP2_Real_FULL_with_name_gfx11; + // NB: Same opcode as v_mac_legacy_f32 let DecoderNamespace = "GFX10_B" in defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; -defm V_XNOR_B32 : 
VOP2_Real_gfx10<0x01e>; -defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; -defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; -defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>; -defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; -defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; -defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; -defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; -defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; -defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; -defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; -defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; -defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; -defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; +defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>; +defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>; +defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>; +defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>; +defm V_ADD_F16 : VOP2_Real_gfx10_gfx11<0x032>; +defm V_SUB_F16 : VOP2_Real_gfx10_gfx11<0x033>; +defm V_SUBREV_F16 : VOP2_Real_gfx10_gfx11<0x034>; +defm V_MUL_F16 : VOP2_Real_gfx10_gfx11<0x035>; +defm V_FMAC_F16 : VOP2_Real_gfx10_gfx11<0x036>; +defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x037>; +defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x038>; +defm V_MAX_F16 : VOP2_Real_gfx10_gfx11<0x039>; +defm V_MIN_F16 : VOP2_Real_gfx10_gfx11<0x03a>; +defm V_LDEXP_F16 : VOP2_Real_gfx10_gfx11<0x03b>; let IsSingle = 1 in { -defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; + defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; } // VOP2 no carry-in, carry-out. defm V_ADD_NC_U32 : - VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">; + VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">; defm V_SUB_NC_U32 : - VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">; + VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">; defm V_SUBREV_NC_U32 : - VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">; + VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">; // VOP2 carry-in, carry-out. defm V_ADD_CO_CI_U32 : @@ -1275,7 +1654,7 @@ defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>; defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>; defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>; -let SubtargetPredicate = isGFX10Plus in { +let SubtargetPredicate = isGFX10Only in { defm : VOP2eInstAliases; defm : VOP2bInstAliases< @@ -1284,10 +1663,10 @@ let SubtargetPredicate = isGFX10Plus in { V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">; defm : VOP2bInstAliases< V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">; -} // End SubtargetPredicate = isGFX10Plus +} // End SubtargetPredicate = isGFX10Only //===----------------------------------------------------------------------===// -// GFX6, GFX7, GFX10. 
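One recurring detail in the GFX10/GFX11 realization multiclasses earlier in this section: the "_e64" (VOP3-encoded) form of a VOP2 opcode is built from the bit list {0, 1, 0, 0, op{5-0}}, which relocates the 6-bit VOP2 opcode into the 10-bit VOP3 opcode space at 0x100. A sketch of the arithmetic, with an illustrative helper name:

#include <cstdint>

// {0, 1, 0, 0, op{5-0}} == 0b01'0000'0000 | op, so VOP2 opcodes reappear
// at 0x100..0x13f when promoted to the VOP3 ("_e64") encoding.
constexpr uint16_t promoteVOP2ToVOP3(uint16_t vop2Op) {
  return 0x100 | (vop2Op & 0x3f);
}

// Example: v_add_nc_u32 is VOP2 opcode 0x025 in the list above, 0x125 as VOP3.
static_assert(promoteVOP2ToVOP3(0x025) == 0x125, "promotion example");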
+// GFX6, GFX7, GFX10, GFX11 //===----------------------------------------------------------------------===// class VOP2_DPPe op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : @@ -1338,6 +1717,9 @@ multiclass VOP2_Real_gfx6_gfx7 op> : multiclass VOP2_Real_gfx6_gfx7_gfx10 op> : VOP2_Real_gfx6_gfx7, VOP2_Real_gfx10; +multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11 op> : + VOP2_Real_gfx6_gfx7_gfx10, VOP2_Real_FULL_gfx11; + multiclass VOP2be_Real_gfx6_gfx7 op> : VOP2_Real_e32_gfx6_gfx7, VOP2be_Real_e64_gfx6_gfx7; @@ -1398,28 +1780,28 @@ let SubtargetPredicate = isGFX6GFX7 in { def : VOP2e64InstAlias; } // End SubtargetPredicate = isGFX6GFX7 -defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>; -defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>; -defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>; +defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>; +defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>; +defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>; defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>; defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>; -defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>; -defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x009>; -defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x00a>; -defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00b>; -defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00c>; -defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x00f>; -defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x010>; -defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x011>; -defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x012>; -defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x013>; -defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x014>; +defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>; +defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>; +defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>; +defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>; +defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>; +defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>; +defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>; +defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>; +defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>; +defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>; +defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>; defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>; defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>; defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>; -defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01b>; -defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01c>; -defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01d>; +defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>; +defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>; +defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>; defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>; defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>; defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>; @@ -1436,6 +1818,13 @@ multiclass VOP2_Real_MADK_vi op> { VOP2_MADKe(NAME).Pfl>; } +multiclass VOP2_Real_MADK_gfx940 op> { + def _gfx940 : VOP2_Real(NAME), SIEncodingFamily.GFX940>, + VOP2_MADKe(NAME).Pfl> { + let DecoderNamespace = "GFX9"; + } +} + multiclass VOP2_Real_e32_vi op> { def _e32_vi : VOP2_Real(NAME#"_e32"), SIEncodingFamily.VI>, @@ -1736,6 +2125,11 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus +let SubtargetPredicate = HasFmaakFmamkF32Insts in { +defm V_FMAMK_F32 : 
VOP2_Real_MADK_gfx940 <0x17>; +defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; +} + multiclass VOP2_Real_DOT_ACC_gfx9 op> : VOP2_Real_e32_vi { def _dpp_vi : VOP2_DPP(NAME#"_dpp")>; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 494e3aeb6d55..dddd0aacc140 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -6,191 +6,25 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// VOP3 Classes -//===----------------------------------------------------------------------===// - -class getVOP3ModPat { - dag src0 = !if(P.HasOMod, - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); - - list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT src0), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; - - list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT src0), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; - - list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT src0)))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3PModPat { - dag src0_dag = (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)); - dag src1_dag = (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)); - dag src2_dag = (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers)); - dag clamp_dag = (i1 timm:$clamp); - - list ret3 = [(set P.DstVT:$vdst, - !if(HasExplicitClamp, - (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag, clamp_dag), - (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag)))]; - - list ret2 = [(set P.DstVT:$vdst, - !if(HasExplicitClamp, - (DivergentFragOrOp.ret src0_dag, src1_dag, clamp_dag), - (DivergentFragOrOp.ret src0_dag, src1_dag)))]; - - list ret1 = [(set P.DstVT:$vdst, - !if(HasExplicitClamp, - (DivergentFragOrOp.ret src0_dag, clamp_dag), - (DivergentFragOrOp.ret src0_dag)))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3OpSelPat { - list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), - (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))]; - - list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), - (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))]; - - list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3OpSelModPat { - list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers), - (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), - (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))]; - - list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)), - (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, 
i32:$src0_modifiers))), - (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))]; - - list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3Pat { - list ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; - list ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0, P.Src1VT:$src1))]; - list ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0))]; - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3ClampPat { - list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; - list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; - list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3MAIPat { - list ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, - timm:$cbsz, timm:$abid, timm:$blgp))]; -} - -// Consistently gives instructions a _e64 suffix. -multiclass VOP3Inst_Pseudo_Wrapper pattern = []> { - def _e64 : VOP3_Pseudo; -} - -class VOP3InstBase : - VOP3_Pseudo.ret, - getVOP3OpSelPat.ret), - !if(P.HasModifiers, - getVOP3ModPat.ret, - !if(P.HasIntClamp, - getVOP3ClampPat.ret, - !if (P.IsMAI, - getVOP3MAIPat.ret, - getVOP3Pat.ret)))), - 0, P.HasOpSel> { - - let IntClamp = P.HasIntClamp; - let AsmMatchConverter = - !if(P.HasOpSel, - "cvtVOP3OpSel", - !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), - "cvtVOP3", - "")); -} - -multiclass VOP3Inst { - def _e64 : VOP3InstBase; -} - // Special case for v_div_fmas_{f32|f64}, since it seems to be the // only VOP instruction that implicitly reads VCC. 
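The comment above singles out v_div_fmas_{f32|f64} for its implicit VCC read, and the profiles that follow disable the DPP extensions for it. For context, a strongly hedged sketch of what that VCC bit does (our reading of the ISA manuals, not something stated in this patch): it requests that the FMA result be scaled by 2^32 (2^64 for f64) as the final step of the scaled division expansion.

#include <cmath>

// Hedged sketch: v_div_fmas_f32 as an FMA whose result is conditionally
// scaled by 2^32 depending on the implicitly-read VCC bit.
float divFmasF32(float s0, float s1, float s2, bool vcc) {
  float r = std::fma(s0, s1, s2);
  return vcc ? r * 0x1p32f : r;
}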
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in { def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> { let Outs64 = (outs DstRC.RegClass:$vdst); + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; } def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> { let Outs64 = (outs DstRC.RegClass:$vdst); } } -class VOP3Features<bit Clamp = 0, bit OpSel = 0, bit Packed = 0, bit MAI = 0> { - bit HasClamp = Clamp; - bit HasOpSel = OpSel; - bit IsPacked = Packed; - bit IsMAI = MAI; -} - -def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; -def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; -def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; -def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; -def VOP3_MAI : VOP3Features<0, 0, 0, 1>; - -class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> { - - let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); - let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); - let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); - let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); - - let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers)); - let IsSingle = 1; -} - class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> { let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; let IsSingle = 1; + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; } def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>; @@ -198,12 +32,22 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>; def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { let HasClamp = 1; - let IsSingle = 1; + let IsSingle = 1; let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; } +class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P>

{ + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; +} + +def DIV_FIXUP_F32_PROF : VOP3_Profile { + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; +} + //===----------------------------------------------------------------------===// // VOP3 INTERP //===----------------------------------------------------------------------===// @@ -304,10 +148,10 @@ defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile, fmaxnum_l } // End SchedRW = [WriteDoubleAdd] let SchedRW = [WriteIntMul] in { -defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile, DivergentBinFrag>; -defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile, mulhu>; -defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile>; -defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile, mulhs>; +defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF, DivergentBinFrag>; +defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF, mulhu>; +defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF>; +defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF, mulhs>; } // End SchedRW = [WriteIntMul] } // End isReMaterializable = 1 @@ -367,7 +211,7 @@ let isCommutable = 1 in { } // End isCommutable = 1 defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; -defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile, AMDGPUdiv_fixup>; +defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>; let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in { defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUdiv_fixup>; @@ -419,9 +263,9 @@ def : GCNPat< >; let isReMaterializable = 1 in { -let SubtargetPredicate = isGFX6GFX7GFX10 in { +let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; -} // End SubtargetPredicate = isGFX6GFX7GFX10 +} // End SubtargetPredicate = isGFX6GFX7GFX10Plus let SchedRW = [Write32Bit] in { let SubtargetPredicate = isGFX8Plus in { @@ -430,21 +274,30 @@ defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile, AMD } // End SchedRW = [Write32Bit] } // End isReMaterializable = 1 -let SubtargetPredicate = isGFX7Plus in { +def VOPProfileMQSAD : VOP3_Profile { + let HasModifiers = 0; +} +let SubtargetPredicate = isGFX7Plus in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; -defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile>; +defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] +} // End SubtargetPredicate = isGFX7Plus let isCommutable = 1 in { let SchedRW = [WriteIntMul, WriteSALU] in { +let SubtargetPredicate = isGFX7GFX8GFX9GFX10 in { defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; +} +let SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst" in { +defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32_gfx11", VOP3b_I64_I1_I32_I32_I64>; +defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32_gfx11", VOP3b_I64_I1_I32_I32_I64>; +} // End SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst" } // End SchedRW = [WriteIntMul, WriteSALU] } // End isCommutable = 1 -} // End SubtargetPredicate = isGFX7Plus let FPDPRounding = 1 in { let Predicates = [Has16BitInsts, isGFX8Only] in { @@ -557,7 +410,7 @@ defm: Ternary_i16_Pats_gfx9; } // End Predicates = [Has16BitInsts, 
isGFX10Plus] -class ThreeOpFrag : PatFrag< +class ThreeOpFragSDAG : PatFrag< (ops node:$x, node:$y, node:$z), // When the inner operation is used multiple times, selecting 3-op // instructions may still be beneficial -- if the other users can be @@ -587,7 +440,9 @@ class ThreeOpFrag : PatFrag< return true; }]> { let PredicateCodeUsesOperands = 1; +} +class ThreeOpFrag : ThreeOpFragSDAG { // The divergence predicate is irrelevant in GlobalISel, as we have // proper register bank checks. We just need to verify the constant // bus restriction when all the sources are considered. @@ -609,6 +464,23 @@ class ThreeOpFrag : PatFrag< }]; } +def shl_0_to_4 : PatFrag< + (ops node:$src0, node:$src1), (shl node:$src0, node:$src1), + [{ + if (auto *C = dyn_cast(N->getOperand(1))) { + return C->getZExtValue() <= 4; + } + return false; + }]> { + let GISelPredicateCode = [{ + int64_t Imm = 0; + if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Imm)) && + !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm)))) + return false; + return (uint64_t)Imm <= 4; + }]; +} + let SubtargetPredicate = isGFX9Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile>; @@ -649,6 +521,10 @@ defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile>; } // End isReMaterializable = 1 +// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64 +// src0 is shifted left by 0-4 (use “0” to get ADD_U64). +let SubtargetPredicate = isGFX940Plus in +defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile>; class ThreeOp_i32_Pats : GCNPat < // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. @@ -664,6 +540,12 @@ def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; +let SubtargetPredicate = isGFX940Plus in +def : GCNPat< + (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), + (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) +>; + def : VOPBinOpClampPat; def : VOPBinOpClampPat; @@ -688,6 +570,33 @@ def : OpSelBinOpClampPat; def : OpSelBinOpClampPat; } // End SubtargetPredicate = isGFX9Plus +// FIXME: GlobalISel in general does not handle instructions with 2 results, +// so it cannot use these patterns. +multiclass IMAD32_Pats { + def : GCNPat < + (ThreeOpFrag i32:$src0, i32:$src1, i32:$src2), + (EXTRACT_SUBREG (inst $src0, $src1, + (REG_SEQUENCE SReg_64, // Use scalar and let it be legalized + $src2, sub0, + (i32 (IMPLICIT_DEF)), sub1), + 0 /* clamp */), + sub0) + >; + // Immediate src2 in the pattern above will not fold because it would be partially + // undef. Hence define specialized pattern for this case. + // FIXME: GlobalISel pattern exporter fails to export a pattern like this and asserts, + // make it SDAG only. 
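Before the specialized immediate-src2 pattern that follows: the IMAD32_Pats lowering is sound even though REG_SEQUENCE pairs $src2 with an IMPLICIT_DEF high half, because the low 32 bits of the 64-bit v_mad_u64_u32 result do not depend on the high half of the addend. A minimal sketch of that identity, with illustrative names:

#include <cstdint>

// lo32((a * b) + c64) depends only on lo32(c64): any value X in the high
// half contributes X * 2^32, which vanishes mod 2^32. This is why the
// pattern above may take sub0 of the wide mad as the 32-bit mul-add.
uint32_t madLo32(uint32_t a, uint32_t b, uint32_t c, uint32_t undefHi) {
  uint64_t addend = (uint64_t(undefHi) << 32) | c;
  uint64_t wide = uint64_t(a) * uint64_t(b) + addend; // v_mad_u64_u32
  return static_cast<uint32_t>(wide);                 // EXTRACT_SUBREG ... sub0
}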
+ def : GCNPat < + (ThreeOpFragSDAG i32:$src0, i32:$src1, (i32 imm:$src2)), + (EXTRACT_SUBREG (inst $src0, $src1, (i64 (as_i64imm $src2)), 0 /* clamp */), sub0) + >; +} + +let SubtargetPredicate = isGFX9GFX10 in // exclude pre-GFX9 where it was slow +defm : IMAD32_Pats; +let SubtargetPredicate = isGFX11Only in +defm : IMAD32_Pats; + def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3_OPSEL> { let Src0RC64 = VRegSrc_32; let Src1RC64 = SCSrc_b32; @@ -697,6 +606,8 @@ def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3 IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2, VGPR_32:$vdst_in, op_sel0:$op_sel); let HasClamp = 0; + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; } class PermlanePat, V_PERMLANEX16_B32_e64>; + + defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile, add>; + defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile, sub>; + + def : OpSelBinOpClampPat; + def : OpSelBinOpClampPat; + + // Undo sub x, c -> add x, -c canonicalization since c is more likely + // an inline immediate than -c. + def : GCNPat< + (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), + (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0) + >; + } // End SubtargetPredicate = isGFX10Plus class DivFmasPat : GCNPat< @@ -773,6 +698,36 @@ def : DivFmasPat; def : DivFmasPat; } +class VOP3_DOT_Profile : VOP3_Profile { + // FIXME VOP3 DPP versions are unsupported + let HasExtVOP3DPP = 0; + let HasClamp = 0; + let HasOMod = 0; + let InsVOP3OpSel = getInsVOP3OpSel.ret, FPVRegInputMods, IntOpSelMods), + !if(isFloatType.ret, FPVRegInputMods, IntOpSelMods), + !if(isFloatType.ret, FPVRegInputMods, IntOpSelMods)>.ret; +} + +let SubtargetPredicate = isGFX11Plus in { + defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile>; + defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile>; + defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile>; + defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile>; + defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile>; + defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile>; + defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile>; + defm V_MINMAX_I32 : VOP3Inst<"v_minmax_i32", VOP3_Profile>; + defm V_CVT_PK_I16_F32 : VOP3Inst<"v_cvt_pk_i16_f32", VOP3_Profile>; + defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile>; +} // End SubtargetPredicate = isGFX11Plus + +let SubtargetPredicate = HasDot8Insts in { + defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile, int_amdgcn_fdot2_f16_f16>; + defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile, int_amdgcn_fdot2_bf16_bf16>; +} + //===----------------------------------------------------------------------===// // Integer Clamp Patterns //===----------------------------------------------------------------------===// @@ -813,16 +768,137 @@ def : IntClampPat; def : IntClampPat; def : IntClampPat; - //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GFX11. 
+//===----------------------------------------------------------------------===// + +defm V_FMA_DX9_ZERO_F32 : VOP3_Real_with_name_gfx11<0x209, "V_FMA_LEGACY_F32", "v_fma_dx9_zero_f32">; +defm V_MAD_I32_I24 : VOP3_Realtriple_gfx11<0x20a>; +defm V_MAD_U32_U24 : VOP3_Realtriple_gfx11<0x20b>; +defm V_CUBEID_F32 : VOP3_Realtriple_gfx11<0x20c>; +defm V_CUBESC_F32 : VOP3_Realtriple_gfx11<0x20d>; +defm V_CUBETC_F32 : VOP3_Realtriple_gfx11<0x20e>; +defm V_CUBEMA_F32 : VOP3_Realtriple_gfx11<0x20f>; +defm V_BFE_U32 : VOP3_Realtriple_gfx11<0x210>; +defm V_BFE_I32 : VOP3_Realtriple_gfx11<0x211>; +defm V_BFI_B32 : VOP3_Realtriple_gfx11<0x212>; +defm V_FMA_F32 : VOP3_Realtriple_gfx11<0x213>; +defm V_FMA_F64 : VOP3_Real_Base_gfx11<0x214>; +defm V_LERP_U8 : VOP3_Realtriple_gfx11<0x215>; +defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11<0x216>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11<0x217>; +defm V_MULLIT_F32 : VOP3_Realtriple_gfx11<0x218>; +defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>; +defm V_MIN3_I32 : VOP3_Realtriple_gfx11<0x21a>; +defm V_MIN3_U32 : VOP3_Realtriple_gfx11<0x21b>; +defm V_MAX3_F32 : VOP3_Realtriple_gfx11<0x21c>; +defm V_MAX3_I32 : VOP3_Realtriple_gfx11<0x21d>; +defm V_MAX3_U32 : VOP3_Realtriple_gfx11<0x21e>; +defm V_MED3_F32 : VOP3_Realtriple_gfx11<0x21f>; +defm V_MED3_I32 : VOP3_Realtriple_gfx11<0x220>; +defm V_MED3_U32 : VOP3_Realtriple_gfx11<0x221>; +defm V_SAD_U8 : VOP3_Realtriple_gfx11<0x222>; +defm V_SAD_HI_U8 : VOP3_Realtriple_gfx11<0x223>; +defm V_SAD_U16 : VOP3_Realtriple_gfx11<0x224>; +defm V_SAD_U32 : VOP3_Realtriple_gfx11<0x225>; +defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11<0x226>; +defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11<0x227>; +defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11<0x228>; +defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11<0x237>; +defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11<0x238>; +defm V_MSAD_U8 : VOP3_Realtriple_gfx11<0x239>; +defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11<0x23a>; +defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11<0x23b>; +defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11<0x23d>; +defm V_XOR3_B32 : VOP3_Realtriple_gfx11<0x240>; +defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11<0x241, "V_MAD_U16_gfx9", "v_mad_u16">; +defm V_PERM_B32 : VOP3_Realtriple_gfx11<0x244>; +defm V_XAD_U32 : VOP3_Realtriple_gfx11<0x245>; +defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11<0x246>; +defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11<0x247>; +defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11<0x248, "V_FMA_F16_gfx9", "v_fma_f16">; +defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>; +defm V_MIN3_I16 : VOP3_Realtriple_gfx11<0x24a>; +defm V_MIN3_U16 : VOP3_Realtriple_gfx11<0x24b>; +defm V_MAX3_F16 : VOP3_Realtriple_gfx11<0x24c>; +defm V_MAX3_I16 : VOP3_Realtriple_gfx11<0x24d>; +defm V_MAX3_U16 : VOP3_Realtriple_gfx11<0x24e>; +defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>; +defm V_MED3_I16 : VOP3_Realtriple_gfx11<0x250>; +defm V_MED3_U16 : VOP3_Realtriple_gfx11<0x251>; +defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11<0x253, "V_MAD_I16_gfx9", "v_mad_i16">; +defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; +defm V_ADD3_U32 : VOP3_Realtriple_gfx11<0x255>; +defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11<0x256>; +defm V_AND_OR_B32 : VOP3_Realtriple_gfx11<0x257>; +defm V_OR3_B32 : VOP3_Realtriple_gfx11<0x258>; +defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11<0x259>; +defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11<0x25a>; +defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11<0x25b>; +defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11<0x25c>; +defm V_MAXMIN_F32 : 
VOP3_Realtriple_gfx11<0x25e>; +defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>; +defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>; +defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>; +defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>; +defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>; +defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>; +defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>; +// FIXME VOP3 DPP Dot instructions are unsupported +defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>; +defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>; +defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; +defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; +defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; +defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">; +defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11<0x303>; +defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11<0x304>; +defm V_MUL_LO_U16 : VOP3Only_Realtriple_gfx11<0x305>; +defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11<0x306>; +defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11<0x307>; +defm V_MAX_U16 : VOP3Only_Realtriple_gfx11<0x309>; +defm V_MAX_I16 : VOP3Only_Realtriple_gfx11<0x30a>; +defm V_MIN_U16 : VOP3Only_Realtriple_gfx11<0x30b>; +defm V_MIN_I16 : VOP3Only_Realtriple_gfx11<0x30c>; +defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30d, "V_ADD_I16", "v_add_nc_i16">; +defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30e, "V_SUB_I16", "v_sub_nc_i16">; +defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11<0x311>; +defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >; +defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >; +defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11<0x325, "V_SUB_I32", "v_sub_nc_i32">; +defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11<0x326, "V_ADD_I32", "v_add_nc_i32">; +defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>; +defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>; +defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>; +defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>; +defm V_LDEXP_F64 : VOP3_Real_Base_gfx11<0x32b>; +defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11<0x32c>; +defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11<0x32d>; +defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11<0x32e>; +defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11<0x32f>; +defm V_LSHLREV_B16 : VOP3Only_Realtriple_gfx11<0x338>; +defm V_LSHRREV_B16 : VOP3Only_Realtriple_gfx11<0x339>; +defm V_ASHRREV_I16 : VOP3Only_Realtriple_gfx11<0x33a>; +defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>; +defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11<0x33d>; +defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11<0x33e>; +defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11<0x360>; // Pseudo in VOP2 +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { + defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11<0x361>; // Pseudo in VOP2 +} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) +defm V_AND_B16 : VOP3Only_Realtriple_gfx11<0x362>; +defm V_OR_B16 : VOP3Only_Realtriple_gfx11<0x363>; +defm V_XOR_B16 : VOP3Only_Realtriple_gfx11<0x364>; + //===----------------------------------------------------------------------===// // GFX10. 
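Before the section returns to GFX10 below: the fused three-source min/max ops just realized (v_maxmin_*, v_minmax_*) read naturally as outer(inner(src0, src1), src2). A hedged scalar sketch of the assumed semantics, ignoring the hardware's IEEE min/max NaN rules:

#include <algorithm>

// Assumed semantics for the fused GFX11 three-source min/max ops:
// v_maxmin_f32: min(max(s0, s1), s2); v_minmax_f32: max(min(s0, s1), s2).
float maxminF32(float s0, float s1, float s2) {
  return std::min(std::max(s0, s1), s2);
}
float minmaxF32(float s0, float s1, float s2) {
  return std::max(std::min(s0, s1), s2);
}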
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass VOP3_Real_gfx10 op> { def _gfx10 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, @@ -867,7 +943,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let AsmString = asmName # ps.AsmOperands; } } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; @@ -935,10 +1011,11 @@ defm V_MAD_I16 : defm V_DIV_FIXUP_F16 : VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; +defm V_ADD_NC_U16 : VOP3OpSel_Real_gfx10<0x303>; +defm V_SUB_NC_U16 : VOP3OpSel_Real_gfx10<0x304>; + // FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these // (they do not support SDWA or DPP). -defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16", "v_add_nc_u16">; -defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16", "v_sub_nc_u16">; defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16", "v_mul_lo_u16">; defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">; defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">; @@ -1273,3 +1350,5 @@ defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>; defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>; defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>; + +defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 707475ceccee..59ce532af59b 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -10,19 +10,33 @@ // VOP3P Classes //===----------------------------------------------------------------------===// +class VOP3P_Profile : VOP3_Profile { + let IsVOP3P = 1; + let HasExtVOP3DPP = HasDPP; + // We do not want to print src modifiers for vop3p because the bits are + // overloaded in meaning and the logic in printOperandAndFPInputMods is + // wrong for vop3p + let AsmVOP3DPPBase = AsmVOP3P; +} + // Used for FMA_MIX* and MAD_MIX* insts // Their operands are only sort of f16 operands. Depending on // op_sel_hi, these may be interpreted as f32. The inline immediate // values are really f16 converted to f32, so we treat these as f16 // operands. class VOP3P_Mix_Profile : VOP3_Profile { + bit useTiedOutput = 0> : VOP3P_Profile { bit UseTiedOutput = useTiedOutput; dag srcs = (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0, FP16InputMods:$src1_modifiers, VCSrc_f16:$src1, FP16InputMods:$src2_modifiers, VCSrc_f16:$src2); + dag dpp_srcs = + (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, + FP16InputMods:$src1_modifiers, VCSrc_f16:$src1, + FP16InputMods:$src2_modifiers, VCSrc_f16:$src2); // FIXME: clampmod0 misbehaves with the non-default vdst_in // following it. 
For now workaround this by requiring clamp @@ -35,19 +49,27 @@ class VOP3P_Mix_Profile { + SDPatternOperator node = null_frag, bit IsDOT = 0> { def NAME : VOP3P_Pseudo.ret, + getVOP3PModPat.ret, getVOP3Pat.ret)>; + let SubtargetPredicate = isGFX11Plus in { + if P.HasExtVOP3DPP then + def _dpp : VOP3_DPP_Pseudo { + let VOP3P = 1; + let PseudoInstr = OpName #"_dpp"; + } + } // end SubtargetPredicate = isGFX11Plus } - // Non-packed instructions that use the VOP3P encoding. // VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed. multiclass VOP3_VOP3PInst { @@ -55,37 +77,47 @@ multiclass VOP3_VOP3PInst { let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", ""); let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", ""); } + let SubtargetPredicate = isGFX11Plus in { + if P.HasExtVOP3DPP then + def _dpp : VOP3_DPP_Pseudo { + let VOP3P = 1; + let PseudoInstr = OpName#"_dpp"; + let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", ""); + let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", ""); + } + } // end SubtargetPredicate = isGFX11Plus } +let isReMaterializable = 1 in { let isCommutable = 1 in { -defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile>; -defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile>; +defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3P_Profile>; +defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3P_Profile>; let FPDPRounding = 1 in { -defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile, any_fma>; -defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile, any_fadd>; -defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile, any_fmul>; +defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3P_Profile, any_fma>; +defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3P_Profile, any_fadd>; +defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3P_Profile, any_fmul>; } // End FPDPRounding = 1 -defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile, fmaxnum_like>; -defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile, fminnum_like>; +defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3P_Profile, fmaxnum_like>; +defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3P_Profile, fminnum_like>; -defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile, add>; -defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile>; -defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile, mul>; +defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3P_Profile, add>; +defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3P_Profile>; +defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3P_Profile, mul>; -defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile, smin>; -defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile, umin>; -defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile, smax>; -defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile, umax>; +defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile, smin>; +defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile, umin>; +defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile, smax>; +defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile, umax>; } -defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile>; -defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; - -defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile, clshl_rev_16>; -defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile, cashr_rev_16>; -defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile, clshr_rev_16>; +defm V_PK_SUB_U16 : 
VOP3PInst<"v_pk_sub_u16", VOP3P_Profile>; +defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3P_Profile, sub>; +defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3P_Profile, clshl_rev_16>; +defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3P_Profile, cashr_rev_16>; +defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile, clshr_rev_16>; +} // End isReMaterializable = 1 let SubtargetPredicate = HasVOP3PInsts in { @@ -178,6 +210,7 @@ let SubtargetPredicate = HasMadMixInsts in { // Size of src arguments (16/32) is controlled by op_sel. // For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi. let isCommutable = 1, mayRaiseFPException = 0 in { +let isReMaterializable = 1 in defm V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3P_Mix_Profile>; let FPDPRounding = 1 in { @@ -197,6 +230,8 @@ defm : MadFmaMixPats; // Essentially the same as the mad_mix versions let SubtargetPredicate = HasFmaMixInsts in { let isCommutable = 1 in { + +let isReMaterializable = 1 in defm V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3P_Mix_Profile>; let FPDPRounding = 1 in { @@ -297,34 +332,63 @@ let IsDOT = 1 in { let SubtargetPredicate = HasDot2Insts in { defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", - VOP3_Profile, int_amdgcn_sdot2, 1>; + VOP3P_Profile, int_amdgcn_sdot2, 1>; defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", - VOP3_Profile, int_amdgcn_udot2, 1>; + VOP3P_Profile, int_amdgcn_udot2, 1>; } // End SubtargetPredicate = HasDot2Insts let SubtargetPredicate = HasDot7Insts in { defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", - VOP3_Profile, + VOP3P_Profile, AMDGPUfdot2, 1/*ExplicitClamp*/>; defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", - VOP3_Profile, int_amdgcn_udot4, 1>; + VOP3P_Profile, int_amdgcn_udot4, 1>; defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", - VOP3_Profile, int_amdgcn_udot8, 1>; + VOP3P_Profile, int_amdgcn_udot8, 1>; } // End SubtargetPredicate = HasDot7Insts let SubtargetPredicate = HasDot1Insts in { defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", - VOP3_Profile, int_amdgcn_sdot4, 1>; + VOP3P_Profile, int_amdgcn_sdot4, 1>; defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", - VOP3_Profile, int_amdgcn_sdot8, 1>; + VOP3P_Profile, int_amdgcn_sdot8, 1>; } // End SubtargetPredicate = HasDot1Insts + +let SubtargetPredicate = HasDot8Insts in { + +defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", + VOP3P_Profile, + int_amdgcn_fdot2_f32_bf16, 1>; + +} // End SubtargetPredicate = HasDot8Insts + } // End let IsDOT = 1 +multiclass VOP3PDOTIUInst { + let IsDOT = 1 in + defm NAME : VOP3PInst, + null_frag, 1>; + // Dot-iu instructions consider input as signed if imod neg bits are set. Thus + // Dot-iu Intrinsics have extra operands and require separate codegen pattern. 
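(Aside: a minimal C++ model of the mixed-signedness dot product behind v_dot4_i32_iu8 may make the neg-bit convention above concrete. This is an illustrative sketch only — the function name is invented and clamp handling is omitted; the two bool flags stand in for the per-source imod neg bits.)

#include <cstdint>

// Accumulating 4x8-bit dot product; each source is read as signed
// bytes only when its "neg" modifier bit is set, unsigned otherwise.
int32_t Dot4IU8(bool Src0Signed, uint32_t Src0,
                bool Src1Signed, uint32_t Src1, int32_t Src2) {
  int64_t Sum = Src2;
  for (int Lane = 0; Lane < 4; ++Lane) {
    uint8_t A = (Src0 >> (8 * Lane)) & 0xff;
    uint8_t B = (Src1 >> (8 * Lane)) & 0xff;
    int64_t AV = Src0Signed ? (int64_t)(int8_t)A : (int64_t)A;
    int64_t BV = Src1Signed ? (int64_t)(int8_t)B : (int64_t)B;
    Sum += AV * BV;
  }
  return (int32_t)Sum; // clamp handling intentionally omitted
}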
+ def : GCNPat < (intrinsic_node (DotIUVOP3PMods i32:$src0_mods), i32:$src0, + (DotIUVOP3PMods i32:$src1_mods), i32:$src1, + i32:$src2, (i1 timm:$clamp)), + (!cast(NAME) $src0_mods, i32:$src0, + $src1_mods, i32:$src1, + (i32 8), i32:$src2, i1:$clamp) + >; +} + +let SubtargetPredicate = HasDot8Insts in { +defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>; +defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>; +} // End SubtargetPredicate = HasDot8Insts + def : UDot2Pat; def : SDot2Pat; @@ -365,18 +429,18 @@ def VDst_256 : VOPDstOperand; def VDst_512 : VOPDstOperand; def VDst_1024 : VOPDstOperand; -def VOPProfileAccRead : VOP3_Profile { +def VOPProfileAccRead : VOP3P_Profile { let Src0RC64 = ARegSrc_32; } -def VOPProfileAccWrite : VOP3_Profile { +def VOPProfileAccWrite : VOP3P_Profile { let DstRC = ADst_32; - let Src0RC64 = VISrc_b32; + let Src0RC64 = VCSrc_b32; } class VOPProfileMAI - : VOP3_Profile { + : VOP3P_Profile { let DstRC = _DstRC; let Src0RC64 = SrcABRC; let Src1RC64 = SrcABRC; @@ -387,15 +451,27 @@ class VOPProfileMAI + : VOPProfileMAI { + let Src1RC64 = _SrcBRC; + let Src2VT = DstVT; + let Asm64 = " $vdst, $src0, $src1, $idx$cbsz$abid"; + let Outs64 = (outs DstRC:$vdst); + let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, VRegSrc_32:$idx, cbsz:$cbsz, abid:$abid, Src2RC64:$src2); +} + def VOPProfileMAI_F32_F32_X4 : VOPProfileMAI; def VOPProfileMAI_F32_F32_X16 : VOPProfileMAI; def VOPProfileMAI_F32_F32_X32 : VOPProfileMAI; @@ -413,6 +489,10 @@ def VOPProfileMAI_F32_V4I16_X16 : VOPProfileMAI; def VOPProfileMAI_F64_16X16X4F64 : VOPProfileMAI; def VOPProfileMAI_F64_4X4X4F64 : VOPProfileMAI; +def VOPProfileMAI_I32_I64_X16 : VOPProfileMAI; +def VOPProfileMAI_I32_I64_X32 : VOPProfileMAI; +def VOPProfileMAI_F32_V2F32_X16 : VOPProfileMAI; +def VOPProfileMAI_F32_V2F32_X32 : VOPProfileMAI; def VOPProfileMAI_F32_F32_X4_VCD : VOPProfileMAI; def VOPProfileMAI_F32_F32_X16_VCD : VOPProfileMAI; @@ -431,12 +511,37 @@ def VOPProfileMAI_F32_V4I16_X16_VCD : VOPProfileMAI; def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI; def VOPProfileMAI_F64_4X4X4F64_VCD : VOPProfileMAI; +def VOPProfileMAI_I32_I64_X16_VCD : VOPProfileMAI; +def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI; +def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI; +def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI; + +def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC; class MFMATable { bit IsMac = is_mac; string FMAOp = Name; } +class MAIFrag : PatFrag < + (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$abid, node:$blgp), + (Op $src0, $src1, $src2, $cbsz, $abid, $blgp), + pred +>; + +let GISelPredicateCode = [{ return MF.getInfo()->mayNeedAGPRs(); }] in +class AgprMAIFrag : + MAIFraggetInfo()->mayNeedAGPRs(); }]>; + +let GISelPredicateCode = [{ return !MF.getInfo()->mayNeedAGPRs(); }] in +class VgprMAIFrag : + MAIFraggetInfo()->mayNeedAGPRs(); }]>; + let Predicates = [HasMAIInsts] in { let isAsCheapAsAMove = 1, isReMaterializable = 1 in { @@ -446,47 +551,62 @@ let isAsCheapAsAMove = 1, isReMaterializable = 1 in { } // End isMoveImm = 1 } // End isAsCheapAsAMove = 1, isReMaterializable = 1 +class MAIInst + : VOP3InstBase { + Instruction Opcode = !cast(NAME); + bit is_dgemm = 0; + bit 
is_gfx940_xdl = 0; +} + multiclass MAIInst("VOPProfileMAI_" # P).NoDstOverlap> { let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in { // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported. let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in { - defm "" : VOP3Inst("VOPProfileMAI_" # P), !if(NoDstOverlap, null_frag, node)>, - MFMATable<0, NAME # "_e64">; + def _e64 : MAIInst("VOPProfileMAI_" # P), + !if(NoDstOverlap, null_frag, AgprMAIFrag)>, + MFMATable<0, NAME # "_e64">; let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in - defm _vgprcd : VOP3Inst("VOPProfileMAI_" # P # "_VCD")>, - MFMATable<0, NAME # "_vgprcd_e64">; + def _vgprcd_e64 : MAIInst("VOPProfileMAI_" # P # "_VCD"), + !if(NoDstOverlap, null_frag, VgprMAIFrag)>, + MFMATable<0, NAME # "_vgprcd_e64">; } foreach _ = BoolToList.ret in { let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""), isConvertibleToThreeAddress = NoDstOverlap, Mnemonic = OpName in { - defm "_mac" : VOP3Inst("VOPProfileMAI_" # P), node>, - MFMATable<1, NAME # "_e64">; + def "_mac_e64" : MAIInst("VOPProfileMAI_" # P), AgprMAIFrag>, + MFMATable<1, NAME # "_e64">; let SubtargetPredicate = isGFX90APlus in - defm _mac_vgprcd : VOP3Inst("VOPProfileMAI_" # P # "_VCD")>, - MFMATable<1, NAME # "_vgprcd_e64">; + def _mac_vgprcd_e64 : MAIInst("VOPProfileMAI_" # P # "_VCD"), + VgprMAIFrag>, + MFMATable<1, NAME # "_vgprcd_e64">; } } } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 } defm V_MFMA_F32_4X4X1F32 : MAIInst<"v_mfma_f32_4x4x1f32", "F32_F32_X4", int_amdgcn_mfma_f32_4x4x1f32>; -defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>; -defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>; defm V_MFMA_F32_16X16X1F32 : MAIInst<"v_mfma_f32_16x16x1f32", "F32_F32_X16", int_amdgcn_mfma_f32_16x16x1f32>; defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4", int_amdgcn_mfma_f32_16x16x4f32>; +defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>; +defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>; + +let is_gfx940_xdl = 1 in { +defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>; +defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>; defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>; defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_16x16x16f16>; defm V_MFMA_I32_16X16X4I8 : MAIInst<"v_mfma_i32_16x16x4i8", "I32_I32_X16", int_amdgcn_mfma_i32_16x16x4i8>; -defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>; -defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>; defm V_MFMA_F32_32X32X4F16 : MAIInst<"v_mfma_f32_32x32x4f16", "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>; defm V_MFMA_F32_32X32X8F16 : MAIInst<"v_mfma_f32_32x32x8f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>; defm V_MFMA_I32_32X32X4I8 : MAIInst<"v_mfma_i32_32x32x4i8", "I32_I32_X32", int_amdgcn_mfma_i32_32x32x4i8>; +} + +let Predicates = [isGFX908orGFX90A] in { defm V_MFMA_I32_16X16X16I8 : MAIInst<"v_mfma_i32_16x16x16i8", "I32_I32_X4", int_amdgcn_mfma_i32_16x16x16i8>; defm 
V_MFMA_I32_32X32X8I8 : MAIInst<"v_mfma_i32_32x32x8i8", "I32_I32_X16", int_amdgcn_mfma_i32_32x32x8i8>; defm V_MFMA_F32_4X4X2BF16 : MAIInst<"v_mfma_f32_4x4x2bf16", "F32_V2I16_X4", int_amdgcn_mfma_f32_4x4x2bf16>; @@ -494,34 +614,314 @@ defm V_MFMA_F32_16X16X2BF16 : MAIInst<"v_mfma_f32_16x16x2bf16", "F32_V2I16_X16", defm V_MFMA_F32_16X16X8BF16 : MAIInst<"v_mfma_f32_16x16x8bf16", "F32_V2I16_X4", int_amdgcn_mfma_f32_16x16x8bf16>; defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", int_amdgcn_mfma_f32_32x32x2bf16>; defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>; +} } // End SubtargetPredicate = HasMAIInsts let Predicates = [isGFX90APlus] in { + let is_gfx940_xdl = 1 in { defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>; defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>; defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>; defm V_MFMA_F32_32X32X8BF16_1K : MAIInst<"v_mfma_f32_32x32x8bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>; defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_16x16x16bf16_1k>; + } + let is_dgemm = 1 in { defm V_MFMA_F64_16X16X4F64 : MAIInst<"v_mfma_f64_16x16x4f64", "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>; defm V_MFMA_F64_4X4X4F64 : MAIInst<"v_mfma_f64_4x4x4f64", "F64_4X4X4F64", int_amdgcn_mfma_f64_4x4x4f64>; + } } // End Predicates = [isGFX90APlus] -let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in { - defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3_Profile, any_fma>; - defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3_Profile, any_fmul>; - defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3_Profile, any_fadd>; - defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3_Profile>; +let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in { + defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>; + defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>; + defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; + defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; +} // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1 + +multiclass SMFMACInst { + let Constraints = "$vdst = $src2", DisableEncoding = "$src2", + isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in { + def _e64 : MAIInst("VOPProfileSMFMAC_" # P), node>; + } +} + +let SubtargetPredicate = isGFX940Plus in { +defm V_SMFMAC_F32_16X16X32_F16 : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>; +defm V_SMFMAC_F32_32X32X16_F16 : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>; +defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>; +defm V_SMFMAC_F32_32X32X16_BF16 : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>; +defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8", "I32_16X16X64_I8", int_amdgcn_smfmac_i32_16x16x64_i8>; +defm 
V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
+}
+
+def MAIInstInfoTable : GenericTable {
+  let FilterClass = "MAIInst";
+  let CppTypeName = "MAIInstInfo";
+  let Fields = [
+    "Opcode", "is_dgemm", "is_gfx940_xdl"
+  ];
+
+  let PrimaryKey = ["Opcode"];
+  let PrimaryKeyName = "getMAIInstInfoHelper";
+}
+
+let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in {
+  defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
+  defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
+  defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
+  defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
 } // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1

 def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
 def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;

+class VOPProfileWMMA<VOPProfile P, string Suffix, RegisterOperand _Src01RC64,
+                     bit _HasClamp, bit _HasOpSel> : VOP3P_Profile<P>
{ + let DstRC = !if(!eq(Suffix, "_w32"), VDst_256, VDst_128); + let Src0RC64 = _Src01RC64; + let Src1RC64 = _Src01RC64; + let Src2RC64 = !if(!eq(Suffix, "_w32"), VISrc_256_f64, VISrc_128_f32); + let HasClamp = _HasClamp; + let HasOpSel = _HasOpSel; + let IsPacked = 1; + let IsWMMA = 1; +} + +def VOP_V8F32_V16F16_V16F16_V8F32 : VOPProfile <[v8f32, v16f16, v16f16, v8f32]>; +def VOP_V8F32_V16I16_V16I16_V8F32 : VOPProfile <[v8f32, v16i16, v16i16, v8f32]>; +def VOP_V16F16_V16F16_V16F16_V16F16 : VOPProfile <[v16f16, v16f16, v16f16, v16f16]>; +def VOP_V16I16_V16I16_V16I16_V16I16 : VOPProfile <[v16i16, v16i16, v16i16, v16i16]>; +def VOP_V8I32_V4I32_V4I32_V8I32 : VOPProfile <[v8i32, v4i32, v4i32, v8i32]>; +def VOP_V8I32_V2I32_V2I32_V8I32 : VOPProfile <[v8i32, v2i32, v2i32, v8i32]>; + +def VOP_V4F32_V16F16_V16F16_V4F32 : VOPProfile <[v4f32, v16f16, v16f16, v4f32]>; +def VOP_V4F32_V16I16_V16I16_V4F32 : VOPProfile <[v4f32, v16i16, v16i16, v4f32]>; +def VOP_V8F16_V16F16_V16F16_V8F16 : VOPProfile <[v8f16, v16f16, v16f16, v8f16]>; +def VOP_V8I16_V16I16_V16I16_V8I16 : VOPProfile <[v8i16, v16i16, v16i16, v8i16]>; +def VOP_V4I32_V4I32_V4I32_V4I32 : VOPProfile <[v4i32, v4i32, v4i32, v4i32]>; +def VOP_V4I32_V2I32_V2I32_V4I32 : VOPProfile <[v4i32, v2i32, v2i32, v4i32]>; + + +class WMMAType val> { + bit hasClamp = val{0}; + bit hasOpsel = val{1}; +} + +def WMMARegular : WMMAType<0b00>; +def WMMAUIClamp : WMMAType<0b01>; +def WMMAOpSel : WMMAType<0b10>; + +class WMMARegularPat : + GCNPat < (P.DstVT (node + (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers)) + )), + (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, $src2_modifiers, P.Src2VT:$src2)) +>; + +class WMMAOpSelPat : + GCNPat < (P.DstVT (node + (P.Src0VT P.Src0VT:$src0), + (P.Src1VT P.Src1VT:$src1), + (P.Src2VT P.Src2VT:$src2), (WMMAOpSelVOP3PMods i32:$src2_modifiers) + )), + (P.DstVT (Inst (i32 8), P.Src0VT:$src0, (i32 8), P.Src1VT:$src1, i32:$src2_modifiers, P.Src2VT:$src2)) +>; + +class WMMAUIClampPat : + GCNPat < (P.DstVT (node + (DotIUVOP3PMods i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0), + (DotIUVOP3PMods i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1), + (P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp) + )), + (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp)) +>; + +class WMMAOpcodeMapping { + Instruction Opcode2Addr = TwoAddr; + Instruction Opcode3Addr = ThreeAddr; + Predicate WaveSizePredicate; +} + +def WMMAOpcode : GenericEnum { + let FilterClass = "VOP3P_Pseudo"; +} + +class WMMAMappingTable : GenericTable { + let FilterClass = "WMMAOpcodeMapping"; + let CppTypeName = "WMMAOpcodeMappingInfo"; + let Fields = ["Opcode2Addr", "Opcode3Addr"]; + string TypeOf_Opcode2Addr = "WMMAOpcode"; + string TypeOf_Opcode3Addr = "WMMAOpcode"; +} + +def WMMAOpcode2AddrMappingTable : WMMAMappingTable { + let PrimaryKey = ["Opcode2Addr"]; + let PrimaryKeyName = "getWMMAMappingInfoFrom2AddrOpcode"; +} + +def WMMAOpcode3AddrMappingTable : WMMAMappingTable { + let PrimaryKey = ["Opcode3Addr"]; + let PrimaryKeyName = "getWMMAMappingInfoFrom3AddrOpcode"; +} + +// The WMMA instruction has extra constraints: +// Matrices A and B cannot overlap with D. C cannot partially overlap with D, +// but it is OK for them to be the same (which is a typical case). 
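+// For example, the usual accumulation form D = A*B + D, where D and C are
+// the same registers, is legal; a D that overlapped A, B, or only part of C
+// would not be.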
+// +// We implement it as follows: +// 1) Map the intrinsic to the pseudo where D is tied to C ($vdst = $src2). +// 2) The pass twoaddressinstruction checks if src2 is live and if that is the case +// it converts the default pseudo to the pseudo where src2 is not the same as vdst. +// 3) @earlyclobber on the destination satisfies the constraint during RA. + +multiclass WMMAInst { + + defvar WMMAConstraints2Addr = "@earlyclobber $vdst,$vdst = $src2"; + defvar WMMAConstraints3Addr = "@earlyclobber $vdst"; + + defvar WMMAProfile = VOPProfileWMMA; + if !eq(Suffix, "_w32") then { + let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { + let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in { + def _twoaddr_w32 : VOP3P_Pseudo; + } + let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in { + def _threeaddr_w32 : VOP3P_Pseudo; + } + } + def : WMMAOpcodeMapping(NAME # _twoaddr_w32), + !cast(NAME # _threeaddr_w32)>; + } else if !eq(Suffix, "_w64") then { + let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { + let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in { + def _twoaddr_w64 : VOP3P_Pseudo; + } + let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in { + def _threeaddr_w64 : VOP3P_Pseudo; + } + } + def : WMMAOpcodeMapping(NAME # _twoaddr_w64), + !cast(NAME # _threeaddr_w64)>; + } + + if !eq(Type, WMMAOpSel) then { + def : WMMAOpSelPat(NAME # _twoaddr # Suffix), node, P>; + } else if !eq(Type, WMMAUIClamp) then { + def : WMMAUIClampPat(NAME # _twoaddr # Suffix), node, P>; + } else { + def : WMMARegularPat(NAME # _twoaddr # Suffix), node, P>; + } +} + + +let WaveSizePredicate = isWave32 in { + defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>; + defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>; + defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>; + defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>; + defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>; + defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>; +} + +let WaveSizePredicate = isWave64 in { + defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>; + defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>; + defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>; + defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>; + defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w64", 
"v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>; + defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>; + +} + //===----------------------------------------------------------------------===// // Begin Real Encodings //===----------------------------------------------------------------------===// +class VOP3P_DPP16 op, VOP_DPP_Pseudo ps, int subtarget, + string opName = ps.OpName> + : VOP3P_DPP, SIMCInstr { + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let AssemblerPredicate = HasDPP16; + let SubtargetPredicate = HasDPP16; + let OtherPredicates = ps.OtherPredicates; +} + +class VOP3P_DPP8_Base op, VOP_Pseudo ps, string opName = ps.OpName> + : VOP3P_DPP8 { + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; +} + +//===----------------------------------------------------------------------===// +// GFX11. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Plus, + DecoderNamespace = "GFX11" in { + + multiclass VOP3P_Real_gfx11 op, string backing_ps_name = NAME, + string asmName = !cast(NAME).Mnemonic> { + def _gfx11 : VOP3P_Real(backing_ps_name), + SIEncodingFamily.GFX11, asmName>, + VOP3Pe_gfx11(backing_ps_name).Pfl>; + } + + multiclass VOP3P_Real_dpp_gfx11 op, string backing_ps_name = NAME, + string asmName = !cast(NAME).Mnemonic> { + defvar ps = !cast(backing_ps_name); + def _dpp_gfx11 + : VOP3P_DPP16(backing_ps_name #"_dpp"), + SIEncodingFamily.GFX11> { + let AsmString = asmName #ps.Pfl.AsmVOP3DPP16; + let DecoderNamespace = "DPPGFX11"; + } + } + + multiclass VOP3P_Real_dpp8_gfx11 op, string backing_ps_name = NAME, + string asmName = !cast(NAME).Mnemonic> { + defvar ps = !cast(backing_ps_name); + def _dpp8_gfx11 : VOP3P_DPP8_Base { + let AsmString = asmName #ps.Pfl.AsmVOP3DPP8; + let DecoderNamespace = "DPP8GFX11"; + } + } + + multiclass VOP3P_Realtriple_gfx11 op, string backing_ps_name = NAME, + string asmName = !cast(NAME).Mnemonic> + : VOP3P_Real_gfx11, + VOP3P_Real_dpp_gfx11, + VOP3P_Real_dpp8_gfx11; +} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" + +defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11 <0x16>; +defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11 <0x18>; +defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11 <0x1a>; + +multiclass VOP3P_Real_WMMA op> { + let WaveSizePredicate = isWave32, DecoderNamespace = "GFX11" in { + defm _twoaddr_w32 : VOP3P_Real_gfx11 ; + } + let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX11" in { + defm _twoaddr_w64 : VOP3P_Real_gfx11 ; + } +} + +defm V_WMMA_F32_16X16X16_F16 : VOP3P_Real_WMMA <0x040>; +defm V_WMMA_F32_16X16X16_BF16 : VOP3P_Real_WMMA <0x041>; +defm V_WMMA_F16_16X16X16_F16 : VOP3P_Real_WMMA <0x042>; +defm V_WMMA_BF16_16X16X16_BF16 : VOP3P_Real_WMMA <0x043>; +defm V_WMMA_I32_16X16X16_IU8 : VOP3P_Real_WMMA <0x044>; +defm V_WMMA_I32_16X16X16_IU4 : VOP3P_Real_WMMA <0x045>; + //===----------------------------------------------------------------------===// // GFX8 (VI) //===----------------------------------------------------------------------===// @@ -557,15 +957,64 @@ multiclass VOP3P_Real_MFMA_gfx90a op> { VOP3Pe_MAI (NAME # "_vgprcd" # "_e64").Pfl, 0>; } // End AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A" } 
+} -multiclass VOP3P_Real_MFMA op> : - VOP3P_Real_MFMA_gfx90a { +multiclass VOP3P_Real_MFMA_gfx940_aliases(Op # "_e64"), + VOP3_Pseudo PS_VCD = !cast(Op # "_vgprcd" # "_e64"), + VOPProfile Pfl_ACD = PS_ACD.Pfl, + VOPProfile Pfl_VCD = PS_VCD.Pfl> { + let Predicates = [isGFX940Plus] in { + foreach _ = BoolToList.ret in { + def : InstAlias (Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst, + Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2, + cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl; + def : InstAlias (Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst, + Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2, + cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl; + } + } // End Predicates = [isGFX940Plus] +} + +multiclass VOP3P_Real_MFMA_gfx940 op, string Name = !cast(NAME#"_e64").Mnemonic, + VOP3_Pseudo PS_ACD = !cast(NAME # "_e64"), + VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> { + let SubtargetPredicate = isGFX940Plus, + AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9", + AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in { + def _gfx940_acd : VOP3P_Real, + VOP3Pe_MAI ; + + def _gfx940_vcd : VOP3P_Real, + VOP3Pe_MAI ; + } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" + + defm : VOP3P_Real_MFMA_gfx940_aliases; + + foreach _ = BoolToList.ret in + defm : VOP3P_Real_MFMA_gfx940_aliases; +} + +multiclass VOP3P_Real_MFMA op, string GFX940Name = !cast(NAME#"_e64").Mnemonic> : + VOP3P_Real_MFMA_gfx90a , + VOP3P_Real_MFMA_gfx940 { def _vi : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>, VOP3Pe_MAI (NAME#"_e64").Pfl, ?> { let AssemblerPredicate = HasMAIInsts; let DecoderNamespace = "GFX8"; + let Constraints = ""; } } + +multiclass VOP3P_Real_SMFMAC op, string alias> { + def _gfx940 : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3Pe_SMFMAC { + let AssemblerPredicate = isGFX940Plus; + let DecoderNamespace = "GFX8"; + } + def : MnemonicAlias(NAME#"_e64").Mnemonic>; } defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>; @@ -634,19 +1083,21 @@ let SubtargetPredicate = HasMAIInsts in { defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>; defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>; -defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40>; -defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41>; -defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42>; -defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44>; -defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45>; -defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48>; -defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49>; -defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MFMA <0x4a>; -defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c>; -defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d>; -defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50>; -defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51>; -defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52>; +defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40, "v_mfma_f32_32x32x1_2b_f32">; +defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41, "v_mfma_f32_16x16x1_4b_f32">; +defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42, "v_mfma_f32_4x4x1_16b_f32">; +defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44, "v_mfma_f32_32x32x2_f32">; +defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45, "v_mfma_f32_16x16x4_f32">; +defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48, "v_mfma_f32_32x32x4_2b_f16">; +defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49, "v_mfma_f32_16x16x4_4b_f16">; +defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MFMA <0x4a, "v_mfma_f32_4x4x4_16b_f16">; +defm 
V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c, "v_mfma_f32_32x32x8_f16">; +defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d, "v_mfma_f32_16x16x16_f16">; +defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8">; +defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">; +defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">; + +let SubtargetPredicate = isGFX908orGFX90A in { defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA <0x55>; defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA <0x54>; defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>; @@ -654,6 +1105,7 @@ defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>; defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA <0x6b>; defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>; defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>; +} } // End SubtargetPredicate = HasMAIInsts @@ -665,6 +1117,27 @@ defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx90a <0x67>; defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx90a <0x6e>; defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx90a <0x6f>; +defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">; +defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; +defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">; +defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">; + +defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">; +defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">; +defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">; +defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">; +defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">; + +defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">; +defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">; + +defm V_SMFMAC_F32_16X16X32_F16 : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">; +defm V_SMFMAC_F32_32X32X16_F16 : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">; +defm V_SMFMAC_F32_16X16X32_BF16 : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x16x32bf16">; +defm V_SMFMAC_F32_32X32X16_BF16 : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">; +defm V_SMFMAC_I32_16X16X64_I8 : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">; +defm V_SMFMAC_I32_32X32X32_I8 : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">; + let SubtargetPredicate = HasPackedFP32Ops in { defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>; defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>; @@ -676,35 +1149,41 @@ let SubtargetPredicate = HasPackedFP32Ops in { // GFX10. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10", VOP3P = 1 in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 in { multiclass VOP3P_Real_gfx10 op> { def _gfx10 : VOP3P_Real(NAME), SIEncodingFamily.GFX10>, VOP3Pe_gfx10 (NAME).Pfl>; } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10", VOP3P = 1 - -defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x00>; -defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x01>; -defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x02>; -defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x03>; -defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>; -defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>; -defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>; -defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x07>; -defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x08>; -defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x09>; -defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x0a>; -defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x0b>; -defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x0c>; -defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x0d>; -defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x0e>; -defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x0f>; -defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x10>; -defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x11>; -defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x12>; -defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x20>; -defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x21>; -defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>; +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 + +multiclass VOP3P_Real_gfx10_gfx11 op> + : VOP3P_Real_gfx10, VOP3P_Real_gfx11; + +multiclass VOP3P_Real_gfx10_gfx11_Triple op> + : VOP3P_Real_gfx10, VOP3P_Realtriple_gfx11; + +defm V_PK_MAD_I16 : VOP3P_Real_gfx10_gfx11<0x00>; +defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10_gfx11<0x01>; +defm V_PK_ADD_I16 : VOP3P_Real_gfx10_gfx11<0x02>; +defm V_PK_SUB_I16 : VOP3P_Real_gfx10_gfx11<0x03>; +defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11<0x04>; +defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11<0x05>; +defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11<0x06>; +defm V_PK_MAX_I16 : VOP3P_Real_gfx10_gfx11<0x07>; +defm V_PK_MIN_I16 : VOP3P_Real_gfx10_gfx11<0x08>; +defm V_PK_MAD_U16 : VOP3P_Real_gfx10_gfx11<0x09>; +defm V_PK_ADD_U16 : VOP3P_Real_gfx10_gfx11<0x0a>; +defm V_PK_SUB_U16 : VOP3P_Real_gfx10_gfx11<0x0b>; +defm V_PK_MAX_U16 : VOP3P_Real_gfx10_gfx11<0x0c>; +defm V_PK_MIN_U16 : VOP3P_Real_gfx10_gfx11<0x0d>; +defm V_PK_FMA_F16 : VOP3P_Real_gfx10_gfx11<0x0e>; +defm V_PK_ADD_F16 : VOP3P_Real_gfx10_gfx11<0x0f>; +defm V_PK_MUL_F16 : VOP3P_Real_gfx10_gfx11<0x10>; +defm V_PK_MIN_F16 : VOP3P_Real_gfx10_gfx11<0x11>; +defm V_PK_MAX_F16 : VOP3P_Real_gfx10_gfx11<0x12>; +defm V_FMA_MIX_F32 : VOP3P_Real_gfx10_gfx11_Triple <0x20>; +defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x21>; +defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x22>; let SubtargetPredicate = HasDot2Insts in { @@ -715,9 +1194,9 @@ defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>; let SubtargetPredicate = HasDot7Insts in { -defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>; -defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>; -defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>; +defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x13>; +defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11 <0x17>; +defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11 <0x19>; } // End SubtargetPredicate = HasDot7Insts diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index c0cc91029d11..eb6c54a45263 100644 --- 
a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -49,12 +49,36 @@ class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be <P>

{ // an explicit $dst. class VOPC_Profile sched, ValueType vt0, ValueType vt1 = vt0> : VOPProfile <[i1, vt0, vt1, untyped]> { + // We want to exclude instructions with 64bit operands + let HasExtDPP = getHasVOP3DPP.ret; let Asm32 = "$src0, $src1"; + + let AsmDPP = !if (HasModifiers, + "$src0_modifiers, $src1_modifiers " + "$dpp_ctrl$row_mask$bank_mask$bound_ctrl", + "$src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"); + let AsmDPP8 = "$src0, $src1 $dpp8$fi"; + let AsmDPP16 = AsmDPP#"$fi"; + let InsDPP = getInsDPP, Src0DPP, Src1DPP, Src2DPP, + NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, + Src2ModDPP>.ret; + let InsDPP16 = getInsDPP16, Src0DPP, Src1DPP, Src2DPP, + NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, + Src2ModDPP>.ret; + let InsDPP8 = getInsDPP8, Src0DPP, Src1DPP, Src2DPP, + NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, + Src2ModDPP>.ret; + // The destination for 32-bit encoding is implicit. let HasDst32 = 0; // VOPC disallows dst_sel and dst_unused as they have no effect on destination let EmitDstSel = 0; let Outs64 = (outs VOPDstS64orS32:$sdst); + let OutsVOP3DPP = Outs64; + let OutsVOP3DPP8 = Outs64; + let InsVOP3DPP = getInsVOP3DPP.ret; + let InsVOP3DPP16 = getInsVOP3DPP16.ret; + let InsVOP3DPP8 = getInsVOP3DPP8.ret; list Schedule = sched; } @@ -62,12 +86,15 @@ class VOPC_NoSdst_Profile sched, ValueType vt0, ValueType vt1 = vt0> : VOPC_Profile { let Outs64 = (outs ); + let OutsVOP3DPP = Outs64; + let OutsVOP3DPP8 = Outs64; let OutsSDWA = (outs ); let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm64 = !if(isFloatType.ret, "$src0_modifiers, $src1_modifiers$clamp", "$src0, $src1"); + let AsmVOP3DPPBase = Asm64; let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; } @@ -100,8 +127,8 @@ class VOPC_Pseudo pattern=[], VOPProfile Pfl = P; } -class VOPC_Real : - InstSI , +class VOPC_Real : + InstSI , SIMCInstr { let VALU = 1; @@ -133,8 +160,9 @@ class VOPC_SDWA_Pseudo pattern=[]> : // This class is used only with VOPC instructions. 
Use $sdst for out operand class VOPCInstAlias : - InstAlias , PredicateControl { + string Asm32 = ps.Pfl.Asm32, string real_name = ps.OpName, + VOPProfile p = ps.Pfl> : + InstAlias , PredicateControl { field bit isCompare; field bit isCommutable; @@ -167,27 +195,32 @@ class VOPCInstAlias { - def : VOPCInstAlias (OpName#"_e64"), - !cast(OpName#"_e32_"#Arch)>; +multiclass VOPCInstAliases { + def : VOPCInstAlias (old_name#"_e64"), + !cast(real_name#"_e32_"#Arch), + !cast(old_name#"_e64").Pfl.Asm32, + real_name>; let WaveSizePredicate = isWave32 in { - def : VOPCInstAlias (OpName#"_e64"), - !cast(OpName#"_e32_"#Arch), - "vcc_lo, "#!cast(OpName#"_e64").Pfl.Asm32>; + def : VOPCInstAlias (old_name#"_e64"), + !cast(real_name#"_e32_"#Arch), + "vcc_lo, "#!cast(old_name#"_e64").Pfl.Asm32, + real_name>; } let WaveSizePredicate = isWave64 in { - def : VOPCInstAlias (OpName#"_e64"), - !cast(OpName#"_e32_"#Arch), - "vcc, "#!cast(OpName#"_e64").Pfl.Asm32>; + def : VOPCInstAlias (old_name#"_e64"), + !cast(real_name#"_e32_"#Arch), + "vcc, "#!cast(old_name#"_e64").Pfl.Asm32, + real_name>; } } -multiclass VOPCXInstAliases { - def : VOPCInstAlias (OpName#"_e64"), - !cast(OpName#"_e32_"#Arch)>; +multiclass VOPCXInstAliases { + def : VOPCInstAlias (old_name#"_e64"), + !cast(real_name#"_e32_"#Arch), + !cast(old_name#"_e64").Pfl.Asm32, + real_name>; } - class getVOPCPat64 : LetDummies { list ret = !if(P.HasModifiers, [(set i1:$sdst, @@ -205,6 +238,11 @@ class VCMPXNoSDstTable { string NoSDstOp = Name; } +class VCMPVCMPXTable { + bit IsVCMPX = 0; + string VCMPOp = Name; +} + multiclass VOPC_Pseudos , Commutable_REV, - VCMPXNoSDstTable<1, opName#"_e32"> { + VCMPXNoSDstTable<1, opName#"_e32">, + VCMPVCMPXTable { let Defs = !if(DefExec, [VCC, EXEC], [VCC]); let SchedRW = P.Schedule; let isConvergent = DefExec; @@ -223,7 +262,8 @@ multiclass VOPC_Pseudos .ret>, Commutable_REV, - VCMPXNoSDstTable<1, opName#"_e64"> { + VCMPXNoSDstTable<1, opName#"_e64">, + VCMPVCMPXTable { let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; let isCompare = 1; @@ -237,6 +277,26 @@ multiclass VOPC_Pseudos { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = P.Schedule; + let isConvergent = DefExec; + let isCompare = 1; + let VOPC = 1; + let Constraints = ""; + } + if P.HasExtVOP3DPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let Defs = !if(DefExec, [EXEC], []); + let SchedRW = P.Schedule; + let isCompare = 1; + let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $sdst", ""); + } + } // end SubtargetPredicate = isGFX11Plus + } let SubtargetPredicate = HasSdstCMPX in { @@ -248,23 +308,27 @@ multiclass VOPCX_Pseudos , Commutable_REV, - VCMPXNoSDstTable<0, opName#"_e32"> { + VCMPXNoSDstTable<0, opName#"_e32">, + VCMPVCMPXTable { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; let isCompare = 1; let isCommutable = 1; let SubtargetPredicate = HasNoSdstCMPX; + let IsVCMPX = 1; } def _nosdst_e64 : VOP3_Pseudo, Commutable_REV, - VCMPXNoSDstTable<0, opName#"_e64"> { + VCMPXNoSDstTable<0, opName#"_e64">, + VCMPVCMPXTable { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isCompare = 1; let isCommutable = 1; let SubtargetPredicate = HasNoSdstCMPX; + let IsVCMPX = 1; } foreach _ = BoolToList.ret in @@ -275,6 +339,25 @@ multiclass VOPCX_Pseudos { + let Defs = [EXEC]; + let SchedRW = P_NoSDst.Schedule; + let isConvergent = 1; + let isCompare = 1; + let VOPC = 1; + let Constraints = ""; + } + if P.HasExtVOP3DPP then + def _nosdst_e64_dpp : VOP3_DPP_Pseudo { + let Defs = [EXEC]; + let SchedRW = 
P_NoSDst.Schedule; + let isCompare = 1; + let Constraints = ""; + } + } // end SubtargetPredicate = isGFX11Plus } } // End SubtargetPredicate = HasSdstCMPX @@ -626,8 +709,18 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">; class VOPC_Class_Profile sched, ValueType vt> : VOPC_Profile { + let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; + let AsmDPP16 = AsmDPP#"$fi"; + let InsDPP = (ins VGPR_32:$old, FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP16 = !con(InsDPP, (ins FI:$fi)); + // DPP8 forbids modifiers and can inherit from VOPC_Profile + let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); + dag InsPartVOP3DPP = (ins Src0Mod:$src0_modifiers, VGPRSrc_32:$src0, VGPRSrc_32:$src1); + let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel), + (ins))); let Asm64 = "$sdst, $src0_modifiers, $src1"; + let AsmVOP3DPPBase = Asm64; let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, @@ -647,6 +740,7 @@ class VOPC_Class_NoSdst_Profile sched, ValueType vt> : Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm64 = "$src0_modifiers, $src1"; + let AsmVOP3DPPBase = Asm64; let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; } @@ -684,6 +778,24 @@ multiclass VOPC_Class_Pseudos { + let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]), + !if(DefVcc, [VCC], [])); + let SchedRW = p.Schedule; + let isConvergent = DefExec; + let VOPC = 1; + let Constraints = ""; + } + if p.HasExtVOP3DPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let Defs = !if(DefExec, [EXEC], []); + let SchedRW = p.Schedule; + let Constraints = !if(p.NumSrcArgs, p.TieRegDPP # " = $sdst", ""); + } + } // end SubtargetPredicate = isGFX11Plus } let SubtargetPredicate = HasSdstCMPX in { @@ -714,6 +826,23 @@ multiclass VOPCX_Class_Pseudos { + let Defs = [EXEC]; + let SchedRW = P_NoSDst.Schedule; + let isConvergent = 1; + let VOPC = 1; + let Constraints = ""; + } + if P.HasExtVOP3DPP then + def _nosdst_e64_dpp : VOP3_DPP_Pseudo { + let Defs = [EXEC]; + let SchedRW = P_NoSDst.Schedule; + let Constraints = ""; + } + } // end SubtargetPredicate = isGFX11Plus } } // End SubtargetPredicate = HasSdstCMPX @@ -871,15 +1000,677 @@ defm : FCMP_Pattern ; defm : FCMP_Pattern ; defm : FCMP_Pattern ; +//===----------------------------------------------------------------------===// +// DPP Encodings +//===----------------------------------------------------------------------===// + +// VOPC32 + +class VOPC_DPPe_Common op> : Enc64 { + bits<8> src1; + let Inst{16-9} = src1; + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; +} + +class VOPC_DPP_Base op, string OpName, VOPProfile P> + : VOP_DPP_Base, + VOPC_DPPe_Common { + bits<2> src0_modifiers; + bits<8> src0; + bits<2> src1_modifiers; + bits<9> dpp_ctrl; + bits<1> bound_ctrl; + bits<4> bank_mask; + bits<4> row_mask; + bit fi; + + let Inst{8-0} = 0xfa; + + let Inst{39-32} = !if (P.HasSrc0, src0{7-0}, 0); + let Inst{48-40} = dpp_ctrl; + let Inst{50} = fi; + let Inst{51} = bound_ctrl; + let Inst{52} = !if (P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg + let Inst{53} = !if (P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs + let Inst{54} = !if (P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg + let Inst{55} = !if (P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs + let Inst{59-56} = bank_mask; + 
let Inst{63-60} = row_mask; + + let AsmMatchConverter = "cvtDPP"; + let VOPC = 1; +} + +class VOPC_DPP8_Base op, string OpName, VOPProfile P> + : VOP_DPP8_Base, + VOPC_DPPe_Common { + bits<8> src0; + bits<24> dpp8; + bits<9> fi; + + let Inst{8-0} = fi; + + let Inst{39-32} = !if (P.HasSrc0, src0{7-0}, 0); + let Inst{63-40} = dpp8{23-0}; + + let AsmMatchConverter = "cvtDPP8"; + let VOPC = 1; +} + +class VOPC_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> + : VOPC_DPP_Base { + let AssemblerPredicate = HasDPP16; + let SubtargetPredicate = HasDPP16; + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; + let Constraints = ps.Constraints; + let AsmMatchConverter = "cvtVOPCNoDstDPP"; +} + +class VOPC_DPP16_SIMC op, VOP_DPP_Pseudo ps, int subtarget, + string opName = ps.OpName> + : VOPC_DPP16, SIMCInstr; + +class VOPC_DPP8 op, VOPC_Pseudo ps, string opName = ps.OpName> + : VOPC_DPP8_Base { + // Note ps is the non-dpp pseudo + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; + let Constraints = ""; + let AsmMatchConverter = "cvtVOPCNoDstDPP8"; +} + +// VOPC64 + +class VOPC64_DPP_Base op, string OpName, VOPProfile P> + : VOP3_DPP_Base, VOP3_DPPe_Common { + Instruction Opcode = !cast(NAME); + + bits<8> src0; + bits<9> dpp_ctrl; + bits<1> bound_ctrl; + bits<4> bank_mask; + bits<4> row_mask; + bit fi; + + let Inst{40-32} = 0xfa; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{80-72} = dpp_ctrl; + let Inst{82} = fi; + let Inst{83} = bound_ctrl; + // Inst{87-84} ignored by hw + let Inst{91-88} = bank_mask; + let Inst{95-92} = row_mask; + +} + +class VOPC64_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> + : VOPC64_DPP_Base { + let AssemblerPredicate = HasDPP16; + let SubtargetPredicate = HasDPP16; + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; + let Constraints = ps.Constraints; +} + +class VOPC64_DPP16_Dst op, VOP_DPP_Pseudo ps, + string opName = ps.OpName> + : VOPC64_DPP16 { + bits<8> sdst; + let Inst{7-0} = sdst; +} + +class VOPC64_DPP16_NoDst op, VOP_DPP_Pseudo ps, + string opName = ps.OpName> + : VOPC64_DPP16 { + let Inst{7-0} = ? ; + let AsmMatchConverter = "cvtVOPC64NoDstDPP"; +} + +class VOPC64_DPP8_Base op, string OpName, VOPProfile P> + : VOP3_DPP8_Base, VOP3_DPPe_Common { + Instruction Opcode = !cast(NAME); + + bits<8> src0; + bits<24> dpp8; + bits<9> fi; + + let Inst{40-32} = fi; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{95-72} = dpp8{23-0}; + +} + +class VOPC64_DPP8 op, VOP_Pseudo ps, string opName = ps.OpName> + : VOPC64_DPP8_Base { + // Note ps is the non-dpp pseudo + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; +} + +class VOPC64_DPP8_Dst op, VOP_Pseudo ps, string opName = ps.OpName> + : VOPC64_DPP8 { + bits<8> sdst; + let Inst{7-0} = sdst; + let Constraints = "$old = $sdst"; +} + +class VOPC64_DPP8_NoDst op, VOP_Pseudo ps, string opName = ps.OpName> + : VOPC64_DPP8 { + let Inst{7-0} = ? ; + let AsmMatchConverter = "cvtVOPC64NoDstDPP8"; + let Constraints = ""; +} + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. 
//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GFX11. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only in { + multiclass VOPC_Real_gfx11 op> { + defvar ps32 = !cast(NAME#"_e32"); + defvar ps64 = !cast(NAME#"_e64"); + let DecoderNamespace = "GFX11" in { + def _e32_gfx11 : VOPC_Real, + VOPCe; + def _e64_gfx11 : VOP3_Real, + VOP3a_gfx11<{0, op}, ps64.Pfl> { + // Encoding used for VOPC instructions encoded as VOP3 differs from + // VOP3e by destination name (sdst) as VOPC doesn't have vector dst. + bits<8> sdst; + let Inst{7-0} = sdst; + } + } // End DecoderNamespace = "GFX11" + + defm : VOPCInstAliases; + + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(NAME #"_e32" #"_dpp"); + defvar AsmDPP = ps32.Pfl.AsmDPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e32_dpp_gfx11 : VOPC_DPP16_SIMC; + def _e32_dpp_w32_gfx11 : VOPC_DPP16 { + let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e32_dpp_w64_gfx11 : VOPC_DPP16 { + let AsmString = psDPP.OpName # " vcc, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + defvar AsmDPP8 = ps32.Pfl.AsmDPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e32_dpp8_gfx11 : VOPC_DPP8; + def _e32_dpp8_w32_gfx11 : VOPC_DPP8 { + let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e32_dpp8_w64_gfx11 : VOPC_DPP8 { + let AsmString = ps32.OpName # " vcc, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + } + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(NAME #"_e64" #"_dpp"); + defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e64_dpp_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP>, + SIMCInstr; + def _e64_dpp_w32_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP> { + let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e64_dpp_w64_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP> { + let AsmString = psDPP.OpName # " vcc, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e64_dpp8_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64>; + def _e64_dpp8_w32_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64> { + let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e64_dpp8_w64_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64> { + let AsmString = ps32.OpName # " vcc, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + } + + } + + multiclass VOPC_Real_with_name_gfx11 op, string OpName, + string asm_name> { + defvar ps32 = !cast(OpName#"_e32"); + defvar ps64 = !cast(OpName#"_e64"); + let DecoderNamespace = "GFX11" in { + def _e32_gfx11 : + // 32 and 64 bit forms of the instruction have _e32 and _e64 + // respectively appended to their assembly mnemonic. + // _e64 is printed as part of the VOPDstS64orS32 operand, whereas + // the destination-less 32bit forms add it to the asmString here. 
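(Aside: for illustration only — operand printing also depends on wave size — a compare such as V_CMP_LT_F16 is expected to print roughly as follows on a wave64 target:)

  v_cmp_lt_f16_e32 vcc, v1, v2       // implicit VCC; suffix supplied by the AsmString
  v_cmp_lt_f16_e64 s[10:11], v1, v2  // explicit sdst; suffix printed with the operand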
+ VOPC_Real, + VOPCe, + MnemonicAlias, Requires<[isGFX11Plus]>; + def _e64_gfx11 : + VOP3_Real, + VOP3a_gfx11<{0, op}, ps64.Pfl>, + MnemonicAlias, Requires<[isGFX11Plus]> { + // Encoding used for VOPC instructions encoded as VOP3 differs from + // VOP3e by destination name (sdst) as VOPC doesn't have vector dst. + bits<8> sdst; + let Inst{7-0} = sdst; + } + } // End DecoderNamespace = "GFX11" + + defm : VOPCInstAliases; + + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(OpName #"_e32" #"_dpp"); + defvar AsmDPP = ps32.Pfl.AsmDPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e32_dpp_gfx11 : VOPC_DPP16_SIMC; + def _e32_dpp_w32_gfx11 + : VOPC_DPP16 { + let AsmString = asm_name # " vcc_lo, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e32_dpp_w64_gfx11 + : VOPC_DPP16 { + let AsmString = asm_name # " vcc, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + defvar AsmDPP8 = ps32.Pfl.AsmDPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e32_dpp8_gfx11 : VOPC_DPP8; + def _e32_dpp8_w32_gfx11 + : VOPC_DPP8 { + let AsmString = asm_name # " vcc_lo, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e32_dpp8_w64_gfx11 + : VOPC_DPP8 { + let AsmString = asm_name # " vcc, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + } + + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(OpName #"_e64" #"_dpp"); + defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e64_dpp_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>, + SIMCInstr; + def _e64_dpp_w32_gfx11 + : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> { + let AsmString = asm_name # " vcc_lo, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e64_dpp_w64_gfx11 + : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> { + let AsmString = asm_name # " vcc, " # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e64_dpp8_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>; + def _e64_dpp8_w32_gfx11 + : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> { + let AsmString = asm_name # " vcc_lo, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + def _e64_dpp8_w64_gfx11 + : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> { + let AsmString = asm_name # " vcc, " # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + } + + } + + multiclass VOPCX_Real_gfx11 op> { + defvar ps32 = !cast(NAME#"_nosdst_e32"); + defvar ps64 = !cast(NAME#"_nosdst_e64"); + let DecoderNamespace = "GFX11" in { + def _e32_gfx11 : + VOPC_Real, + VOPCe { + let AsmString = !subst("_nosdst", "", ps32.PseudoInstr) + # " " # ps32.AsmOperands; + } + def _e64_gfx11 : + VOP3_Real, + VOP3a_gfx11<{0, op}, ps64.Pfl> { + let Inst{7-0} = ?; // sdst + let AsmString = !subst("_nosdst", "", ps64.Mnemonic) + # "{_e64} " # ps64.AsmOperands; + } + } // End DecoderNamespace = "GFX11" + + defm : VOPCXInstAliases; + + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(NAME #"_nosdst_e32" #"_dpp"); + defvar AsmDPP = ps32.Pfl.AsmDPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e32_dpp_gfx11 + : VOPC_DPP16_SIMC { + let AsmString = !subst("_nosdst", "", psDPP.OpName) # " " # AsmDPP; + } + } + defvar AsmDPP8 = ps32.Pfl.AsmDPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e32_dpp8_gfx11 : VOPC_DPP8 { + let AsmString = !subst("_nosdst", 
"", ps32.OpName) # " " # AsmDPP8; + } + } + } + + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(NAME #"_nosdst_e64" #"_dpp"); + defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e64_dpp_gfx11 + : VOPC64_DPP16_NoDst<{0, op}, psDPP>, + SIMCInstr { + let AsmString = !subst("_nosdst", "", psDPP.OpName) + # "{_e64_dpp} " # AsmDPP; + } + } + defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e64_dpp8_gfx11 : VOPC64_DPP8_NoDst<{0, op}, ps64> { + let AsmString = !subst("_nosdst", "", ps64.OpName) + # "{_e64_dpp} " # AsmDPP8; + } + } + } + } + + multiclass VOPCX_Real_with_name_gfx11 op, string OpName, + string asm_name> { + defvar ps32 = !cast(OpName#"_nosdst_e32"); + defvar ps64 = !cast(OpName#"_nosdst_e64"); + let DecoderNamespace = "GFX11" in { + def _e32_gfx11 + : VOPC_Real, + MnemonicAlias, + Requires<[isGFX11Plus]>, + VOPCe { + let AsmString = asm_name # "{_e32} " # ps32.AsmOperands; + } + def _e64_gfx11 + : VOP3_Real, + MnemonicAlias, + Requires<[isGFX11Plus]>, + VOP3a_gfx11<{0, op}, ps64.Pfl> { + let Inst{7-0} = ? ; // sdst + let AsmString = asm_name # "{_e64} " # ps64.AsmOperands; + } + } // End DecoderNamespace = "GFX11" + + defm : VOPCXInstAliases; + + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(OpName#"_nosdst_e32"#"_dpp"); + let DecoderNamespace = "DPPGFX11" in { + def _e32_dpp_gfx11 : VOPC_DPP16_SIMC; + } + let DecoderNamespace = "DPP8GFX11" in { + def _e32_dpp8_gfx11 : VOPC_DPP8; + } + } + foreach _ = BoolToList.ret in { + defvar psDPP = !cast(OpName#"_nosdst_e64"#"_dpp"); + defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; + let DecoderNamespace = "DPPGFX11" in { + def _e64_dpp_gfx11 + : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>, + SIMCInstr { + let AsmString = asm_name # "{_e64_dpp} " # AsmDPP; + } + } + defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; + let DecoderNamespace = "DPP8GFX11" in { + def _e64_dpp8_gfx11 : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> { + let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8; + } + } + } + + } +} // End AssemblerPredicate = isGFX11Only + +defm V_CMP_F_F16 : VOPC_Real_gfx11<0x000>; +defm V_CMP_LT_F16 : VOPC_Real_gfx11<0x001>; +defm V_CMP_EQ_F16 : VOPC_Real_gfx11<0x002>; +defm V_CMP_LE_F16 : VOPC_Real_gfx11<0x003>; +defm V_CMP_GT_F16 : VOPC_Real_gfx11<0x004>; +defm V_CMP_LG_F16 : VOPC_Real_gfx11<0x005>; +defm V_CMP_GE_F16 : VOPC_Real_gfx11<0x006>; +defm V_CMP_O_F16 : VOPC_Real_gfx11<0x007>; +defm V_CMP_U_F16 : VOPC_Real_gfx11<0x008>; +defm V_CMP_NGE_F16 : VOPC_Real_gfx11<0x009>; +defm V_CMP_NLG_F16 : VOPC_Real_gfx11<0x00a>; +defm V_CMP_NGT_F16 : VOPC_Real_gfx11<0x00b>; +defm V_CMP_NLE_F16 : VOPC_Real_gfx11<0x00c>; +defm V_CMP_NEQ_F16 : VOPC_Real_gfx11<0x00d>; +defm V_CMP_NLT_F16 : VOPC_Real_gfx11<0x00e>; +defm V_CMP_T_F16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16", "v_cmp_t_f16">; +defm V_CMP_F_F32 : VOPC_Real_gfx11<0x010>; +defm V_CMP_LT_F32 : VOPC_Real_gfx11<0x011>; +defm V_CMP_EQ_F32 : VOPC_Real_gfx11<0x012>; +defm V_CMP_LE_F32 : VOPC_Real_gfx11<0x013>; +defm V_CMP_GT_F32 : VOPC_Real_gfx11<0x014>; +defm V_CMP_LG_F32 : VOPC_Real_gfx11<0x015>; +defm V_CMP_GE_F32 : VOPC_Real_gfx11<0x016>; +defm V_CMP_O_F32 : VOPC_Real_gfx11<0x017>; +defm V_CMP_U_F32 : VOPC_Real_gfx11<0x018>; +defm V_CMP_NGE_F32 : VOPC_Real_gfx11<0x019>; +defm V_CMP_NLG_F32 : VOPC_Real_gfx11<0x01a>; +defm V_CMP_NGT_F32 : VOPC_Real_gfx11<0x01b>; +defm V_CMP_NLE_F32 : VOPC_Real_gfx11<0x01c>; +defm V_CMP_NEQ_F32 : VOPC_Real_gfx11<0x01d>; +defm V_CMP_NLT_F32 : VOPC_Real_gfx11<0x01e>; +defm V_CMP_T_F32 
: VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">; +defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">; +defm V_CMP_LT_I16 : VOPC_Real_gfx11<0x031>; +defm V_CMP_EQ_I16 : VOPC_Real_gfx11<0x032>; +defm V_CMP_LE_I16 : VOPC_Real_gfx11<0x033>; +defm V_CMP_GT_I16 : VOPC_Real_gfx11<0x034>; +defm V_CMP_NE_I16 : VOPC_Real_gfx11<0x035>; +defm V_CMP_GE_I16 : VOPC_Real_gfx11<0x036>; +defm V_CMP_LT_U16 : VOPC_Real_gfx11<0x039>; +defm V_CMP_EQ_U16 : VOPC_Real_gfx11<0x03a>; +defm V_CMP_LE_U16 : VOPC_Real_gfx11<0x03b>; +defm V_CMP_GT_U16 : VOPC_Real_gfx11<0x03c>; +defm V_CMP_NE_U16 : VOPC_Real_gfx11<0x03d>; +defm V_CMP_GE_U16 : VOPC_Real_gfx11<0x03e>; +defm V_CMP_F_I32 : VOPC_Real_gfx11<0x040>; +defm V_CMP_LT_I32 : VOPC_Real_gfx11<0x041>; +defm V_CMP_EQ_I32 : VOPC_Real_gfx11<0x042>; +defm V_CMP_LE_I32 : VOPC_Real_gfx11<0x043>; +defm V_CMP_GT_I32 : VOPC_Real_gfx11<0x044>; +defm V_CMP_NE_I32 : VOPC_Real_gfx11<0x045>; +defm V_CMP_GE_I32 : VOPC_Real_gfx11<0x046>; +defm V_CMP_T_I32 : VOPC_Real_gfx11<0x047>; +defm V_CMP_F_U32 : VOPC_Real_gfx11<0x048>; +defm V_CMP_LT_U32 : VOPC_Real_gfx11<0x049>; +defm V_CMP_EQ_U32 : VOPC_Real_gfx11<0x04a>; +defm V_CMP_LE_U32 : VOPC_Real_gfx11<0x04b>; +defm V_CMP_GT_U32 : VOPC_Real_gfx11<0x04c>; +defm V_CMP_NE_U32 : VOPC_Real_gfx11<0x04d>; +defm V_CMP_GE_U32 : VOPC_Real_gfx11<0x04e>; +defm V_CMP_T_U32 : VOPC_Real_gfx11<0x04f>; + +defm V_CMP_F_I64 : VOPC_Real_gfx11<0x050>; +defm V_CMP_LT_I64 : VOPC_Real_gfx11<0x051>; +defm V_CMP_EQ_I64 : VOPC_Real_gfx11<0x052>; +defm V_CMP_LE_I64 : VOPC_Real_gfx11<0x053>; +defm V_CMP_GT_I64 : VOPC_Real_gfx11<0x054>; +defm V_CMP_NE_I64 : VOPC_Real_gfx11<0x055>; +defm V_CMP_GE_I64 : VOPC_Real_gfx11<0x056>; +defm V_CMP_T_I64 : VOPC_Real_gfx11<0x057>; +defm V_CMP_F_U64 : VOPC_Real_gfx11<0x058>; +defm V_CMP_LT_U64 : VOPC_Real_gfx11<0x059>; +defm V_CMP_EQ_U64 : VOPC_Real_gfx11<0x05a>; +defm V_CMP_LE_U64 : VOPC_Real_gfx11<0x05b>; +defm V_CMP_GT_U64 : VOPC_Real_gfx11<0x05c>; +defm V_CMP_NE_U64 : VOPC_Real_gfx11<0x05d>; +defm V_CMP_GE_U64 : VOPC_Real_gfx11<0x05e>; +defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>; + +defm V_CMP_CLASS_F16 : VOPC_Real_gfx11<0x07d>; +defm V_CMP_CLASS_F32 : VOPC_Real_gfx11<0x07e>; +defm V_CMP_CLASS_F64 : VOPC_Real_gfx11<0x07f>; + +defm V_CMPX_F_F16 : VOPCX_Real_gfx11<0x080>; +defm V_CMPX_LT_F16 : VOPCX_Real_gfx11<0x081>; +defm V_CMPX_EQ_F16 : VOPCX_Real_gfx11<0x082>; +defm V_CMPX_LE_F16 : VOPCX_Real_gfx11<0x083>; +defm V_CMPX_GT_F16 : VOPCX_Real_gfx11<0x084>; +defm V_CMPX_LG_F16 : VOPCX_Real_gfx11<0x085>; +defm V_CMPX_GE_F16 : VOPCX_Real_gfx11<0x086>; +defm V_CMPX_O_F16 : VOPCX_Real_gfx11<0x087>; +defm V_CMPX_U_F16 : VOPCX_Real_gfx11<0x088>; +defm V_CMPX_NGE_F16 : VOPCX_Real_gfx11<0x089>; +defm V_CMPX_NLG_F16 : VOPCX_Real_gfx11<0x08a>; +defm V_CMPX_NGT_F16 : VOPCX_Real_gfx11<0x08b>; +defm V_CMPX_NLE_F16 : VOPCX_Real_gfx11<0x08c>; +defm V_CMPX_NEQ_F16 : VOPCX_Real_gfx11<0x08d>; +defm V_CMPX_NLT_F16 : VOPCX_Real_gfx11<0x08e>; +defm V_CMPX_T_F16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16", "v_cmpx_t_f16">; +defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>; +defm V_CMPX_LT_F32 : VOPCX_Real_gfx11<0x091>; +defm V_CMPX_EQ_F32 : VOPCX_Real_gfx11<0x092>; +defm V_CMPX_LE_F32 : VOPCX_Real_gfx11<0x093>; +defm V_CMPX_GT_F32 : VOPCX_Real_gfx11<0x094>; +defm V_CMPX_LG_F32 : VOPCX_Real_gfx11<0x095>; +defm V_CMPX_GE_F32 : VOPCX_Real_gfx11<0x096>; +defm V_CMPX_O_F32 : VOPCX_Real_gfx11<0x097>; +defm V_CMPX_U_F32 : VOPCX_Real_gfx11<0x098>; +defm V_CMPX_NGE_F32 : VOPCX_Real_gfx11<0x099>; +defm 
V_CMPX_NLG_F32 : VOPCX_Real_gfx11<0x09a>; +defm V_CMPX_NGT_F32 : VOPCX_Real_gfx11<0x09b>; +defm V_CMPX_NLE_F32 : VOPCX_Real_gfx11<0x09c>; +defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx11<0x09d>; +defm V_CMPX_NLT_F32 : VOPCX_Real_gfx11<0x09e>; +defm V_CMPX_T_F32 : VOPCX_Real_with_name_gfx11<0x09f, "V_CMPX_TRU_F32", "v_cmpx_t_f32">; + +defm V_CMPX_F_F64 : VOPCX_Real_gfx11<0x0a0>; +defm V_CMPX_LT_F64 : VOPCX_Real_gfx11<0x0a1>; +defm V_CMPX_EQ_F64 : VOPCX_Real_gfx11<0x0a2>; +defm V_CMPX_LE_F64 : VOPCX_Real_gfx11<0x0a3>; +defm V_CMPX_GT_F64 : VOPCX_Real_gfx11<0x0a4>; +defm V_CMPX_LG_F64 : VOPCX_Real_gfx11<0x0a5>; +defm V_CMPX_GE_F64 : VOPCX_Real_gfx11<0x0a6>; +defm V_CMPX_O_F64 : VOPCX_Real_gfx11<0x0a7>; +defm V_CMPX_U_F64 : VOPCX_Real_gfx11<0x0a8>; +defm V_CMPX_NGE_F64 : VOPCX_Real_gfx11<0x0a9>; +defm V_CMPX_NLG_F64 : VOPCX_Real_gfx11<0x0aa>; +defm V_CMPX_NGT_F64 : VOPCX_Real_gfx11<0x0ab>; +defm V_CMPX_NLE_F64 : VOPCX_Real_gfx11<0x0ac>; +defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11<0x0ad>; +defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11<0x0ae>; +defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">; + +defm V_CMPX_LT_I16 : VOPCX_Real_gfx11<0x0b1>; +defm V_CMPX_EQ_I16 : VOPCX_Real_gfx11<0x0b2>; +defm V_CMPX_LE_I16 : VOPCX_Real_gfx11<0x0b3>; +defm V_CMPX_GT_I16 : VOPCX_Real_gfx11<0x0b4>; +defm V_CMPX_NE_I16 : VOPCX_Real_gfx11<0x0b5>; +defm V_CMPX_GE_I16 : VOPCX_Real_gfx11<0x0b6>; +defm V_CMPX_LT_U16 : VOPCX_Real_gfx11<0x0b9>; +defm V_CMPX_EQ_U16 : VOPCX_Real_gfx11<0x0ba>; +defm V_CMPX_LE_U16 : VOPCX_Real_gfx11<0x0bb>; +defm V_CMPX_GT_U16 : VOPCX_Real_gfx11<0x0bc>; +defm V_CMPX_NE_U16 : VOPCX_Real_gfx11<0x0bd>; +defm V_CMPX_GE_U16 : VOPCX_Real_gfx11<0x0be>; +defm V_CMPX_F_I32 : VOPCX_Real_gfx11<0x0c0>; +defm V_CMPX_LT_I32 : VOPCX_Real_gfx11<0x0c1>; +defm V_CMPX_EQ_I32 : VOPCX_Real_gfx11<0x0c2>; +defm V_CMPX_LE_I32 : VOPCX_Real_gfx11<0x0c3>; +defm V_CMPX_GT_I32 : VOPCX_Real_gfx11<0x0c4>; +defm V_CMPX_NE_I32 : VOPCX_Real_gfx11<0x0c5>; +defm V_CMPX_GE_I32 : VOPCX_Real_gfx11<0x0c6>; +defm V_CMPX_T_I32 : VOPCX_Real_gfx11<0x0c7>; +defm V_CMPX_F_U32 : VOPCX_Real_gfx11<0x0c8>; +defm V_CMPX_LT_U32 : VOPCX_Real_gfx11<0x0c9>; +defm V_CMPX_EQ_U32 : VOPCX_Real_gfx11<0x0ca>; +defm V_CMPX_LE_U32 : VOPCX_Real_gfx11<0x0cb>; +defm V_CMPX_GT_U32 : VOPCX_Real_gfx11<0x0cc>; +defm V_CMPX_NE_U32 : VOPCX_Real_gfx11<0x0cd>; +defm V_CMPX_GE_U32 : VOPCX_Real_gfx11<0x0ce>; +defm V_CMPX_T_U32 : VOPCX_Real_gfx11<0x0cf>; + +defm V_CMPX_F_I64 : VOPCX_Real_gfx11<0x0d0>; +defm V_CMPX_LT_I64 : VOPCX_Real_gfx11<0x0d1>; +defm V_CMPX_EQ_I64 : VOPCX_Real_gfx11<0x0d2>; +defm V_CMPX_LE_I64 : VOPCX_Real_gfx11<0x0d3>; +defm V_CMPX_GT_I64 : VOPCX_Real_gfx11<0x0d4>; +defm V_CMPX_NE_I64 : VOPCX_Real_gfx11<0x0d5>; +defm V_CMPX_GE_I64 : VOPCX_Real_gfx11<0x0d6>; +defm V_CMPX_T_I64 : VOPCX_Real_gfx11<0x0d7>; +defm V_CMPX_F_U64 : VOPCX_Real_gfx11<0x0d8>; +defm V_CMPX_LT_U64 : VOPCX_Real_gfx11<0x0d9>; +defm V_CMPX_EQ_U64 : VOPCX_Real_gfx11<0x0da>; +defm V_CMPX_LE_U64 : VOPCX_Real_gfx11<0x0db>; +defm V_CMPX_GT_U64 : VOPCX_Real_gfx11<0x0dc>; +defm V_CMPX_NE_U64 : VOPCX_Real_gfx11<0x0dd>; +defm V_CMPX_GE_U64 : VOPCX_Real_gfx11<0x0de>; +defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>; +defm V_CMPX_CLASS_F16 : VOPCX_Real_gfx11<0x0fd>; +defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11<0x0fe>; +defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11<0x0ff>; + //===----------------------------------------------------------------------===// // GFX10. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus in { +let AssemblerPredicate = isGFX10Only in { multiclass VOPC_Real_gfx10 op> { let DecoderNamespace = "GFX10" in { def _e32_gfx10 : @@ -931,7 +1722,7 @@ let AssemblerPredicate = isGFX10Plus in { defm : VOPCXInstAliases; } -} // End AssemblerPredicate = isGFX10Plus +} // End AssemblerPredicate = isGFX10Only defm V_CMP_LT_I16 : VOPC_Real_gfx10<0x089>; defm V_CMP_EQ_I16 : VOPC_Real_gfx10<0x08a>; @@ -1025,6 +1816,12 @@ multiclass VOPCX_Real_gfx6_gfx7 op> : multiclass VOPCX_Real_gfx6_gfx7_gfx10 op> : VOPC_Real_gfx6_gfx7, VOPCX_Real_gfx10; +multiclass VOPC_Real_gfx6_gfx7_gfx10_gfx11 op> : + VOPC_Real_gfx6_gfx7_gfx10, VOPC_Real_gfx11; + +multiclass VOPCX_Real_gfx6_gfx7_gfx10_gfx11 op> : + VOPCX_Real_gfx6_gfx7_gfx10, VOPCX_Real_gfx11; + defm V_CMP_F_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x000>; defm V_CMP_LT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x001>; defm V_CMP_EQ_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x002>; @@ -1057,21 +1854,21 @@ defm V_CMPX_NLE_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01c>; defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01d>; defm V_CMPX_NLT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01e>; defm V_CMPX_TRU_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01f>; -defm V_CMP_F_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x020>; -defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x021>; -defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x022>; -defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x023>; -defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x024>; -defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x025>; -defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x026>; -defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x027>; -defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x028>; -defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x029>; -defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02a>; -defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02b>; -defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02c>; -defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02d>; -defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02e>; +defm V_CMP_F_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x020>; +defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x021>; +defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x022>; +defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x023>; +defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x024>; +defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x025>; +defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x026>; +defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x027>; +defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x028>; +defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x029>; +defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02a>; +defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02b>; +defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02c>; +defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02d>; +defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02e>; defm V_CMP_TRU_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02f>; defm V_CMPX_F_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x030>; defm V_CMPX_LT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x031>; diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td new file mode 100644 index 000000000000..420f18436095 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td @@ -0,0 +1,159 @@ +//===-- VOPDInstructions.td - Vector Instruction Definitions --------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Encodings +//===----------------------------------------------------------------------===// + +class VOPDe opX, bits<5> opY> : Enc64 { + bits<9> src0X; + bits<8> vsrc1X; + bits<8> vdstX; + bits<9> src0Y; + bits<8> vsrc1Y; + bits<8> vdstY; + + let Inst{8-0} = src0X; + let Inst{16-9} = vsrc1X; + let Inst{21-17} = opY; + let Inst{25-22} = opX; + let Inst{31-26} = 0x32; // encoding + let Inst{40-32} = src0Y; + let Inst{48-41} = vsrc1Y; + let Inst{55-49} = vdstY{7-1}; + let Inst{63-56} = vdstX; +} + +class VOPD_MADKe opX, bits<5> opY> : Enc96 { + bits<9> src0X; + bits<8> vsrc1X; + bits<8> vdstX; + bits<9> src0Y; + bits<8> vsrc1Y; + bits<8> vdstY; + bits<32> imm; + + let Inst{8-0} = src0X; + let Inst{16-9} = vsrc1X; + let Inst{21-17} = opY; + let Inst{25-22} = opX; + let Inst{31-26} = 0x32; // encoding + let Inst{40-32} = src0Y; + let Inst{48-41} = vsrc1Y; + let Inst{55-49} = vdstY{7-1}; + let Inst{63-56} = vdstX; + let Inst{95-64} = imm; +} + +//===----------------------------------------------------------------------===// +// VOPD classes +//===----------------------------------------------------------------------===// + +class VOPD_Base + : VOPAnyCommon, + VOP, + SIMCInstr { + // Fields for table indexing + Instruction Opcode = !cast(NAME); + bits<5> OpX = XasVC.VOPDOp; + bits<5> OpY = YasVC.VOPDOp; + + let VALU = 1; + + let DecoderNamespace = "GFX11"; + let AssemblerPredicate = isGFX11Plus; + let WaveSizePredicate = isWave32; + let isCodeGenOnly = 0; + let SubtargetPredicate = isGFX11Plus; + let AsmMatchConverter = "cvtVOPD"; + let Size = 8; + let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg); + let mayRaiseFPException = ReadsModeReg; + + let Uses = RegListUnion.ret; + let Defs = RegListUnion.ret; + let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW); +} + +class VOPD + : VOPD_Base, + VOPDe { + let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X); + let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y); +} + +class VOPD_MADK + : VOPD_Base, + VOPD_MADKe { + let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X); + let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y); + let Size = 12; +} + +// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is +// not. Since we generate the DUAL form by converting from the normal form we +// will never generate it. 
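To make the VOPDe layout above concrete: a VOPD word packs two VALU opcodes (a 4-bit opX and a 5-bit opY) plus both operand sets into a single 64-bit instruction, and vdstY is stored without its low bit (Inst{55-49} = vdstY{7-1}; the missing bit is implied rather than encoded). Below is a minimal C++ sketch of that packing, mirroring the 'let Inst{...}' assignments; the helper name and signature are invented for illustration and are not part of this patch.

#include <cstdint>

// Hypothetical helper mirroring the VOPDe Enc64 layout above.
uint64_t encodeVOPD(uint8_t opX, uint8_t opY, uint16_t src0X, uint8_t vsrc1X,
                    uint8_t vdstX, uint16_t src0Y, uint8_t vsrc1Y,
                    uint8_t vdstY) {
  uint64_t Inst = 0;
  Inst |= uint64_t(src0X & 0x1ff);        // Inst{8-0}   = src0X
  Inst |= uint64_t(vsrc1X) << 9;          // Inst{16-9}  = vsrc1X
  Inst |= uint64_t(opY & 0x1f) << 17;     // Inst{21-17} = opY
  Inst |= uint64_t(opX & 0xf) << 22;      // Inst{25-22} = opX
  Inst |= uint64_t(0x32) << 26;           // Inst{31-26} = 0x32 (VOPD encoding)
  Inst |= uint64_t(src0Y & 0x1ff) << 32;  // Inst{40-32} = src0Y
  Inst |= uint64_t(vsrc1Y) << 41;         // Inst{48-41} = vsrc1Y
  Inst |= uint64_t(vdstY >> 1) << 49;     // Inst{55-49} = vdstY{7-1}
  Inst |= uint64_t(vdstX) << 56;          // Inst{63-56} = vdstX
  return Inst;
}

VOPD_MADKe extends the same layout to 96 bits by appending the 32-bit literal at Inst{95-64}. The defvar lists that follow enumerate which pseudos may occupy the X and Y halves.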
+defvar VOPDYPseudos = [ + "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32", + "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32", + "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32", + "V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32" +]; +defvar VOPDXPseudos = VOPDYPseudos[0...VOPDX_Max_Index]; + +def VOPDDstYOperand : RegisterOperand { + let DecoderMethod = "decodeOperandVOPDDstY"; +} + +foreach x = VOPDXPseudos in { + foreach y = VOPDYPseudos in { + defvar xInst = !cast(x); + defvar yInst = !cast(y); + defvar XasVC = !cast(x); + defvar YasVC = !cast(y); + defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"), + !eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32")); + // If X or Y is MADK (have a mandatory immediate), all src operands which + // may contain an optional literal must use the VSrc_*_Deferred operand + // type. Optional literal operands in MADK VOPD components always use this + // operand form. If Both X and Y are MADK, the mandatory literal of X + // additionally must use an alternate operand format which defers to the + // 'real' Y literal + defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32")); + defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32")); + defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2); + defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY); + if !or(isOpXMADK, isOpYMADK) then { + if !and(isOpXMADK, isOpYMADK) then { + defvar X_MADK_Pfl = !cast(xInst.Pfl); + defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY); + defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY; + def OpName : VOPD_MADK; + } else { + defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY; + if isOpXMADK then { + assert !not(isOpYMADK), "Expected only OpX as MADK"; + defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred); + def OpName : VOPD_MADK; + } else { + assert !not(isOpXMADK), "Expected only OpY as MADK"; + defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY); + def OpName : VOPD_MADK; + } + } + } else { + defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY); + defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY; + def OpName : VOPD; + } + } +} + diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index a8368892c565..8cd3d2fe2c47 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -30,6 +30,16 @@ class VOP { string OpName = opName; } +// First 13 insts from VOPDY are also VOPDX. 
DOT2ACC_F32_BF16 is omitted +defvar VOPDX_Max_Index = 12; + +class VOPD_Component OpIn, string vOPDName> { + Instruction BaseVOP = !cast(NAME); + string VOPDName = "v_dual_" # !substr(vOPDName, 2); + bits<5> VOPDOp = OpIn; + bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index); +} + class VOPAnyCommon pattern> : InstSI { @@ -92,6 +102,7 @@ class VOP3_Pseudo pattern = [], let VOP3_OPSEL = isVop3OpSel; let IsPacked = P.IsPacked; let IsMAI = P.IsMAI; + let IsWMMA = P.IsWMMA; let AsmOperands = !if(isVop3OpSel, P.AsmVOP3OpSel, @@ -144,9 +155,9 @@ class VOP_Real { bit IsSingle = ps.Pfl.IsSingle; } -class VOP3_Real : +class VOP3_Real : VOP_Real , - InstSI , + InstSI , SIMCInstr { let VALU = 1; @@ -155,9 +166,6 @@ class VOP3_Real : let isCodeGenOnly = 0; let UseNamedOperandTable = 1; - let Constraints = ps.Constraints; - let DisableEncoding = ps.DisableEncoding; - // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let OtherPredicates = ps.OtherPredicates; @@ -179,8 +187,12 @@ class VOP3_Real : // XXX - Is there any reason to distinguish this from regular VOP3 // here? -class VOP3P_Real : - VOP3_Real; +class VOP3P_Real : + VOP3_Real { + + // The v_wmma pseudos have extra constraints that we do not want to impose on the real instruction. + let Constraints = !if(!eq(!substr(ps.Mnemonic,0,6), "v_wmma"), "", ps.Constraints); +} class VOP3a : Enc64 { bits<4> src0_modifiers; @@ -217,6 +229,8 @@ class VOP3a_gfx10 op, VOPProfile p> : VOP3a
<p> { let Inst{31-26} = 0x35; } +class VOP3a_gfx11<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p>; + class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P>
{ let Inst{25-16} = op; let Inst{15} = !if(P.HasClamp, clamp{0}, 0); @@ -232,6 +246,8 @@ class VOP3e_gfx10 op, VOPProfile p> : VOP3a_gfx10 { let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0); } +class VOP3e_gfx11 op, VOPProfile p> : VOP3e_gfx10; + class VOP3e_vi op, VOPProfile P> : VOP3a_vi { bits<8> vdst; let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); @@ -251,6 +267,9 @@ class VOP3OpSel_gfx10 op, VOPProfile p> : VOP3e_gfx10 { let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0); } +class VOP3OpSel_gfx11 op, VOPProfile p> : VOP3OpSel_gfx10; + + // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa class VOP3Interp_vi op, VOPProfile P> : VOP3e_vi { bits<2> attrchan; @@ -285,6 +304,8 @@ class VOP3Interp_gfx10 op, VOPProfile p> : VOP3e_gfx10 { let Inst{62} = !if(p.HasSrc0Mods, src0_modifiers{0}, 0); } +class VOP3Interp_gfx11 op, VOPProfile p> : VOP3Interp_gfx10; + class VOP3be : Enc64 { bits<8> vdst; bits<2> src0_modifiers; @@ -310,7 +331,6 @@ class VOP3be : Enc64 { class VOP3Pe op, VOPProfile P> : Enc64 { bits<8> vdst; - // neg, neg_hi, op_sel put in srcN_modifiers bits<4> src0_modifiers; bits<9> src0; bits<4> src1_modifiers; @@ -372,11 +392,42 @@ class VOP3Pe_MAI op, VOPProfile P, bit acc_cd = 0> : Enc64 { let Inst{63-61} = !if(P.HasSrc1, blgp, 0); } +class VOP3Pe_SMFMAC op> : Enc64 { + bits<10> vdst; // VGPR or AGPR, but not SGPR. vdst{8} is not encoded in the instruction. + bits<10> src0; + bits<10> src1; + bits<9> idx; + bits<3> blgp; + bits<3> cbsz; + bits<4> abid; + + let blgp = 0; + + let Inst{7-0} = vdst{7-0}; + + let Inst{10-8} = cbsz; + let Inst{14-11} = abid; + + let Inst{15} = vdst{9}; // acc(vdst) + + let Inst{22-16} = op; + let Inst{31-23} = 0x1a7; // encoding + let Inst{40-32} = src0{8-0}; + let Inst{49-41} = src1{8-0}; + let Inst{58-50} = idx; + + let Inst{59} = src0{9}; // acc(0) + let Inst{60} = src1{9}; // acc(1) + + let Inst{63-61} = blgp; +} class VOP3Pe_gfx10 op, VOPProfile P> : VOP3Pe { let Inst{31-23} = 0x198; //encoding } +class VOP3Pe_gfx11 op, VOPProfile P> : VOP3Pe_gfx10; + class VOP3be_gfx6_gfx7 op, VOPProfile p> : VOP3be
<p> { let Inst{25-17} = op; } @@ -388,6 +439,8 @@ class VOP3be_gfx10<bits<10> op, VOPProfile p> : VOP3be<p> { let Inst{31-26} = 0x35; } +class VOP3be_gfx11<bits<10> op, VOPProfile p> : VOP3be_gfx10<op, p>; + class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P>
{ bits<1> clamp; let Inst{25-16} = op; @@ -621,8 +674,89 @@ class VOP_DPPe : Enc64 { let Inst{63-60} = row_mask; } -class VOP_DPP_Pseudo pattern=[]> : - InstSI , +class VOP3_DPPe_Fields_Base { + bits<9> dpp_ctrl; + bits<1> bound_ctrl; + bits<4> bank_mask; + bits<4> row_mask; + bit fi; +} +class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base { + bits<8> src0; +} + +// Common refers to common between DPP and DPP8 +class VOP3_DPPe_Common_Base op, VOPProfile P> : Enc96 { + bits<4> src0_modifiers; + bits<3> src1_modifiers; + bits<3> src2_modifiers; + bits<1> clamp; + bits<2> omod; + + let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); + let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); + let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); + // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs. + let Inst{11} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{2}, 0),?); + let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, 0),?); + let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),?); + let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),?); + let Inst{15} = !if(P.HasClamp, clamp, 0); + let Inst{25-16} = op; + let Inst{31-26} = 0x35; + + let Inst{60-59} = !if(P.HasOMod, omod, 0); + let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); + let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); + let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); +} + +class VOP3_DPPe_Common op, VOPProfile P> : VOP3_DPPe_Common_Base { + bits<8> vdst; + bits<9> src1; + bits<9> src2; + + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{49-41} = !if(P.HasSrc1, src1, 0); + let Inst{58-50} = !if(P.HasSrc2, src2, 0); +} + +class VOP3P_DPPe_Common_Base op, VOPProfile P> : Enc96 { + bits<4> src0_modifiers; + bits<4> src1_modifiers; + bits<4> src2_modifiers; + bits<1> clamp; + + let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0 + let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1 + let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2 + let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0) + let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) + let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) + let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, ?); // op_sel_hi(2) + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); + let Inst{22-16} = op; + let Inst{31-23} = 0x198; // encoding + let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, ?); // op_sel_hi(0) + let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, ?); // op_sel_hi(1) + let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo) + let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo) + let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) +} + +class VOP3P_DPPe_Common op, VOPProfile P> : VOP3P_DPPe_Common_Base { + bits<8> vdst; + bits<9> src1; + bits<9> src2; + + let Inst{7-0} = vdst; + let Inst{49-41} = !if(P.HasSrc1, src1, 0); + let Inst{58-50} = !if(P.HasSrc2, src2, 0); +} + +class VOP_DPP_Pseudo pattern=[], + dag Ins = P.InsDPP, string asmOps = P.AsmDPP> : + InstSI , VOP , SIMCInstr { @@ -645,7 +779,7 @@ class VOP_DPP_Pseudo pattern=[]> : let isConvergent = 1; string Mnemonic = OpName; - string AsmOperands = P.AsmDPP; + string AsmOperands = asmOps; let AsmMatchConverter = !if(P.HasModifiers, 
"cvtDPP", ""); let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP); @@ -659,6 +793,17 @@ class VOP_DPP_Pseudo pattern=[]> : VOPProfile Pfl = P; } +class VOP3_DPP_Pseudo : + VOP_DPP_Pseudo { + let PseudoInstr = OpName#"_e64"#"_dpp"; + let OutOperandList = P.OutsVOP3DPP; + let Size = 12; + let VOP3 = 1; + let AsmMatchConverter = "cvtVOP3DPP"; + let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP, + AMDGPUAsmVariants.Disable); +} + class VOP_DPP_Real : InstSI , SIMCInstr { @@ -679,6 +824,7 @@ class VOP_DPP_Real : let isConvergent = ps.isConvergent; let SubtargetPredicate = ps.SubtargetPredicate; let AssemblerPredicate = ps.AssemblerPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let UseNamedOperandTable = ps.UseNamedOperandTable; @@ -692,11 +838,10 @@ class VOP_DPP_Real : let TRANS = ps.TRANS; } -class VOP_DPP : - InstSI , - VOP_DPPe { +class VOP_DPP_Base : + InstSI { let mayLoad = 0; let mayStore = 0; @@ -717,6 +862,59 @@ class VOP_DPP : + VOP_DPP_Base, VOP_DPPe; + +class VOP3_DPP_Base : + VOP_DPP_Base { + let OutOperandList = P.OutsVOP3DPP; + let AsmMatchConverter = "cvtVOP3DPP"; + let VOP3 = 1; + let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP, + AMDGPUAsmVariants.Disable); + let Size = 12; +} + +class VOP3_DPP op, string OpName, VOPProfile P, bit IsDPP16, + dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP), + string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> : + VOP3_DPP_Base, VOP3_DPPe_Common, + VOP3_DPPe_Fields { + + let Inst{40-32} = 0xfa; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{80-72} = dpp_ctrl; + let Inst{82} = !if(IsDPP16, fi, ?); + let Inst{83} = bound_ctrl; + + // Inst{87-84} ignored by hw + let Inst{91-88} = bank_mask; + let Inst{95-92} = row_mask; +} + +class VOP3P_DPP op, string OpName, VOPProfile P, bit IsDPP16, + dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP), + string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> : + VOP3_DPP_Base, VOP3P_DPPe_Common, + VOP3_DPPe_Fields { + + let VOP3P = 1; + + let Inst{40-32} = 0xfa; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{80-72} = dpp_ctrl; + let Inst{82} = !if(IsDPP16, fi, ?); + let Inst{83} = bound_ctrl; + + // Inst{87-84} ignored by hw + let Inst{91-88} = bank_mask; + let Inst{95-92} = row_mask; +} + class VOP_DPP8e : Enc64 { bits<8> src0; bits<24> dpp8; @@ -726,9 +924,14 @@ class VOP_DPP8e : Enc64 { let Inst{63-40} = dpp8{23-0}; } -class VOP_DPP8 : - InstSI, - VOP_DPP8e
<P>
{ +class VOP3_DPP8e_Fields { + bits<8> src0; + bits<24> dpp8; + bits<9> fi; +} + +class VOP_DPP8_Base : + InstSI { let mayLoad = 0; let mayStore = 0; @@ -742,12 +945,44 @@ class VOP_DPP8 : let AsmMatchConverter = "cvtDPP8"; let SubtargetPredicate = HasDPP8; let AssemblerPredicate = HasDPP8; - let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP, - AMDGPUAsmVariants.Disable); + let AsmVariantName = AMDGPUAsmVariants.DPP; let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); } +class VOP_DPP8 : + VOP_DPP8_Base, VOP_DPP8e
<P>
; + +class VOP3_DPP8_Base : + VOP_DPP8_Base { + let OutOperandList = P.OutsVOP3DPP8; + let AsmMatchConverter = "cvtVOP3DPP8"; + let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP, + AMDGPUAsmVariants.Disable); + let VOP3 = 1; + let Size = 12; +} + + +class VOP3_DPP8 op, string OpName, VOPProfile P> : + VOP3_DPP8_Base, VOP3_DPPe_Common, + VOP3_DPP8e_Fields { + + let Inst{40-32} = fi; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{95-72} = dpp8{23-0}; +} + +class VOP3P_DPP8 op, string OpName, VOPProfile P> : + VOP3_DPP8_Base, VOP3P_DPPe_Common, + VOP3_DPP8e_Fields { + + let VOP3P = 1; + let Inst{40-32} = fi; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{95-72} = dpp8{23-0}; +} + def DPP8Mode { int FI_0 = 0xE9; int FI_1 = 0xEA; @@ -780,14 +1015,12 @@ class getDivergentFrag { } class VOPPatGen { - PatFrag Operator = getDivergentFrag < Op >.ret; dag Ins = !foreach(tmp, P.Ins32, !subst(ins, Operator, !subst(P.Src0RC32, P.Src0VT, !subst(P.Src1RC32, P.Src1VT, tmp)))); - dag Outs = !foreach(tmp, P.Outs32, !subst(outs, set, !subst(P.DstRC, P.DstVT, tmp))); @@ -827,12 +1060,379 @@ class VOPBinOpClampPat : DSTCLAMP.ENABLE) >; +//===----------------------------------------------------------------------===// +// VOP3 Classes +//===----------------------------------------------------------------------===// + +class getVOP3ModPat { + dag src0 = !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + + list ret3 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT src0), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT src0), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT src0)))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3PModPat { + dag src0_dag = (P.Src0VT (SrcPat P.Src0VT:$src0, i32:$src0_modifiers)); + dag src1_dag = (P.Src1VT (SrcPat P.Src1VT:$src1, i32:$src1_modifiers)); + dag src2_dag = (P.Src2VT (SrcPat P.Src2VT:$src2, i32:$src2_modifiers)); + dag clamp_dag = (i1 timm:$clamp); + + list ret3 = [(set P.DstVT:$vdst, + !if(HasExplicitClamp, + (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag, clamp_dag), + (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag)))]; + + list ret2 = [(set P.DstVT:$vdst, + !if(HasExplicitClamp, + (DivergentFragOrOp.ret src0_dag, src1_dag, clamp_dag), + (DivergentFragOrOp.ret src0_dag, src1_dag)))]; + + list ret1 = [(set P.DstVT:$vdst, + !if(HasExplicitClamp, + (DivergentFragOrOp.ret src0_dag, clamp_dag), + (DivergentFragOrOp.ret src0_dag)))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3OpSelPat { + list ret3 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))]; + + list ret = 
!if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3OpSelModPat { + list ret3 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers), + (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), + (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), + (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3FromVOP2Pat { + list ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]; +} +// In VOP1, we can have clamp and omod even if !HasModifiers +class getVOP3Pat { + dag src0 = + !if(P.HasOMod, + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$omod)), // impossible? + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i1:$clamp), + (VOP3Mods0 P.Src0VT:$src0)) + ); + list ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret (P.Src0VT src0), P.Src1VT:$src1, P.Src2VT:$src2))]; + + list ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret (P.Src0VT src0), P.Src1VT:$src1))]; + + list ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret (P.Src0VT src0)))]; + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3ClampPat { + list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; + list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; + list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3MAIPat { + list ret = !if(!eq(P.Src0VT, P.Src1VT), + // mfma + [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, + timm:$cbsz, timm:$abid, timm:$blgp))], + // smfmac + [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx, + timm:$cbsz, timm:$abid))]); +} + +class VOP3Features { + bit HasClamp = Clamp; + bit HasOpSel = OpSel; + bit IsPacked = Packed; + bit IsMAI = MAI; +} + +def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; +def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; +def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; +def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; +def VOP3_MAI : VOP3Features<0, 0, 0, 1>; + +class VOP3_Profile_Base : VOPProfile { + + let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); + let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); + let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); + let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + + let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers)); +} + +class VOP3_Profile : VOP3_Profile_Base { + let IsSingle = 1; + +} + +// consistently gives instructions a _e64 suffix +multiclass VOP3Inst_Pseudo_Wrapper pattern = [], bit VOP3Only = 0> { + def _e64 : VOP3_Pseudo; +} + +class VOP3InstBase : + VOP3_Pseudo.ret, + getVOP3OpSelPat.ret), + !if(P.HasModifiers, + getVOP3ModPat.ret, + !if(IsVOP2, + getVOP3FromVOP2Pat.ret, + !if(P.HasIntClamp, + 
getVOP3ClampPat.ret, + !if (P.IsMAI, + getVOP3MAIPat.ret, + getVOP3Pat.ret))))), + 0, P.HasOpSel> { + + let IntClamp = P.HasIntClamp; + let AsmMatchConverter = + !if(P.HasOpSel, + "cvtVOP3OpSel", + !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), + "cvtVOP3", + "")); +} + +multiclass VOP3Inst { + def _e64 : VOP3InstBase; + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList.ret in + def _e64_dpp : VOP3_DPP_Pseudo ; + } // end SubtargetPredicate = isGFX11Plus +} + +//===----------------------------------------------------------------------===// +// VOP3 DPP +//===----------------------------------------------------------------------===// + +class Base_VOP3_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> + : VOP3_DPP { + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let AssemblerPredicate = HasDPP16; + let SubtargetPredicate = HasDPP16; + let OtherPredicates = ps.OtherPredicates; +} + +class VOP3_DPP16 op, VOP_DPP_Pseudo ps, int subtarget, + string opName = ps.OpName> + : Base_VOP3_DPP16, SIMCInstr; + +class Base_VOP3_DPP8 op, VOP_Pseudo ps, string opName = ps.OpName> + : VOP3_DPP8 { + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + + let OtherPredicates = ps.OtherPredicates; +} + +class Base_VOP3b_DPP16 op, VOP_DPP_Pseudo ps, + string opName = ps.OpName> + : Base_VOP3_DPP16 { + bits<7> sdst; + let Inst{14 - 8} = sdst; +} + +class VOP3b_DPP8_Base op, VOP_Pseudo ps, string opName = ps.OpName> + : Base_VOP3_DPP8 { + bits<7> sdst; + let Inst{14 - 8} = sdst; +} + +//===----------------------------------------------------------------------===// +// VOP3 GFX11 +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, + DecoderNamespace = "GFX11" in { + multiclass VOP3_Real_Base_gfx11 op, string opName = NAME, + bit isSingle = 0> { + defvar ps = !cast(opName#"_e64"); + let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + foreach _ = BoolToList.ret in + def _e64_gfx11 : + VOP3_Real, + VOP3OpSel_gfx11; + foreach _ = BoolToList.ret in + def _e64_gfx11 : + VOP3_Real, + VOP3e_gfx11; + } + } + multiclass VOP3_Real_with_name_gfx11 op, string opName, + string asmName, bit isSingle = 0> { + defvar ps = !cast(opName#"_e64"); + let AsmString = asmName # ps.AsmOperands, + IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + foreach _ = BoolToList.ret in + def _e64_gfx11 : + VOP3_Real, + VOP3OpSel_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; + foreach _ = BoolToList.ret in + def _e64_gfx11 : + VOP3_Real, + VOP3e_gfx11, + MnemonicAlias, Requires<[isGFX11Plus]>; + } + } + // for READLANE/WRITELANE + multiclass VOP3_Real_No_Suffix_gfx11 op, string opName = NAME> { + defvar ps = !cast(opName); + def _e64_gfx11 : + VOP3_Real, + VOP3e_gfx11; + } + multiclass VOP3_Real_dpp_Base_gfx11 op, string opName = NAME> { + def _e64_dpp_gfx11 : VOP3_DPP16(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> { + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP3_Real_dpp_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e64"); + let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in { + defm NAME : VOP3_Real_dpp_Base_gfx11; + } + } + multiclass VOP3_Real_dpp8_Base_gfx11 op, string opName = NAME> { + defvar ps = !cast(opName#"_e64"); + def _e64_dpp8_gfx11 : Base_VOP3_DPP8 { + let DecoderNamespace = "DPP8GFX11"; + } + } + multiclass 
VOP3_Real_dpp8_with_name_gfx11 op, string opName, + string asmName> { + defvar ps = !cast(opName#"_e64"); + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in { + defm NAME : VOP3_Real_dpp8_Base_gfx11; + } + } + multiclass VOP3be_Real_gfx11 op, string opName, string asmName, + bit isSingle = 0> { + defvar ps = !cast(opName#"_e64"); + let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in + def _e64_gfx11 : + VOP3_Real, + VOP3be_gfx11 ; + } + multiclass VOP3be_Real_dpp_gfx11 op, string opName, string asmName> { + defvar ps = !cast(opName #"_e64"); + defvar dpp_ps = !cast(opName #"_e64" #"_dpp"); + def _e64_dpp_gfx11 : Base_VOP3b_DPP16, + SIMCInstr { + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP3be_Real_dpp8_gfx11 op, string opName, string asmName> { + defvar ps = !cast(opName #"_e64"); + def _e64_dpp8_gfx11 : VOP3b_DPP8_Base { + let DecoderNamespace = "DPP8GFX11"; + } + } +} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" + +// VOP1 and VOP2 depend on these triple defs +multiclass VOP3_Realtriple_gfx11 op, + bit isSingle = 0, string opName = NAME> : + VOP3_Real_Base_gfx11, + VOP3_Real_dpp_Base_gfx11, + VOP3_Real_dpp8_Base_gfx11; + +multiclass VOP3Only_Realtriple_gfx11 op> : + VOP3_Realtriple_gfx11; + +multiclass VOP3_Realtriple_with_name_gfx11 op, string opName, + string asmName, bit isSingle = 0> : + VOP3_Real_with_name_gfx11, + VOP3_Real_dpp_with_name_gfx11, + VOP3_Real_dpp8_with_name_gfx11; + +multiclass VOP3Only_Realtriple_with_name_gfx11 op, string opName, + string asmName> : + VOP3_Realtriple_with_name_gfx11; + +multiclass VOP3be_Realtriple_gfx11< + bits<10> op, bit isSingle = 0, string opName = NAME, + string asmName = !cast(opName#"_e64").Mnemonic> : + VOP3be_Real_gfx11, + VOP3be_Real_dpp_gfx11, + VOP3be_Real_dpp8_gfx11; + +multiclass VOP3beOnly_Realtriple_gfx11 op> : + VOP3be_Realtriple_gfx11; include "VOPCInstructions.td" include "VOP1Instructions.td" include "VOP2Instructions.td" include "VOP3Instructions.td" include "VOP3PInstructions.td" +include "VOPDInstructions.td" class VOPInfoTable : GenericTable { @@ -847,3 +1447,15 @@ class VOPInfoTable : GenericTable { def VOP1InfoTable : VOPInfoTable<"VOP1">; def VOP2InfoTable : VOPInfoTable<"VOP2">; def VOP3InfoTable : VOPInfoTable<"VOP3">; + +class VOPC64Table : GenericTable { + let FilterClass = "VOPC64_" # Format # "_Base"; + let CppTypeName = "VOPC64DPPInfo"; + let Fields = ["Opcode"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "isVOPC64" # Format # "OpcodeHelper"; +} + +def VOPC64DPPTable : VOPC64Table<"DPP">; +def VOPC64DPP8Table : VOPC64Table<"DPP8">; diff --git a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp index 9cd9661ae245..733f2f0a0499 100644 --- a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp +++ b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp @@ -11,3 +11,10 @@ using namespace llvm; void ARCFunctionInfo::anchor() {} + +MachineFunctionInfo * +ARCFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap + &Src2DstMBB) const { + return DestMF.cloneInfo(*this); +} diff --git a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h index 968c6b63f423..454206037498 100644 --- a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h +++ b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h @@ -34,9 +34,13 @@ public: explicit ARCFunctionInfo(MachineFunction &MF) : ReturnStackOffsetSet(false), VarArgsFrameIndex(0), ReturnStackOffset(-1U), 
MaxCallStackReq(0) {} - ~ARCFunctionInfo() {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; + void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; } int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp index c956f00b628d..589c58e285bb 100644 --- a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp +++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp @@ -36,7 +36,7 @@ using namespace llvm; namespace llvm { static cl::opt ArcKillAddrMode("arc-kill-addr-mode", cl::init(0), - cl::ReallyHidden, cl::ZeroOrMore); + cl::ReallyHidden); #define DUMP_BEFORE() ((ArcKillAddrMode & 0x0001) != 0) #define DUMP_AFTER() ((ArcKillAddrMode & 0x0002) != 0) @@ -459,12 +459,12 @@ void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, Register BaseReg = Ldst.getOperand(BasePos).getReg(); - Ldst.RemoveOperand(OffPos); - Ldst.RemoveOperand(BasePos); + Ldst.removeOperand(OffPos); + Ldst.removeOperand(BasePos); if (IsStore) { Src = Ldst.getOperand(BasePos - 1); - Ldst.RemoveOperand(BasePos - 1); + Ldst.removeOperand(BasePos - 1); } Ldst.setDesc(AST->getInstrInfo()->get(NewOpcode)); diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp index 52f74b729ff7..21757927d873 100644 --- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -21,7 +21,7 @@ using namespace llvm; static Reloc::Model getRelocModel(Optional RM) { - return RM.getValueOr(Reloc::Static); + return RM.value_or(Reloc::Static); } /// ARCTargetMachine ctor - Create an ILP32 architecture model @@ -84,6 +84,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCTarget() { } TargetTransformInfo -ARCTargetMachine::getTargetTransformInfo(const Function &F) { +ARCTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(ARCTTIImpl(this, F)); } diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.h b/llvm/lib/Target/ARC/ARCTargetMachine.h index c5e8c3f2936d..81ccfc6d5dd0 100644 --- a/llvm/lib/Target/ARC/ARCTargetMachine.h +++ b/llvm/lib/Target/ARC/ARCTargetMachine.h @@ -39,7 +39,7 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp index bb5336931932..618101755904 100644 --- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp +++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp @@ -16,8 +16,8 @@ #include "MCTargetDesc/ARCMCTargetDesc.h" #include "TargetInfo/ARCTargetInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -83,41 +83,43 @@ static bool readInstruction16(ArrayRef Bytes, uint64_t Address, } template -static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS, - uint64_t Address = 0, - const void *Decoder = nullptr); +static DecodeStatus +DecodeSignedOperand(MCInst &Inst, unsigned InsnS, uint64_t 
Address = 0, + const MCDisassembler *Decoder = nullptr); template -static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, - uint64_t Address = 0, - const void *Decoder = nullptr); +static DecodeStatus +DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, uint64_t Address = 0, + const MCDisassembler *Decoder = nullptr); template static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeMEMrs9(MCInst &, unsigned, uint64_t, const void *); +static DecodeStatus DecodeMEMrs9(MCInst &, unsigned, uint64_t, + const MCDisassembler *); static DecodeStatus DecodeLdLImmInstruction(MCInst &, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static DecodeStatus DecodeStLImmInstruction(MCInst &, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static DecodeStatus DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static DecodeStatus DecodeSOPwithRS12(MCInst &, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static DecodeStatus DecodeSOPwithRU6(MCInst &, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static DecodeStatus DecodeCCRU6Instruction(MCInst &, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t, uint64_t, - const void *); + const MCDisassembler *); static const uint16_t GPR32DecoderTable[] = { ARC::R0, ARC::R1, ARC::R2, ARC::R3, ARC::R4, ARC::R5, ARC::R6, @@ -128,7 +130,7 @@ static const uint16_t GPR32DecoderTable[] = { static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) { LLVM_DEBUG(dbgs() << "Not a GPR32 register."); return MCDisassembler::Fail; @@ -140,8 +142,8 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeGBR32ShortRegister(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // Enumerates registers from ranges [r0-r3],[r12-r15]. if (RegNo > 3) RegNo += 8; // 4 for r12, etc... @@ -165,7 +167,7 @@ static unsigned decodeAField(unsigned Insn) { } static DecodeStatus DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Dec) { + const MCDisassembler *Dec) { // We have the 9-bit immediate in the low bits, 6-bit register in high bits. unsigned S9 = Insn & 0x1ff; unsigned R = (Insn & (0x7fff & ~0x1ff)) >> 9; @@ -175,17 +177,16 @@ static DecodeStatus DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address, } static bool DecodeSymbolicOperand(MCInst &Inst, uint64_t Address, - uint64_t Value, const void *Decoder) { + uint64_t Value, + const MCDisassembler *Decoder) { static const uint64_t AtLeast = 2; - // TODO: Try to force emitter to use MCDisassembler* instead of void*. 
- auto Disassembler = static_cast(Decoder); - return (nullptr != Disassembler && - Disassembler->tryAddingSymbolicOperand(Inst, Value, Address, true, 0, - AtLeast)); + return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand( + Inst, Value, Address, true, 0, AtLeast, 0)); } static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address, - uint64_t Offset, const void *Decoder) { + uint64_t Offset, + const MCDisassembler *Decoder) { uint64_t NextAddress = Address + Offset; if (!DecodeSymbolicOperand(Inst, Address, NextAddress, Decoder)) @@ -194,7 +195,8 @@ static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address, template static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { static_assert(B > 0, "field is empty"); DecodeSymbolicOperandOff(Inst, Address, SignExtend32(InsnS), Decoder); @@ -204,7 +206,7 @@ static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS, template static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS, uint64_t /*Address*/, - const void * /*Decoder*/) { + const MCDisassembler * /*Decoder*/) { static_assert(B > 0, "field is empty"); Inst.addOperand(MCOperand::createImm( @@ -215,7 +217,7 @@ static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS, template static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, uint64_t /*Address*/, - const void * /*Decoder*/) { + const MCDisassembler * /*Decoder*/) { static_assert(B > 0, "field is empty"); const unsigned max = (1u << B) - 1; @@ -226,7 +228,7 @@ static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, static DecodeStatus DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned SrcC, DstB, LImm; DstB = decodeBField(Insn); if (DstB != 62) { @@ -243,7 +245,7 @@ static DecodeStatus DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn, static DecodeStatus DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned DstA, SrcB, LImm; LLVM_DEBUG(dbgs() << "Decoding LdLImm:\n"); SrcB = decodeBField(Insn); @@ -261,7 +263,7 @@ static DecodeStatus DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn, static DecodeStatus DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned DstA, SrcB; LLVM_DEBUG(dbgs() << "Decoding LdRLimm\n"); DstA = decodeAField(Insn); @@ -278,7 +280,7 @@ static DecodeStatus DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn, static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { LLVM_DEBUG(dbgs() << "Decoding MOV_S h-register\n"); using Field = decltype(Insn); Field H = fieldFromInstruction(Insn, 5, 3) | @@ -304,7 +306,7 @@ static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t Insn, static DecodeStatus DecodeCCRU6Instruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned DstB; LLVM_DEBUG(dbgs() << "Decoding CCRU6 instruction:\n"); DstB = decodeBField(Insn); @@ -318,7 +320,8 @@ static DecodeStatus DecodeCCRU6Instruction(MCInst &Inst, uint64_t Insn, } static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const 
MCDisassembler *Decoder) { unsigned DstB = decodeBField(Insn); DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder); using Field = decltype(Insn); @@ -328,7 +331,8 @@ static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn, } static DecodeStatus DecodeSOPwithRS12(MCInst &Inst, uint64_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned DstB = decodeBField(Insn); DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder); using Field = decltype(Insn); diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp index d0efecad63bc..65da95b0fc8d 100644 --- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -361,9 +361,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, MI = Front.pop_back_val(); // If we have already explored this MachineInstr, ignore it. - if (Reached.find(MI) != Reached.end()) + if (!Reached.insert(MI).second) continue; - Reached.insert(MI); if (MI->isPHI()) { for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { Register Reg = MI->getOperand(I).getReg(); diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h index 979371bf7cf6..9990078cfdbb 100644 --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -57,6 +57,7 @@ Pass *createMVEGatherScatterLoweringPass(); FunctionPass *createARMSLSHardeningPass(); FunctionPass *createARMIndirectThunks(); Pass *createMVELaneInterleavingPass(); +FunctionPass *createARMFixCortexA57AES1742098Pass(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); @@ -77,6 +78,7 @@ void initializeMVETailPredicationPass(PassRegistry &); void initializeMVEGatherScatterLoweringPass(PassRegistry &); void initializeARMSLSHardeningPass(PassRegistry &); void initializeMVELaneInterleavingPass(PassRegistry &); +void initializeARMFixCortexA57AES1742098Pass(PassRegistry &); } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 27edf69b4abf..48559a89a30a 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -19,9 +19,11 @@ include "llvm/Target/Target.td" // ARM Subtarget state. // -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", +// True if compiling for Thumb, false for ARM. +def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb", "true", "Thumb mode">; +// True if we're using software floating point features. def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", "true", "Use software floating " "point features.">; @@ -48,14 +50,18 @@ def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true", "Enable 64-bit FP registers", [FeatureFPRegs]>; +// True if the floating point unit supports double precision. def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true", "Floating point unit supports " "double precision", [FeatureFPRegs64]>; +// True if subtarget has the full 32 double precision FP registers for VFPv3. def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", "Extend FP to 32 double registers">; +/// Versions of the VFP flags restricted to single precision, or to +/// 16 d-registers, or both. multiclass VFPver prev, list otherimplies, @@ -100,6 +106,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; +// True if subtarget supports half-precision FP conversions. 
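+// (e.g. the vcvtb/vcvtt conversions between f16 and f32).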
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision " "floating point">; @@ -110,169 +117,211 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", [FeatureVFP4], []>; +// True if subtarget supports half-precision FP operations. def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " "floating point", [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>; +// True if subtarget supports half-precision FP fml operations. def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", "Enable full half-precision " "floating point fml instructions", [FeatureFullFP16]>; +// True if subtarget supports [su]div in Thumb mode. def FeatureHWDivThumb : SubtargetFeature<"hwdiv", - "HasHardwareDivideInThumb", "true", + "HasDivideInThumbMode", "true", "Enable divide instructions in Thumb">; +// True if subtarget supports [su]div in ARM mode. def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", + "HasDivideInARMMode", "true", "Enable divide instructions in ARM mode">; // Atomic Support + +// True if the subtarget supports DMB / DSB data barrier instructions. def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", "Has data barrier (dmb/dsb) instructions">; +// True if the subtarget supports CLREX instructions. def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", "Has v7 clrex instruction">; +// True if the subtarget supports DFB data barrier instruction. def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", "Has full data barrier (dfb) instruction">; +// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions. def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", "Has v8 acquire/release (lda/ldaex " " etc) instructions">; -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", +// True if floating point compare + branch is slow. +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true", "FP compare + branch is slow">; +// True if the processor supports the Performance Monitor Extensions. These +// include a generic cycle-counter as well as more fine-grained (often +// implementation-specific) events. def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable support for Performance " "Monitor extensions">; // TrustZone Security Extensions + +// True if processor supports TrustZone security extensions. def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone " "security extensions">; +// True if processor supports ARMv8-M Security Extensions. def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", "Enable support for ARMv8-M " "Security Extensions">; +// True if processor supports SHA1 and SHA256. def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", "Enable SHA1 and SHA256 support", [FeatureNEON]>; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES support", [FeatureNEON]>; +// True if processor supports Cryptography extensions. def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable support for " "Cryptography extensions", [FeatureNEON, FeatureSHA2, FeatureAES]>; +// True if processor supports CRC instructions. 
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +// True if the ARMv8.2A dot product instructions are supported. def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", "Enable support for dot product instructions", [FeatureNEON]>; -// Not to be confused with FeatureHasRetAddrStack (return address stack) +// True if the processor supports RAS extensions. +// Not to be confused with FeatureHasRetAddrStack (return address stack). def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability " "and Serviceability extensions">; -// Fast computation of non-negative address offsets +// Fast computation of non-negative address offsets. +// True if processor does positive address offset computation faster. def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", "Enable fast computation of " "positive address offsets">; -// Fast execution of AES crypto operations +// Fast execution of AES crypto operations. +// True if processor executes back to back AES instruction pairs faster. def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", "CPU fuses AES crypto operations">; -// Fast execution of bottom and top halves of literal generation +// Fast execution of bottom and top halves of literal generation. +// True if processor executes back to back bottom and top halves of literal generation faster. def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; -// The way of reading thread pointer -def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", +// The way of reading thread pointer. +// True if read thread pointer from coprocessor register. +def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true", "Reading thread pointer from register">; // Cyclone can zero VFP registers in 0 cycles. +// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are +// particularly effective at zeroing a VFP register. def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; -// Whether it is profitable to unpredicate certain instructions during if-conversion +// Whether it is profitable to unpredicate certain instructions during if-conversion. +// True if if conversion may decide to leave some instructions unpredicated. def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", "IsProfitableToUnpredicate", "true", "Is profitable to unpredicate">; // Some targets (e.g. Swift) have microcoded VGETLNi32. +// True if VMOV will be favored over VGETLNi32. def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", "HasSlowVGETLNi32", "true", "Has slow VGETLNi32 - prefer VMOV">; // Some targets (e.g. Swift) have microcoded VDUP32. +// True if VMOV will be favored over VDUP. def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", "Has slow VDUP32 - prefer VMOV">; // Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON // for scalar FP, as this allows more effective execution domain optimization. +// True if VMOVSR will be favored over VMOVDRR. def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", "true", "Prefer VMOVSR">; // Swift has ISHST barriers compatible with Atomic Release semantics but weaker -// than ISH -def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", +// than ISH. 
+// True if ISHST barriers will be used for Release semantics. +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers", "true", "Prefer ISHST barriers">; // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. +// True if the AGU and NEON/FPU units are multiplexed. def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", "Has muxed AGU and NEON/FPU">; // Whether VLDM/VSTM starting with odd register number need more microops -// than single VLDRS -def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", +// than single VLDRS. +// True if a VLDM/VSTM starting with an odd register number is considered to +// take more microops than single VLDRS/VSTRS. +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister", "true", "VLDM/VSTM starting " "with an odd register is slow">; // Some targets have a renaming dependency when loading into D subregisters. +// True if loading into a D subregister will be penalized. def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", - "SlowLoadDSubregister", "true", + "HasSlowLoadDSubregister", "true", "Loading into D subregs is slow">; +// True if use a wider stride when allocating VFP registers. def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp", "UseWideStrideVFP", "true", "Use a wide stride when allocating VFP registers">; // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. +// True if VMOVS will never be widened to VMOVD. def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", "DontWidenVMOVS", "true", "Don't widen VMOVS to VMOVD">; // Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different // VFP register widths. +// True if splat a register between VFP and NEON instructions. def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", - "SplatVFPToNeon", "true", + "UseSplatVFPToNeon", "true", "Splat register from VFP to NEON", [FeatureDontWidenVMOVS]>; // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. +// True if run the MLx expansion pass. def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", "Expand VFP/NEON MLA/MLS instructions">; // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. +// True if VFP/NEON VMLA/VMLS have special RAW hazards. def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", "true", "Has VMLx hazards">; // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. +// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", "true", "Convert VMOVSR, VMOVRS, " @@ -281,18 +330,21 @@ def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. This affects instruction selection and should // only be enabled if the handling of denormals is not important. +// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used. def FeatureNEONForFP : SubtargetFeature<"neonfp", - "UseNEONForSinglePrecisionFP", + "HasNEONForFP", "true", "Use NEON for single precision FP">; // On some processors, VLDn instructions that access unaligned data take one // extra cycle. Take that into account when computing operand latencies. 
-def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", +// True if VLDn instructions take an extra cycle for unaligned accesses. +def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment", "true", "Check for VLDn unaligned access">; // Some processors have a nonpipelined VFP coprocessor. +// True if VFP instructions are not pipelined. def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", "NonpipelinedVFP", "true", "VFP instructions are not pipelined">; @@ -300,20 +352,27 @@ def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. +// If the VFP2 / NEON instructions are available, indicates +// whether the FP VML[AS] instructions are slow (if so, don't use them). def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", "Disable VFP / NEON MAC instructions">; -// VFPv4 added VFMA instructions that can similar be fast or slow. +// VFPv4 added VFMA instructions that can similarly be fast or slow. +// If the VFP4 / NEON instructions are available, indicates +// whether the FP VFM[AS] instructions are slow (if so, don't use them). def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true", "Disable VFP / NEON FMA instructions">; // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +/// True if NEON has special multiplier accumulator +/// forwarding to allow mul + mla being issued back to back. def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", "HasVMLxForwarding", "true", "Has multiplier accumulator forwarding">; // Disable 32-bit to 16-bit narrowing for experimentation. -def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", +// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true", "Prefer 32-bit Thumb instrs">; def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", @@ -332,17 +391,22 @@ def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFac /// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is /// mapped to a separate physical register. Avoid partial CPSR update for these /// processors. +/// True if codegen would avoid using instructions +/// that partially update CPSR and add false dependency on the previous +/// CPSR setting instruction. def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; /// Disable +1 predication cost for instructions updating CPSR. /// Enabled for Cortex-A57. +/// True if disable +1 predication cost for instructions updating CPSR. Enabled for Cortex-A57. def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", "CheapPredicableCPSRDef", "true", "Disable +1 predication cost for instructions updating CPSR">; +// True if codegen should avoid using flag setting movs with shifter operand (i.e. asr, lsl, lsr). 
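The neonfp change above renames the field from UseNEONForSinglePrecisionFP to HasNEONForFP so that the raw feature bit and the policy query are distinct; per the new comment, clients are expected to call useNEONForSinglePrecisionFP() rather than read the bit directly. A sketch of the assumed shape of that helper (the exact gating predicate is an assumption, not the verbatim ARMSubtarget code):

    // Sketch: the HasNEONForFP bit only takes effect when NEON is available.
    bool ARMSubtarget::useNEONForSinglePrecisionFP() const {
      return hasNEON() && HasNEONForFP;
    }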
def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", "AvoidMOVsShifterOperand", "true", "Avoid movs instructions with " @@ -357,16 +421,20 @@ def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", // Some processors have no branch predictor, which changes the expected cost of // taking a branch which affects the choice of whether to use predicated // instructions. +// True if the subtarget has a branch predictor. Having +// a branch predictor or not changes the expected cost of taking a branch +// which affects the choice of whether to use predicated instructions. def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", "HasBranchPredictor", "false", "Has no branch predictor">; /// DSP extension. +/// True if the subtarget supports the DSP (saturating arith and such) instructions. def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Supports DSP instructions in " "ARM and/or Thumb2">; -// Multiprocessing extension. +// True if the subtarget supports Multiprocessing extension (ARMv7 only). def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; @@ -378,31 +446,42 @@ def FeatureVirtualization : SubtargetFeature<"virtualization", // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. // See ARMInstrInfo.td for details. +// True if NaCl TRAP instruction is generated instead of the regular TRAP. def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", "NaCl trap">; +// True if the subtarget disallows unaligned memory +// accesses for some types. For details, see +// ARMTargetLowering::allowsMisalignedMemoryAccesses(). def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign", "true", "Disallow all unaligned memory " "access">; +// Generate calls via indirect call instructions. def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", "Generate calls via indirect call " "instructions">; +// Generate code that does not contain data access to code sections. def FeatureExecuteOnly : SubtargetFeature<"execute-only", "GenExecuteOnly", "true", "Enable the generation of " "execute only code.">; +// True if R9 is not available as a general purpose register. def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", "Reserve R9, making it unavailable" " as GPR">; +// True if MOVT / MOVW pairs are not used for materialization of +// 32-bit imms (including global addresses). def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for " "32-bit imms">; +/// Implicitly convert an instruction to a different one if its immediates +/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "NegativeImmediates", "false", @@ -415,28 +494,39 @@ def FeatureNoNegativeImmediates def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", "Use the MachineScheduler">; +// Use the MachinePipeliner for instruction scheduling for the subtarget. +def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true", + "Use the MachinePipeliner">; + +// False if scheduling should happen again after register allocation. def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", "DisablePostRAScheduler", "true", "Don't schedule again after register allocation">; // Armv8.5-A extensions +// Has speculation barrier. 
 def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
   "Enable v8.5a Speculation Barrier" >;

 // Armv8.6-A extensions
+
+// True if subtarget supports BFloat16 floating point operations.
 def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
   "Enable support for BFloat16 instructions", [FeatureNEON]>;

+// True if subtarget supports 8-bit integer matrix multiply.
 def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", "true",
   "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>;

 // Armv8.1-M extensions

+// True if the processor supports the Low Overhead Branch extension.
 def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
                                   "Enable Low Overhead Branch "
                                   "extensions">;

+// Mitigate against the CVE-2021-35465 security vulnerability.
 def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
     "FixCMSE_CVE_2021_35465", "true",
     "Mitigate against the cve-2021-35465 "
@@ -446,11 +536,26 @@ def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true",
     "Enable Pointer Authentication and Branch "
     "Target Identification">;

+/// Don't place a BTI instruction after return-twice constructs (setjmp).
 def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
                                                  "NoBTIAtReturnTwice", "true",
                                                  "Don't place a BTI instruction "
                                                  "after a return-twice">;

+def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098",
+  "FixCortexA57AES1742098", "true",
+  "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">;
+
+def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain",
+                                              "CreateAAPCSFrameChain", "true",
+                                              "Create an AAPCS compliant frame chain">;
+
+def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
+                                                  "CreateAAPCSFrameChainLeaf", "true",
+                                                  "Create an AAPCS compliant frame chain "
+                                                  "for leaf functions",
+                                                  [FeatureAAPCSFrameChain]>;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
@@ -467,16 +572,18 @@ def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
 def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
                                      "Is microcontroller profile ('M' series)">;

-
+// True if Thumb2 instructions are supported.
 def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
                                      "Enable Thumb2 instructions">;

+// True if subtarget does not support ARM mode execution.
 def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
                                     "Does not support ARM mode execution">;

 //===----------------------------------------------------------------------===//
 // ARM ISAs.
 //

+// Specify whether the target supports specific ARM ISA variants.
 def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
                                  "Support ARM v4T instructions">;

@@ -599,13 +706,16 @@ foreach i = {0-7} in
 // Control codegen mitigation against Straight Line Speculation vulnerability.
 //===----------------------------------------------------------------------===//

+/// Harden against Straight Line Speculation for Returns and Indirect Branches.
 def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
   "HardenSlsRetBr", "true",
   "Harden against straight line speculation across RETurn and BranchRegister "
   "instructions">;

+/// Harden against Straight Line Speculation for indirect calls.
 def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
   "HardenSlsBlr", "true",
   "Harden against straight line speculation across indirect calls">;

+/// Generate thunk code for SLS mitigation in the normal text section.
def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", "HardenSlsNoComdat", "true", "Generate thunk code for SLS mitigation in the normal text section">; @@ -1303,6 +1413,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, ProcM7, FeatureFPARMv8_D16, + FeatureUseMIPipeliner, FeatureUseMISched]>; def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, @@ -1370,13 +1481,15 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, FeatureCRC, FeatureFPAO, FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR]>; + FeatureCheapPredicableCPSR, + FeatureFixCortexA57AES1742098]>; def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72, FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFixCortexA57AES1742098]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, FeatureHWDivThumb, diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index fa09b2567aa9..4aa28bc5d28d 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -161,10 +161,10 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; - OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer->EmitCOFFSymbolStorageClass(Scl); - OutStreamer->EmitCOFFSymbolType(Type); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(CurrentFnSym); + OutStreamer->emitCOFFSymbolStorageClass(Scl); + OutStreamer->emitCOFFSymbolType(Type); + OutStreamer->endCOFFSymbolDef(); } // Emit the rest of the function body. @@ -535,27 +535,27 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. - OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + OutStreamer->switchSection(TLOFMacho.getNonLazySymbolPointerSection()); emitAlignment(Align(4)); for (auto &Stub : Stubs) emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second); Stubs.clear(); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } Stubs = MMIMacho.GetThreadLocalGVStubList(); if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. - OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection()); + OutStreamer->switchSection(TLOFMacho.getThreadLocalPointerSection()); emitAlignment(Align(4)); for (auto &Stub : Stubs) emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second); Stubs.clear(); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } // Funny Darwin hack: This flag tells the linker that no global symbols @@ -740,55 +740,53 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, ARMBuildAttrs::FP16FormatIEEE); - if (MMI) { - if (const Module *SourceModule = MMI->getModule()) { - // ABI_PCS_wchar_t to indicate wchar_t width - // FIXME: There is no way to emit value 0 (wchar_t prohibited). - if (auto WCharWidthValue = mdconst::extract_or_null( - SourceModule->getModuleFlag("wchar_size"))) { - int WCharWidth = WCharWidthValue->getZExtValue(); - assert((WCharWidth == 2 || WCharWidth == 4) && - "wchar_t width must be 2 or 4 bytes"); - ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); - } + if (const Module *SourceModule = MMI->getModule()) { + // ABI_PCS_wchar_t to indicate wchar_t width + // FIXME: There is no way to emit value 0 (wchar_t prohibited). 
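These module-flag reads are the backend half of a frontend/backend contract: emitAttributes() only translates flags that the producer of the module has set. For reference, a sketch of producing the flags this code consumes (the Error merge behaviour is chosen for illustration, not necessarily what clang uses):

    // Sketch: setting the module flags read by ARMAsmPrinter::emitAttributes().
    void tagARMABIInfo(llvm::Module &M) {
      M.addModuleFlag(llvm::Module::Error, "wchar_size", 4);    // ABI_PCS_wchar_t
      M.addModuleFlag(llvm::Module::Error, "min_enum_size", 4); // ABI_enum_size
    }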
+ if (auto WCharWidthValue = mdconst::extract_or_null( + SourceModule->getModuleFlag("wchar_size"))) { + int WCharWidth = WCharWidthValue->getZExtValue(); + assert((WCharWidth == 2 || WCharWidth == 4) && + "wchar_t width must be 2 or 4 bytes"); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); + } - // ABI_enum_size to indicate enum width - // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 - // (all enums contain a value needing 32 bits to encode). - if (auto EnumWidthValue = mdconst::extract_or_null( - SourceModule->getModuleFlag("min_enum_size"))) { - int EnumWidth = EnumWidthValue->getZExtValue(); - assert((EnumWidth == 1 || EnumWidth == 4) && - "Minimum enum width must be 1 or 4 bytes"); - int EnumBuildAttr = EnumWidth == 1 ? 1 : 2; - ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); - } + // ABI_enum_size to indicate enum width + // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 + // (all enums contain a value needing 32 bits to encode). + if (auto EnumWidthValue = mdconst::extract_or_null( + SourceModule->getModuleFlag("min_enum_size"))) { + int EnumWidth = EnumWidthValue->getZExtValue(); + assert((EnumWidth == 1 || EnumWidth == 4) && + "Minimum enum width must be 1 or 4 bytes"); + int EnumBuildAttr = EnumWidth == 1 ? 1 : 2; + ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); + } - auto *PACValue = mdconst::extract_or_null( - SourceModule->getModuleFlag("sign-return-address")); - if (PACValue && PACValue->getZExtValue() == 1) { - // If "+pacbti" is used as an architecture extension, - // Tag_PAC_extension is emitted in - // ARMTargetStreamer::emitTargetAttributes(). - if (!STI.hasPACBTI()) { - ATS.emitAttribute(ARMBuildAttrs::PAC_extension, - ARMBuildAttrs::AllowPACInNOPSpace); - } - ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed); + auto *PACValue = mdconst::extract_or_null( + SourceModule->getModuleFlag("sign-return-address")); + if (PACValue && PACValue->getZExtValue() == 1) { + // If "+pacbti" is used as an architecture extension, + // Tag_PAC_extension is emitted in + // ARMTargetStreamer::emitTargetAttributes(). + if (!STI.hasPACBTI()) { + ATS.emitAttribute(ARMBuildAttrs::PAC_extension, + ARMBuildAttrs::AllowPACInNOPSpace); } + ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed); + } - auto *BTIValue = mdconst::extract_or_null( - SourceModule->getModuleFlag("branch-target-enforcement")); - if (BTIValue && BTIValue->getZExtValue() == 1) { - // If "+pacbti" is used as an architecture extension, - // Tag_BTI_extension is emitted in - // ARMTargetStreamer::emitTargetAttributes(). - if (!STI.hasPACBTI()) { - ATS.emitAttribute(ARMBuildAttrs::BTI_extension, - ARMBuildAttrs::AllowBTIInNOPSpace); - } - ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed); + auto *BTIValue = mdconst::extract_or_null( + SourceModule->getModuleFlag("branch-target-enforcement")); + if (BTIValue && BTIValue->getZExtValue() == 1) { + // If "+pacbti" is used as an architecture extension, + // Tag_BTI_extension is emitted in + // ARMTargetStreamer::emitTargetAttributes(). 
+ if (!STI.hasPACBTI()) { + ATS.emitAttribute(ARMBuildAttrs::BTI_extension, + ARMBuildAttrs::AllowBTIInNOPSpace); } + ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed); } } @@ -2276,6 +2274,47 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInstSB); return; } + + case ARM::SEH_StackAlloc: + ATS.emitARMWinCFIAllocStack(MI->getOperand(0).getImm(), + MI->getOperand(1).getImm()); + return; + + case ARM::SEH_SaveRegs: + case ARM::SEH_SaveRegs_Ret: + ATS.emitARMWinCFISaveRegMask(MI->getOperand(0).getImm(), + MI->getOperand(1).getImm()); + return; + + case ARM::SEH_SaveSP: + ATS.emitARMWinCFISaveSP(MI->getOperand(0).getImm()); + return; + + case ARM::SEH_SaveFRegs: + ATS.emitARMWinCFISaveFRegs(MI->getOperand(0).getImm(), + MI->getOperand(1).getImm()); + return; + + case ARM::SEH_SaveLR: + ATS.emitARMWinCFISaveLR(MI->getOperand(0).getImm()); + return; + + case ARM::SEH_Nop: + case ARM::SEH_Nop_Ret: + ATS.emitARMWinCFINop(MI->getOperand(0).getImm()); + return; + + case ARM::SEH_PrologEnd: + ATS.emitARMWinCFIPrologEnd(/*Fragment=*/false); + return; + + case ARM::SEH_EpilogStart: + ATS.emitARMWinCFIEpilogStart(ARMCC::AL); + return; + + case ARM::SEH_EpilogEnd: + ATS.emitARMWinCFIEpilogEnd(); + return; } MCInst TmpInst; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5b0bae4d9274..80ba7b5f0d2e 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -343,6 +343,13 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, } // Branch analysis. +// Cond vector output format: +// 0 elements indicates an unconditional branch +// 2 elements indicates a conditional branch; the elements are +// the condition to check and the CPSR. +// 3 elements indicates a hardware loop end; the elements +// are the opcode, the operand value to test, and a dummy +// operand used to pad out to 3 operands. bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -394,6 +401,17 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } else if (I->isReturn()) { // Returns can't be analyzed, but we should run cleanup. CantAnalyze = true; + } else if (I->getOpcode() == ARM::t2LoopEnd && + MBB.getParent() + ->getSubtarget() + .enableMachinePipeliner()) { + if (!Cond.empty()) + return true; + FBB = TBB; + TBB = I->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); + Cond.push_back(I->getOperand(0)); + Cond.push_back(MachineOperand::CreateImm(0)); } else { // We encountered other unrecognized terminator. Bail out immediately. return true; @@ -457,7 +475,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, return 0; if (!isUncondBranchOpcode(I->getOpcode()) && - !isCondBranchOpcode(I->getOpcode())) + !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) return 0; // Remove the branch. @@ -467,7 +485,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, if (I == MBB.begin()) return 1; --I; - if (!isCondBranchOpcode(I->getOpcode())) + if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) return 1; // Remove the branch. @@ -491,8 +509,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, // Shouldn't be a fall through. 
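The Cond layout documented earlier in this ARMBaseInstrInfo hunk (empty vector = unconditional; two operands = ARMCC condition immediate plus CPSR; three operands = hardware loop end) is the contract that insertBranch and reverseBranchCondition below have to decode again. A sketch of a consumer, with illustrative names:

    // Sketch: interpreting Cond as produced by the updated analyzeBranch.
    static void describeCond(ArrayRef<MachineOperand> Cond) {
      if (Cond.empty()) {
        // Unconditional branch.
      } else if (Cond.size() == 2) {
        // Cond[0]: ARMCC::CondCodes immediate, Cond[1]: CPSR use.
      } else { // Cond.size() == 3
        // Cond[0]: opcode immediate (t2LoopEnd), Cond[1]: counter operand,
        // Cond[2]: dummy immediate padding the vector to three operands.
      }
    }

Note that reverseBranchCondition returns true (meaning "cannot reverse") for the three-operand form, since a hardware loop-end test has no inverted opcode.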
assert(TBB && "insertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && - "ARM branch conditions have two components!"); + assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) && + "ARM branch conditions have two or three components!"); // For conditional branches, we use addOperand to preserve CPSR flags. @@ -502,19 +520,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); else BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); - } else + } else if (Cond.size() == 2) { BuildMI(&MBB, DL, get(BccOpc)) .addMBB(TBB) .addImm(Cond[0].getImm()) .add(Cond[1]); + } else + BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB); return 1; } // Two-way conditional branch. - BuildMI(&MBB, DL, get(BccOpc)) - .addMBB(TBB) - .addImm(Cond[0].getImm()) - .add(Cond[1]); + if (Cond.size() == 2) + BuildMI(&MBB, DL, get(BccOpc)) + .addMBB(TBB) + .addImm(Cond[0].getImm()) + .add(Cond[1]); + else if (Cond.size() == 3) + BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB); if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); else @@ -524,9 +547,12 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, bool ARMBaseInstrInfo:: reverseBranchCondition(SmallVectorImpl &Cond) const { - ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); - Cond[0].setImm(ARMCC::getOppositeCondition(CC)); - return false; + if (Cond.size() == 2) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; + } + return true; } bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { @@ -556,7 +582,7 @@ std::string ARMBaseInstrInfo::createMIROperandComment( return GenericComment; // If not, check if we have an immediate operand. - if (Op.getType() != MachineOperand::MO_Immediate) + if (!Op.isImm()) return std::string(); // And print its corresponding condition code if the immediate is a @@ -1703,7 +1729,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // or some other super-register. int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); if (ImpDefIdx != -1) - MI.RemoveOperand(ImpDefIdx); + MI.removeOperand(ImpDefIdx); // Change the opcode and operands. MI.setDesc(get(ARM::VMOVD)); @@ -2045,6 +2071,9 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) return true; + if (isSEHInstruction(MI)) + return true; + // Treat the start of the IT block as a scheduling boundary, but schedule // t2IT along with all instructions following it. // FIXME: This is a big hammer. But the alternative is to add all potential @@ -2598,7 +2627,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, // ahead: strip all existing registers off and add them back again // in the right order. for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) - MI->RemoveOperand(i); + MI->removeOperand(i); // Add the complete list back in. MachineInstrBuilder MIB(MF, &*MI); @@ -2626,7 +2655,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Turn it into a move. 
MI.setDesc(TII.get(ARM::MOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.RemoveOperand(FrameRegIdx+1); + MI.removeOperand(FrameRegIdx+1); Offset = 0; return true; } else if (Offset < 0) { @@ -5103,7 +5132,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, SrcReg = MI.getOperand(1).getReg(); for (unsigned i = MI.getDesc().getNumOperands(); i; --i) - MI.RemoveOperand(i - 1); + MI.removeOperand(i - 1); // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) MI.setDesc(get(ARM::VORRd)); @@ -5122,7 +5151,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, SrcReg = MI.getOperand(1).getReg(); for (unsigned i = MI.getDesc().getNumOperands(); i; --i) - MI.RemoveOperand(i - 1); + MI.removeOperand(i - 1); DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); @@ -5155,7 +5184,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, break; for (unsigned i = MI.getDesc().getNumOperands(); i; --i) - MI.RemoveOperand(i - 1); + MI.removeOperand(i - 1); // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) // Again DDst may be undefined at the beginning of this instruction. @@ -5190,7 +5219,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, break; for (unsigned i = MI.getDesc().getNumOperands(); i; --i) - MI.RemoveOperand(i - 1); + MI.removeOperand(i - 1); if (DSrc == DDst) { // Destination can be: @@ -5766,26 +5795,25 @@ struct OutlinerCosts { SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} }; -unsigned -ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { - assert(C.LRUWasSet && "LRU wasn't set?"); +Register +ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { MachineFunction *MF = C.getMF(); - const ARMBaseRegisterInfo *ARI = static_cast( - MF->getSubtarget().getRegisterInfo()); + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + const ARMBaseRegisterInfo *ARI = + static_cast(&TRI); BitVector regsReserved = ARI->getReservedRegs(*MF); // Check if there is an available register across the sequence that we can // use. - for (unsigned Reg : ARM::rGPRRegClass) { + for (Register Reg : ARM::rGPRRegClass) { if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && Reg != ARM::LR && // LR is not reserved, but don't use it. Reg != ARM::R12 && // R12 is not guaranteed to be preserved. - C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && + C.isAvailableInsideSeq(Reg, TRI)) return Reg; } - - // No suitable register. Return 0. - return 0u; + return Register(); } // Compute liveness of LR at the point after the interval [I, E), which @@ -5833,9 +5861,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( // Compute liveness information for each candidate, and set FlagsSetInAll. const TargetRegisterInfo &TRI = getRegisterInfo(); - std::for_each( - RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), - [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); + for (outliner::Candidate &C : RepeatedSequenceLocs) + FlagsSetInAll &= C.Flags; // According to the ARM Procedure Call Standard, the following are // undefined on entry/exit from a function call: @@ -5854,9 +5881,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( // to compute liveness here. 
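The outliner hunks here replace direct inspection of C.LRU and C.UsedInSequence with the Candidate liveness queries, so callers no longer need C.initLRU(). A sketch of the resulting pattern for picking a scratch register (the pool and function name are illustrative):

    // Sketch: a register is usable only if it is free across and beyond the
    // outlined sequence, and unused inside the sequence itself.
    static Register pickScratch(outliner::Candidate &C,
                                const TargetRegisterInfo &TRI,
                                ArrayRef<Register> Pool) {
      for (Register R : Pool)
        if (C.isAvailableAcrossAndOutOfSeq(R, TRI) &&
            C.isAvailableInsideSeq(R, TRI))
          return R;
      return Register(); // invalid register: no scratch available
    }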
if (C.Flags & UnsafeRegsDead) return false; - C.initLRU(TRI); - LiveRegUnits LRU = C.LRU; - return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI); }; // Are there any candidates where those registers are live? @@ -5969,7 +5994,6 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( std::vector CandidatesWithoutStackFixups; for (outliner::Candidate &C : RepeatedSequenceLocs) { - C.initLRU(TRI); // LR liveness is overestimated in return blocks, unless they end with a // tail call. const auto Last = C.getMBB()->rbegin(); @@ -5977,7 +6001,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( C.getMBB()->isReturnBlock() && !Last->isCall() ? isLRAvailable(TRI, Last, (MachineBasicBlock::reverse_iterator)C.front()) - : C.LRU.available(ARM::LR); + : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI); if (LRIsAvailable) { FrameID = MachineOutlinerNoLRSave; NumBytesNoStackCalls += Costs.CallNoLRSave; @@ -5996,7 +6020,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( // Is SP used in the sequence at all? If not, we don't have to modify // the stack, so we are guaranteed to get the same frame. - else if (C.UsedInSequence.available(ARM::SP)) { + else if (C.isAvailableInsideSeq(ARM::SP, TRI)) { NumBytesNoStackCalls += Costs.CallDefault; C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault); CandidatesWithoutStackFixups.push_back(C); @@ -6189,8 +6213,8 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, LiveRegUnits LRU(getRegisterInfo()); - std::for_each(MBB.rbegin(), MBB.rend(), - [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + for (MachineInstr &MI : llvm::reverse(MBB)) + LRU.accumulate(MI); // Check if each of the unsafe registers are available... bool R12AvailableInBlock = LRU.available(ARM::R12); @@ -6635,7 +6659,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame( MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF, const outliner::Candidate &C) const { + MachineFunction &MF, outliner::Candidate &C) const { MachineInstrBuilder MIB; MachineBasicBlock::iterator CallPt; unsigned Opc; @@ -6726,3 +6750,122 @@ unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { : ARM::BLX_pred; } +namespace { +class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *EndLoop, *LoopCount; + MachineFunction *MF; + const TargetInstrInfo *TII; + + // Meanings of the various stuff with loop types: + // t2Bcc: + // EndLoop = branch at end of original BB that will become a kernel + // LoopCount = CC setter live into branch + // t2LoopEnd: + // EndLoop = branch at end of original BB + // LoopCount = t2LoopDec +public: + ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount) + : EndLoop(EndLoop), LoopCount(LoopCount), + MF(EndLoop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()) {} + + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. 
+ return MI == EndLoop || MI == LoopCount; + } + + Optional createTripCountGreaterCondition( + int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + + if (isCondBranchOpcode(EndLoop->getOpcode())) { + Cond.push_back(EndLoop->getOperand(1)); + Cond.push_back(EndLoop->getOperand(2)); + if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) { + TII->reverseBranchCondition(Cond); + } + return {}; + } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) { + // General case just lets the unrolled t2LoopDec do the subtraction and + // therefore just needs to check if zero has been reached. + MachineInstr *LoopDec = nullptr; + for (auto &I : MBB.instrs()) + if (I.getOpcode() == ARM::t2LoopDec) + LoopDec = &I; + assert(LoopDec && "Unable to find copied LoopDec"); + // Check if we're done with the loop. + BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri)) + .addReg(LoopDec->getOperand(0).getReg()) + .addImm(0) + .addImm(ARMCC::AL) + .addReg(ARM::NoRegister); + Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ)); + Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false)); + return {}; + } else + llvm_unreachable("Unknown EndLoop"); + } + + void setPreheader(MachineBasicBlock *NewPreheader) override {} + + void adjustTripCount(int TripCountAdjust) override {} + + void disposed() override {} +}; +} // namespace + +std::unique_ptr +ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); + MachineBasicBlock *Preheader = *LoopBB->pred_begin(); + if (Preheader == LoopBB) + Preheader = *std::next(LoopBB->pred_begin()); + + if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) { + // If the branch is a Bcc, then the CPSR should be set somewhere within the + // block. We need to determine the reaching definition of CPSR so that + // it can be marked as non-pipelineable, allowing the pipeliner to force + // it into stage 0 or give up if it cannot or will not do so. 
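createTripCountGreaterCondition above follows the PipelinerLoopInfo convention: returning an empty Optional<bool> means the comparison is not statically known, and the caller must branch on the operands left in Cond. A sketch of the expected call site (names such as PLI, TC and TargetBB are illustrative, not the verbatim MachinePipeliner code):

    // Sketch: consuming the hook's result.
    SmallVector<MachineOperand, 4> Cond;
    if (Optional<bool> Known =
            PLI->createTripCountGreaterCondition(TC, MBB, Cond)) {
      // Statically decided: *Known says whether another iteration always runs.
    } else {
      // Dynamic: branch on the condition the hook materialised into Cond.
      TII->insertBranch(MBB, &TargetBB, nullptr, Cond, DebugLoc());
    }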
+    MachineInstr *CCSetter = nullptr;
+    for (auto &L : LoopBB->instrs()) {
+      if (L.isCall())
+        return nullptr;
+      if (isCPSRDefined(L))
+        CCSetter = &L;
+    }
+    if (CCSetter)
+      return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
+    else
+      return nullptr; // Unable to find the CC setter, so unable to guarantee
+                      // that pipelining will work
+  }
+
+  // Recognize:
+  //   preheader:
+  //     %1 = t2DoLoopStart %0
+  //   loop:
+  //     %2 = phi %1, <not loop>, %..., %loop
+  //     %3 = t2LoopDec %2, <imm>
+  //     t2LoopEnd %3, %loop
+
+  if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
+    for (auto &L : LoopBB->instrs())
+      if (L.isCall())
+        return nullptr;
+      else if (isVCTP(&L))
+        return nullptr;
+    Register LoopDecResult = I->getOperand(0).getReg();
+    MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+    MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
+    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
+      return nullptr;
+    MachineInstr *LoopStart = nullptr;
+    for (auto &J : Preheader->instrs())
+      if (J.getOpcode() == ARM::t2DoLoopStart)
+        LoopStart = &J;
+    if (!LoopStart)
+      return nullptr;
+    return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
+  }
+  return nullptr;
+}
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index defce07dd862..3b8f3403e3c3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -360,7 +360,7 @@ public:
   MachineBasicBlock::iterator
   insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator &It, MachineFunction &MF,
-                     const outliner::Candidate &C) const override;
+                     outliner::Candidate &C) const override;

   /// Enable outlining by default at -Oz.
   bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
@@ -372,10 +372,15 @@ public:
            MI->getOpcode() == ARM::t2WhileLoopStartTP;
   }

+  /// Analyze loop L, which must be a single-basic-block loop, and if the
+  /// conditions can be understood well enough, produce a PipelinerLoopInfo
+  /// object.
+  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+
 private:
   /// Returns an unused general-purpose register which can be used for
   /// constructing an outlined call if one exists. Returns 0 otherwise.
-  unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;

   /// Adds an instruction which saves the link register on top of the stack into
   /// the MachineBasicBlock \p MBB at position \p It. If \p Auth is true,
@@ -752,6 +757,26 @@ static inline bool isValidCoprocessorNumber(unsigned Num,
   return true;
 }

+static inline bool isSEHInstruction(const MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case ARM::SEH_StackAlloc:
+  case ARM::SEH_SaveRegs:
+  case ARM::SEH_SaveRegs_Ret:
+  case ARM::SEH_SaveSP:
+  case ARM::SEH_SaveFRegs:
+  case ARM::SEH_SaveLR:
+  case ARM::SEH_Nop:
+  case ARM::SEH_Nop_Ret:
+  case ARM::SEH_PrologEnd:
+  case ARM::SEH_EpilogStart:
+  case ARM::SEH_EpilogEnd:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index c543d02ff75a..1d0e743b94db 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -63,28 +63,26 @@ const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); bool UseSplitPush = STI.splitFramePushPop(*MF); - const MCPhysReg *RegList = - STI.isTargetDarwin() - ? CSR_iOS_SaveList - : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); - const Function &F = MF->getFunction(); + if (F.getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_NoRegs_SaveList; + } else if (STI.splitFramePointerPush(*MF)) { + return CSR_Win_SplitFP_SaveList; } else if (F.getCallingConv() == CallingConv::CFGuard_Check) { return CSR_Win_AAPCS_CFGuard_Check_SaveList; } else if (F.getCallingConv() == CallingConv::SwiftTail) { return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList - : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList + : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList : CSR_AAPCS_SwiftTail_SaveList); } else if (F.hasFnAttribute("interrupt")) { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. - return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; + return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. @@ -101,7 +99,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (STI.isTargetDarwin()) return CSR_iOS_SwiftError_SaveList; - return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList : + return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList : CSR_AAPCS_SwiftError_SaveList; } @@ -109,7 +107,15 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return MF->getInfo()->isSplitCSR() ? CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; - return RegList; + + if (STI.isTargetDarwin()) + return CSR_iOS_SaveList; + + if (UseSplitPush) + return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList + : CSR_ATPCS_SplitPush_SaveList; + + return CSR_AAPCS_SaveList; } const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( @@ -238,7 +244,7 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF, BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, ARM::PC); - if (TFI->hasFP(MF)) + if (TFI->isFPReserved(MF)) markSuperRegs(Reserved, STI.getFramePointerReg()); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 57d7842c63ca..73ed300ccff4 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -43,7 +43,7 @@ namespace ARMRI { /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. 
-static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { +static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) { using namespace ARM; switch (Reg) { @@ -53,25 +53,52 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { return true; case R8: case R9: case R10: case R11: case R12: // For iOS we want r7 and lr to be next to each other. - return !isIOS; + return !SplitFramePushPop; default: return false; } } -static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { +static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) { using namespace ARM; switch (Reg) { case R8: case R9: case R10: case R11: case R12: // iOS has this second area. - return isIOS; + return SplitFramePushPop; default: return false; } } -static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { +static inline bool isSplitFPArea1Register(unsigned Reg, + bool SplitFramePushPop) { + using namespace ARM; + + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + case R8: case R9: case R10: case R12: + case SP: case PC: + return true; + default: + return false; + } +} + +static inline bool isSplitFPArea2Register(unsigned Reg, + bool SplitFramePushPop) { + using namespace ARM; + + switch (Reg) { + case R11: case LR: + return true; + default: + return false; + } +} + +static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) { using namespace ARM; switch (Reg) { @@ -214,6 +241,8 @@ public: unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override; + + int getSEHRegNum(unsigned i) const { return getEncodingValue(i); } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp index ddbd6702e528..b2d291bbe7ff 100644 --- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -16,6 +16,7 @@ #include "ARMBasicBlockInfo.h" #include "ARMSubtarget.h" #include "MVETailPredUtils.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -212,7 +213,7 @@ bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) { bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - const ARMSubtarget &ST = static_cast(MF.getSubtarget()); + const ARMSubtarget &ST = MF.getSubtarget(); if (!ST.hasLOB()) return false; LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n"); diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index a6dbe563a4ab..d14424c2deca 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -284,19 +284,32 @@ def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>; // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame -// pointer, we use this AAPCS alternative. -def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, +// pointer, we use this ATPCS alternative. 
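Before the renamed CSR_ATPCS_SplitPush list that this comment introduces, it is worth spelling out how the area predicates above are meant to be consumed: frame lowering classifies each callee-saved register into a push "area", one push instruction per area, and the CSR list order must match the resulting push order so that PrologEpilogInserter allocates matching frame index slots. A sketch under those assumptions (local names illustrative):

    // Sketch: classify CSRs into push areas; list order == push order.
    static void classifyCSRs(ArrayRef<CalleeSavedInfo> CSI, bool SplitPush,
                             SmallVectorImpl<unsigned> &Area1,
                             SmallVectorImpl<unsigned> &Area2) {
      for (const CalleeSavedInfo &Info : CSI) {
        unsigned Reg = Info.getReg();
        if (isARMArea1Register(Reg, SplitPush))
          Area1.push_back(Reg); // low registers and lr: first push
        else if (isARMArea2Register(Reg, SplitPush))
          Area2.push_back(Reg); // r8-r12 when pushes are split
      }
    }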
+def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; +def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8), + LR, R11)>; + // R8 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, +def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R8)>; // R10 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, +def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R10)>; +// When enforcing an AAPCS compliant frame chain, R11 is used as the frame +// pointer even for Thumb targets, where split pushes are necessary. +// This AAPCS alternative makes sure the frame index slots match the push +// order in that case. +def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, + R7, R6, R5, R4, + R10, R9, R8, + (sequence "D%u", 15, 8))>; + // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index a2a4f1f3bdfd..d77c3afd05e5 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -396,7 +396,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlign().value() << " bytes *****\n"); - STI = &static_cast(MF->getSubtarget()); + STI = &MF->getSubtarget(); TII = STI->getInstrInfo(); isPositionIndependentOrROPI = STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2f083561bbd4..613904f702f0 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -2107,6 +2108,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::TCRETURNdi: case ARM::TCRETURNri: { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) + MBBI--; + if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) + MBBI--; assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); @@ -2116,13 +2121,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); + if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) + MBBI--; + if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) + MBBI--; MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi) { + MachineFunction *MF = MBB.getParent(); + bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF->getFunction().needsUnwindTableEntry(); unsigned TCOpcode = STI->isThumb() - ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) + ? ((STI->isTargetMachO() || NeedsWinCFI) ? 
ARM::tTAILJMPd + : ARM::tTAILJMPdND) : ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) @@ -3132,7 +3145,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); AFI = MF.getInfo(); diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 5d94b99d4c5d..a167225e2743 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -122,8 +122,7 @@ class ARMFastISel final : public FastISel { explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), - Subtarget( - &static_cast(funcInfo.MF->getSubtarget())), + Subtarget(&funcInfo.MF->getSubtarget()), M(const_cast(*funcInfo.Fn->getParent())), TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) { @@ -156,7 +155,7 @@ class ARMFastISel final : public FastISel { const LoadInst *LI) override; bool fastLowerArguments() override; - #include "ARMGenFastISel.inc" +#include "ARMGenFastISel.inc" // Instruction selection routines. @@ -189,10 +188,10 @@ class ARMFastISel final : public FastISel { bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, - unsigned Alignment = 0, bool isZExt = true, + MaybeAlign Alignment = None, bool isZExt = true, bool allocReg = true); bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, - unsigned Alignment = 0); + MaybeAlign Alignment = None); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); @@ -602,8 +601,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { } if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) || - (Subtarget->isTargetMachO() && IsIndirect) || - Subtarget->genLongCalls()) { + (Subtarget->isTargetMachO() && IsIndirect)) { MachineInstrBuilder MIB; Register NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) @@ -898,7 +896,8 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, } bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, - unsigned Alignment, bool isZExt, bool allocReg) { + MaybeAlign Alignment, bool isZExt, + bool allocReg) { unsigned Opc; bool useAM3 = false; bool needVMOV = false; @@ -924,7 +923,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::i16: - if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) + if (Alignment && *Alignment < Align(2) && + !Subtarget->allowsUnalignedMem()) return false; if (isThumb2) { @@ -939,7 +939,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::i32: - if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) + if (Alignment && *Alignment < Align(4) && + !Subtarget->allowsUnalignedMem()) return false; if (isThumb2) { @@ -955,7 +956,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, case MVT::f32: if (!Subtarget->hasVFP2Base()) return false; // Unaligned loads need special handling. 
-      if (Alignment && Alignment < 4) {
+      if (Alignment && *Alignment < Align(4)) {
         needVMOV = true;
         VT = MVT::i32;
         Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
@@ -970,7 +971,7 @@
       if (!Subtarget->hasVFP2Base()) return false;
       // FIXME: Unaligned loads need special handling. Doublewords require
       // word-alignment.
-      if (Alignment && Alignment < 4)
+      if (Alignment && *Alignment < Align(4))
         return false;
 
       Opc = ARM::VLDRD;
@@ -1030,14 +1031,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
 
   Register ResultReg;
-  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))
     return false;
   updateValueMap(I, ResultReg);
   return true;
 }
 
 bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
-                               unsigned Alignment) {
+                               MaybeAlign Alignment) {
   unsigned StrOpc;
   bool useAM3 = false;
   switch (VT.SimpleTy) {
@@ -1065,7 +1066,8 @@
       }
       break;
     case MVT::i16:
-      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+      if (Alignment && *Alignment < Align(2) &&
+          !Subtarget->allowsUnalignedMem())
         return false;
 
       if (isThumb2) {
@@ -1079,7 +1081,8 @@
       }
       break;
     case MVT::i32:
-      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+      if (Alignment && *Alignment < Align(4) &&
+          !Subtarget->allowsUnalignedMem())
         return false;
 
       if (isThumb2) {
@@ -1094,7 +1097,7 @@
     case MVT::f32:
       if (!Subtarget->hasVFP2Base()) return false;
       // Unaligned stores need special handling. Floats require word-alignment.
-      if (Alignment && Alignment < 4) {
+      if (Alignment && *Alignment < Align(4)) {
         Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
         AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                 TII.get(ARM::VMOVRS), MoveReg)
@@ -1111,8 +1114,8 @@
       if (!Subtarget->hasVFP2Base()) return false;
       // FIXME: Unaligned stores need special handling. Doublewords require
       // word-alignment.
-      if (Alignment && Alignment < 4)
-        return false;
+      if (Alignment && *Alignment < Align(4))
+        return false;
 
       StrOpc = ARM::VSTRD;
       break;
@@ -1166,7 +1169,7 @@
   if (!ARMComputeAddress(I->getOperand(1), Addr))
     return false;
 
-  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))
     return false;
   return true;
 }
@@ -2939,7 +2942,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
 
   Register ResultReg = MI->getOperand(0).getReg();
-  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))
     return false;
   MachineBasicBlock::iterator I(MI);
   removeDeadCode(I, std::next(I));
diff --git a/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
new file mode 100644
index 000000000000..77c8f7134a55
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
@@ -0,0 +1,432 @@
+//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass works around a Cortex Core Fused AES erratum:
+// - Cortex-A57 Erratum 1742098
+// - Cortex-A72 Erratum 1655431
+//
+// The erratum may be triggered if an input vector register to AESE or AESD was
+// last written by an instruction that only updated 32 bits of it. This can
+// occur for either of the input registers.
+//
+// The workaround chosen is to update the input register using `r = VORRq r, r`,
+// as this updates all 128 bits of the register unconditionally, but does not
+// change the values observed in `r`, making the input safe.
+//
+// This pass has to be conservative in a few cases:
+// - an input vector register to the AES instruction is defined outside the
+//   current function, where we have to assume the register was updated in an
+//   unsafe way; and
+// - an input vector register to the AES instruction is updated along multiple
+//   different control-flow paths, where we have to ensure all the register
+//   updating instructions are safe.
+//
+// Both of these cases may apply to an input vector register. In either case,
+// we need to ensure that, when the pass is finished, there exists a safe
+// instruction between every unsafe register updating instruction and the AES
+// instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "Utils/ARMBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
+
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
+public:
+  static char ID;
+  explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
+    initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  StringRef getPassName() const override {
+    return "ARM fix for Cortex-A57 AES Erratum 1742098";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ReachingDefAnalysis>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  // This is the information needed to insert the fixup in the right place.
+  struct AESFixupLocation {
+    MachineBasicBlock *Block;
+    // The fixup instruction will be inserted *before* InsertionPt.
+    MachineInstr *InsertionPt;
+    MachineOperand *MOp;
+  };
+
+  void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
+                 const ARMBaseRegisterInfo *TRI,
+                 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
+
+  void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+                      const ARMBaseRegisterInfo *TRI) const;
+
+  static bool isFirstAESPairInstr(MachineInstr &MI);
+  static bool isSafeAESInput(MachineInstr &MI);
+};
+char ARMFixCortexA57AES1742098::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+                      "ARM fix for Cortex-A57 AES Erratum 1742098", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
+INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+                    "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
+
+//===----------------------------------------------------------------------===//
+
+bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  return Opc == ARM::AESD || Opc == ARM::AESE;
+}
+
+bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
+  auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
+    int CCIdx = MI.findFirstPredOperandIdx();
+    if (CCIdx == -1)
+      return false;
+    return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
+  };
+
+  switch (MI.getOpcode()) {
+  // Unknown: Assume not safe.
+  default:
+    return false;
+  // 128-bit wide AES instructions
+  case ARM::AESD:
+  case ARM::AESE:
+  case ARM::AESMC:
+  case ARM::AESIMC:
+    // No CondCode.
+    return true;
+  // 128-bit and 64-bit wide bitwise ops (when condition = al)
+  case ARM::VANDd:
+  case ARM::VANDq:
+  case ARM::VORRd:
+  case ARM::VORRq:
+  case ARM::VEORd:
+  case ARM::VEORq:
+  case ARM::VMVNd:
+  case ARM::VMVNq:
+  // VMOV of 64-bit value between D registers (when condition = al)
+  case ARM::VMOVD:
+  // VMOV of 64 bit value from GPRs (when condition = al)
+  case ARM::VMOVDRR:
+  // VMOV of immediate into D or Q registers (when condition = al)
+  case ARM::VMOVv2i64:
+  case ARM::VMOVv1i64:
+  case ARM::VMOVv2f32:
+  case ARM::VMOVv4f32:
+  case ARM::VMOVv2i32:
+  case ARM::VMOVv4i32:
+  case ARM::VMOVv4i16:
+  case ARM::VMOVv8i16:
+  case ARM::VMOVv8i8:
+  case ARM::VMOVv16i8:
+  // Loads (when condition = al)
+  // VLD Dn, [Rn, #imm]
+  case ARM::VLDRD:
+  // VLDM
+  case ARM::VLDMDDB_UPD:
+  case ARM::VLDMDIA_UPD:
+  case ARM::VLDMDIA:
+  // VLDn to all lanes.
+  case ARM::VLD1d64:
+  case ARM::VLD1q64:
+  case ARM::VLD1d32:
+  case ARM::VLD1q32:
+  case ARM::VLD2b32:
+  case ARM::VLD2d32:
+  case ARM::VLD2q32:
+  case ARM::VLD1d16:
+  case ARM::VLD1q16:
+  case ARM::VLD2d16:
+  case ARM::VLD2q16:
+  case ARM::VLD1d8:
+  case ARM::VLD1q8:
+  case ARM::VLD2b8:
+  case ARM::VLD2d8:
+  case ARM::VLD2q8:
+  case ARM::VLD3d32:
+  case ARM::VLD3q32:
+  case ARM::VLD3d16:
+  case ARM::VLD3q16:
+  case ARM::VLD3d8:
+  case ARM::VLD3q8:
+  case ARM::VLD4d32:
+  case ARM::VLD4q32:
+  case ARM::VLD4d16:
+  case ARM::VLD4q16:
+  case ARM::VLD4d8:
+  case ARM::VLD4q8:
+  // VLD1 (single element to one lane)
+  case ARM::VLD1LNd32:
+  case ARM::VLD1LNd32_UPD:
+  case ARM::VLD1LNd8:
+  case ARM::VLD1LNd8_UPD:
+  case ARM::VLD1LNd16:
+  case ARM::VLD1LNd16_UPD:
+  // VLD1 (single element to all lanes)
+  case ARM::VLD1DUPd32:
+  case ARM::VLD1DUPd32wb_fixed:
+  case ARM::VLD1DUPd32wb_register:
+  case ARM::VLD1DUPd16:
+  case ARM::VLD1DUPd16wb_fixed:
+  case ARM::VLD1DUPd16wb_register:
+  case ARM::VLD1DUPd8:
+  case ARM::VLD1DUPd8wb_fixed:
+  case ARM::VLD1DUPd8wb_register:
+  case ARM::VLD1DUPq32:
+  case ARM::VLD1DUPq32wb_fixed:
+  case ARM::VLD1DUPq32wb_register:
+  case ARM::VLD1DUPq16:
+  case ARM::VLD1DUPq16wb_fixed:
+  case ARM::VLD1DUPq16wb_register:
+  case ARM::VLD1DUPq8:
+  case ARM::VLD1DUPq8wb_fixed:
+  case ARM::VLD1DUPq8wb_register:
+  // VMOV
+  case ARM::VSETLNi32:
+  case ARM::VSETLNi16:
+  case ARM::VSETLNi8:
+    return CondCodeIsAL(MI);
+  };
+
+  return false;
+}
+
+bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
+  LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
+  auto &STI = F.getSubtarget<ARMSubtarget>();
+
+  // Fix not requested or AES instructions not present: skip pass.
+  if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
+    return false;
+
+  const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
+  const ARMBaseInstrInfo *TII = STI.getInstrInfo();
+
+  auto &RDA = getAnalysis<ReachingDefAnalysis>();
+
+  // Analyze whole function to find instructions which need fixing up...
+  SmallVector<AESFixupLocation, 8> FixupLocsForFn{};
+  analyzeMF(F, RDA, TRI, FixupLocsForFn);
+
+  // ... and fix the instructions up all at the same time.
+  bool Changed = false;
+  LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
+  for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
+    insertAESFixup(FixupLoc, TII, TRI);
+    Changed |= true;
+  }
+
+  return Changed;
+}
+
+void ARMFixCortexA57AES1742098::analyzeMF(
+    MachineFunction &MF, ReachingDefAnalysis &RDA,
+    const ARMBaseRegisterInfo *TRI,
+    SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
+  unsigned MaxAllowedFixups = 0;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (!isFirstAESPairInstr(MI))
+        continue;
+
+      // Found an instruction to check the operands of.
+      LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
+      assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
+             "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
+
+      // A maximum of two fixups should be inserted for each AES pair (one per
+      // register use).
+      MaxAllowedFixups += 2;
+
+      // Inspect all operands, choosing whether to insert a fixup.
+      for (MachineOperand &MOp : MI.uses()) {
+        SmallPtrSet<MachineInstr *, 2> AllDefs{};
+        RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
+
+        // Planned Fixup: This should be added to FixupLocsForFn at most once.
+        AESFixupLocation NewLoc{&MBB, &MI, &MOp};
+
+        // In small functions with loops, this operand may be both a live-in and
+        // have definitions within the function itself. These will need a fixup.
+        bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
+
+        // If the register doesn't have defining instructions, and is not a
+        // live-in, then something is wrong and the fixup must always be
+        // inserted to be safe.
+        if (!IsLiveIn && AllDefs.size() == 0) {
+          LLVM_DEBUG(dbgs()
+                     << "Fixup Planned: No Defining Instrs found, not live-in: "
+                     << printReg(MOp.getReg(), TRI) << "\n");
+          FixupLocsForFn.emplace_back(NewLoc);
+          continue;
+        }
+
+        auto IsUnsafe = [](MachineInstr *MI) -> bool {
+          return !isSafeAESInput(*MI);
+        };
+        size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
+
+        // If there are no unsafe definitions...
+        if (UnsafeCount == 0) {
+          // ... and the register is not live-in ...
+          if (!IsLiveIn) {
+            // ... then skip the fixup.
+            LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
+                              << printReg(MOp.getReg(), TRI) << "\n");
+            continue;
+          }
+
+          // Otherwise, the only unsafe "definition" is a live-in, so insert the
+          // fixup at the start of the function.
+          LLVM_DEBUG(dbgs()
+                     << "Fixup Planned: Live-In (with safe defining instrs): "
+                     << printReg(MOp.getReg(), TRI) << "\n");
+          NewLoc.Block = &MF.front();
+          NewLoc.InsertionPt = &*NewLoc.Block->begin();
+          LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
+                            << *NewLoc.InsertionPt);
+          FixupLocsForFn.emplace_back(NewLoc);
+          continue;
+        }
+
+        // If a fixup is needed in more than one place, then the best place to
+        // insert it is adjacent to the use rather than introducing a fixup
+        // adjacent to each def.
+        //
+        // FIXME: It might be better to hoist this to the start of the BB, if
+        // possible.
+        if (IsLiveIn || UnsafeCount > 1) {
+          LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
+                               "(including live-ins): "
+                            << printReg(MOp.getReg(), TRI) << "\n");
+          FixupLocsForFn.emplace_back(NewLoc);
+          continue;
+        }
+
+        assert(UnsafeCount == 1 && !IsLiveIn &&
+               "At this point, there should be one unsafe defining instr "
+               "and the defined register should not be a live-in.");
+        SmallPtrSetIterator<MachineInstr *> It =
+            llvm::find_if(AllDefs, IsUnsafe);
+        assert(It != AllDefs.end() &&
+               "UnsafeCount == 1 but No Unsafe MachineInstr found.");
+        MachineInstr *DefMI = *It;
+
+        LLVM_DEBUG(
+            dbgs() << "Fixup Planned: Found single unsafe defining instr for "
+                   << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
+
+        // There is one unsafe defining instruction, which needs a fixup. It is
+        // generally good to hoist the fixup to be adjacent to the defining
+        // instruction rather than the using instruction, as the using
+        // instruction may be inside a loop when the defining instruction is
+        // not.
+        MachineBasicBlock::iterator DefIt = DefMI;
+        ++DefIt;
+        if (DefIt != DefMI->getParent()->end()) {
+          LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
+                            << "And immediately before " << *DefIt);
+          NewLoc.Block = DefIt->getParent();
+          NewLoc.InsertionPt = &*DefIt;
+        }
+
+        FixupLocsForFn.emplace_back(NewLoc);
+      }
+    }
+  }
+
+  assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
+         "Inserted too many fixups for this function.");
+  (void)MaxAllowedFixups;
+}
+
+void ARMFixCortexA57AES1742098::insertAESFixup(
+    AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+    const ARMBaseRegisterInfo *TRI) const {
+  MachineOperand *OperandToFixup = FixupLoc.MOp;
+
+  assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
+  Register RegToFixup = OperandToFixup->getReg();
+
+  LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
+                    << " before: " << *FixupLoc.InsertionPt);
+
+  // Insert the new `VORRq qN, qN, qN`. There are a few details here:
+  //
+  // The uses are marked as killed, even if the original use of OperandToFixup
+  // is not killed, as the new instruction is clobbering the register. This is
+  // safe even if there are other uses of `qN`, as the VORRq is value-wise a
+  // no-op (it is inserted for microarchitectural reasons).
+  //
+  // The def and the uses are still marked as Renamable if the original register
+  // was, to avoid having to rummage through all the other uses and defs and
+  // unset their renamable bits.
+  unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
+  BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
+          TII->get(ARM::VORRq))
+      .addReg(RegToFixup, RegState::Define | Renamable)
+      .addReg(RegToFixup, RegState::Kill | Renamable)
+      .addReg(RegToFixup, RegState::Kill | Renamable)
+      .addImm((uint64_t)ARMCC::AL)
+      .addReg(ARM::NoRegister);
+}
+
+// Factory function used by ARMTargetMachine to add the pass to
+// the passmanager.
+FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
+  return new ARMFixCortexA57AES1742098();
+}
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 1f2f6f7497e0..48b4d266b41a 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,8 @@
 // |                                   |
 // |-----------------------------------|
 // |                                   |
-// | prev_fp, prev_lr                  |
+// | prev_lr                           |
+// | prev_fp                           |
 // | (a.k.a. "frame record")           |
"frame record") | // | | // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11) @@ -138,6 +139,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrDesc.h" @@ -210,6 +212,12 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { MFI.isFrameAddressTaken()); } +/// isFPReserved - Return true if the frame pointer register should be +/// considered a reserved register on the scope of the specified function. +bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const { + return hasFP(MF) || MF.getSubtarget().createAAPCSFrameChain(); +} + /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for @@ -272,6 +280,230 @@ static int getArgumentStackToRestore(MachineFunction &MF, return ArgumentPopSize; } +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + +// Given a load or a store instruction, generate an appropriate unwinding SEH +// code on Windows. +static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, + const TargetInstrInfo &TII, + unsigned Flags) { + unsigned Opc = MBBI->getOpcode(); + MachineBasicBlock *MBB = MBBI->getParent(); + MachineFunction &MF = *MBB->getParent(); + DebugLoc DL = MBBI->getDebugLoc(); + MachineInstrBuilder MIB; + const ARMSubtarget &Subtarget = MF.getSubtarget(); + const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + + Flags |= MachineInstr::NoMerge; + + switch (Opc) { + default: + report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc)); + break; + case ARM::t2ADDri: // add.w r11, sp, #xx + case ARM::t2ADDri12: // add.w r11, sp, #xx + case ARM::t2MOVTi16: // movt r4, #xx + case ARM::tBL: // bl __chkstk + // These are harmless if used for just setting up a frame pointer, + // but that frame pointer can't be relied upon for unwinding, unless + // set up with SEH_SaveSP. + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + + case ARM::t2MOVi16: { // mov(w) r4, #xx + bool Wide = MBBI->getOperand(1).getImm() >= 256; + if (!Wide) { + MachineInstrBuilder NewInstr = + BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags()); + NewInstr.add(MBBI->getOperand(0)); + NewInstr.add(t1CondCodeOp(/*isDead=*/true)); + for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) + NewInstr.add(MBBI->getOperand(i)); + MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr); + MBB->erase(MBBI); + MBBI = NewMBBI; + } + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags); + break; + } + + case ARM::tBLXr: // blx r12 (__chkstk) + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/0) + .setMIFlags(Flags); + break; + + case ARM::t2MOVi32imm: // movw+movt + // This pseudo instruction expands into two mov instructions. If the + // second operand is a symbol reference, this will stay as two wide + // instructions, movw+movt. If they're immediates, the first one can + // end up as a narrow mov though. 
+ // As two SEH instructions are appended here, they won't get interleaved + // between the two final movw/movt instructions, but it doesn't make any + // practical difference. + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + MBB->insertAfter(MBBI, MIB); + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA_UPD: + case ARM::t2STMDB_UPD: { + unsigned Mask = 0; + bool Wide = false; + for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) { + const MachineOperand &MO = MBBI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = RegInfo->getSEHRegNum(MO.getReg()); + if (Reg == 15) + Reg = 14; + if (Reg >= 8 && Reg <= 13) + Wide = true; + else if (Opc == ARM::t2LDMIA_UPD && Reg == 14) + Wide = true; + Mask |= 1 << Reg; + } + if (!Wide) { + unsigned NewOpc; + switch (Opc) { + case ARM::t2LDMIA_RET: + NewOpc = ARM::tPOP_RET; + break; + case ARM::t2LDMIA_UPD: + NewOpc = ARM::tPOP; + break; + case ARM::t2STMDB_UPD: + NewOpc = ARM::tPUSH; + break; + default: + llvm_unreachable(""); + } + MachineInstrBuilder NewInstr = + BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags()); + for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) + NewInstr.add(MBBI->getOperand(i)); + MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr); + MBB->erase(MBBI); + MBBI = NewMBBI; + } + unsigned SEHOpc = + (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs; + MIB = BuildMI(MF, DL, TII.get(SEHOpc)) + .addImm(Mask) + .addImm(Wide ? 1 : 0) + .setMIFlags(Flags); + break; + } + case ARM::VSTMDDB_UPD: + case ARM::VLDMDIA_UPD: { + int First = -1, Last = 0; + for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) { + const MachineOperand &MO = MBBI->getOperand(i); + unsigned Reg = RegInfo->getSEHRegNum(MO.getReg()); + if (First == -1) + First = Reg; + Last = Reg; + } + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs)) + .addImm(First) + .addImm(Last) + .setMIFlags(Flags); + break; + } + case ARM::tSUBspi: + case ARM::tADDspi: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc)) + .addImm(MBBI->getOperand(2).getImm() * 4) + .addImm(/*Wide=*/0) + .setMIFlags(Flags); + break; + case ARM::t2SUBspImm: + case ARM::t2SUBspImm12: + case ARM::t2ADDspImm: + case ARM::t2ADDspImm12: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc)) + .addImm(MBBI->getOperand(2).getImm()) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + + case ARM::tMOVr: + if (MBBI->getOperand(1).getReg() == ARM::SP && + (Flags & MachineInstr::FrameSetup)) { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP)) + .addImm(Reg) + .setMIFlags(Flags); + } else if (MBBI->getOperand(0).getReg() == ARM::SP && + (Flags & MachineInstr::FrameDestroy)) { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP)) + .addImm(Reg) + .setMIFlags(Flags); + } else { + report_fatal_error("No SEH Opcode for MOV"); + } + break; + + case ARM::tBX_RET: + case ARM::TCRETURNri: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret)) + .addImm(/*Wide=*/0) + .setMIFlags(Flags); + break; + + case ARM::TCRETURNdi: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + } + return MBB->insertAfter(MBBI, MIB); +} + +static MachineBasicBlock::iterator 
+initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
+  if (MBBI == MBB.begin())
+    return MachineBasicBlock::iterator();
+  return std::prev(MBBI);
+}
+
+static void insertSEHRange(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator Start,
+                           const MachineBasicBlock::iterator &End,
+                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+  if (Start.isValid())
+    Start = std::next(Start);
+  else
+    Start = MBB.begin();
+
+  for (auto MI = Start; MI != End;) {
+    auto Next = std::next(MI);
+    // Check if this instruction already has got a SEH opcode added. In that
+    // case, don't do this generic mapping.
+    if (Next != End && isSEHInstruction(*Next)) {
+      MI = std::next(Next);
+      while (MI != End && isSEHInstruction(*MI))
+        ++MI;
+      continue;
+    }
+    insertSEH(MI, TII, MIFlags);
+    MI = Next;
+  }
+}
+
 static void emitRegPlusImmediate(
     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
     const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -392,8 +624,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                      const DebugLoc &DL, const unsigned Reg,
                                      const Align Alignment,
                                      const bool MustBeSingleInstruction) {
-  const ARMSubtarget &AST =
-      static_cast<const ARMSubtarget &>(MF.getSubtarget());
+  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
   const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
   const unsigned AlignMask = Alignment.value() - 1U;
   const unsigned NrBitsToZero = Log2(Alignment);
@@ -452,15 +683,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
 /// this to produce a conservative estimate that we check in an assert() later.
-static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
+static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
+                          const MachineFunction &MF) {
   // For Thumb1, push.w isn't available, so the first push will always push
   // r7 and lr onto the stack first.
   if (AFI.isThumb1OnlyFunction())
     return -AFI.getArgRegsSaveSize() - (2 * 4);
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
-  int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
-  return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
+  int MaxRegBytes = 8 * 4;
+  if (STI.splitFramePointerPush(MF)) {
+    // Here, r11 can be stored below all of r4-r15 (3 registers more than
+    // above), plus d8-d15.
+    MaxRegBytes = 11 * 4 + 8 * 8;
+  }
+  int FPCXTSaveSize =
+      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
 }
 
 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -482,6 +721,7 @@
   unsigned NumBytes = MFI.getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   int FPCXTSaveSize = 0;
+  bool NeedsWinCFI = needsWinCFI(MF);
 
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
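[The insertSEHRange helper above applies a simple generic mapping: every just-emitted prologue or epilogue instruction gets a matching SEH unwind opcode appended, except where a special case in insertSEH has already planted one. A rough standalone C++ sketch of that walk, using invented Instr/withSEH names purely for illustration and not the LLVM API:

#include <iostream>
#include <string>
#include <vector>

// Stand-in for a machine instruction; IsSEH marks unwind pseudo-opcodes.
struct Instr {
  std::string Text;
  bool IsSEH = false;
};

// Generic mapping: append one SEH opcode after each real instruction, but
// leave instructions alone if a special case already annotated them.
static std::vector<Instr> withSEH(const std::vector<Instr> &In) {
  std::vector<Instr> Out;
  for (size_t I = 0; I < In.size(); ++I) {
    Out.push_back(In[I]);
    if (In[I].IsSEH)
      continue; // never annotate an annotation
    if (I + 1 < In.size() && In[I + 1].IsSEH)
      continue; // already carries a hand-written SEH opcode; skip it
    Out.push_back({"SEH(" + In[I].Text + ")", true});
  }
  return Out;
}

int main() {
  std::vector<Instr> Prologue{{"push {r4-r7,lr}"}, {"sub sp, #16"}};
  for (const Instr &I : withSEH(Prologue))
    std::cout << I.Text << "\n";
}

The skip over already-annotated groups mirrors the `isSEHInstruction(*Next)` check in the patch: the special-cased instructions (chkstk calls, SP moves) need more precise opcodes than the generic one-per-instruction mapping would produce.]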
@@ -510,47 +750,92 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup); DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true); } - DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); + if (!NeedsWinCFI) + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); + if (NeedsWinCFI && MBBI != MBB.begin()) { + insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + MF.setHasWinCFI(true); + } return; } // Determine spill area sizes. - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) { + if (STI.splitFramePointerPush(MF)) { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R11: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; GPRCS2Size += 4; break; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R12: + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; + } + } + } else { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) { + GPRCS2Size += 4; + break; + } + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; } - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - GPRCS1Size += 4; - break; - case ARM::FPCXTNS: - FPCXTSaveSize = 4; - break; - default: - // This is a DPR. Exclude the aligned DPRCS2 spills. - if (Reg == ARM::D8) - D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) - DPRCSSize += 8; } } @@ -585,15 +870,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; Align DPRAlign = DPRCSSize ? 
std::min(Align(8), Alignment) : Align(4); - unsigned DPRGapSize = - (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) % - DPRAlign.value(); + unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize; + if (!STI.splitFramePointerPush(MF)) { + DPRGapSize += GPRCS2Size; + } + DPRGapSize %= DPRAlign.value(); - unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + unsigned DPRCSOffset; + if (STI.splitFramePointerPush(MF)) { + DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize; + GPRCS2Offset = DPRCSOffset - GPRCS2Size; + } else { + DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + } int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(STI, *AFI) <= FPOffset && + assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + @@ -604,7 +897,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) { + if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) { GPRCS2Push = LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } @@ -644,18 +937,37 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } else NumBytes = DPRCSOffset; + if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) { + GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + bool NeedsWinCFIStackAlloc = NeedsWinCFI; + if (STI.splitFramePointerPush(MF) && HasFP) + NeedsWinCFIStackAlloc = false; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { uint32_t NumWords = NumBytes >> 2; - if (NumWords < 65536) + if (NumWords < 65536) { BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) .addImm(NumWords) .setMIFlags(MachineInstr::FrameSetup) .add(predOps(ARMCC::AL)); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup); + } else { + // Split into two instructions here, instead of using t2MOVi32imm, + // to allow inserting accurate SEH instructions (including accurate + // instruction size for each of them). 
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords & 0xffff) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4) + .addReg(ARM::R4) + .addImm(NumWords >> 16) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } switch (TM.getCodeModel()) { case CodeModel::Tiny: @@ -682,12 +994,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, break; } - BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addReg(ARM::R4, RegState::Kill) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + MachineInstrBuilder Instr, SEH; + Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + if (NeedsWinCFIStackAlloc) { + SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc)) + .addImm(NumBytes) + .addImm(/*Wide=*/1) + .setMIFlags(MachineInstr::FrameSetup); + MBB.insertAfter(Instr, SEH); + } NumBytes = 0; } @@ -720,34 +1040,58 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. + // FIXME: The above is not necessary true when PACBTI is enabled. + // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, + // so FP ends up on area two. + MachineBasicBlock::iterator AfterPush; if (HasFP) { - MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + AfterPush = std::next(GPRCS1Push); unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, - dl, TII, FramePtr, ARM::SP, - PushSize + FramePtrOffsetInPush, - MachineInstr::FrameSetup); - if (FramePtrOffsetInPush + PushSize != 0) { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), - FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush)); - BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); + int FPOffset = PushSize + FramePtrOffsetInPush; + if (STI.splitFramePointerPush(MF)) { + AfterPush = std::next(GPRCS2Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); } else { - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( - nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, FPOffset, + MachineInstr::FrameSetup); } + if (!NeedsWinCFI) { + if (FramePtrOffsetInPush + PushSize != 0) { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), + FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush)); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } else { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + 
.addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } + } + + // Emit a SEH opcode indicating the prologue end. The rest of the prologue + // instructions below don't need to be replayed to unwind the stack. + if (NeedsWinCFI && MBBI != MBB.begin()) { + MachineBasicBlock::iterator End = MBBI; + if (HasFP && STI.splitFramePointerPush(MF)) + End = AfterPush; + insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup); + BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + MF.setHasWinCFI(true); } // Now that the prologue's actual instructions are finalised, we can insert // the necessary DWARF cf instructions to describe the situation. Start by // recording where each register ended up: - if (GPRCS1Size > 0) { + if (GPRCS1Size > 0 && !NeedsWinCFI) { MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); int CFIIndex; for (const auto &Entry : CSI) { @@ -781,7 +1125,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - if (GPRCS2Size > 0) { + if (GPRCS2Size > 0 && !NeedsWinCFI) { MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { Register Reg = Entry.getReg(); @@ -807,7 +1151,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - if (DPRCSSize > 0) { + if (DPRCSSize > 0 && !NeedsWinCFI) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. MachineBasicBlock::iterator Pos = std::next(LastPush); @@ -831,7 +1175,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // throughout the process. If we have a frame pointer, it takes over the job // half-way through, so only the first few .cfi_def_cfa_offset instructions // actually get emitted. - DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); + if (!NeedsWinCFI) + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - @@ -928,7 +1273,14 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + MachineBasicBlock::iterator RangeStart; if (!AFI->hasStackFrame()) { + if (MF.hasWinCFI()) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart)) + .setMIFlag(MachineInstr::FrameDestroy); + RangeStart = initMBBRange(MBB, MBBI); + } + if (NumBytes + IncomingArgStackToRestore != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes + IncomingArgStackToRestore, @@ -944,6 +1296,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ++MBBI; } + if (MF.hasWinCFI()) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart)) + .setMIFlag(MachineInstr::FrameDestroy); + RangeStart = initMBBRange(MBB, MBBI); + } + // Move SP to start of FP callee save spill area. NumBytes -= (ReservedArgStack + AFI->getFPCXTSaveAreaSize() + @@ -998,6 +1356,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); // Increment past our save areas. 
+ if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF)) + MBBI++; + if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop @@ -1012,7 +1373,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); } - if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; + if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF)) + MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; if (ReservedArgStack || IncomingArgStackToRestore) { @@ -1030,6 +1392,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT)); } + + if (MF.hasWinCFI()) { + insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy); + BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd)) + .setMIFlag(MachineInstr::FrameDestroy); + } } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1245,7 +1613,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && - STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) { + STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && + !STI.splitFramePointerPush(MF)) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -1609,12 +1978,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters( .addImm(-4) .add(predOps(ARMCC::AL)); } - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, - NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + if (STI.splitFramePointerPush(MF)) { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea1Register, 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea2Register, 0, MachineInstr::FrameSetup); + } else { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + } // The code above does not insert spill code for the aligned DPRCS2 registers. // The stack realignment code will be inserted between the push instructions @@ -1642,14 +2020,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters( emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; - unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; + unsigned LdrOpc = + AFI->isThumbFunction() ? 
ARM::t2LDR_POST : ARM::LDR_POST_IMM;
   unsigned FltOpc = ARM::VLDMDIA_UPD;
-  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
-              NumAlignedDPRCS2Regs);
-  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-              &isARMArea2Register, 0);
-  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-              &isARMArea1Register, 0);
+  if (STI.splitFramePointerPush(MF)) {
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+                &isSplitFPArea2Register, 0);
+    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+                NumAlignedDPRCS2Regs);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+                &isSplitFPArea1Register, 0);
+  } else {
+    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+                NumAlignedDPRCS2Regs);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+                &isARMArea2Register, 0);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+                &isARMArea1Register, 0);
+  }
 
   return true;
 }
@@ -1768,7 +2156,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
     return;
 
   // We are planning to use NEON instructions vst1 / vld1.
-  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
+  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
     return;
 
   // Don't bother if the default stack alignment is sufficiently high.
@@ -1818,6 +2206,34 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
   return true;
 }
 
+static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
+  const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+  return Subtarget.createAAPCSFrameChainLeaf() ||
+         (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
+}
+
+// Thumb1 may require a spill when storing to a frame index through FP, for
+// cases where FP is a high register (R11). This scans the function for cases
+// where this may happen.
+static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
+                                       const TargetFrameLowering &TFI) {
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (!AFI->isThumb1OnlyFunction())
+    return false;
+
+  for (const auto &MBB : MF)
+    for (const auto &MI : MBB)
+      if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
+        for (const auto &Op : MI.operands())
+          if (Op.isFI()) {
+            Register Reg;
+            TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
+            if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
+              return true;
+          }
+  return false;
+}
+
 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             BitVector &SavedRegs,
                                             RegScavenger *RS) const {
@@ -1826,7 +2242,7 @@
   // to take advantage the eliminateFrameIndex machinery. This also ensures it
   // is spilled in the order specified by getCalleeSavedRegs() to make it easier
   // to combine multiple loads / stores.
-  bool CanEliminateFrame = true;
+  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
   bool CS1Spilled = false;
   bool LRSpilled = false;
   unsigned NumGPRSpills = 0;
@@ -2021,6 +2437,11 @@
     // Functions with VLAs or extremely large call frames are rare, and
     // if a function is allocating more than 1KB of stack, an extra 4-byte
     // slot probably isn't relevant.
+    //
+    // A special case is the scenario where r11 is used as FP, where accesses
+    // to a frame index will require its value to be moved into a low reg.
+    // This is handled later on, once we are able to determine if we have any
+    // fp-relative accesses.
     if (RegInfo->hasBasePointer(MF))
       EstimatedRSStackSizeLimit = (1U << 5) * 4;
     else
@@ -2049,7 +2470,7 @@
   //
   // We could do slightly better on Thumb1; in some cases, an sp-relative
   // offset would be legal even though an fp-relative offset is not.
-  int MaxFPOffset = getMaxFPOffset(STI, *AFI);
+  int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
   bool HasLargeArgumentList =
       HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
 
@@ -2067,7 +2488,9 @@
       SavedRegs.set(FramePtr);
     // If the frame pointer is required by the ABI, also spill LR so that we
     // emit a complete frame record.
-    if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+    if ((requiresAAPCSFrameRecord(MF) ||
+         MF.getTarget().Options.DisableFramePointerElim(MF)) &&
+        !LRSpilled) {
       SavedRegs.set(ARM::LR);
       LRSpilled = true;
       NumGPRSpills++;
@@ -2149,7 +2572,7 @@
     }
 
     // r7 can be used if it is not being used as the frame pointer.
-    if (!HasFP) {
+    if (!HasFP || FramePtr != ARM::R7) {
      if (SavedRegs.test(ARM::R7)) {
        --RegDeficit;
        LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
@@ -2270,8 +2693,10 @@
     // to materialize a stack offset. If so, either spill one additional
     // callee-saved register or reserve a special spill slot to facilitate
     // register scavenging. Thumb1 needs a spill slot for stack pointer
-    // adjustments also, even when the frame itself is small.
-    if (BigFrameOffsets && !ExtraCSSpill) {
+    // adjustments and for frame index accesses when FP is high register,
+    // even when the frame itself is small.
+    if (!ExtraCSSpill &&
+        (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
       // If any non-reserved CS register isn't spilled, just spill one or two
       // extra. That should take care of it!
       unsigned NumExtras = TargetAlign.value() / 4;
@@ -2488,6 +2913,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   unsigned CFIIndex;
   const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
   bool Thumb = ST->isThumb();
+  bool Thumb2 = ST->isThumb2();
 
   // Sadly, this currently doesn't support varargs, platforms other than
   // android/linux. Note that thumb1/thumb2 are supported for android/linux.
@@ -2505,19 +2931,10 @@
   ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc DL;
 
-  uint64_t StackSize = MFI.getStackSize();
-
-  // Do not generate a prologue for leaf functions with a stack of size zero.
-  // For non-leaf functions we have to allow for the possibility that the
-  // call is to a non-split function, as in PR37807. This function could also
-  // take the address of a non-split function. When the linker tries to adjust
-  // its non-existent prologue, it would fail with an error. Mark the object
-  // file so that such failures are not errors. See this Go language bug-report
-  // https://go-review.googlesource.com/c/go/+/148819/
-  if (StackSize == 0 && !MFI.hasTailCall()) {
-    MF.getMMI().setHasNosplitStack(true);
+  if (!MFI.needsSplitStackProlog())
     return;
-  }
+
+  uint64_t StackSize = MFI.getStackSize();
 
   // Use R4 and R5 as scratch registers.
   // We save R4 and R5 before use and restore them before leaving the function.
@@ -2570,8 +2987,9 @@
     // Make sure the LiveIns are still sorted and unique.
MBB->sortUniqueLiveIns(); // Replace the edges to PrologueMBB by edges to the sequences - // we are about to add. - MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); + // we are about to add, but only update for immediate predecessors. + if (MBB->isSuccessor(&PrologueMBB)) + MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); } // The required stack size that is aligned to ARM constant criterion. @@ -2604,17 +3022,19 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Emit the relevant DWARF information about the change in stack pointer as // well as where to find both r4 and r5 (the callee-save registers) - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8)); - BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); - BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); - BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // mov SR1, sp if (Thumb) { @@ -2630,17 +3050,46 @@ void ARMFrameLowering::adjustForSegmentedStacks( // sub SR1, sp, #StackSize if (!CompareStackPointer && Thumb) { - BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) - .add(condCodeOp()) - .addReg(ScratchReg1) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)); + if (AlignedStackSize < 256) { + BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) + .addImm(AlignedStackSize); + } else { + auto MBBI = McrMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } + BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)); + } } else if (!CompareStackPointer) { - BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) - .addReg(ARM::SP) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (AlignedStackSize < 256) { + BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) + .addReg(ARM::SP) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = McrMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1) + .addReg(ARM::SP) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } } if (Thumb && 
ST->isThumb1Only()) { @@ -2707,28 +3156,69 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Pass first argument for the __morestack by Scratch Register #0. // The amount size of stack required if (Thumb) { - BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) - .add(condCodeOp()) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)); + if (AlignedStackSize < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) + .add(condCodeOp()) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) + .addImm(AlignedStackSize); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } + } } else { - BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (AlignedStackSize < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } } + // Pass second argument for the __morestack by Scratch Register #1. // The amount size of stack consumed to save function arguments. if (Thumb) { - BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) - .add(condCodeOp()) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) - .add(predOps(ARMCC::AL)); + if (ARMFI->getArgumentStackSize() < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) + .add(condCodeOp()) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool( + *AllocMBB, MBBI, DL, ScratchReg1, 0, + alignToARMConstant(ARMFI->getArgumentStackSize())); + } + } } else { - BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool( + *AllocMBB, MBBI, DL, ScratchReg1, 0, + alignToARMConstant(ARMFI->getArgumentStackSize())); + } } // push {lr} - Save return address of this function. 
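[Each of the `< 256` branches above exists because the Thumb1 `movs` encoding (tMOVi8) has only an 8-bit immediate field; larger constants need a Thumb2 movw/movt pair (t2MOVi32imm), or a literal-pool load on Thumb1-only cores. A minimal sketch of that three-way choice, with invented enum names used purely for illustration:

#include <cstdint>
#include <iostream>

// Hypothetical kinds standing in for the opcodes chosen in the code above.
enum class MovKind { ThumbMovImm8, Thumb2MovImm32, ConstPoolLoad };

// Mirror of the decision in the segmented-stack prologue: a Thumb1 mov can
// only encode immediates 0..255; beyond that, Thumb2 can synthesize a 32-bit
// constant inline, while Thumb1 must load it from a constant pool.
static MovKind selectMaterialization(uint32_t Imm, bool IsThumb2) {
  if (Imm < 256)
    return MovKind::ThumbMovImm8;
  return IsThumb2 ? MovKind::Thumb2MovImm32 : MovKind::ConstPoolLoad;
}

int main() {
  std::cout << (selectMaterialization(128, false) == MovKind::ThumbMovImm8)
            << (selectMaterialization(1024, true) == MovKind::Thumb2MovImm32)
            << (selectMaterialization(1024, false) == MovKind::ConstPoolLoad)
            << "\n"; // prints 111
}

This is why the patch threads a `Thumb2` flag through adjustForSegmentedStacks: the old code assumed the aligned stack size and argument size always fit in eight bits.]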
@@ -2746,13 +3236,15 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Emit the DWARF info about the change in stack as well as where to find the // previous link register - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12)); - BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); - BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // Call __morestack(). if (Thumb) { @@ -2808,9 +3300,11 @@ void ARMFrameLowering::adjustForSegmentedStacks( } // Update the CFA offset now that we've popped - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // Return from this function. BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL)); @@ -2832,20 +3326,22 @@ void ARMFrameLowering::adjustForSegmentedStacks( } // Update the CFA offset now that we've popped - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Tell debuggers that r4 and r5 are now the same as they were in the - // previous function, that they're the "Same Value". - CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( - nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); - BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( - nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); - BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Tell debuggers that r4 and r5 are now the same as they were in the + // previous function, that they're the "Same Value". 
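Each of the CFI hunks in this function is wrapped in the same predicate: DWARF .cfi_* directives describe frame state that Windows-on-ARM targets express through SEH unwind opcodes instead, so they are suppressed whenever the assembler info reports Windows CFI. A sketch of the guard, assuming any MachineFunction MF (shouldEmitDwarfCFI is a hypothetical helper name):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"

// True when DWARF call-frame instructions should be emitted, i.e. whenever
// the target does not use Windows (SEH) unwind info.
bool shouldEmitDwarfCFI(const llvm::MachineFunction &MF) {
  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}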
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( + nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( + nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // Organizing MBB lists PostStackMBB->addSuccessor(&PrologueMBB); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 9822e2321bb4..16f2ce6bea6f 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -46,6 +46,7 @@ public: bool enableCalleeSaveSkip(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; + bool isFPReserved(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 0d201a67af46..9b26aac6c0b7 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -11,6 +11,8 @@ #include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 98c8133282a2..e0e4ffd90e0e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1058,15 +1058,15 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { // This case occurs only for VLD1-lane/dup and VST1-lane instructions. // The maximum alignment is equal to the memory size being referenced. - unsigned MMOAlign = MemN->getAlignment(); + llvm::Align MMOAlign = MemN->getAlign(); unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; - if (MMOAlign >= MemSize && MemSize > 1) + if (MMOAlign.value() >= MemSize && MemSize > 1) Alignment = MemSize; } else { // All other uses of addrmode6 are for intrinsics. For now just record // the raw alignment value; it will be refined later based on the legal // alignment operands for the intrinsic. - Alignment = MemN->getAlignment(); + Alignment = MemN->getAlign().value(); } Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); @@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { return false; } -/// Target-specific DAG combining for ISD::XOR. +/// Target-specific DAG combining for ISD::SUB. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X /// select_cc setgt X, -1, X, -X /// select_cc setl[te] X, 0, -X, X /// select_cc setlt X, 1, -X, X /// which represent Integer ABS into: -/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) +/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) /// ARM instruction selection detects the latter and matches it to /// ARM::ABS or ARM::t2ABS machine node. 
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ - SDValue XORSrc0 = N->getOperand(0); - SDValue XORSrc1 = N->getOperand(1); + SDValue SUBSrc0 = N->getOperand(0); + SDValue SUBSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) return false; - if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) + if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) return false; - SDValue ADDSrc0 = XORSrc0.getOperand(0); - SDValue ADDSrc1 = XORSrc0.getOperand(1); - SDValue SRASrc0 = XORSrc1.getOperand(0); - SDValue SRASrc1 = XORSrc1.getOperand(1); + SDValue XORSrc0 = SUBSrc0.getOperand(0); + SDValue XORSrc1 = SUBSrc0.getOperand(1); + SDValue SRASrc0 = SUBSrc1.getOperand(0); + SDValue SRASrc1 = SUBSrc1.getOperand(1); ConstantSDNode *SRAConstant = dyn_cast(SRASrc1); EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && - XType.isInteger() && SRAConstant != nullptr && - Size == SRAConstant->getZExtValue()) { + if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && + SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; - CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); return true; } @@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryInlineAsm(N)) return; break; - case ISD::XOR: - // Select special operations if XOR node forms integer ABS pattern + case ISD::SUB: + // Select special operations if SUB node forms integer ABS pattern if (tryABSOp(N)) return; // Other cases are autogenerated. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 1b41427a1cab..85e32c08c74c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -273,6 +273,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::USUBSAT, VT, Legal); setOperationAction(ISD::ABDS, VT, Legal); setOperationAction(ISD::ABDU, VT, Legal); + setOperationAction(ISD::AVGFLOORS, VT, Legal); + setOperationAction(ISD::AVGFLOORU, VT, Legal); + setOperationAction(ISD::AVGCEILS, VT, Legal); + setOperationAction(ISD::AVGCEILU, VT, Legal); // No native support for these. setOperationAction(ISD::UDIV, VT, Expand); @@ -392,6 +396,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); @@ -476,7 +481,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && - !Subtarget->isTargetWatchOS()) { + !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) { bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) setLibcallCallingConv(static_cast(LCID), @@ -809,8 +814,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Combine low-overhead loop intrinsics so that we can lower i1 types. 
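The rewritten combine tracks a change in target-independent lowering: integer ABS now arrives as sub(xor(x, y), y) rather than xor(add(x, y), y), with y the sign-extended sign bit. Both identities are easy to verify in plain C++; this standalone example is illustrative only (absViaSub/absViaXor are made-up names):

#include <cassert>
#include <cstdint>

// y = x >> 31 is 0 for non-negative x and -1 (all ones) for negative x,
// assuming the usual arithmetic right shift on signed integers.
int32_t absViaSub(int32_t x) {
  int32_t y = x >> 31;
  return (x ^ y) - y;   // the ISD::SUB form matched above
}
int32_t absViaXor(int32_t x) {
  int32_t y = x >> 31;
  return (x + y) ^ y;   // the older ISD::XOR form
}
int main() {
  for (int32_t x : {-7, -1, 0, 1, 7})
    assert(absViaSub(x) == absViaXor(x) &&
           absViaSub(x) == (x < 0 ? -x : x));
}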
if (Subtarget->hasLOB()) { - setTargetDAGCombine(ISD::BRCOND); - setTargetDAGCombine(ISD::BR_CC); + setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC}); } if (Subtarget->hasNEON()) { @@ -982,13 +986,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, MVT::v4f32, Expand); } - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::FP_TO_SINT); - setTargetDAGCombine(ISD::FP_TO_UINT); - setTargetDAGCombine(ISD::FDIV); - setTargetDAGCombine(ISD::LOAD); + setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT, + ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD}); // It is legal to extload from v4i8 to v4i16 or v4i32. for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, @@ -1002,32 +1001,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { - setTargetDAGCombine(ISD::BUILD_VECTOR); - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); - setTargetDAGCombine(ISD::INSERT_SUBVECTOR); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::VECREDUCE_ADD); - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::BITCAST); + setTargetDAGCombine( + {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR, + ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, + ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, + ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, + ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST}); } if (Subtarget->hasMVEIntegerOps()) { - setTargetDAGCombine(ISD::SMIN); - setTargetDAGCombine(ISD::UMIN); - setTargetDAGCombine(ISD::SMAX); - setTargetDAGCombine(ISD::UMAX); - setTargetDAGCombine(ISD::FP_EXTEND); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::SELECT_CC); - setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX, + ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC, + ISD::SETCC}); } if (Subtarget->hasMVEFloatOps()) { setTargetDAGCombine(ISD::FADD); @@ -1364,6 +1348,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } + // Compute supported atomic widths. + if (Subtarget->isTargetLinux() || + (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { + // For targets where __sync_* routines are reliably available, we use them + // if necessary. + // + // ARM Linux always supports 64-bit atomics through kernel-assisted atomic + // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? + // + // ARMv6 targets have native instructions in ARM mode. For Thumb mode, + // such targets should provide __sync_* routines, which use the ARM mode + // instructions. (ARMv6 doesn't have dmb, but it has an equivalent + // encoding; see ARMISD::MEMBARRIER_MCR.) + setMaxAtomicSizeInBitsSupported(64); + } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) { + // Cortex-M (besides Cortex-M0) have 32-bit atomics. + setMaxAtomicSizeInBitsSupported(32); + } else { + // We can't assume anything about other targets; just use libatomic + // routines. 
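setMaxAtomicSizeInBitsSupported tells the IR-level AtomicExpand pass the widest atomic the backend can honor inline or through reliable __sync_* routines; anything wider is routed to libatomic. Condensed, the selection in the comment above and the call that follows amount to this (an illustrative mirror of the hunk, maxARMAtomicWidth is not a real function):

// Returns the maximum supported atomic width in bits.
unsigned maxARMAtomicWidth(bool IsLinux, bool IsMClass, bool HasV6,
                           bool HasV8MBaseline) {
  if (IsLinux || (!IsMClass && HasV6))
    return 64; // kernel helpers, or ldrexd/strexd-capable cores
  if (IsMClass && HasV8MBaseline)
    return 32; // ldrex/strex only
  return 0;    // no assumptions; libatomic handles every size
}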
+ setMaxAtomicSizeInBitsSupported(0); + } + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. @@ -1545,12 +1552,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine( + {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR}); if (Subtarget->hasMVEIntegerOps()) setTargetDAGCombine(ISD::VSELECT); @@ -1559,6 +1562,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SRL); if (Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::SHL); + // Attempt to lower smin/smax to ssat/usat + if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || + Subtarget->isThumb2()) { + setTargetDAGCombine({ISD::SMIN, ISD::SMAX}); + } setStackPointerRegisterToSaveRestore(ARM::SP); @@ -1901,13 +1909,14 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { // source/dest is aligned and the copy size is large enough. We therefore want // to align such objects passed to memory intrinsics. bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, - unsigned &PrefAlign) const { + Align &PrefAlign) const { if (!isa(CI)) return false; MinSize = 8; // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 // cycle faster than 4-byte aligned LDM. - PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); + PrefAlign = + (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4)); return true; } @@ -2326,7 +2335,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Lower 'returns_twice' calls to a pseudo-instruction. if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && - !Subtarget->getNoBTIAtReturnTwice()) + !Subtarget->noBTIAtReturnTwice()) GuardWithBTI = AFI->branchTargetEnforcement(); // Determine whether this is a non-secure function call. @@ -2778,25 +2787,23 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
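Several hunks in this file migrate raw unsigned alignments to llvm::Align, which is guaranteed to be a non-zero power of two, so the old "0 means unknown" convention (compare the deleted "if (Alignment == 0)" in TryCombineBaseUpdate later in this diff) disappears at compile time. A small standalone illustration of the conversions these hunks use:

#include "llvm/Support/Alignment.h"
#include <cstdint>
using llvm::Align;

void alignDemo() {
  Align A(8);               // asserts unless the value is a power of two
  uint64_t Raw = A.value(); // back to a plain integer for older APIs
  Align B = llvm::commonAlignment(A, 4); // alignment valid at A + 4: Align(4)
  (void)Raw;
  (void)B;
}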
- if (!isTailCall) { - const uint32_t *Mask; - const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); - if (isThisReturn) { - // For 'this' returns, use the R0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(MF, CallConv); - if (!Mask) { - // Set isThisReturn to false if the calling convention is not one that - // allows 'returned' to be modeled in this way, so LowerCallResult does - // not try to pass 'this' straight through - isThisReturn = false; - Mask = ARI->getCallPreservedMask(MF, CallConv); - } - } else + const uint32_t *Mask; + const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable + Mask = ARI->getThisReturnPreservedMask(MF, CallConv); + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; Mask = ARI->getCallPreservedMask(MF, CallConv); + } + } else + Mask = ARI->getCallPreservedMask(MF, CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - } + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -4379,7 +4386,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, bool ARMTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional CC) const { - bool IsABIRegCopy = CC.hasValue(); + bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { @@ -4397,7 +4404,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts( SDValue ARMTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, Optional CC) const { - bool IsABIRegCopy = CC.hasValue(); + bool IsABIRegCopy = CC.has_value(); if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { unsigned ValueBits = ValueVT.getSizeInBits(); @@ -5547,7 +5554,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (LoadSDNode *Ld = dyn_cast(Op)) return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->getAlignment(), + Ld->getPointerInfo(), Ld->getAlign(), Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); @@ -5567,14 +5574,14 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), - Ld->getAlignment(), Ld->getMemOperand()->getFlags()); + Ld->getAlign(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); - unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, - Ld->getPointerInfo().getWithOffset(4), NewAlign, + Ld->getPointerInfo().getWithOffset(4), + commonAlignment(Ld->getAlign(), 4), Ld->getMemOperand()->getFlags()); return; } @@ -5801,8 +5808,7 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.UnrollVectorOp(Op.getNode()); } - 
const bool HasFullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool HasFullFP16 = DAG.getSubtarget().hasFullFP16(); EVT NewTy; const EVT OpTy = Op.getOperand(0).getValueType(); @@ -5912,8 +5918,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { Op.getOperand(0).getValueType() == MVT::v8i16) && "Invalid type for custom lowering!"); - const bool HasFullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool HasFullFP16 = DAG.getSubtarget().hasFullFP16(); EVT DestVecType; if (VT == MVT::v4f32) @@ -9359,15 +9364,15 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), - LD->getBasePtr(), LD->getPointerInfo(), - LD->getAlignment(), LD->getMemOperand()->getFlags()); + LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(), + LD->getMemOperand()->getFlags()); // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->getAlignment(), + LD->getMemoryVT(), LD->getAlign(), LD->getMemOperand()->getFlags()); } @@ -9876,7 +9881,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (N->getOpcode() != ISD::SDIV) return SDValue(); - const auto &ST = static_cast(DAG.getSubtarget()); + const auto &ST = DAG.getSubtarget(); const bool MinSize = ST.hasMinSize(); const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode() : ST.hasDivideInARMMode(); @@ -10311,6 +10316,15 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({Result, Chain}, dl); } +SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + + EVT VT = getPointerTy(DAG.getDataLayout()); + SDLoc DL(Op); + int FI = MFI.CreateFixedObject(4, 0, false); + return DAG.getFrameIndex(FI, VT); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -10424,6 +10438,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG); + case ISD::SPONENTRY: + return LowerSPONENTRY(Op, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } @@ -10509,9 +10525,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); - case ISD::ABS: - lowerABS(N, Results, DAG); - return ; case ISD::LOAD: LowerLOAD(N, Results, DAG); break; @@ -12170,7 +12183,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, if (Subtarget->isThumb1Only()) { for (unsigned c = MCID->getNumOperands() - 4; c--;) { MI.addOperand(MI.getOperand(1)); - MI.RemoveOperand(1); + MI.removeOperand(1); } // Restore the ties @@ -12208,7 +12221,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, definesCPSR = true; if (MO.isDead()) deadCPSR = true; - MI.RemoveOperand(i); + MI.removeOperand(i); break; } } @@ -14775,14 +14788,14 @@ static SDValue 
PerformVMOVRRDCombine(SDNode *N, SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), - LD->getAlignment(), LD->getMemOperand()->getFlags()); + LD->getAlign(), LD->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr, LD->getPointerInfo().getWithOffset(4), - std::min(4U, LD->getAlignment()), + commonAlignment(LD->getAlign(), 4), LD->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); @@ -15352,6 +15365,10 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) { case ISD::MULHU: case ISD::ABDS: case ISD::ABDU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: break; default: return SDValue(); @@ -15721,7 +15738,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, // Now, create a _UPD node, taking care of not breaking alignment. EVT AlignedVecTy = VecTy; - unsigned Alignment = MemN->getAlignment(); + Align Alignment = MemN->getAlign(); // If this is a less-than-standard-aligned load/store, change the type to // match the standard alignment. @@ -15738,10 +15755,8 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, // memory type to match the explicit alignment. That way, we don't // generate non-standard-aligned ARMISD::VLDx nodes. if (isa(N)) { - if (Alignment == 0) - Alignment = 1; - if (Alignment < VecTy.getScalarSizeInBits() / 8) { - MVT EltTy = MVT::getIntegerVT(Alignment * 8); + if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) { + MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8); assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); assert(!isLaneOp && "Unexpected generic load/store lane."); unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); @@ -15754,7 +15769,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, // alignment of the memory type. // Intrinsics, however, always get an explicit alignment, set to the // alignment of the MMO. - Alignment = 1; + Alignment = Align(1); } // Create the new updating load/store node. @@ -15787,7 +15802,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, } // For all node types, the alignment operand is always the last one. - Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32)); + Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32)); // If this is a non-standard-aligned STORE, the penultimate operand is the // stored value. Bitcast it to the aligned type. @@ -15965,10 +15980,10 @@ static SDValue CombineBaseUpdate(SDNode *N, // Try to fold with other users. Non-constant updates are considered // first, and constant updates are sorted to not break a sequence of // strided accesses (if there is any). 
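The switch to std::stable_sort just below is load-bearing for that comment: users with equal ConstInc keys must keep their original relative order so a run of strided accesses is not reshuffled, and std::sort makes no ordering guarantee for equal elements. A standalone illustration (User here is a local toy struct, not the LLVM type):

#include <algorithm>
#include <cassert>
#include <vector>

struct User { int ConstInc; char Tag; };

int main() {
  std::vector<User> Us = {{4, 'a'}, {0, 'b'}, {4, 'c'}};
  std::stable_sort(Us.begin(), Us.end(),
                   [](const User &L, const User &R) {
                     return L.ConstInc < R.ConstInc;
                   });
  // Equal keys keep program order: 'a' is still ahead of 'c'.
  assert(Us[0].Tag == 'b' && Us[1].Tag == 'a' && Us[2].Tag == 'c');
}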
- std::sort(BaseUpdates.begin(), BaseUpdates.end(), - [](BaseUpdateUser &LHS, BaseUpdateUser &RHS) { - return LHS.ConstInc < RHS.ConstInc; - }); + std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(), + [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) { + return LHS.ConstInc < RHS.ConstInc; + }); for (BaseUpdateUser &User : BaseUpdates) { if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI)) return SDValue(); @@ -16258,7 +16273,7 @@ static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, if (LD && Op.hasOneUse() && LD->isUnindexed() && LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) { SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1), - DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32)}; + DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)}; SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other); SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops, @@ -16360,7 +16375,7 @@ static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, ShuffWide, DAG.getIntPtrConstant(I, DL)); SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(), - St->getAlignment(), St->getMemOperand()->getFlags()); + St->getAlign(), St->getMemOperand()->getFlags()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); Chains.push_back(Ch); @@ -16608,7 +16623,7 @@ static SDValue PerformSTORECombine(SDNode *N, DCI.AddToWorklist(ExtElt.getNode()); DCI.AddToWorklist(V.getNode()); return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), + St->getPointerInfo(), St->getAlign(), St->getMemOperand()->getFlags(), St->getAAInfo()); } @@ -16690,14 +16705,16 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDLoc DL(N); - // The identity element for a fadd is -0.0, which these VMOV's represent. - auto isNegativeZeroSplat = [&](SDValue Op) { + // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set, + // which these VMOV's represent. 
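The widened predicate rests on IEEE-754 signed zeros: x + (-0.0) == x for every x, while x + (+0.0) loses the sign of x == -0.0, so +0.0 only qualifies as an identity once the fadd carries the no-signed-zeros (nsz) flag. A quick standalone check of that claim:

#include <cassert>
#include <cmath>

int main() {
  assert(-5.0 + -0.0 == -5.0);
  // -0.0 + +0.0 rounds to +0.0, so +0.0 is not a universal identity:
  assert(!std::signbit(-0.0 + +0.0));
  // whereas adding -0.0 preserves the sign of a negative zero:
  assert(std::signbit(-0.0 + -0.0));
}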
+ auto isIdentitySplat = [&](SDValue Op, bool NSZ) { if (Op.getOpcode() != ISD::BITCAST || Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM) return false; - if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664) + uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0); + if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ))) return true; - if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688) + if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ))) return true; return false; }; @@ -16705,12 +16722,17 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT) std::swap(Op0, Op1); - if (Op1.getOpcode() != ISD::VSELECT || - !isNegativeZeroSplat(Op1.getOperand(2))) + if (Op1.getOpcode() != ISD::VSELECT) + return SDValue(); + + SDNodeFlags FaddFlags = N->getFlags(); + bool NSZ = FaddFlags.hasNoSignedZeros(); + if (!isIdentitySplat(Op1.getOperand(2), NSZ)) return SDValue(); + SDValue FAdd = - DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags()); - return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0); + DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags); + return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags); } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -17060,13 +17082,10 @@ static SDValue PerformVMOVNCombine(SDNode *N, IsTop ? Op1DemandedElts : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1)); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI)) return SDValue(N, 0); - if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); @@ -17082,10 +17101,8 @@ static SDValue PerformVQMOVNCombine(SDNode *N, APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1) : APInt::getHighBitsSet(2, 1)); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); } @@ -17390,7 +17407,7 @@ static SDValue PerformShiftCombine(SDNode *N, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); - if (ST->hasMVEIntegerOps() && VT == MVT::v2i64) + if (ST->hasMVEIntegerOps()) return SDValue(); int64_t Cnt; @@ -17556,12 +17573,57 @@ static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating +// constant bounds. 
+static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) && + !Subtarget->isThumb2()) + return SDValue(); + + EVT VT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + + if (VT != MVT::i32 || + (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) || + !isa(Op.getOperand(1)) || + !isa(Op0.getOperand(1))) + return SDValue(); + + SDValue Min = Op; + SDValue Max = Op0; + SDValue Input = Op0.getOperand(0); + if (Min.getOpcode() == ISD::SMAX) + std::swap(Min, Max); + + APInt MinC = Min.getConstantOperandAPInt(1); + APInt MaxC = Max.getConstantOperandAPInt(1); + + if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX || + !(MinC + 1).isPowerOf2()) + return SDValue(); + + SDLoc DL(Op); + if (MinC == ~MaxC) + return DAG.getNode(ARMISD::SSAT, DL, VT, Input, + DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); + if (MaxC == 0) + return DAG.getNode(ARMISD::USAT, DL, VT, Input, + DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); + + return SDValue(); +} + /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating /// saturates. static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); + + if (VT == MVT::i32) + return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST); + if (!ST->hasMVEIntegerOps()) return SDValue(); @@ -19354,8 +19416,8 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Return false to prevent folding // (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine, // if the folding leads to worse code. -bool ARMTargetLowering::isMulAddWithConstProfitable( - const SDValue &AddNode, const SDValue &ConstNode) const { +bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const { // Let the DAGCombiner decide for vector types and large types. const EVT VT = AddNode.getValueType(); if (VT.isVector() || VT.getScalarSizeInBits() > 32) @@ -20537,38 +20599,6 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result; } -void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const { - assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS."); - MVT HalfT = MVT::i32; - SDLoc dl(N); - SDValue Hi, Lo, Tmp; - - if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) || - !isOperationLegalOrCustom(ISD::UADDO, HalfT)) - return ; - - unsigned OpTypeBits = HalfT.getScalarSizeInBits(); - SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); - - Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(0, dl, HalfT)); - Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(1, dl, HalfT)); - - Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi, - DAG.getConstant(OpTypeBits - 1, dl, - getShiftAmountTy(HalfT, DAG.getDataLayout()))); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, - SDValue(Lo.getNode(), 1)); - Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); - Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - - Results.push_back(Lo); - Results.push_back(Hi); -} - bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. 
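PerformMinMaxToSatCombine above keys on bounds of the shape MinC = 2^k - 1: when MaxC == ~MinC the clamp is exactly ARM's ssat (signed saturation), and when MaxC == 0 it is usat (unsigned saturation of a signed input). Worked numerically for an 8-bit range, illustrative only (clampS8/clampU8 are local lambdas):

#include <algorithm>
#include <cassert>

int main() {
  // smin(smax(x, -128), 127): MinC = 127, MinC + 1 = 128 is a power of two,
  // and MaxC = -128 == ~127, so this matches the ssat form.
  auto clampS8 = [](int x) { return std::min(std::max(x, -128), 127); };
  assert(clampS8(300) == 127 && clampS8(-300) == -128 && clampS8(5) == 5);

  // smin(smax(x, 0), 255): MinC = 255, MaxC = 0, the usat form.
  auto clampU8 = [](int x) { return std::min(std::max(x, 0), 255); };
  assert(clampU8(300) == 255 && clampU8(-4) == 0 && clampU8(5) == 5);
}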
@@ -20787,24 +20817,24 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); + Type *ValTy = I.getParamElementType(0); Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::getVT(ValTy); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ValTy); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); + Type *ValTy = I.getParamElementType(1); Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::getVT(ValTy); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ValTy); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } @@ -20932,9 +20962,19 @@ Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder, // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. -bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { +TargetLoweringBase::AtomicExpansionKind +ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + bool has64BitAtomicStore; + if (Subtarget->isMClass()) + has64BitAtomicStore = false; + else if (Subtarget->isThumb()) + has64BitAtomicStore = Subtarget->hasV7Ops(); + else + has64BitAtomicStore = Subtarget->hasV6Ops(); + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return (Size == 64) && !Subtarget->isMClass(); + return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; } // Loads and stores less than 64-bits are already atomic; ones above that @@ -20946,9 +20986,17 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // sections A8.8.72-74 LDRD) TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + bool has64BitAtomicLoad; + if (Subtarget->isMClass()) + has64BitAtomicLoad = false; + else if (Subtarget->isThumb()) + has64BitAtomicLoad = Subtarget->hasV7Ops(); + else + has64BitAtomicLoad = Subtarget->hasV6Ops(); + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; + return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, @@ -20958,19 +21006,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { if (AI->isFloatingPointOperation()) return AtomicExpansionKind::CmpXChg; - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. 
If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); - return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) - ? AtomicExpansionKind::LLSC - : AtomicExpansionKind::None; + bool hasAtomicRMW; + if (Subtarget->isMClass()) + hasAtomicRMW = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + hasAtomicRMW = Subtarget->hasV7Ops(); + else + hasAtomicRMW = Subtarget->hasV6Ops(); + if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; + } + return AtomicExpansionKind::None; } // Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 @@ -20983,8 +21037,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); - bool HasAtomicCmpXchg = - !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + bool HasAtomicCmpXchg; + if (Subtarget->isMClass()) + HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + HasAtomicCmpXchg = Subtarget->hasV7Ops(); + else + HasAtomicCmpXchg = Subtarget->hasV6Ops(); if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U)) return AtomicExpansionKind::LLSC; @@ -21099,8 +21158,11 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys); + CallInst *CI = Builder.CreateCall(Ldrex, Addr); - return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldrex, Addr), ValueTy); + CI->addParamAttr( + 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy)); + return Builder.CreateTruncOrBitCast(CI, ValueTy); } void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( @@ -21138,10 +21200,13 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder, Type *Tys[] = { Addr->getType() }; Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); - return Builder.CreateCall( + CallInst *CI = Builder.CreateCall( Strex, {Builder.CreateZExtOrBitCast( Val, Strex->getFunctionType()->getParamType(0)), Addr}); + CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType, + Val->getType())); + return CI; } @@ -21273,7 +21338,7 @@ bool ARMTargetLowering::lowerInterleavedLoad( SmallVector Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); - Ops.push_back(Builder.getInt32(LI->getAlignment())); + Ops.push_back(Builder.getInt32(LI->getAlign().value())); return Builder.CreateCall(VldnFunc, Ops, "vldN"); } else { @@ -21443,7 +21508,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SmallVector Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); append_range(Ops, Shuffles); - Ops.push_back(Builder.getInt32(SI->getAlignment())); + Ops.push_back(Builder.getInt32(SI->getAlign().value())); Builder.CreateCall(VstNFunc, Ops); } else { assert((Factor == 2 || Factor == 4) && diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 1c5f8389f57c..10f60ab93ae3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -581,7 +581,7 @@ class VectorType; getRegClassFor(MVT VT, bool isDivergent = false) const override; bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, - unsigned &PrefAlign) const override; + Align &PrefAlign) const override; /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
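With opaque pointers the accessed type can no longer be read off the pointer operand, which is why these hunks both attach an elementtype attribute when building ldrex/strex calls and read it back with getParamElementType in getTgtMemIntrinsic. The round trip, condensed from the hunks above into one hypothetical helper (Ldrex, Addr, and ValueTy play the same roles as in emitLoadLinked):

#include "llvm/IR/IRBuilder.h"

// Attach the accessed type to the pointer argument, then recover it the way
// getTgtMemIntrinsic does. Sketch only; mirrors the patch, not new API.
llvm::Type *elementTypeRoundTrip(llvm::IRBuilderBase &Builder,
                                 llvm::Function *Ldrex, llvm::Value *Addr,
                                 llvm::Type *ValueTy) {
  llvm::CallInst *CI = Builder.CreateCall(Ldrex, Addr);
  CI->addParamAttr(0, llvm::Attribute::get(Builder.getContext(),
                                           llvm::Attribute::ElementType,
                                           ValueTy));
  return CI->getParamElementType(0); // ValueTy, without any pointee type
}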
@@ -665,7 +665,8 @@ class VectorType; bool shouldInsertFencesForAtomic(const Instruction *I) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; TargetLoweringBase::AtomicExpansionKind @@ -713,8 +714,8 @@ class VectorType; Align Alignment, const DataLayout &DL) const; - bool isMulAddWithConstProfitable(const SDValue &AddNode, - const SDValue &ConstNode) const override; + bool isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const override; bool alignLoopsWithOptSize() const override; @@ -845,8 +846,7 @@ class VectorType; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const; - void lowerABS(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; + SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; void LowerLOAD(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index ff5afd787c82..c9a2d21bec53 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -1589,9 +1589,9 @@ class VFPXI pattern> + string opc, string asm, string cstr, list pattern> : VFPI { + opc, asm, cstr, pattern> { let PostEncoderMethod = "VFPThumb2PostEncoder"; } @@ -1751,8 +1751,8 @@ class AXSI4 opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { + string asm, string cstr, list pattern> + : VFPAI { // Instruction operands. bits<5> Dd; bits<5> Dm; @@ -1804,7 +1804,7 @@ class ADuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { // Instruction operands. bits<5> Dd; bits<5> Dn; @@ -1862,8 +1862,8 @@ class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, // Single precision, unary, predicated class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { + string asm, string cstr, list pattern> + : VFPAI { // Instruction operands. bits<5> Sd; bits<5> Sm; @@ -1916,14 +1916,14 @@ class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : ASuI { + "", pattern> { list Predicates = [HasVFP2,DontUseNEONForFP]; } // Single precision, binary class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { // Instruction operands. bits<5> Sd; bits<5> Sn; @@ -2000,7 +2000,7 @@ class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, class AHuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { list Predicates = [HasFullFP16]; // Instruction operands. 
@@ -2056,7 +2056,7 @@ class AHuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, // Half precision, binary class AHbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { list Predicates = [HasFullFP16]; // Instruction operands. @@ -2116,7 +2116,7 @@ class AHbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, class AVConv1I opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{19-16} = opcod3; @@ -2149,7 +2149,7 @@ class AVConv1In opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{4} = 1; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 32a3911d3369..88bb74d1fc54 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -5129,6 +5129,7 @@ let hasNoSchedulingInfo = 1 in def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary, "tsb", "\t$opt", []>, Requires<[IsARM, HasV8_4a]> { let Inst{31-0} = 0xe320f012; + let DecoderMethod = "DecodeTSBInstruction"; } } @@ -6387,7 +6388,7 @@ def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; // Pre-v6, 'mov r0, r0' was used as a NOP encoding. -def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, +def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg), 0>, Requires<[IsARM, NoV6]>; // MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but @@ -6415,8 +6416,7 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // 'it' blocks in ARM mode just validate the predicates. The IT itself // is discarded. -def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, - ComplexDeprecationPredicate<"IT">; +def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; let mayLoad = 1, mayStore =1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), @@ -6476,3 +6476,24 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, let AsmString = "@ COMPILER BARRIER"; let hasNoSchedulingInfo = 1; } + +//===----------------------------------------------------------------------===// +// Instructions used for emitting unwind opcodes on Windows. 
+//===----------------------------------------------------------------------===// +let isPseudo = 1 in { + def SEH_StackAlloc : PseudoInst<(outs), (ins i32imm:$size, i32imm:$wide), NoItinerary, []>, Sched<[]>; + def SEH_SaveRegs : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_SaveRegs_Ret : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>; + def SEH_SaveSP : PseudoInst<(outs), (ins i32imm:$reg), NoItinerary, []>, Sched<[]>; + def SEH_SaveFRegs : PseudoInst<(outs), (ins i32imm:$first, i32imm:$last), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_SaveLR : PseudoInst<(outs), (ins i32imm:$offst), NoItinerary, []>, Sched<[]>; + def SEH_Nop : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_Nop_Ret : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>; + def SEH_PrologEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + def SEH_EpilogStart : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; +} diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 1ae0354ffc37..15c33014e988 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2192,36 +2192,29 @@ def subnsw : PatFrag<(ops node:$lhs, node:$rhs), return N->getFlags().hasNoSignedWrap(); }]>; -multiclass MVE_VRHADD_m { +multiclass MVE_VRHADD_m { def "" : MVE_VRHADD_Base; defvar Inst = !cast(NAME); + defm : MVE_TwoOpPattern(NAME)>; let Predicates = [HasMVEInt] in { - // Unpredicated rounding add-with-divide-by-two + // Unpredicated rounding add-with-divide-by-two intrinsic def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))), (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; - - // Predicated add-with-divide-by-two - def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg, - (VTI.Vec MQPR:$inactive)))>; } } -multiclass MVE_VRHADD - : MVE_VRHADD_m; +multiclass MVE_VRHADD + : MVE_VRHADD_m; -defm MVE_VRHADDs8 : MVE_VRHADD; -defm MVE_VRHADDs16 : MVE_VRHADD; -defm MVE_VRHADDs32 : MVE_VRHADD; -defm MVE_VRHADDu8 : MVE_VRHADD; -defm MVE_VRHADDu16 : MVE_VRHADD; -defm MVE_VRHADDu32 : MVE_VRHADD; +defm MVE_VRHADDs8 : MVE_VRHADD; +defm MVE_VRHADDs16 : MVE_VRHADD; +defm MVE_VRHADDs32 : MVE_VRHADD; +defm MVE_VRHADDu8 : MVE_VRHADD; +defm MVE_VRHADDu16 : MVE_VRHADD; +defm MVE_VRHADDu32 : MVE_VRHADD; // Rounding Halving Add perform the arithemtic operation with an extra bit of // precision, before performing the shift, to void clipping errors. 
We're not @@ -2303,11 +2296,12 @@ class MVE_VHSUB_ size, list pattern=[]> : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>; -multiclass MVE_VHADD_m { def "" : MVE_VHADD_; defvar Inst = !cast(NAME); + defm : MVE_TwoOpPattern(NAME)>; let Predicates = [HasMVEInt] in { // Unpredicated add-and-divide-by-two @@ -2316,30 +2310,23 @@ multiclass MVE_VHADD_m; - - // Predicated add-and-divide-by-two - def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned), - (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg, - (VTI.Vec MQPR:$inactive)))>; } } -multiclass MVE_VHADD - : MVE_VHADD_m + : MVE_VHADD_m; // Halving add/sub perform the arithemtic operation with an extra bit of // precision, before performing the shift, to void clipping errors. We're not // modelling that here with these patterns, but we're using no wrap forms of // add/sub to ensure that the extra bit of information is not needed. -defm MVE_VHADDs8 : MVE_VHADD; -defm MVE_VHADDs16 : MVE_VHADD; -defm MVE_VHADDs32 : MVE_VHADD; -defm MVE_VHADDu8 : MVE_VHADD; -defm MVE_VHADDu16 : MVE_VHADD; -defm MVE_VHADDu32 : MVE_VHADD; +defm MVE_VHADDs8 : MVE_VHADD; +defm MVE_VHADDs16 : MVE_VHADD; +defm MVE_VHADDs32 : MVE_VHADD; +defm MVE_VHADDu8 : MVE_VHADD; +defm MVE_VHADDu16 : MVE_VHADD; +defm MVE_VHADDu32 : MVE_VHADD; multiclass MVE_VHSUB_m { +multiclass MVE_VHADDSUB_qr_m { def "" : MVE_VxADDSUB_qr; + defm : MVE_TwoOpPatternDup(NAME)>; defm : MVE_vec_scalar_int_pat_m(NAME), VTI, unpred_int, pred_int, 1, 1>; defvar Inst = !cast(NAME); @@ -5386,20 +5373,20 @@ multiclass MVE_VHADDSUB_qr_m : - MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, - add_op, shift_op>; +multiclass MVE_VHADD_qr_m : + MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, Op, int_arm_mve_vhadd, + int_arm_mve_hadd_predicated, add_op, shift_op>; multiclass MVE_VHSUB_qr_m : - MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub, int_arm_mve_hsub_predicated, - add_op, shift_op>; - -defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m; -defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m; -defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m; -defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m; -defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m; -defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m; + MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, null_frag, int_arm_mve_vhsub, + int_arm_mve_hsub_predicated, add_op, shift_op>; + +defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m; +defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m; +defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m; +defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m; +defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m; +defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m; defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m; defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 357aa6d062e9..cdad8e106de6 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -6946,6 +6946,9 @@ def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, v4f32, v4i16, int_arm_neon_vcvthf2fp>, Requires<[HasNEON, HasFP16]>; +def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>; +def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>; + // Vector Reverse. 
// VREV64 : Vector Reverse elements within 64-bit doublewords diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index f80b9a5053f7..20d8a45aaf49 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3561,6 +3561,7 @@ let hasNoSchedulingInfo = 1 in def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary, "tsb", "\t$opt", []>, Requires<[IsThumb, HasV8_4a]> { let Inst{31-0} = 0xf3af8012; + let DecoderMethod = "DecodeTSBInstruction"; } } @@ -3950,6 +3951,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // Tail calls. The MachO version of thumb tail calls uses a t2 branch, so // it goes here. +// Windows SEH unwinding also needs a strict t2 branch for tail calls. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. let Uses = [SP] in @@ -3957,15 +3959,14 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins thumb_br_target:$dst, pred:$p), 4, IIC_Br, [], (t2B thumb_br_target:$dst, pred:$p)>, - Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>; + Requires<[IsThumb2]>, Sched<[WriteBr]>; } // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, 2, IIC_iALUx, - "it$mask\t$cc", "", []>, - ComplexDeprecationPredicate<"IT"> { + "it$mask\t$cc", "", []> { // 16-bit instruction. let Inst{31-16} = 0x0000; let Inst{15-8} = 0b10111111; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index dc5f1b92a6c2..b233555d5225 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -584,12 +584,12 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), - IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "", [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), - IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "", [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -603,12 +603,12 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), - IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", + IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "", [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), - IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", + IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
@@ -627,7 +627,7 @@ def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", + IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>; def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, @@ -647,7 +647,7 @@ def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0, let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), - IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "", [(arm_cmpfpe0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -655,7 +655,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), - IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "", [(arm_cmpfpe0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -675,7 +675,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), - IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", + IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "", [(arm_cmpfp0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -683,7 +683,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), - IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", + IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "", [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -704,7 +704,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), - IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", + IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "", [(set DPR:$Dd, (fpextend SPR:$Sm))]>, Sched<[WriteFPCVT]> { // Instruction operands. @@ -723,7 +723,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, - IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", + IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "", [(set SPR:$Sd, (fpround DPR:$Dm))]>, Sched<[WriteFPCVT]> { // Instruction operands. @@ -749,7 +749,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half, single and double-precision. 
let hasSideEffects = 0 in def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; @@ -760,26 +760,30 @@ def : FP16Pat<(f16_to_fp GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; let hasSideEffects = 0 in -def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", +def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; def : FP16Pat<(f16 (fpround SPR:$Sm)), - (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>; + (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>; def : FP16Pat<(fp_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), - (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2), + (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), + (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), - (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2), + (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), + (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; let hasSideEffects = 0 in def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; @@ -792,22 +796,26 @@ def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))), (SSubReg_f16_reg imm_odd:$lane)))>; let hasSideEffects = 0 in -def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", +def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), - (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2), + (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), + (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), - (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2), + (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), + (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), - NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", "", [/* Intentionally left blank, see patterns 
below */]>, Requires<[HasFPARMv8, HasDPVFP]>, Sched<[WriteFPCVT]> { @@ -829,8 +837,8 @@ def : FP16Pat<(f64 (f16_to_fp GPR:$a)), Requires<[HasFPARMv8, HasDPVFP]>; def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, - (outs SPR:$Sd), (ins DPR:$Dm), - NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. @@ -847,15 +855,15 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, } def : FullFP16Pat<(f16 (fpround DPR:$Dm)), - (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>, + (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>, Requires<[HasFPARMv8, HasDPVFP]>; def : FP16Pat<(fp_to_f16 (f64 DPR:$a)), - (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>, + (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>, Requires<[HasFPARMv8, HasDPVFP]>; def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), - NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", "", []>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. bits<5> Sm; @@ -868,8 +876,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, } def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, - (outs SPR:$Sd), (ins DPR:$Dm), - NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda", []>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. bits<5> Sd; @@ -990,7 +998,7 @@ defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", + IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>; def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, @@ -1019,7 +1027,7 @@ multiclass vrint_inst_zrx { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", "", [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, Requires<[HasFPARMv8]> { let Inst{7} = op2; @@ -1027,7 +1035,7 @@ multiclass vrint_inst_zrx { } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", "", [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, Requires<[HasFPARMv8, HasDPVFP]> { let Inst{7} = op2; @@ -1094,13 +1102,13 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", + IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, Sched<[WriteFPSQRT64]>; def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", + IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, Sched<[WriteFPSQRT32]>; @@ -1113,12 +1121,12 @@ let hasSideEffects = 0 in { let isMoveReg = 1 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>, + IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", "", []>, Requires<[HasFPRegs64]>; def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>, + 
IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", "", []>, Requires<[HasFPRegs]>; } // isMoveReg @@ -1984,7 +1992,7 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, class BF16_VCVT op7_6> : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm), VFPUnaryFrm, NoItinerary, - opc, ".bf16.f32\t$Sd, $Sm", []>, + opc, ".bf16.f32\t$Sd, $Sm", "", []>, RegConstraint<"$dst = $Sd">, Requires<[HasBF16]>, Sched<[]> { @@ -2440,7 +2448,7 @@ def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), class MovFromVFP opc19_16, dag oops, dag iops, string opc, string asm, list pattern>: - VFPAI { + VFPAI { // Instruction operand. bits<4> Rt; @@ -2525,7 +2533,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { class MovToVFP opc19_16, dag oops, dag iops, string opc, string asm, list pattern>: - VFPAI { + VFPAI { // Instruction operand. bits<4> Rt; @@ -2598,7 +2606,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, - "vmov", ".f64\t$Dd, $imm", + "vmov", ".f64\t$Dd, $imm", "", [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3,HasDPVFP]> { bits<5> Dd; @@ -2617,7 +2625,7 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_fpUNA32, - "vmov", ".f32\t$Sd, $imm", + "vmov", ".f32\t$Sd, $imm", "", [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { bits<5> Sd; bits<8> imm; @@ -2635,7 +2643,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm), VFPMiscFrm, IIC_fpUNA16, - "vmov", ".f16\t$Sd, $imm", + "vmov", ".f16\t$Sd, $imm", "", [(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>, Requires<[HasFullFP16]> { bits<5> Sd; diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 188b5562cac9..1c44893581f9 100644 --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -624,12 +624,12 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, bool UseMovt = STI.useMovt(); - unsigned Size = TM.getPointerSize(0); + LLT PtrTy = MRI.getType(MIB->getOperand(0).getReg()); const Align Alignment(4); - auto addOpsForConstantPoolLoad = [&MF, Alignment, - Size](MachineInstrBuilder &MIB, - const GlobalValue *GV, bool IsSBREL) { + auto addOpsForConstantPoolLoad = [&MF, Alignment, PtrTy]( + MachineInstrBuilder &MIB, + const GlobalValue *GV, bool IsSBREL) { assert((MIB->getOpcode() == ARM::LDRi12 || MIB->getOpcode() == ARM::t2LDRpci) && "Unsupported instruction"); @@ -644,7 +644,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0) .addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - Size, Alignment)); + PtrTy, Alignment)); if (MIB->getOpcode() == ARM::LDRi12) MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); @@ -733,7 +733,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, // Add the offset to the SB register. MIB->setDesc(TII.get(Opcodes.ADDrr)); - MIB->RemoveOperand(1); + MIB->removeOperand(1); MIB.addReg(ARM::R9) // FIXME: don't hardcode R9 .addReg(Offset) .add(predOps(ARMCC::AL)) @@ -748,7 +748,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, } else { // Load the global's address from the constant pool. 
MIB->setDesc(TII.get(Opcodes.ConstPoolLoad)); - MIB->RemoveOperand(1); + MIB->removeOperand(1); addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false); } } else if (STI.isTargetMachO()) { @@ -997,7 +997,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) { auto CPIndex = ConstPool->getConstantPoolIndex(I.getOperand(1).getFPImm(), Alignment); MIB->setDesc(TII.get(LoadOpcode)); - MIB->RemoveOperand(1); + MIB->removeOperand(1); MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0) .addMemOperand( MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index de88ffab1c28..52b6b6f3bcf7 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -14,6 +14,7 @@ #include "ARMCallLowering.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index ef5fc12feb54..0a38f5633ae3 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -33,6 +34,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -2108,7 +2110,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return false; MF = &Fn; - STI = &static_cast(Fn.getSubtarget()); + STI = &Fn.getSubtarget(); TL = STI->getTargetLowering(); AFI = Fn.getInfo(); TII = STI->getInstrInfo(); @@ -2199,7 +2201,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return false; TD = &Fn.getDataLayout(); - STI = &static_cast(Fn.getSubtarget()); + STI = &Fn.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); @@ -2894,10 +2896,12 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg " << Base.virtRegIndex() << "\n"); - // Make sure that Increment has no uses before BaseAccess. + // Make sure that Increment has no uses before BaseAccess that are not PHI + // uses. 
for (MachineInstr &Use : MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) { - if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) { + if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI && + !DT->dominates(BaseAccess, &Use))) { LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n"); return false; } diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index f822672c4477..aa739db44da2 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -59,8 +59,10 @@ #include "MVETailPredUtils.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" @@ -1297,7 +1299,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) { } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { - const ARMSubtarget &ST = static_cast(mf.getSubtarget()); + const ARMSubtarget &ST = mf.getSubtarget(); if (!ST.hasLOB()) return false; diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 308d5e7889f2..9596e88deb18 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -73,3 +73,10 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(MF.getFunction()); } + +MachineFunctionInfo * +ARMFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap + &Src2DstMBB) const { + return DestMF.cloneInfo(*this); +} diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index d8d937055d23..e906fea1a810 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. 
unsigned FPCXTSaveSize = 0; + unsigned FRSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; unsigned DPRCSAlignGapSize = 0; @@ -158,6 +159,11 @@ public: explicit ARMFunctionInfo(MachineFunction &MF); + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; + bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } @@ -198,12 +204,14 @@ public: void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; } + unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } + void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp index 46baf8930939..6effd84041b5 100644 --- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp +++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp @@ -459,6 +459,10 @@ bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) { if (ValidLHS && ValidRHS) return true; + // Ensure we don't add the root as the incoming accumulator. + if (R.getRoot() == I) + return false; + return R.InsertAcc(I); } case Instruction::Mul: { @@ -535,6 +539,7 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) { InsertParallelMACs(R); Changed = true; AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + LLVM_DEBUG(dbgs() << "BB after inserting parallel MACs:\n" << BB); } } diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 1a7f10a13ed3..527fefbd291e 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -13,9 +13,9 @@ #include "ARMRegisterBankInfo.h" #include "ARMInstrInfo.h" // For the register classes #include "ARMSubtarget.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_TARGET_REGBANK_IMPL @@ -129,8 +129,7 @@ static void checkValueMappings() { } // end namespace arm } // end namespace llvm -ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) - : ARMGenRegisterBankInfo() { +ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) { // We have only one set of register banks, whatever the subtarget // is. Therefore, the initialization of the RegBanks table should be // done only once. 
Indeed the table of all register banks diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h index b8aff65a967e..c56134aab38c 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H #define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "ARMGenRegisterBank.inc" diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index ff4647dd46fd..d1d30e614fc9 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -15,4 +15,4 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo() {} +ARMRegisterInfo::ARMRegisterInfo() = default; diff --git a/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/llvm/lib/Target/ARM/ARMSLSHardening.cpp index 332acb453124..fa80b75484e1 100644 --- a/llvm/lib/Target/ARM/ARMSLSHardening.cpp +++ b/llvm/lib/Target/ARM/ARMSLSHardening.cpp @@ -322,8 +322,8 @@ MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump( assert(ImpSPOpIdx != -1); int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); - BL->RemoveOperand(FirstOpIdxToRemove); - BL->RemoveOperand(SecondOpIdxToRemove); + BL->removeOperand(FirstOpIdxToRemove); + BL->removeOperand(SecondOpIdxToRemove); // Now copy over the implicit operands from the original IndirectCall BL->copyImplicitOps(MF, IndirectCall); MF.moveCallSiteInfo(&IndirectCall, BL); diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 12d4ad889897..379521752261 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -296,7 +296,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { const ARMSubtarget &Subtarget = @@ -314,6 +314,9 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( DAG.getZExtOrTrunc(Size, dl, MVT::i32)); } - return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, - Alignment.value(), RTLIB::MEMSET); + if (!AlwaysInline) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, + Alignment.value(), RTLIB::MEMSET); + + return SDValue(); } diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index 7aa831c09248..ffa8b5049351 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -55,6 +55,7 @@ public: SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override; SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 32160b109343..79244f634ce3 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/Triple.h" #include 
"llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -52,19 +53,15 @@ UseFusedMulOps("arm-use-mulops", enum ITMode { DefaultIT, - RestrictedIT, - NoRestrictedIT + RestrictedIT }; static cl::opt -IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), - cl::ZeroOrMore, - cl::values(clEnumValN(DefaultIT, "arm-default-it", - "Generate IT block based on arch"), - clEnumValN(RestrictedIT, "arm-restrict-it", - "Disallow deprecated IT based on ARMv8"), - clEnumValN(NoRestrictedIT, "arm-no-restrict-it", - "Allow IT blocks based on ARMv7"))); + IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), + cl::values(clEnumValN(DefaultIT, "arm-default-it", + "Generate any type of IT block"), + clEnumValN(RestrictedIT, "arm-restrict-it", + "Disallow complex IT blocks"))); /// ForceFastISel - Use the fast-isel, even for subtargets where it is not /// currently supported (for testing only). @@ -237,21 +234,18 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops() && !hasMinSize(); + RestrictIT = false; break; case RestrictedIT: RestrictIT = true; break; - case NoRestrictedIT: - RestrictIT = false; - break; } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. const FeatureBitset &Bits = getFeatureBits(); if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters (Options.UnsafeFPMath || isTargetDarwin())) - UseNEONForSinglePrecisionFP = true; + HasNEONForFP = true; if (isRWPI()) ReserveR9 = true; @@ -399,6 +393,14 @@ bool ARMSubtarget::enableSubRegLiveness() const { return hasMVEIntegerOps(); } +bool ARMSubtarget::enableMachinePipeliner() const { + // Enable the MachinePipeliner before register allocation for subtargets + // with the use-mipipeliner feature. + return getSchedModel().hasInstrSchedModel() && useMachinePipeliner(); +} + +bool ARMSubtarget::useDFAforSMS() const { return false; } + // This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostRAScheduler() const { if (enableMachineScheduler()) @@ -417,8 +419,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const { return !isThumb1Only(); } -bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } - bool ARMSubtarget::useStride4VFPs() const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). 
But WatchOS uses a compact unwind @@ -491,3 +491,12 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF, return isThumb2() && MF.getFunction().hasMinSize() && ARM::GPRRegClass.contains(PhysReg); } + +bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const { + const Function &F = MF.getFunction(); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || + !F.needsUnwindTableEntry()) + return false; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF); +} diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 7cbdc014299f..460ec62d5a33 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -25,8 +25,8 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -150,6 +150,11 @@ public: }; protected: +// Bool members corresponding to the SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "ARMGenSubtargetInfo.inc" + /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily = Others; @@ -159,343 +164,22 @@ protected: /// ARMArch - ARM architecture ARMArchEnum ARMArch = ARMv4t; - /// HasV4TOps, HasV5TOps, HasV5TEOps, - /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - - /// Specify whether target support specific ARM ISA variants. - bool HasV4TOps = false; - bool HasV5TOps = false; - bool HasV5TEOps = false; - bool HasV6Ops = false; - bool HasV6MOps = false; - bool HasV6KOps = false; - bool HasV6T2Ops = false; - bool HasV7Ops = false; - bool HasV8Ops = false; - bool HasV8_1aOps = false; - bool HasV8_2aOps = false; - bool HasV8_3aOps = false; - bool HasV8_4aOps = false; - bool HasV8_5aOps = false; - bool HasV8_6aOps = false; - bool HasV8_8aOps = false; - bool HasV8_7aOps = false; - bool HasV9_0aOps = false; - bool HasV9_1aOps = false; - bool HasV9_2aOps = false; - bool HasV9_3aOps = false; - bool HasV8MBaselineOps = false; - bool HasV8MMainlineOps = false; - bool HasV8_1MMainlineOps = false; - bool HasMVEIntegerOps = false; - bool HasMVEFloatOps = false; - bool HasCDEOps = false; - - /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what - /// floating point ISAs are supported. - bool HasVFPv2 = false; - bool HasVFPv3 = false; - bool HasVFPv4 = false; - bool HasFPARMv8 = false; - bool HasNEON = false; - bool HasFPRegs = false; - bool HasFPRegs16 = false; - bool HasFPRegs64 = false; - - /// Versions of the VFP flags restricted to single precision, or to - /// 16 d-registers, or both. - bool HasVFPv2SP = false; - bool HasVFPv3SP = false; - bool HasVFPv4SP = false; - bool HasFPARMv8SP = false; - bool HasVFPv3D16 = false; - bool HasVFPv4D16 = false; - bool HasFPARMv8D16 = false; - bool HasVFPv3D16SP = false; - bool HasVFPv4D16SP = false; - bool HasFPARMv8D16SP = false; - - /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. - bool HasDotProd = false; - - /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been - /// specified. 
Use the method useNEONForSinglePrecisionFP() to - /// determine if NEON should actually be used. - bool UseNEONForSinglePrecisionFP = false; - /// UseMulOps - True if non-microcoded fused integer multiply-add and /// multiply-subtract instructions should be used. bool UseMulOps = false; - /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates - /// whether the FP VML[AS] instructions are slow (if so, don't use them). - bool SlowFPVMLx = false; - - /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates - /// whether the FP VFM[AS] instructions are slow (if so, don't use them). - bool SlowFPVFMx = false; - - /// HasVMLxForwarding - If true, NEON has special multiplier accumulator - /// forwarding to allow mul + mla being issued back to back. - bool HasVMLxForwarding = false; - - /// SlowFPBrcc - True if floating point compare + branch is slow. - bool SlowFPBrcc = false; - - /// InThumbMode - True if compiling for Thumb, false for ARM. - bool InThumbMode = false; - - /// UseSoftFloat - True if we're using software floating point features. - bool UseSoftFloat = false; - - /// UseMISched - True if MachineScheduler should be used for this subtarget. - bool UseMISched = false; - - /// DisablePostRAScheduler - False if scheduling should happen again after - /// register allocation. - bool DisablePostRAScheduler = false; - - /// HasThumb2 - True if Thumb2 instructions are supported. - bool HasThumb2 = false; - - /// NoARM - True if subtarget does not support ARM mode execution. - bool NoARM = false; - - /// ReserveR9 - True if R9 is not available as a general purpose register. - bool ReserveR9 = false; - - /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of - /// 32-bit imms (including global addresses). - bool NoMovt = false; - /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. bool SupportsTailCall = false; - /// HasFP16 - True if subtarget supports half-precision FP conversions - bool HasFP16 = false; - - /// HasFullFP16 - True if subtarget supports half-precision FP operations - bool HasFullFP16 = false; - - /// HasFP16FML - True if subtarget supports half-precision FP fml operations - bool HasFP16FML = false; - - /// HasBF16 - True if subtarget supports BFloat16 floating point operations - bool HasBF16 = false; - - /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply - bool HasMatMulInt8 = false; - - /// HasD32 - True if subtarget has the full 32 double precision - /// FP registers for VFPv3. - bool HasD32 = false; - - /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode - bool HasHardwareDivideInThumb = false; - - /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode - bool HasHardwareDivideInARM = false; - - /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier - /// instructions. - bool HasDataBarrier = false; - - /// HasFullDataBarrier - True if the subtarget supports DFB data barrier - /// instruction. - bool HasFullDataBarrier = false; - - /// HasV7Clrex - True if the subtarget supports CLREX instructions - bool HasV7Clrex = false; - - /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc) - /// instructions - bool HasAcquireRelease = false; - - /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions - /// over 16-bit ones. 
- bool Pref32BitThumb = false; - - /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions - /// that partially update CPSR and add false dependency on the previous - /// CPSR setting instruction. - bool AvoidCPSRPartialUpdate = false; - - /// CheapPredicableCPSRDef - If true, disable +1 predication cost - /// for instructions updating CPSR. Enabled for Cortex-A57. - bool CheapPredicableCPSRDef = false; - - /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting - /// movs with shifter operand (i.e. asr, lsl, lsr). - bool AvoidMOVsShifterOperand = false; - - /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should - /// avoid issue "normal" call instructions to callees which do not return. - bool HasRetAddrStack = false; - - /// HasBranchPredictor - True if the subtarget has a branch predictor. Having - /// a branch predictor or not changes the expected cost of taking a branch - /// which affects the choice of whether to use predicated instructions. - bool HasBranchPredictor = true; - - /// HasMPExtension - True if the subtarget supports Multiprocessing - /// extension (ARMv7 only). - bool HasMPExtension = false; - - /// HasVirtualization - True if the subtarget supports the Virtualization - /// extension. - bool HasVirtualization = false; - - /// HasFP64 - If true, the floating point unit supports double - /// precision. - bool HasFP64 = false; - - /// If true, the processor supports the Performance Monitor Extensions. These - /// include a generic cycle-counter as well as more fine-grained (often - /// implementation-specific) events. - bool HasPerfMon = false; - - /// HasTrustZone - if true, processor supports TrustZone security extensions - bool HasTrustZone = false; - - /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions - bool Has8MSecExt = false; - - /// HasSHA2 - if true, processor supports SHA1 and SHA256 - bool HasSHA2 = false; - - /// HasAES - if true, processor supports AES - bool HasAES = false; - - /// HasCrypto - if true, processor supports Cryptography extensions - bool HasCrypto = false; - - /// HasCRC - if true, processor supports CRC instructions - bool HasCRC = false; - - /// HasRAS - if true, the processor supports RAS extensions - bool HasRAS = false; - - /// HasLOB - if true, the processor supports the Low Overhead Branch extension - bool HasLOB = false; - - bool HasPACBTI = false; - - /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are - /// particularly effective at zeroing a VFP register. - bool HasZeroCycleZeroing = false; - - /// HasFPAO - if true, processor does positive address offset computation faster - bool HasFPAO = false; - - /// HasFuseAES - if true, processor executes back to back AES instruction - /// pairs faster. - bool HasFuseAES = false; - - /// HasFuseLiterals - if true, processor executes back to back - /// bottom and top halves of literal generation faster. - bool HasFuseLiterals = false; - - /// If true, if conversion may decide to leave some instructions unpredicated. - bool IsProfitableToUnpredicate = false; - - /// If true, VMOV will be favored over VGETLNi32. - bool HasSlowVGETLNi32 = false; - - /// If true, VMOV will be favored over VDUP. - bool HasSlowVDUP32 = false; - - /// If true, VMOVSR will be favored over VMOVDRR. - bool PreferVMOVSR = false; - - /// If true, ISHST barriers will be used for Release semantics. 
- bool PreferISHST = false; - - /// If true, a VLDM/VSTM starting with an odd register number is considered to - /// take more microops than single VLDRS/VSTRS. - bool SlowOddRegister = false; - - /// If true, loading into a D subregister will be penalized. - bool SlowLoadDSubregister = false; - - /// If true, use a wider stride when allocating VFP registers. - bool UseWideStrideVFP = false; - - /// If true, the AGU and NEON/FPU units are multiplexed. - bool HasMuxedUnits = false; - - /// If true, VMOVS will never be widened to VMOVD. - bool DontWidenVMOVS = false; - - /// If true, splat a register between VFP and NEON instructions. - bool SplatVFPToNeon = false; - - /// If true, run the MLx expansion pass. - bool ExpandMLx = false; - - /// If true, VFP/NEON VMLA/VMLS have special RAW hazards. - bool HasVMLxHazards = false; - - // If true, read thread pointer from coprocessor register. - bool ReadTPHard = false; - - /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. - bool UseNEONForFPMovs = false; - - /// If true, VLDn instructions take an extra cycle for unaligned accesses. - bool CheckVLDnAlign = false; - - /// If true, VFP instructions are not pipelined. - bool NonpipelinedVFP = false; - - /// StrictAlign - If true, the subtarget disallows unaligned memory - /// accesses for some types. For details, see - /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). - bool StrictAlign = false; - - /// RestrictIT - If true, the subtarget disallows generation of deprecated IT - /// blocks to conform to ARMv8 rule. + /// RestrictIT - If true, the subtarget disallows generation of complex IT + /// blocks. bool RestrictIT = false; - /// HasDSP - If true, the subtarget supports the DSP (saturating arith - /// and such) instructions. - bool HasDSP = false; - - /// NaCl TRAP instruction is generated instead of the regular TRAP. - bool UseNaClTrap = false; - - /// Generate calls via indirect call instructions. - bool GenLongCalls = false; - - /// Generate code that does not contain data access to code sections. - bool GenExecuteOnly = false; - - /// Target machine allowed unsafe FP math (such as use of NEON fp) - bool UnsafeFPMath = false; - /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). bool UseSjLjEH = false; - /// Has speculation barrier - bool HasSB = false; - - /// Implicitly convert an instruction to a different one if its immediates - /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. - bool NegativeImmediates = true; - - /// Mitigate against the cve-2021-35465 security vulnurability. - bool FixCMSE_CVE_2021_35465 = false; - - /// Harden against Straight Line Speculation for Returns and Indirect - /// Branches. - bool HardenSlsRetBr = false; - - /// Harden against Straight Line Speculation for indirect calls. - bool HardenSlsBlr = false; - - /// Generate thunk code for SLS mitigation in the normal text section. - bool HardenSlsNoComdat = false; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. Align stackAlignment = Align(4); @@ -540,10 +224,6 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) 
InstrItineraryData InstrItins; - /// NoBTIAtReturnTwice - Don't place a BTI instruction after - /// return-twice constructs (setjmp) - bool NoBTIAtReturnTwice = false; - /// Options passed via command line that could influence the target const TargetOptions &Options; @@ -622,38 +302,12 @@ private: std::bitset<8> CoprocCDE = {}; public: - void computeIssueWidth(); +// Getters for SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "ARMGenSubtargetInfo.inc" - bool hasV4TOps() const { return HasV4TOps; } - bool hasV5TOps() const { return HasV5TOps; } - bool hasV5TEOps() const { return HasV5TEOps; } - bool hasV6Ops() const { return HasV6Ops; } - bool hasV6MOps() const { return HasV6MOps; } - bool hasV6KOps() const { return HasV6KOps; } - bool hasV6T2Ops() const { return HasV6T2Ops; } - bool hasV7Ops() const { return HasV7Ops; } - bool hasV8Ops() const { return HasV8Ops; } - bool hasV8_1aOps() const { return HasV8_1aOps; } - bool hasV8_2aOps() const { return HasV8_2aOps; } - bool hasV8_3aOps() const { return HasV8_3aOps; } - bool hasV8_4aOps() const { return HasV8_4aOps; } - bool hasV8_5aOps() const { return HasV8_5aOps; } - bool hasV8_6aOps() const { return HasV8_6aOps; } - bool hasV8_7aOps() const { return HasV8_7aOps; } - bool hasV8_8aOps() const { return HasV8_8aOps; } - bool hasV9_0aOps() const { return HasV9_0aOps; } - bool hasV9_1aOps() const { return HasV9_1aOps; } - bool hasV9_2aOps() const { return HasV9_2aOps; } - bool hasV9_3aOps() const { return HasV9_3aOps; } - bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } - bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } - bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } - bool hasMVEIntegerOps() const { return HasMVEIntegerOps; } - bool hasMVEFloatOps() const { return HasMVEFloatOps; } - bool hasCDEOps() const { return HasCDEOps; } - bool hasFPRegs() const { return HasFPRegs; } - bool hasFPRegs16() const { return HasFPRegs16; } - bool hasFPRegs64() const { return HasFPRegs64; } + void computeIssueWidth(); /// @{ /// These functions are obsolete, please consider adding subtarget features @@ -673,31 +327,14 @@ public: bool hasARMOps() const { return !NoARM; } - bool hasVFP2Base() const { return HasVFPv2SP; } - bool hasVFP3Base() const { return HasVFPv3D16SP; } - bool hasVFP4Base() const { return HasVFPv4D16SP; } - bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } - bool hasNEON() const { return HasNEON; } - bool hasSHA2() const { return HasSHA2; } - bool hasAES() const { return HasAES; } - bool hasCrypto() const { return HasCrypto; } - bool hasDotProd() const { return HasDotProd; } - bool hasCRC() const { return HasCRC; } - bool hasRAS() const { return HasRAS; } - bool hasLOB() const { return HasLOB; } - bool hasPACBTI() const { return HasPACBTI; } - bool hasVirtualization() const { return HasVirtualization; } - bool useNEONForSinglePrecisionFP() const { - return hasNEON() && UseNEONForSinglePrecisionFP; + return hasNEON() && hasNEONForFP(); } - bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } - bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } - bool hasDataBarrier() const { return HasDataBarrier; } - bool hasFullDataBarrier() const { return HasFullDataBarrier; } - bool hasV7Clrex() const { return HasV7Clrex; } - bool hasAcquireRelease() const { return HasAcquireRelease; } + bool hasVFP2Base() const { return hasVFPv2SP(); } + bool hasVFP3Base() const { 
return hasVFPv3D16SP(); } + bool hasVFP4Base() const { return hasVFPv4D16SP(); } + bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); @@ -710,43 +347,7 @@ public: } bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); } bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); } - bool hasVMLxForwarding() const { return HasVMLxForwarding; } - bool isFPBrccSlow() const { return SlowFPBrcc; } - bool hasFP64() const { return HasFP64; } - bool hasPerfMon() const { return HasPerfMon; } - bool hasTrustZone() const { return HasTrustZone; } - bool has8MSecExt() const { return Has8MSecExt; } - bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } - bool hasFPAO() const { return HasFPAO; } - bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } - bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } - bool hasSlowVDUP32() const { return HasSlowVDUP32; } - bool preferVMOVSR() const { return PreferVMOVSR; } - bool preferISHSTBarriers() const { return PreferISHST; } - bool expandMLx() const { return ExpandMLx; } - bool hasVMLxHazards() const { return HasVMLxHazards; } - bool hasSlowOddRegister() const { return SlowOddRegister; } - bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; } - bool useWideStrideVFP() const { return UseWideStrideVFP; } - bool hasMuxedUnits() const { return HasMuxedUnits; } - bool dontWidenVMOVS() const { return DontWidenVMOVS; } - bool useSplatVFPToNeon() const { return SplatVFPToNeon; } - bool useNEONForFPMovs() const { return UseNEONForFPMovs; } - bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; } - bool nonpipelinedVFP() const { return NonpipelinedVFP; } - bool prefers32BitThumb() const { return Pref32BitThumb; } - bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } - bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } - bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } - bool hasRetAddrStack() const { return HasRetAddrStack; } - bool hasBranchPredictor() const { return HasBranchPredictor; } - bool hasMPExtension() const { return HasMPExtension; } - bool hasDSP() const { return HasDSP; } - bool useNaClTrap() const { return UseNaClTrap; } bool useSjLjEH() const { return UseSjLjEH; } - bool hasSB() const { return HasSB; } - bool genLongCalls() const { return GenLongCalls; } - bool genExecuteOnly() const { return GenExecuteOnly; } bool hasBaseDSP() const { if (isThumb()) return hasDSP(); @@ -754,25 +355,16 @@ public: return hasV5TEOps(); } - bool hasFP16() const { return HasFP16; } - bool hasD32() const { return HasD32; } - bool hasFullFP16() const { return HasFullFP16; } - bool hasFP16FML() const { return HasFP16FML; } - bool hasBF16() const { return HasBF16; } - - bool hasFuseAES() const { return HasFuseAES; } - bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. 
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); } - bool hasMatMulInt8() const { return HasMatMulInt8; } - const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } + bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } @@ -825,24 +417,21 @@ public: bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } - bool disablePostRAScheduler() const { return DisablePostRAScheduler; } - bool useSoftFloat() const { return UseSoftFloat; } - bool isThumb() const { return InThumbMode; } + bool useMachinePipeliner() const { return UseMIPipeliner; } bool hasMinSize() const { return OptMinSize; } - bool isThumb1Only() const { return InThumbMode && !HasThumb2; } - bool isThumb2() const { return InThumbMode && HasThumb2; } - bool hasThumb2() const { return HasThumb2; } + bool isThumb1Only() const { return isThumb() && !hasThumb2(); } + bool isThumb2() const { return isThumb() && hasThumb2(); } bool isMClass() const { return ARMProcClass == MClass; } bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } - bool isReadTPHard() const { return ReadTPHard; } bool isR9Reserved() const { return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } MCPhysReg getFramePointerReg() const { - if (isTargetDarwin() || (!isTargetWindows() && isThumb())) + if (isTargetDarwin() || + (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) return ARM::R7; return ARM::R11; } @@ -859,6 +448,8 @@ public: isThumb1Only(); } + bool splitFramePointerPush(const MachineFunction &MF) const; + bool useStride4VFPs() const; bool useMovt() const; @@ -878,6 +469,10 @@ public: /// Returns true if machine scheduler should be enabled. bool enableMachineScheduler() const override; + /// Returns true if machine pipeliner should be enabled. + bool enableMachinePipeliner() const override; + bool useDFAforSMS() const override; + /// True for some subtargets at > -O0. bool enablePostRAScheduler() const override; @@ -891,9 +486,6 @@ public: /// scheduling, DAGCombine, etc.). bool useAA() const override { return true; } - // enableAtomicExpand- True if we need to expand our atomics. - bool enableAtomicExpand() const override; - /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. 
const InstrItineraryData *getInstrItineraryData() const override { @@ -956,14 +548,6 @@ public: bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; unsigned getGPRAllocationOrder(const MachineFunction &MF) const; - - bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; } - - bool hardenSlsRetBr() const { return HardenSlsRetBr; } - bool hardenSlsBlr() const { return HardenSlsBlr; } - bool hardenSlsNoComdat() const { return HardenSlsNoComdat; } - - bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index c38970f8e341..d95c21d6504b 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDomainFix.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" @@ -30,20 +31,20 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -106,6 +107,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { initializeMVEGatherScatterLoweringPass(Registry); initializeARMSLSHardeningPass(Registry); initializeMVELaneInterleavingPass(Registry); + initializeARMFixCortexA57AES1742098Pass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -194,7 +196,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { - if (!RM.hasValue()) + if (!RM) // Default relocation model on Darwin is PIC. return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static; @@ -307,7 +309,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { } TargetTransformInfo -ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) { +ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(ARMTTIImpl(this, F)); } @@ -434,6 +436,9 @@ void ARMPassConfig::addIRPasses() { // Add Control Flow Guard checks. 
if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); + + if (TM->Options.JMCInstrument) + addPass(createJMCInstrumenterPass()); } void ARMPassConfig::addCodeGenPrepare() { @@ -505,6 +510,9 @@ bool ARMPassConfig::addGlobalInstructionSelect() { void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + if (getOptLevel() == CodeGenOpt::Aggressive) + addPass(&MachinePipelinerID); + addPass(createMVETPAndVPTOptimisationsPass()); addPass(createMLxExpansionPass()); @@ -573,8 +581,20 @@ void ARMPassConfig::addPreEmitPass() { } void ARMPassConfig::addPreEmitPass2() { + // Inserts fixup instructions before unsafe AES operations. Instructions may + // be inserted at the start of blocks and within blocks, so this pass has to + // come before those below. + addPass(createARMFixCortexA57AES1742098Pass()); + // Inserts BTIs at the start of functions and indirectly-called basic blocks, + // so passes cannot add to the start of basic blocks once this has run. addPass(createARMBranchTargetsPass()); + // Inserts Constant Islands. Block sizes cannot be increased after this point, + // as this may push the branch ranges and load offsets of accessing constant + // pools out of range. addPass(createARMConstantIslandPass()); + // Finalises Low-Overhead Loops. This replaces pseudo instructions with real + // instructions, but the pseudos all have conservative sizes so that block + // sizes will only be decreased by this pass. addPass(createARMLowOverheadLoopsPass()); if (TM->getTargetTriple().isOSWindows()) { diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h index 8428092bf179..8d33a038deeb 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -52,7 +52,7 @@ public: const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index d9d563ead260..3a9946ee810b 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1202,7 +1202,8 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) { InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, - int Index, VectorType *SubTp) { + int Index, VectorType *SubTp, + ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); if (ST->hasNEON()) { if (Kind == TTI::SK_Broadcast) { @@ -1290,7 +1291,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, if (!Mask.empty()) { std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - if (Mask.size() <= LT.second.getVectorNumElements() && + if (LT.second.isVector() && + Mask.size() <= LT.second.getVectorNumElements() && (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) || isVREVMask(Mask, LT.second, 64))) return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first; @@ -1764,6 +1766,48 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return LT.first * ST->getMVEVectorCostFactor(CostKind); break; } + case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: { + if (ICA.getArgTypes().empty()) + break; + bool IsSigned =
ICA.getID() == Intrinsic::fptosi_sat; + auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]); + EVT MTy = TLI->getValueType(DL, ICA.getReturnType()); + // Check for the legal types, with the correct subtarget features. + if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) || + (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) || + (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32)) + return LT.first; + + // Equally for MVE vector types + if (ST->hasMVEFloatOps() && + (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) && + LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits()) + return LT.first * ST->getMVEVectorCostFactor(CostKind); + + // Otherwise we use a legal convert followed by a min+max + if (((ST->hasVFP2Base() && LT.second == MVT::f32) || + (ST->hasFP64() && LT.second == MVT::f64) || + (ST->hasFullFP16() && LT.second == MVT::f16) || + (ST->hasMVEFloatOps() && + (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) && + LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) { + Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(), + LT.second.getScalarSizeInBits()); + InstructionCost Cost = + LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1; + IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin + : Intrinsic::umin, + LegalTy, {LegalTy, LegalTy}); + Cost += getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax + : Intrinsic::umax, + LegalTy, {LegalTy, LegalTy}); + Cost += getIntrinsicInstrCost(Attrs2, CostKind); + return LT.first * Cost; + } + break; + } } return BaseT::getIntrinsicInstrCost(ICA, CostKind); @@ -1771,7 +1815,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, bool ARMTTIImpl::isLoweredToCall(const Function *F) { if (!F->isIntrinsic()) - BaseT::isLoweredToCall(F); + return BaseT::isLoweredToCall(F); // Assume all Arm-specific intrinsics map to an instruction.
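// That is, anything prefixed "llvm.arm" is expected to expand inline rather
// than turn into a libcall, so it is not treated as a call here.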
if (F->getName().startswith("llvm.arm")) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 5bb84899e5ef..d7a2bdb3db15 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -213,7 +213,8 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, int Index, - VectorType *SubTp); + VectorType *SubTp, + ArrayRef<const Value *> Args = None); bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c7734cc2cf11..b725ea3a84e5 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -453,6 +453,7 @@ class ARMAsmParser : public MCTargetAsmParser { bool AllowRAAC = false); bool parseMemory(OperandVector &); bool parseOperand(OperandVector &, StringRef Mnemonic); + bool parseImmExpr(int64_t &Out); bool parsePrefix(ARMMCExpr::VariantKind &RefKind); bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType, unsigned &ShiftAmount); @@ -488,6 +489,17 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveAlign(SMLoc L); bool parseDirectiveThumbSet(SMLoc L); + bool parseDirectiveSEHAllocStack(SMLoc L, bool Wide); + bool parseDirectiveSEHSaveRegs(SMLoc L, bool Wide); + bool parseDirectiveSEHSaveSP(SMLoc L); + bool parseDirectiveSEHSaveFRegs(SMLoc L); + bool parseDirectiveSEHSaveLR(SMLoc L); + bool parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment); + bool parseDirectiveSEHNop(SMLoc L, bool Wide); + bool parseDirectiveSEHEpilogStart(SMLoc L, bool Condition); + bool parseDirectiveSEHEpilogEnd(SMLoc L); + bool parseDirectiveSEHCustom(SMLoc L); + bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken); StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken, unsigned &PredicationCode, @@ -4528,9 +4540,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, if (Reg == EndReg) continue; // The register must be in the same register class as the first. - if ((Reg == ARM::RA_AUTH_CODE && - RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) || - (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg))) + if (!RC->contains(Reg)) return Error(AfterMinusLoc, "invalid register in register list"); // Ranges must go from low to high. if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) @@ -6319,6 +6329,18 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { } } +bool ARMAsmParser::parseImmExpr(int64_t &Out) { + const MCExpr *Expr = nullptr; + SMLoc L = getParser().getTok().getLoc(); + if (check(getParser().parseExpression(Expr), L, "expected expression")) + return true; + const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr); + if (check(!Value, L, "expected constant expression")) + return true; + Out = Value->getValue(); + return false; +} + // parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. // :lower16: and :upper16:.
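// For example, a 32-bit value is typically materialised in two halves:
//   movw r0, :lower16:sym
//   movt r0, :upper16:sym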
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { @@ -6379,7 +6401,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { CurrentFormat = WASM; break; case MCContext::IsGOFF: + case MCContext::IsSPIRV: case MCContext::IsXCOFF: + case MCContext::IsDXContainer: llvm_unreachable("unexpected object format"); break; } @@ -10958,9 +10982,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } - { // processInstruction() updates inITBlock state, we need to save it away - bool wasInITBlock = inITBlock(); - + { // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. E.g., @@ -10969,12 +10991,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, LLVM_DEBUG(dbgs() << "Changed to: "; Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode())); dbgs() << "\n"); - - // Only after the instruction is fully processed, we can validate it - if (wasInITBlock && hasV8Ops() && isThumb() && - !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) { - Warning(IDLoc, "deprecated instruction in IT block"); - } } // Only move forward at the very end so that everything in validate @@ -11090,6 +11106,39 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveTLSDescSeq(DirectiveID.getLoc()); else return true; + } else if (IsCOFF) { + if (IDVal == ".seh_stackalloc") + parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/false); + else if (IDVal == ".seh_stackalloc_w") + parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/true); + else if (IDVal == ".seh_save_regs") + parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/false); + else if (IDVal == ".seh_save_regs_w") + parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/true); + else if (IDVal == ".seh_save_sp") + parseDirectiveSEHSaveSP(DirectiveID.getLoc()); + else if (IDVal == ".seh_save_fregs") + parseDirectiveSEHSaveFRegs(DirectiveID.getLoc()); + else if (IDVal == ".seh_save_lr") + parseDirectiveSEHSaveLR(DirectiveID.getLoc()); + else if (IDVal == ".seh_endprologue") + parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/false); + else if (IDVal == ".seh_endprologue_fragment") + parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/true); + else if (IDVal == ".seh_nop") + parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/false); + else if (IDVal == ".seh_nop_w") + parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/true); + else if (IDVal == ".seh_startepilogue") + parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/false); + else if (IDVal == ".seh_startepilogue_cond") + parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/true); + else if (IDVal == ".seh_endepilogue") + parseDirectiveSEHEpilogEnd(DirectiveID.getLoc()); + else if (IDVal == ".seh_custom") + parseDirectiveSEHCustom(DirectiveID.getLoc()); + else + return true; } else return true; return false; @@ -11113,8 +11162,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { /// parseDirectiveThumb /// ::= .thumb bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") || - check(!hasThumb(), L, "target does not support Thumb mode")) + if (parseEOL() || check(!hasThumb(), L, "target does not support Thumb mode")) return true; if (!isThumb()) @@ -11127,8 +11175,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc 
L) { /// parseDirectiveARM /// ::= .arm bool ARMAsmParser::parseDirectiveARM(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") || - check(!hasARM(), L, "target does not support ARM mode")) + if (parseEOL() || check(!hasARM(), L, "target does not support ARM mode")) return true; if (isThumb()) @@ -11167,15 +11214,13 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { Parser.getTok().getIdentifier()); getParser().getStreamer().emitThumbFunc(Func); Parser.Lex(); - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.thumb_func' directive")) + if (parseEOL()) return true; return false; } } - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.thumb_func' directive")) + if (parseEOL()) return true; // .thumb_func implies .thumb @@ -11204,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { "'.syntax divided' arm assembly not supported") || check(Mode != "unified" && Mode != "UNIFIED", L, "unrecognized syntax mode in .syntax directive") || - parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + parseEOL()) return true; // TODO tell the MC streamer the mode @@ -11226,7 +11271,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { } Parser.Lex(); - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + if (parseEOL()) return true; if (Val == 16) { @@ -11257,8 +11302,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { SMLoc SRegLoc, ERegLoc; if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc, "register name expected") || - parseToken(AsmToken::EndOfStatement, - "unexpected input in .req directive.")) + parseEOL()) return true; if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) @@ -11276,10 +11320,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { return Error(L, "unexpected input in .unreq directive."); RegisterReqs.erase(Parser.getTok().getIdentifier().lower()); Parser.Lex(); // Eat the identifier. 
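// Nothing else may follow the alias name on the line.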
- if (parseToken(AsmToken::EndOfStatement, - "unexpected input in '.unreq' directive") - return true; - return false; + return parseEOL(); } // After changing arch/CPU, try to put the ARM/Thumb mode back to what it was @@ -11340,11 +11381,11 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { StringRef Name = Parser.getTok().getIdentifier(); Optional<unsigned> Ret = ELFAttrs::attrTypeFromString( Name, ARMBuildAttrs::getARMAttributeTags()); - if (!Ret.hasValue()) { + if (!Ret) { Error(TagLoc, "attribute name not recognised: " + Name); return false; } - Tag = Ret.getValue(); + Tag = *Ret; Parser.Lex(); } else { const MCExpr *AttrExpr; @@ -11406,8 +11447,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { Parser.Lex(); } - if (Parser.parseToken(AsmToken::EndOfStatement, - "unexpected token in '.eabi_attribute' directive")) + if (Parser.parseEOL()) return true; if (IsIntegerValue && IsStringValue) { @@ -11463,8 +11503,7 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { /// parseDirectiveFnStart /// ::= .fnstart bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.fnstart' directive")) + if (parseEOL()) return true; if (UC.hasFnStart()) { @@ -11485,8 +11524,7 @@ /// parseDirectiveFnEnd /// ::= .fnend bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.fnend' directive")) + if (parseEOL()) return true; // Check the ordering of unwind directives if (!UC.hasFnStart()) @@ -11502,8 +11540,7 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { /// parseDirectiveCantUnwind /// ::= .cantunwind bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.cantunwind' directive")) + if (parseEOL()) return true; UC.recordCantUnwind(L); @@ -11538,8 +11575,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { StringRef Name(Parser.getTok().getIdentifier()); Parser.Lex(); - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.personality' directive")) + if (parseEOL()) return true; UC.recordPersonality(L); @@ -11571,8 +11607,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { /// parseDirectiveHandlerData /// ::= .handlerdata bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.handlerdata' directive")) + if (parseEOL()) return true; UC.recordHandlerData(L); @@ -11670,8 +11705,7 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) { if (!CE) return Error(ExLoc, "pad offset must be an immediate"); - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.pad' directive")) + if (parseEOL()) return true; getTargetStreamer().emitPad(CE->getValue()); @@ -11692,8 +11726,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; // Parse the register list - if (parseRegisterList(Operands, true, true) || - parseToken(AsmToken::EndOfStatement, "unexpected token in directive") + if (parseRegisterList(Operands, true, true) || parseEOL()) return true; ARMOperand &Op = (ARMOperand &)*Operands[0]; if (!IsVector && !Op.isRegList()) @@ -11776,7 +11809,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { /// parseDirectiveLtorg /// ::= .ltorg | .pool bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + if (parseEOL()) return true;
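// Flush the current literal pool: any constants deferred so far are emitted
// at this point in the output stream.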
getTargetStreamer().emitCurrentConstantPool(); return false; @@ -11785,7 +11818,7 @@ bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) { bool ARMAsmParser::parseDirectiveEven(SMLoc L) { const MCSection *Section = getStreamer().getCurrentSectionOnly(); - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + if (parseEOL()) return true; if (!Section) { @@ -11794,7 +11827,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { } assert(Section && "must have section to emit alignment"); - if (Section->UseCodeAlign()) + if (Section->useCodeAlign()) getStreamer().emitCodeAlignment(2, &getSTI()); else getStreamer().emitValueToAlignment(2); @@ -11810,9 +11843,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { const MCExpr *IndexExpression; SMLoc IndexLoc = Parser.getTok().getLoc(); - if (Parser.parseExpression(IndexExpression) || - parseToken(AsmToken::EndOfStatement, - "unexpected token in '.personalityindex' directive")) { + if (Parser.parseExpression(IndexExpression) || parseEOL()) { return true; } @@ -11913,11 +11944,10 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext()); Lex(); - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.tlsdescseq' directive")) + if (parseEOL()) return true; - getTargetStreamer().AnnotateTLSDescriptorSequence(SRE); + getTargetStreamer().annotateTLSDescriptorSequence(SRE); return false; } @@ -11955,8 +11985,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { Offset = CE->getValue(); } - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.movsp' directive")) + if (parseEOL()) return true; getTargetStreamer().emitMovSP(SPReg, Offset); @@ -11996,7 +12025,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { // '.align' is target specifically handled to mean 2**2 byte alignment. 
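// That is, a bare '.align' behaves like '.p2align 2' (4-byte alignment)
// rather than taking a byte count.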
const MCSection *Section = getStreamer().getCurrentSectionOnly(); assert(Section && "must have section to emit alignment"); - if (Section->UseCodeAlign()) + if (Section->useCodeAlign()) getStreamer().emitCodeAlignment(4, &getSTI(), 0); else getStreamer().emitValueToAlignment(4, 0, 1, 0); @@ -12026,6 +12055,175 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { return false; } +/// parseDirectiveSEHAllocStack +/// ::= .seh_stackalloc +/// ::= .seh_stackalloc_w +bool ARMAsmParser::parseDirectiveSEHAllocStack(SMLoc L, bool Wide) { + int64_t Size; + if (parseImmExpr(Size)) + return true; + getTargetStreamer().emitARMWinCFIAllocStack(Size, Wide); + return false; +} + +/// parseDirectiveSEHSaveRegs +/// ::= .seh_save_regs +/// ::= .seh_save_regs_w +bool ARMAsmParser::parseDirectiveSEHSaveRegs(SMLoc L, bool Wide) { + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; + + if (parseRegisterList(Operands) || parseEOL()) + return true; + ARMOperand &Op = (ARMOperand &)*Operands[0]; + if (!Op.isRegList()) + return Error(L, ".seh_save_regs{_w} expects GPR registers"); + const SmallVectorImpl<unsigned> &RegList = Op.getRegList(); + uint32_t Mask = 0; + for (size_t i = 0; i < RegList.size(); ++i) { + unsigned Reg = MRI->getEncodingValue(RegList[i]); + if (Reg == 15) // pc -> lr + Reg = 14; + if (Reg == 13) + return Error(L, ".seh_save_regs{_w} can't include SP"); + assert(Reg < 16U && "Register out of range"); + unsigned Bit = (1u << Reg); + Mask |= Bit; + } + if (!Wide && (Mask & 0x1f00) != 0) + return Error(L, + ".seh_save_regs cannot save R8-R12, needs .seh_save_regs_w"); + getTargetStreamer().emitARMWinCFISaveRegMask(Mask, Wide); + return false; +} + +/// parseDirectiveSEHSaveSP +/// ::= .seh_save_sp +bool ARMAsmParser::parseDirectiveSEHSaveSP(SMLoc L) { + int Reg = tryParseRegister(); + if (Reg == -1 || !MRI->getRegClass(ARM::GPRRegClassID).contains(Reg)) + return Error(L, "expected GPR"); + unsigned Index = MRI->getEncodingValue(Reg); + if (Index > 14 || Index == 13) + return Error(L, "invalid register for .seh_save_sp"); + getTargetStreamer().emitARMWinCFISaveSP(Index); + return false; +} + +/// parseDirectiveSEHSaveFRegs +/// ::= .seh_save_fregs +bool ARMAsmParser::parseDirectiveSEHSaveFRegs(SMLoc L) { + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; + + if (parseRegisterList(Operands) || parseEOL()) + return true; + ARMOperand &Op = (ARMOperand &)*Operands[0]; + if (!Op.isDPRRegList()) + return Error(L, ".seh_save_fregs expects DPR registers"); + const SmallVectorImpl<unsigned> &RegList = Op.getRegList(); + uint32_t Mask = 0; + for (size_t i = 0; i < RegList.size(); ++i) { + unsigned Reg = MRI->getEncodingValue(RegList[i]); + assert(Reg < 32U && "Register out of range"); + unsigned Bit = (1u << Reg); + Mask |= Bit; + } + + if (Mask == 0) + return Error(L, ".seh_save_fregs missing registers"); + + unsigned First = 0; + while ((Mask & 1) == 0) { + First++; + Mask >>= 1; + } + if (((Mask + 1) & Mask) != 0) + return Error(L, + ".seh_save_fregs must take a contiguous range of registers"); + unsigned Last = First; + while ((Mask & 2) != 0) { + Last++; + Mask >>= 1; + } + if (First < 16 && Last >= 16) + return Error(L, ".seh_save_fregs must be all d0-d15 or d16-d31"); + getTargetStreamer().emitARMWinCFISaveFRegs(First, Last); + return false; +} + +/// parseDirectiveSEHSaveLR +/// ::= .seh_save_lr +bool ARMAsmParser::parseDirectiveSEHSaveLR(SMLoc L) { + int64_t Offset; + if (parseImmExpr(Offset)) + return true; + getTargetStreamer().emitARMWinCFISaveLR(Offset); + return false; +} + +/// parseDirectiveSEHPrologEnd +/// ::= .seh_endprologue +/// ::=
.seh_endprologue_fragment +bool ARMAsmParser::parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment) { + getTargetStreamer().emitARMWinCFIPrologEnd(Fragment); + return false; +} + +/// parseDirectiveSEHNop +/// ::= .seh_nop +/// ::= .seh_nop_w +bool ARMAsmParser::parseDirectiveSEHNop(SMLoc L, bool Wide) { + getTargetStreamer().emitARMWinCFINop(Wide); + return false; +} + +/// parseDirectiveSEHEpilogStart +/// ::= .seh_startepilogue +/// ::= .seh_startepilogue_cond +bool ARMAsmParser::parseDirectiveSEHEpilogStart(SMLoc L, bool Condition) { + unsigned CC = ARMCC::AL; + if (Condition) { + MCAsmParser &Parser = getParser(); + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return Error(S, ".seh_startepilogue_cond missing condition"); + CC = ARMCondCodeFromString(Tok.getString()); + if (CC == ~0U) + return Error(S, "invalid condition"); + Parser.Lex(); // Eat the token. + } + + getTargetStreamer().emitARMWinCFIEpilogStart(CC); + return false; +} + +/// parseDirectiveSEHEpilogEnd +/// ::= .seh_endepilogue +bool ARMAsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) { + getTargetStreamer().emitARMWinCFIEpilogEnd(); + return false; +} + +/// parseDirectiveSEHCustom +/// ::= .seh_custom +bool ARMAsmParser::parseDirectiveSEHCustom(SMLoc L) { + unsigned Opcode = 0; + do { + int64_t Byte; + if (parseImmExpr(Byte)) + return true; + if (Byte > 0xff || Byte < 0) + return Error(L, "Invalid byte value in .seh_custom"); + if (Opcode > 0x00ffffff) + return Error(L, "Too many bytes in .seh_custom"); + // Store the bytes as one big-endian number in Opcode. In a multi-byte + // opcode sequence, the first byte can't be zero. + Opcode = (Opcode << 8) | Byte; + } while (parseOptionalToken(AsmToken::Comma)); + getTargetStreamer().emitARMWinCFICustom(Opcode); + return false; +} + /// Force static initialization. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMAsmParser() { RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget()); @@ -12338,8 +12536,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { SMLoc ExtLoc = Parser.getTok().getLoc(); Lex(); - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.arch_extension' directive")) + if (parseEOL()) return true; if (Name == "nocrypto") { diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index c3df7dc88d79..9acd49292268 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -13,8 +13,8 @@ #include "TargetInfo/ARMTargetInfo.h" #include "Utils/ARMBaseInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -175,408 +175,529 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { // Forward declare these because the autogenerated code will reference them. // Definitions are further down.
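// Every decoder has the same shape: pull a field out of the encoded
// instruction, append the matching operand(s) to the MCInst, and answer
// MCDisassembler::Success, SoftFail or Fail. A minimal sketch of the
// pattern, using a hypothetical 16-entry table MyRegTable (not a decoder
// from this file):
//
//   static DecodeStatus DecodeMyRegClass(MCInst &Inst, unsigned RegNo,
//                                        uint64_t Address,
//                                        const MCDisassembler *Decoder) {
//     if (RegNo > 15)
//       return MCDisassembler::Fail; // encoding cannot name this register
//     Inst.addOperand(MCOperand::createReg(MyRegTable[RegNo]));
//     return MCDisassembler::Success;
//   }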
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPRwithZRnospRegisterClass( - MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler 
*Decoder); static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, - unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus 
DecodeTSBInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst, - unsigned Insn, - uint64_t Adddress, - const void *Decoder); +static DecodeStatus +DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, + uint64_t Adddress, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val, - 
uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -template + 
uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler 
*Decoder); +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, - 
uint64_t Address, const void* Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder); -static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder); -static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template -static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +template +static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template + uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template + uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, - uint64_t Address, 
const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); template static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val, uint64_t Address, - 
const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); -template + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); -template + const MCDisassembler *Decoder); +template static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template + uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template + uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template + uint64_t Address, + const MCDisassembler *Decoder); +template static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); -template -static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +template +static DecodeStatus +DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); typedef DecodeStatus OperandDecoder(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template -static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const 
MCDisassembler *Decoder); +template +static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); #include "ARMGenDisassemblerTables.inc" @@ -710,11 +831,12 @@ extern const MCInstrDesc ARMInsts[]; /// operand to the MCInst and false otherwise. static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, bool isBranch, uint64_t InstSize, - MCInst &MI, const void *Decoder) { - const MCDisassembler *Dis = static_cast(Decoder); + MCInst &MI, + const MCDisassembler *Decoder) { // FIXME: Does it make sense for value to be negative? - return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch, - /* Offset */ 0, InstSize); + return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, + isBranch, /*Offset=*/0, /*OpSize=*/0, + InstSize); } /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being @@ -727,7 +849,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, /// a literal 'C' string if the referenced address of the literal pool's entry /// is an address into a section with 'C' string literals. 
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, - const void *Decoder) { + const MCDisassembler *Decoder) { const MCDisassembler *Dis = static_cast(Decoder); Dis->tryAddingPcLoadReferenceComment(Value, Address); } @@ -1142,7 +1264,8 @@ static const uint16_t CLRMGPRDecoderTable[] = { }; static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -1153,7 +1276,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -1165,9 +1288,9 @@ static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus -DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 15) @@ -1180,7 +1303,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 13) @@ -1192,8 +1315,8 @@ static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 15) @@ -1207,8 +1330,8 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 15) @@ -1225,8 +1348,8 @@ DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 13) return MCDisassembler::Fail; @@ -1235,7 +1358,8 @@ DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -1247,7 +1371,8 @@ static const uint16_t GPRPairDecoderTable[] = { }; static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; // According to the Arm ARM RegNo = 14 is undefined, but we return fail @@ -1263,8 +1388,9 @@ static DecodeStatus 
DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, return S; } -static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus +DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 13) return MCDisassembler::Fail; @@ -1278,7 +1404,7 @@ static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo != 13) return MCDisassembler::Fail; @@ -1288,7 +1414,8 @@ static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Register = 0; switch (RegNo) { case 0: @@ -1318,7 +1445,8 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; const FeatureBitset &featureBits = @@ -1343,7 +1471,8 @@ static const uint16_t SPRDecoderTable[] = { }; static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1353,7 +1482,8 @@ static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder); } @@ -1369,7 +1499,8 @@ static const uint16_t DPRDecoderTable[] = { }; static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); @@ -1384,22 +1515,24 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); } static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus -DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -1413,7 +1546,8 @@ static const uint16_t QPRDecoderTable[] = { }; static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const 
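// Aside: several decoders above consult subtarget features before accepting
// an encoding; DecodeDPRRegisterClass, for instance, only allows D16-D31 when
// the subtarget has the D32 feature (the real code reads the feature bits via
// Decoder->getSubtargetInfo().getFeatureBits()). Modeled with a plain bool:
#include <cstdint>
enum Status { Fail, Success };
static Status decodeDPR(unsigned RegNo, bool HasD32, uint16_t &RegOut) {
  if (RegNo > 31 || (!HasD32 && RegNo > 15)) // upper bank gated on the feature
    return Fail;
  RegOut = (uint16_t)RegNo; // stand-in for DPRDecoderTable[RegNo]
  return Success;
}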
MCDisassembler *Decoder) { if (RegNo > 31 || (RegNo & 1) != 0) return MCDisassembler::Fail; RegNo >>= 1; @@ -1433,7 +1567,8 @@ static const uint16_t DPairDecoderTable[] = { }; static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 30) return MCDisassembler::Fail; @@ -1453,10 +1588,9 @@ static const uint16_t DPairSpacedDecoderTable[] = { ARM::D28_D30, ARM::D29_D31 }; -static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 29) return MCDisassembler::Fail; @@ -1466,7 +1600,8 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, } static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val == 0xF) return MCDisassembler::Fail; // AL predicate is not allowed on Thumb1 branches. @@ -1483,7 +1618,8 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val) Inst.addOperand(MCOperand::createReg(ARM::CPSR)); else @@ -1492,7 +1628,8 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Val, 0, 4); @@ -1529,7 +1666,8 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Val, 0, 4); @@ -1564,7 +1702,8 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; bool NeedDisjointWriteback = false; @@ -1611,7 +1750,8 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Vd = fieldFromInstruction(Val, 8, 5); @@ -1635,7 +1775,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Vd = fieldFromInstruction(Val, 8, 5); @@ -1660,7 +1801,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // This 
operand encodes a mask of contiguous zeros between a specified MSB // and LSB. To decode it, we create the mask of all bits MSB-and-lower, // the mask of all bits LSB-and-lower, and then xor them to create @@ -1687,7 +1829,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 28, 4); @@ -1865,8 +2008,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -1971,7 +2114,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 13, 4); @@ -2013,9 +2157,22 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus -DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { + if (Inst.getOpcode() != ARM::TSB && Inst.getOpcode() != ARM::t2TSB) + return MCDisassembler::Fail; + + // The "csync" operand is not encoded into the "tsb" instruction (as this is + // the only available operand), but LLVM expects the instruction to have one + // operand, so we need to add the csync when decoding. 
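// Aside: a runnable model of the mask construction that the
// DecodeBitfieldMaskOperand comment above describes — mask(msb-and-lower)
// XOR mask(lsb-and-lower) leaves bits lsb..msb set, and the operand stores
// the complement (the bits BFC/BFI preserve):
#include <cstdint>
static uint32_t bitfieldInverseMask(unsigned msb, unsigned lsb) {
  // assumes lsb <= msb <= 31; the real decoder soft-fails on lsb > msb
  uint32_t msb_mask = (msb == 31) ? 0xFFFFFFFFu : ((1u << (msb + 1)) - 1);
  uint32_t lsb_mask = (1u << lsb) - 1;
  return ~(msb_mask ^ lsb_mask); // e.g. msb=7, lsb=4 -> 0xFFFFFF0F
}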
+ Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -2206,7 +2363,8 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -2235,7 +2393,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -2257,9 +2416,10 @@ static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus +DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -2350,7 +2510,8 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, // Check for UNPREDICTABLE predicated ESB instruction static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned pred = fieldFromInstruction(Insn, 28, 4); unsigned imm8 = fieldFromInstruction(Insn, 0, 8); const MCDisassembler *Dis = static_cast<const MCDisassembler *>(Decoder); @@ -2372,7 +2533,8 @@ static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imod = fieldFromInstruction(Insn, 18, 2); unsigned M = fieldFromInstruction(Insn, 17, 1); unsigned iflags = fieldFromInstruction(Insn, 6, 3); @@ -2419,7 +2581,8 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imod = fieldFromInstruction(Insn, 9, 2); unsigned M = fieldFromInstruction(Insn, 8, 1); unsigned iflags = fieldFromInstruction(Insn, 5, 3); @@ -2460,9 +2623,9 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned imm = fieldFromInstruction(Insn, 0, 8); unsigned Opcode = ARM::t2HINT; @@ -2486,7 +2649,8 @@ static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd =
fieldFromInstruction(Insn, 8, 4); @@ -2510,7 +2674,8 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -2537,7 +2702,8 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 16, 4); @@ -2565,7 +2731,8 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Pred = fieldFromInstruction(Insn, 28, 4); @@ -2586,7 +2753,8 @@ static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Imm = fieldFromInstruction(Insn, 9, 1); @@ -2614,7 +2782,8 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned add = fieldFromInstruction(Val, 12, 1); @@ -2634,7 +2803,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -2654,7 +2824,8 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -2674,13 +2845,14 @@ static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); } -static DecodeStatus -DecodeT2BInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus Status = MCDisassembler::Success; // Note the J1 and J2 values are from the encoded instruction. 
So here @@ -2705,9 +2877,9 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn, return Status; } -static DecodeStatus -DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 28, 4); @@ -2736,7 +2908,8 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Val, 0, 4); @@ -2753,7 +2926,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3029,7 +3203,8 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned type = fieldFromInstruction(Insn, 8, 4); unsigned align = fieldFromInstruction(Insn, 4, 2); if (type == 6 && (align & 2)) return MCDisassembler::Fail; @@ -3042,7 +3217,8 @@ static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned size = fieldFromInstruction(Insn, 6, 2); if (size == 3) return MCDisassembler::Fail; @@ -3057,7 +3233,8 @@ static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned size = fieldFromInstruction(Insn, 6, 2); if (size == 3) return MCDisassembler::Fail; @@ -3070,7 +3247,8 @@ static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned size = fieldFromInstruction(Insn, 6, 2); if (size == 3) return MCDisassembler::Fail; @@ -3080,7 +3258,8 @@ static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3350,7 +3529,8 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3397,7 +3577,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, 
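// Aside: a runnable model of the branch-offset reconstruction used by
// DecodeT2BInstruction above. The stored J1/J2 bits become I1/I2 via
// I = NOT(J XOR S), and the 25-bit offset carries one trailing zero because
// Thumb targets are halfword-aligned. Field names follow the ARM ARM; the
// helper itself is mine:
#include <cstdint>
static int32_t t2BranchOffset(unsigned S, unsigned J1, unsigned J2,
                              unsigned imm10, unsigned imm11) {
  unsigned I1 = !(J1 ^ S), I2 = !(J2 ^ S);
  uint32_t raw = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) |
                 (imm11 << 1);     // S:I1:I2:imm10:imm11:'0'
  return (int32_t)(raw << 7) >> 7; // sign-extend from bit 24
}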
const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3445,7 +3626,8 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3480,7 +3662,8 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3531,9 +3714,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3577,9 +3760,9 @@ DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) | @@ -3607,7 +3790,8 @@ DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = fieldFromInstruction(Insn, 13, 3); @@ -3632,7 +3816,8 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3651,31 +3836,36 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(8 - Val)); return MCDisassembler::Success; } static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(16 - Val)); return MCDisassembler::Success; } static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(32 - Val)); return MCDisassembler::Success; } static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(64 - 
Val)); return MCDisassembler::Success; } static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3711,7 +3901,8 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned dst = fieldFromInstruction(Insn, 8, 3); @@ -3735,7 +3926,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<12>(Val << 1))); @@ -3743,7 +3935,8 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<21>(Val))); @@ -3751,7 +3944,8 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(Val << 1)); @@ -3759,7 +3953,8 @@ static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 0, 3); @@ -3774,7 +3969,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 0, 3); @@ -3788,7 +3984,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imm = Val << 2; Inst.addOperand(MCOperand::createImm(imm)); @@ -3798,7 +3995,8 @@ static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createReg(ARM::SP)); Inst.addOperand(MCOperand::createImm(Val)); @@ -3806,7 +4004,8 @@ static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler 
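// Aside: the Thumb branch-operand decoders above all compute the symbolic
// target as `Address + SignExtend(Val << 1) + 4`: offsets are stored in
// halfwords (hence the << 1) and are relative to the PC, which in Thumb reads
// as the instruction address plus 4. Runnable model for the 11-bit case:
#include <cstdint>
static uint64_t thumbBranchTarget(uint64_t InstAddr, uint32_t imm11) {
  uint32_t raw = imm11 << 1;                // halfword offset, now 12 bits
  int32_t off = (int32_t)(raw << 20) >> 20; // sign-extend from bit 11
  return InstAddr + off + 4;                // +4: Thumb PC bias
}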
*Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 6, 4); @@ -3835,7 +4034,8 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -3918,7 +4118,8 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4002,7 +4203,8 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4081,8 +4283,8 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4121,7 +4323,8 @@ static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -4173,8 +4376,8 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { if (Val == 0) Inst.addOperand(MCOperand::createImm(INT32_MIN)); else { @@ -4188,7 +4391,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Val == 0) Inst.addOperand(MCOperand::createImm(INT32_MIN)); else { @@ -4203,7 +4406,8 @@ static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, } static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -4219,7 +4423,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 4); @@ -4233,8 +4437,9 @@ static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus 
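// Aside: DecodeT2Imm8S4 above decodes a word-scaled 8-bit offset in which a
// raw value of zero encodes "#-0" (subtract nothing); LLVM keeps that
// distinct from "#0" by using INT32_MIN as a sentinel immediate. Runnable
// model:
#include <cstdint>
#include <climits>
static int32_t decodeImm8s4(unsigned Val) { // Val is U:imm8 (9 bits)
  if (Val == 0)
    return INT32_MIN;             // sentinel for #-0
  int32_t imm = (Val & 0xFF) * 4; // imm8 counts words
  if (!(Val & 0x100))             // U bit clear -> negative offset
    imm = -imm;
  return imm;
}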
DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 4); @@ -4248,8 +4453,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, return S; } -static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { int imm = Val & 0xFF; if (Val == 0) imm = INT32_MIN; @@ -4260,9 +4465,9 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -template<int shift> -static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +template <int shift> +static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { int imm = Val & 0x7F; if (Val == 0) imm = INT32_MIN; @@ -4276,7 +4481,8 @@ static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -4321,10 +4527,10 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, return S; } -template<int shift> +template <int shift> static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 3); @@ -4338,10 +4544,10 @@ static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, return S; } -template<int shift, int WriteBack> +template <int shift, int WriteBack> static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 4); @@ -4358,7 +4564,8 @@ static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -4419,7 +4626,8 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 13, 4); @@ -4445,7 +4653,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imm = fieldFromInstruction(Insn, 0, 7); Inst.addOperand(MCOperand::createReg(ARM::SP)); @@ -4456,7 +4665,8 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Inst.getOpcode() == ARM::tADDrSP) { @@ -4481,7 +4691,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t
Insn, } static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2; unsigned flags = fieldFromInstruction(Insn, 0, 3); @@ -4492,7 +4703,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Insn, 0, 4); unsigned add = fieldFromInstruction(Insn, 4, 1); @@ -4505,7 +4717,8 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 3, 4); unsigned Qm = fieldFromInstruction(Insn, 0, 3); @@ -4518,9 +4731,10 @@ static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, return S; } -template<int shift> +template <int shift> static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qm = fieldFromInstruction(Insn, 8, 3); int imm = fieldFromInstruction(Insn, 0, 7); @@ -4542,7 +4756,8 @@ static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // Val is passed in as S:J1:J2:imm10H:imm10L:'0' // Note only one trailing zero not two. Also the J1 and J2 values are from // the encoded instruction. So here change to I1 and I2 values via: @@ -4566,7 +4781,8 @@ static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; @@ -4580,9 +4796,9 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus -DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); DecodeStatus S = MCDisassembler::Success; @@ -4598,9 +4814,9 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 22, 4); @@ -4641,8 +4857,8 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, // Decode a shifted immediate operand. These basically consist // of an 8-bit value, and a 4-bit directive that specifies either // a splat operation or a rotation.
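// Aside: a runnable model of the two forms the Thumb2 modified immediate
// (decoded by DecodeT2SOImm just below) can take: a "splat" of the 8-bit
// value into selected bytes, or that value with bit 7 forced set and rotated
// right by a 5-bit amount. Helper name is mine; the bit layout follows the
// hunk below and the ARM ARM:
#include <cstdint>
static uint32_t t2ModifiedImm(uint32_t Val) { // 12-bit encoded field
  uint32_t imm8 = Val & 0xFF;
  if (((Val >> 10) & 0x3) == 0) {               // ctrl == 0: splat directive
    switch ((Val >> 8) & 0x3) {
    case 0:  return imm8;                       // 000000XY
    case 1:  return imm8 | (imm8 << 16);        // 00XY00XY
    case 2:  return (imm8 << 8) | (imm8 << 24); // XY00XY00
    default: return imm8 * 0x01010101u;         // XYXYXYXY
    }
  }
  uint32_t unrot = (Val & 0x7F) | 0x80; // 7-bit value, top bit forced set
  uint32_t rot = (Val >> 7) & 0x1F;     // rotate-right amount (8..31 here)
  return (unrot >> rot) | (unrot << ((32 - rot) & 31));
}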
-static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { unsigned ctrl = fieldFromInstruction(Val, 10, 2); if (ctrl == 0) { unsigned byte = fieldFromInstruction(Val, 8, 2); @@ -4672,9 +4888,9 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus -DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1))); @@ -4683,7 +4899,7 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Val is passed in as S:J1:J2:imm10:imm11 // Note no trailing zero after imm11. Also the J1 and J2 values are from // the encoded instruction. So here change to I1 and I2 values via: @@ -4706,7 +4922,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val & ~0xf) return MCDisassembler::Fail; @@ -4715,7 +4932,8 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val & ~0xf) return MCDisassembler::Fail; @@ -4723,8 +4941,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; const FeatureBitset &FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); @@ -4825,7 +5043,8 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned R = fieldFromInstruction(Val, 5, 1); unsigned SysM = fieldFromInstruction(Val, 0, 5); @@ -4840,7 +5059,8 @@ static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -4862,7 +5082,7 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -4887,7 +5107,8 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, } static DecodeStatus 
DecodeLDRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4912,7 +5133,8 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4939,7 +5161,8 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4964,7 +5187,8 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4988,8 +5212,8 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5055,8 +5279,8 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5120,8 +5344,8 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5187,8 +5411,8 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5250,8 +5474,8 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5320,8 +5544,8 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST3LN(MCInst 
&Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5383,8 +5607,8 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5464,8 +5688,8 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5536,8 +5760,8 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -5562,8 +5786,8 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -5588,8 +5812,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 4, 4); unsigned mask = fieldFromInstruction(Insn, 0, 4); @@ -5617,9 +5841,9 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -5654,9 +5878,9 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -5689,8 +5913,8 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned sign1 
= fieldFromInstruction(Insn, 21, 1); unsigned sign2 = fieldFromInstruction(Insn, 23, 1); if (sign1 != sign2) return MCDisassembler::Fail; @@ -5717,7 +5941,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; // Shift of "asr #32" is not allowed in Thumb2 mode. @@ -5726,8 +5950,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, return S; } -static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction(Insn, 0, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5753,8 +5977,8 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; @@ -5812,8 +6036,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; @@ -5871,10 +6095,10 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0); @@ -5904,8 +6128,8 @@ static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, return S; } -static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 16, 4); @@ -5932,7 +6156,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, } static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned CRm = fieldFromInstruction(Val, 0, 4); @@ -5978,7 +6203,7 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); DecodeStatus S = 
MCDisassembler::Success; @@ -6030,7 +6255,7 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, template <bool isSigned, bool isNeg, bool zeroPermitted, int size> static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val == 0 && !zeroPermitted) S = MCDisassembler::Fail; @@ -6049,7 +6274,7 @@ static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { uint64_t LocImm = Inst.getOperand(0).getImm(); Val = LocImm + (2 << Val); @@ -6061,7 +6286,7 @@ static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Val >= ARMCC::AL) // also exclude the non-condition NV return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(Val)); @@ -6069,7 +6294,7 @@ static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Inst.getOpcode() == ARM::MVE_LCTP) @@ -6132,7 +6357,7 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val == 0) @@ -6144,7 +6369,8 @@ static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if ((RegNo) + 1 > 11) return MCDisassembler::Fail; @@ -6154,7 +6380,8 @@ static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if ((RegNo) > 14) return MCDisassembler::Fail; @@ -6165,7 +6392,8 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo == 15) { Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV)); return MCDisassembler::Success; @@ -6181,7 +6409,7 @@ DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createImm(ARMCC::AL)); @@ -6207,8 +6435,8 @@ static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; @@ -6224,7 +6452,7 @@ static const uint16_t QQPRDecoderTable[] = { static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void
*Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 6) return MCDisassembler::Fail; @@ -6240,7 +6468,7 @@ static const uint16_t QQQQPRDecoderTable[] = { static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 4) return MCDisassembler::Fail; @@ -6251,7 +6479,7 @@ static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; // Parse VPT mask and encode it in the MCInst as an immediate with the same @@ -6281,7 +6509,8 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // The vpred_r operand type includes an MQPR register field derived // from the encoding. But we don't actually want to add an operand // to the MCInst at this stage, because AddThumbPredicate will do it @@ -6292,18 +6521,16 @@ static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE)); return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { unsigned Code; switch (Val & 0x3) { case 0: @@ -6323,17 +6550,16 @@ static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? 
ARMCC::HS : ARMCC::HI)); return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { unsigned Code; switch (Val) { default: @@ -6363,7 +6589,8 @@ static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Va } static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned DecodedVal = 64 - Val; @@ -6404,10 +6631,10 @@ static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) { } } -template +template static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { switch (Inst.getOpcode()) { case ARM::VSTR_FPSCR_pre: case ARM::VSTR_FPSCR_NZCVQC_pre: @@ -6448,9 +6675,10 @@ static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val, return S; } -static inline DecodeStatus DecodeMVE_MEM_pre( - MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder, - unsigned Rn, OperandDecoder RnDecoder, OperandDecoder AddrDecoder) { +static inline DecodeStatus +DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder, unsigned Rn, + OperandDecoder RnDecoder, OperandDecoder AddrDecoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = fieldFromInstruction(Val, 13, 3); @@ -6469,7 +6697,8 @@ static inline DecodeStatus DecodeMVE_MEM_pre( template static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder, fieldFromInstruction(Val, 16, 3), DecodetGPRRegisterClass, @@ -6478,7 +6707,8 @@ static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val, template static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder, fieldFromInstruction(Val, 16, 4), DecoderGPRRegisterClass, @@ -6487,17 +6717,18 @@ static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val, template static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder, fieldFromInstruction(Val, 17, 3), DecodeMQPRRegisterClass, DecodeMveAddrModeQ); } -template +template static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val < MinLog || Val > MaxLog) @@ -6507,10 +6738,10 @@ static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val, return S; } -template -static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { +template +static DecodeStatus +DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createImm(start + Val)); @@ -6519,7 +6750,8 @@ static DecodeStatus 
DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 0, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -6542,7 +6774,8 @@ static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 0, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -6566,8 +6799,9 @@ static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMVEOverlappingLongShift( - MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { +static DecodeStatus +DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned RdaLo = fieldFromInstruction(Insn, 17, 3) << 1; @@ -6645,8 +6879,9 @@ static DecodeStatus DecodeMVEOverlappingLongShift( return S; } -static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) | fieldFromInstruction(Insn, 13, 3)); @@ -6664,9 +6899,9 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Addr return S; } -template +template static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); unsigned Qn = fieldFromInstruction(Insn, 17, 3); @@ -6703,7 +6938,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -6712,8 +6947,9 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); Inst.addOperand(MCOperand::createReg(ARM::VPR)); @@ -6721,7 +6957,8 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address } static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { const unsigned Rd = fieldFromInstruction(Insn, 8, 4); const unsigned Rn = fieldFromInstruction(Insn, 16, 4); const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 | diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 
16bc0ca179a7..d74da27fbc4f 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" @@ -98,9 +98,20 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitInst(uint32_t Inst, char Suffix = '\0') override; void finishAttributeSection() override; - void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; + void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override; + void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override; + void emitARMWinCFISaveSP(unsigned Reg) override; + void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override; + void emitARMWinCFISaveLR(unsigned Offset) override; + void emitARMWinCFIPrologEnd(bool Fragment) override; + void emitARMWinCFINop(bool Wide) override; + void emitARMWinCFIEpilogStart(unsigned Condition) override; + void emitARMWinCFIEpilogEnd() override; + void emitARMWinCFICustom(unsigned Opcode) override; + public: ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter &InstPrinter, bool VerboseAsm); @@ -239,8 +250,8 @@ void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { void ARMTargetAsmStreamer::finishAttributeSection() {} -void -ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { +void ARMTargetAsmStreamer::annotateTLSDescriptorSequence( + const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName() << "\n"; } @@ -269,6 +280,101 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, OS << '\n'; } +void ARMTargetAsmStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) { + if (Wide) + OS << "\t.seh_stackalloc_w\t" << Size << "\n"; + else + OS << "\t.seh_stackalloc\t" << Size << "\n"; +} + +static void printRegs(formatted_raw_ostream &OS, ListSeparator &LS, int First, + int Last) { + if (First != Last) + OS << LS << "r" << First << "-r" << Last; + else + OS << LS << "r" << First; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) { + if (Wide) + OS << "\t.seh_save_regs_w\t"; + else + OS << "\t.seh_save_regs\t"; + ListSeparator LS; + int First = -1; + OS << "{"; + for (int I = 0; I <= 12; I++) { + if (Mask & (1 << I)) { + if (First < 0) + First = I; + } else { + if (First >= 0) { + printRegs(OS, LS, First, I - 1); + First = -1; + } + } + } + if (First >= 0) + printRegs(OS, LS, First, 12); + if (Mask & (1 << 14)) + OS << LS << "lr"; + OS << "}\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveSP(unsigned Reg) { + OS << "\t.seh_save_sp\tr" << Reg << "\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveFRegs(unsigned First, + unsigned Last) { + if (First != Last) + OS << "\t.seh_save_fregs\t{d" << First << "-d" << Last << "}\n"; + else + OS << "\t.seh_save_fregs\t{d" << First << "}\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveLR(unsigned Offset) { + OS << "\t.seh_save_lr\t" << Offset << "\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFIPrologEnd(bool Fragment) { + if (Fragment) + OS << "\t.seh_endprologue_fragment\n"; + else + OS << "\t.seh_endprologue\n"; +} + +void 
ARMTargetAsmStreamer::emitARMWinCFINop(bool Wide) { + if (Wide) + OS << "\t.seh_nop_w\n"; + else + OS << "\t.seh_nop\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFIEpilogStart(unsigned Condition) { + if (Condition == ARMCC::AL) + OS << "\t.seh_startepilogue\n"; + else + OS << "\t.seh_startepilogue_cond\t" + << ARMCondCodeToString(static_cast(Condition)) << "\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFIEpilogEnd() { + OS << "\t.seh_endepilogue\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFICustom(unsigned Opcode) { + int I; + for (I = 3; I > 0; I--) + if (Opcode & (0xffu << (8 * I))) + break; + ListSeparator LS; + OS << "\t.seh_custom\t"; + for (; I >= 0; I--) + OS << LS << ((Opcode >> (8 * I)) & 0xff); + OS << "\n"; +} + class ARMTargetELFStreamer : public ARMTargetStreamer { private: StringRef CurrentVendor; @@ -309,7 +415,7 @@ private: void finishAttributeSection() override; void emitLabel(MCSymbol *Symbol) override; - void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; // Reset state between object emissions @@ -984,8 +1090,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { Streamer.emitThumbFunc(Symbol); } -void -ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { +void ARMTargetELFStreamer::annotateTLSDescriptorSequence( + const MCSymbolRefExpr *S) { getStreamer().EmitFixup(S, FK_Data_4); } @@ -1057,7 +1163,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix, assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section - SwitchSection(EHSection); + switchSection(EHSection); emitValueToAlignment(4, 0, 1, 0); } @@ -1150,7 +1256,7 @@ void ARMELFStreamer::emitFnEnd() { } // Switch to the section containing FnStart - SwitchSection(&FnStart->getSection()); + switchSection(&FnStart->getSection()); // Clean exception handling frame information EHReset(); @@ -1369,12 +1475,8 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { return new ARMTargetStreamer(S); } -MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, - const MCSubtargetInfo &STI) { - const Triple &TT = STI.getTargetTriple(); - if (TT.isOSBinFormatELF()) - return new ARMTargetELFStreamer(S); - return new ARMTargetStreamer(S); +MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S) { + return new ARMTargetELFStreamer(S); } MCELFStreamer *createARMELFStreamer(MCContext &Context, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 77c0e3522911..febd8ab8bbc0 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -89,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { AlignmentIsInBytes = false; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::WinEH; + WinEHEncodingType = WinEH::EncodingType::Itanium; PrivateGlobalPrefix = "$M"; PrivateLabelPrefix = "$M"; CommentString = "@"; @@ -110,7 +111,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { PrivateLabelPrefix = ".L"; SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::DwarfCFI; + ExceptionsType = ExceptionHandling::WinEH; + WinEHEncodingType = WinEH::EncodingType::Itanium; UseParensForSymbolVariant = true; DwarfRegNumForCFI = false; diff --git 
a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5ecacdab390f..c33bbfcc7114 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -2006,13 +2006,11 @@ getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx, #include "ARMGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, true); } MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, false); } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 17ca1866cf95..3f1379f135d1 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -87,18 +87,6 @@ static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, return false; } -static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, - std::string &Info) { - if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() && - MI.getOperand(1).getImm() != 8) { - Info = "applying IT instruction to more than one subsequent instruction is " - "deprecated"; - return true; - } - - return false; -} - static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 5c8f9bfdca08..e0c992f4fae2 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -71,13 +71,13 @@ MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S, bool isVerboseAsm); MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI); +MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S); +MCTargetStreamer *createARMObjectTargetWinCOFFStreamer(MCStreamer &S); MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index ed4000c7e5be..0ea51839824b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ScopedPrinter.h" using namespace llvm; @@ -149,7 +149,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, if (FixupOffset & 0xff000000) { Asm.getContext().reportError(Fixup.getLoc(), "can not encode offset '0x" + - to_hexString(FixupOffset) + + utohexstr(FixupOffset) + "' in resulting scattered relocation."); 
return; } @@ -264,7 +264,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, if (FixupOffset & 0xff000000) { Asm.getContext().reportError(Fixup.getLoc(), "can not encode offset '0x" + - to_hexString(FixupOffset) + + utohexstr(FixupOffset) + "' in resulting scattered relocation."); return; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 02a2d01176fc..16d1ae62053e 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -114,15 +114,28 @@ void ARMTargetStreamer::emitArchExtension(uint64_t ArchExt) {} void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {} void ARMTargetStreamer::emitFPU(unsigned FPU) {} void ARMTargetStreamer::finishAttributeSection() {} -void -ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} +void ARMTargetStreamer::annotateTLSDescriptorSequence( + const MCSymbolRefExpr *SRE) {} void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} +void ARMTargetStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) {} +void ARMTargetStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) {} +void ARMTargetStreamer::emitARMWinCFISaveSP(unsigned Reg) {} +void ARMTargetStreamer::emitARMWinCFISaveFRegs(unsigned First, unsigned Last) {} +void ARMTargetStreamer::emitARMWinCFISaveLR(unsigned Offset) {} +void ARMTargetStreamer::emitARMWinCFINop(bool Wide) {} +void ARMTargetStreamer::emitARMWinCFIPrologEnd(bool Fragment) {} +void ARMTargetStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {} +void ARMTargetStreamer::emitARMWinCFIEpilogEnd() {} +void ARMTargetStreamer::emitARMWinCFICustom(unsigned Opcode) {} + static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) { if (STI.getCPU() == "xscale") return ARMBuildAttrs::v5TEJ; - if (STI.hasFeature(ARM::HasV8Ops)) { + if (STI.hasFeature(ARM::HasV9_0aOps)) + return ARMBuildAttrs::v9_A; + else if (STI.hasFeature(ARM::HasV8Ops)) { if (STI.hasFeature(ARM::FeatureRClass)) return ARMBuildAttrs::v8_R; return ARMBuildAttrs::v8_A; @@ -305,3 +318,13 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI); } } + +MCTargetStreamer * +llvm::createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + const Triple &TT = STI.getTargetTriple(); + if (TT.isOSBinFormatELF()) + return createARMObjectTargetELFStreamer(S); + if (TT.isOSBinFormatCOFF()) + return createARMObjectTargetWinCOFFStreamer(S); + return new ARMTargetStreamer(S); +} diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index e6f649164a29..cdd7f6fb715a 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -8,30 +8,59 @@ #include "ARMMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCWinCOFFStreamer.h" using namespace llvm; namespace { class ARMWinCOFFStreamer : public MCWinCOFFStreamer { + Win64EH::ARMUnwindEmitter EHStreamer; + public: ARMWinCOFFStreamer(MCContext &C, std::unique_ptr AB, std::unique_ptr CE, std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), 
std::move(OW)) {} + void emitWinEHHandlerData(SMLoc Loc) override; + void emitWindowsUnwindTables() override; + void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void emitThumbFunc(MCSymbol *Symbol) override; void finishImpl() override; }; +void ARMWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); + + // We have to emit the unwind info now, because this directive + // actually switches to the .xdata section! + EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(), + /* HandlerData = */ true); +} + +void ARMWinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) { + EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); +} + +void ARMWinCOFFStreamer::emitWindowsUnwindTables() { + if (!getNumWinFrameInfos()) + return; + EHStreamer.Emit(*this); +} + void ARMWinCOFFStreamer::emitThumbFunc(MCSymbol *Symbol) { getAssembler().setIsThumbFunc(Symbol); } void ARMWinCOFFStreamer::finishImpl() { emitFrames(nullptr); + emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); } @@ -48,3 +77,201 @@ MCStreamer *llvm::createARMWinCOFFStreamer( return S; } +namespace { +class ARMTargetWinCOFFStreamer : public llvm::ARMTargetStreamer { +private: + // True if we are processing SEH directives in an epilogue. + bool InEpilogCFI = false; + + // Symbol of the current epilog for which we are processing SEH directives. + MCSymbol *CurrentEpilog = nullptr; + +public: + ARMTargetWinCOFFStreamer(llvm::MCStreamer &S) : ARMTargetStreamer(S) {} + + // The unwind codes on ARM Windows are documented at + // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling + void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override; + void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override; + void emitARMWinCFISaveSP(unsigned Reg) override; + void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override; + void emitARMWinCFISaveLR(unsigned Offset) override; + void emitARMWinCFIPrologEnd(bool Fragment) override; + void emitARMWinCFINop(bool Wide) override; + void emitARMWinCFIEpilogStart(unsigned Condition) override; + void emitARMWinCFIEpilogEnd() override; + void emitARMWinCFICustom(unsigned Opcode) override; + +private: + void emitARMWinUnwindCode(unsigned UnwindCode, int Reg, int Offset); +}; + +// Helper function to common out unwind code setup for those codes that can +// belong to both prolog and epilog. +void ARMTargetWinCOFFStreamer::emitARMWinUnwindCode(unsigned UnwindCode, + int Reg, int Offset) { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + MCSymbol *Label = S.emitCFILabel(); + auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset); + if (InEpilogCFI) + CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst); + else + CurFrame->Instructions.push_back(Inst); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIAllocStack(unsigned Size, + bool Wide) { + unsigned Op = Win64EH::UOP_AllocSmall; + if (!Wide) { + if (Size / 4 > 0xffff) + Op = Win64EH::UOP_AllocHuge; + else if (Size / 4 > 0x7f) + Op = Win64EH::UOP_AllocLarge; + } else { + Op = Win64EH::UOP_WideAllocMedium; + if (Size / 4 > 0xffff) + Op = Win64EH::UOP_WideAllocHuge; + else if (Size / 4 > 0x3ff) + Op = Win64EH::UOP_WideAllocLarge; + } + emitARMWinUnwindCode(Op, -1, Size); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveRegMask(unsigned Mask, + bool Wide) { + assert(Mask != 0); + int Lr = (Mask & 0x4000) ? 
1 : 0; + Mask &= ~0x4000; + if (Wide) + assert((Mask & ~0x1fff) == 0); + else + assert((Mask & ~0x00ff) == 0); + if (Mask && ((Mask + (1 << 4)) & Mask) == 0) { + if (Wide && (Mask & 0x1000) == 0 && (Mask & 0xff) == 0xf0) { + // One continuous range from r4 to r8-r11 + for (int I = 11; I >= 8; I--) { + if (Mask & (1 << I)) { + emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegsR4R11LR, I, Lr); + return; + } + } + // If it actually was from r4 to r4-r7, continue below. + } else if (!Wide) { + // One continuous range from r4 to r4-r7 + for (int I = 7; I >= 4; I--) { + if (Mask & (1 << I)) { + emitARMWinUnwindCode(Win64EH::UOP_SaveRegsR4R7LR, I, Lr); + return; + } + } + llvm_unreachable("logic error"); + } + } + Mask |= Lr << 14; + if (Wide) + emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegMask, Mask, 0); + else + emitARMWinUnwindCode(Win64EH::UOP_SaveRegMask, Mask, 0); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveSP(unsigned Reg) { + emitARMWinUnwindCode(Win64EH::UOP_SaveSP, Reg, 0); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveFRegs(unsigned First, + unsigned Last) { + assert(First <= Last); + assert(First >= 16 || Last < 16); + assert(First <= 31 && Last <= 31); + if (First == 8) + emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD8D15, Last, 0); + else if (First <= 15) + emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD0D15, First, Last); + else + emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD16D31, First, Last); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveLR(unsigned Offset) { + emitARMWinUnwindCode(Win64EH::UOP_SaveLR, 0, Offset); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFINop(bool Wide) { + if (Wide) + emitARMWinUnwindCode(Win64EH::UOP_WideNop, -1, 0); + else + emitARMWinUnwindCode(Win64EH::UOP_Nop, -1, 0); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIPrologEnd(bool Fragment) { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + + MCSymbol *Label = S.emitCFILabel(); + CurFrame->PrologEnd = Label; + WinEH::Instruction Inst = + WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0); + auto it = CurFrame->Instructions.begin(); + CurFrame->Instructions.insert(it, Inst); + CurFrame->Fragment = Fragment; +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogStart(unsigned Condition) { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + + InEpilogCFI = true; + CurrentEpilog = S.emitCFILabel(); + CurFrame->EpilogMap[CurrentEpilog].Condition = Condition; +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogEnd() { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + + if (!CurrentEpilog) { + S.getContext().reportError(SMLoc(), "Stray .seh_endepilogue in " + + CurFrame->Function->getName()); + return; + } + + std::vector &Epilog = + CurFrame->EpilogMap[CurrentEpilog].Instructions; + + unsigned UnwindCode = Win64EH::UOP_End; + if (!Epilog.empty()) { + WinEH::Instruction EndInstr = Epilog.back(); + if (EndInstr.Operation == Win64EH::UOP_Nop) { + UnwindCode = Win64EH::UOP_EndNop; + Epilog.pop_back(); + } else if (EndInstr.Operation == Win64EH::UOP_WideNop) { + UnwindCode = Win64EH::UOP_WideEndNop; + Epilog.pop_back(); + } + } + + InEpilogCFI = false; + WinEH::Instruction Inst = WinEH::Instruction(UnwindCode, nullptr, -1, 0); + CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst); + MCSymbol *Label = S.emitCFILabel(); + 
CurFrame->EpilogMap[CurrentEpilog].End = Label; + CurrentEpilog = nullptr; +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFICustom(unsigned Opcode) { + emitARMWinUnwindCode(Win64EH::UOP_Custom, 0, Opcode); +} + +} // end anonymous namespace + +MCTargetStreamer *llvm::createARMObjectTargetWinCOFFStreamer(MCStreamer &S) { + return new ARMTargetWinCOFFStreamer(S); +} diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index cfd275bc0621..30785340ef12 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -145,7 +145,8 @@ private: // Optimise the base and offsets of the given address bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI); // Try to fold consecutive geps together into one - Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder); + Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, unsigned &Scale, + IRBuilder<> &Builder); // Check whether these offsets could be moved out of the loop they're in bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI); // Pushes the given add out of the loop @@ -390,7 +391,7 @@ MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) { return ReturnFalse; // Check that the constant is small enough for an incrementing gather - int64_t Immediate = Const.getValue() << TypeScale; + int64_t Immediate = *Const << TypeScale; if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0) return ReturnFalse; @@ -964,7 +965,7 @@ static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) { bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI) { - LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n" + LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize: " << *Offsets << "\n"); // Optimise the addresses of gathers/scatters by moving invariant // calculations out of the loop @@ -1103,8 +1104,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, return true; } -static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, - IRBuilder<> &Builder) { +static Value *CheckAndCreateOffsetAdd(Value *X, unsigned ScaleX, Value *Y, + unsigned ScaleY, IRBuilder<> &Builder) { // Splat the non-vector value to a vector of the given type - if the value is // a constant (and its value isn't too big), we can even use this opportunity // to scale it to the size of the vector elements @@ -1156,40 +1157,49 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, ConstantInt *ConstYEl = dyn_cast(ConstY->getAggregateElement(i)); if (!ConstXEl || !ConstYEl || - ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >= + ConstXEl->getZExtValue() * ScaleX + + ConstYEl->getZExtValue() * ScaleY >= (unsigned)(1 << (TargetElemSize - 1))) return nullptr; } } - Value *Add = Builder.CreateAdd(X, Y); + Value *XScale = Builder.CreateVectorSplat( + XElType->getNumElements(), + Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX)); + Value *YScale = Builder.CreateVectorSplat( + YElType->getNumElements(), + Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY)); + Value *Add = Builder.CreateAdd(Builder.CreateMul(X, XScale), + Builder.CreateMul(Y, YScale)); - FixedVectorType *GEPType = cast(GEP->getType()); - if (checkOffsetSize(Add, GEPType->getNumElements())) + if (checkOffsetSize(Add, XElType->getNumElements())) return Add; else return nullptr; } Value 
*MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP, - Value *&Offsets, + Value *&Offsets, unsigned &Scale, IRBuilder<> &Builder) { Value *GEPPtr = GEP->getPointerOperand(); Offsets = GEP->getOperand(1); + Scale = DL->getTypeAllocSize(GEP->getSourceElementType()); // We only merge geps with constant offsets, because only for those // we can make sure that we do not cause an overflow - if (!isa(Offsets)) + if (GEP->getNumIndices() != 1 || !isa(Offsets)) return nullptr; - GetElementPtrInst *BaseGEP; - if ((BaseGEP = dyn_cast(GEPPtr))) { + if (GetElementPtrInst *BaseGEP = dyn_cast(GEPPtr)) { // Merge the two geps into one - Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder); + Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Scale, Builder); if (!BaseBasePtr) return nullptr; - Offsets = - CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder); + Offsets = CheckAndCreateOffsetAdd( + Offsets, Scale, GEP->getOperand(1), + DL->getTypeAllocSize(GEP->getSourceElementType()), Builder); if (Offsets == nullptr) return nullptr; + Scale = 1; // Scale is always an i8 at this point. return BaseBasePtr; } return GEPPtr; @@ -1206,15 +1216,24 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB, Builder.SetInsertPoint(GEP); Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); Value *Offsets; - Value *Base = foldGEP(GEP, Offsets, Builder); + unsigned Scale; + Value *Base = foldGEP(GEP, Offsets, Scale, Builder); // We only want to merge the geps if there is a real chance that they can be // used by an MVE gather; thus the offset has to have the correct size // (always i32 if it is not of vector type) and the base has to be a // pointer. if (Offsets && Base && Base != GEP) { + assert(Scale == 1 && "Expected to fold GEP to a scale of 1"); + Type *BaseTy = Builder.getInt8PtrTy(); + if (auto *VecTy = dyn_cast(Base->getType())) + BaseTy = FixedVectorType::get(BaseTy, VecTy); GetElementPtrInst *NewAddress = GetElementPtrInst::Create( - GEP->getSourceElementType(), Base, Offsets, "gep.merged", GEP); - GEP->replaceAllUsesWith(NewAddress); + Builder.getInt8Ty(), Builder.CreateBitCast(Base, BaseTy), Offsets, + "gep.merged", GEP); + LLVM_DEBUG(dbgs() << "Folded GEP: " << *GEP + << "\n new : " << *NewAddress << "\n"); + GEP->replaceAllUsesWith( + Builder.CreateBitCast(NewAddress, GEP->getType())); GEP = NewAddress; Changed = true; } diff --git a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp index 538bd10685b0..3e76efb5133f 100644 --- a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp +++ b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp @@ -45,6 +45,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -176,9 +177,8 @@ static bool tryInterleave(Instruction *Start, // Truncs case Instruction::Trunc: case Instruction::FPTrunc: - if (Truncs.count(I)) + if (!Truncs.insert(I)) continue; - Truncs.insert(I); Visited.insert(I); break; @@ -235,9 +235,8 @@ static bool tryInterleave(Instruction *Start, case Instruction::FAdd: case Instruction::FMul: case Instruction::Select: - if (Ops.count(I)) + if (!Ops.insert(I)) continue; - Ops.insert(I); for (Use &Op : I->operands()) { if (!isa(Op->getType())) diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp index 
7e31ea77f4f5..6bad9d61238e 100644 --- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp @@ -404,6 +404,17 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { LoopPhi->getOperand(3).setReg(DecReg); } + SmallVector Cond; // For analyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. + if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { + // If the LoopEnd falls through, need to insert a t2B to the fall-through + // block so that the non-analyzable t2LoopEndDec doesn't fall through. + MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); + BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B)) + .addMBB(&*MBBI) + .add(predOps(ARMCC::AL)); + } + // Replace the loop dec and loop end as a single instruction. MachineInstrBuilder MI = BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), @@ -1041,8 +1052,7 @@ bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { } bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget(); if (!STI.isThumb2() || !STI.hasLOB()) return false; diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index c7f451cba14f..d6d43b9143d6 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -312,8 +312,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { } bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget(); if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) return false; diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 71a82a1e3271..df64710712cc 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -176,7 +176,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. 
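For orientation before the size-accounting loop below: a minimal sketch, assuming r11 is the frame pointer, of the prologue shape this hunk teaches the pass to recognize (the same sequence appears in the "Skip Frame Record setup" comment further down):

// Editor's sketch of the Thumb1 AAPCS frame-record prologue (r11 assumed):
//
//   push {lr}        // save the return address
//   mov  lr, r11     // stage the caller's frame pointer in a pushable reg
//   push {lr}        // push it: the stacked {FP, LR} pair is the frame record
//   mov  r11, sp     // the new frame pointer addresses that record
//
// FRSize accounts for these two 4-byte slots so the later offset math can
// treat the frame record as its own spill area.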
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { @@ -205,26 +205,38 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, return; } + bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); + for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); + if (Reg == FramePtr) + FramePtrSpillFI = FI; switch (Reg) { + case ARM::R11: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R8: case ARM::R9: case ARM::R10: - case ARM::R11: if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } LLVM_FALLTHROUGH; + case ARM::LR: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: @@ -232,18 +244,53 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, } } + MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; + if (HasFrameRecordArea) { + // Skip Frame Record setup: + // push {lr} + // mov lr, r11 + // push {lr} + std::advance(MBBI, 2); + FRPush = MBBI++; + } + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + GPRCS1Push = MBBI; ++MBBI; } + // Find last push instruction for GPRCS2 - spilling of high registers + // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. + while (true) { + MachineBasicBlock::iterator OldMBBI = MBBI; + // Skip a run of tMOVr instructions + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && + MBBI->getFlag(MachineInstr::FrameSetup)) + MBBI++; + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && + MBBI->getFlag(MachineInstr::FrameSetup)) { + GPRCS2Push = MBBI; + MBBI++; + } else { + // We have reached an instruction which is not a push, so the previous + // run of tMOVr instructions (which may have been empty) was not part of + // the prologue. Reset MBBI back to the last PUSH of the prologue. + MBBI = OldMBBI; + break; + } + } + // Determine starting offsets of spill areas. 
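The offset bookkeeping that follows reads more easily as a small pure function. A sketch under assumed names (computeSpillAreaOffsets is hypothetical, the numbers illustrative), mirroring the updated formula that now carves out the frame-record area as well:

struct SpillAreaOffsets {
  unsigned DPRCS, GPRCS2, GPRCS1;
};

// Areas are laid out from the stack bottom up, so each offset is the
// previous one plus that area's size.
static SpillAreaOffsets computeSpillAreaOffsets(unsigned NumBytes,
                                                unsigned ArgRegsSaveSize,
                                                unsigned FRSize,
                                                unsigned GPRCS1Size,
                                                unsigned GPRCS2Size,
                                                unsigned DPRCSSize) {
  SpillAreaOffsets O;
  O.DPRCS = NumBytes - ArgRegsSaveSize -
            (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
  O.GPRCS2 = O.DPRCS + DPRCSSize;
  O.GPRCS1 = O.GPRCS2 + GPRCS2Size;
  return O;
}
// e.g. NumBytes=40, ArgRegsSaveSize=0, FRSize=8, GPRCS1Size=12, GPRCS2Size=4,
// DPRCSSize=0 gives DPRCS@16, GPRCS2@16, GPRCS1@20.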
- unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - + (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); + if (HasFrameRecordArea) + AFI->setFrameRecordSavedAreaSize(FRSize); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -252,71 +299,45 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { + tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } - - if (adjustedGPRCS1Size) { - CFAOffset += adjustedGPRCS1Size; - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) - break; - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - } + CFAOffset += adjustedGPRCS1Size; // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += - MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP) - .addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)); + MachineBasicBlock::iterator AfterPush = + HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); + if (HasFrameRecordArea) { + // We have just finished pushing the previous FP into the stack, + // so simply capture the SP value as the new Frame Pointer. 
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) + .addReg(ARM::SP) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } else { + FramePtrOffsetInBlock += + MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; + BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } + if(FramePtrOffsetInBlock) { - CFAOffset -= FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } @@ -326,45 +347,69 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } - // Skip past the spilling of r8-r11, which could consist of multiple tPUSH - // and tMOVr instructions. We don't need to add any call frame information - // in-between these instructions, because they do not modify the high - // registers. - while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; + // Emit call frame information for the callee-saved low registers. + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + if (adjustedGPRCS1Size) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) + break; + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } } } // Emit call frame information for the callee-saved high registers. 
- for (auto &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; + if (GPRCS2Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); + for (auto &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + default: + break; + } } } @@ -453,21 +498,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); } -static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { - if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() && - isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs)) - return true; - else if (MI.getOpcode() == ARM::tPOP) { - return true; - } else if (MI.getOpcode() == ARM::tMOVr) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) && - ARM::hGPRRegClass.contains(Dst)); - } - return false; -} - void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); @@ -483,26 +513,26 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI.getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); Register FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize, ARM::NoRegister, - MachineInstr::NoFlags); + MachineInstr::FrameDestroy); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; - while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs)); - if (!isCSRestore(*MBBI, CSRegs)) + while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy)); + if (!MBBI->getFlag(MachineInstr::FrameDestroy)) ++MBBI; } // Move SP to start of FP callee save spill area. 
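As a worked example of the subtraction below (hypothetical helper, illustrative numbers; the frame-record size is the newly added term):

// Editor's sketch: bytes of locals the epilogue must deallocate before SP
// reaches the frame-record/callee-save block.
static unsigned localsToDeallocate(unsigned StackSize, unsigned FRSize,
                                   unsigned GPRCS1Size, unsigned GPRCS2Size,
                                   unsigned DPRCSSize,
                                   unsigned ArgRegsSaveSize) {
  // e.g. 40 - (8 + 12 + 4 + 0 + 0) = 16
  return StackSize -
         (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize + ArgRegsSaveSize);
}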
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (AFI->getFrameRecordSavedAreaSize() + + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); @@ -516,14 +546,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, assert(!MFI.getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, - TII, *RegInfo); + TII, *RegInfo, MachineInstr::FrameDestroy); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } else { // For a large stack frame, we might need a scratch register to store // the size of the frame. We know all callee-save registers are free @@ -542,10 +574,10 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, - ScratchRegister, MachineInstr::NoFlags); + ScratchRegister, MachineInstr::FrameDestroy); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, - ScratchRegister, MachineInstr::NoFlags); + ScratchRegister, MachineInstr::FrameDestroy); } } @@ -637,7 +669,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef())) @@ -725,18 +758,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, .addReg(PopReg, RegState::Define) .addReg(ARM::SP) .addImm(MBBI->getNumExplicitOperands() - 2) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); // Move from the temporary register to the LR. BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(PopReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); // Advance past the pop instruction. MBBI++; // Increment the SP. emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4, ARM::NoRegister, - MachineInstr::NoFlags); + MachineInstr::FrameDestroy); return true; } @@ -746,7 +781,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(TemporaryReg, RegState::Define) .addReg(PopReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { @@ -754,7 +790,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // perform the opposite conversion: tPOP_RET to tPOP. 
     MachineInstrBuilder MIB =
         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
-            .add(predOps(ARMCC::AL));
+            .add(predOps(ARMCC::AL))
+            .setMIFlag(MachineInstr::FrameDestroy);
     bool Popped = false;
     for (auto MO: MBBI->operands())
       if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
@@ -769,90 +806,82 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
     // Erase the old instruction.
     MBB.erase(MBBI);
     MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
-               .add(predOps(ARMCC::AL));
+               .add(predOps(ARMCC::AL))
+               .setMIFlag(MachineInstr::FrameDestroy);
   }
 
   assert(PopReg && "Do not know how to get LR");
   BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
       .add(predOps(ARMCC::AL))
-      .addReg(PopReg, RegState::Define);
+      .addReg(PopReg, RegState::Define)
+      .setMIFlag(MachineInstr::FrameDestroy);
 
   emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
-                               ARM::NoRegister, MachineInstr::NoFlags);
+                               ARM::NoRegister, MachineInstr::FrameDestroy);
 
   BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
       .addReg(ARM::LR, RegState::Define)
       .addReg(PopReg, RegState::Kill)
-      .add(predOps(ARMCC::AL));
+      .add(predOps(ARMCC::AL))
+      .setMIFlag(MachineInstr::FrameDestroy);
 
   if (TemporaryReg)
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
         .addReg(PopReg, RegState::Define)
        .addReg(TemporaryReg, RegState::Kill)
-        .add(predOps(ARMCC::AL));
+        .add(predOps(ARMCC::AL))
+        .setMIFlag(MachineInstr::FrameDestroy);
 
   return true;
 }
 
-using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>;
-
-// Return the first iteraror after CurrentReg which is present in EnabledRegs,
-// or OrderEnd if no further registers are in that set. This does not advance
-// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs.
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
-                                          const ARMRegSet &EnabledRegs,
-                                          const unsigned *OrderEnd) {
-  while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg])
-    ++CurrentReg;
-  return CurrentReg;
-}
-
-bool Thumb1FrameLowering::spillCalleeSavedRegisters(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  const TargetInstrInfo &TII = *STI.getInstrInfo();
-  MachineFunction &MF = *MBB.getParent();
-  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
-      MF.getSubtarget().getRegisterInfo());
-
-  ARMRegSet LoRegsToSave;  // r0-r7, lr
-  ARMRegSet HiRegsToSave;  // r8-r11
-  ARMRegSet CopyRegs;      // Registers which can be used after pushing
-                           // LoRegs for saving HiRegs.
-
-  for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
-    Register Reg = I.getReg();
-
+static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
+                                                     ARM::R7, ARM::LR};
+static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
+                                                      ARM::R10, ARM::R11};
+static const SmallVector<Register> OrderedCopyRegs = {
+    ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+    ARM::R5, ARM::R6, ARM::R7, ARM::LR};
+
+static void splitLowAndHighRegs(const std::set<Register> &Regs,
+                                std::set<Register> &LowRegs,
+                                std::set<Register> &HighRegs) {
+  for (Register Reg : Regs) {
     if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
-      LoRegsToSave[Reg] = true;
+      LowRegs.insert(Reg);
     } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
-      HiRegsToSave[Reg] = true;
+      HighRegs.insert(Reg);
     } else {
       llvm_unreachable("callee-saved register of unexpected class");
     }
-
-    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
-        !MF.getRegInfo().isLiveIn(Reg) &&
-        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
-      CopyRegs[Reg] = true;
   }
+}
 
-  // Unused argument registers can be used for the high register saving.
-  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
-    if (!MF.getRegInfo().isLiveIn(ArgReg))
-      CopyRegs[ArgReg] = true;
+template <typename It>
+It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
+                     const std::set<Register> &RegSet) {
+  return std::find_if(OrderedStartIt, OrderedEndIt,
+                      [&](Register Reg) { return RegSet.count(Reg); });
+}
 
-  // Push the low registers and lr
+static void pushRegsToStack(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const TargetInstrInfo &TII,
+                            const std::set<Register> &RegsToSave,
+                            const std::set<Register> &CopyRegs) {
+  MachineFunction &MF = *MBB.getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  if (!LoRegsToSave.none()) {
+  DebugLoc DL;
+
+  std::set<Register> LowRegs, HighRegs;
+  splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
+
+  // Push low regs first
+  if (!LowRegs.empty()) {
     MachineInstrBuilder MIB =
         BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
-    for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
-      if (LoRegsToSave[Reg]) {
+    for (unsigned Reg : OrderedLowRegs) {
+      if (LowRegs.count(Reg)) {
         bool isKill = !MRI.isLiveIn(Reg);
         if (isKill && !MRI.isReserved(Reg))
           MBB.addLiveIn(Reg);
@@ -863,31 +892,26 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
     MIB.setMIFlags(MachineInstr::FrameSetup);
   }
 
-  // Push the high registers. There are no store instructions that can access
-  // these registers directly, so we have to move them to low registers, and
-  // push them. This might take multiple pushes, as it is possible for there to
+  // Now push the high registers
+  // There are no store instructions that can access high registers directly,
+  // so we have to move them to low registers, and push them.
+  // This might take multiple pushes, as it is possible for there to
   // be fewer low registers available than high registers which need saving.
-  // These are in reverse order so that in the case where we need to use
+  // Find the first register to save.
+  // Registers must be processed in reverse order so that in case we need to use
   // multiple PUSH instructions, the order of the registers on the stack still
   // matches the unwind info. They need to be switched back to ascending order
   // before adding to the PUSH instruction.
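To see the reverse-order selection in isolation, a self-contained sketch of the same idiom (toy register numbers and the main driver are illustrative only; the real code walks OrderedHighRegs.rbegin()..rend() exactly like this):

#include <algorithm>
#include <iterator>
#include <set>
#include <vector>

using Register = unsigned;

// Same shape as getNextOrderedReg above: first register at or after First
// that is also in RegSet.
template <typename It>
It getNextOrderedReg(It First, It Last, const std::set<Register> &RegSet) {
  return std::find_if(First, Last,
                      [&](Register Reg) { return RegSet.count(Reg); });
}

int main() {
  const std::vector<Register> OrderedHighRegs = {8, 9, 10, 11}; // r8..r11
  const std::set<Register> HighRegs = {8, 10};                  // to save
  // The reverse walk visits r10 then r8, so even when several PUSH
  // instructions are needed, the saved bytes land on the stack in ascending
  // register order, which is what the unwind info expects.
  for (auto It = getNextOrderedReg(OrderedHighRegs.rbegin(),
                                   OrderedHighRegs.rend(), HighRegs);
       It != OrderedHighRegs.rend();
       It = getNextOrderedReg(std::next(It), OrderedHighRegs.rend(), HighRegs))
    /* emit "mov rCopy, r<*It>" here and collect rCopy for the PUSH */;
}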
- static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, - ARM::R5, ARM::R4, ARM::R3, - ARM::R2, ARM::R1, ARM::R0}; - static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; + auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), + OrderedHighRegs.rend(), + HighRegs); - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); - - // Find the first register to save. - const unsigned *HiRegToSave = findNextOrderedReg( - std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); - - while (HiRegToSave != AllHighRegsEnd) { + while (HiRegToSave != OrderedHighRegs.rend()) { // Find the first low register to use. - const unsigned *CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), + OrderedCopyRegs.rend(), + CopyRegs); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) @@ -895,25 +919,29 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( .setMIFlags(MachineInstr::FrameSetup); SmallVector<unsigned, 4> RegsToPush; - while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { - if (HiRegsToSave[*HiRegToSave]) { + while (HiRegToSave != OrderedHighRegs.rend() && + CopyRegIt != OrderedCopyRegs.rend()) { + if (HighRegs.count(*HiRegToSave)) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(*CopyReg, RegState::Define) + .addReg(*CopyRegIt, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)) .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyReg); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToSave = - findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); + RegsToPush.push_back(*CopyRegIt); + + CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), + OrderedCopyRegs.rend(), + CopyRegs); + HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), + OrderedHighRegs.rend(), + HighRegs); } } @@ -924,84 +952,63 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } - - return true; } -bool Thumb1FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; +static void popRegsFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MI, + const TargetInstrInfo &TII, + const std::set<Register> &RegsToRestore, + const std::set<Register> &AvailableCopyRegs, + bool IsVarArg, bool HasV5Ops) { + if (RegsToRestore.empty()) + return; MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); - - bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - ARMRegSet LoRegsToRestore; - ARMRegSet HiRegsToRestore; - // Low registers (r0-r7) which can be used to restore the high registers.
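The reverse iteration above is the subtle part: high registers are consumed from r11 down, and each batch is flipped back to ascending order before being added to its PUSH, so the combined stack image matches one big push. A toy simulation of that invariant (an invented model, not LLVM code):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // High registers to save (ascending) and the low regs free for copies.
  const std::vector<int> High = {8, 9, 10, 11};
  const std::vector<int> Copies = {2, 3}; // only two copy regs -> two PUSHes

  // Walk the high regs in reverse; each batch is at most Copies.size() wide,
  // and is reversed back to ascending order before "emitting" the PUSH.
  std::vector<std::vector<int>> Pushes;
  for (auto It = High.rbegin(); It != High.rend();) {
    std::vector<int> Batch;
    for (size_t C = 0; C < Copies.size() && It != High.rend(); ++C, ++It)
      Batch.push_back(*It);
    std::reverse(Batch.begin(), Batch.end());
    Pushes.push_back(Batch);
  }

  // Prints "push {r10, r11}" then "push {r8, r9}"; r11 lands at the highest
  // address, exactly where a single push {r8-r11} would have put it.
  for (const auto &Batch : Pushes) {
    std::printf("push {");
    for (size_t I = 0; I < Batch.size(); ++I)
      std::printf("%sr%d", I ? ", " : "", Batch[I]);
    std::printf("}\n");
  }
  return 0;
}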
- ARMRegSet CopyRegs; + std::set<Register> LowRegs, HighRegs; + splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); - for (CalleeSavedInfo I : CSI) { - Register Reg = I.getReg(); - - if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToRestore[Reg] = true; - } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToRestore[Reg] = true; - } else { - llvm_unreachable("callee-saved register of unexpected class"); - } - - // If this is a low register not used as the frame pointer, we may want to - // use it for restoring the high registers. - if ((ARM::tGPRRegClass.contains(Reg)) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; - } - - // If this is a return block, we may be able to use some unused return value - // registers for restoring the high regs. - auto Terminator = MBB.getFirstTerminator(); - if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - CopyRegs[ARM::R0] = true; - CopyRegs[ARM::R1] = true; - CopyRegs[ARM::R2] = true; - CopyRegs[ARM::R3] = true; - for (auto Op : Terminator->implicit_operands()) { - if (Op.isReg()) - CopyRegs[Op.getReg()] = false; - } - } - - static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7}; - static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; - - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); + // Pop the high registers first + // There are no load instructions that can access high registers directly, + // so we have to pop into low registers and then move to the high registers. + // This might take multiple pops, as it is possible for there to + // be fewer low registers available than high registers which need restoring. // Find the first register to restore. - auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), - HiRegsToRestore, AllHighRegsEnd); + auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), + OrderedHighRegs.end(), + HighRegs); + + std::set<Register> CopyRegs = AvailableCopyRegs; + Register LowScratchReg; + if (!HighRegs.empty() && CopyRegs.empty()) { + // No copy regs are available to pop high regs. Let's make use of a return + // register and the scratch register (IP/R12) to copy things around. + LowScratchReg = ARM::R0; + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(LowScratchReg, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + CopyRegs.insert(LowScratchReg); + } - while (HiRegToRestore != AllHighRegsEnd) { - assert(!CopyRegs.none()); + while (HiRegToRestore != OrderedHighRegs.end()) { + assert(!CopyRegs.empty()); // Find the first low register to use. - auto CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), + OrderedCopyRegs.end(), + CopyRegs); // Create the POP instruction. - MachineInstrBuilder PopMIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); + MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); - while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { + while (HiRegToRestore != OrderedHighRegs.end() && + CopyReg != OrderedCopyRegs.end()) { // Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define); @@ -1009,64 +1016,189 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters( BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) .addReg(*HiRegToRestore, RegState::Define) .addReg(*CopyReg, RegState::Kill) - .add(predOps(ARMCC::AL)); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToRestore = - findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + + CopyReg = getNextOrderedReg(std::next(CopyReg), + OrderedCopyRegs.end(), + CopyRegs); + HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), + OrderedHighRegs.end(), + HighRegs); } } - MachineInstrBuilder MIB = - BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); - - bool NeedsPop = false; - for (CalleeSavedInfo &Info : llvm::reverse(CSI)) { - Register Reg = Info.getReg(); - - // High registers (excluding lr) have already been dealt with - if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) - continue; - - if (Reg == ARM::LR) { - Info.setRestored(false); - if (!MBB.succ_empty() || - MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) - // LR may only be popped into PC, as part of return sequence. - // If this isn't the return sequence, we'll need emitPopSpecialFixUp - // to restore LR the hard way. - // FIXME: if we don't pass any stack arguments it would be actually - // advantageous *and* correct to do the conversion to an ordinary call - // instruction here. - continue; - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; + // Restore low register used as scratch if necessary + if (LowScratchReg.isValid()) { + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(LowScratchReg, RegState::Define) + .addReg(ARM::R12, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + } - // CMSE entry functions must return via BXNS, see emitEpilogue. - if (AFI->isCmseNSEntryFunction()) + // Now pop the low registers + if (!LowRegs.empty()) { + MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + + bool NeedsPop = false; + for (Register Reg : OrderedLowRegs) { + if (!LowRegs.count(Reg)) continue; - // Pop LR into PC. - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); + if (Reg == ARM::LR) { + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (IsVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!HasV5Ops) + continue; + + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; + + // Pop LR into PC. 
+ Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); + } + MIB.addReg(Reg, getDefRegState(true)); + NeedsPop = true; } - MIB.addReg(Reg, getDefRegState(true)); - NeedsPop = true; + + // It's illegal to emit pop instruction without operands. + if (NeedsPop) + MBB.insert(MI, &*MIB); + else + MF.deleteMachineInstr(MIB); + } +} + +bool Thumb1FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + const TargetInstrInfo &TII = *STI.getInstrInfo(); + MachineFunction &MF = *MBB.getParent(); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate push sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set<Register> FrameRecord; + std::set<Register> SpilledGPRs; + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); } - // It's illegal to emit pop instruction without operands. - if (NeedsPop) - MBB.insert(MI, &*MIB); - else - MF.deleteMachineInstr(MIB); + pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); + + // Determine intermediate registers which can be used for pushing high regs: + // - Spilled low regs + // - Unused argument registers + std::set<Register> CopyRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && + !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + if (!MF.getRegInfo().isLiveIn(ArgReg)) + CopyRegs.insert(ArgReg); + + pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); + + return true; +} + +bool Thumb1FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); + bool IsVarArg = AFI->getArgRegsSaveSize() > 0; + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate pop sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set<Register> FrameRecord; + std::set<Register> SpilledGPRs; + for (CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + + if (Reg == ARM::LR) + I.setRestored(false); + } + + // Determine intermediate registers which can be used for popping high regs: + // - Spilled low regs + // - Unused return registers + std::set<Register> CopyRegs; + std::set<Register> UnusedReturnRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + auto Terminator = MBB.getFirstTerminator(); + if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { + UnusedReturnRegs.insert(ARM::R0); + UnusedReturnRegs.insert(ARM::R1); +
UnusedReturnRegs.insert(ARM::R2); + UnusedReturnRegs.insert(ARM::R3); + for (auto Op : Terminator->implicit_operands()) { + if (Op.isReg()) + UnusedReturnRegs.erase(Op.getReg()); + } + } + CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); + + // First pop regular spilled regs. + popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, + STI.hasV5TOps()); + + // LR may only be popped into pc, as part of a return sequence. + // Check that no other pop instructions are inserted after that. + assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && + "Can't insert pop after return sequence"); + + // Now pop Frame Record regs. + // Only unused return registers can be used as copy regs at this point. + popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, + STI.hasV5TOps()); return true; } diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index 5cdaa7f02201..155555152ced 100644 --- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -226,9 +226,10 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) { ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; - // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it + // IT blocks are limited to one conditional op if -arm-restrict-it // is set: skip the loop if (!restrictIT) { + LLVM_DEBUG(dbgs() << "Allowing complex IT block\n";); // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && @@ -283,8 +284,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) { } bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); if (!STI.isThumb2()) return false; AFI = Fn.getInfo<ARMFunctionInfo>(); diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index ebd139af2219..60dbc7b92013 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -555,7 +555,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. - do MI.RemoveOperand(FrameRegIdx+1); + do MI.removeOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); MIB.add(predOps(ARMCC::AL)); @@ -592,7 +592,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); // Remove the cc_out operand.
if (HasCCOut) - MI.RemoveOperand(MI.getNumOperands()-1); + MI.removeOperand(MI.getNumOperands()-1); Offset = 0; return true; } @@ -626,7 +626,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } - MI.RemoveOperand(FrameRegIdx+1); + MI.removeOperand(FrameRegIdx+1); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0); NewOpc = immediateOffsetOpcode(Opcode); AddrMode = ARMII::AddrModeT2_i12; diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 1cc5422523f1..7ae4b19afb60 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -205,11 +206,11 @@ namespace { bool IsSelfLoop); /// ReduceMI - Attempt to reduce MI, return true on success. - bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, - bool LiveCPSR, bool IsSelfLoop); + bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR, + bool IsSelfLoop, bool SkipPrologueEpilogue); /// ReduceMBB - Reduce width of instructions in the specified basic block. - bool ReduceMBB(MachineBasicBlock &MBB); + bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue); bool OptimizeSize; bool MinimizeSize; @@ -620,7 +621,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -668,7 +669,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -848,7 +849,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -971,7 +972,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -1012,11 +1013,15 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { } bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, - bool LiveCPSR, bool IsSelfLoop) { + bool LiveCPSR, bool IsSelfLoop, + bool SkipPrologueEpilogue) { unsigned Opcode = MI->getOpcode(); DenseMap::iterator OPI = ReduceOpcodeMap.find(Opcode); if (OPI == ReduceOpcodeMap.end()) return false; + if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) || + MI->getFlag(MachineInstr::FrameDestroy))) + return false; const ReduceEntry &Entry = ReduceTable[OPI->second]; // Don't attempt normal reductions on "special" cases for now. 
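The early-out added to ReduceMI above consults the same FrameSetup/FrameDestroy flags that the ARM frame lowering now sets, so instructions described by Windows CFI keep their 32-bit encodings. A self-contained sketch of the predicate under assumed toy types (the real ones are MachineInstr and MachineInstr::MIFlag):

#include <cassert>

enum MIFlag { FrameSetup = 1 << 0, FrameDestroy = 1 << 1 };
struct Insn {
  unsigned Flags = 0;
  bool getFlag(MIFlag F) const { return Flags & F; }
};

// Mirrors the early-out added to ReduceMI: never narrow prologue/epilogue
// instructions when the unwind info must describe their exact encodings.
static bool mayReduce(const Insn &MI, bool SkipPrologueEpilogue) {
  return !(SkipPrologueEpilogue &&
           (MI.getFlag(FrameSetup) || MI.getFlag(FrameDestroy)));
}

int main() {
  Insn Prologue{FrameSetup}, Middle{};
  assert(!mayReduce(Prologue, /*SkipPrologueEpilogue=*/true));
  assert(mayReduce(Middle, /*SkipPrologueEpilogue=*/true));
  assert(mayReduce(Prologue, /*SkipPrologueEpilogue=*/false));
  return 0;
}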
@@ -1036,7 +1041,8 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, return false; } -bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { +bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB, + bool SkipPrologueEpilogue) { bool Modified = false; // Yes, CPSR could be livein. @@ -1080,7 +1086,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Does NextMII belong to the same bundle as MI? bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); - if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) { + if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) { Modified = true; MachineBasicBlock::instr_iterator I = std::prev(NextMII); MI = &*I; @@ -1130,7 +1136,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { if (PredicateFtor && !PredicateFtor(MF.getFunction())) return false; - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); if (STI->isThumb1Only() || STI->prefers32BitThumb()) return false; @@ -1147,8 +1153,10 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { // predecessors. ReversePostOrderTraversal RPOT(&MF); bool Modified = false; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); for (MachineBasicBlock *MBB : RPOT) - Modified |= ReduceMBB(*MBB); + Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI); return Modified; } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 5d2bc4ebe191..2a3fa3b31512 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -37,7 +37,7 @@ extern cl::opt ReuseFrameIndexVals; using namespace llvm; -ThumbRegisterInfo::ThumbRegisterInfo() {} +ThumbRegisterInfo::ThumbRegisterInfo() = default; const TargetRegisterClass * ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, @@ -338,7 +338,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, static void removeOperands(MachineInstr &MI, unsigned i) { unsigned Op = i; for (unsigned e = MI.getNumOperands(); i != e; ++i) - MI.RemoveOperand(Op); + MI.removeOperand(Op); } /// convertToNonSPOpcode - Change the opcode to the non-SP version, because @@ -361,6 +361,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); assert(MBB.getParent()->getSubtarget().isThumb1Only() && "This isn't needed for thumb2!"); DebugLoc dl = MI.getDebugLoc(); @@ -396,7 +397,18 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with the frame register (e.g., sp). - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + Register DestReg = FrameReg; + + // In case FrameReg is a high register, move it to a low reg to ensure it + // can be used as an operand. 
+ if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) { + DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); + BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } + + MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false); ImmOp.ChangeToImmediate(ImmedOffset); // If we're using a register where sp was stored, convert the instruction @@ -517,7 +529,16 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); - UseRR = true; + if (!ARM::hGPRRegClass.contains(FrameReg)) { + UseRR = true; + } else { + // If FrameReg is a high register, add the reg values in a separate + // instruction as the load won't be able to access it. + BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg) + .addReg(TmpReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } } } else { emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII, @@ -526,11 +547,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) + if (UseRR) { + assert(!ARM::hGPRRegClass.contains(FrameReg) && + "Thumb1 loads can't use high register"); // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, false); + } } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -541,18 +565,30 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); - UseRR = true; + if (!ARM::hGPRRegClass.contains(FrameReg)) { + UseRR = true; + } else { + // If FrameReg is a high register, add the reg values in a separate + // instruction as the load won't be able to access it. + BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg) + .addReg(VReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } } } else emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); - if (UseRR) + if (UseRR) { + assert(!ARM::hGPRRegClass.contains(FrameReg) && + "Thumb1 stores can't use high register"); // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. 
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, false); + } } else { llvm_unreachable("Unexpected opcode!"); } diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h index 0b512172ba10..d29dc5f70e72 100644 --- a/llvm/lib/Target/AVR/AVR.h +++ b/llvm/lib/Target/AVR/AVR.h @@ -15,6 +15,8 @@ #define LLVM_AVR_H #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -27,12 +29,10 @@ FunctionPass *createAVRISelDag(AVRTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createAVRExpandPseudoPass(); FunctionPass *createAVRFrameAnalyzerPass(); -FunctionPass *createAVRRelaxMemPass(); FunctionPass *createAVRBranchSelectionPass(); void initializeAVRShiftExpandPass(PassRegistry &); void initializeAVRExpandPseudoPass(PassRegistry &); -void initializeAVRRelaxMemPass(PassRegistry &); /// Contains the AVR backend. namespace AVR { diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp index 259ab1bc7aec..0001e520b1fb 100644 --- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp @@ -14,6 +14,7 @@ #include "AVR.h" #include "AVRMCInstLower.h" #include "AVRSubtarget.h" +#include "AVRTargetMachine.h" #include "MCTargetDesc/AVRInstPrinter.h" #include "MCTargetDesc/AVRMCExpr.h" #include "TargetInfo/AVRTargetInfo.h" @@ -21,6 +22,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Mangler.h" @@ -60,6 +62,8 @@ public: bool doFinalization(Module &M) override; + void emitStartOfAsmFile(Module &M) override; + private: const MCRegisterInfo &MRI; bool EmittedStructorSymbolAttrs = false; @@ -236,6 +240,45 @@ bool AVRAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +void AVRAsmPrinter::emitStartOfAsmFile(Module &M) { + const AVRTargetMachine &TM = (const AVRTargetMachine &)MMI->getTarget(); + const AVRSubtarget *SubTM = (const AVRSubtarget *)TM.getSubtargetImpl(); + if (!SubTM) + return; + + // Emit __tmp_reg__. + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__tmp_reg__")), + MCConstantExpr::create(SubTM->getRegTmpIndex(), MMI->getContext())); + // Emit __zero_reg__. + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__zero_reg__")), + MCConstantExpr::create(SubTM->getRegZeroIndex(), MMI->getContext())); + // Emit __SREG__. + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__SREG__")), + MCConstantExpr::create(SubTM->getIORegSREG(), MMI->getContext())); + // Emit __SP_H__ if available. + if (!SubTM->hasSmallStack()) + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__SP_H__")), + MCConstantExpr::create(SubTM->getIORegSPH(), MMI->getContext())); + // Emit __SP_L__. + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__SP_L__")), + MCConstantExpr::create(SubTM->getIORegSPL(), MMI->getContext())); + // Emit __EIND__ if available. + if (SubTM->hasEIJMPCALL()) + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__EIND__")), + MCConstantExpr::create(SubTM->getIORegEIND(), MMI->getContext())); + // Emit __RAMPZ__ if available. 
+ if (SubTM->hasELPM()) + OutStreamer->emitAssignment( + MMI->getContext().getOrCreateSymbol(StringRef("__RAMPZ__")), + MCConstantExpr::create(SubTM->getIORegRAMPZ(), MMI->getContext())); +} + } // end of namespace llvm extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRAsmPrinter() { diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td index b4bc35e191c0..314d59bc2a59 100644 --- a/llvm/lib/Target/AVR/AVRCallingConv.td +++ b/llvm/lib/Target/AVR/AVRCallingConv.td @@ -27,6 +27,8 @@ def RetCC_AVR_BUILTIN : CallingConv<[ // Calling convention for variadic functions. def ArgCC_AVR_Vararg : CallingConv<[ + // i8 are always passed through the stack with a byte slot and byte alignment. + CCIfType<[i8], CCAssignToStack<1, 1>>, // i16 are always passed through the stack with an alignment of 1. CCAssignToStack<2, 1> ]>; @@ -36,4 +38,6 @@ def ArgCC_AVR_Vararg : CallingConv<[ //===----------------------------------------------------------------------===// def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>; +def CSR_NormalTiny : CalleeSavedRegs<(add R29, R28, R19, R18)>; def CSR_Interrupts : CalleeSavedRegs<(add(sequence "R%u", 31, 2))>; +def CSR_InterruptsTiny : CalleeSavedRegs<(add(sequence "R%u", 31, 18))>; diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td index 7ad0fe904a81..3eb5a16204e7 100644 --- a/llvm/lib/Target/AVR/AVRDevices.td +++ b/llvm/lib/Target/AVR/AVRDevices.td @@ -174,15 +174,13 @@ def FamilyAVR35 : Family<"avr35", [FamilyAVR3, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureBREAK]>; -def FamilyAVR4 : Family<"avr4", [ - FamilyAVR2, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, - FeatureBREAK -]>; +def FamilyAVR4 : Family<"avr4", + [FamilyAVR2, FeatureMultiplication, FeatureMOVW, + FeatureLPMX, FeatureSPM, FeatureBREAK]>; -def FamilyAVR5 : Family<"avr5", [ - FamilyAVR3, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, - FeatureBREAK -]>; +def FamilyAVR5 : Family<"avr5", + [FamilyAVR3, FeatureMultiplication, FeatureMOVW, + FeatureLPMX, FeatureSPM, FeatureBREAK]>; def FamilyAVR51 : Family<"avr51", [FamilyAVR5, FeatureELPM, FeatureELPMX]>; @@ -190,14 +188,21 @@ def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>; def FamilyTiny : Family<"avrtiny", - [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>; - -def FamilyXMEGA : Family<"xmega", [ - FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW, FeatureSRAM, - FeatureJMPCALL, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, - FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX, FeatureDES, FeatureELPM, - FeatureELPMX -]>; + [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding, + FeatureSmallStack]>; + +def FamilyXMEGA3 : Family<"xmega3", + [FamilyAVR0, FeatureLPM, FeatureIJMPCALL, + FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL, + FeatureMultiplication, FeatureMOVW, FeatureLPMX, + FeatureBREAK]>; + +def FamilyXMEGA : Family<"xmega", + [FamilyAVR0, FeatureLPM, FeatureIJMPCALL, + FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL, + FeatureMultiplication, FeatureMOVW, FeatureLPMX, + FeatureSPM, FeatureBREAK, FeatureEIJMPCALL, + FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX]>; def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>; @@ -237,7 +242,7 @@ def : Device<"avr51", FamilyAVR51, ELFArchAVR51>; def : Device<"avr6", FamilyAVR6, ELFArchAVR6>; def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>; def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>; -def : Device<"avrxmega3", FamilyXMEGA, 
ELFArchXMEGA3>; +def : Device<"avrxmega3", FamilyXMEGA3, ELFArchXMEGA3>; def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>; def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>; def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>; @@ -245,41 +250,44 @@ def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>; def : Device<"avrtiny", FamilyTiny, ELFArchTiny>; // Specific MCUs -def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>; -def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>; -def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>; -def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>; -def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>; -def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>; -def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>; -def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>; -def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>; -def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>; -def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>; +// NOTE: This list has been synchronized with gcc-avr 5.4.0 and avr-libc 2.0.0. +def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1, [FeatureSmallStack]>; +def : Device<"attiny11", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>; +def : Device<"attiny12", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>; +def : Device<"attiny15", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>; +def : Device<"attiny28", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>; +def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"attiny22", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, + [FeatureLPMX, FeatureSmallStack]>; def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, [FeatureMOVW, FeatureLPMX]>; -def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>; -def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>; -def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; +def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>; def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>; def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>; def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>; def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>; +def : Device<"ata6616c", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny13", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; +def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; +def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; +def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; +def : Device<"attiny24", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; +def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>; def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>; def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>; 
def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>; def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny25", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>; def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>; -def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny261", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; +def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>; def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>; def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>; def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>; @@ -299,6 +307,8 @@ def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>; def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>; def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>; def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>; +def : Device<"ata6617c", FamilyAVR35, ELFArchAVR35>; +def : Device<"ata664251", FamilyAVR35, ELFArchAVR35>; def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>; def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>; def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>; @@ -310,6 +320,7 @@ def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4, [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>; def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata6612c", FamilyAVR4, ELFArchAVR4>; def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>; def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>; def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>; @@ -331,8 +342,17 @@ def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>; def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>; def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>; def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata5702m322", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata5782", FamilyAVR5, ELFArchAVR5>; def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata5790n", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata5791", FamilyAVR5, ELFArchAVR5>; def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata5831", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata6613c", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata6614q", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata8210", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata8510", FamilyAVR5, ELFArchAVR5>; def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>; def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>; def : Device<"atmega161", FamilyAVR3, ELFArchAVR5, @@ -411,6 +431,7 @@ def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>; def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>; def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>; def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64hve2", FamilyAVR5, ELFArchAVR5>; def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>; def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>; def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>; @@ -452,12 +473,13 @@ def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>; def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>; def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>; def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega32c3", FamilyXMEGAU, ELFArchXMEGA2>; def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega32d3", FamilyXMEGA, ELFArchXMEGA2>; 
def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>; def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>; def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>; def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>; -def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>; def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>; def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>; def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>; @@ -498,28 +520,39 @@ def : Device<"attiny20", FamilyTiny, ELFArchTiny>; def : Device<"attiny40", FamilyTiny, ELFArchTiny>; def : Device<"attiny102", FamilyTiny, ELFArchTiny>; def : Device<"attiny104", FamilyTiny, ELFArchTiny>; -def : Device<"attiny202", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny402", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny204", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny404", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny804", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny1604", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny406", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny806", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny1606", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny807", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny1607", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny212", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny412", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny214", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny414", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny814", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny1614", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny416", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny816", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny1616", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny3216", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny417", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny817", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny1617", FamilyXMEGA, ELFArchXMEGA3>; -def : Device<"attiny3217", FamilyXMEGA, ELFArchXMEGA3>; +def : Device<"attiny202", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny402", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny204", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny404", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny804", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1604", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny406", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny806", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1606", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny807", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1607", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny212", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny412", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny214", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny414", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny814", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1614", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny416", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny816", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1616", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny3216", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny417", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny817", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1617", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny3217", FamilyXMEGA3, ELFArchXMEGA3>; +def : 
Device<"attiny1624", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1626", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"attiny1627", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega808", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega809", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega1608", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega1609", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega3208", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega3209", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega4808", FamilyXMEGA3, ELFArchXMEGA3>; +def : Device<"atmega4809", FamilyXMEGA3, ELFArchXMEGA3>; diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 144ae2b320f9..a9dc9af819e6 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -54,8 +54,6 @@ private: const Register SCRATCH_REGISTER = AVR::R0; /// The register that will always contain zero. const Register ZERO_REGISTER = AVR::R1; - /// The IO address of the status register. - const unsigned SREG_ADDR = 0x3f; bool expandMBB(Block &MBB); bool expandMI(Block &MBB, BlockIt MBBI); @@ -86,21 +84,23 @@ private: bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI); - bool expandAtomicArithmeticOp(unsigned MemOpcode, unsigned ArithOpcode, - Block &MBB, BlockIt MBBI); - - /// Specific shift implementation. + /// Specific shift implementation for int8. bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); bool expandASRB6Rd(Block &MBB, BlockIt MBBI); bool expandASRB7Rd(Block &MBB, BlockIt MBBI); + + /// Specific shift implementation for int16. bool expandLSLW4Rd(Block &MBB, BlockIt MBBI); bool expandLSRW4Rd(Block &MBB, BlockIt MBBI); + bool expandASRW7Rd(Block &MBB, BlockIt MBBI); bool expandLSLW8Rd(Block &MBB, BlockIt MBBI); bool expandLSRW8Rd(Block &MBB, BlockIt MBBI); bool expandASRW8Rd(Block &MBB, BlockIt MBBI); bool expandLSLW12Rd(Block &MBB, BlockIt MBBI); bool expandLSRW12Rd(Block &MBB, BlockIt MBBI); + bool expandASRW14Rd(Block &MBB, BlockIt MBBI); + bool expandASRW15Rd(Block &MBB, BlockIt MBBI); // Common implementation of LPMWRdZ and ELPMWRdZ. bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt); @@ -141,6 +141,7 @@ bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) { // Continue expanding the block until all pseudos are expanded. do { assert(ExpandCount < 10 && "pseudo expand limit reached"); + (void)ExpandCount; bool BlockModified = expandMBB(MBB); Modified |= BlockModified; @@ -453,7 +454,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { auto MIBHI = buildMI(MBB, MBBI, AVR::NEGRd) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)); + .addReg(DstHiReg, RegState::Kill); // SREG is always implicitly dead MIBHI->getOperand(2).setIsDead(); @@ -917,13 +918,13 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { template bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) { - // Remove the pseudo instruction. MachineInstr &MI = *MBBI; + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); // Store the SREG. buildMI(MBB, MBBI, AVR::INRdA) .addReg(SCRATCH_REGISTER, RegState::Define) - .addImm(SREG_ADDR); + .addImm(STI.getIORegSREG()); // Disable exceptions. buildMI(MBB, MBBI, AVR::BCLRs).addImm(7); // CLI @@ -931,7 +932,9 @@ bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) { f(MI); // Restore the status reg. 
- buildMI(MBB, MBBI, AVR::OUTARr).addImm(SREG_ADDR).addReg(SCRATCH_REGISTER); + buildMI(MBB, MBBI, AVR::OUTARr) + .addImm(STI.getIORegSREG()) + .addReg(SCRATCH_REGISTER); MI.eraseFromParent(); return true; @@ -955,31 +958,6 @@ bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB, return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {}); } -bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width, - unsigned ArithOpcode, Block &MBB, - BlockIt MBBI) { - return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) { - auto DstReg = MI.getOperand(0).getReg(); - auto PtrOp = MI.getOperand(1); - auto SrcReg = MI.getOperand(2).getReg(); - - unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr; - unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr; - - // FIXME: this returns the new value (after the operation), not the old - // value as the atomicrmw instruction is supposed to do! - - // Create the load - buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg()); - - // Create the arithmetic op - buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg); - - // Create the store - buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg); - }); -} - Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); RegScavenger RS; @@ -1025,56 +1003,6 @@ bool AVRExpandPseudo::expand<AVR::AtomicStore16>(Block &MBB, BlockIt MBBI) { return expandAtomicBinaryOp(AVR::STWPtrRr, MBB, MBBI); } -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd8>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd16>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadSub8>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadSub16>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd8>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd16>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadOr8>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadOr16>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadXor8>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand<AVR::AtomicLoadXor16>(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI); -} - template <> bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) { // On AVR, there is only one core and so atomic fences do nothing.
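For reference, the surviving expandAtomic wrapper implements atomics on single-core AVR by masking interrupts around the operation: IN to save SREG, CLI, the operation itself, then OUT to restore SREG. A rough user-level equivalent in AVR-flavored C++ (illustrative only; 0x3f is the classic SREG I/O address that this patch replaces with the subtarget's getIORegSREG()):

#include <stdint.h>

// Compile with avr-g++. Mirrors the expander's sequence: save SREG, cli,
// do the memory operation, restore SREG (which also restores the I flag).
static inline uint8_t atomic_load_u8(const volatile uint8_t *Ptr) {
  uint8_t SavedSREG;
  asm volatile("in %0, 0x3f" : "=r"(SavedSREG)); // save status register
  asm volatile("cli" ::: "memory");              // mask interrupts
  uint8_t Value = *Ptr;                          // the guarded operation
  asm volatile("out 0x3f, %0" : : "r"(SavedSREG) : "memory"); // restore SREG
  return Value;
}

int main() {
  volatile uint8_t Cell = 42;
  return atomic_load_u8(&Cell) == 42 ? 0 : 1;
}

Restoring the whole saved SREG, rather than issuing SEI, is what makes the pattern safe inside code that already runs with interrupts disabled.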
@@ -1230,37 +1158,94 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; - Register SrcLoReg, SrcHiReg; + Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(2).getReg(); - unsigned Imm = MI.getOperand(1).getImm(); bool DstIsKill = MI.getOperand(0).isKill(); + unsigned Imm = MI.getOperand(1).getImm(); + Register SrcReg = MI.getOperand(2).getReg(); bool SrcIsKill = MI.getOperand(2).isKill(); - unsigned OpLo = AVR::STDPtrQRr; - unsigned OpHi = AVR::STDPtrQRr; - TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); - // Since we add 1 to the Imm value for the high byte below, and 63 is the - // highest Imm value allowed for the instruction, 62 is the limit here. - assert(Imm <= 62 && "Offset is out of range"); + // STD's maximum displacement is 63, so larger stores have to be split into a + // set of operations + if (Imm >= 63) { + if (!DstIsKill) { + buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(DstReg); + } - auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstReg) - .addImm(Imm) - .addReg(SrcLoReg, getKillRegState(SrcIsKill)); + buildMI(MBB, MBBI, AVR::SUBIWRdK) + .addReg(DstReg, RegState::Define) + .addReg(DstReg, RegState::Kill) + .addImm(-Imm); - auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addImm(Imm + 1) - .addReg(SrcHiReg, getKillRegState(SrcIsKill)); + buildMI(MBB, MBBI, AVR::STWPtrRr) + .addReg(DstReg, RegState::Kill) + .addReg(SrcReg, getKillRegState(SrcIsKill)); - MIBLO.setMemRefs(MI.memoperands()); - MIBHI.setMemRefs(MI.memoperands()); + if (!DstIsKill) { + buildMI(MBB, MBBI, AVR::POPWRd).addDef(DstReg, RegState::Define); + } + } else { + unsigned OpLo = AVR::STDPtrQRr; + unsigned OpHi = AVR::STDPtrQRr; + Register SrcLoReg, SrcHiReg; + TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); + + auto MIBLO = buildMI(MBB, MBBI, OpLo) + .addReg(DstReg) + .addImm(Imm) + .addReg(SrcLoReg, getKillRegState(SrcIsKill)); + + auto MIBHI = buildMI(MBB, MBBI, OpHi) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addImm(Imm + 1) + .addReg(SrcHiReg, getKillRegState(SrcIsKill)); + + MIBLO.setMemRefs(MI.memoperands()); + MIBHI.setMemRefs(MI.memoperands()); + } MI.eraseFromParent(); return true; } +template <> +bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + const MachineFunction &MF = *MBB.getParent(); + const AVRSubtarget &STI = MF.getSubtarget(); + + assert(MI.getOperand(0).getReg() == AVR::SP && + "SP is expected as base pointer"); + + assert(STI.getFrameLowering()->hasReservedCallFrame(MF) && + "unexpected STDSPQRr pseudo instruction"); + (void)STI; + + MI.setDesc(TII->get(AVR::STDPtrQRr)); + MI.getOperand(0).setReg(AVR::R29R28); + + return true; +} + +template <> +bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + const MachineFunction &MF = *MBB.getParent(); + const AVRSubtarget &STI = MF.getSubtarget(); + + assert(MI.getOperand(0).getReg() == AVR::SP && + "SP is expected as base pointer"); + + assert(STI.getFrameLowering()->hasReservedCallFrame(MF) && + "unexpected STDWSPQRr pseudo instruction"); + (void)STI; + + MI.setDesc(TII->get(AVR::STDWPtrQRr)); + MI.getOperand(0).setReg(AVR::R29R28); + + return true; +} + template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; @@ -1378,6 +1363,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { unsigned OpShift, OpCarry; Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = 
MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); OpShift = AVR::ADDRdRr; OpCarry = AVR::ADCRdRr; @@ -1387,13 +1373,13 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // Shift part buildMI(MBB, MBBI, OpShift) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addReg(DstReg); + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); // Add the carry bit auto MIB = buildMI(MBB, MBBI, OpCarry) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) + .addReg(DstReg, getKillRegState(DstIsKill)) .addReg(ZERO_REGISTER); // SREG is always implicitly killed @@ -1446,13 +1432,13 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // Low part buildMI(MBB, MBBI, OpLo) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstLoReg) + .addReg(DstLoReg, getKillRegState(DstIsKill)) .addReg(DstLoReg, getKillRegState(DstIsKill)); auto MIBHI = buildMI(MBB, MBBI, OpHi) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg) + .addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill)); if (ImpIsDead) @@ -1478,7 +1464,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // add hireg, hireg <==> lsl hireg auto MILSL = buildMI(MBB, MBBI, AVR::ADDRdRr) - .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill)); @@ -1502,16 +1488,16 @@ bool AVRExpandPseudo::expandLSLW4Rd(Block &MBB, BlockIt MBBI) { // swap Rl buildMI(MBB, MBBI, AVR::SWAPRd) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)); + .addReg(DstHiReg, RegState::Kill); buildMI(MBB, MBBI, AVR::SWAPRd) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstLoReg, getKillRegState(DstIsKill)); + .addReg(DstLoReg, RegState::Kill); // andi Rh, 0xf0 auto MI0 = buildMI(MBB, MBBI, AVR::ANDIRdK) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)) + .addReg(DstHiReg, RegState::Kill) .addImm(0xf0); // SREG is implicitly dead. MI0->getOperand(3).setIsDead(); @@ -1520,7 +1506,7 @@ bool AVRExpandPseudo::expandLSLW4Rd(Block &MBB, BlockIt MBBI) { auto MI1 = buildMI(MBB, MBBI, AVR::EORRdRr) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)) + .addReg(DstHiReg, RegState::Kill) .addReg(DstLoReg); // SREG is implicitly dead. 
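expandLSLW4Rd, touched above, computes a 16-bit shift left by four without four single-bit shifts: two nibble SWAPs, ANDIs and EORs. A host-side model of the full sequence (including the trailing andi/eor pair that falls outside the hunk context), checked exhaustively against the plain shift:

#include <cassert>
#include <cstdint>

// Swap the two nibbles of a byte, as AVR's SWAP instruction does.
static uint8_t swapNibbles(uint8_t B) { return uint8_t((B << 4) | (B >> 4)); }

// Model of the LSLW4Rd expansion:
//   swap Rh; swap Rl; andi Rh, 0xf0; eor Rh, Rl; andi Rl, 0xf0; eor Rh, Rl
static uint16_t lslw4(uint16_t V) {
  uint8_t Lo = uint8_t(V), Hi = uint8_t(V >> 8);
  Hi = swapNibbles(Hi); // swap Rh
  Lo = swapNibbles(Lo); // swap Rl
  Hi &= 0xf0;           // andi Rh, 0xf0: keep Hi's old low nibble, moved up
  Hi ^= Lo;             // eor Rh, Rl: mix in Lo's old high nibble...
  Lo &= 0xf0;           // andi Rl, 0xf0: Lo's old low nibble, moved up
  Hi ^= Lo;             // eor Rh, Rl: ...and cancel the stray nibble again
  return uint16_t((unsigned(Hi) << 8) | Lo);
}

int main() {
  for (uint32_t V = 0; V <= 0xffff; ++V)
    assert(lslw4(uint16_t(V)) == uint16_t(V << 4));
  return 0;
}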
MI1->getOperand(3).setIsDead(); @@ -1591,7 +1577,7 @@ bool AVRExpandPseudo::expandLSLW12Rd(Block &MBB, BlockIt MBBI) { // swap Rh buildMI(MBB, MBBI, AVR::SWAPRd) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)); + .addReg(DstHiReg, RegState::Kill); // andi Rh, 0xf0 auto MI0 = @@ -1700,16 +1686,16 @@ bool AVRExpandPseudo::expandLSRW4Rd(Block &MBB, BlockIt MBBI) { // swap Rl buildMI(MBB, MBBI, AVR::SWAPRd) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)); + .addReg(DstHiReg, RegState::Kill); buildMI(MBB, MBBI, AVR::SWAPRd) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstLoReg, getKillRegState(DstIsKill)); + .addReg(DstLoReg, RegState::Kill); // andi Rl, 0xf auto MI0 = buildMI(MBB, MBBI, AVR::ANDIRdK) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstLoReg, getKillRegState(DstIsKill)) + .addReg(DstLoReg, RegState::Kill) .addImm(0xf); // SREG is implicitly dead. MI0->getOperand(3).setIsDead(); @@ -1718,7 +1704,7 @@ bool AVRExpandPseudo::expandLSRW4Rd(Block &MBB, BlockIt MBBI) { auto MI1 = buildMI(MBB, MBBI, AVR::EORRdRr) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstLoReg, getKillRegState(DstIsKill)) + .addReg(DstLoReg, RegState::Kill) .addReg(DstHiReg); // SREG is implicitly dead. MI1->getOperand(3).setIsDead(); @@ -1789,7 +1775,7 @@ bool AVRExpandPseudo::expandLSRW12Rd(Block &MBB, BlockIt MBBI) { // swap Rl buildMI(MBB, MBBI, AVR::SWAPRd) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstLoReg, getKillRegState(DstIsKill)); + .addReg(DstLoReg, RegState::Kill); // andi Rl, 0xf auto MI0 = @@ -1897,6 +1883,53 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return true; } +bool AVRExpandPseudo::expandASRW7Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(3).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsl r24 + // mov r24,r25 + // rol r24 + // sbc r25,r25 + + // lsl r24 <=> add r24, r24 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, RegState::Kill) + .addReg(DstLoReg, RegState::Kill); + + // mov r24, r25 + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg); + + // rol r24 <=> adc r24, r24 + buildMI(MBB, MBBI, AVR::ADCRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, getKillRegState(DstIsKill)) + .addReg(DstLoReg, getKillRegState(DstIsKill)); + + // sbc r25, r25 + auto MISBC = + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, getKillRegState(DstIsKill)) + .addReg(DstHiReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) + MISBC->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MISBC->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -1913,9 +1946,9 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) { // Move the sign bit to the C flag. 
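The new expandASRW7Rd above gets a 16-bit arithmetic shift right by seven out of just four instructions: lsl Rl, mov Rl,Rh, rol Rl, sbc Rh,Rh. A host-side model, verified against an ordinary arithmetic shift (assuming the usual sign-propagating >> on signed int):

#include <cassert>
#include <cstdint>

// Model of the ASRW7Rd expansion: lsl Rl; mov Rl, Rh; rol Rl; sbc Rh, Rh.
static uint16_t asrw7(uint16_t V) {
  uint8_t Lo = uint8_t(V), Hi = uint8_t(V >> 8);
  bool Carry = (Lo & 0x80) != 0;   // lsl Rl: carry takes Lo's top bit
  bool Sign = (Hi & 0x80) != 0;    // rol Rl's carry-out is the old sign bit
  Lo = uint8_t((Hi << 1) | Carry); // mov Rl, Rh then rol Rl
  Hi = Sign ? 0xff : 0x00;         // sbc Rh, Rh: 0 - 0 - carry sign-fills
  return uint16_t((unsigned(Hi) << 8) | Lo);
}

int main() {
  for (int V = -32768; V <= 32767; ++V)
    assert(int16_t(asrw7(uint16_t(V))) == int16_t(V >> 7));
  return 0;
}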
buildMI(MBB, MBBI, AVR::ADDRdRr) - .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill) | getDeadRegState(DstIsDead)) - .addReg(DstHiReg, getKillRegState(DstIsKill)); + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); // Set upper byte to 0 or -1. auto MIBHI = @@ -1923,8 +1956,102 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill)); + if (ImpIsDead) MIBHI->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MIBHI->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} +bool AVRExpandPseudo::expandASRW14Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(3).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsl r25 + // sbc r24, r24 + // lsl r25 + // mov r25, r24 + // rol r24 + + // lsl r25 <=> add r25, r25 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + + // sbc r24, r24 + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, RegState::Kill) + .addReg(DstLoReg, RegState::Kill); + + // lsl r25 <=> add r25, r25 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + + // mov r25, r24 + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg); + + // rol r24 <=> adc r24, r24 + auto MIROL = + buildMI(MBB, MBBI, AVR::ADCRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, getKillRegState(DstIsKill)) + .addReg(DstLoReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) + MIROL->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MIROL->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} + +bool AVRExpandPseudo::expandASRW15Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool ImpIsDead = MI.getOperand(3).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsl r25 + // sbc r25, r25 + // mov r24, r25 + + // lsl r25 <=> add r25, r25 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + + // sbc r25, r25 + auto MISBC = + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + if (ImpIsDead) + MISBC->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MISBC->getOperand(4).setIsKill(); + + // mov r24, r25 + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg); MI.eraseFromParent(); return true; @@ -1935,8 +2062,14 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt
MBBI) { MachineInstr &MI = *MBBI; unsigned Imm = MI.getOperand(2).getImm(); switch (Imm) { + case 7: + return expandASRW7Rd(MBB, MBBI); case 8: return expandASRW8Rd(MBB, MBBI); + case 14: + return expandASRW14Rd(MBB, MBBI); + case 15: + return expandASRW15Rd(MBB, MBBI); default: llvm_unreachable("unimplemented asrwn"); return false; @@ -1956,14 +2089,14 @@ bool AVRExpandPseudo::expandLSLB7Rd(Block &MBB, BlockIt MBBI) { buildMI(MBB, MBBI, AVR::RORRd) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, RegState::Kill) ->getOperand(3) .setIsUndef(true); buildMI(MBB, MBBI, AVR::EORRdRr) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); auto MIRRC = buildMI(MBB, MBBI, AVR::RORRd) @@ -2006,15 +2139,15 @@ bool AVRExpandPseudo::expandLSRB7Rd(Block &MBB, BlockIt MBBI) { buildMI(MBB, MBBI, AVR::ADCRdRr) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill) ->getOperand(4) .setIsUndef(true); buildMI(MBB, MBBI, AVR::EORRdRr) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); auto MIRRC = buildMI(MBB, MBBI, AVR::ADCRdRr) @@ -2064,13 +2197,13 @@ bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) { buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rd .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); buildMI(MBB, MBBI, AVR::SBCRdRr) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); buildMI(MBB, MBBI, AVR::BLD) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -2095,8 +2228,8 @@ bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) { buildMI(MBB, MBBI, AVR::ADDRdRr) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr) @@ -2152,26 +2285,22 @@ template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { bool ImpIsDead = MI.getOperand(2).isDead(); TRI->splitReg(DstReg, DstLoReg, DstHiReg); - if (SrcReg != DstLoReg) { - auto MOV = - buildMI(MBB, MBBI, AVR::MOVRdRr) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg); - - if (SrcReg == DstHiReg) { - MOV->getOperand(1).setIsKill(); - } - } + if (SrcReg != DstLoReg) + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(SrcReg); if (SrcReg != DstHiReg) { - buildMI(MBB, MBBI, AVR::MOVRdRr) - .addReg(DstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)); + auto MOV = buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstHiReg, RegState::Define) + .addReg(SrcReg); + if (SrcReg != 
DstLoReg && SrcIsKill) + MOV->getOperand(1).setIsKill(); } buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr .addReg(DstHiReg, RegState::Define) - .addReg(DstHiReg) + .addReg(DstHiReg, RegState::Kill) .addReg(DstHiReg, RegState::Kill); auto SBC = @@ -2256,6 +2385,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); MachineInstr &MI = *MBBI; Register SrcLoReg, SrcHiReg; Register SrcReg = MI.getOperand(1).getReg(); @@ -2265,7 +2395,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { buildMI(MBB, MBBI, AVR::INRdA) .addReg(AVR::R0, RegState::Define) - .addImm(SREG_ADDR) + .addImm(STI.getIORegSREG()) .setMIFlags(Flags); buildMI(MBB, MBBI, AVR::BCLRs).addImm(0x07).setMIFlags(Flags); @@ -2276,7 +2406,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { .setMIFlags(Flags); buildMI(MBB, MBBI, AVR::OUTARr) - .addImm(SREG_ADDR) + .addImm(STI.getIORegSREG()) .addReg(AVR::R0, RegState::Kill) .setMIFlags(Flags); @@ -2330,22 +2460,14 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) { EXPAND(AVR::AtomicLoad16); EXPAND(AVR::AtomicStore8); EXPAND(AVR::AtomicStore16); - EXPAND(AVR::AtomicLoadAdd8); - EXPAND(AVR::AtomicLoadAdd16); - EXPAND(AVR::AtomicLoadSub8); - EXPAND(AVR::AtomicLoadSub16); - EXPAND(AVR::AtomicLoadAnd8); - EXPAND(AVR::AtomicLoadAnd16); - EXPAND(AVR::AtomicLoadOr8); - EXPAND(AVR::AtomicLoadOr16); - EXPAND(AVR::AtomicLoadXor8); - EXPAND(AVR::AtomicLoadXor16); EXPAND(AVR::AtomicFence); EXPAND(AVR::STSWKRr); EXPAND(AVR::STWPtrRr); EXPAND(AVR::STWPtrPiRr); EXPAND(AVR::STWPtrPdRr); EXPAND(AVR::STDWPtrQRr); + EXPAND(AVR::STDSPQRr); + EXPAND(AVR::STDWSPQRr); EXPAND(AVR::INWRdA); EXPAND(AVR::OUTWARr); EXPAND(AVR::PUSHWRr); diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index b3bc9ede205e..ec8b74e435ce 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -73,7 +73,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0) - .addImm(0x3f) + .addImm(STI.getIORegSREG()) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) .addReg(AVR::R0, RegState::Kill) @@ -144,7 +144,7 @@ static void restoreStatusRegister(MachineFunction &MF, MachineBasicBlock &MBB) { if (AFI->isInterruptOrSignalHandler()) { BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) - .addImm(0x3f) + .addImm(STI.getIORegSREG()) .addReg(AVR::R0, RegState::Kill); BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); } @@ -201,8 +201,8 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, // Restore the frame pointer by doing FP += . MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28) - .addReg(AVR::R29R28, RegState::Kill) - .addImm(FrameSize); + .addReg(AVR::R29R28, RegState::Kill) + .addImm(FrameSize); // The SREG implicit def is dead. MI->getOperand(3).setIsDead(); } @@ -298,11 +298,11 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters( /// Replace pseudo store instructions that pass arguments through the stack with /// real instructions. 
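// For illustration (operand shapes assumed from the STD*SPQRr pseudo
// definitions): a pseudo such as "STDWSPQRr SP, 4, r25:r24" is rewritten in
// place to "STDWPtrQRr r31r30, 4, r25:r24", i.e. roughly "std Z+4 / std
// Z+5", which is valid because Z has just been loaded with a copy of SP via
// SPREAD.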
static void fixStackStores(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const TargetInstrInfo &TII, Register FP) { + MachineBasicBlock::iterator StartMI, + const TargetInstrInfo &TII) { // Iterate through the BB until we hit a call instruction or we reach the end. for (MachineInstr &MI : - llvm::make_early_inc_range(llvm::make_range(MI, MBB.end()))) { + llvm::make_early_inc_range(llvm::make_range(StartMI, MBB.end()))) { if (MI.isCall()) break; @@ -313,7 +313,7 @@ static void fixStackStores(MachineBasicBlock &MBB, continue; assert(MI.getOperand(0).getReg() == AVR::SP && - "Invalid register, should be SP!"); + "SP is expected as base pointer"); // Replace this instruction with a regular store. Use Z as the base // pointer since it is guaranteed to contain a copy of SP. unsigned STOpc = (Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr; MI.setDesc(TII.get(STOpc)); - MI.getOperand(0).setReg(FP); + MI.getOperand(0).setReg(AVR::R31R30); } } @@ -331,11 +331,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr( const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>(); const AVRInstrInfo &TII = *STI.getInstrInfo(); - // There is nothing to insert when the call frame memory is allocated during - // function entry. Delete the call frame pseudo and replace all pseudo stores - // with real store instructions. if (hasReservedCallFrame(MF)) { - fixStackStores(MBB, MI, TII, AVR::R29R28); return MBB.erase(MI); } @@ -343,57 +339,58 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr( unsigned int Opcode = MI->getOpcode(); int Amount = TII.getFrameSize(*MI); - // ADJCALLSTACKUP and ADJCALLSTACKDOWN are converted to adiw/subi - // instructions to read and write the stack pointer in I/O space. - if (Amount != 0) { - assert(getStackAlign() == Align(1) && "Unsupported stack alignment"); - - if (Opcode == TII.getCallFrameSetupOpcode()) { - // Update the stack pointer. - // In many cases this can be done far more efficiently by pushing the - // relevant values directly to the stack. However, doing that correctly - // (in the right order, possibly skipping some empty space for undef - // values, etc) is tricky and thus left to be optimized in the future. - BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP); - - MachineInstr *New = - BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30) - .addReg(AVR::R31R30, RegState::Kill) - .addImm(Amount); - New->getOperand(3).setIsDead(); - - BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30); - - // Make sure the remaining stack stores are converted to real store - // instructions. - fixStackStores(MBB, MI, TII, AVR::R31R30); - } else { - assert(Opcode == TII.getCallFrameDestroyOpcode()); - - // Note that small stack changes could be implemented more efficiently - // with a few pop instructions instead of the 8-9 instructions now - // required. - - // Select the best opcode to adjust SP based on the offset size. - unsigned addOpcode; - if (isUInt<6>(Amount)) { - addOpcode = AVR::ADIWRdK; - } else { - addOpcode = AVR::SUBIWRdK; - Amount = -Amount; - } + if (Amount == 0) { + return MBB.erase(MI); + } + + assert(getStackAlign() == Align(1) && "Unsupported stack alignment"); + + if (Opcode == TII.getCallFrameSetupOpcode()) { + // Update the stack pointer. + // In many cases this can be done far more efficiently by pushing the + // relevant values directly to the stack.
However, doing that correctly + // (in the right order, possibly skipping some empty space for undef + // values, etc) is tricky and thus left to be optimized in the future. + BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP); + + MachineInstr *New = + BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30) + .addReg(AVR::R31R30, RegState::Kill) + .addImm(Amount); + New->getOperand(3).setIsDead(); - // Build the instruction sequence. - BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP); + BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30); - MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(addOpcode), AVR::R31R30) - .addReg(AVR::R31R30, RegState::Kill) - .addImm(Amount); - New->getOperand(3).setIsDead(); + // Make sure the remaining stack stores are converted to real store + // instructions. + fixStackStores(MBB, MI, TII); + } else { + assert(Opcode == TII.getCallFrameDestroyOpcode()); - BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP) - .addReg(AVR::R31R30, RegState::Kill); + // Note that small stack changes could be implemented more efficiently + // with a few pop instructions instead of the 8-9 instructions now + // required. + + // Select the best opcode to adjust SP based on the offset size. + unsigned AddOpcode; + + if (isUInt<6>(Amount)) { + AddOpcode = AVR::ADIWRdK; + } else { + AddOpcode = AVR::SUBIWRdK; + Amount = -Amount; } + + // Build the instruction sequence. + BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP); + + MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AddOpcode), AVR::R31R30) + .addReg(AVR::R31R30, RegState::Kill) + .addImm(Amount); + New->getOperand(3).setIsDead(); + + BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP) + .addReg(AVR::R31R30, RegState::Kill); } return MBB.erase(MI); @@ -420,7 +417,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override { const MachineFrameInfo &MFI = MF.getFrameInfo(); - AVRMachineFunctionInfo *FuncInfo = MF.getInfo(); + AVRMachineFunctionInfo *AFI = MF.getInfo(); // If there are no fixed frame indexes during this stage it means there // are allocas present in the function. @@ -431,7 +428,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass { for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { // Variable sized objects have size 0. if (MFI.getObjectSize(i)) { - FuncInfo->setHasAllocas(true); + AFI->setHasAllocas(true); break; } } @@ -460,7 +457,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass { } if (MFI.isFixedObjectIndex(MO.getIndex())) { - FuncInfo->setHasStackArgs(true); + AFI->setHasStackArgs(true); return false; } } diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index a58fedf6cd36..7a1e7b1535a7 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -13,6 +13,7 @@ #include "AVRISelLowering.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -269,8 +270,6 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, } SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { - //: TODO: this function has to be completely rewritten to produce optimal - // code, for now it's producing very long but correct code. 
unsigned Opc8; const SDNode *N = Op.getNode(); EVT VT = Op.getValueType(); @@ -371,6 +370,27 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { ShiftAmount = 0; } } else if (VT.getSizeInBits() == 16) { + if (Op.getOpcode() == ISD::SRA) + // Special optimization for int16 arithmetic right shift. + switch (ShiftAmount) { + case 15: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(15, dl, VT)); + ShiftAmount = 0; + break; + case 14: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(14, dl, VT)); + ShiftAmount = 0; + break; + case 7: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(7, dl, VT)); + ShiftAmount = 0; + break; + default: + break; + } if (4 <= ShiftAmount && ShiftAmount < 8) switch (Op.getOpcode()) { case ISD::SHL: @@ -1023,17 +1043,24 @@ bool AVRTargetLowering::isOffsetFoldingLegal( /// Registers for calling conventions, ordered in reverse as required by ABI. /// Both arrays must be of the same length. -static const MCPhysReg RegList8[] = { +static const MCPhysReg RegList8AVR[] = { AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20, AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14, AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8}; -static const MCPhysReg RegList16[] = { +static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23, + AVR::R22, AVR::R21, AVR::R20}; +static const MCPhysReg RegList16AVR[] = { AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21, AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16, AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11, AVR::R11R10, AVR::R10R9, AVR::R9R8}; +static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24, + AVR::R24R23, AVR::R23R22, + AVR::R22R21, AVR::R21R20}; -static_assert(array_lengthof(RegList8) == array_lengthof(RegList16), +static_assert(array_lengthof(RegList8AVR) == array_lengthof(RegList16AVR), + "8-bit and 16-bit register arrays must be of equal length"); +static_assert(array_lengthof(RegList8Tiny) == array_lengthof(RegList16Tiny), "8-bit and 16-bit register arrays must be of equal length"); /// Analyze incoming and outgoing function arguments. We need custom C++ code @@ -1041,10 +1068,22 @@ static_assert(array_lengthof(RegList8) == array_lengthof(RegList16), /// In addition, all pieces of a certain argument have to be passed either /// using registers or the stack but never mixing both. template -static void -analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F, - const DataLayout *TD, const SmallVectorImpl &Args, - SmallVectorImpl &ArgLocs, CCState &CCInfo) { +static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI, + const Function *F, const DataLayout *TD, + const SmallVectorImpl &Args, + SmallVectorImpl &ArgLocs, + CCState &CCInfo, bool Tiny) { + // Choose the proper register list for argument passing according to the ABI. + ArrayRef RegList8; + ArrayRef RegList16; + if (Tiny) { + RegList8 = makeArrayRef(RegList8Tiny, array_lengthof(RegList8Tiny)); + RegList16 = makeArrayRef(RegList16Tiny, array_lengthof(RegList16Tiny)); + } else { + RegList8 = makeArrayRef(RegList8AVR, array_lengthof(RegList8AVR)); + RegList16 = makeArrayRef(RegList16AVR, array_lengthof(RegList16AVR)); + } + unsigned NumArgs = Args.size(); // This is the index of the last used register, in RegList*. // -1 means R26 (R26 is never actually used in CC). 
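// For illustration: with the reverse-ordered lists above, the first i32
// argument has TotalBytes = 4 and occupies list indices 3..0, i.e. bytes
// R22..R25, matching avr-gcc's convention of passing the first 4-byte
// argument in r22..r25. On avrtiny the list has only six entries
// (R25..R20), so a second i32 would need index 7 and is passed on the
// stack instead.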
@@ -1074,7 +1113,7 @@ analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F, unsigned RegIdx = RegLastIdx + TotalBytes; RegLastIdx = RegIdx; // If there are not enough registers, use the stack - if (RegIdx >= array_lengthof(RegList8)) { + if (RegIdx >= RegList8.size()) { UseStack = true; } for (; i != j; ++i) { @@ -1123,13 +1162,24 @@ getTotalArgumentsSizeInBytes(const SmallVectorImpl &Args) { /// one value, possibly an aggregate, and it is limited to 8 bytes. template static void analyzeReturnValues(const SmallVectorImpl &Args, - CCState &CCInfo) { + CCState &CCInfo, bool Tiny) { unsigned NumArgs = Args.size(); unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args); // CanLowerReturn() guarantees this assertion. assert(TotalBytes <= 8 && "return values greater than 8 bytes cannot be lowered"); + // Choose the proper register list for argument passing according to the ABI. + ArrayRef RegList8; + ArrayRef RegList16; + if (Tiny) { + RegList8 = makeArrayRef(RegList8Tiny, array_lengthof(RegList8Tiny)); + RegList16 = makeArrayRef(RegList16Tiny, array_lengthof(RegList16Tiny)); + } else { + RegList8 = makeArrayRef(RegList8AVR, array_lengthof(RegList8AVR)); + RegList16 = makeArrayRef(RegList16AVR, array_lengthof(RegList16AVR)); + } + // GCC-ABI says that the size is rounded up to the next even number, // but actually once it is more than 4 it will always round up to 8. if (TotalBytes > 4) { @@ -1174,7 +1224,8 @@ SDValue AVRTargetLowering::LowerFormalArguments( if (isVarArg) { CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg); } else { - analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo); + analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo, + Subtarget.hasTinyEncoding()); } SDValue ArgValue; @@ -1285,8 +1336,8 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const Function *F = nullptr; if (const GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); - - F = cast(GV); + if (isa(GV)) + F = cast(GV); Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout())); } else if (const ExternalSymbolSDNode *ES = @@ -1299,7 +1350,8 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isVarArg) { CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg); } else { - analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo); + analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo, + Subtarget.hasTinyEncoding()); } // Get a count of how many bytes are to be pushed on the stack. @@ -1444,7 +1496,7 @@ SDValue AVRTargetLowering::LowerCallResult( if (CallConv == CallingConv::AVR_BUILTIN) { CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN); } else { - analyzeReturnValues(Ins, CCInfo); + analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding()); } // Copy all of the result registers out of their specified physreg. @@ -1495,7 +1547,7 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (CallConv == CallingConv::AVR_BUILTIN) { CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN); } else { - analyzeReturnValues(Outs, CCInfo); + analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding()); } SDValue Flag; @@ -1707,6 +1759,60 @@ AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } +// Lower atomicrmw operation to disable interrupts, do operation, and restore +// interrupts. This works because all AVR microcontrollers are single core. 
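// For illustration, a source construct that reaches this hook (assumed
// front-end behavior):
//   _Atomic unsigned char counter;
//   void bump(void) { counter += 5; }   // selected as AtomicLoadAdd8
// The emitted wrapper is interrupt-safe: SREG (which holds the global
// interrupt enable bit) is saved into r0, "cli" clears it, and writing r0
// back to SREG re-enables interrupts only if they were enabled on entry.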
+MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + MachineBasicBlock::iterator I(MI); + const Register SCRATCH_REGISTER = AVR::R0; + DebugLoc dl = MI.getDebugLoc(); + + // Example instruction sequence, for an atomic 8-bit add: + // ldi r25, 5 + // in r0, SREG + // cli + // ld r24, X + // add r25, r24 + // st X, r25 + // out SREG, r0 + + const TargetRegisterClass *RC = + (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass; + unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr; + unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr; + + // Disable interrupts. + BuildMI(*BB, I, dl, TII.get(AVR::INRdA), SCRATCH_REGISTER) + .addImm(Subtarget.getIORegSREG()); + BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7); + + // Load the original value. + BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + + // Do the arithmetic operation. + Register Result = MRI.createVirtualRegister(RC); + BuildMI(*BB, I, dl, TII.get(Opcode), Result) + .addReg(MI.getOperand(0).getReg()) + .add(MI.getOperand(2)); + + // Store the result. + BuildMI(*BB, I, dl, TII.get(StoreOpcode)) + .add(MI.getOperand(1)) + .addReg(Result); + + // Restore interrupts. + BuildMI(*BB, I, dl, TII.get(AVR::OUTARr)) + .addImm(Subtarget.getIORegSREG()) + .addReg(SCRATCH_REGISTER); + + // Remove the pseudo instruction. + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock * AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -1731,6 +1837,26 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return insertMul(MI, MBB); case AVR::CopyR1: return insertCopyR1(MI, MBB); + case AVR::AtomicLoadAdd8: + return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8); + case AVR::AtomicLoadAdd16: + return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16); + case AVR::AtomicLoadSub8: + return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8); + case AVR::AtomicLoadSub16: + return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16); + case AVR::AtomicLoadAnd8: + return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8); + case AVR::AtomicLoadAnd16: + return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16); + case AVR::AtomicLoadOr8: + return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8); + case AVR::AtomicLoadOr16: + return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16); + case AVR::AtomicLoadXor8: + return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8); + case AVR::AtomicLoadXor16: + return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16); } assert((Opc == AVR::Select16 || Opc == AVR::Select8) && diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h index 116417b61566..c5c937c983ed 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -189,6 +189,9 @@ private: MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *insertAtomicArithmeticOp(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Opcode, int Width) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td b/llvm/lib/Target/AVR/AVRInstrFormats.td index 2bcbcdfbf925..83c32c80dfb9 100644 --- 
a/llvm/lib/Target/AVR/AVRInstrFormats.td +++ b/llvm/lib/Target/AVR/AVRInstrFormats.td @@ -179,7 +179,8 @@ class FSTDLDD pattern> // r = src/dst register // // Note that the bit labelled 'i' above does not follow a simple pattern, -// so there exists a post encoder method to set it manually. +// so there exists a post encoder method to set it manually. Also a specified +// decoder method is needed. //===---------------------------------------------------------------------===// class FSTLD mode, dag outs, dag ins, string asmstr, list pattern> : AVRInst16 { @@ -200,6 +201,7 @@ class FSTLD mode, dag outs, dag ins, string asmstr, let Inst{3 - 2} = ptrreg{1 - 0}; let Inst{1 - 0} = mode{1 - 0}; + let DecoderMethod = "decodeLoadStore"; let PostEncoderMethod = "loadStorePostEncoder"; } diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index ac52c47f93d5..510000f231fa 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -46,8 +46,9 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const AVRRegisterInfo &TRI = *STI.getRegisterInfo(); unsigned Opc; - // Not all AVR devices support the 16-bit `MOVW` instruction. if (AVR::DREGSRegClass.contains(DestReg, SrcReg)) { + // If our AVR has `movw`, let's emit that; otherwise let's emit two separate + // `mov`s. if (STI.hasMOVW() && AVR::DREGSMOVWRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(AVR::MOVWRdRr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -57,11 +58,17 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB, TRI.splitReg(DestReg, DestLo, DestHi); TRI.splitReg(SrcReg, SrcLo, SrcHi); - // Copy each individual register with the `MOV` instruction. - BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo) - .addReg(SrcLo, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi) - .addReg(SrcHi, getKillRegState(KillSrc)); + if (DestLo == SrcHi) { + BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi) + .addReg(SrcHi, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo) + .addReg(SrcLo, getKillRegState(KillSrc)); + } else { + BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo) + .addReg(SrcLo, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi) + .addReg(SrcHi, getKillRegState(KillSrc)); + } } } else { if (AVR::GPR8RegClass.contains(DestReg, SrcReg)) { @@ -299,9 +306,7 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. 
- while (std::next(I) != MBB.end()) { - std::next(I)->eraseFromParent(); - } + MBB.erase(std::next(I), MBB.end()); Cond.clear(); FBB = nullptr; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 2b96dc0b833a..f20ba5edf208 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -177,12 +177,16 @@ def memri : Operand { let PrintMethod = "printMemri"; let EncoderMethod = "encodeMemri"; + let DecoderMethod = "decodeMemri"; let ParserMatchClass = MemriAsmOperand; } // Address operand for `SP+imm` used by STD{W}SPQRr -def memspi : Operand { let MIOperandInfo = (ops GPRSP, i16imm); } +def memspi : Operand { + let MIOperandInfo = (ops GPRSP, i16imm); + let PrintMethod = "printMemspi"; +} def relbrtarget_7 : Operand { let PrintMethod = "printPCRelImm"; @@ -194,6 +198,11 @@ def brtarget_13 : Operand { let EncoderMethod = "encodeRelCondBrTarget"; } +def rcalltarget_13 : Operand { + let PrintMethod = "printPCRelImm"; + let EncoderMethod = "encodeRelCondBrTarget"; +} + // The target of a 22 or 16-bit call/jmp instruction. def call_target : Operand { let EncoderMethod = "encodeCallTarget"; @@ -965,10 +974,8 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in { let isCall = 1 in { // SP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. - let Uses = [SP] in def RCALLk : FBRk<1, (outs), - (ins brtarget_13 - : $target), - "rcall\t$target", []>; + let Uses = [SP] in def RCALLk : FBRk<1, (outs), (ins rcalltarget_13:$k), + "rcall\t$k", [(AVRcall imm:$k)]>; // SP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. @@ -985,13 +992,10 @@ let isCall = 1 in { // SP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. // - //: TODO: the imm field can be either 16 or 22 bits in devices with more + // TODO: the imm field can be either 16 or 22 bits in devices with more // than 64k of ROM, fix it once we support the largest devices. - let Uses = [SP] in def CALLk : F32BRk<0b111, (outs), - (ins call_target - : $k), - "call\t$k", [(AVRcall imm - : $k)]>, + let Uses = [SP] in def CALLk : F32BRk<0b111, (outs), (ins call_target:$k), + "call\t$k", [(AVRcall imm:$k)]>, Requires<[HasJMPCALL]>; } @@ -1446,27 +1450,14 @@ class AtomicStore : $rd, DRC : $rr)]>; -let Constraints = - "@earlyclobber $rd" in class AtomicLoadOp - : Pseudo<(outs DRC - : $rd), - (ins PTRRC - : $rr, DRC - : $operand), - "atomic_op", [(set DRC - : $rd, (Op i16 - : $rr, DRC - : $operand))]>; - -// FIXME: I think 16-bit atomic binary ops need to mark -// r0 as clobbered. +class AtomicLoadOp + : Pseudo<(outs DRC:$rd), + (ins PTRRC:$rr, DRC:$operand), + "atomic_op", [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>; // Atomic instructions // =================== // -// These are all expanded by AVRExpandPseudoInsts -// // 8-bit operations can use any pointer register because // they are expanded directly into an LD/ST instruction. 
// @@ -1482,16 +1473,18 @@ def AtomicStore16 : AtomicStore; class AtomicLoadOp8 : AtomicLoadOp; class AtomicLoadOp16 : AtomicLoadOp; -def AtomicLoadAdd8 : AtomicLoadOp8; -def AtomicLoadAdd16 : AtomicLoadOp16; -def AtomicLoadSub8 : AtomicLoadOp8; -def AtomicLoadSub16 : AtomicLoadOp16; -def AtomicLoadAnd8 : AtomicLoadOp8; -def AtomicLoadAnd16 : AtomicLoadOp16; -def AtomicLoadOr8 : AtomicLoadOp8; -def AtomicLoadOr16 : AtomicLoadOp16; -def AtomicLoadXor8 : AtomicLoadOp8; -def AtomicLoadXor16 : AtomicLoadOp16; +let usesCustomInserter=1 in { + def AtomicLoadAdd8 : AtomicLoadOp8; + def AtomicLoadAdd16 : AtomicLoadOp16; + def AtomicLoadSub8 : AtomicLoadOp8; + def AtomicLoadSub16 : AtomicLoadOp16; + def AtomicLoadAnd8 : AtomicLoadOp8; + def AtomicLoadAnd16 : AtomicLoadOp16; + def AtomicLoadOr8 : AtomicLoadOp8; + def AtomicLoadOr16 : AtomicLoadOp16; + def AtomicLoadXor8 : AtomicLoadOp8; + def AtomicLoadXor16 : AtomicLoadOp16; +} def AtomicFence : Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence timm, timm)]>; @@ -1954,7 +1947,7 @@ let Constraints = "$src = $rd", Defs = [SREG] in { : $src)), (implicit SREG)]>; - def ASRWNRd : Pseudo<(outs DLDREGS + def ASRWNRd : Pseudo<(outs DREGS : $rd), (ins DREGS : $src, imm16 @@ -2122,15 +2115,17 @@ def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8 : $rd, GPR8 : $rd)>; // Sets all bits in a register. def : InstAlias<"ser\t$rd", (LDIRdK LD8 : $rd, 0xff), 0>; -let Defs = [SREG] in def BSETs : FS<0, (outs), - (ins i8imm - : $s), - "bset\t$s", []>; +let hasSideEffects=1 in { + let Defs = [SREG] in def BSETs : FS<0, + (outs), + (ins i8imm:$s), + "bset\t$s", []>; -let Defs = [SREG] in def BCLRs : FS<1, (outs), - (ins i8imm - : $s), - "bclr\t$s", []>; + let Defs = [SREG] in def BCLRs : FS<1, + (outs), + (ins i8imm:$s), + "bclr\t$s", []>; +} // Set/clear aliases for the carry (C) status flag (bit 0). def : InstAlias<"sec", (BSETs 0)>; @@ -2457,8 +2452,12 @@ def : Pat<(adde i8 : $src2))>; // Calls. -def : Pat<(AVRcall(i16 tglobaladdr : $dst)), (CALLk tglobaladdr : $dst)>; -def : Pat<(AVRcall(i16 texternalsym : $dst)), (CALLk texternalsym : $dst)>; +let Predicates = [HasJMPCALL] in { + def : Pat<(AVRcall(i16 tglobaladdr:$dst)), (CALLk tglobaladdr:$dst)>; + def : Pat<(AVRcall(i16 texternalsym:$dst)), (CALLk texternalsym:$dst)>; +} +def : Pat<(AVRcall(i16 tglobaladdr:$dst)), (RCALLk tglobaladdr:$dst)>; +def : Pat<(AVRcall(i16 texternalsym:$dst)), (RCALLk texternalsym:$dst)>; // `anyext` def : Pat<(i16(anyext i8 diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h index 8b1c247eb6a7..da4c48559d9e 100644 --- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h +++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h @@ -61,6 +61,13 @@ public: MF.getFunction().hasFnAttribute("signal"); } + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override { + return DestMF.cloneInfo(*this); + } + bool getHasSpills() const { return HasSpills; } void setHasSpills(bool B) { HasSpills = B; } diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 5dd7f5c55695..87e6558c12c2 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -36,15 +36,20 @@ AVRRegisterInfo::AVRRegisterInfo() : AVRGenRegisterInfo(0) {} const uint16_t * AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const AVRMachineFunctionInfo *AFI = MF->getInfo(); - - return AFI->isInterruptOrSignalHandler() ? 
CSR_Interrupts_SaveList - : CSR_Normal_SaveList; + const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>(); + if (STI.hasTinyEncoding()) + return AFI->isInterruptOrSignalHandler() ? CSR_InterruptsTiny_SaveList + : CSR_NormalTiny_SaveList; + else + return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_SaveList + : CSR_Normal_SaveList; } const uint32_t * AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { - return CSR_Normal_RegMask; + const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>(); + return STI.hasTinyEncoding() ? CSR_NormalTiny_RegMask : CSR_Normal_RegMask; } BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { @@ -52,15 +57,26 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Reserve the intermediate result registers r0 and r1 // The result of instructions like 'mul' is always stored here. + // R0/R1/R1R0 are always reserved on both avr and avrtiny. Reserved.set(AVR::R0); Reserved.set(AVR::R1); Reserved.set(AVR::R1R0); - // Reserve the stack pointer. + // Reserve the stack pointer. Reserved.set(AVR::SPL); Reserved.set(AVR::SPH); Reserved.set(AVR::SP); + // Reserve R2~R17 only on avrtiny. + if (MF.getSubtarget<AVRSubtarget>().hasTinyEncoding()) { + // Reserve 8-bit registers R2~R15, Rtmp(R16) and Zero(R17). + for (unsigned Reg = AVR::R2; Reg <= AVR::R17; Reg++) + Reserved.set(Reg); + // Reserve 16-bit registers R3R2~R18R17. + for (unsigned Reg = AVR::R3R2; Reg <= AVR::R18R17; Reg++) + Reserved.set(Reg); + } + // We tentatively reserve the frame pointer register r29:r28 because the // function may require one, but we cannot tell until register allocation // is complete, which can be too late. @@ -137,6 +153,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = TM.getSubtargetImpl()->getFrameLowering(); + const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int Offset = MFI.getObjectOffset(FrameIndex); @@ -151,7 +168,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.getOpcode() == AVR::FRMIDX) { MI.setDesc(TII.get(AVR::MOVWRdRr)); MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false); - MI.RemoveOperand(2); + MI.removeOperand(2); assert(Offset > 0 && "Invalid offset"); @@ -219,7 +236,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // a compare and branch, invalidating the contents of SREG set by the // compare instruction because of the add/sub pairs. Conservatively save and // restore SREG before and after each add/sub pair. - BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0).addImm(0x3f); + BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0) + .addImm(STI.getIORegSREG()); MachineInstr *New = BuildMI(MBB, II, dl, TII.get(AddOpc), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) @@ -228,7 +246,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Restore SREG.
BuildMI(MBB, std::next(II), dl, TII.get(AVR::OUTARr)) - .addImm(0x3f) + .addImm(STI.getIORegSREG()) .addReg(AVR::R0, RegState::Kill); // No need to set SREG as dead here otherwise if the next instruction is a diff --git a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp b/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp deleted file mode 100644 index 76f29eb9f369..000000000000 --- a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp +++ /dev/null @@ -1,144 +0,0 @@ -//===-- AVRRelaxMemOperations.cpp - Relax out of range loads/stores -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass which relaxes out of range memory operations into -// equivalent operations which handle bigger addresses. -// -//===----------------------------------------------------------------------===// - -#include "AVR.h" -#include "AVRInstrInfo.h" -#include "AVRTargetMachine.h" -#include "MCTargetDesc/AVRMCTargetDesc.h" - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" - -using namespace llvm; - -#define AVR_RELAX_MEM_OPS_NAME "AVR memory operation relaxation pass" - -namespace { - -class AVRRelaxMem : public MachineFunctionPass { -public: - static char ID; - - AVRRelaxMem() : MachineFunctionPass(ID) { - initializeAVRRelaxMemPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - StringRef getPassName() const override { return AVR_RELAX_MEM_OPS_NAME; } - -private: - typedef MachineBasicBlock Block; - typedef Block::iterator BlockIt; - - const TargetInstrInfo *TII; - - template bool relax(Block &MBB, BlockIt MBBI); - - bool runOnBasicBlock(Block &MBB); - bool runOnInstruction(Block &MBB, BlockIt MBBI); - - MachineInstrBuilder buildMI(Block &MBB, BlockIt MBBI, unsigned Opcode) { - return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode)); - } -}; - -char AVRRelaxMem::ID = 0; - -bool AVRRelaxMem::runOnMachineFunction(MachineFunction &MF) { - bool Modified = false; - - const AVRSubtarget &STI = MF.getSubtarget(); - TII = STI.getInstrInfo(); - - for (Block &MBB : MF) { - bool BlockModified = runOnBasicBlock(MBB); - Modified |= BlockModified; - } - - return Modified; -} - -bool AVRRelaxMem::runOnBasicBlock(Block &MBB) { - bool Modified = false; - - BlockIt MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - BlockIt NMBBI = std::next(MBBI); - Modified |= runOnInstruction(MBB, MBBI); - MBBI = NMBBI; - } - - return Modified; -} - -template <> bool AVRRelaxMem::relax(Block &MBB, BlockIt MBBI) { - MachineInstr &MI = *MBBI; - - MachineOperand &Ptr = MI.getOperand(0); - MachineOperand &Src = MI.getOperand(2); - int64_t Imm = MI.getOperand(1).getImm(); - - // We can definitely optimise this better. - if (Imm > 63) { - // Push the previous state of the pointer register. - // This instruction must preserve the value. - buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(Ptr.getReg()); - - // Add the immediate to the pointer register. - buildMI(MBB, MBBI, AVR::SBCIWRdK) - .addReg(Ptr.getReg(), RegState::Define) - .addReg(Ptr.getReg()) - .addImm(-Imm); - - // Store the value in the source register to the address - // pointed to by the pointer register. 
- buildMI(MBB, MBBI, AVR::STWPtrRr) - .addReg(Ptr.getReg()) - .addReg(Src.getReg(), getKillRegState(Src.isKill())); - - // Pop the original state of the pointer register. - buildMI(MBB, MBBI, AVR::POPWRd) - .addDef(Ptr.getReg(), getKillRegState(Ptr.isKill())); - - MI.removeFromParent(); - } - - return false; -} - -bool AVRRelaxMem::runOnInstruction(Block &MBB, BlockIt MBBI) { - MachineInstr &MI = *MBBI; - int Opcode = MBBI->getOpcode(); - -#define RELAX(Op) \ - case Op: \ - return relax(MBB, MI) - - switch (Opcode) { RELAX(AVR::STDWPtrQRr); } -#undef RELAX - return false; -} - -} // end of anonymous namespace - -INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem", AVR_RELAX_MEM_OPS_NAME, false, - false) - -namespace llvm { - -FunctionPass *createAVRRelaxMemPass() { return new AVRRelaxMem(); } - -} // end of namespace llvm diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h index f8ca191b1868..2325193bac0a 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -91,8 +91,16 @@ public: return ELFArch; } - /// Get I/O register address. - int getIORegRAMPZ(void) const { return 0x3b; } + /// Get I/O register addresses. + int getIORegRAMPZ(void) const { return hasELPM() ? 0x3b : -1; } + int getIORegEIND(void) const { return hasEIJMPCALL() ? 0x3c : -1; } + int getIORegSPL(void) const { return 0x3d; } + int getIORegSPH(void) const { return hasSmallStack() ? -1 : 0x3e; } + int getIORegSREG(void) const { return 0x3f; } + + /// Get GPR aliases. + int getRegTmpIndex(void) const { return hasTinyEncoding() ? 16 : 0; } + int getRegZeroIndex(void) const { return hasTinyEncoding() ? 17 : 1; } private: /// The ELF e_flags architecture. diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp index 22b9ba3ece07..b9d77e0d1a51 100644 --- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -38,7 +38,7 @@ static StringRef getCPU(StringRef CPU) { } static Reloc::Model getEffectiveRelocModel(Optional RM) { - return RM.getValueOr(Reloc::Static); + return RM.value_or(Reloc::Static); } AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT, @@ -92,7 +92,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() { auto &PR = *PassRegistry::getPassRegistry(); initializeAVRExpandPseudoPass(PR); - initializeAVRRelaxMemPass(PR); initializeAVRShiftExpandPass(PR); } @@ -118,7 +117,6 @@ bool AVRPassConfig::addInstSelector() { } void AVRPassConfig::addPreSched2() { - addPass(createAVRRelaxMemPass()); addPass(createAVRExpandPseudoPass()); } diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index f19e7840eb31..9e1c7b781f0f 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -43,6 +43,10 @@ class AVRAsmParser : public MCTargetAsmParser { const MCRegisterInfo *MRI; const std::string GENERATE_STUBS = "gs"; + enum AVRMatchResultTy { + Match_InvalidRegisterOnTiny = FIRST_TARGET_MATCH_RESULT_TY + 1, + }; + #define GET_ASSEMBLER_HEADER #include "AVRGenAsmMatcher.inc" @@ -332,6 +336,8 @@ bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, return invalidOperand(Loc, Operands, ErrorInfo); case Match_MnemonicFail: return Error(Loc, "invalid instruction"); + case Match_InvalidRegisterOnTiny: + return Error(Loc, "invalid register on avrtiny"); default: return true; } @@ -399,6 +405,11 @@ bool 
AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) { if (RegNo == AVR::NoRegister) return true; + // Reject R0~R15 on avrtiny. + if (AVR::R0 <= RegNo && RegNo <= AVR::R15 && + STI.hasFeature(AVR::FeatureTinyEncoding)) + return Error(Parser.getTok().getLoc(), "invalid register on avrtiny"); + AsmToken const &T = Parser.getTok(); Operands.push_back(AVROperand::CreateReg(RegNo, T.getLoc(), T.getEndLoc())); Parser.Lex(); // Eat register token. @@ -726,6 +737,12 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (Op.isImm()) { if (MCConstantExpr const *Const = dyn_cast(Op.getImm())) { int64_t RegNum = Const->getValue(); + + // Reject R0~R15 on avrtiny. + if (0 <= RegNum && RegNum <= 15 && + STI.hasFeature(AVR::FeatureTinyEncoding)) + return Match_InvalidRegisterOnTiny; + std::ostringstream RegName; RegName << "r" << RegNum; RegNum = MatchRegisterName(RegName.str()); diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp index 9dcd370b9f1e..ee0ae08e192f 100644 --- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp +++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp @@ -18,8 +18,8 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/TargetRegistry.h" @@ -36,7 +36,7 @@ class AVRDisassembler : public MCDisassembler { public: AVRDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~AVRDisassembler() {} + virtual ~AVRDisassembler() = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, @@ -66,7 +66,7 @@ static const uint16_t GPRDecoderTable[] = { static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -77,7 +77,7 @@ static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -86,48 +86,51 @@ static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodePTRREGSRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - // Note: this function must be defined but does not seem to be called. 
- assert(false && "unimplemented: PTRREGS register class"); - return MCDisassembler::Success; -} - static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); + +static DecodeStatus decodeMemri(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); + +static DecodeStatus decodeLoadStore(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); #include "AVRGenDisassemblerTables.inc" static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned addr = 0; addr |= fieldFromInstruction(Insn, 0, 4); addr |= fieldFromInstruction(Insn, 9, 2) << 4; @@ -140,7 +143,7 @@ static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned addr = 0; addr |= fieldFromInstruction(Insn, 0, 4); addr |= fieldFromInstruction(Insn, 9, 2) << 4; @@ -153,7 +156,7 @@ static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned addr = fieldFromInstruction(Insn, 3, 5); unsigned b = fieldFromInstruction(Insn, 0, 3); Inst.addOperand(MCOperand::createImm(addr)); @@ -162,7 +165,8 @@ static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Field, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // Call targets need to be shifted left by one so this needs a custom // decoder. 
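// For illustration: AVR call/jmp targets are word addresses into program
// memory, so a decoded field value of 0x100 denotes byte address 0x200
// after the shift below.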
Inst.addOperand(MCOperand::createImm(Field << 1)); return MCDisassembler::Success; } static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned d = fieldFromInstruction(Insn, 4, 5); if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail) @@ -179,7 +183,7 @@ static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (decodeFRd(Inst, Insn, Address, Decoder) == MCDisassembler::Fail) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createReg(AVR::R31R30)); @@ -187,7 +191,8 @@ static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned d = fieldFromInstruction(Insn, 4, 3) + 16; unsigned r = fieldFromInstruction(Insn, 0, 3) + 16; if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == @@ -200,7 +205,8 @@ static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn, } static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned r = fieldFromInstruction(Insn, 4, 4) * 2; unsigned d = fieldFromInstruction(Insn, 0, 4) * 2; if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == @@ -213,7 +219,7 @@ static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn, } static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned d = fieldFromInstruction(Insn, 4, 2) * 2 + 24; // starts at r24:r25 unsigned k = 0; k |= fieldFromInstruction(Insn, 0, 4); @@ -229,7 +235,8 @@ static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned rd = fieldFromInstruction(Insn, 4, 4) + 16; unsigned rr = fieldFromInstruction(Insn, 0, 4) + 16; if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) == @@ -241,6 +248,128 @@ static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } +static DecodeStatus decodeMemri(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { + // As in the EncoderMethod `AVRMCCodeEmitter::encodeMemri`, the memory + // address is encoded in 7 bits, in which bits 0-5 are the immediate offset + // and bit 6 is the pointer register bit (Z=0, Y=1). + if (Insn > 127) + return MCDisassembler::Fail; + + // Append the base register operand. + Inst.addOperand( + MCOperand::createReg((Insn & 0x40) ? AVR::R29R28 : AVR::R31R30)); + // Append the immediate offset operand. + Inst.addOperand(MCOperand::createImm(Insn & 0x3f)); + + return MCDisassembler::Success; +} + +static DecodeStatus decodeLoadStore(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { + // Get the register that will be loaded or stored. + unsigned RegVal = GPRDecoderTable[(Insn >> 4) & 0x1f]; + + // Decode LDD/STD with offset less than 8. + if ((Insn & 0xf000) == 0x8000) { + unsigned RegBase = (Insn & 0x8) ?
AVR::R29R28 : AVR::R31R30; + unsigned Offset = Insn & 7; // We need not consider offset > 7. + if ((Insn & 0x200) == 0) { // Decode LDD. + Inst.setOpcode(AVR::LDDRdPtrQ); + Inst.addOperand(MCOperand::createReg(RegVal)); + Inst.addOperand(MCOperand::createReg(RegBase)); + Inst.addOperand(MCOperand::createImm(Offset)); + } else { // Decode STD. + Inst.setOpcode(AVR::STDPtrQRr); + Inst.addOperand(MCOperand::createReg(RegBase)); + Inst.addOperand(MCOperand::createImm(Offset)); + Inst.addOperand(MCOperand::createReg(RegVal)); + } + return MCDisassembler::Success; + } + + // Decode the following 14 instructions. Bit 9 indicates load(0) or store(1), + // bits 8~4 indicate the value register, bits 3-2 indicate the base address + // register (11-X, 10-Y, 00-Z), bits 1~0 indicate the mode (00-basic, + // 01-postinc, 10-predec). + // ST X, Rr : 1001 001r rrrr 1100 + // ST X+, Rr : 1001 001r rrrr 1101 + // ST -X, Rr : 1001 001r rrrr 1110 + // ST Y+, Rr : 1001 001r rrrr 1001 + // ST -Y, Rr : 1001 001r rrrr 1010 + // ST Z+, Rr : 1001 001r rrrr 0001 + // ST -Z, Rr : 1001 001r rrrr 0010 + // LD Rd, X : 1001 000d dddd 1100 + // LD Rd, X+ : 1001 000d dddd 1101 + // LD Rd, -X : 1001 000d dddd 1110 + // LD Rd, Y+ : 1001 000d dddd 1001 + // LD Rd, -Y : 1001 000d dddd 1010 + // LD Rd, Z+ : 1001 000d dddd 0001 + // LD Rd, -Z : 1001 000d dddd 0010 + if ((Insn & 0xfc00) != 0x9000 || (Insn & 0xf) == 0) + return MCDisassembler::Fail; + + // Get the base address register. + unsigned RegBase; + switch (Insn & 0xc) { + case 0xc: + RegBase = AVR::R27R26; + break; + case 0x8: + RegBase = AVR::R29R28; + break; + case 0x0: + RegBase = AVR::R31R30; + break; + default: + return MCDisassembler::Fail; + } + + // Set the opcode. + switch (Insn & 0x203) { + case 0x200: + Inst.setOpcode(AVR::STPtrRr); + Inst.addOperand(MCOperand::createReg(RegBase)); + Inst.addOperand(MCOperand::createReg(RegVal)); + return MCDisassembler::Success; + case 0x201: + Inst.setOpcode(AVR::STPtrPiRr); + break; + case 0x202: + Inst.setOpcode(AVR::STPtrPdRr); + break; + case 0: + Inst.setOpcode(AVR::LDRdPtr); + Inst.addOperand(MCOperand::createReg(RegVal)); + Inst.addOperand(MCOperand::createReg(RegBase)); + return MCDisassembler::Success; + case 1: + Inst.setOpcode(AVR::LDRdPtrPi); + break; + case 2: + Inst.setOpcode(AVR::LDRdPtrPd); + break; + default: + return MCDisassembler::Fail; + } + + // Build postinc/predec machine instructions. + if ((Insn & 0x200) == 0) { // This is a load instruction. + Inst.addOperand(MCOperand::createReg(RegVal)); + Inst.addOperand(MCOperand::createReg(RegBase)); + Inst.addOperand(MCOperand::createReg(RegBase)); + } else { // This is a store instruction. + Inst.addOperand(MCOperand::createReg(RegBase)); + Inst.addOperand(MCOperand::createReg(RegBase)); + Inst.addOperand(MCOperand::createReg(RegVal)); + // STPtrPiRr and STPtrPdRr have an extra immediate operand. + Inst.addOperand(MCOperand::createImm(1)); + } + + return MCDisassembler::Success; +} + static DecodeStatus readInstruction16(ArrayRef Bytes, uint64_t Address, uint64_t &Size, uint32_t &Insn) { if (Bytes.size() < 2) { @@ -299,7 +428,12 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, // Try to auto-decode a 16-bit instruction. Result = decodeInstruction(getDecoderTable(Size), Instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) + return Result; + // Try to decode to a load/store instruction. ST/LD need a specified + // DecoderMethod, as they already have a specified PostEncoderMethod. 
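// Worked example for decodeLoadStore (illustrative): "st X+, r24" encodes
// as 0x938d = 1001 0011 1000 1101. (Insn & 0xfc00) == 0x9000 admits it,
// (Insn >> 4) & 0x1f = 24 selects R24, (Insn & 0xc) == 0xc selects the X
// pair (R27R26), and (Insn & 0x203) == 0x201 picks STPtrPiRr, the
// post-increment store form.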
+ Result = decodeLoadStore(Instr, Insn, Address, this); if (Result != MCDisassembler::Fail) return Result; } @@ -323,4 +457,4 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, } typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp index b90e103794da..850ddf0d9458 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp @@ -25,7 +25,7 @@ class AVRELFObjectWriter : public MCELFObjectTargetWriter { public: AVRELFObjectWriter(uint8_t OSABI); - virtual ~AVRELFObjectWriter() {} + virtual ~AVRELFObjectWriter() = default; unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 85933d6b9bb9..ade5df18c3b9 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -1,6 +1,7 @@ #include "AVRELFStreamer.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/FormattedStream.h" diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h index 11f55f6d253b..54dad3098385 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h @@ -43,6 +43,9 @@ private: printPCRelImm(MI, OpNo, O); } void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemspi(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemri(MI, OpNo, O); + } // Autogenerated by TableGen. std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp index 9754ff7f1146..c8bb410e4882 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp @@ -295,7 +295,6 @@ void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, } MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new AVRMCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h index 68589763f29a..5bf6c1a581e3 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h @@ -84,7 +84,7 @@ private: private: explicit AVRMCExpr(VariantKind Kind, const MCExpr *Expr, bool Negated) : Kind(Kind), SubExpr(Expr), Negated(Negated) {} - ~AVRMCExpr() {} + ~AVRMCExpr() = default; }; } // end namespace llvm diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h index ef116793d326..aaf236d82016 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h @@ -33,7 +33,6 @@ MCInstrInfo *createAVRMCInstrInfo(); /// Creates a machine code emitter for AVR. MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); /// Creates an assembly backend for AVR. 
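The memri operand that decodeMemri unpacks above is compact enough to check in
isolation. Below is a minimal standalone sketch of the 7-bit layout it
describes (bit 6 selects the pointer register, bits 0-5 the displacement); the
struct and function names are illustrative, not part of the vendored sources:

    #include <cassert>
    #include <cstdint>

    // Bit 6: pointer register (0 = Z = R31:R30, 1 = Y = R29:R28).
    // Bits 0-5: unsigned displacement, 0..63.
    struct MemriBits {
      bool UsesY;
      unsigned Offset;
    };

    static MemriBits unpackMemri(uint8_t Field) {
      assert(Field <= 127 && "memri operands are only 7 bits wide");
      return {(Field & 0x40) != 0, Field & 0x3fu};
    }

    static uint8_t packMemri(const MemriBits &M) {
      assert(M.Offset <= 63 && "the displacement is only 6 bits wide");
      return (M.UsesY ? 0x40 : 0x00) | M.Offset;
    }

    int main() {
      // Round trip: "Y+5" packs to 0x45 and unpacks back to (Y, 5).
      MemriBits M = unpackMemri(packMemri({true, 5}));
      assert(M.UsesY && M.Offset == 5);
      return 0;
    }

This mirrors the guard in decodeMemri itself, which fails the decode outright
for any field value above 127.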
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index 697deb117bcb..4c064d65d919 100644 --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h index 89990f7e15c2..3de761bf6601 100644 --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -11,6 +11,8 @@ #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 46141e69d9d4..349cdd92ae62 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -77,6 +77,7 @@ #include "BPF.h" #include "BPFCORE.h" #include "BPFTargetMachine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" @@ -123,7 +124,7 @@ public: struct CallInfo { uint32_t Kind; uint32_t AccessIndex; - Align RecordAlignment; + MaybeAlign RecordAlignment; MDNode *Metadata; Value *Base; }; @@ -142,9 +143,9 @@ private: Module *M = nullptr; static std::map GEPGlobals; - // A map to link preserve_*_access_index instrinsic calls. + // A map to link preserve_*_access_index intrinsic calls. std::map> AIChain; - // A map to hold all the base preserve_*_access_index instrinsic calls. + // A map to hold all the base preserve_*_access_index intrinsic calls. // The base call is not an input of any other preserve_* // intrinsics. std::map BaseAICalls; @@ -169,7 +170,7 @@ private: uint32_t &StartBitOffset, uint32_t &EndBitOffset); uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy, uint32_t AccessIndex, uint32_t PatchImm, - Align RecordAlignment); + MaybeAlign RecordAlignment); Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo, std::string &AccessKey, MDNode *&BaseMeta); @@ -270,7 +271,7 @@ static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) { static Type *getBaseElementType(const CallInst *Call) { // Element type is stored in an elementtype() attribute on the first param. - return Call->getAttributes().getParamElementType(0); + return Call->getParamElementType(0); } /// Check whether a call is a preserve_*_access_index intrinsic call or not. 
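The IsPreserveDIAccessIndexCall hunks that follow only adjust what gets
recorded in CallInfo; the recognition itself is name-based on the called
intrinsic. A reduced sketch of that idiom (illustrative; the intrinsic list is
reconstructed from the names appearing in this patch plus
llvm.preserve.array.access.index and llvm.bpf.preserve.enum.value from the
same family):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Reduced, illustrative form of the name-based recognition in
    // BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall.
    static bool looksLikePreserveAccessIndex(const CallInst *Call) {
      const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
      if (!GV)
        return false;
      StringRef Name = GV->getName();
      return Name.startswith("llvm.preserve.array.access.index") ||
             Name.startswith("llvm.preserve.union.access.index") ||
             Name.startswith("llvm.preserve.struct.access.index") ||
             Name.startswith("llvm.bpf.preserve.field.info") ||
             Name.startswith("llvm.bpf.preserve.type.info") ||
             Name.startswith("llvm.bpf.preserve.enum.value");
    }

The real function additionally fills in CallInfo (kind, access index, base
pointer, record alignment) per intrinsic, which is exactly what the hunks
below change.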
@@ -299,8 +300,6 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic"); CInfo.AccessIndex = getConstant(Call->getArgOperand(1)); CInfo.Base = Call->getArgOperand(0); - CInfo.RecordAlignment = - DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType()); return true; } if (GV->getName().startswith("llvm.preserve.struct.access.index")) { @@ -333,6 +332,8 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic"); if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE) CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_EXISTENCE; + else if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_MATCH) + CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_MATCH; else CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_SIZE; return true; @@ -592,10 +593,20 @@ void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy, uint32_t &EndBitOffset) { uint32_t MemberBitSize = MemberTy->getSizeInBits(); uint32_t MemberBitOffset = MemberTy->getOffsetInBits(); + + if (RecordAlignment > 8) { + // If the Bits are within an aligned 8-byte, set the RecordAlignment + // to 8, other report the fatal error. + if (MemberBitOffset / 64 != (MemberBitOffset + MemberBitSize) / 64) + report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, " + "requiring too big alignment"); + RecordAlignment = Align(8); + } + uint32_t AlignBits = RecordAlignment.value() * 8; - if (RecordAlignment > 8 || MemberBitSize > AlignBits) + if (MemberBitSize > AlignBits) report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, " - "requiring too big alignment"); + "bitfield size greater than record alignment"); StartBitOffset = MemberBitOffset & ~(AlignBits - 1); if ((StartBitOffset + AlignBits) < (MemberBitOffset + MemberBitSize)) @@ -608,7 +619,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy, uint32_t AccessIndex, uint32_t PatchImm, - Align RecordAlignment) { + MaybeAlign RecordAlignment) { if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE) return 1; @@ -624,7 +635,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, PatchImm += MemberTy->getOffsetInBits() >> 3; } else { unsigned SBitOffset, NextSBitOffset; - GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, + GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset, NextSBitOffset); PatchImm += SBitOffset >> 3; } @@ -643,7 +654,8 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, return SizeInBits >> 3; unsigned SBitOffset, NextSBitOffset; - GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset); + GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset, + NextSBitOffset); SizeInBits = NextSBitOffset - SBitOffset; if (SizeInBits & (SizeInBits - 1)) report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info"); @@ -703,7 +715,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, } unsigned SBitOffset, NextSBitOffset; - GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset); + GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset, NextSBitOffset); if (NextSBitOffset - SBitOffset > 64) report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); @@ -734,7 +746,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, } unsigned 
SBitOffset, NextSBitOffset; - GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset); + GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset, NextSBitOffset); if (NextSBitOffset - SBitOffset > 64) report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); @@ -923,7 +935,8 @@ MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call, int64_t PatchImm; std::string AccessStr("0"); - if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE) { + if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE || + CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_MATCH) { PatchImm = 1; } else if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_SIZE) { // typedef debuginfo type has size 0, get the eventual base type. @@ -933,8 +946,11 @@ MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call, // ENUM_VALUE_EXISTENCE and ENUM_VALUE IsInt32Ret = false; - const auto *CE = cast(Call->getArgOperand(1)); - const GlobalVariable *GV = cast(CE->getOperand(0)); + // The argument could be a global variable or a getelementptr with base to + // a global variable depending on whether the clang option `opaque-options` + // is set or not. + const GlobalVariable *GV = + cast(Call->getArgOperand(1)->stripPointerCasts()); assert(GV->hasInitializer()); const ConstantDataArray *DA = cast(GV->getInitializer()); assert(DA->isString()); diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp index 69d0bca0bd77..98f8d59fbe01 100644 --- a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp +++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp @@ -259,10 +259,16 @@ bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) { return false; if (Cond1Op == ICmpInst::ICMP_SGT || Cond1Op == ICmpInst::ICMP_SGE) { - if (Cond2Op != ICmpInst::ICMP_SLT && Cond1Op != ICmpInst::ICMP_SLE) + if (Cond2Op != ICmpInst::ICMP_SLT && Cond2Op != ICmpInst::ICMP_SLE) return false; } else if (Cond1Op == ICmpInst::ICMP_SLT || Cond1Op == ICmpInst::ICMP_SLE) { - if (Cond2Op != ICmpInst::ICMP_SGT && Cond1Op != ICmpInst::ICMP_SGE) + if (Cond2Op != ICmpInst::ICMP_SGT && Cond2Op != ICmpInst::ICMP_SGE) + return false; + } else if (Cond1Op == ICmpInst::ICMP_ULT || Cond1Op == ICmpInst::ICMP_ULE) { + if (Cond2Op != ICmpInst::ICMP_UGT && Cond2Op != ICmpInst::ICMP_UGE) + return false; + } else if (Cond1Op == ICmpInst::ICMP_UGT || Cond1Op == ICmpInst::ICMP_UGE) { + if (Cond2Op != ICmpInst::ICMP_ULT && Cond2Op != ICmpInst::ICMP_ULE) return false; } else { return false; diff --git a/llvm/lib/Target/BPF/BPFCORE.h b/llvm/lib/Target/BPF/BPFCORE.h index 0c504412480d..c9aa135232c1 100644 --- a/llvm/lib/Target/BPF/BPFCORE.h +++ b/llvm/lib/Target/BPF/BPFCORE.h @@ -32,6 +32,7 @@ public: TYPE_SIZE, ENUM_VALUE_EXISTENCE, ENUM_VALUE, + TYPE_MATCH, MAX_FIELD_RELOC_KIND, }; @@ -46,6 +47,7 @@ public: enum PreserveTypeInfo : uint32_t { PRESERVE_TYPE_INFO_EXISTENCE = 0, PRESERVE_TYPE_INFO_SIZE, + PRESERVE_TYPE_INFO_MATCH, MAX_PRESERVE_TYPE_INFO_FLAG, }; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 0587cb0e16e3..16876e74c4a1 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -103,7 +103,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); 
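// Note (editorial, not part of the vendored sources): the UREM expansion
// deleted above pairs with the native modulo support this patch adds -- the
// BPF_MOD ALU opcode (0x9) in BPFInstrFormats.td and the "defm MOD" pattern
// in BPFInstrInfo.td below -- so unsigned remainder is now selected directly
// instead of being expanded.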
setOperationAction(ISD::UMUL_LOHI, VT, Expand); @@ -168,6 +167,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0; MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0; MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0; + MaxLoadsPerMemcmp = 0; } else { // inline memcpy() for kernel to see explicit copy unsigned CommonMaxStores = @@ -176,6 +176,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores; MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores; MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores; + MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores; } // CPU/Feature control diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td index a809065014e5..27db0be080ae 100644 --- a/llvm/lib/Target/BPF/BPFInstrFormats.td +++ b/llvm/lib/Target/BPF/BPFInstrFormats.td @@ -39,6 +39,7 @@ def BPF_AND : BPFArithOp<0x5>; def BPF_LSH : BPFArithOp<0x6>; def BPF_RSH : BPFArithOp<0x7>; def BPF_NEG : BPFArithOp<0x8>; +def BPF_MOD : BPFArithOp<0x9>; def BPF_XOR : BPFArithOp<0xa>; def BPF_MOV : BPFArithOp<0xb>; def BPF_ARSH : BPFArithOp<0xc>; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp index 54360a89782b..e61e32b62d83 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp +++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp @@ -192,8 +192,7 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a J, delete them. - while (std::next(I) != MBB.end()) - std::next(I)->eraseFromParent(); + MBB.erase(std::next(I), MBB.end()); Cond.clear(); FBB = nullptr; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 082e1f4a92c2..6cac478561b2 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -298,6 +298,7 @@ let isAsCheapAsAMove = 1 in { } defm MUL : ALU; + defm MOD : ALUisPHI()) { - if (PhiInsns.find(PhiDef) != PhiInsns.end()) + if (!PhiInsns.insert(PhiDef).second) return false; - PhiInsns.insert(PhiDef); if (!isPhiFrom32Def(PhiDef)) return false; } @@ -143,9 +143,8 @@ bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn) return false; if (DefInsn->isPHI()) { - if (PhiInsns.find(DefInsn) != PhiInsns.end()) + if (!PhiInsns.insert(DefInsn).second) return false; - PhiInsns.insert(DefInsn); if (!isPhiFrom32Def(DefInsn)) return false; } else if (DefInsn->getOpcode() == BPF::COPY) { diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index b4232875383c..088195994edd 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -31,9 +31,11 @@ #include "BPFCORE.h" #include "BPFInstrInfo.h" #include "BPFTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include using namespace llvm; @@ -52,9 +54,12 @@ struct BPFMISimplifyPatchable : public MachineFunctionPass { } private: + std::set SkipInsts; + // Initialize class variables. 
void initialize(MachineFunction &MFParm); + bool isLoadInst(unsigned Opcode); bool removeLD(); void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB, MachineInstr &MI, Register &SrcReg, Register &DstReg, @@ -88,6 +93,12 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) { LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n"); } +bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) { + return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH || + Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 || + Opcode == BPF::LDB32; +} + void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, MachineOperand *RelocOp, const GlobalValue *GVal) { const MachineInstr *Inst = RelocOp->getParent(); @@ -229,6 +240,11 @@ void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI, void BPFMISimplifyPatchable::processInst(MachineRegisterInfo *MRI, MachineInstr *Inst, MachineOperand *RelocOp, const GlobalValue *GVal) { unsigned Opcode = Inst->getOpcode(); + if (isLoadInst(Opcode)) { + SkipInsts.insert(Inst); + return; + } + if (Opcode == BPF::ADD_rr) checkADDrr(MRI, RelocOp, GVal); else if (Opcode == BPF::SLL_rr) @@ -253,10 +269,10 @@ bool BPFMISimplifyPatchable::removeLD() { } // Ensure the register format is LOAD , , 0 - if (MI.getOpcode() != BPF::LDD && MI.getOpcode() != BPF::LDW && - MI.getOpcode() != BPF::LDH && MI.getOpcode() != BPF::LDB && - MI.getOpcode() != BPF::LDW32 && MI.getOpcode() != BPF::LDH32 && - MI.getOpcode() != BPF::LDB32) + if (!isLoadInst(MI.getOpcode())) + continue; + + if (SkipInsts.find(&MI) != SkipInsts.end()) continue; if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index 6dfb7dc39922..8c58aae5b618 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -12,6 +12,7 @@ #include "BPF.h" #include "BPFCORE.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 2fb76ab5c440..97d9ed3cad47 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -59,7 +59,7 @@ static std::string computeDataLayout(const Triple &TT) { } static Reloc::Model getEffectiveRelocModel(Optional RM) { - return RM.getValueOr(Reloc::PIC_); + return RM.value_or(Reloc::PIC_); } BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, @@ -149,7 +149,7 @@ void BPFPassConfig::addIRPasses() { } TargetTransformInfo -BPFTargetMachine::getTargetTransformInfo(const Function &F) { +BPFTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(BPFTTIImpl(this, F)); } diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index 98f64ccc3793..fede52089725 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -34,7 +34,7 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h 
b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 6b86bf6e6cc1..0c8f9604b665 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -71,6 +71,15 @@ public: Opd2Info, Opd1PropInfo, Opd2PropInfo); } + + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, + bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + Options.LoadSizes = {8, 4, 2, 1}; + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + return Options; + } + }; } // end namespace llvm diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def index 0ae4194bc512..1de0e51b4757 100644 --- a/llvm/lib/Target/BPF/BTF.def +++ b/llvm/lib/Target/BPF/BTF.def @@ -33,5 +33,6 @@ HANDLE_BTF_KIND(15, DATASEC) HANDLE_BTF_KIND(16, FLOAT) HANDLE_BTF_KIND(17, DECL_TAG) HANDLE_BTF_KIND(18, TYPE_TAG) +HANDLE_BTF_KIND(19, ENUM64) #undef HANDLE_BTF_KIND diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h index e54b97cd49a9..4540054aaf34 100644 --- a/llvm/lib/Target/BPF/BTF.h +++ b/llvm/lib/Target/BPF/BTF.h @@ -60,6 +60,7 @@ enum { CommonTypeSize = 12, BTFArraySize = 12, BTFEnumSize = 8, + BTFEnum64Size = 12, BTFMemberSize = 12, BTFParamSize = 8, BTFDataSecVarSize = 12, @@ -145,6 +146,15 @@ struct BTFEnum { int32_t Val; ///< Enum member value }; +/// BTF_KIND_ENUM64 is followed by multiple "struct BTFEnum64". +/// The exact number of BTFEnum64 is stored in the vlen (of the +/// info in "struct CommonType"). +struct BTFEnum64 { + uint32_t NameOff; ///< Enum name offset in the string table + uint32_t Val_Lo32; ///< Enum member lo32 value + uint32_t Val_Hi32; ///< Enum member hi32 value +}; + /// BTF_KIND_ARRAY is followed by one "struct BTFArray". struct BTFArray { uint32_t ElemType; ///< Element type diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index d536aed1d211..a949e925eb60 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -161,9 +162,10 @@ void BTFTypeInt::emitType(MCStreamer &OS) { OS.emitInt32(IntVal); } -BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) { +BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen, + bool IsSigned) : ETy(ETy) { Kind = BTF::BTF_KIND_ENUM; - BTFType.Info = Kind << 24 | VLen; + BTFType.Info = IsSigned << 31 | Kind << 24 | VLen; BTFType.Size = roundupToBytes(ETy->getSizeInBits()); } @@ -199,6 +201,48 @@ void BTFTypeEnum::emitType(MCStreamer &OS) { } } +BTFTypeEnum64::BTFTypeEnum64(const DICompositeType *ETy, uint32_t VLen, + bool IsSigned) : ETy(ETy) { + Kind = BTF::BTF_KIND_ENUM64; + BTFType.Info = IsSigned << 31 | Kind << 24 | VLen; + BTFType.Size = roundupToBytes(ETy->getSizeInBits()); +} + +void BTFTypeEnum64::completeType(BTFDebug &BDebug) { + if (IsCompleted) + return; + IsCompleted = true; + + BTFType.NameOff = BDebug.addString(ETy->getName()); + + DINodeArray Elements = ETy->getElements(); + for (const auto Element : Elements) { + const auto *Enum = cast(Element); + + struct BTF::BTFEnum64 BTFEnum; + BTFEnum.NameOff = BDebug.addString(Enum->getName()); + uint64_t Value; + if (Enum->isUnsigned()) + Value = static_cast(Enum->getValue().getZExtValue()); + else + Value = static_cast(Enum->getValue().getSExtValue()); + BTFEnum.Val_Lo32 = Value; + 
BTFEnum.Val_Hi32 = Value >> 32; + EnumValues.push_back(BTFEnum); + } +} + +void BTFTypeEnum64::emitType(MCStreamer &OS) { + BTFTypeBase::emitType(OS); + for (const auto &Enum : EnumValues) { + OS.emitInt32(Enum.NameOff); + OS.AddComment("0x" + Twine::utohexstr(Enum.Val_Lo32)); + OS.emitInt32(Enum.Val_Lo32); + OS.AddComment("0x" + Twine::utohexstr(Enum.Val_Hi32)); + OS.emitInt32(Enum.Val_Hi32); + } +} + BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems) { Kind = BTF::BTF_KIND_ARRAY; BTFType.NameOff = 0; @@ -552,6 +596,46 @@ void BTFDebug::processDeclAnnotations(DINodeArray Annotations, } } +/// Generate btf_type_tag chains. +int BTFDebug::genBTFTypeTags(const DIDerivedType *DTy, int BaseTypeId) { + SmallVector MDStrs; + DINodeArray Annots = DTy->getAnnotations(); + if (Annots) { + // For type with "int __tag1 __tag2 *p", the MDStrs will have + // content: [__tag1, __tag2]. + for (const Metadata *Annotations : Annots->operands()) { + const MDNode *MD = cast(Annotations); + const MDString *Name = cast(MD->getOperand(0)); + if (!Name->getString().equals("btf_type_tag")) + continue; + MDStrs.push_back(cast(MD->getOperand(1))); + } + } + + if (MDStrs.size() == 0) + return -1; + + // With MDStrs [__tag1, __tag2], the output type chain looks like + // PTR -> __tag2 -> __tag1 -> BaseType + // In the below, we construct BTF types with the order of __tag1, __tag2 + // and PTR. + unsigned TmpTypeId; + std::unique_ptr TypeEntry; + if (BaseTypeId >= 0) + TypeEntry = + std::make_unique(BaseTypeId, MDStrs[0]->getString()); + else + TypeEntry = std::make_unique(DTy, MDStrs[0]->getString()); + TmpTypeId = addType(std::move(TypeEntry)); + + for (unsigned I = 1; I < MDStrs.size(); I++) { + const MDString *Value = MDStrs[I]; + TypeEntry = std::make_unique(TmpTypeId, Value->getString()); + TmpTypeId = addType(std::move(TypeEntry)); + } + return TmpTypeId; +} + /// Handle structure/union types. void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, uint32_t &TypeId) { @@ -633,8 +717,25 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) { if (VLen > BTF::MAX_VLEN) return; - auto TypeEntry = std::make_unique(CTy, VLen); - TypeId = addType(std::move(TypeEntry), CTy); + bool IsSigned = false; + unsigned NumBits = 32; + // No BaseType implies forward declaration in which case a + // BTFTypeEnum with Vlen = 0 is emitted. + if (CTy->getBaseType() != nullptr) { + const auto *BTy = cast(CTy->getBaseType()); + IsSigned = BTy->getEncoding() == dwarf::DW_ATE_signed || + BTy->getEncoding() == dwarf::DW_ATE_signed_char; + NumBits = BTy->getSizeInBits(); + } + + if (NumBits <= 32) { + auto TypeEntry = std::make_unique(CTy, VLen, IsSigned); + TypeId = addType(std::move(TypeEntry), CTy); + } else { + assert(NumBits == 64); + auto TypeEntry = std::make_unique(CTy, VLen, IsSigned); + TypeId = addType(std::move(TypeEntry), CTy); + } // No need to visit base type as BTF does not encode it. } @@ -684,9 +785,8 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, /// pointee type will be replaced with either a real type or /// a forward declaration. 
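// (Editorial summary of how that replacement happens, per the endModule()
// hunk later in this patch: FixupDerivedTypes now maps each composite type
// to the (DIDerivedType, BTFTypeDerived *) pairs that point at it. At
// endModule() time the struct/union is looked up by name among the emitted
// struct types; if it was never defined, a BTF forward declaration is
// emitted in its place. Each recorded pointer is then fixed to either that
// type id directly, or -- when the pointee carries btf_type_tag
// annotations -- to the head of the tag chain built by
// genBTFTypeTags(DTy, StructTypeId).)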
auto TypeEntry = std::make_unique(DTy, Tag, true); - auto &Fixup = FixupDerivedTypes[CTy->getName()]; - Fixup.first = CTag == dwarf::DW_TAG_union_type; - Fixup.second.push_back(TypeEntry.get()); + auto &Fixup = FixupDerivedTypes[CTy]; + Fixup.push_back(std::make_pair(DTy, TypeEntry.get())); TypeId = addType(std::move(TypeEntry), DTy); return; } @@ -695,34 +795,8 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, } if (Tag == dwarf::DW_TAG_pointer_type) { - SmallVector MDStrs; - DINodeArray Annots = DTy->getAnnotations(); - if (Annots) { - // For type with "int __tag1 __tag2 *p", the MDStrs will have - // content: [__tag1, __tag2]. - for (const Metadata *Annotations : Annots->operands()) { - const MDNode *MD = cast(Annotations); - const MDString *Name = cast(MD->getOperand(0)); - if (!Name->getString().equals("btf_type_tag")) - continue; - MDStrs.push_back(cast(MD->getOperand(1))); - } - } - - if (MDStrs.size() > 0) { - // With MDStrs [__tag1, __tag2], the output type chain looks like - // PTR -> __tag2 -> __tag1 -> BaseType - // In the below, we construct BTF types with the order of __tag1, __tag2 - // and PTR. - auto TypeEntry = - std::make_unique(DTy, MDStrs[0]->getString()); - unsigned TmpTypeId = addType(std::move(TypeEntry)); - for (unsigned I = 1; I < MDStrs.size(); I++) { - const MDString *Value = MDStrs[I]; - TypeEntry = - std::make_unique(TmpTypeId, Value->getString()); - TmpTypeId = addType(std::move(TypeEntry)); - } + int TmpTypeId = genBTFTypeTags(DTy, -1); + if (TmpTypeId >= 0) { auto TypeDEntry = std::make_unique(TmpTypeId, Tag, DTy->getName()); TypeId = addType(std::move(TypeDEntry), DTy); @@ -773,15 +847,31 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId, // already defined, we should keep moving to eventually // bring in types for "struct t". Otherwise, the "struct s2" // definition won't be correct. + // + // In the above, we have following debuginfo: + // {ptr, struct_member} -> typedef -> struct + // and BTF type for 'typedef' is generated while 'struct' may + // be in FixUp. But let us generalize the above to handle + // {different types} -> [various derived types]+ -> another type. + // For example, + // {func_param, struct_member} -> const -> ptr -> volatile -> struct + // We will traverse const/ptr/volatile which already have corresponding + // BTF types and generate type for 'struct' which might be in Fixup + // state. if (Ty && (!CheckPointer || !SeenPointer)) { if (const auto *DTy = dyn_cast(Ty)) { - unsigned Tag = DTy->getTag(); - if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || - Tag == dwarf::DW_TAG_volatile_type || - Tag == dwarf::DW_TAG_restrict_type) { - uint32_t TmpTypeId; - visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer, - SeenPointer); + while (DTy) { + const DIType *BaseTy = DTy->getBaseType(); + if (!BaseTy) + break; + + if (DIToIdMap.find(BaseTy) != DIToIdMap.end()) { + DTy = dyn_cast(BaseTy); + } else { + uint32_t TmpTypeId; + visitTypeEntry(BaseTy, TmpTypeId, CheckPointer, SeenPointer); + break; + } } } } @@ -908,7 +998,7 @@ void BTFDebug::emitBTFSection() { MCContext &Ctx = OS.getContext(); MCSectionELF *Sec = Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0); Sec->setAlignment(Align(4)); - OS.SwitchSection(Sec); + OS.switchSection(Sec); // Emit header. 
emitCommonHeader(); @@ -948,7 +1038,7 @@ void BTFDebug::emitBTFExtSection() { MCContext &Ctx = OS.getContext(); MCSectionELF *Sec = Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0); Sec->setAlignment(Align(4)); - OS.SwitchSection(Sec); + OS.switchSection(Sec); // Emit header. emitCommonHeader(); @@ -1436,9 +1526,8 @@ void BTFDebug::processFuncPrototypes(const Function *F) { return; // Do not emit again if already emitted. - if (ProtoFunctions.find(F) != ProtoFunctions.end()) + if (!ProtoFunctions.insert(F).second) return; - ProtoFunctions.insert(F); uint32_t ProtoTypeId; const std::unordered_map FuncArgNames; @@ -1480,8 +1569,9 @@ void BTFDebug::endModule() { // Fixups for (auto &Fixup : FixupDerivedTypes) { - StringRef TypeName = Fixup.first; - bool IsUnion = Fixup.second.first; + const DICompositeType *CTy = Fixup.first; + StringRef TypeName = CTy->getName(); + bool IsUnion = CTy->getTag() == dwarf::DW_TAG_union_type; // Search through struct types uint32_t StructTypeId = 0; @@ -1497,8 +1587,15 @@ void BTFDebug::endModule() { StructTypeId = addType(std::move(FwdTypeEntry)); } - for (auto &DType : Fixup.second.second) { - DType->setPointeeType(StructTypeId); + for (auto &TypeInfo : Fixup.second) { + const DIDerivedType *DTy = TypeInfo.first; + BTFTypeDerived *BDType = TypeInfo.second; + + int TmpTypeId = genBTFTypeTags(DTy, StructTypeId); + if (TmpTypeId >= 0) + BDType->setPointeeType(TmpTypeId); + else + BDType->setPointeeType(StructTypeId); } } diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 7c30675c553c..1ad8ec5d918c 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -103,7 +103,7 @@ class BTFTypeEnum : public BTFTypeBase { std::vector EnumValues; public: - BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues); + BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues, bool IsSigned); uint32_t getSize() override { return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize; } @@ -218,6 +218,20 @@ public: void emitType(MCStreamer &OS) override; }; +/// Handle 64-bit enumerate type. +class BTFTypeEnum64 : public BTFTypeBase { + const DICompositeType *ETy; + std::vector EnumValues; + +public: + BTFTypeEnum64(const DICompositeType *ETy, uint32_t NumValues, bool IsSigned); + uint32_t getSize() override { + return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnum64Size; + } + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; +}; + class BTFTypeTypeTag : public BTFTypeBase { const DIDerivedType *DTy; StringRef Tag; @@ -289,7 +303,8 @@ class BTFDebug : public DebugHandlerBase { std::map> DataSecEntries; std::vector StructTypes; std::map> PatchImms; - std::map>> + std::map>> FixupDerivedTypes; std::setProtoFunctions; @@ -341,6 +356,13 @@ class BTFDebug : public DebugHandlerBase { void processDeclAnnotations(DINodeArray Annotations, uint32_t BaseTypeId, int ComponentId); + /// Generate BTF type_tag's. If BaseTypeId is nonnegative, the last + /// BTF type_tag in the chain points to BaseTypeId. Otherwise, it points to + /// the base type of DTy. Return the type id of the first BTF type_tag + /// in the chain. If no type_tag's are generated, a negative value + /// is returned. + int genBTFTypeTags(const DIDerivedType *DTy, int BaseTypeId); + /// Generate one field relocation record. 
void generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId, const GlobalVariable *, bool IsAma); diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index 3f643d47f934..aa408f8b65f7 100644 --- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -15,9 +15,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/MathExtras.h" #include @@ -99,7 +100,7 @@ static const unsigned GPRDecoderTable[] = { static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void * /*Decoder*/) { + const MCDisassembler * /*Decoder*/) { if (RegNo > 11) return MCDisassembler::Fail; @@ -112,9 +113,9 @@ static const unsigned GPR32DecoderTable[] = { BPF::W0, BPF::W1, BPF::W2, BPF::W3, BPF::W4, BPF::W5, BPF::W6, BPF::W7, BPF::W8, BPF::W9, BPF::W10, BPF::W11}; -static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void * /*Decoder*/) { +static DecodeStatus +DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, + const MCDisassembler * /*Decoder*/) { if (RegNo > 11) return MCDisassembler::Fail; @@ -124,7 +125,8 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Register = (Insn >> 16) & 0xf; if (Register > 11) return MCDisassembler::Fail; @@ -220,4 +222,4 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, } typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index bacd00360f82..56fdd6766132 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -87,6 +87,11 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } } else { assert(Fixup.getKind() == FK_PCRel_2); + + int64_t ByteOff = (int64_t)Value - 8; + if (ByteOff > INT16_MAX * 8 || ByteOff < INT16_MIN * 8) + report_fatal_error("Branch target out of insn range"); + Value = (uint16_t)((Value - 8) / 8); support::endian::write(&Data[Fixup.getOffset() + 2], Value, Endian); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp index 200c72a07ed6..6f041584a955 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index 3292c3e5ebb5..14f6b367b8c7 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ 
b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -41,8 +41,6 @@ public: // section will be parsable, but with odd offsets and // line numbers, etc. CodePointerSize = 8; - - UseIntegratedAssembler = false; } void setDwarfUsesRelocationsAcrossSections(bool enable) { diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index 12af92e0d198..a98d001097bc 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -73,15 +73,13 @@ private: } // end anonymous namespace MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { - return new BPFMCCodeEmitter(MCII, MRI, true); + return new BPFMCCodeEmitter(MCII, *Ctx.getRegisterInfo(), true); } MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { - return new BPFMCCodeEmitter(MCII, MRI, false); + return new BPFMCCodeEmitter(MCII, *Ctx.getRegisterInfo(), false); } unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index a426a132cf47..fc190504581c 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H #include "llvm/Config/config.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/DataTypes.h" #include @@ -30,10 +31,8 @@ class MCTargetOptions; class Target; MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp index a62bd111cba9..63a60473d664 100644 --- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp +++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp @@ -9,14 +9,17 @@ #include "MCTargetDesc/CSKYInstPrinter.h" #include "MCTargetDesc/CSKYMCExpr.h" #include "MCTargetDesc/CSKYMCTargetDesc.h" +#include "MCTargetDesc/CSKYTargetStreamer.h" #include "TargetInfo/CSKYTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/Register.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" @@ -25,6 +28,8 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CSKYAttributes.h" +#include "llvm/Support/CSKYTargetParser.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -52,6 +57,9 @@ class CSKYAsmParser : public MCTargetAsmParser { const MCRegisterInfo *MRI; + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper, Twine Msg); @@ -78,6 +86,16 @@ class CSKYAsmParser : public MCTargetAsmParser 
{ bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out); + bool processLRW(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + bool processJSRI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + bool processJMPI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + CSKYTargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast(TS); + } // Auto-generated instruction matching functions #define GET_ASSEMBLER_HEADER @@ -95,6 +113,8 @@ class CSKYAsmParser : public MCTargetAsmParser { bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + bool parseDirectiveAttribute(); + public: enum CSKYMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, @@ -108,7 +128,14 @@ public: CSKYAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI, MII) { + + MCAsmParserExtension::Initialize(Parser); + + // Cache the MCRegisterInfo. + MRI = getContext().getRegisterInfo(); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + getTargetStreamer().emitTargetAttributes(STI); } }; @@ -612,6 +639,11 @@ public: #define GET_MNEMONIC_SPELL_CHECKER #include "CSKYGenAsmMatcher.inc" +static MCRegister convertFPR32ToFPR64(MCRegister Reg) { + assert(Reg >= CSKY::F0_32 && Reg <= CSKY::F31_32 && "Invalid register"); + return Reg - CSKY::F0_32 + CSKY::F0_64; +} + static std::string CSKYMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID = 0); @@ -788,6 +820,96 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, llvm_unreachable("Unknown match type detected!"); } +bool CSKYAsmParser::processLRW(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) { + Inst.setLoc(IDLoc); + + unsigned Opcode; + MCOperand Op; + if (Inst.getOpcode() == CSKY::PseudoLRW16) + Opcode = CSKY::LRW16; + else + Opcode = CSKY::LRW32; + + if (Inst.getOperand(1).isImm()) { + if (isUInt<8>(Inst.getOperand(1).getImm()) && + Inst.getOperand(0).getReg() <= CSKY::R7) { + Opcode = CSKY::MOVI16; + } else if (getSTI().getFeatureBits()[CSKY::HasE2] && + isUInt<16>(Inst.getOperand(1).getImm())) { + Opcode = CSKY::MOVI32; + } else { + auto *Expr = getTargetStreamer().addConstantPoolEntry( + MCConstantExpr::create(Inst.getOperand(1).getImm(), getContext()), + Inst.getLoc()); + Inst.erase(std::prev(Inst.end())); + Inst.addOperand(MCOperand::createExpr(Expr)); + } + } else { + const MCExpr *AdjustExpr = nullptr; + if (const CSKYMCExpr *CSKYExpr = + dyn_cast(Inst.getOperand(1).getExpr())) { + if (CSKYExpr->getKind() == CSKYMCExpr::VK_CSKY_TLSGD || + CSKYExpr->getKind() == CSKYMCExpr::VK_CSKY_TLSIE || + CSKYExpr->getKind() == CSKYMCExpr::VK_CSKY_TLSLDM) { + MCSymbol *Dot = getContext().createNamedTempSymbol(); + Out.emitLabel(Dot); + AdjustExpr = MCSymbolRefExpr::create(Dot, getContext()); + } + } + auto *Expr = getTargetStreamer().addConstantPoolEntry( + Inst.getOperand(1).getExpr(), Inst.getLoc(), AdjustExpr); + Inst.erase(std::prev(Inst.end())); + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + Inst.setOpcode(Opcode); + + Out.emitInstruction(Inst, getSTI()); + return false; +} + +bool CSKYAsmParser::processJSRI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) { + Inst.setLoc(IDLoc); + + if (Inst.getOperand(0).isImm()) { + const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry( + 
MCConstantExpr::create(Inst.getOperand(0).getImm(), getContext()), + Inst.getLoc()); + Inst.setOpcode(CSKY::JSRI32); + Inst.erase(std::prev(Inst.end())); + Inst.addOperand(MCOperand::createExpr(Expr)); + } else { + const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry( + Inst.getOperand(0).getExpr(), Inst.getLoc()); + Inst.setOpcode(CSKY::JBSR32); + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + Out.emitInstruction(Inst, getSTI()); + return false; +} + +bool CSKYAsmParser::processJMPI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) { + Inst.setLoc(IDLoc); + + if (Inst.getOperand(0).isImm()) { + const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry( + MCConstantExpr::create(Inst.getOperand(0).getImm(), getContext()), + Inst.getLoc()); + Inst.setOpcode(CSKY::JMPI32); + Inst.erase(std::prev(Inst.end())); + Inst.addOperand(MCOperand::createExpr(Expr)); + } else { + const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry( + Inst.getOperand(0).getExpr(), Inst.getLoc()); + Inst.setOpcode(CSKY::JBR32); + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + Out.emitInstruction(Inst, getSTI()); + return false; +} + bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out) { @@ -845,6 +967,28 @@ bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, Inst.erase(std::next(Inst.begin())); Inst.insert(Inst.end(), MCOperand::createReg(CSKY::C)); break; + case CSKY::PseudoLRW16: + case CSKY::PseudoLRW32: + return processLRW(Inst, IDLoc, Out); + case CSKY::PseudoJSRI32: + return processJSRI(Inst, IDLoc, Out); + case CSKY::PseudoJMPI32: + return processJMPI(Inst, IDLoc, Out); + case CSKY::JBSR32: + case CSKY::JBR16: + case CSKY::JBT16: + case CSKY::JBF16: + case CSKY::JBR32: + case CSKY::JBT32: + case CSKY::JBF32: + unsigned Num = Inst.getNumOperands() - 1; + assert(Inst.getOperand(Num).isExpr()); + + const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry( + Inst.getOperand(Num).getExpr(), Inst.getLoc()); + + Inst.addOperand(MCOperand::createExpr(Expr)); + break; } emitToStreamer(Out, Inst); @@ -1471,7 +1615,132 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(unsigned &RegNo, return MatchOperand_Success; } -bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) { return true; } +bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) { + // This returns false if this function recognizes the directive + // regardless of whether it is successfully handles or reports an + // error. Otherwise it returns true to give the generic parser a + // chance at recognizing it. 
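// (Editorial clarification: this is the standard MCTargetAsmParser
// contract -- returning true does not signal an error, it signals "not my
// directive", after which AsmParser consults its own directive table.
// Reporting a diagnostic and returning false is how a recognized but
// malformed directive is consumed without being re-parsed.)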
+ StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".csky_attribute") + return parseDirectiveAttribute(); + + return true; +} + +/// parseDirectiveAttribute +/// ::= .attribute expression ',' ( expression | "string" ) +bool CSKYAsmParser::parseDirectiveAttribute() { + MCAsmParser &Parser = getParser(); + int64_t Tag; + SMLoc TagLoc; + TagLoc = Parser.getTok().getLoc(); + if (Parser.getTok().is(AsmToken::Identifier)) { + StringRef Name = Parser.getTok().getIdentifier(); + Optional Ret = + ELFAttrs::attrTypeFromString(Name, CSKYAttrs::getCSKYAttributeTags()); + if (!Ret.hasValue()) { + Error(TagLoc, "attribute name not recognised: " + Name); + return false; + } + Tag = Ret.getValue(); + Parser.Lex(); + } else { + const MCExpr *AttrExpr; + + TagLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(AttrExpr)) + return true; + + const MCConstantExpr *CE = dyn_cast(AttrExpr); + if (check(!CE, TagLoc, "expected numeric constant")) + return true; + + Tag = CE->getValue(); + } + + if (Parser.parseToken(AsmToken::Comma, "comma expected")) + return true; + + StringRef StringValue; + int64_t IntegerValue = 0; + bool IsIntegerValue = ((Tag != CSKYAttrs::CSKY_ARCH_NAME) && + (Tag != CSKYAttrs::CSKY_CPU_NAME) && + (Tag != CSKYAttrs::CSKY_FPU_NUMBER_MODULE)); + + SMLoc ValueExprLoc = Parser.getTok().getLoc(); + if (IsIntegerValue) { + const MCExpr *ValueExpr; + if (Parser.parseExpression(ValueExpr)) + return true; + + const MCConstantExpr *CE = dyn_cast(ValueExpr); + if (!CE) + return Error(ValueExprLoc, "expected numeric constant"); + IntegerValue = CE->getValue(); + } else { + if (Parser.getTok().isNot(AsmToken::String)) + return Error(Parser.getTok().getLoc(), "expected string constant"); + + StringValue = Parser.getTok().getStringContents(); + Parser.Lex(); + } + + if (Parser.parseEOL()) + return true; + + if (IsIntegerValue) + getTargetStreamer().emitAttribute(Tag, IntegerValue); + else if (Tag != CSKYAttrs::CSKY_ARCH_NAME && Tag != CSKYAttrs::CSKY_CPU_NAME) + getTargetStreamer().emitTextAttribute(Tag, StringValue); + else { + CSKY::ArchKind ID = (Tag == CSKYAttrs::CSKY_ARCH_NAME) + ? CSKY::parseArch(StringValue) + : CSKY::parseCPUArch(StringValue); + if (ID == CSKY::ArchKind::INVALID) + return Error(ValueExprLoc, (Tag == CSKYAttrs::CSKY_ARCH_NAME) + ? "unknown arch name" + : "unknown cpu name"); + + getTargetStreamer().emitTextAttribute(Tag, StringValue); + } + + return false; +} + +unsigned CSKYAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + CSKYOperand &Op = static_cast(AsmOp); + + if (!Op.isReg()) + return Match_InvalidOperand; + + MCRegister Reg = Op.getReg(); + + if (CSKYMCRegisterClasses[CSKY::FPR32RegClassID].contains(Reg)) { + // As the parser couldn't differentiate an FPR64 from an FPR32, coerce the + // register from FPR32 to FPR64 if necessary. 
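// (Editorial note: the coercion below is pure index arithmetic --
// convertFPR32ToFPR64() defined earlier computes
//   Reg - CSKY::F0_32 + CSKY::F0_64
// so F5_32 maps to F5_64, relying on TableGen laying each FPR class out
// contiguously. The range checks that follow then reject anything outside
// F0_64..F15_64 for sFPR64 and F0_64..F31_64 for FPR64.)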
+ if (Kind == MCK_FPR64 || Kind == MCK_sFPR64) { + Op.Reg.RegNum = convertFPR32ToFPR64(Reg); + if (Kind == MCK_sFPR64 && + (Op.Reg.RegNum < CSKY::F0_64 || Op.Reg.RegNum > CSKY::F15_64)) + return Match_InvalidRegOutOfRange; + if (Kind == MCK_FPR64 && + (Op.Reg.RegNum < CSKY::F0_64 || Op.Reg.RegNum > CSKY::F31_64)) + return Match_InvalidRegOutOfRange; + return Match_Success; + } + } + + if (CSKYMCRegisterClasses[CSKY::GPRRegClassID].contains(Reg)) { + if (Kind == MCK_GPRPair) { + Op.Reg.RegNum = MRI->getEncodingValue(Reg) + CSKY::R0_R1; + return Match_Success; + } + } + + return Match_InvalidOperand; +} void CSKYAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) { MCInst CInst; diff --git a/llvm/lib/Target/CSKY/CSKY.h b/llvm/lib/Target/CSKY/CSKY.h index 401d6fa1a0a5..27a6c6d2f250 100644 --- a/llvm/lib/Target/CSKY/CSKY.h +++ b/llvm/lib/Target/CSKY/CSKY.h @@ -14,11 +14,13 @@ #ifndef LLVM_LIB_TARGET_CSKY_CSKY_H #define LLVM_LIB_TARGET_CSKY_CSKY_H +#include "llvm/PassRegistry.h" #include "llvm/Target/TargetMachine.h" namespace llvm { class CSKYTargetMachine; class FunctionPass; +class PassRegistry; FunctionPass *createCSKYISelDag(CSKYTargetMachine &TM); FunctionPass *createCSKYConstantIslandPass(); diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td index ddb7fe93706e..a8db9151e127 100644 --- a/llvm/lib/Target/CSKY/CSKY.td +++ b/llvm/lib/Target/CSKY/CSKY.td @@ -32,6 +32,26 @@ def HasFPUv2_DF : Predicate<"Subtarget->hasFPUv2DoubleFloat()">, AssemblerPredicate<(all_of FeatureFPUV2_DF), "Enable FPUv2 double float instructions">; +def FeatureFdivdu : SubtargetFeature<"fdivdu", "HasFdivdu", "true", + "Enable float divide instructions">; +def HasFdivdu : Predicate<"Subtarget->hasFdivdu()">, + AssemblerPredicate<(all_of FeatureFdivdu), + "Enable float divide instructions">; + +def FeatureFPUV3_HI + : SubtargetFeature<"fpuv3_hi", "HasFPUv3HalfWord", "true", + "Enable FPUv3 harf word converting instructions">; +def HasFPUv3_HI : Predicate<"Subtarget->hasFPUv3HalfWord()">, + AssemblerPredicate<(all_of FeatureFPUV3_HI), + "Enable FPUv3 harf word converting instructions">; + +def FeatureFPUV3_HF + : SubtargetFeature<"fpuv3_hf", "HasFPUv3HalfFloat", "true", + "Enable FPUv3 harf precision operate instructions">; +def HasFPUv3_HF : Predicate<"Subtarget->hasFPUv3HalfFloat()">, + AssemblerPredicate<(all_of FeatureFPUV3_HF), + "Enable FPUv3 harf precision operate instructions">; + def FeatureFPUV3_SF : SubtargetFeature<"fpuv3_sf", "HasFPUv3SingleFloat", "true", "Enable FPUv3 single float instructions">; @@ -46,6 +66,85 @@ def HasFPUv3_DF : Predicate<"Subtarget->hasFPUv3DoubleFloat()">, AssemblerPredicate<(all_of FeatureFPUV3_DF), "Enable FPUv3 double float instructions">; +def HasFLOATE1 + : SubtargetFeature<"floate1", "HasFLOATE1", "true", "Support CSKY floate1 instructions">; +def iHasFLOATE1 : Predicate<"Subtarget->hasFLOATE1()">, + AssemblerPredicate<(all_of HasFLOATE1), + "Support CSKY floate1 instructions">; + +def HasFLOAT1E2 + : SubtargetFeature<"float1e2", "HasFLOAT1E2", "true", "Support CSKY float1e2 instructions">; +def iHasFLOAT1E2 : Predicate<"Subtarget->hasFLOAT1E2()">, + AssemblerPredicate<(all_of HasFLOAT1E2), + "Support CSKY float1e2 instructions">; + +def HasFLOAT1E3 + : SubtargetFeature<"float1e3", "HasFLOAT1E3", "true", "Support CSKY float1e3 instructions">; +def iHasFLOAT1E3 : Predicate<"Subtarget->hasFLOAT1E3()">, + AssemblerPredicate<(all_of HasFLOAT1E3), + "Support CSKY float1e3 instructions">; + +def HasFLOAT3E4 + : SubtargetFeature<"float3e4", 
"HasFLOAT3E4", "true", "Support CSKY float3e4 instructions">; +def iHasFLOAT3E4 : Predicate<"Subtarget->hasFLOAT3E4()">, + AssemblerPredicate<(all_of HasFLOAT3E4), + "Support CSKY float3e4 instructions">; + +def HasFLOAT7E60 + : SubtargetFeature<"float7e60", "HasFLOAT7E60", "true", "Support CSKY float7e60 instructions">; +def iHasFLOAT7E60 : Predicate<"Subtarget->hasFLOAT7E60()">, + AssemblerPredicate<(all_of HasFLOAT7E60), + "Support CSKY float7e60 instructions">; + +def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", + "Enable divide instrutions">; +def HasHWDiv : Predicate<"Subtarget->hasHardwareDivide()">, + AssemblerPredicate<(all_of FeatureHWDiv), + "Enable divide instrutions">; + +def FeatureSTM : SubtargetFeature<"multiple_stld", "HasSTM", "true", + "Enable multiple load/store instrutions">; +def HasSTM : Predicate<"Subtarget->hasSTM()">, + AssemblerPredicate<(all_of FeatureSTM), + "Enable multiple load/store instrutions">; + +def FeaturePushPop : SubtargetFeature<"pushpop", "HasPushPop", "true", + "Enable push/pop instrutions">; +def HasPushPop : Predicate<"Subtarget->hasPushPop()">, + AssemblerPredicate<(all_of FeaturePushPop), + "Enable push/pop instrutions">; + +def FeatureDSP + : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instrutions">; +def HasDSP : Predicate<"Subtarget->hasDSP()">, + AssemblerPredicate<(all_of FeatureDSP), + "Enable DSP instrutions">; + +def HasDSP1E2 + : SubtargetFeature<"dsp1e2", "HasDSP1E2", "true", "Support CSKY dsp1e2 instructions">; +def iHasDSP1E2 : Predicate<"Subtarget->hasDSP1E2()">, + AssemblerPredicate<(all_of HasDSP1E2), + "Support CSKY dsp1e2 instructions">; + +def HasDSPE60 + : SubtargetFeature<"dspe60", "HasDSPE60", "true", "Support CSKY dspe60 instructions">; +def iHasDSPE60 : Predicate<"Subtarget->hasDSPE60()">, + AssemblerPredicate<(all_of HasDSPE60), + "Support CSKY dspe60 instructions">; + +def FeatureDSPV2 : SubtargetFeature<"dspv2", "HasDSPV2", "true", + "Enable DSP V2.0 instrutions">; +def HasDSPV2 : Predicate<"Subtarget->hasDSPV2()">, + AssemblerPredicate<(all_of FeatureDSPV2), + "Enable DSP V2.0 instrutions">; + +def FeatureDSP_Silan : SubtargetFeature<"dsp_silan", "HasDSP_Silan", "true", + "Enable DSP Silan instrutions">; +def HasDSP_Silan : Predicate<"Subtarget->hasDSP_Silan()">, + AssemblerPredicate<(all_of FeatureDSP_Silan), + "Enable DSP Silan instrutions">; + +// Atomic Support def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true", "Use the 16-bit btsti instruction">; def HasBTST16 : Predicate<"Subtarget->hasBTST16()">, @@ -59,18 +158,110 @@ def HasExtendLrw : Predicate<"Subtarget->hasExtendLrw()">, AssemblerPredicate<(all_of FeatureExtendLrw), "Use the extend LRW instruction">; +def FeatureTrust : SubtargetFeature<"trust", "HasTrust", "true", + "Enable trust instructions">; +def HasTrust : Predicate<"Subtarget->hasTrust()">, + AssemblerPredicate<(all_of FeatureTrust), + "Enable trust instructions">; + def FeatureJAVA : SubtargetFeature<"java", "HasJAVA", "true", "Enable java instructions">; def HasJAVA : Predicate<"Subtarget->hasJAVA()">, AssemblerPredicate<(all_of FeatureJAVA), "Enable java instructions">; +def FeatureCache + : SubtargetFeature<"cache", "HasCache", "true", "Enable cache">; +def HasCache : Predicate<"Subtarget->hasCache()">, + AssemblerPredicate<(all_of FeatureCache), + "Enable cache">; + +def FeatureNVIC + : SubtargetFeature<"nvic", "HasNVIC", "true", "Enable NVIC">; +def HasNVIC : Predicate<"Subtarget->hasNVIC()">, + AssemblerPredicate<(all_of FeatureNVIC), + 
"Enable NVIC">; + def FeatureDoloop : SubtargetFeature<"doloop", "HasDoloop", "true", "Enable doloop instructions">; def HasDoloop : Predicate<"Subtarget->hasDoloop()">, AssemblerPredicate<(all_of FeatureDoloop), "Enable doloop instructions">; +// Other features than instructions +def FeatureHighreg : SubtargetFeature<"high-registers", "HasHighRegisters", + "true", "Enable r16-r31 registers">; +def HasHighRegisters : Predicate<"Subtarget->hasHighRegisters()">, + AssemblerPredicate<(all_of FeatureHighreg), + "Enable r16-r31 registers">; + +def FeatureSmart : SubtargetFeature<"smart", "SmartMode", "true", + "Let CPU work in Smart Mode">; +def SmartMode : Predicate<"Subtarget->smartMode()">, + AssemblerPredicate<(all_of FeatureSmart), + "Let CPU work in Smart Mode">; + +def FeatureVDSPV2 : SubtargetFeature<"vdspv2", "HasVDSPV2", "true", + "Enable vdsp-v2 instructions">; +def HasVDSPV2 : Predicate<"Subtarget->hasVDSPV2()">, + AssemblerPredicate<(all_of FeatureVDSPV2), + "Enable vdsp-v2 instructions">; + +def HasVDSPV2_FLOAT : Predicate<"Subtarget->hasVDSPV2_FLOAT()">; +def HasVDSPV2_HALF: Predicate<"Subtarget->hasVDSPV2_HALF()">; + +def HasVDSP2E3 + : SubtargetFeature<"vdsp2e3", "HasVDSP2E3", "true", "Support CSKY vdsp2e3 instructions">; +def iHasVDSP2E3 : Predicate<"Subtarget->hasVDSP2E3()">, + AssemblerPredicate<(all_of HasVDSP2E3), + "Support CSKY vdsp2e3 instructions">; + +def HasVDSP2E60F + : SubtargetFeature<"vdsp2e60f", "HasVDSP2E60F", "true", "Support CSKY vdsp2e60f instructions">; +def iHasVDSP2E60F : Predicate<"Subtarget->hasVDSP2E60F()">, + AssemblerPredicate<(all_of HasVDSP2E60F), + "Support CSKY vdsp2e60f instructions">; + +def FeatureHardTP : SubtargetFeature<"hard-tp", "ReadTPHard", "true", + "Enable TLS Pointer register">; +def ReadTPHard : Predicate<"Subtarget->readTPHard()">, + AssemblerPredicate<(all_of FeatureHardTP), + "Enable TLS Pointer register">; + +def FeatureSoftTP : SubtargetFeature<"soft-tp", "ReadTPHard", "false", + "Disable TLS Pointer register">; + +def FeatureIstack : SubtargetFeature<"istack", "EnableInterruptAttribute", + "true", "Enable interrput attribute">; +def EnableInterruptAttribute + : Predicate<"Subtarget->enableInterruptAttribute()">, + AssemblerPredicate<(all_of FeatureIstack), + "Enable interrput attribute">; + +def FeatureConstPool : SubtargetFeature<"constpool", "DumpConstPool", "true", + "Dump the constant pool by compiler">; +def DumpConstPool : Predicate<"Subtarget->dumpConstPool()">, + AssemblerPredicate<(all_of FeatureConstPool), + "Dump the constant pool by compiler">; + +def FeatureStackSize : SubtargetFeature<"stack-size", "EnableStackSize", "true", + "Output stack size information">; +def EnableStackSize : Predicate<"Subtarget->enableStackSize()">, + AssemblerPredicate<(all_of FeatureStackSize), + "Output stack size information">; + +def FeatureCCRT + : SubtargetFeature<"ccrt", "UseCCRT", "true", "Use CSKY compiler runtime">; +def UseCCRT : Predicate<"Subtarget->useCCRT()">, + AssemblerPredicate<(all_of FeatureCCRT), + "Use CSKY compiler runtime">; + +def FeatureVDSPV1_128 : SubtargetFeature<"vdspv1", "HasVDSPV1_128", "true", + "Enable 128bit vdsp-v1 instructions">; +def HasVDSPV1_128 : Predicate<"Subtarget->hasVDSPV1_128()">, + AssemblerPredicate<(all_of FeatureVDSPV1_128), + "Enable 128bit vdsp-v1 instructions">; + def HasE1 : SubtargetFeature<"e1", "HasE1", "true", "Support CSKY e1 instructions", [FeatureExtendLrw]>; @@ -91,12 +282,25 @@ def iHas2E3 : Predicate<"Subtarget->has2E3()">, AssemblerPredicate<(all_of Has2E3), "Support CSKY 
2e3 instructions">; +def HasMP : SubtargetFeature<"mp", "HasMP", "true", + "Support CSKY mp instructions", [Has2E3]>; +def iHasMP : Predicate<"Subtarget->hasMP()">, + AssemblerPredicate<(all_of HasMP), + "Support CSKY mp instructions">; + def Has3E3r1 : SubtargetFeature<"3e3r1", "Has3E3r1", "true", "Support CSKY 3e3r1 instructions">; def iHas3E3r1 : Predicate<"Subtarget->has3E3r1()">, AssemblerPredicate<(all_of Has3E3r1), "Support CSKY 3e3r1 instructions">; +def Has3r1E3r2 : SubtargetFeature<"3e3r2", "Has3r1E3r2", "true", + "Support CSKY 3e3r2 instructions", + [Has3E3r1, FeatureDoloop]>; +def iHas3r1E3r2 : Predicate<"Subtarget->has3r1E3r2()">, + AssemblerPredicate<(all_of Has3r1E3r2), + "Support CSKY 3e3r2 instructions">; + def Has3r2E3r3 : SubtargetFeature<"3e3r3", "Has3r2E3r3", "true", "Support CSKY 3e3r3 instructions", [FeatureDoloop]>; @@ -128,6 +332,35 @@ def iHas10E60 : Predicate<"Subtarget->has10E60()">, AssemblerPredicate<(all_of Has10E60), "Support CSKY 10e60 instructions">; +//===----------------------------------------------------------------------===// +// CSKY Processor subtarget features. +//===----------------------------------------------------------------------===// + +def ProcCK801 : SubtargetFeature<"ck801", "CSKYProcFamily", "CK801", + "CSKY ck801 processors", []>; +def isCK801 : Predicate<"Subtarget->isCK801()">, + AssemblerPredicate<(all_of ProcCK801)>; +def ProcCK802 : SubtargetFeature<"ck802", "CSKYProcFamily", "CK802", + "CSKY ck802 processors", []>; +def ProcCK803 : SubtargetFeature<"ck803", "CSKYProcFamily", "CK803", + "CSKY ck803 processors", []>; +def ProcCK803S : SubtargetFeature<"ck803s", "CSKYProcFamily", "CK803S", + "CSKY ck803s processors", []>; +def ProcCK804 : SubtargetFeature<"ck804", "CSKYProcFamily", "CK804", + "CSKY ck804 processors", []>; +def ProcCK805 : SubtargetFeature<"ck805", "CSKYProcFamily", "CK805", + "CSKY ck805 processors", []>; +def ProcCK807 : SubtargetFeature<"ck807", "CSKYProcFamily", "CK807", + "CSKY ck807 processors", []>; +def ProcCK810 : SubtargetFeature<"ck810", "CSKYProcFamily", "CK810", + "CSKY ck810 processors", []>; +def ProcCK810V : SubtargetFeature<"ck810v", "CSKYProcFamily", "CK810V", + "CSKY ck810v processors", []>; +def ProcCK860 : SubtargetFeature<"ck860", "CSKYProcFamily", "CK860", + "CSKY ck860 processors", []>; +def ProcCK860V : SubtargetFeature<"ck860v", "CSKYProcFamily", "CK860V", + "CSKY ck860v processors", []>; + //===----------------------------------------------------------------------===// // Registers, calling conventions, instruction descriptions. 
//===----------------------------------------------------------------------===// @@ -142,6 +375,296 @@ include "CSKYInstrInfo.td" def : ProcessorModel<"generic", NoSchedModel, []>; +// CK801 series +class CK801 f, + list tunef = []> + : ProcessorModel; + +def : CK801<"ck801", NoSchedModel, []>; +def : CK801<"ck801t", NoSchedModel, []>; +def : CK801<"e801", NoSchedModel, []>; + +// CK802 series +class CK802 f, + list tunef = []> + : ProcessorModel; + +def : CK802<"ck802", NoSchedModel, []>; +def : CK802<"ck802t", NoSchedModel, []>; +def : CK802<"ck802j", NoSchedModel, [FeatureJAVA]>; +def : CK802<"e802", NoSchedModel, []>; +def : CK802<"e802t", NoSchedModel, []>; +def : CK802<"s802", NoSchedModel, []>; +def : CK802<"s802t", NoSchedModel, []>; + +// CK803 series +class CK803 f, + list tunef = []> + : ProcessorModel; + +def : CK803<"ck803", NoSchedModel, []>; +def : CK803<"ck803h", NoSchedModel, []>; +def : CK803<"ck803t", NoSchedModel, []>; +def : CK803<"ck803ht", NoSchedModel, []>; +def : CK803<"ck803f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803fh", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803e", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>; +def : CK803<"ck803eh", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>; +def : CK803<"ck803et", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>; +def : CK803<"ck803eht", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>; +def : CK803<"ck803ef", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803efh", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803eft", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803efht", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803r1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803hr1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803tr1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803htr1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803fr1", NoSchedModel, + [Has3E3r1, Has3r2E3r3, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureDSPV2]>; +def : CK803<"ck803fhr1", NoSchedModel, + [Has3E3r1, Has3r2E3r3, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureDSPV2]>; +def : CK803<"ck803er1", NoSchedModel, + [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803etr1", NoSchedModel, + [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803ehr1", NoSchedModel, + [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803ehtr1", NoSchedModel, + [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803efr1", NoSchedModel, + [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803efhr1", NoSchedModel, + [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803ftr1", NoSchedModel, [Has3E3r1, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureDSPV2]>; +def : 
CK803<"ck803eftr1", NoSchedModel, + [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803efhtr1", NoSchedModel, + [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803r2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803hr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803tr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803htr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803fr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803fhr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803er2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803etr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803ehr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803ehtr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803efr2", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803efhr2", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803ftr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803eftr2", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803efhtr2", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803r3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803hr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803tr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803htr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>; +def : CK803<"ck803fr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803fhr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803er3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803etr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803ehr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803ehtr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>; +def : CK803<"ck803efr3", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803efhr3", NoSchedModel, + [Has3r1E3r2, 
Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803ftr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803<"ck803eftr3", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"ck803efhtr3", NoSchedModel, + [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK803<"s803", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>; +def : CK803<"s803t", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>; +def : CK803<"e803", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>; +def : CK803<"e803t", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>; + +// CK803S series +class CK803S f, +list tunef = []> : CK803; + +def : CK803S<"ck803s", NoSchedModel, []>; +def : CK803S<"ck803sn", NoSchedModel, [FeatureDSP_Silan]>; +def : CK803S<"ck803st", NoSchedModel, []>; +def : CK803S<"ck803snt", NoSchedModel, [FeatureDSP_Silan]>; +def : CK803S<"ck803sf", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803S<"ck803sfn", NoSchedModel, [FeatureFPUV2_SF, FeatureDSP_Silan, HasFLOATE1, HasFLOAT1E3]>; +def : CK803S<"ck803se", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>; +def : CK803S<"ck803sen", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSP_Silan]>; +def : CK803S<"ck803sef", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803S<"ck803sefn", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, FeatureDSP_Silan, + HasFLOATE1, HasFLOAT1E3]>; +def : CK803S<"ck803seft", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK803S<"ck803sefnt", NoSchedModel, + [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, FeatureDSP_Silan, + HasFLOATE1, HasFLOAT1E3]>; + +// CK804 series +class CK804 f, + list tunef = []> + : CK803; + +def : CK804<"ck804", NoSchedModel, []>; +def : CK804<"ck804h", NoSchedModel, []>; +def : CK804<"ck804t", NoSchedModel, []>; +def : CK804<"ck804ht", NoSchedModel, []>; +def : CK804<"ck804f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK804<"ck804fh", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK804<"ck804e", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>; +def : CK804<"ck804et", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>; +def : CK804<"ck804eh", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>; +def : CK804<"ck804eht", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>; +def : CK804<"ck804ef", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK804<"ck804efh", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK804<"ck804ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK804<"ck804eft", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK804<"ck804efht", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK804<"e804d", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>; +def : CK804<"e804dt", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>; +def : CK804<"e804f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK804<"e804ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK804<"e804df", 
NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; +def : CK804<"e804dft", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>; + +// CK805 series +class CK805 f, + list tunef = []> + : CK803; + +def : CK805<"ck805", NoSchedModel, []>; +def : CK805<"i805", NoSchedModel, []>; +def : CK805<"ck805t", NoSchedModel, []>; +def : CK805<"i805f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK805<"ck805f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK805<"ck805e", NoSchedModel, [FeatureDSPV2]>; +def : CK805<"ck805ef", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK805<"ck805et", NoSchedModel, [FeatureDSPV2]>; +def : CK805<"ck805ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; +def : CK805<"ck805eft", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>; + +// CK807 series +class CK807 f, + list tunef = []> + : ProcessorModel; + +def : CK807<"ck807", NoSchedModel, []>; +def : CK807<"c807", NoSchedModel, []>; +def : CK807<"r807", NoSchedModel, []>; +def : CK807<"ck807e", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>; +def : CK807<"ck807f", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>; +def : CK807<"c807f", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>; +def : CK807<"r807f", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>; +def : CK807<"ck807ef", NoSchedModel, [ + FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, FeatureFPUV2_DF, + FeatureFdivdu, HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>; + +// CK810 series +class CK810 f, + list tunef = []> + : ProcessorModel; + +def : CK810<"ck810", NoSchedModel, []>; +def : CK810<"ck810e", NoSchedModel, []>; +def : CK810<"ck810t", NoSchedModel, []>; +def : CK810<"ck810et", NoSchedModel, []>; +def : CK810<"c810", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2]>; +def : CK810<"ck810f", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2]>; +def : CK810<"ck810ef", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2]>; +def : CK810<"ck810ft", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2]>; +def : CK810<"ck810eft", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2]>; +def : CK810<"c810t", NoSchedModel, + [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2]>; + +class CK810V f, + list tunef = []> + : CK810; + +def : CK810V<"ck810v", NoSchedModel, []>; +def : CK810V<"ck810ev", NoSchedModel, []>; +def : CK810V<"ck810tv", NoSchedModel, []>; +def : CK810V<"ck810etv", NoSchedModel, []>; +def : CK810V<"ck810fv", NoSchedModel, [ + FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2 +]>; +def : CK810V<"ck810efv", NoSchedModel, [ + FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2 +]>; +def : CK810V<"c810v", NoSchedModel, [ + FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2 +]>; +def : CK810V<"ck810ftv", NoSchedModel, [ + FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu, + HasFLOATE1, HasFLOAT1E2 +]>; +def : CK810V<"ck810eftv", NoSchedModel, [ + 
FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+  HasFLOATE1, HasFLOAT1E2
+]>;
+def : CK810V<"c810tv", NoSchedModel, [
+  FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+  HasFLOATE1, HasFLOAT1E2
+]>;
+
+// CK860 series
+class CK860 f,
+            list tunef = []>
+    : ProcessorModel;
+
+class CK860V f,
+             list tunef = []>
+    : CK860;
+
+def : CK860<"ck860", NoSchedModel, []>;
+def : CK860<"ck860f", NoSchedModel,
+            [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+def : CK860<"c860", NoSchedModel,
+            [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+def : CK860V<"c860v", NoSchedModel,
+             [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+def : CK860V<"ck860v", NoSchedModel, []>;
+def : CK860V<"ck860fv", NoSchedModel,
+             [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+
 //===----------------------------------------------------------------------===//
 // Define the CSKY target.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index c8269eeacfdb..0236b22ad379 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -16,10 +16,12 @@
 #include "CSKYTargetMachine.h"
 #include "MCTargetDesc/CSKYInstPrinter.h"
 #include "MCTargetDesc/CSKYMCExpr.h"
+#include "MCTargetDesc/CSKYTargetStreamer.h"
 #include "TargetInfo/CSKYTargetInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -40,7 +42,15 @@ CSKYAsmPrinter::CSKYAsmPrinter(llvm::TargetMachine &TM,
 bool CSKYAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   MCP = MF.getConstantPool();
-  Subtarget = &MF.getSubtarget<CSKYSubtarget>();
+  TII = MF.getSubtarget().getInstrInfo();
+
+  // Set the current MCSubtargetInfo to a copy which has the correct
+  // feature bits for the current MachineFunction.
+  MCSubtargetInfo &NewSTI =
+      OutStreamer->getContext().getSubtargetCopy(*TM.getMCSubtargetInfo());
+  NewSTI.setFeatureBits(MF.getSubtarget().getFeatureBits());
+  Subtarget = &NewSTI;
+
   return AsmPrinter::runOnMachineFunction(MF);
 }
@@ -59,8 +69,6 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
 #include "CSKYGenMCPseudoLowering.inc"
 void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) {
-  const CSKYInstrInfo *TII = Subtarget->getInstrInfo();
-
   DebugLoc DL = MI->getDebugLoc();
   MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
@@ -119,6 +127,19 @@ void CSKYAsmPrinter::emitFunctionBodyEnd() {
   InConstantPool = false;
 }
+void CSKYAsmPrinter::emitStartOfAsmFile(Module &M) {
+  if (TM.getTargetTriple().isOSBinFormatELF())
+    emitAttributes();
+}
+
+void CSKYAsmPrinter::emitEndOfAsmFile(Module &M) {
+  CSKYTargetStreamer &CTS =
+      static_cast<CSKYTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+  if (TM.getTargetTriple().isOSBinFormatELF())
+    CTS.finishAttributeSection();
+}
+
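The per-function MCSubtargetInfo copy in runOnMachineFunction above matters because one module can mix functions built with different target features; streaming against the module-level STI would use stale feature bits. A minimal standalone sketch of the idiom (the *Demo types are invented for illustration and are not LLVM API):

    #include <cstdint>

    struct MCSubtargetInfoDemo {
      uint64_t FeatureBits = 0;
      void setFeatureBits(uint64_t B) { FeatureBits = B; }
    };

    struct MachineFunctionDemo {
      uint64_t FeatureBits; // per-function features, e.g. from attributes
    };

    struct AsmPrinterDemo {
      MCSubtargetInfoDemo ModuleSTI;      // module-level default
      MCSubtargetInfoDemo PerFunctionSTI; // scratch copy, re-seated per MF
      const MCSubtargetInfoDemo *Subtarget = nullptr;

      void runOnMachineFunction(const MachineFunctionDemo &MF) {
        PerFunctionSTI = ModuleSTI;                    // copy the defaults
        PerFunctionSTI.setFeatureBits(MF.FeatureBits); // overwrite features
        Subtarget = &PerFunctionSTI;                   // emit against the copy
      }
    };

    int main() {
      AsmPrinterDemo AP;
      MachineFunctionDemo F{0x5}; // pretend feature bits
      AP.runOnMachineFunction(F);
      return AP.Subtarget->FeatureBits == 0x5 ? 0 : 1;
    }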
void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
  // Do any auto-generated pseudo lowerings.
  if (emitPseudoExpansionLowering(*OutStreamer, MI))
@@ -218,6 +239,84 @@ void CSKYAsmPrinter::emitMachineConstantPoolValue(
   OutStreamer->emitValue(Expr, Size);
 }
+void CSKYAsmPrinter::emitAttributes() {
+  CSKYTargetStreamer &CTS =
+      static_cast<CSKYTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+  const Triple &TT = TM.getTargetTriple();
+  StringRef CPU = TM.getTargetCPU();
+  StringRef FS = TM.getTargetFeatureString();
+  const CSKYTargetMachine &CTM = static_cast<const CSKYTargetMachine &>(TM);
+  /* TuneCPU doesn't impact emission of ELF attributes; ELF attributes only
+     care about arch-related features, so we can set TuneCPU as CPU. */
+  const CSKYSubtarget STI(TT, CPU, /*TuneCPU=*/CPU, FS, CTM);
+
+  CTS.emitTargetAttributes(STI);
+}
+
+bool CSKYAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                     const char *ExtraCode, raw_ostream &OS) {
+  // First try the generic code, which knows about modifiers like 'c' and 'n'.
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
+    return false;
+
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      return true; // Unknown modifier.
+    case 'R':
+      if (MO.getType() == MachineOperand::MO_Register) {
+        OS << CSKYInstPrinter::getRegisterName(MO.getReg() + 1);
+        return false;
+      }
+    }
+  }
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Immediate:
+    OS << MO.getImm();
+    return false;
+  case MachineOperand::MO_Register:
+    if (MO.getReg() == CSKY::C)
+      return false;
+    OS << CSKYInstPrinter::getRegisterName(MO.getReg());
+    return false;
+  case MachineOperand::MO_GlobalAddress:
+    PrintSymbolOperand(MO, OS);
+    return false;
+  case MachineOperand::MO_BlockAddress: {
+    MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
+    Sym->print(OS, MAI);
+    return false;
+  }
+  default:
+    break;
+  }
+
+  return true;
+}
+
+bool CSKYAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                           unsigned OpNo, const char *ExtraCode,
+                                           raw_ostream &OS) {
+  if (!ExtraCode) {
+    const MachineOperand &MO = MI->getOperand(OpNo);
+    // For now, we only support memory operands that are plain registers and
+    // assume there is no addend.
+    if (!MO.isReg())
+      return true;
+
+    OS << "(" << CSKYInstPrinter::getRegisterName(MO.getReg()) << ", 0)";
+    return false;
+  }
+
+  return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
+}
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYAsmPrinter() {
   RegisterAsmPrinter<CSKYAsmPrinter> X(getTheCSKYTarget());
 }
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index 04a253d349c8..5e87594e4fdf 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -18,7 +18,8 @@ namespace llvm {
 class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
   CSKYMCInstLower MCInstLowering;
-  const CSKYSubtarget *Subtarget;
+  const MCSubtargetInfo *Subtarget;
+  const TargetInstrInfo *TII;
   bool InConstantPool = false;
@@ -28,6 +29,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
   void expandTLSLA(const MachineInstr *MI);
   void emitCustomConstantPool(const MachineInstr *MI);
+  void emitAttributes();
 public:
   explicit CSKYAsmPrinter(TargetMachine &TM,
@@ -46,12 +48,22 @@ public:
   void emitFunctionBodyEnd() override;
+  void emitStartOfAsmFile(Module &M) override;
+
+  void emitEndOfAsmFile(Module &M) override;
+
   void emitInstruction(const MachineInstr *MI) override;
   bool runOnMachineFunction(MachineFunction &MF) override;
   // we emit constant pools ourselves!
  void emitConstantPool() override{};
+
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &OS) override;
+
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             const char *ExtraCode, raw_ostream &OS) override;
 };
 } // end namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
index 3ac335e2ad9d..5d7241258543 100644
--- a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
+++ b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -286,7 +287,7 @@ LLVM_DUMP_METHOD void CSKYConstantIslands::dumpBBs() {
 bool CSKYConstantIslands::runOnMachineFunction(MachineFunction &Mf) {
   MF = &Mf;
   MCP = Mf.getConstantPool();
-  STI = &static_cast<const CSKYSubtarget &>(Mf.getSubtarget());
+  STI = &Mf.getSubtarget<CSKYSubtarget>();
   LLVM_DEBUG(dbgs() << "***** CSKYConstantIslands: "
                     << MCP->getConstants().size() << " CP entries, aligned to "
@@ -904,8 +905,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
     Scale = 2;
     break;
   default:
-    assert(0);
-    break;
+    llvm_unreachable("");
   }
   unsigned MaxOffs = ((1 << (Bits - 1)) - 1) * Scale;
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
index 3bf001c2cee7..9907f39b3f90 100644
--- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
@@ -13,6 +13,7 @@
 #include "CSKYFrameLowering.h"
 #include "CSKYMachineFunctionInfo.h"
 #include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -270,6 +271,17 @@ void CSKYFrameLowering::emitEpilogue(MachineFunction &MF,
                 MachineInstr::FrameDestroy);
 }
+static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
+                                            const CSKYInstrInfo &TII) {
+  unsigned FnSize = 0;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB)
+      FnSize += TII.getInstSizeInBytes(MI);
+  }
+  FnSize += MF.getConstantPool()->getConstants().size() * 4;
+  return FnSize;
+}
+
 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                          const CSKYSubtarget &STI) {
   unsigned Limit = (1 << 12) - 1;
@@ -349,6 +361,7 @@ void CSKYFrameLowering::determineCalleeSaves(MachineFunction &MF,
   CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const CSKYInstrInfo *TII = STI.getInstrInfo();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -411,8 +424,6 @@ void CSKYFrameLowering::determineCalleeSaves(MachineFunction &MF,
     }
   }
-  CFI->setLRIsSpilled(SavedRegs.test(CSKY::R15));
-
   unsigned CSStackSize = 0;
   for (unsigned Reg : SavedRegs.set_bits()) {
     auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
@@ -432,6 +443,14 @@
     RS->addScavengingFrameIndex(MFI.CreateStackObject(size, align, false));
   }
+
+  unsigned FnSize = EstimateFunctionSizeInBytes(MF, *TII);
+  // Force R15 to be spilled if the estimated function size is >= 65536 bytes.
+  // This enables use of BSR to implement far jump.
+  if (FnSize >= ((1 << (16 - 1)) * 2))
+    SavedRegs.set(CSKY::R15);
+
+  CFI->setLRIsSpilled(SavedRegs.test(CSKY::R15));
 }
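For reference, the numbers behind the new spill heuristic: EstimateFunctionSizeInBytes sums getInstSizeInBytes over every instruction and adds 4 bytes per constant-pool entry, and the spill kicks in at (1 << (16 - 1)) * 2 = 65536 bytes, which suggests a 16-bit signed displacement counted in 2-byte units (an inference from the arithmetic, not checked against the CSKY encoding manual). A standalone sketch of the same computation:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> InstSizes = {4, 2, 4, 4}; // bytes per instruction
      unsigned NumConstPoolEntries = 3;

      unsigned FnSize = 0;
      for (unsigned S : InstSizes)
        FnSize += S;                     // sum of getInstSizeInBytes(MI)
      FnSize += NumConstPoolEntries * 4; // each pool entry counted as 4 bytes

      const unsigned Limit = (1u << 15) * 2; // 65536
      std::printf("FnSize=%u, spill R15 for far jump: %s\n", FnSize,
                  FnSize >= Limit ? "yes" : "no");
    }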
 // Do not preserve stack space within prologue for outgoing variables when the
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
index d58f9095aa0d..b893487f1f0f 100644
--- a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 #include "CSKYSubtarget.h"
 #include "CSKYTargetMachine.h"
 #include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
@@ -42,6 +43,13 @@ public:
   void Select(SDNode *N) override;
   bool selectAddCarry(SDNode *N);
   bool selectSubCarry(SDNode *N);
+  bool selectBITCAST_TO_LOHI(SDNode *N);
+  bool selectInlineAsm(SDNode *N);
+
+  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+                                    std::vector<SDValue> &OutOps) override;
 #include "CSKYGenDAGISel.inc"
 };
@@ -86,6 +94,13 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
     IsSelected = true;
     break;
   }
+  case CSKYISD::BITCAST_TO_LOHI:
+    IsSelected = selectBITCAST_TO_LOHI(N);
+    break;
+  case ISD::INLINEASM:
+  case ISD::INLINEASM_BR:
+    IsSelected = selectInlineAsm(N);
+    break;
   }
   if (IsSelected)
@@ -95,6 +110,185 @@
   SelectCode(N);
 }
+bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
+  std::vector<SDValue> AsmNodeOperands;
+  unsigned Flag, Kind;
+  bool Changed = false;
+  unsigned NumOps = N->getNumOperands();
+
+  // Normally, i64 data is bound to two arbitrary GPRs for the "r" constraint.
+  // However, some instructions (e.g. mula.s32) require a GPR pair.
+  // Since there is no constraint to explicitly specify a
+  // reg pair, we use the GPRPair reg class for "r" on 64-bit data.
+
+  SDLoc dl(N);
+  SDValue Glue =
+      N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(nullptr, 0);
+
+  SmallVector<bool, 8> OpChanged;
+  // The glue node will be appended last.
+  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
+       ++i) {
+    SDValue op = N->getOperand(i);
+    AsmNodeOperands.push_back(op);
+
+    if (i < InlineAsm::Op_FirstOperand)
+      continue;
+
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+      Flag = C->getZExtValue();
+      Kind = InlineAsm::getKind(Flag);
+    } else
+      continue;
+
+    // Immediate operands to inline asm in the SelectionDAG are modeled with
+    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+    // the second is a constant with the value of the immediate. If we get here
+    // and we have a Kind_Imm, skip the next operand, and continue.
+    if (Kind == InlineAsm::Kind_Imm) {
+      SDValue op = N->getOperand(++i);
+      AsmNodeOperands.push_back(op);
+      continue;
+    }
+
+    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+    if (NumRegs)
+      OpChanged.push_back(false);
+
+    unsigned DefIdx = 0;
+    bool IsTiedToChangedOp = false;
+    // If it's a use that is tied with a previous def, it has no
+    // reg class constraint.
+    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+      IsTiedToChangedOp = OpChanged[DefIdx];
+
+    // Memory operands to inline asm in the SelectionDAG are modeled with two
+    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
+    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
+    // it doesn't get misinterpreted), and continue.
We do this here because + // it's important to update the OpChanged array correctly before moving on. + if (Kind == InlineAsm::Kind_Mem) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && + Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if ((!IsTiedToChangedOp && (!HasRC || RC != CSKY::GPRRegClassID)) || + NumRegs != 2) + continue; + + assert((i + 2 < NumOps) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i + 1); + SDValue V1 = N->getOperand(i + 2); + unsigned Reg0 = cast(V0)->getReg(); + unsigned Reg1 = cast(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + Register GPVR = MRI.createVirtualRegister(&CSKY::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::i64); + SDValue Chain = SDValue(N, 0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = + CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::i64, Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = + CurDAG->getTargetExtractSubreg(CSKY::sub32_0, dl, MVT::i32, RegCopy); + SDValue Sub1 = + CurDAG->getTargetExtractSubreg(CSKY::sub32_32, dl, MVT::i32, RegCopy); + SDValue T0 = + CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector Ops(GU->op_begin(), GU->op_end() - 1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, Ops); + } else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = + CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, Chain.getValue(1)); + SDValue T1 = + CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, T0.getValue(1)); + SDValue Pair = SDValue(createGPRPairNode(MVT::i64, T0, T1), 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + Register GPVR = MRI.createVirtualRegister(&CSKY::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::i64); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if (PairedReg.getNode()) { + OpChanged[OpChanged.size() - 1] = true; + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, CSKY::GPRPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() - 1] = + CurDAG->getTargetConstant(Flag, dl, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. 
+ i += 2; + } + } + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return false; + + SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), + AsmNodeOperands); + New->setNodeId(-1); + ReplaceNode(N, New.getNode()); + return true; +} + +bool CSKYDAGToDAGISel::selectBITCAST_TO_LOHI(SDNode *N) { + SDLoc Dl(N); + auto VT = N->getValueType(0); + auto V = N->getOperand(0); + + if (!Subtarget->hasFPUv2DoubleFloat()) + return false; + + SDValue V1 = SDValue(CurDAG->getMachineNode(CSKY::FMFVRL_D, Dl, VT, V), 0); + SDValue V2 = SDValue(CurDAG->getMachineNode(CSKY::FMFVRH_D, Dl, VT, V), 0); + + ReplaceUses(SDValue(N, 0), V1); + ReplaceUses(SDValue(N, 1), V2); + CurDAG->RemoveDeadNode(N); + + return true; +} + bool CSKYDAGToDAGISel::selectAddCarry(SDNode *N) { MachineSDNode *NewNode = nullptr; auto Type0 = N->getValueType(0); @@ -175,6 +369,31 @@ bool CSKYDAGToDAGISel::selectSubCarry(SDNode *N) { return true; } +SDNode *CSKYDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { + SDLoc dl(V0.getNode()); + SDValue RegClass = + CurDAG->getTargetConstant(CSKY::GPRPairRegClassID, dl, MVT::i32); + SDValue SubReg0 = CurDAG->getTargetConstant(CSKY::sub32_0, dl, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(CSKY::sub32_32, dl, MVT::i32); + const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1}; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); +} + +bool CSKYDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + switch (ConstraintID) { + case InlineAsm::Constraint_m: + // We just support simple memory operands that have a single address + // operand and need no special handling. + OutOps.push_back(Op); + return false; + default: + break; + } + + return true; +} + FunctionPass *llvm::createCSKYISelDag(CSKYTargetMachine &TM) { return new CSKYDAGToDAGISel(TM); } diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp index 0b589e3d3e4f..012de34c9809 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp @@ -19,6 +19,7 @@ #include "CSKYSubtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Support/Debug.h" @@ -103,9 +104,7 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, Expand); } - if (!Subtarget.has3r2E3r3()) { - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - } + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); // Float @@ -784,6 +783,175 @@ SDValue CSKYTargetLowering::getTargetConstantPoolValue(GlobalAddressSDNode *N, return DAG.getTargetConstantPool(CPV, Ty); } +CSKYTargetLowering::ConstraintType +CSKYTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'a': + case 'b': + case 'v': + case 'w': + case 'y': + return C_RegisterClass; + case 'c': + case 'l': + case 'h': + case 'z': + return C_Register; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair +CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, &CSKY::GPRRegClass); + case 'a': + 
return std::make_pair(0U, &CSKY::mGPRRegClass); + case 'b': + return std::make_pair(0U, &CSKY::sGPRRegClass); + case 'z': + return std::make_pair(CSKY::R14, &CSKY::GPRRegClass); + case 'c': + return std::make_pair(CSKY::C, &CSKY::CARRYRegClass); + case 'w': + if ((Subtarget.hasFPUv2SingleFloat() || + Subtarget.hasFPUv3SingleFloat()) && + VT == MVT::f32) + return std::make_pair(0U, &CSKY::sFPR32RegClass); + if ((Subtarget.hasFPUv2DoubleFloat() || + Subtarget.hasFPUv3DoubleFloat()) && + VT == MVT::f64) + return std::make_pair(0U, &CSKY::sFPR64RegClass); + break; + case 'v': + if (Subtarget.hasFPUv2SingleFloat() && VT == MVT::f32) + return std::make_pair(0U, &CSKY::sFPR32RegClass); + if (Subtarget.hasFPUv3SingleFloat() && VT == MVT::f32) + return std::make_pair(0U, &CSKY::FPR32RegClass); + if (Subtarget.hasFPUv2DoubleFloat() && VT == MVT::f64) + return std::make_pair(0U, &CSKY::sFPR64RegClass); + if (Subtarget.hasFPUv3DoubleFloat() && VT == MVT::f64) + return std::make_pair(0U, &CSKY::FPR64RegClass); + break; + default: + break; + } + } + + if (Constraint == "{c}") + return std::make_pair(CSKY::C, &CSKY::CARRYRegClass); + + // Clang will correctly decode the usage of register name aliases into their + // official names. However, other frontends like `rustc` do not. This allows + // users of these frontends to use the ABI names for registers in LLVM-style + // register constraints. + unsigned XRegFromAlias = StringSwitch(Constraint.lower()) + .Case("{a0}", CSKY::R0) + .Case("{a1}", CSKY::R1) + .Case("{a2}", CSKY::R2) + .Case("{a3}", CSKY::R3) + .Case("{l0}", CSKY::R4) + .Case("{l1}", CSKY::R5) + .Case("{l2}", CSKY::R6) + .Case("{l3}", CSKY::R7) + .Case("{l4}", CSKY::R8) + .Case("{l5}", CSKY::R9) + .Case("{l6}", CSKY::R10) + .Case("{l7}", CSKY::R11) + .Case("{t0}", CSKY::R12) + .Case("{t1}", CSKY::R13) + .Case("{sp}", CSKY::R14) + .Case("{lr}", CSKY::R15) + .Case("{l8}", CSKY::R16) + .Case("{l9}", CSKY::R17) + .Case("{t2}", CSKY::R18) + .Case("{t3}", CSKY::R19) + .Case("{t4}", CSKY::R20) + .Case("{t5}", CSKY::R21) + .Case("{t6}", CSKY::R22) + .Cases("{t7}", "{fp}", CSKY::R23) + .Cases("{t8}", "{top}", CSKY::R24) + .Cases("{t9}", "{bsp}", CSKY::R25) + .Case("{r26}", CSKY::R26) + .Case("{r27}", CSKY::R27) + .Cases("{gb}", "{rgb}", "{rdb}", CSKY::R28) + .Cases("{tb}", "{rtb}", CSKY::R29) + .Case("{svbr}", CSKY::R30) + .Case("{tls}", CSKY::R31) + .Default(CSKY::NoRegister); + + if (XRegFromAlias != CSKY::NoRegister) + return std::make_pair(XRegFromAlias, &CSKY::GPRRegClass); + + // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the + // TableGen record rather than the AsmName to choose registers for InlineAsm + // constraints, plus we want to match those names to the widest floating point + // register type available, manually select floating point registers here. + // + // The second case is the ABI name of the register, so that frontends can also + // use the ABI names in register constraint lists. 
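A hedged usage sketch, from the C++ side, of the constraints wired up in the alias table above and the hard-float name table that follows (it assumes a CSKY toolchain with Clang/LLVM-style {reg} constraints; the mnemonics are illustrative and not checked against an assembler). The i64 "r" case is what the selectInlineAsm change earlier rewrites onto a GPRPair:

    // 64-bit "r" operand: lowered onto a GPRPair so instructions such as
    // mula.s32, which need adjacent registers, get a legal register pair.
    unsigned long long mula_demo(unsigned a, unsigned b,
                                 unsigned long long acc) {
      asm("mula.s32 %0, %1, %2" : "+r"(acc) : "r"(a), "r"(b));
      return acc;
    }

    // ABI register name: "{a0}" resolves through the alias table to CSKY::R0,
    // so frontends that don't canonicalize names (e.g. rustc) still work.
    unsigned alias_demo(unsigned x) {
      unsigned r;
      asm("mov %0, %1" : "={a0}"(r) : "{a0}"(x));
      return r;
    }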
+ if (Subtarget.useHardFloat()) { + unsigned FReg = StringSwitch(Constraint.lower()) + .Cases("{fr0}", "{vr0}", CSKY::F0_32) + .Cases("{fr1}", "{vr1}", CSKY::F1_32) + .Cases("{fr2}", "{vr2}", CSKY::F2_32) + .Cases("{fr3}", "{vr3}", CSKY::F3_32) + .Cases("{fr4}", "{vr4}", CSKY::F4_32) + .Cases("{fr5}", "{vr5}", CSKY::F5_32) + .Cases("{fr6}", "{vr6}", CSKY::F6_32) + .Cases("{fr7}", "{vr7}", CSKY::F7_32) + .Cases("{fr8}", "{vr8}", CSKY::F8_32) + .Cases("{fr9}", "{vr9}", CSKY::F9_32) + .Cases("{fr10}", "{vr10}", CSKY::F10_32) + .Cases("{fr11}", "{vr11}", CSKY::F11_32) + .Cases("{fr12}", "{vr12}", CSKY::F12_32) + .Cases("{fr13}", "{vr13}", CSKY::F13_32) + .Cases("{fr14}", "{vr14}", CSKY::F14_32) + .Cases("{fr15}", "{vr15}", CSKY::F15_32) + .Cases("{fr16}", "{vr16}", CSKY::F16_32) + .Cases("{fr17}", "{vr17}", CSKY::F17_32) + .Cases("{fr18}", "{vr18}", CSKY::F18_32) + .Cases("{fr19}", "{vr19}", CSKY::F19_32) + .Cases("{fr20}", "{vr20}", CSKY::F20_32) + .Cases("{fr21}", "{vr21}", CSKY::F21_32) + .Cases("{fr22}", "{vr22}", CSKY::F22_32) + .Cases("{fr23}", "{vr23}", CSKY::F23_32) + .Cases("{fr24}", "{vr24}", CSKY::F24_32) + .Cases("{fr25}", "{vr25}", CSKY::F25_32) + .Cases("{fr26}", "{vr26}", CSKY::F26_32) + .Cases("{fr27}", "{vr27}", CSKY::F27_32) + .Cases("{fr28}", "{vr28}", CSKY::F28_32) + .Cases("{fr29}", "{vr29}", CSKY::F29_32) + .Cases("{fr30}", "{vr30}", CSKY::F30_32) + .Cases("{fr31}", "{vr31}", CSKY::F31_32) + .Default(CSKY::NoRegister); + if (FReg != CSKY::NoRegister) { + assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg"); + unsigned RegNo = FReg - CSKY::F0_32; + unsigned DReg = CSKY::F0_64 + RegNo; + + if (Subtarget.hasFPUv2DoubleFloat()) + return std::make_pair(DReg, &CSKY::sFPR64RegClass); + else if (Subtarget.hasFPUv3DoubleFloat()) + return std::make_pair(DReg, &CSKY::FPR64RegClass); + else if (Subtarget.hasFPUv2SingleFloat()) + return std::make_pair(FReg, &CSKY::sFPR32RegClass); + else if (Subtarget.hasFPUv3SingleFloat()) + return std::make_pair(FReg, &CSKY::FPR32RegClass); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) { @@ -853,6 +1021,12 @@ CSKYTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); + case CSKY::FSELS: + case CSKY::FSELD: + if (Subtarget.hasE2()) + return emitSelectPseudo(MI, BB, CSKY::BT32); + else + return emitSelectPseudo(MI, BB, CSKY::BT16); case CSKY::ISEL32: return emitSelectPseudo(MI, BB, CSKY::BT32); case CSKY::ISEL16: diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h index e1744d5ce220..1cd0f99b17bc 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.h +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h @@ -88,6 +88,12 @@ private: return (Kind != ScalarCondVectorVal); } + ConstraintType getConstraintType(StringRef Constraint) const override; + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/llvm/lib/Target/CSKY/CSKYInstrAlias.td b/llvm/lib/Target/CSKY/CSKYInstrAlias.td new file mode 100644 index 000000000000..e3c0538e752e --- /dev/null +++ b/llvm/lib/Target/CSKY/CSKYInstrAlias.td @@ -0,0 +1,38 @@ +//===-- CSKYInstrAlias.td - Target Description for CSKY ----*- tablegen 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY instruction aliases.
+//
+//===----------------------------------------------------------------------===//
+
+def : InstAlias<"nop", (MOV16 R0, R0)>;
+def : InstAlias<"nop", (MOV32 R0, R0)>, Requires<[iHasE2]>;
+
+def : InstAlias<"bgeni16 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
+def : InstAlias<"bgeni32 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
+
+def : InstAlias<"bsr $dst", (BSR32 call_symbol:$dst)>;
+
+def : InstAlias<"grs\t$rz, $offset", (GRS32 GPR:$rz, bare_symbol:$offset)>;
+
+def : InstAlias<"jbsr\t$src1", (JBSR32 call_symbol:$src1)>;
+
+def : InstAlias<"jbr $dst", (JBR16 br_symbol_16bit:$dst)>;
+def : InstAlias<"jbt $dst", (JBT16 C, br_symbol_16bit:$dst)>;
+def : InstAlias<"jbf $dst", (JBF16 C, br_symbol_16bit:$dst)>;
+
+def : InstAlias<"lrw $rz, $src", (PseudoLRW16 mGPR:$rz, bare_symbol:$src)>;
+def : InstAlias<"lrw $rz, $src", (LRW16 mGPR:$rz, constpool_symbol_16bit:$src)>;
+def : InstAlias<"lrw $rz, $src", (PseudoLRW32 GPR:$rz, bare_symbol:$src)>;
+def : InstAlias<"lrw $rz, $src", (LRW32 GPR:$rz, constpool_symbol:$src)>;
+
+def : InstAlias<"jsri $dst", (PseudoJSRI32 call_symbol:$dst)>;
+def : InstAlias<"jsri $dst", (JSRI32 constpool_symbol:$dst)>;
+
+def : InstAlias<"jmpi $dst", (PseudoJMPI32 br_symbol:$dst)>;
+def : InstAlias<"jmpi $dst", (JMPI32 constpool_symbol:$dst)>;
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats.td b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
index 9b6ef9ca23db..8144a501b3d2 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrFormats.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
@@ -655,7 +655,7 @@ class R_Z_1<bits<6> sop, bits<5> pcode, string op>
 // Format< OP[6] | RZ[5] | 00000[5] | SOP[6] | PCODE[5] | 00000[5] >
 // Instructions:(2) clrf32, clrt32
-class R_Z_2<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
+class R_Z_2<bits<6> sop, bits<5> pcode, string op>
   : CSKY32Inst {
   bits<5> rz;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index c57ccb9d6eea..d490b385ac16 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -14,6 +14,7 @@
 #include "CSKYConstantPoolValue.h"
 #include "CSKYMachineFunctionInfo.h"
 #include "CSKYTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/MC/MCContext.h"
 #define DEBUG_TYPE "csky-instr-info"
@@ -222,9 +223,10 @@ bool CSKYInstrInfo::reverseBranchCondition(
 Register CSKYInstrInfo::movImm(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
-                               const DebugLoc &DL, int64_t Val,
+                               const DebugLoc &DL, uint64_t Val,
                                MachineInstr::MIFlag Flag) const {
-  assert(isUInt<32>(Val) && "should be uint32");
+  if (!isInt<32>(Val))
+    report_fatal_error("Should only materialize 32-bit constants.");
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -475,9 +477,6 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 const DebugLoc &DL, MCRegister DestReg,
                                 MCRegister SrcReg, bool KillSrc) const {
-
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
   if (CSKY::GPRRegClass.contains(SrcReg) &&
       CSKY::CARRYRegClass.contains(DestReg)) {
     if (STI.hasE2()) {
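One subtlety of the movImm signature change above (int64_t to uint64_t with an isInt<32> guard): a negative immediate that arrives sign-extended to 64 bits still passes, while a zero-extended 32-bit value with the top bit set now trips the fatal error. A standalone re-implementation of the check (llvm::isInt is assumed to behave the same way):

    #include <cstdint>
    #include <cstdio>

    template <unsigned N> bool isIntN(uint64_t V) {
      int64_t S = static_cast<int64_t>(V);
      return S >= -(INT64_C(1) << (N - 1)) && S < (INT64_C(1) << (N - 1));
    }

    int main() {
      std::printf("%d\n", isIntN<32>(~UINT64_C(0)));         // -1: passes (1)
      std::printf("%d\n", isIntN<32>(UINT64_C(0x7FFFFFFF))); // passes (1)
      std::printf("%d\n", isIntN<32>(UINT64_C(0xFFFFFFFF))); // rejected (0)
    }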
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index 1a1bbbf9154f..a979b0bf4b0d 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -79,7 +79,7 @@ public:
   // Materializes the given integer Val into DstReg.
   Register movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                  const DebugLoc &DL, int64_t Val,
+                  const DebugLoc &DL, uint64_t Val,
                   MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
 };
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index a782efe7f4f4..300ecceae906 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -413,6 +413,19 @@ def psrflag : Operand, ImmLeaf(Imm);"> {
   let PrintMethod = "printPSRFlag";
 }
+multiclass uimm8SRLXForm {
+  def _0: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((N->getZExtValue() >> 0) & 0xFF, SDLoc(N), MVT::i32);}]>;
+  def _8: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((N->getZExtValue() >> 8) & 0xFF, SDLoc(N), MVT::i32);}]>;
+  def _16: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((N->getZExtValue() >> 16) & 0xFF, SDLoc(N), MVT::i32);}]>;
+  def _24: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((N->getZExtValue() >> 24) & 0xFF, SDLoc(N), MVT::i32);}]>;
+}
+
+defm uimm8SRL : uimm8SRLXForm;
+
 //===----------------------------------------------------------------------===//
 // Instruction Formats
 //===----------------------------------------------------------------------===//
@@ -709,8 +722,6 @@ let Predicates= [iHasE2] in {
   def MOVI32 : I_16_MOV<0x10, "movi32", uimm16>;
   let Size = 4, isCodeGenOnly = 0 in
   def BGENI : CSKYPseudo<(outs GPR:$dst), (ins uimm5:$imm), "bgeni\t$dst, $imm", []>;
-  def : InstAlias<"bgeni16 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
-  def : InstAlias<"bgeni32 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
   def MOVIH32 : I_16_MOV<0x11, "movih32", uimm16_16_xform>;
   def MVC32 : R_Z_1<0x1, 0x8, "mvc32">;
   let isCodeGenOnly = 1 in
@@ -723,8 +734,8 @@ let Predicates= [iHasE2] in {
 let Predicates = [iHas2E3] in {
   def MVCV32 : R_Z_1<0x1, 0x10, "mvcv32">;
-  def CLRF32 : R_Z_2<0xB, 0x1, "clrf32", []>;
-  def CLRT32 : R_Z_2<0xB, 0x2, "clrt32", []>;
+  def CLRF32 : R_Z_2<0xB, 0x1, "clrf32">;
+  def CLRT32 : R_Z_2<0xB, 0x2, "clrt32">;
 }
 //===----------------------------------------------------------------------===//
@@ -779,8 +790,6 @@ def BNEZAD32 : CSKY32Inst;
-def : InstAlias<"bsr $dst", (BSR32 call_symbol:$dst)>;
-
 def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
   let isCodeGenOnly = 1;
   let isBranch = 1;
@@ -804,7 +813,6 @@ let Predicates = [iHas2E3] in {
 def GRS32 : I_18_Z_L<0x3, "grs32\t$rz, $offset",
                      (outs GPR:$rz), (ins bare_symbol:$offset), []>;
-def : InstAlias<"grs\t$rz, $offset", (GRS32 GPR:$rz, bare_symbol:$offset)>;
 let Uses = [R28] in {
   def LRS32B : I_18_Z_L<0x0, "lrs32.b\t$rz, $offset",
@@ -1291,8 +1299,6 @@ let Predicates = [iHasE2] in {
 let isCall = 1, Defs = [ R15 ], mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
 def JBSR32 : CSKYPseudo<(outs), (ins call_symbol:$src1), "jbsr32\t$src1", []>;
-def : InstAlias<"jbsr\t$src1", (JBSR32 call_symbol:$src1)>;
-
 def JBR32 : CSKYPseudo<(outs), (ins br_symbol:$src1), "jbr32\t$src1", []> {
   let isBranch = 1;
   let isTerminator = 1;
@@ -1338,18 +1344,13 @@ let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
 def PseudoLRW32 : CSKYPseudo<(outs GPR:$rz), (ins bare_symbol:$src), "lrw32 $rz, $src", []>;
-def : InstAlias<"lrw $rz, $src", (PseudoLRW32 GPR:$rz, bare_symbol:$src)>;
-def : InstAlias<"lrw $rz, $src", (LRW32 GPR:$rz, constpool_symbol:$src)>;
+
 let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
 def
PseudoJSRI32 : CSKYPseudo<(outs), (ins call_symbol:$src), "jsri32 $src", []>; -def : InstAlias<"jsri $dst", (PseudoJSRI32 call_symbol:$dst)>; -def : InstAlias<"jsri $dst", (JSRI32 constpool_symbol:$dst)>; let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in def PseudoJMPI32 : CSKYPseudo<(outs), (ins br_symbol:$src), "jmpi32 $src", []>; -def : InstAlias<"jmpi $dst", (PseudoJMPI32 br_symbol:$dst)>; -def : InstAlias<"jmpi $dst", (JMPI32 constpool_symbol:$dst)>; let isNotDuplicable = 1, mayLoad = 1, mayStore = 0, Size = 8 in def PseudoTLSLA32 : CSKYPseudo<(outs GPR:$dst1, GPR:$dst2), @@ -1362,3 +1363,4 @@ def CONSTPOOL_ENTRY : CSKYPseudo<(outs), include "CSKYInstrInfo16Instr.td" include "CSKYInstrInfoF1.td" include "CSKYInstrInfoF2.td" +include "CSKYInstrAlias.td" diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td index 6a9dd03dfa1d..3be1ca8b7998 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td @@ -441,6 +441,137 @@ let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in def PseudoLRW16 : CSKYPseudo<(outs mGPR:$rz), (ins bare_symbol:$src), "lrw16 $rz, $src", []>; +//===----------------------------------------------------------------------===// +// Instruction Patterns. +//===----------------------------------------------------------------------===// + +def : Pat<(sext_inreg mGPR:$src, i1), (ASRI16 (LSLI16 mGPR:$src, 7), 7)>; +def : Pat<(sext_inreg sGPR:$src, i8), (SEXTB16 sGPR:$src)>; +def : Pat<(sext_inreg sGPR:$src, i16), (SEXTH16 sGPR:$src)>; + +// Load & Store Patterns + +defm : LdPat; +defm : LdPat; + +defm : LdPat; +defm : LdPat; + +defm : LdPat; + + +defm : StPat; +defm : StPat; +defm : StPat; + +def : Pat<(CSKY_CALLReg sGPR:$src), (JSR16 sGPR:$src)>; +def : Pat<(CSKY_TAILReg sGPR:$src), (JMP16 sGPR:$src)>; + +// Symbol address Patterns +def : Pat<(CSKY_LOAD_ADDR tglobaladdr, tconstpool:$src2), (LRW16 tconstpool:$src2)>; +def : Pat<(CSKY_LOAD_ADDR tblockaddress, tconstpool:$src2), (LRW16 tconstpool:$src2)>; +def : Pat<(CSKY_LOAD_ADDR tjumptable:$src1, tconstpool:$src2), (LRW16_Gen tjumptable:$src1, tconstpool:$src2)>; +def : Pat<(CSKY_LOAD_ADDR texternalsym, tconstpool:$src2), (LRW16 tconstpool:$src2)>; + +def : Pat<(i32 (load constpool:$src)), (LRW16 (to_tconstpool tconstpool:$src))>; + +// Branch Patterns. 
+ +def : Pat<(brcond CARRY:$ca, bb:$offset), + (BT16 CARRY:$ca, bb:$offset)>; + +def : Pat<(br bb:$offset), (BR16 bb:$offset)>; + +def : Pat<(brcond (i32 (setne mGPR:$rs1, uimm5:$rs2)), bb:$offset), + (BT16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (seteq mGPR:$rs1, uimm5:$rs2)), bb:$offset), + (BF16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setuge mGPR:$rs1, oimm5:$rs2)), bb:$offset), + (BT16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setult mGPR:$rs1, oimm5:$rs2)), bb:$offset), + (BF16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setlt mGPR:$rs1, oimm5:$rs2)), bb:$offset), + (BT16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setge mGPR:$rs1, oimm5:$rs2)), bb:$offset), + (BF16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>; + +def : Pat<(brcond (i32 (setne sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BT16 (CMPNE16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (seteq sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BF16 (CMPNE16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setuge sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BT16 (CMPHS16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setule sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BT16 (CMPHS16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>; +def : Pat<(brcond (i32 (setult sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BF16 (CMPHS16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setugt sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BF16 (CMPHS16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>; +def : Pat<(brcond (i32 (setlt sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BT16 (CMPLT16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setgt sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BT16 (CMPLT16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>; +def : Pat<(brcond (i32 (setge sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BF16 (CMPLT16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>; +def : Pat<(brcond (i32 (setle sGPR:$rs1, sGPR:$rs2)), bb:$offset), + (BF16 (CMPLT16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>; + +// Compare Patterns. 
+def : Pat<(setne sGPR:$rs1, sGPR:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPNE16 sGPR:$rs1, sGPR:$rs2)))>; +def : Pat<(seteq sGPR:$rs1, sGPR:$rs2), + (MVCV16 (CMPNE16 sGPR:$rs1, sGPR:$rs2))>; +def : Pat<(setuge sGPR:$rs1, sGPR:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPHS16 sGPR:$rs1, sGPR:$rs2)))>; +def : Pat<(setule sGPR:$rs1, sGPR:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPHS16 sGPR:$rs2, sGPR:$rs1)))>; +def : Pat<(setult sGPR:$rs1, sGPR:$rs2), + (MVCV16 (CMPHS16 sGPR:$rs1, sGPR:$rs2))>; +def : Pat<(setugt sGPR:$rs1, sGPR:$rs2), + (MVCV16 (CMPHS16 sGPR:$rs2, sGPR:$rs1))>; +def : Pat<(setlt sGPR:$rs1, sGPR:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPLT16 sGPR:$rs1, sGPR:$rs2)))>; +def : Pat<(setgt sGPR:$rs1, sGPR:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPLT16 sGPR:$rs2, sGPR:$rs1)))>; +def : Pat<(setge sGPR:$rs1, sGPR:$rs2), + (MVCV16 (CMPLT16 sGPR:$rs1, sGPR:$rs2))>; +def : Pat<(setle sGPR:$rs1, sGPR:$rs2), + (MVCV16 (CMPLT16 sGPR:$rs2, sGPR:$rs1))>; + + +def : Pat<(setne mGPR:$rs1, uimm5:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2)))>; +def : Pat<(seteq mGPR:$rs1, uimm5:$rs2), + (MVCV16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2))>; +def : Pat<(setuge mGPR:$rs1, oimm5:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2)))>; +def : Pat<(setult mGPR:$rs1, oimm5:$rs2), + (MVCV16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2))>; +def : Pat<(setlt mGPR:$rs1, oimm5:$rs2), + (SUBU16XZ (MOVI16 1), (MVCV16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2)))>; +def : Pat<(setge mGPR:$rs1, oimm5:$rs2), + (MVCV16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2))>; + +def : Pat<(select CARRY:$ca, sGPR:$rx, sGPR:$false), + (ISEL16 CARRY:$ca, sGPR:$rx, sGPR:$false)>; +def : Pat<(select (and CARRY:$ca, 1), sGPR:$rx, sGPR:$false), + (ISEL16 CARRY:$ca, sGPR:$rx, sGPR:$false)>; + +def : Pat<(rotl sGPR:$rs1, sGPR:$rs2), + (ROTL16 sGPR:$rs1, (AND16 sGPR:$rs2, (MOVI16 0x1f)))>; + + +// FIXME: This is a temporary treatment for the e801. +def : Pat<(i32 imm:$imm), + (OR16 (MOVI16 (uimm8SRL_0 imm:$imm)), + (OR16 (LSLI16 (MOVI16 (uimm8SRL_8 imm:$imm)), 8), + (OR16 (LSLI16 (MOVI16 (uimm8SRL_16 imm:$imm)), 16), + (LSLI16 (MOVI16 (uimm8SRL_24 imm:$imm)), 24))))>; + +// Other operations. +let Predicates = [iHasE2] in { + def : Pat<(bswap sGPR:$rx), (REVB16 sGPR:$rx)>; +} //===----------------------------------------------------------------------===// // Compress Instruction tablegen backend. 
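The 16-bit-only immediate pattern above (the e801 FIXME) assembles a 32-bit constant byte by byte through MOVI16/LSLI16/OR16, using the uimm8SRL_{0,8,16,24} transforms defined earlier. A quick standalone model showing the recombination is exact:

    #include <cstdint>
    #include <cstdio>

    static uint32_t materialize(uint32_t imm) {
      uint32_t b0 = (imm >> 0) & 0xFF;   // uimm8SRL_0
      uint32_t b8 = (imm >> 8) & 0xFF;   // uimm8SRL_8
      uint32_t b16 = (imm >> 16) & 0xFF; // uimm8SRL_16
      uint32_t b24 = (imm >> 24) & 0xFF; // uimm8SRL_24
      // The OR16/LSLI16 chain from the pattern:
      return b0 | (b8 << 8) | (b16 << 16) | (b24 << 24);
    }

    int main() {
      uint32_t v = 0xDEADBEEF;
      std::printf("roundtrip ok: %d\n", materialize(v) == v); // prints 1
    }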
diff --git a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
index b6e303f8ccfb..57e0d62481ad 100644
--- a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
@@ -18,8 +18,6 @@ namespace llvm {
 class CSKYMachineFunctionInfo : public MachineFunctionInfo {
-  MachineFunction &MF;
-
   Register GlobalBaseReg = 0;
   bool SpillsCR = false;
@@ -33,7 +31,14 @@ class CSKYMachineFunctionInfo : public MachineFunctionInfo {
   unsigned PICLabelUId = 0;
 
 public:
-  CSKYMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
+  CSKYMachineFunctionInfo(MachineFunction &) {}
+
+  MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const override {
+    return DestMF.cloneInfo<CSKYMachineFunctionInfo>(*this);
+  }
 
   Register getGlobalBaseReg() const { return GlobalBaseReg; }
   void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
index 57b6ae3c27b5..4f7811d22868 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
@@ -13,6 +13,7 @@
 #include "CSKYRegisterInfo.h"
 #include "CSKY.h"
 #include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCContext.h"
@@ -29,6 +30,10 @@ const uint32_t *
 CSKYRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID Id) const {
   const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+  if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat())
+    return CSR_GPR_FPR64_RegMask;
+  if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat())
+    return CSR_GPR_FPR32_RegMask;
   return CSR_I32_RegMask;
 }
@@ -82,9 +87,21 @@ const MCPhysReg *
 CSKYRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const CSKYSubtarget &STI = MF->getSubtarget<CSKYSubtarget>();
   if (MF->getFunction().hasFnAttribute("interrupt")) {
+    if (STI.hasFPUv3DoubleFloat())
+      return CSR_GPR_FPR64v3_ISR_SaveList;
+    if (STI.hasFPUv3SingleFloat())
+      return CSR_GPR_FPR32v3_ISR_SaveList;
+    if (STI.hasFPUv2DoubleFloat())
+      return CSR_GPR_FPR64_ISR_SaveList;
+    if (STI.hasFPUv2SingleFloat())
+      return CSR_GPR_FPR32_ISR_SaveList;
 
     return CSR_GPR_ISR_SaveList;
   }
 
+  if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat())
+    return CSR_GPR_FPR64_SaveList;
+  if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat())
+    return CSR_GPR_FPR32_SaveList;
   return CSR_I32_SaveList;
 }
@@ -248,7 +265,6 @@ void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     assert(isInt<32>(Offset) && "Int32 expected");
     // The offset won't fit in an immediate, so use a scratch register instead
     // Modify Offset and FrameReg appropriately
-    assert(Offset >= 0);
     Register ScratchReg = TII->movImm(MBB, NewII, DL, Offset);
     BuildMI(MBB, NewII, DL,
             TII->get(STI.hasE2() ?
CSKY::ADDU32 : CSKY::ADDU16XZ), ScratchReg) @@ -265,7 +281,7 @@ void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI->setDesc(TII->get(TargetOpcode::COPY)); MI->getOperand(FIOperandNum) .ChangeToRegister(FrameReg, false, false, FrameRegIsKill); - MI->RemoveOperand(FIOperandNum + 1); + MI->removeOperand(FIOperandNum + 1); } else { MI->getOperand(FIOperandNum) .ChangeToRegister(FrameReg, false, false, FrameRegIsKill); diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td index b7f4fc17166b..d12532a3c5c1 100644 --- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td +++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td @@ -81,17 +81,21 @@ let RegAltNameIndices = [ABIRegAltName] in { def R29 : CSKYReg<29, "r29", ["rtb"]>, DwarfRegNum<[29]>; def R30 : CSKYReg<30, "r30", ["svbr"]>, DwarfRegNum<[30]>; def R31 : CSKYReg<31, "r31", ["tls"]>, DwarfRegNum<[31]>; - def C : CSKYReg<32, "cr0", ["psr"]>; + + // Faked for GPRTuple + def R32 : CSKYReg<32, "r32", ["r32"]>, DwarfRegNum<[32]>; + + def C : CSKYReg<33, "cr0", ["psr"]>; } def GPRTuple : RegisterTuples< [sub32_0, sub32_32], - [(add (sequence "R%u", 0, 30)), (add (sequence "R%u", 1, 31))], + [(add (sequence "R%u", 0, 31)), (add (sequence "R%u", 1, 32))], [ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30" + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" ]>; // Floating point registers @@ -189,9 +193,9 @@ def FPR32 : RegisterClass<"CSKY", [f32], 32, def sFPR32 : RegisterClass<"CSKY", [f32], 32, (add (sequence "F%u_32", 0, 15))>; -def FPR64 : RegisterClass<"CSKY", [f64], 64, +def FPR64 : RegisterClass<"CSKY", [f64], 32, (add (sequence "F%u_64", 0, 31))>; -def sFPR64 : RegisterClass<"CSKY", [f64], 64, +def sFPR64 : RegisterClass<"CSKY", [f64], 32, (add (sequence "F%u_64", 0, 15))>; def sFPR64_V : RegisterClass<"CSKY", [v2f32], 32, (add sFPR64)>; diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp index 963c2ede9c44..251dbed82708 100644 --- a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp +++ b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CSKYSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" using namespace llvm; @@ -33,14 +34,42 @@ CSKYSubtarget &CSKYSubtarget::initializeSubtargetDependencies( UseHardFloatABI = false; HasFPUv2SingleFloat = false; HasFPUv2DoubleFloat = false; + HasFPUv3HalfWord = false; + HasFPUv3HalfFloat = false; HasFPUv3SingleFloat = false; HasFPUv3DoubleFloat = false; - + HasFdivdu = false; + HasFLOATE1 = false; + HasFLOAT1E2 = false; + HasFLOAT1E3 = false; + HasFLOAT3E4 = false; + HasFLOAT7E60 = false; + HasExtendLrw = false; HasBTST16 = false; + HasTrust = false; HasJAVA = false; - HasExtendLrw = false; + HasCache = false; + HasNVIC = false; + HasDSP = false; + HasDSP1E2 = false; + HasDSPE60 = false; + HasDSPV2 = false; + HasDSP_Silan = false; HasDoloop = false; + HasHardwareDivide = false; HasHighRegisters = false; + HasVDSPV2 = false; + HasVDSP2E3 = false; + HasVDSP2E60F = false; + ReadTPHard = false; + HasVDSPV1_128 = false; + UseCCRT = false; + DumpConstPool = false; + EnableInterruptAttribute = false; + HasPushPop = false; + HasSTM = false; + SmartMode = false; + EnableStackSize = false; HasE1 = false; HasE2 = false; diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h 
b/llvm/lib/Target/CSKY/CSKYSubtarget.h index 4cd590e8e76e..9e7ad00c0a50 100644 --- a/llvm/lib/Target/CSKY/CSKYSubtarget.h +++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h @@ -36,18 +36,65 @@ class CSKYSubtarget : public CSKYGenSubtargetInfo { CSKYTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; + enum CSKYProcFamilyEnum { + Others, + + CK801, + CK802, + CK803, + CK803S, + CK804, + CK805, + CK807, + CK810, + CK810V, + CK860, + CK860V + }; + + /// CSKYProcFamily - CSKY processor family: CK801, CK802, and others. + CSKYProcFamilyEnum CSKYProcFamily = Others; + bool UseHardFloat; bool UseHardFloatABI; bool HasFPUv2SingleFloat; bool HasFPUv2DoubleFloat; + bool HasFPUv3HalfWord; + bool HasFPUv3HalfFloat; bool HasFPUv3SingleFloat; bool HasFPUv3DoubleFloat; - + bool HasFdivdu; + bool HasFLOATE1; + bool HasFLOAT1E2; + bool HasFLOAT1E3; + bool HasFLOAT3E4; + bool HasFLOAT7E60; bool HasBTST16; - bool HasJAVA; bool HasExtendLrw; + bool HasTrust; + bool HasJAVA; + bool HasCache; + bool HasNVIC; + bool HasDSP; + bool HasDSP1E2; + bool HasDSPE60; + bool HasDSPV2; + bool HasDSP_Silan; bool HasDoloop; + bool HasHardwareDivide; bool HasHighRegisters; + bool HasVDSPV2; + bool HasVDSP2E3; + bool HasVDSP2E60F; + bool ReadTPHard; + bool HasVDSPV1_128; + bool UseCCRT; + bool DumpConstPool; + bool EnableInterruptAttribute; + bool HasPushPop; + bool HasSTM; + bool SmartMode; + bool EnableStackSize; bool HasE1; bool HasE2; @@ -92,16 +139,49 @@ public: bool hasFPUv2SingleFloat() const { return HasFPUv2SingleFloat; } bool hasFPUv2DoubleFloat() const { return HasFPUv2DoubleFloat; } bool hasFPUv2() const { return HasFPUv2SingleFloat || HasFPUv2DoubleFloat; } + bool hasFPUv3HalfWord() const { return HasFPUv3HalfWord; } + bool hasFPUv3HalfFloat() const { return HasFPUv3HalfFloat; } bool hasFPUv3SingleFloat() const { return HasFPUv3SingleFloat; } bool hasFPUv3DoubleFloat() const { return HasFPUv3DoubleFloat; } - bool hasFPUv3() const { return HasFPUv3SingleFloat || HasFPUv3DoubleFloat; } + bool hasFPUv3() const { + return HasFPUv3HalfFloat || HasFPUv3SingleFloat || HasFPUv3DoubleFloat; + } bool hasAnyFloatExt() const { return hasFPUv2() || hasFPUv3(); }; - + bool hasFdivdu() const { return HasFdivdu; } + bool hasFLOATE1() const { return HasFLOATE1; } + bool hasFLOAT1E2() const { return HasFLOAT1E2; } + bool hasFLOAT1E3() const { return HasFLOAT1E3; } + bool hasFLOAT3E4() const { return HasFLOAT3E4; } + bool hasFLOAT7E60() const { return HasFLOAT7E60; } + bool hasExtendLrw() const { return HasExtendLrw; } bool hasBTST16() const { return HasBTST16; } + bool hasTrust() const { return HasTrust; } bool hasJAVA() const { return HasJAVA; } - bool hasExtendLrw() const { return HasExtendLrw; } + bool hasCache() const { return HasCache; } + bool hasNVIC() const { return HasNVIC; } + bool hasDSP() const { return HasDSP; } + bool hasDSP1E2() const { return HasDSP1E2; } + bool hasDSPE60() const { return HasDSPE60; } + bool hasDSPV2() const { return HasDSPV2; } + bool hasDSP_Silan() const { return HasDSP_Silan; } bool hasDoloop() const { return HasDoloop; } bool hasHighRegisters() const { return HasHighRegisters; } + bool hasVDSPV2() const { return HasVDSPV2; } + bool hasVDSPV2_FLOAT() const { return HasVDSPV2 && UseHardFloat; } + bool hasVDSPV2_HALF() const { + return HasVDSPV2 && UseHardFloat && HasFPUv3HalfFloat; + } + bool hasVDSP2E3() const { return HasVDSP2E3; } + bool hasVDSP2E60F() const { return HasVDSP2E60F; } + bool readTPHard() const { return ReadTPHard; } + bool hasVDSPV1_128() const { return HasVDSPV1_128; } + bool 
useCCRT() const { return UseCCRT; }
+  bool dumpConstPool() const { return DumpConstPool; }
+  bool enableInterruptAttribute() const { return EnableInterruptAttribute; }
+  bool hasPushPop() const { return HasPushPop; }
+  bool hasSTM() const { return HasSTM; }
+  bool smartMode() const { return SmartMode; }
+  bool enableStackSize() const { return EnableStackSize; }
 
   bool hasE1() const { return HasE1; }
   bool hasE2() const { return HasE2; }
@@ -114,6 +194,18 @@ public:
   bool hasMP1E2() const { return HasMP1E2; }
   bool has7E10() const { return Has7E10; }
   bool has10E60() const { return Has10E60; }
+
+  bool isCK801() const { return CSKYProcFamily == CK801; }
+  bool isCK802() const { return CSKYProcFamily == CK802; }
+  bool isCK803() const { return CSKYProcFamily == CK803; }
+  bool isCK803S() const { return CSKYProcFamily == CK803S; }
+  bool isCK804() const { return CSKYProcFamily == CK804; }
+  bool isCK805() const { return CSKYProcFamily == CK805; }
+  bool isCK807() const { return CSKYProcFamily == CK807; }
+  bool isCK810() const { return CSKYProcFamily == CK810; }
+  bool isCK810V() const { return CSKYProcFamily == CK810V; }
+  bool isCK860() const { return CSKYProcFamily == CK860; }
+  bool isCK860V() const { return CSKYProcFamily == CK860V; }
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
index 94b24044c27d..d19f28fddd53 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -13,7 +13,9 @@
 #include "CSKYTargetMachine.h"
 #include "CSKY.h"
 #include "CSKYSubtarget.h"
+#include "CSKYTargetObjectFile.h"
 #include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -50,9 +52,9 @@ CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT,
                                      Optional<CodeModel::Model> CM,
                                      CodeGenOpt::Level OL, bool JIT)
     : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
-                        RM.getValueOr(Reloc::Static),
+                        RM.value_or(Reloc::Static),
                         getEffectiveCodeModel(CM, CodeModel::Small), OL),
-      TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
+      TLOF(std::make_unique<CSKYELFTargetObjectFile>()) {
   initAsmInfo();
 }
 
@@ -94,6 +96,7 @@ public:
     return getTM<CSKYTargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addInstSelector() override;
   void addPreEmitPass() override;
 };
@@ -104,6 +107,11 @@ TargetPassConfig *CSKYTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new CSKYPassConfig(*this, PM);
 }
 
+void CSKYPassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass());
+  TargetPassConfig::addIRPasses();
+}
+
 bool CSKYPassConfig::addInstSelector() {
   addPass(createCSKYISelDag(getCSKYTargetMachine()));
diff --git a/llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp
new file mode 100644
index 000000000000..b5592d34ca54
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp
@@ -0,0 +1,25 @@
+//===-- CSKYTargetObjectFile.h - CSKY Object Info -*- C++ ---------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CSKYTargetObjectFile.h" +#include "CSKYTargetMachine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/MachineFrameInfo.h" + +using namespace llvm; + +void CSKYELFTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = + dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = + dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; +} diff --git a/llvm/lib/Target/CSKY/CSKYTargetObjectFile.h b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.h new file mode 100644 index 000000000000..a82f2681c12a --- /dev/null +++ b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.h @@ -0,0 +1,24 @@ +//===-- CSKYTargetObjectFile.h - CSKY Object Info -*- C++ ---------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_CSKY_CSKYTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + +class CSKYELFTargetObjectFile : public TargetLoweringObjectFileELF { +public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_CSKY_CSKYTARGETOBJECTFILE_H diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp new file mode 100644 index 000000000000..9b4d8ea8dc56 --- /dev/null +++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp @@ -0,0 +1,553 @@ +//===-- CSKYDisassembler.cpp - Disassembler for CSKY ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CSKYDisassembler class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class CSKYDisassembler : public MCDisassembler {
+  std::unique_ptr<MCInstrInfo const> const MCII;
+  mutable StringRef symbolName;
+
+  DecodeStatus handleCROperand(MCInst &Instr) const;
+
+public:
+  CSKYDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+                   MCInstrInfo const *MCII);
+
+  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+                              ArrayRef<uint8_t> Bytes, uint64_t Address,
+                              raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+CSKYDisassembler::CSKYDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+                                   MCInstrInfo const *MCII)
+    : MCDisassembler(STI, Ctx), MCII(MCII) {}
+
+static MCDisassembler *createCSKYDisassembler(const Target &T,
+                                              const MCSubtargetInfo &STI,
+                                              MCContext &Ctx) {
+  return new CSKYDisassembler(STI, Ctx, T.createMCInstrInfo());
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYDisassembler() {
+  TargetRegistry::RegisterMCDisassembler(getTheCSKYTarget(),
+                                         createCSKYDisassembler);
+}
+
+static const uint16_t GPRDecoderTable[] = {
+    CSKY::R0,  CSKY::R1,  CSKY::R2,  CSKY::R3,  CSKY::R4,  CSKY::R5,  CSKY::R6,
+    CSKY::R7,  CSKY::R8,  CSKY::R9,  CSKY::R10, CSKY::R11, CSKY::R12, CSKY::R13,
+    CSKY::R14, CSKY::R15, CSKY::R16, CSKY::R17, CSKY::R18, CSKY::R19, CSKY::R20,
+    CSKY::R21, CSKY::R22, CSKY::R23, CSKY::R24, CSKY::R25, CSKY::R26, CSKY::R27,
+    CSKY::R28, CSKY::R29, CSKY::R30, CSKY::R31};
+
+static const uint16_t GPRPairDecoderTable[] = {
+    CSKY::R0_R1,   CSKY::R1_R2,   CSKY::R2_R3,   CSKY::R3_R4,   CSKY::R4_R5,
+    CSKY::R5_R6,   CSKY::R6_R7,   CSKY::R7_R8,   CSKY::R8_R9,   CSKY::R9_R10,
+    CSKY::R10_R11, CSKY::R11_R12, CSKY::R12_R13, CSKY::R13_R14, CSKY::R14_R15,
+    CSKY::R15_R16, CSKY::R16_R17, CSKY::R17_R18, CSKY::R18_R19, CSKY::R19_R20,
+    CSKY::R20_R21, CSKY::R21_R22, CSKY::R22_R23, CSKY::R23_R24, CSKY::R24_R25,
+    CSKY::R25_R26, CSKY::R26_R27, CSKY::R27_R28, CSKY::R28_R29, CSKY::R29_R30,
+    CSKY::R30_R31, CSKY::R31_R32};
+
+static const uint16_t FPR32DecoderTable[] = {
+    CSKY::F0_32,  CSKY::F1_32,  CSKY::F2_32,  CSKY::F3_32,  CSKY::F4_32,
+    CSKY::F5_32,  CSKY::F6_32,  CSKY::F7_32,  CSKY::F8_32,  CSKY::F9_32,
+    CSKY::F10_32, CSKY::F11_32, CSKY::F12_32, CSKY::F13_32, CSKY::F14_32,
+    CSKY::F15_32, CSKY::F16_32, CSKY::F17_32, CSKY::F18_32, CSKY::F19_32,
+    CSKY::F20_32, CSKY::F21_32, CSKY::F22_32, CSKY::F23_32, CSKY::F24_32,
+    CSKY::F25_32, CSKY::F26_32, CSKY::F27_32, CSKY::F28_32, CSKY::F29_32,
+    CSKY::F30_32, CSKY::F31_32};
+
+static const uint16_t FPR64DecoderTable[] = {
+    CSKY::F0_64,  CSKY::F1_64,  CSKY::F2_64,  CSKY::F3_64,  CSKY::F4_64,
+    CSKY::F5_64,  CSKY::F6_64,  CSKY::F7_64,  CSKY::F8_64,  CSKY::F9_64,
+    CSKY::F10_64, CSKY::F11_64, CSKY::F12_64, CSKY::F13_64, CSKY::F14_64,
+    CSKY::F15_64, CSKY::F16_64, CSKY::F17_64, CSKY::F18_64, CSKY::F19_64,
+    CSKY::F20_64, CSKY::F21_64, CSKY::F22_64, CSKY::F23_64, CSKY::F24_64,
+    CSKY::F25_64, CSKY::F26_64, CSKY::F27_64, CSKY::F28_64,
CSKY::F29_64, + CSKY::F30_64, CSKY::F31_64}; + +static const uint16_t FPR128DecoderTable[] = { + CSKY::F0_128, CSKY::F1_128, CSKY::F2_128, CSKY::F3_128, CSKY::F4_128, + CSKY::F5_128, CSKY::F6_128, CSKY::F7_128, CSKY::F8_128, CSKY::F9_128, + CSKY::F10_128, CSKY::F11_128, CSKY::F12_128, CSKY::F13_128, CSKY::F14_128, + CSKY::F15_128, CSKY::F16_128, CSKY::F17_128, CSKY::F18_128, CSKY::F19_128, + CSKY::F20_128, CSKY::F21_128, CSKY::F22_128, CSKY::F23_128, CSKY::F24_128, + CSKY::F25_128, CSKY::F26_128, CSKY::F27_128, CSKY::F28_128, CSKY::F29_128, + CSKY::F30_128, CSKY::F31_128}; + +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodesFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 16) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodesFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 16) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodesFPR64_VRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 16) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[RegNo])); + return MCDisassembler::Success; +} + +// TODO +LLVM_ATTRIBUTE_UNUSED +static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 16) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(FPR128DecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodesGPRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 16) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodemGPRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 8) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo])); + return MCDisassembler::Success; +} + +// TODO +LLVM_ATTRIBUTE_UNUSED +static DecodeStatus DecodeGPRSPRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo != 14) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus 
DecodeGPRPairRegisterClass(MCInst &Inst, uint64_t RegNo,
+                           uint64_t Address,
+                           const MCDisassembler *Decoder) {
+  const FeatureBitset &FeatureBits =
+      Decoder->getSubtargetInfo().getFeatureBits();
+  bool hasHighReg = FeatureBits[CSKY::FeatureHighreg];
+
+  if (RegNo >= 32 || (!hasHighReg && RegNo >= 16))
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(GPRPairDecoderTable[RegNo]));
+  return MCDisassembler::Success;
+}
+
+template <unsigned N, unsigned S = 0>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+                                      int64_t Address,
+                                      const MCDisassembler *Decoder) {
+  assert(isUInt<N>(Imm) && "Invalid immediate");
+  Inst.addOperand(MCOperand::createImm(Imm << S));
+  return MCDisassembler::Success;
+}
+
+template <unsigned N>
+static DecodeStatus decodeOImmOperand(MCInst &Inst, uint64_t Imm,
+                                      int64_t Address,
+                                      const MCDisassembler *Decoder) {
+  assert(isUInt<N>(Imm) && "Invalid immediate");
+  Inst.addOperand(MCOperand::createImm(Imm + 1));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeLRW16Imm8(MCInst &Inst, uint64_t Imm, int64_t Address,
+                                    const MCDisassembler *Decoder) {
+  assert(isUInt<8>(Imm) && "Invalid immediate");
+  if ((Imm >> 7) & 0x1) {
+    Inst.addOperand(MCOperand::createImm((Imm & 0x7F) << 2));
+  } else {
+    uint64_t V = ((Imm ^ 0xFFFFFFFF) & 0xFF);
+    Inst.addOperand(MCOperand::createImm(V << 2));
+  }
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeJMPIXImmOperand(MCInst &Inst, uint64_t Imm,
+                                          int64_t Address,
+                                          const MCDisassembler *Decoder) {
+  assert(isUInt<2>(Imm) && "Invalid immediate");
+
+  if (Imm == 0)
+    Inst.addOperand(MCOperand::createImm(16));
+  else if (Imm == 1)
+    Inst.addOperand(MCOperand::createImm(24));
+  else if (Imm == 2)
+    Inst.addOperand(MCOperand::createImm(32));
+  else if (Imm == 3)
+    Inst.addOperand(MCOperand::createImm(40));
+  else
+    return MCDisassembler::Fail;
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperand(MCInst &Inst, uint64_t Imm,
+                                        int64_t Address,
+                                        const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "Invalid immediate");
+
+  auto Imm5 = Imm & 0x1f;
+  auto Ry = (Imm >> 5) & 0x1f;
+
+  if (DecodeGPRRegisterClass(Inst, Ry, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(GPRDecoderTable[Ry + Imm5]));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandF1(MCInst &Inst, uint64_t Imm,
+                                          int64_t Address,
+                                          const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "Invalid immediate");
+
+  auto Imm5 = Imm & 0x1f;
+  auto Ry = (Imm >> 5) & 0x1f;
+
+  if (DecodesFPR32RegisterClass(Inst, Ry, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[Ry + Imm5]));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandD1(MCInst &Inst, uint64_t Imm,
+                                          int64_t Address,
+                                          const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "Invalid immediate");
+
+  auto Imm5 = Imm & 0x1f;
+  auto Ry = (Imm >> 5) & 0x1f;
+
+  if (DecodesFPR64RegisterClass(Inst, Ry, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[Ry + Imm5]));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandF2(MCInst &Inst, uint64_t Imm,
+                                          int64_t Address,
+                                          const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "Invalid immediate");
+
+  auto Imm5 = Imm & 0x1f;
+  auto Ry = (Imm >> 5) & 0x1f;
+
+  if
(DecodeFPR32RegisterClass(Inst, Ry, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[Ry + Imm5]));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandD2(MCInst &Inst, uint64_t Imm,
+                                          int64_t Address,
+                                          const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "Invalid immediate");
+
+  auto Imm5 = Imm & 0x1f;
+  auto Ry = (Imm >> 5) & 0x1f;
+
+  if (DecodeFPR64RegisterClass(Inst, Ry, Address, Decoder) ==
+      MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[Ry + Imm5]));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeImmShiftOpValue(MCInst &Inst, uint64_t Imm,
+                                          int64_t Address,
+                                          const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createImm(Log2(Imm)));
+  return MCDisassembler::Success;
+}
+
+template <unsigned N, unsigned S = 0>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+                                      int64_t Address,
+                                      const MCDisassembler *Decoder) {
+  assert(isUInt<N>(Imm) && "Invalid immediate");
+  // Sign-extend the number in the bottom N bits of Imm
+  Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) << S));
+  return MCDisassembler::Success;
+}
+
+#include "CSKYGenDisassemblerTables.inc"
+
+DecodeStatus CSKYDisassembler::handleCROperand(MCInst &MI) const {
+
+  // FIXME: To query instruction info from td file or a table inc file
+  switch (MI.getOpcode()) {
+  default:
+    return MCDisassembler::Success;
+  case CSKY::LD16WSP:
+  case CSKY::ST16WSP:
+  case CSKY::ADDI16ZSP:
+    MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::R14));
+    return MCDisassembler::Success;
+  case CSKY::ADDI16SPSP:
+  case CSKY::SUBI16SPSP:
+    MI.insert(MI.begin(), MCOperand::createReg(CSKY::R14));
+    MI.insert(MI.begin(), MCOperand::createReg(CSKY::R14));
+    return MCDisassembler::Success;
+  case CSKY::FCMPHS_S:
+  case CSKY::FCMPHS_D:
+  case CSKY::FCMPLT_S:
+  case CSKY::FCMPLT_D:
+  case CSKY::FCMPNE_S:
+  case CSKY::FCMPNE_D:
+  case CSKY::FCMPUO_S:
+  case CSKY::FCMPUO_D:
+  case CSKY::FCMPZHS_S:
+  case CSKY::FCMPZHS_D:
+  case CSKY::FCMPZLS_S:
+  case CSKY::FCMPZLS_D:
+  case CSKY::FCMPZNE_S:
+  case CSKY::FCMPZNE_D:
+  case CSKY::FCMPZUO_S:
+  case CSKY::FCMPZUO_D:
+  case CSKY::f2FCMPHS_S:
+  case CSKY::f2FCMPHS_D:
+  case CSKY::f2FCMPLT_S:
+  case CSKY::f2FCMPLT_D:
+  case CSKY::f2FCMPNE_S:
+  case CSKY::f2FCMPNE_D:
+  case CSKY::f2FCMPUO_S:
+  case CSKY::f2FCMPUO_D:
+  case CSKY::f2FCMPHSZ_S:
+  case CSKY::f2FCMPHSZ_D:
+  case CSKY::f2FCMPHZ_S:
+  case CSKY::f2FCMPHZ_D:
+  case CSKY::f2FCMPLSZ_S:
+  case CSKY::f2FCMPLSZ_D:
+  case CSKY::f2FCMPLTZ_S:
+  case CSKY::f2FCMPLTZ_D:
+  case CSKY::f2FCMPNEZ_S:
+  case CSKY::f2FCMPNEZ_D:
+  case CSKY::f2FCMPUOZ_S:
+  case CSKY::f2FCMPUOZ_D:
+
+  case CSKY::BT32:
+  case CSKY::BF32:
+  case CSKY::BT16:
+  case CSKY::BF16:
+  case CSKY::CMPNEI32:
+  case CSKY::CMPNEI16:
+  case CSKY::CMPNE32:
+  case CSKY::CMPNE16:
+  case CSKY::CMPHSI32:
+  case CSKY::CMPHSI16:
+  case CSKY::CMPHS32:
+  case CSKY::CMPHS16:
+  case CSKY::CMPLTI32:
+  case CSKY::CMPLTI16:
+  case CSKY::CMPLT32:
+  case CSKY::CMPLT16:
+  case CSKY::BTSTI32:
+  case CSKY::BTSTI16:
+  case CSKY::TSTNBZ32:
+  case CSKY::TSTNBZ16:
+  case CSKY::TST32:
+  case CSKY::TST16:
+    MI.insert(MI.begin(), MCOperand::createReg(CSKY::C));
+    return MCDisassembler::Success;
+  case CSKY::LSLC32:
+  case CSKY::LSRC32:
+  case CSKY::ASRC32:
+    MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::C));
+    return MCDisassembler::Success;
+  case CSKY::MOVF32:
+  case CSKY::MOVT32:
+  case CSKY::MVC32:
+  case CSKY::MVCV32:
+  case CSKY::MVCV16:
+  case CSKY::INCT32:
+  case CSKY::INCF32:
+  case CSKY::DECT32:
+  case CSKY::DECF32:
+  case CSKY::DECGT32:
+  case CSKY::DECLT32:
+  case CSKY::DECNE32:
+  case CSKY::CLRF32:
+  case CSKY::CLRT32:
+  case CSKY::f2FSEL_S:
+  case CSKY::f2FSEL_D:
+    MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::C));
+    return MCDisassembler::Success;
+  case CSKY::ADDC32:
+  case CSKY::ADDC16:
+  case CSKY::SUBC32:
+  case CSKY::SUBC16:
+  case CSKY::XSR32:
+    MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::C));
+    MI.insert(MI.end(), MCOperand::createReg(CSKY::C));
+    return MCDisassembler::Success;
+  case CSKY::INS32:
+    MI.getOperand(3).setImm(MI.getOperand(3).getImm() +
+                            MI.getOperand(4).getImm());
+    return MCDisassembler::Success;
+  }
+}
+
+static bool decodeFPUV3Instruction(MCInst &MI, uint32_t insn, uint64_t Address,
+                                   const MCDisassembler *DisAsm,
+                                   const MCSubtargetInfo &STI) {
+  LLVM_DEBUG(dbgs() << "Trying CSKY 32-bit fpuv3 table :\n");
+  if (!STI.getFeatureBits()[CSKY::FeatureFPUV3_HF] &&
+      !STI.getFeatureBits()[CSKY::FeatureFPUV3_SF] &&
+      !STI.getFeatureBits()[CSKY::FeatureFPUV3_DF])
+    return false;
+
+  DecodeStatus Result =
+      decodeInstruction(DecoderTableFPUV332, MI, insn, Address, DisAsm, STI);
+
+  if (Result == MCDisassembler::Fail) {
+    MI.clear();
+    return false;
+  }
+
+  return true;
+}
+
+DecodeStatus CSKYDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+                                              ArrayRef<uint8_t> Bytes,
+                                              uint64_t Address,
+                                              raw_ostream &CS) const {
+
+  uint32_t Insn;
+  DecodeStatus Result = MCDisassembler::Fail;
+
+  Insn = support::endian::read16le(Bytes.data());
+
+  if ((Insn >> 14) == 0x3) {
+    if (Bytes.size() < 4) {
+      Size = 0;
+      return MCDisassembler::Fail;
+    }
+    Insn = (Insn << 16) | support::endian::read16le(&Bytes[2]);
+
+    if (decodeFPUV3Instruction(MI, Insn, Address, this, STI))
+      Result = MCDisassembler::Success;
+    else {
+      LLVM_DEBUG(dbgs() << "Trying CSKY 32-bit table :\n");
+      Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
+    }
+
+    Size = 4;
+  } else {
+    if (Bytes.size() < 2) {
+      Size = 0;
+      return MCDisassembler::Fail;
+    }
+    LLVM_DEBUG(dbgs() << "Trying CSKY 16-bit table :\n");
+    Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI);
+    Size = 2;
+  }
+
+  handleCROperand(MI);
+
+  return Result;
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index daa655416c47..b5dfdfa0b42b 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -88,6 +88,13 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
   switch (Fixup.getTargetKind()) {
   default:
     llvm_unreachable("Unknown fixup kind!");
+  case CSKY::fixup_csky_got32:
+  case CSKY::fixup_csky_got_imm18_scale4:
+  case CSKY::fixup_csky_gotoff:
+  case CSKY::fixup_csky_gotpc:
+  case CSKY::fixup_csky_plt32:
+  case CSKY::fixup_csky_plt_imm18_scale4:
+    llvm_unreachable("Relocation should be unconditionally forced\n");
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
@@ -123,6 +130,71 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
       Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned.");
 
     return (Value >> 1) & 0x3ffff;
+  case CSKY::fixup_csky_pcrel_uimm8_scale4: {
+    if (!isUIntN(10, Value))
+      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value.");
+    if (Value & 0x3)
+      Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned.");
+
+    unsigned IMM4L =
(Value >> 2) & 0xf;
+    unsigned IMM4H = (Value >> 6) & 0xf;
+
+    Value = (IMM4H << 21) | (IMM4L << 4);
+    return Value;
+  }
+  case CSKY::fixup_csky_pcrel_imm10_scale2:
+    if (!isIntN(11, Value))
+      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value.");
+    if (Value & 0x1)
+      Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned.");
+
+    return (Value >> 1) & 0x3ff;
+  case CSKY::fixup_csky_pcrel_uimm7_scale4:
+    if (!isUIntN(9, Value))
+      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value.");
+    if (Value & 0x3)
+      Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned.");
+
+    if ((Value & 0xff) <= 0b111111100) {
+      unsigned IMM5L = (Value >> 2) & 0x1f;
+      unsigned IMM2H = (Value >> 7) & 0x3;
+
+      Value = (1 << 12) | (IMM2H << 8) | IMM5L;
+    } else {
+      unsigned IMM5L = (~Value >> 2) & 0x1f;
+      unsigned IMM2H = (~Value >> 7) & 0x3;
+
+      Value = (IMM2H << 8) | IMM5L;
+    }
+
+    return Value & 0xffff;
+  }
+}
+
+bool CSKYAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
+                                                  bool Resolved, uint64_t Value,
+                                                  const MCRelaxableFragment *DF,
+                                                  const MCAsmLayout &Layout,
+                                                  const bool WasForced) const {
+  // Return true if the symbol is actually unresolved.
+  // Resolved could be always false when shouldForceRelocation return true.
+  // We use !WasForced to indicate that the symbol is unresolved and not forced
+  // by shouldForceRelocation.
+  if (!Resolved && !WasForced)
+    return true;
+
+  int64_t Offset = int64_t(Value);
+  switch (Fixup.getTargetKind()) {
+  default:
+    return false;
+  case CSKY::fixup_csky_pcrel_imm10_scale2:
+    return !isShiftedInt<10, 1>(Offset);
+  case CSKY::fixup_csky_pcrel_imm16_scale2:
+    return !isShiftedInt<16, 1>(Offset);
+  case CSKY::fixup_csky_pcrel_imm26_scale2:
+    return !isShiftedInt<26, 1>(Offset);
+  case CSKY::fixup_csky_pcrel_uimm7_scale4:
+    return !isShiftedUInt<8, 2>(Offset);
  }
}
@@ -152,8 +224,9 @@ void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
   // For each byte of the fragment that the fixup touches, mask in the
   // bits from the fixup value.
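+  // Illustration (assumed encoding, not part of the upstream change): CSKY
+  // 32-bit instructions are stored as two little-endian halfwords with the
+  // most-significant halfword first, so an instruction fixup with
+  // Value = 0xAABBCCDD is masked in below in byte order BB AA DD CC, while
+  // plain data fixups keep ordinary little-endian byte order.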
bool IsLittleEndian = (Endian == support::little); + bool IsInstFixup = (Kind >= FirstTargetFixupKind); - if (IsLittleEndian && (NumBytes == 4)) { + if (IsLittleEndian && IsInstFixup && (NumBytes == 4)) { Data[Offset + 0] |= uint8_t((Value >> 16) & 0xff); Data[Offset + 1] |= uint8_t((Value >> 24) & 0xff); Data[Offset + 2] |= uint8_t(Value & 0xff); @@ -166,6 +239,50 @@ void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } } +bool CSKYAsmBackend::mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const { + switch (Inst.getOpcode()) { + default: + return false; + case CSKY::JBR32: + case CSKY::JBT32: + case CSKY::JBF32: + case CSKY::JBSR32: + if (!STI.getFeatureBits()[CSKY::Has2E3]) + return false; + return true; + case CSKY::JBR16: + case CSKY::JBT16: + case CSKY::JBF16: + case CSKY::LRW16: + case CSKY::BR16: + return true; + } +} + +bool CSKYAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return true; + switch (Fixup.getTargetKind()) { + default: + break; + case CSKY::fixup_csky_got32: + case CSKY::fixup_csky_got_imm18_scale4: + case CSKY::fixup_csky_gotoff: + case CSKY::fixup_csky_gotpc: + case CSKY::fixup_csky_plt32: + case CSKY::fixup_csky_plt_imm18_scale4: + case CSKY::fixup_csky_doffset_imm18: + case CSKY::fixup_csky_doffset_imm18_scale2: + case CSKY::fixup_csky_doffset_imm18_scale4: + return true; + } + + return false; +} + bool CSKYAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { @@ -174,23 +291,62 @@ bool CSKYAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, void CSKYAsmBackend::relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const { - llvm_unreachable("CSKYAsmBackend::relaxInstruction() unimplemented"); -} + MCInst Res; -bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const { - if (Count % 2) - return false; + switch (Inst.getOpcode()) { + default: + LLVM_DEBUG(Inst.dump()); + llvm_unreachable("Opcode not expected!"); + case CSKY::LRW16: + Res.setOpcode(CSKY::LRW32); + Res.addOperand(Inst.getOperand(0)); + Res.addOperand(Inst.getOperand(1)); + break; + case CSKY::BR16: + Res.setOpcode(CSKY::BR32); + Res.addOperand(Inst.getOperand(0)); + break; + case CSKY::JBSR32: + Res.setOpcode(CSKY::JSRI32); + Res.addOperand(Inst.getOperand(1)); + break; + case CSKY::JBR32: + Res.setOpcode(CSKY::JMPI32); + Res.addOperand(Inst.getOperand(1)); + break; + case CSKY::JBT32: + case CSKY::JBF32: + Res.setOpcode(Inst.getOpcode() == CSKY::JBT32 ? CSKY::JBT_E : CSKY::JBF_E); + Res.addOperand(Inst.getOperand(0)); + Res.addOperand(Inst.getOperand(1)); + Res.addOperand(Inst.getOperand(2)); + break; + case CSKY::JBR16: + Res.setOpcode(CSKY::JBR32); + Res.addOperand(Inst.getOperand(0)); + Res.addOperand(Inst.getOperand(1)); + break; + case CSKY::JBT16: + case CSKY::JBF16: + // ck801 + unsigned opcode; + if (STI.getFeatureBits()[CSKY::HasE2]) + opcode = Inst.getOpcode() == CSKY::JBT16 ? CSKY::JBT32 : CSKY::JBF32; + else + opcode = Inst.getOpcode() == CSKY::JBT16 ? 
CSKY::JBT_E : CSKY::JBF_E;
-  // MOV32 r0, r0
-  while (Count >= 4) {
-    OS.write("\xc4\x00\x48\x20", 4);
-    Count -= 4;
+    Res.setOpcode(opcode);
+    Res.addOperand(Inst.getOperand(0));
+    Res.addOperand(Inst.getOperand(1));
+    Res.addOperand(Inst.getOperand(2));
+    break;
   }
-  // MOV16 r0, r0
-  if (Count)
-    OS.write("\x6c\x03", 2);
+  Inst = std::move(Res);
+}
 
+bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+                                  const MCSubtargetInfo *STI) const {
+  OS.write_zeros(Count);
   return true;
 }
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index e710954e9df8..09b3ce6cc82b 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -11,6 +11,7 @@
 
 #include "MCTargetDesc/CSKYFixupKinds.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetOptions.h"
 
 namespace llvm {
@@ -39,9 +40,21 @@ public:
   void relaxInstruction(MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;
 
+  bool mayNeedRelaxation(const MCInst &Inst,
+                         const MCSubtargetInfo &STI) const override;
+
+  bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+                                    uint64_t Value,
+                                    const MCRelaxableFragment *DF,
+                                    const MCAsmLayout &Layout,
+                                    const bool WasForced) const override;
+
   bool writeNopData(raw_ostream &OS, uint64_t Count,
                     const MCSubtargetInfo *STI) const override;
 
+  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+                             const MCValue &Target) override;
+
   std::unique_ptr<MCObjectTargetWriter>
   createObjectTargetWriter() const override;
 };
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
index 163632632290..d7cc4c8525ee 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CSKYFixupKinds.h"
+#include "CSKYMCExpr.h"
 #include "CSKYMCTargetDesc.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
@@ -33,10 +35,112 @@ unsigned CSKYELFObjectWriter::getRelocType(MCContext &Ctx,
                                            const MCValue &Target,
                                            const MCFixup &Fixup,
                                            bool IsPCRel) const {
-  // Determine the type of the relocation.
-  switch ((unsigned)Fixup.getKind()) {
+  const MCExpr *Expr = Fixup.getValue();
+  // Determine the type of the relocation
+  unsigned Kind = Fixup.getTargetKind();
+  MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+
+  if (IsPCRel) {
+    switch (Kind) {
+    default:
+      LLVM_DEBUG(dbgs() << "Unknown Kind1  = " << Kind);
+      Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+      return ELF::R_CKCORE_NONE;
+    case FK_Data_4:
+    case FK_PCRel_4:
+      return ELF::R_CKCORE_PCREL32;
+    case CSKY::fixup_csky_pcrel_uimm16_scale4:
+      return ELF::R_CKCORE_PCREL_IMM16_4;
+    case CSKY::fixup_csky_pcrel_uimm8_scale4:
+      return ELF::R_CKCORE_PCREL_IMM8_4;
+    case CSKY::fixup_csky_pcrel_imm26_scale2:
+      return ELF::R_CKCORE_PCREL_IMM26_2;
+    case CSKY::fixup_csky_pcrel_imm18_scale2:
+      return ELF::R_CKCORE_PCREL_IMM18_2;
+    case CSKY::fixup_csky_pcrel_imm16_scale2:
+      return ELF::R_CKCORE_PCREL_IMM16_2;
+    case CSKY::fixup_csky_pcrel_imm10_scale2:
+      return ELF::R_CKCORE_PCREL_IMM10_2;
+    case CSKY::fixup_csky_pcrel_uimm7_scale4:
+      return ELF::R_CKCORE_PCREL_IMM7_4;
+    }
+  }
+
+  switch (Kind) {
   default:
-    llvm_unreachable("invalid fixup kind!");
+    LLVM_DEBUG(dbgs() << "Unknown Kind2 = " << Kind);
+    Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+    return ELF::R_CKCORE_NONE;
+  case FK_Data_1:
+    Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
+    return ELF::R_CKCORE_NONE;
+  case FK_Data_2:
+    Ctx.reportError(Fixup.getLoc(), "2-byte data relocations not supported");
+    return ELF::R_CKCORE_NONE;
+  case FK_Data_4:
+    if (Expr->getKind() == MCExpr::Target) {
+      auto TK = cast<CSKYMCExpr>(Expr)->getKind();
+      if (TK == CSKYMCExpr::VK_CSKY_ADDR)
+        return ELF::R_CKCORE_ADDR32;
+      if (TK == CSKYMCExpr::VK_CSKY_GOT)
+        return ELF::R_CKCORE_GOT32;
+      if (TK == CSKYMCExpr::VK_CSKY_GOTOFF)
+        return ELF::R_CKCORE_GOTOFF;
+      if (TK == CSKYMCExpr::VK_CSKY_PLT)
+        return ELF::R_CKCORE_PLT32;
+      if (TK == CSKYMCExpr::VK_CSKY_TLSIE)
+        return ELF::R_CKCORE_TLS_IE32;
+      if (TK == CSKYMCExpr::VK_CSKY_TLSLE)
+        return ELF::R_CKCORE_TLS_LE32;
+      if (TK == CSKYMCExpr::VK_CSKY_TLSGD)
+        return ELF::R_CKCORE_TLS_GD32;
+      if (TK == CSKYMCExpr::VK_CSKY_TLSLDM)
+        return ELF::R_CKCORE_TLS_LDM32;
+      if (TK == CSKYMCExpr::VK_CSKY_TLSLDO)
+        return ELF::R_CKCORE_TLS_LDO32;
+      if (TK == CSKYMCExpr::VK_CSKY_GOTPC)
+        return ELF::R_CKCORE_GOTPC;
+      if (TK == CSKYMCExpr::VK_CSKY_None)
+        return ELF::R_CKCORE_ADDR32;
+
+      LLVM_DEBUG(dbgs() << "Unknown FK_Data_4 TK = " << TK);
+      Ctx.reportError(Fixup.getLoc(), "unknown target FK_Data_4");
+    } else {
+      switch (Modifier) {
+      default:
+        Ctx.reportError(Fixup.getLoc(),
+                        "invalid fixup for 4-byte data relocation");
+        return ELF::R_CKCORE_NONE;
+      case MCSymbolRefExpr::VK_GOT:
+        return ELF::R_CKCORE_GOT32;
+      case MCSymbolRefExpr::VK_GOTOFF:
+        return ELF::R_CKCORE_GOTOFF;
+      case MCSymbolRefExpr::VK_PLT:
+        return ELF::R_CKCORE_PLT32;
+      case MCSymbolRefExpr::VK_None:
+        return ELF::R_CKCORE_ADDR32;
+      }
+    }
+    return ELF::R_CKCORE_NONE;
+  case FK_Data_8:
+    Ctx.reportError(Fixup.getLoc(), "8-byte data relocations not supported");
+    return ELF::R_CKCORE_NONE;
+  case CSKY::fixup_csky_addr32:
+    return ELF::R_CKCORE_ADDR32;
+  case CSKY::fixup_csky_addr_hi16:
+    return ELF::R_CKCORE_ADDR_HI16;
+  case CSKY::fixup_csky_addr_lo16:
+    return ELF::R_CKCORE_ADDR_LO16;
+  case CSKY::fixup_csky_doffset_imm18:
+    return ELF::R_CKCORE_DOFFSET_IMM18;
+  case CSKY::fixup_csky_doffset_imm18_scale2:
+    return ELF::R_CKCORE_DOFFSET_IMM18_2;
+  case CSKY::fixup_csky_doffset_imm18_scale4:
+    return ELF::R_CKCORE_DOFFSET_IMM18_4;
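+  // Illustration (assumed assembly syntax): a GOT-relative literal load such
+  // as "lrw r0, foo@GOT" carries fixup_csky_got_imm18_scale4 and must reach
+  // the linker as R_CKCORE_GOT_IMM18_4; shouldForceRelocation() in
+  // CSKYAsmBackend.cpp keeps these GOT/PLT fixups from being resolved at
+  // assembly time.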
+  case CSKY::fixup_csky_got_imm18_scale4:
+    return ELF::R_CKCORE_GOT_IMM18_4;
+  case CSKY::fixup_csky_plt_imm18_scale4:
+    return ELF::R_CKCORE_PLT_IMM18_4;
   }
 }
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
new file mode 100644
index 000000000000..90775c1b70f2
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
@@ -0,0 +1,335 @@
+//===-- CSKYELFStreamer.cpp - CSKY ELF Target Streamer Methods ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides CSKY specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYELFStreamer.h"
+#include "CSKYMCTargetDesc.h"
+#include "MCTargetDesc/CSKYAsmBackend.h"
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CSKYAttributes.h"
+#include "llvm/Support/CSKYTargetParser.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+// This part is for ELF object output.
+CSKYTargetELFStreamer::CSKYTargetELFStreamer(MCStreamer &S,
+                                             const MCSubtargetInfo &STI)
+    : CSKYTargetStreamer(S), CurrentVendor("csky") {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  const FeatureBitset &Features = STI.getFeatureBits();
+
+  unsigned EFlags = MCA.getELFHeaderEFlags();
+
+  EFlags |= ELF::EF_CSKY_ABIV2;
+
+  if (Features[CSKY::ProcCK801])
+    EFlags |= ELF::EF_CSKY_801;
+  else if (Features[CSKY::ProcCK802])
+    EFlags |= ELF::EF_CSKY_802;
+  else if (Features[CSKY::ProcCK803])
+    EFlags |= ELF::EF_CSKY_803;
+  else if (Features[CSKY::ProcCK804])
+    EFlags |= ELF::EF_CSKY_803;
+  else if (Features[CSKY::ProcCK805])
+    EFlags |= ELF::EF_CSKY_805;
+  else if (Features[CSKY::ProcCK807])
+    EFlags |= ELF::EF_CSKY_807;
+  else if (Features[CSKY::ProcCK810])
+    EFlags |= ELF::EF_CSKY_810;
+  else if (Features[CSKY::ProcCK860])
+    EFlags |= ELF::EF_CSKY_860;
+  else
+    EFlags |= ELF::EF_CSKY_810;
+
+  if (Features[CSKY::FeatureFPUV2_SF] || Features[CSKY::FeatureFPUV3_SF])
+    EFlags |= ELF::EF_CSKY_FLOAT;
+
+  EFlags |= ELF::EF_CSKY_EFV1;
+
+  MCA.setELFHeaderEFlags(EFlags);
+}
+
+MCELFStreamer &CSKYTargetELFStreamer::getStreamer() {
+  return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void CSKYTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+  setAttributeItem(Attribute, Value, /*OverwriteExisting=*/true);
+}
+
+void CSKYTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+                                              StringRef String) {
+  setAttributeItem(Attribute, String, /*OverwriteExisting=*/true);
+}
+
+void CSKYTargetELFStreamer::finishAttributeSection() {
+  if (Contents.empty())
+    return;
+
+  if (AttributeSection) {
+    Streamer.switchSection(AttributeSection);
+  } else {
+    MCAssembler &MCA = getStreamer().getAssembler();
+    AttributeSection = MCA.getContext().getELFSection(
+        ".csky.attributes", ELF::SHT_CSKY_ATTRIBUTES, 0);
+    Streamer.switchSection(AttributeSection);
+    Streamer.emitInt8(ELFAttrs::Format_Version);
+  }
+
+  // Vendor size + Vendor name + '\0'
+  const size_t VendorHeaderSize = 4 + CurrentVendor.size() +
1;
+
+  // Tag + Tag Size
+  const size_t TagHeaderSize = 1 + 4;
+
+  const size_t ContentsSize = calculateContentSize();
+
+  Streamer.emitInt32(VendorHeaderSize + TagHeaderSize + ContentsSize);
+  Streamer.emitBytes(CurrentVendor);
+  Streamer.emitInt8(0); // '\0'
+
+  Streamer.emitInt8(ELFAttrs::File);
+  Streamer.emitInt32(TagHeaderSize + ContentsSize);
+
+  // Size should have been accounted for already, now
+  // emit each field as its type (ULEB or String).
+  for (AttributeItem item : Contents) {
+    Streamer.emitULEB128IntValue(item.Tag);
+    switch (item.Type) {
+    default:
+      llvm_unreachable("Invalid attribute type");
+    case AttributeType::Numeric:
+      Streamer.emitULEB128IntValue(item.IntValue);
+      break;
+    case AttributeType::Text:
+      Streamer.emitBytes(item.StringValue);
+      Streamer.emitInt8(0); // '\0'
+      break;
+    case AttributeType::NumericAndText:
+      Streamer.emitULEB128IntValue(item.IntValue);
+      Streamer.emitBytes(item.StringValue);
+      Streamer.emitInt8(0); // '\0'
+      break;
+    }
+  }
+
+  Contents.clear();
+}
+
+size_t CSKYTargetELFStreamer::calculateContentSize() const {
+  size_t Result = 0;
+  for (AttributeItem item : Contents) {
+    switch (item.Type) {
+    case AttributeType::Hidden:
+      break;
+    case AttributeType::Numeric:
+      Result += getULEB128Size(item.Tag);
+      Result += getULEB128Size(item.IntValue);
+      break;
+    case AttributeType::Text:
+      Result += getULEB128Size(item.Tag);
+      Result += item.StringValue.size() + 1; // string + '\0'
+      break;
+    case AttributeType::NumericAndText:
+      Result += getULEB128Size(item.Tag);
+      Result += getULEB128Size(item.IntValue);
+      Result += item.StringValue.size() + 1; // string + '\0';
+      break;
+    }
+  }
+  return Result;
+}
+
+void CSKYELFStreamer::EmitMappingSymbol(StringRef Name) {
+  if (Name == "$d" && State == EMS_Data)
+    return;
+  if (Name == "$t" && State == EMS_Text)
+    return;
+  if (Name == "$t" && State == EMS_None) {
+    State = EMS_Text;
+    return;
+  }
+
+  State = (Name == "$t" ? EMS_Text : EMS_Data);
+
+  auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+      Name + "."
+ Twine(MappingSymbolCounter++))); + emitLabel(Symbol); + + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); +} + +void CSKYTargetELFStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { + StringRef CPU = STI.getCPU(); + CSKY::ArchKind ArchID = CSKY::parseCPUArch(CPU); + + if (ArchID == CSKY::ArchKind::CK804) + ArchID = CSKY::ArchKind::CK803; + + StringRef CPU_ARCH = CSKY::getArchName(ArchID); + + if (ArchID == CSKY::ArchKind::INVALID) { + CPU = "ck810"; + CPU_ARCH = "ck810"; + } + emitTextAttribute(CSKYAttrs::CSKY_ARCH_NAME, CPU_ARCH); + emitTextAttribute(CSKYAttrs::CSKY_CPU_NAME, CPU); + + unsigned ISAFlag = 0; + if (STI.hasFeature(CSKY::HasE1)) + ISAFlag |= CSKYAttrs::V2_ISA_E1; + + if (STI.hasFeature(CSKY::HasE2)) + ISAFlag |= CSKYAttrs::V2_ISA_1E2; + + if (STI.hasFeature(CSKY::Has2E3)) + ISAFlag |= CSKYAttrs::V2_ISA_2E3; + + if (STI.hasFeature(CSKY::HasMP)) + ISAFlag |= CSKYAttrs::ISA_MP; + + if (STI.hasFeature(CSKY::Has3E3r1)) + ISAFlag |= CSKYAttrs::V2_ISA_3E3R1; + + if (STI.hasFeature(CSKY::Has3r1E3r2)) + ISAFlag |= CSKYAttrs::V2_ISA_3E3R2; + + if (STI.hasFeature(CSKY::Has3r2E3r3)) + ISAFlag |= CSKYAttrs::V2_ISA_3E3R3; + + if (STI.hasFeature(CSKY::Has3E7)) + ISAFlag |= CSKYAttrs::V2_ISA_3E7; + + if (STI.hasFeature(CSKY::HasMP1E2)) + ISAFlag |= CSKYAttrs::ISA_MP_1E2; + + if (STI.hasFeature(CSKY::Has7E10)) + ISAFlag |= CSKYAttrs::V2_ISA_7E10; + + if (STI.hasFeature(CSKY::Has10E60)) + ISAFlag |= CSKYAttrs::V2_ISA_10E60; + + if (STI.hasFeature(CSKY::FeatureTrust)) + ISAFlag |= CSKYAttrs::ISA_TRUST; + + if (STI.hasFeature(CSKY::FeatureJAVA)) + ISAFlag |= CSKYAttrs::ISA_JAVA; + + if (STI.hasFeature(CSKY::FeatureCache)) + ISAFlag |= CSKYAttrs::ISA_CACHE; + + if (STI.hasFeature(CSKY::FeatureNVIC)) + ISAFlag |= CSKYAttrs::ISA_NVIC; + + if (STI.hasFeature(CSKY::FeatureDSP)) + ISAFlag |= CSKYAttrs::ISA_DSP; + + if (STI.hasFeature(CSKY::HasDSP1E2)) + ISAFlag |= CSKYAttrs::ISA_DSP_1E2; + + if (STI.hasFeature(CSKY::HasDSPE60)) + ISAFlag |= CSKYAttrs::V2_ISA_DSPE60; + + if (STI.hasFeature(CSKY::FeatureDSPV2)) + ISAFlag |= CSKYAttrs::ISA_DSP_ENHANCE; + + if (STI.hasFeature(CSKY::FeatureDSP_Silan)) + ISAFlag |= CSKYAttrs::ISA_DSP_SILAN; + + if (STI.hasFeature(CSKY::FeatureVDSPV1_128)) + ISAFlag |= CSKYAttrs::ISA_VDSP; + + if (STI.hasFeature(CSKY::FeatureVDSPV2)) + ISAFlag |= CSKYAttrs::ISA_VDSP_2; + + if (STI.hasFeature(CSKY::HasVDSP2E3)) + ISAFlag |= CSKYAttrs::ISA_VDSP_2E3; + + if (STI.hasFeature(CSKY::HasVDSP2E60F)) + ISAFlag |= CSKYAttrs::ISA_VDSP_2E60F; + + emitAttribute(CSKYAttrs::CSKY_ISA_FLAGS, ISAFlag); + + unsigned ISAExtFlag = 0; + if (STI.hasFeature(CSKY::HasFLOATE1)) + ISAExtFlag |= CSKYAttrs::ISA_FLOAT_E1; + + if (STI.hasFeature(CSKY::HasFLOAT1E2)) + ISAExtFlag |= CSKYAttrs::ISA_FLOAT_1E2; + + if (STI.hasFeature(CSKY::HasFLOAT1E3)) + ISAExtFlag |= CSKYAttrs::ISA_FLOAT_1E3; + + if (STI.hasFeature(CSKY::HasFLOAT3E4)) + ISAExtFlag |= CSKYAttrs::ISA_FLOAT_3E4; + + if (STI.hasFeature(CSKY::HasFLOAT7E60)) + ISAExtFlag |= CSKYAttrs::ISA_FLOAT_7E60; + + emitAttribute(CSKYAttrs::CSKY_ISA_EXT_FLAGS, ISAExtFlag); + + if (STI.hasFeature(CSKY::FeatureDSP)) + emitAttribute(CSKYAttrs::CSKY_DSP_VERSION, + CSKYAttrs::DSP_VERSION_EXTENSION); + if (STI.hasFeature(CSKY::FeatureDSPV2)) + emitAttribute(CSKYAttrs::CSKY_DSP_VERSION, CSKYAttrs::DSP_VERSION_2); + + if (STI.hasFeature(CSKY::FeatureVDSPV2)) + emitAttribute(CSKYAttrs::CSKY_VDSP_VERSION, CSKYAttrs::VDSP_VERSION_2); + + if (STI.hasFeature(CSKY::FeatureFPUV2_SF) || + STI.hasFeature(CSKY::FeatureFPUV2_DF)) + 
emitAttribute(CSKYAttrs::CSKY_FPU_VERSION, CSKYAttrs::FPU_VERSION_2);
+  else if (STI.hasFeature(CSKY::FeatureFPUV3_HF) ||
+           STI.hasFeature(CSKY::FeatureFPUV3_SF) ||
+           STI.hasFeature(CSKY::FeatureFPUV3_DF))
+    emitAttribute(CSKYAttrs::CSKY_FPU_VERSION, CSKYAttrs::FPU_VERSION_3);
+
+  bool hasAnyFloatExt = STI.hasFeature(CSKY::FeatureFPUV2_SF) ||
+                        STI.hasFeature(CSKY::FeatureFPUV2_DF) ||
+                        STI.hasFeature(CSKY::FeatureFPUV3_HF) ||
+                        STI.hasFeature(CSKY::FeatureFPUV3_SF) ||
+                        STI.hasFeature(CSKY::FeatureFPUV3_DF);
+
+  if (hasAnyFloatExt && STI.hasFeature(CSKY::ModeHardFloat) &&
+      STI.hasFeature(CSKY::ModeHardFloatABI))
+    emitAttribute(CSKYAttrs::CSKY_FPU_ABI, CSKYAttrs::FPU_ABI_HARD);
+  else if (hasAnyFloatExt && STI.hasFeature(CSKY::ModeHardFloat))
+    emitAttribute(CSKYAttrs::CSKY_FPU_ABI, CSKYAttrs::FPU_ABI_SOFTFP);
+  else
+    emitAttribute(CSKYAttrs::CSKY_FPU_ABI, CSKYAttrs::FPU_ABI_SOFT);
+
+  unsigned HardFPFlag = 0;
+  if (STI.hasFeature(CSKY::FeatureFPUV3_HF))
+    HardFPFlag |= CSKYAttrs::FPU_HARDFP_HALF;
+  if (STI.hasFeature(CSKY::FeatureFPUV2_SF) ||
+      STI.hasFeature(CSKY::FeatureFPUV3_SF))
+    HardFPFlag |= CSKYAttrs::FPU_HARDFP_SINGLE;
+  if (STI.hasFeature(CSKY::FeatureFPUV2_DF) ||
+      STI.hasFeature(CSKY::FeatureFPUV3_DF))
+    HardFPFlag |= CSKYAttrs::FPU_HARDFP_DOUBLE;
+
+  if (HardFPFlag != 0) {
+    emitAttribute(CSKYAttrs::CSKY_FPU_DENORMAL, CSKYAttrs::NEEDED);
+    emitAttribute(CSKYAttrs::CSKY_FPU_EXCEPTION, CSKYAttrs::NEEDED);
+    emitTextAttribute(CSKYAttrs::CSKY_FPU_NUMBER_MODULE, "IEEE 754");
+    emitAttribute(CSKYAttrs::CSKY_FPU_HARDFP, HardFPFlag);
+  }
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h
new file mode 100644
index 000000000000..b7931e922279
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h
@@ -0,0 +1,148 @@
+//===-- CSKYELFStreamer.h - CSKY ELF Target Streamer -----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYELFSTREAMER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYELFSTREAMER_H
+
+#include "CSKYTargetStreamer.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+
+class CSKYTargetELFStreamer : public CSKYTargetStreamer {
+private:
+  enum class AttributeType { Hidden, Numeric, Text, NumericAndText };
+
+  struct AttributeItem {
+    AttributeType Type;
+    unsigned Tag;
+    unsigned IntValue;
+    std::string StringValue;
+  };
+
+  StringRef CurrentVendor;
+  SmallVector<AttributeItem, 64> Contents;
+
+  MCSection *AttributeSection = nullptr;
+
+  AttributeItem *getAttributeItem(unsigned Attribute) {
+    for (size_t i = 0; i < Contents.size(); ++i)
+      if (Contents[i].Tag == Attribute)
+        return &Contents[i];
+    return nullptr;
+  }
+
+  void setAttributeItem(unsigned Attribute, unsigned Value,
+                        bool OverwriteExisting) {
+    // Look for existing attribute item.
+    if (AttributeItem *Item = getAttributeItem(Attribute)) {
+      if (!OverwriteExisting)
+        return;
+      Item->Type = AttributeType::Numeric;
+      Item->IntValue = Value;
+      return;
+    }
+
+    // Create new attribute item.
+    Contents.push_back({AttributeType::Numeric, Attribute, Value, ""});
+  }
+
+  void setAttributeItem(unsigned Attribute, StringRef Value,
+                        bool OverwriteExisting) {
+    // Look for existing attribute item.
+    if (AttributeItem *Item = getAttributeItem(Attribute)) {
+      if (!OverwriteExisting)
+        return;
+      Item->Type = AttributeType::Text;
+      Item->StringValue = std::string(Value);
+      return;
+    }
+
+    // Create new attribute item.
+    Contents.push_back({AttributeType::Text, Attribute, 0, std::string(Value)});
+  }
+
+  void setAttributeItems(unsigned Attribute, unsigned IntValue,
+                         StringRef StringValue, bool OverwriteExisting) {
+    // Look for existing attribute item.
+    if (AttributeItem *Item = getAttributeItem(Attribute)) {
+      if (!OverwriteExisting)
+        return;
+      Item->Type = AttributeType::NumericAndText;
+      Item->IntValue = IntValue;
+      Item->StringValue = std::string(StringValue);
+      return;
+    }
+
+    // Create new attribute item.
+    Contents.push_back({AttributeType::NumericAndText, Attribute, IntValue,
+                        std::string(StringValue)});
+  }
+
+  void emitAttribute(unsigned Attribute, unsigned Value) override;
+  void emitTextAttribute(unsigned Attribute, StringRef String) override;
+  void finishAttributeSection() override;
+  size_t calculateContentSize() const;
+
+  void emitTargetAttributes(const MCSubtargetInfo &STI) override;
+
+public:
+  MCELFStreamer &getStreamer();
+  CSKYTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+};
+
+class CSKYELFStreamer : public MCELFStreamer {
+  int64_t MappingSymbolCounter = 0;
+
+  void EmitMappingSymbol(StringRef Name);
+
+public:
+  friend class CSKYTargetELFStreamer;
+
+  enum ElfMappingSymbol { EMS_None, EMS_Text, EMS_Data };
+
+  ElfMappingSymbol State;
+
+  CSKYELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+                  std::unique_ptr<MCObjectWriter> OW,
+                  std::unique_ptr<MCCodeEmitter> Emitter)
+      : MCELFStreamer(Context, std::move(TAB), std::move(OW),
+                      std::move(Emitter)),
+        State(EMS_None) {}
+
+  ~CSKYELFStreamer() override = default;
+
+  void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+                SMLoc Loc) override {
+    EmitMappingSymbol("$d");
+    MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+  }
+  void emitBytes(StringRef Data) override {
+    EmitMappingSymbol("$d");
+    MCELFStreamer::emitBytes(Data);
+  }
+  void emitInstruction(const MCInst &Inst,
+                       const MCSubtargetInfo &STI) override {
+    EmitMappingSymbol("$t");
+    MCELFStreamer::emitInstruction(Inst, STI);
+  }
+  void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
+    EmitMappingSymbol("$d");
+    MCELFStreamer::emitValueImpl(Value, Size, Loc);
+  }
+  void reset() override {
+    MappingSymbolCounter = 0;
+    State = EMS_None;
+    MCELFStreamer::reset();
+  }
+};
+
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index 07757f03c258..3a0017d11e23 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -9,16 +9,21 @@
 // This class prints an CSKY MCInst to a .s file.
 //
 //===----------------------------------------------------------------------===//
-
 #include "CSKYInstPrinter.h"
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
@@ -55,6 +60,14 @@ bool CSKYInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
     ArchRegNames = true;
     return true;
   }
+  if (Opt == "debug") {
+    DebugFlag = true;
+    return true;
+  }
+  if (Opt == "abi-names") {
+    ABIRegNames = true;
+    return true;
+  }
   return false;
 }
@@ -70,7 +83,11 @@ void CSKYInstPrinter::printInst(const MCInst *MI, uint64_t Address,
 }
 
 void CSKYInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
-  O << getRegisterName(RegNo);
+  if (PrintBranchImmAsAddress)
+    O << getRegisterName(RegNo, ABIRegNames ? CSKY::ABIRegAltName
+                                            : CSKY::NoRegAltName);
+  else
+    O << getRegisterName(RegNo);
 }
 
 void CSKYInstPrinter::printFPRRegName(raw_ostream &O, unsigned RegNo) const {
@@ -87,15 +104,38 @@
   const MCOperand &MO = MI->getOperand(OpNo);
 
   if (MO.isReg()) {
-    if (MO.getReg() == CSKY::C)
-      O << "";
+    unsigned Reg = MO.getReg();
+    bool useABIName = false;
+    if (PrintBranchImmAsAddress)
+      useABIName = ABIRegNames;
     else
-      printRegName(O, MO.getReg());
+      useABIName = !ArchRegNames;
+
+    if (Reg == CSKY::C)
+      O << "";
+    else if (STI.getFeatureBits()[CSKY::FeatureJAVA]) {
+      if (Reg == CSKY::R23)
+        O << (useABIName ? "fp" : "r23");
+      else if (Reg == CSKY::R24)
+        O << (useABIName ? "top" : "r24");
+      else if (Reg == CSKY::R25)
"bsp" : "r25"); + else + printRegName(O, Reg); + } else + printRegName(O, Reg); + return; } if (MO.isImm()) { - O << formatImm(MO.getImm()); + uint64_t TSFlags = MII.get(MI->getOpcode()).TSFlags; + + if (((TSFlags & CSKYII::AddrModeMask) != CSKYII::AddrModeNone) && + PrintBranchImmAsAddress) + O << formatHex(MO.getImm()); + else + O << MO.getImm(); return; } @@ -157,6 +197,22 @@ void CSKYInstPrinter::printCSKYSymbolOperand(const MCInst *MI, uint64_t Address, } } +void CSKYInstPrinter::printPSRFlag(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + auto V = MI->getOperand(OpNo).getImm(); + + ListSeparator LS; + + if ((V >> 3) & 0x1) + O << LS << "ee"; + if ((V >> 2) & 0x1) + O << LS << "ie"; + if ((V >> 1) & 0x1) + O << LS << "fe"; + if ((V >> 0) & 0x1) + O << LS << "af"; +} + void CSKYInstPrinter::printRegisterSeq(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp index 1d220b749cb1..540f901fd479 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp @@ -16,6 +16,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" using namespace llvm; @@ -64,15 +67,170 @@ static void writeData(uint32_t Bin, unsigned Size, raw_ostream &OS) { support::endian::write(OS, LO16, support::little); } +void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + MCInst TmpInst; + + uint32_t Binary; + + TmpInst = + MCInstBuilder(MI.getOpcode() == CSKY::JBT_E ? CSKY::BF16 : CSKY::BT16) + .addOperand(MI.getOperand(0)) + .addImm(6); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + writeData(Binary, 2, OS); + + if (!STI.getFeatureBits()[CSKY::Has2E3]) + TmpInst = MCInstBuilder(CSKY::BR32) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)); + else + TmpInst = MCInstBuilder(CSKY::JMPI32).addOperand(MI.getOperand(2)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + Fixups[Fixups.size() - 1].setOffset(2); + writeData(Binary, 4, OS); +} + +void CSKYMCCodeEmitter::expandNEG(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + MCInst TmpInst; + uint32_t Binary; + unsigned Size = MI.getOpcode() == CSKY::NEG32 ? 4 : 2; + + TmpInst = MCInstBuilder(Size == 4 ? CSKY::NOT32 : CSKY::NOT16) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + writeData(Binary, Size, OS); + + TmpInst = MCInstBuilder(Size == 4 ? CSKY::ADDI32 : CSKY::ADDI16) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(0)) + .addImm(1); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + writeData(Binary, Size, OS); +} + +void CSKYMCCodeEmitter::expandRSUBI(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + MCInst TmpInst; + uint32_t Binary; + unsigned Size = MI.getOpcode() == CSKY::RSUBI32 ? 4 : 2; + + TmpInst = MCInstBuilder(Size == 4 ? 
+  TmpInst = MCInstBuilder(Size == 4 ? CSKY::NOT32 : CSKY::NOT16)
+                .addOperand(MI.getOperand(0))
+                .addOperand(MI.getOperand(1));
+  Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+  writeData(Binary, Size, OS);
+
+  TmpInst = MCInstBuilder(Size == 4 ? CSKY::ADDI32 : CSKY::ADDI16)
+                .addOperand(MI.getOperand(0))
+                .addOperand(MI.getOperand(0))
+                .addImm(MI.getOperand(2).getImm() + 1);
+  Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+  writeData(Binary, Size, OS);
+}
+
 void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                           SmallVectorImpl<MCFixup> &Fixups,
                                           const MCSubtargetInfo &STI) const {
   const MCInstrDesc &Desc = MII.get(MI.getOpcode());
   unsigned Size = Desc.getSize();
+
+  MCInst TmpInst;
+
+  switch (MI.getOpcode()) {
+  default:
+    TmpInst = MI;
+    break;
+  case CSKY::JBT_E:
+  case CSKY::JBF_E:
+    expandJBTF(MI, OS, Fixups, STI);
+    MCNumEmitted += 2;
+    return;
+  case CSKY::NEG32:
+  case CSKY::NEG16:
+    expandNEG(MI, OS, Fixups, STI);
+    MCNumEmitted += 2;
+    return;
+  case CSKY::RSUBI32:
+  case CSKY::RSUBI16:
+    expandRSUBI(MI, OS, Fixups, STI);
+    MCNumEmitted += 2;
+    return;
+  case CSKY::JBSR32:
+    TmpInst = MCInstBuilder(CSKY::BSR32).addOperand(MI.getOperand(0));
+    break;
+  case CSKY::JBR16:
+    TmpInst = MCInstBuilder(CSKY::BR16).addOperand(MI.getOperand(0));
+    break;
+  case CSKY::JBR32:
+    TmpInst = MCInstBuilder(CSKY::BR32).addOperand(MI.getOperand(0));
+    break;
+  case CSKY::JBT16:
+    TmpInst = MCInstBuilder(CSKY::BT16)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1));
+    break;
+  case CSKY::JBT32:
+    TmpInst = MCInstBuilder(CSKY::BT32)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1));
+    break;
+  case CSKY::JBF16:
+    TmpInst = MCInstBuilder(CSKY::BF16)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1));
+    break;
+  case CSKY::JBF32:
+    TmpInst = MCInstBuilder(CSKY::BF32)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1));
+    break;
+  case CSKY::LRW32_Gen:
+    TmpInst = MCInstBuilder(CSKY::LRW32)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(2));
+    break;
+  case CSKY::LRW16_Gen:
+    TmpInst = MCInstBuilder(CSKY::LRW16)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(2));
+    break;
+  case CSKY::CMPLEI32:
+    TmpInst = MCInstBuilder(CSKY::CMPLTI32)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1))
+                  .addImm(MI.getOperand(2).getImm() + 1);
+    break;
+  case CSKY::CMPLEI16:
+    TmpInst = MCInstBuilder(CSKY::CMPLTI16)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1))
+                  .addImm(MI.getOperand(2).getImm() + 1);
+    break;
+  case CSKY::ROTRI32:
+    TmpInst = MCInstBuilder(CSKY::ROTLI32)
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1))
+                  .addImm(32 - MI.getOperand(2).getImm());
+    break;
+  case CSKY::BGENI:
+    auto V = 1 << MI.getOperand(1).getImm();
+    TmpInst =
+        MCInstBuilder(CSKY::MOVI32).addOperand(MI.getOperand(0)).addImm(V);
+    break;
+  }
+
   ++MCNumEmitted;
 
-  uint32_t Bin = getBinaryCodeForInstr(MI, Fixups, STI);
+  uint32_t Bin = getBinaryCodeForInstr(TmpInst, Fixups, STI);
 
   uint16_t LO16 = static_cast<uint16_t>(Bin);
   uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
@@ -170,7 +328,6 @@ MCFixupKind CSKYMCCodeEmitter::getTargetFixup(const MCExpr *Expr) const {
 }
 
 MCCodeEmitter *llvm::createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
-                                             const MCRegisterInfo &MRI,
                                              MCContext &Ctx) {
   return new CSKYMCCodeEmitter(Ctx, MCII);
 }
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
index bfba07bcb32a..128430197cc5 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
@@ -20,6 +20,8 @@
 
 namespace llvm {
 
+class MCInstrInfo;
+
 class CSKYMCCodeEmitter : public MCCodeEmitter {
   MCContext &Ctx;
   const MCInstrInfo &MII;
@@ -169,6 +171,16 @@ public:
     Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
     return 0;
   }
+
+  void expandJBTF(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups,
+                  const MCSubtargetInfo &STI) const;
+  void expandNEG(const MCInst &MI, raw_ostream &OS,
+                 SmallVectorImpl<MCFixup> &Fixups,
+                 const MCSubtargetInfo &STI) const;
+  void expandRSUBI(const MCInst &MI, raw_ostream &OS,
+                   SmallVectorImpl<MCFixup> &Fixups,
+                   const MCSubtargetInfo &STI) const;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
index 7987613b0608..b9989822dc36 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
@@ -8,10 +8,12 @@
 
 #include "CSKYMCExpr.h"
 #include "CSKYFixupKinds.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
index 0901c0993607..1a69dc8acde0 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
@@ -12,10 +12,14 @@
 
 #include "CSKYMCTargetDesc.h"
 #include "CSKYAsmBackend.h"
+#include "CSKYELFStreamer.h"
 #include "CSKYInstPrinter.h"
 #include "CSKYMCAsmInfo.h"
 #include "CSKYMCCodeEmitter.h"
+#include "CSKYTargetStreamer.h"
 #include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -72,6 +76,81 @@ static MCSubtargetInfo *createCSKYMCSubtargetInfo(const Triple &TT,
   return createCSKYMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU=*/CPUName, FS);
 }
 
+static MCTargetStreamer *
+createCSKYObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+  const Triple &TT = STI.getTargetTriple();
+  if (TT.isOSBinFormatELF())
+    return new CSKYTargetELFStreamer(S, STI);
+  return nullptr;
+}
+
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+                                     std::unique_ptr<MCAsmBackend> &&MAB,
+                                     std::unique_ptr<MCObjectWriter> &&OW,
+                                     std::unique_ptr<MCCodeEmitter> &&Emitter,
+                                     bool RelaxAll) {
+  CSKYELFStreamer *S = new CSKYELFStreamer(Ctx, std::move(MAB), std::move(OW),
+                                           std::move(Emitter));
+
+  if (RelaxAll)
+    S->getAssembler().setRelaxAll(true);
+  return S;
+}
+
+static MCTargetStreamer *createCSKYAsmTargetStreamer(MCStreamer &S,
+                                                     formatted_raw_ostream &OS,
+                                                     MCInstPrinter *InstPrinter,
+                                                     bool isVerboseAsm) {
+  return new CSKYTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer *createCSKYNullTargetStreamer(MCStreamer &S) {
+  return new CSKYTargetStreamer(S);
+}
+
+namespace {
+
+class CSKYMCInstrAnalysis : public MCInstrAnalysis {
+public:
+  explicit CSKYMCInstrAnalysis(const MCInstrInfo *Info)
+      : MCInstrAnalysis(Info) {}
+
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                      uint64_t &Target) const override {
+    if (isConditionalBranch(Inst) || isUnconditionalBranch(Inst)) {
+      int64_t Imm;
+      Imm = Inst.getOperand(Inst.getNumOperands() - 1).getImm();
+      Target = Addr + Imm;
+      return true;
+    }
+
+    if (Inst.getOpcode() == CSKY::BSR32) {
+      Target = Addr + Inst.getOperand(0).getImm();
+      return true;
+    }
+
+    switch (Inst.getOpcode()) {
+    default:
+      return false;
+    case CSKY::LRW16:
+    case CSKY::LRW32:
+    case CSKY::JSRI32:
+    case CSKY::JMPI32:
+      int64_t Imm = Inst.getOperand(Inst.getNumOperands() - 1).getImm();
+      Target = ((Addr + Imm) & 0xFFFFFFFC);
+      return true;
+    }
+
+    return false;
+  }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createCSKYInstrAnalysis(const MCInstrInfo *Info) {
+  return new CSKYMCInstrAnalysis(Info);
+}
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetMC() {
   auto &CSKYTarget = getTheCSKYTarget();
   TargetRegistry::RegisterMCAsmBackend(CSKYTarget, createCSKYAsmBackend);
@@ -82,4 +161,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetMC() {
   TargetRegistry::RegisterMCInstPrinter(CSKYTarget, createCSKYMCInstPrinter);
   TargetRegistry::RegisterMCSubtargetInfo(CSKYTarget,
                                           createCSKYMCSubtargetInfo);
+  TargetRegistry::RegisterELFStreamer(CSKYTarget, createELFStreamer);
+  TargetRegistry::RegisterObjectTargetStreamer(CSKYTarget,
+                                               createCSKYObjectTargetStreamer);
+  TargetRegistry::RegisterAsmTargetStreamer(CSKYTarget,
+                                            createCSKYAsmTargetStreamer);
+  // Register the null target streamer.
+  TargetRegistry::RegisterNullTargetStreamer(CSKYTarget,
+                                             createCSKYNullTargetStreamer);
+  TargetRegistry::RegisterMCInstrAnalysis(CSKYTarget, createCSKYInstrAnalysis);
 }
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
index 25bbd635fc58..4b8c45e95b74 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
@@ -34,9 +34,7 @@ MCAsmBackend *createCSKYAsmBackend(const Target &T, const MCSubtargetInfo &STI,
                                    const MCRegisterInfo &MRI,
                                    const MCTargetOptions &Options);
 
-MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
-                                       const MCRegisterInfo &MRI,
-                                       MCContext &Ctx);
+MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx);
 
 } // namespace llvm
 
 #define GET_REGINFO_ENUM
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
new file mode 100644
index 000000000000..dd7053d60aa1
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
@@ -0,0 +1,143 @@
+//===-- CSKYTargetStreamer.cpp - CSKY Target Streamer ---------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYTargetStreamer.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+//
+// ConstantPool implementation
+//
+// Emit the contents of the constant pool using the provided streamer.
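+//
+// For illustration (editorial sketch, not part of the upstream change), a
+// target streamer would typically drive the pool roughly as follows; the
+// variable names here are hypothetical:
+//
+//   const MCExpr *Ref =
+//       Pool.addEntry(Streamer, Expr, /*Size=*/4, Loc, /*AdjustExpr=*/nullptr);
+//   ... emit an instruction whose operand references Ref ...
+//   Pool.emitAll(Streamer); // flush at the end of the section
+//
+// addConstantPoolEntry() below is the entry point this backend actually uses.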
+void CSKYConstantPool::emitAll(MCStreamer &Streamer) {
+  if (Entries.empty())
+    return;
+
+  if (CurrentSection != nullptr)
+    Streamer.switchSection(CurrentSection);
+
+  Streamer.emitDataRegion(MCDR_DataRegion);
+  for (const ConstantPoolEntry &Entry : Entries) {
+    Streamer.emitCodeAlignment(
+        Entry.Size,
+        Streamer.getContext().getSubtargetInfo()); // align naturally
+    Streamer.emitLabel(Entry.Label);
+    Streamer.emitValue(Entry.Value, Entry.Size, Entry.Loc);
+  }
+  Streamer.emitDataRegion(MCDR_DataRegionEnd);
+  Entries.clear();
+}
+
+const MCExpr *CSKYConstantPool::addEntry(MCStreamer &Streamer,
+                                         const MCExpr *Value, unsigned Size,
+                                         SMLoc Loc, const MCExpr *AdjustExpr) {
+  if (CurrentSection == nullptr)
+    CurrentSection = Streamer.getCurrentSectionOnly();
+
+  auto &Context = Streamer.getContext();
+
+  const MCConstantExpr *C = dyn_cast<MCConstantExpr>(Value);
+
+  // Check if there is an existing entry for the same constant. If so, reuse it.
+  auto Itr = C ? CachedEntries.find(C->getValue()) : CachedEntries.end();
+  if (Itr != CachedEntries.end())
+    return Itr->second;
+
+  MCSymbol *CPEntryLabel = Context.createTempSymbol();
+  const auto SymRef = MCSymbolRefExpr::create(CPEntryLabel, Context);
+
+  if (AdjustExpr) {
+    const CSKYMCExpr *CSKYExpr = cast<CSKYMCExpr>(Value);
+
+    Value = MCBinaryExpr::createSub(AdjustExpr, SymRef, Context);
+    Value = MCBinaryExpr::createSub(CSKYExpr->getSubExpr(), Value, Context);
+    Value = CSKYMCExpr::create(Value, CSKYExpr->getKind(), Context);
+  }
+
+  Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
+
+  if (C)
+    CachedEntries[C->getValue()] = SymRef;
+  return SymRef;
+}
+
+bool CSKYConstantPool::empty() { return Entries.empty(); }
+
+void CSKYConstantPool::clearCache() {
+  CurrentSection = nullptr;
+  CachedEntries.clear();
+}
+
+CSKYTargetStreamer::CSKYTargetStreamer(MCStreamer &S)
+    : MCTargetStreamer(S), ConstantPool(new CSKYConstantPool()) {}
+
+const MCExpr *
+CSKYTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc,
+                                         const MCExpr *AdjustExpr) {
+  auto ELFRefKind = CSKYMCExpr::VK_CSKY_Invalid;
+  ConstantCounter++;
+
+  const MCExpr *OrigExpr = Expr;
+
+  if (const CSKYMCExpr *CE = dyn_cast<CSKYMCExpr>(Expr)) {
+    Expr = CE->getSubExpr();
+    ELFRefKind = CE->getKind();
+  }
+
+  if (const MCSymbolRefExpr *SymExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+    const MCSymbol *Sym = &SymExpr->getSymbol();
+
+    SymbolIndex Index = {Sym, ELFRefKind};
+
+    if (ConstantMap.find(Index) == ConstantMap.end()) {
+      ConstantMap[Index] =
+          ConstantPool->addEntry(getStreamer(), OrigExpr, 4, Loc, AdjustExpr);
+    }
+    return ConstantMap[Index];
+  }
+
+  return ConstantPool->addEntry(getStreamer(), Expr, 4, Loc, AdjustExpr);
+}
+
+void CSKYTargetStreamer::emitCurrentConstantPool() {
+  ConstantPool->emitAll(Streamer);
+  ConstantPool->clearCache();
+}
+
+// finish() - write out any non-empty assembler constant pools.
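+// It also finishes the attribute section: only the ELF target streamer
+// overrides finishAttributeSection() to actually write attributes out, while
+// the asm and null streamers leave it empty.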
+void CSKYTargetStreamer::finish() {
+  if (ConstantCounter != 0) {
+    ConstantPool->emitAll(Streamer);
+  }
+
+  finishAttributeSection();
+}
+
+void CSKYTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {}
+
+void CSKYTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
+void CSKYTargetStreamer::emitTextAttribute(unsigned Attribute,
+                                           StringRef String) {}
+void CSKYTargetStreamer::finishAttributeSection() {}
+
+void CSKYTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+  OS << "\t.csky_attribute\t" << Attribute << ", " << Twine(Value) << "\n";
+}
+
+void CSKYTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+                                              StringRef String) {
+  OS << "\t.csky_attribute\t" << Attribute << ", \"" << String << "\"\n";
+}
+
+void CSKYTargetAsmStreamer::finishAttributeSection() {}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h
new file mode 100644
index 000000000000..270d48d5939c
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h
@@ -0,0 +1,110 @@
+//===-- CSKYTargetStreamer.h - CSKY Target Streamer -----------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYTARGETSTREAMER_H
+
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class CSKYConstantPool {
+  using EntryVecTy = SmallVector<ConstantPoolEntry, 4>;
+  EntryVecTy Entries;
+  std::map<int64_t, const MCExpr *> CachedEntries;
+
+  MCSection *CurrentSection = nullptr;
+
+public:
+  // Initialize a new empty constant pool
+  CSKYConstantPool() = default;
+
+  // Add a new entry to the constant pool in the next slot.
+  // \param Value is the new entry to put in the constant pool.
+  // \param Size is the size in bytes of the entry
+  //
+  // \returns a MCExpr that references the newly inserted value
+  const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Value,
+                         unsigned Size, SMLoc Loc, const MCExpr *AdjustExpr);
+
+  void emitAll(MCStreamer &Streamer);
+
+  // Return true if the constant pool is empty
+  bool empty();
+
+  void clearCache();
+};
+
+class CSKYTargetStreamer : public MCTargetStreamer {
+public:
+  typedef struct {
+    const MCSymbol *sym;
+    CSKYMCExpr::VariantKind kind;
+  } SymbolIndex;
+
+protected:
+  std::unique_ptr<CSKYConstantPool> ConstantPool;
+
+  DenseMap<SymbolIndex, const MCExpr *> ConstantMap;
+
+  unsigned ConstantCounter = 0;
+
+public:
+  CSKYTargetStreamer(MCStreamer &S);
+
+  virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+  virtual void emitAttribute(unsigned Attribute, unsigned Value);
+  virtual void finishAttributeSection();
+
+  virtual void emitTargetAttributes(const MCSubtargetInfo &STI);
+  /// Add a new entry to the constant pool for the current section and return
+  /// an MCExpr that can be used to refer to the constant pool location.
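+  /// Entries for symbolic expressions are de-duplicated per (symbol, variant
+  /// kind) pair via ConstantMap, so repeated references to the same symbol
+  /// share a single pool slot.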
+  const MCExpr *addConstantPoolEntry(const MCExpr *, SMLoc Loc,
+                                     const MCExpr *AdjustExpr = nullptr);
+
+  void emitCurrentConstantPool();
+
+  void finish() override;
+};
+
+template <> struct DenseMapInfo<CSKYTargetStreamer::SymbolIndex> {
+  static inline CSKYTargetStreamer::SymbolIndex getEmptyKey() {
+    return {nullptr, CSKYMCExpr::VK_CSKY_Invalid};
+  }
+  static inline CSKYTargetStreamer::SymbolIndex getTombstoneKey() {
+    return {nullptr, CSKYMCExpr::VK_CSKY_Invalid};
+  }
+  static unsigned getHashValue(const CSKYTargetStreamer::SymbolIndex &V) {
+    return hash_combine(DenseMapInfo<const MCSymbol *>::getHashValue(V.sym),
+                        DenseMapInfo<int>::getHashValue(V.kind));
+  }
+  static bool isEqual(const CSKYTargetStreamer::SymbolIndex &A,
+                      const CSKYTargetStreamer::SymbolIndex &B) {
+    return A.sym == B.sym && A.kind == B.kind;
+  }
+};
+
+class formatted_raw_ostream;
+
+class CSKYTargetAsmStreamer : public CSKYTargetStreamer {
+  formatted_raw_ostream &OS;
+
+  void emitAttribute(unsigned Attribute, unsigned Value) override;
+  void emitTextAttribute(unsigned Attribute, StringRef String) override;
+  void finishAttributeSection() override;
+
+public:
+  CSKYTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+      : CSKYTargetStreamer(S), OS(OS) {}
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYTARGETSTREAMER_H
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
new file mode 100644
index 000000000000..4d6e1a9d3166
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -0,0 +1,144 @@
+//- DXIL.td - Describe DXIL operation -------------------------*- tablegen -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is a target description file for DXIL operations.
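+///
+/// Each operation is a dxil_op record that carries the DXIL opcode, an opcode
+/// class, the allowed overload types, and a dxil_param list describing its
+/// operands; see the Sin and UMax records below for representative examples.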
+///
+//===----------------------------------------------------------------------===//
+
+include "llvm/IR/Intrinsics.td"
+
+class dxil_class<string _name> {
+  string name = _name;
+}
+class dxil_category<string _name> {
+  string name = _name;
+}
+
+def Unary : dxil_class<"Unary">;
+def Binary : dxil_class<"Binary">;
+def FlattenedThreadIdInGroupClass : dxil_class<"FlattenedThreadIdInGroup">;
+def ThreadIdInGroupClass : dxil_class<"ThreadIdInGroup">;
+def ThreadIdClass : dxil_class<"ThreadId">;
+def GroupIdClass : dxil_class<"GroupId">;
+
+def binary_uint : dxil_category<"Binary uint">;
+def unary_float : dxil_category<"Unary float">;
+def ComputeID : dxil_category<"Compute/Mesh/Amplification shader">;
+
+
+// The parameter description for a DXIL instruction
+class dxil_param<int _pos, string type, string _name, string _doc,
+                 bit _is_const = 0, string _enum_name = "",
+                 int _max_value = 0> {
+  int pos = _pos;          // position in parameter list
+  string llvm_type = type; // llvm type name, $o for overload, $r for resource
+                           // type, $cb for legacy cbuffer, $u4 for u4 struct
+  string name = _name;     // short, unique name
+  string doc = _doc;       // the documentation description of this parameter
+  bit is_const =
+      _is_const; // whether this argument requires a constant value in the IR
+  string enum_name = _enum_name; // the name of the enum type if applicable
+  int max_value =
+      _max_value; // the maximum value for this parameter if applicable
+}
+
+// A representation for a DXIL instruction
+class dxil_inst<string _name> {
+  string name = _name; // short, unique name
+
+  string dxil_op = "";    // name of DXIL operation
+  int dxil_opid = 0;      // ID of DXIL operation
+  dxil_class op_class;    // name of the opcode class
+  dxil_category category; // classification for this instruction
+  string doc = "";        // the documentation description of this instruction
+  list<dxil_param> ops = []; // the operands that this instruction takes
+  string oload_types = "";   // overload types if applicable
+  string fn_attr = "";       // attribute shorthands: rn=does not access
+                             // memory, ro=only reads from memory
+  bit is_deriv = 0;          // whether this is some kind of derivative
+  bit is_gradient = 0;       // whether this requires a gradient calculation
+  bit is_feedback = 0;       // whether this is a sampler feedback op
+  bit is_wave = 0; // whether this requires in-wave, cross-lane functionality
+  bit requires_uniform_inputs = 0; // whether this operation requires that all
+                                   // of its inputs are uniform across the wave
+  // Group dxil operation for stats.
+  // Like how many atomic/float/uint/int/... instructions are used in the
+  // program.
+  list<string> stats_group = [];
+}
+
+class dxil_op<string name, int code_id, dxil_class code_class,
+              dxil_category op_category, string _doc, string _oload_types,
+              string _fn_attr, list<dxil_param> op_params,
+              list<string> _stats_group = []>
+    : dxil_inst<name> {
+  let dxil_op = name;
+  let dxil_opid = code_id;
+  let doc = _doc;
+  let ops = op_params;
+  let op_class = code_class;
+  let category = op_category;
+  let oload_types = _oload_types;
+  let fn_attr = _fn_attr;
+  let stats_group = _stats_group;
+}
+
+// The intrinsic which maps directly to this dxil op.
+class dxil_map_intrinsic<Intrinsic llvm_intrinsic_> {
+  Intrinsic llvm_intrinsic = llvm_intrinsic_;
+}
+
+def Sin : dxil_op<"Sin", 13, Unary, unary_float,
+                  "returns sine(theta) for theta in radians.", "half;float;",
+                  "rn",
+                  [
+                    dxil_param<0, "$o", "", "operation result">,
+                    dxil_param<1, "i32", "opcode", "DXIL opcode">,
+                    dxil_param<2, "$o", "value", "input value">
+                  ],
+                  ["floats"]>,
+          dxil_map_intrinsic<int_sin>;
+
a : b", + "i16;i32;i64;", "rn", + [ + dxil_param<0, "$o", "", "operation result">, + dxil_param<1, "i32", "opcode", "DXIL opcode">, + dxil_param<2, "$o", "a", "input value">, + dxil_param<3, "$o", "b", "input value"> + ], + ["uints"]>, + dxil_map_intrinsic; + +def ThreadId :dxil_op< "ThreadId", 93, ThreadIdClass, ComputeID, "reads the thread ID", "i32;", "rn", + [ + dxil_param<0, "i32", "", "thread ID component">, + dxil_param<1, "i32", "opcode", "DXIL opcode">, + dxil_param<2, "i32", "component", "component to read (x,y,z)"> + ]>, + dxil_map_intrinsic; + +def GroupId :dxil_op< "GroupId", 94, GroupIdClass, ComputeID, "reads the group ID (SV_GroupID)", "i32;", "rn", + [ + dxil_param<0, "i32", "", "group ID component">, + dxil_param<1, "i32", "opcode", "DXIL opcode">, + dxil_param<2, "i32", "component", "component to read"> + ]>, + dxil_map_intrinsic; + +def ThreadIdInGroup :dxil_op< "ThreadIdInGroup", 95, ThreadIdInGroupClass, ComputeID, + "reads the thread ID within the group (SV_GroupThreadID)", "i32;", "rn", + [ + dxil_param<0, "i32", "", "thread ID in group component">, + dxil_param<1, "i32", "opcode", "DXIL opcode">, + dxil_param<2, "i32", "component", "component to read (x,y,z)"> + ]>, + dxil_map_intrinsic; + +def FlattenedThreadIdInGroup :dxil_op< "FlattenedThreadIdInGroup", 96, FlattenedThreadIdInGroupClass, ComputeID, + "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i32;", "rn", + [ + dxil_param<0, "i32", "", "result">, + dxil_param<1, "i32", "opcode", "DXIL opcode"> + ]>, + dxil_map_intrinsic; diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h new file mode 100644 index 000000000000..e8e7b5396a46 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILConstants.h @@ -0,0 +1,25 @@ +//===- DXILConstants.h - Essential DXIL constants -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains essential DXIL constants. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_DIRECTX_DXILCONSTANTS_H +#define LLVM_LIB_TARGET_DIRECTX_DXILCONSTANTS_H + +namespace llvm { +namespace DXIL { + +#define DXIL_OP_ENUM +#include "DXILOperation.inc" +#undef DXIL_OP_ENUM + +} // namespace DXIL +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp new file mode 100644 index 000000000000..11b89e4ec890 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -0,0 +1,265 @@ +//===- DXILOpLower.cpp - Lowering LLVM intrinsic to DIXLOp function -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains passes and utilities to lower llvm intrinsic call +/// to DXILOp function call. 
+//===----------------------------------------------------------------------===//
+
+#include "DXILConstants.h"
+#include "DirectX.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "dxil-op-lower"
+
+using namespace llvm;
+using namespace llvm::DXIL;
+
+constexpr StringLiteral DXILOpNamePrefix = "dx.op.";
+
+enum OverloadKind : uint16_t {
+  VOID = 1,
+  HALF = 1 << 1,
+  FLOAT = 1 << 2,
+  DOUBLE = 1 << 3,
+  I1 = 1 << 4,
+  I8 = 1 << 5,
+  I16 = 1 << 6,
+  I32 = 1 << 7,
+  I64 = 1 << 8,
+  UserDefineType = 1 << 9,
+  ObjectType = 1 << 10,
+};
+
+static const char *getOverloadTypeName(OverloadKind Kind) {
+  switch (Kind) {
+  case OverloadKind::HALF:
+    return "f16";
+  case OverloadKind::FLOAT:
+    return "f32";
+  case OverloadKind::DOUBLE:
+    return "f64";
+  case OverloadKind::I1:
+    return "i1";
+  case OverloadKind::I8:
+    return "i8";
+  case OverloadKind::I16:
+    return "i16";
+  case OverloadKind::I32:
+    return "i32";
+  case OverloadKind::I64:
+    return "i64";
+  case OverloadKind::VOID:
+  case OverloadKind::ObjectType:
+  case OverloadKind::UserDefineType:
+    break;
+  }
+  llvm_unreachable("invalid overload type for name");
+  return "void";
+}
+
+static OverloadKind getOverloadKind(Type *Ty) {
+  Type::TypeID T = Ty->getTypeID();
+  switch (T) {
+  case Type::VoidTyID:
+    return OverloadKind::VOID;
+  case Type::HalfTyID:
+    return OverloadKind::HALF;
+  case Type::FloatTyID:
+    return OverloadKind::FLOAT;
+  case Type::DoubleTyID:
+    return OverloadKind::DOUBLE;
+  case Type::IntegerTyID: {
+    IntegerType *ITy = cast<IntegerType>(Ty);
+    unsigned Bits = ITy->getBitWidth();
+    switch (Bits) {
+    case 1:
+      return OverloadKind::I1;
+    case 8:
+      return OverloadKind::I8;
+    case 16:
+      return OverloadKind::I16;
+    case 32:
+      return OverloadKind::I32;
+    case 64:
+      return OverloadKind::I64;
+    default:
+      llvm_unreachable("invalid overload type");
+      return OverloadKind::VOID;
+    }
+  }
+  case Type::PointerTyID:
+    return OverloadKind::UserDefineType;
+  case Type::StructTyID:
+    return OverloadKind::ObjectType;
+  default:
+    llvm_unreachable("invalid overload type");
+    return OverloadKind::VOID;
+  }
+}
+
+static std::string getTypeName(OverloadKind Kind, Type *Ty) {
+  if (Kind < OverloadKind::UserDefineType) {
+    return getOverloadTypeName(Kind);
+  } else if (Kind == OverloadKind::UserDefineType) {
+    StructType *ST = cast<StructType>(Ty);
+    return ST->getStructName().str();
+  } else if (Kind == OverloadKind::ObjectType) {
+    StructType *ST = cast<StructType>(Ty);
+    return ST->getStructName().str();
+  } else {
+    std::string Str;
+    raw_string_ostream OS(Str);
+    Ty->print(OS);
+    return OS.str();
+  }
+}
+
+// Static properties.
+struct OpCodeProperty {
+  DXIL::OpCode OpCode;
+  // Offset in DXILOpCodeNameTable.
+  unsigned OpCodeNameOffset;
+  DXIL::OpCodeClass OpCodeClass;
+  // Offset in DXILOpCodeClassNameTable.
+  unsigned OpCodeClassNameOffset;
+  uint16_t OverloadTys;
+  llvm::Attribute::AttrKind FuncAttr;
+};
+
+// Include getOpCodeClassName, getOpCodeProperty and getOpCodeName, which are
+// generated by TableGen.
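+//
+// A rough sketch of the shapes this file relies on (signatures inferred from
+// the call sites below, not copied from the generated .inc):
+//
+//   const OpCodeProperty *getOpCodeProperty(DXIL::OpCode Op);
+//   const char *getOpCodeClassName(const OpCodeProperty &Prop);
+//   const char *getOpCodeName(DXIL::OpCode Op);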
+#define DXIL_OP_OPERATION_TABLE
+#include "DXILOperation.inc"
+#undef DXIL_OP_OPERATION_TABLE
+
+static std::string constructOverloadName(OverloadKind Kind, Type *Ty,
+                                         const OpCodeProperty &Prop) {
+  if (Kind == OverloadKind::VOID) {
+    return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop)).str();
+  }
+  return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop) + "." +
+          getTypeName(Kind, Ty))
+      .str();
+}
+
+static FunctionCallee createDXILOpFunction(DXIL::OpCode DXILOp, Function &F,
+                                           Module &M) {
+  const OpCodeProperty *Prop = getOpCodeProperty(DXILOp);
+
+  // Get return type as overload type for DXILOp.
+  // Only simple mapping case here, so return type is good enough.
+  Type *OverloadTy = F.getReturnType();
+
+  OverloadKind Kind = getOverloadKind(OverloadTy);
+  // FIXME: find the issue and report error in clang instead of check it in
+  // backend.
+  if ((Prop->OverloadTys & (uint16_t)Kind) == 0) {
+    llvm_unreachable("invalid overload");
+  }
+
+  std::string FnName = constructOverloadName(Kind, OverloadTy, *Prop);
+  assert(!M.getFunction(FnName) && "Function already exists");
+
+  auto &Ctx = M.getContext();
+  Type *OpCodeTy = Type::getInt32Ty(Ctx);
+
+  SmallVector<Type *> ArgTypes;
+  // DXIL has i32 opcode as first arg.
+  ArgTypes.emplace_back(OpCodeTy);
+  FunctionType *FT = F.getFunctionType();
+  ArgTypes.append(FT->param_begin(), FT->param_end());
+  FunctionType *DXILOpFT = FunctionType::get(OverloadTy, ArgTypes, false);
+  return M.getOrInsertFunction(FnName, DXILOpFT);
+}
+
+static void lowerIntrinsic(DXIL::OpCode DXILOp, Function &F, Module &M) {
+  auto DXILOpFn = createDXILOpFunction(DXILOp, F, M);
+  IRBuilder<> B(M.getContext());
+  Value *DXILOpArg = B.getInt32(static_cast<unsigned>(DXILOp));
+  for (User *U : make_early_inc_range(F.users())) {
+    CallInst *CI = dyn_cast<CallInst>(U);
+    if (!CI)
+      continue;
+
+    SmallVector<Value *> Args;
+    Args.emplace_back(DXILOpArg);
+    Args.append(CI->arg_begin(), CI->arg_end());
+    B.SetInsertPoint(CI);
+    CallInst *DXILCI = B.CreateCall(DXILOpFn, Args);
+    LLVM_DEBUG(DXILCI->setName(getOpCodeName(DXILOp)));
+    CI->replaceAllUsesWith(DXILCI);
+    CI->eraseFromParent();
+  }
+  if (F.user_empty())
+    F.eraseFromParent();
+}
+
+static bool lowerIntrinsics(Module &M) {
+  bool Updated = false;
+
+#define DXIL_OP_INTRINSIC_MAP
+#include "DXILOperation.inc"
+#undef DXIL_OP_INTRINSIC_MAP
+
+  for (Function &F : make_early_inc_range(M.functions())) {
+    if (!F.isDeclaration())
+      continue;
+    Intrinsic::ID ID = F.getIntrinsicID();
+    if (ID == Intrinsic::not_intrinsic)
+      continue;
+    auto LowerIt = LowerMap.find(ID);
+    if (LowerIt == LowerMap.end())
+      continue;
+    lowerIntrinsic(LowerIt->second, F, M);
+    Updated = true;
+  }
+  return Updated;
+}
+
+namespace {
+/// A pass that lowers LLVM intrinsic calls to DXIL op function calls.
+class DXILOpLowering : public PassInfoMixin<DXILOpLowering> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
+    if (lowerIntrinsics(M))
+      return PreservedAnalyses::none();
+    return PreservedAnalyses::all();
+  }
+};
+} // namespace
+
+namespace {
+class DXILOpLoweringLegacy : public ModulePass {
+public:
+  bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
+  StringRef getPassName() const override { return "DXIL Op Lowering"; }
+  DXILOpLoweringLegacy() : ModulePass(ID) {}
+
+  static char ID; // Pass identification.
+};
+char DXILOpLoweringLegacy::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
+                      false, false)
+INITIALIZE_PASS_END(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering", false,
+                    false)
+
+ModulePass *llvm::createDXILOpLoweringLegacyPass() {
+  return new DXILOpLoweringLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILPointerType.cpp b/llvm/lib/Target/DirectX/DXILPointerType.cpp
new file mode 100644
index 000000000000..1e67f1a30ec4
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILPointerType.cpp
@@ -0,0 +1,66 @@
+//===- Target/DirectX/DXILPointerType.cpp - DXIL Typed Pointer Type -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILPointerType.h"
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+class TypedPointerTracking {
+public:
+  TypedPointerTracking() {}
+  DenseMap<Type *, std::unique_ptr<TypedPointerType>> PointerTypes;
+  DenseMap<std::pair<Type *, unsigned>, std::unique_ptr<TypedPointerType>>
+      ASPointerTypes;
+};
+
+TypedPointerType *TypedPointerType::get(Type *EltTy, unsigned AddressSpace) {
+  assert(EltTy && "Can't get a pointer to <null> type!");
+  assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+  llvm::Any &TargetData = EltTy->getContext().getTargetData();
+  if (!TargetData.hasValue())
+    TargetData = Any{std::make_shared<TypedPointerTracking>()};
+
+  assert(any_isa<std::shared_ptr<TypedPointerTracking>>(TargetData) &&
+         "Unexpected target data type");
+
+  std::shared_ptr<TypedPointerTracking> Tracking =
+      any_cast<std::shared_ptr<TypedPointerTracking>>(TargetData);
+
+  // Since AddressSpace #0 is the common case, we special case it.
+  std::unique_ptr<TypedPointerType> &Entry =
+      AddressSpace == 0
+          ? Tracking->PointerTypes[EltTy]
+          : Tracking->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
+
+  if (!Entry)
+    Entry = std::unique_ptr<TypedPointerType>(
+        new TypedPointerType(EltTy, AddressSpace));
+  return Entry.get();
+}
+
+TypedPointerType::TypedPointerType(Type *E, unsigned AddrSpace)
+    : Type(E->getContext(), DXILPointerTyID), PointeeTy(E) {
+  ContainedTys = &PointeeTy;
+  NumContainedTys = 1;
+  setSubclassData(AddrSpace);
+}
+
+bool TypedPointerType::isValidElementType(Type *ElemTy) {
+  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+         !ElemTy->isMetadataTy() && !ElemTy->isTokenTy() &&
+         !ElemTy->isX86_AMXTy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILPointerType.h b/llvm/lib/Target/DirectX/DXILPointerType.h
new file mode 100644
index 000000000000..52cf2dbc40b0
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILPointerType.h
@@ -0,0 +1,52 @@
+//===- Target/DirectX/DXILPointerType.h - DXIL Typed Pointer Type ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_DXILPOINTERTYPE_H
+#define LLVM_TARGET_DIRECTX_DXILPOINTERTYPE_H
+
+#include "llvm/IR/Type.h"
+
+namespace llvm {
+namespace dxil {
+
+// DXIL has typed pointers. This pointer type abstraction is used for tracking
+// in PointerTypeAnalysis and for the bitcode ValueEnumerator.
+class TypedPointerType : public Type {
+  explicit TypedPointerType(Type *ElType, unsigned AddrSpace);
+
+  Type *PointeeTy;
+
+public:
+  TypedPointerType(const TypedPointerType &) = delete;
+  TypedPointerType &operator=(const TypedPointerType &) = delete;
+
+  /// This constructs a pointer to an object of the specified type in a
+  /// numbered address space.
+  static TypedPointerType *get(Type *ElementType, unsigned AddressSpace);
+
+  /// Return true if the specified type is valid as an element type.
+  static bool isValidElementType(Type *ElemTy);
+
+  /// Return the address space of the Pointer type.
+  unsigned getAddressSpace() const { return getSubclassData(); }
+
+  Type *getElementType() const { return PointeeTy; }
+
+  /// Implement support type inquiry through isa, cast, and dyn_cast.
+  static bool classof(const Type *T) {
+    return T->getTypeID() == DXILPointerTyID;
+  }
+};
+
+} // namespace dxil
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_DXILPOINTERTYPE_H
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
new file mode 100644
index 000000000000..14d970e6b69a
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -0,0 +1,184 @@
+//===- DXILPrepare.cpp - Prepare LLVM Module for DXIL encoding ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains passes and utilities to convert a modern LLVM
+/// module into a module compatible with the LLVM 3.7-based DirectX
+/// Intermediate Language (DXIL).
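+///
+/// For example (editorial note), `fneg` did not exist in LLVM 3.7, so the
+/// pass below rewrites `%y = fneg float %x` into
+/// `%y = fsub float -0.0, %x` before encoding.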
+//===----------------------------------------------------------------------===//
+
+#include "DirectX.h"
+#include "PointerTypeAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+
+#define DEBUG_TYPE "dxil-prepare"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+
+constexpr bool isValidForDXIL(Attribute::AttrKind Attr) {
+  return is_contained({Attribute::Alignment,
+                       Attribute::AlwaysInline,
+                       Attribute::Builtin,
+                       Attribute::ByVal,
+                       Attribute::InAlloca,
+                       Attribute::Cold,
+                       Attribute::Convergent,
+                       Attribute::InlineHint,
+                       Attribute::InReg,
+                       Attribute::JumpTable,
+                       Attribute::MinSize,
+                       Attribute::Naked,
+                       Attribute::Nest,
+                       Attribute::NoAlias,
+                       Attribute::NoBuiltin,
+                       Attribute::NoCapture,
+                       Attribute::NoDuplicate,
+                       Attribute::NoImplicitFloat,
+                       Attribute::NoInline,
+                       Attribute::NonLazyBind,
+                       Attribute::NonNull,
+                       Attribute::Dereferenceable,
+                       Attribute::DereferenceableOrNull,
+                       Attribute::NoRedZone,
+                       Attribute::NoReturn,
+                       Attribute::NoUnwind,
+                       Attribute::OptimizeForSize,
+                       Attribute::OptimizeNone,
+                       Attribute::ReadNone,
+                       Attribute::ReadOnly,
+                       Attribute::ArgMemOnly,
+                       Attribute::Returned,
+                       Attribute::ReturnsTwice,
+                       Attribute::SExt,
+                       Attribute::StackAlignment,
+                       Attribute::StackProtect,
+                       Attribute::StackProtectReq,
+                       Attribute::StackProtectStrong,
+                       Attribute::SafeStack,
+                       Attribute::StructRet,
+                       Attribute::SanitizeAddress,
+                       Attribute::SanitizeThread,
+                       Attribute::SanitizeMemory,
+                       Attribute::UWTable,
+                       Attribute::ZExt},
+                      Attr);
+}
+
+class DXILPrepareModule : public ModulePass {
+
+  static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
+                                     PointerTypeMap &PointerTypes,
+                                     Instruction &Inst, Value *Operand,
+                                     Type *Ty) {
+    // Omit bitcasts if the incoming value matches the instruction type.
+    auto It = PointerTypes.find(Operand);
+    if (It != PointerTypes.end())
+      if (cast<TypedPointerType>(It->second)->getElementType() == Ty)
+        return nullptr;
+    // Insert bitcasts where we are removing the instruction.
+    Builder.SetInsertPoint(&Inst);
+    // This code only gets hit in opaque-pointer mode, so the type of the
+    // pointer doesn't matter.
+    PointerType *PtrTy = cast<PointerType>(Operand->getType());
+    return Builder.Insert(
+        CastInst::Create(Instruction::BitCast, Operand,
+                         Builder.getInt8PtrTy(PtrTy->getAddressSpace())));
+  }
+
+public:
+  bool runOnModule(Module &M) override {
+    PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M);
+    AttributeMask AttrMask;
+    for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
+         I = Attribute::AttrKind(I + 1)) {
+      if (!isValidForDXIL(I))
+        AttrMask.addAttribute(I);
+    }
+    for (auto &F : M.functions()) {
+      F.removeFnAttrs(AttrMask);
+      F.removeRetAttrs(AttrMask);
+      for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
+        F.removeParamAttrs(Idx, AttrMask);
+
+      for (auto &BB : F) {
+        IRBuilder<> Builder(&BB);
+        for (auto &I : make_early_inc_range(BB)) {
+          if (I.getOpcode() == Instruction::FNeg) {
+            Builder.SetInsertPoint(&I);
+            Value *In = I.getOperand(0);
+            Value *Zero = ConstantFP::get(In->getType(), -0.0);
+            I.replaceAllUsesWith(Builder.CreateFSub(Zero, In));
+            I.eraseFromParent();
+            continue;
+          }
+          // Only insert bitcasts if the IR is using opaque pointers.
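+          // (With typed pointers the pointee type is already explicit, so the
+          // no-op bitcasts below are unnecessary and are skipped.)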
+          if (M.getContext().supportsTypedPointers())
+            continue;
+
+          // Emitting NoOp bitcast instructions allows the ValueEnumerator to
+          // be unmodified as it reserves instruction IDs during construction.
+          if (auto LI = dyn_cast<LoadInst>(&I)) {
+            if (Value *NoOpBitcast = maybeGenerateBitcast(
+                    Builder, PointerTypes, I, LI->getPointerOperand(),
+                    LI->getType())) {
+              LI->replaceAllUsesWith(
+                  Builder.CreateLoad(LI->getType(), NoOpBitcast));
+              LI->eraseFromParent();
+            }
+            continue;
+          }
+          if (auto SI = dyn_cast<StoreInst>(&I)) {
+            if (Value *NoOpBitcast = maybeGenerateBitcast(
+                    Builder, PointerTypes, I, SI->getPointerOperand(),
+                    SI->getValueOperand()->getType())) {
+
+              SI->replaceAllUsesWith(
+                  Builder.CreateStore(SI->getValueOperand(), NoOpBitcast));
+              SI->eraseFromParent();
+            }
+            continue;
+          }
+          if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) {
+            if (Value *NoOpBitcast = maybeGenerateBitcast(
+                    Builder, PointerTypes, I, GEP->getPointerOperand(),
+                    GEP->getResultElementType()))
+              GEP->setOperand(0, NoOpBitcast);
+            continue;
+          }
+        }
+      }
+    }
+    return true;
+  }
+
+  DXILPrepareModule() : ModulePass(ID) {}
+
+  static char ID; // Pass identification.
+};
+char DXILPrepareModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module",
+                      false, false)
+INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false,
+                    false)
+
+ModulePass *llvm::createDXILPrepareModulePass() {
+  return new DXILPrepareModule();
+}
diff --git a/llvm/lib/Target/DirectX/DXILStubs.td b/llvm/lib/Target/DirectX/DXILStubs.td
new file mode 100644
index 000000000000..ce4327f93bc1
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILStubs.td
@@ -0,0 +1,18 @@
+// DXIL doesn't actually use registers, but this gets the boilerplate code
+// generated through tablegen.
+let Namespace = "DXIL" in {
+def DXIL : Register<"DXIL">;
+def DXILClass : RegisterClass<"DXIL", [i32], 32, (add DXIL)>;
+}
+
+class DXILInst : Instruction {
+  let Namespace = "DXIL";
+  let DecoderNamespace = "DXIL";
+
+  dag OutOperandList = (outs);
+  dag InOperandList = (ins);
+  let AsmString = "dummy";
+  let Pattern = [];
+}
+
+def DummyInst : DXILInst;
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
new file mode 100644
index 000000000000..634ead98a6ae
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -0,0 +1,121 @@
+//===- DXILTranslateMetadata.cpp - Pass to emit DXIL metadata ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectX.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+static uint32_t ConstMDToUint32(const MDOperand &MDO) {
+  ConstantInt *pConst = mdconst::extract<ConstantInt>(MDO);
+  return (uint32_t)pConst->getZExtValue();
+}
+
+static ConstantAsMetadata *Uint32ToConstMD(unsigned v, LLVMContext &Ctx) {
+  return ConstantAsMetadata::get(
+      Constant::getIntegerValue(IntegerType::get(Ctx, 32), APInt(32, v)));
+}
+
+constexpr StringLiteral ValVerKey = "dx.valver";
+constexpr unsigned DXILVersionNumFields = 2;
+
+static void emitDXILValidatorVersion(Module &M, VersionTuple &ValidatorVer) {
+  NamedMDNode *DXILValidatorVersionMD = M.getNamedMetadata(ValVerKey);
+
+  // Allow re-writing the validator version, since this can be changed at
+  // later points.
+  if (DXILValidatorVersionMD)
+    M.eraseNamedMetadata(DXILValidatorVersionMD);
+
+  DXILValidatorVersionMD = M.getOrInsertNamedMetadata(ValVerKey);
+
+  auto &Ctx = M.getContext();
+  Metadata *MDVals[DXILVersionNumFields];
+  MDVals[0] = Uint32ToConstMD(ValidatorVer.getMajor(), Ctx);
+  MDVals[1] = Uint32ToConstMD(ValidatorVer.getMinor().value_or(0), Ctx);
+
+  DXILValidatorVersionMD->addOperand(MDNode::get(Ctx, MDVals));
+}
+
+static VersionTuple loadDXILValidatorVersion(MDNode *ValVerMD) {
+  if (ValVerMD->getNumOperands() != DXILVersionNumFields)
+    return VersionTuple();
+
+  unsigned Major = ConstMDToUint32(ValVerMD->getOperand(0));
+  unsigned Minor = ConstMDToUint32(ValVerMD->getOperand(1));
+  return VersionTuple(Major, Minor);
+}
+
+static void cleanModuleFlags(Module &M) {
+  constexpr StringLiteral DeadKeys[] = {ValVerKey};
+  // Collect DeadKeys in ModuleFlags.
+  StringSet<> DeadKeySet;
+  for (auto &Key : DeadKeys) {
+    if (M.getModuleFlag(Key))
+      DeadKeySet.insert(Key);
+  }
+  if (DeadKeySet.empty())
+    return;
+
+  SmallVector<Module::ModuleFlagEntry> ModuleFlags;
+  M.getModuleFlagsMetadata(ModuleFlags);
+  NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
+  MDFlags->eraseFromParent();
+  // Re-add the module flags that are not dead.
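+  // E.g. (editorial example) with flags {"Dwarf Version", "dx.valver"}, only
+  // "Dwarf Version" survives here; "dx.valver" is re-emitted separately as
+  // the !dx.valver named metadata by emitDXILValidatorVersion().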
+  for (auto &Flag : ModuleFlags) {
+    StringRef Key = Flag.Key->getString();
+    if (DeadKeySet.contains(Key))
+      continue;
+    M.addModuleFlag(Flag.Behavior, Key, Flag.Val);
+  }
+}
+
+static void cleanModule(Module &M) { cleanModuleFlags(M); }
+
+namespace {
+class DXILTranslateMetadata : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DXILTranslateMetadata() : ModulePass(ID), ValidatorVer(1, 0) {}
+
+  StringRef getPassName() const override { return "DXIL Metadata Emit"; }
+
+  bool runOnModule(Module &M) override;
+
+private:
+  VersionTuple ValidatorVer;
+};
+
+} // namespace
+
+bool DXILTranslateMetadata::runOnModule(Module &M) {
+  if (MDNode *ValVerMD = cast_or_null<MDNode>(M.getModuleFlag(ValVerKey))) {
+    auto ValVer = loadDXILValidatorVersion(ValVerMD);
+    if (!ValVer.empty())
+      ValidatorVer = ValVer;
+  }
+  emitDXILValidatorVersion(M, ValidatorVer);
+  cleanModule(M);
+  return false;
+}
+
+char DXILTranslateMetadata::ID = 0;
+
+ModulePass *llvm::createDXILTranslateMetadataPass() {
+  return new DXILTranslateMetadata();
+}
+
+INITIALIZE_PASS(DXILTranslateMetadata, "dxil-metadata-emit",
+                "DXIL Metadata Emit", false, false)
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
new file mode 100644
index 000000000000..494a71e51a89
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -0,0 +1,2963 @@
+//===- Bitcode/Writer/DXILBitcodeWriter.cpp - DXIL Bitcode Writer ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILBitcodeWriter.h"
+#include "DXILValueEnumerator.h"
+#include "PointerTypeAnalysis.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeCommon.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/UseListOrder.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Object/IRSymtab.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SHA1.h"
+
+namespace llvm {
+namespace dxil {
+
+// Generates an enum to use as an index in the Abbrev array of Metadata record.
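+// For instance, HANDLE_MDNODE_LEAF(DILocation) from Metadata.def expands to
+// an enumerator named DILocationAbbrevID.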
+enum MetadataAbbrev : unsigned {
+#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID,
+#include "llvm/IR/Metadata.def"
+  LastPlusOne
+};
+
+class DXILBitcodeWriter {
+
+  /// These are manifest constants used by the bitcode writer. They do not need
+  /// to be kept in sync with the reader, but need to be consistent within this
+  /// file.
+  enum {
+    // VALUE_SYMTAB_BLOCK abbrev id's.
+    VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+    VST_ENTRY_7_ABBREV,
+    VST_ENTRY_6_ABBREV,
+    VST_BBENTRY_6_ABBREV,
+
+    // CONSTANTS_BLOCK abbrev id's.
+    CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+    CONSTANTS_INTEGER_ABBREV,
+    CONSTANTS_CE_CAST_Abbrev,
+    CONSTANTS_NULL_Abbrev,
+
+    // FUNCTION_BLOCK abbrev id's.
+    FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+    FUNCTION_INST_BINOP_ABBREV,
+    FUNCTION_INST_BINOP_FLAGS_ABBREV,
+    FUNCTION_INST_CAST_ABBREV,
+    FUNCTION_INST_RET_VOID_ABBREV,
+    FUNCTION_INST_RET_VAL_ABBREV,
+    FUNCTION_INST_UNREACHABLE_ABBREV,
+    FUNCTION_INST_GEP_ABBREV,
+  };
+
+  // Cache some types
+  Type *I8Ty;
+  Type *I8PtrTy;
+
+  /// The stream created and owned by the client.
+  BitstreamWriter &Stream;
+
+  StringTableBuilder &StrtabBuilder;
+
+  /// The Module to write to bitcode.
+  const Module &M;
+
+  /// Enumerates ids for all values in the module.
+  ValueEnumerator VE;
+
+  /// Map that holds the correspondence between GUIDs in the summary index,
+  /// that came from indirect call profiles, and a value id generated by this
+  /// class to use in the VST and summary block records.
+  std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
+
+  /// Tracks the last value id recorded in the GUIDToValueMap.
+  unsigned GlobalValueId;
+
+  /// Saves the offset of the VSTOffset record that must eventually be
+  /// backpatched with the offset of the actual VST.
+  uint64_t VSTOffsetPlaceholder = 0;
+
+  /// Pointer to the buffer allocated by caller for bitcode writing.
+  const SmallVectorImpl<char> &Buffer;
+
+  /// The start bit of the identification block.
+  uint64_t BitcodeStartBit;
+
+  /// This maps values to their typed pointers
+  PointerTypeMap PointerMap;
+
+public:
+  /// Constructs a DXILBitcodeWriter object for the given Module,
+  /// writing to the provided \p Buffer.
+  DXILBitcodeWriter(const Module &M, SmallVectorImpl<char> &Buffer,
+                    StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream)
+      : I8Ty(Type::getInt8Ty(M.getContext())),
+        I8PtrTy(TypedPointerType::get(I8Ty, 0)), Stream(Stream),
+        StrtabBuilder(StrtabBuilder), M(M), VE(M, I8PtrTy), Buffer(Buffer),
+        BitcodeStartBit(Stream.GetCurrentBitNo()),
+        PointerMap(PointerTypeAnalysis::run(M)) {
+    GlobalValueId = VE.getValues().size();
+    // Enumerate the typed pointers
+    for (auto El : PointerMap)
+      VE.EnumerateType(El.second);
+  }
+
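(A sketch of what the constructor's PointerMap loop is compensating for; the value name below is hypothetical. LLVM 15 IR may carry opaque ptr values, while DXIL needs the older typed-pointer form, so PointerTypeAnalysis recovers an element type per pointer-valued Value and the loop registers each synthesized TypedPointerType with the enumerator before any record can refer to it.)

    ; %buf has opaque type "ptr" in the incoming IR
    %buf = alloca i32
    ; PointerTypeAnalysis::run(M) would map:
    ;   %buf -> TypedPointerType::get(i32 /*pointee*/, 0 /*addrspace*/)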
+  /// Emit the current module to the bitstream.
+  void write();
+
+  static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind);
+  static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
+                                StringRef Str, unsigned AbbrevToUse);
+  static void writeIdentificationBlock(BitstreamWriter &Stream);
+  static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V);
+  static void emitWideAPInt(SmallVectorImpl<uint64_t> &Vals, const APInt &A);
+
+  static unsigned getEncodedComdatSelectionKind(const Comdat &C);
+  static unsigned getEncodedLinkage(const GlobalValue::LinkageTypes Linkage);
+  static unsigned getEncodedLinkage(const GlobalValue &GV);
+  static unsigned getEncodedVisibility(const GlobalValue &GV);
+  static unsigned getEncodedThreadLocalMode(const GlobalValue &GV);
+  static unsigned getEncodedDLLStorageClass(const GlobalValue &GV);
+  static unsigned getEncodedCastOpcode(unsigned Opcode);
+  static unsigned getEncodedUnaryOpcode(unsigned Opcode);
+  static unsigned getEncodedBinaryOpcode(unsigned Opcode);
+  static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op);
+  static unsigned getEncodedOrdering(AtomicOrdering Ordering);
+  static uint64_t getOptimizationFlags(const Value *V);
+
+private:
+  void writeModuleVersion();
+  void writePerModuleGlobalValueSummary();
+
+  void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
+                                           GlobalValueSummary *Summary,
+                                           unsigned ValueID,
+                                           unsigned FSCallsAbbrev,
+                                           unsigned FSCallsProfileAbbrev,
+                                           const Function &F);
+  void writeModuleLevelReferences(const GlobalVariable &V,
+                                  SmallVector<uint64_t, 64> &NameVals,
+                                  unsigned FSModRefsAbbrev,
+                                  unsigned FSModVTableRefsAbbrev);
+
+  void assignValueId(GlobalValue::GUID ValGUID) {
+    GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
+  }
+
+  unsigned getValueId(GlobalValue::GUID ValGUID) {
+    const auto &VMI = GUIDToValueIdMap.find(ValGUID);
+    // Expect that any GUID value had a value Id assigned by an
+    // earlier call to assignValueId.
+    assert(VMI != GUIDToValueIdMap.end() &&
+           "GUID does not have assigned value Id");
+    return VMI->second;
+  }
+
+  // Helper to get the valueId for the type of value recorded in VI.
+  unsigned getValueId(ValueInfo VI) {
+    if (!VI.haveGVs() || !VI.getValue())
+      return getValueId(VI.getGUID());
+    return VE.getValueID(VI.getValue());
+  }
+
+  std::map<GlobalValue::GUID, unsigned> &valueIds() {
+    return GUIDToValueIdMap;
+  }
+
+  uint64_t bitcodeStartBit() { return BitcodeStartBit; }
+
+  size_t addToStrtab(StringRef Str);
+
+  unsigned createDILocationAbbrev();
+  unsigned createGenericDINodeAbbrev();
+
+  void writeAttributeGroupTable();
+  void writeAttributeTable();
+  void writeTypeTable();
+  void writeComdats();
+  void writeValueSymbolTableForwardDecl();
+  void writeModuleInfo();
+  void writeValueAsMetadata(const ValueAsMetadata *MD,
+                            SmallVectorImpl<uint64_t> &Record);
+  void writeMDTuple(const MDTuple *N, SmallVectorImpl<uint64_t> &Record,
+                    unsigned Abbrev);
+  void writeDILocation(const DILocation *N, SmallVectorImpl<uint64_t> &Record,
+                       unsigned &Abbrev);
+  void writeGenericDINode(const GenericDINode *N,
+                          SmallVectorImpl<uint64_t> &Record,
+                          unsigned &Abbrev) {
+    llvm_unreachable("DXIL cannot contain GenericDI Nodes");
+  }
+  void writeDISubrange(const DISubrange *N, SmallVectorImpl<uint64_t> &Record,
+                       unsigned Abbrev);
+  void writeDIGenericSubrange(const DIGenericSubrange *N,
+                              SmallVectorImpl<uint64_t> &Record,
+                              unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DIGenericSubrange Nodes");
+  }
+  void writeDIEnumerator(const DIEnumerator *N,
+                         SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDIBasicType(const DIBasicType *N, SmallVectorImpl<uint64_t> &Record,
+                        unsigned Abbrev);
+  void writeDIStringType(const DIStringType *N,
+                         SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DIStringType Nodes");
+  }
+  void writeDIDerivedType(const DIDerivedType *N,
+                          SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDICompositeType(const DICompositeType *N,
+                            SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDISubroutineType(const DISubroutineType *N,
+                             SmallVectorImpl<uint64_t> &Record,
+                             unsigned Abbrev);
+  void writeDIFile(const DIFile *N, SmallVectorImpl<uint64_t> &Record,
+                   unsigned Abbrev);
+  void writeDICompileUnit(const DICompileUnit *N,
+                          SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDISubprogram(const DISubprogram *N,
+                         SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDILexicalBlock(const DILexicalBlock *N,
+                           SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDILexicalBlockFile(const DILexicalBlockFile *N,
+                               SmallVectorImpl<uint64_t> &Record,
+                               unsigned Abbrev);
+  void writeDICommonBlock(const DICommonBlock *N,
+                          SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DICommonBlock Nodes");
+  }
+  void writeDINamespace(const DINamespace *N, SmallVectorImpl<uint64_t> &Record,
+                        unsigned Abbrev);
+  void writeDIMacro(const DIMacro *N, SmallVectorImpl<uint64_t> &Record,
+                    unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DIMacro Nodes");
+  }
+  void writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl<uint64_t> &Record,
+                        unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DIMacroFile Nodes");
+  }
+  void writeDIArgList(const DIArgList *N, SmallVectorImpl<uint64_t> &Record,
+                      unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DIArgList Nodes");
+  }
+  void writeDIModule(const DIModule *N, SmallVectorImpl<uint64_t> &Record,
+                     unsigned Abbrev);
+  void writeDITemplateTypeParameter(const DITemplateTypeParameter *N,
+                                    SmallVectorImpl<uint64_t> &Record,
+                                    unsigned Abbrev);
+  void writeDITemplateValueParameter(const DITemplateValueParameter *N,
+                                     SmallVectorImpl<uint64_t> &Record,
+                                     unsigned Abbrev);
+  void writeDIGlobalVariable(const DIGlobalVariable *N,
+                             SmallVectorImpl<uint64_t> &Record,
+                             unsigned Abbrev);
+  void writeDILocalVariable(const DILocalVariable *N,
+                            SmallVectorImpl<uint64_t> &Record,
+                            unsigned Abbrev);
+  void writeDILabel(const DILabel *N, SmallVectorImpl<uint64_t> &Record,
+                    unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain DILabel Nodes");
+  }
+  void writeDIExpression(const DIExpression *N,
+                         SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDIGlobalVariableExpression(const DIGlobalVariableExpression *N,
+                                       SmallVectorImpl<uint64_t> &Record,
+                                       unsigned Abbrev) {
+    llvm_unreachable("DXIL cannot contain GlobalVariableExpression Nodes");
+  }
+  void writeDIObjCProperty(const DIObjCProperty *N,
+                           SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+  void writeDIImportedEntity(const DIImportedEntity *N,
+                             SmallVectorImpl<uint64_t> &Record,
+                             unsigned Abbrev);
+  unsigned createNamedMetadataAbbrev();
+  void writeNamedMetadata(SmallVectorImpl<uint64_t> &Record);
+  unsigned createMetadataStringsAbbrev();
+  void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
+                            SmallVectorImpl<uint64_t> &Record);
+  void writeMetadataRecords(ArrayRef<const Metadata *> MDs,
+                            SmallVectorImpl<uint64_t> &Record,
+                            std::vector<unsigned> *MDAbbrevs = nullptr,
+                            std::vector<uint64_t> *IndexPos = nullptr);
+  void writeModuleMetadata();
+  void writeFunctionMetadata(const Function &F);
+  void writeFunctionMetadataAttachment(const Function &F);
+  void pushGlobalMetadataAttachment(SmallVectorImpl<uint64_t> &Record,
+                                    const GlobalObject &GO);
+  void writeModuleMetadataKinds();
+  void writeOperandBundleTags();
+  void writeSyncScopeNames();
+  void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal);
+  void writeModuleConstants();
+  bool pushValueAndType(const Value *V, unsigned InstID,
+                        SmallVectorImpl<unsigned> &Vals);
+  void writeOperandBundles(const CallBase &CB, unsigned InstID);
+  void pushValue(const Value *V, unsigned InstID,
+                 SmallVectorImpl<unsigned> &Vals);
+  void pushValueSigned(const Value *V, unsigned InstID,
+                       SmallVectorImpl<uint64_t> &Vals);
+  void writeInstruction(const Instruction &I, unsigned InstID,
+                        SmallVectorImpl<unsigned> &Vals);
+  void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST);
+  void writeGlobalValueSymbolTable(
+      DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex);
+  void writeUseList(UseListOrder &&Order);
+  void writeUseListBlock(const Function *F);
+  void writeFunction(const Function &F);
+  void writeBlockInfo();
+
+  unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { return unsigned(SSID); }
+
+  unsigned getEncodedAlign(MaybeAlign Alignment) { return encode(Alignment); }
+
+  unsigned getTypeID(Type *T, const Value *V = nullptr);
+  unsigned getTypeID(Type *T, const Function *F);
+};
+
+} // namespace dxil
+} // namespace llvm
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+////////////////////////////////////////////////////////////////////////////////
+/// Begin dxil::BitcodeWriter Implementation
+////////////////////////////////////////////////////////////////////////////////
+
+dxil::BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer,
+                                   raw_fd_stream *FS)
+    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, 512)) {
+  // Emit the file header.
+  Stream->Emit((unsigned)'B', 8);
+  Stream->Emit((unsigned)'C', 8);
+  Stream->Emit(0x0, 4);
+  Stream->Emit(0xC, 4);
+  Stream->Emit(0xE, 4);
+  Stream->Emit(0xD, 4);
+}
+
+dxil::BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); }
+
+/// Write the specified module to the specified output stream.
+void dxil::WriteDXILToFile(const Module &M, raw_ostream &Out) {
+  SmallVector<char, 0> Buffer;
+  Buffer.reserve(256 * 1024);
+
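(An aside on the header emitted by the constructor above: the two byte-wide Emit calls write 'B' and 'C', and the four nibbles pack little-endian into the bytes 0xC0 and 0xDE, so every stream opens with the standard bitcode magic. A hexdump of a fresh output would begin roughly:)

    00000000  42 43 c0 de ...                                   |BC..|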
+  // If this is darwin or another generic macho target, reserve space for the
+  // header.
+  Triple TT(M.getTargetTriple());
+  if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
+    Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
+
+  BitcodeWriter Writer(Buffer, dyn_cast<raw_fd_stream>(&Out));
+  Writer.writeModule(M);
+  Writer.writeSymtab();
+  Writer.writeStrtab();
+
+  // Write the generated bitstream to "Out".
+  if (!Buffer.empty())
+    Out.write((char *)&Buffer.front(), Buffer.size());
+}
+
+void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) {
+  Stream->EnterSubblock(Block, 3);
+
+  auto Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(Record));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+  auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv));
+
+  Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef<unsigned>{Record}, Blob);
+
+  Stream->ExitBlock();
+}
+
+void BitcodeWriter::writeSymtab() {
+  assert(!WroteStrtab && !WroteSymtab);
+
+  // If any module has module-level inline asm, we will require a registered
+  // asm parser for the target so that we can create an accurate symbol table
+  // for the module.
+  for (Module *M : Mods) {
+    if (M->getModuleInlineAsm().empty())
+      continue;
+  }
+
+  WroteSymtab = true;
+  SmallVector<char, 0> Symtab;
+  // The irsymtab::build function may be unable to create a symbol table if the
+  // module is malformed (e.g. it contains an invalid alias). Writing a symbol
+  // table is not required for correctness, but we still want to be able to
+  // write malformed modules to bitcode files, so swallow the error.
+  if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) {
+    consumeError(std::move(E));
+    return;
+  }
+
+  writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB,
+            {Symtab.data(), Symtab.size()});
+}
+
+void BitcodeWriter::writeStrtab() {
+  assert(!WroteStrtab);
+
+  std::vector<char> Strtab;
+  StrtabBuilder.finalizeInOrder();
+  Strtab.resize(StrtabBuilder.getSize());
+  StrtabBuilder.write((uint8_t *)Strtab.data());
+
+  writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB,
+            {Strtab.data(), Strtab.size()});
+
+  WroteStrtab = true;
+}
+
+void BitcodeWriter::copyStrtab(StringRef Strtab) {
+  writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab);
+  WroteStrtab = true;
+}
+
+void BitcodeWriter::writeModule(const Module &M) {
+  assert(!WroteStrtab);
+
+  // The Mods vector is used by irsymtab::build, which requires non-const
+  // Modules in case it needs to materialize metadata. But the bitcode writer
+  // requires that the module is materialized, so we can cast to non-const
+  // here, after checking that it is in fact materialized.
+  assert(M.isMaterialized());
+  Mods.push_back(const_cast<Module *>(&M));
+
+  DXILBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream);
+  ModuleWriter.write();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Begin dxil::BitcodeWriterBase Implementation
+////////////////////////////////////////////////////////////////////////////////
+
+unsigned DXILBitcodeWriter::getEncodedCastOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unknown cast instruction!");
+  case Instruction::Trunc:
+    return bitc::CAST_TRUNC;
+  case Instruction::ZExt:
+    return bitc::CAST_ZEXT;
+  case Instruction::SExt:
+    return bitc::CAST_SEXT;
+  case Instruction::FPToUI:
+    return bitc::CAST_FPTOUI;
+  case Instruction::FPToSI:
+    return bitc::CAST_FPTOSI;
+  case Instruction::UIToFP:
+    return bitc::CAST_UITOFP;
+  case Instruction::SIToFP:
+    return bitc::CAST_SITOFP;
+  case Instruction::FPTrunc:
+    return bitc::CAST_FPTRUNC;
+  case Instruction::FPExt:
+    return bitc::CAST_FPEXT;
+  case Instruction::PtrToInt:
+    return bitc::CAST_PTRTOINT;
+  case Instruction::IntToPtr:
+    return bitc::CAST_INTTOPTR;
+  case Instruction::BitCast:
+    return bitc::CAST_BITCAST;
+  case Instruction::AddrSpaceCast:
+    return bitc::CAST_ADDRSPACECAST;
+  }
+}
+
+unsigned DXILBitcodeWriter::getEncodedUnaryOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unknown unary instruction!");
+  case Instruction::FNeg:
+    return bitc::UNOP_FNEG;
+  }
+}
+
+unsigned DXILBitcodeWriter::getEncodedBinaryOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unknown binary instruction!");
+  case Instruction::Add:
+  case Instruction::FAdd:
+    return bitc::BINOP_ADD;
+  case Instruction::Sub:
+  case Instruction::FSub:
+    return bitc::BINOP_SUB;
+  case Instruction::Mul:
+  case Instruction::FMul:
+    return bitc::BINOP_MUL;
+  case Instruction::UDiv:
+    return bitc::BINOP_UDIV;
+  case Instruction::FDiv:
+  case Instruction::SDiv:
+    return bitc::BINOP_SDIV;
+  case Instruction::URem:
+    return bitc::BINOP_UREM;
+  case Instruction::FRem:
+  case Instruction::SRem:
+    return bitc::BINOP_SREM;
+  case Instruction::Shl:
+    return bitc::BINOP_SHL;
+  case Instruction::LShr:
+    return bitc::BINOP_LSHR;
+  case Instruction::AShr:
+    return bitc::BINOP_ASHR;
+  case Instruction::And:
+    return bitc::BINOP_AND;
+  case Instruction::Or:
+    return bitc::BINOP_OR;
+  case Instruction::Xor:
+    return bitc::BINOP_XOR;
+  }
+}
+
+unsigned DXILBitcodeWriter::getTypeID(Type *T, const Value *V) {
+  if (!T->isOpaquePointerTy())
+    return VE.getTypeID(T);
+  auto It = PointerMap.find(V);
+  if (It != PointerMap.end())
+    return VE.getTypeID(It->second);
+  return VE.getTypeID(I8PtrTy);
+}
+
+unsigned DXILBitcodeWriter::getTypeID(Type *T, const Function *F) {
+  auto It = PointerMap.find(F);
+  if (It != PointerMap.end())
+    return VE.getTypeID(It->second);
+  return VE.getTypeID(T);
+}
+
+unsigned DXILBitcodeWriter::getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
+  switch (Op) {
+  default:
+    llvm_unreachable("Unknown RMW operation!");
+  case AtomicRMWInst::Xchg:
+    return bitc::RMW_XCHG;
+  case AtomicRMWInst::Add:
+    return bitc::RMW_ADD;
+  case AtomicRMWInst::Sub:
+    return bitc::RMW_SUB;
+  case AtomicRMWInst::And:
+    return bitc::RMW_AND;
+  case AtomicRMWInst::Nand:
+    return bitc::RMW_NAND;
+  case AtomicRMWInst::Or:
+    return bitc::RMW_OR;
+  case AtomicRMWInst::Xor:
+    return bitc::RMW_XOR;
+  case AtomicRMWInst::Max:
+    return bitc::RMW_MAX;
+  case AtomicRMWInst::Min:
+    return bitc::RMW_MIN;
+  case AtomicRMWInst::UMax:
+    return bitc::RMW_UMAX;
+  case AtomicRMWInst::UMin:
+    return bitc::RMW_UMIN;
+  case AtomicRMWInst::FAdd:
+    return bitc::RMW_FADD;
+  case AtomicRMWInst::FSub:
+    return bitc::RMW_FSUB;
+  }
+}
+
+unsigned DXILBitcodeWriter::getEncodedOrdering(AtomicOrdering Ordering) {
+  switch (Ordering) {
+  case AtomicOrdering::NotAtomic:
+    return bitc::ORDERING_NOTATOMIC;
+  case AtomicOrdering::Unordered:
+    return bitc::ORDERING_UNORDERED;
+  case AtomicOrdering::Monotonic:
+    return bitc::ORDERING_MONOTONIC;
+  case AtomicOrdering::Acquire:
+    return bitc::ORDERING_ACQUIRE;
+  case AtomicOrdering::Release:
+    return bitc::ORDERING_RELEASE;
+  case AtomicOrdering::AcquireRelease:
+    return bitc::ORDERING_ACQREL;
+  case AtomicOrdering::SequentiallyConsistent:
+    return bitc::ORDERING_SEQCST;
+  }
+  llvm_unreachable("Invalid ordering");
+}
+
+void DXILBitcodeWriter::writeStringRecord(BitstreamWriter &Stream,
+                                          unsigned Code, StringRef Str,
+                                          unsigned AbbrevToUse) {
+  SmallVector<unsigned, 64> Vals;
+
+  // Code: [strchar x N]
+  for (char C : Str) {
+    if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(C))
+      AbbrevToUse = 0;
+    Vals.push_back(C);
+  }
+
+  // Emit the finished record.
+  Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+uint64_t DXILBitcodeWriter::getAttrKindEncoding(Attribute::AttrKind Kind) {
+  switch (Kind) {
+  case Attribute::Alignment:
+    return bitc::ATTR_KIND_ALIGNMENT;
+  case Attribute::AlwaysInline:
+    return bitc::ATTR_KIND_ALWAYS_INLINE;
+  case Attribute::ArgMemOnly:
+    return bitc::ATTR_KIND_ARGMEMONLY;
+  case Attribute::Builtin:
+    return bitc::ATTR_KIND_BUILTIN;
+  case Attribute::ByVal:
+    return bitc::ATTR_KIND_BY_VAL;
+  case Attribute::Convergent:
+    return bitc::ATTR_KIND_CONVERGENT;
+  case Attribute::InAlloca:
+    return bitc::ATTR_KIND_IN_ALLOCA;
+  case Attribute::Cold:
+    return bitc::ATTR_KIND_COLD;
+  case Attribute::InlineHint:
+    return bitc::ATTR_KIND_INLINE_HINT;
+  case Attribute::InReg:
+    return bitc::ATTR_KIND_IN_REG;
+  case Attribute::JumpTable:
+    return bitc::ATTR_KIND_JUMP_TABLE;
+  case Attribute::MinSize:
+    return bitc::ATTR_KIND_MIN_SIZE;
+  case Attribute::Naked:
+    return bitc::ATTR_KIND_NAKED;
+  case Attribute::Nest:
+    return bitc::ATTR_KIND_NEST;
+  case Attribute::NoAlias:
+    return bitc::ATTR_KIND_NO_ALIAS;
+  case Attribute::NoBuiltin:
+    return bitc::ATTR_KIND_NO_BUILTIN;
+  case Attribute::NoCapture:
+    return bitc::ATTR_KIND_NO_CAPTURE;
+  case Attribute::NoDuplicate:
+    return bitc::ATTR_KIND_NO_DUPLICATE;
+  case Attribute::NoImplicitFloat:
+    return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT;
+  case Attribute::NoInline:
+    return bitc::ATTR_KIND_NO_INLINE;
+  case Attribute::NonLazyBind:
+    return bitc::ATTR_KIND_NON_LAZY_BIND;
+  case Attribute::NonNull:
+    return bitc::ATTR_KIND_NON_NULL;
+  case Attribute::Dereferenceable:
+    return bitc::ATTR_KIND_DEREFERENCEABLE;
+  case Attribute::DereferenceableOrNull:
+    return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL;
+  case Attribute::NoRedZone:
+    return bitc::ATTR_KIND_NO_RED_ZONE;
+  case Attribute::NoReturn:
+    return bitc::ATTR_KIND_NO_RETURN;
+  case Attribute::NoUnwind:
+    return bitc::ATTR_KIND_NO_UNWIND;
+  case Attribute::OptimizeForSize:
+    return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE;
+  case Attribute::OptimizeNone:
+    return bitc::ATTR_KIND_OPTIMIZE_NONE;
+  case Attribute::ReadNone:
+    return bitc::ATTR_KIND_READ_NONE;
+  case Attribute::ReadOnly:
+    return bitc::ATTR_KIND_READ_ONLY;
+  case Attribute::Returned:
+    return bitc::ATTR_KIND_RETURNED;
+  case Attribute::ReturnsTwice:
+    return bitc::ATTR_KIND_RETURNS_TWICE;
+  case Attribute::SExt:
+    return bitc::ATTR_KIND_S_EXT;
+  case Attribute::StackAlignment:
+    return bitc::ATTR_KIND_STACK_ALIGNMENT;
+  case Attribute::StackProtect:
+    return bitc::ATTR_KIND_STACK_PROTECT;
+  case Attribute::StackProtectReq:
+    return bitc::ATTR_KIND_STACK_PROTECT_REQ;
+  case Attribute::StackProtectStrong:
+    return bitc::ATTR_KIND_STACK_PROTECT_STRONG;
+  case Attribute::SafeStack:
+    return bitc::ATTR_KIND_SAFESTACK;
+  case Attribute::StructRet:
+    return bitc::ATTR_KIND_STRUCT_RET;
+  case Attribute::SanitizeAddress:
+    return bitc::ATTR_KIND_SANITIZE_ADDRESS;
+  case Attribute::SanitizeThread:
+    return bitc::ATTR_KIND_SANITIZE_THREAD;
+  case Attribute::SanitizeMemory:
+    return bitc::ATTR_KIND_SANITIZE_MEMORY;
+  case Attribute::UWTable:
+    return bitc::ATTR_KIND_UW_TABLE;
+  case Attribute::ZExt:
+    return bitc::ATTR_KIND_Z_EXT;
+  case Attribute::EndAttrKinds:
+    llvm_unreachable("Can not encode end-attribute kinds marker.");
+  case Attribute::None:
+    llvm_unreachable("Can not encode none-attribute.");
+  case Attribute::EmptyKey:
+  case Attribute::TombstoneKey:
+    llvm_unreachable("Trying to encode EmptyKey/TombstoneKey");
+  default:
+    llvm_unreachable("Trying to encode attribute not supported by DXIL. These "
+                     "should be stripped in DXILPrepare");
+  }
+
+  llvm_unreachable("Trying to encode unknown attribute");
+}
+
+void DXILBitcodeWriter::emitSignedInt64(SmallVectorImpl<uint64_t> &Vals,
+                                        uint64_t V) {
+  if ((int64_t)V >= 0)
+    Vals.push_back(V << 1);
+  else
+    Vals.push_back((-V << 1) | 1);
+}
+
+void DXILBitcodeWriter::emitWideAPInt(SmallVectorImpl<uint64_t> &Vals,
+                                      const APInt &A) {
+  // We have an arbitrary precision integer value to write whose
+  // bit width is > 64. However, in canonical unsigned integer
+  // format it is likely that the high bits are going to be zero.
+  // So, we only write the number of active words.
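(Worked through on small values, chosen for illustration: emitSignedInt64 rotates the sign into bit 0 so that small magnitudes stay small under VBR encoding, so 3 becomes 6, i.e. 3 << 1, while -3 becomes 7, i.e. (3 << 1) | 1. emitWideAPInt below simply applies that rotation to each active 64-bit word, least significant first: an APInt whose raw words are {0xFF, 0x1} is emitted as {0x1FE, 0x2}.)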
+  unsigned NumWords = A.getActiveWords();
+  const uint64_t *RawData = A.getRawData();
+  for (unsigned i = 0; i < NumWords; i++)
+    emitSignedInt64(Vals, RawData[i]);
+}
+
+uint64_t DXILBitcodeWriter::getOptimizationFlags(const Value *V) {
+  uint64_t Flags = 0;
+
+  if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(V)) {
+    if (OBO->hasNoSignedWrap())
+      Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+    if (OBO->hasNoUnsignedWrap())
+      Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+  } else if (const auto *PEO = dyn_cast<PossiblyExactOperator>(V)) {
+    if (PEO->isExact())
+      Flags |= 1 << bitc::PEO_EXACT;
+  } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
+    if (FPMO->hasAllowReassoc())
+      Flags |= bitc::AllowReassoc;
+    if (FPMO->hasNoNaNs())
+      Flags |= bitc::NoNaNs;
+    if (FPMO->hasNoInfs())
+      Flags |= bitc::NoInfs;
+    if (FPMO->hasNoSignedZeros())
+      Flags |= bitc::NoSignedZeros;
+    if (FPMO->hasAllowReciprocal())
+      Flags |= bitc::AllowReciprocal;
+    if (FPMO->hasAllowContract())
+      Flags |= bitc::AllowContract;
+    if (FPMO->hasApproxFunc())
+      Flags |= bitc::ApproxFunc;
+  }
+
+  return Flags;
+}
+
+unsigned
+DXILBitcodeWriter::getEncodedLinkage(const GlobalValue::LinkageTypes Linkage) {
+  switch (Linkage) {
+  case GlobalValue::ExternalLinkage:
+    return 0;
+  case GlobalValue::WeakAnyLinkage:
+    return 16;
+  case GlobalValue::AppendingLinkage:
+    return 2;
+  case GlobalValue::InternalLinkage:
+    return 3;
+  case GlobalValue::LinkOnceAnyLinkage:
+    return 18;
+  case GlobalValue::ExternalWeakLinkage:
+    return 7;
+  case GlobalValue::CommonLinkage:
+    return 8;
+  case GlobalValue::PrivateLinkage:
+    return 9;
+  case GlobalValue::WeakODRLinkage:
+    return 17;
+  case GlobalValue::LinkOnceODRLinkage:
+    return 19;
+  case GlobalValue::AvailableExternallyLinkage:
+    return 12;
+  }
+  llvm_unreachable("Invalid linkage");
+}
+
+unsigned DXILBitcodeWriter::getEncodedLinkage(const GlobalValue &GV) {
+  return getEncodedLinkage(GV.getLinkage());
+}
+
+unsigned DXILBitcodeWriter::getEncodedVisibility(const GlobalValue &GV) {
+  switch (GV.getVisibility()) {
+  case GlobalValue::DefaultVisibility:
+    return 0;
+  case GlobalValue::HiddenVisibility:
+    return 1;
+  case GlobalValue::ProtectedVisibility:
+    return 2;
+  }
+  llvm_unreachable("Invalid visibility");
+}
+
+unsigned DXILBitcodeWriter::getEncodedDLLStorageClass(const GlobalValue &GV) {
+  switch (GV.getDLLStorageClass()) {
+  case GlobalValue::DefaultStorageClass:
+    return 0;
+  case GlobalValue::DLLImportStorageClass:
+    return 1;
+  case GlobalValue::DLLExportStorageClass:
+    return 2;
+  }
+  llvm_unreachable("Invalid DLL storage class");
+}
+
+unsigned DXILBitcodeWriter::getEncodedThreadLocalMode(const GlobalValue &GV) {
+  switch (GV.getThreadLocalMode()) {
+  case GlobalVariable::NotThreadLocal:
+    return 0;
+  case GlobalVariable::GeneralDynamicTLSModel:
+    return 1;
+  case GlobalVariable::LocalDynamicTLSModel:
+    return 2;
+  case GlobalVariable::InitialExecTLSModel:
+    return 3;
+  case GlobalVariable::LocalExecTLSModel:
+    return 4;
+  }
+  llvm_unreachable("Invalid TLS model");
+}
+
+unsigned DXILBitcodeWriter::getEncodedComdatSelectionKind(const Comdat &C) {
+  switch (C.getSelectionKind()) {
+  case Comdat::Any:
+    return bitc::COMDAT_SELECTION_KIND_ANY;
+  case Comdat::ExactMatch:
+    return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH;
+  case Comdat::Largest:
+    return bitc::COMDAT_SELECTION_KIND_LARGEST;
+  case Comdat::NoDeduplicate:
+    return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES;
+  case Comdat::SameSize:
+    return bitc::COMDAT_SELECTION_KIND_SAME_SIZE;
+  }
+  llvm_unreachable("Invalid selection kind");
+}
+
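A minimal usage sketch for the entry point defined earlier; the function name, output path, and error handling here are illustrative assumptions, not part of the patch:

    #include "DXILBitcodeWriter.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    // Serialize a materialized module as DXIL-flavored bitcode.
    void emitDXILModule(llvm::Module &M) {
      std::error_code EC;
      llvm::raw_fd_ostream Out("shader.bc", EC); // hypothetical output path
      if (!EC)
        llvm::dxil::WriteDXILToFile(M, Out);
    }

Note that WriteDXILToFile only streams incrementally when handed a raw_fd_stream; any other raw_ostream, as above, takes the fully buffered path and is written out in one final Out.write call.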
+////////////////////////////////////////////////////////////////////////////////
+/// Begin DXILBitcodeWriter Implementation
+////////////////////////////////////////////////////////////////////////////////
+
+void DXILBitcodeWriter::writeAttributeGroupTable() {
+  const std::vector<ValueEnumerator::IndexAndAttrSet> &AttrGrps =
+      VE.getAttributeGroups();
+  if (AttrGrps.empty())
+    return;
+
+  Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3);
+
+  SmallVector<uint64_t, 64> Record;
+  for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) {
+    unsigned AttrListIndex = Pair.first;
+    AttributeSet AS = Pair.second;
+    Record.push_back(VE.getAttributeGroupID(Pair));
+    Record.push_back(AttrListIndex);
+
+    for (Attribute Attr : AS) {
+      if (Attr.isEnumAttribute()) {
+        uint64_t Val = getAttrKindEncoding(Attr.getKindAsEnum());
+        assert(Val <= bitc::ATTR_KIND_ARGMEMONLY &&
+               "DXIL does not support attributes above ATTR_KIND_ARGMEMONLY");
+        Record.push_back(0);
+        Record.push_back(Val);
+      } else if (Attr.isIntAttribute()) {
+        uint64_t Val = getAttrKindEncoding(Attr.getKindAsEnum());
+        assert(Val <= bitc::ATTR_KIND_ARGMEMONLY &&
+               "DXIL does not support attributes above ATTR_KIND_ARGMEMONLY");
+        Record.push_back(1);
+        Record.push_back(Val);
+        Record.push_back(Attr.getValueAsInt());
+      } else {
+        StringRef Kind = Attr.getKindAsString();
+        StringRef Val = Attr.getValueAsString();
+
+        Record.push_back(Val.empty() ? 3 : 4);
+        Record.append(Kind.begin(), Kind.end());
+        Record.push_back(0);
+        if (!Val.empty()) {
+          Record.append(Val.begin(), Val.end());
+          Record.push_back(0);
+        }
+      }
+    }
+
+    Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeAttributeTable() {
+  const std::vector<AttributeList> &Attrs = VE.getAttributeLists();
+  if (Attrs.empty())
+    return;
+
+  Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+  SmallVector<uint64_t, 64> Record;
+  for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+    AttributeList AL = Attrs[i];
+    for (unsigned i : AL.indexes()) {
+      AttributeSet AS = AL.getAttributes(i);
+      if (AS.hasAttributes())
+        Record.push_back(VE.getAttributeGroupID({i, AS}));
+    }
+
+    Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
+void DXILBitcodeWriter::writeTypeTable() {
+  const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+  Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
+  SmallVector<uint64_t, 64> TypeVals;
+
+  uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
+
+  // Abbrev for TYPE_CODE_POINTER.
+  auto Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+  Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
+  unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // Abbrev for TYPE_CODE_FUNCTION.
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+  unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
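(How to read these abbreviation definitions; these are general bitstream facts, not specific to this patch: a literal operand such as BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION) costs no bits per record; Fixed(N) stores a field in exactly N bits; VBR(N) uses N-bit variable-width chunks; Array means a VBR6 element count followed by repetitions of the operand that follows it; Char6 is the dense [a-zA-Z0-9._] text encoding. The TYPE_CODE_FUNCTION abbrev above therefore describes records of the shape [isvararg:1 bit, count, return/param type indices of NumBits each].)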
+  // Abbrev for TYPE_CODE_STRUCT_ANON.
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+  unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // Abbrev for TYPE_CODE_STRUCT_NAME.
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+  unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // Abbrev for TYPE_CODE_STRUCT_NAMED.
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+  unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // Abbrev for TYPE_CODE_ARRAY.
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+  unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // Emit an entry count so the reader can reserve space.
+  TypeVals.push_back(TypeList.size());
+  Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+  TypeVals.clear();
+
+  // Loop over all of the types, emitting each in turn.
+  for (Type *T : TypeList) {
+    int AbbrevToUse = 0;
+    unsigned Code = 0;
+
+    switch (T->getTypeID()) {
+    case Type::BFloatTyID:
+    case Type::X86_AMXTyID:
+    case Type::TokenTyID:
+      llvm_unreachable("These should never be used!!!");
+      break;
+    case Type::VoidTyID:
+      Code = bitc::TYPE_CODE_VOID;
+      break;
+    case Type::HalfTyID:
+      Code = bitc::TYPE_CODE_HALF;
+      break;
+    case Type::FloatTyID:
+      Code = bitc::TYPE_CODE_FLOAT;
+      break;
+    case Type::DoubleTyID:
+      Code = bitc::TYPE_CODE_DOUBLE;
+      break;
+    case Type::X86_FP80TyID:
+      Code = bitc::TYPE_CODE_X86_FP80;
+      break;
+    case Type::FP128TyID:
+      Code = bitc::TYPE_CODE_FP128;
+      break;
+    case Type::PPC_FP128TyID:
+      Code = bitc::TYPE_CODE_PPC_FP128;
+      break;
+    case Type::LabelTyID:
+      Code = bitc::TYPE_CODE_LABEL;
+      break;
+    case Type::MetadataTyID:
+      Code = bitc::TYPE_CODE_METADATA;
+      break;
+    case Type::X86_MMXTyID:
+      Code = bitc::TYPE_CODE_X86_MMX;
+      break;
+    case Type::IntegerTyID:
+      // INTEGER: [width]
+      Code = bitc::TYPE_CODE_INTEGER;
+      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+      break;
+    case Type::DXILPointerTyID: {
+      TypedPointerType *PTy = cast<TypedPointerType>(T);
+      // POINTER: [pointee type, address space]
+      Code = bitc::TYPE_CODE_POINTER;
+      TypeVals.push_back(getTypeID(PTy->getElementType()));
+      unsigned AddressSpace = PTy->getAddressSpace();
+      TypeVals.push_back(AddressSpace);
+      if (AddressSpace == 0)
+        AbbrevToUse = PtrAbbrev;
+      break;
+    }
+    case Type::PointerTyID: {
+      PointerType *PTy = cast<PointerType>(T);
+      // POINTER: [pointee type, address space]
+      Code = bitc::TYPE_CODE_POINTER;
+      // Emitting an empty struct type for the opaque pointer's type allows
+      // this to be order-independent. Non-struct types must be emitted in
+      // bitcode before they can be referenced.
+      if (PTy->isOpaquePointerTy()) {
+        TypeVals.push_back(false);
+        Code = bitc::TYPE_CODE_OPAQUE;
+        writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME,
+                          "dxilOpaquePtrReservedName", StructNameAbbrev);
+      } else {
+        TypeVals.push_back(getTypeID(PTy->getNonOpaquePointerElementType()));
+        unsigned AddressSpace = PTy->getAddressSpace();
+        TypeVals.push_back(AddressSpace);
+        if (AddressSpace == 0)
+          AbbrevToUse = PtrAbbrev;
+      }
+      break;
+    }
+    case Type::FunctionTyID: {
+      FunctionType *FT = cast<FunctionType>(T);
+      // FUNCTION: [isvararg, retty, paramty x N]
+      Code = bitc::TYPE_CODE_FUNCTION;
+      TypeVals.push_back(FT->isVarArg());
+      TypeVals.push_back(getTypeID(FT->getReturnType()));
+      for (Type *PTy : FT->params())
+        TypeVals.push_back(getTypeID(PTy));
+      AbbrevToUse = FunctionAbbrev;
+      break;
+    }
+    case Type::StructTyID: {
+      StructType *ST = cast<StructType>(T);
+      // STRUCT: [ispacked, eltty x N]
+      TypeVals.push_back(ST->isPacked());
+      // Output all of the element types.
+      for (Type *ElTy : ST->elements())
+        TypeVals.push_back(getTypeID(ElTy));
+
+      if (ST->isLiteral()) {
+        Code = bitc::TYPE_CODE_STRUCT_ANON;
+        AbbrevToUse = StructAnonAbbrev;
+      } else {
+        if (ST->isOpaque()) {
+          Code = bitc::TYPE_CODE_OPAQUE;
+        } else {
+          Code = bitc::TYPE_CODE_STRUCT_NAMED;
+          AbbrevToUse = StructNamedAbbrev;
+        }
+
+        // Emit the name if it is present.
+        if (!ST->getName().empty())
+          writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+                            StructNameAbbrev);
+      }
+      break;
+    }
+    case Type::ArrayTyID: {
+      ArrayType *AT = cast<ArrayType>(T);
+      // ARRAY: [numelts, eltty]
+      Code = bitc::TYPE_CODE_ARRAY;
+      TypeVals.push_back(AT->getNumElements());
+      TypeVals.push_back(getTypeID(AT->getElementType()));
+      AbbrevToUse = ArrayAbbrev;
+      break;
+    }
+    case Type::FixedVectorTyID:
+    case Type::ScalableVectorTyID: {
+      VectorType *VT = cast<VectorType>(T);
+      // VECTOR [numelts, eltty]
+      Code = bitc::TYPE_CODE_VECTOR;
+      TypeVals.push_back(VT->getElementCount().getKnownMinValue());
+      TypeVals.push_back(getTypeID(VT->getElementType()));
+      break;
+    }
+    }
+
+    // Emit the finished record.
+    Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+    TypeVals.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeComdats() {
+  SmallVector<unsigned, 64> Vals;
+  for (const Comdat *C : VE.getComdats()) {
+    // COMDAT: [selection_kind, name]
+    Vals.push_back(getEncodedComdatSelectionKind(*C));
+    size_t Size = C->getName().size();
+    assert(isUInt<16>(Size));
+    Vals.push_back(Size);
+    for (char Chr : C->getName())
+      Vals.push_back((unsigned char)Chr);
+    Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
+    Vals.clear();
+  }
+}
+
+void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
+
+/// Emit top-level description of module, including target triple, inline asm,
+/// descriptors for global variables, and function prototype info.
+/// Returns the bit offset to backpatch with the location of the real VST.
+void DXILBitcodeWriter::writeModuleInfo() {
+  // Emit various pieces of data attached to a module.
+  if (!M.getTargetTriple().empty())
+    writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
+                      0 /*TODO*/);
+  const std::string &DL = M.getDataLayoutStr();
+  if (!DL.empty())
+    writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+  if (!M.getModuleInlineAsm().empty())
+    writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
+                      0 /*TODO*/);
+
+  // Emit information about sections and GC, computing how many there are.
+  // Also compute the maximum alignment value.
+  std::map<std::string, unsigned> SectionMap;
+  std::map<std::string, unsigned> GCMap;
+  MaybeAlign MaxAlignment;
+  unsigned MaxGlobalType = 0;
+  const auto UpdateMaxAlignment = [&MaxAlignment](const MaybeAlign A) {
+    if (A)
+      MaxAlignment = !MaxAlignment ? *A : std::max(*MaxAlignment, *A);
+  };
+  for (const GlobalVariable &GV : M.globals()) {
+    UpdateMaxAlignment(GV.getAlign());
+    MaxGlobalType = std::max(MaxGlobalType, getTypeID(GV.getValueType(), &GV));
+    if (GV.hasSection()) {
+      // Give section names unique ID's.
+      unsigned &Entry = SectionMap[std::string(GV.getSection())];
+      if (!Entry) {
+        writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
+                          GV.getSection(), 0 /*TODO*/);
+        Entry = SectionMap.size();
+      }
+    }
+  }
+  for (const Function &F : M) {
+    UpdateMaxAlignment(F.getAlign());
+    if (F.hasSection()) {
+      // Give section names unique ID's.
+      unsigned &Entry = SectionMap[std::string(F.getSection())];
+      if (!Entry) {
+        writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
+                          F.getSection(), 0 /*TODO*/);
+        Entry = SectionMap.size();
+      }
+    }
+    if (F.hasGC()) {
+      // Same for GC names.
+      unsigned &Entry = GCMap[F.getGC()];
+      if (!Entry) {
+        writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
+                          0 /*TODO*/);
+        Entry = GCMap.size();
+      }
+    }
+  }
+
+  // Emit abbrev for globals, now that we know # sections and max alignment.
+  unsigned SimpleGVarAbbrev = 0;
+  if (!M.global_empty()) {
+    // Add an abbrev for common globals with no visibility or thread
+    // localness.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                              Log2_32_Ceil(MaxGlobalType + 1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2
+                                                         //| explicitType << 1
+                                                         //| constant
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // Initializer.
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage.
+    if (!MaxAlignment) // Alignment.
+      Abbv->Add(BitCodeAbbrevOp(0));
+    else {
+      unsigned MaxEncAlignment = getEncodedAlign(MaxAlignment);
+      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                                Log2_32_Ceil(MaxEncAlignment + 1)));
+    }
+    if (SectionMap.empty()) // Section.
+      Abbv->Add(BitCodeAbbrevOp(0));
+    else
+      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                                Log2_32_Ceil(SectionMap.size() + 1)));
+    // Don't bother emitting vis + thread local.
+    SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+  }
+
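(To make the record shape concrete; this is a sketch and the IDs are invented: for a global like @g = internal constant i32 0, the operand list assembled below would start roughly as [typeid(i32), (0 << 2) | 2 | 1, initid + 1, /*internal*/ 3, encoded align, section id], and the trailing visibility/TLS/comdat operands are dropped because the simple abbrev applies.)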
+  // Emit the global variable information.
+  SmallVector<unsigned, 64> Vals;
+  for (const GlobalVariable &GV : M.globals()) {
+    unsigned AbbrevToUse = 0;
+
+    // GLOBALVAR: [type, isconst, initid,
+    //             linkage, alignment, section, visibility, threadlocal,
+    //             unnamed_addr, externally_initialized, dllstorageclass,
+    //             comdat]
+    Vals.push_back(getTypeID(GV.getValueType(), &GV));
+    Vals.push_back(
+        GV.getType()->getAddressSpace() << 2 | 2 |
+        (GV.isConstant() ? 1 : 0)); // HLSL Change - bitwise | was used with
+                                    // unsigned int and bool
+    Vals.push_back(
+        GV.isDeclaration() ? 0 : (VE.getValueID(GV.getInitializer()) + 1));
+    Vals.push_back(getEncodedLinkage(GV));
+    Vals.push_back(getEncodedAlign(GV.getAlign()));
+    Vals.push_back(GV.hasSection() ? SectionMap[std::string(GV.getSection())]
+                                   : 0);
+    if (GV.isThreadLocal() ||
+        GV.getVisibility() != GlobalValue::DefaultVisibility ||
+        GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None ||
+        GV.isExternallyInitialized() ||
+        GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
+        GV.hasComdat()) {
+      Vals.push_back(getEncodedVisibility(GV));
+      Vals.push_back(getEncodedThreadLocalMode(GV));
+      Vals.push_back(GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None);
+      Vals.push_back(GV.isExternallyInitialized());
+      Vals.push_back(getEncodedDLLStorageClass(GV));
+      Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0);
+    } else {
+      AbbrevToUse = SimpleGVarAbbrev;
+    }
+
+    Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+
+  // Emit the function proto information.
+  for (const Function &F : M) {
+    // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+    //            section, visibility, gc, unnamed_addr, prologuedata,
+    //            dllstorageclass, comdat, prefixdata, personalityfn]
+    Vals.push_back(getTypeID(F.getFunctionType(), &F));
+    Vals.push_back(F.getCallingConv());
+    Vals.push_back(F.isDeclaration());
+    Vals.push_back(getEncodedLinkage(F));
+    Vals.push_back(VE.getAttributeListID(F.getAttributes()));
+    Vals.push_back(getEncodedAlign(F.getAlign()));
+    Vals.push_back(F.hasSection() ? SectionMap[std::string(F.getSection())]
+                                  : 0);
+    Vals.push_back(getEncodedVisibility(F));
+    Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0);
+    Vals.push_back(F.getUnnamedAddr() != GlobalValue::UnnamedAddr::None);
+    Vals.push_back(
+        F.hasPrologueData() ? (VE.getValueID(F.getPrologueData()) + 1) : 0);
+    Vals.push_back(getEncodedDLLStorageClass(F));
+    Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0);
+    Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1)
+                                     : 0);
+    Vals.push_back(
+        F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
+
+    unsigned AbbrevToUse = 0;
+    Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+
+  // Emit the alias information.
+  for (const GlobalAlias &A : M.aliases()) {
+    // ALIAS: [alias type, aliasee val#, linkage, visibility]
+    Vals.push_back(getTypeID(A.getValueType(), &A));
+    Vals.push_back(VE.getValueID(A.getAliasee()));
+    Vals.push_back(getEncodedLinkage(A));
+    Vals.push_back(getEncodedVisibility(A));
+    Vals.push_back(getEncodedDLLStorageClass(A));
+    Vals.push_back(getEncodedThreadLocalMode(A));
+    Vals.push_back(A.getUnnamedAddr() != GlobalValue::UnnamedAddr::None);
+    unsigned AbbrevToUse = 0;
+    Stream.EmitRecord(bitc::MODULE_CODE_ALIAS_OLD, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+}
+
+void DXILBitcodeWriter::writeValueAsMetadata(
+    const ValueAsMetadata *MD, SmallVectorImpl<uint64_t> &Record) {
+  // Mimic an MDNode with a value as one operand.
+  Value *V = MD->getValue();
+  Type *Ty = V->getType();
+  if (Function *F = dyn_cast<Function>(V))
+    Ty = TypedPointerType::get(F->getFunctionType(), F->getAddressSpace());
+  else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    Ty = TypedPointerType::get(GV->getValueType(), GV->getAddressSpace());
+  Record.push_back(getTypeID(Ty));
+  Record.push_back(VE.getValueID(V));
+  Stream.EmitRecord(bitc::METADATA_VALUE, Record, 0);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeMDTuple(const MDTuple *N,
+                                     SmallVectorImpl<uint64_t> &Record,
+                                     unsigned Abbrev) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    Metadata *MD = N->getOperand(i);
+    assert(!(MD && isa<LocalAsMetadata>(MD)) &&
+           "Unexpected function-local metadata");
+    Record.push_back(VE.getMetadataOrNullID(MD));
+  }
+  Stream.EmitRecord(N->isDistinct() ? bitc::METADATA_DISTINCT_NODE
+                                    : bitc::METADATA_NODE,
+                    Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILocation(const DILocation *N,
+                                        SmallVectorImpl<uint64_t> &Record,
+                                        unsigned &Abbrev) {
+  if (!Abbrev)
+    Abbrev = createDILocationAbbrev();
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getLine());
+  Record.push_back(N->getColumn());
+  Record.push_back(VE.getMetadataID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt()));
+
+  Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev);
+  Record.clear();
+}
+
+static uint64_t rotateSign(APInt Val) {
+  int64_t I = Val.getSExtValue();
+  uint64_t U = I;
+  return I < 0 ? ~(U << 1) : U << 1;
+}
+
+static uint64_t rotateSign(DISubrange::BoundType Val) {
+  return rotateSign(Val.get<ConstantInt *>()->getValue());
+}
+
+void DXILBitcodeWriter::writeDISubrange(const DISubrange *N,
+                                        SmallVectorImpl<uint64_t> &Record,
+                                        unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(
+      N->getCount().get<ConstantInt *>()->getValue().getSExtValue());
+  Record.push_back(rotateSign(N->getLowerBound()));
+
+  Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIEnumerator(const DIEnumerator *N,
+                                          SmallVectorImpl<uint64_t> &Record,
+                                          unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(rotateSign(N->getValue()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+
+  Stream.EmitRecord(bitc::METADATA_ENUMERATOR, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIBasicType(const DIBasicType *N,
+                                         SmallVectorImpl<uint64_t> &Record,
+                                         unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getTag());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(N->getSizeInBits());
+  Record.push_back(N->getAlignInBits());
+  Record.push_back(N->getEncoding());
+
+  Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,
+                                           SmallVectorImpl<uint64_t> &Record,
+                                           unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getTag());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getBaseType()));
+  Record.push_back(N->getSizeInBits());
+  Record.push_back(N->getAlignInBits());
+  Record.push_back(N->getOffsetInBits());
+  Record.push_back(N->getFlags());
+  Record.push_back(VE.getMetadataOrNullID(N->getExtraData()));
+
+  Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
+  Record.clear();
+}
+
+void
+DXILBitcodeWriter::writeDICompositeType(const DICompositeType *N,
+                                        SmallVectorImpl<uint64_t> &Record,
+                                        unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getTag());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getBaseType()));
+  Record.push_back(N->getSizeInBits());
+  Record.push_back(N->getAlignInBits());
+  Record.push_back(N->getOffsetInBits());
+  Record.push_back(N->getFlags());
+  Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
+  Record.push_back(N->getRuntimeLang());
+  Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder()));
+  Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier()));
+
+  Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDISubroutineType(const DISubroutineType *N,
+                                              SmallVectorImpl<uint64_t> &Record,
+                                              unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getFlags());
+  Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get()));
+
+  Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIFile(const DIFile *N,
+                                    SmallVectorImpl<uint64_t> &Record,
+                                    unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawFilename()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory()));
+
+  Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
+                                           SmallVectorImpl<uint64_t> &Record,
+                                           unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getSourceLanguage());
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
+  Record.push_back(N->isOptimized());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawFlags()));
+  Record.push_back(N->getRuntimeVersion());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename()));
+  Record.push_back(N->getEmissionKind());
+  Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get()));
+  Record.push_back(/* subprograms */ 0);
+  Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get()));
+  Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get()));
+  Record.push_back(N->getDWOId());
+
+  Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDISubprogram(const DISubprogram *N,
+                                          SmallVectorImpl<uint64_t> &Record,
+                                          unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getType()));
+  Record.push_back(N->isLocalToUnit());
+  Record.push_back(N->isDefinition());
+  Record.push_back(N->getScopeLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getContainingType()));
+  Record.push_back(N->getVirtuality());
+  Record.push_back(N->getVirtualIndex());
+  Record.push_back(N->getFlags());
+  Record.push_back(N->isOptimized());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawUnit()));
+  Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
+  Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRetainedNodes().get()));
+
+  Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILexicalBlock(const DILexicalBlock *N,
+                                            SmallVectorImpl<uint64_t> &Record,
+                                            unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLine());
+  Record.push_back(N->getColumn());
+
+  Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILexicalBlockFile(
+    const DILexicalBlockFile *N, SmallVectorImpl<uint64_t> &Record,
+    unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getDiscriminator());
+
+  Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK_FILE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDINamespace(const DINamespace *N,
+                                         SmallVectorImpl<uint64_t> &Record,
+                                         unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(/* line number */ 0);
+
+  Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIModule(const DIModule *N,
+                                      SmallVectorImpl<uint64_t> &Record,
+                                      unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  for (auto &I : N->operands())
+    Record.push_back(VE.getMetadataOrNullID(I));
+
+  Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDITemplateTypeParameter(
+    const DITemplateTypeParameter *N, SmallVectorImpl<uint64_t> &Record,
+    unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getType()));
+
+  Stream.EmitRecord(bitc::METADATA_TEMPLATE_TYPE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDITemplateValueParameter(
+    const DITemplateValueParameter *N, SmallVectorImpl<uint64_t> &Record,
+    unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getTag());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getType()));
+  Record.push_back(VE.getMetadataOrNullID(N->getValue()));
+
+  Stream.EmitRecord(bitc::METADATA_TEMPLATE_VALUE, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIGlobalVariable(const DIGlobalVariable *N,
+                                              SmallVectorImpl<uint64_t> &Record,
+                                              unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getType()));
+  Record.push_back(N->isLocalToUnit());
+  Record.push_back(N->isDefinition());
+  Record.push_back(/* N->getRawVariable() */ 0);
+  Record.push_back(
+      VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
+
+  Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILocalVariable(const DILocalVariable *N,
+                                             SmallVectorImpl<uint64_t> &Record,
+                                             unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getTag());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getType()));
+  Record.push_back(N->getArg());
+  Record.push_back(N->getFlags());
+
+  Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIExpression(const DIExpression *N,
+                                          SmallVectorImpl<uint64_t> &Record,
+                                          unsigned Abbrev) {
+  Record.reserve(N->getElements().size() + 1);
+
+  Record.push_back(N->isDistinct());
+  Record.append(N->elements_begin(), N->elements_end());
+
+  Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
+  Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIObjCProperty(const DIObjCProperty *N,
+                                            SmallVectorImpl<uint64_t> &Record,
+                                            unsigned Abbrev) {
+  llvm_unreachable("DXIL does not support objc!!!");
+}
+
+void DXILBitcodeWriter::writeDIImportedEntity(const DIImportedEntity *N,
+                                              SmallVectorImpl<uint64_t> &Record,
+                                              unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(N->getTag());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getEntity()));
+  Record.push_back(N->getLine());
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+
+  Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev);
+  Record.clear();
+}
+
+unsigned DXILBitcodeWriter::createDILocationAbbrev() {
+  // Abbrev for METADATA_LOCATION.
+  //
+  // Assume the column is usually under 128, and always output the inlined-at
+  // location (it's never more expensive than building an array size 1).
+  std::shared_ptr<BitCodeAbbrev> Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  return Stream.EmitAbbrev(std::move(Abbv));
+}
+
+unsigned DXILBitcodeWriter::createGenericDINodeAbbrev() {
+  // Abbrev for METADATA_GENERIC_DEBUG.
+  //
+  // Assume the column is usually under 128, and always output the inlined-at
+  // location (it's never more expensive than building an array size 1).
+  std::shared_ptr<BitCodeAbbrev> Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  return Stream.EmitAbbrev(std::move(Abbv));
+}
+
+void DXILBitcodeWriter::writeMetadataRecords(
+    ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record,
+    std::vector<unsigned> *MDAbbrevs, std::vector<uint64_t> *IndexPos) {
+  if (MDs.empty())
+    return;
+
+  // Initialize MDNode abbreviations.
+#define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
+#include "llvm/IR/Metadata.def"
+
+  for (const Metadata *MD : MDs) {
+    if (IndexPos)
+      IndexPos->push_back(Stream.GetCurrentBitNo());
+    if (const MDNode *N = dyn_cast<MDNode>(MD)) {
+      assert(N->isResolved() && "Expected forward references to be resolved");
+
+      switch (N->getMetadataID()) {
+      default:
+        llvm_unreachable("Invalid MDNode subclass");
+#define HANDLE_MDNODE_LEAF(CLASS)                                             \
+  case Metadata::CLASS##Kind:                                                 \
+    if (MDAbbrevs)                                                            \
+      write##CLASS(cast<CLASS>(N), Record,                                    \
+                   (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]);            \
+    else                                                                      \
+      write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev);                    \
+    continue;
+#include "llvm/IR/Metadata.def"
+      }
+    }
+    writeValueAsMetadata(cast<ValueAsMetadata>(MD), Record);
+  }
+}
+
+unsigned DXILBitcodeWriter::createMetadataStringsAbbrev() {
+  auto Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING_OLD));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+  return Stream.EmitAbbrev(std::move(Abbv));
+}
+
+void DXILBitcodeWriter::writeMetadataStrings(
+    ArrayRef<const Metadata *> Strings, SmallVectorImpl<uint64_t> &Record) {
+  for (const Metadata *MD : Strings) {
+    const MDString *MDS = cast<MDString>(MD);
+    // Code: [strchar x N]
+    Record.append(MDS->bytes_begin(), MDS->bytes_end());
+
+    // Emit the finished record.
+    Stream.EmitRecord(bitc::METADATA_STRING_OLD, Record,
+                      createMetadataStringsAbbrev());
+    Record.clear();
+  }
+}
+
+void DXILBitcodeWriter::writeModuleMetadata() {
+  if (!VE.hasMDs() && M.named_metadata_empty())
+    return;
+
+  Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 5);
+
+  // Emit all abbrevs upfront, so that the reader can jump in the middle of the
+  // block and load any metadata.
+  std::vector<unsigned> MDAbbrevs;
+
+  MDAbbrevs.resize(MetadataAbbrev::LastPlusOne);
+  MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev();
+  MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
+      createGenericDINodeAbbrev();
+
+  unsigned NameAbbrev = 0;
+  if (!M.named_metadata_empty()) {
+    // Abbrev for METADATA_NAME.
+    std::shared_ptr<BitCodeAbbrev> Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    NameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+  }
+
+  SmallVector<uint64_t, 64> Record;
+  writeMetadataStrings(VE.getMDStrings(), Record);
+
+  std::vector<uint64_t> IndexPos;
+  IndexPos.reserve(VE.getNonMDStrings().size());
+  writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos);
+
+  // Write named metadata.
+  for (const NamedMDNode &NMD : M.named_metadata()) {
+    // Write name.
+    StringRef Str = NMD.getName();
+    Record.append(Str.bytes_begin(), Str.bytes_end());
+    Stream.EmitRecord(bitc::METADATA_NAME, Record, NameAbbrev);
+    Record.clear();
+
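(For orientation, with illustrative metadata and invented node numbering: a named node such as

    !dx.valver = !{!0}

is serialized as a METADATA_NAME record carrying the characters of "dx.valver", immediately followed by the METADATA_NAMED_NODE record emitted below, whose operands are the metadata IDs of !0 and any sibling operands.)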
+    for (const MDNode *N : NMD.operands())
+      Record.push_back(VE.getMetadataID(N));
+    Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeFunctionMetadata(const Function &F) {
+  if (!VE.hasMDs())
+    return;
+
+  Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4);
+  SmallVector<uint64_t, 64> Record;
+  writeMetadataStrings(VE.getMDStrings(), Record);
+  writeMetadataRecords(VE.getNonMDStrings(), Record);
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeFunctionMetadataAttachment(const Function &F) {
+  Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+
+  SmallVector<uint64_t, 64> Record;
+
+  // Write metadata attachments.
+  // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
+  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+  F.getAllMetadata(MDs);
+  if (!MDs.empty()) {
+    for (const auto &I : MDs) {
+      Record.push_back(I.first);
+      Record.push_back(VE.getMetadataID(I.second));
+    }
+    Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+    Record.clear();
+  }
+
+  for (const BasicBlock &BB : F)
+    for (const Instruction &I : BB) {
+      MDs.clear();
+      I.getAllMetadataOtherThanDebugLoc(MDs);
+
+      // If no metadata, ignore instruction.
+      if (MDs.empty())
+        continue;
+
+      Record.push_back(VE.getInstructionID(&I));
+
+      for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+        Record.push_back(MDs[i].first);
+        Record.push_back(VE.getMetadataID(MDs[i].second));
+      }
+      Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+      Record.clear();
+    }
+
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeModuleMetadataKinds() {
+  SmallVector<uint64_t, 64> Record;
+
+  // Write metadata kinds.
+  // METADATA_KIND - [n x [id, name]]
+  SmallVector<StringRef, 8> Names;
+  M.getMDKindNames(Names);
+
+  if (Names.empty())
+    return;
+
+  Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+  for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
+    Record.push_back(MDKindID);
+    StringRef KName = Names[MDKindID];
+    Record.append(KName.begin(), KName.end());
+
+    Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
+                                       bool isGlobal) {
+  if (FirstVal == LastVal)
+    return;
+
+  Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+  unsigned AggregateAbbrev = 0;
+  unsigned String8Abbrev = 0;
+  unsigned CString7Abbrev = 0;
+  unsigned CString6Abbrev = 0;
+  // If this is a constant pool for the module, emit module-specific abbrevs.
+  if (isGlobal) {
+    // Abbrev for CST_CODE_AGGREGATE.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(
+        BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal + 1)));
+    AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+    // Abbrev for CST_CODE_STRING.
+    Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    String8Abbrev = Stream.EmitAbbrev(std::move(Abbv));
+    // Abbrev for CST_CODE_CSTRING.
+    Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv));
+    // Abbrev for CST_CODE_CSTRING.
+    Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv));
+  }
+
+  SmallVector<uint64_t, 64> Record;
+
+  const ValueEnumerator::ValueList &Vals = VE.getValues();
+  Type *LastTy = nullptr;
+  for (unsigned i = FirstVal; i != LastVal; ++i) {
+    const Value *V = Vals[i].first;
+    // If we need to switch types, do so now.
+    if (V->getType() != LastTy) {
+      LastTy = V->getType();
+      Record.push_back(getTypeID(LastTy));
+      Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+                        CONSTANTS_SETTYPE_ABBREV);
+      Record.clear();
+    }
+
+    if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+      Record.push_back(unsigned(IA->hasSideEffects()) |
+                       unsigned(IA->isAlignStack()) << 1 |
+                       unsigned(IA->getDialect() & 1) << 2);
+
+      // Add the asm string.
+      const std::string &AsmStr = IA->getAsmString();
+      Record.push_back(AsmStr.size());
+      Record.append(AsmStr.begin(), AsmStr.end());
+
+      // Add the constraint string.
+      const std::string &ConstraintStr = IA->getConstraintString();
+      Record.push_back(ConstraintStr.size());
+      Record.append(ConstraintStr.begin(), ConstraintStr.end());
+      Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+      Record.clear();
+      continue;
+    }
+    const Constant *C = cast<Constant>(V);
+    unsigned Code = -1U;
+    unsigned AbbrevToUse = 0;
+    if (C->isNullValue()) {
+      Code = bitc::CST_CODE_NULL;
+    } else if (isa<UndefValue>(C)) {
+      Code = bitc::CST_CODE_UNDEF;
+    } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+      if (IV->getBitWidth() <= 64) {
+        uint64_t V = IV->getSExtValue();
+        emitSignedInt64(Record, V);
+        Code = bitc::CST_CODE_INTEGER;
+        AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+      } else { // Wide integers, > 64 bits in size.
+        // We have an arbitrary precision integer value to write whose
+        // bit width is > 64. However, in canonical unsigned integer
+        // format it is likely that the high bits are going to be zero.
+        // So, we only write the number of active words.
+        unsigned NWords = IV->getValue().getActiveWords();
+        const uint64_t *RawWords = IV->getValue().getRawData();
+        for (unsigned i = 0; i != NWords; ++i) {
+          emitSignedInt64(Record, RawWords[i]);
+        }
+        Code = bitc::CST_CODE_WIDE_INTEGER;
+      }
+    } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+      Code = bitc::CST_CODE_FLOAT;
+      Type *Ty = CFP->getType();
+      if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) {
+        Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+      } else if (Ty->isX86_FP80Ty()) {
+        // api needed to prevent premature destruction.
+        // Bits are not in the same order as a normal i80 APInt; compensate.
+        APInt api = CFP->getValueAPF().bitcastToAPInt();
+        const uint64_t *p = api.getRawData();
+        Record.push_back((p[1] << 48) | (p[0] >> 16));
+        Record.push_back(p[0] & 0xffffLL);
+      } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
+        APInt api = CFP->getValueAPF().bitcastToAPInt();
+        const uint64_t *p = api.getRawData();
+        Record.push_back(p[0]);
+        Record.push_back(p[1]);
+      } else {
+        assert(0 && "Unknown FP type!");
+      }
+    } else if (isa<ConstantDataSequential>(C) &&
+               cast<ConstantDataSequential>(C)->isString()) {
+      const ConstantDataSequential *Str = cast<ConstantDataSequential>(C);
+      // Emit constant strings specially.
+      unsigned NumElts = Str->getNumElements();
+      // If this is a null-terminated string, use the denser CSTRING encoding.
+      if (Str->isCString()) {
+        Code = bitc::CST_CODE_CSTRING;
+        --NumElts; // Don't encode the null, which isn't allowed by char6.
+      } else {
+        Code = bitc::CST_CODE_STRING;
+        AbbrevToUse = String8Abbrev;
+      }
+      bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+      bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        unsigned char V = Str->getElementAsInteger(i);
+        Record.push_back(V);
+        isCStr7 &= (V & 128) == 0;
+        if (isCStrChar6)
+          isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+      }
+
+      if (isCStrChar6)
+        AbbrevToUse = CString6Abbrev;
+      else if (isCStr7)
+        AbbrevToUse = CString7Abbrev;
+    } else if (const ConstantDataSequential *CDS =
+                   dyn_cast<ConstantDataSequential>(C)) {
+      Code = bitc::CST_CODE_DATA;
+      Type *EltTy = CDS->getType()->getArrayElementType();
+      if (isa<IntegerType>(EltTy)) {
+        for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+          Record.push_back(CDS->getElementAsInteger(i));
+      } else if (EltTy->isFloatTy()) {
+        for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+          union {
+            float F;
+            uint32_t I;
+          };
+          F = CDS->getElementAsFloat(i);
+          Record.push_back(I);
+        }
+      } else {
+        assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
+        for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+          union {
+            double F;
+            uint64_t I;
+          };
+          F = CDS->getElementAsDouble(i);
+          Record.push_back(I);
+        }
+      }
+    } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+               isa<ConstantVector>(C)) {
+      Code = bitc::CST_CODE_AGGREGATE;
+      for (const Value *Op : C->operands())
+        Record.push_back(VE.getValueID(Op));
+      AbbrevToUse = AggregateAbbrev;
+    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      switch (CE->getOpcode()) {
+      default:
+        if (Instruction::isCast(CE->getOpcode())) {
+          Code = bitc::CST_CODE_CE_CAST;
+          Record.push_back(getEncodedCastOpcode(CE->getOpcode()));
+          Record.push_back(getTypeID(C->getOperand(0)->getType()));
+          Record.push_back(VE.getValueID(C->getOperand(0)));
+          AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+        } else {
+          assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+          Code = bitc::CST_CODE_CE_BINOP;
+          Record.push_back(getEncodedBinaryOpcode(CE->getOpcode()));
+          Record.push_back(VE.getValueID(C->getOperand(0)));
+          Record.push_back(VE.getValueID(C->getOperand(1)));
+          uint64_t Flags = getOptimizationFlags(CE);
+          if (Flags != 0)
+            Record.push_back(Flags);
+        }
+        break;
+      case Instruction::GetElementPtr: {
+        Code = bitc::CST_CODE_CE_GEP;
+        const auto *GO = cast<GEPOperator>(C);
+        if (GO->isInBounds())
+          Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+        Record.push_back(getTypeID(GO->getSourceElementType()));
+        for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+          Record.push_back(getTypeID(C->getOperand(i)->getType()));
+          Record.push_back(VE.getValueID(C->getOperand(i)));
+        }
+        break;
+      }
+      case Instruction::Select:
+        Code = bitc::CST_CODE_CE_SELECT;
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ExtractElement:
+        Code = bitc::CST_CODE_CE_EXTRACTELT;
+        Record.push_back(getTypeID(C->getOperand(0)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(getTypeID(C->getOperand(1)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        break;
+      case Instruction::InsertElement:
+        Code = bitc::CST_CODE_CE_INSERTELT;
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(getTypeID(C->getOperand(2)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ShuffleVector:
+        // If the return type and argument types are the same, this is a
+        // standard shufflevector instruction. If the types are different,
+        // then the shuffle is widening or truncating the input vectors, and
+        // the argument type must also be encoded.
+        if (C->getType() == C->getOperand(0)->getType()) {
+          Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+        } else {
+          Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+          Record.push_back(getTypeID(C->getOperand(0)->getType()));
+        }
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ICmp:
+      case Instruction::FCmp:
+        Code = bitc::CST_CODE_CE_CMP;
+        Record.push_back(getTypeID(C->getOperand(0)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(CE->getPredicate());
+        break;
+      }
+    } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+      Code = bitc::CST_CODE_BLOCKADDRESS;
+      Record.push_back(getTypeID(BA->getFunction()->getType()));
+      Record.push_back(VE.getValueID(BA->getFunction()));
+      Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
+    } else {
+#ifndef NDEBUG
+      C->dump();
+#endif
+      llvm_unreachable("Unknown constant!");
+    }
+    Stream.EmitRecord(Code, Record, AbbrevToUse);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
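[Editor's aside, not part of the patch: the CSTRING narrowing in the string path above degrades from char6 to 7-bit to 8-bit as soon as a character falls outside the smaller alphabet. For reference, a standalone restatement of the char6 test (mirroring what BitCodeAbbrevOp::isChar6 accepts):]

// The char6 alphabet is [a-zA-Z0-9._]: exactly 64 symbols, so each
// character packs into six bits when the whole string qualifies.
static bool isChar6(char C) {
  return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
         (C >= '0' && C <= '9') || C == '.' || C == '_';
}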
+
+void DXILBitcodeWriter::writeModuleConstants() {
+  const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+  // Find the first constant to emit, which is the first non-globalvalue value.
+  // We know globalvalues have been emitted by WriteModuleInfo.
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+    if (!isa<GlobalValue>(Vals[i].first)) {
+      writeConstants(i, Vals.size(), true);
+      return;
+    }
+  }
+}
+
+/// pushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is higher than the
+/// instruction ID, then it is a forward reference, and it also includes the
+/// type ID. The value ID that is written is encoded relative to the InstID.
+bool DXILBitcodeWriter::pushValueAndType(const Value *V, unsigned InstID,
+                                         SmallVectorImpl<unsigned> &Vals) {
+  unsigned ValID = VE.getValueID(V);
+  // Make encoding relative to the InstID.
+  Vals.push_back(InstID - ValID);
+  if (ValID >= InstID) {
+    Vals.push_back(getTypeID(V->getType(), V));
+    return true;
+  }
+  return false;
+}
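[Editor's aside, not part of the patch: the relative scheme is easiest to see with made-up numbers. With InstID = 10, an operand with ValID = 7 emits 10 - 7 = 3 and needs no type; an operand with ValID = 12 is a forward reference, the unsigned subtraction wraps, and the type ID follows so the reader can materialize a placeholder. A sketch of just the arithmetic:]

#include <cstdint>
#include <vector>

// Sketch of the relative operand encoding (the arithmetic only; the
// enumerator and type IDs are elided). Returns true when the operand is
// a forward reference and a type ID must follow.
static bool pushRelative(unsigned InstID, unsigned ValID,
                         std::vector<uint64_t> &Vals) {
  Vals.push_back(InstID - ValID); // wraps mod 2^32 for forward references
  return ValID >= InstID;
}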
+
+/// pushValue - Like pushValueAndType, but where the type of the value is
+/// omitted (perhaps it was already encoded in an earlier operand).
+void DXILBitcodeWriter::pushValue(const Value *V, unsigned InstID,
+                                  SmallVectorImpl<unsigned> &Vals) {
+  unsigned ValID = VE.getValueID(V);
+  Vals.push_back(InstID - ValID);
+}
+
+void DXILBitcodeWriter::pushValueSigned(const Value *V, unsigned InstID,
+                                        SmallVectorImpl<uint64_t> &Vals) {
+  unsigned ValID = VE.getValueID(V);
+  int64_t diff = ((int32_t)InstID - (int32_t)ValID);
+  emitSignedInt64(Vals, diff);
+}
+
+/// WriteInstruction - Emit an instruction.
+void DXILBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID,
+                                         SmallVectorImpl<unsigned> &Vals) {
+  unsigned Code = 0;
+  unsigned AbbrevToUse = 0;
+  VE.setInstructionID(&I);
+  switch (I.getOpcode()) {
+  default:
+    if (Instruction::isCast(I.getOpcode())) {
+      Code = bitc::FUNC_CODE_INST_CAST;
+      if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+        AbbrevToUse = (unsigned)FUNCTION_INST_CAST_ABBREV;
+      Vals.push_back(getTypeID(I.getType(), &I));
+      Vals.push_back(getEncodedCastOpcode(I.getOpcode()));
+    } else {
+      assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+      Code = bitc::FUNC_CODE_INST_BINOP;
+      if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+        AbbrevToUse = (unsigned)FUNCTION_INST_BINOP_ABBREV;
+      pushValue(I.getOperand(1), InstID, Vals);
+      Vals.push_back(getEncodedBinaryOpcode(I.getOpcode()));
+      uint64_t Flags = getOptimizationFlags(&I);
+      if (Flags != 0) {
+        if (AbbrevToUse == (unsigned)FUNCTION_INST_BINOP_ABBREV)
+          AbbrevToUse = (unsigned)FUNCTION_INST_BINOP_FLAGS_ABBREV;
+        Vals.push_back(Flags);
+      }
+    }
+    break;
+
+  case Instruction::GetElementPtr: {
+    Code = bitc::FUNC_CODE_INST_GEP;
+    AbbrevToUse = (unsigned)FUNCTION_INST_GEP_ABBREV;
+    auto &GEPInst = cast<GetElementPtrInst>(I);
+    Vals.push_back(GEPInst.isInBounds());
+    Vals.push_back(getTypeID(GEPInst.getSourceElementType()));
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      pushValueAndType(I.getOperand(i), InstID, Vals);
+    break;
+  }
+  case Instruction::ExtractValue: {
+    Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+    Vals.append(EVI->idx_begin(), EVI->idx_end());
+    break;
+  }
+  case Instruction::InsertValue: {
+    Code = bitc::FUNC_CODE_INST_INSERTVAL;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    pushValueAndType(I.getOperand(1), InstID, Vals);
+    const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+    Vals.append(IVI->idx_begin(), IVI->idx_end());
+    break;
+  }
+  case Instruction::Select:
+    Code = bitc::FUNC_CODE_INST_VSELECT;
+    pushValueAndType(I.getOperand(1), InstID, Vals);
+    pushValue(I.getOperand(2), InstID, Vals);
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    break;
+  case Instruction::ExtractElement:
+    Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    pushValueAndType(I.getOperand(1), InstID, Vals);
+    break;
+  case Instruction::InsertElement:
+    Code = bitc::FUNC_CODE_INST_INSERTELT;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    pushValue(I.getOperand(1), InstID, Vals);
+    pushValueAndType(I.getOperand(2), InstID, Vals);
+    break;
+  case Instruction::ShuffleVector:
+    Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    pushValue(I.getOperand(1), InstID, Vals);
+    pushValue(I.getOperand(2), InstID, Vals);
+    break;
+  case Instruction::ICmp:
+  case Instruction::FCmp: {
+    // compare returning Int1Ty or vector of Int1Ty
+    Code = bitc::FUNC_CODE_INST_CMP2;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    pushValue(I.getOperand(1), InstID, Vals);
+    Vals.push_back(cast<CmpInst>(I).getPredicate());
+    uint64_t Flags = getOptimizationFlags(&I);
+    if (Flags != 0)
+      Vals.push_back(Flags);
+    break;
+  }
+
+  case Instruction::Ret: {
+    Code = bitc::FUNC_CODE_INST_RET;
+    unsigned NumOperands = I.getNumOperands();
+    if (NumOperands == 0)
+      AbbrevToUse = (unsigned)FUNCTION_INST_RET_VOID_ABBREV;
+    else if (NumOperands == 1) {
+      if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+        AbbrevToUse = (unsigned)FUNCTION_INST_RET_VAL_ABBREV;
+    } else {
+      for (unsigned i = 0, e = NumOperands; i != e; ++i)
+        pushValueAndType(I.getOperand(i), InstID, Vals);
+    }
+  } break;
+  case Instruction::Br: {
+    Code = bitc::FUNC_CODE_INST_BR;
+    const BranchInst &II = cast<BranchInst>(I);
+    Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+    if (II.isConditional()) {
+      Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+      pushValue(II.getCondition(), InstID, Vals);
+    }
+  } break;
+  case Instruction::Switch: {
+    Code = bitc::FUNC_CODE_INST_SWITCH;
+    const SwitchInst &SI = cast<SwitchInst>(I);
+    Vals.push_back(getTypeID(SI.getCondition()->getType()));
+    pushValue(SI.getCondition(), InstID, Vals);
+    Vals.push_back(VE.getValueID(SI.getDefaultDest()));
+    for (auto Case : SI.cases()) {
+      Vals.push_back(VE.getValueID(Case.getCaseValue()));
+      Vals.push_back(VE.getValueID(Case.getCaseSuccessor()));
+    }
+  } break;
+  case Instruction::IndirectBr:
+    Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+    Vals.push_back(getTypeID(I.getOperand(0)->getType()));
+    // Encode the address operand as relative, but not the basic blocks.
+    pushValue(I.getOperand(0), InstID, Vals);
+    for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    break;
+
+  case Instruction::Invoke: {
+    const InvokeInst *II = cast<InvokeInst>(&I);
+    const Value *Callee = II->getCalledOperand();
+    FunctionType *FTy = II->getFunctionType();
+    Code = bitc::FUNC_CODE_INST_INVOKE;
+
+    Vals.push_back(VE.getAttributeListID(II->getAttributes()));
+    Vals.push_back(II->getCallingConv() | 1 << 13);
+    Vals.push_back(VE.getValueID(II->getNormalDest()));
+    Vals.push_back(VE.getValueID(II->getUnwindDest()));
+    Vals.push_back(getTypeID(FTy));
+    pushValueAndType(Callee, InstID, Vals);
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+      pushValue(I.getOperand(i), InstID, Vals); // fixed param.
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      for (unsigned i = FTy->getNumParams(), e = I.getNumOperands() - 3;
+           i != e; ++i)
+        pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
+    }
+    break;
+  }
+  case Instruction::Resume:
+    Code = bitc::FUNC_CODE_INST_RESUME;
+    pushValueAndType(I.getOperand(0), InstID, Vals);
+    break;
+  case Instruction::Unreachable:
+    Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+    AbbrevToUse = (unsigned)FUNCTION_INST_UNREACHABLE_ABBREV;
+    break;
+
+  case Instruction::PHI: {
+    const PHINode &PN = cast<PHINode>(I);
+    Code = bitc::FUNC_CODE_INST_PHI;
+    // With the newer instruction encoding, forward references could give
+    // negative valued IDs. This is most common for PHIs, so we use
+    // signed VBRs (see the sketch following this function).
+    SmallVector<uint64_t, 128> Vals64;
+    Vals64.push_back(getTypeID(PN.getType()));
+    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+      pushValueSigned(PN.getIncomingValue(i), InstID, Vals64);
+      Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+    }
+    // Emit a Vals64 vector and exit.
+    Stream.EmitRecord(Code, Vals64, AbbrevToUse);
+    Vals64.clear();
+    return;
+  }
+
+  case Instruction::LandingPad: {
+    const LandingPadInst &LP = cast<LandingPadInst>(I);
+    Code = bitc::FUNC_CODE_INST_LANDINGPAD;
+    Vals.push_back(getTypeID(LP.getType()));
+    Vals.push_back(LP.isCleanup());
+    Vals.push_back(LP.getNumClauses());
+    for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) {
+      if (LP.isCatch(I))
+        Vals.push_back(LandingPadInst::Catch);
+      else
+        Vals.push_back(LandingPadInst::Filter);
+      pushValueAndType(LP.getClause(I), InstID, Vals);
+    }
+    break;
+  }
+
+  case Instruction::Alloca: {
+    Code = bitc::FUNC_CODE_INST_ALLOCA;
+    const AllocaInst &AI = cast<AllocaInst>(I);
+    Vals.push_back(getTypeID(AI.getAllocatedType()));
+    Vals.push_back(getTypeID(I.getOperand(0)->getType()));
+    Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+    using APV = AllocaPackedValues;
+    unsigned Record = 0;
+    unsigned EncodedAlign = getEncodedAlign(AI.getAlign());
+    Bitfield::set<APV::AlignLower>(
+        Record, EncodedAlign & ((1 << APV::AlignLower::Bits) - 1));
+    Bitfield::set<APV::AlignUpper>(Record,
+                                   EncodedAlign >> APV::AlignLower::Bits);
+    Bitfield::set<APV::UsedWithInAlloca>(Record, AI.isUsedWithInAlloca());
+    Vals.push_back(Record);
+    break;
+  }
+
+  case Instruction::Load:
+    if (cast<LoadInst>(I).isAtomic()) {
+      Code = bitc::FUNC_CODE_INST_LOADATOMIC;
+      pushValueAndType(I.getOperand(0), InstID, Vals);
+    } else {
+      Code = bitc::FUNC_CODE_INST_LOAD;
+      if (!pushValueAndType(I.getOperand(0), InstID, Vals)) // ptr
+        AbbrevToUse = (unsigned)FUNCTION_INST_LOAD_ABBREV;
+    }
+    Vals.push_back(getTypeID(I.getType()));
+    Vals.push_back(Log2(cast<LoadInst>(I).getAlign()) + 1);
+    Vals.push_back(cast<LoadInst>(I).isVolatile());
+    if (cast<LoadInst>(I).isAtomic()) {
+      Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering()));
+      Vals.push_back(
+          getEncodedSyncScopeID(cast<LoadInst>(I).getSyncScopeID()));
+    }
+    break;
+  case Instruction::Store:
+    if (cast<StoreInst>(I).isAtomic())
+      Code = bitc::FUNC_CODE_INST_STOREATOMIC;
+    else
+      Code = bitc::FUNC_CODE_INST_STORE;
+    pushValueAndType(I.getOperand(1), InstID, Vals); // ptrty + ptr
+    pushValueAndType(I.getOperand(0), InstID, Vals); // valty + val
+    Vals.push_back(Log2(cast<StoreInst>(I).getAlign()) + 1);
+    Vals.push_back(cast<StoreInst>(I).isVolatile());
+    if (cast<StoreInst>(I).isAtomic()) {
+      Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering()));
+      Vals.push_back(
+          getEncodedSyncScopeID(cast<StoreInst>(I).getSyncScopeID()));
+    }
+    break;
+  case Instruction::AtomicCmpXchg:
+    Code = bitc::FUNC_CODE_INST_CMPXCHG;
+    pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
+    pushValueAndType(I.getOperand(1), InstID, Vals); // cmp.
+    pushValue(I.getOperand(2), InstID, Vals);        // newval.
+    Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
+    Vals.push_back(
+        getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getSuccessOrdering()));
+    Vals.push_back(
+        getEncodedSyncScopeID(cast<AtomicCmpXchgInst>(I).getSyncScopeID()));
+    Vals.push_back(
+        getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
+    Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak());
+    break;
+  case Instruction::AtomicRMW:
+    Code = bitc::FUNC_CODE_INST_ATOMICRMW;
+    pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
+    pushValue(I.getOperand(1), InstID, Vals);        // val.
+    Vals.push_back(
+        getEncodedRMWOperation(cast<AtomicRMWInst>(I).getOperation()));
+    Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
+    Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
+    Vals.push_back(
+        getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID()));
+    break;
+  case Instruction::Fence:
+    Code = bitc::FUNC_CODE_INST_FENCE;
+    Vals.push_back(getEncodedOrdering(cast<FenceInst>(I).getOrdering()));
+    Vals.push_back(getEncodedSyncScopeID(cast<FenceInst>(I).getSyncScopeID()));
+    break;
+  case Instruction::Call: {
+    const CallInst &CI = cast<CallInst>(I);
+    FunctionType *FTy = CI.getFunctionType();
+
+    Code = bitc::FUNC_CODE_INST_CALL;
+
+    Vals.push_back(VE.getAttributeListID(CI.getAttributes()));
+    Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) |
+                   unsigned(CI.isMustTailCall()) << 14 | 1 << 15);
+    Vals.push_back(getTypeID(FTy, CI.getCalledFunction()));
+    pushValueAndType(CI.getCalledOperand(), InstID, Vals); // Callee
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+      // Check for labels (can happen with asm labels).
+      if (FTy->getParamType(i)->isLabelTy())
+        Vals.push_back(VE.getValueID(CI.getArgOperand(i)));
+      else
+        pushValue(CI.getArgOperand(i), InstID, Vals); // fixed param.
+    }
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      for (unsigned i = FTy->getNumParams(), e = CI.arg_size(); i != e; ++i)
+        pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs
+    }
+    break;
+  }
+  case Instruction::VAArg:
+    Code = bitc::FUNC_CODE_INST_VAARG;
+    Vals.push_back(getTypeID(I.getOperand(0)->getType())); // valistty
+    pushValue(I.getOperand(0), InstID, Vals);               // valist.
+    Vals.push_back(getTypeID(I.getType()));                 // restype.
+    break;
+  }
+
+  Stream.EmitRecord(Code, Vals, AbbrevToUse);
+  Vals.clear();
+}
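[Editor's aside, not part of the patch: the signed-VBR packing that pushValueSigned relies on folds the sign into the low bit, so the small negative deltas produced by PHI forward references stay cheap to encode. A standalone sketch mirroring the emitSignedInt64 helper used above:]

#include <cstdint>
#include <vector>

// Sketch: signed values are stored as (magnitude << 1) | sign, so the
// reader can recover small negative deltas from a compact VBR.
static void emitSigned(uint64_t V, std::vector<uint64_t> &Vals) {
  if ((int64_t)V >= 0)
    Vals.push_back(V << 1);          // non-negative: sign bit clear
  else
    Vals.push_back((-V << 1) | 1);   // negative: store magnitude, set sign
}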
+ } + } + + unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; + + // VST_ENTRY: [valueid, namechar x N] + // VST_BBENTRY: [bbid, namechar x N] + unsigned Code; + if (isa(SI->getValue())) { + Code = bitc::VST_CODE_BBENTRY; + if (isChar6) + AbbrevToUse = VST_BBENTRY_6_ABBREV; + } else { + Code = bitc::VST_CODE_ENTRY; + if (isChar6) + AbbrevToUse = VST_ENTRY_6_ABBREV; + else if (is7Bit) + AbbrevToUse = VST_ENTRY_7_ABBREV; + } + + NameVals.push_back(VE.getValueID(SI->getValue())); + for (const char *P = Name.getKeyData(), + *E = Name.getKeyData() + Name.getKeyLength(); + P != E; ++P) + NameVals.push_back((unsigned char)*P); + + // Emit the finished record. + Stream.EmitRecord(Code, NameVals, AbbrevToUse); + NameVals.clear(); + } + Stream.ExitBlock(); +} + +void DXILBitcodeWriter::writeUseList(UseListOrder &&Order) { + assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); + unsigned Code; + if (isa(Order.V)) + Code = bitc::USELIST_CODE_BB; + else + Code = bitc::USELIST_CODE_DEFAULT; + + SmallVector Record(Order.Shuffle.begin(), Order.Shuffle.end()); + Record.push_back(VE.getValueID(Order.V)); + Stream.EmitRecord(Code, Record); +} + +void DXILBitcodeWriter::writeUseListBlock(const Function *F) { + auto hasMore = [&]() { + return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; + }; + if (!hasMore()) + // Nothing to do. + return; + + Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); + while (hasMore()) { + writeUseList(std::move(VE.UseListOrders.back())); + VE.UseListOrders.pop_back(); + } + Stream.ExitBlock(); +} + +/// Emit a function body to the module stream. +void DXILBitcodeWriter::writeFunction(const Function &F) { + Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); + VE.incorporateFunction(F); + + SmallVector Vals; + + // Emit the number of basic blocks, so the reader can create them ahead of + // time. + Vals.push_back(VE.getBasicBlocks().size()); + Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals); + Vals.clear(); + + // If there are function-local constants, emit them now. + unsigned CstStart, CstEnd; + VE.getFunctionConstantRange(CstStart, CstEnd); + writeConstants(CstStart, CstEnd, false); + + // If there is function-local metadata, emit it now. + writeFunctionMetadata(F); + + // Keep a running idea of what the instruction ID is. + unsigned InstID = CstEnd; + + bool NeedsMetadataAttachment = F.hasMetadata(); + + DILocation *LastDL = nullptr; + + // Finally, emit all the instructions, in order. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; + ++I) { + writeInstruction(*I, InstID, Vals); + + if (!I->getType()->isVoidTy()) + ++InstID; + + // If the instruction has metadata, write a metadata attachment later. + NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); + + // If the instruction has a debug location, emit it. + DILocation *DL = I->getDebugLoc(); + if (!DL) + continue; + + if (DL == LastDL) { + // Just repeat the same debug loc as last time. + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); + continue; + } + + Vals.push_back(DL->getLine()); + Vals.push_back(DL->getColumn()); + Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); + Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); + Vals.clear(); + + LastDL = DL; + } + + // Emit names for all the instructions etc. 
+
+/// Emit a function body to the module stream.
+void DXILBitcodeWriter::writeFunction(const Function &F) {
+  Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+  VE.incorporateFunction(F);
+
+  SmallVector<unsigned, 128> Vals;
+
+  // Emit the number of basic blocks, so the reader can create them ahead of
+  // time.
+  Vals.push_back(VE.getBasicBlocks().size());
+  Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+  Vals.clear();
+
+  // If there are function-local constants, emit them now.
+  unsigned CstStart, CstEnd;
+  VE.getFunctionConstantRange(CstStart, CstEnd);
+  writeConstants(CstStart, CstEnd, false);
+
+  // If there is function-local metadata, emit it now.
+  writeFunctionMetadata(F);
+
+  // Keep a running idea of what the instruction ID is.
+  unsigned InstID = CstEnd;
+
+  bool NeedsMetadataAttachment = F.hasMetadata();
+
+  DILocation *LastDL = nullptr;
+
+  // Finally, emit all the instructions, in order.
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+         ++I) {
+      writeInstruction(*I, InstID, Vals);
+
+      if (!I->getType()->isVoidTy())
+        ++InstID;
+
+      // If the instruction has metadata, write a metadata attachment later.
+      NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+
+      // If the instruction has a debug location, emit it.
+      DILocation *DL = I->getDebugLoc();
+      if (!DL)
+        continue;
+
+      if (DL == LastDL) {
+        // Just repeat the same debug loc as last time.
+        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+        continue;
+      }
+
+      Vals.push_back(DL->getLine());
+      Vals.push_back(DL->getColumn());
+      Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
+      Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+      Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+      Vals.clear();
+
+      LastDL = DL;
+    }
+
+  // Emit names for all the instructions etc.
+  if (auto *Symtab = F.getValueSymbolTable())
+    writeFunctionLevelValueSymbolTable(*Symtab);
+
+  if (NeedsMetadataAttachment)
+    writeFunctionMetadataAttachment(F);
+
+  writeUseListBlock(&F);
+  VE.purgeFunction();
+  Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+void DXILBitcodeWriter::writeBlockInfo() {
+  // We only want to emit block info records for blocks that have multiple
+  // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.
+  // Other blocks can define their abbrevs inline.
+  Stream.EnterBlockInfoBlock();
+
+  { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   std::move(Abbv)) != VST_ENTRY_8_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  { // 7-bit fixed width VST_ENTRY strings.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   std::move(Abbv)) != VST_ENTRY_7_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // 6-bit char6 VST_ENTRY strings.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   std::move(Abbv)) != VST_ENTRY_6_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // 6-bit char6 VST_BBENTRY strings.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   std::move(Abbv)) != VST_BBENTRY_6_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  { // SETTYPE abbrev for CONSTANTS_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                              VE.computeBitsRequiredForTypeIndicies()));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   std::move(Abbv)) != CONSTANTS_SETTYPE_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  { // INTEGER abbrev for CONSTANTS_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   std::move(Abbv)) != CONSTANTS_INTEGER_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  { // CE_CAST abbrev for CONSTANTS_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,      // typeid
+                              VE.computeBitsRequiredForTypeIndicies()));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // value id
+
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   std::move(Abbv)) != CONSTANTS_CE_CAST_Abbrev)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // NULL abbrev for CONSTANTS_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   std::move(Abbv)) != CONSTANTS_NULL_Abbrev)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  // FIXME: This should only use space for first class types!
+
+  { // INST_LOAD abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,    // dest ty
+                              VE.computeBitsRequiredForTypeIndicies()));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // Align
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_LOAD_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // INST_BINOP abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // LHS
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // RHS
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_BINOP_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // LHS
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // RHS
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_BINOP_FLAGS_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // INST_CAST abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,    // dest ty
+                              VE.computeBitsRequiredForTypeIndicies()));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_CAST_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  { // INST_RET abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_RET_VOID_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // INST_RET abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_RET_VAL_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_UNREACHABLE_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+  {
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+                              Log2_32_Ceil(VE.getTypes().size() + 1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+        (unsigned)FUNCTION_INST_GEP_ABBREV)
+      assert(false && "Unexpected abbrev ordering!");
+  }
+
+  Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeModuleVersion() {
+  // VERSION: [version#]
+  Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{1});
+}
+
+/// WriteModule - Emit the specified module to the bitstream.
+void DXILBitcodeWriter::write() {
+  // The identification block is new since llvm-3.7, but the old bitcode reader
+  // will skip it.
+  // writeIdentificationBlock(Stream);
+
+  Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+  // It is redundant to fully-specify this here, but nice to make it explicit
+  // so that it is clear the DXIL module version is different.
+  DXILBitcodeWriter::writeModuleVersion();
+
+  // Emit blockinfo, which defines the standard abbreviations etc.
+  writeBlockInfo();
+
+  // Emit information about attribute groups.
+  writeAttributeGroupTable();
+
+  // Emit information about parameter attributes.
+  writeAttributeTable();
+
+  // Emit information describing all of the types in the module.
+  writeTypeTable();
+
+  writeComdats();
+
+  // Emit top-level description of module, including target triple, inline asm,
+  // descriptors for global variables, and function prototype info.
+  writeModuleInfo();
+
+  // Emit constants.
+  writeModuleConstants();
+
+  // Emit metadata kind names.
+  writeModuleMetadataKinds();
+
+  // Emit metadata.
+  writeModuleMetadata();
+
+  // Emit names for globals/functions etc.
+  // DXIL uses the same format for the module-level value symbol table as for
+  // the function-level table.
+  writeFunctionLevelValueSymbolTable(M.getValueSymbolTable());
+
+  // Emit module-level use-lists.
+  writeUseListBlock(nullptr);
+
+  // Emit function bodies.
+  for (const Function &F : M)
+    if (!F.isDeclaration())
+      writeFunction(F);
+
+  Stream.ExitBlock();
+}
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
new file mode 100644
index 000000000000..289f692f0f82
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -0,0 +1,82 @@
+//===- Bitcode/Writer/DXILBitcodeWriter.cpp - DXIL Bitcode Writer ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class BitstreamWriter;
+class Module;
+class raw_ostream;
+
+namespace dxil {
+
+class BitcodeWriter {
+  SmallVectorImpl<char> &Buffer;
+  std::unique_ptr<BitstreamWriter> Stream;
+
+  StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
+
+  // Owns any strings created by the irsymtab writer until we create the
+  // string table.
+  BumpPtrAllocator Alloc;
+
+  bool WroteStrtab = false, WroteSymtab = false;
+
+  void writeBlob(unsigned Block, unsigned Record, StringRef Blob);
+
+  std::vector<Module *> Mods;
+
+public:
+  /// Create a BitcodeWriter that writes to Buffer.
+  BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr);
+
+  ~BitcodeWriter();
+
+  /// Attempt to write a symbol table to the bitcode file. This must be called
+  /// at most once after all modules have been written.
+  ///
+  /// A reader does not require a symbol table to interpret a bitcode file;
+  /// the symbol table is needed only to improve link-time performance. So
+  /// this function may decide not to write a symbol table. It may so decide
+  /// if, for example, the target is unregistered or the IR is malformed.
+  void writeSymtab();
+
+  /// Write the bitcode file's string table. This must be called exactly once
+  /// after all modules and the optional symbol table have been written.
+  void writeStrtab();
+
+  /// Copy the string table for another module into this bitcode file. This
+  /// should be called after copying the module itself into the bitcode file.
+  void copyStrtab(StringRef Strtab);
+
+  /// Write the specified module to the buffer specified at construction time.
+  void writeModule(const Module &M);
+};
+
+/// Write the specified module to the specified raw output stream.
+///
+/// For streams where it matters, the given stream should be in "binary"
+/// mode.
+void WriteDXILToFile(const Module &M, raw_ostream &Out);
+
+} // namespace dxil
+
+} // namespace llvm
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
new file mode 100644
index 000000000000..08944ee3f1fe
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
@@ -0,0 +1,1147 @@
+//===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+// Forked from lib/Bitcode/Writer
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILValueEnumerator.h"
+#include "DXILPointerType.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <tuple>
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+
+struct OrderMap {
+  DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
+  unsigned LastGlobalConstantID = 0;
+  unsigned LastGlobalValueID = 0;
+
+  OrderMap() = default;
+
+  bool isGlobalConstant(unsigned ID) const {
+    return ID <= LastGlobalConstantID;
+  }
+
+  bool isGlobalValue(unsigned ID) const {
+    return ID <= LastGlobalValueID && !isGlobalConstant(ID);
+  }
+
+  unsigned size() const { return IDs.size(); }
+  std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }
+
+  std::pair<unsigned, bool> lookup(const Value *V) const {
+    return IDs.lookup(V);
+  }
+
+  void index(const Value *V) {
+    // Explicitly sequence get-size and insert-value operations to avoid UB.
+    unsigned ID = IDs.size() + 1;
+    IDs[V].first = ID;
+  }
+};
+
+} // end anonymous namespace
+
+static void orderValue(const Value *V, OrderMap &OM) {
+  if (OM.lookup(V).first)
+    return;
+
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (C->getNumOperands() && !isa<GlobalValue>(C)) {
+      for (const Value *Op : C->operands())
+        if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
+          orderValue(Op, OM);
+      if (auto *CE = dyn_cast<ConstantExpr>(C))
+        if (CE->getOpcode() == Instruction::ShuffleVector)
+          orderValue(CE->getShuffleMaskForBitcode(), OM);
+    }
+  }
+
+  // Note: we cannot cache this lookup above, since inserting into the map
+  // changes the map's size, and thus affects the other IDs.
+  OM.index(V);
+}
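[Editor's aside, not part of the patch: the two-step body of OrderMap::index above is deliberate. Before C++17, the map subscript (which may insert) and the size() read in a single fused expression were unsequenced relative to each other, so the assigned ID could be off by one depending on evaluation order. An illustrative comparison using std::map, under that reading of the comment:]

#include <map>

// Fragile pre-C++17 (evaluation order of the two sides is unspecified):
//   IDs[V] = IDs.size() + 1;
// Safe: read the size first, then insert.
void indexValue(std::map<const void *, unsigned> &IDs, const void *V) {
  unsigned ID = IDs.size() + 1; // 1) compute the next ID
  IDs[V] = ID;                  // 2) then create the mapping
}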
+
+static OrderMap orderModule(const Module &M) {
+  // This needs to match the order used by ValueEnumerator::ValueEnumerator()
+  // and ValueEnumerator::incorporateFunction().
+  OrderMap OM;
+
+  // In the reader, initializers of GlobalValues are set *after* all the
+  // globals have been read. Rather than awkwardly modeling this behaviour
+  // directly in predictValueUseListOrderImpl(), just assign IDs to
+  // initializers of GlobalValues before GlobalValues themselves to model this
+  // implicitly.
+  for (const GlobalVariable &G : M.globals())
+    if (G.hasInitializer())
+      if (!isa<GlobalValue>(G.getInitializer()))
+        orderValue(G.getInitializer(), OM);
+  for (const GlobalAlias &A : M.aliases())
+    if (!isa<GlobalValue>(A.getAliasee()))
+      orderValue(A.getAliasee(), OM);
+  for (const GlobalIFunc &I : M.ifuncs())
+    if (!isa<GlobalValue>(I.getResolver()))
+      orderValue(I.getResolver(), OM);
+  for (const Function &F : M) {
+    for (const Use &U : F.operands())
+      if (!isa<GlobalValue>(U.get()))
+        orderValue(U.get(), OM);
+  }
+
+  // As constants used in metadata operands are emitted as module-level
+  // constants, we must order them before other operands. Also, we must order
+  // these before global values, as these will be read before setting the
+  // global values' initializers. The latter matters for constants which have
+  // uses towards other constants that are used as initializers.
+  auto orderConstantValue = [&OM](const Value *V) {
+    if ((isa<Constant>(V) && !isa<GlobalValue>(V)) || isa<InlineAsm>(V))
+      orderValue(V, OM);
+  };
+  for (const Function &F : M) {
+    if (F.isDeclaration())
+      continue;
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        for (const Value *V : I.operands()) {
+          if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
+            if (const auto *VAM =
+                    dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
+              orderConstantValue(VAM->getValue());
+            } else if (const auto *AL =
+                           dyn_cast<DIArgList>(MAV->getMetadata())) {
+              for (const auto *VAM : AL->getArgs())
+                orderConstantValue(VAM->getValue());
+            }
+          }
+        }
+  }
+  OM.LastGlobalConstantID = OM.size();
+
+  // Initializers of GlobalValues are processed in
+  // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
+  // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
+  // by giving IDs in reverse order.
+  //
+  // Since GlobalValues never reference each other directly (just through
+  // initializers), their relative IDs only matter for determining order of
+  // uses in their initializers.
+  for (const Function &F : M)
+    orderValue(&F, OM);
+  for (const GlobalAlias &A : M.aliases())
+    orderValue(&A, OM);
+  for (const GlobalIFunc &I : M.ifuncs())
+    orderValue(&I, OM);
+  for (const GlobalVariable &G : M.globals())
+    orderValue(&G, OM);
+  OM.LastGlobalValueID = OM.size();
+
+  for (const Function &F : M) {
+    if (F.isDeclaration())
+      continue;
+    // Here we need to match the union of
+    // ValueEnumerator::incorporateFunction() and WriteFunction(). Basic
+    // blocks are implicitly declared before anything else (by declaring their
+    // size).
+    for (const BasicBlock &BB : F)
+      orderValue(&BB, OM);
+    for (const Argument &A : F.args())
+      orderValue(&A, OM);
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB) {
+        for (const Value *Op : I.operands())
+          if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+              isa<InlineAsm>(*Op))
+            orderValue(Op, OM);
+        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+          orderValue(SVI->getShuffleMaskForBitcode(), OM);
+      }
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        orderValue(&I, OM);
+  }
+  return OM;
+}
+
+static void predictValueUseListOrderImpl(const Value *V, const Function *F,
+                                         unsigned ID, const OrderMap &OM,
+                                         UseListOrderStack &Stack) {
+  // Predict use-list order for this one.
+  using Entry = std::pair<const Use *, unsigned>;
+  SmallVector<Entry, 64> List;
+  for (const Use &U : V->uses())
+    // Check if this user will be serialized.
+    if (OM.lookup(U.getUser()).first)
+      List.push_back(std::make_pair(&U, List.size()));
+
+  if (List.size() < 2)
+    // We may have lost some users.
+    return;
+
+  bool IsGlobalValue = OM.isGlobalValue(ID);
+  llvm::sort(List, [&](const Entry &L, const Entry &R) {
+    const Use *LU = L.first;
+    const Use *RU = R.first;
+    if (LU == RU)
+      return false;
+
+    auto LID = OM.lookup(LU->getUser()).first;
+    auto RID = OM.lookup(RU->getUser()).first;
+
+    // Global values are processed in reverse order.
+    //
+    // Moreover, initializers of GlobalValues are set *after* all the globals
+    // have been read (despite having earlier IDs). Rather than awkwardly
+    // modeling this behaviour here, orderModule() has assigned IDs to
+    // initializers of GlobalValues before GlobalValues themselves.
+    if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) {
+      if (LID == RID)
+        return LU->getOperandNo() > RU->getOperandNo();
+      return LID < RID;
+    }
+
+    // If ID is 4, then expect: 7 6 5 1 2 3.
+    if (LID < RID) {
+      if (RID <= ID)
+        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+          return true;
+      return false;
+    }
+    if (RID < LID) {
+      if (LID <= ID)
+        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+          return false;
+      return true;
+    }
+
+    // LID and RID are equal, so we have different operands of the same user.
+    // Assume operands are added in order for all instructions.
+    if (LID <= ID)
+      if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+        return LU->getOperandNo() < RU->getOperandNo();
+    return LU->getOperandNo() > RU->getOperandNo();
+  });
+
+  if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
+        return L.second < R.second;
+      }))
+    // Order is already correct.
+    return;
+
+  // Store the shuffle.
+  Stack.emplace_back(V, F, List.size());
+  assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
+  for (size_t I = 0, E = List.size(); I != E; ++I)
+    Stack.back().Shuffle[I] = List[I].second;
+}
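[Editor's aside, not part of the patch: the "7 6 5 1 2 3" expectation above can be checked in isolation. For a non-GlobalValue with ID 4, users not yet emitted (IDs above 4) sort first in descending order, followed by already-emitted users ascending. A standalone check with made-up IDs and the same ordering rule:]

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  const unsigned ID = 4; // the value whose use-list is being predicted
  std::vector<unsigned> Users = {1, 2, 3, 5, 6, 7};
  std::sort(Users.begin(), Users.end(), [&](unsigned L, unsigned R) {
    if (L < R)
      return R <= ID; // both already emitted: keep ascending order
    if (R < L)
      return L > ID;  // both still pending: reverse (descending) order
    return false;
  });
  assert((Users == std::vector<unsigned>{7, 6, 5, 1, 2, 3}));
  return 0;
}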
+
+static void predictValueUseListOrder(const Value *V, const Function *F,
+                                     OrderMap &OM, UseListOrderStack &Stack) {
+  auto &IDPair = OM[V];
+  assert(IDPair.first && "Unmapped value");
+  if (IDPair.second)
+    // Already predicted.
+    return;
+
+  // Do the actual prediction.
+  IDPair.second = true;
+  if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
+    predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
+
+  // Recursive descent into constants.
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (C->getNumOperands()) { // Visit GlobalValues.
+      for (const Value *Op : C->operands())
+        if (isa<Constant>(Op)) // Visit GlobalValues.
+          predictValueUseListOrder(Op, F, OM, Stack);
+      if (auto *CE = dyn_cast<ConstantExpr>(C))
+        if (CE->getOpcode() == Instruction::ShuffleVector)
+          predictValueUseListOrder(CE->getShuffleMaskForBitcode(), F, OM,
+                                   Stack);
+    }
+  }
+}
+
+static UseListOrderStack predictUseListOrder(const Module &M) {
+  OrderMap OM = orderModule(M);
+
+  // Use-list orders need to be serialized after all the users have been added
+  // to a value, or else the shuffles will be incomplete. Store them per
+  // function in a stack.
+  //
+  // Aside from function order, the order of values doesn't matter much here.
+  UseListOrderStack Stack;
+
+  // We want to visit the functions backward now so we can list function-local
+  // constants in the last Function they're used in. Module-level constants
+  // have already been visited above.
+  for (const Function &F : llvm::reverse(M)) {
+    if (F.isDeclaration())
+      continue;
+    for (const BasicBlock &BB : F)
+      predictValueUseListOrder(&BB, &F, OM, Stack);
+    for (const Argument &A : F.args())
+      predictValueUseListOrder(&A, &F, OM, Stack);
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB) {
+        for (const Value *Op : I.operands())
+          if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
+            predictValueUseListOrder(Op, &F, OM, Stack);
+        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+          predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
+                                   Stack);
+      }
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        predictValueUseListOrder(&I, &F, OM, Stack);
+  }
+
+  // Visit globals last, since the module-level use-list block will be seen
+  // before the function bodies are processed.
+  for (const GlobalVariable &G : M.globals())
+    predictValueUseListOrder(&G, nullptr, OM, Stack);
+  for (const Function &F : M)
+    predictValueUseListOrder(&F, nullptr, OM, Stack);
+  for (const GlobalAlias &A : M.aliases())
+    predictValueUseListOrder(&A, nullptr, OM, Stack);
+  for (const GlobalIFunc &I : M.ifuncs())
+    predictValueUseListOrder(&I, nullptr, OM, Stack);
+  for (const GlobalVariable &G : M.globals())
+    if (G.hasInitializer())
+      predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
+  for (const GlobalAlias &A : M.aliases())
+    predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
+  for (const GlobalIFunc &I : M.ifuncs())
+    predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack);
+  for (const Function &F : M) {
+    for (const Use &U : F.operands())
+      predictValueUseListOrder(U.get(), nullptr, OM, Stack);
+  }
+
+  return Stack;
+}
+
+ValueEnumerator::ValueEnumerator(const Module &M, Type *PrefixType) {
+  EnumerateType(PrefixType);
+
+  UseListOrders = predictUseListOrder(M);
+
+  // Enumerate the global variables.
+  for (const GlobalVariable &GV : M.globals()) {
+    EnumerateValue(&GV);
+    EnumerateType(GV.getValueType());
+  }
+
+  // Enumerate the functions.
+  for (const Function &F : M) {
+    EnumerateValue(&F);
+    EnumerateType(F.getValueType());
+    EnumerateType(
+        dxil::TypedPointerType::get(F.getFunctionType(), F.getAddressSpace()));
+    EnumerateAttributes(F.getAttributes());
+  }
+
+  // Enumerate the aliases.
+  for (const GlobalAlias &GA : M.aliases()) {
+    EnumerateValue(&GA);
+    EnumerateType(GA.getValueType());
+  }
+
+  // Enumerate the ifuncs.
+  for (const GlobalIFunc &GIF : M.ifuncs()) {
+    EnumerateValue(&GIF);
+    EnumerateType(GIF.getValueType());
+  }
+
+  // Enumerate the global variable initializers and attributes.
+  for (const GlobalVariable &GV : M.globals()) {
+    if (GV.hasInitializer())
+      EnumerateValue(GV.getInitializer());
+    EnumerateType(
+        dxil::TypedPointerType::get(GV.getValueType(), GV.getAddressSpace()));
+    if (GV.hasAttributes())
+      EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex));
+  }
+
+  // Enumerate the aliasees.
+  for (const GlobalAlias &GA : M.aliases())
+    EnumerateValue(GA.getAliasee());
+
+  // Enumerate the ifunc resolvers.
+  for (const GlobalIFunc &GIF : M.ifuncs())
+    EnumerateValue(GIF.getResolver());
+
+  // Enumerate any optional Function data.
+  for (const Function &F : M)
+    for (const Use &U : F.operands())
+      EnumerateValue(U.get());
+
+  // Enumerate the metadata type.
+  //
+  // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
+  // only encodes the metadata type when it's used as a value.
+  EnumerateType(Type::getMetadataTy(M.getContext()));
+
+  // Insert constants and metadata that are named at module level into the slot
+  // pool so that the module symbol table can refer to them...
+  EnumerateValueSymbolTable(M.getValueSymbolTable());
+  EnumerateNamedMetadata(M);
+
+  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+  for (const GlobalVariable &GV : M.globals()) {
+    MDs.clear();
+    GV.getAllMetadata(MDs);
+    for (const auto &I : MDs)
+      // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
+      // to write metadata to the global variable's own metadata block
+      // (PR28134).
+      EnumerateMetadata(nullptr, I.second);
+  }
+
+  // Enumerate types used by function bodies and argument lists.
+  for (const Function &F : M) {
+    for (const Argument &A : F.args())
+      EnumerateType(A.getType());
+
+    // Enumerate metadata attached to this function.
+    MDs.clear();
+    F.getAllMetadata(MDs);
+    for (const auto &I : MDs)
+      EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second);
+
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB) {
+        for (const Use &Op : I.operands()) {
+          auto *MD = dyn_cast<MetadataAsValue>(&Op);
+          if (!MD) {
+            EnumerateOperandType(Op);
+            continue;
+          }
+
+          // Local metadata is enumerated during function-incorporation, but
+          // any ConstantAsMetadata arguments in a DIArgList should be
+          // examined now.
+          if (isa<LocalAsMetadata>(MD->getMetadata()))
+            continue;
+          if (auto *AL = dyn_cast<DIArgList>(MD->getMetadata())) {
+            for (auto *VAM : AL->getArgs())
+              if (isa<ConstantAsMetadata>(VAM))
+                EnumerateMetadata(&F, VAM);
+            continue;
+          }
+
+          EnumerateMetadata(&F, MD->getMetadata());
+        }
+        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+          EnumerateType(SVI->getShuffleMaskForBitcode()->getType());
+        if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
+          EnumerateType(GEP->getSourceElementType());
+        if (auto *AI = dyn_cast<AllocaInst>(&I))
+          EnumerateType(AI->getAllocatedType());
+        EnumerateType(I.getType());
+        if (const auto *Call = dyn_cast<CallBase>(&I)) {
+          EnumerateAttributes(Call->getAttributes());
+          EnumerateType(Call->getFunctionType());
+        }
+
+        // Enumerate metadata attached with this instruction.
+        MDs.clear();
+        I.getAllMetadataOtherThanDebugLoc(MDs);
+        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
+          EnumerateMetadata(&F, MDs[i].second);
+
+        // Don't enumerate the location directly -- it has a special record
+        // type -- but enumerate its operands.
+        if (DILocation *L = I.getDebugLoc())
+          for (const Metadata *Op : L->operands())
+            EnumerateMetadata(&F, Op);
+      }
+  }
+
+  // Organize metadata ordering.
+  organizeMetadata();
+}
+
+unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
+  InstructionMapType::const_iterator I = InstructionMap.find(Inst);
+  assert(I != InstructionMap.end() && "Instruction is not mapped!");
+  return I->second;
+}
+
+unsigned ValueEnumerator::getComdatID(const Comdat *C) const {
+  unsigned ComdatID = Comdats.idFor(C);
+  assert(ComdatID && "Comdat not found!");
+  return ComdatID;
+}
+
+void ValueEnumerator::setInstructionID(const Instruction *I) {
+  InstructionMap[I] = InstructionCount++;
+}
+
+unsigned ValueEnumerator::getValueID(const Value *V) const {
+  if (auto *MD = dyn_cast<MetadataAsValue>(V))
+    return getMetadataID(MD->getMetadata());
+
+  ValueMapType::const_iterator I = ValueMap.find(V);
+  assert(I != ValueMap.end() && "Value not in slotcalculator!");
+  return I->second - 1;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ValueEnumerator::dump() const {
+  print(dbgs(), ValueMap, "Default");
+  dbgs() << '\n';
+  print(dbgs(), MetadataMap, "MetaData");
+  dbgs() << '\n';
+}
+#endif
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+                            const char *Name) const {
+  OS << "Map Name: " << Name << "\n";
+  OS << "Size: " << Map.size() << "\n";
+  for (const auto &I : Map) {
+    const Value *V = I.first;
+    if (V->hasName())
+      OS << "Value: " << V->getName();
+    else
+      OS << "Value: [null]\n";
+    V->print(errs());
+    errs() << '\n';
+
+    OS << " Uses(" << V->getNumUses() << "):";
+    for (const Use &U : V->uses()) {
+      if (&U != &*V->use_begin())
+        OS << ",";
+      if (U->hasName())
+        OS << " " << U->getName();
+      else
+        OS << " [null]";
+    }
+    OS << "\n\n";
+  }
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
+                            const char *Name) const {
+  OS << "Map Name: " << Name << "\n";
+  OS << "Size: " << Map.size() << "\n";
+  for (const auto &I : Map) {
+    const Metadata *MD = I.first;
+    OS << "Metadata: slot = " << I.second.ID << "\n";
+    OS << "Metadata: function = " << I.second.F << "\n";
+    MD->print(OS);
+    OS << "\n";
+  }
+}
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+       VI != VE; ++VI)
+    EnumerateValue(VI->getValue());
+}
+
+/// Insert all of the values referenced by named metadata in the specified
+/// module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
+  for (const auto &I : M.named_metadata())
+    EnumerateNamedMDNode(&I);
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+    EnumerateMetadata(nullptr, MD->getOperand(i));
+}
+
+unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const {
+  return F ? getValueID(F) + 1 : 0;
+}
+
+void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) {
+  EnumerateMetadata(getMetadataFunctionID(F), MD);
+}
+
+void ValueEnumerator::EnumerateFunctionLocalMetadata(
+    const Function &F, const LocalAsMetadata *Local) {
+  EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local);
+}
+
+void ValueEnumerator::EnumerateFunctionLocalListMetadata(
+    const Function &F, const DIArgList *ArgList) {
+  EnumerateFunctionLocalListMetadata(getMetadataFunctionID(&F), ArgList);
+}
+
+void ValueEnumerator::dropFunctionFromMetadata(
+    MetadataMapType::value_type &FirstMD) {
+  SmallVector<const MDNode *, 64> Worklist;
+  auto push = [&Worklist](MetadataMapType::value_type &MD) {
+    auto &Entry = MD.second;
+
+    // Nothing to do if this metadata isn't tagged.
+    if (!Entry.F)
+      return;
+
+    // Drop the function tag.
+    Entry.F = 0;
+
+    // If this has an ID and is an MDNode, then its operands have entries as
+    // well. We need to drop the function from them too.
+    if (Entry.ID)
+      if (auto *N = dyn_cast<MDNode>(MD.first))
+        Worklist.push_back(N);
+  };
+  push(FirstMD);
+  while (!Worklist.empty())
+    for (const Metadata *Op : Worklist.pop_back_val()->operands()) {
+      if (!Op)
+        continue;
+      auto MD = MetadataMap.find(Op);
+      if (MD != MetadataMap.end())
+        push(*MD);
+    }
+}
+
+void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
+  // It's vital for reader efficiency that uniqued subgraphs are done in
+  // post-order; it's expensive when their operands have forward references.
+  // If a distinct node is referenced from a uniqued node, it'll be delayed
+  // until the uniqued subgraph has been completely traversed.
+  SmallVector<const MDNode *, 32> DelayedDistinctNodes;
+
+  // Start by enumerating MD, and then work through its transitive operands in
+  // post-order. This requires a depth-first search.
+  SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist;
+  if (const MDNode *N = enumerateMetadataImpl(F, MD))
+    Worklist.push_back(std::make_pair(N, N->op_begin()));
+
+  while (!Worklist.empty()) {
+    const MDNode *N = Worklist.back().first;
+
+    // Enumerate operands until we hit a new node. We need to traverse these
+    // nodes' operands before visiting the rest of N's operands.
+    MDNode::op_iterator I = std::find_if(
+        Worklist.back().second, N->op_end(),
+        [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); });
+    if (I != N->op_end()) {
+      auto *Op = cast<MDNode>(*I);
+      Worklist.back().second = ++I;
+
+      // Delay traversing Op if it's a distinct node and N is uniqued.
+      if (Op->isDistinct() && !N->isDistinct())
+        DelayedDistinctNodes.push_back(Op);
+      else
+        Worklist.push_back(std::make_pair(Op, Op->op_begin()));
+      continue;
+    }
+
+    // All the operands have been visited. Now assign an ID.
+    Worklist.pop_back();
+    MDs.push_back(N);
+    MetadataMap[N].ID = MDs.size();
+
+    // Flush out any delayed distinct nodes; these are all the distinct nodes
+    // that are leaves in the last uniqued subgraph.
+    if (Worklist.empty() || Worklist.back().first->isDistinct()) {
+      for (const MDNode *N : DelayedDistinctNodes)
+        Worklist.push_back(std::make_pair(N, N->op_begin()));
+      DelayedDistinctNodes.clear();
+    }
+  }
+}
+
+const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F,
+                                                     const Metadata *MD) {
+  if (!MD)
+    return nullptr;
+
+  assert(
+      (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
+      "Invalid metadata kind");
+
+  auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F)));
+  MDIndex &Entry = Insertion.first->second;
+  if (!Insertion.second) {
+    // Already mapped. If F doesn't match the function tag, drop it.
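+    // (A node that turns out to be reachable from more than one function
+    // cannot be emitted in a single function's metadata block, so its tag is
+    // cleared and it is treated as module-level from here on.)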
+    if (Entry.hasDifferentFunction(F))
+      dropFunctionFromMetadata(*Insertion.first);
+    return nullptr;
+  }
+
+  // Don't assign IDs to metadata nodes.
+  if (auto *N = dyn_cast<MDNode>(MD))
+    return N;
+
+  // Save the metadata.
+  MDs.push_back(MD);
+  Entry.ID = MDs.size();
+
+  // Enumerate the constant, if any.
+  if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
+    EnumerateValue(C->getValue());
+
+  return nullptr;
+}
+
+/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
+/// information reachable from the metadata.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(
+    unsigned F, const LocalAsMetadata *Local) {
+  assert(F && "Expected a function");
+
+  // Check to see if it's already in!
+  MDIndex &Index = MetadataMap[Local];
+  if (Index.ID) {
+    assert(Index.F == F && "Expected the same function");
+    return;
+  }
+
+  MDs.push_back(Local);
+  Index.F = F;
+  Index.ID = MDs.size();
+
+  EnumerateValue(Local->getValue());
+}
+
+/// EnumerateFunctionLocalListMetadata - Incorporate function-local metadata
+/// information reachable from the metadata.
+void ValueEnumerator::EnumerateFunctionLocalListMetadata(
+    unsigned F, const DIArgList *ArgList) {
+  assert(F && "Expected a function");
+
+  // Check to see if it's already in!
+  MDIndex &Index = MetadataMap[ArgList];
+  if (Index.ID) {
+    assert(Index.F == F && "Expected the same function");
+    return;
+  }
+
+  for (ValueAsMetadata *VAM : ArgList->getArgs()) {
+    if (isa<LocalAsMetadata>(VAM)) {
+      assert(MetadataMap.count(VAM) &&
+             "LocalAsMetadata should be enumerated before DIArgList");
+      assert(MetadataMap[VAM].F == F &&
+             "Expected LocalAsMetadata in the same function");
+    } else {
+      assert(isa<ConstantAsMetadata>(VAM) &&
+             "Expected LocalAsMetadata or ConstantAsMetadata");
+      assert(ValueMap.count(VAM->getValue()) &&
+             "Constant should be enumerated before DIArgList");
+      EnumerateMetadata(F, VAM);
+    }
+  }
+
+  MDs.push_back(ArgList);
+  Index.F = F;
+  Index.ID = MDs.size();
+}
+
+static unsigned getMetadataTypeOrder(const Metadata *MD) {
+  // Strings are emitted in bulk and must come first.
+  if (isa<MDString>(MD))
+    return 0;
+
+  // ConstantAsMetadata doesn't reference anything. We may as well shuffle it
+  // to the front since we can detect it.
+  auto *N = dyn_cast<MDNode>(MD);
+  if (!N)
+    return 1;
+
+  // The reader is fast at forward references for distinct node operands, but
+  // slow when uniqued operands are unresolved.
+  return N->isDistinct() ? 2 : 3;
+}
+
+void ValueEnumerator::organizeMetadata() {
+  assert(MetadataMap.size() == MDs.size() &&
+         "Metadata map and vector out of sync");
+
+  if (MDs.empty())
+    return;
+
+  // Copy out the index information from MetadataMap in order to choose a new
+  // order.
+  SmallVector<MDIndex, 64> Order;
+  Order.reserve(MetadataMap.size());
+  for (const Metadata *MD : MDs)
+    Order.push_back(MetadataMap.lookup(MD));
+
+  // Partition:
+  //   - by function, then
+  //   - by isa<MDString>
+  // and then sort by the original/current ID. Since the IDs are guaranteed to
+  // be unique, the result of std::sort will be deterministic. There's no need
+  // for std::stable_sort.
+  llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
+    return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
+           std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
+  });
+
+  // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
+  // and fix up MetadataMap.
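+  // (After the sort above, entries with F == 0 form a prefix of Order; that
+  // prefix repopulates the module-level MDs below, and the per-function tail
+  // is carved into FunctionMDInfo ranges.)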
+  std::vector<const Metadata *> OldMDs;
+  MDs.swap(OldMDs);
+  MDs.reserve(OldMDs.size());
+  for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
+    auto *MD = Order[I].get(OldMDs);
+    MDs.push_back(MD);
+    MetadataMap[MD].ID = I + 1;
+    if (isa<MDString>(MD))
+      ++NumMDStrings;
+  }
+
+  // Return early if there's nothing for the functions.
+  if (MDs.size() == Order.size())
+    return;
+
+  // Build the function metadata ranges.
+  MDRange R;
+  FunctionMDs.reserve(OldMDs.size());
+  unsigned PrevF = 0;
+  for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E;
+       ++I) {
+    unsigned F = Order[I].F;
+    if (!PrevF) {
+      PrevF = F;
+    } else if (PrevF != F) {
+      R.Last = FunctionMDs.size();
+      std::swap(R, FunctionMDInfo[PrevF]);
+      R.First = FunctionMDs.size();
+
+      ID = MDs.size();
+      PrevF = F;
+    }
+
+    auto *MD = Order[I].get(OldMDs);
+    FunctionMDs.push_back(MD);
+    MetadataMap[MD].ID = ++ID;
+    if (isa<MDString>(MD))
+      ++R.NumStrings;
+  }
+  R.Last = FunctionMDs.size();
+  FunctionMDInfo[PrevF] = R;
+}
+
+void ValueEnumerator::incorporateFunctionMetadata(const Function &F) {
+  NumModuleMDs = MDs.size();
+
+  auto R = FunctionMDInfo.lookup(getValueID(&F) + 1);
+  NumMDStrings = R.NumStrings;
+  MDs.insert(MDs.end(), FunctionMDs.begin() + R.First,
+             FunctionMDs.begin() + R.Last);
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
+  assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
+
+  // Check to see if it's already in!
+  unsigned &ValueID = ValueMap[V];
+  if (ValueID) {
+    // Increment use count.
+    Values[ValueID - 1].second++;
+    return;
+  }
+
+  if (auto *GO = dyn_cast<GlobalObject>(V))
+    if (const Comdat *C = GO->getComdat())
+      Comdats.insert(C);
+
+  // Enumerate the type of this value.
+  EnumerateType(V->getType());
+
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<GlobalValue>(C)) {
+      // Initializers for globals are handled explicitly elsewhere.
+    } else if (C->getNumOperands()) {
+      // If a constant has operands, enumerate them. This makes sure that if a
+      // constant has uses (for example an array of const ints), that they are
+      // inserted also.
+
+      // We prefer to enumerate them with values before we enumerate the user
+      // itself. This makes it more likely that we can avoid forward references
+      // in the reader. We know that there can be no cycles in the constants
+      // graph that don't go through a global variable.
+      for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); I != E;
+           ++I)
+        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
+          EnumerateValue(*I);
+      if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+        if (CE->getOpcode() == Instruction::ShuffleVector)
+          EnumerateValue(CE->getShuffleMaskForBitcode());
+        if (auto *GEP = dyn_cast<GEPOperator>(CE))
+          EnumerateType(GEP->getSourceElementType());
+      }
+
+      // Finally, add the value. Doing this could make the ValueID reference be
+      // dangling, don't reuse it.
+      Values.push_back(std::make_pair(V, 1U));
+      ValueMap[V] = Values.size();
+      return;
+    }
+  }
+
+  // Add the value.
+  Values.push_back(std::make_pair(V, 1U));
+  ValueID = Values.size();
+}
+
+void ValueEnumerator::EnumerateType(Type *Ty) {
+  unsigned *TypeID = &TypeMap[Ty];
+
+  // We've already seen this type.
+  if (*TypeID)
+    return;
+
+  // If it is a non-anonymous struct, mark the type as being visited so that we
+  // don't recursively visit it. This is safe because we allow forward
+  // references of these in the bitcode reader.
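+  // (For example, a named recursive struct such as
+  //    %node = type { i32, %node* }
+  // would otherwise recurse forever: enumerating %node's subtypes reaches
+  // %node again. The ~0U placeholder below breaks that cycle.)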
+  if (StructType *STy = dyn_cast<StructType>(Ty))
+    if (!STy->isLiteral())
+      *TypeID = ~0U;
+
+  // Enumerate all of the subtypes before we enumerate this type. This ensures
+  // that the type will be enumerated in an order that can be directly built.
+  for (Type *SubTy : Ty->subtypes())
+    EnumerateType(SubTy);
+
+  // Refresh the TypeID pointer in case the table rehashed.
+  TypeID = &TypeMap[Ty];
+
+  // Check to see if we got the pointer another way. This can happen when
+  // enumerating recursive types that hit the base case deeper than they start.
+  //
+  // If this is actually a struct that we are treating as forward ref'able,
+  // then emit the definition now that all of its contents are available.
+  if (*TypeID && *TypeID != ~0U)
+    return;
+
+  // Add this type now that its contents are all happily enumerated.
+  Types.push_back(Ty);
+
+  *TypeID = Types.size();
+}
+
+// Enumerate the types for the specified value. If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+  EnumerateType(V->getType());
+
+  assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand");
+
+  const Constant *C = dyn_cast<Constant>(V);
+  if (!C)
+    return;
+
+  // If this constant is already enumerated, ignore it, we know its type must
+  // be enumerated.
+  if (ValueMap.count(C))
+    return;
+
+  // This constant may have operands, make sure to enumerate the types in
+  // them.
+  for (const Value *Op : C->operands()) {
+    // Don't enumerate basic blocks here, this happens as operands to
+    // blockaddress.
+    if (isa<BasicBlock>(Op))
+      continue;
+
+    EnumerateOperandType(Op);
+  }
+  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+    if (CE->getOpcode() == Instruction::ShuffleVector)
+      EnumerateOperandType(CE->getShuffleMaskForBitcode());
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      EnumerateType(cast<GEPOperator>(CE)->getSourceElementType());
+  }
+}
+
+void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
+  if (PAL.isEmpty())
+    return; // null is always 0.
+
+  // Do a lookup.
+  unsigned &Entry = AttributeListMap[PAL];
+  if (Entry == 0) {
+    // Never saw this before, add it.
+    AttributeLists.push_back(PAL);
+    Entry = AttributeLists.size();
+  }
+
+  // Do lookups for all attribute groups.
+  for (unsigned i : PAL.indexes()) {
+    AttributeSet AS = PAL.getAttributes(i);
+    if (!AS.hasAttributes())
+      continue;
+    IndexAndAttrSet Pair = {i, AS};
+    unsigned &Entry = AttributeGroupMap[Pair];
+    if (Entry == 0) {
+      AttributeGroups.push_back(Pair);
+      Entry = AttributeGroups.size();
+
+      for (Attribute Attr : AS) {
+        if (Attr.isTypeAttribute())
+          EnumerateType(Attr.getValueAsType());
+      }
+    }
+  }
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+  InstructionCount = 0;
+  NumModuleValues = Values.size();
+
+  // Add global metadata to the function block. This doesn't include
+  // LocalAsMetadata.
+  incorporateFunctionMetadata(F);
+
+  // Adding function arguments to the value table.
+  for (const auto &I : F.args()) {
+    EnumerateValue(&I);
+    if (I.hasAttribute(Attribute::ByVal))
+      EnumerateType(I.getParamByValType());
+    else if (I.hasAttribute(Attribute::StructRet))
+      EnumerateType(I.getParamStructRetType());
+    else if (I.hasAttribute(Attribute::ByRef))
+      EnumerateType(I.getParamByRefType());
+  }
+  FirstFuncConstantID = Values.size();
+
+  // Add all function-level constants to the value table.
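+  // ("Function-level constants" are constant operands other than GlobalValues,
+  // whose IDs are module-level, plus inline asm; see the filter below.)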
+  for (const BasicBlock &BB : F) {
+    for (const Instruction &I : BB) {
+      for (const Use &OI : I.operands()) {
+        if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI))
+          EnumerateValue(OI);
+      }
+      if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+        EnumerateValue(SVI->getShuffleMaskForBitcode());
+    }
+    BasicBlocks.push_back(&BB);
+    ValueMap[&BB] = BasicBlocks.size();
+  }
+
+  // Add the function's parameter attributes so they are available for use in
+  // the function's instructions.
+  EnumerateAttributes(F.getAttributes());
+
+  FirstInstID = Values.size();
+
+  SmallVector<const LocalAsMetadata *, 8> FnLocalMDVector;
+  SmallVector<const DIArgList *, 4> ArgListMDVector;
+  // Add all of the instructions.
+  for (const BasicBlock &BB : F) {
+    for (const Instruction &I : BB) {
+      for (const Use &OI : I.operands()) {
+        if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) {
+          if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) {
+            // Enumerate metadata after the instructions they might refer to.
+            FnLocalMDVector.push_back(Local);
+          } else if (auto *ArgList = dyn_cast<DIArgList>(MD->getMetadata())) {
+            ArgListMDVector.push_back(ArgList);
+            for (ValueAsMetadata *VMD : ArgList->getArgs()) {
+              if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) {
+                // Enumerate metadata after the instructions they might refer
+                // to.
+                FnLocalMDVector.push_back(Local);
+              }
+            }
+          }
+        }
+      }
+
+      if (!I.getType()->isVoidTy())
+        EnumerateValue(&I);
+    }
+  }
+
+  // Add all of the function-local metadata.
+  for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) {
+    // At this point, all local values have been incorporated, so we shouldn't
+    // have a metadata operand that references a value that hasn't been seen.
+    assert(ValueMap.count(FnLocalMDVector[i]->getValue()) &&
+           "Missing value for metadata operand");
+    EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]);
+  }
+  // DIArgList entries must come after function-local metadata, as it is not
+  // possible to forward-reference them.
+  for (const DIArgList *ArgList : ArgListMDVector)
+    EnumerateFunctionLocalListMetadata(F, ArgList);
+}
+
+void ValueEnumerator::purgeFunction() {
+  /// Remove purged values from the ValueMap.
+  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+    ValueMap.erase(Values[i].first);
+  for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
+    MetadataMap.erase(MDs[i]);
+  for (const BasicBlock *BB : BasicBlocks)
+    ValueMap.erase(BB);
+
+  Values.resize(NumModuleValues);
+  MDs.resize(NumModuleMDs);
+  BasicBlocks.clear();
+  NumMDStrings = 0;
+}
+
+static void IncorporateFunctionInfoGlobalBBIDs(
+    const Function *F, DenseMap<const BasicBlock *, unsigned> &IDMap) {
+  unsigned Counter = 0;
+  for (const BasicBlock &BB : *F)
+    IDMap[&BB] = ++Counter;
+}
+
+/// getGlobalBasicBlockID - This returns the function-specific ID for the
+/// specified basic block. This is relatively expensive information, so it
+/// should only be used by rare constructs such as address-of-label.
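+/// The map is filled lazily: the first query for a block in a given function
+/// numbers every block of that function via IncorporateFunctionInfoGlobalBBIDs
+/// above, and later queries are served from the cache.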
+unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
+  unsigned &Idx = GlobalBasicBlockIDs[BB];
+  if (Idx != 0)
+    return Idx - 1;
+
+  IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
+  return getGlobalBasicBlockID(BB);
+}
+
+uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
+  return Log2_32_Ceil(getTypes().size() + 1);
+}
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h
new file mode 100644
index 000000000000..6cf339b7a5cd
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h
@@ -0,0 +1,308 @@
+//===- DirectX/DXILWriter/ValueEnumerator.h - Number values -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types Unique IDs.
+// Forked from lib/Bitcode/Writer
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DXILWRITER_VALUEENUMERATOR_H
+#define LLVM_DXILWRITER_VALUEENUMERATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/UseListOrder.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock;
+class Comdat;
+class DIArgList;
+class Function;
+class Instruction;
+class LocalAsMetadata;
+class MDNode;
+class Metadata;
+class Module;
+class NamedMDNode;
+class raw_ostream;
+class Type;
+class Value;
+class ValueSymbolTable;
+
+namespace dxil {
+
+class ValueEnumerator {
+public:
+  using TypeList = std::vector<Type *>;
+
+  // For each value, we remember its Value* and occurrence frequency.
+  using ValueList = std::vector<std::pair<const Value *, unsigned>>;
+
+  /// Attribute groups as encoded in bitcode are almost AttributeSets, but they
+  /// include the AttributeList index, so we have to track that in our map.
+  using IndexAndAttrSet = std::pair<unsigned, AttributeSet>;
+
+  UseListOrderStack UseListOrders;
+
+private:
+  using TypeMapType = DenseMap<Type *, unsigned>;
+  TypeMapType TypeMap;
+  TypeList Types;
+
+  using ValueMapType = DenseMap<const Value *, unsigned>;
+  ValueMapType ValueMap;
+  ValueList Values;
+
+  using ComdatSetType = UniqueVector<const Comdat *>;
+  ComdatSetType Comdats;
+
+  std::vector<const Metadata *> MDs;
+  std::vector<const Metadata *> FunctionMDs;
+
+  /// Index of information about a piece of metadata.
+  struct MDIndex {
+    unsigned F = 0;  ///< The ID of the function for this metadata, if any.
+    unsigned ID = 0; ///< The implicit ID of this metadata in bitcode.
+
+    MDIndex() = default;
+    explicit MDIndex(unsigned F) : F(F) {}
+
+    /// Check if this has a function tag, and it's different from NewF.
+    bool hasDifferentFunction(unsigned NewF) const { return F && F != NewF; }
+
+    /// Fetch the MD this references out of the given metadata array.
+    const Metadata *get(ArrayRef<const Metadata *> MDs) const {
+      assert(ID && "Expected non-zero ID");
+      assert(ID <= MDs.size() && "Expected valid ID");
+      return MDs[ID - 1];
+    }
+  };
+
+  using MetadataMapType = DenseMap<const Metadata *, MDIndex>;
+  MetadataMapType MetadataMap;
+
+  /// Range of metadata IDs, as a half-open range.
+  struct MDRange {
+    unsigned First = 0;
+    unsigned Last = 0;
+
+    /// Number of strings in the prefix of the metadata range.
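+    /// (organizeMetadata() sorts MDStrings to the front of each range, so a
+    /// count of the prefix is enough to locate them.)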
+    unsigned NumStrings = 0;
+
+    MDRange() = default;
+    explicit MDRange(unsigned First) : First(First) {}
+  };
+  SmallDenseMap<unsigned, MDRange, 1> FunctionMDInfo;
+
+  using AttributeGroupMapType = DenseMap<IndexAndAttrSet, unsigned>;
+  AttributeGroupMapType AttributeGroupMap;
+  std::vector<IndexAndAttrSet> AttributeGroups;
+
+  using AttributeListMapType = DenseMap<AttributeList, unsigned>;
+  AttributeListMapType AttributeListMap;
+  std::vector<AttributeList> AttributeLists;
+
+  /// GlobalBasicBlockIDs - This map memoizes the basic block IDs referenced by
+  /// the "getGlobalBasicBlockID" method.
+  mutable DenseMap<const BasicBlock *, unsigned> GlobalBasicBlockIDs;
+
+  using InstructionMapType = DenseMap<const Instruction *, unsigned>;
+  InstructionMapType InstructionMap;
+  unsigned InstructionCount;
+
+  /// BasicBlocks - This contains all the basic blocks for the currently
+  /// incorporated function. Their reverse mapping is stored in ValueMap.
+  std::vector<const BasicBlock *> BasicBlocks;
+
+  /// When a function is incorporated, this is the size of the Values list
+  /// before incorporation.
+  unsigned NumModuleValues;
+
+  /// When a function is incorporated, this is the size of the Metadatas list
+  /// before incorporation.
+  unsigned NumModuleMDs = 0;
+  unsigned NumMDStrings = 0;
+
+  unsigned FirstFuncConstantID;
+  unsigned FirstInstID;
+
+public:
+  ValueEnumerator(const Module &M, Type *PrefixType);
+  ValueEnumerator(const ValueEnumerator &) = delete;
+  ValueEnumerator &operator=(const ValueEnumerator &) = delete;
+
+  void dump() const;
+  void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+  void print(raw_ostream &OS, const MetadataMapType &Map,
+             const char *Name) const;
+
+  unsigned getValueID(const Value *V) const;
+
+  unsigned getMetadataID(const Metadata *MD) const {
+    auto ID = getMetadataOrNullID(MD);
+    assert(ID != 0 && "Metadata not in slotcalculator!");
+    return ID - 1;
+  }
+
+  unsigned getMetadataOrNullID(const Metadata *MD) const {
+    return MetadataMap.lookup(MD).ID;
+  }
+
+  unsigned numMDs() const { return MDs.size(); }
+
+  unsigned getTypeID(Type *T) const {
+    TypeMapType::const_iterator I = TypeMap.find(T);
+    assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+    return I->second - 1;
+  }
+
+  unsigned getInstructionID(const Instruction *I) const;
+  void setInstructionID(const Instruction *I);
+
+  unsigned getAttributeListID(AttributeList PAL) const {
+    if (PAL.isEmpty())
+      return 0; // Null maps to zero.
+    AttributeListMapType::const_iterator I = AttributeListMap.find(PAL);
+    assert(I != AttributeListMap.end() && "Attribute not in ValueEnumerator!");
+    return I->second;
+  }
+
+  unsigned getAttributeGroupID(IndexAndAttrSet Group) const {
+    if (!Group.second.hasAttributes())
+      return 0; // Null maps to zero.
+    AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(Group);
+    assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!");
+    return I->second;
+  }
+
+  /// getFunctionConstantRange - Return the range of values that corresponds to
+  /// function-local constants.
+  void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+    Start = FirstFuncConstantID;
+    End = FirstInstID;
+  }
+
+  const ValueList &getValues() const { return Values; }
+
+  /// Check whether the current block has any metadata to emit.
+  bool hasMDs() const { return NumModuleMDs < MDs.size(); }
+
+  /// Get the MDString metadata for this block.
+  ArrayRef<const Metadata *> getMDStrings() const {
+    return makeArrayRef(MDs).slice(NumModuleMDs, NumMDStrings);
+  }
+
+  /// Get the non-MDString metadata for this block.
+  ArrayRef<const Metadata *> getNonMDStrings() const {
+    return makeArrayRef(MDs).slice(NumModuleMDs).slice(NumMDStrings);
+  }
+
+  const TypeList &getTypes() const { return Types; }
+
+  const std::vector<const BasicBlock *> &getBasicBlocks() const {
+    return BasicBlocks;
+  }
+
+  const std::vector<AttributeList> &getAttributeLists() const {
+    return AttributeLists;
+  }
+
+  const std::vector<IndexAndAttrSet> &getAttributeGroups() const {
+    return AttributeGroups;
+  }
+
+  const ComdatSetType &getComdats() const { return Comdats; }
+  unsigned getComdatID(const Comdat *C) const;
+
+  /// getGlobalBasicBlockID - This returns the function-specific ID for the
+  /// specified basic block. This is relatively expensive information, so it
+  /// should only be used by rare constructs such as address-of-label.
+  unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
+
+  /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+  /// use these two methods to get its data into the ValueEnumerator!
+  void incorporateFunction(const Function &F);
+
+  void purgeFunction();
+  uint64_t computeBitsRequiredForTypeIndicies() const;
+
+  void EnumerateType(Type *T);
+
+private:
+
+  /// Reorder the reachable metadata.
+  ///
+  /// This is not just an optimization, but is mandatory for emitting MDString
+  /// correctly.
+  void organizeMetadata();
+
+  /// Drop the function tag from the transitive operands of the given node.
+  void dropFunctionFromMetadata(MetadataMapType::value_type &FirstMD);
+
+  /// Incorporate the function metadata.
+  ///
+  /// This should be called before enumerating LocalAsMetadata for the
+  /// function.
+  void incorporateFunctionMetadata(const Function &F);
+
+  /// Enumerate a single instance of metadata with the given function tag.
+  ///
+  /// If \c MD has already been enumerated, check that \c F matches its
+  /// function tag. If not, call \a dropFunctionFromMetadata().
+  ///
+  /// Otherwise, mark \c MD as visited. Assign it an ID, or just return it if
+  /// it's an \a MDNode.
+  const MDNode *enumerateMetadataImpl(unsigned F, const Metadata *MD);
+
+  unsigned getMetadataFunctionID(const Function *F) const;
+
+  /// Enumerate reachable metadata in (almost) post-order.
+  ///
+  /// Enumerate all the metadata reachable from MD. We want to minimize the
+  /// cost of reading bitcode records, and so the primary consideration is that
+  /// operands of uniqued nodes are resolved before the nodes are read. This
+  /// avoids re-uniquing them on the context and factors away RAUW support.
+  ///
+  /// This algorithm guarantees that subgraphs of uniqued nodes are in
+  /// post-order. Distinct subgraphs reachable only from a single uniqued node
+  /// will be in post-order.
+  ///
+  /// \note The relative order of a distinct and uniqued node is irrelevant.
+  /// \a organizeMetadata() will later partition distinct nodes ahead of
+  /// uniqued ones.
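+  ///
+  /// For example, for a uniqued node !0 with operands !1 and !2, the subgraph
+  /// under !1 is completed and assigned IDs before !2 is begun, and !0 gets
+  /// its ID only after both, so the reader never needs a forward reference
+  /// into a uniqued subgraph.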
+ ///{ + void EnumerateMetadata(const Function *F, const Metadata *MD); + void EnumerateMetadata(unsigned F, const Metadata *MD); + ///} + + void EnumerateFunctionLocalMetadata(const Function &F, + const LocalAsMetadata *Local); + void EnumerateFunctionLocalMetadata(unsigned F, const LocalAsMetadata *Local); + void EnumerateFunctionLocalListMetadata(const Function &F, + const DIArgList *ArgList); + void EnumerateFunctionLocalListMetadata(unsigned F, const DIArgList *Arglist); + void EnumerateNamedMDNode(const NamedMDNode *NMD); + void EnumerateValue(const Value *V); + void EnumerateOperandType(const Value *V); + void EnumerateAttributes(AttributeList PAL); + + void EnumerateValueSymbolTable(const ValueSymbolTable &ST); + void EnumerateNamedMetadata(const Module &M); +}; + +} // end namespace dxil +} // end namespace llvm + +#endif // LLVM_DXILWRITER_VALUEENUMERATOR_H diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp new file mode 100644 index 000000000000..c1f9f4aec672 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp @@ -0,0 +1,100 @@ +//===- DXILWriterPass.cpp - Bitcode writing pass --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// DXILWriterPass implementation. +// +//===----------------------------------------------------------------------===// + +#include "DXILWriterPass.h" +#include "DXILBitcodeWriter.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; +using namespace llvm::dxil; + +namespace { +class WriteDXILPass : public llvm::ModulePass { + raw_ostream &OS; // raw_ostream to print on + +public: + static char ID; // Pass identification, replacement for typeid + WriteDXILPass() : ModulePass(ID), OS(dbgs()) { + initializeWriteDXILPassPass(*PassRegistry::getPassRegistry()); + } + + explicit WriteDXILPass(raw_ostream &o) : ModulePass(ID), OS(o) { + initializeWriteDXILPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Bitcode Writer"; } + + bool runOnModule(Module &M) override { + WriteDXILToFile(M, OS); + return false; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +class EmbedDXILPass : public llvm::ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + EmbedDXILPass() : ModulePass(ID) { + initializeEmbedDXILPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "DXIL Embedder"; } + + bool runOnModule(Module &M) override { + std::string Data; + llvm::raw_string_ostream OS(Data); + WriteDXILToFile(M, OS); + + Constant *ModuleConstant = + ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data)); + auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true, + GlobalValue::PrivateLinkage, + ModuleConstant, "dx.dxil"); + GV->setSection("DXIL"); + 
    GV->setAlignment(Align(4));
+    appendToCompilerUsed(M, {GV});
+    return true;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+};
+} // namespace
+
+char WriteDXILPass::ID = 0;
+INITIALIZE_PASS_BEGIN(WriteDXILPass, "write-bitcode", "Write Bitcode", false,
+                      true)
+INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_END(WriteDXILPass, "write-bitcode", "Write Bitcode", false,
+                    true)
+
+ModulePass *llvm::createDXILWriterPass(raw_ostream &Str) {
+  return new WriteDXILPass(Str);
+}
+
+char EmbedDXILPass::ID = 0;
+INITIALIZE_PASS(EmbedDXILPass, "dxil-embed", "Embed DXIL", false, true)
+
+ModulePass *llvm::createDXILEmbedderPass() { return new EmbedDXILPass(); }
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h
new file mode 100644
index 000000000000..2c9c12178677
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h
@@ -0,0 +1,37 @@
+//===-- DXILWriterPass.h - Bitcode writing pass --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides a bitcode writing pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_DXILWriterPass_H
+#define LLVM_BITCODE_DXILWriterPass_H
+
+#include "DirectX.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class raw_ostream;
+
+/// Create and return a pass that writes the module to the specified
+/// ostream. Note that this pass is designed for use with the legacy pass
+/// manager.
+ModulePass *createDXILWriterPass(raw_ostream &Str);
+
+/// Create and return a pass that writes the module to a global variable in the
+/// module for later emission in the MCStreamer. Note that this pass is designed
+/// for use with the legacy pass manager because it is run in CodeGen only.
+ModulePass *createDXILEmbedderPass();
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
new file mode 100644
index 000000000000..3883e4ba4621
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -0,0 +1,43 @@
+//===- DirectX.h - DirectX Target Implementation ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
+#define LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
+
+namespace llvm {
+class ModulePass;
+class PassRegistry;
+
+/// Initializer for dxil writer pass
+void initializeWriteDXILPassPass(PassRegistry &);
+
+/// Initializer for dxil embedder pass
+void initializeEmbedDXILPassPass(PassRegistry &);
+
+/// Initializer for DXIL-prepare
+void initializeDXILPrepareModulePass(PassRegistry &);
+
+/// Pass to convert modules into DXIL-compatible modules
+ModulePass *createDXILPrepareModulePass();
+
+/// Initializer for DXILOpLowering
+void initializeDXILOpLoweringLegacyPass(PassRegistry &);
+
+/// Pass to lower LLVM intrinsic calls to DXIL op function calls.
+ModulePass *createDXILOpLoweringLegacyPass();
+
+/// Initializer for DXILTranslateMetadata.
+void initializeDXILTranslateMetadataPass(PassRegistry &);
+
+/// Pass to emit metadata for DXIL.
+ModulePass *createDXILTranslateMetadataPass();
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
diff --git a/llvm/lib/Target/DirectX/DirectX.td b/llvm/lib/Target/DirectX/DirectX.td
new file mode 100644
index 000000000000..4d1d45b84a68
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectX.td
@@ -0,0 +1,54 @@
+//- DirectX.td - Describe the DirectX Target Machine ----------*- tablegen -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is a target description file for the DirectX target.
+///
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+include "DXILStubs.td"
+
+//===----------------------------------------------------------------------===//
+// DirectX Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def DirectXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// DirectX Processors supported.
+//===----------------------------------------------------------------------===// + +def : ProcessorModel<"generic", NoSchedModel, []>; + + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def DirectXAsmParser : AsmParser { + // The physical register names are not in the binary format or asm text + let ShouldEmitMatchRegisterName = 0; +} + +def DirectXAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 0; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def DirectX : Target { + let InstructionSet = DirectXInstrInfo; + let AssemblyParsers = [DirectXAsmParser]; + let AssemblyWriters = [DirectXAsmWriter]; +} diff --git a/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp new file mode 100644 index 000000000000..cea3283f6756 --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp @@ -0,0 +1,57 @@ +//===-- DirectXAsmPrinter.cpp - DirectX assembly writer --------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains AsmPrinters for the DirectX backend. +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo/DirectXTargetInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +namespace { + +// The DXILAsmPrinter is mostly a stub because DXIL is just LLVM bitcode which +// gets embedded into a DXContainer file. 
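+// Only emitGlobalVariable() below does any real work; runOnMachineFunction()
+// returns false without printing anything, so no machine instructions are
+// ever emitted.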
+class DXILAsmPrinter : public AsmPrinter {
+public:
+  explicit DXILAsmPrinter(TargetMachine &TM,
+                          std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)) {}
+
+  StringRef getPassName() const override { return "DXIL Assembly Printer"; }
+  void emitGlobalVariable(const GlobalVariable *GV) override;
+  bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+};
+} // namespace
+
+void DXILAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
+  // If there is no initializer or the section is implicit, do nothing
+  if (!GV->hasInitializer() || GV->hasImplicitSection())
+    return;
+  // Skip the LLVM metadata
+  if (GV->getSection() == "llvm.metadata")
+    return;
+  SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+  MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM);
+  OutStreamer->switchSection(TheSection);
+  emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXAsmPrinter() {
+  RegisterAsmPrinter<DXILAsmPrinter> X(getTheDirectXTarget());
+}
diff --git a/llvm/lib/Target/DirectX/DirectXFrameLowering.h b/llvm/lib/Target/DirectX/DirectXFrameLowering.h
new file mode 100644
index 000000000000..76a1450054be
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXFrameLowering.h
@@ -0,0 +1,35 @@
+//===-- DirectXFrameLowering.h - Frame lowering for DirectX --*- C++ ---*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements DirectX-specific bits of TargetFrameLowering class.
+// This is just a stub because the current DXIL backend does not actually lower
+// through the MC layer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXFRAMELOWERING_H
+#define LLVM_DIRECTX_DIRECTXFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/Alignment.h"
+
+namespace llvm {
+class DirectXSubtarget;
+
+class DirectXFrameLowering : public TargetFrameLowering {
+public:
+  explicit DirectXFrameLowering(const DirectXSubtarget &STI)
+      : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {}
+
+  void emitPrologue(MachineFunction &, MachineBasicBlock &) const override {}
+  void emitEpilogue(MachineFunction &, MachineBasicBlock &) const override {}
+
+  bool hasFP(const MachineFunction &) const override { return false; }
+};
+} // namespace llvm
+#endif // LLVM_DIRECTX_DIRECTXFRAMELOWERING_H
diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
new file mode 100644
index 000000000000..07b68648f16c
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
@@ -0,0 +1,20 @@
+//===-- DirectXInstrInfo.cpp - InstrInfo for DirectX -*- C++ ------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DirectX specific subclass of TargetInstrInfo.
+// +//===----------------------------------------------------------------------===// + +#include "DirectXInstrInfo.h" + +#define GET_INSTRINFO_CTOR_DTOR +#include "DirectXGenInstrInfo.inc" + +using namespace llvm; + +DirectXInstrInfo::~DirectXInstrInfo() {} diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.h b/llvm/lib/Target/DirectX/DirectXInstrInfo.h new file mode 100644 index 000000000000..4fe79ee547fe --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.h @@ -0,0 +1,30 @@ +//===-- DirectXInstrInfo.h - Define InstrInfo for DirectX -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the DirectX specific subclass of TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DIRECTXINSTRINFO_H +#define LLVM_DIRECTX_DIRECTXINSTRINFO_H + +#include "DirectXRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "DirectXGenInstrInfo.inc" + +namespace llvm { +struct DirectXInstrInfo : public DirectXGenInstrInfo { + explicit DirectXInstrInfo() : DirectXGenInstrInfo() {} + + ~DirectXInstrInfo() override; +}; +} // namespace llvm + +#endif // LLVM_DIRECTX_DIRECTXINSTRINFO_H diff --git a/llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp b/llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp new file mode 100644 index 000000000000..c54b494f3730 --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp @@ -0,0 +1,24 @@ +//===-- DirectXRegisterInfo.cpp - RegisterInfo for DirectX -*- C++ ------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the DirectX specific subclass of TargetRegisterInfo. +// +//===----------------------------------------------------------------------===// + +#include "DirectXRegisterInfo.h" +#include "DirectXFrameLowering.h" +#include "MCTargetDesc/DirectXMCTargetDesc.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "DirectXGenRegisterInfo.inc" + +using namespace llvm; + +DirectXRegisterInfo::~DirectXRegisterInfo() {} diff --git a/llvm/lib/Target/DirectX/DirectXRegisterInfo.h b/llvm/lib/Target/DirectX/DirectXRegisterInfo.h new file mode 100644 index 000000000000..023c5c3ef337 --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXRegisterInfo.h @@ -0,0 +1,28 @@ +//===-- DirectXRegisterInfo.h - Define RegisterInfo for DirectX -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the DirectX specific subclass of TargetRegisterInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DXILREGISTERINFO_H +#define LLVM_DIRECTX_DXILREGISTERINFO_H + +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "DirectXGenRegisterInfo.inc" + +namespace llvm { +struct DirectXRegisterInfo : public DirectXGenRegisterInfo { + DirectXRegisterInfo() : DirectXGenRegisterInfo(0) {} + ~DirectXRegisterInfo(); +}; +} // namespace llvm + +#endif // LLVM_DIRECTX_DXILREGISTERINFO_H diff --git a/llvm/lib/Target/DirectX/DirectXSubtarget.cpp b/llvm/lib/Target/DirectX/DirectXSubtarget.cpp new file mode 100644 index 000000000000..526b7d29fb13 --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXSubtarget.cpp @@ -0,0 +1,29 @@ +//===-- DirectXSubtarget.cpp - DirectX Subtarget Information --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the DirectX-specific subclass of TargetSubtarget. +/// +//===----------------------------------------------------------------------===// + +#include "DirectXSubtarget.h" +#include "DirectXTargetLowering.h" + +using namespace llvm; + +#define DEBUG_TYPE "directx-subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "DirectXGenSubtargetInfo.inc" + +DirectXSubtarget::DirectXSubtarget(const Triple &TT, StringRef CPU, + StringRef FS, const DirectXTargetMachine &TM) + : DirectXGenSubtargetInfo(TT, CPU, CPU, FS), FL(*this), TL(TM, *this) {} + +void DirectXSubtarget::anchor() {} diff --git a/llvm/lib/Target/DirectX/DirectXSubtarget.h b/llvm/lib/Target/DirectX/DirectXSubtarget.h new file mode 100644 index 000000000000..464d05a0e1ff --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXSubtarget.h @@ -0,0 +1,56 @@ +//===-- DirectXSubtarget.h - Define Subtarget for DirectX -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the DirectX specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DIRECTXSUBTARGET_H +#define LLVM_DIRECTX_DIRECTXSUBTARGET_H + +#include "DirectXFrameLowering.h" +#include "DirectXInstrInfo.h" +#include "DirectXTargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +#define GET_SUBTARGETINFO_HEADER +#include "DirectXGenSubtargetInfo.inc" + +namespace llvm { + +class DirectXTargetMachine; + +class DirectXSubtarget : public DirectXGenSubtargetInfo { + DirectXFrameLowering FL; + DirectXTargetLowering TL; + DirectXInstrInfo InstrInfo; + + virtual void anchor(); // virtual anchor method + +public: + DirectXSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + const DirectXTargetMachine &TM); + + /// Parses a subtarget feature string, setting appropriate options. + /// \note Definition of function is auto generated by `tblgen`. 
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + const DirectXTargetLowering *getTargetLowering() const override { + return &TL; + } + + const DirectXFrameLowering *getFrameLowering() const override { return &FL; } + + const DirectXInstrInfo *getInstrInfo() const override { return &InstrInfo; } +}; + +} // end namespace llvm + +#endif // LLVM_DIRECTX_DIRECTXSUBTARGET_H diff --git a/llvm/lib/Target/DirectX/DirectXTargetLowering.h b/llvm/lib/Target/DirectX/DirectXTargetLowering.h new file mode 100644 index 000000000000..dc19894ab165 --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXTargetLowering.h @@ -0,0 +1,31 @@ +//===-- DirectXTargetLowering.h - Define DX TargetLowering -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the DirectX specific subclass of TargetLowering. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DIRECTXTARGETLOWERING_H +#define LLVM_DIRECTX_DIRECTXTARGETLOWERING_H + +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { + +class DirectXSubtarget; +class DirectXTargetMachine; + +class DirectXTargetLowering : public TargetLowering { +public: + explicit DirectXTargetLowering(const DirectXTargetMachine &TM, + const DirectXSubtarget &STI); +}; + +} // end namespace llvm + +#endif // LLVM_DIRECTX_DIRECTXTARGETLOWERING_H diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp new file mode 100644 index 000000000000..44bef80ea6fb --- /dev/null +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -0,0 +1,144 @@ +//===- DirectXTargetMachine.cpp - DirectX Target Implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains DirectX target initializer. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectXTargetMachine.h"
+#include "DXILWriter/DXILWriterPass.h"
+#include "DirectX.h"
+#include "DirectXSubtarget.h"
+#include "DirectXTargetTransformInfo.h"
+#include "TargetInfo/DirectXTargetInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/MCSectionDXContainer.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
+  RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
+  auto *PR = PassRegistry::getPassRegistry();
+  initializeDXILPrepareModulePass(*PR);
+  initializeEmbedDXILPassPass(*PR);
+  initializeDXILOpLoweringLegacyPass(*PR);
+  initializeDXILTranslateMetadataPass(*PR);
+}
+
+class DXILTargetObjectFile : public TargetLoweringObjectFile {
+public:
+  DXILTargetObjectFile() = default;
+
+  MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+                                      const TargetMachine &TM) const override {
+    return getContext().getDXContainerSection(GO->getSection(), Kind);
+  }
+
+protected:
+  MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+                                    const TargetMachine &TM) const override {
+    llvm_unreachable("Not supported!");
+  }
+};
+
+class DirectXPassConfig : public TargetPassConfig {
+public:
+  DirectXPassConfig(DirectXTargetMachine &TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  DirectXTargetMachine &getDirectXTargetMachine() const {
+    return getTM<DirectXTargetMachine>();
+  }
+
+  FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
+};
+
+DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Optional<Reloc::Model> RM,
+                                           Optional<CodeModel::Model> CM,
+                                           CodeGenOpt::Level OL, bool JIT)
+    : LLVMTargetMachine(T,
+                        "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
+                        "f32:32-f64:64-n8:16:32:64",
+                        TT, CPU, FS, Options, Reloc::Static, CodeModel::Small,
+                        OL),
+      TLOF(std::make_unique<DXILTargetObjectFile>()),
+      Subtarget(std::make_unique<DirectXSubtarget>(TT, CPU, FS, *this)) {
+  initAsmInfo();
+}
+
+DirectXTargetMachine::~DirectXTargetMachine() {}
+
+bool DirectXTargetMachine::addPassesToEmitFile(
+    PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+    CodeGenFileType FileType, bool DisableVerify,
+    MachineModuleInfoWrapperPass *MMIWP) {
+  PM.add(createDXILOpLoweringLegacyPass());
+  PM.add(createDXILPrepareModulePass());
+  PM.add(createDXILTranslateMetadataPass());
+  if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+    PM.add(createDXILEmbedderPass());
+  }
+  switch (FileType) {
+  case CGFT_AssemblyFile:
+    PM.add(createPrintModulePass(Out, "", true));
+    break;
+  case CGFT_ObjectFile:
+    if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+      if (!MMIWP)
+        MMIWP = new MachineModuleInfoWrapperPass(this);
+      PM.add(MMIWP);
+      if (addAsmPrinter(PM, Out, DwoOut, FileType,
+                        MMIWP->getMMI().getContext()))
+        return true;
+    } else
+      PM.add(createDXILWriterPass(Out));
+    break;
+  case CGFT_Null:
+    break;
+  }
+  return false;
+}
+
+bool DirectXTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+                                             MCContext *&Ctx,
+                                             raw_pwrite_stream &Out,
+                                             bool DisableVerify) {
+  return true;
+}
+
+TargetPassConfig *DirectXTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new DirectXPassConfig(*this, PM);
+}
+
+const DirectXSubtarget *
+DirectXTargetMachine::getSubtargetImpl(const Function &) const {
+  return Subtarget.get();
+}
+
+TargetTransformInfo
+DirectXTargetMachine::getTargetTransformInfo(const Function &F) const {
+  return TargetTransformInfo(DirectXTTIImpl(this, F));
+}
+
+DirectXTargetLowering::DirectXTargetLowering(const DirectXTargetMachine &TM,
+                                             const DirectXSubtarget &STI)
+    : TargetLowering(TM) {}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h
new file mode 100644
index 000000000000..ae41638b6acf
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h
@@ -0,0 +1,51 @@
+//===- DirectXTargetMachine.h - DirectX Target Implementation ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXTARGETMACHINE_H
+#define LLVM_DIRECTX_DIRECTXTARGETMACHINE_H
+
+#include "DirectXSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class Function;
+class DirectXTargetMachine : public LLVMTargetMachine {
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  std::unique_ptr<DirectXSubtarget> Subtarget;
+
+public:
+  DirectXTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+                       CodeGenOpt::Level OL, bool JIT);
+
+  ~DirectXTargetMachine() override;
+
+  bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
+                           raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+                           bool DisableVerify,
+                           MachineModuleInfoWrapperPass *MMIWP) override;
+
+  bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
+                         raw_pwrite_stream &Out, bool DisableVerify) override;
+
+  const DirectXSubtarget *getSubtargetImpl(const Function &) const override;
+
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
+
+  TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXTARGETMACHINE_H
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
new file mode 100644
index 000000000000..90beb386fa44
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
@@ -0,0 +1,39 @@
+//===- DirectXTargetTransformInfo.h - DirectX TTI ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DIRECTXTARGETTRANSFORMINFO_H +#define LLVM_DIRECTX_DIRECTXTARGETTRANSFORMINFO_H + +#include "DirectXSubtarget.h" +#include "DirectXTargetMachine.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/Function.h" + +namespace llvm { +class DirectXTTIImpl : public BasicTTIImplBase<DirectXTTIImpl> { + using BaseT = BasicTTIImplBase<DirectXTTIImpl>; + using TTI = TargetTransformInfo; + + friend BaseT; + + const DirectXSubtarget *ST; + const DirectXTargetLowering *TLI; + + const DirectXSubtarget *getST() const { return ST; } + const DirectXTargetLowering *getTLI() const { return TLI; } + +public: + explicit DirectXTTIImpl(const DirectXTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} +}; +} // namespace llvm + +#endif // LLVM_DIRECTX_DIRECTXTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp new file mode 100644 index 000000000000..78ccbc444bce --- /dev/null +++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp @@ -0,0 +1,28 @@ +//===-- DirectXContainerObjectWriter.cpp - DX object writer ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains DXContainer object writers for the DirectX backend. +// +//===----------------------------------------------------------------------===// + +#include "DirectXContainerObjectWriter.h" +#include "llvm/MC/MCDXContainerWriter.h" + +using namespace llvm; + +namespace { +class DirectXContainerObjectWriter : public MCDXContainerTargetWriter { +public: + DirectXContainerObjectWriter() : MCDXContainerTargetWriter() {} +}; +} // namespace + +std::unique_ptr<MCObjectTargetWriter> +llvm::createDXContainerTargetObjectWriter() { + return std::make_unique<DirectXContainerObjectWriter>(); +} diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h new file mode 100644 index 000000000000..a6fbdc865f7d --- /dev/null +++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h @@ -0,0 +1,24 @@ +//===-- DirectXContainerObjectWriter.h - DX object writer ------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains DXContainer object writers for the DirectX backend.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DIRECTXCONTAINEROBJECTWRITER_H +#define LLVM_DIRECTX_DIRECTXCONTAINEROBJECTWRITER_H + +#include "llvm/MC/MCObjectWriter.h" + +namespace llvm { + +std::unique_ptr<MCObjectTargetWriter> createDXContainerTargetObjectWriter(); + +} + +#endif // LLVM_DIRECTX_DIRECTXCONTAINEROBJECTWRITER_H diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp new file mode 100644 index 000000000000..0c97ab62a37b --- /dev/null +++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp @@ -0,0 +1,152 @@ +//===- DirectXMCTargetDesc.cpp - DirectX Target Implementation --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains DirectX target initializer. +/// +//===----------------------------------------------------------------------===// + +#include "DirectXMCTargetDesc.h" +#include "DirectXContainerObjectWriter.h" +#include "TargetInfo/DirectXTargetInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCDXContainerWriter.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Compiler.h" +#include <memory> + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#define GET_INSTRINFO_MC_HELPERS +#include "DirectXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "DirectXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "DirectXGenRegisterInfo.inc" + +namespace { + +// DXILInstPrinter is a null stub because DXIL instructions aren't printed.
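// (The code emitter and asm backend further below follow the same stub pattern: DXILMCCodeEmitter::encodeInstruction and DXILAsmBackend::applyFixup are deliberate no-ops, present only so the MC registration hooks at the end of this file have concrete classes to instantiate.)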
+class DXILInstPrinter : public MCInstPrinter { +public: + DXILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override {} + + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override { + return std::make_pair("", 0ull); + } + +private: +}; + +class DXILMCCodeEmitter : public MCCodeEmitter { +public: + DXILMCCodeEmitter() {} + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override {} +}; + +class DXILAsmBackend : public MCAsmBackend { + +public: + DXILAsmBackend(const MCSubtargetInfo &STI) : MCAsmBackend(support::little) {} + ~DXILAsmBackend() override = default; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override {} + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + return createDXContainerTargetObjectWriter(); + } + + unsigned getNumFixupKinds() const override { return 0; } + + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override { + return true; + } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + return true; + } +}; + +class DirectXMCAsmInfo : public MCAsmInfo { +public: + explicit DirectXMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) + : MCAsmInfo() {} +}; + +} // namespace + +static MCInstPrinter *createDXILMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return new DXILInstPrinter(MAI, MII, MRI); + return nullptr; +} + +MCCodeEmitter *createDXILMCCodeEmitter(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new DXILMCCodeEmitter(); +} + +MCAsmBackend *createDXILMCAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + return new DXILAsmBackend(STI); +} + +static MCSubtargetInfo * +createDirectXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + return createDirectXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); +} + +static MCRegisterInfo *createDirectXMCRegisterInfo(const Triple &Triple) { + return new MCRegisterInfo(); +} + +static MCInstrInfo *createDirectXMCInstrInfo() { return new MCInstrInfo(); } + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetMC() { + Target &T = getTheDirectXTarget(); + RegisterMCAsmInfo<DirectXMCAsmInfo> X(T); + TargetRegistry::RegisterMCInstrInfo(T, createDirectXMCInstrInfo); + TargetRegistry::RegisterMCInstPrinter(T, createDXILMCInstPrinter); + TargetRegistry::RegisterMCRegInfo(T, createDirectXMCRegisterInfo); + TargetRegistry::RegisterMCSubtargetInfo(T, createDirectXMCSubtargetInfo); + TargetRegistry::RegisterMCCodeEmitter(T, createDXILMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(T, createDXILMCAsmBackend); +} diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h new file mode 100644 index 000000000000..0c3873a24417 --- /dev/null +++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h @@ -0,0 +1,29 @@ +//===- DirectXMCTargetDesc.h - DirectX Target Interface ---------*- C++ -*-===//
+// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains DirectX target interface. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_DIRECTXMCTARGETDESC_H +#define LLVM_DIRECTX_DIRECTXMCTARGETDESC_H + +// Include DirectX stub register info +#define GET_REGINFO_ENUM +#include "DirectXGenRegisterInfo.inc" + +// Include DirectX stub instruction info +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS +#include "DirectXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "DirectXGenSubtargetInfo.inc" + +#endif // LLVM_DIRECTX_DIRECTXMCTARGETDESC_H diff --git a/llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp b/llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp new file mode 100644 index 000000000000..1d536bbd0011 --- /dev/null +++ b/llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp @@ -0,0 +1,119 @@ +//===- Target/DirectX/PointerTypeAnalysis.cpp - PointerType analysis ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Analysis pass to assign types to opaque pointers. +// +//===----------------------------------------------------------------------===// + +#include "PointerTypeAnalysis.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; +using namespace llvm::dxil; + +namespace { + +// Classifies the type of the value passed in by walking the value's users to +// find a typed instruction to materialize a type from. +TypedPointerType *classifyPointerType(const Value *V) { + assert(V->getType()->isOpaquePointerTy() && + "classifyPointerType called with non-opaque pointer"); + Type *PointeeTy = nullptr; + if (auto *Inst = dyn_cast<GetElementPtrInst>(V)) { + if (!Inst->getResultElementType()->isOpaquePointerTy()) + PointeeTy = Inst->getResultElementType(); + } else if (auto *Inst = dyn_cast<AllocaInst>(V)) { + PointeeTy = Inst->getAllocatedType(); + } + for (const auto *User : V->users()) { + Type *NewPointeeTy = nullptr; + if (const auto *Inst = dyn_cast<LoadInst>(User)) { + NewPointeeTy = Inst->getType(); + } else if (const auto *Inst = dyn_cast<StoreInst>(User)) { + NewPointeeTy = Inst->getValueOperand()->getType(); + } else if (const auto *Inst = dyn_cast<GetElementPtrInst>(User)) { + NewPointeeTy = Inst->getSourceElementType(); + } + if (NewPointeeTy) { + // HLSL doesn't support pointers, so it is unlikely to get more than one + // or two levels of indirection in the IR. Because of this, recursion is + // pretty safe. + if (NewPointeeTy->isOpaquePointerTy()) + return TypedPointerType::get(classifyPointerType(User), + V->getType()->getPointerAddressSpace()); + if (!PointeeTy) + PointeeTy = NewPointeeTy; + else if (PointeeTy != NewPointeeTy) + PointeeTy = Type::getInt8Ty(V->getContext()); + } + } + // If we were unable to determine the pointee type, set to i8 + if (!PointeeTy) + PointeeTy = Type::getInt8Ty(V->getContext()); + return TypedPointerType::get(PointeeTy, + V->getType()->getPointerAddressSpace()); +} + +// This function constructs a function type accepting typed pointers.
It only +// handles function arguments and return types, and assigns the function type to +// the function's value in the type map. +void classifyFunctionType(const Function &F, PointerTypeMap &Map) { + SmallVector<Type *, 8> NewArgs; + bool HasOpaqueTy = false; + Type *RetTy = F.getReturnType(); + if (RetTy->isOpaquePointerTy()) { + RetTy = nullptr; + for (const auto &B : F) { + for (const auto &I : B) { + if (const auto *RetInst = dyn_cast_or_null<ReturnInst>(&I)) { + Type *NewRetTy = classifyPointerType(RetInst->getReturnValue()); + if (!RetTy) + RetTy = NewRetTy; + else if (RetTy != NewRetTy) + RetTy = TypedPointerType::get( + Type::getInt8Ty(I.getContext()), + F.getReturnType()->getPointerAddressSpace()); + } + } + } + } + for (auto &A : F.args()) { + Type *ArgTy = A.getType(); + if (ArgTy->isOpaquePointerTy()) { + TypedPointerType *NewTy = classifyPointerType(&A); + Map[&A] = NewTy; + ArgTy = NewTy; + HasOpaqueTy = true; + } + NewArgs.push_back(ArgTy); + } + if (!HasOpaqueTy) + return; + Map[&F] = FunctionType::get(RetTy, NewArgs, false); +} +} // anonymous namespace + +PointerTypeMap PointerTypeAnalysis::run(const Module &M) { + PointerTypeMap Map; + for (auto &G : M.globals()) { + if (G.getType()->isOpaquePointerTy()) + Map[&G] = classifyPointerType(&G); + } + for (auto &F : M) { + classifyFunctionType(F, Map); + + for (const auto &B : F) { + for (const auto &I : B) { + if (I.getType()->isOpaquePointerTy()) + Map[&I] = classifyPointerType(&I); + } + } + } + + return Map; +} diff --git a/llvm/lib/Target/DirectX/PointerTypeAnalysis.h b/llvm/lib/Target/DirectX/PointerTypeAnalysis.h new file mode 100644 index 000000000000..c4164b6bf359 --- /dev/null +++ b/llvm/lib/Target/DirectX/PointerTypeAnalysis.h @@ -0,0 +1,43 @@ +//===- Target/DirectX/PointerTypeAnalysis.h - PointerType analysis --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Analysis pass to assign types to opaque pointers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_DIRECTX_POINTERTYPEANALYSIS_H +#define LLVM_TARGET_DIRECTX_POINTERTYPEANALYSIS_H + +#include "DXILPointerType.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +namespace dxil { + +// Store the underlying type and the number of pointer indirections +using PointerTypeMap = DenseMap<const Value *, Type *>; + +/// An analysis to compute the \c PointerTypes for pointers in a \c Module. +/// Since this analysis is only run during codegen and the new pass manager +/// doesn't support codegen passes, this is written as a function in a namespace. +/// It is very simple to transform it into a proper analysis pass. +/// This code relies on typed pointers existing as LLVM types, but could be +/// migrated to a custom Type if PointerType loses typed support. +namespace PointerTypeAnalysis { + +/// Compute the \c PointerTypeMap for the module \c M.
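/// A rough usage sketch (not part of the vendored change; `M` is assumed to be
/// an llvm::Module and `SomeGlobal` one of its global variables):
/// \code
///   PointerTypeMap Map = PointerTypeAnalysis::run(M);
///   Type *Ty = Map.lookup(&SomeGlobal); // reconstructed typed pointer, or null
/// \endcode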
+PointerTypeMap run(const Module &M); +} // namespace PointerTypeAnalysis + +} // namespace dxil + +} // namespace llvm + +#endif // LLVM_TARGET_DIRECTX_POINTERTYPEANALYSIS_H diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp new file mode 100644 index 000000000000..54c577debc34 --- /dev/null +++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp @@ -0,0 +1,30 @@ +//===- DirectXTargetInfo.cpp - DirectX Target Implementation ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains DirectX target initializer. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +Target &getTheDirectXTarget() { + static Target TheDirectXTarget; + return TheDirectXTarget; +} +} // namespace llvm + +using namespace llvm; + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetInfo() { + RegisterTarget<Triple::dxil, /*HasJIT=*/false> X( + getTheDirectXTarget(), "dxil", "DirectX Intermediate Language", "DXIL"); +} diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h new file mode 100644 index 000000000000..a860c430f81a --- /dev/null +++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h @@ -0,0 +1,18 @@ +//===-- DirectXTargetInfo.h - DirectX Target Implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DIRECTX_TARGETINFO_DIRECTXTARGETINFO_H +#define LLVM_DIRECTX_TARGETINFO_DIRECTXTARGETINFO_H + +namespace llvm { +class Target; + +Target &getTheDirectXTarget(); +} // namespace llvm + +#endif // LLVM_DIRECTX_TARGETINFO_DIRECTXTARGETINFO_H diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 15eba89eeb55..4553f2fd9228 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -681,7 +681,7 @@ bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) { Subsection = HexagonMCExpr::create( MCConstantExpr::create(8192 + Res, getContext()), getContext()); - getStreamer().SubSection(Subsection); + getStreamer().subSection(Subsection); return false; } @@ -1450,7 +1450,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &MO_0 = Inst.getOperand(0); // push section onto section stack - MES->PushSection(); + MES->pushSection(); std::string myCharStr; MCSectionELF *mySection; @@ -1485,7 +1485,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, } else llvm_unreachable("unexpected type of machine operand!"); - MES->SwitchSection(mySection); + MES->switchSection(mySection); unsigned byteSize = is32bit ?
4 : 8; getStreamer().emitCodeAlignment(byteSize, &getSTI(), byteSize); @@ -1526,7 +1526,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, } else llvm_unreachable("unexpected type of machine operand!"); - MES->PopSection(); + MES->popSection(); if (Sym) { MCInst TmpInst; diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp index 17adf32750db..4d5789a3c5fe 100644 --- a/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -1056,9 +1056,8 @@ void BT::runEdgeQueue(BitVector &BlockScanned) { CFGEdge Edge = FlowQ.front(); FlowQ.pop(); - if (EdgeExec.count(Edge)) + if (!EdgeExec.insert(Edge).second) return; - EdgeExec.insert(Edge); ReachedBB.insert(Edge.second); const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 3c742c98077b..58d5df4c1f71 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -14,9 +14,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -78,11 +78,12 @@ static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI, uint64_t Operand = Upper26 | Lower6; return Operand; } -static HexagonDisassembler const &disassembler(void const *Decoder) { +static HexagonDisassembler const &disassembler(const MCDisassembler *Decoder) { return *static_cast(Decoder); } template -static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { +static void signedDecoder(MCInst &MI, unsigned tmp, + const MCDisassembler *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); int64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp)); int64_t Extended = SignExtend64<32>(FullValue); @@ -95,65 +96,66 @@ static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGeneralSubRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler 
*Decoder); static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, - unsigned RegNo, + const MCDisassembler *Decoder); +static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); - + const MCDisassembler *Decoder); static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t /*Address*/, const void *Decoder); + uint64_t /*Address*/, + const MCDisassembler *Decoder); static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); #include "HexagonDepDecoders.inc" #include "HexagonGenDisassemblerTables.inc" @@ -542,15 +544,15 @@ static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Fail; } -static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder); } static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { static const MCPhysReg IntRegDecoderTable[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, @@ -563,10 +565,10 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); } -static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus 
+DecodeGeneralSubRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { static const MCPhysReg GeneralSubRegDecoderTable[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7, @@ -579,7 +581,7 @@ static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst, static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { static const MCPhysReg HvxVRDecoderTable[] = { Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, @@ -592,9 +594,10 @@ static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, HvxVRDecoderTable); } -static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { +static DecodeStatus +DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg DoubleRegDecoderTable[] = { Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, @@ -604,8 +607,10 @@ static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable); } -static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass( - MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { +static DecodeStatus +DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg GeneralDoubleLow8RegDecoderTable[] = { Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11}; @@ -615,7 +620,7 @@ static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass( static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { static const MCPhysReg HvxWRDecoderTable[] = { Hexagon::W0, Hexagon::WR0, Hexagon::W1, Hexagon::WR1, Hexagon::W2, Hexagon::WR2, Hexagon::W3, Hexagon::WR3, Hexagon::W4, Hexagon::WR4, @@ -629,11 +634,11 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable); } -LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. -static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { +LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. 
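// (Presumably the generated decoder tables do not reference DecodeHvxVQRRegisterClass yet, so without the attribute this unreferenced static function would trigger -Wunused-function.)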
+ static DecodeStatus + DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg HvxVQRDecoderTable[] = { Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3, Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7}; @@ -643,7 +648,7 @@ static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, Hexagon::P2, Hexagon::P3}; @@ -652,7 +657,7 @@ static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { static const MCPhysReg HvxQRDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, Hexagon::Q2, Hexagon::Q3}; @@ -661,7 +666,7 @@ static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { using namespace Hexagon; static const MCPhysReg CtrlRegDecoderTable[] = { @@ -687,9 +692,9 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { +static DecodeStatus +DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, + const MCDisassembler *Decoder) { using namespace Hexagon; static const MCPhysReg CtrlReg64DecoderTable[] = { @@ -717,7 +722,7 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Register = 0; switch (RegNo) { case 0: @@ -735,7 +740,7 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); int64_t FullValue = fullValue(Disassembler, MI, tmp); assert(FullValue >= 0 && "Negative in unsigned decoder"); @@ -744,7 +749,8 @@ static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, } static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const MCDisassembler *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); tmp = SignExtend64(tmp, Bits); @@ -754,7 +760,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp, // custom decoder for various jump/call immediates static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); // r13_2 is not extendable, so if there are no extent bits, it's r13_2 @@ -762,7 +768,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, Bits = 15; uint64_t 
FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits)); uint32_t Extended = FullValue + Address; - if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4)) + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0, + 4)) HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); return MCDisassembler::Success; } @@ -799,7 +806,7 @@ static const uint16_t SysRegDecoderTable[] = { static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= sizeof(SysRegDecoderTable) / sizeof(SysRegDecoderTable[0])) return MCDisassembler::Fail; @@ -824,9 +831,9 @@ static const uint16_t SysReg64DecoderTable[] = { Hexagon::S73_72, Hexagon::S75_74, Hexagon::S77_76, Hexagon::S79_78, }; -static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { +static DecodeStatus +DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, + const MCDisassembler *Decoder) { RegNo = RegNo >> 1; if (RegNo >= sizeof(SysReg64DecoderTable) / sizeof(SysReg64DecoderTable[0])) return MCDisassembler::Fail; @@ -839,9 +846,9 @@ static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { +static DecodeStatus +DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, + const MCDisassembler *Decoder) { using namespace Hexagon; static const MCPhysReg GuestRegDecoderTable[] = { @@ -865,9 +872,10 @@ static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { +static DecodeStatus +DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { using namespace Hexagon; static const MCPhysReg GuestReg64DecoderTable[] = { diff --git a/llvm/lib/Target/Hexagon/HexagonArch.h b/llvm/lib/Target/Hexagon/HexagonArch.h deleted file mode 100644 index 4a42ec98feb1..000000000000 --- a/llvm/lib/Target/Hexagon/HexagonArch.h +++ /dev/null @@ -1,31 +0,0 @@ -//===- HexagonArch.h ------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H -#define LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "HexagonDepArch.h" -#include - -namespace llvm { -namespace Hexagon { - -template -llvm::Optional GetCpu(ArchCont const &ArchList, Val CPUString) { - llvm::Optional Res; - auto Entry = ArchList.find(CPUString); - if (Entry != ArchList.end()) - Res = Entry->second; - return Res; -} -} // namespace Hexagon -} // namespace llvm -#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 411078052e0f..48d339234e9e 100644 --- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -202,7 +202,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, MCSectionELF *Section = OutStreamer.getContext().getELFSection( sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - OutStreamer.SwitchSection(Section); + OutStreamer.switchSection(Section); Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName)); if (Sym->isUndefined()) { @@ -231,7 +231,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, MCSectionELF *Section = OutStreamer.getContext().getELFSection( ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - OutStreamer.SwitchSection(Section); + OutStreamer.switchSection(Section); Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName)); if (Sym->isUndefined()) { OutStreamer.emitLabel(Sym); @@ -331,7 +331,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8, getSubtargetInfo()); - OutStreamer->SwitchSection(Current.first, Current.second); + OutStreamer->switchSection(Current.first, Current.second); MCInst TmpInst; MCOperand &Reg = MappedInst.getOperand(0); TmpInst.setOpcode(Hexagon::L2_loadrdgp); @@ -348,7 +348,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCSectionSubPair Current = OutStreamer->getCurrentSection(); MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4, getSubtargetInfo()); - OutStreamer->SwitchSection(Current.first, Current.second); + OutStreamer->switchSection(Current.first, Current.second); MCInst TmpInst; MCOperand &Reg = MappedInst.getOperand(0); TmpInst.setOpcode(Hexagon::L2_loadrigp); diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index b2a842233bb8..673b397ef3c5 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,9 @@ static cl::opt MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, cl::init(std::numeric_limits::max())); static unsigned CountBitSplit = 0; +static cl::opt RegisterSetLimit("hexbit-registerset-limit", + cl::Hidden, cl::init(1000)); + namespace llvm { void initializeHexagonBitSimplifyPass(PassRegistry& Registry); @@ -72,23 +76,29 @@ namespace llvm { namespace { // Set of virtual registers, based on BitVector. 
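// The rewrite below replaces the private BitVector inheritance with a wrapper that bounds the set's footprint: insert() appends each newly-set index to the LRU deque, and once the deque grows past RegisterSetLimit the oldest index is evicted from both the deque and the backing BitVector.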
- struct RegisterSet : private BitVector { + struct RegisterSet { RegisterSet() = default; - explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {} RegisterSet(const RegisterSet &RS) = default; - using BitVector::clear; - using BitVector::count; + void clear() { + Bits.clear(); + LRU.clear(); + } + + unsigned count() const { + return Bits.count(); + } unsigned find_first() const { - int First = BitVector::find_first(); + int First = Bits.find_first(); if (First < 0) return 0; return x2v(First); } unsigned find_next(unsigned Prev) const { - int Next = BitVector::find_next(v2x(Prev)); + int Next = Bits.find_next(v2x(Prev)); if (Next < 0) return 0; return x2v(Next); @@ -97,54 +107,72 @@ namespace { RegisterSet &insert(unsigned R) { unsigned Idx = v2x(R); ensure(Idx); - return static_cast(BitVector::set(Idx)); + bool Exists = Bits.test(Idx); + Bits.set(Idx); + if (!Exists) { + LRU.push_back(Idx); + if (LRU.size() > RegisterSetLimit) { + unsigned T = LRU.front(); + Bits.reset(T); + LRU.pop_front(); + } + } + return *this; } RegisterSet &remove(unsigned R) { unsigned Idx = v2x(R); - if (Idx >= size()) - return *this; - return static_cast(BitVector::reset(Idx)); + if (Idx < Bits.size()) { + bool Exists = Bits.test(Idx); + Bits.reset(Idx); + if (Exists) { + auto F = llvm::find(LRU, Idx); + assert(F != LRU.end()); + LRU.erase(F); + } + } + return *this; } RegisterSet &insert(const RegisterSet &Rs) { - return static_cast(BitVector::operator|=(Rs)); + for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) + insert(R); + return *this; } RegisterSet &remove(const RegisterSet &Rs) { - return static_cast(BitVector::reset(Rs)); + for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) + remove(R); + return *this; } - reference operator[](unsigned R) { - unsigned Idx = v2x(R); - ensure(Idx); - return BitVector::operator[](Idx); - } bool operator[](unsigned R) const { unsigned Idx = v2x(R); - assert(Idx < size()); - return BitVector::operator[](Idx); + return Idx < Bits.size() ? Bits[Idx] : false; } bool has(unsigned R) const { unsigned Idx = v2x(R); - if (Idx >= size()) + if (Idx >= Bits.size()) return false; - return BitVector::test(Idx); + return Bits.test(Idx); } bool empty() const { - return !BitVector::any(); + return !Bits.any(); } bool includes(const RegisterSet &Rs) const { - // A.BitVector::test(B) <=> A-B != {} - return !Rs.BitVector::test(*this); + // A.test(B) <=> A-B != {} + return !Rs.Bits.test(Bits); } bool intersects(const RegisterSet &Rs) const { - return BitVector::anyCommon(Rs); + return Bits.anyCommon(Rs.Bits); } private: + BitVector Bits; + std::deque LRU; + void ensure(unsigned Idx) { - if (size() <= Idx) - resize(std::max(Idx+1, 32U)); + if (Bits.size() <= Idx) + Bits.resize(std::max(Idx+1, 32U)); } static inline unsigned v2x(unsigned v) { @@ -1997,7 +2025,7 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) { if (!isInt<8>(V)) return false; - MI->RemoveOperand(2); + MI->removeOperand(2); switch (Opc) { case Hexagon::S2_storerb_io: MI->setDesc(HII.get(Hexagon::S4_storeirb_io)); diff --git a/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp index faa48211cd82..ca7fddb0ebe5 100644 --- a/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp @@ -33,8 +33,9 @@ using namespace llvm; // Since we have no exact knowledge of code layout, allow some safety buffer // for jump target. 
This is measured in bytes. -static cl::opt BranchRelaxSafetyBuffer("branch-relax-safety-buffer", - cl::init(200), cl::Hidden, cl::ZeroOrMore, cl::desc("safety buffer size")); +static cl::opt + BranchRelaxSafetyBuffer("branch-relax-safety-buffer", cl::init(200), + cl::Hidden, cl::desc("safety buffer size")); namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index fc5e05d8c9a0..2fe2e032714a 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -52,13 +52,12 @@ using namespace llvm; static cl::opt OptSpeculate("commgep-speculate", cl::init(true), - cl::Hidden, cl::ZeroOrMore); + cl::Hidden); -static cl::opt OptEnableInv("commgep-inv", cl::init(true), cl::Hidden, - cl::ZeroOrMore); +static cl::opt OptEnableInv("commgep-inv", cl::init(true), cl::Hidden); static cl::opt OptEnableConst("commgep-const", cl::init(true), - cl::Hidden, cl::ZeroOrMore); + cl::Hidden); namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index d8af35cbf3a8..56fb50cdb09e 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -9,6 +9,7 @@ #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -28,12 +29,13 @@ using namespace llvm; -static cl::opt CountThreshold("hexagon-cext-threshold", - cl::init(3), cl::Hidden, cl::ZeroOrMore, - cl::desc("Minimum number of extenders to trigger replacement")); +static cl::opt CountThreshold( + "hexagon-cext-threshold", cl::init(3), cl::Hidden, + cl::desc("Minimum number of extenders to trigger replacement")); -static cl::opt ReplaceLimit("hexagon-cext-limit", cl::init(0), - cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum number of replacements")); +static cl::opt + ReplaceLimit("hexagon-cext-limit", cl::init(0), cl::Hidden, + cl::desc("Maximum number of replacements")); namespace llvm { void initializeHexagonConstExtendersPass(PassRegistry&); diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp index 105bf2811a20..8029dcff8052 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -868,8 +868,8 @@ void MachineConstPropagator::removeCFGEdge(MachineBasicBlock *From, int N = PN.getNumOperands() - 2; while (N > 0) { if (PN.getOperand(N + 1).getMBB() == From) { - PN.RemoveOperand(N + 1); - PN.RemoveOperand(N); + PN.removeOperand(N + 1); + PN.removeOperand(N); } N -= 2; } @@ -1217,8 +1217,8 @@ bool MachineConstEvaluator::evaluateCMPii(uint32_t Cmp, const APInt &A1, unsigned W2 = A2.getBitWidth(); unsigned MaxW = (W1 >= W2) ? W1 : W2; if (Cmp & Comparison::U) { - const APInt Zx1 = A1.zextOrSelf(MaxW); - const APInt Zx2 = A2.zextOrSelf(MaxW); + APInt Zx1 = A1.zext(MaxW); + APInt Zx2 = A2.zext(MaxW); if (Cmp & Comparison::L) Result = Zx1.ult(Zx2); else if (Cmp & Comparison::G) @@ -1227,8 +1227,8 @@ bool MachineConstEvaluator::evaluateCMPii(uint32_t Cmp, const APInt &A1, } // Signed comparison. 
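// Context for the zextOrSelf/sextOrSelf replacements in this hunk: APInt::zext/sext now accept a target width equal to the current width and act as the identity in that case, so the *OrSelf variants are redundant here, where MaxW is at least as wide as either operand.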
- const APInt Sx1 = A1.sextOrSelf(MaxW); - const APInt Sx2 = A2.sextOrSelf(MaxW); + APInt Sx1 = A1.sext(MaxW); + APInt Sx2 = A2.sext(MaxW); if (Cmp & Comparison::L) Result = Sx1.slt(Sx2); else if (Cmp & Comparison::G) @@ -1813,7 +1813,7 @@ bool MachineConstEvaluator::evaluateSplati(const APInt &A1, unsigned Bits, unsigned Count, APInt &Result) { assert(Count > 0); unsigned BW = A1.getBitWidth(), SW = Count*Bits; - APInt LoBits = (Bits < BW) ? A1.trunc(Bits) : A1.zextOrSelf(Bits); + APInt LoBits = (Bits < BW) ? A1.trunc(Bits) : A1.zext(Bits); if (Count > 1) LoBits = LoBits.zext(SW); @@ -2510,7 +2510,7 @@ APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX, void HexagonConstEvaluator::replaceWithNop(MachineInstr &MI) { MI.setDesc(HII.get(Hexagon::A2_nop)); while (MI.getNumOperands() > 0) - MI.RemoveOperand(0); + MI.removeOperand(0); } bool HexagonConstEvaluator::evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg RH, @@ -2538,9 +2538,9 @@ bool HexagonConstEvaluator::evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg } for (unsigned i = 0; i < HiVs.size(); ++i) { - APInt HV = HiVs[i].zextOrSelf(64) << 32; + APInt HV = HiVs[i].zext(64) << 32; for (unsigned j = 0; j < LoVs.size(); ++j) { - APInt LV = LoVs[j].zextOrSelf(64); + APInt LV = LoVs[j].zext(64); const Constant *C = intToConst(HV | LV); Result.add(C); if (Result.isBottom()) @@ -3165,7 +3165,7 @@ bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI, .addMBB(TargetB); BrI.setDesc(JD); while (BrI.getNumOperands() > 0) - BrI.RemoveOperand(0); + BrI.removeOperand(0); // This ensures that all implicit operands (e.g. implicit-def %r31, etc) // are present in the rewritten branch. for (auto &Op : NI->operands()) diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 2ee7f1325df9..dc5b674424c8 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -33,16 +33,14 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-copy-combine" -static -cl::opt IsCombinesDisabled("disable-merge-into-combines", - cl::Hidden, cl::ZeroOrMore, - cl::init(false), - cl::desc("Disable merging into combines")); -static -cl::opt IsConst64Disabled("disable-const64", - cl::Hidden, cl::ZeroOrMore, - cl::init(false), - cl::desc("Disable generation of const64")); +static cl::opt + IsCombinesDisabled("disable-merge-into-combines", cl::Hidden, + + cl::desc("Disable merging into combines")); +static cl::opt + IsConst64Disabled("disable-const64", cl::Hidden, + + cl::desc("Disable generation of const64")); static cl::opt MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index 56174dc7e136..41ce5c465d41 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -12,82 +12,28 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/ELF.h" - -#include -#include +#include "llvm/ADT/StringSwitch.h" namespace llvm { namespace Hexagon { enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 }; -static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68, 69}; -static constexpr ArrayRef ArchValsNum(ArchValsNumArray); - -static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", 
"v60", "v62", "v65", "v66", "v67", "v68", "v69" }; -static constexpr ArrayRef ArchValsText(ArchValsTextArray); - -static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68", "hexagonv69" }; -static constexpr ArrayRef CpuValsText(CpuValsTextArray); - -static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68", "v69" }; -static constexpr ArrayRef CpuNickText(CpuNickTextArray); - -static const std::map CpuTable{ - {"generic", Hexagon::ArchEnum::V5}, - {"hexagonv5", Hexagon::ArchEnum::V5}, - {"hexagonv55", Hexagon::ArchEnum::V55}, - {"hexagonv60", Hexagon::ArchEnum::V60}, - {"hexagonv62", Hexagon::ArchEnum::V62}, - {"hexagonv65", Hexagon::ArchEnum::V65}, - {"hexagonv66", Hexagon::ArchEnum::V66}, - {"hexagonv67", Hexagon::ArchEnum::V67}, - {"hexagonv67t", Hexagon::ArchEnum::V67}, - {"hexagonv68", Hexagon::ArchEnum::V68}, - {"hexagonv69", Hexagon::ArchEnum::V69}, -}; - -static const std::map ElfFlagsByCpuStr = { - {"generic", llvm::ELF::EF_HEXAGON_MACH_V5}, - {"hexagonv5", llvm::ELF::EF_HEXAGON_MACH_V5}, - {"hexagonv55", llvm::ELF::EF_HEXAGON_MACH_V55}, - {"hexagonv60", llvm::ELF::EF_HEXAGON_MACH_V60}, - {"hexagonv62", llvm::ELF::EF_HEXAGON_MACH_V62}, - {"hexagonv65", llvm::ELF::EF_HEXAGON_MACH_V65}, - {"hexagonv66", llvm::ELF::EF_HEXAGON_MACH_V66}, - {"hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67}, - {"hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T}, - {"hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68}, - {"hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69}, -}; -static const std::map ElfArchByMachFlags = { - {llvm::ELF::EF_HEXAGON_MACH_V5, "V5"}, - {llvm::ELF::EF_HEXAGON_MACH_V55, "V55"}, - {llvm::ELF::EF_HEXAGON_MACH_V60, "V60"}, - {llvm::ELF::EF_HEXAGON_MACH_V62, "V62"}, - {llvm::ELF::EF_HEXAGON_MACH_V65, "V65"}, - {llvm::ELF::EF_HEXAGON_MACH_V66, "V66"}, - {llvm::ELF::EF_HEXAGON_MACH_V67, "V67"}, - {llvm::ELF::EF_HEXAGON_MACH_V67T, "V67T"}, - {llvm::ELF::EF_HEXAGON_MACH_V68, "V68"}, - {llvm::ELF::EF_HEXAGON_MACH_V69, "V69"}, -}; -static const std::map ElfCpuByMachFlags = { - {llvm::ELF::EF_HEXAGON_MACH_V5, "hexagonv5"}, - {llvm::ELF::EF_HEXAGON_MACH_V55, "hexagonv55"}, - {llvm::ELF::EF_HEXAGON_MACH_V60, "hexagonv60"}, - {llvm::ELF::EF_HEXAGON_MACH_V62, "hexagonv62"}, - {llvm::ELF::EF_HEXAGON_MACH_V65, "hexagonv65"}, - {llvm::ELF::EF_HEXAGON_MACH_V66, "hexagonv66"}, - {llvm::ELF::EF_HEXAGON_MACH_V67, "hexagonv67"}, - {llvm::ELF::EF_HEXAGON_MACH_V67T, "hexagonv67t"}, - {llvm::ELF::EF_HEXAGON_MACH_V68, "hexagonv68"}, - {llvm::ELF::EF_HEXAGON_MACH_V69, "hexagonv69"}, -}; - +inline Optional getCpu(StringRef CPU) { + return StringSwitch>(CPU) + .Case("generic", Hexagon::ArchEnum::V5) + .Case("hexagonv5", Hexagon::ArchEnum::V5) + .Case("hexagonv55", Hexagon::ArchEnum::V55) + .Case("hexagonv60", Hexagon::ArchEnum::V60) + .Case("hexagonv62", Hexagon::ArchEnum::V62) + .Case("hexagonv65", Hexagon::ArchEnum::V65) + .Case("hexagonv66", Hexagon::ArchEnum::V66) + .Case("hexagonv67", Hexagon::ArchEnum::V67) + .Case("hexagonv67t", Hexagon::ArchEnum::V67) + .Case("hexagonv68", Hexagon::ArchEnum::V68) + .Case("hexagonv69", Hexagon::ArchEnum::V69) + .Default(None); +} } // namespace Hexagon -} // namespace llvm; +} // namespace llvm #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H diff --git a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc index 7164af3ad5c6..e979cfe6e325 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc +++ b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc @@ -14,58 +14,58 @@ #pragma clang diagnostic ignored "-Wunused-function" #endif -static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<12>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<13>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<14>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<3>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<4>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<5>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<7>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<9>(MI, tmp, Decoder); return MCDisassembler::Success; } -static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { +static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const MCDisassembler *Decoder) { signedDecoder<8>(MI, tmp, Decoder); return MCDisassembler::Success; } diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 2207925ceeba..f7227dca3b60 100644 --- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -826,8 +826,8 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, 
FR = RO.getReg(), FSR = RO.getSubReg(); else continue; - PN->RemoveOperand(i+1); - PN->RemoveOperand(i); + PN->removeOperand(i+1); + PN->removeOperand(i); } if (TR == 0) TR = SR, TSR = SSR; diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 2693940bb1e9..853553f57ba4 100644 --- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -696,7 +696,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, MI.setDesc(HII->get(TargetOpcode::COPY)); unsigned S = getRegState(ST); while (MI.getNumOperands() > 1) - MI.RemoveOperand(MI.getNumOperands()-1); + MI.removeOperand(MI.getNumOperands()-1); MachineFunction &MF = *MI.getParent()->getParent(); MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub); return true; diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 989a98571434..0b4a95bc9ce5 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -152,33 +152,38 @@ using namespace llvm; static cl::opt DisableDeallocRet("disable-hexagon-dealloc-ret", cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); -static cl::opt NumberScavengerSlots("number-scavenger-slots", - cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), - cl::ZeroOrMore); - -static cl::opt SpillFuncThreshold("spill-func-threshold", - cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), - cl::init(6), cl::ZeroOrMore); - -static cl::opt SpillFuncThresholdOs("spill-func-threshold-Os", - cl::Hidden, cl::desc("Specify Os spill func threshold"), - cl::init(1), cl::ZeroOrMore); - -static cl::opt EnableStackOVFSanitizer("enable-stackovf-sanitizer", - cl::Hidden, cl::desc("Enable runtime checks for stack overflow."), - cl::init(false), cl::ZeroOrMore); - -static cl::opt EnableShrinkWrapping("hexagon-shrink-frame", - cl::init(true), cl::Hidden, cl::ZeroOrMore, - cl::desc("Enable stack frame shrink wrapping")); - -static cl::opt ShrinkLimit("shrink-frame-limit", - cl::init(std::numeric_limits::max()), cl::Hidden, cl::ZeroOrMore, - cl::desc("Max count of stack frame shrink-wraps")); - -static cl::opt EnableSaveRestoreLong("enable-save-restore-long", - cl::Hidden, cl::desc("Enable long calls for save-restore stubs."), - cl::init(false), cl::ZeroOrMore); +static cl::opt + NumberScavengerSlots("number-scavenger-slots", cl::Hidden, + cl::desc("Set the number of scavenger slots"), + cl::init(2)); + +static cl::opt + SpillFuncThreshold("spill-func-threshold", cl::Hidden, + cl::desc("Specify O2(not Os) spill func threshold"), + cl::init(6)); + +static cl::opt + SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden, + cl::desc("Specify Os spill func threshold"), + cl::init(1)); + +static cl::opt EnableStackOVFSanitizer( + "enable-stackovf-sanitizer", cl::Hidden, + cl::desc("Enable runtime checks for stack overflow."), cl::init(false)); + +static cl::opt + EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden, + cl::desc("Enable stack frame shrink wrapping")); + +static cl::opt + ShrinkLimit("shrink-frame-limit", + cl::init(std::numeric_limits::max()), cl::Hidden, + cl::desc("Max count of stack frame shrink-wraps")); + +static cl::opt + EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden, + cl::desc("Enable long calls for save-restore stubs."), + cl::init(false)); static cl::opt EliminateFramePointer("hexagon-fp-elim", 
cl::init(true), cl::Hidden, cl::desc("Refrain from using FP whenever possible")); @@ -1018,7 +1023,7 @@ findCFILocation(MachineBasicBlock &B) { void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { for (auto &B : MF) { auto At = findCFILocation(B); - if (At.hasValue()) + if (At) insertCFIInstructionsAt(B, At.getValue()); } } diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index 0bb1658e7698..44f21dbacd3c 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -47,34 +47,36 @@ using namespace llvm; -static cl::opt VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), - cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); +static cl::opt + VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden, + cl::desc("Vreg# cutoff for insert generation.")); // The distance cutoff is selected based on the precheckin-perf results: // cutoffs 20, 25, 35, and 40 are worse than 30. -static cl::opt VRegDistCutoff("insert-dist-cutoff", cl::init(30U), - cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " - "generation.")); +static cl::opt + VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, + cl::desc("Vreg distance cutoff for insert " + "generation.")); // Limit the container sizes for extreme cases where we run out of memory. -static cl::opt MaxORLSize("insert-max-orl", cl::init(4096), - cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList")); +static cl::opt + MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden, + cl::desc("Maximum size of OrderedRegisterList")); static cl::opt MaxIFMSize("insert-max-ifmap", cl::init(1024), - cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap")); - -static cl::opt OptTiming("insert-timing", cl::init(false), cl::Hidden, - cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); -static cl::opt OptTimingDetail("insert-timing-detail", cl::init(false), - cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " - "generation")); - -static cl::opt OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, - cl::ZeroOrMore); -static cl::opt OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, - cl::ZeroOrMore); + cl::Hidden, + cl::desc("Maximum size of IFMap")); + +static cl::opt OptTiming("insert-timing", cl::Hidden, + cl::desc("Enable timing of insert generation")); +static cl::opt + OptTimingDetail("insert-timing-detail", cl::Hidden, + cl::desc("Enable detailed timing of insert " + "generation")); + +static cl::opt OptSelectAll0("insert-all0", cl::init(false), cl::Hidden); +static cl::opt OptSelectHas0("insert-has0", cl::init(false), cl::Hidden); // Whether to construct constant values via "insert". Could eliminate constant // extenders, but often not practical. -static cl::opt OptConst("insert-const", cl::init(false), cl::Hidden, - cl::ZeroOrMore); +static cl::opt OptConst("insert-const", cl::init(false), cl::Hidden); // The preprocessor gets confused when the DEBUG macro is passed larger // chunks of code. Use this function to detect debugging. 
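The cl::opt rewrites in this file, and throughout the rest of the patch, all delete `cl::ZeroOrMore`: in this LLVM cycle, command-line options stopped erroring on repeated occurrences, so the flag became a no-op for `cl::opt`. Several hunks also drop `cl::init(false)` for bool options, whose default-constructed value is already false. A sketch of the resulting minimal declaration (name and values hypothetical):

    static cl::opt<unsigned>
        ExampleCutoff("example-cutoff", cl::Hidden, cl::init(30),
                      cl::desc("Hypothetical option; repeated occurrences "
                               "are now accepted by default"));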
@@ -92,11 +94,8 @@ namespace { struct RegisterSet : private BitVector { RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} - RegisterSet(const RegisterSet &RS) : BitVector(RS) {} - RegisterSet &operator=(const RegisterSet &RS) { - BitVector::operator=(RS); - return *this; - } + RegisterSet(const RegisterSet &RS) = default; + RegisterSet &operator=(const RegisterSet &RS) = default; using BitVector::clear; diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 43afae441457..acc0bb8941c1 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -81,9 +81,9 @@ static cl::opt HWCreatePreheader("hexagon-hwloop-preheader", // Turn it off by default. If a preheader block is not created here, the // software pipeliner may be unable to find a block suitable to serve as // a preheader. In that case SWP will not run. -static cl::opt SpecPreheader("hwloop-spec-preheader", cl::init(false), - cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " - "instructions")); +static cl::opt SpecPreheader("hwloop-spec-preheader", cl::Hidden, + cl::desc("Allow speculation of preheader " + "instructions")); STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); @@ -1911,8 +1911,8 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); if (PredB != Latch) { - PN->RemoveOperand(i+1); - PN->RemoveOperand(i); + PN->removeOperand(i+1); + PN->removeOperand(i); } } PN->addOperand(MachineOperand::CreateReg(NewPR, false)); diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp index e2215c9900d0..577eccd25c19 100644 --- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp @@ -106,7 +106,7 @@ bool HexagonHazardRecognizer::isNewStore(MachineInstr &MI) { if (!TII->mayBeNewStore(MI)) return false; MachineOperand &MO = MI.getOperand(MI.getNumOperands() - 1); - return (MO.isReg() && RegDefs.count(MO.getReg()) != 0); + return MO.isReg() && RegDefs.contains(MO.getReg()); } void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) { diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 161768b8dc22..b4979c953516 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1345,7 +1345,8 @@ inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) { EVT T = N.getValueType(); if (!T.isInteger() || T.getSizeInBits() != 32 || !isa(N)) return false; - R = N; + int32_t V = cast(N)->getZExtValue(); + R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType()); return true; } @@ -1540,7 +1541,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, break; case ISD::AND: { // Check if this is an AND with NumBits of lower bits set to 1. - uint64_t Mask = (1 << NumBits) - 1; + uint64_t Mask = (1ULL << NumBits) - 1; if (ConstantSDNode *C = dyn_cast(Val.getOperand(0))) { if (C->getZExtValue() == Mask) { Src = Val.getOperand(1); @@ -1558,7 +1559,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, case ISD::OR: case ISD::XOR: { // OR/XOR with the lower NumBits bits set to 0. 
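Both `keepsLowBits` fixes here address the same bug: with a plain `1`, the shift is performed in 32-bit `int`, which is undefined behavior once `NumBits` reaches 32. Widening the shifted operand first makes the mask correct for any width up to 63:

    unsigned NumBits = 40;                   // any value >= 32 triggers the bug
    // uint64_t Mask = (1 << NumBits) - 1;   // UB: shifts a 32-bit int
    uint64_t Mask = (1ULL << NumBits) - 1;   // well-defined for NumBits <= 63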
- uint64_t Mask = (1 << NumBits) - 1; + uint64_t Mask = (1ULL << NumBits) - 1; if (ConstantSDNode *C = dyn_cast(Val.getOperand(0))) { if ((C->getZExtValue() & Mask) == 0) { Src = Val.getOperand(1); @@ -1580,7 +1581,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, } bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { - return N->getAlignment() >= N->getMemoryVT().getStoreSize(); + return N->getAlign().value() >= N->getMemoryVT().getStoreSize(); } bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const { @@ -1655,7 +1656,7 @@ struct WeightedLeaf { int Weight; int InsertionOrder; - WeightedLeaf() : Value(SDValue()) { } + WeightedLeaf() {} WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 0a6dd727eb82..0848d30e7403 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -801,7 +801,7 @@ static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) { return static_cast(G.getTargetLoweringInfo()); } static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) { - return static_cast(G.getSubtarget()); + return G.getSubtarget(); } namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index d7ca934a23e6..94411b2e4f98 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -72,41 +72,41 @@ static cl::opt EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, cl::desc("Control jump table emission on Hexagon target")); -static cl::opt EnableHexSDNodeSched("enable-hexagon-sdnode-sched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Hexagon SDNode scheduling")); +static cl::opt + EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden, + cl::desc("Enable Hexagon SDNode scheduling")); -static cl::opt EnableFastMath("ffast-math", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Fast Math processing")); +static cl::opt EnableFastMath("ffast-math", cl::Hidden, + cl::desc("Enable Fast Math processing")); -static cl::opt MinimumJumpTables("minimum-jump-tables", - cl::Hidden, cl::ZeroOrMore, cl::init(5), - cl::desc("Set minimum jump tables")); +static cl::opt MinimumJumpTables("minimum-jump-tables", cl::Hidden, + cl::init(5), + cl::desc("Set minimum jump tables")); -static cl::opt MaxStoresPerMemcpyCL("max-store-memcpy", - cl::Hidden, cl::ZeroOrMore, cl::init(6), - cl::desc("Max #stores to inline memcpy")); +static cl::opt + MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(6), + cl::desc("Max #stores to inline memcpy")); -static cl::opt MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", - cl::Hidden, cl::ZeroOrMore, cl::init(4), - cl::desc("Max #stores to inline memcpy")); +static cl::opt + MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(4), + cl::desc("Max #stores to inline memcpy")); -static cl::opt MaxStoresPerMemmoveCL("max-store-memmove", - cl::Hidden, cl::ZeroOrMore, cl::init(6), - cl::desc("Max #stores to inline memmove")); +static cl::opt + MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(6), + cl::desc("Max #stores to inline memmove")); -static cl::opt MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", - 
cl::Hidden, cl::ZeroOrMore, cl::init(4), - cl::desc("Max #stores to inline memmove")); +static cl::opt + MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden, + cl::init(4), + cl::desc("Max #stores to inline memmove")); -static cl::opt MaxStoresPerMemsetCL("max-store-memset", - cl::Hidden, cl::ZeroOrMore, cl::init(8), - cl::desc("Max #stores to inline memset")); +static cl::opt + MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(8), + cl::desc("Max #stores to inline memset")); -static cl::opt MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", - cl::Hidden, cl::ZeroOrMore, cl::init(4), - cl::desc("Max #stores to inline memset")); +static cl::opt + MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(4), + cl::desc("Max #stores to inline memset")); static cl::opt AlignLoads("hexagon-align-loads", cl::Hidden, cl::init(false), @@ -1396,10 +1396,9 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); InFlag = Chain.getValue(1); - unsigned Flags = - static_cast(DAG.getSubtarget()).useLongCalls() - ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended - : HexagonII::MO_GDPLT; + unsigned Flags = DAG.getSubtarget().useLongCalls() + ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended + : HexagonII::MO_GDPLT; return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT, Hexagon::R0, Flags); @@ -2164,6 +2163,11 @@ HexagonTargetLowering::getPreferredVectorAction(MVT VT) const { // Always widen (remaining) vectors of i1. if (ElemTy == MVT::i1) return TargetLoweringBase::TypeWidenVector; + // Widen non-power-of-2 vectors. Such types cannot be split right now, + // and computeRegisterProperties will override "split" with "widen", + // which can cause other issues. + if (!isPowerOf2_32(VecLen)) + return TargetLoweringBase::TypeWidenVector; return TargetLoweringBase::TypeSplitVector; } @@ -2423,16 +2427,25 @@ HexagonTargetLowering::buildVector32(ArrayRef Elem, const SDLoc &dl, llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) return getZero(dl, VecTy, DAG); - if (ElemTy == MVT::i16) { + if (ElemTy == MVT::i16 || ElemTy == MVT::f16) { assert(Elem.size() == 2); if (AllConst) { + // The 'Consts' array will have all values as integers regardless + // of the vector element type. uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) | Consts[1]->getZExtValue() << 16; - return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32)); + return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32)); + } + SDValue E0, E1; + if (ElemTy == MVT::f16) { + E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32); + E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32); + } else { + E0 = Elem[0]; + E1 = Elem[1]; } - SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, - {Elem[1], Elem[0]}, DAG); - return DAG.getBitcast(MVT::v2i16, N); + SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG); + return DAG.getBitcast(VecTy, N); } if (ElemTy == MVT::i8) { @@ -2506,7 +2519,7 @@ HexagonTargetLowering::buildVector64(ArrayRef Elem, const SDLoc &dl, return getZero(dl, VecTy, DAG); // First try splat if possible. 
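The `buildVector32` hunk above extends the two-element combine path to `v2f16` by moving each `half` element through the integer pipeline, since `A2_combine_ll` combines two i32 low halfwords. The essential lines, restated with the `SDValue`/`MVT` spellings written out:

    // Bitcast f16 -> i16, widen to i32, then combine the two low halves.
    SDValue E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
    SDValue E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
    SDValue N  = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
    return DAG.getBitcast(VecTy, N);   // VecTy is v2f16 on this path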
- if (ElemTy == MVT::i16) { + if (ElemTy == MVT::i16 || ElemTy == MVT::f16) { bool IsSplat = true; for (unsigned i = First+1; i != Num; ++i) { if (Elem[i] == Elem[First] || isUndef(Elem[i])) @@ -2516,7 +2529,9 @@ HexagonTargetLowering::buildVector64(ArrayRef Elem, const SDLoc &dl, } if (IsSplat) { // Legalize the operand of SPLAT_VECTOR - SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32); + SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First]) + : Elem[First]; + SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32); return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext); } } @@ -2525,8 +2540,7 @@ HexagonTargetLowering::buildVector64(ArrayRef Elem, const SDLoc &dl, if (AllConst) { uint64_t Val = 0; unsigned W = ElemTy.getSizeInBits(); - uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull - : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull; + uint64_t Mask = (1ull << W) - 1; for (unsigned i = 0; i != Num; ++i) Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask); SDValue V0 = DAG.getConstant(Val, dl, MVT::i64); @@ -3656,9 +3670,12 @@ HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { : AtomicExpansionKind::None; } -bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { +TargetLowering::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // Do not expand loads and stores that don't exceed 64 bits. - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; + return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; } TargetLowering::AtomicExpansionKind diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index f9ce7a9407aa..9561dfe8a35d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -328,7 +328,7 @@ public: Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0ba75a544c04..da6ad3ca2c93 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -24,7 +24,6 @@ static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; - void HexagonTargetLowering::initializeHVXLowering() { if (Subtarget.useHVX64BOps()) { @@ -79,80 +78,85 @@ HexagonTargetLowering::initializeHVXLowering() { // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). // Note: v16i1 -> i16 is handled in type legalization instead of op // legalization. 
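Among the hunks above, `shouldExpandAtomicStoreInIR` moves from returning `bool` to the `AtomicExpansionKind` enum already used by the load hook, so both report an expansion strategy rather than a yes/no. The shape of the updated override, restated from the hunk:

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override {
      // Only stores wider than 64 bits need expansion on Hexagon.
      return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
                 ? AtomicExpansionKind::Expand
                 : AtomicExpansionKind::None;
    }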
- setOperationAction(ISD::BITCAST, MVT::i16, Custom); - setOperationAction(ISD::BITCAST, MVT::i32, Custom); - setOperationAction(ISD::BITCAST, MVT::i64, Custom); + setOperationAction(ISD::BITCAST, MVT::i16, Custom); + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); - setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); - setOperationAction(ISD::BITCAST, MVT::i128, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); - setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); + setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); + setOperationAction(ISD::BITCAST, MVT::i128, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { - setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal); - setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal); - setOperationAction(ISD::FADD, MVT::v64f16, Legal); - setOperationAction(ISD::FSUB, MVT::v64f16, Legal); - setOperationAction(ISD::FMUL, MVT::v64f16, Legal); - setOperationAction(ISD::FADD, MVT::v32f32, Legal); - setOperationAction(ISD::FSUB, MVT::v32f32, Legal); - setOperationAction(ISD::FMUL, MVT::v32f32, Legal); - setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); - - // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat - setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom); + + static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 }; + static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 }; + + for (MVT T : FloatV) { + setOperationAction(ISD::FADD, T, Legal); + setOperationAction(ISD::FSUB, T, Legal); + setOperationAction(ISD::FMUL, T, Legal); + setOperationAction(ISD::FMINNUM, T, Legal); + setOperationAction(ISD::FMAXNUM, T, Legal); + + setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); + + setOperationAction(ISD::SPLAT_VECTOR, T, Legal); + setOperationAction(ISD::SPLAT_VECTOR, T, Legal); + + setOperationAction(ISD::MLOAD, T, Custom); + setOperationAction(ISD::MSTORE, T, Custom); + // Custom-lower BUILD_VECTOR. The standard (target-independent) + // handling of it would convert it to a load, which is not always + // the optimal choice. + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + } + // BUILD_VECTOR with f16 operands cannot be promoted without // promoting the result, so lower the node to vsplat or constant pool - setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom); - setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); - setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal); - setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom); + setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); + // Vector shuffle is always promoted to ByteV and a bitcast to f16 is // generated. 
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); - setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); - setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); - - // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- - // independent) handling of it would convert it to a load, which is - // not always the optimal choice. - setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom); - // Make concat-vectors custom to handle concats of more than 2 vectors. - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom); - - setOperationAction(ISD::LOAD, MVT::v64f32, Custom); - setOperationAction(ISD::STORE, MVT::v64f32, Custom); - setOperationAction(ISD::FADD, MVT::v64f32, Custom); - setOperationAction(ISD::FSUB, MVT::v64f32, Custom); - setOperationAction(ISD::FMUL, MVT::v64f32, Custom); - setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom); - setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom); - setOperationAction(ISD::VSELECT, MVT::v64f32, Custom); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); + + for (MVT P : FloatW) { + setOperationAction(ISD::LOAD, P, Custom); + setOperationAction(ISD::STORE, P, Custom); + setOperationAction(ISD::FADD, P, Custom); + setOperationAction(ISD::FSUB, P, Custom); + setOperationAction(ISD::FMUL, P, Custom); + setOperationAction(ISD::FMINNUM, P, Custom); + setOperationAction(ISD::FMAXNUM, P, Custom); + setOperationAction(ISD::VSELECT, P, Custom); + + // Custom-lower BUILD_VECTOR. The standard (target-independent) + // handling of it would convert it to a load, which is not always + // the optimal choice. + setOperationAction(ISD::BUILD_VECTOR, P, Custom); + // Make concat-vectors custom to handle concats of more than 2 vectors. + setOperationAction(ISD::CONCAT_VECTORS, P, Custom); + + setOperationAction(ISD::MLOAD, P, Custom); + setOperationAction(ISD::MSTORE, P, Custom); + } if (Subtarget.useHVXQFloatOps()) { setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); } else if (Subtarget.useHVXIEEEFPOps()) { setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal); - setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); } - - setOperationAction(ISD::MLOAD, MVT::v32f32, Custom); - setOperationAction(ISD::MSTORE, MVT::v32f32, Custom); - setOperationAction(ISD::MLOAD, MVT::v64f16, Custom); - setOperationAction(ISD::MSTORE, MVT::v64f16, Custom); - setOperationAction(ISD::MLOAD, MVT::v64f32, Custom); - setOperationAction(ISD::MSTORE, MVT::v64f32, Custom); } for (MVT T : LegalV) { @@ -382,8 +386,7 @@ HexagonTargetLowering::initializeHVXLowering() { } } - setTargetDAGCombine(ISD::SPLAT_VECTOR); - setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine({ISD::SPLAT_VECTOR, ISD::VSELECT}); } unsigned @@ -780,7 +783,6 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, SDValue N = HalfV0; SDValue M = HalfV1; for (unsigned i = 0; i != NumWords/2; ++i) { - // Rotate by element count since last insertion. 
if (Words[i] != Words[n] || VecHist[n] <= 1) { Sn = DAG.getConstant(Rn, dl, MVT::i32); @@ -1411,6 +1413,17 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) for (unsigned i = 0; i != Size; ++i) Ops.push_back(Op.getOperand(i)); + // First, split the BUILD_VECTOR for vector pairs. We could generate + // some pairs directly (via splat), but splats should be generated + // by the combiner prior to getting here. + if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { + ArrayRef A(Ops); + MVT SingleTy = typeSplit(VecTy).first; + SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); + SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); + } + if (VecTy.getVectorElementType() == MVT::i1) return buildHvxVectorPred(Ops, dl, VecTy, DAG); @@ -1427,14 +1440,6 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); } - if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { - ArrayRef A(Ops); - MVT SingleTy = typeSplit(VecTy).first; - SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); - SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); - } - return buildHvxVectorReg(Ops, dl, VecTy, DAG); } diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 9b4e92a16663..c8e6276aa4de 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -77,9 +77,9 @@ cl::opt ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, static cl::opt EnableBranchPrediction("hexagon-enable-branch-prediction", cl::Hidden, cl::init(true), cl::desc("Enable branch prediction")); -static cl::opt DisableNVSchedule("disable-hexagon-nv-schedule", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable schedule adjustment for new value stores.")); +static cl::opt DisableNVSchedule( + "disable-hexagon-nv-schedule", cl::Hidden, + cl::desc("Disable schedule adjustment for new value stores.")); static cl::opt EnableTimingClassLatency( "enable-timing-class-latency", cl::Hidden, cl::init(false), @@ -94,11 +94,12 @@ static cl::opt EnableACCForwarding( cl::desc("Enable vec acc forwarding")); static cl::opt BranchRelaxAsmLarge("branch-relax-asm-large", - cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm")); + cl::init(true), cl::Hidden, + cl::desc("branch relax asm")); -static cl::opt UseDFAHazardRec("dfa-hazard-rec", - cl::init(true), cl::Hidden, cl::ZeroOrMore, - cl::desc("Use the DFA based hazard recognizer.")); +static cl::opt + UseDFAHazardRec("dfa-hazard-rec", cl::init(true), cl::Hidden, + cl::desc("Use the DFA based hazard recognizer.")); /// Constants for Hexagon instructions. const int Hexagon_MEMW_OFFSET_MAX = 4095; @@ -158,7 +159,7 @@ bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { auto Op = MI.getOperand(1); // If the instruction has a global address as operand, it is not cheap // since the operand will be constant extended. - if (Op.getType() == MachineOperand::MO_GlobalAddress) + if (Op.isGlobal()) return false; // If the instruction has an operand of size > 16bits, its will be // const-extended and hence, it is not cheap. 
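In the `LowerHvxBuildVector` hunk above, the vector-pair split now runs before the i1 and f16 special cases, so pair-sized builds are halved first and each half lowered independently. Restated with the `ArrayRef` element type spelled out:

    ArrayRef<SDValue> A(Ops);
    MVT SingleTy = typeSplit(VecTy).first;
    SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
    SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);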
@@ -1072,6 +1073,43 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   };
 
   switch (Opc) {
+  case Hexagon::PS_call_instrprof_custom: {
+    auto Op0 = MI.getOperand(0);
+    assert(Op0.isGlobal() &&
+           "First operand must be a global containing handler name.");
+    const GlobalValue *NameVar = Op0.getGlobal();
+    const GlobalVariable *GV = dyn_cast<GlobalVariable>(NameVar);
+    auto *Arr = cast<ConstantDataArray>(GV->getInitializer());
+    StringRef NameStr = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
+
+    MachineOperand &Op1 = MI.getOperand(1);
+    // Set R0 with the imm value to be passed to the custom profiling handler.
+    BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrsi), Hexagon::R0)
+        .addImm(Op1.getImm());
+    // The call to the custom handler is being treated as a special one as the
+    // callee is responsible for saving and restoring all the registers
+    // (including caller saved registers) it needs to modify. This is
+    // done to reduce the impact of instrumentation on the code being
+    // instrumented/profiled.
+    // NOTE: R14, R15 and R28 are reserved for PLT handling. These registers
+    // are in the Def list of the Hexagon::PS_call_instrprof_custom and
+    // therefore will be handled appropriately during register allocation.
+
+    // TODO: It may be a good idea to add a separate pseudo instruction for
+    // static relocation which doesn't need to reserve r14, r15 and r28.
+
+    auto MIB = BuildMI(MBB, MI, DL, get(Hexagon::J2_call))
+                   .addUse(Hexagon::R0, RegState::Implicit|RegState::InternalRead)
+                   .addDef(Hexagon::R29, RegState::ImplicitDefine)
+                   .addDef(Hexagon::R30, RegState::ImplicitDefine)
+                   .addDef(Hexagon::R14, RegState::ImplicitDefine)
+                   .addDef(Hexagon::R15, RegState::ImplicitDefine)
+                   .addDef(Hexagon::R28, RegState::ImplicitDefine);
+    const char *cstr = MF.createExternalSymbolName(NameStr);
+    MIB.addExternalSymbol(cstr);
+    MBB.erase(MI);
+    return true;
+  }
   case TargetOpcode::COPY: {
     MachineOperand &MD = MI.getOperand(0);
     MachineOperand &MS = MI.getOperand(1);
@@ -1392,8 +1430,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   // Generate a misaligned load that is guaranteed to cause a crash.
   class CrashPseudoSourceValue : public PseudoSourceValue {
   public:
-    CrashPseudoSourceValue(const TargetInstrInfo &TII)
-      : PseudoSourceValue(TargetCustom, TII) {}
+    CrashPseudoSourceValue(const TargetMachine &TM)
+      : PseudoSourceValue(TargetCustom, TM) {}
 
     bool isConstant(const MachineFrameInfo *) const override {
       return false;
@@ -1409,7 +1447,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     }
   };
 
-  static const CrashPseudoSourceValue CrashPSV(*this);
+  static const CrashPseudoSourceValue CrashPSV(MF.getTarget());
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo(&CrashPSV),
       MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8,
@@ -1662,7 +1700,7 @@ bool HexagonInstrInfo::PredicateInstruction(
   MI.setDesc(get(PredOpc));
 
   while (unsigned n = MI.getNumOperands())
-    MI.RemoveOperand(n-1);
+    MI.removeOperand(n-1);
   for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
     MI.addOperand(T->getOperand(i));
@@ -4464,6 +4502,9 @@ unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const {
   unsigned Size = getMemAccessSizeInBytes(MemAccessSize(S));
   if (Size != 0)
     return Size;
+  // Y2_dcfetchbo is special
+  if (MI.getOpcode() == Hexagon::Y2_dcfetchbo)
+    return HexagonII::DoubleWordAccess;
 
   // Handle vector access sizes.
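The `CrashPseudoSourceValue` change above follows an LLVM-wide constructor change: `PseudoSourceValue` is now built from the `TargetMachine` rather than the `TargetInstrInfo`. A minimal sketch of a custom PSV under the new signature (class name hypothetical; the overrides mirror the virtual hooks the hunk touches):

    class ExamplePSV : public PseudoSourceValue {
    public:
      ExamplePSV(const TargetMachine &TM)
          : PseudoSourceValue(TargetCustom, TM) {}   // was: const TargetInstrInfo &
      bool isConstant(const MachineFrameInfo *) const override { return false; }
      bool isAliased(const MachineFrameInfo *) const override { return false; }
      bool mayAlias(const MachineFrameInfo *) const override { return false; }
    };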
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index ccaf1aac1ce0..2d49fa369642 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -192,10 +192,8 @@ private: void push_back(Value *V) { // Do not push back duplicates. - if (!S.count(V)) { + if (S.insert(V).second) Q.push_back(V); - S.insert(V); - } } Value *pop_front_val() { @@ -1152,9 +1150,8 @@ bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, if (IsPhi && HadPhi) return false; HadPhi |= IsPhi; - if (Cycle.count(I)) + if (!Cycle.insert(I)) return false; - Cycle.insert(I); if (findCycle(I, In, Cycle)) break; Cycle.remove(I); @@ -1487,7 +1484,7 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { for (auto &I : *LoopB) - if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) + if (Value *SV = simplifyInstruction(&I, {DL, &TLI, &DT})) I.replaceAllUsesWith(SV); for (Instruction &I : llvm::make_early_inc_range(*LoopB)) @@ -2169,7 +2166,7 @@ CleanupAndExit: SCEV::FlagNUW); Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); if (Instruction *In = dyn_cast(NumBytes)) - if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) + if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT})) NumBytes = Simp; CallInst *NewCall; @@ -2279,7 +2276,7 @@ CleanupAndExit: Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, MemmoveB->getTerminator()); if (Instruction *In = dyn_cast(NumWords)) - if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) + if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT})) NumWords = Simp; Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) diff --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp index aabae009d7c3..539db8f55005 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp @@ -13,3 +13,9 @@ using namespace llvm; // pin vtable to this file void HexagonMachineFunctionInfo::anchor() {} +MachineFunctionInfo *HexagonMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} diff --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 89ef5c2a891d..a02de24b176a 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -42,6 +42,10 @@ public: HexagonMachineFunctionInfo() = default; HexagonMachineFunctionInfo(MachineFunction &MF) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 8edcb745d654..f539717e42d5 100644 --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -61,8 +61,7 @@ static cl::opt DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, "New Value Jump")); static cl::opt 
DisableNewValueJumps("disable-nvjump", cl::Hidden, - cl::ZeroOrMore, cl::init(false), - cl::desc("Disable New Value Jumps")); + cl::desc("Disable New Value Jumps")); namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 3abbd896c519..80fbf33d83b7 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -3273,3 +3273,9 @@ let AddedComplexity = 100 in { def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>; } + +def: Pat<(int_hexagon_instrprof_custom (HexagonAtPcrel tglobaladdr:$addr), u32_0ImmPred:$I), + (PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>; + +def: Pat<(int_hexagon_instrprof_custom (HexagonCONST32 tglobaladdr:$addr), u32_0ImmPred:$I), + (PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>; diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 0a3dff057ccd..6fb1313667a9 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -37,7 +37,7 @@ def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; def HwLen2: SDNodeXForm(CurDAG->getSubtarget()); + const auto &ST = CurDAG->getSubtarget(); return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32); }]>; @@ -92,19 +92,19 @@ def IsVecOff : PatLeaf<(i32 imm), [{ def alignedload: PatFrag<(ops node:$a), (load $a), [{ - return isAlignedMemNode(dyn_cast(N)); + return isAlignedMemNode(cast(N)); }]>; def unalignedload: PatFrag<(ops node:$a), (load $a), [{ - return !isAlignedMemNode(dyn_cast(N)); + return !isAlignedMemNode(cast(N)); }]>; def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ - return isAlignedMemNode(dyn_cast(N)); + return isAlignedMemNode(cast(N)); }]>; def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ - return !isAlignedMemNode(dyn_cast(N)); + return !isAlignedMemNode(cast(N)); }]>; @@ -738,9 +738,14 @@ let Predicates = [UseHVX] in { def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>; -let Predicates = [UseHVX] in - def: Pat<(select I1:$Pu, VecI1:$Qs, VecI1:$Qt), +let Predicates = [UseHVX] in { + def: Pat<(select I1:$Pu, VecQ8:$Qs, VecQ8:$Qt), + (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; + def: Pat<(select I1:$Pu, VecQ16:$Qs, VecQ16:$Qt), (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; + def: Pat<(select I1:$Pu, VecQ32:$Qs, VecQ32:$Qt), + (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; +} let Predicates = [UseHVX] in { def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>; diff --git a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp index 1ff248200572..ccd90f814813 100644 --- a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -56,21 +56,21 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-peephole" -static cl::opt DisableHexagonPeephole("disable-hexagon-peephole", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Peephole Optimization")); +static cl::opt + DisableHexagonPeephole("disable-hexagon-peephole", cl::Hidden, + cl::desc("Disable Peephole Optimization")); -static cl::opt DisablePNotP("disable-hexagon-pnotp", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Optimization of PNotP")); +static cl::opt DisablePNotP("disable-hexagon-pnotp", cl::Hidden, + cl::desc("Disable 
Optimization of PNotP"));
 
-static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
-    cl::Hidden, cl::ZeroOrMore, cl::init(true),
-    cl::desc("Disable Optimization of Sign/Zero Extends"));
+static cl::opt<bool>
+    DisableOptSZExt("disable-hexagon-optszext", cl::Hidden, cl::init(true),
+                    cl::desc("Disable Optimization of Sign/Zero Extends"));
 
-static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
-    cl::Hidden, cl::ZeroOrMore, cl::init(true),
-    cl::desc("Disable Optimization of extensions to i64."));
+static cl::opt<bool>
+    DisableOptExtTo64("disable-hexagon-opt-ext-to-64", cl::Hidden,
+                      cl::init(true),
+                      cl::desc("Disable Optimization of extensions to i64."));
 
 namespace llvm {
   FunctionPass *createHexagonPeephole();
@@ -208,14 +208,14 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
         // Try to find in the map.
         if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
           // Change the 1st operand.
-          MI.RemoveOperand(1);
+          MI.removeOperand(1);
           MI.addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
         } else {
           DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
             PeepholeDoubleRegsMap.find(SrcReg);
           if (DI != PeepholeDoubleRegsMap.end()) {
             std::pair<unsigned, unsigned> PeepholeSrc = DI->second;
-            MI.RemoveOperand(1);
+            MI.removeOperand(1);
             MI.addOperand(MachineOperand::CreateReg(
                 PeepholeSrc.first, false /*isDef*/, false /*isImp*/,
                 false /*isKill*/, false /*isDead*/, false /*isUndef*/,
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index afd63d6d4aa7..7c45568f7734 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -182,6 +182,28 @@ let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
     Defs = [PC, R31, R6, R7, P0] in
 def PS_call_stk : T_Call<"">;
 
+// This pseudo instruction is used to replace int_hexagon_instrprof_custom intrinsic
+// with a call to custom handler passed as the first argument to the intrinsic.
+
+// Please Note:
+// 1) The call to the custom handler is being treated as a special one as the
+//    callee is responsible for saving and restoring all the registers it needs
+//    to modify. This includes caller saved registers as well as r0-r5 argument
+//    registers. This is done to reduce the impact of instrumentation on the
+//    code being instrumented/profiled.
+// 2) R14, R15 and R28 are reserved for PLT handling and therefore are
+//    part of the def list.
+// 3) R0 is used to pass the unique id associated with an instrumentation site
+//    to the handler.
+// 4) All the other registers (R29, R30, R31, PC) get modified by the call
+//    instruction.
+
+// TODO: It may be a good idea to add a separate pseudo instruction for
+// static relocation which doesn't need to reserve r14, r15 and r28.
+
+let hasSideEffects = 1, isCall = 1,
+    Defs = [R0, R14, R15, R28, R29, R30, R31, PC] in
+def PS_call_instrprof_custom : Pseudo<(outs), (ins s32_0Imm:$dst, u32_0Imm:$Ii), "">;
+
 // Call, no return.
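The HexagonLoopIdiomRecognition hunks earlier in the patch lean on `insert()` reporting whether the element was new, collapsing the previous lookup-then-insert pair into a single operation (`SetVector::insert` returns a plain bool, while the `SmallPtrSet`-style containers return an iterator/bool pair). A minimal sketch of the dedup idiom, with assumed container types:

    llvm::SmallPtrSet<Value *, 16> S;   // assumed; stands in for the seen-set
    std::deque<Value *> Q;
    auto pushUnique = [&](Value *V) {
      if (S.insert(V).second)           // true only on first insertion
        Q.push_back(V);
    };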
let isCall = 1, hasSideEffects = 1, cofMax1 = 1, isCodeGenOnly = 1 in def PS_callr_nr: InstHexagon<(outs), (ins IntRegs:$Rs), diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp index f26e23befde2..fb6918949cce 100644 --- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -201,7 +201,7 @@ void HexagonDCE::removeOperand(NodeAddr IA, unsigned OpNum) { for (NodeAddr RA : Refs) OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp()))); - MI->RemoveOperand(OpNum); + MI->removeOperand(OpNum); for (NodeAddr RA : Refs) { unsigned N = OpMap[RA.Id]; diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 6e55bc6b5c2c..f0e56d74fcd1 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -228,7 +228,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case Hexagon::PS_fia: MI.setDesc(HII.get(Hexagon::A2_addi)); MI.getOperand(FIOp).ChangeToImmediate(RealOffset); - MI.RemoveOperand(FIOp+1); + MI.removeOperand(FIOp+1); return; case Hexagon::PS_fi: // Set up the instruction for updating below. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index bdd2a2cfc5fa..2283d1b7f9c6 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -39,45 +39,46 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" -static cl::opt EnableBSBSched("enable-bsb-sched", - cl::Hidden, cl::ZeroOrMore, cl::init(true)); +static cl::opt EnableBSBSched("enable-bsb-sched", cl::Hidden, + cl::init(true)); -static cl::opt EnableTCLatencySched("enable-tc-latency-sched", - cl::Hidden, cl::ZeroOrMore, cl::init(false)); +static cl::opt EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, + cl::init(false)); -static cl::opt EnableDotCurSched("enable-cur-sched", - cl::Hidden, cl::ZeroOrMore, cl::init(true), - cl::desc("Enable the scheduler to generate .cur")); +static cl::opt + EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true), + cl::desc("Enable the scheduler to generate .cur")); -static cl::opt DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt + DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, + cl::desc("Disable Hexagon MI Scheduling")); -static cl::opt EnableSubregLiveness("hexagon-subreg-liveness", - cl::Hidden, cl::ZeroOrMore, cl::init(true), - cl::desc("Enable subregister liveness tracking for Hexagon")); +static cl::opt EnableSubregLiveness( + "hexagon-subreg-liveness", cl::Hidden, cl::init(true), + cl::desc("Enable subregister liveness tracking for Hexagon")); -static cl::opt OverrideLongCalls("hexagon-long-calls", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("If present, forces/disables the use of long calls")); +static cl::opt OverrideLongCalls( + "hexagon-long-calls", cl::Hidden, + cl::desc("If present, forces/disables the use of long calls")); -static cl::opt EnablePredicatedCalls("hexagon-pred-calls", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Consider calls to be predicable")); +static cl::opt + EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, + cl::desc("Consider calls to be predicable")); -static cl::opt SchedPredsCloser("sched-preds-closer", - 
cl::Hidden, cl::ZeroOrMore, cl::init(true)); +static cl::opt SchedPredsCloser("sched-preds-closer", cl::Hidden, + cl::init(true)); static cl::opt SchedRetvalOptimization("sched-retval-optimization", - cl::Hidden, cl::ZeroOrMore, cl::init(true)); + cl::Hidden, cl::init(true)); -static cl::opt EnableCheckBankConflict("hexagon-check-bank-conflict", - cl::Hidden, cl::ZeroOrMore, cl::init(true), - cl::desc("Enable checking for cache bank conflicts")); +static cl::opt EnableCheckBankConflict( + "hexagon-check-bank-conflict", cl::Hidden, cl::init(true), + cl::desc("Enable checking for cache bank conflicts")); static cl::opt EnableV68FloatCodeGen( - "force-hvx-float", cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable the code-generation for vector float instructions on v68.")); + "force-hvx-float", cl::Hidden, + cl::desc( + "Enable the code-generation for vector float instructions on v68.")); HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) @@ -95,8 +96,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - Optional ArchVer = - Hexagon::GetCpu(Hexagon::CpuTable, CPUString); + Optional ArchVer = Hexagon::getCpu(CPUString); if (ArchVer) HexagonArchVersion = *ArchVer; else diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index db682676cf12..f6c70928c2f6 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H -#include "HexagonArch.h" +#include "HexagonDepArch.h" #include "HexagonFrameLowering.h" #include "HexagonISelLowering.h" #include "HexagonInstrInfo.h" diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index c6703bb8a62a..4e04939e6690 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -32,41 +32,44 @@ using namespace llvm; -static cl::opt EnableCExtOpt("hexagon-cext", cl::Hidden, cl::ZeroOrMore, - cl::init(true), cl::desc("Enable Hexagon constant-extender optimization")); +static cl::opt + EnableCExtOpt("hexagon-cext", cl::Hidden, cl::init(true), + cl::desc("Enable Hexagon constant-extender optimization")); -static cl::opt EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, - cl::init(true), cl::desc("Enable RDF-based optimizations")); +static cl::opt EnableRDFOpt("rdf-opt", cl::Hidden, cl::init(true), + cl::desc("Enable RDF-based optimizations")); static cl::opt DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); -static cl::opt DisableAModeOpt("disable-hexagon-amodeopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon Addressing Mode Optimization")); +static cl::opt + DisableAModeOpt("disable-hexagon-amodeopt", cl::Hidden, + cl::desc("Disable Hexagon Addressing Mode Optimization")); -static cl::opt DisableHexagonCFGOpt("disable-hexagon-cfgopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon CFG Optimization")); +static cl::opt + DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, + cl::desc("Disable Hexagon CFG Optimization")); -static cl::opt DisableHCP("disable-hcp", cl::init(false), cl::Hidden, - cl::ZeroOrMore, cl::desc("Disable 
Hexagon constant propagation")); +static cl::opt + DisableHCP("disable-hcp", cl::Hidden, + cl::desc("Disable Hexagon constant propagation")); static cl::opt DisableStoreWidening("disable-store-widen", cl::Hidden, cl::init(false), cl::desc("Disable store widening")); static cl::opt EnableExpandCondsets("hexagon-expand-condsets", - cl::init(true), cl::Hidden, cl::ZeroOrMore, - cl::desc("Early expansion of MUX")); + cl::init(true), cl::Hidden, + cl::desc("Early expansion of MUX")); static cl::opt EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, - cl::ZeroOrMore, cl::desc("Enable early if-conversion")); + cl::desc("Enable early if-conversion")); static cl::opt EnableGenInsert("hexagon-insert", cl::init(true), cl::Hidden, cl::desc("Generate \"insert\" instructions")); -static cl::opt EnableCommGEP("hexagon-commgep", cl::init(true), - cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); +static cl::opt + EnableCommGEP("hexagon-commgep", cl::init(true), cl::Hidden, + cl::desc("Enable commoning of GEP instructions")); static cl::opt EnableGenExtract("hexagon-extract", cl::init(true), cl::Hidden, cl::desc("Generate \"extract\" instructions")); @@ -78,9 +81,9 @@ static cl::opt EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); -static cl::opt EnableLoopPrefetch("hexagon-loop-prefetch", - cl::init(false), cl::Hidden, cl::ZeroOrMore, - cl::desc("Enable loop data prefetch on Hexagon")); +static cl::opt + EnableLoopPrefetch("hexagon-loop-prefetch", cl::Hidden, + cl::desc("Enable loop data prefetch on Hexagon")); static cl::opt DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, cl::desc("Disable splitting double registers")); @@ -94,22 +97,24 @@ static cl::opt EnableLoopResched("hexagon-loop-resched", cl::init(true), static cl::opt HexagonNoOpt("hexagon-noopt", cl::init(false), cl::Hidden, cl::desc("Disable backend optimizations")); -static cl::opt EnableVectorPrint("enable-hexagon-vector-print", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Hexagon Vector print instr pass")); +static cl::opt + EnableVectorPrint("enable-hexagon-vector-print", cl::Hidden, + cl::desc("Enable Hexagon Vector print instr pass")); -static cl::opt EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden, - cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization")); +static cl::opt + EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden, cl::init(true), + cl::desc("Enable vextract optimization")); -static cl::opt EnableVectorCombine("hexagon-vector-combine", cl::Hidden, - cl::ZeroOrMore, cl::init(true), cl::desc("Enable HVX vector combining")); +static cl::opt + EnableVectorCombine("hexagon-vector-combine", cl::Hidden, cl::init(true), + cl::desc("Enable HVX vector combining")); -static cl::opt EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup", - cl::Hidden, cl::ZeroOrMore, cl::init(true), - cl::desc("Simplify the CFG after atomic expansion pass")); +static cl::opt EnableInitialCFGCleanup( + "hexagon-initial-cfg-cleanup", cl::Hidden, cl::init(true), + cl::desc("Simplify the CFG after atomic expansion pass")); static cl::opt EnableInstSimplify("hexagon-instsimplify", cl::Hidden, - cl::ZeroOrMore, cl::init(true), + cl::init(true), cl::desc("Enable instsimplify")); /// HexagonTargetMachineModule - Note that this is used on hosts that @@ -189,7 +194,7 @@ namespace llvm { } // end namespace llvm; static Reloc::Model getEffectiveRelocModel(Optional RM) { - return 
RM.getValueOr(Reloc::Static); + return RM.value_or(Reloc::Static); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() { @@ -293,12 +298,11 @@ void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { } TargetTransformInfo -HexagonTargetMachine::getTargetTransformInfo(const Function &F) { +HexagonTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(HexagonTTIImpl(this, F)); } - -HexagonTargetMachine::~HexagonTargetMachine() {} +HexagonTargetMachine::~HexagonTargetMachine() = default; namespace { /// Hexagon Code Generator Pass Configuration Options. @@ -345,6 +349,7 @@ void HexagonPassConfig::addIRPasses() { if (EnableInitialCFGCleanup) addPass(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index 66679df93bd3..947df7574ab3 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -39,7 +39,7 @@ public: void adjustPassManager(PassManagerBuilder &PMB) override; void registerPassBuilderCallbacks(PassBuilder &PB) override; TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; HexagonTargetObjectFile *getObjFileLowering() const override { return static_cast(TLOF.get()); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 7df32e4072e3..c83ed16f0272 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -41,9 +41,9 @@ static cl::opt SmallDataThreshold("hexagon-small-data-threshold", static cl::opt NoSmallDataSorting("mno-sort-sda", cl::init(false), cl::Hidden, cl::desc("Disable small data sections sorting")); -static cl::opt StaticsInSData("hexagon-statics-in-small-data", - cl::init(false), cl::Hidden, cl::ZeroOrMore, - cl::desc("Allow static variables in .sdata")); +static cl::opt + StaticsInSData("hexagon-statics-in-small-data", cl::Hidden, + cl::desc("Allow static variables in .sdata")); static cl::opt TraceGVPlacement("trace-gv-placement", cl::Hidden, cl::init(false), @@ -332,6 +332,7 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty, case Type::X86_MMXTyID: case Type::X86_AMXTyID: case Type::TokenTyID: + case Type::DXILPointerTyID: return 0; } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 1bdd8c3c513a..bb0aaa3150fb 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -223,7 +223,8 @@ HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, - Type *SubTp) { + Type *SubTp, + ArrayRef Args) { return 1; } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 9e637dfc3e16..7bbaf7ae9cb2 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -86,12 +86,11 @@ 
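Two `Optional<T>` modernizations recur in these hunks: `getEffectiveRelocModel` switches `getValueOr` to the `std::optional` spelling `value_or`, and earlier hunks (e.g. `findCFILocation`) use the contextual bool conversion instead of `hasValue()`. A minimal sketch of both, with hypothetical values:

    Optional<Reloc::Model> RM;                    // assume unset here
    if (RM)                                       // replaces RM.hasValue()
      consume(*RM);                               // hypothetical consumer
    Reloc::Model M = RM.value_or(Reloc::Static);  // replaces RM.getValueOr(...)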
public: unsigned getMinVectorRegisterBitWidth() const; ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; - bool shouldMaximizeVectorBandwidth() const { + bool + shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { return true; } - bool supportsEfficientVectorElementLoadStore() { - return false; - } + bool supportsEfficientVectorElementLoadStore() { return false; } bool hasBranchDivergence() { return false; } @@ -125,7 +124,8 @@ public: Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind); InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, - ArrayRef Mask, int Index, Type *SubTp); + ArrayRef Mask, int Index, Type *SubTp, + ArrayRef Args = None); InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, diff --git a/llvm/lib/Target/Hexagon/HexagonVExtract.cpp b/llvm/lib/Target/Hexagon/HexagonVExtract.cpp index b5f06ebd3189..845fa1e49578 100644 --- a/llvm/lib/Target/Hexagon/HexagonVExtract.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVExtract.cpp @@ -27,9 +27,9 @@ using namespace llvm; -static cl::opt VExtractThreshold("hexagon-vextract-threshold", - cl::Hidden, cl::ZeroOrMore, cl::init(1), - cl::desc("Threshold for triggering vextract replacement")); +static cl::opt VExtractThreshold( + "hexagon-vextract-threshold", cl::Hidden, cl::init(1), + cl::desc("Threshold for triggering vextract replacement")); namespace llvm { void initializeHexagonVExtractPass(PassRegistry& Registry); @@ -106,8 +106,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo &MFI = MF.getFrameInfo(); Register AR = MF.getInfo()->getStackAlignBaseVReg(); - std::map> VExtractMap; - MaybeAlign MaxAlign; + std::map> VExtractMap; bool Changed = false; for (MachineBasicBlock &MBB : MF) { @@ -131,6 +130,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { return AddrR; }; + MaybeAlign MaxAlign; for (auto &P : VExtractMap) { unsigned VecR = P.first; if (P.second.size() <= VExtractThreshold) @@ -138,7 +138,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { const auto &VecRC = *MRI.getRegClass(VecR); Align Alignment = HRI.getSpillAlign(VecRC); - MaxAlign = max(MaxAlign, Alignment); + MaxAlign = std::max(MaxAlign.valueOrOne(), Alignment); // Make sure this is not a spill slot: spill slots cannot be aligned // if there are variable-sized objects on the stack. 
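The HexagonVExtract hunk above replaces a `max()` over `MaybeAlign` with an explicit normalization: `valueOrOne()` converts the possibly-unset alignment to a definite `Align` before `std::max`, matching how the result is consumed. A sketch with an assumed spill alignment:

    MaybeAlign MaxAlign;                             // starts out unset
    Align SpillAlign(64);                            // assumed value for illustration
    MaxAlign = std::max(MaxAlign.valueOrOne(), SpillAlign);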
They must be // accessible via FP (which is not aligned), because SP is unknown, diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index e9b658d18175..54d33a4113e7 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -55,24 +55,25 @@ using namespace llvm; #define DEBUG_TYPE "packets" -static cl::opt DisablePacketizer("disable-packetizer", cl::Hidden, - cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon packetizer pass")); +static cl::opt + DisablePacketizer("disable-packetizer", cl::Hidden, + cl::desc("Disable Hexagon packetizer pass")); static cl::opt Slot1Store("slot1-store-slot0-load", cl::Hidden, - cl::ZeroOrMore, cl::init(true), + cl::init(true), cl::desc("Allow slot1 store and slot0 load")); -static cl::opt PacketizeVolatiles("hexagon-packetize-volatiles", - cl::ZeroOrMore, cl::Hidden, cl::init(true), - cl::desc("Allow non-solo packetization of volatile memory references")); +static cl::opt PacketizeVolatiles( + "hexagon-packetize-volatiles", cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); -static cl::opt EnableGenAllInsnClass("enable-gen-insn", cl::init(false), - cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); +static cl::opt + EnableGenAllInsnClass("enable-gen-insn", cl::Hidden, + cl::desc("Generate all instruction with TC")); -static cl::opt DisableVecDblNVStores("disable-vecdbl-nv-stores", - cl::init(false), cl::Hidden, cl::ZeroOrMore, - cl::desc("Disable vector double new-value-stores")); +static cl::opt + DisableVecDblNVStores("disable-vecdbl-nv-stores", cl::Hidden, + cl::desc("Disable vector double new-value-stores")); extern cl::opt ScheduleInlineAsm; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 6aca8d807872..abd84a188cfa 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -1310,7 +1310,7 @@ auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0, auto Simplify = [&](Value *V) { if (auto *I = dyn_cast(V)) { SimplifyQuery Q(DL, &TLI, &DT, &AC, I); - if (Value *S = SimplifyInstruction(I, Q)) + if (Value *S = simplifyInstruction(I, Q)) return S; } return V; @@ -1404,7 +1404,7 @@ auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In, if (isa(In) || (To != Block.end() && isa(*To))) return false; - if (!mayBeMemoryDependent(In)) + if (!mayHaveNonDefUseDependency(In)) return true; bool MayWrite = In.mayWriteToMemory(); auto MaybeLoc = getLocOrNone(In); diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index 94b878e21f4d..2b004a9c5ad4 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -53,10 +53,10 @@ using namespace llvm; STATISTIC(HexagonNumVectorLoopCarriedReuse, "Number of values that were reused from a previous iteration."); -static cl::opt HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", - cl::Hidden, +static cl::opt HexagonVLCRIterationLim( + "hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), - cl::init(2), cl::ZeroOrMore); + cl::init(2)); namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h 
b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h index f1e0c5804ace..f826b2eb568f 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h @@ -127,7 +127,7 @@ class Loop; /// Hexagon Vector Loop Carried Reuse Pass struct HexagonVectorLoopCarriedReusePass : public PassInfoMixin { - HexagonVectorLoopCarriedReusePass() {} + HexagonVectorLoopCarriedReusePass() = default; /// Run pass over the Loop. PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, diff --git a/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp b/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp index fbc5e5c344ed..b09a393f7dd5 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp @@ -36,9 +36,9 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-vector-print" -static cl::opt TraceHexVectorStoresOnly("trace-hex-vector-stores-only", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enables tracing of vector stores")); +static cl::opt + TraceHexVectorStoresOnly("trace-hex-vector-stores-only", cl::Hidden, + cl::desc("Enables tracing of vector stores")); namespace llvm { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 5e5a26fea076..37866a73ed0f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/EndianStream.h" diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index e5e5d08937ef..f3da67562320 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -34,5 +34,4 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { UsesELFSectionDirectiveForBSS = true; ExceptionsType = ExceptionHandling::DwarfCFI; UseLogicalShr = false; - UseIntegratedAssembler = false; } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 8a866cfe9161..18ff901d6441 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include @@ -29,8 +30,8 @@ using namespace llvm; static cl::opt - RelaxNVChecks("relax-nv-checks", cl::init(false), cl::ZeroOrMore, - cl::Hidden, cl::desc("Relax checks of new-value validity")); + RelaxNVChecks("relax-nv-checks", cl::Hidden, + cl::desc("Relax checks of new-value validity")); const HexagonMCChecker::PredSense HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index f8ac35aed7c0..ed2856eb1fe9 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -789,7 +789,6 @@ 
HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, } MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, - MCRegisterInfo const &MRI, MCContext &MCT) { return new HexagonMCCodeEmitter(MII, MCT); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 0624214d284b..49725801f046 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -108,7 +108,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, MCSection &Section = *getAssembler().getContext().getELFSection( SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); MCSectionSubPair P = getCurrentSection(); - SwitchSection(&Section); + switchSection(&Section); if (ELFSymbol->isUndefined()) { emitValueToAlignment(ByteAlignment, 0, 1, 0); @@ -120,7 +120,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, if (Align(ByteAlignment) > Section.getAlignment()) Section.setAlignment(Align(ByteAlignment)); - SwitchSection(P.first, P.second); + switchSection(P.first, P.second); } else { if (ELFSymbol->declareCommon(Size, ByteAlignment)) report_fatal_error("Symbol: " + Symbol->getName() + diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp index 1e708ba1bcd3..ab5e9eb4eca6 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 6a08d7503bac..d068baf05998 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/HexagonMCTargetDesc.h" -#include "HexagonArch.h" +#include "HexagonDepArch.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -409,8 +410,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { } } -static bool isCPUValid(const std::string &CPU) { - return Hexagon::CpuTable.find(CPU) != Hexagon::CpuTable.cend(); +static bool isCPUValid(StringRef CPU) { + return Hexagon::getCpu(CPU).has_value(); } namespace { @@ -559,12 +560,18 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, } unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { - using llvm::Hexagon::ElfFlagsByCpuStr; - - const std::string CPU(STI.getCPU().str()); - auto F = ElfFlagsByCpuStr.find(CPU); - assert(F != ElfFlagsByCpuStr.end() && "Unrecognized Architecture"); - return F->second; + return StringSwitch(STI.getCPU()) + .Case("generic", llvm::ELF::EF_HEXAGON_MACH_V5) + .Case("hexagonv5", llvm::ELF::EF_HEXAGON_MACH_V5) + .Case("hexagonv55", llvm::ELF::EF_HEXAGON_MACH_V55) 
+ .Case("hexagonv60", llvm::ELF::EF_HEXAGON_MACH_V60) + .Case("hexagonv62", llvm::ELF::EF_HEXAGON_MACH_V62) + .Case("hexagonv65", llvm::ELF::EF_HEXAGON_MACH_V65) + .Case("hexagonv66", llvm::ELF::EF_HEXAGON_MACH_V66) + .Case("hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67) + .Case("hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T) + .Case("hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68) + .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69); } llvm::ArrayRef Hexagon_MC::GetVectRegRev() { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 5bf7c9a1a908..d717e710f3c0 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -85,7 +85,6 @@ namespace Hexagon_MC { } MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &MCT); MCAsmBackend *createHexagonAsmBackend(const Target &T, diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index d82731e153fe..c8805296017d 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -295,7 +295,7 @@ void HexagonShuffler::restrictBranchOrder(HexagonPacketSummary const &Summary) { Summary.branchInsts[0]->Core.setUnits(jumpSlot.first); Summary.branchInsts[1]->Core.setUnits(jumpSlot.second); - const bool HasShuffledPacket = tryAuction(Summary).hasValue(); + const bool HasShuffledPacket = tryAuction(Summary).has_value(); if (HasShuffledPacket) return; @@ -599,7 +599,7 @@ void HexagonShuffler::restrictPreferSlot3(HexagonPacketSummary const &Summary, // and then pin it to slot #3 const unsigned saveUnits = PrefSlot3Inst->Core.getUnits(); PrefSlot3Inst->Core.setUnits(saveUnits & Slot3Mask); - const bool HasShuffledPacket = tryAuction(Summary).hasValue(); + const bool HasShuffledPacket = tryAuction(Summary).has_value(); if (HasShuffledPacket) return; diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 660215ca7435..d715ba901a2b 100644 --- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -704,14 +704,14 @@ LanaiAsmParser::parseRegister(bool RestoreOnFailure) { if (Lexer.getKind() == AsmToken::Identifier) { RegNum = MatchRegisterName(Lexer.getTok().getIdentifier()); if (RegNum == 0) { - if (PercentTok.hasValue() && RestoreOnFailure) + if (PercentTok && RestoreOnFailure) Lexer.UnLex(PercentTok.getValue()); return nullptr; } Parser.Lex(); // Eat identifier token return LanaiOperand::createReg(RegNum, Start, End); } - if (PercentTok.hasValue() && RestoreOnFailure) + if (PercentTok && RestoreOnFailure) Lexer.UnLex(PercentTok.getValue()); return nullptr; } diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp index 57343784237d..e9fecef4ac5b 100644 --- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp +++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp @@ -16,7 +16,7 @@ #include "LanaiCondCode.h" #include "LanaiInstrInfo.h" #include "TargetInfo/LanaiTargetInfo.h" -#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" @@ -45,26 +45,30 @@ 
LanaiDisassembler::LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) // Definition is further down. static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeRrMemoryValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeSplsValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeBranch(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeShiftImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); #include "LanaiGenDisassemblerTables.inc" @@ -158,7 +162,7 @@ static const unsigned GPRDecoderTable[] = { DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, - const void * /*Decoder*/) { + const MCDisassembler * /*Decoder*/) { if (RegNo > 31) return MCDisassembler::Fail; @@ -168,7 +172,8 @@ DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // RI memory values encoded using 23 bits: // 5 bit register, 16 bit constant unsigned Register = (Insn >> 18) & 0x1f; @@ -180,7 +185,8 @@ static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn, } static DecodeStatus decodeRrMemoryValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // RR memory values encoded using 20 bits: // 5 bit register, 5 bit register, 2 bit PQ, 3 bit ALU operator, 5 bit JJJJJ unsigned Register = (Insn >> 15) & 0x1f; @@ -192,7 +198,8 @@ static DecodeStatus decodeRrMemoryValue(MCInst &Inst, unsigned Insn, } static DecodeStatus decodeSplsValue(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // RI memory values encoded using 17 bits: // 5 bit register, 10 bit constant unsigned Register = (Insn >> 12) & 0x1f; @@ -206,14 +213,13 @@ static DecodeStatus decodeSplsValue(MCInst &Inst, unsigned Insn, static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, - const void *Decoder) { - const MCDisassembler *Dis = static_cast(Decoder); - return Dis->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset, - Width); + const MCDisassembler *Decoder) { + return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset, + Width, /*InstSize=*/0); } static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Insn + Address, false, Address, 2, 23, MI, Decoder)) MI.addOperand(MCOperand::createImm(Insn)); @@ -221,7 +227,8 @@ static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t 
Address, } static DecodeStatus decodeShiftImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Offset = (Insn & 0xffff); Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Offset))); @@ -230,7 +237,7 @@ static DecodeStatus decodeShiftImm(MCInst &Inst, unsigned Insn, static DecodeStatus decodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Val >= LPCC::UNKNOWN) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(Val)); diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp index 010ff80ad42a..832cafb3dabe 100644 --- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -138,11 +138,7 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); } - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine({ISD::ADD, ISD::SUB, ISD::AND, ISD::OR, ISD::XOR}); // Function alignments setMinFunctionAlignment(Align(4)); diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index 4217b8509676..bef2458fd126 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -592,9 +592,7 @@ bool LanaiInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a branch, delete them. - while (std::next(Instruction) != MBB.end()) { - std::next(Instruction)->eraseFromParent(); - } + MBB.erase(std::next(Instruction), MBB.end()); Condition.clear(); FalseBlock = nullptr; diff --git a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp index eeef1d919925..fe8ce1093bd8 100644 --- a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp @@ -11,3 +11,10 @@ using namespace llvm; void LanaiMachineFunctionInfo::anchor() {} + +MachineFunctionInfo *LanaiMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} diff --git a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h index de712637b5a4..edf5f2ee087e 100644 --- a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h +++ b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h @@ -40,6 +40,10 @@ class LanaiMachineFunctionInfo : public MachineFunctionInfo { public: explicit LanaiMachineFunctionInfo(MachineFunction &MF) : VarArgsFrameIndex(0) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; Register getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; } diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp index 70b6fd2c185d..8af40d18d106 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -48,7 +48,7 @@ static std::string computeDataLayout() { } static Reloc::Model getEffectiveRelocModel(Optional RM) { - return RM.getValueOr(Reloc::PIC_); + return RM.value_or(Reloc::PIC_); } 
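// Illustrative sketch by the editor, not part of the vendored patch: this
// hunk is one instance of a patch-wide rename of llvm::Optional's accessors
// to the std::optional spelling (getValueOr -> value_or, hasValue ->
// has_value); the parameter above is presumably Optional<Reloc::Model>, with
// the template argument lost in formatting. The defaulting behaves like this
// plain std::optional version (RelocModel is a reduced stand-in):
#include <optional>

enum class RelocModel { Static, PIC };

// With no explicit -relocation-model, Lanai falls back to PIC; the earlier
// Hexagon hunk falls back to Static the same way.
inline RelocModel effectiveRelocModel(std::optional<RelocModel> RM) {
  return RM.value_or(RelocModel::PIC);
}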
LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT, @@ -68,7 +68,7 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT, } TargetTransformInfo -LanaiTargetMachine::getTargetTransformInfo(const Function &F) { +LanaiTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(LanaiTTIImpl(this, F)); } diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/llvm/lib/Target/Lanai/LanaiTargetMachine.h index 00922f44f33a..258e58c86253 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.h +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.h @@ -38,7 +38,7 @@ public: return &Subtarget; } - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &pass_manager) override; diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h index f0d287c858d8..08cc54b858ce 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h @@ -13,10 +13,10 @@ #ifndef LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H #define LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { +class StringRef; class LanaiInstPrinter : public MCInstPrinter { public: @@ -36,7 +36,6 @@ public: void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O, const char *Modifier = nullptr); void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O); - void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O); void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printHi16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printLo16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index df4ee297155f..ec573a189a70 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -304,7 +304,6 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue( llvm::MCCodeEmitter * llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo, - const MCRegisterInfo & /*MRI*/, MCContext &context) { return new LanaiMCCodeEmitter(InstrInfo, context); } diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h index 651ed36cdc24..e8da1bc88142 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h @@ -27,7 +27,6 @@ class MCSubtargetInfo; class Target; MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp new file mode 100644 index 000000000000..d11f5a9080a0 --- /dev/null +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -0,0 +1,556 @@ +// LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-asm-parser" + +namespace { +class LoongArchAsmParser : public MCTargetAsmParser { + SMLoc getLoc() const { return getParser().getTok().getLoc(); } + + /// Parse a register as used in CFI directives. + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + bool ParseDirective(AsmToken DirectiveID) override { return true; } + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + + unsigned checkTargetMatchPredicate(MCInst &Inst) override; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + + bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo, + int64_t Lower, int64_t Upper, Twine Msg); + + /// Helper for processing MC instructions that have been successfully matched + /// by MatchAndEmitInstruction. + bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, + MCStreamer &Out); + +// Auto-generated instruction matching functions. +#define GET_ASSEMBLER_HEADER +#include "LoongArchGenAsmMatcher.inc" + + OperandMatchResultTy parseRegister(OperandVector &Operands); + OperandMatchResultTy parseImmediate(OperandVector &Operands); + + bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + +public: + enum LoongArchMatchResultTy { + Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, + Match_RequiresMsbNotLessThanLsb, + Match_RequiresOpnd2NotR0R1, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "LoongArchGenAsmMatcher.inc" +#undef GET_OPERAND_DIAGNOSTIC_TYPES + }; + + LoongArchAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, STI, MII) { + Parser.addAliasForDirective(".half", ".2byte"); + Parser.addAliasForDirective(".hword", ".2byte"); + Parser.addAliasForDirective(".word", ".4byte"); + Parser.addAliasForDirective(".dword", ".8byte"); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } +}; + +// Instances of this class represent a parsed LoongArch machine instruction. 
+class LoongArchOperand : public MCParsedAsmOperand { + enum class KindTy { + Token, + Register, + Immediate, + } Kind; + + struct RegOp { + MCRegister RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + SMLoc StartLoc, EndLoc; + union { + StringRef Tok; + struct RegOp Reg; + struct ImmOp Imm; + }; + +public: + LoongArchOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + + bool isToken() const override { return Kind == KindTy::Token; } + bool isReg() const override { return Kind == KindTy::Register; } + bool isImm() const override { return Kind == KindTy::Immediate; } + bool isMem() const override { return false; } + void setReg(MCRegister PhysReg) { Reg.RegNum = PhysReg; } + + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) { + if (auto CE = dyn_cast(Expr)) { + Imm = CE->getValue(); + return true; + } + + return false; + } + + template bool isUImm() const { + if (!isImm()) + return false; + + int64_t Imm; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm); + return IsConstantImm && isUInt(Imm - P); + } + + template bool isSImm() const { + if (!isImm()) + return false; + + int64_t Imm; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm); + return IsConstantImm && isShiftedInt(Imm); + } + + bool isUImm2() const { return isUImm<2>(); } + bool isUImm2plus1() const { return isUImm<2, 1>(); } + bool isUImm3() const { return isUImm<3>(); } + bool isUImm5() const { return isUImm<5>(); } + bool isUImm6() const { return isUImm<6>(); } + bool isUImm8() const { return isUImm<8>(); } + bool isUImm12() const { return isUImm<12>(); } + bool isUImm14() const { return isUImm<14>(); } + bool isUImm15() const { return isUImm<15>(); } + bool isSImm12() const { return isSImm<12>(); } + bool isSImm14lsl2() const { return isSImm<14, 2>(); } + bool isSImm16() const { return isSImm<16>(); } + bool isSImm16lsl2() const { return isSImm<16, 2>(); } + bool isSImm20() const { return isSImm<20>(); } + bool isSImm21lsl2() const { return isSImm<21, 2>(); } + bool isSImm26lsl2() const { return isSImm<26, 2>(); } + + /// Gets location of the first token of this operand. + SMLoc getStartLoc() const override { return StartLoc; } + /// Gets location of the last token of this operand. 
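// Illustrative sketch by the editor, not part of the vendored patch. The
// template parameter lists on isUImm/isSImm above appear to have been dropped
// in formatting; judging from the llvm::isUInt / llvm::isShiftedInt helpers
// they call, the upstream forms are template <unsigned N, unsigned P = 0> and
// template <unsigned N, unsigned S = 0>. Spelled out, the checks and the
// ranges they induce (reported later by generateImmOutOfRangeError) are
// (isUImmNP, isSImmNS and signedImmRange are illustrative names):
#include <cstdint>
#include <utility>

// True iff Imm is an unsigned N-bit value biased by P; uimm2plus1 is
// isUImm<2, 1>, i.e. it accepts 1..4.
template <unsigned N, unsigned P = 0> bool isUImmNP(int64_t Imm) {
  return Imm - P >= 0 && Imm - P < (INT64_C(1) << N);
}

// True iff Imm is a signed N-bit value scaled by 1 << S (low S bits zero);
// simm14lsl2 is isSImm<14, 2>.
template <unsigned N, unsigned S = 0> bool isSImmNS(int64_t Imm) {
  return Imm % (INT64_C(1) << S) == 0 &&
         Imm >> S >= -(INT64_C(1) << (N - 1)) &&
         Imm >> S < (INT64_C(1) << (N - 1));
}

// The matching diagnostic bounds: e.g. isSImm<14, 2> accepts exactly
// [-(1 << 15), (1 << 15) - 4] in steps of 4.
template <unsigned N, unsigned S = 0>
std::pair<int64_t, int64_t> signedImmRange() {
  return {-(INT64_C(1) << (N + S - 1)),
          (INT64_C(1) << (N + S - 1)) - (INT64_C(1) << S)};
}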
+ SMLoc getEndLoc() const override { return EndLoc; } + + unsigned getReg() const override { + assert(Kind == KindTy::Register && "Invalid type access!"); + return Reg.RegNum.id(); + } + + const MCExpr *getImm() const { + assert(Kind == KindTy::Immediate && "Invalid type access!"); + return Imm.Val; + } + + StringRef getToken() const { + assert(Kind == KindTy::Token && "Invalid type access!"); + return Tok; + } + + void print(raw_ostream &OS) const override { + auto RegName = [](unsigned Reg) { + if (Reg) + return LoongArchInstPrinter::getRegisterName(Reg); + else + return "noreg"; + }; + + switch (Kind) { + case KindTy::Immediate: + OS << *getImm(); + break; + case KindTy::Register: + OS << "<register " << RegName(getReg()) << ">"; + break; + case KindTy::Token: + OS << "'" << getToken() << "'"; + break; + } + } + + static std::unique_ptr<LoongArchOperand> createToken(StringRef Str, SMLoc S) { + auto Op = std::make_unique<LoongArchOperand>(KindTy::Token); + Op->Tok = Str; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static std::unique_ptr<LoongArchOperand> createReg(unsigned RegNo, SMLoc S, + SMLoc E) { + auto Op = std::make_unique<LoongArchOperand>(KindTy::Register); + Op->Reg.RegNum = RegNo; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<LoongArchOperand> createImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + auto Op = std::make_unique<LoongArchOperand>(KindTy::Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + if (auto CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + // Used by the TableGen Code. + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } +}; +} // end anonymous namespace + +#define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME +#define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER +#include "LoongArchGenAsmMatcher.inc" + +static MCRegister convertFPR32ToFPR64(MCRegister Reg) { + assert(Reg >= LoongArch::F0 && Reg <= LoongArch::F31 && "Invalid register"); + return Reg - LoongArch::F0 + LoongArch::F0_64; +} + +// Attempts to match Name as a register (either using the default name or +// alternative ABI names), setting RegNo to the matching register. Upon +// failure, returns true and sets RegNo to 0. +static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) { + RegNo = MatchRegisterName(Name); + // The 32-bit and 64-bit FPRs have the same asm name. Check that the initial + // match always matches the 32-bit variant, and not the 64-bit one. + assert(!(RegNo >= LoongArch::F0_64 && RegNo <= LoongArch::F31_64)); + // The default FPR register class is based on the tablegen enum ordering.
+ static_assert(LoongArch::F0 < LoongArch::F0_64, + "FPR matching must be updated"); + if (RegNo == LoongArch::NoRegister) + RegNo = MatchRegisterAltName(Name); + + return RegNo == LoongArch::NoRegister; +} + +bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + return Error(getLoc(), "invalid register number"); +} + +OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, + SMLoc &StartLoc, + SMLoc &EndLoc) { + llvm_unreachable("Unimplemented function."); +} + +OperandMatchResultTy +LoongArchAsmParser::parseRegister(OperandVector &Operands) { + if (getLexer().getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + // Eat the $ prefix. + getLexer().Lex(); + if (getLexer().getKind() != AsmToken::Identifier) + return MatchOperand_NoMatch; + + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister RegNo; + matchRegisterNameHelper(RegNo, Name); + if (RegNo == LoongArch::NoRegister) + return MatchOperand_NoMatch; + + SMLoc S = getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); + getLexer().Lex(); + Operands.push_back(LoongArchOperand::createReg(RegNo, S, E)); + + return MatchOperand_Success; +} + +OperandMatchResultTy +LoongArchAsmParser::parseImmediate(OperandVector &Operands) { + SMLoc S = getLoc(); + SMLoc E; + const MCExpr *Res; + + if (getParser().parseExpression(Res, E)) + return MatchOperand_ParseFail; + + Operands.push_back(LoongArchOperand::createImm(Res, S, E)); + return MatchOperand_Success; +} + +/// Looks at a token type and creates the relevant operand from this +/// information, adding to Operands. Return true upon an error. +bool LoongArchAsmParser::parseOperand(OperandVector &Operands, + StringRef Mnemonic) { + if (parseRegister(Operands) == MatchOperand_Success || + parseImmediate(Operands) == MatchOperand_Success) + return false; + + // Finally we have exhausted all options and must declare defeat. + Error(getLoc(), "unknown operand"); + return true; +} + +bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + // First operand in MCInst is instruction mnemonic. + Operands.push_back(LoongArchOperand::createToken(Name, NameLoc)); + + // If there are no more operands, then finish. + if (parseOptionalToken(AsmToken::EndOfStatement)) + return false; + + // Parse first operand. + if (parseOperand(Operands, Name)) + return true; + + // Parse until end of statement, consuming commas between operands. + while (parseOptionalToken(AsmToken::Comma)) + if (parseOperand(Operands, Name)) + return true; + + // Parse end of statement and return successfully. 
+ if (parseOptionalToken(AsmToken::EndOfStatement)) + return false; + + SMLoc Loc = getLexer().getLoc(); + getParser().eatToEndOfStatement(); + return Error(Loc, "unexpected token"); +} + +bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + OperandVector &Operands, + MCStreamer &Out) { + Inst.setLoc(IDLoc); + Out.emitInstruction(Inst, getSTI()); + return false; +} + +unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + switch (Inst.getOpcode()) { + default: + break; + case LoongArch::CSRXCHG: { + unsigned Rj = Inst.getOperand(2).getReg(); + if (Rj == LoongArch::R0 || Rj == LoongArch::R1) + return Match_RequiresOpnd2NotR0R1; + return Match_Success; + } + case LoongArch::BSTRINS_W: + case LoongArch::BSTRINS_D: + case LoongArch::BSTRPICK_W: + case LoongArch::BSTRPICK_D: { + unsigned Opc = Inst.getOpcode(); + const signed Msb = + (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D) + ? Inst.getOperand(3).getImm() + : Inst.getOperand(2).getImm(); + const signed Lsb = + (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D) + ? Inst.getOperand(4).getImm() + : Inst.getOperand(3).getImm(); + if (Msb < Lsb) + return Match_RequiresMsbNotLessThanLsb; + return Match_Success; + } + } + + return Match_Success; +} + +unsigned +LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + LoongArchOperand &Op = static_cast(AsmOp); + if (!Op.isReg()) + return Match_InvalidOperand; + + MCRegister Reg = Op.getReg(); + // As the parser couldn't differentiate an FPR32 from an FPR64, coerce the + // register from FPR32 to FPR64 if necessary. + if (LoongArchMCRegisterClasses[LoongArch::FPR32RegClassID].contains(Reg) && + Kind == MCK_FPR64) { + Op.setReg(convertFPR32ToFPR64(Reg)); + return Match_Success; + } + + return Match_InvalidOperand; +} + +bool LoongArchAsmParser::generateImmOutOfRangeError( + OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper, + Twine Msg = "immediate must be an integer in the range") { + SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]"); +} + +bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + FeatureBitset MissingFeatures; + + auto Result = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures, + MatchingInlineAsm); + switch (Result) { + default: + break; + case Match_Success: + return processInstruction(Inst, IDLoc, Operands, Out); + case Match_MissingFeature: { + assert(MissingFeatures.any() && "Unknown missing features!"); + bool FirstFeature = true; + std::string Msg = "instruction requires the following:"; + for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) { + if (MissingFeatures[i]) { + Msg += FirstFeature ? 
" " : ", "; + Msg += getSubtargetFeatureName(i); + FirstFeature = false; + } + } + return Error(IDLoc, Msg); + } + case Match_MnemonicFail: { + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = LoongArchMnemonicSpellCheck( + ((LoongArchOperand &)*Operands[0]).getToken(), FBS, 0); + return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion); + } + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(ErrorLoc, "too few operands for instruction"); + + ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + } + + // Handle the case when the error message is of specific type + // other than the generic Match_InvalidOperand, and the + // corresponding operand is missing. + if (Result > FIRST_TARGET_MATCH_RESULT_TY) { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size()) + return Error(ErrorLoc, "too few operands for instruction"); + } + + switch (Result) { + default: + break; + case Match_RequiresMsbNotLessThanLsb: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + return Error(ErrorStart, "msb is less than lsb", + SMRange(ErrorStart, Operands[4]->getEndLoc())); + } + case Match_RequiresOpnd2NotR0R1: + return Error(Operands[2]->getStartLoc(), "must not be $r0 or $r1"); + case Match_InvalidUImm2: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 2) - 1); + case Match_InvalidUImm2plus1: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/1, + /*Upper=*/(1 << 2)); + case Match_InvalidUImm3: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 3) - 1); + case Match_InvalidUImm5: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 5) - 1); + case Match_InvalidUImm6: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 6) - 1); + case Match_InvalidUImm12: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 12) - 1); + case Match_InvalidUImm15: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 15) - 1); + case Match_InvalidSImm12: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11), + /*Upper=*/(1 << 11) - 1); + case Match_InvalidSImm14lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4, + "immediate must be a multiple of 4 in the range"); + case Match_InvalidSImm16: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 15), + /*Upper=*/(1 << 15) - 1); + case Match_InvalidSImm16lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 17), /*Upper=*/(1 << 17) - 4, + "immediate must be a multiple of 4 in the range"); + case Match_InvalidSImm20: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1); + case Match_InvalidSImm21lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, + "immediate must be a multiple of 4 in the range"); + case Match_InvalidSImm26lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 27), /*Upper=*/(1 << 27) - 4, + "immediate must be a multiple of 4 in the range"); + } + 
llvm_unreachable("Unknown match type detected!"); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() { + RegisterMCAsmParser<LoongArchAsmParser> X(getTheLoongArch32Target()); + RegisterMCAsmParser<LoongArchAsmParser> Y(getTheLoongArch64Target()); +} diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp new file mode 100644 index 000000000000..215d061f11f2 --- /dev/null +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -0,0 +1,145 @@ +//===-- LoongArchDisassembler.cpp - Disassembler for LoongArch ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LoongArchDisassembler class. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-disassembler" + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class LoongArchDisassembler : public MCDisassembler { +public: + LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const override; +}; +} // end anonymous namespace + +static MCDisassembler *createLoongArchDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new LoongArchDisassembler(STI, Ctx); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() { + // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(), + createLoongArchDisassembler); + TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(), + createLoongArchDisassembler); +} + +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::R0 + RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::F0 + RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::F0_64 + RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCFRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 8) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::FCC0 + RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::FCSR0 + RegNo)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, + const MCDisassembler *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::createImm(Imm + P)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, + const MCDisassembler *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + // Sign-extend the number in the bottom bits of Imm, then shift left + // bits. + Inst.addOperand(MCOperand::createImm(SignExtend64(Imm) << S)); + return MCDisassembler::Success; +} + +#include "LoongArchGenDisassemblerTables.inc" + +DecodeStatus LoongArchDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CS) const { + uint32_t Insn; + DecodeStatus Result; + + // We want to read exactly 4 bytes of data because all LoongArch instructions + // are fixed 32 bits. + if (Bytes.size() < 4) { + Size = 0; + return MCDisassembler::Fail; + } + + Insn = support::endian::read32le(Bytes.data()); + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI); + Size = 4; + + return Result; +} diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h new file mode 100644 index 000000000000..caa7bd31e28b --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -0,0 +1,38 @@ +//===-- LoongArch.h - Top-level interface for LoongArch ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// LoongArch back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H + +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class LoongArchTargetMachine; +class AsmPrinter; +class FunctionPass; +class MCInst; +class MCOperand; +class MachineInstr; +class MachineOperand; + +bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP); +bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, + const AsmPrinter &AP); + +FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td new file mode 100644 index 000000000000..bf465c27ef99 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -0,0 +1,139 @@ +//===-- LoongArch.td - Describe the LoongArch Target -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// LoongArch subtarget features and instruction predicates. +//===----------------------------------------------------------------------===// + +// LoongArch is divided into two versions, the 32-bit version (LA32) and the +// 64-bit version (LA64). 
+def Feature64Bit + : SubtargetFeature<"64bit", "HasLA64", "true", + "LA64 Basic Integer and Privilege Instruction Set">; +def IsLA64 + : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<(all_of Feature64Bit), + "LA64 Basic Integer and Privilege Instruction Set">; +def IsLA32 + : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<(all_of(not Feature64Bit)), + "LA32 Basic Integer and Privilege Instruction Set">; + +defvar LA32 = DefaultMode; +def LA64 : HwMode<"+64bit">; + +// Single Precision floating point +def FeatureBasicF + : SubtargetFeature<"f", "HasBasicF", "true", + "'F' (Single-Precision Floating-Point)">; +def HasBasicF + : Predicate<"Subtarget->hasBasicF()">, + AssemblerPredicate<(all_of FeatureBasicF), + "'F' (Single-Precision Floating-Point)">; + +// Double Precision floating point +def FeatureBasicD + : SubtargetFeature<"d", "HasBasicD", "true", + "'D' (Double-Precision Floating-Point)", + [FeatureBasicF]>; +def HasBasicD + : Predicate<"Subtarget->hasBasicD()">, + AssemblerPredicate<(all_of FeatureBasicD), + "'D' (Double-Precision Floating-Point)">; + +// Loongson SIMD eXtension (LSX) +def FeatureExtLSX + : SubtargetFeature<"lsx", "HasExtLSX", "true", + "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>; +def HasExtLSX + : Predicate<"Subtarget->hasExtLSX()">, + AssemblerPredicate<(all_of FeatureExtLSX), + "'LSX' (Loongson SIMD Extension)">; + +// Loongson Advanced SIMD eXtension (LASX) +def FeatureExtLASX + : SubtargetFeature<"lasx", "HasExtLASX", "true", + "'LASX' (Loongson Advanced SIMD Extension)", + [FeatureExtLSX]>; +def HasExtLASX + : Predicate<"Subtarget->hasExtLASX()">, + AssemblerPredicate<(all_of FeatureExtLASX), + "'LASX' (Loongson Advanced SIMD Extension)">; + +// Loongson VirtualiZation (LVZ) +def FeatureExtLVZ + : SubtargetFeature<"lvz", "HasExtLVZ", "true", + "'LVZ' (Loongson Virtualization Extension)">; +def HasExtLVZ + : Predicate<"Subtarget->hasExtLVZ()">, + AssemblerPredicate<(all_of FeatureExtLVZ), + "'LVZ' (Loongson Virtualization Extension)">; + +// Loongson Binary Translation (LBT) +def FeatureExtLBT + : SubtargetFeature<"lbt", "HasExtLBT", "true", + "'LBT' (Loongson Binary Translation Extension)">; +def HasExtLBT + : Predicate<"Subtarget->hasExtLBT()">, + AssemblerPredicate<(all_of FeatureExtLBT), + "'LBT' (Loongson Binary Translation Extension)">; + +//===----------------------------------------------------------------------===// +// Registers, instruction descriptions ... +//===----------------------------------------------------------------------===// + +include "LoongArchRegisterInfo.td" +include "LoongArchCallingConv.td" +include "LoongArchInstrInfo.td" + +//===----------------------------------------------------------------------===// +// LoongArch processors supported. +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"generic-la32", NoSchedModel, []>; +def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; + +def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, + FeatureExtLASX, + FeatureExtLVZ, + FeatureExtLBT]>; + +//===----------------------------------------------------------------------===// +// Define the LoongArch target. +//===----------------------------------------------------------------------===// + +def LoongArchInstrInfo : InstrInfo { + // guess mayLoad, mayStore, and hasSideEffects + // This option is a temporary migration help. It will go away. 
+ let guessInstructionProperties = 1; +} + +def LoongArchAsmParser : AsmParser { + let ShouldEmitMatchRegisterAltName = 1; + let AllowDuplicateRegisterNames = 1; +} + +def LoongArchAsmParserVariant : AsmParserVariant { + int Variant = 0; + // Recognize hard coded registers. + string RegisterPrefix = "$"; +} + +def LoongArchAsmWriter : AsmWriter { + int PassSubtarget = 1; +} + +def LoongArch : Target { + let InstructionSet = LoongArchInstrInfo; + let AssemblyParsers = [LoongArchAsmParser]; + let AssemblyParserVariants = [LoongArchAsmParserVariant]; + let AssemblyWriters = [LoongArchAsmWriter]; + let AllowRegisterRenaming = 1; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp new file mode 100644 index 000000000000..dd61bb2df077 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -0,0 +1,48 @@ +//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format LoongArch assembly language. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchAsmPrinter.h" +#include "LoongArch.h" +#include "LoongArchTargetMachine.h" +#include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-asm-printer" + +// Simple pseudo-instructions have their lowering (with expansion to real +// instructions) auto-generated. +#include "LoongArchGenMCPseudoLowering.inc" + +void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(*OutStreamer, MI)) + return; + + MCInst TmpInst; + if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) + EmitToStreamer(*OutStreamer, TmpInst); +} + +bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + AsmPrinter::runOnMachineFunction(MF); + return true; +} + +// Force static initialization. +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() { + RegisterAsmPrinter<LoongArchAsmPrinter> X(getTheLoongArch32Target()); + RegisterAsmPrinter<LoongArchAsmPrinter> Y(getTheLoongArch64Target()); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h new file mode 100644 index 000000000000..7e5aa49f227c --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -0,0 +1,46 @@ +//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// LoongArch Assembly printer class.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H + +#include "LoongArchSubtarget.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { + const MCSubtargetInfo *STI; + +public: + explicit LoongArchAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {} + + StringRef getPassName() const override { + return "LoongArch Assembly Printer"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void emitInstruction(const MachineInstr *MI) override; + + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H diff --git a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td new file mode 100644 index 000000000000..9844163163a5 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td @@ -0,0 +1,23 @@ +//=- LoongArchCallingConv.td - Calling Conventions LoongArch -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the LoongArch architecture. +// +//===----------------------------------------------------------------------===// + +def CSR_ILP32S_LP64S + : CalleeSavedRegs<(add R1, (sequence "R%u", 22, 31))>; + +def CSR_ILP32F_LP64F + : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u", 24, 31))>; + +def CSR_ILP32D_LP64D + : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u_64", 24, 31))>; + +// Needed for implementation of LoongArchRegisterInfo::getNoPreservedMask() +def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td new file mode 100644 index 000000000000..5b117d40e0a9 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -0,0 +1,177 @@ +//=-- LoongArchInstrInfoF.td - Single-Precision Float instr --*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the basic single-precision floating-point instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasBasicF] in {
+
+// Arithmetic Operation Instructions
+def FADD_S : FP_ALU_3R<0b00000001000000001, "fadd.s", FPR32>;
+def FSUB_S : FP_ALU_3R<0b00000001000000101, "fsub.s", FPR32>;
+def FMUL_S : FP_ALU_3R<0b00000001000001001, "fmul.s", FPR32>;
+def FDIV_S : FP_ALU_3R<0b00000001000001101, "fdiv.s", FPR32>;
+def FMADD_S : FP_ALU_4R<0b000010000001, "fmadd.s", FPR32>;
+def FMSUB_S : FP_ALU_4R<0b000010000101, "fmsub.s", FPR32>;
+def FNMADD_S : FP_ALU_4R<0b000010001001, "fnmadd.s", FPR32>;
+def FNMSUB_S : FP_ALU_4R<0b000010001101, "fnmsub.s", FPR32>;
+def FMAX_S : FP_ALU_3R<0b00000001000010001, "fmax.s", FPR32>;
+def FMIN_S : FP_ALU_3R<0b00000001000010101, "fmin.s", FPR32>;
+def FMAXA_S : FP_ALU_3R<0b00000001000011001, "fmaxa.s", FPR32>;
+def FMINA_S : FP_ALU_3R<0b00000001000011101, "fmina.s", FPR32>;
+def FABS_S : FP_ALU_2R<0b0000000100010100000001, "fabs.s", FPR32>;
+def FNEG_S : FP_ALU_2R<0b0000000100010100000101, "fneg.s", FPR32>;
+def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, "fsqrt.s", FPR32>;
+def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, "frecip.s", FPR32>;
+def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, "frsqrt.s", FPR32>;
+def FSCALEB_S : FP_ALU_3R<0b00000001000100001, "fscaleb.s", FPR32>;
+def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, "flogb.s", FPR32>;
+def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, "fcopysign.s", FPR32>;
+def FCLASS_S : FP_ALU_2R<0b0000000100010100001101, "fclass.s", FPR32>;
+
+// Comparison Instructions
+def FCMP_CAF_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CAF, "fcmp.caf.s", FPR32>;
+def FCMP_CUN_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUN, "fcmp.cun.s", FPR32>;
+def FCMP_CEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CEQ, "fcmp.ceq.s", FPR32>;
+def FCMP_CUEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUEQ, "fcmp.cueq.s", FPR32>;
+def FCMP_CLT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CLT, "fcmp.clt.s", FPR32>;
+def FCMP_CULT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CULT, "fcmp.cult.s", FPR32>;
+def FCMP_CLE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CLE, "fcmp.cle.s", FPR32>;
+def FCMP_CULE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CULE, "fcmp.cule.s", FPR32>;
+def FCMP_CNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CNE, "fcmp.cne.s", FPR32>;
+def FCMP_COR_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_COR, "fcmp.cor.s", FPR32>;
+def FCMP_CUNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUNE, "fcmp.cune.s", FPR32>;
+def FCMP_SAF_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SAF, "fcmp.saf.s", FPR32>;
+def FCMP_SUN_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUN, "fcmp.sun.s", FPR32>;
+def FCMP_SEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SEQ, "fcmp.seq.s", FPR32>;
+def FCMP_SUEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUEQ, "fcmp.sueq.s", FPR32>;
+def FCMP_SLT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SLT, "fcmp.slt.s", FPR32>;
+def FCMP_SULT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SULT, "fcmp.sult.s", FPR32>;
+def FCMP_SLE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SLE, "fcmp.sle.s", FPR32>;
+def FCMP_SULE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SULE, "fcmp.sule.s", FPR32>;
+def FCMP_SNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SNE, "fcmp.sne.s", FPR32>;
+def FCMP_SOR_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SOR, "fcmp.sor.s", FPR32>;
+def FCMP_SUNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUNE, "fcmp.sune.s", FPR32>;
+
+// Conversion Instructions
+def FFINT_S_W : FP_CONV<0b0000000100011101000100, "ffint.s.w", FPR32, FPR32>;
+def FTINT_W_S : FP_CONV<0b0000000100011011000001, "ftint.w.s", FPR32, FPR32>;
+def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, "ftintrm.w.s", FPR32,
+                          FPR32>;
+def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, "ftintrp.w.s", FPR32,
+                          FPR32>;
+def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, "ftintrz.w.s", FPR32,
+                          FPR32>;
+def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, "ftintrne.w.s", FPR32,
+                           FPR32>;
+def FRINT_S : FP_CONV<0b0000000100011110010001, "frint.s", FPR32, FPR32>;
+
+// Move Instructions
+def FSEL_S : FP_SEL<0b00001101000000, "fsel", FPR32>;
+def FMOV_S : FP_MOV<0b0000000100010100100101, "fmov.s", FPR32, FPR32>;
+def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR32, GPR>;
+def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR32>;
+def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, "movgr2fcsr", FCSR, GPR>;
+def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, "movfcsr2gr", GPR, FCSR>;
+def MOVFR2CF_S : FP_MOV<0b0000000100010100110100, "movfr2cf", CFR, FPR32>;
+def MOVCF2FR_S : FP_MOV<0b0000000100010100110101, "movcf2fr", FPR32, CFR>;
+def MOVGR2CF : FP_MOV<0b0000000100010100110110, "movgr2cf", CFR, GPR>;
+def MOVCF2GR : FP_MOV<0b0000000100010100110111, "movcf2gr", GPR, CFR>;
+
+// Branch Instructions
+def BCEQZ : FP_BRANCH<0b01001000, "bceqz">;
+def BCNEZ : FP_BRANCH<0b01001001, "bcnez">;
+
+// Common Memory Access Instructions
+def FLD_S : FP_LOAD_2RI12<0b0010101100, "fld.s", FPR32>;
+def FST_S : FP_STORE_2RI12<0b0010101101, "fst.s", FPR32>;
+def FLDX_S : FP_LOAD_3R<0b00111000001100000, "fldx.s", FPR32>;
+def FSTX_S : FP_STORE_3R<0b00111000001110000, "fstx.s", FPR32>;
+
+// Bound Check Memory Access Instructions
+def FLDGT_S : FP_LOAD_3R<0b00111000011101000, "fldgt.s", FPR32>;
+def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>;
+def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
+def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
+
+} // Predicates = [HasBasicF]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+/// Generic pattern classes
+
+class PatFpr<SDPatternOperator OpNode, LAInst Inst, RegisterClass RegTy>
+    : Pat<(OpNode RegTy:$fj), (Inst $fj)>;
+class PatFprFpr<SDPatternOperator OpNode, LAInst Inst, RegisterClass RegTy>
+    : Pat<(OpNode RegTy:$fj, RegTy:$fk), (Inst $fj, $fk)>;
+
+let Predicates = [HasBasicF] in {
+
+/// Float arithmetic operations
+
+def : PatFprFpr<fadd, FADD_S, FPR32>;
+def : PatFprFpr<fsub, FSUB_S, FPR32>;
+def : PatFprFpr<fmul, FMUL_S, FPR32>;
+def : PatFprFpr<fdiv, FDIV_S, FPR32>;
+def : PatFpr<fneg, FNEG_S, FPR32>;
+
+/// Setcc
+
+// Match non-signaling comparison
+
+// TODO: change setcc to any_fsetcc after call is supported because
+// we need to call llvm.experimental.constrained.fcmp.f32 in testcase.
+// See RISCV float-fcmp-strict.ll for reference.
+class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
+    : Pat<(setcc RegTy:$fj, RegTy:$fk, cc),
+          (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
+def : PatFPSetcc<SETOEQ, FCMP_CEQ_S, FPR32>;
+def : PatFPSetcc<SETOLT, FCMP_CLT_S, FPR32>;
+def : PatFPSetcc<SETOLE, FCMP_CLE_S, FPR32>;
+def : PatFPSetcc<SETONE, FCMP_CNE_S, FPR32>;
+def : PatFPSetcc<SETO, FCMP_COR_S, FPR32>;
+def : PatFPSetcc<SETUEQ, FCMP_CUEQ_S, FPR32>;
+def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>;
+def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>;
+def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>;
+def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>;
+
+// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions.
+
+/// Select
+
+def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
+          (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
+
+/// Selectcc
+
+class PatFPSelectcc<CondCode cc, LAInst CmpInst, LAInst SelInst,
+                    RegisterClass RegTy>
+    : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
+          (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETO, FCMP_COR_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>;
+
+} // Predicates = [HasBasicF]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
new file mode 100644
index 000000000000..07fa61f4c361
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -0,0 +1,188 @@
+//=-- LoongArchInstrInfoD.td - Double-Precision Float instr -*- tablegen -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the basic double-precision floating-point instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasBasicD] in {
+
+// Arithmetic Operation Instructions
+def FADD_D : FP_ALU_3R<0b00000001000000010, "fadd.d", FPR64>;
+def FSUB_D : FP_ALU_3R<0b00000001000000110, "fsub.d", FPR64>;
+def FMUL_D : FP_ALU_3R<0b00000001000001010, "fmul.d", FPR64>;
+def FDIV_D : FP_ALU_3R<0b00000001000001110, "fdiv.d", FPR64>;
+def FMADD_D : FP_ALU_4R<0b000010000010, "fmadd.d", FPR64>;
+def FMSUB_D : FP_ALU_4R<0b000010000110, "fmsub.d", FPR64>;
+def FNMADD_D : FP_ALU_4R<0b000010001010, "fnmadd.d", FPR64>;
+def FNMSUB_D : FP_ALU_4R<0b000010001110, "fnmsub.d", FPR64>;
+def FMAX_D : FP_ALU_3R<0b00000001000010010, "fmax.d", FPR64>;
+def FMIN_D : FP_ALU_3R<0b00000001000010110, "fmin.d", FPR64>;
+def FMAXA_D : FP_ALU_3R<0b00000001000011010, "fmaxa.d", FPR64>;
+def FMINA_D : FP_ALU_3R<0b00000001000011110, "fmina.d", FPR64>;
+def FABS_D : FP_ALU_2R<0b0000000100010100000010, "fabs.d", FPR64>;
+def FNEG_D : FP_ALU_2R<0b0000000100010100000110, "fneg.d", FPR64>;
+def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, "fsqrt.d", FPR64>;
+def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, "frecip.d", FPR64>;
+def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, "frsqrt.d", FPR64>;
+def FSCALEB_D : FP_ALU_3R<0b00000001000100010, "fscaleb.d", FPR64>;
+def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, "flogb.d", FPR64>;
+def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, "fcopysign.d", FPR64>;
+def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, "fclass.d", FPR64>;
+
+// Comparison Instructions
+def FCMP_CAF_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CAF, "fcmp.caf.d", FPR64>;
+def FCMP_CUN_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUN, "fcmp.cun.d", FPR64>;
+def FCMP_CEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CEQ, "fcmp.ceq.d", FPR64>;
+def FCMP_CUEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUEQ, "fcmp.cueq.d", FPR64>;
+def FCMP_CLT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CLT, "fcmp.clt.d", FPR64>;
+def FCMP_CULT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CULT, "fcmp.cult.d", FPR64>;
+def FCMP_CLE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CLE, "fcmp.cle.d", FPR64>;
+def FCMP_CULE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CULE, "fcmp.cule.d", FPR64>;
+def FCMP_CNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CNE, "fcmp.cne.d", FPR64>;
+def FCMP_COR_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_COR, "fcmp.cor.d", FPR64>;
+def FCMP_CUNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUNE, "fcmp.cune.d", FPR64>;
+def FCMP_SAF_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SAF, "fcmp.saf.d", FPR64>;
+def FCMP_SUN_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUN, "fcmp.sun.d", FPR64>;
+def FCMP_SEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SEQ, "fcmp.seq.d", FPR64>;
+def FCMP_SUEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUEQ, "fcmp.sueq.d", FPR64>;
+def FCMP_SLT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SLT, "fcmp.slt.d", FPR64>;
+def FCMP_SULT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SULT, "fcmp.sult.d", FPR64>;
+def FCMP_SLE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SLE, "fcmp.sle.d", FPR64>;
+def FCMP_SULE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SULE, "fcmp.sule.d", FPR64>;
+def FCMP_SNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SNE, "fcmp.sne.d", FPR64>;
+def FCMP_SOR_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SOR, "fcmp.sor.d", FPR64>;
+def FCMP_SUNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUNE, "fcmp.sune.d", FPR64>;
+
+// Conversion Instructions
+def FFINT_S_L : FP_CONV<0b0000000100011101000110, "ffint.s.l", FPR32, FPR64>;
+def FTINT_L_S : FP_CONV<0b0000000100011011001001, "ftint.l.s", FPR64, FPR32>;
+def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, "ftintrm.l.s", FPR64,
+                          FPR32>;
+def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, "ftintrp.l.s", FPR64,
+                          FPR32>;
+def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, "ftintrz.l.s", FPR64,
+                          FPR32>;
+def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, "ftintrne.l.s", FPR64,
+                           FPR32>;
+def FCVT_S_D : FP_CONV<0b0000000100011001000110, "fcvt.s.d", FPR32, FPR64>;
+def FCVT_D_S : FP_CONV<0b0000000100011001001001, "fcvt.d.s", FPR64, FPR32>;
+def FFINT_D_W : FP_CONV<0b0000000100011101001000, "ffint.d.w", FPR64, FPR32>;
+def FFINT_D_L : FP_CONV<0b0000000100011101001010, "ffint.d.l", FPR64, FPR64>;
+def FTINT_W_D : FP_CONV<0b0000000100011011000010, "ftint.w.d", FPR32, FPR64>;
+def FTINT_L_D : FP_CONV<0b0000000100011011001010, "ftint.l.d", FPR64, FPR64>;
+def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, "ftintrm.w.d", FPR32,
+                          FPR64>;
+def FTINTRM_L_D : FP_CONV<0b0000000100011010001010, "ftintrm.l.d", FPR64,
+                          FPR64>;
+def FTINTRP_W_D : FP_CONV<0b0000000100011010010010, "ftintrp.w.d", FPR32,
+                          FPR64>;
+def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, "ftintrp.l.d", FPR64,
+                          FPR64>;
+def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, "ftintrz.w.d", FPR32,
+                          FPR64>;
+def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, "ftintrz.l.d", FPR64,
+                          FPR64>;
+def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, "ftintrne.w.d", FPR32,
+                           FPR64>;
+def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, "ftintrne.l.d", FPR64,
+                           FPR64>;
+def FRINT_D : FP_CONV<0b0000000100011110010010, "frint.d", FPR64, FPR64>;
+
+// Move Instructions
+def FMOV_D : FP_MOV<0b0000000100010100100110, "fmov.d", FPR64, FPR64>;
+def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, "movfrh2gr.s", GPR, FPR64>;
+let isCodeGenOnly = 1 in {
+def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR64>;
+def FSEL_D : FP_SEL<0b00001101000000, "fsel", FPR64>;
+} // isCodeGenOnly = 1
+let Constraints = "$dst = $out" in {
+def MOVGR2FRH_W : FPFmtMOV<0b0000000100010100101011, (outs FPR64:$out),
+                           (ins FPR64:$dst, GPR:$src), "movgr2frh.w",
+                           "$dst, $src">;
+} // Constraints = "$dst = $out"
+
+// Common Memory Access Instructions
+def FLD_D : FP_LOAD_2RI12<0b0010101110, "fld.d", FPR64>;
+def FST_D : FP_STORE_2RI12<0b0010101111, "fst.d", FPR64>;
+def FLDX_D : FP_LOAD_3R<0b00111000001101000, "fldx.d", FPR64>;
+def FSTX_D : FP_STORE_3R<0b00111000001111000, "fstx.d", FPR64>;
+
+// Bound Check Memory Access Instructions
+def FLDGT_D : FP_LOAD_3R<0b00111000011101001, "fldgt.d", FPR64>;
+def FLDLE_D : FP_LOAD_3R<0b00111000011101011, "fldle.d", FPR64>;
+def FSTGT_D : FP_STORE_3R<0b00111000011101101, "fstgt.d", FPR64>;
+def FSTLE_D : FP_STORE_3R<0b00111000011101111, "fstle.d", FPR64>;
+
+} // Predicates = [HasBasicD]
+
+// Instructions only available on LA64
+let Predicates = [HasBasicD, IsLA64] in {
+def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>;
+def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>;
+} // Predicates = [HasBasicD, IsLA64]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasBasicD] in {
+
+/// Float arithmetic operations
+
+def : PatFprFpr<fadd, FADD_D, FPR64>;
+def : PatFprFpr<fsub, FSUB_D, FPR64>;
+def : PatFprFpr<fmul, FMUL_D, FPR64>;
+def : PatFprFpr<fdiv, FDIV_D, FPR64>;
+def : PatFpr<fneg, FNEG_D, FPR64>;
+
+/// Setcc
+
+// Match non-signaling comparison
+
+// TODO: Change setcc to any_fsetcc after call is supported because
+// we need to call llvm.experimental.constrained.fcmp.f64 in testcase.
+// See RISCV float-fcmp-strict.ll for reference.
+
+// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
+def : PatFPSetcc<SETOEQ, FCMP_CEQ_D, FPR64>;
+def : PatFPSetcc<SETOLT, FCMP_CLT_D, FPR64>;
+def : PatFPSetcc<SETOLE, FCMP_CLE_D, FPR64>;
+def : PatFPSetcc<SETONE, FCMP_CNE_D, FPR64>;
+def : PatFPSetcc<SETO, FCMP_COR_D, FPR64>;
+def : PatFPSetcc<SETUEQ, FCMP_CUEQ_D, FPR64>;
+def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>;
+def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>;
+def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>;
+def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>;
+
+// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions.
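+
+// Note: FSEL writes $fj to the destination when the condition flag is 0 and
+// $fk when it is 1, which is why the patterns below pass the DAG select's
+// false value as the first register operand.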
+
+/// Select
+
+def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
+          (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
+
+/// Selectcc
+
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETO, FCMP_COR_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>;
+
+} // Predicates = [HasBasicD]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
new file mode 100644
index 000000000000..d2ba1fdfffe4
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -0,0 +1,241 @@
+//==- LoongArchInstrFormatsF.td - LoongArch FP Instr Formats -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe LoongArch floating-point instructions format
+//
+// opcode - operation code.
+// fd - destination register operand.
+// {c/f}{j/k/a} - source register operand.
+// immN - immediate data operand.
+//
+//===----------------------------------------------------------------------===//
+
+// 2R-type
+// <opcode | fj | fd>
+class FPFmt2R<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+              list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-10} = op;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+// 3R-type
+// <opcode | fk | fj | fd>
+class FPFmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+              list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+// 4R-type
+// <opcode | fa | fk | fj | fd>
+class FPFmt4R<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
+              list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> fa;
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-20} = op;
+  let Inst{19-15} = fa;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+// 2RI12-type
+// <opcode | I12 | rj | fd>
+class FPFmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+                 list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<12> imm12;
+  bits<5> rj;
+  bits<5> fd;
+
+  let Inst{31-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = fd;
+}
+
+// FmtFCMP
+// <opcode | cond | fk | fj | 0b00 | cd>
+class FPFmtFCMP<bits<12> op, bits<5> cond, dag outs, dag ins, string opcstr,
+                string opnstr, list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> fk;
+  bits<5> fj;
+  bits<3> cd;
+
+  let Inst{31-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0b00;
+  let Inst{2-0} = cd;
+}
+
+// FPFmtBR
+// <opcode[7:2] | I21[15:0] | opcode[1:0] | cj | I21[20:16]>
+class FPFmtBR<bits<8> opcode, dag outs, dag ins, string opcstr,
+              string opnstr, list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<21> imm21;
+  bits<3> cj;
+
+  let Inst{31-26} = opcode{7-2};
+  let Inst{25-10} = imm21{15-0};
+  let Inst{9-8} = opcode{1-0};
+  let Inst{7-5} = cj;
+  let Inst{4-0} = imm21{20-16};
+}
+
+// FmtFSEL
+// <opcode | ca | fk | fj | fd>
+class FPFmtFSEL<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+                list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<3> ca;
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-18} = op;
+  let Inst{17-15} = ca;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+// FPFmtMOV
+// <opcode | src | dst>
+class FPFmtMOV<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> src;
+  bits<5> dst;
+
+  let Inst{31-10} = op;
+  let Inst{9-5} = src;
+  let Inst{4-0} = dst;
+}
+
+// FPFmtMEM
+// <opcode | rk | rj | fd>
+class FPFmtMEM<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> fd;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = fd;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+class FP_ALU_2R<bits<22> op, string opstr, RegisterClass rc>
+    : FPFmt2R<op, (outs rc:$fd), (ins rc:$fj), opstr, "$fd, $fj">;
+
+class FP_ALU_3R<bits<17> op, string opstr, RegisterClass rc>
+    : FPFmt3R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk), opstr, "$fd, $fj, $fk">;
+
+class FP_ALU_4R<bits<12> op, string opstr, RegisterClass rc>
+    : FPFmt4R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, rc:$fa), opstr,
+              "$fd, $fj, $fk, $fa">;
+
+class FPCMPOpc<bits<12> value> {
+  bits<12> val = value;
+}
+
+class FPCMPCond<bits<5> value> {
+  bits<5> val = value;
+}
+
+class FP_CMP<FPCMPOpc op, FPCMPCond cond, string opstr, RegisterClass rc>
+    : FPFmtFCMP<op.val, cond.val, (outs CFR:$cd), (ins rc:$fj, rc:$fk), opstr,
+                "$cd, $fj, $fk">;
+
+class FP_CONV<bits<22> op, string opstr, RegisterClass rcd, RegisterClass rcs>
+    : FPFmt2R<op, (outs rcd:$fd), (ins rcs:$fj), opstr, "$fd, $fj">;
+
+class FP_MOV<bits<22> op, string opstr, RegisterClass rcd, RegisterClass rcs>
+    : FPFmtMOV<op, (outs rcd:$dst), (ins rcs:$src), opstr, "$dst, $src">;
+
+class FP_SEL<bits<14> op, string opstr, RegisterClass rc>
+    : FPFmtFSEL<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, CFR:$ca), opstr,
+                "$fd, $fj, $fk, $ca">;
+
+class FP_BRANCH<bits<8> opcode, string opstr>
+    : FPFmtBR<opcode, (outs), (ins CFR:$cj, simm21_lsl2:$imm21), opstr,
+              "$cj, $imm21"> {
+  let isBranch = 1;
+  let isTerminator = 1;
+}
+
+let mayLoad = 1 in {
+class FP_LOAD_3R<bits<17> op, string opstr, RegisterClass rc>
+    : FPFmtMEM<op, (outs rc:$fd), (ins GPR:$rj, GPR:$rk), opstr,
+               "$fd, $rj, $rk">;
+class FP_LOAD_2RI12<bits<10> op, string opstr, RegisterClass rc>
+    : FPFmt2RI12<op, (outs rc:$fd), (ins GPR:$rj, simm12:$imm12), opstr,
+                 "$fd, $rj, $imm12">;
+} // mayLoad = 1
+
+let mayStore = 1 in {
+class FP_STORE_3R<bits<17> op, string opstr, RegisterClass rc>
+    : FPFmtMEM<op, (outs), (ins rc:$fd, GPR:$rj, GPR:$rk), opstr,
+               "$fd, $rj, $rk">;
+class FP_STORE_2RI12<bits<10> op, string opstr, RegisterClass rc>
+    : FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12), opstr,
+                 "$fd, $rj, $imm12">;
+} // mayStore = 1
+
+def FPCMP_OPC_S : FPCMPOpc<0b000011000001>;
+def FPCMP_OPC_D : FPCMPOpc<0b000011000010>;
+
+def FPCMP_COND_CAF : FPCMPCond<0x0>;
+def FPCMP_COND_CUN : FPCMPCond<0x8>;
+def FPCMP_COND_CEQ : FPCMPCond<0x4>;
+def FPCMP_COND_CUEQ : FPCMPCond<0xC>;
+def FPCMP_COND_CLT : FPCMPCond<0x2>;
+def FPCMP_COND_CULT : FPCMPCond<0xA>;
+def FPCMP_COND_CLE : FPCMPCond<0x6>;
+def FPCMP_COND_CULE : FPCMPCond<0xE>;
+def FPCMP_COND_CNE : FPCMPCond<0x10>;
+def FPCMP_COND_COR : FPCMPCond<0x14>;
+def FPCMP_COND_CUNE : FPCMPCond<0x18>;
+def FPCMP_COND_SAF : FPCMPCond<0x1>;
+def FPCMP_COND_SUN : FPCMPCond<0x9>;
+def FPCMP_COND_SEQ : FPCMPCond<0x5>;
+def FPCMP_COND_SUEQ : FPCMPCond<0xD>;
+def FPCMP_COND_SLT : FPCMPCond<0x3>;
+def FPCMP_COND_SULT : FPCMPCond<0xB>;
+def FPCMP_COND_SLE : FPCMPCond<0x7>;
+def FPCMP_COND_SULE : FPCMPCond<0xF>;
+def FPCMP_COND_SNE : FPCMPCond<0x11>;
+def FPCMP_COND_SOR : FPCMPCond<0x15>;
+def FPCMP_COND_SUNE : FPCMPCond<0x19>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
new file mode 100644
index 000000000000..7182d55ca3cf
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -0,0 +1,55 @@
+//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of TargetFrameLowering class.
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchFrameLowering.h" +#include "LoongArchSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/MC/MCDwarf.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-frame-lowering" + +// Return true if the specified function should have a dedicated frame +// pointer register. This is true if frame pointer elimination is +// disabled, if it needs dynamic stack realignment, if the function has +// variable sized allocas, or if the frame address is taken. +bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || + MFI.isFrameAddressTaken(); +} + +bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); +} + +void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + // TODO: Implement this when we have function calls +} + +void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + // TODO: Implement this when we have function calls +} diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h new file mode 100644 index 000000000000..25c53efc10f1 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h @@ -0,0 +1,38 @@ +//=- LoongArchFrameLowering.h - TargetFrameLowering for LoongArch -*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class implements LoongArch-specific bits of TargetFrameLowering class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H + +#include "llvm/CodeGen/TargetFrameLowering.h" + +namespace llvm { +class LoongArchSubtarget; + +class LoongArchFrameLowering : public TargetFrameLowering { + const LoongArchSubtarget &STI; + +public: + explicit LoongArchFrameLowering(const LoongArchSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + /*StackAlignment=*/Align(16), + /*LocalAreaOffset=*/0), + STI(STI) {} + + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + bool hasFP(const MachineFunction &MF) const override; + bool hasBP(const MachineFunction &MF) const; +}; +} // namespace llvm +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp new file mode 100644 index 000000000000..cc9ea0255d98 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -0,0 +1,132 @@ +//=- LoongArchISelDAGToDAG.cpp - A dag to dag inst selector for LoongArch -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the LoongArch target. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchISelDAGToDAG.h" +#include "LoongArchISelLowering.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "MCTargetDesc/LoongArchMatInt.h" +#include "llvm/Support/KnownBits.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-isel" + +void LoongArchDAGToDAGISel::Select(SDNode *Node) { + // If we have a custom node, we have already selected. + if (Node->isMachineOpcode()) { + LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + Node->setNodeId(-1); + return; + } + + // Instruction Selection not handled by the auto-generated tablegen selection + // should be handled here. + unsigned Opcode = Node->getOpcode(); + MVT GRLenVT = Subtarget->getGRLenVT(); + SDLoc DL(Node); + + switch (Opcode) { + default: + break; + case ISD::Constant: { + int64_t Imm = cast(Node)->getSExtValue(); + if (Imm == 0 && Node->getSimpleValueType(0) == GRLenVT) { + SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + LoongArch::R0, GRLenVT); + ReplaceNode(Node, New.getNode()); + return; + } + SDNode *Result = nullptr; + SDValue SrcReg = CurDAG->getRegister(LoongArch::R0, GRLenVT); + // The instructions in the sequence are handled here. + for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) { + SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT); + if (Inst.Opc == LoongArch::LU12I_W) + Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm); + else + Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm); + SrcReg = SDValue(Result, 0); + } + + ReplaceNode(Node, Result); + return; + } + // TODO: Add selection nodes needed later. + } + + // Select the default instruction. 
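+  // SelectCode runs the matcher table that TableGen generates from the .td
+  // patterns (LoongArchGenDAGISel.inc, included by LoongArchISelDAGToDAG.h).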
+ SelectCode(Node); +} + +bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, + SDValue &ShAmt) { + // Shift instructions on LoongArch only read the lower 5 or 6 bits of the + // shift amount. If there is an AND on the shift amount, we can bypass it if + // it doesn't affect any of those bits. + if (N.getOpcode() == ISD::AND && isa(N.getOperand(1))) { + const APInt &AndMask = N->getConstantOperandAPInt(1); + + // Since the max shift amount is a power of 2 we can subtract 1 to make a + // mask that covers the bits needed to represent all shift amounts. + assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); + APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); + + if (ShMask.isSubsetOf(AndMask)) { + ShAmt = N.getOperand(0); + return true; + } + + // SimplifyDemandedBits may have optimized the mask so try restoring any + // bits that are known zero. + KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); + if (ShMask.isSubsetOf(AndMask | Known.Zero)) { + ShAmt = N.getOperand(0); + return true; + } + } else if (N.getOpcode() == LoongArchISD::BSTRPICK) { + // Similar to the above AND, if there is a BSTRPICK on the shift amount, we + // can bypass it. + assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); + assert(isa(N.getOperand(1)) && "Illegal msb operand!"); + assert(isa(N.getOperand(2)) && "Illegal lsb operand!"); + uint64_t msb = N.getConstantOperandVal(1), lsb = N.getConstantOperandVal(2); + if (lsb == 0 && Log2_32(ShiftWidth) <= msb + 1) { + ShAmt = N.getOperand(0); + return true; + } + } else if (N.getOpcode() == ISD::SUB && + isa(N.getOperand(0))) { + uint64_t Imm = N.getConstantOperandVal(0); + // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to + // generate a NEG instead of a SUB of a constant. + if (Imm != 0 && Imm % ShiftWidth == 0) { + SDLoc DL(N); + EVT VT = N.getValueType(); + SDValue Zero = + CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, VT); + unsigned NegOpc = VT == MVT::i64 ? LoongArch::SUB_D : LoongArch::SUB_W; + MachineSDNode *Neg = + CurDAG->getMachineNode(NegOpc, DL, VT, Zero, N.getOperand(1)); + ShAmt = SDValue(Neg, 0); + return true; + } + } + + ShAmt = N; + return true; +} + +// This pass converts a legalized DAG into a LoongArch-specific DAG, ready +// for instruction scheduling. +FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { + return new LoongArchDAGToDAGISel(TM); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h new file mode 100644 index 000000000000..f477129d933c --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -0,0 +1,55 @@ +//=- LoongArchISelDAGToDAG.h - A dag to dag inst selector for LoongArch ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the LoongArch target. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H + +#include "LoongArch.h" +#include "LoongArchTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +// LoongArch-specific code to select LoongArch machine instructions for +// SelectionDAG operations. +namespace llvm { +class LoongArchDAGToDAGISel : public SelectionDAGISel { + const LoongArchSubtarget *Subtarget = nullptr; + +public: + explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM) + : SelectionDAGISel(TM) {} + + StringRef getPassName() const override { + return "LoongArch DAG->DAG Pattern Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + void Select(SDNode *Node) override; + + bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); + bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { + return selectShiftMask(N, Subtarget->getGRLen(), ShAmt); + } + bool selectShiftMask32(SDValue N, SDValue &ShAmt) { + return selectShiftMask(N, 32, ShAmt); + } + +// Include the pieces autogenerated from the target description. +#include "LoongArchGenDAGISel.inc" +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp new file mode 100644 index 000000000000..d5a469216859 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -0,0 +1,531 @@ +//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that LoongArch uses to lower LLVM code into +// a selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchISelLowering.h" +#include "LoongArch.h" +#include "LoongArchMachineFunctionInfo.h" +#include "LoongArchRegisterInfo.h" +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-isel-lowering" + +LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + const LoongArchSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { + + MVT GRLenVT = Subtarget.getGRLenVT(); + // Set up the register classes. + addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); + if (Subtarget.hasBasicF()) + addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); + if (Subtarget.hasBasicD()) + addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); + + // TODO: add necessary setOperationAction calls later. 
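+  // "Custom" routes a node to LowerOperation() (or ReplaceNodeResults() for
+  // illegal result types); "Expand" lets generic legalization rewrite it.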
+ setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); + setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); + + if (Subtarget.is64Bit()) { + setOperationAction(ISD::SHL, MVT::i32, Custom); + setOperationAction(ISD::SRA, MVT::i32, Custom); + setOperationAction(ISD::SRL, MVT::i32, Custom); + } + + static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE, + ISD::SETUGT, ISD::SETUGE}; + + if (Subtarget.hasBasicF()) { + setCondCodeAction(FPCCToExpand, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + } + if (Subtarget.hasBasicD()) { + setCondCodeAction(FPCCToExpand, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + } + + setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Compute derived properties from the register classes. + computeRegisterProperties(STI.getRegisterInfo()); + + setStackPointerRegisterToSaveRestore(LoongArch::R3); + + setBooleanContents(ZeroOrOneBooleanContent); + + // Function alignments. + const Align FunctionAlignment(4); + setMinFunctionAlignment(FunctionAlignment); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRL); +} + +SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + report_fatal_error("unimplemented operand"); + case ISD::SHL_PARTS: + return lowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + return lowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: + return lowerShiftRightParts(Op, DAG, false); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + // This can be called for an i32 shift amount that needs to be promoted. + assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + return SDValue(); + } +} + +SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // if Shamt-GRLen < 0: // Shamt < GRLen + // Lo = Lo << Shamt + // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) + // else: + // Lo = 0 + // Hi = Lo << (Shamt-GRLen) + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); + SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); + SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); + SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); + + SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); + SDValue ShiftRightLo = + DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); + + SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + SDValue Parts[2] = {Lo, Hi}; + return DAG.getMergeValues(Parts, DL); +} + +SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, + SelectionDAG &DAG, + bool IsSRA) const 
{ + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // SRA expansion: + // if Shamt-GRLen < 0: // Shamt < GRLen + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) + // Hi = Hi >>s Shamt + // else: + // Lo = Hi >>s (Shamt-GRLen); + // Hi = Hi >>s (GRLen-1) + // + // SRL expansion: + // if Shamt-GRLen < 0: // Shamt < GRLen + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) + // Hi = Hi >>u Shamt + // else: + // Lo = Hi >>u (Shamt-GRLen); + // Hi = 0; + + unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); + SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); + SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); + SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); + + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); + SDValue ShiftLeftHi = + DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); + SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); + SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); + SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); + SDValue HiFalse = + IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero; + + SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + SDValue Parts[2] = {Lo, Hi}; + return DAG.getMergeValues(Parts, DL); +} + +// Returns the opcode of the target-specific SDNode that implements the 32-bit +// form of the given Opcode. +static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode"); + case ISD::SHL: + return LoongArchISD::SLL_W; + case ISD::SRA: + return LoongArchISD::SRA_W; + case ISD::SRL: + return LoongArchISD::SRL_W; + } +} + +// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG +// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would +// otherwise be promoted to i64, making it difficult to select the +// SLL_W/.../*W later one because the fact the operation was originally of +// type i8/i16/i32 is lost. +static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, + unsigned ExtOpc = ISD::ANY_EXTEND) { + SDLoc DL(N); + LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); + SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); + SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); + // ReplaceNodeResults requires we maintain the same type for the return value. 
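+  // For example, (i32 (shl x, y)) becomes
+  // (i32 (truncate (SLL_W (i64 (any_extend x)), (i64 (any_extend y))))).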
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); +} + +void LoongArchTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { + SDLoc DL(N); + switch (N->getOpcode()) { + default: + llvm_unreachable("Don't know how to legalize this operation"); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + if (N->getOperand(1).getOpcode() != ISD::Constant) { + Results.push_back(customLegalizeToWOp(N, DAG)); + break; + } + break; + } +} + +static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue FirstOperand = N->getOperand(0); + SDValue SecondOperand = N->getOperand(1); + unsigned FirstOperandOpc = FirstOperand.getOpcode(); + EVT ValTy = N->getValueType(0); + SDLoc DL(N); + uint64_t lsb, msb; + unsigned SMIdx, SMLen; + ConstantSDNode *CN; + SDValue NewOperand; + MVT GRLenVT = Subtarget.getGRLenVT(); + + // Op's second operand must be a shifted mask. + if (!(CN = dyn_cast(SecondOperand)) || + !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen)) + return SDValue(); + + if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { + // Pattern match BSTRPICK. + // $dst = and ((sra or srl) $src , lsb), (2**len - 1) + // => BSTRPICK $dst, $src, msb, lsb + // where msb = lsb + len - 1 + + // The second operand of the shift must be an immediate. + if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) + return SDValue(); + + lsb = CN->getZExtValue(); + + // Return if the shifted mask does not start at bit 0 or the sum of its + // length and lsb exceeds the word's size. + if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) + return SDValue(); + + NewOperand = FirstOperand.getOperand(0); + } else { + // Pattern match BSTRPICK. + // $dst = and $src, (2**len- 1) , if len > 12 + // => BSTRPICK $dst, $src, msb, lsb + // where lsb = 0 and msb = len - 1 + + // If the mask is <= 0xfff, andi can be used instead. + if (CN->getZExtValue() <= 0xfff) + return SDValue(); + + // Return if the mask doesn't start at position 0. + if (SMIdx) + return SDValue(); + + lsb = 0; + NewOperand = FirstOperand; + } + msb = lsb + SMLen - 1; + return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand, + DAG.getConstant(msb, DL, GRLenVT), + DAG.getConstant(lsb, DL, GRLenVT)); +} + +static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + // $dst = srl (and $src, Mask), Shamt + // => + // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt + // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 + // + + SDValue FirstOperand = N->getOperand(0); + ConstantSDNode *CN; + EVT ValTy = N->getValueType(0); + SDLoc DL(N); + MVT GRLenVT = Subtarget.getGRLenVT(); + unsigned MaskIdx, MaskLen; + uint64_t Shamt; + + // The first operand must be an AND and the second operand of the AND must be + // a shifted mask. + if (FirstOperand.getOpcode() != ISD::AND || + !(CN = dyn_cast(FirstOperand.getOperand(1))) || + !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen)) + return SDValue(); + + // The second operand (shift amount) must be an immediate. 
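+  // For example, (srl (and $src, 0xff0), 4) reads bits [11:4] of $src
+  // (MaskIdx = 4, MaskLen = 8, Shamt = 4) and so becomes
+  // BSTRPICK $dst, $src, 11, 4.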
+ if (!(CN = dyn_cast(N->getOperand(1)))) + return SDValue(); + + Shamt = CN->getZExtValue(); + if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) + return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, + FirstOperand->getOperand(0), + DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(Shamt, DL, GRLenVT)); + + return SDValue(); +} + +SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + switch (N->getOpcode()) { + default: + break; + case ISD::AND: + return performANDCombine(N, DAG, DCI, Subtarget); + case ISD::SRL: + return performSRLCombine(N, DAG, DCI, Subtarget); + } + return SDValue(); +} + +const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((LoongArchISD::NodeType)Opcode) { + case LoongArchISD::FIRST_NUMBER: + break; + +#define NODE_NAME_CASE(node) \ + case LoongArchISD::node: \ + return "LoongArchISD::" #node; + + // TODO: Add more target-dependent nodes later. + NODE_NAME_CASE(RET) + NODE_NAME_CASE(SLL_W) + NODE_NAME_CASE(SRA_W) + NODE_NAME_CASE(SRL_W) + NODE_NAME_CASE(BSTRPICK) + } +#undef NODE_NAME_CASE + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// +// FIXME: Now, we only support CallingConv::C with fixed arguments which are +// passed with integer or floating-point registers. +const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, + LoongArch::R7, LoongArch::R8, LoongArch::R9, + LoongArch::R10, LoongArch::R11}; +const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, + LoongArch::F3, LoongArch::F4, LoongArch::F5, + LoongArch::F6, LoongArch::F7}; +const MCPhysReg ArgFPR64s[] = { + LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, + LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; + +// Implements the LoongArch calling convention. Returns true upon failure. +static bool CC_LoongArch(unsigned ValNo, MVT ValVT, + CCValAssign::LocInfo LocInfo, CCState &State) { + // Allocate to a register if possible. + Register Reg; + + if (ValVT == MVT::f32) + Reg = State.AllocateReg(ArgFPR32s); + else if (ValVT == MVT::f64) + Reg = State.AllocateReg(ArgFPR64s); + else + Reg = State.AllocateReg(ArgGPRs); + if (Reg) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, ValVT, LocInfo)); + return false; + } + + // TODO: Handle arguments passed without register. 
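+  // Returning true reports the assignment failure to CCState;
+  // analyzeInputArgs() and analyzeOutputArgs() currently treat such a
+  // failure as unreachable.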
+ return true; +} + +void LoongArchTargetLowering::analyzeInputArgs( + CCState &CCInfo, const SmallVectorImpl &Ins, + LoongArchCCAssignFn Fn) const { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + MVT ArgVT = Ins[i].VT; + + if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { + LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'); + llvm_unreachable(""); + } + } +} + +void LoongArchTargetLowering::analyzeOutputArgs( + CCState &CCInfo, const SmallVectorImpl &Outs, + LoongArchCCAssignFn Fn) const { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT ArgVT = Outs[i].VT; + + if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { + LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << "\n"); + llvm_unreachable(""); + } + } +} + +static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, const SDLoc &DL, + const LoongArchTargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + EVT LocVT = VA.getLocVT(); + const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); + Register VReg = RegInfo.createVirtualRegister(RC); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + + return DAG.getCopyFromReg(Chain, DL, VReg, LocVT); +} + +// Transform physical registers into virtual registers. +SDValue LoongArchTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + + MachineFunction &MF = DAG.getMachineFunction(); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention"); + case CallingConv::C: + break; + } + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + analyzeInputArgs(CCInfo, Ins, CC_LoongArch); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) + InVals.push_back(unpackFromRegLoc(DAG, Chain, ArgLocs[i], DL, *this)); + + return Chain; +} + +bool LoongArchTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + // Any return value split in to more than two values can't be returned + // directly. + return Outs.size() <= 2; +} + +SDValue LoongArchTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const { + // Stores the assignment of the return value to a location. + SmallVector RVLocs; + + // Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + analyzeOutputArgs(CCInfo, Outs, CC_LoongArch); + + SDValue Glue; + SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + // Handle a 'normal' return. + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue); + + // Guarantee that all emitted copies are stuck together. + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the glue node if we have it. 
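+  // The glue operand keeps the CopyToReg nodes attached to the RET node so
+  // the scheduler cannot separate them from the return.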
+ if (Glue.getNode()) + RetOps.push_back(Glue); + + return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h new file mode 100644 index 000000000000..c852577a3744 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -0,0 +1,95 @@ +//=- LoongArchISelLowering.h - LoongArch DAG Lowering Interface -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that LoongArch uses to lower LLVM code into +// a selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H + +#include "LoongArch.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { +class LoongArchSubtarget; +struct LoongArchRegisterInfo; +namespace LoongArchISD { +enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // TODO: add more LoongArchISDs + RET, + // 32-bit shifts, directly matching the semantics of the named LoongArch + // instructions. + SLL_W, + SRA_W, + SRL_W, + + BSTRPICK, + +}; +} // namespace LoongArchISD + +class LoongArchTargetLowering : public TargetLowering { + const LoongArchSubtarget &Subtarget; + +public: + explicit LoongArchTargetLowering(const TargetMachine &TM, + const LoongArchSubtarget &STI); + + const LoongArchSubtarget &getSubtarget() const { return Subtarget; } + + // Provide custom lowering hooks for some operations. + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + // This method returns the name of a target specific DAG node. + const char *getTargetNodeName(unsigned Opcode) const override; + + // Lower incoming arguments, copy physregs into vregs. + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Ins, + const SDLoc &DL, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const override; + +private: + /// Target-specific function used to lower LoongArch calling conventions. 
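+  /// Such a function returns true if the argument could not be assigned to
+  /// a location, matching the convention of CC_LoongArch() above.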
+ typedef bool LoongArchCCAssignFn(unsigned ValNo, MVT ValVT, + CCValAssign::LocInfo LocInfo, + CCState &State); + + void analyzeInputArgs(CCState &CCInfo, + const SmallVectorImpl &Ins, + LoongArchCCAssignFn Fn) const; + void analyzeOutputArgs(CCState &CCInfo, + const SmallVectorImpl &Outs, + LoongArchCCAssignFn Fn) const; + + SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td new file mode 100644 index 000000000000..bebc83a861ae --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td @@ -0,0 +1,404 @@ +//===- LoongArchInstrFormats.td - LoongArch Instr. Formats -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe LoongArch instructions format +// +// opcode - operation code. +// rd - destination register operand. +// r{j/k} - source register operand. +// immN - immediate data operand. +// +//===----------------------------------------------------------------------===// + +class LAInst pattern = []> + : Instruction { + field bits<32> Inst; + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. 
+ field bits<32> SoftFail = 0; + + let Namespace = "LoongArch"; + let Size = 4; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = opcstr # "\t" # opnstr; + let Pattern = pattern; +} + +// Pseudo instructions +class Pseudo pattern = [], string opcstr = "", + string opnstr = ""> + : LAInst { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +// 2R-type +// +class Fmt2R op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<5> rj; + bits<5> rd; + + let Inst{31-10} = op; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 3R-type +// +class Fmt3R op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<5> rk; + bits<5> rj; + bits<5> rd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 3RI2-type +// +class Fmt3RI2 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<2> imm2; + bits<5> rk; + bits<5> rj; + bits<5> rd; + + let Inst{31-17} = op; + let Inst{16-15} = imm2; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 3RI3-type +// +class Fmt3RI3 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<3> imm3; + bits<5> rk; + bits<5> rj; + bits<5> rd; + + let Inst{31-18} = op; + let Inst{17-15} = imm3; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 2RI5-type +// +class Fmt2RI5 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<5> imm5; + bits<5> rj; + bits<5> rd; + + let Inst{31-15} = op; + let Inst{14-10} = imm5; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 2RI6-type +// +class Fmt2RI6 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<6> imm6; + bits<5> rj; + bits<5> rd; + + let Inst{31-16} = op; + let Inst{15-10} = imm6; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 2RI8-type +// +class Fmt2RI8 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<8> imm8; + bits<5> rj; + bits<5> rd; + + let Inst{31-18} = op; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 2RI12-type +// +class Fmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<12> imm12; + bits<5> rj; + bits<5> rd; + + let Inst{31-22} = op; + let Inst{21-10} = imm12; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 2RI14-type +// +class Fmt2RI14 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<14> imm14; + bits<5> rj; + bits<5> rd; + + let Inst{31-24} = op; + let Inst{23-10} = imm14; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 2RI16-type +// +class Fmt2RI16 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<16> imm16; + bits<5> rj; + bits<5> rd; + + let Inst{31-26} = op; + let Inst{25-10} = imm16; + let Inst{9-5} = rj; + let Inst{4-0} = rd; +} + +// 1RI20-type +// +class Fmt1RI20 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<20> imm20; + bits<5> rd; + + let Inst{31-25} = op; + let Inst{24-5} = imm20; + let Inst{4-0} = rd; +} + +// 1RI21-type +// +class Fmt1RI21 op, dag outs, dag ins, string opcstr, string opnstr, + list pattern = []> + : LAInst { + bits<21> imm21; + bits<5> rj; + + let Inst{31-26} = op; + let Inst{25-10} = imm21{15-0}; + let Inst{9-5} = rj; + let Inst{4-0} = imm21{20-16}; +} 
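+
+// As an illustration of how these formats are instantiated (the actual
+// instruction definitions live in LoongArchInstrInfo.td), a 2RI12-type
+// instruction such as addi.w would be defined roughly as
+//   def ADDI_W : Fmt2RI12<0b0000001010, (outs GPR:$rd),
+//                         (ins GPR:$rj, simm12:$imm12), "addi.w",
+//                         "$rd, $rj, $imm12">;
+// placing the opcode in Inst{31-22}, imm12 in Inst{21-10}, rj in Inst{9-5}
+// and rd in Inst{4-0}.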
+
+// I15-type
+// <opcode | I15>
+class FmtI15<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+             list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<15> imm15;
+
+  let Inst{31-15} = op;
+  let Inst{14-0} = imm15;
+}
+
+// I26-type
+// <opcode | I26[15:0] | I26[25:16]>
+class FmtI26<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+             list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<26> imm26;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = imm26{15-0};
+  let Inst{9-0} = imm26{25-16};
+}
+
+// FmtBSTR_W
+// <opcode[11:1] | msbw | opcode[0] | lsbw | rj | rd>
+class FmtBSTR_W<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
+                list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> msbw;
+  bits<5> lsbw;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-21} = op{11-1};
+  let Inst{20-16} = msbw;
+  let Inst{15} = op{0};
+  let Inst{14-10} = lsbw;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+// FmtBSTR_D
+// <opcode | msbd | lsbd | rj | rd>
+class FmtBSTR_D<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+                list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<6> msbd;
+  bits<6> lsbd;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-22} = op;
+  let Inst{21-16} = msbd;
+  let Inst{15-10} = lsbd;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+// FmtASRT
+// <opcode | rk | rj | 0x0>
+class FmtASRT<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+              list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> rk;
+  bits<5> rj;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = 0x0;
+}
+
+// FmtPRELD
+// < 0b0010101011 | I12 | rj | I5>
+class FmtPRELD<dag outs, dag ins, string opcstr, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<12> imm12;
+  bits<5> rj;
+  bits<5> imm5;
+
+  let Inst{31-22} = 0b0010101011;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = imm5;
+}
+
+// FmtPRELDX
+// < 0b00111000001011000 | rk | rj | I5>
+class FmtPRELDX<dag outs, dag ins, string opcstr, string opnstr,
+                list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> imm5;
+
+  let Inst{31-15} = 0b00111000001011000;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = imm5;
+}
+
+// FmtCSR
+// <opcode[12:5] | csr_num | opcode[4:0] | rd>
+class FmtCSR<bits<13> op, dag outs, dag ins, string opcstr, string opnstr,
+             list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<14> csr_num;
+  bits<5> rd;
+
+  let Inst{31-24} = op{12-5};
+  let Inst{23-10} = csr_num;
+  let Inst{9-5} = op{4-0};
+  let Inst{4-0} = rd;
+}
+
+// FmtCSRXCHG
+// <opcode | csr_num | rj | rd>
+class FmtCSRXCHG<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
+                 list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<14> csr_num;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-24} = op;
+  let Inst{23-10} = csr_num;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+// FmtCACOP
+// <0b0000011000 | I12 | rj | I5>
+class FmtCACOP<dag outs, dag ins, string opcstr, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<12> imm12;
+  bits<5> rj;
+  bits<5> op;
+
+  let Inst{31-22} = 0b0000011000;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = op;
+}
+
+// FmtIMM32
+// <I32>
+class FmtI32<bits<32> op, string opstr, list<dag> pattern = []>
+    : LAInst<(outs), (ins), opstr, "", pattern> {
+  let Inst{31-0} = op;
+}
+
+// FmtINVTLB
+// <0b00000110010010011 | rk | rj | I5>
+class FmtINVTLB<dag outs, dag ins, string opcstr, string opnstr,
+                list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> op;
+
+  let Inst{31-15} = 0b00000110010010011;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = op;
+}
+
+// FmtLDPTE
+// <0b00000110010001 | seq | rj | 00000>
+class FmtLDPTE<dag outs, dag ins, string opcstr, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, opcstr, opnstr, pattern> {
+  bits<8> seq;
+  bits<5> rj;
+
+  let Inst{31-18} = 0b00000110010001;
+  let Inst{17-10} = seq;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = 0b00000;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
new file mode 100644
index 000000000000..146ef53befd5
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -0,0 +1,49 @@
+//=- LoongArchInstrInfo.cpp
- LoongArch Instruction Information -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchInstrInfo.h" +#include "LoongArch.h" + +using namespace llvm; + +#define GET_INSTRINFO_CTOR_DTOR +#include "LoongArchGenInstrInfo.inc" + +LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) + // FIXME: add CFSetup and CFDestroy Inst when we implement function call. + : LoongArchGenInstrInfo() {} + +void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, + MCRegister SrcReg, bool KillSrc) const { + if (LoongArch::GPRRegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(LoongArch::OR), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(LoongArch::R0); + return; + } + + // FPR->FPR copies. + unsigned Opc; + if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) { + Opc = LoongArch::FMOV_S; + } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { + Opc = LoongArch::FMOV_D; + } else { + // TODO: support other copies. + llvm_unreachable("Impossible reg-to-reg copy"); + } + + BuildMI(MBB, MBBI, DL, get(Opc), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h new file mode 100644 index 000000000000..f31943b85a51 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -0,0 +1,36 @@ +//=- LoongArchInstrInfo.h - LoongArch Instruction Information ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + +#include "LoongArchRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "LoongArchGenInstrInfo.inc" + +namespace llvm { + +class LoongArchSubtarget; + +class LoongArchInstrInfo : public LoongArchGenInstrInfo { +public: + explicit LoongArchInstrInfo(LoongArchSubtarget &STI); + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, + bool KillSrc) const override; +}; + +} // end namespace llvm +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td new file mode 100644 index 000000000000..6b8ee9e43f94 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -0,0 +1,730 @@ +//== LoongArchInstrInfo.td - Target Description for LoongArch -*- tablegen -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LoongArch instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LoongArch specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Target-dependent type requirements.
+def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
+]>;
+
+def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
+  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
+]>;
+
+// TODO: Add LoongArch specific DAG Nodes
+// Target-dependent nodes.
+def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
+def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_bstrpick
+    : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+class ImmAsmOperand<string prefix, int width, string suffix>
+    : AsmOperandClass {
+  let Name = prefix # "Imm" # width # suffix;
+  let DiagnosticType = !strconcat("Invalid", Name);
+  let RenderMethod = "addImmOperands";
+}
+
+class SImmAsmOperand<int width, string suffix = "">
+    : ImmAsmOperand<"S", width, suffix> {
+}
+
+class UImmAsmOperand<int width, string suffix = "">
+    : ImmAsmOperand<"U", width, suffix> {
+}
+
+def uimm2 : Operand<GRLenVT> {
+  let ParserMatchClass = UImmAsmOperand<2>;
+}
+
+def uimm2_plus1 : Operand<GRLenVT> {
+  let ParserMatchClass = UImmAsmOperand<2, "plus1">;
+  let EncoderMethod = "getImmOpValueSub1";
+  let DecoderMethod = "decodeUImmOperand<2, 1>";
+}
+
+def uimm3 : Operand<GRLenVT> {
+  let ParserMatchClass = UImmAsmOperand<3>;
+}
+
+def uimm5 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<5>(Imm);}]> {
+  let ParserMatchClass = UImmAsmOperand<5>;
+}
+
+def uimm6 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<6>(Imm);}]> {
+  let ParserMatchClass = UImmAsmOperand<6>;
+}
+
+def uimm8 : Operand<GRLenVT> {
+  let ParserMatchClass = UImmAsmOperand<8>;
+}
+
+def uimm12 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<12>(Imm);}]> {
+  let ParserMatchClass = UImmAsmOperand<12>;
+}
+
+def uimm14 : Operand<GRLenVT> {
+  let ParserMatchClass = UImmAsmOperand<14>;
+}
+
+def uimm15 : Operand<GRLenVT> {
+  let ParserMatchClass = UImmAsmOperand<15>;
+}
+
+def simm12 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isInt<12>(Imm);}]> {
+  let ParserMatchClass = SImmAsmOperand<12>;
+  let DecoderMethod = "decodeSImmOperand<12>";
+}
+
+def simm14_lsl2 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<14, "lsl2">;
+  let EncoderMethod = "getImmOpValueAsr2";
+  let DecoderMethod = "decodeSImmOperand<14, 2>";
+}
+
+def simm16 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<16>;
+  let DecoderMethod = "decodeSImmOperand<16>";
+}
+
+def simm16_lsl2 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
+  let EncoderMethod = "getImmOpValueAsr2";
+  let DecoderMethod = "decodeSImmOperand<16, 2>";
+}
+
+def simm20 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<20>;
+  let DecoderMethod = "decodeSImmOperand<20>";
+}
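The operand classes above pair an assembler match class with an optional ImmLeaf predicate, and the *_lsl2 variants store a 4-byte-scaled offset (encoded as imm >> 2 by getImmOpValueAsr2). A standalone C++ sketch of the resulting value ranges; the isIntN/isUIntN helpers below are local stand-ins for llvm::isInt/llvm::isUInt, written out so the example runs on its own:

#include <cstdint>
#include <cstdio>

// Range checks equivalent to the isInt<N>/isUInt<N> predicates used above.
static bool isIntN(unsigned N, int64_t Imm) {
  return Imm >= -(INT64_C(1) << (N - 1)) && Imm < (INT64_C(1) << (N - 1));
}
static bool isUIntN(unsigned N, int64_t Imm) {
  return Imm >= 0 && Imm < (INT64_C(1) << N);
}

int main() {
  // simm12 accepts [-2048, 2047]; uimm12 accepts [0, 4095].
  std::printf("%d %d\n", isIntN(12, -2048), isIntN(12, 2048));  // 1 0
  std::printf("%d %d\n", isUIntN(12, 4095), isUIntN(12, 4096)); // 1 0
  // simm16_lsl2 holds an 18-bit, 4-byte-aligned branch offset as offset >> 2.
  int64_t Offset = 0x1fffc; // largest forward offset of a 2RI16 branch
  std::printf("%d\n", (Offset & 3) == 0 && isIntN(16, Offset >> 2)); // 1
  return 0;
}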
"getImmOpValueAsr2"; + let DecoderMethod = "decodeSImmOperand<21, 2>"; +} + +def simm26_lsl2 : Operand { + let ParserMatchClass = SImmAsmOperand<26, "lsl2">; + let EncoderMethod = "getImmOpValueAsr2"; + let DecoderMethod = "decodeSImmOperand<26, 2>"; +} + +// Standalone (codegen-only) immleaf patterns. + +// A 12-bit signed immediate plus one where the imm range will be [-2047, 2048]. +def simm12_plus1 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; + +// Return the negation of an immediate value. +def NegImm : SDNodeXFormgetTargetConstant(-N->getSExtValue(), SDLoc(N), + N->getValueType(0)); +}]>; + +//===----------------------------------------------------------------------===// +// Instruction Formats +//===----------------------------------------------------------------------===// + +include "LoongArchInstrFormats.td" +include "LoongArchFloatInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// + +class ALU_3R op, string opstr> + : Fmt3R; +class ALU_2R op, string opstr> + : Fmt2R; + +class ALU_3RI2 op, string opstr, Operand ImmOpnd> + : Fmt3RI2; +class ALU_3RI3 op, string opstr, Operand ImmOpnd> + : Fmt3RI3; +class ALU_2RI5 op, string opstr, Operand ImmOpnd> + : Fmt2RI5; +class ALU_2RI6 op, string opstr, Operand ImmOpnd> + : Fmt2RI6; +class ALU_2RI12 op, string opstr, Operand ImmOpnd> + : Fmt2RI12; +class ALU_2RI16 op, string opstr, Operand ImmOpnd> + : Fmt2RI16; +class ALU_1RI20 op, string opstr, Operand ImmOpnd> + : Fmt1RI20; + +class MISC_I15 op, string opstr> + : FmtI15; + +class RDTIME_2R op, string opstr> + : Fmt2R; + +class BrCC_2RI16 op, string opstr> + : Fmt2RI16 { + let isBranch = 1; + let isTerminator = 1; +} +class BrCCZ_1RI21 op, string opstr> + : Fmt1RI21 { + let isBranch = 1; + let isTerminator = 1; +} +class Br_I26 op, string opstr> + : FmtI26 { + let isBranch = 1; + let isTerminator = 1; +} + +let mayLoad = 1 in { +class LOAD_3R op, string opstr> + : Fmt3R; +class LOAD_2RI12 op, string opstr> + : Fmt2RI12; +class LOAD_2RI14 op, string opstr> + : Fmt2RI14; +} // mayLoad = 1 + +let mayStore = 1 in { +class STORE_3R op, string opstr> + : Fmt3R; +class STORE_2RI12 op, string opstr> + : Fmt2RI12; +class STORE_2RI14 op, string opstr> + : Fmt2RI14; +} // mayStore = 1 + +let mayLoad = 1, mayStore = 1 in +class AM_3R op, string opstr> + : Fmt3R; + +let mayLoad = 1 in +class LLBase op, string opstr> + : Fmt2RI14; + +let mayStore = 1, Constraints = "$rd = $dst" in +class SCBase op, string opstr> + : Fmt2RI14; + +class IOCSRRD op, string opstr> + : Fmt2R; + +class IOCSRWR op, string opstr> + : Fmt2R; + +//===----------------------------------------------------------------------===// +// Basic Integer Instructions +//===----------------------------------------------------------------------===// + +// Arithmetic Operation Instructions +def ADD_W : ALU_3R<0b00000000000100000, "add.w">; +def SUB_W : ALU_3R<0b00000000000100010, "sub.w">; +def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12>; +def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>; +def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20>; +def SLT : ALU_3R<0b00000000000100100, "slt">; +def SLTU : ALU_3R<0b00000000000100101, "sltu">; +def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>; +def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>; +def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>; +def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>; 
+def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20>; +def AND : ALU_3R<0b00000000000101001, "and">; +def OR : ALU_3R<0b00000000000101010, "or">; +def NOR : ALU_3R<0b00000000000101000, "nor">; +def XOR : ALU_3R<0b00000000000101011, "xor">; +def ANDN : ALU_3R<0b00000000000101101, "andn">; +def ORN : ALU_3R<0b00000000000101100, "orn">; +def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>; +def ORI : ALU_2RI12<0b0000001110, "ori", uimm12>; +def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>; +def MUL_W : ALU_3R<0b00000000000111000, "mul.w">; +def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">; +def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">; +def DIV_W : ALU_3R<0b00000000001000000, "div.w">; +def MOD_W : ALU_3R<0b00000000001000001, "mod.w">; +def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">; +def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">; + +// Bit-shift Instructions +def SLL_W : ALU_3R<0b00000000000101110, "sll.w">; +def SRL_W : ALU_3R<0b00000000000101111, "srl.w">; +def SRA_W : ALU_3R<0b00000000000110000, "sra.w">; +def ROTR_W : ALU_3R<0b00000000000110110, "rotr.w">; + +def SLLI_W : ALU_2RI5<0b00000000010000001, "slli.w", uimm5>; +def SRLI_W : ALU_2RI5<0b00000000010001001, "srli.w", uimm5>; +def SRAI_W : ALU_2RI5<0b00000000010010001, "srai.w", uimm5>; +def ROTRI_W : ALU_2RI5<0b00000000010011001, "rotri.w", uimm5>; + +// Bit-manipulation Instructions +def EXT_W_B : ALU_2R<0b0000000000000000010111, "ext.w.b">; +def EXT_W_H : ALU_2R<0b0000000000000000010110, "ext.w.h">; +def CLO_W : ALU_2R<0b0000000000000000000100, "clo.w">; +def CLZ_W : ALU_2R<0b0000000000000000000101, "clz.w">; +def CTO_W : ALU_2R<0b0000000000000000000110, "cto.w">; +def CTZ_W : ALU_2R<0b0000000000000000000111, "ctz.w">; +def BYTEPICK_W : ALU_3RI2<0b000000000000100, "bytepick.w", uimm2>; +def REVB_2H : ALU_2R<0b0000000000000000001100, "revb.2h">; +def BITREV_4B : ALU_2R<0b0000000000000000010010, "bitrev.4b">; +def BITREV_W : ALU_2R<0b0000000000000000010100, "bitrev.w">; +let Constraints = "$rd = $dst" in { +def BSTRINS_W : FmtBSTR_W<0b000000000110, (outs GPR:$dst), + (ins GPR:$rd, GPR:$rj, uimm5:$msbw, uimm5:$lsbw), + "bstrins.w", "$rd, $rj, $msbw, $lsbw">; +} +def BSTRPICK_W : FmtBSTR_W<0b000000000111, (outs GPR:$rd), + (ins GPR:$rj, uimm5:$msbw, uimm5:$lsbw), + "bstrpick.w", "$rd, $rj, $msbw, $lsbw">; +def MASKEQZ : ALU_3R<0b00000000000100110, "maskeqz">; +def MASKNEZ : ALU_3R<0b00000000000100111, "masknez">; + +// Branch Instructions +def BEQ : BrCC_2RI16<0b010110, "beq">; +def BNE : BrCC_2RI16<0b010111, "bne">; +def BLT : BrCC_2RI16<0b011000, "blt">; +def BGE : BrCC_2RI16<0b011001, "bge">; +def BLTU : BrCC_2RI16<0b011010, "bltu">; +def BGEU : BrCC_2RI16<0b011011, "bgeu">; +def BEQZ : BrCCZ_1RI21<0b010000, "beqz">; +def BNEZ : BrCCZ_1RI21<0b010001, "bnez">; +def B : Br_I26<0b010100, "b">; + +let isCall = 1 in +def BL : FmtI26<0b010101, (outs), (ins simm26_lsl2:$imm26), "bl", "$imm26">; +def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd), + (ins GPR:$rj, simm16_lsl2:$imm16), "jirl", + "$rd, $rj, $imm16">; + +// Common Memory Access Instructions +def LD_B : LOAD_2RI12<0b0010100000, "ld.b">; +def LD_H : LOAD_2RI12<0b0010100001, "ld.h">; +def LD_W : LOAD_2RI12<0b0010100010, "ld.w">; +def LD_BU : LOAD_2RI12<0b0010101000, "ld.bu">; +def LD_HU : LOAD_2RI12<0b0010101001, "ld.hu">; +def ST_B : STORE_2RI12<0b0010100100, "st.b">; +def ST_H : STORE_2RI12<0b0010100101, "st.h">; +def ST_W : STORE_2RI12<0b0010100110, "st.w">; +def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "preld", + "$imm5, $rj, 
$imm12">; + +// Atomic Memory Access Instructions +def LL_W : LLBase<0b00100000, "ll.w">; +def SC_W : SCBase<0b00100001, "sc.w">; + +// Barrier Instructions +def DBAR : MISC_I15<0b00111000011100100, "dbar">; +def IBAR : MISC_I15<0b00111000011100101, "ibar">; + +// Other Miscellaneous Instructions +def SYSCALL : MISC_I15<0b00000000001010110, "syscall">; +def BREAK : MISC_I15<0b00000000001010100, "break">; +def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">; +def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, "rdtimeh.w">; +def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">; + +/// LA64 instructions + +let Predicates = [IsLA64] in { + +// Arithmetic Operation Instructions for 64-bits +def ADD_D : ALU_3R<0b00000000000100001, "add.d">; +def SUB_D : ALU_3R<0b00000000000100011, "sub.d">; +def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12>; +def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>; +def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>; +def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>; +let Constraints = "$rd = $dst" in { +def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst), + (ins GPR:$rd, simm20:$imm20), "lu32i.d", + "$rd, $imm20">; +} +def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12>; +def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>; +def MUL_D : ALU_3R<0b00000000000111011, "mul.d">; +def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">; +def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">; +def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">; +def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">; +def DIV_D : ALU_3R<0b00000000001000100, "div.d">; +def MOD_D : ALU_3R<0b00000000001000101, "mod.d">; +def DIV_DU : ALU_3R<0b00000000001000110, "div.du">; +def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">; + +// Bit-shift Instructions for 64-bits +def SLL_D : ALU_3R<0b00000000000110001, "sll.d">; +def SRL_D : ALU_3R<0b00000000000110010, "srl.d">; +def SRA_D : ALU_3R<0b00000000000110011, "sra.d">; +def ROTR_D : ALU_3R<0b00000000000110111, "rotr.d">; +def SLLI_D : ALU_2RI6<0b0000000001000001, "slli.d", uimm6>; +def SRLI_D : ALU_2RI6<0b0000000001000101, "srli.d", uimm6>; +def SRAI_D : ALU_2RI6<0b0000000001001001, "srai.d", uimm6>; +def ROTRI_D : ALU_2RI6<0b0000000001001101, "rotri.d", uimm6>; + +// Bit-manipulation Instructions for 64-bits +def CLO_D : ALU_2R<0b0000000000000000001000, "clo.d">; +def CLZ_D : ALU_2R<0b0000000000000000001001, "clz.d">; +def CTO_D : ALU_2R<0b0000000000000000001010, "cto.d">; +def CTZ_D : ALU_2R<0b0000000000000000001011, "ctz.d">; +def BYTEPICK_D : ALU_3RI3<0b00000000000011, "bytepick.d", uimm3>; +def REVB_4H : ALU_2R<0b0000000000000000001101, "revb.4h">; +def REVB_2W : ALU_2R<0b0000000000000000001110, "revb.2w">; +def REVB_D : ALU_2R<0b0000000000000000001111, "revb.d">; +def REVH_2W : ALU_2R<0b0000000000000000010000, "revh.2w">; +def REVH_D : ALU_2R<0b0000000000000000010001, "revh.d">; +def BITREV_8B : ALU_2R<0b0000000000000000010011, "bitrev.8b">; +def BITREV_D : ALU_2R<0b0000000000000000010101, "bitrev.d">; +let Constraints = "$rd = $dst" in { +def BSTRINS_D : FmtBSTR_D<0b0000000010, (outs GPR:$dst), + (ins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd), + "bstrins.d", "$rd, $rj, $msbd, $lsbd">; +} +def BSTRPICK_D : FmtBSTR_D<0b0000000011, (outs GPR:$rd), + (ins GPR:$rj, uimm6:$msbd, uimm6:$lsbd), + "bstrpick.d", "$rd, $rj, $msbd, $lsbd">; + +// Common Memory Access Instructions for 64-bits +def LD_WU : LOAD_2RI12<0b0010101010, "ld.wu">; +def LD_D : LOAD_2RI12<0b0010100011, "ld.d">; 
+def ST_D : STORE_2RI12<0b0010100111, "st.d">; +def LDX_B : LOAD_3R<0b00111000000000000, "ldx.b">; +def LDX_H : LOAD_3R<0b00111000000001000, "ldx.h">; +def LDX_W : LOAD_3R<0b00111000000010000, "ldx.w">; +def LDX_D : LOAD_3R<0b00111000000011000, "ldx.d">; +def LDX_BU : LOAD_3R<0b00111000001000000, "ldx.bu">; +def LDX_HU : LOAD_3R<0b00111000001001000, "ldx.hu">; +def LDX_WU : LOAD_3R<0b00111000001010000, "ldx.wu">; +def STX_B : STORE_3R<0b00111000000100000, "stx.b">; +def STX_H : STORE_3R<0b00111000000101000, "stx.h">; +def STX_W : STORE_3R<0b00111000000110000, "stx.w">; +def STX_D : STORE_3R<0b00111000000111000, "stx.d">; +def LDPTR_W : LOAD_2RI14<0b00100100, "ldptr.w">; +def LDPTR_D : LOAD_2RI14<0b00100110, "ldptr.d">; +def STPTR_W : STORE_2RI14<0b00100101, "stptr.w">; +def STPTR_D : STORE_2RI14<0b00100111, "stptr.d">; +def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "preldx", + "$imm5, $rj, $rk">; + +// Bound Check Memory Access Instructions +def LDGT_B : LOAD_3R<0b00111000011110000, "ldgt.b">; +def LDGT_H : LOAD_3R<0b00111000011110001, "ldgt.h">; +def LDGT_W : LOAD_3R<0b00111000011110010, "ldgt.w">; +def LDGT_D : LOAD_3R<0b00111000011110011, "ldgt.d">; +def LDLE_B : LOAD_3R<0b00111000011110100, "ldle.b">; +def LDLE_H : LOAD_3R<0b00111000011110101, "ldle.h">; +def LDLE_W : LOAD_3R<0b00111000011110110, "ldle.w">; +def LDLE_D : LOAD_3R<0b00111000011110111, "ldle.d">; +def STGT_B : STORE_3R<0b00111000011111000, "stgt.b">; +def STGT_H : STORE_3R<0b00111000011111001, "stgt.h">; +def STGT_W : STORE_3R<0b00111000011111010, "stgt.w">; +def STGT_D : STORE_3R<0b00111000011111011, "stgt.d">; +def STLE_B : STORE_3R<0b00111000011111100, "stle.b">; +def STLE_H : STORE_3R<0b00111000011111101, "stle.h">; +def STLE_W : STORE_3R<0b00111000011111110, "stle.w">; +def STLE_D : STORE_3R<0b00111000011111111, "stle.d">; + +// Atomic Memory Access Instructions for 64-bits +def AMSWAP_W : AM_3R<0b00111000011000000, "amswap.w">; +def AMSWAP_D : AM_3R<0b00111000011000001, "amswap.d">; +def AMADD_W : AM_3R<0b00111000011000010, "amadd.w">; +def AMADD_D : AM_3R<0b00111000011000011, "amadd.d">; +def AMAND_W : AM_3R<0b00111000011000100, "amand.w">; +def AMAND_D : AM_3R<0b00111000011000101, "amand.d">; +def AMOR_W : AM_3R<0b00111000011000110, "amor.w">; +def AMOR_D : AM_3R<0b00111000011000111, "amor.d">; +def AMXOR_W : AM_3R<0b00111000011001000, "amxor.w">; +def AMXOR_D : AM_3R<0b00111000011001001, "amxor.d">; +def AMMAX_W : AM_3R<0b00111000011001010, "ammax.w">; +def AMMAX_D : AM_3R<0b00111000011001011, "ammax.d">; +def AMMIN_W : AM_3R<0b00111000011001100, "ammin.w">; +def AMMIN_D : AM_3R<0b00111000011001101, "ammin.d">; +def AMMAX_WU : AM_3R<0b00111000011001110, "ammax.wu">; +def AMMAX_DU : AM_3R<0b00111000011001111, "ammax.du">; +def AMMIN_WU : AM_3R<0b00111000011010000, "ammin.wu">; +def AMMIN_DU : AM_3R<0b00111000011010001, "ammin.du">; +def AMSWAP_DB_W : AM_3R<0b00111000011010010, "amswap_db.w">; +def AMSWAP_DB_D : AM_3R<0b00111000011010011, "amswap_db.d">; +def AMADD_DB_W : AM_3R<0b00111000011010100, "amadd_db.w">; +def AMADD_DB_D : AM_3R<0b00111000011010101, "amadd_db.d">; +def AMAND_DB_W : AM_3R<0b00111000011010110, "amand_db.w">; +def AMAND_DB_D : AM_3R<0b00111000011010111, "amand_db.d">; +def AMOR_DB_W : AM_3R<0b00111000011011000, "amor_db.w">; +def AMOR_DB_D : AM_3R<0b00111000011011001, "amor_db.d">; +def AMXOR_DB_W : AM_3R<0b00111000011011010, "amxor_db.w">; +def AMXOR_DB_D : AM_3R<0b00111000011011011, "amxor_db.d">; +def AMMAX_DB_W : AM_3R<0b00111000011011100, "ammax_db.w">; +def 
AMMAX_DB_D : AM_3R<0b00111000011011101, "ammax_db.d">;
+def AMMIN_DB_W : AM_3R<0b00111000011011110, "ammin_db.w">;
+def AMMIN_DB_D : AM_3R<0b00111000011011111, "ammin_db.d">;
+def AMMAX_DB_WU : AM_3R<0b00111000011100000, "ammax_db.wu">;
+def AMMAX_DB_DU : AM_3R<0b00111000011100001, "ammax_db.du">;
+def AMMIN_DB_WU : AM_3R<0b00111000011100010, "ammin_db.wu">;
+def AMMIN_DB_DU : AM_3R<0b00111000011100011, "ammin_db.du">;
+def LL_D : LLBase<0b00100010, "ll.d">;
+def SC_D : SCBase<0b00100011, "sc.d">;
+
+// CRC Check Instructions
+def CRC_W_B_W : ALU_3R<0b00000000001001000, "crc.w.b.w">;
+def CRC_W_H_W : ALU_3R<0b00000000001001001, "crc.w.h.w">;
+def CRC_W_W_W : ALU_3R<0b00000000001001010, "crc.w.w.w">;
+def CRC_W_D_W : ALU_3R<0b00000000001001011, "crc.w.d.w">;
+def CRCC_W_B_W : ALU_3R<0b00000000001001100, "crcc.w.b.w">;
+def CRCC_W_H_W : ALU_3R<0b00000000001001101, "crcc.w.h.w">;
+def CRCC_W_W_W : ALU_3R<0b00000000001001110, "crcc.w.w.w">;
+def CRCC_W_D_W : ALU_3R<0b00000000001001111, "crcc.w.d.w">;
+
+// Other Miscellaneous Instructions for 64-bits
+def ASRTLE_D : FmtASRT<0b00000000000000010, (outs), (ins GPR:$rj, GPR:$rk),
+                       "asrtle.d", "$rj, $rk">;
+def ASRTGT_D : FmtASRT<0b00000000000000011, (outs), (ins GPR:$rj, GPR:$rk),
+                       "asrtgt.d", "$rj, $rk">;
+def RDTIME_D : RDTIME_2R<0b0000000000000000011010, "rdtime.d">;
+} // Predicates = [IsLA64]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//
+// Naming convention: For 'generic' pattern classes, we use the naming
+// convention PatTy1Ty2.
+//===----------------------------------------------------------------------===//
+
+/// Generic pattern classes
+
+class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
+    : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
+class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
+    : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+
+class PatGprImm<SDPatternOperator OpNode, LAInst Inst, Operand ImmOpnd>
+    : Pat<(OpNode GPR:$rj, ImmOpnd:$imm),
+          (Inst GPR:$rj, ImmOpnd:$imm)>;
+class PatGprImm_32<SDPatternOperator OpNode, LAInst Inst, Operand ImmOpnd>
+    : Pat<(sext_inreg (OpNode GPR:$rj, ImmOpnd:$imm), i32),
+          (Inst GPR:$rj, ImmOpnd:$imm)>;
+
+/// Simple arithmetic operations
+
+// Match both a plain shift and one where the shift amount is masked (this is
+// typically introduced when the legalizer promotes the shift amount and
+// zero-extends it). For LoongArch, the mask is unnecessary as shifts in the
+// base ISA only read the least significant 5 bits (LA32) or 6 bits (LA64).
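A standalone C++ illustration of the comment above, assuming LA64 sll.d semantics where hardware reads only the low 6 bits of the shift amount; this is why the shiftMaskGRLen/shiftMask32 ComplexPatterns defined next can match a shift whether or not an explicit mask is present:

#include <cstdint>
#include <cstdio>

// What sll.d amounts to on LA64: only the low 6 bits of the amount
// participate, so pre-masking the amount with 63 changes nothing.
static uint64_t sll_d(uint64_t v, uint64_t amt) { return v << (amt & 63); }

int main() {
  uint64_t v = 0x1, amt = 70; // 70 & 63 == 6
  std::printf("%d\n", sll_d(v, amt) == sll_d(v, amt & 63)); // always 1
  return 0;
}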
+def shiftMaskGRLen
+    : ComplexPattern<GRLenVT, 1, "selectShiftMaskGRLen", [], [], 0>;
+def shiftMask32 : ComplexPattern<i64, 1, "selectShiftMask32", [], [], 0>;
+
+class shiftop<SDPatternOperator operator>
+    : PatFrag<(ops node:$val, node:$count),
+              (operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>;
+class shiftopw<SDPatternOperator operator>
+    : PatFrag<(ops node:$val, node:$count),
+              (operator node:$val, (i64 (shiftMask32 node:$count)))>;
+
+let Predicates = [IsLA32] in {
+def : PatGprGpr<add, ADD_W>;
+def : PatGprImm<add, ADDI_W, simm12>;
+def : PatGprGpr<sub, SUB_W>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : PatGprGpr<add, ADD_D>;
+def : PatGprGpr_32<add, ADD_W>;
+def : PatGprImm<add, ADDI_D, simm12>;
+def : PatGprImm_32<add, ADDI_W, simm12>;
+def : PatGprGpr<sub, SUB_D>;
+def : PatGprGpr_32<sub, SUB_W>;
+} // Predicates = [IsLA64]
+
+def : PatGprGpr<and, AND>;
+def : PatGprImm<and, ANDI, uimm12>;
+def : PatGprGpr<or, OR>;
+def : PatGprImm<or, ORI, uimm12>;
+def : PatGprGpr<xor, XOR>;
+def : PatGprImm<xor, XORI, uimm12>;
+
+/// Shift
+
+let Predicates = [IsLA32] in {
+def : PatGprGpr<shiftop<shl>, SLL_W>;
+def : PatGprGpr<shiftop<sra>, SRA_W>;
+def : PatGprGpr<shiftop<srl>, SRL_W>;
+def : PatGprImm<shl, SLLI_W, uimm5>;
+def : PatGprImm<sra, SRAI_W, uimm5>;
+def : PatGprImm<srl, SRLI_W, uimm5>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : PatGprGpr<shiftopw<loongarch_sll_w>, SLL_W>;
+def : PatGprGpr<shiftopw<loongarch_sra_w>, SRA_W>;
+def : PatGprGpr<shiftopw<loongarch_srl_w>, SRL_W>;
+def : PatGprGpr<shiftop<shl>, SLL_D>;
+def : PatGprGpr<shiftop<sra>, SRA_D>;
+def : PatGprGpr<shiftop<srl>, SRL_D>;
+def : PatGprImm<shl, SLLI_D, uimm6>;
+def : PatGprImm<sra, SRAI_D, uimm6>;
+def : PatGprImm<srl, SRLI_D, uimm6>;
+} // Predicates = [IsLA64]
+
+/// sext and zext
+
+def : Pat<(sext_inreg GPR:$rj, i8), (EXT_W_B GPR:$rj)>;
+def : Pat<(sext_inreg GPR:$rj, i16), (EXT_W_H GPR:$rj)>;
+
+let Predicates = [IsLA64] in {
+def : Pat<(sext_inreg GPR:$rj, i32), (ADDI_W GPR:$rj, 0)>;
+} // Predicates = [IsLA64]
+
+/// Setcc
+
+def : PatGprGpr<setlt, SLT>;
+def : PatGprImm<setlt, SLTI, simm12>;
+def : PatGprGpr<setult, SLTU>;
+def : PatGprImm<setult, SLTUI, simm12>;
+
+// Define pattern expansions for setcc operations that aren't directly
+// handled by a LoongArch instruction.
+def : Pat<(seteq GPR:$rj, 0), (SLTUI GPR:$rj, 1)>;
+def : Pat<(seteq GPR:$rj, GPR:$rk), (SLTUI (XOR GPR:$rj, GPR:$rk), 1)>;
+let Predicates = [IsLA32] in {
+def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12),
+          (SLTUI (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>;
+} // Predicates = [IsLA32]
+let Predicates = [IsLA64] in {
+def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12),
+          (SLTUI (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>;
+} // Predicates = [IsLA64]
+def : Pat<(setne GPR:$rj, 0), (SLTU R0, GPR:$rj)>;
+def : Pat<(setne GPR:$rj, GPR:$rk), (SLTU R0, (XOR GPR:$rj, GPR:$rk))>;
+let Predicates = [IsLA32] in {
+def : Pat<(setne GPR:$rj, simm12_plus1:$imm12),
+          (SLTU R0, (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)))>;
+} // Predicates = [IsLA32]
+let Predicates = [IsLA64] in {
+def : Pat<(setne GPR:$rj, simm12_plus1:$imm12),
+          (SLTU R0, (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)))>;
+} // Predicates = [IsLA64]
+def : Pat<(setugt GPR:$rj, GPR:$rk), (SLTU GPR:$rk, GPR:$rj)>;
+def : Pat<(setuge GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rj, GPR:$rk), 1)>;
+def : Pat<(setule GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rk, GPR:$rj), 1)>;
+def : Pat<(setgt GPR:$rj, GPR:$rk), (SLT GPR:$rk, GPR:$rj)>;
+def : Pat<(setge GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rj, GPR:$rk), 1)>;
+def : Pat<(setle GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rk, GPR:$rj), 1)>;
+
+/// Select
+
+def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
+          (OR (MASKEQZ GPR:$t, GPR:$cond), (MASKNEZ GPR:$f, GPR:$cond))>;
+
+/// Branches and jumps
+
+let isBarrier = 1, isReturn = 1, isTerminator = 1 in
+def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
+                PseudoInstExpansion<(JIRL R0, R1, 0)>;
+
+/// BSTRPICK
+
+let Predicates = [IsLA32] in
+def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
+          (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
+
+let Predicates = [IsLA64] in
+def : Pat<(loongarch_bstrpick
GPR:$rj, uimm6:$msbd, uimm6:$lsbd), + (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; + +//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions +//===----------------------------------------------------------------------===// + +def : InstAlias<"nop", (ANDI R0, R0, 0)>; +def : InstAlias<"move $dst, $src", (OR GPR:$dst, GPR:$src, R0)>; + +//===----------------------------------------------------------------------===// +// Basic Floating-Point Instructions +//===----------------------------------------------------------------------===// + +include "LoongArchFloat32InstrInfo.td" +include "LoongArchFloat64InstrInfo.td" + +//===----------------------------------------------------------------------===// +// Privilege Instructions +//===----------------------------------------------------------------------===// + +// CSR Access Instructions +def CSRRD : FmtCSR<0b0000010000000, (outs GPR:$rd), (ins uimm14:$csr_num), + "csrrd", "$rd, $csr_num">; +let Constraints = "$rd = $dst" in { +def CSRWR : FmtCSR<0b0000010000001, (outs GPR:$dst), + (ins GPR:$rd, uimm14:$csr_num), "csrwr", "$rd, $csr_num">; +def CSRXCHG : FmtCSRXCHG<0b00000100, (outs GPR:$dst), + (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), + "csrxchg", "$rd, $rj, $csr_num">; +} // Constraints = "$rd = $dst" + +// IOCSR Access Instructions +def IOCSRRD_B : IOCSRRD<0b0000011001001000000000, "iocsrrd.b">; +def IOCSRRD_H : IOCSRRD<0b0000011001001000000001, "iocsrrd.h">; +def IOCSRRD_W : IOCSRRD<0b0000011001001000000010, "iocsrrd.w">; +def IOCSRWR_B : IOCSRWR<0b0000011001001000000100, "iocsrwr.b">; +def IOCSRWR_H : IOCSRWR<0b0000011001001000000101, "iocsrwr.h">; +def IOCSRWR_W : IOCSRWR<0b0000011001001000000110, "iocsrwr.w">; +let Predicates = [IsLA64] in { +def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">; +def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">; +} // Predicates = [IsLA64] + +// Cache Maintenance Instructions +def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop", + "$op, $rj, $imm12">; + +// TLB Maintenance Instructions +def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">; +def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">; +def TLBWR : FmtI32<0b00000110010010000011000000000000, "tlbwr">; +def TLBFILL : FmtI32<0b00000110010010000011010000000000, "tlbfill">; +def TLBCLR : FmtI32<0b00000110010010000010000000000000, "tlbclr">; +def TLBFLUSH : FmtI32<0b00000110010010000010010000000000, "tlbflush">; +def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "invtlb", + "$op, $rj, $rk">; + +// Software Page Walking Instructions +def LDDIR : Fmt2RI8<0b00000110010000, (outs GPR:$rd), + (ins GPR:$rj, uimm8:$imm8), "lddir", "$rd, $rj, $imm8">; +def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">; + + +// Other Miscellaneous Instructions +def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">; +def DBCL : MISC_I15<0b00000000001010101, "dbcl">; +def IDLE : MISC_I15<0b00000110010010001, "idle">; diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp new file mode 100644 index 000000000000..7416c93b4d05 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -0,0 +1,66 @@ +//=- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to an MCInst -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower LoongArch MachineInstrs to their +// corresponding MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "LoongArchSubtarget.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, + const AsmPrinter &AP) { + switch (MO.getType()) { + default: + report_fatal_error( + "lowerLoongArchMachineOperandToMCOperand: unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + return false; + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_RegisterMask: + // Regmasks are like implicit defs. + return false; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + // TODO: lower special operands + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + break; + } + return true; +} + +bool llvm::lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI, AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (const MachineOperand &MO : MI->operands()) { + MCOperand MCOp; + if (lowerLoongArchMachineOperandToMCOperand(MO, MCOp, AP)) + OutMI.addOperand(MCOp); + } + return false; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h new file mode 100644 index 000000000000..d4a6c884bc9d --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -0,0 +1,57 @@ +//=- LoongArchMachineFunctionInfo.h - LoongArch machine function info -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares LoongArch-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H + +#include "LoongArchSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// LoongArchMachineFunctionInfo - This class is derived from +/// MachineFunctionInfo and contains private LoongArch-specific information for +/// each MachineFunction. 
+class LoongArchMachineFunctionInfo : public MachineFunctionInfo {
+private:
+  /// FrameIndex for start of varargs area
+  int VarArgsFrameIndex = 0;
+  /// Size of the save area used for varargs
+  int VarArgsSaveSize = 0;
+
+  /// Size of stack frame to save callee saved registers
+  unsigned CalleeSavedStackSize = 0;
+
+public:
+  LoongArchMachineFunctionInfo(const MachineFunction &MF) {}
+
+  MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const override {
+    return DestMF.cloneInfo<LoongArchMachineFunctionInfo>(*this);
+  }
+
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+  unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; }
+  void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
+
+  unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
+  void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H
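A hypothetical usage sketch (an editor's illustration, not from this patch, and only compilable inside the LLVM tree): frame lowering code would pull this object off the MachineFunction and query the saved sizes. The helper name frameBytesForCSRs is invented for illustration.

#include "LoongArchMachineFunctionInfo.h"
using namespace llvm;

// Sum the callee-saved and varargs save areas recorded on the function.
static unsigned frameBytesForCSRs(const MachineFunction &MF) {
  auto *LAFI = MF.getInfo<LoongArchMachineFunctionInfo>();
  return LAFI->getCalleeSavedStackSize() + LAFI->getVarArgsSaveSize();
}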
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
new file mode 100644
index 000000000000..b9bae8e56304
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -0,0 +1,115 @@
+//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchRegisterInfo.h"
+#include "LoongArch.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define GET_REGINFO_TARGET_DESC
+#include "LoongArchGenRegisterInfo.inc"
+
+LoongArchRegisterInfo::LoongArchRegisterInfo(unsigned HwMode)
+    : LoongArchGenRegisterInfo(LoongArch::R1, /*DwarfFlavour*/ 0,
+                               /*EHFlavor*/ 0,
+                               /*PC*/ 0, HwMode) {}
+
+const MCPhysReg *
+LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  auto &Subtarget = MF->getSubtarget<LoongArchSubtarget>();
+
+  switch (Subtarget.getTargetABI()) {
+  default:
+    llvm_unreachable("Unrecognized ABI");
+  case LoongArchABI::ABI_ILP32S:
+  case LoongArchABI::ABI_LP64S:
+    return CSR_ILP32S_LP64S_SaveList;
+  case LoongArchABI::ABI_ILP32F:
+  case LoongArchABI::ABI_LP64F:
+    return CSR_ILP32F_LP64F_SaveList;
+  case LoongArchABI::ABI_ILP32D:
+  case LoongArchABI::ABI_LP64D:
+    return CSR_ILP32D_LP64D_SaveList;
+  }
+}
+
+const uint32_t *
+LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+                                            CallingConv::ID CC) const {
+  auto &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
+
+  switch (Subtarget.getTargetABI()) {
+  default:
+    llvm_unreachable("Unrecognized ABI");
+  case LoongArchABI::ABI_ILP32S:
+  case LoongArchABI::ABI_LP64S:
+    return CSR_ILP32S_LP64S_RegMask;
+  case LoongArchABI::ABI_ILP32F:
+  case LoongArchABI::ABI_LP64F:
+    return CSR_ILP32F_LP64F_RegMask;
+  case LoongArchABI::ABI_ILP32D:
+  case LoongArchABI::ABI_LP64D:
+    return CSR_ILP32D_LP64D_RegMask;
+  }
+}
+
+const uint32_t *LoongArchRegisterInfo::getNoPreservedMask() const {
+  return CSR_NoRegs_RegMask;
+}
+
+BitVector
+LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  const LoongArchFrameLowering *TFI = getFrameLowering(MF);
+  BitVector Reserved(getNumRegs());
+
+  // Use markSuperRegs to ensure any register aliases are also reserved
+  markSuperRegs(Reserved, LoongArch::R0);  // zero
+  markSuperRegs(Reserved, LoongArch::R2);  // tp
+  markSuperRegs(Reserved, LoongArch::R3);  // sp
+  markSuperRegs(Reserved, LoongArch::R21); // non-allocatable
+  if (TFI->hasFP(MF))
+    markSuperRegs(Reserved, LoongArch::R22); // fp
+  // Reserve the base register if we need to realign the stack and allocate
+  // variable-sized objects at runtime.
+  if (TFI->hasBP(MF))
+    markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
+
+  assert(checkAllSuperRegsMarked(Reserved));
+  return Reserved;
+}
+
+bool LoongArchRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
+  return PhysReg == LoongArch::R0;
+}
+
+Register
+LoongArchRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = getFrameLowering(MF);
+  return TFI->hasFP(MF) ?
LoongArch::R22 : LoongArch::R3; +} + +void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); + // TODO: Implement this when we have function calls +} diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h new file mode 100644 index 000000000000..02c9156e2b87 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h @@ -0,0 +1,50 @@ +//= LoongArchRegisterInfo.h - LoongArch Register Information Impl -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H + +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "LoongArchGenRegisterInfo.inc" + +namespace llvm { + +struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { + + LoongArchRegisterInfo(unsigned HwMode); + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + const uint32_t *getNoPreservedMask() const override; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isConstantPhysReg(MCRegister PhysReg) const override; + + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override { + return &LoongArch::GPRRegClass; + } + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + Register getFrameRegister(const MachineFunction &MF) const override; +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td new file mode 100644 index 000000000000..2d5ad99f6156 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -0,0 +1,161 @@ +//===-- LoongArchRegisterInfo.td - LoongArch Register defs -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the LoongArch register files
+//===----------------------------------------------------------------------===//
+
+let Namespace = "LoongArch" in {
+class LoongArchReg<bits<16> Enc, string n, list<string> alt = []>
+    : Register<n> {
+  let HWEncoding = Enc;
+  let AltNames = alt;
+}
+
+class LoongArchReg32<bits<16> Enc, string n, list<string> alt = []>
+    : Register<n> {
+  let HWEncoding = Enc;
+  let AltNames = alt;
+}
+
+def sub_32 : SubRegIndex<32>;
+class LoongArchReg64<LoongArchReg32 subreg>
+    : Register<""> {
+  let HWEncoding = subreg.HWEncoding;
+  let SubRegs = [subreg];
+  let SubRegIndices = [sub_32];
+  let AsmName = subreg.AsmName;
+  let AltNames = subreg.AltNames;
+}
+
+let FallbackRegAltNameIndex = NoRegAltName in
+def RegAliasName : RegAltNameIndex;
+} // Namespace = "LoongArch"
+
+// Integer registers
+
+let RegAltNameIndices = [RegAliasName] in {
+  def R0 : LoongArchReg<0, "r0", ["zero"]>, DwarfRegNum<[0]>;
+  def R1 : LoongArchReg<1, "r1", ["ra"]>, DwarfRegNum<[1]>;
+  def R2 : LoongArchReg<2, "r2", ["tp"]>, DwarfRegNum<[2]>;
+  def R3 : LoongArchReg<3, "r3", ["sp"]>, DwarfRegNum<[3]>;
+  def R4 : LoongArchReg<4, "r4", ["a0"]>, DwarfRegNum<[4]>;
+  def R5 : LoongArchReg<5, "r5", ["a1"]>, DwarfRegNum<[5]>;
+  def R6 : LoongArchReg<6, "r6", ["a2"]>, DwarfRegNum<[6]>;
+  def R7 : LoongArchReg<7, "r7", ["a3"]>, DwarfRegNum<[7]>;
+  def R8 : LoongArchReg<8, "r8", ["a4"]>, DwarfRegNum<[8]>;
+  def R9 : LoongArchReg<9, "r9", ["a5"]>, DwarfRegNum<[9]>;
+  def R10 : LoongArchReg<10, "r10", ["a6"]>, DwarfRegNum<[10]>;
+  def R11 : LoongArchReg<11, "r11", ["a7"]>, DwarfRegNum<[11]>;
+  def R12 : LoongArchReg<12, "r12", ["t0"]>, DwarfRegNum<[12]>;
+  def R13 : LoongArchReg<13, "r13", ["t1"]>, DwarfRegNum<[13]>;
+  def R14 : LoongArchReg<14, "r14", ["t2"]>, DwarfRegNum<[14]>;
+  def R15 : LoongArchReg<15, "r15", ["t3"]>, DwarfRegNum<[15]>;
+  def R16 : LoongArchReg<16, "r16", ["t4"]>, DwarfRegNum<[16]>;
+  def R17 : LoongArchReg<17, "r17", ["t5"]>, DwarfRegNum<[17]>;
+  def R18 : LoongArchReg<18, "r18", ["t6"]>, DwarfRegNum<[18]>;
+  def R19 : LoongArchReg<19, "r19", ["t7"]>, DwarfRegNum<[19]>;
+  def R20 : LoongArchReg<20, "r20", ["t8"]>, DwarfRegNum<[20]>;
+  def R21 : LoongArchReg<21, "r21", [""]>, DwarfRegNum<[21]>;
+  def R22 : LoongArchReg<22, "r22", ["fp", "s9"]>, DwarfRegNum<[22]>;
+  def R23 : LoongArchReg<23, "r23", ["s0"]>, DwarfRegNum<[23]>;
+  def R24 : LoongArchReg<24, "r24", ["s1"]>, DwarfRegNum<[24]>;
+  def R25 : LoongArchReg<25, "r25", ["s2"]>, DwarfRegNum<[25]>;
+  def R26 : LoongArchReg<26, "r26", ["s3"]>, DwarfRegNum<[26]>;
+  def R27 : LoongArchReg<27, "r27", ["s4"]>, DwarfRegNum<[27]>;
+  def R28 : LoongArchReg<28, "r28", ["s5"]>, DwarfRegNum<[28]>;
+  def R29 : LoongArchReg<29, "r29", ["s6"]>, DwarfRegNum<[29]>;
+  def R30 : LoongArchReg<30, "r30", ["s7"]>, DwarfRegNum<[30]>;
+  def R31 : LoongArchReg<31, "r31", ["s8"]>, DwarfRegNum<[31]>;
+} // RegAltNameIndices = [RegAliasName]
+
+def GRLenVT : ValueTypeByHwMode<[LA32, LA64],
+                                [i32, i64]>;
+def GRLenRI : RegInfoByHwMode<
+    [LA32,              LA64],
+    [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
+
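GRLenVT and GRLenRI make the value type and register width a function of the HwMode, so a single GPR class below serves both LA32 and LA64. Roughly, in C++ terms (an illustrative sketch only, not an LLVM API):

#include <cstdio>

// What RegInfoByHwMode selects per hardware mode: size, spill size and
// spill alignment in bits.
struct RegInfo { unsigned Size, SpillSize, SpillAlign; };
static RegInfo grInfoFor(bool IsLA64) {
  return IsLA64 ? RegInfo{64, 64, 64} : RegInfo{32, 32, 32};
}

int main() {
  std::printf("LA64 GPR spill size: %u bits\n", grInfoFor(true).SpillSize);
  return 0;
}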
+// The order of registers represents the preferred allocation sequence.
+// Registers are listed in the order caller-save, callee-save, specials.
+def GPR : RegisterClass<"LoongArch", [GRLenVT], 32, (add
+    // Argument registers (a0...a7)
+    (sequence "R%u", 4, 11),
+    // Temporary registers (t0...t8)
+    (sequence "R%u", 12, 20),
+    // Static register (s9/fp, s0...s8)
+    (sequence "R%u", 22, 31),
+    // Specials (r0, ra, tp, sp)
+    (sequence "R%u", 0, 3),
+    // Reserved (Non-allocatable)
+    R21
+  )> {
+  let RegInfos = GRLenRI;
+}
+
+// Floating point registers
+
+let RegAltNameIndices = [RegAliasName] in {
+  def F0 : LoongArchReg32<0, "f0", ["fa0"]>, DwarfRegNum<[32]>;
+  def F1 : LoongArchReg32<1, "f1", ["fa1"]>, DwarfRegNum<[33]>;
+  def F2 : LoongArchReg32<2, "f2", ["fa2"]>, DwarfRegNum<[34]>;
+  def F3 : LoongArchReg32<3, "f3", ["fa3"]>, DwarfRegNum<[35]>;
+  def F4 : LoongArchReg32<4, "f4", ["fa4"]>, DwarfRegNum<[36]>;
+  def F5 : LoongArchReg32<5, "f5", ["fa5"]>, DwarfRegNum<[37]>;
+  def F6 : LoongArchReg32<6, "f6", ["fa6"]>, DwarfRegNum<[38]>;
+  def F7 : LoongArchReg32<7, "f7", ["fa7"]>, DwarfRegNum<[39]>;
+  def F8 : LoongArchReg32<8, "f8", ["ft0"]>, DwarfRegNum<[40]>;
+  def F9 : LoongArchReg32<9, "f9", ["ft1"]>, DwarfRegNum<[41]>;
+  def F10 : LoongArchReg32<10,"f10", ["ft2"]>, DwarfRegNum<[42]>;
+  def F11 : LoongArchReg32<11,"f11", ["ft3"]>, DwarfRegNum<[43]>;
+  def F12 : LoongArchReg32<12,"f12", ["ft4"]>, DwarfRegNum<[44]>;
+  def F13 : LoongArchReg32<13,"f13", ["ft5"]>, DwarfRegNum<[45]>;
+  def F14 : LoongArchReg32<14,"f14", ["ft6"]>, DwarfRegNum<[46]>;
+  def F15 : LoongArchReg32<15,"f15", ["ft7"]>, DwarfRegNum<[47]>;
+  def F16 : LoongArchReg32<16,"f16", ["ft8"]>, DwarfRegNum<[48]>;
+  def F17 : LoongArchReg32<17,"f17", ["ft9"]>, DwarfRegNum<[49]>;
+  def F18 : LoongArchReg32<18,"f18", ["ft10"]>, DwarfRegNum<[50]>;
+  def F19 : LoongArchReg32<19,"f19", ["ft11"]>, DwarfRegNum<[51]>;
+  def F20 : LoongArchReg32<20,"f20", ["ft12"]>, DwarfRegNum<[52]>;
+  def F21 : LoongArchReg32<21,"f21", ["ft13"]>, DwarfRegNum<[53]>;
+  def F22 : LoongArchReg32<22,"f22", ["ft14"]>, DwarfRegNum<[54]>;
+  def F23 : LoongArchReg32<23,"f23", ["ft15"]>, DwarfRegNum<[55]>;
+  def F24 : LoongArchReg32<24,"f24", ["fs0"]>, DwarfRegNum<[56]>;
+  def F25 : LoongArchReg32<25,"f25", ["fs1"]>, DwarfRegNum<[57]>;
+  def F26 : LoongArchReg32<26,"f26", ["fs2"]>, DwarfRegNum<[58]>;
+  def F27 : LoongArchReg32<27,"f27", ["fs3"]>, DwarfRegNum<[59]>;
+  def F28 : LoongArchReg32<28,"f28", ["fs4"]>, DwarfRegNum<[60]>;
+  def F29 : LoongArchReg32<29,"f29", ["fs5"]>, DwarfRegNum<[61]>;
+  def F30 : LoongArchReg32<30,"f30", ["fs6"]>, DwarfRegNum<[62]>;
+  def F31 : LoongArchReg32<31,"f31", ["fs7"]>, DwarfRegNum<[63]>;
+
+  foreach I = 0-31 in {
+    def F#I#_64 : LoongArchReg64<!cast<LoongArchReg32>("F"#I)>,
+                  DwarfRegNum<[!add(I, 32)]>;
+  }
+}
+
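The foreach above pairs each 32-bit F register with a 64-bit wrapper that shares its encoding through the sub_32 subregister index, and both views share one DWARF number (32 + i). A tiny C++ sketch of that numbering, for illustration only:

#include <cstdio>

int main() {
  // GPR r<i> -> DWARF i; FPR f<i> (and its 64-bit view) -> DWARF 32 + i.
  for (int I = 0; I < 4; ++I)
    std::printf("F%d and F%d_64 -> DWARF %d\n", I, I, 32 + I);
  return 0;
}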
+// The order of registers represents the preferred allocation sequence.
+def FPR32 : RegisterClass<"LoongArch", [f32], 32, (sequence "F%u", 0, 31)>;
+def FPR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>;
+
+// Condition flag registers
+
+foreach I = 0-7 in
+def FCC#I : LoongArchReg<I, "fcc"#I>;
+
+def CFR : RegisterClass<"LoongArch", [GRLenVT], 32, (sequence "FCC%u", 0, 7)> {
+  let RegInfos = GRLenRI;
+}
+
+// Control and status registers
+
+foreach I = 0-3 in
+def FCSR#I : LoongArchReg<I, "fcsr"#I>;
+
+let isAllocatable = false in
+def FCSR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCSR%u", 0, 3)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
new file mode 100644
index 000000000000..ff84e7c8cc1f
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -0,0 +1,54 @@
+//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArch specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchSubtarget.h"
+#include "LoongArchFrameLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "LoongArchGenSubtargetInfo.inc"
+
+void LoongArchSubtarget::anchor() {}
+
+LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies(
+    const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
+    StringRef ABIName) {
+  bool Is64Bit = TT.isArch64Bit();
+  if (CPU.empty())
+    CPU = Is64Bit ? "generic-la64" : "generic-la32";
+
+  if (TuneCPU.empty())
+    TuneCPU = CPU;
+
+  ParseSubtargetFeatures(CPU, TuneCPU, FS);
+  if (Is64Bit) {
+    GRLenVT = MVT::i64;
+    GRLen = 64;
+  }
+
+  // TODO: ILP32{S,F} LP64{S,F}
+  TargetABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D;
+  return *this;
+}
+
+LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
+                                       StringRef TuneCPU, StringRef FS,
+                                       StringRef ABIName,
+                                       const TargetMachine &TM)
+    : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+      FrameLowering(
+          initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
+      InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
new file mode 100644
index 000000000000..95c2c676cc3c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -0,0 +1,89 @@
+//===- LoongArchSubtarget.h - Define Subtarget for the LoongArch -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch specific subclass of TargetSubtargetInfo.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H + +#include "LoongArchFrameLowering.h" +#include "LoongArchISelLowering.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchRegisterInfo.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +#define GET_SUBTARGETINFO_HEADER +#include "LoongArchGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + virtual void anchor(); + bool HasLA64 = false; + bool HasBasicF = false; + bool HasBasicD = false; + bool HasExtLSX = false; + bool HasExtLASX = false; + bool HasExtLVZ = false; + bool HasExtLBT = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; + LoongArchFrameLowering FrameLowering; + LoongArchInstrInfo InstrInfo; + LoongArchRegisterInfo RegInfo; + LoongArchTargetLowering TLInfo; + + /// Initializes using the passed in CPU and feature strings so that we can + /// use initializer lists for subtarget initialization. + LoongArchSubtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef CPU, + StringRef TuneCPU, + StringRef FS, + StringRef ABIName); + +public: + // Initializes the data members to match that of the specified triple. + LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, + StringRef FS, StringRef ABIName, const TargetMachine &TM); + + // Parses features string setting specified subtarget options. The + // definition of this function is auto-generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + const LoongArchFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const LoongArchRegisterInfo *getRegisterInfo() const override { + return &RegInfo; + } + const LoongArchTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + bool is64Bit() const { return HasLA64; } + bool hasBasicF() const { return HasBasicF; } + bool hasBasicD() const { return HasBasicD; } + bool hasExtLSX() const { return HasExtLSX; } + bool hasExtLASX() const { return HasExtLASX; } + bool hasExtLVZ() const { return HasExtLVZ; } + bool hasExtLBT() const { return HasExtLBT; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp new file mode 100644 index 000000000000..3a1a46a9e624 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -0,0 +1,118 @@ +//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements the info about LoongArch target spec. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetMachine.h"
+#include "LoongArch.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch"
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
+  // Register the target.
+  RegisterTargetMachine<LoongArchTargetMachine> X(getTheLoongArch32Target());
+  RegisterTargetMachine<LoongArchTargetMachine> Y(getTheLoongArch64Target());
+}
+
+static std::string computeDataLayout(const Triple &TT) {
+  if (TT.isArch64Bit())
+    return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
+  assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
+  return "e-m:e-p:32:32-i64:64-n32-S128";
+}
+
+static Reloc::Model getEffectiveRelocModel(const Triple &TT,
+                                           Optional<Reloc::Model> RM) {
+  if (!RM.hasValue())
+    return Reloc::Static;
+  return *RM;
+}
+
+LoongArchTargetMachine::LoongArchTargetMachine(
+    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Optional<Reloc::Model> RM,
+    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+    : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+                        getEffectiveRelocModel(TT, RM),
+                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
+      TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
+  initAsmInfo();
+}
+
+LoongArchTargetMachine::~LoongArchTargetMachine() = default;
+
+const LoongArchSubtarget *
+LoongArchTargetMachine::getSubtargetImpl(const Function &F) const {
+  Attribute CPUAttr = F.getFnAttribute("target-cpu");
+  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
+  Attribute FSAttr = F.getFnAttribute("target-features");
+
+  std::string CPU =
+      CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+  std::string TuneCPU =
+      TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
+  std::string FS =
+      FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+
+  std::string Key = CPU + TuneCPU + FS;
+  auto &I = SubtargetMap[Key];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
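+    // The cache key concatenates the CPU, tune CPU and feature strings, so
+    // each distinct combination gets exactly one LoongArchSubtarget.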
+    resetTargetOptions(F);
+    auto ABIName = Options.MCOptions.getABIName();
+    if (const MDString *ModuleTargetABI = dyn_cast_or_null<MDString>(
+            F.getParent()->getModuleFlag("target-abi"))) {
+      auto TargetABI = LoongArchABI::getTargetABI(ABIName);
+      if (TargetABI != LoongArchABI::ABI_Unknown &&
+          ModuleTargetABI->getString() != ABIName) {
+        report_fatal_error("-target-abi option != target-abi module flag");
+      }
+      ABIName = ModuleTargetABI->getString();
+    }
+    I = std::make_unique<LoongArchSubtarget>(TargetTriple, CPU, TuneCPU, FS,
+                                             ABIName, *this);
+  }
+  return I.get();
+}
+
+namespace {
+class LoongArchPassConfig : public TargetPassConfig {
+public:
+  LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  LoongArchTargetMachine &getLoongArchTargetMachine() const {
+    return getTM<LoongArchTargetMachine>();
+  }
+
+  bool addInstSelector() override;
+};
+} // namespace
+
+TargetPassConfig *
+LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new LoongArchPassConfig(*this, PM);
+}
+
+bool LoongArchPassConfig::addInstSelector() {
+  addPass(createLoongArchISelDag(getLoongArchTargetMachine()));
+
+  return false;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
new file mode 100644
index 000000000000..cbd872031a32
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
@@ -0,0 +1,46 @@
+//=- LoongArchTargetMachine.h - Define TargetMachine for LoongArch -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+
+#include "LoongArchSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class LoongArchTargetMachine : public LLVMTargetMachine {
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  mutable StringMap<std::unique_ptr<LoongArchSubtarget>> SubtargetMap;
+
+public:
+  LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+                         StringRef FS, const TargetOptions &Options,
+                         Optional<Reloc::Model> RM,
+                         Optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+                         bool JIT);
+  ~LoongArchTargetMachine() override;
+
+  const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override;
+  const LoongArchSubtarget *getSubtargetImpl() const = delete;
+
+  // Pass Pipeline Configuration
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
new file mode 100644
index 000000000000..94a068897f8c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -0,0 +1,68 @@
+//===-- LoongArchAsmBackend.cpp - LoongArch Assembler Backend -*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArchAsmBackend class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+#define DEBUG_TYPE "loongarch-asmbackend"
+
+using namespace llvm;
+
+void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
+                                     const MCFixup &Fixup,
+                                     const MCValue &Target,
+                                     MutableArrayRef<char> Data, uint64_t Value,
+                                     bool IsResolved,
+                                     const MCSubtargetInfo *STI) const {
+  // TODO: Apply the Value for given Fixup into the provided data fragment.
+  return;
+}
+
+bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+                                                const MCFixup &Fixup,
+                                                const MCValue &Target) {
+  // TODO: Determine which relocations require special processing at link
+  // time.
+  return false;
+}
+
+bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+                                       const MCSubtargetInfo *STI) const {
+  // Check for byte count not multiple of instruction word size.
+  if (Count % 4 != 0)
+    return false;
+
+  // The nop on LoongArch is andi r0, r0, 0.
+  for (; Count >= 4; Count -= 4)
+    support::endian::write<uint32_t>(OS, 0x03400000, support::little);
+
+  return true;
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+LoongArchAsmBackend::createObjectTargetWriter() const {
+  return createLoongArchELFObjectWriter(OSABI, Is64Bit);
+}
+
+MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T,
+                                              const MCSubtargetInfo &STI,
+                                              const MCRegisterInfo &MRI,
+                                              const MCTargetOptions &Options) {
+  const Triple &TT = STI.getTargetTriple();
+  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+  return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit());
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
new file mode 100644
index 000000000000..77bbfb095747
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -0,0 +1,63 @@
+//===-- LoongArchAsmBackend.h - LoongArch Assembler Backend ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoongArchAsmBackend class.
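+// Note that only nop padding is fully implemented at this stage; fixup
+// application and instruction relaxation are still stubs.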
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class LoongArchAsmBackend : public MCAsmBackend {
+  uint8_t OSABI;
+  bool Is64Bit;
+
+public:
+  LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+      : MCAsmBackend(support::little), OSABI(OSABI), Is64Bit(Is64Bit) {}
+  ~LoongArchAsmBackend() override {}
+
+  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                  const MCValue &Target, MutableArrayRef<char> Data,
+                  uint64_t Value, bool IsResolved,
+                  const MCSubtargetInfo *STI) const override;
+
+  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+                             const MCValue &Target) override;
+
+  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                            const MCRelaxableFragment *DF,
+                            const MCAsmLayout &Layout) const override {
+    return false;
+  }
+
+  unsigned getNumFixupKinds() const override {
+    // FIXME: Implement this when we define fixup kinds.
+    return 0;
+  }
+
+  void relaxInstruction(MCInst &Inst,
+                        const MCSubtargetInfo &STI) const override {}
+
+  bool writeNopData(raw_ostream &OS, uint64_t Count,
+                    const MCSubtargetInfo *STI) const override;
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
new file mode 100644
index 000000000000..f0c985883125
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
@@ -0,0 +1,40 @@
+//= LoongArchBaseInfo.cpp - Top level definitions for LoongArch MC -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements helper functions for the LoongArch target useful for
+// the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+namespace LoongArchABI {
+
+ABI getTargetABI(StringRef ABIName) {
+  auto TargetABI = StringSwitch<ABI>(ABIName)
+                       .Case("ilp32s", ABI_ILP32S)
+                       .Case("ilp32f", ABI_ILP32F)
+                       .Case("ilp32d", ABI_ILP32D)
+                       .Case("lp64s", ABI_LP64S)
+                       .Case("lp64f", ABI_LP64F)
+                       .Case("lp64d", ABI_LP64D)
+                       .Default(ABI_Unknown);
+  return TargetABI;
+}
+
+// FIXME: other register?
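+// R31 ($s8) is what LoongArch code generation currently uses as the base
+// pointer when the stack has to be realigned.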
+MCRegister getBPReg() { return LoongArch::R31; }
+
+} // namespace LoongArchABI
+
+} // namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
new file mode 100644
index 000000000000..e26f22de0cbc
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -0,0 +1,44 @@
+//=- LoongArchBaseInfo.h - Top level definitions for LoongArch MC -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone enum definitions and helper function
+// definitions for the LoongArch target useful for the compiler back-end and
+// the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+namespace llvm {
+
+namespace LoongArchABI {
+enum ABI {
+  ABI_ILP32S,
+  ABI_ILP32F,
+  ABI_ILP32D,
+  ABI_LP64S,
+  ABI_LP64F,
+  ABI_LP64D,
+  ABI_Unknown
+};
+
+ABI getTargetABI(StringRef ABIName);
+
+// Returns the register used to hold the stack pointer after realignment.
+MCRegister getBPReg();
+} // namespace LoongArchABI
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
new file mode 100644
index 000000000000..95e1314f363a
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -0,0 +1,64 @@
+//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class LoongArchELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+
+  ~LoongArchELFObjectWriter() override;
+
+  // Return true if the given relocation must be with a symbol rather than
+  // section plus offset.
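+  // For now this conservatively answers "yes" for every relocation; see the
+  // unconditional "return true" below.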
+  bool needsRelocateWithSymbol(const MCSymbol &Sym,
+                               unsigned Type) const override {
+    return true;
+  }
+
+protected:
+  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+                        const MCFixup &Fixup, bool IsPCRel) const override;
+};
+} // namespace
+
+LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
+    : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
+                              /*HasRelocationAddend*/ true) {}
+
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+
+unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx,
+                                                const MCValue &Target,
+                                                const MCFixup &Fixup,
+                                                bool IsPCRel) const {
+  // Determine the type of the relocation.
+  unsigned Kind = Fixup.getTargetKind();
+
+  if (Kind >= FirstLiteralRelocationKind)
+    return Kind - FirstLiteralRelocationKind;
+
+  switch (Kind) {
+  // TODO: Implement this when we define fixup kinds.
+  default:
+    return ELF::R_LARCH_NONE;
+  }
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) {
+  return std::make_unique<LoongArchELFObjectWriter>(OSABI, Is64Bit);
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
new file mode 100644
index 000000000000..66183868f468
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
@@ -0,0 +1,63 @@
+//===- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to asm syntax --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a LoongArch MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchInstPrinter.h"
+#include "LoongArchBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
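+// Defining PRINT_ALIAS_INSTR additionally pulls in the tblgen'erated
+// printAliasInstr() and printCustomAliasOperand() used below.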
+#define PRINT_ALIAS_INSTR +#include "LoongArchGenAsmWriter.inc" + +void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (!printAliasInstr(MI, Address, STI, O)) + printInstruction(MI, Address, STI, O); + printAnnotation(O, Annot); +} + +void LoongArchInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << '$' << getRegisterName(RegNo); +} + +void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNo); + + if (MO.isReg()) { + printRegName(O, MO.getReg()); + return; + } + + if (MO.isImm()) { + O << MO.getImm(); + return; + } + + assert(MO.isExpr() && "Unknown operand kind in printOperand"); + MO.getExpr()->print(O, &MAI); +} + +const char *LoongArchInstPrinter::getRegisterName(unsigned RegNo) { + // Default print reg alias name + return getRegisterName(RegNo, LoongArch::RegAliasName); +} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h new file mode 100644 index 000000000000..727fc6a3e1f3 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h @@ -0,0 +1,49 @@ +//===-- LoongArchInstPrinter.h - Convert LoongArch MCInst to asm syntax ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints a LoongArch MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H + +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class LoongArchInstPrinter : public MCInstPrinter { +public: + LoongArchInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override; + void printRegName(raw_ostream &O, unsigned RegNo) const override; + + // Autogenerated by tblgen. 
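+  // printAliasInstr() returns false when no alias pattern matches, in which
+  // case printInst() falls back to the plain printInstruction() form.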
+ std::pair getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCustomAliasOperand(const MCInst *MI, uint64_t Address, + unsigned OpIdx, unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + static const char *getRegisterName(unsigned RegNo, unsigned AltIdx); + +private: + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp new file mode 100644 index 000000000000..bc946db2f449 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp @@ -0,0 +1,34 @@ +//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm properties ------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the LoongArchMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchMCAsmInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +void LoongArchMCAsmInfo::anchor() {} + +LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TT) { + CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4; + AlignmentIsInBytes = false; + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.dword\t"; + ZeroDirective = "\t.space\t"; + CommentString = "#"; + SupportsDebugInformation = true; + DwarfRegNumForCFI = true; + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h new file mode 100644 index 000000000000..1cf8a2fdf8aa --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the LoongArchMCAsmInfo class. 
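+// (It sets the assembly-dialect defaults: data directives, the comment
+// string and the DWARF/CFI options used on ELF.)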
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class LoongArchMCAsmInfo : public MCAsmInfoELF {
+  void anchor() override;
+
+public:
+  explicit LoongArchMCAsmInfo(const Triple &TargetTriple);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
new file mode 100644
index 000000000000..9c6a4f39b9ea
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -0,0 +1,127 @@
+//=- LoongArchMCCodeEmitter.cpp - Convert LoongArch code to machine code --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArchMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+class LoongArchMCCodeEmitter : public MCCodeEmitter {
+  LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete;
+  void operator=(const LoongArchMCCodeEmitter &) = delete;
+  MCContext &Ctx;
+  MCInstrInfo const &MCII;
+
+public:
+  LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
+      : Ctx(ctx), MCII(MCII) {}
+
+  ~LoongArchMCCodeEmitter() override {}
+
+  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+
+  /// TableGen'erated function for getting the binary encoding for an
+  /// instruction.
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  /// Return binary encoding of operand. If the machine operand requires
+  /// relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  /// Return binary encoding of an immediate operand specified by OpNo.
+  /// The value returned is the value of the immediate minus 1.
+  /// Note that this function is dedicated to specific immediate types,
+  /// e.g. uimm2_plus1.
+  unsigned getImmOpValueSub1(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  /// Return binary encoding of an immediate operand specified by OpNo.
+  /// The value returned is the value of the immediate shifted right
+  /// arithmetically by 2.
+  /// Note that this function is dedicated to specific immediate types,
+  /// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2.
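+  /// For example, a simm16_lsl2 operand holding a byte offset of 28 is
+  /// encoded as 28 >> 2 == 7.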
+ unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; +}; +} // end anonymous namespace + +unsigned +LoongArchMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + if (MO.isReg()) + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + + if (MO.isImm()) + return static_cast(MO.getImm()); + + llvm_unreachable("Unhandled expression!"); +} + +unsigned +LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return MI.getOperand(OpNo).getImm() - 1; +} + +unsigned +LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned Res = MI.getOperand(OpNo).getImm(); + assert((Res & 3) == 0 && "lowest 2 bits are non-zero"); + return Res >> 2; +} + +void LoongArchMCCodeEmitter::encodeInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + // Get byte count of instruction. + unsigned Size = Desc.getSize(); + + switch (Size) { + default: + llvm_unreachable("Unhandled encodeInstruction length!"); + case 4: { + uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + support::endian::write(OS, Bits, support::little); + break; + } + } +} + +MCCodeEmitter *llvm::createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new LoongArchMCCodeEmitter(Ctx, MCII); +} + +#include "LoongArchGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp new file mode 100644 index 000000000000..c733c194e6a2 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -0,0 +1,114 @@ +//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target descriptions. 
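+// That is, it registers the MC-layer pieces (register, instruction and
+// subtarget descriptions, asm info, code emitter, asm backend, instruction
+// printer and instruction analysis) with the target registry.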
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchMCTargetDesc.h" +#include "LoongArchBaseInfo.h" +#include "LoongArchInstPrinter.h" +#include "LoongArchMCAsmInfo.h" +#include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Compiler.h" + +#define GET_INSTRINFO_MC_DESC +#include "LoongArchGenInstrInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "LoongArchGenRegisterInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "LoongArchGenSubtargetInfo.inc" + +using namespace llvm; + +static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitLoongArchMCRegisterInfo(X, LoongArch::R1); + return X; +} + +static MCInstrInfo *createLoongArchMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitLoongArchMCInstrInfo(X); + return X; +} + +static MCSubtargetInfo * +createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + if (CPU.empty()) + CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; + return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); +} + +static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { + MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT); + + // Initial state of the frame pointer is sp(r3). + MCRegister SP = MRI.getDwarfRegNum(LoongArch::R3, true); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new LoongArchInstPrinter(MAI, MII, MRI); +} + +namespace { + +class LoongArchMCInstrAnalysis : public MCInstrAnalysis { +public: + explicit LoongArchMCInstrAnalysis(const MCInstrInfo *Info) + : MCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); + if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { + Target = Addr + Inst.getOperand(NumOps - 1).getImm(); + return true; + } + + return false; + } +}; + +} // end anonymous namespace + +static MCInstrAnalysis *createLoongArchInstrAnalysis(const MCInstrInfo *Info) { + return new LoongArchMCInstrAnalysis(Info); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { + for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { + TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); + TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo); + TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo); + TargetRegistry::RegisterMCAsmInfo(*T, createLoongArchMCAsmInfo); + TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); + TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); + TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchInstrAnalysis); + } +} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h new file mode 100644 index 000000000000..e576b9a49cd6 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h @@ -0,0 +1,54 @@ +//===- LoongArchMCTargetDesc.h - LoongArch Target Descriptions --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H + +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/DataTypes.h" +#include + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; + +MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, + MCContext &Ctx); + +MCAsmBackend *createLoongArchAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +std::unique_ptr +createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); + +} // namespace llvm + +// Defines symbolic names for LoongArch registers. +#define GET_REGINFO_ENUM +#include "LoongArchGenRegisterInfo.inc" + +// Defines symbolic names for LoongArch instructions. +#define GET_INSTRINFO_ENUM +#include "LoongArchGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "LoongArchGenSubtargetInfo.inc" + +#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp new file mode 100644 index 000000000000..1509c436c810 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp @@ -0,0 +1,51 @@ +//===- LoongArchMatInt.cpp - Immediate materialisation ---------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMatInt.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
+  // Val:
+  // |           hi32              |             lo32            |
+  // +-----------+-----------------+------------------+----------+
+  // | Highest12 |     Higher20    |       Hi20       |   Lo12   |
+  // +-----------+-----------------+------------------+----------+
+  // 63        52 51             32 31              12 11       0
+  //
+  const int64_t Highest12 = Val >> 52 & 0xFFF;
+  const int64_t Higher20 = Val >> 32 & 0xFFFFF;
+  const int64_t Hi20 = Val >> 12 & 0xFFFFF;
+  const int64_t Lo12 = Val & 0xFFF;
+  InstSeq Insts;
+
+  // A value whose low 52 bits are all zero needs just a single lu52i.d.
+  if (Highest12 != 0 && SignExtend64<52>(Val) == 0) {
+    Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
+    return Insts;
+  }
+
+  // Build the low 32 bits first, then patch in Higher20/Highest12 only when
+  // they differ from the sign extension of the bits below them. For example,
+  // Val == 0x12345678 takes just two instructions:
+  //   lu12i.w $rd, 0x12345   (bits 31..12)
+  //   ori $rd, $rd, 0x678    (bits 11..0)
+  if (Hi20 == 0)
+    Insts.push_back(Inst(LoongArch::ORI, Lo12));
+  else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20))
+    Insts.push_back(Inst(LoongArch::ADDI_W, SignExtend64<12>(Lo12)));
+  else {
+    Insts.push_back(Inst(LoongArch::LU12I_W, SignExtend64<20>(Hi20)));
+    if (Lo12 != 0)
+      Insts.push_back(Inst(LoongArch::ORI, Lo12));
+  }
+
+  if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20))
+    Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));
+
+  if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12))
+    Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
+
+  return Insts;
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
new file mode 100644
index 000000000000..945aa91e40c0
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
@@ -0,0 +1,30 @@
+//===- LoongArchMatInt.h - Immediate materialisation ----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <cstdint>
+
+namespace llvm {
+namespace LoongArchMatInt {
+struct Inst {
+  unsigned Opc;
+  int64_t Imm;
+  Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+};
+using InstSeq = SmallVector<Inst>;
+
+// Helper to generate an instruction sequence that will materialise the given
+// immediate value into a register.
+InstSeq generateInstSeq(int64_t Val);
+} // namespace LoongArchMatInt
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
new file mode 100644
index 000000000000..10654510032f
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +using namespace llvm; + +Target &llvm::getTheLoongArch32Target() { + static Target TheLoongArch32Target; + return TheLoongArch32Target; +} + +Target &llvm::getTheLoongArch64Target() { + static Target TheLoongArch64Target; + return TheLoongArch64Target; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() { + RegisterTarget X( + getTheLoongArch32Target(), "loongarch32", "32-bit LoongArch", + "LoongArch"); + RegisterTarget Y( + getTheLoongArch64Target(), "loongarch64", "64-bit LoongArch", + "LoongArch"); +} diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h new file mode 100644 index 000000000000..6fc13d52c065 --- /dev/null +++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h @@ -0,0 +1,21 @@ +//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheLoongArch32Target(); +Target &getTheLoongArch64Target(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index dcd581875f60..0a3d09552535 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -11,6 +11,7 @@ #include "TargetInfo/M68kTargetInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp index a565ff4e004d..31b59c17c0ca 100644 --- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp +++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp @@ -20,8 +20,11 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -29,581 +32,112 @@ using namespace llvm; typedef MCDisassembler::DecodeStatus DecodeStatus; -namespace { -constexpr unsigned MaxInstructionWords = 11; - -class M68kInstructionBuffer { - typedef SmallVector BufferType; - BufferType Buffer; - -public: - M68kInstructionBuffer() {} - - template - M68kInstructionBuffer(TIt Start, TIt End) : Buffer(Start, End) {} - - unsigned size() const { return Buffer.size(); } - - BufferType::const_iterator begin() const { return Buffer.begin(); } - BufferType::const_iterator end() const { return Buffer.end(); } - - uint16_t operator[](unsigned Index) const { - assert((Index < Buffer.size()) && 
"tried to read out of bounds word"); - return Buffer[Index]; - } - - void truncate(unsigned NewLength) { - assert((NewLength <= Buffer.size()) && - "instruction buffer too short to truncate"); - Buffer.resize(NewLength); - } - - void dump() const; - - static M68kInstructionBuffer fill(ArrayRef Bytes); -}; - -class M68kInstructionReader { - M68kInstructionBuffer Buffer; - unsigned NumRead; - -public: - M68kInstructionReader(M68kInstructionBuffer Buf) : Buffer(Buf), NumRead(0) {} - - unsigned size() const { return (Buffer.size() * 16) - NumRead; } - - uint64_t readBits(unsigned NumBits); -}; - -struct M68kInstructionLookup { - unsigned OpCode; - M68kInstructionBuffer Mask; - M68kInstructionBuffer Value; - - unsigned size() const { return Mask.size(); } - - // Check whether this instruction could possibly match the given bytes. - bool matches(const M68kInstructionBuffer &Test) const; - void dump() const; -}; - -class M68kInstructionLookupBuilder { - std::array Mask; - std::array Value; - unsigned NumWritten; - -public: - M68kInstructionLookupBuilder() : NumWritten(0) { - Mask.fill(0); - Value.fill(0); - } - - unsigned numWords() const { - assert(!(NumWritten & 0xf) && "instructions must be whole words"); - return NumWritten >> 4; - } - - bool isValid() const; - M68kInstructionLookup build(unsigned OpCode); - void addBits(unsigned N, uint64_t Bits); - void skipBits(unsigned N); -}; - -/// A disassembler class for M68k. -class M68kDisassembler : public MCDisassembler { - MCInstrInfo *MCII; - std::vector Lookups; - -public: - M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, - MCInstrInfo *MCII) - : MCDisassembler(STI, Ctx), MCII(MCII) { - buildBeadTable(); - } - virtual ~M68kDisassembler() {} - - void buildBeadTable(); - DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, - ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const override; - void decodeReg(MCInst &Instr, unsigned int Bead, - M68kInstructionReader &Reader, unsigned &Scratch) const; - void decodeImm(MCInst &Instr, unsigned int Bead, - M68kInstructionReader &Reader, unsigned &Scratch) const; - unsigned int getRegOperandIndex(MCInst &Instr, unsigned int Bead) const; - unsigned int getImmOperandIndex(MCInst &Instr, unsigned int Bead) const; -}; -} // namespace - -static unsigned RegisterDecode[] = { - M68k::A0, M68k::A1, M68k::A2, M68k::A3, M68k::A4, M68k::A5, - M68k::A6, M68k::SP, M68k::D0, M68k::D1, M68k::D2, M68k::D3, - M68k::D4, M68k::D5, M68k::D6, M68k::D7, +static const unsigned RegisterDecode[] = { + M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5, + M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3, + M68k::A4, M68k::A5, M68k::A6, M68k::SP, }; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD -void M68kInstructionBuffer::dump() const { - for (auto Word : Buffer) { - for (unsigned B = 0; B < 16; ++B) { - uint16_t Bit = (1 << (16 - B - 1)); - unsigned IsClear = !(Word & Bit); - - if (B == 8) - dbgs() << " "; - - char Ch = IsClear ? 
'0' : '1'; - dbgs() << Ch; - } - - dbgs() << " "; - } - - dbgs() << "\n"; +static DecodeStatus DecodeRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo >= 16) + return DecodeStatus::Fail; + Inst.addOperand(MCOperand::createReg(RegisterDecode[RegNo])); + return DecodeStatus::Success; } -#endif - -M68kInstructionBuffer M68kInstructionBuffer::fill(ArrayRef Bytes) { - SmallVector Buffer; - Buffer.resize(std::min(Bytes.size() / 2, Buffer.max_size())); - - for (unsigned I = 0, E = Buffer.size(); I < E; ++I) { - unsigned Offset = I * 2; - uint64_t Hi = Bytes[Offset]; - uint64_t Lo = Bytes[Offset + 1]; - uint64_t Word = (Hi << 8) | Lo; - Buffer[I] = Word; - - LLVM_DEBUG( - errs() << format("Read word %x (%d)\n", (unsigned)Word, Buffer.size())); - } - - return M68kInstructionBuffer(Buffer.begin(), Buffer.end()); -} - -uint64_t M68kInstructionReader::readBits(unsigned NumBits) { - assert((size() >= NumBits) && "not enough bits to read"); - - // We have to read the bits in 16-bit chunks because we read them as - // 16-bit words but they're actually written in big-endian. If a read - // crosses a word boundary we have to be careful. - - uint64_t Value = 0; - unsigned BitsRead = 0; - - while (BitsRead < NumBits) { - unsigned AvailableThisWord = 16 - (NumRead & 0xf); - unsigned ToRead = std::min(NumBits, AvailableThisWord); - - unsigned WordIndex = NumRead >> 4; - uint64_t ThisWord = Buffer[WordIndex] >> (NumRead & 0xf); - uint64_t Mask = (1 << ToRead) - 1; - Value |= (ThisWord & Mask) << BitsRead; - NumRead += ToRead; - BitsRead += ToRead; - } - return Value; -} - -bool M68kInstructionLookup::matches(const M68kInstructionBuffer &Test) const { - if (Test.size() < Value.size()) - return false; - - for (unsigned I = 0, E = Value.size(); I < E; ++I) { - uint16_t Have = Test[I]; - uint16_t Need = Value[I]; - uint16_t WordMask = Mask[I]; - - if ((Have & WordMask) != Need) - return false; - } - - return true; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD -void M68kInstructionLookup::dump() const { - dbgs() << "M68kInstructionLookup " << OpCode << " "; - - for (unsigned I = 0, E = Mask.size(); I < E; ++I) { - uint16_t WordMask = Mask[I]; - uint16_t WordValue = Value[I]; - - for (unsigned B = 0; B < 16; ++B) { - uint16_t Bit = (1 << (15 - B)); - unsigned IsMasked = !(WordMask & Bit); - unsigned IsClear = !(WordValue & Bit); - - if (B == 8) - dbgs() << " "; - - char Ch = IsMasked ? '?' : (IsClear ? 
'0' : '1'); - dbgs() << Ch; - } - - dbgs() << " "; - } - dbgs() << "\n"; +static DecodeStatus DecodeDR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo, Address, Decoder); } -#endif -bool M68kInstructionLookupBuilder::isValid() const { - for (unsigned I = 0, E = numWords(); I < E; ++I) - if (Mask[I]) - return true; - - return false; +static DecodeStatus DecodeDR16RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo, Address, Decoder); } -M68kInstructionLookup M68kInstructionLookupBuilder::build(unsigned OpCode) { - unsigned NumWords = numWords(); - M68kInstructionBuffer MaskBuffer(Mask.begin(), Mask.begin() + NumWords); - M68kInstructionBuffer ValueBuffer(Value.begin(), Value.begin() + NumWords); - M68kInstructionLookup Ret; - Ret.OpCode = OpCode; - Ret.Mask = MaskBuffer; - Ret.Value = ValueBuffer; - return Ret; +static DecodeStatus DecodeDR8RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo, Address, Decoder); } -void M68kInstructionLookupBuilder::addBits(unsigned N, uint64_t Bits) { - while (N > 0) { - unsigned WordIndex = NumWritten >> 4; - unsigned WordOffset = NumWritten & 0xf; - unsigned AvailableThisWord = 16 - WordOffset; - unsigned ToWrite = std::min(AvailableThisWord, N); - - uint16_t WordMask = (1 << ToWrite) - 1; - uint16_t BitsToWrite = Bits & WordMask; - - Value[WordIndex] |= (BitsToWrite << WordOffset); - Mask[WordIndex] |= (WordMask << WordOffset); - - Bits >>= ToWrite; - N -= ToWrite; - NumWritten += ToWrite; - } +static DecodeStatus DecodeAR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo | 8ULL, Address, Decoder); } -void M68kInstructionLookupBuilder::skipBits(unsigned N) { NumWritten += N; } - -// This is a bit of a hack: we can't generate this table at table-gen time -// because some of the definitions are in our platform. -void M68kDisassembler::buildBeadTable() { - const unsigned NumInstr = M68k::INSTRUCTION_LIST_END; - Lookups.reserve(NumInstr); - - for (unsigned I = 0; I < NumInstr; ++I) { - M68kInstructionLookupBuilder Builder; - - for (const uint8_t *PartPtr = M68k::getMCInstrBeads(I); *PartPtr; - ++PartPtr) { - uint8_t Bead = *PartPtr; - unsigned Ext = Bead >> 4; - unsigned Op = Bead & 0xf; - - switch (Op) { - case M68kBeads::Ctrl: - // Term will have already been skipped by the loop. - assert((Ext == M68kBeads::Ignore) && "unexpected command bead"); - break; - - case M68kBeads::Bits1: - Builder.addBits(1, Ext); - break; - - case M68kBeads::Bits2: - Builder.addBits(2, Ext); - break; - - case M68kBeads::Bits3: - Builder.addBits(3, Ext); - break; - - case M68kBeads::Bits4: - Builder.addBits(4, Ext); - break; - - case M68kBeads::DAReg: - case M68kBeads::DA: - case M68kBeads::DReg: - case M68kBeads::Reg: - if (Op != M68kBeads::DA) - Builder.skipBits(3); - - if (Op != M68kBeads::Reg && Op != M68kBeads::DReg) - Builder.skipBits(1); - - break; - - case M68kBeads::Disp8: - Builder.skipBits(8); - break; - - case M68kBeads::Imm8: - case M68kBeads::Imm16: - Builder.skipBits(16); - break; - - case M68kBeads::Imm32: - Builder.skipBits(32); - break; - - case M68kBeads::Imm3: - Builder.skipBits(3); - break; - - default: - llvm_unreachable("unhandled bead type"); - } - } - - // Ignore instructions which are unmatchable (usually pseudo instructions). 
- if (!Builder.isValid()) - continue; - - Lookups.push_back(Builder.build(I)); - } +static DecodeStatus DecodeAR16RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo | 8ULL, Address, Decoder); } -unsigned M68kDisassembler::getRegOperandIndex(MCInst &Instr, - unsigned Bead) const { - unsigned Ext = Bead >> 4; - - const MCInstrDesc &Desc = MCII->get(Instr.getOpcode()); - auto MIOpIdx = M68k::getLogicalOperandIdx(Instr.getOpcode(), Ext & 7); - - if (M68kII::hasMultiMIOperands(Instr.getOpcode(), Ext & 7)) { - bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL; - if (IsPCRel) - MIOpIdx += M68k::PCRelIndex; - else if (Ext & 8) - MIOpIdx += M68k::MemIndex; - else - MIOpIdx += M68k::MemBase; - } - - return MIOpIdx; +static DecodeStatus DecodeXR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo, Address, Decoder); } -unsigned M68kDisassembler::getImmOperandIndex(MCInst &Instr, - unsigned Bead) const { - unsigned Ext = Bead >> 4; - - const MCInstrDesc &Desc = MCII->get(Instr.getOpcode()); - auto MIOpIdx = M68k::getLogicalOperandIdx(Instr.getOpcode(), Ext & 7); - - if (M68kII::hasMultiMIOperands(Instr.getOpcode(), Ext & 7)) { - bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL; - if (IsPCRel) - MIOpIdx += M68k::PCRelDisp; - else if (Ext & 8) - MIOpIdx += M68k::MemOuter; - else - MIOpIdx += M68k::MemDisp; - } - - return MIOpIdx; +static DecodeStatus DecodeXR16RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClass(Inst, RegNo, Address, Decoder); } -void M68kDisassembler::decodeReg(MCInst &Instr, unsigned Bead, - M68kInstructionReader &Reader, - unsigned &Scratch) const { - unsigned Op = Bead & 0xf; - LLVM_DEBUG(errs() << format("decodeReg %x\n", Bead)); - - if (Op != M68kBeads::DA) - Scratch = (Scratch & ~7) | Reader.readBits(3); - - if (Op != M68kBeads::Reg) { - bool DA = (Op != M68kBeads::DReg) && Reader.readBits(1); - if (!DA) - Scratch |= 8; - else - Scratch &= ~8; - } +static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, APInt &Insn, + uint64_t Address, + const void *Decoder) { + llvm_unreachable("unimplemented"); } -void M68kDisassembler::decodeImm(MCInst &Instr, unsigned Bead, - M68kInstructionReader &Reader, - unsigned &Scratch) const { - unsigned Op = Bead & 0xf; - LLVM_DEBUG(errs() << format("decodeImm %x\n", Bead)); +#include "M68kGenDisassemblerTable.inc" - unsigned NumToRead; - switch (Op) { - case M68kBeads::Disp8: - NumToRead = 8; - break; - case M68kBeads::Imm8: - case M68kBeads::Imm16: - NumToRead = 16; - break; - case M68kBeads::Imm32: - NumToRead = 32; - break; - case M68kBeads::Imm3: - NumToRead = 3; - break; - default: - llvm_unreachable("invalid imm"); - } +/// A disassembler class for M68k. +struct M68kDisassembler : public MCDisassembler { + M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + virtual ~M68kDisassembler() {} - Scratch = (NumToRead < 32) ? (Scratch << NumToRead) : 0; - Scratch |= Reader.readBits(NumToRead); -} + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &CStream) const override; +}; DecodeStatus M68kDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { - // Read and shift the input (fetch as much as we can for now). 
- auto Buffer = M68kInstructionBuffer::fill(Bytes); - if (Buffer.size() == 0) - return Fail; - - // Check through our lookup table. - bool Found = false; - for (unsigned I = 0, E = Lookups.size(); I < E; ++I) { - const M68kInstructionLookup &Lookup = Lookups[I]; - if (!Lookup.matches(Buffer)) - continue; - - Found = true; - Size = Lookup.size() * 2; - Buffer.truncate(Lookup.size()); - Instr.setOpcode(Lookup.OpCode); - LLVM_DEBUG(errs() << "decoding instruction " << MCII->getName(Lookup.OpCode) - << "\n"); - break; - } - - if (!Found) - return Fail; - - M68kInstructionReader Reader(Buffer); - const MCInstrDesc &Desc = MCII->get(Instr.getOpcode()); - unsigned NumOperands = Desc.NumOperands; - - // Now use the beads to decode the operands. - enum class OperandType { - Invalid, - Reg, - Imm, - }; - - SmallVector OpType(NumOperands, OperandType::Invalid); - SmallVector Scratch(NumOperands, 0); - for (const uint8_t *PartPtr = M68k::getMCInstrBeads(Instr.getOpcode()); - *PartPtr; ++PartPtr) { - uint8_t Bead = *PartPtr; - unsigned Ext = Bead >> 4; - unsigned Op = Bead & 0xf; - unsigned MIOpIdx; - - switch (Op) { - case M68kBeads::Ctrl: - // Term will have already been skipped by the loop. - assert((Ext == M68kBeads::Ignore) && "unexpected command bead"); - break; - - // These bits are constant - if we're here we've already matched them. - case M68kBeads::Bits1: - Reader.readBits(1); - break; - case M68kBeads::Bits2: - Reader.readBits(2); - break; - case M68kBeads::Bits3: - Reader.readBits(3); - break; - case M68kBeads::Bits4: - Reader.readBits(4); - break; - - case M68kBeads::DAReg: - case M68kBeads::DA: - case M68kBeads::DReg: - case M68kBeads::Reg: - MIOpIdx = getRegOperandIndex(Instr, Bead); - assert(((OpType[MIOpIdx] == OperandType::Invalid) || - (OpType[MIOpIdx] == OperandType::Reg)) && - "operands cannot change type"); - OpType[MIOpIdx] = OperandType::Reg; - decodeReg(Instr, Bead, Reader, Scratch[MIOpIdx]); - break; - - case M68kBeads::Disp8: - case M68kBeads::Imm8: - case M68kBeads::Imm16: - case M68kBeads::Imm32: - case M68kBeads::Imm3: - MIOpIdx = getImmOperandIndex(Instr, Bead); - assert(((OpType[MIOpIdx] == OperandType::Invalid) || - (OpType[MIOpIdx] == OperandType::Imm)) && - "operands cannot change type"); - OpType[MIOpIdx] = OperandType::Imm; - decodeImm(Instr, Bead, Reader, Scratch[MIOpIdx]); - break; - - default: - llvm_unreachable("unhandled bead type"); - } - } - - // Copy constrained operands. 
- for (unsigned DstMIOpIdx = 0; DstMIOpIdx < NumOperands; ++DstMIOpIdx) { - int TiedTo = Desc.getOperandConstraint(DstMIOpIdx, MCOI::TIED_TO); - if (TiedTo < 0) - continue; - - unsigned SrcMIOpIdx = TiedTo; - - unsigned OpCount = 0; - for (unsigned I = 0;; ++I) { - unsigned Offset = M68k::getLogicalOperandIdx(Instr.getOpcode(), I); - assert(Offset <= SrcMIOpIdx && "missing logical operand"); - if (Offset == SrcMIOpIdx) { - OpCount = M68k::getLogicalOperandSize(Instr.getOpcode(), I); - break; - } + DecodeStatus Result; + auto MakeUp = [&](APInt &Insn, unsigned InstrBits) { + unsigned Idx = Insn.getBitWidth() >> 3; + unsigned RoundUp = alignTo(InstrBits, Align(16)); + if (RoundUp > Insn.getBitWidth()) + Insn = Insn.zext(RoundUp); + RoundUp = RoundUp >> 3; + for (; Idx < RoundUp; Idx += 2) { + Insn.insertBits(support::endian::read16be(&Bytes[Idx]), Idx * 8, 16); } - assert(OpCount != 0 && "operand count not found"); - - for (unsigned I = 0; I < OpCount; ++I) { - assert(OpType[DstMIOpIdx + I] == OperandType::Invalid && - "tried to stomp over operand whilst applying constraints"); - OpType[DstMIOpIdx + I] = OpType[SrcMIOpIdx + I]; - Scratch[DstMIOpIdx + I] = Scratch[SrcMIOpIdx + I]; - } - } - - // Create the operands from our scratch space. - for (unsigned O = 0; O < NumOperands; ++O) { - switch (OpType[O]) { - case OperandType::Invalid: - assert(false && "operand not parsed"); - - case OperandType::Imm: - Instr.addOperand(MCOperand::createImm(Scratch[O])); - break; - - case OperandType::Reg: - Instr.addOperand(MCOperand::createReg(RegisterDecode[Scratch[O]])); - break; - } - } - - assert((Reader.size() == 0) && "wrong number of bits consumed"); - return Success; + }; + APInt Insn(16, support::endian::read16be(Bytes.data())); + // 2 bytes of data are consumed, so set Size to 2 + // If we don't do this, disassembler may generate result even + // the encoding is invalid. We need to let it fail correctly. 
+ Size = 2; + Result = decodeInstruction(DecoderTable80, Instr, Insn, Address, this, STI, + MakeUp); + if (Result == DecodeStatus::Success) + Size = InstrLenTable[Instr.getOpcode()] >> 3; + return Result; } static MCDisassembler *createM68kDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new M68kDisassembler(STI, Ctx, T.createMCInstrInfo()); + return new M68kDisassembler(STI, Ctx); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kDisassembler() { diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp index b3d17184f1fe..e0aaa9d51cc3 100644 --- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/TargetCallingConv.h" using namespace llvm; @@ -27,10 +28,12 @@ using namespace llvm; M68kCallLowering::M68kCallLowering(const M68kTargetLowering &TLI) : CallLowering(&TLI) {} -struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { - OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstrBuilder MIB) - : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {} +struct M68kOutgoingArgHandler : public CallLowering::OutgoingValueHandler { + M68kOutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB) + : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), + DL(MIRBuilder.getMF().getDataLayout()), + STI(MIRBuilder.getMF().getSubtarget()) {} void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign VA) override { @@ -41,16 +44,29 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { - llvm_unreachable("unimplemented"); + MachineFunction &MF = MIRBuilder.getMF(); + Register ExtReg = extendRegister(ValVReg, VA); + + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy, + inferAlignFromPtrInfo(MF, MPO)); + MIRBuilder.buildStore(ExtReg, Addr, *MMO); } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags) override { - llvm_unreachable("unimplemented"); + LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0)); + LLT SType = LLT::scalar(DL.getPointerSizeInBits(0)); + Register StackReg = STI.getRegisterInfo()->getStackRegister(); + auto SPReg = MIRBuilder.buildCopy(p0, StackReg).getReg(0); + auto OffsetReg = MIRBuilder.buildConstant(SType, Offset); + auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg); + MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset); + return AddrReg.getReg(0); } - MachineInstrBuilder MIB; + const DataLayout &DL; + const M68kSubtarget &STI; }; bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef VRegs, @@ -72,7 +88,7 @@ bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); OutgoingValueAssigner ArgAssigner(AssignFn); - OutgoingArgHandler ArgHandler(MIRBuilder, MRI, MIB); + M68kOutgoingArgHandler ArgHandler(MIRBuilder, MRI, MIB); Success = determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs, MIRBuilder, 
F.getCallingConv(), F.isVarArg()); @@ -144,9 +160,73 @@ Register M68kIncomingValueHandler::getStackAddress(uint64_t Size, return AddrReg.getReg(0); } +void CallReturnHandler::assignValueToReg(Register ValVReg, Register PhysReg, + CCValAssign VA) { + MIB.addDef(PhysReg, RegState::Implicit); + MIRBuilder.buildCopy(ValVReg, PhysReg); +} + bool M68kCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { - return false; + MachineFunction &MF = MIRBuilder.getMF(); + Function &F = MF.getFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto &DL = F.getParent()->getDataLayout(); + const M68kTargetLowering &TLI = *getTLI(); + const M68kSubtarget &STI = MF.getSubtarget(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const M68kRegisterInfo *TRI = STI.getRegisterInfo(); + + SmallVector OutArgs; + for (auto &OrigArg : Info.OrigArgs) + splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv); + + SmallVector InArgs; + if (!Info.OrigRet.Ty->isVoidTy()) + splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv); + + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + auto CallSeqStart = MIRBuilder.buildInstr(AdjStackDown); + + unsigned Opc = TLI.getTargetMachine().isPositionIndependent() ? M68k::CALLq + : Info.Callee.isReg() ? M68k::CALLj + : M68k::CALLb; + + auto MIB = MIRBuilder.buildInstrNoInsert(Opc) + .add(Info.Callee) + .addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); + + CCAssignFn *AssignFn = TLI.getCCAssignFn(Info.CallConv, false, Info.IsVarArg); + OutgoingValueAssigner Assigner(AssignFn); + M68kOutgoingArgHandler Handler(MIRBuilder, MRI, MIB); + if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder, + Info.CallConv, Info.IsVarArg)) + return false; + + if (Info.Callee.isReg()) + constrainOperandRegClass(MF, *TRI, MRI, *STI.getInstrInfo(), + *STI.getRegBankInfo(), *MIB, MIB->getDesc(), + Info.Callee, 0); + + MIRBuilder.insertInstr(MIB); + + if (!Info.OrigRet.Ty->isVoidTy()) { + CCAssignFn *RetAssignFn = + TLI.getCCAssignFn(Info.CallConv, true, Info.IsVarArg); + + OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn); + CallReturnHandler Handler(MIRBuilder, MRI, MIB); + if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder, + Info.CallConv, Info.IsVarArg)) + return false; + } + + CallSeqStart.addImm(Assigner.StackOffset).addImm(0); + + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + MIRBuilder.buildInstr(AdjStackUp).addImm(Assigner.StackOffset).addImm(0); + + return true; } bool M68kCallLowering::enableBigEndian() const { return true; } diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h index 24212e6dd9c6..a1589e96aa3d 100644 --- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h +++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h @@ -22,6 +22,7 @@ namespace llvm { class M68kTargetLowering; +class MachineInstrBuilder; class M68kCallLowering : public CallLowering { // TODO: We are only supporting return instruction with no value at this time @@ -67,6 +68,17 @@ struct FormalArgHandler : public M68kIncomingValueHandler { : M68kIncomingValueHandler(MIRBuilder, MRI) {} }; +struct CallReturnHandler : public M68kIncomingValueHandler { + CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder &MIB) + : M68kIncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {} + +private: + void assignValueToReg(Register ValVReg, Register PhysReg, + CCValAssign VA) override; + + MachineInstrBuilder &MIB; +}; } // end 
namespace llvm #endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp index b6ed6ab28a5d..f833eb2d19d4 100644 --- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp @@ -13,9 +13,9 @@ #include "M68kRegisterBankInfo.h" #include "M68kInstrInfo.h" // For the register classes #include "M68kSubtarget.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_TARGET_REGBANK_IMPL diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h index 6c0b8ca7ba5a..493c139f018c 100644 --- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h +++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H #define LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "M68kGenRegisterBank.inc" diff --git a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp index 7f0c0dd92dbb..cbd69f24666e 100644 --- a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp +++ b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp @@ -231,7 +231,7 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override { - STI = &static_cast<const M68kSubtarget &>(MF.getSubtarget()); + STI = &MF.getSubtarget<M68kSubtarget>(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MFI = MF.getInfo<M68kMachineFunctionInfo>(); diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp index acfa30f28c2b..51a148f5aa04 100644 --- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -302,7 +302,7 @@ bool M68kExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool M68kExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast<const M68kSubtarget &>(MF.getSubtarget()); + STI = &MF.getSubtarget<M68kSubtarget>(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MFI = MF.getInfo<M68kMachineFunctionInfo>(); diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp index 9ef97b96ea9a..f9459e284aef 100644 --- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp +++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp @@ -181,6 +181,7 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override; + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; private: /// Keep a pointer to the M68kSubtarget around so that we can @@ -311,8 +312,35 @@ private: }; } // namespace +bool M68kDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) + return false; + + if (U == Root) { + switch (U->getOpcode()) { + default: + return true; + case M68kISD::SUB: + case ISD::SUB: + // Prefer NEG instruction when zero subtracts a value. + // e.g. + // move.l #0, %d0 + // sub.l (4,%sp), %d0 + // vs.
+ move.l (4,%sp), %d0 + neg.l %d0 + if (llvm::isNullConstant(U->getOperand(0))) + return false; + break; + } + } + + return true; +} + bool M68kDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast<const M68kSubtarget &>(MF.getSubtarget()); + Subtarget = &MF.getSubtarget<M68kSubtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index dba190a2ebc0..250519efd14a 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -101,6 +101,9 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM, setOperationAction(OP, MVT::i32, Expand); } + for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}) + setOperationAction(OP, MVT::i32, Custom); + // Add/Sub overflow ops with MVT::Glues are lowered to CCR dependences. for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) { setOperationAction(ISD::ADDC, VT, Custom); @@ -170,7 +173,7 @@ MVT M68kTargetLowering::getScalarShiftAmountTy(const DataLayout &DL, if (Ty.isSimple()) { return Ty.getSimpleVT(); } - return MVT::getIntegerVT(8 * DL.getPointerSize(0)); + return MVT::getIntegerVT(DL.getPointerSizeInBits(0)); } #include "M68kGenCallingConv.inc" @@ -1354,6 +1357,12 @@ SDValue M68kTargetLowering::LowerOperation(SDValue Op, return LowerVASTART(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SHL_PARTS: + return LowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + return LowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: + return LowerShiftRightParts(Op, DAG, false); } } @@ -3239,6 +3248,102 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue M68kTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // if Shamt - register size < 0: // Shamt < register size + // Lo = Lo << Shamt + // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (register size - 1 ^ Shamt)) + // else: + // Lo = 0 + // Hi = Lo << (Shamt - register size) + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); + SDValue ShamtMinusRegisterSize = + DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); + SDValue RegisterSizeMinus1Shamt = + DAG.getNode(ISD::XOR, DL, VT, RegisterSizeMinus1, Shamt); + + SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); + SDValue ShiftRightLo = + DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, RegisterSizeMinus1Shamt); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusRegisterSize); + + SDValue CC = + DAG.getSetCC(DL, MVT::i8, ShamtMinusRegisterSize, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + return DAG.getMergeValues({Lo, Hi}, DL); +} + +SDValue M68kTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + bool IsSRA) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt =
Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // SRA expansion: + // if Shamt - register size < 0: // Shamt < register size + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (register size - 1 ^ Shamt)) + // Hi = Hi >>s Shamt + // else: + // Lo = Hi >>s (Shamt - register size); + // Hi = Hi >>s (register size - 1) + // + // SRL expansion: + // if Shamt - register size < 0: // Shamt < register size + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (register size - 1 ^ Shamt)) + // Hi = Hi >>u Shamt + // else: + // Lo = Hi >>u (Shamt - register size); + // Hi = 0; + + unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); + SDValue ShamtMinusRegisterSize = + DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); + SDValue RegisterSizeMinus1Shamt = + DAG.getNode(ISD::XOR, DL, VT, RegisterSizeMinus1, Shamt); + + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); + SDValue ShiftLeftHi = + DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, RegisterSizeMinus1Shamt); + SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); + SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); + SDValue LoFalse = + DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusRegisterSize); + SDValue HiFalse = + IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, RegisterSizeMinus1) : Zero; + + SDValue CC = + DAG.getSetCC(DL, MVT::i8, ShamtMinusRegisterSize, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + return DAG.getMergeValues({Lo, Hi}, DL); +} + //===----------------------------------------------------------------------===// // DAG Combine //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h index 9375a99962eb..f759a7d939c8 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.h +++ b/llvm/lib/Target/M68k/M68kISelLowering.h @@ -220,6 +220,8 @@ private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td index ef50de576641..2339e3caa517 100644 --- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td +++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td @@ -27,10 +27,35 @@ /// //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// OPMODE Encoding +//===----------------------------------------------------------------------===// +class MxOpModeEncoding encoding> { + bits<3> Value = encoding; +} + +// op EA, Dn +def MxOpMode8_d_EA : MxOpModeEncoding<0b000>; +def MxOpMode16_d_EA : MxOpModeEncoding<0b001>; +def MxOpMode32_d_EA : MxOpModeEncoding<0b010>; + +// op Dn, EA +def MxOpMode8_EA_d : MxOpModeEncoding<0b100>; +def MxOpMode16_EA_d : 
MxOpModeEncoding<0b101>; +def MxOpMode32_EA_d : MxOpModeEncoding<0b110>; + +// op EA, An +def MxOpMode16_a_EA : MxOpModeEncoding<0b011>; +def MxOpMode32_a_EA : MxOpModeEncoding<0b111>; + + //===----------------------------------------------------------------------===// // Encoding //===----------------------------------------------------------------------===// +let Defs = [CCR] in { +let Constraints = "$src = $dst" in { + /// Encoding for Normal forms /// ---------------------------------------------------- /// F E D C | B A 9 | 8 7 6 | 5 4 3 | 2 1 0 /// | | | EFFECTIVE ADDRESS /// x x x x | REG | OP MODE | MODE | REG /// ---------------------------------------------------- -class MxArithEncoding - : MxEncoding; -/// Encoding for Extended forms -/// ------------------------------------------------------ -/// F E D C | B A 9 | 8 | 7 6 | 5 4 | 3 | 2 1 0 -/// ------------------------------------------------------ -/// x x x x | REG Rx | 1 | SIZE | 0 0 | M | REG Ry -/// ------------------------------------------------------ -/// Rx - destination -/// Ry - source -/// M - address mode switch -class MxArithXEncoding - : MxEncoding, SIZE, MxBead1Bit<0b1>, DST, CMD>; +// $reg, $ccr <- $reg op $reg +class MxBiArOp_R_RR_xEA<string MN, SDNode NODE, MxType DST_TYPE, MxType SRC_TYPE, bits<4> CMD> + : MxInst<(outs DST_TYPE.ROp:$dst), (ins DST_TYPE.ROp:$src, SRC_TYPE.ROp:$opd), + MN#"."#DST_TYPE.Prefix#"\t$opd, $dst", + [(set DST_TYPE.VT:$dst, CCR, (NODE DST_TYPE.VT:$src, SRC_TYPE.VT:$opd))]> { + let Inst = (descend + CMD, (operand "$dst", 3), + !cast<MxOpModeEncoding>("MxOpMode"#DST_TYPE.Size#"_"#DST_TYPE.RLet#"_EA").Value, + !cond( + !eq(SRC_TYPE.RLet, "r") : (descend 0b00, (operand "$opd", 4)), + !eq(SRC_TYPE.RLet, "d") : (descend 0b000, (operand "$opd", 3)) + ) + ); +} + +/// This Op is similar to the one above, except that it uses the reversed +/// opmode; some commands (e.g. eor) do not support the dEA or rEA modes and +/// require EAd for register-only operations. +/// NOTE: when using dd commands it is seemingly irrelevant which opmode to use, +/// but some opcodes support address registers and some do not, which creates +/// this mess.
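
To make the new (descend ...) encoders easier to read: descend concatenates its fields most-significant-first into the 16-bit word sketched in the table above (CMD | REG | OP MODE | EA MODE | EA REG), and the reversed EA_d opmodes simply swap which operand lives in the REG field. Below is a minimal standalone C++ sketch of that packing, not part of the patch; the helper name packArithWord is invented here, and the expected words are the standard M68000 encodings of add.l and eor.l.

#include <cassert>
#include <cstdint>

// Pack the "normal form" arithmetic word: 4-bit opcode, 3-bit register,
// 3-bit opmode, then the 6-bit effective address (3-bit mode, 3-bit register).
static uint16_t packArithWord(unsigned Cmd, unsigned Reg, unsigned OpMode,
                              unsigned EAMode, unsigned EAReg) {
  return (Cmd << 12) | (Reg << 9) | (OpMode << 6) | (EAMode << 3) | EAReg;
}

int main() {
  // add.l %d1, %d0: the REG field holds the destination %d0, opmode 0b010
  // (MxOpMode32_d_EA), and the source %d1 sits in the EA field.
  assert(packArithWord(0xD, 0, 0b010, 0b000, 1) == 0xD081);
  // eor.l %d1, %d0: reversed opmode 0b110 (MxOpMode32_EA_d), so the REG
  // field holds the source %d1 and the destination %d0 sits in the EA field.
  assert(packArithWord(0xB, 1, 0b110, 0b000, 0) == 0xB380);
  return 0;
}

The reversed form is exactly what the next class, MxBiArOp_R_RR_EAd, emits.
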
+class MxBiArOp_R_RR_EAd CMD> + : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd), + MN#"."#TYPE.Prefix#"\t$opd, $dst", + [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd))]> { + let Inst = (descend + CMD, (operand "$opd", 3), + !cast("MxOpMode"#TYPE.Size#"_EA_"#TYPE.RLet).Value, + /*Destination can only be a data register*/ + /*MODE*/0b000, + /*REGISTER*/(operand "$dst", 3)); +} + +let mayLoad = 1 in +class MxBiArOp_R_RM CMD, MxEncMemOp SRC_ENC> + : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, OPD:$opd), + MN#"."#TYPE.Prefix#"\t$opd, $dst", + [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, (TYPE.Load PAT:$opd)))]> { + let Inst = (ascend + (descend CMD, (operand "$dst", 3), + !cast("MxOpMode"#TYPE.Size#"_"#TYPE.RLet#"_EA").Value, + SRC_ENC.EA), + SRC_ENC.Supplement + ); +} /// Encoding for Immediate forms /// --------------------------------------------------- @@ -69,211 +125,154 @@ class MxArithXEncoding - : MxEncoding, - // Source - SRC_EXT.Imm, SRC_EXT.B8, SRC_EXT.Scale, - SRC_EXT.WL, SRC_EXT.DAReg, - // Destination - DST_EXT.Imm, DST_EXT.B8, DST_EXT.Scale, - DST_EXT.WL, DST_EXT.DAReg>; - - -//===----------------------------------------------------------------------===// -// Add/Sub -//===----------------------------------------------------------------------===// - -let Defs = [CCR] in { -let Constraints = "$src = $dst" in { - -// $reg, $ccr <- $reg op $reg -class MxBiArOp_RFRR_xEA CMD, MxBead REG> - : MxInst<(outs DST_TYPE.ROp:$dst), (ins DST_TYPE.ROp:$src, SRC_TYPE.ROp:$opd), - MN#"."#DST_TYPE.Prefix#"\t$opd, $dst", - [(set DST_TYPE.VT:$dst, CCR, (NODE DST_TYPE.VT:$src, SRC_TYPE.VT:$opd))], - MxArithEncoding, - !cast("MxOpMode"#DST_TYPE.Size#DST_TYPE.RLet#"EA"), - REG, - !cast("MxEncEA"#SRC_TYPE.RLet#"_2"), - MxExtEmpty>>; - -/// This Op is similar to the one above except it uses reversed opmode, some -/// commands(e.g. eor) do not support dEA or rEA modes and require EAd for -/// register only operations. -/// NOTE when using dd commands it is irrelevant which opmode to use(as it seems) -/// but some opcodes support address register and some do not which creates this -/// mess. -class MxBiArOp_RFRR_EAd CMD> - : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd), - MN#"."#TYPE.Prefix#"\t$opd, $dst", - [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd))], - MxArithEncoding, - !cast("MxOpMode"#TYPE.Size#"EAd"), - MxBeadDReg<2>, MxEncEAd_0, MxExtEmpty>>; // $reg <- $reg op $imm -class MxBiArOp_RFRI_xEA CMD> +class MxBiArOp_R_RI_xEA CMD> : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.IOp:$opd), MN#"."#TYPE.Prefix#"\t$opd, $dst", - [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))], - MxArithEncoding, - !cast("MxOpMode"#TYPE.Size#TYPE.RLet#"EA"), - MxBeadDReg<0>, MxEncEAi, - !cast("MxExtI"#TYPE.Size#"_2")>>; + [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))]> { + let Inst = (ascend + (descend CMD, (operand "$dst", 3), + !cast("MxOpMode"#TYPE.Size#"_"#TYPE.RLet#"_EA").Value, + MxEncAddrMode_i<"opd", TYPE.Size>.EA), + MxEncAddrMode_i<"opd", TYPE.Size>.Supplement + ); +} // Again, there are two ways to write an immediate to Dn register either dEA -// opmode or using *I encoding, and again some instrucitons also support address +// opmode or using *I encoding, and again some instructions also support address // registers some do not. 
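
Concretely, the two immediate forms that the comment above contrasts produce different first words for the same operation. A small illustrative C++ sketch follows; the words are derived from the M68000 encoding tables, not from this patch, and the variable names are made up.

#include <cassert>
#include <cstdint>

int main() {
  // Form 1: plain "add" with the dEA opmode and an immediate effective
  // address (mode 0b111, register 0b100).
  // add.l #imm, %d0 -> first word 0xD0BC, followed by the 32-bit immediate.
  uint16_t AddForm = (0xD << 12) | (0 << 9) | (0b010 << 6) | (0b111 << 3) | 0b100;
  assert(AddForm == 0xD0BC);
  // Form 2: the dedicated "addi" encoding: fixed prefix 0b00000110, a 2-bit
  // size field (0b10 = long), then the data-register EA of %d0.
  // addi.l #imm, %d0 -> first word 0x0680, followed by the 32-bit immediate.
  uint16_t AddiForm = (0b00000110 << 8) | (0b10 << 6) | (0b000 << 3) | 0b000;
  assert(AddiForm == 0x0680);
  return 0;
}

The class below (MxBiArOp_R_RI) emits the dedicated *I form, while MxBiArOp_R_RI_xEA above emits the opmode form.
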
-class MxBiArOp_RFRI CMD> +class MxBiArOp_R_RI CMD> : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.IOp:$opd), MN#"i."#TYPE.Prefix#"\t$opd, $dst", - [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))], - MxArithImmEncoding, !cast("MxEncSize"#TYPE.Size), - !cast("MxEncEA"#TYPE.RLet#"_0"), MxExtEmpty, - !cast("MxExtI"#TYPE.Size#"_2")>>; - -let mayLoad = 1 in -class MxBiArOp_RFRM CMD, MxEncEA EA, MxEncExt EXT> - : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, OPD:$opd), - MN#"."#TYPE.Prefix#"\t$opd, $dst", - [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, (TYPE.Load PAT:$opd)))], - MxArithEncoding, - !cast("MxOpMode"#TYPE.Size#TYPE.RLet#"EA"), - MxBeadDReg<0>, EA, EXT>>; - + [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))]> { + let Inst = (ascend + (descend 0b0000, CMD, + !cast("MxNewEncSize"#TYPE.Size).Value, + // The destination cannot be address register, so it's always + // the MODE for data register direct mode. + /*MODE*/0b000, + /*REGISTER*/(operand "$dst", 3)), + // Source (i.e. immediate value) encoding + MxEncAddrMode_i<"opd", TYPE.Size>.Supplement + ); +} } // Constraints let mayLoad = 1, mayStore = 1 in { // FIXME MxBiArOp_FMR/FMI cannot consume CCR from MxAdd/MxSub which leads for // MxAdd to survive the match and subsequent mismatch. -class MxBiArOp_FMR CMD, MxEncEA EA, MxEncExt EXT> +class MxBiArOp_MR CMD, MxEncMemOp DST_ENC> : MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$opd), - MN#"."#TYPE.Prefix#"\t$opd, $dst", - [], - MxArithEncoding, - !cast("MxOpMode"#TYPE.Size#"EA"#TYPE.RLet), - MxBeadDReg<1>, EA, EXT>>; + MN#"."#TYPE.Prefix#"\t$opd, $dst", []> { + let Inst = (ascend + (descend CMD, (operand "$opd", 3), + !cast("MxOpMode"#TYPE.Size#"_EA_"#TYPE.RLet).Value, + DST_ENC.EA), + DST_ENC.Supplement + ); +} -class MxBiArOp_FMI CMD, MxEncEA MEMEA, MxEncExt MEMExt> +class MxBiArOp_MI CMD, MxEncMemOp DST_ENC> : MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$opd), - MN#"."#TYPE.Prefix#"\t$opd, $dst", - [], - MxArithImmEncoding, - !cast("MxEncSize"#TYPE.Size), - MEMEA, MEMExt, - !cast("MxExtI"#TYPE.Size#"_1")>>; + MN#"."#TYPE.Prefix#"\t$opd, $dst", []> { + let Inst = (ascend + (descend 0b0000, CMD, + !cast("MxNewEncSize"#TYPE.Size).Value, + DST_ENC.EA), + // Source (i.e. 
immediate value) encoding + MxEncAddrMode_i<"opd", TYPE.Size>.Supplement, + // Destination encoding + DST_ENC.Supplement + ); +} } // mayLoad, mayStore } // Defs = [CCR] multiclass MxBiArOp_DF CMD, bits<4> CMDI> { - // op $mem, $reg - def NAME#"8dk" : MxBiArOp_RFRM; - def NAME#"16dk" : MxBiArOp_RFRM; - def NAME#"32dk" : MxBiArOp_RFRM; - - def NAME#"8dq" : MxBiArOp_RFRM; - def NAME#"16dq" : MxBiArOp_RFRM; - def NAME#"32dq" : MxBiArOp_RFRM; - - def NAME#"8dp" : MxBiArOp_RFRM; - def NAME#"16dp" : MxBiArOp_RFRM; - def NAME#"32dp" : MxBiArOp_RFRM; - - def NAME#"8df" : MxBiArOp_RFRM; - def NAME#"16df" : MxBiArOp_RFRM; - def NAME#"32df" : MxBiArOp_RFRM; - - def NAME#"8dj" : MxBiArOp_RFRM; - def NAME#"16dj" : MxBiArOp_RFRM; - def NAME#"32dj" : MxBiArOp_RFRM; - - // op $imm, $reg - def NAME#"8di" : MxBiArOp_RFRI_xEA; - def NAME#"16di" : MxBiArOp_RFRI_xEA; - def NAME#"32di" : MxBiArOp_RFRI_xEA; - - // op $reg, $mem - def NAME#"8pd" : MxBiArOp_FMR; - def NAME#"16pd" : MxBiArOp_FMR; - def NAME#"32pd" : MxBiArOp_FMR; - - def NAME#"8fd" : MxBiArOp_FMR; - def NAME#"16fd" : MxBiArOp_FMR; - def NAME#"32fd" : MxBiArOp_FMR; - - def NAME#"8jd" : MxBiArOp_FMR; - def NAME#"16jd" : MxBiArOp_FMR; - def NAME#"32jd" : MxBiArOp_FMR; - - // op $imm, $mem - def NAME#"8pi" : MxBiArOp_FMI; - def NAME#"16pi" : MxBiArOp_FMI; - def NAME#"32pi" : MxBiArOp_FMI; - - def NAME#"8fi" : MxBiArOp_FMI; - def NAME#"16fi" : MxBiArOp_FMI; - def NAME#"32fi" : MxBiArOp_FMI; - - def NAME#"8ji" : MxBiArOp_FMI; - def NAME#"16ji" : MxBiArOp_FMI; - def NAME#"32ji" : MxBiArOp_FMI; - - def NAME#"16dr" : MxBiArOp_RFRR_xEA>; - def NAME#"32dr" : MxBiArOp_RFRR_xEA>; - - let isCommutable = isComm in { - - def NAME#"8dd" : MxBiArOp_RFRR_xEA>; - def NAME#"16dd" : MxBiArOp_RFRR_xEA>; - def NAME#"32dd" : MxBiArOp_RFRR_xEA>; - - } // isComm + foreach SZ = [8, 16, 32] in { + // op $mem, $reg + def NAME#SZ#"dk" : MxBiArOp_R_RM("MxType"#SZ#"d"), + !cast("MxType"#SZ).KOp, + !cast("MxType"#SZ).KPat, + CMD, MxEncAddrMode_k<"opd">>; + + def NAME#SZ#"dq" : MxBiArOp_R_RM("MxType"#SZ#"d"), + !cast("MxType"#SZ).QOp, + !cast("MxType"#SZ).QPat, + CMD, MxEncAddrMode_q<"opd">>; + + def NAME#SZ#"dp" : MxBiArOp_R_RM("MxType"#SZ#"d"), + !cast("MxType"#SZ).POp, + !cast("MxType"#SZ).PPat, + CMD, MxEncAddrMode_p<"opd">>; + + def NAME#SZ#"df" : MxBiArOp_R_RM("MxType"#SZ#"d"), + !cast("MxType"#SZ).FOp, + !cast("MxType"#SZ).FPat, + CMD, MxEncAddrMode_f<"opd">>; + + def NAME#SZ#"dj" : MxBiArOp_R_RM("MxType"#SZ#"d"), + !cast("MxType"#SZ).JOp, + !cast("MxType"#SZ).JPat, + CMD, MxEncAddrMode_j<"opd">>; + // op $imm, $reg + def NAME#SZ#"di" : MxBiArOp_R_RI_xEA("MxType"#SZ#"d"), + CMD>; + // op $reg, $mem + def NAME#SZ#"pd" : MxBiArOp_MR("MxType"#SZ#"d"), + !cast("MxType"#SZ).POp, + CMD, MxEncAddrMode_p<"dst">>; + + def NAME#SZ#"fd" : MxBiArOp_MR("MxType"#SZ#"d"), + !cast("MxType"#SZ).FOp, + CMD, MxEncAddrMode_f<"dst">>; + + def NAME#SZ#"jd" : MxBiArOp_MR("MxType"#SZ#"d"), + !cast("MxType"#SZ).JOp, + CMD, MxEncAddrMode_j<"dst">>; + // op $imm, $mem + def NAME#SZ#"pi" : MxBiArOp_MI("MxType"#SZ), + !cast("MxType"#SZ).POp, + CMDI, MxEncAddrMode_p<"dst">>; + + def NAME#SZ#"fi" : MxBiArOp_MI("MxType"#SZ), + !cast("MxType"#SZ).FOp, + CMDI, MxEncAddrMode_f<"dst">>; + + def NAME#SZ#"ji" : MxBiArOp_MI("MxType"#SZ), + !cast("MxType"#SZ).JOp, + CMDI, MxEncAddrMode_j<"dst">>; + // op $reg, $reg + let isCommutable = isComm in + def NAME#SZ#"dd" : MxBiArOp_R_RR_xEA("MxType"#SZ#"d"), + !cast("MxType"#SZ#"d"), + CMD>; + } // foreach SZ + + foreach SZ = [16, 32] in + def NAME#SZ#"dr" : 
MxBiArOp_R_RR_xEA("MxType"#SZ#"d"), + !cast("MxType"#SZ#"r"), + CMD>; } // MxBiArOp_DF @@ -284,25 +283,28 @@ multiclass MxBiArOp_DF CMD> { - def NAME#"32ak" : MxBiArOp_RFRM; - def NAME#"32aq" : MxBiArOp_RFRM; - def NAME#"32af" : MxBiArOp_RFRM; - def NAME#"32ap" : MxBiArOp_RFRM; - def NAME#"32aj" : MxBiArOp_RFRM; - def NAME#"32ai" : MxBiArOp_RFRI_xEA; - - def NAME#"32ar" : MxBiArOp_RFRR_xEA>; + def NAME#"32ak" : MxBiArOp_R_RM>; + def NAME#"32aq" : MxBiArOp_R_RM>; + def NAME#"32af" : MxBiArOp_R_RM>; + def NAME#"32ap" : MxBiArOp_R_RM>; + def NAME#"32aj" : MxBiArOp_R_RM>; + def NAME#"32ai" : MxBiArOp_R_RI_xEA; + + def NAME#"32ar" : MxBiArOp_R_RR_xEA; } // MxBiArOp_AF // NOTE These naturally produce CCR +//===----------------------------------------------------------------------===// +// Add/Sub +//===----------------------------------------------------------------------===// + defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>; defm ADD : MxBiArOp_AF<"adda", MxAdd, 0xD>; defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>; @@ -312,26 +314,42 @@ defm SUB : MxBiArOp_AF<"suba", MxSub, 0x9>; let Uses = [CCR], Defs = [CCR] in { let Constraints = "$src = $dst" in { +/// Encoding for Extended forms +/// ------------------------------------------------------ +/// F E D C | B A 9 | 8 | 7 6 | 5 4 | 3 | 2 1 0 +/// ------------------------------------------------------ +/// x x x x | REG Rx | 1 | SIZE | 0 0 | M | REG Ry +/// ------------------------------------------------------ +/// Rx - destination +/// Ry - source +/// M - address mode switch + // $reg, ccr <- $reg op $reg op ccr -class MxBiArOp_RFRRF CMD> +class MxBiArOp_R_RRX CMD> : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd), MN#"."#TYPE.Prefix#"\t$opd, $dst", - [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd, CCR))], - MxArithXEncoding, - !cast("MxEncSize"#TYPE.Size), - MxBead1Bit<0>, MxBeadDReg<2>, MxBeadDReg<0>>>; - + [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd, CCR))]> { + let Inst = (descend CMD, + // Destination register + (operand "$dst", 3), + 0b1, + // SIZE + !cond(!eq(TYPE.Size, 8): 0b00, + !eq(TYPE.Size, 16): 0b01, + !eq(TYPE.Size, 32): 0b10), + 0b00, /*R/M*/0b0, + // Source register + (operand "$opd", 3) + ); +} } // Constraints } // Uses, Defs multiclass MxBiArOp_RFF CMD> { let isCommutable = isComm in { - - def NAME#"8dd" : MxBiArOp_RFRRF; - def NAME#"16dd" : MxBiArOp_RFRRF; - def NAME#"32dd" : MxBiArOp_RFRRF; - + foreach SZ = [8, 16, 32] in + def NAME#SZ#"dd" : MxBiArOp_R_RRX("MxType"#SZ#"d"), CMD>; } // isComm } // MxBiArOp_RFF @@ -349,19 +367,16 @@ defm AND : MxBiArOp_DF<"and", MxAnd, 1, 0xC, 0x2>; defm OR : MxBiArOp_DF<"or", MxOr, 1, 0x8, 0x0>; multiclass MxBiArOp_DF_EAd CMD, bits<4> CMDI> { - - let isCommutable = 1 in { - - def NAME#"8dd" : MxBiArOp_RFRR_EAd; - def NAME#"16dd" : MxBiArOp_RFRR_EAd; - def NAME#"32dd" : MxBiArOp_RFRR_EAd; - - } // isCommutable = 1 - - def NAME#"8di" : MxBiArOp_RFRI; - def NAME#"16di" : MxBiArOp_RFRI; - def NAME#"32di" : MxBiArOp_RFRI; - + foreach SZ = [8, 16, 32] in { + let isCommutable = 1 in + def NAME#SZ#"dd" : MxBiArOp_R_RR_EAd("MxType"#SZ#"d"), + CMD>; + + def NAME#SZ#"di" : MxBiArOp_R_RI("MxType"#SZ#"d"), + CMDI>; + } // foreach SZ } // MxBiArOp_DF_EAd defm XOR : MxBiArOp_DF_EAd<"eor", MxXor, 0xB, 0xA>; @@ -372,84 +387,112 @@ defm XOR : MxBiArOp_DF_EAd<"eor", MxXor, 0xB, 0xA>; //===----------------------------------------------------------------------===// let Defs = [CCR] in { -class MxCmp_RR> +class MxCmp_RR : MxInst<(outs), (ins LHS_TYPE.ROp:$lhs, 
RHS_TYPE.ROp:$rhs), "cmp."#RHS_TYPE.Prefix#"\t$lhs, $rhs", - [(set CCR, (MxCmp LHS_TYPE.VT:$lhs, RHS_TYPE.VT:$rhs))], - MxArithEncoding, - !cast("MxOpMode"#RHS_TYPE.Size#RHS_TYPE.RLet#"EA"), - REG, - !cast("MxEncEA"#LHS_TYPE.RLet#"_0"), - MxExtEmpty>>; + [(set CCR, (MxCmp LHS_TYPE.VT:$lhs, RHS_TYPE.VT:$rhs))]> { + let Inst = (descend 0b1011, + // REGISTER + (operand "$rhs", 3), + // OPMODE + !cast("MxOpMode"#RHS_TYPE.Size#"_"#RHS_TYPE.RLet#"_EA").Value, + // MODE without last bit + 0b00, + // REGISTER prefixed by D/A bit + (operand "$lhs", 4) + ); +} class MxCmp_RI : MxInst<(outs), (ins TYPE.IOp:$imm, TYPE.ROp:$reg), "cmpi."#TYPE.Prefix#"\t$imm, $reg", - [(set CCR, (MxCmp TYPE.IPat:$imm, TYPE.VT:$reg))], - MxArithImmEncoding, - !cast("MxEncSize"#TYPE.Size), - MxEncEAd_1, MxExtEmpty, - !cast("MxExtI"#TYPE.Size#"_0")>>; + [(set CCR, (MxCmp TYPE.IPat:$imm, TYPE.VT:$reg))]> { + let Inst = (ascend + (descend 0b00001100, + !cast("MxNewEncSize"#TYPE.Size).Value, + // The destination cannot be address register, so it's always + // the MODE for data register direct mode. + /*MODE*/0b000, + /*REGISTER*/(operand "$reg", 3)), + // Source (i.e. immediate value) encoding + MxEncAddrMode_i<"imm", TYPE.Size>.Supplement + ); +} let mayLoad = 1 in { class MxCmp_MI + MxEncMemOp MEM_ENC> : MxInst<(outs), (ins TYPE.IOp:$imm, MEMOpd:$mem), "cmpi."#TYPE.Prefix#"\t$imm, $mem", - [(set CCR, (MxCmp TYPE.IPat:$imm, (load MEMPat:$mem)))], - MxArithImmEncoding, - !cast("MxEncSize"#TYPE.Size), - EA, EXT, - !cast("MxExtI"#TYPE.Size#"_0")>>; + [(set CCR, (MxCmp TYPE.IPat:$imm, (load MEMPat:$mem)))]> { + let Inst = (ascend + (descend 0b00001100, + !cast("MxNewEncSize"#TYPE.Size).Value, + MEM_ENC.EA), + // Source (i.e. immediate value) encoding + MxEncAddrMode_i<"imm", TYPE.Size>.Supplement, + // Destination (i.e. memory operand) encoding + MEM_ENC.Supplement + ); +} +// FIXME: What about abs.W? class MxCmp_BI : MxInst<(outs), (ins TYPE.IOp:$imm, MxAL32:$abs), "cmpi."#TYPE.Prefix#"\t$imm, $abs", [(set CCR, (MxCmp TYPE.IPat:$imm, - (load (i32 (MxWrapper tglobaladdr:$abs)))))], - MxArithImmEncoding, - !cast("MxEncSize"#TYPE.Size), - MxEncEAb, MxExtI32_1, - !cast("MxExtI"#TYPE.Size#"_0")>>; + (load (i32 (MxWrapper tglobaladdr:$abs)))))]> { + defvar AbsEncoding = MxEncAddrMode_abs<"abs", true>; + let Inst = (ascend + (descend 0b00001100, + !cast("MxNewEncSize"#TYPE.Size).Value, + AbsEncoding.EA), + // Source (i.e. immediate value) encoding + MxEncAddrMode_i<"imm", TYPE.Size>.Supplement, + // Destination (i.e. 
memory operand) encoding + AbsEncoding.Supplement + ); +} class MxCmp_RM + MxEncMemOp MEM_ENC> : MxInst<(outs), (ins TYPE.ROp:$reg, MEMOpd:$mem), "cmp."#TYPE.Prefix#"\t$mem, $reg", - [(set CCR, (MxCmp (load MEMPat:$mem), TYPE.ROp:$reg))], - MxArithEncoding, - !cast("MxOpMode"#TYPE.Size#"dEA"), - MxBeadDReg<0>, EA, EXT>>; + [(set CCR, (MxCmp (load MEMPat:$mem), TYPE.ROp:$reg))]> { + let Inst = (ascend + (descend 0b1011, + // REGISTER + (operand "$reg", 3), + // OPMODE + !cast("MxOpMode"#TYPE.Size#"_d_EA").Value, + MEM_ENC.EA), + MEM_ENC.Supplement + ); +} } // let mayLoad = 1 } // let Defs = [CCR] multiclass MMxCmp_RM { - def NAME#TYPE.KOp.Letter : MxCmp_RM; - def NAME#TYPE.QOp.Letter : MxCmp_RM; - def NAME#TYPE.POp.Letter : MxCmp_RM; - def NAME#TYPE.FOp.Letter : MxCmp_RM; - def NAME#TYPE.JOp.Letter : MxCmp_RM; + def NAME#TYPE.KOp.Letter : MxCmp_RM>; + def NAME#TYPE.QOp.Letter : MxCmp_RM>; + def NAME#TYPE.POp.Letter : MxCmp_RM>; + def NAME#TYPE.FOp.Letter : MxCmp_RM>; + def NAME#TYPE.JOp.Letter : MxCmp_RM>; } multiclass MMxCmp_MI { - def NAME#TYPE.KOp.Letter#"i" : MxCmp_MI; - def NAME#TYPE.QOp.Letter#"i" : MxCmp_MI; - def NAME#TYPE.POp.Letter#"i" : MxCmp_MI; - def NAME#TYPE.FOp.Letter#"i" : MxCmp_MI; - def NAME#TYPE.JOp.Letter#"i" : MxCmp_MI; + def NAME#TYPE.KOp.Letter#"i" : MxCmp_MI>; + def NAME#TYPE.QOp.Letter#"i" : MxCmp_MI>; + def NAME#TYPE.POp.Letter#"i" : MxCmp_MI>; + def NAME#TYPE.FOp.Letter#"i" : MxCmp_MI>; + def NAME#TYPE.JOp.Letter#"i" : MxCmp_MI>; } foreach S = [8, 16, 32] in { @@ -478,25 +521,31 @@ defm CMP32 : MMxCmp_MI; // EXT //===----------------------------------------------------------------------===// -def MxExtOpmode_wb : MxBead3Bits<0b010>; -def MxExtOpmode_lw : MxBead3Bits<0b011>; -def MxExtOpmode_lb : MxBead3Bits<0b111>; - /// --------------------------------------------------- /// F E D C B A 9 | 8 7 6 | 5 4 3 | 2 1 0 /// --------------------------------------------------- /// 0 1 0 0 1 0 0 | OPMODE | 0 0 0 | REG /// --------------------------------------------------- -class MxExtEncoding - : MxEncoding, MxBead3Bits<0b000>, OPMODE, - MxBead3Bits<0b100>, MxBead4Bits<0b0100>>; - let Defs = [CCR] in let Constraints = "$src = $dst" in class MxExt : MxInst<(outs TO.ROp:$dst), (ins TO.ROp:$src), - "ext."#TO.Prefix#"\t$src", [], - MxExtEncoding("MxExtOpmode_"#TO.Prefix#FROM.Prefix)>>; + "ext."#TO.Prefix#"\t$src", []> { + let Inst = (descend 0b0100100, + // OPMODE + !cond( + // byte -> word + !and(!eq(FROM.Size, 8), !eq(TO.Size, 16)): 0b010, + // word -> long + !and(!eq(FROM.Size, 16), !eq(TO.Size, 32)): 0b011, + // byte -> long + !and(!eq(FROM.Size, 8), !eq(TO.Size, 32)): 0b111 + ), + 0b000, + // REGISTER + (operand "$src", 3) + ); +} def EXT16 : MxExt; def EXT32 : MxExt; @@ -511,9 +560,6 @@ def : Pat<(sext_inreg i32:$src, i8), // DIV/MUL //===----------------------------------------------------------------------===// -def MxSDiMuOpmode : MxBead3Bits<0b111>; -def MxUDiMuOpmode : MxBead3Bits<0b011>; - /// Word operation: /// ---------------------------------------------------- /// F E D C | B A 9 | 8 7 6 | 5 4 3 | 2 1 0 @@ -521,40 +567,45 @@ def MxUDiMuOpmode : MxBead3Bits<0b011>; /// | | | EFFECTIVE ADDRESS /// x x x x | REG | OP MODE | MODE | REG /// ---------------------------------------------------- -class MxDiMuEncoding - : MxEncoding, CMD, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>; - let Defs = [CCR] in { let Constraints = "$src = $dst" in { -// $reg <- $reg op $reg -class MxDiMuOp_DD CMD, MxBead3Bits OPMODE, +// $dreg <- $dreg op $dreg +class MxDiMuOp_DD CMD, 
bit SIGNED = false, MxOperand DST, MxOperand OPD> - : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", [], - MxDiMuEncoding, OPMODE, MxEncEAd_2, MxExtEmpty>>; + : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", []> { + let Inst = (descend CMD, + // REGISTER + (operand "$dst", 3), + !if(SIGNED, 0b111, 0b011), + /*MODE*/0b000, /*REGISTER*/(operand "$opd", 3) + ); +} // $reg <- $reg op $imm -class MxDiMuOp_DI CMD, MxBead3Bits OPMODE, +class MxDiMuOp_DI CMD, bit SIGNED = false, MxOperand DST, MxOperand OPD> - : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", [], - MxDiMuEncoding, OPMODE, MxEncEAi, MxExtI16_2>>; + : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", []> { + // FIXME: Support immediates with different widths. + defvar ImmEnc = MxEncAddrMode_i<"opd", 16>; + let Inst = (ascend + (descend CMD, + // REGISTER + (operand "$dst", 3), + !if(SIGNED, 0b111, 0b011), ImmEnc.EA), + ImmEnc.Supplement + ); +} } // let Constraints } // Defs = [CCR] multiclass MxDiMuOp CMD, bit isComm = 0> { - let isCommutable = isComm in { - def "S"#NAME#"d32d16" : MxDiMuOp_DD; - def "U"#NAME#"d32d16" : MxDiMuOp_DD; + def "S"#NAME#"d32d16" : MxDiMuOp_DD; + def "U"#NAME#"d32d16" : MxDiMuOp_DD; } - def "S"#NAME#"d32i16" : MxDiMuOp_DI; - def "U"#NAME#"d32i16" : MxDiMuOp_DI; - + def "S"#NAME#"d32i16" : MxDiMuOp_DI; + def "U"#NAME#"d32i16" : MxDiMuOp_DI; } defm DIV : MxDiMuOp<"div", 0x8>; @@ -697,29 +748,35 @@ def : Pat<(mulhu i16:$dst, MximmSExt16:$opd), /// | | | EFFECTIVE ADDRESS /// 0 1 0 0 | x x x x | SIZE | MODE | REG /// ------------+------------+------+---------+--------- -class MxNEGEncoding - : MxEncoding, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>; - let Defs = [CCR] in { let Constraints = "$src = $dst" in { class MxNeg_D : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src), "neg."#TYPE.Prefix#"\t$dst", - [(set TYPE.VT:$dst, (ineg TYPE.VT:$src))], - MxNEGEncoding, - !cast("MxEncSize"#TYPE.Size), - MxEncEAd_0, MxExtEmpty>>; + [(set TYPE.VT:$dst, (ineg TYPE.VT:$src))]> { + let Inst = (descend 0b01000100, + /*SIZE*/!cast("MxNewEncSize"#TYPE.Size).Value, + //MODE without last bit + 0b00, + //REGISTER prefixed by D/A bit + (operand "$dst", 4) + ); +} let Uses = [CCR] in { class MxNegX_D : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src), "negx."#TYPE.Prefix#"\t$dst", - [(set TYPE.VT:$dst, (MxSubX 0, TYPE.VT:$src, CCR))], - MxNEGEncoding, - !cast("MxEncSize"#TYPE.Size), - MxEncEAd_0, MxExtEmpty>>; + [(set TYPE.VT:$dst, (MxSubX 0, TYPE.VT:$src, CCR))]> { + let Inst = (descend 0b01000000, + /*SIZE*/!cast("MxNewEncSize"#TYPE.Size).Value, + //MODE without last bit + 0b00, + //REGISTER prefixed by D/A bit + (operand "$dst", 4) + ); +} } } // let Constraints diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td index 0d1278102378..abd2ab3cf012 100644 --- a/llvm/lib/Target/M68k/M68kInstrBits.td +++ b/llvm/lib/Target/M68k/M68kInstrBits.td @@ -32,9 +32,15 @@ /// ------------+---------+---------+---------+--------- /// 0 0 0 0 | REG | 1 0 0 | MODE | REG /// ------------+---------+---------+---------+--------- -class MxBTSTEnc_R - : MxEncoding, REG, MxBead4Bits<0b0000>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>; +class MxBTSTEnc_R { + dag Value = (ascend + (descend 0b0000, + (operand "$"#bitno_name, 3), + 0b100, dst_enc.EA + ), + dst_enc.Supplement + ); +} /// -------------------------------+---------+--------- /// F E D C B A 9 8 . 
7 6 | 5 4 3 | 2 1 0 @@ -43,33 +49,40 @@ class MxBTSTEnc_R /// ------------------------+------+---------+--------- /// 0 0 0 0 0 0 0 0 | BIT NUMBER /// ------------------------+-------------------------- -class MxBTSTEnc_I - : MxEncoding, - MxBead4Bits<0b1000>, MxBead4Bits<0b0000>, IMM, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>; +class MxBTSTEnc_I { + dag Value = (ascend + (descend 0b0000100000, dst_enc.EA), + (descend 0b00000000, (operand "$"#bitno_name, 8)), + dst_enc.Supplement + ); +} let Defs = [CCR] in { class MxBTST_RR : MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst", - [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.VT:$bitno))], - MxBTSTEnc_R, MxEncEAd_0, MxExtEmpty>>; + [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.VT:$bitno))]> { + let Inst = MxBTSTEnc_R, "bitno">.Value; +} class MxBTST_RI : MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst", - [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.IPat:$bitno))], - MxBTSTEnc_I, MxEncEAd_0, MxExtEmpty>>; + [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.IPat:$bitno))]> { + let Inst = MxBTSTEnc_I, "bitno">.Value; +} class MxBTST_MR + MxEncMemOp DST_ENC> : MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst", - [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))], - MxBTSTEnc_R, EA, EXT>>; + [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))]> { + let Inst = MxBTSTEnc_R.Value; +} class MxBTST_MI + MxEncMemOp DST_ENC> : MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst", - [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))], - MxBTSTEnc_I, EA, EXT>>; + [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))]> { + let Inst = MxBTSTEnc_I.Value; +} } // Defs = [CCR] // Register BTST limited to 32 bits only @@ -78,31 +91,31 @@ def BTST32di : MxBTST_RI; // Memory BTST limited to 8 bits only def BTST8jd : MxBTST_MR; + MxEncAddrMode_j<"dst">>; def BTST8od : MxBTST_MR; + MxEncAddrMode_o<"dst">>; def BTST8ed : MxBTST_MR; + MxEncAddrMode_e<"dst">>; def BTST8pd : MxBTST_MR; + MxEncAddrMode_p<"dst">>; def BTST8fd : MxBTST_MR; + MxEncAddrMode_f<"dst">>; def BTST8qd : MxBTST_MR; + MxEncAddrMode_q<"dst">>; def BTST8kd : MxBTST_MR; + MxEncAddrMode_k<"dst">>; def BTST8ji : MxBTST_MI; + MxEncAddrMode_j<"dst">>; def BTST8oi : MxBTST_MI; + MxEncAddrMode_o<"dst">>; def BTST8ei : MxBTST_MI; + MxEncAddrMode_e<"dst">>; def BTST8pi : MxBTST_MI; + MxEncAddrMode_p<"dst">>; def BTST8fi : MxBTST_MI; + MxEncAddrMode_f<"dst">>; def BTST8qi : MxBTST_MI; + MxEncAddrMode_q<"dst">>; def BTST8ki : MxBTST_MI; + MxEncAddrMode_k<"dst">>; diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td b/llvm/lib/Target/M68k/M68kInstrControl.td index be9045b6e0d2..d15283c769f6 100644 --- a/llvm/lib/Target/M68k/M68kInstrControl.td +++ b/llvm/lib/Target/M68k/M68kInstrControl.td @@ -12,10 +12,10 @@ /// /// Machine: /// -/// BRA [x] BSR [ ] Bcc [ ] DBcc [ ] FBcc [ ] +/// BRA [x] BSR [ ] Bcc [~] DBcc [ ] FBcc [ ] /// FDBcc [ ] FNOP [ ] FPn [ ] FScc [ ] FTST [ ] /// JMP [~] JSR [x] NOP [x] RTD [!] 
RTR [ ] -/// RTS [x] Scc [x] TST [ ] +/// RTS [x] Scc [~] TST [ ] /// /// Pseudo: /// @@ -43,7 +43,9 @@ //===----------------------------------------------------------------------===// let hasSideEffects = 0 in { - def NOP : MxInst<(outs), (ins), "nop", [], MxEncFixed<0x4E71>>; + def NOP : MxInst<(outs), (ins), "nop", []> { + let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0001); + } } @@ -61,51 +63,60 @@ let hasSideEffects = 0 in { /// NE—Not equal VS—Overflow set /// /// *Not applicable to the Bcc instructions. -def MxCCt : MxBead4Bits<0b0000>; -def MxCCf : MxBead4Bits<0b0001>; -def MxCChi : MxBead4Bits<0b0010>; -def MxCCls : MxBead4Bits<0b0011>; -def MxCCcc : MxBead4Bits<0b0100>; -def MxCCcs : MxBead4Bits<0b0101>; -def MxCCne : MxBead4Bits<0b0110>; -def MxCCeq : MxBead4Bits<0b0111>; -def MxCCvc : MxBead4Bits<0b1000>; -def MxCCvs : MxBead4Bits<0b1001>; -def MxCCpl : MxBead4Bits<0b1010>; -def MxCCmi : MxBead4Bits<0b1011>; -def MxCCge : MxBead4Bits<0b1100>; -def MxCClt : MxBead4Bits<0b1101>; -def MxCCgt : MxBead4Bits<0b1110>; -def MxCCle : MxBead4Bits<0b1111>; +class MxEncCondOp cond> { + dag Value = (descend cond); +} + +def MxCCt : MxEncCondOp<0b0000>; +def MxCCf : MxEncCondOp<0b0001>; +def MxCChi : MxEncCondOp<0b0010>; +def MxCCls : MxEncCondOp<0b0011>; +def MxCCcc : MxEncCondOp<0b0100>; +def MxCCcs : MxEncCondOp<0b0101>; +def MxCCne : MxEncCondOp<0b0110>; +def MxCCeq : MxEncCondOp<0b0111>; +def MxCCvc : MxEncCondOp<0b1000>; +def MxCCvs : MxEncCondOp<0b1001>; +def MxCCpl : MxEncCondOp<0b1010>; +def MxCCmi : MxEncCondOp<0b1011>; +def MxCCge : MxEncCondOp<0b1100>; +def MxCClt : MxEncCondOp<0b1101>; +def MxCCgt : MxEncCondOp<0b1110>; +def MxCCle : MxEncCondOp<0b1111>; + + /// --------------------------------+---------+--------- /// F E D C | B A 9 8 | 7 6 | 5 4 3 | 2 1 0 /// --------------------------------+---------+--------- /// 0 1 0 1 | CONDITION | 1 1 | MODE | REG /// ---------------------------------------------------- -class MxSccEncoding - : MxEncoding, CC, MxBead4Bits<0b0101>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>; let Uses = [CCR] in { class MxSccR : MxInst<(outs MxDRD8:$dst), (ins), "s"#CC#"\t$dst", - [(set i8:$dst, (MxSetCC !cast("MxCOND"#CC), CCR))], - MxSccEncoding("MxCC"#CC)>>; + [(set i8:$dst, (MxSetCC !cast("MxCOND"#CC), CCR))]> { + let Inst = (descend 0b0101, !cast("MxCC"#CC).Value, 0b11, + /*MODE without last bit*/0b00, + /*REGISTER prefixed with D/A bit*/(operand "$dst", 4)); +} -class MxSccM +class MxSccM : MxInst<(outs), (ins MEMOpd:$dst), "s"#CC#"\t$dst", - [(store (MxSetCC !cast("MxCOND"#CC), CCR), MEMPat:$dst)], - MxSccEncoding("MxCC"#CC)>>; + [(store (MxSetCC !cast("MxCOND"#CC), CCR), MEMPat:$dst)]> { + let Inst = + (ascend + (descend 0b0101, !cast("MxCC"#CC).Value, 0b11, DST_ENC.EA), + DST_ENC.Supplement + ); +} } foreach cc = [ "cc", "ls", "lt", "eq", "mi", "f", "ne", "ge", "cs", "pl", "gt", "t", "hi", "vc", "le", "vs"] in { def SET#"d8"#cc : MxSccR; -def SET#"j8"#cc : MxSccM; -def SET#"p8"#cc : MxSccM; +def SET#"j8"#cc : MxSccM>; +def SET#"p8"#cc : MxSccM>; } //===----------------------------------------------------------------------===// @@ -118,13 +129,16 @@ def SET#"p8"#cc : MxSccM; /// 0 1 0 0 1 1 1 0 1 1 | MODE | REG ///------------------------------+---------+--------- let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in -class MxJMP - : MxInst<(outs), (ins LOCOp:$dst), "jmp\t$dst", [(brind iPTR:$dst)], - MxEncoding, - MxBead4Bits<0b1110>, MxBead4Bits<0b0100>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>; +class MxJMP + 
: MxInst<(outs), (ins LOCOp:$dst), "jmp\t$dst", [(brind iPTR:$dst)]> { + let Inst = + (ascend + (descend 0b0100, 0b1110, 0b11, DST_ENC.EA), + DST_ENC.Supplement + ); +} -def JMP32j : MxJMP; +def JMP32j : MxJMP>; // FIXME Support 16 bit indirect jump. @@ -147,20 +161,35 @@ def JMP32j : MxJMP; /// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF /// -------------------------------------------------- let isBranch = 1, isTerminator = 1, Uses = [CCR] in -class MxBcc - : MxInst<(outs), (ins TARGET:$dst), "b"#cc#"\t$dst", [], ENC>; +class MxBcc + : MxInst<(outs), (ins TARGET:$dst), "b"#cc#"\t$dst", []> { + // FIXME: If we want to avoid supplying disp_16_32 with empty + // (ascend) for 16/32 bits variants, we can use conditional + // bang operator like this: + // ``` + // class MxBcc + // ... + // let Inst = !cond( + // !eq(SIZE, 8): /* encoding for Bcc8 */ + // !eq(SIZE, 16): /* encoding for Bcc16 */ + // !eq(SIZE, 32): /* encoding for Bcc32 */ + // ); + let Inst = + (ascend + (descend 0b0110, !cast("MxCC"#cc).Value, disp_8), + disp_16_32 + ); +} foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", "cs", "pl", "gt", "hi", "vc", "le", "vs"] in { def B#cc#"8" : MxBcc, - !cast("MxCC"#cc), MxBead4Bits<0x6>>>; + (operand "$dst", 8, (encoder "encodePCRelImm<8>")), (ascend)>; + def B#cc#"16" - : MxBcc, - MxBead4Bits<0x0>, !cast("MxCC"#cc), - MxBead4Bits<0x6>, MxBead16Imm<0>>>; + : MxBcc"))>; } foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", @@ -178,17 +207,21 @@ def : Pat<(MxBrCond bb:$target, !cast("MxCOND"#cc), CCR), /// ------------------------------------------------- /// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF /// ------------------------------------------------- -let isBranch = 1, isTerminator = 1, isBarrier=1 in -class MxBra - : MxInst<(outs), (ins TARGET:$dst), "bra\t$dst", [], ENC>; +let isBranch = 1, isTerminator = 1, isBarrier = 1 in +class MxBra + : MxInst<(outs), (ins TARGET:$dst), "bra\t$dst", []> { + let Inst = + (ascend + (descend 0b0110, 0b0000, disp_8), + disp_16_32 + ); +} def BRA8 : MxBra, MxBead4Bits<0x0>, - MxBead4Bits<0x6>>>; -def BRA16 : MxBra, MxBead4Bits<0x0>, - MxBead4Bits<0x0>, MxBead4Bits<0x6>, - MxBead16Imm<0>>>; + (operand "$dst", 8, (encoder "encodePCRelImm<8>")), (ascend)>; + +def BRA16 : MxBra"))>; def : Pat<(br bb:$target), (BRA8 MxBrTarget8:$target)>; @@ -208,16 +241,19 @@ let isCall = 1 in ///------------------------------+---------+--------- /// 0 1 0 0 1 1 1 0 1 0 | MODE | REG ///------------------------------+---------+--------- -class MxCall - : MxInst<(outs), (ins LOCOp:$dst), "jsr\t$dst", [], - MxEncoding, - MxBead4Bits<0b1110>, MxBead4Bits<0b0100>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>; +class MxCall + : MxInst<(outs), (ins LOCOp:$dst), "jsr\t$dst", []> { + let Inst = + (ascend + (descend 0b0100, 0b1110, 0b10, DST_ENC.EA), + DST_ENC.Supplement + ); +} -def CALLk : MxCall; -def CALLq : MxCall; -def CALLb : MxCall; -def CALLj : MxCall; +def CALLk : MxCall>; +def CALLq : MxCall>; +def CALLb : MxCall>; +def CALLj : MxCall>; multiclass CallPat { let Predicates = [pred] in { @@ -261,7 +297,9 @@ def TAILJMPj : MxPseudo<(outs), (ins MxARI32_TC:$dst)>; let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { -def RTS : MxInst<(outs), (ins), "rts", [], MxEncFixed<0x4E75>>; +def RTS : MxInst<(outs), (ins), "rts", []> { + let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0101); +} let isCodeGenOnly = 1 in def RET : MxPseudo<(outs), (ins i32imm:$adj, variable_ops), diff --git a/llvm/lib/Target/M68k/M68kInstrData.td 
b/llvm/lib/Target/M68k/M68kInstrData.td index 3dd5d9f8c7ac..863432b94005 100644 --- a/llvm/lib/Target/M68k/M68kInstrData.td +++ b/llvm/lib/Target/M68k/M68kInstrData.td @@ -42,290 +42,192 @@ /// ----------------------------------------------------- /// /// NOTE Move requires EA X version for direct register destination(0) -class MxMoveEncoding - : MxEncoding, - srcExt.Imm, srcExt.B8, srcExt.Scale, srcExt.WL, srcExt.DAReg, - dstExt.Imm, dstExt.B8, dstExt.Scale, dstExt.WL, dstExt.DAReg>; - -/// MOVE has alternate size encoding -class MxMoveSize value> : MxBead2Bits; + +// MOVE has a different size encoding. +class MxMoveSize value> { + bits<2> Value = value; +} def MxMoveSize8 : MxMoveSize<0b01>; def MxMoveSize16 : MxMoveSize<0b11>; def MxMoveSize32 : MxMoveSize<0b10>; -let Defs = [CCR] in -class MxMove pattern, MxEncoding enc> - : MxInst; - -class MxMove_RR - : MxMove; - -let mayStore = 1 in { -class MxMove_MR - : MxMove; - -class MxMove_MI - : MxMove; -} // let mayStore = 1 - -class MxMove_RI - : MxMove; - - -let mayLoad = 1 in -class MxMove_RM - : MxMove>; - -multiclass MMxMove_RM { - - // REG <- (An)+ - def NAME#REG.OOp.Letter#REG.Postfix : MxMove_RM; - - // REG <- -(An) - def NAME#REG.EOp.Letter#REG.Postfix : MxMove_RM; - - // REG <- (i,PC,Xn) - def NAME#REG.KOp.Letter#REG.Postfix : MxMove_RM; - - // REG <- (i,PC) - def NAME#REG.QOp.Letter#REG.Postfix : MxMove_RM; - - // REG <- (i,An,Xn) - def NAME#REG.FOp.Letter#REG.Postfix : MxMove_RM; - - // REG <- (i,An) - def NAME#REG.POp.Letter#REG.Postfix : MxMove_RM; - - // REG <- (ABS) - def NAME#REG.BOp.Letter#REG.Postfix : MxMove_RM; - - // REG <- (An) - def NAME#REG.JOp.Letter#REG.Postfix : MxMove_RM; +class MxMoveEncoding { + dag Value = (ascend + (descend 0b00, size.Value, + !cond( + !eq(!getdagop(dst_enc.EA), descend): !setdagop(dst_enc.EA, ascend), + !eq(!getdagop(dst_enc.EA), ascend): !setdagop(dst_enc.EA, descend)), + src_enc.EA), + // Source extension + src_enc.Supplement, + // Destination extension + dst_enc.Supplement + ); } -let mayLoad = 1, mayStore = 1 in { -class MxMove_MM - : MxMove>; -} // let mayLoad = 1, mayStore = 1 - -multiclass MMxMove_MM { - - // MEM <- (An)+ - def NAME#TYPE.OOp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- -(An) - def NAME#TYPE.EOp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- (i,An) - def NAME#TYPE.POp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- (i,An,Xn) - def NAME#TYPE.FOp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- (i,PC,Xn) - def NAME#TYPE.KOp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- (i,PC) - def NAME#TYPE.QOp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- (ABS) - def NAME#TYPE.BOp.Letter#TYPE.Postfix - : MxMove_MM; - - // MEM <- (An) - def NAME#TYPE.JOp.Letter#TYPE.Postfix - : MxMove_MM; +// Special encoding for Xn +class MxMoveEncAddrMode_r : MxEncMemOp { + let EA = (descend (descend 0b00, (slice "$"#reg_opnd, 3, 3)), + (operand "$"#reg_opnd, 3)); } -def MOV8dd - : MxMove_RR>; +// TODO: Generalize and adopt this utility in other .td files as well. 
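
For readers decoding MxMoveEncoding above: MOVE is irregular in two ways. The size field is 0b01 for byte, 0b11 for word, and 0b10 for long, and the destination EA is emitted register-first rather than mode-first, which is what the !getdagop/!setdagop dance accomplishes declaratively by reversing the field order of the destination's EA dag. A minimal C++ sketch, not part of the patch (helper name invented; the expected word is the standard encoding of move.l %d1, (%a0)):

#include <cassert>
#include <cstdint>

// MOVE word: 00 | size(2) | dst REGISTER(3) | dst MODE(3) |
//            src MODE(3) | src REGISTER(3).
static uint16_t packMoveWord(unsigned Size, unsigned DstReg, unsigned DstMode,
                             unsigned SrcMode, unsigned SrcReg) {
  return (Size << 12) | (DstReg << 9) | (DstMode << 6) | (SrcMode << 3) | SrcReg;
}

int main() {
  // move.l %d1, (%a0): size 0b10 (long), destination (%a0) is mode 0b010,
  // register 0; source %d1 is mode 0b000, register 1.
  assert(packMoveWord(0b10, 0, 0b010, 0b000, 1) == 0x2081);
  return 0;
}
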
+multiclass MxMoveOperandEncodings { + // Dn + def MxMove#NAME#OpEnc_d : MxEncAddrMode_d; + // An + def MxMove#NAME#OpEnc_a : MxEncAddrMode_a; + // Xn + def MxMove#NAME#OpEnc_r : MxMoveEncAddrMode_r; + // (An)+ + def MxMove#NAME#OpEnc_o : MxEncAddrMode_o; + // -(An) + def MxMove#NAME#OpEnc_e : MxEncAddrMode_e; + // (i,PC,Xn) + def MxMove#NAME#OpEnc_k : MxEncAddrMode_k; + // (i,PC) + def MxMove#NAME#OpEnc_q : MxEncAddrMode_q; + // (i,An,Xn) + def MxMove#NAME#OpEnc_f : MxEncAddrMode_f; + // (i,An) + def MxMove#NAME#OpEnc_p : MxEncAddrMode_p; + // (ABS).L + def MxMove#NAME#OpEnc_b : MxEncAddrMode_abs; + // (An) + def MxMove#NAME#OpEnc_j : MxEncAddrMode_j; +} -// M <- R -def MOV8fd : MxMove_MR>; +defm Src : MxMoveOperandEncodings<"src">; +defm Dst : MxMoveOperandEncodings<"dst">; -def MOV8pd : MxMove_MR>; +defvar MxMoveSupportedAMs = ["o", "e", "k", "q", "f", "p", "b", "j"]; -def MOV8ed : MxMove_MR>; +let Defs = [CCR] in +class MxMove pattern, MxMoveEncoding enc> + : MxInst { + let Inst = enc.Value; +} -def MOV8od : MxMove_MR>; +// R <- R +class MxMove_RR("MxOp"#TYPE.Size#"AddrMode_"#DST_REG), + MxOpBundle SRC = !cast("MxOp"#TYPE.Size#"AddrMode_"#SRC_REG)> + : MxMove; -def MOV8bd : MxMove_MR>; +foreach DST_REG = ["r", "a"] in { + foreach SRC_REG = ["r", "a"] in + foreach TYPE = [MxType16, MxType32] in + def MOV # TYPE.Size # DST_REG # SRC_REG # TYPE.Postfix + : MxMove_RR("MxMoveSize"#TYPE.Size), + !cast("MxMoveDstOpEnc_"#DST_REG), + !cast("MxMoveSrcOpEnc_"#SRC_REG)>>; +} // foreach DST_REG +foreach TYPE = [MxType8, MxType16, MxType32] in +def MOV # TYPE.Size # dd # TYPE.Postfix + : MxMove_RR("MxMoveSize"#TYPE.Size), + MxMoveDstOpEnc_d, MxMoveSrcOpEnc_d>>; -def MOV8jd : MxMove_MR>; +// M <- R +let mayStore = 1 in { +class MxMove_MR("MxOp"#TYPE.Size#"AddrMode_"#SRC_REG)> + : MxMove; + +class MxMove_MI("MxOp"#TYPE.Size#"AddrMode_i")> + : MxMove; +} // let mayStore = 1 +foreach REG = ["r", "a", "d"] in +foreach AM = MxMoveSupportedAMs in { + foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in + def MOV # TYPE.Size # AM # REG # TYPE.Postfix + : MxMove_MR("MxOp"#TYPE.Size#"AddrMode_"#AM), REG, + MxMoveEncoding("MxMoveSize"#TYPE.Size), + !cast("MxMoveDstOpEnc_"#AM), + !cast("MxMoveSrcOpEnc_"#REG)>>; +} // foreach AM + +foreach AM = MxMoveSupportedAMs in { + foreach TYPE = [MxType8, MxType16, MxType32] in + def MOV # TYPE.Size # AM # i # TYPE.Postfix + : MxMove_MI("MxOp"#TYPE.Size#"AddrMode_"#AM), + MxMoveEncoding("MxMoveSize"#TYPE.Size), + !cast("MxMoveDstOpEnc_"#AM), + MxEncAddrMode_i<"src", TYPE.Size>>>; +} // foreach AM // R <- I -def MOV8di : MxMove_RI>; - -foreach S = [16, 32] in { - foreach D = [ "r", "a" ] in { - - foreach O = [ "r", "a" ] in { - def MOV#S#D#O : MxMove_RR< - !cast("MxType"#S#D), - !cast("MxType"#S#O), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - !cast("MxEncEA"#D#"_0_reflected"), MxExtEmpty>>; - } - - // M <- R - def MOV#S#"f"#D : MxMove_MR< - !cast("MxType"#S).FOp, - !cast("MxType"#S).FPat, - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - MxEncEAf_0, MxExtBrief_0>>; - - def MOV#S#"p"#D : MxMove_MR< - !cast("MxType"#S).POp, - !cast("MxType"#S).PPat, - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - MxEncEAp_0, MxExtI16_0>>; - - def MOV#S#"e"#D : MxMove_MR< - !cast("MxType"#S).EOp, - !cast("MxType"#S).EPat, - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - MxEncEAe_0, 
MxExtEmpty>>; - - def MOV#S#"o"#D : MxMove_MR< - !cast("MxType"#S).OOp, - !cast("MxType"#S).OPat, - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - MxEncEAo_0, MxExtEmpty>>; - - def MOV#S#"b"#D : MxMove_MR< - !cast("MxType"#S).BOp, - !cast("MxType"#S).BPat, - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - MxEncEAb, MxExtI32_0>>; - - def MOV#S#"j"#D : MxMove_MR< - !cast("MxType"#S).JOp, - !cast("MxType"#S).JPat, - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - !cast("MxEncEA"#D#"_1"), MxExtEmpty, - MxEncEAj_0, MxExtEmpty>>; - - - // R <- I - def MOV#S#D#"i" : MxMove_RI< - !cast("MxType"#S#D), - MxMoveEncoding("MxMoveSize"#S), - MxEncEAi, !cast("MxExtI"#S#"_1"), - !cast("MxEncEA"#D#"_0_reflected"), MxExtEmpty>>; - } -} +class MxMove_RI("MxOp"#TYPE.Size#"AddrMode_i"), + MxOpBundle DST = !cast("MxOp"#TYPE.Size#"AddrMode_"#DST_REG)> + : MxMove; + +foreach REG = ["r", "a", "d"] in { + foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in + def MOV # TYPE.Size # REG # i # TYPE.Postfix + : MxMove_RI("MxMoveSize"#TYPE.Size), + !cast("MxMoveDstOpEnc_"#REG), + MxEncAddrMode_i<"src", TYPE.Size>>>; +} // foreach REG -// M <- I -foreach S = [8, 16, 32] in { - def MOV#S#"f"#"i" : MxMove_MI< - !cast("MxType"#S).FOp, - !cast("MxType"#S).FPat, - !cast("MxType"#S), - MxMoveEncoding("MxMoveSize"#S), - MxEncEAi, !cast("MxExtI"#S#"_1"), - MxEncEAf_0, MxExtBrief_0>>; - - def MOV#S#"p"#"i" : MxMove_MI< - !cast("MxType"#S).POp, - !cast("MxType"#S).PPat, - !cast("MxType"#S), - MxMoveEncoding("MxMoveSize"#S), - MxEncEAi, !cast("MxExtI"#S#"_1"), - MxEncEAp_0, MxExtI16_0>>; - - def MOV#S#"b"#"i" : MxMove_MI< - !cast("MxType"#S).BOp, - !cast("MxType"#S).BPat, - !cast("MxType"#S), - MxMoveEncoding("MxMoveSize"#S), - MxEncEAi, !cast("MxExtI"#S#"_1"), - MxEncEAb, MxExtI32_0>>; - - def MOV#S#"j"#"i" : MxMove_MI< - !cast("MxType"#S).JOp, - !cast("MxType"#S).JPat, - !cast("MxType"#S), - MxMoveEncoding("MxMoveSize"#S), - MxEncEAi, !cast("MxExtI"#S#"_1"), - MxEncEAj_0, MxExtEmpty>>; -} +// R <- M +let mayLoad = 1 in +class MxMove_RM("MxMoveSize"#TYPE.Size), + MxOpBundle DST = !cast("MxOp"#TYPE.Size#"AddrMode_"#DST_REG), + MxEncMemOp DST_ENC = !cast("MxMoveDstOpEnc_"#DST_REG)> + : MxMove>; + +foreach REG = ["r", "a", "d"] in +foreach AM = MxMoveSupportedAMs in { + foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in + def MOV # TYPE.Size # REG # AM # TYPE.Postfix + : MxMove_RM("MxOp"#TYPE.Size#"AddrMode_"#AM), + !cast("MxMoveSrcOpEnc_"#AM)>; +} // foreach AM + +// Tail call version +let Pattern = [(null_frag)] in { + foreach REG = ["r", "a"] in + foreach AM = MxMoveSupportedAMs in { + foreach TYPE = [MxType16, MxType32] in + def MOV # TYPE.Size # REG # AM # _TC + : MxMove_RM("MxOp"#TYPE.Size#"AddrMode_"#AM), + !cast("MxMoveSrcOpEnc_"#AM)> { + let isCodeGenOnly = true; + } + } // foreach AM +} // let Pattern + +let mayLoad = 1, mayStore = 1 in +class MxMove_MM + : MxMove("MxMoveSize"#TYPE.Size), + DST_ENC, SRC_ENC>>; + +foreach DST_AM = MxMoveSupportedAMs in +foreach SRC_AM = MxMoveSupportedAMs in { + foreach TYPE = [MxType8, MxType16, MxType32] in + def MOV # TYPE.Size # DST_AM # SRC_AM # TYPE.Postfix + : MxMove_MM("MxOp"#TYPE.Size#"AddrMode_"#DST_AM), + !cast("MxOp"#TYPE.Size#"AddrMode_"#SRC_AM), + !cast("MxMoveDstOpEnc_"#DST_AM), + !cast("MxMoveSrcOpEnc_"#SRC_AM)>; +} // foreach SRC_AM // Store ABS(basically pointer) as Immdiate to Mem def : 
Pat<(store MxType32.BPat :$src, MxType32.PPat :$dst), @@ -340,66 +242,6 @@ def : Pat<(store MxType32.BPat :$src, MxType32.BPat :$dst), def : Pat<(store MxType32.BPat :$src, MxType32.JPat :$dst), (MOV32ji MxType32.JOp :$dst, MxType32.IOp :$src)>; -// R <- M -defm MOV8d : MMxMove_RM; - -defm MOV16r : MMxMove_RM; -defm MOV16a : MMxMove_RM; - -defm MOV32r : MMxMove_RM; -defm MOV32a : MMxMove_RM; - -let Pattern = [(null_frag)] in { -defm MOV16r : MMxMove_RM; -defm MOV16a : MMxMove_RM; - -defm MOV32r : MMxMove_RM; -defm MOV32a : MMxMove_RM; -} // Pattern - -// M <- M -defm MOV8p : MMxMove_MM; -defm MOV16p : MMxMove_MM; -defm MOV32p : MMxMove_MM; - -defm MOV8f : MMxMove_MM; -defm MOV16f : MMxMove_MM; -defm MOV32f : MMxMove_MM; - -defm MOV8b : MMxMove_MM; -defm MOV16b : MMxMove_MM; -defm MOV32b : MMxMove_MM; - -defm MOV8e : MMxMove_MM; -defm MOV16e : MMxMove_MM; -defm MOV32e : MMxMove_MM; - -defm MOV8o : MMxMove_MM; -defm MOV16o : MMxMove_MM; -defm MOV32o : MMxMove_MM; - -defm MOV8j : MMxMove_MM; -defm MOV16j : MMxMove_MM; -defm MOV32j : MMxMove_MM; - //===----------------------------------------------------------------------===// // MOVEM // @@ -407,12 +249,12 @@ defm MOV32j : MMxMove_MM; -def MxMOVEM_RM : MxBead1Bit<1>; +defvar MxMOVEM_MR = false; +defvar MxMOVEM_RM = true; // Size -def MxMOVEM_W : MxBead1Bit<0>; -def MxMOVEM_L : MxBead1Bit<1>; +defvar MxMOVEM_W = false; +defvar MxMOVEM_L = true; /// ---------------+-------------+-------------+--------- /// F E D C B | A | 9 8 7 | 6 | 5 4 3 | 2 1 0 @@ -423,31 +265,47 @@ def MxMOVEM_L : MxBead1Bit<1>; /// ----------------------------------------------------- /// D - direction(RM,MR) /// S - size(W,L) -class MxMOVEMEncoding - : MxEncoding, DIR, - MxBead1Bit<1>, MxBead4Bits<0b0100>, IMM, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>; +class MxMOVEMEncoding { + dag Value = (ascend + (descend 0b01001, direction, 0b001, size, opnd_enc.EA), + // Mask + (operand "$"#mask_op_name, 16), + opnd_enc.Supplement + ); +} let mayStore = 1 in -class MxMOVEM_MR +class MxMOVEM_MR : MxInst<(outs), (ins MEMOp:$dst, MxMoveMask:$mask), - "movem."#TYPE.Prefix#"\t$mask, $dst", [], - MxMOVEMEncoding>>; + "movem."#TYPE.Prefix#"\t$mask, $dst", []> { + let Inst = MxMOVEMEncoding.Value; +} + +foreach AM = MxMoveSupportedAMs in { + foreach TYPE = [MxType16, MxType32] in + def MOVM # TYPE.Size # AM # m # TYPE.Postfix + : MxMOVEM_MR("MxOp"#TYPE.Size#"AddrMode_"#AM).Op, + !cast("MxMoveDstOpEnc_"#AM)>; +} // foreach AM let mayLoad = 1 in -class MxMOVEM_RM +class MxMOVEM_RM : MxInst<(outs), (ins MxMoveMask:$mask, MEMOp:$src), - "movem."#TYPE.Prefix#"\t$src, $mask", [], - MxMOVEMEncoding>>; - -def MOVM32jm : MxMOVEM_MR; -def MOVM32pm : MxMOVEM_MR; + "movem."#TYPE.Prefix#"\t$src, $mask", []> { + let Inst = MxMOVEMEncoding.Value; +} -def MOVM32mj : MxMOVEM_RM; -def MOVM32mp : MxMOVEM_RM; +foreach AM = MxMoveSupportedAMs in { + foreach TYPE = [MxType16, MxType32] in + def MOVM # TYPE.Size # m # AM # TYPE.Postfix + : MxMOVEM_RM("MxOp"#TYPE.Size#"AddrMode_"#AM).Op, + !cast("MxMoveSrcOpEnc_"#AM)>; +} // foreach AM // Pseudo versions. These a required by virtual register spill/restore since // the mask requires real register to encode. 
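// (The mask word is one bit per register: in control and postincrement modes
// bit 0 selects D0 through bit 7 = D7 and bits 8-15 select A0-A7, while for
// the predecrement mode -(An) the hardware reads the mask in reverse order.
// Because each bit position *is* the register number, only a physical
// register can be encoded, which is why virtual-register spills go through
// pseudos.)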
These instruction will be expanded @@ -495,21 +353,27 @@ def MOVM32mp_P : MxMOVEM_RM_Pseudo; /// 0 1 0 0 0 1 0 0 1 1 | MODE | REG /// -------------------------------------------------- let Defs = [CCR] in -class MxMoveToCCR - : MxInst<(outs CCRC:$dst), INS, "move.w\t$src, $dst", [], - MxEncoding, MxBead4Bits<0b0001>, MxBead2Bits<0b01>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>; +class MxMoveToCCR + : MxInst<(outs CCRC:$dst), (ins MEMOp:$src), "move.w\t$src, $dst", []> { + let Inst = (ascend + (descend 0b0100010011, SRC_ENC.EA), + SRC_ENC.Supplement + ); +} -class MxMoveToCCRPseudo : MxPseudo<(outs CCRC:$dst), INS>; +class MxMoveToCCRPseudo + : MxPseudo<(outs CCRC:$dst), (ins MEMOp:$src)>; -let mayLoad = 1 in { -def MOV16cp : MxMoveToCCR<(ins MxType16d.POp:$src), MxEncEAp_1, MxExtI16_1>; -def MOV8cp : MxMoveToCCRPseudo<(ins MxType8d.POp:$src)>; -} // let mayLoad = 1 +let mayLoad = 1 in +foreach AM = MxMoveSupportedAMs in { + def MOV16c # AM : MxMoveToCCR("MxOp16AddrMode_"#AM).Op, + !cast("MxMoveSrcOpEnc_"#AM)>; + def MOV8c # AM : MxMoveToCCRPseudo("MxOp8AddrMode_"#AM).Op>; +} // foreach AM -def MOV16cd : MxMoveToCCR<(ins MxType16d.ROp:$src), MxEncEAd_1, MxExtEmpty>; -def MOV8cd : MxMoveToCCRPseudo<(ins MxType8d.ROp:$src)>; +// Only data register is allowed. +def MOV16cd : MxMoveToCCR; +def MOV8cd : MxMoveToCCRPseudo; /// Move from CCR /// -------------------------------------------------- @@ -518,27 +382,38 @@ def MOV8cd : MxMoveToCCRPseudo<(ins MxType8d.ROp:$src)>; /// | EFFECTIVE ADDRESS /// 0 1 0 0 0 0 1 0 1 1 | MODE | REG /// -------------------------------------------------- -let Uses = [CCR] in -class MxMoveFromCCR - : MxInst, MxBead4Bits<0b0000>, MxBead2Bits<0b01>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>, - Requires<[ IsM68010 ]>; - -class MxMoveFromCCRPseudo : MxPseudo<(outs), INS>; - -let mayStore = 1 in { -def MOV16pc - : MxMoveFromCCR<(outs), (ins MxType16d.POp:$dst, CCRC:$src), MxEncEAp_0, MxExtI16_0>; -def MOV8pc : MxMoveFromCCRPseudo<(ins MxType8d.POp:$dst, CCRC:$src)>; -} // let mayStore = 1 +let Uses = [CCR] in { +class MxMoveFromCCR_R + : MxInst<(outs MxDRD16:$dst), (ins CCRC:$src), "move.w\t$src, $dst", []>, + Requires<[ IsM68010 ]> { + let Inst = (descend 0b0100001011, MxEncAddrMode_d<"dst">.EA); +} -def MOV16dc - : MxMoveFromCCR<(outs MxType16d.ROp:$dst), (ins CCRC:$src), MxEncEAd_0, MxExtEmpty>; +class MxMoveFromCCR_M + : MxInst<(outs), (ins MEMOp:$dst, CCRC:$src), "move.w\t$src, $dst", []>, + Requires<[ IsM68010 ]> { + let Inst = (ascend + (descend 0b0100001011, DST_ENC.EA), + DST_ENC.Supplement + ); +} -def MOV8dc : MxMoveFromCCRPseudo<(ins MxType8d.ROp:$dst, CCRC:$src)>; +class MxMoveFromCCRPseudo + : MxPseudo<(outs), (ins MEMOp:$dst, CCRC:$src)>; +} // let Uses = [CCR] +let mayStore = 1 in +foreach AM = MxMoveSupportedAMs in { + def MOV16 # AM # c + : MxMoveFromCCR_M("MxOp16AddrMode_"#AM).Op, + !cast("MxMoveDstOpEnc_"#AM)>; + def MOV8 # AM # c + : MxMoveFromCCRPseudo("MxOp8AddrMode_"#AM).Op>; +} // foreach AM + +// Only data register is allowed. 
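// (Like the memory forms above, this register form is gated on IsM68010:
// MOVE from CCR did not exist on the original 68000. The 8-bit variants stay
// pseudos because the architectural transfer is always a word operation.)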
+def MOV16dc : MxMoveFromCCR_R; +def MOV8dc : MxMoveFromCCRPseudo; //===----------------------------------------------------------------------===// // LEA @@ -549,18 +424,18 @@ def MOV8dc : MxMoveFromCCRPseudo<(ins MxType8d.ROp:$dst, CCRC:$src)>; /// ---------------------------------------------------- /// 0 1 0 0 | DST REG | 1 1 1 | MODE | REG /// ---------------------------------------------------- -class MxLEA - : MxInst<(outs MxARD32:$dst), (ins SRCOpd:$src), - "lea\t$src, $dst", [(set i32:$dst, SRCPat:$src)], - MxEncoding, MxBeadReg<0>, MxBead4Bits<0x4>, - EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>; - -def LEA32p : MxLEA; -def LEA32f : MxLEA; -def LEA32q : MxLEA; -def LEA32b : MxLEA; +class MxLEA + : MxInst<(outs MxARD32:$dst), (ins SRC.Op:$src), + "lea\t$src, $dst", [(set i32:$dst, SRC.Pat:$src)]> { + let Inst = (ascend + (descend 0b0100, (operand "$dst", 3), 0b111, SRC_ENC.EA), + SRC_ENC.Supplement + ); +} +foreach AM = ["p", "f", "b", "q", "k"] in +def LEA32 # AM : MxLEA("MxOp32AddrMode_"#AM), + !cast("MxMoveSrcOpEnc_"#AM)>; //===----------------------------------------------------------------------===// // Pseudos diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td index 7e0c96a5b1f6..78aed521f13a 100644 --- a/llvm/lib/Target/M68k/M68kInstrFormats.td +++ b/llvm/lib/Target/M68k/M68kInstrFormats.td @@ -200,6 +200,11 @@ class MxEncEA { MxBead DA = da; } +class MxEncMemOp { + dag EA = (ascend); + dag Supplement = (ascend); +} + // FIXME: Is there a way to factorize the addressing mode suffix (i.e. // 'r', 'd', 'a' etc.) and use something like multiclass to replace? def MxEncEAr_0: MxEncEA, MxBead2Bits<0b00>>; @@ -237,6 +242,126 @@ def MxEncEAq : MxEncEA, MxBead2Bits<0b11>, MxBead1Bit<1>>; def MxEncEAk : MxEncEA, MxBead2Bits<0b11>, MxBead1Bit<1>>; def MxEncEAi : MxEncEA, MxBead2Bits<0b11>, MxBead1Bit<1>>; +class MxEncBriefExt { + dag Value = (descend + // D/A + REGISTER + (operand "$"#reg_opnd, 4), + // W/L + size_w_l, + // SCALE + !cond( + !eq(scale, 1) : 0b00, + !eq(scale, 2) : 0b01, + !eq(scale, 4) : 0b10, + !eq(scale, 8) : 0b11 + ), + 0b0, + // Displacement + (operand "$"#disp_opnd, 8, (encoder disp_encoder)) + ); +} + +class MxEncAddrMode_d : MxEncMemOp { + let EA = (descend /*MODE*/0b000, + /*REGISTER*/(operand "$"#reg_opnd, 3)); +} + +class MxEncAddrMode_a : MxEncMemOp { + let EA = (descend /*MODE*/0b001, + /*REGISTER*/(operand "$"#reg_opnd, 3)); +} + +class MxEncAddrMode_r : MxEncMemOp { + let EA = (descend /*MODE without the last bit*/0b00, + /*REGISTER with D/A bit*/(operand "$"#reg_opnd, 4)); +} + +class MxEncAddrMode_k : MxEncMemOp { + let EA = (descend /*MODE*/0b111, + /*REGISTER*/0b011); + + let Supplement = MxEncBriefExt">.Value; +} + +class MxEncAddrMode_q : MxEncMemOp { + let EA = (descend /*MODE*/0b111, + /*REGISTER*/0b010); + + // 16-bit Displacement + let Supplement = (operand "$"#opnd_name, 16, + (encoder "encodePCRelImm<16>")); +} + +class MxEncAddrMode_p : MxEncMemOp { + let EA = (descend /*MODE*/0b101, + /*REGISTER*/(operand "$"#opnd_name#".reg", 3)); + + // 16-bit Displacement + let Supplement = (operand "$"#opnd_name#".disp", 16, + (encoder "encodeRelocImm<16>")); +} + +class MxEncAddrMode_f : MxEncMemOp { + let EA = (descend /*MODE*/0b110, + /*REGISTER*/(operand "$"#opnd_name#".reg", 3)); + + let Supplement = MxEncBriefExt">.Value; +} + +class MxEncAddrMode_j : MxEncMemOp { + let EA = (descend /*MODE*/0b010, + /*REGISTER*/(operand "$"#reg_opnd, 3)); +} + +class MxEncAddrMode_i : MxEncMemOp { + let EA = 
(descend /*MODE*/0b111, + /*REGISTER*/0b100); + + // Immediate + let Supplement = + !cond( + !eq(size, 8) : (descend 0b00000000, (operand "$"#opnd_name, 8)), + !eq(size, 16) : (operand "$"#opnd_name, 16), + !eq(size, 32) : (ascend (slice "$"#opnd_name, 31, 16), + (slice "$"#opnd_name, 15, 0)) + ); +} + +// abs.W -> size_w_l = false +// abs.L -> size_w_l = true +class MxEncAddrMode_abs : MxEncMemOp { + let EA = (descend /*MODE*/0b111, + // Wrap the REGISTER part in another dag to make sure + // the dag assigned to EA only has two arguments. Such + // that it's easier for MOV instructions to reverse + // on its destination part. + /*REGISTER*/(descend 0b00, size_w_l)); + + // Absolute address + let Supplement = !if(size_w_l, + // abs.L + (operand "$"#opnd_name, 32, (encoder "encodeRelocImm<32>")), + // abs.W + (operand "$"#opnd_name, 16, (encoder "encodeRelocImm<16>")) + ); +} + +class MxEncAddrMode_o : MxEncMemOp { + let EA = (descend /*MODE*/0b011, + /*REGISTER*/(operand "$"#reg_opnd, 3)); +} + +class MxEncAddrMode_e : MxEncMemOp { + let EA = (descend /*MODE*/0b100, + /*REGISTER*/(operand "$"#reg_opnd, 3)); +} + // Allows you to specify each bit of opcode class MxEncOpMode { MxBead B0 = b0; @@ -332,6 +457,16 @@ def MxEncSize16 : MxEncSize<0b01>; def MxEncSize32 : MxEncSize<0b10>; def MxEncSize64 : MxEncSize<0b11>; +// TODO: Remove "New" in the name after the codebead-based +// representation is deprecated. +class MxNewEncSize value> { + bits<2> Value = value; +} +def MxNewEncSize8 : MxNewEncSize<0b00>; +def MxNewEncSize16 : MxNewEncSize<0b01>; +def MxNewEncSize32 : MxNewEncSize<0b10>; +def MxNewEncSize64 : MxNewEncSize<0b11>; + // M68k INSTRUCTION. Most instructions specify the location of an operand by // using the effective address field in the operation word. The effective address // is composed of two 3-bit fields: the mode field and the register field. The @@ -357,6 +492,7 @@ class MxInst Beads = beads.Value; + dag Inst = (ascend); // Number of bytes let Size = 0; diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index 105c816f9885..b33469529ca5 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Regex.h" #include @@ -601,40 +602,26 @@ bool M68kInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { bool M68kInstrInfo::isPCRelRegisterOperandLegal( const MachineOperand &MO) const { assert(MO.isReg()); - const auto *MI = MO.getParent(); - const uint8_t *Beads = M68k::getMCInstrBeads(MI->getOpcode()); - assert(*Beads); - - // Only addressing mode k has (non-pc) register with PCRel - // So we're looking for EA Beads equal to - // `3Bits<011>_1Bit<1>_2Bits<11>` - // FIXME: There is an important caveat and two assumptions - // here: The caveat is that EA encoding always sit on the LSB. - // Where the assumptions are that if there are more than one - // operands, the EA encoding for the source operand always sit - // on the LSB. At the same time, k addressing mode can not be used - // on destination operand. 
- // The last assumption is kinda dirty so we need to find a way around - // it - const uint8_t EncEAk[3] = {0b011, 0b1, 0b11}; - for (const uint8_t Pat : EncEAk) { - uint8_t Bead = *(Beads++); - if (!Bead) - return false; - switch (Bead & 0xF) { - default: - return false; - case M68kBeads::Bits1: - case M68kBeads::Bits2: - case M68kBeads::Bits3: { - uint8_t Val = (Bead & 0xF0) >> 4; - if (Val != Pat) - return false; - } - } - } - return true; + // Check whether this MO belongs to an instruction with addressing mode 'k', + // Refer to TargetInstrInfo.h for more information about this function. + + const MachineInstr *MI = MO.getParent(); + const unsigned NameIndices = M68kInstrNameIndices[MI->getOpcode()]; + StringRef InstrName(&M68kInstrNameData[NameIndices]); + const unsigned OperandNo = MI->getOperandNo(&MO); + + // If this machine operand is the 2nd operand, then check + // whether the instruction has destination addressing mode 'k'. + if (OperandNo == 1) + return Regex("[A-Z]+(8|16|32)k[a-z](_TC)?$").match(InstrName); + + // If this machine operand is the last one, then check + // whether the instruction has source addressing mode 'k'. + if (OperandNo == MI->getNumExplicitOperands() - 1) + return Regex("[A-Z]+(8|16|32)[a-z]k(_TC)?$").match(InstrName); + + return false; } void M68kInstrInfo::copyPhysReg(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td index c581dd91eaaa..67500af6bfb2 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.td +++ b/llvm/lib/Target/M68k/M68kInstrInfo.td @@ -291,13 +291,13 @@ def MxARIPD32_TC : MxMemOp<(ops AR32_TC), MxSize32, "e", "printARIPD32Mem", MxA // extension word. The reference is classified as a data reference with the // exception of the jump and jump-to-subroutine instructions. def MxARID : MxOpClass<"ARID">; -def MxARID8 : MxMemOp<(ops i16imm, AR32), MxSize8, "p", "printARID8Mem", MxARID>; -def MxARID16 : MxMemOp<(ops i16imm, AR32), MxSize16, "p", "printARID16Mem", MxARID>; -def MxARID32 : MxMemOp<(ops i16imm, AR32), MxSize32, "p", "printARID32Mem", MxARID>; +def MxARID8 : MxMemOp<(ops i16imm:$disp, AR32:$reg), MxSize8, "p", "printARID8Mem", MxARID>; +def MxARID16 : MxMemOp<(ops i16imm:$disp, AR32:$reg), MxSize16, "p", "printARID16Mem", MxARID>; +def MxARID32 : MxMemOp<(ops i16imm:$disp, AR32:$reg), MxSize32, "p", "printARID32Mem", MxARID>; -def MxARID8_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize8, "p", "printARID8Mem", MxARID>; -def MxARID16_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize16, "p", "printARID16Mem", MxARID>; -def MxARID32_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize32, "p", "printARID32Mem", MxARID>; +def MxARID8_TC : MxMemOp<(ops i16imm:$disp, AR32_TC:$reg), MxSize8, "p", "printARID8Mem", MxARID>; +def MxARID16_TC : MxMemOp<(ops i16imm:$disp, AR32_TC:$reg), MxSize16, "p", "printARID16Mem", MxARID>; +def MxARID32_TC : MxMemOp<(ops i16imm:$disp, AR32_TC:$reg), MxSize32, "p", "printARID32Mem", MxARID>; // ADDRESS REGISTER INDIRECT WITH INDEX. This addressing mode requires one word // of extension. 
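The name-based test in the M68kInstrInfo.cpp hunk above is worth seeing in isolation: generated MOVE names follow MOV<size><dst-AM><src-AM>, so a 'k' letter right after the size means the destination uses (i,PC,Xn), while a trailing 'k' (optionally followed by _TC) means the source does. A self-contained sketch using std::regex (the patch itself uses llvm::Regex, and the instruction names below are illustrative):

    #include <cassert>
    #include <regex>
    #include <string>

    // 'k' in destination position, e.g. MOV32kr.
    static bool dstIsModeK(const std::string &Name) {
      return std::regex_search(Name, std::regex("[A-Z]+(8|16|32)k[a-z](_TC)?$"));
    }

    // 'k' in source position, e.g. MOV16rk or MOV16rk_TC.
    static bool srcIsModeK(const std::string &Name) {
      return std::regex_search(Name, std::regex("[A-Z]+(8|16|32)[a-z]k(_TC)?$"));
    }

    int main() {
      assert(dstIsModeK("MOV32kr"));
      assert(srcIsModeK("MOV16rk_TC"));
      assert(!dstIsModeK("MOV32rr") && !srcIsModeK("MOV32rr"));
      return 0;
    }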
The address of the operand is the sum of the address in the @@ -306,13 +306,19 @@ def MxARID32_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize32, "p", "printARID32Me // The reference is classified as a data reference with the exception of the // jump and jump-to-subroutine instructions def MxARII : MxOpClass<"ARII">; -def MxARII8 : MxMemOp<(ops i8imm, AR32, XR32), MxSize8, "f", "printARII8Mem", MxARII>; -def MxARII16 : MxMemOp<(ops i8imm, AR32, XR32), MxSize16, "f", "printARII16Mem", MxARII>; -def MxARII32 : MxMemOp<(ops i8imm, AR32, XR32), MxSize32, "f", "printARII32Mem", MxARII>; - -def MxARII8_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize8, "f", "printARII8Mem", MxARII>; -def MxARII16_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize16, "f", "printARII16Mem", MxARII>; -def MxARII32_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize32, "f", "printARII32Mem", MxARII>; +def MxARII8 : MxMemOp<(ops i8imm:$disp, AR32:$reg, XR32:$index), + MxSize8, "f", "printARII8Mem", MxARII>; +def MxARII16 : MxMemOp<(ops i8imm:$disp, AR32:$reg, XR32:$index), + MxSize16, "f", "printARII16Mem", MxARII>; +def MxARII32 : MxMemOp<(ops i8imm:$disp, AR32:$reg, XR32:$index), + MxSize32, "f", "printARII32Mem", MxARII>; + +def MxARII8_TC : MxMemOp<(ops i8imm:$disp, AR32_TC:$reg, XR32_TC:$index), + MxSize8, "f", "printARII8Mem", MxARII>; +def MxARII16_TC : MxMemOp<(ops i8imm:$disp, AR32_TC:$reg, XR32_TC:$index), + MxSize16, "f", "printARII16Mem", MxARII>; +def MxARII32_TC : MxMemOp<(ops i8imm:$disp, AR32_TC:$reg, XR32_TC:$index), + MxSize32, "f", "printARII32Mem", MxARII>; // ABSOLUTE SHORT ADDRESS. This addressing mode requires one word of extension. // The address of the operand is the extension word. The 16-bit address is sign @@ -360,9 +366,9 @@ def MxPCD32 : MxMemOp<(ops i16imm), MxSize32, "q", "printPCD32Mem", MxPCD>; // word, and the contents of the index register. The value in the program // counter is the address of the extension word. This reference is classified as // a program reference. -def MxPCI8 : MxMemOp<(ops i8imm, XR32), MxSize8, "k", "printPCI8Mem", MxPCI>; -def MxPCI16 : MxMemOp<(ops i8imm, XR32), MxSize16, "k", "printPCI16Mem", MxPCI>; -def MxPCI32 : MxMemOp<(ops i8imm, XR32), MxSize32, "k", "printPCI32Mem", MxPCI>; +def MxPCI8 : MxMemOp<(ops i8imm:$disp, XR32:$index), MxSize8, "k", "printPCI8Mem", MxPCI>; +def MxPCI16 : MxMemOp<(ops i8imm:$disp, XR32:$index), MxSize16, "k", "printPCI16Mem", MxPCI>; +def MxPCI32 : MxMemOp<(ops i8imm:$disp, XR32:$index), MxSize32, "k", "printPCI32Mem", MxPCI>; } // OPERAND_PCREL def MxImm : AsmOperandClass { @@ -633,6 +639,74 @@ class MxType { + int Size = size; + MxOperand Op = op; + ComplexPattern Pat = pat; +} + +class MxImmOpBundle + : MxOpBundle { + PatFrag ImmPat = pat; +} + +// TODO: We can use MxOpAddrMode_ in more places to +// replace MxType-based operand factoring. 
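The MxOp#size#AddrMode_* bundles defined in the foreach just below share one suffix-letter vocabulary with the MxEncAddrMode_* encoders earlier in the patch. Each letter names one M68k effective-address mode, encoded as a 3-bit mode field plus a 3-bit register field; a compact reference (illustrative C++, not part of the patch, with mode values taken from the /*MODE*/ constants in this diff):

    #include <cstdint>

    // The 6-bit EA field is mode<<3 | reg.
    enum class AM : uint8_t {
      DataReg  = 0b000, // Dn          suffix 'd'
      AddrReg  = 0b001, // An          suffix 'a'
      Indirect = 0b010, // (An)        suffix 'j'
      PostInc  = 0b011, // (An)+       suffix 'o'
      PreDec   = 0b100, // -(An)       suffix 'e'
      Disp16   = 0b101, // (i,An)      suffix 'p'
      Index    = 0b110, // (i,An,Xn)   suffix 'f'
      Special  = 0b111, // reg field picks (ABS) ('b'), (i,PC) ('q'),
                        // (i,PC,Xn) ('k') or #imm ('i')
    };

    constexpr uint8_t eaField(AM Mode, uint8_t Reg) {
      return uint8_t(uint8_t(Mode) << 3 | (Reg & 0b111));
    }

    static_assert(eaField(AM::PostInc, 3) == 0b011011,
                  "(A3)+ encodes as mode 3, reg 3");

The 'r' (any Dn/An) form instead folds the D/A selector into a 4-bit register field, which is also why getMachineOpValue in the MC emitter hunk below ORs 0b1000 into the encoding for address registers.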
+foreach size = [8, 16, 32] in { + // Dn + def MxOp#size#AddrMode_d + : MxOpBundle("MxDRD"#size), ?>; + + // (An) + def MxOp#size#AddrMode_j + : MxOpBundle("MxARI"#size), MxCP_ARI>; + + // (An)+ + def MxOp#size#AddrMode_o + : MxOpBundle("MxARIPI"#size), MxCP_ARIPI>; + + // -(An) + def MxOp#size#AddrMode_e + : MxOpBundle("MxARIPD"#size), MxCP_ARIPD>; + + // (i,An) + def MxOp#size#AddrMode_p + : MxOpBundle("MxARID"#size), MxCP_ARID>; + + // (i,An,Xn) + def MxOp#size#AddrMode_f + : MxOpBundle("MxARII"#size), MxCP_ARII>; + + // (ABS).L + def MxOp#size#AddrMode_b + : MxOpBundle("MxAL"#size), MxCP_AL>; + + // (i,PC) + def MxOp#size#AddrMode_q + : MxOpBundle("MxPCD"#size), MxCP_PCD>; + + // (i,PC,Xn) + def MxOp#size#AddrMode_k + : MxOpBundle("MxPCI"#size), MxCP_PCI>; + + // #imm + def MxOp#size#AddrMode_i + : MxImmOpBundle("Mxi"#size#"imm"), + !cast("MximmSExt"#size)>; +} // foreach size = [8, 16, 32] + +foreach size = [16, 32] in { + // An + def MxOp#size#AddrMode_a + : MxOpBundle("MxARD"#size), ?>; + + // Xn + def MxOp#size#AddrMode_r + : MxOpBundle("MxXRD"#size), ?>; +} // foreach size = [16, 32] + class MxType8Class : MxType; -def MxRODI_L : MxBead1Bit<1>; +defvar MxROKind_R = true; +defvar MxROKind_I = false; -def MxROOP_AS : MxBead2Bits<0b00>; -def MxROOP_LS : MxBead2Bits<0b01>; -def MxROOP_ROX : MxBead2Bits<0b10>; -def MxROOP_RO : MxBead2Bits<0b11>; +defvar MxRODI_R = false; +defvar MxRODI_L = true; + +defvar MxROOP_AS = 0b00; +defvar MxROOP_LS = 0b01; +defvar MxROOP_ROX = 0b10; +defvar MxROOP_RO = 0b11; /// ------------+---------+---+------+---+------+--------- /// F E D C | B A 9 | 8 | 7 6 | 5 | 4 3 | 2 1 0 /// ------------+---------+---+------+---+------+--------- /// 1 1 1 0 | REG/IMM | D | SIZE |R/I| OP | REG /// ------------+---------+---+------+---+------+--------- -class MxSREncoding_R - : MxEncoding, ROOP, MxBead1Bit<1>, SIZE, DIRECTION, - MxBeadDReg<2>, MxBead4Bits<0b1110>>; - -class MxSREncoding_I - : MxEncoding, ROOP, MxBead1Bit<0>, SIZE, DIRECTION, - MxBead3Imm<2, 1>, MxBead4Bits<0b1110>>; +class MxSREncoding ro_op, MxNewEncSize size> { + dag Value = (descend 0b1110, + // REG/IMM + (operand "$"#src_opnd, 3), + direction, size.Value, kind, ro_op, + // REG + (operand "$"#dst_opnd, 3) + ); +} // $reg <- $reg op $reg -class MxSR_DD +class MxSR_DD ROOP> : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd), MN#"."#TYPE.Prefix#"\t$opd, $dst", - [(set TYPE.VT:$dst, (NODE TYPE.VT:$src, TYPE.VT:$opd))], - MxSREncoding_R("MxEncSize"#TYPE.Size)>>; + [(set TYPE.VT:$dst, (NODE TYPE.VT:$src, TYPE.VT:$opd))]> { + let Inst = MxSREncoding("MxNewEncSize"#TYPE.Size)>.Value; +} // $reg <- $reg op $imm -class MxSR_DI +class MxSR_DI ROOP> : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, !cast("Mxi"#TYPE.Size#"imm"):$opd), MN#"."#TYPE.Prefix#"\t$opd, $dst", [(set TYPE.VT:$dst, (NODE TYPE.VT:$src, - !cast("Mximm"#TYPE.Size#"_1to8"):$opd))], - MxSREncoding_I("MxEncSize"#TYPE.Size)>>; + !cast("Mximm"#TYPE.Size#"_1to8"):$opd))]> { + let Inst = MxSREncoding("MxNewEncSize"#TYPE.Size)>.Value; +} -multiclass MxSROp { +multiclass MxSROp ROOP> { let Defs = [CCR] in { let Constraints = "$src = $dst" in { diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.cpp b/llvm/lib/Target/M68k/M68kMachineFunction.cpp index b1e7369116d7..ccc8f87db502 100644 --- a/llvm/lib/Target/M68k/M68kMachineFunction.cpp +++ b/llvm/lib/Target/M68k/M68kMachineFunction.cpp @@ -18,3 +18,10 @@ using namespace llvm; void M68kMachineFunctionInfo::anchor() {} + +MachineFunctionInfo *M68kMachineFunctionInfo::clone( + 
BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.h b/llvm/lib/Target/M68k/M68kMachineFunction.h index 93c5255199d4..6ddf53d7d693 100644 --- a/llvm/lib/Target/M68k/M68kMachineFunction.h +++ b/llvm/lib/Target/M68k/M68kMachineFunction.h @@ -21,8 +21,6 @@ namespace llvm { class M68kMachineFunctionInfo : public MachineFunctionInfo { - MachineFunction &MF; - /// Non-zero if the function has base pointer and makes call to /// llvm.eh.sjlj.setjmp. When non-zero, the value is a displacement from the /// frame pointer to a slot where the base pointer is stashed. @@ -68,7 +66,12 @@ class M68kMachineFunctionInfo : public MachineFunctionInfo { unsigned ArgumentStackSize = 0; public: - explicit M68kMachineFunctionInfo(MachineFunction &MF) : MF(MF) {} + explicit M68kMachineFunctionInfo(const MachineFunction &MF) {} + + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; bool getRestoreBasePointer() const { return RestoreBasePointerOffset != 0; } void setRestoreBasePointer(const MachineFunction *MF); diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp index 0cae7ac4e312..5b632299fa4c 100644 --- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp +++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp @@ -19,6 +19,7 @@ #include "MCTargetDesc/M68kMCTargetDesc.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.h b/llvm/lib/Target/M68k/M68kRegisterInfo.h index 7f822e1cb34f..fc55e19a958b 100644 --- a/llvm/lib/Target/M68k/M68kRegisterInfo.h +++ b/llvm/lib/Target/M68k/M68kRegisterInfo.h @@ -97,6 +97,14 @@ public: bool canRealignStack(const MachineFunction &MF) const override; Register getFrameRegister(const MachineFunction &MF) const override; + + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override { + if (RC == &M68k::CCRCRegClass) + return &M68k::DR32RegClass; + return RC; + } + unsigned getStackRegister() const { return StackPtr; } unsigned getBaseRegister() const { return BasePtr; } unsigned getGlobalBaseRegister() const { return GlobalBasePtr; } diff --git a/llvm/lib/Target/M68k/M68kSubtarget.h b/llvm/lib/Target/M68k/M68kSubtarget.h index 9bf2984983a1..9dd52095959e 100644 --- a/llvm/lib/Target/M68k/M68kSubtarget.h +++ b/llvm/lib/Target/M68k/M68kSubtarget.h @@ -22,7 +22,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp index 9227bd6c3a78..6b093623a106 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -39,31 +40,30 @@ class M68kMCCodeEmitter : public MCCodeEmitter { const 
MCInstrInfo &MCII; MCContext &Ctx; -public: - M68kMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups, + APInt &Inst, APInt &Scratch, + const MCSubtargetInfo &STI) const; - ~M68kMCCodeEmitter() override {} + void getMachineOpValue(const MCInst &MI, const MCOperand &Op, + unsigned InsertPos, APInt &Value, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - // TableGen'erated function - const uint8_t *getGenInstrBeads(const MCInst &MI) const { - return M68k::getMCInstrBeads(MI.getOpcode()); - } + template + void encodeRelocImm(const MCInst &MI, unsigned OpIdx, unsigned InsertPos, + APInt &Value, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned encodeBits(unsigned ThisByte, uint8_t Bead, const MCInst &MI, - const MCInstrDesc &Desc, uint64_t &Buffer, - unsigned Offset, SmallVectorImpl &Fixups, + template + void encodePCRelImm(const MCInst &MI, unsigned OpIdx, unsigned InsertPos, + APInt &Value, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - unsigned encodeReg(unsigned ThisByte, uint8_t Bead, const MCInst &MI, - const MCInstrDesc &Desc, uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; +public: + M68kMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) {} - unsigned encodeImm(unsigned ThisByte, uint8_t Bead, const MCInst &MI, - const MCInstrDesc &Desc, uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + ~M68kMCCodeEmitter() override {} void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, @@ -72,316 +72,176 @@ public: } // end anonymous namespace -unsigned M68kMCCodeEmitter::encodeBits(unsigned ThisByte, uint8_t Bead, - const MCInst &MI, - const MCInstrDesc &Desc, - uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - unsigned Num = 0; - switch (Bead & 0xF) { - case M68kBeads::Bits1: - Num = 1; - break; - case M68kBeads::Bits2: - Num = 2; - break; - case M68kBeads::Bits3: - Num = 3; - break; - case M68kBeads::Bits4: - Num = 4; - break; - } - unsigned char Val = (Bead & 0xF0) >> 4; - - LLVM_DEBUG(dbgs() << "\tEncodeBits" - << " Num: " << Num << " Val: 0x"); - LLVM_DEBUG(dbgs().write_hex(Val) << "\n"); +#include "M68kGenMCCodeEmitter.inc" - Buffer |= (Val << Offset); - - return Num; -} +// Select the proper unsigned integer type from a bit size. 
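// For example, select_uint_t<8>::type is uint8_t and select_uint_t<16>::type
// is uint16_t, with larger sizes falling through to uint32_t/uint64_t. With
// C++14's std::conditional_t the same trait can be written more compactly
// (equivalent sketch, not part of the patch):
//
//   template <unsigned Size>
//   using select_uint = std::conditional_t<
//       Size == 8, uint8_t,
//       std::conditional_t<Size == 16, uint16_t,
//           std::conditional_t<Size == 32, uint32_t, uint64_t>>>;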
+template struct select_uint_t { + using type = typename std::conditional< + Size == 8, uint8_t, + typename std::conditional< + Size == 16, uint16_t, + typename std::conditional::type>::type>::type; +}; -unsigned M68kMCCodeEmitter::encodeReg(unsigned ThisByte, uint8_t Bead, - const MCInst &MI, const MCInstrDesc &Desc, - uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - bool DA, Reg; - switch (Bead & 0xF) { - default: - llvm_unreachable("Unrecognized Bead code for register type"); - case M68kBeads::DAReg: - Reg = true; - DA = true; - break; - case M68kBeads::DA: - Reg = false; - DA = true; - break; - case M68kBeads::DReg: - case M68kBeads::Reg: - Reg = true; - DA = false; - break; +// On a LE host: +// MSB LSB MSB LSB +// | 0x12 0x34 | 0xAB 0xCD | -> | 0xAB 0xCD | 0x12 0x34 | +// (On a BE host nothing changes) +template static value_t swapWord(value_t Val) { + const unsigned NumWords = sizeof(Val) / 2; + if (NumWords <= 1) + return Val; + Val = support::endian::byte_swap(Val, support::big); + value_t NewVal = 0; + for (unsigned i = 0U; i != NumWords; ++i) { + uint16_t Part = (Val >> (i * 16)) & 0xFFFF; + Part = support::endian::byte_swap(Part, support::big); + NewVal |= (Part << (i * 16)); } + return NewVal; +} - unsigned Op = (Bead & 0x70) >> 4; - bool Alt = (Bead & 0x80); - LLVM_DEBUG(dbgs() << "\tEncodeReg" - << " Op: " << Op << ", DA: " << DA << ", Reg: " << Reg - << ", Alt: " << Alt << "\n"); - - auto MIOpIdx = M68k::getLogicalOperandIdx(MI.getOpcode(), Op); - bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL; - - MCOperand MCO; - if (M68kII::hasMultiMIOperands(MI.getOpcode(), Op)) { - if (IsPCRel) { - assert(Alt && - "PCRel addresses use Alt bead register encoding by default"); - MCO = MI.getOperand(MIOpIdx + M68k::PCRelIndex); - } else { - MCO = MI.getOperand(MIOpIdx + (Alt ? M68k::MemIndex : M68k::MemBase)); - } +// Figure out which byte we're at in big endian mode. +template static unsigned getBytePosition(unsigned BitPos) { + if (Size % 16) { + return static_cast(BitPos / 8 + ((BitPos & 0b1111) < 8 ? 
1 : -1)); } else { - assert(!Alt && "You cannot use Alt register with a simple operand"); - MCO = MI.getOperand(MIOpIdx); + assert(!(BitPos & 0b1111) && "Not aligned to word boundary?"); + return BitPos / 8; } - - unsigned RegNum = MCO.getReg(); - auto RI = Ctx.getRegisterInfo(); - - unsigned Written = 0; - if (Reg) { - uint32_t Val = RI->getEncodingValue(RegNum); - Buffer |= (Val & 7) << Offset; - Offset += 3; - Written += 3; - } - - if (DA) { - Buffer |= (uint64_t)M68kII::isAddressRegister(RegNum) << Offset; - Written++; - } - - return Written; -} - -static unsigned EmitConstant(uint64_t Val, unsigned Size, unsigned Pad, - uint64_t &Buffer, unsigned Offset) { - assert(Size + Offset <= 64 && isUIntN(Size, Val) && "Value does not fit"); - - // Writing Value in host's endianness - Buffer |= (Val & ((1ULL << Size) - 1)) << Offset; - return Size + Pad; } -unsigned M68kMCCodeEmitter::encodeImm(unsigned ThisByte, uint8_t Bead, - const MCInst &MI, const MCInstrDesc &Desc, - uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - unsigned ThisWord = ThisByte / 2; - unsigned Size = 0; - unsigned Pad = 0; - unsigned FixOffset = 0; - int64_t Addendum = 0; - bool NoExpr = false; - - unsigned Type = Bead & 0xF; - unsigned Op = (Bead & 0x70) >> 4; - bool Alt = (Bead & 0x80); - - auto MIOpIdx = M68k::getLogicalOperandIdx(MI.getOpcode(), Op); - bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL; - - // The PC value upon instruction reading of a short jump will point to the - // next instruction, thus we need to compensate 2 bytes, which is the diff - // between the patch point and the PC. - if (IsPCRel && ThisWord == 0) - Addendum -= 2; - - switch (Type) { - // ??? what happens if it is not byte aligned - // ??? is it even possible - case M68kBeads::Disp8: - Size = 8; - Pad = 0; - FixOffset = ThisByte + 1; - Addendum += 1; - break; - case M68kBeads::Imm8: - Size = 8; - Pad = 8; - FixOffset = ThisByte; - break; - case M68kBeads::Imm16: - Size = 16; - Pad = 0; - FixOffset = ThisByte; - break; - case M68kBeads::Imm32: - Size = 32; - Pad = 0; - FixOffset = ThisByte; - break; - case M68kBeads::Imm3: - Size = 3; - Pad = 0; - NoExpr = true; - break; - } - - LLVM_DEBUG(dbgs() << "\tEncodeImm" - << " Op: " << Op << ", Size: " << Size << ", Alt: " << Alt - << "\n"); - - MCOperand MCO; - if (M68kII::hasMultiMIOperands(MI.getOpcode(), Op)) { - - if (IsPCRel) { - assert(!Alt && "You cannot use ALT operand with PCRel"); - MCO = MI.getOperand(MIOpIdx + M68k::PCRelDisp); - } else { - MCO = MI.getOperand(MIOpIdx + (Alt ? M68k::MemOuter : M68k::MemDisp)); +// We need special handlings for relocatable & pc-relative operands that are +// larger than a word. +// A M68k instruction is aligned by word (16 bits). That means, 32-bit +// (& 64-bit) immediate values are separated into hi & lo words and placed +// at lower & higher addresses, respectively. For immediate values that can +// be easily expressed in TG, we explicitly rotate the word ordering like +// this: +// ``` +// (ascend (slice "$imm", 31, 16), (slice "$imm", 15, 0)) +// ``` +// For operands that call into encoder functions, we need to use the `swapWord` +// function to assure the correct word ordering on LE host. Note that +// M68kMCCodeEmitter does massage _byte_ ordering of the final encoded +// instruction but it assumes everything aligns on word boundaries. So things +// will go wrong if we don't take care of the _word_ ordering here. 
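The rotation described above can be seen in isolation: swapping the 16-bit halves of a 32-bit immediate puts the high word at the low end of the encoding, which the emitter writes out first. A minimal sketch of that effect for 32-bit values (this mirrors what swapWord achieves; it is not the patch's code):

    #include <cassert>
    #include <cstdint>

    // 0x1234ABCD must reach the instruction stream as the word sequence
    // 0x1234, 0xABCD; rotating the halves places 0x1234 in the low word,
    // which is emitted first.
    constexpr uint32_t rotateWords(uint32_t V) { return (V << 16) | (V >> 16); }

    int main() {
      static_assert(rotateWords(0x1234ABCDu) == 0xABCD1234u, "halves swapped");
      assert(rotateWords(rotateWords(0xDEADBEEFu)) == 0xDEADBEEFu); // involution
      return 0;
    }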
+template +void M68kMCCodeEmitter::encodeRelocImm(const MCInst &MI, unsigned OpIdx, + unsigned InsertPos, APInt &Value, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using value_t = typename select_uint_t::type; + const MCOperand &MCO = MI.getOperand(OpIdx); + if (MCO.isImm()) { + Value |= swapWord(static_cast(MCO.getImm())); + } else if (MCO.isExpr()) { + const MCExpr *Expr = MCO.getExpr(); + + // Absolute address + int64_t Addr; + if (Expr->evaluateAsAbsolute(Addr)) { + Value |= swapWord(static_cast(Addr)); + return; } - if (MCO.isExpr()) { - assert(!NoExpr && "Cannot use expression here"); - const MCExpr *Expr = MCO.getExpr(); + // Relocatable address + unsigned InsertByte = getBytePosition(InsertPos); + Fixups.push_back(MCFixup::create(InsertByte, Expr, + getFixupForSize(Size, /*IsPCRel=*/false), + MI.getLoc())); + } +} - // This only makes sense for PCRel instructions since PC points to the - // extension word and Disp8 for example is right justified and requires - // correction. E.g. R_68K_PC32 is calculated as S + A - P, P for Disp8 - // will be EXTENSION_WORD + 1 thus we need to have A equal to 1 to - // compensate. - // TODO count extension words - if (IsPCRel && Addendum != 0) { +template +void M68kMCCodeEmitter::encodePCRelImm(const MCInst &MI, unsigned OpIdx, + unsigned InsertPos, APInt &Value, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MCO = MI.getOperand(OpIdx); + if (MCO.isImm()) { + using value_t = typename select_uint_t::type; + Value |= swapWord(static_cast(MCO.getImm())); + } else if (MCO.isExpr()) { + const MCExpr *Expr = MCO.getExpr(); + unsigned InsertByte = getBytePosition(InsertPos); + + // Special handlings for sizes smaller than a word. + if (Size < 16) { + int LabelOffset = 0; + if (InsertPos < 16) + // If the patch point is at the first word, PC is pointing at the + // next word. + LabelOffset = InsertByte - 2; + else if (InsertByte % 2) + // Otherwise the PC is pointing at the first byte of this word. + // So we need to consider the offset between PC and the fixup byte. 
+ LabelOffset = 1; + + if (LabelOffset) Expr = MCBinaryExpr::createAdd( - Expr, MCConstantExpr::create(Addendum, Ctx), Ctx); - } - - Fixups.push_back(MCFixup::create( - FixOffset, Expr, getFixupForSize(Size, IsPCRel), MI.getLoc())); - // Write zeros - return EmitConstant(0, Size, Pad, Buffer, Offset); + Expr, MCConstantExpr::create(LabelOffset, Ctx), Ctx); } - } else { - MCO = MI.getOperand(MIOpIdx); - if (MCO.isExpr()) { - assert(!NoExpr && "Cannot use expression here"); - const MCExpr *Expr = MCO.getExpr(); - - if (Addendum != 0) { - Expr = MCBinaryExpr::createAdd( - Expr, MCConstantExpr::create(Addendum, Ctx), Ctx); - } - - Fixups.push_back(MCFixup::create( - FixOffset, Expr, getFixupForSize(Size, IsPCRel), MI.getLoc())); - // Write zeros - return EmitConstant(0, Size, Pad, Buffer, Offset); - } + Fixups.push_back(MCFixup::create(InsertByte, Expr, + getFixupForSize(Size, /*IsPCRel=*/true), + MI.getLoc())); } +} - int64_t I = MCO.getImm(); - - // Store 8 as 0, thus making range 1-8 - if (Type == M68kBeads::Imm3 && Alt) { - assert(I && "Cannot encode Alt Imm3 zero value"); - I %= 8; +void M68kMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &Op, + unsigned InsertPos, APInt &Value, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Register + if (Op.isReg()) { + unsigned RegNum = Op.getReg(); + const auto *RI = Ctx.getRegisterInfo(); + Value |= RI->getEncodingValue(RegNum); + // Setup the D/A bit + if (M68kII::isAddressRegister(RegNum)) + Value |= 0b1000; + } else if (Op.isImm()) { + // Immediate + Value |= static_cast(Op.getImm()); + } else if (Op.isExpr()) { + // Absolute address + int64_t Addr; + if (!Op.getExpr()->evaluateAsAbsolute(Addr)) + report_fatal_error("Unsupported asm expression. Only absolute address " + "can be placed here."); + Value |= static_cast(Addr); } else { - assert(isIntN(Size, I)); + llvm_unreachable("Unsupported operand type"); } - - uint64_t Imm = I; - - // 32 bit Imm requires HI16 first then LO16 - if (Size == 32) { - Offset += EmitConstant((Imm >> 16) & 0xFFFF, 16, Pad, Buffer, Offset); - EmitConstant(Imm & 0xFFFF, 16, Pad, Buffer, Offset); - return Size; - } - - return EmitConstant(Imm & ((1ULL << Size) - 1), Size, Pad, Buffer, Offset); } -#include "M68kGenMCCodeBeads.inc" - void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MCII.get(Opcode); LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(Opcode) << "(" << Opcode << ")\n"); - const uint8_t *Beads = getGenInstrBeads(MI); - if (!Beads || !*Beads) { - llvm_unreachable("*** Instruction does not have Beads defined"); - } - - uint64_t Buffer = 0; - unsigned Offset = 0; - unsigned ThisByte = 0; - - for (uint8_t Bead = *Beads; Bead; Bead = *++Beads) { - // Check for control beads - if (!(Bead & 0xF)) { - switch (Bead >> 4) { - case M68kBeads::Ignore: - continue; - } - } - - switch (Bead & 0xF) { - default: - llvm_unreachable("Unknown Bead code"); - break; - case M68kBeads::Bits1: - case M68kBeads::Bits2: - case M68kBeads::Bits3: - case M68kBeads::Bits4: - Offset += - encodeBits(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI); - break; - case M68kBeads::DAReg: - case M68kBeads::DA: - case M68kBeads::DReg: - case M68kBeads::Reg: - Offset += - encodeReg(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI); - break; - case M68kBeads::Disp8: - case M68kBeads::Imm8: - case M68kBeads::Imm16: - case M68kBeads::Imm32: - case 
M68kBeads::Imm3: - Offset += - encodeImm(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI); - break; - } - - // Since M68k is Big Endian we need to rotate each instruction word - while (Offset / 16) { - support::endian::write(OS, Buffer, support::big); - Buffer >>= 16; - Offset -= 16; - ThisByte += 2; + // Try using the new method first. + APInt EncodedInst(16, 0U); + APInt Scratch(16, 0U); + getBinaryCodeForInstr(MI, Fixups, EncodedInst, Scratch, STI); + + ArrayRef Data(EncodedInst.getRawData(), EncodedInst.getNumWords()); + int64_t InstSize = EncodedInst.getBitWidth(); + for (uint64_t Word : Data) { + for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) { + support::endian::write(OS, static_cast(Word), + support::big); + Word >>= 16; } } - - assert(Offset == 0 && "M68k Instructions are % 2 bytes"); - assert((ThisByte && !(ThisByte % 2)) && "M68k Instructions are % 2 bytes"); } MCCodeEmitter *llvm::createM68kMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new M68kMCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h index aa53e13af4fc..0dc601ad876b 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h @@ -38,7 +38,6 @@ MCAsmBackend *createM68kAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCTargetOptions &Options); MCCodeEmitter *createM68kMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); /// Construct an M68k ELF object writer. diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index 13cba8b079a9..196e492046b9 100644 --- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" diff --git a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp index 9bbb2938ab75..a4d63a62f6aa 100644 --- a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp +++ b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp @@ -14,8 +14,8 @@ #include "MSP430.h" #include "TargetInfo/MSP430TargetInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -72,7 +72,7 @@ static const unsigned GR8DecoderTable[] = { static DecodeStatus DecodeGR8RegisterClass(MCInst &MI, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -90,7 +90,7 @@ static const unsigned GR16DecoderTable[] = { static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -100,16 +100,16 @@ static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo, } static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address, - const void *Decoder); + const 
MCDisassembler *Decoder); static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); #include "MSP430GenDisassemblerTables.inc" static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int64_t Imm; switch (Bits) { default: @@ -127,7 +127,7 @@ static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address, static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Reg = Bits & 15; unsigned Imm = Bits >> 4; diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp index 953916776c57..23af7d1149ed 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp @@ -35,7 +35,7 @@ class MSP430AsmBackend : public MCAsmBackend { public: MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI) : MCAsmBackend(support::little), OSABI(OSABI) {} - ~MSP430AsmBackend() override {} + ~MSP430AsmBackend() override = default; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp index bb5351af6523..aa097ccb9de6 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp @@ -24,7 +24,7 @@ public: : MCELFObjectTargetWriter(false, OSABI, ELF::EM_MSP430, /*HasRelocationAddend*/ true) {} - ~MSP430ELFObjectWriter() override {} + ~MSP430ELFObjectWriter() override = default; protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp index 087045ccb1df..0cdb3a595f71 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp @@ -12,6 +12,7 @@ #include "MSP430MCTargetDesc.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCSectionELF.h" @@ -42,7 +43,7 @@ MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S, // MSP430 EABI (slaa534.pdf, part 13). MCSection *AttributeSection = getStreamer().getContext().getELFSection( ".MSP430.attributes", ELF::SHT_MSP430_ATTRIBUTES, 0); - Streamer.SwitchSection(AttributeSection); + Streamer.switchSection(AttributeSection); // Format version. 
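// (0x41 is ASCII 'A', the conventional first byte of an ELF attributes
// section; ARM and RISC-V build attributes begin with the same magic.)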
Streamer.emitInt8(0x41); diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp index cf57e87a073d..2b16c6234a51 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp @@ -167,7 +167,7 @@ unsigned MSP430MCCodeEmitter::getCGImmOpValue(const MCInst &MI, unsigned Op, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(Op); assert(MO.isImm() && "Expr operand expected"); - + int64_t Imm = MO.getImm(); switch (Imm) { default: @@ -200,7 +200,6 @@ unsigned MSP430MCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned Op, } MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new MSP430MCCodeEmitter(Ctx, MCII); } diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 02bfbe40c6bf..24b0b3298592 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -31,7 +31,6 @@ class MCTargetStreamer; /// Creates a machine code emitter for MSP430. MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createMSP430MCAsmBackend(const Target &T, diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp index 8eb3fbd58328..85c59d5b14b5 100644 --- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -166,11 +166,11 @@ void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { MCSection *IV = OutStreamer->getContext().getELFSection( "__interrupt_vector_" + IVIdx, ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); - OutStreamer->SwitchSection(IV); + OutStreamer->switchSection(IV); const MCSymbol *FunctionSymbol = getSymbol(F); OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); - OutStreamer->SwitchSection(Cur); + OutStreamer->switchSection(Cur); } bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index abd48dfd5139..b623730e1574 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -255,7 +254,7 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N, Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ? CurDAG->getTargetFrameIndex( AM.Base.FrameIndex, - getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + N.getValueType()) : AM.Base.Reg; if (AM.GV) diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index aebfc6b0ae2e..73ab3b52e907 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -670,16 +670,17 @@ SDValue MSP430TargetLowering::LowerCCCArguments( InVals.push_back(ArgValue); } } else { - // Only arguments passed on the stack should make it here. + // Only arguments passed on the stack should make it here. 
assert(VA.isMemLoc()); SDValue InVal; ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Flags.isByVal()) { + MVT PtrVT = VA.getLocVT(); int FI = MFI.CreateFixedObject(Flags.getByValSize(), VA.getLocMemOffset(), true); - InVal = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + InVal = DAG.getFrameIndex(FI, PtrVT); } else { // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; @@ -777,13 +778,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (!Reg) llvm_unreachable("sret virtual register not created in entry block"); + MVT PtrVT = getFrameIndexTy(DAG.getDataLayout()); SDValue Val = - DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy(DAG.getDataLayout())); + DAG.getCopyFromReg(Chain, dl, Reg, PtrVT); unsigned R12 = MSP430::R12; Chain = DAG.getCopyToReg(Chain, dl, R12, Val, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(R12, getPointerTy(DAG.getDataLayout()))); + RetOps.push_back(DAG.getRegister(R12, PtrVT)); } unsigned Opc = (CallConv == CallingConv::MSP430_INTR ? @@ -814,7 +816,7 @@ SDValue MSP430TargetLowering::LowerCCCCallTo( // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + MVT PtrVT = getFrameIndexTy(DAG.getDataLayout()); Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); @@ -1010,7 +1012,7 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast(Op)->getGlobal(); int64_t Offset = cast(Op)->getOffset(); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + EVT PtrVT = Op.getValueType(); // Create the TargetGlobalAddress node, folding in the constant offset. SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), PtrVT, Offset); @@ -1021,7 +1023,7 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); const char *Sym = cast(Op)->getSymbol(); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + EVT PtrVT = Op.getValueType(); SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT); return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result); @@ -1030,8 +1032,8 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - auto PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast(Op)->getBlockAddress(); + EVT PtrVT = Op.getValueType(); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT); return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result); @@ -1248,11 +1250,11 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo(); int ReturnAddrIndex = FuncInfo->getRAIndex(); - auto PtrVT = getPointerTy(MF.getDataLayout()); + MVT PtrVT = getFrameIndexTy(MF.getDataLayout()); if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. 
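// The slot is one pointer wide and sits immediately below the frame base
// (offset -SlotSize); deriving the width from PtrVT.getStoreSize() keeps it
// in sync with the frame-index type instead of the raw DataLayout pointer
// size.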
- uint64_t SlotSize = MF.getDataLayout().getPointerSize(); + uint64_t SlotSize = PtrVT.getStoreSize(); ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, -SlotSize, true); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -1271,12 +1273,12 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + EVT PtrVT = Op.getValueType(); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(DAG.getDataLayout().getPointerSize(), dl, MVT::i16); + DAG.getConstant(PtrVT.getStoreSize(), dl, MVT::i16); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo()); @@ -1308,7 +1310,9 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo(); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + + SDValue Ptr = Op.getOperand(1); + EVT PtrVT = Ptr.getValueType(); // Frame index of first vararg argument SDValue FrameIndex = @@ -1316,14 +1320,14 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, const Value *SV = cast(Op.getOperand(2))->getValue(); // Create a store of the frame index to the location operand - return DAG.getStore(Op.getOperand(0), SDLoc(Op), FrameIndex, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), SDLoc(Op), FrameIndex, Ptr, MachinePointerInfo(SV)); } SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast(Op); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + EVT PtrVT = Op.getValueType(); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), PtrVT, Result); } diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index e9e26e295fd5..0646d6faebed 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -197,8 +197,7 @@ bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. 
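The analyzeBranch hunk below collapses an erase-one-at-a-time loop into a single range erase, which is clearer and avoids repeated iterator fix-ups. The same transformation on a plain std::list (illustrative sketch, not LLVM code):

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> Block = {10, 20, 30, 40}; // pretend 20 is the JMP
      auto I = std::next(Block.begin());       // iterator at the JMP
      // Old shape: while (std::next(I) != Block.end()) Block.erase(std::next(I));
      Block.erase(std::next(I), Block.end());  // new shape: one range erase
      assert(Block.size() == 2 && Block.back() == 20);
      return 0;
    }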
-    while (std::next(I) != MBB.end())
-      std::next(I)->eraseFromParent();
+    MBB.erase(std::next(I), MBB.end());

     Cond.clear();
     FBB = nullptr;
diff --git a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
index 1d3a6d118bd6..93b37b523a71 100644
--- a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -11,3 +11,10 @@
 using namespace llvm;

 void MSP430MachineFunctionInfo::anchor() { }
+
+MachineFunctionInfo *MSP430MachineFunctionInfo::clone(
+    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+    const {
+  return DestMF.cloneInfo<MSP430MachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 261db9e288f5..93b388255877 100644
--- a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -43,6 +43,11 @@ public:
   explicit MSP430MachineFunctionInfo(MachineFunction &MF)
     : CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {}

+  MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const override;
+
   unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
   void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index a33146ce2239..6bba224aab8b 100644
--- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -27,9 +27,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Target() {
 }

 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
-  if (!RM.hasValue())
-    return Reloc::Static;
-  return *RM;
+  return RM.value_or(Reloc::Static);
 }

 static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -51,7 +49,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
   initAsmInfo();
 }

-MSP430TargetMachine::~MSP430TargetMachine() {}
+MSP430TargetMachine::~MSP430TargetMachine() = default;

 namespace {
 /// MSP430 Code Generator Pass Configuration Options.
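Two modernizations recur throughout this import and show up in the MSP430 hunks above: llvm::Optional's value_or collapses the hasValue()/dereference pattern into one call, and a ranged container erase replaces element-at-a-time deletion loops. A minimal standalone sketch of both idioms, using std::optional and std::list as stand-ins for the LLVM types so it compiles on its own:

// Sketch only: std::optional and std::list stand in for llvm::Optional and
// the MachineBasicBlock instruction list; RelocModel is an invented enum.
#include <cassert>
#include <iterator>
#include <list>
#include <optional>

enum class RelocModel { Static, PIC_ };

// Before: if (!RM.has_value()) return RelocModel::Static; return *RM;
static RelocModel getEffectiveRelocModel(std::optional<RelocModel> RM) {
  return RM.value_or(RelocModel::Static); // one call, same behavior
}

int main() {
  assert(getEffectiveRelocModel(std::nullopt) == RelocModel::Static);
  assert(getEffectiveRelocModel(RelocModel::PIC_) == RelocModel::PIC_);

  // Before: while (std::next(I) != L.end()) erase the element after I.
  // After: erase the whole tail in a single ranged call.
  std::list<int> L{10, 20, 30, 40};
  auto I = L.begin();              // analogous to the trailing JMP
  L.erase(std::next(I), L.end());  // drop everything after it
  assert(L.size() == 1 && L.front() == 10);
  return 0;
}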
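The clone() hook added to MSP430MachineFunctionInfo above follows the shape this import gives every target's function info: a virtual override that forwards to a templated helper, which copy-constructs the concrete type into the destination function's allocator. A hedged, LLVM-free sketch of that shape; Arena, FuncInfoBase, and cloneHelper are invented stand-ins for BumpPtrAllocator, MachineFunctionInfo, and MachineFunction::cloneInfo:

// Sketch under the assumptions named above; not the LLVM implementation.
#include <cstddef>
#include <new>

struct Arena {
  // Toy arena: plain operator new keeps the sketch self-contained.
  void *allocate(std::size_t Size) { return ::operator new(Size); }
};

struct FuncInfoBase {
  virtual ~FuncInfoBase() = default;
  // Lets a function be deep-copied without knowing the concrete info type.
  virtual FuncInfoBase *clone(Arena &A) const = 0;
};

// What a cloneInfo<T>-style helper does: placement-copy the derived object.
template <typename T> static T *cloneHelper(Arena &A, const T &Src) {
  return new (A.allocate(sizeof(T))) T(Src);
}

struct MSP430Info final : FuncInfoBase {
  unsigned CalleeSavedFrameSize = 0;
  FuncInfoBase *clone(Arena &A) const override {
    return cloneHelper(A, *this); // mirrors DestMF.cloneInfo<...>(*this)
  }
};

int main() {
  Arena A;
  MSP430Info Src;
  Src.CalleeSavedFrameSize = 4;
  FuncInfoBase *Copy = Src.clone(A);
  bool Ok = static_cast<MSP430Info *>(Copy)->CalleeSavedFrameSize == 4;
  Copy->~FuncInfoBase(); // toy arena: destroy explicitly, bytes are leaked
  return Ok ? 0 : 1;
}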
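The Mips changes that follow are dominated by one mechanical migration: every Decode* callback in the disassembler drops its untyped const void *Decoder parameter in favor of const MCDisassembler *, so helpers such as getReg no longer have to static_cast an opaque pointer. A reduced sketch of the before/after shape; Disassembler, decodeRegOld, and decodeRegNew are illustrative names, not the LLVM ones:

// Sketch only: Disassembler stands in for llvm::MCDisassembler.
#include <cassert>

struct Disassembler {
  unsigned RegBase = 100;
};

// Before: an opaque pointer that every callback had to cast blindly.
static unsigned decodeRegOld(unsigned RegNo, const void *Decoder) {
  const auto *Dis = static_cast<const Disassembler *>(Decoder); // unchecked
  return Dis->RegBase + RegNo;
}

// After: the parameter carries its real type; no cast, and passing the
// wrong pointer now fails to compile instead of misbehaving at run time.
static unsigned decodeRegNew(unsigned RegNo, const Disassembler *Decoder) {
  return Decoder->RegBase + RegNo;
}

int main() {
  Disassembler D;
  assert(decodeRegOld(3, &D) == decodeRegNew(3, &D));
  return 0;
}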
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 736c41f8ac03..b5817d9ae700 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -25,6 +25,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
@@ -3412,10 +3413,10 @@ bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc,
   const MipsMCExpr *LoExpr =
       MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());

-  getStreamer().SwitchSection(ReadOnlySection);
+  getStreamer().switchSection(ReadOnlySection);
   getStreamer().emitLabel(Sym, IDLoc);
   getStreamer().emitInt32(ImmOp32);
-  getStreamer().SwitchSection(CS);
+  getStreamer().switchSection(CS);

   if (emitPartialAddress(TOut, IDLoc, Sym))
     return true;
@@ -3464,11 +3465,11 @@ bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc,
   const MipsMCExpr *LoExpr =
       MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());

-  getStreamer().SwitchSection(ReadOnlySection);
+  getStreamer().switchSection(ReadOnlySection);
   getStreamer().emitLabel(Sym, IDLoc);
   getStreamer().emitValueToAlignment(8);
   getStreamer().emitIntValue(ImmOp64, 8);
-  getStreamer().SwitchSection(CS);
+  getStreamer().switchSection(CS);

   unsigned TmpReg = getATReg(IDLoc);
   if (!TmpReg)
@@ -3547,11 +3548,11 @@ bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU,
   const MipsMCExpr *LoExpr =
       MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());

-  getStreamer().SwitchSection(ReadOnlySection);
+  getStreamer().switchSection(ReadOnlySection);
   getStreamer().emitLabel(Sym, IDLoc);
   getStreamer().emitValueToAlignment(8);
   getStreamer().emitIntValue(ImmOp64, 8);
-  getStreamer().SwitchSection(CS);
+  getStreamer().switchSection(CS);

   if (emitPartialAddress(TOut, IDLoc, Sym))
     return true;
@@ -8179,7 +8180,7 @@ bool MipsAsmParser::parseRSectionDirective(StringRef Section) {
   MCSection *ELFSection = getContext().getELFSection(
       Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
-  getParser().getStreamer().SwitchSection(ELFSection);
+  getParser().getStreamer().switchSection(ELFSection);

   getParser().Lex(); // Eat EndOfStatement token.
   return false;
@@ -8197,7 +8198,7 @@ bool MipsAsmParser::parseSSectionDirective(StringRef Section, unsigned Type) {
   MCSection *ELFSection = getContext().getELFSection(
       Section, Type, ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_MIPS_GPREL);
-  getParser().getStreamer().SwitchSection(ELFSection);
+  getParser().getStreamer().switchSection(ELFSection);

   getParser().Lex(); // Eat EndOfStatement token.
   return false;
diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 9a66dd77c0d3..4e40a84ecfd0 100644
--- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -15,8 +15,8 @@
 #include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -79,338 +79,279 @@ public:

 // Forward declare these because the autogenerated code will reference them.
// Definitions are further down. -static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus +DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus +DecodeGPRMM16MovePRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, - unsigned RegNo, 
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeBranchTarget(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeJumpTarget(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeJumpTarget(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeBranchTarget21(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget21(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeBranchTarget26(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget26(MCInst &Inst, unsigned Offset, uint64_t Address, - const void 
*Decoder); + const MCDisassembler *Decoder); // DecodeBranchTarget7MM - Decode microMIPS branch offset, which is // shifted left by 1 bit. -static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); // DecodeBranchTarget10MM - Decode microMIPS branch offset, which is // shifted left by 1 bit. -static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); // DecodeBranchTargetMM - Decode microMIPS branch offset, which is // shifted left by 1 bit. -static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); // DecodeBranchTarget26MM - Decode microMIPS branch offset, which is // shifted left by 1 bit. -static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); // DecodeJumpTargetMM - Decode microMIPS jump target, which is // shifted left by 1 bit. -static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); // DecodeJumpTargetXMM - Decode microMIPS jump and link exchange target, // which is shifted left by 2 bit. -static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMem(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemEVA(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeMemEVA(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeLoadByte15(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeCacheOpMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodePrefeOpMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeSyncI(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeSyncI(MCInst &Inst, 
unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeSyncI_MM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeSynciR6(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeSynciR6(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMImm4(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm4(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMImm9(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm9(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMImm12(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMMImm16(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm16(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, - unsigned Value, +static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); 
-static DecodeStatus DecodeLi16Imm(MCInst &Inst, - unsigned Value, +static DecodeStatus DecodeLi16Imm(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, - unsigned Value, +static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeUImmWithOffsetAndScale(Inst, Value, Address, Decoder); } @@ -418,128 +359,132 @@ static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, template static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeInsSize(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); /// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't /// handle. 
template static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); template static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template -static DecodeStatus -DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn, + uint64_t Address, + const MCDisassembler *Decoder); template static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t 
Address, - const void *Decoder); + const MCDisassembler *Decoder); template static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); template static DecodeStatus DecodeCRC(MCInst &MI, InsnType Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static MCDisassembler *createMipsDisassembler( const Target &T, @@ -569,16 +514,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsDisassembler() { #include "MipsGenDisassemblerTables.inc" -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const MipsDisassembler *Dis = static_cast(D); - const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); +static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) { + const MCRegisterInfo *RegInfo = D->getContext().getRegisterInfo(); return *(RegInfo->getRegClass(RC).begin() + RegNo); } template static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { - using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *); + const MCDisassembler *Decoder) { + using DecodeFN = + DecodeStatus (*)(MCInst &, unsigned, uint64_t, const MCDisassembler *); // The size of the n field depends on the element size // The register class also depends on this. @@ -624,7 +569,8 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, template static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { InsnType Rs = fieldFromInstruction(insn, 16, 5); InsnType Imm = fieldFromInstruction(insn, 0, 16); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, @@ -638,7 +584,7 @@ static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, template static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { InsnType Rs = fieldFromInstruction(insn, 21, 5); InsnType Imm = fieldFromInstruction(insn, 0, 16); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, @@ -653,7 +599,7 @@ static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address, template static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled // (otherwise we would have matched the ADDI instruction from the earlier // ISA's instead). 
@@ -692,7 +638,7 @@ static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn, template static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { InsnType Rt = fieldFromInstruction(insn, 21, 5); InsnType Rs = fieldFromInstruction(insn, 16, 5); int64_t Imm = 0; @@ -726,7 +672,7 @@ static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn, template static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled // (otherwise we would have matched the ADDI instruction from the earlier // ISA's instead). @@ -765,7 +711,7 @@ static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, template static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { InsnType Rt = fieldFromInstruction(insn, 21, 5); InsnType Rs = fieldFromInstruction(insn, 16, 5); int64_t Imm = 0; @@ -799,7 +745,7 @@ static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn, template static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // We have: // 0b110101 ttttt sssss iiiiiiiiiiiiiiii // Invalid if rt == 0 @@ -838,7 +784,7 @@ static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn, template static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // We have: // 0b111101 ttttt sssss iiiiiiiiiiiiiiii // Invalid if rt == 0 @@ -877,7 +823,7 @@ static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn, template static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled // (otherwise we would have matched the BLEZL instruction from the earlier // ISA's instead). @@ -920,7 +866,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, template static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled // (otherwise we would have matched the BGTZL instruction from the earlier // ISA's instead). @@ -964,7 +910,7 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, template static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled // (otherwise we would have matched the BGTZ instruction from the earlier // ISA's instead). @@ -1012,8 +958,8 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, template static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled // (otherwise we would have matched the BLEZL instruction from the earlier // ISA's instead). 
@@ -1056,7 +1002,7 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn, // for feature / behaviour parity with binutils. template static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Msbd = fieldFromInstruction(Insn, 11, 5); unsigned Lsb = fieldFromInstruction(Insn, 6, 5); unsigned Size = 0; @@ -1098,7 +1044,7 @@ static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address, // for feature / behaviour parity with binutils. template static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Msbd = fieldFromInstruction(Insn, 11, 5); unsigned Lsb = fieldFromInstruction(Insn, 6, 5); unsigned Size = 0; @@ -1140,7 +1086,7 @@ static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address, // Auto-generated decoder wouldn't add the third operand for CRC32*. template static DecodeStatus DecodeCRC(MCInst &MI, InsnType Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { InsnType Rs = fieldFromInstruction(Insn, 21, 5); InsnType Rt = fieldFromInstruction(Insn, 16, 5); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID, @@ -1384,17 +1330,15 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return MCDisassembler::Fail; } -static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeCPU16RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { return MCDisassembler::Fail; } -static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1403,10 +1347,9 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, Mips::GPRMM16RegClassID, RegNo); @@ -1414,10 +1357,9 @@ static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, Mips::GPRMM16ZeroRegClassID, RegNo); @@ -1425,10 +1367,9 @@ static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeGPRMM16MovePRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, Mips::GPRMM16MovePRegClassID, RegNo); @@ -1436,10 +1377,9 @@ static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst, return 
MCDisassembler::Success; } -static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); @@ -1447,27 +1387,24 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (static_cast(Decoder)->isGP64()) return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1476,10 +1413,9 @@ static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1488,10 +1424,9 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, Mips::CCRRegClassID, RegNo); @@ -1499,10 +1434,9 @@ static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, Mips::FCCRegClassID, RegNo); @@ -1512,7 +1446,7 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1521,10 +1455,8 @@ static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeMem(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Reg = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1543,10 +1475,8 @@ static 
DecodeStatus DecodeMem(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemEVA(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeMemEVA(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn >> 7); unsigned Reg = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1564,10 +1494,9 @@ static DecodeStatus DecodeMemEVA(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeLoadByte15(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Base = fieldFromInstruction(Insn, 16, 5); unsigned Reg = fieldFromInstruction(Insn, 21, 5); @@ -1582,10 +1511,8 @@ static DecodeStatus DecodeLoadByte15(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCacheOp(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Hint = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1599,10 +1526,9 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCacheOpMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<12>(Insn & 0xfff); unsigned Base = fieldFromInstruction(Insn, 16, 5); unsigned Hint = fieldFromInstruction(Insn, 21, 5); @@ -1616,10 +1542,9 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodePrefeOpMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn & 0x1ff); unsigned Base = fieldFromInstruction(Insn, 16, 5); unsigned Hint = fieldFromInstruction(Insn, 21, 5); @@ -1633,10 +1558,9 @@ static DecodeStatus DecodePrefeOpMM(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn >> 7); unsigned Hint = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1650,10 +1574,8 @@ static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeSyncI(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1666,7 +1588,8 @@ static DecodeStatus DecodeSyncI(MCInst &Inst, } static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Base = fieldFromInstruction(Insn, 
16, 5); @@ -1678,10 +1601,8 @@ static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeSynciR6(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeSynciR6(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Immediate = SignExtend32<16>(Insn & 0xffff); unsigned Base = fieldFromInstruction(Insn, 16, 5); @@ -1694,7 +1615,8 @@ static DecodeStatus DecodeSynciR6(MCInst &Inst, } static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10)); unsigned Reg = fieldFromInstruction(Insn, 6, 5); unsigned Base = fieldFromInstruction(Insn, 11, 5); @@ -1739,10 +1661,9 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMImm4(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm4(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Offset = Insn & 0xf; unsigned Reg = fieldFromInstruction(Insn, 7, 3); unsigned Base = fieldFromInstruction(Insn, 4, 3); @@ -1797,10 +1718,9 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Offset = Insn & 0x1F; unsigned Reg = fieldFromInstruction(Insn, 5, 5); @@ -1813,10 +1733,9 @@ static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Offset = Insn & 0x7F; unsigned Reg = fieldFromInstruction(Insn, 7, 3); @@ -1829,10 +1748,9 @@ static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset; switch (Inst.getOpcode()) { case Mips::LWM16_MMR6: @@ -1854,10 +1772,9 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMImm9(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm9(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn & 0x1ff); unsigned Reg = fieldFromInstruction(Insn, 21, 5); unsigned Base = fieldFromInstruction(Insn, 16, 5); @@ -1875,10 +1792,9 @@ static DecodeStatus DecodeMemMMImm9(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMImm12(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<12>(Insn & 0x0fff); unsigned Reg = fieldFromInstruction(Insn, 21, 5); unsigned Base = fieldFromInstruction(Insn, 16, 5); @@ -1910,10 +1826,9 @@ static DecodeStatus 
DecodeMemMMImm12(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMemMMImm16(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeMemMMImm16(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Reg = fieldFromInstruction(Insn, 21, 5); unsigned Base = fieldFromInstruction(Insn, 16, 5); @@ -1928,10 +1843,8 @@ static DecodeStatus DecodeMemMMImm16(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeFMem(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Reg = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1947,7 +1860,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst, } static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // This function is the same as DecodeFMem but with the Reg and Base fields // swapped according to microMIPS spec. int Offset = SignExtend32<16>(Insn & 0xffff); @@ -1964,10 +1878,8 @@ static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeFMem2(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Reg = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1982,10 +1894,8 @@ static DecodeStatus DecodeFMem2(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeFMem3(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Reg = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -2000,10 +1910,9 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<11>(Insn & 0x07ff); unsigned Reg = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 11, 5); @@ -2019,7 +1928,8 @@ static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, } static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { int Offset = SignExtend32<11>(Insn & 0x07ff); unsigned Reg = fieldFromInstruction(Insn, 21, 5); unsigned Base = fieldFromInstruction(Insn, 16, 5); @@ -2034,10 +1944,9 @@ static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int64_t Offset = SignExtend64<9>((Insn >> 7) & 0x1ff); unsigned Rt = 
fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -2056,10 +1965,9 @@ static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Currently only hardware register 29 is supported. if (RegNo != 29) return MCDisassembler::Fail; @@ -2067,10 +1975,9 @@ static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 30 || RegNo %2) return MCDisassembler::Fail; @@ -2079,10 +1986,9 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; @@ -2091,10 +1997,9 @@ static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; @@ -2103,10 +2008,9 @@ static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; @@ -2115,10 +2019,9 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -2127,10 +2030,9 @@ static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -2139,10 +2041,9 @@ static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -2151,10 +2052,9 @@ static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus 
DecodeMSA128DRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -2163,10 +2063,9 @@ static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; @@ -2175,10 +2074,9 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -2187,10 +2085,9 @@ static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -2199,122 +2096,109 @@ static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = (SignExtend32<16>(Offset) * 4) + 4; Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = (SignExtend32<16>(Offset) * 2); Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeJumpTarget(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeJumpTarget(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; Inst.addOperand(MCOperand::createImm(JumpOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget21(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget21(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = SignExtend32<21>(Offset) * 4 + 4; Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = SignExtend32<21>(Offset) * 4 + 4; Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget26(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget26(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) 
{ int32_t BranchOffset = SignExtend32<26>(Offset) * 4 + 4; Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = SignExtend32<8>(Offset << 1); Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = SignExtend32<11>(Offset << 1); Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, - unsigned Offset, +static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, unsigned Offset, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { int32_t BranchOffset = SignExtend32<16>(Offset) * 2 + 4; Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, - unsigned Offset, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, unsigned Offset, + uint64_t Address, + const MCDisassembler *Decoder) { int32_t BranchOffset = SignExtend32<27>(Offset << 1); Inst.addOperand(MCOperand::createImm(BranchOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 1; Inst.addOperand(MCOperand::createImm(JumpOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; Inst.addOperand(MCOperand::createImm(JumpOffset)); return MCDisassembler::Success; } -static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, - unsigned Value, +static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Value == 0) Inst.addOperand(MCOperand::createImm(1)); else if (Value == 0x7) @@ -2324,10 +2208,9 @@ static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeLi16Imm(MCInst &Inst, - unsigned Value, +static DecodeStatus DecodeLi16Imm(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Value == 0x7F) Inst.addOperand(MCOperand::createImm(-1)); else @@ -2335,18 +2218,17 @@ static DecodeStatus DecodeLi16Imm(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, - unsigned Value, +static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, unsigned Value, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(Value == 0x0 ? 
8 : Value)); return MCDisassembler::Success; } template -static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, uint64_t Address, + const MCDisassembler *Decoder) { Value &= ((1 << Bits) - 1); Value *= Scale; Inst.addOperand(MCOperand::createImm(Value + Offset)); @@ -2354,18 +2236,16 @@ static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, } template -static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, uint64_t Address, + const MCDisassembler *Decoder) { int32_t Imm = SignExtend32(Value) * ScaleBy; Inst.addOperand(MCOperand::createImm(Imm + Offset)); return MCDisassembler::Success; } -static DecodeStatus DecodeInsSize(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { // First we need to grab the pos(lsb) from MCInst. // This function only handles the 32 bit variants of ins, as dins // variants are handled differently. @@ -2376,19 +2256,21 @@ static DecodeStatus DecodeInsSize(MCInst &Inst, } static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4)); return MCDisassembler::Success; } static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(SignExtend32<18>(Insn) * 8)); return MCDisassembler::Success; } -static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { int32_t DecodedValue; switch (Insn) { case 0: DecodedValue = 256; break; @@ -2402,7 +2284,8 @@ static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // Insn must be >= 0, since it is unsigned that condition is always true. 
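  // Insn is a 4-bit field here, so it selects one of the 16 immediates in the table below.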
assert(Insn < 16); int32_t DecodedValues[] = {128, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, @@ -2411,10 +2294,9 @@ static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeRegListOperand(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, Mips::FP}; unsigned RegNum; @@ -2442,7 +2324,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3}; unsigned RegLst; switch(Inst.getOpcode()) { @@ -2465,8 +2347,8 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned RegPair = fieldFromInstruction(Insn, 7, 3); if (DecodeMovePRegPair(Inst, RegPair, Address, Decoder) == MCDisassembler::Fail) @@ -2491,7 +2373,8 @@ static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { switch (RegPair) { default: return MCDisassembler::Fail; @@ -2533,15 +2416,16 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair, } static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(SignExtend32<25>(Insn << 2))); return MCDisassembler::Success; } template static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // We have: // 0b000111 ttttt sssss iiiiiiiiiiiiiiii // Invalid if rt == 0 @@ -2589,8 +2473,8 @@ static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn, template static DecodeStatus DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // We have: // 0b000110 ttttt sssss iiiiiiiiiiiiiiii // Invalid if rt == 0 diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h index 6091ee24b04d..1a5bb64863ee 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -9,7 +9,6 @@ #ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H #define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H -#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MipsABIFlags.h" #include @@ -17,6 +16,7 @@ namespace llvm { class MCStreamer; +class StringRef; struct MipsABIFlagsSection { // Internal representation of the fp_abi related values used in .module. 
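A note on the branch-target decoders above: each one sign-extends the raw immediate field, scales it to a byte offset, and, for delay-slot branches, adds 4. A minimal standalone sketch of DecodeBranchTarget's arithmetic, assuming the offset is taken relative to the branch instruction itself; the helper names are illustrative and not part of this patch:

    #include <cstdint>

    // Mirrors SignExtend32<16>(Field): sign-extend a 16-bit field to 32 bits.
    // Relies on two's-complement conversion and arithmetic right shift,
    // well-defined since C++20 and universal in practice before that.
    static int32_t signExtend16(uint32_t Field) {
      return static_cast<int32_t>(Field << 16) >> 16;
    }

    // Mirrors DecodeBranchTarget: words to bytes, biased past the branch.
    int32_t branchTargetOffset(uint32_t Imm16) {
      return signExtend16(Imm16) * 4 + 4;
    }

    // Example: Imm16 = 0xFFFF (-1) yields offset 0, a branch to itself,
    // which is how "b ." is encoded.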
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index 3315a8ba18d6..227947d2766e 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -9,8 +9,10 @@ #include "MipsABIInfo.h" #include "MipsRegisterInfo.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/LowLevelTypeImpl.h" using namespace llvm; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index a3dbe6f84a1e..8050f9b8cae0 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -301,6 +301,15 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } Optional MipsAsmBackend::getFixupKind(StringRef Name) const { + unsigned Type = llvm::StringSwitch(Name) + .Case("BFD_RELOC_NONE", ELF::R_MIPS_NONE) + .Case("BFD_RELOC_16", ELF::R_MIPS_16) + .Case("BFD_RELOC_32", ELF::R_MIPS_32) + .Case("BFD_RELOC_64", ELF::R_MIPS_64) + .Default(-1u); + if (Type != -1u) + return static_cast(FirstLiteralRelocationKind + Type); + return StringSwitch>(Name) .Case("R_MIPS_NONE", FK_NONE) .Case("R_MIPS_32", FK_Data_4) @@ -502,6 +511,8 @@ getFixupKindInfo(MCFixupKind Kind) const { static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds, "Not all MIPS big endian fixup kinds added!"); + if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -534,6 +545,8 @@ bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) { + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return true; const unsigned FixupKind = Fixup.getKind(); switch (FixupKind) { default: diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 9c317e3f8840..4990696fcfe0 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -220,6 +220,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, bool IsPCRel) const { // Determine the type of the relocation. 
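  // Fixups built from "BFD_RELOC_*" names (see getFixupKind above) carry the raw ELF relocation type biased by FirstLiteralRelocationKind, so the new early return below strips the bias and emits that type as-is, bypassing the switch.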
unsigned Kind = Fixup.getTargetKind(); + if (Kind >= FirstLiteralRelocationKind) + return Kind - FirstLiteralRelocationKind; switch (Kind) { case FK_NONE: diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index e6e32ec7f27c..9843b6144343 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -90,9 +90,9 @@ void MipsELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { Labels.push_back(Symbol); } -void MipsELFStreamer::SwitchSection(MCSection *Section, +void MipsELFStreamer::switchSection(MCSection *Section, const MCExpr *Subsection) { - MCELFStreamer::SwitchSection(Section, Subsection); + MCELFStreamer::switchSection(Section, Subsection); Labels.clear(); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index f6a2c039c0c3..ac70e40d4dfe 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -50,7 +50,7 @@ public: /// Overriding this function allows us to dismiss all labels that are /// candidates for marking as microMIPS when .section directive is processed. - void SwitchSection(MCSection *Section, + void switchSection(MCSection *Section, const MCExpr *Subsection = nullptr) override; /// Overriding these functions allows us to dismiss all labels that are diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp index 3700d6309e1a..632192103d38 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp @@ -88,29 +88,30 @@ void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address, break; case Mips::Save16: O << "\tsave\t"; - printSaveRestore(MI, O); + printSaveRestore(MI, STI, O); O << " # 16 bit inst\n"; return; case Mips::SaveX16: O << "\tsave\t"; - printSaveRestore(MI, O); + printSaveRestore(MI, STI, O); O << "\n"; return; case Mips::Restore16: O << "\trestore\t"; - printSaveRestore(MI, O); + printSaveRestore(MI, STI, O); O << " # 16 bit inst\n"; return; case Mips::RestoreX16: O << "\trestore\t"; - printSaveRestore(MI, O); + printSaveRestore(MI, STI, O); O << "\n"; return; } // Try to print any aliases first. 
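  // Both alias paths now receive Address and STI so that PC-relative branch aliases can print resolved targets instead of raw immediates (see printBranchOperand below).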
- if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O)) - printInstruction(MI, Address, O); + if (!printAliasInstr(MI, Address, STI, O) && + !printAlias(*MI, Address, STI, O)) + printInstruction(MI, Address, STI, O); printAnnotation(O, Annot); switch (MI->getOpcode()) { @@ -123,7 +124,7 @@ void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address, } void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { printRegName(O, Op.getReg()); @@ -139,8 +140,42 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, Op.getExpr()->print(O, &MAI, true); } +void MipsInstPrinter::printJumpOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (!Op.isImm()) + return printOperand(MI, OpNo, STI, O); + + if (PrintBranchImmAsAddress) + O << formatHex(Op.getImm()); + else + O << formatImm(Op.getImm()); +} + +void MipsInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, + unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (!Op.isImm()) + return printOperand(MI, OpNo, STI, O); + + if (PrintBranchImmAsAddress) { + uint64_t Target = Address + Op.getImm(); + if (STI.hasFeature(Mips::FeatureMips32)) + Target &= 0xffffffff; + else if (STI.hasFeature(Mips::FeatureMips16)) + Target &= 0xffff; + O << formatHex(Target); + } else { + O << formatImm(Op.getImm()); + } +} + template -void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { +void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(opNum); if (MO.isImm()) { uint64_t Imm = MO.getImm(); @@ -151,11 +186,12 @@ void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { return; } - printOperand(MI, opNum, O); + printOperand(MI, opNum, STI, O); } -void MipsInstPrinter:: -printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { +void MipsInstPrinter::printMemOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Load/Store memory operands -- imm($reg) // If PIC target the target is loaded as the // pattern lw $25,%call16($28) @@ -175,24 +211,26 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { break; } - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, STI, O); O << "("; - printOperand(MI, opNum, O); + printOperand(MI, opNum, STI, O); O << ")"; } -void MipsInstPrinter:: -printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) { +void MipsInstPrinter::printMemOperandEA(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // when using stack locations for not load/store instructions // print the same way as all normal 3 operand instructions. 
- printOperand(MI, opNum, O); + printOperand(MI, opNum, STI, O); O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, STI, O); } -void MipsInstPrinter:: -printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { - const MCOperand& MO = MI->getOperand(opNum); +void MipsInstPrinter::printFCCOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo & /* STI */, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); O << MipsFCCToString((Mips::CondCode)MO.getImm()); } @@ -202,82 +240,116 @@ printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) { } bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI, - unsigned OpNo, raw_ostream &OS) { + uint64_t Address, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &OS, + bool IsBranch) { OS << "\t" << Str << "\t"; - printOperand(&MI, OpNo, OS); + if (IsBranch) + printBranchOperand(&MI, Address, OpNo, STI, OS); + else + printOperand(&MI, OpNo, STI, OS); return true; } bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI, - unsigned OpNo0, unsigned OpNo1, - raw_ostream &OS) { - printAlias(Str, MI, OpNo0, OS); + uint64_t Address, unsigned OpNo0, + unsigned OpNo1, const MCSubtargetInfo &STI, + raw_ostream &OS, bool IsBranch) { + printAlias(Str, MI, Address, OpNo0, STI, OS, IsBranch); OS << ", "; - printOperand(&MI, OpNo1, OS); + if (IsBranch) + printBranchOperand(&MI, Address, OpNo1, STI, OS); + else + printOperand(&MI, OpNo1, STI, OS); return true; } -bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { +bool MipsInstPrinter::printAlias(const MCInst &MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &OS) { switch (MI.getOpcode()) { case Mips::BEQ: case Mips::BEQ_MM: // beq $zero, $zero, $L2 => b $L2 // beq $r0, $zero, $L2 => beqz $r0, $L2 return (isReg(MI, 0) && isReg(MI, 1) && - printAlias("b", MI, 2, OS)) || - (isReg(MI, 1) && printAlias("beqz", MI, 0, 2, OS)); + printAlias("b", MI, Address, 2, STI, OS, true)) || + (isReg(MI, 1) && + printAlias("beqz", MI, Address, 0, 2, STI, OS, true)); case Mips::BEQ64: // beq $r0, $zero, $L2 => beqz $r0, $L2 - return isReg(MI, 1) && printAlias("beqz", MI, 0, 2, OS); + return isReg(MI, 1) && + printAlias("beqz", MI, Address, 0, 2, STI, OS, true); case Mips::BNE: case Mips::BNE_MM: // bne $r0, $zero, $L2 => bnez $r0, $L2 - return isReg(MI, 1) && printAlias("bnez", MI, 0, 2, OS); + return isReg(MI, 1) && + printAlias("bnez", MI, Address, 0, 2, STI, OS, true); case Mips::BNE64: // bne $r0, $zero, $L2 => bnez $r0, $L2 - return isReg(MI, 1) && printAlias("bnez", MI, 0, 2, OS); + return isReg(MI, 1) && + printAlias("bnez", MI, Address, 0, 2, STI, OS, true); case Mips::BGEZAL: // bgezal $zero, $L1 => bal $L1 - return isReg(MI, 0) && printAlias("bal", MI, 1, OS); + return isReg(MI, 0) && + printAlias("bal", MI, Address, 1, STI, OS, true); case Mips::BC1T: // bc1t $fcc0, $L1 => bc1t $L1 - return isReg(MI, 0) && printAlias("bc1t", MI, 1, OS); + return isReg(MI, 0) && + printAlias("bc1t", MI, Address, 1, STI, OS, true); case Mips::BC1F: // bc1f $fcc0, $L1 => bc1f $L1 - return isReg(MI, 0) && printAlias("bc1f", MI, 1, OS); + return isReg(MI, 0) && + printAlias("bc1f", MI, Address, 1, STI, OS, true); case Mips::JALR: + // jalr $zero, $r1 => jr $r1 // jalr $ra, $r1 => jalr $r1 - return isReg(MI, 0) && printAlias("jalr", MI, 1, OS); + return (isReg(MI, 0) && + printAlias("jr", MI, Address, 1, STI, OS)) || + (isReg(MI, 0) && + printAlias("jalr", MI, Address, 1, STI, OS)); case Mips::JALR64: + // jalr $zero, $r1 => jr $r1 // 
jalr $ra, $r1 => jalr $r1 - return isReg(MI, 0) && printAlias("jalr", MI, 1, OS); + return (isReg(MI, 0) && + printAlias("jr", MI, Address, 1, STI, OS)) || + (isReg(MI, 0) && + printAlias("jalr", MI, Address, 1, STI, OS)); case Mips::NOR: case Mips::NOR_MM: case Mips::NOR_MMR6: // nor $r0, $r1, $zero => not $r0, $r1 - return isReg(MI, 2) && printAlias("not", MI, 0, 1, OS); + return isReg(MI, 2) && + printAlias("not", MI, Address, 0, 1, STI, OS); case Mips::NOR64: // nor $r0, $r1, $zero => not $r0, $r1 - return isReg(MI, 2) && printAlias("not", MI, 0, 1, OS); + return isReg(MI, 2) && + printAlias("not", MI, Address, 0, 1, STI, OS); case Mips::OR: + case Mips::ADDu: // or $r0, $r1, $zero => move $r0, $r1 - return isReg(MI, 2) && printAlias("move", MI, 0, 1, OS); - default: return false; + // addu $r0, $r1, $zero => move $r0, $r1 + return isReg(MI, 2) && + printAlias("move", MI, Address, 0, 1, STI, OS); + default: + return false; } } -void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) { +void MipsInstPrinter::printSaveRestore(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (i != 0) O << ", "; if (MI->getOperand(i).isReg()) printRegName(O, MI->getOperand(i).getReg()); else - printUImm<16>(MI, i, O); + printUImm<16>(MI, i, STI, O); } } -void MipsInstPrinter:: -printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) { +void MipsInstPrinter::printRegisterList(const MCInst *MI, int opNum, + const MCSubtargetInfo & /* STI */, + raw_ostream &O) { // - 2 because register List is always first operand of instruction and it is // always followed by memory operand (base + offset). for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) { diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h index 68b13bf1fcc3..d91612b15a1a 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h @@ -80,38 +80,50 @@ public: // Autogenerated by tblgen. 
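  // The MCSubtargetInfo parameters on these tablegen'd entry points come from the new PassSubtarget = 1 setting on MipsAsmWriter in Mips.td.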
std::pair getMnemonic(const MCInst *MI) override; - void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + void printInstruction(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); + bool printAliasInstr(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, uint64_t Address, unsigned OpIdx, unsigned PrintMethodIdx, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); private: - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, - raw_ostream &O) { - printOperand(MI, OpNum, O); - } + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printJumpOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); template - void printUImm(const MCInst *MI, int opNum, raw_ostream &O); - void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); - void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); - void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); + void printUImm(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &O); + void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &O); + void printMemOperandEA(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &O); void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O); - bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, - raw_ostream &OS); - bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0, - unsigned OpNo1, raw_ostream &OS); - bool printAlias(const MCInst &MI, raw_ostream &OS); - void printSaveRestore(const MCInst *MI, raw_ostream &O); - void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O); + bool printAlias(const char *Str, const MCInst &MI, uint64_t Address, + unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &OS, + bool IsBranch = false); + bool printAlias(const char *Str, const MCInst &MI, uint64_t Address, + unsigned OpNo0, unsigned OpNo1, const MCSubtargetInfo &STI, + raw_ostream &OS, bool IsBranch = false); + bool printAlias(const MCInst &MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &OS); + void printSaveRestore(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + void printRegisterList(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index b81ebedfb9c7..cf311337d5eb 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -42,13 +42,11 @@ using namespace llvm; namespace llvm { MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, - 
const MCRegisterInfo &MRI, MCContext &Ctx) { return new MipsMCCodeEmitter(MCII, Ctx, false); } MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new MipsMCCodeEmitter(MCII, Ctx, true); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index b7ecb0fdca5e..8531177ee924 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -31,10 +31,8 @@ class Target; class Triple; MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index befa883d5877..f1aa90d24023 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -24,7 +24,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { MipsTargetStreamer *MTS = static_cast(Streamer->getTargetStreamer()); - Streamer->PushSection(); + Streamer->pushSection(); // We need to distinguish between N64 and the rest because at the moment // we don't emit .Mips.options for other ELFs other than N64. @@ -38,7 +38,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, 1); MCA.registerSection(*Sec); Sec->setAlignment(Align(8)); - Streamer->SwitchSection(Sec); + Streamer->switchSection(Sec); Streamer->emitInt8(ELF::ODK_REGINFO); // kind Streamer->emitInt8(40); // size @@ -56,7 +56,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { ELF::SHF_ALLOC, 24); MCA.registerSection(*Sec); Sec->setAlignment(MTS->getABI().IsN32() ? 
Align(8) : Align(4)); - Streamer->SwitchSection(Sec); + Streamer->switchSection(Sec); Streamer->emitInt32(ri_gprmask); Streamer->emitInt32(ri_cprmask[0]); @@ -67,7 +67,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { Streamer->emitInt32(ri_gp_value); } - Streamer->PopSection(); + Streamer->popSection(); } void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg, diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 57cd016da4dc..caae5890fae1 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -11,17 +11,19 @@ //===----------------------------------------------------------------------===// #include "MipsTargetStreamer.h" -#include "MipsInstPrinter.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsELFStreamer.h" +#include "MipsInstPrinter.h" #include "MipsMCExpr.h" #include "MipsMCTargetDesc.h" #include "MipsTargetObjectFile.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -38,6 +40,10 @@ static bool isMicroMips(const MCSubtargetInfo *STI) { return STI->getFeatureBits()[Mips::FeatureMicroMips]; } +static bool isMips32r6(const MCSubtargetInfo *STI) { + return STI->getFeatureBits()[Mips::FeatureMips32r6]; +} + MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; @@ -277,10 +283,18 @@ void MipsTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc, const MCSubtargetInfo *STI) { - if (hasShortDelaySlot) - emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI); - else - emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI); + // The default case of `nop` is `sll $zero, $zero, 0`. + unsigned Opc = Mips::SLL; + if (isMicroMips(STI) && hasShortDelaySlot) { + Opc = isMips32r6(STI) ? Mips::MOVE16_MMR6 : Mips::MOVE16_MM; + emitRR(Opc, Mips::ZERO, Mips::ZERO, IDLoc, STI); + return; + } + + if (isMicroMips(STI)) + Opc = isMips32r6(STI) ? Mips::SLL_MMR6 : Mips::SLL_MM; + + emitRRI(Opc, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI); } void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { @@ -900,8 +914,8 @@ void MipsTargetELFStreamer::finish() { unsigned Alignment = Section.getAlignment(); if (Alignment) { - OS.SwitchSection(&Section); - if (Section.UseCodeAlign()) + OS.switchSection(&Section); + if (Section.useCodeAlign()) OS.emitCodeAlignment(Alignment, &STI, Alignment); else OS.emitValueToAlignment(Alignment, 0, 1, Alignment); @@ -1012,9 +1026,9 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { MCA.registerSection(*Sec); Sec->setAlignment(Align(4)); - OS.PushSection(); + OS.pushSection(); - OS.SwitchSection(Sec); + OS.switchSection(Sec); OS.emitValueImpl(ExprRef, 4); @@ -1032,7 +1046,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { // the information gathered up until this point. GPRInfoSet = FPRInfoSet = FrameInfoSet = false; - OS.PopSection(); + OS.popSection(); // .end also implicitly sets the size. 
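  // A temporary symbol is emitted at the current PC so the size (current PC minus the function symbol) can be attached via emitELFSize.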
MCSymbol *CurPCSym = Context.createTempSymbol(); @@ -1312,7 +1326,7 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() { ".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24); MCA.registerSection(*Sec); Sec->setAlignment(Align(8)); - OS.SwitchSection(Sec); + OS.switchSection(Sec); OS << ABIFlagsSection; } diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td index b1a05388884b..26cc6ac4dd38 100644 --- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -15,6 +15,7 @@ def brtarget21_mm : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget21MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtarget26_mm : Operand { @@ -22,6 +23,7 @@ def brtarget26_mm : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget26MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtargetr6 : Operand { @@ -29,6 +31,7 @@ def brtargetr6 : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTargetMM"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtarget_lsl2_mm : Operand { @@ -38,6 +41,7 @@ def brtarget_lsl2_mm : Operand { // set with DecodeDisambiguates let DecoderMethod = ""; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/llvm/lib/Target/Mips/MicroMipsInstrFPU.td index eea4d7746fa6..d5fc30cef695 100644 --- a/llvm/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/llvm/lib/Target/Mips/MicroMipsInstrFPU.td @@ -278,18 +278,32 @@ let DecoderNamespace = "MicroMips" in { } let DecoderNamespace = "MicroMips", DecoderMethod = "DecodeFMemMMR2" in { - def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>, - LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_32 { + def LDC1_MM_D32 : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>, + LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_32 { let BaseOpcode = "LDC132"; } - def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>, - LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_32; + def SDC1_MM_D32 : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>, + LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_32 { + let BaseOpcode = "SDC164"; + } def LWC1_MM : MMRel, LW_FT<"lwc1", FGR32Opnd, mem_mm_16, II_LWC1, load>, LW_FM_MM<0x27>, ISA_MICROMIPS; def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, LW_FM_MM<0x26>, ISA_MICROMIPS; } +let DecoderNamespace = "Mips64", DecoderMethod = "DecodeFMemMMR2" in { + def LDC1_MM_D64 : MMRel, LW_FT<"ldc1", FGR64Opnd, mem_mm_16, II_LDC1, load>, + LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_64 { + let BaseOpcode = "LDC164"; + } + def SDC1_MM_D64 : MMRel, SW_FT<"sdc1", FGR64Opnd, mem_mm_16, II_SDC1, store>, + LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_64 { + let BaseOpcode = "SDC164"; + } +} + + multiclass C_COND_MM fmt, InstrItinClass itin> { def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, @@ -400,8 +414,10 @@ let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, // Patterns for loads/stores with a reg+imm operand. 
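// FGR_32 selects the FP32 register mode (AFGR64, paired 32-bit FPU registers); FGR_64 selects FP64 mode (native 64-bit FGR64 registers), hence the separate D32/D64 ldc1/sdc1 definitions above and the per-mode patterns below.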
let AddedComplexity = 40 in { - def : LoadRegImmPat, ISA_MICROMIPS, FGR_32; - def : StoreRegImmPat, ISA_MICROMIPS, FGR_32; + def : LoadRegImmPat, ISA_MICROMIPS, FGR_32; + def : StoreRegImmPat, ISA_MICROMIPS, FGR_32; + def : LoadRegImmPat, ISA_MICROMIPS, FGR_64; + def : StoreRegImmPat, ISA_MICROMIPS, FGR_64; def : LoadRegImmPat, ISA_MICROMIPS; def : StoreRegImmPat, ISA_MICROMIPS; } diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td index 5f6354e19ebc..43b8eb7faf0e 100644 --- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td @@ -163,10 +163,12 @@ def mem_mm_4sp : Operand { def jmptarget_mm : Operand { let EncoderMethod = "getJumpTargetOpValueMM"; + let PrintMethod = "printJumpOperand"; } def calltarget_mm : Operand { let EncoderMethod = "getJumpTargetOpValueMM"; + let PrintMethod = "printJumpOperand"; } def brtarget7_mm : Operand { @@ -174,6 +176,7 @@ def brtarget7_mm : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget7MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtarget10_mm : Operand { @@ -181,6 +184,7 @@ def brtarget10_mm : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget10MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtarget_mm : Operand { @@ -188,6 +192,7 @@ def brtarget_mm : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTargetMM"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def simm23_lsl2 : Operand { diff --git a/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp index 55d3c59cbf03..b0de8dacf691 100644 --- a/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -774,7 +774,7 @@ bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI, bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); // TODO: Add support for the subtarget microMIPS32R6. 
if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2() || diff --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h index faf58545db62..12dc29bbfe85 100644 --- a/llvm/lib/Target/Mips/Mips.h +++ b/llvm/lib/Target/Mips/Mips.h @@ -38,6 +38,7 @@ namespace llvm { FunctionPass *createMicroMipsSizeReducePass(); FunctionPass *createMipsExpandPseudoPass(); FunctionPass *createMipsPreLegalizeCombiner(); + FunctionPass *createMipsPostLegalizeCombiner(bool IsOptNone); FunctionPass *createMipsMulMulBugPass(); InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &, @@ -48,6 +49,7 @@ namespace llvm { void initializeMipsBranchExpansionPass(PassRegistry &); void initializeMicroMipsSizeReducePass(PassRegistry &); void initializeMipsPreLegalizerCombinerPass(PassRegistry&); + void initializeMipsPostLegalizerCombinerPass(PassRegistry &); void initializeMipsMulMulBugFixPass(PassRegistry&); } // end namespace llvm; diff --git a/llvm/lib/Target/Mips/Mips.td b/llvm/lib/Target/Mips/Mips.td index 792960332bcc..398c38e678ba 100644 --- a/llvm/lib/Target/Mips/Mips.td +++ b/llvm/lib/Target/Mips/Mips.td @@ -217,6 +217,7 @@ include "MipsSchedule.td" include "MipsInstrInfo.td" include "MipsCallingConv.td" include "MipsRegisterBanks.td" +include "MipsCombine.td" // Avoid forward declaration issues. include "MipsScheduleP5600.td" @@ -267,8 +268,13 @@ def MipsAsmParserVariant : AsmParserVariant { string RegisterPrefix = "$"; } +def MipsAsmWriter : AsmWriter { + int PassSubtarget = 1; +} + def Mips : Target { let InstructionSet = MipsInstrInfo; + let AssemblyWriters = [MipsAsmWriter]; let AssemblyParsers = [MipsAsmParser]; let AssemblyParserVariants = [MipsAsmParserVariant]; let AllowRegisterRenaming = 1; diff --git a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 50147c019bfd..ce04124a7b00 100644 --- a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -35,7 +35,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); if (!Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index 563118dfe627..b7b1d74e66ed 100644 --- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-registerinfo" -Mips16RegisterInfo::Mips16RegisterInfo() {} +Mips16RegisterInfo::Mips16RegisterInfo() = default; bool Mips16RegisterInfo::requiresRegisterScavenging (const MachineFunction &MF) const { diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td index 192d0013d89c..0ae946160477 100644 --- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td @@ -39,6 +39,7 @@ def brtarget21 : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget21"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtarget26 : Operand { @@ -46,6 +47,7 @@ def brtarget26 : Operand { let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget26"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def jmpoffset16 : Operand { diff --git 
a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 4bd8845e9cb9..9330a791a7cc 100644 --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -522,27 +522,27 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // See if this is a generic print operand return AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O); case 'X': // hex const int - if ((MO.getType()) != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; O << "0x" << Twine::utohexstr(MO.getImm()); return false; case 'x': // hex const int (low 16 bits) - if ((MO.getType()) != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); return false; case 'd': // decimal const int - if ((MO.getType()) != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; O << MO.getImm(); return false; case 'm': // decimal const int minus 1 - if ((MO.getType()) != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; O << MO.getImm() - 1; return false; case 'y': // exact log2 - if ((MO.getType()) != MachineOperand::MO_Immediate) + if (!MO.isImm()) return true; if (!isPowerOf2_64(MO.getImm())) return true; @@ -550,7 +550,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; case 'z': // $0 if zero, regular printing otherwise - if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { + if (MO.isImm() && MO.getImm() == 0) { O << "$0"; return false; } @@ -798,7 +798,7 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) { // Tell the assembler which ABI we are using std::string SectionName = std::string(".mdebug.") + getCurrentABIString(); - OutStreamer->SwitchSection( + OutStreamer->switchSection( OutContext.getELFSection(SectionName, ELF::SHT_PROGBITS, 0)); // NaN: At the moment we only support: @@ -825,7 +825,7 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) { TS.emitDirectiveModuleOddSPReg(); // Switch to the .text section. - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + OutStreamer->switchSection(getObjFileLowering().getTextSection()); } void MipsAsmPrinter::emitInlineAsmStart() const { @@ -841,12 +841,12 @@ void MipsAsmPrinter::emitInlineAsmStart() const { TS.emitDirectiveSetAt(); TS.emitDirectiveSetMacro(); TS.emitDirectiveSetReorder(); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } void MipsAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const { - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); getTargetStreamer().emitDirectiveSetPop(); } @@ -1038,14 +1038,14 @@ void MipsAsmPrinter::EmitFPCallStub( // // probably not necessary but we save and restore the current section state // - OutStreamer->PushSection(); + OutStreamer->pushSection(); // // .section mips16.call.fpxxxx,"ax",@progbits // MCSectionELF *M = OutContext.getELFSection( ".mips16.call.fp." 
+ std::string(Symbol), ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); - OutStreamer->SwitchSection(M, nullptr); + OutStreamer->switchSection(M, nullptr); // // .align 2 // @@ -1114,7 +1114,7 @@ void MipsAsmPrinter::EmitFPCallStub( const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext); OutStreamer->emitELFSize(Stub, T_min_E); TS.emitDirectiveEnd(x); - OutStreamer->PopSection(); + OutStreamer->popSection(); } void MipsAsmPrinter::emitEndOfAsmFile(Module &M) { @@ -1130,7 +1130,7 @@ void MipsAsmPrinter::emitEndOfAsmFile(Module &M) { EmitFPCallStub(Symbol, Signature); } // return to the text section - OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); + OutStreamer->switchSection(OutContext.getObjectFileInfo()->getTextSection()); } void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp index 4e9a23d077da..a4fa0792a998 100644 --- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp +++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp @@ -36,7 +36,8 @@ /// /// Regarding compact branch hazard prevention: /// -/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below. +/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below, +/// load delay slots for MIPS1. /// /// A forbidden slot hazard occurs when a compact branch instruction is executed /// and the adjacent instruction in memory is a control transfer instruction @@ -164,6 +165,7 @@ private: bool handleSlot(Pred Predicate, Safe SafeInSlot); bool handleForbiddenSlot(); bool handleFPUDelaySlot(); + bool handleLoadDelaySlot(); bool handlePossibleLongBranch(); const MipsSubtarget *STI; @@ -532,7 +534,7 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) { } if (hasDelaySlot) { if (STI->isTargetNaCl()) { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP)); + TII->insertNop(*BalTgtMBB, Pos, DL); } else { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) .addReg(Mips::SP) @@ -675,9 +677,8 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) { // nop // $fallthrough: // - MIBundleBuilder(*LongBrMBB, Pos) - .append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB)) - .append(BuildMI(*MFp, DL, TII->get(Mips::NOP))); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB); + TII->insertNop(*LongBrMBB, Pos, DL)->bundleWithPred(); } else { // At this point, offset where we need to branch does not fit into // immediate field of the branch instruction and is not in the same @@ -722,7 +723,7 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) { if (I.Br->isUnconditionalBranch()) { // Change branch destination. assert(I.Br->getDesc().getNumOperands() == 1); - I.Br->RemoveOperand(0); + I.Br->removeOperand(0); I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB)); } else // Change branch destination and reverse condition. 
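A usage note for the PrintAsmOperand modifiers rewritten in the MipsAsmPrinter.cpp hunk above. A hedged sketch, assuming a clang or gcc toolchain targeting mips; the function name is illustrative:

    // 'X' prints an immediate in hex, 'x' prints only its low 16 bits,
    // and 'z' substitutes $0 when the operand is the immediate zero.
    void modifierDemo() {
      asm volatile("# full: %X0  low16: %x0  zero: %z1"
                   :
                   : "i"(0x12345678), "i"(0));
    }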
@@ -762,13 +763,12 @@ bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) { } if (LastInstInFunction || !SafeInSlot(*IInSlot, *I)) { - MachineBasicBlock::instr_iterator Iit = I->getIterator(); if (std::next(Iit) == FI->end() || std::next(Iit)->getOpcode() != Mips::NOP) { Changed = true; - MIBundleBuilder(&*I).append( - BuildMI(*MFp, I->getDebugLoc(), TII->get(Mips::NOP))); + TII->insertNop(*(I->getParent()), std::next(I), I->getDebugLoc()) + ->bundleWithPred(); NumInsertedNops++; } } @@ -801,6 +801,18 @@ bool MipsBranchExpansion::handleFPUDelaySlot() { }); } +bool MipsBranchExpansion::handleLoadDelaySlot() { + // Load delay slot hazards are only for MIPS1. + if (STI->hasMips2()) + return false; + + return handleSlot( + [this](auto &I) -> bool { return TII->HasLoadDelaySlot(I); }, + [this](auto &IInSlot, auto &I) -> bool { + return TII->SafeInLoadDelaySlot(IInSlot, I); + }); +} + bool MipsBranchExpansion::handlePossibleLongBranch() { if (STI->inMips16Mode() || !STI->enableLongBranchPass()) return false; @@ -867,7 +879,7 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); IsPIC = TM.isPositionIndependent(); ABI = static_cast(TM).getABI(); - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = static_cast(STI->getInstrInfo()); if (IsPIC && ABI.IsO32() && @@ -877,19 +889,21 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) { MFp = &MF; ForceLongBranchFirstPass = ForceLongBranch; - // Run these two at least once + // Run these at least once. bool longBranchChanged = handlePossibleLongBranch(); bool forbiddenSlotChanged = handleForbiddenSlot(); bool fpuDelaySlotChanged = handleFPUDelaySlot(); + bool loadDelaySlotChanged = handleLoadDelaySlot(); - bool Changed = - longBranchChanged || forbiddenSlotChanged || fpuDelaySlotChanged; + bool Changed = longBranchChanged || forbiddenSlotChanged || + fpuDelaySlotChanged || loadDelaySlotChanged; - // Then run them alternatively while there are changes + // Then run them alternatively while there are changes. 
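  // Each handler can grow the code (extra nops or long-branch sequences) and push other offsets out of range again, so the handlers are re-run until a fixed point is reached.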
while (forbiddenSlotChanged) { longBranchChanged = handlePossibleLongBranch(); fpuDelaySlotChanged = handleFPUDelaySlot(); - if (!longBranchChanged && !fpuDelaySlotChanged) + loadDelaySlotChanged = handleLoadDelaySlot(); + if (!longBranchChanged && !fpuDelaySlotChanged && !loadDelaySlotChanged) break; forbiddenSlotChanged = handleForbiddenSlot(); } diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp index f6ec34c7f403..3c1c2bcd7a1b 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.cpp +++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp @@ -18,6 +18,7 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" using namespace llvm; @@ -540,8 +541,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, } MIRBuilder.insertInstr(MIB); if (MIB->getOpcode() == Mips::JALRPseudo) { - const MipsSubtarget &STI = - static_cast(MIRBuilder.getMF().getSubtarget()); + const MipsSubtarget &STI = MIRBuilder.getMF().getSubtarget(); MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), *STI.getRegBankInfo()); } diff --git a/llvm/lib/Target/Mips/MipsCombine.td b/llvm/lib/Target/Mips/MipsCombine.td new file mode 100644 index 000000000000..29550a15d38d --- /dev/null +++ b/llvm/lib/Target/Mips/MipsCombine.td @@ -0,0 +1,15 @@ +//=- MipsCombine.td - Define Mips Combine Rules --------------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/GlobalISel/Combine.td" + +def MipsPostLegalizerCombinerHelper: GICombinerHelper< + "MipsGenPostLegalizerCombinerHelper", []> { + let DisableRuleOption = "mipspostlegalizercombiner-disable-rule"; +} + diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index 1efbf5570287..0341af0caac4 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -436,7 +436,7 @@ bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) { // FIXME: MF = &mf; MCP = mf.getConstantPool(); - STI = &static_cast(mf.getSubtarget()); + STI = &mf.getSubtarget(); LLVM_DEBUG(dbgs() << "constant island machine function " << "\n"); if (!STI->inMips16Mode() || !MipsSubtarget::useConstantIslands()) { @@ -1653,8 +1653,8 @@ void MipsConstantIslands::prescanForConstants() { I->getOperand(2).ChangeToImmediate(index); LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); I->setDesc(TII->get(Mips::LwRxPcTcp16)); - I->RemoveOperand(1); - I->RemoveOperand(1); + I->removeOperand(1); + I->removeOperand(1); I->addOperand(MachineOperand::CreateCPI(index, 0)); I->addOperand(MachineOperand::CreateImm(4)); } diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp index cf6cec22308c..94053fa2eb7a 100644 --- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -677,7 +677,7 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) { // Bundle the NOP to the instruction with the delay slot. 
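  // Fallback path: nothing safe could fill the delay slot, so a nop is inserted via TII->insertNop() (no longer hardcoding Mips::NOP) and bundled with the branch.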
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for "; I->dump()); - BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + TII->insertNop(MBB, std::next(I), I->getDebugLoc()); MIBundleBuilder(MBB, I, std::next(I, 2)); ++FilledSlots; Changed = true; diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp index 31180d5a23ef..d242083f958b 100644 --- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp @@ -892,7 +892,7 @@ bool MipsExpandPseudo::expandMBB(MachineBasicBlock &MBB) { } bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); bool Modified = false; diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index 6ddfec5d0f79..c1b8af70d8b0 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -178,12 +178,8 @@ private: // Emit helper routines. bool emitCmp(unsigned DestReg, const CmpInst *CI); - bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, - unsigned Alignment = 0); - bool emitStore(MVT VT, unsigned SrcReg, Address Addr, - MachineMemOperand *MMO = nullptr); - bool emitStore(MVT VT, unsigned SrcReg, Address &Addr, - unsigned Alignment = 0); + bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr); + bool emitStore(MVT VT, unsigned SrcReg, Address &Addr); unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); bool emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, @@ -753,8 +749,7 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) { return true; } -bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, - unsigned Alignment) { +bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr) { // // more cases will be handled here in following patches. // @@ -808,8 +803,7 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, return false; } -bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr, - unsigned Alignment) { +bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr) { // // more cases will be handled here in following patches. 
// @@ -902,7 +896,7 @@ bool MipsFastISel::selectLoad(const Instruction *I) { return false; unsigned ResultReg; - if (!emitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) + if (!emitLoad(VT, ResultReg, Addr)) return false; updateValueMap(I, ResultReg); return true; @@ -931,7 +925,7 @@ bool MipsFastISel::selectStore(const Instruction *I) { if (!computeAddress(I->getOperand(1), Addr)) return false; - if (!emitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) + if (!emitStore(VT, SrcReg, Addr)) return false; return true; } diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index d88696525e9e..c4bb3d90b4d5 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -54,7 +54,7 @@ void MipsDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { } bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget()); + Subtarget = &MF.getSubtarget<MipsSubtarget>(); bool Ret = SelectionDAGISel::runOnMachineFunction(MF); processFunctionAfterISel(MF); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0c2e129b8f1f..b98be4ae4b75 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -94,18 +94,6 @@ static const MCPhysReg Mips64DPRegs[8] = { Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64 }; -// If I is a shifted mask, set the size (Size) and the first bit of the -// mask (Pos), and return true. -// For example, if I is 0x003ff800, (Pos, Size) = (11, 11). -static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { - if (!isShiftedMask_64(I)) - return false; - - Size = countPopulation(I); - Pos = countTrailingZeros(I); - return true; -} - // The MIPS MSA ABI passes vector arguments in the integer register set. // The number of integer registers used is dependent on the ABI used.
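// For reference: O32 provides four integer argument registers ($4-$7), while N32/N64 provide eight ($4-$11).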
MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, @@ -192,6 +180,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Ret: return "MipsISD::Ret"; case MipsISD::ERet: return "MipsISD::ERet"; case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; + case MipsISD::FAbs: return "MipsISD::FAbs"; case MipsISD::FMS: return "MipsISD::FMS"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; @@ -353,15 +342,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); + setOperationAction(ISD::FABS, MVT::f32, Custom); + setOperationAction(ISD::FABS, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - if (!(TM.Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())) { - setOperationAction(ISD::FABS, MVT::f32, Custom); - setOperationAction(ISD::FABS, MVT::f64, Custom); - } - if (Subtarget.isGP64bit()) { setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); @@ -494,15 +480,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::TRAP, MVT::Other, Legal); - setTargetDAGCombine(ISD::SDIVREM); - setTargetDAGCombine(ISD::UDIVREM); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::AssertZext); - setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine({ISD::SDIVREM, ISD::UDIVREM, ISD::SELECT, ISD::AND, + ISD::OR, ISD::ADD, ISD::SUB, ISD::AssertZext, ISD::SHL}); if (ABI.IsO32()) { // These libcalls are not available in 32-bit. @@ -794,14 +773,15 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, EVT ValTy = N->getValueType(0); SDLoc DL(N); - uint64_t Pos = 0, SMPos, SMSize; + uint64_t Pos = 0; + unsigned SMPos, SMSize; ConstantSDNode *CN; SDValue NewOperand; unsigned Opc; // Op's second operand must be a shifted mask. if (!(CN = dyn_cast(Mask)) || - !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) + !isShiftedMask_64(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { @@ -875,7 +855,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); - uint64_t SMPos0, SMSize0, SMPos1, SMSize1; + unsigned SMPos0, SMSize0, SMPos1, SMSize1; ConstantSDNode *CN, *CN1; // See if Op's first operand matches (and $src1 , mask0). @@ -883,7 +863,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); if (!(CN = dyn_cast(And0.getOperand(1))) || - !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) + !isShiftedMask_64(~CN->getSExtValue(), SMPos0, SMSize0)) return SDValue(); // See if Op's second operand matches (and (shl $src, pos), mask1). @@ -891,7 +871,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, And1.getOperand(0).getOpcode() == ISD::SHL) { if (!(CN = dyn_cast(And1.getOperand(1))) || - !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + !isShiftedMask_64(CN->getZExtValue(), SMPos1, SMSize1)) return SDValue(); // The shift masks must have the same position and size. 
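The local isShiftedMask() helper deleted above is replaced throughout by the two-out-parameter isShiftedMask_64() overload from MathExtras.h, which reports the mask's start bit and length as unsigned values. A self-contained sketch of the same computation, illustrative only and not the LLVM implementation:

    #include <bit>      // std::countr_zero, std::popcount (C++20)
    #include <cstdint>

    // True if I is a single contiguous run of ones; Pos gets the index of
    // the lowest set bit and Size the run length.
    static bool isShiftedMaskSketch(uint64_t I, unsigned &Pos, unsigned &Size) {
      if (I == 0)
        return false;
      unsigned TZ = std::countr_zero(I);
      uint64_t Run = I >> TZ;
      if ((Run & (Run + 1)) != 0) // the ones are not contiguous
        return false;
      Pos = TZ;
      Size = std::popcount(I);
      return true;
    }

    // From the deleted comment: I = 0x003ff800 yields Pos = 11, Size = 11.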
@@ -970,6 +950,14 @@ static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, ROOTNode->getOperand(1).getOpcode() != ISD::MUL) return SDValue(); + // In the case where we have a multiplication as the left operand + // of a subtraction, we can't combine into a MipsISD::MSub node as + // the instruction definition of msub(u) places the multiplication + // on the right. + if (ROOTNode->getOpcode() == ISD::SUB && + ROOTNode->getOperand(0).getOpcode() == ISD::MUL) + return SDValue(); + // We don't handle vector types here. if (ROOTNode->getValueType(0).isVector()) return SDValue(); @@ -1118,7 +1106,8 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, EVT ValTy = N->getValueType(0); SDLoc DL(N); - uint64_t Pos = 0, SMPos, SMSize; + uint64_t Pos = 0; + unsigned SMPos, SMSize; ConstantSDNode *CN; SDValue NewOperand; @@ -1136,7 +1125,7 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, // AND's second operand must be a shifted mask. if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) || - !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) + !isShiftedMask_64(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); // Return if the shifted mask does not start at bit 0 or the sum of its size @@ -1191,6 +1180,16 @@ bool MipsTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasMips32(); } +bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const { + // We can use ANDI+SLTIU as a bit test. Y contains the bit position. + // For MIPSR2 or later, we may be able to use the `ext` instruction or its + // double-word variants. + if (auto *C = dyn_cast<ConstantSDNode>(Y)) + return C->getAPIntValue().ule(15); + + return false; +} + bool MipsTargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { if (N->getOperand(0).getValueType().isVector()) @@ -2421,11 +2420,14 @@ MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert()); } -static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, - bool HasExtractInsert) { +SDValue MipsTargetLowering::lowerFABS32(SDValue Op, SelectionDAG &DAG, + bool HasExtractInsert) const { SDLoc DL(Op); SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32); + if (DAG.getTarget().Options.NoNaNsFPMath || Subtarget.inAbs2008Mode()) + return DAG.getNode(MipsISD::FAbs, DL, Op.getValueType(), Op.getOperand(0)); + // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it // to i32. SDValue X = (Op.getValueType() == MVT::f32) @@ -2458,11 +2460,14 @@ static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); } -static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, - bool HasExtractInsert) { +SDValue MipsTargetLowering::lowerFABS64(SDValue Op, SelectionDAG &DAG, + bool HasExtractInsert) const { SDLoc DL(Op); SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32); + if (DAG.getTarget().Options.NoNaNsFPMath || Subtarget.inAbs2008Mode()) + return DAG.getNode(MipsISD::FAbs, DL, Op.getValueType(), Op.getOperand(0)); + // Bitcast to integer node. SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0)); @@ -2673,7 +2678,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { return Op; // Return if load is aligned or if MemVT is neither i32 nor i64.
- if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) || + if ((LD->getAlign().value() >= (MemVT.getSizeInBits() / 8)) || ((MemVT != MVT::i32) && (MemVT != MVT::i64))) return SDValue(); @@ -2787,7 +2792,7 @@ static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG, SDValue Tr = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Val), FPTy, Val.getOperand(0)); return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), - SD->getPointerInfo(), SD->getAlignment(), + SD->getPointerInfo(), SD->getAlign(), SD->getMemOperand()->getFlags()); } @@ -2797,7 +2802,7 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Lower unaligned integer stores. if (!Subtarget.systemSupportsUnalignedAccess() && - (SD->getAlignment() < MemVT.getSizeInBits() / 8) && + (SD->getAlign().value() < (MemVT.getSizeInBits() / 8)) && ((MemVT == MVT::i32) || (MemVT == MVT::i64))) return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle()); @@ -4732,18 +4737,19 @@ MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI, Register MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const { - // Named registers is expected to be fairly rare. For now, just support $28 - // since the linux kernel uses it. + // The Linux kernel uses $28 and sp. if (Subtarget.isGP64bit()) { Register Reg = StringSwitch<Register>(RegName) - .Case("$28", Mips::GP_64) - .Default(Register()); + .Case("$28", Mips::GP_64) + .Case("sp", Mips::SP_64) + .Default(Register()); if (Reg) return Reg; } else { Register Reg = StringSwitch<Register>(RegName) - .Case("$28", Mips::GP) - .Default(Register()); + .Case("$28", Mips::GP) + .Case("sp", Mips::SP) + .Default(Register()); if (Reg) return Reg; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 3905a18895de..1f921fbe9491 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -99,6 +99,9 @@ class TargetRegisterClass; // Floating Point Compare FPCmp, + // Floating point Abs + FAbs, + // Floating point select FSELECT, @@ -157,7 +160,7 @@ class TargetRegisterClass; Ins, CIns, - // EXTR.W instrinsic nodes. + // EXTR.W intrinsic nodes. EXTP, EXTPDP, EXTR_S_H, @@ -282,6 +285,7 @@ class TargetRegisterClass; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool hasBitTest(SDValue X, SDValue Y) const override; bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; @@ -540,6 +544,10 @@ class TargetRegisterClass; SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, + bool HasExtractInsert) const; + SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, + bool HasExtractInsert) const; SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 2bf8562895d7..5cb7a0a1804d 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -54,7 +54,6 @@ bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const { /// insertNoop - If data hazard condition is found insert the target nop /// instruction. -// FIXME: This appears to be dead code.
void MipsInstrInfo:: insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { @@ -62,6 +61,19 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } +MachineInstrBuilder MipsInstrInfo::insertNop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + assert(!Subtarget.inMips16Mode() && + "insertNop does not support MIPS16e mode at this time"); + const unsigned MMOpc = + Subtarget.hasMips32r6() ? Mips::SLL_MMR6 : Mips::SLL_MM; + const unsigned Opc = Subtarget.inMicroMipsMode() ? MMOpc : Mips::SLL; + return BuildMI(MBB, MI, DL, get(Opc), Mips::ZERO) + .addReg(Mips::ZERO) + .addImm(0); +} + MachineMemOperand * MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, MachineMemOperand::Flags Flags) const { @@ -598,6 +610,18 @@ bool MipsInstrInfo::SafeInFPUDelaySlot(const MachineInstr &MIInSlot, return true; } +/// Predicate for distinguishing instructions that are hazardous in a load delay +/// slot. Consider inline assembly as unsafe as well. +bool MipsInstrInfo::SafeInLoadDelaySlot(const MachineInstr &MIInSlot, + const MachineInstr &LoadMI) const { + if (MIInSlot.isInlineAsm()) + return false; + + return !llvm::any_of(LoadMI.defs(), [&](const MachineOperand &Op) { + return Op.isReg() && MIInSlot.readsRegister(Op.getReg()); + }); +} + /// Predicate for distinguishing instructions that have forbidden slots. bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const { return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0; @@ -622,6 +646,22 @@ bool MipsInstrInfo::HasFPUDelaySlot(const MachineInstr &MI) const { } } +/// Predicate for distinguishing instructions that have load delay slots. +bool MipsInstrInfo::HasLoadDelaySlot(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case Mips::LB: + case Mips::LBu: + case Mips::LH: + case Mips::LHu: + case Mips::LW: + case Mips::LWR: + case Mips::LWL: + return true; + default: + return false; + } +} + /// Return the number of bytes of code the specified instruction may be. unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -695,7 +735,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, NewOpc == Mips::JIALC64) { if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64) - MIB->RemoveOperand(0); + MIB->removeOperand(0); for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { MIB.add(I->getOperand(J)); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index 46c1b73d512f..8b98ad3dceea 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -96,16 +96,29 @@ public: bool SafeInFPUDelaySlot(const MachineInstr &MIInSlot, const MachineInstr &FPUMI) const; + /// Predicate to determine if an instruction can go in a load delay slot. + bool SafeInLoadDelaySlot(const MachineInstr &MIInSlot, + const MachineInstr &LoadMI) const; + /// Predicate to determine if an instruction has a forbidden slot. bool HasForbiddenSlot(const MachineInstr &MI) const; /// Predicate to determine if an instruction has an FPU delay slot. bool HasFPUDelaySlot(const MachineInstr &MI) const; + /// Predicate to determine if an instruction has a load delay slot. + bool HasLoadDelaySlot(const MachineInstr &MI) const; + /// Insert nop instruction when hazard condition is found void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + /// Insert an ISA appropriate `nop`.
+ // FIXME: Add support for MIPS16e. + MachineInstrBuilder insertNop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index 089fed9ec0bf..973f40a21dee 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -833,22 +833,26 @@ def MipsJumpTargetAsmOperand : AsmOperandClass { def jmptarget : Operand<OtherVT> { let EncoderMethod = "getJumpTargetOpValue"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printJumpOperand"; } def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def brtarget1SImm16 : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue1SImm16"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget1SImm16"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printBranchOperand"; } def calltarget : Operand<iPTR> { let EncoderMethod = "getJumpTargetOpValue"; let ParserMatchClass = MipsJumpTargetAsmOperand; + let PrintMethod = "printJumpOperand"; } def imm64: Operand<i64>; diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 588b7e85c94c..35b0fe218d8f 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -13,6 +13,7 @@ #include "MipsLegalizerInfo.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/IR/IntrinsicsMips.h" using namespace llvm; @@ -502,8 +503,7 @@ static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - const MipsSubtarget &ST = - static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget()); + const MipsSubtarget &ST = MI.getMF()->getSubtarget<MipsSubtarget>(); const MipsInstrInfo &TII = *ST.getInstrInfo(); const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); const RegisterBankInfo &RBI = *ST.getRegBankInfo(); diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp index 411a26e42713..7d9824aaf8ec 100644 --- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp +++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp @@ -22,6 +22,13 @@ static cl::opt<bool> FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true), cl::desc("Always use $gp as the global base register.")); +MachineFunctionInfo * +MipsFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> + &Src2DstMBB) const { + return DestMF.cloneInfo<MipsFunctionInfo>(*this); +} + MipsFunctionInfo::~MipsFunctionInfo() = default; bool MipsFunctionInfo::globalBaseRegSet() const { @@ -29,7 +36,7 @@ bool MipsFunctionInfo::globalBaseRegSet() const { } static const TargetRegisterClass &getGlobalBaseRegClass(MachineFunction &MF) { - auto &STI = static_cast<const MipsSubtarget &>(MF.getSubtarget()); + auto &STI = MF.getSubtarget<MipsSubtarget>(); auto &TM = static_cast<const MipsTargetMachine &>(MF.getTarget()); if (STI.inMips16Mode()) diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.h
b/llvm/lib/Target/Mips/MipsMachineFunction.h index 786d210e2aaa..7b17fd3ed0cd 100644 --- a/llvm/lib/Target/Mips/MipsMachineFunction.h +++ b/llvm/lib/Target/Mips/MipsMachineFunction.h @@ -26,6 +26,11 @@ class MipsFunctionInfo : public MachineFunctionInfo { public: MipsFunctionInfo(MachineFunction &MF) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; + ~MipsFunctionInfo() override; unsigned getSRetReturnReg() const { return SRetReturnReg; } diff --git a/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp index a2b55e8bddcd..2c23d3b72dc6 100644 --- a/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -10,8 +10,9 @@ #include "Mips.h" #include "MipsTargetMachine.h" -#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp index 2823d300dc6e..204c42ae5e5f 100644 --- a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -170,7 +170,7 @@ static void eraseGPOpnd(MachineInstr &MI) { for (unsigned I = 0; I < MI.getNumOperands(); ++I) { MachineOperand &MO = MI.getOperand(I); if (MO.isReg() && MO.getReg() == Reg) { - MI.RemoveOperand(I); + MI.removeOperand(I); return; } } @@ -194,7 +194,7 @@ void MBBInfo::postVisit() { // OptimizePICCall methods. bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) { - if (static_cast(F.getSubtarget()).inMips16Mode()) + if (F.getSubtarget().inMips16Mode()) return false; // Do a pre-order traversal of the dominator tree. diff --git a/llvm/lib/Target/Mips/MipsOs16.cpp b/llvm/lib/Target/Mips/MipsOs16.cpp index ac4e55f8a1f5..f6346a8bbc8b 100644 --- a/llvm/lib/Target/Mips/MipsOs16.cpp +++ b/llvm/lib/Target/Mips/MipsOs16.cpp @@ -13,6 +13,7 @@ #include "Mips.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp new file mode 100644 index 000000000000..7723a10af2d7 --- /dev/null +++ b/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp @@ -0,0 +1,148 @@ +//=== lib/CodeGen/GlobalISel/MipsPostLegalizerCombiner.cpp ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// after the legalizer. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "Mips.h" +#include "MipsLegalizerInfo.h" +#include "MipsSubtarget.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "mips-postlegalizer-combiner" + +using namespace llvm; +using namespace MIPatternMatch; + +#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS +#include "MipsGenPostLegalizeGICombiner.inc" +#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS + +namespace { +#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H +#include "MipsGenPostLegalizeGICombiner.inc" +#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H + +class MipsPostLegalizerCombinerInfo final : public CombinerInfo { + GISelKnownBits *KB; + +public: + MipsGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; + + MipsPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, + GISelKnownBits *KB, const MipsLegalizerInfo *LI) + : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, + /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), + KB(KB) { + if (!GeneratedRuleCfg.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + + bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool MipsPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + + CombinerHelper Helper(Observer, B, KB, + /*DominatorTree*/ nullptr, LInfo); + MipsGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper); + return Generated.tryCombineAll(Observer, MI, B, Helper); +} + +#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP +#include "MipsGenPostLegalizeGICombiner.inc" +#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP + +// Pass boilerplate +// ================ + +class MipsPostLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + MipsPostLegalizerCombiner(bool IsOptNone = false); + + StringRef getPassName() const override { + return "MipsPostLegalizerCombiner"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool IsOptNone; +}; +} // end anonymous namespace + +void MipsPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + if (!IsOptNone) { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + MachineFunctionPass::getAnalysisUsage(AU); +} + +MipsPostLegalizerCombiner::MipsPostLegalizerCombiner(bool IsOptNone) + : MachineFunctionPass(ID), IsOptNone(IsOptNone) { + initializeMipsPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); +} + +bool MipsPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto *TPC = &getAnalysis<TargetPassConfig>(); + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOpt::None &&
!skipFunction(F); + + const MipsSubtarget &ST = MF.getSubtarget(); + const MipsLegalizerInfo *LI = + static_cast(ST.getLegalizerInfo()); + + GISelKnownBits *KB = &getAnalysis().get(MF); + MipsPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), + F.hasMinSize(), KB, LI); + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); +} + +char MipsPostLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(MipsPostLegalizerCombiner, DEBUG_TYPE, + "Combine Mips machine instrs after legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_END(MipsPostLegalizerCombiner, DEBUG_TYPE, + "Combine Mips machine instrs after legalization", false, + false) + +namespace llvm { +FunctionPass *createMipsPostLegalizeCombiner(bool IsOptNone) { + return new MipsPostLegalizerCombiner(IsOptNone); +} +} // end namespace llvm diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp index 2ad9ffe4eb77..cb6d53ec0a12 100644 --- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" @@ -50,8 +51,7 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, // Don't attempt to combine non power of 2 loads or unaligned loads when // subtarget doesn't support them. auto MMO = *MI.memoperands_begin(); - const MipsSubtarget &STI = - static_cast(MI.getMF()->getSubtarget()); + const MipsSubtarget &STI = MI.getMF()->getSubtarget(); if (!isPowerOf2_64(MMO->getSize())) return false; bool isUnaligned = MMO->getAlign() < MMO->getSize(); diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 04b69c66bc0d..2544d9d9b76d 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -73,8 +73,7 @@ RegisterBankInfo::ValueMapping ValueMappings[] = { using namespace llvm; -MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) - : MipsGenRegisterBankInfo() {} +MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) {} const RegisterBank & MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, @@ -154,8 +153,7 @@ static bool isGprbTwoInstrUnalignedLoadOrStore(const MachineInstr *MI) { if (MI->getOpcode() == TargetOpcode::G_LOAD || MI->getOpcode() == TargetOpcode::G_STORE) { auto MMO = *MI->memoperands_begin(); - const MipsSubtarget &STI = - static_cast(MI->getMF()->getSubtarget()); + const MipsSubtarget &STI = MI->getMF()->getSubtarget(); if (MMO->getSize() == 4 && (!STI.systemSupportsUnalignedAccess() && MMO->getAlign() < MMO->getSize())) return true; @@ -399,7 +397,7 @@ void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction( static const MipsRegisterBankInfo::ValueMapping * getMSAMapping(const MachineFunction &MF) { - assert(static_cast(MF.getSubtarget()).hasMSA() && + assert(MF.getSubtarget().hasMSA() && "MSA mapping not available on target without MSA."); return &Mips::ValueMappings[Mips::MSAIdx]; } diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/llvm/lib/Target/Mips/MipsRegisterBankInfo.h index 
df51606e1e8a..9eca4fdab3d6 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.h +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_MIPS_MIPSREGISTERBANKINFO_H #define LLVM_LIB_TARGET_MIPS_MIPSREGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "MipsGenRegisterBank.inc" diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 7ee2ddf3605f..7729d9cf92da 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -97,7 +97,7 @@ private: ExpandPseudo::ExpandPseudo(MachineFunction &MF_) : MF(MF_), MRI(MF.getRegInfo()), - Subtarget(static_cast(MF.getSubtarget())), + Subtarget(MF.getSubtarget()), TII(*static_cast(Subtarget.getInstrInfo())), RegInfo(*Subtarget.getRegisterInfo()) {} diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 03a545605fe1..1124111c1a6e 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -38,7 +38,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); if (Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); @@ -282,7 +282,7 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset( SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, unsigned ShiftAmount = 0) const { if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + auto *CN = cast(Addr.getOperand(1)); if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { EVT ValTy = Addr.getValueType(); @@ -956,6 +956,38 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { break; } + case MipsISD::FAbs: { + MVT ResTy = Node->getSimpleValueType(0); + assert((ResTy == MVT::f64 || ResTy == MVT::f32) && + "Unsupported float type!"); + unsigned Opc = 0; + if (ResTy == MVT::f64) + Opc = (Subtarget->isFP64bit() ? Mips::FABS_D64 : Mips::FABS_D32); + else + Opc = Mips::FABS_S; + + if (Subtarget->inMicroMipsMode()) { + switch (Opc) { + case Mips::FABS_D64: + Opc = Mips::FABS_D64_MM; + break; + case Mips::FABS_D32: + Opc = Mips::FABS_D32_MM; + break; + case Mips::FABS_S: + Opc = Mips::FABS_S_MM; + break; + default: + llvm_unreachable("Unknown opcode for MIPS floating point abs!"); + } + } + + ReplaceNode(Node, + CurDAG->getMachineNode(Opc, DL, ResTy, Node->getOperand(0))); + + return true; + } + // Manually match MipsISD::Ins nodes to get the correct instruction. 
It has // to be done in this fashion so that we respect the differences between // dins and dinsm, as the difference is that the size operand has the range diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 346ebe9664fc..f8bde3816fde 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -99,11 +99,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::BITCAST, VecTy, Legal); } - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine( + {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT}); if (Subtarget.hasMips32r2()) { setOperationAction(ISD::ADDC, MVT::i32, Legal); @@ -161,11 +158,7 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::VSELECT); - setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR}); } if (!Subtarget.useSoftFloat()) { @@ -1184,13 +1177,13 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { // i32 load from lower address. SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), - Nd.getAlignment(), Nd.getMemOperand()->getFlags()); + Nd.getAlign(), Nd.getMemOperand()->getFlags()); // i32 load from higher address. Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); SDValue Hi = DAG.getLoad( MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), - std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags()); + commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags()); if (!Subtarget.isLittle()) std::swap(Lo, Hi); @@ -1219,14 +1212,13 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { std::swap(Lo, Hi); // i32 store to lower address. - Chain = - DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(), - Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); + Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(), + Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); // i32 store to higher address. 
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), - std::min(Nd.getAlignment(), 4U), + commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); } diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index d6481793ef49..c86666cc40b6 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -38,7 +38,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-reg-info" -MipsSERegisterInfo::MipsSERegisterInfo() {} +MipsSERegisterInfo::MipsSERegisterInfo() = default; bool MipsSERegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/llvm/lib/Target/Mips/MipsScheduleGeneric.td index f076f2f9cf10..931412cb261e 100644 --- a/llvm/lib/Target/Mips/MipsScheduleGeneric.td +++ b/llvm/lib/Target/Mips/MipsScheduleGeneric.td @@ -957,13 +957,13 @@ def : InstRW<[GenericWriteFPURcpS], (instrs RECIP_S_MM, RSQRT_S_MM)>; def : InstRW<[GenericWriteFPURcpD], (instrs RECIP_D32_MM, RECIP_D64_MM, RSQRT_D32_MM, RSQRT_D64_MM)>; -def : InstRW<[GenericWriteFPUStore], (instrs SDC1_MM, SWC1_MM, SUXC1_MM, - SWXC1_MM)>; +def : InstRW<[GenericWriteFPUStore], (instrs SDC1_MM_D32, SDC1_MM_D64, SWC1_MM, + SUXC1_MM, SWXC1_MM)>; def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs CFC1_MM, CTC1_MM)>; -def : InstRW<[GenericWriteFPULoad], (instrs LDC1_MM, LUXC1_MM, LWC1_MM, - LWXC1_MM)>; +def : InstRW<[GenericWriteFPULoad], (instrs LDC1_MM_D32, LDC1_MM_D64, LUXC1_MM, + LWC1_MM, LWXC1_MM)>; // microMIPS32r6 // ============= diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp index c285385a19dd..10530cdafeed 100644 --- a/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -64,6 +64,7 @@ bool MipsSubtarget::MSAWarningPrinted = false; bool MipsSubtarget::VirtWarningPrinted = false; bool MipsSubtarget::CRCWarningPrinted = false; bool MipsSubtarget::GINVWarningPrinted = false; +bool MipsSubtarget::MIPS1WarningPrinted = false; void MipsSubtarget::anchor() {} @@ -91,10 +92,14 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (MipsArchVersion == MipsDefault) MipsArchVersion = Mips32; - // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not - // been tested and currently exist for the integrated assembler only. - if (MipsArchVersion == Mips1) - report_fatal_error("Code generation for MIPS-I is not implemented", false); + // MIPS-I has not been tested. + if (MipsArchVersion == Mips1 && !MIPS1WarningPrinted) { + errs() << "warning: MIPS-I support is experimental\n"; + MIPS1WarningPrinted = true; + } + + // Don't even attempt to generate code for MIPS-V. It has not + // been tested and currently exists for the integrated assembler only. if (MipsArchVersion == Mips5) report_fatal_error("Code generation for MIPS-V is not implemented", false); @@ -111,7 +116,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (isFP64bit() && !hasMips64() && hasMips32() && !hasMips32r2()) report_fatal_error( "FPU with 64-bit registers is not available on MIPS32 pre revision 2. 
" - "Use -mcpu=mips32r2 or greater."); + "Use -mcpu=mips32r2 or greater.", false); if (!isABI_O32() && !useOddSPReg()) report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false); diff --git a/llvm/lib/Target/Mips/MipsSubtarget.h b/llvm/lib/Target/Mips/MipsSubtarget.h index 2b4c2b19a95d..ec8ca64c8ce8 100644 --- a/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/llvm/lib/Target/Mips/MipsSubtarget.h @@ -17,12 +17,12 @@ #include "MipsFrameLowering.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" @@ -59,6 +59,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // Used to avoid printing ginv warnings multiple times. static bool GINVWarningPrinted; + // Used to avoid printing Mips1 warnings multiple times. + static bool MIPS1WarningPrinted; + // Used to avoid printing virt warnings multiple times. static bool VirtWarningPrinted; diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp index f9f662a00117..fb0aa397d393 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -18,12 +18,14 @@ #include "MipsSEISelDAGToDAG.h" #include "MipsSubtarget.h" #include "MipsTargetObjectFile.h" +#include "MipsTargetTransformInfo.h" #include "TargetInfo/MipsTargetInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -62,6 +64,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() { initializeMipsBranchExpansionPass(*PR); initializeMicroMipsSizeReducePass(*PR); initializeMipsPreLegalizerCombinerPass(*PR); + initializeMipsPostLegalizerCombinerPass(*PR); initializeMipsMulMulBugFixPass(*PR); } @@ -103,7 +106,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, static Reloc::Model getEffectiveRelocModel(bool JIT, Optional RM) { - if (!RM.hasValue() || JIT) + if (!RM || JIT) return Reloc::Static; return *RM; } @@ -238,6 +241,7 @@ public: bool addIRTranslator() override; void addPreLegalizeMachineIR() override; bool addLegalizeMachineIR() override; + void addPreRegBankSelect() override; bool addRegBankSelect() override; bool addGlobalInstructionSelect() override; @@ -276,7 +280,7 @@ void MipsPassConfig::addPreRegAlloc() { } TargetTransformInfo -MipsTargetMachine::getTargetTransformInfo(const Function &F) { +MipsTargetMachine::getTargetTransformInfo(const Function &F) const { if (Subtarget->allowMixed16_32()) { LLVM_DEBUG(errs() << "No Target Transform Info Pass Added\n"); // FIXME: This is no longer necessary as the TTI returned is per-function. 
@@ -284,7 +288,7 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) { } LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n"); - return TargetTransformInfo(BasicTTIImpl(this, F)); + return TargetTransformInfo(MipsTTIImpl(this, F)); } // Implemented by targets that want to run passes immediately before @@ -333,6 +337,11 @@ bool MipsPassConfig::addLegalizeMachineIR() { return false; } +void MipsPassConfig::addPreRegBankSelect() { + bool IsOptNone = getOptLevel() == CodeGenOpt::None; + addPass(createMipsPostLegalizeCombiner(IsOptNone)); +} + bool MipsPassConfig::addRegBankSelect() { addPass(new RegBankSelect()); return false; diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.h b/llvm/lib/Target/Mips/MipsTargetMachine.h index e0de924be4fd..46ffc11738df 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -43,7 +43,7 @@ public: CodeGenOpt::Level OL, bool JIT, bool isLittle); ~MipsTargetMachine() override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; const MipsSubtarget *getSubtargetImpl() const { if (Subtarget) diff --git a/llvm/lib/Target/Mips/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MipsTargetStreamer.h index 44615b987e3c..2f4b6eb37aa1 100644 --- a/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -178,7 +178,7 @@ public: MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; } const MipsABIInfo &getABI() const { - assert(ABI.hasValue() && "ABI hasn't been set!"); + assert(ABI && "ABI hasn't been set!"); return *ABI; } diff --git a/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp b/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp new file mode 100644 index 000000000000..bd88a0af0ecf --- /dev/null +++ b/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp @@ -0,0 +1,17 @@ +//===-- MipsTargetTransformInfo.cpp - Mips specific TTI ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MipsTargetTransformInfo.h" + +using namespace llvm; + +bool MipsTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { + EVT VT = TLI->getValueType(DL, DataType); + return TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, + VT); +} diff --git a/llvm/lib/Target/Mips/MipsTargetTransformInfo.h b/llvm/lib/Target/Mips/MipsTargetTransformInfo.h new file mode 100644 index 000000000000..6f52eaa2f833 --- /dev/null +++ b/llvm/lib/Target/Mips/MipsTargetTransformInfo.h @@ -0,0 +1,40 @@ +//===-- MipsTargetTransformInfo.h - Mips specific TTI -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_MIPS_MIPSTARGETTRANSFORMINFO_H + +#include "MipsTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class MipsTTIImpl : public BasicTTIImplBase<MipsTTIImpl> { + using BaseT = BasicTTIImplBase<MipsTTIImpl>; + using TTI = TargetTransformInfo; + + friend BaseT; + + const MipsSubtarget *ST; + const MipsTargetLowering *TLI; + + const MipsSubtarget *getST() const { return ST; } + const MipsTargetLowering *getTLI() const { return TLI; } + +public: + explicit MipsTTIImpl(const MipsTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + bool hasDivRemOp(Type *DataType, bool IsSigned); +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index f275011018a3..85ace96eeeaf 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -49,9 +49,20 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple, SupportsExtendedDwarfLocDirective = false; SupportsSignedData = false; + PrivateGlobalPrefix = "$L__"; + PrivateLabelPrefix = PrivateGlobalPrefix; + // @TODO: Can we just disable this? WeakDirective = "\t// .weak\t"; GlobalDirective = "\t// .globl\t"; UseIntegratedAssembler = false; + + // Avoid using parens for identifiers starting with $ - ptxas does + // not expect them. + UseParensForDollarSignNames = false; + + // ptxas does not support DWARF `.file fileno directory filename' + // syntax as of v11.X. + EnableDwarfFileDirectoryDefault = false; } diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp index 1cbd650bdf06..b72cea5d03f1 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp @@ -93,7 +93,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection, // Emit DWARF .file directives in the outermost scope. outputDwarfFileDirectives(); OS << "\t.section"; - Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(), + Section->printSwitchToSection(*getStreamer().getContext().getAsmInfo(), getStreamer().getContext().getTargetTriple(), OS, SubSection); // DWARF sections are enclosed into braces - emit the open one.
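Stepping back to the MipsTTIImpl::hasDivRemOp hook introduced a few hunks above: it advertises that MIPS computes quotient and remainder with a single divide (the div instruction writes both LO and HI), so generic transforms can keep a div/rem pair on the same operands together instead of expanding each separately. A source-level sketch of the pattern that benefits, in plain C++ rather than LLVM IR:

#include <cassert>

// On a divrem-capable target, the two operations below can compile to one
// hardware divide: MIPS `div` leaves the quotient in LO and the remainder
// in HI, and mflo/mfhi read them back.
static void divrem(int Num, int Den, int &Quot, int &Rem) {
  Quot = Num / Den;
  Rem = Num % Den; // reuses the divide above rather than issuing a second one
}

int main() {
  int Q = 0, R = 0;
  divrem(22, 7, Q, R);
  assert(Q == 3 && R == 1);
}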
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3a59306c4998..b1d842122060 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -45,7 +45,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" @@ -329,7 +328,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { const DataLayout &DL = getDataLayout(); const NVPTXSubtarget &STI = TM.getSubtarget(*F); - const TargetLowering *TLI = STI.getTargetLowering(); + const auto *TLI = cast(STI.getTargetLowering()); Type *Ty = F->getReturnType(); @@ -363,7 +362,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { unsigned totalsz = DL.getTypeAllocSize(Ty); unsigned retAlignment = 0; if (!getAlign(*F, 0, retAlignment)) - retAlignment = DL.getABITypeAlignment(Ty); + retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value(); O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz << "]"; } else @@ -513,7 +512,7 @@ void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { OutStreamer->AddComment(Twine("implicit-def: ") + STI.getRegisterInfo()->getName(RegNo)); } - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, @@ -818,9 +817,13 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) { "Missed a global variable"); assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); + const NVPTXTargetMachine &NTM = static_cast(TM); + const NVPTXSubtarget &STI = + *static_cast(NTM.getSubtargetImpl()); + // Print out module-level global variables in proper order for (unsigned i = 0, e = Globals.size(); i != e; ++i) - printModuleLevelGV(Globals[i], OS2); + printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI); OS2 << '\n'; @@ -888,17 +891,18 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { clearAnnotationCache(&M); - // Close the last emitted section - if (HasDebugInfo) { - static_cast(OutStreamer->getTargetStreamer()) - ->closeLastSection(); - // Emit empty .debug_loc section for better support of the empty files. - OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}"); - } + if (auto *TS = static_cast( + OutStreamer->getTargetStreamer())) { + // Close the last emitted section + if (HasDebugInfo) { + TS->closeLastSection(); + // Emit empty .debug_loc section for better support of the empty files. + OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}"); + } - // Output last DWARF .file directives, if any. - static_cast(OutStreamer->getTargetStreamer()) - ->outputDwarfFileDirectives(); + // Output last DWARF .file directives, if any. 
+ TS->outputDwarfFileDirectives(); + } return ret; @@ -957,8 +961,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, } void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, - raw_ostream &O, - bool processDemoted) { + raw_ostream &O, bool processDemoted, + const NVPTXSubtarget &STI) { // Skip meta data if (GVar->hasSection()) { if (GVar->getSection() == "llvm.metadata") @@ -1001,7 +1005,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // (extern) declarations, no definition or initializer // Currently the only known declaration is for an automatic __local // (.shared) promoted to global. - emitPTXGlobalVariable(GVar, O); + emitPTXGlobalVariable(GVar, O, STI); O << ";\n"; return; } @@ -1095,6 +1099,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, emitPTXAddressSpace(PTy->getAddressSpace(), O); if (isManaged(*GVar)) { + if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) { + report_fatal_error( + ".attribute(.managed) requires PTX version >= 4.0 and sm_30"); + } O << " .attribute(.managed)"; } @@ -1214,9 +1222,13 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { std::vector &gvars = localDecls[f]; + const NVPTXTargetMachine &NTM = static_cast(TM); + const NVPTXSubtarget &STI = + *static_cast(NTM.getSubtargetImpl()); + for (const GlobalVariable *GV : gvars) { O << "\t// demoted variable\n\t"; - printModuleLevelGV(GV, O, true); + printModuleLevelGV(GV, O, /*processDemoted=*/true, STI); } } @@ -1282,7 +1294,8 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { } void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, - raw_ostream &O) { + raw_ostream &O, + const NVPTXSubtarget &STI) { const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. @@ -1290,6 +1303,13 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << "."; emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O); + if (isManaged(*GVar)) { + if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) { + report_fatal_error( + ".attribute(.managed) requires PTX version >= 4.0 and sm_30"); + } + O << " .attribute(.managed)"; + } if (MaybeAlign A = GVar->getAlign()) O << " .align " << A->value(); else @@ -1335,34 +1355,6 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, } } -static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { - if (Ty->isSingleValueType()) - return DL.getPrefTypeAlignment(Ty); - - auto *ATy = dyn_cast(Ty); - if (ATy) - return getOpenCLAlignment(DL, ATy->getElementType()); - - auto *STy = dyn_cast(Ty); - if (STy) { - unsigned int alignStruct = 1; - // Go through each element of the struct and find the - // largest alignment. 
- for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { - Type *ETy = STy->getElementType(i); - unsigned int align = getOpenCLAlignment(DL, ETy); - if (align > alignStruct) - alignStruct = align; - } - return alignStruct; - } - - auto *FTy = dyn_cast(Ty); - if (FTy) - return DL.getPointerPrefAlignment().value(); - return DL.getPrefTypeAlignment(Ty); -} - void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { getSymbol(I->getParent())->print(O, MAI); @@ -1373,7 +1365,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const DataLayout &DL = getDataLayout(); const AttributeList &PAL = F->getAttributes(); const NVPTXSubtarget &STI = TM.getSubtarget(*F); - const TargetLowering *TLI = STI.getTargetLowering(); + const auto *TLI = cast(STI.getTargetLowering()); + Function::const_arg_iterator I, E; unsigned paramIndex = 0; bool first = true; @@ -1430,18 +1423,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } } + auto getOptimalAlignForParam = [TLI, &DL, &PAL, F, + paramIndex](Type *Ty) -> Align { + Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL); + MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex); + return std::max(TypeAlign, ParamAlign.valueOrOne()); + }; + if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) { if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { // Just print .param .align .b8 .param[size]; - // = PAL.getparamalignment + // = optimal alignment for the element type; always multiple of + // PAL.getParamAlignment // size = typeallocsize of element type - const Align align = DL.getValueOrABITypeAlignment( - PAL.getParamAlignment(paramIndex), Ty); + Align OptimalAlign = getOptimalAlignForParam(Ty); - unsigned sz = DL.getTypeAllocSize(Ty); - O << "\t.param .align " << align.value() << " .b8 "; + O << "\t.param .align " << OptimalAlign.value() << " .b8 "; printParamName(I, paramIndex, O); - O << "[" << sz << "]"; + O << "[" << DL.getTypeAllocSize(Ty) << "]"; continue; } @@ -1454,7 +1453,6 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { if (static_cast(TM).getDrvInterface() != NVPTX::CUDA) { - Type *ETy = PTy->getPointerElementType(); int addrSpace = PTy->getAddressSpace(); switch (addrSpace) { default: @@ -1470,7 +1468,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << ".ptr .global "; break; } - O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; + Align ParamAlign = I->getParamAlign().valueOrOne(); + O << ".align " << ParamAlign.value() << " "; } printParamName(I, paramIndex, O); continue; @@ -1511,17 +1510,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { continue; } - // param has byVal attribute. So should be a pointer - auto *PTy = dyn_cast(Ty); - assert(PTy && "Param with byval attribute should be a pointer type"); - Type *ETy = PTy->getPointerElementType(); + // param has byVal attribute. 
+ Type *ETy = PAL.getParamByValType(paramIndex); + assert(ETy && "Param should have byval type"); if (isABI || isKernelFunc) { // Just print .param .align .b8 .param[size]; - // = PAL.getparamalignment + // = optimal alignment for the element type; always multiple of + // PAL.getParamAlignment // size = typeallocsize of element type - Align align = - DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy); + Align OptimalAlign = getOptimalAlignForParam(ETy); + // Work around a bug in ptxas. When PTX code takes address of // byval parameter with alignment < 4, ptxas generates code to // spill argument into memory. Alas on sm_50+ ptxas generates @@ -1533,10 +1532,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // TODO: this will need to be undone when we get to support multi-TU // device-side compilation as it breaks ABI compatibility with nvcc. // Hopefully ptxas bug is fixed by then. - if (!isKernelFunc && align < Align(4)) - align = Align(4); + if (!isKernelFunc && OptimalAlign < Align(4)) + OptimalAlign = Align(4); unsigned sz = DL.getTypeAllocSize(ETy); - O << "\t.param .align " << align.value() << " .b8 "; + O << "\t.param .align " << OptimalAlign.value() << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 2a3a38d7b2f1..cd61e99a103a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -218,7 +218,7 @@ private: void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = nullptr); void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, - bool = false); + bool processDemoted, const NVPTXSubtarget &STI); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitGlobals(const Module &M); @@ -258,7 +258,8 @@ private: // List of variables demoted to a function scope. std::map> localDecls; - void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); + void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O, + const NVPTXSubtarget &STI); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const; void printScalarConstant(const Constant *CPV, raw_ostream &O); diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp index 10bf56fd9a91..9661dffd3dae 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp @@ -17,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Utils/LowerAtomic.h" #include "MCTargetDesc/NVPTXBaseInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 888fc8ffac2c..2201eb19c80f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -83,6 +83,7 @@ bool GenericToNVVM::runOnModule(Module &M) { GV.hasInitializer() ? 
GV.getInitializer() : nullptr, "", &GV, GV.getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL); NewGV->copyAttributesFrom(&GV); + NewGV->copyMetadata(&GV, /*Offset=*/0); GVMap[&GV] = NewGV; } } @@ -269,24 +270,16 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, // ShuffleVector return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1], NewOperands[2]); - case Instruction::ExtractValue: - // ExtractValueConstantExpr - return Builder.CreateExtractValue(NewOperands[0], C->getIndices()); case Instruction::InsertValue: // InsertValueConstantExpr return Builder.CreateInsertValue(NewOperands[0], NewOperands[1], C->getIndices()); case Instruction::GetElementPtr: // GetElementPtrConstantExpr - return cast(C)->isInBounds() - ? Builder.CreateGEP( - cast(C)->getSourceElementType(), - NewOperands[0], - makeArrayRef(&NewOperands[1], NumOperands - 1)) - : Builder.CreateInBoundsGEP( - cast(C)->getSourceElementType(), - NewOperands[0], - makeArrayRef(&NewOperands[1], NumOperands - 1)); + return Builder.CreateGEP(cast(C)->getSourceElementType(), + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1), "", + cast(C)->isInBounds()); case Instruction::Select: // SelectConstantExpr return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index dd4290a605a9..48fa387e563a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -42,7 +42,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, } bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -923,8 +923,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), Addr, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, - MVT::Other, Ops); + NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops); } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, @@ -936,8 +935,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, - MVT::Other, Ops); + NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops); } else if (PointerSize == 64 ? 
SelectADDRri64(N1.getNode(), N1, Base, Offset) : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (PointerSize == 64) @@ -955,8 +953,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, - MVT::Other, Ops); + NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops); } else { if (PointerSize == 64) Opcode = pickOpcodeForVT( @@ -974,8 +971,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, - MVT::Other, Ops); + NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops); } if (!NVPTXLD) @@ -1092,7 +1088,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Addr, Chain }; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); } else if (PointerSize == 64 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { @@ -1119,7 +1115,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); } else if (PointerSize == 64 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { @@ -1169,7 +1165,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); } else { if (PointerSize == 64) { switch (N->getOpcode()) { @@ -1217,7 +1213,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); } MachineMemOperand *MemRef = cast(N)->getMemOperand(); @@ -1361,7 +1357,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { if (!Opcode) return false; SDValue Ops[] = { Addr, Chain }; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); } else if (TM.is64Bit() ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (TM.is64Bit()) { @@ -1508,7 +1504,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { if (!Opcode) return false; SDValue Ops[] = {Base, Offset, Chain}; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); } else { if (TM.is64Bit()) { switch (N->getOpcode()) { @@ -1654,7 +1650,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { if (!Opcode) return false; SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); + LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); } MachineMemOperand *MemRef = Mem->getMemOperand(); @@ -1787,7 +1783,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { getI32Imm(toTypeWidth, dl), Addr, Chain}; - NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); + NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops); } else if (PointerSize == 64 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset) : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { @@ -1806,7 +1802,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { Base, Offset, Chain}; - NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); + NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops); } else if (PointerSize == 64 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset) : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) { @@ -1832,7 +1828,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { Base, Offset, Chain}; - NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); + NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops); } else { if (PointerSize == 64) Opcode = @@ -1855,7 +1851,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { getI32Imm(toTypeWidth, dl), BasePtr, Chain}; - NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); + NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops); } if (!NVPTXST) @@ -2082,7 +2078,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { StOps.push_back(Chain); - ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps); + ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(ST), {MemRef}); @@ -2164,7 +2160,7 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { Ops.push_back(Chain); Ops.push_back(Flag); - ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops)); + ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops)); return true; } @@ -2230,7 +2226,7 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { if (!Opcode) return false; - SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops); + SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(Ret), {MemRef}); @@ -2333,8 +2329,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { } SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - SDNode *Ret = - CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops); + SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(Ret), {MemRef}); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7b5248906b56..746f652bfa36 
100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/FPEnv.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" @@ -48,7 +49,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -71,14 +71,14 @@ static cl::opt sched4reg( "nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); -static cl::opt -FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); +static cl::opt FMAContractLevelOpt( + "nvptx-fma-level", cl::Hidden, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::init(2)); static cl::opt UsePrecDivF32( - "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, + "nvptx-prec-divf32", cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2)); @@ -487,6 +487,17 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::CTLZ, Ty, Legal); } + setOperationAction(ISD::ADDC, MVT::i32, Legal); + setOperationAction(ISD::ADDE, MVT::i32, Legal); + setOperationAction(ISD::SUBC, MVT::i32, Legal); + setOperationAction(ISD::SUBE, MVT::i32, Legal); + if (STI.getPTXVersion() >= 43) { + setOperationAction(ISD::ADDC, MVT::i64, Legal); + setOperationAction(ISD::ADDE, MVT::i64, Legal); + setOperationAction(ISD::SUBC, MVT::i64, Legal); + setOperationAction(ISD::SUBE, MVT::i64, Legal); + } + setOperationAction(ISD::CTTZ, MVT::i16, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand); @@ -499,13 +510,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); // We have some custom DAG combine patterns for these nodes - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::FADD); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SREM); - setTargetDAGCombine(ISD::UREM); + setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL, + ISD::SREM, ISD::UREM}); // setcc for f16x2 needs special handling to prevent legalizer's // attempt to scalarize it due to v2i1 not being legal. 
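The hunk above makes the carry-chain nodes ISD::ADDC/ADDE/SUBC/SUBE legal for i32 unconditionally and for i64 when PTX >= 4.3; they select to PTX's add.cc/addc.cc (and sub.cc/subc.cc) through the ADD_SUB_INT_CARRY multiclass later in this patch. As a minimal sketch of the arithmetic those nodes implement (illustrative only, not from the patch; add128 is a hypothetical helper):

#include <cstdint>
#include <utility>

// Two-word addition via an explicit carry chain (illustrative sketch, not
// LLVM code): the low-word add corresponds to PTX add.cc, which sets the
// carry flag, and the high-word add to addc.cc, which consumes it.
static std::pair<uint64_t, uint64_t> add128(uint64_t ALo, uint64_t AHi,
                                            uint64_t BLo, uint64_t BHi) {
  uint64_t Lo = ALo + BLo;             // add.cc: produces a carry-out
  uint64_t Carry = (Lo < ALo) ? 1 : 0; // carry out of the low word
  uint64_t Hi = AHi + BHi + Carry;     // addc.cc: consumes the carry-in
  return {Lo, Hi};
}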
@@ -583,6 +589,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Now deduce the information based on the above mentioned // actions computeRegisterProperties(STI.getRegisterInfo()); + + setMinCmpXchgSizeInBits(32); } const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -1302,8 +1310,8 @@ std::string NVPTXTargetLowering::getPrototype( bool first = true; - unsigned OIdx = 0; - for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { + const Function *F = CB.getFunction(); + for (unsigned i = 0, e = Args.size(), OIdx = 0; i != e; ++i, ++OIdx) { Type *Ty = Args[i].Ty; if (!first) { O << ", "; @@ -1312,15 +1320,14 @@ std::string NVPTXTargetLowering::getPrototype( if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { - unsigned align = 0; + unsigned ParamAlign = 0; const CallInst *CallI = cast(&CB); // +1 because index 0 is reserved for return type alignment - if (!getAlign(*CallI, i + 1, align)) - align = DL.getABITypeAlignment(Ty); - unsigned sz = DL.getTypeAllocSize(Ty); - O << ".param .align " << align << " .b8 "; + if (!getAlign(*CallI, i + 1, ParamAlign)) + ParamAlign = getFunctionParamOptimizedAlign(F, Ty, DL).value(); + O << ".param .align " << ParamAlign << " .b8 "; O << "_"; - O << "[" << sz << "]"; + O << "[" << DL.getTypeAllocSize(Ty) << "]"; // update the index for Outs SmallVector vtparts; ComputeValueVTs(*this, DL, Ty, vtparts); @@ -1351,15 +1358,18 @@ std::string NVPTXTargetLowering::getPrototype( O << "_"; continue; } - auto *PTy = dyn_cast(Ty); - assert(PTy && "Param with byval attribute should be a pointer type"); - Type *ETy = PTy->getPointerElementType(); - Align align = Outs[OIdx].Flags.getNonZeroByValAlign(); - unsigned sz = DL.getTypeAllocSize(ETy); - O << ".param .align " << align.value() << " .b8 "; + Align ParamByValAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); + + // Try to increase alignment. This code matches logic in LowerCall when + // alignment increase is performed to increase vectorization options. + Type *ETy = Args[i].IndirectType; + Align AlignCandidate = getFunctionParamOptimizedAlign(F, ETy, DL); + ParamByValAlign = std::max(ParamByValAlign, AlignCandidate); + + O << ".param .align " << ParamByValAlign.value() << " .b8 "; O << "_"; - O << "[" << sz << "]"; + O << "[" << Outs[OIdx].Flags.getByValSize() << "]"; } O << ");"; return O.str(); @@ -1406,12 +1416,15 @@ Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // Check for function alignment information if we found that the // ultimate target is a Function - if (DirectCallee) + if (DirectCallee) { if (getAlign(*DirectCallee, Idx, Alignment)) return Align(Alignment); + // If alignment information is not available, fall back to the + // default function param optimized type alignment + return getFunctionParamOptimizedAlign(DirectCallee, Ty, DL); + } - // Call is indirect or alignment information is not available, fall back to - // the ABI type alignment + // Call is indirect, fall back to the ABI type alignment return DL.getABITypeAlign(Ty); } @@ -1436,11 +1449,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, return Chain; unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1); - SDValue tempChain = Chain; + SDValue TempChain = Chain; Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl); SDValue InFlag = Chain.getValue(1); - unsigned paramCount = 0; + unsigned ParamCount = 0; // Args.size() and Outs.size() need not match. 
// Outs.size() will be larger // * if there is an aggregate argument with multiple fields (each field // @@ -1456,173 +1469,155 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { EVT VT = Outs[OIdx].VT; Type *Ty = Args[i].Ty; + bool IsByVal = Outs[OIdx].Flags.isByVal(); - if (!Outs[OIdx].Flags.isByVal()) { - SmallVector VTs; - SmallVector Offsets; - ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); - Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL); - unsigned AllocSize = DL.getTypeAllocSize(Ty); - SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - bool NeedAlign; // Does argument declaration specify alignment? - if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { - // declare .param .align .b8 .param[]; - SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; - Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps); - NeedAlign = true; - } else { - // declare .param .b .param; - if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) { - // PTX ABI requires integral types to be at least 32 bits in - // size. FP16 is loaded/stored using i16, so it's handled - // here as well. - AllocSize = 4; - } - SDValue DeclareScalarParamOps[] = { - Chain, DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(AllocSize * 8, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag}; - Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, - DeclareScalarParamOps); - NeedAlign = false; - } - InFlag = Chain.getValue(1); + SmallVector VTs; + SmallVector Offsets; - // PTX Interoperability Guide 3.3(A): [Integer] Values shorter - // than 32-bits are sign extended or zero extended, depending on - // whether they are signed or unsigned types. This case applies - // only to scalar parameters and not to aggregate values. - bool ExtendIntegerParam = - Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; - - auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); - SmallVector StoreOperands; - for (unsigned j = 0, je = VTs.size(); j != je; ++j) { - // New store. - if (VectorInfo[j] & PVF_FIRST) { - assert(StoreOperands.empty() && "Unfinished preceding store."); - StoreOperands.push_back(Chain); - StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); - StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32)); - } + assert((!IsByVal || Args[i].IndirectType) && + "byval arg must have indirect type"); + Type *ETy = (IsByVal ? Args[i].IndirectType : Ty); + ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets); + + Align ArgAlign; + if (IsByVal) { + // The ByValAlign in the Outs[OIdx].Flags is always set at this point, + // so we don't need to worry whether it's naturally aligned or not. + // See TargetLowering::LowerCallTo(). + ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); + + // Try to increase alignment to enhance vectorization options. + ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign( + CB->getCalledFunction(), ETy, DL)); + + // Enforce minimum alignment of 4 to work around ptxas miscompile + // for sm_50+. See corresponding alignment adjustment in + // emitFunctionParamList() for details. 
+ ArgAlign = std::max(ArgAlign, Align(4)); + } else { + ArgAlign = getArgumentAlignment(Callee, CB, Ty, ParamCount + 1, DL); + } - EVT EltVT = VTs[j]; - SDValue StVal = OutVals[OIdx]; - if (ExtendIntegerParam) { - assert(VTs.size() == 1 && "Scalar can't have multiple parts."); - // zext/sext to i32 - StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND, - dl, MVT::i32, StVal); - } else if (EltVT.getSizeInBits() < 16) { - // Use 16-bit registers for small stores as it's the - // smallest general purpose register size supported by NVPTX. - StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); - } + unsigned TypeSize = + (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty)); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - // Record the value to store. - StoreOperands.push_back(StVal); - - if (VectorInfo[j] & PVF_LAST) { - unsigned NumElts = StoreOperands.size() - 3; - NVPTXISD::NodeType Op; - switch (NumElts) { - case 1: - Op = NVPTXISD::StoreParam; - break; - case 2: - Op = NVPTXISD::StoreParamV2; - break; - case 4: - Op = NVPTXISD::StoreParamV4; - break; - default: - llvm_unreachable("Invalid vector info."); - } + bool NeedAlign; // Does argument declaration specify alignment? + if (IsByVal || + (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128))) { + // declare .param .align .b8 .param[]; + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), + DAG.getConstant(ParamCount, dl, MVT::i32), + DAG.getConstant(TypeSize, dl, MVT::i32), InFlag}; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps); + NeedAlign = true; + } else { + // declare .param .b .param; + if ((VT.isInteger() || VT.isFloatingPoint()) && TypeSize < 4) { + // PTX ABI requires integral types to be at least 32 bits in + // size. FP16 is loaded/stored using i16, so it's handled + // here as well. + TypeSize = 4; + } + SDValue DeclareScalarParamOps[] = { + Chain, DAG.getConstant(ParamCount, dl, MVT::i32), + DAG.getConstant(TypeSize * 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; + Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, + DeclareScalarParamOps); + NeedAlign = false; + } + InFlag = Chain.getValue(1); - StoreOperands.push_back(InFlag); + // PTX Interoperability Guide 3.3(A): [Integer] Values shorter + // than 32-bits are sign extended or zero extended, depending on + // whether they are signed or unsigned types. This case applies + // only to scalar parameters and not to aggregate values. + bool ExtendIntegerParam = + Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; - // Adjust type of the store op if we've extended the scalar - // return value. - EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; - MaybeAlign EltAlign; - if (NeedAlign) - EltAlign = commonAlignment(ArgAlign, Offsets[j]); + auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); + SmallVector StoreOperands; + for (unsigned j = 0, je = VTs.size(); j != je; ++j) { + EVT EltVT = VTs[j]; + int CurOffset = Offsets[j]; + MaybeAlign PartAlign; + if (NeedAlign) + PartAlign = commonAlignment(ArgAlign, CurOffset); + + // New store. 
+ if (VectorInfo[j] & PVF_FIRST) { + assert(StoreOperands.empty() && "Unfinished preceding store."); + StoreOperands.push_back(Chain); + StoreOperands.push_back(DAG.getConstant(ParamCount, dl, MVT::i32)); + StoreOperands.push_back(DAG.getConstant(CurOffset, dl, MVT::i32)); + } - Chain = DAG.getMemIntrinsicNode( - Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, - TheStoreType, MachinePointerInfo(), EltAlign, - MachineMemOperand::MOStore); - InFlag = Chain.getValue(1); + SDValue StVal = OutVals[OIdx]; + if (IsByVal) { + auto PtrVT = getPointerTy(DL); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal, + DAG.getConstant(CurOffset, dl, PtrVT)); + StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(), + PartAlign); + } else if (ExtendIntegerParam) { + assert(VTs.size() == 1 && "Scalar can't have multiple parts."); + // zext/sext to i32 + StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND, + dl, MVT::i32, StVal); + } - // Cleanup. - StoreOperands.clear(); - } - ++OIdx; + if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) { + // Use 16-bit registers for small stores as it's the + // smallest general purpose register size supported by NVPTX. + StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); } - assert(StoreOperands.empty() && "Unfinished parameter store."); - if (VTs.size() > 0) - --OIdx; - ++paramCount; - continue; - } - // ByVal arguments - SmallVector VTs; - SmallVector Offsets; - auto *PTy = dyn_cast(Args[i].Ty); - assert(PTy && "Type of a byval parameter should be pointer"); - ComputePTXValueVTs(*this, DL, PTy->getPointerElementType(), VTs, &Offsets, - 0); + // Record the value to store. + StoreOperands.push_back(StVal); - // declare .param .align .b8 .param[]; - unsigned sz = Outs[OIdx].Flags.getByValSize(); - SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); - // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, - // so we don't need to worry about natural alignment or not. - // See TargetLowering::LowerCallTo(). - - // Enforce minumum alignment of 4 to work around ptxas miscompile - // for sm_50+. See corresponding alignment adjustment in - // emitFunctionParamList() for details. 
- if (ArgAlign < Align(4)) - ArgAlign = Align(4); - SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(sz, dl, MVT::i32), InFlag}; - Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps); - InFlag = Chain.getValue(1); - for (unsigned j = 0, je = VTs.size(); j != je; ++j) { - EVT elemtype = VTs[j]; - int curOffset = Offsets[j]; - unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset); - auto PtrVT = getPointerTy(DL); - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], - DAG.getConstant(curOffset, dl, PtrVT)); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), PartAlign); - if (elemtype.getSizeInBits() < 16) { - theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); - } - SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(curOffset, dl, MVT::i32), - theVal, InFlag }; - Chain = DAG.getMemIntrinsicNode( - NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, - MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore); + if (VectorInfo[j] & PVF_LAST) { + unsigned NumElts = StoreOperands.size() - 3; + NVPTXISD::NodeType Op; + switch (NumElts) { + case 1: + Op = NVPTXISD::StoreParam; + break; + case 2: + Op = NVPTXISD::StoreParamV2; + break; + case 4: + Op = NVPTXISD::StoreParamV4; + break; + default: + llvm_unreachable("Invalid vector info."); + } - InFlag = Chain.getValue(1); + StoreOperands.push_back(InFlag); + + // Adjust type of the store op if we've extended the scalar + // return value. + EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT; + + Chain = DAG.getMemIntrinsicNode( + Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, + TheStoreType, MachinePointerInfo(), PartAlign, + MachineMemOperand::MOStore); + InFlag = Chain.getValue(1); + + // Cleanup. 
+ StoreOperands.clear(); + } + if (!IsByVal) + ++OIdx; } - ++paramCount; + assert(StoreOperands.empty() && "Unfinished parameter store."); + if (!IsByVal && VTs.size() > 0) + --OIdx; + ++ParamCount; } GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); @@ -1729,7 +1724,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallArgBeginOps); InFlag = Chain.getValue(1); - for (unsigned i = 0, e = paramCount; i != e; ++i) { + for (unsigned i = 0, e = ParamCount; i != e; ++i) { unsigned opcode; if (i == (e - 1)) opcode = NVPTXISD::LastCallArg; @@ -1865,7 +1860,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = Ret.getValue(1); InFlag = Ret.getValue(2); - if (ProxyRegTruncates[i].hasValue()) { + if (ProxyRegTruncates[i]) { Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret); } @@ -2249,7 +2244,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { assert(Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only"); SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), LD->getAlignment(), + LD->getPointerInfo(), LD->getAlign(), LD->getMemOperand()->getFlags()); SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); // The legalizer (the caller) is expecting two values from the legalized @@ -2414,7 +2409,7 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, - ST->getAlignment(), ST->getMemOperand()->getFlags()); + ST->getAlign(), ST->getMemOperand()->getFlags()); return Result; } @@ -2431,29 +2426,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); } -// Check to see if the kernel argument is image*_t or sampler_t - -static bool isImageOrSamplerVal(const Value *arg, const Module *context) { - static const char *const specialTypes[] = { "struct._image2d_t", - "struct._image3d_t", - "struct._sampler_t" }; - - Type *Ty = arg->getType(); - auto *PTy = dyn_cast(Ty); - - if (!PTy) - return false; - - if (!context) - return false; - - auto *STy = dyn_cast(PTy->getPointerElementType()); - if (!STy || STy->isLiteral()) - return false; - - return llvm::is_contained(specialTypes, STy->getName()); -} - SDValue NVPTXTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, @@ -2495,19 +2467,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { Type *Ty = argTypes[i]; - // If the kernel argument is image*_t or sampler_t, convert it to - // a i32 constant holding the parameter position. This can later - // matched in the AsmPrinter to output the correct mangled name. - if (isImageOrSamplerVal( - theArgs[i], - (theArgs[i]->getParent() ? 
theArgs[i]->getParent()->getParent() : nullptr))) { - assert(isKernelFunction(*F) && - "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); - continue; - } - if (theArgs[i]->use_empty()) { // argument is dead if (Ty->isAggregateType() || Ty->isIntegerTy(128)) { @@ -2658,7 +2617,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); + const MachineFunction &MF = DAG.getMachineFunction(); + const Function &F = MF.getFunction(); Type *RetTy = MF.getFunction().getReturnType(); bool isABI = (STI.getSmVersion() >= 20); @@ -2673,7 +2633,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); auto VectorInfo = VectorizePTXValueVTs( - VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1)); + VTs, Offsets, + RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL) + : Align(1)); // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether @@ -4293,6 +4255,26 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return false; } +/// getFunctionParamOptimizedAlign - since function arguments are passed via +/// .param space, we may want to increase their alignment in a way that +/// ensures that we can effectively vectorize their loads & stores. We can +/// increase alignment only if the function has internal or private +/// linkage, as for other linkage types callers may already rely on default +/// alignment. To allow using 128-bit vectorized loads/stores, this function +/// ensures that alignment is 16 or greater. +Align NVPTXTargetLowering::getFunctionParamOptimizedAlign( + const Function *F, Type *ArgTy, const DataLayout &DL) const { + const uint64_t ABITypeAlign = DL.getABITypeAlign(ArgTy).value(); + + // If a function has linkage different from internal or private, we + // must use default ABI alignment as external users rely on it. + if (!F->hasLocalLinkage()) + return Align(ABITypeAlign); + + assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage"); + return Align(std::max(uint64_t(16), ABITypeAlign)); +} + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. /// Used to guide target specific optimizations, like loop strength reduction @@ -4516,6 +4498,17 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } +static SDValue PerformStoreRetvalCombine(SDNode *N) { + // Operands from the 2nd to the last one are the values to be stored + for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I) + if (!N->getOperand(I).isUndef()) + return SDValue(); + + // Operand 0 is the previous value in the chain. Cannot return EntryToken + // as the previous value will become unused and eliminated later. + return N->getOperand(0); +} + /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 
/// static SDValue PerformADDCombine(SDNode *N, @@ -4844,6 +4837,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, return PerformREMCombine(N, DCI, OptLevel); case ISD::SETCC: return PerformSETCCCombine(N, DCI); + case NVPTXISD::StoreRetval: + case NVPTXISD::StoreRetvalV2: + case NVPTXISD::StoreRetvalV4: + return PerformStoreRetvalCombine(N); } return SDValue(); } @@ -5130,8 +5127,69 @@ void NVPTXTargetLowering::ReplaceNodeResults( } } +NVPTXTargetLowering::AtomicExpansionKind +NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + Type *Ty = AI->getValOperand()->getType(); + + if (AI->isFloatingPointOperation()) { + if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { + if (Ty->isFloatTy()) + return AtomicExpansionKind::None; + if (Ty->isDoubleTy() && STI.hasAtomAddF64()) + return AtomicExpansionKind::None; + } + return AtomicExpansionKind::CmpXChg; + } + + assert(Ty->isIntegerTy() && "Ty should be integer at this point"); + auto ITy = cast(Ty); + + switch (AI->getOperation()) { + default: + return AtomicExpansionKind::CmpXChg; + case AtomicRMWInst::BinOp::And: + case AtomicRMWInst::BinOp::Or: + case AtomicRMWInst::BinOp::Xor: + case AtomicRMWInst::BinOp::Xchg: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomBitwise64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + case AtomicRMWInst::BinOp::Add: + case AtomicRMWInst::BinOp::Sub: + case AtomicRMWInst::BinOp::Max: + case AtomicRMWInst::BinOp::Min: + case AtomicRMWInst::BinOp::UMax: + case AtomicRMWInst::BinOp::UMin: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomMinMax64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + } + + return AtomicExpansionKind::CmpXChg; +} + // Pin NVPTXTargetObjectFile's vtables to this file. -NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {} +NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default; MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 13829b924d4b..fb09f99a019d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -451,6 +451,16 @@ public: MachineFunction &MF, unsigned Intrinsic) const override; + /// getFunctionParamOptimizedAlign - since function arguments are passed via + /// .param space, we may want to increase their alignment in a way that + /// ensures that we can effectively vectorize their loads & stores. We can + /// increase alignment only if the function has internal or private + /// linkage, as for other linkage types callers may already rely on default + /// alignment. To allow using 128-bit vectorized loads/stores, this function + /// ensures that alignment is 16 or greater. 
+ Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, + const DataLayout &DL) const; + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type /// Used to guide target specific optimizations, like loop strength @@ -551,6 +561,17 @@ public: // instruction, so we say that ctlz is cheap to speculate. bool isCheapToSpeculateCtlz() const override { return true; } + AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { + return AtomicExpansionKind::None; + } + + AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { + return AtomicExpansionKind::None; + } + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 953d95e55f65..8df6f13aa68e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; // Pin the vtable to this file. void NVPTXInstrInfo::anchor() {} -NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {} +NVPTXInstrInfo::NVPTXInstrInfo() : RegInfo() {} void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 22084cddc092..6f9c40feb10e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -145,6 +145,8 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; def True : Predicate<"true">; def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; +def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">; +def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; @@ -152,12 +154,16 @@ def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">; def hasPTX65 : Predicate<"Subtarget->getPTXVersion() >= 65">; def hasPTX70 : Predicate<"Subtarget->getPTXVersion() >= 70">; def hasPTX71 : Predicate<"Subtarget->getPTXVersion() >= 71">; +def hasPTX72 : Predicate<"Subtarget->getPTXVersion() >= 72">; def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; +def hasSM32 : Predicate<"Subtarget->getSmVersion() >= 32">; +def hasSM53 : Predicate<"Subtarget->getSmVersion() >= 53">; def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">; def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">; def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">; def hasSM80 : Predicate<"Subtarget->getSmVersion() >= 80">; +def hasSM86 : Predicate<"Subtarget->getSmVersion() >= 86">; // non-sync shfl instructions are not available on sm_70+ in PTX6.4+ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" @@ -199,17 +205,29 @@ multiclass I3 { [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; } -// Template for instructions which take 3 int32 args. The instructions are +// Template for instructions which take 3 int args. The instructions are // named ".s32" (e.g. "addc.cc.s32"). 
-multiclass ADD_SUB_INT_32 { - def i32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; +multiclass ADD_SUB_INT_CARRY { + let hasSideEffects = 1 in { + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>, + Requires<[hasPTX43]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>, + Requires<[hasPTX43]>; + } } // Template for instructions which take three fp64 or fp32 args. The @@ -579,14 +597,13 @@ defm SUB_i1 : ADD_SUB_i1; defm ADD : I3<"add.s", add>; defm SUB : I3<"sub.s", sub>; -// int32 addition and subtraction with carry-out. -// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). -defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; -defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; +// int32 and int64 addition and subtraction with carry-out. +defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>; +defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>; -// int32 addition and subtraction with carry-in and carry-out. -defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; -defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; +// int32 and int64 addition and subtraction with carry-in and carry-out. +defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>; +defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>; defm MULT : I3<"mul.lo.s", mul>; @@ -2653,6 +2670,8 @@ def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>; def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>; +def BITCONVERT_32_F2F16x2 : F_BITCONVERT<"32", Float32Regs, Float16x2Regs>; +def BITCONVERT_32_F16x22F : F_BITCONVERT<"32", Float16x2Regs, Float32Regs>; // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where // we cannot specify floating-point literals in isel patterns. Therefore, we diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index ec069a0a02ae..1192cc078408 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -182,7 +182,7 @@ foreach sync = [false, true] in { foreach threadmask_imm = THREADMASK_INFO.ret in { def : SHFL_INSTR, - Requires; + Requires; } } } @@ -223,21 +223,21 @@ defm VOTE_SYNC_BALLOT : VOTE_SYNC { - def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value), + def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value), "match.any.sync." 
# ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>, + [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value), + def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value), + def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; - def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value), + def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; } @@ -248,25 +248,25 @@ defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC { - def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; - def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; } defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC + NVPTXRegClass src_regclass, Intrinsic IntOP, list Preds = []> : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0), OpcStr, - [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>; + [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>, + Requires; // We need a full string for OpcStr here because we need to deal with the case // like INT_PTX_NATIVE_POWR_F. 
class F_MATH_2 + NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP, + list Preds = []> : NVPTXInst<(outs t_regclass:$dst), (ins s0_regclass:$src0, s1_regclass:$src1), OpcStr, - [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>; + [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>, + Requires; class F_MATH_3 + NVPTXRegClass s2_regclass, Intrinsic IntOP, list Preds = []> : NVPTXInst<(outs t_regclass:$dst), (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), OpcStr, [(set t_regclass:$dst, - (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>; + (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>, + Requires; // // MISC @@ -587,17 +591,145 @@ def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>; def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; +def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f, + [hasPTX70, hasSM80]>; +def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f, + [hasPTX70, hasSM80]>; +def INT_NVVM_FMIN_XORSIGN_ABS_F : + F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f, + [hasPTX72, hasSM86]>; +def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F : + F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f, + [hasPTX72, hasSM86]>; +def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F : + F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f, + [hasPTX72, hasSM86]>; +def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F : + F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f, + [hasPTX72, hasSM86]>; def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>; def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; +def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f, + [hasPTX70, hasSM80]>; +def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f, + [hasPTX70, hasSM80]>; +def INT_NVVM_FMAX_XORSIGN_ABS_F : + F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f, + [hasPTX72, hasSM86]>; +def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F : + F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f, + [hasPTX72, hasSM86]>; +def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F : + F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f, + [hasPTX72, hasSM86]>; +def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F : + F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f, + [hasPTX72, hasSM86]>; def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, 
$src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>; def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>; +// +// Min Max f16, f16x2, bf16, bf16x2 +// + +class MIN_MAX_TUPLE Preds = [hasPTX70, hasSM80]> { + string Variant = V; + Intrinsic Intr = I; + NVPTXRegClass RegClass = RC; + list Predicates = Preds; +} + +multiclass MIN_MAX { + foreach P = [ + MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16, + int_nvvm_fmax_f16), Float16Regs>, + MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, + int_nvvm_fmax_ftz_f16), Float16Regs>, + MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, + int_nvvm_fmax_nan_f16), Float16Regs>, + MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>, + MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"), + int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16), + Float16Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16), + Float16Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), + int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16), + Float16Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_nan_xorsign_abs_f16, + int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, + int_nvvm_fmax_f16x2), Float16x2Regs>, + MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>, + MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>, + MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>, + MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2), + Float16x2Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2), + Float16x2Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2), + Float16x2Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2, + int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2), + Float16x2Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"), + int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>, + MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, + int_nvvm_fmax_nan_bf16), Int16Regs>, + MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"), + int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16), + Int16Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"), + int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16), + Int16Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2, + int_nvvm_fmax_bf16x2), Int32Regs>, + MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"), 
+ int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>, + MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2), + Int32Regs, [hasPTX72, hasSM86]>, + MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"), + int_nvvm_fmin_nan_xorsign_abs_bf16x2, + int_nvvm_fmax_nan_xorsign_abs_bf16x2), + Int32Regs, [hasPTX72, hasSM86]>] in { + def P.Variant : F_MATH_2; + } +} + +defm INT_NVVM_FMIN : MIN_MAX<"min">; +defm INT_NVVM_FMAN : MIN_MAX<"max">; // // Multiplication @@ -719,6 +851,19 @@ def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_fabs_d>; +// +// Abs, Neg bf16, bf16x2 +// + +def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs, + Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>; +def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs, + Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>; +def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs, + Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>; +def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs, + Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>; + // // Round // @@ -762,6 +907,10 @@ def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; +def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", + Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX70, hasSM75]>; +def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", + Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX70, hasSM75]>; def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; @@ -788,35 +937,72 @@ def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", // Fma // -def INT_NVVM_FMA_RN_FTZ_F - : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>; -def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;", - Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>; -def INT_NVVM_FMA_RZ_FTZ_F - : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>; -def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;", - Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>; -def INT_NVVM_FMA_RM_FTZ_F - : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>; -def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;", - Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>; -def INT_NVVM_FMA_RP_FTZ_F - : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>; -def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;", - Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>; - -def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;", - Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>; -def 
INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;", - Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>; -def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;", - Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>; -def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;", - Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>; +class FMA_TUPLE Preds = []> { + string Variant = V; + Intrinsic Intr = I; + NVPTXRegClass RegClass = RC; + list Predicates = Preds; +} + +multiclass FMA_INST { + foreach P = [ + FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>, + FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>, + FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>, + FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>, + + FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>, + FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>, + FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>, + FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>, + FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>, + FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>, + FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>, + FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>, + + FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs, + [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs, + [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs, + [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs, + [hasPTX70, hasSM80]>, + FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, + [hasPTX70, hasSM80]>, + + FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, + [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs, + [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs, + [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, + Float16x2Regs, [hasPTX42, hasSM53]>, + FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs, + [hasPTX70, hasSM80]>, + FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, + Float16x2Regs, [hasPTX70, hasSM80]>, + + FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX70, hasSM80]>, + FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs, + [hasPTX70, hasSM80]>, + + FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs, + [hasPTX70, hasSM80]>, + FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs, + [hasPTX70, hasSM80]> + ] in { + def P.Variant : + F_MATH_3; + } +} + +defm INT_NVVM_FMA : FMA_INST; // // Rcp @@ -848,6 +1034,8 @@ def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>; +def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>; def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; @@ -1472,13 +1660,13 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2; + ".max", atomic_load_max_64_g, i64imm, imm, [hasSM32]>; defm 
INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2; + ".max", atomic_load_max_64_s, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2; + atomic_load_max_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2; + ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2; + ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2; + ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2; + atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2; + ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>; // atom_min @@ -1532,13 +1720,13 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2; + ".min", atomic_load_min_64_g, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2; + ".min", atomic_load_min_64_s, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2; + atomic_load_min_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2; + ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2; + ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2; + ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2; + atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2; + ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>; // atom_inc atom_dec @@ -1612,13 +1800,13 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2; + atomic_load_and_64_g, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2; + atomic_load_and_64_s, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2; + atomic_load_and_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2; + ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM32]>; // atom_or @@ -1644,13 +1832,13 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2; defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2; + atomic_load_or_64_g, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2; + atomic_load_or_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2; + ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2; + atomic_load_or_64_s, i64imm, imm, [hasSM32]>; // atom_xor @@ -1676,13 +1864,13 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2; + atomic_load_xor_64_g, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2; + atomic_load_xor_64_s, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2; + atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>; defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2; + ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>; // atom_cas @@ -1788,7 +1976,7 @@ multiclass ATOM3P_impl; } -// Constructs instrinsic name and instruction asm strings. +// Constructs intrinsic name and instruction asm strings. 
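The recurring change in the atomics hunks above is that each 64-bit min/max/and/or/xor variant now carries an explicit [hasSM32] predicate list, so those encodings are only selectable when the subtarget reports SM 3.2 or newer; the ATOM2N_impl helper that follows builds on the same Requires machinery. As a minimal illustration of predicate gating outside TableGen (the names Feature, AtomicPattern, and selectAtomic are invented for this sketch, not LLVM API):

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

// Stand-ins for subtarget feature bits such as hasSM32.
enum Feature : uint32_t { HasSM32 = 1u << 0, HasSM53 = 1u << 1 };

struct AtomicPattern {
  std::string Mnemonic;      // e.g. "atom.global.max.s64"
  uint32_t RequiredFeatures; // every bit must be set on the subtarget
};

// Returns the first pattern whose predicates are all satisfied, mimicking
// how a Requires<[hasSM32]> list filters candidates during selection.
std::optional<std::string>
selectAtomic(const std::vector<AtomicPattern> &Table, uint32_t FeatureBits) {
  for (const AtomicPattern &P : Table)
    if ((FeatureBits & P.RequiredFeatures) == P.RequiredFeatures)
      return P.Mnemonic;
  return std::nullopt; // no legal encoding on this subtarget
}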
multiclass ATOM2N_impl, + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, Requires<[noHWROT32]>; def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f655f25602bc..f57c2920449b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -115,7 +115,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { /* SrcAlign */ LI->getAlign(), /* DestAlign */ SI->getAlign(), /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile(), TTI); + /* DstIsVolatile */ SI->isVolatile(), + /* CanOverlap */ true, TTI); SI->eraseFromParent(); LI->eraseFromParent(); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index 67aa49132016..53812d7552a9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -88,16 +88,17 @@ // cancel the addrspacecast pair this pass emits. //===----------------------------------------------------------------------===// +#include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "NVPTXUtilities.h" -#include "MCTargetDesc/NVPTXBaseInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include #define DEBUG_TYPE "nvptx-lower-args" @@ -206,10 +207,8 @@ static void convertToParamAS(Value *OldUser, Value *Param) { // We've created a new instruction. Queue users of the old instruction to // be converted and the instruction itself to be deleted. We can't delete // the old instruction yet, because it's still in use by a load somewhere. - llvm::for_each( - I.OldInstruction->users(), [NewInst, &ItemsToConvert](Value *V) { - ItemsToConvert.push_back({cast(V), NewInst}); - }); + for (Value *V : I.OldInstruction->users()) + ItemsToConvert.push_back({cast(V), NewInst}); InstructionsToDelete.push_back(I.OldInstruction); } @@ -222,18 +221,99 @@ static void convertToParamAS(Value *OldUser, Value *Param) { // E.g if we have Value = Load(BitCast(GEP(arg))), InstructionsToDelete will // have {GEP,BitCast}. GEP can't be deleted first, because it's still used by // the BitCast. - llvm::for_each(reverse(InstructionsToDelete), - [](Instruction *I) { I->eraseFromParent(); }); + for (Instruction *I : llvm::reverse(InstructionsToDelete)) + I->eraseFromParent(); } -void NVPTXLowerArgs::handleByValParam(Argument *Arg) { +// Adjust alignment of arguments passed byval in .param address space. We can +// increase alignment of such arguments in a way that ensures that we can +// effectively vectorize their loads. We should also traverse all loads from +// byval pointer and adjust their alignment, if those were using known offset. +// Such alignment changes must be conformed with parameter store and load in +// NVPTXTargetLowering::LowerCall. 
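Before the implementation that follows, the alignment rule in the comment above is worth making concrete: if the byval argument as a whole can be given alignment A, then a load at constant byte offset O within it can be given alignment gcd(A, O), with gcd(A, 0) = A. A self-contained sketch of the computation (plain C++, standing in for LLVM's greatestCommonDivisor helper):

#include <cassert>
#include <cstdint>
#include <iostream>

// gcd(A, 0) == A, so a load at offset 0 inherits the full argument alignment.
static uint64_t gcd(uint64_t A, uint64_t B) {
  while (B != 0) {
    uint64_t T = B;
    B = A % B;
    A = T;
  }
  return A;
}

// Alignment provable for a load at ByteOffset inside an object aligned to
// ArgAlign; mirrors greatestCommonDivisor(NewArgAlign, CurLoad.Offset).
static uint64_t loadAlignAt(uint64_t ArgAlign, uint64_t ByteOffset) {
  assert((ArgAlign & (ArgAlign - 1)) == 0 && "alignment must be a power of 2");
  return gcd(ArgAlign, ByteOffset);
}

int main() {
  std::cout << loadAlignAt(16, 0) << '\n'; // 16: vectorizable as one v4i32
  std::cout << loadAlignAt(16, 4) << '\n'; // 4: only scalar-aligned
  std::cout << loadAlignAt(16, 8) << '\n'; // 8: v2i32 at best
}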
+static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS, + const NVPTXTargetLowering *TLI) { Function *Func = Arg->getParent(); - Instruction *FirstInst = &(Func->getEntryBlock().front()); - PointerType *PType = dyn_cast(Arg->getType()); + Type *StructType = Arg->getParamByValType(); + const DataLayout DL(Func->getParent()); + + uint64_t NewArgAlign = + TLI->getFunctionParamOptimizedAlign(Func, StructType, DL).value(); + uint64_t CurArgAlign = + Arg->getAttribute(Attribute::Alignment).getValueAsInt(); + + if (CurArgAlign >= NewArgAlign) + return; + + LLVM_DEBUG(dbgs() << "Try to use alignment " << NewArgAlign << " instead of " + << CurArgAlign << " for " << *Arg << '\n'); + + auto NewAlignAttr = + Attribute::get(Func->getContext(), Attribute::Alignment, NewArgAlign); + Arg->removeAttr(Attribute::Alignment); + Arg->addAttr(NewAlignAttr); + + struct Load { + LoadInst *Inst; + uint64_t Offset; + }; + + struct LoadContext { + Value *InitialVal; + uint64_t Offset; + }; + + SmallVector Loads; + std::queue Worklist; + Worklist.push({ArgInParamAS, 0}); + + while (!Worklist.empty()) { + LoadContext Ctx = Worklist.front(); + Worklist.pop(); + + for (User *CurUser : Ctx.InitialVal->users()) { + if (auto *I = dyn_cast(CurUser)) { + Loads.push_back({I, Ctx.Offset}); + continue; + } + + if (auto *I = dyn_cast(CurUser)) { + Worklist.push({I, Ctx.Offset}); + continue; + } + + if (auto *I = dyn_cast(CurUser)) { + APInt OffsetAccumulated = + APInt::getZero(DL.getIndexSizeInBits(ADDRESS_SPACE_PARAM)); + + if (!I->accumulateConstantOffset(DL, OffsetAccumulated)) + continue; + + uint64_t OffsetLimit = -1; + uint64_t Offset = OffsetAccumulated.getLimitedValue(OffsetLimit); + assert(Offset != OffsetLimit && "Expect Offset less than UINT64_MAX"); + + Worklist.push({I, Ctx.Offset + Offset}); + continue; + } + + llvm_unreachable("All users must be one of: load, " + "bitcast, getelementptr."); + } + } - assert(PType && "Expecting pointer type in handleByValParam"); + for (Load &CurLoad : Loads) { + Align NewLoadAlign(greatestCommonDivisor(NewArgAlign, CurLoad.Offset)); + Align CurLoadAlign(CurLoad.Inst->getAlign()); + CurLoad.Inst->setAlignment(std::max(NewLoadAlign, CurLoadAlign)); + } +} - Type *StructType = PType->getPointerElementType(); +void NVPTXLowerArgs::handleByValParam(Argument *Arg) { + Function *Func = Arg->getParent(); + Instruction *FirstInst = &(Func->getEntryBlock().front()); + Type *StructType = Arg->getParamByValType(); + assert(StructType && "Missing byval type"); auto IsALoadChain = [&](Value *Start) { SmallVector ValuesToCheck = {Start}; @@ -269,10 +349,19 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { Value *ArgInParamAS = new AddrSpaceCastInst( Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(), FirstInst); - llvm::for_each(UsersToUpdate, [ArgInParamAS](Value *V) { + for (Value *V : UsersToUpdate) convertToParamAS(V, ArgInParamAS); - }); LLVM_DEBUG(dbgs() << "No need to copy " << *Arg << "\n"); + + // Further optimizations require target lowering info. + if (!TM) + return; + + const auto *TLI = + cast(TM->getSubtargetImpl()->getTargetLowering()); + + adjustByValArgAlignment(Arg, ArgInParamAS, TLI); + return; } @@ -284,7 +373,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { // later load/stores assume that alignment, and we are going to replace // the use of the byval parameter with this alloca instruction. 
AllocA->setAlignment(Func->getParamAlign(Arg->getArgNo()) - .getValueOr(DL.getPrefTypeAlign(StructType))); + .value_or(DL.getPrefTypeAlign(StructType))); Arg->replaceAllUsesWith(AllocA); Value *ArgInParam = new AddrSpaceCastInst( diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index cf63fc33e621..0a7b9cf468a6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -26,6 +26,13 @@ private: public: NVPTXMachineFunctionInfo(MachineFunction &MF) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override { + return DestMF.cloneInfo(*this); + } + /// Returns the index for the symbol \p Symbol. If the symbol was previously /// added, the same index is returned. Otherwise, the symbol is added and the /// new index is returned. diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index f4934f0bc20b..4bd820e98f05 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -64,8 +64,12 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { // This is needed in debug mode when code cleanup passes are not executed, // but we need the handle access to be eliminated because they are not // valid instructions when image handles are disabled. - for (MachineInstr *MI : InstrsToRemove) - MI->eraseFromParent(); + for (MachineInstr *MI : InstrsToRemove) { + unsigned DefReg = MI->getOperand(0).getReg(); + // Only those that are not used can be removed. + if (MF.getRegInfo().use_nodbg_empty(DefReg)) + MI->eraseFromParent(); + } return Changed; } diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 5a6440c91fca..a03492a92bac 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -23,7 +23,7 @@ using namespace llvm; #include "NVPTXGenSubtargetInfo.inc" static cl::opt - NoF16Math("nvptx-no-f16-math", cl::ZeroOrMore, cl::Hidden, + NoF16Math("nvptx-no-f16-math", cl::Hidden, cl::desc("NVPTX Specific: Disable generation of f16 math ops."), cl::init(false)); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 0a1c61a35795..597b8af176a2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -237,7 +237,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { } TargetTransformInfo -NVPTXTargetMachine::getTargetTransformInfo(const Function &F) { +NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(NVPTXTTIImpl(this, F)); } @@ -330,6 +330,8 @@ void NVPTXPassConfig::addIRPasses() { addStraightLineScalarOptimizationPasses(); } + addPass(createAtomicExpandPass()); + // === LSR and other generic IR passes === TargetPassConfig::addIRPasses(); // EarlyCSE is not always strong enough to clean up what LSR produces.
For diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index 7a69197abcff..491e721479d3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -65,7 +65,7 @@ public: void adjustPassManager(PassManagerBuilder &) override; void registerPassBuilderCallbacks(PassBuilder &PB) override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool isMachineVerifierClean() const override { return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 4645671a0cd8..37b0a44243cb 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -17,7 +17,7 @@ namespace llvm { class NVPTXTargetObjectFile : public TargetLoweringObjectFile { public: - NVPTXTargetObjectFile() {} + NVPTXTargetObjectFile() = default; ~NVPTXTargetObjectFile() override; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 466aa7130216..fc4bc6b3cbf7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -96,7 +96,7 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { // Instructions that read threadIdx are obviously divergent. if (readsThreadIndex(II) || readsLaneId(II)) return true; - // Handle the NVPTX atomic instrinsics that cannot be represented as an + // Handle the NVPTX atomic intrinsics that cannot be represented as an // atomic IR instruction. if (isNVVMAtomic(II)) return true; @@ -145,11 +145,15 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { Optional Special; FtzRequirementTy FtzRequirement = FTZ_Any; + // Denormal handling is guarded by different attributes depending on the + // type (denormal-fp-math vs denormal-fp-math-f32), take note of halfs. + bool IsHalfTy = false; SimplifyAction() = default; - SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq) - : IID(IID), FtzRequirement(FtzReq) {} + SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq, + bool IsHalfTy = false) + : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {} // Cast operations don't have anything to do with FTZ, so we skip that // argument. 
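The new IsHalfTy flag matters because the function-level denormal controls are split by type: f32 behaviour is governed by the "denormal-fp-math-f32" attribute, while half (and other) types fall under the generic "denormal-fp-math". A reduced sketch of the lookup the patch performs further down (the real code parses the attribute with parseDenormalFPAttribute; the string handling here is a simplification):

#include <string>

// Pick the attribute that governs denormal flushing for the type being
// simplified: f32 has its own control, f16/f16x2 use the generic one.
inline const char *denormalAttrName(bool IsHalfTy) {
  return IsHalfTy ? "denormal-fp-math" : "denormal-fp-math-f32";
}

// Attribute values look like "<output>,<input>"; FTZ corresponds to an
// output mode other than "ieee" ("preserve-sign" and "positive-zero" both
// flush). An absent attribute is treated as IEEE here.
inline bool ftzEnabled(const std::string &AttrValue) {
  std::string Output = AttrValue.substr(0, AttrValue.find(','));
  return !Output.empty() && Output != "ieee";
}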
@@ -191,18 +195,66 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { return {Intrinsic::fma, FTZ_MustBeOff}; case Intrinsic::nvvm_fma_rn_ftz_f: return {Intrinsic::fma, FTZ_MustBeOn}; + case Intrinsic::nvvm_fma_rn_f16: + return {Intrinsic::fma, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fma_rn_ftz_f16: + return {Intrinsic::fma, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fma_rn_f16x2: + return {Intrinsic::fma, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fma_rn_ftz_f16x2: + return {Intrinsic::fma, FTZ_MustBeOn, true}; case Intrinsic::nvvm_fmax_d: return {Intrinsic::maxnum, FTZ_Any}; case Intrinsic::nvvm_fmax_f: return {Intrinsic::maxnum, FTZ_MustBeOff}; case Intrinsic::nvvm_fmax_ftz_f: return {Intrinsic::maxnum, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmax_nan_f: + return {Intrinsic::maximum, FTZ_MustBeOff}; + case Intrinsic::nvvm_fmax_ftz_nan_f: + return {Intrinsic::maximum, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmax_f16: + return {Intrinsic::maxnum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmax_ftz_f16: + return {Intrinsic::maxnum, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fmax_f16x2: + return {Intrinsic::maxnum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmax_ftz_f16x2: + return {Intrinsic::maxnum, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fmax_nan_f16: + return {Intrinsic::maximum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmax_ftz_nan_f16: + return {Intrinsic::maximum, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fmax_nan_f16x2: + return {Intrinsic::maximum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmax_ftz_nan_f16x2: + return {Intrinsic::maximum, FTZ_MustBeOn, true}; case Intrinsic::nvvm_fmin_d: return {Intrinsic::minnum, FTZ_Any}; case Intrinsic::nvvm_fmin_f: return {Intrinsic::minnum, FTZ_MustBeOff}; case Intrinsic::nvvm_fmin_ftz_f: return {Intrinsic::minnum, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmin_nan_f: + return {Intrinsic::minimum, FTZ_MustBeOff}; + case Intrinsic::nvvm_fmin_ftz_nan_f: + return {Intrinsic::minimum, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmin_f16: + return {Intrinsic::minnum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmin_ftz_f16: + return {Intrinsic::minnum, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fmin_f16x2: + return {Intrinsic::minnum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmin_ftz_f16x2: + return {Intrinsic::minnum, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fmin_nan_f16: + return {Intrinsic::minimum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmin_ftz_nan_f16: + return {Intrinsic::minimum, FTZ_MustBeOn, true}; + case Intrinsic::nvvm_fmin_nan_f16x2: + return {Intrinsic::minimum, FTZ_MustBeOff, true}; + case Intrinsic::nvvm_fmin_ftz_nan_f16x2: + return {Intrinsic::minimum, FTZ_MustBeOn, true}; case Intrinsic::nvvm_round_d: return {Intrinsic::round, FTZ_Any}; case Intrinsic::nvvm_round_f: @@ -316,9 +368,10 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // intrinsic, we don't have to look up any module metadata, as // FtzRequirementTy will be FTZ_Any.) if (Action.FtzRequirement != FTZ_Any) { - StringRef Attr = II->getFunction() - ->getFnAttribute("denormal-fp-math-f32") - .getValueAsString(); + const char *AttrName = + Action.IsHalfTy ? 
"denormal-fp-math" : "denormal-fp-math-f32"; + StringRef Attr = + II->getFunction()->getFnAttribute(AttrName).getValueAsString(); DenormalMode Mode = parseDenormalFPAttribute(Attr); bool FtzEnabled = Mode.Output != DenormalMode::IEEE; diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp index 339f51d21087..3f3c4967609a 100644 --- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp +++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp @@ -133,15 +133,13 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) { // FIXME: Add assertions about ConvCall. Str = ConvCall->getArgOperand(0); } - assert(isa(Str) && - "Format of __nvvm__reflect function not recognized"); - const ConstantExpr *GEP = cast(Str); - - const Value *Sym = GEP->getOperand(0); - assert(isa(Sym) && + // Pre opaque pointers we have a constant expression wrapping the constant + // string. + Str = Str->stripPointerCasts(); + assert(isa(Str) && "Format of __nvvm_reflect function not recognized"); - const Value *Operand = cast(Sym)->getOperand(0); + const Value *Operand = cast(Str)->getOperand(0); if (const GlobalVariable *GV = dyn_cast(Operand)) { // For CUDA-7.0 style __nvvm_reflect calls, we need to find the operand's // initializer. diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 715cff72dcab..7113fe33b5d7 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -341,31 +341,11 @@ public: bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); } bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } - bool isU16Imm() const { - switch (Kind) { - case Expression: - return true; - case Immediate: - case ContextImmediate: - return isUInt<16>(getImmU16Context()); - default: - return false; - } - } - bool isS16Imm() const { - switch (Kind) { - case Expression: - return true; - case Immediate: - case ContextImmediate: - return isInt<16>(getImmS16Context()); - default: - return false; - } - } - bool isS16ImmX4() const { return Kind == Expression || - (Kind == Immediate && isInt<16>(getImm()) && - (getImm() & 3) == 0); } + bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); } + bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); } + bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); } + bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); } + bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); } bool isHashImmX8() const { // The Hash Imm form is used for instructions that check or store a hash. 
@@ -375,9 +355,6 @@ public: (getImm() & 7) == 0); } - bool isS16ImmX16() const { return Kind == Expression || - (Kind == Immediate && isInt<16>(getImm()) && - (getImm() & 15) == 0); } bool isS34ImmX16() const { return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0); @@ -388,17 +365,6 @@ public: return Kind == Expression || (Kind == Immediate && isInt<34>(getImm())); } - bool isS17Imm() const { - switch (Kind) { - case Expression: - return true; - case Immediate: - case ContextImmediate: - return isInt<17>(getImmS16Context()); - default: - return false; - } - } bool isTLSReg() const { return Kind == TLSRegister; } bool isDirectBr() const { if (Kind == Expression) @@ -712,6 +678,25 @@ public: return CreateExpr(Val, S, E, IsPPC64); } + +private: + template + bool isExtImm(bool Signed, unsigned Multiple) const { + switch (Kind) { + default: + return false; + case Expression: + return true; + case Immediate: + case ContextImmediate: + if (Signed) + return isInt(getImmS16Context()) && + (getImmS16Context() & (Multiple - 1)) == 0; + else + return isUInt(getImmU16Context()) && + (getImmU16Context() & (Multiple - 1)) == 0; + } + } }; } // end anonymous namespace. diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 5a12c3f22dee..d3d720054f16 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -8,8 +8,8 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "TargetInfo/PowerPCTargetInfo.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" @@ -64,14 +64,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() { static DecodeStatus decodeCondBrTarget(MCInst &Inst, unsigned Imm, uint64_t /*Address*/, - const void * /*Decoder*/) { + const MCDisassembler * /*Decoder*/) { Inst.addOperand(MCOperand::createImm(SignExtend32<14>(Imm))); return MCDisassembler::Success; } static DecodeStatus decodeDirectBrTarget(MCInst &Inst, unsigned Imm, uint64_t /*Address*/, - const void * /*Decoder*/) { + const MCDisassembler * /*Decoder*/) { int32_t Offset = SignExtend32<24>(Imm); Inst.addOperand(MCOperand::createImm(Offset)); return MCDisassembler::Success; @@ -90,85 +90,85 @@ static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, CRRegs); } static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, CRBITRegs); } static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, FRegs); } static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, FRegs); } static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { 
return decodeRegisterClass(Inst, RegNo, VFRegs); } static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, VRegs); } static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, VSRegs); } static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, VSFRegs); } static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, VSSRegs); } static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, RRegs); } -static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, RRegsNoR0); } static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, XRegs); } static DecodeStatus DecodeG8pRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, XRegs); } -static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, XRegsNoX0); } @@ -176,44 +176,47 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SPERegs); } static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, ACCRegs); } static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, VSRpRegs); } #define DecodeQSRCRegisterClass DecodeQFRCRegisterClass #define DecodeQBRCRegisterClass DecodeQFRCRegisterClass -template +template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { assert(isUInt(Imm) && "Invalid immediate"); Inst.addOperand(MCOperand::createImm(Imm)); return MCDisassembler::Success; } -template +template static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { 
assert(isUInt(Imm) && "Invalid immediate"); Inst.addOperand(MCOperand::createImm(SignExtend64(Imm))); return MCDisassembler::Success; } static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { if (Imm != 0) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(Imm)); @@ -222,7 +225,7 @@ static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm, static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo & 1) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1])); @@ -230,7 +233,8 @@ static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo, } static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // Decode the memri field (imm, reg), which has the low 16-bits as the // displacement and the next 5 bits as the register #. @@ -265,7 +269,8 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, } static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // Decode the memrix field (imm, reg), which has the low 14-bits as the // displacement and the next 5 bits as the register #. @@ -287,7 +292,7 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, static DecodeStatus decodeMemRIHashOperands(MCInst &Inst, uint64_t Imm, int64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Decode the memrix field for a hash store or hash check operation. // The field is composed of a register and an immediate value that is 6 bits // and covers the range -8 to -512. The immediate is always negative and 2s @@ -303,7 +308,8 @@ static DecodeStatus decodeMemRIHashOperands(MCInst &Inst, uint64_t Imm, } static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // Decode the memrix16 field (imm, reg), which has the low 12-bits as the // displacement with 16-byte aligned, and the next 5 bits as the register #. @@ -319,7 +325,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm, static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm, int64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Decode the memri34_pcrel field (imm, reg), which has the low 34-bits as the // displacement, and the next 5 bits as an immediate 0. uint64_t Base = Imm >> 34; @@ -333,7 +339,7 @@ static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm, static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm, int64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Decode the memri34 field (imm, reg), which has the low 34-bits as the // displacement, and the next 5 bits as the register #. 
uint64_t Base = Imm >> 34; @@ -347,7 +353,8 @@ static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm, } static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // Decode the spe8disp field (imm, reg), which has the low 5-bits as the // displacement with 8-byte aligned, and the next 5 bits as the register #. @@ -362,7 +369,8 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm, } static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // Decode the spe4disp field (imm, reg), which has the low 5-bits as the // displacement with 4-byte aligned, and the next 5 bits as the register #. @@ -377,7 +385,8 @@ static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm, } static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // Decode the spe2disp field (imm, reg), which has the low 5-bits as the // displacement with 2-byte aligned, and the next 5 bits as the register #. @@ -392,7 +401,8 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm, } static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, - int64_t Address, const void *Decoder) { + int64_t Address, + const MCDisassembler *Decoder) { // The cr bit encoding is 0x80 >> cr_reg_num. unsigned Zeros = countTrailingZeros(Imm); diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp index 6b16af293244..b71d59ed79ed 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 6af79324919c..58165fcaac03 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -23,5 +23,4 @@ using namespace llvm; -PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) - : PPCGenRegisterBankInfo() {} +PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) {} diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h index 358d5ed3cf14..31a4c528751f 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H #define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_REGBANK_DECLARATIONS diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9df94edc8cdf..2e678ffd58c2 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -44,6 +44,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_half16: return Value & 0xffff; case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: return Value & 0xfffc; case PPC::fixup_ppc_pcrel34: case PPC::fixup_ppc_imm34: @@ -60,6 +61,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case FK_Data_2: case PPC::fixup_ppc_half16: case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: return 2; case FK_Data_4: case PPC::fixup_ppc_brcond14: diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 94ef7b45434f..1e58039582c2 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -125,6 +125,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } break; case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: Target.print(errs()); errs() << '\n'; report_fatal_error("Invalid PC-relative half16ds relocation"); @@ -349,6 +350,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } break; case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_None: diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index b92b0fc342ec..b020635f4209 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -77,7 +77,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst, // label to the top of the fragment containing the aligned instruction that // was just added. if (InstLine == LabelLine) { - AssignFragment(LastLabel, InstructionFragment); + assignFragment(LastLabel, InstructionFragment); LastLabel->setOffset(0); } } @@ -98,7 +98,7 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst, // For example, the load that will get the relocation as follows: // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) // lwa 3, 4(3) - if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue()) + if (IsPartOfGOTToPCRelPair && !*IsPartOfGOTToPCRelPair) emitGOTToPCRelReloc(Inst); // Special handling is only for prefixed instructions. @@ -113,7 +113,7 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst, // follows: // pld 3, vec@got@pcrel(0), 1 // .Lpcrel1: - if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue()) + if (IsPartOfGOTToPCRelPair && *IsPartOfGOTToPCRelPair) emitGOTToPCRelLabel(Inst); } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 73292f7b7938..df0c666f5b11 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -51,6 +51,10 @@ enum Fixups { /// register number. fixup_ppc_nofixup, + /// A 16-bit fixup corresponding to lo16(_foo) with implied 3 zero bits for + /// instrs like 'lxv'. Produces the same relocation as fixup_ppc_half16ds. 
+ fixup_ppc_half16dq, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 4dfa7d5e600c..46bbc44e1681 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -34,7 +34,6 @@ using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new PPCMCCodeEmitter(MCII, Ctx); } @@ -47,10 +46,12 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); + + const PPCInstrInfo *InstrInfo = static_cast(&MCII); + unsigned Opcode = MI.getOpcode(); // Add a fixup for the branch target. Fixups.push_back(MCFixup::create(0, MO.getExpr(), - ((MI.getOpcode() == PPC::BL8_NOTOC || - MI.getOpcode() == PPC::BL8_NOTOC_TLS) + (InstrInfo->isNoTOCCallInstr(Opcode) ? (MCFixupKind)PPC::fixup_ppc_br24_notoc : (MCFixupKind)PPC::fixup_ppc_br24))); return 0; @@ -198,8 +199,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, } // Otherwise add a fixup for the displacement field. - Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_half16ds)); + Fixups.push_back(MCFixup::create(IsLittleEndian ? 0 : 2, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_half16dq)); return RegBits; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index abff44449131..6cd04ee018fd 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -110,9 +110,18 @@ PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res, if (Value.isAbsolute()) { int64_t Result = evaluateAsInt64(Value.getConstant()); - if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) && - (Result >= 0x8000)) + bool IsHalf16 = Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16; + bool IsHalf16DS = + Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16ds; + bool IsHalf16DQ = + Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16dq; + bool IsHalf = IsHalf16 || IsHalf16DS || IsHalf16DQ; + + if (!IsHalf && Result >= 0x8000) return false; + if ((IsHalf16DS && (Result & 0x3)) || (IsHalf16DQ && (Result & 0xf))) + return false; + Res = MCValue::get(Result); } else { if (!Layout) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 03b316341717..acb860e16518 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -34,7 +34,6 @@ class MCTargetOptions; class Target; MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index 64e11dbc1efc..729cb35cbebc 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -71,6 +71,19 @@ std::pair PPCXCOFFObjectWriter::getRelocTypeAndSignSize( 
return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16}; } } break; + case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: { + if (IsPCRel) + report_fatal_error("Invalid PC-relative relocation."); + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return {XCOFF::RelocationType::R_TOC, 15}; + case MCSymbolRefExpr::VK_PPC_L: + return {XCOFF::RelocationType::R_TOCL, 15}; + } + } break; case PPC::fixup_ppc_br24: // Branches are 4 byte aligned, so the 24 bits we encode in // the instruction actually represents a 26 bit offset. @@ -78,15 +91,19 @@ std::pair PPCXCOFFObjectWriter::getRelocTypeAndSignSize( case PPC::fixup_ppc_br24abs: return {XCOFF::RelocationType::R_RBA, EncodedSignednessIndicator | 25}; case FK_Data_4: + case FK_Data_8: + const uint8_t SignAndSizeForFKData = + EncodedSignednessIndicator | + ((unsigned)Fixup.getKind() == FK_Data_4 ? 31 : 63); switch (Modifier) { default: report_fatal_error("Unsupported modifier"); case MCSymbolRefExpr::VK_PPC_AIX_TLSGD: - return {XCOFF::RelocationType::R_TLS, EncodedSignednessIndicator | 31}; + return {XCOFF::RelocationType::R_TLS, SignAndSizeForFKData}; case MCSymbolRefExpr::VK_PPC_AIX_TLSGDM: - return {XCOFF::RelocationType::R_TLSM, EncodedSignednessIndicator | 31}; + return {XCOFF::RelocationType::R_TLSM, SignAndSizeForFKData}; case MCSymbolRefExpr::VK_None: - return {XCOFF::RelocationType::R_POS, EncodedSignednessIndicator | 31}; + return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData}; } } } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp index 79db03b0331b..f8b1914bd520 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp @@ -21,6 +21,7 @@ #include "PPCMCCodeEmitter.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectWriter.h" diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index edd3b42d47e1..a6ba5adda839 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -956,7 +956,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read], WAIT, XSABSDP, XSABSQP, - XSNABSDP, + XSNABSDP, XSNABSDPs, XSNABSQP, XSNEGDP, XSNEGQP, @@ -1372,7 +1372,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read] LDCIX, LHZCIX, LWZCIX, - MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv + MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv )>; // Expand instructions @@ -1469,7 +1469,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C], // 13 Cycles Unknown operations, 1 input operands def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY], (instrs - MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv + MFSPR, MFSPR8, MFSR, MFTB8, MFUDSCR, MFVRSAVE, MFVRSAVEv )>; // 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands @@ -1625,6 +1625,7 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read], (instrs LVSL, LVSR, + LXVKQ, MFVSRLD, MTVSRWS, VCLZLSBB, @@ -1979,7 +1980,6 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY], ICBTLS, ICCCI, LA, LA8, - LDMX, MFDCR, MFPMR, MFSRIN, diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index c088d7847ce4..2bbab64ce0da 100644 --- 
a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -156,6 +156,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], MCRF, MCRXRX, XSNABSDP, + XSNABSDPs, XSXEXPDP, XSABSDP, XSNEGDP, @@ -807,14 +808,6 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, (instregex "ST(B|H|W|D)CX$") )>; -// Cracked Load Instruction. -// Two consecutive load operations for a total of 8 cycles. -def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, - DISP_1C, DISP_1C], - (instrs - LDMX -)>; - // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU // operations cannot be done at the same time and so their latencies are added. @@ -940,6 +933,7 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], (instregex "M(T|F)TB(8)?$"), (instregex "MF(SPR|CTR|LR)(8)?$"), (instregex "M(T|F)MSR(D)?$"), + (instregex "M(T|F)(U)?DSCR$"), (instregex "MTSPR(8)?$") )>; diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 7235a878e38b..4eceb3afc70f 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -33,7 +33,6 @@ class MCInst; class MCOperand; class ModulePass; -FunctionPass *createPPCCTRLoops(); #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif @@ -53,12 +52,12 @@ FunctionPass *createPPCCTRLoops(); FunctionPass *createPPCExpandISELPass(); FunctionPass *createPPCPreEmitPeepholePass(); FunctionPass *createPPCExpandAtomicPseudoPass(); + FunctionPass *createPPCCTRLoopsPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP); - void initializePPCCTRLoopsPass(PassRegistry&); #ifndef NDEBUG void initializePPCCTRLoopsVerifyPass(PassRegistry&); #endif @@ -77,6 +76,7 @@ FunctionPass *createPPCCTRLoops(); void initializePPCTLSDynamicCallPass(PassRegistry &); void initializePPCMIPeepholePass(PassRegistry&); void initializePPCExpandAtomicPseudoPass(PassRegistry &); + void initializePPCCTRLoopsPass(PassRegistry &); extern char &PPCVSXFMAMutateID; @@ -84,6 +84,10 @@ FunctionPass *createPPCCTRLoops(); void initializePPCLowerMASSVEntriesPass(PassRegistry &); extern char &PPCLowerMASSVEntriesID; + ModulePass *createPPCGenScalarMASSEntriesPass(); + void initializePPCGenScalarMASSEntriesPass(PassRegistry &); + extern char &PPCGenScalarMASSEntriesID; + InstructionSelector * createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &, const PPCRegisterBankInfo &); diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index bbd5f5fd1941..310bf8125f1c 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -263,6 +263,10 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1", "true", "Enable instructions in ISA 3.1.", [FeatureISA3_0]>; +def FeatureISAFuture : SubtargetFeature<"isa-future-instructions", + "IsISAFuture", "true", + "Enable instructions for Future ISA.", + [FeatureISA3_1]>; def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", "Enable POWER9 Altivec instructions", [FeatureISA3_0, FeatureP8Altivec]>; @@ -376,7 +380,8 @@ def ProcessorFeatures { FeaturePartwordAtomic, FeatureQuadwordAtomic, FeaturePredictableSelectIsExpensive, - FeatureISA2_07 + FeatureISA2_07, + FeatureCRBits ]; list P8SpecificFeatures = [FeatureAddiLoadFusion, @@ -429,7 +434,7 @@ def ProcessorFeatures { // Future // 
For future CPU we assume that all of the existing features from Power10 // still exist with the exception of those we know are Power10 specific. - list FutureAdditionalFeatures = []; + list FutureAdditionalFeatures = [FeatureISAFuture]; list FutureSpecificFeatures = []; list FutureInheritableFeatures = !listconcat(P10InheritableFeatures, FutureAdditionalFeatures); @@ -591,7 +596,8 @@ def : ProcessorModel<"a2", PPCA2Model, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; + Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, + FeatureISA2_06]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 780981806996..22f35c8fa8d3 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -34,6 +34,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -254,6 +255,8 @@ public: void emitFunctionBodyEnd() override; + void emitPGORefs(); + void emitEndOfAsmFile(Module &) override; void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override; @@ -879,7 +882,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { // Print MO for better readability if (isVerbose()) - OutStreamer->GetCommentOS() << MO << '\n'; + OutStreamer->getCommentOS() << MO << '\n'; EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -950,7 +953,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { // Print MO for better readability if (isVerbose() && IsAIX) - OutStreamer->GetCommentOS() << MO << '\n'; + OutStreamer->getCommentOS() << MO << '\n'; EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -1582,7 +1585,7 @@ void PPCLinuxAsmPrinter::emitStartOfAsmFile(Module &M) { if (M.getPICLevel() == PICLevel::SmallPIC) return AsmPrinter::emitStartOfAsmFile(M); - OutStreamer->SwitchSection(OutContext.getELFSection( + OutStreamer->switchSection(OutContext.getELFSection( ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC)); MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC")); @@ -1599,7 +1602,7 @@ void PPCLinuxAsmPrinter::emitStartOfAsmFile(Module &M) { OutStreamer->emitAssignment(TOCSym, tocExpr); - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + OutStreamer->switchSection(getObjFileLowering().getTextSection()); } void PPCLinuxAsmPrinter::emitFunctionEntryLabel() { @@ -1657,7 +1660,7 @@ void PPCLinuxAsmPrinter::emitFunctionEntryLabel() { MCSectionSubPair Current = OutStreamer->getCurrentSection(); MCSectionELF *Section = OutStreamer->getContext().getELFSection( ".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - OutStreamer->SwitchSection(Section); + OutStreamer->switchSection(Section); OutStreamer->emitLabel(CurrentFnSym); OutStreamer->emitValueToAlignment(8); MCSymbol *Symbol1 = CurrentFnSymForSize; @@ -1672,7 +1675,7 @@ void PPCLinuxAsmPrinter::emitFunctionEntryLabel() { 8/*size*/); // Emit a null environment pointer. 
OutStreamer->emitIntValue(0, 8 /* size */); - OutStreamer->SwitchSection(Current.first, Current.second); + OutStreamer->switchSection(Current.first, Current.second); } void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) { @@ -1689,7 +1692,7 @@ void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) { const char *Name = isPPC64 ? ".toc" : ".got2"; MCSectionELF *Section = OutContext.getELFSection( Name, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - OutStreamer->SwitchSection(Section); + OutStreamer->switchSection(Section); if (!isPPC64) OutStreamer->emitValueToAlignment(4); @@ -1895,10 +1898,15 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbolAttr VisibilityAttr = MCSA_Invalid; if (!TM.getIgnoreXCOFFVisibility()) { + if (GV->hasDLLExportStorageClass() && !GV->hasDefaultVisibility()) + report_fatal_error( + "Cannot not be both dllexport and non-default visibility"); switch (GV->getVisibility()) { - // TODO: "exported" and "internal" Visibility needs to go here. + // TODO: "internal" Visibility needs to go here. case GlobalValue::DefaultVisibility: + if (GV->hasDLLExportStorageClass()) + VisibilityAttr = MAI->getExportedVisibilityAttr(); break; case GlobalValue::HiddenVisibility: VisibilityAttr = MAI->getHiddenVisibilityAttr(); @@ -1956,7 +1964,7 @@ void PPCAIXAsmPrinter::emitFunctionBodyEnd() { if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF) && (getNumberOfVRSaved() > 0)) { // Emit dummy EH Info Table. - OutStreamer->SwitchSection(getObjFileLowering().getCompactUnwindSection()); + OutStreamer->switchSection(getObjFileLowering().getCompactUnwindSection()); MCSymbol *EHInfoLabel = TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF); OutStreamer->emitLabel(EHInfoLabel); @@ -1971,7 +1979,7 @@ void PPCAIXAsmPrinter::emitFunctionBodyEnd() { OutStreamer->emitIntValue(0, PointerSize); OutStreamer->emitIntValue(0, PointerSize); - OutStreamer->SwitchSection(MF->getSection()); + OutStreamer->switchSection(MF->getSection()); } } @@ -2382,9 +2390,9 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) { // Print GV in verbose mode if (isVerbose()) { if (GV->hasInitializer()) { - GV->printAsOperand(OutStreamer->GetCommentOS(), + GV->printAsOperand(OutStreamer->getCommentOS(), /*PrintType=*/false, GV->getParent()); - OutStreamer->GetCommentOS() << '\n'; + OutStreamer->getCommentOS() << '\n'; } } @@ -2392,14 +2400,14 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) { getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); // Switch to the containing csect. - OutStreamer->SwitchSection(Csect); + OutStreamer->switchSection(Csect); const DataLayout &DL = GV->getParent()->getDataLayout(); // Handle common and zero-initialized local symbols. if (GV->hasCommonLinkage() || GVKind.isBSSLocal() || GVKind.isThreadBSSLocal()) { - Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV)); + Align Alignment = GV->getAlign().value_or(DL.getPreferredAlign(GV)); uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); GVSym->setStorageClass( TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); @@ -2424,9 +2432,8 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) { } // Emit aliasing label for global variable. 
- llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) { + for (const GlobalAlias *Alias : GOAliasMap[GV]) OutStreamer->emitLabel(getSymbol(Alias)); - }); emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); } @@ -2437,14 +2444,12 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() { MCSectionSubPair Current = OutStreamer->getCurrentSection(); // Emit function descriptor. - OutStreamer->SwitchSection( + OutStreamer->switchSection( cast(CurrentFnDescSym)->getRepresentedCsect()); // Emit aliasing label for function descriptor csect. - llvm::for_each(GOAliasMap[&MF->getFunction()], - [this](const GlobalAlias *Alias) { - OutStreamer->emitLabel(getSymbol(Alias)); - }); + for (const GlobalAlias *Alias : GOAliasMap[&MF->getFunction()]) + OutStreamer->emitLabel(getSymbol(Alias)); // Emit function entry point address. OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), @@ -2458,7 +2463,7 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() { // Emit a null environment pointer. OutStreamer->emitIntValue(0, PointerSize); - OutStreamer->SwitchSection(Current.first, Current.second); + OutStreamer->switchSection(Current.first, Current.second); } void PPCAIXAsmPrinter::emitFunctionEntryLabel() { @@ -2468,11 +2473,34 @@ void PPCAIXAsmPrinter::emitFunctionEntryLabel() { PPCAsmPrinter::emitFunctionEntryLabel(); // Emit aliasing label for function entry point label. - llvm::for_each( - GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) { - OutStreamer->emitLabel( - getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM)); - }); + for (const GlobalAlias *Alias : GOAliasMap[&MF->getFunction()]) + OutStreamer->emitLabel( + getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM)); +} + +void PPCAIXAsmPrinter::emitPGORefs() { + if (OutContext.hasXCOFFSection( + "__llvm_prf_cnts", + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + MCSection *CntsSection = OutContext.getXCOFFSection( + "__llvm_prf_cnts", SectionKind::getData(), + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD), + /*MultiSymbolsAllowed*/ true); + + OutStreamer->switchSection(CntsSection); + if (OutContext.hasXCOFFSection( + "__llvm_prf_data", + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) + OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]"); + if (OutContext.hasXCOFFSection( + "__llvm_prf_names", + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) + OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]"); + if (OutContext.hasXCOFFSection( + "__llvm_prf_vnds", + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) + OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]"); + } } void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { @@ -2481,8 +2509,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { if (M.empty() && TOCDataGlobalVars.empty()) return; + emitPGORefs(); + // Switch to section to emit TOC base. 
-  OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection());
+  OutStreamer->switchSection(getObjFileLowering().getTOCBaseSection());
   PPCTargetStreamer *TS =
       static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
@@ -2504,7 +2534,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
     }
-    OutStreamer->SwitchSection(TCEntry);
+    OutStreamer->switchSection(TCEntry);
     OutStreamer->emitLabel(I.second);
     if (TS != nullptr)
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 38ed5f2e78e3..f1eecfea5a5e 100644
--- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -434,6 +434,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
     XSMINDP,
     XSMINJDP,
     XSNABSDP,
+    XSNABSDPs,
     XSNABSQP,
     XSNEGDP,
     XSNEGQP,
@@ -978,6 +979,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
     XSMINDP,
     XSMINJDP,
     XSNABSDP,
+    XSNABSDPs,
     XSNABSQP,
     XSNEGDP,
     XSNEGQP,
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index b1f5bdd885cd..48167c3dc9ca 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===//
+//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,31 +6,38 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass verifies that all bdnz/bdz instructions are dominated by a loop
-// mtctr before any other instructions that might clobber the ctr register.
+// This pass generates machine instructions for the CTR-loop-related pseudos:
+// 1: MTCTRPseudo/DecreaseCTRPseudo
+// 2: MTCTR8Pseudo/DecreaseCTR8Pseudo
+//
+// If a CTR loop can be generated:
+// 1: MTCTRPseudo/MTCTR8Pseudo will be converted to "mtctr"
+// 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "bdnz/bdz" and
+//    its user branch instruction can be deleted.
+//
+// If a CTR loop cannot be generated due to a clobber of the CTR register:
+// 1: MTCTRPseudo/MTCTR8Pseudo can be deleted.
+// 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "addi -1" and
+//    a "cmplwi/cmpldi".
+//
+// This pass runs just before register allocation: if a CTR loop can be
+// generated, we don't want the register allocator to allocate a register for
+// DecreaseCTRPseudo; and if a CTR loop cannot be generated, we don't yet have
+// any condition register for the newly added "cmplwi/cmpldi".
 //
 //===----------------------------------------------------------------------===//
 
-// CTR loops are produced by the HardwareLoops pass and this pass is simply a
-// verification that no invalid CTR loops are produced. As such, it isn't
-// something that needs to be run (or even defined) for Release builds so the
-// entire file is guarded by NDEBUG.
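// Editor's sketch, not part of the vendored patch: a scalar model of the two
// expansions the rewritten file header above describes. Both run the loop
// body exactly TripCount times (hardware loops assume TripCount >= 1); the
// first models mtctr + bdnz, the second models the PHI + "addi -1" +
// "cmplwi/cmpldi" fallback. Plain C++, no PPC types involved.
#include <cassert>
#include <cstdint>

static uint64_t runCTRForm(uint64_t TripCount) {
  uint64_t Executed = 0;
  uint64_t CTR = TripCount;  // mtctr TripCount in the preheader
  do {
    ++Executed;              // loop body
  } while (--CTR != 0);      // bdnz: decrement CTR, branch while nonzero
  return Executed;
}

static uint64_t runNormalForm(uint64_t TripCount) {
  uint64_t Executed = 0;
  uint64_t IV = TripCount;   // PHI seeded from the preheader
  do {
    ++Executed;              // loop body
    IV = IV - 1;             // "addi -1" in the exiting block
  } while (IV != 0);         // "cmplwi/cmpldi" + conditional branch
  return Executed;
}

int main() {
  for (uint64_t N : {1u, 2u, 100u})
    assert(runCTRForm(N) == N && runNormalForm(N) == N);
}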
-#ifndef NDEBUG -#include - -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/ilist_iterator.h" +#include "PPCInstrInfo.h" +#include "PPCSubtarget.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Register.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -38,148 +45,314 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GenericDomTreeConstruction.h" -#include "llvm/Support/Printable.h" -#include "llvm/Support/raw_ostream.h" +#include using namespace llvm; -#define DEBUG_TYPE "ppc-ctrloops-verify" +#define DEBUG_TYPE "ppc-ctrloops" + +STATISTIC(NumCTRLoops, "Number of CTR loops generated"); +STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated"); namespace { +class PPCCTRLoops : public MachineFunctionPass { +public: + static char ID; - struct PPCCTRLoopsVerify : public MachineFunctionPass { - public: - static char ID; + PPCCTRLoops() : MachineFunctionPass(ID) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } - PPCCTRLoopsVerify() : MachineFunctionPass(ID) { - initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } + bool runOnMachineFunction(MachineFunction &MF) override; - bool runOnMachineFunction(MachineFunction &MF) override; +private: + const PPCInstrInfo *TII = nullptr; + MachineRegisterInfo *MRI = nullptr; - private: - MachineDominatorTree *MDT; - }; + bool processLoop(MachineLoop *ML); + bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; + void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, + MachineInstr *Dec); + void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); +}; +} // namespace + +char PPCCTRLoops::ID = 0; + +INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", + false, false) - char PPCCTRLoopsVerify::ID = 0; -} // end anonymous namespace +FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } -INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", - "PowerPC CTR Loops Verify", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", - "PowerPC CTR Loops Verify", false, false) +bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; -FunctionPass *llvm::createPPCCTRLoopsVerify() { - return new PPCCTRLoopsVerify(); + auto &MLI = getAnalysis(); + TII = static_cast(MF.getSubtarget().getInstrInfo()); + MRI = &MF.getRegInfo(); + + for (auto ML : MLI) { + if (ML->isOutermost()) + Changed |= processLoop(ML); + } + 
+  return Changed;
 }
 
-static bool clobbersCTR(const MachineInstr &MI) {
-  for (const MachineOperand &MO : MI.operands()) {
-    if (MO.isReg()) {
-      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
-        return true;
-    } else if (MO.isRegMask()) {
-      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
-        return true;
-    }
-  }
+bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
+  if (!CheckReads) {
+    // If we are only checking for defs, that is, we are going to look for
+    // definitions before MTCTRloop. In this case, a CTR definition inside the
+    // callee of a call instruction will not impact the definition of
+    // MTCTRloop, so we can use definesRegister() for the check; there is no
+    // need to check the regmask.
+    return (MI->definesRegister(PPC::CTR) &&
+            !MI->registerDefIsDead(PPC::CTR)) ||
+           (MI->definesRegister(PPC::CTR8) &&
+            !MI->registerDefIsDead(PPC::CTR8));
+  }
 
+  if ((MI->modifiesRegister(PPC::CTR) && !MI->registerDefIsDead(PPC::CTR)) ||
+      (MI->modifiesRegister(PPC::CTR8) && !MI->registerDefIsDead(PPC::CTR8)))
+    return true;
+
+  if (MI->getDesc().isCall())
+    return true;
+
+  // We define the CTR in the loop preheader, so if there is any CTR reader in
+  // the loop, we also cannot use the CTR loop form.
+  if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
+    return true;
+
   return false;
 }
 
-static bool verifyCTRBranch(MachineBasicBlock *MBB,
-                            MachineBasicBlock::iterator I) {
-  MachineBasicBlock::iterator BI = I;
-  SmallSet Visited;
-  SmallVector Preds;
-  bool CheckPreds;
-
-  if (I == MBB->begin()) {
-    Visited.insert(MBB);
-    goto queue_preds;
-  } else
-    --I;
-
-check_block:
-  Visited.insert(MBB);
-  if (I == MBB->end())
-    goto queue_preds;
-
-  CheckPreds = true;
-  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
-    unsigned Opc = I->getOpcode();
-    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
-      CheckPreds = false;
+bool PPCCTRLoops::processLoop(MachineLoop *ML) {
+  bool Changed = false;
+
+  // Align with the HardwareLoops pass: process inner loops first.
+  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
+    Changed |= processLoop(*I);
+
+  // If any inner loop is changed, the outer loop must be without hardware
+  // loop intrinsics.
+  if (Changed)
+    return true;
+
+  auto IsLoopStart = [](MachineInstr &MI) {
+    return MI.getOpcode() == PPC::MTCTRPseudo ||
+           MI.getOpcode() == PPC::MTCTR8Pseudo;
+  };
+
+  auto SearchForStart =
+      [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
+    for (auto &MI : *MBB) {
+      if (IsLoopStart(MI))
+        return &MI;
+    }
+    return nullptr;
+  };
+
+  MachineInstr *Start = nullptr;
+  MachineInstr *Dec = nullptr;
+  bool InvalidCTRLoop = false;
+
+  MachineBasicBlock *Preheader = ML->getLoopPreheader();
+  // If there is no preheader for this loop, there must be no MTCTRPseudo
+  // either.
+  if (!Preheader)
+    return false;
+
+  Start = SearchForStart(Preheader);
+  // This is not a CTR loop candidate.
+  if (!Start)
+    return false;
+
+  // If CTR is live into the preheader, we cannot redefine the CTR register.
+  if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
+    InvalidCTRLoop = true;
+
+  // Make sure there is also no CTR clobber in the preheader between its
+  // beginning and the MTCTR.
+  for (MachineBasicBlock::reverse_instr_iterator I =
+           std::next(Start->getReverseIterator());
+       I != Preheader->instr_rend(); ++I)
+    // Only check the definitions of CTR. If there is a non-dead definition
+    // of the CTR, we conservatively don't generate a CTR loop.
+ if (isCTRClobber(&*I, /* CheckReads */ false)) { + InvalidCTRLoop = true; break; } - if (I != BI && clobbersCTR(*I)) { - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName() - << ") instruction " << *I - << " clobbers CTR, invalidating " - << printMBBReference(*BI->getParent()) << " (" - << BI->getParent()->getFullName() << ") instruction " - << *BI << "\n"); - return false; + // Make sure there is also no CTR clobber/user in the block preheader between + // MTCTR and the end. + for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator()); + I != Preheader->instr_end(); ++I) + if (isCTRClobber(&*I, /* CheckReads */ true)) { + InvalidCTRLoop = true; + break; } - if (I == IE) + // Find the CTR loop components and decide whether or not to fall back to a + // normal loop. + for (auto *MBB : reverse(ML->getBlocks())) { + for (auto &MI : *MBB) { + if (MI.getOpcode() == PPC::DecreaseCTRPseudo || + MI.getOpcode() == PPC::DecreaseCTR8Pseudo) + Dec = &MI; + else if (!InvalidCTRLoop) + // If any instruction clobber CTR, then we can not generate a CTR loop. + InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true); + } + if (Dec && InvalidCTRLoop) break; } - if (!CheckPreds && Preds.empty()) - return true; - - if (CheckPreds) { -queue_preds: - if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { - LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for " - << printMBBReference(*BI->getParent()) << " (" - << BI->getParent()->getFullName() << ") instruction " - << *BI << "\n"); - return false; - } + assert(Dec && "CTR loop is not complete!"); - append_range(Preds, MBB->predecessors()); + if (InvalidCTRLoop) { + expandNormalLoops(ML, Start, Dec); + ++NumNormalLoops; } + else { + expandCTRLoops(ML, Start, Dec); + ++NumCTRLoops; + } + return true; +} + +void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, + MachineInstr *Dec) { + bool Is64Bit = + Start->getParent()->getParent()->getSubtarget().isPPC64(); + + MachineBasicBlock *Preheader = Start->getParent(); + MachineBasicBlock *Exiting = Dec->getParent(); + assert((Preheader && Exiting) && + "Preheader and exiting should exist for CTR loop!"); + + assert(Dec->getOperand(1).getImm() == 1 && + "Loop decrement stride must be 1"); + + unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; + + Register PHIDef = + MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass + : &PPC::GPRC_and_GPRC_NOR0RegClass); - do { - MBB = Preds.pop_back_val(); - if (!Visited.count(MBB)) { - I = MBB->getLastNonDebugInstr(); - goto check_block; + Start->getParent()->getParent()->getProperties().reset( + MachineFunctionProperties::Property::NoPHIs); + + // Generate "PHI" in the header block. + auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), + DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef); + PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader); + + Register ADDIDef = + MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass + : &PPC::GPRC_and_GPRC_NOR0RegClass); + // Generate "addi -1" in the exiting block. + BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef) + .addReg(PHIDef) + .addImm(-1); + + // Add other inputs for the PHI node. + if (ML->isLoopLatch(Exiting)) { + // There must be only two predecessors for the loop header, one is the + // Preheader and the other one is loop latch Exiting. 
In hardware loop + // insertion pass, the block containing DecreaseCTRloop must dominate all + // loop latches. So there must be only one latch. + assert(ML->getHeader()->pred_size() == 2 && + "Loop header predecessor is not right!"); + PHIMIB.addReg(ADDIDef).addMBB(Exiting); + } else { + // If the block containing DecreaseCTRloop is not a loop latch, we can use + // ADDIDef as the value for all other blocks for the PHI. In hardware loop + // insertion pass, the block containing DecreaseCTRloop must dominate all + // loop latches. + for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { + if (ML->contains(P)) { + assert(ML->isLoopLatch(P) && + "Loop's header in-loop predecessor is not loop latch!"); + PHIMIB.addReg(ADDIDef).addMBB(P); + } else + assert(P == Preheader && + "CTR loop should not be generated for irreducible loop!"); } - } while (!Preds.empty()); + } - return true; + // Generate the compare in the exiting block. + Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass); + auto CMPMIB = + BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef) + .addReg(ADDIDef) + .addImm(0); + + BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY), + Dec->getOperand(0).getReg()) + .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt); + + // Remove the pseudo instructions. + Start->eraseFromParent(); + Dec->eraseFromParent(); } -bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { - MDT = &getAnalysis(); - - // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before - // any other instructions that might clobber the ctr register. - for (MachineBasicBlock &MBB : MF) { - if (!MDT->isReachableFromEntry(&MBB)) - continue; - - for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(), - MIIE = MBB.end(); MII != MIIE; ++MII) { - unsigned Opc = MII->getOpcode(); - if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || - Opc == PPC::BDZ8 || Opc == PPC::BDZ) - if (!verifyCTRBranch(&MBB, MII)) - llvm_unreachable("Invalid PPC CTR loop!"); - } +void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, + MachineInstr *Dec) { + bool Is64Bit = + Start->getParent()->getParent()->getSubtarget().isPPC64(); + + MachineBasicBlock *Preheader = Start->getParent(); + MachineBasicBlock *Exiting = Dec->getParent(); + assert((Preheader && Exiting) && + "Preheader and exiting should exist for CTR loop!"); + + assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!"); + + unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; + unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; + auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg()); + assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && + "There should be only one user for loop decrement pseudo!"); + + unsigned Opcode = 0; + switch (BrInstr->getOpcode()) { + case PPC::BC: + Opcode = BDNZOpcode; + (void) ML; + assert(ML->contains(BrInstr->getOperand(1).getMBB()) && + "Invalid ctr loop!"); + break; + case PPC::BCn: + Opcode = BDZOpcode; + assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && + "Invalid ctr loop!"); + break; + default: + llvm_unreachable("Unhandled branch user for DecreaseCTRloop."); } - return false; + unsigned MTCTROpcode = Is64Bit ? PPC::MTCTR8 : PPC::MTCTR; + + // Generate "mtctr" in the loop preheader. + BuildMI(*Preheader, Start, Start->getDebugLoc(), TII->get(MTCTROpcode)) + .addReg(Start->getOperand(0).getReg()); + + // Generate "bdnz/bdz" in the exiting block just before the terminator. 
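// Editor's sketch, not part of the vendored patch: the polarity logic
// expandCTRLoops uses when replacing the user branch of the decrement pseudo.
// BC branches while the decremented counter is still nonzero, so its target
// must stay inside the loop (bdnz); BCn branches once the counter reaches
// zero, so its target must be the loop exit (bdz). The enums below are
// illustrative stand-ins, not the real PPC opcode values.
#include <cassert>

enum class UserBranch { BC, BCn };  // branch on CR bit set / clear
enum class CTRBranch { BDNZ, BDZ }; // branch if CTR != 0 / CTR == 0

static CTRBranch selectCTRBranch(UserBranch B, bool TargetInsideLoop) {
  switch (B) {
  case UserBranch::BC: // taken while the counter is nonzero
    assert(TargetInsideLoop && "BC user of the decrement must stay in-loop");
    return CTRBranch::BDNZ;
  case UserBranch::BCn: // taken once the counter hits zero
    assert(!TargetInsideLoop && "BCn user of the decrement must exit the loop");
    return CTRBranch::BDZ;
  }
  assert(false && "unhandled branch user");
  return CTRBranch::BDZ;
}

int main() {
  assert(selectCTRBranch(UserBranch::BC, true) == CTRBranch::BDNZ);
  assert(selectCTRBranch(UserBranch::BCn, false) == CTRBranch::BDZ);
}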
+ BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode)) + .addMBB(BrInstr->getOperand(1).getMBB()); + + // Remove the pseudo instructions. + Start->eraseFromParent(); + BrInstr->eraseFromParent(); + Dec->eraseFromParent(); } -#endif // NDEBUG diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp new file mode 100644 index 000000000000..b1f5bdd885cd --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp @@ -0,0 +1,185 @@ +//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass verifies that all bdnz/bdz instructions are dominated by a loop +// mtctr before any other instructions that might clobber the ctr register. +// +//===----------------------------------------------------------------------===// + +// CTR loops are produced by the HardwareLoops pass and this pass is simply a +// verification that no invalid CTR loops are produced. As such, it isn't +// something that needs to be run (or even defined) for Release builds so the +// entire file is guarded by NDEBUG. +#ifndef NDEBUG +#include + +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "PPC.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" +#include "llvm/Support/Printable.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-ctrloops-verify" + +namespace { + + struct PPCCTRLoopsVerify : public MachineFunctionPass { + public: + static char ID; + + PPCCTRLoopsVerify() : MachineFunctionPass(ID) { + initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + MachineDominatorTree *MDT; + }; + + char PPCCTRLoopsVerify::ID = 0; +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", + "PowerPC CTR Loops Verify", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", + "PowerPC CTR Loops Verify", false, false) + +FunctionPass *llvm::createPPCCTRLoopsVerify() { + return new PPCCTRLoopsVerify(); +} + +static bool clobbersCTR(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) + return true; + } else if (MO.isRegMask()) { + if (MO.clobbersPhysReg(PPC::CTR) || 
MO.clobbersPhysReg(PPC::CTR8)) + return true; + } + } + + return false; +} + +static bool verifyCTRBranch(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator BI = I; + SmallSet Visited; + SmallVector Preds; + bool CheckPreds; + + if (I == MBB->begin()) { + Visited.insert(MBB); + goto queue_preds; + } else + --I; + +check_block: + Visited.insert(MBB); + if (I == MBB->end()) + goto queue_preds; + + CheckPreds = true; + for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { + unsigned Opc = I->getOpcode(); + if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { + CheckPreds = false; + break; + } + + if (I != BI && clobbersCTR(*I)) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName() + << ") instruction " << *I + << " clobbers CTR, invalidating " + << printMBBReference(*BI->getParent()) << " (" + << BI->getParent()->getFullName() << ") instruction " + << *BI << "\n"); + return false; + } + + if (I == IE) + break; + } + + if (!CheckPreds && Preds.empty()) + return true; + + if (CheckPreds) { +queue_preds: + if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { + LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for " + << printMBBReference(*BI->getParent()) << " (" + << BI->getParent()->getFullName() << ") instruction " + << *BI << "\n"); + return false; + } + + append_range(Preds, MBB->predecessors()); + } + + do { + MBB = Preds.pop_back_val(); + if (!Visited.count(MBB)) { + I = MBB->getLastNonDebugInstr(); + goto check_block; + } + } while (!Preds.empty()); + + return true; +} + +bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { + MDT = &getAnalysis(); + + // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before + // any other instructions that might clobber the ctr register. 
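// Editor's sketch, not part of the vendored patch: the verification walk of
// the relocated PPCCTRLoopsVerify pass in miniature. Starting from a
// bdnz/bdz, it scans backwards through the block and then through its
// predecessors, succeeding only if every path reaches an mtctr before any
// CTR clobber. Simplified graph model with hypothetical names.
#include <cassert>
#include <set>
#include <vector>

struct Block {
  std::vector<int> Insts; // +1 = mtctr, -1 = CTR clobber, 0 = anything else
  std::vector<const Block *> Preds;
};

static bool definesCTROnAllPaths(const Block *B,
                                 std::set<const Block *> &Visited) {
  if (!Visited.insert(B).second)
    return true; // block already examined on another path
  for (auto I = B->Insts.rbegin(); I != B->Insts.rend(); ++I) {
    if (*I == 1)
      return true;  // found the dominating mtctr
    if (*I == -1)
      return false; // a clobber before any mtctr invalidates the loop
  }
  if (B->Preds.empty())
    return false;   // reached the entry block without seeing an mtctr
  for (const Block *P : B->Preds)
    if (!definesCTROnAllPaths(P, Visited))
      return false;
  return true;
}

int main() {
  Block Entry{{1}, {}}; // mtctr in the preheader
  Block Latch{{0}, {}};
  Latch.Preds = {&Entry, &Latch}; // latch branches back to itself
  std::set<const Block *> Visited;
  assert(definesCTROnAllPaths(&Latch, Visited));
}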
+ for (MachineBasicBlock &MBB : MF) { + if (!MDT->isReachableFromEntry(&MBB)) + continue; + + for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(), + MIIE = MBB.end(); MII != MIIE; ++MII) { + unsigned Opc = MII->getOpcode(); + if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || + Opc == PPC::BDZ8 || Opc == PPC::BDZ) + if (!verifyCTRBranch(&MBB, MII)) + llvm_unreachable("Invalid PPC CTR loop!"); + } + } + + return false; +} +#endif // NDEBUG diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index 1e81276f1de3..1901e8d1ebf1 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -363,3 +363,25 @@ def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, (sequence "VSL%u", 0, 19))>; + +def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>; + +def CSR_VSRP : + CalleeSavedRegs<(add VSRp26, VSRp27, VSRp28, VSRp29, VSRp30, VSRp31)>; + +def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>; + +def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>; + +def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>; + +def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec, + (sub CSR_ALL_VSRP, VSRp17))>; + +def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC, + (sub CSR_ALL_VSRP, VSRp17))>; + +def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>; + +def CSR_64_AllRegs_VSRP : + CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>; diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index e7cd107c5046..5c7f0619161c 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -775,7 +775,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { if (!OptPPCPred) return false; - PPC::Predicate PPCPred = OptPPCPred.getValue(); + PPC::Predicate PPCPred = *OptPPCPred; // Take advantage of fall-through opportunities. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 65c969c196e1..0f70ec576af1 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -10,14 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCPredicates.h" #include "PPCFrameLowering.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -625,7 +626,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // Work out frame sizes. uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); int64_t NegFrameSize = -FrameSize; - if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)) + if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))) llvm_unreachable("Unhandled stack size!"); if (MFI.isFrameAddressTaken()) @@ -660,10 +661,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, : PPC::STWU ); const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? 
PPC::STDUX : PPC::STWUX); - const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 - : PPC::LIS ); - const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 - : PPC::ORI ); const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR ); const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 @@ -934,11 +931,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addImm(NegFrameSize); } else { assert(!SingleScratchReg && "Only a single scratch reg available"); - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) - .addReg(TempReg, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); + TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize); BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addReg(TempReg, RegState::Kill); @@ -957,11 +950,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addReg(SPReg); } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) - .addReg(ScratchReg, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); + TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize); BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) .addReg(SPReg, RegState::Kill) .addReg(SPReg) @@ -1668,7 +1657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // values from the stack, and set SPAdd to the value that needs to be added // to the SP at the end. The default values are as if red zone was present. unsigned RBReg = SPReg; - unsigned SPAdd = 0; + uint64_t SPAdd = 0; // Check if we can move the stack update instruction up the epilogue // past the callee saves. This will allow the move to LR instruction @@ -1726,11 +1715,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) .addReg(FPReg).addImm(FrameSize); } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) - .addReg(ScratchReg, RegState::Kill) - .addImm(FrameSize & 0xFFFF); + TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize); BuildMI(MBB, MBBI, dl, AddInst) .addReg(RBReg) .addReg(FPReg) @@ -1974,6 +1959,15 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + // Do not explicitly save the callee saved VSRp registers. + // The individual VSR subregisters will be saved instead. + SavedRegs.reset(PPC::VSRp26); + SavedRegs.reset(PPC::VSRp27); + SavedRegs.reset(PPC::VSRp28); + SavedRegs.reset(PPC::VSRp29); + SavedRegs.reset(PPC::VSRp30); + SavedRegs.reset(PPC::VSRp31); + // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo(); unsigned LR = RegInfo->getRARegister(); @@ -2383,7 +2377,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one // or two GPRs, so we need table to record information for later save/restore. 
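// Editor's sketch, not part of the vendored patch: the prologue/epilogue
// hunks above replace the open-coded LIS/ORI pair with
// PPCInstrInfo::materializeImmPostRA. The removed pattern built a 32-bit
// immediate from (Imm >> 16) via lis and (Imm & 0xFFFF) via ori; this
// standalone check shows that decomposition is value-preserving for 32-bit
// frame sizes (we model only the low 32 bits of the register here).
#include <cassert>
#include <cstdint>

static uint32_t materializeViaLisOri(int32_t Imm) {
  uint32_t U = static_cast<uint32_t>(Imm);
  uint32_t Reg = (U >> 16) << 16; // lis Reg, Imm >> 16 (high half, low half 0)
  Reg |= (U & 0xFFFF);            // ori Reg, Reg, Imm & 0xFFFF
  return Reg;
}

int main() {
  for (int32_t FrameSize : {0x12345678, 0x7FFFFFFF, 65536, 32752}) {
    int32_t NegFrameSize = -FrameSize;
    assert(materializeViaLisOri(NegFrameSize) ==
           static_cast<uint32_t>(NegFrameSize));
  }
}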
-  llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
+  for (const CalleeSavedInfo &Info : CSI) {
     if (Info.isSpilledToReg()) {
       auto &SpilledVSR =
           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
@@ -2394,7 +2388,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
       else
         SpilledVSR.second = Info.getReg();
     }
-  });
+  }
 
   for (const CalleeSavedInfo &I : CSI) {
     Register Reg = I.getReg();
diff --git a/llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp b/llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp
new file mode 100644
index 000000000000..00931b1f63b2
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp
@@ -0,0 +1,149 @@
+//===-- PPCGenScalarMASSEntries.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation converts standard math functions into their
+// corresponding MASS (scalar) entries for PowerPC targets.
+// The following is an example of such a conversion:
+//     tanh ---> __xl_tanh_finite
+// Such lowering is legal under the fast-math option.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "ppc-gen-scalar-mass"
+
+using namespace llvm;
+
+namespace {
+
+class PPCGenScalarMASSEntries : public ModulePass {
+public:
+  static char ID;
+
+  PPCGenScalarMASSEntries() : ModulePass(ID) {
+    ScalarMASSFuncs = {
+#define TLI_DEFINE_SCALAR_MASS_FUNCS
+#include "llvm/Analysis/ScalarFuncs.def"
+    };
+  }
+
+  bool runOnModule(Module &M) override;
+
+  StringRef getPassName() const override {
+    return "PPC Generate Scalar MASS Entries";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired();
+  }
+
+private:
+  std::map<StringRef, StringRef> ScalarMASSFuncs;
+  bool isCandidateSafeToLower(const CallInst &CI) const;
+  bool isFiniteCallSafe(const CallInst &CI) const;
+  bool createScalarMASSCall(StringRef MASSEntry, CallInst &CI,
+                            Function &Func) const;
+};
+
+} // namespace
+
+// Returns true if the 'afn' flag exists on the call instruction of the math
+// function
+bool PPCGenScalarMASSEntries::isCandidateSafeToLower(const CallInst &CI) const {
+  // skip functions with no scalar or vector FP type (like cosisin)
+  if (!isa<FPMathOperator>(CI))
+    return false;
+
+  return CI.hasApproxFunc();
+}
+
+// Returns true if the 'nnan', 'ninf' and 'nsz' flags exist on the call
+// instruction of the math function
+bool PPCGenScalarMASSEntries::isFiniteCallSafe(const CallInst &CI) const {
+  // skip functions with no scalar or vector FP type (like cosisin)
+  if (!isa<FPMathOperator>(CI))
+    return false;
+
+  // FIXME: no-errno and trapping-math need to be set for the MASS conversion,
+  // but they don't have an IR representation.
+  return CI.hasNoNaNs() && CI.hasNoInfs() && CI.hasNoSignedZeros();
+}
+
+/// Lowers scalar math functions to scalar MASS functions.
+/// e.g.: tanh --> __xl_tanh_finite or __xl_tanh
+/// Both the function prototype and its call site are updated during lowering.
+bool PPCGenScalarMASSEntries::createScalarMASSCall(StringRef MASSEntry, + CallInst &CI, + Function &Func) const { + if (CI.use_empty()) + return false; + + Module *M = Func.getParent(); + assert(M && "Expecting a valid Module"); + + std::string MASSEntryStr = MASSEntry.str(); + if (isFiniteCallSafe(CI)) + MASSEntryStr += "_finite"; + + FunctionCallee FCache = M->getOrInsertFunction( + MASSEntryStr, Func.getFunctionType(), Func.getAttributes()); + + CI.setCalledFunction(FCache); + + return true; +} + +bool PPCGenScalarMASSEntries::runOnModule(Module &M) { + bool Changed = false; + + auto *TPC = getAnalysisIfAvailable(); + if (!TPC || skipModule(M)) + return false; + + for (Function &Func : M) { + if (!Func.isDeclaration()) + continue; + + auto Iter = ScalarMASSFuncs.find(Func.getName()); + if (Iter == ScalarMASSFuncs.end()) + continue; + + // The call to createScalarMASSCall() invalidates the iterator over users + // upon replacing the users. Precomputing the current list of users allows + // us to replace all the call sites. + SmallVector TheUsers; + for (auto *User : Func.users()) + TheUsers.push_back(User); + + for (auto *User : TheUsers) + if (auto *CI = dyn_cast_or_null(User)) { + if (isCandidateSafeToLower(*CI)) + Changed |= createScalarMASSCall(Iter->second, *CI, Func); + } + } + + return Changed; +} + +char PPCGenScalarMASSEntries::ID = 0; + +char &llvm::PPCGenScalarMASSEntriesID = PPCGenScalarMASSEntries::ID; + +INITIALIZE_PASS(PPCGenScalarMASSEntries, DEBUG_TYPE, + "Generate Scalar MASS entries", false, false) + +ModulePass *llvm::createPPCGenScalarMASSEntriesPass() { + return new PPCGenScalarMASSEntries(); +} diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index fdcf6e7e80f2..4247cf557c2a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -188,7 +189,7 @@ namespace { } /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { + inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } @@ -202,7 +203,7 @@ namespace { /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); - void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. 
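// Editor's sketch, not part of the vendored patch: the decision logic of
// PPCGenScalarMASSEntries in miniature. A declared math function is looked
// up in the scalar MASS table; when the call carries nnan+ninf+nsz the
// "_finite" variant is chosen, otherwise the plain "__xl_" entry. The table
// contents below are illustrative, not the full ScalarFuncs.def list.
#include <cassert>
#include <map>
#include <string>

static const std::map<std::string, std::string> ScalarMASSFuncs = {
    {"tanh", "__xl_tanh"}, {"sin", "__xl_sin"}, {"pow", "__xl_pow"}};

static std::string selectMASSEntry(const std::string &Callee,
                                   bool HasApproxFunc, bool IsFiniteSafe) {
  auto It = ScalarMASSFuncs.find(Callee);
  if (It == ScalarMASSFuncs.end() || !HasApproxFunc)
    return Callee; // not a candidate; keep the original call
  return IsFiniteSafe ? It->second + "_finite" : It->second;
}

int main() {
  assert(selectMASSEntry("tanh", true, true) == "__xl_tanh_finite");
  assert(selectMASSEntry("tanh", true, false) == "__xl_tanh");
  assert(selectMASSEntry("tanh", false, true) == "tanh"); // no 'afn' flag
  assert(selectMASSEntry("cbrt", true, true) == "cbrt");  // not in the table
}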
@@ -639,7 +640,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { && isInt32Immediate(N->getOperand(1).getNode(), Imm); } -void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { +void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) { SDLoc dl(SN); int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); @@ -4645,7 +4646,8 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, static bool isSWTestOp(SDValue N) { if (N.getOpcode() == PPCISD::FTSQRT) return true; - if (N.getNumOperands() < 1 || !isa(N.getOperand(0))) + if (N.getNumOperands() < 1 || !isa(N.getOperand(0)) || + N.getOpcode() != ISD::INTRINSIC_WO_CHAIN) return false; switch (N.getConstantOperandVal(0)) { case Intrinsic::ppc_vsx_xvtdivdp: @@ -5377,7 +5379,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } } @@ -5435,7 +5437,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cbeae0ab03b8..5b9d1e66b04e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -126,6 +126,16 @@ static cl::opt EnableQuadwordAtomics( cl::desc("enable quadword lock-free atomic operations"), cl::init(false), cl::Hidden); +static cl::opt + DisablePerfectShuffle("ppc-disable-perfect-shuffle", + cl::desc("disable vector permute decomposition"), + cl::init(true), cl::Hidden); + +cl::opt DisableAutoPairedVecSt( + "disable-auto-paired-vec-st", + cl::desc("disable automatically generated 32byte paired vector stores"), + cl::init(true), cl::Hidden); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); @@ -379,6 +389,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); + + // MASS transformation for LLVM intrinsics with replicating fast-math flag + // to be consistent to PPCGenScalarMASSEntries pass + if (TM.getOptLevel() == CodeGenOpt::Aggressive && + TM.Options.PPCGenScalarMASSEntries) { + setOperationAction(ISD::FSIN , MVT::f64, Custom); + setOperationAction(ISD::FCOS , MVT::f64, Custom); + setOperationAction(ISD::FPOW , MVT::f64, Custom); + setOperationAction(ISD::FLOG, MVT::f64, Custom); + setOperationAction(ISD::FLOG10, MVT::f64, Custom); + setOperationAction(ISD::FEXP, MVT::f64, Custom); + setOperationAction(ISD::FSIN , MVT::f32, Custom); + setOperationAction(ISD::FCOS , MVT::f32, Custom); + setOperationAction(ISD::FPOW , MVT::f32, Custom); + setOperationAction(ISD::FLOG, MVT::f32, Custom); + setOperationAction(ISD::FLOG10, MVT::f32, Custom); + setOperationAction(ISD::FEXP, MVT::f32, Custom); + } + if (Subtarget.hasSPE()) { 
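// Editor's sketch, not part of the vendored patch: why selectFrameIndex's
// Offset parameter widened from 'unsigned' to 'uint64_t' with (int64_t)
// casts at the call sites in the hunks above. A negative 16-bit displacement
// must arrive sign-extended; funnelling it through a 32-bit unsigned
// parameter drops the high 32 bits of the sign extension.
#include <cassert>
#include <cstdint>

static uint64_t throughUnsigned(int16_t Imm) {
  unsigned Offset = (int)Imm; // old signature: value wraps into 32 bits
  return Offset;              // zero-extends 0xFFFFFFFC into 64 bits
}

static uint64_t throughUint64(int16_t Imm) {
  uint64_t Offset = (int64_t)Imm; // new signature: proper sign extension
  return Offset;
}

int main() {
  int16_t Imm = -4;
  assert(throughUnsigned(Imm) == 0xFFFFFFFCu);         // wrong for addressing
  assert(throughUint64(Imm) == 0xFFFFFFFFFFFFFFFCull); // correct
}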
setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand); @@ -603,6 +632,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); @@ -1000,7 +1031,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::v2f64, Legal); setOperationAction(ISD::STORE, MVT::v2f64, Legal); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); if (Subtarget.hasP8Vector()) addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); @@ -1048,7 +1079,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STORE, MVT::v2i64, Promote); AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); @@ -1264,6 +1295,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } + + if (Subtarget.hasP10Vector()) { + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); + } } if (Subtarget.pairedVectorMemops()) { @@ -1291,8 +1326,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } - if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) { - setMaxAtomicSizeInBitsSupported(128); + if (shouldInlineQuadwordAtomics()) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom); @@ -1305,57 +1339,46 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); } + setLibcallName(RTLIB::MULO_I128, nullptr); if (!isPPC64) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + setLibcallName(RTLIB::MUL_I128, nullptr); setLibcallName(RTLIB::MULO_I64, nullptr); } if (!isPPC64) setMaxAtomicSizeInBitsSupported(32); + else if (shouldInlineQuadwordAtomics()) + setMaxAtomicSizeInBitsSupported(128); + else + setMaxAtomicSizeInBitsSupported(64); setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::FMA); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::BUILD_VECTOR); + setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL, + ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR}); if (Subtarget.hasFPCVT()) setTargetDAGCombine(ISD::UINT_TO_FP); - setTargetDAGCombine(ISD::LOAD); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::BR_CC); + setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC}); if (Subtarget.useCRBits()) setTargetDAGCombine(ISD::BRCOND); - setTargetDAGCombine(ISD::BSWAP); - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN, + ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}); - setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND}); + setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE}); if (Subtarget.useCRBits()) { - setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC}); } if (Subtarget.hasP9Altivec()) { - setTargetDAGCombine(ISD::ABS); - setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine({ISD::ABS, ISD::VSELECT}); } setLibcallName(RTLIB::LOG_F128, "logf128"); @@ -1586,8 +1609,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; - case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; - case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; + case PPCISD::XSMAXC: return "PPCISD::XSMAXC"; + case PPCISD::XSMINC: return "PPCISD::XSMINC"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; @@ -1865,8 +1888,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - const PPCSubtarget& Subtarget = - static_cast(DAG.getSubtarget()); + const PPCSubtarget &Subtarget = DAG.getSubtarget(); if (!Subtarget.hasP8Vector()) return false; @@ -2120,7 +2142,11 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, /// specifies a splat of a single element that is suitable for input to /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). 
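// Editor's sketch, not part of the vendored patch: the two-element splat
// handling added to isSplatShuffleMask and getSplatIdxForPPCMnemonics below.
// A v2i64/v2f64 shuffle is a splat when both mask slots name the same source
// element, and on little-endian the mnemonic index is mirrored (1 - elt).
// Plain C++ over an int mask, with hypothetical helper names.
#include <array>
#include <cassert>

static bool isTwoEltSplat(const std::array<int, 2> &Mask) {
  return Mask[0] == Mask[1];
}

static int splatIdxForMnemonic(const std::array<int, 2> &Mask,
                               bool IsLittleEndian) {
  assert(isTwoEltSplat(Mask));
  return IsLittleEndian ? 1 - Mask[0] : Mask[0];
}

int main() {
  std::array<int, 2> SplatLo = {0, 0}, SplatHi = {1, 1};
  assert(splatIdxForMnemonic(SplatLo, /*LE=*/true) == 1);  // mirrored on LE
  assert(splatIdxForMnemonic(SplatLo, /*LE=*/false) == 0); // as-is on BE
  assert(splatIdxForMnemonic(SplatHi, /*LE=*/true) == 0);
  assert(!isTwoEltSplat({0, 1})); // identity shuffle, not a splat
}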
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && + EVT VT = N->getValueType(0); + if (VT == MVT::v2i64 || VT == MVT::v2f64) + return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1); + + assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) && EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); // The consecutive indices need to specify an element, not part of two @@ -2421,6 +2447,12 @@ unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); + EVT VT = SVOp->getValueType(0); + + if (VT == MVT::v2i64 || VT == MVT::v2f64) + return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0) + : SVOp->getMaskElt(0); + if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else @@ -2957,15 +2989,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, bool isLoad = true; SDValue Ptr; EVT VT; - unsigned Alignment; + Align Alignment; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - Alignment = LD->getAlignment(); + Alignment = LD->getAlign(); } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); - Alignment = ST->getAlignment(); + Alignment = ST->getAlign(); isLoad = false; } else return false; @@ -3009,7 +3041,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return false; } else { // LDU/STU need an address with at least 4-byte alignment. - if (Alignment < 4) + if (Alignment < Align(4)) return false; if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4))) @@ -4416,8 +4448,11 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, - MachinePointerInfo(&*FuncArg, j)); + unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8; + EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits); + SDValue Store = + DAG.getTruncStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(&*FuncArg, j), ObjType); MemOps.push_back(Store); ++GPR_idx; } @@ -6254,8 +6289,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = - DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); + unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8; + EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg, + MachinePointerInfo(), ObjType); + MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -6888,8 +6926,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( if (useSoftFloat()) report_fatal_error("Soft float support is unimplemented on AIX."); - const PPCSubtarget &Subtarget = - static_cast(DAG.getSubtarget()); + const PPCSubtarget &Subtarget = DAG.getSubtarget(); const bool IsPPC64 = Subtarget.isPPC64(); const unsigned PtrByteSize = IsPPC64 ? 
8 : 4; @@ -7194,8 +7231,7 @@ SDValue PPCTargetLowering::LowerCall_AIX( if (CFlags.IsPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); - const PPCSubtarget& Subtarget = - static_cast(DAG.getSubtarget()); + const PPCSubtarget &Subtarget = DAG.getSubtarget(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector ArgLocs; @@ -7879,7 +7915,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDNodeFlags Flags = Op.getNode()->getFlags(); - // We have xsmaxcdp/xsmincdp which are OK to emit even in the + // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the // presence of infinities. if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) { switch (CC) { @@ -7887,10 +7923,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { break; case ISD::SETOGT: case ISD::SETGT: - return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS); + return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS); case ISD::SETOLT: case ISD::SETLT: - return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS); + return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS); } } @@ -9037,7 +9073,7 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) { const SDValue *InputLoad = &Op; - if (InputLoad->getOpcode() == ISD::BITCAST) + while (InputLoad->getOpcode() == ISD::BITCAST) InputLoad = &InputLoad->getOperand(0); if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR || InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) { @@ -9801,7 +9837,7 @@ SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { SDValue N1 = peekThroughBitcasts(Op.getOperand(1)); unsigned SHLAmt = N1.getConstantOperandVal(0); if (SHLAmt % 8 == 0) { - SmallVector Mask(16, 0); + std::array Mask; std::iota(Mask.begin(), Mask.end(), 0); std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end()); if (SDValue Shuffle = @@ -9903,6 +9939,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return LdSplt; } } + + // All v2i64 and v2f64 shuffles are legal + if (VT == MVT::v2i64 || VT == MVT::v2f64) + return Op; + if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { @@ -10048,56 +10089,59 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // perfect shuffle table to emit an optimal matching sequence. ArrayRef PermMask = SVOp->getMask(); - unsigned PFIndexes[4]; - bool isFourElementShuffle = true; - for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number - unsigned EltNo = 8; // Start out undef. - for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. - if (PermMask[i*4+j] < 0) - continue; // Undef, ignore it. - - unsigned ByteSource = PermMask[i*4+j]; - if ((ByteSource & 3) != j) { - isFourElementShuffle = false; - break; - } + if (!DisablePerfectShuffle && !isLittleEndian) { + unsigned PFIndexes[4]; + bool isFourElementShuffle = true; + for (unsigned i = 0; i != 4 && isFourElementShuffle; + ++i) { // Element number + unsigned EltNo = 8; // Start out undef. + for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. + if (PermMask[i * 4 + j] < 0) + continue; // Undef, ignore it. 
+ + unsigned ByteSource = PermMask[i * 4 + j]; + if ((ByteSource & 3) != j) { + isFourElementShuffle = false; + break; + } - if (EltNo == 8) { - EltNo = ByteSource/4; - } else if (EltNo != ByteSource/4) { - isFourElementShuffle = false; - break; + if (EltNo == 8) { + EltNo = ByteSource / 4; + } else if (EltNo != ByteSource / 4) { + isFourElementShuffle = false; + break; + } } + PFIndexes[i] = EltNo; + } + + // If this shuffle can be expressed as a shuffle of 4-byte elements, use the + // perfect shuffle vector to determine if it is cost effective to do this as + // discrete instructions, or whether we should use a vperm. + // For now, we skip this for little endian until such time as we have a + // little-endian perfect shuffle table. + if (isFourElementShuffle) { + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + + PFIndexes[2] * 9 + PFIndexes[3]; + + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + // Determining when to avoid vperm is tricky. Many things affect the cost + // of vperm, particularly how many times the perm mask needs to be + // computed. For example, if the perm mask can be hoisted out of a loop or + // is already used (perhaps because there are multiple permutes with the + // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the + // permute mask out of the loop requires an extra register. + // + // As a compromise, we only emit discrete instructions if the shuffle can + // be generated in 3 or fewer operations. When we have loop information + // available, if this block is within a loop, we should avoid using vperm + // for 3-operation perms and use a constant pool load instead. + if (Cost < 3) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } - PFIndexes[i] = EltNo; - } - - // If this shuffle can be expressed as a shuffle of 4-byte elements, use the - // perfect shuffle vector to determine if it is cost effective to do this as - // discrete instructions, or whether we should use a vperm. - // For now, we skip this for little endian until such time as we have a - // little-endian perfect shuffle table. - if (isFourElementShuffle && !isLittleEndian) { - // Compute the index in the perfect shuffle table. - unsigned PFTableIndex = - PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; - - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - // Determining when to avoid vperm is tricky. Many things affect the cost - // of vperm, particularly how many times the perm mask needs to be computed. - // For example, if the perm mask can be hoisted out of a loop or is already - // used (perhaps because there are multiple permutes with the same shuffle - // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of - // the loop requires an extra register. - // - // As a compromise, we only emit discrete instructions if the shuffle can be - // generated in 3 or fewer operations. When we have loop information - // available, if this block is within a loop, we should avoid using vperm - // for 3-operation perms and use a constant pool load instead. 
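// Editor's sketch, not part of the vendored patch: the perfect-shuffle
// lookup the restructured block above performs. Each of the four 4-byte
// lanes maps to a source element 0-7, or 8 for undef, so the four lane
// indices address a base-9 table; the stored entry carries its cost in the
// top two bits. The entry value below is made up for illustration.
#include <cassert>
#include <cstdint>

static unsigned pfTableIndex(const unsigned (&PFIndexes)[4]) {
  return PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + PFIndexes[2] * 9 +
         PFIndexes[3];
}

int main() {
  unsigned Identity[4] = {0, 1, 2, 3};
  assert(pfTableIndex(Identity) == 0 * 729 + 1 * 81 + 2 * 9 + 3);

  uint32_t PFEntry = 0x80000000u; // hypothetical entry, cost in bits 31:30
  unsigned Cost = PFEntry >> 30;  // here: 2
  bool UseDiscreteOps = Cost < 3; // else fall back to vperm + constant pool
  assert(Cost == 2 && UseDiscreteOps);
}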
- if (Cost < 3) - return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant @@ -10518,6 +10562,16 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}), 0); } + case Intrinsic::ppc_fnmsub: { + EVT VT = Op.getOperand(1).getValueType(); + if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128)) + return DAG.getNode( + ISD::FNEG, dl, VT, + DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2), + DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3)))); + return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } case Intrinsic::ppc_convert_f128_to_ppcf128: case Intrinsic::ppc_convert_ppcf128_to_f128: { RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128 @@ -10529,6 +10583,31 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, dl, SDValue()); return Result.first; } + case Intrinsic::ppc_maxfe: + case Intrinsic::ppc_maxfl: + case Intrinsic::ppc_maxfs: + case Intrinsic::ppc_minfe: + case Intrinsic::ppc_minfl: + case Intrinsic::ppc_minfs: { + EVT VT = Op.getValueType(); + assert( + all_of(Op->ops().drop_front(4), + [VT](const SDUse &Use) { return Use.getValueType() == VT; }) && + "ppc_[max|min]f[e|l|s] must have uniform type arguments"); + (void)VT; + ISD::CondCode CC = ISD::SETGT; + if (IntrinsicID == Intrinsic::ppc_minfe || + IntrinsicID == Intrinsic::ppc_minfl || + IntrinsicID == Intrinsic::ppc_minfs) + CC = ISD::SETLT; + unsigned I = Op.getNumOperands() - 2, Cnt = I; + SDValue Res = Op.getOperand(I); + for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) { + Res = + DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC); + } + return Res; + } } // If this is a lowered altivec predicate compare, CompareOpc is set to the @@ -11055,6 +11134,12 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); + case ISD::FPOW: return lowerPow(Op, DAG); + case ISD::FSIN: return lowerSin(Op, DAG); + case ISD::FCOS: return lowerCos(Op, DAG); + case ISD::FLOG: return lowerLog(Op, DAG); + case ISD::FLOG10: return lowerLog10(Op, DAG); + case ISD::FEXP: return lowerExp(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -11183,6 +11268,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, N->getOperand(2), N->getOperand(1))); break; + case Intrinsic::ppc_maxfe: + case Intrinsic::ppc_minfe: + case Intrinsic::ppc_fnmsub: case Intrinsic::ppc_convert_f128_to_ppcf128: Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)); break; @@ -14075,13 +14163,13 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { assert(LD1 && "Input needs to be a LoadSDNode."); return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), - LD1->getAlignment()); + LD1->getAlign()); } if (InputsAreReverseConsecutive) { assert(LDL && "Input needs to be a LoadSDNode."); - SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), - LDL->getBasePtr(), LDL->getPointerInfo(), - 
LDL->getAlignment());
+    SDValue Load =
+        DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
+                    LDL->getPointerInfo(), LDL->getAlign());
     SmallVector<int, 16> Ops;
     for (int i = N->getNumOperands() - 1; i >= 0; i--)
       Ops.push_back(i);
@@ -14469,6 +14557,11 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
 // builtins) into loads with swaps.
 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
+  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
+  // load combines.
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
   SDValue Chain;
@@ -14503,13 +14596,6 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
 
   MVT VecTy = N->getValueType(0).getSimpleVT();
 
-  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
-  // aligned and the type is a vector with elements up to 4 bytes
-  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
-      VecTy.getScalarSizeInBits() <= 32) {
-    return SDValue();
-  }
-
   SDValue LoadOps[] = { Chain, Base };
   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                          DAG.getVTList(MVT::v2f64, MVT::Other),
@@ -14537,6 +14623,11 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
 // builtins) into stores with swaps.
 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
+  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
+  // store combines.
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
   SDValue Chain;
@@ -14574,13 +14665,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
   SDValue Src = N->getOperand(SrcOpnd);
   MVT VecTy = Src.getValueType().getSimpleVT();
 
-  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
-  // aligned and the type is a vector with elements up to 4 bytes
-  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
-      VecTy.getScalarSizeInBits() <= 32) {
-    return SDValue();
-  }
-
   // All stores are done as v2f64 and possible bit cast.
   if (VecTy != MVT::v2f64) {
     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
@@ -14806,6 +14890,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
   SDValue SToVLHS = isScalarToVec(LHS);
   SDValue SToVRHS = isScalarToVec(RHS);
   if (SToVLHS || SToVRHS) {
+    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
+    // same type and have differing element sizes, then do not perform
+    // the following transformation. The current transformation for
+    // SCALAR_TO_VECTOR assumes that both input vectors have the same
+    // element size. This will be updated in the future to account for
+    // differing sizes of the LHS and RHS.
+    if (SToVLHS && SToVRHS &&
+        (SToVLHS.getValueType().getScalarSizeInBits() !=
+         SToVRHS.getValueType().getScalarSizeInBits()))
+      return Res;
+
     int NumEltsIn = SToVLHS ?
SToVLHS.getValueType().getVectorNumElements() : SToVRHS.getValueType().getVectorNumElements(); int NumEltsOut = ShuffV.size(); @@ -14889,24 +14984,36 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, // Example (even elements from first vector): // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, if (Mask[0] < NumElts) - for (int i = 1, e = Mask.size(); i < e; i += 2) + for (int i = 1, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = (ShuffV[i - 1] + NumElts); + } // Example (odd elements from first vector): // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, else - for (int i = 0, e = Mask.size(); i < e; i += 2) + for (int i = 0, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = (ShuffV[i + 1] + NumElts); + } } else { // Example (even elements from first vector): // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> , t1 if (Mask[0] < NumElts) - for (int i = 0, e = Mask.size(); i < e; i += 2) + for (int i = 0, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = ShuffV[i + 1] - NumElts; + } // Example (odd elements from first vector): // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> , t1 else - for (int i = 1, e = Mask.size(); i < e; i += 2) + for (int i = 1, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = ShuffV[i - 1] - NumElts; + } } // If the RHS has undefs, we need to remove them since we may have created @@ -15223,7 +15330,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, auto MMOFlags = LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, - LD->getPointerInfo(), LD->getAlignment(), + LD->getPointerInfo(), LD->getAlign(), MMOFlags, LD->getAAInfo()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -15231,7 +15338,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue FloatLoad2 = DAG.getLoad( MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, LD->getPointerInfo().getWithOffset(4), - MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); + commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo()); if (LD->isIndexed()) { // Note that DAGCombine should re-form any pre-increment load(s) from @@ -15544,7 +15651,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); SDValue BasePtr = LD->getBasePtr(); SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, - LD->getPointerInfo(), LD->getAlignment()); + LD->getPointerInfo(), LD->getAlign()); Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(4, dl)); @@ -17718,6 +17825,114 @@ bool PPCTargetLowering::splitValueIntoRegisterParts( return false; } +SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op, + SelectionDAG &DAG) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::CallLoweringInfo CLI(DAG); + EVT RetVT = Op.getValueType(); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + SDValue Callee = + DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout())); + bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &N : Op->op_values()) { + EVT ArgVT = N.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); 
+ Entry.Node = N; + Entry.Ty = ArgTy; + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend); + Entry.IsZExt = !Entry.IsSExt; + Args.push_back(Entry); + } + + SDValue InChain = DAG.getEntryNode(); + SDValue TCChain = InChain; + const Function &F = DAG.getMachineFunction().getFunction(); + bool isTailCall = + TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && + (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); + if (isTailCall) + InChain = TCChain; + CLI.setDebugLoc(SDLoc(Op)) + .setChain(InChain) + .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)) + .setTailCall(isTailCall) + .setSExtResult(SignExtend) + .setZExtResult(!SignExtend) + .setIsPostTypeLegalization(true); + return TLI.LowerCallTo(CLI).first; +} + +SDValue PPCTargetLowering::lowerLibCallBasedOnType( + const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op, + SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::f32) + return lowerToLibCall(LibCallFloatName, Op, DAG); + + if (Op.getValueType() == MVT::f64) + return lowerToLibCall(LibCallDoubleName, Op, DAG); + + return SDValue(); +} + +bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const { + SDNodeFlags Flags = Op.getNode()->getFlags(); + return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() && + Flags.hasNoNaNs() && Flags.hasNoInfs(); +} + +bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const { + return Op.getNode()->getFlags().hasApproximateFuncs(); +} + +SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName, + const char *LibCallFloatName, + const char *LibCallDoubleNameFinite, + const char *LibCallFloatNameFinite, + SDValue Op, + SelectionDAG &DAG) const { + if (!isLowringToMASSSafe(Op)) + return SDValue(); + + if (!isLowringToMASSFiniteSafe(Op)) + return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op, + DAG); + + return lowerLibCallBasedOnType(LibCallFloatNameFinite, + LibCallDoubleNameFinite, Op, DAG); +} + +SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const { + return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite", + "__xl_powf_finite", Op, DAG); +} + +SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const { + return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite", + "__xl_sinf_finite", Op, DAG); +} + +SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const { + return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite", + "__xl_cosf_finite", Op, DAG); +} + +SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const { + return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite", + "__xl_logf_finite", Op, DAG); +} + +SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const { + return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite", + "__xl_log10f_finite", Op, DAG); +} + +SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const { + return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite", + "__xl_expf_finite", Op, DAG); +} + // If we happen to match to an aligned D-Form, check if the Frame Index is // adequately aligned. If it is not, reset the mode to match to X-Form. 
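To restate the gating that lowerLibCallBase implements above: without the approximate-functions fast-math flag nothing is lowered, and the __xl_*_finite entry points are only used when no-signed-zeros, no-NaNs and no-infinities are all set on the node. A compact sketch of that decision for pow (the struct and helper names here are illustrative stand-ins, not LLVM API):

struct Flags {
  bool ApproxFuncs, NoSignedZeros, NoNaNs, NoInfs;
};

// Returns the MASS callee to use, or nullptr to keep the generic FPOW node.
const char *selectMASSPowCallee(Flags F, bool IsF32) {
  if (!F.ApproxFuncs)
    return nullptr; // corresponds to isLowringToMASSSafe() failing
  if (F.NoSignedZeros && F.NoNaNs && F.NoInfs) // isLowringToMASSFiniteSafe()
    return IsF32 ? "__xl_powf_finite" : "__xl_pow_finite";
  return IsF32 ? "__xl_powf" : "__xl_pow";
}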
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, @@ -17878,10 +18093,18 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC, } } +bool PPCTargetLowering::shouldInlineQuadwordAtomics() const { + // TODO: 16-byte atomic type support for AIX is in progress; we should be able + // to inline 16-byte atomic ops on AIX too in the future. + return Subtarget.isPPC64() && + (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) && + Subtarget.hasQuadwordAtomics(); +} + TargetLowering::AtomicExpansionKind PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128) + if (shouldInlineQuadwordAtomics() && Size == 128) return AtomicExpansionKind::MaskedIntrinsic; return TargetLowering::shouldExpandAtomicRMWInIR(AI); } @@ -17889,7 +18112,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { TargetLowering::AtomicExpansionKind PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits(); - if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128) + if (shouldInlineQuadwordAtomics() && Size == 128) return AtomicExpansionKind::MaskedIntrinsic; return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI); } @@ -17919,10 +18142,9 @@ getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) { Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic( IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { - assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && - "Only support quadword now"); + assert(shouldInlineQuadwordAtomics() && "Only support quadword now"); Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Type *ValTy = AlignedAddr->getType()->getPointerElementType(); + Type *ValTy = Incr->getType(); assert(ValTy->getPrimitiveSizeInBits() == 128); Function *RMW = Intrinsic::getDeclaration( M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation())); @@ -17944,10 +18166,9 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic( Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { - assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && - "Only support quadword now"); + assert(shouldInlineQuadwordAtomics() && "Only support quadword now"); Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Type *ValTy = AlignedAddr->getType()->getPointerElementType(); + Type *ValTy = CmpVal->getType(); assert(ValTy->getPrimitiveSizeInBits() == 128); Function *IntCmpXchg = Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index eb52e4aa6273..f92a117fe27f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -51,9 +51,9 @@ namespace llvm { /// FSEL, - /// XSMAXCDP, XSMINCDP - C-type min/max instructions. - XSMAXCDP, - XSMINCDP, + /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions. 
+ XSMAXC, + XSMINC, /// FCFID - The FCFID instruction, taking an f64 operand and producing /// and f64 value containing the FP representation of the integer that @@ -77,7 +77,7 @@ namespace llvm { FCTIDUZ, FCTIWUZ, - /// Floating-point-to-interger conversion instructions + /// Floating-point-to-integer conversion instructions FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR, @@ -765,8 +765,19 @@ namespace llvm { /// then the VPERM for the shuffle. All in all a very slow sequence. TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { - if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && - VT.getScalarSizeInBits() % 8 == 0) + // Default handling for scalable and single-element vectors. + if (VT.isScalableVector() || VT.getVectorNumElements() == 1) + return TargetLoweringBase::getPreferredVectorAction(VT); + + // Split and promote vNi1 vectors so we don't produce v256i1/v512i1 + // types as those are only for MMA instructions. + if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16) + return TypeSplitVector; + if (VT.getScalarSizeInBits() == 1) + return TypePromoteInteger; + + // Widen vectors that have reasonably sized elements. + if (VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } @@ -899,6 +910,8 @@ namespace llvm { Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override; + bool shouldInlineQuadwordAtomics() const; + TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; @@ -1273,6 +1286,24 @@ namespace llvm { SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerToLibCall(const char *LibCallName, SDValue Op, + SelectionDAG &DAG) const; + SDValue lowerLibCallBasedOnType(const char *LibCallFloatName, + const char *LibCallDoubleName, SDValue Op, + SelectionDAG &DAG) const; + bool isLowringToMASSFiniteSafe(SDValue Op) const; + bool isLowringToMASSSafe(SDValue Op) const; + SDValue lowerLibCallBase(const char *LibCallDoubleName, + const char *LibCallFloatName, + const char *LibCallDoubleNameFinite, + const char *LibCallFloatNameFinite, SDValue Op, + SelectionDAG &DAG) const; + SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index eae8e36e475e..dbe7a7805c61 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -580,6 +580,14 @@ def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } + +let hasSideEffects = 1, Defs = [CTR8] in +def MTCTR8Pseudo : PPCEmitTimePseudo<(outs), (ins g8rc:$rS), "#MTCTR8Pseudo", []>; + +let hasSideEffects = 1, Uses = [CTR8], Defs = [CTR8] in +def DecreaseCTR8Pseudo : PPCEmitTimePseudo<(outs crbitrc:$rT), (ins i64imm:$stride), + 
"#DecreaseCTR8Pseudo", []>; + let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), "mfspr $rT, 268", IIC_SprMFTB>, @@ -1014,8 +1022,6 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA), "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; } -def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L), - "darn $RT, $L", IIC_LdStLD>, isPPC64; def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D), "addpcis $RT, $D", IIC_BrB, []>, isPPC64; def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), @@ -1040,6 +1046,11 @@ def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>; } +let hasSideEffects = 1 in { +def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L), + "darn $RT, $L", IIC_LdStLD>, isPPC64; +} + let hasSideEffects = 0 in { defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), @@ -1396,10 +1407,6 @@ def LDUX : XForm_1_memOp<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), "ldux $rD, $addr", IIC_LdStLDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; - -def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src), - "ldmx $rD, $src", IIC_LdStLD, []>, isPPC64, - Requires<[IsISA3_0]>; } let mayLoad = 1, hasNoSchedulingInfo = 1 in { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index eada872c2a7d..59486c323567 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2218,7 +2218,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, .addReg(Pred[1].getReg(), RegState::ImplicitDefine); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); - MI.RemoveOperand(0); + MI.removeOperand(0); MI.setDesc(get(PPC::BC)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) @@ -2226,7 +2226,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, .addMBB(MBB); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); - MI.RemoveOperand(0); + MI.removeOperand(0); MI.setDesc(get(PPC::BCn)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) @@ -2234,7 +2234,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, .addMBB(MBB); } else { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); - MI.RemoveOperand(0); + MI.removeOperand(0); MI.setDesc(get(PPC::BCC)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) @@ -2714,8 +2714,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, } // If we've set the mask, we can transform. 
if (Mask != ~0LLU) { - MI->RemoveOperand(4); - MI->RemoveOperand(3); + MI->removeOperand(4); + MI->removeOperand(3); MI->getOperand(2).setImm(Mask); NumRcRotatesConvertedToRcAnd++; } @@ -2724,7 +2724,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (MB >= 48) { uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; NewOpC = PPC::ANDI8_rec; - MI->RemoveOperand(3); + MI->removeOperand(3); MI->getOperand(2).setImm(Mask); NumRcRotatesConvertedToRcAnd++; } @@ -3026,8 +3026,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case PPC::KILL_PAIR: { MI.setDesc(get(PPC::UNENCODED_NOP)); - MI.RemoveOperand(1); - MI.RemoveOperand(0); + MI.removeOperand(1); + MI.removeOperand(0); return true; } case TargetOpcode::LOAD_STACK_GUARD: { @@ -3122,7 +3122,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { .addReg(PPC::CR7) .addImm(1); MI.setDesc(get(PPC::ISYNC)); - MI.RemoveOperand(0); + MI.removeOperand(0); return true; } } @@ -3188,7 +3188,7 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, // - implicit reg uses // Therefore, removing the implicit operand won't change the explicit // operands layout. - MI.RemoveOperand(UseOpIdx); + MI.removeOperand(UseOpIdx); } } @@ -3199,7 +3199,7 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, // Remove existing operands. int OperandToKeep = LII.SetCR ? 1 : 0; for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--) - MI.RemoveOperand(i); + MI.removeOperand(i); // Replace the instruction. if (LII.SetCR) { @@ -3234,6 +3234,47 @@ MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI, return nullptr; } +void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + int64_t Imm) const { + assert(!MBB.getParent()->getRegInfo().isSSA() && + "Register should be in non-SSA form after RA"); + bool isPPC64 = Subtarget.isPPC64(); + // FIXME: Materialization here is not optimal. + // For some special bit patterns we can use less instructions. + // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp. + if (isInt<16>(Imm)) { + BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm); + } else if (isInt<32>(Imm)) { + BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg) + .addImm(Imm >> 16); + if (Imm & 0xFFFF) + BuildMI(MBB, MBBI, DL, get(isPPC64 ? 
PPC::ORI8 : PPC::ORI), Reg) + .addReg(Reg, RegState::Kill) + .addImm(Imm & 0xFFFF); + } else { + assert(isPPC64 && "Materializing 64-bit immediate to single register is " + "only supported in PPC64"); + BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48); + if ((Imm >> 32) & 0xFFFF) + BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg) + .addReg(Reg, RegState::Kill) + .addImm((Imm >> 32) & 0xFFFF); + BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg) + .addReg(Reg, RegState::Kill) + .addImm(32) + .addImm(31); + BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg) + .addReg(Reg, RegState::Kill) + .addImm((Imm >> 16) & 0xFFFF); + if (Imm & 0xFFFF) + BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg) + .addReg(Reg, RegState::Kill) + .addImm(Imm & 0xFFFF); + } +} + MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineInstr &MI, unsigned &OpNoForForwarding, @@ -3790,15 +3831,15 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { // Replace MI with "LI 0" - MI.RemoveOperand(4); - MI.RemoveOperand(3); - MI.RemoveOperand(2); + MI.removeOperand(4); + MI.removeOperand(3); + MI.removeOperand(2); MI.getOperand(1).ChangeToImmediate(0); MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI)); } else { // Replace MI with "ANDI_rec reg, 0" - MI.RemoveOperand(4); - MI.RemoveOperand(3); + MI.removeOperand(4); + MI.removeOperand(3); MI.getOperand(2).setImm(0); MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); @@ -4282,8 +4323,8 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { unsigned MinOp = std::min(Op1, Op2); MachineOperand MOp1 = MI.getOperand(MinOp); MachineOperand MOp2 = MI.getOperand(MaxOp); - MI.RemoveOperand(std::max(Op1, Op2)); - MI.RemoveOperand(std::min(Op1, Op2)); + MI.removeOperand(std::max(Op1, Op2)); + MI.removeOperand(std::min(Op1, Op2)); // If the operands we are swapping are the two at the end (the common case) // we can just remove both and add them in the opposite order. @@ -4297,7 +4338,7 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops. for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) { MOps.push_back(MI.getOperand(i)); - MI.RemoveOperand(i); + MI.removeOperand(i); } // MOp2 needs to be added next. MI.addOperand(MOp2); @@ -4532,8 +4573,8 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI)); replaceInstrOperandWithImm(CompareUseMI, 1, 0); - CompareUseMI.RemoveOperand(3); - CompareUseMI.RemoveOperand(2); + CompareUseMI.removeOperand(3); + CompareUseMI.removeOperand(2); continue; } LLVM_DEBUG( @@ -4542,8 +4583,8 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, LLVM_DEBUG(dbgs() << "Is converted to:\n"); // Convert to copy and remove unneeded operands. CompareUseMI.setDesc(get(PPC::COPY)); - CompareUseMI.RemoveOperand(3); - CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1); + CompareUseMI.removeOperand(3); + CompareUseMI.removeOperand(RegToCopy == TrueReg ? 
2 : 1);
       CmpIselsConverted++;
       Changed = true;
       LLVM_DEBUG(CompareUseMI.dump());
@@ -4887,7 +4928,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
   SmallVector<MachineOperand, 2> MOps;
   for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
     MOps.push_back(MI.getOperand(i));
-    MI.RemoveOperand(i);
+    MI.removeOperand(i);
   }
 
   // Remove the last MO in the list, which is ZERO operand in fact.
@@ -5010,7 +5051,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
   // just convert this to a COPY. Can't do this post-RA since we've already
   // cleaned up the copies.
   else if (!SetCR && ShAmt == 0 && !PostRA) {
-    MI.RemoveOperand(2);
+    MI.removeOperand(2);
     MI.setDesc(get(PPC::COPY));
   } else {
     // The 32 bit and 64 bit instructions are quite different.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index c16e146da247..e22b0086bde8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -295,6 +295,99 @@ public:
     return get(Opcode).TSFlags & PPCII::Prefixed;
   }
 
+  /// Check if Opcode corresponds to a call instruction that should be marked
+  /// with the NOTOC relocation.
+  bool isNoTOCCallInstr(unsigned Opcode) const {
+    if (!get(Opcode).isCall())
+      return false;
+
+    switch (Opcode) {
+    default:
+#ifndef NDEBUG
+      llvm_unreachable("Unknown call opcode");
+#endif
+      return false;
+    case PPC::BL8_NOTOC:
+    case PPC::BL8_NOTOC_TLS:
+    case PPC::BL8_NOTOC_RM:
+      return true;
+#ifndef NDEBUG
+    case PPC::BL8:
+    case PPC::BL:
+    case PPC::BL8_TLS:
+    case PPC::BL_TLS:
+    case PPC::BLA8:
+    case PPC::BLA:
+    case PPC::BCCL:
+    case PPC::BCCLA:
+    case PPC::BCL:
+    case PPC::BCLn:
+    case PPC::BL8_NOP:
+    case PPC::BL_NOP:
+    case PPC::BL8_NOP_TLS:
+    case PPC::BLA8_NOP:
+    case PPC::BCTRL8:
+    case PPC::BCTRL:
+    case PPC::BCCCTRL8:
+    case PPC::BCCCTRL:
+    case PPC::BCCTRL8:
+    case PPC::BCCTRL:
+    case PPC::BCCTRL8n:
+    case PPC::BCCTRLn:
+    case PPC::BL8_RM:
+    case PPC::BLA8_RM:
+    case PPC::BL8_NOP_RM:
+    case PPC::BLA8_NOP_RM:
+    case PPC::BCTRL8_RM:
+    case PPC::BCTRL8_LDinto_toc:
+    case PPC::BCTRL8_LDinto_toc_RM:
+    case PPC::BL8_TLS_:
+    case PPC::TCRETURNdi8:
+    case PPC::TCRETURNai8:
+    case PPC::TCRETURNri8:
+    case PPC::TAILBCTR8:
+    case PPC::TAILB8:
+    case PPC::TAILBA8:
+    case PPC::BCLalways:
+    case PPC::BLRL:
+    case PPC::BCCLRL:
+    case PPC::BCLRL:
+    case PPC::BCLRLn:
+    case PPC::BDZL:
+    case PPC::BDNZL:
+    case PPC::BDZLA:
+    case PPC::BDNZLA:
+    case PPC::BDZLp:
+    case PPC::BDNZLp:
+    case PPC::BDZLAp:
+    case PPC::BDNZLAp:
+    case PPC::BDZLm:
+    case PPC::BDNZLm:
+    case PPC::BDZLAm:
+    case PPC::BDNZLAm:
+    case PPC::BDZLRL:
+    case PPC::BDNZLRL:
+    case PPC::BDZLRLp:
+    case PPC::BDNZLRLp:
+    case PPC::BDZLRLm:
+    case PPC::BDNZLRLm:
+    case PPC::BL_RM:
+    case PPC::BLA_RM:
+    case PPC::BL_NOP_RM:
+    case PPC::BCTRL_RM:
+    case PPC::TCRETURNdi:
+    case PPC::TCRETURNai:
+    case PPC::TCRETURNri:
+    case PPC::BCTRL_LWZinto_toc:
+    case PPC::BCTRL_LWZinto_toc_RM:
+    case PPC::TAILBCTR:
+    case PPC::TAILB:
+    case PPC::TAILBA:
+      return false;
+#endif
+    }
+  }
+
   static bool isSameClassPhysRegCopy(unsigned Opcode) {
     unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
@@ -653,6 +746,12 @@ public:
   MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI,
                                bool &SeenIntermediateUse) const;
 
+  // Materialize immediate after RA.
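The 64-bit path of materializeImmPostRA above composes the value with LIS8, ORI8, RLDICR (shift by 32), ORIS8 and a final ORI8. A rough scalar model of that bit arithmetic, purely as an illustration of why the sequence reproduces Imm (it does not model the MachineInstr building):

#include <cstdint>

uint64_t materialize64(int64_t Imm) {
  // LIS8: halfword (Imm >> 48) placed at bits 16-31, sign-extended.
  int64_t R = int64_t(int16_t(Imm >> 48)) << 16;
  if ((Imm >> 32) & 0xFFFF)
    R |= (Imm >> 32) & 0xFFFF;                 // ORI8
  // RLDICR Reg, 32, 31: keep the low 32 bits, shifted into the high word
  // (this also discards the sign-extension bits left over from LIS8).
  uint64_t U = uint64_t(R) << 32;
  U |= uint64_t((Imm >> 16) & 0xFFFF) << 16;   // ORIS8
  if (Imm & 0xFFFF)
    U |= uint64_t(Imm) & 0xFFFF;               // ORI8
  return U; // equals uint64_t(Imm)
}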
+ void materializeImmPostRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + int64_t Imm) const; + /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of /// vs0-vs63 allocated whereas an Altivec instruction could only have diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index c26b4f6ceb7d..f651b51d2684 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -198,8 +198,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; -def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>; -def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>; +def PPCxsmaxc : SDNode<"PPCISD::XSMAXC", SDT_PPCFPMinMax, []>; +def PPCxsminc : SDNode<"PPCISD::XSMINC", SDT_PPCFPMinMax, []>; def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, @@ -633,514 +633,6 @@ class NoEncode { } -//===----------------------------------------------------------------------===// -// PowerPC Operand Definitions. - -// In the default PowerPC assembler syntax, registers are specified simply -// by number, so they cannot be distinguished from immediate values (without -// looking at the opcode). This means that the default operand matching logic -// for the asm parser does not work, and we need to specify custom matchers. -// Since those can only be specified with RegisterOperand classes and not -// directly on the RegisterClass, all instructions patterns used by the asm -// parser need to use a RegisterOperand (instead of a RegisterClass) for -// all their register operands. -// For this purpose, we define one RegisterOperand for each RegisterClass, -// using the same name as the class, just in lower case. 
-
-def PPCRegGPRCAsmOperand : AsmOperandClass {
-  let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
-}
-def gprc : RegisterOperand<GPRC> {
-  let ParserMatchClass = PPCRegGPRCAsmOperand;
-}
-def PPCRegG8RCAsmOperand : AsmOperandClass {
-  let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
-}
-def g8rc : RegisterOperand<G8RC> {
-  let ParserMatchClass = PPCRegG8RCAsmOperand;
-}
-def PPCRegG8pRCAsmOperand : AsmOperandClass {
-  let Name = "RegG8pRC"; let PredicateMethod = "isEvenRegNumber";
-}
-def g8prc : RegisterOperand<G8pRC> {
-  let ParserMatchClass = PPCRegG8pRCAsmOperand;
-}
-def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
-  let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
-}
-def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
-  let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
-}
-def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
-  let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
-}
-def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
-  let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
-}
-def PPCRegF8RCAsmOperand : AsmOperandClass {
-  let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
-}
-def f8rc : RegisterOperand<F8RC> {
-  let ParserMatchClass = PPCRegF8RCAsmOperand;
-}
-def PPCRegF4RCAsmOperand : AsmOperandClass {
-  let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
-}
-def f4rc : RegisterOperand<F4RC> {
-  let ParserMatchClass = PPCRegF4RCAsmOperand;
-}
-def PPCRegVRRCAsmOperand : AsmOperandClass {
-  let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
-}
-def vrrc : RegisterOperand<VRRC> {
-  let ParserMatchClass = PPCRegVRRCAsmOperand;
-}
-def PPCRegVFRCAsmOperand : AsmOperandClass {
-  let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
-}
-def vfrc : RegisterOperand<VFRC> {
-  let ParserMatchClass = PPCRegVFRCAsmOperand;
-}
-def PPCRegCRBITRCAsmOperand : AsmOperandClass {
-  let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
-}
-def crbitrc : RegisterOperand<CRBITRC> {
-  let ParserMatchClass = PPCRegCRBITRCAsmOperand;
-}
-def PPCRegCRRCAsmOperand : AsmOperandClass {
-  let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
-}
-def crrc : RegisterOperand<CRRC> {
-  let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-def PPCRegSPERCAsmOperand : AsmOperandClass {
-  let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
-}
-def sperc : RegisterOperand<SPERC> {
-  let ParserMatchClass = PPCRegSPERCAsmOperand;
-}
-def PPCRegSPE4RCAsmOperand : AsmOperandClass {
-  let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber";
-}
-def spe4rc : RegisterOperand<GPRC> {
-  let ParserMatchClass = PPCRegSPE4RCAsmOperand;
-}
-
-def PPCU1ImmAsmOperand : AsmOperandClass {
-  let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u1imm : Operand<i32> {
-  let PrintMethod = "printU1ImmOperand";
-  let ParserMatchClass = PPCU1ImmAsmOperand;
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCU2ImmAsmOperand : AsmOperandClass {
-  let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u2imm : Operand<i32> {
-  let PrintMethod = "printU2ImmOperand";
-  let ParserMatchClass = PPCU2ImmAsmOperand;
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCATBitsAsHintAsmOperand : AsmOperandClass {
-  let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint";
-  let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails.
-}
-def atimm : Operand<i32> {
-  let PrintMethod = "printATBitsAsHint";
-  let ParserMatchClass = PPCATBitsAsHintAsmOperand;
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCU3ImmAsmOperand : AsmOperandClass {
-  let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u3imm : Operand<i32> {
-  let PrintMethod = "printU3ImmOperand";
-  let ParserMatchClass = PPCU3ImmAsmOperand;
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCU4ImmAsmOperand : AsmOperandClass {
-  let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u4imm : Operand<i32> {
-  let PrintMethod = "printU4ImmOperand";
-  let ParserMatchClass = PPCU4ImmAsmOperand;
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS5ImmAsmOperand : AsmOperandClass {
-  let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
-  let RenderMethod = "addImmOperands";
-}
-def s5imm : Operand<i32> {
-  let PrintMethod = "printS5ImmOperand";
-  let ParserMatchClass = PPCS5ImmAsmOperand;
-  let DecoderMethod = "decodeSImmOperand<5>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU5ImmAsmOperand : AsmOperandClass {
-  let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u5imm : Operand<i32> {
-  let PrintMethod = "printU5ImmOperand";
-  let ParserMatchClass = PPCU5ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<5>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU6ImmAsmOperand : AsmOperandClass {
-  let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u6imm : Operand<i32> {
-  let PrintMethod = "printU6ImmOperand";
-  let ParserMatchClass = PPCU6ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<6>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU7ImmAsmOperand : AsmOperandClass {
-  let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u7imm : Operand<i32> {
-  let PrintMethod = "printU7ImmOperand";
-  let ParserMatchClass = PPCU7ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<7>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU8ImmAsmOperand : AsmOperandClass {
-  let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u8imm : Operand<i32> {
-  let PrintMethod = "printU8ImmOperand";
-  let ParserMatchClass = PPCU8ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<8>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU10ImmAsmOperand : AsmOperandClass {
-  let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u10imm : Operand<i32> {
-  let PrintMethod = "printU10ImmOperand";
-  let ParserMatchClass = PPCU10ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<10>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU12ImmAsmOperand : AsmOperandClass {
-  let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
-  let RenderMethod = "addImmOperands";
-}
-def u12imm : Operand<i32> {
-  let PrintMethod = "printU12ImmOperand";
-  let ParserMatchClass = PPCU12ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<12>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS16ImmAsmOperand : AsmOperandClass {
-  let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
-  let RenderMethod = "addS16ImmOperands";
-}
-def s16imm : Operand<i32> {
-  let PrintMethod = "printS16ImmOperand";
-  let EncoderMethod = "getImm16Encoding";
-  let ParserMatchClass = PPCS16ImmAsmOperand;
-  let DecoderMethod = "decodeSImmOperand<16>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU16ImmAsmOperand : AsmOperandClass {
-  let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
-  let RenderMethod = "addU16ImmOperands";
-}
-def u16imm : Operand<i32> {
-  let PrintMethod = "printU16ImmOperand";
-  let EncoderMethod = "getImm16Encoding";
-  let ParserMatchClass = PPCU16ImmAsmOperand;
-  let DecoderMethod = "decodeUImmOperand<16>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS17ImmAsmOperand : AsmOperandClass {
-  let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
-  let RenderMethod = "addS16ImmOperands";
-}
-def s17imm : Operand<i32> {
-  // This operand type is used for addis/lis to allow the assembler parser
-  // to accept immediates in the range -65536..65535 for compatibility with
-  // the GNU assembler. The operand is treated as 16-bit otherwise.
-  let PrintMethod = "printS16ImmOperand";
-  let EncoderMethod = "getImm16Encoding";
-  let ParserMatchClass = PPCS17ImmAsmOperand;
-  let DecoderMethod = "decodeSImmOperand<16>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS34ImmAsmOperand : AsmOperandClass {
-  let Name = "S34Imm";
-  let PredicateMethod = "isS34Imm";
-  let RenderMethod = "addImmOperands";
-}
-def s34imm : Operand<i64> {
-  let PrintMethod = "printS34ImmOperand";
-  let EncoderMethod = "getImm34EncodingNoPCRel";
-  let ParserMatchClass = PPCS34ImmAsmOperand;
-  let DecoderMethod = "decodeSImmOperand<34>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def s34imm_pcrel : Operand<i64> {
-  let PrintMethod = "printS34ImmOperand";
-  let EncoderMethod = "getImm34EncodingPCRel";
-  let ParserMatchClass = PPCS34ImmAsmOperand;
-  let DecoderMethod = "decodeSImmOperand<34>";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCImmZeroAsmOperand : AsmOperandClass {
-  let Name = "ImmZero";
-  let PredicateMethod = "isImmZero";
-  let RenderMethod = "addImmOperands";
-}
-def immZero : Operand<i32> {
-  let PrintMethod = "printImmZeroOperand";
-  let ParserMatchClass = PPCImmZeroAsmOperand;
-  let DecoderMethod = "decodeImmZeroOperand";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
-
-def PPCDirectBrAsmOperand : AsmOperandClass {
-  let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
-  let RenderMethod = "addBranchTargetOperands";
-}
-def directbrtarget : Operand<OtherVT> {
-  let PrintMethod = "printBranchOperand";
-  let EncoderMethod = "getDirectBrEncoding";
-  let DecoderMethod = "decodeDirectBrTarget";
-  let ParserMatchClass = PPCDirectBrAsmOperand;
-  let OperandType = "OPERAND_PCREL";
-}
-def absdirectbrtarget : Operand<OtherVT> {
-  let PrintMethod = "printAbsBranchOperand";
-  let EncoderMethod = "getAbsDirectBrEncoding";
-  let ParserMatchClass = PPCDirectBrAsmOperand;
-}
-def PPCCondBrAsmOperand : AsmOperandClass {
-  let Name = "CondBr"; let PredicateMethod = "isCondBr";
-  let RenderMethod = "addBranchTargetOperands";
-}
-def condbrtarget : Operand<OtherVT> {
-  let PrintMethod = "printBranchOperand";
-  let EncoderMethod = "getCondBrEncoding";
-  let DecoderMethod = "decodeCondBrTarget";
-  let ParserMatchClass = PPCCondBrAsmOperand;
-  let OperandType = "OPERAND_PCREL";
-}
-def abscondbrtarget : Operand<OtherVT> {
-  let PrintMethod = "printAbsBranchOperand";
-  let EncoderMethod = "getAbsCondBrEncoding";
-  let ParserMatchClass = PPCCondBrAsmOperand;
-}
-def calltarget : Operand<iPTR> {
-  let PrintMethod = "printBranchOperand";
-  let EncoderMethod = "getDirectBrEncoding";
-  let DecoderMethod = "decodeDirectBrTarget";
-  let ParserMatchClass = PPCDirectBrAsmOperand;
-  let OperandType = "OPERAND_PCREL";
-}
-def abscalltarget : Operand<iPTR> {
-  let PrintMethod = "printAbsBranchOperand";
-  let EncoderMethod = "getAbsDirectBrEncoding";
-  let ParserMatchClass = PPCDirectBrAsmOperand;
-}
-def PPCCRBitMaskOperand : AsmOperandClass {
-  let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
-}
-def crbitm: Operand<i8> {
-  let PrintMethod = "printcrbitm";
-  let EncoderMethod = "get_crbitm_encoding";
-  let DecoderMethod = "decodeCRBitMOperand";
-  let ParserMatchClass = PPCCRBitMaskOperand;
-}
-// Address operands
-// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
-def PPCRegGxRCNoR0Operand : AsmOperandClass {
-  let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
-}
-def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
-  let ParserMatchClass = PPCRegGxRCNoR0Operand;
-}
-
-// New addressing modes with 34 bit immediates.
-def PPCDispRI34Operand : AsmOperandClass {
-  let Name = "DispRI34"; let PredicateMethod = "isS34Imm";
-  let RenderMethod = "addImmOperands";
-}
-def dispRI34 : Operand<iPTR> {
-  let ParserMatchClass = PPCDispRI34Operand;
-}
-def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
-  let PrintMethod = "printMemRegImm34";
-  let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getMemRI34Encoding";
-  let DecoderMethod = "decodeMemRI34Operands";
-}
-// memri, imm is a 34-bit value for pc-relative instructions where
-// base register is set to zero.
-def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
-  let PrintMethod = "printMemRegImm34PCRel";
-  let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
-  let EncoderMethod = "getMemRI34PCRelEncoding";
-  let DecoderMethod = "decodeMemRI34PCRelOperands";
-}
-
-// A version of ptr_rc usable with the asm parser.
-def PPCRegGxRCOperand : AsmOperandClass {
-  let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
-}
-def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
-  let ParserMatchClass = PPCRegGxRCOperand;
-}
-
-def PPCDispRIOperand : AsmOperandClass {
-  let Name = "DispRI"; let PredicateMethod = "isS16Imm";
-  let RenderMethod = "addS16ImmOperands";
-}
-def dispRI : Operand<iPTR> {
-  let ParserMatchClass = PPCDispRIOperand;
-}
-def PPCDispRIXOperand : AsmOperandClass {
-  let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
-  let RenderMethod = "addImmOperands";
-}
-def dispRIX : Operand<iPTR> {
-  let ParserMatchClass = PPCDispRIXOperand;
-}
-def PPCDispRIHashOperand : AsmOperandClass {
-  let Name = "DispRIHash"; let PredicateMethod = "isHashImmX8";
-  let RenderMethod = "addImmOperands";
-}
-def dispRIHash : Operand<iPTR> {
-  let ParserMatchClass = PPCDispRIHashOperand;
-}
-def PPCDispRIX16Operand : AsmOperandClass {
-  let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16";
-  let RenderMethod = "addImmOperands";
-}
-def dispRIX16 : Operand<iPTR> {
-  let ParserMatchClass = PPCDispRIX16Operand;
-}
-def PPCDispSPE8Operand : AsmOperandClass {
-  let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
-  let RenderMethod = "addImmOperands";
-}
-def dispSPE8 : Operand<iPTR> {
-  let ParserMatchClass = PPCDispSPE8Operand;
-}
-def PPCDispSPE4Operand : AsmOperandClass {
-  let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
-  let RenderMethod = "addImmOperands";
-}
-def dispSPE4 : Operand<iPTR> {
-  let ParserMatchClass = PPCDispSPE4Operand;
-}
-def PPCDispSPE2Operand : AsmOperandClass {
-  let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
-  let RenderMethod = "addImmOperands";
-}
-def dispSPE2 : Operand<iPTR> {
-  let ParserMatchClass = PPCDispSPE2Operand;
-}
-
-def memri : Operand<iPTR> {
-  let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getMemRIEncoding";
-  let DecoderMethod = "decodeMemRIOperands";
-  let OperandType = "OPERAND_MEMORY";
-}
-def memrr : Operand<iPTR> {
-  let PrintMethod = "printMemRegReg";
-  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
-  let OperandType = "OPERAND_MEMORY";
-}
-def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
-  let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getMemRIXEncoding";
-  let DecoderMethod = "decodeMemRIXOperands";
-  let OperandType = "OPERAND_MEMORY";
-}
-def memrihash : Operand<iPTR> {
-  // memrihash 8-aligned for ROP Protection Instructions.
-  let PrintMethod = "printMemRegImmHash";
-  let MIOperandInfo = (ops dispRIHash:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getMemRIHashEncoding";
-  let DecoderMethod = "decodeMemRIHashOperands";
-  let OperandType = "OPERAND_MEMORY";
-}
-def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
-  let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getMemRIX16Encoding";
-  let DecoderMethod = "decodeMemRIX16Operands";
-  let OperandType = "OPERAND_MEMORY";
-}
-def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
-  let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getSPE8DisEncoding";
-  let DecoderMethod = "decodeSPE8Operands";
-  let OperandType = "OPERAND_MEMORY";
-}
-def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
-  let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getSPE4DisEncoding";
-  let DecoderMethod = "decodeSPE4Operands";
-  let OperandType = "OPERAND_MEMORY";
-}
-def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
-  let PrintMethod = "printMemRegImm";
-  let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
-  let EncoderMethod = "getSPE2DisEncoding";
-  let DecoderMethod = "decodeSPE2Operands";
-  let OperandType = "OPERAND_MEMORY";
-}
-
-// A single-register address. This is used with the SjLj
-// pseudo-instructions which translates to LD/LWZ. These instructions requires
-// G8RC_NOX0 registers.
-def memr : Operand<iPTR> {
-  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
-  let OperandType = "OPERAND_MEMORY";
-}
-def PPCTLSRegOperand : AsmOperandClass {
-  let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
-  let RenderMethod = "addTLSRegOperands";
-}
-def tlsreg32 : Operand<i32> {
-  let EncoderMethod = "getTLSRegEncoding";
-  let ParserMatchClass = PPCTLSRegOperand;
-}
-def tlsgd32 : Operand<i32> {}
-def tlscall32 : Operand<i32> {
-  let PrintMethod = "printTLSCall";
-  let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
-  let EncoderMethod = "getTLSCallEncoding";
-}
-
-// PowerPC Predicate operand.
-def pred : Operand<OtherVT> {
-  let PrintMethod = "printPredicateOperand";
-  let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
-}
-
 // Define PowerPC specific addressing mode.
 
 // d-form
@@ -1212,6 +704,7 @@ def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">,
       AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>;
 def IsAIX : Predicate<"Subtarget->isAIXABI()">;
 def NotAIX : Predicate<"!Subtarget->isAIXABI()">;
+def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Multiclass Definitions.
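For reference, the DispRI34/S34Imm operands removed from this file above describe the signed 34-bit displacements of prefixed D-forms; a hypothetical standalone version of the range check an isS34Imm-style predicate has to make:

#include <cstdint>

// Signed 34-bit range: [-2^33, 2^33).
bool fitsSigned34(int64_t Disp) {
  return Disp >= -(INT64_C(1) << 33) && Disp < (INT64_C(1) << 33);
}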
@@ -3056,6 +2549,13 @@ def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
                       PPC970_DGroup_First, PPC970_Unit_FXU;
 }
+let hasSideEffects = 1, Defs = [CTR] in
+def MTCTRPseudo : PPCEmitTimePseudo<(outs), (ins gprc:$rS), "#MTCTRPseudo", []>;
+
+let hasSideEffects = 1, Uses = [CTR], Defs = [CTR] in
+def DecreaseCTRPseudo : PPCEmitTimePseudo<(outs crbitrc:$rT), (ins i32imm:$stride),
+                                          "#DecreaseCTRPseudo", []>;
+
 let hasSideEffects = 0 in {
 let Defs = [LR] in {
 def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
@@ -3069,6 +2569,22 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
 }
 }
 
+let hasSideEffects = 1 in {
+  def MTUDSCR : XFXForm_7_ext<31, 467, 3, (outs), (ins gprc:$rX),
+                              "mtspr 3, $rX", IIC_SprMTSPR>,
+                PPC970_DGroup_Single, PPC970_Unit_FXU;
+  def MFUDSCR : XFXForm_1_ext<31, 339, 3, (outs gprc:$rX), (ins),
+                              "mfspr $rX, 3", IIC_SprMFSPR>,
+                PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Disable these aliases on AIX since they are not supported.
+let Predicates = [ModernAs] in {
+// Aliases for moving to/from the DSCR via mtspr/mfspr.
+def : InstAlias<"mtudscr $Rx", (MTUDSCR gprc:$Rx)>;
+def : InstAlias<"mfudscr $Rx", (MFUDSCR gprc:$Rx)>;
+}
+
 let isCodeGenOnly = 1 in {
   // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
   // like a GPR on the PPC970. As such, copies in and out have the same
@@ -3728,12 +3244,12 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
 // XL Compat intrinsics.
 def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (FMSUB $A, $B, $C)>;
 def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (FMSUBS $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (FNMSUB $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (FNMSUBS $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (FNMADD $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (FNMADDS $A, $B, $C)>;
 def : Pat<(int_ppc_fre f64:$A), (FRE $A)>;
 def : Pat<(int_ppc_fres f32:$A), (FRES $A)>;
+def : Pat<(int_ppc_fnabs f64:$A), (FNABSD $A)>;
+def : Pat<(int_ppc_fnabss f32:$A), (FNABSS $A)>;
 
 include "PPCInstrAltivec.td"
 include "PPCInstrSPE.td"
@@ -3748,7 +3264,8 @@ def : Pat<(not i1:$in),
 
 // Prefixed instructions may require access to the above defs at a later
 // time so we include this after the def.
-include "PPCInstrPrefix.td"
+include "PPCInstrP10.td"
+include "PPCInstrMMA.td"
 
 // Patterns for arithmetic i1 operations.
 def : Pat<(add i1:$a, i1:$b),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
new file mode 100644
index 000000000000..a7e85cda781f
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -0,0 +1,628 @@
+
+// Mask immediates for MMA instructions (2, 4 and 8 bits).
+def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
+def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
+def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
+
+def MMA : Predicate<"Subtarget->hasMMA()">;
+
+
+// Multiclass definitions for MMA accumulator instructions.
+// ----------------------------------------------------------------------------
+
+// Defines 2 unmasked instructions where the xo field for acc/non-acc version
+// is even/odd.
+multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+                       string asmstr> {
+  let Predicates = [MMA] in {
+    def NAME :
+      XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
+                       !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PP :
+      XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
+                       !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+                            string asmstr> {
+  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME :
+      MMIRR_XX3Form_XY4P8_XAB6<
+          opcode, !or(xo, 0x01), (outs acc:$AT),
+          !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
+          !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PM#NAME#PP :
+      MMIRR_XX3Form_XY4P8_XAB6<
+          opcode, xo, (outs acc:$AT),
+          !con((ins acc:$ATi),
+               !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
+          !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+                            string asmstr> {
+  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME :
+      MMIRR_XX3Form_XYP4_XAB6<
+          opcode, !or(xo, 0x01), (outs acc:$AT),
+          !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
+          !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PM#NAME#PP :
+      MMIRR_XX3Form_XYP4_XAB6<
+          opcode, xo, (outs acc:$AT),
+          !con((ins acc:$ATi),
+               !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
+          !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+                            string asmstr> {
+  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, !or(xo, 0x01), (outs acc:$AT),
+          !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+          !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PM#NAME#PP :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, xo, (outs acc:$AT),
+          !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+          !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// Upper nibble of XO field for acc/non-acc version is 0x4/0x6.
+multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+                            string asmstr> {
+  let Predicates = [MMA] in {
+    def NAME :
+      XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
+                       !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PP :
+      XX3Form_AT3_XAB6<
+          opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+          !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, xo, (outs acc:$AT),
+          !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+          !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PM#NAME#PP :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, !or(xo, 0x20), (outs acc:$AT),
+          !con((ins acc:$ATi),
+               !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+          !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
+// bits. Upper nibble is masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+                                  string asmbase, string asmstr> {
+  defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA] in {
+    def PN : XX3Form_AT3_XAB6<
+        opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+        !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+        RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def NP : XX3Form_AT3_XAB6<
+        opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+        !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+        RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def NN : XX3Form_AT3_XAB6<
+        opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+        !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+        RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME#PN :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, !or(xo, 0x80), (outs acc:$AT),
+          !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+          !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#NP :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, !or(xo, 0x40), (outs acc:$AT),
+          !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+          !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#NN :
+      MMIRR_XX3Form_XY4P2_XAB6<
+          opcode, !or(xo, 0xC0), (outs acc:$AT),
+          !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+          !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+          IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 5 instructions, unmasked, operand negating.
+// Upper nibble is masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+                             string asmbase, string asmstr> {
+  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA] in {
+    def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
+                              !con((ins acc:$ATi), IOL),
+                              !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+             RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
+                              !con((ins acc:$ATi), IOL),
+                              !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+             RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
+                              !con((ins acc:$ATi), IOL),
+                              !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+             RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
+// Upper nibble is masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+                                 string asmbase, string asmstr> {
+  defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME :
+      MMIRR_XX3Form_XY4_XAB6<
+        opcode, !or(xo, 0x01), (outs acc:$AT),
+        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
+        !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PM#NAME#PP :
+      MMIRR_XX3Form_XY4_XAB6<
+        opcode, xo, (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+        !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#PN :
+      MMIRR_XX3Form_XY4_XAB6<
+        opcode, !or(xo, 0x80), (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+        !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#NP :
+      MMIRR_XX3Form_XY4_XAB6<
+        opcode, !or(xo, 0x40), (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+        !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#NN :
+      MMIRR_XX3Form_XY4_XAB6<
+        opcode, !or(xo, 0xC0), (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+        !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
+// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+                                 string asmbase, string asmstr> {
+  defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+  let Predicates = [MMA, PrefixInstrs] in {
+    def PM#NAME :
+      MMIRR_XX3Form_X4Y2_XAB6<
+        opcode, !or(xo, 0x01), (outs acc:$AT),
+        !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
+        !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"@earlyclobber $AT">;
+    def PM#NAME#PP :
+      MMIRR_XX3Form_X4Y2_XAB6<
+        opcode, xo, (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+        !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#PN :
+      MMIRR_XX3Form_X4Y2_XAB6<
+        opcode, !or(xo, 0x80), (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+        !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#NP :
+      MMIRR_XX3Form_X4Y2_XAB6<
+        opcode, !or(xo, 0x40), (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+        !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+    def PM#NAME#NN :
+      MMIRR_XX3Form_X4Y2_XAB6<
+        opcode, !or(xo, 0xC0), (outs acc:$AT),
+        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+        !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+        IIC_VecFP, []>,
+      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+}
+
+// End of class definitions.
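+
+// Usage illustration (exposition only, not an additional definition): a
+// single defm such as
+//   defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
+//                                    "xvi4ger8", "$AT, $XA, $XB">;
+// (see the instantiations below) expands to four records: XVI4GER8
+// (non-accumulating, odd XO = 35), XVI4GER8PP (accumulating, even XO = 34),
+// and the prefixed masked forms PMXVI4GER8 / PMXVI4GER8PP, which take the
+// additional u4imm:$XMSK, u4imm:$YMSK and u8imm:$PMSK operands.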
+//----------------------------------------------------------------------------- + +let Predicates = [MMA] in { + def XXMFACC : + XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", + IIC_VecGeneral, + [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, + RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; + def XXMTACC : + XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", + IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), + "#KILL_PAIR", []>, + RegConstraint<"$XTp = $XSp">; + def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS), + "#BUILD_UACC $AT, $AS", []>; + // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in + // the backend. We avoid CSE here because it generates a copy of the acc + // register and this copy is more expensive than calling the intrinsic again. + let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def XXSETACCZ : + XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; + } + def XVI8GER4SPP : + XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), + "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + let mayStore = 1 in { + def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), + "#SPILL_ACC", []>; + def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst), + "#SPILL_UACC", []>; + } + let mayLoad = 1, hasSideEffects = 0 in { + def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src), + "#RESTORE_ACC", []>; + def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src), + "#RESTORE_UACC", []>; + } +} + +let Predicates = [MMA, PrefixInstrs] in { + def PMXVI8GER4SPP : + MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), + (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, + u4imm:$YMSK, u4imm:$PMSK), + "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", + IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; +} + +// MMA accumulating/non-accumulating instructions. 
+//------------------------------------------------------------------------------
+
+// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
+// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
+defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
+                                         "xvbf16ger2", "$AT, $XA, $XB">;
+
+// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
+defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
+                                 "xvi4ger8", "$AT, $XA, $XB">;
+
+// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
+defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
+                                 "xvi8ger4", "$AT, $XA, $XB">;
+
+// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
+defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
+                                  "xvi16ger2", "$AT, $XA, $XB">;
+
+// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
+defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
+                                   "xvi16ger2s", "$AT, $XA, $XB">;
+
+// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
+// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
+defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
+                                        "xvf16ger2", "$AT, $XA, $XB">;
+
+// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERNN
+// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERNN
+defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
+                                      "xvf32ger", "$AT, $XA, $XB">;
+
+// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
+// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
+defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
+                                      "xvf64ger", "$AT, $XA, $XB">;
+//------------------------------------------------------------------------------
+
+// MMA Intrinsics
+let Predicates = [MMA] in {
+  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
+            (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
+            (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
+            (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
+            (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF32GERPN
$ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)), + (XVF64GER $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)), + (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)), + (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; +} + +// MMA Intrinsics +let Predicates = [MMA, PrefixInstrs] in { + def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)), + (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk8Imm:$PMSK)), + (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)), + (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk4Imm:$PMSK)), + (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 
(int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)), + (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)), + (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np 
v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; +} + +def ConcatsMMA { + dag VecsToVecPair0 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1), + $vs1, sub_vsx0)); + dag VecsToVecPair1 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), + $vs3, sub_vsx0)); + dag VecsToVecQuad = + (BUILD_UACC (INSERT_SUBREG + (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)), + (KILL_PAIR VecsToVecPair0), sub_pair0), + (KILL_PAIR VecsToVecPair1), sub_pair1)); +} + +def Extracts { + dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0)); + dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1)); + dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0)); + dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1)); + dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0)); + dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1)); +} + +let Predicates = [MMA] in { + def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)), + (XXMTACC ConcatsMMA.VecsToVecQuad)>; + def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, + v16i8:$vs3, v16i8:$vs2)), + (XXMTACC ConcatsMMA.VecsToVecQuad)>; + def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)), + Extracts.Vec0>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)), + Extracts.Vec1>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)), + Extracts.Vec2>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)), + Extracts.Vec3>; +} + + diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td new file mode 100644 index 000000000000..6cf3f1d3341e --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -0,0 +1,2315 @@ +//===-- PPCInstrP10.td - Power10 Instruction Set -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions introduced for the Power10 CPU. 
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Naming convention for future instruction formats
+//
+// <INST_FORM>{_<OP_TYPE><OP_LEN>}+
+//
+// Where:
+//   <INST_FORM> - name of instruction format as per the ISA
+//                 (X-Form, VX-Form, etc.)
+//   <OP_TYPE> - operand type
+//     * FRT/RT/VT/XT/BT - target register
+//       (FPR, GPR, VR, VSR, CR-bit respectively)
+//       In some situations, the 'T' is replaced by
+//       'D' when describing the target register.
+//     * [FR|R|V|X|B][A-Z] - register source (i.e. FRA, RA, XB, etc.)
+//     * IMM - immediate (where signedness matters,
+//       this is SI/UI for signed/unsigned)
+//     * [R|X|FR]Tp - register pair target (i.e. FRTp, RTp)
+//     * R - PC-Relative bit
+//       (denotes that the address is computed pc-relative)
+//     * VRM - Masked Registers
+//     * AT - target accumulator
+//     * N - the Nth bit in a VSR
+//     * Additional 1-bit operands may be required for certain
+//       instruction formats such as: MC, P, MP
+//     * X / Y / P - mask values. In the instruction encoding, this is
+//       represented as XMSK, YMSK and PMSK.
+//     * MEM - indicates if the instruction format requires any memory
+//       accesses. This does not have <OP_LEN> attached to it.
+//   <OP_LEN> - the length of each operand in bits.
+//     For operands that are 1 bit, the '1' is omitted from the name.
+//
+// Example: 8RR_XX4Form_IMM8_XTAB6
+//   8RR_XX4Form is the instruction format.
+//   The operand is an 8-bit immediate (IMM), the destination (XT)
+//   and sources (XA, XB) that are all 6-bits. The destination and
+//   source registers are combined if they are of the same length.
+//   Moreover, the order of operands reflects the order of operands
+//   in the encoding.
+
+//-------------------------- Predicate definitions ---------------------------//
+def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC ISA 3.1 specific type constraints.
+//
+
+def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
+  SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
+]>;
+def SDT_PPCAccBuild : SDTypeProfile<1, 4, [
+  SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>,
+  SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32>
+]>;
+def SDT_PPCPairBuild : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>
+]>;
+def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisPtrTy<2>
+]>;
+def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisPtrTy<2>
+]>;
+def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
+  SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ISA 3.1 specific PPCISD nodes.
+//
+
+def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
+def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>;
+def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>;
+def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
+                              []>;
+def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
+                               []>;
+def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
+
+//===----------------------------------------------------------------------===//
+
+// PC Relative flag (for instructions that use the address of the prefix for
+// address computations).
+class isPCRel { bit PCRel = 1; }
+
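+// Note (illustrative, not an additional definition): records that mix in
+// isPCRel get PCRel = 1, which the prefixed-instruction classes below wire
+// straight into the prefix encoding (their "let Inst{11} = PCRel;" bit).
+// The *_p multiclasses further down use this to emit paired definitions
+// along the lines of:
+//   def PADDI8   : ...;          // R = 0, absolute addressing
+//   def PADDI8pc : ..., isPCRel; // R = 1, pc-relative addressing
+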
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+                       [SDNPHasChain, SDNPMayLoad]>;
+
+// Top-level class for prefixed instructions.
+class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
+         InstrItinClass itin> : Instruction {
+  field bits<64> Inst;
+  field bits<64> SoftFail = 0;
+  bit PCRel = 0; // Default value, set by isPCRel.
+  let Size = 8;
+
+  let Namespace = "PPC";
+  let OutOperandList = OOL;
+  let InOperandList = IOL;
+  let AsmString = asmstr;
+  let Itinerary = itin;
+  let Inst{0-5} = pref;
+  let Inst{32-37} = opcode;
+
+  bits<1> PPC970_First = 0;
+  bits<1> PPC970_Single = 0;
+  bits<1> PPC970_Cracked = 0;
+  bits<3> PPC970_Unit = 0;
+
+  /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+  /// these must be reflected there! See comments there for what these are.
+  let TSFlags{0} = PPC970_First;
+  let TSFlags{1} = PPC970_Single;
+  let TSFlags{2} = PPC970_Cracked;
+  let TSFlags{5-3} = PPC970_Unit;
+
+  bits<1> Prefixed = 1; // This is a prefixed instruction.
+  let TSFlags{7} = Prefixed;
+
+  // For cases where multiple instruction definitions really represent the
+  // same underlying instruction but with one definition for 64-bit arguments
+  // and one for 32-bit arguments, this bit breaks the degeneracy between
+  // the two forms and allows TableGen to generate mapping tables.
+  bit Interpretation64Bit = 0;
+
+  // Fields used for relation models.
+  string BaseName = "";
+}
+
+// VX-Form: [ PO VT R VB RC XO ]
+class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr,
+                     InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> VT;
+  bits<5> VB;
+  bit RC = 0;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = VT;
+  let Inst{11-15} = R;
+  let Inst{16-20} = VB;
+  let Inst{21} = RC;
+  let Inst{22-31} = xo;
+}
+
+// Multiclass definition to account for record and non-record form
+// instructions of VXRForm.
+multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL,
+                           string asmbase, string asmstr,
+                           InstrItinClass itin, list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL,
+                              !strconcat(asmbase, !strconcat(" ", asmstr)),
+                              itin, pattern>, RecFormRel;
+    let Defs = [CR6] in
+    def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL,
+                              !strconcat(asmbase, !strconcat(". ", asmstr)),
+                              itin, []>, isRecordForm, RecFormRel;
+  }
+}
+
+class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+                                InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<5> FRS;
+  bits<39> D_RA;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 2;
+  let Inst{8-10} = 0;
+  let Inst{11} = PCRel;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = D_RA{33-16}; // d0
+
+  // The instruction.
+  let Inst{38-42} = FRS{4-0};
+  let Inst{43-47} = D_RA{38-34}; // RA
+  let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+class MLS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+                            InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RT;
+  bits<5> RA;
+  bits<34> SI;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 2;
+  let Inst{8-10} = 0;
+  let Inst{11} = PCRel;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = SI{33-16};
+
+  // The instruction.
+  let Inst{38-42} = RT;
+  let Inst{43-47} = RA;
+  let Inst{48-63} = SI{15-0};
+}
+
+class MLS_DForm_SI34_RT5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+                         InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RT;
+  bits<34> SI;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 2;
+  let Inst{8-10} = 0;
+  let Inst{11} = 0;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = SI{33-16};
+
+  // The instruction.
+  let Inst{38-42} = RT;
+  let Inst{43-47} = 0;
+  let Inst{48-63} = SI{15-0};
+}
+
+multiclass MLS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
+                                   dag PCRel_IOL, string asmstr,
+                                   InstrItinClass itin> {
+  def NAME : MLS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
+                                   !strconcat(asmstr, ", 0"), itin, []>;
+  def pc : MLS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
+                                 !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
+}
+
+class 8LS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+                                InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RT;
+  bits<39> D_RA;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-10} = 0;
+  let Inst{11} = PCRel;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = D_RA{33-16}; // d0
+
+  // The instruction.
+  let Inst{38-42} = RT{4-0};
+  let Inst{43-47} = D_RA{38-34}; // RA
+  let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+// 8LS:D-Form: [ 1 0 0 // R // d0
+//               PO TX T RA d1 ]
+class 8LS_DForm_R_SI34_XT6_RA5_MEM<bits<5> opcode, dag OOL, dag IOL,
+                                   string asmstr, InstrItinClass itin,
+                                   list<dag> pattern>
+  : PI<1, { opcode, ? }, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<39> D_RA;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 0;
+  let Inst{8} = 0;
+  let Inst{9-10} = 0; // reserved
+  let Inst{11} = PCRel;
+  let Inst{12-13} = 0; // reserved
+  let Inst{14-31} = D_RA{33-16}; // d0
+
+  // The instruction.
+  let Inst{37} = XT{5};
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-47} = D_RA{38-34}; // RA
+  let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+// X-Form: [PO T IMM VRB XO TX]
+class XForm_XT6_IMM5_VB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                         string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<5> VRB;
+  bits<5> IMM;
+
+  let Pattern = pattern;
+  let Inst{6-10} = XT{4-0};
+  let Inst{11-15} = IMM;
+  let Inst{16-20} = VRB;
+  let Inst{21-30} = xo;
+  let Inst{31} = XT{5};
+}
+
+class 8RR_XX4Form_IMM8_XTAB6<bits<6> opcode, bits<2> xo,
+                             dag OOL, dag IOL, string asmstr,
+                             InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<6> XC;
+  bits<8> IMM;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8} = 0;
+  let Inst{9-11} = 0;
+  let Inst{12-13} = 0;
+  let Inst{14-23} = 0;
+  let Inst{24-31} = IMM;
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-57} = XC{4-0};
+  let Inst{58-59} = xo;
+  let Inst{60} = XC{5};
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = XT{5};
+}
+
+class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+                        InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<5> VB;
+  bits<3> N;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = RD;
+  let Inst{11-12} = 0;
+  let Inst{13-15} = N;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
+
+// VX-Form: [PO VRT RA VRB XO].
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
+  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB),
+             !strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
+    RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
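+// Usage illustration (exposition only): VINSBVLX further down instantiates
+// this helper as
+//   def VINSBVLX : VXForm_VTB5_RA5_ins<15, "vinsbvlx", [...]>;
+// yielding a destructive insert whose $vDi input is tied to the $vD output
+// (RegConstraint) and omitted from the encoding (NoEncode).
+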
+// VX-Form: [PO VRT RA RB XO].
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
+  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB),
+             !strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
+    RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
+// VX-Form: [ PO BF // VRA VRB XO ]
+class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+                      InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<3> BF;
+  bits<5> VA;
+  bits<5> VB;
+
+  let Pattern = pattern;
+
+  let Inst{6-8} = BF;
+  let Inst{9-10} = 0;
+  let Inst{11-15} = VA;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
+// VN-Form: [PO VRT VRA VRB PS SD XO]
+// SD is "Shift Direction"
+class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
+                       InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> VRT;
+  bits<5> VRA;
+  bits<5> VRB;
+  bits<3> SD;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = VRT;
+  let Inst{11-15} = VRA;
+  let Inst{16-20} = VRB;
+  let Inst{21-22} = ps;
+  let Inst{23-25} = SD;
+  let Inst{26-31} = xo;
+}
+
+class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL,
+                        string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<5> VB;
+  bit MP;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = RD;
+  let Inst{11-14} = eo;
+  let Inst{15} = MP;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+//               PO T XO TX imm1 ].
+class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+                          string asmstr, InstrItinClass itin,
+                          list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<32> IMM32;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0; // reserved
+  let Inst{14-15} = 0; // reserved
+  let Inst{16-31} = IMM32{31-16};
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-46} = xo;
+  let Inst{47} = XT{5};
+  let Inst{48-63} = IMM32{15-0};
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+//               PO T XO IX TX imm1 ].
+class 8RR_DForm_IMM32_XT6_IX<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+                             string asmstr, InstrItinClass itin,
+                             list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bit IX;
+  bits<32> IMM32;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0; // reserved
+  let Inst{14-15} = 0; // reserved
+  let Inst{16-31} = IMM32{31-16};
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-45} = xo;
+  let Inst{46} = IX;
+  let Inst{47} = XT{5};
+  let Inst{48-63} = IMM32{15-0};
+}
+
+class 8RR_XX4Form_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+                         string asmstr, InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<6> XC;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = 0;
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-57} = XC{4-0};
+  let Inst{58-59} = xo;
+  let Inst{60} = XC{5};
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = XT{5};
+}
+
+class 8RR_XX4Form_IMM3_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+                              string asmstr, InstrItinClass itin,
+                              list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<6> XC;
+  bits<3> IMM;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0;
+  let Inst{14-28} = 0;
+  let Inst{29-31} = IMM;
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-57} = XC{4-0};
+  let Inst{58-59} = xo;
+  let Inst{60} = XC{5};
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = XT{5};
+}
+
+// [PO BF / XO2 B XO BX /]
+class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
+                          dag IOL, string asmstr, InstrItinClass itin,
+                          list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<3> BF;
+  bits<6> XB;
+
+  let Pattern = pattern;
+
+  let Inst{6-8} = BF;
+  let Inst{9-10} = 0;
+  let Inst{11-15} = xo2;
+  let Inst{16-20} = XB{4-0};
+  let Inst{21-29} = xo;
+  let Inst{30} = XB{5};
+  let Inst{31} = 0;
+}
+
+// X-Form: [ PO RT BI /// XO / ]
+class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmstr, InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let B = 0;
+}
+
+multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
+                                       dag PCRel_IOL, string asmstr,
+                                       InstrItinClass itin> {
+  def NAME : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
+                                       !strconcat(asmstr, ", 0"), itin, []>;
+  def pc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
+                                     !strconcat(asmstr, ", 1"), itin, []>,
+           isPCRel;
+}
+
+multiclass 8LS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
+                                       dag PCRel_IOL, string asmstr,
+                                       InstrItinClass itin> {
+  def NAME : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
+                                       !strconcat(asmstr, ", 0"), itin, []>;
+  def pc : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
+                                     !strconcat(asmstr, ", 1"), itin, []>,
+           isPCRel;
+}
+
+multiclass 8LS_DForm_R_SI34_XT6_RA5_MEM_p<bits<5> opcode, dag OOL, dag IOL,
+                                          dag PCRel_IOL, string asmstr,
+                                          InstrItinClass itin> {
+  def NAME : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, IOL,
+                                          !strconcat(asmstr, ", 0"), itin, []>;
+  def pc : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, PCRel_IOL,
+                                        !strconcat(asmstr, ", 1"), itin, []>,
+           isPCRel;
+}
+
+def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
+def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
+def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">;
+def RCCp {
+  dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+  dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
+}
+
+let Predicates = [PrefixInstrs] in {
+  let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+    defm PADDI8 :
+      MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
+                              (ins immZero:$RA, s34imm_pcrel:$SI),
+                              "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+    let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+      def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
+                                    (ins s34imm:$SI),
+                                    "pli $RT, $SI", IIC_IntSimple, []>;
+    }
+  }
+  defm PADDI :
+    MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
+                            (ins immZero:$RA, s34imm_pcrel:$SI),
+                            "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+  let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+    def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
+                                 (ins s34imm:$SI),
+                                 "pli $RT, $SI", IIC_IntSimple, []>;
+  }
+
+  let mayLoad = 1, mayStore = 0 in {
+    defm PLXV :
+      8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XT), (ins memri34:$D_RA),
+                                     (ins memri34_pcrel:$D_RA),
+                                     "plxv $XT, $D_RA", IIC_LdStLFD>;
+    defm PLFS :
+      MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA),
+                                  (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA",
+                                  IIC_LdStLFD>;
+    defm PLFD :
+      MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA),
+                                  (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA",
+                                  IIC_LdStLFD>;
+    defm PLXSSP :
+      8LS_DForm_R_SI34_RTA5_MEM_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA),
+                                  (ins memri34_pcrel:$D_RA),
+                                  "plxssp $VRT, $D_RA", IIC_LdStLFD>;
+    defm PLXSD :
+      8LS_DForm_R_SI34_RTA5_MEM_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA),
+                                  (ins memri34_pcrel:$D_RA),
+                                  "plxsd $VRT, $D_RA", IIC_LdStLFD>;
+    let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+      defm PLBZ8 :
+        MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA),
+                                    (ins
memri34_pcrel:$D_RA), "plbz $RT, $D_RA", + IIC_LdStLFD>; + defm PLHZ8 : + MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA", + IIC_LdStLFD>; + defm PLHA8 : + MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA", + IIC_LdStLFD>; + defm PLWA8 : + 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs g8rc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), + "plwa $RT, $D_RA", IIC_LdStLFD>; + defm PLWZ8 : + MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA", + IIC_LdStLFD>; + } + defm PLBZ : + MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA", + IIC_LdStLFD>; + defm PLHZ : + MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA", + IIC_LdStLFD>; + defm PLHA : + MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA", + IIC_LdStLFD>; + defm PLWZ : + MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA", + IIC_LdStLFD>; + defm PLWA : + 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs gprc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA", + IIC_LdStLFD>; + defm PLD : + 8LS_DForm_R_SI34_RTA5_MEM_p<57, (outs g8rc:$RT), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA", + IIC_LdStLFD>; + } + + let mayStore = 1, mayLoad = 0 in { + defm PSTXV : + 8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA), + (ins vsrc:$XS, memri34_pcrel:$D_RA), + "pstxv $XS, $D_RA", IIC_LdStLFD>; + defm PSTFS : + MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA), + (ins f4rc:$FRS, memri34_pcrel:$D_RA), + "pstfs $FRS, $D_RA", IIC_LdStLFD>; + defm PSTFD : + MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA), + (ins f8rc:$FRS, memri34_pcrel:$D_RA), + "pstfd $FRS, $D_RA", IIC_LdStLFD>; + defm PSTXSSP : + 8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA), + (ins vfrc:$VRS, memri34_pcrel:$D_RA), + "pstxssp $VRS, $D_RA", IIC_LdStLFD>; + defm PSTXSD : + 8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA), + (ins vfrc:$VRS, memri34_pcrel:$D_RA), + "pstxsd $VRS, $D_RA", IIC_LdStLFD>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + defm PSTB8 : + MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA), + (ins g8rc:$RS, memri34_pcrel:$D_RA), + "pstb $RS, $D_RA", IIC_LdStLFD>; + defm PSTH8 : + MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA), + (ins g8rc:$RS, memri34_pcrel:$D_RA), + "psth $RS, $D_RA", IIC_LdStLFD>; + defm PSTW8 : + MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA), + (ins g8rc:$RS, memri34_pcrel:$D_RA), + "pstw $RS, $D_RA", IIC_LdStLFD>; + } + defm PSTB : + MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA), + (ins gprc:$RS, memri34_pcrel:$D_RA), + "pstb $RS, $D_RA", IIC_LdStLFD>; + defm PSTH : + MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA), + (ins gprc:$RS, memri34_pcrel:$D_RA), + "psth $RS, $D_RA", IIC_LdStLFD>; + defm PSTW : + MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA), + (ins gprc:$RS, memri34_pcrel:$D_RA), + "pstw $RS, $D_RA", IIC_LdStLFD>; + defm PSTD : + 8LS_DForm_R_SI34_RTA5_MEM_p<61, (outs), (ins g8rc:$RS, 
memri34:$D_RA),
+                                (ins g8rc:$RS, memri34_pcrel:$D_RA),
+                                "pstd $RS, $D_RA", IIC_LdStLFD>;
+  }
+}
+
+class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+                           string asmstr, InstrItinClass itin,
+                           list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> XTp;
+  bits<17> DQ_RA;
+  let Pattern = pattern;
+
+  let Inst{6-9} = XTp{3-0};
+  let Inst{10} = XTp{4};
+  let Inst{11-15} = DQ_RA{16-12}; // Register #
+  let Inst{16-27} = DQ_RA{11-0}; // Displacement.
+  let Inst{28-31} = xo;
+}
+
+class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                      string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp {
+  bits<5> XTp;
+  bits<5> A;
+  bits<5> B;
+
+  let Pattern = pattern;
+  let Inst{6-9} = XTp{3-0};
+  let Inst{10} = XTp{4};
+  let Inst{11-15} = A;
+  let Inst{16-20} = B;
+  let Inst{21-30} = xo;
+  let Inst{31} = 0;
+}
+
+class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+                                InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<5> XTp;
+  bits<39> D_RA;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-10} = 0;
+  let Inst{11} = PCRel;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = D_RA{33-16}; // Imm18
+
+  // The instruction.
+  let Inst{38-41} = XTp{3-0};
+  let Inst{42} = XTp{4};
+  let Inst{43-47} = D_RA{38-34}; // Register #
+  let Inst{48-63} = D_RA{15-0}; // D
+}
+
+multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
+                                       dag IOL, dag PCRel_IOL,
+                                       string asmstr, InstrItinClass itin> {
+  def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
+                                       !strconcat(asmstr, ", 0"), itin, []>;
+  def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL,
+                                     !strconcat(asmstr, ", 1"), itin, []>,
+           isPCRel;
+}
+
+
+
+// [PO AS XO2 XO]
+class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
+                string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+
+  let Pattern = pattern;
+
+  let Inst{6-8} = AT;
+  let Inst{9-10} = 0;
+  let Inst{11-15} = xo2;
+  let Inst{16-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31} = 0;
+}
+
+// X-Form: [ PO T EO UIM XO TX ]
+class XForm_XT6_IMM5<bits<6> opcode, bits<5> eo, bits<10> xo, dag OOL, dag IOL,
+                     string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<5> UIM;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = XT{4-0};
+  let Inst{11-15} = eo;
+  let Inst{16-20} = UIM;
+  let Inst{21-30} = xo;
+  let Inst{31} = XT{5};
+}
+
+class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+                       string asmstr, InstrItinClass itin,
+                       list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+  bits<6> XA;
+  bits<6> XB;
+
+  let Pattern = pattern;
+
+  let Inst{6-8} = AT;
+  let Inst{9-10} = 0;
+  let Inst{11-15} = XA{4-0};
+  let Inst{16-20} = XB{4-0};
+  let Inst{21-28} = xo;
+  let Inst{29} = XA{5};
+  let Inst{30} = XB{5};
+  let Inst{31} = 0;
+}
+
+class MMIRR_XX3Form_XY4P2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+                               string asmstr, InstrItinClass itin,
+                               list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<4> XMSK;
+  bits<4> YMSK;
+  bits<2> PMSK;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 3;
+  let Inst{8-11} = 9;
+  let Inst{12-15} = 0;
+  let Inst{16-17} = PMSK;
+  let Inst{18-23} = 0;
+  let Inst{24-27} = XMSK;
+  let Inst{28-31} = YMSK;
+
+  // The instruction.
+  let Inst{38-40} = AT;
+  let Inst{41-42} = 0;
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-60} = xo;
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XY4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+                             string asmstr, InstrItinClass itin,
+                             list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<4> XMSK;
+  bits<4> YMSK;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 3;
+  let Inst{8-11} = 9;
+  let Inst{12-23} = 0;
+  let Inst{24-27} = XMSK;
+  let Inst{28-31} = YMSK;
+
+  // The instruction.
+  let Inst{38-40} = AT;
+  let Inst{41-42} = 0;
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-60} = xo;
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_X4Y2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+                              string asmstr, InstrItinClass itin,
+                              list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<4> XMSK;
+  bits<2> YMSK;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 3;
+  let Inst{8-11} = 9;
+  let Inst{12-23} = 0;
+  let Inst{24-27} = XMSK;
+  let Inst{28-29} = YMSK;
+  let Inst{30-31} = 0;
+
+  // The instruction.
+  let Inst{38-40} = AT;
+  let Inst{41-42} = 0;
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-60} = xo;
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XY4P8_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+                               string asmstr, InstrItinClass itin,
+                               list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<4> XMSK;
+  bits<4> YMSK;
+  bits<8> PMSK;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 3;
+  let Inst{8-11} = 9;
+  let Inst{12-15} = 0;
+  let Inst{16-23} = PMSK;
+  let Inst{24-27} = XMSK;
+  let Inst{28-31} = YMSK;
+
+  // The instruction.
+  let Inst{38-40} = AT;
+  let Inst{41-42} = 0;
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-60} = xo;
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+                              string asmstr, InstrItinClass itin,
+                              list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<3> AT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<4> XMSK;
+  bits<4> YMSK;
+  bits<4> PMSK;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 3;
+  let Inst{8-11} = 9;
+  let Inst{12-15} = 0;
+  let Inst{16-19} = PMSK;
+  let Inst{20-23} = 0;
+  let Inst{24-27} = XMSK;
+  let Inst{28-31} = YMSK;
+
+  // The instruction.
+ let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + + + +def Concats { + dag VecsToVecPair0 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1), + $vs1, sub_vsx0)); + dag VecsToVecPair1 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), + $vs3, sub_vsx0)); +} + +let Predicates = [PairedVectorMemops] in { + def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)), + Concats.VecsToVecPair0>; + def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)), + Concats.VecsToVecPair0>; + def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, 0)), + (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>; + def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, 1)), + (v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>; +} + +let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in { + def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp), + (ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA", + IIC_LdStLFD, []>; + def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src), + "lxvpx $XTp, $src", IIC_LdStLFD, + []>; +} + +let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in { + def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp, + memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA", + IIC_LdStLFD, []>; + def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst), + "stxvpx $XTp, $dst", IIC_LdStLFD, + []>; +} + +let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in { + defm PLXVP : + 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA", + IIC_LdStLFD>; +} + +let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in { + defm PSTXVP : + 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, memri34:$D_RA), + (ins vsrprc:$XTp, memri34_pcrel:$D_RA), + "pstxvp $XTp, $D_RA", IIC_LdStLFD>; +} + +let Predicates = [PairedVectorMemops] in { + // Intrinsics for Paired Vector Loads. + def : Pat<(v256i1 (int_ppc_vsx_lxvp DQForm:$src)), (LXVP memrix16:$src)>; + def : Pat<(v256i1 (int_ppc_vsx_lxvp XForm:$src)), (LXVPX XForm:$src)>; + let Predicates = [PairedVectorMemops, PrefixInstrs] in { + def : Pat<(v256i1 (int_ppc_vsx_lxvp PDForm:$src)), (PLXVP memri34:$src)>; + } + // Intrinsics for Paired Vector Stores. 
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, DQForm:$dst), + (STXVP $XSp, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, XForm:$dst), + (STXVPX $XSp, XForm:$dst)>; + let Predicates = [PairedVectorMemops, PrefixInstrs] in { + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, PDForm:$dst), + (PSTXVP $XSp, memri34:$dst)>; + } +} + +let Predicates = [PCRelativeMemops] in { + // Load i32 + def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZpc $ga, 0)>; + def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZpc $ga, 0)>; + def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZpc $ga, 0)>; + def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZpc $ga, 0)>; + def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), + (PLHApc $ga, 0)>; + def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), + (PLHZpc $ga, 0)>; + def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))), + (PLHZpc $ga, 0)>; + def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>; + + // Store i32 + def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTBpc $RS, $ga, 0)>; + def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTHpc $RS, $ga, 0)>; + def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTWpc $RS, $ga, 0)>; + + // Load i64 + def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZ8pc $ga, 0)>; + def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZ8pc $ga, 0)>; + def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZ8pc $ga, 0)>; + def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))), + (PLBZ8pc $ga, 0)>; + def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), + (PLHA8pc $ga, 0)>; + def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), + (PLHZ8pc $ga, 0)>; + def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))), + (PLHZ8pc $ga, 0)>; + def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))), + (PLWZ8pc $ga, 0)>; + def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))), + (PLWA8pc $ga, 0)>; + def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))), + (PLWZ8pc $ga, 0)>; + def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>; + + // Store i64 + def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTB8pc $RS, $ga, 0)>; + def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTH8pc $RS, $ga, 0)>; + def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTW8pc $RS, $ga, 0)>; + def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTDpc $RS, $ga, 0)>; + + // Load f32 + def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>; + + // Store f32 + def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTFSpc $FRS, $ga, 0)>; + + // Load f64 + def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))), + (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>; + def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>; + + // Store f64 + def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTFDpc $FRS, $ga, 0)>; + + // Load f128 + def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))), + (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>; + + // Store f128 + def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>; + + // Load v4i32 + def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 
0)>; + + // Store v4i32 + def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTXVpc $XS, $ga, 0)>; + + // Load v2i64 + def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; + + // Store v2i64 + def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTXVpc $XS, $ga, 0)>; + + // Load v4f32 + def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; + + // Store v4f32 + def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTXVpc $XS, $ga, 0)>; + + // Load v2f64 + def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; + + // Store v2f64 + def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), + (PSTXVpc $XS, $ga, 0)>; + + // Atomic Load + def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)), + (PLBZpc $ga, 0)>; + def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)), + (PLHZpc $ga, 0)>; + def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)), + (PLWZpc $ga, 0)>; + def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)), + (PLDpc $ga, 0)>; + + // Atomic Store + def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), + (PSTBpc $RS, $ga, 0)>; + def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), + (PSTHpc $RS, $ga, 0)>; + def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), + (PSTWpc $RS, $ga, 0)>; + def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), + (PSTB8pc $RS, $ga, 0)>; + def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), + (PSTH8pc $RS, $ga, 0)>; + def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), + (PSTW8pc $RS, $ga, 0)>; + def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), + (PSTDpc $RS, $ga, 0)>; + + // Special Cases For PPCstore_scal_int_from_vsr + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), + (PPCmatpcreladdr PCRelForm:$dst), 8), + (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), + (PPCmatpcreladdr PCRelForm:$dst), 8), + (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>; + + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), + (PPCmatpcreladdr PCRelForm:$dst), 8), + (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), + (PPCmatpcreladdr PCRelForm:$dst), 8), + (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>; + + def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))), + (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>; + + // If the PPCmatpcreladdr node is not caught by any other pattern it should be + // caught here and turned into a paddi instruction to materialize the address. + def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; + // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize + // tls global address with paddi instruction. + def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; + // PPCtlslocalexecmataddr node is used for TLS local exec models to + // materialize tls global address with paddi instruction. 
+ def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), + (PADDI8 $in, $addr)>; +} + +let Predicates = [PrefixInstrs] in { + def XXPERMX : + 8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC, u3imm:$UIM), + "xxpermx $XT, $XA, $XB, $XC, $UIM", + IIC_VecPerm, []>; + def XXBLENDVB : + 8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC), "xxblendvb $XT, $XA, $XB, $XC", + IIC_VecGeneral, []>; + def XXBLENDVH : + 8RR_XX4Form_XTABC6<33, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC), "xxblendvh $XT, $XA, $XB, $XC", + IIC_VecGeneral, []>; + def XXBLENDVW : + 8RR_XX4Form_XTABC6<33, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC), "xxblendvw $XT, $XA, $XB, $XC", + IIC_VecGeneral, []>; + def XXBLENDVD : + 8RR_XX4Form_XTABC6<33, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC), "xxblendvd $XT, $XA, $XB, $XC", + IIC_VecGeneral, []>; +} + +// XXSPLTIW/DP/32DX need extra flags to make sure the compiler does not attempt +// to spill part of the instruction when the values are similar. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in { + def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT), + (ins i32imm:$IMM32), + "xxspltiw $XT, $IMM32", IIC_VecGeneral, + []>; + def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT), + (ins i32imm:$IMM32), + "xxspltidp $XT, $IMM32", IIC_VecGeneral, + [(set v2f64:$XT, + (PPCxxspltidp i32:$IMM32))]>; + def XXSPLTI32DX : + 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT), + (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32), + "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral, + [(set v2i64:$XT, + (PPCxxsplti32dx v2i64:$XTi, i32:$IX, + i32:$IMM32))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; +} + +let Predicates = [IsISA3_1] in { + def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI), + "setbc $RT, $BI", IIC_IntCompare, []>; + def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI), + "setbcr $RT, $BI", IIC_IntCompare, []>; + def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI), + "setnbc $RT, $BI", IIC_IntCompare, []>; + def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI), + "setnbcr $RT, $BI", IIC_IntCompare, []>; + + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI), + "setbc $RT, $BI", IIC_IntCompare, []>; + def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI), + "setbcr $RT, $BI", IIC_IntCompare, []>; + def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI), + "setnbc $RT, $BI", IIC_IntCompare, []>; + def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI), + "setnbcr $RT, $BI", IIC_IntCompare, []>; + } + + def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT), + (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), + "vsldbi $VRT, $VRA, $VRB, $SH", + IIC_VecGeneral, + [(set v16i8:$VRT, + (int_ppc_altivec_vsldbi v16i8:$VRA, + v16i8:$VRB, + timm:$SH))]>; + def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT), + (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), + "vsrdbi $VRT, $VRA, $VRB, $SH", + IIC_VecGeneral, + [(set v16i8:$VRT, + (int_ppc_altivec_vsrdbi v16i8:$VRA, + v16i8:$VRB, + timm:$SH))]>; + defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB), + "vstribr", "$vT, $vB", IIC_VecGeneral, + [(set v16i8:$vT, + (int_ppc_altivec_vstribr v16i8:$vB))]>; + defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB), 
+ "vstribl", "$vT, $vB", IIC_VecGeneral, + [(set v16i8:$vT, + (int_ppc_altivec_vstribl v16i8:$vB))]>; + defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB), + "vstrihr", "$vT, $vB", IIC_VecGeneral, + [(set v8i16:$vT, + (int_ppc_altivec_vstrihr v8i16:$vB))]>; + defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB), + "vstrihl", "$vT, $vB", IIC_VecGeneral, + [(set v8i16:$vT, + (int_ppc_altivec_vstrihl v8i16:$vB))]>; + def VINSW : + VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB), + "vinsw $vD, $rB, $UIM", IIC_VecGeneral, + [(set v4i32:$vD, + (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VINSD : + VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB), + "vinsd $vD, $rB, $UIM", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VINSBVLX : + VXForm_VTB5_RA5_ins<15, "vinsbvlx", + [(set v16i8:$vD, + (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA, + v16i8:$vB))]>; + def VINSBVRX : + VXForm_VTB5_RA5_ins<271, "vinsbvrx", + [(set v16i8:$vD, + (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA, + v16i8:$vB))]>; + def VINSHVLX : + VXForm_VTB5_RA5_ins<79, "vinshvlx", + [(set v8i16:$vD, + (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA, + v8i16:$vB))]>; + def VINSHVRX : + VXForm_VTB5_RA5_ins<335, "vinshvrx", + [(set v8i16:$vD, + (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA, + v8i16:$vB))]>; + def VINSWVLX : + VXForm_VTB5_RA5_ins<143, "vinswvlx", + [(set v4i32:$vD, + (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA, + v4i32:$vB))]>; + def VINSWVRX : + VXForm_VTB5_RA5_ins<399, "vinswvrx", + [(set v4i32:$vD, + (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA, + v4i32:$vB))]>; + def VINSBLX : + VXForm_VRT5_RAB5_ins<527, "vinsblx", + [(set v16i8:$vD, + (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA, + i32:$rB))]>; + def VINSBRX : + VXForm_VRT5_RAB5_ins<783, "vinsbrx", + [(set v16i8:$vD, + (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA, + i32:$rB))]>; + def VINSHLX : + VXForm_VRT5_RAB5_ins<591, "vinshlx", + [(set v8i16:$vD, + (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA, + i32:$rB))]>; + def VINSHRX : + VXForm_VRT5_RAB5_ins<847, "vinshrx", + [(set v8i16:$vD, + (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA, + i32:$rB))]>; + def VINSWLX : + VXForm_VRT5_RAB5_ins<655, "vinswlx", + [(set v4i32:$vD, + (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA, + i32:$rB))]>; + def VINSWRX : + VXForm_VRT5_RAB5_ins<911, "vinswrx", + [(set v4i32:$vD, + (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA, + i32:$rB))]>; + def VINSDLX : + VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), + "vinsdlx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VINSDRX : + VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), + "vinsdrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB), + "vextractbm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractbm v16i8:$vB))]>; + def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB), + "vextracthm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextracthm v8i16:$vB))]>; + def VEXTRACTWM : 
VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB), + "vextractwm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractwm v4i32:$vB))]>; + def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB), + "vextractdm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractdm v2i64:$vB))]>; + def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB), + "vextractqm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractqm v1i128:$vB))]>; + def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandbm $vD, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vexpandbm + v16i8:$vB))]>; + def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandhm $vD, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vexpandhm + v8i16:$vB))]>; + def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandwm $vD, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vexpandwm + v4i32:$vB))]>; + def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpanddm $vD, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vexpanddm + v2i64:$vB))]>; + def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandqm $vD, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vexpandqm + v1i128:$vB))]>; + def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrbm $vD, $rB", IIC_VecGeneral, + [(set v16i8:$vD, + (int_ppc_altivec_mtvsrbm i64:$rB))]>; + def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrhm $vD, $rB", IIC_VecGeneral, + [(set v8i16:$vD, + (int_ppc_altivec_mtvsrhm i64:$rB))]>; + def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrwm $vD, $rB", IIC_VecGeneral, + [(set v4i32:$vD, + (int_ppc_altivec_mtvsrwm i64:$rB))]>; + def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrdm $vD, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_mtvsrdm i64:$rB))]>; + def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrqm $vD, $rB", IIC_VecGeneral, + [(set v1i128:$vD, + (int_ppc_altivec_mtvsrqm i64:$rB))]>; + def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D), + "mtvsrbmi $vD, $D", IIC_VecGeneral, + [(set v16i8:$vD, + (int_ppc_altivec_mtvsrbm imm:$D))]>; + def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbb $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbb + v16i8:$vB, timm:$MP))]>; + def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbh $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbh + v8i16:$vB, timm:$MP))]>; + def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbw $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbw + v4i32:$vB, timm:$MP))]>; + def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbd $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbd + v2i64:$vB, timm:$MP))]>; + def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextdubvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextdubvlx v16i8:$vA, + v16i8:$vB, + i32:$rC))]>; + def VEXTDUBVRX : VAForm_1a<25, 
(outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextdubvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextdubvrx v16i8:$vA, + v16i8:$vB, + i32:$rC))]>; + def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduhvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduhvlx v8i16:$vA, + v8i16:$vB, + i32:$rC))]>; + def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduhvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduhvrx v8i16:$vA, + v8i16:$vB, + i32:$rC))]>; + def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduwvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduwvlx v4i32:$vA, + v4i32:$vB, + i32:$rC))]>; + def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduwvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduwvrx v4i32:$vA, + v4i32:$vB, + i32:$rC))]>; + def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextddvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextddvlx v2i64:$vA, + v2i64:$vB, + i32:$rC))]>; + def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextddvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextddvrx v2i64:$vA, + v2i64:$vB, + i32:$rC))]>; + def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpdepd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>; + def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpextd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>; + def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "pdepd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>; + def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "pextd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>; + def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vcfuged $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>; + def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N), + "vgnb $rD, $vB, $N", IIC_VecGeneral, + [(set i64:$rD, + (int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>; + def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cfuged $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>; + def XXEVAL : + 8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC, u8imm:$IMM), + "xxeval $XT, $XA, $XB, $XC, $IMM", IIC_VecGeneral, + [(set v2i64:$XT, (int_ppc_vsx_xxeval v2i64:$XA, + v2i64:$XB, v2i64:$XC, timm:$IMM))]>; + def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vclzdm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vclzdm v2i64:$vA, v2i64:$vB))]>; + def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vctzdm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vctzdm v2i64:$vA, v2i64:$vB))]>; + def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cntlzdm $rA, $rS, $rB", 
IIC_IntGeneral, + [(set i64:$rA, + (int_ppc_cntlzdm i64:$rS, i64:$rB))]>; + def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cnttzdm $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, + (int_ppc_cnttzdm i64:$rS, i64:$rB))]>; + def XXGENPCVBM : + XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), + "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>; + def XXGENPCVHM : + XForm_XT6_IMM5_VB5<60, 917, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), + "xxgenpcvhm $XT, $VRB, $IMM", IIC_VecGeneral, []>; + def XXGENPCVWM : + XForm_XT6_IMM5_VB5<60, 948, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), + "xxgenpcvwm $XT, $VRB, $IMM", IIC_VecGeneral, []>; + def XXGENPCVDM : + XForm_XT6_IMM5_VB5<60, 949, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), + "xxgenpcvdm $XT, $VRB, $IMM", IIC_VecGeneral, []>; + def VCLRLB : VXForm_1<397, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB), + "vclrlb $vD, $vA, $rB", IIC_VecGeneral, + [(set v16i8:$vD, + (int_ppc_altivec_vclrlb v16i8:$vA, i32:$rB))]>; + def VCLRRB : VXForm_1<461, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB), + "vclrrb $vD, $vA, $rB", IIC_VecGeneral, + [(set v16i8:$vD, + (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>; + def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulld $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>; + def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>; + def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>; + def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>; + def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>; + def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>; + def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmoduw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>; + def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>; + def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>; + def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>; + def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>; + def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>; + def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>; + def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA, + 
v4i32:$vB))]>; + def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA, + v4i32:$vB))]>; + def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA, + v2i64:$vB))]>; + def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA, + v2i64:$vB))]>; + def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB), + "xvtlsbb $BF, $XB", IIC_VecGeneral, []>; + + // The XFormMemOp flag for the following 8 instructions is set on + // the instruction format. + let mayLoad = 1, mayStore = 0 in { + def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>; + def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>; + def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>; + def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>; + } + + let mayLoad = 0, mayStore = 1 in { + def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>; + def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>; + def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>; + def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>; + } + + def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulesd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA, + v2i64:$vB))]>; + def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuleud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA, + v2i64:$vB))]>; + def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulosd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA, + v2i64:$vB))]>; + def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuloud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA, + v2i64:$vB))]>; + def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmsumcud + v2i64:$vA, v2i64:$vB, v1i128:$vC))]>; + def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>; + def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivuq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>; + def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA, + v1i128:$vB))]>; + def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA, + v1i128:$vB))]>; + def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>; + def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>; + def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>; + def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>; + def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>; + def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. 
$vD, $vA, $vB" , v1i128>; + def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>; + def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmoduq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>; + def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB), + "vextsd2q $vD, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>; + def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), + "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>; + def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), + "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>; + def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", + [(set v1i128:$vD, + (int_ppc_altivec_vrlqnm v1i128:$vA, + v1i128:$vB))]>; + def VRLQMI : VXForm_1<69, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), + "vrlqmi $vD, $vA, $vB", IIC_VecFP, + [(set v1i128:$vD, + (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB, + v1i128:$vDi))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>; + def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>; + def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>; + def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>; + def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>; + def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>; + def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>; + def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>; + def LXVKQ : XForm_XT6_IMM5<60, 31, 360, (outs vsrc:$XT), (ins u5imm:$UIM), + "lxvkq $XT, $UIM", IIC_VecGeneral, []>; +} + +let Predicates = [IsISA3_1, HasVSX] in { + def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>; + def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>; + def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", + [(set f128:$vT, (PPCxsmaxc f128:$vA, f128:$vB))]>; + def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", + [(set f128:$vT, (PPCxsminc f128:$vA, f128:$vB))]>; +} + +// Multiclass defining patterns for Set Boolean Extension Reverse Instructions. +// This is analogous to the CRNotPat multiclass but specifically for Power10 +// and newer subtargets since the extended forms use Set Boolean instructions. +// The first two anonymous patterns defined are actually a duplicate of those +// in CRNotPat, but it is preferable to define both multiclasses as complete +// ones rather than pulling that small common section out. 
+multiclass P10ReverseSetBool<dag pattern, dag result> {
+  def : Pat<pattern, (crnot result)>;
+  def : Pat<(not pattern), result>;
+
+  def : Pat<(i32 (zext pattern)),
+            (SETBCR result)>;
+  def : Pat<(i64 (zext pattern)),
+            (SETBCR8 result)>;
+
+  def : Pat<(i32 (sext pattern)),
+            (SETNBCR result)>;
+  def : Pat<(i64 (sext pattern)),
+            (SETNBCR8 result)>;
+
+  def : Pat<(i32 (anyext pattern)),
+            (SETBCR result)>;
+  def : Pat<(i64 (anyext pattern)),
+            (SETBCR8 result)>;
+}
+
+multiclass IntSetP10RevSetBool<SDNode SetCC, ValueType Ty, ImmLeaf ZExtTy,
+                               ImmLeaf SExtTy, I Cmp, I Cmpl, I Cmpi,
+                               I Cmpli> {
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+                           (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_lt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+                           (EXTRACT_SUBREG (Cmp $s1, $s2), sub_lt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+                           (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_gt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+                           (EXTRACT_SUBREG (Cmp $s1, $s2), sub_gt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+                           (EXTRACT_SUBREG (Cmp $s1, $s2), sub_eq)>;
+
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETUGE)),
+                           (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_lt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETGE)),
+                           (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_lt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETULE)),
+                           (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_gt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETLE)),
+                           (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_gt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETNE)),
+                           (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_eq)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETNE)),
+                           (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_eq)>;
+}
+
+multiclass FSetP10RevSetBool<SDNode SetCC, ValueType Ty, I FCmp> {
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
+                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+}
+
+let Predicates = [IsISA3_1] in {
+  def : Pat<(i32 (zext i1:$in)),
+            (SETBC $in)>;
+  def : Pat<(i64 (zext i1:$in)),
+            (SETBC8 $in)>;
+  def : Pat<(i32 (sext i1:$in)),
+            (SETNBC $in)>;
+  def : Pat<(i64 (sext i1:$in)),
+            (SETNBC8 $in)>;
+  def : Pat<(i32 (anyext i1:$in)),
+            (SETBC $in)>;
+  def : Pat<(i64 (anyext i1:$in)),
+            (SETBC8 $in)>;
+
+  // Instantiation of the set boolean reverse patterns for 32-bit integers.
+  defm : IntSetP10RevSetBool<setcc, i32, immZExt16, imm32SExt16,
+                             CMPW, CMPLW, CMPWI, CMPLWI>;
+  defm : P10ReverseSetBool<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+                           (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+                                                   (LO16 imm:$imm)), sub_eq)>;
+
+  // Instantiation of the set boolean reverse patterns for 64-bit integers.
+  defm : IntSetP10RevSetBool<setcc, i64, immZExt16, imm64SExt16,
+                             CMPD, CMPLD, CMPDI, CMPLDI>;
+  defm : P10ReverseSetBool<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+                           (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+                                                   (LO16 imm:$imm)), sub_eq)>;
+}
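// A sketch of the intended selection (illustrative only, assuming the
// condition already lives in a CR bit, i.e. the crbitrc input of SETBC):
//   zext i1 %c to i32  ->  setbc  rT, bi    ; rT = CR[bi] ? 1 : 0
//   sext i1 %c to i32  ->  setnbc rT, bi    ; rT = CR[bi] ? -1 : 0
// The reverse forms (setbcr/setnbcr) test the complement of the bit, which
// is what lets P10ReverseSetBool fold away the crnot in negated patterns.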
+
+// Instantiation of the set boolean reverse patterns for f32, f64, f128.
+let Predicates = [IsISA3_1, HasFPU] in {
+  defm : FSetP10RevSetBool<setcc, f32, FCMPUS>;
+  defm : FSetP10RevSetBool<setcc, f64, FCMPUD>;
+  defm : FSetP10RevSetBool<setcc, f128, XSCMPUQP>;
+}
+
+//---------------------------- Anonymous Patterns ----------------------------//
+let Predicates = [IsISA3_1] in {
+  // Exploit the vector multiply high instructions using intrinsics.
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
+            (v4i32 (VMULHSW $vA, $vB))>;
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
+            (v4i32 (VMULHUW $vA, $vB))>;
+  def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
+            (v2i64 (VMULHSD $vA, $vB))>;
+  def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
+            (v2i64 (VMULHUD $vA, $vB))>;
+  def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
+            (v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
+  def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
+            (v8i16 (COPY_TO_REGCLASS (XXGENPCVHM $VRB, imm:$IMM), VRRC))>;
+  def : Pat<(v4i32 (int_ppc_vsx_xxgenpcvwm v4i32:$VRB, imm:$IMM)),
+            (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
+  def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
+            (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
+  def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 1)),
+            (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
+  def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
+            (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 8)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 16)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 32)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRWX ForceXForm:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 64)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRDX ForceXForm:$src), VRRC))>;
+
+  def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+            (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+             (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
+}
+
+let Predicates = [IsISA3_1, HasVSX] in {
+  def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
+            (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
+  def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
+            (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
+}
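// A sketch of why the little-endian patterns below are profitable: on LE
// subtargets element 0 of a vector sits in the rightmost bytes of the VSR,
// so a store of element 0 such as
//   store (trunc (extractelt v16i8 %v, 0)), i8* %p
// can be selected as a single "stxvrbx %v, 0, %p" (store VSX vector
// rightmost byte indexed) instead of an extract to a GPR followed by stb;
// the lxvr*x loads are the converse when building a vector from memory.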
+
+let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
+  // Store element 0 of a VSX register to memory
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), ForceXForm:$dst),
+            (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), ForceXForm:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), ForceXForm:$dst),
+            (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), ForceXForm:$dst)>;
+  def : Pat<(store (i32 (extractelt v4i32:$src, 0)), ForceXForm:$dst),
+            (STXVRWX $src, ForceXForm:$dst)>;
+  def : Pat<(store (f32 (extractelt v4f32:$src, 0)), ForceXForm:$dst),
+            (STXVRWX $src, ForceXForm:$dst)>;
+  def : Pat<(store (i64 (extractelt v2i64:$src, 0)), ForceXForm:$dst),
+            (STXVRDX $src, ForceXForm:$dst)>;
+  def : Pat<(store (f64 (extractelt v2f64:$src, 0)), ForceXForm:$dst),
+            (STXVRDX $src, ForceXForm:$dst)>;
+  // Load into element 0 of a VSX register from memory
+  def : Pat<(v8i16 (scalar_to_vector (i32 (extloadi16 ForceXForm:$src)))),
+            (v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
+  def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))),
+            (v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>;
+}
+
+// FIXME: The swap is overkill when the shift amount is a constant.
+// We should just fix the constant in the DAG.
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA,
+                          (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                                    (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA,
+                          (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                                    (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA,
+                          (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                                    (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA,
+                          (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                                    (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA,
+                           (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                                     (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA,
+                           (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                                     (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+}
+
+class xxevalPattern<dag pattern, bits<8> imm> :
+  Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+
+let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
+  def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
+                                 i32immNonAllOneNonZero:$A,
+                                 i32immNonAllOneNonZero:$A,
+                                 i32immNonAllOneNonZero:$A)),
+            (v4i32 (XXSPLTIW imm:$A))>;
+  def : Pat<(f32 nzFPImmAsi32:$A),
+            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+                              VSFRC)>;
+  def : Pat<(f64 nzFPImmAsi32:$A),
+            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+                              VSFRC)>;
+
+// To replace constant pool with XXSPLTI32DX for scalars.
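// For instance (a sketch, not taken from a test): materializing the f64
// constant 1.0, whose bit pattern 0x3FF0000000000000 has no 32-bit splat
// form, would use the two chained XXSPLTI32DX operations matched below as
//   xxsplti32dx vsX, 0, 0x3FF00000   ; IX = 0 writes the high words
//   xxsplti32dx vsX, 1, 0x00000000   ; IX = 1 writes the low words
// rather than a load from the constant pool.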
+def : Pat<(f32 nzFPImmAsi64:$A), + (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX(IMPLICIT_DEF), 0, + (getFPAs64BitIntHi $A)), + 1, (getFPAs64BitIntLo $A)), + VSSRC)>; + +def : Pat<(f64 nzFPImmAsi64:$A), + (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX (IMPLICIT_DEF), 0, + (getFPAs64BitIntHi $A)), + 1, (getFPAs64BitIntLo $A)), + VSFRC)>; + + // Anonymous patterns for XXEVAL + // AND + // and(A, B, C) + def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; + // and(A, xor(B, C)) + def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; + // and(A, or(B, C)) + def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; + // and(A, nor(B, C)) + def : xxevalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; + // and(A, eqv(B, C)) + def : xxevalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; + // and(A, nand(B, C)) + def : xxevalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; + + // NAND + // nand(A, B, C) + def : xxevalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + !sub(255, 1)>; + // nand(A, xor(B, C)) + def : xxevalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + !sub(255, 6)>; + // nand(A, or(B, C)) + def : xxevalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + !sub(255, 7)>; + // nand(A, nor(B, C)) + def : xxevalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), + !sub(255, 8)>; + // nand(A, eqv(B, C)) + def : xxevalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), + !sub(255, 9)>; + // nand(A, nand(B, C)) + def : xxevalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), + !sub(255, 14)>; + + // Anonymous patterns to select prefixed VSX loads and stores. + // Load / Store f128 + def : Pat<(f128 (load PDForm:$src)), + (COPY_TO_REGCLASS (PLXV memri34:$src), VRRC)>; + def : Pat<(store f128:$XS, PDForm:$dst), + (PSTXV (COPY_TO_REGCLASS $XS, VSRC), memri34:$dst)>; + + // Load / Store v4i32 + def : Pat<(v4i32 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v4i32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Load / Store v2i64 + def : Pat<(v2i64 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v2i64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Load / Store v4f32 + def : Pat<(v4f32 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v4f32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Load / Store v2f64 + def : Pat<(v2f64 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v2f64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Cases For PPCstore_scal_int_from_vsr + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), PDForm:$dst, 8), + (PSTXSD (XSCVDPUXDS f64:$src), PDForm:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), PDForm:$dst, 8), + (PSTXSD (XSCVDPSXDS f64:$src), PDForm:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), PDForm:$dst, 8), + (PSTXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), + PDForm:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), PDForm:$dst, 8), + (PSTXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), + PDForm:$dst)>; +} + +let Predicates = [PrefixInstrs] in { + def : Pat<(i32 imm34:$imm), (PLI (getImmAs64BitInt imm:$imm))>; + def : Pat<(i64 imm34:$imm), (PLI8 (getImmAs64BitInt imm:$imm))>; + def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)), + (COPY_TO_REGCLASS (XXPERMX 
(COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>; + def : Pat<(v16i8 (int_ppc_vsx_xxblendvb v16i8:$A, v16i8:$B, v16i8:$C)), + (COPY_TO_REGCLASS + (XXBLENDVB (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $C, VSRC)), VSRC)>; + def : Pat<(v8i16 (int_ppc_vsx_xxblendvh v8i16:$A, v8i16:$B, v8i16:$C)), + (COPY_TO_REGCLASS + (XXBLENDVH (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $C, VSRC)), VSRC)>; + def : Pat<(int_ppc_vsx_xxblendvw v4i32:$A, v4i32:$B, v4i32:$C), + (XXBLENDVW $A, $B, $C)>; + def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C), + (XXBLENDVD $A, $B, $C)>; + + // Anonymous patterns to select prefixed loads and stores. + // Load i32 + def : Pat<(i32 (extloadi1 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (zextloadi1 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (extloadi8 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (zextloadi8 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (extloadi16 PDForm:$src)), (PLHZ memri34:$src)>; + def : Pat<(i32 (zextloadi16 PDForm:$src)), (PLHZ memri34:$src)>; + def : Pat<(i32 (sextloadi16 PDForm:$src)), (PLHA memri34:$src)>; + def : Pat<(i32 (load PDForm:$src)), (PLWZ memri34:$src)>; + + // Store i32 + def : Pat<(truncstorei8 i32:$rS, PDForm:$dst), (PSTB gprc:$rS, memri34:$dst)>; + def : Pat<(truncstorei16 i32:$rS, PDForm:$dst), (PSTH gprc:$rS, memri34:$dst)>; + def : Pat<(store i32:$rS, PDForm:$dst), (PSTW gprc:$rS, memri34:$dst)>; + + // Load i64 + def : Pat<(i64 (extloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>; + def : Pat<(i64 (extloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>; + def : Pat<(i64 (extloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>; + def : Pat<(i64 (sextloadi16 PDForm:$src)), (PLHA8 memri34:$src)>; + def : Pat<(i64 (extloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>; + def : Pat<(i64 (sextloadi32 PDForm:$src)), (PLWA8 memri34:$src)>; + def : Pat<(i64 (load PDForm:$src)), (PLD memri34:$src)>; + + // Store i64 + def : Pat<(truncstorei8 i64:$rS, PDForm:$dst), (PSTB8 g8rc:$rS, memri34:$dst)>; + def : Pat<(truncstorei16 i64:$rS, PDForm:$dst), (PSTH8 g8rc:$rS, memri34:$dst)>; + def : Pat<(truncstorei32 i64:$rS, PDForm:$dst), (PSTW8 g8rc:$rS, memri34:$dst)>; + def : Pat<(store i64:$rS, PDForm:$dst), (PSTD g8rc:$rS, memri34:$dst)>; + + // Load / Store f32 + def : Pat<(f32 (load PDForm:$src)), (PLFS memri34:$src)>; + def : Pat<(store f32:$FRS, PDForm:$dst), (PSTFS $FRS, memri34:$dst)>; + + // Load / Store f64 + def : Pat<(f64 (extloadf32 PDForm:$src)), + (COPY_TO_REGCLASS (PLFS memri34:$src), VSFRC)>; + def : Pat<(f64 (load PDForm:$src)), (PLFD memri34:$src)>; + def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>; + + // Atomic Load + def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>; + def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>; + def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>; + def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>; + + // Atomic Store + def : Pat<(atomic_store_8 PDForm:$dst, i32:$RS), (PSTB $RS, memri34:$dst)>; + def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>; + def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), 
(PSTW $RS, memri34:$dst)>; + def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>; + + // Prefixed fpext to v2f64 + def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)), + (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>; +} + +def InsertEltShift { + dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32)); + dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30); + dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29); + dag Left1 = (RLWINM $rB, 1, 0, 30); + dag Left2 = (RLWINM $rB, 2, 0, 29); + dag Left3 = (RLWINM8 $rB, 3, 0, 28); +} + +let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in { + // Indexed vector insert element + def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)), + (VINSBRX $vDi, InsertEltShift.Sub32, $rA)>; + def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)), + (VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>; + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)), + (VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>; + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)), + (VINSDRX $vDi, InsertEltShift.Left3, $rA)>; + + def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), + (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; + + def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), + (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), + (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), + (VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), + (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; + let AddedComplexity = 400 in { + // Immediate vector insert element + foreach Idx = [0, 1, 2, 3] in { + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)), + (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>; + } + foreach i = [0, 1] in + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))), + (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>; + } +} + +let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in { + // Indexed vector insert element + def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)), + (VINSBLX $vDi, $rB, $rA)>; + def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)), + (VINSHLX $vDi, InsertEltShift.Left1, $rA)>; + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)), + (VINSWLX $vDi, InsertEltShift.Left2, $rA)>; + + def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)), + (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>; +} + +let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in { + // Indexed vector insert element + def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)), + (VINSBLX $vDi, InsertEltShift.Sub32, $rA)>; + def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)), + (VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>; + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)), + (VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>; + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)), + (VINSDLX $vDi, InsertEltShift.Left3, $rA)>; + + def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), + (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; + + def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), + (VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), + (VINSDLX 
$vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
+  def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)),
+            (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
+  def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)),
+            (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
+}
+
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
+  // Immediate vector insert element
+  foreach Ty = [i32, i64] in {
+    foreach Idx = [0, 1, 2, 3] in {
+      def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
+                (VINSW $vDi, !mul(Idx, 4), $rA)>;
+    }
+  }
+
+  foreach Idx = [0, 1] in
+    def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
+              (VINSD $vDi, !mul(Idx, 8), $rA)>;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
deleted file mode 100644
index ff43426dd1ef..000000000000
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ /dev/null
@@ -1,2889 +0,0 @@
-//-------------------------- Predicate definitions ---------------------------//
-def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
-
-// Mask immediates for MMA instructions (2, 4 and 8 bits).
-def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
-def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
-def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
-
-//===----------------------------------------------------------------------===//
-// PowerPC ISA 3.1 specific type constraints.
-//
-
-def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
-  SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
-]>;
-def SDT_PPCAccBuild : SDTypeProfile<1, 4, [
-  SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>,
-  SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32>
-]>;
-def SDT_PPCPairBuild : SDTypeProfile<1, 2, [
-  SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>
-]>;
-def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [
-  SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisPtrTy<2>
-]>;
-def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
-  SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisPtrTy<2>
-]>;
-def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
-  SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
-]>;
-
-//===----------------------------------------------------------------------===//
-// ISA 3.1 specific PPCISD nodes.
-//
-
-def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
-def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>;
-def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>;
-def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
-                              []>;
-def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
-                               []>;
-def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
-
-//===----------------------------------------------------------------------===//
-
-// PC Relative flag (for instructions that use the address of the prefix for
-// address computations).
-class isPCRel { bit PCRel = 1; }
-
-// PowerPC specific type constraints.
-def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
-  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
-]>;
-
-// PPC Specific DAG Nodes.
-def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
-                       [SDNPHasChain, SDNPMayLoad]>;
-
-// Top-level class for prefixed instructions.
-class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
-         InstrItinClass itin> : Instruction {
-  field bits<64> Inst;
-  field bits<64> SoftFail = 0;
-  bit PCRel = 0; // Default value, set by isPCRel.
-  let Size = 8;
-
-  let Namespace = "PPC";
-  let OutOperandList = OOL;
-  let InOperandList = IOL;
-  let AsmString = asmstr;
-  let Itinerary = itin;
-  let Inst{0-5} = pref;
-  let Inst{32-37} = opcode;
-
-  bits<1> PPC970_First = 0;
-  bits<1> PPC970_Single = 0;
-  bits<1> PPC970_Cracked = 0;
-  bits<3> PPC970_Unit = 0;
-
-  /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
-  /// these must be reflected there! See comments there for what these are.
-  let TSFlags{0} = PPC970_First;
-  let TSFlags{1} = PPC970_Single;
-  let TSFlags{2} = PPC970_Cracked;
-  let TSFlags{5-3} = PPC970_Unit;
-
-  bits<1> Prefixed = 1; // This is a prefixed instruction.
-  let TSFlags{7} = Prefixed;
-
-  // For cases where multiple instruction definitions really represent the
-  // same underlying instruction but with one definition for 64-bit arguments
-  // and one for 32-bit arguments, this bit breaks the degeneracy between
-  // the two forms and allows TableGen to generate mapping tables.
-  bit Interpretation64Bit = 0;
-
-  // Fields used for relation models.
-  string BaseName = "";
-}
-
-// VX-Form: [ PO VT R VB RC XO ]
-class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr,
-                     InstrItinClass itin, list<dag> pattern>
-  : I<4, OOL, IOL, asmstr, itin> {
-  bits<5> VT;
-  bits<5> VB;
-  bit RC = 0;
-
-  let Pattern = pattern;
-
-  let Inst{6-10} = VT;
-  let Inst{11-15} = R;
-  let Inst{16-20} = VB;
-  let Inst{21} = RC;
-  let Inst{22-31} = xo;
-}
-
-// Multiclass definition to account for record and non-record form
-// instructions of VXRForm.
-multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL,
-                           string asmbase, string asmstr,
-                           InstrItinClass itin, list<dag> pattern> {
-  let BaseName = asmbase in {
-    def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL,
-                              !strconcat(asmbase, !strconcat(" ", asmstr)),
-                              itin, pattern>, RecFormRel;
-    let Defs = [CR6] in
-    def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL,
-                              !strconcat(asmbase, !strconcat(". ", asmstr)),
-                              itin, []>, isRecordForm, RecFormRel;
-  }
-}
-
-class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
-                                InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<5> FRS;
-  bits<39> D_RA;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 2;
-  let Inst{8-10} = 0;
-  let Inst{11} = PCRel;
-  let Inst{12-13} = 0;
-  let Inst{14-31} = D_RA{33-16}; // d0
-
-  // The instruction.
-  let Inst{38-42} = FRS{4-0};
-  let Inst{43-47} = D_RA{38-34}; // RA
-  let Inst{48-63} = D_RA{15-0}; // d1
-}
-
-class MLS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
-                            InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<5> RT;
-  bits<5> RA;
-  bits<34> SI;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 2;
-  let Inst{8-10} = 0;
-  let Inst{11} = PCRel;
-  let Inst{12-13} = 0;
-  let Inst{14-31} = SI{33-16};
-
-  // The instruction.
-  let Inst{38-42} = RT;
-  let Inst{43-47} = RA;
-  let Inst{48-63} = SI{15-0};
-}
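// A sketch of the resulting 64-bit encoding under the field layout above
// (illustrative values only): for "pli r3, SI" the 34-bit immediate is
// split so that SI{33-16} lands in bits 14-31 of the prefix word and
// SI{15-0} in bits 48-63 of the suffix word, roughly
//   prefix = 0x06000000 | ((SI >> 16) & 0x3FFFF);  // pref = 1, Inst{6-7} = 2
//   suffix = 0x38600000 | (SI & 0xFFFF);           // opcode 14, RT = r3, RA = 0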
-
-class MLS_DForm_SI34_RT5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
-                         InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<5> RT;
-  bits<34> SI;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 2;
-  let Inst{8-10} = 0;
-  let Inst{11} = 0;
-  let Inst{12-13} = 0;
-  let Inst{14-31} = SI{33-16};
-
-  // The instruction.
-  let Inst{38-42} = RT;
-  let Inst{43-47} = 0;
-  let Inst{48-63} = SI{15-0};
-}
-
-multiclass MLS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
-                                   dag PCRel_IOL, string asmstr,
-                                   InstrItinClass itin> {
-  def NAME : MLS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
-                                   !strconcat(asmstr, ", 0"), itin, []>;
-  def pc : MLS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
-                                 !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
-}
-
-class 8LS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
-                            InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<5> RT;
-  bits<39> D_RA;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-10} = 0;
-  let Inst{11} = PCRel;
-  let Inst{12-13} = 0;
-  let Inst{14-31} = D_RA{33-16}; // d0
-
-  // The instruction.
-  let Inst{38-42} = RT{4-0};
-  let Inst{43-47} = D_RA{38-34}; // RA
-  let Inst{48-63} = D_RA{15-0}; // d1
-}
-
-// 8LS:D-Form: [ 1 0 0 // R // d0
-//               PO TX T RA d1 ]
-class 8LS_DForm_R_SI34_XT6_RA5<bits<5> opcode, dag OOL, dag IOL, string asmstr,
-                               InstrItinClass itin, list<dag> pattern>
-  : PI<1, { opcode, ? }, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bits<39> D_RA;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 0;
-  let Inst{8} = 0;
-  let Inst{9-10} = 0; // reserved
-  let Inst{11} = PCRel;
-  let Inst{12-13} = 0; // reserved
-  let Inst{14-31} = D_RA{33-16}; // d0
-
-  // The instruction.
-  let Inst{37} = XT{5};
-  let Inst{38-42} = XT{4-0};
-  let Inst{43-47} = D_RA{38-34}; // RA
-  let Inst{48-63} = D_RA{15-0}; // d1
-}
-
-// X-Form: [PO T IMM VRB XO TX]
-class XForm_XT6_IMM5_VB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
-                         string asmstr, InstrItinClass itin,
-                         list<dag> pattern>
-  : I<opcode, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bits<5> VRB;
-  bits<5> IMM;
-
-  let Pattern = pattern;
-  let Inst{6-10} = XT{4-0};
-  let Inst{11-15} = IMM;
-  let Inst{16-20} = VRB;
-  let Inst{21-30} = xo;
-  let Inst{31} = XT{5};
-}
-
-class 8RR_XX4Form_IMM8_XTAB6<bits<6> opcode, bits<2> xo,
-                             dag OOL, dag IOL, string asmstr,
-                             InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bits<6> XA;
-  bits<6> XB;
-  bits<6> XC;
-  bits<8> IMM;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 1;
-  let Inst{8} = 0;
-  let Inst{9-11} = 0;
-  let Inst{12-13} = 0;
-  let Inst{14-23} = 0;
-  let Inst{24-31} = IMM;
-
-  // The instruction.
-  let Inst{38-42} = XT{4-0};
-  let Inst{43-47} = XA{4-0};
-  let Inst{48-52} = XB{4-0};
-  let Inst{53-57} = XC{4-0};
-  let Inst{58-59} = xo;
-  let Inst{60} = XC{5};
-  let Inst{61} = XA{5};
-  let Inst{62} = XB{5};
-  let Inst{63} = XT{5};
-}
-
-class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
-                        InstrItinClass itin, list<dag> pattern>
-  : I<4, OOL, IOL, asmstr, itin> {
-  bits<5> RD;
-  bits<5> VB;
-  bits<3> N;
-
-  let Pattern = pattern;
-
-  let Inst{6-10} = RD;
-  let Inst{11-12} = 0;
-  let Inst{13-15} = N;
-  let Inst{16-20} = VB;
-  let Inst{21-31} = xo;
-}
-
-
-// VX-Form: [PO VRT RA VRB XO].
-// Destructive (insert) forms are suffixed with _ins.
-class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
-  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB),
-             !strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
-    RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-
-// VX-Form: [PO VRT RA RB XO].
-// Destructive (insert) forms are suffixed with _ins.
-class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
-  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB),
-             !strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
-    RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-
-// VX-Form: [ PO BF // VRA VRB XO ]
-class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
-                      InstrItinClass itin, list<dag> pattern>
-  : I<4, OOL, IOL, asmstr, itin> {
-  bits<3> BF;
-  bits<5> VA;
-  bits<5> VB;
-
-  let Pattern = pattern;
-
-  let Inst{6-8} = BF;
-  let Inst{9-10} = 0;
-  let Inst{11-15} = VA;
-  let Inst{16-20} = VB;
-  let Inst{21-31} = xo;
-}
-
-// VN-Form: [PO VRT VRA VRB PS SD XO]
-// SD is "Shift Direction"
-class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
-                       InstrItinClass itin, list<dag> pattern>
-  : I<4, OOL, IOL, asmstr, itin> {
-  bits<5> VRT;
-  bits<5> VRA;
-  bits<5> VRB;
-  bits<3> SD;
-
-  let Pattern = pattern;
-
-  let Inst{6-10} = VRT;
-  let Inst{11-15} = VRA;
-  let Inst{16-20} = VRB;
-  let Inst{21-22} = ps;
-  let Inst{23-25} = SD;
-  let Inst{26-31} = xo;
-}
-
-class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL,
-                        string asmstr, InstrItinClass itin, list<dag> pattern>
-  : I<4, OOL, IOL, asmstr, itin> {
-  bits<5> RD;
-  bits<5> VB;
-  bit MP;
-
-  let Pattern = pattern;
-
-  let Inst{6-10} = RD;
-  let Inst{11-14} = eo;
-  let Inst{15} = MP;
-  let Inst{16-20} = VB;
-  let Inst{21-31} = xo;
-}
-
-// 8RR:D-Form: [ 1 1 0 // // imm0
-//               PO T XO TX imm1 ].
-class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
-                          string asmstr, InstrItinClass itin,
-                          list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bits<32> IMM32;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 1;
-  let Inst{8-11} = 0;
-  let Inst{12-13} = 0; // reserved
-  let Inst{14-15} = 0; // reserved
-  let Inst{16-31} = IMM32{31-16};
-
-  // The instruction.
-  let Inst{38-42} = XT{4-0};
-  let Inst{43-46} = xo;
-  let Inst{47} = XT{5};
-  let Inst{48-63} = IMM32{15-0};
-}
-
-// 8RR:D-Form: [ 1 1 0 // // imm0
-//               PO T XO IX TX imm1 ].
-class 8RR_DForm_IMM32_XT6_IX<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
-                             string asmstr, InstrItinClass itin,
-                             list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bit IX;
-  bits<32> IMM32;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 1;
-  let Inst{8-11} = 0;
-  let Inst{12-13} = 0; // reserved
-  let Inst{14-15} = 0; // reserved
-  let Inst{16-31} = IMM32{31-16};
-
-  // The instruction.
-  let Inst{38-42} = XT{4-0};
-  let Inst{43-45} = xo;
-  let Inst{46} = IX;
-  let Inst{47} = XT{5};
-  let Inst{48-63} = IMM32{15-0};
-}
-
-class 8RR_XX4Form_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
-                         string asmstr, InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bits<6> XA;
-  bits<6> XB;
-  bits<6> XC;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 1;
-  let Inst{8-11} = 0;
-  let Inst{12-13} = 0;
-  let Inst{14-31} = 0;
-
-  // The instruction.
-  let Inst{38-42} = XT{4-0};
-  let Inst{43-47} = XA{4-0};
-  let Inst{48-52} = XB{4-0};
-  let Inst{53-57} = XC{4-0};
-  let Inst{58-59} = xo;
-  let Inst{60} = XC{5};
-  let Inst{61} = XA{5};
-  let Inst{62} = XB{5};
-  let Inst{63} = XT{5};
-}
-
-class 8RR_XX4Form_IMM3_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
-                              string asmstr, InstrItinClass itin,
-                              list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<6> XT;
-  bits<6> XA;
-  bits<6> XB;
-  bits<6> XC;
-  bits<3> IMM;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 1;
-  let Inst{8-11} = 0;
-  let Inst{12-13} = 0;
-  let Inst{14-28} = 0;
-  let Inst{29-31} = IMM;
-
-  // The instruction.
-  let Inst{38-42} = XT{4-0};
-  let Inst{43-47} = XA{4-0};
-  let Inst{48-52} = XB{4-0};
-  let Inst{53-57} = XC{4-0};
-  let Inst{58-59} = xo;
-  let Inst{60} = XC{5};
-  let Inst{61} = XA{5};
-  let Inst{62} = XB{5};
-  let Inst{63} = XT{5};
-}
-
-// [PO BF / XO2 B XO BX /]
-class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
-                          dag IOL, string asmstr, InstrItinClass itin,
-                          list<dag> pattern>
-  : I<opcode, OOL, IOL, asmstr, itin> {
-  bits<3> BF;
-  bits<6> XB;
-
-  let Pattern = pattern;
-
-  let Inst{6-8} = BF;
-  let Inst{9-10} = 0;
-  let Inst{11-15} = xo2;
-  let Inst{16-20} = XB{4-0};
-  let Inst{21-29} = xo;
-  let Inst{30} = XB{5};
-  let Inst{31} = 0;
-}
-
-// X-Form: [ PO RT BI /// XO / ]
-class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
-                    string asmstr, InstrItinClass itin, list<dag> pattern>
-  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
-  let B = 0;
-}
-
-multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
-                                       dag PCRel_IOL, string asmstr,
-                                       InstrItinClass itin> {
-  def NAME : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
-                                       !strconcat(asmstr, ", 0"), itin, []>;
-  def pc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
-                                     !strconcat(asmstr, ", 1"), itin, []>,
-           isPCRel;
-}
-
-multiclass 8LS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
-                                   dag PCRel_IOL, string asmstr,
-                                   InstrItinClass itin> {
-  def NAME : 8LS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
-                                   !strconcat(asmstr, ", 0"), itin, []>;
-  def pc : 8LS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
-                                 !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
-}
-
-multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
-                                      dag PCRel_IOL, string asmstr,
-                                      InstrItinClass itin> {
-  def NAME : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, IOL,
-                                      !strconcat(asmstr, ", 0"), itin, []>;
-  def pc : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, PCRel_IOL,
-                                    !strconcat(asmstr, ", 1"), itin, []>,
-           isPCRel;
-}
-
-def PPCRegVSRpRCAsmOperand : AsmOperandClass {
-  let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber";
-}
-
-def vsrprc : RegisterOperand<VSRpRC> {
-  let ParserMatchClass = PPCRegVSRpRCAsmOperand;
-}
-
-def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
-  let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
-}
-
-def vsrpevenrc : RegisterOperand<VSRpRC> {
-  let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
-  let EncoderMethod = "getVSRpEvenEncoding";
-  let DecoderMethod = "decodeVSRpEvenOperands";
-}
-
-class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
-                           string asmstr, InstrItinClass itin,
-                           list<dag> pattern>
-  : I<opcode, OOL, IOL, asmstr, itin> {
-  bits<5> XTp;
-  bits<17> DQ_RA;
-  let Pattern = pattern;
-
-  let Inst{6-9} = XTp{3-0};
-  let Inst{10} = XTp{4};
-  let Inst{11-15} = DQ_RA{16-12}; // Register #
-  let Inst{16-27} = DQ_RA{11-0}; // Displacement.
-  let Inst{28-31} = xo;
-}
-
-class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
-                      string asmstr, InstrItinClass itin, list<dag> pattern>
-  : I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp {
-  bits<5> XTp;
-  bits<5> A;
-  bits<5> B;
-
-  let Pattern = pattern;
-  let Inst{6-9} = XTp{3-0};
-  let Inst{10} = XTp{4};
-  let Inst{11-15} = A;
-  let Inst{16-20} = B;
-  let Inst{21-30} = xo;
-  let Inst{31} = 0;
-}
-
-class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
-                                InstrItinClass itin, list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<5> XTp;
-  bits<39> D_RA;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-10} = 0;
-  let Inst{11} = PCRel;
-  let Inst{12-13} = 0;
-  let Inst{14-31} = D_RA{33-16}; // Imm18
-
-  // The instruction.
-  let Inst{38-41} = XTp{3-0};
-  let Inst{42} = XTp{4};
-  let Inst{43-47} = D_RA{38-34}; // Register #
-  let Inst{48-63} = D_RA{15-0}; // D
-}
-
-multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
-                                       dag IOL, dag PCRel_IOL,
-                                       string asmstr, InstrItinClass itin> {
-  def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
-                                       !strconcat(asmstr, ", 0"), itin, []>;
-  def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL,
-                                     !strconcat(asmstr, ", 1"), itin, []>,
-           isPCRel;
-}
-
-def PPCRegACCRCAsmOperand : AsmOperandClass {
-  let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber";
-}
-
-def acc : RegisterOperand<ACCRC> {
-  let ParserMatchClass = PPCRegACCRCAsmOperand;
-}
-
-def uacc : RegisterOperand<UACCRC> {
-  let ParserMatchClass = PPCRegACCRCAsmOperand;
-}
-
-// [PO AS XO2 XO]
-class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
-                string asmstr, InstrItinClass itin, list<dag> pattern>
-  : I<opcode, OOL, IOL, asmstr, itin> {
-  bits<3> AT;
-
-  let Pattern = pattern;
-
-  let Inst{6-8} = AT;
-  let Inst{9-10} = 0;
-  let Inst{11-15} = xo2;
-  let Inst{16-20} = 0;
-  let Inst{21-30} = xo;
-  let Inst{31} = 0;
-}
-
-class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
-                       string asmstr, InstrItinClass itin,
-                       list<dag> pattern>
-  : I<opcode, OOL, IOL, asmstr, itin> {
-  bits<3> AT;
-  bits<6> XA;
-  bits<6> XB;
-
-  let Pattern = pattern;
-
-  let Inst{6-8} = AT;
-  let Inst{9-10} = 0;
-  let Inst{11-15} = XA{4-0};
-  let Inst{16-20} = XB{4-0};
-  let Inst{21-28} = xo;
-  let Inst{29} = XA{5};
-  let Inst{30} = XB{5};
-  let Inst{31} = 0;
-}
-
-class MMIRR_XX3Form_XY4P2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
-                               string asmstr, InstrItinClass itin,
-                               list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<3> AT;
-  bits<6> XA;
-  bits<6> XB;
-  bits<4> XMSK;
-  bits<4> YMSK;
-  bits<2> PMSK;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 3;
-  let Inst{8-11} = 9;
-  let Inst{12-15} = 0;
-  let Inst{16-17} = PMSK;
-  let Inst{18-23} = 0;
-  let Inst{24-27} = XMSK;
-  let Inst{28-31} = YMSK;
-
-  // The instruction.
-  let Inst{38-40} = AT;
-  let Inst{41-42} = 0;
-  let Inst{43-47} = XA{4-0};
-  let Inst{48-52} = XB{4-0};
-  let Inst{53-60} = xo;
-  let Inst{61} = XA{5};
-  let Inst{62} = XB{5};
-  let Inst{63} = 0;
-}
-
-class MMIRR_XX3Form_XY4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
-                             string asmstr, InstrItinClass itin,
-                             list<dag> pattern>
-  : PI<1, opcode, OOL, IOL, asmstr, itin> {
-  bits<3> AT;
-  bits<6> XA;
-  bits<6> XB;
-  bits<4> XMSK;
-  bits<4> YMSK;
-
-  let Pattern = pattern;
-
-  // The prefix.
-  let Inst{6-7} = 3;
-  let Inst{8-11} = 9;
-  let Inst{12-23} = 0;
-  let Inst{24-27} = XMSK;
-  let Inst{28-31} = YMSK;
-
-  // The instruction.
-  let Inst{38-40} = AT;
-  let Inst{41-42} = 0;
-  let Inst{43-47} = XA{4-0};
-  let Inst{48-52} = XB{4-0};
-  let Inst{53-60} = xo;
-  let Inst{61} = XA{5};
-  let Inst{62} = XB{5};
-  let Inst{63} = 0;
-}
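// A rough summary of the MMIRR mask operands encoded above, for context:
// in the prefixed ("pm") MMA forms, XMSK and YMSK select which rows and
// columns of the target accumulator participate in the rank-k update, and
// PMSK (2, 4 or 8 bits, matching Msk2Imm/Msk4Imm/Msk8Imm) masks the packed
// input lanes for the narrower element types.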
- let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -class MMIRR_XX3Form_XY4P8_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<4> YMSK; - bits<8> PMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-23} = PMSK; - let Inst{24-27} = XMSK; - let Inst{28-31} = YMSK; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -class MMIRR_XX3Form_XYP4_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<4> YMSK; - bits<4> PMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-19} = PMSK; - let Inst{20-23} = 0; - let Inst{24-27} = XMSK; - let Inst{28-31} = YMSK; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">; -def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">; -def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">; -def MMA : Predicate<"Subtarget->hasMMA()">; - -def RCCp { - dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC); - dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC); -} - -let Predicates = [PrefixInstrs] in { - let Interpretation64Bit = 1, isCodeGenOnly = 1 in { - defm PADDI8 : - MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), - "paddi $RT, $RA, $SI", IIC_LdStLFD>; - let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT), - (ins s34imm:$SI), - "pli $RT, $SI", IIC_IntSimple, []>; - } - } - defm PADDI : - MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), - "paddi $RT, $RA, $SI", IIC_LdStLFD>; - let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT), - (ins s34imm:$SI), - "pli $RT, $SI", IIC_IntSimple, []>; - } - - let mayLoad = 1, mayStore = 0 in { - defm PLXV : - 8LS_DForm_R_SI34_XT6_RA5_p<25, (outs vsrc:$XT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plxv $XT, $D_RA", - IIC_LdStLFD>; - defm PLFS : - MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA", - IIC_LdStLFD>; - defm PLFD : - MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA", - IIC_LdStLFD>; - defm PLXSSP : - 8LS_DForm_R_SI34_RTA5_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plxssp $VRT, $D_RA", - IIC_LdStLFD>; - defm PLXSD : - 8LS_DForm_R_SI34_RTA5_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plxsd $VRT, $D_RA", - IIC_LdStLFD>; - let Interpretation64Bit = 1, 
isCodeGenOnly = 1 in { - defm PLBZ8 : - MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA", - IIC_LdStLFD>; - defm PLHZ8 : - MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA", - IIC_LdStLFD>; - defm PLHA8 : - MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA", - IIC_LdStLFD>; - defm PLWA8 : - 8LS_DForm_R_SI34_RTA5_p<41, (outs g8rc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA", - IIC_LdStLFD>; - defm PLWZ8 : - MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA", - IIC_LdStLFD>; - } - defm PLBZ : - MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA", - IIC_LdStLFD>; - defm PLHZ : - MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA", - IIC_LdStLFD>; - defm PLHA : - MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA", - IIC_LdStLFD>; - defm PLWZ : - MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA", - IIC_LdStLFD>; - defm PLWA : - 8LS_DForm_R_SI34_RTA5_p<41, (outs gprc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA", - IIC_LdStLFD>; - defm PLD : - 8LS_DForm_R_SI34_RTA5_p<57, (outs g8rc:$RT), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA", - IIC_LdStLFD>; - } - - let mayStore = 1, mayLoad = 0 in { - defm PSTXV : - 8LS_DForm_R_SI34_XT6_RA5_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA), - (ins vsrc:$XS, memri34_pcrel:$D_RA), - "pstxv $XS, $D_RA", IIC_LdStLFD>; - defm PSTFS : - MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA), - (ins f4rc:$FRS, memri34_pcrel:$D_RA), - "pstfs $FRS, $D_RA", IIC_LdStLFD>; - defm PSTFD : - MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA), - (ins f8rc:$FRS, memri34_pcrel:$D_RA), - "pstfd $FRS, $D_RA", IIC_LdStLFD>; - defm PSTXSSP : - 8LS_DForm_R_SI34_RTA5_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA), - (ins vfrc:$VRS, memri34_pcrel:$D_RA), - "pstxssp $VRS, $D_RA", IIC_LdStLFD>; - defm PSTXSD : - 8LS_DForm_R_SI34_RTA5_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA), - (ins vfrc:$VRS, memri34_pcrel:$D_RA), - "pstxsd $VRS, $D_RA", IIC_LdStLFD>; - let Interpretation64Bit = 1, isCodeGenOnly = 1 in { - defm PSTB8 : - MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA), - (ins g8rc:$RS, memri34_pcrel:$D_RA), - "pstb $RS, $D_RA", IIC_LdStLFD>; - defm PSTH8 : - MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA), - (ins g8rc:$RS, memri34_pcrel:$D_RA), - "psth $RS, $D_RA", IIC_LdStLFD>; - defm PSTW8 : - MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA), - (ins g8rc:$RS, memri34_pcrel:$D_RA), - "pstw $RS, $D_RA", IIC_LdStLFD>; - } - defm PSTB : - MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA), - (ins gprc:$RS, memri34_pcrel:$D_RA), - "pstb $RS, $D_RA", IIC_LdStLFD>; - defm PSTH : - MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA), - (ins gprc:$RS, memri34_pcrel:$D_RA), - "psth $RS, $D_RA", IIC_LdStLFD>; - defm PSTW : - MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA), - (ins gprc:$RS, memri34_pcrel:$D_RA), - "pstw $RS, $D_RA", 
IIC_LdStLFD>;
-    defm PSTD :
-      8LS_DForm_R_SI34_RTA5_p<61, (outs), (ins g8rc:$RS, memri34:$D_RA),
-                              (ins g8rc:$RS, memri34_pcrel:$D_RA),
-                              "pstd $RS, $D_RA", IIC_LdStLFD>;
-  }
-}
-
-// Multiclass definitions for MMA accumulator instructions.
-// ----------------------------------------------------------------------------
-
-// Defines 2 unmasked instructions where the xo field for acc/non-acc version
-// is even/odd.
-multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
-                       string asmstr> {
-  let Predicates = [MMA] in {
-    def NAME :
-      XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
-                       !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
-      RegConstraint<"@earlyclobber $AT">;
-    def PP :
-      XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
-                       !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
-      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
-  }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
-// The XO field for acc/non-acc version is even/odd.
-multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL,
-                            string asmbase, string asmstr> {
-  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
-    def PM#NAME :
-      MMIRR_XX3Form_XY4P8_XAB6<
-        opcode, !or(xo, 0x01), (outs acc:$AT),
-        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
-        !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
-        IIC_VecFP, []>,
-      RegConstraint<"@earlyclobber $AT">;
-    def PM#NAME#PP :
-      MMIRR_XX3Form_XY4P8_XAB6<
-        opcode, xo, (outs acc:$AT),
-        !con((ins acc:$ATi),
-             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
-        !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
-        IIC_VecFP, []>,
-      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
-  }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
-// The XO field for acc/non-acc version is even/odd.
-multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL,
-                            string asmbase, string asmstr> {
-  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
-    def PM#NAME :
-      MMIRR_XX3Form_XYP4_XAB6<
-        opcode, !or(xo, 0x01), (outs acc:$AT),
-        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
-        !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
-        IIC_VecFP, []>,
-      RegConstraint<"@earlyclobber $AT">;
-    def PM#NAME#PP :
-      MMIRR_XX3Form_XYP4_XAB6<
-        opcode, xo, (outs acc:$AT),
-        !con((ins acc:$ATi),
-             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
-        !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
-        IIC_VecFP, []>,
-      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
-  }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
-// The XO field for acc/non-acc version is even/odd.
-multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL,
-                            string asmbase, string asmstr> {
-  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
-    def PM#NAME :
-      MMIRR_XX3Form_XY4P2_XAB6<
-        opcode, !or(xo, 0x01), (outs acc:$AT),
-        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
-        !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
-        IIC_VecFP, []>,
-      RegConstraint<"@earlyclobber $AT">;
-    def PM#NAME#PP :
-      MMIRR_XX3Form_XY4P2_XAB6<
-        opcode, xo, (outs acc:$AT),
-        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
-        !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
-        IIC_VecFP, []>,
-      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
-  }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
-// Upper nibble of the XO field is 0x4 for the non-accumulating version and
-// 0x6 for the accumulating version.
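-// For example, the one use below, defm XVI16GER2 : ACC_UM_M244_XO46<59, 75,
-// ...>, expands to XVI16GER2 (xo = 75 = 0x4B), XVI16GER2PP (xo|0x20 = 0x6B),
-// and the prefixed PMXVI16GER2/PMXVI16GER2PP, which take the extra 4-, 4- and
-// 2-bit $XMSK/$YMSK/$PMSK mask operands.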
-multiclass ACC_UM_M244_XO46 opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - let Predicates = [MMA] in { - def NAME : - XX3Form_AT3_XAB6, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, xo, (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4 -// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_M244_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_UM_M244_XOEO; - let Predicates = [MMA] in { - def PN : XX3Form_AT3_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME#PN : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 5 instructions, unmasked, operand negating. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_UM_XOEO; - let Predicates = [MMA] in { - def PN : XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. 
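-// For example, defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, ...> below
-// expands to XVF32GER (xo|0x01 = 0x1B), XVF32GERPP (xo = 0x1A), XVF32GERPN
-// (xo|0x80 = 0x9A), XVF32GERNP (xo|0x40 = 0x5A) and XVF32GERNN (xo|0xC0 =
-// 0xDA), plus the five pm* forms with the extra 4-bit $XMSK/$YMSK operands.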
-multiclass ACC_NEG_UM_M44_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_NEG_UM_XOM84C; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#PN : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_M42_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_NEG_UM_XOM84C; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#PN : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// End of class definitions. 
-//----------------------------------------------------------------------------- - -let Predicates = [MMA] in { - def XXMFACC : - XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", - IIC_VecGeneral, - [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, - RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; - def XXMTACC : - XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", - IIC_VecGeneral, - [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), - "#KILL_PAIR", []>, - RegConstraint<"$XTp = $XSp">; - def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS), - "#BUILD_UACC $AT, $AS", []>; - // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in - // the backend. We avoid CSE here because it generates a copy of the acc - // register and this copy is more expensive than calling the intrinsic again. - let isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def XXSETACCZ : - XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, - [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; - } - def XVI8GER4SPP : - XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), - "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - let mayStore = 1 in { - def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), - "#SPILL_ACC", []>; - def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst), - "#SPILL_UACC", []>; - } - let mayLoad = 1, hasSideEffects = 0 in { - def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src), - "#RESTORE_ACC", []>; - def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src), - "#RESTORE_UACC", []>; - } -} - -let Predicates = [MMA, PrefixInstrs] in { - def PMXVI8GER4SPP : - MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), - (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, - u4imm:$YMSK, u4imm:$PMSK), - "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", - IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; -} - -// MMA accumulating/non-accumulating instructions. 
-//------------------------------------------------------------------------------
-
-// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
-// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
-defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
-                                         "xvbf16ger2", "$AT, $XA, $XB">;
-
-// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
-defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
-                                 "xvi4ger8", "$AT, $XA, $XB">;
-
-// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
-defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
-                                 "xvi8ger4", "$AT, $XA, $XB">;
-
-// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
-defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
-                                  "xvi16ger2", "$AT, $XA, $XB">;
-
-// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
-defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
-                                   "xvi16ger2s", "$AT, $XA, $XB">;
-
-// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
-// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
-defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
-                                        "xvf16ger2", "$AT, $XA, $XB">;
-
-// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERNN
-// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERNN
-defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
-                                      "xvf32ger", "$AT, $XA, $XB">;
-
-// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
-// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
-defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
-                                      "xvf64ger", "$AT, $XA, $XB">;
-//------------------------------------------------------------------------------
-
-// MMA Intrinsics
-let Predicates = [MMA] in {
-  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
-            (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
-            (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
-            (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
-            (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
-            (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
-            (XVF32GERPN
$ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)), - (XVF64GER $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)), - (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)), - (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; -} - -// MMA Intrinsics -let Predicates = [MMA, PrefixInstrs] in { - def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)), - (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk8Imm:$PMSK)), - (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)), - (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk4Imm:$PMSK)), - (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 
(int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK,
-                                              Msk2Imm:$PMSK)),
-            (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
-                                               Msk2Imm:$PMSK)),
-            (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
-                                               Msk2Imm:$PMSK)),
-            (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
-                                               Msk2Imm:$PMSK)),
-            (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
-                                            Msk4Imm:$YMSK)),
-            (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                        Msk4Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
-            (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk4Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
-            (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk4Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
-            (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk4Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
-            (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk4Imm:$YMSK)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
-                                            Msk2Imm:$YMSK)),
-            (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
-            (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk2Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
-            (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk2Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
-            (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk2Imm:$YMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
-                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
-            (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk2Imm:$YMSK)>;
-
-  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
-                                              Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
-            (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
-                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp
v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; -} - -def Concats { - dag VecsToVecPair0 = - (v256i1 (INSERT_SUBREG - (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1), - $vs1, sub_vsx0)); - dag VecsToVecPair1 = - (v256i1 (INSERT_SUBREG - (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), - $vs3, sub_vsx0)); - dag VecsToVecQuad = - (BUILD_UACC (INSERT_SUBREG - (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)), - (KILL_PAIR VecsToVecPair0), sub_pair0), - (KILL_PAIR VecsToVecPair1), sub_pair1)); -} - -def Extracts { - dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0)); - dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1)); - dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0)); - dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1)); - dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0)); - dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1)); -} - -let Predicates = [MMA] in { - def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)), - (XXMTACC Concats.VecsToVecQuad)>; - def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, - v16i8:$vs3, v16i8:$vs2)), - (XXMTACC Concats.VecsToVecQuad)>; - def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)), - Extracts.Vec0>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)), - Extracts.Vec1>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)), - Extracts.Vec2>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)), - Extracts.Vec3>; -} - -let Predicates = [PairedVectorMemops] in { - def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)), - Concats.VecsToVecPair0>; - def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)), - Concats.VecsToVecPair0>; - def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, 0)), - (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>; - def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, 1)), - (v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>; -} - -let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in { - def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp), - (ins 
memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA", - IIC_LdStLFD, []>; - def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src), - "lxvpx $XTp, $src", IIC_LdStLFD, - []>; -} - -let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in { - def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp, - memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA", - IIC_LdStLFD, []>; - def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst), - "stxvpx $XTp, $dst", IIC_LdStLFD, - []>; -} - -let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in { - defm PLXVP : - 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA", - IIC_LdStLFD>; -} - -let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in { - defm PSTXVP : - 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, memri34:$D_RA), - (ins vsrprc:$XTp, memri34_pcrel:$D_RA), - "pstxvp $XTp, $D_RA", IIC_LdStLFD>; -} - -let Predicates = [PairedVectorMemops] in { - // Intrinsics for Paired Vector Loads. - def : Pat<(v256i1 (int_ppc_vsx_lxvp DQForm:$src)), (LXVP memrix16:$src)>; - def : Pat<(v256i1 (int_ppc_vsx_lxvp XForm:$src)), (LXVPX XForm:$src)>; - let Predicates = [PairedVectorMemops, PrefixInstrs] in { - def : Pat<(v256i1 (int_ppc_vsx_lxvp PDForm:$src)), (PLXVP memri34:$src)>; - } - // Intrinsics for Paired Vector Stores. - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, DQForm:$dst), - (STXVP $XSp, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, XForm:$dst), - (STXVPX $XSp, XForm:$dst)>; - let Predicates = [PairedVectorMemops, PrefixInstrs] in { - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, PDForm:$dst), - (PSTXVP $XSp, memri34:$dst)>; - } -} - -let Predicates = [PCRelativeMemops] in { - // Load i32 - def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZpc $ga, 0)>; - def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZpc $ga, 0)>; - def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZpc $ga, 0)>; - def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZpc $ga, 0)>; - def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), - (PLHApc $ga, 0)>; - def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), - (PLHZpc $ga, 0)>; - def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))), - (PLHZpc $ga, 0)>; - def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>; - - // Store i32 - def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTBpc $RS, $ga, 0)>; - def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTHpc $RS, $ga, 0)>; - def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTWpc $RS, $ga, 0)>; - - // Load i64 - def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))), - (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), - (PLHA8pc $ga, 0)>; - def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), - (PLHZ8pc $ga, 0)>; - def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))), - (PLHZ8pc $ga, 0)>; - def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))), - (PLWZ8pc $ga, 0)>; - def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))), - (PLWA8pc 
$ga, 0)>; - def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))), - (PLWZ8pc $ga, 0)>; - def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>; - - // Store i64 - def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTB8pc $RS, $ga, 0)>; - def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTH8pc $RS, $ga, 0)>; - def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTW8pc $RS, $ga, 0)>; - def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTDpc $RS, $ga, 0)>; - - // Load f32 - def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>; - - // Store f32 - def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTFSpc $FRS, $ga, 0)>; - - // Load f64 - def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))), - (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>; - def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>; - - // Store f64 - def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTFDpc $FRS, $ga, 0)>; - - // Load f128 - def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))), - (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>; - - // Store f128 - def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>; - - // Load v4i32 - def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; - - // Store v4i32 - def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTXVpc $XS, $ga, 0)>; - - // Load v2i64 - def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; - - // Store v2i64 - def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTXVpc $XS, $ga, 0)>; - - // Load v4f32 - def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; - - // Store v4f32 - def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTXVpc $XS, $ga, 0)>; - - // Load v2f64 - def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; - - // Store v2f64 - def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), - (PSTXVpc $XS, $ga, 0)>; - - // Atomic Load - def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)), - (PLBZpc $ga, 0)>; - def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)), - (PLHZpc $ga, 0)>; - def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)), - (PLWZpc $ga, 0)>; - def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)), - (PLDpc $ga, 0)>; - - // Atomic Store - def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), - (PSTBpc $RS, $ga, 0)>; - def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), - (PSTHpc $RS, $ga, 0)>; - def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), - (PSTWpc $RS, $ga, 0)>; - def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), - (PSTB8pc $RS, $ga, 0)>; - def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), - (PSTH8pc $RS, $ga, 0)>; - def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), - (PSTW8pc $RS, $ga, 0)>; - def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), - (PSTDpc $RS, $ga, 0)>; - - // Special Cases For PPCstore_scal_int_from_vsr - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), - (PPCmatpcreladdr PCRelForm:$dst), 8), - (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), - (PPCmatpcreladdr 
PCRelForm:$dst), 8), - (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>; - - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), - (PPCmatpcreladdr PCRelForm:$dst), 8), - (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), - (PPCmatpcreladdr PCRelForm:$dst), 8), - (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>; - - def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))), - (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>; - - // If the PPCmatpcreladdr node is not caught by any other pattern it should be - // caught here and turned into a paddi instruction to materialize the address. - def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; - // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize - // tls global address with paddi instruction. - def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; - // PPCtlslocalexecmataddr node is used for TLS local exec models to - // materialize tls global address with paddi instruction. - def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), - (PADDI8 $in, $addr)>; -} - -let Predicates = [PrefixInstrs] in { - def XXPERMX : - 8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, - vsrc:$XC, u3imm:$UIM), - "xxpermx $XT, $XA, $XB, $XC, $UIM", - IIC_VecPerm, []>; - def XXBLENDVB : - 8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, - vsrc:$XC), "xxblendvb $XT, $XA, $XB, $XC", - IIC_VecGeneral, []>; - def XXBLENDVH : - 8RR_XX4Form_XTABC6<33, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, - vsrc:$XC), "xxblendvh $XT, $XA, $XB, $XC", - IIC_VecGeneral, []>; - def XXBLENDVW : - 8RR_XX4Form_XTABC6<33, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, - vsrc:$XC), "xxblendvw $XT, $XA, $XB, $XC", - IIC_VecGeneral, []>; - def XXBLENDVD : - 8RR_XX4Form_XTABC6<33, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, - vsrc:$XC), "xxblendvd $XT, $XA, $XB, $XC", - IIC_VecGeneral, []>; -} - -// XXSPLTIW/DP/32DX need extra flags to make sure the compiler does not attempt -// to spill part of the instruction when the values are similar. 
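-// For reference (ISA 3.1): xxspltiw splats the 32-bit immediate into all four
-// word elements of VSR[XT], while xxspltidp interprets it as a single-precision
-// value, converts it to double precision, and splats the result into both
-// doubleword elements (hence the v2f64 PPCxxspltidp pattern below).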
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in { - def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT), - (ins i32imm:$IMM32), - "xxspltiw $XT, $IMM32", IIC_VecGeneral, - []>; - def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT), - (ins i32imm:$IMM32), - "xxspltidp $XT, $IMM32", IIC_VecGeneral, - [(set v2f64:$XT, - (PPCxxspltidp i32:$IMM32))]>; - def XXSPLTI32DX : - 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT), - (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32), - "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral, - [(set v2i64:$XT, - (PPCxxsplti32dx v2i64:$XTi, i32:$IX, - i32:$IMM32))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; -} - -let Predicates = [IsISA3_1] in { - def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI), - "setbc $RT, $BI", IIC_IntCompare, []>; - def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI), - "setbcr $RT, $BI", IIC_IntCompare, []>; - def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI), - "setnbc $RT, $BI", IIC_IntCompare, []>; - def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI), - "setnbcr $RT, $BI", IIC_IntCompare, []>; - - let Interpretation64Bit = 1, isCodeGenOnly = 1 in { - def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI), - "setbc $RT, $BI", IIC_IntCompare, []>; - def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI), - "setbcr $RT, $BI", IIC_IntCompare, []>; - def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI), - "setnbc $RT, $BI", IIC_IntCompare, []>; - def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI), - "setnbcr $RT, $BI", IIC_IntCompare, []>; - } - - def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT), - (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), - "vsldbi $VRT, $VRA, $VRB, $SH", - IIC_VecGeneral, - [(set v16i8:$VRT, - (int_ppc_altivec_vsldbi v16i8:$VRA, - v16i8:$VRB, - timm:$SH))]>; - def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT), - (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), - "vsrdbi $VRT, $VRA, $VRB, $SH", - IIC_VecGeneral, - [(set v16i8:$VRT, - (int_ppc_altivec_vsrdbi v16i8:$VRA, - v16i8:$VRB, - timm:$SH))]>; - defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB), - "vstribr", "$vT, $vB", IIC_VecGeneral, - [(set v16i8:$vT, - (int_ppc_altivec_vstribr v16i8:$vB))]>; - defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB), - "vstribl", "$vT, $vB", IIC_VecGeneral, - [(set v16i8:$vT, - (int_ppc_altivec_vstribl v16i8:$vB))]>; - defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB), - "vstrihr", "$vT, $vB", IIC_VecGeneral, - [(set v8i16:$vT, - (int_ppc_altivec_vstrihr v8i16:$vB))]>; - defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB), - "vstrihl", "$vT, $vB", IIC_VecGeneral, - [(set v8i16:$vT, - (int_ppc_altivec_vstrihl v8i16:$vB))]>; - def VINSW : - VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB), - "vinsw $vD, $rB, $UIM", IIC_VecGeneral, - [(set v4i32:$vD, - (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VINSD : - VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB), - "vinsd $vD, $rB, $UIM", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VINSBVLX : - VXForm_VTB5_RA5_ins<15, "vinsbvlx", - [(set v16i8:$vD, - (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA, - 
v16i8:$vB))]>; - def VINSBVRX : - VXForm_VTB5_RA5_ins<271, "vinsbvrx", - [(set v16i8:$vD, - (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA, - v16i8:$vB))]>; - def VINSHVLX : - VXForm_VTB5_RA5_ins<79, "vinshvlx", - [(set v8i16:$vD, - (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA, - v8i16:$vB))]>; - def VINSHVRX : - VXForm_VTB5_RA5_ins<335, "vinshvrx", - [(set v8i16:$vD, - (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA, - v8i16:$vB))]>; - def VINSWVLX : - VXForm_VTB5_RA5_ins<143, "vinswvlx", - [(set v4i32:$vD, - (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA, - v4i32:$vB))]>; - def VINSWVRX : - VXForm_VTB5_RA5_ins<399, "vinswvrx", - [(set v4i32:$vD, - (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA, - v4i32:$vB))]>; - def VINSBLX : - VXForm_VRT5_RAB5_ins<527, "vinsblx", - [(set v16i8:$vD, - (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA, - i32:$rB))]>; - def VINSBRX : - VXForm_VRT5_RAB5_ins<783, "vinsbrx", - [(set v16i8:$vD, - (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA, - i32:$rB))]>; - def VINSHLX : - VXForm_VRT5_RAB5_ins<591, "vinshlx", - [(set v8i16:$vD, - (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA, - i32:$rB))]>; - def VINSHRX : - VXForm_VRT5_RAB5_ins<847, "vinshrx", - [(set v8i16:$vD, - (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA, - i32:$rB))]>; - def VINSWLX : - VXForm_VRT5_RAB5_ins<655, "vinswlx", - [(set v4i32:$vD, - (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA, - i32:$rB))]>; - def VINSWRX : - VXForm_VRT5_RAB5_ins<911, "vinswrx", - [(set v4i32:$vD, - (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA, - i32:$rB))]>; - def VINSDLX : - VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), - "vinsdlx $vD, $rA, $rB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VINSDRX : - VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), - "vinsdrx $vD, $rA, $rB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB), - "vextractbm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractbm v16i8:$vB))]>; - def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB), - "vextracthm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextracthm v8i16:$vB))]>; - def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB), - "vextractwm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractwm v4i32:$vB))]>; - def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB), - "vextractdm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractdm v2i64:$vB))]>; - def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB), - "vextractqm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractqm v1i128:$vB))]>; - def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandbm $vD, $vB", IIC_VecGeneral, - [(set v16i8:$vD, (int_ppc_altivec_vexpandbm - v16i8:$vB))]>; - def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandhm $vD, $vB", IIC_VecGeneral, - [(set v8i16:$vD, (int_ppc_altivec_vexpandhm - v8i16:$vB))]>; - def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandwm $vD, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (int_ppc_altivec_vexpandwm - v4i32:$vB))]>; - def VEXPANDDM : 
VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpanddm $vD, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (int_ppc_altivec_vexpanddm - v2i64:$vB))]>; - def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandqm $vD, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vexpandqm - v1i128:$vB))]>; - def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrbm $vD, $rB", IIC_VecGeneral, - [(set v16i8:$vD, - (int_ppc_altivec_mtvsrbm i64:$rB))]>; - def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrhm $vD, $rB", IIC_VecGeneral, - [(set v8i16:$vD, - (int_ppc_altivec_mtvsrhm i64:$rB))]>; - def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrwm $vD, $rB", IIC_VecGeneral, - [(set v4i32:$vD, - (int_ppc_altivec_mtvsrwm i64:$rB))]>; - def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrdm $vD, $rB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_mtvsrdm i64:$rB))]>; - def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrqm $vD, $rB", IIC_VecGeneral, - [(set v1i128:$vD, - (int_ppc_altivec_mtvsrqm i64:$rB))]>; - def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D), - "mtvsrbmi $vD, $D", IIC_VecGeneral, - [(set v16i8:$vD, - (int_ppc_altivec_mtvsrbm imm:$D))]>; - def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbb $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbb - v16i8:$vB, timm:$MP))]>; - def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbh $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbh - v8i16:$vB, timm:$MP))]>; - def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbw $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbw - v4i32:$vB, timm:$MP))]>; - def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbd $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbd - v2i64:$vB, timm:$MP))]>; - def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextdubvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextdubvlx v16i8:$vA, - v16i8:$vB, - i32:$rC))]>; - def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextdubvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextdubvrx v16i8:$vA, - v16i8:$vB, - i32:$rC))]>; - def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduhvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduhvlx v8i16:$vA, - v8i16:$vB, - i32:$rC))]>; - def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduhvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduhvrx v8i16:$vA, - v8i16:$vB, - i32:$rC))]>; - def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduwvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduwvlx v4i32:$vA, - v4i32:$vB, - i32:$rC))]>; - def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduwvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduwvrx v4i32:$vA, - v4i32:$vB, 
- i32:$rC))]>; - def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextddvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextddvlx v2i64:$vA, - v2i64:$vB, - i32:$rC))]>; - def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextddvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextddvrx v2i64:$vA, - v2i64:$vB, - i32:$rC))]>; - def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpdepd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>; - def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpextd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>; - def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "pdepd $rA, $rS, $rB", IIC_IntGeneral, - [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>; - def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "pextd $rA, $rS, $rB", IIC_IntGeneral, - [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>; - def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vcfuged $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>; - def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N), - "vgnb $rD, $vB, $N", IIC_VecGeneral, - [(set i64:$rD, - (int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>; - def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "cfuged $rA, $rS, $rB", IIC_IntGeneral, - [(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>; - def XXEVAL : - 8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, - vsrc:$XC, u8imm:$IMM), - "xxeval $XT, $XA, $XB, $XC, $IMM", IIC_VecGeneral, - [(set v2i64:$XT, (int_ppc_vsx_xxeval v2i64:$XA, - v2i64:$XB, v2i64:$XC, timm:$IMM))]>; - def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vclzdm $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vclzdm v2i64:$vA, v2i64:$vB))]>; - def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vctzdm $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vctzdm v2i64:$vA, v2i64:$vB))]>; - def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "cntlzdm $rA, $rS, $rB", IIC_IntGeneral, - [(set i64:$rA, - (int_ppc_cntlzdm i64:$rS, i64:$rB))]>; - def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "cnttzdm $rA, $rS, $rB", IIC_IntGeneral, - [(set i64:$rA, - (int_ppc_cnttzdm i64:$rS, i64:$rB))]>; - def XXGENPCVBM : - XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), - "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>; - def XXGENPCVHM : - XForm_XT6_IMM5_VB5<60, 917, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), - "xxgenpcvhm $XT, $VRB, $IMM", IIC_VecGeneral, []>; - def XXGENPCVWM : - XForm_XT6_IMM5_VB5<60, 948, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), - "xxgenpcvwm $XT, $VRB, $IMM", IIC_VecGeneral, []>; - def XXGENPCVDM : - XForm_XT6_IMM5_VB5<60, 949, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), - "xxgenpcvdm $XT, $VRB, $IMM", IIC_VecGeneral, []>; - def VCLRLB : VXForm_1<397, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB), - "vclrlb $vD, $vA, $rB", IIC_VecGeneral, - [(set v16i8:$vD, - (int_ppc_altivec_vclrlb v16i8:$vA, i32:$rB))]>; - def VCLRRB : VXForm_1<461, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB), - 
"vclrrb $vD, $vA, $rB", IIC_VecGeneral, - [(set v16i8:$vD, - (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>; - def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulld $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>; - def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>; - def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>; - def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>; - def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>; - def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodsw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>; - def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmoduw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>; - def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodsd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>; - def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>; - def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivsw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>; - def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivuw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>; - def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivsd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>; - def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>; - def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA, - v4i32:$vB))]>; - def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveuw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA, - v4i32:$vB))]>; - def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA, - v2i64:$vB))]>; - def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA, - v2i64:$vB))]>; - def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB), - "xvtlsbb $BF, $XB", IIC_VecGeneral, []>; - - // The XFormMemOp flag for the following 8 instructions is set on - // the instruction format. 
- let mayLoad = 1, mayStore = 0 in { - def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>; - def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>; - def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>; - def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>; - } - - let mayLoad = 0, mayStore = 1 in { - def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>; - def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>; - def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>; - def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>; - } - - def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulesd $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA, - v2i64:$vB))]>; - def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuleud $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA, - v2i64:$vB))]>; - def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulosd $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA, - v2i64:$vB))]>; - def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuloud $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA, - v2i64:$vB))]>; - def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmsumcud - v2i64:$vA, v2i64:$vB, v1i128:$vC))]>; - def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivsq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>; - def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivuq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>; - def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA, - v1i128:$vB))]>; - def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA, - v1i128:$vB))]>; - def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>; - def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>; - def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>; - def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>; - def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>; - def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. 
$vD, $vA, $vB" , v1i128>; - def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodsq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>; - def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmoduq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>; - def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB), - "vextsd2q $vD, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>; - def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), - "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>; - def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), - "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>; - def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", - [(set v1i128:$vD, - (int_ppc_altivec_vrlqnm v1i128:$vA, - v1i128:$vB))]>; - def VRLQMI : VXForm_1<69, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), - "vrlqmi $vD, $vA, $vB", IIC_VecFP, - [(set v1i128:$vD, - (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB, - v1i128:$vDi))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>; - def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>; - def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>; - def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>; - def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>; - def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>; - def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>; - def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>; -} - -let Predicates = [IsISA3_1, HasVSX] in { - def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>; - def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>; - def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", []>; - def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", []>; -} - -// Multiclass defining patterns for Set Boolean Extension Reverse Instructions. -// This is analogous to the CRNotPat multiclass but specifically for Power10 -// and newer subtargets since the extended forms use Set Boolean instructions. -// The first two anonymous patterns defined are actually a duplicate of those -// in CRNotPat, but it is preferable to define both multiclasses as complete -// ones rather than pulling that small common section out. 
-multiclass P10ReverseSetBool<dag pattern, dag result> {
-  def : Pat<pattern, (crnot result)>;
-  def : Pat<(not pattern), result>;
-
-  def : Pat<(i32 (zext pattern)),
-            (SETBCR result)>;
-  def : Pat<(i64 (zext pattern)),
-            (SETBCR8 result)>;
-
-  def : Pat<(i32 (sext pattern)),
-            (SETNBCR result)>;
-  def : Pat<(i64 (sext pattern)),
-            (SETNBCR8 result)>;
-
-  def : Pat<(i32 (anyext pattern)),
-            (SETBCR result)>;
-  def : Pat<(i64 (anyext pattern)),
-            (SETBCR8 result)>;
-}
-
-multiclass IntSetP10RevSetBool<SDNode SetCC, ValueType Ty, ImmLeaf ZExtTy,
-                               ImmLeaf SExtTy, I Cmpi, I Cmpli, I Cmp,
-                               I Cmpl> {
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
-                           (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_lt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
-                           (EXTRACT_SUBREG (Cmp $s1, $s2), sub_lt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
-                           (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_gt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
-                           (EXTRACT_SUBREG (Cmp $s1, $s2), sub_gt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
-                           (EXTRACT_SUBREG (Cmp $s1, $s2), sub_eq)>;
-
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETUGE)),
-                           (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_lt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETGE)),
-                           (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_lt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETULE)),
-                           (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_gt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETLE)),
-                           (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_gt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETNE)),
-                           (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_eq)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETNE)),
-                           (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_eq)>;
-}
-
-multiclass FSetP10RevSetBool<SDNode SetCC, ValueType Ty, I FCmp> {
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
-  defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
-                           (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
-}
-
-let Predicates = [IsISA3_1] in {
-  def : Pat<(i32 (zext i1:$in)),
-            (SETBC $in)>;
-  def : Pat<(i64 (zext i1:$in)),
-            (SETBC8 $in)>;
-  def : Pat<(i32 (sext i1:$in)),
-            (SETNBC $in)>;
-  def : Pat<(i64 (sext i1:$in)),
-            (SETNBC8 $in)>;
-  def : Pat<(i32 (anyext i1:$in)),
-            (SETBC $in)>;
-  def : Pat<(i64 (anyext i1:$in)),
-            (SETBC8 $in)>;
-
-  // Instantiation of the set boolean reverse patterns for 32-bit integers.
-  defm : IntSetP10RevSetBool<setcc, i32, immZExt16, imm32SExt16,
-                             CMPWI, CMPLWI, CMPW, CMPLW>;
-  defm : P10ReverseSetBool<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
-                           (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
-                                                   (LO16 imm:$imm)), sub_eq)>;
-
-  // Instantiation of the set boolean reverse patterns for 64-bit integers.
-  defm : IntSetP10RevSetBool<setcc, i64, immZExt16, imm64SExt16,
-                             CMPDI, CMPLDI, CMPD, CMPLD>;
-  defm : P10ReverseSetBool<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
-                           (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
-                                                   (LO16 imm:$imm)), sub_eq)>;
-}
-
-// Instantiation of the set boolean reverse patterns for f32, f64, f128.
-let Predicates = [IsISA3_1, HasFPU] in {
-  defm : FSetP10RevSetBool<setcc, f32, FCMPUS>;
-  defm : FSetP10RevSetBool<setcc, f64, FCMPUD>;
-  defm : FSetP10RevSetBool<setcc, f128, XSCMPUQP>;
-}
-
-//---------------------------- Anonymous Patterns ----------------------------//
-let Predicates = [IsISA3_1] in {
-  // Exploit the vector multiply high instructions using intrinsics.
-  def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
-            (v4i32 (VMULHSW $vA, $vB))>;
-  def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
-            (v4i32 (VMULHUW $vA, $vB))>;
-  def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
-            (v2i64 (VMULHSD $vA, $vB))>;
-  def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
-            (v2i64 (VMULHUD $vA, $vB))>;
-  def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
-            (v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
-  def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
-            (v8i16 (COPY_TO_REGCLASS (XXGENPCVHM $VRB, imm:$IMM), VRRC))>;
-  def : Pat<(v4i32 (int_ppc_vsx_xxgenpcvwm v4i32:$VRB, imm:$IMM)),
-            (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
-  def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
-            (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
-  def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 1)),
-            (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
-  def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
-            (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
-
-  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 8)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VRRC))>;
-  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 16)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VRRC))>;
-  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 32)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRWX ForceXForm:$src), VRRC))>;
-  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 64)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRDX ForceXForm:$src), VRRC))>;
-
-  def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
-            (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
-
-  def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
-             (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
-}
-
-let Predicates = [IsISA3_1, HasVSX] in {
-  def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
-            (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
-  def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
-            (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
-}
-
-let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
-  // Store element 0 of a VSX register to memory
-  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), ForceXForm:$dst),
-            (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), ForceXForm:$dst)>;
-  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), ForceXForm:$dst),
-            (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), ForceXForm:$dst)>;
-  def : Pat<(store (i32 (extractelt v4i32:$src, 0)), ForceXForm:$dst),
-            (STXVRWX $src, ForceXForm:$dst)>;
-  def : Pat<(store (f32 (extractelt v4f32:$src, 0)), ForceXForm:$dst),
-            (STXVRWX $src, ForceXForm:$dst)>;
-  def : Pat<(store (i64 (extractelt v2i64:$src, 0)), ForceXForm:$dst),
-            (STXVRDX $src, ForceXForm:$dst)>;
-  def : Pat<(store (f64 (extractelt v2f64:$src, 0)), ForceXForm:$dst),
-            (STXVRDX $src, ForceXForm:$dst)>;
-  // Load element 0 of a VSX register from memory
-  def : Pat<(v8i16 (scalar_to_vector (i32 (extloadi16 ForceXForm:$src)))),
-            (v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
-  def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8
ForceXForm:$src)))), - (v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>; - } - -// FIXME: The swap is overkill when the shift amount is a constant. -// We should just fix the constant in the DAG. -let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { - def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSLQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSLQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRAQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRAQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; -} - -class xxevalPattern imm> : - Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} - -let AddedComplexity = 400, Predicates = [PrefixInstrs] in { - def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, - i32immNonAllOneNonZero:$A, - i32immNonAllOneNonZero:$A, - i32immNonAllOneNonZero:$A)), - (v4i32 (XXSPLTIW imm:$A))>; - def : Pat<(f32 nzFPImmAsi32:$A), - (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)), - VSFRC)>; - def : Pat<(f64 nzFPImmAsi32:$A), - (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)), - VSFRC)>; - -// To replace constant pool with XXSPLTI32DX for scalars. 
-def : Pat<(f32 nzFPImmAsi64:$A), - (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX(IMPLICIT_DEF), 0, - (getFPAs64BitIntHi $A)), - 1, (getFPAs64BitIntLo $A)), - VSSRC)>; - -def : Pat<(f64 nzFPImmAsi64:$A), - (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX (IMPLICIT_DEF), 0, - (getFPAs64BitIntHi $A)), - 1, (getFPAs64BitIntLo $A)), - VSFRC)>; - - // Anonymous patterns for XXEVAL - // AND - // and(A, B, C) - def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; - // and(A, xor(B, C)) - def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; - // and(A, or(B, C)) - def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; - // and(A, nor(B, C)) - def : xxevalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; - // and(A, eqv(B, C)) - def : xxevalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; - // and(A, nand(B, C)) - def : xxevalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; - - // NAND - // nand(A, B, C) - def : xxevalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), - !sub(255, 1)>; - // nand(A, xor(B, C)) - def : xxevalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), - !sub(255, 6)>; - // nand(A, or(B, C)) - def : xxevalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), - !sub(255, 7)>; - // nand(A, nor(B, C)) - def : xxevalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), - !sub(255, 8)>; - // nand(A, eqv(B, C)) - def : xxevalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), - !sub(255, 9)>; - // nand(A, nand(B, C)) - def : xxevalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), - !sub(255, 14)>; - - // Anonymous patterns to select prefixed VSX loads and stores. - // Load / Store f128 - def : Pat<(f128 (load PDForm:$src)), - (COPY_TO_REGCLASS (PLXV memri34:$src), VRRC)>; - def : Pat<(store f128:$XS, PDForm:$dst), - (PSTXV (COPY_TO_REGCLASS $XS, VSRC), memri34:$dst)>; - - // Load / Store v4i32 - def : Pat<(v4i32 (load PDForm:$src)), (PLXV memri34:$src)>; - def : Pat<(store v4i32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; - - // Load / Store v2i64 - def : Pat<(v2i64 (load PDForm:$src)), (PLXV memri34:$src)>; - def : Pat<(store v2i64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; - - // Load / Store v4f32 - def : Pat<(v4f32 (load PDForm:$src)), (PLXV memri34:$src)>; - def : Pat<(store v4f32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; - - // Load / Store v2f64 - def : Pat<(v2f64 (load PDForm:$src)), (PLXV memri34:$src)>; - def : Pat<(store v2f64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; - - // Cases For PPCstore_scal_int_from_vsr - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), PDForm:$dst, 8), - (PSTXSD (XSCVDPUXDS f64:$src), PDForm:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), PDForm:$dst, 8), - (PSTXSD (XSCVDPSXDS f64:$src), PDForm:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), PDForm:$dst, 8), - (PSTXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - PDForm:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), PDForm:$dst, 8), - (PSTXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - PDForm:$dst)>; -} - -let Predicates = [PrefixInstrs] in { - def : Pat<(i32 imm34:$imm), (PLI (getImmAs64BitInt imm:$imm))>; - def : Pat<(i64 imm34:$imm), (PLI8 (getImmAs64BitInt imm:$imm))>; - def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)), - (COPY_TO_REGCLASS (XXPERMX 
(COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>; - def : Pat<(v16i8 (int_ppc_vsx_xxblendvb v16i8:$A, v16i8:$B, v16i8:$C)), - (COPY_TO_REGCLASS - (XXBLENDVB (COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $C, VSRC)), VSRC)>; - def : Pat<(v8i16 (int_ppc_vsx_xxblendvh v8i16:$A, v8i16:$B, v8i16:$C)), - (COPY_TO_REGCLASS - (XXBLENDVH (COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $C, VSRC)), VSRC)>; - def : Pat<(int_ppc_vsx_xxblendvw v4i32:$A, v4i32:$B, v4i32:$C), - (XXBLENDVW $A, $B, $C)>; - def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C), - (XXBLENDVD $A, $B, $C)>; - - // Anonymous patterns to select prefixed loads and stores. - // Load i32 - def : Pat<(i32 (extloadi1 PDForm:$src)), (PLBZ memri34:$src)>; - def : Pat<(i32 (zextloadi1 PDForm:$src)), (PLBZ memri34:$src)>; - def : Pat<(i32 (extloadi8 PDForm:$src)), (PLBZ memri34:$src)>; - def : Pat<(i32 (zextloadi8 PDForm:$src)), (PLBZ memri34:$src)>; - def : Pat<(i32 (extloadi16 PDForm:$src)), (PLHZ memri34:$src)>; - def : Pat<(i32 (zextloadi16 PDForm:$src)), (PLHZ memri34:$src)>; - def : Pat<(i32 (sextloadi16 PDForm:$src)), (PLHA memri34:$src)>; - def : Pat<(i32 (load PDForm:$src)), (PLWZ memri34:$src)>; - - // Store i32 - def : Pat<(truncstorei8 i32:$rS, PDForm:$dst), (PSTB gprc:$rS, memri34:$dst)>; - def : Pat<(truncstorei16 i32:$rS, PDForm:$dst), (PSTH gprc:$rS, memri34:$dst)>; - def : Pat<(store i32:$rS, PDForm:$dst), (PSTW gprc:$rS, memri34:$dst)>; - - // Load i64 - def : Pat<(i64 (extloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>; - def : Pat<(i64 (zextloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>; - def : Pat<(i64 (extloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>; - def : Pat<(i64 (zextloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>; - def : Pat<(i64 (extloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>; - def : Pat<(i64 (zextloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>; - def : Pat<(i64 (sextloadi16 PDForm:$src)), (PLHA8 memri34:$src)>; - def : Pat<(i64 (extloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>; - def : Pat<(i64 (zextloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>; - def : Pat<(i64 (sextloadi32 PDForm:$src)), (PLWA8 memri34:$src)>; - def : Pat<(i64 (load PDForm:$src)), (PLD memri34:$src)>; - - // Store i64 - def : Pat<(truncstorei8 i64:$rS, PDForm:$dst), (PSTB8 g8rc:$rS, memri34:$dst)>; - def : Pat<(truncstorei16 i64:$rS, PDForm:$dst), (PSTH8 g8rc:$rS, memri34:$dst)>; - def : Pat<(truncstorei32 i64:$rS, PDForm:$dst), (PSTW8 g8rc:$rS, memri34:$dst)>; - def : Pat<(store i64:$rS, PDForm:$dst), (PSTD g8rc:$rS, memri34:$dst)>; - - // Load / Store f32 - def : Pat<(f32 (load PDForm:$src)), (PLFS memri34:$src)>; - def : Pat<(store f32:$FRS, PDForm:$dst), (PSTFS $FRS, memri34:$dst)>; - - // Load / Store f64 - def : Pat<(f64 (extloadf32 PDForm:$src)), - (COPY_TO_REGCLASS (PLFS memri34:$src), VSFRC)>; - def : Pat<(f64 (load PDForm:$src)), (PLFD memri34:$src)>; - def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>; - - // Atomic Load - def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>; - def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>; - def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>; - def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>; - - // Atomic Store - def : Pat<(atomic_store_8 PDForm:$dst, i32:$RS), (PSTB $RS, memri34:$dst)>; - def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>; - def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), 
(PSTW $RS, memri34:$dst)>; - def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>; - - // Prefixed fpext to v2f64 - def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)), - (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>; -} - -def InsertEltShift { - dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32)); - dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30); - dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29); - dag Left1 = (RLWINM $rB, 1, 0, 30); - dag Left2 = (RLWINM $rB, 2, 0, 29); - dag Left3 = (RLWINM8 $rB, 3, 0, 28); -} - -let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in { - // Indexed vector insert element - def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)), - (VINSBRX $vDi, InsertEltShift.Sub32, $rA)>; - def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)), - (VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>; - def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>; - def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)), - (VINSDRX $vDi, InsertEltShift.Left3, $rA)>; - - def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), - (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - - def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), - (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), - (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), - (VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), - (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; - let AddedComplexity = 400 in { - // Immediate vector insert element - foreach Idx = [0, 1, 2, 3] in { - def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)), - (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>; - } - foreach i = [0, 1] in - def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))), - (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>; - } -} - -let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in { - // Indexed vector insert element - def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)), - (VINSBLX $vDi, $rB, $rA)>; - def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)), - (VINSHLX $vDi, InsertEltShift.Left1, $rA)>; - def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)), - (VINSWLX $vDi, InsertEltShift.Left2, $rA)>; - - def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)), - (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>; -} - -let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in { - // Indexed vector insert element - def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)), - (VINSBLX $vDi, InsertEltShift.Sub32, $rA)>; - def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)), - (VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>; - def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)), - (VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>; - def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)), - (VINSDLX $vDi, InsertEltShift.Left3, $rA)>; - - def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), - (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - - def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), - (VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), - (VINSDLX 
$vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), - (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), - (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; -} - -let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in { - // Immediate vector insert element - foreach Ty = [i32, i64] in { - foreach Idx = [0, 1, 2, 3] in { - def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))), - (VINSW $vDi, !mul(Idx, 4), $rA)>; - } - } - - foreach Idx = [0, 1] in - def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)), - (VINSD $vDi, !mul(Idx, 8), $rA)>; -} diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 110f7d79fbc5..6e562498dcf9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -51,35 +51,6 @@ // ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). ** // **************************************************************************** -def PPCRegVSRCAsmOperand : AsmOperandClass { - let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; -} -def vsrc : RegisterOperand { - let ParserMatchClass = PPCRegVSRCAsmOperand; -} - -def PPCRegVSFRCAsmOperand : AsmOperandClass { - let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber"; -} -def vsfrc : RegisterOperand { - let ParserMatchClass = PPCRegVSFRCAsmOperand; -} - -def PPCRegVSSRCAsmOperand : AsmOperandClass { - let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber"; -} -def vssrc : RegisterOperand { - let ParserMatchClass = PPCRegVSSRCAsmOperand; -} - -def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass { - let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber"; -} - -def spilltovsrrc : RegisterOperand { - let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand; -} - def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [ SDTCisVT<0, v4f32>, SDTCisPtrTy<1> ]>; @@ -732,6 +703,11 @@ let hasSideEffects = 0 in { (outs vsfrc:$XT), (ins vsfrc:$XB), "xsnabsdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fabs f64:$XB)))]>; + let isCodeGenOnly = 1 in + def XSNABSDPs : XX2Form<60, 361, + (outs vssrc:$XT), (ins vssrc:$XB), + "xsnabsdp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fabs f32:$XB)))]>; def XSNEGDP : XX2Form<60, 377, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsnegdp $XT, $XB", IIC_VecFP, @@ -2897,10 +2873,32 @@ def : Pat<(v2i64 (PPCvcmp_rec v2i64:$vA, v2i64:$vB, 199)), // XL Compat builtins. def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>; -def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (XSNMSUBMDP $A, $B, $C)>; def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>; def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>; def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>; +def : Pat<(int_ppc_fnabs f64:$A), (XSNABSDP $A)>; +def : Pat<(int_ppc_fnabss f32:$A), (XSNABSDPs $A)>; + +// XXMRG[LH]W is a direct replacement for VMRG[LH]W respectively. +// Prefer the VSX form for greater register range. 
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef), + (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), + (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def:Pat<(vmrglw_shuffle v16i8:$vA, v16i8:$vB), + (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC), + (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>; +def:Pat<(vmrghw_shuffle v16i8:$vA, v16i8:$vB), + (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC), + (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>; +def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vB, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vB, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; } // HasVSX // Any big endian VSX subtarget. @@ -3311,7 +3309,6 @@ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), // XL Compat builtins. def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (XSMSUBMSP $A, $B, $C)>; -def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (XSNMSUBMSP $A, $B, $C)>; def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (XSNMADDMSP $A, $B, $C)>; def : Pat<(int_ppc_fres f32:$A), (XSRESP $A)>; def : Pat<(i32 (int_ppc_extract_exp f64:$A)), @@ -3370,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)), def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)), (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>; + +defm : ScalToVecWPermute< + v4i32, (i32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; +defm : ScalToVecWPermute< + v4f32, (f32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; } // HasVSX, HasP8Vector, IsBigEndian // Big endian Power8 64Bit VSX subtarget. 
@@ -3384,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>; -defm : ScalToVecWPermute< - v4i32, (i32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; -defm : ScalToVecWPermute< - v4f32, (f32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; def : Pat MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24), - cl::ZeroOrMore, cl::desc("Potential common base number threshold per function " "for PPC loop prep")); diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 22c5b6c11289..976effb96adc 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -107,7 +107,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, if (Subtarget->isUsingPCRelativeCalls()) { if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 || MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 || - MIOpcode == PPC::BL8_NOTOC) { + MIOpcode == PPC::BL8_NOTOC || MIOpcode == PPC::BL8_NOTOC_RM) { RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; } if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG) diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index e5fa02bc8ccf..67d91d23962c 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -985,7 +986,7 @@ bool PPCMIPeephole::simplifyCode() { LiMI->getOpcode() == PPC::LI8) && "Invalid Opcode!"); auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI - LiMI->RemoveOperand(1); // remove the imm of LI + LiMI->removeOperand(1); // remove the imm of LI LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? 
                                   PPC::ADDI : PPC::ADDI8));
       MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 782d41f93ae5..9d6dfd16ff9d 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -23,6 +23,13 @@ void PPCFunctionInfo::anchor() {}
 PPCFunctionInfo::PPCFunctionInfo(const MachineFunction &MF)
     : DisableNonVolatileCR(PPCDisableNonVolatileCR) {}
 
+MachineFunctionInfo *
+PPCFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+                       const DenseMap<MachineBasicBlock *, MachineBasicBlock *>
+                           &Src2DstMBB) const {
+  return DestMF.cloneInfo<PPCFunctionInfo>(*this);
+}
+
 MCSymbol *PPCFunctionInfo::getPICOffsetSymbol(MachineFunction &MF) const {
   const DataLayout &DL = MF.getDataLayout();
   return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 07c503d47e98..b918e723de00 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -153,6 +153,11 @@ private:
 public:
   explicit PPCFunctionInfo(const MachineFunction &MF);
 
+  MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const override;
+
   int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
   void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index 9d5206f8fd43..58b74c6b8c7a 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -15,6 +15,7 @@
 #include "PPCSubtarget.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 using namespace llvm;
 
 namespace {
@@ -266,13 +267,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
       continue;
 
     auto DepOpIdx = Feature.depOpIdx();
-    if (DepOpIdx.hasValue()) {
+    if (DepOpIdx) {
       // Checking if the result of the FirstMI is the desired operand of the
       // SecondMI if the DepOpIdx is set. Otherwise, ignore it.
       if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
         return false;
     }
-
+
     // Checking more on the instruction operands.
     if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
       return true;
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index a8853609a7c8..82c150b988ab 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -46,6 +47,10 @@ static cl::opt<bool>
 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
                    cl::desc("Run pre-emit peephole optimizations."));
 
+static cl::opt<uint32_t>
+DSCRValue("ppc-set-dscr", cl::Hidden,
+          cl::desc("Set the Data Stream Control Register."));
+
 namespace {
 
 static bool hasPCRelativeForm(MachineInstr &Use) {
@@ -407,6 +412,38 @@
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
+    // If the user wants to set the DSCR using command-line options,
+    // load in the specified value at the start of main.
+    if (DSCRValue.getNumOccurrences() > 0 && MF.getName().equals("main") &&
+        MF.getFunction().hasExternalLinkage()) {
+      DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask
+      RegScavenger RS;
+      MachineBasicBlock &MBB = MF.front();
+      // Find an unused GPR according to register liveness
+      RS.enterBasicBlock(MBB);
+      unsigned InDSCR = RS.FindUnusedReg(&PPC::GPRCRegClass);
+      if (InDSCR) {
+        const PPCInstrInfo *TII =
+            MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+        DebugLoc dl;
+        MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point
+        // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and
+        // ORI, then move to DSCR. If the requested DSCR value is contained
+        // in a 16-bit signed number, we can emit a single `LI`, but the
+        // impact of saving one instruction in one function does not warrant
+        // any additional complexity in the logic here.
+        BuildMI(MBB, IP, dl, TII->get(PPC::LIS), InDSCR)
+            .addImm(DSCRValue >> 16);
+        BuildMI(MBB, IP, dl, TII->get(PPC::ORI), InDSCR)
+            .addReg(InDSCR)
+            .addImm(DSCRValue & 0xFFFF);
+        BuildMI(MBB, IP, dl, TII->get(PPC::MTUDSCR))
+            .addReg(InDSCR, RegState::Kill);
+      } else
+        errs() << "Warning: Ran out of registers - Unable to set DSCR as "
+                  "requested";
+    }
+
     if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
       // Remove UNENCODED_NOP even when this pass is disabled.
// This needs to be done unconditionally so we don't emit zeros diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 76b016c0ee79..7349eb8addc9 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -90,6 +91,8 @@ ReportAccMoves("ppc-report-acc-moves", cl::Hidden, cl::init(false)); #endif +extern cl::opt DisableAutoPairedVecSt; + static unsigned offsetMinAlignForOpcode(unsigned OpC); PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) @@ -113,6 +116,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; + ImmToIdxMap[PPC::LQ] = PPC::LQX_PSEUDO; + ImmToIdxMap[PPC::STQ] = PPC::STQX_PSEUDO; // VSX ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX; @@ -183,6 +188,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (!TM.isPPC64() && Subtarget.isAIXABI()) report_fatal_error("AnyReg unimplemented on 32-bit AIX."); if (Subtarget.hasVSX()) { + if (Subtarget.pairedVectorMemops()) + return CSR_64_AllRegs_VSRP_SaveList; if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList; return CSR_64_AllRegs_VSX_SaveList; @@ -210,6 +217,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isAIXABI()) report_fatal_error("Cold calling unimplemented on AIX."); if (TM.isPPC64()) { + if (Subtarget.pairedVectorMemops()) + return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList + : CSR_SVR64_ColdCC_VSRP_SaveList; if (Subtarget.hasAltivec()) return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList : CSR_SVR64_ColdCC_Altivec_SaveList; @@ -217,7 +227,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : CSR_SVR64_ColdCC_SaveList; } // 32-bit targets. - if (Subtarget.hasAltivec()) + if (Subtarget.pairedVectorMemops()) + return CSR_SVR32_ColdCC_VSRP_SaveList; + else if (Subtarget.hasAltivec()) return CSR_SVR32_ColdCC_Altivec_SaveList; else if (Subtarget.hasSPE()) return CSR_SVR32_ColdCC_SPE_SaveList; @@ -225,6 +237,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } // Standard calling convention CSRs. if (TM.isPPC64()) { + if (Subtarget.pairedVectorMemops()) + return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList; if (Subtarget.hasAltivec() && (!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) { return SaveR2 ? 
CSR_PPC64_R2_Altivec_SaveList @@ -239,6 +253,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : CSR_AIX32_SaveList; return CSR_AIX32_SaveList; } + if (Subtarget.pairedVectorMemops()) + return CSR_SVR432_VSRP_SaveList; if (Subtarget.hasAltivec()) return CSR_SVR432_Altivec_SaveList; else if (Subtarget.hasSPE()) @@ -252,6 +268,8 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, const PPCSubtarget &Subtarget = MF.getSubtarget(); if (CC == CallingConv::AnyReg) { if (Subtarget.hasVSX()) { + if (Subtarget.pairedVectorMemops()) + return CSR_64_AllRegs_VSRP_RegMask; if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask; return CSR_64_AllRegs_VSX_RegMask; @@ -275,20 +293,32 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, } if (CC == CallingConv::Cold) { - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask - : CSR_SVR64_ColdCC_RegMask) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : (Subtarget.hasSPE() - ? CSR_SVR32_ColdCC_SPE_RegMask - : CSR_SVR32_ColdCC_RegMask)); + if (TM.isPPC64()) + return Subtarget.pairedVectorMemops() + ? CSR_SVR64_ColdCC_VSRP_RegMask + : (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask + : CSR_SVR64_ColdCC_RegMask); + else + return Subtarget.pairedVectorMemops() + ? CSR_SVR32_ColdCC_VSRP_RegMask + : (Subtarget.hasAltivec() + ? CSR_SVR32_ColdCC_Altivec_RegMask + : (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask + : CSR_SVR32_ColdCC_RegMask)); } - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask - : CSR_PPC64_RegMask) - : (Subtarget.hasAltivec() - ? CSR_SVR432_Altivec_RegMask - : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask - : CSR_SVR432_RegMask)); + if (TM.isPPC64()) + return Subtarget.pairedVectorMemops() + ? CSR_SVR464_VSRP_RegMask + : (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask + : CSR_PPC64_RegMask); + else + return Subtarget.pairedVectorMemops() + ? CSR_SVR432_VSRP_RegMask + : (Subtarget.hasAltivec() + ? CSR_SVR432_Altivec_RegMask + : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask + : CSR_SVR432_RegMask)); } const uint32_t* @@ -463,6 +493,14 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co LLVM_DEBUG(dbgs() << "TRUE - Memory operand is X-Form.\n"); return true; } + + // This is a spill/restore of a quadword. + if ((Opcode == PPC::RESTORE_QUADWORD) || (Opcode == PPC::SPILL_QUADWORD)) { + LLVM_DEBUG(dbgs() << "Memory Operand: " << InstrInfo->getName(Opcode) + << " for register " << printReg(Reg, this) << ".\n"); + LLVM_DEBUG(dbgs() << "TRUE - Memory operand is a quadword.\n"); + return true; + } } LLVM_DEBUG(dbgs() << "FALSE - Scavenging is not required.\n"); return false; @@ -1082,7 +1120,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, MBB.erase(II); if (SpillsKnownBit && KillsCRBit && !SeenUse) { Ins->setDesc(TII.get(PPC::UNENCODED_NOP)); - Ins->RemoveOperand(0); + Ins->removeOperand(0); } } @@ -1163,6 +1201,59 @@ static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed, #endif } +static void spillRegPairs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + const TargetInstrInfo &TII, Register SrcReg, + unsigned FrameIndex, bool IsLittleEndian, + bool IsKilled, bool TwoPairs) { + unsigned Offset = 0; + if (TwoPairs) + Offset = IsLittleEndian ? 48 : 0; + else + Offset = IsLittleEndian ? 16 : 0; + Register Reg = (SrcReg > PPC::VSRp15) ? 
PPC::V0 + (SrcReg - PPC::VSRp16) * 2 + : PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2; + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV)) + .addReg(Reg, getKillRegState(IsKilled)), + FrameIndex, Offset); + Offset += IsLittleEndian ? -16 : 16; + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV)) + .addReg(Reg + 1, getKillRegState(IsKilled)), + FrameIndex, Offset); + if (TwoPairs) { + Offset += IsLittleEndian ? -16 : 16; + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV)) + .addReg(Reg + 2, getKillRegState(IsKilled)), + FrameIndex, Offset); + Offset += IsLittleEndian ? -16 : 16; + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV)) + .addReg(Reg + 3, getKillRegState(IsKilled)), + FrameIndex, Offset); + } +} + +/// Remove any STXVP[X] instructions and split them out into a pair of +/// STXV[X] instructions if --disable-auto-paired-vec-st is specified on +/// the command line. +void PPCRegisterInfo::lowerOctWordSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + assert(DisableAutoPairedVecSt && + "Expecting to do this only if paired vector stores are disabled."); + MachineInstr &MI = *II; // STXVP , + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + Register SrcReg = MI.getOperand(0).getReg(); + bool IsLittleEndian = Subtarget.isLittleEndian(); + bool IsKilled = MI.getOperand(0).isKill(); + spillRegPairs(MBB, II, DL, TII, SrcReg, FrameIndex, IsLittleEndian, IsKilled, + /* TwoPairs */ false); + // Discard the original instruction. + MBB.erase(II); +} + /// lowerACCSpilling - Generate the code for spilling the accumulator register. /// Similarly to other spills/reloads that use pseudo-ops, we do not actually /// eliminate the FrameIndex here nor compute the stack offset. We simply @@ -1192,12 +1283,17 @@ void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II, // adjust the offset of the store that is within the 64-byte stack slot. if (IsPrimed) BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(Reg, getKillRegState(IsKilled)), - FrameIndex, IsLittleEndian ? 32 : 0); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(Reg + 1, getKillRegState(IsKilled)), - FrameIndex, IsLittleEndian ? 0 : 32); + if (DisableAutoPairedVecSt) + spillRegPairs(MBB, II, DL, TII, Reg, FrameIndex, IsLittleEndian, IsKilled, + /* TwoPairs */ true); + else { + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) + .addReg(Reg, getKillRegState(IsKilled)), + FrameIndex, IsLittleEndian ? 32 : 0); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) + .addReg(Reg + 1, getKillRegState(IsKilled)), + FrameIndex, IsLittleEndian ? 
0 : 32); + } if (IsPrimed && !IsKilled) BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg); @@ -1433,6 +1529,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) { lowerACCRestore(II, FrameIndex); return; + } else if (OpC == PPC::STXVP && DisableAutoPairedVecSt) { + lowerOctWordSpilling(II, FrameIndex); + return; } else if (OpC == PPC::SPILL_QUADWORD) { lowerQuadwordSpilling(II, FrameIndex); return; @@ -1451,7 +1550,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC); // Now add the frame object offset to the offset from r1. - int Offset = MFI.getObjectOffset(FrameIndex); + int64_t Offset = MFI.getObjectOffset(FrameIndex); Offset += MI.getOperand(OffsetOperandNo).getImm(); // If we're not using a Frame Pointer that has been set to the value of the @@ -1507,17 +1606,21 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; Register SRegHi = MF.getRegInfo().createVirtualRegister(RC), SReg = MF.getRegInfo().createVirtualRegister(RC); + unsigned NewOpcode = 0u; // Insert a set of rA with the full offset value before the ld, st, or add if (isInt<16>(Offset)) BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg) - .addImm(Offset); - else { + .addImm(Offset); + else if (isInt<32>(Offset)) { BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi) - .addImm(Offset >> 16); + .addImm(Offset >> 16); BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) - .addReg(SRegHi, RegState::Kill) - .addImm(Offset); + .addReg(SRegHi, RegState::Kill) + .addImm(Offset); + } else { + assert(is64Bit && "Huge stack is only supported on PPC64"); + TII.materializeImmPostRA(MBB, II, dl, SReg, Offset); } // Convert into indexed form of the instruction: @@ -1532,7 +1635,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OpC != TargetOpcode::INLINEASM_BR) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); - unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; + NewOpcode = ImmToIdxMap.find(OpC)->second; MI.setDesc(TII.get(NewOpcode)); OperandBase = 1; } else { @@ -1542,6 +1645,20 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); + + // Since these are not real X-Form instructions, we must + // add the registers and access 0(NewReg) rather than + // emitting the X-Form pseudo. + if (NewOpcode == PPC::LQX_PSEUDO || NewOpcode == PPC::STQX_PSEUDO) { + assert(is64Bit && "Quadword loads/stores only supported in 64-bit mode"); + Register NewReg = MF.getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(MBB, II, dl, TII.get(PPC::ADD8), NewReg) + .addReg(SReg, RegState::Kill) + .addReg(StackReg); + MI.setDesc(TII.get(NewOpcode == PPC::LQX_PSEUDO ? 
PPC::LQ : PPC::STQ)); + MI.getOperand(OperandBase + 1).ChangeToRegister(NewReg, false); + MI.getOperand(OperandBase).ChangeToImmediate(0); + } } Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 114f6d0f4c66..aaa841fffa1b 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -130,6 +130,8 @@ public: void lowerCRBitRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; + void lowerOctWordSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; void lowerACCSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; void lowerACCRestore(MachineBasicBlock::iterator II, diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 044035e0ef29..7892b0d12d01 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -18,8 +18,6 @@ def sub_32 : SubRegIndex<32>; def sub_64 : SubRegIndex<64>; def sub_vsx0 : SubRegIndex<128>; def sub_vsx1 : SubRegIndex<128, 128>; -def sub_pair0 : SubRegIndex<256>; -def sub_pair1 : SubRegIndex<256, 256>; def sub_gp8_x0 : SubRegIndex<64>; def sub_gp8_x1 : SubRegIndex<64, 64>; } @@ -100,21 +98,6 @@ class CRBIT num, string n> : PPCReg { let HWEncoding{4-0} = num; } -// ACC - One of the 8 512-bit VSX accumulators. -class ACC num, string n, list subregs> : PPCReg { - let HWEncoding{2-0} = num; - let SubRegs = subregs; -} - -// UACC - One of the 8 512-bit VSX accumulators prior to being primed. -// Without using this register class, the register allocator has no way to -// differentiate a primed accumulator from an unprimed accumulator. -// This may result in invalid copies between primed and unprimed accumulators. -class UACC num, string n, list subregs> : PPCReg { - let HWEncoding{2-0} = num; - let SubRegs = subregs; -} - // VSR Pairs - One of the 32 paired even-odd consecutive VSRs. class VSRPair num, string n, list subregs> : PPCReg { let HWEncoding{4-0} = num; @@ -272,9 +255,6 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>; def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; // SPE extra registers -// SPE Accumulator for multiply-accumulate SPE operations. Never directly -// accessed, so there's no real encoding for it. -def SPEACC: DwarfRegNum<[99, 111]>; def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>; def XER: SPR<1, "xer">, DwarfRegNum<[76]>; @@ -448,72 +428,6 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; } -let SubRegIndices = [sub_pair0, sub_pair1] in { - def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; - def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; - def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; - def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; - def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; - def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; - def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; - def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; -} -def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, - ACC4, ACC5, ACC6, ACC7)> { - // The AllocationPriority is in the range [0, 63]. Assigned the ACC registers - // the highest possible priority in this range to force the register allocator - // to assign these registers first. 
This is done because the ACC registers - // must represent 4 advacent vector registers. For example ACC1 must be - // VS4 - VS7. The value here must be at least 32 as we want to allocate - // these registers even before we allocate global ranges. - let AllocationPriority = 63; - let Size = 512; -} - -let SubRegIndices = [sub_pair0, sub_pair1] in { - def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; - def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; - def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; - def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; - def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; - def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; - def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; - def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; -} -def UACCRC : RegisterClass<"PPC", [v512i1], 128, - (add UACC0, UACC1, UACC2, UACC3, - UACC4, UACC5, UACC6, UACC7)> { - // The AllocationPriority for the UACC registers is still high and must be at - // least 32 as we want to allocate these registers before we allocate other - // global ranges. The value must be less than the AllocationPriority of the - // ACC registers. - let AllocationPriority = 36; - let Size = 512; -} - -// FIXME: This allocation order may increase stack frame size when allocating -// non-volatile registers. -// -// Placing Altivec registers first and allocate the rest as underlying VSX -// ones, to reduce interference with accumulator registers (lower 32 VSRs). -// This reduces copies when loading for accumulators, which is common use for -// paired VSX registers. -def VSRpRC : - RegisterClass<"PPC", [v256i1], 128, - (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21, - VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30, - VSRp29, VSRp28, VSRp27, VSRp26, - (sequence "VSRp%u", 0, 6), - (sequence "VSRp%u", 15, 7))> { - // Give the VSRp registers a non-zero AllocationPriority. The value is less - // than 32 as these registers should not always be allocated before global - // ranges and the value should be less than the AllocationPriority - 32 for - // the UACC registers. Even global VSRp registers should be allocated after - // the UACC registers have been chosen. - let AllocationPriority = 2; - let Size = 256; -} - // Make AllocationOrder as similar as G8RC's to avoid potential spilling. // Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated // at last. @@ -528,3 +442,572 @@ def G8pRC : }]; let Size = 128; } + +include "PPCRegisterInfoMMA.td" + +//===----------------------------------------------------------------------===// +// PowerPC Operand Definitions. + +// In the default PowerPC assembler syntax, registers are specified simply +// by number, so they cannot be distinguished from immediate values (without +// looking at the opcode). This means that the default operand matching logic +// for the asm parser does not work, and we need to specify custom matchers. +// Since those can only be specified with RegisterOperand classes and not +// directly on the RegisterClass, all instructions patterns used by the asm +// parser need to use a RegisterOperand (instead of a RegisterClass) for +// all their register operands. +// For this purpose, we define one RegisterOperand for each RegisterClass, +// using the same name as the class, just in lower case. 
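In practice the PredicateMethod hooks named in the operand classes below reduce to range checks on the parsed number; it is the operand class attached to each instruction, not the token itself, that decides register versus immediate. A toy C++ sketch of what those predicates accept (the real implementations live on the asm parser's operand class; illustrative only):

    #include <cstdint>

    bool isRegNumber(int64_t N) { return N >= 0 && N <= 31; }   // r0..r31 (GPR/FPR/VR files)
    bool isEvenRegNumber(int64_t N) { return isRegNumber(N) && (N & 1) == 0; } // register pairs
    bool isCCRegNumber(int64_t N) { return N >= 0 && N <= 7; }  // cr0..cr7
    bool isCRBitNumber(int64_t N) { return N >= 0 && N <= 31; } // 8 CR fields x 4 bits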
+
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+  let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+  let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+  let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+  let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegG8pRCAsmOperand : AsmOperandClass {
+  let Name = "RegG8pRC"; let PredicateMethod = "isEvenRegNumber";
+}
+def g8prc : RegisterOperand<G8pRC> {
+  let ParserMatchClass = PPCRegG8pRCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+  let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+  let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+  let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+  let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+  let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+  let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+  let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+  let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+  let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+  let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegVFRCAsmOperand : AsmOperandClass {
+  let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
+}
+def vfrc : RegisterOperand<VFRC> {
+  let ParserMatchClass = PPCRegVFRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+  let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+  let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+  let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+  let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+def PPCRegSPERCAsmOperand : AsmOperandClass {
+  let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
+}
+def sperc : RegisterOperand<SPERC> {
+  let ParserMatchClass = PPCRegSPERCAsmOperand;
+}
+def PPCRegSPE4RCAsmOperand : AsmOperandClass {
+  let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber";
+}
+def spe4rc : RegisterOperand<GPRC> {
+  let ParserMatchClass = PPCRegSPE4RCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+  let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+  let PrintMethod = "printU1ImmOperand";
+  let ParserMatchClass = PPCU1ImmAsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCU2ImmAsmOperand : AsmOperandClass {
+  let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+  let PrintMethod = "printU2ImmOperand";
+  let ParserMatchClass = PPCU2ImmAsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCATBitsAsHintAsmOperand : AsmOperandClass {
+  let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint";
+  let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails.
+}
+def atimm : Operand<i32> {
+  let PrintMethod = "printATBitsAsHint";
+  let ParserMatchClass = PPCATBitsAsHintAsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCU3ImmAsmOperand : AsmOperandClass {
+  let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+  let PrintMethod = "printU3ImmOperand";
+  let ParserMatchClass = PPCU3ImmAsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+  let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u4imm : Operand<i32> {
+  let PrintMethod = "printU4ImmOperand";
+  let ParserMatchClass = PPCU4ImmAsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS5ImmAsmOperand : AsmOperandClass {
+  let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+  let RenderMethod = "addImmOperands";
+}
+def s5imm : Operand<i32> {
+  let PrintMethod = "printS5ImmOperand";
+  let ParserMatchClass = PPCS5ImmAsmOperand;
+  let DecoderMethod = "decodeSImmOperand<5>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+  let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u5imm : Operand<i32> {
+  let PrintMethod = "printU5ImmOperand";
+  let ParserMatchClass = PPCU5ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<5>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+  let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u6imm : Operand<i32> {
+  let PrintMethod = "printU6ImmOperand";
+  let ParserMatchClass = PPCU6ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<6>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU7ImmAsmOperand : AsmOperandClass {
+  let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u7imm : Operand<i32> {
+  let PrintMethod = "printU7ImmOperand";
+  let ParserMatchClass = PPCU7ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<7>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU8ImmAsmOperand : AsmOperandClass {
+  let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u8imm : Operand<i32> {
+  let PrintMethod = "printU8ImmOperand";
+  let ParserMatchClass = PPCU8ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<8>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU10ImmAsmOperand : AsmOperandClass {
+  let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u10imm : Operand<i32> {
+  let PrintMethod = "printU10ImmOperand";
+  let ParserMatchClass = PPCU10ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<10>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU12ImmAsmOperand : AsmOperandClass {
+  let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u12imm : Operand<i32> {
+  let PrintMethod = "printU12ImmOperand";
+  let ParserMatchClass = PPCU12ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<12>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+  let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+  let RenderMethod = "addS16ImmOperands";
+}
+def s16imm : Operand<i32> {
+  let PrintMethod = "printS16ImmOperand";
+  let EncoderMethod = "getImm16Encoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
+  let DecoderMethod = "decodeSImmOperand<16>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+  let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+  let RenderMethod = "addU16ImmOperands";
+}
+def u16imm : Operand<i32> {
+  let PrintMethod = "printU16ImmOperand";
+  let EncoderMethod = "getImm16Encoding";
+  let ParserMatchClass = PPCU16ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<16>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS17ImmAsmOperand : AsmOperandClass {
+  let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
+  let RenderMethod = "addS16ImmOperands";
+}
+def s17imm : Operand<i32> {
+  // This operand type is used for addis/lis to allow the assembler parser
+  // to accept immediates in the range -65536..65535 for compatibility with
+  // the GNU assembler. The operand is treated as 16-bit otherwise.
+  let PrintMethod = "printS16ImmOperand";
+  let EncoderMethod = "getImm16Encoding";
+  let ParserMatchClass = PPCS17ImmAsmOperand;
+  let DecoderMethod = "decodeSImmOperand<16>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS34ImmAsmOperand : AsmOperandClass {
+  let Name = "S34Imm";
+  let PredicateMethod = "isS34Imm";
+  let RenderMethod = "addImmOperands";
+}
+def s34imm : Operand<i64> {
+  let PrintMethod = "printS34ImmOperand";
+  let EncoderMethod = "getImm34EncodingNoPCRel";
+  let ParserMatchClass = PPCS34ImmAsmOperand;
+  let DecoderMethod = "decodeSImmOperand<34>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def s34imm_pcrel : Operand<i64> {
+  let PrintMethod = "printS34ImmOperand";
+  let EncoderMethod = "getImm34EncodingPCRel";
+  let ParserMatchClass = PPCS34ImmAsmOperand;
+  let DecoderMethod = "decodeSImmOperand<34>";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCImmZeroAsmOperand : AsmOperandClass {
+  let Name = "ImmZero";
+  let PredicateMethod = "isImmZero";
+  let RenderMethod = "addImmOperands";
+}
+def immZero : Operand<i32> {
+  let PrintMethod = "printImmZeroOperand";
+  let ParserMatchClass = PPCImmZeroAsmOperand;
+  let DecoderMethod = "decodeImmZeroOperand";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+def PPCDirectBrAsmOperand : AsmOperandClass {
+  let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
+  let RenderMethod = "addBranchTargetOperands";
+}
+def directbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getDirectBrEncoding";
+  let DecoderMethod = "decodeDirectBrTarget";
+  let ParserMatchClass = PPCDirectBrAsmOperand;
+  let OperandType = "OPERAND_PCREL";
+}
+def absdirectbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printAbsBranchOperand";
+  let EncoderMethod = "getAbsDirectBrEncoding";
+  let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCondBrAsmOperand : AsmOperandClass {
+  let Name = "CondBr"; let PredicateMethod = "isCondBr";
+  let RenderMethod = "addBranchTargetOperands";
+}
+def condbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getCondBrEncoding";
+  let DecoderMethod = "decodeCondBrTarget";
+  let ParserMatchClass = PPCCondBrAsmOperand;
+  let OperandType = "OPERAND_PCREL";
+}
+def abscondbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printAbsBranchOperand";
+  let EncoderMethod = "getAbsCondBrEncoding";
+  let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def calltarget : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getDirectBrEncoding";
+  let DecoderMethod = "decodeDirectBrTarget";
+  let ParserMatchClass = PPCDirectBrAsmOperand;
+  let OperandType = "OPERAND_PCREL";
+}
+def abscalltarget : Operand<OtherVT> {
+  let PrintMethod = "printAbsBranchOperand";
+  let EncoderMethod = "getAbsDirectBrEncoding";
+  let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCRBitMaskOperand : AsmOperandClass {
+  let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
+}
+def crbitm: Operand<i8> {
+  let PrintMethod = "printcrbitm";
+  let EncoderMethod = "get_crbitm_encoding";
+  let DecoderMethod = "decodeCRBitMOperand";
+  let ParserMatchClass = PPCCRBitMaskOperand;
+}
+// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+  let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+  let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
+
+// New addressing modes with 34 bit immediates.
+def PPCDispRI34Operand : AsmOperandClass {
+  let Name = "DispRI34"; let PredicateMethod = "isS34Imm";
+  let RenderMethod = "addImmOperands";
+}
+def dispRI34 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRI34Operand;
+}
+def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
+  let PrintMethod = "printMemRegImm34";
+  let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getMemRI34Encoding";
+  let DecoderMethod = "decodeMemRI34Operands";
+}
+// memri, imm is a 34-bit value for pc-relative instructions where
+// the base register is set to zero.
+def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
+  let PrintMethod = "printMemRegImm34PCRel";
+  let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
+  let EncoderMethod = "getMemRI34PCRelEncoding";
+  let DecoderMethod = "decodeMemRI34PCRelOperands";
+}
+
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+  let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+  let ParserMatchClass = PPCRegGxRCOperand;
+}
+
+def PPCDispRIOperand : AsmOperandClass {
+  let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+  let RenderMethod = "addS16ImmOperands";
+}
+def dispRI : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+  let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+  let RenderMethod = "addS16ImmOperands";
+}
+def dispRIX : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIXOperand;
+}
+def PPCDispRIHashOperand : AsmOperandClass {
+  let Name = "DispRIHash"; let PredicateMethod = "isHashImmX8";
+  let RenderMethod = "addImmOperands";
+}
+def dispRIHash : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIHashOperand;
+}
+def PPCDispRIX16Operand : AsmOperandClass {
+  let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16";
+  let RenderMethod = "addS16ImmOperands";
+}
+def dispRIX16 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIX16Operand;
+}
+def PPCDispSPE8Operand : AsmOperandClass {
+  let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+  let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+  let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+  let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+  let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+  let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispSPE2Operand;
+}
+
+def memri : Operand<iPTR> {
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getMemRIEncoding";
+  let DecoderMethod = "decodeMemRIOperands";
+  let OperandType = "OPERAND_MEMORY";
+}
+def memrr : Operand<iPTR> {
+  let PrintMethod = "printMemRegReg";
+  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
+  let OperandType = "OPERAND_MEMORY";
+}
+def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getMemRIXEncoding";
+  let DecoderMethod = "decodeMemRIXOperands";
+  let OperandType = "OPERAND_MEMORY";
+}
+def memrihash : Operand<iPTR> {
+  // memrihash 8-aligned for ROP Protection Instructions.
+  let PrintMethod = "printMemRegImmHash";
+  let MIOperandInfo = (ops dispRIHash:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getMemRIHashEncoding";
+  let DecoderMethod = "decodeMemRIHashOperands";
+  let OperandType = "OPERAND_MEMORY";
+}
+def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getMemRIX16Encoding";
+  let DecoderMethod = "decodeMemRIX16Operands";
+  let OperandType = "OPERAND_MEMORY";
+}
+def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getSPE8DisEncoding";
+  let DecoderMethod = "decodeSPE8Operands";
+  let OperandType = "OPERAND_MEMORY";
+}
+def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getSPE4DisEncoding";
+  let DecoderMethod = "decodeSPE4Operands";
+  let OperandType = "OPERAND_MEMORY";
+}
+def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getSPE2DisEncoding";
+  let DecoderMethod = "decodeSPE2Operands";
+  let OperandType = "OPERAND_MEMORY";
+}
+
+// A single-register address. This is used with the SjLj
+// pseudo-instructions which translate to LD/LWZ. These instructions require
+// G8RC_NOX0 registers.
+def memr : Operand<iPTR> {
+  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
+  let OperandType = "OPERAND_MEMORY";
+}
+def PPCTLSRegOperand : AsmOperandClass {
+  let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+  let RenderMethod = "addTLSRegOperands";
+}
+def tlsreg32 : Operand<i32> {
+  let EncoderMethod = "getTLSRegEncoding";
+  let ParserMatchClass = PPCTLSRegOperand;
+}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+  let PrintMethod = "printTLSCall";
+  let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+  let EncoderMethod = "getTLSCallEncoding";
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
+  let PrintMethod = "printPredicateOperand";
+  let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
+}
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+  let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+  let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+  let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+  let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
+def PPCRegVSSRCAsmOperand : AsmOperandClass {
+  let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vssrc : RegisterOperand<VSSRC> {
+  let ParserMatchClass = PPCRegVSSRCAsmOperand;
+}
+
+def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
+  let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber";
+}
+
+def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
+  let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
+}
+
+def PPCRegVSRpRCAsmOperand : AsmOperandClass {
+  let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrprc : RegisterOperand<VSRpRC> {
+  let ParserMatchClass = PPCRegVSRpRCAsmOperand;
+}
+
+def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
+  let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrpevenrc : RegisterOperand<VSRpRC> {
+  let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
+  let EncoderMethod = "getVSRpEvenEncoding";
+  let DecoderMethod = "decodeVSRpEvenOperands";
+}
+
+def PPCRegACCRCAsmOperand : AsmOperandClass {
+  let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber";
+}
+
+def acc : RegisterOperand<ACCRC> {
+  let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
+
+def uacc : RegisterOperand<UACCRC> {
+  let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td
new file mode 100644
index 000000000000..0b6305f95a0a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td
@@ -0,0 +1,106 @@
+//===-- PPCRegisterInfoMMA.td - The PowerPC Register File --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register info for registers related to MMA. These are the ACC and UACC
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_pair0 : SubRegIndex<256>;
+def sub_pair1 : SubRegIndex<256, 256>;
+}
+
+// ACC - One of the 8 512-bit VSX accumulators.
+class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+  let HWEncoding{2-0} = num;
+  let SubRegs = subregs;
+}
+
+// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
+// Without using this register class, the register allocator has no way to
+// differentiate a primed accumulator from an unprimed accumulator.
+// This may result in invalid copies between primed and unprimed accumulators.
+class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+  let HWEncoding{2-0} = num;
+  let SubRegs = subregs;
+}
+
+// SPE Accumulator for multiply-accumulate SPE operations. Never directly
+// accessed, so there's no real encoding for it.
+def SPEACC: DwarfRegNum<[99, 111]>;
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+  def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+  def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+  def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+  def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+  def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+  def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+  def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+  def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
+                                                     ACC4, ACC5, ACC6, ACC7)> {
+  // The AllocationPriority is in the range [0, 63]. Assign the ACC registers
+  // the highest possible priority in this range to force the register allocator
+  // to assign these registers first. This is done because the ACC registers
+  // must represent 4 adjacent vector registers. For example ACC1 must be
+  // VS4 - VS7. The value here must be at least 32 as we want to allocate
+  // these registers even before we allocate global ranges.
+  let AllocationPriority = 63;
+  let Size = 512;
+}
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+  def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+  def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+  def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+  def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+  def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+  def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+  def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+  def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def UACCRC : RegisterClass<"PPC", [v512i1], 128,
+                           (add UACC0, UACC1, UACC2, UACC3,
+                                UACC4, UACC5, UACC6, UACC7)> {
+  // The AllocationPriority for the UACC registers is still high and must be at
+  // least 32 as we want to allocate these registers before we allocate other
+  // global ranges. The value must be less than the AllocationPriority of the
+  // ACC registers.
+  let AllocationPriority = 36;
+  let Size = 512;
+}
+
+// FIXME: This allocation order may increase stack frame size when allocating
+// non-volatile registers.
+//
+// Place Altivec registers first and allocate the rest as underlying VSX
+// ones, to reduce interference with accumulator registers (lower 32 VSRs).
+// This reduces copies when loading for accumulators, which is a common use
+// for paired VSX registers.
+def VSRpRC :
+  RegisterClass<"PPC", [v256i1], 128,
+                (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
+                     VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
+                     VSRp29, VSRp28, VSRp27, VSRp26,
+                     (sequence "VSRp%u", 0, 6),
+                     (sequence "VSRp%u", 15, 7))> {
+  // Give the VSRp registers a non-zero AllocationPriority. The value is less
+  // than 32 as these registers should not always be allocated before global
+  // ranges and the value should be less than the AllocationPriority - 32 for
+  // the UACC registers. Even global VSRp registers should be allocated after
+  // the UACC registers have been chosen.
+ let AllocationPriority = 2; + let Size = 256; +} + + + + diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td index bf56491f373a..f89ef735a367 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td @@ -36,7 +36,7 @@ def P10Model : SchedMachineModel { let CompleteModel = 1; // Do not support SPE (Signal Procesing Engine) on Power 10. - let UnsupportedFeatures = [HasSPE, IsE500, IsBookE]; + let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture]; } let SchedModel = P10Model in { diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index 3dc069ecad8a..d35011171715 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -42,7 +42,7 @@ def P9Model : SchedMachineModel { // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions // introduced in ISA 3.1. let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, - PCRelativeMemops, IsISA3_1]; + PCRelativeMemops, IsISA3_1, IsISAFuture]; } let SchedModel = P9Model in { @@ -404,7 +404,6 @@ let SchedModel = P9Model in { def P9_LoadAndALU2Op_7C : WriteSequence<[P9_LS_4C, P9_ALU_3C]>; def P9_LoadAndALU2Op_8C : WriteSequence<[P9_LS_5C, P9_ALU_3C]>; def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>; - def P9_LoadAndLoadOp_8C : WriteSequence<[P9_LS_4C, P9_LS_4C]>; def P9_IntDivAndALUOp_18C_8 : WriteSequence<[P9_DIV_16C_8, P9_ALU_2C]>; def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>; def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index f11b4e14073e..98424234a592 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -18,6 +18,7 @@ #include "PPCRegisterInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" @@ -140,6 +141,7 @@ void PPCSubtarget::initializeEnvironment() { IsISA2_07 = false; IsISA3_0 = false; IsISA3_1 = false; + IsISAFuture = false; UseLongCalls = false; SecurePlt = false; VectorsUseTwoUnits = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 1300b62b623a..3281816eab4a 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -19,7 +19,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -160,6 +160,7 @@ protected: bool IsISA2_07; bool IsISA3_0; bool IsISA3_1; + bool IsISAFuture; bool UseLongCalls; bool SecurePlt; bool VectorsUseTwoUnits; @@ -336,6 +337,7 @@ public: bool isISA2_07() const { return IsISA2_07; } bool isISA3_0() const { return IsISA3_0; } bool isISA3_1() const { return IsISA3_1; } + bool isISAFuture() const { return IsISAFuture; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } bool hasStoreFusion() const { return HasStoreFusion; } diff --git 
a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3eff00fc3c05..fe396cbfc011 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/Localizer.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
@@ -97,6 +98,13 @@ static cl::opt<bool>
 ReduceCRLogical("ppc-reduce-cr-logicals",
                 cl::desc("Expand eligible cr-logical binary ops to branches"),
                 cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnablePPCGenScalarMASSEntries(
+    "enable-ppc-gen-scalar-mass", cl::init(false),
+    cl::desc("Enable lowering math functions to their corresponding MASS "
+             "(scalar) entries"),
+    cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
   // Register the targets
   RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -123,8 +131,10 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
   initializePPCTLSDynamicCallPass(PR);
   initializePPCMIPeepholePass(PR);
   initializePPCLowerMASSVEntriesPass(PR);
+  initializePPCGenScalarMASSEntriesPass(PR);
   initializePPCExpandAtomicPseudoPass(PR);
   initializeGlobalISel(PR);
+  initializePPCCTRLoopsPass(PR);
 }
 
 static bool isLittleEndianTriple(const Triple &T) {
@@ -236,10 +246,10 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
 
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                            Optional<Reloc::Model> RM) {
-  assert((!TT.isOSAIX() || !RM.hasValue() || *RM == Reloc::PIC_) &&
+  assert((!TT.isOSAIX() || !RM || *RM == Reloc::PIC_) &&
          "Invalid relocation model for AIX.");
 
-  if (RM.hasValue())
+  if (RM)
     return *RM;
 
   // Big Endian PPC and AIX default to PIC.
@@ -429,6 +439,14 @@ void PPCPassConfig::addIRPasses() {
   // Lower generic MASSV routines to PowerPC subtarget-specific entries.
   addPass(createPPCLowerMASSVEntriesPass());
 
+  // Generate PowerPC target-specific entries for scalar math functions
+  // that are available in the IBM MASS (scalar) library.
+  if (TM->getOptLevel() == CodeGenOpt::Aggressive &&
+      EnablePPCGenScalarMASSEntries) {
+    TM->Options.PPCGenScalarMASSEntries = EnablePPCGenScalarMASSEntries;
+    addPass(createPPCGenScalarMASSEntriesPass());
+  }
+
   // If explicitly requested, add explicit data prefetch intrinsics.
   if (EnablePrefetch.getNumOccurrences() > 0)
     addPass(createLoopDataPrefetchPass());
@@ -522,6 +540,16 @@ void PPCPassConfig::addPreRegAlloc() {
   if (EnableExtraTOCRegDeps)
     addPass(createPPCTOCRegDepsPass());
 
+  // Run the CTR loops pass before the MachinePipeliner pass.
+  // MachinePipeliner will pipeline all instructions before the terminator, but
+  // we don't want DecreaseCTRPseudo to be pipelined.
+  // Note we may lose some MachinePipeliner opportunities if we run the CTR
+  // loops generation pass before MachinePipeliner and the loop is converted
+  // back to a normal loop. We can revisit this later, running PPCCTRLoops
+  // after MachinePipeliner and handling DecreaseCTRPseudo in the
+  // MachinePipeliner pass.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCCTRLoopsPass());
+
   if (getOptLevel() != CodeGenOpt::None)
     addPass(&MachinePipelinerID);
 }
@@ -549,7 +577,7 @@ void PPCPassConfig::addPreEmitPass2() {
 }
 
 TargetTransformInfo
-PPCTargetMachine::getTargetTransformInfo(const Function &F) {
+PPCTargetMachine::getTargetTransformInfo(const Function &F) const {
   return TargetTransformInfo(PPCTTIImpl(this, F));
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index d3fe5362ccdc..bafb79c84942 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -51,7 +51,7 @@ public:
   // Pass Pipeline Configuration
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
-  TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+  TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
 
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index 82fcd9e1c2bc..e3fc6285494c 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -10,6 +10,7 @@
 #define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
 
 namespace llvm {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index cc5738a5d7b6..cf728933c08d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -28,11 +28,6 @@ using namespace llvm;
 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
 
-// This is currently only used for the data prefetch pass
-static cl::opt<unsigned>
-CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
-              cl::desc("The loop prefetch cache line size"));
-
 static cl::opt<bool>
 EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                 cl::desc("Enable using coldcc calling conv for cold "
@@ -491,15 +486,13 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
       case Intrinsic::experimental_constrained_sin:
       case Intrinsic::experimental_constrained_cos:
         return true;
-      // There is no corresponding FMA instruction for PPC double double.
-      // Thus, we need to disable CTR loop generation for this type.
-      case Intrinsic::fmuladd:
       case Intrinsic::copysign:
         if (CI->getArgOperand(0)->getType()->getScalarType()->
             isPPC_FP128Ty())
           return true;
         else
           continue; // ISD::FCOPYSIGN is never a library call.
+      case Intrinsic::fmuladd:
       case Intrinsic::fma:                Opcode = ISD::FMA;        break;
       case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
       case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
@@ -903,10 +896,6 @@ PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
 }
 
 unsigned PPCTTIImpl::getCacheLineSize() const {
-  // Check first if the user specified a custom line size.
-  if (CacheLineSize.getNumOccurrences() > 0)
-    return CacheLineSize;
-
   // Starting with P7 we have a cache line size of 128.
   unsigned Directive = ST->getCPUDirective();
   // Assume that Future CPU has the same cache line size as the others.
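With the -ppc-loop-prefetch-cache-line override removed, the line size depends only on the CPU directive. A minimal sketch of the remaining selection, assuming the rest of getTargetTransformInfo's getCacheLineSize (which this hunk does not show) keeps the behavior the surviving comments describe:

    // Sketch only: pre-P7 cores fall back to the generic 64-byte line;
    // P7 and later -- including the "Future" CPU noted above -- use 128.
    static unsigned cacheLineSizeSketch(bool IsPwr7OrLater) {
      return IsPwr7OrLater ? 128u : 64u;
    }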
@@ -1015,7 +1004,8 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost( InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, - Type *SubTp) { + Type *SubTp, + ArrayRef Args) { InstructionCost CostFactor = vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr); @@ -1319,8 +1309,8 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, return true; } -bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) { +bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { // PowerPC default behaviour here is "instruction number 1st priority". // If LsrNoInsnsCost is set, call default implementation. if (!LsrNoInsnsCost) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 0af6f2a308d9..790eb0b42afa 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -76,8 +76,8 @@ public: OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2); + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); bool isNumRegsMajorCostOfLSR(); bool shouldBuildRelLookupTables() const; /// @} @@ -111,7 +111,8 @@ public: ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, - ArrayRef Mask, int Index, Type *SubTp); + ArrayRef Mask, int Index, Type *SubTp, + ArrayRef Args = None); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index ff251f55afff..04fc7667257e 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -519,6 +519,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::XXSLDWI: case PPC::XSCVDPSPN: case PPC::XSCVSPDPN: + case PPC::MTVSCR: + case PPC::MFVSCR: break; } } diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt index c9984b7604bd..ee1ea735acad 100644 --- a/llvm/lib/Target/PowerPC/README_P9.txt +++ b/llvm/lib/Target/PowerPC/README_P9.txt @@ -310,7 +310,7 @@ VSX: . I checked existing instruction "XSCMPUDP". They are different in target register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register - . Use instrinsic: + . Use intrinsic: (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) @@ -322,7 +322,7 @@ VSX: "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; - . So we should use "XX3Form_Rcr" to implement instrinsic + . So we should use "XX3Form_Rcr" to implement intrinsic - Convert DP -> QP: xscvdpqp . Similar to XSCVDPSP: @@ -579,11 +579,6 @@ Atomic operations (l[dw]at, st[dw]at): - Provide builtins since not all FC's necessarily have an existing LLVM atomic operation -Load Doubleword Monitored (ldmx): -- Investigate whether there are any uses for this. 
It seems to be related to - Garbage Collection so it isn't likely to be all that useful for most - languages we deal with. - Move to CR from XER Extended (mcrxrx): - Is there a use for this in LLVM? diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 01f36e6dcdd2..69fb9d2844d3 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -161,7 +162,7 @@ class RISCVAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseRegister(OperandVector &Operands, bool AllowParens = false); OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands); - OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands); + OperandMatchResultTy parseZeroOffsetMemOp(OperandVector &Operands); OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands); OperandMatchResultTy parseBareSymbol(OperandVector &Operands); OperandMatchResultTy parseCallSymbol(OperandVector &Operands); @@ -170,6 +171,7 @@ class RISCVAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseVTypeI(OperandVector &Operands); OperandMatchResultTy parseMaskReg(OperandVector &Operands); OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands); + OperandMatchResultTy parseGPRAsFPR(OperandVector &Operands); bool parseOperand(OperandVector &Operands, StringRef Mnemonic); @@ -254,6 +256,11 @@ public: "target-abi)\n"; } + // Use computeTargetABI to check if ABIName is valid. If invalid, output + // error message. + RISCVABI::computeTargetABI(STI.getTargetTriple(), STI.getFeatureBits(), + ABIName); + const MCObjectFileInfo *MOFI = Parser.getContext().getObjectFileInfo(); ParserOptions.IsPicEnabled = MOFI->isPositionIndependent(); } @@ -273,6 +280,8 @@ struct RISCVOperand : public MCParsedAsmOperand { bool IsRV64; + bool IsGPRAsFPR; + struct RegOp { MCRegister RegNum; }; @@ -343,6 +352,14 @@ public: RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum); } + bool isGPRAsFPR() const { return isGPR() && IsGPRAsFPR; } + + bool isGPRF64AsFPR() const { return isGPR() && IsGPRAsFPR && IsRV64; } + + bool isGPRPF64AsFPR() const { + return isGPR() && IsGPRAsFPR && !IsRV64 && !((Reg.RegNum - RISCV::X0) & 1); + } + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { if (auto *RE = dyn_cast(Expr)) { @@ -447,8 +464,16 @@ public: bool isFenceArg() const { if (!isImm()) return false; - const MCExpr *Val = getImm(); - auto *SVal = dyn_cast(Val); + + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + if (evaluateConstantImm(getImm(), Imm, VK)) { + // Only accept 0 as a constant immediate. 
+      return VK == RISCVMCExpr::VK_RISCV_None && Imm == 0;
+    }
+
+    auto *SVal = dyn_cast<MCSymbolRefExpr>(getImm());
+
     if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
       return false;
 
@@ -530,41 +555,19 @@ public:
     return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
   }
 
-  bool isUImm2() const {
+  template <unsigned N> bool IsUImm() const {
     int64_t Imm;
     RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
     if (!isImm())
       return false;
     bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
-    return IsConstantImm && isUInt<2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+    return IsConstantImm && isUInt<N>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
   }
 
-  bool isUImm3() const {
-    int64_t Imm;
-    RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
-    if (!isImm())
-      return false;
-    bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
-    return IsConstantImm && isUInt<3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
-  }
-
-  bool isUImm5() const {
-    int64_t Imm;
-    RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
-    if (!isImm())
-      return false;
-    bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
-    return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
-  }
-
-  bool isUImm7() const {
-    int64_t Imm;
-    RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
-    if (!isImm())
-      return false;
-    bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
-    return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
-  }
+  bool isUImm2() { return IsUImm<2>(); }
+  bool isUImm3() { return IsUImm<3>(); }
+  bool isUImm5() { return IsUImm<5>(); }
+  bool isUImm7() { return IsUImm<7>(); }
 
   bool isRnumArg() const {
     int64_t Imm;
@@ -686,6 +689,16 @@ public:
 
   bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
 
+  bool isSImm12Lsb00000() const {
+    if (!isImm())
+      return false;
+    RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+    int64_t Imm;
+    bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+    return IsConstantImm && isShiftedInt<7, 5>(Imm) &&
+           VK == RISCVMCExpr::VK_RISCV_None;
+  }
+
   bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
 
   bool isSImm10Lsb0000NonZero() const {
@@ -831,12 +844,14 @@ public:
   }
 
   static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S,
-                                                 SMLoc E, bool IsRV64) {
+                                                 SMLoc E, bool IsRV64,
+                                                 bool IsGPRAsFPR = false) {
     auto Op = std::make_unique<RISCVOperand>(KindTy::Register);
     Op->Reg.RegNum = RegNo;
     Op->StartLoc = S;
     Op->EndLoc = E;
     Op->IsRV64 = IsRV64;
+    Op->IsGPRAsFPR = IsGPRAsFPR;
     return Op;
   }
 
@@ -897,6 +912,17 @@ public:
 
   void addFenceArgOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
+
+    int64_t Constant = 0;
+    RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+    if (evaluateConstantImm(getImm(), Constant, VK)) {
+      if (Constant == 0) {
+        Inst.addOperand(MCOperand::createImm(Constant));
+        return;
+      }
+      llvm_unreachable("FenceArg must contain only [iorw] or be 0");
+    }
+
     // isFenceArg has validated the operand, meaning this cast is safe
     auto SE = cast<MCSymbolRefExpr>(getImm());
 
@@ -904,7 +930,7 @@ public:
     for (char c : SE->getSymbol().getName()) {
       switch (c) {
       default:
-        llvm_unreachable("FenceArg must contain only [iorw]");
+        llvm_unreachable("FenceArg must contain only [iorw] or be 0");
       case 'i':
         Imm |= RISCVFenceField::I;
         break;
@@ -1182,6 +1208,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
         "immediate must be a multiple of 2 bytes in the range");
+  case Match_InvalidSImm12Lsb00000:
return generateImmOutOfRangeError( + Operands, ErrorInfo, -(1 << 11), (1 << 11) - 32, + "immediate must be a multiple of 32 bytes in the range"); case Match_InvalidSImm13Lsb0: return generateImmOutOfRangeError( Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2, @@ -1208,9 +1238,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidFenceArg: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); - return Error( - ErrorLoc, - "operand must be formed of letters selected in-order from 'iorw'"); + return Error(ErrorLoc, "operand must be formed of letters selected " + "in-order from 'iorw' or be 0"); } case Match_InvalidFRMArg: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); @@ -1594,9 +1623,11 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { return MatchOperand_Success; case AsmToken::Plus: Opcode = MCBinaryExpr::Add; + getLexer().Lex(); break; case AsmToken::Minus: Opcode = MCBinaryExpr::Sub; + getLexer().Lex(); break; } @@ -1737,9 +1768,7 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) { else goto MatchFail; - unsigned LmulLog2 = Log2_32(Lmul); - RISCVII::VLMUL VLMUL = - static_cast(Fractional ? 8 - LmulLog2 : LmulLog2); + RISCVII::VLMUL VLMUL = RISCVVType::encodeLMUL(Lmul, Fractional); unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic); @@ -1780,6 +1809,26 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) { return MatchOperand_Success; } +OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { + switch (getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + case AsmToken::Identifier: + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister RegNo; + matchRegisterNameHelper(isRV32E(), RegNo, Name); + + if (RegNo == RISCV::NoRegister) + return MatchOperand_NoMatch; + SMLoc S = getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + getLexer().Lex(); + Operands.push_back(RISCVOperand::createReg( + RegNo, S, E, isRV64(), !getSTI().hasFeature(RISCV::FeatureStdExtF))); + } + return MatchOperand_Success; +} + OperandMatchResultTy RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { if (getLexer().isNot(AsmToken::LParen)) { @@ -1806,7 +1855,8 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { return MatchOperand_Success; } -OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) { +OperandMatchResultTy +RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) { // Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand" // as one of their register operands, such as `(a0)`. This just denotes that // the register (in this case `a0`) contains a memory address. @@ -1822,9 +1872,9 @@ OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) { // offset if it is zero; require (and discard) parentheses; and add only the // parsed register operand to `Operands`. // - // These operands are printed with RISCVInstPrinter::printAtomicMemOp, which - // will only print the register surrounded by parentheses (which GNU as also - // uses as its canonical representation for these operands). + // These operands are printed with RISCVInstPrinter::printZeroOffsetMemOp, + // which will only print the register surrounded by parentheses (which GNU as + // also uses as its canonical representation for these operands). 
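+  // For example, `lr.w a1, (a0)` and `lr.w a1, 0(a0)` parse to the same
+  // single register operand, while a nonzero offset such as `lr.w a1, 4(a0)`
+  // is diagnosed as an error.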
std::unique_ptr OptionalImmOp; if (getLexer().isNot(AsmToken::LParen)) { @@ -1935,7 +1985,6 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, return true; // Parse until end of statement, consuming commas between operands - unsigned OperandIdx = 1; while (getLexer().is(AsmToken::Comma)) { // Consume comma token getLexer().Lex(); @@ -1943,8 +1992,6 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, // Parse next operand if (parseOperand(Operands, Name)) return true; - - ++OperandIdx; } if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -2120,11 +2167,11 @@ bool RISCVAsmParser::parseDirectiveAttribute() { StringRef Name = Parser.getTok().getIdentifier(); Optional Ret = ELFAttrs::attrTypeFromString(Name, RISCVAttrs::getRISCVAttributeTags()); - if (!Ret.hasValue()) { + if (!Ret) { Error(TagLoc, "attribute name not recognised: " + Name); return false; } - Tag = Ret.getValue(); + Tag = *Ret; Parser.Lex(); } else { const MCExpr *AttrExpr; @@ -2170,8 +2217,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() { Parser.Lex(); } - if (Parser.parseToken(AsmToken::EndOfStatement, - "unexpected token in '.attribute' directive")) + if (Parser.parseEOL()) return true; if (IsIntegerValue) @@ -2263,23 +2309,26 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value, MCRegister SrcReg = RISCV::X0; for (RISCVMatInt::Inst &Inst : Seq) { - if (Inst.Opc == RISCV::LUI) { + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + emitToStreamer(Out, + MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm)); + break; + case RISCVMatInt::RegX0: emitToStreamer( - Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm)); - } else if (Inst.Opc == RISCV::ADD_UW) { - emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW) - .addReg(DestReg) - .addReg(SrcReg) - .addReg(RISCV::X0)); - } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) { + Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg( + RISCV::X0)); + break; + case RISCVMatInt::RegReg: emitToStreamer( Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg( SrcReg)); - } else { + break; + case RISCVMatInt::RegImm: emitToStreamer( Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm( Inst.Imm)); + break; } // Only the first instruction has X0 as its source. 
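The rewritten emitLoadImm loop above dispatches on each materialization step's operand kind rather than on specific opcodes. A compact C++ sketch of that shape, with illustrative stand-in types (this is not the real RISCVMatInt API):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum class OpndKind { Imm, RegImm, RegReg, RegX0 };
    struct Step { const char *Mnemonic; OpndKind Kind; int64_t Imm; };

    // Emit a load-immediate sequence into a0; only the first step reads x0.
    static void emitSequenceSketch(const std::vector<Step> &Seq) {
      const char *Src = "x0";
      for (const Step &S : Seq) {
        switch (S.Kind) {
        case OpndKind::Imm:     // e.g. lui: destination and immediate only
          std::printf("%s a0, %lld\n", S.Mnemonic, (long long)S.Imm);
          break;
        case OpndKind::RegImm:  // e.g. addiw/slli: register plus immediate
          std::printf("%s a0, %s, %lld\n", S.Mnemonic, Src, (long long)S.Imm);
          break;
        case OpndKind::RegReg:  // e.g. sh1add: same register twice
          std::printf("%s a0, %s, %s\n", S.Mnemonic, Src, Src);
          break;
        case OpndKind::RegX0:   // e.g. add.uw: second source pinned to x0
          std::printf("%s a0, %s, x0\n", S.Mnemonic, Src);
          break;
        }
        Src = "a0"; // every later step reads the partial result
      }
    }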
@@ -2541,8 +2590,7 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst, } const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); - RISCVII::VConstraintType Constraints = - RISCVII::getConstraint(MCID.TSFlags); + RISCVII::VConstraintType Constraints = RISCVII::getConstraint(MCID.TSFlags); if (Constraints == RISCVII::NoConstraint) return false; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index ff96b2b254ca..1c732a15de2f 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -14,8 +14,8 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -60,11 +60,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() { static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { const FeatureBitset &FeatureBits = - static_cast(Decoder) - ->getSubtargetInfo() - .getFeatureBits(); + Decoder->getSubtargetInfo().getFeatureBits(); bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; if (RegNo >= 32 || (IsRV32E && RegNo >= 16)) @@ -77,7 +75,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -88,7 +86,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -99,7 +97,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 8) { return MCDisassembler::Fail; } @@ -110,7 +108,7 @@ static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -121,7 +119,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 8) { return MCDisassembler::Fail; } @@ -132,7 +130,7 @@ static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo == 0) { return MCDisassembler::Fail; } @@ -140,9 +138,9 @@ static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo, return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { 
+static DecodeStatus +DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo == 2) { return MCDisassembler::Fail; } @@ -152,7 +150,7 @@ static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 8) return MCDisassembler::Fail; @@ -161,9 +159,20 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32 || RegNo & 1) + return MCDisassembler::Fail; + + MCRegister Reg = RISCV::X0 + RegNo; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -174,7 +183,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -194,7 +203,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -214,7 +223,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo >= 32) return MCDisassembler::Fail; @@ -233,7 +242,8 @@ static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint64_t RegNo, } static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { MCRegister Reg = RISCV::NoRegister; switch (RegNo) { default: @@ -250,7 +260,8 @@ static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo, // Add implied SP operand for instructions *SP compressed instructions. The SP // operand isn't explicitly encoded in the instruction. 
-static void addImplySP(MCInst &Inst, int64_t Address, const void *Decoder) {
+static void addImplySP(MCInst &Inst, int64_t Address,
+                       const MCDisassembler *Decoder) {
   if (Inst.getOpcode() == RISCV::C_LWSP || Inst.getOpcode() == RISCV::C_SWSP ||
       Inst.getOpcode() == RISCV::C_LDSP || Inst.getOpcode() == RISCV::C_SDSP ||
       Inst.getOpcode() == RISCV::C_FLWSP ||
@@ -268,7 +279,8 @@ static void addImplySP(MCInst &Inst, int64_t Address, const void *Decoder) {
 
 template <unsigned N>
 static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
-                                      int64_t Address, const void *Decoder) {
+                                      int64_t Address,
+                                      const MCDisassembler *Decoder) {
   assert(isUInt<N>(Imm) && "Invalid immediate");
   addImplySP(Inst, Address, Decoder);
   Inst.addOperand(MCOperand::createImm(Imm));
@@ -278,7 +290,7 @@ static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
 template <unsigned N>
 static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
                                              int64_t Address,
-                                             const void *Decoder) {
+                                             const MCDisassembler *Decoder) {
   if (Imm == 0)
     return MCDisassembler::Fail;
   return decodeUImmOperand<N>(Inst, Imm, Address, Decoder);
@@ -286,7 +298,8 @@ static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
 
 template <unsigned N>
 static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
-                                      int64_t Address, const void *Decoder) {
+                                      int64_t Address,
+                                      const MCDisassembler *Decoder) {
   assert(isUInt<N>(Imm) && "Invalid immediate");
   addImplySP(Inst, Address, Decoder);
   // Sign-extend the number in the bottom N bits of Imm
@@ -297,7 +310,7 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
 template <unsigned N>
 static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
                                              int64_t Address,
-                                             const void *Decoder) {
+                                             const MCDisassembler *Decoder) {
   if (Imm == 0)
     return MCDisassembler::Fail;
   return decodeSImmOperand<N>(Inst, Imm, Address, Decoder);
@@ -306,7 +319,7 @@ static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
 
 template <unsigned N>
 static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
                                              int64_t Address,
-                                             const void *Decoder) {
+                                             const MCDisassembler *Decoder) {
   assert(isUInt<N>(Imm) && "Invalid immediate");
   // Sign-extend the number in the bottom N bits of Imm after accounting for
   // the fact that the N bit immediate is stored in N-1 bits (the LSB is
@@ -317,7 +330,7 @@ static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
 
 static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
                                          int64_t Address,
-                                         const void *Decoder) {
+                                         const MCDisassembler *Decoder) {
   assert(isUInt<6>(Imm) && "Invalid immediate");
   if (Imm > 31) {
     Imm = (SignExtend64<6>(Imm) & 0xfffff);
@@ -326,9 +339,8 @@ static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
   return MCDisassembler::Success;
 }
 
-static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
-                                 int64_t Address,
-                                 const void *Decoder) {
+static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm, int64_t Address,
+                                 const MCDisassembler *Decoder) {
   assert(isUInt<3>(Imm) && "Invalid immediate");
   if (!llvm::RISCVFPRndMode::isValidRoundingMode(Imm))
     return MCDisassembler::Fail;
@@ -338,26 +350,30 @@ static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
 }
 
 static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
-                                       uint64_t Address, const void *Decoder);
+                                       uint64_t Address,
+                                       const MCDisassembler *Decoder);
 
 static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
-                                         uint64_t Address, const void *Decoder);
+                                         uint64_t Address,
+                                         const MCDisassembler *Decoder);
 
 static DecodeStatus
decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); #include "RISCVGenDisassemblerTables.inc" static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { uint64_t SImm6 = fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder); @@ -368,7 +384,7 @@ static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeGPRRegisterClass(Inst, 0, Address, Decoder); uint64_t SImm6 = fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); @@ -380,7 +396,7 @@ static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn, static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeGPRRegisterClass(Inst, 0, Address, Decoder); Inst.addOperand(Inst.getOperand(0)); uint64_t UImm6 = @@ -392,7 +408,8 @@ static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, } static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(Insn, 7, 5); unsigned Rs2 = fieldFromInstruction(Insn, 2, 5); DecodeGPRRegisterClass(Inst, Rd, Address, Decoder); @@ -402,7 +419,7 @@ static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Rd = fieldFromInstruction(Insn, 7, 5); unsigned Rs2 = fieldFromInstruction(Insn, 2, 5); DecodeGPRRegisterClass(Inst, Rd, Address, Decoder); @@ -427,6 +444,27 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return MCDisassembler::Fail; } Insn = support::endian::read32le(Bytes.data()); + if (STI.getFeatureBits()[RISCV::FeatureStdExtZdinx] && + !STI.getFeatureBits()[RISCV::Feature64Bit]) { + LLVM_DEBUG(dbgs() << "Trying RV32Zdinx table (Double in Integer and" + "rv32)\n"); + Result = decodeInstruction(DecoderTableRV32Zdinx32, MI, Insn, Address, + this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + + if (STI.getFeatureBits()[RISCV::FeatureStdExtZfinx]) { + LLVM_DEBUG(dbgs() << "Trying RVZfinx table (Float in Integer):\n"); + Result = decodeInstruction(DecoderTableRVZfinx32, MI, Insn, Address, this, + STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n"); Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI); Size = 4; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 514789b3f645..a494adf8e210 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -583,16 +583,17 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign( const MCAlignFragment &AF, unsigned &Size) { // Calculate Nops Size only when linker relaxation enabled. - if (!STI.getFeatureBits()[RISCV::FeatureRelax]) + const MCSubtargetInfo *STI = AF.getSubtargetInfo(); + if (!STI->getFeatureBits()[RISCV::FeatureRelax]) return false; - bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC]; + bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC]; unsigned MinNopLen = HasStdExtC ? 2 : 4; if (AF.getAlignment() <= MinNopLen) { return false; } else { - Size = AF.getAlignment() - MinNopLen; + Size = AF.getAlignment().value() - MinNopLen; return true; } } @@ -606,7 +607,8 @@ bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF) { // Insert the fixup only when linker relaxation enabled. - if (!STI.getFeatureBits()[RISCV::FeatureRelax]) + const MCSubtargetInfo *STI = AF.getSubtargetInfo(); + if (!STI->getFeatureBits()[RISCV::FeatureRelax]) return false; // Calculate total Nops we need to insert. If there are none to insert diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index f04d2912f09d..5d62c3a8b0df 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -27,18 +27,15 @@ class RISCVAsmBackend : public MCAsmBackend { bool Is64Bit; bool ForceRelocs = false; const MCTargetOptions &TargetOptions; - RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; public: RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) { - TargetABI = RISCVABI::computeTargetABI( - STI.getTargetTriple(), STI.getFeatureBits(), Options.getABIName()); RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits()); } - ~RISCVAsmBackend() override {} + ~RISCVAsmBackend() override = default; void setForceRelocs() { ForceRelocs = true; } @@ -103,7 +100,6 @@ public: const MCSubtargetInfo *STI) const override; const MCTargetOptions &getTargetOptions() const { return TargetOptions; } - RISCVABI::ABI getTargetABI() const { return TargetABI; } }; } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 144e761f002d..9b69170d1c4a 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/RISCVISAInfo.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -61,15 +62,11 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, if (TargetABI != ABI_Unknown) return TargetABI; - // For now, default to the ilp32/ilp32e/lp64 ABI if no explicit ABI is given - // or an invalid/unrecognised string is given. In the future, it might be - // worth changing this to default to ilp32f/lp64f and ilp32d/lp64d when - // hardware support for floating point is present. - if (IsRV32E) - return ABI_ILP32E; - if (IsRV64) - return ABI_LP64; - return ABI_ILP32; + // If no explicit ABI is given, try to compute the default ABI. 
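For the alignment change above: with linker relaxation enabled, the backend reserves worst-case nop padding and lets the linker delete what it does not need. A minimal model of the size computation, assuming only that the smallest nop is 2 bytes with the C extension and 4 bytes without it:

#include <cassert>

// Sketch of shouldInsertExtraNopBytesForCodeAlign's size computation. The
// fragment's own subtarget now decides whether 2-byte compressed nops exist.
static unsigned maxNopPadding(unsigned AlignValue, bool HasStdExtC) {
  unsigned MinNopLen = HasStdExtC ? 2 : 4;
  return AlignValue <= MinNopLen ? 0 : AlignValue - MinNopLen;
}

int main() {
  assert(maxNopPadding(16, /*HasStdExtC=*/true) == 14);
  assert(maxNopPadding(16, /*HasStdExtC=*/false) == 12);
  assert(maxNopPadding(2, /*HasStdExtC=*/true) == 0); // already satisfied
  return 0;
}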
+ auto ISAInfo = RISCVFeatures::parseFeatureBits(IsRV64, FeatureBits); + if (!ISAInfo) + report_fatal_error(ISAInfo.takeError()); + return getTargetABI((*ISAInfo)->computeDefaultABI()); } ABI getTargetABI(StringRef ABIName) { @@ -106,13 +103,17 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) { report_fatal_error("RV32E can't be enabled for an RV64 target"); } -void toFeatureVector(std::vector<std::string> &FeatureVector, - const FeatureBitset &FeatureBits) { +llvm::Expected<std::unique_ptr<RISCVISAInfo>> +parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) { + unsigned XLen = IsRV64 ? 64 : 32; + std::vector<std::string> FeatureVector; + // Convert FeatureBitset to FeatureVector. for (auto Feature : RISCVFeatureKV) { if (FeatureBits[Feature.Value] && llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key)) FeatureVector.push_back(std::string("+") + Feature.Key); } + return llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector); } } // namespace RISCVFeatures @@ -130,7 +131,7 @@ unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic) { assert(isValidSEW(SEW) && "Invalid SEW"); unsigned VLMULBits = static_cast<unsigned>(VLMUL); - unsigned VSEWBits = Log2_32(SEW) - 3; + unsigned VSEWBits = encodeSEW(SEW); unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7); if (TailAgnostic) VTypeI |= 0x40; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 01c6bd90ea58..fa408f7fc5d7 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/RISCVISAInfo.h" namespace llvm { @@ -87,9 +88,16 @@ enum { // Pseudos. IsRVVWideningReductionShift = HasVecPolicyOpShift + 1, IsRVVWideningReductionMask = 1 << IsRVVWideningReductionShift, + + // Does this instruction care about mask policy? If not, the mask policy + // could be either agnostic or undisturbed. For example, the results of + // unmasked, store, and reduction operations are not affected by mask policy, + // so the compiler is free to select either one. + UsesMaskPolicyShift = IsRVVWideningReductionShift + 1, + UsesMaskPolicyMask = 1 << UsesMaskPolicyShift, }; -// Match with the definitions in RISCVInstrFormatsV.td +// Match with the definitions in RISCVInstrFormats.td enum VConstraintType { NoConstraint = 0, VS2Constraint = 0b001, @@ -109,8 +117,8 @@ enum VLMUL : uint8_t { }; enum { - TAIL_UNDISTURBED = 0, TAIL_AGNOSTIC = 1, + MASK_AGNOSTIC = 2, }; // Helper functions to read TSFlags. @@ -120,8 +128,8 @@ static inline unsigned getFormat(uint64_t TSFlags) { } /// \returns the constraint for the instruction. static inline VConstraintType getConstraint(uint64_t TSFlags) { - return static_cast<VConstraintType> ((TSFlags & ConstraintMask) >> ConstraintShift); + return static_cast<VConstraintType>((TSFlags & ConstraintMask) >> + ConstraintShift); } /// \returns the LMUL for the instruction. static inline VLMUL getLMul(uint64_t TSFlags) { @@ -155,6 +163,30 @@ static inline bool hasVecPolicyOp(uint64_t TSFlags) { static inline bool isRVVWideningReduction(uint64_t TSFlags) { return TSFlags & IsRVVWideningReductionMask; } +/// \returns true if mask policy is valid for the instruction.
+static inline bool usesMaskPolicy(uint64_t TSFlags) { + return TSFlags & UsesMaskPolicyMask; +} + +static inline unsigned getVLOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + // This method is only called if we expect to have a VL operand, and all + // instructions with VL also have SEW. + assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags)); + unsigned Offset = 2; + if (hasVecPolicyOp(TSFlags)) + Offset = 3; + return Desc.getNumOperands() - Offset; +} + +static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasSEWOp(TSFlags)); + unsigned Offset = 1; + if (hasVecPolicyOp(TSFlags)) + Offset = 2; + return Desc.getNumOperands() - Offset; +} // RISC-V Specific Machine Operand Flags enum { @@ -189,6 +221,7 @@ enum OperandType : unsigned { OPERAND_UIMM7, OPERAND_UIMM12, OPERAND_SIMM12, + OPERAND_SIMM12_LSB00000, OPERAND_UIMM20, OPERAND_UIMMLOG2XLEN, OPERAND_RVKRNUM, @@ -344,9 +377,8 @@ namespace RISCVFeatures { // triple. Exits with report_fatal_error if not. void validate(const Triple &TT, const FeatureBitset &FeatureBits); -// Convert FeatureBitset to FeatureVector. -void toFeatureVector(std::vector &FeatureVector, - const FeatureBitset &FeatureBits); +llvm::Expected> +parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits); } // namespace RISCVFeatures @@ -372,11 +404,22 @@ inline static RISCVII::VLMUL getVLMUL(unsigned VType) { // Decode VLMUL into 1,2,4,8 and fractional indicator. std::pair decodeVLMUL(RISCVII::VLMUL VLMUL); +inline static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional) { + assert(isValidLMUL(LMUL, Fractional) && "Unsupported LMUL"); + unsigned LmulLog2 = Log2_32(LMUL); + return static_cast(Fractional ? 8 - LmulLog2 : LmulLog2); +} + inline static unsigned decodeVSEW(unsigned VSEW) { assert(VSEW < 8 && "Unexpected VSEW value"); return 1 << (VSEW + 3); } +inline static unsigned encodeSEW(unsigned SEW) { + assert(isValidSEW(SEW) && "Unexpected SEW value"); + return Log2_32(SEW) - 3; +} + inline static unsigned getSEW(unsigned VType) { unsigned VSEW = (VType >> 3) & 0x7; return decodeVSEW(VSEW); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index fb1ce19d73bc..0c362c57e5c0 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -43,7 +43,7 @@ RISCVELFObjectWriter::RISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit) : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_RISCV, /*HasRelocationAddend*/ true) {} -RISCVELFObjectWriter::~RISCVELFObjectWriter() {} +RISCVELFObjectWriter::~RISCVELFObjectWriter() = default; unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index d88ba9e4ac72..c5f8a42bab6a 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -16,6 +16,7 @@ #include "RISCVMCTargetDesc.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" @@ -30,38 +31,12 @@ using namespace llvm; // This part is for ELF object output. 
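The vtype helpers above are all small bit manipulations: encodeSEW maps SEW to log2(SEW) - 3, encodeLMUL folds fractional LMULs into the high bit, and encodeVTYPE packs them together with the policy bits. A self-contained sketch of the packing, mirroring the shifts and masks shown above rather than calling the LLVM helpers:

#include <cassert>

// vtype layout per the code above: bits [2:0] vlmul, bits [5:3] vsew,
// bit 6 tail-agnostic (ta), bit 7 mask-agnostic (ma).
static unsigned encodeVTypeSketch(unsigned SEW, unsigned VLMULBits,
                                  bool TailAgnostic, bool MaskAgnostic) {
  unsigned VSEWBits = 0;
  while ((1u << (VSEWBits + 3)) < SEW) // Log2_32(SEW) - 3, without LLVM deps
    ++VSEWBits;
  unsigned VType = (VSEWBits << 3) | (VLMULBits & 0x7);
  if (TailAgnostic)
    VType |= 0x40;
  if (MaskAgnostic)
    VType |= 0x80;
  return VType;
}

int main() {
  // SEW=32 (vsew=2), LMUL=1 (vlmul=0), ta and ma set: 0x80|0x40|0x10 == 0xd0.
  assert(encodeVTypeSketch(32, 0, true, true) == 0xd0);
  return 0;
}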
RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) - : RISCVTargetStreamer(S), CurrentVendor("riscv") { + : RISCVTargetStreamer(S), CurrentVendor("riscv"), STI(STI) { MCAssembler &MCA = getStreamer().getAssembler(); const FeatureBitset &Features = STI.getFeatureBits(); auto &MAB = static_cast(MCA.getBackend()); - RISCVABI::ABI ABI = MAB.getTargetABI(); - assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); - - unsigned EFlags = MCA.getELFHeaderEFlags(); - - if (Features[RISCV::FeatureStdExtC]) - EFlags |= ELF::EF_RISCV_RVC; - - switch (ABI) { - case RISCVABI::ABI_ILP32: - case RISCVABI::ABI_LP64: - break; - case RISCVABI::ABI_ILP32F: - case RISCVABI::ABI_LP64F: - EFlags |= ELF::EF_RISCV_FLOAT_ABI_SINGLE; - break; - case RISCVABI::ABI_ILP32D: - case RISCVABI::ABI_LP64D: - EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE; - break; - case RISCVABI::ABI_ILP32E: - EFlags |= ELF::EF_RISCV_RVE; - break; - case RISCVABI::ABI_Unknown: - llvm_unreachable("Improperly initialised target ABI"); - } - - MCA.setELFHeaderEFlags(EFlags); + setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features, + MAB.getTargetOptions().getABIName())); } MCELFStreamer &RISCVTargetELFStreamer::getStreamer() { @@ -98,12 +73,12 @@ void RISCVTargetELFStreamer::finishAttributeSection() { return; if (AttributeSection) { - Streamer.SwitchSection(AttributeSection); + Streamer.switchSection(AttributeSection); } else { MCAssembler &MCA = getStreamer().getAssembler(); AttributeSection = MCA.getContext().getELFSection( ".riscv.attributes", ELF::SHT_RISCV_ATTRIBUTES, 0); - Streamer.SwitchSection(AttributeSection); + Streamer.switchSection(AttributeSection); Streamer.emitInt8(ELFAttrs::Format_Version); } @@ -172,6 +147,44 @@ size_t RISCVTargetELFStreamer::calculateContentSize() const { return Result; } +void RISCVTargetELFStreamer::finish() { + RISCVTargetStreamer::finish(); + MCAssembler &MCA = getStreamer().getAssembler(); + const FeatureBitset &Features = STI.getFeatureBits(); + RISCVABI::ABI ABI = getTargetABI(); + + unsigned EFlags = MCA.getELFHeaderEFlags(); + + if (Features[RISCV::FeatureStdExtC]) + EFlags |= ELF::EF_RISCV_RVC; + + switch (ABI) { + case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_LP64: + break; + case RISCVABI::ABI_ILP32F: + case RISCVABI::ABI_LP64F: + EFlags |= ELF::EF_RISCV_FLOAT_ABI_SINGLE; + break; + case RISCVABI::ABI_ILP32D: + case RISCVABI::ABI_LP64D: + EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE; + break; + case RISCVABI::ABI_ILP32E: + EFlags |= ELF::EF_RISCV_RVE; + break; + case RISCVABI::ABI_Unknown: + llvm_unreachable("Improperly initialised target ABI"); + } + + MCA.setELFHeaderEFlags(EFlags); +} + +void RISCVTargetELFStreamer::reset() { + AttributeSection = nullptr; + Contents.clear(); +} + namespace { class RISCVELFStreamer : public MCELFStreamer { static std::pair getRelocPairForSize(unsigned Size) { @@ -194,6 +207,14 @@ class RISCVELFStreamer : public MCELFStreamer { static bool requiresFixups(MCContext &C, const MCExpr *Value, const MCExpr *&LHS, const MCExpr *&RHS) { + auto IsMetadataOrEHFrameSection = [](const MCSection &S) -> bool { + // Additionally check .apple_names/.apple_types. They are fixed-size and + // do not need fixups. llvm-dwarfdump --apple-names does not process + // R_RISCV_{ADD,SUB}32 in them. 
+ return S.getKind().isMetadata() || S.getName() == ".eh_frame" || + S.getName() == ".apple_names" || S.getName() == ".apple_types"; + }; + const auto *MBE = dyn_cast(Value); if (MBE == nullptr) return false; @@ -212,10 +233,20 @@ class RISCVELFStreamer : public MCELFStreamer { MCConstantExpr::create(E.getConstant(), C), C); RHS = E.getSymB(); - return (A.isInSection() ? A.getSection().hasInstructions() - : !A.getName().empty()) || - (B.isInSection() ? B.getSection().hasInstructions() - : !B.getName().empty()); + // TODO: when available, R_RISCV_n_PCREL should be preferred. + + // Avoid pairwise relocations for symbolic difference in debug and .eh_frame + if (A.isInSection()) + return !IsMetadataOrEHFrameSection(A.getSection()); + if (B.isInSection()) + return !IsMetadataOrEHFrameSection(B.getSection()); + // as well as for absolute symbols. + return !A.getName().empty() || !B.getName().empty(); + } + + void reset() override { + static_cast(getTargetStreamer())->reset(); + MCELFStreamer::reset(); } public: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index 7ce7dafb8ca1..7ca2f5ab5623 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -29,6 +29,7 @@ private: SmallVector Contents; MCSection *AttributeSection = nullptr; + const MCSubtargetInfo &STI; AttributeItem *getAttributeItem(unsigned Attribute) { for (size_t i = 0; i < Contents.size(); ++i) @@ -91,6 +92,8 @@ private: void finishAttributeSection() override; size_t calculateContentSize() const; + void reset() override; + public: MCELFStreamer &getStreamer(); RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); @@ -103,6 +106,8 @@ public: void emitDirectiveOptionNoRVC() override; void emitDirectiveOptionRelax() override; void emitDirectiveOptionNoRelax() override; + + void finish() override; }; MCELFStreamer *createRISCVELFStreamer(MCContext &C, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 3268740849f0..7f88589374dd 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -146,7 +146,7 @@ void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo, if ((FenceArg & RISCVFenceField::W) != 0) O << 'w'; if (FenceArg == 0) - O << "unknown"; + O << "0"; } void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo, @@ -156,12 +156,12 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo, O << RISCVFPRndMode::roundingModeToString(FRMArg); } -void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { +void RISCVInstPrinter::printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNo); - assert(MO.isReg() && "printAtomicMemOp can only print register operands"); + assert(MO.isReg() && "printZeroOffsetMemOp can only print register operands"); O << "("; printRegName(O, MO.getReg()); O << ")"; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index d078ead2c8ad..763ce9c95d73 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -40,8 +40,8 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void 
printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printAtomicMemOp(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); + void printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printVMaskReg(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 1078403a3fd2..7c062387fecd 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" @@ -46,7 +47,7 @@ public: RISCVMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII) : Ctx(ctx), MCII(MCII) {} - ~RISCVMCCodeEmitter() override {} + ~RISCVMCCodeEmitter() override = default; void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, @@ -93,7 +94,6 @@ private: } // end anonymous namespace MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new RISCVMCCodeEmitter(Ctx, MCII); } @@ -132,9 +132,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS, const MCExpr *CallExpr = Func.getExpr(); // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type. - TmpInst = MCInstBuilder(RISCV::AUIPC) - .addReg(Ra) - .addOperand(MCOperand::createExpr(CallExpr)); + TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); support::endian::write(OS, Binary, support::little); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index 65714b914c60..336289cf107b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp index 9c9d9221578c..554711e87521 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp @@ -13,6 +13,7 @@ #include "RISCVMCObjectFileInfo.h" #include "RISCVMCTargetDesc.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSubtargetInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 07c2be624932..917d93479f18 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -77,11 +77,9 @@ createRISCVMCObjectFileInfo(MCContext &Ctx, bool PIC, static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - if (CPU.empty()) + if (CPU.empty() || CPU == "generic") CPU = TT.isArch64Bit() ? 
"generic-rv64" : "generic-rv32"; - if (CPU == "generic") - report_fatal_error(Twine("CPU 'generic' is not supported. Use ") + - (TT.isArch64Bit() ? "generic-rv64" : "generic-rv32")); + return createRISCVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index 5216a689715a..276fc9efb6c0 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -29,7 +29,6 @@ class MCSubtargetInfo; class Target; MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index e935179e5f9b..d19da6bd3664 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -18,10 +18,9 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) { int Cost = 0; for (auto Instr : Res) { - bool Compressed; + // Assume instructions that aren't listed aren't compressible. + bool Compressed = false; switch (Instr.Opc) { - default: - llvm_unreachable("Unexpected opcode"); case RISCV::SLLI: case RISCV::SRLI: Compressed = true; @@ -31,9 +30,6 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) { case RISCV::LUI: Compressed = isInt<6>(Instr.Imm); break; - case RISCV::ADD_UW: - Compressed = false; - break; } // Two RVC instructions take the same space as one RVI instruction, but // can take longer to execute than the single RVI instruction. Thus, we @@ -77,6 +73,12 @@ static void generateInstSeqImpl(int64_t Val, assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target"); + // Use BSETI for a single bit. + if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) { + Res.push_back(RISCVMatInt::Inst(RISCV::BSETI, Log2_64(Val))); + return; + } + // In the worst case, for a full 64-bit constant, a sequence of 8 instructions // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits @@ -101,43 +103,53 @@ static void generateInstSeqImpl(int64_t Val, // performed when the recursion returns. int64_t Lo12 = SignExtend64<12>(Val); - int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12; - int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52); - Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount); + Val = (uint64_t)Val - (uint64_t)Lo12; - // If the remaining bits don't fit in 12 bits, we might be able to reduce the - // shift amount in order to use LUI which will zero the lower 12 bits. + int ShiftAmount = 0; bool Unsigned = false; - if (ShiftAmount > 12 && !isInt<12>(Hi52)) { - if (isInt<32>((uint64_t)Hi52 << 12)) { - // Reduce the shift amount and add zeros to the LSBs so it will match LUI. - ShiftAmount -= 12; - Hi52 = (uint64_t)Hi52 << 12; - } else if (isUInt<32>((uint64_t)Hi52 << 12) && - ActiveFeatures[RISCV::FeatureStdExtZba]) { - // Reduce the shift amount and add zeros to the LSBs so it will match - // LUI, then shift left with SLLI.UW to clear the upper 32 set bits. - ShiftAmount -= 12; - Hi52 = ((uint64_t)Hi52 << 12) | (0xffffffffull << 32); + + // Val might now be valid for LUI without needing a shift. 
+ if (!isInt<32>(Val)) { + ShiftAmount = findFirstSet((uint64_t)Val); + Val >>= ShiftAmount; + + // If the remaining bits don't fit in 12 bits, we might be able to reduce the + // shift amount in order to use LUI which will zero the lower 12 bits. + if (ShiftAmount > 12 && !isInt<12>(Val)) { + if (isInt<32>((uint64_t)Val << 12)) { + // Reduce the shift amount and add zeros to the LSBs so it will match LUI. + ShiftAmount -= 12; + Val = (uint64_t)Val << 12; + } else if (isUInt<32>((uint64_t)Val << 12) && + ActiveFeatures[RISCV::FeatureStdExtZba]) { + // Reduce the shift amount and add zeros to the LSBs so it will match + // LUI, then shift left with SLLI.UW to clear the upper 32 set bits. + ShiftAmount -= 12; + Val = ((uint64_t)Val << 12) | (0xffffffffull << 32); + Unsigned = true; + } + } + + // Try to use SLLI_UW for Val when it is uint32 but not int32. + if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) && + ActiveFeatures[RISCV::FeatureStdExtZba]) { + // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with + // SLLI_UW. + Val = ((uint64_t)Val) | (0xffffffffull << 32); Unsigned = true; } } - // Try to use SLLI_UW for Hi52 when it is uint32 but not int32. - if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) && - ActiveFeatures[RISCV::FeatureStdExtZba]) { - // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with - // SLLI_UW. - Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32); - Unsigned = true; - } + generateInstSeqImpl(Val, ActiveFeatures, Res); - generateInstSeqImpl(Hi52, ActiveFeatures, Res); + // Skip shift if we were able to use LUI directly. + if (ShiftAmount) { + if (Unsigned) + Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount)); + else + Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount)); + } - if (Unsigned) - Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount)); - else - Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount)); if (Lo12) Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12)); } @@ -166,6 +178,24 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { RISCVMatInt::InstSeq Res; generateInstSeqImpl(Val, ActiveFeatures, Res); + // If there are trailing zeros, try generating a sign extended constant with + // no trailing zeros and use a final SLLI to restore them. + if ((Val & 1) == 0 && Res.size() > 2) { + unsigned TrailingZeros = countTrailingZeros((uint64_t)Val); + int64_t ShiftedVal = Val >> TrailingZeros; + RISCVMatInt::InstSeq TmpSeq; + generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq); + TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SLLI, TrailingZeros)); + + // Keep the new sequence if it is an improvement. + if (TmpSeq.size() < Res.size()) { + Res = TmpSeq; + // A 2 instruction sequence is the best we can do. + if (Res.size() <= 2) + return Res; + } + } + // If the constant is positive we might be able to generate a shifted constant // with no leading zeros and use a final SRLI to restore them. if (Val > 0 && Res.size() > 2) { @@ -302,32 +332,34 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0)); if (TmpSeq.size() < Res.size()) Res = TmpSeq; - } - // Try to use LUI+SH*ADD+ADDI. 
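The rewritten materialisation logic above peels the low 12 bits off first because ADDI sign-extends its immediate; the remainder is then a multiple of 4096 and, once shifted, may fit LUI directly. A worked sketch of that base split for a 32-bit constant (plain C++, not the RISCVMatInt API):

#include <cassert>
#include <cstdint>

// Split a 32-bit constant into LUI (upper 20 bits) + ADDI (signed low 12),
// as generateInstSeqImpl does before recursing: Lo12 = SignExtend64<12>(Val),
// after which Val - Lo12 is 0 mod 4096 and feeds LUI.
static void splitLuiAddi(int32_t Val, uint32_t &Hi20, int32_t &Lo12) {
  Lo12 = (int32_t)((uint32_t)Val << 20) >> 20; // sign-extend bottom 12 bits
  Hi20 = ((uint32_t)Val - (uint32_t)Lo12) >> 12;
}

int main() {
  uint32_t Hi20;
  int32_t Lo12;
  splitLuiAddi(0x12345678, Hi20, Lo12);
  assert(Hi20 == 0x12345 && Lo12 == 0x678); // lui; addi
  splitLuiAddi(0x12345FFF, Hi20, Lo12);
  assert(Hi20 == 0x12346 && Lo12 == -1);    // lui rounds up, addi subtracts
  return 0;
}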
- int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull; - int64_t Lo12 = SignExtend64<12>(Val); - Div = 0; - if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) { - Div = 3; - Opc = RISCV::SH1ADD; - } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) { - Div = 5; - Opc = RISCV::SH2ADD; - } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) { - Div = 9; - Opc = RISCV::SH3ADD; - } - // Build the new instruction sequence. - if (Div > 0) { - // For Val that has zero Lo12 (implies Val equals to Hi52) should has - // already been processed to LUI+SH*ADD by previous optimization. - assert(Lo12 != 0 && - "unexpected instruction sequence for immediate materialisation"); - generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq); - TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0)); - TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12)); - if (TmpSeq.size() < Res.size()) - Res = TmpSeq; + } else { + // Try to use LUI+SH*ADD+ADDI. + int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull; + int64_t Lo12 = SignExtend64<12>(Val); + Div = 0; + if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) { + Div = 3; + Opc = RISCV::SH1ADD; + } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) { + Div = 5; + Opc = RISCV::SH2ADD; + } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) { + Div = 9; + Opc = RISCV::SH3ADD; + } + // Build the new instruction sequence. + if (Div > 0) { + // For Val that has zero Lo12 (implies Val equals to Hi52) should has + // already been processed to LUI+SH*ADD by previous optimization. + assert(Lo12 != 0 && + "unexpected instruction sequence for immediate materialisation"); + assert(TmpSeq.empty() && "Expected empty TmpSeq"); + generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq); + TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0)); + TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12)); + if (TmpSeq.size() < Res.size()) + Res = TmpSeq; + } } } @@ -362,5 +394,30 @@ int getIntMatCost(const APInt &Val, unsigned Size, } return std::max(1, Cost); } + +OpndKind Inst::getOpndKind() const { + switch (Opc) { + default: + llvm_unreachable("Unexpected opcode!"); + case RISCV::LUI: + return RISCVMatInt::Imm; + case RISCV::ADD_UW: + return RISCVMatInt::RegX0; + case RISCV::SH1ADD: + case RISCV::SH2ADD: + case RISCV::SH3ADD: + return RISCVMatInt::RegReg; + case RISCV::ADDI: + case RISCV::ADDIW: + case RISCV::SLLI: + case RISCV::SRLI: + case RISCV::SLLI_UW: + case RISCV::RORI: + case RISCV::BSETI: + case RISCV::BCLRI: + return RISCVMatInt::RegImm; + } +} + } // namespace RISCVMatInt } // namespace llvm diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index 6a8e0c640001..90c29f01c43d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -17,11 +17,21 @@ namespace llvm { class APInt; namespace RISCVMatInt { + +enum OpndKind { + RegImm, // ADDI/ADDIW/SLLI/SRLI/BSETI/BCLRI + Imm, // LUI + RegReg, // SH1ADD/SH2ADD/SH3ADD + RegX0, // ADD_UW +}; + struct Inst { unsigned Opc; int64_t Imm; Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {} + + OpndKind getOpndKind() const; }; using InstSeq = SmallVector; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 2f016374e6a2..5f9ed77d07cf 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -22,6 +22,7 @@ using namespace llvm; RISCVTargetStreamer::RISCVTargetStreamer(MCStreamer &S) : 
MCTargetStreamer(S) {} void RISCVTargetStreamer::finish() { finishAttributeSection(); } +void RISCVTargetStreamer::reset() {} void RISCVTargetStreamer::emitDirectiveOptionPush() {} void RISCVTargetStreamer::emitDirectiveOptionPop() {} @@ -38,6 +39,10 @@ void RISCVTargetStreamer::emitTextAttribute(unsigned Attribute, void RISCVTargetStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) {} +void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) { + assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialized target ABI"); + TargetABI = ABI; +} void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureRV32E)) @@ -45,15 +50,10 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { else emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); - unsigned XLen = STI.hasFeature(RISCV::Feature64Bit) ? 64 : 32; - std::vector FeatureVector; - RISCVFeatures::toFeatureVector(FeatureVector, STI.getFeatureBits()); - - auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector); + auto ParseResult = RISCVFeatures::parseFeatureBits( + STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits()); if (!ParseResult) { - /* Assume any error about features should handled earlier. */ - consumeError(ParseResult.takeError()); - llvm_unreachable("Parsing feature error when emitTargetAttributes?"); + report_fatal_error(ParseResult.takeError()); } else { auto &ISAInfo = *ParseResult; emitTextAttribute(RISCVAttrs::ARCH, ISAInfo->toString()); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h index 171780d94ce7..0d35d0b698a9 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVTARGETSTREAMER_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVTARGETSTREAMER_H +#include "RISCV.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -17,9 +18,12 @@ namespace llvm { class formatted_raw_ostream; class RISCVTargetStreamer : public MCTargetStreamer { + RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; + public: RISCVTargetStreamer(MCStreamer &S); void finish() override; + virtual void reset(); virtual void emitDirectiveOptionPush(); virtual void emitDirectiveOptionPop(); @@ -36,6 +40,8 @@ public: StringRef StringValue); void emitTargetAttributes(const MCSubtargetInfo &STI); + void setTargetABI(RISCVABI::ABI ABI); + RISCVABI::ABI getTargetABI() const { return TargetABI; } }; // This part is for ascii assembly output diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 03462240fd93..917837a307ad 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -32,10 +32,14 @@ class PassRegistry; bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); -bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, +bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &MCOp, const AsmPrinter &AP); -FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); +FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM, + CodeGenOpt::Level OptLevel); + +FunctionPass *createRISCVMakeCompressibleOptPass(); +void initializeRISCVMakeCompressibleOptPass(PassRegistry &); FunctionPass *createRISCVGatherScatterLoweringPass(); void 
initializeRISCVGatherScatterLoweringPass(PassRegistry &); @@ -55,6 +59,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &); FunctionPass *createRISCVInsertVSETVLIPass(); void initializeRISCVInsertVSETVLIPass(PassRegistry &); +FunctionPass *createRISCVRedundantCopyEliminationPass(); +void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); + InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index e32a8fb010de..e783ef38b448 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -41,6 +41,13 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">, AssemblerPredicate<(all_of FeatureStdExtD), "'D' (Double-Precision Floating-Point)">; +def FeatureStdExtZihintpause + : SubtargetFeature<"zihintpause", "HasStdExtZihintpause", "true", + "'zihintpause' (Pause Hint)">; +def HasStdExtZihintpause : Predicate<"Subtarget->hasStdExtZihintpause()">, + AssemblerPredicate<(all_of FeatureStdExtZihintpause), + "'Zihintpause' (Pause Hint)">; + def FeatureStdExtZfhmin : SubtargetFeature<"zfhmin", "HasStdExtZfhmin", "true", "'Zfhmin' (Half-Precision Floating-Point Minimal)", @@ -63,6 +70,43 @@ def HasStdExtZfhOrZfhmin "'Zfh' (Half-Precision Floating-Point) or " "'Zfhmin' (Half-Precision Floating-Point Minimal)">; +def FeatureStdExtZfinx + : SubtargetFeature<"zfinx", "HasStdExtZfinx", "true", + "'Zfinx' (Float in Integer)">; +def HasStdExtZfinx : Predicate<"Subtarget->hasStdExtZfinx()">, + AssemblerPredicate<(all_of FeatureStdExtZfinx), + "'Zfinx' (Float in Integer)">; + +def FeatureStdExtZdinx + : SubtargetFeature<"zdinx", "HasStdExtZdinx", "true", + "'Zdinx' (Double in Integer)", + [FeatureStdExtZfinx]>; +def HasStdExtZdinx : Predicate<"Subtarget->hasStdExtZdinx()">, + AssemblerPredicate<(all_of FeatureStdExtZdinx), + "'Zdinx' (Double in Integer)">; + +def FeatureStdExtZhinxmin + : SubtargetFeature<"zhinxmin", "HasStdExtZhinxmin", "true", + "'Zhinxmin' (Half Float in Integer Minimal)", + [FeatureStdExtZfinx]>; +def HasStdExtZhinxmin : Predicate<"Subtarget->hasStdExtZhinxmin()">, + AssemblerPredicate<(all_of FeatureStdExtZhinxmin), + "'Zhinxmin' (Half Float in Integer Minimal)">; + +def FeatureStdExtZhinx + : SubtargetFeature<"zhinx", "HasStdExtZhinx", "true", + "'Zhinx' (Half Float in Integer)", + [FeatureStdExtZfinx]>; +def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">, + AssemblerPredicate<(all_of FeatureStdExtZhinx), + "'Zhinx' (Half Float in Integer)">; + +def HasStdExtZhinxOrZhinxmin + : Predicate<"Subtarget->hasStdExtZhinx() || Subtarget->hasStdExtZhinxmin()">, + AssemblerPredicate<(any_of FeatureStdExtZhinx, FeatureStdExtZhinxmin), + "'Zhinx' (Half Float in Integer) or " + "'Zhinxmin' (Half Float in Integer Minimal)">; + def FeatureStdExtC : SubtargetFeature<"c", "HasStdExtC", "true", "'C' (Compressed Instructions)">; @@ -290,13 +334,13 @@ def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">, AssemblerPredicate<(all_of(not FeatureNoRVCHints)), "RVC Hint Instructions">; -def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "ExtZvl::Zvl32b", +def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "32", "'Zvl' (Minimum Vector Length) 32">; foreach i = { 6-15 } in { defvar I = !shl(1, i); def FeatureStdExtZvl#I#b : - SubtargetFeature<"zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b", + SubtargetFeature<"zvl"#I#"b", "ZvlLen", !cast(I), "'Zvl' (Minimum Vector Length) "#I, 
[!cast("FeatureStdExtZvl"#!srl(I, 1)#"b")]>; } @@ -333,24 +377,50 @@ def FeatureStdExtZve64d def FeatureStdExtV : SubtargetFeature<"v", "HasStdExtV", "true", "'V' (Vector Extension for Application Processors)", - [FeatureStdExtZvl128b, FeatureStdExtF, FeatureStdExtD]>; + [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>; def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">, AssemblerPredicate< - (any_of FeatureStdExtZve32x, FeatureStdExtV), + (any_of FeatureStdExtZve32x), "'V' (Vector Extension for Application Processors), 'Zve32x' or " "'Zve64x' (Vector Extensions for Embedded Processors)">; def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">, AssemblerPredicate< - (any_of FeatureStdExtZve64x, FeatureStdExtV), + (any_of FeatureStdExtZve64x), "'V' (Vector Extension for Application Processors) or 'Zve64x' " "(Vector Extensions for Embedded Processors)">; def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">, AssemblerPredicate< - (any_of FeatureStdExtZve32f, FeatureStdExtV), + (any_of FeatureStdExtZve32f), "'V' (Vector Extension for Application Processors), 'Zve32f', " "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">; +def FeatureStdExtZvfh + : SubtargetFeature<"experimental-zvfh", "HasStdExtZvfh", "true", + "'Zvfh' (Vector Half-Precision Floating-Point)", + [FeatureStdExtZve32f]>; + +def FeatureStdExtZicbom + : SubtargetFeature<"zicbom", "HasStdExtZicbom", "true", + "'Zicbom' (Cache-Block Management Instructions)">; +def HasStdExtZicbom : Predicate<"Subtarget->hasStdExtZicbom()">, + AssemblerPredicate<(all_of FeatureStdExtZicbom), + "'Zicbom' (Cache-Block Management Instructions)">; + +def FeatureStdExtZicboz + : SubtargetFeature<"zicboz", "HasStdExtZicboz", "true", + "'Zicboz' (Cache-Block Zero Instructions)">; +def HasStdExtZicboz : Predicate<"Subtarget->hasStdExtZicboz()">, + AssemblerPredicate<(all_of FeatureStdExtZicboz), + "'Zicboz' (Cache-Block Zero Instructions)">; + +def FeatureStdExtZicbop + : SubtargetFeature<"zicbop", "HasStdExtZicbop", "true", + "'Zicbop' (Cache-Block Prefetch Instructions)">; +def HasStdExtZicbop : Predicate<"Subtarget->hasStdExtZicbop()">, + AssemblerPredicate<(all_of FeatureStdExtZicbop), + "'Zicbop' (Cache-Block Prefetch Instructions)">; + def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">; def IsRV64 : Predicate<"Subtarget->is64Bit()">, @@ -381,6 +451,19 @@ foreach i = {1-31} in def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore", "true", "Enable save/restore.">; +def FeatureUnalignedScalarMem + : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", + "true", "Has reasonably performant unaligned scalar " + "loads and stores">; + +def TuneLUIADDIFusion + : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion", + "true", "Enable LUI+ADDI macrofusion">; + +def TuneNoDefaultUnroll + : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false", + "Disable default unroll preference.">; + def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", "SiFive 7-Series processors">; @@ -408,14 +491,17 @@ include "RISCVSchedSiFive7.td" def : ProcessorModel<"generic-rv32", NoSchedModel, []>; def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; +// Support generic for compatibility with other targets. The triple will be used +// to change to the appropriate rv32/rv64 version. 
+def : ProcessorModel<"generic", NoSchedModel, []>; def : ProcessorModel<"rocket-rv32", RocketModel, []>; def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; def : ProcessorModel<"sifive-7-rv32", SiFive7Model, [], - [TuneSiFive7]>; + [TuneSiFive7, TuneNoDefaultUnroll]>; def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit], - [TuneSiFive7]>; + [TuneSiFive7, TuneNoDefaultUnroll]>; def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM, FeatureStdExtC]>; @@ -442,7 +528,7 @@ def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM, FeatureStdExtA, FeatureStdExtF, FeatureStdExtC], - [TuneSiFive7]>; + [TuneSiFive7, TuneNoDefaultUnroll]>; def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit, FeatureStdExtM, @@ -467,7 +553,7 @@ def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit, FeatureStdExtF, FeatureStdExtD, FeatureStdExtC], - [TuneSiFive7]>; + [TuneSiFive7, TuneNoDefaultUnroll]>; def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, FeatureStdExtM, @@ -482,7 +568,7 @@ def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit, FeatureStdExtF, FeatureStdExtD, FeatureStdExtC], - [TuneSiFive7]>; + [TuneSiFive7, TuneNoDefaultUnroll]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 9fed6e7baadc..5b2a247ebda0 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -38,12 +38,13 @@ STATISTIC(RISCVNumInstrsCompressed, namespace { class RISCVAsmPrinter : public AsmPrinter { - const MCSubtargetInfo *STI; + const MCSubtargetInfo *MCSTI; + const RISCVSubtarget *STI; public: explicit RISCVAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {} + : AsmPrinter(TM, std::move(Streamer)), MCSTI(TM.getMCSubtargetInfo()) {} StringRef getPassName() const override { return "RISCV Assembly Printer"; } @@ -62,12 +63,14 @@ public: // Wrapper needed for tblgenned pseudo lowering. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { - return LowerRISCVMachineOperandToMCOperand(MO, MCOp, *this); + return lowerRISCVMachineOperandToMCOperand(MO, MCOp, *this); } void emitStartOfAsmFile(Module &M) override; void emitEndOfAsmFile(Module &M) override; + void emitFunctionEntryLabel() override; + private: void emitAttributes(); }; @@ -170,7 +173,8 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) { MCSubtargetInfo &NewSTI = OutStreamer->getContext().getSubtargetCopy(*TM.getMCSubtargetInfo()); NewSTI.setFeatureBits(MF.getSubtarget().getFeatureBits()); - STI = &NewSTI; + MCSTI = &NewSTI; + STI = &MF.getSubtarget(); SetupMachineFunction(MF); emitFunctionBody(); @@ -193,7 +197,14 @@ void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) { void RISCVAsmPrinter::emitAttributes() { RISCVTargetStreamer &RTS = static_cast(*OutStreamer->getTargetStreamer()); - RTS.emitTargetAttributes(*STI); + RTS.emitTargetAttributes(*MCSTI); +} + +void RISCVAsmPrinter::emitFunctionEntryLabel() { + AsmPrinter::emitFunctionEntryLabel(); + RISCVTargetStreamer &RTS = + static_cast(*OutStreamer->getTargetStreamer()); + RTS.setTargetABI(STI->getTargetABI()); } // Force static initialization. 
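With the ABI now recorded on the target streamer per function, the ELF header flags can be derived once at finish() rather than at streamer construction. The mapping itself, seen in RISCVTargetELFStreamer::finish earlier in this patch, is a small table; a standalone sketch with flag values copied from llvm/include/llvm/BinaryFormat/ELF.h (hypothetical function name):

#include <cstdint>

// Sketch of the ABI -> e_flags mapping in RISCVTargetELFStreamer::finish().
enum : uint32_t {
  EF_RISCV_RVC = 0x1,
  EF_RISCV_FLOAT_ABI_SINGLE = 0x2,
  EF_RISCV_FLOAT_ABI_DOUBLE = 0x4,
  EF_RISCV_RVE = 0x8,
};

enum class Abi { ILP32, ILP32F, ILP32D, ILP32E, LP64, LP64F, LP64D };

static uint32_t riscvEFlags(Abi ABI, bool HasStdExtC) {
  uint32_t EFlags = HasStdExtC ? EF_RISCV_RVC : 0;
  switch (ABI) {
  case Abi::ILP32F:
  case Abi::LP64F:
    EFlags |= EF_RISCV_FLOAT_ABI_SINGLE;
    break;
  case Abi::ILP32D:
  case Abi::LP64D:
    EFlags |= EF_RISCV_FLOAT_ABI_DOUBLE;
    break;
  case Abi::ILP32E:
    EFlags |= EF_RISCV_RVE;
    break;
  default: // soft-float ABIs contribute no float-ABI bits
    break;
  }
  return EFlags;
}
// e.g. riscvEFlags(Abi::LP64D, /*HasStdExtC=*/true) == 0x5 for rv64gc/lp64d.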
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 0c5c13db7112..e4e01d9f6f2f 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -290,7 +290,7 @@ bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB, Register SrcReg = MBBI->getOperand(0).getReg(); Register Base = MBBI->getOperand(1).getReg(); Register VL = MBBI->getOperand(2).getReg(); - auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode()); + auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MBBI->getOpcode()); if (!ZvlssegInfo) return false; unsigned NF = ZvlssegInfo->first; @@ -314,10 +314,15 @@ bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB, assert(LMUL == 1 && "LMUL must be 1, 2, or 4."); for (unsigned I = 0; I < NF; ++I) { + // Add an implicit use of the super register to describe that we are using + // only part of it; this prevents the machine verifier from complaining + // when part of the subreg is undef. See the comment in + // MachineVerifier::checkLiveness for more detail. BuildMI(MBB, MBBI, DL, TII->get(Opcode)) .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I)) .addReg(Base) - .addMemOperand(*(MBBI->memoperands_begin())); + .addMemOperand(*(MBBI->memoperands_begin())) + .addReg(SrcReg, RegState::Implicit); if (I != NF - 1) BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base) .addReg(Base) @@ -335,7 +340,7 @@ bool RISCVExpandPseudo::expandVRELOAD(MachineBasicBlock &MBB, Register DestReg = MBBI->getOperand(0).getReg(); Register Base = MBBI->getOperand(1).getReg(); Register VL = MBBI->getOperand(2).getReg(); - auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode()); + auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MBBI->getOpcode()); if (!ZvlssegInfo) return false; unsigned NF = ZvlssegInfo->first; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index ad003404d793..57d8ba6f0161 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -21,6 +21,8 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCDwarf.h" +#include <algorithm> + using namespace llvm; // For now we use x18, a.k.a s2, as pointer to shadow call stack. @@ -250,6 +252,7 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { // Determines the size of the frame and maximum call frame size. void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); // Get the number of bytes to allocate from the FrameInfo. uint64_t FrameSize = MFI.getStackSize(); @@ -262,6 +265,28 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { // Update frame info. MFI.setStackSize(FrameSize); + + // When using SP or BP to access stack objects, we may require extra padding + // to ensure the bottom of the RVV stack is correctly aligned within the main + // stack. We calculate this as the amount required to align the scalar local + // variable section up to the RVV alignment.
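The padding amount mentioned in that comment is simply the distance from the scalar-locals size up to the next RVV-alignment boundary. A minimal model, assuming offsetToAlignment(Value, Align) behaves as in llvm/Support/Alignment.h:

#include <cassert>
#include <cstdint>

// Rough equivalent of offsetToAlignment() as used above for RVV padding.
static uint64_t offsetToAlignmentSketch(uint64_t Value, uint64_t Align) {
  return (Align - (Value % Align)) % Align;
}

int main() {
  // 40 bytes of scalar locals below a 16-byte-aligned RVV area need 8 bytes
  // of padding; an already-aligned 64-byte section needs none.
  assert(offsetToAlignmentSketch(40, 16) == 8);
  assert(offsetToAlignmentSketch(64, 16) == 0);
  return 0;
}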
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) { + int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize(); + if (auto RVVPadding = + offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign())) + RVFI->setRVVPadding(RVVPadding); + } +} + +// Returns the stack size including RVV padding (when required), rounded back +// up to the required stack alignment. +uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( + const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo(); + return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); } void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, @@ -280,21 +305,43 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, .addReg(SrcReg) .addImm(Val) .setMIFlag(Flag); - } else { - unsigned Opc = RISCV::ADD; - bool isSub = Val < 0; - if (isSub) { - Val = -Val; - Opc = RISCV::SUB; - } + return; + } - Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + // Try to split the offset across two ADDIs. We need to keep the stack pointer + // aligned after each ADDI. We need to determine the maximum value we can put + // in each ADDI. In the negative direction, we can use -2048 which is always + // sufficiently aligned. In the positive direction, we need to find the + // largest 12-bit immediate that is aligned. Exclude -4096 since it can be + // created with LUI. + assert(getStackAlign().value() < 2048 && "Stack alignment too large"); + int64_t MaxPosAdjStep = 2048 - getStackAlign().value(); + if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) { + int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep; + Val -= FirstAdj; + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg) .addReg(SrcReg) - .addReg(ScratchReg, RegState::Kill) + .addImm(FirstAdj) + .setMIFlag(Flag); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(Val) .setMIFlag(Flag); + return; + } + + unsigned Opc = RISCV::ADD; + if (Val < 0) { + Val = -Val; + Opc = RISCV::SUB; } + + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); + BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addReg(ScratchReg, RegState::Kill) + .setMIFlag(Flag); } // Returns the register used to hold the frame pointer. @@ -401,7 +448,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // FIXME (note copied from Lanai): This appears to be overallocating. Needs // investigation. Get the number of bytes to allocate from the FrameInfo. - uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -482,7 +529,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // Emit the second SP adjustment after saving callee saved registers. 
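For the adjustReg change above, the split is chosen so each intermediate stack pointer value stays aligned: the negative step is always -2048, while the positive step is the largest aligned 12-bit immediate, 2048 - StackAlign. A hedged sketch of the arithmetic (hypothetical helper, valid only inside the two-ADDI window the code above handles):

#include <cassert>
#include <cstdint>
#include <utility>

// Model of the two-ADDI offset split in RISCVFrameLowering::adjustReg,
// assuming Val > -4096 and Val <= 2 * (2048 - StackAlign).
static std::pair<int64_t, int64_t> splitSpAdjust(int64_t Val,
                                                 int64_t StackAlign) {
  int64_t MaxPosAdjStep = 2048 - StackAlign;
  int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
  return {FirstAdj, Val - FirstAdj};
}

int main() {
  auto [A, B] = splitSpAdjust(4000, 16);  // addi sp,sp,2032; addi sp,sp,1968
  assert(A == 2032 && B == 1968);
  auto [C, D] = splitSpAdjust(-3000, 16); // addi sp,sp,-2048; addi sp,sp,-952
  assert(C == -2048 && D == -952);
  return 0;
}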
if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + uint64_t SecondSPAdjustAmount = + getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, @@ -492,8 +540,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // don't emit an sp-based .cfi_def_cfa_offset if (!hasFP(MF)) { // Emit ".cfi_def_cfa_offset StackSize" - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( + nullptr, getStackSizeWithRVVPadding(MF))); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); @@ -561,15 +609,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.end(); DebugLoc DL; if (!MBB.empty()) { - MBBI = MBB.getFirstTerminator(); - if (MBBI == MBB.end()) - MBBI = MBB.getLastNonDebugInstr(); - DL = MBBI->getDebugLoc(); + MBBI = MBB.getLastNonDebugInstr(); + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); - // If this is not a terminator, the actual insert location should be after the - // last instruction. - if (!MBBI->isTerminator()) - MBBI = std::next(MBBI); + MBBI = MBB.getFirstTerminator(); // If callee-saved registers are saved via libcall, place stack adjustment // before this call. @@ -587,7 +631,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); - uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); // Restore the stack pointer using the value of the frame pointer. Only // necessary if the stack pointer was modified, meaning the stack size is // unknown. - if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) { + // + // In order to make sure the stack pointer is correct through the EH region, + // we also need to restore the stack pointer from the frame pointer if we + // don't preserve stack space within the prologue/epilogue for outgoing + // variables. Normally, checking whether a variable-sized object is present + // is enough, but we also don't preserve that space in the prologue/epilogue + // when we have vector objects on the stack.
+ if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || + !hasReservedCallFrame(MF)) { assert(hasFP(MF) && "frame pointer should not have been eliminated"); adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -FPOffset, MachineInstr::FrameDestroy); @@ -607,7 +659,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + uint64_t SecondSPAdjustAmount = + getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); @@ -665,134 +718,138 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, if (FirstSPAdjustAmount) Offset += StackOffset::getFixed(FirstSPAdjustAmount); else - Offset += - StackOffset::getFixed(MFI.getStackSize() + RVFI->getRVVPadding()); - } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { + Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF)); + return Offset; + } + + if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack // after realignment. + // |--------------------------| -- <-- FP + // | callee-allocated save | | <----| + // | area for register varargs| | | + // |--------------------------| | | + // | callee-saved registers | | | + // |--------------------------| -- | + // | realignment (the size of | | | + // | this area is not counted | | | + // | in MFI.getStackSize()) | | | + // |--------------------------| -- |-- MFI.getStackSize() + // | RVV alignment padding | | | + // | (not counted in | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | + // | RVV objects | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | padding before RVV | | | + // | (not counted in | | | + // | MFI.getStackSize() or in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | + // | scalar local variables | | <----' + // |--------------------------| -- <-- BP (if var sized objects present) + // | VarSize objects | | + // |--------------------------| -- <-- SP if (hasBP(MF)) { FrameReg = RISCVABI::getBPReg(); - // |--------------------------| -- <-- FP - // | callee-saved registers | | <----. - // |--------------------------| -- | - // | realignment (the size of | | | - // | this area is not counted | | | - // | in MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding after RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- |-- MFI.getStackSize() - // | RVV objects | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding before RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | scalar local variables | | <----' - // |--------------------------| -- <-- BP - // | VarSize objects | | - // |--------------------------| -- <-- SP } else { + // VarSize objects must be empty in this case! + assert(!MFI.hasVarSizedObjects()); FrameReg = RISCV::X2; - // |--------------------------| -- <-- FP - // | callee-saved registers | | <----. 
- // |--------------------------| -- | - // | realignment (the size of | | | - // | this area is not counted | | | - // | in MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding after RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- |-- MFI.getStackSize() - // | RVV objects | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding before RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | scalar local variables | | <----' - // |--------------------------| -- <-- SP - } - // The total amount of padding surrounding RVV objects is described by - // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to 8 bytes. - if (MFI.getStackID(FI) == TargetStackID::Default) { - Offset += StackOffset::getFixed(MFI.getStackSize()); - if (FI < 0) - Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); - } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += StackOffset::get( - alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), - RVFI->getRVVStackSize()); } } else { FrameReg = RI->getFrameRegister(MF); - if (hasFP(MF)) { - Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); - if (FI >= 0) - Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize()); - // When using FP to access scalable vector objects, we need to minus - // the frame size. - // - // |--------------------------| -- <-- FP - // | callee-saved registers | | - // |--------------------------| | MFI.getStackSize() - // | scalar local variables | | - // |--------------------------| -- (Offset of RVV objects is from here.) - // | RVV objects | - // |--------------------------| - // | VarSize objects | - // |--------------------------| <-- SP - if (MFI.getStackID(FI) == TargetStackID::ScalableVector) - Offset -= StackOffset::getFixed(MFI.getStackSize()); - } else { - // When using SP to access frame objects, we need to add RVV stack size. - // - // |--------------------------| -- <-- FP - // | callee-saved registers | | <----. - // |--------------------------| -- | - // | Padding after RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | RVV objects | | |-- MFI.getStackSize() - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding before RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | scalar local variables | | <----' - // |--------------------------| -- <-- SP - // - // The total amount of padding surrounding RVV objects is described by - // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to 8 bytes. 
-    if (MFI.getStackID(FI) == TargetStackID::Default) {
-      if (MFI.isFixedObjectIndex(FI)) {
-        Offset +=
-            StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding() +
-                             RVFI->getLibCallStackSize(),
-                             RVFI->getRVVStackSize());
-      } else {
-        Offset += StackOffset::getFixed(MFI.getStackSize());
-      }
-    } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
-      Offset += StackOffset::get(
-          alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8),
-          RVFI->getRVVStackSize());
-    }
   }
+
+  if (FrameReg == getFPReg(STI)) {
+    Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
+    if (FI >= 0)
+      Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
+    // When using FP to access scalable vector objects, we need to subtract
+    // the frame size.
+    //
+    // |--------------------------| -- <-- FP
+    // | callee-allocated save    | |
+    // | area for register varargs| |
+    // |--------------------------| |
+    // | callee-saved registers   | |
+    // |--------------------------| | MFI.getStackSize()
+    // | scalar local variables   | |
+    // |--------------------------| -- (Offset of RVV objects is from here.)
+    // | RVV objects              |
+    // |--------------------------|
+    // | VarSize objects          |
+    // |--------------------------| <-- SP
+    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      assert(!RI->hasStackRealignment(MF) &&
+             "Can't index across variable sized realign");
+      // We don't expect any extra RVV alignment padding, as the stack size
+      // and the RVV object sections should each be correctly aligned in
+      // their own right.
+      assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) &&
+             "Inconsistent stack layout");
+      Offset -= StackOffset::getFixed(MFI.getStackSize());
+    }
+    return Offset;
+  }
+
+  // This case handles indexing off both SP and BP.
+  // If indexing off SP, there must not be any var sized objects.
+  assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());
+
+  // When using SP to access frame objects, we need to add RVV stack size.
+  //
+  // |--------------------------| -- <-- FP
+  // | callee-allocated save    | | <----|
+  // | area for register varargs| |      |
+  // |--------------------------| |      |
+  // | callee-saved registers   | |      |
+  // |--------------------------| --     |
+  // | RVV alignment padding    | |      |
+  // | (not counted in          | |      |
+  // | MFI.getStackSize() but   | |      |
+  // | counted in               | |      |
+  // | RVFI.getRVVStackSize())  | |      |
+  // |--------------------------| --     |
+  // | RVV objects              | |      |-- MFI.getStackSize()
+  // | (not counted in          | |      |
+  // | MFI.getStackSize())      | |      |
+  // |--------------------------| --     |
+  // | padding before RVV       | |      |
+  // | (not counted in          | |      |
+  // | MFI.getStackSize())      | |      |
+  // |--------------------------| --     |
+  // | scalar local variables   | | <----'
+  // |--------------------------| -- <-- BP (if var sized objects present)
+  // | VarSize objects          | |
+  // |--------------------------| -- <-- SP
+  //
+  // The total amount of padding surrounding RVV objects is described by
+  // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
+  // objects to the required alignment.
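// [Editorial sketch, not part of the patch] getFrameIndexReference above
// composes a StackOffset from a fixed byte part and a scalable part. A
// minimal model of that arithmetic, with illustrative names; in LLVM the
// scalable part is scaled by the runtime vscale:
#include <cstdint>

struct SketchStackOffset {
  int64_t Fixed = 0;    // bytes, known at compile time
  int64_t Scalable = 0; // bytes per unit of vscale (RVV objects live here)
};

// Resolve to a concrete byte offset once vscale is known.
constexpr int64_t resolve(SketchStackOffset O, int64_t VScale) {
  return O.Fixed + O.Scalable * VScale;
}
static_assert(resolve({16, 8}, 2) == 32,
              "16 fixed bytes plus one 8-byte-per-vscale block at vscale 2");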
+  if (MFI.getStackID(FI) == TargetStackID::Default) {
+    if (MFI.isFixedObjectIndex(FI)) {
+      assert(!RI->hasStackRealignment(MF) &&
+             "Can't index across variable sized realign");
+      Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) +
+                                     RVFI->getLibCallStackSize(),
+                                 RVFI->getRVVStackSize());
+    } else {
+      Offset += StackOffset::getFixed(MFI.getStackSize());
+    }
+  } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+    // Ensure the base of the RVV stack is correctly aligned: add on the
+    // alignment padding.
+    int ScalarLocalVarSize =
+        MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
+        RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
+    Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize());
+  }
   return Offset;
 }
@@ -841,9 +898,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
     }
   }
-int64_t
+std::pair<int64_t, Align>
 RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
-  int64_t Offset = 0;
   // Create a buffer of RVV objects to allocate.
   SmallVector<int, 8> ObjectsToAllocate;
   for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
@@ -857,29 +913,78 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
   }
   // Allocate all RVV locals and spills
+  int64_t Offset = 0;
+  // The minimum alignment is 16 bytes.
+  Align RVVStackAlign(16);
   for (int FI : ObjectsToAllocate) {
     // ObjectSize in bytes.
     int64_t ObjectSize = MFI.getObjectSize(FI);
+    auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI));
     // If the data type is the fractional vector type, reserve one vector
     // register for it.
     if (ObjectSize < 8)
       ObjectSize = 8;
-    // Currently, all scalable vector types are aligned to 8 bytes.
-    Offset = alignTo(Offset + ObjectSize, 8);
+    Offset = alignTo(Offset + ObjectSize, ObjectAlign);
     MFI.setObjectOffset(FI, -Offset);
+    // Update the maximum alignment of the RVV stack section.
+    RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
   }
-  return Offset;
+  // Ensure the alignment of the RVV stack. Since we want the most-aligned
+  // object right at the bottom (i.e., any padding at the top of the frame),
+  // readjust all RVV objects down by the alignment padding.
+  uint64_t StackSize = Offset;
+  if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
+    StackSize += AlignmentPadding;
+    for (int FI : ObjectsToAllocate)
+      MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+  }
+
+  return std::make_pair(StackSize, RVVStackAlign);
 }

-static bool hasRVVSpillWithFIs(MachineFunction &MF, const RISCVInstrInfo &TII) {
+static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
+  // For RVV spills, computing scalable stack offsets requires up to two
+  // scratch registers.
+  static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
+
+  // For RVV spills, computing non-scalable stack offsets requires up to one
+  // scratch register.
+  static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
+
+  // An ADDI instruction's destination register can be used for computing
+  // offsets, so scalable stack offsets require up to one scratch register.
+ static constexpr unsigned ScavSlotsADDIScalableObject = 1; + + static constexpr unsigned MaxScavSlotsNumKnown = + std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject, + ScavSlotsNumRVVSpillNonScalableObject}); + + unsigned MaxScavSlotsNum = 0; if (!MF.getSubtarget().hasVInstructions()) return false; - return any_of(MF, [&TII](const MachineBasicBlock &MBB) { - return any_of(MBB, [&TII](const MachineInstr &MI) { - return TII.isRVVSpill(MI, /*CheckFIs*/ true); - }); - }); + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) { + bool IsRVVSpill = RISCV::isRVVSpill(MI); + for (auto &MO : MI.operands()) { + if (!MO.isFI()) + continue; + bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) == + TargetStackID::ScalableVector; + if (IsRVVSpill) { + MaxScavSlotsNum = std::max( + MaxScavSlotsNum, IsScalableVectorID + ? ScavSlotsNumRVVSpillScalableObject + : ScavSlotsNumRVVSpillNonScalableObject); + } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) { + MaxScavSlotsNum = + std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject); + } + } + if (MaxScavSlotsNum == MaxScavSlotsNumKnown) + return MaxScavSlotsNumKnown; + } + return MaxScavSlotsNum; } void RISCVFrameLowering::processFunctionBeforeFrameFinalized( @@ -890,9 +995,17 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( const TargetRegisterClass *RC = &RISCV::GPRRegClass; auto *RVFI = MF.getInfo(); - int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI); + int64_t RVVStackSize; + Align RVVStackAlign; + std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI); + RVFI->setRVVStackSize(RVVStackSize); - const RISCVInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + RVFI->setRVVStackAlign(RVVStackAlign); + + // Ensure the entire stack is aligned to at least the RVV requirement: some + // scalable-vector object alignments are not considered by the + // target-independent code. + MFI.ensureMaxAlignment(RVVStackAlign); // estimateStackSize has been observed to under-estimate the final stack // size, so give ourselves wiggle-room by checking for stack size @@ -903,17 +1016,14 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( // RVV loads & stores have no capacity to hold the immediate address offsets // so we must always reserve an emergency spill slot if the MachineFunction // contains any RVV spills. - if (!isInt<11>(MFI.estimateStackSize(MF)) || hasRVVSpillWithFIs(MF, TII)) { - int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC), - RegInfo->getSpillAlign(*RC), false); - RS->addScavengingFrameIndex(RegScavFI); - // For RVV, scalable stack offsets require up to two scratch registers to - // compute the final offset. Reserve an additional emergency spill slot. 
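// [Editorial sketch, not part of the patch] The scan in getScavSlotsNumForRVV
// above caps the answer at a statically known maximum so it can stop early.
// The same pattern in miniature, with illustrative names and std::vector
// standing in for the MachineFunction walk:
#include <algorithm>
#include <vector>

unsigned maxNeededScavSlots(const std::vector<unsigned> &PerInstNeeds) {
  constexpr unsigned MaxKnown = 2; // largest demand any instruction can have
  unsigned Max = 0;
  for (unsigned N : PerInstNeeds) {
    Max = std::max(Max, N);
    if (Max == MaxKnown)
      return MaxKnown; // no later instruction can raise the maximum
  }
  return Max;
}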
- if (RVVStackSize != 0) { - int RVVRegScavFI = MFI.CreateStackObject( - RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false); - RS->addScavengingFrameIndex(RVVRegScavFI); - } + unsigned ScavSlotsNum = 0; + if (!isInt<11>(MFI.estimateStackSize(MF))) + ScavSlotsNum = 1; + + ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF)); + for (unsigned i = 0; i < ScavSlotsNum; i++) { + RS->addScavengingFrameIndex(MFI.CreateStackObject( + RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false)); } if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) { @@ -930,14 +1040,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( Size += MFI.getObjectSize(FrameIdx); } RVFI->setCalleeSavedStackSize(Size); - - // Padding required to keep the RVV stack aligned to 8 bytes - // within the main stack. We only need this when not using FP. - if (RVVStackSize && !hasFP(MF) && Size % 8 != 0) { - // Because we add the padding to the size of the stack, adding - // getStackAlign() will keep it aligned. - RVFI->setRVVPadding(getStackAlign().value()); - } } static bool hasRVVFrameObject(const MachineFunction &MF) { @@ -1012,23 +1114,23 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { const auto *RVFI = MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); - uint64_t StackSize = MFI.getStackSize(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); - // Disable SplitSPAdjust if save-restore libcall used. The callee saved + // Disable SplitSPAdjust if save-restore libcall is used. The callee-saved // registers will be pushed by the save-restore libcalls, so we don't have to // split the SP adjustment in this case. if (RVFI->getLibCallStackSize()) return 0; - // Return the FirstSPAdjustAmount if the StackSize can not fit in signed - // 12-bit and there exists a callee saved register need to be pushed. + // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed + // 12-bit and there exists a callee-saved register needing to be pushed. if (!isInt<12>(StackSize) && (CSI.size() > 0)) { - // FirstSPAdjustAmount is choosed as (2048 - StackAlign) - // because 2048 will cause sp = sp + 2048 in epilogue split into - // multi-instructions. The offset smaller than 2048 can fit in signle - // load/store instruction and we have to stick with the stack alignment. - // 2048 is 16-byte alignment. The stack alignment for RV32 and RV64 is 16, - // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment. + // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will + // cause sp = sp + 2048 in the epilogue to be split into multiple + // instructions. Offsets smaller than 2048 can fit in a single load/store + // instruction, and we have to stick with the stack alignment. 2048 has + // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for + // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment. 
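// [Editorial worked example, not part of the patch] With the usual 16-byte
// stack alignment the first SP adjustment is 2048 - 16 = 2032: it fits in a
// simm12, keeps sp 16-byte aligned, and leaves the remainder for the second
// adjustment. A compile-time check of that reasoning:
static_assert(2048 - 16 == 2032 && 2032 % 16 == 0 && 2032 <= 2047,
              "first SP adjustment fits simm12 and preserves alignment");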
     return 2048 - getStackAlign().value();
   }
   return 0;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 1e94e34acf2f..466cd059b749 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -30,6 +30,8 @@ public:
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  uint64_t getStackSizeWithRVVPadding(const MachineFunction &MF) const;
+
   StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
                                      Register &FrameReg) const override;
@@ -81,7 +83,8 @@ private:
   void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                          int64_t Amount, MachineInstr::MIFlag Flag) const;
-  int64_t assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const;
+  std::pair<int64_t, Align>
+  assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const;
 };
 }
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index ba91b16661a4..2410cc1f8859 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -37,6 +37,11 @@ class RISCVGatherScatterLowering : public FunctionPass {
   SmallVector MaybeDeadPHIs;
+  // Cache of the BasePtr and Stride determined from a GEP. When a GEP is
+  // used by multiple gathers/scatters, this allows us to reuse the scalar
+  // instructions we created for the first gather/scatter for the others.
+  DenseMap<GetElementPtrInst *, std::pair<Value *, Value *>> StridedAddrs;
+
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -323,15 +328,19 @@
 std::pair<Value *, Value *>
 RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
                                                    IRBuilder<> &Builder) {
+  auto I = StridedAddrs.find(GEP);
+  if (I != StridedAddrs.end())
+    return I->second;
+
   SmallVector Ops(GEP->operands());
   // Base pointer needs to be a scalar.
   if (Ops[0]->getType()->isVectorTy())
     return std::make_pair(nullptr, nullptr);
-  // Make sure we're in a loop and it is in loop simplify form.
+  // Make sure we're in a loop that has a pre-header and a single latch.
   Loop *L = LI->getLoopFor(GEP->getParent());
-  if (!L || !L->isLoopSimplifyForm())
+  if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
     return std::make_pair(nullptr, nullptr);
   Optional<unsigned> VecOperand;
@@ -387,13 +396,6 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
   Value *BasePtr =
       Builder.CreateGEP(SourceTy, Ops[0], makeArrayRef(Ops).drop_front());
-  // Cast the GEP to an i8*.
-  LLVMContext &Ctx = GEP->getContext();
-  Type *I8PtrTy =
-      Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
-  if (BasePtr->getType() != I8PtrTy)
-    BasePtr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
-
   // Final adjustments to stride should go in the start block.
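// [Editorial sketch, not part of the patch] The StridedAddrs map above is a
// plain memoization cache: determineBaseAndStride computes (base, stride)
// once per GEP and later callers reuse the cached scalar instructions. In
// miniature, with std::map standing in for llvm::DenseMap and an opaque
// Value type:
#include <map>
#include <utility>

struct Value; // opaque, illustrative
using BaseAndStride = std::pair<Value *, Value *>;

BaseAndStride memoized(const void *GEP,
                       std::map<const void *, BaseAndStride> &Cache,
                       BaseAndStride (*Compute)(const void *)) {
  auto It = Cache.find(GEP);
  if (It != Cache.end())
    return It->second;            // reuse instructions built for an earlier use
  BaseAndStride R = Compute(GEP); // expensive path runs once per GEP
  Cache.emplace(GEP, R);
  return R;
}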
Builder.SetInsertPoint( BasePhi->getIncomingBlock(1 - IncrementingBlock)->getTerminator()); @@ -406,7 +408,9 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP, if (TypeScale != 1) Stride = Builder.CreateMul(Stride, ConstantInt::get(IntPtrTy, TypeScale)); - return std::make_pair(BasePtr, Stride); + auto P = std::make_pair(BasePtr, Stride); + StridedAddrs[GEP] = P; + return P; } bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II, @@ -468,6 +472,8 @@ bool RISCVGatherScatterLowering::runOnFunction(Function &F) { DL = &F.getParent()->getDataLayout(); LI = &getAnalysis().getLoopInfo(); + StridedAddrs.clear(); + SmallVector Gathers; SmallVector Scatters; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 6f77428ae721..cfaafc7b53d2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -37,6 +37,7 @@ namespace RISCV { #define GET_RISCVVSETable_IMPL #define GET_RISCVVLXTable_IMPL #define GET_RISCVVSXTable_IMPL +#define GET_RISCVMaskedPseudosTable_IMPL #include "RISCVGenSearchableTables.inc" } // namespace RISCV } // namespace llvm @@ -47,17 +48,36 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { I != E;) { SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. + // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point + // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. + if (N->getOpcode() == ISD::SPLAT_VECTOR) { + MVT VT = N->getSimpleValueType(0); + unsigned Opc = + VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; + SDLoc DL(N); + SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); + SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), + N->getOperand(0), VL); + + --I; + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + ++I; + CurDAG->DeleteNode(N); + continue; + } + // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector // load. Done after lowering and combining so that we have a chance to // optimize this to VMV_V_X_VL when the upper bits aren't needed. 
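// [Editorial worked example, not part of the patch] On RV32, the splat value
// for SPLAT_VECTOR_SPLIT_I64_VL is assembled in memory from the two 32-bit
// halves and then broadcast by a stride-0 vector load, which reads the same
// address for every element:
#include <cstdint>
static_assert((static_cast<uint64_t>(0x11223344u) << 32 | 0xAABBCCDDu) ==
                  0x11223344AABBCCDDull,
              "hi/lo halves assemble the 64-bit splat value");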
if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) continue; - assert(N->getNumOperands() == 3 && "Unexpected number of operands"); + assert(N->getNumOperands() == 4 && "Unexpected number of operands"); MVT VT = N->getSimpleValueType(0); - SDValue Lo = N->getOperand(0); - SDValue Hi = N->getOperand(1); - SDValue VL = N->getOperand(2); + SDValue Passthru = N->getOperand(0); + SDValue Lo = N->getOperand(1); + SDValue Hi = N->getOperand(2); + SDValue VL = N->getOperand(3); assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && "Unexpected VTs!"); @@ -88,7 +108,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); SDValue Ops[] = {Chain, IntID, - CurDAG->getUNDEF(VT), + Passthru, StackSlot, CurDAG->getRegister(RISCV::X0, MVT::i64), VL}; @@ -112,6 +132,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { } void RISCVDAGToDAGISel::PostprocessISelDAG() { + HandleSDNode Dummy(CurDAG->getRoot()); SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); bool MadeChange = false; @@ -123,57 +144,70 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { MadeChange |= doPeepholeSExtW(N); MadeChange |= doPeepholeLoadStoreADDI(N); + MadeChange |= doPeepholeMaskedRVV(N); } + CurDAG->setRoot(Dummy.getValue()); + if (MadeChange) CurDAG->RemoveDeadNodes(); } -static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, - const MVT VT, int64_t Imm, - const RISCVSubtarget &Subtarget) { - assert(VT == MVT::i64 && "Expecting MVT::i64"); - const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); - ConstantPoolSDNode *CP = cast(CurDAG->getConstantPool( - ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); - SDValue Addr = TLI->getAddr(CP, *CurDAG); - SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); - // Since there is no data race, the chain can be the entry node. - SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, - CurDAG->getEntryNode()); - MachineFunction &MF = CurDAG->getMachineFunction(); - MachineMemOperand *MemOp = MF.getMachineMemOperand( - MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - LLT(VT), CP->getAlign()); - CurDAG->setNodeMemRefs(cast(Load), {MemOp}); - return Load; -} - -static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, - int64_t Imm, const RISCVSubtarget &Subtarget) { - MVT XLenVT = Subtarget.getXLenVT(); - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); +// Returns true if N is a MachineSDNode that has a reg and simm12 memory +// operand. The indices of the base pointer and offset are returned in BaseOpIdx +// and OffsetOpIdx. +static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx, + unsigned &OffsetOpIdx) { + switch (N->getMachineOpcode()) { + case RISCV::LB: + case RISCV::LH: + case RISCV::LW: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::LD: + case RISCV::FLH: + case RISCV::FLW: + case RISCV::FLD: + BaseOpIdx = 0; + OffsetOpIdx = 1; + return true; + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FSH: + case RISCV::FSW: + case RISCV::FSD: + BaseOpIdx = 1; + OffsetOpIdx = 2; + return true; + } - // If Imm is expensive to build, then we put it into constant pool. 
- if (Subtarget.useConstantPoolForLargeInts() && - Seq.size() > Subtarget.getMaxBuildIntsCost()) - return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); + return false; +} +static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, + RISCVMatInt::InstSeq &Seq) { SDNode *Result = nullptr; - SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); + SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); for (RISCVMatInt::Inst &Inst : Seq) { - SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); - if (Inst.Opc == RISCV::LUI) - Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); - else if (Inst.Opc == RISCV::ADD_UW) - Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, - CurDAG->getRegister(RISCV::X0, XLenVT)); - else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) - Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); - else - Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); + SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); + break; + case RISCVMatInt::RegX0: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, + CurDAG->getRegister(RISCV::X0, VT)); + break; + case RISCVMatInt::RegReg: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg); + break; + case RISCVMatInt::RegImm: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); + break; + } // Only the first instruction has X0 as its source. SrcReg = SDValue(Result, 0); @@ -182,51 +216,28 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, return Result; } -static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef Regs, - unsigned RegClassID, unsigned SubReg0) { - assert(Regs.size() >= 2 && Regs.size() <= 8); - - SDLoc DL(Regs[0]); - SmallVector Ops; - - Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); +static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, + int64_t Imm, const RISCVSubtarget &Subtarget) { + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); - for (unsigned I = 0; I < Regs.size(); ++I) { - Ops.push_back(Regs[I]); - Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); - } - SDNode *N = - CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); + return selectImmSeq(CurDAG, DL, VT, Seq); } -static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef Regs, - unsigned NF) { - static const unsigned RegClassIDs[] = { +static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef Regs, + unsigned NF, RISCVII::VLMUL LMUL) { + static const unsigned M1TupleRegClassIDs[] = { RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, RISCV::VRN8M1RegClassID}; + static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, + RISCV::VRN3M2RegClassID, + RISCV::VRN4M2RegClassID}; - return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); -} - -static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef Regs, - unsigned NF) { - static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, - RISCV::VRN3M2RegClassID, - RISCV::VRN4M2RegClassID}; - - return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); -} - -static SDValue 
createM4Tuple(SelectionDAG &CurDAG, ArrayRef Regs, - unsigned NF) { - return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, - RISCV::sub_vrm4_0); -} + assert(Regs.size() >= 2 && Regs.size() <= 8); -static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef Regs, - unsigned NF, RISCVII::VLMUL LMUL) { + unsigned RegClassID; + unsigned SubReg0; switch (LMUL) { default: llvm_unreachable("Invalid LMUL."); @@ -234,12 +245,37 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef Regs, case RISCVII::VLMUL::LMUL_F4: case RISCVII::VLMUL::LMUL_F2: case RISCVII::VLMUL::LMUL_1: - return createM1Tuple(CurDAG, Regs, NF); + static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, + "Unexpected subreg numbering"); + SubReg0 = RISCV::sub_vrm1_0; + RegClassID = M1TupleRegClassIDs[NF - 2]; + break; case RISCVII::VLMUL::LMUL_2: - return createM2Tuple(CurDAG, Regs, NF); + static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, + "Unexpected subreg numbering"); + SubReg0 = RISCV::sub_vrm2_0; + RegClassID = M2TupleRegClassIDs[NF - 2]; + break; case RISCVII::VLMUL::LMUL_4: - return createM4Tuple(CurDAG, Regs, NF); + static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, + "Unexpected subreg numbering"); + SubReg0 = RISCV::sub_vrm4_0; + RegClassID = RISCV::VRN2M4RegClassID; + break; + } + + SDLoc DL(Regs[0]); + SmallVector Ops; + + Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); + + for (unsigned I = 0; I < Regs.size(); ++I) { + Ops.push_back(Regs[I]); + Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); } + SDNode *N = + CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); + return SDValue(N, 0); } void RISCVDAGToDAGISel::addVectorLoadStoreOperands( @@ -287,6 +323,10 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands( Operands.push_back(Glue); } +static bool isAllUndef(ArrayRef Values) { + return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); +} + void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided) { SDLoc DL(Node); @@ -297,19 +337,21 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, unsigned CurOp = 2; SmallVector Operands; - if (IsMasked) { - SmallVector Regs(Node->op_begin() + CurOp, - Node->op_begin() + CurOp + NF); - SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); - Operands.push_back(MaskedOff); - CurOp += NF; + + SmallVector Regs(Node->op_begin() + CurOp, + Node->op_begin() + CurOp + NF); + bool IsTU = IsMasked || !isAllUndef(Regs); + if (IsTU) { + SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); + Operands.push_back(Merge); } + CurOp += NF; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, Operands, /*IsLoad=*/true); const RISCV::VLSEGPseudo *P = - RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, + RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); @@ -338,25 +380,25 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { unsigned CurOp = 2; SmallVector Operands; - if (IsMasked) { - SmallVector Regs(Node->op_begin() + CurOp, - Node->op_begin() + CurOp + NF); + + SmallVector Regs(Node->op_begin() + CurOp, + Node->op_begin() + CurOp + NF); + bool IsTU = IsMasked || !isAllUndef(Regs); + if (IsTU) { SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); Operands.push_back(MaskedOff); - CurOp += NF; } + CurOp += NF; addVectorLoadStoreOperands(Node, 
Log2SEW, DL, CurOp, IsMasked, /*IsStridedOrIndexed*/ false, Operands, /*IsLoad=*/true); const RISCV::VLSEGPseudo *P = - RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, + RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, - MVT::Other, MVT::Glue, Operands); - SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, - /*Glue*/ SDValue(Load, 2)); + XLenVT, MVT::Other, Operands); if (auto *MemOp = dyn_cast(Node)) CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); @@ -368,8 +410,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); } - ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL - ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL + ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain CurDAG->RemoveDeadNode(Node); } @@ -383,13 +425,15 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, unsigned CurOp = 2; SmallVector Operands; - if (IsMasked) { - SmallVector Regs(Node->op_begin() + CurOp, - Node->op_begin() + CurOp + NF); + + SmallVector Regs(Node->op_begin() + CurOp, + Node->op_begin() + CurOp + NF); + bool IsTU = IsMasked || !isAllUndef(Regs); + if (IsTU) { SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); Operands.push_back(MaskedOff); - CurOp += NF; } + CurOp += NF; MVT IndexVT; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, @@ -406,7 +450,7 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, "values when XLEN=32"); } const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( - NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast(LMUL), + NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast(LMUL), static_cast(IndexLMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); @@ -596,32 +640,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { int64_t Imm = ConstNode->getSExtValue(); // If the upper XLen-16 bits are not used, try to convert this to a simm12 // by sign extending bit 15. - if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && + if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && hasAllHUsers(Node)) - Imm = SignExtend64(Imm, 16); + Imm = SignExtend64<16>(Imm); // If the upper 32-bits are not used try to convert this into a simm32 by // sign extending bit 32. if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) - Imm = SignExtend64(Imm, 32); + Imm = SignExtend64<32>(Imm); ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); return; } - case ISD::FrameIndex: { - SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); - int FI = cast(Node)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); - ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); + case ISD::ADD: { + // Try to select ADD + immediate used as memory addresses to + // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by + // doPeepholeLoadStoreADDI. + + // LHS should be an immediate. + auto *N1C = dyn_cast(Node->getOperand(1)); + if (!N1C) + break; + + int64_t Offset = N1C->getSExtValue(); + int64_t Lo12 = SignExtend64<12>(Offset); + + // Don't do this if the lower 12 bits are 0 or we could use ADDI directly. 
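// [Editorial worked example, not part of the patch] SignExtend64<12> treats
// the low 12 bits of the offset as a signed value, so Hi = Offset - Lo12
// always has its low 12 bits clear and needs no trailing ADDI. A standalone
// model (two's complement conversions, guaranteed since C++20):
#include <cstdint>
constexpr int64_t signExtend12(uint64_t X) {
  return static_cast<int64_t>(X << 52) >> 52; // keep low 12 bits, sign-extend
}
static_assert(signExtend12(0x1801) == -2047, "0x801 sign-extends to -2047");
static_assert(0x1801 - signExtend12(0x1801) == 0x2000,
              "the Hi part has its low 12 bits clear");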
+ if (Lo12 == 0 || isInt<12>(Offset)) + break; + + // Don't do this if we can use a pair of ADDIs. + if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2)) + break; + + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits()); + + Offset -= Lo12; + // Restore sign bits for RV32. + if (!Subtarget->is64Bit()) + Offset = SignExtend64<32>(Offset); + + // We can fold if the last operation is an ADDI or its an ADDIW that could + // be treated as an ADDI. + if (Seq.back().Opc != RISCV::ADDI && + !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset))) + break; + assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12"); + // Drop the last operation. + Seq.pop_back(); + assert(!Seq.empty() && "Expected more instructions in sequence"); + + bool AllPointerUses = true; + for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + + // Is this user a memory instruction that uses a register and immediate + // that has this ADD as its pointer. + unsigned BaseOpIdx, OffsetOpIdx; + if (!User->isMachineOpcode() || + !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) || + UI.getOperandNo() != BaseOpIdx) { + AllPointerUses = false; + break; + } + + // If the memory instruction already has an offset, make sure the combined + // offset is foldable. + int64_t MemOffs = + cast(User->getOperand(OffsetOpIdx))->getSExtValue(); + MemOffs += Lo12; + if (!isInt<12>(MemOffs)) { + AllPointerUses = false; + break; + } + } + + if (!AllPointerUses) + break; + + // Emit (ADDI (ADD X, Hi), Lo) + SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq); + SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT, + Node->getOperand(0), SDValue(Imm, 0)); + SDNode *ADDI = + CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0), + CurDAG->getTargetConstant(Lo12, DL, VT)); + ReplaceNode(Node, ADDI); return; } + case ISD::SHL: { + auto *N1C = dyn_cast(Node->getOperand(1)); + if (!N1C) + break; + SDValue N0 = Node->getOperand(0); + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || + !isa(N0.getOperand(1))) + break; + unsigned ShAmt = N1C->getZExtValue(); + uint64_t Mask = N0.getConstantOperandVal(1); + + // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has + // 32 leading zeros and C3 trailing zeros. + if (ShAmt <= 32 && isShiftedMask_64(Mask)) { + unsigned XLen = Subtarget->getXLen(); + unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); + unsigned TrailingZeros = countTrailingZeros(Mask); + if (TrailingZeros > 0 && LeadingZeros == 32) { + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(TrailingZeros, DL, VT)); + SDNode *SLLI = CurDAG->getMachineNode( + RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); + ReplaceNode(Node, SLLI); + return; + } + } + break; + } case ISD::SRL: { - // Optimize (srl (and X, C2), C) -> - // (srli (slli X, (XLen-C3), (XLen-C3) + C) - // Where C2 is a mask with C3 trailing ones. - // Taking into account that the C2 may have had lower bits unset by - // SimplifyDemandedBits. This avoids materializing the C2 immediate. - // This pattern occurs when type legalizing right shifts for types with - // less than XLen bits. 
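// [Editorial worked example, not part of the patch] For the SHL transform
// above, take C2 = 0xFFFFFF00 (32 leading zeros, C3 = 8 trailing zeros) and
// C = 4: (slli (srliw X, 8), 12) computes the same value as
// (shl (and X, C2), 4), and because C3 > 0 the srliw result has its sign bit
// clear, so the implicit sign extension is harmless:
static_assert(((0xAABBCCDDull & 0xFFFFFF00ull) << 4) ==
                  ((0xAABBCCDDull >> 8) << 12),
              "shl of masked value equals srliw+slli");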
auto *N1C = dyn_cast(Node->getOperand(1)); if (!N1C) break; @@ -631,6 +768,32 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; unsigned ShAmt = N1C->getZExtValue(); uint64_t Mask = N0.getConstantOperandVal(1); + + // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has + // 32 leading zeros and C3 trailing zeros. + if (isShiftedMask_64(Mask)) { + unsigned XLen = Subtarget->getXLen(); + unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); + unsigned TrailingZeros = countTrailingZeros(Mask); + if (LeadingZeros == 32 && TrailingZeros > ShAmt) { + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(TrailingZeros, DL, VT)); + SDNode *SLLI = CurDAG->getMachineNode( + RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); + ReplaceNode(Node, SLLI); + return; + } + } + + // Optimize (srl (and X, C2), C) -> + // (srli (slli X, (XLen-C3), (XLen-C3) + C) + // Where C2 is a mask with C3 trailing ones. + // Taking into account that the C2 may have had lower bits unset by + // SimplifyDemandedBits. This avoids materializing the C2 immediate. + // This pattern occurs when type legalizing right shifts for types with + // less than XLen bits. Mask |= maskTrailingOnes(ShAmt); if (!isMask_64(Mask)) break; @@ -700,13 +863,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { uint64_t C1 = N1C->getZExtValue(); - // Keep track of whether this is a andi, zext.h, or zext.w. - bool ZExtOrANDI = isInt<12>(N1C->getSExtValue()); - if (C1 == UINT64_C(0xFFFF) && - (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) - ZExtOrANDI = true; - if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()) - ZExtOrANDI = true; + // Keep track of whether this is a c.andi. If we can't use c.andi, the + // shift pair might offer more compression opportunities. + // TODO: We could check for C extension here, but we don't have many lit + // tests with the C extension enabled so not checking gets better coverage. + // TODO: What if ANDI faster than shift? + bool IsCANDI = isInt<6>(N1C->getSExtValue()); // Clear irrelevant bits in the mask. if (LeftShift) @@ -727,9 +889,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (C2 < C3) { // If the number of leading zeros is C2+32 this can be SRLIW. if (C2 + 32 == C3) { - SDNode *SRLIW = - CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, - CurDAG->getTargetConstant(C2, DL, XLenVT)); + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); ReplaceNode(Node, SRLIW); return; } @@ -739,27 +900,33 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type // legalized and goes through DAG combine. - SDValue Y; if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && - selectSExti32(X, Y)) { + X.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast(X.getOperand(1))->getVT() == MVT::i32) { SDNode *SRAIW = - CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y, - CurDAG->getTargetConstant(31, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), + CurDAG->getTargetConstant(31, DL, VT)); SDNode *SRLIW = CurDAG->getMachineNode( - RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), - CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); + RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), + CurDAG->getTargetConstant(C3 - 32, DL, VT)); ReplaceNode(Node, SRLIW); return; } // (srli (slli x, c3-c2), c3). 
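// [Editorial worked example, not part of the patch] For the relocated SRL
// comment above: with C2 = 0xFFFF (C3 = 16 trailing ones), C = 4 and
// XLen = 64, (srli (slli X, 48), 52) equals (srl (and X, 0xFFFF), 4); the
// slli discards exactly the bits the mask would have cleared:
static_assert(((0x12345678ull & 0xFFFFull) >> 4) ==
                  ((0x12345678ull << 48) >> 52),
              "and+srl equals slli+srli");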
- if (OneUseOrZExtW && !ZExtOrANDI) { + // Skip if we could use (zext.w (sraiw X, C2)). + bool Skip = Subtarget->hasStdExtZba() && C3 == 32 && + X.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast(X.getOperand(1))->getVT() == MVT::i32; + // Also Skip if we can use bexti. + Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1; + if (OneUseOrZExtW && !Skip) { SDNode *SLLI = CurDAG->getMachineNode( - RISCV::SLLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); + RISCV::SLLI, DL, VT, X, + CurDAG->getTargetConstant(C3 - C2, DL, VT)); SDNode *SRLI = - CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SRLI); return; } @@ -775,21 +942,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { C1 == (maskTrailingOnes(XLen - (C2 + C3)) << C2)) { // Use slli.uw when possible. if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { - SDNode *SLLI_UW = - CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, - CurDAG->getTargetConstant(C2, DL, XLenVT)); + SDNode *SLLI_UW = CurDAG->getMachineNode( + RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); ReplaceNode(Node, SLLI_UW); return; } // (srli (slli c2+c3), c3) - if (OneUseOrZExtW && !ZExtOrANDI) { + if (OneUseOrZExtW && !IsCANDI) { SDNode *SLLI = CurDAG->getMachineNode( - RISCV::SLLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); + RISCV::SLLI, DL, VT, X, + CurDAG->getTargetConstant(C2 + C3, DL, VT)); SDNode *SRLI = - CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SRLI); return; } @@ -801,25 +967,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (!LeftShift && isShiftedMask_64(C1)) { uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); uint64_t C3 = countTrailingZeros(C1); - if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) { + if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) { + unsigned SrliOpc = RISCV::SRLI; + // If the input is zexti32 we should use SRLIW. + if (X.getOpcode() == ISD::AND && isa(X.getOperand(1)) && + X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { + SrliOpc = RISCV::SRLIW; + X = X.getOperand(0); + } SDNode *SRLI = CurDAG->getMachineNode( - RISCV::SRLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); + SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 
if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && - OneUseOrZExtW && !ZExtOrANDI) { - SDNode *SRLIW = CurDAG->getMachineNode( - RISCV::SRLIW, DL, XLenVT, X, - CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); + OneUseOrZExtW && !IsCANDI) { + SDNode *SRLIW = + CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, + CurDAG->getTargetConstant(C2 + C3, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } @@ -830,24 +1002,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (LeftShift && isShiftedMask_64(C1)) { uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); uint64_t C3 = countTrailingZeros(C1); - if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) { + if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) { SDNode *SRLI = CurDAG->getMachineNode( - RISCV::SRLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); + RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. - if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) { - SDNode *SRLIW = CurDAG->getMachineNode( - RISCV::SRLIW, DL, XLenVT, X, - CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); + if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { + SDNode *SRLIW = + CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, + CurDAG->getTargetConstant(C3 - C2, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } @@ -908,7 +1079,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { uint64_t ShiftedC1 = C1 << ConstantShift; // If this RV32, we need to sign extend the constant. if (XLen == 32) - ShiftedC1 = SignExtend64(ShiftedC1, 32); + ShiftedC1 = SignExtend64<32>(ShiftedC1); // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); @@ -1005,45 +1176,44 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } MVT Src1VT = Src1.getSimpleValueType(); unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, - VMSetOpcode, VMANDOpcode; + VMOROpcode; switch (RISCVTargetLowering::getLMUL(Src1VT)) { default: llvm_unreachable("Unexpected LMUL!"); -#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ +#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ case RISCVII::VLMUL::lmulenum: \ VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ : RISCV::PseudoVMSLT_VX_##suffix; \ VMSLTMaskOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ - VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ break; - CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1) - CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2) - CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4) - CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8) - CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16) - CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32) - CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64) -#undef CASE_VMSLT_VMSET_OPCODES + CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) + CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) + CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) + CASE_VMSLT_OPCODES(LMUL_1, M1, B8) + CASE_VMSLT_OPCODES(LMUL_2, M2, B16) + CASE_VMSLT_OPCODES(LMUL_4, M4, B32) + CASE_VMSLT_OPCODES(LMUL_8, M8, B64) +#undef CASE_VMSLT_OPCODES } // Mask operations use the LMUL from the mask type. switch (RISCVTargetLowering::getLMUL(VT)) { default: llvm_unreachable("Unexpected LMUL!"); -#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \ +#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ case RISCVII::VLMUL::lmulenum: \ VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ - VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \ + VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ break; - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8) -#undef CASE_VMXOR_VMANDN_VMAND_OPCODES + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) +#undef CASE_VMXOR_VMANDN_VMOR_OPCODES } SDValue SEW = CurDAG->getTargetConstant( Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); @@ -1053,12 +1223,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { SDValue MaskedOff = Node->getOperand(1); SDValue Mask = Node->getOperand(4); - // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}. + // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. if (IsCmpUnsignedZero) { - SDValue VMSet = - SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0); - ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT, - {Mask, VMSet, VL, MaskSEW})); + // We don't need vmor if the MaskedOff and the Mask are the same + // value. + if (Mask == MaskedOff) { + ReplaceUses(Node, Mask.getNode()); + return; + } + ReplaceNode(Node, + CurDAG->getMachineNode(VMOROpcode, DL, VT, + {Mask, MaskedOff, VL, MaskSEW})); return; } @@ -1082,10 +1257,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Otherwise use // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 + // The result is mask undisturbed. + // We use the same instructions to emulate mask agnostic behavior, because + // the agnostic result can be either undisturbed or all 1. SDValue Cmp = SDValue( CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 0); + // vmxor.mm vd, vd, v0 is used to update active value. 
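// [Editorial note, not part of the patch] The emulation relies on the
// per-lane identity (a >= b) == !(a < b); the final vmxor against the
// original mask complements exactly the active lanes that vmslt(u) wrote:
static_assert(((5 >= 3) == !(5 < 3)) && ((2 >= 3) == !(2 < 3)),
              "vmsge is the complement of vmslt on active lanes");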
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, {Cmp, Mask, VL, MaskSEW})); return; @@ -1215,7 +1394,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned CurOp = 2; // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); + bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); SmallVector Operands; if (IsTU) Operands.push_back(Node->getOperand(CurOp++)); @@ -1267,9 +1446,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = - HasPassthruOperand && - ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); + bool IsTU = HasPassthruOperand && + (IsMasked || !Node->getOperand(CurOp).isUndef()); SmallVector Operands; if (IsTU) Operands.push_back(Node->getOperand(CurOp++)); @@ -1302,7 +1480,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned CurOp = 2; // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); + bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); SmallVector Operands; if (IsTU) Operands.push_back(Node->getOperand(CurOp++)); @@ -1318,19 +1496,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, Log2SEW, static_cast(LMUL)); - MachineSDNode *Load = - CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), - MVT::Other, MVT::Glue, Operands); - SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, - /*Glue*/ SDValue(Load, 2)); - + MachineSDNode *Load = CurDAG->getMachineNode( + P->Pseudo, DL, Node->getVTList(), Operands); if (auto *MemOp = dyn_cast(Node)) CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); - ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); - ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL - ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain - CurDAG->RemoveDeadNode(Node); + ReplaceNode(Node, Load); return; } } @@ -1610,9 +1781,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Try to match splat of a scalar load to a strided load with stride of x0. bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || Node->getOpcode() == RISCVISD::VFMV_S_F_VL; - if (IsScalarMove && !Node->getOperand(0).isUndef()) + bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; + if (HasPassthruOperand && !Node->getOperand(0).isUndef()) break; - SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0); + SDValue Src = HasPassthruOperand ? 
Node->getOperand(1) : Node->getOperand(0); auto *Ld = dyn_cast(Src); if (!Ld) break; @@ -1634,7 +1806,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; selectVLOp(Node->getOperand(2), VL); } else - selectVLOp(Node->getOperand(1), VL); + selectVLOp(Node->getOperand(2), VL); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); @@ -1650,8 +1822,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); - if (auto *MemOp = dyn_cast(Node)) - CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); + CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); ReplaceNode(Node, Load); return; @@ -1680,11 +1851,37 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( return true; } -bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { +bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) { if (auto *FIN = dyn_cast(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); return true; } + + return false; +} + +// Select a frame index and an optional immediate offset from an ADD or OR. +bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (SelectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + if (auto *FIN = dyn_cast(Addr.getOperand(0))) { + int64_t CVal = cast(Addr.getOperand(1))->getSExtValue(); + if (isInt<12>(CVal)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + Subtarget->getXLenVT()); + Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), + Subtarget->getXLenVT()); + return true; + } + } + return false; } @@ -1698,6 +1895,76 @@ bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { return true; } +bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (SelectAddrFrameIndex(Addr, Base, Offset)) + return true; + + SDLoc DL(Addr); + MVT VT = Addr.getSimpleValueType(); + + if (Addr.getOpcode() == RISCVISD::ADD_LO) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + int64_t CVal = cast(Addr.getOperand(1))->getSExtValue(); + if (isInt<12>(CVal)) { + Base = Addr.getOperand(0); + if (Base.getOpcode() == RISCVISD::ADD_LO) { + SDValue LoOperand = Base.getOperand(1); + if (auto *GA = dyn_cast(LoOperand)) { + // If the Lo in (ADD_LO hi, lo) is a global variable's address + // (its low part, really), then we can rely on the alignment of that + // variable to provide a margin of safety before low part can overflow + // the 12 bits of the load/store offset. Check if CVal falls within + // that margin; if so (low part + CVal) can't overflow. 
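// [Editorial worked example, not part of the patch] Why alignment gives a
// safety margin: if the global is 256-byte aligned, %lo(sym) is a multiple
// of 256 within [-2048, 2047], so it is at most 1792, and adding any
// CVal < 256 still fits the signed 12-bit immediate:
static_assert(1792 + 255 <= 2047,
              "lo part plus in-margin offset cannot overflow simm12");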
+          const DataLayout &DL = CurDAG->getDataLayout();
+          Align Alignment = commonAlignment(
+              GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+          if (CVal == 0 || Alignment > CVal) {
+            int64_t CombinedOffset = CVal + GA->getOffset();
+            Base = Base.getOperand(0);
+            Offset = CurDAG->getTargetGlobalAddress(
+                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
+                CombinedOffset, GA->getTargetFlags());
+            return true;
+          }
+        }
+      }
+
+      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+      Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+      return true;
+    }
+  }
+
+  // Handle ADD with large immediates.
+  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+    assert(!isInt<12>(CVal) && "simm12 not already handled?");
+
+    if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
+      // We can use an ADDI for part of the offset and fold the rest into the
+      // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
+      int64_t Adj = CVal < 0 ? -2048 : 2047;
+      Base = SDValue(
+          CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
+                                 CurDAG->getTargetConstant(Adj, DL, VT)),
+          0);
+      Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
+      return true;
+    }
+  }
+
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, DL, VT);
+  return true;
+}
+
 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                         SDValue &ShAmt) {
   // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
@@ -1723,6 +1990,21 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
       ShAmt = N.getOperand(0);
       return true;
     }
+  } else if (N.getOpcode() == ISD::SUB &&
+             isa<ConstantSDNode>(N.getOperand(0))) {
+    uint64_t Imm = N.getConstantOperandVal(0);
+    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+    // generate a NEG instead of a SUB of a constant.
+    if (Imm != 0 && Imm % ShiftWidth == 0) {
+      SDLoc DL(N);
+      EVT VT = N.getValueType();
+      SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
+      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
+      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
+                                                  N.getOperand(1));
+      ShAmt = SDValue(Neg, 0);
+      return true;
+    }
   }
 
   ShAmt = N;
@@ -1778,6 +2060,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
           Node->getOpcode() == ISD::SRL ||
           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+          Node->getOpcode() == RISCVISD::GREV ||
+          Node->getOpcode() == RISCVISD::GORC ||
           isa<ConstantSDNode>(Node)) &&
          "Unexpected opcode");
 
@@ -1812,6 +2096,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
     case RISCV::CTZW:
     case RISCV::CPOPW:
     case RISCV::SLLI_UW:
+    case RISCV::FMV_W_X:
     case RISCV::FCVT_H_W:
     case RISCV::FCVT_H_WU:
     case RISCV::FCVT_S_W:
@@ -1835,6 +2120,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
         return false;
       break;
     case RISCV::SEXT_H:
+    case RISCV::FMV_H_X:
     case RISCV::ZEXT_H_RV32:
     case RISCV::ZEXT_H_RV64:
       if (Bits < 16)
@@ -1871,22 +2157,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
 // allows us to choose between VSETIVLI or VSETVLI later.
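// Editor's sketch (illustrative, not part of the patch): the shift-amount
// rewrite in selectShiftMask above is sound because shifts only read the low
// log2(width) bits of the amount, so when Imm % width == 0 the amount
// (Imm - X) is congruent to -X modulo the width and a NEG can replace the SUB.
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t Width = 32; // a 32-bit RISC-V shift reads 5 amount bits
  for (uint32_t Imm : {32u, 64u, 96u}) // any multiple of the width
    for (uint32_t X = 0; X < 256; ++X)
      assert(((Imm - X) & (Width - 1)) == ((0u - X) & (Width - 1)));
  return 0;
}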
 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
   auto *C = dyn_cast<ConstantSDNode>(N);
-  if (C && (isUInt<5>(C->getZExtValue()) ||
-            C->getSExtValue() == RISCV::VLMaxSentinel))
+  if (C && isUInt<5>(C->getZExtValue())) {
     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                    N->getValueType(0));
-  else
+  } else if (C && C->isAllOnesValue()) {
+    // Treat all ones as VLMax.
+    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+                                   N->getValueType(0));
+  } else if (isa<RegisterSDNode>(N) &&
+             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
+    // All our VL operands use an operand that allows GPRNoX0 or an immediate
+    // as the register class. Convert X0 to a special immediate to pass the
+    // MachineVerifier. This is recognized specially by the vsetvli insertion
+    // pass.
+    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+                                   N->getValueType(0));
+  } else {
     VL = N;
+  }
 
   return true;
 }
 
 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
-  if (N.getOpcode() != ISD::SPLAT_VECTOR &&
-      N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
-      N.getOpcode() != RISCVISD::VMV_V_X_VL)
+  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
     return false;
-  SplatVal = N.getOperand(0);
+  SplatVal = N.getOperand(1);
   return true;
 }
 
@@ -1896,23 +2192,22 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
                                    SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget,
                                    ValidateFn ValidateImm) {
-  if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
-       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
-       N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
-      !isa<ConstantSDNode>(N.getOperand(0)))
+  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+      !isa<ConstantSDNode>(N.getOperand(1)))
     return false;
 
-  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+  int64_t SplatImm =
+      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
 
-  // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL
-  // share semantics when the operand type is wider than the resulting vector
-  // element type: an implicit truncation first takes place. Therefore, perform
-  // a manual truncation/sign-extension in order to ignore any truncated bits
-  // and catch any zero-extended immediate.
+  // The semantics of RISCVISD::VMV_V_X_VL are that when the operand
+  // type is wider than the resulting vector element type, an implicit
+  // truncation first takes place. Therefore, perform a manual
+  // truncation/sign-extension in order to ignore any truncated bits and catch
+  // any zero-extended immediate.
   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
   // sign-extending to (XLenVT -1).
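// Editor's sketch (illustrative, not part of the patch) of the manual
// truncation/sign-extension described above: a zero-extended immediate such
// as (i8 -1), seen as the XLenVT constant 255, is truncated to the element
// width and sign-extended back, after which it matches the canonical -1.
// (Arithmetic right shift of a negative value is assumed here; C++20
// guarantees it, and mainstream compilers have always behaved this way.)
#include <cassert>
#include <cstdint>
static int64_t truncSext(int64_t Imm, unsigned EltBits) {
  return static_cast<int64_t>(static_cast<uint64_t>(Imm) << (64 - EltBits)) >>
         (64 - EltBits);
}
int main() {
  assert(truncSext(255, 8) == -1);   // (i8 -1) arriving as XLenVT 255
  assert(truncSext(-1, 8) == -1);    // the sign-extended form is stable
  assert(truncSext(0x7F, 8) == 127); // positive in-range values are unchanged
  return 0;
}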
   MVT XLenVT = Subtarget.getXLenVT();
-  assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
+  assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
          "Unexpected splat operand type");
   MVT EltVT = N.getSimpleValueType().getVectorElementType();
   if (EltVT.bitsLT(XLenVT))
@@ -1945,13 +2240,12 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
 }
 
 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
-  if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
-       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
-       N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
-      !isa<ConstantSDNode>(N.getOperand(0)))
+  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+      !isa<ConstantSDNode>(N.getOperand(1)))
     return false;
 
-  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+  int64_t SplatImm =
+      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
 
   if (!isUInt<5>(SplatImm))
     return false;
@@ -1980,49 +2274,42 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// (load (add base, (addi src, off1)), off2)
+//    -> (load (add base, src), off1+off2)
+// (store val, (add base, (addi src, off1)), off2)
+//    -> (store val, (add base, src), off1+off2)
 // This is possible when off1+off2 fits a 12-bit immediate.
 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
-  int OffsetOpIdx;
-  int BaseOpIdx;
-
-  // Only attempt this optimisation for I-type loads and S-type stores.
-  switch (N->getMachineOpcode()) {
-  default:
+  unsigned OffsetOpIdx, BaseOpIdx;
+  if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
     return false;
-  case RISCV::LB:
-  case RISCV::LH:
-  case RISCV::LW:
-  case RISCV::LBU:
-  case RISCV::LHU:
-  case RISCV::LWU:
-  case RISCV::LD:
-  case RISCV::FLH:
-  case RISCV::FLW:
-  case RISCV::FLD:
-    BaseOpIdx = 0;
-    OffsetOpIdx = 1;
-    break;
-  case RISCV::SB:
-  case RISCV::SH:
-  case RISCV::SW:
-  case RISCV::SD:
-  case RISCV::FSH:
-  case RISCV::FSW:
-  case RISCV::FSD:
-    BaseOpIdx = 1;
-    OffsetOpIdx = 2;
-    break;
-  }
 
   if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
     return false;
 
   SDValue Base = N->getOperand(BaseOpIdx);
 
-  // If the base is an ADDI, we can merge it into the load/store.
-  if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
+  if (!Base.isMachineOpcode())
     return false;
 
+  if (Base.getMachineOpcode() == RISCV::ADDI) {
+    // If the base is an ADDI, we can merge it into the load/store.
+  } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
+             isa<ConstantSDNode>(Base.getOperand(1)) &&
+             Base.getOperand(0).isMachineOpcode() &&
+             Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
+             isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
+    // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
+    // and LUI+ADDI would have produced the same result. This is true for all
+    // simm32 values except 0x7ffff800-0x7fffffff.
+    int64_t Offset =
+        SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
+    Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
+    if (!isInt<32>(Offset))
+      return false;
+  } else
+    return false;
+
   SDValue ImmOperand = Base.getOperand(1);
   uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
 
@@ -2039,7 +2326,8 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
   // to provide a margin of safety before off1 can overflow the 12 bits.
   // Check if off2 falls within that margin; if so off1+off2 can't overflow.
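// Editor's sketch (illustrative, not part of the patch) of the LUI+ADDIW vs
// LUI+ADDI equivalence used by the ADDIW branch above: ADDIW truncates the
// sum to 32 bits and sign-extends, while ADDI adds in 64 bits; the two agree
// exactly when the 64-bit sum already fits in int32, which is what the
// peephole's isInt<32>(Offset) check verifies.
#include <cassert>
#include <cstdint>
static int64_t sext32(uint64_t V) {
  return static_cast<int32_t>(static_cast<uint32_t>(V));
}
int main() {
  for (uint64_t Hi20 = 0; Hi20 < (1u << 20); Hi20 += 257) { // sampled LUI imms
    int64_t Lui = sext32(Hi20 << 12);
    for (int64_t Imm = -2048; Imm <= 2047; Imm += 129) {
      int64_t Addi = Lui + Imm;                            // 64-bit add
      int64_t Addiw = sext32(static_cast<uint64_t>(Addi)); // 32-bit add + sext
      assert((Addi == Addiw) == (Addi >= INT32_MIN && Addi <= INT32_MAX));
    }
  }
  return 0;
}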
const DataLayout &DL = CurDAG->getDataLayout(); - Align Alignment = GA->getGlobal()->getPointerAlignment(DL); + Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL), + GA->getOffset()); if (Offset2 != 0 && Alignment <= Offset2) return false; int64_t Offset1 = GA->getOffset(); @@ -2049,7 +2337,7 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { CombinedOffset, GA->getTargetFlags()); } else if (auto *CP = dyn_cast(ImmOperand)) { // Ditto. - Align Alignment = CP->getAlign(); + Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); if (Offset2 != 0 && Alignment <= Offset2) return false; int64_t Offset1 = CP->getOffset(); @@ -2068,12 +2356,13 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { LLVM_DEBUG(dbgs() << "\n"); // Modify the offset operand of the load/store. - if (BaseOpIdx == 0) // Load - CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, - N->getOperand(2)); - else // Store - CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), - ImmOperand, N->getOperand(3)); + if (BaseOpIdx == 0) { // Load + N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, + N->getOperand(2)); + } else { // Store + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), + ImmOperand, N->getOperand(3)); + } return true; } @@ -2130,6 +2419,8 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { case RISCV::SUBW: case RISCV::MULW: case RISCV::SLLIW: + case RISCV::GREVIW: + case RISCV::GORCIW: // Result is already sign extended just remove the sext.w. // NOTE: We only handle the nodes that are selected with hasAllWUsers. ReplaceUses(N, N0.getNode()); @@ -2139,8 +2430,113 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { return false; } +// Optimize masked RVV pseudo instructions with a known all-ones mask to their +// corresponding "unmasked" pseudo versions. The mask we're interested in will +// take the form of a V0 physical register operand, with a glued +// register-setting instruction. +bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { + const RISCV::RISCVMaskedPseudoInfo *I = + RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); + if (!I) + return false; + + unsigned MaskOpIdx = I->MaskOpIdx; + + // Check that we're using V0 as a mask register. + if (!isa(N->getOperand(MaskOpIdx)) || + cast(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) + return false; + + // The glued user defines V0. + const auto *Glued = N->getGluedNode(); + + if (!Glued || Glued->getOpcode() != ISD::CopyToReg) + return false; + + // Check that we're defining V0 as a mask register. + if (!isa(Glued->getOperand(1)) || + cast(Glued->getOperand(1))->getReg() != RISCV::V0) + return false; + + // Check the instruction defining V0; it needs to be a VMSET pseudo. + SDValue MaskSetter = Glued->getOperand(2); + + const auto IsVMSet = [](unsigned Opc) { + return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || + Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || + Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || + Opc == RISCV::PseudoVMSET_M_B8; + }; + + // TODO: Check that the VMSET is the expected bitwidth? The pseudo has + // undefined behaviour if it's the wrong bitwidth, so we could choose to + // assume that it's all-ones? Same applies to its VL. + if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) + return false; + + // Retrieve the tail policy operand index, if any. 
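// Editor's sketch (illustrative, not part of the patch; toy types, not the
// LLVM API) of the index walk-back that follows: the policy operand is the
// last operand unless a Chain and/or Glue operand trails it, so the index
// steps back over those.
#include <cassert>
#include <vector>
enum OpKind { Value, Chain, Glue };
static unsigned policyOpIdx(const std::vector<OpKind> &Ops) {
  unsigned I = Ops.size() - 1;
  if (Ops[I] == Glue)
    --I; // a glued input, if present, is last
  if (Ops[I] == Chain)
    --I; // then a chain, if present
  return I;
}
int main() {
  assert(policyOpIdx({Value, Value}) == 1);
  assert(policyOpIdx({Value, Value, Chain}) == 1);
  assert(policyOpIdx({Value, Value, Chain, Glue}) == 1);
  return 0;
}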
+  Optional<unsigned> TailPolicyOpIdx;
+  const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
+  const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
+
+  bool IsTA = true;
+  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
+    // The last operand of the pseudo is the policy op, but we might have a
+    // Glue operand last. We might also have a chain.
+    TailPolicyOpIdx = N->getNumOperands() - 1;
+    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
+      (*TailPolicyOpIdx)--;
+    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
+      (*TailPolicyOpIdx)--;
+
+    if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
+          RISCVII::TAIL_AGNOSTIC)) {
+      // Keep the true-masked instruction when there is no unmasked TU
+      // instruction.
+      if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
+        return false;
+      // We can't use TA if the tie-operand is not IMPLICIT_DEF.
+      if (!N->getOperand(0).isUndef())
+        IsTA = false;
+    }
+  }
+
+  unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
+
+  // Check that we're dropping the mask operand and any policy operand
+  // when we transform to this unmasked pseudo. Additionally, if this
+  // instruction is tail agnostic, the unmasked instruction should not have a
+  // merge op.
+  uint64_t TSFlags = TII.get(Opc).TSFlags;
+  assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
+         RISCVII::hasDummyMaskOp(TSFlags) &&
+         !RISCVII::hasVecPolicyOp(TSFlags) &&
+         "Unexpected pseudo to transform to");
+  (void)TSFlags;
+
+  SmallVector<SDValue, 8> Ops;
+  // Skip the merge operand at index 0 if IsTA.
+  for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
+    // Skip the mask, the policy, and the Glue.
+    SDValue Op = N->getOperand(I);
+    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
+        Op.getValueType() == MVT::Glue)
+      continue;
+    Ops.push_back(Op);
+  }
+
+  // Transitively apply any node glued to our new node.
+  if (auto *TGlued = Glued->getGluedNode())
+    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
+
+  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+  ReplaceUses(N, Result);
+
+  return true;
+}
+
 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
 // for instruction scheduling.
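// Editor's sketch (illustrative, not part of the patch; toy strings, not the
// LLVM API) of the operand rewrite in doPeepholeMaskedRVV above: the unmasked
// pseudo's operand list is the masked one minus the merge operand (when tail
// agnostic), the V0 mask, the policy immediate, and any trailing glue.
#include <cassert>
#include <string>
#include <vector>
static std::vector<std::string>
unmaskedOperands(const std::vector<std::string> &Ops, bool IsTA,
                 unsigned MaskOpIdx, unsigned PolicyOpIdx) {
  std::vector<std::string> Out;
  for (unsigned I = IsTA ? 1 : 0, E = Ops.size(); I != E; ++I) {
    if (I == MaskOpIdx || I == PolicyOpIdx || Ops[I] == "glue")
      continue; // dropped when converting to the unmasked form
    Out.push_back(Ops[I]);
  }
  return Out;
}
int main() {
  // {merge, src, v0, vl, sew, policy} -> tail-agnostic drops merge/v0/policy.
  std::vector<std::string> In{"merge", "src", "v0", "vl", "sew", "policy"};
  std::vector<std::string> Want{"src", "vl", "sew"};
  assert(unmaskedOperands(In, /*IsTA=*/true, 2, 5) == Want);
  return 0;
}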
-FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { - return new RISCVDAGToDAGISel(TM); +FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new RISCVDAGToDAGISel(TM, OptLevel); } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index c429a9298739..b50927cfcca5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -24,8 +24,9 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { const RISCVSubtarget *Subtarget = nullptr; public: - explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) - : SelectionDAGISel(TargetMachine) {} + explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TargetMachine, OptLevel) {} StringRef getPassName() const override { return "RISCV DAG->DAG Pattern Instruction Selection"; @@ -44,8 +45,10 @@ public: bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - bool SelectAddrFI(SDValue Addr, SDValue &Base); + bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectBaseAddr(SDValue Addr, SDValue &Base); + bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) { @@ -117,12 +120,14 @@ public: private: bool doPeepholeLoadStoreADDI(SDNode *Node); bool doPeepholeSExtW(SDNode *Node); + bool doPeepholeMaskedRVV(SDNode *Node); }; namespace RISCV { struct VLSEGPseudo { uint16_t NF : 4; uint16_t Masked : 1; + uint16_t IsTU : 1; uint16_t Strided : 1; uint16_t FF : 1; uint16_t Log2SEW : 3; @@ -133,6 +138,7 @@ struct VLSEGPseudo { struct VLXSEGPseudo { uint16_t NF : 4; uint16_t Masked : 1; + uint16_t IsTU : 1; uint16_t Ordered : 1; uint16_t Log2SEW : 3; uint16_t LMUL : 3; @@ -187,6 +193,13 @@ struct VLX_VSXPseudo { uint16_t Pseudo; }; +struct RISCVMaskedPseudoInfo { + uint16_t MaskedPseudo; + uint16_t UnmaskedPseudo; + uint16_t UnmaskedTUPseudo; + uint8_t MaskOpIdx; +}; + #define GET_RISCVVSSEGTable_DECL #define GET_RISCVVLSEGTable_DECL #define GET_RISCVVLXSEGTable_DECL @@ -195,6 +208,7 @@ struct VLX_VSXPseudo { #define GET_RISCVVSETable_DECL #define GET_RISCVVLXTable_DECL #define GET_RISCVVSXTable_DECL +#define GET_RISCVMaskedPseudosTable_DECL #include "RISCVGenSearchableTables.inc" } // namespace RISCV diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 97d24c8e9c0b..ff645dea4e7a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -112,17 +112,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasVInstructions()) { auto addRegClassForRVV = [this](MVT VT) { + // Disable the smallest fractional LMUL types if ELEN is less than + // RVVBitsPerBlock. 
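// Editor's sketch (illustrative, not part of the patch) of the ELEN gate
// described above plus the size-to-register-class mapping that follows, using
// RVVBitsPerBlock = 64 (upstream's value; treated as an assumption here).
#include <cassert>
enum RegClass { None, VR, VRM2, VRM4, VRM8 };
static RegClass classFor(unsigned MinNumElts, unsigned EltBits, unsigned ELEN) {
  const unsigned Block = 64; // RVVBitsPerBlock
  if (MinNumElts < Block / ELEN)
    return None;                        // smallest fractional LMUL disabled
  unsigned Size = MinNumElts * EltBits; // known-minimum size in bits
  if (Size <= Block) return VR;         // LMUL <= 1 (incl. fractional)
  if (Size == 2 * Block) return VRM2;   // LMUL == 2
  if (Size == 4 * Block) return VRM4;   // LMUL == 4
  return VRM8;                          // LMUL == 8, the only remaining size
}
int main() {
  assert(classFor(1, 8, 64) == VR);   // nxv1i8 (LMUL=1/8) legal at ELEN=64
  assert(classFor(1, 8, 32) == None); // ...but disabled when ELEN=32
  assert(classFor(16, 8, 64) == VRM2); // nxv16i8 -> 128 bits -> VRM2
  assert(classFor(8, 64, 64) == VRM8); // nxv8i64 -> 512 bits -> VRM8
  return 0;
}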
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN(); + if (VT.getVectorMinNumElements() < MinElts) + return; + unsigned Size = VT.getSizeInBits().getKnownMinValue(); - assert(Size <= 512 && isPowerOf2_32(Size)); const TargetRegisterClass *RC; - if (Size <= 64) + if (Size <= RISCV::RVVBitsPerBlock) RC = &RISCV::VRRegClass; - else if (Size == 128) + else if (Size == 2 * RISCV::RVVBitsPerBlock) RC = &RISCV::VRM2RegClass; - else if (Size == 256) + else if (Size == 4 * RISCV::RVVBitsPerBlock) RC = &RISCV::VRM4RegClass; - else + else if (Size == 8 * RISCV::RVVBitsPerBlock) RC = &RISCV::VRM8RegClass; + else + llvm_unreachable("Unexpected size"); addRegisterClass(VT, RC); }; @@ -170,8 +177,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setStackPointerRegisterToSaveRestore(RISCV::X2); - for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) - setLoadExtAction(N, XLenVT, MVT::i1, Promote); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT, + MVT::i1, Promote); // TODO: add all necessary setOperationAction calls. setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); @@ -181,100 +188,75 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::SELECT_CC, XLenVT, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - setOperationAction(ISD::VACOPY, MVT::Other, Expand); - setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!Subtarget.hasStdExtZbb()) { - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - } + + setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); + + if (!Subtarget.hasStdExtZbb()) + setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); if (Subtarget.is64Bit()) { - setOperationAction(ISD::ADD, MVT::i32, Custom); - setOperationAction(ISD::SUB, MVT::i32, Custom); - setOperationAction(ISD::SHL, MVT::i32, Custom); - setOperationAction(ISD::SRA, MVT::i32, Custom); - setOperationAction(ISD::SRL, MVT::i32, Custom); - - setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::UADDSAT, MVT::i32, Custom); - setOperationAction(ISD::USUBSAT, MVT::i32, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); + + setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, + MVT::i32, Custom); + + setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, + MVT::i32, Custom); } else { - setLibcallName(RTLIB::SHL_I128, nullptr); - setLibcallName(RTLIB::SRL_I128, nullptr); - setLibcallName(RTLIB::SRA_I128, nullptr); - setLibcallName(RTLIB::MUL_I128, nullptr); + setLibcallName( + {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128}, + nullptr); setLibcallName(RTLIB::MULO_I64, nullptr); } if (!Subtarget.hasStdExtM()) { - setOperationAction(ISD::MUL, XLenVT, Expand); - setOperationAction(ISD::MULHS, XLenVT, Expand); - setOperationAction(ISD::MULHU, XLenVT, Expand); - setOperationAction(ISD::SDIV, XLenVT, Expand); - 
setOperationAction(ISD::UDIV, XLenVT, Expand); - setOperationAction(ISD::SREM, XLenVT, Expand); - setOperationAction(ISD::UREM, XLenVT, Expand); + setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM}, + XLenVT, Expand); } else { if (Subtarget.is64Bit()) { - setOperationAction(ISD::MUL, MVT::i32, Custom); - setOperationAction(ISD::MUL, MVT::i128, Custom); - - setOperationAction(ISD::SDIV, MVT::i8, Custom); - setOperationAction(ISD::UDIV, MVT::i8, Custom); - setOperationAction(ISD::UREM, MVT::i8, Custom); - setOperationAction(ISD::SDIV, MVT::i16, Custom); - setOperationAction(ISD::UDIV, MVT::i16, Custom); - setOperationAction(ISD::UREM, MVT::i16, Custom); - setOperationAction(ISD::SDIV, MVT::i32, Custom); - setOperationAction(ISD::UDIV, MVT::i32, Custom); - setOperationAction(ISD::UREM, MVT::i32, Custom); + setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom); + + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, + {MVT::i8, MVT::i16, MVT::i32}, Custom); } else { setOperationAction(ISD::MUL, MVT::i64, Custom); } } - setOperationAction(ISD::SDIVREM, XLenVT, Expand); - setOperationAction(ISD::UDIVREM, XLenVT, Expand); - setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); - setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); + setOperationAction( + {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, + Expand); - setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); - setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); - setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); + setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, + Custom); if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() || Subtarget.hasStdExtZbkb()) { - if (Subtarget.is64Bit()) { - setOperationAction(ISD::ROTL, MVT::i32, Custom); - setOperationAction(ISD::ROTR, MVT::i32, Custom); - } + if (Subtarget.is64Bit()) + setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); } else { - setOperationAction(ISD::ROTL, XLenVT, Expand); - setOperationAction(ISD::ROTR, XLenVT, Expand); + setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); } if (Subtarget.hasStdExtZbp()) { // Custom lower bswap/bitreverse so we can convert them to GREVI to enable // more combining. - setOperationAction(ISD::BITREVERSE, XLenVT, Custom); - setOperationAction(ISD::BSWAP, XLenVT, Custom); - setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); + setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, XLenVT, Custom); + // BSWAP i8 doesn't exist. - setOperationAction(ISD::BITREVERSE, MVT::i16, Custom); - setOperationAction(ISD::BSWAP, MVT::i16, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); - if (Subtarget.is64Bit()) { - setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); - setOperationAction(ISD::BSWAP, MVT::i32, Custom); - } + setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i16, Custom); + + if (Subtarget.is64Bit()) + setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i32, Custom); } else { // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll // pattern match it directly in isel. 
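// Editor's sketch (illustrative, not part of the patch) of the GREV
// (generalized reverse) operation that the Zbp comments above say bswap and
// bitreverse are converted to: each set bit in the control swaps adjacent bit
// groups of that size (reference semantics from the draft Zbp specification).
#include <cassert>
#include <cstdint>
static uint32_t grev32(uint32_t X, unsigned K) {
  if (K & 1)  X = ((X & 0x55555555u) << 1)  | ((X & 0xAAAAAAAAu) >> 1);
  if (K & 2)  X = ((X & 0x33333333u) << 2)  | ((X & 0xCCCCCCCCu) >> 2);
  if (K & 4)  X = ((X & 0x0F0F0F0Fu) << 4)  | ((X & 0xF0F0F0F0u) >> 4);
  if (K & 8)  X = ((X & 0x00FF00FFu) << 8)  | ((X & 0xFF00FF00u) >> 8);
  if (K & 16) X = ((X & 0x0000FFFFu) << 16) | ((X & 0xFFFF0000u) >> 16);
  return X;
}
int main() {
  assert(grev32(0x12345678u, 24) == 0x78563412u); // grevi x, 24 == bswap
  assert(grev32(0x00000001u, 31) == 0x80000000u); // grevi x, 31 == bitreverse
  return 0;
}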
@@ -288,36 +270,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.hasStdExtZbb()) { - setOperationAction(ISD::SMIN, XLenVT, Legal); - setOperationAction(ISD::SMAX, XLenVT, Legal); - setOperationAction(ISD::UMIN, XLenVT, Legal); - setOperationAction(ISD::UMAX, XLenVT, Legal); + setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, + Legal); - if (Subtarget.is64Bit()) { - setOperationAction(ISD::CTTZ, MVT::i32, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); - setOperationAction(ISD::CTLZ, MVT::i32, Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); - } + if (Subtarget.is64Bit()) + setOperationAction( + {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, + MVT::i32, Custom); } else { - setOperationAction(ISD::CTTZ, XLenVT, Expand); - setOperationAction(ISD::CTLZ, XLenVT, Expand); - setOperationAction(ISD::CTPOP, XLenVT, Expand); + setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand); + + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS, MVT::i32, Custom); } if (Subtarget.hasStdExtZbt()) { - setOperationAction(ISD::FSHL, XLenVT, Custom); - setOperationAction(ISD::FSHR, XLenVT, Custom); + setOperationAction({ISD::FSHL, ISD::FSHR}, XLenVT, Custom); setOperationAction(ISD::SELECT, XLenVT, Legal); - if (Subtarget.is64Bit()) { - setOperationAction(ISD::FSHL, MVT::i32, Custom); - setOperationAction(ISD::FSHR, MVT::i32, Custom); - } + if (Subtarget.is64Bit()) + setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Custom); } else { setOperationAction(ISD::SELECT, XLenVT, Custom); } + static constexpr ISD::NodeType FPLegalNodeTypes[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, + ISD::LLRINT, ISD::LROUND, ISD::LLROUND, + ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, + ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS}; + static const ISD::CondCode FPCCToExpand[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, @@ -331,50 +315,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i16, Custom); if (Subtarget.hasStdExtZfh()) { - setOperationAction(ISD::FMINNUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); - setOperationAction(ISD::LRINT, MVT::f16, Legal); - setOperationAction(ISD::LLRINT, MVT::f16, Legal); - setOperationAction(ISD::LROUND, MVT::f16, Legal); - setOperationAction(ISD::LLROUND, MVT::f16, Legal); - setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal); - setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal); - setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal); - setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal); + for (auto NT : FPLegalNodeTypes) + setOperationAction(NT, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal); - setOperationAction(ISD::STRICT_FSETCCS, 
MVT::f16, Legal); - for (auto CC : FPCCToExpand) - setCondCodeAction(CC, MVT::f16, Expand); + setCondCodeAction(FPCCToExpand, MVT::f16, Expand); setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f16, Expand); - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FCEIL, MVT::f16, Promote); - setOperationAction(ISD::FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); - setOperationAction(ISD::FRINT, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, Promote); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); - setOperationAction(ISD::FTRUNC, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FPOWI, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction({ISD::FREM, ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC, + ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN, + ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG, + ISD::FLOG2, ISD::FLOG10}, + MVT::f16, Promote); // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have // complete support for all operations in LegalizeDAG. @@ -385,26 +340,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.hasStdExtF()) { - setOperationAction(ISD::FMINNUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); - setOperationAction(ISD::LRINT, MVT::f32, Legal); - setOperationAction(ISD::LLRINT, MVT::f32, Legal); - setOperationAction(ISD::LROUND, MVT::f32, Legal); - setOperationAction(ISD::LLROUND, MVT::f32, Legal); - setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal); - setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal); - setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal); - setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); - for (auto CC : FPCCToExpand) - setCondCodeAction(CC, MVT::f32, Expand); + for (auto NT : FPLegalNodeTypes) + setOperationAction(NT, MVT::f32, Legal); + setCondCodeAction(FPCCToExpand, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Expand); @@ -418,28 +356,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i32, Custom); if (Subtarget.hasStdExtD()) { - setOperationAction(ISD::FMINNUM, MVT::f64, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); - setOperationAction(ISD::LRINT, MVT::f64, Legal); - setOperationAction(ISD::LLRINT, MVT::f64, Legal); - 
setOperationAction(ISD::LROUND, MVT::f64, Legal); - setOperationAction(ISD::LLROUND, MVT::f64, Legal); - setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal); - setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal); - setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal); - setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); + for (auto NT : FPLegalNodeTypes) + setOperationAction(NT, MVT::f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); - for (auto CC : FPCCToExpand) - setCondCodeAction(CC, MVT::f64, Expand); + setCondCodeAction(FPCCToExpand, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Expand); @@ -451,40 +372,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::f64, MVT::f16, Expand); } - if (Subtarget.is64Bit()) { - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); - } + if (Subtarget.is64Bit()) + setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT, + ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, + MVT::i32, Custom); if (Subtarget.hasStdExtF()) { - setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom); - setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom); + setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT, + Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal); + setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, + ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, + XLenVT, Legal); setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom); setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } - setOperationAction(ISD::GlobalAddress, XLenVT, Custom); - setOperationAction(ISD::BlockAddress, XLenVT, Custom); - setOperationAction(ISD::ConstantPool, XLenVT, Custom); - setOperationAction(ISD::JumpTable, XLenVT, Custom); + setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, + ISD::JumpTable}, + XLenVT, Custom); setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::Constant, MVT::i64, Custom); + // TODO: On M-mode only targets, the cycle[h] CSR may not be present. // Unfortunately this can't be determined just from the ISA naming string. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Subtarget.is64Bit() ? 
Legal : Custom); - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); if (Subtarget.is64Bit()) setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); @@ -505,19 +424,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // RVV intrinsics may have illegal operands. // We also need to custom legalize vmv.x.s. - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); - if (Subtarget.is64Bit()) { + setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, + {MVT::i8, MVT::i16}, Custom); + if (Subtarget.is64Bit()) setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); - } else { - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); - } + else + setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, + MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, + MVT::Other, Custom); static const unsigned IntegerVPOps[] = { ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, @@ -527,191 +443,175 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, - ISD::VP_MERGE, ISD::VP_SELECT}; + ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FPTOSI, + ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, + ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE}; static const unsigned FloatingPointVPOps[] = { - ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, - ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, - ISD::VP_SELECT}; + ISD::VP_FADD, ISD::VP_FSUB, + ISD::VP_FMUL, ISD::VP_FDIV, + ISD::VP_FNEG, ISD::VP_FMA, + ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, + ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, + ISD::VP_MERGE, ISD::VP_SELECT, + ISD::VP_SITOFP, ISD::VP_UITOFP, + ISD::VP_SETCC, ISD::VP_FP_ROUND, + ISD::VP_FP_EXTEND}; if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector // element type being illegal. 
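// Editor's sketch (illustrative, not part of the patch): on RV32 an i64
// vector element is not a legal scalar, so values cross from GPRs as (lo, hi)
// 32-bit halves -- the shape behind the custom vXi64 handling above and the
// SPLAT_VECTOR_PARTS node used elsewhere in this file.
#include <cassert>
#include <cstdint>
static uint64_t fromParts(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}
int main() {
  assert(fromParts(0x89ABCDEFu, 0x01234567u) == 0x0123456789ABCDEFull);
  assert(fromParts(0xFFFFFFFFu, 0xFFFFFFFFu) == ~0ULL); // splat of all-ones
  return 0;
}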
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); - - setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); - setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); - - setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom); - setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom); + setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, + MVT::i64, Custom); + + setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, + ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, + ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}, + MVT::i64, Custom); + + setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, + ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, + ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, + ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, + MVT::i64, Custom); } for (MVT VT : BoolVecVTs) { + if (!isTypeLegal(VT)) + continue; + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); // Mask VTs are custom-expanded into a series of standard nodes - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, + ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, + VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, + Custom); setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction(ISD::VSELECT, VT, Expand); - setOperationAction(ISD::VP_MERGE, VT, Expand); - setOperationAction(ISD::VP_SELECT, VT, Expand); + setOperationAction( + {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, + Expand); - setOperationAction(ISD::VP_AND, VT, Custom); - setOperationAction(ISD::VP_OR, VT, Custom); - setOperationAction(ISD::VP_XOR, VT, Custom); + setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); - setOperationAction(ISD::VECREDUCE_AND, VT, Custom); - setOperationAction(ISD::VECREDUCE_OR, VT, Custom); - setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); + setOperationAction( + {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, + Custom); - setOperationAction(ISD::VP_REDUCE_AND, VT, Custom); - setOperationAction(ISD::VP_REDUCE_OR, VT, Custom); - setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom); + setOperationAction( + {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, + Custom); // RVV has native int->float & float->int conversions where the // element type sizes are within one power-of-two of each 
other. Any // wider distances between type sizes have to be lowered as sequences // which progressively narrow the gap in stages. - setOperationAction(ISD::SINT_TO_FP, VT, Custom); - setOperationAction(ISD::UINT_TO_FP, VT, Custom); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction( + {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, + VT, Custom); // Expand all extending loads to types larger than this, and truncating // stores from types larger than this. for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { setTruncStoreAction(OtherVT, VT, Expand); - setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); - setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT, + VT, Expand); } + + setOperationAction( + {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT, + Custom); + setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); } for (MVT VT : IntVecVTs) { - if (VT.getVectorElementType() == MVT::i64 && - !Subtarget.hasVInstructionsI64()) + if (!isTypeLegal(VT)) continue; setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); // Vectors implement MULHS/MULHU. - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. - if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) { - setOperationAction(ISD::MULHU, VT, Expand); - setOperationAction(ISD::MULHS, VT, Expand); - } + if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) + setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, + Legal); - setOperationAction(ISD::ROTL, VT, Expand); - setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); - setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTLZ, VT, Expand); - setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP, ISD::BSWAP}, VT, + Expand); setOperationAction(ISD::BSWAP, VT, Expand); // Custom-lower extensions and truncations from/to mask types. - setOperationAction(ISD::ANY_EXTEND, VT, Custom); - setOperationAction(ISD::SIGN_EXTEND, VT, Custom); - setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, + VT, Custom); // RVV has native int->float & float->int conversions where the // element type sizes are within one power-of-two of each other. Any // wider distances between type sizes have to be lowered as sequences // which progressively narrow the gap in stages. 
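// Editor's sketch (illustrative, not part of the patch) of the staged
// conversion rule stated above: a conversion is a single native step only
// when the element sizes are within one power of two of each other; a wider
// gap is closed one doubling or halving at a time.
#include <cassert>
static int conversionStages(unsigned FromBits, unsigned ToBits) {
  int Stages = 0;
  while (FromBits != ToBits) {
    FromBits = FromBits < ToBits ? FromBits * 2 : FromBits / 2;
    ++Stages;
  }
  return Stages;
}
int main() {
  assert(conversionStages(32, 64) == 1); // e.g. i32 -> f64: native
  assert(conversionStages(16, 64) == 2); // i16 -> f64: widen via 32 bits first
  assert(conversionStages(8, 64) == 3);  // i8 -> f64: three stages
  return 0;
}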
- setOperationAction(ISD::SINT_TO_FP, VT, Custom); - setOperationAction(ISD::UINT_TO_FP, VT, Custom); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction( + {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, + VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Legal); - setOperationAction(ISD::UADDSAT, VT, Legal); - setOperationAction(ISD::SSUBSAT, VT, Legal); - setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction( + {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" // nodes which truncate by one power of two at a time. setOperationAction(ISD::TRUNCATE, VT, Custom); // Custom-lower insert/extract operations to simplify patterns. - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, + Custom); // Custom-lower reduction operations to set up the corresponding custom // nodes' operands. - setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_AND, VT, Custom); - setOperationAction(ISD::VECREDUCE_OR, VT, Custom); - setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); - - for (unsigned VPOpc : IntegerVPOps) - setOperationAction(VPOpc, VT, Custom); - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - - setOperationAction(ISD::MLOAD, VT, Custom); - setOperationAction(ISD::MSTORE, VT, Custom); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); - - setOperationAction(ISD::VP_LOAD, VT, Custom); - setOperationAction(ISD::VP_STORE, VT, Custom); - setOperationAction(ISD::VP_GATHER, VT, Custom); - setOperationAction(ISD::VP_SCATTER, VT, Custom); - - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, + ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, + ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}, + VT, Custom); + + setOperationAction(IntegerVPOps, VT, Custom); + + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + + setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, + VT, Custom); + + setOperationAction( + {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, + Custom); + + setOperationAction( + {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, + VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction(ISD::STEP_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); + setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom); for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { setTruncStoreAction(VT, OtherVT, Expand); - setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); - setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT, + 
VT, Expand); } + // Splice + setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); + // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point // type that can represent the value exactly. if (VT.getVectorElementType() != MVT::i64) { @@ -719,8 +619,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32; EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); if (isTypeLegal(FloatVT)) { - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); + setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, + Custom); } } } @@ -745,21 +645,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // sizes are within one power-of-two of each other. Therefore conversions // between vXf16 and vXf64 must be lowered as sequences which convert via // vXf32. - setOperationAction(ISD::FP_ROUND, VT, Custom); - setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); // Custom-lower insert/extract operations to simplify patterns. - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, + Custom); // Expand various condition codes (explained above). - for (auto CC : VFPCCToExpand) - setCondCodeAction(CC, VT, Expand); - - setOperationAction(ISD::FMINNUM, VT, Legal); - setOperationAction(ISD::FMAXNUM, VT, Legal); - - setOperationAction(ISD::FTRUNC, VT, Custom); - setOperationAction(ISD::FCEIL, VT, Custom); - setOperationAction(ISD::FFLOOR, VT, Custom); + setCondCodeAction(VFPCCToExpand, VT, Expand); + + setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); + + setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND}, + VT, Custom); + + setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, + ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX}, + VT, Custom); + + // Expand FP operations that need libcalls. 
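// Editor's sketch (illustrative, not part of the patch): operations like FREM
// have no RVV instruction and no vector libcall, so the Expand actions set
// below fall back to scalar libm-style math, conceptually one call per lane
// as modeled here.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>
static std::vector<float> vfrem(const std::vector<float> &A,
                                const std::vector<float> &B) {
  std::vector<float> R(A.size());
  for (std::size_t I = 0; I < A.size(); ++I)
    R[I] = std::fmod(A[I], B[I]); // one scalar fmod call per lane
  return R;
}
int main() {
  std::vector<float> R = vfrem({5.5f, -7.0f}, {2.0f, 3.0f});
  assert(R[0] == 1.5f && R[1] == -1.0f);
  return 0;
}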
+ setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); @@ -768,30 +682,25 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, VT, Legal); - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); - setOperationAction(ISD::MLOAD, VT, Custom); - setOperationAction(ISD::MSTORE, VT, Custom); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, + VT, Custom); - setOperationAction(ISD::VP_LOAD, VT, Custom); - setOperationAction(ISD::VP_STORE, VT, Custom); - setOperationAction(ISD::VP_GATHER, VT, Custom); - setOperationAction(ISD::VP_SCATTER, VT, Custom); + setOperationAction( + {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, + Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction( + {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, + VT, Custom); - setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); + setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); - for (unsigned VPOpc : FloatingPointVPOps) - setOperationAction(VPOpc, VT, Custom); + setOperationAction(FloatingPointVPOps, VT, Custom); }; // Sets common extload/truncstore actions on RVV floating-point vector @@ -804,21 +713,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } }; - if (Subtarget.hasVInstructionsF16()) - for (MVT VT : F16VecVTs) + if (Subtarget.hasVInstructionsF16()) { + for (MVT VT : F16VecVTs) { + if (!isTypeLegal(VT)) + continue; SetCommonVFPActions(VT); + } + } - for (MVT VT : F32VecVTs) { - if (Subtarget.hasVInstructionsF32()) + if (Subtarget.hasVInstructionsF32()) { + for (MVT VT : F32VecVTs) { + if (!isTypeLegal(VT)) + continue; SetCommonVFPActions(VT); - SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); + SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); + } } - for (MVT VT : F64VecVTs) { - if (Subtarget.hasVInstructionsF64()) + if (Subtarget.hasVInstructionsF64()) { + for (MVT VT : F64VecVTs) { + if (!isTypeLegal(VT)) + continue; SetCommonVFPActions(VT); - SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); - SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); + SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); + SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); + } } if (Subtarget.useRVVForFixedLengthVectors()) { @@ -831,23 +750,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(Op, VT, Expand); for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, OtherVT, 
                             Expand);
-        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
-        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
-        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
+                         OtherVT, VT, Expand);
       }
 
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
-        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
+                           Custom);
 
-        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
+                           Custom);
 
-        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
-        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+                           VT, Custom);
 
-        setOperationAction(ISD::LOAD, VT, Custom);
-        setOperationAction(ISD::STORE, VT, Custom);
+        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
 
       setOperationAction(ISD::SETCC, VT, Custom);
 
@@ -857,100 +774,80 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::BITCAST, VT, Custom);
 
-        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+        setOperationAction(
+            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
+            Custom);
 
-        setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
-        setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
-        setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+        setOperationAction(
+            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
+            Custom);
 
-        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
-        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
-        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
-        setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
+                            ISD::FP_TO_UINT},
+                           VT, Custom);
 
       // Operations below are different between masks and other vectors.
       if (VT.getVectorElementType() == MVT::i1) {
-          setOperationAction(ISD::VP_AND, VT, Custom);
-          setOperationAction(ISD::VP_OR, VT, Custom);
-          setOperationAction(ISD::VP_XOR, VT, Custom);
-          setOperationAction(ISD::AND, VT, Custom);
-          setOperationAction(ISD::OR, VT, Custom);
-          setOperationAction(ISD::XOR, VT, Custom);
+          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
+                              ISD::OR, ISD::XOR},
+                             VT, Custom);
+
+          setOperationAction(
+              {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_TRUNCATE},
+              VT, Custom);
         continue;
       }
 
-        // Use SPLAT_VECTOR to prevent type legalization from destroying the
-        // splats when type legalizing i64 scalar on RV32.
+        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
+        // it before type legalization for i64 vectors on RV32. It will then be
+        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
       // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
      // improvements first.
         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
-          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
         }
 
         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 
-        setOperationAction(ISD::MLOAD, VT, Custom);
-        setOperationAction(ISD::MSTORE, VT, Custom);
-        setOperationAction(ISD::MGATHER, VT, Custom);
-        setOperationAction(ISD::MSCATTER, VT, Custom);
-
-        setOperationAction(ISD::VP_LOAD, VT, Custom);
-        setOperationAction(ISD::VP_STORE, VT, Custom);
-        setOperationAction(ISD::VP_GATHER, VT, Custom);
-        setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
-        setOperationAction(ISD::ADD, VT, Custom);
-        setOperationAction(ISD::MUL, VT, Custom);
-        setOperationAction(ISD::SUB, VT, Custom);
-        setOperationAction(ISD::AND, VT, Custom);
-        setOperationAction(ISD::OR, VT, Custom);
-        setOperationAction(ISD::XOR, VT, Custom);
-        setOperationAction(ISD::SDIV, VT, Custom);
-        setOperationAction(ISD::SREM, VT, Custom);
-        setOperationAction(ISD::UDIV, VT, Custom);
-        setOperationAction(ISD::UREM, VT, Custom);
-        setOperationAction(ISD::SHL, VT, Custom);
-        setOperationAction(ISD::SRA, VT, Custom);
-        setOperationAction(ISD::SRL, VT, Custom);
-
-        setOperationAction(ISD::SMIN, VT, Custom);
-        setOperationAction(ISD::SMAX, VT, Custom);
-        setOperationAction(ISD::UMIN, VT, Custom);
-        setOperationAction(ISD::UMAX, VT, Custom);
-        setOperationAction(ISD::ABS, VT, Custom);
+        setOperationAction(
+            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
+
+        setOperationAction(
+            {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+            Custom);
+
+        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
+                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
+                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
+                           VT, Custom);
+
+        setOperationAction(
+            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
 
         // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
-        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
-          setOperationAction(ISD::MULHS, VT, Custom);
-          setOperationAction(ISD::MULHU, VT, Custom);
-        }
+        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
+          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
 
-        setOperationAction(ISD::SADDSAT, VT, Custom);
-        setOperationAction(ISD::UADDSAT, VT, Custom);
-        setOperationAction(ISD::SSUBSAT, VT, Custom);
-        setOperationAction(ISD::USUBSAT, VT, Custom);
+        setOperationAction(
+            {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
+            Custom);
 
         setOperationAction(ISD::VSELECT, VT, Custom);
         setOperationAction(ISD::SELECT_CC, VT, Expand);
 
-        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
-        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
-        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+        setOperationAction(
+            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
 
         // Custom-lower reduction operations to set up the corresponding custom
         // nodes' operands.
-        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
+                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
+                            ISD::VECREDUCE_UMIN},
+                           VT, Custom);
 
-        for (unsigned VPOpc : IntegerVPOps)
-          setOperationAction(VPOpc, VT, Custom);
+        setOperationAction(IntegerVPOps, VT, Custom);
 
         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
         // type that can represent the value exactly.
@@ -959,10 +856,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
               VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
           EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
-          if (isTypeLegal(FloatVT)) {
-            setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
-            setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
-          }
+          if (isTypeLegal(FloatVT))
+            setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+                               Custom);
         }
       }
 
@@ -979,69 +875,50 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
        }
 
        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
-        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
+                           Custom);
 
-        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
-        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
-        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
-        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-
-        setOperationAction(ISD::LOAD, VT, Custom);
-        setOperationAction(ISD::STORE, VT, Custom);
-        setOperationAction(ISD::MLOAD, VT, Custom);
-        setOperationAction(ISD::MSTORE, VT, Custom);
-        setOperationAction(ISD::MGATHER, VT, Custom);
-        setOperationAction(ISD::MSCATTER, VT, Custom);
-
-        setOperationAction(ISD::VP_LOAD, VT, Custom);
-        setOperationAction(ISD::VP_STORE, VT, Custom);
-        setOperationAction(ISD::VP_GATHER, VT, Custom);
-        setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
-        setOperationAction(ISD::FADD, VT, Custom);
-        setOperationAction(ISD::FSUB, VT, Custom);
-        setOperationAction(ISD::FMUL, VT, Custom);
-        setOperationAction(ISD::FDIV, VT, Custom);
-        setOperationAction(ISD::FNEG, VT, Custom);
-        setOperationAction(ISD::FABS, VT, Custom);
-        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
-        setOperationAction(ISD::FSQRT, VT, Custom);
-        setOperationAction(ISD::FMA, VT, Custom);
-        setOperationAction(ISD::FMINNUM, VT, Custom);
-        setOperationAction(ISD::FMAXNUM, VT, Custom);
-
-        setOperationAction(ISD::FP_ROUND, VT, Custom);
-        setOperationAction(ISD::FP_EXTEND, VT, Custom);
-
-        setOperationAction(ISD::FTRUNC, VT, Custom);
-        setOperationAction(ISD::FCEIL, VT, Custom);
-        setOperationAction(ISD::FFLOOR, VT, Custom);
+        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
+                            ISD::EXTRACT_VECTOR_ELT},
+                           VT, Custom);
+
+        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+                            ISD::MGATHER, ISD::MSCATTER},
+                           VT, Custom);
+
+        setOperationAction(
+            {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+            Custom);
+
+        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
+                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
+                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM},
+                           VT, Custom);
 
+        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+
+        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
+                           VT, Custom);
 
         for (auto CC : VFPCCToExpand)
           setCondCodeAction(CC, VT, Expand);
 
-        setOperationAction(ISD::VSELECT, VT, Custom);
-        setOperationAction(ISD::SELECT, VT, Custom);
+        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
         setOperationAction(ISD::SELECT_CC, VT, Expand);
 
         setOperationAction(ISD::BITCAST, VT, Custom);
 
-        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+        setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
+                            ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
+                           VT, Custom);
 
-        for (unsigned VPOpc : FloatingPointVPOps)
-          setOperationAction(VPOpc, VT, Custom);
+        setOperationAction(FloatingPointVPOps, VT, Custom);
       }
 
       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
-      setOperationAction(ISD::BITCAST, MVT::i8, Custom);
-      setOperationAction(ISD::BITCAST, MVT::i16, Custom);
-      setOperationAction(ISD::BITCAST, MVT::i32, Custom);
-      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
+                         Custom);
       if (Subtarget.hasStdExtZfh())
         setOperationAction(ISD::BITCAST, MVT::f16, Custom);
       if (Subtarget.hasStdExtF())
@@ -1061,30 +938,33 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   // Jumps are expensive, compared to logic
   setJumpIsExpensive();
 
-  setTargetDAGCombine(ISD::ADD);
-  setTargetDAGCombine(ISD::SUB);
-  setTargetDAGCombine(ISD::AND);
-  setTargetDAGCombine(ISD::OR);
-  setTargetDAGCombine(ISD::XOR);
-  setTargetDAGCombine(ISD::ANY_EXTEND);
-  if (Subtarget.hasStdExtF()) {
-    setTargetDAGCombine(ISD::ZERO_EXTEND);
-    setTargetDAGCombine(ISD::FP_TO_SINT);
-    setTargetDAGCombine(ISD::FP_TO_UINT);
-    setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
-    setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
-  }
-  if (Subtarget.hasVInstructions()) {
-    setTargetDAGCombine(ISD::FCOPYSIGN);
-    setTargetDAGCombine(ISD::MGATHER);
-    setTargetDAGCombine(ISD::MSCATTER);
-    setTargetDAGCombine(ISD::VP_GATHER);
-    setTargetDAGCombine(ISD::VP_SCATTER);
+  setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
+                       ISD::OR, ISD::XOR});
+  if (Subtarget.is64Bit())
     setTargetDAGCombine(ISD::SRA);
-    setTargetDAGCombine(ISD::SRL);
-    setTargetDAGCombine(ISD::SHL);
-    setTargetDAGCombine(ISD::STORE);
-  }
+
+  if (Subtarget.hasStdExtF())
+    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
+
+  if (Subtarget.hasStdExtZbp())
+    setTargetDAGCombine({ISD::ROTL, ISD::ROTR});
+
+  if (Subtarget.hasStdExtZbb())
+    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
+
+  if (Subtarget.hasStdExtZbkb())
+    setTargetDAGCombine(ISD::BITREVERSE);
+  if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
+    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+  if (Subtarget.hasStdExtF())
+    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
+                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
+  if (Subtarget.hasVInstructions())
+    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
+                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
+                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
+  if (Subtarget.useRVVForFixedLengthVectors())
+    setTargetDAGCombine(ISD::BITCAST);
 
   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
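Aside (not part of the patch): most of the churn above comes from new list-taking overloads of setOperationAction/setTargetDAGCombine, so one call can register many opcodes. A minimal standalone C++ sketch of that pattern, with illustrative names rather than LLVM's exact signatures:

#include <cstdio>
#include <initializer_list>
#include <map>

// Toy model: the single-opcode setter remains the primitive; the list
// overload simply fans out to it.
static std::map<unsigned, int> Actions;
static void setAction(unsigned Op, int Action) { Actions[Op] = Action; }
static void setAction(std::initializer_list<unsigned> Ops, int Action) {
  for (unsigned Op : Ops)
    setAction(Op, Action);
}

int main() {
  enum { LOAD = 1, STORE = 2, Custom = 7 };
  setAction({LOAD, STORE}, Custom); // replaces two single-opcode calls
  std::printf("%d %d\n", Actions[LOAD], Actions[STORE]); // prints: 7 7
}

The behavior is unchanged; the overload only removes the repetition visible in the deleted lines.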
@@ -1149,6 +1029,24 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.size = MemoryLocation::UnknownSize;
     Info.flags |= MachineMemOperand::MOStore;
     return true;
+  case Intrinsic::riscv_seg2_load:
+  case Intrinsic::riscv_seg3_load:
+  case Intrinsic::riscv_seg4_load:
+  case Intrinsic::riscv_seg5_load:
+  case Intrinsic::riscv_seg6_load:
+  case Intrinsic::riscv_seg7_load:
+  case Intrinsic::riscv_seg8_load:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.memVT =
+        getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
+    Info.align =
+        Align(DL.getTypeSizeInBits(
+                  I.getType()->getStructElementType(0)->getScalarType()) /
+              8);
+    Info.size = MemoryLocation::UnknownSize;
+    Info.flags |= MachineMemOperand::MOLoad;
+    return true;
   }
 }
 
@@ -1160,6 +1058,10 @@ bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   if (AM.BaseGV)
     return false;
 
+  // RVV instructions only support register addressing.
+  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
+    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
+
   // Require a 12-bit signed offset.
   if (!isInt<12>(AM.BaseOffs))
     return false;
@@ -1225,6 +1127,10 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
 }
 
+bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
+  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
+}
+
 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
   return Subtarget.hasStdExtZbb();
 }
@@ -1245,6 +1151,36 @@ bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
          !isa<ConstantSDNode>(Y);
 }
 
+bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
+  auto *C = dyn_cast<ConstantSDNode>(Y);
+  return C && C->getAPIntValue().ule(10);
+}
+
+bool RISCVTargetLowering::
+    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+        SelectionDAG &DAG) const {
+  // One interesting pattern that we'd want to form is 'bit extract':
+  //   ((1 >> Y) & 1) ==/!= 0
+  // But we also need to be careful not to try to reverse that fold.
+
+  // Is this '((1 >> Y) & 1)'?
+  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
+    return false; // Keep the 'bit extract' pattern.
+
+  // Will this be '((1 >> Y) & 1)' after the transform?
+  if (NewShiftOpcode == ISD::SRL && CC->isOne())
+    return true; // Do form the 'bit extract' pattern.
+
+  // If 'X' is a constant and we transform, then we will immediately try to
+  // undo the fold, causing an endless combine loop. So only do the transform
+  // if X is not a constant. This matches the default implementation of this
+  // function.
+  return !XC;
+}
+
 /// Check if sinking \p I's operands to I's basic block is profitable, because
 /// the operands can be folded into a target instruction, e.g.
 /// splats of scalars can fold into vector instructions.
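Aside (not part of the patch): the `ule(10)` bound in hasBitTest follows from ANDI's 12-bit signed immediate. The largest single-bit mask ANDI can encode is 1 << 10 = 1024, since 1 << 11 = 2048 exceeds the simm12 maximum of 2047. A standalone check of that arithmetic:

#include <cassert>
#include <cstdint>

// A bit test (X >> Pos) & 1 can be selected as ANDI+SEQZ/SNEZ only while the
// mask 1 << Pos still fits in ANDI's 12-bit signed immediate.
static bool fitsInAndiBitTest(unsigned Pos) {
  return (int64_t(1) << Pos) <= 2047; // simm12 upper bound
}

int main() {
  assert(fitsInAndiBitTest(10));  // e.g. andi a1, a0, 1024; snez a0, a1
  assert(!fitsInAndiBitTest(11)); // 2048 no longer fits
}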
@@ -1282,6 +1218,7 @@ bool RISCVTargetLowering::shouldSinkOperands(
   if (auto *II = dyn_cast<IntrinsicInst>(I)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::fma:
+    case Intrinsic::vp_fma:
       return Operand == 0 || Operand == 1;
     // FIXME: Our patterns can only match vx/vf instructions when the splat
     // is on the RHS, because TableGen doesn't recognize our VP operations
@@ -1345,6 +1282,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
   return true;
 }
 
+bool RISCVTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
+  // In order to maximise the opportunity for common subexpression elimination,
+  // keep a separate ADD node for the global address offset instead of folding
+  // it into the global address node. Later peephole optimisations may choose
+  // to fold it back in when profitable.
+  return false;
+}
+
 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
@@ -1583,7 +1529,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
   if (VT.getFixedSizeInBits() > 1024 * 8)
     return false;
 
-  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+  unsigned MinVLen = Subtarget.getRealMinVLen();
 
   MVT EltVT = VT.getVectorElementType();
 
@@ -1621,7 +1567,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
   }
 
   // Reject elements larger than ELEN.
-  if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
+  if (EltVT.getSizeInBits() > Subtarget.getELEN())
     return false;
 
   unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
@@ -1649,8 +1595,8 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
           useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
          "Expected legal fixed length vector!");
 
-  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
-  unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
+  unsigned MinVLen = Subtarget.getRealMinVLen();
+  unsigned MaxELen = Subtarget.getELEN();
 
   MVT EltVT = VT.getVectorElementType();
   switch (EltVT.SimpleTy) {
@@ -1710,6 +1656,23 @@ static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
 }
 
+/// Return the mask type suitable for masking the provided vector type. This
+/// is simply an i1 element type vector of the same (possibly scalable) length.
+static MVT getMaskTypeFor(EVT VecVT) {
+  assert(VecVT.isVector());
+  ElementCount EC = VecVT.getVectorElementCount();
+  return MVT::getVectorVT(MVT::i1, EC);
+}
+
+/// Creates an all ones mask suitable for masking a vector of type VecTy with
+/// vector length VL.
+static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
+                              SelectionDAG &DAG) {
+  MVT MaskVT = getMaskTypeFor(VecVT);
+  return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+}
+
 // Gets the two common "VL" operands: an all-ones mask and the vector length.
 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
 // the vector type that it is contained in.
@@ -1720,9 +1683,8 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
   MVT XLenVT = Subtarget.getXLenVT();
   SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
-                   : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
-  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+                   : DAG.getRegister(RISCV::X0, XLenVT);
+  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
   return {Mask, VL};
 }
 
@@ -1747,14 +1709,6 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
   return false;
 }
 
-bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
-  // Only splats are currently supported.
-  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
-    return true;
-
-  return false;
-}
-
 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
   // RISCV FP-to-int conversions saturate to the destination register size, but
@@ -1796,7 +1750,7 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
   SDLoc DL(Op);
 
   // Freeze the source since we are increasing the number of uses.
-  SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
+  SDValue Src = DAG.getFreeze(Op.getOperand(0));
 
   // Truncate to integer and convert back to FP.
   MVT IntVT = VT.changeVectorElementTypeToInteger();
@@ -1844,21 +1798,56 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
   return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
 }
 
-static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
-                                 const RISCVSubtarget &Subtarget) {
+// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
+// This mode isn't supported in vector hardware on RISCV. But as long as we
+// aren't compiling with trapping math, we can emulate this with
+// floor(X + copysign(nextafter(0.5, 0.0), X)).
+// FIXME: Could be shorter by changing rounding mode, but we don't have FRM
+// dependencies modeled yet.
+// FIXME: Use masked operations to avoid final merge.
+static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
-  assert(VT.isFixedLengthVector() && "Unexpected vector!");
-
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  assert(VT.isVector() && "Unexpected type");
 
   SDLoc DL(Op);
 
-  SDValue Mask, VL;
-  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
-  unsigned Opc =
-      VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
-  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
-  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+  // Freeze the source since we are increasing the number of uses.
+  SDValue Src = DAG.getFreeze(Op.getOperand(0));
+
+  // We do the conversion on the absolute value and fix the sign at the end.
+  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+
+  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+  bool Ignored;
+  APFloat Point5Pred = APFloat(0.5f);
+  Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
+  Point5Pred.next(/*nextDown*/ true);
+
+  // Add the adjustment.
+  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
+                               DAG.getConstantFP(Point5Pred, DL, VT));
+
+  // Truncate to integer and convert back to fp.
+  MVT IntVT = VT.changeVectorElementTypeToInteger();
+  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
+  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+
+  // Restore the original sign.
+  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+
+  // Determine the largest integer that can be represented exactly. This and
+  // values larger than it don't have any fractional bits so don't need to
+  // be converted.
+  unsigned Precision = APFloat::semanticsPrecision(FltSem);
+  APFloat MaxVal = APFloat(FltSem);
+  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
+  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+
+  // If abs(Src) was larger than MaxVal or nan, keep it.
+  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
 }
 
 struct VIDSequence {
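Aside (not part of the patch): a scalar C++ model of the FROUND emulation above, showing why the adjustment is nextafter(0.5, 0.0) rather than 0.5: adding exactly 0.5 to the double just below 0.5 would round up to 1.0 before truncation.

#include <cassert>
#include <cmath>

static double emulatedRound(double X) {
  // Adjustment with the same sign as X, slightly below 0.5 in magnitude.
  double Adjust = std::copysign(std::nextafter(0.5, 0.0), X);
  // Mirrors the FP_TO_SINT/SINT_TO_FP round trip in the vector lowering.
  double Truncated = std::trunc(X + Adjust);
  // Values of magnitude >= 2^52 (or NaN) have no fractional bits; keep them.
  return std::fabs(X) < 0x1p52 ? Truncated : X;
}

int main() {
  assert(emulatedRound(2.5) == 3.0);   // ties round away from zero
  assert(emulatedRound(-2.5) == -3.0);
  assert(emulatedRound(2.4999) == 2.0);
  assert(emulatedRound(0.49999999999999994) == 0.0); // would give 1.0 with +0.5
}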
@@ -1908,37 +1897,27 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
       // A zero-value value difference means that we're somewhere in the middle
       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
       // step change before evaluating the sequence.
-      if (ValDiff != 0) {
-        int64_t Remainder = ValDiff % IdxDiff;
-        // Normalize the step if it's greater than 1.
-        if (Remainder != ValDiff) {
-          // The difference must cleanly divide the element span.
-          if (Remainder != 0)
-            return None;
-          ValDiff /= IdxDiff;
-          IdxDiff = 1;
-        }
-
-        if (!SeqStepNum)
-          SeqStepNum = ValDiff;
-        else if (ValDiff != SeqStepNum)
-          return None;
+      if (ValDiff == 0)
+        continue;
 
-        if (!SeqStepDenom)
-          SeqStepDenom = IdxDiff;
-        else if (IdxDiff != *SeqStepDenom)
+      int64_t Remainder = ValDiff % IdxDiff;
+      // Normalize the step if it's greater than 1.
+      if (Remainder != ValDiff) {
+        // The difference must cleanly divide the element span.
+        if (Remainder != 0)
           return None;
+        ValDiff /= IdxDiff;
+        IdxDiff = 1;
       }
-    }
 
-    // Record and/or check any addend.
-    if (SeqStepNum && SeqStepDenom) {
-      uint64_t ExpectedVal =
-          (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
-      int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
-      if (!SeqAddend)
-        SeqAddend = Addend;
-      else if (SeqAddend != Addend)
+      if (!SeqStepNum)
+        SeqStepNum = ValDiff;
+      else if (ValDiff != SeqStepNum)
+        return None;
+
+      if (!SeqStepDenom)
+        SeqStepDenom = IdxDiff;
+      else if (IdxDiff != *SeqStepDenom)
         return None;
     }
 
@@ -1946,14 +1925,68 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
     if (!PrevElt || PrevElt->first != Val)
       PrevElt = std::make_pair(Val, Idx);
   }
-  // We need to have logged both a step and an addend for this to count as
-  // a legal index sequence.
-  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
+
+  // We need to have logged a step for this to count as a legal index sequence.
+  if (!SeqStepNum || !SeqStepDenom)
     return None;
 
+  // Loop back through the sequence and validate elements we might have skipped
+  // while waiting for a valid step. While doing this, log any sequence addend.
+  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
+    if (Op.getOperand(Idx).isUndef())
+      continue;
+    uint64_t Val = Op.getConstantOperandVal(Idx) &
+                   maskTrailingOnes<uint64_t>(EltSizeInBits);
+    uint64_t ExpectedVal =
+        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
+    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
+    if (!SeqAddend)
+      SeqAddend = Addend;
+    else if (Addend != SeqAddend)
+      return None;
+  }
+
+  assert(SeqAddend && "Must have an addend if we have a step");
+
+  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
 }
 
+// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
+// and lower it as a VRGATHER_VX_VL from the source vector.
+static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
+                                  SelectionDAG &DAG,
+                                  const RISCVSubtarget &Subtarget) {
+  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+  SDValue Vec = SplatVal.getOperand(0);
+  // Only perform this optimization on vectors of the same size for simplicity.
+  // Don't perform this optimization for i1 vectors.
+  // FIXME: Support i1 vectors, maybe by promoting to i8?
+  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
+    return SDValue();
+  SDValue Idx = SplatVal.getOperand(1);
+  // The index must be a legal type.
+  if (Idx.getValueType() != Subtarget.getXLenVT())
+    return SDValue();
+
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
+                               Idx, Mask, DAG.getUNDEF(ContainerVT), VL);
+
+  if (!VT.isFixedLengthVector())
+    return Gather;
+
+  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+}
+
 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
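Aside (not part of the patch): isSimpleVIDSequence accepts exactly the sequences satisfying V[i] == (i * Num) / Denom + Addend. A tiny standalone checker (illustrative, not the LLVM code) makes the accepted shapes concrete:

#include <cstdint>
#include <cstdio>
#include <vector>

static bool isVID(const std::vector<int64_t> &V, int64_t Num, int64_t Denom,
                  int64_t Addend) {
  for (size_t I = 0; I < V.size(); ++I)
    if (V[I] != (int64_t(I) * Num) / Denom + Addend)
      return false;
  return true;
}

int main() {
  // <1,3,5,7> is VID scaled by 2 plus 1: Num=2, Denom=1, Addend=1.
  std::printf("%d\n", isVID({1, 3, 5, 7}, 2, 1, 1)); // prints: 1
  // <0,0,1,1> is the fractional step the comments mention: Num=1, Denom=2.
  std::printf("%d\n", isVID({0, 0, 1, 1}, 1, 2, 0)); // prints: 1
}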
@@ -1989,8 +2022,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     // codegen across RV32 and RV64.
     unsigned NumViaIntegerBits =
         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
-    NumViaIntegerBits = std::min(NumViaIntegerBits,
-                                 Subtarget.getMaxELENForFixedLengthVectors());
+    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
       // If we have to use more than one INSERT_VECTOR_ELT then this
       // optimization is likely to increase code size; avoid performing it in
@@ -2012,7 +2044,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
         // our vector and clear our accumulated data.
         if (I != 0 && I % NumViaIntegerBits == 0) {
           if (NumViaIntegerBits <= 32)
-            Bits = SignExtend64(Bits, 32);
+            Bits = SignExtend64<32>(Bits);
           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
@@ -2028,7 +2060,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       // Insert the (remaining) scalar value into position in our integer
       // vector type.
       if (NumViaIntegerBits <= 32)
-        Bits = SignExtend64(Bits, 32);
+        Bits = SignExtend64<32>(Bits);
       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
@@ -2077,9 +2109,12 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   }
 
   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+    if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
+      return Gather;
     unsigned Opc =
         VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
-    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
+    Splat =
+        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   }
 
@@ -2109,7 +2144,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     // a single addi instruction.
     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
-        isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
+        isPowerOf2_32(StepDenominator) &&
+        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
       // Convert right out of the scalable type so we can use standard ISD
       // nodes for the rest of the computation. If we used scalable types with
@@ -2118,18 +2154,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
-        SDValue SplatStep = DAG.getSplatVector(
+        SDValue SplatStep = DAG.getSplatBuildVector(
             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
         VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
       }
       if (StepDenominator != 1) {
-        SDValue SplatStep = DAG.getSplatVector(
+        SDValue SplatStep = DAG.getSplatBuildVector(
            VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
        VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
       }
       if (Addend != 0 || Negate) {
-        SDValue SplatAddend =
-            DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
+        SDValue SplatAddend = DAG.getSplatBuildVector(
+            VT, DL, DAG.getConstant(Addend, DL, XLenVT));
         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend,
                           VID);
       }
       return VID;
@@ -2172,7 +2208,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   // On RV64, sign-extend from 32 to 64 bits where possible in order to
   // achieve better constant materialization.
   if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
-    SplatValue = SignExtend64(SplatValue, 32);
+    SplatValue = SignExtend64<32>(SplatValue);
 
   // Since we can't introduce illegal i64 types at this stage, we can only
   // perform an i64 splat on RV32 if it is its own sign-extended value. That
@@ -2187,6 +2223,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
         getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
     SDValue Splat =
         DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
+                    DAG.getUNDEF(ViaContainerVT),
                     DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
     Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
     return DAG.getBitcast(VT, Splat);
@@ -2274,57 +2311,66 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
-                                   SDValue Hi, SDValue VL, SelectionDAG &DAG) {
+static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
+                                   SDValue Lo, SDValue Hi, SDValue VL,
+                                   SelectionDAG &DAG) {
+  if (!Passthru)
+    Passthru = DAG.getUNDEF(VT);
   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
 
     // If Hi constant is all the same sign bit as Lo, lower this as a custom
     // node in order to try and match RVV vector/scalar instructions.
     if ((LoC >> 31) == HiC)
-      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
+      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
 
-    // If vl is equal to VLMax and Hi constant is equal to Lo, we could use
+    // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
     // vmv.v.x whose EEW = 32 to lower it.
     auto *Const = dyn_cast<ConstantSDNode>(VL);
-    if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
+    if (LoC == HiC && Const && Const->isAllOnesValue()) {
       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
       // TODO: if vl <= min(VLMAX), we can also do this. But we could not
       // access the subtarget here now.
-      auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
+      auto InterVec = DAG.getNode(
+          RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
+          DAG.getRegister(RISCV::X0, MVT::i32));
       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
     }
   }
 
   // Fall back to a stack store and stride x0 vector load.
-  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
+  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
+                     Hi, VL);
 }
 
 // Called by type legalization to handle splat of i64 on RV32.
 // FIXME: We can optimize this when the type has sign or zero bits in one
 // of the halves.
-static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
-                                   SDValue VL, SelectionDAG &DAG) {
+static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
+                                   SDValue Scalar, SDValue VL,
+                                   SelectionDAG &DAG) {
   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                            DAG.getConstant(0, DL, MVT::i32));
   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                            DAG.getConstant(1, DL, MVT::i32));
-  return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
+  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
 }
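Aside (not part of the patch): the EEW=32 special case in splatPartsI64WithVL relies on a simple bit-level fact, sketched standalone below: when the two 32-bit halves of the splat value are equal, splatting the half across twice as many i32 lanes and bitcasting back reproduces the i64 splat.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Lo == Hi, as required by the LoC == HiC check in the lowering above.
  uint32_t Halves[2] = {1, 1};
  uint64_t Lane;
  std::memcpy(&Lane, Halves, sizeof(Lane)); // two i32 lanes viewed as one i64
  assert(Lane == 0x0000000100000001ULL);    // little-endian lane layout
}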
 
 // This function lowers a splat of a scalar operand Splat with the vector
 // length VL. It ensures the final sequence is type legal, which is useful when
 // lowering a splat after type legalization.
-static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
-                                SelectionDAG &DAG,
+static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
+                                MVT VT, SDLoc DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
+  bool HasPassthru = Passthru && !Passthru.isUndef();
+  if (!HasPassthru && !Passthru)
+    Passthru = DAG.getUNDEF(VT);
   if (VT.isFloatingPoint()) {
     // If VL is 1, we could use vfmv.s.f.
     if (isOneConstant(VL))
-      return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
-                         Scalar, VL);
-    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
+      return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
   }
 
   MVT XLenVT = Subtarget.getXLenVT();
@@ -2343,55 +2389,25 @@ static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
     // use vmv.s.x.
     if (isOneConstant(VL) &&
         (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
-      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
-                         VL);
-    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
+      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
   }
 
   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
          "Unexpected scalar for splat lowering!");
 
   if (isOneConstant(VL) && isNullConstant(Scalar))
-    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
+    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
                        DAG.getConstant(0, DL, XLenVT), VL);
 
   // Otherwise use the more complicated splatting algorithm.
-  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
-}
-
-// Is the mask a slidedown that shifts in undefs.
-static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
-  int Size = Mask.size();
-
-  // Elements shifted in should be undef.
-  auto CheckUndefs = [&](int Shift) {
-    for (int i = Size - Shift; i != Size; ++i)
-      if (Mask[i] >= 0)
-        return false;
-    return true;
-  };
-
-  // Elements should be shifted or undef.
-  auto MatchShift = [&](int Shift) {
-    for (int i = 0; i != Size - Shift; ++i)
-      if (Mask[i] >= 0 && Mask[i] != Shift + i)
-        return false;
-    return true;
-  };
-
-  // Try all possible shifts.
-  for (int Shift = 1; Shift != Size; ++Shift)
-    if (CheckUndefs(Shift) && MatchShift(Shift))
-      return Shift;
-
-  // No match.
-  return -1;
+  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
 }
 
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
                                 const RISCVSubtarget &Subtarget) {
   // We need to be able to widen elements to the next larger integer type.
-  if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
+  if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
     return false;
 
   int Size = Mask.size();
@@ -2430,6 +2446,79 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
   return true;
 }
 
+/// Match shuffles that concatenate two vectors, rotate the concatenation,
+/// and then extract the original number of elements from the rotated result.
+/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
+/// returned rotation amount is for a rotate right, where elements move from
+/// higher elements to lower elements. \p LoSrc indicates the first source
+/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
+/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc
+/// will be 0 or 1 if a rotation is found.
+///
+/// NOTE: We talk about rotate to the right which matches how bit shift and
+/// rotate instructions are described where LSBs are on the right, but LLVM IR
+/// and the table below write vectors with the lowest elements on the left.
+static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
+  int Size = Mask.size();
+
+  // We need to detect various ways of spelling a rotation:
+  //   [11, 12, 13, 14, 15,  0,  1,  2]
+  //   [-1, 12, 13, 14, -1, -1,  1, -1]
+  //   [-1, -1, -1, -1, -1, -1,  1,  2]
+  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
+  //   [-1,  4,  5,  6, -1, -1,  9, -1]
+  //   [-1,  4,  5,  6, -1, -1, -1, -1]
+  int Rotation = 0;
+  LoSrc = -1;
+  HiSrc = -1;
+  for (int i = 0; i != Size; ++i) {
+    int M = Mask[i];
+    if (M < 0)
+      continue;
+
+    // Determine where a rotated vector would have started.
+    int StartIdx = i - (M % Size);
+    // The identity rotation isn't interesting, stop.
+    if (StartIdx == 0)
+      return -1;
+
+    // If we found the tail of a vector the rotation must be the missing
+    // front. If we found the head of a vector, it must be how much of the
+    // head.
+    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+    if (Rotation == 0)
+      Rotation = CandidateRotation;
+    else if (Rotation != CandidateRotation)
+      // The rotations don't match, so we can't match this mask.
+      return -1;
+
+    // Compute which value this mask is pointing at.
+    int MaskSrc = M < Size ? 0 : 1;
+
+    // Compute which of the two target values this index should be assigned to.
+    // This reflects whether the high elements are remaining or the low
+    // elements are remaining.
+    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
+
+    // Either set up this value if we've not encountered it before, or check
+    // that it remains consistent.
+    if (TargetSrc < 0)
+      TargetSrc = MaskSrc;
+    else if (TargetSrc != MaskSrc)
+      // This may be a rotation, but it pulls from the inputs in some
+      // unsupported interleaving.
+      return -1;
+  }
+
+  // Check that we successfully analyzed the mask, and normalize the results.
+  assert(Rotation != 0 && "Failed to locate a viable rotation!");
+  assert((LoSrc >= 0 || HiSrc >= 0) &&
+         "Failed to find a rotated input vector!");
+
+  return Rotation;
+}
+
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
@@ -2506,33 +2595,59 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
       unsigned Opc =
           VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
-      SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
+      SDValue Splat =
+          DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
       return convertFromScalableVector(VT, Splat, DAG, Subtarget);
     }
 
     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
     assert(Lane < (int)NumElts && "Unexpected lane!");
-    SDValue Gather =
-        DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
-                    DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
+    SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
                                 V1, DAG.getConstant(Lane, DL, XLenVT),
+                                 TrueMask, DAG.getUNDEF(ContainerVT), VL);
     return convertFromScalableVector(VT, Gather, DAG, Subtarget);
   }
   }
 
   ArrayRef<int> Mask = SVN->getMask();
 
-  // Try to match as a slidedown.
-  int SlideAmt = matchShuffleAsSlideDown(Mask);
-  if (SlideAmt >= 0) {
-    // TODO: Should we reduce the VL to account for the upper undef elements?
-    // Requires additional vsetvlis, but might be faster to execute.
-    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
-    SDValue SlideDown =
-        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
-                    DAG.getUNDEF(ContainerVT), V1,
-                    DAG.getConstant(SlideAmt, DL, XLenVT),
-                    TrueMask, VL);
-    return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
+  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
+  // be undef which can be handled with a single SLIDEDOWN/UP.
+  int LoSrc, HiSrc;
+  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
+  if (Rotation > 0) {
+    SDValue LoV, HiV;
+    if (LoSrc >= 0) {
+      LoV = LoSrc == 0 ? V1 : V2;
+      LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
+    }
+    if (HiSrc >= 0) {
+      HiV = HiSrc == 0 ? V1 : V2;
+      HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
+    }
+
+    // We found a rotation. We need to slide HiV down by Rotation. Then we need
+    // to slide LoV up by (NumElts - Rotation).
+    unsigned InvRotate = NumElts - Rotation;
+
+    SDValue Res = DAG.getUNDEF(ContainerVT);
+    if (HiV) {
+      // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
+      // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
+      // causes multiple vsetvlis in some test cases such as lowering
+      // reduce.mul
+      SDValue DownVL = VL;
+      if (LoV)
+        DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
+      Res =
+          DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
+                      DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
+    }
+    if (LoV)
+      Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
+                        DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
+
+    return convertFromScalableVector(VT, Res, DAG, Subtarget);
   }
 
   // Detect an interleave shuffle and lower to
@@ -2576,18 +2691,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
 
     // Freeze V2 since we use it twice and we need to be sure that the add and
     // multiply see the same value.
-    V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
+    V2 = DAG.getFreeze(V2);
 
     // Recreate TrueMask using the widened type's element count.
-    MVT MaskVT =
-        MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
-    TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+    TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
 
     // Widen V1 and V2 with 0s and add one copy of V2 to V1.
     SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
                               V2, TrueMask, VL);
     // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
     SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
+                                     DAG.getUNDEF(IntHalfVT),
                                      DAG.getAllOnesConstant(DL, XLenVT));
     SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
                                    V2, Multiplier, TrueMask, VL);
@@ -2691,7 +2805,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   // TODO: This doesn't trigger for i64 vectors on RV32, since there we
   // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
   if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
-    Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
+    Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
+                              Subtarget);
   } else {
     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
     // If only one index is used, we can use a "splat" vrgather.
@@ -2699,16 +2814,16 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     // that's beneficial.
     if (LHSIndexCounts.size() == 1) {
       int SplatIndex = LHSIndexCounts.begin()->getFirst();
-      Gather =
-          DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
-                      DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
+                           DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask,
+                           DAG.getUNDEF(ContainerVT), VL);
     } else {
       SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
       LHSIndices =
           convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
 
       Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
-                           TrueMask, VL);
+                           TrueMask, DAG.getUNDEF(ContainerVT), VL);
     }
   }
 
@@ -2716,45 +2831,46 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   // additional vrgather.
   if (!V2.isUndef()) {
     V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+
+    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+    SelectMask =
+        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
+
     // If only one index is used, we can use a "splat" vrgather.
     // TODO: We can splat the most-common index and fix-up any stragglers, if
     // that's beneficial.
     if (RHSIndexCounts.size() == 1) {
       int SplatIndex = RHSIndexCounts.begin()->getFirst();
-      V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
-                       DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
                           DAG.getConstant(SplatIndex, DL, XLenVT), SelectMask,
+                           Gather, VL);
    } else {
      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
      RHSIndices =
          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
-      V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
-                       VL);
+      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices,
+                           SelectMask, Gather, VL);
    }
-
-    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
-    SelectMask =
-        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
-
-    Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
-                         Gather, VL);
   }
 
   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
 }
 
-static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
-                                     SDLoc DL, SelectionDAG &DAG,
-                                     const RISCVSubtarget &Subtarget) {
-  if (VT.isScalableVector())
-    return DAG.getFPExtendOrRound(Op, DL, VT);
-  assert(VT.isFixedLengthVector() &&
-         "Unexpected value type for RVV FP extend/round lowering");
-  SDValue Mask, VL;
-  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
-                        ? RISCVISD::FP_EXTEND_VL
-                        : RISCVISD::FP_ROUND_VL;
-  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
+bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
+  // Support splats for any type. These should type legalize well.
+  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
+    return true;
+
+  // Only support legal VTs for other shuffles for now.
+  if (!isTypeLegal(VT))
+    return false;
+
+  MVT SVT = VT.getSimpleVT();
+
+  bool SwapSources;
+  int LoSrc, HiSrc;
+  return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
+         isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
 }
 
 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
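Aside (not part of the patch): the slide-based lowering of a rotation can be sanity-checked with a scalar simulation. HiV slides down by Rotation to fill the low lanes; LoV slides up by NumElts - Rotation to fill the high lanes:

#include <cassert>
#include <vector>

static std::vector<int> rotateViaSlides(const std::vector<int> &LoV,
                                        const std::vector<int> &HiV, int Rot) {
  int N = static_cast<int>(LoV.size()), Inv = N - Rot;
  std::vector<int> Res(N);
  for (int I = 0; I < Inv; ++I)
    Res[I] = HiV[I + Rot]; // vslidedown.vx Res, HiV, Rot
  for (int I = Inv; I < N; ++I)
    Res[I] = LoV[I - Inv]; // vslideup.vx Res, LoV, N - Rot
  return Res;
}

int main() {
  // The mask [11,12,13,14,15,0,1,2] over (V1:V2) is a rotate right by 3,
  // with HiSrc = V2 and LoSrc = V1, per isElementRotate's convention.
  std::vector<int> V1 = {0, 1, 2, 3, 4, 5, 6, 7};
  std::vector<int> V2 = {8, 9, 10, 11, 12, 13, 14, 15};
  std::vector<int> Want = {11, 12, 13, 14, 15, 0, 1, 2};
  assert(rotateViaSlides(V1, V2, 3) == Want);
}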
@@ -2868,13 +2984,39 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                       Store->getMemOperand()->getFlags());
 }
 
-SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  switch (Op.getOpcode()) {
-  default:
-    report_fatal_error("unimplemented operand");
-  case ISD::GlobalAddress:
-    return lowerGlobalAddress(Op, DAG);
+static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
+                             const RISCVSubtarget &Subtarget) {
+  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
+
+  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
+
+  // All simm32 constants should be handled by isel.
+  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
+  // this check redundant, but small immediates are common so this check
+  // should have better compile time.
+  if (isInt<32>(Imm))
+    return Op;
+
+  // We only need to cost the immediate if constant pool lowering is enabled.
+  if (!Subtarget.useConstantPoolForLargeInts())
+    return Op;
+
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
+    return Op;
+
+  // Expand to a constant pool using the default expansion code.
+  return SDValue();
+}
+
+SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  default:
+    report_fatal_error("unimplemented operand");
+  case ISD::GlobalAddress:
+    return lowerGlobalAddress(Op, DAG);
   case ISD::BlockAddress:
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:
@@ -2883,6 +3025,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerJumpTable(Op, DAG);
   case ISD::GlobalTLSAddress:
     return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::Constant:
+    return lowerConstant(Op, DAG, Subtarget);
   case ISD::SELECT:
     return lowerSELECT(Op, DAG);
   case ISD::BRCOND:
@@ -2905,6 +3049,30 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     SDValue Op0 = Op.getOperand(0);
     EVT Op0VT = Op0.getValueType();
     MVT XLenVT = Subtarget.getXLenVT();
+    if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
+      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
+      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
+      return FPConv;
+    }
+    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
+        Subtarget.hasStdExtF()) {
+      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+      SDValue FPConv =
+          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+      return FPConv;
+    }
+
+    // Consider other scalar<->scalar casts as legal if the types are legal.
+    // Otherwise expand them.
+    if (!VT.isVector() && !Op0VT.isVector()) {
+      if (isTypeLegal(VT) && isTypeLegal(Op0VT))
+        return Op;
+      return SDValue();
+    }
+
+    assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
+           "Unexpected types");
+
     if (VT.isFixedLengthVector()) {
       // We can handle fixed length vector bitcasts with a simple replacement
       // in isel.
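Aside (not part of the patch): a toy model, with illustrative thresholds, of the lowerConstant policy introduced above: keep the immediate when it is simm32 or cheap to materialize, otherwise return nothing so the default expansion emits a constant-pool load.

#include <cstdint>
#include <cstdio>

static bool keepAsImmediate(int64_t Imm, unsigned BuildCost,
                            unsigned MaxBuildIntsCost) {
  if (Imm >= INT32_MIN && Imm <= INT32_MAX)
    return true; // all simm32 values are matched directly by isel
  // Cheap LUI/ADDI/SLLI-style sequences stay inline; long ones go to memory.
  return BuildCost <= MaxBuildIntsCost;
}

int main() {
  std::printf("%d\n", keepAsImmediate(0x12345678, 2, 2));         // 1: simm32
  std::printf("%d\n", keepAsImmediate(0x123456789ABCDEF0, 5, 2)); // 0: use pool
}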
@@ -2934,18 +3102,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                          DAG.getConstant(0, DL, XLenVT));
     }
-    if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
-      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
-      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
-      return FPConv;
-    }
-    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
-        Subtarget.hasStdExtF()) {
-      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
-      SDValue FPConv =
-          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
-      return FPConv;
-    }
     return SDValue();
   }
   case ISD::INTRINSIC_WO_CHAIN:
@@ -3002,55 +3158,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     }
     return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
   }
-  case ISD::TRUNCATE: {
-    SDLoc DL(Op);
-    MVT VT = Op.getSimpleValueType();
+  case ISD::TRUNCATE:
     // Only custom-lower vector truncates
-    if (!VT.isVector())
+    if (!Op.getSimpleValueType().isVector())
       return Op;
-
-    // Truncates to mask types are handled differently
-    if (VT.getVectorElementType() == MVT::i1)
-      return lowerVectorMaskTrunc(Op, DAG);
-
-    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
-    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
-    // truncate by one power of two at a time.
-    MVT DstEltVT = VT.getVectorElementType();
-
-    SDValue Src = Op.getOperand(0);
-    MVT SrcVT = Src.getSimpleValueType();
-    MVT SrcEltVT = SrcVT.getVectorElementType();
-
-    assert(DstEltVT.bitsLT(SrcEltVT) &&
-           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
-           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
-           "Unexpected vector truncate lowering");
-
-    MVT ContainerVT = SrcVT;
-    if (SrcVT.isFixedLengthVector()) {
-      ContainerVT = getContainerForFixedLengthVector(SrcVT);
-      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
-    }
-
-    SDValue Result = Src;
-    SDValue Mask, VL;
-    std::tie(Mask, VL) =
-        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
-    LLVMContext &Context = *DAG.getContext();
-    const ElementCount Count = ContainerVT.getVectorElementCount();
-    do {
-      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
-      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
-      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
-                           Mask, VL);
-    } while (SrcEltVT != DstEltVT);
-
-    if (SrcVT.isFixedLengthVector())
-      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-
-    return Result;
-  }
+    return lowerVectorTruncLike(Op, DAG);
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
     if (Op.getOperand(0).getValueType().isVector() &&
@@ -3076,28 +3188,26 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     // minimum size. e.g. <vscale x 2 x i64>. VLENB is in bytes so we calculate
     // vscale as VLENB / 8.
     static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
-    if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
+    if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
       report_fatal_error("Support for VLEN==32 is incomplete.");
-    if (isa<ConstantSDNode>(Op.getOperand(0))) {
-      // We assume VLENB is a multiple of 8. We manually choose the best shift
-      // here because SimplifyDemandedBits isn't always able to simplify it.
-      uint64_t Val = Op.getConstantOperandVal(0);
-      if (isPowerOf2_64(Val)) {
-        uint64_t Log2 = Log2_64(Val);
-        if (Log2 < 3)
-          return DAG.getNode(ISD::SRL, DL, VT, VLENB,
-                             DAG.getConstant(3 - Log2, DL, VT));
-        if (Log2 > 3)
-          return DAG.getNode(ISD::SHL, DL, VT, VLENB,
-                             DAG.getConstant(Log2 - 3, DL, VT));
-        return VLENB;
-      }
-      // If the multiplier is a multiple of 8, scale it down to avoid needing
-      // to shift the VLENB value.
-      if ((Val % 8) == 0)
-        return DAG.getNode(ISD::MUL, DL, VT, VLENB,
-                           DAG.getConstant(Val / 8, DL, VT));
-    }
+    // We assume VLENB is a multiple of 8. We manually choose the best shift
+    // here because SimplifyDemandedBits isn't always able to simplify it.
+    uint64_t Val = Op.getConstantOperandVal(0);
+    if (isPowerOf2_64(Val)) {
+      uint64_t Log2 = Log2_64(Val);
+      if (Log2 < 3)
+        return DAG.getNode(ISD::SRL, DL, VT, VLENB,
+                           DAG.getConstant(3 - Log2, DL, VT));
+      if (Log2 > 3)
+        return DAG.getNode(ISD::SHL, DL, VT, VLENB,
+                           DAG.getConstant(Log2 - 3, DL, VT));
+      return VLENB;
+    }
+    // If the multiplier is a multiple of 8, scale it down to avoid needing
+    // to shift the VLENB value.
+    if ((Val % 8) == 0)
+      return DAG.getNode(ISD::MUL, DL, VT, VLENB,
+                         DAG.getConstant(Val / 8, DL, VT));
 
     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                  DAG.getConstant(3, DL, VT));
@@ -3117,88 +3227,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     }
     return SDValue();
   }
-  case ISD::FP_EXTEND: {
-    // RVV can only do fp_extend to types double the size as the source. We
-    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
-    // via f32.
-    SDLoc DL(Op);
-    MVT VT = Op.getSimpleValueType();
-    SDValue Src = Op.getOperand(0);
-    MVT SrcVT = Src.getSimpleValueType();
-
-    // Prepare any fixed-length vector operands.
-    MVT ContainerVT = VT;
-    if (SrcVT.isFixedLengthVector()) {
-      ContainerVT = getContainerForFixedLengthVector(VT);
-      MVT SrcContainerVT =
-          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
-      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
-    }
-
-    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
-        SrcVT.getVectorElementType() != MVT::f16) {
-      // For scalable vectors, we only need to close the gap between
-      // vXf16->vXf64.
-      if (!VT.isFixedLengthVector())
-        return Op;
-      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
-      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
-      return convertFromScalableVector(VT, Src, DAG, Subtarget);
-    }
-
-    MVT InterVT = VT.changeVectorElementType(MVT::f32);
-    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
-    SDValue IntermediateExtend = getRVVFPExtendOrRound(
-        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
-
-    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
-                                           DL, DAG, Subtarget);
-    if (VT.isFixedLengthVector())
-      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
-    return Extend;
-  }
-  case ISD::FP_ROUND: {
-    // RVV can only do fp_round to types half the size as the source. We
-    // custom-lower f64->f16 rounds via RVV's round-to-odd float
-    // conversion instruction.
-    SDLoc DL(Op);
-    MVT VT = Op.getSimpleValueType();
-    SDValue Src = Op.getOperand(0);
-    MVT SrcVT = Src.getSimpleValueType();
-
-    // Prepare any fixed-length vector operands.
-    MVT ContainerVT = VT;
-    if (VT.isFixedLengthVector()) {
-      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
-      ContainerVT =
-          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
-      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
-    }
-
-    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
-        SrcVT.getVectorElementType() != MVT::f64) {
-      // For scalable vectors, we only need to close the gap between
-      // vXf64<->vXf16.
-      if (!VT.isFixedLengthVector())
-        return Op;
-      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
-      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
-      return convertFromScalableVector(VT, Src, DAG, Subtarget);
-    }
-
-    SDValue Mask, VL;
-    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
-    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
-    SDValue IntermediateRound =
-        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
-    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
-                                          DL, DAG, Subtarget);
-
-    if (VT.isFixedLengthVector())
-      return convertFromScalableVector(VT, Round, DAG, Subtarget);
-    return Round;
-  }
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+    if (!Op.getValueType().isVector())
+      return Op;
+    return lowerVectorFPExtendOrRoundLike(Op, DAG);
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
@@ -3221,10 +3254,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     bool IsInt2FP = SrcEltVT.isInteger();
     // Widening conversions
-    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
+    if (EltSize > (2 * SrcEltSize)) {
       if (IsInt2FP) {
         // Do a regular integer sign/zero extension then convert to float.
-        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
+        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
                                       VT.getVectorElementCount());
         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                  ? ISD::ZERO_EXTEND
@@ -3242,7 +3275,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     }
 
     // Narrowing conversions
-    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
+    if (SrcEltSize > (2 * EltSize)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
@@ -3253,9 +3286,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       // FP2Int
       // One narrowing fp_to_int, then truncate the integer. If the float isn't
       // representable by the integer, the result is poison.
- MVT IVecVT = - MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), - VT.getVectorElementCount()); + MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), + VT.getVectorElementCount()); SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); } @@ -3309,6 +3341,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FCEIL: case ISD::FFLOOR: return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG); + case ISD::FROUND: + return lowerFROUND(Op, DAG); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMAX: @@ -3350,12 +3384,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerSTEP_VECTOR(Op, DAG); case ISD::VECTOR_REVERSE: return lowerVECTOR_REVERSE(Op, DAG); + case ISD::VECTOR_SPLICE: + return lowerVECTOR_SPLICE(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::SPLAT_VECTOR: if (Op.getValueType().getVectorElementType() == MVT::i1) return lowerVectorMaskSplat(Op, DAG); - return lowerSPLAT_VECTOR(Op, DAG, Subtarget); + return SDValue(); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); case ISD::CONCAT_VECTORS: { @@ -3455,7 +3491,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FSQRT: return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); case ISD::FMA: - return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); + return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL); case ISD::SMIN: return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); case ISD::SMAX: @@ -3487,6 +3523,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerGET_ROUNDING(Op, DAG); case ISD::SET_ROUNDING: return lowerSET_ROUNDING(Op, DAG); + case ISD::EH_DWARF_CFA: + return lowerEH_DWARF_CFA(Op, DAG); case ISD::VP_SELECT: return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); case ISD::VP_MERGE: @@ -3525,6 +3563,35 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL); case ISD::VP_FDIV: return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL); + case ISD::VP_FNEG: + return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL); + case ISD::VP_FMA: + return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL); + case ISD::VP_SIGN_EXTEND: + case ISD::VP_ZERO_EXTEND: + if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) + return lowerVPExtMaskOp(Op, DAG); + return lowerVPOp(Op, DAG, + Op.getOpcode() == ISD::VP_SIGN_EXTEND + ? RISCVISD::VSEXT_VL + : RISCVISD::VZEXT_VL); + case ISD::VP_TRUNCATE: + return lowerVectorTruncLike(Op, DAG); + case ISD::VP_FP_EXTEND: + case ISD::VP_FP_ROUND: + return lowerVectorFPExtendOrRoundLike(Op, DAG); + case ISD::VP_FPTOSI: + return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL); + case ISD::VP_FPTOUI: + return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL); + case ISD::VP_SITOFP: + return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL); + case ISD::VP_UITOFP: + return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL); + case ISD::VP_SETCC: + if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) + return lowerVPSetCCMaskOp(Op, DAG); + return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL); } } @@ -3562,12 +3629,21 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, // Use PC-relative addressing to access the symbol. This generates the // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) // %pcrel_lo(auipc)). 
- return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); + return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); // Use PC-relative addressing to access the GOT for this symbol, then load // the address from the GOT. This generates the pattern (PseudoLA sym), // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). - return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); + SDValue Load = + DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other), + {DAG.getEntryNode(), Addr}, Ty, MemOp); + return Load; } switch (getTargetMachine().getCodeModel()) { @@ -3578,15 +3654,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); - return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); + SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); + return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); } case CodeModel::Medium: { // Generate a sequence for accessing addresses within any 2GiB range within // the address space. This generates the pattern (PseudoLLA sym), which // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); - return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); + return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); } } } @@ -3594,23 +3670,12 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); - int64_t Offset = N->getOffset(); - MVT XLenVT = Subtarget.getXLenVT(); + assert(N->getOffset() == 0 && "unexpected offset in global node"); const GlobalValue *GV = N->getGlobal(); bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); - SDValue Addr = getAddr(N, DAG, IsLocal); - - // In order to maximise the opportunity for common subexpression elimination, - // emit a separate ADD node for the global address offset instead of folding - // it in the global address node. Later peephole optimisations may choose to - // fold it back in when profitable. - if (Offset != 0) - return DAG.getNode(ISD::ADD, DL, Ty, Addr, - DAG.getConstant(Offset, DL, XLenVT)); - return Addr; + return getAddr(N, DAG, IsLocal); } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, @@ -3648,8 +3713,15 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, // the pattern (PseudoLA_TLS_IE sym), which expands to // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); - SDValue Load = - SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); + SDValue Load = DAG.getMemIntrinsicNode( + RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other), + {DAG.getEntryNode(), Addr}, Ty, MemOp); // Add the thread pointer. SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); @@ -3667,12 +3739,11 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, SDValue AddrLo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); + SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); - SDValue MNAdd = SDValue( - DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), - 0); - return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); + SDValue MNAdd = + DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); + return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); } SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, @@ -3686,8 +3757,7 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, // This generates the pattern (PseudoLA_TLS_GD sym), which expands to // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); - SDValue Load = - SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); + SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr); // Prepare argument list to generate call. ArgListTy Args; @@ -3710,10 +3780,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); - int64_t Offset = N->getOffset(); - MVT XLenVT = Subtarget.getXLenVT(); + assert(N->getOffset() == 0 && "unexpected offset in global node"); TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); @@ -3735,13 +3803,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, break; } - // In order to maximise the opportunity for common subexpression elimination, - // emit a separate ADD node for the global address offset instead of folding - // it in the global address node. Later peephole optimisations may choose to - // fold it back in when profitable. 
- if (Offset != 0) - return DAG.getNode(ISD::ADD, DL, Ty, Addr, - DAG.getConstant(Offset, DL, XLenVT)); return Addr; } @@ -3911,7 +3972,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, // if Shamt-XLEN < 0: // Shamt < XLEN // Lo = Lo << Shamt - // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) + // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt)) // else: // Lo = 0 // Hi = Lo << (Shamt-XLEN) @@ -3921,7 +3982,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); - SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); + SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1); SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); @@ -3950,7 +4011,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, // SRA expansion: // if Shamt-XLEN < 0: // Shamt < XLEN - // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) // Hi = Hi >>s Shamt // else: // Lo = Hi >>s (Shamt-XLEN); // // SRL expansion: // if Shamt-XLEN < 0: // Shamt < XLEN - // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) // Hi = Hi >>u Shamt // else: // Lo = Hi >>u (Shamt-XLEN); @@ -3971,7 +4032,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); - SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); + SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1); SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); @@ -4022,7 +4083,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW. ... if ((LoC >> 31) == HiC) - return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); + return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), + Lo, DAG.getRegister(RISCV::X0, MVT::i32)); } // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && isa<ConstantSDNode>(Hi.getOperand(1)) && Hi.getConstantOperandVal(1) == 31) - return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); + return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo, + DAG.getRegister(RISCV::X0, MVT::i32)); // Fall back to use a stack store and stride x0 vector load. Use X0 as VL. 
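// Illustrative sketch, not part of the vendored diff: the shift-parts hunks
// above replace (XLEN-1) - Shamt with Shamt ^ (XLEN-1). For 0 <= Shamt < XLEN
// with XLEN a power of two, the two expressions are equal, because XLEN-1 is
// an all-ones mask over the shift-amount bits. A standalone check:
#include <cassert>
int main() {
  const unsigned XLen = 32; // the same identity holds for XLen == 64
  for (unsigned Shamt = 0; Shamt < XLen; ++Shamt)
    assert((XLen - 1) - Shamt == (Shamt ^ (XLen - 1)));
  return 0;
}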
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi, - DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64)); + return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, + DAG.getUNDEF(VecVT), Lo, Hi, + DAG.getRegister(RISCV::X0, MVT::i32)); } // Custom-lower extensions from mask vectors by using a vselect either with 1 @@ -4078,27 +4143,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, assert(Src.getValueType().isVector() && Src.getValueType().getVectorElementType() == MVT::i1); - MVT XLenVT = Subtarget.getXLenVT(); - SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); - SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); - if (VecVT.isScalableVector()) { - // Be careful not to introduce illegal scalar types at this stage, and be - // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is - // illegal and must be expanded. Since we know that the constants are - // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. - bool IsRV32E64 = - !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; - - if (!IsRV32E64) { - SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); - SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); - } else { - SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); - SplatTrueVal = - DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); - } - + SDValue SplatZero = DAG.getConstant(0, DL, VecVT); + SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); } @@ -4111,9 +4158,14 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL); - SplatTrueVal = - DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL); + MVT XLenVT = Subtarget.getXLenVT(); + SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); + SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); + + SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SplatZero, VL); + SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, SplatTrueVal, SplatZero, VL); @@ -4151,8 +4203,9 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( // Custom-lower truncations from vectors to mask vectors by using a mask and a // setcc operation: // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) -SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, - SelectionDAG &DAG) const { +SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, + SelectionDAG &DAG) const { + bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; SDLoc DL(Op); EVT MaskVT = Op.getValueType(); // Only expect to custom-lower truncations to mask types @@ -4160,34 +4213,176 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, "Unexpected type for vector mask lowering"); SDValue Src = Op.getOperand(0); MVT VecVT = Src.getSimpleValueType(); - + SDValue Mask, VL; + if (IsVPTrunc) { + Mask = Op.getOperand(1); + VL = Op.getOperand(2); + } // If this is a fixed vector, we need to convert it to a scalable vector. 
MVT ContainerVT = VecVT; + if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); + if (IsVPTrunc) { + MVT MaskContainerVT = + getContainerForFixedLengthVector(Mask.getSimpleValueType()); + Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); + } + } + + if (!IsVPTrunc) { + std::tie(Mask, VL) = + getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); } SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); - SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); - SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); - - if (VecVT.isScalableVector()) { - SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); - return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); - } - - SDValue Mask, VL; - std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); + SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SplatOne, VL); + SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SplatZero, VL); MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), Mask, VL); - return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); + if (MaskVT.isFixedLengthVector()) + Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); + return Trunc; +} + +SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, + SelectionDAG &DAG) const { + bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; + SDLoc DL(Op); + + MVT VT = Op.getSimpleValueType(); + // Only custom-lower vector truncates + assert(VT.isVector() && "Unexpected type for vector truncate lowering"); + + // Truncates to mask types are handled differently + if (VT.getVectorElementType() == MVT::i1) + return lowerVectorMaskTruncLike(Op, DAG); + + // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary + // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which + // truncate by one power of two at a time. 
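// Illustrative sketch, not part of the vendored diff: as the comment above
// says, each RISCVISD::TRUNCATE_VECTOR_VL step halves the element width, so
// an i64 -> i8 truncate takes three steps (i64 -> i32 -> i16 -> i8). A
// standalone model of that iteration, assuming power-of-two element widths
// exactly as the asserts in the lowering require:
#include <cstdio>
int main() {
  unsigned SrcBits = 64, DstBits = 8; // e.g. truncating nxv2i64 to nxv2i8
  do {
    SrcBits /= 2; // one TRUNCATE_VECTOR_VL node per halving
    std::printf("TRUNCATE_VECTOR_VL to i%u\n", SrcBits);
  } while (SrcBits != DstBits);
  return 0;
}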
+ MVT DstEltVT = VT.getVectorElementType(); + + SDValue Src = Op.getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + MVT SrcEltVT = SrcVT.getVectorElementType(); + + assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && + isPowerOf2_64(SrcEltVT.getSizeInBits()) && + "Unexpected vector truncate lowering"); + + MVT ContainerVT = SrcVT; + SDValue Mask, VL; + if (IsVPTrunc) { + Mask = Op.getOperand(1); + VL = Op.getOperand(2); + } + if (SrcVT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(SrcVT); + Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); + if (IsVPTrunc) { + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + } + + SDValue Result = Src; + if (!IsVPTrunc) { + std::tie(Mask, VL) = + getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); + } + + LLVMContext &Context = *DAG.getContext(); + const ElementCount Count = ContainerVT.getVectorElementCount(); + do { + SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); + EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); + Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, + Mask, VL); + } while (SrcEltVT != DstEltVT); + + if (SrcVT.isFixedLengthVector()) + Result = convertFromScalableVector(VT, Result, DAG, Subtarget); + + return Result; +} + +SDValue +RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, + SelectionDAG &DAG) const { + bool IsVP = + Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; + bool IsExtend = + Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; + // RVV can only do truncate fp to types half the size as the source. We + // custom-lower f64->f16 rounds via RVV's round-to-odd float + // conversion instruction. + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + + assert(VT.isVector() && "Unexpected type for vector truncate lowering"); + + SDValue Src = Op.getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + + bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || + SrcVT.getVectorElementType() != MVT::f16); + bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || + SrcVT.getVectorElementType() != MVT::f64); + + bool IsDirectConv = IsDirectExtend || IsDirectTrunc; + + // Prepare any fixed-length vector operands. + MVT ContainerVT = VT; + SDValue Mask, VL; + if (IsVP) { + Mask = Op.getOperand(1); + VL = Op.getOperand(2); + } + if (VT.isFixedLengthVector()) { + MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); + ContainerVT = + SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); + Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); + if (IsVP) { + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + } + + if (!IsVP) + std::tie(Mask, VL) = + getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); + + unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; + + if (IsDirectConv) { + Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); + if (VT.isFixedLengthVector()) + Src = convertFromScalableVector(VT, Src, DAG, Subtarget); + return Src; + } + + unsigned InterConvOpc = + IsExtend ? 
RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; + + MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); + SDValue IntermediateConv = + DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); + SDValue Result = + DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); + if (VT.isFixedLengthVector()) + return convertFromScalableVector(VT, Result, DAG, Subtarget); + return Result; } // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the @@ -4268,13 +4463,15 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied // undef doesn't obey the earlyclobber constraint. Just splat a zero value. - ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero, - InsertI64VL); + ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, + DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL); // First slide in the hi value, then the lo in underneath it. - ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, - ValHi, I32Mask, InsertI64VL); - ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, - ValLo, I32Mask, InsertI64VL); + ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, + DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, + I32Mask, InsertI64VL); + ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, + DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo, + I32Mask, InsertI64VL); // Bitcast back to the right container type. ValInVec = DAG.getBitcast(ContainerVT, ValInVec); } @@ -4310,7 +4507,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, unsigned WidenVecLen; SDValue ExtractElementIdx; SDValue ExtractBitIdx; - unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors(); + unsigned MaxEEW = Subtarget.getELEN(); MVT LargestEltVT = MVT::getIntegerVT( std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); if (NumElts <= LargestEltVT.getSizeInBits()) { @@ -4360,8 +4557,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, if (!isNullConstant(Idx)) { // Use a VL of 1 to avoid processing more elements than we need. SDValue VL = DAG.getConstant(1, DL, XLenVT); - MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); - SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); } @@ -4378,8 +4574,8 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, // Some RVV intrinsics may claim that they want an integer operand to be // promoted or expanded. 
-static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { +static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && "Unexpected opcode"); @@ -4393,10 +4589,10 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); - if (!II || !II->hasSplatOperand()) + if (!II || !II->hasScalarOperand()) return SDValue(); - unsigned SplatOp = II->SplatOperand + 1 + HasChain; + unsigned SplatOp = II->ScalarOperand + 1 + HasChain; assert(SplatOp < Op.getNumOperands()); SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); @@ -4426,28 +4622,141 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, // that a widening operation never uses SEW=64. // NOTE: If this fails the below assert, we can probably just find the // element count from any operand or result and use it to construct the VT. - assert(II->SplatOperand > 0 && "Unexpected splat operand!"); + assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); // The more complex case is when the scalar is larger than XLenVT. assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); - // If this is a sign-extended 32-bit constant, we can truncate it and rely - // on the instruction to sign-extend since SEW>XLEN. - if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) { - if (isInt<32>(CVal->getSExtValue())) { - ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); - return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); + // If this is a sign-extended 32-bit value, we can truncate it and rely on the + // instruction to sign-extend since SEW>XLEN. + if (DAG.ComputeNumSignBits(ScalarOp) > 32) { + ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); + return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); + } + + switch (IntNo) { + case Intrinsic::riscv_vslide1up: + case Intrinsic::riscv_vslide1down: + case Intrinsic::riscv_vslide1up_mask: + case Intrinsic::riscv_vslide1down_mask: { + // We need to special case these when the scalar is larger than XLen. + unsigned NumOps = Op.getNumOperands(); + bool IsMasked = NumOps == 7; + + // Convert the vector source to the equivalent nxvXi32 vector. + MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); + SDValue Vec = DAG.getBitcast(I32VT, Operands[2]); + + SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp, + DAG.getConstant(0, DL, XLenVT)); + SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp, + DAG.getConstant(1, DL, XLenVT)); + + // Double the VL since we halved SEW. 
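// Illustrative sketch, not part of the vendored diff: on RV32, an i64 scalar
// for vslide1up/vslide1down is split into two i32 halves (as the
// EXTRACT_ELEMENT nodes above do), the vector is reinterpreted with SEW
// halved, and the VL is doubled. Assuming the usual lo/hi split:
#include <cstdint>
#include <cstdio>
int main() {
  uint64_t Scalar = 0x1122334455667788ULL;
  uint32_t Lo = static_cast<uint32_t>(Scalar);       // EXTRACT_ELEMENT index 0
  uint32_t Hi = static_cast<uint32_t>(Scalar >> 32); // EXTRACT_ELEMENT index 1
  unsigned AVL = 4;         // requested VL counted in i64 elements
  unsigned I32VL = 2 * AVL; // VL after halving SEW to 32 bits
  std::printf("lo=0x%08x hi=0x%08x i32 vl=%u\n", Lo, Hi, I32VL);
  return 0;
}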
+ SDValue AVL = getVLOperand(Op); + SDValue I32VL; + + // Optimize for constant AVL + if (isa<ConstantSDNode>(AVL)) { + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned MinSize = VT.getSizeInBits().getKnownMinValue(); + + unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); + unsigned MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + + unsigned VectorBitsMin = Subtarget.getRealMinVLen(); + unsigned MinVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); + + uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue(); + if (AVLInt <= MinVLMAX) { + I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); + } else if (AVLInt >= 2 * MaxVLMAX) { + // Just set vl to VLMAX in this situation + RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); + SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); + unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); + SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); + SDValue SETVLMAX = DAG.getTargetConstant( + Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32); + I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, + LMUL); + } else { + // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl + // is related to the hardware implementation. + // So let the following code handle it. + } + } + if (!I32VL) { + RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); + SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); + unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); + SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); + SDValue SETVL = + DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32); + // Use a vsetvli instruction to get the actually-used length, which is + // related to the hardware implementation. + SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, + SEW, LMUL); + I32VL = + DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); + } + + SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG); + + // Shift the two scalar parts in using SEW=32 slide1up/slide1down + // instructions. + SDValue Passthru; + if (IsMasked) + Passthru = DAG.getUNDEF(I32VT); + else + Passthru = DAG.getBitcast(I32VT, Operands[1]); + + if (IntNo == Intrinsic::riscv_vslide1up || + IntNo == Intrinsic::riscv_vslide1up_mask) { + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, + ScalarHi, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, + ScalarLo, I32Mask, I32VL); + } else { + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, + ScalarLo, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, + ScalarHi, I32Mask, I32VL); } + + // Convert back to nxvXi64. + Vec = DAG.getBitcast(VT, Vec); + + if (!IsMasked) + return Vec; + // Apply mask after the operation. + SDValue Mask = Operands[NumOps - 3]; + SDValue MaskedOff = Operands[1]; + // Assume Policy operand is the last operand. + uint64_t Policy = + cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue(); + // We don't need to select maskedoff if it's undef. + if (MaskedOff.isUndef()) + return Vec; + // TAMU + if (Policy == RISCVII::TAIL_AGNOSTIC) + return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, + AVL); + // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. + // It's fine because vmerge does not care about mask policy. + return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, + AVL); + } } // We need to convert the scalar to a splat vector. 
- // FIXME: Can we implicitly truncate the scalar if it is known to - // be sign extended? SDValue VL = getVLOperand(Op); assert(VL.getValueType() == XLenVT); - ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG); + ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG); return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); } @@ -4481,7 +4790,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::riscv_zip: case Intrinsic::riscv_unzip: { // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip. - // For i32 the immdiate is 15. For i64 the immediate is 31. + // For i32 the immediate is 15. For i64 the immediate is 31. unsigned Opc = IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL; unsigned BitWidth = Op.getValueSizeInBits(); @@ -4516,10 +4825,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1)); case Intrinsic::riscv_vmv_v_x: return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), - Op.getSimpleValueType(), DL, DAG, Subtarget); + Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, + Subtarget); case Intrinsic::riscv_vfmv_v_f: return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::riscv_vmv_s_x: { SDValue Scalar = Op.getOperand(2); @@ -4533,7 +4843,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // This is an i64 value that lives in two scalar registers. We have to // insert this in a convoluted way. First we build vXi64 splat containing - // the/ two values that we assemble using some bit math. Next we'll use + // the two values that we assemble using some bit math. Next we'll use // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask // to merge element 0 from our splat into the source vector. // FIXME: This is probably not the best way to do this, but it is @@ -4550,12 +4860,15 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Vec = Op.getOperand(1); SDValue VL = getVLOperand(Op); - SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); - SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, - DAG.getConstant(0, DL, MVT::i32), VL); + SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); + if (Op.getOperand(1).isUndef()) + return SplattedVal; + SDValue SplattedIdx = + DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), + DAG.getConstant(0, DL, MVT::i32), VL); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); - SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + MVT MaskVT = getMaskTypeFor(VT); + SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); SDValue SelectCond = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx, @@ -4563,73 +4876,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, Vec, VL); } - case Intrinsic::riscv_vslide1up: - case Intrinsic::riscv_vslide1down: - case Intrinsic::riscv_vslide1up_mask: - case Intrinsic::riscv_vslide1down_mask: { - // We need to special case these when the scalar is larger than XLen. - unsigned NumOps = Op.getNumOperands(); - bool IsMasked = NumOps == 7; - unsigned OpOffset = IsMasked ? 
1 : 0; - SDValue Scalar = Op.getOperand(2 + OpOffset); - if (Scalar.getValueType().bitsLE(XLenVT)) - break; - - // Splatting a sign extended constant is fine. - if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) - if (isInt<32>(CVal->getSExtValue())) - break; - - MVT VT = Op.getSimpleValueType(); - assert(VT.getVectorElementType() == MVT::i64 && - Scalar.getValueType() == MVT::i64 && "Unexpected VTs"); - - // Convert the vector source to the equivalent nxvXi32 vector. - MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); - SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset)); - - SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, - DAG.getConstant(0, DL, XLenVT)); - SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, - DAG.getConstant(1, DL, XLenVT)); - - // Double the VL since we halved SEW. - SDValue VL = getVLOperand(Op); - SDValue I32VL = - DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); - - MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount()); - SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL); - - // Shift the two scalar parts in using SEW=32 slide1up/slide1down - // instructions. - if (IntNo == Intrinsic::riscv_vslide1up || - IntNo == Intrinsic::riscv_vslide1up_mask) { - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi, - I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo, - I32Mask, I32VL); - } else { - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo, - I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi, - I32Mask, I32VL); - } - - // Convert back to nxvXi64. - Vec = DAG.getBitcast(VT, Vec); - - if (!IsMasked) - return Vec; - - // Apply mask after the operation. 
- SDValue Mask = Op.getOperand(NumOps - 3); - SDValue MaskedOff = Op.getOperand(1); - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL); - } } - return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); + return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); } SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, @@ -4652,8 +4901,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue PassThru = Op.getOperand(2); if (!IsUnmasked) { - MVT MaskVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); } @@ -4680,17 +4928,56 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Ops.push_back(Policy); } - SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); + SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); + SDValue Result = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, + Load->getMemoryVT(), Load->getMemOperand()); + SDValue Chain = Result.getValue(1); + Result = convertFromScalableVector(VT, Result, DAG, Subtarget); + return DAG.getMergeValues({Result, Chain}, DL); + } + case Intrinsic::riscv_seg2_load: + case Intrinsic::riscv_seg3_load: + case Intrinsic::riscv_seg4_load: + case Intrinsic::riscv_seg5_load: + case Intrinsic::riscv_seg6_load: + case Intrinsic::riscv_seg7_load: + case Intrinsic::riscv_seg8_load: { + SDLoc DL(Op); + static const Intrinsic::ID VlsegInts[7] = { + Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, + Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, + Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, + Intrinsic::riscv_vlseg8}; + unsigned NF = Op->getNumValues() - 1; + assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); + MVT XLenVT = Subtarget.getXLenVT(); + MVT VT = Op->getSimpleValueType(0); + MVT ContainerVT = getContainerForFixedLengthVector(VT); + + SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); + SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); + auto *Load = cast<MemIntrinsicSDNode>(Op); + SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); + ContainerVTs.push_back(MVT::Other); + SDVTList VTs = DAG.getVTList(ContainerVTs); + SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; + Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); + Ops.push_back(Op.getOperand(2)); + Ops.push_back(VL); SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, Load->getMemoryVT(), Load->getMemOperand()); - SDValue Chain = Result.getValue(1); - Result = convertFromScalableVector(VT, Result, DAG, Subtarget); - return DAG.getMergeValues({Result, Chain}, DL); + SmallVector<SDValue, 9> Results; + for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) + Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), + DAG, Subtarget)); + Results.push_back(Result.getValue(NF)); + return DAG.getMergeValues(Results, DL); } } - return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); + return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); } SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, @@ -4714,8 +5001,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); if (!IsUnmasked) { - MVT MaskVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); } @@ -4898,8 +5184,9 @@ 
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); - SDValue IdentitySplat = lowerScalarSplat( - NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget); + SDValue IdentitySplat = + lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT), + M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, IdentitySplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, @@ -4960,8 +5247,9 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - SDValue ScalarSplat = lowerScalarSplat( - ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget); + SDValue ScalarSplat = + lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT), + M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), VectorVal, ScalarSplat, Mask, VL); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, @@ -5027,9 +5315,9 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, MVT XLenVT = Subtarget.getXLenVT(); MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT; - SDValue StartSplat = - lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT, - DL, DAG, Subtarget); + SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0), + DAG.getConstant(1, DL, XLenVT), M1VT, + DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, @@ -5331,13 +5619,13 @@ SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, if (StepValImm != 1) { if (isPowerOf2_64(StepValImm)) { SDValue StepVal = - DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, + DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), DAG.getConstant(Log2_64(StepValImm), DL, XLenVT)); StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal); } else { SDValue StepVal = lowerScalarSplat( - DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT, - DL, DAG, Subtarget); + SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), + VL, VT, DL, DAG, Subtarget); StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal); } } @@ -5353,22 +5641,26 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT VecVT = Op.getSimpleValueType(); + if (VecVT.getVectorElementType() == MVT::i1) { + MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); + SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0)); + SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1); + return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2); + } unsigned EltSize = VecVT.getScalarSizeInBits(); unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); - - unsigned MaxVLMAX = 0; - unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits(); - if (VectorBitsMax != 0) - MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; + unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); + unsigned MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; MVT IntVT = VecVT.changeVectorElementTypeToInteger(); - // If this is SEW=8 and VLMAX is unknown or more than 256, 
we need + // If this is SEW=8 and VLMAX is potentially more than 256, we need // to use vrgatherei16.vv. // TODO: It's also possible to use vrgatherei16.vv for other types to // decrease register width for the index calculation. - if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) { + if (MaxVLMAX > 256 && EltSize == 8) { // If this is LMUL=8, we have to split before we can use vrgatherei16.vv. // Reverse each half, then reassemble them in reverse order. // NOTE: It's also possible that after splitting that VLMAX no longer @@ -5413,13 +5705,51 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, if (!IsRV32E64) SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); else - SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); + SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), + VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); - return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); + return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, + DAG.getUNDEF(VecVT), VL); +} + +SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + MVT XLenVT = Subtarget.getXLenVT(); + MVT VecVT = Op.getSimpleValueType(); + + unsigned MinElts = VecVT.getVectorMinNumElements(); + SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, + DAG.getConstant(MinElts, DL, XLenVT)); + + int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); + SDValue DownOffset, UpOffset; + if (ImmValue >= 0) { + // The operand is a TargetConstant, we need to rebuild it as a regular + // constant. + DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); + UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset); + } else { + // The operand is a TargetConstant, we need to rebuild it as a regular + // constant rather than negating the original operand. + UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); + DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset); + } + + SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG); + + SDValue SlideDown = + DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1, + DownOffset, TrueMask, UpOffset); + return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset, + TrueMask, + DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT)); } SDValue @@ -5434,18 +5764,26 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, "Expecting a correctly-aligned load"); MVT VT = Op.getSimpleValueType(); + MVT XLenVT = Subtarget.getXLenVT(); MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VL = - DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); + SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); + bool IsMaskOp = VT.getVectorElementType() == MVT::i1; + SDValue IntID = DAG.getTargetConstant( + IsMaskOp ? 
Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT); + SmallVector<SDValue, 4> Ops{Load->getChain(), IntID}; + if (!IsMaskOp) + Ops.push_back(DAG.getUNDEF(ContainerVT)); + Ops.push_back(Load->getBasePtr()); + Ops.push_back(VL); SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); - SDValue NewLoad = DAG.getMemIntrinsicNode( - RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, - Load->getMemoryVT(), Load->getMemOperand()); + SDValue NewLoad = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, + Load->getMemoryVT(), Load->getMemOperand()); SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); - return DAG.getMergeValues({Result, Load->getChain()}, DL); + return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); } SDValue @@ -5461,6 +5799,7 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, SDValue StoreVal = Store->getValue(); MVT VT = StoreVal.getSimpleValueType(); + MVT XLenVT = Subtarget.getXLenVT(); // If the size is less than a byte, we need to pad with zeros to make a byte. if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { @@ -5472,14 +5811,17 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VL = - DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); + SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); SDValue NewValue = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); + + bool IsMaskOp = VT.getVectorElementType() == MVT::i1; + SDValue IntID = DAG.getTargetConstant( + IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); return DAG.getMemIntrinsicNode( - RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), - {Store->getChain(), NewValue, Store->getBasePtr(), VL}, + ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), + {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, Store->getMemoryVT(), Store->getMemOperand()); } @@ -5514,8 +5856,7 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, ContainerVT = getContainerForFixedLengthVector(VT); PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); if (!IsUnmasked) { - MVT MaskVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); } } @@ -5581,8 +5922,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); if (!IsUnmasked) { - MVT MaskVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); } } @@ -5620,8 +5960,8 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); - MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); - SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + MVT MaskVT = getMaskTypeFor(ContainerVT); + SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, Op.getOperand(2), Mask, VL); @@ -5667,9 +6007,9 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - SDValue SplatZero = - DAG.getNode(RISCVISD::VMV_V_X_VL, DL, 
ContainerVT, - DAG.getConstant(0, DL, Subtarget.getXLenVT())); + SDValue SplatZero = DAG.getNode( + RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), + DAG.getConstant(0, DL, Subtarget.getXLenVT())); SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL); SDValue Max = @@ -5787,15 +6127,260 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, } if (!VT.isFixedLengthVector()) - return DAG.getNode(RISCVISDOpc, DL, VT, Ops); + return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops); + SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); return convertFromScalableVector(VT, VPOp, DAG, Subtarget); } +SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + + SDValue Src = Op.getOperand(0); + // NOTE: Mask is dropped. + SDValue VL = Op.getOperand(2); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); + } + + MVT XLenVT = Subtarget.getXLenVT(); + SDValue Zero = DAG.getConstant(0, DL, XLenVT); + SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), Zero, VL); + + SDValue SplatValue = DAG.getConstant( + Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT); + SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SplatValue, VL); + + SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src, + Splat, ZeroSplat, VL); + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + +SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + + SDValue Op1 = Op.getOperand(0); + SDValue Op2 = Op.getOperand(1); + ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // NOTE: Mask is dropped. 
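// Illustrative sketch, not part of the vendored diff: the switch below
// rewrites i1-vector comparisons into vmxor/vmand/vmor identities. A quick
// standalone check of one of them, X >s Y == ~X & Y (for i1, signed reads
// 1 as -1, so X >s Y holds exactly when X == 0 and Y == 1):
#include <cassert>
int main() {
  for (int X = 0; X <= 1; ++X)
    for (int Y = 0; Y <= 1; ++Y) {
      bool Sgt = (X ? -1 : 0) > (Y ? -1 : 0); // signed i1 comparison
      bool Rewritten = !X && Y;               // the ~X & Y mask identity
      assert(Sgt == Rewritten);
    }
  return 0;
}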
+ SDValue VL = Op.getOperand(4); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); + } + + SDValue Result; + SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); + + switch (Condition) { + default: + break; + // X != Y --> (X^Y) + case ISD::SETNE: + Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); + break; + // X == Y --> ~(X^Y) + case ISD::SETEQ: { + SDValue Temp = + DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); + Result = + DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); + break; + } + // X >s Y --> X == 0 & Y == 1 --> ~X & Y + // X <u Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETGT: + case ISD::SETULT: { + SDValue Temp = + DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); + Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); + break; + } + // X <s Y --> X == 1 & Y == 0 --> ~Y & X + // X >u Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETLT: + case ISD::SETUGT: { + SDValue Temp = + DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); + Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); + break; + } + // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: + case ISD::SETULE: { + SDValue Temp = + DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); + Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL); + break; + } + // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: + case ISD::SETUGE: { + SDValue Temp = + DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); + Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL); + break; + } + } + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + +// Lower Floating-Point/Integer Type-Convert VP SDNodes +SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, + unsigned RISCVISDOpc) const { + SDLoc DL(Op); + + SDValue Src = Op.getOperand(0); + SDValue Mask = Op.getOperand(1); + SDValue VL = Op.getOperand(2); + + MVT DstVT = Op.getSimpleValueType(); + MVT SrcVT = Src.getSimpleValueType(); + if (DstVT.isFixedLengthVector()) { + DstVT = getContainerForFixedLengthVector(DstVT); + SrcVT = getContainerForFixedLengthVector(SrcVT); + Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); + MVT MaskVT = getMaskTypeFor(DstVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + + unsigned RISCVISDExtOpc = (RISCVISDOpc == RISCVISD::SINT_TO_FP_VL || + RISCVISDOpc == RISCVISD::FP_TO_SINT_VL) + ? RISCVISD::VSEXT_VL + : RISCVISD::VZEXT_VL; + + unsigned DstEltSize = DstVT.getScalarSizeInBits(); + unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); + + SDValue Result; + if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. + if (SrcVT.isInteger()) { + assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); + + // Do we need to do any pre-widening before converting? 
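// Illustrative sketch, not part of the vendored diff: for an i1 source the
// branch below first selects the integers 0 / 1 (zero-extend) or 0 / -1
// (sign-extend) and only then converts to floating point, matching scalar
// int-to-fp semantics:
#include <cassert>
int main() {
  bool B = true;
  double FromZext = static_cast<double>(static_cast<unsigned>(B)); // 1.0
  double FromSext = static_cast<double>(-static_cast<int>(B));     // -1.0
  assert(FromZext == 1.0 && FromSext == -1.0);
  return 0;
}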
+ if (SrcEltSize == 1) { + MVT IntVT = DstVT.changeVectorElementTypeToInteger(); + MVT XLenVT = Subtarget.getXLenVT(); + SDValue Zero = DAG.getConstant(0, DL, XLenVT); + SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, + DAG.getUNDEF(IntVT), Zero, VL); + SDValue One = DAG.getConstant( + RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); + SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, + DAG.getUNDEF(IntVT), One, VL); + Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat, + ZeroSplat, VL); + } else if (DstEltSize > (2 * SrcEltSize)) { + // Widen before converting. + MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), + DstVT.getVectorElementCount()); + Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); + } + + Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); + } else { + assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && + "Wrong input/output vector types"); + + // Convert f16 to f32 then convert f32 to i64. + if (DstEltSize > (2 * SrcEltSize)) { + assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); + MVT InterimFVT = + MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); + Src = + DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); + } + + Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); + } + } else { // Narrowing + Conversion + if (SrcVT.isInteger()) { + assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); + // First do a narrowing convert to an FP type half the size, then round + // the FP type to a small FP type if needed. + + MVT InterimFVT = DstVT; + if (SrcEltSize > (2 * DstEltSize)) { + assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); + assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); + InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); + } + + Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); + + if (InterimFVT != DstVT) { + Src = Result; + Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); + } + } else { + assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && + "Wrong input/output vector types"); + // First do a narrowing conversion to an integer half the size, then + // truncate if needed. + + if (DstEltSize == 1) { + // First convert to the same size integer, then convert to mask using + // setcc. + assert(SrcEltSize >= 16 && "Unexpected FP type!"); + MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), + DstVT.getVectorElementCount()); + Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); + + // Compare the integer result to 0. The integer should be 0 or 1/-1, + // otherwise the conversion was undefined. 
+ MVT XLenVT = Subtarget.getXLenVT(); + SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); + SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, + DAG.getUNDEF(InterimIVT), SplatZero); + Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero, + DAG.getCondCode(ISD::SETNE), Mask, VL); + } else { + MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), + DstVT.getVectorElementCount()); + + Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); + + while (InterimIVT != DstVT) { + SrcEltSize /= 2; + Src = Result; + InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), + DstVT.getVectorElementCount()); + Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, + Src, Mask, VL); + } + } + } + } + + MVT VT = Op.getSimpleValueType(); + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { @@ -5876,23 +6461,14 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { - // We need to use the larger of the result and index type to determine the - // scalable type to use so we don't increase LMUL for any operand/result. - if (VT.bitsGE(IndexVT)) { - ContainerVT = getContainerForFixedLengthVector(VT); - IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), - ContainerVT.getVectorElementCount()); - } else { - IndexVT = getContainerForFixedLengthVector(IndexVT); - ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), - IndexVT.getVectorElementCount()); - } + ContainerVT = getContainerForFixedLengthVector(VT); + IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), + ContainerVT.getVectorElementCount()); Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); if (!IsUnmasked) { - MVT MaskVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); } @@ -5987,24 +6563,15 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { - // We need to use the larger of the value and index type to determine the - // scalable type to use so we don't increase LMUL for any operand/result. 
- if (VT.bitsGE(IndexVT)) { - ContainerVT = getContainerForFixedLengthVector(VT); - IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), - ContainerVT.getVectorElementCount()); - } else { - IndexVT = getContainerForFixedLengthVector(IndexVT); - ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), - IndexVT.getVectorElementCount()); - } + ContainerVT = getContainerForFixedLengthVector(VT); + IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), + ContainerVT.getVectorElementCount()); Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); if (!IsUnmasked) { - MVT MaskVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(ContainerVT); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); } } @@ -6095,14 +6662,21 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, RMValue); } +SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + bool isRISCV64 = Subtarget.is64Bit(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false); + return DAG.getFrameIndex(FI, PtrVT); +} + static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) { switch (IntNo) { default: llvm_unreachable("Unexpected Intrinsic"); - case Intrinsic::riscv_grev: - return RISCVISD::GREVW; - case Intrinsic::riscv_gorc: - return RISCVISD::GORCW; case Intrinsic::riscv_bcompress: return RISCVISD::BCOMPRESSW; case Intrinsic::riscv_bdecompress: @@ -6121,9 +6695,12 @@ static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG, unsigned IntNo) { SDLoc DL(N); RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo); - SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); - SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); - SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2); + // Deal with the Instruction Operands + SmallVector NewOps; + for (SDValue Op : drop_begin(N->ops())) + // Promote the operand to i64 type + NewOps.push_back(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op)); + SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOps); // ReplaceNodeResults requires we maintain the same type for the return value. return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); } @@ -6150,10 +6727,6 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { return RISCVISD::ROLW; case ISD::ROTR: return RISCVISD::RORW; - case RISCVISD::GREV: - return RISCVISD::GREVW; - case RISCVISD::GORC: - return RISCVISD::GORCW; } } @@ -6309,6 +6882,10 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); if (N->getOperand(1).getOpcode() != ISD::Constant) { + // If we can use a BSET instruction, allow default promotion to apply. + if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && + isOneConstant(N->getOperand(0))) + break; Results.push_back(customLegalizeToWOp(N, DAG)); break; } @@ -6388,12 +6965,23 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, DAG.getValueType(MVT::i32)); - // Sign extend the LHS and perform an unsigned compare with the ADDW result. - // Since the inputs are sign extended from i32, this is equivalent to - // comparing the lower 32 bits. 
- LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); - SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, - IsAdd ? ISD::SETULT : ISD::SETUGT); + SDValue Overflow; + if (IsAdd && isOneConstant(RHS)) { + // Special case uaddo X, 1 overflowed if the addition result is 0. + // The general case (X + C) < C is not necessarily beneficial. Although we + // reduce the live range of X, we may introduce the materialization of + // constant C, especially when the setcc result is used by branch. We have + // no compare with constant and branch instructions. + Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, + DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); + } else { + // Sign extend the LHS and perform an unsigned compare with the ADDW + // result. Since the inputs are sign extended from i32, this is equivalent + // to comparing the lower 32 bits. + LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, + IsAdd ? ISD::SETULT : ISD::SETUGT); + } Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); Results.push_back(Overflow); @@ -6421,6 +7009,33 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(expandAddSubSat(N, DAG)); return; } + case ISD::ABS: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + + // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) + + SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + + // Freeze the source so we can increase it's use count. + Src = DAG.getFreeze(Src); + + // Copy sign bit to all bits using the sraiw pattern. + SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, + DAG.getValueType(MVT::i32)); + SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, + DAG.getConstant(31, DL, MVT::i64)); + + SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); + NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); + + // NOTE: The result is only required to be anyextended, but sext is + // consistent with type legalization of sub. + NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, + DAG.getValueType(MVT::i32)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); + return; + } case ISD::BITCAST: { EVT VT = N->getValueType(0); assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); @@ -6451,37 +7066,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, break; } case RISCVISD::GREV: - case RISCVISD::GORC: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - assert(isa(N->getOperand(1)) && "Expected constant"); - // This is similar to customLegalizeToWOp, except that we pass the second - // operand (a TargetConstant) straight through: it is already of type - // XLenVT. - RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); - SDValue NewOp0 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); - SDValue NewOp1 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); - SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); - // ReplaceNodeResults requires we maintain the same type for the return - // value. - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); - break; - } + case RISCVISD::GORC: case RISCVISD::SHFL: { - // There is no SHFLIW instruction, but we can just promote the operation. 
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + MVT VT = N->getSimpleValueType(0); + MVT XLenVT = Subtarget.getXLenVT(); + assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && "Unexpected custom legalisation"); assert(isa(N->getOperand(1)) && "Expected constant"); - SDValue NewOp0 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + assert((Subtarget.hasStdExtZbp() || + (Subtarget.hasStdExtZbkb() && N->getOpcode() == RISCVISD::GREV && + N->getConstantOperandVal(1) == 7)) && + "Unexpected extension"); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); SDValue NewOp1 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); - SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1); + DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); + SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp0, NewOp1); // ReplaceNodeResults requires we maintain the same type for the return // value. - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); break; } case ISD::BSWAP: @@ -6496,9 +7098,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. if (N->getOpcode() == ISD::BSWAP) Imm &= ~0x7U; - unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV; - SDValue GREVI = - DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT)); + SDValue GREVI = DAG.getNode(RISCVISD::GREV, DL, XLenVT, NewOp0, + DAG.getConstant(Imm, DL, XLenVT)); // ReplaceNodeResults requires we maintain the same type for the return // value. Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI)); @@ -6564,9 +7165,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, MVT XLenVT = Subtarget.getXLenVT(); // Use a VL of 1 to avoid processing more elements than we need. - MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); SDValue VL = DAG.getConstant(1, DL, XLenVT); - SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); // Unless the index is known to be 0, we must slide the vector down to get // the desired element into index 0. @@ -6581,6 +7181,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, // To extract the upper XLEN bits of the vector element, shift the first // element right by 32 bits and re-extract the lower XLEN bits. SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), DAG.getConstant(32, DL, XLenVT), VL); SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, Mask, VL); @@ -6597,38 +7198,42 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, llvm_unreachable( "Don't know how to custom type legalize this intrinsic!"); case Intrinsic::riscv_grev: - case Intrinsic::riscv_gorc: - case Intrinsic::riscv_bcompress: - case Intrinsic::riscv_bdecompress: - case Intrinsic::riscv_bfp: { + case Intrinsic::riscv_gorc: { assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); - Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo)); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + unsigned Opc = + IntNo == Intrinsic::riscv_grev ? 
RISCVISD::GREVW : RISCVISD::GORCW; + // If the control is a constant, promote the node by clearing any extra + // bits bits in the control. isel will form greviw/gorciw if the result is + // sign extended. + if (isa(NewOp2)) { + NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, + DAG.getConstant(0x1f, DL, MVT::i64)); + Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC; + } + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); break; } + case Intrinsic::riscv_bcompress: + case Intrinsic::riscv_bdecompress: + case Intrinsic::riscv_bfp: case Intrinsic::riscv_fsl: case Intrinsic::riscv_fsr: { assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); - SDValue NewOp1 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); - SDValue NewOp2 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); - SDValue NewOp3 = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)); - unsigned Opc = getRISCVWOpcodeByIntr(IntNo); - SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3); - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo)); break; } case Intrinsic::riscv_orc_b: { // Lower to the GORCI encoding for orc.b with the operand extended. SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); - // If Zbp is enabled, use GORCIW which will sign extend the result. - unsigned Opc = - Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC; - SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp, + SDValue Res = DAG.getNode(RISCVISD::GORC, DL, MVT::i64, NewOp, DAG.getConstant(7, DL, MVT::i64)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -6681,10 +7286,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, // To extract the upper XLEN bits of the vector element, shift the first // element right by 32 bits and re-extract the lower XLEN bits. SDValue VL = DAG.getConstant(1, DL, XLenVT); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); - SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); - SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, - DAG.getConstant(32, DL, XLenVT), VL); + SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); + + SDValue ThirtyTwoV = + DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), + DAG.getConstant(32, DL, XLenVT), VL); SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL); SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); @@ -6840,6 +7446,110 @@ static Optional matchGREVIPat(SDValue Op) { return matchRISCVBitmanipPat(Op, BitmanipMasks); } +// Try to fold ( x, (reduction. 
vec, start)) +static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG) { + auto BinOpToRVVReduce = [](unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Unhandled binary to transfrom reduction"); + case ISD::ADD: + return RISCVISD::VECREDUCE_ADD_VL; + case ISD::UMAX: + return RISCVISD::VECREDUCE_UMAX_VL; + case ISD::SMAX: + return RISCVISD::VECREDUCE_SMAX_VL; + case ISD::UMIN: + return RISCVISD::VECREDUCE_UMIN_VL; + case ISD::SMIN: + return RISCVISD::VECREDUCE_SMIN_VL; + case ISD::AND: + return RISCVISD::VECREDUCE_AND_VL; + case ISD::OR: + return RISCVISD::VECREDUCE_OR_VL; + case ISD::XOR: + return RISCVISD::VECREDUCE_XOR_VL; + case ISD::FADD: + return RISCVISD::VECREDUCE_FADD_VL; + case ISD::FMAXNUM: + return RISCVISD::VECREDUCE_FMAX_VL; + case ISD::FMINNUM: + return RISCVISD::VECREDUCE_FMIN_VL; + } + }; + + auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { + return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isNullConstant(V.getOperand(1)) && + V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); + }; + + unsigned Opc = N->getOpcode(); + unsigned ReduceIdx; + if (IsReduction(N->getOperand(0), Opc)) + ReduceIdx = 0; + else if (IsReduction(N->getOperand(1), Opc)) + ReduceIdx = 1; + else + return SDValue(); + + // Skip if FADD disallows reassociation but the combiner needs. + if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) + return SDValue(); + + SDValue Extract = N->getOperand(ReduceIdx); + SDValue Reduce = Extract.getOperand(0); + if (!Reduce.hasOneUse()) + return SDValue(); + + SDValue ScalarV = Reduce.getOperand(2); + + // Make sure that ScalarV is a splat with VL=1. + if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && + ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && + ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) + return SDValue(); + + if (!isOneConstant(ScalarV.getOperand(2))) + return SDValue(); + + // TODO: Deal with value other than neutral element. 
+ auto IsRVVNeutralElement = [Opc, &DAG](SDNode *N, SDValue V) { + if (Opc == ISD::FADD && N->getFlags().hasNoSignedZeros() && + isNullFPConstant(V)) + return true; + return DAG.getNeutralElement(Opc, SDLoc(V), V.getSimpleValueType(), + N->getFlags()) == V; + }; + + // Check the scalar of ScalarV is neutral element + if (!IsRVVNeutralElement(N, ScalarV.getOperand(1))) + return SDValue(); + + if (!ScalarV.hasOneUse()) + return SDValue(); + + EVT SplatVT = ScalarV.getValueType(); + SDValue NewStart = N->getOperand(1 - ReduceIdx); + unsigned SplatOpc = RISCVISD::VFMV_S_F_VL; + if (SplatVT.isInteger()) { + auto *C = dyn_cast(NewStart.getNode()); + if (!C || C->isZero() || !isInt<5>(C->getSExtValue())) + SplatOpc = RISCVISD::VMV_S_X_VL; + else + SplatOpc = RISCVISD::VMV_V_X_VL; + } + + SDValue NewScalarV = + DAG.getNode(SplatOpc, SDLoc(N), SplatVT, ScalarV.getOperand(0), NewStart, + ScalarV.getOperand(2)); + SDValue NewReduce = + DAG.getNode(Reduce.getOpcode(), SDLoc(Reduce), Reduce.getValueType(), + Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV, + Reduce.getOperand(3), Reduce.getOperand(4)); + return DAG.getNode(Extract.getOpcode(), SDLoc(Extract), + Extract.getValueType(), NewReduce, Extract.getOperand(1)); +} + // Match the following pattern as a GREVI(W) operation // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, @@ -7066,11 +7776,70 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); } +// Combine +// ROTR ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32 +// ROTL ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32 +// ROTR ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64 +// ROTL ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64 +// RORW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64 +// ROLW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64 +// The grev patterns represents BSWAP. +// FIXME: This can be generalized to any GREV. We just need to toggle the MSB +// off the grev. +static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + bool IsWInstruction = + N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW; + assert((N->getOpcode() == ISD::ROTR || N->getOpcode() == ISD::ROTL || + IsWInstruction) && + "Unexpected opcode!"); + SDValue Src = N->getOperand(0); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + if (!Subtarget.hasStdExtZbp() || Src.getOpcode() != RISCVISD::GREV) + return SDValue(); + + if (!isa(N->getOperand(1)) || + !isa(Src.getOperand(1))) + return SDValue(); + + unsigned BitWidth = IsWInstruction ? 32 : VT.getSizeInBits(); + assert(isPowerOf2_32(BitWidth) && "Expected a power of 2"); + + // Needs to be a rotate by half the bitwidth for ROTR/ROTL or by 16 for + // RORW/ROLW. And the grev should be the encoding for bswap for this width. + unsigned ShAmt1 = N->getConstantOperandVal(1); + unsigned ShAmt2 = Src.getConstantOperandVal(1); + if (BitWidth < 32 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8)) + return SDValue(); + + Src = Src.getOperand(0); + + // Toggle bit the MSB of the shift. + unsigned CombinedShAmt = ShAmt1 ^ ShAmt2; + if (CombinedShAmt == 0) + return Src; + + SDValue Res = DAG.getNode( + RISCVISD::GREV, DL, VT, Src, + DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType())); + if (!IsWInstruction) + return Res; + + // Sign extend the result to match the behavior of the rotate. This will be + // selected to GREVIW in isel. 
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Res, + DAG.getValueType(MVT::i32)); +} + // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does // not undo itself, but they are redundant. static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { + bool IsGORC = N->getOpcode() == RISCVISD::GORC; + assert((IsGORC || N->getOpcode() == RISCVISD::GREV) && "Unexpected opcode"); SDValue Src = N->getOperand(0); if (Src.getOpcode() != N->getOpcode()) @@ -7085,7 +7854,7 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { Src = Src.getOperand(0); unsigned CombinedShAmt; - if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW) + if (IsGORC) CombinedShAmt = ShAmt1 | ShAmt2; else CombinedShAmt = ShAmt1 ^ ShAmt2; @@ -7203,6 +7972,11 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, auto *N1C = dyn_cast(N->getOperand(1)); if (!N0C || !N1C) return SDValue(); + // If N0C has multiple uses it's possible one of the cases in + // DAGCombiner::isMulAddWithConstProfitable will be true, which would result + // in an infinite loop. + if (!N0C->hasOneUse()) + return SDValue(); int64_t C0 = N0C->getSExtValue(); int64_t C1 = N1C->getSExtValue(); int64_t CA, CB; @@ -7238,6 +8012,8 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, return V; if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) return V; + if (SDValue V = combineBinOpToReduce(N, DAG)) + return V; // fold (add (select lhs, rhs, cc, 0, y), x) -> // (select lhs, rhs, cc, x, (add x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); @@ -7251,7 +8027,30 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) { return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false); } -static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero + // extending X. This is safe since we only need the LSB after the shift and + // shift amounts larger than 31 would produce poison. If we wait until + // type legalization, we'll create RISCVISD::SRLW and we can't recover it + // to use a BEXT instruction. 
+ if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && + N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && + N0.getOpcode() == ISD::SRL && !isa(N0.getOperand(1)) && + N0.hasOneUse()) { + SDLoc DL(N); + SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); + SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); + SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, + DAG.getConstant(1, DL, MVT::i64)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); + } + + if (SDValue V = combineBinOpToReduce(N, DAG)) + return V; + // fold (and (select lhs, rhs, cc, -1, y), x) -> // (select lhs, rhs, cc, x, (and x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true); @@ -7268,99 +8067,197 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SHFL; } + if (SDValue V = combineBinOpToReduce(N, DAG)) + return V; // fold (or (select cond, 0, y), x) -> // (select cond, x, (or x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); } static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) + // NOTE: Assumes ROL being legal means ROLW is legal. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (N0.getOpcode() == RISCVISD::SLLW && + isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && + TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { + SDLoc DL(N); + return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, + DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); + } + + if (SDValue V = combineBinOpToReduce(N, DAG)) + return V; // fold (xor (select cond, 0, y), x) -> // (select cond, x, (xor x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); } -// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND -// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free -// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be -// removed during type legalization leaving an ADD/SUB/MUL use that won't use -// ADDW/SUBW/MULW. -static SDValue performANY_EXTENDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const RISCVSubtarget &Subtarget) { - if (!Subtarget.is64Bit()) - return SDValue(); - - SelectionDAG &DAG = DCI.DAG; - +static SDValue +performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SDValue Src = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT != MVT::i64 || Src.getValueType() != MVT::i32) - return SDValue(); - // The opcode must be one that can implicitly sign_extend. - // FIXME: Additional opcodes. - switch (Src.getOpcode()) { - default: - return SDValue(); - case ISD::MUL: - if (!Subtarget.hasStdExtM()) - return SDValue(); - LLVM_FALLTHROUGH; - case ISD::ADD: - case ISD::SUB: - break; + // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) + if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && + cast(N->getOperand(1))->getVT().bitsGE(MVT::i16)) + return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, + Src.getOperand(0)); + + // Fold (i64 (sext_inreg (abs X), i32)) -> + // (i64 (smax (sext_inreg (neg X), i32), X)) if X has more than 32 sign bits. + // The (sext_inreg (neg X), i32) will be selected to negw by isel. 
This + // pattern occurs after type legalization of (i32 (abs X)) on RV64 if the user + // of the (i32 (abs X)) is a sext or setcc or something else that causes type + // legalization to add a sext_inreg after the abs. The (i32 (abs X)) will have + // been type legalized to (i64 (abs (sext_inreg X, i32))), but the sext_inreg + // may get combined into an earlier operation so we need to use + // ComputeNumSignBits. + // NOTE: (i64 (sext_inreg (abs X), i32)) can also be created for + // (i64 (ashr (shl (abs X), 32), 32)) without any type legalization so + // we can't assume that X has 33 sign bits. We must check. + if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit() && + Src.getOpcode() == ISD::ABS && Src.hasOneUse() && VT == MVT::i64 && + cast(N->getOperand(1))->getVT() == MVT::i32 && + DAG.ComputeNumSignBits(Src.getOperand(0)) > 32) { + SDLoc DL(N); + SDValue Freeze = DAG.getFreeze(Src.getOperand(0)); + SDValue Neg = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i64), Freeze); + Neg = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Neg, + DAG.getValueType(MVT::i32)); + return DAG.getNode(ISD::SMAX, DL, MVT::i64, Freeze, Neg); } - // Only handle cases where the result is used by a CopyToReg. That likely - // means the value is a liveout of the basic block. This helps prevent - // infinite combine loops like PR51206. - if (none_of(N->uses(), - [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; })) - return SDValue(); + return SDValue(); +} - SmallVector SetCCs; - for (SDNode::use_iterator UI = Src.getNode()->use_begin(), - UE = Src.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; - if (User == N) - continue; - if (UI.getUse().getResNo() != Src.getResNo()) - continue; - // All i32 setccs are legalized by sign extending operands. - if (User->getOpcode() == ISD::SETCC) { - SetCCs.push_back(User); - continue; - } - // We don't know if we can extend this user. - break; +// Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to +// vwadd(u).vv/vx or vwsub(u).vv/vx. +static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG, + bool Commute = false) { + assert((N->getOpcode() == RISCVISD::ADD_VL || + N->getOpcode() == RISCVISD::SUB_VL) && + "Unexpected opcode"); + bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL; + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Commute) + std::swap(Op0, Op1); + + MVT VT = N->getSimpleValueType(0); + + // Determine the narrow size for a widening add/sub. + unsigned NarrowSize = VT.getScalarSizeInBits() / 2; + MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), + VT.getVectorElementCount()); + + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + + SDLoc DL(N); + + // If the RHS is a sext or zext, we can form a widening op. + if ((Op1.getOpcode() == RISCVISD::VZEXT_VL || + Op1.getOpcode() == RISCVISD::VSEXT_VL) && + Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) { + unsigned ExtOpc = Op1.getOpcode(); + Op1 = Op1.getOperand(0); + // Re-introduce narrower extends if needed. + if (Op1.getValueType() != NarrowVT) + Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL); + + unsigned WOpc; + if (ExtOpc == RISCVISD::VSEXT_VL) + WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL; + else + WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL; + + return DAG.getNode(WOpc, DL, VT, Op0, Op1, Mask, VL); } - // If we don't have any SetCCs, this isn't worthwhile. 
- if (SetCCs.empty()) - return SDValue(); + // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar + // sext/zext? + + return SDValue(); +} + +// Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or +// vwsub(u).vv/vx. +static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + + MVT VT = N->getSimpleValueType(0); + MVT NarrowVT = Op1.getSimpleValueType(); + unsigned NarrowSize = NarrowVT.getScalarSizeInBits(); + + unsigned VOpc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected opcode"); + case RISCVISD::VWADD_W_VL: VOpc = RISCVISD::VWADD_VL; break; + case RISCVISD::VWSUB_W_VL: VOpc = RISCVISD::VWSUB_VL; break; + case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break; + case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break; + } + + bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL || + N->getOpcode() == RISCVISD::VWSUB_W_VL; SDLoc DL(N); - SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src); - DCI.CombineTo(N, SExt); - // Promote all the setccs. - for (SDNode *SetCC : SetCCs) { - SmallVector Ops; + // If the LHS is a sext or zext, we can narrow this op to the same size as + // the RHS. + if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) || + (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) && + Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) { + unsigned ExtOpc = Op0.getOpcode(); + Op0 = Op0.getOperand(0); + // Re-introduce narrower extends if needed. + if (Op0.getValueType() != NarrowVT) + Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL); + return DAG.getNode(VOpc, DL, VT, Op0, Op1, Mask, VL); + } - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Src) - Ops.push_back(SExt); - else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp)); + bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL || + N->getOpcode() == RISCVISD::VWADDU_W_VL; + + // Look for splats on the left hand side of a vwadd(u).wv. We might be able + // to commute and use a vwadd(u).vx instead. + if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL && + Op0.getOperand(0).isUndef() && Op0.getOperand(2) == VL) { + Op0 = Op0.getOperand(1); + + // See if have enough sign bits or zero bits in the scalar to use a + // widening add/sub by splatting to smaller element size. + unsigned EltBits = VT.getScalarSizeInBits(); + unsigned ScalarBits = Op0.getValueSizeInBits(); + // Make sure we're getting all element bits from the scalar register. + // FIXME: Support implicit sign extension of vmv.v.x? + if (ScalarBits < EltBits) + return SDValue(); + + if (IsSigned) { + if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize)) + return SDValue(); + } else { + APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize); + if (!DAG.MaskedValueIsZero(Op0, Mask)) + return SDValue(); } - Ops.push_back(SetCC->getOperand(2)); - DCI.CombineTo(SetCC, - DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); + Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, + DAG.getUNDEF(NarrowVT), Op0, VL); + return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL); } - return SDValue(N, 0); + + return SDValue(); } // Try to form VWMUL, VWMULU or VWMULSU. 
@@ -7408,12 +8305,15 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) { // The operand is a splat of a scalar. + // The pasthru must be undef for tail agnostic + if (!Op1.getOperand(0).isUndef()) + return SDValue(); // The VL must be the same. - if (Op1.getOperand(1) != VL) + if (Op1.getOperand(2) != VL) return SDValue(); // Get the scalar value. - Op1 = Op1.getOperand(0); + Op1 = Op1.getOperand(1); // See if have enough sign bits or zero bits in the scalar to use a // widening multiply by splatting to smaller element size. @@ -7424,16 +8324,20 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, if (ScalarBits < EltBits) return SDValue(); - if (IsSignExt) { - if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize)) - return SDValue(); + // If the LHS is a sign extend, try to use vwmul. + if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) { + // Can use vwmul. } else { + // Otherwise try to use vwmulu or vwmulsu. APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize); - if (!DAG.MaskedValueIsZero(Op1, Mask)) + if (DAG.MaskedValueIsZero(Op1, Mask)) + IsVWMULSU = IsSignExt; + else return SDValue(); } - Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL); + Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, + DAG.getUNDEF(NarrowVT), Op1, VL); } else return SDValue(); @@ -7443,6 +8347,8 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; if (Op0.getValueType() != NarrowVT) Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL); + // vwmulsu requires second operand to be zero extended. + ExtOpc = IsVWMULSU ? RISCVISD::VZEXT_VL : ExtOpc; if (Op1.getValueType() != NarrowVT) Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL); @@ -7569,6 +8475,133 @@ static SDValue performFP_TO_INT_SATCombine(SDNode *N, return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); } +// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is +// smaller than XLenVT. +static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); + + SDValue Src = N->getOperand(0); + if (Src.getOpcode() != ISD::BSWAP) + return SDValue(); + + EVT VT = N->getValueType(0); + if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || + !isPowerOf2_32(VT.getSizeInBits())) + return SDValue(); + + SDLoc DL(N); + return DAG.getNode(RISCVISD::GREV, DL, VT, Src.getOperand(0), + DAG.getConstant(7, DL, VT)); +} + +// Convert from one FMA opcode to another based on whether we are negating the +// multiply result and/or the accumulator. +// NOTE: Only supports RVV operations with VL. +static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { + assert((NegMul || NegAcc) && "Not negating anything?"); + + // Negating the multiply result changes ADD<->SUB and toggles 'N'. + if (NegMul) { + // clang-format off + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; + case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; + case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; + case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; + } + // clang-format on + } + + // Negating the accumulator changes ADD<->SUB. 
+ if (NegAcc) { + // clang-format off + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; + case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; + case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; + case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; + } + // clang-format on + } + + return Opcode; +} + +// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) +// FIXME: Should this be a generic combine? There's a similar combine on X86. +// +// Also try these folds where an add or sub is in the middle. +// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) +// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); + + if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) + return SDValue(); + + auto *ShAmtC = dyn_cast(N->getOperand(1)); + if (!ShAmtC || ShAmtC->getZExtValue() > 32) + return SDValue(); + + SDValue N0 = N->getOperand(0); + + SDValue Shl; + ConstantSDNode *AddC = nullptr; + + // We might have an ADD or SUB between the SRA and SHL. + bool IsAdd = N0.getOpcode() == ISD::ADD; + if ((IsAdd || N0.getOpcode() == ISD::SUB)) { + if (!N0.hasOneUse()) + return SDValue(); + // Other operand needs to be a constant we can modify. + AddC = dyn_cast(N0.getOperand(IsAdd ? 1 : 0)); + if (!AddC) + return SDValue(); + + // AddC needs to have at least 32 trailing zeros. + if (AddC->getAPIntValue().countTrailingZeros() < 32) + return SDValue(); + + Shl = N0.getOperand(IsAdd ? 0 : 1); + } else { + // Not an ADD or SUB. + Shl = N0; + } + + // Look for a shift left by 32. + if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse() || + !isa(Shl.getOperand(1)) || + Shl.getConstantOperandVal(1) != 32) + return SDValue(); + + SDLoc DL(N); + SDValue In = Shl.getOperand(0); + + // If we looked through an ADD or SUB, we need to rebuild it with the shifted + // constant. + if (AddC) { + SDValue ShiftedAddC = + DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64); + if (IsAdd) + In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC); + else + In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In); + } + + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In, + DAG.getValueType(MVT::i32)); + if (ShAmtC->getZExtValue() == 32) + return SExt; + + return DAG.getNode( + ISD::SHL, DL, MVT::i64, SExt, + DAG.getConstant(32 - ShAmtC->getZExtValue(), DL, MVT::i64)); +} + SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -7597,6 +8630,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (Op0->getOpcode() == RISCVISD::BuildPairF64) return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); + if (Op0->isUndef()) { + SDValue Lo = DAG.getUNDEF(MVT::i32); + SDValue Hi = DAG.getUNDEF(MVT::i32); + return DCI.CombineTo(N, Lo, Hi); + } + SDLoc DL(N); // It's cheaper to materialise two 32-bit integers than to load a double @@ -7634,15 +8673,27 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } case RISCVISD::SLLW: case RISCVISD::SRAW: - case RISCVISD::SRLW: - case RISCVISD::ROLW: - case RISCVISD::RORW: { + case RISCVISD::SRLW: { // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 
if (SimplifyDemandedLowBitsHelper(0, 32) || SimplifyDemandedLowBitsHelper(1, 5)) return SDValue(N, 0); + break; } + case ISD::ROTR: + case ISD::ROTL: + case RISCVISD::RORW: + case RISCVISD::ROLW: { + if (N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW) { + // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. + if (SimplifyDemandedLowBitsHelper(0, 32) || + SimplifyDemandedLowBitsHelper(1, 5)) + return SDValue(N, 0); + } + + return combineROTR_ROTL_RORW_ROLW(N, DAG, Subtarget); + } case RISCVISD::CLZW: case RISCVISD::CTZW: { // Only the lower 32 bits of the first operand are read @@ -7667,7 +8718,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SimplifyDemandedLowBitsHelper(1, 5)) return SDValue(N, 0); - return combineGREVI_GORCI(N, DAG); + break; } case RISCVISD::SHFL: case RISCVISD::UNSHFL: { @@ -7682,10 +8733,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::SHFLW: case RISCVISD::UNSHFLW: { // Only the lower 32 bits of LHS and lower 4 bits of RHS are read. - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); - APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4); if (SimplifyDemandedLowBitsHelper(0, 32) || SimplifyDemandedLowBitsHelper(1, 4)) return SDValue(N, 0); @@ -7701,6 +8748,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case RISCVISD::FSR: + case RISCVISD::FSL: + case RISCVISD::FSRW: + case RISCVISD::FSLW: { + bool IsWInstruction = + N->getOpcode() == RISCVISD::FSRW || N->getOpcode() == RISCVISD::FSLW; + unsigned BitWidth = + IsWInstruction ? 32 : N->getSimpleValueType(0).getSizeInBits(); + assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); + // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read. + if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) + 1)) + return SDValue(N, 0); + + break; + } case RISCVISD::FMV_X_ANYEXTH: case RISCVISD::FMV_X_ANYEXTW_RV64: { SDLoc DL(N); @@ -7727,7 +8789,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; - APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); if (Op0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewFMV, DAG.getConstant(SignBit, DL, VT)); @@ -7741,13 +8803,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return performSUBCombine(N, DAG); case ISD::AND: - return performANDCombine(N, DAG); + return performANDCombine(N, DAG, Subtarget); case ISD::OR: return performORCombine(N, DAG, Subtarget); case ISD::XOR: return performXORCombine(N, DAG); - case ISD::ANY_EXTEND: - return performANY_EXTENDCombine(N, DCI, Subtarget); + case ISD::FADD: + case ISD::UMAX: + case ISD::UMIN: + case ISD::SMAX: + case ISD::SMIN: + case ISD::FMAXNUM: + case ISD::FMINNUM: + return combineBinOpToReduce(N, DAG); + case ISD::SIGN_EXTEND_INREG: + return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::ZERO_EXTEND: // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during // type legalization. 
This is safe because fp_to_uint produces poison if @@ -7879,6 +8949,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::BITREVERSE: + return performBITREVERSECombine(N, DAG, Subtarget); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return performFP_TO_INTCombine(N, DCI, Subtarget); @@ -7952,40 +9024,41 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DL, IndexVT, Index); } - unsigned Scale = cast(ScaleOp)->getZExtValue(); - if (IsIndexScaled && Scale != 1) { - // Manually scale the indices by the element size. + if (IsIndexScaled) { + // Manually scale the indices. // TODO: Sanitize the scale operand here? // TODO: For VP nodes, should we use VP_SHL here? + unsigned Scale = cast(ScaleOp)->getZExtValue(); assert(isPowerOf2_32(Scale) && "Expecting power-of-two types"); SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT); Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale); + ScaleOp = DAG.getTargetConstant(1, DL, ScaleOp.getValueType()); } - ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED; + ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED; if (const auto *VPGN = dyn_cast(N)) return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, {VPGN->getChain(), VPGN->getBasePtr(), Index, - VPGN->getScale(), VPGN->getMask(), + ScaleOp, VPGN->getMask(), VPGN->getVectorLength()}, VPGN->getMemOperand(), NewIndexTy); if (const auto *VPSN = dyn_cast(N)) return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, {VPSN->getChain(), VPSN->getValue(), - VPSN->getBasePtr(), Index, VPSN->getScale(), + VPSN->getBasePtr(), Index, ScaleOp, VPSN->getMask(), VPSN->getVectorLength()}, VPSN->getMemOperand(), NewIndexTy); if (const auto *MGN = dyn_cast(N)) return DAG.getMaskedGather( N->getVTList(), MGN->getMemoryVT(), DL, {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), - MGN->getBasePtr(), Index, MGN->getScale()}, + MGN->getBasePtr(), Index, ScaleOp}, MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); const auto *MSN = cast(N); return DAG.getMaskedScatter( N->getVTList(), MSN->getMemoryVT(), DL, {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), - Index, MSN->getScale()}, + Index, ScaleOp}, MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); } case RISCVISD::SRA_VL: @@ -7997,14 +9070,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SDLoc DL(N); SDValue VL = N->getOperand(3); EVT VT = N->getValueType(0); - ShAmt = - DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL); + ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), + ShAmt.getOperand(1), VL); return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, N->getOperand(2), N->getOperand(3)); } break; } case ISD::SRA: + if (SDValue V = performSRACombine(N, DAG, Subtarget)) + return V; + LLVM_FALLTHROUGH; case ISD::SRL: case ISD::SHL: { SDValue ShAmt = N->getOperand(1); @@ -8012,17 +9088,63 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // We don't need the upper 32 bits of a 64-bit element for a shift amount. 
SDLoc DL(N); EVT VT = N->getValueType(0); - ShAmt = - DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0)); + ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), + ShAmt.getOperand(1), + DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); } break; } + case RISCVISD::ADD_VL: + if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false)) + return V; + return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true); + case RISCVISD::SUB_VL: + return combineADDSUB_VLToVWADDSUB_VL(N, DAG); + case RISCVISD::VWADD_W_VL: + case RISCVISD::VWADDU_W_VL: + case RISCVISD::VWSUB_W_VL: + case RISCVISD::VWSUBU_W_VL: + return combineVWADD_W_VL_VWSUB_W_VL(N, DAG); case RISCVISD::MUL_VL: if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false)) return V; // Mul is commutative. return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true); + case RISCVISD::VFMADD_VL: + case RISCVISD::VFNMADD_VL: + case RISCVISD::VFMSUB_VL: + case RISCVISD::VFNMSUB_VL: { + // Fold FNEG_VL into FMA opcodes. + SDValue A = N->getOperand(0); + SDValue B = N->getOperand(1); + SDValue C = N->getOperand(2); + SDValue Mask = N->getOperand(3); + SDValue VL = N->getOperand(4); + + auto invertIfNegative = [&Mask, &VL](SDValue &V) { + if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && + V.getOperand(2) == VL) { + // Return the negated input. + V = V.getOperand(0); + return true; + } + + return false; + }; + + bool NegA = invertIfNegative(A); + bool NegB = invertIfNegative(B); + bool NegC = invertIfNegative(C); + + // If no operands are negated, we're done. + if (!NegA && !NegB && !NegC) + return SDValue(); + + unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); + return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, + VL); + } case ISD::STORE: { auto *Store = cast(N); SDValue Val = Store->getValue(); @@ -8035,7 +9157,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // The memory VT and the element type must match. if (VecVT.getVectorElementType() == MemVT) { SDLoc DL(N); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); + MVT MaskVT = getMaskTypeFor(VecVT); return DAG.getStoreVP( Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), DAG.getConstant(1, DL, MaskVT), @@ -8047,6 +9169,73 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case ISD::SPLAT_VECTOR: { + EVT VT = N->getValueType(0); + // Only perform this combine on legal MVT types. + if (!isTypeLegal(VT)) + break; + if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, + DAG, Subtarget)) + return Gather; + break; + } + case RISCVISD::VMV_V_X_VL: { + // Tail agnostic VMV.V.X only demands the vector element bitwidth from the + // scalar input. + unsigned ScalarSize = N->getOperand(1).getValueSizeInBits(); + unsigned EltWidth = N->getValueType(0).getScalarSizeInBits(); + if (ScalarSize > EltWidth && N->getOperand(0).isUndef()) + if (SimplifyDemandedLowBitsHelper(1, EltWidth)) + return SDValue(N, 0); + + break; + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = N->getConstantOperandVal(0); + switch (IntNo) { + // By default we do not combine any intrinsic. 
+ default: + return SDValue(); + case Intrinsic::riscv_vcpop: + case Intrinsic::riscv_vcpop_mask: + case Intrinsic::riscv_vfirst: + case Intrinsic::riscv_vfirst_mask: { + SDValue VL = N->getOperand(2); + if (IntNo == Intrinsic::riscv_vcpop_mask || + IntNo == Intrinsic::riscv_vfirst_mask) + VL = N->getOperand(3); + if (!isNullConstant(VL)) + return SDValue(); + // If VL is 0, vcpop -> li 0, vfirst -> li -1. + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (IntNo == Intrinsic::riscv_vfirst || + IntNo == Intrinsic::riscv_vfirst_mask) + return DAG.getConstant(-1, DL, VT); + return DAG.getConstant(0, DL, VT); + } + } + } + case ISD::BITCAST: { + assert(Subtarget.useRVVForFixedLengthVectors()); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer + // type, widen both sides to avoid a trip through memory. + if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && + VT.isScalarInteger()) { + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); + SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); + Ops[0] = N0; + SDLoc DL(N); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); + N0 = DAG.getBitcast(MVT::i8, N0); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N0); + } + + return SDValue(); + } } return SDValue(); @@ -8182,22 +9371,23 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( return UseMask(NewMask); } -static void computeGREV(APInt &Src, unsigned ShAmt) { - ShAmt &= Src.getBitWidth() - 1; - uint64_t x = Src.getZExtValue(); - if (ShAmt & 1) - x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); - if (ShAmt & 2) - x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); - if (ShAmt & 4) - x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); - if (ShAmt & 8) - x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); - if (ShAmt & 16) - x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); - if (ShAmt & 32) - x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); - Src = x; +static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { + static const uint64_t GREVMasks[] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, + 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; + + for (unsigned Stage = 0; Stage != 6; ++Stage) { + unsigned Shift = 1 << Stage; + if (ShAmt & Shift) { + uint64_t Mask = GREVMasks[Stage]; + uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); + if (IsGORC) + Res |= x; + x = Res; + } + } + + return x; } void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, @@ -8263,28 +9453,28 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; } case RISCVISD::GREV: - case RISCVISD::GREVW: { + case RISCVISD::GORC: { if (auto *C = dyn_cast(Op.getOperand(1))) { Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); - if (Opc == RISCVISD::GREVW) - Known = Known.trunc(32); - unsigned ShAmt = C->getZExtValue(); - computeGREV(Known.Zero, ShAmt); - computeGREV(Known.One, ShAmt); - if (Opc == RISCVISD::GREVW) - Known = Known.sext(BitWidth); + unsigned ShAmt = C->getZExtValue() & (Known.getBitWidth() - 1); + bool IsGORC = Op.getOpcode() == RISCVISD::GORC; + // To compute zeros, we need to invert the value and invert it back after. 
+ Known.Zero = + ~computeGREVOrGORC(~Known.Zero.getZExtValue(), ShAmt, IsGORC); + Known.One = computeGREVOrGORC(Known.One.getZExtValue(), ShAmt, IsGORC); } break; } case RISCVISD::READ_VLENB: { - // If we know the minimum VLen from Zvl extensions, we can use that to - // determine the trailing zeros of VLENB. - // FIXME: Limit to 128 bit vectors until we have more testing. - unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8; - if (MinVLenB > 0) - Known.Zero.setLowBits(Log2_32(MinVLenB)); - // We assume VLENB is no more than 65536 / 8 bytes. - Known.Zero.setBitsFrom(14); + // We can use the minimum and maximum VLEN values to bound VLENB. We + // know VLEN must be a power of two. + const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; + const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; + assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); + Known.Zero.setLowBits(Log2_32(MinVLenB)); + Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); + if (MaxVLenB == MinVLenB) + Known.One.setBit(Log2_32(MinVLenB)); break; } case ISD::INTRINSIC_W_CHAIN: @@ -8381,6 +9571,51 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( return 1; } +const Constant * +RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { + assert(Ld && "Unexpected null LoadSDNode"); + if (!ISD::isNormalLoad(Ld)) + return nullptr; + + SDValue Ptr = Ld->getBasePtr(); + + // Only constant pools with no offset are supported. + auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { + auto *CNode = dyn_cast(Ptr); + if (!CNode || CNode->isMachineConstantPoolEntry() || + CNode->getOffset() != 0) + return nullptr; + + return CNode; + }; + + // Simple case, LLA. + if (Ptr.getOpcode() == RISCVISD::LLA) { + auto *CNode = GetSupportedConstantPool(Ptr); + if (!CNode || CNode->getTargetFlags() != 0) + return nullptr; + + return CNode->getConstVal(); + } + + // Look for a HI and ADD_LO pair. + if (Ptr.getOpcode() != RISCVISD::ADD_LO || + Ptr.getOperand(0).getOpcode() != RISCVISD::HI) + return nullptr; + + auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); + auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); + + if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || + !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) + return nullptr; + + if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) + return nullptr; + + return CNodeLo->getConstVal(); +} + static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); @@ -8559,6 +9794,109 @@ static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, return BB; } +static MachineBasicBlock * +EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, + MachineBasicBlock *ThisMBB, + const RISCVSubtarget &Subtarget) { + // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5) + // Without this, custom-inserter would have generated: + // + // A + // | \ + // | B + // | / + // C + // | \ + // | D + // | / + // E + // + // A: X = ...; Y = ... + // B: empty + // C: Z = PHI [X, A], [Y, B] + // D: empty + // E: PHI [X, C], [Z, D] + // + // If we lower both Select_FPRX_ in a single step, we can instead generate: + // + // A + // | \ + // | C + // | /| + // |/ | + // | | + // | D + // | / + // E + // + // A: X = ...; Y = ... 
+  // D: empty
+  // E: PHI [X, A], [X, C], [Y, D]
+
+  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
+  const DebugLoc &DL = First.getDebugLoc();
+  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
+  MachineFunction *F = ThisMBB->getParent();
+  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++ThisMBB->getIterator();
+  F->insert(It, FirstMBB);
+  F->insert(It, SecondMBB);
+  F->insert(It, SinkMBB);
+
+  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
+  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
+                  std::next(MachineBasicBlock::iterator(First)),
+                  ThisMBB->end());
+  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
+
+  // Fallthrough block for ThisMBB.
+  ThisMBB->addSuccessor(FirstMBB);
+  // Fallthrough block for FirstMBB.
+  FirstMBB->addSuccessor(SecondMBB);
+  ThisMBB->addSuccessor(SinkMBB);
+  FirstMBB->addSuccessor(SinkMBB);
+  // This is fallthrough.
+  SecondMBB->addSuccessor(SinkMBB);
+
+  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
+  Register FLHS = First.getOperand(1).getReg();
+  Register FRHS = First.getOperand(2).getReg();
+  // Insert appropriate branch.
+  BuildMI(ThisMBB, DL, TII.getBrCond(FirstCC))
+      .addReg(FLHS)
+      .addReg(FRHS)
+      .addMBB(SinkMBB);
+
+  Register SLHS = Second.getOperand(1).getReg();
+  Register SRHS = Second.getOperand(2).getReg();
+  Register Op1Reg4 = First.getOperand(4).getReg();
+  Register Op1Reg5 = First.getOperand(5).getReg();
+
+  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
+  // Insert appropriate branch.
+  BuildMI(FirstMBB, DL, TII.getBrCond(SecondCC))
+      .addReg(SLHS)
+      .addReg(SRHS)
+      .addMBB(SinkMBB);
+
+  Register DestReg = Second.getOperand(0).getReg();
+  Register Op2Reg4 = Second.getOperand(4).getReg();
+  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
+      .addReg(Op1Reg4)
+      .addMBB(ThisMBB)
+      .addReg(Op2Reg4)
+      .addMBB(FirstMBB)
+      .addReg(Op1Reg5)
+      .addMBB(SecondMBB);
+
+  // Now remove the Select_FPRX_s.
+  First.eraseFromParent();
+  Second.eraseFromParent();
+  return SinkMBB;
+}
+
 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                            MachineBasicBlock *BB,
                                            const RISCVSubtarget &Subtarget) {
@@ -8586,6 +9924,10 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   // previous selects in the sequence.
   // These conditions could be further relaxed. See the X86 target for a
   // related approach and more information.
+  //
+  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+  // is checked here and handled by a separate function -
+  // EmitLoweredCascadedSelect.
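+  //
+  // Roughly, source like the following (illustrative only) produces the
+  // shape this check looks for:
+  //   f = (a < b) ? x : ((c < d) ? x : y);
+  // where the inner select defines the false operand (rs5) of the outer one.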
Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); auto CC = static_cast(MI.getOperand(3).getImm()); @@ -8595,12 +9937,19 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, SelectDests.insert(MI.getOperand(0).getReg()); MachineInstr *LastSelectPseudo = &MI; + auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); + if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && + Next->getOpcode() == MI.getOpcode() && + Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && + Next->getOperand(5).isKill()) { + return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); + } for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); SequenceMBBI != E; ++SequenceMBBI) { if (SequenceMBBI->isDebugInstr()) continue; - else if (isSelectPseudo(*SequenceMBBI)) { + if (isSelectPseudo(*SequenceMBBI)) { if (SequenceMBBI->getOperand(1).getReg() != LHS || SequenceMBBI->getOperand(2).getReg() != RHS || SequenceMBBI->getOperand(3).getImm() != CC || @@ -8831,7 +10180,7 @@ static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, // Assign the first mask argument to V0. // This is an interim calling convention and it may be changed in the // future. - if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue()) + if (FirstMaskArgument && ValNo == *FirstMaskArgument) return State.AllocateReg(RISCV::V0); return State.AllocateReg(ArgVRs); } @@ -10112,6 +11461,13 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BuildPairF64) NODE_NAME_CASE(SplitF64) NODE_NAME_CASE(TAIL) + NODE_NAME_CASE(ADD_LO) + NODE_NAME_CASE(HI) + NODE_NAME_CASE(LLA) + NODE_NAME_CASE(ADD_TPREL) + NODE_NAME_CASE(LA) + NODE_NAME_CASE(LA_TLS_IE) + NODE_NAME_CASE(LA_TLS_GD) NODE_NAME_CASE(MULHSU) NODE_NAME_CASE(SLLW) NODE_NAME_CASE(SRAW) @@ -10129,6 +11485,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FSR) NODE_NAME_CASE(FMV_H_X) NODE_NAME_CASE(FMV_X_ANYEXTH) + NODE_NAME_CASE(FMV_X_SIGNEXTH) NODE_NAME_CASE(FMV_W_X_RV64) NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) NODE_NAME_CASE(FCVT_X) @@ -10157,7 +11514,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VMV_X_S) NODE_NAME_CASE(VMV_S_X_VL) NODE_NAME_CASE(VFMV_S_F_VL) - NODE_NAME_CASE(SPLAT_VECTOR_I64) NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) NODE_NAME_CASE(READ_VLENB) NODE_NAME_CASE(TRUNCATE_VECTOR_VL) @@ -10203,7 +11559,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FNEG_VL) NODE_NAME_CASE(FABS_VL) NODE_NAME_CASE(FSQRT_VL) - NODE_NAME_CASE(FMA_VL) + NODE_NAME_CASE(VFMADD_VL) + NODE_NAME_CASE(VFNMADD_VL) + NODE_NAME_CASE(VFMSUB_VL) + NODE_NAME_CASE(VFNMSUB_VL) NODE_NAME_CASE(FCOPYSIGN_VL) NODE_NAME_CASE(SMIN_VL) NODE_NAME_CASE(SMAX_VL) @@ -10222,7 +11581,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VWMUL_VL) NODE_NAME_CASE(VWMULU_VL) NODE_NAME_CASE(VWMULSU_VL) + NODE_NAME_CASE(VWADD_VL) NODE_NAME_CASE(VWADDU_VL) + NODE_NAME_CASE(VWSUB_VL) + NODE_NAME_CASE(VWSUBU_VL) + NODE_NAME_CASE(VWADD_W_VL) + NODE_NAME_CASE(VWADDU_W_VL) + NODE_NAME_CASE(VWSUB_W_VL) + NODE_NAME_CASE(VWSUBU_W_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) NODE_NAME_CASE(VP_MERGE_VL) @@ -10237,8 +11603,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VSEXT_VL) NODE_NAME_CASE(VZEXT_VL) NODE_NAME_CASE(VCPOP_VL) - NODE_NAME_CASE(VLE_VL) - 
NODE_NAME_CASE(VSE_VL) NODE_NAME_CASE(READ_CSR) NODE_NAME_CASE(WRITE_CSR) NODE_NAME_CASE(SWAP_CSR) @@ -10459,7 +11823,18 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } } - return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + std::pair Res = + TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + + // If we picked one of the Zfinx register classes, remap it to the GPR class. + // FIXME: When Zfinx is supported in CodeGen this will need to take the + // Subtarget into account. + if (Res.second == &RISCV::GPRF16RegClass || + Res.second == &RISCV::GPRF32RegClass || + Res.second == &RISCV::GPRF64RegClass) + return std::make_pair(Res.first, &RISCV::GPRRegClass); + + return Res; } unsigned @@ -10681,7 +12056,8 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( return Result; } -bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { +bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, + EVT DataVT) const { return false; } @@ -10797,7 +12173,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || (1 - ImmS).isPowerOf2()) - return true; + return true; } } } @@ -10805,8 +12181,8 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, return false; } -bool RISCVTargetLowering::isMulAddWithConstProfitable( - const SDValue &AddNode, const SDValue &ConstNode) const { +bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const { // Let the DAGCombiner decide for vectors. EVT VT = AddNode.getValueType(); if (VT.isVector()) @@ -10831,9 +12207,13 @@ bool RISCVTargetLowering::isMulAddWithConstProfitable( bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { - if (!VT.isVector()) - return false; + if (!VT.isVector()) { + if (Fast) + *Fast = false; + return Subtarget.enableUnalignedScalarMem(); + } + // All vector implementations must support element alignment EVT ElemVT = VT.getVectorElementType(); if (Alignment >= ElemVT.getStoreSize()) { if (Fast) @@ -10847,7 +12227,7 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( bool RISCVTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional CC) const { - bool IsABIRegCopy = CC.hasValue(); + bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { // Cast the f16 to i16, extend to i32, pad with ones to make a float nan, @@ -10901,7 +12281,7 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, Optional CC) const { - bool IsABIRegCopy = CC.hasValue(); + bool IsABIRegCopy = CC.has_value(); if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { SDValue Val = Parts[0]; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 840a821870a7..eb013d4b6682 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -41,6 +41,21 @@ enum NodeType : unsigned { BuildPairF64, SplitF64, TAIL, + + // Add the Lo 12 bits from 
an address. Selected to ADDI.
+  ADD_LO,
+  // Get the Hi 20 bits from an address. Selected to LUI.
+  HI,
+
+  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
+  LLA,
+
+  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
+  ADD_TPREL,
+
+  // Load address.
+  LA_TLS_GD,
+
   // Multiply high for signed x unsigned.
   MULHSU,
   // RV64I shifts, directly matching the semantics of the named RISC-V
@@ -75,6 +90,7 @@ enum NodeType : unsigned {
   //
   // FMV_H_X matches the semantics of the FMV.H.X.
   // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
+  // FMV_X_SIGNEXTH is similar to FMV.X.H but has a sign-extended result.
   // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
   //
@@ -82,6 +98,7 @@ enum NodeType : unsigned {
   // unnecessary GPR->FPR->GPR moves.
   FMV_H_X,
   FMV_X_ANYEXTH,
+  FMV_X_SIGNEXTH,
   FMV_W_X_RV64,
   FMV_X_ANYEXTW_RV64,
   // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
@@ -129,10 +146,12 @@ enum NodeType : unsigned {
   BFPW,
   // Vector Extension
   // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
-  // for the VL value to be used for the operation.
+  // for the VL value to be used for the operation. The first operand is the
+  // passthru operand.
   VMV_V_X_VL,
   // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
-  // for the VL value to be used for the operation.
+  // for the VL value to be used for the operation. The first operand is the
+  // passthru operand.
   VFMV_V_F_VL,
   // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
   // extended from the vector element size.
@@ -141,11 +160,9 @@ enum NodeType : unsigned {
   VMV_S_X_VL,
   // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
   VFMV_S_F_VL,
-  // Splats an i64 scalar to a vector type (with element type i64) where the
-  // scalar is a sign-extended i32.
-  SPLAT_VECTOR_I64,
   // Splats a 64-bit value that has been split into two i32 parts. This is
   // expanded late to two scalar stores and a stride 0 vector load.
+  // The first operand is the passthru operand.
   SPLAT_VECTOR_SPLIT_I64_VL,
   // Read VLENB CSR
   READ_VLENB,
@@ -158,9 +175,9 @@ enum NodeType : unsigned {
   // and the fifth the VL.
   VSLIDEUP_VL,
   VSLIDEDOWN_VL,
-  // Matches the semantics of vslide1up/slide1down. The first operand is the
-  // source vector, the second is the XLenVT scalar value. The third and fourth
-  // operands are the mask and VL operands.
+  // Matches the semantics of vslide1up/slide1down. The first operand is the
+  // passthru operand, the second is the source vector, and the third is the
+  // XLenVT scalar value. The fourth and fifth operands are the mask and VL operands.
   VSLIDE1UP_VL,
   VSLIDE1DOWN_VL,
   // Matches the semantics of the vid.v instruction, with a mask and VL
@@ -225,7 +242,10 @@ enum NodeType : unsigned {
   FNEG_VL,
   FABS_VL,
   FSQRT_VL,
-  FMA_VL,
+  VFMADD_VL,
+  VFNMADD_VL,
+  VFMSUB_VL,
+  VFNMSUB_VL,
   FCOPYSIGN_VL,
   SMIN_VL,
   SMAX_VL,
@@ -246,7 +266,14 @@ enum NodeType : unsigned {
   VWMUL_VL,
   VWMULU_VL,
   VWMULSU_VL,
+  VWADD_VL,
   VWADDU_VL,
+  VWSUB_VL,
+  VWSUBU_VL,
+  VWADD_W_VL,
+  VWADDU_W_VL,
+  VWSUB_W_VL,
+  VWSUBU_W_VL,
 
   // Vector compare producing a mask. Fourth operand is input mask. Fifth
   // operand is VL.
@@ -268,8 +295,8 @@ enum NodeType : unsigned {
   VMCLR_VL,
   VMSET_VL,
 
-  // Matches the semantics of vrgather.vx and vrgather.vv with an extra operand
-  // for VL.
+  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
+  // for passthru and VL.
Operands are (src, index, mask, passthru, vl). VRGATHER_VX_VL, VRGATHER_VV_VL, VRGATHEREI16_VV_VL, @@ -302,16 +329,21 @@ enum NodeType : unsigned { STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCVT_WU_RV64, - // Memory opcodes start here. - VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE, - VSE_VL, - // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all // opcodes will be thought as target memory ops! + + // Load address. + LA = ISD::FIRST_TARGET_MEMORY_OPCODE, + LA_TLS_IE, }; } // namespace RISCVISD +namespace RISCV { +// We use 64 bits as the known part in the scalable vector types. +static constexpr unsigned RVVBitsPerBlock = 64; +} // namespace RISCV + class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &Subtarget; @@ -333,11 +365,18 @@ public: bool isTruncateFree(EVT SrcVT, EVT DstVT) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; + bool signExtendConstant(const ConstantInt *CI) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; bool hasAndNotCompare(SDValue Y) const override; + bool hasBitTest(SDValue X, SDValue Y) const override; + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; bool shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const override; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; @@ -384,6 +423,8 @@ public: const SelectionDAG &DAG, unsigned Depth) const override; + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; + // This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; @@ -477,8 +518,6 @@ public: SelectionDAG &DAG) const override; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; - template - SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override { @@ -490,8 +529,8 @@ public: bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override; - bool isMulAddWithConstProfitable(const SDValue &AddNode, - const SDValue &ConstNode) const override; + bool isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const override; TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; @@ -526,6 +565,15 @@ public: Optional CC) const override; static RISCVII::VLMUL getLMUL(MVT VT); + inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, + unsigned MinSize) { + // Original equation: + // VLMAX = (VectorBits / EltSize) * LMUL + // where LMUL = MinSize / RISCV::RVVBitsPerBlock + // The following equations have been reordered to prevent loss of precision + // when calculating fractional LMUL. 
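+    // For example (illustrative), VectorBits=128, EltSize=32, MinSize=32
+    // (LMUL=1/2):
+    //   naive:     (128/32) * (32/64) = 4 * 0 = 0 in integer arithmetic,
+    //   reordered: ((128/32) * 32) / 64 = 2, the correct VLMAX.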
+ return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; + }; static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul); static unsigned getSubregIndexByMVT(MVT VT, unsigned Index); static unsigned getRegClassIDForVecVT(MVT VT); @@ -535,7 +583,7 @@ public: const RISCVRegisterInfo *TRI); MVT getContainerForFixedLengthVector(MVT VT) const; - bool shouldRemoveExtendFromGSIndex(EVT VT) const override; + bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override; bool isLegalElementTypeForRVV(Type *ScalarTy) const; @@ -571,6 +619,8 @@ private: bool IsRet, CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const; + template + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, bool UseGOT) const; SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const; @@ -591,7 +641,9 @@ private: SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, int64_t ExtTrueVal) const; - SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; @@ -606,6 +658,7 @@ private: SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const; SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const; @@ -627,11 +680,17 @@ private: SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const; SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const; + SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, + unsigned RISCVISDOpc) const; SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; + SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const; SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const; @@ -665,21 +724,15 @@ private: return false; }; }; - -namespace RISCV { -// We use 64 bits as the known part in the scalable vector types. -static constexpr unsigned RVVBitsPerBlock = 64; -} // namespace RISCV - namespace RISCVVIntrinsicsTable { struct RISCVVIntrinsicInfo { unsigned IntrinsicID; - uint8_t SplatOperand; + uint8_t ScalarOperand; uint8_t VLOperand; - bool hasSplatOperand() const { - // 0xF is not valid. See NoSplatOperand in IntrinsicsRISCV.td. - return SplatOperand != 0xF; + bool hasScalarOperand() const { + // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td. 
+    return ScalarOperand != 0xF;
   }
   bool hasVLOperand() const {
     // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 649eb57b325b..fc0a983f6542 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -7,7 +7,8 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements a function pass that inserts VSETVLI instructions where
-// needed.
+// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
+// instructions.
 //
 // This pass consists of 3 phases:
 //
@@ -37,8 +38,371 @@ static cl::opt<bool> DisableInsertVSETVLPHIOpt(
     "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
     cl::desc("Disable looking through phis when inserting vsetvlis."));
 
+static cl::opt<bool> UseStrictAsserts(
+    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
+    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
+
 namespace {
 
+static unsigned getVLOpNum(const MachineInstr &MI) {
+  return RISCVII::getVLOpNum(MI.getDesc());
+}
+
+static unsigned getSEWOpNum(const MachineInstr &MI) {
+  return RISCVII::getSEWOpNum(MI.getDesc());
+}
+
+static bool isScalarMoveInstr(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case RISCV::PseudoVMV_S_X_M1:
+  case RISCV::PseudoVMV_S_X_M2:
+  case RISCV::PseudoVMV_S_X_M4:
+  case RISCV::PseudoVMV_S_X_M8:
+  case RISCV::PseudoVMV_S_X_MF2:
+  case RISCV::PseudoVMV_S_X_MF4:
+  case RISCV::PseudoVMV_S_X_MF8:
+  case RISCV::PseudoVFMV_S_F16_M1:
+  case RISCV::PseudoVFMV_S_F16_M2:
+  case RISCV::PseudoVFMV_S_F16_M4:
+  case RISCV::PseudoVFMV_S_F16_M8:
+  case RISCV::PseudoVFMV_S_F16_MF2:
+  case RISCV::PseudoVFMV_S_F16_MF4:
+  case RISCV::PseudoVFMV_S_F32_M1:
+  case RISCV::PseudoVFMV_S_F32_M2:
+  case RISCV::PseudoVFMV_S_F32_M4:
+  case RISCV::PseudoVFMV_S_F32_M8:
+  case RISCV::PseudoVFMV_S_F32_MF2:
+  case RISCV::PseudoVFMV_S_F64_M1:
+  case RISCV::PseudoVFMV_S_F64_M2:
+  case RISCV::PseudoVFMV_S_F64_M4:
+  case RISCV::PseudoVFMV_S_F64_M8:
+    return true;
+  }
+}
+
+/// Get the EEW for a load or store instruction. Return None if MI is not
+/// a load or store which ignores SEW.
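+/// For example, PseudoVLE32_V_M1 always has EEW 32 regardless of the current
+/// SEW, while an arithmetic op such as PseudoVADD_VV_M1 has no fixed EEW and
+/// yields None.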
+static Optional getEEWForLoadStore(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return None; + case RISCV::PseudoVLE8_V_M1: + case RISCV::PseudoVLE8_V_M1_MASK: + case RISCV::PseudoVLE8_V_M2: + case RISCV::PseudoVLE8_V_M2_MASK: + case RISCV::PseudoVLE8_V_M4: + case RISCV::PseudoVLE8_V_M4_MASK: + case RISCV::PseudoVLE8_V_M8: + case RISCV::PseudoVLE8_V_M8_MASK: + case RISCV::PseudoVLE8_V_MF2: + case RISCV::PseudoVLE8_V_MF2_MASK: + case RISCV::PseudoVLE8_V_MF4: + case RISCV::PseudoVLE8_V_MF4_MASK: + case RISCV::PseudoVLE8_V_MF8: + case RISCV::PseudoVLE8_V_MF8_MASK: + case RISCV::PseudoVLSE8_V_M1: + case RISCV::PseudoVLSE8_V_M1_MASK: + case RISCV::PseudoVLSE8_V_M2: + case RISCV::PseudoVLSE8_V_M2_MASK: + case RISCV::PseudoVLSE8_V_M4: + case RISCV::PseudoVLSE8_V_M4_MASK: + case RISCV::PseudoVLSE8_V_M8: + case RISCV::PseudoVLSE8_V_M8_MASK: + case RISCV::PseudoVLSE8_V_MF2: + case RISCV::PseudoVLSE8_V_MF2_MASK: + case RISCV::PseudoVLSE8_V_MF4: + case RISCV::PseudoVLSE8_V_MF4_MASK: + case RISCV::PseudoVLSE8_V_MF8: + case RISCV::PseudoVLSE8_V_MF8_MASK: + case RISCV::PseudoVSE8_V_M1: + case RISCV::PseudoVSE8_V_M1_MASK: + case RISCV::PseudoVSE8_V_M2: + case RISCV::PseudoVSE8_V_M2_MASK: + case RISCV::PseudoVSE8_V_M4: + case RISCV::PseudoVSE8_V_M4_MASK: + case RISCV::PseudoVSE8_V_M8: + case RISCV::PseudoVSE8_V_M8_MASK: + case RISCV::PseudoVSE8_V_MF2: + case RISCV::PseudoVSE8_V_MF2_MASK: + case RISCV::PseudoVSE8_V_MF4: + case RISCV::PseudoVSE8_V_MF4_MASK: + case RISCV::PseudoVSE8_V_MF8: + case RISCV::PseudoVSE8_V_MF8_MASK: + case RISCV::PseudoVSSE8_V_M1: + case RISCV::PseudoVSSE8_V_M1_MASK: + case RISCV::PseudoVSSE8_V_M2: + case RISCV::PseudoVSSE8_V_M2_MASK: + case RISCV::PseudoVSSE8_V_M4: + case RISCV::PseudoVSSE8_V_M4_MASK: + case RISCV::PseudoVSSE8_V_M8: + case RISCV::PseudoVSSE8_V_M8_MASK: + case RISCV::PseudoVSSE8_V_MF2: + case RISCV::PseudoVSSE8_V_MF2_MASK: + case RISCV::PseudoVSSE8_V_MF4: + case RISCV::PseudoVSSE8_V_MF4_MASK: + case RISCV::PseudoVSSE8_V_MF8: + case RISCV::PseudoVSSE8_V_MF8_MASK: + return 8; + case RISCV::PseudoVLE16_V_M1: + case RISCV::PseudoVLE16_V_M1_MASK: + case RISCV::PseudoVLE16_V_M2: + case RISCV::PseudoVLE16_V_M2_MASK: + case RISCV::PseudoVLE16_V_M4: + case RISCV::PseudoVLE16_V_M4_MASK: + case RISCV::PseudoVLE16_V_M8: + case RISCV::PseudoVLE16_V_M8_MASK: + case RISCV::PseudoVLE16_V_MF2: + case RISCV::PseudoVLE16_V_MF2_MASK: + case RISCV::PseudoVLE16_V_MF4: + case RISCV::PseudoVLE16_V_MF4_MASK: + case RISCV::PseudoVLSE16_V_M1: + case RISCV::PseudoVLSE16_V_M1_MASK: + case RISCV::PseudoVLSE16_V_M2: + case RISCV::PseudoVLSE16_V_M2_MASK: + case RISCV::PseudoVLSE16_V_M4: + case RISCV::PseudoVLSE16_V_M4_MASK: + case RISCV::PseudoVLSE16_V_M8: + case RISCV::PseudoVLSE16_V_M8_MASK: + case RISCV::PseudoVLSE16_V_MF2: + case RISCV::PseudoVLSE16_V_MF2_MASK: + case RISCV::PseudoVLSE16_V_MF4: + case RISCV::PseudoVLSE16_V_MF4_MASK: + case RISCV::PseudoVSE16_V_M1: + case RISCV::PseudoVSE16_V_M1_MASK: + case RISCV::PseudoVSE16_V_M2: + case RISCV::PseudoVSE16_V_M2_MASK: + case RISCV::PseudoVSE16_V_M4: + case RISCV::PseudoVSE16_V_M4_MASK: + case RISCV::PseudoVSE16_V_M8: + case RISCV::PseudoVSE16_V_M8_MASK: + case RISCV::PseudoVSE16_V_MF2: + case RISCV::PseudoVSE16_V_MF2_MASK: + case RISCV::PseudoVSE16_V_MF4: + case RISCV::PseudoVSE16_V_MF4_MASK: + case RISCV::PseudoVSSE16_V_M1: + case RISCV::PseudoVSSE16_V_M1_MASK: + case RISCV::PseudoVSSE16_V_M2: + case RISCV::PseudoVSSE16_V_M2_MASK: + case RISCV::PseudoVSSE16_V_M4: + case RISCV::PseudoVSSE16_V_M4_MASK: + case 
RISCV::PseudoVSSE16_V_M8: + case RISCV::PseudoVSSE16_V_M8_MASK: + case RISCV::PseudoVSSE16_V_MF2: + case RISCV::PseudoVSSE16_V_MF2_MASK: + case RISCV::PseudoVSSE16_V_MF4: + case RISCV::PseudoVSSE16_V_MF4_MASK: + return 16; + case RISCV::PseudoVLE32_V_M1: + case RISCV::PseudoVLE32_V_M1_MASK: + case RISCV::PseudoVLE32_V_M2: + case RISCV::PseudoVLE32_V_M2_MASK: + case RISCV::PseudoVLE32_V_M4: + case RISCV::PseudoVLE32_V_M4_MASK: + case RISCV::PseudoVLE32_V_M8: + case RISCV::PseudoVLE32_V_M8_MASK: + case RISCV::PseudoVLE32_V_MF2: + case RISCV::PseudoVLE32_V_MF2_MASK: + case RISCV::PseudoVLSE32_V_M1: + case RISCV::PseudoVLSE32_V_M1_MASK: + case RISCV::PseudoVLSE32_V_M2: + case RISCV::PseudoVLSE32_V_M2_MASK: + case RISCV::PseudoVLSE32_V_M4: + case RISCV::PseudoVLSE32_V_M4_MASK: + case RISCV::PseudoVLSE32_V_M8: + case RISCV::PseudoVLSE32_V_M8_MASK: + case RISCV::PseudoVLSE32_V_MF2: + case RISCV::PseudoVLSE32_V_MF2_MASK: + case RISCV::PseudoVSE32_V_M1: + case RISCV::PseudoVSE32_V_M1_MASK: + case RISCV::PseudoVSE32_V_M2: + case RISCV::PseudoVSE32_V_M2_MASK: + case RISCV::PseudoVSE32_V_M4: + case RISCV::PseudoVSE32_V_M4_MASK: + case RISCV::PseudoVSE32_V_M8: + case RISCV::PseudoVSE32_V_M8_MASK: + case RISCV::PseudoVSE32_V_MF2: + case RISCV::PseudoVSE32_V_MF2_MASK: + case RISCV::PseudoVSSE32_V_M1: + case RISCV::PseudoVSSE32_V_M1_MASK: + case RISCV::PseudoVSSE32_V_M2: + case RISCV::PseudoVSSE32_V_M2_MASK: + case RISCV::PseudoVSSE32_V_M4: + case RISCV::PseudoVSSE32_V_M4_MASK: + case RISCV::PseudoVSSE32_V_M8: + case RISCV::PseudoVSSE32_V_M8_MASK: + case RISCV::PseudoVSSE32_V_MF2: + case RISCV::PseudoVSSE32_V_MF2_MASK: + return 32; + case RISCV::PseudoVLE64_V_M1: + case RISCV::PseudoVLE64_V_M1_MASK: + case RISCV::PseudoVLE64_V_M2: + case RISCV::PseudoVLE64_V_M2_MASK: + case RISCV::PseudoVLE64_V_M4: + case RISCV::PseudoVLE64_V_M4_MASK: + case RISCV::PseudoVLE64_V_M8: + case RISCV::PseudoVLE64_V_M8_MASK: + case RISCV::PseudoVLSE64_V_M1: + case RISCV::PseudoVLSE64_V_M1_MASK: + case RISCV::PseudoVLSE64_V_M2: + case RISCV::PseudoVLSE64_V_M2_MASK: + case RISCV::PseudoVLSE64_V_M4: + case RISCV::PseudoVLSE64_V_M4_MASK: + case RISCV::PseudoVLSE64_V_M8: + case RISCV::PseudoVLSE64_V_M8_MASK: + case RISCV::PseudoVSE64_V_M1: + case RISCV::PseudoVSE64_V_M1_MASK: + case RISCV::PseudoVSE64_V_M2: + case RISCV::PseudoVSE64_V_M2_MASK: + case RISCV::PseudoVSE64_V_M4: + case RISCV::PseudoVSE64_V_M4_MASK: + case RISCV::PseudoVSE64_V_M8: + case RISCV::PseudoVSE64_V_M8_MASK: + case RISCV::PseudoVSSE64_V_M1: + case RISCV::PseudoVSSE64_V_M1_MASK: + case RISCV::PseudoVSSE64_V_M2: + case RISCV::PseudoVSSE64_V_M2_MASK: + case RISCV::PseudoVSSE64_V_M4: + case RISCV::PseudoVSSE64_V_M4_MASK: + case RISCV::PseudoVSSE64_V_M8: + case RISCV::PseudoVSSE64_V_M8_MASK: + return 64; + } +} + +/// Return true if this is an operation on mask registers. Note that +/// this includes both arithmetic/logical ops and load/store (vlm/vsm). +static bool isMaskRegOp(const MachineInstr &MI) { + if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) { + const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); + // A Log2SEW of 0 is an operation on mask registers only. + return Log2SEW == 0; + } + return false; +} + +static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) { + unsigned LMul; + bool Fractional; + std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul); + + // Convert LMul to a fixed point value with 3 fractional bits. + LMul = Fractional ? 
(8 / LMul) : (LMul * 8);
+
+  assert(SEW >= 8 && "Unexpected SEW value");
+  return (SEW * 8) / LMul;
+}
+
+/// Which subfields of VL or VTYPE have values we need to preserve?
+struct DemandedFields {
+  bool VL = false;
+  bool SEW = false;
+  bool LMUL = false;
+  bool SEWLMULRatio = false;
+  bool TailPolicy = false;
+  bool MaskPolicy = false;
+
+  // Return true if any part of VTYPE was used
+  bool usedVTYPE() {
+    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
+  }
+
+  // Mark all VTYPE subfields and properties as demanded
+  void demandVTYPE() {
+    SEW = true;
+    LMUL = true;
+    SEWLMULRatio = true;
+    TailPolicy = true;
+    MaskPolicy = true;
+  }
+};
+
+/// Return true if the two values of the VTYPE register provided are
+/// indistinguishable from the perspective of an instruction (or set of
+/// instructions) which use only the Used subfields and properties.
+static bool areCompatibleVTYPEs(uint64_t VType1,
+                                uint64_t VType2,
+                                const DemandedFields &Used) {
+  if (Used.SEW &&
+      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
+    return false;
+
+  if (Used.LMUL &&
+      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
+    return false;
+
+  if (Used.SEWLMULRatio) {
+    auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
+                                  RISCVVType::getVLMUL(VType1));
+    auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
+                                  RISCVVType::getVLMUL(VType2));
+    if (Ratio1 != Ratio2)
+      return false;
+  }
+
+  if (Used.TailPolicy &&
+      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
+    return false;
+  if (Used.MaskPolicy &&
+      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
+    return false;
+  return true;
+}
+
+/// Return the fields and properties demanded by the provided instruction.
+static DemandedFields getDemanded(const MachineInstr &MI) {
+  // Warning: This function has to work on both the lowered (i.e. post
+  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
+  // that it can't use the value of a SEW, VL, or Policy operand as they might
+  // be stale after lowering.
+
+  // Most instructions don't use any of these subfields.
+  DemandedFields Res;
+  // Start conservative if registers are used
+  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
+    Res.VL = true;
+  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
+    Res.demandVTYPE();
+  // Start conservative on the unlowered form too
+  uint64_t TSFlags = MI.getDesc().TSFlags;
+  if (RISCVII::hasSEWOp(TSFlags)) {
+    Res.demandVTYPE();
+    if (RISCVII::hasVLOp(TSFlags))
+      Res.VL = true;
+  }
+
+  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
+  // They instead demand the ratio of the two which is used in computing
+  // EMUL, but which allows us the flexibility to change SEW and LMUL
+  // provided we don't change the ratio.
+  // Note: We assume that the instruction's initial SEW is the EEW encoded
+  // in the opcode. This is asserted when constructing the VSETVLIInfo.
+  if (getEEWForLoadStore(MI)) {
+    Res.SEW = false;
+    Res.LMUL = false;
+  }
+
+  // Store instructions don't use the policy fields.
+  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
+    Res.TailPolicy = false;
+    Res.MaskPolicy = false;
+  }
+
+  // If this is a mask reg operation, it only cares about VLMAX.
+  // TODO: Possible extensions to this logic
+  // * Probably ok if available VLMax is larger than demanded
+  // * The policy bits can probably be ignored.
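+  //
+  // For example, vmand.mm behaves identically under SEW=8/LMUL=1 and
+  // SEW=32/LMUL=4: both give VLMAX = VLEN/8, which is all a mask register
+  // operation can observe.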
+ if (isMaskRegOp(MI)) { + Res.SEW = false; + Res.LMUL = false; + } + + return Res; +} + +/// Defines the abstract state with which the forward dataflow models the +/// values of the VL and VTYPE registers after insertion. class VSETVLIInfo { union { Register AVLReg; @@ -57,15 +421,12 @@ class VSETVLIInfo { uint8_t SEW = 0; uint8_t TailAgnostic : 1; uint8_t MaskAgnostic : 1; - uint8_t MaskRegOp : 1; - uint8_t StoreOp : 1; - uint8_t ScalarMovOp : 1; uint8_t SEWLMULRatioOnly : 1; public: VSETVLIInfo() - : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false), - StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {} + : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), + SEWLMULRatioOnly(false) {} static VSETVLIInfo getUnknown() { VSETVLIInfo Info; @@ -97,11 +458,10 @@ public: assert(hasAVLImm()); return AVLImm; } - bool hasZeroAVL() const { - if (hasAVLImm()) - return getAVLImm() == 0; - return false; - } + + unsigned getSEW() const { return SEW; } + RISCVII::VLMUL getVLMUL() const { return VLMul; } + bool hasNonZeroAVL() const { if (hasAVLImm()) return getAVLImm() > 0; @@ -132,17 +492,13 @@ public: TailAgnostic = RISCVVType::isTailAgnostic(VType); MaskAgnostic = RISCVVType::isMaskAgnostic(VType); } - void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO, - bool IsStore, bool IsScalarMovOp) { + void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) { assert(isValid() && !isUnknown() && "Can't set VTYPE for uninitialized or unknown"); VLMul = L; SEW = S; TailAgnostic = TA; MaskAgnostic = MA; - MaskRegOp = MRO; - StoreOp = IsStore; - ScalarMovOp = IsScalarMovOp; } unsigned encodeVTYPE() const { @@ -175,25 +531,16 @@ public: Other.MaskAgnostic); } - static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) { - unsigned LMul; - bool Fractional; - std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul); - - // Convert LMul to a fixed point value with 3 fractional bits. - LMul = Fractional ? (8 / LMul) : (LMul * 8); - - assert(SEW >= 8 && "Unexpected SEW value"); - return (SEW * 8) / LMul; - } - unsigned getSEWLMULRatio() const { assert(isValid() && !isUnknown() && "Can't use VTYPE for uninitialized or unknown"); - return getSEWLMULRatio(SEW, VLMul); + return ::getSEWLMULRatio(SEW, VLMul); } // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX. + // Note that having the same VLMAX ensures that both share the same + // function from AVL to VL; that is, they must produce the same VL value + // for any given AVL value. bool hasSameVLMAX(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && "Can't compare invalid VSETVLIInfos"); @@ -211,36 +558,22 @@ public: MaskAgnostic == Other.MaskAgnostic; } - bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const { - // Simple case, see if full VTYPE matches. - if (hasSameVTYPE(InstrInfo)) - return true; - - if (Strict) - return false; - - // If this is a mask reg operation, it only cares about VLMAX. - // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger - // than "InstrInfo". - // FIXME: The policy bits can probably be ignored for mask reg operations. 
- if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) && - TailAgnostic == InstrInfo.TailAgnostic && - MaskAgnostic == InstrInfo.MaskAgnostic) - return true; - - return false; + bool hasCompatibleVTYPE(const MachineInstr &MI, + const VSETVLIInfo &Require) const { + const DemandedFields Used = getDemanded(MI); + return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used); } // Determine whether the vector instructions requirements represented by - // InstrInfo are compatible with the previous vsetvli instruction represented - // by this. - bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const { - assert(isValid() && InstrInfo.isValid() && + // Require are compatible with the previous vsetvli instruction represented + // by this. MI is the instruction whose requirements we're considering. + bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const { + assert(isValid() && Require.isValid() && "Can't compare invalid VSETVLIInfos"); - assert(!InstrInfo.SEWLMULRatioOnly && + assert(!Require.SEWLMULRatioOnly && "Expected a valid VTYPE for instruction!"); // Nothing is compatible with Unknown. - if (isUnknown() || InstrInfo.isUnknown()) + if (isUnknown() || Require.isUnknown()) return false; // If only our VLMAX ratio is valid, then this isn't compatible. @@ -249,61 +582,11 @@ public: // If the instruction doesn't need an AVLReg and the SEW matches, consider // it compatible. - if (!Strict && InstrInfo.hasAVLReg() && - InstrInfo.AVLReg == RISCV::NoRegister) { - if (SEW == InstrInfo.SEW) + if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister) + if (SEW == Require.SEW) return true; - } - - // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0. - // So it's compatible when we could make sure that both VL be the same - // situation. - if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() && - ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) || - (hasZeroAVL() && InstrInfo.hasZeroAVL())) && - hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo)) - return true; - - // The AVL must match. - if (!hasSameAVL(InstrInfo)) - return false; - - if (hasCompatibleVTYPE(InstrInfo, Strict)) - return true; - - // Strict matches must ensure a full VTYPE match. - if (Strict) - return false; - - // Store instructions don't use the policy fields. - // TODO: Move into hasCompatibleVTYPE? - if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW) - return true; - - // Anything else is not compatible. - return false; - } - bool isCompatibleWithLoadStoreEEW(unsigned EEW, - const VSETVLIInfo &InstrInfo) const { - assert(isValid() && InstrInfo.isValid() && - "Can't compare invalid VSETVLIInfos"); - assert(!InstrInfo.SEWLMULRatioOnly && - "Expected a valid VTYPE for instruction!"); - assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store"); - - if (isUnknown() || hasSEWLMULRatioOnly()) - return false; - - if (!hasSameAVL(InstrInfo)) - return false; - - // Stores can ignore the tail and mask policies. - if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic || - MaskAgnostic != InstrInfo.MaskAgnostic)) - return false; - - return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul); + return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require); } bool operator==(const VSETVLIInfo &Other) const { @@ -322,16 +605,20 @@ public: if (!hasSameAVL(Other)) return false; + // If the SEWLMULRatioOnly bits are different, then they aren't equal. 
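+    // (SEWLMULRatioOnly means only the SEW/LMUL ratio of the VTYPE is known,
+    // e.g. after intersecting predecessor states that agree on the ratio but
+    // not on the full VTYPE.)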
+ if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly) + return false; + // If only the VLMAX is valid, check that it is the same. - if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly) + if (SEWLMULRatioOnly) return hasSameVLMAX(Other); // If the full VTYPE is valid, check that it is the same. - if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly) - return hasSameVTYPE(Other); + return hasSameVTYPE(Other); + } - // If the SEWLMULRatioOnly bits are different, then they aren't equal. - return false; + bool operator!=(const VSETVLIInfo &Other) const { + return !(*this == Other); } // Calculate the VSETVLIInfo visible to a block assuming this and Other are @@ -365,25 +652,43 @@ public: return VSETVLIInfo::getUnknown(); } - // Calculate the VSETVLIInfo visible at the end of the block assuming this - // is the predecessor value, and Other is change for this block. - VSETVLIInfo merge(const VSETVLIInfo &Other) const { - assert(isValid() && "Can only merge with a valid VSETVLInfo"); - - // Nothing changed from the predecessor, keep it. - if (!Other.isValid()) - return *this; - - // If the change is compatible with the input, we won't create a VSETVLI - // and should keep the predecessor. - if (isCompatible(Other, /*Strict*/ true)) - return *this; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Support for debugging, callable in GDB: V->dump() + LLVM_DUMP_METHOD void dump() const { + print(dbgs()); + dbgs() << "\n"; + } - // Otherwise just use whatever is in this block. - return Other; + /// Implement operator<<. + /// @{ + void print(raw_ostream &OS) const { + OS << "{"; + if (!isValid()) + OS << "Uninitialized"; + if (isUnknown()) + OS << "unknown"; + if (hasAVLReg()) + OS << "AVLReg=" << (unsigned)AVLReg; + if (hasAVLImm()) + OS << "AVLImm=" << (unsigned)AVLImm; + OS << ", " + << "VLMul=" << (unsigned)VLMul << ", " + << "SEW=" << (unsigned)SEW << ", " + << "TailAgnostic=" << (bool)TailAgnostic << ", " + << "MaskAgnostic=" << (bool)MaskAgnostic << ", " + << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; } +#endif }; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_ATTRIBUTE_USED +inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) { + V.print(OS); + return OS; +} +#endif + struct BlockData { // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers // made by this block. Calculated in Phase 1. @@ -400,7 +705,7 @@ struct BlockData { // Keeps track of whether the block is already in the queue. 
bool InQueue = false; - BlockData() {} + BlockData() = default; }; class RISCVInsertVSETVLI : public MachineFunctionPass { @@ -426,14 +731,24 @@ public: StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } private: - bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo); - bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB); + bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, + const VSETVLIInfo &CurInfo) const; + bool needVSETVLIPHI(const VSETVLIInfo &Require, + const MachineBasicBlock &MBB) const; void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); + void insertVSETVLI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, DebugLoc DL, + const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); + void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI); + void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI); bool computeVLVTYPEChanges(const MachineBasicBlock &MBB); void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); + void doLocalPostpass(MachineBasicBlock &MBB); + void doPRE(MachineBasicBlock &MBB); + void insertReadVL(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -443,474 +758,349 @@ char RISCVInsertVSETVLI::ID = 0; INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, false, false) -static MachineInstr *elideCopies(MachineInstr *MI, - const MachineRegisterInfo *MRI) { - while (true) { - if (!MI->isFullCopy()) - return MI; - if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) - return nullptr; - MI = MRI->getVRegDef(MI->getOperand(1).getReg()); - if (!MI) - return nullptr; - } -} - -static bool isScalarMoveInstr(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - return false; - case RISCV::PseudoVMV_S_X_M1: - case RISCV::PseudoVMV_S_X_M2: - case RISCV::PseudoVMV_S_X_M4: - case RISCV::PseudoVMV_S_X_M8: - case RISCV::PseudoVMV_S_X_MF2: - case RISCV::PseudoVMV_S_X_MF4: - case RISCV::PseudoVMV_S_X_MF8: - case RISCV::PseudoVFMV_S_F16_M1: - case RISCV::PseudoVFMV_S_F16_M2: - case RISCV::PseudoVFMV_S_F16_M4: - case RISCV::PseudoVFMV_S_F16_M8: - case RISCV::PseudoVFMV_S_F16_MF2: - case RISCV::PseudoVFMV_S_F16_MF4: - case RISCV::PseudoVFMV_S_F32_M1: - case RISCV::PseudoVFMV_S_F32_M2: - case RISCV::PseudoVFMV_S_F32_M4: - case RISCV::PseudoVFMV_S_F32_M8: - case RISCV::PseudoVFMV_S_F32_MF2: - case RISCV::PseudoVFMV_S_F64_M1: - case RISCV::PseudoVFMV_S_F64_M2: - case RISCV::PseudoVFMV_S_F64_M4: - case RISCV::PseudoVFMV_S_F64_M8: - return true; - } -} - -static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, - const MachineRegisterInfo *MRI) { - VSETVLIInfo InstrInfo; - unsigned NumOperands = MI.getNumExplicitOperands(); - bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags); - - // Default to tail agnostic unless the destination is tied to a source. - // Unless the source is undef. In that case the user would have some control - // over the tail values. Some pseudo instructions force a tail agnostic policy - // despite having a tied def. - bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags); - bool TailAgnostic = true; - // If the instruction has policy argument, use the argument. 
- if (HasPolicy) { - const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); - TailAgnostic = Op.getImm() & 0x1; - } - - unsigned UseOpIdx; - if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) && - MI.isRegTiedToUseOperand(0, &UseOpIdx)) { - TailAgnostic = false; - // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. - const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg()); - if (UseMI) { - UseMI = elideCopies(UseMI, MRI); - if (UseMI && UseMI->isImplicitDef()) - TailAgnostic = true; - } - } - - // Remove the tail policy so we can find the SEW and VL. - if (HasPolicy) - --NumOperands; - - RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); - - unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm(); - // A Log2SEW of 0 is an operation on mask registers only. - bool MaskRegOp = Log2SEW == 0; - unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; - assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); - - // If there are no explicit defs, this is a store instruction which can - // ignore the tail and mask policies. - bool StoreOp = MI.getNumExplicitDefs() == 0; - bool ScalarMovOp = isScalarMoveInstr(MI); - - if (RISCVII::hasVLOp(TSFlags)) { - const MachineOperand &VLOp = MI.getOperand(NumOperands - 2); - if (VLOp.isImm()) { - int64_t Imm = VLOp.getImm(); - // Conver the VLMax sentintel to X0 register. - if (Imm == RISCV::VLMaxSentinel) - InstrInfo.setAVLReg(RISCV::X0); - else - InstrInfo.setAVLImm(Imm); - } else { - InstrInfo.setAVLReg(VLOp.getReg()); - } - } else - InstrInfo.setAVLReg(RISCV::NoRegister); - InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic, - /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp); - - return InstrInfo; -} - -void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, - const VSETVLIInfo &Info, - const VSETVLIInfo &PrevInfo) { - DebugLoc DL = MI.getDebugLoc(); - - // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same - // VLMAX. - if (PrevInfo.isValid() && !PrevInfo.isUnknown() && - Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { - BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - return; - } - - if (Info.hasAVLImm()) { - BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(Info.getAVLImm()) - .addImm(Info.encodeVTYPE()); - return; - } - - Register AVLReg = Info.getAVLReg(); - if (AVLReg == RISCV::NoRegister) { - // We can only use x0, x0 if there's no chance of the vtype change causing - // the previous vl to become invalid. - if (PrevInfo.isValid() && !PrevInfo.isUnknown() && - Info.hasSameVLMAX(PrevInfo)) { - BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - return; - } - // Otherwise use an AVL of 0 to avoid depending on previous vl. - BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(0) - .addImm(Info.encodeVTYPE()); - return; - } - - if (AVLReg.isVirtual()) - MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass); - - // Use X0 as the DestReg unless AVLReg is X0. 
We also need to change the - // opcode if the AVLReg is X0 as they have different register classes for - // the AVL operand. - Register DestReg = RISCV::X0; - unsigned Opcode = RISCV::PseudoVSETVLI; - if (AVLReg == RISCV::X0) { - DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); - Opcode = RISCV::PseudoVSETVLIX0; - } - BuildMI(MBB, MI, DL, TII->get(Opcode)) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addReg(AVLReg) - .addImm(Info.encodeVTYPE()); -} - -// Return a VSETVLIInfo representing the changes made by this VSETVLI or -// VSETIVLI instruction. -static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { - VSETVLIInfo NewInfo; - if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { - NewInfo.setAVLImm(MI.getOperand(1).getImm()); - } else { - assert(MI.getOpcode() == RISCV::PseudoVSETVLI || - MI.getOpcode() == RISCV::PseudoVSETVLIX0); - Register AVLReg = MI.getOperand(1).getReg(); - assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && - "Can't handle X0, X0 vsetvli yet"); - NewInfo.setAVLReg(AVLReg); - } - NewInfo.setVTYPE(MI.getOperand(2).getImm()); - - return NewInfo; +static bool isVectorConfigInstr(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::PseudoVSETVLI || + MI.getOpcode() == RISCV::PseudoVSETVLIX0 || + MI.getOpcode() == RISCV::PseudoVSETIVLI; } -bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require, - const VSETVLIInfo &CurInfo) { - if (CurInfo.isCompatible(Require, /*Strict*/ false)) +/// Return true if this is 'vsetvli x0, x0, vtype' which preserves +/// VL and only sets VTYPE. +static bool isVLPreservingConfig(const MachineInstr &MI) { + if (MI.getOpcode() != RISCV::PseudoVSETVLIX0) return false; - - // We didn't find a compatible value. If our AVL is a virtual register, - // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need - // and the last VL/VTYPE we observed is the same, we don't need a - // VSETVLI here. 
- if (!CurInfo.isUnknown() && Require.hasAVLReg() && - Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() && - CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) { - if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { - if (DefMI->getOpcode() == RISCV::PseudoVSETVLI || - DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 || - DefMI->getOpcode() == RISCV::PseudoVSETIVLI) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo)) - return false; - } - } - } - - return true; -} - -bool canSkipVSETVLIForLoadStore(const MachineInstr &MI, - const VSETVLIInfo &Require, - const VSETVLIInfo &CurInfo) { - unsigned EEW; - switch (MI.getOpcode()) { - default: - return false; - case RISCV::PseudoVLE8_V_M1: - case RISCV::PseudoVLE8_V_M1_MASK: - case RISCV::PseudoVLE8_V_M2: - case RISCV::PseudoVLE8_V_M2_MASK: - case RISCV::PseudoVLE8_V_M4: - case RISCV::PseudoVLE8_V_M4_MASK: - case RISCV::PseudoVLE8_V_M8: - case RISCV::PseudoVLE8_V_M8_MASK: - case RISCV::PseudoVLE8_V_MF2: - case RISCV::PseudoVLE8_V_MF2_MASK: - case RISCV::PseudoVLE8_V_MF4: - case RISCV::PseudoVLE8_V_MF4_MASK: - case RISCV::PseudoVLE8_V_MF8: - case RISCV::PseudoVLE8_V_MF8_MASK: - case RISCV::PseudoVLSE8_V_M1: - case RISCV::PseudoVLSE8_V_M1_MASK: - case RISCV::PseudoVLSE8_V_M2: - case RISCV::PseudoVLSE8_V_M2_MASK: - case RISCV::PseudoVLSE8_V_M4: - case RISCV::PseudoVLSE8_V_M4_MASK: - case RISCV::PseudoVLSE8_V_M8: - case RISCV::PseudoVLSE8_V_M8_MASK: - case RISCV::PseudoVLSE8_V_MF2: - case RISCV::PseudoVLSE8_V_MF2_MASK: - case RISCV::PseudoVLSE8_V_MF4: - case RISCV::PseudoVLSE8_V_MF4_MASK: - case RISCV::PseudoVLSE8_V_MF8: - case RISCV::PseudoVLSE8_V_MF8_MASK: - case RISCV::PseudoVSE8_V_M1: - case RISCV::PseudoVSE8_V_M1_MASK: - case RISCV::PseudoVSE8_V_M2: - case RISCV::PseudoVSE8_V_M2_MASK: - case RISCV::PseudoVSE8_V_M4: - case RISCV::PseudoVSE8_V_M4_MASK: - case RISCV::PseudoVSE8_V_M8: - case RISCV::PseudoVSE8_V_M8_MASK: - case RISCV::PseudoVSE8_V_MF2: - case RISCV::PseudoVSE8_V_MF2_MASK: - case RISCV::PseudoVSE8_V_MF4: - case RISCV::PseudoVSE8_V_MF4_MASK: - case RISCV::PseudoVSE8_V_MF8: - case RISCV::PseudoVSE8_V_MF8_MASK: - case RISCV::PseudoVSSE8_V_M1: - case RISCV::PseudoVSSE8_V_M1_MASK: - case RISCV::PseudoVSSE8_V_M2: - case RISCV::PseudoVSSE8_V_M2_MASK: - case RISCV::PseudoVSSE8_V_M4: - case RISCV::PseudoVSSE8_V_M4_MASK: - case RISCV::PseudoVSSE8_V_M8: - case RISCV::PseudoVSSE8_V_M8_MASK: - case RISCV::PseudoVSSE8_V_MF2: - case RISCV::PseudoVSSE8_V_MF2_MASK: - case RISCV::PseudoVSSE8_V_MF4: - case RISCV::PseudoVSSE8_V_MF4_MASK: - case RISCV::PseudoVSSE8_V_MF8: - case RISCV::PseudoVSSE8_V_MF8_MASK: - EEW = 8; - break; - case RISCV::PseudoVLE16_V_M1: - case RISCV::PseudoVLE16_V_M1_MASK: - case RISCV::PseudoVLE16_V_M2: - case RISCV::PseudoVLE16_V_M2_MASK: - case RISCV::PseudoVLE16_V_M4: - case RISCV::PseudoVLE16_V_M4_MASK: - case RISCV::PseudoVLE16_V_M8: - case RISCV::PseudoVLE16_V_M8_MASK: - case RISCV::PseudoVLE16_V_MF2: - case RISCV::PseudoVLE16_V_MF2_MASK: - case RISCV::PseudoVLE16_V_MF4: - case RISCV::PseudoVLE16_V_MF4_MASK: - case RISCV::PseudoVLSE16_V_M1: - case RISCV::PseudoVLSE16_V_M1_MASK: - case RISCV::PseudoVLSE16_V_M2: - case RISCV::PseudoVLSE16_V_M2_MASK: - case RISCV::PseudoVLSE16_V_M4: - case RISCV::PseudoVLSE16_V_M4_MASK: - case RISCV::PseudoVLSE16_V_M8: - case RISCV::PseudoVLSE16_V_M8_MASK: - case RISCV::PseudoVLSE16_V_MF2: - case RISCV::PseudoVLSE16_V_MF2_MASK: - case RISCV::PseudoVLSE16_V_MF4: - case 
RISCV::PseudoVLSE16_V_MF4_MASK: - case RISCV::PseudoVSE16_V_M1: - case RISCV::PseudoVSE16_V_M1_MASK: - case RISCV::PseudoVSE16_V_M2: - case RISCV::PseudoVSE16_V_M2_MASK: - case RISCV::PseudoVSE16_V_M4: - case RISCV::PseudoVSE16_V_M4_MASK: - case RISCV::PseudoVSE16_V_M8: - case RISCV::PseudoVSE16_V_M8_MASK: - case RISCV::PseudoVSE16_V_MF2: - case RISCV::PseudoVSE16_V_MF2_MASK: - case RISCV::PseudoVSE16_V_MF4: - case RISCV::PseudoVSE16_V_MF4_MASK: - case RISCV::PseudoVSSE16_V_M1: - case RISCV::PseudoVSSE16_V_M1_MASK: - case RISCV::PseudoVSSE16_V_M2: - case RISCV::PseudoVSSE16_V_M2_MASK: - case RISCV::PseudoVSSE16_V_M4: - case RISCV::PseudoVSSE16_V_M4_MASK: - case RISCV::PseudoVSSE16_V_M8: - case RISCV::PseudoVSSE16_V_M8_MASK: - case RISCV::PseudoVSSE16_V_MF2: - case RISCV::PseudoVSSE16_V_MF2_MASK: - case RISCV::PseudoVSSE16_V_MF4: - case RISCV::PseudoVSSE16_V_MF4_MASK: - EEW = 16; - break; - case RISCV::PseudoVLE32_V_M1: - case RISCV::PseudoVLE32_V_M1_MASK: - case RISCV::PseudoVLE32_V_M2: - case RISCV::PseudoVLE32_V_M2_MASK: - case RISCV::PseudoVLE32_V_M4: - case RISCV::PseudoVLE32_V_M4_MASK: - case RISCV::PseudoVLE32_V_M8: - case RISCV::PseudoVLE32_V_M8_MASK: - case RISCV::PseudoVLE32_V_MF2: - case RISCV::PseudoVLE32_V_MF2_MASK: - case RISCV::PseudoVLSE32_V_M1: - case RISCV::PseudoVLSE32_V_M1_MASK: - case RISCV::PseudoVLSE32_V_M2: - case RISCV::PseudoVLSE32_V_M2_MASK: - case RISCV::PseudoVLSE32_V_M4: - case RISCV::PseudoVLSE32_V_M4_MASK: - case RISCV::PseudoVLSE32_V_M8: - case RISCV::PseudoVLSE32_V_M8_MASK: - case RISCV::PseudoVLSE32_V_MF2: - case RISCV::PseudoVLSE32_V_MF2_MASK: - case RISCV::PseudoVSE32_V_M1: - case RISCV::PseudoVSE32_V_M1_MASK: - case RISCV::PseudoVSE32_V_M2: - case RISCV::PseudoVSE32_V_M2_MASK: - case RISCV::PseudoVSE32_V_M4: - case RISCV::PseudoVSE32_V_M4_MASK: - case RISCV::PseudoVSE32_V_M8: - case RISCV::PseudoVSE32_V_M8_MASK: - case RISCV::PseudoVSE32_V_MF2: - case RISCV::PseudoVSE32_V_MF2_MASK: - case RISCV::PseudoVSSE32_V_M1: - case RISCV::PseudoVSSE32_V_M1_MASK: - case RISCV::PseudoVSSE32_V_M2: - case RISCV::PseudoVSSE32_V_M2_MASK: - case RISCV::PseudoVSSE32_V_M4: - case RISCV::PseudoVSSE32_V_M4_MASK: - case RISCV::PseudoVSSE32_V_M8: - case RISCV::PseudoVSSE32_V_M8_MASK: - case RISCV::PseudoVSSE32_V_MF2: - case RISCV::PseudoVSSE32_V_MF2_MASK: - EEW = 32; - break; - case RISCV::PseudoVLE64_V_M1: - case RISCV::PseudoVLE64_V_M1_MASK: - case RISCV::PseudoVLE64_V_M2: - case RISCV::PseudoVLE64_V_M2_MASK: - case RISCV::PseudoVLE64_V_M4: - case RISCV::PseudoVLE64_V_M4_MASK: - case RISCV::PseudoVLE64_V_M8: - case RISCV::PseudoVLE64_V_M8_MASK: - case RISCV::PseudoVLSE64_V_M1: - case RISCV::PseudoVLSE64_V_M1_MASK: - case RISCV::PseudoVLSE64_V_M2: - case RISCV::PseudoVLSE64_V_M2_MASK: - case RISCV::PseudoVLSE64_V_M4: - case RISCV::PseudoVLSE64_V_M4_MASK: - case RISCV::PseudoVLSE64_V_M8: - case RISCV::PseudoVLSE64_V_M8_MASK: - case RISCV::PseudoVSE64_V_M1: - case RISCV::PseudoVSE64_V_M1_MASK: - case RISCV::PseudoVSE64_V_M2: - case RISCV::PseudoVSE64_V_M2_MASK: - case RISCV::PseudoVSE64_V_M4: - case RISCV::PseudoVSE64_V_M4_MASK: - case RISCV::PseudoVSE64_V_M8: - case RISCV::PseudoVSE64_V_M8_MASK: - case RISCV::PseudoVSSE64_V_M1: - case RISCV::PseudoVSSE64_V_M1_MASK: - case RISCV::PseudoVSSE64_V_M2: - case RISCV::PseudoVSSE64_V_M2_MASK: - case RISCV::PseudoVSSE64_V_M4: - case RISCV::PseudoVSSE64_V_M4_MASK: - case RISCV::PseudoVSSE64_V_M8: - case RISCV::PseudoVSSE64_V_M8_MASK: - EEW = 64; - break; + assert(RISCV::X0 == MI.getOperand(1).getReg()); + return RISCV::X0 == 
MI.getOperand(0).getReg(); +} + +static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, + const MachineRegisterInfo *MRI) { + VSETVLIInfo InstrInfo; + + // If the instruction has a policy argument, use the argument. + // If there is no policy argument, default to tail agnostic unless the + // destination is tied to a source, unless that source is undef, in which + // case the user retains some control over the policy values. + bool TailAgnostic = true; + bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags); + // FIXME: Could we look at the instructions above or below to choose a + // matching mask policy and reduce the number of vsetvli instructions? The + // default mask policy is agnostic if the instruction uses a mask policy, + // otherwise undisturbed. Because most mask operations are mask-undisturbed, + // we could possibly reduce the vsetvli instructions between masked and + // unmasked instruction sequences. + bool MaskAgnostic = UsesMaskPolicy; + unsigned UseOpIdx; + if (RISCVII::hasVecPolicyOp(TSFlags)) { + const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); + uint64_t Policy = Op.getImm(); + assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && + "Invalid Policy Value"); + // Although in some cases a mismatched passthru/maskedoff and policy value + // does not make sense (e.g. a tied operand that is IMPLICIT_DEF with a + // non-TAMA policy, or a tied operand that is not IMPLICIT_DEF with a TAMA + // policy), the user has set the policy value explicitly, so the compiler + // will not fix it. + TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; + MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; + } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) { + TailAgnostic = false; + if (UsesMaskPolicy) + MaskAgnostic = false; + // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. + const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg()); + if (UseMI && UseMI->isImplicitDef()) { + TailAgnostic = true; + if (UsesMaskPolicy) + MaskAgnostic = true; + } + // Some pseudo instructions force a tail agnostic policy despite having a + // tied def. + if (RISCVII::doesForceTailAgnostic(TSFlags)) + TailAgnostic = true; + } + + RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); + + unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); + // A Log2SEW of 0 is an operation on mask registers only. + unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; + assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); + + if (RISCVII::hasVLOp(TSFlags)) { + const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); + if (VLOp.isImm()) { + int64_t Imm = VLOp.getImm(); + // Convert the VLMax sentinel to the X0 register.
+ if (Imm == RISCV::VLMaxSentinel) + InstrInfo.setAVLReg(RISCV::X0); + else + InstrInfo.setAVLImm(Imm); + } else { + InstrInfo.setAVLReg(VLOp.getReg()); + } + } else { + InstrInfo.setAVLReg(RISCV::NoRegister); } +#ifndef NDEBUG + if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) { + assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); + } +#endif + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); - return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require); + return InstrInfo; } -bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { - bool HadVectorOp = false; +void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, + const VSETVLIInfo &Info, + const VSETVLIInfo &PrevInfo) { + DebugLoc DL = MI.getDebugLoc(); + insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo); +} - BlockData &BBInfo = BlockInfo[MBB.getNumber()]; - for (const MachineInstr &MI : MBB) { - // If this is an explicit VSETVLI or VSETIVLI, update our state. - if (MI.getOpcode() == RISCV::PseudoVSETVLI || - MI.getOpcode() == RISCV::PseudoVSETVLIX0 || - MI.getOpcode() == RISCV::PseudoVSETIVLI) { - HadVectorOp = true; - BBInfo.Change = getInfoForVSETVLI(MI); - continue; +void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, DebugLoc DL, + const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { + + // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same + // VLMAX. + if (PrevInfo.isValid() && !PrevInfo.isUnknown() && + Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { + BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()) + .addReg(RISCV::VL, RegState::Implicit); + return; + } + + if (Info.hasAVLImm()) { + BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addImm(Info.getAVLImm()) + .addImm(Info.encodeVTYPE()); + return; + } + + Register AVLReg = Info.getAVLReg(); + if (AVLReg == RISCV::NoRegister) { + // We can only use x0, x0 if there's no chance of the vtype change causing + // the previous vl to become invalid. + if (PrevInfo.isValid() && !PrevInfo.isUnknown() && + Info.hasSameVLMAX(PrevInfo)) { + BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()) + .addReg(RISCV::VL, RegState::Implicit); + return; } + // Otherwise use an AVL of 0 to avoid depending on previous vl. + BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addImm(0) + .addImm(Info.encodeVTYPE()); + return; + } - uint64_t TSFlags = MI.getDesc().TSFlags; - if (RISCVII::hasSEWOp(TSFlags)) { - HadVectorOp = true; + if (AVLReg.isVirtual()) + MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass); + + // Use X0 as the DestReg unless AVLReg is X0. We also need to change the + // opcode if the AVLReg is X0 as they have different register classes for + // the AVL operand.
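For reference, the vtype immediate that encodeVTYPE() produces follows the V-spec layout: vlmul in bits [2:0], vsew in bits [5:3], vta in bit 6, vma in bit 7. A minimal standalone sketch of that packing (encodeVType is my own illustrative helper, not the RISCVVType API):

#include <cassert>

// Pack (vlmul, SEW, ta, ma) into a vtype immediate per the V spec.
static unsigned encodeVType(unsigned VLMul, unsigned SEW, bool TA, bool MA) {
  assert(SEW >= 8 && SEW <= 64 && (SEW & (SEW - 1)) == 0 && "invalid SEW");
  unsigned VSEW = 0;
  for (unsigned S = SEW; S > 8; S >>= 1) // VSEW = log2(SEW) - 3
    ++VSEW;
  unsigned VType = (VLMul & 0x7) | (VSEW << 3);
  if (TA) VType |= 0x40;
  if (MA) VType |= 0x80;
  return VType;
}
// e.g. encodeVType(/*m1*/ 0, /*e32*/ 32, true, true) == 0xD0,
// which disassemblers print as "e32, m1, ta, ma".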
+ Register DestReg = RISCV::X0; + unsigned Opcode = RISCV::PseudoVSETVLI; + if (AVLReg == RISCV::X0) { + DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); + Opcode = RISCV::PseudoVSETVLIX0; + } + BuildMI(MBB, InsertPt, DL, TII->get(Opcode)) + .addReg(DestReg, RegState::Define | RegState::Dead) + .addReg(AVLReg) + .addImm(Info.encodeVTYPE()); +} + +// Return a VSETVLIInfo representing the changes made by this VSETVLI or +// VSETIVLI instruction. +static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { + VSETVLIInfo NewInfo; + if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { + NewInfo.setAVLImm(MI.getOperand(1).getImm()); + } else { + assert(MI.getOpcode() == RISCV::PseudoVSETVLI || + MI.getOpcode() == RISCV::PseudoVSETVLIX0); + Register AVLReg = MI.getOperand(1).getReg(); + assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && + "Can't handle X0, X0 vsetvli yet"); + NewInfo.setAVLReg(AVLReg); + } + NewInfo.setVTYPE(MI.getOperand(2).getImm()); + + return NewInfo; +} + +/// Return true if a VSETVLI is required to transition from CurInfo to Require +/// before MI. +bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, + const VSETVLIInfo &Require, + const VSETVLIInfo &CurInfo) const { + assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI)); + + if (CurInfo.isCompatible(MI, Require)) + return false; + + if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) + return true; + + // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. + // VL=0 is uninteresting (as it should have been deleted already), so it is + // compatible if we can prove both are non-zero. Additionally, if writing + // to an implicit_def operand, we don't need to preserve any other bits and + // are thus compatible with any larger etype, and can disregard policy bits. + if (isScalarMoveInstr(MI) && + CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) { + auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg()); + if (VRegDef && VRegDef->isImplicitDef() && + CurInfo.getSEW() >= Require.getSEW()) + return false; + if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require)) + return false; + } - // We didn't find a compatible value. If our AVL is a virtual register, + // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need + // and the last VL/VTYPE we observed is the same, we don't need a + // VSETVLI here.
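The hasSameVLMAX() checks used throughout this logic compare the SEW/LMUL ratio rather than VLMAX itself, since VLMAX depends on the runtime VLEN. A small sketch of why the ratio is sufficient (VConfig and the helpers are illustrative stand-ins, not the pass's types):

#include <cstdint>

// VLMAX = VLEN * LMUL / SEW. LMUL is modeled as a rational Num/Den
// (mf8 == 1/8 ... m8 == 8/1).
struct VConfig { unsigned SEW, LMulNum, LMulDen; };

static uint64_t vlmax(uint64_t VLEN, VConfig C) {
  return VLEN * C.LMulNum / (C.LMulDen * C.SEW);
}

// Two configs give the same VLMAX for *every* VLEN iff SEW/LMUL matches,
// cross-multiplied here to stay in integers.
static bool sameVLMAXForAllVLEN(VConfig A, VConfig B) {
  return (uint64_t)A.SEW * A.LMulDen * B.LMulNum ==
         (uint64_t)B.SEW * B.LMulDen * A.LMulNum;
}
// e.g. {e32, m2} and {e16, m1} both have ratio 16: vlmax(128, ...) is 8
// for both, and likewise for any other VLEN.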
+ if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() && + CurInfo.hasCompatibleVTYPE(MI, Require)) { + if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { + if (isVectorConfigInstr(*DefMI)) { + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo)) + return false; } } + } - // If this is something that updates VL/VTYPE that we don't know about, set - // the state to unknown. - if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || - MI.modifiesRegister(RISCV::VTYPE)) { - BBInfo.Change = VSETVLIInfo::getUnknown(); - } + return true; +} + +// Given an incoming state reaching MI, modifies that state so that it is minimally +// compatible with MI. The resulting state is guaranteed to be semantically legal +// for MI, but may not be the state requested by MI. +void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) { + uint64_t TSFlags = MI.getDesc().TSFlags; + if (!RISCVII::hasSEWOp(TSFlags)) + return; + + const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); + if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info)) + return; + + const VSETVLIInfo PrevInfo = Info; + Info = NewInfo; + + if (!RISCVII::hasVLOp(TSFlags)) + return; + + // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and + // VL > 0. We can discard the user-requested AVL and just use the previous + // one if we can prove both are non-zero. This removes a vsetvli entirely + // if the types match, or allows use of the cheaper AVL-preserving variant + // if VLMAX doesn't change. If VLMAX might change, we couldn't use + // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to + // prevent extending the live range of an AVL register operand. + // TODO: We can probably relax this for immediates. + if (isScalarMoveInstr(MI) && PrevInfo.isValid() && + PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() && + Info.hasSameVLMAX(PrevInfo)) { + if (PrevInfo.hasAVLImm()) + Info.setAVLImm(PrevInfo.getAVLImm()); + else + Info.setAVLReg(PrevInfo.getAVLReg()); + return; + } + + // Two cases involving an AVL resulting from a previous vsetvli. + // 1) If the AVL is the result of a previous vsetvli which has the + // same AVL and VLMAX as our current state, we can reuse the AVL + // from the current state for the new one. This allows us to + // generate 'vsetvli x0, x0, vtype' or possibly skip the transition + // entirely. + // 2) If AVL is defined by a vsetvli with the same VLMAX, we can + // replace the AVL operand with the AVL of the defining vsetvli. + // We avoid general register AVLs to avoid extending live ranges + // without being sure we can kill the original source reg entirely.
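A toy model of the case-2 forwarding decision just described; ToyInfo and forwardAVL are illustrative stand-ins, not the pass's VSETVLIInfo API:

#include <optional>

struct ToyInfo {
  bool AVLIsImm = false;
  long long AVLImm = 0;   // valid if AVLIsImm
  int AVLReg = -1;        // otherwise; 0 stands in for x0 (i.e. VLMAX)
  unsigned SEWLMULRatio = 0;
};

static bool sameVLMAX(const ToyInfo &A, const ToyInfo &B) {
  return A.SEWLMULRatio == B.SEWLMULRatio;
}

// Adopt the defining vsetvli's AVL only when VLMAX provably matches and the
// AVL is an immediate or x0; a general register would extend a live range.
static std::optional<ToyInfo> forwardAVL(ToyInfo Cur, const ToyInfo &Def) {
  if (!sameVLMAX(Cur, Def))
    return std::nullopt;
  if (!Def.AVLIsImm && Def.AVLReg != 0)
    return std::nullopt;
  Cur.AVLIsImm = Def.AVLIsImm;
  Cur.AVLImm = Def.AVLImm;
  Cur.AVLReg = Def.AVLReg;
  return Cur;
}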
+ if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual()) + return; + MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg()); + if (!DefMI || !isVectorConfigInstr(*DefMI)) + return; + + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + // case 1 + if (PrevInfo.isValid() && !PrevInfo.isUnknown() && + DefInfo.hasSameAVL(PrevInfo) && + DefInfo.hasSameVLMAX(PrevInfo)) { + if (PrevInfo.hasAVLImm()) + Info.setAVLImm(PrevInfo.getAVLImm()); + else + Info.setAVLReg(PrevInfo.getAVLReg()); + return; } + // case 2 + if (DefInfo.hasSameVLMAX(Info) && + (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) { + if (DefInfo.hasAVLImm()) + Info.setAVLImm(DefInfo.getAVLImm()); + else + Info.setAVLReg(DefInfo.getAVLReg()); + return; + } +} + +// Given a state with which we evaluated MI (see transferBefore above for why +// this might be different than the state MI requested), modify the state to +// reflect the changes MI might make. +void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) { + if (isVectorConfigInstr(MI)) { + Info = getInfoForVSETVLI(MI); + return; + } + + if (RISCV::isFaultFirstLoad(MI)) { + // Update AVL to vl-output of the fault first load. + Info.setAVLReg(MI.getOperand(1).getReg()); + return; + } + + // If this is something that updates VL/VTYPE that we don't know about, set + // the state to unknown. + if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || + MI.modifiesRegister(RISCV::VTYPE)) + Info = VSETVLIInfo::getUnknown(); +} + +bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { + bool HadVectorOp = false; + + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + BBInfo.Change = BBInfo.Pred; + for (const MachineInstr &MI : MBB) { + transferBefore(BBInfo.Change, MI); + + if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags)) + HadVectorOp = true; - // Initial exit state is whatever change we found in the block. - BBInfo.Exit = BBInfo.Change; + transferAfter(BBInfo.Change, MI); + } return HadVectorOp; } void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; BBInfo.InQueue = false; @@ -928,9 +1118,20 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { if (!InInfo.isValid()) return; + // If no change, no need to rerun block + if (InInfo == BBInfo.Pred) + return; + BBInfo.Pred = InInfo; + LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB) + << " changed to " << BBInfo.Pred << "\n"); - VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change); + // Note: It's tempting to cache the state changes here, but due to the + // compatibility checks performed, a block's output state can change based on + // its input state. To cache, we'd have to add logic for finding + // never-compatible state changes. + computeVLVTYPEChanges(MBB); + VSETVLIInfo TmpStatus = BBInfo.Change; // If the new exit value matches the old exit value, we don't need to revisit // any blocks. @@ -938,6 +1139,8 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { return; BBInfo.Exit = TmpStatus; + LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB) + << " changed to " << BBInfo.Exit << "\n"); // Add the successors to the work list so we can propagate the changed exit // status.
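The phase-2 code around computeIncomingVLVTYPE() is a standard forward dataflow fixpoint. A generic sketch of its shape, where State, merge, and transfer stand in for VSETVLIInfo, its intersect, and computeVLVTYPEChanges:

#include <queue>
#include <vector>

template <typename State, typename Merge, typename Transfer>
void fixpoint(const std::vector<std::vector<int>> &Preds,
              const std::vector<std::vector<int>> &Succs,
              std::vector<State> &In, std::vector<State> &Out,
              Merge merge, Transfer transfer) {
  std::queue<int> Work;
  for (int B = 0, E = (int)In.size(); B != E; ++B)
    Work.push(B); // In[] starts at the optimistic "top" value
  while (!Work.empty()) {
    int B = Work.front();
    Work.pop();
    State NewIn = In[B];
    for (int P : Preds[B])           // merge predecessor exit states
      NewIn = merge(NewIn, Out[P]);
    In[B] = NewIn;
    State NewOut = transfer(B, NewIn); // recompute this block's exit
    if (NewOut == Out[B])
      continue;                        // exit unchanged: nothing to propagate
    Out[B] = NewOut;
    for (int S : Succs[B])             // revisit successors whose input moved
      Work.push(S);
  }
}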
@@ -947,10 +1150,10 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { } // If we weren't able to prove a vsetvli was directly unneeded, it might still -// be/ unneeded if the AVL is a phi node where all incoming values are VL +// be unneeded if the AVL is a phi node where all incoming values are VL // outputs from the last VSETVLI in their respective basic blocks. bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, - const MachineBasicBlock &MBB) { + const MachineBasicBlock &MBB) const { if (DisableInsertVSETVLPHIOpt) return true; @@ -973,15 +1176,12 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, const BlockData &PBBInfo = BlockInfo[PBB->getNumber()]; // If the exit from the predecessor has the VTYPE we are looking for // we might be able to avoid a VSETVLI. - if (PBBInfo.Exit.isUnknown() || - !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false)) + if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require)) return true; // We need the PHI input to the be the output of a VSET(I)VLI. MachineInstr *DefMI = MRI->getVRegDef(InReg); - if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI && - DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 && - DefMI->getOpcode() != RISCV::PseudoVSETIVLI)) + if (!DefMI || !isVectorConfigInstr(*DefMI)) return true; // We found a VSET(I)VLI make sure it matches the output of the @@ -998,42 +1198,42 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, } void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { - VSETVLIInfo CurInfo; - // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was - // calculated in computeIncomingVLVTYPE. We need this to apply - // canSkipVSETVLIForLoadStore the same way computeIncomingVLVTYPE did. We - // can't include predecessor information in that decision to avoid disagreeing - // with the global analysis. - VSETVLIInfo BBLocalInfo; - // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. - MachineInstr *PrevVSETVLIMI = nullptr; - + VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; + // Track whether the prefix of the block we've scanned is transparent + // (meaning has not yet changed the abstract state). + bool PrefixTransparent = true; for (MachineInstr &MI : MBB) { + const VSETVLIInfo PrevInfo = CurInfo; + transferBefore(CurInfo, MI); + // If this is an explicit VSETVLI or VSETIVLI, update our state. - if (MI.getOpcode() == RISCV::PseudoVSETVLI || - MI.getOpcode() == RISCV::PseudoVSETVLIX0 || - MI.getOpcode() == RISCV::PseudoVSETIVLI) { + if (isVectorConfigInstr(MI)) { // Conservatively, mark the VL and VTYPE as live. assert(MI.getOperand(3).getReg() == RISCV::VL && MI.getOperand(4).getReg() == RISCV::VTYPE && "Unexpected operands where VL and VTYPE should be"); MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); - CurInfo = getInfoForVSETVLI(MI); - BBLocalInfo = getInfoForVSETVLI(MI); - PrevVSETVLIMI = &MI; - continue; + PrefixTransparent = false; } uint64_t TSFlags = MI.getDesc().TSFlags; if (RISCVII::hasSEWOp(TSFlags)) { - VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); + if (PrevInfo != CurInfo) { + // If this is the first implicit state change, and the state change + // requested can be proven to produce the same register contents, we + // can skip emitting the actual state change and continue as if we + // had since we know the GPR result of the implicit state change + // wouldn't be used and VL/VTYPE registers are correct. 
Note that + // we *do* need to model the state as if it changed as while the + // register contents are unchanged, the abstract model can change. + if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB)) + insertVSETVLI(MBB, MI, CurInfo, PrevInfo); + PrefixTransparent = false; + } + if (RISCVII::hasVLOp(TSFlags)) { - unsigned Offset = 2; - if (RISCVII::hasVecPolicyOp(TSFlags)) - Offset = 3; - MachineOperand &VLOp = - MI.getOperand(MI.getNumExplicitOperands() - Offset); + MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (VLOp.isReg()) { // Erase the AVL operand from the instruction. VLOp.setReg(RISCV::NoRegister); @@ -1044,76 +1244,217 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { } MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, /*isImp*/ true)); + } - if (!CurInfo.isValid()) { - // We haven't found any vector instructions or VL/VTYPE changes yet, - // use the predecessor information. - assert(BlockInfo[MBB.getNumber()].Pred.isValid() && - "Expected a valid predecessor state."); - // Don't use predecessor information if there was an earlier instruction - // in this block that allowed a vsetvli to be skipped for load/store. - if (!(BBLocalInfo.isValid() && - canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) && - needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && - needVSETVLIPHI(NewInfo, MBB)) { - insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred); - CurInfo = NewInfo; - BBLocalInfo = NewInfo; - } + if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || + MI.modifiesRegister(RISCV::VTYPE)) + PrefixTransparent = false; - // We must update BBLocalInfo for every vector instruction. - if (!BBLocalInfo.isValid()) - BBLocalInfo = NewInfo; - } else { - assert(BBLocalInfo.isValid()); - // If this instruction isn't compatible with the previous VL/VTYPE - // we need to insert a VSETVLI. - // If this is a unit-stride or strided load/store, we may be able to use - // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype. - // NOTE: We can't use predecessor information for the store. We must - // treat it the same as the first phase so that we produce the correct - // vl/vtype for succesor blocks. - if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) && - needVSETVLI(NewInfo, CurInfo)) { - // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it - // with current VL/VTYPE. - bool NeedInsertVSETVLI = true; - if (PrevVSETVLIMI) { - bool HasSameAVL = - CurInfo.hasSameAVL(NewInfo) || - (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() && - NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg()); - // If these two VSETVLI have the same AVL and the same VLMAX, - // we could merge these two VSETVLI. - if (HasSameAVL && - CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) { - PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); - NeedInsertVSETVLI = false; - } - if (isScalarMoveInstr(MI) && - ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) || - (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) && - NewInfo.hasSameVLMAX(CurInfo)) { - PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); - NeedInsertVSETVLI = false; - } - } - if (NeedInsertVSETVLI) - insertVSETVLI(MBB, MI, NewInfo, CurInfo); - CurInfo = NewInfo; - BBLocalInfo = NewInfo; - } + transferAfter(CurInfo, MI); + } + + // If we reach the end of the block and our current info doesn't match the + // expected info, insert a vsetvli to correct. 
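Schematically, the phase-3 walk brackets every instruction with the same transfer functions phase 1 used, and only materializes a vsetvli when the abstract state actually moves. A sketch with placeholder types (Instr, State, and the callbacks are not the pass's real signatures):

#include <functional>
#include <vector>

template <typename Instr, typename State>
void emitChanges(std::vector<Instr> &Block, State Cur,
                 std::function<void(State &, const Instr &)> transferBefore,
                 std::function<void(State &, const Instr &)> transferAfter,
                 std::function<void(const Instr &, const State &)> emit) {
  for (Instr &MI : Block) {
    State Prev = Cur;
    transferBefore(Cur, MI);   // state MI needs, possibly minimized
    if (!(Prev == Cur))
      emit(MI, Cur);           // insert the vsetvli realizing the new state
    transferAfter(Cur, MI);    // effects MI itself has on VL/VTYPE
  }
}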
+ if (!UseStrictAsserts) { + const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; + if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && + CurInfo != ExitInfo) { + // Note there's an implicit assumption here that terminators never use + // or modify VL or VTYPE. Also, fallthrough will return end(). + auto InsertPt = MBB.getFirstInstrTerminator(); + insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo, + CurInfo); + CurInfo = ExitInfo; + } + } + + if (UseStrictAsserts && CurInfo.isValid()) { + const auto &Info = BlockInfo[MBB.getNumber()]; + if (CurInfo != Info.Exit) { + LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n"); + LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n"); + LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n"); + LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n"); + } + assert(CurInfo == Info.Exit && + "InsertVSETVLI dataflow invariant violated"); + } +} + +/// Return true if the VL value configured must be equal to the requested one. +static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { + if (!Info.hasAVLImm()) + // VLMAX is always the same value. + // TODO: Could extend to other registers by looking at the associated vreg + // def placement. + return RISCV::X0 == Info.getAVLReg(); + + unsigned AVL = Info.getAVLImm(); + unsigned SEW = Info.getSEW(); + unsigned AVLInBits = AVL * SEW; + + unsigned LMul; + bool Fractional; + std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL()); + + if (Fractional) + return ST.getRealMinVLen() / LMul >= AVLInBits; + return ST.getRealMinVLen() * LMul >= AVLInBits; +} + +/// Perform simple partial redundancy elimination of the VSETVLI instructions +/// we're about to insert by looking for cases where we can PRE from the +/// beginning of one block to the end of one of its predecessors. Specifically, +/// this is geared to catch the common case of a fixed-length vsetvli in a +/// single-block loop when it could execute once in the preheader instead. +void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + + if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) + return; + + MachineBasicBlock *UnavailablePred = nullptr; + VSETVLIInfo AvailableInfo; + for (MachineBasicBlock *P : MBB.predecessors()) { + const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; + if (PredInfo.isUnknown()) { + if (UnavailablePred) + return; + UnavailablePred = P; + } else if (!AvailableInfo.isValid()) { + AvailableInfo = PredInfo; + } else if (AvailableInfo != PredInfo) { + return; + } + } + + // Unreachable, single pred, or full redundancy. Note that FRE is handled by + // phase 3. + if (!UnavailablePred || !AvailableInfo.isValid()) + return; + + // Critical edge - TODO: consider splitting? + if (UnavailablePred->succ_size() != 1) + return; + + // If VL can be less than AVL, then we can't reduce the frequency of + // execution. + if (!hasFixedResult(AvailableInfo, ST)) + return; + + // Does it actually let us remove an implicit transition in MBB? + bool Found = false; + for (auto &MI : MBB) { + if (isVectorConfigInstr(MI)) + return; + + const uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVII::hasSEWOp(TSFlags)) { + if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) + return; + Found = true; + break; + } + } + if (!Found) + return; + + // Finally, update both data flow state and insert the actual vsetvli.
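hasFixedResult() above reduces to a single inequality: with an immediate AVL, vl == AVL exactly when AVL*SEW fits in RealMinVLen*LMUL, since vl = min(AVL, VLMAX). A sketch with LMUL as a rational Num/Den (fixedResult is an illustrative helper):

#include <cstdint>

// vl is provably AVL iff AVL * SEW <= RealMinVLen * LMUL, checked with
// cross-multiplication so fractional LMUL stays integral.
static bool fixedResult(unsigned AVL, unsigned SEW, unsigned LMulNum,
                        unsigned LMulDen, unsigned RealMinVLen) {
  return (uint64_t)RealMinVLen * LMulNum >= (uint64_t)AVL * SEW * LMulDen;
}
// e.g. fixedResult(4, 32, 1, 1, 128) is true (VLMAX is exactly 4 at
// VLEN=128, e32, m1), while fixedResult(5, 32, 1, 1, 128) is false:
// a VLEN=128 part would clamp vl to 4, so the hoist would change behavior.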
+ // Doing both keeps the code in sync with the dataflow results, which + // is critical for correctness of phase 3. + auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; + LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " + << UnavailablePred->getName() << " with state " + << AvailableInfo << "\n"); + BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; + BlockInfo[MBB.getNumber()].Pred = AvailableInfo; + + // Note there's an implicit assumption here that terminators never use + // or modify VL or VTYPE. Also, fallthrough will return end(). + auto InsertPt = UnavailablePred->getFirstInstrTerminator(); + insertVSETVLI(*UnavailablePred, InsertPt, + UnavailablePred->findDebugLoc(InsertPt), + AvailableInfo, OldInfo); +} + +static void doUnion(DemandedFields &A, DemandedFields B) { + A.VL |= B.VL; + A.SEW |= B.SEW; + A.LMUL |= B.LMUL; + A.SEWLMULRatio |= B.SEWLMULRatio; + A.TailPolicy |= B.TailPolicy; + A.MaskPolicy |= B.MaskPolicy; +} + +// Return true if we can mutate PrevMI's VTYPE to match MI's +// without changing any of the fields which have been used. +// TODO: Restructure code to allow code reuse between this and isCompatible +// above. +static bool canMutatePriorConfig(const MachineInstr &PrevMI, + const MachineInstr &MI, + const DemandedFields &Used) { + // TODO: Extend this to handle cases where VL does change, but VL + // has not been used. (e.g. over a vmv.x.s) + if (!isVLPreservingConfig(MI)) + // Note: `vsetvli x0, x0, vtype' is the canonical instruction + // for this case. If you find yourself wanting to add other forms + // to this "unused VTYPE" case, we're probably missing a + // canonicalization earlier. + return false; + + if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) + return false; + + auto PriorVType = PrevMI.getOperand(2).getImm(); + auto VType = MI.getOperand(2).getImm(); + return areCompatibleVTYPEs(PriorVType, VType, Used); +} + +void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { + MachineInstr *PrevMI = nullptr; + DemandedFields Used; + SmallVector<MachineInstr *> ToDelete; + for (MachineInstr &MI : MBB) { + // Note: Must be *before* vsetvli handling to account for config cases + // which only change some subfields. + doUnion(Used, getDemanded(MI)); + + if (!isVectorConfigInstr(MI)) + continue; + + if (PrevMI) { + if (!Used.VL && !Used.usedVTYPE()) { + ToDelete.push_back(PrevMI); + // fallthrough + } else if (canMutatePriorConfig(*PrevMI, MI, Used)) { + PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm()); + ToDelete.push_back(&MI); + // Leave PrevMI unchanged + continue; } } + PrevMI = &MI; + Used = getDemanded(MI); + Register VRegDef = MI.getOperand(0).getReg(); + if (VRegDef != RISCV::X0 && + !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) + Used.VL = true; + } - // If this is something updates VL/VTYPE that we don't know about, set - // the state to unknown.
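The doLocalPostpass() walk can be pictured on a toy DemandedFields: union what each instruction demands since the last vsetvli, and the prior config is removable when nothing was demanded in between. Illustrative types only, mirroring the field names above:

struct Demanded {
  bool VL = false, SEW = false, LMUL = false, SEWLMULRatio = false,
       TailPolicy = false, MaskPolicy = false;
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }
};

// The same field-wise OR as doUnion() above.
static void unionInto(Demanded &A, const Demanded &B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// A vsetvli whose VL and VTYPE were never consumed before the next config
// instruction is dead and can be deleted outright.
static bool priorConfigIsDead(const Demanded &UsedSincePrior) {
  return !UsedSincePrior.VL && !UsedSincePrior.usedVTYPE();
}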
- if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || - MI.modifiesRegister(RISCV::VTYPE)) { - CurInfo = VSETVLIInfo::getUnknown(); - BBLocalInfo = VSETVLIInfo::getUnknown(); - PrevVSETVLIMI = nullptr; + for (auto *MI : ToDelete) + MI->eraseFromParent(); +} + +void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { + for (auto I = MBB.begin(), E = MBB.end(); I != E;) { + MachineInstr &MI = *I++; + if (RISCV::isFaultFirstLoad(MI)) { + Register VLOutput = MI.getOperand(1).getReg(); + if (!MRI->use_nodbg_empty(VLOutput)) + BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL), + VLOutput); + // We don't use the vl output of the VLEFF/VLSEGFF anymore. + MI.getOperand(1).setReg(RISCV::X0); } } } @@ -1124,6 +1465,8 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { if (!ST.hasVInstructions()) return false; + LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n"); + TII = ST.getInstrInfo(); MRI = &MF.getRegInfo(); @@ -1133,34 +1476,77 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { bool HaveVectorOp = false; // Phase 1 - determine how VL/VTYPE are affected by the each block. - for (const MachineBasicBlock &MBB : MF) + for (const MachineBasicBlock &MBB : MF) { HaveVectorOp |= computeVLVTYPEChanges(MBB); + // Initial exit state is whatever change we found in the block. + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + BBInfo.Exit = BBInfo.Change; + LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) + << " is " << BBInfo.Exit << "\n"); + + } // If we didn't find any instructions that need VSETVLI, we're done. - if (HaveVectorOp) { - // Phase 2 - determine the exit VL/VTYPE from each block. We add all - // blocks to the list here, but will also add any that need to be revisited - // during Phase 2 processing. - for (const MachineBasicBlock &MBB : MF) { - WorkList.push(&MBB); - BlockInfo[MBB.getNumber()].InQueue = true; - } - while (!WorkList.empty()) { - const MachineBasicBlock &MBB = *WorkList.front(); - WorkList.pop(); - computeIncomingVLVTYPE(MBB); - } + if (!HaveVectorOp) { + BlockInfo.clear(); + return false; + } - // Phase 3 - add any vsetvli instructions needed in the block. Use the - // Phase 2 information to avoid adding vsetvlis before the first vector - // instruction in the block if the VL/VTYPE is satisfied by its - // predecessors. - for (MachineBasicBlock &MBB : MF) - emitVSETVLIs(MBB); + // Phase 2 - determine the exit VL/VTYPE from each block. We add all + // blocks to the list here, but will also add any that need to be revisited + // during Phase 2 processing. + for (const MachineBasicBlock &MBB : MF) { + WorkList.push(&MBB); + BlockInfo[MBB.getNumber()].InQueue = true; + } + while (!WorkList.empty()) { + const MachineBasicBlock &MBB = *WorkList.front(); + WorkList.pop(); + computeIncomingVLVTYPE(MBB); } - BlockInfo.clear(); + // Perform partial redundancy elimination of vsetvli transitions. + for (MachineBasicBlock &MBB : MF) + doPRE(MBB); + + // Phase 3 - add any vsetvli instructions needed in the block. Use the + // Phase 2 information to avoid adding vsetvlis before the first vector + // instruction in the block if the VL/VTYPE is satisfied by its + // predecessors. + for (MachineBasicBlock &MBB : MF) + emitVSETVLIs(MBB); + + // Now that all vsetvlis are explicit, go through and do block local + // DSE and peephole based demanded fields based transforms. 
Note that + // this *must* be done outside the main dataflow so long as we allow + // any cross block analysis within the dataflow. We can't have both + // demanded fields based mutation and non-local analysis in the + // dataflow at the same time without introducing inconsistencies. + for (MachineBasicBlock &MBB : MF) + doLocalPostpass(MBB); + + // Once we're fully done rewriting all the instructions, do a final pass + // through to check for VSETVLIs which write to an unused destination. + // For the non X0, X0 variant, we can replace the destination register + // with X0 to reduce register pressure. This is really a generic + // optimization which can be applied to any dead def (TODO: generalize). + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == RISCV::PseudoVSETVLI || + MI.getOpcode() == RISCV::PseudoVSETIVLI) { + Register VRegDef = MI.getOperand(0).getReg(); + if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef)) + MI.getOperand(0).setReg(RISCV::X0); + } + } + } + // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output + // of VLEFF/VLSEGFF. + for (MachineBasicBlock &MBB : MF) + insertReadVL(MBB); + + BlockInfo.clear(); return HaveVectorOp; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index f99d0f56c406..18b31f85bfdb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -196,7 +196,10 @@ class RVInstEncoding) + .addReg(RISCV::X0); + return; + } + // FPR->FPR copies and VR->VR copies. unsigned Opc; bool IsScalableVector = true; @@ -631,11 +641,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, MachineInstr::MIFlag Flag) const { - MachineFunction *MF = MBB.getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); Register SrcReg = RISCV::X0; - Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass); - unsigned Num = 0; if (!STI.is64Bit() && !isInt<32>(Val)) report_fatal_error("Should only materialize 32-bit constants for RV32"); @@ -645,34 +651,34 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, assert(!Seq.empty()); for (RISCVMatInt::Inst &Inst : Seq) { - // Write the final result to DstReg if it's the last instruction in the Seq. - // Otherwise, write the result to the temp register. - if (++Num == Seq.size()) - Result = DstReg; - - if (Inst.Opc == RISCV::LUI) { - BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result) + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addImm(Inst.Imm) .setMIFlag(Flag); - } else if (Inst.Opc == RISCV::ADD_UW) { - BuildMI(MBB, MBBI, DL, get(RISCV::ADD_UW), Result) + break; + case RISCVMatInt::RegX0: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addReg(SrcReg, RegState::Kill) .addReg(RISCV::X0) .setMIFlag(Flag); - } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) { - BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result) + break; + case RISCVMatInt::RegReg: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addReg(SrcReg, RegState::Kill) .addReg(SrcReg, RegState::Kill) .setMIFlag(Flag); - } else { - BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result) + break; + case RISCVMatInt::RegImm: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addReg(SrcReg, RegState::Kill) .addImm(Inst.Imm) .setMIFlag(Flag); + break; } + // Only the first instruction has X0 as its source. 
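The RISCVMatInt sequences consumed by movImm() build constants from a LUI plus a sign-extended 12-bit add; the +0x800 rounding trick is the key step. A self-contained sketch for the 32-bit case (splitImm32 is my own helper name):

#include <cassert>
#include <cstdint>

// Split Val into a 20-bit upper immediate and a signed 12-bit remainder so
// that LUI(Hi20) + ADDI(W)(Lo12) reconstructs Val. Adding 0x800 first rounds
// Hi20 up whenever the sign-extending Lo12 will be negative.
static void splitImm32(int32_t Val, int32_t &Hi20, int32_t &Lo12) {
  Hi20 = (int32_t)(((uint32_t)Val + 0x800u) >> 12);   // LUI operand
  Lo12 = (int32_t)((uint32_t)Val << 20) >> 20;        // sign-extended ADDI(W)
  assert((int32_t)(((uint32_t)Hi20 << 12) + (uint32_t)Lo12) == Val);
}
// e.g. Val = 0x12345678 -> Hi20 = 0x12345, Lo12 = 0x678;
//      Val = 0x12345FFF -> Hi20 = 0x12346, Lo12 = -1.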
- SrcReg = Result; + SrcReg = DstReg; } } @@ -1052,29 +1058,25 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, switch (OpType) { default: llvm_unreachable("Unexpected operand type"); - case RISCVOp::OPERAND_UIMM2: - Ok = isUInt<2>(Imm); - break; - case RISCVOp::OPERAND_UIMM3: - Ok = isUInt<3>(Imm); - break; - case RISCVOp::OPERAND_UIMM4: - Ok = isUInt<4>(Imm); - break; - case RISCVOp::OPERAND_UIMM5: - Ok = isUInt<5>(Imm); - break; - case RISCVOp::OPERAND_UIMM7: - Ok = isUInt<7>(Imm); - break; - case RISCVOp::OPERAND_UIMM12: - Ok = isUInt<12>(Imm); - break; + + // clang-format off +#define CASE_OPERAND_UIMM(NUM) \ + case RISCVOp::OPERAND_UIMM##NUM: \ + Ok = isUInt<NUM>(Imm); \ + break; + CASE_OPERAND_UIMM(2) + CASE_OPERAND_UIMM(3) + CASE_OPERAND_UIMM(4) + CASE_OPERAND_UIMM(5) + CASE_OPERAND_UIMM(7) + CASE_OPERAND_UIMM(12) + CASE_OPERAND_UIMM(20) + // clang-format on case RISCVOp::OPERAND_SIMM12: Ok = isInt<12>(Imm); break; - case RISCVOp::OPERAND_UIMM20: - Ok = isUInt<20>(Imm); + case RISCVOp::OPERAND_SIMM12_LSB00000: + Ok = isShiftedInt<7, 5>(Imm); break; case RISCVOp::OPERAND_UIMMLOG2XLEN: if (STI.getTargetTriple().isArch64Bit()) @@ -1205,6 +1207,11 @@ enum MachineOutlinerConstructionID { MachineOutlinerDefault }; +bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault( + MachineFunction &MF) const { + return MF.getFunction().hasMinSize(); +} + outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { @@ -1212,10 +1219,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo( // be used to setup the function call. auto CannotInsertCall = [](outliner::Candidate &C) { const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo(); - - C.initLRU(*TRI); - LiveRegUnits LRU = C.LRU; - return !LRU.available(RISCV::X5); + return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI); }; llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall); @@ -1258,7 +1262,12 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI, if (MI.isPosition()) { // We can manually strip out CFI instructions later. if (MI.isCFIInstruction()) - return outliner::InstrType::Invisible; + // If the current function has exception-handling code, we can't outline + // and strip these CFI instructions, since that may break the .eh_frame + // section needed for unwinding. + return MI.getMF()->getFunction().needsUnwindTableEntry() + ? outliner::InstrType::Illegal + : outliner::InstrType::Invisible; return outliner::InstrType::Illegal; } @@ -1325,7 +1334,7 @@ void RISCVInstrInfo::buildOutlinedFrame( MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF, const outliner::Candidate &C) const { + MachineFunction &MF, outliner::Candidate &C) const { // Add in a call instruction to the outlined function at the given location. It = MBB.insert(It, @@ -1335,6 +1344,53 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( return It; } +// MIR printer helper function to annotate Operands with a comment. +std::string RISCVInstrInfo::createMIROperandComment( + const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, + const TargetRegisterInfo *TRI) const { + // Print a generic comment for this operand if there is one. + std::string GenericComment = + TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); + if (!GenericComment.empty()) + return GenericComment; + + // If not, we must have an immediate operand.
+ if (!Op.isImm()) + return std::string(); + + std::string Comment; + raw_string_ostream OS(Comment); + + uint64_t TSFlags = MI.getDesc().TSFlags; + + // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW + // operand of vector codegen pseudos. + if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI || + MI.getOpcode() == RISCV::PseudoVSETVLI || + MI.getOpcode() == RISCV::PseudoVSETIVLI || + MI.getOpcode() == RISCV::PseudoVSETVLIX0) && + OpIdx == 2) { + unsigned Imm = MI.getOperand(OpIdx).getImm(); + RISCVVType::printVType(Imm, OS); + } else if (RISCVII::hasSEWOp(TSFlags)) { + unsigned NumOperands = MI.getNumExplicitOperands(); + bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags); + + // The SEW operand is before any policy operand. + if (OpIdx != NumOperands - HasPolicy - 1) + return std::string(); + + unsigned Log2SEW = MI.getOperand(OpIdx).getImm(); + unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; + assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); + + OS << "e" << SEW; + } + + OS.flush(); + return Comment; +} + // clang-format off #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ RISCV::PseudoV##OP##_##TYPE##_##LMUL @@ -1653,6 +1709,12 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { + // If the tail policy is undisturbed we can't convert. + assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && + MI.getNumExplicitOperands() == 6); + if ((MI.getOperand(5).getImm() & 1) == 0) + return nullptr; + // clang-format off unsigned NewOpc; switch (MI.getOpcode()) { @@ -1722,11 +1784,10 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, "Reserve the stack by the multiple of one vector size."); MachineRegisterInfo &MRI = MF.getRegInfo(); - const RISCVInstrInfo *TII = MF.getSubtarget().getInstrInfo(); int64_t NumOfVReg = Amount / 8; Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL) + BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), VL) .setMIFlag(Flag); assert(isInt<32>(NumOfVReg) && "Expect the number of vector registers within 32-bits."); @@ -1734,47 +1795,55 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, uint32_t ShiftAmount = Log2_32(NumOfVReg); if (ShiftAmount == 0) return VL; - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) + BuildMI(MBB, II, DL, get(RISCV::SLLI), VL) .addReg(VL, RegState::Kill) .addImm(ShiftAmount) .setMIFlag(Flag); + } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) && + STI.hasStdExtZba()) { + // We can use Zba SHXADD instructions for multiply in some cases. + // TODO: Generalize to SHXADD+SLLI. 
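The strength-reduction choices in getVLENFactoredAmount() above, modeled on plain integers (scaleByConst is illustrative; the real code emits SLLI/SHxADD/ADD/SUB/MUL on machine registers):

#include <cstdint>

// Scale X by a constant N without a multiply where possible: slli for powers
// of two, Zba shNadd for 3/5/9, slli+add / slli+sub for 2^k +/- 1, mul last.
static uint64_t scaleByConst(uint64_t X, unsigned N) {
  auto isPow2 = [](unsigned V) { return V && !(V & (V - 1)); };
  auto log2u = [](unsigned V) { unsigned L = 0; while (V >>= 1) ++L; return L; };
  if (isPow2(N))
    return X << log2u(N);            // slli
  if (N == 3 || N == 5 || N == 9)
    return (X << log2u(N - 1)) + X;  // sh1add / sh2add / sh3add (one Zba op)
  if (isPow2(N - 1))
    return (X << log2u(N - 1)) + X;  // slli + add
  if (isPow2(N + 1))
    return (X << log2u(N + 1)) - X;  // slli + sub
  return X * N;                      // materialize N, then mul (needs M ext)
}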
+ unsigned Opc; + switch (NumOfVReg) { + default: llvm_unreachable("Unexpected number of vregs"); + case 3: Opc = RISCV::SH1ADD; break; + case 5: Opc = RISCV::SH2ADD; break; + case 9: Opc = RISCV::SH3ADD; break; + } + BuildMI(MBB, II, DL, get(Opc), VL) + .addReg(VL, RegState::Kill) + .addReg(VL) + .setMIFlag(Flag); } else if (isPowerOf2_32(NumOfVReg - 1)) { Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); uint32_t ShiftAmount = Log2_32(NumOfVReg - 1); - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister) + BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) .addReg(VL) .addImm(ShiftAmount) .setMIFlag(Flag); - BuildMI(MBB, II, DL, TII->get(RISCV::ADD), VL) + BuildMI(MBB, II, DL, get(RISCV::ADD), VL) .addReg(ScaledRegister, RegState::Kill) .addReg(VL, RegState::Kill) .setMIFlag(Flag); } else if (isPowerOf2_32(NumOfVReg + 1)) { Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); uint32_t ShiftAmount = Log2_32(NumOfVReg + 1); - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister) + BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) .addReg(VL) .addImm(ShiftAmount) .setMIFlag(Flag); - BuildMI(MBB, II, DL, TII->get(RISCV::SUB), VL) + BuildMI(MBB, II, DL, get(RISCV::SUB), VL) .addReg(ScaledRegister, RegState::Kill) .addReg(VL, RegState::Kill) .setMIFlag(Flag); } else { Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); - if (!isInt<12>(NumOfVReg)) - movImm(MBB, II, DL, N, NumOfVReg); - else { - BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N) - .addReg(RISCV::X0) - .addImm(NumOfVReg) - .setMIFlag(Flag); - } - if (!MF.getSubtarget().hasStdExtM()) + movImm(MBB, II, DL, N, NumOfVReg, Flag); + if (!STI.hasStdExtM()) MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ MF.getFunction(), "M-extension must be enabled to calculate the vscaled size/offset."}); - BuildMI(MBB, II, DL, TII->get(RISCV::MUL), VL) + BuildMI(MBB, II, DL, get(RISCV::MUL), VL) .addReg(VL, RegState::Kill) .addReg(N, RegState::Kill) .setMIFlag(Flag); @@ -1811,20 +1880,18 @@ static bool isRVVWholeLoadStore(unsigned Opcode) { } } -bool RISCVInstrInfo::isRVVSpill(const MachineInstr &MI, bool CheckFIs) const { +bool RISCV::isRVVSpill(const MachineInstr &MI) { // RVV lacks any support for immediate addressing for stack addresses, so be // conservative. unsigned Opcode = MI.getOpcode(); if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) return false; - return !CheckFIs || any_of(MI.operands(), [](const MachineOperand &MO) { - return MO.isFI(); - }); + return true; } Optional> -RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const { +RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { switch (Opcode) { default: return None; @@ -1863,3 +1930,8 @@ RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const { return std::make_pair(8u, 1u); } } + +bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { + return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) && + !MI.isInlineAsm(); +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index da0877c4299a..5368437618bd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -135,6 +135,8 @@ public: virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override; + bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; + // Calculate target-specific information for a set of outlining candidates. 
outliner::OutlinedFunction getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override; @@ -153,7 +155,7 @@ public: virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::Candidate &C) const override; + outliner::Candidate &C) const override; bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; @@ -164,25 +166,31 @@ public: MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override; + // MIR printer helper function to annotate Operands with a comment. + std::string + createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, + unsigned OpIdx, + const TargetRegisterInfo *TRI) const override; + Register getVLENFactoredAmount( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, int64_t Amount, MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; - // Returns true if the given MI is an RVV instruction opcode for which we may - // expect to see a FrameIndex operand. When CheckFIs is true, the instruction - // must contain at least one FrameIndex operand. - bool isRVVSpill(const MachineInstr &MI, bool CheckFIs) const; - - Optional<std::pair<unsigned, unsigned>> - isRVVSpillForZvlsseg(unsigned Opcode) const; - protected: const RISCVSubtarget &STI; }; namespace RISCV { +// Returns true if the given MI is an RVV instruction opcode for which we may +// expect to see a FrameIndex operand. +bool isRVVSpill(const MachineInstr &MI); + +Optional<std::pair<unsigned, unsigned>> isRVVSpillForZvlsseg(unsigned Opcode); + +bool isFaultFirstLoad(const MachineInstr &MI); + // Implemented in RISCVGenInstrInfo.inc int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ee6a74b7f14f..ee4c026af8f4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -83,6 +83,21 @@ def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE", SDT_RISCVReadCycleWide, [SDNPHasChain, SDNPSideEffect]>; +def riscv_add_lo : SDNode<"RISCVISD::ADD_LO", SDTIntBinOp>; +def riscv_hi : SDNode<"RISCVISD::HI", SDTIntUnaryOp>; +def riscv_lla : SDNode<"RISCVISD::LLA", SDTIntUnaryOp>; +def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL", + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>]>>; + +def riscv_la : SDNode<"RISCVISD::LA", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def riscv_la_tls_ie : SDNode<"RISCVISD::LA_TLS_IE", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def riscv_la_tls_gd : SDNode<"RISCVISD::LA_TLS_GD", SDTIntUnaryOp>; + //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// @@ -105,6 +120,19 @@ def ImmZeroAsmOperand : AsmOperandClass { let DiagnosticType = !strconcat("Invalid", Name); } +// A parse method for (${gpr}) or 0(${gpr}), where the 0 is silently ignored.
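A toy acceptor for the two operand spellings described in that comment; the real logic lives in the RISCVAsmParser parse method named below, so this only illustrates the accepted shapes:

#include <string>

// Accept "(reg)" or "0(reg)", dropping the literal zero offset.
static bool parseZeroOffsetMem(const std::string &Tok, std::string &Reg) {
  if (Tok.empty())
    return false;
  size_t I = 0;
  if (Tok[I] == '0')           // optional literal zero offset
    ++I;
  if (I >= Tok.size() || Tok[I] != '(' || Tok.back() != ')')
    return false;
  Reg = Tok.substr(I + 1, Tok.size() - I - 2);
  return !Reg.empty();
}
// parseZeroOffsetMem("0(a0)", R) and parseZeroOffsetMem("(a0)", R) both
// succeed with R == "a0"; "4(a0)" is rejected.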
+def ZeroOffsetMemOpOperand : AsmOperandClass { + let Name = "ZeroOffsetMemOpOperand"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isGPR"; + let ParserMethod = "parseZeroOffsetMemOp"; +} + +def GPRMemZeroOffset : RegisterOperand<GPR> { + let ParserMatchClass = ZeroOffsetMemOpOperand; + let PrintMethod = "printZeroOffsetMemOp"; +} + class SImmAsmOperand<int width, string suffix = ""> : ImmAsmOperand<"S", width, suffix> { } @@ -334,10 +362,19 @@ def ixlenimm_li : Operand<XLenVT> { // Standalone (codegen-only) immleaf patterns. -// A 12-bit signed immediate plus one where the imm range will be -2047~2048. +// A 12-bit signed immediate plus one where the imm range will be [-2047, 2048]. def simm12_plus1 : ImmLeaf<XLenVT, [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>; +// A 12-bit signed immediate minus one, excluding zero +def simm12_minus1_nonzero : PatLeaf<(imm), [{ + if (!N->hasOneUse()) + return false; + // The immediate operand must be in range [-2049, 0) or (0, 2046]. + int64_t Imm = N->getSExtValue(); + return (Imm >= -2049 && Imm < 0) || (Imm > 0 && Imm <= 2046); +}]>; + // A 6-bit constant greater than 32. def uimm6gt32 : ImmLeaf<XLenVT, [{ return isUInt<6>(Imm) && Imm > 32; @@ -345,8 +382,10 @@ def uimm6gt32 : ImmLeaf<XLenVT, [{ }]>; def FrameAddrRegImm : ComplexPattern<iPTR, 2, "SelectFrameAddrRegImm", [frameindex, or, add]>; def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">; def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">; // Return the negation of an immediate value. def NegImm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), N->getValueType(0)); }]>; -// Return an immediate value plus 32. -def ImmPlus32 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue() + 32, SDLoc(N), +// Return an immediate value plus 1. +def ImmPlus1 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue() + 1, SDLoc(N), N->getValueType(0)); }]>; @@ -380,7 +419,9 @@ def ImmSubFrom32 : SDNodeXForm<imm, [{ }]>; // Check if (add r, imm) can be optimized to (ADDI (ADDI r, imm0), imm1), -// in which imm = imm0 + imm1 and both imm0 and imm1 are simm12. +// in which imm = imm0 + imm1 and both imm0 and imm1 are simm12. We make imm0 +// as large as possible and imm1 as small as possible so that we might be able +// to use c.addi for the small immediate. def AddiPair : PatLeaf<(imm), [{ if (!N->hasOneUse()) return false; @@ -389,19 +430,27 @@ def AddiPair : PatLeaf<(imm), [{ return (-4096 <= Imm && Imm <= -2049) || (2048 <= Imm && Imm <= 4094); }]>; -// Return imm/2. -def AddiPairImmA : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue() / 2, SDLoc(N), +// Return imm - (imm < 0 ? -2048 : 2047). +def AddiPairImmSmall : SDNodeXForm<imm, [{ int64_t Imm = N->getSExtValue(); + int64_t Adj = N->getSExtValue() < 0 ? -2048 : 2047; + return CurDAG->getTargetConstant(Imm - Adj, SDLoc(N), N->getValueType(0)); }]>; -// Return imm - imm/2. -def AddiPairImmB : SDNodeXForm<imm, [{ int64_t Imm = N->getSExtValue(); - return CurDAG->getTargetConstant(Imm - Imm / 2, SDLoc(N), +// Return -2048 if immediate is negative or 2047 if positive. These are the +// largest simm12 values. +def AddiPairImmLarge : SDNodeXForm<imm, [{ int64_t Imm = N->getSExtValue() < 0 ? -2048 : 2047; + return CurDAG->getTargetConstant(Imm, SDLoc(N), N->getValueType(0)); }]>; +def TrailingZeros : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(), + SDLoc(N), N->getValueType(0)); +}]>; + def XLenSubTrailingOnes : SDNodeXForm<imm, [{ uint64_t XLen = Subtarget->getXLen(); uint64_t TrailingOnes = N->getAPIntValue().countTrailingOnes(); @@ -410,7 +459,13 @@ def XLenSubTrailingOnes : SDNodeXForm<imm, [{ }]>; // Checks if this mask is a non-empty sequence of ones starting at the -// least significant bit with the remainder zero and exceeds simm12. +// most/least significant bit with the remainder zero and exceeds simm32/simm12.
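The LeadingOnesMask/TrailingOnesMask patterns that follow trade an AND against a large mask constant for a pair of shifts, so the constant never has to be materialized. The equivalent integer identities (assuming a non-empty mask, so every shift amount stays below the word width):

#include <cstdint>

// mask = ~0 << TZ (ones at the top): AND clears the low TZ bits.
static uint64_t andLeadingOnes(uint64_t X, unsigned TZ) {
  return (X >> TZ) << TZ;   // matches the SLLI (SRLI ...) pattern
}

// mask = ~0 >> LZ (ones at the bottom): AND clears the high LZ bits.
static uint64_t andTrailingOnes(uint64_t X, unsigned LZ) {
  return (X << LZ) >> LZ;   // matches the SRLI (SLLI ...) pattern
}
// e.g. andLeadingOnes(X, 32) == (X & 0xFFFFFFFF00000000ULL).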
+def LeadingOnesMask : PatLeaf<(imm), [{ + if (!N->hasOneUse()) + return false; + return !isInt<32>(N->getSExtValue()) && isMask_64(~N->getSExtValue()); +}], TrailingZeros>; + def TrailingOnesMask : PatLeaf<(imm), [{ if (!N->hasOneUse()) return false; @@ -437,20 +492,35 @@ class BranchCC_rri funct3, string opcodestr> let isTerminator = 1; } -let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { class Load_ri funct3, string opcodestr> : RVInstI; +class HLoad_r funct7, bits<5> funct5, string opcodestr> + : RVInstR { + let rs2 = funct5; +} +} + // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. -let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { class Store_rri funct3, string opcodestr> : RVInstS; +class HStore_rr funct7, string opcodestr> + : RVInstR { + let rd = 0; +} +} + let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_ri funct3, string opcodestr> : RVInstI imm11_7, bits<3> funct3, string opcodestr> Sched<[WriteShiftImm, ReadShiftImm]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class ALU_rr funct7, bits<3> funct3, string opcodestr> +class ALU_rr funct7, bits<3> funct3, string opcodestr, + bit Commutable = 0> : RVInstR; + opcodestr, "$rd, $rs1, $rs2"> { + let isCommutable = Commutable; +} let hasNoSchedulingInfo = 1, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in @@ -490,15 +563,25 @@ class ShiftW_ri imm11_5, bits<3> funct3, string opcodestr> Sched<[WriteShiftImm32, ReadShiftImm32]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class ALUW_rr funct7, bits<3> funct3, string opcodestr> +class ALUW_rr funct7, bits<3> funct3, string opcodestr, + bit Commutable = 0> : RVInstR; + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> { + let isCommutable = Commutable; +} let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class Priv funct7> : RVInstR; +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class Priv_rr funct7> + : RVInstR { + let rd = 0; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -558,16 +641,26 @@ def SLLI : Shift_ri<0b00000, 0b001, "slli">; def SRLI : Shift_ri<0b00000, 0b101, "srli">; def SRAI : Shift_ri<0b01000, 0b101, "srai">; -def ADD : ALU_rr<0b0000000, 0b000, "add">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def SUB : ALU_rr<0b0100000, 0b000, "sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def SLL : ALU_rr<0b0000000, 0b001, "sll">, Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>; -def SLT : ALU_rr<0b0000000, 0b010, "slt">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def XOR : ALU_rr<0b0000000, 0b100, "xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def SRL : ALU_rr<0b0000000, 0b101, "srl">, Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>; -def SRA : ALU_rr<0b0100000, 0b101, "sra">, Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>; -def OR : ALU_rr<0b0000000, 0b110, "or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def AND : ALU_rr<0b0000000, 0b111, "and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def ADD : ALU_rr<0b0000000, 0b000, "add", /*Commutable*/1>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SUB : ALU_rr<0b0100000, 0b000, "sub">, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLL : 
ALU_rr<0b0000000, 0b001, "sll">, + Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>; +def SLT : ALU_rr<0b0000000, 0b010, "slt">, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def XOR : ALU_rr<0b0000000, 0b100, "xor", /*Commutable*/1>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SRL : ALU_rr<0b0000000, 0b101, "srl">, + Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>; +def SRA : ALU_rr<0b0100000, 0b101, "sra">, + Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>; +def OR : ALU_rr<0b0000000, 0b110, "or", /*Commutable*/1>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def AND : ALU_rr<0b0000000, 0b111, "and", /*Commutable*/1>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), @@ -642,7 +735,7 @@ def SLLIW : ShiftW_ri<0b0000000, 0b001, "slliw">; def SRLIW : ShiftW_ri<0b0000000, 0b101, "srliw">; def SRAIW : ShiftW_ri<0b0100000, 0b101, "sraiw">; -def ADDW : ALUW_rr<0b0000000, 0b000, "addw">, +def ADDW : ALUW_rr<0b0000000, 0b000, "addw", /*Commutable*/1>, Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; def SUBW : ALUW_rr<0b0100000, 0b000, "subw">, Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; @@ -684,11 +777,40 @@ def WFI : Priv<"wfi", 0b0001000>, Sched<[]> { let rs2 = 0b00101; } -let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs), - (ins GPR:$rs1, GPR:$rs2), - "sfence.vma", "$rs1, $rs2">, Sched<[]> { +def SFENCE_W_INVAL : Priv<"sfence.w.inval", 0b0001100>, Sched<[]> { let rd = 0; + let rs1 = 0; + let rs2 = 0; +} + +def SFENCE_INVAL_IR : Priv<"sfence.inval.ir", 0b0001100>, Sched<[]> { + let rd = 0; + let rs1 = 0; + let rs2 = 0b00001; +} + +def SFENCE_VMA : Priv_rr<"sfence.vma", 0b0001001>, Sched<[]>; +def SINVAL_VMA : Priv_rr<"sinval.vma", 0b0001011>, Sched<[]>; +def HFENCE_VVMA : Priv_rr<"hfence.vvma", 0b0010001>, Sched<[]>; +def HFENCE_GVMA : Priv_rr<"hfence.gvma", 0b0110001>, Sched<[]>; +def HINVAL_VVMA : Priv_rr<"hinval.vvma", 0b0010011>, Sched<[]>; +def HINVAL_GVMA : Priv_rr<"hinval.gvma", 0b0110011>, Sched<[]>; + +def HLV_B : HLoad_r<0b0110000, 0b00000, "hlv.b">, Sched<[]>; +def HLV_BU : HLoad_r<0b0110000, 0b00001, "hlv.bu">, Sched<[]>; +def HLV_H : HLoad_r<0b0110010, 0b00000, "hlv.h">, Sched<[]>; +def HLV_HU : HLoad_r<0b0110010, 0b00001, "hlv.hu">, Sched<[]>; +def HLVX_HU : HLoad_r<0b0110010, 0b00011, "hlvx.hu">, Sched<[]>; +def HLV_W : HLoad_r<0b0110100, 0b00000, "hlv.w">, Sched<[]>; +def HLVX_WU : HLoad_r<0b0110100, 0b00011, "hlvx.wu">, Sched<[]>; +def HSV_B : HStore_rr<0b0110001, "hsv.b">, Sched<[]>; +def HSV_H : HStore_rr<0b0110011, "hsv.h">, Sched<[]>; +def HSV_W : HStore_rr<0b0110101, "hsv.w">, Sched<[]>; + +let Predicates = [IsRV64] in { +def HLV_WU : HLoad_r<0b0110100, 0b00001, "hlv.wu">, Sched<[]>; +def HLV_D : HLoad_r<0b0110110, 0b00000, "hlv.d">, Sched<[]>; +def HSV_D : HStore_rr<0b0110111, "hsv.d">, Sched<[]>; } //===----------------------------------------------------------------------===// @@ -799,6 +921,9 @@ def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw +let Predicates = [HasStdExtZihintpause] in +def : InstAlias<"pause", (FENCE 0x1, 0x0)>; // 0x1 == w + def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>; def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0)>; def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, 
TIME.Encoding, X0)>; @@ -831,6 +956,12 @@ def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5 def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>; def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>; +def : InstAlias<"hfence.gvma", (HFENCE_GVMA X0, X0)>; +def : InstAlias<"hfence.gvma $rs", (HFENCE_GVMA GPR:$rs, X0)>; + +def : InstAlias<"hfence.vvma", (HFENCE_VVMA X0, X0)>; +def : InstAlias<"hfence.vvma $rs", (HFENCE_VVMA GPR:$rs, X0)>; + let EmitPriority = 0 in { def : InstAlias<"lb $rd, (${rs1})", (LB GPR:$rd, GPR:$rs1, 0)>; @@ -1006,9 +1137,6 @@ class PatGprUimmLog2XLen /// Predicates -def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ - return isOrEquivalentToAdd(N); -}]>; def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{ return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32); }]>; @@ -1018,13 +1146,14 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ }]>; def zexti32 : ComplexPattern; -def add_oneuse : PatFrag<(ops node:$A, node:$B), (add node:$A, node:$B), [{ +class binop_oneuse + : PatFrag<(ops node:$A, node:$B), + (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>; -def mul_oneuse : PatFrag<(ops node:$A, node:$B), (mul node:$A, node:$B), [{ - return N->hasOneUse(); -}]>; +def add_oneuse : binop_oneuse; +def mul_oneuse : binop_oneuse; def mul_const_oneuse : PatFrag<(ops node:$A, node:$B), (mul node:$A, node:$B), [{ @@ -1034,22 +1163,16 @@ def mul_const_oneuse : PatFrag<(ops node:$A, node:$B), return false; }]>; -def sext_oneuse : PatFrag<(ops node:$A), (sext node:$A), [{ - return N->hasOneUse(); -}]>; - -def zext_oneuse : PatFrag<(ops node:$A), (zext node:$A), [{ +class unop_oneuse + : PatFrag<(ops node:$A), + (operator node:$A), [{ return N->hasOneUse(); }]>; -def anyext_oneuse : PatFrag<(ops node:$A), (anyext node:$A), [{ - return N->hasOneUse(); -}]>; - -def fpext_oneuse : PatFrag<(ops node:$A), - (any_fpextend node:$A), [{ - return N->hasOneUse(); -}]>; +def sext_oneuse : unop_oneuse; +def zext_oneuse : unop_oneuse; +def anyext_oneuse : unop_oneuse; +def fpext_oneuse : unop_oneuse; /// Simple arithmetic operations @@ -1066,7 +1189,9 @@ def : PatGprUimmLog2XLen; def : PatGprUimmLog2XLen; def : PatGprUimmLog2XLen; -// AND with trailing ones mask exceeding simm12. +// AND with leading/trailing ones mask exceeding simm32/simm12. +def : Pat<(i64 (and GPR:$rs, LeadingOnesMask:$mask)), + (SLLI (SRLI $rs, LeadingOnesMask:$mask), LeadingOnesMask:$mask)>; def : Pat<(XLenVT (and GPR:$rs, TrailingOnesMask:$mask)), (SRLI (SLLI $rs, TrailingOnesMask:$mask), TrailingOnesMask:$mask)>; @@ -1099,10 +1224,32 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd), /// FrameIndex calculations -def : Pat<(add (XLenVT AddrFI:$Rs), simm12:$imm12), - (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>; -def : Pat<(IsOrAdd (XLenVT AddrFI:$Rs), simm12:$imm12), - (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>; +def : Pat<(FrameAddrRegImm GPR:$rs1, simm12:$imm12), + (ADDI GPR:$rs1, simm12:$imm12)>; + +/// HI and ADD_LO address nodes. 
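The add_oneuse/mul_oneuse and sext/zext/anyext/fpext fragments above are collapsed into the parameterized binop_oneuse/unop_oneuse classes, so the hasOneUse predicate is written once instead of per node. A minimal self-contained TableGen sketch of that refactor, checkable with llvm-tblgen (SDNodeStub is an illustrative stand-in for the real SDNode records from TargetSelectionDAG.td):

    // oneuse.td -- check with: llvm-tblgen oneuse.td
    class SDNodeStub<string opcode> { string Opcode = opcode; }
    def add : SDNodeStub<"add">;
    def mul : SDNodeStub<"mul">;

    // One class parameterized over the operator replaces a copy-pasted
    // fragment per node; the one-use predicate body is shared verbatim.
    class binop_oneuse<SDNodeStub op> {
      string Opcode = op.Opcode;
      code Predicate = [{ return N->hasOneUse(); }];
    }
    def add_oneuse : binop_oneuse<add>;
    def mul_oneuse : binop_oneuse<mul>;

For the riscv_hi/riscv_add_lo patterns that follow, the LUI/ADDI pair relies on the usual RISC-V split of an absolute address A into a 20-bit high part and a sign-extended 12-bit low part; because the low part can be negative, the high part must pre-compensate for the borrow. The decomposition (applied when the %hi/%lo relocations are resolved, not by these patterns themselves) is

    \mathrm{lo} = \operatorname{sext}_{12}(A \bmod 2^{12}), \qquad
    \mathrm{hi} = \Bigl\lfloor \frac{A + 2^{11}}{2^{12}} \Bigr\rfloor, \qquad
    (\mathrm{hi} \ll 12) + \mathrm{lo} = A .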
+ +def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>; +def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>; +def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>; +def : Pat<(riscv_hi tconstpool:$in), (LUI tconstpool:$in)>; + +def : Pat<(riscv_add_lo GPR:$hi, tglobaladdr:$lo), + (ADDI GPR:$hi, tglobaladdr:$lo)>; +def : Pat<(riscv_add_lo GPR:$hi, tblockaddress:$lo), + (ADDI GPR:$hi, tblockaddress:$lo)>; +def : Pat<(riscv_add_lo GPR:$hi, tjumptable:$lo), + (ADDI GPR:$hi, tjumptable:$lo)>; +def : Pat<(riscv_add_lo GPR:$hi, tconstpool:$lo), + (ADDI GPR:$hi, tconstpool:$lo)>; + +/// TLS address nodes. + +def : Pat<(riscv_hi tglobaltlsaddr:$in), (LUI tglobaltlsaddr:$in)>; +def : Pat<(riscv_add_tprel GPR:$rs1, GPR:$rs2, tglobaltlsaddr:$src), + (PseudoAddTPRel GPR:$rs1, GPR:$rs2, tglobaltlsaddr:$src)>; +def : Pat<(riscv_add_lo GPR:$src, tglobaltlsaddr:$lo), + (ADDI GPR:$src, tglobaltlsaddr:$lo)>; /// Setcc @@ -1127,6 +1274,10 @@ def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>; def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>; def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>; def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>; +def : Pat<(setgt GPR:$rs1, simm12_minus1_nonzero:$imm), + (XORI (SLTI GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>; +def : Pat<(setugt GPR:$rs1, simm12_minus1_nonzero:$imm), + (XORI (SLTIU GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>; def IntCCtoRISCVCC : SDNodeXForm(N->getOperand(2))->get(); @@ -1185,7 +1336,8 @@ def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)), // Define isCodeGenOnly = 0 to support parsing assembly "call" instruction. let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in -def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> { +def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []>, + Sched<[WriteIALU, WriteJalr, ReadJalr]> { let AsmString = "call\t$rd, $func"; } @@ -1196,7 +1348,8 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> { // Define AsmString to print "call" when compile with -S flag. // Define isCodeGenOnly = 0 to support parsing assembly "call" instruction. let isCall = 1, Defs = [X1], isCodeGenOnly = 0, Size = 8 in -def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { +def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []>, + Sched<[WriteIALU, WriteJalr, ReadJalr]> { let AsmString = "call\t$func"; } @@ -1221,7 +1374,8 @@ def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>, // Define AsmString to print "tail" when compile with -S flag. 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2], Size = 8, isCodeGenOnly = 0 in -def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> { +def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []>, + Sched<[WriteIALU, WriteJalr, ReadJalr]> { let AsmString = "tail\t$dst"; } @@ -1231,13 +1385,14 @@ def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1), PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)), - (PseudoTAIL texternalsym:$dst)>; + (PseudoTAIL tglobaladdr:$dst)>; def : Pat<(riscv_tail (iPTR texternalsym:$dst)), (PseudoTAIL texternalsym:$dst)>; let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8, isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in -def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> { +def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []>, + Sched<[WriteIALU, WriteJalr, ReadJalr]> { let AsmString = "jump\t$target, $rd"; } @@ -1246,21 +1401,33 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0, def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "lla", "$dst, $src">; +def : Pat<(riscv_lla tglobaladdr:$in), (PseudoLLA tglobaladdr:$in)>; +def : Pat<(riscv_lla tblockaddress:$in), (PseudoLLA tblockaddress:$in)>; +def : Pat<(riscv_lla tjumptable:$in), (PseudoLLA tjumptable:$in)>; +def : Pat<(riscv_lla tconstpool:$in), (PseudoLLA tconstpool:$in)>; + let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la", "$dst, $src">; +def : Pat<(riscv_la tglobaladdr:$in), (PseudoLA tglobaladdr:$in)>; + let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.ie", "$dst, $src">; -let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, +def : Pat<(riscv_la_tls_ie tglobaltlsaddr:$in), + (PseudoLA_TLS_IE tglobaltlsaddr:$in)>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; +def : Pat<(riscv_la_tls_gd tglobaltlsaddr:$in), + (PseudoLA_TLS_GD tglobaltlsaddr:$in)>; /// Sign/Zero Extends @@ -1283,11 +1450,8 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs /// Loads multiclass LdPat { - def : Pat<(vt (LoadOp BaseAddr:$rs1)), (Inst BaseAddr:$rs1, 0)>; - def : Pat<(vt (LoadOp (add BaseAddr:$rs1, simm12:$imm12))), - (Inst BaseAddr:$rs1, simm12:$imm12)>; - def : Pat<(vt (LoadOp (IsOrAdd AddrFI:$rs1, simm12:$imm12))), - (Inst AddrFI:$rs1, simm12:$imm12)>; + def : Pat<(vt (LoadOp (AddrRegImm GPR:$rs1, simm12:$imm12))), + (Inst GPR:$rs1, simm12:$imm12)>; } defm : LdPat; @@ -1302,12 +1466,8 @@ defm : LdPat; multiclass StPat { - def : Pat<(StoreOp (vt StTy:$rs2), BaseAddr:$rs1), - (Inst StTy:$rs2, BaseAddr:$rs1, 0)>; - def : Pat<(StoreOp (vt StTy:$rs2), (add BaseAddr:$rs1, simm12:$imm12)), - (Inst StTy:$rs2, BaseAddr:$rs1, simm12:$imm12)>; - def : Pat<(StoreOp (vt StTy:$rs2), (IsOrAdd AddrFI:$rs1, simm12:$imm12)), - (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>; + def : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm GPR:$rs1, simm12:$imm12)), + (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>; } defm : StPat; @@ -1415,7 +1575,7 @@ def : Pat<(i64 (shl (and 
GPR:$rs1, 0xffffffff), uimm5:$shamt)), // if only the lower 32 bits of their result is used. class binop_allwusers : PatFrag<(ops node:$lhs, node:$rhs), - (operator node:$lhs, node:$rhs), [{ + (i64 (operator node:$lhs, node:$rhs)), [{ return hasAllWUsers(Node); }]>; @@ -1496,14 +1656,14 @@ def : Pat<(debugtrap), (EBREAK)>; /// Simple optimization def : Pat<(add GPR:$rs1, (AddiPair:$rs2)), - (ADDI (ADDI GPR:$rs1, (AddiPairImmB AddiPair:$rs2)), - (AddiPairImmA GPR:$rs2))>; + (ADDI (ADDI GPR:$rs1, (AddiPairImmLarge AddiPair:$rs2)), + (AddiPairImmSmall GPR:$rs2))>; let Predicates = [IsRV64] in { // Select W instructions if only the lower 32-bits of the result are used. def : Pat<(binop_allwusers GPR:$rs1, (AddiPair:$rs2)), - (ADDIW (ADDIW GPR:$rs1, (AddiPairImmB AddiPair:$rs2)), - (AddiPairImmA AddiPair:$rs2))>; + (ADDIW (ADDIW GPR:$rs1, (AddiPairImmLarge AddiPair:$rs2)), + (AddiPairImmSmall AddiPair:$rs2))>; } //===----------------------------------------------------------------------===// @@ -1519,3 +1679,4 @@ include "RISCVInstrInfoZb.td" include "RISCVInstrInfoZk.td" include "RISCVInstrInfoV.td" include "RISCVInstrInfoZfh.td" +include "RISCVInstrInfoZicbo.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 7d23dafb0346..dd4b174d7e62 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -11,24 +11,6 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Operand and SDNode transformation definitions. -//===----------------------------------------------------------------------===// - -// A parse method for (${gpr}) or 0(${gpr}), where the 0 is be silently ignored. -// Used for GNU as Compatibility. 
-def AtomicMemOpOperand : AsmOperandClass { - let Name = "AtomicMemOpOperand"; - let RenderMethod = "addRegOperands"; - let PredicateMethod = "isGPR"; - let ParserMethod = "parseAtomicMemOp"; -} - -def GPRMemAtomic : RegisterOperand { - let ParserMatchClass = AtomicMemOpOperand; - let PrintMethod = "printAtomicMemOp"; -} - //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -36,7 +18,7 @@ def GPRMemAtomic : RegisterOperand { let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class LR_r funct3, string opcodestr> : RVInstRAtomic<0b00010, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPRMemAtomic:$rs1), + (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs1"> { let rs2 = 0; } @@ -51,7 +33,7 @@ multiclass LR_r_aq_rl funct3, string opcodestr> { let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in class AMO_rr funct5, bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic; multiclass AMO_rr_aq_rl funct5, bits<3> funct3, string opcodestr> { @@ -63,12 +45,8 @@ multiclass AMO_rr_aq_rl funct5, bits<3> funct3, string opcodestr> { multiclass AtomicStPat { - def : Pat<(StoreOp BaseAddr:$rs1, (vt StTy:$rs2)), - (Inst StTy:$rs2, BaseAddr:$rs1, 0)>; - def : Pat<(StoreOp (add BaseAddr:$rs1, simm12:$imm12), (vt StTy:$rs2)), - (Inst StTy:$rs2, BaseAddr:$rs1, simm12:$imm12)>; - def : Pat<(StoreOp (IsOrAdd AddrFI:$rs1, simm12:$imm12), (vt StTy:$rs2)), - (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>; + def : Pat<(StoreOp (AddrRegImm GPR:$rs1, simm12:$imm12), (vt StTy:$rs2)), + (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 2837b92da81f..6fb9e36d7666 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -25,6 +25,69 @@ def SDT_RISCVSplitF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>; def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>; +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. 
+//===----------------------------------------------------------------------===// + +// Zdinx + +def GPRPF64AsFPR : AsmOperandClass { + let Name = "GPRPF64AsFPR"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + +def GPRF64AsFPR : AsmOperandClass { + let Name = "GPRF64AsFPR"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + +def FPR64INX : RegisterOperand { + let ParserMatchClass = GPRF64AsFPR; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +def FPR64IN32X : RegisterOperand { + let ParserMatchClass = GPRPF64AsFPR; +} + +def DExt : ExtInfo<0, [HasStdExtD]>; +def D64Ext : ExtInfo<0, [HasStdExtD, IsRV64]>; +def ZdinxExt : ExtInfo<1, [HasStdExtZdinx, IsRV64]>; +def Zdinx32Ext : ExtInfo<2, [HasStdExtZdinx, IsRV32]>; + +def D : ExtInfo_r; +def D_INX : ExtInfo_r; +def D_IN32X : ExtInfo_r; + +def DD : ExtInfo_rr; +def DD_INX : ExtInfo_rr; +def DD_IN32X : ExtInfo_rr; +def DF : ExtInfo_rr; +def DF_INX : ExtInfo_rr; +def DF_IN32X : ExtInfo_rr; +def DX : ExtInfo_rr; +def DX_INX : ExtInfo_rr; +def DX_IN32X : ExtInfo_rr; +def DX_64 : ExtInfo_rr; +def FD : ExtInfo_rr; +def FD_INX : ExtInfo_rr; +def FD_IN32X : ExtInfo_rr; +def XD : ExtInfo_rr; +def XD_INX : ExtInfo_rr; +def XD_IN32X : ExtInfo_rr; +def XD_64 : ExtInfo_rr; + +defvar DINX = [D, D_INX, D_IN32X]; +defvar DDINX = [DD, DD_INX, DD_IN32X]; +defvar DXINX = [DX, DX_INX, DX_IN32X]; +defvar DFINX = [DF, DF_INX, DF_IN32X]; +defvar FDINX = [FD, FD_INX, FD_IN32X]; +defvar XDINX = [XD, XD_INX, XD_IN32X]; +defvar DXIN64X = [DX_64, DX_INX]; +defvar XDIN64X = [XD_64, XD_INX]; + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -36,106 +99,104 @@ def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>; // reflecting the order these fields are specified in the instruction // encoding. 
def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>; +} // Predicates = [HasStdExtD] let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in { -def FMADD_D : FPFMA_rrr_frm; -def FMSUB_D : FPFMA_rrr_frm; -def FNMSUB_D : FPFMA_rrr_frm; -def FNMADD_D : FPFMA_rrr_frm; +defm FMADD_D : FPFMA_rrr_frm_m; +defm FMSUB_D : FPFMA_rrr_frm_m; +defm FNMSUB_D : FPFMA_rrr_frm_m; +defm FNMADD_D : FPFMA_rrr_frm_m; } -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; - -def FADD_D : FPALU_rr_frm<0b0000001, "fadd.d", FPR64>, - Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; -def FSUB_D : FPALU_rr_frm<0b0000101, "fsub.d", FPR64>, - Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; -def FMUL_D : FPALU_rr_frm<0b0001001, "fmul.d", FPR64>, - Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>; -def FDIV_D : FPALU_rr_frm<0b0001101, "fdiv.d", FPR64>, - Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>; - -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; - -def FSQRT_D : FPUnaryOp_r_frm<0b0101101, 0b00000, FPR64, FPR64, "fsqrt.d">, - Sched<[WriteFSqrt64, ReadFSqrt64]>; -def : FPUnaryOpDynFrmAlias; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; + +let SchedRW = [WriteFALU64, ReadFALU64, ReadFALU64] in { +defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", DINX, /*Commutable*/1>; +defm FSUB_D : FPALU_rr_frm_m<0b0000101, "fsub.d", DINX>; +} +let SchedRW = [WriteFMul64, ReadFMul64, ReadFMul64] in +defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", DINX, /*Commutable*/1>; + +let SchedRW = [WriteFDiv64, ReadFDiv64, ReadFDiv64] in +defm FDIV_D : FPALU_rr_frm_m<0b0001101, "fdiv.d", DINX>; + +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; + +defm FSQRT_D : FPUnaryOp_r_frm_m<0b0101101, 0b00000, DDINX, "fsqrt.d">, + Sched<[WriteFSqrt64, ReadFSqrt64]>; +defm : FPUnaryOpDynFrmAlias_m; let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64], mayRaiseFPException = 0 in { -def FSGNJ_D : FPALU_rr<0b0010001, 0b000, "fsgnj.d", FPR64>; -def FSGNJN_D : FPALU_rr<0b0010001, 0b001, "fsgnjn.d", FPR64>; -def FSGNJX_D : FPALU_rr<0b0010001, 0b010, "fsgnjx.d", FPR64>; +defm FSGNJ_D : FPALU_rr_m<0b0010001, 0b000, "fsgnj.d", DINX>; +defm FSGNJN_D : FPALU_rr_m<0b0010001, 0b001, "fsgnjn.d", DINX>; +defm FSGNJX_D : FPALU_rr_m<0b0010001, 0b010, "fsgnjx.d", DINX>; } let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in { -def FMIN_D : FPALU_rr<0b0010101, 0b000, "fmin.d", FPR64>; -def FMAX_D : FPALU_rr<0b0010101, 0b001, "fmax.d", FPR64>; +defm FMIN_D : FPALU_rr_m<0b0010101, 0b000, "fmin.d", DINX, /*Commutable*/1>; +defm FMAX_D : FPALU_rr_m<0b0010101, 0b001, "fmax.d", DINX, /*Commutable*/1>; } -def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, 0b00001, FPR32, FPR64, "fcvt.s.d">, - Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_S_D : FPUnaryOp_r_frm_m<0b0100000, 0b00001, FDINX, "fcvt.s.d">, + Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b00000, 0b000, FPR64, FPR32, "fcvt.d.s">, - Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>; +defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, DFINX, "fcvt.d.s">, + Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>; let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in { -def FEQ_D : FPCmp_rr<0b1010001, 0b010, "feq.d", FPR64>; -def FLT_D : FPCmp_rr<0b1010001, 0b001, "flt.d", FPR64>; -def FLE_D : 
FPCmp_rr<0b1010001, 0b000, "fle.d", FPR64>; +defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", DINX, /*Commutable*/1>; +defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", DINX>; +defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", DINX>; } -let mayRaiseFPException = 0 in -def FCLASS_D : FPUnaryOp_r<0b1110001, 0b00000, 0b001, GPR, FPR64, "fclass.d">, - Sched<[WriteFClass64, ReadFClass64]>; +defm FCLASS_D : FPUnaryOp_r_m<0b1110001, 0b00000, 0b001, XDINX, "fclass.d">, + Sched<[WriteFClass64, ReadFClass64]>; -def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, 0b00000, GPR, FPR64, "fcvt.w.d">, +defm FCVT_W_D : FPUnaryOp_r_frm_m<0b1100001, 0b00000, XDINX, "fcvt.w.d">, Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, 0b00001, GPR, FPR64, "fcvt.wu.d">, - Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_WU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00001, XDINX, "fcvt.wu.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b00000, 0b000, FPR64, GPR, "fcvt.d.w">, - Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; - -def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b00001, 0b000, FPR64, GPR, "fcvt.d.wu">, +defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, DXINX, "fcvt.d.w">, Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; -} // Predicates = [HasStdExtD] -let Predicates = [HasStdExtD, IsRV64] in { -def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, 0b00010, GPR, FPR64, "fcvt.l.d">, - Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, DXINX, "fcvt.d.wu">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; -def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, 0b00011, GPR, FPR64, "fcvt.lu.d">, +defm FCVT_L_D : FPUnaryOp_r_frm_m<0b1100001, 0b00010, XDIN64X, "fcvt.l.d">, Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; -let mayRaiseFPException = 0 in +defm FCVT_LU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00011, XDIN64X, "fcvt.lu.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; +defm : FPUnaryOpDynFrmAlias_m; + +let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in def FMV_X_D : FPUnaryOp_r<0b1110001, 0b00000, 0b000, GPR, FPR64, "fmv.x.d">, Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>; -def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, 0b00010, FPR64, GPR, "fcvt.d.l">, - Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; -def : FPUnaryOpDynFrmAlias; - -def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, 0b00011, FPR64, GPR, "fcvt.d.lu">, +defm FCVT_D_L : FPUnaryOp_r_frm_m<0b1101001, 0b00010, DXIN64X, "fcvt.d.l">, Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; -let mayRaiseFPException = 0 in +defm FCVT_D_LU : FPUnaryOp_r_frm_m<0b1101001, 0b00011, DXIN64X, "fcvt.d.lu">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; +defm : FPUnaryOpDynFrmAlias_m; + +let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in def FMV_D_X : FPUnaryOp_r<0b1111001, 0b00000, 0b000, FPR64, GPR, "fmv.d.x">, Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>; -} // Predicates = [HasStdExtD, IsRV64] //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) @@ -164,16 +225,30 @@ def PseudoQuietFLT_D : PseudoQuietFCMP; } } // Predicates = [HasStdExtD] +let Predicates = [HasStdExtZdinx, 
IsRV64] in { +def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D_INX FPR64INX:$rd, FPR64INX:$rs, FPR64INX:$rs)>; +def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D_INX FPR64INX:$rd, FPR64INX:$rs, FPR64INX:$rs)>; + +def : InstAlias<"fgt.d $rd, $rs, $rt", + (FLT_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>; +def : InstAlias<"fge.d $rd, $rs, $rt", + (FLE_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>; +} // Predicates = [HasStdExtZdinx, IsRV64] + +let Predicates = [HasStdExtZdinx, IsRV32] in { +def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D_IN32X FPR64IN32X:$rd, FPR64IN32X:$rs, FPR64IN32X:$rs)>; +def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D_IN32X FPR64IN32X:$rd, FPR64IN32X:$rs, FPR64IN32X:$rs)>; + +def : InstAlias<"fgt.d $rd, $rs, $rt", + (FLT_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>; +def : InstAlias<"fge.d $rd, $rs, $rt", + (FLE_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>; +} // Predicates = [HasStdExtZdinx, IsRV32] + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// -class PatFpr64Fpr64 - : Pat<(OpNode FPR64:$rs1, FPR64:$rs2), (Inst $rs1, $rs2)>; - -class PatFpr64Fpr64DynFrm - : Pat<(OpNode FPR64:$rs1, FPR64:$rs2), (Inst $rs1, $rs2, 0b111)>; - let Predicates = [HasStdExtD] in { /// Float conversion operations @@ -187,17 +262,17 @@ def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; /// Float arithmetic operations -def : PatFpr64Fpr64DynFrm; -def : PatFpr64Fpr64DynFrm; -def : PatFpr64Fpr64DynFrm; -def : PatFpr64Fpr64DynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>; def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>; def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>; -def : PatFpr64Fpr64; +def : PatFprFpr; def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>; def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, @@ -219,11 +294,15 @@ def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3), def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)), (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; +// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA) +def : Pat<(fneg (any_fma_nsz FPR64:$rs1, FPR64:$rs2, FPR64:$rs3)), + (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; + // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches // LLVM's fminnum and fmaxnum. // . -def : PatFpr64Fpr64; -def : PatFpr64Fpr64; +def : PatFprFpr; +def : PatFprFpr; /// Setcc // FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index a8ac06ba8da3..a71d5b4737c3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -53,10 +53,81 @@ def riscv_any_fcvt_wu_rv64 : PatFrags<(ops node:$src, node:$frm), [(riscv_strict_fcvt_wu_rv64 node:$src, node:$frm), (riscv_fcvt_wu_rv64 node:$src, node:$frm)]>; +def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3), + (any_fma node:$rs1, node:$rs2, node:$rs3), [{ + return N->getFlags().hasNoSignedZeros(); +}]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// +// Zfinx + +def GPRAsFPR : AsmOperandClass { + let Name = "GPRAsFPR"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + +def FPR32INX : RegisterOperand { + let ParserMatchClass = GPRAsFPR; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +// inx = 0 : f, d, zfh, zfhmin +// = 1 : zfinx, zdinx, zhinx, zhinxmin +// = 2 : zdinx_rv32 +class ExtInfo inx, list pres> { + string Suffix = !cond(!eq(inx, 0): "", + !eq(inx, 1): "_INX", + !eq(inx, 2): "_IN32X"); + list Predicates = pres; + string Space = !cond(!eq(inx, 0): "", + !eq(inx, 1): "RVZfinx", + !eq(inx, 2): "RV32Zdinx"); +} + +class ExtInfo_r { + string Suffix = ext.Suffix; + list Predicates = ext.Predicates; + string Space = ext.Space; + DAGOperand Reg = reg; +} + +class ExtInfo_rr { + string Suffix = ext.Suffix; + list Predicates = ext.Predicates; + string Space = ext.Space; + DAGOperand RdTy = rdty; + DAGOperand Rs1Ty = rs1ty; +} + +def FExt : ExtInfo<0, [HasStdExtF]>; +def F64Ext : ExtInfo<0, [HasStdExtF, IsRV64]>; +def ZfinxExt : ExtInfo<1, [HasStdExtZfinx]>; +def Zfinx64Ext : ExtInfo<1, [HasStdExtZfinx, IsRV64]>; + +def F : ExtInfo_r; +def F_INX : ExtInfo_r; + +def FF : ExtInfo_rr; +def FF_INX : ExtInfo_rr; +def FX : ExtInfo_rr; +def FX_INX : ExtInfo_rr; +def FX_64 : ExtInfo_rr; +def FX_INX_64 : ExtInfo_rr; +def XF : ExtInfo_rr; +def XF_64 : ExtInfo_rr; +def XF_INX : ExtInfo_rr; +def XF_INX_64 : ExtInfo_rr; + +defvar FINX = [F, F_INX]; +defvar FFINX = [FF, FF_INX]; +defvar FXINX = [FX, FX_INX]; +defvar XFINX = [XF, XF_INX]; +defvar XFIN64X = [XF_64, XF_INX_64]; +defvar FXIN64X = [FX_64, FX_INX_64]; + // Floating-point rounding mode def FRMArg : AsmOperandClass { @@ -92,64 +163,131 @@ class FPStore_r funct3, string opcodestr, RegisterClass rty, Sched<[sw, ReadStoreData, ReadFMemBase]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, - UseNamedOperandTable = 1, hasPostISelHook = 1 in + UseNamedOperandTable = 1, hasPostISelHook = 1, isCommutable = 1 in class FPFMA_rrr_frm funct2, string opcodestr, - RegisterClass rty> + DAGOperand rty> : RVInstR4Frm; +multiclass FPFMA_rrr_frm_m funct2, + string opcodestr, list Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPFMA_rrr_frm; +} + class FPFMADynFrmAlias + DAGOperand rty> : InstAlias; +multiclass FPFMADynFrmAlias_m Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates in + def : FPFMADynFrmAlias(Inst#Ext.Suffix), OpcodeStr, + Ext.Reg>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in class FPALU_rr funct7, bits<3> funct3, string opcodestr, - RegisterClass rty> + DAGOperand rty, bit Commutable> : RVInstR; + (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> { + let isCommutable = Commutable; +} +multiclass FPALU_rr_m funct7, bits<3> funct3, string opcodestr, + list Exts, bit Commutable = 0> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPALU_rr; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, UseNamedOperandTable = 1, hasPostISelHook = 1 in -class FPALU_rr_frm funct7, string opcodestr, RegisterClass rty> +class FPALU_rr_frm funct7, string opcodestr, DAGOperand rty, + bit Commutable> : RVInstRFrm; + "$rd, $rs1, $rs2, $frm"> { + let isCommutable = Commutable; +} +multiclass FPALU_rr_frm_m funct7, string opcodestr, + list Exts, bit Commutable 
= 0> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPALU_rr_frm; +} class FPALUDynFrmAlias + DAGOperand rty> : InstAlias; +multiclass FPALUDynFrmAlias_m Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates in + def : FPALUDynFrmAlias(Inst#Ext.Suffix), OpcodeStr, + Ext.Reg>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in class FPUnaryOp_r funct7, bits<5> rs2val, bits<3> funct3, - RegisterClass rdty, RegisterClass rs1ty, string opcodestr> + DAGOperand rdty, DAGOperand rs1ty, string opcodestr> : RVInstR { let rs2 = rs2val; } +multiclass FPUnaryOp_r_m funct7, bits<5> rs2val, bits<3> funct3, + list Exts, string opcodestr> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPUnaryOp_r; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, UseNamedOperandTable = 1, hasPostISelHook = 1 in -class FPUnaryOp_r_frm funct7, bits<5> rs2val, RegisterClass rdty, - RegisterClass rs1ty, string opcodestr> +class FPUnaryOp_r_frm funct7, bits<5> rs2val, DAGOperand rdty, + DAGOperand rs1ty, string opcodestr> : RVInstRFrm { let rs2 = rs2val; } +multiclass FPUnaryOp_r_frm_m funct7, bits<5> rs2val, + list Exts, string opcodestr> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPUnaryOp_r_frm; +} class FPUnaryOpDynFrmAlias + DAGOperand rdty, DAGOperand rs1ty> : InstAlias; +multiclass FPUnaryOpDynFrmAlias_m Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates in + def : FPUnaryOpDynFrmAlias(Inst#Ext.Suffix), + OpcodeStr, Ext.RdTy, Ext.Rs1Ty>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in class FPCmp_rr funct7, bits<3> funct3, string opcodestr, - RegisterClass rty> + DAGOperand rty, bit Commutable> : RVInstR; + (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> { + let isCommutable = Commutable; +} +multiclass FPCmp_rr_m funct7, bits<3> funct3, string opcodestr, + list Exts, bit Commutable = 0> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPCmp_rr; +} //===----------------------------------------------------------------------===// // Instructions @@ -162,101 +300,100 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>; // reflecting the order these fields are specified in the instruction // encoding. 
def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>; +} // Predicates = [HasStdExtF] let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in { -def FMADD_S : FPFMA_rrr_frm; -def FMSUB_S : FPFMA_rrr_frm; -def FNMSUB_S : FPFMA_rrr_frm; -def FNMADD_S : FPFMA_rrr_frm; +defm FMADD_S : FPFMA_rrr_frm_m; +defm FMSUB_S : FPFMA_rrr_frm_m; +defm FNMSUB_S : FPFMA_rrr_frm_m; +defm FNMADD_S : FPFMA_rrr_frm_m; +} + +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; + +let SchedRW = [WriteFALU32, ReadFALU32, ReadFALU32] in { +defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", FINX, /*Commutable*/1>; +defm FSUB_S : FPALU_rr_frm_m<0b0000100, "fsub.s", FINX>; } +let SchedRW = [WriteFMul32, ReadFMul32, ReadFMul32] in +defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", FINX, /*Commutable*/1>; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; - -def FADD_S : FPALU_rr_frm<0b0000000, "fadd.s", FPR32>, - Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; -def FSUB_S : FPALU_rr_frm<0b0000100, "fsub.s", FPR32>, - Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; -def FMUL_S : FPALU_rr_frm<0b0001000, "fmul.s", FPR32>, - Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>; -def FDIV_S : FPALU_rr_frm<0b0001100, "fdiv.s", FPR32>, - Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>; - -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; - -def FSQRT_S : FPUnaryOp_r_frm<0b0101100, 0b00000, FPR32, FPR32, "fsqrt.s">, - Sched<[WriteFSqrt32, ReadFSqrt32]>; -def : FPUnaryOpDynFrmAlias; +let SchedRW = [WriteFDiv32, ReadFDiv32, ReadFDiv32] in +defm FDIV_S : FPALU_rr_frm_m<0b0001100, "fdiv.s", FINX>; + +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; + +defm FSQRT_S : FPUnaryOp_r_frm_m<0b0101100, 0b00000, FFINX, "fsqrt.s">, + Sched<[WriteFSqrt32, ReadFSqrt32]>; +defm : FPUnaryOpDynFrmAlias_m; let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32], mayRaiseFPException = 0 in { -def FSGNJ_S : FPALU_rr<0b0010000, 0b000, "fsgnj.s", FPR32>; -def FSGNJN_S : FPALU_rr<0b0010000, 0b001, "fsgnjn.s", FPR32>; -def FSGNJX_S : FPALU_rr<0b0010000, 0b010, "fsgnjx.s", FPR32>; +defm FSGNJ_S : FPALU_rr_m<0b0010000, 0b000, "fsgnj.s", FINX>; +defm FSGNJN_S : FPALU_rr_m<0b0010000, 0b001, "fsgnjn.s", FINX>; +defm FSGNJX_S : FPALU_rr_m<0b0010000, 0b010, "fsgnjx.s", FINX>; } let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in { -def FMIN_S : FPALU_rr<0b0010100, 0b000, "fmin.s", FPR32>; -def FMAX_S : FPALU_rr<0b0010100, 0b001, "fmax.s", FPR32>; +defm FMIN_S : FPALU_rr_m<0b0010100, 0b000, "fmin.s", FINX, /*Commutable*/1>; +defm FMAX_S : FPALU_rr_m<0b0010100, 0b001, "fmax.s", FINX, /*Commutable*/1>; } -def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, 0b00000, GPR, FPR32, "fcvt.w.s">, - Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; -def : FPUnaryOpDynFrmAlias; - -def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, 0b00001, GPR, FPR32, "fcvt.wu.s">, +defm FCVT_W_S : FPUnaryOp_r_frm_m<0b1100000, 0b00000, XFINX, "fcvt.w.s">, Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; + +defm FCVT_WU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00001, XFINX, "fcvt.wu.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; +defm : FPUnaryOpDynFrmAlias_m; let mayRaiseFPException = 0 in def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">, Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>; let SchedRW = 
[WriteFCmp32, ReadFCmp32, ReadFCmp32] in { -def FEQ_S : FPCmp_rr<0b1010000, 0b010, "feq.s", FPR32>; -def FLT_S : FPCmp_rr<0b1010000, 0b001, "flt.s", FPR32>; -def FLE_S : FPCmp_rr<0b1010000, 0b000, "fle.s", FPR32>; +defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", FINX, /*Commutable*/1>; +defm FLT_S : FPCmp_rr_m<0b1010000, 0b001, "flt.s", FINX>; +defm FLE_S : FPCmp_rr_m<0b1010000, 0b000, "fle.s", FINX>; } let mayRaiseFPException = 0 in -def FCLASS_S : FPUnaryOp_r<0b1110000, 0b00000, 0b001, GPR, FPR32, "fclass.s">, - Sched<[WriteFClass32, ReadFClass32]>; - -def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, 0b00000, FPR32, GPR, "fcvt.s.w">, - Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; -def : FPUnaryOpDynFrmAlias; +defm FCLASS_S : FPUnaryOp_r_m<0b1110000, 0b00000, 0b001, XFINX, "fclass.s">, + Sched<[WriteFClass32, ReadFClass32]>; -def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, 0b00001, FPR32, GPR, "fcvt.s.wu">, +defm FCVT_S_W : FPUnaryOp_r_frm_m<0b1101000, 0b00000, FXINX, "fcvt.s.w">, Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; + +defm FCVT_S_WU : FPUnaryOp_r_frm_m<0b1101000, 0b00001, FXINX, "fcvt.s.wu">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; +defm : FPUnaryOpDynFrmAlias_m; let mayRaiseFPException = 0 in def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">, Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>; -} // Predicates = [HasStdExtF] - -let Predicates = [HasStdExtF, IsRV64] in { -def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, 0b00010, GPR, FPR32, "fcvt.l.s">, - Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; -def : FPUnaryOpDynFrmAlias; -def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, 0b00011, GPR, FPR32, "fcvt.lu.s">, +defm FCVT_L_S : FPUnaryOp_r_frm_m<0b1100000, 0b00010, XFIN64X, "fcvt.l.s">, Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, 0b00010, FPR32, GPR, "fcvt.s.l">, - Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_LU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00011, XFIN64X, "fcvt.lu.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, 0b00011, FPR32, GPR, "fcvt.s.lu">, +defm FCVT_S_L : FPUnaryOp_r_frm_m<0b1101000, 0b00010, FXIN64X, "fcvt.s.l">, Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; -def : FPUnaryOpDynFrmAlias; -} // Predicates = [HasStdExtF, IsRV64] +defm : FPUnaryOpDynFrmAlias_m; + +defm FCVT_S_LU : FPUnaryOp_r_frm_m<0b1101000, 0b00011, FXIN64X, "fcvt.s.lu">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; +defm : FPUnaryOpDynFrmAlias_m; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) @@ -315,6 +452,16 @@ def PseudoQuietFLT_S : PseudoQuietFCMP; } } // Predicates = [HasStdExtF] +let Predicates = [HasStdExtZfinx] in { +def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S_INX FPR32INX:$rd, FPR32INX:$rs, FPR32INX:$rs)>; +def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S_INX FPR32INX:$rd, FPR32INX:$rs, FPR32INX:$rs)>; + +def : InstAlias<"fgt.s $rd, $rs, $rt", + (FLT_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>; +def : InstAlias<"fge.s $rd, $rs, $rt", + (FLE_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>; +} // Predicates = [HasStdExtZfinx] + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns 
//===----------------------------------------------------------------------===// @@ -327,11 +474,13 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; class PatSetCC : Pat<(OpNode Ty:$rs1, Ty:$rs2, Cond), (Inst $rs1, $rs2)>; -class PatFpr32Fpr32 - : Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2)>; +class PatFprFpr + : Pat<(OpNode RegTy:$rs1, RegTy:$rs2), (Inst $rs1, $rs2)>; -class PatFpr32Fpr32DynFrm - : Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2, 0b111)>; +class PatFprFprDynFrm + : Pat<(OpNode RegTy:$rs1, RegTy:$rs2), (Inst $rs1, $rs2, 0b111)>; let Predicates = [HasStdExtF] in { @@ -346,17 +495,17 @@ def : Pat<(f32 (fpimmneg0)), (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0))>; /// Float arithmetic operations -def : PatFpr32Fpr32DynFrm; -def : PatFpr32Fpr32DynFrm; -def : PatFpr32Fpr32DynFrm; -def : PatFpr32Fpr32DynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>; def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>; def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>; -def : PatFpr32Fpr32; +def : PatFprFpr; def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>; // fmadd: rs1 * rs2 + rs3 @@ -375,11 +524,15 @@ def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3), def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)), (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; +// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA) +def : Pat<(fneg (any_fma_nsz FPR32:$rs1, FPR32:$rs2, FPR32:$rs3)), + (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; + // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches // LLVM's fminnum and fmaxnum // . 
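The no-signed-zeros guard on the fneg (any_fma_nsz ...) -> FNMADD patterns above is load-bearing: fnmadd computes -(rs1 x rs2) - rs3 with a single rounding, while the unfused form negates the already-rounded FMA result, and the two can disagree on the sign of a zero. A concrete case, exact under IEEE 754 round-to-nearest:

    a = 1,\ b = 1,\ c = -1:\quad
    \operatorname{fneg}(\operatorname{fma}(a,b,c)) = -(1 \cdot 1 - 1) = -(+0.0) = -0.0,
    \quad\text{but}\quad
    -(a \cdot b) - c = -1 + 1 = +0.0 .

Both computations are exact, yet the zero signs differ, which is why the fold is only attempted when the node carries the nsz flag -- precisely what the any_fma_nsz PatFrag checks via N->getFlags().hasNoSignedZeros().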
-def : PatFpr32Fpr32; -def : PatFpr32Fpr32; +def : PatFprFpr; +def : PatFprFpr; /// Setcc // FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index b62e23d3b0fa..72ba8460116f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -25,13 +25,13 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDT_RISCVIntBinOpW>; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtM] in { -def MUL : ALU_rr<0b0000001, 0b000, "mul">, +def MUL : ALU_rr<0b0000001, 0b000, "mul", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; -def MULH : ALU_rr<0b0000001, 0b001, "mulh">, +def MULH : ALU_rr<0b0000001, 0b001, "mulh", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">, Sched<[WriteIMul, ReadIMul, ReadIMul]>; -def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">, +def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; def DIV : ALU_rr<0b0000001, 0b100, "div">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; @@ -44,7 +44,7 @@ def REMU : ALU_rr<0b0000001, 0b111, "remu">, } // Predicates = [HasStdExtM] let Predicates = [HasStdExtM, IsRV64] in { -def MULW : ALUW_rr<0b0000001, 0b000, "mulw">, +def MULW : ALUW_rr<0b0000001, 0b000, "mulw", /*Commutable*/1>, Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>; def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 306024a3e4fd..f8bc241039f8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file describes the RISC-V instructions from the standard 'V' Vector -/// extension, version 0.10. -/// This version is still experimental as the 'V' extension hasn't been -/// ratified yet. +/// extension, version 1.0. 
/// //===----------------------------------------------------------------------===// @@ -895,6 +893,7 @@ defm VSUB_V : VALU_IV_V_X<"vsub", 0b000010>; defm VRSUB_V : VALU_IV_X_I<"vrsub", 0b000011>; def : InstAlias<"vneg.v $vd, $vs$vm", (VRSUB_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; +def : InstAlias<"vneg.v $vd, $vs", (VRSUB_VX VR:$vd, VR:$vs, X0, zero_reg)>; // Vector Widening Integer Add/Subtract // Refer to 11.2 Widening Vector Arithmetic Instructions @@ -922,8 +921,12 @@ defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">; def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm", (VWADD_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; +def : InstAlias<"vwcvt.x.x.v $vd, $vs", + (VWADD_VX VR:$vd, VR:$vs, X0, zero_reg)>; def : InstAlias<"vwcvtu.x.x.v $vd, $vs$vm", (VWADDU_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; +def : InstAlias<"vwcvtu.x.x.v $vd, $vs", + (VWADDU_VX VR:$vd, VR:$vs, X0, zero_reg)>; // Vector Integer Extension defm VZEXT_VF8 : VALU_MV_VS2<"vzext.vf8", 0b010010, 0b00010>; @@ -952,6 +955,8 @@ defm VXOR_V : VALU_IV_V_X_I<"vxor", 0b001011>; def : InstAlias<"vnot.v $vd, $vs$vm", (VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>; +def : InstAlias<"vnot.v $vd, $vs", + (VXOR_VI VR:$vd, VR:$vs, -1, zero_reg)>; // Vector Single-Width Bit Shift Instructions defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>; @@ -970,6 +975,8 @@ defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", (VNSRL_WX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; +def : InstAlias<"vncvt.x.x.w $vd, $vs", + (VNSRL_WX VR:$vd, VR:$vs, X0, zero_reg)>; // Vector Integer Comparison Instructions let RVVConstraint = NoConstraint in { @@ -1124,12 +1131,16 @@ defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; let Predicates = [HasVInstructionsAnyF] in { // Vector Single-Width Floating-Point Add/Subtract Instructions +let Uses = [FRM], mayRaiseFPException = true in { defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>; defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>; defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; +} // Vector Widening Floating-Point Add/Subtract Instructions -let Constraints = "@earlyclobber $vd" in { +let Constraints = "@earlyclobber $vd", + Uses = [FRM], + mayRaiseFPException = true in { let RVVConstraint = WidenV in { defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>; defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>; @@ -1142,19 +1153,23 @@ let RVVConstraint = WidenW in { defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW -} // Constraints = "@earlyclobber $vd" +} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true // Vector Single-Width Floating-Point Multiply/Divide Instructions +let Uses = [FRM], mayRaiseFPException = true in { defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>; defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>; defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>; +} // Vector Widening Floating-Point Multiply -let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { +let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, + Uses = [FRM], mayRaiseFPException = true in { defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true // Vector Single-Width Floating-Point Fused Multiply-Add Instructions +let Uses = [FRM], mayRaiseFPException = true in { defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>; defm 
VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>; defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>; @@ -1163,23 +1178,31 @@ defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>; defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>; defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>; defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; +} // Vector Widening Floating-Point Fused Multiply-Add Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { +let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, + Uses = [FRM], mayRaiseFPException = true in { defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true // Vector Floating-Point Square-Root Instruction +let Uses = [FRM], mayRaiseFPException = true in { defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; -defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; +} + +let mayRaiseFPException = true in +defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; // Vector Floating-Point MIN/MAX Instructions +let mayRaiseFPException = true in { defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>; defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>; +} // Vector Floating-Point Sign-Injection Instructions defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>; @@ -1188,18 +1211,22 @@ defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>; def : InstAlias<"vfneg.v $vd, $vs$vm", (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>; +def : InstAlias<"vfneg.v $vd, $vs", + (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, zero_reg)>; def : InstAlias<"vfabs.v $vd, $vs$vm", (VFSGNJX_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>; +def : InstAlias<"vfabs.v $vd, $vs", + (VFSGNJX_VV VR:$vd, VR:$vs, VR:$vs, zero_reg)>; // Vector Floating-Point Compare Instructions -let RVVConstraint = NoConstraint in { +let RVVConstraint = NoConstraint, mayRaiseFPException = true in { defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>; defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>; defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>; defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>; defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>; defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>; -} // RVVConstraint = NoConstraint +} // RVVConstraint = NoConstraint, mayRaiseFPException = true def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", (VMFLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; @@ -1288,10 +1315,14 @@ defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; let Predicates = [HasVInstructionsAnyF] in { // Vector Single-Width Floating-Point Reduction Instructions let RVVConstraint = NoConstraint in { +let Uses = [FRM], mayRaiseFPException = true in { defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>; defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>; +} +let mayRaiseFPException = true in { defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>; defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>; +} } // RVVConstraint = NoConstraint def : InstAlias<"vfredsum.vs $vd, $vs2, $vs1$vm", @@ -1303,8 +1334,10 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) 
operand. +let Uses = [FRM], mayRaiseFPException = true in { defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>; +} } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm", diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 9087ed50f9fc..fbe396d278b4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -7,8 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file contains the required infrastructure to support code generation -/// for the standard 'V' (Vector) extension, version 0.10. This version is still -/// experimental as the 'V' extension hasn't been ratified yet. +/// for the standard 'V' (Vector) extension, version 1.0. /// /// This file is included from RISCVInstrInfoV.td /// @@ -40,13 +39,37 @@ def DecImm : SDNodeXFormgetValueType(0)); }]>; -defvar TAIL_UNDISTURBED = 0; +defvar TAIL_UNDISTURBED_MASK_UNDISTURBED = 0; defvar TAIL_AGNOSTIC = 1; //===----------------------------------------------------------------------===// // Utilities. //===----------------------------------------------------------------------===// +class PseudoToVInst { + string VInst = !subst("_M8", "", + !subst("_M4", "", + !subst("_M2", "", + !subst("_M1", "", + !subst("_MF2", "", + !subst("_MF4", "", + !subst("_MF8", "", + !subst("_B1", "", + !subst("_B2", "", + !subst("_B4", "", + !subst("_B8", "", + !subst("_B16", "", + !subst("_B32", "", + !subst("_B64", "", + !subst("_MASK", "", + !subst("_TIED", "", + !subst("_TU", "", + !subst("F16", "F", + !subst("F32", "F", + !subst("F64", "F", + !subst("Pseudo", "", PseudoInst))))))))))))))))))))); +} + // This class describes information associated to the LMUL. class LMULInfo { @@ -403,7 +426,7 @@ class CONST8b val> { def InvalidIndex : CONST8b<0x80>; class RISCVVPseudo { Pseudo Pseudo = !cast(NAME); // Used as a key. - Instruction BaseInstr; + Instruction BaseInstr = !cast(PseudoToVInst.VInst); } // The actual table. 
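The PseudoToVInst helper above derives each vector pseudo's base instruction name purely by string surgery, and wiring it into the BaseInstr default of RISCVVPseudo is what lets the many per-class "let BaseInstr = !cast(...)" lines disappear in the hunks below. A trimmed, self-contained illustration of the !subst chain, checkable with llvm-tblgen (only three of the twenty-odd substitutions are kept here):

    // name_demo.td -- check with: llvm-tblgen name_demo.td
    class PseudoToVInst<string PseudoInst> {
      string VInst = !subst("_M8", "",
                     !subst("_MASK", "",
                     !subst("Pseudo", "", PseudoInst)));
    }
    // The record dump shows:  string VInst = "VADD_VV";
    def demo : PseudoToVInst<"PseudoVADD_VV_M8_MASK">;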
@@ -419,11 +442,26 @@ def RISCVVPseudosTable : GenericTable { def RISCVVIntrinsicsTable : GenericTable { let FilterClass = "RISCVVIntrinsic"; let CppTypeName = "RISCVVIntrinsicInfo"; - let Fields = ["IntrinsicID", "SplatOperand", "VLOperand"]; + let Fields = ["IntrinsicID", "ScalarOperand", "VLOperand"]; let PrimaryKey = ["IntrinsicID"]; let PrimaryKeyName = "getRISCVVIntrinsicInfo"; } +class RISCVMaskedPseudo MaskIdx, bit HasTU = true> { + Pseudo MaskedPseudo = !cast(NAME); + Pseudo UnmaskedPseudo = !cast(!subst("_MASK", "", NAME)); + Pseudo UnmaskedTUPseudo = !if(HasTU, !cast(!subst("_MASK", "", NAME # "_TU")), MaskedPseudo); + bits<4> MaskOpIdx = MaskIdx; +} + +def RISCVMaskedPseudosTable : GenericTable { + let FilterClass = "RISCVMaskedPseudo"; + let CppTypeName = "RISCVMaskedPseudoInfo"; + let Fields = ["MaskedPseudo", "UnmaskedPseudo", "UnmaskedTUPseudo", "MaskOpIdx"]; + let PrimaryKey = ["MaskedPseudo"]; + let PrimaryKeyName = "getMaskedPseudoInfo"; +} + class RISCVVLE S, bits<3> L> { bits<1> Masked = M; bits<1> IsTU = TU; @@ -489,9 +527,10 @@ def RISCVVSXTable : RISCVVLX_VSXTable { let PrimaryKeyName = "getVSXPseudo"; } -class RISCVVLSEG N, bit M, bit Str, bit F, bits<3> S, bits<3> L> { +class RISCVVLSEG N, bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> { bits<4> NF = N; bits<1> Masked = M; + bits<1> IsTU = TU; bits<1> Strided = Str; bits<1> FF = F; bits<3> Log2SEW = S; @@ -502,14 +541,15 @@ class RISCVVLSEG N, bit M, bit Str, bit F, bits<3> S, bits<3> L> { def RISCVVLSEGTable : GenericTable { let FilterClass = "RISCVVLSEG"; let CppTypeName = "VLSEGPseudo"; - let Fields = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; - let PrimaryKey = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL"]; + let Fields = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; + let PrimaryKey = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"]; let PrimaryKeyName = "getVLSEGPseudo"; } -class RISCVVLXSEG N, bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> { +class RISCVVLXSEG N, bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> { bits<4> NF = N; bits<1> Masked = M; + bits<1> IsTU = TU; bits<1> Ordered = O; bits<3> Log2SEW = S; bits<3> LMUL = L; @@ -520,8 +560,8 @@ class RISCVVLXSEG N, bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> { def RISCVVLXSEGTable : GenericTable { let FilterClass = "RISCVVLXSEG"; let CppTypeName = "VLXSEGPseudo"; - let Fields = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; - let PrimaryKey = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; + let Fields = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; + let PrimaryKey = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; let PrimaryKeyName = "getVLXSEGPseudo"; } @@ -564,30 +604,6 @@ def RISCVVSXSEGTable : GenericTable { // Helpers to define the different pseudo instructions. 
//===----------------------------------------------------------------------===// -class PseudoToVInst { - string VInst = !subst("_M8", "", - !subst("_M4", "", - !subst("_M2", "", - !subst("_M1", "", - !subst("_MF2", "", - !subst("_MF4", "", - !subst("_MF8", "", - !subst("_B1", "", - !subst("_B2", "", - !subst("_B4", "", - !subst("_B8", "", - !subst("_B16", "", - !subst("_B32", "", - !subst("_B64", "", - !subst("_MASK", "", - !subst("_TIED", "", - !subst("_TU", "", - !subst("F16", "F", - !subst("F32", "F", - !subst("F64", "F", - !subst("Pseudo", "", PseudoInst))))))))))))))))))))); -} - // The destination vector register group for a masked vector instruction cannot // overlap the source mask register (v0), unless the destination vector register // is being written with a mask value (e.g., comparisons) or the scalar result @@ -627,25 +643,24 @@ class VPseudo : let VLMul = m.value; } -class VPseudoUSLoadNoMask : +class VPseudoUSLoadNoMask : Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let HasDummyMask = DummyMask; } -class VPseudoUSLoadNoMaskTU : +class VPseudoUSLoadNoMaskTU : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -654,16 +669,15 @@ class VPseudoUSLoadNoMaskTU : let HasDummyMask = 1; let HasMergeOp = 1; let Constraints = "$rd = $dest"; - let BaseInstr = !cast(PseudoToVInst.VInst); } -class VPseudoUSLoadMask : +class VPseudoUSLoadMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -672,7 +686,53 @@ class VPseudoUSLoadMask : let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; +} + +class VPseudoUSLoadFFNoMask : + Pseudo<(outs RetClass:$rd, GPR:$vl), + (ins GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLE.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = DummyMask; +} + +class VPseudoUSLoadFFNoMaskTU : + Pseudo<(outs RetClass:$rd, GPR:$vl), + (ins RetClass:$dest, GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLE.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; +} + +class VPseudoUSLoadFFMask : + Pseudo<(outs GetVRegNoV0.R:$rd, GPR:$vl), + (ins GetVRegNoV0.R:$merge, + GPR:$rs1, + VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>, + RISCVVPseudo, + RISCVVLE.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$rd = $merge"; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasVecPolicyOp = 1; + let UsesMaskPolicy = 1; } class VPseudoSLoadNoMask: @@ -686,7 +746,6 @@ class VPseudoSLoadNoMask: let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoSLoadNoMaskTU: @@ -702,7 +761,6 @@ 
class VPseudoSLoadNoMaskTU: let HasDummyMask = 1; let HasMergeOp = 1; let Constraints = "$rd = $dest"; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoSLoadMask: @@ -720,7 +778,7 @@ class VPseudoSLoadMask: let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } class VPseudoILoadNoMask LMUL, @@ -737,7 +795,6 @@ class VPseudoILoadNoMask LMUL, let HasSEWOp = 1; let HasDummyMask = 1; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd", ""); - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoILoadNoMaskTU LMUL, @@ -755,7 +812,6 @@ class VPseudoILoadNoMaskTU LMUL, let HasDummyMask = 1; let HasMergeOp = 1; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest"); - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoILoadMask LMUL, @@ -774,10 +830,10 @@ class VPseudoILoadMask LMUL, let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } -class VPseudoUSStoreNoMask: +class VPseudoUSStoreNoMask: Pseudo<(outs), (ins StClass:$rd, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, @@ -787,8 +843,7 @@ class VPseudoUSStoreNoMask: let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let HasDummyMask = DummyMask; } class VPseudoUSStoreMask: @@ -801,7 +856,6 @@ class VPseudoUSStoreMask: let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoSStoreNoMask: @@ -815,7 +869,6 @@ class VPseudoSStoreNoMask: let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoSStoreMask: @@ -828,7 +881,6 @@ class VPseudoSStoreMask: let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } // Unary instruction that is never masked so HasDummyMask=0. @@ -842,7 +894,20 @@ class VPseudoUnaryNoDummyMask(PseudoToVInst.VInst); +} + +class VPseudoUnaryNoDummyMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; } class VPseudoNullaryNoMask: @@ -855,13 +920,26 @@ class VPseudoNullaryNoMask: let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); +} + +class VPseudoNullaryNoMaskTU: + Pseudo<(outs RegClass:$rd), + (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew), + []>, RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$rd = $merge"; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; } class VPseudoNullaryMask: Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, VMaskOp:$vm, AVL:$vl, - ixlenimm:$sew), []>, RISCVVPseudo { + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -869,7 +947,8 @@ class VPseudoNullaryMask: let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; + let HasVecPolicyOp = 1; } // Nullary for pseudo instructions. 
They are expanded in @@ -899,7 +978,21 @@ class VPseudoUnaryNoMask(PseudoToVInst.VInst); +} + +// RetClass could be GPR or VReg. +class VPseudoUnaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; } class VPseudoUnaryMask : @@ -914,7 +1007,7 @@ class VPseudoUnaryMask : let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } class VPseudoUnaryMaskTA : @@ -930,7 +1023,7 @@ class VPseudoUnaryMaskTA : let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } // mask unary operation without maskedoff @@ -943,7 +1036,6 @@ class VPseudoMaskUnarySOutMask: let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } // Mask can be V0~V31 @@ -962,13 +1054,13 @@ class VPseudoUnaryAnyMask(PseudoToVInst.VInst); } class VPseudoBinaryNoMask : + string Constraint, + int DummyMask = 1> : Pseudo<(outs RetClass:$rd), (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo { @@ -978,8 +1070,24 @@ class VPseudoBinaryNoMask : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let HasMergeOp = 1; } // Special version of VPseudoBinaryNoMask where we pretend the first source is @@ -989,7 +1097,8 @@ class VPseudoTiedBinaryNoMask : Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew, + ixlenimm:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -998,9 +1107,8 @@ class VPseudoTiedBinaryNoMask(PseudoToVInst.VInst); } class VPseudoIStoreNoMask LMUL, @@ -1015,7 +1123,6 @@ class VPseudoIStoreNoMask LMUL, let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoIStoreMask LMUL, @@ -1029,7 +1136,6 @@ class VPseudoIStoreMask LMUL, let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoBinaryMask(PseudoToVInst.VInst); } -class VPseudoBinaryMaskTA : +class VPseudoBinaryMaskPolicy : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, Op1Class:$rs2, Op2Class:$rs1, @@ -1068,7 +1173,7 @@ class VPseudoBinaryMaskTA(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } // Like VPseudoBinaryMask, but output can be V0. 
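// Illustrative note, not part of the vendored patch: the new "_TU"
// (tail-undisturbed) pseudo classes above add a $merge/$dest source tied to
// the destination ("$rd = $merge") so that elements past VL are taken from
// the merge operand rather than being left unspecified. For example, with
// VLMAX = 8 and VL = 4:
//
//   result[0..3] = op(src[0..3])   // body elements, both variants
//   result[4..7] = merge[4..7]     // _TU keeps the tail; the tail-agnostic
//                                  // forms may leave each tail element
//                                  // undisturbed or overwrite it with ones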
@@ -1088,7 +1193,7 @@ class VPseudoBinaryMOutMask(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } // Special version of VPseudoBinaryMask where we pretend the first source is @@ -1110,7 +1215,7 @@ class VPseudoTiedBinaryMask(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } class VPseudoBinaryCarryIn(PseudoToVInst.VInst); let VLMul = MInfo.value; } @@ -1156,7 +1260,6 @@ class VPseudoTiedBinaryCarryIn(PseudoToVInst.VInst); let VLMul = MInfo.value; } @@ -1177,7 +1280,6 @@ class VPseudoTernaryNoMask(PseudoToVInst.VInst); } class VPseudoTernaryNoMaskWithPolicy(PseudoToVInst.VInst); } -class VPseudoUSSegLoadNoMask NF, bit isFF>: +class VPseudoUSSegLoadNoMask NF>: Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLSEG.val, VLMul> { + RISCVVLSEG.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } -class VPseudoUSSegLoadMask NF, bit isFF>: +class VPseudoUSSegLoadNoMaskTU NF>: + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLSEG.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; +} + +class VPseudoUSSegLoadMask NF>: Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG.val, VLMul> { + RISCVVLSEG.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$rd = $merge"; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasVecPolicyOp = 1; + let UsesMaskPolicy = 1; +} + +class VPseudoUSSegLoadFFNoMask NF>: + Pseudo<(outs RetClass:$rd, GPR:$vl), + (ins GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLSEG.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; +} + +class VPseudoUSSegLoadFFNoMaskTU NF>: + Pseudo<(outs RetClass:$rd, GPR:$vl), + (ins RetClass:$dest, GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLSEG.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; +} + +class VPseudoUSSegLoadFFMask NF>: + Pseudo<(outs GetVRegNoV0.R:$rd, GPR:$vl), + (ins GetVRegNoV0.R:$merge, GPR:$rs1, + VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>, + RISCVVPseudo, + RISCVVLSEG.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1229,14 +1389,14 @@ class VPseudoUSSegLoadMask NF, bit isFF>: let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } class VPseudoSSegLoadNoMask NF>: Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLSEG.val, VLMul> { + RISCVVLSEG.val, VLMul> { let mayLoad = 1; let mayLoad = 1; let mayStore = 0; @@ -1244,7 +1404,22 @@ class VPseudoSSegLoadNoMask NF>: let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); +} + +class VPseudoSSegLoadNoMaskTU NF>: + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, GPR:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + 
RISCVVLSEG.val, VLMul> { + let mayLoad = 1; + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $merge"; } class VPseudoSSegLoadMask NF>: @@ -1253,7 +1428,7 @@ class VPseudoSSegLoadMask NF>: GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG.val, VLMul> { + RISCVVLSEG.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1262,7 +1437,7 @@ class VPseudoSSegLoadMask NF>: let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } class VPseudoISegLoadNoMask LMUL, @@ -1270,7 +1445,7 @@ class VPseudoISegLoadNoMask LMUL, Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLXSEG.val, VLMul, LMUL> { + RISCVVLXSEG.val, VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1280,7 +1455,24 @@ class VPseudoISegLoadNoMask LMUL, let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); +} + +class VPseudoISegLoadNoMaskTU LMUL, + bits<4> NF, bit Ordered>: + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, GPR:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLXSEG.val, VLMul, LMUL> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + // For vector indexed segment loads, the destination vector register groups + // cannot overlap the source vector register group + let Constraints = "@earlyclobber $rd, $rd = $merge"; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; } class VPseudoISegLoadMask LMUL, @@ -1290,7 +1482,7 @@ class VPseudoISegLoadMask LMUL, IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLXSEG.val, VLMul, LMUL> { + RISCVVLXSEG.val, VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1301,7 +1493,7 @@ class VPseudoISegLoadMask LMUL, let HasSEWOp = 1; let HasMergeOp = 1; let HasVecPolicyOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); + let UsesMaskPolicy = 1; } class VPseudoUSSegStoreNoMask NF>: @@ -1315,7 +1507,6 @@ class VPseudoUSSegStoreNoMask NF>: let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoUSSegStoreMask NF>: @@ -1329,7 +1520,6 @@ class VPseudoUSSegStoreMask NF>: let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoSSegStoreNoMask NF>: @@ -1343,7 +1533,6 @@ class VPseudoSSegStoreNoMask NF>: let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoSSegStoreMask NF>: @@ -1357,7 +1546,6 @@ class VPseudoSSegStoreMask NF>: let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoISegStoreNoMask LMUL, @@ -1373,7 +1561,6 @@ class VPseudoISegStoreNoMask LMUL let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } class VPseudoISegStoreMask LMUL, @@ -1388,7 +1575,6 @@ class VPseudoISegStoreMask LMUL, let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; - let BaseInstr = !cast(PseudoToVInst.VInst); } multiclass VPseudoUSLoad { @@ -1398,13 +1584,13 @@ multiclass VPseudoUSLoad { defvar vreg = lmul.vrclass; let VLMul = lmul.value in { 
def "E" # eew # "_V_" # LInfo : - VPseudoUSLoadNoMask, + VPseudoUSLoadNoMask, VLESched; def "E" # eew # "_V_" # LInfo # "_TU": - VPseudoUSLoadNoMaskTU, + VPseudoUSLoadNoMaskTU, VLESched; def "E" # eew # "_V_" # LInfo # "_MASK" : - VPseudoUSLoadMask, + VPseudoUSLoadMask, VLESched; } } @@ -1417,14 +1603,14 @@ multiclass VPseudoFFLoad { defvar LInfo = lmul.MX; defvar vreg = lmul.vrclass; let VLMul = lmul.value in { - def "E" # eew # "FF_V_" # LInfo : - VPseudoUSLoadNoMask, + def "E" # eew # "FF_V_" # LInfo: + VPseudoUSLoadFFNoMask, VLFSched; def "E" # eew # "FF_V_" # LInfo # "_TU": - VPseudoUSLoadNoMaskTU, + VPseudoUSLoadFFNoMaskTU, VLFSched; - def "E" # eew # "FF_V_" # LInfo # "_MASK" : - VPseudoUSLoadMask, + def "E" # eew # "FF_V_" # LInfo # "_MASK": + VPseudoUSLoadFFMask, VLFSched; } } @@ -1434,7 +1620,7 @@ multiclass VPseudoFFLoad { multiclass VPseudoLoadMask { foreach mti = AllMasks in { let VLMul = mti.LMul.value in { - def "_V_" # mti.BX : VPseudoUSLoadNoMask; + def "_V_" # mti.BX : VPseudoUSLoadNoMask; } } } @@ -1506,7 +1692,7 @@ multiclass VPseudoUSStore { multiclass VPseudoStoreMask { foreach mti = AllMasks in { let VLMul = mti.LMul.value in { - def "_V_" # mti.BX : VPseudoUSStoreNoMask; + def "_V_" # mti.BX : VPseudoUSStoreNoMask; } } } @@ -1596,6 +1782,8 @@ multiclass VPseudoVID_V { let VLMul = m.value in { def "_V_" # m.MX : VPseudoNullaryNoMask, Sched<[WriteVMIdxV, ReadVMask]>; + def "_V_" # m.MX # "_TU": VPseudoNullaryNoMaskTU, + Sched<[WriteVMIdxV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask, Sched<[WriteVMIdxV, ReadVMask]>; } @@ -1616,7 +1804,9 @@ multiclass VPseudoVIOT_M { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; - def "_" # m.MX # "_MASK" : VPseudoUnaryMask, + def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; + def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; } } @@ -1638,8 +1828,11 @@ multiclass VPseudoBinary; - def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy, + RISCVMaskedPseudo; } } @@ -1653,7 +1846,8 @@ multiclass VPseudoBinaryM; let ForceTailAgnostic = true in def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask; + Op2Class, Constraint>, + RISCVMaskedPseudo; } } @@ -1666,8 +1860,11 @@ multiclass VPseudoBinaryEmul; - def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA; + def "_" # lmul.MX # "_" # emul.MX # "_TU": VPseudoBinaryNoMaskTU; + def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy, + RISCVMaskedPseudo; } } @@ -1744,7 +1941,7 @@ multiclass VPseudoBinaryV_VI { multiclass VPseudoVALU_MM { foreach m = MxList in let VLMul = m.value in { - def "_MM_" # m.MX : VPseudoBinaryNoMask, + def "_MM_" # m.MX : VPseudoBinaryNoMask, Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>; } } @@ -1907,6 +2104,12 @@ multiclass VPseudoUnaryVMV_V_X_I { Sched<[WriteVIMovX, ReadVIMovX]>; def "_I_" # m.MX : VPseudoUnaryNoDummyMask, Sched<[WriteVIMovI]>; + def "_V_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovV, ReadVIMovV]>; + def "_X_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovX, ReadVIMovX]>; + def "_I_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovI]>; } } } @@ -1918,6 +2121,9 @@ multiclass VPseudoVMV_F { def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask, Sched<[WriteVFMovV, ReadVFMovF]>; + def "_" # f.FX # "_" # m.MX # "_TU": + VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVFMovV, 
ReadVFMovF]>; } } } @@ -1928,7 +2134,9 @@ multiclass VPseudoVCLS_V { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; - def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask, + def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; + def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; } } @@ -1939,6 +2147,8 @@ multiclass VPseudoVSQR_V { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; + def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; } @@ -1950,6 +2160,8 @@ multiclass VPseudoVRCP_V { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; + def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; } @@ -1963,8 +2175,11 @@ multiclass PseudoVEXT_VF2 { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, + RISCVMaskedPseudo, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } } @@ -1977,8 +2192,11 @@ multiclass PseudoVEXT_VF4 { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, + RISCVMaskedPseudo, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } } @@ -1991,8 +2209,11 @@ multiclass PseudoVEXT_VF8 { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, + RISCVMaskedPseudo, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } } @@ -2248,6 +2469,13 @@ multiclass VPseudoVCALU_VM_XM_IM { Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; defm "" : VPseudoBinaryV_IM, Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>; } multiclass VPseudoVCALU_VM_XM { @@ -2255,6 +2483,11 @@ multiclass VPseudoVCALU_VM_XM { Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; defm "" : VPseudoBinaryV_XM, Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; } multiclass VPseudoVCALUM_VM_XM_IM { @@ -2318,6 +2551,19 @@ multiclass VPseudoTernary { + let VLMul = MInfo.value in { + def "_" # MInfo.MX : VPseudoTernaryNoMask; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy; + + } +} + multiclass VPseudoTernaryWithPolicy; - 
def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy; } } @@ -2339,9 +2585,9 @@ multiclass VPseudoTernaryV_VV_AAXA { +multiclass VPseudoVSLDV_VX { foreach m = MxList in - defm _VX : VPseudoTernary; + defm _VX : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryV_VX_AAXA { @@ -2380,9 +2626,9 @@ multiclass VPseudoTernaryW_VF { m.vrclass, m, constraint>; } -multiclass VPseudoTernaryV_VI { +multiclass VPseudoVSLDV_VI { foreach m = MxList in - defm _VI : VPseudoTernary; + defm _VI : VPseudoTernaryWithPolicy; } multiclass VPseudoVMAC_VV_VX_AAXA { @@ -2400,9 +2646,9 @@ multiclass VPseudoVMAC_VV_VF_AAXA { } multiclass VPseudoVSLD_VX_VI { - defm "" : VPseudoTernaryV_VX, + defm "" : VPseudoVSLDV_VX, Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideV, ReadVISlideX, ReadVMask]>; - defm "" : VPseudoTernaryV_VI, + defm "" : VPseudoVSLDV_VI, Sched<[WriteVISlideI, ReadVISlideV, ReadVISlideV, ReadVMask]>; } @@ -2501,8 +2747,10 @@ multiclass VPseudoConversion { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMask; + def "_" # MInfo.MX # "_TU": VPseudoUnaryNoMaskTU; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA; + Constraint>, + RISCVMaskedPseudo; } } @@ -2566,18 +2814,38 @@ multiclass VPseudoVNCVTD_W { Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>; } -multiclass VPseudoUSSegLoad { +multiclass VPseudoUSSegLoad { foreach eew = EEWList in { foreach lmul = MxSet.m in { defvar LInfo = lmul.MX; let VLMul = lmul.value in { foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; - defvar FFStr = !if(isFF, "FF", ""); - def nf # "E" # eew # FFStr # "_V_" # LInfo : - VPseudoUSSegLoadNoMask; - def nf # "E" # eew # FFStr # "_V_" # LInfo # "_MASK" : - VPseudoUSSegLoadMask; + def nf # "E" # eew # "_V_" # LInfo : + VPseudoUSSegLoadNoMask; + def nf # "E" # eew # "_V_" # LInfo # "_TU" : + VPseudoUSSegLoadNoMaskTU; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : + VPseudoUSSegLoadMask; + } + } + } + } +} + +multiclass VPseudoUSSegLoadFF { + foreach eew = EEWList in { + foreach lmul = MxSet.m in { + defvar LInfo = lmul.MX; + let VLMul = lmul.value in { + foreach nf = NFSet.L in { + defvar vreg = SegRegClass.RC; + def nf # "E" # eew # "FF_V_" # LInfo : + VPseudoUSSegLoadFFNoMask; + def nf # "E" # eew # "FF_V_" # LInfo # "_TU" : + VPseudoUSSegLoadFFNoMaskTU; + def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" : + VPseudoUSSegLoadFFMask; } } } @@ -2592,6 +2860,7 @@ multiclass VPseudoSSegLoad { foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask; + def nf # "E" # eew # "_V_" # LInfo # "_TU" : VPseudoSSegLoadNoMaskTU; def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask; } } @@ -2618,6 +2887,9 @@ multiclass VPseudoISegLoad { def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo : VPseudoISegLoadNoMask; + def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_TU" : + VPseudoISegLoadNoMaskTU; def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" : VPseudoISegLoadMask; @@ -2702,12 +2974,31 @@ class VPatUnaryNoMask : Pat<(result_type (!cast(intrinsic_name) + (result_type undef), (op2_type op2_reg_class:$rs2), VLOpFrag)), (!cast(inst#"_"#kind#"_"#vlmul.MX) (op2_type op2_reg_class:$rs2), GPR:$vl, sew)>; +class VPatUnaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_TU") + (result_type result_reg_class:$merge), + (op2_type 
op2_reg_class:$rs2), + GPR:$vl, sew)>; + class VPatUnaryMask; -class VPatBinaryNoMask : +class VPatBinaryM : + Pat<(result_type (!cast(intrinsic_name) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + +class VPatBinaryNoMaskTA : Pat<(result_type (!cast(intrinsic_name) + (result_type (undef)), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), VLOpFrag)), @@ -2809,6 +3118,26 @@ class VPatBinaryNoMask; +class VPatBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TU") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + // Same as above but source operands are swapped. class VPatBinaryNoMaskSwapped : Pat<(result_type (!cast(intrinsic_name) + (result_type (undef)), (result_type result_reg_class:$rs1), (op2_type op2_kind:$rs2), VLOpFrag)), (!cast(inst#"_TIED") (result_type result_reg_class:$rs1), (op2_type op2_kind:$rs2), - GPR:$vl, sew)>; + GPR:$vl, sew, TAIL_AGNOSTIC)>; + +class VPatTiedBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (result_type result_reg_class:$merge), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TIED") + (result_type result_reg_class:$merge), + (op2_type op2_kind:$rs2), + GPR:$vl, sew, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; class VPatTiedBinaryMask(inst#"_"#kind#"_"#vlmul.MX) result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - GPR:$vl, sew, TAIL_UNDISTURBED)>; + GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatTernaryMask; +class VPatTernaryMaskPolicy : + Pat<(result_type (!cast(intrinsic#"_mask") + (result_type result_reg_class:$rs3), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + VLOpFrag, (XLenVT timm:$policy))), + (!cast(inst#"_"#kind#"_"#vlmul.MX # "_MASK") + result_reg_class:$rs3, + (op1_type op1_reg_class:$rs1), + op2_kind:$rs2, + (mask_type V0), + GPR:$vl, sew, (XLenVT timm:$policy))>; + multiclass VPatUnaryS_M { @@ -3037,8 +3409,10 @@ multiclass VPatUnaryV_M foreach vti = AllIntegerVectors in { def : VPatUnaryNoMask; - def : VPatUnaryMask; + def : VPatUnaryNoMaskTU; + def : VPatUnaryMaskTA; } } @@ -3052,6 +3426,9 @@ multiclass VPatUnaryV_VF; + def : VPatUnaryNoMaskTU; def : VPatUnaryMaskTA; @@ -3064,6 +3441,9 @@ multiclass VPatUnaryV_V; + def : VPatUnaryNoMaskTU; def : VPatUnaryMaskTA; @@ -3074,27 +3454,33 @@ multiclass VPatNullaryV { foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (!cast(intrinsic) + (vti.Vector undef), VLOpFrag)), (!cast(instruction#"_V_" # vti.LMul.MX) GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (!cast(intrinsic) + (vti.Vector vti.RegClass:$merge), + VLOpFrag)), + (!cast(instruction#"_V_" # vti.LMul.MX # "_TU") + vti.RegClass:$merge, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (!cast(intrinsic # "_mask") (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), VLOpFrag)), + (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction#"_V_" # vti.LMul.MX # "_MASK") vti.RegClass:$merge, (vti.Mask V0), - GPR:$vl, vti.Log2SEW)>; + GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } multiclass VPatNullaryM { foreach mti = AllMasks in def : Pat<(mti.Mask (!cast(intrinsic) - (XLenVT (VLOp (XLenVT (XLenVT GPR:$vl)))))), + VLOpFrag)), (!cast(inst#"_M_"#mti.BX) GPR:$vl, mti.Log2SEW)>; } 
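// Illustrative sketch, not part of the vendored patch: with the extra
// passthru operand now carried by the unmasked intrinsics, the same intrinsic
// call selects either the plain (tail-agnostic) pseudo or its "_TU" variant
// depending on whether the passthru is undef. The intrinsic mangling below is
// an assumption for illustration:
//
//   %x = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(
//          <vscale x 2 x i32> undef, ...)        ; -> PseudoVADD_VV_M1
//   %y = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(
//          <vscale x 2 x i32> %passthru, ...)    ; -> PseudoVADD_VV_M1_TU
//
// This is what the paired VPatBinaryNoMaskTA/VPatBinaryNoMaskTU classes above
// implement: the TA pattern matches (result_type (undef)) while the TU
// pattern matches a live $merge value.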
-multiclass VPatBinary { - def : VPatBinaryNoMask; + def : VPatBinaryM; def : VPatBinaryMask; @@ -3123,8 +3509,10 @@ multiclass VPatBinaryTA { - def : VPatBinaryNoMask; + def : VPatBinaryNoMaskTA; + def : VPatBinaryNoMaskTU; def : VPatBinaryMaskTA; @@ -3148,6 +3536,42 @@ multiclass VPatBinarySwapped; } +multiclass VPatBinaryCarryInTAIL +{ + def : Pat<(result_type (!cast(intrinsic) + (result_type undef), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), GPR:$vl, sew)>; + def : Pat<(result_type (!cast(intrinsic) + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_TU") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), GPR:$vl, sew)>; +} + multiclass VPatBinaryCarryIn; } -multiclass VPatConversion -{ - def : VPatUnaryNoMask; - def : VPatUnaryMask; -} - multiclass VPatConversionTA; + def : VPatUnaryNoMaskTU; def : VPatUnaryMaskTA; } @@ -3296,9 +3705,9 @@ multiclass VPatBinaryV_VI { foreach mti = AllMasks in - def : VPatBinaryNoMask; + def : VPatBinaryM; } multiclass VPatBinaryW_VV; - let AddedComplexity = 1 in + def : VPatBinaryNoMaskTU; + let AddedComplexity = 1 in { + def : VPatTiedBinaryNoMaskTU; def : VPatTiedBinaryMask; + } def : VPatBinaryMaskTA; } +multiclass VPatBinaryV_VM_TAIL vtilist = AllIntegerVectors> { + foreach vti = vtilist in + defm : VPatBinaryCarryInTAIL; +} + +multiclass VPatBinaryV_XM_TAIL vtilist = AllIntegerVectors> { + foreach vti = vtilist in + defm : VPatBinaryCarryInTAIL; +} + +multiclass VPatBinaryV_IM_TAIL { + foreach vti = AllIntegerVectors in + defm : VPatBinaryCarryInTAIL; +} + multiclass VPatBinaryV_V { foreach vti = AllIntegerVectors in defm : VPatBinaryMaskOut { multiclass VPatBinaryM_VV vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryM; } multiclass VPatBinarySwappedM_VV vtilist> { foreach vti = vtilist in { defvar kind = "V"#vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryM; } } multiclass VPatBinaryM_VI vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryM; } multiclass VPatBinaryV_VV_VX_VI; multiclass VPatBinaryV_VM_XM_IM - : VPatBinaryV_VM, - VPatBinaryV_XM, - VPatBinaryV_IM; + : VPatBinaryV_VM_TAIL, + VPatBinaryV_XM_TAIL, + VPatBinaryV_IM_TAIL; multiclass VPatBinaryM_VM_XM_IM : VPatBinaryV_VM, @@ -3538,8 +3987,8 @@ multiclass VPatBinaryM_V_X_I VPatBinaryV_I; multiclass VPatBinaryV_VM_XM - : VPatBinaryV_VM, - VPatBinaryV_XM; + : VPatBinaryV_VM_TAIL, + VPatBinaryV_XM_TAIL; multiclass VPatBinaryM_VM_XM : VPatBinaryV_VM, @@ -3569,6 +4018,26 @@ multiclass VPatTernary; } +multiclass VPatTernaryNoMaskNoPolicy { + def : VPatTernaryNoMask; + def : VPatTernaryMaskPolicy; +} + multiclass VPatTernaryWithPolicy; - def : VPatTernaryMask; + def : VPatTernaryMaskPolicy; } multiclass VPatTernaryV_VV_AAXA vtilist> { foreach vti = vtilist in - defm : VPatTernary; + defm : VPatTernaryWithPolicy; } multiclass VPatTernaryV_VX_AAXA vtilist, Operand Imm_type> { foreach vti = vtilist in - defm : VPatTernary; + defm : VPatTernaryWithPolicy; } multiclass VPatTernaryW_VV, VPatTernaryV_VI; + multiclass VPatBinaryM_VV_VX_VI vtilist> : VPatBinaryM_VV, @@ -3724,19 +4194,6 @@ multiclass VPatReductionW_VS -{ - foreach fvti = AllFloatVectors in - { - defvar ivti = GetIntVTypeInfo.Vti; 
- - defm : VPatConversion; - } -} - multiclass VPatConversionVI_VF { @@ -3973,7 +4430,7 @@ defm PseudoVL : VPseudoFFLoad; //===----------------------------------------------------------------------===// // 7.8. Vector Load/Store Segment Instructions //===----------------------------------------------------------------------===// -defm PseudoVLSEG : VPseudoUSSegLoad; +defm PseudoVLSEG : VPseudoUSSegLoad; defm PseudoVLSSEG : VPseudoSSegLoad; defm PseudoVLOXSEG : VPseudoISegLoad; defm PseudoVLUXSEG : VPseudoISegLoad; @@ -3983,8 +4440,9 @@ defm PseudoVSOXSEG : VPseudoISegStore; defm PseudoVSUXSEG : VPseudoISegStore; // vlsegeff.v may update VL register -let hasSideEffects = 1, Defs = [VL] in -defm PseudoVLSEG : VPseudoUSSegLoad; +let hasSideEffects = 1, Defs = [VL] in { +defm PseudoVLSEG : VPseudoUSSegLoadFF; +} //===----------------------------------------------------------------------===// // 12. Vector Integer Arithmetic Instructions @@ -4002,13 +4460,24 @@ foreach vti = AllIntegerVectors in { // Occurs when legalizing vrsub.vx intrinsics for i64 on RV32 since we need // to use a more complex splat sequence. Add the pattern for all VTs for // consistency. - def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$rs2), + def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector (undef)), + (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), VLOpFrag)), (!cast("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (vti.Vector vti.RegClass:$rs1), + VLOpFrag)), + (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_TU") + vti.RegClass:$merge, + vti.RegClass:$rs1, + vti.RegClass:$rs2, + GPR:$vl, + vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), @@ -4025,7 +4494,8 @@ foreach vti = AllIntegerVectors in { (XLenVT timm:$policy))>; // Match VSUB with a small immediate to vadd.vi by negating the immediate. - def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)), + (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), VLOpFrag)), (!cast("PseudoVADD_VI_"#vti.LMul.MX) vti.RegClass:$rs1, @@ -4219,33 +4689,42 @@ let Predicates = [HasVInstructionsAnyF] in { //===----------------------------------------------------------------------===// // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFADD : VPseudoVALU_VV_VF; defm PseudoVFSUB : VPseudoVALU_VV_VF; defm PseudoVFRSUB : VPseudoVALU_VF; +} //===----------------------------------------------------------------------===// // 14.3. Vector Widening Floating-Point Add/Subtract Instructions //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFWADD : VPseudoVFWALU_VV_VF; defm PseudoVFWSUB : VPseudoVFWALU_VV_VF; defm PseudoVFWADD : VPseudoVFWALU_WV_WF; defm PseudoVFWSUB : VPseudoVFWALU_WV_WF; +} //===----------------------------------------------------------------------===// // 14.4. 
Vector Single-Width Floating-Point Multiply/Divide Instructions //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFMUL : VPseudoVFMUL_VV_VF; defm PseudoVFDIV : VPseudoVFDIV_VV_VF; defm PseudoVFRDIV : VPseudoVFRDIV_VF; +} //===----------------------------------------------------------------------===// // 14.5. Vector Widening Floating-Point Multiply //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFWMUL : VPseudoVWMUL_VV_VF; +} //===----------------------------------------------------------------------===// // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFMACC : VPseudoVMAC_VV_VF_AAXA; defm PseudoVFNMACC : VPseudoVMAC_VV_VF_AAXA; defm PseudoVFMSAC : VPseudoVMAC_VV_VF_AAXA; @@ -4254,35 +4733,43 @@ defm PseudoVFMADD : VPseudoVMAC_VV_VF_AAXA; defm PseudoVFNMADD : VPseudoVMAC_VV_VF_AAXA; defm PseudoVFMSUB : VPseudoVMAC_VV_VF_AAXA; defm PseudoVFNMSUB : VPseudoVMAC_VV_VF_AAXA; +} //===----------------------------------------------------------------------===// // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFWMACC : VPseudoVWMAC_VV_VF; defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF; defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF; defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF; +} //===----------------------------------------------------------------------===// // 14.8. Vector Floating-Point Square-Root Instruction //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in defm PseudoVFSQRT : VPseudoVSQR_V; //===----------------------------------------------------------------------===// // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction //===----------------------------------------------------------------------===// +let mayRaiseFPException = true in defm PseudoVFRSQRT7 : VPseudoVRCP_V; //===----------------------------------------------------------------------===// // 14.10. Vector Floating-Point Reciprocal Estimate Instruction //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in defm PseudoVFREC7 : VPseudoVRCP_V; //===----------------------------------------------------------------------===// // 14.11. Vector Floating-Point Min/Max Instructions //===----------------------------------------------------------------------===// +let mayRaiseFPException = true in { defm PseudoVFMIN : VPseudoVMAX_VV_VF; defm PseudoVFMAX : VPseudoVMAX_VV_VF; +} //===----------------------------------------------------------------------===// // 14.12. Vector Floating-Point Sign-Injection Instructions @@ -4294,12 +4781,14 @@ defm PseudoVFSGNJX : VPseudoVSGNJ_VV_VF; //===----------------------------------------------------------------------===// // 14.13. 
Vector Floating-Point Compare Instructions //===----------------------------------------------------------------------===// +let mayRaiseFPException = true in { defm PseudoVMFEQ : VPseudoVCMPM_VV_VF; defm PseudoVMFNE : VPseudoVCMPM_VV_VF; defm PseudoVMFLT : VPseudoVCMPM_VV_VF; defm PseudoVMFLE : VPseudoVCMPM_VV_VF; defm PseudoVMFGT : VPseudoVCMPM_VF; defm PseudoVMFGE : VPseudoVCMPM_VF; +} //===----------------------------------------------------------------------===// // 14.14. Vector Floating-Point Classify Instruction @@ -4376,15 +4865,21 @@ let Predicates = [HasVInstructionsAnyF] in { //===----------------------------------------------------------------------===// // 15.3. Vector Single-Width Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// +let Uses = [FRM], mayRaiseFPException = true in { defm PseudoVFREDOSUM : VPseudoVFREDO_VS; defm PseudoVFREDUSUM : VPseudoVFRED_VS; +} +let mayRaiseFPException = true in { defm PseudoVFREDMIN : VPseudoVFRED_VS; defm PseudoVFREDMAX : VPseudoVFRED_VS; +} //===----------------------------------------------------------------------===// // 15.4. Vector Widening Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// -let IsRVVWideningReduction = 1 in { +let IsRVVWideningReduction = 1, + Uses = [FRM], + mayRaiseFPException = true in { defm PseudoVFWREDUSUM : VPseudoVFWRED_VS; defm PseudoVFWREDOSUM : VPseudoVFWRED_VS; } @@ -4611,7 +5106,8 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors, foreach vti = AllIntegerVectors in { // Emit shift by 1 as an add since it might be faster. - def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), (XLenVT 1), VLOpFrag)), (!cast("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs1, @@ -4726,10 +5222,16 @@ defm : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">; // 12.16. Vector Integer Move Instructions //===----------------------------------------------------------------------===// foreach vti = AllVectors in { - def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), VLOpFrag)), (!cast("PseudoVMV_V_V_"#vti.LMul.MX) $rs1, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$passthru), + (vti.Vector vti.RegClass:$rs1), + VLOpFrag)), + (!cast("PseudoVMV_V_V_"#vti.LMul.MX#"_TU") + $passthru, $rs1, GPR:$vl, vti.Log2SEW)>; // vmv.v.x/vmv.v.i are handled in RISCInstrVInstrInfoVVLPatterns.td } @@ -4862,7 +5364,7 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE", AllFloatVectors>; //===----------------------------------------------------------------------===// // 14.14. Vector Floating-Point Classify Instruction //===----------------------------------------------------------------------===// -defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">; +defm : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">; //===----------------------------------------------------------------------===// // 14.15. Vector Floating-Point Merge Instruction @@ -4870,19 +5372,27 @@ defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">; // We can use vmerge.vvm to support vector-vector vfmerge. 
 // NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses
 // int_riscv_vmerge. Support both for compatibility.
-defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE",
-                      /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
-defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
-                      /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
-defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",
-                      /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm : VPatBinaryV_VM_TAIL<"int_riscv_vmerge", "PseudoVMERGE",
+                           /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm : VPatBinaryV_VM_TAIL<"int_riscv_vfmerge", "PseudoVMERGE",
+                           /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm : VPatBinaryV_XM_TAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+                           /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
 
 foreach fvti = AllFloatVectors in {
   defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
-  def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2),
+  def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector undef),
+                                             (fvti.Vector fvti.RegClass:$rs2),
                                              (fvti.Scalar (fpimm0)),
                                              (fvti.Mask V0), VLOpFrag)),
             (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+  defvar instr_tu = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU");
+  def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$merge),
+                                             (fvti.Vector fvti.RegClass:$rs2),
+                                             (fvti.Scalar (fpimm0)),
+                                             (fvti.Mask V0), VLOpFrag)),
+            (instr_tu fvti.RegClass:$merge, fvti.RegClass:$rs2, 0,
+                      (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -5048,6 +5558,11 @@ foreach fvti = AllFloatVectors in {
                (fvti.Vector $rs1),
                (fvti.Scalar fvti.ScalarRegClass:$rs2),
                GPR:$vl, fvti.Log2SEW)>;
+
+  def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+                         (fvti.Scalar (fpimm0)), VLOpFrag)),
+            (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
+             (fvti.Vector $rs1), X0, GPR:$vl, fvti.Log2SEW)>;
 }
 } // Predicates = [HasVInstructionsAnyF]
 
@@ -5097,5 +5612,5 @@ let Predicates = [HasVInstructionsAnyF] in {
 } // Predicates = [HasVInstructionsAnyF]
 
 // Include the non-intrinsic ISel patterns
-include "RISCVInstrInfoVSDPatterns.td"
 include "RISCVInstrInfoVVLPatterns.td"
+include "RISCVInstrInfoVSDPatterns.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 2b920d29ab81..06d4c4d0a9e6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -8,8 +8,7 @@
 ///
 /// This file contains the required infrastructure and SDNode patterns to
 /// support code generation for the standard 'V' (Vector) extension, version
-/// 0.10. This version is still experimental as the 'V' extension hasn't been
-/// ratified yet.
+/// 1.0.
 ///
 /// This file is included from and depends upon RISCVInstrInfoVPseudos.td
 ///
@@ -22,35 +21,9 @@
 //===----------------------------------------------------------------------===//
 // Helpers to define the SDNode patterns.
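// Illustrative note, not part of the vendored patch: the int_riscv_vfmv_s_f
// pattern added above (in RISCVInstrInfoVPseudos.td) relies on +0.0 having an
// all-zeros IEEE-754 bit pattern, so
//
//   vfmv.s.f vd, fa0        // when fa0 is known to be +0.0
//
// can instead be selected as the integer move that reads the zero register:
//
//   vmv.s.x vd, x0
//
// avoiding a floating-point scalar operand altogether. -0.0 would not
// qualify, since its sign bit is set (fpimm0 matches positive zero only).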
//===----------------------------------------------------------------------===// -def SDTSplatI64 : SDTypeProfile<1, 1, [ - SDTCVecEltisVT<0, i64>, SDTCisVT<1, i32> -]>; - -def rv32_splat_i64 : SDNode<"RISCVISD::SPLAT_VECTOR_I64", SDTSplatI64>; - -def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>, - SDTCisVT<1, XLenVT>]>; -def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>; -def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>; - def rvv_vnot : PatFrag<(ops node:$in), (xor node:$in, (riscv_vmset_vl (XLenVT srcvalue)))>; -// Give explicit Complexity to prefer simm5/uimm5. -def SplatPat : ComplexPattern; -def SplatPat_simm5 : ComplexPattern; -def SplatPat_uimm5 : ComplexPattern; -def SplatPat_simm5_plus1 - : ComplexPattern; -def SplatPat_simm5_plus1_nonzero - : ComplexPattern; - -class SwapHelper { - dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix); -} - multiclass VPatUSLoadStoreSDNode : Pat<(result_type (vop (vop_type vop_reg_class:$rs1), - (vop_type (splat_vector xop_kind:$rs2)))), + (vop_type (SplatFPOp xop_kind:$rs2)))), (!cast(instruction_name#"_"#vlmul.MX) vop_reg_class:$rs1, (xop_type xop_kind:$rs2), @@ -189,7 +162,7 @@ multiclass VPatBinaryFPSDNode_VV_VF { multiclass VPatBinaryFPSDNode_R_VF { foreach fvti = AllFloatVectors in - def : Pat<(fvti.Vector (vop (fvti.Vector (splat_vector fvti.Scalar:$rs2)), + def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)), (fvti.Vector fvti.RegClass:$rs1))), (!cast(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) fvti.RegClass:$rs1, @@ -197,67 +170,70 @@ multiclass VPatBinaryFPSDNode_R_VF { fvti.AVL, fvti.Log2SEW)>; } -multiclass VPatIntegerSetCCSDNode_VV { +multiclass VPatIntegerSetCCSDNode_VV { foreach vti = AllIntegerVectors in { defvar instruction = !cast(instruction_name#"_VV_"#vti.LMul.MX); def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), cc)), - SwapHelper<(instruction), - (instruction vti.RegClass:$rs1), - (instruction vti.RegClass:$rs2), - (instruction vti.AVL, vti.Log2SEW), - swap>.Value>; + (instruction vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, + vti.Log2SEW)>; } } -multiclass VPatIntegerSetCCSDNode_XI + : VPatIntegerSetCCSDNode_VV { + foreach vti = AllIntegerVectors in { + defvar instruction = !cast(instruction_name#"_VV_"#vti.LMul.MX); + def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs2), + (vti.Vector vti.RegClass:$rs1), invcc)), + (instruction vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, + vti.Log2SEW)>; + } +} + +multiclass VPatIntegerSetCCSDNode_XI< string instruction_name, + CondCode cc, string kind, ComplexPattern SplatPatKind, - DAGOperand xop_kind, - bit swap = 0> { + DAGOperand xop_kind> { foreach vti = AllIntegerVectors in { defvar instruction = !cast(instruction_name#_#kind#_#vti.LMul.MX); def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)), - SwapHelper<(instruction), - (instruction vti.RegClass:$rs1), - (instruction xop_kind:$rs2), - (instruction vti.AVL, vti.Log2SEW), - swap>.Value>; + (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>; } } -multiclass VPatIntegerSetCCSDNode_VV_VX_VI { - defm : VPatIntegerSetCCSDNode_VV; - defm : VPatIntegerSetCCSDNode_XI; - defm : VPatIntegerSetCCSDNode_XI; +multiclass VPatIntegerSetCCSDNode_XI_Swappable + : VPatIntegerSetCCSDNode_XI { + foreach vti = AllIntegerVectors in { + defvar instruction = !cast(instruction_name#_#kind#_#vti.LMul.MX); + def : 
Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)), + (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>; + def : Pat<(vti.Mask (setcc (vti.Vector (SplatPatKind xop_kind:$rs2)), + (vti.Vector vti.RegClass:$rs1), invcc)), + (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>; + } } -multiclass VPatIntegerSetCCSDNode_VV_VX { - defm : VPatIntegerSetCCSDNode_VV; - defm : VPatIntegerSetCCSDNode_XI; -} +multiclass VPatIntegerSetCCSDNode_VX_Swappable + : VPatIntegerSetCCSDNode_XI_Swappable; -multiclass VPatIntegerSetCCSDNode_VX_VI { - defm : VPatIntegerSetCCSDNode_XI; - defm : VPatIntegerSetCCSDNode_XI; -} +multiclass VPatIntegerSetCCSDNode_VI + : VPatIntegerSetCCSDNode_XI; -multiclass VPatIntegerSetCCSDNode_VIPlus1 { foreach vti = AllIntegerVectors in { defvar instruction = !cast(instruction_name#"_VI_"#vti.LMul.MX); @@ -279,12 +255,12 @@ multiclass VPatFPSetCCSDNode_VV_VF_FV(inst_name#"_VV_"#fvti.LMul.MX) fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW)>; def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1), - (splat_vector fvti.ScalarRegClass:$rs2), + (SplatFPOp fvti.ScalarRegClass:$rs2), cc)), (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, fvti.AVL, fvti.Log2SEW)>; - def : Pat<(fvti.Mask (setcc (splat_vector fvti.ScalarRegClass:$rs2), + def : Pat<(fvti.Mask (setcc (SplatFPOp fvti.ScalarRegClass:$rs2), (fvti.Vector fvti.RegClass:$rs1), cc)), (!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) @@ -363,83 +339,122 @@ multiclass VPatNConvertFP2ISDNode_V { } } -multiclass VPatWidenBinarySDNode_VV_VX_WV_WX { - foreach vti = AllWidenableIntVectors in { - def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))), - (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), - (!cast(instruction_name#"_VV_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; - def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))), - (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))), - (!cast(instruction_name#"_VX_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, GPR:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; - def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), - (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), - (!cast(instruction_name#"_WV_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; - def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), - (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))), - (!cast(instruction_name#"_WX_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, GPR:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; +multiclass VPatWidenBinarySDNode_VV_VX { + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (extop2 (vti.Vector (SplatPat GPR:$rs1))))), + (!cast(instruction_name#"_VX_"#vti.LMul.MX) + vti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>; + } +} + +multiclass VPatWidenBinarySDNode_WV_WX { + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar 
wti = vtiToWti.Wti; + def : Pat<(op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))), + (!cast(instruction_name#"_WV_"#vti.LMul.MX) + wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + def : Pat<(op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))), + (!cast(instruction_name#"_WX_"#vti.LMul.MX) + wti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>; } } +multiclass VPatWidenBinarySDNode_VV_VX_WV_WX { + defm : VPatWidenBinarySDNode_VV_VX; + defm : VPatWidenBinarySDNode_WV_WX; +} + multiclass VPatWidenMulAddSDNode_VV { - foreach vti = AllWidenableIntVectors in { + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; def : Pat< - (add (vti.Wti.Vector vti.Wti.RegClass:$rd), - (mul_oneuse (vti.Wti.Vector (extop1 (vti.Vti.Vector vti.Vti.RegClass:$rs1))), - (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs2))))), - (!cast(instruction_name#"_VV_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rd, vti.Vti.RegClass:$rs1, vti.Vti.RegClass:$rs2, - vti.Vti.AVL, vti.Vti.Log2SEW, TAIL_AGNOSTIC + (add (wti.Vector wti.RegClass:$rd), + (mul_oneuse (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs1))), + (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs2))))), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC )>; } } multiclass VPatWidenMulAddSDNode_VX { - foreach vti = AllWidenableIntVectors in { + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; def : Pat< - (add (vti.Wti.Vector vti.Wti.RegClass:$rd), - (mul_oneuse (vti.Wti.Vector (extop1 (vti.Vti.Vector (SplatPat GPR:$rs1)))), - (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs2))))), - (!cast(instruction_name#"_VX_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rd, GPR:$rs1, vti.Vti.RegClass:$rs2, - vti.Vti.AVL, vti.Vti.Log2SEW, TAIL_AGNOSTIC + (add (wti.Vector wti.RegClass:$rd), + (mul_oneuse (wti.Vector (extop1 (vti.Vector (SplatPat GPR:$rs1)))), + (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs2))))), + (!cast(instruction_name#"_VX_"#vti.LMul.MX) + wti.RegClass:$rd, GPR:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC )>; } } multiclass VPatWidenBinaryFPSDNode_VV_VF { - foreach vti = AllWidenableFloatVectors in { - def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))), - (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), - (!cast(instruction_name#"_VV_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; - def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))), - (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))), - (!cast(instruction_name#"_V"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse 
+ (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; + def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; } } multiclass VPatWidenBinaryFPSDNode_WV_WF { - foreach vti = AllWidenableFloatVectors in { - def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), - (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), - (!cast(instruction_name#"_WV_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; - def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), - (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))), - (!cast(instruction_name#"_W"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (!cast(instruction_name#"_WV_"#vti.LMul.MX) + wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + def : Pat<(op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (!cast(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; + def : Pat<(op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), + (!cast(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; } } @@ -448,6 +463,148 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_WV_WF; } +multiclass VPatWidenFPMulAccSDNode_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector wti.RegClass:$rd)), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector wti.RegClass:$rd)), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenFPNegMulAccSDNode_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { 
+ defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)), + (fneg wti.RegClass:$rd)), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fma (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)), + (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (fneg wti.RegClass:$rd)), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)), + (fneg wti.RegClass:$rd)), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenFPMulSacSDNode_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)), + (fneg wti.RegClass:$rd)), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)), + (fneg wti.RegClass:$rd)), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenFPNegMulSacSDNode_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)), + wti.RegClass:$rd), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue))), + (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)))), + wti.RegClass:$rd), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector 
(SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue)), + wti.RegClass:$rd), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatMultiplyAddSDNode_VV_VX { + foreach vti = AllIntegerVectors in { + defvar suffix = vti.LMul.MX; + // NOTE: We choose VMADD because it has the most commuting freedom. So it + // works best with how TwoAddressInstructionPass tries commuting. + def : Pat<(vti.Vector (op vti.RegClass:$rs2, + (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))), + (!cast(instruction_name#"_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally + // commutable. + def : Pat<(vti.Vector (op vti.RegClass:$rs2, + (mul_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rd))), + (!cast(instruction_name#"_VX_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -520,42 +677,45 @@ defm : VPatBinarySDNode_VV_VX_VI; foreach vti = AllIntegerVectors in { // Emit shift by 1 as an add since it might be faster. def : Pat<(shl (vti.Vector vti.RegClass:$rs1), - (vti.Vector (splat_vector (XLenVT 1)))), - (!cast("PseudoVADD_VV_"# vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; -} -foreach vti = [VI64M1, VI64M2, VI64M4, VI64M8] in { - def : Pat<(shl (vti.Vector vti.RegClass:$rs1), - (vti.Vector (rv32_splat_i64 (XLenVT 1)))), + (vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVADD_VV_"# vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; } // 12.8. 
Vector Integer Comparison Instructions -defm : VPatIntegerSetCCSDNode_VV_VX_VI; -defm : VPatIntegerSetCCSDNode_VV_VX_VI; - -defm : VPatIntegerSetCCSDNode_VV_VX; -defm : VPatIntegerSetCCSDNode_VV_VX; -defm : VPatIntegerSetCCSDNode_VIPlus1; +defm : VPatIntegerSetCCSDNode_VV<"PseudoVMSNE", SETNE>; + +defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLT", SETLT, SETGT>; +defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLTU", SETULT, SETUGT>; +defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLE", SETLE, SETGE>; +defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLEU", SETULE, SETUGE>; + +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSEQ", SETEQ, SETEQ>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSNE", SETNE, SETNE>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLT", SETLT, SETGT>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLTU", SETULT, SETUGT>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLE", SETLE, SETGE>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLEU", SETULE, SETUGE>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSGT", SETGT, SETLT>; +defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSGTU", SETUGT, SETULT>; +// There is no VMSGE(U)_VX instruction + +defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSEQ", SETEQ>; +defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSNE", SETNE>; +defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSLE", SETLE>; +defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSLEU", SETULE>; +defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSGT", SETGT>; +defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSGTU", SETUGT>; + +defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSLE", SETLT, SplatPat_simm5_plus1_nonzero>; -defm : VPatIntegerSetCCSDNode_VIPlus1; - -defm : VPatIntegerSetCCSDNode_VV; -defm : VPatIntegerSetCCSDNode_VV; -defm : VPatIntegerSetCCSDNode_VX_VI; -defm : VPatIntegerSetCCSDNode_VX_VI; - -defm : VPatIntegerSetCCSDNode_VV_VX_VI; -defm : VPatIntegerSetCCSDNode_VV_VX_VI; - -defm : VPatIntegerSetCCSDNode_VV; -defm : VPatIntegerSetCCSDNode_VV; -defm : VPatIntegerSetCCSDNode_VIPlus1; -defm : VPatIntegerSetCCSDNode_VIPlus1; // 12.9. Vector Integer Min/Max Instructions @@ -575,37 +735,23 @@ defm : VPatBinarySDNode_VV_VX; defm : VPatBinarySDNode_VV_VX; defm : VPatBinarySDNode_VV_VX; -// 12.13 Vector Single-Width Integer Multiply-Add Instructions. -foreach vti = AllIntegerVectors in { - // NOTE: We choose VMADD because it has the most commuting freedom. So it - // works best with how TwoAddressInstructionPass tries commuting. - defvar suffix = vti.LMul.MX; - def : Pat<(vti.Vector (add vti.RegClass:$rs2, - (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))), - (!cast("PseudoVMADD_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (sub vti.RegClass:$rs2, - (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))), - (!cast("PseudoVNMSUB_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; +// 12.12. Vector Widening Integer Multiply Instructions +defm : VPatWidenBinarySDNode_VV_VX; +defm : VPatWidenBinarySDNode_VV_VX; +defm : VPatWidenBinarySDNode_VV_VX; +defm : VPatWidenBinarySDNode_VV_VX; +defm : VPatWidenBinarySDNode_VV_VX; +defm : VPatWidenBinarySDNode_VV_VX; - // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally - // commutable. 
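// Illustrative sketch (editorial addition, not part of the upstream patch;
// the vector registers are hypothetical). Per the ratified V spec, vmadd is
// destructive in one multiplicand:
//   vmadd.vv vd, vs1, vs2   ; vd = (vd * vs1) + vs2
// so the DAG (add $rs2, (mul_oneuse $rs1, $rd)) can tie either multiply
// operand to the destination, e.g.
//   (add v10, (mul v9, v8))  ==>  vmadd.vv v8, v9, v10
// which is the commuting freedom the NOTEs above refer to.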
- def : Pat<(vti.Vector (add vti.RegClass:$rs2, - (mul_oneuse (SplatPat XLenVT:$rs1), - vti.RegClass:$rd))), - (!cast("PseudoVMADD_VX_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (sub vti.RegClass:$rs2, - (mul_oneuse (SplatPat XLenVT:$rs1), - vti.RegClass:$rd))), - (!cast("PseudoVNMSUB_VX_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; -} +// 12.13 Vector Single-Width Integer Multiply-Add Instructions. +defm : VPatMultiplyAddSDNode_VV_VX; +defm : VPatMultiplyAddSDNode_VV_VX; // 12.14 Vector Widening Integer Multiply-Add Instructions defm : VPatWidenMulAddSDNode_VV; @@ -725,41 +871,47 @@ foreach fvti = AllFloatVectors in { // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally // commutable. - def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), + def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rd, fvti.RegClass:$rs2)), (!cast("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # suffix) fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), + def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))), (!cast("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # suffix) fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), + def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1), (fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))), (!cast("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix) fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), + def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1), (fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)), (!cast("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix) fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>; // The splat might be negated. - def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)), + def : Pat<(fvti.Vector (fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)), fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))), (!cast("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix) fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)), + def : Pat<(fvti.Vector (fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)), fvti.RegClass:$rd, fvti.RegClass:$rs2)), (!cast("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix) fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>; } +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +defm : VPatWidenFPMulAccSDNode_VV_VF<"PseudoVFWMACC">; +defm : VPatWidenFPNegMulAccSDNode_VV_VF<"PseudoVFWNMACC">; +defm : VPatWidenFPMulSacSDNode_VV_VF<"PseudoVFWMSAC">; +defm : VPatWidenFPNegMulSacSDNode_VV_VF<"PseudoVFWNMSAC">; + foreach vti = AllFloatVectors in { // 14.8. 
Vector Floating-Point Square-Root Instruction def : Pat<(fsqrt (vti.Vector vti.RegClass:$rs2)), @@ -780,7 +932,7 @@ foreach vti = AllFloatVectors in { (!cast("PseudoVFSGNJ_VV_"# vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), - (vti.Vector (splat_vector vti.ScalarRegClass:$rs2)))), + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), (!cast("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>; @@ -789,7 +941,7 @@ foreach vti = AllFloatVectors in { (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), - (vti.Vector (fneg (splat_vector vti.ScalarRegClass:$rs2))))), + (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), (!cast("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>; } @@ -822,7 +974,7 @@ foreach fvti = AllFloatVectors in { fvti.AVL, fvti.Log2SEW)>; def : Pat<(fvti.Vector (vselect (fvti.Mask V0), - (splat_vector fvti.ScalarRegClass:$rs1), + (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2)), (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) fvti.RegClass:$rs2, @@ -830,7 +982,7 @@ foreach fvti = AllFloatVectors in { (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; def : Pat<(fvti.Vector (vselect (fvti.Mask V0), - (splat_vector (fvti.Scalar fpimm0)), + (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; @@ -847,13 +999,6 @@ defm : VPatWConvertFP2ISDNode_V; defm : VPatWConvertFP2ISDNode_V; defm : VPatWConvertI2FPSDNode_V; defm : VPatWConvertI2FPSDNode_V; -foreach fvtiToFWti = AllWidenableFloatVectors in { - defvar fvti = fvtiToFWti.Vti; - defvar fwti = fvtiToFWti.Wti; - def : Pat<(fwti.Vector (fpextend (fvti.Vector fvti.RegClass:$rs1))), - (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX) - fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; -} // 14.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions defm : VPatNConvertFP2ISDNode_V; @@ -873,25 +1018,14 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { // Vector Splats //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructions] in { -foreach vti = AllIntegerVectors in { - def : Pat<(vti.Vector (SplatPat GPR:$rs1)), - (!cast("PseudoVMV_V_X_" # vti.LMul.MX) - GPR:$rs1, vti.AVL, vti.Log2SEW)>; - def : Pat<(vti.Vector (SplatPat_simm5 simm5:$rs1)), - (!cast("PseudoVMV_V_I_" # vti.LMul.MX) - simm5:$rs1, vti.AVL, vti.Log2SEW)>; -} -} // Predicates = [HasVInstructions] - let Predicates = [HasVInstructionsAnyF] in { foreach fvti = AllFloatVectors in { - def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)), + def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)), (!cast("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) (fvti.Scalar fvti.ScalarRegClass:$rs1), fvti.AVL, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (splat_vector (fvti.Scalar fpimm0))), + def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))), (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) 0, fvti.AVL, fvti.Log2SEW)>; } @@ -902,6 +1036,13 @@ foreach fvti = AllFloatVectors in { //===----------------------------------------------------------------------===// let Predicates = [HasVInstructionsAnyF] in foreach vti = AllFloatVectors in { + // Fold store of vmv.f.s to a vse with VL=1. + defvar store_instr = !cast("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX); + def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), BaseAddr:$rs1), + (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>; + def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), BaseAddr:$rs1), + (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>; + defvar vmv_f_s_inst = !cast(!strconcat("PseudoVFMV_", vti.ScalarSuffix, "_S_", vti.LMul.MX)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index e71c498fd5f4..081f61617d59 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -8,8 +8,7 @@ /// /// This file contains the required infrastructure and VL patterns to /// support code generation for the standard 'V' (Vector) extension, version -/// 0.10. This version is still experimental as the 'V' extension hasn't been -/// ratified yet. +/// 1.0. /// /// This file is included from and depends upon RISCVInstrInfoVPseudos.td /// @@ -22,11 +21,6 @@ // Helpers to define the VL patterns. 
//===----------------------------------------------------------------------===// -def SDT_RISCVVLE_VL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, - SDTCisVT<2, XLenVT>]>; -def SDT_RISCVVSE_VL : SDTypeProfile<0, 3, [SDTCisVec<0>, SDTCisPtrTy<1>, - SDTCisVT<2, XLenVT>]>; - def SDT_RISCVIntBinOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<0>, SDTCisInt<0>, @@ -47,13 +41,15 @@ def SDT_RISCVFPBinOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisVT<4, XLenVT>]>; def riscv_vmv_v_x_vl : SDNode<"RISCVISD::VMV_V_X_VL", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, - SDTCisVT<1, XLenVT>, - SDTCisVT<2, XLenVT>]>>; + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, XLenVT>, + SDTCisVT<3, XLenVT>]>>; def riscv_vfmv_v_f_vl : SDNode<"RISCVISD::VFMV_V_F_VL", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>, - SDTCisEltOfVec<1, 0>, - SDTCisVT<2, XLenVT>]>>; + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, + SDTCisSameAs<0, 1>, + SDTCisEltOfVec<2, 0>, + SDTCisVT<3, XLenVT>]>>; def riscv_vmv_s_x_vl : SDNode<"RISCVISD::VMV_S_X_VL", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, @@ -65,11 +61,6 @@ def riscv_vfmv_s_f_vl : SDNode<"RISCVISD::VFMV_S_F_VL", SDTCisEltOfVec<2, 0>, SDTCisVT<3, XLenVT>]>>; -def riscv_vle_vl : SDNode<"RISCVISD::VLE_VL", SDT_RISCVVLE_VL, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def riscv_vse_vl : SDNode<"RISCVISD::VSE_VL", SDT_RISCVVSE_VL, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - def riscv_add_vl : SDNode<"RISCVISD::ADD_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_sub_vl : SDNode<"RISCVISD::SUB_VL", SDT_RISCVIntBinOp_VL>; def riscv_mul_vl : SDNode<"RISCVISD::MUL_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; @@ -113,7 +104,10 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT>]>; -def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>; +def riscv_vfmadd_vl : SDNode<"RISCVISD::VFMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>; +def riscv_vfnmadd_vl : SDNode<"RISCVISD::VFNMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>; +def riscv_vfmsub_vl : SDNode<"RISCVISD::VFMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>; +def riscv_vfnmsub_vl : SDNode<"RISCVISD::VFNMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>; def SDT_RISCVFPRoundOp_VL : SDTypeProfile<1, 3, [ SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>, @@ -152,30 +146,33 @@ def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL", SDTCisVT<5, XLenVT>]>>; def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL", - SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, XLenVT>, SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, - SDTCisVT<4, XLenVT>]>>; + SDTCisSameAs<0, 4>, + SDTCisVT<5, XLenVT>]>>; def riscv_vrgather_vv_vl : SDNode<"RISCVISD::VRGATHER_VV_VL", - SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 2>, SDTCisSameSizeAs<0, 2>, SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, - SDTCisVT<4, XLenVT>]>>; + SDTCisSameAs<0, 4>, + SDTCisVT<5, XLenVT>]>>; def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL", - SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCVecEltisVT<2, i16>, SDTCisSameNumEltsAs<0, 2>, SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, - SDTCisVT<4, 
XLenVT>]>>; + SDTCisSameAs<0, 4>, + SDTCisVT<5, XLenVT>]>>; def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, @@ -185,6 +182,11 @@ def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>; def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>; +def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>, + SDTCisVT<1, XLenVT>]>; +def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>; +def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>; + def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCVecEltisVT<0, i1>, @@ -229,7 +231,22 @@ def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>; +def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; +def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; +def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; + +def SDT_RISCVVWBinOpW_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameNumEltsAs<1, 2>, + SDTCisOpSmallerThanOp<2, 1>, + SDTCisSameNumEltsAs<1, 3>, + SDTCVecEltisVT<3, i1>, + SDTCisVT<4, XLenVT>]>; +def riscv_vwadd_w_vl : SDNode<"RISCVISD::VWADD_W_VL", SDT_RISCVVWBinOpW_VL>; +def riscv_vwaddu_w_vl : SDNode<"RISCVISD::VWADDU_W_VL", SDT_RISCVVWBinOpW_VL>; +def riscv_vwsub_w_vl : SDNode<"RISCVISD::VWSUB_W_VL", SDT_RISCVVWBinOpW_VL>; +def riscv_vwsubu_w_vl : SDNode<"RISCVISD::VWSUBU_W_VL", SDT_RISCVVWBinOpW_VL>; def SDTRVVVecReduce : SDTypeProfile<1, 5, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, @@ -254,45 +271,69 @@ def riscv_vwmulu_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D), return N->hasOneUse(); }]>; +def riscv_vwmulsu_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D), + (riscv_vwmulsu_vl node:$A, node:$B, node:$C, + node:$D), [{ + return N->hasOneUse(); +}]>; + +def riscv_sext_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C), + (riscv_sext_vl node:$A, node:$B, node:$C), [{ + return N->hasOneUse(); +}]>; + +def riscv_zext_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C), + (riscv_zext_vl node:$A, node:$B, node:$C), [{ + return N->hasOneUse(); +}]>; + +def riscv_fpextend_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C), + (riscv_fpextend_vl node:$A, node:$B, node:$C), [{ + return N->hasOneUse(); +}]>; + foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR", "FADD", "SEQ_FADD", "FMIN", "FMAX"] in def rvv_vecreduce_#kind#_vl : SDNode<"RISCVISD::VECREDUCE_"#kind#"_VL", SDTRVVVecReduce>; +// Give explicit Complexity to prefer simm5/uimm5. +def SplatPat : ComplexPattern; +def SplatPat_simm5 : ComplexPattern; +def SplatPat_uimm5 : ComplexPattern; +def SplatPat_simm5_plus1 + : ComplexPattern; +def SplatPat_simm5_plus1_nonzero + : ComplexPattern; + // Ignore the vl operand. 
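// Illustrative note (editorial addition, not part of the upstream patch):
// with the widened VFMV_V_F_VL profile above, a floating-point splat of a
// scalar f now appears in the DAG as
//   (riscv_vfmv_v_f_vl undef, f, vl)
// so the fragment below pins the new passthru operand to undef and, per the
// comment, wildcards the vl operand with srcvalue.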
def SplatFPOp : PatFrag<(ops node:$op), - (riscv_vfmv_v_f_vl node:$op, srcvalue)>; + (riscv_vfmv_v_f_vl undef, node:$op, srcvalue)>; def sew8simm5 : ComplexPattern", []>; def sew16simm5 : ComplexPattern", []>; def sew32simm5 : ComplexPattern", []>; def sew64simm5 : ComplexPattern", []>; -multiclass VPatBinaryVL_VV { - def : Pat<(result_type (vop - (op_type op_reg_class:$rs1), - (op_type op_reg_class:$rs2), - (mask_type true_mask), - VLOpFrag)), - (!cast(instruction_name#"_VV_"# vlmul.MX) - op_reg_class:$rs1, - op_reg_class:$rs2, - GPR:$vl, sew)>; +multiclass VPatBinaryVL_V { def : Pat<(result_type (vop - (op_type op_reg_class:$rs1), - (op_type op_reg_class:$rs2), + (op1_type op1_reg_class:$rs1), + (op2_type op2_reg_class:$rs2), (mask_type V0), VLOpFrag)), - (!cast(instruction_name#"_VV_"# vlmul.MX#"_MASK") + (!cast(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK") (result_type (IMPLICIT_DEF)), - op_reg_class:$rs1, - op_reg_class:$rs2, + op1_reg_class:$rs1, + op2_reg_class:$rs2, (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>; } @@ -300,7 +341,8 @@ multiclass VPatBinaryVL_XI { def : Pat<(result_type (vop - (vop_type vop_reg_class:$rs1), - (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))), - (mask_type true_mask), - VLOpFrag)), - (!cast(instruction_name#_#suffix#_# vlmul.MX) - vop_reg_class:$rs1, - xop_kind:$rs2, - GPR:$vl, sew)>; - def : Pat<(result_type (vop - (vop_type vop_reg_class:$rs1), - (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))), + (vop1_type vop_reg_class:$rs1), + (vop2_type (SplatPatKind (XLenVT xop_kind:$rs2))), (mask_type V0), VLOpFrag)), (!cast(instruction_name#_#suffix#_# vlmul.MX#"_MASK") @@ -330,12 +363,12 @@ multiclass VPatBinaryVL_XI { foreach vti = AllIntegerVectors in { - defm : VPatBinaryVL_VV; + defm : VPatBinaryVL_V; defm : VPatBinaryVL_XI; + vti.Vector, vti.Vector, vti.Vector, vti.Mask, + vti.Log2SEW, vti.LMul, vti.RegClass, SplatPat, GPR>; } } @@ -344,8 +377,8 @@ multiclass VPatBinaryVL_VV_VX_VI { foreach vti = AllIntegerVectors in { defm : VPatBinaryVL_XI(SplatPat#_#ImmType), ImmType>; } @@ -355,12 +388,26 @@ multiclass VPatBinaryWVL_VV_VX { foreach VtiToWti = AllWidenableIntVectors in { defvar vti = VtiToWti.Vti; defvar wti = VtiToWti.Wti; - defm : VPatBinaryVL_VV; + defm : VPatBinaryVL_V; defm : VPatBinaryVL_XI; + wti.Vector, vti.Vector, vti.Vector, vti.Mask, + vti.Log2SEW, vti.LMul, vti.RegClass, SplatPat, GPR>; + } +} +multiclass VPatBinaryWVL_VV_VX_WV_WX + : VPatBinaryWVL_VV_VX { + foreach VtiToWti = AllWidenableIntVectors in { + defvar vti = VtiToWti.Vti; + defvar wti = VtiToWti.Wti; + defm : VPatBinaryVL_V; + defm : VPatBinaryVL_XI; } } @@ -373,14 +420,6 @@ multiclass VPatBinaryVL_VF { - def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1), - (vop_type (SplatFPOp scalar_reg_class:$rs2)), - (mask_type true_mask), - VLOpFrag)), - (!cast(instruction_name#"_"#vlmul.MX) - vop_reg_class:$rs1, - scalar_reg_class:$rs2, - GPR:$vl, sew)>; def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1), (vop_type (SplatFPOp scalar_reg_class:$rs2)), (mask_type V0), @@ -394,9 +433,9 @@ multiclass VPatBinaryVL_VF { foreach vti = AllFloatVectors in { - defm : VPatBinaryVL_VV; + defm : VPatBinaryVL_V; defm : VPatBinaryVL_VF; @@ -405,13 +444,6 @@ multiclass VPatBinaryFPVL_VV_VF { multiclass VPatBinaryFPVL_R_VF { foreach fvti = AllFloatVectors in { - def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2), - fvti.RegClass:$rs1, - (fvti.Mask true_mask), - VLOpFrag)), - (!cast(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, 
fvti.ScalarRegClass:$rs2, - GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2), fvti.RegClass:$rs1, (fvti.Mask V0), @@ -427,65 +459,87 @@ multiclass VPatIntegerSetCCVL_VV { def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1), vti.RegClass:$rs2, cc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_VV_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, - vti.Log2SEW)>; + (!cast(instruction_name#"_VV_"#vti.LMul.MX#"_MASK") + (vti.Mask (IMPLICIT_DEF)), + vti.RegClass:$rs1, + vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } // Inherits from VPatIntegerSetCCVL_VV and adds a pattern with operands swapped. multiclass VPatIntegerSetCCVL_VV_Swappable : - VPatIntegerSetCCVL_VV { + CondCode cc, CondCode invcc> + : VPatIntegerSetCCVL_VV { def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2), vti.RegClass:$rs1, invcc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_VV_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, - vti.Log2SEW)>; + (!cast(instruction_name#"_VV_"#vti.LMul.MX#"_MASK") + (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } multiclass VPatIntegerSetCCVL_VX_Swappable { - defvar instruction = !cast(instruction_name#"_VX_"#vti.LMul.MX); + defvar instruction_masked = !cast(instruction_name#"_VX_"#vti.LMul.MX#"_MASK"); def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1), (SplatPat (XLenVT GPR:$rs2)), cc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)), (vti.Vector vti.RegClass:$rs1), invcc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } multiclass VPatIntegerSetCCVL_VI_Swappable { - defvar instruction = !cast(instruction_name#"_VI_"#vti.LMul.MX); + defvar instruction_masked = !cast(instruction_name#"_VI_"#vti.LMul.MX#"_MASK"); def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1), (SplatPat_simm5 simm5:$rs2), cc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (instruction vti.RegClass:$rs1, XLenVT:$rs2, GPR:$vl, vti.Log2SEW)>; + (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + XLenVT:$rs2, (vti.Mask V0), GPR:$vl, + vti.Log2SEW)>; + + // FIXME: Can do some canonicalization to remove these patterns. 
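// Illustrative sketch (editorial addition, not part of the upstream patch;
// c is a hypothetical constant). The swapped forms below handle a splat on
// the left-hand side:  setcc (splat c), x, SETGT  is just  x < splat(c)
// read backwards. The VIPlus1 forms then rewrite the immediate: there is no
// vmslt.vi encoding, so  x < splat(c)  is matched as
//   vmsle.vi vd, x, c-1       ; via SplatPat_simm5_plus1 and DecImm
// whenever c-1 fits in simm5.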
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2), (vti.Vector vti.RegClass:$rs1), invcc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (instruction vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>; + (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + simm5:$rs2, (vti.Mask V0), GPR:$vl, + vti.Log2SEW)>; } -multiclass VPatIntegerSetCCVL_VIPlus1 { - defvar instruction = !cast(instruction_name#"_VI_"#vti.LMul.MX); +multiclass VPatIntegerSetCCVL_VIPlus1_Swappable { + defvar instruction_masked = !cast(instruction_name#"_VI_"#vti.LMul.MX#"_MASK"); def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1), (splatpat_kind simm5:$rs2), cc, - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (instruction vti.RegClass:$rs1, (DecImm simm5:$rs2), - GPR:$vl, vti.Log2SEW)>; + (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl, + vti.Log2SEW)>; + + // FIXME: Can do some canonicalization to remove these patterns. + def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2), + (vti.Vector vti.RegClass:$rs1), invcc, + (vti.Mask V0), + VLOpFrag)), + (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1, + (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl, + vti.Log2SEW)>; } multiclass VPatFPSetCCVL_VV_VF_FV(inst_name#"_VV_"#fvti.LMul.MX) - fvti.RegClass:$rs1, fvti.RegClass:$rs2, GPR:$vl, fvti.Log2SEW)>; + (!cast(inst_name#"_VV_"#fvti.LMul.MX#"_MASK") + (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1, + fvti.RegClass:$rs2, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1), (SplatFPOp fvti.ScalarRegClass:$rs2), cc, - (fvti.Mask true_mask), + (fvti.Mask V0), VLOpFrag)), - (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, + (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") + (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1, + fvti.ScalarRegClass:$rs2, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2), (fvti.Vector fvti.RegClass:$rs1), cc, - (fvti.Mask true_mask), + (fvti.Mask V0), VLOpFrag)), - (!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, + (!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") + (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1, + fvti.ScalarRegClass:$rs2, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; } } @@ -524,9 +582,11 @@ multiclass VPatExtendSDNode_V_VL(inst_name#"_"#suffix#"_"#vti.LMul.MX) - fti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + (fti.Mask V0), VLOpFrag)), + (!cast(inst_name#"_"#suffix#"_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), + fti.RegClass:$rs2, + (fti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -534,10 +594,11 @@ multiclass VPatConvertFP2ISDNode_V_VL { foreach fvti = AllFloatVectors in { defvar ivti = GetIntVTypeInfo.Vti; def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask true_mask), + (fvti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_"#ivti.LMul.MX) - fvti.RegClass:$rs1, GPR:$vl, ivti.Log2SEW)>; + (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") + (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, + (fvti.Mask V0), GPR:$vl, ivti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -545,10 +606,11 @@ multiclass VPatConvertI2FPSDNode_V_VL { foreach fvti = AllFloatVectors in { defvar ivti = GetIntVTypeInfo.Vti; def : Pat<(fvti.Vector (vop 
(ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask true_mask), + (ivti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_"#fvti.LMul.MX) - ivti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, + (ivti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -557,10 +619,11 @@ multiclass VPatWConvertFP2ISDNode_V_VL { defvar fvti = fvtiToFWti.Vti; defvar iwti = GetIntVTypeInfo.Vti; def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask true_mask), + (fvti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -569,10 +632,11 @@ multiclass VPatWConvertI2FPSDNode_V_VL { defvar ivti = vtiToWti.Vti; defvar fwti = vtiToWti.Wti; def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask true_mask), + (ivti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_"#ivti.LMul.MX) - ivti.RegClass:$rs1, GPR:$vl, ivti.Log2SEW)>; + (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") + (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, + (ivti.Mask V0), GPR:$vl, ivti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -581,10 +645,11 @@ multiclass VPatNConvertFP2ISDNode_V_VL { defvar vti = vtiToWti.Vti; defvar fwti = vtiToWti.Wti; def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask true_mask), + (fwti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_"#vti.LMul.MX) - fwti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; + (!cast(instruction_name#"_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -593,10 +658,11 @@ multiclass VPatNConvertI2FPSDNode_V_VL { defvar fvti = fvtiToFWti.Vti; defvar iwti = GetIntVTypeInfo.Vti; def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), - (iwti.Mask true_mask), + (iwti.Mask V0), VLOpFrag)), - (!cast(instruction_name#"_"#fvti.LMul.MX) - iwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, + (iwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -622,45 +688,286 @@ multiclass VPatReductionVL { } } -multiclass VPatBinarySDNodeExt_V_WV { - foreach vti = AllWidenableIntVectors in { +multiclass VPatBinarySDNodeExt_V_WV_WX { + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat< + (vti.Vector + (riscv_trunc_vector_vl + (op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))), + (vti.Mask true_mask), + VLOpFrag)), + (!cast(instruction_name#"_WV_"#vti.LMul.MX) + wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; + def : Pat< + (vti.Vector + (riscv_trunc_vector_vl + (op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))), + (vti.Mask true_mask), + VLOpFrag)), + (!cast(instruction_name#"_WX_"#vti.LMul.MX) + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; + } +} + +multiclass VPatBinarySDNode_V_WV_WX_WI { + defm : VPatBinarySDNodeExt_V_WV_WX; + defm : VPatBinarySDNodeExt_V_WV_WX; + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; def : Pat< - (vti.Vti.Vector + (vti.Vector (riscv_trunc_vector_vl - (op (vti.Wti.Vector vti.Wti.RegClass:$rs2), - 
(vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), - (riscv_vmset_vl VLMax), - VLMax)), - (!cast(instruction_name#"_WV_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; + (op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (SplatPat_uimm5 uimm5:$rs1))), (vti.Mask true_mask), + VLOpFrag)), + (!cast(instruction_name#"_WI_"#vti.LMul.MX) + wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW)>; + } +} + +multiclass VPatWidenReductionVL { + foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + defvar wti_m1 = !cast(!if(is_float, "VF", "VI") # wti.SEW # "M1"); + def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), + (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), + VR:$rs2, (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VS_"#vti.LMul.MX) + (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), + (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW)>; + def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), + (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), + VR:$rs2, (vti.Mask V0), VLOpFrag)), + (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_MASK") + (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), + (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + } +} + +multiclass VPatWidenReductionVL_Ext_VL { + foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + defvar wti_m1 = !cast(!if(is_float, "VF", "VI") # wti.SEW # "M1"); + def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), + (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), + VR:$rs2, (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VS_"#vti.LMul.MX) + (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), + (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW)>; + def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), + (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), + VR:$rs2, (vti.Mask V0), VLOpFrag)), + (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_MASK") + (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), + (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } } -multiclass VPatBinarySDNodeExt_V_WX { - foreach vti = AllWidenableIntVectors in { +multiclass VPatWidenBinaryFPVL_VV_VF { + foreach fvtiToFWti = AllWidenableFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + def : Pat<(fwti.Vector (op (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs2), + (fvti.Mask true_mask), VLOpFrag)), + (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1), + (fvti.Mask true_mask), VLOpFrag)), + (fwti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VV_"#fvti.LMul.MX) + fvti.RegClass:$rs2, fvti.RegClass:$rs1, + GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fwti.Vector (op (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs2), + (fvti.Mask true_mask), VLOpFrag)), + (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)), + (fvti.Mask true_mask), VLOpFrag)), + (fwti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs2, fvti.ScalarRegClass:$rs1, + GPR:$vl, fvti.Log2SEW)>; + } +} + +multiclass VPatWidenBinaryFPVL_WV_WF { + foreach fvtiToFWti = AllWidenableFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + def : 
Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2), + (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1), + (fvti.Mask true_mask), VLOpFrag)), + (fwti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_WV_"#fvti.LMul.MX) + fwti.RegClass:$rs2, fvti.RegClass:$rs1, + GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2), + (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)), + (fvti.Mask true_mask), VLOpFrag)), + (fwti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_W"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fwti.RegClass:$rs2, fvti.ScalarRegClass:$rs1, + GPR:$vl, fvti.Log2SEW)>; + } +} + +multiclass VPatWidenBinaryFPVL_VV_VF_WV_WF { + defm : VPatWidenBinaryFPVL_VV_VF; + defm : VPatWidenBinaryFPVL_WV_WF; +} + +multiclass VPatNarrowShiftSplatExt_WX { + foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; def : Pat< - (vti.Vti.Vector + (vti.Vector (riscv_trunc_vector_vl - (op (vti.Wti.Vector vti.Wti.RegClass:$rs2), - (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))), - (riscv_vmset_vl VLMax), - VLMax)), - (!cast(instruction_name#"_WX_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, GPR:$rs1, - vti.Vti.AVL, vti.Vti.Log2SEW)>; + (op (wti.Vector wti.RegClass:$rs2), + (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (wti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_WX_"#vti.LMul.MX) + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; + } +} + +multiclass VPatMultiplyAddVL_VV_VX { + foreach vti = AllIntegerVectors in { + defvar suffix = vti.LMul.MX; + // NOTE: We choose VMADD because it has the most commuting freedom. So it + // works best with how TwoAddressInstructionPass tries commuting. + def : Pat<(vti.Vector + (op vti.RegClass:$rs2, + (riscv_mul_vl_oneuse vti.RegClass:$rs1, + vti.RegClass:$rd, + (vti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally + // commutable. 
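// Illustrative sketch (editorial addition, not part of the upstream patch;
// the values are hypothetical). The _oneuse restriction matters because
// vmadd overwrites one multiplicand. Given
//   t = mul x, y ; a = add t, z ; b = sub w, t
// folding t into a vmadd for `a` would destroy the multiply result that `b`
// still needs, so the multiply must be materialized anyway and the fusion
// would save nothing.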
+ def : Pat<(vti.Vector + (op vti.RegClass:$rs2, + (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), + vti.RegClass:$rd, + (vti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VX_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenMultiplyAddVL_VV_VX { + foreach vtiTowti = AllWidenableIntVectors in { + defvar vti = vtiTowti.Vti; + defvar wti = vtiTowti.Wti; + def : Pat<(wti.Vector + (riscv_add_vl wti.RegClass:$rd, + (op1 vti.RegClass:$rs1, + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VV_" # vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(wti.Vector + (riscv_add_vl wti.RegClass:$rd, + (op1 (SplatPat XLenVT:$rs1), + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast(instruction_name#"_VX_" # vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } +multiclass VPatNarrowShiftSplat_WX_WI { + foreach vtiTowti = AllWidenableIntVectors in { + defvar vti = vtiTowti.Vti; + defvar wti = vtiTowti.Wti; + def : Pat<(vti.Vector (riscv_trunc_vector_vl + (wti.Vector (op wti.RegClass:$rs1, (SplatPat XLenVT:$rs2), + true_mask, VLOpFrag)), true_mask, VLOpFrag)), + (!cast(instruction_name#"_WX_"#vti.LMul.MX) + wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_trunc_vector_vl + (wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2), + true_mask, VLOpFrag)), true_mask, VLOpFrag)), + (!cast(instruction_name#"_WI_"#vti.LMul.MX) + wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>; + } +} -multiclass VPatBinarySDNode_V_WV { - defm : VPatBinarySDNodeExt_V_WV; - defm : VPatBinarySDNodeExt_V_WV; +multiclass VPatFPMulAddVL_VV_VF { + foreach vti = AllFloatVectors in { + defvar suffix = vti.LMul.MX; + def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd, + vti.RegClass:$rs2, (vti.Mask true_mask), + VLOpFrag)), + (!cast(instruction_name#"_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd, + vti.RegClass:$rs2, (vti.Mask V0), + VLOpFrag)), + (!cast(instruction_name#"_VV_"# suffix #"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask true_mask), + VLOpFrag)), + (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask V0), + VLOpFrag)), + (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } } -multiclass VPatBinarySDNode_V_WX { - defm : VPatBinarySDNodeExt_V_WX; - defm : VPatBinarySDNodeExt_V_WX; +multiclass VPatWidenFPMulAccVL_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(vop + (wti.Vector 
(riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask), + VLOpFrag), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vop + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask), + VLOpFrag), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } } //===----------------------------------------------------------------------===// @@ -669,51 +976,18 @@ multiclass VPatBinarySDNode_V_WX { let Predicates = [HasVInstructions] in { -// 7.4. Vector Unit-Stride Instructions -foreach vti = AllVectors in { - defvar load_instr = !cast("PseudoVLE"#vti.SEW#"_V_"#vti.LMul.MX); - defvar store_instr = !cast("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX); - // Load - def : Pat<(vti.Vector (riscv_vle_vl BaseAddr:$rs1, VLOpFrag)), - (load_instr BaseAddr:$rs1, GPR:$vl, vti.Log2SEW)>; - // Store - def : Pat<(riscv_vse_vl (vti.Vector vti.RegClass:$rs2), BaseAddr:$rs1, - VLOpFrag), - (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, GPR:$vl, vti.Log2SEW)>; -} - -foreach mti = AllMasks in { - defvar load_instr = !cast("PseudoVLM_V_"#mti.BX); - defvar store_instr = !cast("PseudoVSM_V_"#mti.BX); - def : Pat<(mti.Mask (riscv_vle_vl BaseAddr:$rs1, VLOpFrag)), - (load_instr BaseAddr:$rs1, GPR:$vl, mti.Log2SEW)>; - def : Pat<(riscv_vse_vl (mti.Mask VR:$rs2), BaseAddr:$rs1, - VLOpFrag), - (store_instr VR:$rs2, BaseAddr:$rs1, GPR:$vl, mti.Log2SEW)>; -} - // 12.1. Vector Single-Width Integer Add and Subtract defm : VPatBinaryVL_VV_VX_VI; defm : VPatBinaryVL_VV_VX; // Handle VRSUB specially since it's the only integer binary op with reversed // pattern operands foreach vti = AllIntegerVectors in { - def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), - (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), - VLOpFrag), - (!cast("PseudoVRSUB_VX_"# vti.LMul.MX) - vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag), (!cast("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), - (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), - VLOpFrag), - (!cast("PseudoVRSUB_VI_"# vti.LMul.MX) - vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag), @@ -723,7 +997,10 @@ foreach vti = AllIntegerVectors in { } // 12.2. Vector Widening Integer Add/Subtract -defm : VPatBinaryWVL_VV_VX; +defm : VPatBinaryWVL_VV_VX_WV_WX; +defm : VPatBinaryWVL_VV_VX_WV_WX; +defm : VPatBinaryWVL_VV_VX_WV_WX; +defm : VPatBinaryWVL_VV_VX_WV_WX; // 12.3. Vector Integer Extension defm : VPatExtendSDNode_V_VL; defm : VPatExtendSDNode_V_VL; + AllFractionableVF8IntVectors>; // 12.5. 
Vector Bitwise Logical Instructions defm : VPatBinaryVL_VV_VX_VI; @@ -752,7 +1029,7 @@ defm : VPatBinaryVL_VV_VX_VI; foreach vti = AllIntegerVectors in { // Emit shift by 1 as an add since it might be faster. def : Pat<(riscv_shl_vl (vti.Vector vti.RegClass:$rs1), - (riscv_vmv_v_x_vl 1, (XLenVT srcvalue)), + (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)), (vti.Mask true_mask), VLOpFrag), (!cast("PseudoVADD_VV_"# vti.LMul.MX) @@ -760,49 +1037,25 @@ foreach vti = AllIntegerVectors in { } // 12.7. Vector Narrowing Integer Right Shift Instructions -defm : VPatBinarySDNode_V_WV; -defm : VPatBinarySDNode_V_WX; -defm : VPatBinarySDNode_V_WV; -defm : VPatBinarySDNode_V_WX; +defm : VPatBinarySDNode_V_WV_WX_WI; +defm : VPatBinarySDNode_V_WV_WX_WI; + +defm : VPatNarrowShiftSplat_WX_WI; +defm : VPatNarrowShiftSplat_WX_WI; +defm : VPatNarrowShiftSplatExt_WX; +defm : VPatNarrowShiftSplatExt_WX; +defm : VPatNarrowShiftSplatExt_WX; +defm : VPatNarrowShiftSplatExt_WX; foreach vtiTowti = AllWidenableIntVectors in { defvar vti = vtiTowti.Vti; defvar wti = vtiTowti.Wti; def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1), - (vti.Mask true_mask), + (vti.Mask V0), VLOpFrag)), - (!cast("PseudoVNSRL_WX_"#vti.LMul.MX) - wti.RegClass:$rs1, X0, GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector - (riscv_trunc_vector_vl - (wti.Vector - (riscv_sra_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2), - true_mask, VLOpFrag)), true_mask, VLOpFrag)), - (!cast("PseudoVNSRA_WX_"#vti.LMul.MX) - wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector - (riscv_trunc_vector_vl - (wti.Vector - (riscv_sra_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2), - true_mask, VLOpFrag)), true_mask, VLOpFrag)), - (!cast("PseudoVNSRA_WI_"#vti.LMul.MX) - wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector - (riscv_trunc_vector_vl - (wti.Vector - (riscv_srl_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2), - true_mask, VLOpFrag)), true_mask, VLOpFrag)), - (!cast("PseudoVNSRL_WX_"#vti.LMul.MX) - wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector - (riscv_trunc_vector_vl - (wti.Vector - (riscv_srl_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2), - true_mask, VLOpFrag)), true_mask, VLOpFrag)), - (!cast("PseudoVNSRL_WI_"#vti.LMul.MX) - wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>; + (!cast("PseudoVNSRL_WX_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, X0, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // 12.8. Vector Integer Comparison Instructions @@ -832,14 +1085,14 @@ foreach vti = AllIntegerVectors in { defm : VPatIntegerSetCCVL_VI_Swappable; defm : VPatIntegerSetCCVL_VI_Swappable; - defm : VPatIntegerSetCCVL_VIPlus1; - defm : VPatIntegerSetCCVL_VIPlus1; - defm : VPatIntegerSetCCVL_VIPlus1; - defm : VPatIntegerSetCCVL_VIPlus1; + defm : VPatIntegerSetCCVL_VIPlus1_Swappable; + defm : VPatIntegerSetCCVL_VIPlus1_Swappable; + defm : VPatIntegerSetCCVL_VIPlus1_Swappable; + defm : VPatIntegerSetCCVL_VIPlus1_Swappable; } // foreach vti = AllIntegerVectors // 12.9. Vector Integer Min/Max Instructions @@ -865,92 +1118,24 @@ defm : VPatBinaryWVL_VV_VX; defm : VPatBinaryWVL_VV_VX; // 12.13 Vector Single-Width Integer Multiply-Add Instructions -foreach vti = AllIntegerVectors in { - // NOTE: We choose VMADD because it has the most commuting freedom. So it - // works best with how TwoAddressInstructionPass tries commuting. 
- defvar suffix = vti.LMul.MX; - def : Pat<(vti.Vector - (riscv_add_vl vti.RegClass:$rs2, - (riscv_mul_vl_oneuse vti.RegClass:$rs1, - vti.RegClass:$rd, - (vti.Mask true_mask), VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVMADD_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector - (riscv_sub_vl vti.RegClass:$rs2, - (riscv_mul_vl_oneuse vti.RegClass:$rs1, - vti.RegClass:$rd, - (vti.Mask true_mask), VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVNMSUB_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - - // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally - // commutable. - def : Pat<(vti.Vector - (riscv_add_vl vti.RegClass:$rs2, - (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), - vti.RegClass:$rd, - (vti.Mask true_mask), VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVMADD_VX_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector - (riscv_sub_vl vti.RegClass:$rs2, - (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), - vti.RegClass:$rd, - (vti.Mask true_mask), - VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVNMSUB_VX_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; -} +defm : VPatMultiplyAddVL_VV_VX; +defm : VPatMultiplyAddVL_VV_VX; // 12.14. Vector Widening Integer Multiply-Add Instructions +defm : VPatWidenMultiplyAddVL_VV_VX; +defm : VPatWidenMultiplyAddVL_VV_VX; +defm : VPatWidenMultiplyAddVL_VV_VX; foreach vtiTowti = AllWidenableIntVectors in { defvar vti = vtiTowti.Vti; defvar wti = vtiTowti.Wti; def : Pat<(wti.Vector (riscv_add_vl wti.RegClass:$rd, - (riscv_vwmul_vl_oneuse vti.RegClass:$rs1, - (vti.Vector vti.RegClass:$rs2), - (vti.Mask true_mask), VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVWMACC_VV_" # vti.LMul.MX) - wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(wti.Vector - (riscv_add_vl wti.RegClass:$rd, - (riscv_vwmulu_vl_oneuse vti.RegClass:$rs1, - (vti.Vector vti.RegClass:$rs2), - (vti.Mask true_mask), VLOpFrag), + (riscv_vwmulsu_vl_oneuse (vti.Vector vti.RegClass:$rs1), + (SplatPat XLenVT:$rs2), + (vti.Mask true_mask), VLOpFrag), (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVWMACCU_VV_" # vti.LMul.MX) - wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - - def : Pat<(wti.Vector - (riscv_add_vl wti.RegClass:$rd, - (riscv_vwmul_vl_oneuse (SplatPat XLenVT:$rs1), - (vti.Vector vti.RegClass:$rs2), - (vti.Mask true_mask), VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVWMACC_VX_" # vti.LMul.MX) - wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(wti.Vector - (riscv_add_vl wti.RegClass:$rd, - (riscv_vwmulu_vl_oneuse (SplatPat XLenVT:$rs1), - (vti.Vector vti.RegClass:$rs2), - (vti.Mask true_mask), VLOpFrag), - (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVWMACCU_VX_" # vti.LMul.MX) - wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + (!cast("PseudoVWMACCUS_VX_" # vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } @@ -1005,14 +1190,21 @@ foreach vti = AllIntegerVectors in { // 12.16. 
Vector Integer Move Instructions foreach vti = AllIntegerVectors in { - def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, VLOpFrag)), + def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), GPR:$rs2, VLOpFrag)), (!cast("PseudoVMV_V_X_"#vti.LMul.MX) $rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.Vector:$passthru, GPR:$rs2, VLOpFrag)), + (!cast("PseudoVMV_V_X_"#vti.LMul.MX#"_TU") + $passthru, $rs2, GPR:$vl, vti.Log2SEW)>; defvar ImmPat = !cast("sew"#vti.SEW#"simm5"); - def : Pat<(vti.Vector (riscv_vmv_v_x_vl (ImmPat XLenVT:$imm5), + def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), (ImmPat XLenVT:$imm5), VLOpFrag)), (!cast("PseudoVMV_V_I_"#vti.LMul.MX) XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.Vector:$passthru, (ImmPat XLenVT:$imm5), + VLOpFrag)), + (!cast("PseudoVMV_V_I_"#vti.LMul.MX#"_TU") + $passthru, XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>; } // 12.1. Vector Single-Width Saturating Add and Subtract @@ -1033,6 +1225,13 @@ defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; + +// 15.2. Vector Widening Integer Reduction Instructions +defm : VPatWidenReductionVL; +defm : VPatWidenReductionVL; +defm : VPatWidenReductionVL_Ext_VL; +defm : VPatWidenReductionVL; +defm : VPatWidenReductionVL_Ext_VL; } // Predicates = [HasVInstructions] // 15.3. Vector Single-Width Floating-Point Reduction Instructions @@ -1041,6 +1240,12 @@ defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; + +// 15.4. Vector Widening Floating-Point Reduction Instructions +defm : VPatWidenReductionVL; +defm : VPatWidenReductionVL_Ext_VL; +defm : VPatWidenReductionVL; +defm : VPatWidenReductionVL_Ext_VL; } // Predicates = [HasVInstructionsAnyF] let Predicates = [HasVInstructionsAnyF] in { @@ -1050,118 +1255,29 @@ defm : VPatBinaryFPVL_VV_VF; defm : VPatBinaryFPVL_VV_VF; defm : VPatBinaryFPVL_R_VF; +// 14.3. Vector Widening Floating-Point Add/Subtract Instructions +defm : VPatWidenBinaryFPVL_VV_VF_WV_WF; +defm : VPatWidenBinaryFPVL_VV_VF_WV_WF; + // 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions defm : VPatBinaryFPVL_VV_VF; defm : VPatBinaryFPVL_VV_VF; defm : VPatBinaryFPVL_R_VF; -// 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions. -foreach vti = AllFloatVectors in { - // NOTE: We choose VFMADD because it has the most commuting freedom. So it - // works best with how TwoAddressInstructionPass tries commuting. 
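As with the integer case, the FP patterns place riscv_fneg_vl around the fma operands to select one of four fused forms. A scalar sketch of that mapping, assuming the standard RISC-V F-extension semantics (illustrative code, not LLVM's):

#include <cmath>

// vfmadd:   (a * b) + c        vfmsub:  (a * b) - c
// vfnmsub: -(a * b) + c        vfnmadd: -(a * b) - c
double vfmadd (double a, double b, double c) { return std::fma( a, b,  c); }
double vfmsub (double a, double b, double c) { return std::fma( a, b, -c); }
double vfnmsub(double a, double b, double c) { return std::fma(-a, b,  c); }
double vfnmadd(double a, double b, double c) { return std::fma(-a, b, -c); }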
- defvar suffix = vti.LMul.MX; - def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd, - vti.RegClass:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFMADD_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd, - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFMSUB_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1, - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$rd, - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFNMADD_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1, - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFNMSUB_VV_"# suffix) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; +// 14.5. Vector Widening Floating-Point Multiply Instructions +defm : VPatWidenBinaryFPVL_VV_VF; - // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally - // commutable. - def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1), - vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1), - vti.RegClass:$rd, - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (riscv_fneg_vl vti.RegClass:$rd, - (vti.Mask true_mask), - VLOpFrag), - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (riscv_fneg_vl vti.RegClass:$rd, - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; +// 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions. +defm : VPatFPMulAddVL_VV_VF; +defm : VPatFPMulAddVL_VV_VF; +defm : VPatFPMulAddVL_VV_VF; +defm : VPatFPMulAddVL_VV_VF; - // The splat might be negated. 
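The note covers the case where fneg lands on the splatted scalar rather than on a vector operand; algebraically the same negated forms are selected. A quick check of the identities (plain C++; the values are exact in binary floating point, so the comparisons hold exactly):

#include <cassert>
#include <cmath>

int main() {
  double s = 1.5, d = 2.0, c = 0.25;
  assert(std::fma(-s, d, -c) == -(s * d) - c); // vfnmadd form
  assert(std::fma(-s, d,  c) == -(s * d) + c); // vfnmsub form
  return 0;
}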
- def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$rd, - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; -} +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +defm : VPatWidenFPMulAccVL_VV_VF; +defm : VPatWidenFPMulAccVL_VV_VF; +defm : VPatWidenFPMulAccVL_VV_VF; +defm : VPatWidenFPMulAccVL_VV_VF; // 14.11. Vector Floating-Point MIN/MAX Instructions defm : VPatBinaryFPVL_VV_VF; @@ -1193,10 +1309,13 @@ foreach vti = AllFloatVectors in { (!cast("PseudoVFSGNJX_VV_"# vti.LMul.MX) vti.RegClass:$rs, vti.RegClass:$rs, GPR:$vl, vti.Log2SEW)>; // Handle fneg with VFSGNJN using the same input for both operands. - def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask true_mask), + def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0), VLOpFrag), - (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX) - vti.RegClass:$rs, vti.RegClass:$rs, GPR:$vl, vti.Log2SEW)>; + (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, + vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + TAIL_AGNOSTIC)>; + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), @@ -1276,16 +1395,26 @@ foreach fvti = AllFloatVectors in { // 14.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - (fvti.Scalar (fpimm0)), VLOpFrag)), + (fvti.Vector undef), (fvti.Scalar (fpimm0)), VLOpFrag)), (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) 0, GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl + fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)), + (!cast("PseudoVMV_V_I_"#fvti.LMul.MX#"_TU") + $passthru, 0, GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), + (fvti.Vector undef), (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl + fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), + (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # + fvti.LMul.MX # "_TU") + $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), + GPR:$vl, fvti.Log2SEW)>; // 14.17. 
Vector Single-Width Floating-Point/Integer Type-Convert Instructions defm : VPatConvertFP2ISDNode_V_VL; @@ -1302,10 +1431,11 @@ foreach fvti = AllFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; def : Pat<(fwti.Vector (riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask true_mask), + (fvti.Mask V0), VLOpFrag)), - (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX) - fvti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK") + (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } // 14.19 Narrowing Floating-Point/Integer Type-Convert Instructions @@ -1317,16 +1447,18 @@ foreach fvti = AllFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; def : Pat<(fvti.Vector (riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask true_mask), + (fwti.Mask V0), VLOpFrag)), - (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX) - fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask true_mask), + (fwti.Mask V0), VLOpFrag)), - (!cast("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX) - fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + (!cast("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1412,43 +1544,27 @@ foreach vti = AllIntegerVectors in { (!cast("PseudoVMV_S_X_"#vti.LMul.MX) vti.RegClass:$merge, (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, - (vti.Vector vti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX) - vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm, - (vti.Mask true_mask), + vti.RegClass:$rs1, + (vti.Mask V0), + vti.RegClass:$merge, VLOpFrag)), - (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX) - vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (riscv_vrgather_vv_vl - vti.RegClass:$rs2, - vti.RegClass:$rs1, - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$merge, - VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (riscv_vrgather_vx_vl - vti.RegClass:$rs2, - uimm5:$imm, - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$merge, - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), + (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, + uimm5:$imm, + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, 
vti.RegClass:$rs2, uimm5:$imm, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -1461,21 +1577,13 @@ foreach vti = AllIntegerVectors in { defvar emul_str = octuple_to_str.ret; defvar ivti = !cast("VI16" # emul_str); defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str; - def : Pat<(vti.Vector (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, - (ivti.Vector ivti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag)), - (!cast(inst) - vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (riscv_vrgatherei16_vv_vl - vti.RegClass:$rs2, - (ivti.Vector ivti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$merge, - VLOpFrag)), + + def : Pat<(vti.Vector + (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, + (ivti.Vector ivti.RegClass:$rs1), + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -1500,43 +1608,29 @@ foreach vti = AllFloatVectors in { vti.RegClass:$merge, (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>; defvar ivti = GetIntVTypeInfo.Vti; - def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, - (ivti.Vector vti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX) - vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm, - (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX) - vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (riscv_vrgather_vv_vl - vti.RegClass:$rs2, - (ivti.Vector vti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$merge, - VLOpFrag)), + def : Pat<(vti.Vector + (riscv_vrgather_vv_vl vti.RegClass:$rs2, + (ivti.Vector vti.RegClass:$rs1), + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (riscv_vrgather_vx_vl - vti.RegClass:$rs2, - uimm5:$imm, - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$merge, - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), + (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vti.Vector + (riscv_vrgather_vx_vl vti.RegClass:$rs2, + uimm5:$imm, + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -1548,21 +1642,13 @@ foreach vti = AllFloatVectors in { defvar emul_str = octuple_to_str.ret; defvar ivti = !cast("VI16" # emul_str); defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str; - def : Pat<(vti.Vector (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, - (ivti.Vector ivti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag)), - (!cast(inst) - 
vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (riscv_vrgatherei16_vv_vl - vti.RegClass:$rs2, - (ivti.Vector ivti.RegClass:$rs1), - (vti.Mask true_mask), - VLOpFrag), - vti.RegClass:$merge, - VLOpFrag)), + + def : Pat<(vti.Vector + (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, + (ivti.Vector ivti.RegClass:$rs1), + (vti.Mask V0), + vti.RegClass:$merge, + VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -1583,9 +1669,10 @@ def SDTRVVSlide : SDTypeProfile<1, 5, [ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT> ]>; -def SDTRVVSlide1 : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisInt<0>, SDTCisVT<2, XLenVT>, - SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT> +def SDTRVVSlide1 : SDTypeProfile<1, 5, [ + SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisInt<0>, + SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, + SDTCisVT<5, XLenVT> ]>; def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>; @@ -1600,16 +1687,30 @@ foreach vti = AllIntegerVectors in { VLOpFrag)), (!cast("PseudoVID_V_"#vti.LMul.MX) GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX#"_TU") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), + (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX#"_TU") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; } foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { @@ -1619,7 +1720,7 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { VLOpFrag)), (!cast("PseudoVSLIDEUP_VI_"#vti.LMul.MX) vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2, - GPR:$vl, vti.Log2SEW)>; + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3), (vti.Vector vti.RegClass:$rs1), @@ -1627,7 +1728,7 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { VLOpFrag)), (!cast("PseudoVSLIDEUP_VX_"#vti.LMul.MX) vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, - GPR:$vl, vti.Log2SEW)>; + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3), (vti.Vector vti.RegClass:$rs1), @@ -1635,7 +1736,14 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { VLOpFrag)), 
(!cast("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX) vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2, - GPR:$vl, vti.Log2SEW)>; + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), + uimm5:$rs2, (vti.Mask true_mask), + VLOpFrag)), + (!cast("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, uimm5:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3), (vti.Vector vti.RegClass:$rs1), @@ -1643,7 +1751,14 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { VLOpFrag)), (!cast("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX) vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, - GPR:$vl, vti.Log2SEW)>; + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), + (!cast("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } // Predicates = [HasVInstructions] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 07884d35f63c..9532d1dd3dd2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -211,15 +211,16 @@ def CSImm12MulBy4 : PatLeaf<(imm), [{ return false; int64_t C = N->getSExtValue(); // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair. - return !isInt<13>(C) && isInt<14>(C) && (C & 3) == 0; + return !isInt<13>(C) && isShiftedInt<12, 2>(C); }]>; def CSImm12MulBy8 : PatLeaf<(imm), [{ if (!N->hasOneUse()) return false; int64_t C = N->getSExtValue(); - // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair. - return !isInt<13>(C) && isInt<15>(C) && (C & 7) == 0; + // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair or + // CSImm12MulBy4. + return !isInt<14>(C) && isShiftedInt<12, 3>(C); }]>; def SimmShiftRightBy2XForm : SDNodeXFormgetValueType(0)); }]>; +// Pattern to exclude simm12 immediates from matching. 
+def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ + auto *C = dyn_cast(N); + return !C || !isInt<12>(C->getSExtValue()); +}]>; + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -348,7 +355,7 @@ def SH2ADD_UW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; def SH3ADD_UW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; -} // Predicates = [HasStdExtZbb, IsRV64] +} // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { def ROL : ALU_rr<0b0110000, 0b001, "rol">, @@ -368,7 +375,7 @@ def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[WriteRotateImm32, ReadRotateImm32]>; -} // Predicates = [HasStdExtZbbOrZbp, IsRV64] +} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] let Predicates = [HasStdExtZbs] in { def BCLR : ALU_rr<0b0100100, 0b001, "bclr">, @@ -391,32 +398,48 @@ def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">, } // Predicates = [HasStdExtZbs] let Predicates = [HasStdExtZbp] in { -def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>; -def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>; - -def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, Sched<[]>; -def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, Sched<[]>; - -def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>; -def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>; - -def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>; -def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>; - -def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>; +def GORC : ALU_rr<0b0010100, 0b101, "gorc">, + Sched<[WriteORC, ReadORC, ReadORC]>; +def GREV : ALU_rr<0b0110100, 0b101, "grev">, + Sched<[WriteREV, ReadREV, ReadREV]>; + +def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, + Sched<[WriteREVImm, ReadREVImm]>; +def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, + Sched<[WriteORCImm, ReadORCImm]>; + +def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, + Sched<[WriteSHFL, ReadSHFL, ReadSHFL]>; +def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, + Sched<[WriteUNSHFL, ReadUNSHFL, ReadUNSHFL]>; + +def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, + Sched<[WriteSHFLImm, ReadSHFLImm]>; +def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, + Sched<[WriteUNSHFLImm, ReadUNSHFLImm]>; + +def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">, + Sched<[WriteXPERMH, ReadXPERMH, ReadXPERMH]>; } // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbp, IsRV64] in { -def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>; -def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>; - -def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>; -def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>; - -def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>; -def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>; - -def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>; +def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, + Sched<[WriteORC32, ReadORC32, ReadORC32]>; +def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, + Sched<[WriteREV32, ReadREV32, ReadREV32]>; + +def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, + 
Sched<[WriteORCImm32, ReadORCImm32]>; +def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, + Sched<[WriteREVImm32, ReadREVImm32]>; + +def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, + Sched<[WriteSHFL32, ReadSHFL32, ReadSHFL32]>; +def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, + Sched<[WriteUNSHFL32, ReadUNSHFL32, ReadUNSHFL32]>; + +def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">, + Sched<[WriteXPERMW, ReadXPERMW, ReadXPERMW]>; } // Predicates = [HasStdExtZbp, IsRV64] // These instructions were named xperm.n and xperm.b in the last version of @@ -429,24 +452,28 @@ def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>; let Predicates = [HasStdExtZbt] in { def CMIX : RVBTernaryR<0b11, 0b001, OPC_OP, "cmix", "$rd, $rs2, $rs1, $rs3">, - Sched<[]>; + Sched<[WriteCMix, ReadCMix, ReadCMix, ReadCMix]>; def CMOV : RVBTernaryR<0b11, 0b101, OPC_OP, "cmov", "$rd, $rs2, $rs1, $rs3">, - Sched<[]>; + Sched<[WriteCMov, ReadCMov, ReadCMov, ReadCMov]>; def FSL : RVBTernaryR<0b10, 0b001, OPC_OP, "fsl", "$rd, $rs1, $rs3, $rs2">, - Sched<[]>; + Sched<[WriteFSReg, ReadFSReg, ReadFSReg, ReadFSReg]>; def FSR : RVBTernaryR<0b10, 0b101, OPC_OP, "fsr", "$rd, $rs1, $rs3, $rs2">, - Sched<[]>; + Sched<[WriteFSReg, ReadFSReg, ReadFSReg, ReadFSReg]>; def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri", - "$rd, $rs1, $rs3, $shamt">, Sched<[]>; + "$rd, $rs1, $rs3, $shamt">, + Sched<[WriteFSRImm, ReadFSRImm, ReadFSRImm]>; } // Predicates = [HasStdExtZbt] let Predicates = [HasStdExtZbt, IsRV64] in { def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32, - "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>; + "fslw", "$rd, $rs1, $rs3, $rs2">, + Sched<[WriteFSReg32, ReadFSReg32, ReadFSReg32, ReadFSReg32]>; def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw", - "$rd, $rs1, $rs3, $rs2">, Sched<[]>; + "$rd, $rs1, $rs3, $rs2">, + Sched<[WriteFSReg32, ReadFSReg32, ReadFSReg32, ReadFSReg32]>; def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32, - "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>; + "fsriw", "$rd, $rs1, $rs3, $shamt">, + Sched<[WriteFSRImm32, ReadFSRImm32, ReadFSRImm32]>; } // Predicates = [HasStdExtZbt, IsRV64] let Predicates = [HasStdExtZbb] in { @@ -476,88 +503,96 @@ def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">, let Predicates = [HasStdExtZbr] in { def CRC32_B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">, - Sched<[]>; + Sched<[WriteCRCB, ReadCRCB]>; def CRC32_H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">, - Sched<[]>; + Sched<[WriteCRCH, ReadCRCH]>; def CRC32_W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">, - Sched<[]>; + Sched<[WriteCRCW, ReadCRCW]>; def CRC32C_B : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">, - Sched<[]>; + Sched<[WriteCRCCB, ReadCRCCB]>; def CRC32C_H : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">, - Sched<[]>; + Sched<[WriteCRCCH, ReadCRCCH]>; def CRC32C_W : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">, - Sched<[]>; + Sched<[WriteCRCCW, ReadCRCCW]>; } // Predicates = [HasStdExtZbr] let Predicates = [HasStdExtZbr, IsRV64] in { def CRC32_D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">, - Sched<[]>; + Sched<[WriteCRCD, ReadCRCD]>; def CRC32C_D : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">, - Sched<[]>; + Sched<[WriteCRCCD, ReadCRCCD]>; } // Predicates = [HasStdExtZbr, IsRV64] let Predicates = [HasStdExtZbc] in { -def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">, +def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr", /*Commutable*/1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; } // Predicates = [HasStdExtZbc] let Predicates = [HasStdExtZbcOrZbkc] in { -def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">, +def CLMUL : ALU_rr<0b0000101, 0b001, "clmul", /*Commutable*/1>, Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; -def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">, +def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", /*Commutable*/1>, Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; } // Predicates = [HasStdExtZbcOrZbkc] let Predicates = [HasStdExtZbb] in { -def MIN : ALU_rr<0b0000101, 0b100, "min">, +def MIN : ALU_rr<0b0000101, 0b100, "min", /*Commutable*/1>, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def MINU : ALU_rr<0b0000101, 0b101, "minu">, +def MINU : ALU_rr<0b0000101, 0b101, "minu", /*Commutable*/1>, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def MAX : ALU_rr<0b0000101, 0b110, "max">, +def MAX : ALU_rr<0b0000101, 0b110, "max", /*Commutable*/1>, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def MAXU : ALU_rr<0b0000101, 0b111, "maxu">, +def MAXU : ALU_rr<0b0000101, 0b111, "maxu", /*Commutable*/1>, Sched<[WriteIALU, ReadIALU, ReadIALU]>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbp] in { -} // Predicates = [HasStdExtZbp] - let Predicates = [HasStdExtZbe] in { // NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with // bext in the 0.93 spec. -def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>; -def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>; +def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, + Sched<[WriteDecompress, ReadDecompress, ReadDecompress]>; +def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, + Sched<[WriteCompress, ReadCompress, ReadCompress]>; } // Predicates = [HasStdExtZbe] let Predicates = [HasStdExtZbe, IsRV64] in { // NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with // bextw in the 0.93 spec. 
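The bcompress/bdecompress pair named in the notes above behaves like x86 PEXT/PDEP: one gathers the rs1 bits selected by the rs2 mask into the low-order bits, the other scatters low-order bits of rs1 back out to the mask positions. A reference sketch of the semantics as described in the 0.94 draft (not LLVM code):

#include <cstdint>

static uint64_t bcompress(uint64_t rs1, uint64_t rs2) {
  uint64_t res = 0;
  for (unsigned i = 0, j = 0; i < 64; ++i)
    if ((rs2 >> i) & 1)                 // mask bit set: gather this bit
      res |= ((rs1 >> i) & 1) << j++;
  return res;
}

static uint64_t bdecompress(uint64_t rs1, uint64_t rs2) {
  uint64_t res = 0;
  for (unsigned i = 0, j = 0; i < 64; ++i)
    if ((rs2 >> i) & 1)                 // mask bit set: deposit next bit here
      res |= ((rs1 >> j++) & 1) << i;
  return res;
}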
-def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>; -def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>; +def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, + Sched<[WriteDecompress32, ReadDecompress32, ReadDecompress32]>; +def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, + Sched<[WriteCompress32, ReadCompress32, ReadCompress32]>; } // Predicates = [HasStdExtZbe, IsRV64] let Predicates = [HasStdExtZbpOrZbkb] in { -def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>; -def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>; +def PACK : ALU_rr<0b0000100, 0b100, "pack">, + Sched<[WritePACK, ReadPACK, ReadPACK]>; +def PACKH : ALU_rr<0b0000100, 0b111, "packh">, + Sched<[WritePACK, ReadPACK, ReadPACK]>; } // Predicates = [HasStdExtZbpOrZbkb] let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in -def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>; +def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, + Sched<[WritePACK32, ReadPACK32, ReadPACK32]>; let Predicates = [HasStdExtZbp] in -def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>; +def PACKU : ALU_rr<0b0100100, 0b100, "packu">, + Sched<[WritePACKU, ReadPACKU, ReadPACKU]>; let Predicates = [HasStdExtZbp, IsRV64] in -def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>; +def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, + Sched<[WritePACKU32, ReadPACKU32, ReadPACKU32]>; let Predicates = [HasStdExtZbm, IsRV64] in { def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">, - Sched<[]>; + Sched<[WriteBMatrix, ReadBMatrix]>; -def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>; -def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>; +def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, + Sched<[WriteBMatrix, ReadBMatrix, ReadBMatrix]>; +def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, + Sched<[WriteBMatrix, ReadBMatrix, ReadBMatrix]>; } // Predicates = [HasStdExtZbm, IsRV64] let Predicates = [HasStdExtZbf] in @@ -601,12 +636,15 @@ def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">, } // Predicates = [HasStdExtZbbOrZbp] let Predicates = [HasStdExtZbpOrZbkb] in -def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">; +def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">, + Sched<[]>; let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in { -def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">; -def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">; -} // Predicates = [HasStdExtZbkb, IsRV32] +def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">, + Sched<[]>; +def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">, + Sched<[]>; +} // Predicates = [HasStdExtZbpOrZbkb, IsRV32] //===----------------------------------------------------------------------===// @@ -615,7 +653,7 @@ def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">; let Predicates = [HasStdExtZba, IsRV64] in { def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>; -} +} // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbp] in { def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>; @@ -780,8 +818,8 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { -def : PatGprGpr; -def : PatGprGpr; +def : PatGprGpr, ROL>; +def : PatGprGpr, ROR>; def : PatGprImm; // There's no encoding for roli in the the 'B' extension 
as it can be @@ -791,8 +829,8 @@ def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt), } // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { -def : PatGprGpr; -def : PatGprGpr; +def : PatGprGpr, ROLW>; +def : PatGprGpr, RORW>; def : PatGprImm; def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; @@ -843,23 +881,25 @@ def : Pat<(and GPR:$r, BCLRITwoBitsMask:$i), def : Pat<(and GPR:$r, BCLRIANDIMask:$i), (BCLRI (ANDI GPR:$r, (BCLRIANDIMaskLow BCLRIANDIMask:$i)), (BCLRITwoBitsMaskHigh BCLRIANDIMask:$i))>; -} +} // Predicates = [HasStdExtZbs] let Predicates = [HasStdExtZbbOrZbp] in { // We treat orc.b as a separate instruction, so match it directly. We also // lower the Zbb orc.b intrinsic to this. def : Pat<(riscv_gorc GPR:$rs1, 7), (ORC_B GPR:$rs1)>; -} +} // Predicates = [HasStdExtZbbOrZbp] let Predicates = [HasStdExtZbpOrZbkb] in { // We treat brev8 as a separate instruction, so match it directly. We also // use this for brev8 when lowering bitreverse with Zbkb. def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>; +} // Predicates = [HasStdExtZbpOrZbkb] +let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in { // We treat zip and unzip as separate instructions, so match it directly. def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>; def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>; -} +} // Predicates = [HasStdExtZbpOrZbkb, IsRV32] let Predicates = [HasStdExtZbp] in { def : PatGprGpr; @@ -880,12 +920,16 @@ def : PatGprGpr; let Predicates = [HasStdExtZbp, IsRV64] in { def : PatGprGpr; def : PatGprGpr; -def : PatGprImm; -def : PatGprImm; -// FIXME: Move to DAG combine. -def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; -def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; +// Select GREVIW/GORCIW when the immediate doesn't have bit 5 set and the result +// is sign extended. +// FIXME: Two special patterns kept when Imm is 7. +def : Pat<(i64 (sext_inreg (binop_oneuse GPR:$rs1, 7), i32)), + (GREVIW GPR:$rs1, 7)>; +def : Pat<(i64 (sext_inreg (binop_oneuse GPR:$rs1, 7), i32)), + (GORCIW GPR:$rs1, 7)>; +def : PatGprImm, GREVIW, uimm5>; +def : PatGprImm, GORCIW, uimm5>; def : PatGprGpr; def : PatGprGpr; @@ -895,10 +939,6 @@ let Predicates = [HasStdExtZbp, IsRV64] in def : PatGprGpr; let Predicates = [HasStdExtZbp, IsRV32] in { -// FIXME : Move to DAG combine. -def : Pat<(i32 (rotr (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>; -def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>; - // We treat rev8 as a separate instruction, so match it directly.
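The grev patterns here follow from the generalized-reverse butterfly: each set bit of the control swaps blocks of the matching size, so control 7 reverses the bits within every byte (brev8) and control 24 swaps the bytes of a 32-bit value (rev8 on RV32). A 32-bit sketch of the semantics as given in the Zbp drafts:

#include <cstdint>

static uint32_t grev32(uint32_t x, unsigned k) {
  if (k & 1)  x = ((x & 0x55555555u) << 1)  | ((x >> 1)  & 0x55555555u);
  if (k & 2)  x = ((x & 0x33333333u) << 2)  | ((x >> 2)  & 0x33333333u);
  if (k & 4)  x = ((x & 0x0F0F0F0Fu) << 4)  | ((x >> 4)  & 0x0F0F0F0Fu);
  if (k & 8)  x = ((x & 0x00FF00FFu) << 8)  | ((x >> 8)  & 0x00FF00FFu);
  if (k & 16) x = (x << 16) | (x >> 16);
  return x;
}
// grev32(x, 7) == brev8; grev32(x, 24) == byte swap (rev8 on RV32).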
def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>; } // Predicates = [HasStdExtZbp, IsRV32] @@ -911,6 +951,8 @@ def : Pat<(i64 (riscv_grev GPR:$rs1, 56)), (REV8_RV64 GPR:$rs1)>; let Predicates = [HasStdExtZbt] in { def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)), (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(xor (and (xor GPR:$rs1, GPR:$rs3), GPR:$rs2), GPR:$rs3), + (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>; def : Pat<(select (XLenVT (setne GPR:$rs2, 0)), GPR:$rs1, GPR:$rs3), (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; @@ -932,6 +974,13 @@ def : Pat<(select (XLenVT (setge GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1), (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; def : Pat<(select (XLenVT (setle GPR:$y, GPR:$x)), GPR:$rs3, GPR:$rs1), (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; + +// setge X, Imm is canonicalized to setgt X, (Imm - 1). +def : Pat<(select (XLenVT (setgt GPR:$x, simm12_minus1_nonzero:$imm)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (SLTI GPR:$x, (ImmPlus1 simm12_minus1_nonzero:$imm)), GPR:$rs3)>; +def : Pat<(select (XLenVT (setugt GPR:$x, simm12_minus1_nonzero:$imm)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (SLTIU GPR:$x, (ImmPlus1 simm12_minus1_nonzero:$imm)), GPR:$rs3)>; + def : Pat<(select GPR:$rs2, GPR:$rs1, GPR:$rs3), (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; } // Predicates = [HasStdExtZbt] @@ -977,7 +1026,7 @@ def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>; let Predicates = [HasStdExtZbb] in { def : Pat<(sext_inreg GPR:$rs1, i8), (SEXT_B GPR:$rs1)>; def : Pat<(sext_inreg GPR:$rs1, i16), (SEXT_H GPR:$rs1)>; -} +} // Predicates = [HasStdExtZbb] let Predicates = [HasStdExtZbb] in { def : PatGprGpr; @@ -1018,7 +1067,7 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)), def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), (and GPR:$rs1, 0x000000000000FFFF))), (PACKW GPR:$rs1, GPR:$rs2)>; -} +} // Predicates = [HasStdExtZbpOrZbkb, IsRV64] let Predicates = [HasStdExtZbp, IsRV32] in def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))), @@ -1031,19 +1080,13 @@ def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))) def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))), (PACKUW GPR:$rs1, GPR:$rs2)>; -} +} // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbbOrZbp, IsRV32] in def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV32 GPR:$rs)>; let Predicates = [HasStdExtZbbOrZbp, IsRV64] in def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>; -// Pattern to exclude simm12 immediates from matching. -def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ - auto *C = dyn_cast(N); - return !C || !isInt<12>(C->getSExtValue()); -}]>; - let Predicates = [HasStdExtZba] in { def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), non_imm12:$rs2), (SH1ADD GPR:$rs1, GPR:$rs2)>; @@ -1132,6 +1175,33 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)) (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)), (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; + +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)), + (SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)), + (SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>; +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)), + (SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>; + +// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
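The SRLI + SHXADD_UW patterns below rest on a simple identity: a mask like 0x1FFFFFFFE keeps bits 1..32 of rs1, which is exactly what a right shift by 1 followed by sh1add.uw reproduces, since sh1add.uw computes (zext32(rs1) << 1) + rs2. A sketch of the identity being matched (helper names are illustrative):

#include <cstdint>

static uint64_t sh1add_uw(uint64_t rs1, uint64_t rs2) {
  return ((rs1 & 0xFFFFFFFFu) << 1) + rs2; // (zext32(rs1) << 1) + rs2
}

static uint64_t lowered(uint64_t x, uint64_t y) {
  return sh1add_uw(x >> 1, y);             // == (x & 0x1FFFFFFFE) + y
}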
+def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)), + (SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>; +def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)), + (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>; +def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), + (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; + +// Use SRLIW to shift out the LSBs and zero the upper 32 bits. Use SHXADD to +// shift zeros back into the LSBs, matching the shl amount of the addition. +def : Pat<(i64 (add (shl (binop_oneuse GPR:$rs1, 0xFFFFFFFE), (i64 1)), + non_imm12:$rs2)), + (SH2ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; +def : Pat<(i64 (add (shl (binop_oneuse GPR:$rs1, 0xFFFFFFFE), (i64 2)), + non_imm12:$rs2)), + (SH3ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; +def : Pat<(i64 (add (shl (binop_oneuse GPR:$rs1, 0xFFFFFFFC), (i64 1)), + non_imm12:$rs2)), + (SH3ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>; } // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbcOrZbkc] in { @@ -1175,4 +1245,4 @@ def : PatGprGpr; let Predicates = [HasStdExtZbkx] in { def : PatGprGpr; def : PatGprGpr; -} +} // Predicates = [HasStdExtZbkx] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index a2753c132354..5a4366b0908c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -17,13 +17,71 @@ def SDT_RISCVFMV_H_X : SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, XLenVT>]>; -def SDT_RISCVFMV_X_ANYEXTH +def SDT_RISCVFMV_X_EXTH : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, f16>]>; def riscv_fmv_h_x : SDNode<"RISCVISD::FMV_H_X", SDT_RISCVFMV_H_X>; def riscv_fmv_x_anyexth - : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>; + : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_EXTH>; +def riscv_fmv_x_signexth + : SDNode<"RISCVISD::FMV_X_SIGNEXTH", SDT_RISCVFMV_X_EXTH>; + +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===// + +// Zhinxmin and Zhinx + +def FPR16INX : RegisterOperand { + let ParserMatchClass = GPRAsFPR; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +def ZfhExt : ExtInfo<0, [HasStdExtZfh]>; +def Zfh64Ext : ExtInfo<0, [HasStdExtZfh, IsRV64]>; +def ZfhminExt : ExtInfo<0, [HasStdExtZfhOrZfhmin]>; +def ZhinxExt : ExtInfo<1, [HasStdExtZhinx]>; +def ZhinxminExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin]>; +def Zhinx64Ext : ExtInfo<1, [HasStdExtZhinx, IsRV64]>; + +def ZfhminDExt : ExtInfo<0, [HasStdExtZfhOrZfhmin, HasStdExtD]>; +def ZhinxminZdinxExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx]>; + +def H : ExtInfo_r; +def H_INX : ExtInfo_r; + +def HH : ExtInfo_rr; +def HH_INX : ExtInfo_rr; +def XH : ExtInfo_rr; +def XH_INX : ExtInfo_rr; +def HX : ExtInfo_rr; +def HX_INX : ExtInfo_rr; +def XH_64 : ExtInfo_rr; +def HX_64 : ExtInfo_rr; +def XH_INX_64 : ExtInfo_rr; +def HX_INX_64 : ExtInfo_rr; +def HFmin : ExtInfo_rr; +def HF_INXmin : ExtInfo_rr; +def HF_INX : ExtInfo_rr; +def FHmin : ExtInfo_rr; +def FH_INXmin : ExtInfo_rr; +def FH_INX : ExtInfo_rr; +def DHmin : ExtInfo_rr; +def DH_INXmin : ExtInfo_rr; +def HDmin : ExtInfo_rr; +def HD_INXmin : ExtInfo_rr; + +defvar HINX = [H, H_INX]; +defvar HHINX = [HH, HH_INX]; +defvar XHINX = [XH, XH_INX]; +defvar HXINX = [HX, HX_INX]; +defvar XHIN64X = [XH_64, XH_INX_64]; +defvar HXIN64X = [HX_64, HX_INX_64]; +defvar HFINXmin = [HFmin, HF_INXmin]; +defvar FHINXmin = [FHmin, FH_INXmin]; +defvar DHINXmin = [DHmin, DH_INXmin]; +defvar HDINXmin = [HDmin, HD_INXmin]; //===----------------------------------------------------------------------===// // Instructions @@ -38,74 +96,73 @@ def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>; def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; } // Predicates = [HasStdExtZfhOrZfhmin] -let Predicates = [HasStdExtZfh] in { let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in { -def FMADD_H : FPFMA_rrr_frm; -def FMSUB_H : FPFMA_rrr_frm; -def FNMSUB_H : FPFMA_rrr_frm; -def FNMADD_H : FPFMA_rrr_frm; +defm FMADD_H : FPFMA_rrr_frm_m; +defm FMSUB_H : FPFMA_rrr_frm_m; +defm FNMSUB_H : FPFMA_rrr_frm_m; +defm FNMADD_H : FPFMA_rrr_frm_m; +} + +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; +defm : FPFMADynFrmAlias_m; + +let SchedRW = [WriteFALU16, ReadFALU16, ReadFALU16] in { +defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", HINX, /*Commutable*/1>; +defm FSUB_H : FPALU_rr_frm_m<0b0000110, "fsub.h", HINX>; } +let SchedRW = [WriteFMul16, ReadFMul16, ReadFMul16] in +defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", HINX, /*Commutable*/1>; + +let SchedRW = [WriteFDiv16, ReadFDiv16, ReadFDiv16] in +defm FDIV_H : FPALU_rr_frm_m<0b0001110, "fdiv.h", HINX>; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; -def : FPFMADynFrmAlias; - -def FADD_H : FPALU_rr_frm<0b0000010, "fadd.h", FPR16>, - Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>; -def FSUB_H : FPALU_rr_frm<0b0000110, "fsub.h", FPR16>, - Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>; -def FMUL_H : FPALU_rr_frm<0b0001010, "fmul.h", FPR16>, - Sched<[WriteFMul16, ReadFMul16, ReadFMul16]>; -def FDIV_H : FPALU_rr_frm<0b0001110, "fdiv.h", FPR16>, - Sched<[WriteFDiv16, ReadFDiv16, ReadFDiv16]>; - -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; -def : FPALUDynFrmAlias; - -def FSQRT_H : FPUnaryOp_r_frm<0b0101110, 0b00000, FPR16, FPR16, "fsqrt.h">, - Sched<[WriteFSqrt16, ReadFSqrt16]>; -def : FPUnaryOpDynFrmAlias; 
+defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; +defm : FPALUDynFrmAlias_m; + +defm FSQRT_H : FPUnaryOp_r_frm_m<0b0101110, 0b00000, HHINX, "fsqrt.h">, + Sched<[WriteFSqrt16, ReadFSqrt16]>; +defm : FPUnaryOpDynFrmAlias_m; let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16], mayRaiseFPException = 0 in { -def FSGNJ_H : FPALU_rr<0b0010010, 0b000, "fsgnj.h", FPR16>; -def FSGNJN_H : FPALU_rr<0b0010010, 0b001, "fsgnjn.h", FPR16>; -def FSGNJX_H : FPALU_rr<0b0010010, 0b010, "fsgnjx.h", FPR16>; +defm FSGNJ_H : FPALU_rr_m<0b0010010, 0b000, "fsgnj.h", HINX>; +defm FSGNJN_H : FPALU_rr_m<0b0010010, 0b001, "fsgnjn.h", HINX>; +defm FSGNJX_H : FPALU_rr_m<0b0010010, 0b010, "fsgnjx.h", HINX>; } let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in { -def FMIN_H : FPALU_rr<0b0010110, 0b000, "fmin.h", FPR16>; -def FMAX_H : FPALU_rr<0b0010110, 0b001, "fmax.h", FPR16>; +defm FMIN_H : FPALU_rr_m<0b0010110, 0b000, "fmin.h", HINX, /*Commutable*/1>; +defm FMAX_H : FPALU_rr_m<0b0010110, 0b001, "fmax.h", HINX, /*Commutable*/1>; } -def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, 0b00000, GPR, FPR16, "fcvt.w.h">, - Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>; -def : FPUnaryOpDynFrmAlias; - -def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, 0b00001, GPR, FPR16, "fcvt.wu.h">, +defm FCVT_W_H : FPUnaryOp_r_frm_m<0b1100010, 0b00000, XHINX, "fcvt.w.h">, Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, 0b00000, FPR16, GPR, "fcvt.h.w">, - Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_WU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00001, XHINX, "fcvt.wu.h">, + Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">, +defm FCVT_H_W : FPUnaryOp_r_frm_m<0b1101010, 0b00000, HXINX, "fcvt.h.w">, Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>; -def : FPUnaryOpDynFrmAlias; -} // Predicates = [HasStdExtZfh] +defm : FPUnaryOpDynFrmAlias_m; -let Predicates = [HasStdExtZfhOrZfhmin] in { -def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">, - Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_H_WU : FPUnaryOp_r_frm_m<0b1101010, 0b00001, HXINX, "fcvt.h.wu">, + Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b00010, 0b000, FPR32, FPR16, "fcvt.s.h">, +defm FCVT_H_S : FPUnaryOp_r_frm_m<0b0100010, 0b00000, HFINXmin, "fcvt.h.s">, + Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>; +defm : FPUnaryOpDynFrmAlias_m; + +defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, FHINXmin, "fcvt.s.h">, Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>; +let Predicates = [HasStdExtZfhOrZfhmin] in { let mayRaiseFPException = 0 in def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">, Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]>; @@ -115,45 +172,38 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">, Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>; } // Predicates = [HasStdExtZfhOrZfhmin] -let Predicates = [HasStdExtZfh] in { - let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in { -def FEQ_H : FPCmp_rr<0b1010010, 0b010, "feq.h", FPR16>; -def FLT_H : FPCmp_rr<0b1010010, 0b001, "flt.h", FPR16>; -def FLE_H : FPCmp_rr<0b1010010, 0b000, "fle.h", FPR16>; +defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", HINX, /*Commutable*/1>; +defm 
FLT_H : FPCmp_rr_m<0b1010010, 0b001, "flt.h", HINX>; +defm FLE_H : FPCmp_rr_m<0b1010010, 0b000, "fle.h", HINX>; } let mayRaiseFPException = 0 in -def FCLASS_H : FPUnaryOp_r<0b1110010, 0b00000, 0b001, GPR, FPR16, "fclass.h">, - Sched<[WriteFClass16, ReadFClass16]>; -} // Predicates = [HasStdExtZfh] - -let Predicates = [HasStdExtZfh, IsRV64] in { -def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, 0b00010, GPR, FPR16, "fcvt.l.h">, - Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>; -def : FPUnaryOpDynFrmAlias; +defm FCLASS_H : FPUnaryOp_r_m<0b1110010, 0b00000, 0b001, XHINX, "fclass.h">, + Sched<[WriteFClass16, ReadFClass16]>; -def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, 0b00011, GPR, FPR16, "fcvt.lu.h">, +defm FCVT_L_H : FPUnaryOp_r_frm_m<0b1100010, 0b00010, XHIN64X, "fcvt.l.h">, Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>; -def : FPUnaryOpDynFrmAlias; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, 0b00010, FPR16, GPR, "fcvt.h.l">, - Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_LU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00011, XHIN64X, "fcvt.lu.h">, + Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">, +defm FCVT_H_L : FPUnaryOp_r_frm_m<0b1101010, 0b00010, HXIN64X, "fcvt.h.l">, Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>; -def : FPUnaryOpDynFrmAlias; -} // Predicates = [HasStdExtZfh, IsRV64] +defm : FPUnaryOpDynFrmAlias_m; -let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in { -def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">, - Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>; -def : FPUnaryOpDynFrmAlias; +defm FCVT_H_LU : FPUnaryOp_r_frm_m<0b1101010, 0b00011, HXIN64X, "fcvt.h.lu">, + Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>; +defm : FPUnaryOpDynFrmAlias_m; -def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">, - Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>; -} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] +defm FCVT_H_D : FPUnaryOp_r_frm_m<0b0100010, 0b00001, HDINXmin, "fcvt.h.d">, + Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>; +defm : FPUnaryOpDynFrmAlias_m; + +defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, DHINXmin, "fcvt.d.h">, + Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) @@ -186,17 +236,21 @@ def PseudoQuietFLT_H : PseudoQuietFCMP; } } // Predicates = [HasStdExtZfhOrZfhmin] +let Predicates = [HasStdExtZhinx] in { +def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>; +def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>; +def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>; + +def : InstAlias<"fgt.h $rd, $rs, $rt", + (FLT_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>; +def : InstAlias<"fge.h $rd, $rs, $rt", + (FLE_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>; +} // Predicates = [HasStdExtZhinx] + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// -/// Generic pattern classes -class PatFpr16Fpr16 - : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2)>; - -class PatFpr16Fpr16DynFrm - : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst 
$rs1, $rs2, 0b111)>; - let Predicates = [HasStdExtZfh] in { /// Float constants @@ -210,17 +264,17 @@ def : Pat<(f16 (fpimmneg0)), (FSGNJN_H (FMV_H_X X0), (FMV_H_X X0))>; /// Float arithmetic operations -def : PatFpr16Fpr16DynFrm; -def : PatFpr16Fpr16DynFrm; -def : PatFpr16Fpr16DynFrm; -def : PatFpr16Fpr16DynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; +def : PatFprFprDynFrm; def : Pat<(any_fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>; def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>; def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>; -def : PatFpr16Fpr16; +def : PatFprFpr; def : Pat<(fcopysign FPR16:$rs1, (fneg FPR16:$rs2)), (FSGNJN_H $rs1, $rs2)>; def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2), (FSGNJ_H $rs1, (FCVT_H_S $rs2, 0b111))>; @@ -242,11 +296,15 @@ def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3), def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)), (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>; +// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA) +def : Pat<(fneg (any_fma_nsz FPR16:$rs1, FPR16:$rs2, FPR16:$rs3)), + (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>; + // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches // LLVM's fminnum and fmaxnum // . -def : PatFpr16Fpr16; -def : PatFpr16Fpr16; +def : PatFprFpr; +def : PatFprFpr; /// Setcc // FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for @@ -299,6 +357,7 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; // Moves (no conversion) def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>; def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>; +def : Pat<(riscv_fmv_x_signexth FPR16:$src), (FMV_X_H FPR16:$src)>; } // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZfh, IsRV32] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td new file mode 100644 index 000000000000..57fd74b0c0fe --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td @@ -0,0 +1,71 @@ +//===-- RISCVInstrInfoZicbo.td - RISC-V CMO instructions ---*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard Base Cache +// Management Operation ISA Extensions document (Zicbom, Zicboz, and Zicbop). +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand definitions. +//===----------------------------------------------------------------------===// + +// A 12-bit signed immediate where the least significant five bits are zero.
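In other words, the operand definition that follows accepts exactly the multiples of 32 that still fit in a signed 12-bit field, i.e. offsets in [-2048, 2016]. A standalone sketch of the isShiftedInt<7, 5> test it relies on (plain C++, not the llvm::isShiftedInt template itself):

#include <cstdint>

static bool isValidPrefetchOffset(int64_t off) {
  bool lsbsClear = (off & 31) == 0;              // low five bits must be zero
  bool fitsSimm12 = off >= -2048 && off < 2048;  // signed 12-bit range
  return lsbsClear && fitsSimm12;
}
// e.g. prefetch.r 64(a0) is encodable; prefetch.r 48(a0) is not.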
+def simm12_lsb00000 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<12, "Lsb00000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmOperand<12>"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<7, 5>(Imm); + return MCOp.isBareSymbolRef(); + }]; + let OperandType = "OPERAND_SIMM12_LSB00000"; + let OperandNamespace = "RISCVOp"; +} + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class CBO_r optype, string opcodestr> + : RVInstI<0b010, OPC_MISC_MEM, (outs), (ins GPRMemZeroOffset:$rs1), + opcodestr, "$rs1"> { + let imm12 = optype; + let rd = 0b00000; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class Prefetch_ri optype, string opcodestr> + : RVInstS<0b110, OPC_OP_IMM, (outs), (ins GPR:$rs1, simm12_lsb00000:$imm12), + opcodestr, "${imm12}(${rs1})"> { + let Inst{11-7} = 0b00000; + let rs2 = optype; +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZicbom] in { +def CBO_CLEAN : CBO_r<0b000000000001, "cbo.clean">, Sched<[]>; +def CBO_FLUSH : CBO_r<0b000000000010, "cbo.flush">, Sched<[]>; +def CBO_INVAL : CBO_r<0b000000000000, "cbo.inval">, Sched<[]>; +} // Predicates = [HasStdExtZicbom] + +let Predicates = [HasStdExtZicboz] in { +def CBO_ZERO : CBO_r<0b000000000100, "cbo.zero">, Sched<[]>; +} // Predicates = [HasStdExtZicboz] + +let Predicates = [HasStdExtZicbop] in { +def PREFETCH_I : Prefetch_ri<0b00000, "prefetch.i">, Sched<[]>; +def PREFETCH_R : Prefetch_ri<0b00001, "prefetch.r">, Sched<[]>; +def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>; +} // Predicates = [HasStdExtZicbop] diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp index c167c095521a..c457a95544cf 100644 --- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -87,7 +87,7 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, return MCOperand::createExpr(ME); } -bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, +bool llvm::lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &MCOp, const AsmPrinter &AP) { switch (MO.getType()) { @@ -145,6 +145,7 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + assert(TRI && "TargetRegisterInfo expected"); uint64_t TSFlags = MI->getDesc().TSFlags; @@ -158,12 +159,16 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, if (RISCVII::hasSEWOp(TSFlags)) --NumOps; + bool hasVLOutput = RISCV::isFaultFirstLoad(*MI); for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) { const MachineOperand &MO = MI->getOperand(OpNo); + // Skip vl output. It should be the second output. + if (hasVLOutput && OpNo == 1) + continue; // Skip merge op. It should be the first operand after the result.
- if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1) { - assert(MI->getNumExplicitDefs() == 1); + if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1U + hasVLOutput) { + assert(MI->getNumExplicitDefs() == 1U + hasVLOutput); continue; } @@ -214,7 +219,7 @@ bool llvm::lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; - if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP)) + if (lowerRISCVMachineOperandToMCOperand(MO, MCOp, AP)) OutMI.addOperand(MCOp); } diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp new file mode 100644 index 000000000000..8cb046bcfbb6 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp @@ -0,0 +1,37 @@ +//=- RISCVMachineFunctionInfo.cpp - RISCV machine function info ---*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares RISCV-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#include "RISCVMachineFunctionInfo.h" + +using namespace llvm; + +yaml::RISCVMachineFunctionInfo::RISCVMachineFunctionInfo( + const llvm::RISCVMachineFunctionInfo &MFI) + : VarArgsFrameIndex(MFI.getVarArgsFrameIndex()), + VarArgsSaveSize(MFI.getVarArgsSaveSize()) {} + +MachineFunctionInfo *RISCVMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} + +void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { + MappingTraits::mapping(YamlIO, *this); +} + +void RISCVMachineFunctionInfo::initializeBaseYamlFields( + const yaml::RISCVMachineFunctionInfo &YamlMFI) { + VarArgsFrameIndex = YamlMFI.VarArgsFrameIndex; + VarArgsSaveSize = YamlMFI.VarArgsSaveSize; +} diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index b5609e9a3890..622767540d99 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -14,11 +14,34 @@ #define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H #include "RISCVSubtarget.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { +class RISCVMachineFunctionInfo; + +namespace yaml { +struct RISCVMachineFunctionInfo final : public yaml::MachineFunctionInfo { + int VarArgsFrameIndex; + int VarArgsSaveSize; + + RISCVMachineFunctionInfo() = default; + RISCVMachineFunctionInfo(const llvm::RISCVMachineFunctionInfo &MFI); + + void mappingImpl(yaml::IO &YamlIO) override; + ~RISCVMachineFunctionInfo() = default; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, RISCVMachineFunctionInfo &MFI) { + YamlIO.mapOptional("varArgsFrameIndex", MFI.VarArgsFrameIndex); + YamlIO.mapOptional("varArgsSaveSize", MFI.VarArgsSaveSize); + } +}; +} // end namespace yaml + /// RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo /// and contains private RISCV-specific information for each MachineFunction. 
class RISCVMachineFunctionInfo : public MachineFunctionInfo { @@ -34,6 +57,8 @@ private: unsigned LibCallStackSize = 0; /// Size of RVV stack. uint64_t RVVStackSize = 0; + /// Alignment of RVV stack. + Align RVVStackAlign; /// Padding required to keep RVV stack aligned within the main stack. uint64_t RVVPadding = 0; /// Size of stack frame to save callee saved registers @@ -42,6 +67,11 @@ private: public: RISCVMachineFunctionInfo(const MachineFunction &MF) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } @@ -69,11 +99,16 @@ public: uint64_t getRVVStackSize() const { return RVVStackSize; } void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; } + Align getRVVStackAlign() const { return RVVStackAlign; } + void setRVVStackAlign(Align StackAlign) { RVVStackAlign = StackAlign; } + uint64_t getRVVPadding() const { return RVVPadding; } void setRVVPadding(uint64_t Padding) { RVVPadding = Padding; } unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } + + void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI); }; } // end namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp new file mode 100644 index 000000000000..3b9177bc1635 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp @@ -0,0 +1,67 @@ +//===- RISCVMacroFusion.cpp - RISCV Macro Fusion --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the RISCV implementation of the DAG scheduling +/// mutation to pair instructions back to back. +// +//===----------------------------------------------------------------------===// +// +#include "RISCVMacroFusion.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +// Fuse LUI followed by ADDI or ADDIW. +// rd = imm[31:0] which decomposes to +// lui rd, imm[31:12] +// addi(w) rd, rd, imm[11:0] +static bool isLUIADDI(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::ADDI && + SecondMI.getOpcode() != RISCV::ADDIW) + return false; + + // Assume the 1st instr to be a wildcard if it is unspecified. + if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::LUI) + return false; + + // The first operand of ADDI might be a frame index. + if (!SecondMI.getOperand(1).isReg()) + return false; + + Register FirstDest = FirstMI->getOperand(0).getReg(); + + // Destination of LUI should be the ADDI(W) source register. + if (SecondMI.getOperand(1).getReg() != FirstDest) + return false; + + // If the FirstMI destination is non-virtual, it should match the SecondMI + // destination. 
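+  // Illustrative example (assumed, not taken from this patch): materializing
+  // 0x12345678 as "lui a0, 0x12345" + "addiw a0, a0, 0x678" writes the same
+  // destination in both halves, so the pair is a fusion candidate.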
+  return FirstDest.isVirtual() || SecondMI.getOperand(0).getReg() == FirstDest;
+}
+
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  const RISCVSubtarget &ST = static_cast<const RISCVSubtarget &>(TSI);
+
+  if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
+    return true;
+
+  return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
new file mode 100644
index 000000000000..c238dacc37f6
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
@@ -0,0 +1,28 @@
+//===- RISCVMacroFusion.h - RISCV Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the RISCV definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+///   DAG.addMutation(createRISCVMacroFusionDAGMutation());
+/// to RISCVPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createRISCVMacroFusionDAGMutation();
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
new file mode 100644
index 000000000000..1fc424411c12
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -0,0 +1,382 @@
+//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass searches for instructions that are prevented from being compressed
+// by one of the following:
+//
+//   1. The use of a single uncompressed register.
+//   2. A base register + offset where the offset is too large to be compressed
+//      and the base register may or may not be compressed.
+//
+//
+// For case 1, if a compressed register is available, then the uncompressed
+// register is copied to the compressed register and its uses are replaced.
+//
+// For example, storing zero uses the uncompressible zero register:
+//   sw zero, 0(a0)   # if zero
+//   sw zero, 8(a0)   # if zero
+//   sw zero, 4(a0)   # if zero
+//   sw zero, 24(a0)  # if zero
+//
+// If a compressed register (e.g. a1) is available, the above can be transformed
+// to the following to improve code size:
+//   li a1, 0
+//   c.sw a1, 0(a0)
+//   c.sw a1, 8(a0)
+//   c.sw a1, 4(a0)
+//   c.sw a1, 24(a0)
+//
+//
+// For case 2, if a compressed register is available, then the original base
+// is copied and adjusted such that:
+//
+//   new_base_register = base_register + adjustment
+//   base_register + large_offset = new_base_register + small_offset
+//
+// For example, the following offsets are too large for c.sw:
+//   lui a2, 983065
+//   sw a1, -236(a2)
+//   sw a1, -240(a2)
+//   sw a1, -244(a2)
+//   sw a1, -248(a2)
+//   sw a1, -252(a2)
+//   sw a0, -256(a2)
+//
+// If a compressed register is available (e.g. a3), a new base could be created
+// such that the addresses can be accessed with a compressible offset, thus
+// improving code size:
+//   lui a2, 983065
+//   addi a3, a2, -256
+//   c.sw a1, 20(a3)
+//   c.sw a1, 16(a3)
+//   c.sw a1, 12(a3)
+//   c.sw a1, 8(a3)
+//   c.sw a1, 4(a3)
+//   c.sw a0, 0(a3)
+//
+//
+// This optimization is only applied if there are enough uses of the copied
+// register for code size to be reduced.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-make-compressible"
+#define RISCV_COMPRESS_INSTRS_NAME "RISCV Make Compressible"
+
+namespace {
+
+struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
+  static char ID;
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
+};
+} // namespace
+
+char RISCVMakeCompressibleOpt::ID = 0;
+INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
+                RISCV_COMPRESS_INSTRS_NAME, false, false)
+
+// Return log2(widthInBytes) of load/store done by Opcode.
+static unsigned log2LdstWidth(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode");
+  case RISCV::LW:
+  case RISCV::SW:
+  case RISCV::FLW:
+  case RISCV::FSW:
+    return 2;
+  case RISCV::LD:
+  case RISCV::SD:
+  case RISCV::FLD:
+  case RISCV::FSD:
+    return 3;
+  }
+}
+
+// Return a mask for the offset bits of a non-stack-pointer based compressed
+// load/store.
+static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
+  return 0x1f << log2LdstWidth(Opcode);
+}
+
+// Return true if Offset fits within a compressed stack-pointer based
+// load/store.
+static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
+  return log2LdstWidth(Opcode) == 2 ? isShiftedUInt<6, 2>(Offset)
+                                    : isShiftedUInt<6, 3>(Offset);
+}
+
+// Given an offset for a load/store, return the adjustment required to the base
+// register such that the address can be accessed with a compressible offset.
+// This will return 0 if the offset is already compressible.
+static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
+  // Return the excess bits that do not fit in a compressible offset.
+  return Offset & ~compressedLDSTOffsetMask(Opcode);
+}
+
+// Return true if Reg is in a compressed register class.
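+// (For reference: the compressed classes below model the registers most RVC
+// encodings can reach, i.e. x8-x15 and f8-f15.)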
+static bool isCompressedReg(Register Reg) { + return RISCV::GPRCRegClass.contains(Reg) || + RISCV::FPR32CRegClass.contains(Reg) || + RISCV::FPR64CRegClass.contains(Reg); +} + +// Return true if MI is a load for which there exists a compressed version. +static bool isCompressibleLoad(const MachineInstr &MI) { + const RISCVSubtarget &STI = MI.getMF()->getSubtarget(); + const unsigned Opcode = MI.getOpcode(); + + return Opcode == RISCV::LW || (!STI.is64Bit() && Opcode == RISCV::FLW) || + Opcode == RISCV::LD || Opcode == RISCV::FLD; +} + +// Return true if MI is a store for which there exists a compressed version. +static bool isCompressibleStore(const MachineInstr &MI) { + const RISCVSubtarget &STI = MI.getMF()->getSubtarget(); + const unsigned Opcode = MI.getOpcode(); + + return Opcode == RISCV::SW || (!STI.is64Bit() && Opcode == RISCV::FSW) || + Opcode == RISCV::SD || Opcode == RISCV::FSD; +} + +// Find a single register and/or large offset which, if compressible, would +// allow the given instruction to be compressed. +// +// Possible return values: +// +// {Reg, 0} - Uncompressed Reg needs replacing with a compressed +// register. +// {Reg, N} - Reg needs replacing with a compressed register and +// N needs adding to the new register. (Reg may be +// compressed or uncompressed). +// {RISCV::NoRegister, 0} - No suitable optimization found for this +// instruction. +static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { + const unsigned Opcode = MI.getOpcode(); + + if (isCompressibleLoad(MI) || isCompressibleStore(MI)) { + const MachineOperand &MOImm = MI.getOperand(2); + if (!MOImm.isImm()) + return RegImmPair(RISCV::NoRegister, 0); + + int64_t Offset = MOImm.getImm(); + int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode); + Register Base = MI.getOperand(1).getReg(); + + // Memory accesses via the stack pointer do not have a requirement for + // either of the registers to be compressible and can take a larger offset. + if (RISCV::SPRegClass.contains(Base)) { + if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust) + return RegImmPair(Base, NewBaseAdjust); + } else { + Register SrcDest = MI.getOperand(0).getReg(); + bool SrcDestCompressed = isCompressedReg(SrcDest); + bool BaseCompressed = isCompressedReg(Base); + + // If only Base and/or offset prevent compression, then return Base and + // any adjustment required to make the offset compressible. + if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed) + return RegImmPair(Base, NewBaseAdjust); + + // For loads, we can only change the base register since dest is defined + // rather than used. + // + // For stores, we can change SrcDest (and Base if SrcDest == Base) but + // cannot resolve an uncompressible offset in this case. + if (isCompressibleStore(MI)) { + if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) && + !NewBaseAdjust) + return RegImmPair(SrcDest, NewBaseAdjust); + } + } + } + return RegImmPair(RISCV::NoRegister, 0); +} + +// Check all uses after FirstMI of the given register, keeping a vector of +// instructions that would be compressible if the given register (and offset if +// applicable) were compressible. +// +// If there are enough uses for this optimization to improve code size and a +// compressed register is available, return that compressed register. 
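+//
+// (Illustrative arithmetic, assuming 4-byte uncompressed and 2-byte
+// compressed encodings: a scavenged c.mv/c.li costs 2 bytes and each
+// rewritten load/store saves 2, so two rewrites are needed to come out
+// ahead; an addi base adjustment costs 4 bytes and needs three.)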
+static Register analyzeCompressibleUses(MachineInstr &FirstMI,
+                                        RegImmPair RegImm,
+                                        SmallVectorImpl<MachineInstr *> &MIs) {
+  MachineBasicBlock &MBB = *FirstMI.getParent();
+  const TargetRegisterInfo *TRI =
+      MBB.getParent()->getSubtarget().getRegisterInfo();
+
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);
+
+  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
+                                         E = MBB.instr_end();
+       I != E; ++I) {
+    MachineInstr &MI = *I;
+
+    // Determine if this is an instruction which would benefit from using the
+    // new register.
+    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
+    if (CandidateRegImm.Reg == RegImm.Reg &&
+        CandidateRegImm.Imm == RegImm.Imm) {
+      // Advance tracking since the value in the new register must be live for
+      // this instruction too.
+      RS.forward(I);
+
+      MIs.push_back(&MI);
+    }
+
+    // If RegImm.Reg is modified by this instruction, then we cannot optimize
+    // past this instruction. If the register is already compressed, then it
+    // may be possible to optimize a large offset in the current instruction -
+    // this will have been detected by the preceding call to
+    // getRegImmPairPreventingCompression.
+    if (MI.modifiesRegister(RegImm.Reg, TRI))
+      break;
+  }
+
+  // Adjusting the base costs one new uncompressed addi and therefore three uses
+  // are required for a code size reduction. If no base adjustment is required,
+  // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
+  // the zero register) and therefore two uses are required for a code size
+  // reduction.
+  if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
+    return RISCV::NoRegister;
+
+  // Find a compressible register which will be available from the first
+  // instruction we care about to the last.
+  const TargetRegisterClass *RCToScavenge;
+
+  // Work out the compressed register class from which to scavenge.
+  if (RISCV::GPRRegClass.contains(RegImm.Reg))
+    RCToScavenge = &RISCV::GPRCRegClass;
+  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
+    RCToScavenge = &RISCV::FPR32CRegClass;
+  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
+    RCToScavenge = &RISCV::FPR64CRegClass;
+  else
+    return RISCV::NoRegister;
+
+  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
+                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
+                                      /*AllowSpill=*/false);
+}
+
+// Update uses of the old register in the given instruction to the new register.
+static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
+                           Register NewReg) {
+  unsigned Opcode = MI.getOpcode();
+
+  // If this pass is extended to support more instructions, the check for
+  // definedness may need to be strengthened.
+  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
+         "Unsupported instruction for this optimization.");
+
+  // Update registers
+  for (MachineOperand &MO : MI.operands())
+    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
+      // Do not update operands that define the old register.
+      //
+      // The new register was scavenged for the range of instructions that are
+      // being updated, therefore it should not be defined within this range
+      // except possibly in the final instruction.
+      if (MO.isDef()) {
+        assert(isCompressibleLoad(MI));
+        continue;
+      }
+      // Update reg
+      MO.setReg(NewReg);
+    }
+
+  // Update offset
+  MachineOperand &MOImm = MI.getOperand(2);
+  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
+  MOImm.setImm(NewOffset);
+}
+
+bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
+  // This is a size optimization.
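+  // It therefore only runs on functions carrying the minsize attribute
+  // (e.g. built with -Oz), where spending a copy to unlock compression pays.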
+ if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize()) + return false; + + const RISCVSubtarget &STI = Fn.getSubtarget(); + const RISCVInstrInfo &TII = *STI.getInstrInfo(); + + // This optimization only makes sense if compressed instructions are emitted. + if (!STI.hasStdExtC()) + return false; + + for (MachineBasicBlock &MBB : Fn) { + LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); + for (MachineInstr &MI : MBB) { + // Determine if this instruction would otherwise be compressed if not for + // an uncompressible register or offset. + RegImmPair RegImm = getRegImmPairPreventingCompression(MI); + if (!RegImm.Reg && RegImm.Imm == 0) + continue; + + // Determine if there is a set of instructions for which replacing this + // register with a compressed register (and compressible offset if + // applicable) is possible and will allow compression. + SmallVector MIs; + Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs); + if (!NewReg) + continue; + + // Create the appropriate copy and/or offset. + if (RISCV::GPRRegClass.contains(RegImm.Reg)) { + assert(isInt<12>(RegImm.Imm)); + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg) + .addReg(RegImm.Reg) + .addImm(RegImm.Imm); + } else { + // If we are looking at replacing an FPR register we don't expect to + // have any offset. The only compressible FP instructions with an offset + // are loads and stores, for which the offset applies to the GPR operand + // not the FPR operand. + assert(RegImm.Imm == 0); + unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg) + ? RISCV::FSGNJ_S + : RISCV::FSGNJ_D; + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg) + .addReg(RegImm.Reg) + .addReg(RegImm.Reg); + } + + // Update the set of instructions to use the compressed register and + // compressible offset instead. These instructions should now be + // compressible. + // TODO: Update all uses if RegImm.Imm == 0? Not just those that are + // expected to become compressible. + for (MachineInstr *UpdateMI : MIs) + updateOperands(*UpdateMI, RegImm, NewReg); + } + } + return true; +} + +/// Returns an instance of the Make Compressible Optimization pass. 
+FunctionPass *llvm::createRISCVMakeCompressibleOptPass() { + return new RISCVMakeCompressibleOpt(); +} diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 5f4022439abb..b060a73846c4 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -25,6 +25,7 @@ #include "RISCV.h" #include "RISCVTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" @@ -37,6 +38,10 @@ using namespace llvm; namespace { struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { +private: + const RISCVSubtarget *ST = nullptr; + +public: static char ID; bool runOnMachineFunction(MachineFunction &Fn) override; bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI); @@ -45,6 +50,9 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, int64_t Offset); bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset); + bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg, + int64_t &Offset); + RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} MachineFunctionProperties getRequiredProperties() const override { @@ -85,17 +93,16 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, MachineInstr *&LoADDI) { if (HiLUI.getOpcode() != RISCV::LUI || HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI || - HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress || + !HiLUI.getOperand(1).isGlobal() || HiLUI.getOperand(1).getOffset() != 0 || !MRI->hasOneUse(HiLUI.getOperand(0).getReg())) return false; Register HiLuiDestReg = HiLUI.getOperand(0).getReg(); - LoADDI = MRI->use_begin(HiLuiDestReg)->getParent(); + LoADDI = &*MRI->use_instr_begin(HiLuiDestReg); if (LoADDI->getOpcode() != RISCV::ADDI || LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO || - LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress || - LoADDI->getOperand(2).getOffset() != 0 || - !MRI->hasOneUse(LoADDI->getOperand(0).getReg())) + !LoADDI->getOperand(2).isGlobal() || + LoADDI->getOperand(2).getOffset() != 0) return false; return true; } @@ -106,6 +113,7 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, int64_t Offset) { + assert(isInt<32>(Offset) && "Unexpected offset"); // Put the offset back in HiLUI and the LoADDI HiLUI.getOperand(1).setOffset(Offset); LoADDI.getOperand(2).setOffset(Offset); @@ -148,7 +156,8 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, return false; // This can point to an ADDI or a LUI: MachineInstr &OffsetTail = *MRI->getVRegDef(Reg); - if (OffsetTail.getOpcode() == RISCV::ADDI) { + if (OffsetTail.getOpcode() == RISCV::ADDI || + OffsetTail.getOpcode() == RISCV::ADDIW) { // The offset value has non zero bits in both %hi and %lo parts. // Detect an ADDI that feeds from a LUI instruction. 
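     // (Worked example, illustrative and not from the patch: an offset of
     //  0x12345 would arrive as
     //    lui  voff, 0x12
     //    addi voff, voff, 0x345
     //    add  vreg4, vreg2, voff
     //  and fold as Offset = (0x12 << 12) + 0x345 = 0x12345.)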
MachineOperand &AddiImmOp = OffsetTail.getOperand(2); @@ -162,8 +171,14 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, LuiImmOp.getTargetFlags() != RISCVII::MO_None || !MRI->hasOneUse(OffsetLui.getOperand(0).getReg())) return false; - int64_t OffHi = OffsetLui.getOperand(1).getImm(); - Offset = (OffHi << 12) + OffLo; + Offset = SignExtend64<32>(LuiImmOp.getImm() << 12); + Offset += OffLo; + // RV32 ignores the upper 32 bits. ADDIW sign extends the result. + if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW) + Offset = SignExtend64<32>(Offset); + // We can only fold simm32 offsets. + if (!isInt<32>(Offset)) + return false; LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail << " " << OffsetLui); DeadInstrs.insert(&OffsetTail); @@ -173,98 +188,204 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, // The offset value has all zero bits in the lower 12 bits. Only LUI // exists. LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); - Offset = OffsetTail.getOperand(1).getImm() << 12; + Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12); DeadInstrs.insert(&OffsetTail); return true; } return false; } +// Detect patterns for offsets that are passed into a SHXADD instruction. +// The offset has 1,2, or 3 trailing zeros and fits in simm13, simm14, simm15. +// The constant is created with addi voff, x0, C, and shXadd is used to +// fill insert the trailing zeros and do the addition. +// +// HiLUI: lui vreg1, %hi(s) +// LoADDI: addi vreg2, vreg1, %lo(s) +// OffsetTail: addi voff, x0, C +// TailAdd: shXadd vreg4, voff, vreg2 +bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd, + Register GAReg, + int64_t &Offset) { + assert((TailShXAdd.getOpcode() == RISCV::SH1ADD || + TailShXAdd.getOpcode() == RISCV::SH2ADD || + TailShXAdd.getOpcode() == RISCV::SH3ADD) && + "Expected SHXADD instruction!"); + + // The first source is the shifted operand. + Register Rs1 = TailShXAdd.getOperand(1).getReg(); + + if (GAReg != TailShXAdd.getOperand(2).getReg()) + return false; + + // Can't fold if the register has more than one use. + if (!MRI->hasOneUse(Rs1)) + return false; + // This can point to an ADDI X0, C. + MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1); + if (OffsetTail.getOpcode() != RISCV::ADDI) + return false; + if (!OffsetTail.getOperand(1).isReg() || + OffsetTail.getOperand(1).getReg() != RISCV::X0 || + !OffsetTail.getOperand(2).isImm()) + return false; + + Offset = OffsetTail.getOperand(2).getImm(); + assert(isInt<12>(Offset) && "Unexpected offset"); + + unsigned ShAmt; + switch (TailShXAdd.getOpcode()) { + default: llvm_unreachable("Unexpected opcode"); + case RISCV::SH1ADD: ShAmt = 1; break; + case RISCV::SH2ADD: ShAmt = 2; break; + case RISCV::SH3ADD: ShAmt = 3; break; + } + + Offset = (uint64_t)Offset << ShAmt; + + LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); + DeadInstrs.insert(&OffsetTail); + return true; +} + bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI) { Register DestReg = LoADDI.getOperand(0).getReg(); - assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI"); - // LoADDI has only one use. - MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent(); - switch (Tail.getOpcode()) { - default: - LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" - << Tail); - return false; - case RISCV::ADDI: { - // Offset is simply an immediate operand. 
- int64_t Offset = Tail.getOperand(2).getImm(); - LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); - foldOffset(HiLUI, LoADDI, Tail, Offset); - return true; + + // First, look for arithmetic instructions we can get an offset from. + // We might be able to remove the arithmetic instructions by folding the + // offset into the LUI+ADDI. + if (MRI->hasOneUse(DestReg)) { + // LoADDI has only one use. + MachineInstr &Tail = *MRI->use_instr_begin(DestReg); + switch (Tail.getOpcode()) { + default: + LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" + << Tail); + break; + case RISCV::ADDI: { + // Offset is simply an immediate operand. + int64_t Offset = Tail.getOperand(2).getImm(); + + // We might have two ADDIs in a row. + Register TailDestReg = Tail.getOperand(0).getReg(); + if (MRI->hasOneUse(TailDestReg)) { + MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg); + if (TailTail.getOpcode() == RISCV::ADDI) { + Offset += TailTail.getOperand(2).getImm(); + LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail); + DeadInstrs.insert(&Tail); + foldOffset(HiLUI, LoADDI, TailTail, Offset); + return true; + } + } + + LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); + foldOffset(HiLUI, LoADDI, Tail, Offset); + return true; + } + case RISCV::ADD: { + // The offset is too large to fit in the immediate field of ADDI. + // This can be in two forms: + // 1) LUI hi_Offset followed by: + // ADDI lo_offset + // This happens in case the offset has non zero bits in + // both hi 20 and lo 12 bits. + // 2) LUI (offset20) + // This happens in case the lower 12 bits of the offset are zeros. + int64_t Offset; + if (!matchLargeOffset(Tail, DestReg, Offset)) + return false; + foldOffset(HiLUI, LoADDI, Tail, Offset); + return true; + } + case RISCV::SH1ADD: + case RISCV::SH2ADD: + case RISCV::SH3ADD: { + // The offset is too large to fit in the immediate field of ADDI. + // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or + // (SH3ADD (ADDI X0, C), DestReg). + int64_t Offset; + if (!matchShiftedOffset(Tail, DestReg, Offset)) + return false; + foldOffset(HiLUI, LoADDI, Tail, Offset); + return true; + } + } } - case RISCV::ADD: { - // The offset is too large to fit in the immediate field of ADDI. - // This can be in two forms: - // 1) LUI hi_Offset followed by: - // ADDI lo_offset - // This happens in case the offset has non zero bits in - // both hi 20 and lo 12 bits. - // 2) LUI (offset20) - // This happens in case the lower 12 bits of the offset are zeros. - int64_t Offset; - if (!matchLargeOffset(Tail, DestReg, Offset)) + + // We didn't find an arithmetic instruction. 
If all the uses are memory ops + // with the same offset, we can transform + // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8) + // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1) + // Tail: lw vreg3, 8(vreg2) + + Optional CommonOffset; + for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) { + switch (UseMI.getOpcode()) { + default: + LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI); return false; - foldOffset(HiLUI, LoADDI, Tail, Offset); - return true; + case RISCV::LB: + case RISCV::LH: + case RISCV::LW: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::LD: + case RISCV::FLH: + case RISCV::FLW: + case RISCV::FLD: + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FSH: + case RISCV::FSW: + case RISCV::FSD: { + if (UseMI.getOperand(1).isFI()) + return false; + // Register defined by LoADDI should not be the value register. + if (DestReg == UseMI.getOperand(0).getReg()) + return false; + assert(DestReg == UseMI.getOperand(1).getReg() && + "Expected base address use"); + // All load/store instructions must use the same offset. + int64_t Offset = UseMI.getOperand(2).getImm(); + if (CommonOffset && Offset != CommonOffset) + return false; + CommonOffset = Offset; + } + } } - case RISCV::LB: - case RISCV::LH: - case RISCV::LW: - case RISCV::LBU: - case RISCV::LHU: - case RISCV::LWU: - case RISCV::LD: - case RISCV::FLH: - case RISCV::FLW: - case RISCV::FLD: - case RISCV::SB: - case RISCV::SH: - case RISCV::SW: - case RISCV::SD: - case RISCV::FSH: - case RISCV::FSW: - case RISCV::FSD: { - // Transforms the sequence: Into: - // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8) - // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1) - // Tail: lw vreg3, 8(vreg2) - if (Tail.getOperand(1).isFI()) - return false; - // Register defined by LoADDI should be used in the base part of the - // load\store instruction. Otherwise, no folding possible. - Register BaseAddrReg = Tail.getOperand(1).getReg(); - if (DestReg != BaseAddrReg) - return false; - MachineOperand &TailImmOp = Tail.getOperand(2); - int64_t Offset = TailImmOp.getImm(); - // Update the offsets in global address lowering. - HiLUI.getOperand(1).setOffset(Offset); - // Update the immediate in the Tail instruction to add the offset. - Tail.RemoveOperand(2); - MachineOperand &ImmOp = LoADDI.getOperand(2); - ImmOp.setOffset(Offset); - Tail.addOperand(ImmOp); + + // We found a common offset. + // Update the offsets in global address lowering. + HiLUI.getOperand(1).setOffset(*CommonOffset); + MachineOperand &ImmOp = LoADDI.getOperand(2); + ImmOp.setOffset(*CommonOffset); + + // Update the immediate in the load/store instructions to add the offset. + for (MachineInstr &UseMI : + llvm::make_early_inc_range(MRI->use_instructions(DestReg))) { + UseMI.removeOperand(2); + UseMI.addOperand(ImmOp); // Update the base reg in the Tail instruction to feed from LUI. // Output of HiLUI is only used in LoADDI, no need to use // MRI->replaceRegWith(). 
- Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg()); - DeadInstrs.insert(&LoADDI); - return true; + UseMI.getOperand(1).setReg(HiLUI.getOperand(0).getReg()); } - } - return false; + + DeadInstrs.insert(&LoADDI); + return true; } bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; + ST = &Fn.getSubtarget(); + bool MadeChange = false; DeadInstrs.clear(); MRI = &Fn.getRegInfo(); @@ -274,9 +395,8 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) { MachineInstr *LoADDI = nullptr; if (!detectLuiAddiGlobal(HiLUI, LoADDI)) continue; - LLVM_DEBUG(dbgs() << " Found lowered global address with one use: " + LLVM_DEBUG(dbgs() << " Found lowered global address: " << *LoADDI->getOperand(2).getGlobal() << "\n"); - // If the use count is only one, merge the offset MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI); } } diff --git a/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp new file mode 100644 index 000000000000..3c4a60b81d8e --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp @@ -0,0 +1,179 @@ +//=- RISCVRedundantCopyElimination.cpp - Remove useless copy for RISCV ------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass removes unnecessary zero copies in BBs that are targets of +// beqz/bnez instructions. For instance, the copy instruction in the code below +// can be removed because the beqz jumps to BB#2 when a0 is zero. +// BB#1: +// beqz %a0, +// BB#2: +// %a0 = COPY %x0 +// This pass should be run after register allocation. +// +// This pass is based on the earliest versions of +// AArch64RedundantCopyElimination. +// +// FIXME: Support compares with constants other than zero? This is harder to +// do on RISC-V since branches can't have immediates. 
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-copyelim" + +STATISTIC(NumCopiesRemoved, "Number of copies removed."); + +namespace { +class RISCVRedundantCopyElimination : public MachineFunctionPass { + const MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + +public: + static char ID; + RISCVRedundantCopyElimination() : MachineFunctionPass(ID) { + initializeRISCVRedundantCopyEliminationPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "RISCV Redundant Copy Elimination"; + } + +private: + bool optimizeBlock(MachineBasicBlock &MBB); +}; + +} // end anonymous namespace + +char RISCVRedundantCopyElimination::ID = 0; + +INITIALIZE_PASS(RISCVRedundantCopyElimination, "riscv-copyelim", + "RISCV redundant copy elimination pass", false, false) + +static bool guaranteesZeroRegInBlock(const MachineInstr &MI, + const MachineBasicBlock &MBB) { + unsigned Opc = MI.getOpcode(); + if (Opc == RISCV::BEQ && MI.getOperand(1).getReg() == RISCV::X0 && + &MBB == MI.getOperand(2).getMBB()) + return true; + if (Opc == RISCV::BNE && MI.getOperand(1).getReg() == RISCV::X0 && + &MBB != MI.getOperand(2).getMBB()) + return true; + + return false; +} + +bool RISCVRedundantCopyElimination::optimizeBlock(MachineBasicBlock &MBB) { + // Check if the current basic block has a single predecessor. + if (MBB.pred_size() != 1) + return false; + + // Check if the predecessor has two successors, implying the block ends in a + // conditional branch. + MachineBasicBlock *PredMBB = *MBB.pred_begin(); + if (PredMBB->succ_size() != 2) + return false; + + MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr(); + if (CondBr == PredMBB->end()) + return false; + + while (true) { + // If we run out of terminators, give up. + if (!CondBr->isTerminator()) + return false; + // If we found a branch with X0, stop searching and try to remove copies. + // TODO: Handle multiple branches with different registers. + if (guaranteesZeroRegInBlock(*CondBr, MBB)) + break; + // If we reached the beginning of the basic block, give up. + if (CondBr == PredMBB->begin()) + return false; + --CondBr; + } + + Register TargetReg = CondBr->getOperand(0).getReg(); + if (!TargetReg) + return false; + + bool Changed = false; + MachineBasicBlock::iterator LastChange = MBB.begin(); + // Remove redundant Copy instructions unless TargetReg is modified. 
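+  // (Illustrative: with "beqz a0, <this MBB>" as the guarding branch, an
+  // "$a0 = COPY $x0" below is provably redundant; any later redefinition of
+  // a0 stops the scan.)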
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { + MachineInstr *MI = &*I; + ++I; + if (MI->isCopy() && MI->getOperand(0).isReg() && + MI->getOperand(1).isReg()) { + Register DefReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); + + if (SrcReg == RISCV::X0 && !MRI->isReserved(DefReg) && + TargetReg == DefReg) { + LLVM_DEBUG(dbgs() << "Remove redundant Copy : "); + LLVM_DEBUG(MI->print(dbgs())); + + MI->eraseFromParent(); + Changed = true; + LastChange = I; + ++NumCopiesRemoved; + continue; + } + } + + if (MI->modifiesRegister(TargetReg, TRI)) + break; + } + + if (!Changed) + return false; + + // Otherwise, we have to fixup the use-def chain, starting with the + // BEQ/BNE. Conservatively mark as much as we can live. + CondBr->clearRegisterKills(TargetReg, TRI); + + // Add newly used reg to the block's live-in list if it isn't there already. + if (!MBB.isLiveIn(TargetReg)) + MBB.addLiveIn(TargetReg); + + // Clear any kills of TargetReg between CondBr and the last removed COPY. + for (MachineInstr &MMI : make_range(MBB.begin(), LastChange)) + MMI.clearRegisterKills(TargetReg, TRI); + + return true; +} + +bool RISCVRedundantCopyElimination::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) + Changed |= optimizeBlock(MBB); + + return Changed; +} + +FunctionPass *llvm::createRISCVRedundantCopyEliminationPass() { + return new RISCVRedundantCopyElimination(); +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp index bd3b95a98b9f..5371b790a148 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp @@ -12,9 +12,9 @@ #include "RISCVRegisterBankInfo.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_TARGET_REGBANK_IMPL @@ -22,5 +22,4 @@ using namespace llvm; -RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) - : RISCVGenRegisterBankInfo() {} +RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) {} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h index 05fac992734d..194a1548af24 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H #define LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "RISCVGenRegisterBank.inc" diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 35363bf37c0d..0c9219076498 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -14,6 +14,7 @@ #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineInstrBuilder.h" @@ -101,6 +102,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, RISCV::VTYPE); markSuperRegs(Reserved, RISCV::VXSAT); markSuperRegs(Reserved, RISCV::VXRM); + markSuperRegs(Reserved, RISCV::VLENB); // vlenb (constant) // Floating point environment registers. markSuperRegs(Reserved, RISCV::FRM); @@ -116,7 +118,7 @@ bool RISCVRegisterInfo::isAsmClobberable(const MachineFunction &MF, } bool RISCVRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { - return PhysReg == RISCV::X0; + return PhysReg == RISCV::X0 || PhysReg == RISCV::VLENB; } const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const { @@ -125,7 +127,7 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const { // Frame indexes representing locations of CSRs which are given a fixed location // by save/restore libcalls. -static const std::map FixedCSRFIMap = { +static const std::pair FixedCSRFIMap[] = { {/*ra*/ RISCV::X1, -1}, {/*s0*/ RISCV::X8, -2}, {/*s1*/ RISCV::X9, -3}, @@ -148,8 +150,9 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, if (!RVFI->useSaveRestoreLibCalls(MF)) return false; - auto FII = FixedCSRFIMap.find(Reg); - if (FII == FixedCSRFIMap.end()) + const auto *FII = + llvm::find_if(FixedCSRFIMap, [&](auto P) { return P.first == Reg; }); + if (FII == std::end(FixedCSRFIMap)) return false; FrameIdx = FII->second; @@ -171,7 +174,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register FrameReg; StackOffset Offset = getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); - bool IsRVVSpill = TII->isRVVSpill(MI, /*CheckFIs*/ false); + bool IsRVVSpill = RISCV::isRVVSpill(MI); if (!IsRVVSpill) Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); @@ -270,7 +273,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); } - auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MI.getOpcode()); + auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MI.getOpcode()); if (ZvlssegInfo) { Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 8c1c03b51c24..4ff60ebda5aa 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -66,6 +66,7 @@ def sub_vrm1_5 : ComposedSubRegIndex; def sub_vrm1_6 : ComposedSubRegIndex; def sub_vrm1_7 : ComposedSubRegIndex; +def sub_32_hi : SubRegIndex<32, 32>; } // Namespace = "RISCV" // Integer registers @@ -461,6 +462,12 @@ let RegAltNameIndices = [ABIRegAltName] in { DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; } +def VCSR : RegisterClass<"RISCV", [XLenVT], 32, + (add VTYPE, VL, VLENB)> { + let RegInfos = XLenRI; +} + + foreach m = [1, 2, 4] in { foreach n = NFList.L in { def "VN" # n # "M" # m # "NoV0": RegisterTuples< @@ -534,6 +541,35 @@ def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> { let Size = 64; } +let RegInfos = XLenRI in { +def GPRF16 : RegisterClass<"RISCV", [f16], 16, (add GPR)>; +def GPRF32 : RegisterClass<"RISCV", [f32], 32, (add GPR)>; +def GPRF64 : RegisterClass<"RISCV", [f64], 64, (add GPR)>; +} // RegInfos = XLenRI + +let RegAltNameIndices = [ABIRegAltName] in { + foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, + 24, 26, 28, 30] in { + defvar Reg = 
!cast("X"#Index); + def X#Index#_PD : RISCVRegWithSubRegs("X"#Index), + !cast("X"#!add(Index, 1))], + Reg.AltNames> { + let SubRegIndices = [sub_32, sub_32_hi]; + } + } +} + +let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in +def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add + X10_PD, X12_PD, X14_PD, X16_PD, + X6_PD, + X28_PD, X30_PD, + X8_PD, + X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, + X0_PD, X2_PD, X4_PD +)>; + // The register class is added for inline assembly for vector mask types. def VM : VReg DisableSExtWRemoval("riscv-disable-sextw-removal", cl::desc("Disable removal of sext.w"), @@ -55,11 +57,143 @@ FunctionPass *llvm::createRISCVSExtWRemovalPass() { return new RISCVSExtWRemoval(); } +// add uses of MI to the Worklist +static void addUses(const MachineInstr &MI, + SmallVectorImpl &Worklist, + MachineRegisterInfo &MRI) { + for (auto &UserOp : MRI.reg_operands(MI.getOperand(0).getReg())) { + const auto *User = UserOp.getParent(); + if (User == &MI) // ignore the def, current MI + continue; + Worklist.push_back(User); + } +} + +// returns true if all uses of OrigMI only depend on the lower word of its +// output, so we can transform OrigMI to the corresponding W-version. +// TODO: handle multiple interdependent transformations +static bool isAllUsesReadW(const MachineInstr &OrigMI, + MachineRegisterInfo &MRI) { + + SmallPtrSet Visited; + SmallVector Worklist; + + Visited.insert(&OrigMI); + addUses(OrigMI, Worklist, MRI); + + while (!Worklist.empty()) { + const MachineInstr *MI = Worklist.pop_back_val(); + + if (!Visited.insert(MI).second) { + // If we've looped back to OrigMI through a PHI cycle, we can't transform + // LD or LWU, because these operations use all 64 bits of input. + if (MI == &OrigMI) { + unsigned opcode = MI->getOpcode(); + if (opcode == RISCV::LD || opcode == RISCV::LWU) + return false; + } + continue; + } + + switch (MI->getOpcode()) { + case RISCV::ADDIW: + case RISCV::ADDW: + case RISCV::DIVUW: + case RISCV::DIVW: + case RISCV::MULW: + case RISCV::REMUW: + case RISCV::REMW: + case RISCV::SLLIW: + case RISCV::SLLW: + case RISCV::SRAIW: + case RISCV::SRAW: + case RISCV::SRLIW: + case RISCV::SRLW: + case RISCV::SUBW: + case RISCV::ROLW: + case RISCV::RORW: + case RISCV::RORIW: + case RISCV::CLZW: + case RISCV::CTZW: + case RISCV::CPOPW: + case RISCV::SLLI_UW: + case RISCV::FCVT_S_W: + case RISCV::FCVT_S_WU: + case RISCV::FCVT_D_W: + case RISCV::FCVT_D_WU: + continue; + + // these overwrite higher input bits, otherwise the lower word of output + // depends only on the lower word of input. So check their uses read W. + case RISCV::SLLI: + if (MI->getOperand(2).getImm() >= 32) + continue; + addUses(*MI, Worklist, MRI); + continue; + case RISCV::ANDI: + if (isUInt<11>(MI->getOperand(2).getImm())) + continue; + addUses(*MI, Worklist, MRI); + continue; + case RISCV::ORI: + if (!isUInt<11>(MI->getOperand(2).getImm())) + continue; + addUses(*MI, Worklist, MRI); + continue; + + case RISCV::BEXTI: + if (MI->getOperand(2).getImm() >= 32) + return false; + continue; + + // For these, lower word of output in these operations, depends only on + // the lower word of input. So, we check all uses only read lower word. 
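+  // (e.g. bits 31:0 of an ADD result depend only on bits 31:0 of its inputs,
+  // so an ADD whose users all read just the low word can later be rewritten
+  // to ADDW; an illustrative restatement of the rule above.)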
+  case RISCV::COPY:
+  case RISCV::PHI:
+
+  case RISCV::ADD:
+  case RISCV::ADDI:
+  case RISCV::AND:
+  case RISCV::MUL:
+  case RISCV::OR:
+  case RISCV::SLL:
+  case RISCV::SUB:
+  case RISCV::XOR:
+  case RISCV::XORI:
+
+  case RISCV::ADD_UW:
+  case RISCV::ANDN:
+  case RISCV::CLMUL:
+  case RISCV::ORC_B:
+  case RISCV::ORN:
+  case RISCV::SEXT_B:
+  case RISCV::SEXT_H:
+  case RISCV::SH1ADD:
+  case RISCV::SH1ADD_UW:
+  case RISCV::SH2ADD:
+  case RISCV::SH2ADD_UW:
+  case RISCV::SH3ADD:
+  case RISCV::SH3ADD_UW:
+  case RISCV::XNOR:
+  case RISCV::ZEXT_H_RV64:
+    addUses(*MI, Worklist, MRI);
+    continue;
+  default:
+    return false;
+  }
+  }
+  return true;
+}
+
 // This function returns true if the machine instruction always outputs a value
 // where bits 63:32 match bit 31.
+// Alternatively, if the instruction can be converted to a W variant
+// (e.g. ADD->ADDW) and all of its uses only use the lower word of its output,
+// then return true and add the instr to FixableDef to be converted later.
 // TODO: Allocate a bit in TSFlags for the W instructions?
 // TODO: Add other W instructions.
-static bool isSignExtendingOpW(const MachineInstr &MI) {
+static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI,
+                               SmallPtrSetImpl<MachineInstr *> &FixableDef) {
   switch (MI.getOpcode()) {
   case RISCV::LUI:
   case RISCV::LW:
@@ -89,8 +223,9 @@
   case RISCV::FCVT_WU_S:
   case RISCV::FCVT_W_D:
   case RISCV::FCVT_WU_D:
+  case RISCV::FMV_X_W:
   // The following aren't W instructions, but are either sign extended from a
-  // smaller size or put zeros in bits 63:31.
+  // smaller size, always output a small integer, or put zeros in bits 63:31.
   case RISCV::LBU:
   case RISCV::LHU:
   case RISCV::LB:
@@ -102,6 +237,12 @@
   case RISCV::SEXT_B:
   case RISCV::SEXT_H:
   case RISCV::ZEXT_H_RV64:
+  case RISCV::FMV_X_H:
+  case RISCV::BEXT:
+  case RISCV::BEXTI:
+  case RISCV::CLZ:
+  case RISCV::CPOP:
+  case RISCV::CTZ:
     return true;
   // shifting right sufficiently makes the value 32-bit sign-extended
   case RISCV::SRAI:
     return MI.getOperand(2).getImm() > 32;
   // The LI pattern ADDI rd, X0, imm is sign extended.
   case RISCV::ADDI:
-    return MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0;
+    if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0)
+      return true;
+    if (isAllUsesReadW(MI, MRI)) {
+      // transform to ADDIW
+      FixableDef.insert(&MI);
+      return true;
+    }
+    return false;
   // An ANDI with an 11 bit immediate will zero bits 63:11.
   case RISCV::ANDI:
     return isUInt<11>(MI.getOperand(2).getImm());
@@ -120,28 +268,45 @@
   // Copying from X0 produces zero.
case RISCV::COPY: return MI.getOperand(1).getReg() == RISCV::X0; + + // With these opcode, we can "fix" them with the W-version + // if we know all users of the result only rely on bits 31:0 + case RISCV::SLLI: + // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits + if (MI.getOperand(2).getImm() >= 32) + return false; + LLVM_FALLTHROUGH; + case RISCV::ADD: + case RISCV::LD: + case RISCV::LWU: + case RISCV::MUL: + case RISCV::SUB: + if (isAllUsesReadW(MI, MRI)) { + FixableDef.insert(&MI); + return true; + } } return false; } -static bool isSignExtendedW(const MachineInstr &OrigMI, - MachineRegisterInfo &MRI) { +static bool isSignExtendedW(MachineInstr &OrigMI, MachineRegisterInfo &MRI, + SmallPtrSetImpl &FixableDef) { SmallPtrSet Visited; - SmallVector Worklist; + SmallVector Worklist; Worklist.push_back(&OrigMI); while (!Worklist.empty()) { - const MachineInstr *MI = Worklist.pop_back_val(); + MachineInstr *MI = Worklist.pop_back_val(); // If we already visited this instruction, we don't need to check it again. if (!Visited.insert(MI).second) continue; // If this is a sign extending operation we don't need to look any further. - if (isSignExtendingOpW(*MI)) + if (isSignExtendingOpW(*MI, MRI, FixableDef)) continue; // Is this an instruction that propagates sign extend. @@ -157,7 +322,7 @@ static bool isSignExtendedW(const MachineInstr &OrigMI, // If this is a copy from another register, check its source instruction. if (!SrcReg.isVirtual()) return false; - const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); if (!SrcMI) return false; @@ -165,18 +330,25 @@ static bool isSignExtendedW(const MachineInstr &OrigMI, Worklist.push_back(SrcMI); break; } + + // For these, we just need to check if the 1st operand is sign extended. + case RISCV::BCLRI: + case RISCV::BINVI: + case RISCV::BSETI: + if (MI->getOperand(2).getImm() >= 31) + return false; + LLVM_FALLTHROUGH; case RISCV::REM: case RISCV::ANDI: case RISCV::ORI: case RISCV::XORI: { // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 - // Logical operations use a sign extended 12-bit immediate. We just need - // to check if the other operand is sign extended. + // Logical operations use a sign extended 12-bit immediate. 
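+    // (Illustrative: "andi rd, rs, -8" has a sign-extended immediate, so
+    // rd is sign-extended whenever rs is.)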
Register SrcReg = MI->getOperand(1).getReg(); if (!SrcReg.isVirtual()) return false; - const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); if (!SrcMI) return false; @@ -214,7 +386,7 @@ static bool isSignExtendedW(const MachineInstr &OrigMI, Register SrcReg = MI->getOperand(I).getReg(); if (!SrcReg.isVirtual()) return false; - const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); if (!SrcMI) return false; @@ -232,6 +404,26 @@ static bool isSignExtendedW(const MachineInstr &OrigMI, return true; } +static unsigned getWOp(unsigned Opcode) { + switch (Opcode) { + case RISCV::ADDI: + return RISCV::ADDIW; + case RISCV::ADD: + return RISCV::ADDW; + case RISCV::LD: + case RISCV::LWU: + return RISCV::LW; + case RISCV::MUL: + return RISCV::MULW; + case RISCV::SLLI: + return RISCV::SLLIW; + case RISCV::SUB: + return RISCV::SUBW; + default: + llvm_unreachable("Unexpected opcode for replacement with W variant"); + } +} + bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction()) || DisableSExtWRemoval) return false; @@ -242,7 +434,10 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) { if (!ST.is64Bit()) return false; - bool MadeChange = false; + SmallPtrSet SExtWRemovalCands; + + // Replacing instructions invalidates the MI iterator + // we collect the candidates, then iterate over them separately. for (MachineBasicBlock &MBB : MF) { for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) { MachineInstr *MI = &*I++; @@ -257,21 +452,49 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) { if (!SrcReg.isVirtual()) continue; - const MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg); - if (!isSignExtendedW(SrcMI, MRI)) - continue; + SExtWRemovalCands.insert(MI); + } + } - Register DstReg = MI->getOperand(0).getReg(); - if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg))) - continue; + bool MadeChange = false; + for (auto MI : SExtWRemovalCands) { + SmallPtrSet FixableDef; + Register SrcReg = MI->getOperand(1).getReg(); + MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg); + + // If all definitions reaching MI sign-extend their output, + // then sext.w is redundant + if (!isSignExtendedW(SrcMI, MRI, FixableDef)) + continue; - LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); - MRI.replaceRegWith(DstReg, SrcReg); - MRI.clearKillFlags(SrcReg); - MI->eraseFromParent(); - ++NumRemovedSExtW; - MadeChange = true; + Register DstReg = MI->getOperand(0).getReg(); + if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg))) + continue; + // Replace Fixable instructions with their W versions. 
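+    // (Illustrative: a "mul a0, a1, a2" feeding only this sext.w becomes
+    // "mulw a0, a1, a2", after which the sext.w is erased as redundant.)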
+ for (MachineInstr *Fixable : FixableDef) { + MachineBasicBlock &MBB = *Fixable->getParent(); + const DebugLoc &DL = Fixable->getDebugLoc(); + unsigned Code = getWOp(Fixable->getOpcode()); + MachineInstrBuilder Replacement = + BuildMI(MBB, Fixable, DL, ST.getInstrInfo()->get(Code)); + for (auto Op : Fixable->operands()) + Replacement.add(Op); + for (auto Op : Fixable->memoperands()) + Replacement.addMemOperand(Op); + + LLVM_DEBUG(dbgs() << "Replacing " << *Fixable); + LLVM_DEBUG(dbgs() << " with " << *Replacement); + + Fixable->eraseFromParent(); + ++NumTransformedToWInstrs; } + + LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); + MRI.replaceRegWith(DstReg, SrcReg); + MRI.clearKillFlags(SrcReg); + MI->eraseFromParent(); + ++NumRemovedSExtW; + MadeChange = true; } return MadeChange; diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 78cf34c8c582..5a3c8deb7943 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -242,6 +242,11 @@ defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbs; +defm : UnsupportedSchedZbe; defm : UnsupportedSchedZbf; +defm : UnsupportedSchedZbm; +defm : UnsupportedSchedZbp; +defm : UnsupportedSchedZbr; +defm : UnsupportedSchedZbt; defm : UnsupportedSchedZfh; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 9f5e5ff1223c..cfbd9722d7bc 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -229,6 +229,11 @@ defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbs; +defm : UnsupportedSchedZbe; defm : UnsupportedSchedZbf; +defm : UnsupportedSchedZbm; +defm : UnsupportedSchedZbp; +defm : UnsupportedSchedZbr; +defm : UnsupportedSchedZbt; defm : UnsupportedSchedZfh; } diff --git a/llvm/lib/Target/RISCV/RISCVScheduleB.td b/llvm/lib/Target/RISCV/RISCVScheduleB.td index 193760e1e15b..4bfe7b316eeb 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleB.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleB.td @@ -33,10 +33,59 @@ def WriteCLMUL : SchedWrite; // CLMUL/CLMULR/CLMULH def WriteSingleBit : SchedWrite; // BCLR/BSET/BINV/BEXT def WriteSingleBitImm: SchedWrite; // BCLRI/BSETI/BINVI/BEXTI +// Zbe extension +def WriteDecompress : SchedWrite; // bdecompress +def WriteCompress : SchedWrite; // bcompress +def WriteDecompress32: SchedWrite; // bdecompressw +def WriteCompress32 : SchedWrite; // bcompressw + // Zbf extension def WriteBFP : SchedWrite; // BFP def WriteBFP32 : SchedWrite; // BFPW +// Zbm extension +def WriteBMatrix : SchedWrite; // bmator/bmatxor/bmatflip + +// Zbp extension +def WriteORC : SchedWrite; // gorc +def WriteREV : SchedWrite; // grev +def WriteORC32 : SchedWrite; // gorcw +def WriteREV32 : SchedWrite; // grevw +def WriteREVImm : SchedWrite; // grevi +def WriteORCImm : SchedWrite; // gorci +def WriteREVImm32 : SchedWrite; // greviw +def WriteORCImm32 : SchedWrite; // gorciw +def WriteSHFL : SchedWrite; // shfl +def WriteUNSHFL : SchedWrite; // unshfl +def WriteSHFL32 : SchedWrite; // shflw +def WriteUNSHFL32 : SchedWrite; // unshflw +def WriteSHFLImm : SchedWrite; // shfli +def WriteUNSHFLImm : SchedWrite; // unshfli +def WriteXPERMH : SchedWrite; // xperm.h +def WriteXPERMW : SchedWrite; // xperm.w +def WritePACK : SchedWrite; // pack/packh +def WritePACK32 : SchedWrite; // packw +def WritePACKU : SchedWrite; // packu +def 
WritePACKU32 : SchedWrite; // packuw +
+// Zbr extension
+def WriteCRCB : SchedWrite; // crc32.b
+def WriteCRCH : SchedWrite; // crc32.h
+def WriteCRCW : SchedWrite; // crc32.w
+def WriteCRCD : SchedWrite; // crc32.d
+def WriteCRCCB : SchedWrite; // crc32c.b
+def WriteCRCCH : SchedWrite; // crc32c.h
+def WriteCRCCW : SchedWrite; // crc32c.w
+def WriteCRCCD : SchedWrite; // crc32c.d +
+// Zbt extension
+def WriteCMix : SchedWrite; // cmix
+def WriteCMov : SchedWrite; // cmov
+def WriteFSReg : SchedWrite; // fsl/fsr
+def WriteFSRImm : SchedWrite; // fsri
+def WriteFSReg32 : SchedWrite; // fslw/fsrw
+def WriteFSRImm32 : SchedWrite; // fsriw +
/// Define scheduler resources associated with use operands. // Zba extension @@ -64,10 +113,59 @@ def ReadCLMUL : SchedRead; // CLMUL/CLMULR/CLMULH def ReadSingleBit : SchedRead; // BCLR/BSET/BINV/BEXT def ReadSingleBitImm: SchedRead; // BCLRI/BSETI/BINVI/BEXTI
+// Zbe extension
+def ReadDecompress : SchedRead; // bdecompress
+def ReadCompress : SchedRead; // bcompress
+def ReadDecompress32: SchedRead; // bdecompressw
+def ReadCompress32 : SchedRead; // bcompressw +
// Zbf extension def ReadBFP : SchedRead; // BFP def ReadBFP32 : SchedRead; // BFPW
+// Zbm extension
+def ReadBMatrix : SchedRead; // bmator/bmatxor/bmatflip +
+// Zbp extension
+def ReadORC : SchedRead; // gorc
+def ReadREV : SchedRead; // grev
+def ReadORC32 : SchedRead; // gorcw
+def ReadREV32 : SchedRead; // grevw
+def ReadREVImm : SchedRead; // grevi
+def ReadORCImm : SchedRead; // gorci
+def ReadREVImm32 : SchedRead; // greviw
+def ReadORCImm32 : SchedRead; // gorciw
+def ReadSHFL : SchedRead; // shfl
+def ReadUNSHFL : SchedRead; // unshfl
+def ReadSHFL32 : SchedRead; // shflw
+def ReadUNSHFL32 : SchedRead; // unshflw
+def ReadSHFLImm : SchedRead; // shfli
+def ReadUNSHFLImm : SchedRead; // unshfli
+def ReadXPERMH : SchedRead; // xperm.h
+def ReadXPERMW : SchedRead; // xperm.w
+def ReadPACK : SchedRead; // pack/packh
+def ReadPACK32 : SchedRead; // packw
+def ReadPACKU : SchedRead; // packu
+def ReadPACKU32 : SchedRead; // packuw +
+// Zbr extension
+def ReadCRCB : SchedRead; // crc32.b
+def ReadCRCH : SchedRead; // crc32.h
+def ReadCRCW : SchedRead; // crc32.w
+def ReadCRCD : SchedRead; // crc32.d
+def ReadCRCCB : SchedRead; // crc32c.b
+def ReadCRCCH : SchedRead; // crc32c.h
+def ReadCRCCW : SchedRead; // crc32c.w
+def ReadCRCCD : SchedRead; // crc32c.d +
+// Zbt extension
+def ReadCMix : SchedRead; // cmix
+def ReadCMov : SchedRead; // cmov
+def ReadFSReg : SchedRead; // fsl/fsr
+def ReadFSRImm : SchedRead; // fsri
+def ReadFSReg32 : SchedRead; // fslw/fsrw
+def ReadFSRImm32 : SchedRead; // fsriw +
/// Define default scheduler resources for B.
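+// Editor's sketch (hypothetical processor model, not part of this patch):
+// the Unsupported* multiclasses below mark these writes/reads unsupported;
+// a core that does implement an extension would instead bind each pair to
+// its pipelines, e.g.
+//   def : WriteRes<WriteCRCB, [MyCoreALU]> { let Latency = 3; }
+//   def : ReadAdvance<ReadCRCB, 0>;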
multiclass UnsupportedSchedZba { @@ -128,6 +226,20 @@ def : ReadAdvance<ReadSingleBitImm, 0>; } }
+multiclass UnsupportedSchedZbe { +let Unsupported = true in {
+def : WriteRes<WriteDecompress, []>;
+def : WriteRes<WriteCompress, []>;
+def : WriteRes<WriteDecompress32, []>;
+def : WriteRes<WriteCompress32, []>; +
+def : ReadAdvance<ReadDecompress, 0>;
+def : ReadAdvance<ReadCompress, 0>;
+def : ReadAdvance<ReadDecompress32, 0>;
+def : ReadAdvance<ReadCompress32, 0>; +} +} +
multiclass UnsupportedSchedZbf { let Unsupported = true in { def : WriteRes<WriteBFP, []>;
@@ -137,3 +249,97 @@ def : ReadAdvance<ReadBFP, 0>; def : ReadAdvance<ReadBFP32, 0>; } } +
+multiclass UnsupportedSchedZbm { +let Unsupported = true in {
+def : WriteRes<WriteBMatrix, []>; +
+def : ReadAdvance<ReadBMatrix, 0>; +} +} +
+multiclass UnsupportedSchedZbp { +let Unsupported = true in {
+def : WriteRes<WriteORC, []>;
+def : WriteRes<WriteREV, []>;
+def : WriteRes<WriteORC32, []>;
+def : WriteRes<WriteREV32, []>;
+def : WriteRes<WriteREVImm, []>;
+def : WriteRes<WriteORCImm, []>;
+def : WriteRes<WriteREVImm32, []>;
+def : WriteRes<WriteORCImm32, []>;
+def : WriteRes<WriteSHFL, []>;
+def : WriteRes<WriteUNSHFL, []>;
+def : WriteRes<WriteSHFL32, []>;
+def : WriteRes<WriteUNSHFL32, []>;
+def : WriteRes<WriteSHFLImm, []>;
+def : WriteRes<WriteUNSHFLImm, []>;
+def : WriteRes<WriteXPERMH, []>;
+def : WriteRes<WriteXPERMW, []>;
+def : WriteRes<WritePACK, []>;
+def : WriteRes<WritePACK32, []>;
+def : WriteRes<WritePACKU, []>;
+def : WriteRes<WritePACKU32, []>; +
+def : ReadAdvance<ReadORC, 0>;
+def : ReadAdvance<ReadREV, 0>;
+def : ReadAdvance<ReadORC32, 0>;
+def : ReadAdvance<ReadREV32, 0>;
+def : ReadAdvance<ReadREVImm, 0>;
+def : ReadAdvance<ReadORCImm, 0>;
+def : ReadAdvance<ReadREVImm32, 0>;
+def : ReadAdvance<ReadORCImm32, 0>;
+def : ReadAdvance<ReadSHFL, 0>;
+def : ReadAdvance<ReadUNSHFL, 0>;
+def : ReadAdvance<ReadSHFL32, 0>;
+def : ReadAdvance<ReadUNSHFL32, 0>;
+def : ReadAdvance<ReadSHFLImm, 0>;
+def : ReadAdvance<ReadUNSHFLImm, 0>;
+def : ReadAdvance<ReadXPERMH, 0>;
+def : ReadAdvance<ReadXPERMW, 0>;
+def : ReadAdvance<ReadPACK, 0>;
+def : ReadAdvance<ReadPACK32, 0>;
+def : ReadAdvance<ReadPACKU, 0>;
+def : ReadAdvance<ReadPACKU32, 0>; +} +} +
+multiclass UnsupportedSchedZbr { +let Unsupported = true in {
+def : WriteRes<WriteCRCB, []>;
+def : WriteRes<WriteCRCH, []>;
+def : WriteRes<WriteCRCW, []>;
+def : WriteRes<WriteCRCD, []>;
+def : WriteRes<WriteCRCCB, []>;
+def : WriteRes<WriteCRCCH, []>;
+def : WriteRes<WriteCRCCW, []>;
+def : WriteRes<WriteCRCCD, []>; +
+def : ReadAdvance<ReadCRCB, 0>;
+def : ReadAdvance<ReadCRCH, 0>;
+def : ReadAdvance<ReadCRCW, 0>;
+def : ReadAdvance<ReadCRCD, 0>;
+def : ReadAdvance<ReadCRCCB, 0>;
+def : ReadAdvance<ReadCRCCH, 0>;
+def : ReadAdvance<ReadCRCCW, 0>;
+def : ReadAdvance<ReadCRCCD, 0>; +} +} +
+multiclass UnsupportedSchedZbt { +let Unsupported = true in {
+def : WriteRes<WriteCMix, []>;
+def : WriteRes<WriteCMov, []>;
+def : WriteRes<WriteFSReg, []>;
+def : WriteRes<WriteFSRImm, []>;
+def : WriteRes<WriteFSReg32, []>;
+def : WriteRes<WriteFSRImm32, []>; +
+def : ReadAdvance<ReadCMix, 0>;
+def : ReadAdvance<ReadCMov, 0>;
+def : ReadAdvance<ReadFSReg, 0>;
+def : ReadAdvance<ReadFSRImm, 0>;
+def : ReadAdvance<ReadFSReg32, 0>;
+def : ReadAdvance<ReadFSRImm32, 0>; +} +}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 976e4ccb1422..7589b44b81d3 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -15,6 +15,7 @@ #include "RISCVCallLowering.h" #include "RISCVFrameLowering.h" #include "RISCVLegalizerInfo.h"
+#include "RISCVMacroFusion.h"
#include "RISCVRegisterBankInfo.h" #include "RISCVTargetMachine.h" #include "llvm/MC/TargetRegistry.h"
@@ -28,16 +29,21 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "RISCVGenSubtargetInfo.inc"
-static cl::opt<unsigned> RVVVectorBitsMax(
+static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
+ cl::init(false), cl::Hidden); +
+static cl::opt<int> RVVVectorBitsMax(
"riscv-v-vector-bits-max",
cl::desc("Assume V extension vector registers are at most this big, " "with zero meaning no maximum size is assumed."),
cl::init(0), cl::Hidden);
-static cl::opt<unsigned> RVVVectorBitsMin(
+static cl::opt<int> RVVVectorBitsMin(
"riscv-v-vector-bits-min",
cl::desc("Assume V extension vector registers are at least this big, "
- "with zero meaning no minimum size is assumed."),
+ "with zero meaning no minimum size is assumed. A value of -1 "
+ "means use Zvl*b extension. 
This is primarily used to enable " + "autovectorization with fixed width vectors."), cl::init(0), cl::Hidden);
static cl::opt<unsigned> RVVVectorLMULMax( @@ -46,11 +52,6 @@ static cl::opt<unsigned> RVVVectorLMULMax( "Fractional LMUL values are not supported."), cl::init(8), cl::Hidden);
-static cl::opt<unsigned> RVVVectorELENMax(
- "riscv-v-fixed-length-vector-elen-max",
- cl::desc("The maximum ELEN value to use for fixed length vectors."),
- cl::init(64), cl::Hidden); -
static cl::opt<bool> RISCVDisableUsingConstantPoolForLargeInts(
"riscv-disable-using-constant-pool-for-large-ints",
cl::desc("Disable using constant pool for large integers."),
@@ -69,11 +70,8 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU, StringRef ABIName) {
// Determine default and user-specified characteristics bool Is64Bit = TT.isArch64Bit();
- if (CPU.empty())
+ if (CPU.empty() || CPU == "generic")
CPU = Is64Bit ? "generic-rv64" : "generic-rv32";
- if (CPU == "generic") - report_fatal_error(Twine("CPU 'generic' is not supported. Use ") + - (Is64Bit ? "generic-rv64" : "generic-rv32"));
if (TuneCPU.empty()) TuneCPU = CPU;
@@ -144,7 +142,7 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
// ZvlLen specifies the minimum required vlen. The upper bound provided by // riscv-v-vector-bits-max should be no less than it.
- if (RVVVectorBitsMax < ZvlLen)
+ if (RVVVectorBitsMax < (int)ZvlLen)
report_fatal_error("riscv-v-vector-bits-max specified is lower " "than the Zvl*b limitation");
@@ -162,14 +160,18 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const { }
unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
+ assert(hasVInstructions() && + "Tried to get vector length without Zve or V extension support!"); +
+ if (RVVVectorBitsMin == -1) + return ZvlLen; +
// ZvlLen specifies the minimum required vlen. The lower bound provided by // riscv-v-vector-bits-min should be no less than it.
- if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < ZvlLen)
+ if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < (int)ZvlLen)
report_fatal_error("riscv-v-vector-bits-min specified is lower " "than the Zvl*b limitation");
- assert(hasVInstructions() && - "Tried to get vector length without Zve or V extension support!");
// FIXME: Change to >= 32 when VLEN = 32 is supported assert( (RVVVectorBitsMin == 0 ||
@@ -195,17 +197,19 @@ unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
std::max<unsigned>(std::min<unsigned>(RVVVectorLMULMax, 8), 1)); }
-unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
- assert(hasVInstructions() && - "Tried to get maximum ELEN without Zve or V extension support!");
- assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 && - isPowerOf2_32(RVVVectorELENMax) && - "V extension requires a ELEN to be a power of 2 between 8 and 64!");
- unsigned ELEN = hasVInstructionsI64() ? 64 : 32;
- return PowerOf2Floor( - std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, ELEN), 8)); -} -
bool RISCVSubtarget::useRVVForFixedLengthVectors() const { return hasVInstructions() && getMinRVVVectorSizeInBits() != 0; } +
+bool RISCVSubtarget::enableSubRegLiveness() const {
+ if (EnableSubRegLiveness.getNumOccurrences())
+ return EnableSubRegLiveness;
+ // Enable subregister liveness for RVV to better handle LMUL>1 and segment
+ // load/store.
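+ // Editor's illustration (assumed example): at LMUL=2 a value occupies a
+ // register group such as {v8, v9}, and a segment load like vlseg2e32.v
+ // writes several destination registers at once; subregister liveness lets
+ // the allocator track each member register instead of the whole group.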
+ return hasVInstructions(); +} +
+void RISCVSubtarget::getPostRAMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(createRISCVMacroFusionDAGMutation()); +}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 34c6e8e684ac..831f7fadaa62 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -20,7 +20,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h"
@@ -34,22 +34,6 @@ class StringRef; class RISCVSubtarget : public RISCVGenSubtargetInfo { public:
- enum ExtZvl : unsigned { - NotSet = 0, - Zvl32b = 32, - Zvl64b = 64, - Zvl128b = 128, - Zvl256b = 256, - Zvl512b = 512, - Zvl1024b = 1024, - Zvl2048b = 2048, - Zvl4096b = 4096, - Zvl8192b = 8192, - Zvl16384b = 16384, - Zvl32768b = 32768, - Zvl65536b = 65536 - }; -
enum RISCVProcFamilyEnum : uint8_t { Others, SiFive7, @@ -65,6 +49,7 @@ private: bool HasStdExtF = false; bool HasStdExtD = false; bool HasStdExtC = false;
+ bool HasStdExtZihintpause = false;
bool HasStdExtZba = false; bool HasStdExtZbb = false; bool HasStdExtZbc = false; @@ -81,8 +66,13 @@ private: bool HasStdExtZve64x = false; bool HasStdExtZve64f = false; bool HasStdExtZve64d = false;
+ bool HasStdExtZvfh = false;
bool HasStdExtZfhmin = false; bool HasStdExtZfh = false;
+ bool HasStdExtZfinx = false; + bool HasStdExtZdinx = false; + bool HasStdExtZhinxmin = false; + bool HasStdExtZhinx = false;
bool HasStdExtZbkb = false; bool HasStdExtZbkc = false; bool HasStdExtZbkx = false; @@ -96,13 +86,19 @@ private: bool HasStdExtZks = false; bool HasStdExtZkt = false; bool HasStdExtZk = false;
+ bool HasStdExtZicbom = false; + bool HasStdExtZicboz = false; + bool HasStdExtZicbop = false;
bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; bool EnableRVCHintInstrs = true;
+ bool EnableDefaultUnroll = true;
bool EnableSaveRestore = false;
+ bool EnableUnalignedScalarMem = false; + bool HasLUIADDIFusion = false;
unsigned XLen = 32;
- ExtZvl ZvlLen = ExtZvl::NotSet;
+ unsigned ZvlLen = 0;
MVT XLenVT = MVT::i32; uint8_t MaxInterleaveFactor = 2; RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
@@ -157,6 +153,7 @@ public: bool hasStdExtD() const { return HasStdExtD; } bool hasStdExtC() const { return HasStdExtC; } bool hasStdExtV() const { return HasStdExtV; }
+ bool hasStdExtZihintpause() const { return HasStdExtZihintpause; }
bool hasStdExtZba() const { return HasStdExtZba; } bool hasStdExtZbb() const { return HasStdExtZbb; } bool hasStdExtZbc() const { return HasStdExtZbc; }
@@ -167,9 +164,14 @@ public: bool hasStdExtZbr() const { return HasStdExtZbr; } bool hasStdExtZbs() const { return HasStdExtZbs; } bool hasStdExtZbt() const { return HasStdExtZbt; }
- bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
+ bool hasStdExtZvl() const { return ZvlLen != 0; }
+ bool hasStdExtZvfh() const { return HasStdExtZvfh; }
bool hasStdExtZfhmin() const { return HasStdExtZfhmin; } bool hasStdExtZfh() const { return HasStdExtZfh; }
+ bool hasStdExtZfinx() const { return HasStdExtZfinx; }
+ bool hasStdExtZdinx() const { return HasStdExtZdinx; }
+ bool hasStdExtZhinxmin() const { return HasStdExtZhinxmin; }
+ bool hasStdExtZhinx() const {
return HasStdExtZhinx; }
bool hasStdExtZbkb() const { return HasStdExtZbkb; } bool hasStdExtZbkc() const { return HasStdExtZbkc; } bool hasStdExtZbkx() const { return HasStdExtZbkx; }
@@ -179,11 +181,17 @@ public: bool hasStdExtZksed() const { return HasStdExtZksed; } bool hasStdExtZksh() const { return HasStdExtZksh; } bool hasStdExtZkr() const { return HasStdExtZkr; }
+ bool hasStdExtZicbom() const { return HasStdExtZicbom; }
+ bool hasStdExtZicboz() const { return HasStdExtZicboz; }
+ bool hasStdExtZicbop() const { return HasStdExtZicbop; }
bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
+ bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
bool enableSaveRestore() const { return EnableSaveRestore; }
+ bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
+ bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
MVT getXLenVT() const { return XLenVT; } unsigned getXLen() const { return XLen; } unsigned getFLen() const { @@ -195,27 +203,34 @@ public: return 0; }
- unsigned getMinVLen() const { return ZvlLen; }
+ unsigned getELEN() const {
+ assert(hasVInstructions() && "Expected V extension");
+ return hasVInstructionsI64() ? 64 : 32; + }
+ unsigned getRealMinVLen() const {
+ unsigned VLen = getMinRVVVectorSizeInBits();
+ return VLen == 0 ? getArchMinVLen() : VLen; + }
+ unsigned getRealMaxVLen() const {
+ unsigned VLen = getMaxRVVVectorSizeInBits();
+ return VLen == 0 ? getArchMaxVLen() : VLen; + }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isRegisterReservedByUser(Register i) const { assert(i < RISCV::NUM_TARGET_REGS && "Register out of range"); return UserReservedRegister[i]; }
+ bool hasMacroFusion() const { return hasLUIADDIFusion(); } +
// Vector codegen related methods.
- bool hasVInstructions() const { return HasStdExtV || HasStdExtZve32x; }
- bool hasVInstructionsI64() const { return HasStdExtV || HasStdExtZve64x; }
- bool hasVInstructionsF16() const { - return (HasStdExtV || HasStdExtZve32f) && HasStdExtZfh; - }
+ bool hasVInstructions() const { return HasStdExtZve32x; }
+ bool hasVInstructionsI64() const { return HasStdExtZve64x; }
+ bool hasVInstructionsF16() const { return HasStdExtZvfh && HasStdExtZfh; }
// FIXME: Consider Zfinx in the future
- bool hasVInstructionsF32() const { - return HasStdExtV || (HasStdExtZve32f && HasStdExtF); - }
+ bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
// FIXME: Consider Zdinx in the future
- bool hasVInstructionsF64() const { - return HasStdExtV || (HasStdExtZve64d && HasStdExtD); - }
+ bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
// F16 and F64 both require F32. bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
unsigned getMaxInterleaveFactor() const { @@ -229,6 +244,18 @@ protected:
std::unique_ptr<LegalizerInfo> Legalizer; std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ // Return the known range for the bit length of RVV data registers as set
+ // at the command line. A value of 0 means nothing is known about that particular
+ // limit beyond what's implied by the architecture.
+ // NOTE: Please use getRealMinVLen and getRealMaxVLen instead!
+ unsigned getMaxRVVVectorSizeInBits() const;
+ unsigned getMinRVVVectorSizeInBits() const; +
+ // Return the known range for the bit length of RVV data registers as indicated
+ // by -march and -mattr.
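+ // e.g. (editor's note, assumed flags): -march=rv64gcv_zvl256b makes
+ // getArchMinVLen() return 256, while getArchMaxVLen() is the V-spec
+ // ceiling of 65536 bits.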
+ unsigned getArchMinVLen() const { return ZvlLen; }
+ unsigned getArchMaxVLen() const { return 65536; } +
public: const CallLowering *getCallLowering() const override; InstructionSelector *getInstructionSelector() const override;
@@ -241,14 +268,13 @@ public: // pool if exceeded. unsigned getMaxBuildIntsCost() const;
- // Return the known range for the bit length of RVV data registers. A value - // of 0 means nothing is known about that particular limit beyond what's - // implied by the architecture.
- unsigned getMaxRVVVectorSizeInBits() const;
- unsigned getMinRVVVectorSizeInBits() const;
unsigned getMaxLMULForFixedLengthVectors() const;
- unsigned getMaxELENForFixedLengthVectors() const;
bool useRVVForFixedLengthVectors() const; +
+ bool enableSubRegLiveness() const override; +
+ void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
+ &Mutations) const override; };
} // End llvm namespace
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index db5e2f1eeb6f..b2707b753e87 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -13,6 +13,8 @@ #include "RISCVTargetMachine.h" #include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h" +#include "RISCVMacroFusion.h"
#include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h"
@@ -22,6 +24,8 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h" +#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,13 +34,20 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO.h"
using namespace llvm;
+static cl::opt<bool> EnableRedundantCopyElimination(
+ "riscv-enable-copyelim",
+ cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
+ cl::Hidden); +
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR);
+ initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVSExtWRemovalPass(*PR);
@@ -53,9 +64,7 @@ static StringRef computeDataLayout(const Triple &TT) {
static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional<Reloc::Model> RM) {
- if (!RM.hasValue()) - return Reloc::Static; - return *RM;
+ return RM.value_or(Reloc::Static); }
RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
@@ -72,6 +81,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
// RISC-V supports the MachineOutliner.
setMachineOutliner(true);
+ setSupportsDefaultOutlining(true); }
const RISCVSubtarget * @@ -109,7 +119,7 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { }
TargetTransformInfo
-RISCVTargetMachine::getTargetTransformInfo(const Function &F) {
+RISCVTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(RISCVTTIImpl(this, F)); }
@@ -132,7 +142,30 @@ public: return getTM<RISCVTargetMachine>(); }
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ if (ST.hasMacroFusion()) {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createRISCVMacroFusionDAGMutation());
+ return DAG; + }
+ return nullptr; + } +
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ if (ST.hasMacroFusion()) {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createRISCVMacroFusionDAGMutation());
+ return DAG; + }
+ return nullptr; + } +
void addIRPasses() override;
+ bool addPreISel() override;
bool addInstSelector() override; bool addIRTranslator() override; bool addLegalizeMachineIR() override;
@@ -143,6 +176,7 @@ public: void addPreSched2() override; void addMachineSSAOptimization() override; void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
}; } // namespace
@@ -158,8 +192,18 @@ void RISCVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
+bool RISCVPassConfig::addPreISel() {
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ // Add a barrier before instruction selection so that we will not get
+ // deleted block address after enabling default outlining. See D99707 for
+ // more details.
+ addPass(createBarrierNoopPass()); + }
+ return false; +} +
bool RISCVPassConfig::addInstSelector() {
- addPass(createRISCVISelDag(getRISCVTargetMachine()));
+ addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));
return false; }
@@ -186,7 +230,10 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
void RISCVPassConfig::addPreSched2() {}
-void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass() {
+ addPass(&BranchRelaxationPassID);
+ addPass(createRISCVMakeCompressibleOptPass()); +}
void RISCVPassConfig::addPreEmitPass2() { addPass(createRISCVExpandPseudoPass());
@@ -208,3 +255,28 @@ void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVMergeBaseOffsetOptPass()); addPass(createRISCVInsertVSETVLIPass()); } +
+void RISCVPassConfig::addPostRegAlloc() {
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ addPass(createRISCVRedundantCopyEliminationPass()); +} +
+yaml::MachineFunctionInfo *
+RISCVTargetMachine::createDefaultFuncInfoYAML() const {
+ return new yaml::RISCVMachineFunctionInfo(); +} +
+yaml::MachineFunctionInfo *
+RISCVTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ return new yaml::RISCVMachineFunctionInfo(*MFI); +} +
+bool RISCVTargetMachine::parseMachineFunctionInfo(
+ const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error, SMRange &SourceRange) const {
+ const auto &YamlMFI =
+ static_cast<const yaml::RISCVMachineFunctionInfo &>(MFI);
+ PFS.MF.getInfo<RISCVMachineFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+ return false; +}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h index 3156333f7ee1..087646fb5ed9 100644 ---
a/llvm/lib/Target/RISCV/RISCVTargetMachine.h +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -42,10 +42,18 @@ public: return TLOF.get(); }
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; +
+ yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+ yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const override;
+ bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const override; };
} // namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 99e6774a02e4..29d3c5e491de 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -11,6 +11,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/TargetLowering.h"
+#include <cmath>
using namespace llvm;
#define DEBUG_TYPE "riscvtti"
@@ -131,19 +132,17 @@ bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const { }
Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
- // There is no assumption of the maximum vector length in V specification.
- // We use the value specified by users as the maximum vector length.
- // This function will use the assumed maximum vector length to get the
- // maximum vscale for LoopVectorizer.
- // If users do not specify the maximum vector length, we have no way to
- // know whether the LoopVectorizer is safe to do or not.
- // We only consider to use single vector register (LMUL = 1) to vectorize.
- unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
- if (ST->hasVInstructions() && MaxVectorSizeInBits != 0)
- return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
+ if (ST->hasVInstructions())
+ return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock;
return BaseT::getMaxVScale(); }
+Optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
+ if (ST->hasVInstructions())
+ return ST->getRealMinVLen() / RISCV::RVVBitsPerBlock;
+ return BaseT::getVScaleForTuning(); +} +
TypeSize RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
unsigned LMUL = PowerOf2Floor( @@ -153,7 +152,7 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
return TypeSize::getFixed(ST->getXLen());
case TargetTransformInfo::RGK_FixedWidthVector: return TypeSize::getFixed(
- ST->hasVInstructions() ? LMUL * ST->getMinRVVVectorSizeInBits() : 0);
+ ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
case TargetTransformInfo::RGK_ScalableVector: return TypeSize::getScalable( ST->hasVInstructions() ? LMUL * RISCV::RVVBitsPerBlock : 0);
@@ -162,6 +161,61 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { llvm_unreachable("Unsupported register kind"); }
+InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); +
+ unsigned Cost = 2; // vslidedown+vslideup.
+ // TODO: LMUL should increase cost.
+ // TODO: Multiplying by LT.first implies this legalizes into multiple copies
+ // of similar code, but I think we expand through memory.
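+ // Editor's sketch of the assumed two-instruction lowering:
+ //   vslidedown.vx v8, v8, a0   ; drop the first Index elements of op0
+ //   vslideup.vx   v8, v9, a1   ; slide op1 in above them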
+ return Cost * LT.first; +} +
+InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
+ VectorType *Tp, ArrayRef<int> Mask,
+ int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
+ if (isa<ScalableVectorType>(Tp)) {
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ switch (Kind) { + default:
+ // Fallthrough to generic handling.
+ // TODO: Most of these cases will return getInvalid in generic code, and
+ // must be implemented here. + break;
+ case TTI::SK_Broadcast: { + return LT.first * 1; + }
+ case TTI::SK_Splice: + return getSpliceCost(Tp, Index);
+ case TTI::SK_Reverse:
+ // Most of the cost here is producing the vrgather index register
+ // Example sequence:
+ // csrr a0, vlenb
+ // srli a0, a0, 3
+ // addi a0, a0, -1
+ // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
+ // vid.v v9
+ // vrsub.vx v10, v9, a0
+ // vrgather.vv v9, v8, v10
+ return LT.first * 6; + } + } +
+ return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); +} +
+InstructionCost
+RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<ScalableVectorType>(Src))
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind); +
+ return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); +} +
InstructionCost RISCVTTIImpl::getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
@@ -176,31 +230,152 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost( return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- // FIXME: Only supporting fixed vectors for now.
- if (!isa<FixedVectorType>(DataTy))
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, CostKind, I); -
- auto *VTy = cast<FixedVectorType>(DataTy);
- unsigned NumLoads = VTy->getNumElements();
- InstructionCost MemOpCost = - getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
+ // Cost is proportional to the number of memory operations implied. For
+ // scalable vectors, we use an upper bound on that number since we don't
+ // know exactly what VL will be.
+ auto &VTy = *cast<VectorType>(DataTy);
+ InstructionCost MemOpCost = getMemoryOpCost(Opcode, VTy.getElementType(),
+ Alignment, 0, CostKind, I);
+ unsigned NumLoads = getMaxVLFor(&VTy);
+ return NumLoads * MemOpCost; }
+InstructionCost
+RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ auto *RetTy = ICA.getReturnType();
+ switch (ICA.getID()) {
+ // TODO: add more intrinsics
+ case Intrinsic::experimental_stepvector: {
+ unsigned Cost = 1; // vid
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ return Cost + (LT.first - 1); + }
+ default: + break; + }
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind); +} +
+InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
+ // FIXME: Need to compute legalizing cost for illegal types.
+ if (!isTypeLegal(Src) || !isTypeLegal(Dst))
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); +
+ // Skip if element size of Dst or Src is bigger than ELEN.
+ if (Src->getScalarSizeInBits() > ST->getELEN() ||
+ Dst->getScalarSizeInBits() > ST->getELEN())
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); +
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode"); +
+ // FIXME: Need to consider vsetvli and lmul.
+ int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
+ (int)Log2_32(Src->getScalarSizeInBits());
+ switch (ISD) {
+ case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + return 1;
+ case ISD::TRUNCATE: + case ISD::FP_EXTEND: + case ISD::FP_ROUND:
+ // Counts of narrow/widen instructions.
+ return std::abs(PowDiff);
+ case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP:
+ if (std::abs(PowDiff) <= 1) + return 1;
+ // Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
+ // so it only needs two conversions.
+ if (Src->isIntOrIntVectorTy()) + return 2;
+ // Counts of narrow/widen instructions.
+ return std::abs(PowDiff); + } + }
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); +} +
+unsigned RISCVTTIImpl::getMaxVLFor(VectorType *Ty) {
+ if (isa<ScalableVectorType>(Ty)) {
+ const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
+ const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
+ const unsigned VectorBitsMax = ST->getRealMaxVLen();
+ return RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + }
+ return cast<FixedVectorType>(Ty)->getNumElements(); +} +
+InstructionCost
+RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
+ if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); +
+ // Skip if scalar size of Ty is bigger than ELEN.
+ if (Ty->getScalarSizeInBits() > ST->getELEN())
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); +
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ if (Ty->getElementType()->isIntegerTy(1))
+ // vcpop sequences, see vreduction-mask.ll. umax, smin actually only
+ // cost 2, but we don't have enough info here, so we slightly overestimate.
+ return (LT.first - 1) + 3; +
+ // IR Reduction is composed of two vmv and one rvv reduction instruction.
+ InstructionCost BaseCost = 2;
+ unsigned VL = getMaxVLFor(Ty);
+ return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL); +} +
+InstructionCost
+RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind) {
+ if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); +
+ // Skip if scalar size of Ty is bigger than ELEN.
+ if (Ty->getScalarSizeInBits() > ST->getELEN())
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); +
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode"); +
+ if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
+ ISD != ISD::FADD)
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); +
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ if (Ty->getElementType()->isIntegerTy(1))
+ // vcpop sequences, see vreduction-mask.ll
+ return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2); +
+ // IR Reduction is composed of two vmv and one rvv reduction instruction.
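+ // e.g. (editor's sketch) an unordered float fadd reduction might be:
+ //   vfmv.s.f     v9, fa0      ; vmv: move the start value in
+ //   vfredusum.vs v9, v8, v9   ; the rvv reduction proper
+ //   vfmv.f.s     fa0, v9      ; vmv: move the result back out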
+ InstructionCost BaseCost = 2;
+ unsigned VL = getMaxVLFor(Ty);
+ if (TTI::requiresOrderedReduction(FMF))
+ return (LT.first - 1) + BaseCost + VL;
+ return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL); +} +
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) {
// TODO: More tuning on benchmarks and metrics with changes as needed // would apply to all settings below to enable performance.
- // Support explicit targets enabled for SiFive with the unrolling preferences - // below
- bool UseDefaultPreferences = true;
- if (ST->getProcFamily() == RISCVSubtarget::SiFive7)
- UseDefaultPreferences = false;
- if (UseDefaultPreferences)
+ if (ST->enableDefaultUnroll())
return BasicTTIImplBase<RISCVTTIImpl>::getUnrollingPreferences(L, SE, UP, ORE);
// Enable Upper bound unrolling universally, not dependent upon the conditions
@@ -276,14 +451,14 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, BaseT::getPeelingPreferences(L, SE, PP); }
-InstructionCost RISCVTTIImpl::getRegUsageForType(Type *Ty) {
+unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
TypeSize Size = Ty->getPrimitiveSizeInBits(); if (Ty->isVectorTy()) {
if (Size.isScalable() && ST->hasVInstructions()) return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
if (ST->useRVVForFixedLengthVectors())
- return divideCeil(Size, ST->getMinRVVVectorSizeInBits());
+ return divideCeil(Size, ST->getRealMinVLen());
} return BaseT::getRegUsageForType(Ty);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index e79c4f75712b..7caf0fedb2ca 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -37,6 +37,7 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
const RISCVSubtarget *getST() const { return ST; } const RISCVTargetLowering *getTLI() const { return TLI; }
+ unsigned getMaxVLFor(VectorType *Ty);
public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -57,10 +58,15 @@ public: bool shouldExpandReduction(const IntrinsicInst *II) const; bool supportsScalableVectors() const { return ST->hasVInstructions(); }
Optional<unsigned> getMaxVScale() const;
+ Optional<unsigned> getVScaleForTuning() const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
- InstructionCost getRegUsageForType(Type *Ty);
+ unsigned getRegUsageForType(Type *Ty); +
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
@@ -73,24 +79,50 @@ public: return ST->useRVVForFixedLengthVectors() ?
16 : 0; }
+ InstructionCost getSpliceCost(VectorType *Tp, int Index);
+ InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ ArrayRef<int> Mask, int Index,
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None); +
+ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind); +
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I);
+ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr); +
+ InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsUnsigned,
+ TTI::TargetCostKind CostKind); +
+ InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind); +
+ bool isElementTypeLegalForScalableVector(Type *Ty) const {
+ return TLI->isLegalElementTypeForRVV(Ty); + } +
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) { if (!ST->hasVInstructions()) return false;
// Only support fixed vectors if we know the minimum vector size.
- if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
+ if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
return false;
// Don't allow elements larger than the ELEN. // FIXME: How to limit for scalable vectors?
if (isa<FixedVectorType>(DataType) &&
- DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ DataType->getScalarSizeInBits() > ST->getELEN())
return false;
if (Alignment < @@ -112,13 +144,13 @@ public: return false;
// Only support fixed vectors if we know the minimum vector size.
- if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
+ if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
return false;
// Don't allow elements larger than the ELEN. // FIXME: How to limit for scalable vectors?
if (isa<FixedVectorType>(DataType) &&
- DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ DataType->getScalarSizeInBits() > ST->getELEN())
return false;
if (Alignment < @@ -135,6 +167,16 @@ public: return isLegalMaskedGatherScatter(DataType, Alignment); }
+ bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
+ // Scalarize masked gather for RV64 if EEW=64 indices aren't supported.
+ return ST->is64Bit() && !ST->hasVInstructionsI64(); + } +
+ bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
+ // Scalarize masked scatter for RV64 if EEW=64 indices aren't supported.
+ return ST->is64Bit() && !ST->hasVInstructionsI64(); + } +
/// \returns How the target needs this vector-predicated operation to be /// transformed. TargetTransformInfo::VPLegalization
@@ -145,9 +187,6 @@ public: bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const {
- if (!ST->hasVInstructions()) - return false; -
if (!VF.isScalable()) return true;
@@ -179,18 +218,53 @@ public: return VF == 1 ? 1 : ST->getMaxInterleaveFactor(); }
- // TODO: We should define RISC-V's own register classes.
- // e.g. register class for FPR.
+ enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
unsigned getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
- if (Vector) {
- if (ST->hasVInstructions())
+ switch (ClassID) {
+ case RISCVRegisterClass::GPRRC:
+ // 31 = 32 GPR - x0 (zero register)
+ // FIXME: Should we exclude fixed registers like SP, TP or GP?
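+ // (Editor's note: the psABI additionally fixes sp/x2, gp/x3 and tp/x4,
+ // so the number actually allocatable can be lower than 31 in practice.)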
+ return 31;
+ case RISCVRegisterClass::FPRRC:
+ if (ST->hasStdExtF()) return 32; return 0;
+ case RISCVRegisterClass::VRRC:
+ // Although there are 32 vector registers, v0 is special in that it is the
+ // only register that can be used to hold a mask.
+ // FIXME: Should we conservatively return 31 as the number of usable
+ // vector registers?
+ return ST->hasVInstructions() ? 32 : 0; + }
+ llvm_unreachable("unknown register class"); + } +
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
+ if (Vector) + return RISCVRegisterClass::VRRC;
+ if (!Ty) + return RISCVRegisterClass::GPRRC; +
+ Type *ScalarTy = Ty->getScalarType();
+ if ((ScalarTy->isHalfTy() && ST->hasStdExtZfh()) ||
+ (ScalarTy->isFloatTy() && ST->hasStdExtF()) ||
+ (ScalarTy->isDoubleTy() && ST->hasStdExtD())) {
+ return RISCVRegisterClass::FPRRC; + } +
+ return RISCVRegisterClass::GPRRC; + } +
+ const char *getRegisterClassName(unsigned ClassID) const {
+ switch (ClassID) {
+ case RISCVRegisterClass::GPRRC: + return "RISCV::GPRRC";
+ case RISCVRegisterClass::FPRRC: + return "RISCV::FPRRC";
+ case RISCVRegisterClass::VRRC: + return "RISCV::VRRC"; }
- // 31 = 32 GPR - x0 (zero register)
- // FIXME: Should we exclude fixed registers like SP, TP or GP?
- return 31;
+ llvm_unreachable("unknown register class"); } };
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
new file mode 100644 index 000000000000..4156a0026411 --- /dev/null +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
@@ -0,0 +1,63 @@
+//===-- SPIRVAsmBackend.cpp - SPIR-V Assembler Backend ---------*- C++ -*--===// +//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//
+//===----------------------------------------------------------------------===// +
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/EndianStream.h" +
+using namespace llvm; +
+namespace { +
+class SPIRVAsmBackend : public MCAsmBackend { +public:
+ SPIRVAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {} +
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override {} +
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createSPIRVObjectTargetWriter(); + } +
+ // No instruction requires relaxation.
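+ // (Editor's note: SPIR-V modules are streams of fixed-size 32-bit words,
+ // so the relaxation hooks below can all be trivial no-ops.)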
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + return false; + } + + unsigned getNumFixupKinds() const override { return 1; } + + bool mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const override { + return false; + } + + void relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override {} + + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override { + return false; + } +}; + +} // end anonymous namespace + +MCAsmBackend *llvm::createSPIRVAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &) { + return new SPIRVAsmBackend(support::little); +} diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp new file mode 100644 index 000000000000..1a3e35a5f901 --- /dev/null +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp @@ -0,0 +1,1072 @@ +//===-- SPIRVBaseInfo.cpp - Top level definitions for SPIRV ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the SPIRV target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#include "SPIRVBaseInfo.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace SPIRV { + +#define CASE(CLASS, ATTR) \ + case CLASS::ATTR: \ + return #ATTR; +#define CASE_SUF(CLASS, SF, ATTR) \ + case CLASS::SF##_##ATTR: \ + return #ATTR; + +// Implement getEnumName(Enum e) helper functions. +// TODO: re-implement all the functions using TableGen. 
+StringRef getCapabilityName(Capability e) { + switch (e) { + CASE(Capability, Matrix) + CASE(Capability, Shader) + CASE(Capability, Geometry) + CASE(Capability, Tessellation) + CASE(Capability, Addresses) + CASE(Capability, Linkage) + CASE(Capability, Kernel) + CASE(Capability, Vector16) + CASE(Capability, Float16Buffer) + CASE(Capability, Float16) + CASE(Capability, Float64) + CASE(Capability, Int64) + CASE(Capability, Int64Atomics) + CASE(Capability, ImageBasic) + CASE(Capability, ImageReadWrite) + CASE(Capability, ImageMipmap) + CASE(Capability, Pipes) + CASE(Capability, Groups) + CASE(Capability, DeviceEnqueue) + CASE(Capability, LiteralSampler) + CASE(Capability, AtomicStorage) + CASE(Capability, Int16) + CASE(Capability, TessellationPointSize) + CASE(Capability, GeometryPointSize) + CASE(Capability, ImageGatherExtended) + CASE(Capability, StorageImageMultisample) + CASE(Capability, UniformBufferArrayDynamicIndexing) + CASE(Capability, SampledImageArrayDymnamicIndexing) + CASE(Capability, ClipDistance) + CASE(Capability, CullDistance) + CASE(Capability, ImageCubeArray) + CASE(Capability, SampleRateShading) + CASE(Capability, ImageRect) + CASE(Capability, SampledRect) + CASE(Capability, GenericPointer) + CASE(Capability, Int8) + CASE(Capability, InputAttachment) + CASE(Capability, SparseResidency) + CASE(Capability, MinLod) + CASE(Capability, Sampled1D) + CASE(Capability, Image1D) + CASE(Capability, SampledCubeArray) + CASE(Capability, SampledBuffer) + CASE(Capability, ImageBuffer) + CASE(Capability, ImageMSArray) + CASE(Capability, StorageImageExtendedFormats) + CASE(Capability, ImageQuery) + CASE(Capability, DerivativeControl) + CASE(Capability, InterpolationFunction) + CASE(Capability, TransformFeedback) + CASE(Capability, GeometryStreams) + CASE(Capability, StorageImageReadWithoutFormat) + CASE(Capability, StorageImageWriteWithoutFormat) + CASE(Capability, MultiViewport) + CASE(Capability, SubgroupDispatch) + CASE(Capability, NamedBarrier) + CASE(Capability, PipeStorage) + CASE(Capability, GroupNonUniform) + CASE(Capability, GroupNonUniformVote) + CASE(Capability, GroupNonUniformArithmetic) + CASE(Capability, GroupNonUniformBallot) + CASE(Capability, GroupNonUniformShuffle) + CASE(Capability, GroupNonUniformShuffleRelative) + CASE(Capability, GroupNonUniformClustered) + CASE(Capability, GroupNonUniformQuad) + CASE(Capability, SubgroupBallotKHR) + CASE(Capability, DrawParameters) + CASE(Capability, SubgroupVoteKHR) + CASE(Capability, StorageBuffer16BitAccess) + CASE(Capability, StorageUniform16) + CASE(Capability, StoragePushConstant16) + CASE(Capability, StorageInputOutput16) + CASE(Capability, DeviceGroup) + CASE(Capability, MultiView) + CASE(Capability, VariablePointersStorageBuffer) + CASE(Capability, VariablePointers) + CASE(Capability, AtomicStorageOps) + CASE(Capability, SampleMaskPostDepthCoverage) + CASE(Capability, StorageBuffer8BitAccess) + CASE(Capability, UniformAndStorageBuffer8BitAccess) + CASE(Capability, StoragePushConstant8) + CASE(Capability, DenormPreserve) + CASE(Capability, DenormFlushToZero) + CASE(Capability, SignedZeroInfNanPreserve) + CASE(Capability, RoundingModeRTE) + CASE(Capability, RoundingModeRTZ) + CASE(Capability, Float16ImageAMD) + CASE(Capability, ImageGatherBiasLodAMD) + CASE(Capability, FragmentMaskAMD) + CASE(Capability, StencilExportEXT) + CASE(Capability, ImageReadWriteLodAMD) + CASE(Capability, SampleMaskOverrideCoverageNV) + CASE(Capability, GeometryShaderPassthroughNV) + CASE(Capability, ShaderViewportIndexLayerEXT) + CASE(Capability, 
ShaderViewportMaskNV) + CASE(Capability, ShaderStereoViewNV) + CASE(Capability, PerViewAttributesNV) + CASE(Capability, FragmentFullyCoveredEXT) + CASE(Capability, MeshShadingNV) + CASE(Capability, ShaderNonUniformEXT) + CASE(Capability, RuntimeDescriptorArrayEXT) + CASE(Capability, InputAttachmentArrayDynamicIndexingEXT) + CASE(Capability, UniformTexelBufferArrayDynamicIndexingEXT) + CASE(Capability, StorageTexelBufferArrayDynamicIndexingEXT) + CASE(Capability, UniformBufferArrayNonUniformIndexingEXT) + CASE(Capability, SampledImageArrayNonUniformIndexingEXT) + CASE(Capability, StorageBufferArrayNonUniformIndexingEXT) + CASE(Capability, StorageImageArrayNonUniformIndexingEXT) + CASE(Capability, InputAttachmentArrayNonUniformIndexingEXT) + CASE(Capability, UniformTexelBufferArrayNonUniformIndexingEXT) + CASE(Capability, StorageTexelBufferArrayNonUniformIndexingEXT) + CASE(Capability, RayTracingNV) + CASE(Capability, SubgroupShuffleINTEL) + CASE(Capability, SubgroupBufferBlockIOINTEL) + CASE(Capability, SubgroupImageBlockIOINTEL) + CASE(Capability, SubgroupImageMediaBlockIOINTEL) + CASE(Capability, SubgroupAvcMotionEstimationINTEL) + CASE(Capability, SubgroupAvcMotionEstimationIntraINTEL) + CASE(Capability, SubgroupAvcMotionEstimationChromaINTEL) + CASE(Capability, GroupNonUniformPartitionedNV) + CASE(Capability, VulkanMemoryModelKHR) + CASE(Capability, VulkanMemoryModelDeviceScopeKHR) + CASE(Capability, ImageFootprintNV) + CASE(Capability, FragmentBarycentricNV) + CASE(Capability, ComputeDerivativeGroupQuadsNV) + CASE(Capability, ComputeDerivativeGroupLinearNV) + CASE(Capability, FragmentDensityEXT) + CASE(Capability, PhysicalStorageBufferAddressesEXT) + CASE(Capability, CooperativeMatrixNV) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getSourceLanguageName(SourceLanguage e) { + switch (e) { + CASE(SourceLanguage, Unknown) + CASE(SourceLanguage, ESSL) + CASE(SourceLanguage, GLSL) + CASE(SourceLanguage, OpenCL_C) + CASE(SourceLanguage, OpenCL_CPP) + CASE(SourceLanguage, HLSL) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getExecutionModelName(ExecutionModel e) { + switch (e) { + CASE(ExecutionModel, Vertex) + CASE(ExecutionModel, TessellationControl) + CASE(ExecutionModel, TessellationEvaluation) + CASE(ExecutionModel, Geometry) + CASE(ExecutionModel, Fragment) + CASE(ExecutionModel, GLCompute) + CASE(ExecutionModel, Kernel) + CASE(ExecutionModel, TaskNV) + CASE(ExecutionModel, MeshNV) + CASE(ExecutionModel, RayGenerationNV) + CASE(ExecutionModel, IntersectionNV) + CASE(ExecutionModel, AnyHitNV) + CASE(ExecutionModel, ClosestHitNV) + CASE(ExecutionModel, MissNV) + CASE(ExecutionModel, CallableNV) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getAddressingModelName(AddressingModel e) { + switch (e) { + CASE(AddressingModel, Logical) + CASE(AddressingModel, Physical32) + CASE(AddressingModel, Physical64) + CASE(AddressingModel, PhysicalStorageBuffer64EXT) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getMemoryModelName(MemoryModel e) { + switch (e) { + CASE(MemoryModel, Simple) + CASE(MemoryModel, GLSL450) + CASE(MemoryModel, OpenCL) + CASE(MemoryModel, VulkanKHR) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getExecutionModeName(ExecutionMode e) { + switch (e) { + CASE(ExecutionMode, Invocations) + CASE(ExecutionMode, SpacingEqual) + CASE(ExecutionMode, SpacingFractionalEven) + CASE(ExecutionMode, SpacingFractionalOdd) + CASE(ExecutionMode, VertexOrderCw) + 
CASE(ExecutionMode, VertexOrderCcw) + CASE(ExecutionMode, PixelCenterInteger) + CASE(ExecutionMode, OriginUpperLeft) + CASE(ExecutionMode, OriginLowerLeft) + CASE(ExecutionMode, EarlyFragmentTests) + CASE(ExecutionMode, PointMode) + CASE(ExecutionMode, Xfb) + CASE(ExecutionMode, DepthReplacing) + CASE(ExecutionMode, DepthGreater) + CASE(ExecutionMode, DepthLess) + CASE(ExecutionMode, DepthUnchanged) + CASE(ExecutionMode, LocalSize) + CASE(ExecutionMode, LocalSizeHint) + CASE(ExecutionMode, InputPoints) + CASE(ExecutionMode, InputLines) + CASE(ExecutionMode, InputLinesAdjacency) + CASE(ExecutionMode, Triangles) + CASE(ExecutionMode, InputTrianglesAdjacency) + CASE(ExecutionMode, Quads) + CASE(ExecutionMode, Isolines) + CASE(ExecutionMode, OutputVertices) + CASE(ExecutionMode, OutputPoints) + CASE(ExecutionMode, OutputLineStrip) + CASE(ExecutionMode, OutputTriangleStrip) + CASE(ExecutionMode, VecTypeHint) + CASE(ExecutionMode, ContractionOff) + CASE(ExecutionMode, Initializer) + CASE(ExecutionMode, Finalizer) + CASE(ExecutionMode, SubgroupSize) + CASE(ExecutionMode, SubgroupsPerWorkgroup) + CASE(ExecutionMode, SubgroupsPerWorkgroupId) + CASE(ExecutionMode, LocalSizeId) + CASE(ExecutionMode, LocalSizeHintId) + CASE(ExecutionMode, PostDepthCoverage) + CASE(ExecutionMode, DenormPreserve) + CASE(ExecutionMode, DenormFlushToZero) + CASE(ExecutionMode, SignedZeroInfNanPreserve) + CASE(ExecutionMode, RoundingModeRTE) + CASE(ExecutionMode, RoundingModeRTZ) + CASE(ExecutionMode, StencilRefReplacingEXT) + CASE(ExecutionMode, OutputLinesNV) + CASE(ExecutionMode, DerivativeGroupQuadsNV) + CASE(ExecutionMode, DerivativeGroupLinearNV) + CASE(ExecutionMode, OutputTrianglesNV) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getStorageClassName(StorageClass e) { + switch (e) { + CASE(StorageClass, UniformConstant) + CASE(StorageClass, Input) + CASE(StorageClass, Uniform) + CASE(StorageClass, Output) + CASE(StorageClass, Workgroup) + CASE(StorageClass, CrossWorkgroup) + CASE(StorageClass, Private) + CASE(StorageClass, Function) + CASE(StorageClass, Generic) + CASE(StorageClass, PushConstant) + CASE(StorageClass, AtomicCounter) + CASE(StorageClass, Image) + CASE(StorageClass, StorageBuffer) + CASE(StorageClass, CallableDataNV) + CASE(StorageClass, IncomingCallableDataNV) + CASE(StorageClass, RayPayloadNV) + CASE(StorageClass, HitAttributeNV) + CASE(StorageClass, IncomingRayPayloadNV) + CASE(StorageClass, ShaderRecordBufferNV) + CASE(StorageClass, PhysicalStorageBufferEXT) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getDimName(Dim dim) { + switch (dim) { + CASE_SUF(Dim, DIM, 1D) + CASE_SUF(Dim, DIM, 2D) + CASE_SUF(Dim, DIM, 3D) + CASE_SUF(Dim, DIM, Cube) + CASE_SUF(Dim, DIM, Rect) + CASE_SUF(Dim, DIM, Buffer) + CASE_SUF(Dim, DIM, SubpassData) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getSamplerAddressingModeName(SamplerAddressingMode e) { + switch (e) { + CASE(SamplerAddressingMode, None) + CASE(SamplerAddressingMode, ClampToEdge) + CASE(SamplerAddressingMode, Clamp) + CASE(SamplerAddressingMode, Repeat) + CASE(SamplerAddressingMode, RepeatMirrored) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getSamplerFilterModeName(SamplerFilterMode e) { + switch (e) { + CASE(SamplerFilterMode, Nearest) + CASE(SamplerFilterMode, Linear) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getImageFormatName(ImageFormat e) { + switch (e) { + CASE(ImageFormat, Unknown) + CASE(ImageFormat, Rgba32f) + 
CASE(ImageFormat, Rgba16f) + CASE(ImageFormat, R32f) + CASE(ImageFormat, Rgba8) + CASE(ImageFormat, Rgba8Snorm) + CASE(ImageFormat, Rg32f) + CASE(ImageFormat, Rg16f) + CASE(ImageFormat, R11fG11fB10f) + CASE(ImageFormat, R16f) + CASE(ImageFormat, Rgba16) + CASE(ImageFormat, Rgb10A2) + CASE(ImageFormat, Rg16) + CASE(ImageFormat, Rg8) + CASE(ImageFormat, R16) + CASE(ImageFormat, R8) + CASE(ImageFormat, Rgba16Snorm) + CASE(ImageFormat, Rg16Snorm) + CASE(ImageFormat, Rg8Snorm) + CASE(ImageFormat, R16Snorm) + CASE(ImageFormat, R8Snorm) + CASE(ImageFormat, Rgba32i) + CASE(ImageFormat, Rgba16i) + CASE(ImageFormat, Rgba8i) + CASE(ImageFormat, R32i) + CASE(ImageFormat, Rg32i) + CASE(ImageFormat, Rg16i) + CASE(ImageFormat, Rg8i) + CASE(ImageFormat, R16i) + CASE(ImageFormat, R8i) + CASE(ImageFormat, Rgba32ui) + CASE(ImageFormat, Rgba16ui) + CASE(ImageFormat, Rgba8ui) + CASE(ImageFormat, R32ui) + CASE(ImageFormat, Rgb10a2ui) + CASE(ImageFormat, Rg32ui) + CASE(ImageFormat, Rg16ui) + CASE(ImageFormat, Rg8ui) + CASE(ImageFormat, R16ui) + CASE(ImageFormat, R8ui) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getImageChannelOrderName(ImageChannelOrder e) { + switch (e) { + CASE(ImageChannelOrder, R) + CASE(ImageChannelOrder, A) + CASE(ImageChannelOrder, RG) + CASE(ImageChannelOrder, RA) + CASE(ImageChannelOrder, RGB) + CASE(ImageChannelOrder, RGBA) + CASE(ImageChannelOrder, BGRA) + CASE(ImageChannelOrder, ARGB) + CASE(ImageChannelOrder, Intensity) + CASE(ImageChannelOrder, Luminance) + CASE(ImageChannelOrder, Rx) + CASE(ImageChannelOrder, RGx) + CASE(ImageChannelOrder, RGBx) + CASE(ImageChannelOrder, Depth) + CASE(ImageChannelOrder, DepthStencil) + CASE(ImageChannelOrder, sRGB) + CASE(ImageChannelOrder, sRGBx) + CASE(ImageChannelOrder, sRGBA) + CASE(ImageChannelOrder, sBGRA) + CASE(ImageChannelOrder, ABGR) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getImageChannelDataTypeName(ImageChannelDataType e) { + switch (e) { + CASE(ImageChannelDataType, SnormInt8) + CASE(ImageChannelDataType, SnormInt16) + CASE(ImageChannelDataType, UnormInt8) + CASE(ImageChannelDataType, UnormInt16) + CASE(ImageChannelDataType, UnormShort565) + CASE(ImageChannelDataType, UnormShort555) + CASE(ImageChannelDataType, UnormInt101010) + CASE(ImageChannelDataType, SignedInt8) + CASE(ImageChannelDataType, SignedInt16) + CASE(ImageChannelDataType, SignedInt32) + CASE(ImageChannelDataType, UnsignedInt8) + CASE(ImageChannelDataType, UnsignedInt16) + CASE(ImageChannelDataType, UnsigendInt32) + CASE(ImageChannelDataType, HalfFloat) + CASE(ImageChannelDataType, Float) + CASE(ImageChannelDataType, UnormInt24) + CASE(ImageChannelDataType, UnormInt101010_2) + break; + } + llvm_unreachable("Unexpected operand"); +} + +std::string getImageOperandName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(ImageOperand::None)) + return "None"; + if (e == static_cast(ImageOperand::Bias)) + return "Bias"; + if (e & static_cast(ImageOperand::Bias)) { + nameString += sep + "Bias"; + sep = "|"; + } + if (e == static_cast(ImageOperand::Lod)) + return "Lod"; + if (e & static_cast(ImageOperand::Lod)) { + nameString += sep + "Lod"; + sep = "|"; + } + if (e == static_cast(ImageOperand::Grad)) + return "Grad"; + if (e & static_cast(ImageOperand::Grad)) { + nameString += sep + "Grad"; + sep = "|"; + } + if (e == static_cast(ImageOperand::ConstOffset)) + return "ConstOffset"; + if (e & static_cast(ImageOperand::ConstOffset)) { + nameString += sep + "ConstOffset"; + sep = "|"; 
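The flag-name builders such as `getImageOperandName` above all follow one shape: if the value equals a single flag exactly, return that flag's bare name; otherwise walk every flag and accumulate a `|`-separated list. (Note that this rendering has dropped the casts' template arguments: each `static_cast(...)` in these helpers reads `static_cast<uint32_t>(...)` in the source.) A minimal standalone sketch of the pattern, using a hypothetical two-flag enum rather than the full `ImageOperand` set:

```cpp
#include <cstdint>
#include <string>

enum class DemoFlag : uint32_t { None = 0x0, Bias = 0x1, Lod = 0x2 };

// Mirrors the getImageOperandName/getLoopControlName pattern: an exact
// single-flag match returns early; otherwise each set bit appends its
// name. The separator starts empty, so the result has no leading '|'.
std::string getDemoFlagName(uint32_t e) {
  std::string nameString;
  std::string sep;
  if (e == static_cast<uint32_t>(DemoFlag::None))
    return "None";
  if (e == static_cast<uint32_t>(DemoFlag::Bias))
    return "Bias";
  if (e & static_cast<uint32_t>(DemoFlag::Bias)) {
    nameString += sep + "Bias";
    sep = "|";
  }
  if (e == static_cast<uint32_t>(DemoFlag::Lod))
    return "Lod";
  if (e & static_cast<uint32_t>(DemoFlag::Lod)) {
    nameString += sep + "Lod";
    sep = "|";
  }
  return nameString; // e.g. 0x3 -> "Bias|Lod"
}
```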
+ } + if (e == static_cast(ImageOperand::Offset)) + return "Offset"; + if (e & static_cast(ImageOperand::Offset)) { + nameString += sep + "Offset"; + sep = "|"; + } + if (e == static_cast(ImageOperand::ConstOffsets)) + return "ConstOffsets"; + if (e & static_cast(ImageOperand::ConstOffsets)) { + nameString += sep + "ConstOffsets"; + sep = "|"; + } + if (e == static_cast(ImageOperand::Sample)) + return "Sample"; + if (e & static_cast(ImageOperand::Sample)) { + nameString += sep + "Sample"; + sep = "|"; + } + if (e == static_cast(ImageOperand::MinLod)) + return "MinLod"; + if (e & static_cast(ImageOperand::MinLod)) { + nameString += sep + "MinLod"; + sep = "|"; + } + if (e == static_cast(ImageOperand::MakeTexelAvailableKHR)) + return "MakeTexelAvailableKHR"; + if (e & static_cast(ImageOperand::MakeTexelAvailableKHR)) { + nameString += sep + "MakeTexelAvailableKHR"; + sep = "|"; + } + if (e == static_cast(ImageOperand::MakeTexelVisibleKHR)) + return "MakeTexelVisibleKHR"; + if (e & static_cast(ImageOperand::MakeTexelVisibleKHR)) { + nameString += sep + "MakeTexelVisibleKHR"; + sep = "|"; + } + if (e == static_cast(ImageOperand::NonPrivateTexelKHR)) + return "NonPrivateTexelKHR"; + if (e & static_cast(ImageOperand::NonPrivateTexelKHR)) { + nameString += sep + "NonPrivateTexelKHR"; + sep = "|"; + } + if (e == static_cast(ImageOperand::VolatileTexelKHR)) + return "VolatileTexelKHR"; + if (e & static_cast(ImageOperand::VolatileTexelKHR)) { + nameString += sep + "VolatileTexelKHR"; + sep = "|"; + } + if (e == static_cast(ImageOperand::SignExtend)) + return "SignExtend"; + if (e & static_cast(ImageOperand::SignExtend)) { + nameString += sep + "SignExtend"; + sep = "|"; + } + if (e == static_cast(ImageOperand::ZeroExtend)) + return "ZeroExtend"; + if (e & static_cast(ImageOperand::ZeroExtend)) { + nameString += sep + "ZeroExtend"; + sep = "|"; + }; + return nameString; +} + +std::string getFPFastMathModeName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(FPFastMathMode::None)) + return "None"; + if (e == static_cast(FPFastMathMode::NotNaN)) + return "NotNaN"; + if (e & static_cast(FPFastMathMode::NotNaN)) { + nameString += sep + "NotNaN"; + sep = "|"; + } + if (e == static_cast(FPFastMathMode::NotInf)) + return "NotInf"; + if (e & static_cast(FPFastMathMode::NotInf)) { + nameString += sep + "NotInf"; + sep = "|"; + } + if (e == static_cast(FPFastMathMode::NSZ)) + return "NSZ"; + if (e & static_cast(FPFastMathMode::NSZ)) { + nameString += sep + "NSZ"; + sep = "|"; + } + if (e == static_cast(FPFastMathMode::AllowRecip)) + return "AllowRecip"; + if (e & static_cast(FPFastMathMode::AllowRecip)) { + nameString += sep + "AllowRecip"; + sep = "|"; + } + if (e == static_cast(FPFastMathMode::Fast)) + return "Fast"; + if (e & static_cast(FPFastMathMode::Fast)) { + nameString += sep + "Fast"; + sep = "|"; + }; + return nameString; +} + +StringRef getFPRoundingModeName(FPRoundingMode e) { + switch (e) { + CASE(FPRoundingMode, RTE) + CASE(FPRoundingMode, RTZ) + CASE(FPRoundingMode, RTP) + CASE(FPRoundingMode, RTN) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getLinkageTypeName(LinkageType e) { + switch (e) { + CASE(LinkageType, Export) + CASE(LinkageType, Import) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getAccessQualifierName(AccessQualifier e) { + switch (e) { + CASE(AccessQualifier, ReadOnly) + CASE(AccessQualifier, WriteOnly) + CASE(AccessQualifier, ReadWrite) + break; + } + llvm_unreachable("Unexpected 
operand"); +} + +StringRef getFunctionParameterAttributeName(FunctionParameterAttribute e) { + switch (e) { + CASE(FunctionParameterAttribute, Zext) + CASE(FunctionParameterAttribute, Sext) + CASE(FunctionParameterAttribute, ByVal) + CASE(FunctionParameterAttribute, Sret) + CASE(FunctionParameterAttribute, NoAlias) + CASE(FunctionParameterAttribute, NoCapture) + CASE(FunctionParameterAttribute, NoWrite) + CASE(FunctionParameterAttribute, NoReadWrite) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getDecorationName(Decoration e) { + switch (e) { + CASE(Decoration, RelaxedPrecision) + CASE(Decoration, SpecId) + CASE(Decoration, Block) + CASE(Decoration, BufferBlock) + CASE(Decoration, RowMajor) + CASE(Decoration, ColMajor) + CASE(Decoration, ArrayStride) + CASE(Decoration, MatrixStride) + CASE(Decoration, GLSLShared) + CASE(Decoration, GLSLPacked) + CASE(Decoration, CPacked) + CASE(Decoration, BuiltIn) + CASE(Decoration, NoPerspective) + CASE(Decoration, Flat) + CASE(Decoration, Patch) + CASE(Decoration, Centroid) + CASE(Decoration, Sample) + CASE(Decoration, Invariant) + CASE(Decoration, Restrict) + CASE(Decoration, Aliased) + CASE(Decoration, Volatile) + CASE(Decoration, Constant) + CASE(Decoration, Coherent) + CASE(Decoration, NonWritable) + CASE(Decoration, NonReadable) + CASE(Decoration, Uniform) + CASE(Decoration, UniformId) + CASE(Decoration, SaturatedConversion) + CASE(Decoration, Stream) + CASE(Decoration, Location) + CASE(Decoration, Component) + CASE(Decoration, Index) + CASE(Decoration, Binding) + CASE(Decoration, DescriptorSet) + CASE(Decoration, Offset) + CASE(Decoration, XfbBuffer) + CASE(Decoration, XfbStride) + CASE(Decoration, FuncParamAttr) + CASE(Decoration, FPRoundingMode) + CASE(Decoration, FPFastMathMode) + CASE(Decoration, LinkageAttributes) + CASE(Decoration, NoContraction) + CASE(Decoration, InputAttachmentIndex) + CASE(Decoration, Alignment) + CASE(Decoration, MaxByteOffset) + CASE(Decoration, AlignmentId) + CASE(Decoration, MaxByteOffsetId) + CASE(Decoration, NoSignedWrap) + CASE(Decoration, NoUnsignedWrap) + CASE(Decoration, ExplicitInterpAMD) + CASE(Decoration, OverrideCoverageNV) + CASE(Decoration, PassthroughNV) + CASE(Decoration, ViewportRelativeNV) + CASE(Decoration, SecondaryViewportRelativeNV) + CASE(Decoration, PerPrimitiveNV) + CASE(Decoration, PerViewNV) + CASE(Decoration, PerVertexNV) + CASE(Decoration, NonUniformEXT) + CASE(Decoration, CountBuffer) + CASE(Decoration, UserSemantic) + CASE(Decoration, RestrictPointerEXT) + CASE(Decoration, AliasedPointerEXT) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getBuiltInName(BuiltIn e) { + switch (e) { + CASE(BuiltIn, Position) + CASE(BuiltIn, PointSize) + CASE(BuiltIn, ClipDistance) + CASE(BuiltIn, CullDistance) + CASE(BuiltIn, VertexId) + CASE(BuiltIn, InstanceId) + CASE(BuiltIn, PrimitiveId) + CASE(BuiltIn, InvocationId) + CASE(BuiltIn, Layer) + CASE(BuiltIn, ViewportIndex) + CASE(BuiltIn, TessLevelOuter) + CASE(BuiltIn, TessLevelInner) + CASE(BuiltIn, TessCoord) + CASE(BuiltIn, PatchVertices) + CASE(BuiltIn, FragCoord) + CASE(BuiltIn, PointCoord) + CASE(BuiltIn, FrontFacing) + CASE(BuiltIn, SampleId) + CASE(BuiltIn, SamplePosition) + CASE(BuiltIn, SampleMask) + CASE(BuiltIn, FragDepth) + CASE(BuiltIn, HelperInvocation) + CASE(BuiltIn, NumWorkgroups) + CASE(BuiltIn, WorkgroupSize) + CASE(BuiltIn, WorkgroupId) + CASE(BuiltIn, LocalInvocationId) + CASE(BuiltIn, GlobalInvocationId) + CASE(BuiltIn, LocalInvocationIndex) + CASE(BuiltIn, WorkDim) + CASE(BuiltIn, 
GlobalSize) + CASE(BuiltIn, EnqueuedWorkgroupSize) + CASE(BuiltIn, GlobalOffset) + CASE(BuiltIn, GlobalLinearId) + CASE(BuiltIn, SubgroupSize) + CASE(BuiltIn, SubgroupMaxSize) + CASE(BuiltIn, NumSubgroups) + CASE(BuiltIn, NumEnqueuedSubgroups) + CASE(BuiltIn, SubgroupId) + CASE(BuiltIn, SubgroupLocalInvocationId) + CASE(BuiltIn, VertexIndex) + CASE(BuiltIn, InstanceIndex) + CASE(BuiltIn, SubgroupEqMask) + CASE(BuiltIn, SubgroupGeMask) + CASE(BuiltIn, SubgroupGtMask) + CASE(BuiltIn, SubgroupLeMask) + CASE(BuiltIn, SubgroupLtMask) + CASE(BuiltIn, BaseVertex) + CASE(BuiltIn, BaseInstance) + CASE(BuiltIn, DrawIndex) + CASE(BuiltIn, DeviceIndex) + CASE(BuiltIn, ViewIndex) + CASE(BuiltIn, BaryCoordNoPerspAMD) + CASE(BuiltIn, BaryCoordNoPerspCentroidAMD) + CASE(BuiltIn, BaryCoordNoPerspSampleAMD) + CASE(BuiltIn, BaryCoordSmoothAMD) + CASE(BuiltIn, BaryCoordSmoothCentroid) + CASE(BuiltIn, BaryCoordSmoothSample) + CASE(BuiltIn, BaryCoordPullModel) + CASE(BuiltIn, FragStencilRefEXT) + CASE(BuiltIn, ViewportMaskNV) + CASE(BuiltIn, SecondaryPositionNV) + CASE(BuiltIn, SecondaryViewportMaskNV) + CASE(BuiltIn, PositionPerViewNV) + CASE(BuiltIn, ViewportMaskPerViewNV) + CASE(BuiltIn, FullyCoveredEXT) + CASE(BuiltIn, TaskCountNV) + CASE(BuiltIn, PrimitiveCountNV) + CASE(BuiltIn, PrimitiveIndicesNV) + CASE(BuiltIn, ClipDistancePerViewNV) + CASE(BuiltIn, CullDistancePerViewNV) + CASE(BuiltIn, LayerPerViewNV) + CASE(BuiltIn, MeshViewCountNV) + CASE(BuiltIn, MeshViewIndices) + CASE(BuiltIn, BaryCoordNV) + CASE(BuiltIn, BaryCoordNoPerspNV) + CASE(BuiltIn, FragSizeEXT) + CASE(BuiltIn, FragInvocationCountEXT) + CASE(BuiltIn, LaunchIdNV) + CASE(BuiltIn, LaunchSizeNV) + CASE(BuiltIn, WorldRayOriginNV) + CASE(BuiltIn, WorldRayDirectionNV) + CASE(BuiltIn, ObjectRayOriginNV) + CASE(BuiltIn, ObjectRayDirectionNV) + CASE(BuiltIn, RayTminNV) + CASE(BuiltIn, RayTmaxNV) + CASE(BuiltIn, InstanceCustomIndexNV) + CASE(BuiltIn, ObjectToWorldNV) + CASE(BuiltIn, WorldToObjectNV) + CASE(BuiltIn, HitTNV) + CASE(BuiltIn, HitKindNV) + CASE(BuiltIn, IncomingRayFlagsNV) + break; + } + llvm_unreachable("Unexpected operand"); +} + +std::string getSelectionControlName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(SelectionControl::None)) + return "None"; + if (e == static_cast(SelectionControl::Flatten)) + return "Flatten"; + if (e & static_cast(SelectionControl::Flatten)) { + nameString += sep + "Flatten"; + sep = "|"; + } + if (e == static_cast(SelectionControl::DontFlatten)) + return "DontFlatten"; + if (e & static_cast(SelectionControl::DontFlatten)) { + nameString += sep + "DontFlatten"; + sep = "|"; + }; + return nameString; +} + +std::string getLoopControlName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(LoopControl::None)) + return "None"; + if (e == static_cast(LoopControl::Unroll)) + return "Unroll"; + if (e & static_cast(LoopControl::Unroll)) { + nameString += sep + "Unroll"; + sep = "|"; + } + if (e == static_cast(LoopControl::DontUnroll)) + return "DontUnroll"; + if (e & static_cast(LoopControl::DontUnroll)) { + nameString += sep + "DontUnroll"; + sep = "|"; + } + if (e == static_cast(LoopControl::DependencyInfinite)) + return "DependencyInfinite"; + if (e & static_cast(LoopControl::DependencyInfinite)) { + nameString += sep + "DependencyInfinite"; + sep = "|"; + } + if (e == static_cast(LoopControl::DependencyLength)) + return "DependencyLength"; + if (e & static_cast(LoopControl::DependencyLength)) { + nameString += sep + 
"DependencyLength"; + sep = "|"; + } + if (e == static_cast(LoopControl::MinIterations)) + return "MinIterations"; + if (e & static_cast(LoopControl::MinIterations)) { + nameString += sep + "MinIterations"; + sep = "|"; + } + if (e == static_cast(LoopControl::MaxIterations)) + return "MaxIterations"; + if (e & static_cast(LoopControl::MaxIterations)) { + nameString += sep + "MaxIterations"; + sep = "|"; + } + if (e == static_cast(LoopControl::IterationMultiple)) + return "IterationMultiple"; + if (e & static_cast(LoopControl::IterationMultiple)) { + nameString += sep + "IterationMultiple"; + sep = "|"; + } + if (e == static_cast(LoopControl::PeelCount)) + return "PeelCount"; + if (e & static_cast(LoopControl::PeelCount)) { + nameString += sep + "PeelCount"; + sep = "|"; + } + if (e == static_cast(LoopControl::PartialCount)) + return "PartialCount"; + if (e & static_cast(LoopControl::PartialCount)) { + nameString += sep + "PartialCount"; + sep = "|"; + }; + return nameString; +} + +std::string getFunctionControlName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(FunctionControl::None)) + return "None"; + if (e == static_cast(FunctionControl::Inline)) + return "Inline"; + if (e & static_cast(FunctionControl::Inline)) { + nameString += sep + "Inline"; + sep = "|"; + } + if (e == static_cast(FunctionControl::DontInline)) + return "DontInline"; + if (e & static_cast(FunctionControl::DontInline)) { + nameString += sep + "DontInline"; + sep = "|"; + } + if (e == static_cast(FunctionControl::Pure)) + return "Pure"; + if (e & static_cast(FunctionControl::Pure)) { + nameString += sep + "Pure"; + sep = "|"; + } + if (e == static_cast(FunctionControl::Const)) + return "Const"; + if (e & static_cast(FunctionControl::Const)) { + nameString += sep + "Const"; + sep = "|"; + }; + return nameString; +} + +std::string getMemorySemanticsName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(MemorySemantics::None)) + return "None"; + if (e == static_cast(MemorySemantics::Acquire)) + return "Acquire"; + if (e & static_cast(MemorySemantics::Acquire)) { + nameString += sep + "Acquire"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::Release)) + return "Release"; + if (e & static_cast(MemorySemantics::Release)) { + nameString += sep + "Release"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::AcquireRelease)) + return "AcquireRelease"; + if (e & static_cast(MemorySemantics::AcquireRelease)) { + nameString += sep + "AcquireRelease"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::SequentiallyConsistent)) + return "SequentiallyConsistent"; + if (e & static_cast(MemorySemantics::SequentiallyConsistent)) { + nameString += sep + "SequentiallyConsistent"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::UniformMemory)) + return "UniformMemory"; + if (e & static_cast(MemorySemantics::UniformMemory)) { + nameString += sep + "UniformMemory"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::SubgroupMemory)) + return "SubgroupMemory"; + if (e & static_cast(MemorySemantics::SubgroupMemory)) { + nameString += sep + "SubgroupMemory"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::WorkgroupMemory)) + return "WorkgroupMemory"; + if (e & static_cast(MemorySemantics::WorkgroupMemory)) { + nameString += sep + "WorkgroupMemory"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::CrossWorkgroupMemory)) + return "CrossWorkgroupMemory"; + if (e & 
static_cast(MemorySemantics::CrossWorkgroupMemory)) { + nameString += sep + "CrossWorkgroupMemory"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::AtomicCounterMemory)) + return "AtomicCounterMemory"; + if (e & static_cast(MemorySemantics::AtomicCounterMemory)) { + nameString += sep + "AtomicCounterMemory"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::ImageMemory)) + return "ImageMemory"; + if (e & static_cast(MemorySemantics::ImageMemory)) { + nameString += sep + "ImageMemory"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::OutputMemoryKHR)) + return "OutputMemoryKHR"; + if (e & static_cast(MemorySemantics::OutputMemoryKHR)) { + nameString += sep + "OutputMemoryKHR"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::MakeAvailableKHR)) + return "MakeAvailableKHR"; + if (e & static_cast(MemorySemantics::MakeAvailableKHR)) { + nameString += sep + "MakeAvailableKHR"; + sep = "|"; + } + if (e == static_cast(MemorySemantics::MakeVisibleKHR)) + return "MakeVisibleKHR"; + if (e & static_cast(MemorySemantics::MakeVisibleKHR)) { + nameString += sep + "MakeVisibleKHR"; + sep = "|"; + }; + return nameString; +} + +std::string getMemoryOperandName(uint32_t e) { + std::string nameString = ""; + std::string sep = ""; + if (e == static_cast(MemoryOperand::None)) + return "None"; + if (e == static_cast(MemoryOperand::Volatile)) + return "Volatile"; + if (e & static_cast(MemoryOperand::Volatile)) { + nameString += sep + "Volatile"; + sep = "|"; + } + if (e == static_cast(MemoryOperand::Aligned)) + return "Aligned"; + if (e & static_cast(MemoryOperand::Aligned)) { + nameString += sep + "Aligned"; + sep = "|"; + } + if (e == static_cast(MemoryOperand::Nontemporal)) + return "Nontemporal"; + if (e & static_cast(MemoryOperand::Nontemporal)) { + nameString += sep + "Nontemporal"; + sep = "|"; + } + if (e == static_cast(MemoryOperand::MakePointerAvailableKHR)) + return "MakePointerAvailableKHR"; + if (e & static_cast(MemoryOperand::MakePointerAvailableKHR)) { + nameString += sep + "MakePointerAvailableKHR"; + sep = "|"; + } + if (e == static_cast(MemoryOperand::MakePointerVisibleKHR)) + return "MakePointerVisibleKHR"; + if (e & static_cast(MemoryOperand::MakePointerVisibleKHR)) { + nameString += sep + "MakePointerVisibleKHR"; + sep = "|"; + } + if (e == static_cast(MemoryOperand::NonPrivatePointerKHR)) + return "NonPrivatePointerKHR"; + if (e & static_cast(MemoryOperand::NonPrivatePointerKHR)) { + nameString += sep + "NonPrivatePointerKHR"; + sep = "|"; + }; + return nameString; +} + +StringRef getScopeName(Scope e) { + switch (e) { + CASE(Scope, CrossDevice) + CASE(Scope, Device) + CASE(Scope, Workgroup) + CASE(Scope, Subgroup) + CASE(Scope, Invocation) + CASE(Scope, QueueFamilyKHR) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getGroupOperationName(GroupOperation e) { + switch (e) { + CASE(GroupOperation, Reduce) + CASE(GroupOperation, InclusiveScan) + CASE(GroupOperation, ExclusiveScan) + CASE(GroupOperation, ClusteredReduce) + CASE(GroupOperation, PartitionedReduceNV) + CASE(GroupOperation, PartitionedInclusiveScanNV) + CASE(GroupOperation, PartitionedExclusiveScanNV) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getKernelEnqueueFlagsName(KernelEnqueueFlags e) { + switch (e) { + CASE(KernelEnqueueFlags, NoWait) + CASE(KernelEnqueueFlags, WaitKernel) + CASE(KernelEnqueueFlags, WaitWorkGroup) + break; + } + llvm_unreachable("Unexpected operand"); +} + +StringRef getKernelProfilingInfoName(KernelProfilingInfo e) { + switch 
(e) { + CASE(KernelProfilingInfo, None) + CASE(KernelProfilingInfo, CmdExecTime) + break; + } + llvm_unreachable("Unexpected operand"); +} +} // namespace SPIRV +} // namespace llvm diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h new file mode 100644 index 000000000000..2aa9f076c78e --- /dev/null +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h @@ -0,0 +1,739 @@ +//===-- SPIRVBaseInfo.h - Top level definitions for SPIRV ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the SPIRV target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVBASEINFO_H +#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVBASEINFO_H + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { +namespace SPIRV { +enum class Capability : uint32_t { + Matrix = 0, + Shader = 1, + Geometry = 2, + Tessellation = 3, + Addresses = 4, + Linkage = 5, + Kernel = 6, + Vector16 = 7, + Float16Buffer = 8, + Float16 = 9, + Float64 = 10, + Int64 = 11, + Int64Atomics = 12, + ImageBasic = 13, + ImageReadWrite = 14, + ImageMipmap = 15, + Pipes = 17, + Groups = 18, + DeviceEnqueue = 19, + LiteralSampler = 20, + AtomicStorage = 21, + Int16 = 22, + TessellationPointSize = 23, + GeometryPointSize = 24, + ImageGatherExtended = 25, + StorageImageMultisample = 27, + UniformBufferArrayDynamicIndexing = 28, + SampledImageArrayDymnamicIndexing = 29, + ClipDistance = 32, + CullDistance = 33, + ImageCubeArray = 34, + SampleRateShading = 35, + ImageRect = 36, + SampledRect = 37, + GenericPointer = 38, + Int8 = 39, + InputAttachment = 40, + SparseResidency = 41, + MinLod = 42, + Sampled1D = 43, + Image1D = 44, + SampledCubeArray = 45, + SampledBuffer = 46, + ImageBuffer = 47, + ImageMSArray = 48, + StorageImageExtendedFormats = 49, + ImageQuery = 50, + DerivativeControl = 51, + InterpolationFunction = 52, + TransformFeedback = 53, + GeometryStreams = 54, + StorageImageReadWithoutFormat = 55, + StorageImageWriteWithoutFormat = 56, + MultiViewport = 57, + SubgroupDispatch = 58, + NamedBarrier = 59, + PipeStorage = 60, + GroupNonUniform = 61, + GroupNonUniformVote = 62, + GroupNonUniformArithmetic = 63, + GroupNonUniformBallot = 64, + GroupNonUniformShuffle = 65, + GroupNonUniformShuffleRelative = 66, + GroupNonUniformClustered = 67, + GroupNonUniformQuad = 68, + SubgroupBallotKHR = 4423, + DrawParameters = 4427, + SubgroupVoteKHR = 4431, + StorageBuffer16BitAccess = 4433, + StorageUniform16 = 4434, + StoragePushConstant16 = 4435, + StorageInputOutput16 = 4436, + DeviceGroup = 4437, + MultiView = 4439, + VariablePointersStorageBuffer = 4441, + VariablePointers = 4442, + AtomicStorageOps = 4445, + SampleMaskPostDepthCoverage = 4447, + StorageBuffer8BitAccess = 4448, + UniformAndStorageBuffer8BitAccess = 4449, + StoragePushConstant8 = 4450, + DenormPreserve = 4464, + DenormFlushToZero = 4465, + SignedZeroInfNanPreserve = 4466, + RoundingModeRTE = 4467, + RoundingModeRTZ = 4468, + 
Float16ImageAMD = 5008, + ImageGatherBiasLodAMD = 5009, + FragmentMaskAMD = 5010, + StencilExportEXT = 5013, + ImageReadWriteLodAMD = 5015, + SampleMaskOverrideCoverageNV = 5249, + GeometryShaderPassthroughNV = 5251, + ShaderViewportIndexLayerEXT = 5254, + ShaderViewportMaskNV = 5255, + ShaderStereoViewNV = 5259, + PerViewAttributesNV = 5260, + FragmentFullyCoveredEXT = 5265, + MeshShadingNV = 5266, + ShaderNonUniformEXT = 5301, + RuntimeDescriptorArrayEXT = 5302, + InputAttachmentArrayDynamicIndexingEXT = 5303, + UniformTexelBufferArrayDynamicIndexingEXT = 5304, + StorageTexelBufferArrayDynamicIndexingEXT = 5305, + UniformBufferArrayNonUniformIndexingEXT = 5306, + SampledImageArrayNonUniformIndexingEXT = 5307, + StorageBufferArrayNonUniformIndexingEXT = 5308, + StorageImageArrayNonUniformIndexingEXT = 5309, + InputAttachmentArrayNonUniformIndexingEXT = 5310, + UniformTexelBufferArrayNonUniformIndexingEXT = 5311, + StorageTexelBufferArrayNonUniformIndexingEXT = 5312, + RayTracingNV = 5340, + SubgroupShuffleINTEL = 5568, + SubgroupBufferBlockIOINTEL = 5569, + SubgroupImageBlockIOINTEL = 5570, + SubgroupImageMediaBlockIOINTEL = 5579, + SubgroupAvcMotionEstimationINTEL = 5696, + SubgroupAvcMotionEstimationIntraINTEL = 5697, + SubgroupAvcMotionEstimationChromaINTEL = 5698, + GroupNonUniformPartitionedNV = 5297, + VulkanMemoryModelKHR = 5345, + VulkanMemoryModelDeviceScopeKHR = 5346, + ImageFootprintNV = 5282, + FragmentBarycentricNV = 5284, + ComputeDerivativeGroupQuadsNV = 5288, + ComputeDerivativeGroupLinearNV = 5350, + FragmentDensityEXT = 5291, + PhysicalStorageBufferAddressesEXT = 5347, + CooperativeMatrixNV = 5357, +}; +StringRef getCapabilityName(Capability e); + +enum class SourceLanguage : uint32_t { + Unknown = 0, + ESSL = 1, + GLSL = 2, + OpenCL_C = 3, + OpenCL_CPP = 4, + HLSL = 5, +}; +StringRef getSourceLanguageName(SourceLanguage e); + +enum class AddressingModel : uint32_t { + Logical = 0, + Physical32 = 1, + Physical64 = 2, + PhysicalStorageBuffer64EXT = 5348, +}; +StringRef getAddressingModelName(AddressingModel e); + +enum class ExecutionModel : uint32_t { + Vertex = 0, + TessellationControl = 1, + TessellationEvaluation = 2, + Geometry = 3, + Fragment = 4, + GLCompute = 5, + Kernel = 6, + TaskNV = 5267, + MeshNV = 5268, + RayGenerationNV = 5313, + IntersectionNV = 5314, + AnyHitNV = 5315, + ClosestHitNV = 5316, + MissNV = 5317, + CallableNV = 5318, +}; +StringRef getExecutionModelName(ExecutionModel e); + +enum class MemoryModel : uint32_t { + Simple = 0, + GLSL450 = 1, + OpenCL = 2, + VulkanKHR = 3, +}; +StringRef getMemoryModelName(MemoryModel e); + +enum class ExecutionMode : uint32_t { + Invocations = 0, + SpacingEqual = 1, + SpacingFractionalEven = 2, + SpacingFractionalOdd = 3, + VertexOrderCw = 4, + VertexOrderCcw = 5, + PixelCenterInteger = 6, + OriginUpperLeft = 7, + OriginLowerLeft = 8, + EarlyFragmentTests = 9, + PointMode = 10, + Xfb = 11, + DepthReplacing = 12, + DepthGreater = 14, + DepthLess = 15, + DepthUnchanged = 16, + LocalSize = 17, + LocalSizeHint = 18, + InputPoints = 19, + InputLines = 20, + InputLinesAdjacency = 21, + Triangles = 22, + InputTrianglesAdjacency = 23, + Quads = 24, + Isolines = 25, + OutputVertices = 26, + OutputPoints = 27, + OutputLineStrip = 28, + OutputTriangleStrip = 29, + VecTypeHint = 30, + ContractionOff = 31, + Initializer = 33, + Finalizer = 34, + SubgroupSize = 35, + SubgroupsPerWorkgroup = 36, + SubgroupsPerWorkgroupId = 37, + LocalSizeId = 38, + LocalSizeHintId = 39, + PostDepthCoverage = 4446, + DenormPreserve = 4459, + 
DenormFlushToZero = 4460, + SignedZeroInfNanPreserve = 4461, + RoundingModeRTE = 4462, + RoundingModeRTZ = 4463, + StencilRefReplacingEXT = 5027, + OutputLinesNV = 5269, + DerivativeGroupQuadsNV = 5289, + DerivativeGroupLinearNV = 5290, + OutputTrianglesNV = 5298, +}; +StringRef getExecutionModeName(ExecutionMode e); + +enum class StorageClass : uint32_t { + UniformConstant = 0, + Input = 1, + Uniform = 2, + Output = 3, + Workgroup = 4, + CrossWorkgroup = 5, + Private = 6, + Function = 7, + Generic = 8, + PushConstant = 9, + AtomicCounter = 10, + Image = 11, + StorageBuffer = 12, + CallableDataNV = 5328, + IncomingCallableDataNV = 5329, + RayPayloadNV = 5338, + HitAttributeNV = 5339, + IncomingRayPayloadNV = 5342, + ShaderRecordBufferNV = 5343, + PhysicalStorageBufferEXT = 5349, +}; +StringRef getStorageClassName(StorageClass e); + +enum class Dim : uint32_t { + DIM_1D = 0, + DIM_2D = 1, + DIM_3D = 2, + DIM_Cube = 3, + DIM_Rect = 4, + DIM_Buffer = 5, + DIM_SubpassData = 6, +}; +StringRef getDimName(Dim e); + +enum class SamplerAddressingMode : uint32_t { + None = 0, + ClampToEdge = 1, + Clamp = 2, + Repeat = 3, + RepeatMirrored = 4, +}; +StringRef getSamplerAddressingModeName(SamplerAddressingMode e); + +enum class SamplerFilterMode : uint32_t { + Nearest = 0, + Linear = 1, +}; +StringRef getSamplerFilterModeName(SamplerFilterMode e); + +enum class ImageFormat : uint32_t { + Unknown = 0, + Rgba32f = 1, + Rgba16f = 2, + R32f = 3, + Rgba8 = 4, + Rgba8Snorm = 5, + Rg32f = 6, + Rg16f = 7, + R11fG11fB10f = 8, + R16f = 9, + Rgba16 = 10, + Rgb10A2 = 11, + Rg16 = 12, + Rg8 = 13, + R16 = 14, + R8 = 15, + Rgba16Snorm = 16, + Rg16Snorm = 17, + Rg8Snorm = 18, + R16Snorm = 19, + R8Snorm = 20, + Rgba32i = 21, + Rgba16i = 22, + Rgba8i = 23, + R32i = 24, + Rg32i = 25, + Rg16i = 26, + Rg8i = 27, + R16i = 28, + R8i = 29, + Rgba32ui = 30, + Rgba16ui = 31, + Rgba8ui = 32, + R32ui = 33, + Rgb10a2ui = 34, + Rg32ui = 35, + Rg16ui = 36, + Rg8ui = 37, + R16ui = 38, + R8ui = 39, +}; +StringRef getImageFormatName(ImageFormat e); + +enum class ImageChannelOrder : uint32_t { + R = 0, + A = 1, + RG = 2, + RA = 3, + RGB = 4, + RGBA = 5, + BGRA = 6, + ARGB = 7, + Intensity = 8, + Luminance = 9, + Rx = 10, + RGx = 11, + RGBx = 12, + Depth = 13, + DepthStencil = 14, + sRGB = 15, + sRGBx = 16, + sRGBA = 17, + sBGRA = 18, + ABGR = 19, +}; +StringRef getImageChannelOrderName(ImageChannelOrder e); + +enum class ImageChannelDataType : uint32_t { + SnormInt8 = 0, + SnormInt16 = 1, + UnormInt8 = 2, + UnormInt16 = 3, + UnormShort565 = 4, + UnormShort555 = 5, + UnormInt101010 = 6, + SignedInt8 = 7, + SignedInt16 = 8, + SignedInt32 = 9, + UnsignedInt8 = 10, + UnsignedInt16 = 11, + UnsigendInt32 = 12, + HalfFloat = 13, + Float = 14, + UnormInt24 = 15, + UnormInt101010_2 = 16, +}; +StringRef getImageChannelDataTypeName(ImageChannelDataType e); + +enum class ImageOperand : uint32_t { + None = 0x0, + Bias = 0x1, + Lod = 0x2, + Grad = 0x4, + ConstOffset = 0x8, + Offset = 0x10, + ConstOffsets = 0x20, + Sample = 0x40, + MinLod = 0x80, + MakeTexelAvailableKHR = 0x100, + MakeTexelVisibleKHR = 0x200, + NonPrivateTexelKHR = 0x400, + VolatileTexelKHR = 0x800, + SignExtend = 0x1000, + ZeroExtend = 0x2000, +}; +std::string getImageOperandName(uint32_t e); + +enum class FPFastMathMode : uint32_t { + None = 0x0, + NotNaN = 0x1, + NotInf = 0x2, + NSZ = 0x4, + AllowRecip = 0x8, + Fast = 0x10, +}; +std::string getFPFastMathModeName(uint32_t e); + +enum class FPRoundingMode : uint32_t { + RTE = 0, + RTZ = 1, + RTP = 2, + RTN = 3, +}; +StringRef 
getFPRoundingModeName(FPRoundingMode e); + +enum class LinkageType : uint32_t { + Export = 0, + Import = 1, +}; +StringRef getLinkageTypeName(LinkageType e); + +enum class AccessQualifier : uint32_t { + ReadOnly = 0, + WriteOnly = 1, + ReadWrite = 2, +}; +StringRef getAccessQualifierName(AccessQualifier e); + +enum class FunctionParameterAttribute : uint32_t { + Zext = 0, + Sext = 1, + ByVal = 2, + Sret = 3, + NoAlias = 4, + NoCapture = 5, + NoWrite = 6, + NoReadWrite = 7, +}; +StringRef getFunctionParameterAttributeName(FunctionParameterAttribute e); + +enum class Decoration : uint32_t { + RelaxedPrecision = 0, + SpecId = 1, + Block = 2, + BufferBlock = 3, + RowMajor = 4, + ColMajor = 5, + ArrayStride = 6, + MatrixStride = 7, + GLSLShared = 8, + GLSLPacked = 9, + CPacked = 10, + BuiltIn = 11, + NoPerspective = 13, + Flat = 14, + Patch = 15, + Centroid = 16, + Sample = 17, + Invariant = 18, + Restrict = 19, + Aliased = 20, + Volatile = 21, + Constant = 22, + Coherent = 23, + NonWritable = 24, + NonReadable = 25, + Uniform = 26, + UniformId = 27, + SaturatedConversion = 28, + Stream = 29, + Location = 30, + Component = 31, + Index = 32, + Binding = 33, + DescriptorSet = 34, + Offset = 35, + XfbBuffer = 36, + XfbStride = 37, + FuncParamAttr = 38, + FPRoundingMode = 39, + FPFastMathMode = 40, + LinkageAttributes = 41, + NoContraction = 42, + InputAttachmentIndex = 43, + Alignment = 44, + MaxByteOffset = 45, + AlignmentId = 46, + MaxByteOffsetId = 47, + NoSignedWrap = 4469, + NoUnsignedWrap = 4470, + ExplicitInterpAMD = 4999, + OverrideCoverageNV = 5248, + PassthroughNV = 5250, + ViewportRelativeNV = 5252, + SecondaryViewportRelativeNV = 5256, + PerPrimitiveNV = 5271, + PerViewNV = 5272, + PerVertexNV = 5273, + NonUniformEXT = 5300, + CountBuffer = 5634, + UserSemantic = 5635, + RestrictPointerEXT = 5355, + AliasedPointerEXT = 5356, +}; +StringRef getDecorationName(Decoration e); + +enum class BuiltIn : uint32_t { + Position = 0, + PointSize = 1, + ClipDistance = 3, + CullDistance = 4, + VertexId = 5, + InstanceId = 6, + PrimitiveId = 7, + InvocationId = 8, + Layer = 9, + ViewportIndex = 10, + TessLevelOuter = 11, + TessLevelInner = 12, + TessCoord = 13, + PatchVertices = 14, + FragCoord = 15, + PointCoord = 16, + FrontFacing = 17, + SampleId = 18, + SamplePosition = 19, + SampleMask = 20, + FragDepth = 22, + HelperInvocation = 23, + NumWorkgroups = 24, + WorkgroupSize = 25, + WorkgroupId = 26, + LocalInvocationId = 27, + GlobalInvocationId = 28, + LocalInvocationIndex = 29, + WorkDim = 30, + GlobalSize = 31, + EnqueuedWorkgroupSize = 32, + GlobalOffset = 33, + GlobalLinearId = 34, + SubgroupSize = 36, + SubgroupMaxSize = 37, + NumSubgroups = 38, + NumEnqueuedSubgroups = 39, + SubgroupId = 40, + SubgroupLocalInvocationId = 41, + VertexIndex = 42, + InstanceIndex = 43, + SubgroupEqMask = 4416, + SubgroupGeMask = 4417, + SubgroupGtMask = 4418, + SubgroupLeMask = 4419, + SubgroupLtMask = 4420, + BaseVertex = 4424, + BaseInstance = 4425, + DrawIndex = 4426, + DeviceIndex = 4438, + ViewIndex = 4440, + BaryCoordNoPerspAMD = 4492, + BaryCoordNoPerspCentroidAMD = 4493, + BaryCoordNoPerspSampleAMD = 4494, + BaryCoordSmoothAMD = 4495, + BaryCoordSmoothCentroid = 4496, + BaryCoordSmoothSample = 4497, + BaryCoordPullModel = 4498, + FragStencilRefEXT = 5014, + ViewportMaskNV = 5253, + SecondaryPositionNV = 5257, + SecondaryViewportMaskNV = 5258, + PositionPerViewNV = 5261, + ViewportMaskPerViewNV = 5262, + FullyCoveredEXT = 5264, + TaskCountNV = 5274, + PrimitiveCountNV = 5275, + PrimitiveIndicesNV = 5276, 
+ ClipDistancePerViewNV = 5277, + CullDistancePerViewNV = 5278, + LayerPerViewNV = 5279, + MeshViewCountNV = 5280, + MeshViewIndices = 5281, + BaryCoordNV = 5286, + BaryCoordNoPerspNV = 5287, + FragSizeEXT = 5292, + FragInvocationCountEXT = 5293, + LaunchIdNV = 5319, + LaunchSizeNV = 5320, + WorldRayOriginNV = 5321, + WorldRayDirectionNV = 5322, + ObjectRayOriginNV = 5323, + ObjectRayDirectionNV = 5324, + RayTminNV = 5325, + RayTmaxNV = 5326, + InstanceCustomIndexNV = 5327, + ObjectToWorldNV = 5330, + WorldToObjectNV = 5331, + HitTNV = 5332, + HitKindNV = 5333, + IncomingRayFlagsNV = 5351, +}; +StringRef getBuiltInName(BuiltIn e); + +enum class SelectionControl : uint32_t { + None = 0x0, + Flatten = 0x1, + DontFlatten = 0x2, +}; +std::string getSelectionControlName(uint32_t e); + +enum class LoopControl : uint32_t { + None = 0x0, + Unroll = 0x1, + DontUnroll = 0x2, + DependencyInfinite = 0x4, + DependencyLength = 0x8, + MinIterations = 0x10, + MaxIterations = 0x20, + IterationMultiple = 0x40, + PeelCount = 0x80, + PartialCount = 0x100, +}; +std::string getLoopControlName(uint32_t e); + +enum class FunctionControl : uint32_t { + None = 0x0, + Inline = 0x1, + DontInline = 0x2, + Pure = 0x4, + Const = 0x8, +}; +std::string getFunctionControlName(uint32_t e); + +enum class MemorySemantics : uint32_t { + None = 0x0, + Acquire = 0x2, + Release = 0x4, + AcquireRelease = 0x8, + SequentiallyConsistent = 0x10, + UniformMemory = 0x40, + SubgroupMemory = 0x80, + WorkgroupMemory = 0x100, + CrossWorkgroupMemory = 0x200, + AtomicCounterMemory = 0x400, + ImageMemory = 0x800, + OutputMemoryKHR = 0x1000, + MakeAvailableKHR = 0x2000, + MakeVisibleKHR = 0x4000, +}; +std::string getMemorySemanticsName(uint32_t e); + +enum class MemoryOperand : uint32_t { + None = 0x0, + Volatile = 0x1, + Aligned = 0x2, + Nontemporal = 0x4, + MakePointerAvailableKHR = 0x8, + MakePointerVisibleKHR = 0x10, + NonPrivatePointerKHR = 0x20, +}; +std::string getMemoryOperandName(uint32_t e); + +enum class Scope : uint32_t { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, + QueueFamilyKHR = 5, +}; +StringRef getScopeName(Scope e); + +enum class GroupOperation : uint32_t { + Reduce = 0, + InclusiveScan = 1, + ExclusiveScan = 2, + ClusteredReduce = 3, + PartitionedReduceNV = 6, + PartitionedInclusiveScanNV = 7, + PartitionedExclusiveScanNV = 8, +}; +StringRef getGroupOperationName(GroupOperation e); + +enum class KernelEnqueueFlags : uint32_t { + NoWait = 0, + WaitKernel = 1, + WaitWorkGroup = 2, +}; +StringRef getKernelEnqueueFlagsName(KernelEnqueueFlags e); + +enum class KernelProfilingInfo : uint32_t { + None = 0x0, + CmdExecTime = 0x1, +}; +StringRef getKernelProfilingInfoName(KernelProfilingInfo e); +} // namespace SPIRV +} // namespace llvm + +// Return a string representation of the operands from startIndex onwards. +// Templated to allow both MachineInstr and MCInst to use the same logic. +template <typename InstType> +std::string getSPIRVStringOperand(const InstType &MI, unsigned StartIndex) { + std::string s; // Iteratively append to this string. + + const unsigned NumOps = MI.getNumOperands(); + bool IsFinished = false; + for (unsigned i = StartIndex; i < NumOps && !IsFinished; ++i) { + const auto &Op = MI.getOperand(i); + if (!Op.isImm()) // Stop if we hit a register operand. + break; + assert((Op.getImm() >> 32) == 0 && "Imm operand should be i32 word"); + const uint32_t Imm = Op.getImm(); // Each i32 word is up to 4 characters.
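`getSPIRVStringOperand` relies on SPIR-V's convention for string literals: UTF-8 bytes are packed four per 32-bit word, lowest byte first, with a terminating NUL byte (so a string whose length is a multiple of four spills into an extra all-zero word). A self-contained round-trip sketch; the encoder half is purely illustrative, only the decode loop corresponds to the function above:

```cpp
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Pack a string the way SPIR-V stores literals: 4 chars per i32 word,
// low byte first, always NUL-terminated ("abcd" therefore takes 2 words).
std::vector<uint32_t> encodeSPIRVString(const std::string &S) {
  std::vector<uint32_t> Words;
  uint32_t Word = 0;
  unsigned Shift = 0;
  for (char C : S) {
    Word |= static_cast<uint32_t>(static_cast<unsigned char>(C)) << Shift;
    Shift += 8;
    if (Shift == 32) {
      Words.push_back(Word);
      Word = 0;
      Shift = 0;
    }
  }
  Words.push_back(Word); // Flush the final (possibly all-zero) word.
  return Words;
}

// Mirror of the decode loop in getSPIRVStringOperand.
std::string decodeSPIRVString(const std::vector<uint32_t> &Words) {
  std::string S;
  for (uint32_t Imm : Words)
    for (unsigned Shift = 0; Shift < 32; Shift += 8) {
      char C = (Imm >> Shift) & 0xff;
      if (C == 0)
        return S; // The NUL terminator ends the literal.
      S += C;
    }
  return S;
}

int main() {
  assert(decodeSPIRVString(encodeSPIRVString("OpenCL.std")) == "OpenCL.std");
}
```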
+ for (unsigned ShiftAmount = 0; ShiftAmount < 32; ShiftAmount += 8) { + char c = (Imm >> ShiftAmount) & 0xff; + if (c == 0) { // Stop if we hit a null-terminator character. + IsFinished = true; + break; + } else { + s += c; // Otherwise, append the character to the result string. + } + } + } + return s; +} + +#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVBASEINFO_H diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp new file mode 100644 index 000000000000..3105baa02c90 --- /dev/null +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -0,0 +1,556 @@ +//===-- SPIRVInstPrinter.cpp - Output SPIR-V MCInsts as ASM -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints a SPIR-V MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#include "SPIRVInstPrinter.h" +#include "SPIRV.h" +#include "SPIRVBaseInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +// Include the auto-generated portion of the assembly writer. +#include "SPIRVGenAsmWriter.inc" + +void SPIRVInstPrinter::printRemainingVariableOps(const MCInst *MI, + unsigned StartIndex, + raw_ostream &O, + bool SkipFirstSpace, + bool SkipImmediates) { + const unsigned NumOps = MI->getNumOperands(); + for (unsigned i = StartIndex; i < NumOps; ++i) { + if (!SkipImmediates || !MI->getOperand(i).isImm()) { + if (!SkipFirstSpace || i != StartIndex) + O << ' '; + printOperand(MI, i, O); + } + } +} + +void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI, + unsigned StartIndex, + raw_ostream &O) { + O << ' '; + if (MI->getNumOperands() - StartIndex == 2) { // Handle 64 bit literals. + uint64_t Imm = MI->getOperand(StartIndex).getImm(); + Imm |= (MI->getOperand(StartIndex + 1).getImm() << 32); + O << Imm; + } else { + printRemainingVariableOps(MI, StartIndex, O, true, false); + } +} + +void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) { + llvm_unreachable("Unimplemented recordOpExtInstImport"); +} + +void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &OS) { + const unsigned OpCode = MI->getOpcode(); + printInstruction(MI, Address, OS); + + if (OpCode == SPIRV::OpDecorate) { + printOpDecorate(MI, OS); + } else if (OpCode == SPIRV::OpExtInstImport) { + recordOpExtInstImport(MI); + } else if (OpCode == SPIRV::OpExtInst) { + printOpExtInst(MI, OS); + } else { + // Print any extra operands for variadic instructions. 
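`printOpConstantVarOps` above handles the case where a 64-bit constant is carried as two i32 operands, low word first, and reassembles them before printing. A quick check of that arithmetic as a standalone sketch:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // A 64-bit literal is split into two i32 operands, low word first,
  // exactly as printOpConstantVarOps rejoins them.
  uint64_t Value = 0x0123456789abcdefULL;
  uint64_t Lo = Value & 0xffffffffu; // first operand
  uint64_t Hi = Value >> 32;         // second operand
  uint64_t Rejoined = Lo | (Hi << 32);
  assert(Rejoined == Value);
}
```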
+ MCInstrDesc MCDesc = MII.get(OpCode); + if (MCDesc.isVariadic()) { + const unsigned NumFixedOps = MCDesc.getNumOperands(); + const unsigned LastFixedIndex = NumFixedOps - 1; + const int FirstVariableIndex = NumFixedOps; + if (NumFixedOps > 0 && + MCDesc.OpInfo[LastFixedIndex].OperandType == MCOI::OPERAND_UNKNOWN) { + // For instructions where a custom type (not reg or immediate) comes as + // the last operand before the variable_ops. This is usually a StringImm + // operand, but there are a few other cases. + switch (OpCode) { + case SPIRV::OpTypeImage: + OS << ' '; + printAccessQualifier(MI, FirstVariableIndex, OS); + break; + case SPIRV::OpVariable: + OS << ' '; + printOperand(MI, FirstVariableIndex, OS); + break; + case SPIRV::OpEntryPoint: { + // Print the interface ID operands, skipping the name's string + // literal. + printRemainingVariableOps(MI, NumFixedOps, OS, false, true); + break; + } + case SPIRV::OpExecutionMode: + case SPIRV::OpExecutionModeId: + case SPIRV::OpLoopMerge: { + // Print any literals after the OPERAND_UNKNOWN argument normally. + printRemainingVariableOps(MI, NumFixedOps, OS); + break; + } + default: + break; // printStringImm has already been handled + } + } else { + // For instructions with no fixed ops or a reg/immediate as the final + // fixed operand, we can usually print the rest with "printOperand", but + // check for a few cases with custom types first. + switch (OpCode) { + case SPIRV::OpLoad: + case SPIRV::OpStore: + OS << ' '; + printMemoryOperand(MI, FirstVariableIndex, OS); + printRemainingVariableOps(MI, FirstVariableIndex + 1, OS); + break; + case SPIRV::OpImageSampleImplicitLod: + case SPIRV::OpImageSampleDrefImplicitLod: + case SPIRV::OpImageSampleProjImplicitLod: + case SPIRV::OpImageSampleProjDrefImplicitLod: + case SPIRV::OpImageFetch: + case SPIRV::OpImageGather: + case SPIRV::OpImageDrefGather: + case SPIRV::OpImageRead: + case SPIRV::OpImageWrite: + case SPIRV::OpImageSparseSampleImplicitLod: + case SPIRV::OpImageSparseSampleDrefImplicitLod: + case SPIRV::OpImageSparseSampleProjImplicitLod: + case SPIRV::OpImageSparseSampleProjDrefImplicitLod: + case SPIRV::OpImageSparseFetch: + case SPIRV::OpImageSparseGather: + case SPIRV::OpImageSparseDrefGather: + case SPIRV::OpImageSparseRead: + case SPIRV::OpImageSampleFootprintNV: + OS << ' '; + printImageOperand(MI, FirstVariableIndex, OS); + printRemainingVariableOps(MI, NumFixedOps + 1, OS); + break; + case SPIRV::OpCopyMemory: + case SPIRV::OpCopyMemorySized: { + const unsigned NumOps = MI->getNumOperands(); + for (unsigned i = NumFixedOps; i < NumOps; ++i) { + OS << ' '; + printMemoryOperand(MI, i, OS); + if (MI->getOperand(i).getImm() & + static_cast(SPIRV::MemoryOperand::Aligned)) { + assert(i + 1 < NumOps && "Missing alignment operand"); + OS << ' '; + printOperand(MI, i + 1, OS); + i += 1; + } + } + break; + } + case SPIRV::OpConstantI: + case SPIRV::OpConstantF: + printOpConstantVarOps(MI, NumFixedOps, OS); + break; + default: + printRemainingVariableOps(MI, NumFixedOps, OS); + break; + } + } + } + } + + printAnnotation(OS, Annot); +} + +void SPIRVInstPrinter::printOpExtInst(const MCInst *MI, raw_ostream &O) { + llvm_unreachable("Unimplemented printOpExtInst"); +} + +void SPIRVInstPrinter::printOpDecorate(const MCInst *MI, raw_ostream &O) { + // The fixed operands have already been printed, so just need to decide what + // type of decoration operands to print based on the Decoration type. 
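The `OpCopyMemory`/`OpCopyMemorySized` case above shows the one place where variadic operands are not independent: when a memory-operand mask has the `Aligned` bit set, the immediately following immediate is that operand's alignment literal and must be consumed with it. A simplified sketch of the same walk over a plain array of immediates (function and variable names here are illustrative only):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

enum class MemoryOperand : uint32_t { None = 0x0, Volatile = 0x1, Aligned = 0x2 };

// When a mask has the Aligned bit set, the next immediate is its
// alignment value and belongs to the same memory operand. (The real
// printer asserts that the literal is actually present.)
void printMemoryOperands(const std::vector<uint32_t> &Masks) {
  for (size_t I = 0; I < Masks.size(); ++I) {
    std::cout << " mask=0x" << std::hex << Masks[I] << std::dec;
    if (Masks[I] & static_cast<uint32_t>(MemoryOperand::Aligned)) {
      std::cout << " align=" << Masks[I + 1];
      ++I; // Skip the alignment literal we just printed.
    }
  }
  std::cout << '\n';
}

int main() {
  // Prints " mask=0x2 align=16 mask=0x1".
  printMemoryOperands({0x2 /*Aligned*/, 16, 0x1 /*Volatile*/});
}
```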
+ MCInstrDesc MCDesc = MII.get(MI->getOpcode()); + unsigned NumFixedOps = MCDesc.getNumOperands(); + + if (NumFixedOps != MI->getNumOperands()) { + auto DecOp = MI->getOperand(NumFixedOps - 1); + auto Dec = static_cast<SPIRV::Decoration>(DecOp.getImm()); + + O << ' '; + + switch (Dec) { + case SPIRV::Decoration::BuiltIn: + printBuiltIn(MI, NumFixedOps, O); + break; + case SPIRV::Decoration::UniformId: + printScope(MI, NumFixedOps, O); + break; + case SPIRV::Decoration::FuncParamAttr: + printFunctionParameterAttribute(MI, NumFixedOps, O); + break; + case SPIRV::Decoration::FPRoundingMode: + printFPRoundingMode(MI, NumFixedOps, O); + break; + case SPIRV::Decoration::FPFastMathMode: + printFPFastMathMode(MI, NumFixedOps, O); + break; + case SPIRV::Decoration::LinkageAttributes: + case SPIRV::Decoration::UserSemantic: + printStringImm(MI, NumFixedOps, O); + break; + default: + printRemainingVariableOps(MI, NumFixedOps, O, true); + break; + } + } +} + +static void printExpr(const MCExpr *Expr, raw_ostream &O) { +#ifndef NDEBUG + const MCSymbolRefExpr *SRE; + + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) + SRE = cast<MCSymbolRefExpr>(BE->getLHS()); + else + SRE = cast<MCSymbolRefExpr>(Expr); + + MCSymbolRefExpr::VariantKind Kind = SRE->getKind(); + + assert(Kind == MCSymbolRefExpr::VK_None); +#endif + O << *Expr; +} + +void SPIRVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O, const char *Modifier) { + assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + if (OpNo < MI->getNumOperands()) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) + O << '%' << (Register::virtReg2Index(Op.getReg()) + 1); + else if (Op.isImm()) + O << formatImm((int64_t)Op.getImm()); + else if (Op.isDFPImm()) + O << formatImm((double)Op.getDFPImm()); + else if (Op.isExpr()) + printExpr(Op.getExpr(), O); + else + llvm_unreachable("Unexpected operand type"); + } +} + +void SPIRVInstPrinter::printStringImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const unsigned NumOps = MI->getNumOperands(); + unsigned StrStartIndex = OpNo; + while (StrStartIndex < NumOps) { + if (MI->getOperand(StrStartIndex).isReg()) + break; + + std::string Str = getSPIRVStringOperand(*MI, OpNo); + if (StrStartIndex != OpNo) + O << ' '; // Add a space if we're starting a new string/argument. + O << '"'; + for (char c : Str) { + if (c == '"') + O.write('\\'); // Escape " characters (might break for complex UTF-8). + O.write(c); + } + O << '"'; + + unsigned numOpsInString = (Str.size() / 4) + 1; + StrStartIndex += numOpsInString; + + // Check for final Op of "OpDecorate %x %stringImm %linkageAttribute".
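The advance `StrStartIndex += (Str.size() / 4) + 1` above encodes how many operands a string literal occupies: the NUL terminator always needs a byte, so N characters take N/4 + 1 words. A tiny check of that count, as a hedged standalone sketch:

```cpp
#include <cassert>
#include <string>

// Number of i32 operands a SPIR-V string literal occupies, matching
// the numOpsInString computation in printStringImm.
unsigned numStringWords(const std::string &Str) {
  return static_cast<unsigned>(Str.size() / 4) + 1;
}

int main() {
  assert(numStringWords("") == 1);     // just the terminator word
  assert(numStringWords("abc") == 1);  // "abc\0" fits in one word
  assert(numStringWords("abcd") == 2); // the terminator spills into word 2
}
```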
+ if (MI->getOpcode() == SPIRV::OpDecorate && + MI->getOperand(1).getImm() == + static_cast(SPIRV::Decoration::LinkageAttributes)) { + O << ' '; + printLinkageType(MI, StrStartIndex, O); + break; + } + } +} + +void SPIRVInstPrinter::printExtInst(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Unimplemented printExtInst"); +} + +void SPIRVInstPrinter::printCapability(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::Capability e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getCapabilityName(e); + } +} + +void SPIRVInstPrinter::printSourceLanguage(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::SourceLanguage e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getSourceLanguageName(e); + } +} + +void SPIRVInstPrinter::printExecutionModel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::ExecutionModel e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getExecutionModelName(e); + } +} + +void SPIRVInstPrinter::printAddressingModel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::AddressingModel e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getAddressingModelName(e); + } +} + +void SPIRVInstPrinter::printMemoryModel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::MemoryModel e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getMemoryModelName(e); + } +} + +void SPIRVInstPrinter::printExecutionMode(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::ExecutionMode e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getExecutionModeName(e); + } +} + +void SPIRVInstPrinter::printStorageClass(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::StorageClass e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getStorageClassName(e); + } +} + +void SPIRVInstPrinter::printDim(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::Dim e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getDimName(e); + } +} + +void SPIRVInstPrinter::printSamplerAddressingMode(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::SamplerAddressingMode e = static_cast( + MI->getOperand(OpNo).getImm()); + O << SPIRV::getSamplerAddressingModeName(e); + } +} + +void SPIRVInstPrinter::printSamplerFilterMode(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::SamplerFilterMode e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getSamplerFilterModeName(e); + } +} + +void SPIRVInstPrinter::printImageFormat(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::ImageFormat e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getImageFormatName(e); + } +} + +void SPIRVInstPrinter::printImageChannelOrder(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::ImageChannelOrder e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getImageChannelOrderName(e); + } +} + +void SPIRVInstPrinter::printImageChannelDataType(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + 
SPIRV::ImageChannelDataType e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getImageChannelDataTypeName(e); + } +} + +void SPIRVInstPrinter::printImageOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getImageOperandName(e); + } +} + +void SPIRVInstPrinter::printFPFastMathMode(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getFPFastMathModeName(e); + } +} + +void SPIRVInstPrinter::printFPRoundingMode(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::FPRoundingMode e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getFPRoundingModeName(e); + } +} + +void SPIRVInstPrinter::printLinkageType(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::LinkageType e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getLinkageTypeName(e); + } +} + +void SPIRVInstPrinter::printAccessQualifier(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::AccessQualifier e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getAccessQualifierName(e); + } +} + +void SPIRVInstPrinter::printFunctionParameterAttribute(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::FunctionParameterAttribute e = + static_cast( + MI->getOperand(OpNo).getImm()); + O << SPIRV::getFunctionParameterAttributeName(e); + } +} + +void SPIRVInstPrinter::printDecoration(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::Decoration e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getDecorationName(e); + } +} + +void SPIRVInstPrinter::printBuiltIn(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::BuiltIn e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getBuiltInName(e); + } +} + +void SPIRVInstPrinter::printSelectionControl(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getSelectionControlName(e); + } +} + +void SPIRVInstPrinter::printLoopControl(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getLoopControlName(e); + } +} + +void SPIRVInstPrinter::printFunctionControl(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getFunctionControlName(e); + } +} + +void SPIRVInstPrinter::printMemorySemantics(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getMemorySemanticsName(e); + } +} + +void SPIRVInstPrinter::printMemoryOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + unsigned e = static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getMemoryOperandName(e); + } +} + +void SPIRVInstPrinter::printScope(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::Scope e = static_cast(MI->getOperand(OpNo).getImm()); 
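Each of these enum-printing helpers has the same three steps: bounds-check `OpNo`, cast the immediate back to the typed enum (the casts' template arguments, e.g. `static_cast<SPIRV::Scope>` or `static_cast<unsigned>`, were lost in this rendering), and stream the looked-up name. As a hypothetical illustration of the shared shape, not something the file actually does, the repetition could be factored with a template while keeping the hand-written wrappers that tblgen's assembly writer calls by name:

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper capturing the common pattern of the print* methods.
template <typename EnumT>
static void printEnumOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
                             StringRef (*GetName)(EnumT)) {
  if (OpNo < MI->getNumOperands()) {
    auto E = static_cast<EnumT>(MI->getOperand(OpNo).getImm());
    O << GetName(E);
  }
}

// Usage equivalent to printScope(MI, OpNo, O):
//   printEnumOperand<SPIRV::Scope>(MI, OpNo, O, SPIRV::getScopeName);
```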
+ O << SPIRV::getScopeName(e); + } +} + +void SPIRVInstPrinter::printGroupOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::GroupOperation e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getGroupOperationName(e); + } +} + +void SPIRVInstPrinter::printKernelEnqueueFlags(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::KernelEnqueueFlags e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getKernelEnqueueFlagsName(e); + } +} + +void SPIRVInstPrinter::printKernelProfilingInfo(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo < MI->getNumOperands()) { + SPIRV::KernelProfilingInfo e = + static_cast(MI->getOperand(OpNo).getImm()); + O << SPIRV::getKernelProfilingInfoName(e); + } +} diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h new file mode 100644 index 000000000000..cd3b6f1e6d66 --- /dev/null +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h @@ -0,0 +1,94 @@ +//===-- SPIRVInstPrinter.h - Output SPIR-V MCInsts as ASM -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints a SPIR-V MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_INSTPRINTER_SPIRVINSTPRINTER_H +#define LLVM_LIB_TARGET_SPIRV_INSTPRINTER_SPIRVINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { +class SPIRVInstPrinter : public MCInstPrinter { +private: + void recordOpExtInstImport(const MCInst *MI); + +public: + using MCInstPrinter::MCInstPrinter; + + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &OS) override; + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, + const char *Modifier = nullptr); + + void printStringImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printOpDecorate(const MCInst *MI, raw_ostream &O); + void printOpExtInst(const MCInst *MI, raw_ostream &O); + void printRemainingVariableOps(const MCInst *MI, unsigned StartIndex, + raw_ostream &O, bool SkipFirstSpace = false, + bool SkipImmediates = false); + void printOpConstantVarOps(const MCInst *MI, unsigned StartIndex, + raw_ostream &O); + + void printExtInst(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + // SPIR-V enumerations printing. 
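The class below declares the tblgen-generated pieces (`printInstruction`, `getMnemonic`, `getRegisterName`) alongside the hand-written operand printers. For context, a sketch of the conventional wiring that makes LLVM use such a printer; the actual registration is assumed to live in SPIRVMCTargetDesc.cpp, which is not part of this hunk, and `TheSPIRVTarget` is a placeholder name:

```cpp
#include "SPIRVInstPrinter.h"
#include "llvm/MC/TargetRegistry.h"

using namespace llvm;

static MCInstPrinter *createSPIRVMCInstPrinter(const Triple &T,
                                               unsigned SyntaxVariant,
                                               const MCAsmInfo &MAI,
                                               const MCInstrInfo &MII,
                                               const MCRegisterInfo &MRI) {
  // SPIRVInstPrinter inherits MCInstPrinter's (MAI, MII, MRI) constructor.
  return new SPIRVInstPrinter(MAI, MII, MRI);
}

// Called from the target's LLVMInitializeSPIRVTargetMC():
//   TargetRegistry::RegisterMCInstPrinter(TheSPIRVTarget,
//                                         createSPIRVMCInstPrinter);
```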
+  void printCapability(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSourceLanguage(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printExecutionModel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printAddressingModel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemoryModel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printExecutionMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printStorageClass(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printDim(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printSamplerAddressingMode(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O);
+  void printSamplerFilterMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printImageFormat(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printImageChannelOrder(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printImageChannelDataType(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O);
+  void printImageOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printFPFastMathMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printFPRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printLinkageType(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printAccessQualifier(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printFunctionParameterAttribute(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O);
+
+  void printDecoration(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBuiltIn(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printSelectionControl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printLoopControl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printFunctionControl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printMemorySemantics(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemoryOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printScope(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printGroupOperation(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printKernelEnqueueFlags(const MCInst *MI, unsigned OpNo,
+                               raw_ostream &O);
+  void printKernelProfilingInfo(const MCInst *MI, unsigned OpNo,
+                                raw_ostream &O);
+  // Autogenerated by tblgen.
+  std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+  void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_INSTPRINTER_SPIRVINSTPRINTER_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
new file mode 100644
index 000000000000..2f3462f419e5
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
@@ -0,0 +1,34 @@
+//===-- SPIRVMCAsmInfo.cpp - SPIR-V asm properties --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SPIRVMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+SPIRVMCAsmInfo::SPIRVMCAsmInfo(const Triple &TT,
+                               const MCTargetOptions &Options) {
+  IsLittleEndian = true;
+
+  HasSingleParameterDotFile = false;
+  HasDotTypeDotSizeDirective = false;
+
+  MinInstAlignment = 4;
+
+  CodePointerSize = 4;
+  CommentString = ";";
+  HasFunctionAlignment = false;
+}
+
+bool SPIRVMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
+  return true;
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h
new file mode 100644
index 000000000000..08e579e1c32c
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- SPIRVMCAsmInfo.h - SPIR-V asm properties ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPIRVMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCASMINFO_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+class Triple;
+
+class SPIRVMCAsmInfo : public MCAsmInfo {
+public:
+  explicit SPIRVMCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
+  bool shouldOmitSectionDirective(StringRef SectionName) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCASMINFO_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
new file mode 100644
index 000000000000..d953bc590473
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
@@ -0,0 +1,132 @@
+//===-- SPIRVMCCodeEmitter.cpp - Emit SPIR-V machine code -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIRVMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spirv-mccodeemitter"
+
+namespace {
+
+class SPIRVMCCodeEmitter : public MCCodeEmitter {
+  const MCInstrInfo &MCII;
+
+public:
+  SPIRVMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+  SPIRVMCCodeEmitter(const SPIRVMCCodeEmitter &) = delete;
+  void operator=(const SPIRVMCCodeEmitter &) = delete;
+  ~SPIRVMCCodeEmitter() override = default;
+
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+
+private:
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createSPIRVMCCodeEmitter(const MCInstrInfo &MCII,
+                                              MCContext &Ctx) {
+  return new SPIRVMCCodeEmitter(MCII);
+}
+
+using EndianWriter = support::endian::Writer;
+
+// Check if the instruction has a type argument for operand 1, and defines an
+// ID output register in operand 0. If so, we need to swap operands 0 and 1 so
+// the type comes first in the output, despite coming second in the MCInst.
+static bool hasType(const MCInst &MI, const MCInstrInfo &MII) {
+  MCInstrDesc MCDesc = MII.get(MI.getOpcode());
+  // If we define an output, and have at least one other argument.
+  if (MCDesc.getNumDefs() == 1 && MCDesc.getNumOperands() >= 2) {
+    // Check if we define an ID, and take a type as operand 1.
+    auto DefOpInfo = MCDesc.opInfo_begin();
+    auto FirstArgOpInfo = MCDesc.opInfo_begin() + 1;
+    return (DefOpInfo->RegClass == SPIRV::IDRegClassID ||
+            DefOpInfo->RegClass == SPIRV::ANYIDRegClassID) &&
+           FirstArgOpInfo->RegClass == SPIRV::TYPERegClassID;
+  }
+  return false;
+}
+
+static void emitOperand(const MCOperand &Op, EndianWriter &OSE) {
+  if (Op.isReg()) {
+    // Emit the id index starting at 1 (0 is an invalid index).
+    OSE.write<uint32_t>(Register::virtReg2Index(Op.getReg()) + 1);
+  } else if (Op.isImm()) {
+    OSE.write<uint32_t>(Op.getImm());
+  } else {
+    llvm_unreachable("Unexpected operand type in VReg");
+  }
+}
+
+// Emit the type in operand 1 before the ID in operand 0 it defines, and all
+// remaining operands in the order they come naturally.
+static void emitTypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
+  unsigned NumOps = MI.getNumOperands();
+  emitOperand(MI.getOperand(1), OSE);
+  emitOperand(MI.getOperand(0), OSE);
+  for (unsigned i = 2; i < NumOps; ++i)
+    emitOperand(MI.getOperand(i), OSE);
+}
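+
+// Illustrative sketch of the binary layout the functions here produce (the
+// operand swap above plus the word-count/opcode packing in
+// encodeInstruction below). For a hypothetical "%res = OpIAdd %i32ty %a %b",
+// the MCInst operand order is (def %res, %i32ty, %a, %b), but SPIR-V wants
+// the result type id first:
+//   word 0: (5 << 16) | 128   ; 5 words total, OpIAdd has opcode 128
+//   word 1: id of %i32ty
+//   word 2: id of %res
+//   word 3: id of %a
+//   word 4: id of %b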
+
+// Emit operands in the order they come naturally.
+static void emitUntypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
+  for (const auto &Op : MI)
+    emitOperand(Op, OSE);
+}
+
+void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                           SmallVectorImpl<MCFixup> &Fixups,
+                                           const MCSubtargetInfo &STI) const {
+  auto Features = computeAvailableFeatures(STI.getFeatureBits());
+  verifyInstructionPredicates(MI, Features);
+
+  EndianWriter OSE(OS, support::little);
+
+  // Encode the first 32-bit SPIR-V word with the number of words and the
+  // opcode.
+  const uint64_t OpCode = getBinaryCodeForInstr(MI, Fixups, STI);
+  const uint32_t NumWords = MI.getNumOperands() + 1;
+  const uint32_t FirstWord = (NumWords << 16) | OpCode;
+  OSE.write<uint32_t>(FirstWord);
+
+  // Emit the instruction arguments (emitting the output type first if
+  // present).
+  if (hasType(MI, MCII))
+    emitTypedInstrOperands(MI, OSE);
+  else
+    emitUntypedInstrOperands(MI, OSE);
+}
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "SPIRVGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
new file mode 100644
index 000000000000..6b8b4a73af92
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
@@ -0,0 +1,102 @@
+//===-- SPIRVMCTargetDesc.cpp - SPIR-V Target Descriptions ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SPIR-V specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCTargetDesc.h"
+#include "SPIRVInstPrinter.h"
+#include "SPIRVMCAsmInfo.h"
+#include "SPIRVTargetStreamer.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPIRVGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPIRVGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPIRVGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPIRVMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitSPIRVMCInstrInfo(X);
+  return X;
+}
+
+static MCRegisterInfo *createSPIRVMCRegisterInfo(const Triple &TT) {
+  MCRegisterInfo *X = new MCRegisterInfo();
+  return X;
+}
+
+static MCSubtargetInfo *
+createSPIRVMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+  return createSPIRVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+}
+
+static MCStreamer *
+createSPIRVMCStreamer(const Triple &T, MCContext &Ctx,
+                      std::unique_ptr<MCAsmBackend> &&MAB,
+                      std::unique_ptr<MCObjectWriter> &&OW,
+                      std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll) {
+  return createSPIRVStreamer(Ctx, std::move(MAB), std::move(OW),
+                             std::move(Emitter), RelaxAll);
+}
+
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+                                                 formatted_raw_ostream &,
+                                                 MCInstPrinter *, bool) {
+  return new SPIRVTargetStreamer(S);
+}
+
+static MCInstPrinter *createSPIRVMCInstPrinter(const Triple &T,
+                                               unsigned SyntaxVariant,
+                                               const MCAsmInfo &MAI,
+                                               const MCInstrInfo &MII,
+                                               const MCRegisterInfo &MRI) {
+  assert(SyntaxVariant == 0);
+  return new SPIRVInstPrinter(MAI, MII, MRI);
+}
+
+namespace {
+
+class SPIRVMCInstrAnalysis : public MCInstrAnalysis {
+public:
+  explicit SPIRVMCInstrAnalysis(const MCInstrInfo *Info)
+      : MCInstrAnalysis(Info) {}
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createSPIRVInstrAnalysis(const MCInstrInfo *Info) {
+  return new SPIRVMCInstrAnalysis(Info);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetMC() {
+  for (Target *T : {&getTheSPIRV32Target(), &getTheSPIRV64Target()}) {
+    RegisterMCAsmInfo<SPIRVMCAsmInfo> X(*T);
+    TargetRegistry::RegisterMCInstrInfo(*T, createSPIRVMCInstrInfo);
+    TargetRegistry::RegisterMCRegInfo(*T, createSPIRVMCRegisterInfo);
+    TargetRegistry::RegisterMCSubtargetInfo(*T, createSPIRVMCSubtargetInfo);
+    TargetRegistry::RegisterSPIRVStreamer(*T, createSPIRVMCStreamer);
+    TargetRegistry::RegisterMCInstPrinter(*T, createSPIRVMCInstPrinter);
+    TargetRegistry::RegisterMCInstrAnalysis(*T, createSPIRVInstrAnalysis);
+    TargetRegistry::RegisterMCCodeEmitter(*T, createSPIRVMCCodeEmitter);
+    TargetRegistry::RegisterMCAsmBackend(*T, createSPIRVAsmBackend);
+    TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+  }
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
new file mode 100644
index 000000000000..4009fa96aa68
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
@@ -0,0 +1,52 @@
+//===-- SPIRVMCTargetDesc.h - SPIR-V Target Descriptions --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SPIR-V specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <memory>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectTargetWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class MCTargetOptions;
+class Target;
+
+MCCodeEmitter *createSPIRVMCCodeEmitter(const MCInstrInfo &MCII,
+                                        MCContext &Ctx);
+
+MCAsmBackend *createSPIRVAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+                                    const MCRegisterInfo &MRI,
+                                    const MCTargetOptions &Options);
+
+std::unique_ptr<MCObjectTargetWriter> createSPIRVObjectTargetWriter();
+} // namespace llvm
+
+// Defines symbolic names for SPIR-V registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "SPIRVGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPIR-V instructions.
+#define GET_INSTRINFO_ENUM
+#include "SPIRVGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPIRVGenSubtargetInfo.inc"
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCTARGETDESC_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp
new file mode 100644
index 000000000000..685168b4073d
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp
@@ -0,0 +1,25 @@
+//===- SPIRVObjectTargetWriter.cpp - SPIR-V Object Target Writer -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCTargetDesc.h"
+#include "llvm/MC/MCSPIRVObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class SPIRVObjectTargetWriter : public MCSPIRVObjectTargetWriter {
+public:
+  SPIRVObjectTargetWriter() = default;
+};
+
+} // namespace
+
+std::unique_ptr<MCObjectTargetWriter> llvm::createSPIRVObjectTargetWriter() {
+  return std::make_unique<SPIRVObjectTargetWriter>();
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
new file mode 100644
index 000000000000..0a318e0e01e5
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
@@ -0,0 +1,18 @@
+//===-- SPIRVTargetStreamer.cpp - SPIRVTargetStreamer class ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIRVTargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVTargetStreamer.h"
+
+using namespace llvm;
+
+SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+SPIRVTargetStreamer::~SPIRVTargetStreamer() {}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
new file mode 100644
index 000000000000..2cc8f50aba67
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
@@ -0,0 +1,28 @@
+//===-- SPIRVTargetStreamer.h - SPIRV Target Streamer -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVTARGETSTREAMER_H
+#define LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class MCSection;
+
+class SPIRVTargetStreamer : public MCTargetStreamer {
+public:
+  SPIRVTargetStreamer(MCStreamer &S);
+  ~SPIRVTargetStreamer() override;
+
+  void changeSection(const MCSection *CurSection, MCSection *Section,
+                     const MCExpr *SubSection, raw_ostream &OS) override {}
+};
+} // namespace llvm
+
+#endif // LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVTARGETSTREAMER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h
new file mode 100644
index 000000000000..8da54a5d6e61
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRV.h
@@ -0,0 +1,34 @@
+//===-- SPIRV.h - Top-level interface for SPIR-V representation -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRV_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRV_H
+
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class SPIRVTargetMachine;
+class SPIRVSubtarget;
+class InstructionSelector;
+class RegisterBankInfo;
+
+FunctionPass *createSPIRVPreLegalizerPass();
+FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM);
+InstructionSelector *
+createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+                               const SPIRVSubtarget &Subtarget,
+                               const RegisterBankInfo &RBI);
+
+void initializeSPIRVModuleAnalysisPass(PassRegistry &);
+void initializeSPIRVPreLegalizerPass(PassRegistry &);
+void initializeSPIRVEmitIntrinsicsPass(PassRegistry &);
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRV_H
diff --git a/llvm/lib/Target/SPIRV/SPIRV.td b/llvm/lib/Target/SPIRV/SPIRV.td
new file mode 100644
index 000000000000..27374acb8882
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRV.td
@@ -0,0 +1,43 @@
+//===-- SPIRV.td - Describe the SPIR-V Target Machine ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "SPIRVRegisterInfo.td"
+include "SPIRVRegisterBanks.td"
+include "SPIRVInstrInfo.td"
+
+def SPIRVInstrInfo : InstrInfo;
+
+class Proc<string Name, list<SubtargetFeature> Features>
+    : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+def SPIRV10 : SubtargetFeature<"spirv1.0", "SPIRVVersion", "10",
+                               "Use SPIR-V version 1.0">;
+def SPIRV11 : SubtargetFeature<"spirv1.1", "SPIRVVersion", "11",
+                               "Use SPIR-V version 1.1">;
+def SPIRV12 : SubtargetFeature<"spirv1.2", "SPIRVVersion", "12",
+                               "Use SPIR-V version 1.2">;
+def SPIRV13 : SubtargetFeature<"spirv1.3", "SPIRVVersion", "13",
+                               "Use SPIR-V version 1.3">;
+def SPIRV14 : SubtargetFeature<"spirv1.4", "SPIRVVersion", "14",
+                               "Use SPIR-V version 1.4">;
+def SPIRV15 : SubtargetFeature<"spirv1.5", "SPIRVVersion", "15",
+                               "Use SPIR-V version 1.5">;
+
+def SPIRVInstPrinter : AsmWriter {
+  string AsmWriterClassName = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
+def SPIRV : Target {
+  let InstructionSet = SPIRVInstrInfo;
+  let AssemblyWriters = [SPIRVInstPrinter];
+}
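+
+// For illustration only: the version features above are meant to be selected
+// through the usual subtarget-feature mechanism (assumed invocation; exact
+// flags may differ for this in-progress backend):
+//   llc -mtriple=spirv64-unknown-unknown -mattr=+spirv1.3 input.ll
+// which sets SPIRVVersion to 13 in the generated subtarget.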
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
new file mode 100644
index 000000000000..0de232651377
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -0,0 +1,348 @@
+//===-- SPIRVAsmPrinter.cpp - SPIR-V LLVM assembly writer -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SPIR-V assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SPIRVInstPrinter.h"
+#include "SPIRV.h"
+#include "SPIRVInstrInfo.h"
+#include "SPIRVMCInstLower.h"
+#include "SPIRVModuleAnalysis.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+class SPIRVAsmPrinter : public AsmPrinter {
+public:
+  explicit SPIRVAsmPrinter(TargetMachine &TM,
+                           std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)), ST(nullptr), TII(nullptr) {}
+  bool ModuleSectionsEmitted;
+  const SPIRVSubtarget *ST;
+  const SPIRVInstrInfo *TII;
+
+  StringRef getPassName() const override { return "SPIRV Assembly Printer"; }
+  void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+
+  void outputMCInst(MCInst &Inst);
+  void outputInstruction(const MachineInstr *MI);
+  void outputModuleSection(SPIRV::ModuleSectionType MSType);
+  void outputEntryPoints();
+  void outputDebugSourceAndStrings(const Module &M);
+  void outputOpMemoryModel();
+  void outputOpFunctionEnd();
+  void outputExtFuncDecls();
+  void outputModuleSections();
+
+  void emitInstruction(const MachineInstr *MI) override;
+  void emitFunctionEntryLabel() override {}
+  void emitFunctionHeader() override;
+  void emitFunctionBodyStart() override {}
+  void emitFunctionBodyEnd() override;
+  void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
+  void emitBasicBlockEnd(const MachineBasicBlock &MBB) override {}
+  void emitGlobalVariable(const GlobalVariable *GV) override {}
+  void emitOpLabel(const MachineBasicBlock &MBB);
+  void emitEndOfAsmFile(Module &M) override;
+  bool doInitialization(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  SPIRV::ModuleAnalysisInfo *MAI;
+};
+} // namespace
+
+void SPIRVAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<SPIRVModuleAnalysis>();
+  AU.addPreserved<SPIRVModuleAnalysis>();
+  AsmPrinter::getAnalysisUsage(AU);
+}
+
+// Even if the module has no functions, we still need to output the module's
+// global info.
+void SPIRVAsmPrinter::emitEndOfAsmFile(Module &M) {
+  if (ModuleSectionsEmitted == false) {
+    outputModuleSections();
+    ModuleSectionsEmitted = true;
+  }
+}
+
+void SPIRVAsmPrinter::emitFunctionHeader() {
+  if (ModuleSectionsEmitted == false) {
+    outputModuleSections();
+    ModuleSectionsEmitted = true;
+  }
+  // Get the subtarget from the current MachineFunction.
+  ST = &MF->getSubtarget<SPIRVSubtarget>();
+  TII = ST->getInstrInfo();
+  const Function &F = MF->getFunction();
+
+  if (isVerbose()) {
+    OutStreamer->getCommentOS()
+        << "-- Begin function "
+        << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
+  }
+
+  auto Section = getObjFileLowering().SectionForGlobal(&F, TM);
+  MF->setSection(Section);
+}
+
+void SPIRVAsmPrinter::outputOpFunctionEnd() {
+  MCInst FunctionEndInst;
+  FunctionEndInst.setOpcode(SPIRV::OpFunctionEnd);
+  outputMCInst(FunctionEndInst);
+}
+
+// Emit OpFunctionEnd at the end of MF and clear BBNumToRegMap.
+void SPIRVAsmPrinter::emitFunctionBodyEnd() {
+  outputOpFunctionEnd();
+  MAI->BBNumToRegMap.clear();
+}
+
+void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) {
+  MCInst LabelInst;
+  LabelInst.setOpcode(SPIRV::OpLabel);
+  LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB)));
+  outputMCInst(LabelInst);
+}
+
+void SPIRVAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
+  // If it's the first MBB in MF, it has OpFunction and OpFunctionParameter, so
+  // OpLabel should be output after them.
+  if (MBB.getNumber() == MF->front().getNumber()) {
+    for (const MachineInstr &MI : MBB)
+      if (MI.getOpcode() == SPIRV::OpFunction)
+        return;
+    // TODO: this case should be checked by the verifier.
+    report_fatal_error("OpFunction is expected in the front MBB of MF");
+  }
+  emitOpLabel(MBB);
+}
+
+void SPIRVAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+                                   raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << SPIRVInstPrinter::getRegisterName(MO.getReg());
+    break;
+
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    break;
+
+  case MachineOperand::MO_FPImmediate:
+    O << MO.getFPImm();
+    break;
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    break;
+
+  case MachineOperand::MO_GlobalAddress:
+    O << *getSymbol(MO.getGlobal());
+    break;
+
+  case MachineOperand::MO_BlockAddress: {
+    MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
+    O << BA->getName();
+    break;
+  }
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    break;
+
+  case MachineOperand::MO_JumpTableIndex:
+  case MachineOperand::MO_ConstantPoolIndex:
+  default:
+    llvm_unreachable("unexpected operand type");
+  }
+}
+
+bool SPIRVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                      const char *ExtraCode, raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true; // Invalid instruction - SPIR-V does not have special modifiers
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+static bool isFuncOrHeaderInstr(const MachineInstr *MI,
+                                const SPIRVInstrInfo *TII) {
+  return TII->isHeaderInstr(*MI) || MI->getOpcode() == SPIRV::OpFunction ||
+         MI->getOpcode() == SPIRV::OpFunctionParameter;
+}
+
+void SPIRVAsmPrinter::outputMCInst(MCInst &Inst) {
+  OutStreamer->emitInstruction(Inst, *OutContext.getSubtargetInfo());
+}
+
+void SPIRVAsmPrinter::outputInstruction(const MachineInstr *MI) {
+  SPIRVMCInstLower MCInstLowering;
+  MCInst TmpInst;
+  MCInstLowering.lower(MI, TmpInst, MAI);
+  outputMCInst(TmpInst);
+}
+
+void SPIRVAsmPrinter::emitInstruction(const MachineInstr *MI) {
+  if (!MAI->getSkipEmission(MI))
+    outputInstruction(MI);
+
+  // Output OpLabel after OpFunction and OpFunctionParameter in the first MBB.
+  const MachineInstr *NextMI = MI->getNextNode();
+  if (!MAI->hasMBBRegister(*MI->getParent()) && isFuncOrHeaderInstr(MI, TII) &&
+      (!NextMI || !isFuncOrHeaderInstr(NextMI, TII))) {
+    assert(MI->getParent()->getNumber() == MF->front().getNumber() &&
+           "OpFunction is not in the front MBB of MF");
+    emitOpLabel(*MI->getParent());
+  }
+}
+
+void SPIRVAsmPrinter::outputModuleSection(SPIRV::ModuleSectionType MSType) {
+  for (MachineInstr *MI : MAI->getMSInstrs(MSType))
+    outputInstruction(MI);
+}
+
+void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) {
+  // Output OpSource.
+  MCInst Inst;
+  Inst.setOpcode(SPIRV::OpSource);
+  Inst.addOperand(MCOperand::createImm(static_cast<uint32_t>(MAI->SrcLang)));
+  Inst.addOperand(
+      MCOperand::createImm(static_cast<uint32_t>(MAI->SrcLangVersion)));
+  outputMCInst(Inst);
+}
+
+void SPIRVAsmPrinter::outputOpMemoryModel() {
+  MCInst Inst;
+  Inst.setOpcode(SPIRV::OpMemoryModel);
+  Inst.addOperand(MCOperand::createImm(static_cast<uint32_t>(MAI->Addr)));
+  Inst.addOperand(MCOperand::createImm(static_cast<uint32_t>(MAI->Mem)));
+  outputMCInst(Inst);
+}
+
+// Before emitting OpEntryPoint instructions, we need to compute each entry
+// point's interface: a list of IDs of global OpVariable instructions. These
+// declare the set of global variables from a module that form the interface
+// of this entry point.
+void SPIRVAsmPrinter::outputEntryPoints() {
+  // Find all OpVariable IDs with the required StorageClass.
+  DenseSet<Register> InterfaceIDs;
+  for (MachineInstr *MI : MAI->GlobalVarList) {
+    assert(MI->getOpcode() == SPIRV::OpVariable);
+    auto SC = static_cast<SPIRV::StorageClass>(MI->getOperand(2).getImm());
+    // Before version 1.4, the interface's storage classes are limited to
+    // the Input and Output storage classes. Starting with version 1.4,
+    // the interface's storage classes are all storage classes used in
+    // declaring all global variables referenced by the entry point call tree.
+    if (ST->getSPIRVVersion() >= 14 || SC == SPIRV::StorageClass::Input ||
+        SC == SPIRV::StorageClass::Output) {
+      MachineFunction *MF = MI->getMF();
+      Register Reg = MAI->getRegisterAlias(MF, MI->getOperand(0).getReg());
+      InterfaceIDs.insert(Reg);
+    }
+  }
+
+  // Output OpEntryPoints adding interface args to all of them.
+  for (MachineInstr *MI : MAI->getMSInstrs(SPIRV::MB_EntryPoints)) {
+    SPIRVMCInstLower MCInstLowering;
+    MCInst TmpInst;
+    MCInstLowering.lower(MI, TmpInst, MAI);
+    for (Register Reg : InterfaceIDs) {
+      assert(Reg.isValid());
+      TmpInst.addOperand(MCOperand::createReg(Reg));
+    }
+    outputMCInst(TmpInst);
+  }
+}
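+
+// For illustration, with two interface globals %in and %out a kernel entry
+// point would be emitted roughly as:
+//   OpEntryPoint Kernel %foo "foo" %in %out
+// (the exact operand spelling comes from SPIRVInstPrinter; this is shown
+// only to make the interface-appending loop above concrete).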
+
+void SPIRVAsmPrinter::outputExtFuncDecls() {
+  // Insert OpFunctionEnd after each declaration.
+  SmallVectorImpl<MachineInstr *>::iterator
+      I = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).begin(),
+      E = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).end();
+  for (; I != E; ++I) {
+    outputInstruction(*I);
+    if ((I + 1) == E || (*(I + 1))->getOpcode() == SPIRV::OpFunction)
+      outputOpFunctionEnd();
+  }
+}
+
+void SPIRVAsmPrinter::outputModuleSections() {
+  const Module *M = MMI->getModule();
+  // Get the global subtarget to output module-level info.
+  ST = static_cast<const SPIRVTargetMachine &>(TM).getSubtargetImpl();
+  TII = ST->getInstrInfo();
+  MAI = &SPIRVModuleAnalysis::MAI;
+  assert(ST && TII && MAI && M && "Module analysis is required");
+  // Output instructions according to the Logical Layout of a Module:
+  // TODO: 1,2. All OpCapability instructions, then optional OpExtension
+  // instructions.
+  // TODO: 3. Optional OpExtInstImport instructions.
+  // 4. The single required OpMemoryModel instruction.
+  outputOpMemoryModel();
+  // 5. All entry point declarations, using OpEntryPoint.
+  outputEntryPoints();
+  // 6. Execution-mode declarations, using OpExecutionMode or
+  // OpExecutionModeId.
+  // TODO:
+  // 7a. Debug: all OpString, OpSourceExtension, OpSource, and
+  // OpSourceContinued, without forward references.
+  outputDebugSourceAndStrings(*M);
+  // 7b. Debug: all OpName and all OpMemberName.
+  outputModuleSection(SPIRV::MB_DebugNames);
+  // 7c. Debug: all OpModuleProcessed instructions.
+  outputModuleSection(SPIRV::MB_DebugModuleProcessed);
+  // 8. All annotation instructions (all decorations).
+  outputModuleSection(SPIRV::MB_Annotations);
+  // 9. All type declarations (OpTypeXXX instructions), all constant
+  // instructions, and all global variable declarations. This section is
+  // the first section to allow use of: OpLine and OpNoLine debug information;
+  // non-semantic instructions with OpExtInst.
+  outputModuleSection(SPIRV::MB_TypeConstVars);
+  // 10. All function declarations (functions without a body).
+  outputExtFuncDecls();
+  // 11. All function definitions (functions with a body).
+  // This is done in regular function output.
+}
+
+bool SPIRVAsmPrinter::doInitialization(Module &M) {
+  ModuleSectionsEmitted = false;
+  // We need to call the parent's one explicitly.
+  return AsmPrinter::doInitialization(M);
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVAsmPrinter() {
+  RegisterAsmPrinter<SPIRVAsmPrinter> X(getTheSPIRV32Target());
+  RegisterAsmPrinter<SPIRVAsmPrinter> Y(getTheSPIRV64Target());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
new file mode 100644
index 000000000000..df07a126eeea
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -0,0 +1,223 @@
+//===--- SPIRVCallLowering.cpp - Call lowering ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of LLVM calls to machine code calls for
+// GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVCallLowering.h"
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVISelLowering.h"
+#include "SPIRVRegisterInfo.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVUtils.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+
+using namespace llvm;
+
+SPIRVCallLowering::SPIRVCallLowering(const SPIRVTargetLowering &TLI,
+                                     const SPIRVSubtarget &ST,
+                                     SPIRVGlobalRegistry *GR)
+    : CallLowering(&TLI), ST(ST), GR(GR) {}
+
+bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+                                    const Value *Val, ArrayRef<Register> VRegs,
+                                    FunctionLoweringInfo &FLI,
+                                    Register SwiftErrorVReg) const {
+  // Currently all return types should use a single register.
+  // TODO: handle the case of multiple registers.
+  if (VRegs.size() > 1)
+    return false;
+  if (Val)
+    return MIRBuilder.buildInstr(SPIRV::OpReturnValue)
+        .addUse(VRegs[0])
+        .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
+                          *ST.getRegBankInfo());
+  MIRBuilder.buildInstr(SPIRV::OpReturn);
+  return true;
+}
+
+// Based on the LLVM function attributes, get a SPIR-V FunctionControl.
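+// The mapping below follows the SPIR-V FunctionControl mask (Inline = 0x1,
+// DontInline = 0x2, Pure = 0x4, Const = 0x8). For example, a function marked
+// alwaysinline readonly in LLVM IR is emitted with a function-control word
+// of 0x1 | 0x8 = 0x9.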
+static uint32_t getFunctionControl(const Function &F) {
+  uint32_t FuncControl = static_cast<uint32_t>(SPIRV::FunctionControl::None);
+  if (F.hasFnAttribute(Attribute::AttrKind::AlwaysInline)) {
+    FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::Inline);
+  }
+  if (F.hasFnAttribute(Attribute::AttrKind::ReadNone)) {
+    FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::Pure);
+  }
+  if (F.hasFnAttribute(Attribute::AttrKind::ReadOnly)) {
+    FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::Const);
+  }
+  if (F.hasFnAttribute(Attribute::AttrKind::NoInline)) {
+    FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::DontInline);
+  }
+  return FuncControl;
+}
+
+bool SPIRVCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
+  assert(GR && "Must initialize the SPIRV type registry before lowering args.");
+
+  // Assign types and names to all args, and store their types for later.
+  SmallVector<Register, 4> ArgTypeVRegs;
+  if (VRegs.size() > 0) {
+    unsigned i = 0;
+    for (const auto &Arg : F.args()) {
+      // Currently formal args should use single registers.
+      // TODO: handle the case of multiple registers.
+      if (VRegs[i].size() > 1)
+        return false;
+      auto *SpirvTy =
+          GR->assignTypeToVReg(Arg.getType(), VRegs[i][0], MIRBuilder);
+      ArgTypeVRegs.push_back(GR->getSPIRVTypeID(SpirvTy));
+
+      if (Arg.hasName())
+        buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder);
+      if (Arg.getType()->isPointerTy()) {
+        auto DerefBytes = static_cast<unsigned>(Arg.getDereferenceableBytes());
+        if (DerefBytes != 0)
+          buildOpDecorate(VRegs[i][0], MIRBuilder,
+                          SPIRV::Decoration::MaxByteOffset, {DerefBytes});
+      }
+      if (Arg.hasAttribute(Attribute::Alignment)) {
+        buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Alignment,
+                        {static_cast<unsigned>(Arg.getParamAlignment())});
+      }
+      if (Arg.hasAttribute(Attribute::ReadOnly)) {
+        auto Attr =
+            static_cast<unsigned>(SPIRV::FunctionParameterAttribute::NoWrite);
+        buildOpDecorate(VRegs[i][0], MIRBuilder,
+                        SPIRV::Decoration::FuncParamAttr, {Attr});
+      }
+      if (Arg.hasAttribute(Attribute::ZExt)) {
+        auto Attr =
+            static_cast<unsigned>(SPIRV::FunctionParameterAttribute::Zext);
+        buildOpDecorate(VRegs[i][0], MIRBuilder,
+                        SPIRV::Decoration::FuncParamAttr, {Attr});
+      }
+      ++i;
+    }
+  }
+
+  // Generate a SPIR-V type for the function.
+  auto MRI = MIRBuilder.getMRI();
+  Register FuncVReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
+  MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass);
+
+  auto *FTy = F.getFunctionType();
+  auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder);
+
+  // Build the OpTypeFunction declaring it.
+  Register ReturnTypeID = FuncTy->getOperand(1).getReg();
+  uint32_t FuncControl = getFunctionControl(F);
+
+  MIRBuilder.buildInstr(SPIRV::OpFunction)
+      .addDef(FuncVReg)
+      .addUse(ReturnTypeID)
+      .addImm(FuncControl)
+      .addUse(GR->getSPIRVTypeID(FuncTy));
+
+  // Add OpFunctionParameters.
+  const unsigned NumArgs = ArgTypeVRegs.size();
+  for (unsigned i = 0; i < NumArgs; ++i) {
+    assert(VRegs[i].size() == 1 && "Formal arg has multiple vregs");
+    MRI->setRegClass(VRegs[i][0], &SPIRV::IDRegClass);
+    MIRBuilder.buildInstr(SPIRV::OpFunctionParameter)
+        .addDef(VRegs[i][0])
+        .addUse(ArgTypeVRegs[i]);
+  }
+  // Name the function.
+  if (F.hasName())
+    buildOpName(FuncVReg, F.getName(), MIRBuilder);
+
+  // Handle entry points and function linkage.
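+  // Roughly, "define spir_kernel void @foo()" produces
+  //   OpEntryPoint Kernel %foo "foo"
+  // while an externally visible non-kernel "define void @bar()" gets
+  //   OpDecorate %bar LinkageAttributes "bar" Export
+  // (illustrative output; the exact asm spelling comes from SPIRVInstPrinter).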
+  if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
+    auto MIB =
+        MIRBuilder.buildInstr(SPIRV::OpEntryPoint)
+            .addImm(static_cast<uint32_t>(SPIRV::ExecutionModel::Kernel))
+            .addUse(FuncVReg);
+    addStringImm(F.getName(), MIB);
+  } else if (F.getLinkage() == GlobalValue::LinkageTypes::ExternalLinkage ||
+             F.getLinkage() == GlobalValue::LinkOnceODRLinkage) {
+    auto LnkTy = F.isDeclaration() ? SPIRV::LinkageType::Import
+                                   : SPIRV::LinkageType::Export;
+    buildOpDecorate(FuncVReg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
+                    {static_cast<uint32_t>(LnkTy)}, F.getGlobalIdentifier());
+  }
+
+  return true;
+}
+
+bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+                                  CallLoweringInfo &Info) const {
+  // Currently call returns should have single vregs.
+  // TODO: handle the case of multiple registers.
+  if (Info.OrigRet.Regs.size() > 1)
+    return false;
+
+  Register ResVReg =
+      Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0];
+  // Emit a regular OpFunctionCall. If it's an externally declared function,
+  // be sure to emit its type and function declaration here. It will be
+  // hoisted globally later.
+  if (Info.Callee.isGlobal()) {
+    auto *CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal());
+    // TODO: support constexpr casts and indirect calls.
+    if (CF == nullptr)
+      return false;
+    if (CF->isDeclaration()) {
+      // Emit the type info and forward function declaration to the first MBB
+      // to ensure VReg definition dependencies are valid across all MBBs.
+      MachineBasicBlock::iterator OldII = MIRBuilder.getInsertPt();
+      MachineBasicBlock &OldBB = MIRBuilder.getMBB();
+      MachineBasicBlock &FirstBB = *MIRBuilder.getMF().getBlockNumbered(0);
+      MIRBuilder.setInsertPt(FirstBB, FirstBB.instr_end());
+
+      SmallVector<ArrayRef<Register>, 8> VRegArgs;
+      SmallVector<SmallVector<Register, 1>, 8> ToInsert;
+      for (const Argument &Arg : CF->args()) {
+        if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero())
+          continue; // Don't handle zero sized types.
+        ToInsert.push_back({MIRBuilder.getMRI()->createGenericVirtualRegister(
+            LLT::scalar(32))});
+        VRegArgs.push_back(ToInsert.back());
+      }
+      // TODO: Reuse FunctionLoweringInfo.
+      FunctionLoweringInfo FuncInfo;
+      lowerFormalArguments(MIRBuilder, *CF, VRegArgs, FuncInfo);
+      MIRBuilder.setInsertPt(OldBB, OldII);
+    }
+  }
+
+  // Make sure there's a valid return reg, even for functions returning void.
+  if (!ResVReg.isValid()) {
+    ResVReg = MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass);
+  }
+  SPIRVType *RetType =
+      GR->assignTypeToVReg(Info.OrigRet.Ty, ResVReg, MIRBuilder);
+
+  // Emit the OpFunctionCall and its args.
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpFunctionCall)
+                 .addDef(ResVReg)
+                 .addUse(GR->getSPIRVTypeID(RetType))
+                 .add(Info.Callee);
+
+  for (const auto &Arg : Info.OrigArgs) {
+    // Currently call args should have single vregs.
+    if (Arg.Regs.size() > 1)
+      return false;
+    MIB.addUse(Arg.Regs[0]);
+  }
+  return MIB.constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
+                              *ST.getRegBankInfo());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.h b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
new file mode 100644
index 000000000000..c179bb35154b
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
@@ -0,0 +1,50 @@
+//===--- SPIRVCallLowering.h - Call lowering --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how to lower LLVM calls to machine code calls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
+
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+
+namespace llvm {
+
+class SPIRVGlobalRegistry;
+class SPIRVSubtarget;
+class SPIRVTargetLowering;
+
+class SPIRVCallLowering : public CallLowering {
+private:
+  const SPIRVSubtarget &ST;
+  // Used to create and assign function, argument, and return type information.
+  SPIRVGlobalRegistry *GR;
+
+public:
+  SPIRVCallLowering(const SPIRVTargetLowering &TLI, const SPIRVSubtarget &ST,
+                    SPIRVGlobalRegistry *GR);
+
+  // Build OpReturn or OpReturnValue.
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                   ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
+                   Register SwiftErrorVReg) const override;
+
+  // Build OpFunction, OpFunctionParameter, and any EntryPoint or Linkage data.
+  bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+                            ArrayRef<ArrayRef<Register>> VRegs,
+                            FunctionLoweringInfo &FLI) const override;
+
+  // Build OpFunctionCall, or replace the call with a builtin function.
+  bool lowerCall(MachineIRBuilder &MIRBuilder,
+                 CallLoweringInfo &Info) const override;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
new file mode 100644
index 000000000000..9624482e3622
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -0,0 +1,433 @@
+//===-- SPIRVEmitIntrinsics.cpp - emit SPIRV intrinsics ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass emits SPIRV intrinsics keeping essential high-level information for
+// the translation of LLVM IR to SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+
+#include <queue>
+
+// This pass performs the following transformations on the LLVM IR level that
+// are required for the later translation to SPIR-V:
+// - replaces direct usages of aggregate constants with target-specific
+//   intrinsics;
+// - replaces aggregate-related instructions (extract/insert, ld/st, etc.)
+//   with target-specific intrinsics;
+// - emits intrinsics for the global variable initializers since IRTranslator
+//   doesn't handle them and it's not very convenient to translate them
+//   ourselves;
+// - emits intrinsics to keep track of the string names assigned to the values;
+// - emits intrinsics to keep track of constants (this is necessary to have an
+//   LLVM IR constant after the IRTranslation is completed) for their further
+//   deduplication;
+// - emits intrinsics to keep track of original LLVM types of the values
+//   to be able to emit proper SPIR-V types eventually.
+//
+// TODO: consider removing spv.track.constant in favor of spv.assign.type.
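+//
+// As a rough before/after sketch (intrinsic signatures simplified for
+// illustration), an aggregate store such as:
+//   store %struct.S %v, %struct.S* %p
+// is rewritten into something like:
+//   call void @llvm.spv.store(%struct.S %v, %struct.S* %p, i16 flags, i8 align)
+// and a value named "%sum" additionally gets a
+//   call @llvm.spv.assign.name(...)
+// so the name survives into the GMIR stage.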
+
+using namespace llvm;
+
+namespace llvm {
+void initializeSPIRVEmitIntrinsicsPass(PassRegistry &);
+} // namespace llvm
+
+namespace {
+class SPIRVEmitIntrinsics
+    : public FunctionPass,
+      public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
+  SPIRVTargetMachine *TM = nullptr;
+  IRBuilder<> *IRB = nullptr;
+  Function *F = nullptr;
+  bool TrackConstants = true;
+  DenseMap<Instruction *, Constant *> AggrConsts;
+  DenseSet<Instruction *> AggrStores;
+  void preprocessCompositeConstants();
+  CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types,
+                            Value *Arg, Value *Arg2) {
+    ConstantAsMetadata *CM = ValueAsMetadata::getConstant(Arg);
+    MDTuple *TyMD = MDNode::get(F->getContext(), CM);
+    MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD);
+    return IRB->CreateIntrinsic(IntrID, {Types}, {Arg2, VMD});
+  }
+  void replaceMemInstrUses(Instruction *Old, Instruction *New);
+  void processInstrAfterVisit(Instruction *I);
+  void insertAssignTypeIntrs(Instruction *I);
+  void processGlobalValue(GlobalVariable &GV);
+
+public:
+  static char ID;
+  SPIRVEmitIntrinsics() : FunctionPass(ID) {
+    initializeSPIRVEmitIntrinsicsPass(*PassRegistry::getPassRegistry());
+  }
+  SPIRVEmitIntrinsics(SPIRVTargetMachine *_TM) : FunctionPass(ID), TM(_TM) {
+    initializeSPIRVEmitIntrinsicsPass(*PassRegistry::getPassRegistry());
+  }
+  Instruction *visitInstruction(Instruction &I) { return &I; }
+  Instruction *visitSwitchInst(SwitchInst &I);
+  Instruction *visitGetElementPtrInst(GetElementPtrInst &I);
+  Instruction *visitBitCastInst(BitCastInst &I);
+  Instruction *visitInsertElementInst(InsertElementInst &I);
+  Instruction *visitExtractElementInst(ExtractElementInst &I);
+  Instruction *visitInsertValueInst(InsertValueInst &I);
+  Instruction *visitExtractValueInst(ExtractValueInst &I);
+  Instruction *visitLoadInst(LoadInst &I);
+  Instruction *visitStoreInst(StoreInst &I);
+  Instruction *visitAllocaInst(AllocaInst &I);
+  bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char SPIRVEmitIntrinsics::ID = 0;
+
+INITIALIZE_PASS(SPIRVEmitIntrinsics, "emit-intrinsics", "SPIRV emit intrinsics",
+                false, false)
+
+static inline bool isAssignTypeInstr(const Instruction *I) {
+  return isa<IntrinsicInst>(I) &&
+         cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::spv_assign_type;
+}
+
+static bool isMemInstrToReplace(Instruction *I) {
+  return isa<LoadInst>(I) || isa<StoreInst>(I) || isa<ExtractValueInst>(I) ||
+         isa<InsertValueInst>(I);
+}
+
+static bool isAggrToReplace(const Value *V) {
+  return isa<ConstantAggregate>(V) || isa<ConstantDataArray>(V) ||
+         (isa<ConstantAggregateZero>(V) && !V->getType()->isVectorTy());
+}
+
+static void setInsertPointSkippingPhis(IRBuilder<> &B, Instruction *I) {
+  if (isa<PHINode>(I))
+    B.SetInsertPoint(I->getParent(), I->getParent()->getFirstInsertionPt());
+  else
+    B.SetInsertPoint(I);
+}
+
+static bool requireAssignType(Instruction *I) {
+  IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(I);
+  if (Intr) {
+    switch (Intr->getIntrinsicID()) {
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+      return false;
+    }
+  }
+  return true;
+}
+
+void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
+                                              Instruction *New) {
+  while (!Old->user_empty()) {
+    auto *U = Old->user_back();
+    if (isMemInstrToReplace(U) || isa<GetElementPtrInst>(U)) {
+      U->replaceUsesOfWith(Old, New);
+    } else if (isAssignTypeInstr(U)) {
+      IRB->SetInsertPoint(U);
+      SmallVector<Value *, 2> Args = {New, U->getOperand(1)};
+      IRB->CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+      U->eraseFromParent();
+    } else {
+      llvm_unreachable("illegal aggregate intrinsic user");
+    }
+  }
+  Old->eraseFromParent();
+}
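+
+// Sketch of what preprocessCompositeConstants() below achieves (illustrative
+// IR): a direct use of an aggregate constant such as
+//   ret [2 x i32] [i32 1, i32 2]
+// becomes
+//   %c = call [2 x i32] @llvm.spv.const.composite(i32 1, i32 2)
+//   ret [2 x i32] %c
+// with the original Constant remembered in AggrConsts for later type
+// assignment and deduplication.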
+
+void SPIRVEmitIntrinsics::preprocessCompositeConstants() {
+  std::queue<Instruction *> Worklist;
+  for (auto &I : instructions(F))
+    Worklist.push(&I);
+
+  while (!Worklist.empty()) {
+    auto *I = Worklist.front();
+    assert(I);
+    bool KeepInst = false;
+    for (const auto &Op : I->operands()) {
+      auto BuildCompositeIntrinsic = [&KeepInst, &Worklist, &I, &Op,
+                                      this](Constant *AggrC,
+                                            ArrayRef<Value *> Args) {
+        IRB->SetInsertPoint(I);
+        auto *CCI =
+            IRB->CreateIntrinsic(Intrinsic::spv_const_composite, {}, {Args});
+        Worklist.push(CCI);
+        I->replaceUsesOfWith(Op, CCI);
+        KeepInst = true;
+        AggrConsts[CCI] = AggrC;
+      };
+
+      if (auto *AggrC = dyn_cast<ConstantAggregate>(Op)) {
+        SmallVector<Value *> Args(AggrC->op_begin(), AggrC->op_end());
+        BuildCompositeIntrinsic(AggrC, Args);
+      } else if (auto *AggrC = dyn_cast<ConstantDataArray>(Op)) {
+        SmallVector<Value *> Args;
+        for (unsigned i = 0; i < AggrC->getNumElements(); ++i)
+          Args.push_back(AggrC->getElementAsConstant(i));
+        BuildCompositeIntrinsic(AggrC, Args);
+      } else if (isa<ConstantAggregateZero>(Op) &&
+                 !Op->getType()->isVectorTy()) {
+        auto *AggrC = cast<ConstantAggregateZero>(Op);
+        SmallVector<Value *> Args(AggrC->op_begin(), AggrC->op_end());
+        BuildCompositeIntrinsic(AggrC, Args);
+      }
+    }
+    if (!KeepInst)
+      Worklist.pop();
+  }
+}
+
+Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) {
+  SmallVector<Value *, 4> Args;
+  for (auto &Op : I.operands())
+    if (Op.get()->getType()->isSized())
+      Args.push_back(Op);
+  IRB->CreateIntrinsic(Intrinsic::spv_switch, {I.getOperand(0)->getType()},
+                       {Args});
+  return &I;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
+  SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()};
+  SmallVector<Value *, 4> Args;
+  Args.push_back(IRB->getInt1(I.isInBounds()));
+  for (auto &Op : I.operands())
+    Args.push_back(Op);
+  auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args});
+  I.replaceAllUsesWith(NewI);
+  I.eraseFromParent();
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
+  SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()};
+  SmallVector<Value *> Args(I.op_begin(), I.op_end());
+  auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_bitcast, {Types}, {Args});
+  std::string InstName = I.hasName() ? I.getName().str() : "";
+  I.replaceAllUsesWith(NewI);
+  I.eraseFromParent();
+  NewI->setName(InstName);
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) {
+  SmallVector<Type *, 4> Types = {I.getType(), I.getOperand(0)->getType(),
+                                  I.getOperand(1)->getType(),
+                                  I.getOperand(2)->getType()};
+  SmallVector<Value *> Args(I.op_begin(), I.op_end());
+  auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_insertelt, {Types}, {Args});
+  std::string InstName = I.hasName() ? I.getName().str() : "";
+  I.replaceAllUsesWith(NewI);
+  I.eraseFromParent();
+  NewI->setName(InstName);
+  return NewI;
+}
+
+Instruction *
+SPIRVEmitIntrinsics::visitExtractElementInst(ExtractElementInst &I) {
+  SmallVector<Type *, 3> Types = {I.getType(), I.getVectorOperandType(),
+                                  I.getIndexOperand()->getType()};
+  SmallVector<Value *, 2> Args = {I.getVectorOperand(), I.getIndexOperand()};
+  auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_extractelt, {Types}, {Args});
+  std::string InstName = I.hasName() ? I.getName().str() : "";
+  I.replaceAllUsesWith(NewI);
+  I.eraseFromParent();
+  NewI->setName(InstName);
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitInsertValueInst(InsertValueInst &I) {
+  SmallVector<Type *, 1> Types = {I.getInsertedValueOperand()->getType()};
+  SmallVector<Value *> Args;
+  for (auto &Op : I.operands())
+    if (isa<UndefValue>(Op))
+      Args.push_back(UndefValue::get(IRB->getInt32Ty()));
+    else
+      Args.push_back(Op);
+  for (auto &Op : I.indices())
+    Args.push_back(IRB->getInt32(Op));
+  Instruction *NewI =
+      IRB->CreateIntrinsic(Intrinsic::spv_insertv, {Types}, {Args});
+  replaceMemInstrUses(&I, NewI);
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitExtractValueInst(ExtractValueInst &I) {
+  SmallVector<Value *> Args;
+  for (auto &Op : I.operands())
+    Args.push_back(Op);
+  for (auto &Op : I.indices())
+    Args.push_back(IRB->getInt32(Op));
+  auto *NewI =
+      IRB->CreateIntrinsic(Intrinsic::spv_extractv, {I.getType()}, {Args});
+  I.replaceAllUsesWith(NewI);
+  I.eraseFromParent();
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitLoadInst(LoadInst &I) {
+  if (!I.getType()->isAggregateType())
+    return &I;
+  TrackConstants = false;
+  const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
+  MachineMemOperand::Flags Flags =
+      TLI->getLoadMemOperandFlags(I, F->getParent()->getDataLayout());
+  auto *NewI =
+      IRB->CreateIntrinsic(Intrinsic::spv_load, {I.getOperand(0)->getType()},
+                           {I.getPointerOperand(), IRB->getInt16(Flags),
+                            IRB->getInt8(I.getAlign().value())});
+  replaceMemInstrUses(&I, NewI);
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
+  if (!AggrStores.contains(&I))
+    return &I;
+  TrackConstants = false;
+  const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
+  MachineMemOperand::Flags Flags =
+      TLI->getStoreMemOperandFlags(I, F->getParent()->getDataLayout());
+  auto *PtrOp = I.getPointerOperand();
+  auto *NewI =
+      IRB->CreateIntrinsic(Intrinsic::spv_store, {PtrOp->getType()},
+                           {I.getValueOperand(), PtrOp, IRB->getInt16(Flags),
+                            IRB->getInt8(I.getAlign().value())});
+  I.eraseFromParent();
+  return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) {
+  TrackConstants = false;
+  return &I;
+}
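+
+// Illustrative effect of processGlobalValue() below: for
+//   @g = global i32 42
+// the pass emits
+//   call void @llvm.spv.init.global(i32* @g, i32 42)
+// while a never-used, uninitialized global instead gets
+//   call void @llvm.spv.unref.global(i32* @g)
+// so neither the initializer nor the variable itself is lost before ISel.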
+
+void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) {
+  // Skip the special artificial variable llvm.global.annotations.
+  if (GV.getName() == "llvm.global.annotations")
+    return;
+  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
+    Constant *Init = GV.getInitializer();
+    Type *Ty = isAggrToReplace(Init) ? IRB->getInt32Ty() : Init->getType();
+    Constant *Const = isAggrToReplace(Init) ? IRB->getInt32(1) : Init;
+    auto *InitInst = IRB->CreateIntrinsic(Intrinsic::spv_init_global,
+                                          {GV.getType(), Ty}, {&GV, Const});
+    InitInst->setArgOperand(1, Init);
+  }
+  if ((!GV.hasInitializer() || isa<UndefValue>(GV.getInitializer())) &&
+      GV.getNumUses() == 0)
+    IRB->CreateIntrinsic(Intrinsic::spv_unref_global, GV.getType(), &GV);
+}
+
+void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
+  Type *Ty = I->getType();
+  if (!Ty->isVoidTy() && requireAssignType(I)) {
+    setInsertPointSkippingPhis(*IRB, I->getNextNode());
+    Type *TypeToAssign = Ty;
+    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+      if (II->getIntrinsicID() == Intrinsic::spv_const_composite) {
+        auto t = AggrConsts.find(II);
+        assert(t != AggrConsts.end());
+        TypeToAssign = t->second->getType();
+      }
+    }
+    Constant *Const = Constant::getNullValue(TypeToAssign);
+    buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I);
+  }
+  for (const auto &Op : I->operands()) {
+    if (isa<ConstantPointerNull>(Op) || isa<UndefValue>(Op) ||
+        // Check GetElementPtrConstantExpr case.
+        (isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) {
+      IRB->SetInsertPoint(I);
+      buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op);
+    }
+  }
+  // StoreInst's operand type can be changed in the next stage so we need to
+  // store it in the set.
+  if (isa<StoreInst>(I) &&
+      cast<StoreInst>(I)->getValueOperand()->getType()->isAggregateType())
+    AggrStores.insert(I);
+}
+
+void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) {
+  auto *II = dyn_cast<IntrinsicInst>(I);
+  if (II && II->getIntrinsicID() == Intrinsic::spv_const_composite &&
+      TrackConstants) {
+    IRB->SetInsertPoint(I->getNextNode());
+    Type *Ty = IRB->getInt32Ty();
+    auto t = AggrConsts.find(I);
+    assert(t != AggrConsts.end());
+    auto *NewOp =
+        buildIntrWithMD(Intrinsic::spv_track_constant, {Ty, Ty}, t->second, I);
+    I->replaceAllUsesWith(NewOp);
+    NewOp->setArgOperand(0, I);
+  }
+  for (const auto &Op : I->operands()) {
+    if ((isa<ConstantAggregateZero>(Op) && Op->getType()->isVectorTy()) ||
+        isa<PHINode>(I) || isa<SwitchInst>(I))
+      TrackConstants = false;
+    if (isa<Constant>(Op) && TrackConstants) {
+      unsigned OpNo = Op.getOperandNo();
+      if (II && ((II->getIntrinsicID() == Intrinsic::spv_gep && OpNo == 0) ||
+                 (II->paramHasAttr(OpNo, Attribute::ImmArg))))
+        continue;
+      IRB->SetInsertPoint(I);
+      auto *NewOp = buildIntrWithMD(Intrinsic::spv_track_constant,
+                                    {Op->getType(), Op->getType()}, Op, Op);
+      I->setOperand(OpNo, NewOp);
+    }
+  }
+  if (I->hasName()) {
+    setInsertPointSkippingPhis(*IRB, I->getNextNode());
+    std::vector<Value *> Args = {I};
+    addStringImm(I->getName(), *IRB, Args);
+    IRB->CreateIntrinsic(Intrinsic::spv_assign_name, {I->getType()}, Args);
+  }
+}
+
+bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
+  if (Func.isDeclaration())
+    return false;
+  F = &Func;
+  IRB = new IRBuilder<>(Func.getContext());
+  AggrConsts.clear();
+  AggrStores.clear();
+
+  IRB->SetInsertPoint(&Func.getEntryBlock().front());
+
+  for (auto &GV : Func.getParent()->globals())
+    processGlobalValue(GV);
+
+  preprocessCompositeConstants();
+  SmallVector<Instruction *> Worklist;
+  for (auto &I : instructions(Func))
+    Worklist.push_back(&I);
+
+  for (auto &I : Worklist)
+    insertAssignTypeIntrs(I);
+
+  for (auto *I : Worklist) {
+    TrackConstants = true;
+    if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
+      IRB->SetInsertPoint(I->getNextNode());
+    I = visit(*I);
+    processInstrAfterVisit(I);
+  }
+  return true;
+}
+
+FunctionPass *llvm::createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM) {
+  return new SPIRVEmitIntrinsics(TM);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVEnums.td b/llvm/lib/Target/SPIRV/SPIRVEnums.td
new file mode 100644
index 000000000000..1d0c6ffd6e37
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVEnums.td
@@ -0,0 +1,51 @@
+//===-- SPIRVEnums.td - Describe SPIRV Enum Operands -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// All SPIRV enums defined in SPIRVBaseInfo.h should have a corresponding enum
+// operand here. This enables the correct PrintMethod to be defined so
+// its name or mask bits can be automatically printed in SPIRVInstPrinter
+// when referred to in SPIRVInstrInfo.td.
+//
+//===----------------------------------------------------------------------===//
+
+class EnumOperand<string Name> : Operand<i32> {
+  let PrintMethod = "print"#Name;
+}
+
+def ExtInst : EnumOperand<"ExtInst">;
+
+def Capability : EnumOperand<"Capability">;
+def SourceLanguage : EnumOperand<"SourceLanguage">;
+def ExecutionModel : EnumOperand<"ExecutionModel">;
+def AddressingModel : EnumOperand<"AddressingModel">;
+def MemoryModel : EnumOperand<"MemoryModel">;
+def ExecutionMode : EnumOperand<"ExecutionMode">;
+def StorageClass : EnumOperand<"StorageClass">;
+def Dim : EnumOperand<"Dim">;
+def SamplerAddressingMode : EnumOperand<"SamplerAddressingMode">;
+def SamplerFilterMode : EnumOperand<"SamplerFilterMode">;
+def ImageFormat : EnumOperand<"ImageFormat">;
+def ImageChannelOrder : EnumOperand<"ImageChannelOrder">;
+def ImageChannelDataType : EnumOperand<"ImageChannelDataType">;
+def ImageOperand : EnumOperand<"ImageOperand">;
+def FPFastMathMode : EnumOperand<"FPFastMathMode">;
+def FPRoundingMode : EnumOperand<"FPRoundingMode">;
+def LinkageType : EnumOperand<"LinkageType">;
+def AccessQualifier : EnumOperand<"AccessQualifier">;
+def FunctionParameterAttribute : EnumOperand<"FunctionParameterAttribute">;
+def Decoration : EnumOperand<"Decoration">;
+def Builtin : EnumOperand<"Builtin">;
+def SelectionControl: EnumOperand<"SelectionControl">;
+def LoopControl: EnumOperand<"LoopControl">;
+def FunctionControl : EnumOperand<"FunctionControl">;
+def MemorySemantics : EnumOperand<"MemorySemantics">;
+def MemoryOperand : EnumOperand<"MemoryOperand">;
+def Scope : EnumOperand<"Scope">;
+def GroupOperation : EnumOperand<"GroupOperation">;
+def KernelEnqueueFlags : EnumOperand<"KernelEnqueueFlags">;
+def KernelProfilingInfo : EnumOperand<"KernelProfilingInfo">;
diff --git a/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h b/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h
new file mode 100644
index 000000000000..b98f8d0928e5
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h
@@ -0,0 +1,39 @@
+//===-- SPIRVFrameLowering.h - Define frame lowering for SPIR-V -*- C++-*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements SPIRV-specific bits of TargetFrameLowering class.
+// The target uses only virtual registers. It does not operate with stack frame
+// explicitly and does not generate prologues/epilogues of functions.
+// As a result, we are not required to implement the frame lowering
+// functionality substantially.
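+// (Hence the empty emitPrologue/emitEpilogue overrides below, and hasFP()
+// always reporting that no frame pointer is needed.)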
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H + +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/Alignment.h" + +namespace llvm { +class SPIRVSubtarget; + +class SPIRVFrameLowering : public TargetFrameLowering { +public: + explicit SPIRVFrameLowering(const SPIRVSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {} + + void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const override {} + void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const override {} + + bool hasFP(const MachineFunction &MF) const override { return false; } +}; +} // namespace llvm +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp new file mode 100644 index 000000000000..02a6905a1abc --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -0,0 +1,459 @@ +//===-- SPIRVGlobalRegistry.cpp - SPIR-V Global Registry --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the SPIRVGlobalRegistry class, +// which is used to maintain rich type information required for SPIR-V even +// after lowering from LLVM IR to GMIR. It can convert an llvm::Type into +// an OpTypeXXX instruction, and map it to a virtual register. Also it builds +// and supports consistency of constants and global variables. +// +//===----------------------------------------------------------------------===// + +#include "SPIRVGlobalRegistry.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" + +using namespace llvm; +SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize) + : PointerSize(PointerSize) {} + +SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg( + const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccessQual, bool EmitIR) { + + SPIRVType *SpirvType = + getOrCreateSPIRVType(Type, MIRBuilder, AccessQual, EmitIR); + assignSPIRVTypeToVReg(SpirvType, VReg, MIRBuilder.getMF()); + return SpirvType; +} + +void SPIRVGlobalRegistry::assignSPIRVTypeToVReg(SPIRVType *SpirvType, + Register VReg, + MachineFunction &MF) { + VRegToTypeMap[&MF][VReg] = SpirvType; +} + +static Register createTypeVReg(MachineIRBuilder &MIRBuilder) { + auto &MRI = MIRBuilder.getMF().getRegInfo(); + auto Res = MRI.createGenericVirtualRegister(LLT::scalar(32)); + MRI.setRegClass(Res, &SPIRV::TYPERegClass); + return Res; +} + +static Register createTypeVReg(MachineRegisterInfo &MRI) { + auto Res = MRI.createGenericVirtualRegister(LLT::scalar(32)); + MRI.setRegClass(Res, &SPIRV::TYPERegClass); + return Res; +} + +SPIRVType *SPIRVGlobalRegistry::getOpTypeBool(MachineIRBuilder &MIRBuilder) { + return MIRBuilder.buildInstr(SPIRV::OpTypeBool) + .addDef(createTypeVReg(MIRBuilder)); +} + +SPIRVType *SPIRVGlobalRegistry::getOpTypeInt(uint32_t Width, + MachineIRBuilder &MIRBuilder, + bool IsSigned) { + auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeInt) + .addDef(createTypeVReg(MIRBuilder)) + .addImm(Width) + .addImm(IsSigned ? 
1 : 0);
+  return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeFloat(uint32_t Width,
+                                               MachineIRBuilder &MIRBuilder) {
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeFloat)
+                 .addDef(createTypeVReg(MIRBuilder))
+                 .addImm(Width);
+  return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeVoid(MachineIRBuilder &MIRBuilder) {
+  return MIRBuilder.buildInstr(SPIRV::OpTypeVoid)
+      .addDef(createTypeVReg(MIRBuilder));
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeVector(uint32_t NumElems,
+                                                SPIRVType *ElemType,
+                                                MachineIRBuilder &MIRBuilder) {
+  auto EleOpc = ElemType->getOpcode();
+  assert((EleOpc == SPIRV::OpTypeInt || EleOpc == SPIRV::OpTypeFloat ||
+          EleOpc == SPIRV::OpTypeBool) &&
+         "Invalid vector element type");
+
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeVector)
+                 .addDef(createTypeVReg(MIRBuilder))
+                 .addUse(getSPIRVTypeID(ElemType))
+                 .addImm(NumElems);
+  return MIB;
+}
+
+Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
+                                               MachineIRBuilder &MIRBuilder,
+                                               SPIRVType *SpvType,
+                                               bool EmitIR) {
+  auto &MF = MIRBuilder.getMF();
+  Register Res;
+  const IntegerType *LLVMIntTy;
+  if (SpvType)
+    LLVMIntTy = cast<IntegerType>(getTypeForSPIRVType(SpvType));
+  else
+    LLVMIntTy = IntegerType::getInt32Ty(MF.getFunction().getContext());
+  // Find a constant in DT or build a new one.
+  const auto ConstInt =
+      ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val);
+  unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+  Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+  assignTypeToVReg(LLVMIntTy, Res, MIRBuilder);
+  if (EmitIR)
+    MIRBuilder.buildConstant(Res, *ConstInt);
+  else
+    MIRBuilder.buildInstr(SPIRV::OpConstantI)
+        .addDef(Res)
+        .addImm(ConstInt->getSExtValue());
+  return Res;
+}
+
+Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
+                                              MachineIRBuilder &MIRBuilder,
+                                              SPIRVType *SpvType) {
+  auto &MF = MIRBuilder.getMF();
+  Register Res;
+  const Type *LLVMFPTy;
+  if (SpvType) {
+    LLVMFPTy = getTypeForSPIRVType(SpvType);
+    assert(LLVMFPTy->isFloatingPointTy());
+  } else {
+    LLVMFPTy = IntegerType::getFloatTy(MF.getFunction().getContext());
+  }
+  // Find a constant in DT or build a new one.
+  const auto ConstFP = ConstantFP::get(LLVMFPTy->getContext(), Val);
+  unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+  Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+  assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
+  MIRBuilder.buildFConstant(Res, *ConstFP);
+  return Res;
+}
+
+Register SPIRVGlobalRegistry::buildGlobalVariable(
+    Register ResVReg, SPIRVType *BaseType, StringRef Name,
+    const GlobalValue *GV, SPIRV::StorageClass Storage,
+    const MachineInstr *Init, bool IsConst, bool HasLinkageTy,
+    SPIRV::LinkageType LinkageType, MachineIRBuilder &MIRBuilder,
+    bool IsInstSelector) {
+  const GlobalVariable *GVar = nullptr;
+  if (GV)
+    GVar = cast<const GlobalVariable>(GV);
+  else {
+    // If GV is not passed explicitly, use the name to find or construct
+    // the global variable.
+    Module *M = MIRBuilder.getMF().getFunction().getParent();
+    GVar = M->getGlobalVariable(Name);
+    if (GVar == nullptr) {
+      const Type *Ty = getTypeForSPIRVType(BaseType); // TODO: check type.
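+      // No module-level variable matched the requested name, so an external
+      // declaration is created below from the LLVM type recovered from the
+      // SPIR-V base type.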
+      GVar = new GlobalVariable(*M, const_cast<Type *>(Ty), false,
+                                GlobalValue::ExternalLinkage, nullptr,
+                                Twine(Name));
+    }
+    GV = GVar;
+  }
+  Register Reg;
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpVariable)
+                 .addDef(ResVReg)
+                 .addUse(getSPIRVTypeID(BaseType))
+                 .addImm(static_cast<uint32_t>(Storage));
+
+  if (Init != 0) {
+    MIB.addUse(Init->getOperand(0).getReg());
+  }
+
+  // ISel may introduce a new register on this step, so we need to add it to
+  // DT and correct its type avoiding fails on the next stage.
+  if (IsInstSelector) {
+    const auto &Subtarget = CurMF->getSubtarget();
+    constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(),
+                                     *Subtarget.getRegisterInfo(),
+                                     *Subtarget.getRegBankInfo());
+  }
+  Reg = MIB->getOperand(0).getReg();
+
+  // Set to Reg the same type as ResVReg has.
+  auto MRI = MIRBuilder.getMRI();
+  assert(MRI->getType(ResVReg).isPointer() && "Pointer type is expected");
+  if (Reg != ResVReg) {
+    LLT RegLLTy = LLT::pointer(MRI->getType(ResVReg).getAddressSpace(), 32);
+    MRI->setType(Reg, RegLLTy);
+    assignSPIRVTypeToVReg(BaseType, Reg, MIRBuilder.getMF());
+  }
+
+  // If it's a global variable with name, output OpName for it.
+  if (GVar && GVar->hasName())
+    buildOpName(Reg, GVar->getName(), MIRBuilder);
+
+  // Output decorations for the GV.
+  // TODO: maybe move to GenerateDecorations pass.
+  if (IsConst)
+    buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::Constant, {});
+
+  if (GVar && GVar->getAlign().valueOrOne().value() != 1)
+    buildOpDecorate(
+        Reg, MIRBuilder, SPIRV::Decoration::Alignment,
+        {static_cast<uint32_t>(GVar->getAlign().valueOrOne().value())});
+
+  if (HasLinkageTy)
+    buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
+                    {static_cast<uint32_t>(LinkageType)}, Name);
+  return Reg;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems,
+                                               SPIRVType *ElemType,
+                                               MachineIRBuilder &MIRBuilder,
+                                               bool EmitIR) {
+  assert((ElemType->getOpcode() != SPIRV::OpTypeVoid) &&
+         "Invalid array element type");
+  Register NumElementsVReg =
+      buildConstantInt(NumElems, MIRBuilder, nullptr, EmitIR);
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeArray)
+                 .addDef(createTypeVReg(MIRBuilder))
+                 .addUse(getSPIRVTypeID(ElemType))
+                 .addUse(NumElementsVReg);
+  return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypePointer(SPIRV::StorageClass SC,
+                                                 SPIRVType *ElemType,
+                                                 MachineIRBuilder &MIRBuilder) {
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypePointer)
+                 .addDef(createTypeVReg(MIRBuilder))
+                 .addImm(static_cast<uint32_t>(SC))
+                 .addUse(getSPIRVTypeID(ElemType));
+  return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction(
+    SPIRVType *RetType, const SmallVectorImpl<SPIRVType *> &ArgTypes,
+    MachineIRBuilder &MIRBuilder) {
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeFunction)
+                 .addDef(createTypeVReg(MIRBuilder))
+                 .addUse(getSPIRVTypeID(RetType));
+  for (const SPIRVType *ArgType : ArgTypes)
+    MIB.addUse(getSPIRVTypeID(ArgType));
+  return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty,
+                                                MachineIRBuilder &MIRBuilder,
+                                                SPIRV::AccessQualifier AccQual,
+                                                bool EmitIR) {
+  if (auto IType = dyn_cast<IntegerType>(Ty)) {
+    const unsigned Width = IType->getBitWidth();
+    return Width == 1 ? getOpTypeBool(MIRBuilder)
+                      : getOpTypeInt(Width, MIRBuilder, false);
+  }
+  if (Ty->isFloatingPointTy())
+    return getOpTypeFloat(Ty->getPrimitiveSizeInBits(), MIRBuilder);
+  if (Ty->isVoidTy())
+    return getOpTypeVoid(MIRBuilder);
+  if (Ty->isVectorTy()) {
+    auto El = getOrCreateSPIRVType(cast<FixedVectorType>(Ty)->getElementType(),
+                                   MIRBuilder);
+    return getOpTypeVector(cast<FixedVectorType>(Ty)->getNumElements(), El,
+                           MIRBuilder);
+  }
+  if (Ty->isArrayTy()) {
+    auto *El = getOrCreateSPIRVType(Ty->getArrayElementType(), MIRBuilder);
+    return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder, EmitIR);
+  }
+  assert(!isa<StructType>(Ty) && "Unsupported StructType");
+  if (auto FType = dyn_cast<FunctionType>(Ty)) {
+    SPIRVType *RetTy = getOrCreateSPIRVType(FType->getReturnType(), MIRBuilder);
+    SmallVector<SPIRVType *> ParamTypes;
+    for (const auto &t : FType->params()) {
+      ParamTypes.push_back(getOrCreateSPIRVType(t, MIRBuilder));
+    }
+    return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
+  }
+  if (auto PType = dyn_cast<PointerType>(Ty)) {
+    SPIRVType *SpvElementType;
+    // At the moment, all opaque pointers correspond to i8 element type.
+    // TODO: change the implementation once opaque pointers are supported
+    // in the SPIR-V specification.
+    if (PType->isOpaque()) {
+      SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
+    } else {
+      Type *ElemType = PType->getNonOpaquePointerElementType();
+      // TODO: support OpenCL and SPIRV builtins like image2d_t that are passed
+      // as pointers, but should be treated as custom types like OpTypeImage.
+      assert(!isa<StructType>(ElemType) && "Unsupported StructType pointer");
+
+      // Otherwise, treat it as a regular pointer type.
+      SpvElementType = getOrCreateSPIRVType(
+          ElemType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR);
+    }
+    auto SC = addressSpaceToStorageClass(PType->getAddressSpace());
+    return getOpTypePointer(SC, SpvElementType, MIRBuilder);
+  }
+  llvm_unreachable("Unable to convert LLVM type to SPIRVType");
+}
+
+SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
+  auto t = VRegToTypeMap.find(CurMF);
+  if (t != VRegToTypeMap.end()) {
+    auto tt = t->second.find(VReg);
+    if (tt != t->second.end())
+      return tt->second;
+  }
+  return nullptr;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
+    const Type *Type, MachineIRBuilder &MIRBuilder,
+    SPIRV::AccessQualifier AccessQual, bool EmitIR) {
+  SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR);
+  VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType;
+  SPIRVToLLVMType[SpirvType] = Type;
+  return SpirvType;
+}
+
+bool SPIRVGlobalRegistry::isScalarOfType(Register VReg,
+                                         unsigned TypeOpcode) const {
+  SPIRVType *Type = getSPIRVTypeForVReg(VReg);
+  assert(Type && "isScalarOfType VReg has no type assigned");
+  return Type->getOpcode() == TypeOpcode;
+}
+
+bool SPIRVGlobalRegistry::isScalarOrVectorOfType(Register VReg,
+                                                 unsigned TypeOpcode) const {
+  SPIRVType *Type = getSPIRVTypeForVReg(VReg);
+  assert(Type && "isScalarOrVectorOfType VReg has no type assigned");
+  if (Type->getOpcode() == TypeOpcode)
+    return true;
+  if (Type->getOpcode() == SPIRV::OpTypeVector) {
+    Register ScalarTypeVReg = Type->getOperand(1).getReg();
+    SPIRVType *ScalarType = getSPIRVTypeForVReg(ScalarTypeVReg);
+    return ScalarType->getOpcode() == TypeOpcode;
+  }
+  return false;
+}
+
+unsigned
+SPIRVGlobalRegistry::getScalarOrVectorBitWidth(const SPIRVType *Type) const {
+  assert(Type && "Invalid Type pointer");
+  if (Type->getOpcode() == SPIRV::OpTypeVector) {
+    auto EleTypeReg = Type->getOperand(1).getReg();
+    Type = getSPIRVTypeForVReg(EleTypeReg);
+  }
+  if (Type->getOpcode() == SPIRV::OpTypeInt ||
+      Type->getOpcode() == SPIRV::OpTypeFloat)
+    return Type->getOperand(1).getImm();
+  if (Type->getOpcode() == SPIRV::OpTypeBool)
+    return 1;
+  llvm_unreachable("Attempting to get bit width of non-integer/float type.");
+}
+
+bool SPIRVGlobalRegistry::isScalarOrVectorSigned(const SPIRVType *Type) const {
+  assert(Type && "Invalid Type pointer");
+  if (Type->getOpcode() == SPIRV::OpTypeVector) {
+    auto EleTypeReg = Type->getOperand(1).getReg();
+    Type = getSPIRVTypeForVReg(EleTypeReg);
+  }
+  if (Type->getOpcode() == SPIRV::OpTypeInt)
+    return Type->getOperand(2).getImm() != 0;
+  llvm_unreachable("Attempting to get sign of non-integer type.");
+}
+
+SPIRV::StorageClass
+SPIRVGlobalRegistry::getPointerStorageClass(Register VReg) const {
+  SPIRVType *Type = getSPIRVTypeForVReg(VReg);
+  assert(Type && Type->getOpcode() == SPIRV::OpTypePointer &&
+         Type->getOperand(1).isImm() && "Pointer type is expected");
+  return static_cast<SPIRV::StorageClass>(Type->getOperand(1).getImm());
+}
+
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth,
+                                                 MachineIRBuilder &MIRBuilder) {
+  return getOrCreateSPIRVType(
+      IntegerType::get(MIRBuilder.getMF().getFunction().getContext(), BitWidth),
+      MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(Type *LLVMTy,
+                                                      MachineInstrBuilder MIB) {
+  SPIRVType *SpirvType = MIB;
+  VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType;
+  SPIRVToLLVMType[SpirvType] = LLVMTy;
+  return SpirvType;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
+    unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) {
+  Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth);
+  MachineBasicBlock &BB = *I.getParent();
+  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeInt))
+                 .addDef(createTypeVReg(CurMF->getRegInfo()))
+                 .addImm(BitWidth)
+                 .addImm(0);
+  return restOfCreateSPIRVType(LLVMTy, MIB);
+}
+
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) {
+  return getOrCreateSPIRVType(
+      IntegerType::get(MIRBuilder.getMF().getFunction().getContext(), 1),
+      MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
+    SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder) {
+  return getOrCreateSPIRVType(
+      FixedVectorType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
+                           NumElements),
+      MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
+    SPIRVType *BaseType, unsigned NumElements, MachineInstr &I,
+    const SPIRVInstrInfo &TII) {
+  Type *LLVMTy = FixedVectorType::get(
+      const_cast<Type *>(getTypeForSPIRVType(BaseType)), NumElements);
+  MachineBasicBlock &BB = *I.getParent();
+  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeVector))
+                 .addDef(createTypeVReg(CurMF->getRegInfo()))
+                 .addUse(getSPIRVTypeID(BaseType))
+                 .addImm(NumElements);
+  return restOfCreateSPIRVType(LLVMTy, MIB);
+}
+
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(SPIRVType *BaseType,
+                                                 MachineIRBuilder &MIRBuilder,
+                                                 SPIRV::StorageClass SClass) {
+  return getOrCreateSPIRVType(
+      PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
+                       storageClassToAddressSpace(SClass)),
+      MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
+    SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII,
+    SPIRV::StorageClass SC) {
+  Type *LLVMTy =
+      PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
+                       storageClassToAddressSpace(SC));
+  MachineBasicBlock &BB = *I.getParent();
+  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypePointer))
+                 .addDef(createTypeVReg(CurMF->getRegInfo()))
+                 .addImm(static_cast<uint32_t>(SC))
+                 .addUse(getSPIRVTypeID(BaseType));
+  return restOfCreateSPIRVType(LLVMTy, MIB);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
new file mode 100644
index 000000000000..952ab4c13e29
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -0,0 +1,174 @@
+//===-- SPIRVGlobalRegistry.h - SPIR-V Global Registry ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// SPIRVGlobalRegistry is used to maintain rich type information required for
+// SPIR-V even after lowering from LLVM IR to GMIR. It can convert an llvm::Type
+// into an OpTypeXXX instruction, and map it to a virtual register. Also it
+// builds and supports consistency of constants and global variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRVInstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+
+namespace llvm {
+using SPIRVType = const MachineInstr;
+
+class SPIRVGlobalRegistry {
+  // Registers holding values which have types associated with them.
+  // Initialized upon VReg definition in IRTranslator.
+  // Do not confuse this with DuplicatesTracker as DT maps Type* to <MF, Reg>
+  // where Reg = OpType...
+  // while VRegToTypeMap tracks SPIR-V type assigned to other regs (i.e. not
+  // type-declaring ones)
+  DenseMap<MachineFunction *, DenseMap<Register, SPIRVType *>> VRegToTypeMap;
+
+  DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType;
+
+  // Number of bits pointers and size_t integers require.
+  const unsigned PointerSize;
+
+  // Add a new OpTypeXXX instruction without checking for duplicates.
+  SPIRVType *
+  createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
+                  SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
+                  bool EmitIR = true);
+
+public:
+  SPIRVGlobalRegistry(unsigned PointerSize);
+
+  MachineFunction *CurMF;
+
+  // Get or create a SPIR-V type corresponding to the given LLVM IR type,
+  // and map it to the given VReg by creating an ASSIGN_TYPE instruction.
+  SPIRVType *assignTypeToVReg(
+      const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder,
+      SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
+      bool EmitIR = true);
+
+  // In cases where the SPIR-V type is already known, this function can be
+  // used to map it to the given VReg via an ASSIGN_TYPE instruction.
+  void assignSPIRVTypeToVReg(SPIRVType *Type, Register VReg,
+                             MachineFunction &MF);
+
+  // Either generate a new OpTypeXXX instruction or return an existing one
+  // corresponding to the given LLVM IR type.
+  // EmitIR controls if we emit GMIR or SPV constants (e.g. for array sizes)
+  // because this method may be called from InstructionSelector and we don't
+  // want to emit extra IR instructions there.
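+  // A hypothetical call site (the names GR and Ctx are illustrative, not
+  // from this patch):
+  //   SPIRVType *I32Ty = GR.getOrCreateSPIRVType(
+  //       IntegerType::get(Ctx, 32), MIRBuilder,
+  //       SPIRV::AccessQualifier::ReadWrite, /*EmitIR=*/false);
+  // With EmitIR=false, helper constants such as array lengths are emitted
+  // directly as OpConstantI rather than G_CONSTANT (see buildConstantInt).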
+  SPIRVType *getOrCreateSPIRVType(
+      const Type *Type, MachineIRBuilder &MIRBuilder,
+      SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
+      bool EmitIR = true);
+
+  const Type *getTypeForSPIRVType(const SPIRVType *Ty) const {
+    auto Res = SPIRVToLLVMType.find(Ty);
+    assert(Res != SPIRVToLLVMType.end());
+    return Res->second;
+  }
+
+  // Return the SPIR-V type instruction corresponding to the given VReg, or
+  // nullptr if no such type instruction exists.
+  SPIRVType *getSPIRVTypeForVReg(Register VReg) const;
+
+  // Whether the given VReg has a SPIR-V type mapped to it yet.
+  bool hasSPIRVTypeForVReg(Register VReg) const {
+    return getSPIRVTypeForVReg(VReg) != nullptr;
+  }
+
+  // Return the VReg holding the result of the given OpTypeXXX instruction.
+  Register getSPIRVTypeID(const SPIRVType *SpirvType) const {
+    assert(SpirvType && "Attempting to get type id for nullptr type.");
+    return SpirvType->defs().begin()->getReg();
+  }
+
+  void setCurrentFunc(MachineFunction &MF) { CurMF = &MF; }
+
+  // Whether the given VReg has an OpTypeXXX instruction mapped to it with the
+  // given opcode (e.g. OpTypeFloat).
+  bool isScalarOfType(Register VReg, unsigned TypeOpcode) const;
+
+  // Return true if the given VReg's assigned SPIR-V type is either a scalar
+  // matching the given opcode, or a vector with an element type matching that
+  // opcode (e.g. OpTypeBool, or OpTypeVector %x 4, where %x is OpTypeBool).
+  bool isScalarOrVectorOfType(Register VReg, unsigned TypeOpcode) const;
+
+  // For vectors or scalars of ints/floats, return the scalar type's bitwidth.
+  unsigned getScalarOrVectorBitWidth(const SPIRVType *Type) const;
+
+  // For integer vectors or scalars, return whether the integers are signed.
+  bool isScalarOrVectorSigned(const SPIRVType *Type) const;
+
+  // Gets the storage class of the pointer type assigned to this vreg.
+  SPIRV::StorageClass getPointerStorageClass(Register VReg) const;
+
+  // Return the number of bits SPIR-V pointers and size_t variables require.
+  unsigned getPointerSize() const { return PointerSize; }
+
+private:
+  SPIRVType *getOpTypeBool(MachineIRBuilder &MIRBuilder);
+
+  SPIRVType *getOpTypeInt(uint32_t Width, MachineIRBuilder &MIRBuilder,
+                          bool IsSigned = false);
+
+  SPIRVType *getOpTypeFloat(uint32_t Width, MachineIRBuilder &MIRBuilder);
+
+  SPIRVType *getOpTypeVoid(MachineIRBuilder &MIRBuilder);
+
+  SPIRVType *getOpTypeVector(uint32_t NumElems, SPIRVType *ElemType,
+                             MachineIRBuilder &MIRBuilder);
+
+  SPIRVType *getOpTypeArray(uint32_t NumElems, SPIRVType *ElemType,
+                            MachineIRBuilder &MIRBuilder, bool EmitIR = true);
+
+  SPIRVType *getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType,
+                              MachineIRBuilder &MIRBuilder);
+
+  SPIRVType *getOpTypeFunction(SPIRVType *RetType,
+                               const SmallVectorImpl<SPIRVType *> &ArgTypes,
+                               MachineIRBuilder &MIRBuilder);
+  SPIRVType *restOfCreateSPIRVType(Type *LLVMTy, MachineInstrBuilder MIB);
+
+public:
+  Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
+                            SPIRVType *SpvType = nullptr, bool EmitIR = true);
+  Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder,
+                           SPIRVType *SpvType = nullptr);
+  Register
+  buildGlobalVariable(Register Reg, SPIRVType *BaseType, StringRef Name,
+                      const GlobalValue *GV, SPIRV::StorageClass Storage,
+                      const MachineInstr *Init, bool IsConst, bool HasLinkageTy,
+                      SPIRV::LinkageType LinkageType,
+                      MachineIRBuilder &MIRBuilder, bool IsInstSelector);
+
+  // Convenient helpers for getting types with check for duplicates.
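+  // For example, getOrCreateSPIRVVectorType(Int32Ty, 4, MIRBuilder) returns
+  // the OpTypeVector of four 32-bit integers, creating the type instruction
+  // only when an equivalent one has not already been emitted (Int32Ty being a
+  // previously created SPIRVType; illustrative usage, not from this patch).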
+  SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth,
+                                         MachineIRBuilder &MIRBuilder);
+  SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I,
+                                         const SPIRVInstrInfo &TII);
+  SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder);
+  SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
+                                        unsigned NumElements,
+                                        MachineIRBuilder &MIRBuilder);
+  SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
+                                        unsigned NumElements, MachineInstr &I,
+                                        const SPIRVInstrInfo &TII);
+
+  SPIRVType *getOrCreateSPIRVPointerType(
+      SPIRVType *BaseType, MachineIRBuilder &MIRBuilder,
+      SPIRV::StorageClass SClass = SPIRV::StorageClass::Function);
+  SPIRVType *getOrCreateSPIRVPointerType(
+      SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII,
+      SPIRV::StorageClass SC = SPIRV::StorageClass::Function);
+};
+} // end namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
new file mode 100644
index 000000000000..66ff51c912b0
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -0,0 +1,45 @@
+//===- SPIRVISelLowering.cpp - SPIR-V DAG Lowering Impl ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIRVTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVISelLowering.h"
+#include "SPIRV.h"
+
+#define DEBUG_TYPE "spirv-lower"
+
+using namespace llvm;
+
+unsigned SPIRVTargetLowering::getNumRegistersForCallingConv(
+    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+  // This code avoids CallLowering fail inside getVectorTypeBreakdown
+  // on v3i1 arguments. Maybe we need to return 1 for all types.
+  // TODO: remove it once this case is supported by the default implementation.
+  if (VT.isVector() && VT.getVectorNumElements() == 3 &&
+      (VT.getVectorElementType() == MVT::i1 ||
+       VT.getVectorElementType() == MVT::i8))
+    return 1;
+  return getNumRegisters(Context, VT);
+}
+
+MVT SPIRVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                       CallingConv::ID CC,
+                                                       EVT VT) const {
+  // This code avoids CallLowering fail inside getVectorTypeBreakdown
+  // on v3i1 arguments. Maybe we need to return i32 for all types.
+  // TODO: remove it once this case is supported by the default implementation.
+  if (VT.isVector() && VT.getVectorNumElements() == 3) {
+    if (VT.getVectorElementType() == MVT::i1)
+      return MVT::v4i1;
+    else if (VT.getVectorElementType() == MVT::i8)
+      return MVT::v4i8;
+  }
+  return getRegisterType(Context, VT);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
new file mode 100644
index 000000000000..bee9220f5248
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
@@ -0,0 +1,47 @@
+//===-- SPIRVISelLowering.h - SPIR-V DAG Lowering Interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SPIR-V uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVISELLOWERING_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVISELLOWERING_H
+
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+class SPIRVSubtarget;
+
+class SPIRVTargetLowering : public TargetLowering {
+public:
+  explicit SPIRVTargetLowering(const TargetMachine &TM,
+                               const SPIRVSubtarget &STI)
+      : TargetLowering(TM) {}
+
+  // Stop IRTranslator breaking up FMA instrs to preserve types information.
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT) const override {
+    return true;
+  }
+
+  // This is to prevent sexts of non-i64 vector indices which are generated
+  // within general IRTranslator hence type generation for it is omitted.
+  MVT getVectorIdxTy(const DataLayout &DL) const override {
+    return MVT::getIntegerVT(32);
+  }
+  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                         CallingConv::ID CC,
+                                         EVT VT) const override;
+  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+                                    EVT VT) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVISELLOWERING_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrFormats.td b/llvm/lib/Target/SPIRV/SPIRVInstrFormats.td
new file mode 100644
index 000000000000..c78c8ee11590
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrFormats.td
@@ -0,0 +1,31 @@
+//===-- SPIRVInstrFormats.td - SPIR-V Instruction Formats --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def StringImm: Operand<i32>{
+  let PrintMethod="printStringImm";
+}
+
+class Op<bits<16> Opcode, dag outs, dag ins, string asmstr,
+         list<dag> pattern = []> : Instruction {
+  field bits<16> Inst;
+
+  let Inst = Opcode;
+
+  let Namespace = "SPIRV";
+  let DecoderNamespace = "SPIRV";
+
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins> : Op<0, outs, ins, ""> {
+  let isPseudo = 1;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
new file mode 100644
index 000000000000..754906308114
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -0,0 +1,195 @@
+//===-- SPIRVInstrInfo.cpp - SPIR-V Instruction Information ------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetInstrInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "SPIRVInstrInfo.h" +#include "SPIRV.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_CTOR_DTOR +#include "SPIRVGenInstrInfo.inc" + +using namespace llvm; + +SPIRVInstrInfo::SPIRVInstrInfo() : SPIRVGenInstrInfo() {} + +bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case SPIRV::OpConstantTrue: + case SPIRV::OpConstantFalse: + case SPIRV::OpConstantI: + case SPIRV::OpConstantF: + case SPIRV::OpConstantComposite: + case SPIRV::OpConstantSampler: + case SPIRV::OpConstantNull: + case SPIRV::OpSpecConstantTrue: + case SPIRV::OpSpecConstantFalse: + case SPIRV::OpSpecConstant: + case SPIRV::OpSpecConstantComposite: + case SPIRV::OpSpecConstantOp: + case SPIRV::OpUndef: + return true; + default: + return false; + } +} + +bool SPIRVInstrInfo::isTypeDeclInstr(const MachineInstr &MI) const { + auto &MRI = MI.getMF()->getRegInfo(); + if (MI.getNumDefs() >= 1 && MI.getOperand(0).isReg()) { + auto DefRegClass = MRI.getRegClassOrNull(MI.getOperand(0).getReg()); + return DefRegClass && DefRegClass->getID() == SPIRV::TYPERegClass.getID(); + } else { + return false; + } +} + +bool SPIRVInstrInfo::isDecorationInstr(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case SPIRV::OpDecorate: + case SPIRV::OpDecorateId: + case SPIRV::OpDecorateString: + case SPIRV::OpMemberDecorate: + case SPIRV::OpMemberDecorateString: + return true; + default: + return false; + } +} + +bool SPIRVInstrInfo::isHeaderInstr(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case SPIRV::OpCapability: + case SPIRV::OpExtension: + case SPIRV::OpExtInstImport: + case SPIRV::OpMemoryModel: + case SPIRV::OpEntryPoint: + case SPIRV::OpExecutionMode: + case SPIRV::OpExecutionModeId: + case SPIRV::OpString: + case SPIRV::OpSourceExtension: + case SPIRV::OpSource: + case SPIRV::OpSourceContinued: + case SPIRV::OpName: + case SPIRV::OpMemberName: + case SPIRV::OpModuleProcessed: + return true; + default: + return isTypeDeclInstr(MI) || isConstantInstr(MI) || isDecorationInstr(MI); + } +} + +// Analyze the branching code at the end of MBB, returning +// true if it cannot be understood (e.g. it's a switch dispatch or isn't +// implemented for a target). Upon success, this returns false and returns +// with the following information in various cases: +// +// 1. If this block ends with no branches (it just falls through to its succ) +// just return false, leaving TBB/FBB null. +// 2. If this block ends with only an unconditional branch, it sets TBB to be +// the destination block. +// 3. If this block ends with a conditional branch and it falls through to a +// successor block, it sets TBB to be the branch destination block and a +// list of operands that evaluate the condition. These operands can be +// passed to other TargetInstrInfo methods to create new branches. +// 4. If this block ends with a conditional branch followed by an +// unconditional branch, it returns the 'true' destination in TBB, the +// 'false' destination in FBB, and a list of operands that evaluate the +// condition. These operands can be passed to other TargetInstrInfo +// methods to create new branches. 
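+//
+// For SPIR-V, case 2 corresponds to a block that ends in OpBranch, and cases
+// 3 and 4 to one that ends in OpBranchConditional, as handled below.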
+//
+// Note that removeBranch and insertBranch must be implemented to support
+// cases where this method returns success.
+//
+// If AllowModify is true, then this routine is allowed to modify the basic
+// block (e.g. delete instructions after the unconditional branch).
+//
+// The CFG information in MBB.Predecessors and MBB.Successors must be valid
+// before calling this function.
+bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const {
+  TBB = nullptr;
+  FBB = nullptr;
+  if (MBB.empty())
+    return false;
+  auto MI = MBB.getLastNonDebugInstr();
+  if (!MI.isValid())
+    return false;
+  if (MI->getOpcode() == SPIRV::OpBranch) {
+    TBB = MI->getOperand(0).getMBB();
+    return false;
+  } else if (MI->getOpcode() == SPIRV::OpBranchConditional) {
+    Cond.push_back(MI->getOperand(0));
+    TBB = MI->getOperand(1).getMBB();
+    if (MI->getNumOperands() == 3) {
+      FBB = MI->getOperand(2).getMBB();
+    }
+    return false;
+  } else {
+    return true;
+  }
+}
+
+// Remove the branching code at the end of the specific MBB.
+// This is only invoked in cases where analyzeBranch returns success. It
+// returns the number of instructions that were removed.
+// If \p BytesRemoved is non-null, report the change in code size from the
+// removed instructions.
+unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                      int *BytesRemoved) const {
+  report_fatal_error("Branch removal not supported, as MBB info not propagated"
+                     " to OpPhi instructions. Try using -O0 instead.");
+}
+
+// Insert branch code into the end of the specified MachineBasicBlock. The
+// operands to this method are the same as those returned by analyzeBranch.
+// This is only invoked in cases where analyzeBranch returns success. It
+// returns the number of instructions inserted. If \p BytesAdded is non-null,
+// report the change in code size from the added instructions.
+//
+// It is also invoked by tail merging to add unconditional branches in
+// cases where analyzeBranch doesn't apply because there was no original
+// branch to analyze. At least this much must be implemented, else tail
+// merging needs to be disabled.
+//
+// The CFG information in MBB.Predecessors and MBB.Successors must be valid
+// before calling this function.
+unsigned SPIRVInstrInfo::insertBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+  report_fatal_error("Branch insertion not supported, as MBB info not "
+                     "propagated to OpPhi instructions. Try using "
+                     "-O0 instead.");
+}
+
+void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator I,
+                                 const DebugLoc &DL, MCRegister DestReg,
+                                 MCRegister SrcReg, bool KillSrc) const {
+  // Actually we don't need this COPY instruction. However if we do nothing with
+  // it, post RA pseudo instrs expansion just removes it and we get the code
+  // with undef registers. Therefore, we need to replace all uses of dst with
+  // the src register. COPY instr itself will be safely removed later.
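+  // In effect, a COPY such as
+  //   %dst:id = COPY %src:id
+  // is folded away here: every use of %dst is rewritten to %src via
+  // MRI.replaceRegWith(), which leaves the COPY itself trivially dead.
+  // (Illustrative MIR snippet, not part of the original patch.)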
+  assert(I->isCopy() && "Copy instruction is expected");
+  auto DstOp = I->getOperand(0);
+  auto SrcOp = I->getOperand(1);
+  assert(DstOp.isReg() && SrcOp.isReg() &&
+         "Register operands are expected in COPY");
+  auto &MRI = I->getMF()->getRegInfo();
+  MRI.replaceRegWith(DstOp.getReg(), SrcOp.getReg());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
new file mode 100644
index 000000000000..2600d9cfca2e
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
@@ -0,0 +1,54 @@
+//===-- SPIRVInstrInfo.h - SPIR-V Instruction Information -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVINSTRINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVINSTRINFO_H
+
+#include "SPIRVRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SPIRVGenInstrInfo.inc"
+
+namespace llvm {
+
+class SPIRVInstrInfo : public SPIRVGenInstrInfo {
+  const SPIRVRegisterInfo RI;
+
+public:
+  SPIRVInstrInfo();
+
+  const SPIRVRegisterInfo &getRegisterInfo() const { return RI; }
+  bool isHeaderInstr(const MachineInstr &MI) const;
+  bool isConstantInstr(const MachineInstr &MI) const;
+  bool isTypeDeclInstr(const MachineInstr &MI) const;
+  bool isDecorationInstr(const MachineInstr &MI) const;
+
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify = false) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+                   bool KillSrc) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVINSTRINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
new file mode 100644
index 000000000000..d6fec5fd0785
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -0,0 +1,732 @@
+//===-- SPIRVInstrInfo.td - Target Description for SPIR-V Target ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SPIR-V instructions in TableGen format.
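+//
+// Each instruction below instantiates the Op class from
+// SPIRVInstrFormats.td, so its SPIR-V opcode is carried in the 16-bit Inst
+// field and its AsmString mirrors SPIR-V disassembly syntax, e.g.
+// "$res = OpLoad $resType $pointer".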
+// +//===----------------------------------------------------------------------===// + +include "SPIRVInstrFormats.td" +include "SPIRVEnums.td" + +// Codegen only metadata instructions +let isCodeGenOnly=1 in { + def ASSIGN_TYPE: Pseudo<(outs ANYID:$dst_id), (ins ANYID:$src_id, TYPE:$src_ty)>; + def DECL_TYPE: Pseudo<(outs ANYID:$dst_id), (ins ANYID:$src_id, TYPE:$src_ty)>; + def GET_ID: Pseudo<(outs ID:$dst_id), (ins ANYID:$src)>; + def GET_fID: Pseudo<(outs fID:$dst_id), (ins ANYID:$src)>; + def GET_pID: Pseudo<(outs pID:$dst_id), (ins ANYID:$src)>; + def GET_vID: Pseudo<(outs vID:$dst_id), (ins ANYID:$src)>; + def GET_vfID: Pseudo<(outs vfID:$dst_id), (ins ANYID:$src)>; +} + +def SPVTypeBin : SDTypeProfile<1, 2, []>; + +def assigntype : SDNode<"SPIRVISD::AssignType", SPVTypeBin>; + +def : GINodeEquiv; + +class BinOp opCode, list pattern=[]> + : Op; + +class BinOpTyped opCode, RegisterClass CID, SDNode node> + : Op; + +class TernOpTyped opCode, RegisterClass CCond, RegisterClass CID, SDNode node> + : Op; + +multiclass BinOpTypedGen opCode, SDNode node, bit genF = 0, bit genV = 0> { + if genF then + def S: BinOpTyped; + else + def S: BinOpTyped; + if genV then { + if genF then + def V: BinOpTyped; + else + def V: BinOpTyped; + } +} + +multiclass TernOpTypedGen opCode, SDNode node, bit genI = 1, bit genF = 0, bit genV = 0> { + if genF then { + def SFSCond: TernOpTyped; + def SFVCond: TernOpTyped; + } + if genI then { + def SISCond: TernOpTyped; + def SIVCond: TernOpTyped; + } + if genV then { + if genF then { + def VFSCond: TernOpTyped; + def VFVCond: TernOpTyped; + } + if genI then { + def VISCond: TernOpTyped; + def VIVCond: TernOpTyped; + } + } +} + +class UnOp opCode, list pattern=[]> + : Op; +class UnOpTyped opCode, RegisterClass CID, SDNode node> + : Op; + +class SimpleOp opCode>: Op; + +// 3.42.1 Miscellaneous Instructions + +def OpNop: SimpleOp<"OpNop", 0>; +def OpUndef: Op<1, (outs ID:$res), (ins TYPE:$type), "$res = OpUndef $type">; +def OpSizeOf: Op<321, (outs ID:$res), (ins TYPE:$ty, ID:$ptr), "$res = OpSizeOf $ty $ptr">; + +// 3.42.2 Debug Instructions + +def OpSourceContinued: Op<2, (outs), (ins StringImm:$str, variable_ops), + "OpSourceContinued $str">; +def OpSource: Op<3, (outs), (ins SourceLanguage:$lang, i32imm:$version, variable_ops), + "OpSource $lang $version">; +def OpSourceExtension: Op<4, (outs), (ins StringImm:$extension, variable_ops), + "OpSourceExtension $extension">; +def OpName: Op<5, (outs), (ins ANY:$tar, StringImm:$name, variable_ops), "OpName $tar $name">; +def OpMemberName: Op<6, (outs), (ins TYPE:$ty, i32imm:$mem, StringImm:$name, variable_ops), + "OpMemberName $ty $mem $name">; +def OpString: Op<7, (outs ID:$r), (ins StringImm:$s, variable_ops), "$r = OpString $s">; +def OpLine: Op<8, (outs), (ins ID:$file, i32imm:$ln, i32imm:$col), "OpLine $file $ln $col">; +def OpNoLine: Op<317, (outs), (ins), "OpNoLine">; +def OpModuleProcessed: Op<330, (outs), (ins StringImm:$process, variable_ops), + "OpModuleProcessed $process">; + +// 3.42.3 Annotation Instructions + +def OpDecorate: Op<71, (outs), (ins ANY:$target, Decoration:$dec, variable_ops), + "OpDecorate $target $dec">; +def OpMemberDecorate: Op<72, (outs), (ins TYPE:$t, i32imm:$m, Decoration:$d, variable_ops), + "OpMemberDecorate $t $m $d">; + +// TODO Currently some deprecated opcodes are missing: OpDecorationGroup, +// OpGroupDecorate and OpGroupMemberDecorate + +def OpDecorateId: Op<332, (outs), (ins ANY:$target, Decoration:$dec, variable_ops), + "OpDecorateId $target $dec">; +def OpDecorateString: 
Op<5632, (outs), (ins ANY:$t, Decoration:$d, StringImm:$s, variable_ops), + "OpDecorateString $t $d $s">; +def OpMemberDecorateString: Op<5633, (outs), + (ins TYPE:$ty, i32imm:$mem, Decoration:$dec, StringImm:$str, variable_ops), + "OpMemberDecorateString $ty $mem $dec $str">; + +// 3.42.4 Extension Instructions + +def OpExtension: Op<10, (outs), (ins StringImm:$name, variable_ops), "OpExtension $name">; +def OpExtInstImport: Op<11, (outs ID:$res), (ins StringImm:$extInstsName, variable_ops), + "$res = OpExtInstImport $extInstsName">; +def OpExtInst: Op<12, (outs ID:$res), (ins TYPE:$ty, ID:$set, ExtInst:$inst, variable_ops), + "$res = OpExtInst $ty $set $inst">; + +// 3.42.5 Mode-Setting Instructions + +def OpMemoryModel: Op<14, (outs), (ins AddressingModel:$addr, MemoryModel:$mem), + "OpMemoryModel $addr $mem">; +def OpEntryPoint: Op<15, (outs), + (ins ExecutionModel:$model, ID:$entry, StringImm:$name, variable_ops), + "OpEntryPoint $model $entry $name">; +def OpExecutionMode: Op<16, (outs), (ins ID:$entry, ExecutionMode:$mode, variable_ops), + "OpExecutionMode $entry $mode">; +def OpCapability: Op<17, (outs), (ins Capability:$cap), "OpCapability $cap">; +def OpExecutionModeId: Op<331, (outs), (ins ID:$entry, ExecutionMode:$mode, variable_ops), + "OpExecutionModeId $entry $mode">; + +// 3.42.6 Type-Declaration Instructions + +def OpTypeVoid: Op<19, (outs TYPE:$type), (ins), "$type = OpTypeVoid">; +def OpTypeBool: Op<20, (outs TYPE:$type), (ins), "$type = OpTypeBool">; +def OpTypeInt: Op<21, (outs TYPE:$type), (ins i32imm:$width, i32imm:$signedness), + "$type = OpTypeInt $width $signedness">; +def OpTypeFloat: Op<22, (outs TYPE:$type), (ins i32imm:$width), + "$type = OpTypeFloat $width">; +def OpTypeVector: Op<23, (outs TYPE:$type), (ins TYPE:$compType, i32imm:$compCount), + "$type = OpTypeVector $compType $compCount">; +def OpTypeMatrix: Op<24, (outs TYPE:$type), (ins TYPE:$colType, i32imm:$colCount), + "$type = OpTypeMatrix $colType $colCount">; +def OpTypeImage: Op<25, (outs TYPE:$res), (ins TYPE:$sampTy, Dim:$dim, i32imm:$depth, + i32imm:$arrayed, i32imm:$MS, i32imm:$sampled, ImageFormat:$imFormat, variable_ops), + "$res = OpTypeImage $sampTy $dim $depth $arrayed $MS $sampled $imFormat">; +def OpTypeSampler: Op<26, (outs TYPE:$res), (ins), "$res = OpTypeSampler">; +def OpTypeSampledImage: Op<27, (outs TYPE:$res), (ins TYPE:$imageType), + "$res = OpTypeSampledImage $imageType">; +def OpTypeArray: Op<28, (outs TYPE:$type), (ins TYPE:$elementType, ID:$length), + "$type = OpTypeArray $elementType $length">; +def OpTypeRuntimeArray: Op<29, (outs TYPE:$type), (ins TYPE:$elementType), + "$type = OpTypeRuntimeArray $elementType">; +def OpTypeStruct: Op<30, (outs TYPE:$res), (ins variable_ops), "$res = OpTypeStruct">; +def OpTypeOpaque: Op<31, (outs TYPE:$res), (ins StringImm:$name, variable_ops), + "$res = OpTypeOpaque $name">; +def OpTypePointer: Op<32, (outs TYPE:$res), (ins StorageClass:$storage, TYPE:$type), + "$res = OpTypePointer $storage $type">; +def OpTypeFunction: Op<33, (outs TYPE:$funcType), (ins TYPE:$returnType, variable_ops), + "$funcType = OpTypeFunction $returnType">; +def OpTypeEvent: Op<34, (outs TYPE:$res), (ins), "$res = OpTypeEvent">; +def OpTypeDeviceEvent: Op<35, (outs TYPE:$res), (ins), "$res = OpTypeDeviceEvent">; +def OpTypeReserveId: Op<36, (outs TYPE:$res), (ins), "$res = OpTypeReserveId">; +def OpTypeQueue: Op<37, (outs TYPE:$res), (ins), "$res = OpTypeQueue">; +def OpTypePipe: Op<38, (outs TYPE:$res), (ins AccessQualifier:$a), "$res = OpTypePipe $a">; +def 
OpTypeForwardPointer: Op<39, (outs), (ins TYPE:$ptrType, StorageClass:$storageClass), + "OpTypeForwardPointer $ptrType $storageClass">; +def OpTypePipeStorage: Op<322, (outs TYPE:$res), (ins), "$res = OpTypePipeStorage">; +def OpTypeNamedBarrier: Op<327, (outs TYPE:$res), (ins), "$res = OpTypeNamedBarrier">; +def OpTypeAccelerationStructureNV: Op<5341, (outs TYPE:$res), (ins), + "$res = OpTypeAccelerationStructureNV">; +def OpTypeCooperativeMatrixNV: Op<5358, (outs TYPE:$res), + (ins TYPE:$compType, ID:$scope, ID:$rows, ID:$cols), + "$res = OpTypeCooperativeMatrixNV $compType $scope $rows $cols">; + +// 3.42.7 Constant-Creation Instructions + +def imm_to_i32 : SDNodeXFormgetTargetConstant( + N->getValueAP().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def fimm_to_i32 : SDNodeXFormgetTargetConstant( + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def gi_bitcast_fimm_to_i32 : GICustomOperandRenderer<"renderFImm32">, + GISDNodeXFormEquiv; + +def gi_bitcast_imm_to_i32 : GICustomOperandRenderer<"renderImm32">, + GISDNodeXFormEquiv; + +def PseudoConstI: IntImmLeaf; +def PseudoConstF: FPImmLeaf; +def ConstPseudoTrue: IntImmLeaf; +def ConstPseudoFalse: IntImmLeaf; +def ConstPseudoNull: IntImmLeaf; + +multiclass IntFPImm opCode, string name> { + def I: Op; + def F: Op; +} + +def OpConstantTrue: Op<41, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantTrue $src_ty", + [(set ID:$dst, (assigntype ConstPseudoTrue, TYPE:$src_ty))]>; +def OpConstantFalse: Op<42, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantFalse $src_ty", + [(set ID:$dst, (assigntype ConstPseudoFalse, TYPE:$src_ty))]>; + +defm OpConstant: IntFPImm<43, "OpConstant">; + +def OpConstantComposite: Op<44, (outs ID:$res), (ins TYPE:$type, variable_ops), + "$res = OpConstantComposite $type">; +def OpConstantSampler: Op<45, (outs ID:$res), + (ins TYPE:$t, SamplerAddressingMode:$s, i32imm:$p, SamplerFilterMode:$f), + "$res = OpConstantSampler $t $s $p $f">; +def OpConstantNull: Op<46, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantNull $src_ty", + [(set ID:$dst, (assigntype ConstPseudoNull, TYPE:$src_ty))]>; + +def OpSpecConstantTrue: Op<48, (outs ID:$r), (ins TYPE:$t), "$r = OpSpecConstantTrue $t">; +def OpSpecConstantFalse: Op<49, (outs ID:$r), (ins TYPE:$t), "$r = OpSpecConstantFalse $t">; +def OpSpecConstant: Op<50, (outs ID:$res), (ins TYPE:$type, i32imm:$imm, variable_ops), + "$res = OpSpecConstant $type $imm">; +def OpSpecConstantComposite: Op<51, (outs ID:$res), (ins TYPE:$type, variable_ops), + "$res = OpSpecConstantComposite $type">; +def OpSpecConstantOp: Op<52, (outs ID:$res), (ins TYPE:$t, i32imm:$c, ID:$o, variable_ops), + "$res = OpSpecConstantOp $t $c $o">; + +// 3.42.8 Memory Instructions + +def OpVariable: Op<59, (outs ID:$res), (ins TYPE:$type, StorageClass:$sc, variable_ops), + "$res = OpVariable $type $sc">; +def OpImageTexelPointer: Op<60, (outs ID:$res), + (ins TYPE:$resType, ID:$image, ID:$coord, ID:$sample), + "$res = OpImageTexelPointer $resType $image $coord $sample">; +def OpLoad: Op<61, (outs ID:$res), (ins TYPE:$resType, ID:$pointer, variable_ops), + "$res = OpLoad $resType $pointer">; +def OpStore: Op<62, (outs), (ins ID:$pointer, ID:$objectToStore, variable_ops), + "OpStore $pointer $objectToStore">; +def OpCopyMemory: Op<63, (outs), (ins ID:$dest, ID:$src, variable_ops), + "OpCopyMemory $dest $src">; +def OpCopyMemorySized: Op<64, (outs), (ins ID:$dest, ID:$src, ID:$size, variable_ops), + "OpCopyMemorySized $dest $src $size">; +def 
OpAccessChain: Op<65, (outs ID:$res), (ins TYPE:$type, ID:$base, variable_ops), + "$res = OpAccessChain $type $base">; +def OpInBoundsAccessChain: Op<66, (outs ID:$res), + (ins TYPE:$type, ID:$base, variable_ops), + "$res = OpInBoundsAccessChain $type $base">; +def OpPtrAccessChain: Op<67, (outs ID:$res), + (ins TYPE:$type, ID:$base, ID:$element, variable_ops), + "$res = OpPtrAccessChain $type $base $element">; +def OpArrayLength: Op<68, (outs ID:$res), (ins TYPE:$resTy, ID:$struct, i32imm:$arrayMember), + "$res = OpArrayLength $resTy $struct $arrayMember">; +def OpGenericPtrMemSemantics: Op<69, (outs ID:$res), (ins TYPE:$resType, ID:$pointer), + "$res = OpGenericPtrMemSemantics $resType $pointer">; +def OpInBoundsPtrAccessChain: Op<70, (outs ID:$res), + (ins TYPE:$type, ID:$base, ID:$element, variable_ops), + "$res = OpInBoundsPtrAccessChain $type $base $element">; +def OpPtrEqual: Op<401, (outs ID:$res), (ins TYPE:$resType, ID:$a, ID:$b), + "$res = OpPtrEqual $resType $a $b">; +def OpPtrNotEqual: Op<402, (outs ID:$res), (ins TYPE:$resType, ID:$a, ID:$b), + "$res = OpPtrNotEqual $resType $a $b">; +def OpPtrDiff: Op<403, (outs ID:$res), (ins TYPE:$resType, ID:$a, ID:$b), + "$res = OpPtrDiff $resType $a $b">; + +// 3.42.9 Function Instructions + +def OpFunction: Op<54, (outs ID:$func), + (ins TYPE:$resType, FunctionControl:$funcControl, TYPE:$funcType), + "$func = OpFunction $resType $funcControl $funcType">; +def OpFunctionParameter: Op<55, (outs ID:$arg), (ins TYPE:$type), + "$arg = OpFunctionParameter $type">; +def OpFunctionEnd: Op<56, (outs), (ins), "OpFunctionEnd"> { + let isTerminator=1; +} +def OpFunctionCall: Op<57, (outs ID:$res), (ins TYPE:$resType, ID:$function, variable_ops), + "$res = OpFunctionCall $resType $function">; + +// 3.42.10 Image Instructions + +def OpSampledImage: BinOp<"OpSampledImage", 86>; + +def OpImageSampleImplicitLod: Op<87, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops), + "$res = OpImageSampleImplicitLod $type $sampledImage $coord">; +def OpImageSampleExplicitLod: Op<88, (outs ID:$res), + (ins TYPE:$ty, ID:$sImage, ID:$uv, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSampleExplicitLod $ty $sImage $uv $op $i">; + +def OpImageSampleDrefImplicitLod: Op<89, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops), + "$res = OpImageSampleDrefImplicitLod $type $sampledImage $dref $coord">; +def OpImageSampleDrefExplicitLod: Op<90, (outs ID:$res), + (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSampleDrefExplicitLod $ty $im $uv $d $op $i">; + +def OpImageSampleProjImplicitLod: Op<91, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops), + "$res = OpImageSampleProjImplicitLod $type $sampledImage $coord">; +def OpImageSampleProjExplicitLod: Op<92, (outs ID:$res), + (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSampleProjExplicitLod $ty $im $uv $op $i">; + +def OpImageSampleProjDrefImplicitLod: Op<93, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops), + "$res = OpImageSampleProjDrefImplicitLod $type $sampledImage $dref $coord">; +def OpImageSampleProjDrefExplicitLod: Op<94, (outs ID:$res), + (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSampleProjDrefExplicitLod $ty $im $uv $d $op $i">; + +def OpImageFetch: Op<95, (outs ID:$res), + (ins TYPE:$type, ID:$image, ID:$coord, variable_ops), + 
"$res = OpImageFetch $type $image $coord">; +def OpImageGather: Op<96, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$component, variable_ops), + "$res = OpImageGather $type $sampledImage $coord $component">; +def OpImageDrefGather: Op<97, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops), + "$res = OpImageDrefGather $type $sampledImage $coord $dref">; + +def OpImageRead: Op<98, (outs ID:$res), + (ins TYPE:$type, ID:$image, ID:$coord, variable_ops), + "$res = OpImageRead $type $image $coord">; +def OpImageWrite: Op<99, (outs), (ins ID:$image, ID:$coord, ID:$texel, variable_ops), + "OpImageWrite $image $coord $texel">; + +def OpImage: UnOp<"OpImage", 100>; +def OpImageQueryFormat: UnOp<"OpImageQueryFormat", 101>; +def OpImageQueryOrder: UnOp<"OpImageQueryOrder", 102>; +def OpImageQuerySizeLod: BinOp<"OpImageQuerySizeLod", 103>; +def OpImageQuerySize: UnOp<"OpImageQuerySize", 104>; +def OpImageQueryLod: BinOp<"OpImageQueryLod", 105>; +def OpImageQueryLevels: UnOp<"OpImageQueryLevels", 106>; +def OpImageQuerySamples: UnOp<"OpImageQuerySamples", 107>; + +def OpImageSparseSampleImplicitLod: Op<305, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops), + "$res = OpImageSparseSampleImplicitLod $type $sampledImage $coord">; +def OpImageSparseSampleExplicitLod: Op<306, (outs ID:$res), + (ins TYPE:$ty, ID:$sImage, ID:$uv, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSparseSampleExplicitLod $ty $sImage $uv $op $i">; + +def OpImageSparseSampleDrefImplicitLod: Op<307, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImg, ID:$coord, ID:$dref, variable_ops), + "$res = OpImageSparseSampleDrefImplicitLod $type $sampledImg $dref $coord">; +def OpImageSparseSampleDrefExplicitLod: Op<308, (outs ID:$res), + (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSparseSampleDrefExplicitLod $ty $im $uv $d $op $i">; + +def OpImageSparseSampleProjImplicitLod: Op<309, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops), + "$res = OpImageSparseSampleProjImplicitLod $type $sampledImage $coord">; +def OpImageSparseSampleProjExplicitLod: Op<310, (outs ID:$res), + (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSparseSampleProjExplicitLod $ty $im $uv $op $i">; + +def OpImageSparseSampleProjDrefImplicitLod: Op<311, (outs ID:$res), + (ins TYPE:$type, ID:$sImage, ID:$coord, ID:$dref, variable_ops), + "$res = OpImageSparseSampleProjDrefImplicitLod $type $sImage $dref $coord">; +def OpImageSparseSampleProjDrefExplicitLod: Op<312, (outs ID:$res), + (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops), + "$res = OpImageSparseSampleProjDrefExplicitLod $ty $im $uv $d $op $i">; + +def OpImageSparseFetch: Op<313, (outs ID:$res), + (ins TYPE:$type, ID:$image, ID:$coord, variable_ops), + "$res = OpImageSparseFetch $type $image $coord">; +def OpImageSparseGather: Op<314, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$component, variable_ops), + "$res = OpImageSparseGather $type $sampledImage $coord $component">; +def OpImageSparseDrefGather: Op<315, (outs ID:$res), + (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops), + "$res = OpImageSparseDrefGather $type $sampledImage $coord $dref">; + +def OpImageSparseTexelsResident: UnOp<"OpImageSparseTexelsResident", 316>; + +def OpImageSparseRead: Op<320, (outs ID:$res), + (ins TYPE:$type, ID:$image, ID:$coord, variable_ops), + 
"$res = OpImageSparseRead $type $image $coord">; + +def OpImageSampleFootprintNV: Op<5283, (outs ID:$res), + (ins TYPE:$ty, ID:$sImg, ID:$uv, ID:$granularity, ID:$coarse, variable_ops), + "$res = OpImageSampleFootprintNV $ty $sImg $uv $granularity $coarse">; + +// 3.42.11 Conversion instructions + +def OpConvertFToU : UnOp<"OpConvertFToU", 109>; +def OpConvertFToS : UnOp<"OpConvertFToS", 110>; +def OpConvertSToF : UnOp<"OpConvertSToF", 111>; +def OpConvertUToF : UnOp<"OpConvertUToF", 112>; + +def OpUConvert : UnOp<"OpUConvert", 113>; +def OpSConvert : UnOp<"OpSConvert", 114>; +def OpFConvert : UnOp<"OpFConvert", 115>; + +def OpQuantizeToF16 : UnOp<"OpQuantizeToF16", 116>; + +def OpConvertPtrToU : UnOp<"OpConvertPtrToU", 117>; + +def OpSatConvertSToU : UnOp<"OpSatConvertSToU", 118>; +def OpSatConvertUToS : UnOp<"OpSatConvertUToS", 119>; + +def OpConvertUToPtr : UnOp<"OpConvertUToPtr", 120>; +def OpPtrCastToGeneric : UnOp<"OpPtrCastToGeneric", 121>; +def OpGenericCastToPtr : UnOp<"OpGenericCastToPtr", 122>; +def OpGenericCastToPtrExplicit : Op<123, (outs ID:$r), (ins TYPE:$t, ID:$p, StorageClass:$s), + "$r = OpGenericCastToPtrExplicit $t $p $s">; +def OpBitcast : UnOp<"OpBitcast", 124>; + +// 3.42.12 Composite Instructions + +def OpVectorExtractDynamic: Op<77, (outs ID:$res), (ins TYPE:$type, vID:$vec, ID:$idx), + "$res = OpVectorExtractDynamic $type $vec $idx", [(set ID:$res, (assigntype (extractelt vID:$vec, ID:$idx), TYPE:$type))]>; + +def OpVectorInsertDynamic: Op<78, (outs ID:$res), (ins TYPE:$ty, ID:$vec, ID:$comp, ID:$idx), + "$res = OpVectorInsertDynamic $ty $vec $comp $idx">; +def OpVectorShuffle: Op<79, (outs ID:$res), (ins TYPE:$ty, ID:$v1, ID:$v2, variable_ops), + "$res = OpVectorShuffle $ty $v1 $v2">; +def OpCompositeConstruct: Op<80, (outs ID:$res), (ins TYPE:$type, variable_ops), + "$res = OpCompositeConstruct $type">; +def OpCompositeExtract: Op<81, (outs ID:$res), (ins TYPE:$type, ID:$base, variable_ops), + "$res = OpCompositeExtract $type $base">; +def OpCompositeInsert: Op<82, (outs ID:$r), (ins TYPE:$ty, ID:$obj, ID:$base, variable_ops), + "$r = OpCompositeInsert $ty $obj $base">; +def OpCopyObject: UnOp<"OpCopyObject", 83>; +def OpTranspose: UnOp<"OpTranspose", 84>; +def OpCopyLogical: UnOp<"OpCopyLogical", 400>; + +// 3.42.13 Arithmetic Instructions + +def OpSNegate: UnOp<"OpSNegate", 126>; +def OpFNegate: UnOpTyped<"OpFNegate", 127, fID, fneg>; +defm OpIAdd: BinOpTypedGen<"OpIAdd", 128, add, 0, 1>; +defm OpFAdd: BinOpTypedGen<"OpFAdd", 129, fadd, 1, 1>; + +defm OpISub: BinOpTypedGen<"OpISub", 130, sub, 0, 1>; +defm OpFSub: BinOpTypedGen<"OpFSub", 131, fsub, 1, 1>; + +defm OpIMul: BinOpTypedGen<"OpIMul", 132, mul, 0, 1>; +defm OpFMul: BinOpTypedGen<"OpFMul", 133, fmul, 1, 1>; + +defm OpUDiv: BinOpTypedGen<"OpUDiv", 134, udiv, 0, 1>; +defm OpSDiv: BinOpTypedGen<"OpSDiv", 135, sdiv, 0, 1>; +defm OpFDiv: BinOpTypedGen<"OpFDiv", 136, fdiv, 1, 1>; + +defm OpUMod: BinOpTypedGen<"OpUMod", 137, urem, 0, 1>; +defm OpSRem: BinOpTypedGen<"OpSRem", 138, srem, 0, 1>; + +def OpSMod: BinOp<"OpSMod", 139>; + +defm OpFRem: BinOpTypedGen<"OpFRem", 140, frem, 1, 1>; +def OpFMod: BinOp<"OpFMod", 141>; + +def OpVectorTimesScalar: BinOp<"OpVectorTimesScalar", 142>; +def OpMatrixTimesScalar: BinOp<"OpMatrixTimesScalar", 143>; +def OpVectorTimesMatrix: BinOp<"OpVectorTimesMatrix", 144>; +def OpMatrixTimesVector: BinOp<"OpMatrixTimesVector", 145>; +def OpMatrixTimesMatrix: BinOp<"OpMatrixTimesMatrix", 146>; + +def OpOuterProduct: BinOp<"OpOuterProduct", 147>; +def OpDot: BinOp<"OpDot", 148>; + 
+def OpIAddCarry: BinOpTyped<"OpIAddCarry", 149, ID, addc>; +def OpISubBorrow: BinOpTyped<"OpISubBorrow", 150, ID, subc>; +def OpUMulExtended: BinOp<"OpUMulExtended", 151>; +def OpSMulExtended: BinOp<"OpSMulExtended", 152>; + +// 3.42.14 Bit Instructions + +defm OpShiftRightLogical: BinOpTypedGen<"OpShiftRightLogical", 194, srl, 0, 1>; +defm OpShiftRightArithmetic: BinOpTypedGen<"OpShiftRightArithmetic", 195, sra, 0, 1>; +defm OpShiftLeftLogical: BinOpTypedGen<"OpShiftLeftLogical", 196, shl, 0, 1>; + +defm OpBitwiseOr: BinOpTypedGen<"OpBitwiseOr", 197, or, 0, 1>; +defm OpBitwiseXor: BinOpTypedGen<"OpBitwiseXor", 198, xor, 0, 1>; +defm OpBitwiseAnd: BinOpTypedGen<"OpBitwiseAnd", 199, and, 0, 1>; +def OpNot: UnOp<"OpNot", 200>; + +def OpBitFieldInsert: Op<201, (outs ID:$res), + (ins TYPE:$ty, ID:$base, ID:$insert, ID:$offset, ID:$count), + "$res = OpBitFieldInsert $ty $base $insert $offset $count">; +def OpBitFieldSExtract: Op<202, (outs ID:$res), + (ins TYPE:$ty, ID:$base, ID:$offset, ID:$count), + "$res = OpBitFieldSExtract $ty $base $offset $count">; +def OpBitFieldUExtract: Op<203, (outs ID:$res), + (ins TYPE:$ty, ID:$base, ID:$offset, ID:$count), + "$res = OpBitFieldUExtract $ty $base $offset $count">; +def OpBitReverse: Op<204, (outs ID:$r), (ins TYPE:$ty, ID:$b), "$r = OpBitReverse $ty $b">; +def OpBitCount: Op<205, (outs ID:$r), (ins TYPE:$ty, ID:$b), "$r = OpBitCount $ty $b">; + +// 3.42.15 Relational and Logical Instructions + +def OpAny: Op<154, (outs ID:$res), (ins TYPE:$ty, ID:$vec), + "$res = OpAny $ty $vec">; +def OpAll: Op<155, (outs ID:$res), (ins TYPE:$ty, ID:$vec), + "$res = OpAll $ty $vec">; + +def OpIsNan: UnOp<"OpIsNan", 156>; +def OpIsInf: UnOp<"OpIsInf", 157>; +def OpIsFinite: UnOp<"OpIsFinite", 158>; +def OpIsNormal: UnOp<"OpIsNormal", 159>; +def OpSignBitSet: UnOp<"OpSignBitSet", 160>; + +def OpLessOrGreater: BinOp<"OpLessOrGreater", 161>; +def OpOrdered: BinOp<"OpOrdered", 162>; +def OpUnordered: BinOp<"OpUnordered", 163>; + +def OpLogicalEqual: BinOp<"OpLogicalEqual", 164>; +def OpLogicalNotEqual: BinOp<"OpLogicalNotEqual", 165>; +def OpLogicalOr: BinOp<"OpLogicalOr", 166>; +def OpLogicalAnd: BinOp<"OpLogicalAnd", 167>; +def OpLogicalNot: UnOp<"OpLogicalNot", 168>; + +defm OpSelect: TernOpTypedGen<"OpSelect", 169, select, 1, 1, 1>; + +def OpIEqual: BinOp<"OpIEqual", 170>; +def OpINotEqual: BinOp<"OpINotEqual", 171>; + +def OpUGreaterThan: BinOp<"OpUGreaterThan", 172>; +def OpSGreaterThan: BinOp<"OpSGreaterThan", 173>; +def OpUGreaterThanEqual: BinOp<"OpUGreaterThanEqual", 174>; +def OpSGreaterThanEqual: BinOp<"OpSGreaterThanEqual", 175>; +def OpULessThan: BinOp<"OpULessThan", 176>; +def OpSLessThan: BinOp<"OpSLessThan", 177>; +def OpULessThanEqual: BinOp<"OpULessThanEqual", 178>; +def OpSLessThanEqual: BinOp<"OpSLessThanEqual", 179>; + +def OpFOrdEqual: BinOp<"OpFOrdEqual", 180>; +def OpFUnordEqual: BinOp<"OpFUnordEqual", 181>; +def OpFOrdNotEqual: BinOp<"OpFOrdNotEqual", 182>; +def OpFUnordNotEqual: BinOp<"OpFUnordNotEqual", 183>; + +def OpFOrdLessThan: BinOp<"OpFOrdLessThan", 184>; +def OpFUnordLessThan: BinOp<"OpFUnordLessThan", 185>; +def OpFOrdGreaterThan: BinOp<"OpFOrdGreaterThan", 186>; +def OpFUnordGreaterThan: BinOp<"OpFUnordGreaterThan", 187>; + +def OpFOrdLessThanEqual: BinOp<"OpFOrdLessThanEqual", 188>; +def OpFUnordLessThanEqual: BinOp<"OpFUnordLessThanEqual", 189>; +def OpFOrdGreaterThanEqual: BinOp<"OpFOrdGreaterThanEqual", 190>; +def OpFUnordGreaterThanEqual: BinOp<"OpFUnordGreaterThanEqual", 191>; + +// 3.42.16 Derivative Instructions + +def 
OpDPdx: UnOp<"OpDPdx", 207>;
+def OpDPdy: UnOp<"OpDPdy", 208>;
+def OpFwidth: UnOp<"OpFwidth", 209>;
+
+def OpDPdxFine: UnOp<"OpDPdxFine", 210>;
+def OpDPdyFine: UnOp<"OpDPdyFine", 211>;
+def OpFwidthFine: UnOp<"OpFwidthFine", 212>;
+
+def OpDPdxCoarse: UnOp<"OpDPdxCoarse", 213>;
+def OpDPdyCoarse: UnOp<"OpDPdyCoarse", 214>;
+def OpFwidthCoarse: UnOp<"OpFwidthCoarse", 215>;
+
+// 3.42.17 Control-Flow Instructions
+
+def OpPhi: Op<245, (outs ID:$res), (ins TYPE:$type, ID:$var0, ID:$block0, variable_ops),
+                  "$res = OpPhi $type $var0 $block0">;
+def OpLoopMerge: Op<246, (outs), (ins ID:$merge, ID:$continue, LoopControl:$lc, variable_ops),
+                  "OpLoopMerge $merge $continue $lc">;
+def OpSelectionMerge: Op<247, (outs), (ins ID:$merge, SelectionControl:$sc),
+                  "OpSelectionMerge $merge $sc">;
+def OpLabel: Op<248, (outs ID:$label), (ins), "$label = OpLabel">;
+let isTerminator=1 in {
+  def OpBranch: Op<249, (outs), (ins ID:$label), "OpBranch $label">;
+  def OpBranchConditional: Op<250, (outs), (ins ID:$cond, ID:$true, ID:$false, variable_ops),
+                  "OpBranchConditional $cond $true $false">;
+  def OpSwitch: Op<251, (outs), (ins ID:$sel, ID:$dflt, variable_ops), "OpSwitch $sel $dflt">;
+}
+let isReturn = 1, hasDelaySlot=0, isBarrier = 0, isTerminator=1, isNotDuplicable = 1 in {
+  def OpKill: SimpleOp<"OpKill", 252>;
+  def OpReturn: SimpleOp<"OpReturn", 253>;
+  def OpReturnValue: Op<254, (outs), (ins ANYID:$ret), "OpReturnValue $ret">;
+  def OpUnreachable: SimpleOp<"OpUnreachable", 255>;
+}
+def OpLifetimeStart: Op<256, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStart $ptr, $sz">;
+def OpLifetimeStop: Op<257, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStop $ptr, $sz">;
+
+// 3.42.18 Atomic Instructions
+
+class AtomicOp<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$sem),
+                  !strconcat("$res = ", name, " $ty $ptr $sc $sem")>;
+
+class AtomicOpVal<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$sem, ID:$val),
+                  !strconcat("$res = ", name, " $ty $ptr $sc $sem $val")>;
+
+def OpAtomicLoad: AtomicOp<"OpAtomicLoad", 227>;
+
+def OpAtomicStore: Op<228, (outs), (ins ID:$ptr, ID:$sc, ID:$sem, ID:$val),
+                  "OpAtomicStore $ptr $sc $sem $val">;
+def OpAtomicExchange: Op<229, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$sem, ID:$val),
+                  "$res = OpAtomicExchange $ty $ptr $sc $sem $val">;
+def OpAtomicCompareExchange: Op<230, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq,
+                   ID:$neq, ID:$val, ID:$cmp),
+                  "$res = OpAtomicCompareExchange $ty $ptr $sc $eq $neq $val $cmp">;
+// TODO: Currently the following deprecated opcode is missing:
+// OpAtomicCompareExchangeWeak
+
+def OpAtomicIIncrement: AtomicOp<"OpAtomicIIncrement", 232>;
+def OpAtomicIDecrement: AtomicOp<"OpAtomicIDecrement", 233>;
+
+def OpAtomicIAdd: AtomicOpVal<"OpAtomicIAdd", 234>;
+def OpAtomicISub: AtomicOpVal<"OpAtomicISub", 235>;
+
+def OpAtomicSMin: AtomicOpVal<"OpAtomicSMin", 236>;
+def OpAtomicUMin: AtomicOpVal<"OpAtomicUMin", 237>;
+def OpAtomicSMax: AtomicOpVal<"OpAtomicSMax", 238>;
+def OpAtomicUMax: AtomicOpVal<"OpAtomicUMax", 239>;
+
+def OpAtomicAnd: AtomicOpVal<"OpAtomicAnd", 240>;
+def OpAtomicOr: AtomicOpVal<"OpAtomicOr", 241>;
+def OpAtomicXor: AtomicOpVal<"OpAtomicXor", 242>;
+
+
+def OpAtomicFlagTestAndSet: AtomicOp<"OpAtomicFlagTestAndSet", 318>;
+def OpAtomicFlagClear: Op<319, (outs), (ins ID:$ptr, ID:$sc, ID:$sem),
+                  "OpAtomicFlagClear $ptr $sc $sem">;
+
+// 3.42.19 Primitive Instructions
+
+def OpEmitVertex: SimpleOp<"OpEmitVertex", 218>;
+def OpEndPrimitive: SimpleOp<"OpEndPrimitive", 219>;
+def OpEmitStreamVertex: Op<220, (outs), (ins ID:$stream), "OpEmitStreamVertex $stream">;
+def OpEndStreamPrimitive: Op<221, (outs), (ins ID:$stream), "OpEndStreamPrimitive $stream">;
+
+// 3.42.20 Barrier Instructions
+
+def OpControlBarrier: Op<224, (outs), (ins ID:$exec, ID:$mem, ID:$sem),
+                  "OpControlBarrier $exec $mem $sem">;
+def OpMemoryBarrier: Op<225, (outs), (ins ID:$mem, ID:$sem),
+                  "OpMemoryBarrier $mem $sem">;
+def OpNamedBarrierInitialize: UnOp<"OpNamedBarrierInitialize", 328>;
+def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem),
+                  "OpMemoryNamedBarrier $barr $mem $sem">;
+
+// 3.42.21. Group and Subgroup Instructions
+
+def OpGroupAll: Op<261, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr),
+                  "$res = OpGroupAll $ty $scope $pr">;
+def OpGroupAny: Op<262, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr),
+                  "$res = OpGroupAny $ty $scope $pr">;
+def OpGroupBroadcast: Op<263, (outs ID:$res), (ins TYPE:$ty, ID:$scope,
+                  ID:$val, ID:$id),
+                  "$res = OpGroupBroadcast $ty $scope $val $id">;
+class OpGroup<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$scope, GroupOperation:$groupOp, ID:$x),
+                  !strconcat("$res = OpGroup", name, " $ty $scope $groupOp $x")>;
+def OpGroupIAdd: OpGroup<"IAdd", 264>;
+def OpGroupFAdd: OpGroup<"FAdd", 265>;
+def OpGroupFMin: OpGroup<"FMin", 266>;
+def OpGroupUMin: OpGroup<"UMin", 267>;
+def OpGroupSMin: OpGroup<"SMin", 268>;
+def OpGroupFMax: OpGroup<"FMax", 269>;
+def OpGroupUMax: OpGroup<"UMax", 270>;
+def OpGroupSMax: OpGroup<"SMax", 271>;
+
+// TODO: 3.42.22. Device-Side Enqueue Instructions
+// TODO: 3.42.23. Pipe Instructions
+
+// 3.42.24. Non-Uniform Instructions
+
+def OpGroupNonUniformElect: Op<333, (outs ID:$res), (ins TYPE:$ty, ID:$scope),
+                  "$res = OpGroupNonUniformElect $ty $scope">;
+class OpGroupNU3<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$scope, ID:$val),
+                  !strconcat("$res = OpGroupNonUniform", name, " $ty $scope $val")>;
+class OpGroupNU4<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$scope, ID:$val, ID:$id),
+                  !strconcat("$res = OpGroupNonUniform", name, " $ty $scope $val $id")>;
+def OpGroupNonUniformAll: OpGroupNU3<"All", 334>;
+def OpGroupNonUniformAny: OpGroupNU3<"Any", 335>;
+def OpGroupNonUniformAllEqual: OpGroupNU3<"AllEqual", 336>;
+def OpGroupNonUniformBroadcast: OpGroupNU4<"Broadcast", 337>;
+def OpGroupNonUniformBroadcastFirst: OpGroupNU3<"BroadcastFirst", 338>;
+def OpGroupNonUniformBallot: OpGroupNU3<"Ballot", 339>;
+def OpGroupNonUniformInverseBallot: OpGroupNU3<"InverseBallot", 340>;
+def OpGroupNonUniformBallotBitExtract: OpGroupNU4<"BallotBitExtract", 341>;
+def OpGroupNonUniformBallotBitCount: Op<342, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$scope, GroupOperation:$groupOp, ID:$val),
+                  "$res = OpGroupNonUniformBallotBitCount "
+                  "$ty $scope $groupOp $val">;
+def OpGroupNonUniformBallotFindLSB: OpGroupNU3<"BallotFindLSB", 343>;
+def OpGroupNonUniformBallotFindMSB: OpGroupNU3<"BallotFindMSB", 344>;
+def OpGroupNonUniformShuffle: OpGroupNU4<"Shuffle", 345>;
+def OpGroupNonUniformShuffleXor: OpGroupNU4<"ShuffleXor", 346>;
+def OpGroupNonUniformShuffleUp: OpGroupNU4<"ShuffleUp", 347>;
+def OpGroupNonUniformShuffleDown: OpGroupNU4<"ShuffleDown", 348>;
+class OpGroupNUGroup<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+                  (ins TYPE:$ty, ID:$scope, GroupOperation:$groupOp, ID:$val),
+                  !strconcat("$res = OpGroupNonUniform", name,
+                             " $ty $scope $groupOp $val")>;
+def OpGroupNonUniformIAdd: OpGroupNUGroup<"IAdd", 349>;
+def OpGroupNonUniformFAdd: OpGroupNUGroup<"FAdd", 350>;
+def OpGroupNonUniformIMul: OpGroupNUGroup<"IMul", 351>;
+def OpGroupNonUniformFMul: OpGroupNUGroup<"FMul", 352>;
+def OpGroupNonUniformSMin: OpGroupNUGroup<"SMin", 353>;
+def OpGroupNonUniformUMin: OpGroupNUGroup<"UMin", 354>;
+def OpGroupNonUniformFMin: OpGroupNUGroup<"FMin", 355>;
+def OpGroupNonUniformSMax: OpGroupNUGroup<"SMax", 356>;
+def OpGroupNonUniformUMax: OpGroupNUGroup<"UMax", 357>;
+def OpGroupNonUniformFMax: OpGroupNUGroup<"FMax", 358>;
+def OpGroupNonUniformBitwiseAnd: OpGroupNUGroup<"BitwiseAnd", 359>;
+def OpGroupNonUniformBitwiseOr: OpGroupNUGroup<"BitwiseOr", 360>;
+def OpGroupNonUniformBitwiseXor: OpGroupNUGroup<"BitwiseXor", 361>;
+def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>;
+def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>;
+def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>;
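[Editor's note: a minimal sketch, not part of the patch. The records above expand into SPIRV::Op* opcode enumerators, and defm-generated pairs carry S/V suffixes for the scalar/vector variants (the SPIRV::OpBitwiseAndS / SPIRV::OpBitwiseAndV pair used by selectIntToBool in the selector below is one such pair). The helper name emitAnd is hypothetical; it only illustrates how a hand-written selector feeds these opcodes to BuildMI, with operand order following the asm strings: def, result-type ID, then uses. Assumes the in-tree SPIRV backend headers for the generated enum.]

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
// In-tree, the SPIRV::Op* enum comes from the generated SPIRVGenInstrInfo
// headers pulled in via "SPIRVInstrInfo.h".

// Hypothetical helper: emit "$res = OpBitwiseAndS $ty $a $b" before I.
static llvm::MachineInstrBuilder emitAnd(llvm::MachineBasicBlock &BB,
                                         llvm::MachineInstr &I,
                                         const llvm::TargetInstrInfo &TII,
                                         llvm::Register Res, llvm::Register Ty,
                                         llvm::Register A, llvm::Register B) {
  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpBitwiseAndS))
      .addDef(Res)  // $res
      .addUse(Ty)   // result-type ID, as in the asm strings above
      .addUse(A)
      .addUse(B);
}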
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
new file mode 100644
index 000000000000..9294a60506a8
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -0,0 +1,1268 @@
+//===- SPIRVInstructionSelector.cpp ------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the targeting of the InstructionSelector class for
+// SPIRV.
+// TODO: This should be generated by TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVInstrInfo.h"
+#include "SPIRVRegisterBankInfo.h"
+#include "SPIRVRegisterInfo.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "spirv-isel"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class SPIRVInstructionSelector : public InstructionSelector {
+  const SPIRVSubtarget &STI;
+  const SPIRVInstrInfo &TII;
+  const SPIRVRegisterInfo &TRI;
+  const RegisterBankInfo &RBI;
+  SPIRVGlobalRegistry &GR;
+  MachineRegisterInfo *MRI;
+
+public:
+  SPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+                           const SPIRVSubtarget &ST,
+                           const RegisterBankInfo &RBI);
+  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
+               CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
+               BlockFrequencyInfo *BFI) override;
+  // Common selection code. Instruction-specific selection occurs in spvSelect.
+  bool select(MachineInstr &I) override;
+  static const char *getName() { return DEBUG_TYPE; }
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+
+private:
+  // tblgen-erated 'select' implementation, used as the initial selector for
+  // the patterns that don't require complex C++.
+  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+  // All instruction-specific selection that didn't happen in "select()".
+  // It is essentially one large switch/case that delegates to the other
+  // select* methods.
+ bool spvSelect(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectGlobalValue(Register ResVReg, MachineInstr &I, + const MachineInstr *Init = nullptr) const; + + bool selectUnOpWithSrc(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, Register SrcReg, + unsigned Opcode) const; + bool selectUnOp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + unsigned Opcode) const; + + bool selectLoad(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectStore(MachineInstr &I) const; + + bool selectMemOperation(Register ResVReg, MachineInstr &I) const; + + bool selectAtomicRMW(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, unsigned NewOpcode) const; + + bool selectAtomicCmpXchg(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectFence(MachineInstr &I) const; + + bool selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectBitreverse(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectConstVector(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectCmp(Register ResVReg, const SPIRVType *ResType, + unsigned comparisonOpcode, MachineInstr &I) const; + + bool selectICmp(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectFCmp(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + void renderImm32(MachineInstrBuilder &MIB, const MachineInstr &I, + int OpIdx) const; + void renderFImm32(MachineInstrBuilder &MIB, const MachineInstr &I, + int OpIdx) const; + + bool selectConst(Register ResVReg, const SPIRVType *ResType, const APInt &Imm, + MachineInstr &I) const; + + bool selectSelect(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + bool IsSigned) const; + bool selectIToF(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + bool IsSigned, unsigned Opcode) const; + bool selectExt(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + bool IsSigned) const; + + bool selectTrunc(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectIntToBool(Register IntReg, Register ResVReg, + const SPIRVType *intTy, const SPIRVType *boolTy, + MachineInstr &I) const; + + bool selectOpUndef(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectIntrinsic(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectExtractVal(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectInsertVal(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectExtractElt(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectInsertElt(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectGEP(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectFrameIndex(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectBranch(MachineInstr &I) const; + bool selectBranchCond(MachineInstr &I) const; + + bool selectPhi(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + Register buildI32Constant(uint32_t Val, MachineInstr &I, + const SPIRVType *ResType = nullptr) const; + + Register buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const; + Register buildOnesVal(bool AllOnes, const SPIRVType *ResType, + MachineInstr &I) const; +}; + +} // end anonymous 
namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+SPIRVInstructionSelector::SPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+                                                   const SPIRVSubtarget &ST,
+                                                   const RegisterBankInfo &RBI)
+    : InstructionSelector(), STI(ST), TII(*ST.getInstrInfo()),
+      TRI(*ST.getRegisterInfo()), RBI(RBI), GR(*ST.getSPIRVGlobalRegistry()),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
+                                       CodeGenCoverage &CoverageInfo,
+                                       ProfileSummaryInfo *PSI,
+                                       BlockFrequencyInfo *BFI) {
+  MRI = &MF.getRegInfo();
+  GR.setCurrentFunc(MF);
+  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
+}
+
+// Defined in SPIRVLegalizerInfo.cpp.
+extern bool isTypeFoldingSupported(unsigned Opcode);
+
+bool SPIRVInstructionSelector::select(MachineInstr &I) {
+  assert(I.getParent() && "Instruction should be in a basic block!");
+  assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+  unsigned Opcode = I.getOpcode();
+  // If it's not a GMIR instruction, we've selected it already.
+  if (!isPreISelGenericOpcode(Opcode)) {
+    if (Opcode == SPIRV::ASSIGN_TYPE) { // These pseudos aren't needed any more.
+      auto *Def = MRI->getVRegDef(I.getOperand(1).getReg());
+      if (isTypeFoldingSupported(Def->getOpcode())) {
+        auto Res = selectImpl(I, *CoverageInfo);
+        assert(Res || Def->getOpcode() == TargetOpcode::G_CONSTANT);
+        if (Res)
+          return Res;
+      }
+      MRI->replaceRegWith(I.getOperand(1).getReg(), I.getOperand(0).getReg());
+      I.removeFromParent();
+    } else if (I.getNumDefs() == 1) {
+      // Make all vregs 32 bits (for SPIR-V IDs).
+      MRI->setType(I.getOperand(0).getReg(), LLT::scalar(32));
+    }
+    return true;
+  }
+
+  if (I.getNumOperands() != I.getNumExplicitOperands()) {
+    LLVM_DEBUG(errs() << "Generic instr has unexpected implicit operands\n");
+    return false;
+  }
+
+  // Common code for getting return reg+type, and removing selected instr
+  // from parent occurs here. Instr-specific selection happens in spvSelect().
+  bool HasDefs = I.getNumDefs() > 0;
+  Register ResVReg = HasDefs ? I.getOperand(0).getReg() : Register(0);
+  SPIRVType *ResType = HasDefs ? GR.getSPIRVTypeForVReg(ResVReg) : nullptr;
+  assert(!HasDefs || ResType || I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+  if (spvSelect(ResVReg, ResType, I)) {
+    if (HasDefs) // Make all vregs 32 bits (for SPIR-V IDs).
+ MRI->setType(ResVReg, LLT::scalar(32)); + I.removeFromParent(); + return true; + } + return false; +} + +bool SPIRVInstructionSelector::spvSelect(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + assert(!isTypeFoldingSupported(I.getOpcode()) || + I.getOpcode() == TargetOpcode::G_CONSTANT); + const unsigned Opcode = I.getOpcode(); + switch (Opcode) { + case TargetOpcode::G_CONSTANT: + return selectConst(ResVReg, ResType, I.getOperand(1).getCImm()->getValue(), + I); + case TargetOpcode::G_GLOBAL_VALUE: + return selectGlobalValue(ResVReg, I); + case TargetOpcode::G_IMPLICIT_DEF: + return selectOpUndef(ResVReg, ResType, I); + + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + return selectIntrinsic(ResVReg, ResType, I); + case TargetOpcode::G_BITREVERSE: + return selectBitreverse(ResVReg, ResType, I); + + case TargetOpcode::G_BUILD_VECTOR: + return selectConstVector(ResVReg, ResType, I); + + case TargetOpcode::G_SHUFFLE_VECTOR: { + MachineBasicBlock &BB = *I.getParent(); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(1).getReg()) + .addUse(I.getOperand(2).getReg()); + for (auto V : I.getOperand(3).getShuffleMask()) + MIB.addImm(V); + return MIB.constrainAllUses(TII, TRI, RBI); + } + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMCPY: + return selectMemOperation(ResVReg, I); + + case TargetOpcode::G_ICMP: + return selectICmp(ResVReg, ResType, I); + case TargetOpcode::G_FCMP: + return selectFCmp(ResVReg, ResType, I); + + case TargetOpcode::G_FRAME_INDEX: + return selectFrameIndex(ResVReg, ResType, I); + + case TargetOpcode::G_LOAD: + return selectLoad(ResVReg, ResType, I); + case TargetOpcode::G_STORE: + return selectStore(I); + + case TargetOpcode::G_BR: + return selectBranch(I); + case TargetOpcode::G_BRCOND: + return selectBranchCond(I); + + case TargetOpcode::G_PHI: + return selectPhi(ResVReg, ResType, I); + + case TargetOpcode::G_FPTOSI: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToS); + case TargetOpcode::G_FPTOUI: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToU); + + case TargetOpcode::G_SITOFP: + return selectIToF(ResVReg, ResType, I, true, SPIRV::OpConvertSToF); + case TargetOpcode::G_UITOFP: + return selectIToF(ResVReg, ResType, I, false, SPIRV::OpConvertUToF); + + case TargetOpcode::G_CTPOP: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitCount); + + case TargetOpcode::G_SEXT: + return selectExt(ResVReg, ResType, I, true); + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_ZEXT: + return selectExt(ResVReg, ResType, I, false); + case TargetOpcode::G_TRUNC: + return selectTrunc(ResVReg, ResType, I); + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FPEXT: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpFConvert); + + case TargetOpcode::G_PTRTOINT: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertPtrToU); + case TargetOpcode::G_INTTOPTR: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertUToPtr); + case TargetOpcode::G_BITCAST: + return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast); + case TargetOpcode::G_ADDRSPACE_CAST: + return selectAddrSpaceCast(ResVReg, ResType, I); + + case TargetOpcode::G_ATOMICRMW_OR: + return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicOr); + case TargetOpcode::G_ATOMICRMW_ADD: + return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicIAdd); + case TargetOpcode::G_ATOMICRMW_AND: + return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicAnd); + 
case TargetOpcode::G_ATOMICRMW_MAX:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicSMax);
+  case TargetOpcode::G_ATOMICRMW_MIN:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicSMin);
+  case TargetOpcode::G_ATOMICRMW_SUB:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicISub);
+  case TargetOpcode::G_ATOMICRMW_XOR:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicXor);
+  case TargetOpcode::G_ATOMICRMW_UMAX:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicUMax);
+  case TargetOpcode::G_ATOMICRMW_UMIN:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicUMin);
+  case TargetOpcode::G_ATOMICRMW_XCHG:
+    return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicExchange);
+  case TargetOpcode::G_ATOMIC_CMPXCHG:
+    return selectAtomicCmpXchg(ResVReg, ResType, I);
+
+  case TargetOpcode::G_FENCE:
+    return selectFence(I);
+
+  default:
+    return false;
+  }
+}
+
+bool SPIRVInstructionSelector::selectUnOpWithSrc(Register ResVReg,
+                                                 const SPIRVType *ResType,
+                                                 MachineInstr &I,
+                                                 Register SrcReg,
+                                                 unsigned Opcode) const {
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(SrcReg)
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectUnOp(Register ResVReg,
+                                          const SPIRVType *ResType,
+                                          MachineInstr &I,
+                                          unsigned Opcode) const {
+  return selectUnOpWithSrc(ResVReg, ResType, I, I.getOperand(1).getReg(),
+                           Opcode);
+}
+
+static SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
+  switch (Ord) {
+  case AtomicOrdering::Acquire:
+    return SPIRV::MemorySemantics::Acquire;
+  case AtomicOrdering::Release:
+    return SPIRV::MemorySemantics::Release;
+  case AtomicOrdering::AcquireRelease:
+    return SPIRV::MemorySemantics::AcquireRelease;
+  case AtomicOrdering::SequentiallyConsistent:
+    return SPIRV::MemorySemantics::SequentiallyConsistent;
+  case AtomicOrdering::Unordered:
+  case AtomicOrdering::Monotonic:
+  case AtomicOrdering::NotAtomic:
+    return SPIRV::MemorySemantics::None;
+  }
+}
+
+static SPIRV::Scope getScope(SyncScope::ID Ord) {
+  switch (Ord) {
+  case SyncScope::SingleThread:
+    return SPIRV::Scope::Invocation;
+  case SyncScope::System:
+    return SPIRV::Scope::Device;
+  default:
+    llvm_unreachable("Unsupported synchronization Scope ID.");
+  }
+}
+
+static void addMemoryOperands(MachineMemOperand *MemOp,
+                              MachineInstrBuilder &MIB) {
+  uint32_t SpvMemOp = static_cast<uint32_t>(SPIRV::MemoryOperand::None);
+  if (MemOp->isVolatile())
+    SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Volatile);
+  if (MemOp->isNonTemporal())
+    SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Nontemporal);
+  if (MemOp->getAlign().value())
+    SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Aligned);
+
+  if (SpvMemOp != static_cast<uint32_t>(SPIRV::MemoryOperand::None)) {
+    MIB.addImm(SpvMemOp);
+    if (SpvMemOp & static_cast<uint32_t>(SPIRV::MemoryOperand::Aligned))
+      MIB.addImm(MemOp->getAlign().value());
+  }
+}
+
+static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) {
+  uint32_t SpvMemOp = static_cast<uint32_t>(SPIRV::MemoryOperand::None);
+  if (Flags & MachineMemOperand::Flags::MOVolatile)
+    SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Volatile);
+  if (Flags & MachineMemOperand::Flags::MONonTemporal)
+    SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Nontemporal);
+
+  if (SpvMemOp != static_cast<uint32_t>(SPIRV::MemoryOperand::None))
+    MIB.addImm(SpvMemOp);
+}
+
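[Editor's note: the two addMemoryOperands overloads above fold LLVM-side memory attributes into a single SPIR-V memory-operand bitmask, optionally followed by an alignment literal. Below is a self-contained sketch of the resulting encoding, with the mask bits inlined from the SPIR-V specification (Volatile = 0x1, Aligned = 0x2, Nontemporal = 0x4); the constant and variable names here are illustrative, not the backend's.]

#include <cstdint>
#include <cstdio>

// SPIR-V Memory Operands bits, per the SPIR-V specification.
constexpr uint32_t MemVolatile = 0x1;
constexpr uint32_t MemAligned = 0x2;
constexpr uint32_t MemNontemporal = 0x4;

// Compute the mask a volatile, non-temporal, 4-byte-aligned access carries.
int main() {
  bool IsVolatile = true, IsNonTemporal = true;
  uint64_t AlignBytes = 4; // emitted as a trailing literal when Aligned is set

  uint32_t Mask = 0;
  if (IsVolatile)
    Mask |= MemVolatile;
  if (IsNonTemporal)
    Mask |= MemNontemporal;
  if (AlignBytes)
    Mask |= MemAligned;

  // Textually this corresponds to e.g.
  //   OpLoad %ty %ptr Volatile|Aligned|Nontemporal 4
  std::printf("mask=0x%x align=%llu\n", Mask, (unsigned long long)AlignBytes);
  return 0;
}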
+bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
+                                          const SPIRVType *ResType,
+                                          MachineInstr &I) const {
+  unsigned OpOffset =
+      I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+  Register Ptr = I.getOperand(1 + OpOffset).getReg();
+  auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
+                 .addDef(ResVReg)
+                 .addUse(GR.getSPIRVTypeID(ResType))
+                 .addUse(Ptr);
+  if (!I.getNumMemOperands()) {
+    assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+    addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
+  } else {
+    addMemoryOperands(*I.memoperands_begin(), MIB);
+  }
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
+  unsigned OpOffset =
+      I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+  Register StoreVal = I.getOperand(0 + OpOffset).getReg();
+  Register Ptr = I.getOperand(1 + OpOffset).getReg();
+  MachineBasicBlock &BB = *I.getParent();
+  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpStore))
+                 .addUse(Ptr)
+                 .addUse(StoreVal);
+  if (!I.getNumMemOperands()) {
+    assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+    addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
+  } else {
+    addMemoryOperands(*I.memoperands_begin(), MIB);
+  }
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
+                                                  MachineInstr &I) const {
+  MachineBasicBlock &BB = *I.getParent();
+  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized))
+                 .addDef(I.getOperand(0).getReg())
+                 .addUse(I.getOperand(1).getReg())
+                 .addUse(I.getOperand(2).getReg());
+  if (I.getNumMemOperands())
+    addMemoryOperands(*I.memoperands_begin(), MIB);
+  bool Result = MIB.constrainAllUses(TII, TRI, RBI);
+  if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) {
+    BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), ResVReg)
+        .addUse(MIB->getOperand(0).getReg());
+  }
+  return Result;
+}
+
+bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
+                                               const SPIRVType *ResType,
+                                               MachineInstr &I,
+                                               unsigned NewOpcode) const {
+  assert(I.hasOneMemOperand());
+  const MachineMemOperand *MemOp = *I.memoperands_begin();
+  uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
+  Register ScopeReg = buildI32Constant(Scope, I);
+
+  Register Ptr = I.getOperand(1).getReg();
+  // TODO: Changed as it's implemented in the translator. See test/atomicrmw.ll
+  // auto ScSem =
+  //     getMemSemanticsForStorageClass(GR.getPointerStorageClass(Ptr));
+  AtomicOrdering AO = MemOp->getSuccessOrdering();
+  uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
+  Register MemSemReg = buildI32Constant(MemSem /*| ScSem*/, I);
+
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(NewOpcode))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(Ptr)
+      .addUse(ScopeReg)
+      .addUse(MemSemReg)
+      .addUse(I.getOperand(2).getReg())
+      .constrainAllUses(TII, TRI, RBI);
+}
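[Editor's note: the scope and semantics registers materialized in selectAtomicRMW come from getScope/getMemSemantics defined earlier. For reference, a self-contained sketch of the ordering-to-semantics mapping, with the SPIR-V spec's literal bit values substituted for the backend enums (None = 0x0, Acquire = 0x2, Release = 0x4, AcquireRelease = 0x8, SequentiallyConsistent = 0x10); the Ordering enum below is an illustrative stand-in for llvm::AtomicOrdering.]

#include <cstdint>

enum class Ordering {
  NotAtomic, Unordered, Monotonic, Acquire, Release,
  AcquireRelease, SequentiallyConsistent
};

// Map an atomic ordering onto SPIR-V Memory Semantics bits (spec values).
uint32_t memSemantics(Ordering Ord) {
  switch (Ord) {
  case Ordering::Acquire:
    return 0x2; // Acquire
  case Ordering::Release:
    return 0x4; // Release
  case Ordering::AcquireRelease:
    return 0x8; // AcquireRelease
  case Ordering::SequentiallyConsistent:
    return 0x10; // SequentiallyConsistent
  default:
    return 0x0; // None: not atomic, unordered, or monotonic
  }
}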
+
+bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const {
+  AtomicOrdering AO = AtomicOrdering(I.getOperand(0).getImm());
+  uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
+  Register MemSemReg = buildI32Constant(MemSem, I);
+  SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm());
+  uint32_t Scope = static_cast<uint32_t>(getScope(Ord));
+  Register ScopeReg = buildI32Constant(Scope, I);
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier))
+      .addUse(ScopeReg)
+      .addUse(MemSemReg)
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
+                                                   const SPIRVType *ResType,
+                                                   MachineInstr &I) const {
+  assert(I.hasOneMemOperand());
+  const MachineMemOperand *MemOp = *I.memoperands_begin();
+  uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
+  Register ScopeReg = buildI32Constant(Scope, I);
+
+  Register Ptr = I.getOperand(2).getReg();
+  Register Cmp = I.getOperand(3).getReg();
+  Register Val = I.getOperand(4).getReg();
+
+  SPIRVType *SpvValTy = GR.getSPIRVTypeForVReg(Val);
+  SPIRV::StorageClass SC = GR.getPointerStorageClass(Ptr);
+  uint32_t ScSem = static_cast<uint32_t>(getMemSemanticsForStorageClass(SC));
+  AtomicOrdering AO = MemOp->getSuccessOrdering();
+  uint32_t MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem;
+  Register MemSemEqReg = buildI32Constant(MemSemEq, I);
+  AtomicOrdering FO = MemOp->getFailureOrdering();
+  uint32_t MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem;
+  Register MemSemNeqReg =
+      MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I);
+  const DebugLoc &DL = I.getDebugLoc();
+  return BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(SpvValTy))
+      .addUse(Ptr)
+      .addUse(ScopeReg)
+      .addUse(MemSemEqReg)
+      .addUse(MemSemNeqReg)
+      .addUse(Val)
+      .addUse(Cmp)
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+static bool isGenericCastablePtr(SPIRV::StorageClass SC) {
+  switch (SC) {
+  case SPIRV::StorageClass::Workgroup:
+  case SPIRV::StorageClass::CrossWorkgroup:
+  case SPIRV::StorageClass::Function:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// In SPIR-V, address space casting can only happen to and from the Generic
+// storage class. We can also only cast Workgroup, CrossWorkgroup, or Function
+// pointers to and from Generic pointers. As such, we can convert e.g. from
+// Workgroup to Function by going via a Generic pointer as an intermediary. All
+// other combinations can only be done by a bitcast, and are probably not safe.
+bool SPIRVInstructionSelector::selectAddrSpaceCast(Register ResVReg,
+                                                   const SPIRVType *ResType,
+                                                   MachineInstr &I) const {
+  Register SrcPtr = I.getOperand(1).getReg();
+  SPIRVType *SrcPtrTy = GR.getSPIRVTypeForVReg(SrcPtr);
+  SPIRV::StorageClass SrcSC = GR.getPointerStorageClass(SrcPtr);
+  SPIRV::StorageClass DstSC = GR.getPointerStorageClass(ResVReg);
+
+  // Casting from an eligible pointer to Generic.
+  if (DstSC == SPIRV::StorageClass::Generic && isGenericCastablePtr(SrcSC))
+    return selectUnOp(ResVReg, ResType, I, SPIRV::OpPtrCastToGeneric);
+  // Casting from Generic to an eligible pointer.
+  if (SrcSC == SPIRV::StorageClass::Generic && isGenericCastablePtr(DstSC))
+    return selectUnOp(ResVReg, ResType, I, SPIRV::OpGenericCastToPtr);
+  // Casting between 2 eligible pointers using Generic as an intermediary.
+  if (isGenericCastablePtr(SrcSC) && isGenericCastablePtr(DstSC)) {
+    Register Tmp = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+    SPIRVType *GenericPtrTy = GR.getOrCreateSPIRVPointerType(
+        SrcPtrTy, I, TII, SPIRV::StorageClass::Generic);
+    MachineBasicBlock &BB = *I.getParent();
+    const DebugLoc &DL = I.getDebugLoc();
+    bool Success = BuildMI(BB, I, DL, TII.get(SPIRV::OpPtrCastToGeneric))
+                       .addDef(Tmp)
+                       .addUse(GR.getSPIRVTypeID(GenericPtrTy))
+                       .addUse(SrcPtr)
+                       .constrainAllUses(TII, TRI, RBI);
+    return Success && BuildMI(BB, I, DL, TII.get(SPIRV::OpGenericCastToPtr))
+                          .addDef(ResVReg)
+                          .addUse(GR.getSPIRVTypeID(ResType))
+                          .addUse(Tmp)
+                          .constrainAllUses(TII, TRI, RBI);
+  }
+  // TODO: Should this case just be disallowed completely?
+  // We're casting 2 other arbitrary address spaces, so have to bitcast.
+  return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
+}
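[Editor's note: the decision tree in selectAddrSpaceCast reduces to a small lookup over (source, destination) storage classes. A standalone sketch making the four outcomes explicit; the enum and function names are illustrative, not backend code.]

#include <cstdio>

enum class SC { Function, Workgroup, CrossWorkgroup, Generic, Other };

static bool genericCastable(SC C) {
  return C == SC::Function || C == SC::Workgroup || C == SC::CrossWorkgroup;
}

// Which SPIR-V opcode(s) an addrspacecast lowers to, per the rules above.
static const char *lowering(SC Src, SC Dst) {
  if (Dst == SC::Generic && genericCastable(Src))
    return "OpPtrCastToGeneric";
  if (Src == SC::Generic && genericCastable(Dst))
    return "OpGenericCastToPtr";
  if (genericCastable(Src) && genericCastable(Dst))
    return "OpPtrCastToGeneric + OpGenericCastToPtr"; // via a Generic temp
  return "OpBitcast"; // arbitrary pair: bitcast fallback, possibly unsafe
}

int main() {
  // e.g. Workgroup -> Function goes through Generic as an intermediary.
  std::printf("%s\n", lowering(SC::Workgroup, SC::Function));
  return 0;
}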
+
+static unsigned getFCmpOpcode(unsigned PredNum) {
+  auto Pred = static_cast<CmpInst::Predicate>(PredNum);
+  switch (Pred) {
+  case CmpInst::FCMP_OEQ:
+    return SPIRV::OpFOrdEqual;
+  case CmpInst::FCMP_OGE:
+    return SPIRV::OpFOrdGreaterThanEqual;
+  case CmpInst::FCMP_OGT:
+    return SPIRV::OpFOrdGreaterThan;
+  case CmpInst::FCMP_OLE:
+    return SPIRV::OpFOrdLessThanEqual;
+  case CmpInst::FCMP_OLT:
+    return SPIRV::OpFOrdLessThan;
+  case CmpInst::FCMP_ONE:
+    return SPIRV::OpFOrdNotEqual;
+  case CmpInst::FCMP_ORD:
+    return SPIRV::OpOrdered;
+  case CmpInst::FCMP_UEQ:
+    return SPIRV::OpFUnordEqual;
+  case CmpInst::FCMP_UGE:
+    return SPIRV::OpFUnordGreaterThanEqual;
+  case CmpInst::FCMP_UGT:
+    return SPIRV::OpFUnordGreaterThan;
+  case CmpInst::FCMP_ULE:
+    return SPIRV::OpFUnordLessThanEqual;
+  case CmpInst::FCMP_ULT:
+    return SPIRV::OpFUnordLessThan;
+  case CmpInst::FCMP_UNE:
+    return SPIRV::OpFUnordNotEqual;
+  case CmpInst::FCMP_UNO:
+    return SPIRV::OpUnordered;
+  default:
+    llvm_unreachable("Unknown predicate type for FCmp");
+  }
+}
+
+static unsigned getICmpOpcode(unsigned PredNum) {
+  auto Pred = static_cast<CmpInst::Predicate>(PredNum);
+  switch (Pred) {
+  case CmpInst::ICMP_EQ:
+    return SPIRV::OpIEqual;
+  case CmpInst::ICMP_NE:
+    return SPIRV::OpINotEqual;
+  case CmpInst::ICMP_SGE:
+    return SPIRV::OpSGreaterThanEqual;
+  case CmpInst::ICMP_SGT:
+    return SPIRV::OpSGreaterThan;
+  case CmpInst::ICMP_SLE:
+    return SPIRV::OpSLessThanEqual;
+  case CmpInst::ICMP_SLT:
+    return SPIRV::OpSLessThan;
+  case CmpInst::ICMP_UGE:
+    return SPIRV::OpUGreaterThanEqual;
+  case CmpInst::ICMP_UGT:
+    return SPIRV::OpUGreaterThan;
+  case CmpInst::ICMP_ULE:
+    return SPIRV::OpULessThanEqual;
+  case CmpInst::ICMP_ULT:
+    return SPIRV::OpULessThan;
+  default:
+    llvm_unreachable("Unknown predicate type for ICmp");
+  }
+}
+
+static unsigned getPtrCmpOpcode(unsigned Pred) {
+  switch (static_cast<CmpInst::Predicate>(Pred)) {
+  case CmpInst::ICMP_EQ:
+    return SPIRV::OpPtrEqual;
+  case CmpInst::ICMP_NE:
+    return SPIRV::OpPtrNotEqual;
+  default:
+    llvm_unreachable("Unknown predicate type for pointer comparison");
+  }
+}
+
+// Return the logical operation, or abort if none exists.
+static unsigned getBoolCmpOpcode(unsigned PredNum) {
+  auto Pred = static_cast<CmpInst::Predicate>(PredNum);
+  switch (Pred) {
+  case CmpInst::ICMP_EQ:
+    return SPIRV::OpLogicalEqual;
+  case CmpInst::ICMP_NE:
+    return SPIRV::OpLogicalNotEqual;
+  default:
+    llvm_unreachable("Unknown predicate type for Bool comparison");
+  }
+}
+
+bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
+                                                const SPIRVType *ResType,
+                                                MachineInstr &I) const {
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpBitReverse))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(I.getOperand(1).getReg())
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectConstVector(Register ResVReg,
+                                                 const SPIRVType *ResType,
+                                                 MachineInstr &I) const {
+  // TODO: only const case is supported for now.
+  assert(std::all_of(
+      I.operands_begin(), I.operands_end(), [this](const MachineOperand &MO) {
+        if (MO.isDef())
+          return true;
+        if (!MO.isReg())
+          return false;
+        SPIRVType *ConstTy = this->MRI->getVRegDef(MO.getReg());
+        assert(ConstTy && ConstTy->getOpcode() == SPIRV::ASSIGN_TYPE &&
+               ConstTy->getOperand(1).isReg());
+        Register ConstReg = ConstTy->getOperand(1).getReg();
+        const MachineInstr *Const = this->MRI->getVRegDef(ConstReg);
+        assert(Const);
+        return (Const->getOpcode() == TargetOpcode::G_CONSTANT ||
+                Const->getOpcode() == TargetOpcode::G_FCONSTANT);
+      }));
+
+  auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                     TII.get(SPIRV::OpConstantComposite))
+                 .addDef(ResVReg)
+                 .addUse(GR.getSPIRVTypeID(ResType));
+  for (unsigned i = I.getNumExplicitDefs(); i < I.getNumExplicitOperands(); ++i)
+    MIB.addUse(I.getOperand(i).getReg());
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectCmp(Register ResVReg,
+                                         const SPIRVType *ResType,
+                                         unsigned CmpOpc,
+                                         MachineInstr &I) const {
+  Register Cmp0 = I.getOperand(2).getReg();
+  Register Cmp1 = I.getOperand(3).getReg();
+  assert(GR.getSPIRVTypeForVReg(Cmp0)->getOpcode() ==
+             GR.getSPIRVTypeForVReg(Cmp1)->getOpcode() &&
+         "CMP operands should have the same type");
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CmpOpc))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(Cmp0)
+      .addUse(Cmp1)
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectICmp(Register ResVReg,
+                                          const SPIRVType *ResType,
+                                          MachineInstr &I) const {
+  auto Pred = I.getOperand(1).getPredicate();
+  unsigned CmpOpc;
+
+  Register CmpOperand = I.getOperand(2).getReg();
+  if (GR.isScalarOfType(CmpOperand, SPIRV::OpTypePointer))
+    CmpOpc = getPtrCmpOpcode(Pred);
+  else if (GR.isScalarOrVectorOfType(CmpOperand, SPIRV::OpTypeBool))
+    CmpOpc = getBoolCmpOpcode(Pred);
+  else
+    CmpOpc = getICmpOpcode(Pred);
+  return selectCmp(ResVReg, ResType, CmpOpc, I);
+}
+
+void SPIRVInstructionSelector::renderFImm32(MachineInstrBuilder &MIB,
+                                            const MachineInstr &I,
+                                            int OpIdx) const {
+  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+         "Expected G_FCONSTANT");
+  const ConstantFP *FPImm = I.getOperand(1).getFPImm();
+  addNumImm(FPImm->getValueAPF().bitcastToAPInt(), MIB);
+}
+
+void SPIRVInstructionSelector::renderImm32(MachineInstrBuilder &MIB,
+                                           const
MachineInstr &I, + int OpIdx) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + addNumImm(I.getOperand(1).getCImm()->getValue(), MIB); +} + +Register +SPIRVInstructionSelector::buildI32Constant(uint32_t Val, MachineInstr &I, + const SPIRVType *ResType) const { + const SPIRVType *SpvI32Ty = + ResType ? ResType : GR.getOrCreateSPIRVIntegerType(32, I, TII); + Register NewReg; + NewReg = MRI->createGenericVirtualRegister(LLT::scalar(32)); + MachineInstr *MI; + MachineBasicBlock &BB = *I.getParent(); + if (Val == 0) { + MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(NewReg) + .addUse(GR.getSPIRVTypeID(SpvI32Ty)); + } else { + MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) + .addDef(NewReg) + .addUse(GR.getSPIRVTypeID(SpvI32Ty)) + .addImm(APInt(32, Val).getZExtValue()); + } + constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + return NewReg; +} + +bool SPIRVInstructionSelector::selectFCmp(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + unsigned CmpOp = getFCmpOpcode(I.getOperand(1).getPredicate()); + return selectCmp(ResVReg, ResType, CmpOp, I); +} + +Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType, + MachineInstr &I) const { + return buildI32Constant(0, I, ResType); +} + +Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, + const SPIRVType *ResType, + MachineInstr &I) const { + unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType); + APInt One = AllOnes ? APInt::getAllOnesValue(BitWidth) + : APInt::getOneBitSet(BitWidth, 0); + Register OneReg = buildI32Constant(One.getZExtValue(), I, ResType); + if (ResType->getOpcode() == SPIRV::OpTypeVector) { + const unsigned NumEles = ResType->getOperand(2).getImm(); + Register OneVec = MRI->createVirtualRegister(&SPIRV::IDRegClass); + unsigned Opcode = SPIRV::OpConstantComposite; + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode)) + .addDef(OneVec) + .addUse(GR.getSPIRVTypeID(ResType)); + for (unsigned i = 0; i < NumEles; ++i) + MIB.addUse(OneReg); + constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + return OneVec; + } + return OneReg; +} + +bool SPIRVInstructionSelector::selectSelect(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + bool IsSigned) const { + // To extend a bool, we need to use OpSelect between constants. + Register ZeroReg = buildZerosVal(ResType, I); + Register OneReg = buildOnesVal(IsSigned, ResType, I); + bool IsScalarBool = + GR.isScalarOfType(I.getOperand(1).getReg(), SPIRV::OpTypeBool); + unsigned Opcode = + IsScalarBool ? SPIRV::OpSelectSISCond : SPIRV::OpSelectSIVCond; + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(1).getReg()) + .addUse(OneReg) + .addUse(ZeroReg) + .constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectIToF(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, bool IsSigned, + unsigned Opcode) const { + Register SrcReg = I.getOperand(1).getReg(); + // We can convert bool value directly to float type without OpConvert*ToF, + // however the translator generates OpSelect+OpConvert*ToF, so we do the same. 
+ if (GR.isScalarOrVectorOfType(I.getOperand(1).getReg(), SPIRV::OpTypeBool)) { + unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType); + SPIRVType *TmpType = GR.getOrCreateSPIRVIntegerType(BitWidth, I, TII); + if (ResType->getOpcode() == SPIRV::OpTypeVector) { + const unsigned NumElts = ResType->getOperand(2).getImm(); + TmpType = GR.getOrCreateSPIRVVectorType(TmpType, NumElts, I, TII); + } + SrcReg = MRI->createVirtualRegister(&SPIRV::IDRegClass); + selectSelect(SrcReg, TmpType, I, false); + } + return selectUnOpWithSrc(ResVReg, ResType, I, SrcReg, Opcode); +} + +bool SPIRVInstructionSelector::selectExt(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, bool IsSigned) const { + if (GR.isScalarOrVectorOfType(I.getOperand(1).getReg(), SPIRV::OpTypeBool)) + return selectSelect(ResVReg, ResType, I, IsSigned); + unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert; + return selectUnOp(ResVReg, ResType, I, Opcode); +} + +bool SPIRVInstructionSelector::selectIntToBool(Register IntReg, + Register ResVReg, + const SPIRVType *IntTy, + const SPIRVType *BoolTy, + MachineInstr &I) const { + // To truncate to a bool, we use OpBitwiseAnd 1 and OpINotEqual to zero. + Register BitIntReg = MRI->createVirtualRegister(&SPIRV::IDRegClass); + bool IsVectorTy = IntTy->getOpcode() == SPIRV::OpTypeVector; + unsigned Opcode = IsVectorTy ? SPIRV::OpBitwiseAndV : SPIRV::OpBitwiseAndS; + Register Zero = buildZerosVal(IntTy, I); + Register One = buildOnesVal(false, IntTy, I); + MachineBasicBlock &BB = *I.getParent(); + BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode)) + .addDef(BitIntReg) + .addUse(GR.getSPIRVTypeID(IntTy)) + .addUse(IntReg) + .addUse(One) + .constrainAllUses(TII, TRI, RBI); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpINotEqual)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(BoolTy)) + .addUse(BitIntReg) + .addUse(Zero) + .constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectTrunc(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + if (GR.isScalarOrVectorOfType(ResVReg, SPIRV::OpTypeBool)) { + Register IntReg = I.getOperand(1).getReg(); + const SPIRVType *ArgType = GR.getSPIRVTypeForVReg(IntReg); + return selectIntToBool(IntReg, ResVReg, ArgType, ResType, I); + } + bool IsSigned = GR.isScalarOrVectorSigned(ResType); + unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert; + return selectUnOp(ResVReg, ResType, I, Opcode); +} + +bool SPIRVInstructionSelector::selectConst(Register ResVReg, + const SPIRVType *ResType, + const APInt &Imm, + MachineInstr &I) const { + assert(ResType->getOpcode() != SPIRV::OpTypePointer || Imm.isNullValue()); + MachineBasicBlock &BB = *I.getParent(); + if (ResType->getOpcode() == SPIRV::OpTypePointer && Imm.isNullValue()) { + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .constrainAllUses(TII, TRI, RBI); + } + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)); + // <=32-bit integers should be caught by the sdag pattern. 
+  assert(Imm.getBitWidth() > 32);
+  addNumImm(Imm, MIB);
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectOpUndef(Register ResVReg,
+                                             const SPIRVType *ResType,
+                                             MachineInstr &I) const {
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpUndef))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+  assert(MO.isReg());
+  const SPIRVType *TypeInst = MRI->getVRegDef(MO.getReg());
+  if (TypeInst->getOpcode() != SPIRV::ASSIGN_TYPE)
+    return false;
+  assert(TypeInst->getOperand(1).isReg());
+  MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg());
+  return ImmInst->getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+static int64_t foldImm(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+  const SPIRVType *TypeInst = MRI->getVRegDef(MO.getReg());
+  MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg());
+  assert(ImmInst->getOpcode() == TargetOpcode::G_CONSTANT);
+  return ImmInst->getOperand(1).getCImm()->getZExtValue();
+}
+
+bool SPIRVInstructionSelector::selectInsertVal(Register ResVReg,
+                                               const SPIRVType *ResType,
+                                               MachineInstr &I) const {
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      // object to insert
+      .addUse(I.getOperand(3).getReg())
+      // composite to insert into
+      .addUse(I.getOperand(2).getReg())
+      // TODO: support arbitrary number of indices
+      .addImm(foldImm(I.getOperand(4), MRI))
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectExtractVal(Register ResVReg,
+                                                const SPIRVType *ResType,
+                                                MachineInstr &I) const {
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(I.getOperand(2).getReg())
+      // TODO: support arbitrary number of indices
+      .addImm(foldImm(I.getOperand(3), MRI))
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectInsertElt(Register ResVReg,
+                                               const SPIRVType *ResType,
+                                               MachineInstr &I) const {
+  if (isImm(I.getOperand(4), MRI))
+    return selectInsertVal(ResVReg, ResType, I);
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpVectorInsertDynamic))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(I.getOperand(2).getReg())
+      .addUse(I.getOperand(3).getReg())
+      .addUse(I.getOperand(4).getReg())
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectExtractElt(Register ResVReg,
+                                                const SPIRVType *ResType,
+                                                MachineInstr &I) const {
+  if (isImm(I.getOperand(3), MRI))
+    return selectExtractVal(ResVReg, ResType, I);
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpVectorExtractDynamic))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(I.getOperand(2).getReg())
+      .addUse(I.getOperand(3).getReg())
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectGEP(Register ResVReg,
+                                         const SPIRVType *ResType,
+                                         MachineInstr &I) const {
+  // In general we should also support OpAccessChain instrs here (i.e. not
+  // PtrAccessChain), but the SPIRV-LLVM Translator doesn't emit them at all,
+  // so neither do we, to stay compliant with its tests and, more importantly,
+  // with its consumers.
+  unsigned Opcode = I.getOperand(2).getImm() ? SPIRV::OpInBoundsPtrAccessChain
+                                             : SPIRV::OpPtrAccessChain;
+  auto Res = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+                 .addDef(ResVReg)
+                 .addUse(GR.getSPIRVTypeID(ResType))
+                 // Object to get a pointer to.
+                 .addUse(I.getOperand(3).getReg());
+  // Add the indices.
+  for (unsigned i = 4; i < I.getNumExplicitOperands(); ++i)
+    Res.addUse(I.getOperand(i).getReg());
+  return Res.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
+                                               const SPIRVType *ResType,
+                                               MachineInstr &I) const {
+  MachineBasicBlock &BB = *I.getParent();
+  switch (I.getIntrinsicID()) {
+  case Intrinsic::spv_load:
+    return selectLoad(ResVReg, ResType, I);
+    break;
+  case Intrinsic::spv_store:
+    return selectStore(I);
+    break;
+  case Intrinsic::spv_extractv:
+    return selectExtractVal(ResVReg, ResType, I);
+    break;
+  case Intrinsic::spv_insertv:
+    return selectInsertVal(ResVReg, ResType, I);
+    break;
+  case Intrinsic::spv_extractelt:
+    return selectExtractElt(ResVReg, ResType, I);
+    break;
+  case Intrinsic::spv_insertelt:
+    return selectInsertElt(ResVReg, ResType, I);
+    break;
+  case Intrinsic::spv_gep:
+    return selectGEP(ResVReg, ResType, I);
+    break;
+  case Intrinsic::spv_unref_global:
+  case Intrinsic::spv_init_global: {
+    MachineInstr *MI = MRI->getVRegDef(I.getOperand(1).getReg());
+    MachineInstr *Init = I.getNumExplicitOperands() > 2
+                             ? MRI->getVRegDef(I.getOperand(2).getReg())
+                             : nullptr;
+    assert(MI);
+    return selectGlobalValue(MI->getOperand(0).getReg(), *MI, Init);
+  } break;
+  case Intrinsic::spv_const_composite: {
+    // If no values are attached, the composite is a null constant.
+    bool IsNull = I.getNumExplicitDefs() + 1 == I.getNumExplicitOperands();
+    unsigned Opcode =
+        IsNull ? SPIRV::OpConstantNull : SPIRV::OpConstantComposite;
SPIRV::OpConstantNull : SPIRV::OpConstantComposite;
+    auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode))
+                   .addDef(ResVReg)
+                   .addUse(GR.getSPIRVTypeID(ResType));
+    // Skip the type metadata node; it was already used when the assign.type
+    // intrinsic for this composite was generated.
+    if (!IsNull) {
+      for (unsigned i = I.getNumExplicitDefs() + 1;
+           i < I.getNumExplicitOperands(); ++i) {
+        MIB.addUse(I.getOperand(i).getReg());
+      }
+    }
+    return MIB.constrainAllUses(TII, TRI, RBI);
+  } break;
+  case Intrinsic::spv_assign_name: {
+    auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpName));
+    MIB.addUse(I.getOperand(I.getNumExplicitDefs() + 1).getReg());
+    for (unsigned i = I.getNumExplicitDefs() + 2;
+         i < I.getNumExplicitOperands(); ++i) {
+      MIB.addImm(I.getOperand(i).getImm());
+    }
+    return MIB.constrainAllUses(TII, TRI, RBI);
+  } break;
+  case Intrinsic::spv_switch: {
+    auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSwitch));
+    for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
+      if (I.getOperand(i).isReg())
+        MIB.addReg(I.getOperand(i).getReg());
+      else if (I.getOperand(i).isCImm())
+        addNumImm(I.getOperand(i).getCImm()->getValue(), MIB);
+      else if (I.getOperand(i).isMBB())
+        MIB.addMBB(I.getOperand(i).getMBB());
+      else
+        llvm_unreachable("Unexpected OpSwitch operand");
+    }
+    return MIB.constrainAllUses(TII, TRI, RBI);
+  } break;
+  default:
+    llvm_unreachable("Intrinsic selection not implemented");
+  }
+  return true;
+}
+
+bool SPIRVInstructionSelector::selectFrameIndex(Register ResVReg,
+                                                const SPIRVType *ResType,
+                                                MachineInstr &I) const {
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addImm(static_cast<uint32_t>(SPIRV::StorageClass::Function))
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectBranch(MachineInstr &I) const {
+  // InstructionSelector walks backwards through the instructions. We can use
+  // both a G_BR and a G_BRCOND to create an OpBranchConditional. We hit G_BR
+  // first, so we can generate an OpBranchConditional here. If there is no
+  // G_BRCOND, we just use OpBranch for a regular unconditional branch.
+  const MachineInstr *PrevI = I.getPrevNode();
+  MachineBasicBlock &MBB = *I.getParent();
+  if (PrevI != nullptr && PrevI->getOpcode() == TargetOpcode::G_BRCOND) {
+    return BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpBranchConditional))
+        .addUse(PrevI->getOperand(0).getReg())
+        .addMBB(PrevI->getOperand(1).getMBB())
+        .addMBB(I.getOperand(0).getMBB())
+        .constrainAllUses(TII, TRI, RBI);
+  }
+  return BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpBranch))
+      .addMBB(I.getOperand(0).getMBB())
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectBranchCond(MachineInstr &I) const {
+  // InstructionSelector walks backwards through the instructions. For an
+  // explicit conditional branch with no fallthrough, we use both a G_BR and a
+  // G_BRCOND to create an OpBranchConditional. We should hit G_BR first, and
+  // generate the OpBranchConditional in selectBranch above.
+  //
+  // If an OpBranchConditional has been generated, we simply return, as the
+  // work is already done. If there is no OpBranchConditional, LLVM must be
+  // relying on implicit fallthrough to the next basic block, so we need to
+  // create an OpBranchConditional with an explicit "false" argument pointing
+  // to the next basic block that LLVM would fall through to.
+  const MachineInstr *NextI = I.getNextNode();
+  // Check if this has already been successfully selected.
+  if (NextI != nullptr && NextI->getOpcode() == SPIRV::OpBranchConditional)
+    return true;
+  // Must be relying on implicit block fallthrough, so generate an
+  // OpBranchConditional with the "next" basic block as the "false" target.
+  MachineBasicBlock &MBB = *I.getParent();
+  unsigned NextMBBNum = MBB.getNextNode()->getNumber();
+  MachineBasicBlock *NextMBB = I.getMF()->getBlockNumbered(NextMBBNum);
+  return BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpBranchConditional))
+      .addUse(I.getOperand(0).getReg())
+      .addMBB(I.getOperand(1).getMBB())
+      .addMBB(NextMBB)
+      .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectPhi(Register ResVReg,
+                                         const SPIRVType *ResType,
+                                         MachineInstr &I) const {
+  auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpPhi))
+                 .addDef(ResVReg)
+                 .addUse(GR.getSPIRVTypeID(ResType));
+  const unsigned NumOps = I.getNumOperands();
+  for (unsigned i = 1; i < NumOps; i += 2) {
+    MIB.addUse(I.getOperand(i + 0).getReg());
+    MIB.addMBB(I.getOperand(i + 1).getMBB());
+  }
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectGlobalValue(
+    Register ResVReg, MachineInstr &I, const MachineInstr *Init) const {
+  // FIXME: don't use MachineIRBuilder here, replace it with BuildMI.
+  MachineIRBuilder MIRBuilder(I);
+  const GlobalValue *GV = I.getOperand(1).getGlobal();
+  SPIRVType *ResType = GR.getOrCreateSPIRVType(
+      GV->getType(), MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false);
+
+  std::string GlobalIdent = GV->getGlobalIdentifier();
+  // TODO: support @llvm.global.annotations.
+  auto GlobalVar = cast<GlobalVariable>(GV);
+
+  bool HasInit = GlobalVar->hasInitializer() &&
+                 !isa<UndefValue>(GlobalVar->getInitializer());
+  // Skip the empty declaration for GVs with initializers until we get the
+  // declaration with the initializer passed in.
+  if (HasInit && !Init)
+    return true;
+
+  unsigned AddrSpace = GV->getAddressSpace();
+  SPIRV::StorageClass Storage = addressSpaceToStorageClass(AddrSpace);
+  bool HasLnkTy = GV->getLinkage() != GlobalValue::InternalLinkage &&
+                  Storage != SPIRV::StorageClass::Function;
+  SPIRV::LinkageType LnkType =
+      (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+          ? SPIRV::LinkageType::Import
+          : SPIRV::LinkageType::Export;
+
+  Register Reg = GR.buildGlobalVariable(ResVReg, ResType, GlobalIdent, GV,
+                                        Storage, Init, GlobalVar->isConstant(),
+                                        HasLnkTy, LnkType, MIRBuilder, true);
+  return Reg.isValid();
+}
+
+namespace llvm {
+InstructionSelector *
+createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+                               const SPIRVSubtarget &Subtarget,
+                               const RegisterBankInfo &RBI) {
+  return new SPIRVInstructionSelector(TM, Subtarget, RBI);
+}
+} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
new file mode 100644
index 000000000000..87f9e9545dd3
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -0,0 +1,301 @@
+//===- SPIRVLegalizerInfo.cpp --- SPIR-V Legalization Rules ------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the targeting of the MachineLegalizer class for SPIR-V.
+// +//===----------------------------------------------------------------------===// + +#include "SPIRVLegalizerInfo.h" +#include "SPIRV.h" +#include "SPIRVGlobalRegistry.h" +#include "SPIRVSubtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +using namespace llvm; +using namespace llvm::LegalizeActions; +using namespace llvm::LegalityPredicates; + +static const std::set TypeFoldingSupportingOpcs = { + TargetOpcode::G_ADD, + TargetOpcode::G_FADD, + TargetOpcode::G_SUB, + TargetOpcode::G_FSUB, + TargetOpcode::G_MUL, + TargetOpcode::G_FMUL, + TargetOpcode::G_SDIV, + TargetOpcode::G_UDIV, + TargetOpcode::G_FDIV, + TargetOpcode::G_SREM, + TargetOpcode::G_UREM, + TargetOpcode::G_FREM, + TargetOpcode::G_FNEG, + TargetOpcode::G_CONSTANT, + TargetOpcode::G_FCONSTANT, + TargetOpcode::G_AND, + TargetOpcode::G_OR, + TargetOpcode::G_XOR, + TargetOpcode::G_SHL, + TargetOpcode::G_ASHR, + TargetOpcode::G_LSHR, + TargetOpcode::G_SELECT, + TargetOpcode::G_EXTRACT_VECTOR_ELT, +}; + +bool isTypeFoldingSupported(unsigned Opcode) { + return TypeFoldingSupportingOpcs.count(Opcode) > 0; +} + +SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { + using namespace TargetOpcode; + + this->ST = &ST; + GR = ST.getSPIRVGlobalRegistry(); + + const LLT s1 = LLT::scalar(1); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + const LLT v16s64 = LLT::fixed_vector(16, 64); + const LLT v16s32 = LLT::fixed_vector(16, 32); + const LLT v16s16 = LLT::fixed_vector(16, 16); + const LLT v16s8 = LLT::fixed_vector(16, 8); + const LLT v16s1 = LLT::fixed_vector(16, 1); + + const LLT v8s64 = LLT::fixed_vector(8, 64); + const LLT v8s32 = LLT::fixed_vector(8, 32); + const LLT v8s16 = LLT::fixed_vector(8, 16); + const LLT v8s8 = LLT::fixed_vector(8, 8); + const LLT v8s1 = LLT::fixed_vector(8, 1); + + const LLT v4s64 = LLT::fixed_vector(4, 64); + const LLT v4s32 = LLT::fixed_vector(4, 32); + const LLT v4s16 = LLT::fixed_vector(4, 16); + const LLT v4s8 = LLT::fixed_vector(4, 8); + const LLT v4s1 = LLT::fixed_vector(4, 1); + + const LLT v3s64 = LLT::fixed_vector(3, 64); + const LLT v3s32 = LLT::fixed_vector(3, 32); + const LLT v3s16 = LLT::fixed_vector(3, 16); + const LLT v3s8 = LLT::fixed_vector(3, 8); + const LLT v3s1 = LLT::fixed_vector(3, 1); + + const LLT v2s64 = LLT::fixed_vector(2, 64); + const LLT v2s32 = LLT::fixed_vector(2, 32); + const LLT v2s16 = LLT::fixed_vector(2, 16); + const LLT v2s8 = LLT::fixed_vector(2, 8); + const LLT v2s1 = LLT::fixed_vector(2, 1); + + const unsigned PSize = ST.getPointerSize(); + const LLT p0 = LLT::pointer(0, PSize); // Function + const LLT p1 = LLT::pointer(1, PSize); // CrossWorkgroup + const LLT p2 = LLT::pointer(2, PSize); // UniformConstant + const LLT p3 = LLT::pointer(3, PSize); // Workgroup + const LLT p4 = LLT::pointer(4, PSize); // Generic + const LLT p5 = LLT::pointer(5, PSize); // Input + + // TODO: remove copy-pasting here by using concatenation in some way. 
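+  // As a reminder of the GlobalISel LLT shorthands used above and in the
+  // lists below (a note, not new functionality): sN is an N-bit scalar,
+  // vMsN is a fixed vector of M N-bit elements, and pK is a pointer in
+  // address space K; e.g. v4s32 is LLT::fixed_vector(4, 32), printed as
+  // <4 x s32> in MIR.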
+ auto allPtrsScalarsAndVectors = { + p0, p1, p2, p3, p4, p5, s1, s8, s16, + s32, s64, v2s1, v2s8, v2s16, v2s32, v2s64, v3s1, v3s8, + v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64, v8s1, + v8s8, v8s16, v8s32, v8s64, v16s1, v16s8, v16s16, v16s32, v16s64}; + + auto allScalarsAndVectors = { + s1, s8, s16, s32, s64, v2s1, v2s8, v2s16, v2s32, v2s64, + v3s1, v3s8, v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64, + v8s1, v8s8, v8s16, v8s32, v8s64, v16s1, v16s8, v16s16, v16s32, v16s64}; + + auto allIntScalarsAndVectors = {s8, s16, s32, s64, v2s8, v2s16, + v2s32, v2s64, v3s8, v3s16, v3s32, v3s64, + v4s8, v4s16, v4s32, v4s64, v8s8, v8s16, + v8s32, v8s64, v16s8, v16s16, v16s32, v16s64}; + + auto allBoolScalarsAndVectors = {s1, v2s1, v3s1, v4s1, v8s1, v16s1}; + + auto allIntScalars = {s8, s16, s32, s64}; + + auto allFloatScalarsAndVectors = { + s16, s32, s64, v2s16, v2s32, v2s64, v3s16, v3s32, v3s64, + v4s16, v4s32, v4s64, v8s16, v8s32, v8s64, v16s16, v16s32, v16s64}; + + auto allFloatAndIntScalars = allIntScalars; + + auto allPtrs = {p0, p1, p2, p3, p4, p5}; + auto allWritablePtrs = {p0, p1, p3, p4}; + + for (auto Opc : TypeFoldingSupportingOpcs) + getActionDefinitionsBuilder(Opc).custom(); + + getActionDefinitionsBuilder(G_GLOBAL_VALUE).alwaysLegal(); + + // TODO: add proper rules for vectors legalization. + getActionDefinitionsBuilder({G_BUILD_VECTOR, G_SHUFFLE_VECTOR}).alwaysLegal(); + + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE}) + .legalIf(all(typeInSet(0, allWritablePtrs), typeInSet(1, allPtrs))); + + getActionDefinitionsBuilder(G_ADDRSPACE_CAST) + .legalForCartesianProduct(allPtrs, allPtrs); + + getActionDefinitionsBuilder({G_LOAD, G_STORE}).legalIf(typeInSet(1, allPtrs)); + + getActionDefinitionsBuilder(G_BITREVERSE).legalFor(allFloatScalarsAndVectors); + + getActionDefinitionsBuilder(G_FMA).legalFor(allFloatScalarsAndVectors); + + getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) + .legalForCartesianProduct(allIntScalarsAndVectors, + allFloatScalarsAndVectors); + + getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) + .legalForCartesianProduct(allFloatScalarsAndVectors, + allScalarsAndVectors); + + getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS}) + .legalFor(allIntScalarsAndVectors); + + getActionDefinitionsBuilder(G_CTPOP).legalForCartesianProduct( + allIntScalarsAndVectors, allIntScalarsAndVectors); + + getActionDefinitionsBuilder(G_PHI).legalFor(allPtrsScalarsAndVectors); + + getActionDefinitionsBuilder(G_BITCAST).legalIf(all( + typeInSet(0, allPtrsScalarsAndVectors), + typeInSet(1, allPtrsScalarsAndVectors), + LegalityPredicate(([=](const LegalityQuery &Query) { + return Query.Types[0].getSizeInBits() == Query.Types[1].getSizeInBits(); + })))); + + getActionDefinitionsBuilder(G_IMPLICIT_DEF).alwaysLegal(); + + getActionDefinitionsBuilder(G_INTTOPTR) + .legalForCartesianProduct(allPtrs, allIntScalars); + getActionDefinitionsBuilder(G_PTRTOINT) + .legalForCartesianProduct(allIntScalars, allPtrs); + getActionDefinitionsBuilder(G_PTR_ADD).legalForCartesianProduct( + allPtrs, allIntScalars); + + // ST.canDirectlyComparePointers() for pointer args is supported in + // legalizeCustom(). 
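+  // Roughly, the rewrite done there (a sketch; the register names are
+  // hypothetical, and 32-bit pointers are assumed): a pointer compare that
+  // cannot be selected directly, e.g.
+  //   %c:_(s1) = G_ICMP intpred(ult), %a:_(p0), %b:_(p0)
+  // becomes
+  //   %ai:_(s32) = G_PTRTOINT %a:_(p0)
+  //   %bi:_(s32) = G_PTRTOINT %b:_(p0)
+  //   %c:_(s1) = G_ICMP intpred(ult), %ai:_(s32), %bi:_(s32)
+  // while eq/ne compares may stay on pointers if the target can compare them
+  // directly.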
+ getActionDefinitionsBuilder(G_ICMP).customIf( + all(typeInSet(0, allBoolScalarsAndVectors), + typeInSet(1, allPtrsScalarsAndVectors))); + + getActionDefinitionsBuilder(G_FCMP).legalIf( + all(typeInSet(0, allBoolScalarsAndVectors), + typeInSet(1, allFloatScalarsAndVectors))); + + getActionDefinitionsBuilder({G_ATOMICRMW_OR, G_ATOMICRMW_ADD, G_ATOMICRMW_AND, + G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, + G_ATOMICRMW_SUB, G_ATOMICRMW_XOR, + G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN}) + .legalForCartesianProduct(allIntScalars, allWritablePtrs); + + getActionDefinitionsBuilder(G_ATOMICRMW_XCHG) + .legalForCartesianProduct(allFloatAndIntScalars, allWritablePtrs); + + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS).lower(); + // TODO: add proper legalization rules. + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG).alwaysLegal(); + + getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_SMULO, G_UMULO}) + .alwaysLegal(); + + // Extensions. + getActionDefinitionsBuilder({G_TRUNC, G_ZEXT, G_SEXT, G_ANYEXT}) + .legalForCartesianProduct(allScalarsAndVectors); + + // FP conversions. + getActionDefinitionsBuilder({G_FPTRUNC, G_FPEXT}) + .legalForCartesianProduct(allFloatScalarsAndVectors); + + // Pointer-handling. + getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); + + // Control-flow. + getActionDefinitionsBuilder(G_BRCOND).legalFor({s1}); + + getActionDefinitionsBuilder({G_FPOW, + G_FEXP, + G_FEXP2, + G_FLOG, + G_FLOG2, + G_FABS, + G_FMINNUM, + G_FMAXNUM, + G_FCEIL, + G_FCOS, + G_FSIN, + G_FSQRT, + G_FFLOOR, + G_FRINT, + G_FNEARBYINT, + G_INTRINSIC_ROUND, + G_INTRINSIC_TRUNC, + G_FMINIMUM, + G_FMAXIMUM, + G_INTRINSIC_ROUNDEVEN}) + .legalFor(allFloatScalarsAndVectors); + + getActionDefinitionsBuilder(G_FCOPYSIGN) + .legalForCartesianProduct(allFloatScalarsAndVectors, + allFloatScalarsAndVectors); + + getActionDefinitionsBuilder(G_FPOWI).legalForCartesianProduct( + allFloatScalarsAndVectors, allIntScalarsAndVectors); + + getLegacyLegalizerInfo().computeTables(); + verify(*ST.getInstrInfo()); +} + +static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType, + LegalizerHelper &Helper, + MachineRegisterInfo &MRI, + SPIRVGlobalRegistry *GR) { + Register ConvReg = MRI.createGenericVirtualRegister(ConvTy); + GR->assignSPIRVTypeToVReg(SpirvType, ConvReg, Helper.MIRBuilder.getMF()); + Helper.MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT) + .addDef(ConvReg) + .addUse(Reg); + return ConvReg; +} + +bool SPIRVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, + MachineInstr &MI) const { + auto Opc = MI.getOpcode(); + MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + if (!isTypeFoldingSupported(Opc)) { + assert(Opc == TargetOpcode::G_ICMP); + assert(GR->getSPIRVTypeForVReg(MI.getOperand(0).getReg())); + auto &Op0 = MI.getOperand(2); + auto &Op1 = MI.getOperand(3); + Register Reg0 = Op0.getReg(); + Register Reg1 = Op1.getReg(); + CmpInst::Predicate Cond = + static_cast(MI.getOperand(1).getPredicate()); + if ((!ST->canDirectlyComparePointers() || + (Cond != CmpInst::ICMP_EQ && Cond != CmpInst::ICMP_NE)) && + MRI.getType(Reg0).isPointer() && MRI.getType(Reg1).isPointer()) { + LLT ConvT = LLT::scalar(ST->getPointerSize()); + Type *LLVMTy = IntegerType::get(MI.getMF()->getFunction().getContext(), + ST->getPointerSize()); + SPIRVType *SpirvTy = GR->getOrCreateSPIRVType(LLVMTy, Helper.MIRBuilder); + Op0.setReg(convertPtrToInt(Reg0, ConvT, SpirvTy, Helper, MRI, GR)); + Op1.setReg(convertPtrToInt(Reg1, ConvT, SpirvTy, Helper, MRI, GR)); + } + return true; + } + // TODO: implement legalization for 
other opcodes. + return true; +} diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h new file mode 100644 index 000000000000..2541ff29edb0 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h @@ -0,0 +1,36 @@ +//===- SPIRVLegalizerInfo.h --- SPIR-V Legalization Rules --------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the targeting of the MachineLegalizer class for SPIR-V. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVMACHINELEGALIZER_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVMACHINELEGALIZER_H + +#include "SPIRVGlobalRegistry.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +bool isTypeFoldingSupported(unsigned Opcode); + +namespace llvm { + +class LLVMContext; +class SPIRVSubtarget; + +// This class provides the information for legalizing SPIR-V instructions. +class SPIRVLegalizerInfo : public LegalizerInfo { + const SPIRVSubtarget *ST; + SPIRVGlobalRegistry *GR; + +public: + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + SPIRVLegalizerInfo(const SPIRVSubtarget &ST); +}; +} // namespace llvm +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVMACHINELEGALIZER_H diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp new file mode 100644 index 000000000000..8e4ab973bf07 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp @@ -0,0 +1,58 @@ +//=- SPIRVMCInstLower.cpp - Convert SPIR-V MachineInstr to MCInst -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower SPIR-V MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "SPIRVMCInstLower.h" +#include "SPIRV.h" +#include "SPIRVModuleAnalysis.h" +#include "SPIRVUtils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; + +void SPIRVMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI, + SPIRV::ModuleAnalysisInfo *MAI) const { + OutMI.setOpcode(MI->getOpcode()); + const MachineFunction *MF = MI->getMF(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp; + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_GlobalAddress: { + Register FuncReg = MAI->getFuncReg(MO.getGlobal()->getGlobalIdentifier()); + assert(FuncReg.isValid() && "Cannot find function Id"); + MCOp = MCOperand::createReg(FuncReg); + break; + } + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createReg(MAI->getOrCreateMBBRegister(*MO.getMBB())); + break; + case MachineOperand::MO_Register: { + Register NewReg = MAI->getRegisterAlias(MF, MO.getReg()); + MCOp = MCOperand::createReg(NewReg.isValid() ? 
NewReg : MO.getReg()); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_FPImmediate: + MCOp = MCOperand::createDFPImm( + MO.getFPImm()->getValueAPF().convertToFloat()); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.h b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.h new file mode 100644 index 000000000000..8392656ed067 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.h @@ -0,0 +1,29 @@ +//=- SPIRVMCInstLower.h -- Convert SPIR-V MachineInstr to MCInst --*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVMCINSTLOWER_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVMCINSTLOWER_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCInst; +class MachineInstr; +namespace SPIRV { +struct ModuleAnalysisInfo; +} // namespace SPIRV + +// This class is used to lower a MachineInstr into an MCInst. +class LLVM_LIBRARY_VISIBILITY SPIRVMCInstLower { +public: + void lower(const MachineInstr *MI, MCInst &OutMI, + SPIRV::ModuleAnalysisInfo *MAI) const; +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVMCINSTLOWER_H diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp new file mode 100644 index 000000000000..fa78dd7942c6 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -0,0 +1,250 @@ +//===- SPIRVModuleAnalysis.cpp - analysis of global instrs & regs - C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The analysis collects instructions that should be output at the module level +// and performs the global register numbering. +// +// The results of this analysis are used in AsmPrinter to rename registers +// globally and to output required instructions at the module level. +// +//===----------------------------------------------------------------------===// + +#include "SPIRVModuleAnalysis.h" +#include "SPIRV.h" +#include "SPIRVGlobalRegistry.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "TargetInfo/SPIRVTargetInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" + +using namespace llvm; + +#define DEBUG_TYPE "spirv-module-analysis" + +char llvm::SPIRVModuleAnalysis::ID = 0; + +namespace llvm { +void initializeSPIRVModuleAnalysisPass(PassRegistry &); +} // namespace llvm + +INITIALIZE_PASS(SPIRVModuleAnalysis, DEBUG_TYPE, "SPIRV module analysis", true, + true) + +// Retrieve an unsigned from an MDNode with a list of them as operands. 
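+// For example, for an MDNode holding !{i32 2, i32 0}, OpIndex 0 yields 2 and
+// OpIndex 1 yields 0, while an out-of-range OpIndex returns DefaultVal.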
+static unsigned getMetadataUInt(MDNode *MdNode, unsigned OpIndex,
+                                unsigned DefaultVal = 0) {
+  if (MdNode && OpIndex < MdNode->getNumOperands()) {
+    const auto &Op = MdNode->getOperand(OpIndex);
+    return mdconst::extract<ConstantInt>(Op)->getZExtValue();
+  }
+  return DefaultVal;
+}
+
+void SPIRVModuleAnalysis::setBaseInfo(const Module &M) {
+  MAI.MaxID = 0;
+  for (int i = 0; i < SPIRV::NUM_MODULE_SECTIONS; i++)
+    MAI.MS[i].clear();
+  MAI.RegisterAliasTable.clear();
+  MAI.InstrsToDelete.clear();
+  MAI.FuncNameMap.clear();
+  MAI.GlobalVarList.clear();
+
+  // TODO: determine memory model and source language from the configuration.
+  MAI.Mem = SPIRV::MemoryModel::OpenCL;
+  MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C;
+  unsigned PtrSize = ST->getPointerSize();
+  MAI.Addr = PtrSize == 32   ? SPIRV::AddressingModel::Physical32
+             : PtrSize == 64 ? SPIRV::AddressingModel::Physical64
+                             : SPIRV::AddressingModel::Logical;
+  // Get the OpenCL version number from metadata.
+  // TODO: support other source languages.
+  MAI.SrcLangVersion = 0;
+  if (auto VerNode = M.getNamedMetadata("opencl.ocl.version")) {
+    // Construct version literal according to OpenCL 2.2 environment spec.
+    auto VersionMD = VerNode->getOperand(0);
+    unsigned MajorNum = getMetadataUInt(VersionMD, 0, 2);
+    unsigned MinorNum = getMetadataUInt(VersionMD, 1);
+    unsigned RevNum = getMetadataUInt(VersionMD, 2);
+    MAI.SrcLangVersion = 0 | (MajorNum << 16) | (MinorNum << 8) | RevNum;
+  }
+}
+
+// True if there is an instruction in the MS list with all the same operands as
+// the given instruction has (after the given starting index).
+// TODO: maybe it needs to check Opcodes too.
+static bool findSameInstrInMS(const MachineInstr &A,
+                              SPIRV::ModuleSectionType MSType,
+                              SPIRV::ModuleAnalysisInfo &MAI,
+                              bool UpdateRegAliases,
+                              unsigned StartOpIndex = 0) {
+  for (const auto *B : MAI.MS[MSType]) {
+    const unsigned NumAOps = A.getNumOperands();
+    if (NumAOps == B->getNumOperands() && A.getNumDefs() == B->getNumDefs()) {
+      bool AllOpsMatch = true;
+      for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) {
+        if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) {
+          Register RegA = A.getOperand(i).getReg();
+          Register RegB = B->getOperand(i).getReg();
+          AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) ==
+                        MAI.getRegisterAlias(B->getMF(), RegB);
+        } else {
+          AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i));
+        }
+      }
+      if (AllOpsMatch) {
+        if (UpdateRegAliases) {
+          assert(A.getOperand(0).isReg() && B->getOperand(0).isReg());
+          Register LocalReg = A.getOperand(0).getReg();
+          Register GlobalReg =
+              MAI.getRegisterAlias(B->getMF(), B->getOperand(0).getReg());
+          MAI.setRegisterAlias(A.getMF(), LocalReg, GlobalReg);
+        }
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Look for IDs declared with Import linkage, and map the imported name string
+// to the register defining that variable (which will usually be the result of
+// an OpFunction). This lets us call externally imported functions using
+// the correct ID registers.
+void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
+                                           const Function &F) {
+  if (MI.getOpcode() == SPIRV::OpDecorate) {
+    // If it's got Import linkage.
+    auto Dec = MI.getOperand(1).getImm();
+    if (Dec == static_cast<unsigned>(SPIRV::Decoration::LinkageAttributes)) {
+      auto Lnk = MI.getOperand(MI.getNumOperands() - 1).getImm();
+      if (Lnk == static_cast<unsigned>(SPIRV::LinkageType::Import)) {
+        // Map imported function name to function ID register.
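+        // E.g. (a sketch with hypothetical ids) for
+        //   OpDecorate %f LinkageAttributes "foo" Import
+        // this records FuncNameMap["foo"] = <global alias of %f>, so that
+        // calls to the external "foo" can be emitted against that id.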
+ std::string Name = getStringImm(MI, 2); + Register Target = MI.getOperand(0).getReg(); + // TODO: check defs from different MFs. + MAI.FuncNameMap[Name] = MAI.getRegisterAlias(MI.getMF(), Target); + } + } + } else if (MI.getOpcode() == SPIRV::OpFunction) { + // Record all internal OpFunction declarations. + Register Reg = MI.defs().begin()->getReg(); + Register GlobalReg = MAI.getRegisterAlias(MI.getMF(), Reg); + assert(GlobalReg.isValid()); + // TODO: check that it does not conflict with existing entries. + MAI.FuncNameMap[F.getGlobalIdentifier()] = GlobalReg; + } +} + +// Collect the given instruction in the specified MS. We assume global register +// numbering has already occurred by this point. We can directly compare reg +// arguments when detecting duplicates. +static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI, + SPIRV::ModuleSectionType MSType, + bool IsConstOrType = false) { + MAI.setSkipEmission(&MI); + if (findSameInstrInMS(MI, MSType, MAI, IsConstOrType, IsConstOrType ? 1 : 0)) + return; // Found a duplicate, so don't add it. + // No duplicates, so add it. + MAI.MS[MSType].push_back(&MI); +} + +// Some global instructions make reference to function-local ID regs, so cannot +// be correctly collected until these registers are globally numbered. +void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) { + for (auto F = M.begin(), E = M.end(); F != E; ++F) { + if ((*F).isDeclaration()) + continue; + MachineFunction *MF = MMI->getMachineFunction(*F); + assert(MF); + unsigned FCounter = 0; + for (MachineBasicBlock &MBB : *MF) + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == SPIRV::OpFunction) + FCounter++; + if (MAI.getSkipEmission(&MI)) + continue; + const unsigned OpCode = MI.getOpcode(); + const bool IsFuncOrParm = + OpCode == SPIRV::OpFunction || OpCode == SPIRV::OpFunctionParameter; + const bool IsConstOrType = + TII->isConstantInstr(MI) || TII->isTypeDeclInstr(MI); + if (OpCode == SPIRV::OpName || OpCode == SPIRV::OpMemberName) { + collectOtherInstr(MI, MAI, SPIRV::MB_DebugNames); + } else if (OpCode == SPIRV::OpEntryPoint) { + collectOtherInstr(MI, MAI, SPIRV::MB_EntryPoints); + } else if (TII->isDecorationInstr(MI)) { + collectOtherInstr(MI, MAI, SPIRV::MB_Annotations); + collectFuncNames(MI, *F); + } else if (IsConstOrType || (FCounter > 1 && IsFuncOrParm)) { + // Now OpSpecConstant*s are not in DT, + // but they need to be collected anyway. + enum SPIRV::ModuleSectionType Type = + IsFuncOrParm ? SPIRV::MB_ExtFuncDecls : SPIRV::MB_TypeConstVars; + collectOtherInstr(MI, MAI, Type, IsConstOrType); + } else if (OpCode == SPIRV::OpFunction) { + collectFuncNames(MI, *F); + } + } + } +} + +// Number registers in all functions globally from 0 onwards and store +// the result in global register alias table. 
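+// E.g. (hypothetical numbering) %1 in the first function may be aliased to
+// global %0, while %1 in the next function gets the next free id, so every
+// value ends up with a module-unique SPIR-V id at emission time.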
+void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
+  for (auto F = M.begin(), E = M.end(); F != E; ++F) {
+    if ((*F).isDeclaration())
+      continue;
+    MachineFunction *MF = MMI->getMachineFunction(*F);
+    assert(MF);
+    for (MachineBasicBlock &MBB : *MF) {
+      for (MachineInstr &MI : MBB) {
+        for (MachineOperand &Op : MI.operands()) {
+          if (!Op.isReg())
+            continue;
+          Register Reg = Op.getReg();
+          if (MAI.hasRegisterAlias(MF, Reg))
+            continue;
+          Register NewReg = Register::index2VirtReg(MAI.getNextID());
+          MAI.setRegisterAlias(MF, Reg, NewReg);
+        }
+      }
+    }
+  }
+}
+
+struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI;
+
+void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+}
+
+bool SPIRVModuleAnalysis::runOnModule(Module &M) {
+  SPIRVTargetMachine &TM =
+      getAnalysis<TargetPassConfig>().getTM<SPIRVTargetMachine>();
+  ST = TM.getSubtargetImpl();
+  GR = ST->getSPIRVGlobalRegistry();
+  TII = ST->getInstrInfo();
+
+  MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+  setBaseInfo(M);
+
+  // TODO: Process type/const/global var/func decl instructions, number their
+  // destination registers from 0 to N, collect Extensions and Capabilities.
+
+  // Number the rest of the registers from N+1 onwards.
+  numberRegistersGlobally(M);
+
+  // Collect OpName, OpEntryPoint, OpDecorate etc., process other instructions.
+  processOtherInstrs(M);
+
+  return false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
new file mode 100644
index 000000000000..1bef13d458c1
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -0,0 +1,137 @@
+//===- SPIRVModuleAnalysis.h - analysis of global instrs & regs -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The analysis collects instructions that should be output at the module level
+// and performs the global register numbering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+class MachineFunction;
+class MachineModuleInfo;
+
+namespace SPIRV {
+// The enum contains logical module sections for the instruction collection.
+enum ModuleSectionType {
+  // MB_Capabilities, MB_Extensions, MB_ExtInstImports, MB_MemoryModel,
+  MB_EntryPoints, // All OpEntryPoint instructions (if any).
+  // MB_ExecutionModes, MB_DebugSourceAndStrings,
+  MB_DebugNames,           // All OpName and OpMemberName instrs.
+  MB_DebugModuleProcessed, // All OpModuleProcessed instructions.
+  MB_Annotations,          // OpDecorate, OpMemberDecorate etc.
+  MB_TypeConstVars,        // OpTypeXXX, OpConstantXXX, and global OpVariables.
+  MB_ExtFuncDecls,         // OpFunction etc. to declare for external funcs.
+  NUM_MODULE_SECTIONS      // Total number of sections requiring basic blocks.
+};
+
+using InstrList = SmallVector<MachineInstr *>;
+// Maps a local register to the corresponding global alias.
+using LocalToGlobalRegTable = std::map<Register, Register>;
+using RegisterAliasMapTy =
+    std::map<const MachineFunction *, LocalToGlobalRegTable>;
+
+// The struct contains results of the module analysis and methods
+// to access them.
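+// AsmPrinter is the main consumer of this struct: it uses getSkipEmission()
+// to drop instructions already collected at the module level,
+// getRegisterAlias() to rename registers, and the MS lists to emit the
+// module-level sections in order.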
+struct ModuleAnalysisInfo {
+  SPIRV::MemoryModel Mem;
+  SPIRV::AddressingModel Addr;
+  SPIRV::SourceLanguage SrcLang;
+  unsigned SrcLangVersion;
+  // Contains the list of all global OpVariables in the module.
+  SmallVector<MachineInstr *> GlobalVarList;
+  // Maps function names to corresponding function ID registers.
+  StringMap<Register> FuncNameMap;
+  // The set contains machine instructions which are necessary
+  // for correct MIR but will not be emitted in function bodies.
+  DenseSet<MachineInstr *> InstrsToDelete;
+  // The table contains global aliases of local registers for each machine
+  // function. The aliases are used to substitute local registers during
+  // code emission.
+  RegisterAliasMapTy RegisterAliasTable;
+  // The counter holds the maximum ID we have in the module.
+  unsigned MaxID;
+  // The array contains lists of MIs for each module section.
+  InstrList MS[NUM_MODULE_SECTIONS];
+  // The table maps MBB number to SPIR-V unique ID register.
+  DenseMap<int, Register> BBNumToRegMap;
+
+  Register getFuncReg(std::string FuncName) {
+    auto FuncReg = FuncNameMap.find(FuncName);
+    assert(FuncReg != FuncNameMap.end() && "Cannot find function Id");
+    return FuncReg->second;
+  }
+  InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; }
+  void setSkipEmission(MachineInstr *MI) { InstrsToDelete.insert(MI); }
+  bool getSkipEmission(const MachineInstr *MI) {
+    return InstrsToDelete.contains(MI);
+  }
+  void setRegisterAlias(const MachineFunction *MF, Register Reg,
+                        Register AliasReg) {
+    RegisterAliasTable[MF][Reg] = AliasReg;
+  }
+  Register getRegisterAlias(const MachineFunction *MF, Register Reg) {
+    auto RI = RegisterAliasTable[MF].find(Reg);
+    if (RI == RegisterAliasTable[MF].end()) {
+      return Register(0);
+    }
+    return RegisterAliasTable[MF][Reg];
+  }
+  bool hasRegisterAlias(const MachineFunction *MF, Register Reg) {
+    return RegisterAliasTable.find(MF) != RegisterAliasTable.end() &&
+           RegisterAliasTable[MF].find(Reg) != RegisterAliasTable[MF].end();
+  }
+  unsigned getNextID() { return MaxID++; }
+  bool hasMBBRegister(const MachineBasicBlock &MBB) {
+    return BBNumToRegMap.find(MBB.getNumber()) != BBNumToRegMap.end();
+  }
+  // Convert an MBB number to the corresponding ID register.
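+  // E.g. a branch in one function and the label of its target block both ask
+  // for the same MBB here and therefore receive the same ID register.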
+  Register getOrCreateMBBRegister(const MachineBasicBlock &MBB) {
+    auto f = BBNumToRegMap.find(MBB.getNumber());
+    if (f != BBNumToRegMap.end())
+      return f->second;
+    Register NewReg = Register::index2VirtReg(getNextID());
+    BBNumToRegMap[MBB.getNumber()] = NewReg;
+    return NewReg;
+  }
+};
+} // namespace SPIRV
+
+struct SPIRVModuleAnalysis : public ModulePass {
+  static char ID;
+
+public:
+  SPIRVModuleAnalysis() : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  static struct SPIRV::ModuleAnalysisInfo MAI;
+
+private:
+  void setBaseInfo(const Module &M);
+  template <typename T> void collectTypesConstsVars();
+  void processDefInstrs(const Module &M);
+  void collectFuncNames(MachineInstr &MI, const Function &F);
+  void processOtherInstrs(const Module &M);
+  void numberRegistersGlobally(const Module &M);
+
+  const SPIRVSubtarget *ST;
+  SPIRVGlobalRegistry *GR;
+  const SPIRVInstrInfo *TII;
+  MachineModuleInfo *MMI;
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
new file mode 100644
index 000000000000..687f84046650
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -0,0 +1,440 @@
+//===-- SPIRVPreLegalizer.cpp - prepare IR for legalization -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass prepares IR for legalization: it assigns SPIR-V types to registers
+// and removes the intrinsics which held these types during IR translation.
+// It also processes constants and registers them in GR to avoid duplication.
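+//
+// For instance (an informal sketch), a
+//   %v = G_FADD %a, %b
+// tagged with an @llvm.spv.assign.type intrinsic ends up as
+//   %tmp = G_FADD %a, %b
+//   %v = ASSIGN_TYPE %tmp, %float_ty
+// so later passes can look up the SPIR-V type of %v.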
+// +//===----------------------------------------------------------------------===// + +#include "SPIRV.h" +#include "SPIRVGlobalRegistry.h" +#include "SPIRVSubtarget.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/Target/TargetIntrinsicInfo.h" + +#define DEBUG_TYPE "spirv-prelegalizer" + +using namespace llvm; + +namespace { +class SPIRVPreLegalizer : public MachineFunctionPass { +public: + static char ID; + SPIRVPreLegalizer() : MachineFunctionPass(ID) { + initializeSPIRVPreLegalizerPass(*PassRegistry::getPassRegistry()); + } + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // namespace + +static bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) { + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && + MI.getIntrinsicID() == IntrinsicID) + return true; + return false; +} + +static void foldConstantsIntoIntrinsics(MachineFunction &MF) { + SmallVector ToErase; + MachineRegisterInfo &MRI = MF.getRegInfo(); + const unsigned AssignNameOperandShift = 2; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isSpvIntrinsic(MI, Intrinsic::spv_assign_name)) + continue; + unsigned NumOp = MI.getNumExplicitDefs() + AssignNameOperandShift; + while (MI.getOperand(NumOp).isReg()) { + MachineOperand &MOp = MI.getOperand(NumOp); + MachineInstr *ConstMI = MRI.getVRegDef(MOp.getReg()); + assert(ConstMI->getOpcode() == TargetOpcode::G_CONSTANT); + MI.removeOperand(NumOp); + MI.addOperand(MachineOperand::CreateImm( + ConstMI->getOperand(1).getCImm()->getZExtValue())); + if (MRI.use_empty(ConstMI->getOperand(0).getReg())) + ToErase.push_back(ConstMI); + } + } + } + for (MachineInstr *MI : ToErase) + MI->eraseFromParent(); +} + +static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + SmallVector ToErase; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast)) + continue; + assert(MI.getOperand(2).isReg()); + MIB.setInsertPt(*MI.getParent(), MI); + MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg()); + ToErase.push_back(&MI); + } + } + for (MachineInstr *MI : ToErase) + MI->eraseFromParent(); +} + +// Translating GV, IRTranslator sometimes generates following IR: +// %1 = G_GLOBAL_VALUE +// %2 = COPY %1 +// %3 = G_ADDRSPACE_CAST %2 +// New registers have no SPIRVType and no register class info. +// +// Set SPIRVType for GV, propagate it from GV to other instructions, +// also set register classes. 
+static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIB) { + SPIRVType *SpirvTy = nullptr; + assert(MI && "Machine instr is expected"); + if (MI->getOperand(0).isReg()) { + Register Reg = MI->getOperand(0).getReg(); + SpirvTy = GR->getSPIRVTypeForVReg(Reg); + if (!SpirvTy) { + switch (MI->getOpcode()) { + case TargetOpcode::G_CONSTANT: { + MIB.setInsertPt(*MI->getParent(), MI); + Type *Ty = MI->getOperand(1).getCImm()->getType(); + SpirvTy = GR->getOrCreateSPIRVType(Ty, MIB); + break; + } + case TargetOpcode::G_GLOBAL_VALUE: { + MIB.setInsertPt(*MI->getParent(), MI); + Type *Ty = MI->getOperand(1).getGlobal()->getType(); + SpirvTy = GR->getOrCreateSPIRVType(Ty, MIB); + break; + } + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_ADDRSPACE_CAST: + case TargetOpcode::COPY: { + MachineOperand &Op = MI->getOperand(1); + MachineInstr *Def = Op.isReg() ? MRI.getVRegDef(Op.getReg()) : nullptr; + if (Def) + SpirvTy = propagateSPIRVType(Def, GR, MRI, MIB); + break; + } + default: + break; + } + if (SpirvTy) + GR->assignSPIRVTypeToVReg(SpirvTy, Reg, MIB.getMF()); + if (!MRI.getRegClassOrNull(Reg)) + MRI.setRegClass(Reg, &SPIRV::IDRegClass); + } + } + return SpirvTy; +} + +// Insert ASSIGN_TYPE instuction between Reg and its definition, set NewReg as +// a dst of the definition, assign SPIRVType to both registers. If SpirvTy is +// provided, use it as SPIRVType in ASSIGN_TYPE, otherwise create it from Ty. +// TODO: maybe move to SPIRVUtils. +static Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) { + MachineInstr *Def = MRI.getVRegDef(Reg); + assert((Ty || SpirvTy) && "Either LLVM or SPIRV type is expected."); + MIB.setInsertPt(*Def->getParent(), + (Def->getNextNode() ? Def->getNextNode()->getIterator() + : Def->getParent()->end())); + Register NewReg = MRI.createGenericVirtualRegister(MRI.getType(Reg)); + if (auto *RC = MRI.getRegClassOrNull(Reg)) + MRI.setRegClass(NewReg, RC); + SpirvTy = SpirvTy ? SpirvTy : GR->getOrCreateSPIRVType(Ty, MIB); + GR->assignSPIRVTypeToVReg(SpirvTy, Reg, MIB.getMF()); + // This is to make it convenient for Legalizer to get the SPIRVType + // when processing the actual MI (i.e. not pseudo one). + GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF()); + MIB.buildInstr(SPIRV::ASSIGN_TYPE) + .addDef(Reg) + .addUse(NewReg) + .addUse(GR->getSPIRVTypeID(SpirvTy)); + Def->getOperand(0).setReg(NewReg); + MRI.setRegClass(Reg, &SPIRV::ANYIDRegClass); + return NewReg; +} + +static void generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + SmallVector ToErase; + + for (MachineBasicBlock *MBB : post_order(&MF)) { + if (MBB->empty()) + continue; + + bool ReachedBegin = false; + for (auto MII = std::prev(MBB->end()), Begin = MBB->begin(); + !ReachedBegin;) { + MachineInstr &MI = *MII; + + if (isSpvIntrinsic(MI, Intrinsic::spv_assign_type)) { + Register Reg = MI.getOperand(1).getReg(); + Type *Ty = getMDOperandAsType(MI.getOperand(2).getMetadata(), 0); + MachineInstr *Def = MRI.getVRegDef(Reg); + assert(Def && "Expecting an instruction that defines the register"); + // G_GLOBAL_VALUE already has type info. 
+ if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE) + insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MF.getRegInfo()); + ToErase.push_back(&MI); + } else if (MI.getOpcode() == TargetOpcode::G_CONSTANT || + MI.getOpcode() == TargetOpcode::G_FCONSTANT || + MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + // %rc = G_CONSTANT ty Val + // ===> + // %cty = OpType* ty + // %rctmp = G_CONSTANT ty Val + // %rc = ASSIGN_TYPE %rctmp, %cty + Register Reg = MI.getOperand(0).getReg(); + if (MRI.hasOneUse(Reg)) { + MachineInstr &UseMI = *MRI.use_instr_begin(Reg); + if (isSpvIntrinsic(UseMI, Intrinsic::spv_assign_type) || + isSpvIntrinsic(UseMI, Intrinsic::spv_assign_name)) + continue; + } + Type *Ty = nullptr; + if (MI.getOpcode() == TargetOpcode::G_CONSTANT) + Ty = MI.getOperand(1).getCImm()->getType(); + else if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) + Ty = MI.getOperand(1).getFPImm()->getType(); + else { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + Type *ElemTy = nullptr; + MachineInstr *ElemMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + assert(ElemMI); + + if (ElemMI->getOpcode() == TargetOpcode::G_CONSTANT) + ElemTy = ElemMI->getOperand(1).getCImm()->getType(); + else if (ElemMI->getOpcode() == TargetOpcode::G_FCONSTANT) + ElemTy = ElemMI->getOperand(1).getFPImm()->getType(); + else + llvm_unreachable("Unexpected opcode"); + unsigned NumElts = + MI.getNumExplicitOperands() - MI.getNumExplicitDefs(); + Ty = VectorType::get(ElemTy, NumElts, false); + } + insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MRI); + } else if (MI.getOpcode() == TargetOpcode::G_TRUNC || + MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE || + MI.getOpcode() == TargetOpcode::COPY || + MI.getOpcode() == TargetOpcode::G_ADDRSPACE_CAST) { + propagateSPIRVType(&MI, GR, MRI, MIB); + } + + if (MII == Begin) + ReachedBegin = true; + else + --MII; + } + } + for (MachineInstr *MI : ToErase) + MI->eraseFromParent(); +} + +static std::pair +createNewIdReg(Register ValReg, unsigned Opcode, MachineRegisterInfo &MRI, + const SPIRVGlobalRegistry &GR) { + LLT NewT = LLT::scalar(32); + SPIRVType *SpvType = GR.getSPIRVTypeForVReg(ValReg); + assert(SpvType && "VReg is expected to have SPIRV type"); + bool IsFloat = SpvType->getOpcode() == SPIRV::OpTypeFloat; + bool IsVectorFloat = + SpvType->getOpcode() == SPIRV::OpTypeVector && + GR.getSPIRVTypeForVReg(SpvType->getOperand(1).getReg())->getOpcode() == + SPIRV::OpTypeFloat; + IsFloat |= IsVectorFloat; + auto GetIdOp = IsFloat ? SPIRV::GET_fID : SPIRV::GET_ID; + auto DstClass = IsFloat ? &SPIRV::fIDRegClass : &SPIRV::IDRegClass; + if (MRI.getType(ValReg).isPointer()) { + NewT = LLT::pointer(0, 32); + GetIdOp = SPIRV::GET_pID; + DstClass = &SPIRV::pIDRegClass; + } else if (MRI.getType(ValReg).isVector()) { + NewT = LLT::fixed_vector(2, NewT); + GetIdOp = IsFloat ? SPIRV::GET_vfID : SPIRV::GET_vID; + DstClass = IsFloat ? 
&SPIRV::vfIDRegClass : &SPIRV::vIDRegClass;
+  }
+  Register IdReg = MRI.createGenericVirtualRegister(NewT);
+  MRI.setRegClass(IdReg, DstClass);
+  return {IdReg, GetIdOp};
+}
+
+static void processInstr(MachineInstr &MI, MachineIRBuilder &MIB,
+                         MachineRegisterInfo &MRI, SPIRVGlobalRegistry *GR) {
+  unsigned Opc = MI.getOpcode();
+  assert(MI.getNumDefs() > 0 && MRI.hasOneUse(MI.getOperand(0).getReg()));
+  MachineInstr &AssignTypeInst =
+      *(MRI.use_instr_begin(MI.getOperand(0).getReg()));
+  auto NewReg = createNewIdReg(MI.getOperand(0).getReg(), Opc, MRI, *GR).first;
+  AssignTypeInst.getOperand(1).setReg(NewReg);
+  MI.getOperand(0).setReg(NewReg);
+  MIB.setInsertPt(*MI.getParent(),
+                  (MI.getNextNode() ? MI.getNextNode()->getIterator()
+                                    : MI.getParent()->end()));
+  for (auto &Op : MI.operands()) {
+    if (!Op.isReg() || Op.isDef())
+      continue;
+    auto IdOpInfo = createNewIdReg(Op.getReg(), Opc, MRI, *GR);
+    MIB.buildInstr(IdOpInfo.second).addDef(IdOpInfo.first).addUse(Op.getReg());
+    Op.setReg(IdOpInfo.first);
+  }
+}
+
+// Defined in SPIRVLegalizerInfo.cpp.
+extern bool isTypeFoldingSupported(unsigned Opcode);
+
+static void processInstrsWithTypeFolding(MachineFunction &MF,
+                                         SPIRVGlobalRegistry *GR,
+                                         MachineIRBuilder MIB) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (isTypeFoldingSupported(MI.getOpcode()))
+        processInstr(MI, MIB, MRI, GR);
+    }
+  }
+}
+
+static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
+                            MachineIRBuilder MIB) {
+  DenseMap<Register, DenseMap<uint64_t, MachineBasicBlock *>>
+      SwitchRegToMBB;
+  DenseMap<Register, MachineBasicBlock *> DefaultMBBs;
+  DenseSet<Register> SwitchRegs;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  // Before the IRTranslator pass, spv_switch calls are inserted before each
+  // switch instruction. IRTranslator lowers switches to ICMP+CBr+Br triples.
+  // A switch with two cases may be translated to this MIR sequence:
+  //   intrinsic(@llvm.spv.switch), %CmpReg, %Const0, %Const1
+  //   %Dst0 = G_ICMP intpred(eq), %CmpReg, %Const0
+  //   G_BRCOND %Dst0, %bb.2
+  //   G_BR %bb.5
+  // bb.5.entry:
+  //   %Dst1 = G_ICMP intpred(eq), %CmpReg, %Const1
+  //   G_BRCOND %Dst1, %bb.3
+  //   G_BR %bb.4
+  // bb.2.sw.bb:
+  //   ...
+  // bb.3.sw.bb1:
+  //   ...
+  // bb.4.sw.epilog:
+  //   ...
+  // Walk MIs and collect information about destination MBBs to update the
+  // spv_switch call. We assume that all spv_switch calls precede their
+  // corresponding ICMPs.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (isSpvIntrinsic(MI, Intrinsic::spv_switch)) {
+        assert(MI.getOperand(1).isReg());
+        Register Reg = MI.getOperand(1).getReg();
+        SwitchRegs.insert(Reg);
+        // Set the first successor as default MBB to support empty switches.
+        DefaultMBBs[Reg] = *MBB.succ_begin();
+      }
+      // Process only ICMPs that relate to spv_switches.
+      if (MI.getOpcode() == TargetOpcode::G_ICMP && MI.getOperand(2).isReg() &&
+          SwitchRegs.contains(MI.getOperand(2).getReg())) {
+        assert(MI.getOperand(0).isReg() && MI.getOperand(1).isPredicate() &&
+               MI.getOperand(3).isReg());
+        Register Dst = MI.getOperand(0).getReg();
+        // Set type info for the destination register of the switch's ICMP
+        // instruction.
+        if (GR->getSPIRVTypeForVReg(Dst) == nullptr) {
+          MIB.setInsertPt(*MI.getParent(), MI);
+          Type *LLVMTy = IntegerType::get(MF.getFunction().getContext(), 1);
+          SPIRVType *SpirvTy = GR->getOrCreateSPIRVType(LLVMTy, MIB);
+          MRI.setRegClass(Dst, &SPIRV::IDRegClass);
+          GR->assignSPIRVTypeToVReg(SpirvTy, Dst, MIB.getMF());
+        }
+        Register CmpReg = MI.getOperand(2).getReg();
+        MachineOperand &PredOp = MI.getOperand(1);
+        const auto CC = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
+        assert(CC == CmpInst::ICMP_EQ && MRI.hasOneUse(Dst) &&
+               MRI.hasOneDef(CmpReg));
+        uint64_t Val = getIConstVal(MI.getOperand(3).getReg(), &MRI);
+        MachineInstr *CBr = MRI.use_begin(Dst)->getParent();
+        assert(CBr->getOpcode() == SPIRV::G_BRCOND &&
+               CBr->getOperand(1).isMBB());
+        SwitchRegToMBB[CmpReg][Val] = CBr->getOperand(1).getMBB();
+        // The next MI is always a BR to either the next case or the default.
+        MachineInstr *NextMI = CBr->getNextNode();
+        assert(NextMI->getOpcode() == SPIRV::G_BR &&
+               NextMI->getOperand(0).isMBB());
+        MachineBasicBlock *NextMBB = NextMI->getOperand(0).getMBB();
+        assert(NextMBB != nullptr);
+        // The default MBB does not start with a G_ICMP on the switch's
+        // compare register.
+        if (NextMBB->front().getOpcode() != SPIRV::G_ICMP ||
+            (NextMBB->front().getOperand(2).isReg() &&
+             NextMBB->front().getOperand(2).getReg() != CmpReg))
+          DefaultMBBs[CmpReg] = NextMBB;
+      }
+    }
+  }
+  // Modify spv_switch's operands with the collected values. For the example
+  // above, the result will be like this:
+  //   intrinsic(@llvm.spv.switch), %CmpReg, %bb.4, i32 0, %bb.2, i32 1, %bb.3
+  // Note that ICMP+CBr+Br sequences are not removed, but ModuleAnalysis marks
+  // them as skipped and AsmPrinter does not output them.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (!isSpvIntrinsic(MI, Intrinsic::spv_switch))
+        continue;
+      assert(MI.getOperand(1).isReg());
+      Register Reg = MI.getOperand(1).getReg();
+      unsigned NumOp = MI.getNumExplicitOperands();
+      SmallVector<const ConstantInt *> Vals;
+      SmallVector<MachineBasicBlock *> MBBs;
+      for (unsigned i = 2; i < NumOp; i++) {
+        Register CReg = MI.getOperand(i).getReg();
+        uint64_t Val = getIConstVal(CReg, &MRI);
+        MachineInstr *ConstInstr = getDefInstrMaybeConstant(CReg, &MRI);
+        Vals.push_back(ConstInstr->getOperand(1).getCImm());
+        MBBs.push_back(SwitchRegToMBB[Reg][Val]);
+      }
+      for (unsigned i = MI.getNumExplicitOperands() - 1; i > 1; i--)
+        MI.removeOperand(i);
+      MI.addOperand(MachineOperand::CreateMBB(DefaultMBBs[Reg]));
+      for (unsigned i = 0; i < Vals.size(); i++) {
+        MI.addOperand(MachineOperand::CreateCImm(Vals[i]));
+        MI.addOperand(MachineOperand::CreateMBB(MBBs[i]));
+      }
+    }
+  }
+}
+
+bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
+  // Initialize the type registry.
+  const SPIRVSubtarget &ST = MF.getSubtarget<SPIRVSubtarget>();
+  SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry();
+  GR->setCurrentFunc(MF);
+  MachineIRBuilder MIB(MF);
+  foldConstantsIntoIntrinsics(MF);
+  insertBitcasts(MF, GR, MIB);
+  generateAssignInstrs(MF, GR, MIB);
+  processInstrsWithTypeFolding(MF, GR, MIB);
+  processSwitches(MF, GR, MIB);
+
+  return true;
+}
+
+INITIALIZE_PASS(SPIRVPreLegalizer, DEBUG_TYPE, "SPIRV pre legalizer", false,
+                false)
+
+char SPIRVPreLegalizer::ID = 0;
+
+FunctionPass *llvm::createSPIRVPreLegalizerPass() {
+  return new SPIRVPreLegalizer();
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
new file mode 100644
index 000000000000..9bf9d7fe5b39
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
@@ -0,0 +1,47 @@
+//===- SPIRVRegisterBankInfo.cpp ------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the targeting of the RegisterBankInfo class for SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVRegisterBankInfo.h"
+#include "SPIRVRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+
+#define GET_REGINFO_ENUM
+#include "SPIRVGenRegisterInfo.inc"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "SPIRVGenRegisterBank.inc"
+
+using namespace llvm;
+
+// This is required for the .td selection patterns to work, or we'd end up with
+// RegClass checks being redundant, as all the classes would be mapped to the
+// same bank.
+const RegisterBank &
+SPIRVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+                                              LLT Ty) const {
+  switch (RC.getID()) {
+  case SPIRV::TYPERegClassID:
+    return SPIRV::TYPERegBank;
+  case SPIRV::pIDRegClassID:
+  case SPIRV::IDRegClassID:
+    return SPIRV::IDRegBank;
+  case SPIRV::fIDRegClassID:
+    return SPIRV::fIDRegBank;
+  case SPIRV::vIDRegClassID:
+    return SPIRV::vIDRegBank;
+  case SPIRV::vfIDRegClassID:
+    return SPIRV::vfIDRegBank;
+  case SPIRV::ANYIDRegClassID:
+  case SPIRV::ANYRegClassID:
+    return SPIRV::IDRegBank;
+  }
+  llvm_unreachable("Unknown register class");
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h
new file mode 100644
index 000000000000..67ddcdefb7dd
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h
@@ -0,0 +1,38 @@
+//===- SPIRVRegisterBankInfo.h -----------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the targeting of the RegisterBankInfo class for SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/RegisterBankInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "SPIRVGenRegisterBank.inc"
+
+namespace llvm {
+
+class TargetRegisterInfo;
+
+class SPIRVGenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "SPIRVGenRegisterBank.inc"
+};
+
+// This class provides the information for the target register banks.
+class SPIRVRegisterBankInfo final : public SPIRVGenRegisterBankInfo {
+public:
+  const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+                                             LLT Ty) const override;
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERBANKINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td b/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
new file mode 100644
index 000000000000..90c7f3a6e672
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
@@ -0,0 +1,15 @@
+//===-- SPIRVRegisterBanks.td - Describe SPIR-V RegBanks ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Although RegisterBankSelection is disabled, we need to distinguish the
+// banks, as the InstructionSelector's RegClass checking code relies on them.
+def IDRegBank : RegisterBank<"IDBank", [ID]>;
+def fIDRegBank : RegisterBank<"fIDBank", [fID]>;
+def vIDRegBank : RegisterBank<"vIDBank", [vID]>;
+def vfIDRegBank : RegisterBank<"vfIDBank", [vfID]>;
+def TYPERegBank : RegisterBank<"TYPEBank", [TYPE]>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp
new file mode 100644
index 000000000000..cf8a967d59c4
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp
@@ -0,0 +1,32 @@
+//===-- SPIRVRegisterInfo.cpp - SPIR-V Register Information -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetRegisterInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "SPIRVRegisterInfo.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "llvm/CodeGen/MachineFunction.h" + +#define GET_REGINFO_TARGET_DESC +#include "SPIRVGenRegisterInfo.inc" +using namespace llvm; + +SPIRVRegisterInfo::SPIRVRegisterInfo() : SPIRVGenRegisterInfo(SPIRV::ID0) {} + +BitVector SPIRVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + return BitVector(getNumRegs()); +} + +const MCPhysReg * +SPIRVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const MCPhysReg CalleeSavedReg = {0}; + return &CalleeSavedReg; +} diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h new file mode 100644 index 000000000000..f6f22b81e0bc --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h @@ -0,0 +1,36 @@ +//===-- SPIRVRegisterInfo.h - SPIR-V Register Information -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the SPIR-V implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERINFO_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERINFO_H + +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "SPIRVGenRegisterInfo.inc" + +namespace llvm { + +struct SPIRVRegisterInfo : public SPIRVGenRegisterInfo { + SPIRVRegisterInfo(); + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override {} + Register getFrameRegister(const MachineFunction &MF) const override { + return 0; + } +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERINFO_H diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td new file mode 100644 index 000000000000..d0b64b6895d0 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td @@ -0,0 +1,39 @@ +//===-- SPIRVRegisterInfo.td - SPIR-V Register defs --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations that describe the SPIR-V register file. 
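The register file declared below is purely nominal: SPIR-V IDs are never allocated to physical registers, so each class wraps one dummy register and all real values stay in virtual registers. A minimal, hypothetical snippet of how target code creates values in these classes (MRI being a function's MachineRegisterInfo; this is not code from the patch):

  // Hypothetical illustration only: one vreg per SPIR-V ID, no regalloc.
  Register TypeVReg = MRI.createVirtualRegister(&SPIRV::TYPERegClass); // OpTypeXXX result
  Register IdVReg = MRI.createVirtualRegister(&SPIRV::IDRegClass);     // any other ID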
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "SPIRV" in {
+  def p0 : PtrValueType <i32, 0>;
+  // All registers are for 32-bit identifiers, so have a single dummy register.
+
+  // Class for registers that are the result of OpTypeXXX instructions
+  def TYPE0 : Register<"TYPE0">;
+  def TYPE : RegisterClass<"SPIRV", [i32], 32, (add TYPE0)>;
+
+  // Class for every other non-type ID
+  def ID0 : Register<"ID0">;
+  def ID : RegisterClass<"SPIRV", [i32], 32, (add ID0)>;
+  def fID0 : Register<"fID0">;
+  def fID : RegisterClass<"SPIRV", [f32], 32, (add fID0)>;
+  def pID0 : Register<"pID0">;
+  def pID : RegisterClass<"SPIRV", [p0], 32, (add pID0)>;
+  def vID0 : Register<"vID0">;
+  def vID : RegisterClass<"SPIRV", [v2i32], 32, (add vID0)>;
+  def vfID0 : Register<"vfID0">;
+  def vfID : RegisterClass<"SPIRV", [v2f32], 32, (add vfID0)>;
+
+  def ANYID : RegisterClass<"SPIRV", [i32, f32, p0, v2i32, v2f32], 32,
+                            (add ID, fID, pID, vID, vfID)>;
+
+  // A few instructions like OpName can take ids from both type and non-type
+  // instructions, so we need a super-class to allow for both to count as valid
+  // arguments for these instructions.
+  def ANY : RegisterClass<"SPIRV", [i32], 32, (add TYPE, ID)>;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
new file mode 100644
index 000000000000..cdf3a160f373
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -0,0 +1,68 @@
+//===-- SPIRVSubtarget.cpp - SPIR-V Subtarget Information ------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIR-V specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVSubtarget.h"
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVLegalizerInfo.h"
+#include "SPIRVRegisterBankInfo.h"
+#include "SPIRVTargetMachine.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spirv-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPIRVGenSubtargetInfo.inc"
+
+// Compare version numbers, but allow 0 to mean unspecified.
+static bool isAtLeastVer(uint32_t Target, uint32_t VerToCompareTo) {
+  return Target == 0 || Target >= VerToCompareTo;
+}
+
+static unsigned computePointerSize(const Triple &TT) {
+  const auto Arch = TT.getArch();
+  // TODO: unify this with pointers legalization.
+  assert(TT.isSPIRV());
+  return Arch == Triple::spirv32 ? 32 : 64;
+}
+
+SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
+                               const std::string &FS,
+                               const SPIRVTargetMachine &TM)
+    : SPIRVGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS),
+      PointerSize(computePointerSize(TT)), SPIRVVersion(0), InstrInfo(),
+      FrameLowering(initSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
+  GR = std::make_unique<SPIRVGlobalRegistry>(PointerSize);
+  CallLoweringInfo =
+      std::make_unique<SPIRVCallLowering>(TLInfo, *this, GR.get());
+  Legalizer = std::make_unique<SPIRVLegalizerInfo>(*this);
+  RegBankInfo = std::make_unique<SPIRVRegisterBankInfo>();
+  InstSelector.reset(
+      createSPIRVInstructionSelector(TM, *this, *RegBankInfo.get()));
+}
+
+SPIRVSubtarget &SPIRVSubtarget::initSubtargetDependencies(StringRef CPU,
+                                                          StringRef FS) {
+  ParseSubtargetFeatures(CPU, /*TuneCPU=*/CPU, FS);
+  if (SPIRVVersion == 0)
+    SPIRVVersion = 14;
+  return *this;
+}
+
+// If the SPIR-V version is >= 1.4 we can call OpPtrEqual and OpPtrNotEqual.
+bool SPIRVSubtarget::canDirectlyComparePointers() const {
+  return isAtLeastVer(SPIRVVersion, 14);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.h b/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
new file mode 100644
index 000000000000..a6332cfefa8e
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
@@ -0,0 +1,93 @@
+//===-- SPIRVSubtarget.h - SPIR-V Subtarget Information --------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPIR-V specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVSUBTARGET_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVSUBTARGET_H
+
+#include "SPIRVCallLowering.h"
+#include "SPIRVFrameLowering.h"
+#include "SPIRVISelLowering.h"
+#include "SPIRVInstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPIRVGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+class SPIRVGlobalRegistry;
+class SPIRVTargetMachine;
+
+class SPIRVSubtarget : public SPIRVGenSubtargetInfo {
+private:
+  const unsigned PointerSize;
+  uint32_t SPIRVVersion;
+
+  std::unique_ptr<SPIRVGlobalRegistry> GR;
+
+  SPIRVInstrInfo InstrInfo;
+  SPIRVFrameLowering FrameLowering;
+  SPIRVTargetLowering TLInfo;
+
+  // GlobalISel related APIs.
+  std::unique_ptr<CallLowering> CallLoweringInfo;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<InstructionSelector> InstSelector;
+
+public:
+  // This constructor initializes the data members to match that
+  // of the specified triple.
+  SPIRVSubtarget(const Triple &TT, const std::string &CPU,
+                 const std::string &FS, const SPIRVTargetMachine &TM);
+  SPIRVSubtarget &initSubtargetDependencies(StringRef CPU, StringRef FS);
+
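A worked example of the version scheme this subtarget uses (SPIRVVersion packs major.minor into two decimal digits, so the default of 14 set in initSubtargetDependencies means SPIR-V 1.4, and isAtLeastVer treats 0 as unspecified):

  // isAtLeastVer(0, 14)  -> true   (no version requested, so allow it)
  // isAtLeastVer(13, 14) -> false  (v1.3 lacks OpPtrEqual/OpPtrNotEqual)
  // isAtLeastVer(14, 14) -> true   (v1.4 can compare pointers directly)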
+  // Parses the features string, setting the specified subtarget options.
+  // The definition of this function is auto generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+  unsigned getPointerSize() const { return PointerSize; }
+  bool canDirectlyComparePointers() const;
+  uint32_t getSPIRVVersion() const { return SPIRVVersion; }
+  SPIRVGlobalRegistry *getSPIRVGlobalRegistry() const { return GR.get(); }
+
+  const CallLowering *getCallLowering() const override {
+    return CallLoweringInfo.get();
+  }
+  const RegisterBankInfo *getRegBankInfo() const override {
+    return RegBankInfo.get();
+  }
+  const LegalizerInfo *getLegalizerInfo() const override {
+    return Legalizer.get();
+  }
+  InstructionSelector *getInstructionSelector() const override {
+    return InstSelector.get();
+  }
+  const SPIRVInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+  const SPIRVFrameLowering *getFrameLowering() const override {
+    return &FrameLowering;
+  }
+  const SPIRVTargetLowering *getTargetLowering() const override {
+    return &TLInfo;
+  }
+  const SPIRVRegisterInfo *getRegisterInfo() const override {
+    return &InstrInfo.getRegisterInfo();
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVSUBTARGET_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
new file mode 100644
index 000000000000..f7c88a5c6d4a
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -0,0 +1,186 @@
+//===- SPIRVTargetMachine.cpp - Define TargetMachine for SPIR-V -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIR-V specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVTargetMachine.h"
+#include "SPIRV.h"
+#include "SPIRVCallLowering.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVLegalizerInfo.h"
+#include "SPIRVTargetObjectFile.h"
+#include "SPIRVTargetTransformInfo.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTarget() {
+  // Register the target.
+  RegisterTargetMachine<SPIRVTargetMachine> X(getTheSPIRV32Target());
+  RegisterTargetMachine<SPIRVTargetMachine> Y(getTheSPIRV64Target());
+
+  PassRegistry &PR = *PassRegistry::getPassRegistry();
+  initializeGlobalISel(PR);
+  initializeSPIRVModuleAnalysisPass(PR);
+}
+
+static std::string computeDataLayout(const Triple &TT) {
+  const auto Arch = TT.getArch();
+  if (Arch == Triple::spirv32)
+    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
+           "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+  return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
+         "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+}
+
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+  if (!RM)
+    return Reloc::PIC_;
+  return *RM;
+}
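For reference, the strings built in computeDataLayout above use standard LLVM DataLayout syntax: "e" selects little-endian, "p:32:32" gives 32-bit pointers with 32-bit alignment on spirv32 (spirv64 omits the entry, keeping the 64-bit default), "i64:64" aligns i64 to 64 bits, and each "vN:M" entry gives an N-bit vector M-bit alignment. A hypothetical sanity check, assuming the string is handed to llvm::DataLayout:

  DataLayout DL(computeDataLayout(Triple("spirv32-unknown-unknown")));
  assert(DL.isLittleEndian() && DL.getPointerSizeInBits() == 32);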
+
+// Pin SPIRVTargetObjectFile's vtables to this file.
+SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {}
+
+SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Optional<Reloc::Model> RM,
+                                       Optional<CodeModel::Model> CM,
+                                       CodeGenOpt::Level OL, bool JIT)
+    : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+                        getEffectiveRelocModel(RM),
+                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
+      TLOF(std::make_unique<SPIRVTargetObjectFile>()),
+      Subtarget(TT, CPU.str(), FS.str(), *this) {
+  initAsmInfo();
+  setGlobalISel(true);
+  setFastISel(false);
+  setO0WantsFastISel(false);
+  setRequiresStructuredCFG(false);
+}
+
+namespace {
+// SPIR-V Code Generator Pass Configuration Options.
+class SPIRVPassConfig : public TargetPassConfig {
+public:
+  SPIRVPassConfig(SPIRVTargetMachine &TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  SPIRVTargetMachine &getSPIRVTargetMachine() const {
+    return getTM<SPIRVTargetMachine>();
+  }
+  void addIRPasses() override;
+  void addISelPrepare() override;
+
+  bool addIRTranslator() override;
+  void addPreLegalizeMachineIR() override;
+  bool addLegalizeMachineIR() override;
+  bool addRegBankSelect() override;
+  bool addGlobalInstructionSelect() override;
+
+  FunctionPass *createTargetRegisterAllocator(bool) override;
+  void addFastRegAlloc() override {}
+  void addOptimizedRegAlloc() override {}
+
+  void addPostRegAlloc() override;
+};
+} // namespace
+
+// We do not use physical registers, and maintain virtual registers throughout
+// the entire pipeline, so return nullptr to disable register allocation.
+FunctionPass *SPIRVPassConfig::createTargetRegisterAllocator(bool) {
+  return nullptr;
+}
+
+// Disable passes that break because they assume no virtual registers remain.
+void SPIRVPassConfig::addPostRegAlloc() {
+  // These passes do not work with vregs instead of physical regs.
+  disablePass(&MachineCopyPropagationID);
+  disablePass(&PostRAMachineSinkingID);
+  disablePass(&PostRASchedulerID);
+  disablePass(&FuncletLayoutID);
+  disablePass(&StackMapLivenessID);
+  disablePass(&PatchableFunctionID);
+  disablePass(&ShrinkWrapID);
+  disablePass(&LiveDebugValuesID);
+
+  // These passes do not work with OpPhi.
+  disablePass(&BranchFolderPassID);
+  disablePass(&MachineBlockPlacementID);
+
+  TargetPassConfig::addPostRegAlloc();
+}
+
+TargetTransformInfo
+SPIRVTargetMachine::getTargetTransformInfo(const Function &F) const {
+  return TargetTransformInfo(SPIRVTTIImpl(this, F));
+}
+
+TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new SPIRVPassConfig(*this, PM);
+}
+
+void SPIRVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
+
+void SPIRVPassConfig::addISelPrepare() {
+  addPass(createSPIRVEmitIntrinsicsPass(&getTM<SPIRVTargetMachine>()));
+  TargetPassConfig::addISelPrepare();
+}
+
+bool SPIRVPassConfig::addIRTranslator() {
+  addPass(new IRTranslator(getOptLevel()));
+  return false;
+}
+
+void SPIRVPassConfig::addPreLegalizeMachineIR() {
+  addPass(createSPIRVPreLegalizerPass());
+}
+
+// Use the default legalizer.
+bool SPIRVPassConfig::addLegalizeMachineIR() {
+  addPass(new Legalizer());
+  return false;
+}
+
+// Do not add a RegBankSelect pass, as we only ever need virtual registers.
+bool SPIRVPassConfig::addRegBankSelect() {
+  disablePass(&RegBankSelect::ID);
+  return false;
+}
+
+namespace {
+// A custom subclass of InstructionSelect, which is mostly the same, except
+// that it does not require RegBankSelect to have run previously.
+class SPIRVInstructionSelect : public InstructionSelect {
+  // We don't use register banks, so unset the requirement for them.
+  MachineFunctionProperties getRequiredProperties() const override {
+    return InstructionSelect::getRequiredProperties().reset(
+        MachineFunctionProperties::Property::RegBankSelected);
+  }
+};
+} // namespace
+
+bool SPIRVPassConfig::addGlobalInstructionSelect() {
+  addPass(new SPIRVInstructionSelect());
+  return false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
new file mode 100644
index 000000000000..f3597971bc95
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
@@ -0,0 +1,47 @@
+//===-- SPIRVTargetMachine.h - Define TargetMachine for SPIR-V -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPIR-V specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTARGETMACHINE_H
+
+#include "SPIRVSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class SPIRVTargetMachine : public LLVMTargetMachine {
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  SPIRVSubtarget Subtarget;
+
+public:
+  SPIRVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+                     StringRef FS, const TargetOptions &Options,
+                     Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+                     CodeGenOpt::Level OL, bool JIT);
+
+  const SPIRVSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+  const SPIRVSubtarget *getSubtargetImpl(const Function &) const override {
+    return &Subtarget;
+  }
+
+  TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
+
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+  bool usesPhysRegsForValues() const override { return false; }
+
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTARGETMACHINE_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h b/llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h
new file mode 100644
index 000000000000..00c456971ef1
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h
@@ -0,0 +1,45 @@
+//===-- SPIRVTargetObjectFile.h - SPIRV Object Info -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTARGETOBJECTFILE_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class SPIRVTargetObjectFile : public TargetLoweringObjectFile {
+public:
+  ~SPIRVTargetObjectFile() override;
+
+  void Initialize(MCContext &ctx, const TargetMachine &TM) override {
+    TargetLoweringObjectFile::Initialize(ctx, TM);
+  }
+  // All words in a SPIR-V module, except the first five header words, form a
+  // linear sequence of instructions in a specific order, so we put all the
+  // instructions in a single text section.
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+                                   const Constant *C,
+                                   Align &Alignment) const override {
+    return TextSection;
+  }
+  MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+                                      const TargetMachine &TM) const override {
+    return TextSection;
+  }
+  MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+                                    const TargetMachine &TM) const override {
+    return TextSection;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTARGETOBJECTFILE_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
new file mode 100644
index 000000000000..ac351cf42f5c
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -0,0 +1,44 @@
+//===- SPIRVTargetTransformInfo.h - SPIR-V specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file contains a TargetTransformInfo::Concept conforming object specific
+// to the SPIRV target machine. It uses the target's detailed information to
+// provide more precise answers to certain TTI queries, while letting the
+// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTARGETTRANSFORMINFO_H
+
+#include "SPIRV.h"
+#include "SPIRVTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
+  using BaseT = BasicTTIImplBase<SPIRVTTIImpl>;
+
+  friend BaseT;
+
+  const SPIRVSubtarget *ST;
+  const SPIRVTargetLowering *TLI;
+
+  const TargetSubtargetInfo *getST() const { return ST; }
+  const SPIRVTargetLowering *getTLI() const { return TLI; }
+
+public:
+  explicit SPIRVTTIImpl(const SPIRVTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {}
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
new file mode 100644
index 000000000000..b92dc12735f8
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -0,0 +1,207 @@
+//===--- SPIRVUtils.cpp ---- SPIR-V Utility Functions -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions.
+//
+//===----------------------------------------------------------------------===//
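The helpers defined just below implement SPIR-V's string-literal encoding (null-terminated UTF-8, padded to a multiple of four bytes). A worked example of the packing:

  // addStringImm("abc", MIB) emits one word:
  //   'a' | 'b' << 8 | 'c' << 16 | 0 << 24 = 0x00636261
  //   (the NUL terminator occupies the top byte).
  // addStringImm("abcd", MIB) emits two words: 0x64636261, then 0x00000000
  //   (getPaddedLen rounds 4 + 1 bytes up to 8, padding with zeros).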
+
+#include "SPIRVUtils.h"
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRV.h"
+#include "SPIRVInstrInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+
+using namespace llvm;
+
+// The following functions are used to add string literals as a series of
+// 32-bit integer operands with the correct format, and unpack them if
+// necessary when making string comparisons in compiler passes.
+// SPIR-V requires null-terminated UTF-8 strings padded to 32-bit alignment.
+static uint32_t convertCharsToWord(const StringRef &Str, unsigned i) {
+  uint32_t Word = 0u; // Build up this 32-bit word from 4 8-bit chars.
+  for (unsigned WordIndex = 0; WordIndex < 4; ++WordIndex) {
+    unsigned StrIndex = i + WordIndex;
+    uint8_t CharToAdd = 0;       // Initialize char as padding/null.
+    if (StrIndex < Str.size()) { // If it's within the string, get a real char.
+      CharToAdd = Str[StrIndex];
+    }
+    Word |= (CharToAdd << (WordIndex * 8));
+  }
+  return Word;
+}
+
+// Get length including padding and null terminator.
+static size_t getPaddedLen(const StringRef &Str) {
+  const size_t Len = Str.size() + 1;
+  return (Len % 4 == 0) ? Len : Len + (4 - (Len % 4));
+}
+
+void addStringImm(const StringRef &Str, MachineInstrBuilder &MIB) {
+  const size_t PaddedLen = getPaddedLen(Str);
+  for (unsigned i = 0; i < PaddedLen; i += 4) {
+    // Add an operand for the 32-bits of chars or padding.
+    MIB.addImm(convertCharsToWord(Str, i));
+  }
+}
+
+void addStringImm(const StringRef &Str, IRBuilder<> &B,
+                  std::vector<Value *> &Args) {
+  const size_t PaddedLen = getPaddedLen(Str);
+  for (unsigned i = 0; i < PaddedLen; i += 4) {
+    // Add a vector element for the 32-bits of chars or padding.
+    Args.push_back(B.getInt32(convertCharsToWord(Str, i)));
+  }
+}
+
+std::string getStringImm(const MachineInstr &MI, unsigned StartIndex) {
+  return getSPIRVStringOperand(MI, StartIndex);
+}
+
+void addNumImm(const APInt &Imm, MachineInstrBuilder &MIB) {
+  const auto Bitwidth = Imm.getBitWidth();
+  switch (Bitwidth) {
+  case 1:
+    break; // Already handled.
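  // Illustrative note on the 64-bit case below (not part of the upstream
  // comments): Imm = 0x0000000100000002 is emitted as two 32-bit operands,
  // low word first -- addImm(0x00000002), then addImm(0x00000001) -- matching
  // SPIR-V's rule that multi-word literals appear low-order word first.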
+  case 8:
+  case 16:
+  case 32:
+    MIB.addImm(Imm.getZExtValue());
+    break;
+  case 64: {
+    uint64_t FullImm = Imm.getZExtValue();
+    uint32_t LowBits = FullImm & 0xffffffff;
+    uint32_t HighBits = (FullImm >> 32) & 0xffffffff;
+    MIB.addImm(LowBits).addImm(HighBits);
+    break;
+  }
+  default:
+    report_fatal_error("Unsupported constant bitwidth");
+  }
+}
+
+void buildOpName(Register Target, const StringRef &Name,
+                 MachineIRBuilder &MIRBuilder) {
+  if (!Name.empty()) {
+    auto MIB = MIRBuilder.buildInstr(SPIRV::OpName).addUse(Target);
+    addStringImm(Name, MIB);
+  }
+}
+
+static void finishBuildOpDecorate(MachineInstrBuilder &MIB,
+                                  const std::vector<uint32_t> &DecArgs,
+                                  StringRef StrImm) {
+  if (!StrImm.empty())
+    addStringImm(StrImm, MIB);
+  for (const auto &DecArg : DecArgs)
+    MIB.addImm(DecArg);
+}
+
+void buildOpDecorate(Register Reg, MachineIRBuilder &MIRBuilder,
+                     llvm::SPIRV::Decoration Dec,
+                     const std::vector<uint32_t> &DecArgs, StringRef StrImm) {
+  auto MIB = MIRBuilder.buildInstr(SPIRV::OpDecorate)
+                 .addUse(Reg)
+                 .addImm(static_cast<uint32_t>(Dec));
+  finishBuildOpDecorate(MIB, DecArgs, StrImm);
+}
+
+void buildOpDecorate(Register Reg, MachineInstr &I, const SPIRVInstrInfo &TII,
+                     llvm::SPIRV::Decoration Dec,
+                     const std::vector<uint32_t> &DecArgs, StringRef StrImm) {
+  MachineBasicBlock &MBB = *I.getParent();
+  auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpDecorate))
+                 .addUse(Reg)
+                 .addImm(static_cast<uint32_t>(Dec));
+  finishBuildOpDecorate(MIB, DecArgs, StrImm);
+}
+
+// TODO: maybe the following two functions should be handled in the subtarget
+// to allow for different OpenCL vs Vulkan handling.
+unsigned storageClassToAddressSpace(SPIRV::StorageClass SC) {
+  switch (SC) {
+  case SPIRV::StorageClass::Function:
+    return 0;
+  case SPIRV::StorageClass::CrossWorkgroup:
+    return 1;
+  case SPIRV::StorageClass::UniformConstant:
+    return 2;
+  case SPIRV::StorageClass::Workgroup:
+    return 3;
+  case SPIRV::StorageClass::Generic:
+    return 4;
+  case SPIRV::StorageClass::Input:
+    return 7;
+  default:
+    llvm_unreachable("Unable to get address space id");
+  }
+}
+
+SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace) {
+  switch (AddrSpace) {
+  case 0:
+    return SPIRV::StorageClass::Function;
+  case 1:
+    return SPIRV::StorageClass::CrossWorkgroup;
+  case 2:
+    return SPIRV::StorageClass::UniformConstant;
+  case 3:
+    return SPIRV::StorageClass::Workgroup;
+  case 4:
+    return SPIRV::StorageClass::Generic;
+  case 7:
+    return SPIRV::StorageClass::Input;
+  default:
+    llvm_unreachable("Unknown address space");
+  }
+}
+
+SPIRV::MemorySemantics getMemSemanticsForStorageClass(SPIRV::StorageClass SC) {
+  switch (SC) {
+  case SPIRV::StorageClass::StorageBuffer:
+  case SPIRV::StorageClass::Uniform:
+    return SPIRV::MemorySemantics::UniformMemory;
+  case SPIRV::StorageClass::Workgroup:
+    return SPIRV::MemorySemantics::WorkgroupMemory;
+  case SPIRV::StorageClass::CrossWorkgroup:
+    return SPIRV::MemorySemantics::CrossWorkgroupMemory;
+  case SPIRV::StorageClass::AtomicCounter:
+    return SPIRV::MemorySemantics::AtomicCounterMemory;
+  case SPIRV::StorageClass::Image:
+    return SPIRV::MemorySemantics::ImageMemory;
+  default:
+    return SPIRV::MemorySemantics::None;
+  }
+}
+
+MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
+                                       const MachineRegisterInfo *MRI) {
+  MachineInstr *ConstInstr = MRI->getVRegDef(ConstReg);
+  if (ConstInstr->getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+      ConstInstr->getIntrinsicID() == Intrinsic::spv_track_constant) {
+    ConstReg = ConstInstr->getOperand(2).getReg();
+    ConstInstr = MRI->getVRegDef(ConstReg);
+  } else if (ConstInstr->getOpcode() == SPIRV::ASSIGN_TYPE) {
+    ConstReg = ConstInstr->getOperand(1).getReg();
+    ConstInstr = MRI->getVRegDef(ConstReg);
+  }
+  return ConstInstr;
+}
+
+uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
+  const MachineInstr *MI = getDefInstrMaybeConstant(ConstReg, MRI);
+  assert(MI && MI->getOpcode() == TargetOpcode::G_CONSTANT);
+  return MI->getOperand(1).getCImm()->getValue().getZExtValue();
+}
+
+Type *getMDOperandAsType(const MDNode *N, unsigned I) {
+  return cast<ValueAsMetadata>(N->getOperand(I))->getType();
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
new file mode 100644
index 000000000000..ffa82c9c1fe4
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -0,0 +1,83 @@
+//===--- SPIRVUtils.h ---- SPIR-V Utility Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include <string>
+
+namespace llvm {
+class MCInst;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+class MachineIRBuilder;
+class MachineRegisterInfo;
+class Register;
+class StringRef;
+class SPIRVInstrInfo;
+} // namespace llvm
+
+// Add the given string as a series of integer operands, inserting null
+// terminators and padding so that the operands are all 32-bit little-endian
+// words.
+void addStringImm(const llvm::StringRef &Str, llvm::MachineInstrBuilder &MIB);
+void addStringImm(const llvm::StringRef &Str, llvm::IRBuilder<> &B,
+                  std::vector<llvm::Value *> &Args);
+
+// Read the series of integer operands back as a null-terminated string using
+// the reverse of the logic in addStringImm.
+std::string getStringImm(const llvm::MachineInstr &MI, unsigned StartIndex);
+
+// Add the given numerical immediate to MIB.
+void addNumImm(const llvm::APInt &Imm, llvm::MachineInstrBuilder &MIB);
+
+// Add an OpName instruction for the given target register.
+void buildOpName(llvm::Register Target, const llvm::StringRef &Name,
+                 llvm::MachineIRBuilder &MIRBuilder);
+
+// Add an OpDecorate instruction for the given Reg.
+void buildOpDecorate(llvm::Register Reg, llvm::MachineIRBuilder &MIRBuilder,
+                     llvm::SPIRV::Decoration Dec,
+                     const std::vector<uint32_t> &DecArgs,
+                     llvm::StringRef StrImm = "");
+void buildOpDecorate(llvm::Register Reg, llvm::MachineInstr &I,
+                     const llvm::SPIRVInstrInfo &TII,
+                     llvm::SPIRV::Decoration Dec,
+                     const std::vector<uint32_t> &DecArgs,
+                     llvm::StringRef StrImm = "");
+
+// Convert a SPIR-V storage class to the corresponding LLVM IR address space.
+unsigned storageClassToAddressSpace(llvm::SPIRV::StorageClass SC);
+
+// Convert an LLVM IR address space to a SPIR-V storage class.
+llvm::SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace);
+
+llvm::SPIRV::MemorySemantics
+getMemSemanticsForStorageClass(llvm::SPIRV::StorageClass SC);
+
+// Find the def instruction for the given ConstReg, walking through
+// spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg to the
+// register that holds the underlying constant definition.
+llvm::MachineInstr *
+getDefInstrMaybeConstant(llvm::Register &ConstReg,
+                         const llvm::MachineRegisterInfo *MRI);
+
+// Get constant integer value of the given ConstReg.
+uint64_t getIConstVal(llvm::Register ConstReg,
+                      const llvm::MachineRegisterInfo *MRI);
+
+// Get type of i-th operand of the metadata node.
+llvm::Type *getMDOperandAsType(const llvm::MDNode *N, unsigned I);
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
new file mode 100644
index 000000000000..fb7cab4fe779
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
@@ -0,0 +1,28 @@
+//===-- SPIRVTargetInfo.cpp - SPIR-V Target Implementation ----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+Target &llvm::getTheSPIRV32Target() {
+  static Target TheSPIRV32Target;
+  return TheSPIRV32Target;
+}
+Target &llvm::getTheSPIRV64Target() {
+  static Target TheSPIRV64Target;
+  return TheSPIRV64Target;
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetInfo() {
+  RegisterTarget<Triple::spirv32> X(getTheSPIRV32Target(), "spirv32",
+                                    "SPIR-V 32-bit", "SPIRV");
+  RegisterTarget<Triple::spirv64> Y(getTheSPIRV64Target(), "spirv64",
+                                    "SPIR-V 64-bit", "SPIRV");
+}
diff --git a/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
new file mode 100644
index 000000000000..4353258e1d1a
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- SPIRVTargetInfo.h - SPIRV Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_TARGETINFO_SPIRVTARGETINFO_H
+#define LLVM_LIB_TARGET_SPIRV_TARGETINFO_SPIRVTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheSPIRV32Target();
+Target &getTheSPIRV64Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_TARGETINFO_SPIRVTARGETINFO_H
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index af3304f0907d..77e9b1d96612 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -16,6 +16,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
@@ -54,6 +55,8 @@ class SparcOperand;
 class SparcAsmParser : public MCTargetAsmParser {
   MCAsmParser &Parser;
 
+  enum class TailRelocKind { Load_GOT, Add_TLS, Load_TLS, Call_TLS };
+
   /// @name Auto-generated Match Functions
   /// {
 
@@ -82,6 +85,9 @@ class SparcAsmParser : public MCTargetAsmParser {
 
   OperandMatchResultTy parseMembarTag(OperandVector &Operands);
 
+  template <TailRelocKind Kind>
+  OperandMatchResultTy parseTailRelocSym(OperandVector &Operands);
+
   template <unsigned N>
   OperandMatchResultTy parseShiftAmtImm(OperandVector &Operands);
 
@@ -112,6 +118,8 @@ class SparcAsmParser : public MCTargetAsmParser {
   bool expandSET(MCInst &Inst, SMLoc IDLoc,
                  SmallVectorImpl<MCInst> &Instructions);
 
+  SMLoc getLoc() const { return getParser().getTok().getLoc(); }
+
 public:
   SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
                  const MCInstrInfo &MII,
@@ -266,6 +274,7 @@ public:
   bool isMEMrr() const { return Kind == k_MemoryReg; }
   bool isMEMri() const { return Kind == k_MemoryImm; }
   bool isMembarTag() const { return Kind == k_Immediate; }
+  bool isTailRelocSym() const { return Kind == k_Immediate; }
 
   bool isCallTarget() const {
     if (!isImm())
@@ -426,6 +435,11 @@ public:
     addExpr(Inst, getImm());
   }
 
+  void addTailRelocSymOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
   static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
     auto Op = std::make_unique<SparcOperand>(k_Token);
     Op->Tok.Data = Str.data();
@@ -849,6 +863,97 @@ OperandMatchResultTy SparcAsmParser::parseShiftAmtImm(OperandVector &Operands) {
   return MatchOperand_Success;
 }
 
+template <SparcAsmParser::TailRelocKind Kind>
+OperandMatchResultTy
+SparcAsmParser::parseTailRelocSym(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+
+  auto MatchesKind = [](SparcMCExpr::VariantKind VK) -> bool {
+    switch (Kind) {
+    case TailRelocKind::Load_GOT:
+      // Non-TLS relocations on ld (or ldx).
+      // ld [%rr + %rr], %rr, %rel(sym)
+      return VK == SparcMCExpr::VK_Sparc_GOTDATA_OP;
+    case TailRelocKind::Add_TLS:
+      // TLS relocations on add.
+      // add %rr, %rr, %rr, %rel(sym)
+      switch (VK) {
+      case SparcMCExpr::VK_Sparc_TLS_GD_ADD:
+      case SparcMCExpr::VK_Sparc_TLS_IE_ADD:
+      case SparcMCExpr::VK_Sparc_TLS_LDM_ADD:
+      case SparcMCExpr::VK_Sparc_TLS_LDO_ADD:
+        return true;
+      default:
+        return false;
+      }
+    case TailRelocKind::Load_TLS:
+      // TLS relocations on ld (or ldx).
+ // ld[x] %addr, %rr, %rel(sym) + switch (VK) { + case SparcMCExpr::VK_Sparc_TLS_IE_LD: + case SparcMCExpr::VK_Sparc_TLS_IE_LDX: + return true; + default: + return false; + } + case TailRelocKind::Call_TLS: + // TLS relocations on call. + // call sym, %rel(sym) + switch (VK) { + case SparcMCExpr::VK_Sparc_TLS_GD_CALL: + case SparcMCExpr::VK_Sparc_TLS_LDM_CALL: + return true; + default: + return false; + } + } + llvm_unreachable("Unhandled SparcAsmParser::TailRelocKind enum"); + }; + + if (getLexer().getKind() != AsmToken::Percent) { + Error(getLoc(), "expected '%' for operand modifier"); + return MatchOperand_ParseFail; + } + + const AsmToken Tok = Parser.getTok(); + getParser().Lex(); // Eat '%' + + if (getLexer().getKind() != AsmToken::Identifier) { + Error(getLoc(), "expected valid identifier for operand modifier"); + return MatchOperand_ParseFail; + } + + StringRef Name = getParser().getTok().getIdentifier(); + SparcMCExpr::VariantKind VK = SparcMCExpr::parseVariantKind(Name); + if (VK == SparcMCExpr::VK_Sparc_None) { + Error(getLoc(), "invalid operand modifier"); + return MatchOperand_ParseFail; + } + + if (!MatchesKind(VK)) { + // Did not match the specified set of relocation types, put '%' back. + getLexer().UnLex(Tok); + return MatchOperand_NoMatch; + } + + Parser.Lex(); // Eat the identifier. + if (getLexer().getKind() != AsmToken::LParen) { + Error(getLoc(), "expected '('"); + return MatchOperand_ParseFail; + } + + getParser().Lex(); // Eat '(' + const MCExpr *SubExpr; + if (getParser().parseParenExpression(SubExpr, E)) { + return MatchOperand_ParseFail; + } + + const MCExpr *Val = adjustPICRelocation(VK, SubExpr); + Operands.push_back(SparcOperand::CreateImm(Val, S, E)); + return MatchOperand_Success; +} + OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const MCExpr *EVal; @@ -1408,10 +1513,27 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal, StringRef name = Tok.getString(); SparcMCExpr::VariantKind VK = SparcMCExpr::parseVariantKind(name); + switch (VK) { + case SparcMCExpr::VK_Sparc_None: + Error(getLoc(), "invalid operand modifier"); + return false; - if (VK == SparcMCExpr::VK_Sparc_None) + case SparcMCExpr::VK_Sparc_GOTDATA_OP: + case SparcMCExpr::VK_Sparc_TLS_GD_ADD: + case SparcMCExpr::VK_Sparc_TLS_GD_CALL: + case SparcMCExpr::VK_Sparc_TLS_IE_ADD: + case SparcMCExpr::VK_Sparc_TLS_IE_LD: + case SparcMCExpr::VK_Sparc_TLS_IE_LDX: + case SparcMCExpr::VK_Sparc_TLS_LDM_ADD: + case SparcMCExpr::VK_Sparc_TLS_LDM_CALL: + case SparcMCExpr::VK_Sparc_TLS_LDO_ADD: + // These are special-cased at tablegen level. return false; + default: + break; + } + Parser.Lex(); // Eat the identifier. 
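To make the new syntax concrete, here are examples of operands these parsers accept (instruction shapes are taken from the parseTailRelocSym comments above; the %gdop/%tgd_add spellings follow the usual GNU-style modifier names and are an assumption here):

  // ld [%o0 + %o1], %o2, %gdop(sym)   -> GOTDATA_OP tail relocation on a load
  // add %o0, %o1, %o2, %tgd_add(sym)  -> TLS_GD_ADD tail relocation on an add
  // The variant kinds listed above as "special-cased at tablegen level" make
  // matchSparcAsmModifiers return false, so the dedicated
  // parseTailRelocSym<Kind> parsers can match them instead.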
if (Parser.getTok().getKind() != AsmToken::LParen) return false; diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp index 259b37954183..cc132d46de85 100644 --- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp @@ -174,17 +174,20 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, if (slot == MBB.begin()) return MBB.end(); - if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL) + unsigned Opc = slot->getOpcode(); + + if (Opc == SP::RET || Opc == SP::TLS_CALL) return MBB.end(); - if (slot->getOpcode() == SP::RETL) { + if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLri) { MachineBasicBlock::iterator J = slot; --J; if (J->getOpcode() == SP::RESTORErr || J->getOpcode() == SP::RESTOREri) { // change retl to ret. - slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET)); + if (Opc == SP::RETL) + slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET)); return J; } } @@ -360,6 +363,8 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) case SP::CALLrr: case SP::CALLri: structSizeOpNum = 2; break; case SP::TLS_CALL: return false; + case SP::TAIL_CALLri: + case SP::TAIL_CALL: return false; } const MachineOperand &MO = I->getOperand(structSizeOpNum); diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index 142124a8e0d9..1825b95dd6ac 100644 --- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -14,8 +14,8 @@ #include "TargetInfo/SparcTargetInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/TargetRegistry.h" @@ -32,7 +32,7 @@ class SparcDisassembler : public MCDisassembler { public: SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~SparcDisassembler() {} + virtual ~SparcDisassembler() = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, @@ -142,10 +142,9 @@ static const uint16_t CPPairDecoderTable[] = { SP::C24_C25, SP::C26_C27, SP::C28_C29, SP::C30_C31 }; -static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = IntRegDecoderTable[RegNo]; @@ -153,10 +152,9 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = IntRegDecoderTable[RegNo]; @@ -164,11 +162,9 @@ static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } - -static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = 
FPRegDecoderTable[RegNo]; @@ -176,11 +172,9 @@ static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } - -static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = DFPRegDecoderTable[RegNo]; @@ -188,11 +182,9 @@ static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } - -static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -203,10 +195,9 @@ static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCPRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeCPRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; unsigned Reg = CPRegDecoderTable[RegNo]; @@ -216,7 +207,7 @@ static DecodeStatus DecodeCPRegsRegisterClass(MCInst &Inst, static DecodeStatus DecodeFCCRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 3) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createReg(FCCRegDecoderTable[RegNo])); @@ -225,7 +216,7 @@ static DecodeStatus DecodeFCCRegsRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createReg(ASRRegDecoderTable[RegNo])); @@ -233,8 +224,8 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo >= array_lengthof(PRRegDecoderTable)) return MCDisassembler::Fail; Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo])); @@ -242,7 +233,8 @@ static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo > 31) @@ -257,7 +249,8 @@ static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeCPPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -267,45 +260,52 @@ static DecodeStatus DecodeCPPairRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, + uint64_t Address, + const 
MCDisassembler *Decoder); static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadCP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreQFP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreCPPair(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCall(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSIMM13(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeCall(MCInst &Inst, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeSIMM13(MCInst &Inst, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeJMPL(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeSWAP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeTRAP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); #include "SparcGenDisassemblerTables.inc" @@ -363,13 +363,12 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return MCDisassembler::Fail; } - typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMem(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder, - bool isLoad, DecodeFunc DecodeRD) { + const MCDisassembler *Decoder, bool isLoad, + DecodeFunc 
DecodeRD) { unsigned rd = fieldFromInstruction(insn, 25, 5); unsigned rs1 = fieldFromInstruction(insn, 14, 5); bool isImm = fieldFromInstruction(insn, 13, 1); @@ -415,100 +414,106 @@ static DecodeStatus DecodeMem(MCInst &MI, unsigned insn, uint64_t Address, } static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeIntRegsRegisterClass); } -static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeIntPairRegisterClass); } static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeFPRegsRegisterClass); } static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeDFPRegsRegisterClass); } static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeQFPRegsRegisterClass); } static DecodeStatus DecodeLoadCP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeCPRegsRegisterClass); } -static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn, + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeCPPairRegisterClass); } static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeIntRegsRegisterClass); } static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeIntPairRegisterClass); } static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeFPRegsRegisterClass); } static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeDFPRegsRegisterClass); } static DecodeStatus DecodeStoreQFP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeQFPRegsRegisterClass); } -static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeCPRegsRegisterClass); } static 
DecodeStatus DecodeStoreCPPair(MCInst &Inst, unsigned insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeCPPairRegisterClass); } -static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, +static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, - const void *Decoder) { - const MCDisassembler *Dis = static_cast(Decoder); - return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, - Offset, Width); + const MCDisassembler *Decoder) { + return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, + Width, /*InstSize=*/4); } -static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned tgt = fieldFromInstruction(insn, 0, 30); tgt <<= 2; if (!tryAddingSymbolicOperand(tgt+Address, false, Address, @@ -517,15 +522,15 @@ static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, return MCDisassembler::Success; } -static DecodeStatus DecodeSIMM13(MCInst &MI, unsigned insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeSIMM13(MCInst &MI, unsigned insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned tgt = SignExtend32<13>(fieldFromInstruction(insn, 0, 13)); MI.addOperand(MCOperand::createImm(tgt)); return MCDisassembler::Success; } static DecodeStatus DecodeJMPL(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned rd = fieldFromInstruction(insn, 25, 5); unsigned rs1 = fieldFromInstruction(insn, 14, 5); @@ -559,7 +564,7 @@ static DecodeStatus DecodeJMPL(MCInst &MI, unsigned insn, uint64_t Address, } static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned rs1 = fieldFromInstruction(insn, 14, 5); unsigned isImm = fieldFromInstruction(insn, 13, 1); @@ -587,7 +592,7 @@ static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address, } static DecodeStatus DecodeSWAP(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned rd = fieldFromInstruction(insn, 25, 5); unsigned rs1 = fieldFromInstruction(insn, 14, 5); @@ -627,7 +632,7 @@ static DecodeStatus DecodeSWAP(MCInst &MI, unsigned insn, uint64_t Address, } static DecodeStatus DecodeTRAP(MCInst &MI, unsigned insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned rs1 = fieldFromInstruction(insn, 14, 5); unsigned isImm = fieldFromInstruction(insn, 13, 1); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 4d69040a4508..7b2d8afd3605 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -47,6 +47,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_br16_14: return (Value >> 2) & 0x3fff; + case Sparc::fixup_sparc_hix22: + return (~Value >> 10) & 0x3fffff; + case Sparc::fixup_sparc_pc22: case Sparc::fixup_sparc_got22: case Sparc::fixup_sparc_tls_gd_hi22: @@ -60,6 +63,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_13: return Value & 0x1fff; + case 
Sparc::fixup_sparc_lox10: + return (Value & 0x3ff) | 0x1c00; + case Sparc::fixup_sparc_pc10: case Sparc::fixup_sparc_got10: case Sparc::fixup_sparc_tls_gd_lo10: @@ -98,6 +104,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_tls_ie_ld: case Sparc::fixup_sparc_tls_ie_ldx: case Sparc::fixup_sparc_tls_ie_add: + case Sparc::fixup_sparc_gotdata_lox10: + case Sparc::fixup_sparc_gotdata_hix22: + case Sparc::fixup_sparc_gotdata_op: return 0; } } @@ -189,7 +198,12 @@ namespace { { "fixup_sparc_tls_ie_ldx", 0, 0, 0 }, { "fixup_sparc_tls_ie_add", 0, 0, 0 }, { "fixup_sparc_tls_le_hix22", 0, 0, 0 }, - { "fixup_sparc_tls_le_lox10", 0, 0, 0 } + { "fixup_sparc_tls_le_lox10", 0, 0, 0 }, + { "fixup_sparc_hix22", 10, 22, 0 }, + { "fixup_sparc_lox10", 19, 13, 0 }, + { "fixup_sparc_gotdata_hix22", 0, 0, 0 }, + { "fixup_sparc_gotdata_lox10", 0, 0, 0 }, + { "fixup_sparc_gotdata_op", 0, 0, 0 }, }; const static MCFixupKindInfo InfosLE[Sparc::NumTargetFixupKinds] = { @@ -231,7 +245,12 @@ namespace { { "fixup_sparc_tls_ie_ldx", 0, 0, 0 }, { "fixup_sparc_tls_ie_add", 0, 0, 0 }, { "fixup_sparc_tls_le_hix22", 0, 0, 0 }, - { "fixup_sparc_tls_le_lox10", 0, 0, 0 } + { "fixup_sparc_tls_le_lox10", 0, 0, 0 }, + { "fixup_sparc_hix22", 0, 22, 0 }, + { "fixup_sparc_lox10", 0, 13, 0 }, + { "fixup_sparc_gotdata_hix22", 0, 0, 0 }, + { "fixup_sparc_gotdata_lox10", 0, 0, 0 }, + { "fixup_sparc_gotdata_op", 0, 0, 0 }, }; // Fixup kinds from .reloc directive are like R_SPARC_NONE. They do diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 02261dc5c4cd..9c50c41f6bf2 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -26,7 +26,7 @@ namespace { Is64Bit ? 
ELF::EM_SPARCV9 : ELF::EM_SPARC, /*HasRelocationAddend*/ true) {} - ~SparcELFObjectWriter() override {} + ~SparcELFObjectWriter() override = default; protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, @@ -112,6 +112,11 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, case Sparc::fixup_sparc_tls_ie_add: return ELF::R_SPARC_TLS_IE_ADD; case Sparc::fixup_sparc_tls_le_hix22: return ELF::R_SPARC_TLS_LE_HIX22; case Sparc::fixup_sparc_tls_le_lox10: return ELF::R_SPARC_TLS_LE_LOX10; + case Sparc::fixup_sparc_hix22: return ELF::R_SPARC_HIX22; + case Sparc::fixup_sparc_lox10: return ELF::R_SPARC_LOX10; + case Sparc::fixup_sparc_gotdata_hix22: return ELF::R_SPARC_GOTDATA_HIX22; + case Sparc::fixup_sparc_gotdata_lox10: return ELF::R_SPARC_GOTDATA_LOX10; + case Sparc::fixup_sparc_gotdata_op: return ELF::R_SPARC_GOTDATA_OP; } return ELF::R_SPARC_NONE; diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h index e0a43095ec0b..701d8513e657 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h @@ -95,6 +95,18 @@ namespace llvm { fixup_sparc_tls_le_hix22, fixup_sparc_tls_le_lox10, + /// 22-bit fixup corresponding to %hix(foo) + fixup_sparc_hix22, + /// 13-bit fixup corresponding to %lox(foo) + fixup_sparc_lox10, + + /// 22-bit fixup corresponding to %gdop_hix22(foo) + fixup_sparc_gotdata_hix22, + /// 13-bit fixup corresponding to %gdop_lox10(foo) + fixup_sparc_gotdata_lox10, + /// 32-bit fixup corresponding to %gdop(foo) + fixup_sparc_gotdata_op, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 9f8522541332..d75d41b35838 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -104,17 +104,21 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, support::endian::write(OS, Bits, Ctx.getAsmInfo()->isLittleEndian() ? support::little : support::big); - unsigned tlsOpNo = 0; + + // Some instructions have phantom operands that only contribute a fixup entry. + unsigned SymOpNo = 0; switch (MI.getOpcode()) { default: break; - case SP::TLS_CALL: tlsOpNo = 1; break; + case SP::TLS_CALL: SymOpNo = 1; break; + case SP::GDOP_LDrr: + case SP::GDOP_LDXrr: case SP::TLS_ADDrr: case SP::TLS_ADDXrr: case SP::TLS_LDrr: - case SP::TLS_LDXrr: tlsOpNo = 3; break; + case SP::TLS_LDXrr: SymOpNo = 3; break; } - if (tlsOpNo != 0) { - const MCOperand &MO = MI.getOperand(tlsOpNo); + if (SymOpNo != 0) { + const MCOperand &MO = MI.getOperand(SymOpNo); uint64_t op = getMachineOpValue(MI, MO, Fixups, STI); assert(op == 0 && "Unexpected operand value!"); (void)op; // suppress warning. 
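
Note on the new %hix/%lox fixups above: adjustFixupValue stores the complemented upper bits for hix22 and ORs 0x1c00 into lox10 so that bit 12 of the 13-bit immediate is set. A minimal standalone check of that identity follows (my own illustration, not part of the patch; the constant is arbitrary): in the usual V9 "sethi %hix(v), %rd; xor %rd, %lox(v), %rd" idiom for negative 32-bit constants, the xor immediate sign-extends and flips the complemented bits back.

#include <cassert>
#include <cstdint>

int main() {
  int64_t V = -1234567;                 // any negative 32-bit constant, sign-extended
  uint64_t Hix = (~V >> 10) & 0x3fffff; // same arithmetic as fixup_sparc_hix22
  uint64_t Lox = (V & 0x3ff) | 0x1c00;  // same arithmetic as fixup_sparc_lox10

  int64_t R = (int64_t)(Hix << 10);     // sethi %hix(V), %rd: imm22 lands in bits 31..10
  int64_t Simm13 = (int64_t)(Lox ^ 0x1000) - 0x1000; // the 13-bit immediate sign-extends
  R ^= Simm13;                          // xor %rd, %lox(V), %rd
  assert(R == V);
  return 0;
}
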
@@ -253,7 +257,6 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo, #include "SparcGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new SparcMCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index c2db4526ef66..cc73ea7e6120 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" using namespace llvm; @@ -80,6 +81,11 @@ bool SparcMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) case VK_Sparc_TLS_IE_ADD: OS << "%tie_add("; return true; case VK_Sparc_TLS_LE_HIX22: OS << "%tle_hix22("; return true; case VK_Sparc_TLS_LE_LOX10: OS << "%tle_lox10("; return true; + case VK_Sparc_HIX22: OS << "%hix("; return true; + case VK_Sparc_LOX10: OS << "%lox("; return true; + case VK_Sparc_GOTDATA_HIX22: OS << "%gdop_hix22("; return true; + case VK_Sparc_GOTDATA_LOX10: OS << "%gdop_lox10("; return true; + case VK_Sparc_GOTDATA_OP: OS << "%gdop("; return true; } llvm_unreachable("Unhandled SparcMCExpr::VariantKind"); } @@ -119,6 +125,11 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name) .Case("tie_add", VK_Sparc_TLS_IE_ADD) .Case("tle_hix22", VK_Sparc_TLS_LE_HIX22) .Case("tle_lox10", VK_Sparc_TLS_LE_LOX10) + .Case("hix", VK_Sparc_HIX22) + .Case("lox", VK_Sparc_LOX10) + .Case("gdop_hix22", VK_Sparc_GOTDATA_HIX22) + .Case("gdop_lox10", VK_Sparc_GOTDATA_LOX10) + .Case("gdop", VK_Sparc_GOTDATA_OP) .Default(VK_Sparc_None); } @@ -159,6 +170,11 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { case VK_Sparc_TLS_IE_ADD: return Sparc::fixup_sparc_tls_ie_add; case VK_Sparc_TLS_LE_HIX22: return Sparc::fixup_sparc_tls_le_hix22; case VK_Sparc_TLS_LE_LOX10: return Sparc::fixup_sparc_tls_le_lox10; + case VK_Sparc_HIX22: return Sparc::fixup_sparc_hix22; + case VK_Sparc_LOX10: return Sparc::fixup_sparc_lox10; + case VK_Sparc_GOTDATA_HIX22: return Sparc::fixup_sparc_gotdata_hix22; + case VK_Sparc_GOTDATA_LOX10: return Sparc::fixup_sparc_gotdata_lox10; + case VK_Sparc_GOTDATA_OP: return Sparc::fixup_sparc_gotdata_op; } } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h index 504e959194f5..d98ad26c96a9 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -58,7 +58,12 @@ public: VK_Sparc_TLS_IE_LDX, VK_Sparc_TLS_IE_ADD, VK_Sparc_TLS_LE_HIX22, - VK_Sparc_TLS_LE_LOX10 + VK_Sparc_TLS_LE_LOX10, + VK_Sparc_HIX22, + VK_Sparc_LOX10, + VK_Sparc_GOTDATA_HIX22, + VK_Sparc_GOTDATA_LOX10, + VK_Sparc_GOTDATA_OP, }; private: diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index f360946b9a79..7ef043d9df40 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -29,7 +29,6 @@ class MCTargetOptions; class Target; MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td 
b/llvm/lib/Target/Sparc/SparcCallingConv.td index db540d6f0c42..e6d23f741ea5 100644 --- a/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -134,7 +134,7 @@ def RetCC_Sparc64 : CallingConv<[ // Callee-saved registers are handled by the register window mechanism. def CSR : CalleeSavedRegs<(add)> { let OtherPreserved = (add (sequence "I%u", 0, 7), - (sequence "L%u", 0, 7)); + (sequence "L%u", 0, 7), O6); } // Callee-saved registers for calls with ReturnsTwice attribute. diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index a740de9123c9..000418be9a9e 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -218,8 +218,9 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF, const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); - assert(MBBI->getOpcode() == SP::RETL && - "Can only put epilog before 'retl' instruction!"); + assert((MBBI->getOpcode() == SP::RETL || MBBI->getOpcode() == SP::TAIL_CALL || + MBBI->getOpcode() == SP::TAIL_CALLri) && + "Can only put epilog before 'retl' or 'tail_call' instruction!"); if (!FuncInfo->isLeafProc()) { BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) .addReg(SP::G0); @@ -228,10 +229,19 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF, MachineFrameInfo &MFI = MF.getFrameInfo(); int NumBytes = (int) MFI.getStackSize(); - if (NumBytes == 0) - return; - - emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); + if (NumBytes != 0) + emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); + + // Preserve return address in %o7 + if (MBBI->getOpcode() == SP::TAIL_CALL) { + MBB.addLiveIn(SP::O7); + BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::G1) + .addReg(SP::G0) + .addReg(SP::O7); + BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::O7) + .addReg(SP::G0) + .addReg(SP::G1); + } } bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { @@ -316,10 +326,11 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - return !(MFI.hasCalls() // has calls - || MRI.isPhysRegUsed(SP::L0) // Too many registers needed - || MRI.isPhysRegUsed(SP::O6) // %sp is used - || hasFP(MF)); // need %fp + return !(MFI.hasCalls() // has calls + || MRI.isPhysRegUsed(SP::L0) // Too many registers needed + || MRI.isPhysRegUsed(SP::O6) // %sp is used + || hasFP(MF) // need %fp + || MF.hasInlineAsm()); // has inline assembly } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 6d6879bc94b3..2cb74e7709c7 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -710,6 +710,36 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee, return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice); } +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. +bool SparcTargetLowering::IsEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const { + + auto &Outs = CLI.Outs; + auto &Caller = MF.getFunction(); + + // Do not tail call opt functions with "disable-tail-calls" attribute. 
+  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+    return false;
+
+  // Do not tail call opt if the stack is used to pass parameters.
+  if (CCInfo.getNextStackOffset() != 0)
+    return false;
+
+  // Do not tail call opt if either the callee or caller returns
+  // a struct and the other does not.
+  if (!Outs.empty() && Caller.hasStructRetAttr() != Outs[0].Flags.isSRet())
+    return false;
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call.
+  for (auto &Arg : Outs)
+    if (Arg.Flags.isByVal())
+      return false;
+
+  return true;
+}
+
 // Lower a call for the 32-bit ABI.
 SDValue
 SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -725,15 +755,15 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   CallingConv::ID CallConv = CLI.CallConv;
   bool isVarArg = CLI.IsVarArg;
 
-  // Sparc target does not yet support tail call optimization.
-  isTailCall = false;
-
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
 
+  isTailCall = isTailCall && IsEligibleForTailCallOptimization(
+                                 CCInfo, CLI, DAG.getMachineFunction());
+
   // Get the size of the outgoing arguments stack space requirement.
   unsigned ArgsSize = CCInfo.getNextStackOffset();
 
@@ -771,7 +801,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
     }
   }
 
-  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
+  assert(!isTailCall || ArgsSize == 0);
+
+  if (!isTailCall)
+    Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
@@ -816,6 +849,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
 
     if (Flags.isSRet()) {
       assert(VA.needsCustom());
+
+      if (isTailCall)
+        continue;
+
       // store SRet argument in %sp+64
       SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
       SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
@@ -825,9 +862,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
       hasStructRetAttr = true;
       // sret only allowed on first argument
       assert(Outs[realArgIdx].OrigArgIndex == 0);
-      PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
-      Type *ElementTy = Ty->getPointerElementType();
-      SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
+      SRetArgSize =
+          DAG.getDataLayout().getTypeAllocSize(CLI.getArgs()[0].IndirectType);
       continue;
     }
 
@@ -929,7 +965,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   // stuck together.
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Register Reg = toCallerWindow(RegsToPass[i].first); + Register Reg = RegsToPass[i].first; + if (!isTailCall) + Reg = toCallerWindow(Reg); Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -953,9 +991,12 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Callee); if (hasStructRetAttr) Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32)); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first), - RegsToPass[i].second.getValueType())); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Register Reg = RegsToPass[i].first; + if (!isTailCall) + Reg = toCallerWindow(Reg); + Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); + } // Add a register mask operand representing the call-preserved registers. const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -969,6 +1010,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, if (InFlag.getNode()) Ops.push_back(InFlag); + if (isTailCall) { + DAG.getMachineFunction().getFrameInfo().setHasTailCall(); + return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops); + } + Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); @@ -1408,7 +1454,7 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { - MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0)); + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); // Instructions which use registers as conditionals examine all the // bits (as does the pseudo SELECT_CC expansion). 
I don't think it @@ -1853,6 +1899,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { case SPISD::TLS_ADD: return "SPISD::TLS_ADD"; case SPISD::TLS_LD: return "SPISD::TLS_LD"; case SPISD::TLS_CALL: return "SPISD::TLS_CALL"; + case SPISD::TAIL_CALL: return "SPISD::TAIL_CALL"; + case SPISD::LOAD_GDOP: return "SPISD::LOAD_GDOP"; } return nullptr; } @@ -2178,8 +2226,10 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, RetPtr = DAG.getFrameIndex(RetFI, PtrVT); Entry.Node = RetPtr; Entry.Ty = PointerType::getUnqual(RetTy); - if (!Subtarget->is64Bit()) + if (!Subtarget->is64Bit()) { Entry.IsSRet = true; + Entry.IndirectType = RetTy; + } Entry.IsReturned = false; Args.push_back(Entry); RetTyABI = Type::getVoidTy(*DAG.getContext()); @@ -3126,6 +3176,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case SP::SELECT_CC_DFP_ICC: case SP::SELECT_CC_QFP_ICC: return expandSelectCC(MI, BB, SP::BCOND); + case SP::SELECT_CC_Int_XCC: + case SP::SELECT_CC_FP_XCC: + case SP::SELECT_CC_DFP_XCC: + case SP::SELECT_CC_QFP_XCC: + return expandSelectCC(MI, BB, SP::BPXCC); case SP::SELECT_CC_Int_FCC: case SP::SELECT_CC_FP_FCC: case SP::SELECT_CC_DFP_FCC: @@ -3276,6 +3331,9 @@ std::pair SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + if (Constraint.empty()) + return std::make_pair(0U, nullptr); + if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -3304,46 +3362,60 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // This will generate an error message return std::make_pair(0U, nullptr); } - } else if (!Constraint.empty() && Constraint.size() <= 5 - && Constraint[0] == '{' && *(Constraint.end()-1) == '}') { - // constraint = '{r}' - // Remove the braces from around the name. - StringRef name(Constraint.data()+1, Constraint.size()-2); - // Handle register aliases: - // r0-r7 -> g0-g7 - // r8-r15 -> o0-o7 - // r16-r23 -> l0-l7 - // r24-r31 -> i0-i7 - uint64_t intVal = 0; - if (name.substr(0, 1).equals("r") - && !name.substr(1).getAsInteger(10, intVal) && intVal <= 31) { - const char regTypes[] = { 'g', 'o', 'l', 'i' }; - char regType = regTypes[intVal/8]; - char regIdx = '0' + (intVal % 8); - char tmp[] = { '{', regType, regIdx, '}', 0 }; - std::string newConstraint = std::string(tmp); - return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint, - VT); - } - if (name.substr(0, 1).equals("f") && - !name.substr(1).getAsInteger(10, intVal) && intVal <= 63) { - std::string newConstraint; - - if (VT == MVT::f32 || VT == MVT::Other) { - newConstraint = "{f" + utostr(intVal) + "}"; - } else if (VT == MVT::f64 && (intVal % 2 == 0)) { - newConstraint = "{d" + utostr(intVal / 2) + "}"; - } else if (VT == MVT::f128 && (intVal % 4 == 0)) { - newConstraint = "{q" + utostr(intVal / 4) + "}"; - } else { - return std::make_pair(0U, nullptr); - } - return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint, - VT); + } + + if (Constraint.front() != '{') + return std::make_pair(0U, nullptr); + + assert(Constraint.back() == '}' && "Not a brace enclosed constraint?"); + StringRef RegName(Constraint.data() + 1, Constraint.size() - 2); + if (RegName.empty()) + return std::make_pair(0U, nullptr); + + unsigned long long RegNo; + // Handle numbered register aliases. 
+ if (RegName[0] == 'r' && + getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) { + // r0-r7 -> g0-g7 + // r8-r15 -> o0-o7 + // r16-r23 -> l0-l7 + // r24-r31 -> i0-i7 + if (RegNo > 31) + return std::make_pair(0U, nullptr); + const char RegTypes[] = {'g', 'o', 'l', 'i'}; + char RegType = RegTypes[RegNo / 8]; + char RegIndex = '0' + (RegNo % 8); + char Tmp[] = {'{', RegType, RegIndex, '}', 0}; + return getRegForInlineAsmConstraint(TRI, Tmp, VT); + } + + // Rewrite the fN constraint according to the value type if needed. + if (VT != MVT::f32 && VT != MVT::Other && RegName[0] == 'f' && + getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) { + if (VT == MVT::f64 && (RegNo % 2 == 0)) { + return getRegForInlineAsmConstraint( + TRI, StringRef("{d" + utostr(RegNo / 2) + "}"), VT); + } else if (VT == MVT::f128 && (RegNo % 4 == 0)) { + return getRegForInlineAsmConstraint( + TRI, StringRef("{q" + utostr(RegNo / 4) + "}"), VT); + } else { + return std::make_pair(0U, nullptr); } } - return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + auto ResultPair = + TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + if (!ResultPair.second) + return std::make_pair(0U, nullptr); + + // Force the use of I64Regs over IntRegs for 64-bit values. + if (Subtarget->is64Bit() && VT == MVT::i64) { + assert(ResultPair.second == &SP::IntRegsRegClass && + "Unexpected register class"); + return std::make_pair(ResultPair.first, &SP::I64RegsRegClass); + } + + return ResultPair; } bool diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h index 5c9703823a64..2768bb20566a 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -44,9 +44,13 @@ namespace llvm { GLOBAL_BASE_REG, // Global base reg for PIC. FLUSHW, // FLUSH register windows to stack. + TAIL_CALL, // Tail call + TLS_ADD, // For Thread Local Storage (TLS). TLS_LD, - TLS_CALL + TLS_CALL, + + LOAD_GDOP, // Load operation w/ gdop relocation. }; } @@ -182,6 +186,10 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool IsEligibleForTailCallOptimization(CCState &CCInfo, + CallLoweringInfo &CLI, + MachineFunction &MF) const; + bool ShouldShrinkFPConstant(EVT VT) const override { // Do not shrink FP constpool if VT == MVT::f128. // (ldd, call _Q_fdtoq) is more expensive than two ldds. 
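
A quick illustration of the constraint-rewriting arithmetic in getRegForInlineAsmConstraint above (my own sketch, not code from the tree): rN aliases select the register-window bank by RegNo / 8, and an fN constraint is retargeted to the overlapping double or quad register by halving or quartering the number.

#include <cstdio>

int main() {
  // rN aliases fold onto the windowed registers exactly as in the code above:
  // g for r0-r7, o for r8-r15, l for r16-r23, i for r24-r31.
  const char RegTypes[] = {'g', 'o', 'l', 'i'};
  for (unsigned RegNo : {0u, 7u, 8u, 15u, 16u, 24u, 31u})
    printf("{r%u} -> {%c%u}\n", RegNo, RegTypes[RegNo / 8], RegNo % 8);

  // An fN constraint names the same bits as a wider overlapping register:
  // {f20} becomes {d10} for an f64 value and {q5} for an f128 value.
  unsigned FN = 20;
  printf("{f%u} -> {d%u} (f64), {q%u} (f128)\n", FN, FN / 2, FN / 4);
  return 0;
}
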
diff --git a/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/llvm/lib/Target/Sparc/SparcInstr64Bit.td index df65c5457c1d..a471d65201c3 100644 --- a/llvm/lib/Target/Sparc/SparcInstr64Bit.td +++ b/llvm/lib/Target/Sparc/SparcInstr64Bit.td @@ -163,7 +163,7 @@ defm ADDX : F3_12<"add", 0b000000, add, I64Regs, i64, i64imm>; defm SUBX : F3_12<"sub", 0b000100, sub, I64Regs, i64, i64imm>; def TLS_ADDXrr : F3_1<2, 0b000000, (outs I64Regs:$rd), - (ins I64Regs:$rs1, I64Regs:$rs2, TLSSym:$sym), + (ins I64Regs:$rs1, I64Regs:$rs2, TailRelocSymTLSAdd:$sym), "add $rs1, $rs2, $rd, $sym", [(set i64:$rd, (tlsadd i64:$rs1, i64:$rs2, tglobaltlsaddr:$sym))]>; @@ -238,12 +238,20 @@ let Predicates = [Is64Bit] in { let DecoderMethod = "DecodeLoadInt" in defm LDX : Load<"ldx", 0b001011, load, I64Regs, i64>; -let mayLoad = 1, isAsmParserOnly = 1 in +let mayLoad = 1, isAsmParserOnly = 1 in { def TLS_LDXrr : F3_1<3, 0b001011, - (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym), + (outs IntRegs:$dst), + (ins MEMrr:$addr, TailRelocSymTLSLoad:$sym), "ldx [$addr], $dst, $sym", [(set i64:$dst, (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>; + def GDOP_LDXrr : F3_1<3, 0b001011, + (outs I64Regs:$dst), + (ins MEMrr:$addr, TailRelocSymGOTLoad:$sym), + "ldx [$addr], $dst, $sym", + [(set i64:$dst, + (load_gdop ADDRrr:$addr, tglobaladdr:$sym))]>; +} // Extending loads to i64. def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; @@ -336,6 +344,7 @@ def FMOVD_XCC : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd), "fmovd$cond %xcc, $rs2, $rd", [(set f64:$rd, (SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>; +let Predicates = [Is64Bit, HasHardQuad] in def FMOVQ_XCC : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd), (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond), "fmovq$cond %xcc, $rs2, $rd", @@ -436,11 +445,11 @@ def FXTOD : F3_3u<2, 0b110100, 0b010001000, (outs DFPRegs:$rd), (ins DFPRegs:$rs2), "fxtod $rs2, $rd", [(set DFPRegs:$rd, (SPxtof DFPRegs:$rs2))]>; +let Predicates = [Is64Bit, HasHardQuad] in def FXTOQ : F3_3u<2, 0b110100, 0b010001100, (outs QFPRegs:$rd), (ins DFPRegs:$rs2), "fxtoq $rs2, $rd", - [(set QFPRegs:$rd, (SPxtof DFPRegs:$rs2))]>, - Requires<[HasHardQuad]>; + [(set QFPRegs:$rd, (SPxtof DFPRegs:$rs2))]>; def FSTOX : F3_3u<2, 0b110100, 0b010000001, (outs DFPRegs:$rd), (ins FPRegs:$rs2), @@ -450,11 +459,11 @@ def FDTOX : F3_3u<2, 0b110100, 0b010000010, (outs DFPRegs:$rd), (ins DFPRegs:$rs2), "fdtox $rs2, $rd", [(set DFPRegs:$rd, (SPftox DFPRegs:$rs2))]>; +let Predicates = [Is64Bit, HasHardQuad] in def FQTOX : F3_3u<2, 0b110100, 0b010000011, (outs DFPRegs:$rd), (ins QFPRegs:$rs2), "fqtox $rs2, $rd", - [(set DFPRegs:$rd, (SPftox QFPRegs:$rs2))]>, - Requires<[HasHardQuad]>; + [(set DFPRegs:$rd, (SPftox QFPRegs:$rs2))]>; } // Predicates = [Is64Bit] diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td index 5e305fc9df71..481bd7d2f7fa 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -147,7 +147,29 @@ def MEMri : Operand { let ParserMatchClass = SparcMEMriAsmOperand; } -def TLSSym : Operand; +// Represents a tail relocation operand for instructions such as add, ld, call. 
+class SparcTailRelocSymAsmOperand<string Kind> : AsmOperandClass {
+  let Name = "TailRelocSym" # Kind;
+  let RenderMethod = "addTailRelocSymOperands";
+  let PredicateMethod = "isTailRelocSym";
+  let ParserMethod = "parseTailRelocSym";
+}
+
+def TailRelocSymGOTLoad : Operand<iPTR> {
+  let ParserMatchClass = SparcTailRelocSymAsmOperand<"Load_GOT">;
+}
+
+def TailRelocSymTLSAdd : Operand<iPTR> {
+  let ParserMatchClass = SparcTailRelocSymAsmOperand<"Add_TLS">;
+}
+
+def TailRelocSymTLSLoad : Operand<iPTR> {
+  let ParserMatchClass = SparcTailRelocSymAsmOperand<"Load_TLS">;
+}
+
+def TailRelocSymTLSCall : Operand<iPTR> {
+  let ParserMatchClass = SparcTailRelocSymAsmOperand<"Call_TLS">;
+}
 
 def SparcMembarTagAsmOperand : AsmOperandClass {
   let Name = "MembarTag";
@@ -214,6 +236,9 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 def SDTSPtlsld :
 SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
 
+def SDTSPloadgdop :
+SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
 def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
 def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
 def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
@@ -248,6 +273,10 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
                   [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                    SDNPVariadic]>;
 
+def tailcall : SDNode<"SPISD::TAIL_CALL", SDT_SPCall,
+                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                       SDNPVariadic]>;
+
 def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
 def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
                      [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -261,6 +290,8 @@ def tlscall : SDNode<"SPISD::TLS_CALL", SDT_SPCall,
                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                       SDNPVariadic]>;
 
+def load_gdop : SDNode<"SPISD::LOAD_GDOP", SDTSPloadgdop>;
+
 def getPCX : Operand<iPTR> {
   let PrintMethod = "printGetPCX";
 }
@@ -484,6 +515,27 @@ let Uses = [ICC], usesCustomInserter = 1 in {
                       [(set f128:$dst, (SPselecticc f128:$T, f128:$F, imm:$Cond))]>;
 }
 
+let Uses = [ICC], usesCustomInserter = 1 in {
+  def SELECT_CC_Int_XCC
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+               "; SELECT_CC_Int_XCC PSEUDO!",
+               [(set i32:$dst, (SPselectxcc i32:$T, i32:$F, imm:$Cond))]>;
+  def SELECT_CC_FP_XCC
+      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+               "; SELECT_CC_FP_XCC PSEUDO!",
+               [(set f32:$dst, (SPselectxcc f32:$T, f32:$F, imm:$Cond))]>;
+
+  def SELECT_CC_DFP_XCC
+      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+               "; SELECT_CC_DFP_XCC PSEUDO!",
+               [(set f64:$dst, (SPselectxcc f64:$T, f64:$F, imm:$Cond))]>;
+
+  def SELECT_CC_QFP_XCC
+      : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond),
+               "; SELECT_CC_QFP_XCC PSEUDO!",
+               [(set f128:$dst, (SPselectxcc f128:$T, f128:$F, imm:$Cond))]>;
+}
+
 let usesCustomInserter = 1, Uses = [FCC0] in {
 
   def SELECT_CC_Int_FCC
@@ -562,6 +614,15 @@ let DecoderMethod = "DecodeLoadFP" in
   }
 }
 
+let mayLoad = 1, isAsmParserOnly = 1 in {
+  def GDOP_LDrr : F3_1<3, 0b000000,
+                       (outs IntRegs:$dst),
+                       (ins MEMrr:$addr, TailRelocSymGOTLoad:$sym),
+                       "ld [$addr], $dst, $sym",
+                       [(set i32:$dst,
+                           (load_gdop ADDRrr:$addr, tglobaladdr:$sym))]>;
+}
+
 // Section B.4 - Store Integer Instructions, p.
95 let DecoderMethod = "DecodeStoreInt" in { defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>; @@ -1344,21 +1405,24 @@ let Defs = [FCC0], rd = 0, isCodeGenOnly = 1 in { let isAsmParserOnly = 1 in { def TLS_ADDrr : F3_1<2, 0b000000, (outs IntRegs:$rd), - (ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym), + (ins IntRegs:$rs1, IntRegs:$rs2, TailRelocSymTLSAdd:$sym), "add $rs1, $rs2, $rd, $sym", [(set i32:$rd, (tlsadd i32:$rs1, i32:$rs2, tglobaltlsaddr:$sym))]>; -let mayLoad = 1 in +let mayLoad = 1 in { def TLS_LDrr : F3_1<3, 0b000000, - (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym), + (outs IntRegs:$dst), + (ins MEMrr:$addr, TailRelocSymTLSLoad:$sym), "ld [$addr], $dst, $sym", [(set i32:$dst, (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>; +} let Uses = [O6], isCall = 1, hasDelaySlot = 1 in def TLS_CALL : InstSP<(outs), - (ins calltarget:$disp, TLSSym:$sym, variable_ops), + (ins calltarget:$disp, TailRelocSymTLSCall:$sym, + variable_ops), "call $disp, $sym", [(tlscall texternalsym:$disp, tglobaltlsaddr:$sym)], IIC_jmp_or_call> { @@ -1368,6 +1432,31 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in } } +//===----------------------------------------------------------------------===// +// Instructions for tail calls. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, + isTerminator = 1, isBarrier = 1 in { + def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops), + "call $disp", + [(tailcall tglobaladdr:$disp)]> { + bits<30> disp; + let op = 1; + let Inst{29-0} = disp; + } +} + +def : Pat<(tailcall (iPTR texternalsym:$dst)), + (TAIL_CALL texternalsym:$dst)>; + +let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, isTerminator = 1, + isBarrier = 1, rd = 0 in { + def TAIL_CALLri : F3_2<2, 0b111000, + (outs), (ins MEMri:$ptr, variable_ops), + "jmp $ptr", + [(tailcall ADDRri:$ptr)]>; +} + //===----------------------------------------------------------------------===// // V9 Instructions //===----------------------------------------------------------------------===// @@ -1415,12 +1504,12 @@ let Predicates = [HasV9], Constraints = "$f = $rd" in { (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), "fmovd$cond %icc, $rs2, $rd", [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cond))]>; + let Predicates = [HasV9, HasHardQuad] in def FMOVQ_ICC : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd), (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond), "fmovq$cond %icc, $rs2, $rd", - [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>, - Requires<[HasHardQuad]>; + [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>; } let Uses = [FCC0], intcc = 0, opf_cc = 0b00 in { @@ -1434,12 +1523,12 @@ let Predicates = [HasV9], Constraints = "$f = $rd" in { (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), "fmovd$cond %fcc0, $rs2, $rd", [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cond))]>; + let Predicates = [HasV9, HasHardQuad] in def FMOVQ_FCC : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd), (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond), "fmovq$cond %fcc0, $rs2, $rd", - [(set f128:$rd, (SPselectfcc f128:$rs2, f128:$f, imm:$cond))]>, - Requires<[HasHardQuad]>; + [(set f128:$rd, (SPselectfcc f128:$rs2, f128:$f, imm:$cond))]>; } } @@ -1449,28 +1538,28 @@ let Predicates = [HasV9] in { def FMOVD : F3_3u<2, 0b110100, 0b000000010, (outs DFPRegs:$rd), (ins DFPRegs:$rs2), "fmovd $rs2, $rd", []>; + let Predicates = [HasV9, HasHardQuad] in def FMOVQ : F3_3u<2, 0b110100, 0b000000011, (outs QFPRegs:$rd), (ins 
QFPRegs:$rs2), - "fmovq $rs2, $rd", []>, - Requires<[HasHardQuad]>; + "fmovq $rs2, $rd", []>; def FNEGD : F3_3u<2, 0b110100, 0b000000110, (outs DFPRegs:$rd), (ins DFPRegs:$rs2), "fnegd $rs2, $rd", [(set f64:$rd, (fneg f64:$rs2))]>; + let Predicates = [HasV9, HasHardQuad] in def FNEGQ : F3_3u<2, 0b110100, 0b000000111, (outs QFPRegs:$rd), (ins QFPRegs:$rs2), "fnegq $rs2, $rd", - [(set f128:$rd, (fneg f128:$rs2))]>, - Requires<[HasHardQuad]>; + [(set f128:$rd, (fneg f128:$rs2))]>; def FABSD : F3_3u<2, 0b110100, 0b000001010, (outs DFPRegs:$rd), (ins DFPRegs:$rs2), "fabsd $rs2, $rd", [(set f64:$rd, (fabs f64:$rs2))]>; + let Predicates = [HasV9, HasHardQuad] in def FABSQ : F3_3u<2, 0b110100, 0b000001011, (outs QFPRegs:$rd), (ins QFPRegs:$rs2), "fabsq $rs2, $rd", - [(set f128:$rd, (fabs f128:$rs2))]>, - Requires<[HasHardQuad]>; + [(set f128:$rd, (fabs f128:$rs2))]>; } // Floating-point compare instruction with %fcc0-%fcc3. @@ -1517,11 +1606,11 @@ let Predicates = [HasV9] in { : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd), (ins FCCRegs:$opf_cc, DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), "fmovd$cond $opf_cc, $rs2, $rd", []>; + let Predicates = [HasV9, HasHardQuad] in def V9FMOVQ_FCC : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd), (ins FCCRegs:$opf_cc, QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond), - "fmovq$cond $opf_cc, $rs2, $rd", []>, - Requires<[HasHardQuad]>; + "fmovq$cond $opf_cc, $rs2, $rd", []>; } // Constraints = "$f = $rd", ... } // let Predicates = [hasV9] diff --git a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp index 7c36c4ab865f..01db1f3747eb 100644 --- a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp @@ -11,3 +11,10 @@ using namespace llvm; void SparcMachineFunctionInfo::anchor() { } + +MachineFunctionInfo *SparcMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} diff --git a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h index d557c8ea22e2..e1a1568d28a2 100644 --- a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -38,6 +38,11 @@ namespace llvm { : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), IsLeafProc(false) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; + Register getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; } diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index 27c49a408a02..8bd51a703d47 100644 --- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -55,7 +55,7 @@ static std::string computeDataLayout(const Triple &T, bool is64Bit) { } static Reloc::Model getEffectiveRelocModel(Optional RM) { - return RM.getValueOr(Reloc::Static); + return RM.value_or(Reloc::Static); } // Code models. Some only make sense for 64-bit code. 
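
The clone() hook added to SparcMachineFunctionInfo above relies on MachineFunction::cloneInfo, a member template that copy-constructs the concrete per-function info inside the destination function's allocator. A sketch of the pattern, assuming the cloneInfo<> member template spelling:

#include "SparcMachineFunctionInfo.h"

using namespace llvm;

// cloneInfo is a member template on MachineFunction; the concrete info type
// (here SparcMachineFunctionInfo) is passed explicitly so DestMF allocates
// and copy-constructs the right derived object.
MachineFunctionInfo *SparcMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SparcMachineFunctionInfo>(*this);
}
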
@@ -102,7 +102,7 @@ SparcTargetMachine::SparcTargetMachine( initAsmInfo(); } -SparcTargetMachine::~SparcTargetMachine() {} +SparcTargetMachine::~SparcTargetMachine() = default; const SparcSubtarget * SparcTargetMachine::getSubtargetImpl(const Function &F) const { diff --git a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h index f30ddc7b4955..28ab13918042 100644 --- a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h +++ b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h @@ -18,7 +18,7 @@ class TargetMachine; class SparcELFTargetObjectFile : public TargetLoweringObjectFileELF { public: - SparcELFTargetObjectFile() {} + SparcELFTargetObjectFile() = default; void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 40ed417d0817..60e1b05a6d1a 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCAsmParserExtension.h" @@ -1589,9 +1590,11 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, if (getParser().parseExpression(Expr)) return MatchOperand_NoMatch; - auto isOutOfRangeConstant = [&](const MCExpr *E) -> bool { + auto isOutOfRangeConstant = [&](const MCExpr *E, bool Negate) -> bool { if (auto *CE = dyn_cast(E)) { int64_t Value = CE->getValue(); + if (Negate) + Value = -Value; if ((Value & 1) || Value < MinVal || Value > MaxVal) return true; } @@ -1605,7 +1608,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, Error(StartLoc, "Expected PC-relative expression"); return MatchOperand_ParseFail; } - if (isOutOfRangeConstant(CE)) { + if (isOutOfRangeConstant(CE, false)) { Error(StartLoc, "offset out of range"); return MatchOperand_ParseFail; } @@ -1620,8 +1623,9 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, // For consistency with the GNU assembler, conservatively assume that a // constant offset must by itself be within the given size range. 
if (const auto *BE = dyn_cast(Expr)) - if (isOutOfRangeConstant(BE->getLHS()) || - isOutOfRangeConstant(BE->getRHS())) { + if (isOutOfRangeConstant(BE->getLHS(), false) || + isOutOfRangeConstant(BE->getRHS(), + BE->getOpcode() == MCBinaryExpr::Sub)) { Error(StartLoc, "offset out of range"); return MatchOperand_ParseFail; } diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 5eba150dadc3..979141a1962a 100644 --- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" @@ -73,10 +73,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() { static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, - const void *Decoder) { - const MCDisassembler *Dis = static_cast(Decoder); - return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, - Offset, Width); + const MCDisassembler *Decoder) { + return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, + Width, /*InstSize=*/0); } static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, @@ -91,79 +90,79 @@ static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16); } static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16); } static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); } static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16); } -static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); } static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16); } static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16); } static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, 
SystemZMC::FP128Regs, 16); } static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32); } static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32); } static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32); } static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16); } static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16); } @@ -184,70 +183,81 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { } static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<1>(Inst, Imm); } static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<2>(Inst, Imm); } static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<3>(Inst, Imm); } static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<4>(Inst, Imm); } static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<6>(Inst, Imm); } static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<8>(Inst, Imm); } static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<12>(Inst, Imm); } static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<16>(Inst, Imm); } static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeUImmOperand<32>(Inst, Imm); } static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeSImmOperand<8>(Inst, Imm); } static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeSImmOperand<16>(Inst, Imm); } static DecodeStatus 
decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return decodeSImmOperand<32>(Inst, Imm); } -template +template static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, - bool isBranch, - const void *Decoder) { + uint64_t Address, bool isBranch, + const MCDisassembler *Decoder) { assert(isUInt(Imm) && "Invalid PC-relative offset"); uint64_t Value = SignExtend64(Imm) * 2 + Address; @@ -260,31 +270,31 @@ static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, static DecodeStatus decodePC12DBLBranchOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodePCDBLOperand<12>(Inst, Imm, Address, true, Decoder); } static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder); } static DecodeStatus decodePC24DBLBranchOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodePCDBLOperand<24>(Inst, Imm, Address, true, Decoder); } static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder); } static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder); } @@ -382,64 +392,61 @@ static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field, static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs); } static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs); } static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs); } static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs); } -static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address, + const MCDisassembler *Decoder) { return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs); } -static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, uint64_t Address, + const MCDisassembler *Decoder) { return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); } -static DecodeStatus decodeBDLAddr64Disp12Len4Operand(MCInst &Inst, - uint64_t Field, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +decodeBDLAddr64Disp12Len4Operand(MCInst &Inst, uint64_t Field, 
uint64_t Address, + const MCDisassembler *Decoder) { return decodeBDLAddr12Len4Operand(Inst, Field, SystemZMC::GR64Regs); } -static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, - uint64_t Field, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, uint64_t Field, uint64_t Address, + const MCDisassembler *Decoder) { return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); } -static DecodeStatus decodeBDRAddr64Disp12Operand(MCInst &Inst, - uint64_t Field, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +decodeBDRAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address, + const MCDisassembler *Decoder) { return decodeBDRAddr12Operand(Inst, Field, SystemZMC::GR64Regs); } -static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address, + const MCDisassembler *Decoder) { return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs); } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index c83796b8579b..242f566da2c9 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -37,6 +37,8 @@ class SystemZMCCodeEmitter : public MCCodeEmitter { const MCInstrInfo &MCII; MCContext &Ctx; + mutable unsigned MemOpsEmitted; + public: SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : MCII(mcii), Ctx(ctx) { @@ -165,6 +167,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, verifyInstructionPredicates(MI, computeAvailableFeatures(STI.getFeatureBits())); + MemOpsEmitted = 0; uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); unsigned Size = MCII.get(MI.getOpcode()).getSize(); // Big-endian insertion of Size bytes. @@ -191,12 +194,14 @@ getDispOpValue(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups, SystemZ::FixupKind Kind) const { const MCOperand &MO = MI.getOperand(OpNum); - if (MO.isImm()) + if (MO.isImm()) { + ++MemOpsEmitted; return static_cast(MO.getImm()); + } if (MO.isExpr()) { // All instructions follow the pattern where the first displacement has a // 2 bytes offset, and the second one 4 bytes. - unsigned ByteOffs = Fixups.size() == 0 ? 2 : 4; + unsigned ByteOffs = MemOpsEmitted++ == 0 ? 
2 : 4; Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind, MI.getLoc())); assert(Fixups.size() <= 2 && "More than two memory operands in MI?"); @@ -328,7 +333,6 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, #include "SystemZGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new SystemZMCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index c7b73fd3b805..03141ecf551d 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -13,6 +13,7 @@ #include "TargetInfo/SystemZTargetInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" @@ -193,7 +194,7 @@ void SystemZTargetStreamer::emitConstantPools() { return; // Switch to the .text section. const MCObjectFileInfo &OFI = *Streamer.getContext().getObjectFileInfo(); - Streamer.SwitchSection(OFI.getTextSection()); + Streamer.switchSection(OFI.getTextSection()); for (auto &I : EXRLTargets2Sym) { Streamer.emitLabel(I.second); const MCInstSTIPair &MCI_STI = I.first; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index e76fa03af3bf..db4485423416 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -78,7 +78,6 @@ inline unsigned getRegAsVR128(unsigned Reg) { } // end namespace SystemZMC MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createSystemZMCAsmBackend(const Target &T, diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index e01adcce04ab..6fb080607f51 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -88,13 +88,19 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { // an instruction with the corresponding hint set. static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI, unsigned Opcode) { - if (!MI->hasOneMemOperand()) + if (MI->memoperands_empty()) return; - const MachineMemOperand *MMO = *MI->memoperands_begin(); + + Align Alignment = Align(16); + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) + if ((*MMOI)->getAlign() < Alignment) + Alignment = (*MMOI)->getAlign(); + unsigned AlignmentHint = 0; - if (MMO->getAlign() >= Align(16)) + if (Alignment >= Align(16)) AlignmentHint = 4; - else if (MMO->getAlign() >= Align(8)) + else if (Alignment >= Align(8)) AlignmentHint = 3; if (AlignmentHint == 0) return; @@ -124,17 +130,32 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { .addImm(0); } +// The XPLINK ABI requires that a no-op encoding the call type is emitted after +// each call to a subroutine. This information can be used by the called +// function to determine its entry point, e.g. for generating a backtrace. The +// call type is encoded as a register number in the bcr instruction. See +// enumeration CallType for the possible values. 
+void SystemZAsmPrinter::emitCallInformation(CallType CT) { + EmitToStreamer(*OutStreamer, + MCInstBuilder(SystemZ::BCRAsm) + .addImm(0) + .addReg(SystemZMC::GR64Regs[static_cast(CT)])); +} + void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { SystemZMCInstLower Lower(MF->getContext(), *this); - const SystemZSubtarget *Subtarget = &MF->getSubtarget(); MCInst LoweredMI; switch (MI->getOpcode()) { case SystemZ::Return: - if (Subtarget->isTargetXPLINK64()) - LoweredMI = - MCInstBuilder(SystemZ::B).addReg(SystemZ::R7D).addImm(2).addReg(0); - else - LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D); + LoweredMI = MCInstBuilder(SystemZ::BR) + .addReg(SystemZ::R14D); + break; + + case SystemZ::Return_XPLINK: + LoweredMI = MCInstBuilder(SystemZ::B) + .addReg(SystemZ::R7D) + .addImm(2) + .addReg(0); break; case SystemZ::CondReturn: @@ -144,6 +165,15 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { .addReg(SystemZ::R14D); break; + case SystemZ::CondReturn_XPLINK: + LoweredMI = MCInstBuilder(SystemZ::BC) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addReg(SystemZ::R7D) + .addImm(2) + .addReg(0); + break; + case SystemZ::CRBReturn: LoweredMI = MCInstBuilder(SystemZ::CRB) .addReg(MI->getOperand(0).getReg()) @@ -222,18 +252,21 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { .addReg(SystemZ::R7D) .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT))); - EmitToStreamer( - *OutStreamer, - MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R3D)); + emitCallInformation(CallType::BRASL7); return; case SystemZ::CallBASR_XPLINK64: EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR) .addReg(SystemZ::R7D) .addReg(MI->getOperand(0).getReg())); - EmitToStreamer( - *OutStreamer, - MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D)); + emitCallInformation(CallType::BASR76); + return; + + case SystemZ::CallBASR_STACKEXT: + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R3D) + .addReg(MI->getOperand(0).getReg())); + emitCallInformation(CallType::BASR33); return; case SystemZ::CallBRASL: @@ -608,11 +641,11 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, MCContext &Ctx = MF->getContext(); if (MF->getFunction().hasFnAttribute("mrecord-mcount")) { MCSymbol *DotSym = OutContext.createTempSymbol(); - OutStreamer->PushSection(); - OutStreamer->SwitchSection( + OutStreamer->pushSection(); + OutStreamer->switchSection( Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC)); OutStreamer->emitSymbolValue(DotSym, 8); - OutStreamer->PopSection(); + OutStreamer->popSection(); OutStreamer->emitLabel(DotSym); } @@ -630,8 +663,7 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, } void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { - const SystemZInstrInfo *TII = - static_cast(MF->getSubtarget().getInstrInfo()); + auto *TII = MF->getSubtarget().getInstrInfo(); unsigned NumNOPBytes = MI.getOperand(1).getImm(); @@ -786,13 +818,253 @@ void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) { emitStackMaps(SM); } +void SystemZAsmPrinter::emitFunctionBodyEnd() { + if (TM.getTargetTriple().isOSzOS()) { + // Emit symbol for the end of function if the z/OS target streamer + // is used. This is needed to calculate the size of the function. 
+ MCSymbol *FnEndSym = createTempSymbol("func_end"); + OutStreamer->emitLabel(FnEndSym); + + OutStreamer->pushSection(); + OutStreamer->switchSection(getObjFileLowering().getPPA1Section()); + emitPPA1(FnEndSym); + OutStreamer->popSection(); + + CurrentFnPPA1Sym = nullptr; + CurrentFnEPMarkerSym = nullptr; + } +} + +static void emitPPA1Flags(std::unique_ptr &OutStreamer, bool VarArg, + bool StackProtector, bool FPRMask, bool VRMask) { + enum class PPA1Flag1 : uint8_t { + DSA64Bit = (0x80 >> 0), + VarArg = (0x80 >> 7), + LLVM_MARK_AS_BITMASK_ENUM(DSA64Bit) + }; + enum class PPA1Flag2 : uint8_t { + ExternalProcedure = (0x80 >> 0), + STACKPROTECTOR = (0x80 >> 3), + LLVM_MARK_AS_BITMASK_ENUM(ExternalProcedure) + }; + enum class PPA1Flag3 : uint8_t { + FPRMask = (0x80 >> 2), + LLVM_MARK_AS_BITMASK_ENUM(FPRMask) + }; + enum class PPA1Flag4 : uint8_t { + EPMOffsetPresent = (0x80 >> 0), + VRMask = (0x80 >> 2), + ProcedureNamePresent = (0x80 >> 7), + LLVM_MARK_AS_BITMASK_ENUM(EPMOffsetPresent) + }; + + // Declare optional section flags that can be modified. + auto Flags1 = PPA1Flag1(0); + auto Flags2 = PPA1Flag2::ExternalProcedure; + auto Flags3 = PPA1Flag3(0); + auto Flags4 = PPA1Flag4::EPMOffsetPresent | PPA1Flag4::ProcedureNamePresent; + + Flags1 |= PPA1Flag1::DSA64Bit; + + if (VarArg) + Flags1 |= PPA1Flag1::VarArg; + + if (StackProtector) + Flags2 |= PPA1Flag2::STACKPROTECTOR; + + // SavedGPRMask, SavedFPRMask, and SavedVRMask are precomputed in. + if (FPRMask) + Flags3 |= PPA1Flag3::FPRMask; // Add emit FPR mask flag. + + if (VRMask) + Flags4 |= PPA1Flag4::VRMask; // Add emit VR mask flag. + + OutStreamer->AddComment("PPA1 Flags 1"); + if ((Flags1 & PPA1Flag1::DSA64Bit) == PPA1Flag1::DSA64Bit) + OutStreamer->AddComment(" Bit 0: 1 = 64-bit DSA"); + else + OutStreamer->AddComment(" Bit 0: 0 = 32-bit DSA"); + if ((Flags1 & PPA1Flag1::VarArg) == PPA1Flag1::VarArg) + OutStreamer->AddComment(" Bit 7: 1 = Vararg function"); + OutStreamer->emitInt8(static_cast(Flags1)); // Flags 1. + + OutStreamer->AddComment("PPA1 Flags 2"); + if ((Flags2 & PPA1Flag2::ExternalProcedure) == PPA1Flag2::ExternalProcedure) + OutStreamer->AddComment(" Bit 0: 1 = External procedure"); + if ((Flags2 & PPA1Flag2::STACKPROTECTOR) == PPA1Flag2::STACKPROTECTOR) + OutStreamer->AddComment(" Bit 3: 1 = STACKPROTECT is enabled"); + else + OutStreamer->AddComment(" Bit 3: 0 = STACKPROTECT is not enabled"); + OutStreamer->emitInt8(static_cast(Flags2)); // Flags 2. + + OutStreamer->AddComment("PPA1 Flags 3"); + if ((Flags3 & PPA1Flag3::FPRMask) == PPA1Flag3::FPRMask) + OutStreamer->AddComment(" Bit 2: 1 = FP Reg Mask is in optional area"); + OutStreamer->emitInt8( + static_cast(Flags3)); // Flags 3 (optional sections). + + OutStreamer->AddComment("PPA1 Flags 4"); + if ((Flags4 & PPA1Flag4::VRMask) == PPA1Flag4::VRMask) + OutStreamer->AddComment(" Bit 2: 1 = Vector Reg Mask is in optional area"); + OutStreamer->emitInt8(static_cast( + Flags4)); // Flags 4 (optional sections, always emit these). +} + +void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { + const TargetRegisterInfo *TRI = MF->getRegInfo().getTargetRegisterInfo(); + const SystemZSubtarget &Subtarget = MF->getSubtarget(); + const auto TargetHasVector = Subtarget.hasVector(); + + const SystemZMachineFunctionInfo *ZFI = + MF->getInfo(); + const auto *ZFL = static_cast( + Subtarget.getFrameLowering()); + const MachineFrameInfo &MFFrame = MF->getFrameInfo(); + + // Get saved GPR/FPR/VPR masks. 
+ const std::vector &CSI = MFFrame.getCalleeSavedInfo(); + uint16_t SavedGPRMask = 0; + uint16_t SavedFPRMask = 0; + uint8_t SavedVRMask = 0; + int64_t OffsetFPR = 0; + int64_t OffsetVR = 0; + const int64_t TopOfStack = + MFFrame.getOffsetAdjustment() + MFFrame.getStackSize(); + + // Loop over the spilled registers. The CalleeSavedInfo can't be used because + // it does not contain all spilled registers. + for (unsigned I = ZFI->getSpillGPRRegs().LowGPR, + E = ZFI->getSpillGPRRegs().HighGPR; + I && E && I <= E; ++I) { + unsigned V = TRI->getEncodingValue((Register)I); + assert(V < 16 && "GPR index out of range"); + SavedGPRMask |= 1 << (15 - V); + } + + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + unsigned I = TRI->getEncodingValue(Reg); + + if (SystemZ::FP64BitRegClass.contains(Reg)) { + assert(I < 16 && "FPR index out of range"); + SavedFPRMask |= 1 << (15 - I); + int64_t Temp = MFFrame.getObjectOffset(CS.getFrameIdx()); + if (Temp < OffsetFPR) + OffsetFPR = Temp; + } else if (SystemZ::VR128BitRegClass.contains(Reg)) { + assert(I >= 16 && I <= 23 && "VPR index out of range"); + unsigned BitNum = I - 16; + SavedVRMask |= 1 << (7 - BitNum); + int64_t Temp = MFFrame.getObjectOffset(CS.getFrameIdx()); + if (Temp < OffsetVR) + OffsetVR = Temp; + } + } + + // Adjust the offset. + OffsetFPR += (OffsetFPR < 0) ? TopOfStack : 0; + OffsetVR += (OffsetVR < 0) ? TopOfStack : 0; + + // Get alloca register. + uint8_t FrameReg = TRI->getEncodingValue(TRI->getFrameRegister(*MF)); + uint8_t AllocaReg = ZFL->hasFP(*MF) ? FrameReg : 0; + assert(AllocaReg < 16 && "Can't have alloca register larger than 15"); + (void)AllocaReg; + + // Build FPR save area offset. + uint32_t FrameAndFPROffset = 0; + if (SavedFPRMask) { + uint64_t FPRSaveAreaOffset = OffsetFPR; + assert(FPRSaveAreaOffset < 0x10000000 && "Offset out of range"); + + FrameAndFPROffset = FPRSaveAreaOffset & 0x0FFFFFFF; // Lose top 4 bits. + FrameAndFPROffset |= FrameReg << 28; // Put into top 4 bits. + } + + // Build VR save area offset. + uint32_t FrameAndVROffset = 0; + if (TargetHasVector && SavedVRMask) { + uint64_t VRSaveAreaOffset = OffsetVR; + assert(VRSaveAreaOffset < 0x10000000 && "Offset out of range"); + + FrameAndVROffset = VRSaveAreaOffset & 0x0FFFFFFF; // Lose top 4 bits. + FrameAndVROffset |= FrameReg << 28; // Put into top 4 bits. + } + + // Emit PPA1 section. + OutStreamer->AddComment("PPA1"); + OutStreamer->emitLabel(CurrentFnPPA1Sym); + OutStreamer->AddComment("Version"); + OutStreamer->emitInt8(0x02); // Version. + OutStreamer->AddComment("LE Signature X'CE'"); + OutStreamer->emitInt8(0xCE); // CEL signature. + OutStreamer->AddComment("Saved GPR Mask"); + OutStreamer->emitInt16(SavedGPRMask); + + emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(), + MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0, + TargetHasVector && SavedVRMask != 0); + + OutStreamer->AddComment("Length/4 of Parms"); + OutStreamer->emitInt16( + static_cast(MFFrame.getMaxCallFrameSize() / 4)); // Parms/4. + OutStreamer->AddComment("Length of Code"); + OutStreamer->emitAbsoluteSymbolDiff(FnEndSym, CurrentFnEPMarkerSym, 4); + + // Emit saved FPR mask and offset to FPR save area (0x20 of flags 3). + if (SavedFPRMask) { + OutStreamer->AddComment("FPR mask"); + OutStreamer->emitInt16(SavedFPRMask); + OutStreamer->AddComment("AR mask"); + OutStreamer->emitInt16(0); // AR Mask, unused currently. 
+ OutStreamer->AddComment("FPR Save Area Locator"); + OutStreamer->AddComment(Twine(" Bit 0-3: Register R") + .concat(utostr(FrameAndFPROffset >> 28)) + .str()); + OutStreamer->AddComment(Twine(" Bit 4-31: Offset ") + .concat(utostr(FrameAndFPROffset & 0x0FFFFFFF)) + .str()); + OutStreamer->emitInt32(FrameAndFPROffset); // Offset to FPR save area with + // register to add value to + // (alloca reg). + } + + // Emit saved VR mask to VR save area. + if (TargetHasVector && SavedVRMask) { + OutStreamer->AddComment("VR mask"); + OutStreamer->emitInt8(SavedVRMask); + OutStreamer->emitInt8(0); // Reserved. + OutStreamer->emitInt16(0); // Also reserved. + OutStreamer->AddComment("VR Save Area Locator"); + OutStreamer->AddComment(Twine(" Bit 0-3: Register R") + .concat(utostr(FrameAndVROffset >> 28)) + .str()); + OutStreamer->AddComment(Twine(" Bit 4-31: Offset ") + .concat(utostr(FrameAndVROffset & 0x0FFFFFFF)) + .str()); + OutStreamer->emitInt32(FrameAndVROffset); + } + + // Emit offset to entry point optional section (0x80 of flags 4). + OutStreamer->emitAbsoluteSymbolDiff(CurrentFnEPMarkerSym, CurrentFnPPA1Sym, + 4); +} + void SystemZAsmPrinter::emitFunctionEntryLabel() { - const SystemZSubtarget &Subtarget = - static_cast(MF->getSubtarget()); + const SystemZSubtarget &Subtarget = MF->getSubtarget(); if (Subtarget.getTargetTriple().isOSzOS()) { MCContext &OutContext = OutStreamer->getContext(); - MCSymbol *EPMarkerSym = OutContext.createTempSymbol("CM_", true); + + // Save information for later use. + std::string N(MF->getFunction().hasName() + ? Twine(MF->getFunction().getName()).concat("_").str() + : ""); + + CurrentFnEPMarkerSym = + OutContext.createTempSymbol(Twine("EPM_").concat(N).str(), true); + CurrentFnPPA1Sym = + OutContext.createTempSymbol(Twine("PPA1_").concat(N).str(), true); // EntryPoint Marker const MachineFrameInfo &MFFrame = MF->getFrameInfo(); @@ -811,11 +1083,14 @@ void SystemZAsmPrinter::emitFunctionEntryLabel() { // Emit entry point marker section. OutStreamer->AddComment("XPLINK Routine Layout Entry"); - OutStreamer->emitLabel(EPMarkerSym); + OutStreamer->emitLabel(CurrentFnEPMarkerSym); OutStreamer->AddComment("Eyecatcher 0x00C300C500C500"); OutStreamer->emitIntValueInHex(0x00C300C500C500, 7); // Eyecatcher. OutStreamer->AddComment("Mark Type C'1'"); OutStreamer->emitInt8(0xF1); // Mark Type. + OutStreamer->AddComment("Offset to PPA1"); + OutStreamer->emitAbsoluteSymbolDiff(CurrentFnPPA1Sym, CurrentFnEPMarkerSym, + 4); if (OutStreamer->isVerboseAsm()) { OutStreamer->AddComment("DSA Size 0x" + Twine::utohexstr(DSASize)); OutStreamer->AddComment("Entry Flags"); diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h index 80d68d1b93ff..f14b4a184f62 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -26,6 +26,8 @@ class raw_ostream; class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter { private: StackMaps SM; + MCSymbol *CurrentFnPPA1Sym; // PPA1 Symbol. + MCSymbol *CurrentFnEPMarkerSym; // Entry Point Marker. SystemZTargetStreamer *getTargetStreamer() { MCTargetStreamer *TS = OutStreamer->getTargetStreamer(); @@ -33,9 +35,24 @@ private: return static_cast(TS); } + /// Call type information for XPLINK. 
+ enum class CallType { + BASR76 = 0, // b'x000' == BASR r7,r6 + BRAS7 = 1, // b'x001' == BRAS r7,ep + RESVD_2 = 2, // b'x010' + BRASL7 = 3, // b'x011' == BRASL r7,ep + RESVD_4 = 4, // b'x100' + RESVD_5 = 5, // b'x101' + BALR1415 = 6, // b'x110' == BALR r14,r15 + BASR33 = 7, // b'x111' == BASR r3,r3 + }; + + void emitPPA1(MCSymbol *FnEndSym); + public: SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} + : AsmPrinter(TM, std::move(Streamer)), SM(*this), + CurrentFnPPA1Sym(nullptr), CurrentFnEPMarkerSym(nullptr) {} // Override AsmPrinter. StringRef getPassName() const override { return "SystemZ Assembly Printer"; } @@ -52,8 +69,10 @@ public: return AsmPrinter::doInitialization(M); } void emitFunctionEntryLabel() override; + void emitFunctionBodyEnd() override; private: + void emitCallInformation(CallType CT); void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL); void LowerSTACKMAP(const MachineInstr &MI); void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower); diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp index 763aa8c0e41f..9fc6765dbbf7 100644 --- a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp +++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp @@ -100,7 +100,7 @@ bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) { } bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) { - TII = static_cast(F.getSubtarget().getInstrInfo()); + TII = F.getSubtarget().getInstrInfo(); MRI = &F.getRegInfo(); bool Modified = false; diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index 4893acc81335..340dba1362af 100644 --- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -224,7 +224,7 @@ bool SystemZElimCompare::convertToBRCT( // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. MachineOperand Target(Branch->getOperand(2)); while (Branch->getNumOperands()) - Branch->RemoveOperand(0); + Branch->removeOperand(0); Branch->setDesc(TII->get(BRCT)); MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target); @@ -267,7 +267,7 @@ bool SystemZElimCompare::convertToLoadAndTrap( // The transformation is OK. Rebuild Branch as a load-and-trap. while (Branch->getNumOperands()) - Branch->RemoveOperand(0); + Branch->removeOperand(0); Branch->setDesc(TII->get(LATOpcode)); MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) .add(MI.getOperand(0)) @@ -649,16 +649,16 @@ bool SystemZElimCompare::fuseCompareOperations( // Clear out all current operands. int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); assert(CCUse >= 0 && "BRC/BCR must use CC"); - Branch->RemoveOperand(CCUse); + Branch->removeOperand(CCUse); // Remove regmask (sibcall). if (Type == SystemZII::CompareAndSibcall) - Branch->RemoveOperand(3); + Branch->removeOperand(3); // Remove target (branch or sibcall). if (Type == SystemZII::CompareAndBranch || Type == SystemZII::CompareAndSibcall) - Branch->RemoveOperand(2); - Branch->RemoveOperand(1); - Branch->RemoveOperand(0); + Branch->removeOperand(2); + Branch->removeOperand(1); + Branch->removeOperand(0); // Rebuild Branch as a fused compare and branch. 
// SrcNOps is the number of MI operands of the compare instruction @@ -735,7 +735,7 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { if (skipFunction(F.getFunction())) return false; - TII = static_cast(F.getSubtarget().getInstrInfo()); + TII = F.getSubtarget().getInstrInfo(); TRI = &TII->getRegisterInfo(); bool Changed = false; diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 610627e7e3f0..43bc7426cfa8 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -13,6 +13,7 @@ #include "SystemZMachineFunctionInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -95,8 +96,7 @@ typedef std::vector SZFrameObjVec; void SystemZELFFrameLowering::orderFrameObjects( const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const SystemZInstrInfo *TII = - static_cast(MF.getSubtarget().getInstrInfo()); + auto *TII = MF.getSubtarget().getInstrInfo(); // Make a vector of sorting objects to track all MFI objects and mark those // to be sorted as valid. @@ -1153,12 +1153,6 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize()); uint64_t StackSize = MFFrame.getStackSize(); - // FIXME: Implement support for large stack sizes, when the stack extension - // routine needs to be called. - if (StackSize > 1024 * 1024) { - llvm_unreachable("Huge Stack Frame not yet supported on z/OS"); - } - if (ZFI->getSpillGPRRegs().LowGPR) { // Skip over the GPR saves. if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) { @@ -1201,6 +1195,18 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta, ZII); + + // If the requested stack size is larger than the guard page, then we need + // to check if we need to call the stack extender. This requires adding a + // conditional branch, but splitting the prologue block is not possible at + // this point since it would invalidate the SaveBlocks / RestoreBlocks sets + // of PEI in the single block function case. Build a pseudo to be handled + // later by inlineStackProbe(). + const uint64_t GuardPageSize = 1024 * 1024; + if (StackSize > GuardPageSize) { + assert(StoreInstr && "Wrong insertion point"); + BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::XPLINK_STACKALLOC)); + } } if (HasFP) { @@ -1239,6 +1245,74 @@ void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF, } } +// Emit a compare of the stack pointer against the stack floor, and a call to +// the LE stack extender if needed. +void SystemZXPLINKFrameLowering::inlineStackProbe( + MachineFunction &MF, MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast(MF.getSubtarget().getInstrInfo()); + + MachineInstr *StackAllocMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) { + StackAllocMI = &MI; + break; + } + if (StackAllocMI == nullptr) + return; + + MachineBasicBlock &MBB = PrologMBB; + const DebugLoc DL = StackAllocMI->getDebugLoc(); + + // The 2nd half of block MBB after split. 
+ MachineBasicBlock *NextMBB; + + // Add new basic block for the call to the stack overflow function. + MachineBasicBlock *StackExtMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.push_back(StackExtMBB); + + // LG r3,72(,r3) + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::LG), SystemZ::R3D) + .addReg(SystemZ::R3D) + .addImm(72) + .addReg(0); + // BASR r3,r3 + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT)) + .addReg(SystemZ::R3D); + + // LLGT r3,1208 + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D) + .addReg(0) + .addImm(1208) + .addReg(0); + // CG r4,64(,r3) + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::CG)) + .addReg(SystemZ::R4D) + .addReg(SystemZ::R3D) + .addImm(64) + .addReg(0); + // JLL b'0100',F'37' + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_LT) + .addMBB(StackExtMBB); + + NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB); + MBB.addSuccessor(NextMBB); + MBB.addSuccessor(StackExtMBB); + + // Add jump back from stack extension BB. + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB); + StackExtMBB->addSuccessor(NextMBB); + + StackAllocMI->eraseFromParent(); + + // Compute the live-in lists for the new blocks. + recomputeLiveIns(*NextMBB); + recomputeLiveIns(*StackExtMBB); +} + bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getFrameInfo().hasVarSizedObjects()); } diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 2b3d7efed53b..bec83a9457e0 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -127,6 +127,9 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; + bool hasFP(const MachineFunction &MF) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index cf55318d328d..9ac7eafd5f34 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -968,7 +968,7 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND && RISBG.Input.getOpcode() != ISD::TRUNCATE) Count += 1; - if (Count == 0) + if (Count == 0 || isa(RISBG.Input)) return false; // Prefer to use normal shift instructions over RISBG, since they can handle @@ -1472,7 +1472,7 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const { assert(MMO && "Expected a memory operand."); // The memory access must have a proper alignment and no index register. - if (MemAccess->getAlignment() < StoreSize || + if (MemAccess->getAlign().value() < StoreSize || !MemAccess->getOffset().isUndef()) return false; @@ -1683,16 +1683,19 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, llvm_unreachable("Unexpected asm memory constraint"); case InlineAsm::Constraint_i: case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_ZQ: // Accept an address with a short displacement, but no index. Form = SystemZAddressingMode::FormBD; DispRange = SystemZAddressingMode::Disp12Only; break; case InlineAsm::Constraint_R: + case InlineAsm::Constraint_ZR: // Accept an address with a short displacement and an index. 
Form = SystemZAddressingMode::FormBDXNormal; DispRange = SystemZAddressingMode::Disp12Only; break; case InlineAsm::Constraint_S: + case InlineAsm::Constraint_ZS: // Accept an address with a long displacement, but no index. Form = SystemZAddressingMode::FormBD; DispRange = SystemZAddressingMode::Disp20Only; @@ -1700,6 +1703,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, case InlineAsm::Constraint_T: case InlineAsm::Constraint_m: case InlineAsm::Constraint_o: + case InlineAsm::Constraint_p: + case InlineAsm::Constraint_ZT: // Accept an address with a long displacement and an index. // m works the same as T, as this is the most general case. // We don't really have any special handling of "offsettable" diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index f10651d5c5d7..42c1c77f14e4 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -80,7 +80,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0)); + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); auto *Regs = STI.getSpecialRegisters(); @@ -471,6 +471,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + // Special treatment. + setOperationAction(ISD::IS_FPCLASS, VT, Custom); + // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, VT, Legal); setOperationAction(ISD::STRICT_FSUB, VT, Legal); @@ -640,33 +643,33 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VAEND, MVT::Other, Expand); // Codes for which we want to perform some z-specific combinations. - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::LOAD); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::FP_ROUND); - setTargetDAGCombine(ISD::STRICT_FP_ROUND); - setTargetDAGCombine(ISD::FP_EXTEND); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - setTargetDAGCombine(ISD::STRICT_FP_EXTEND); - setTargetDAGCombine(ISD::BSWAP); - setTargetDAGCombine(ISD::SDIV); - setTargetDAGCombine(ISD::UDIV); - setTargetDAGCombine(ISD::SREM); - setTargetDAGCombine(ISD::UREM); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); + setTargetDAGCombine({ISD::ZERO_EXTEND, + ISD::SIGN_EXTEND, + ISD::SIGN_EXTEND_INREG, + ISD::LOAD, + ISD::STORE, + ISD::VECTOR_SHUFFLE, + ISD::EXTRACT_VECTOR_ELT, + ISD::FP_ROUND, + ISD::STRICT_FP_ROUND, + ISD::FP_EXTEND, + ISD::SINT_TO_FP, + ISD::UINT_TO_FP, + ISD::STRICT_FP_EXTEND, + ISD::BSWAP, + ISD::SDIV, + ISD::UDIV, + ISD::SREM, + ISD::UREM, + ISD::INTRINSIC_VOID, + ISD::INTRINSIC_W_CHAIN}); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // We want to use MVC in preference to even a single load/store pair. - MaxStoresPerMemcpy = 0; + MaxStoresPerMemcpy = Subtarget.hasVector() ? 
2 : 0; MaxStoresPerMemcpyOptSize = 0; // The main memset sequence is a byte store followed by an MVC. @@ -674,7 +677,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // generated by target-independent code don't when the byte value is // variable. E.g. "STC ;MHI ,257;STH " is not better // than "STC;MVC". Handle the choice in target-specific code instead. - MaxStoresPerMemset = 0; + MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; MaxStoresPerMemsetOptSize = 0; // Default to having -disable-strictnode-mutation on @@ -716,8 +719,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( // such as VGM, VGMB or VREPI. bool SystemZVectorConstantInfo::isVectorConstantLegal( const SystemZSubtarget &Subtarget) { - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); if (!Subtarget.hasVector() || (isFP128 && !Subtarget.hasVectorEnhancements1())) return false; @@ -790,14 +792,17 @@ bool SystemZVectorConstantInfo::isVectorConstantLegal( return tryValue(SplatBitsZ | Middle); } -SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) { - IntBits = FPImm.bitcastToAPInt().zextOrSelf(128); - isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad()); - SplatBits = FPImm.bitcastToAPInt(); - unsigned Width = SplatBits.getBitWidth(); - IntBits <<= (SystemZ::VectorBits - Width); +SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { + if (IntImm.isSingleWord()) { + IntBits = APInt(128, IntImm.getZExtValue()); + IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); + } else + IntBits = IntImm; + assert(IntBits.getBitWidth() == 128 && "Unsupported APInt."); // Find the smallest splat. + SplatBits = IntImm; + unsigned Width = SplatBits.getBitWidth(); while (Width > 8) { unsigned HalfSize = Width / 2; APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); @@ -973,7 +978,8 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, if (!isInt<20>(AM.BaseOffs)) return false; - AddressingMode SupportedAM(true, true); + bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy(); + AddressingMode SupportedAM(!RequireD12, true); if (I != nullptr) SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); @@ -988,6 +994,30 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, return AM.Scale == 0 || AM.Scale == 1; } +bool SystemZTargetLowering::findOptimalMemOpLowering( + std::vector &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, + unsigned SrcAS, const AttributeList &FuncAttributes) const { + const int MVCFastLen = 16; + + if (Limit != ~unsigned(0)) { + // Don't expand Op into scalar loads/stores in these cases: + if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) + return false; // Small memcpy: Use MVC + if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) + return false; // Small memset (first byte with STC/MVI): Use MVC + if (Op.isZeroMemset()) + return false; // Memset zero: Use XC + } + + return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, + SrcAS, FuncAttributes); +} + +EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const { + return Subtarget.hasVector() ? 
MVT::v2i64 : MVT::Other; +} + bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) return false; @@ -1034,6 +1064,17 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { case 'M': // 0x7fffffff return C_Immediate; + default: + break; + } + } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { + switch (Constraint[1]) { + case 'Q': // Address with base and unsigned 12-bit displacement + case 'R': // Likewise, plus an index + case 'S': // Address with base and signed 20-bit displacement + case 'T': // Likewise, plus an index + return C_Address; + default: break; } @@ -1218,12 +1259,17 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, - const MachineFunction &MF) const { +Register +SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + const SystemZSubtarget *Subtarget = &MF.getSubtarget(); + + Register Reg = + StringSwitch(RegName) + .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0) + .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0) + .Default(0); - Register Reg = StringSwitch(RegName) - .Case("r15", SystemZ::R15D) - .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); @@ -1833,6 +1879,40 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, return Chain; } +// Generate a call taking the given operands as arguments and returning a +// result of type RetVT. +std::pair SystemZTargetLowering::makeExternalCall( + SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, + ArrayRef Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, + bool DoesNotReturn, bool IsReturnValueUsed) const { + TargetLowering::ArgListTy Args; + Args.reserve(Ops.size()); + + TargetLowering::ArgListEntry Entry; + for (SDValue Op : Ops) { + Entry.Node = Op; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); + Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); + Args.push_back(Entry); + } + + SDValue Callee = + DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering::CallLoweringInfo CLI(DAG); + bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); + CLI.setDebugLoc(DL) + .setChain(Chain) + .setCallee(CallConv, RetTy, Callee, std::move(Args)) + .setNoReturn(DoesNotReturn) + .setDiscardResult(!IsReturnValueUsed) + .setSExtResult(SignExtend) + .setZExtResult(!SignExtend); + return LowerCallTo(CLI); +} + bool SystemZTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, @@ -2237,7 +2317,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Load->getExtensionType() != ExtType) { C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), - Load->getMemoryVT(), Load->getAlignment(), + Load->getMemoryVT(), Load->getAlign(), Load->getMemOperand()->getFlags()); // Update the chain uses. 
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); @@ -3471,6 +3551,32 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { + + if (Subtarget.isTargetXPLINK64()) + return lowerVASTART_XPLINK(Op, DAG); + else + return lowerVASTART_ELF(Op, DAG); +} + +SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo(); + + SDLoc DL(Op); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); @@ -3514,14 +3620,90 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), + uint32_t Sz = + Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, /*isTailCall*/ false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } -SDValue SystemZTargetLowering:: -lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { +SDValue +SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget.isTargetXPLINK64()) + return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); + else + return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); +} + +SDValue +SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, + SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + MachineFunction &MF = DAG.getMachineFunction(); + bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDLoc DL(Op); + + // If user has set the no alignment function attribute, ignore + // alloca alignments. + uint64_t AlignVal = + (RealignOpt ? cast(Align)->getZExtValue() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + + SDValue NeededSpace = Size; + + // Add extra space for alignment if needed. + EVT PtrVT = getPointerTy(MF.getDataLayout()); + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); + + bool IsSigned = false; + bool DoesNotReturn = false; + bool IsReturnValueUsed = false; + EVT VT = Op.getValueType(); + SDValue AllocaCall = + makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace), + CallingConv::C, IsSigned, DL, DoesNotReturn, + IsReturnValueUsed) + .first; + + // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue + // to end of call in order to ensure it isn't broken up from the call + // sequence. 
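In outline, using the AllocaCall/SPReg names from the code that follows: take the call's output chain and glue, and feed the glue into the CopyFromReg. Glue edges cannot be scheduled across, so the stack pointer is read exactly as the allocator call left it:

  SDValue Chain = AllocaCall.getValue(1); // call's output chain
  SDValue Glue  = AllocaCall.getValue(2); // call's output glue
  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);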
+ auto &Regs = Subtarget.getSpecialRegisters(); + Register SPReg = Regs.getStackPointerRegister(); + Chain = AllocaCall.getValue(1); + SDValue Glue = AllocaCall.getValue(2); + SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); + Chain = NewSPRegNode.getValue(1); + + MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); + SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); + SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); + + // Dynamically realign if needed. + if (ExtraAlignSpace) { + Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, + DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); + Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); + } + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue +SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, + SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); @@ -5468,6 +5650,41 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, return Op; } +SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT ResultVT = Op.getSimpleValueType(); + SDValue Arg = Op.getOperand(0); + auto CNode = cast(Op.getOperand(1)); + unsigned Check = CNode->getZExtValue(); + + unsigned TDCMask = 0; + if (Check & fcSNan) + TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS; + if (Check & fcQNan) + TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS; + if (Check & fcPosInf) + TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS; + if (Check & fcNegInf) + TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS; + if (Check & fcPosNormal) + TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS; + if (Check & fcNegNormal) + TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS; + if (Check & fcPosSubnormal) + TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS; + if (Check & fcNegSubnormal) + TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS; + if (Check & fcPosZero) + TDCMask |= SystemZ::TDCMASK_ZERO_PLUS; + if (Check & fcNegZero) + TDCMask |= SystemZ::TDCMASK_ZERO_MINUS; + SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64); + + SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV); + return getCCResult(DAG, Intr); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -5585,6 +5802,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); case ISD::SRA: return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); + case ISD::IS_FPCLASS: + return lowerIS_FPCLASS(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -6142,6 +6361,23 @@ static bool isVectorElementSwap(ArrayRef M, EVT VT) { return true; } +static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { + for (auto *U : StoredVal->uses()) { + if (StoreSDNode *ST = dyn_cast(U)) { + EVT CurrMemVT = ST->getMemoryVT().getScalarType(); + if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) + continue; + } else if (isa(U)) { + SDValue BuildVector = SDValue(U, 0); + if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && + isOnlyUsedByStores(BuildVector, DAG)) + continue; + } + return false; + } + return true; +} + SDValue SystemZTargetLowering::combineSTORE( SDNode *N, DAGCombinerInfo &DCI) 
const { SelectionDAG &DAG = DCI.DAG; @@ -6200,6 +6436,82 @@ SDValue SystemZTargetLowering::combineSTORE( } } + // Replicate a reg or immediate with VREP instead of scalar multiply or + // immediate load. It seems best to do this during the first DAGCombine as + // it is straight-forward to handle the zero-extend node in the initial + // DAG, and also not worry about the keeping the new MemVT legal (e.g. when + // extracting an i16 element from a v16i8 vector). + if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && + isOnlyUsedByStores(Op1, DAG)) { + SDValue Word = SDValue(); + EVT WordVT; + + // Find a replicated immediate and return it if found in Word and its + // type in WordVT. + auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { + // Some constants are better handled with a scalar store. + if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || + isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2) + return; + SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue())); + if (VCI.isVectorConstantLegal(Subtarget) && + VCI.Opcode == SystemZISD::REPLICATE) { + Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32); + WordVT = VCI.VecVT.getScalarType(); + } + }; + + // Find a replicated register and return it if found in Word and its type + // in WordVT. + auto FindReplicatedReg = [&](SDValue MulOp) { + EVT MulVT = MulOp.getValueType(); + if (MulOp->getOpcode() == ISD::MUL && + (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { + // Find a zero extended value and its type. + SDValue LHS = MulOp->getOperand(0); + if (LHS->getOpcode() == ISD::ZERO_EXTEND) + WordVT = LHS->getOperand(0).getValueType(); + else if (LHS->getOpcode() == ISD::AssertZext) + WordVT = cast(LHS->getOperand(1))->getVT(); + else + return; + // Find a replicating constant, e.g. 0x00010001. 
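A worked example of why such a multiply is a splat (illustration): multiplying a zero-extended narrow value by a constant with a 1 at each element boundary replicates the value, e.g. for an i16 element replicated across i32:

  #include <cstdint>
  static_assert(0xABCDu * 0x00010001u == 0xABCDABCDu,
                "(zext i16 X) * 0x00010001 replicates X across i32");

The store of such a product can then become a VREP of X plus a vector store.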
+ if (auto *C = dyn_cast(MulOp->getOperand(1))) { + SystemZVectorConstantInfo VCI( + APInt(MulVT.getSizeInBits(), C->getZExtValue())); + if (VCI.isVectorConstantLegal(Subtarget) && + VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && + WordVT == VCI.VecVT.getScalarType()) + Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); + } + } + }; + + if (isa(Op1) && + DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { + SDValue SplatVal = Op1->getOperand(0); + if (auto *C = dyn_cast(SplatVal)) + FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); + else + FindReplicatedReg(SplatVal); + } else { + if (auto *C = dyn_cast(Op1)) + FindReplicatedImm(C, MemVT.getStoreSize()); + else + FindReplicatedReg(Op1); + } + + if (Word != SDValue()) { + assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && + "Bad type handling"); + unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); + EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); + SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); + return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, + SN->getBasePtr(), SN->getMemOperand()); + } + } + return SDValue(); } @@ -6442,22 +6754,26 @@ SDValue SystemZTargetLowering::combineINT_TO_FP( SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.Level != BeforeLegalizeTypes) return SDValue(); + SelectionDAG &DAG = DCI.DAG; + LLVMContext &Ctx = *DAG.getContext(); unsigned Opcode = N->getOpcode(); EVT OutVT = N->getValueType(0); - SelectionDAG &DAG = DCI.DAG; + Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); SDValue Op = N->getOperand(0); - unsigned OutScalarBits = OutVT.getScalarSizeInBits(); + unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); // Insert an extension before type-legalization to avoid scalarization, e.g.: // v2f64 = uint_to_fp v2i16 // => // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) - if (OutVT.isVector() && OutScalarBits > InScalarBits) { - MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()), - OutVT.getVectorNumElements()); + if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && + OutScalarBits <= 64) { + unsigned NumElts = cast(OutLLVMTy)->getNumElements(); + EVT ExtVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); unsigned ExtOpcode = - (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); + (Opcode == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); } @@ -7271,8 +7587,7 @@ MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, MachineBasicBlock *MBB) const { assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); @@ -7368,8 +7683,7 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); Register SrcReg = MI.getOperand(0).getReg(); MachineOperand Base = MI.getOperand(1); @@ -7460,8 +7774,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, unsigned BitSize, bool Invert) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -7579,8 +7892,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, unsigned KeepOldMask, unsigned BitSize) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -7693,8 +8005,7 @@ MachineBasicBlock * SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. 
@@ -7810,8 +8121,7 @@ MachineBasicBlock * SystemZTargetLowering::emitPair128(MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); @@ -7838,8 +8148,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, bool ClearEven) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); @@ -7870,8 +8179,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, bool IsMemset) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); @@ -8225,8 +8533,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); @@ -8331,8 +8638,7 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); Register SrcReg = MI.getOperand(0).getReg(); @@ -8355,8 +8661,7 @@ MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); const unsigned ProbeSize = getStackProbeSize(MF); Register DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index de446f33f5f1..b9c95274f62b 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -419,8 +419,7 @@ public: getNumRegisters(LLVMContext &Context, EVT VT, Optional RegisterVT) const override { // i128 inline assembly operand. 
- if (VT == MVT::i128 && - RegisterVT.hasValue() && RegisterVT.getValue() == MVT::Untyped) + if (VT == MVT::i128 && RegisterVT && *RegisterVT == MVT::Untyped) return 1; return TargetLowering::getNumRegisters(Context, VT); } @@ -457,6 +456,12 @@ public: bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const override; + bool + findOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, + const MemOp &Op, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes) const override; + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; bool isTruncateFree(Type *, Type *) const override; bool isTruncateFree(EVT, EVT) const override; @@ -467,6 +472,8 @@ public: return VT == MVT::i32 || VT == MVT::i64; } + bool shouldConsiderGEPOffsetSplit() const override { return true; } + const char *getTargetNodeName(unsigned Opcode) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, @@ -497,6 +504,19 @@ public: case 'T': return InlineAsm::Constraint_T; } + } else if (ConstraintCode.size() == 2 && ConstraintCode[0] == 'Z') { + switch (ConstraintCode[1]) { + default: + break; + case 'Q': + return InlineAsm::Constraint_ZQ; + case 'R': + return InlineAsm::Constraint_ZR; + case 'S': + return InlineAsm::Constraint_ZS; + case 'T': + return InlineAsm::Constraint_ZT; + } } return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } @@ -553,6 +573,12 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; + std::pair + makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, + EVT RetVT, ArrayRef Ops, CallingConv::ID CallConv, + bool IsSigned, SDLoc DL, bool DoesNotReturn, + bool IsReturnValueUsed) const; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, @@ -622,8 +648,12 @@ private: SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART_ELF(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART_XPLINK(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; @@ -657,6 +687,7 @@ private: SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; bool canTreatAsByteVector(EVT VT) const; SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, @@ -743,12 +774,15 @@ private: APInt SplatUndef; // Bits correspoding to undef operands of the BVN. 
unsigned SplatBitSize = 0; bool isFP128 = false; - public: unsigned Opcode = 0; SmallVector OpVals; MVT VecVT; - SystemZVectorConstantInfo(APFloat FPImm); + SystemZVectorConstantInfo(APInt IntImm); + SystemZVectorConstantInfo(APFloat FPImm) + : SystemZVectorConstantInfo(FPImm.bitcastToAPInt()) { + isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad()); + } SystemZVectorConstantInfo(BuildVectorSDNode *BVN); bool isVectorConstantLegal(const SystemZSubtarget &Subtarget); }; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 4b6aa60f5d55..1436be1e4052 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -30,6 +31,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/BranchProbability.h" @@ -119,9 +121,11 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { MachineFunction &MF = *MBB->getParent(); MachineFrameInfo &MFFrame = MF.getFrameInfo(); MachineOperand &OffsetMO = MI->getOperand(2); + SystemZCallingConventionRegisters *Regs = STI.getSpecialRegisters(); uint64_t Offset = (MFFrame.getMaxCallFrameSize() + - SystemZMC::ELFCallFrameSize + + Regs->getCallFrameSize() + + Regs->getStackPointerBias() + OffsetMO.getImm()); unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset); assert(NewOpcode && "No support for huge argument lists yet"); @@ -393,8 +397,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. - while (std::next(I) != MBB.end()) - std::next(I)->eraseFromParent(); + MBB.erase(std::next(I), MBB.end()); Cond.clear(); FBB = nullptr; @@ -674,6 +677,7 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); if (Opcode == SystemZ::Return || + Opcode == SystemZ::Return_XPLINK || Opcode == SystemZ::Trap || Opcode == SystemZ::CallJG || Opcode == SystemZ::CallBR) @@ -731,18 +735,20 @@ bool SystemZInstrInfo::PredicateInstruction( .addReg(SystemZ::CC, RegState::Implicit); return true; } - if (Opcode == SystemZ::Return) { - MI.setDesc(get(SystemZ::CondReturn)); + if (Opcode == SystemZ::Return || Opcode == SystemZ::Return_XPLINK) { + MI.setDesc(get(Opcode == SystemZ::Return ? 
SystemZ::CondReturn + : SystemZ::CondReturn_XPLINK)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(CCValid).addImm(CCMask) - .addReg(SystemZ::CC, RegState::Implicit); + .addImm(CCValid) + .addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); return true; } if (Opcode == SystemZ::CallJG) { MachineOperand FirstOp = MI.getOperand(0); const uint32_t *RegMask = MI.getOperand(1).getRegMask(); - MI.RemoveOperand(1); - MI.RemoveOperand(0); + MI.removeOperand(1); + MI.removeOperand(0); MI.setDesc(get(SystemZ::CallBRCL)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(CCValid) @@ -755,8 +761,8 @@ bool SystemZInstrInfo::PredicateInstruction( if (Opcode == SystemZ::CallBR) { MachineOperand Target = MI.getOperand(0); const uint32_t *RegMask = MI.getOperand(1).getRegMask(); - MI.RemoveOperand(1); - MI.RemoveOperand(0); + MI.removeOperand(1); + MI.removeOperand(0); MI.setDesc(get(SystemZ::CallBCR)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(CCValid).addImm(CCMask) @@ -1626,7 +1632,8 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, } unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, - int64_t Offset) const { + int64_t Offset, + const MachineInstr *MI) const { const MCInstrDesc &MCID = get(Opcode); int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset); if (isUInt<12>(Offset) && isUInt<12>(Offset2)) { @@ -1648,6 +1655,24 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, // Check whether Opcode allows signed 20-bit displacements. if (MCID.TSFlags & SystemZII::Has20BitOffset) return Opcode; + + // If a VR32/VR64 reg ended up in an FP register, use the FP opcode. + if (MI && MI->getOperand(0).isReg()) { + Register Reg = MI->getOperand(0).getReg(); + if (Reg.isPhysical() && SystemZMC::getFirstReg(Reg) < 16) { + switch (Opcode) { + case SystemZ::VL32: + return SystemZ::LEY; + case SystemZ::VST32: + return SystemZ::STEY; + case SystemZ::VL64: + return SystemZ::LDY; + case SystemZ::VST64: + return SystemZ::STDY; + default: break; + } + } + } } return 0; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 9e5b2729a707..0525f5827736 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -47,8 +47,7 @@ enum { CCMaskFirst = (1 << 18), CCMaskLast = (1 << 19), IsLogical = (1 << 20), - CCIfNoSignedWrap = (1 << 21), - MemMemOp = (1 << 22) + CCIfNoSignedWrap = (1 << 21) }; static inline unsigned getAccessSize(unsigned int Flags) { @@ -309,8 +308,10 @@ public: // and the caller wants to perform that instruction's operation on an // address that has displacement Offset. Return the opcode of a suitable // instruction (which might be Opcode itself) or 0 if no such instruction - // exists. - unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + // exists. MI may be passed in order to allow examination of physical + // register operands (i.e. if a VR32/64 reg ended up as an FP or Vector reg). + unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset, + const MachineInstr *MI = nullptr) const; // Return true if Opcode has a mapping in 12 <-> 20 bit displacements. 
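For reference, a hedged summary of the displacement classes this hook switches between: the short instruction forms take an unsigned 12-bit displacement and the long "Y" forms a signed 20-bit one; offsets outside the signed 20-bit range make getOpcodeForOffset return 0.

  // Illustrative bounds, mirroring the isUInt<12> / isInt<20> checks above:
  constexpr bool fitsUInt12(long long O) { return O >= 0 && O < (1 << 12); }
  constexpr bool fitsInt20(long long O) {
    return O >= -(1 << 19) && O < (1 << 19);
  }
  static_assert(fitsUInt12(4092) && !fitsUInt12(4096), "short vs long form");
  static_assert(fitsInt20(-8) && !fitsInt20(1 << 19), "20-bit limit");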
bool hasDisplacementPairInsn(unsigned Opcode) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 84f1e0fb428c..ed7e3c02a10d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -42,6 +42,10 @@ let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, hasSideEffects = 1 in def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>; +let Defs = [R3D, CC], Uses = [R3D, R4D], hasNoSchedulingInfo = 1, + hasSideEffects = 1 in + def XPLINK_STACKALLOC : Pseudo<(outs), (ins), []>; + //===----------------------------------------------------------------------===// // Branch instructions //===----------------------------------------------------------------------===// @@ -285,6 +289,10 @@ let Predicates = [IsTargetXPLINK64] in { def CallBASR_XPLINK64 : Alias<4, (outs), (ins ADDR64:$R2, variable_ops), [(z_call ADDR64:$R2)]>; } + + let isCall = 1, Defs = [R3D, CC], Uses = [FPC] in { + def CallBASR_STACKEXT : Alias<4, (outs), (ins ADDR64:$R2), []>; + } } // Regular calls. @@ -336,13 +344,25 @@ let isCall = 1, isTerminator = 1, isReturn = 1 in { def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3, ADDR64:$R4), []>; } -// A return instruction (br %r14) for ELF and (b 2 %r7) for XPLink. -let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in - def Return : Alias<2, (outs), (ins), [(z_retflag)]>; +let Predicates = [IsTargetXPLINK64] in { + // A return instruction (b 2(%r7)). + let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return_XPLINK : Alias<4, (outs), (ins), [(z_retflag)]>; + + // A conditional return instruction (bc , 2(%r7)). + let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in + def CondReturn_XPLINK : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; +} + +let Predicates = [IsTargetELF] in { + // A return instruction (br %r14). + let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return : Alias<2, (outs), (ins), [(z_retflag)]>; -// A conditional return instruction (bcr , %r14). -let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in - def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>; + // A conditional return instruction (bcr , %r14). + let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in + def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>; +} // Fused compare and conditional returns. 
 let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
index d6c795985448..1e6f971906e9 100644
--- a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -66,7 +66,7 @@ bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
   if (skipFunction(F.getFunction()))
     return false;

-  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+  TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();
   MF = &F;
   SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>();
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
index 9b6aa3593ce0..cada880a82d8 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -14,3 +14,9 @@ using namespace llvm;
 // pin vtable to this file
 void SystemZMachineFunctionInfo::anchor() {}

+MachineFunctionInfo *SystemZMachineFunctionInfo::clone(
+    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+    const {
+  return DestMF.cloneInfo<SystemZMachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index ec4b812eb0e1..de73a5d86422 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -41,6 +41,11 @@ public:
       : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
         RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {}

+  MachineFunctionInfo *
+  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+      const override;
+
   // Get and set the first and last call-saved GPR that should be saved by
   // this function and the SP offset for the STMG.  These are 0 if no GPRs
   // need to be saved or restored.
diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
index 5a2cfc53da49..e15f9027cc20 100644
--- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -17,6 +17,7 @@
 #include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 using namespace llvm;
@@ -253,7 +254,7 @@ bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
 }

 bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
-  TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo();
   bool Modified = false;
   for (auto &MBB : MF)
diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td
index 4fceaa14c598..d00b94d00242 100644
--- a/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -38,5 +38,6 @@ def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
 def : ProcessorModel<"arch13", Z15Model, Arch13SupportedFeatures.List>;
 def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>;

-def : ProcessorModel<"arch14", Z15Model, Arch14SupportedFeatures.List>;
+def : ProcessorModel<"arch14", Z16Model, Arch14SupportedFeatures.List>;
+def : ProcessorModel<"z16", Z16Model, Arch14SupportedFeatures.List>;
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 48cec176b006..be65fe55c634 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -290,8 +290,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   MachineBasicBlock &MBB = *MI->getParent();
   MachineFunction &MF = *MBB.getParent();
-  auto *TII =
-      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo();
   const SystemZFrameLowering *TFI = getFrameLowering(MF);
   DebugLoc DL = MI->getDebugLoc();
@@ -321,7 +320,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   // See if the offset is in range, or if an equivalent instruction that
   // accepts the offset exists.
   unsigned Opcode = MI->getOpcode();
-  unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
+  unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset, &*MI);
   if (OpcodeForOffset) {
     if (OpcodeForOffset == SystemZ::LE &&
         MF.getSubtarget<SystemZSubtarget>().hasVector()) {
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 8ce01074873a..93ffa9847f06 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -74,7 +74,7 @@ public:

   /// Destroys the object.  Bogus destructor allowing derived classes
   /// to override it.
-  virtual ~SystemZCallingConventionRegisters(){};
+  virtual ~SystemZCallingConventionRegisters() = default;
 };

 /// XPLINK64 calling convention specific use registers
@@ -102,7 +102,7 @@ public:
   int getStackPointerBias() override final { return 2048; }

   /// Destroys the object.  Bogus destructor overriding base class destructor
-  ~SystemZXPLINK64Registers(){};
+  ~SystemZXPLINK64Registers() = default;
 };

 /// ELF calling convention specific use registers
@@ -128,7 +128,7 @@ public:
   int getStackPointerBias() override final { return 0; }

   /// Destroys the object.  Bogus destructor overriding base class destructor
-  ~SystemZELFRegisters(){};
+  ~SystemZELFRegisters() = default;
 };

 struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
diff --git a/llvm/lib/Target/SystemZ/SystemZSchedule.td b/llvm/lib/Target/SystemZ/SystemZSchedule.td
index 119e3ee7c22c..d683cc042e5c 100644
--- a/llvm/lib/Target/SystemZ/SystemZSchedule.td
+++ b/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -53,12 +53,14 @@ foreach Num = ["", "2", "3", "4", "5", "6"] in {
   def "DFU"#Num : SchedWrite;
 }

-def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit.
+def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit (30 cycles).
+def VecFPd20 : SchedWrite; // Blocking BFP div/sqrt unit, 20 cycles.

 def VBU : SchedWrite; // Virtual branching unit

 def MCD : SchedWrite; // Millicode

+include "SystemZScheduleZ16.td"
 include "SystemZScheduleZ15.td"
 include "SystemZScheduleZ14.td"
 include "SystemZScheduleZ13.td"
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index f4777b0097f1..fd01a8a941c9 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -168,12 +168,12 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;

 // Call
 def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
 def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
 def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;

 // Return
-def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;

 //===----------------------------------------------------------------------===//
 // Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index f74c0d594482..3f406736a71f 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -169,12 +169,12 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;

 // Call
 def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
 def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
 def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;

 // Return
-def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;

 //===----------------------------------------------------------------------===//
 // Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index d17e58fc6318..6ae911c3f3eb 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -169,12 +169,12 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
 // Call
 def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
 def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
 def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;

 // Return
-def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;

 //===----------------------------------------------------------------------===//
 // Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
new file mode 100644
index 000000000000..ca688671a7e2
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
@@ -0,0 +1,1728 @@
+//-- SystemZScheduleZ16.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z16 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+// Pseudos expanded right after isel do not need to be modelled here.
+//
+//===----------------------------------------------------------------------===//
+
+def Z16Model : SchedMachineModel {
+
+    let UnsupportedFeatures = Arch14UnsupportedFeatures.List;
+
+    let IssueWidth = 6;              // Number of instructions decoded per cycle.
+    let MicroOpBufferSize = 60;      // Issue queues
+    let LoadLatency = 1;             // Optimistic load latency.
+
+    let PostRAScheduler = 1;
+
+    // Extra cycles for a mispredicted branch.
+    let MispredictPenalty = 20;
+}
+
+let SchedModel = Z16Model in {
+// These definitions need the SchedModel value. They could be put in a
+// subtarget common include file, but it seems the include system in Tablegen
+// currently (2016) rejects multiple includes of same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+  def : WriteRes<NormalGr, []>;
+  def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
+  def : WriteRes<EndGroup, []> { let EndGroup = 1; }
+}
+def : WriteRes<Cracked, []> {
+  let NumMicroOps = 2;
+  let BeginGroup = 1;
+}
+def : WriteRes<GroupAlone, []> {
+  let NumMicroOps = 3;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+def : WriteRes<GroupAlone2, []> {
+  let NumMicroOps = 6;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+  let NumMicroOps = 9;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+
+// Incoming latency removed from the register operand which is used together
+// with a memory operand by the instruction.
+def : ReadAdvance<RegReadAdv, 4>;
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; }
+
+let NumMicroOps = 0 in {
+  foreach L = 1-30 in
+    def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; }
+}
+
+// Execution units.
+def Z16_FXaUnit : ProcResource<2>;
+def Z16_FXbUnit : ProcResource<2>;
+def Z16_LSUnit : ProcResource<2>;
+def Z16_VecUnit : ProcResource<2>;
+def Z16_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z16_VBUnit : ProcResource<2>;
+def Z16_MCD : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+let NumMicroOps = 0 in {
+  def : WriteRes<FXa, [Z16_FXaUnit]>;
+  def : WriteRes<FXb, [Z16_FXbUnit]>;
+  def : WriteRes<LSU, [Z16_LSUnit]>;
+  def : WriteRes<VecBF, [Z16_VecUnit]>;
+  def : WriteRes<VecDF, [Z16_VecUnit]>;
+  def : WriteRes<VecDFX, [Z16_VecUnit]>;
+  def : WriteRes<VecMul, [Z16_VecUnit]>;
+  def : WriteRes<VecStr, [Z16_VecUnit]>;
+  def : WriteRes<VecXsPm, [Z16_VecUnit]>;
+  foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+    def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z16_FXaUnit]>;
+    def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z16_FXbUnit]>;
+    def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z16_LSUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z16_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z16_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z16_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z16_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z16_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z16_VecUnit]>;
+  }}
+
+  def : WriteRes<VecFPd, [Z16_VecFPdUnit]> { let ResourceCycles = [30]; }
+  def : WriteRes<VecFPd20, [Z16_VecFPdUnit]> { let ResourceCycles = [20]; }
+
+  def : WriteRes<VBU, [Z16_VBUnit]>; // Virtual Branching Unit
+}
+
+def : WriteRes<MCD, [Z16_MCD]> { let NumMicroOps = 3;
+                                 let BeginGroup = 1;
+                                 let EndGroup = 1; }
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+// Pseudo -> LA / LAY
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>;
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb2, GroupAlone],
+             (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>;
+
+// Pseudo -> reg move
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>;
+
+// Loads
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr],
+             (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>;
+
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>;
+
+// Store multiple
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>;
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "IC32(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr],
+             (instregex "ICM(H|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "A(Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AL(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "ALG(F)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>;
InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + 
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
+def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
+             (instregex "DSG(F)?$")>;
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+             (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
+             (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+             (instregex "C(G|Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+             (instregex "CL(Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>;
FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : 
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
+             (instregex "TRT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+             (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD],
+             (instregex "KM(C|F|O|CTR|A)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+             (instregex "(KIMD|KLMD|KMAC|KDSA)$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+             (instregex "(PCC|PPNO|PRNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Guarded storage
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>;
+def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
+             (instregex "CVBG$")>;
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
+             (instregex "CVB(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
+
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
+             (instregex "(A|S|ZA)P$")>;
+def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
+def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
+def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
+def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
+
+// Transaction end
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[WLat30, MCD], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>;
+
+// String instructions
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD],
+             (instregex "UPT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "NNPA$")>;
+
+// Execute
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], + (instregex "LTXBR(Compare)?$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / 
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQEBR$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDBR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "A(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "S(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+             (instregex "MXDB$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "M(A|S)EB$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DEB$")>;
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DDB$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DEBR$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDBR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+             (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQER$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "A(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "S(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+             (instregex "MXD$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "MY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "M(A|S)(E|D)$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone],
+             (instregex "MAY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "MAY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+
+// Division
+def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DE$")>;
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DD$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DER$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "C(E|D)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>;
+def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>;
+def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>;
+def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>;
+def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDLGTR$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked],
+             (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked],
+             (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>;
+def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[WLat20, VecDF, NormalGr], (instregex "MDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>;
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+             (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
+             (instregex "VGE(F|G)$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+             (instregex "VLM(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
+def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+             (instregex "VLEBR(H|F|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// Conversion and rounding
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>;
+
+// Sign operations
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>;
+
+// Minimum / maximum
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>;
+
+// Test data class
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>;
+
+// Add / subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>;
+
+// Divide / square root
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+             (instregex "WF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+             (instregex "VF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>;
+def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+             (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+             (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// NNP assist instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCFN$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLFN(L|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VC(R)?NF$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
+def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>;
+def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone],
+             (instregex "VCVB(G)?(Opt)?$")>;
+def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone],
+             (instregex "VCVD(G)?$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
+
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCH(S|D|X)?P$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCSHP$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH")>;
+def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR")>;
+def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR")>;
+def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH")>;
+def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?(Y)?$")>;
+def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Breaking-Event-Address-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR")>;
+def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "ISKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "IVSK$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>;
+def : InstRW<[WLat30, MCD], (instregex "IRBM$")>;
+def : InstRW<[WLat30, MCD], (instregex "PFMF$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGIN$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RDP(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTLB$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STRAG$")>;
+def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "LASP$")>;
+def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PC$")>;
+def : InstRW<[WLat30, MCD], (instregex "PR$")>;
+def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RP$")>;
+def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "BAKR$")>;
+def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
+def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "STAP$")>;
+def : InstRW<[WLat30, MCD], (instregex "STIDP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "ECAG$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTF$")>;
+def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "QPACI$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "SVC$")>;
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[WLat30, MCD], (instregex "DIAG$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "RCHP$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCHM$")>;
+def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPI$")>;
+def : InstRW<[WLat30, MCD], (instregex "SAL$")>;
+
+}
+
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 0f01a4291cf7..173cf960d2bd 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -147,12 +147,12 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
 // Call
 def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRAS$")>;
 def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
 def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
 
 // Return
-def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, LSU, EndGroup], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "CondReturn(_XPLINK)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 096a95a82ec8..d2060471d65e 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -152,12 +152,12 @@ def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
 // Call
 def : InstRW<[WLat1, FXU2, VBU, GroupAlone], (instregex "(Call)?BRAS$")>;
 def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
 def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
 
 // Return
-def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, LSU, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, LSU, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index db4b4879b33a..ce30d8ef2cba 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -88,7 +88,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
 SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
     SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
     SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile,
-    MachinePointerInfo DstPtrInfo) const {
+    bool AlwaysInline, MachinePointerInfo DstPtrInfo) const {
   EVT PtrVT = Dst.getValueType();
 
   if (IsVolatile)
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index da6725777e43..6ac5bf8c6c1a 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -31,7 +31,7 @@ public:
   SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
                                   SDValue Chain, SDValue Dst, SDValue Byte,
                                   SDValue Size, Align Alignment,
-                                  bool IsVolatile,
+                                  bool IsVolatile, bool AlwaysInline,
                                   MachinePointerInfo DstPtrInfo) const override;
 
   std::pair<SDValue, SDValue>
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 92930dad80ef..30b22fa1ce92 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -162,10 +162,10 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
     MachineOperand Src(MI.getOperand(1));
     MachineOperand Suppress(MI.getOperand(2));
     MachineOperand Mode(MI.getOperand(3));
-    MI.RemoveOperand(3);
-    MI.RemoveOperand(2);
-    MI.RemoveOperand(1);
-    MI.RemoveOperand(0);
+    MI.removeOperand(3);
+    MI.removeOperand(2);
+    MI.removeOperand(1);
+    MI.removeOperand(0);
     MI.setDesc(TII->get(Opcode));
     MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
         .add(Dest)
@@ -190,9 +190,9 @@ bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) {
     MachineOperand Lhs(LHSMO);
     MachineOperand Rhs(RHSMO);
     MachineOperand Src(AccMO);
-    MI.RemoveOperand(3);
-    MI.RemoveOperand(2);
-    MI.RemoveOperand(1);
+    MI.removeOperand(3);
+    MI.removeOperand(2);
+    MI.removeOperand(1);
     MI.setDesc(TII->get(Opcode));
     MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
         .add(Src)
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 75c0d454d904..f6889035b654 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -27,13 +27,14 @@ static cl::opt<bool> UseSubRegLiveness(
 // Pin the vtable to this file.
 void SystemZSubtarget::anchor() {}
 
-SystemZSubtarget &
-SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
-  StringRef CPUName = CPU;
-  if (CPUName.empty())
-    CPUName = "generic";
+SystemZSubtarget &SystemZSubtarget::initializeSubtargetDependencies(
+    StringRef CPU, StringRef TuneCPU, StringRef FS) {
+  if (CPU.empty())
+    CPU = "generic";
+  if (TuneCPU.empty())
+    TuneCPU = CPU;
   // Parse features string.
-  ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
+  ParseSubtargetFeatures(CPU, TuneCPU, FS);
 
   // -msoft-float implies -mno-vx.
   if (HasSoftFloat)
@@ -64,9 +65,10 @@ SystemZSubtarget::initializeSpecialRegisters() {
 }
 
 SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
+                                   const std::string &TuneCPU,
                                    const std::string &FS,
                                    const TargetMachine &TM)
-    : SystemZGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+    : SystemZGenSubtargetInfo(TT, CPU, TuneCPU, FS),
       HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false),
       HasFPExtension(false), HasPopulationCount(false),
      HasMessageSecurityAssist3(false), HasMessageSecurityAssist4(false),
@@ -88,8 +90,8 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
       HasResetDATProtection(false), HasProcessorActivityInstrumentation(false),
       HasSoftFloat(false), TargetTriple(TT),
       SpecialRegisters(initializeSpecialRegisters()),
-      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
-      FrameLowering(SystemZFrameLowering::create(*this)) {}
+      InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
+      TLInfo(TM, *this), FrameLowering(SystemZFrameLowering::create(*this)) {}
 
 bool SystemZSubtarget::enableSubRegLiveness() const {
   return UseSubRegLiveness;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 98f7094fcb48..cd16c19f9bfa 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -84,12 +84,14 @@ private:
   std::unique_ptr<const SystemZFrameLowering> FrameLowering;
 
   SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
+                                                    StringRef TuneCPU,
                                                     StringRef FS);
   SystemZCallingConventionRegisters *initializeSpecialRegisters();
 
 public:
   SystemZSubtarget(const Triple &TT, const std::string &CPU,
-                   const std::string &FS, const TargetMachine &TM);
+                   const std::string &TuneCPU, const std::string &FS,
+                   const TargetMachine &TM);
 
   SystemZCallingConventionRegisters *getSpecialRegisters() const {
     assert(SpecialRegisters && "Unsupported SystemZ calling convention");
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f1469fe8f56b..31f8ee2f894d 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -118,7 +118,7 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
   // Static code is suitable for use in a dynamic executable; there is no
   // separate DynamicNoPIC model.
-  if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+  if (!RM || *RM == Reloc::DynamicNoPIC)
     return Reloc::Static;
   return *RM;
 }
@@ -187,10 +187,13 @@ SystemZTargetMachine::~SystemZTargetMachine() = default;
 const SystemZSubtarget *
 SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
   Attribute CPUAttr = F.getFnAttribute("target-cpu");
+  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
   Attribute FSAttr = F.getFnAttribute("target-features");
 
   std::string CPU =
       CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+  std::string TuneCPU =
+      TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
   std::string FS =
       FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
 
@@ -202,13 +205,14 @@ SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
   if (softFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
 
-  auto &I = SubtargetMap[CPU + FS];
+  auto &I = SubtargetMap[CPU + TuneCPU + FS];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, FS, *this);
+    I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, TuneCPU, FS,
+                                           *this);
   }
 
   return I.get();
@@ -334,6 +338,6 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
 }
 
 TargetTransformInfo
-SystemZTargetMachine::getTargetTransformInfo(const Function &F) {
+SystemZTargetMachine::getTargetTransformInfo(const Function &F) const {
   return TargetTransformInfo(SystemZTTIImpl(this, F));
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 9ea03e104fc9..2cdb33a5064b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -44,7 +44,7 @@ public:
   // Override LLVMTargetMachine
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
-  TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+  TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
 
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
index a610a90d2069..1b4e93ebe39b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
@@ -10,6 +10,7 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 
 namespace llvm {
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 6d66ebfced05..69914049a00c 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -30,6 +30,42 @@ using namespace llvm;
 //
 //===----------------------------------------------------------------------===//
 
+static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
+  bool UsedAsMemCpySource = false;
+  for (const User *U : V->users())
+    if (const Instruction *User = dyn_cast<Instruction>(U)) {
+      if (isa<BitCastInst>(User) || isa<GetElementPtrInst>(User)) {
+        UsedAsMemCpySource |= isUsedAsMemCpySource(User, OtherUse);
+        continue;
+      }
+      if (const MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(User)) {
+        if (Memcpy->getOperand(1) == V && !Memcpy->isVolatile()) {
+          UsedAsMemCpySource = true;
+          continue;
+        }
+      }
+      OtherUse = true;
+    }
+  return UsedAsMemCpySource;
+}
+
+unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
+  unsigned Bonus = 0;
+
+  // Increase the threshold if an incoming argument is used only as a memcpy
+  // source.
+  if (Function *Callee = CB->getCalledFunction())
+    for (Argument &Arg : Callee->args()) {
+      bool OtherUse = false;
+      if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse)
+        Bonus += 150;
+    }
+
+  LLVM_DEBUG(if (Bonus)
+               dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
+  return Bonus;
+}
+
 InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                               TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
@@ -303,8 +339,8 @@ void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
   BaseT::getPeelingPreferences(L, SE, PP);
 }
 
-bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
-                                   TargetTransformInfo::LSRCost &C2) {
+bool SystemZTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+                                   const TargetTransformInfo::LSRCost &C2) {
   // SystemZ specific: check instruction count (first), and don't care about
   // ImmCost, since offsets are checked explicitly.
   return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
@@ -559,7 +595,8 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
 
 InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                                VectorType *Tp,
                                                ArrayRef<int> Mask, int Index,
-                                               VectorType *SubTp) {
+                                               VectorType *SubTp,
+                                               ArrayRef<const Value *> Args) {
   Kind = improveShuffleKindFromMask(Kind, Mask);
   if (ST->hasVector()) {
     unsigned NumVectors = getNumVectorRegs(Tp);
@@ -781,7 +818,11 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
 
     if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
       if (SrcScalarBits >= 8) {
-        // ZExt/SExt will be handled with one unpack per doubling of width.
+        // ZExt will use either a single unpack or a vector permute.
+        if (Opcode == Instruction::ZExt)
+          return NumDstVectors;
+
+        // SExt will be handled with one unpack per doubling of width.
         unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
 
         // For types that spans multiple vector registers, some additional
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index db4ec794b3e4..33317e799eab 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -37,6 +37,7 @@ public:
   /// @{
 
   unsigned getInliningThresholdMultiplier() { return 3; }
+  unsigned adjustInliningThreshold(const CallBase *CB) const;
 
   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                 TTI::TargetCostKind CostKind);
@@ -58,8 +59,8 @@ public:
   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                              TTI::PeelingPreferences &PP);
 
-  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
-                     TargetTransformInfo::LSRCost &C2);
+  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+                     const TargetTransformInfo::LSRCost &C2);
 
   /// @}
 
   /// \name Vector TTI Implementations
@@ -92,7 +93,8 @@ public:
                 const Instruction *CxtI = nullptr);
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask, int Index,
-                                 VectorType *SubTp);
+                                 VectorType *SubTp,
+                                 ArrayRef<const Value *> Args = None);
   unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
   unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
diff --git a/llvm/lib/Target/TargetIntrinsicInfo.cpp b/llvm/lib/Target/TargetIntrinsicInfo.cpp
index 256514c8c22d..d44a34984c42 100644
--- a/llvm/lib/Target/TargetIntrinsicInfo.cpp
+++ b/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -11,15 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringMapEntry.h"
 #include "llvm/IR/Function.h"
 using namespace llvm;
 
-TargetIntrinsicInfo::TargetIntrinsicInfo() {
-}
+TargetIntrinsicInfo::TargetIntrinsicInfo() = default;
 
-TargetIntrinsicInfo::~TargetIntrinsicInfo() {
-}
+TargetIntrinsicInfo::~TargetIntrinsicInfo() = default;
 
 unsigned TargetIntrinsicInfo::getIntrinsicID(const Function *F) const {
   const ValueName *ValName = F->getValueName();
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 7954f0f09faf..8f633adbb9ef 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -24,10 +24,8 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 390457dbb2bc..8d1ad617889c 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -13,17 +13,14 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 using namespace llvm;
 
@@ -63,16 +60,13 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
   RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
   RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
   RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
+  RESET_OPTION(ApproxFuncFPMath, "approx-func-fp-math");
 }
 
 /// Returns the code generation relocation model. The choices are static, PIC,
 /// and dynamic-no-pic.
 Reloc::Model TargetMachine::getRelocationModel() const { return RM; }
 
-/// Returns the code model. The choices are small, kernel, medium, large, and
-/// target default.
-CodeModel::Model TargetMachine::getCodeModel() const { return CMModel; }
-
 /// Get the IR-specified TLS model for Var.
 static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
   switch (GV->getThreadLocalMode()) {
@@ -189,7 +183,8 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; }
 
 void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; }
 
-TargetTransformInfo TargetMachine::getTargetTransformInfo(const Function &F) {
+TargetTransformInfo
+TargetMachine::getTargetTransformInfo(const Function &F) const {
   return TargetTransformInfo(F.getParent()->getDataLayout());
 }
 
@@ -217,7 +212,7 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const {
   return TLOF->getContext().getOrCreateSymbol(NameStr);
 }
 
-TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
+TargetIRAnalysis TargetMachine::getTargetIRAnalysis() const {
   // Since Analysis can't depend on Target, use a std::function to invert the
   // dependency.
   return TargetIRAnalysis(
diff --git a/llvm/lib/Target/TargetMachineC.cpp b/llvm/lib/Target/TargetMachineC.cpp
index 55047a1bb3cd..b8cefbe5b6b7 100644
--- a/llvm/lib/Target/TargetMachineC.cpp
+++ b/llvm/lib/Target/TargetMachineC.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Core.h"
-#include "llvm-c/Target.h"
 #include "llvm-c/TargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -20,13 +19,10 @@
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/CodeGenCWrappers.h"
 #include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdlib>
 #include <cstring>
 
 using namespace llvm;
 
@@ -217,7 +213,9 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
 }
 
 LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
-    char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+                                     const char *Filename,
+                                     LLVMCodeGenFileType codegen,
+                                     char **ErrorMessage) {
   std::error_code EC;
   raw_fd_ostream dest(Filename, EC, sys::fs::OF_None);
   if (EC) {
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 4a318e493c52..f39be036d21f 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -17,6 +17,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
diff --git a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
index 72c40cbe78c4..00487a1f5bb3 100644
--- a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
+++ b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -15,8 +15,8 @@
 #include "VE.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/TargetRegistry.h"
 
@@ -33,7 +33,7 @@ class VEDisassembler : public MCDisassembler {
 public:
   VEDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
       : MCDisassembler(STI, Ctx) {}
-  virtual ~VEDisassembler() {}
+  virtual ~VEDisassembler() = default;
 
   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -126,7 +126,7 @@ static const unsigned MiscRegDecoderTable[] = {
 
 static DecodeStatus DecodeI32RegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
-                                           const void *Decoder) {
+                                           const MCDisassembler *Decoder) {
   if (RegNo > 63)
     return MCDisassembler::Fail;
   unsigned Reg = I32RegDecoderTable[RegNo];
@@ -136,7 +136,7 @@ static DecodeStatus DecodeI32RegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeI64RegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
-                                           const void *Decoder) {
+                                           const MCDisassembler *Decoder) {
   if (RegNo > 63)
     return MCDisassembler::Fail;
   unsigned Reg = I64RegDecoderTable[RegNo];
@@ -146,7 +146,7 @@ static DecodeStatus DecodeI64RegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeF32RegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
-                                           const void *Decoder) {
+                                           const MCDisassembler *Decoder) {
   if (RegNo > 63)
     return MCDisassembler::Fail;
   unsigned Reg = F32RegDecoderTable[RegNo];
@@ -156,7 +156,7 @@ static DecodeStatus DecodeF32RegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
-                                            const void *Decoder) {
+                                            const MCDisassembler *Decoder) {
   if (RegNo % 2 || RegNo > 63)
     return MCDisassembler::Fail;
   unsigned Reg = F128RegDecoderTable[RegNo / 2];
@@ -166,7 +166,7 @@ static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
-                                           const void *Decoder) {
+                                           const MCDisassembler *Decoder) {
   unsigned Reg = VE::NoRegister;
   if (RegNo == 255)
     Reg = VE::VIX;
@@ -180,7 +180,7 @@ static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo,
                                           uint64_t Address,
-                                          const void *Decoder) {
+                                          const MCDisassembler *Decoder) {
   if (RegNo > 15)
     return MCDisassembler::Fail;
   unsigned Reg = VMRegDecoderTable[RegNo];
@@ -190,7 +190,7 @@ static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo,
                                              uint64_t Address,
-                                             const void *Decoder) {
+                                             const MCDisassembler *Decoder) {
   if (RegNo % 2 || RegNo > 15)
     return MCDisassembler::Fail;
   unsigned Reg = VM512RegDecoderTable[RegNo / 2];
@@ -200,7 +200,7 @@ static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo,
 
 static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
-                                            const void *Decoder) {
+                                            const MCDisassembler *Decoder) {
   if (RegNo > 30)
     return MCDisassembler::Fail;
   unsigned Reg = MiscRegDecoderTable[RegNo];
@@ -211,47 +211,56 @@ static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeASX(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTS1AMI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTS1AMI32(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCASI64(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCASI32(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeSIMM7(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeSIMM32(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeCCOperand(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeRDOperand(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBranchCondition(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeBranchConditionAlways(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); #include "VEGenDisassemblerTables.inc" @@ -302,10 +311,10 @@ DecodeStatus VEDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, } typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeASX(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned sy = fieldFromInstruction(insn, 40, 7); bool cy = fieldFromInstruction(insn, 47, 
1); unsigned sz = fieldFromInstruction(insn, 32, 7); @@ -338,7 +347,7 @@ static DecodeStatus DecodeASX(MCInst &MI, uint64_t insn, uint64_t Address, } static DecodeStatus DecodeAS(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned sz = fieldFromInstruction(insn, 32, 7); bool cz = fieldFromInstruction(insn, 39, 1); uint64_t simm32 = SignExtend64<32>(fieldFromInstruction(insn, 0, 32)); @@ -360,7 +369,7 @@ static DecodeStatus DecodeAS(MCInst &MI, uint64_t insn, uint64_t Address, } static DecodeStatus DecodeMem(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder, bool isLoad, + const MCDisassembler *Decoder, bool isLoad, DecodeFunc DecodeSX) { unsigned sx = fieldFromInstruction(insn, 48, 7); @@ -384,7 +393,7 @@ static DecodeStatus DecodeMem(MCInst &MI, uint64_t insn, uint64_t Address, } static DecodeStatus DecodeMemAS(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder, bool isLoad, + const MCDisassembler *Decoder, bool isLoad, DecodeFunc DecodeSX) { unsigned sx = fieldFromInstruction(insn, 48, 7); @@ -408,50 +417,55 @@ static DecodeStatus DecodeMemAS(MCInst &MI, uint64_t insn, uint64_t Address, } static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI32RegisterClass); } static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI32RegisterClass); } static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass); } static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI64RegisterClass); } static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeF32RegisterClass); } static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, DecodeF32RegisterClass); } static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMemAS(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass); } static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMemAS(Inst, insn, Address, Decoder, false, DecodeI64RegisterClass); } static DecodeStatus DecodeCAS(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder, bool isImmOnly, bool isUImm, - DecodeFunc DecodeSX) { + const MCDisassembler *Decoder, bool isImmOnly, + bool isUImm, DecodeFunc DecodeSX) { unsigned sx = fieldFromInstruction(insn, 48, 7); bool cy = fieldFromInstruction(insn, 47, 1); unsigned sy = fieldFromInstruction(insn, 40, 7); @@ -488,43 +502,43 @@ static DecodeStatus DecodeCAS(MCInst &MI, uint64_t insn, 
uint64_t Address, } static DecodeStatus DecodeTS1AMI64(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeCAS(MI, insn, Address, Decoder, false, true, DecodeI64RegisterClass); } static DecodeStatus DecodeTS1AMI32(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeCAS(MI, insn, Address, Decoder, false, true, DecodeI32RegisterClass); } static DecodeStatus DecodeCASI64(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeCAS(MI, insn, Address, Decoder, false, false, DecodeI64RegisterClass); } static DecodeStatus DecodeCASI32(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeCAS(MI, insn, Address, Decoder, false, false, DecodeI32RegisterClass); } static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass); } static DecodeStatus DecodeSIMM7(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { uint64_t tgt = SignExtend64<7>(insn); MI.addOperand(MCOperand::createImm(tgt)); return MCDisassembler::Success; } static DecodeStatus DecodeSIMM32(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { uint64_t tgt = SignExtend64<32>(insn); MI.addOperand(MCOperand::createImm(tgt)); return MCDisassembler::Success; @@ -568,14 +582,14 @@ static bool isIntegerBCKind(MCInst &MI) { // Decode CC Operand field. static DecodeStatus DecodeCCOperand(MCInst &MI, uint64_t cf, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { MI.addOperand(MCOperand::createImm(VEValToCondCode(cf, isIntegerBCKind(MI)))); return MCDisassembler::Success; } // Decode RD Operand field. static DecodeStatus DecodeRDOperand(MCInst &MI, uint64_t cf, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { MI.addOperand(MCOperand::createImm(VEValToRD(cf))); return MCDisassembler::Success; } @@ -583,7 +597,7 @@ static DecodeStatus DecodeRDOperand(MCInst &MI, uint64_t cf, uint64_t Address, // Decode branch condition instruction and CCOperand field in it. static DecodeStatus DecodeBranchCondition(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned cf = fieldFromInstruction(insn, 48, 4); bool cy = fieldFromInstruction(insn, 47, 1); unsigned sy = fieldFromInstruction(insn, 40, 7); @@ -607,7 +621,7 @@ static DecodeStatus DecodeBranchCondition(MCInst &MI, uint64_t insn, static DecodeStatus DecodeBranchConditionAlways(MCInst &MI, uint64_t insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Decode MEMri. 
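+  // A branch-always (unconditional BCR) encodes no comparison operand; its
+  // target is a plain AS operand (base register + 32-bit displacement), so
+  // the generic DecodeAS helper below handles it.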
return DecodeAS(MI, insn, Address, Decoder); } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp index ae065407409a..1c89d6444d11 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp @@ -25,7 +25,7 @@ public: : MCELFObjectTargetWriter(/* Is64Bit */ true, OSABI, ELF::EM_VE, /* HasRelocationAddend */ true) {} - ~VEELFObjectWriter() override {} + ~VEELFObjectWriter() override = default; protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h index 46b995cee840..0e2d55c0182e 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h @@ -20,28 +20,28 @@ enum Fixups { /// fixup_ve_srel32 - 32-bit fixup corresponding to foo for relative branch fixup_ve_srel32, - /// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi + /// fixup_ve_hi32 - 32-bit fixup corresponding to foo\@hi fixup_ve_hi32, - /// fixup_ve_lo32 - 32-bit fixup corresponding to foo@lo + /// fixup_ve_lo32 - 32-bit fixup corresponding to foo\@lo fixup_ve_lo32, - /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo@pc_hi + /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo\@pc_hi fixup_ve_pc_hi32, - /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo@pc_lo + /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo\@pc_lo fixup_ve_pc_lo32, - /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo@got_hi + /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo\@got_hi fixup_ve_got_hi32, - /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo@got_lo + /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo\@got_lo fixup_ve_got_lo32, - /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo@gotoff_hi + /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo\@gotoff_hi fixup_ve_gotoff_hi32, - /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo@gotoff_lo + /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo\@gotoff_lo fixup_ve_gotoff_lo32, /// fixup_ve_plt_hi32/lo32 diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp index 65bb0cf8b0d7..3eb246f73679 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp @@ -159,7 +159,6 @@ uint64_t VEMCCodeEmitter::getRDOpValue(const MCInst &MI, unsigned OpNo, #include "VEGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createVEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new VEMCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp index 4d45918ad0aa..a1045107a832 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" using namespace llvm; diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h index f0bb6e3acdee..d8f9d0634c24 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -28,8 +28,7 @@ class MCSubtargetInfo; class MCTargetOptions; class Target; 
-MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); +MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); MCAsmBackend *createVEAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options); diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index 2a729a1a311c..2794d1458be7 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -27,7 +27,6 @@ class MCInst; class MachineInstr; FunctionPass *createVEISelDag(VETargetMachine &TM); -FunctionPass *createVEPromoteToI1Pass(); FunctionPass *createLVLGenPass(); void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, @@ -370,5 +369,8 @@ inline static uint64_t mimm2Val(uint64_t Val) { inline unsigned M0(unsigned Val) { return Val + 64; } inline unsigned M1(unsigned Val) { return Val; } +static const unsigned StandardVectorWidth = 256; +static const unsigned PackedVectorWidth = 512; + } // namespace llvm #endif diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp index af3e4af13814..8f11eba6d5fd 100644 --- a/llvm/lib/Target/VE/VECustomDAG.cpp +++ b/llvm/lib/Target/VE/VECustomDAG.cpp @@ -19,17 +19,52 @@ namespace llvm { -static const int StandardVectorWidth = 256; - bool isPackedVectorType(EVT SomeVT) { if (!SomeVT.isVector()) return false; return SomeVT.getVectorNumElements() > StandardVectorWidth; } +MVT splitVectorType(MVT VT) { + if (!VT.isVector()) + return VT; + return MVT::getVectorVT(VT.getVectorElementType(), StandardVectorWidth); +} + +MVT getLegalVectorType(Packing P, MVT ElemVT) { + return MVT::getVectorVT(ElemVT, P == Packing::Normal ? StandardVectorWidth + : PackedVectorWidth); +} + +Packing getTypePacking(EVT VT) { + assert(VT.isVector()); + return isPackedVectorType(VT) ? Packing::Dense : Packing::Normal; +} + +bool isMaskType(EVT SomeVT) { + if (!SomeVT.isVector()) + return false; + return SomeVT.getVectorElementType() == MVT::i1; +} + +bool isMaskArithmetic(SDValue Op) { + switch (Op.getOpcode()) { + default: + return false; + case ISD::AND: + case ISD::XOR: + case ISD::OR: + return isMaskType(Op.getValueType()); + } +} + /// \returns the VVP_* SDNode opcode corresponding to \p OC.
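+/// For example, assuming the usual VVPNodes.def entries, both ISD::ADD and
+/// its predicated twin ISD::VP_ADD are expected to map to VEISD::VVP_ADD
+/// here; opcodes without a VVP counterpart yield None.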
Optional getVVPOpcode(unsigned Opcode) { switch (Opcode) { + case ISD::MLOAD: + return VEISD::VVP_LOAD; + case ISD::MSTORE: + return VEISD::VVP_STORE; #define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \ case ISD::VPOPC: \ return VEISD::VVPNAME; @@ -38,10 +73,76 @@ Optional getVVPOpcode(unsigned Opcode) { case ISD::SDNAME: \ return VEISD::VVPNAME; #include "VVPNodes.def" + // TODO: Map those in VVPNodes.def too + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + return VEISD::VVP_LOAD; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + return VEISD::VVP_STORE; } return None; } +bool maySafelyIgnoreMask(SDValue Op) { + auto VVPOpc = getVVPOpcode(Op->getOpcode()); + auto Opc = VVPOpc.value_or(Op->getOpcode()); + + switch (Opc) { + case VEISD::VVP_SDIV: + case VEISD::VVP_UDIV: + case VEISD::VVP_FDIV: + case VEISD::VVP_SELECT: + return false; + + default: + return true; + } +} + +bool supportsPackedMode(unsigned Opcode, EVT IdiomVT) { + bool IsPackedOp = isPackedVectorType(IdiomVT); + bool IsMaskOp = isMaskType(IdiomVT); + switch (Opcode) { + default: + return false; + + case VEISD::VEC_BROADCAST: + return true; +#define REGISTER_PACKED(VVP_NAME) case VEISD::VVP_NAME: +#include "VVPNodes.def" + return IsPackedOp && !IsMaskOp; + } +} + +bool isPackingSupportOpcode(unsigned Opc) { + switch (Opc) { + case VEISD::VEC_PACK: + case VEISD::VEC_UNPACK_LO: + case VEISD::VEC_UNPACK_HI: + return true; + } + return false; +} + +bool isVVPOrVEC(unsigned Opcode) { + switch (Opcode) { + case VEISD::VEC_BROADCAST: +#define ADD_VVP_OP(VVPNAME, ...) case VEISD::VVPNAME: +#include "VVPNodes.def" + return true; + } + return false; +} + +bool isVVPUnaryOp(unsigned VVPOpcode) { + switch (VVPOpcode) { +#define ADD_UNARY_VVP_OP(VVPNAME, ...) \ + case VEISD::VVPNAME: \ + return true; +#include "VVPNodes.def" + } + return false; +} + bool isVVPBinaryOp(unsigned VVPOpcode) { switch (VVPOpcode) { #define ADD_BINARY_VVP_OP(VVPNAME, ...) \ @@ -52,16 +153,308 @@ bool isVVPBinaryOp(unsigned VVPOpcode) { return false; } +bool isVVPReductionOp(unsigned Opcode) { + switch (Opcode) { +#define ADD_REDUCE_VVP_OP(VVP_NAME, SDNAME) case VEISD::VVP_NAME: +#include "VVPNodes.def" + return true; + } + return false; +} + +// Return the AVL operand position for this VVP or VEC Op. +Optional getAVLPos(unsigned Opc) { + // This is only available for VP SDNodes + auto PosOpt = ISD::getVPExplicitVectorLengthIdx(Opc); + if (PosOpt) + return *PosOpt; + + // VVP Opcodes. + if (isVVPBinaryOp(Opc)) + return 3; + + // VM Opcodes. + switch (Opc) { + case VEISD::VEC_BROADCAST: + return 1; + case VEISD::VVP_SELECT: + return 3; + case VEISD::VVP_LOAD: + return 4; + case VEISD::VVP_STORE: + return 5; + } + + return None; +} + +Optional getMaskPos(unsigned Opc) { + // This is only available for VP SDNodes + auto PosOpt = ISD::getVPMaskIdx(Opc); + if (PosOpt) + return *PosOpt; + + // VVP Opcodes. + if (isVVPBinaryOp(Opc)) + return 2; + + // Other opcodes. 
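+  // (Masked memory SDNodes keep their mask at a fixed operand index:
+  // MSTORE is (chain, value, base, offset, mask) and MLOAD is
+  // (chain, base, offset, mask, passthru), hence 4 and 3 below.)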
+ switch (Opc) { + case ISD::MSTORE: + return 4; + case ISD::MLOAD: + return 3; + case VEISD::VVP_SELECT: + return 2; + } + + return None; +} + +bool isLegalAVL(SDValue AVL) { return AVL->getOpcode() == VEISD::LEGALAVL; } + +/// Node Properties { + +SDValue getNodeChain(SDValue Op) { + if (MemSDNode *MemN = dyn_cast(Op.getNode())) + return MemN->getChain(); + + switch (Op->getOpcode()) { + case VEISD::VVP_LOAD: + case VEISD::VVP_STORE: + return Op->getOperand(0); + } + return SDValue(); +} + +SDValue getMemoryPtr(SDValue Op) { + if (auto *MemN = dyn_cast(Op.getNode())) + return MemN->getBasePtr(); + + switch (Op->getOpcode()) { + case VEISD::VVP_LOAD: + return Op->getOperand(1); + case VEISD::VVP_STORE: + return Op->getOperand(2); + } + return SDValue(); +} + +Optional getIdiomaticVectorType(SDNode *Op) { + unsigned OC = Op->getOpcode(); + + // For memory ops -> the transfered data type + if (auto MemN = dyn_cast(Op)) + return MemN->getMemoryVT(); + + switch (OC) { + // Standard ISD. + case ISD::SELECT: // not aliased with VVP_SELECT + case ISD::CONCAT_VECTORS: + case ISD::EXTRACT_SUBVECTOR: + case ISD::VECTOR_SHUFFLE: + case ISD::BUILD_VECTOR: + case ISD::SCALAR_TO_VECTOR: + return Op->getValueType(0); + } + + // Translate to VVP where possible. + unsigned OriginalOC = OC; + if (auto VVPOpc = getVVPOpcode(OC)) + OC = *VVPOpc; + + if (isVVPReductionOp(OC)) + return Op->getOperand(hasReductionStartParam(OriginalOC) ? 1 : 0) + .getValueType(); + + switch (OC) { + default: + case VEISD::VVP_SETCC: + return Op->getOperand(0).getValueType(); + + case VEISD::VVP_SELECT: +#define ADD_BINARY_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME: +#include "VVPNodes.def" + return Op->getValueType(0); + + case VEISD::VVP_LOAD: + return Op->getValueType(0); + + case VEISD::VVP_STORE: + return Op->getOperand(1)->getValueType(0); + + // VEC + case VEISD::VEC_BROADCAST: + return Op->getValueType(0); + } +} + +SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG) { + switch (Op->getOpcode()) { + case VEISD::VVP_STORE: + return Op->getOperand(3); + case VEISD::VVP_LOAD: + return Op->getOperand(2); + } + + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getStride(); + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getStride(); + + if (isa(Op.getNode())) { + // Regular MLOAD/MSTORE/LOAD/STORE + // No stride argument -> use the contiguous element size as stride. 
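+    // E.g. a contiguous v256i64 access yields a synthesized stride of 8
+    // bytes, a v256i32 access one of 4 bytes.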
+ uint64_t ElemStride = getIdiomaticVectorType(Op.getNode()) + ->getVectorElementType() + .getStoreSize(); + return CDAG.getConstant(ElemStride, MVT::i64); + } + return SDValue(); +} + +SDValue getGatherScatterIndex(SDValue Op) { + if (auto *N = dyn_cast(Op.getNode())) + return N->getIndex(); + if (auto *N = dyn_cast(Op.getNode())) + return N->getIndex(); + return SDValue(); +} + +SDValue getGatherScatterScale(SDValue Op) { + if (auto *N = dyn_cast(Op.getNode())) + return N->getScale(); + if (auto *N = dyn_cast(Op.getNode())) + return N->getScale(); + return SDValue(); +} + +SDValue getStoredValue(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + case VEISD::VVP_STORE: + return Op->getOperand(1); + } + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getValue(); + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getValue(); + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getValue(); + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getValue(); + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getValue(); + if (auto *StoreN = dyn_cast(Op.getNode())) + return StoreN->getValue(); + return SDValue(); +} + +SDValue getNodePassthru(SDValue Op) { + if (auto *N = dyn_cast(Op.getNode())) + return N->getPassThru(); + if (auto *N = dyn_cast(Op.getNode())) + return N->getPassThru(); + + return SDValue(); +} + +bool hasReductionStartParam(unsigned OPC) { + // TODO: Ordered reduction opcodes. + if (ISD::isVPReduction(OPC)) + return true; + return false; +} + +unsigned getScalarReductionOpcode(unsigned VVPOC, bool IsMask) { + assert(!IsMask && "Mask reduction isel"); + + switch (VVPOC) { +#define HANDLE_VVP_REDUCE_TO_SCALAR(VVP_RED_ISD, REDUCE_ISD) \ + case VEISD::VVP_RED_ISD: \ + return ISD::REDUCE_ISD; +#include "VVPNodes.def" + default: + break; + } + llvm_unreachable("Cannot not scalarize this reduction Opcode!"); +} + +/// } Node Properties + +SDValue getNodeAVL(SDValue Op) { + auto PosOpt = getAVLPos(Op->getOpcode()); + return PosOpt ? Op->getOperand(*PosOpt) : SDValue(); +} + +SDValue getNodeMask(SDValue Op) { + auto PosOpt = getMaskPos(Op->getOpcode()); + return PosOpt ? Op->getOperand(*PosOpt) : SDValue(); +} + +std::pair getAnnotatedNodeAVL(SDValue Op) { + SDValue AVL = getNodeAVL(Op); + if (!AVL) + return {SDValue(), true}; + if (isLegalAVL(AVL)) + return {AVL->getOperand(0), true}; + return {AVL, false}; +} + SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget, bool IsOpaque) const { return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque); } +SDValue VECustomDAG::getConstantMask(Packing Packing, bool AllTrue) const { + auto MaskVT = getLegalVectorType(Packing, MVT::i1); + + // VEISelDAGtoDAG will replace this pattern with the constant-true VM. + auto TrueVal = DAG.getConstant(-1, DL, MVT::i32); + auto AVL = getConstant(MaskVT.getVectorNumElements(), MVT::i32); + auto Res = getNode(VEISD::VEC_BROADCAST, MaskVT, {TrueVal, AVL}); + if (AllTrue) + return Res; + + return DAG.getNOT(DL, Res, Res.getValueType()); +} + +SDValue VECustomDAG::getMaskBroadcast(EVT ResultVT, SDValue Scalar, + SDValue AVL) const { + // Constant mask splat. + if (auto BcConst = dyn_cast(Scalar)) + return getConstantMask(getTypePacking(ResultVT), + BcConst->getSExtValue() != 0); + + // Expand the broadcast to a vector comparison. + auto ScalarBoolVT = Scalar.getSimpleValueType(); + assert(ScalarBoolVT == MVT::i32); + + // Cast to i32 ty. 
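+  // (The assert above already guarantees an i32 scalar, so the getSExtOrTrunc
+  // below is currently a no-op; it merely keeps this path correct should
+  // other scalar bool widths ever be allowed.)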
+ SDValue CmpElem = DAG.getSExtOrTrunc(Scalar, DL, MVT::i32); + unsigned ElemCount = ResultVT.getVectorNumElements(); + MVT CmpVecTy = MVT::getVectorVT(ScalarBoolVT, ElemCount); + + // Broadcast to vector. + SDValue BCVec = + DAG.getNode(VEISD::VEC_BROADCAST, DL, CmpVecTy, {CmpElem, AVL}); + SDValue ZeroVec = + getBroadcast(CmpVecTy, {DAG.getConstant(0, DL, ScalarBoolVT)}, AVL); + + MVT BoolVecTy = MVT::getVectorVT(MVT::i1, ElemCount); + + // Broadcast(Data) != Broadcast(0) + // TODO: Use a VVP operation for this. + return DAG.getSetCC(DL, BoolVecTy, BCVec, ZeroVec, ISD::CondCode::SETNE); +} + SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const { assert(ResultVT.isVector()); auto ScaVT = Scalar.getValueType(); - assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts"); + + if (isMaskType(ResultVT)) + return getMaskBroadcast(ResultVT, Scalar, AVL); if (isPackedVectorType(ResultVT)) { // v512x packed mode broadcast @@ -78,4 +471,119 @@ SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar, return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL}); } +SDValue VECustomDAG::annotateLegalAVL(SDValue AVL) const { + if (isLegalAVL(AVL)) + return AVL; + return getNode(VEISD::LEGALAVL, AVL.getValueType(), AVL); +} + +SDValue VECustomDAG::getUnpack(EVT DestVT, SDValue Vec, PackElem Part, + SDValue AVL) const { + assert(getAnnotatedNodeAVL(AVL).second && "Expected a pack-legalized AVL"); + + // TODO: Peek through VEC_PACK and VEC_BROADCAST(REPL_ ..) operands. + unsigned OC = + (Part == PackElem::Lo) ? VEISD::VEC_UNPACK_LO : VEISD::VEC_UNPACK_HI; + return DAG.getNode(OC, DL, DestVT, Vec, AVL); +} + +SDValue VECustomDAG::getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, + SDValue AVL) const { + assert(getAnnotatedNodeAVL(AVL).second && "Expected a pack-legalized AVL"); + + // TODO: Peek through VEC_UNPACK_LO|HI operands. + return DAG.getNode(VEISD::VEC_PACK, DL, DestVT, LoVec, HiVec, AVL); +} + +VETargetMasks VECustomDAG::getTargetSplitMask(SDValue RawMask, SDValue RawAVL, + PackElem Part) const { + // Adjust AVL for this part + SDValue NewAVL; + SDValue OneV = getConstant(1, MVT::i32); + if (Part == PackElem::Hi) + NewAVL = getNode(ISD::ADD, MVT::i32, {RawAVL, OneV}); + else + NewAVL = RawAVL; + NewAVL = getNode(ISD::SRL, MVT::i32, {NewAVL, OneV}); + + NewAVL = annotateLegalAVL(NewAVL); + + // Legalize Mask (unpack or all-true) + SDValue NewMask; + if (!RawMask) + NewMask = getConstantMask(Packing::Normal, true); + else + NewMask = getUnpack(MVT::v256i1, RawMask, Part, NewAVL); + + return VETargetMasks(NewMask, NewAVL); +} + +SDValue VECustomDAG::getSplitPtrOffset(SDValue Ptr, SDValue ByteStride, + PackElem Part) const { + // High starts at base ptr but has more significant bits in the 64bit vector + // element. + if (Part == PackElem::Hi) + return Ptr; + return getNode(ISD::ADD, MVT::i64, {Ptr, ByteStride}); +} + +SDValue VECustomDAG::getSplitPtrStride(SDValue PackStride) const { + if (auto ConstBytes = dyn_cast(PackStride)) + return getConstant(2 * ConstBytes->getSExtValue(), MVT::i64); + return getNode(ISD::SHL, MVT::i64, {PackStride, getConstant(1, MVT::i32)}); +} + +SDValue VECustomDAG::getGatherScatterAddress(SDValue BasePtr, SDValue Scale, + SDValue Index, SDValue Mask, + SDValue AVL) const { + EVT IndexVT = Index.getValueType(); + + // Apply scale. 
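+  // (Scale is the constant byte multiplier of the gather/scatter index,
+  // typically the element store size, e.g. 8 for f64 data; a non-unit Scale
+  // is applied by multiplying the index vector with a Scale splat below.)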
+ SDValue ScaledIndex; + if (!Scale || isOneConstant(Scale)) + ScaledIndex = Index; + else { + SDValue ScaleBroadcast = getBroadcast(IndexVT, Scale, AVL); + ScaledIndex = + getNode(VEISD::VVP_MUL, IndexVT, {Index, ScaleBroadcast, Mask, AVL}); + } + + // Add basePtr. + if (isNullConstant(BasePtr)) + return ScaledIndex; + + // re-constitute pointer vector (basePtr + index * scale) + SDValue BaseBroadcast = getBroadcast(IndexVT, BasePtr, AVL); + auto ResPtr = + getNode(VEISD::VVP_ADD, IndexVT, {BaseBroadcast, ScaledIndex, Mask, AVL}); + return ResPtr; +} + +SDValue VECustomDAG::getLegalReductionOpVVP(unsigned VVPOpcode, EVT ResVT, + SDValue StartV, SDValue VectorV, + SDValue Mask, SDValue AVL, + SDNodeFlags Flags) const { + + // Optionally attach the start param with a scalar op (where it is + // unsupported). + bool scalarizeStartParam = StartV && !hasReductionStartParam(VVPOpcode); + bool IsMaskReduction = isMaskType(VectorV.getValueType()); + assert(!IsMaskReduction && "TODO Implement"); + auto AttachStartValue = [&](SDValue ReductionResV) { + if (!scalarizeStartParam) + return ReductionResV; + auto ScalarOC = getScalarReductionOpcode(VVPOpcode, IsMaskReduction); + return getNode(ScalarOC, ResVT, {StartV, ReductionResV}); + }; + + // Fixup: Always use sequential 'fmul' reduction. + if (!scalarizeStartParam && StartV) { + assert(hasReductionStartParam(VVPOpcode)); + return AttachStartValue( + getNode(VVPOpcode, ResVT, {StartV, VectorV, Mask, AVL}, Flags)); + } else + return AttachStartValue( + getNode(VVPOpcode, ResVT, {VectorV, Mask, AVL}, Flags)); +} + } // namespace llvm diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h index ddd6ce783366..0d35c098048e 100644 --- a/llvm/lib/Target/VE/VECustomDAG.h +++ b/llvm/lib/Target/VE/VECustomDAG.h @@ -23,10 +23,122 @@ namespace llvm { Optional getVVPOpcode(unsigned Opcode); +bool isVVPUnaryOp(unsigned Opcode); bool isVVPBinaryOp(unsigned Opcode); +bool isVVPReductionOp(unsigned Opcode); + +MVT splitVectorType(MVT VT); bool isPackedVectorType(EVT SomeVT); +bool isMaskType(EVT SomeVT); + +bool isMaskArithmetic(SDValue Op); + +bool isVVPOrVEC(unsigned); + +bool supportsPackedMode(unsigned Opcode, EVT IdiomVT); + +bool isPackingSupportOpcode(unsigned Opc); + +bool maySafelyIgnoreMask(SDValue Op); + +/// The VE backend uses a two-stage process to lower and legalize vector +/// instructions: +// +/// 1. VP and standard vector SDNodes are lowered to SDNodes of the VVP_* layer. +// +// All VVP nodes have a mask and an Active Vector Length (AVL) parameter. +// The AVL parameter refers to the element position in the vector the VVP +// node operates on. +// +// +// 2. The VVP SDNodes are legalized. The AVL in a legal VVP node refers to +// chunks of 64bit. We track this by wrapping the AVL in a LEGALAVL node. +// +// The AVL mechanism in the VE architecture always refers to chunks of +// 64bit, regardless of the actual element type vector instructions are +// operating on. For vector types v256.32 or v256.64 nothing needs to be +// legalized since each element occupies a 64bit chunk - there is no +// difference between counting 64bit chunks or element positions. However, +// all vector types with > 256 elements store more than one logical element +// per 64bit chunk and need to be transformed. +// Regardless of how legalization is performed, the resulting legal VVP +// SDNodes will +// have a LEGALAVL node as their AVL operand.
The LEGALAVL nodes wraps +// around an AVL that refers to 64 bit chunks just as the architecture +// demands - that is, the wrapped AVL is the correct setting for the VL +// register for this VVP operation to get the desired behavior. +// +/// AVL Functions { +// The AVL operand position of this node. +Optional getAVLPos(unsigned); + +// Whether this is a LEGALAVL node. +bool isLegalAVL(SDValue AVL); + +// The AVL operand of this node. +SDValue getNodeAVL(SDValue); + +// Mask position of this node. +Optional getMaskPos(unsigned); + +SDValue getNodeMask(SDValue); + +// Return the AVL operand of this node. If it is a LEGALAVL node, unwrap it. +// Return with the boolean whether unwrapping happened. +std::pair getAnnotatedNodeAVL(SDValue); + +/// } AVL Functions + +/// Node Properties { + +Optional getIdiomaticVectorType(SDNode *Op); + +SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG); + +SDValue getMemoryPtr(SDValue Op); + +SDValue getNodeChain(SDValue Op); + +SDValue getStoredValue(SDValue Op); + +SDValue getNodePassthru(SDValue Op); + +SDValue getGatherScatterIndex(SDValue Op); + +SDValue getGatherScatterScale(SDValue Op); + +unsigned getScalarReductionOpcode(unsigned VVPOC, bool IsMask); + +// Whether this VP_REDUCE_*/ VECREDUCE_*/VVP_REDUCE_* SDNode has a start +// parameter. +bool hasReductionStartParam(unsigned VVPOC); + +/// } Node Properties + +enum class Packing { + Normal = 0, // 256 element standard mode. + Dense = 1 // 512 element packed mode. +}; + +// Get the vector or mask register type for this packing and element type. +MVT getLegalVectorType(Packing P, MVT ElemVT); + +// Whether this type belongs to a packed mask or vector register. +Packing getTypePacking(EVT); + +enum class PackElem : int8_t { + Lo = 0, // Integer (63, 32] + Hi = 1 // Float (32, 0] +}; + +struct VETargetMasks { + SDValue Mask; + SDValue AVL; + VETargetMasks(SDValue Mask = SDValue(), SDValue AVL = SDValue()) + : Mask(Mask), AVL(AVL) {} +}; + class VECustomDAG { SelectionDAG &DAG; SDLoc DL; @@ -68,10 +180,42 @@ public: SDValue getUNDEF(EVT VT) const { return DAG.getUNDEF(VT); } /// } getNode + /// Legalizing getNode { + SDValue getLegalReductionOpVVP(unsigned VVPOpcode, EVT ResVT, SDValue StartV, + SDValue VectorV, SDValue Mask, SDValue AVL, + SDNodeFlags Flags) const; + /// } Legalizing getNode + + /// Packing { + SDValue getUnpack(EVT DestVT, SDValue Vec, PackElem Part, SDValue AVL) const; + SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL) const; + /// } Packing + + SDValue getMergeValues(ArrayRef Values) const { + return DAG.getMergeValues(Values, DL); + } + SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false, bool IsOpaque = false) const; + SDValue getConstantMask(Packing Packing, bool AllTrue) const; + SDValue getMaskBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const; SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const; + + // Wrap AVL in a LEGALAVL node (unless it is one already). 
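+  // A rough sketch of both stages for a hypothetical 512-element add:
+  //   t1: v512i32 = add t0, t2
+  //     --> (lower)    v512i32 = vvp_add t0, t2, mask, AVL=512
+  //     --> (legalize) v512i32 = vvp_add t0, t2, mask, LEGALAVL(256)
+  // since the 512 packed elements occupy 256 64-bit chunks.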
+ SDValue annotateLegalAVL(SDValue AVL) const; + VETargetMasks getTargetSplitMask(SDValue RawMask, SDValue RawAVL, + PackElem Part) const; + + // Splitting support + SDValue getSplitPtrOffset(SDValue Ptr, SDValue ByteStride, + PackElem Part) const; + SDValue getSplitPtrStride(SDValue PackStride) const; + SDValue getGatherScatterAddress(SDValue BasePtr, SDValue Scale, SDValue Index, + SDValue Mask, SDValue AVL) const; + EVT getVectorVT(EVT ElemVT, unsigned NumElems) const { + return EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index e2608e82c9d4..a4319ec1c975 100644 --- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "VE.h" #include "VETargetMachine.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -335,6 +336,42 @@ void VEDAGToDAGISel::Select(SDNode *N) { } switch (N->getOpcode()) { + + // Late eliminate the LEGALAVL wrapper + case VEISD::LEGALAVL: + ReplaceNode(N, N->getOperand(0).getNode()); + return; + + // Lower (broadcast 1) and (broadcast 0) to VM[P]0 + case VEISD::VEC_BROADCAST: { + MVT SplatResTy = N->getSimpleValueType(0); + if (SplatResTy.getVectorElementType() != MVT::i1) + break; + + // Constant non-zero broadcast. + auto BConst = dyn_cast(N->getOperand(0)); + if (!BConst) + break; + bool BCTrueMask = (BConst->getSExtValue() != 0); + if (!BCTrueMask) + break; + + // Packed or non-packed. + SDValue New; + if (SplatResTy.getVectorNumElements() == StandardVectorWidth) { + New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VM0, + MVT::v256i1); + } else if (SplatResTy.getVectorNumElements() == PackedVectorWidth) { + New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VMP0, + MVT::v512i1); + } else + break; + + // Replace. 
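+    // An all-true i1 broadcast thus selects straight to the constant-true
+    // hardware mask register: VM0 for v256i1, the VMP0 pair for v512i1.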
+ ReplaceNode(N, New.getNode()); + return; + } + case VEISD::GLOBAL_BASE_REG: ReplaceNode(N, getGlobalBaseReg()); return; diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 9137c476777e..2eea65033870 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -76,6 +76,8 @@ bool VETargetLowering::CanLowerReturn( static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64, MVT::v256f32, MVT::v512f32, MVT::v256f64}; +static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1}; + static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32}; void VETargetLowering::initRegisterClasses() { @@ -294,6 +296,12 @@ void VETargetLowering::initSPUActions() { } void VETargetLowering::initVPUActions() { + for (MVT LegalMaskVT : AllMaskVTs) + setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom); + + for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR}) + setOperationAction(Opc, MVT::v512i1, Custom); + for (MVT LegalVecVT : AllVectorVTs) { setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal); @@ -307,6 +315,8 @@ void VETargetLowering::initVPUActions() { setOperationAction(ISD::VP_OPC, LegalVecVT, Custom); #define ADD_VVP_OP(VVP_NAME, ISD_NAME) \ setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom); #include "VVPNodes.def" } @@ -314,6 +324,32 @@ void VETargetLowering::initVPUActions() { setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom); } + + // vNt32, vNt64 ops (legal element types) + for (MVT VT : MVT::vector_valuetypes()) { + MVT ElemVT = VT.getVectorElementType(); + unsigned ElemBits = ElemVT.getScalarSizeInBits(); + if (ElemBits != 32 && ElemBits != 64) + continue; + + for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE}) + setOperationAction(MemOpc, VT, Custom); + + const ISD::NodeType IntReductionOCs[] = { + ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, + ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX}; + + for (unsigned IntRedOpc : IntReductionOCs) + setOperationAction(IntRedOpc, VT, Custom); + } + + // v256i1 and v512i1 ops + for (MVT MaskVT : AllMaskVTs) { + // Custom lower mask ops + setOperationAction(ISD::STORE, MaskVT, Custom); + setOperationAction(ISD::LOAD, MaskVT, Custom); + } } SDValue @@ -898,10 +934,15 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { TARGET_NODE_CASE(MEMBARRIER) TARGET_NODE_CASE(RET_FLAG) TARGET_NODE_CASE(TS1AM) + TARGET_NODE_CASE(VEC_UNPACK_LO) + TARGET_NODE_CASE(VEC_UNPACK_HI) + TARGET_NODE_CASE(VEC_PACK) TARGET_NODE_CASE(VEC_BROADCAST) TARGET_NODE_CASE(REPL_I32) TARGET_NODE_CASE(REPL_F32) + TARGET_NODE_CASE(LEGALAVL) + // Register the VVP_* SDNodes. #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME) #include "VVPNodes.def" @@ -1305,9 +1346,81 @@ static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(Ops, DL); } +// Lower a vXi1 load into following instructions +// LDrii %1, (,%addr) +// LVMxir %vm, 0, %1 +// LDrii %2, 8(,%addr) +// LVMxir %vm, 0, %2 +// ... 
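+// i.e. one 64-bit scalar load per 64 mask bits, each inserted into the mask
+// register at sub-register index i via LVM: 4 loads for v256i1, 8 for v512i1.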
+static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + LoadSDNode *LdNode = dyn_cast(Op.getNode()); + assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = LdNode->getBasePtr(); + unsigned Alignment = LdNode->getAlign().value(); + if (Alignment > 8) + Alignment = 8; + + EVT AddrVT = BasePtr.getValueType(); + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) { + SDValue OutChains[4]; + SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT); + for (int i = 0; i < 4; ++i) { + // Generate load dag and prepare chains. + SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr, + DAG.getConstant(8 * i, DL, AddrVT)); + SDValue Val = + DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr, + LdNode->getPointerInfo(), Alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + OutChains[i] = SDValue(Val.getNode(), 1); + + VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64, + DAG.getTargetConstant(i, DL, MVT::i64), Val, + SDValue(VM, 0)); + } + SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(VM, 0), OutChain}; + return DAG.getMergeValues(Ops, DL); + } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) { + SDValue OutChains[8]; + SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT); + for (int i = 0; i < 8; ++i) { + // Generate load dag and prepare chains. + SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr, + DAG.getConstant(8 * i, DL, AddrVT)); + SDValue Val = + DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr, + LdNode->getPointerInfo(), Alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + OutChains[i] = SDValue(Val.getNode(), 1); + + VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64, + DAG.getTargetConstant(i, DL, MVT::i64), Val, + SDValue(VM, 0)); + } + SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(VM, 0), OutChain}; + return DAG.getMergeValues(Ops, DL); + } else { + // Otherwise, ask llvm to expand it. + return SDValue(); + } +} + SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LdNode = cast(Op.getNode()); + EVT MemVT = LdNode->getMemoryVT(); + + // Dispatch to vector isel. + if (MemVT.isVector() && !isMaskType(MemVT)) + return lowerToVVP(Op, DAG); + SDValue BasePtr = LdNode->getBasePtr(); if (isa(BasePtr.getNode())) { // Do not expand store instruction with frame index here because of @@ -1315,9 +1428,10 @@ SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { return Op; } - EVT MemVT = LdNode->getMemoryVT(); if (MemVT == MVT::f128) return lowerLoadF128(Op, DAG); + if (isMaskType(MemVT)) + return lowerLoadI1(Op, DAG); return Op; } @@ -1358,10 +1472,68 @@ static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); } +// Lower a vXi1 store into following instructions +// SVMi %1, %vm, 0 +// STrii %1, (,%addr) +// SVMi %2, %vm, 1 +// STrii %2, 8(,%addr) +// ... 
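+// i.e. the mirror image of lowerLoadI1: SVM extracts 64 mask bits at a time
+// into a scalar register, which a plain 64-bit store then writes back:
+// 4 iterations for v256i1, 8 for v512i1.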
+static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + StoreSDNode *StNode = dyn_cast(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = StNode->getBasePtr(); + unsigned Alignment = StNode->getAlign().value(); + if (Alignment > 8) + Alignment = 8; + EVT AddrVT = BasePtr.getValueType(); + EVT MemVT = StNode->getMemoryVT(); + if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) { + SDValue OutChains[4]; + for (int i = 0; i < 4; ++i) { + SDNode *V = + DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(), + DAG.getTargetConstant(i, DL, MVT::i64)); + SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr, + DAG.getConstant(8 * i, DL, AddrVT)); + OutChains[i] = + DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr, + MachinePointerInfo(), Alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) { + SDValue OutChains[8]; + for (int i = 0; i < 8; ++i) { + SDNode *V = + DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(), + DAG.getTargetConstant(i, DL, MVT::i64)); + SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr, + DAG.getConstant(8 * i, DL, AddrVT)); + OutChains[i] = + DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr, + MachinePointerInfo(), Alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } else { + // Otherwise, ask llvm to expand it. + return SDValue(); + } +} + SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *StNode = cast(Op.getNode()); assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + // always expand non-mask vector stores to VVP + EVT MemVT = StNode->getMemoryVT(); + if (MemVT.isVector() && !isMaskType(MemVT)) + return lowerToVVP(Op, DAG); + SDValue BasePtr = StNode->getBasePtr(); if (isa(BasePtr.getNode())) { // Do not expand store instruction with frame index here because of @@ -1369,9 +1541,10 @@ SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { - EVT MemVT = StNode->getMemoryVT(); if (MemVT == MVT::f128) return lowerStoreF128(Op, DAG); + if (isMaskType(MemVT)) + return lowerStoreI1(Op, DAG); // Otherwise, ask llvm to expand it. return SDValue(); @@ -1410,9 +1583,9 @@ SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue NextPtr; if (VT == MVT::f128) { - // VE f128 values must be stored with 16 bytes alignment. We doesn't + // VE f128 values must be stored with 16 bytes alignment. We don't // know the actual alignment of VAList, so we take alignment of it - // dyanmically. + // dynamically. int Align = 16; VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(Align - 1, DL, PtrVT)); @@ -1658,25 +1831,37 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op, // Else emit a broadcast. if (SDValue ScalarV = getSplatValue(Op.getNode())) { unsigned NumEls = ResultVT.getVectorNumElements(); - // TODO: Legalize packed-mode AVL.
- auto CappedLength = std::min(256, NumEls); - auto AVL = CDAG.getConstant(CappedLength, MVT::i32); - return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL); + auto AVL = CDAG.getConstant(NumEls, MVT::i32); + return CDAG.getBroadcast(ResultVT, ScalarV, AVL); } // Expand return SDValue(); } +TargetLowering::LegalizeAction +VETargetLowering::getCustomOperationAction(SDNode &Op) const { + // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize + // these operations (transform nodes such that their AVL parameter refers to + // packs of 64bit, instead of number of elements. + + // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to + // re-visit them. + if (isPackingSupportOpcode(Op.getOpcode())) + return Legal; + + // Custom lower to legalize AVL for packed mode. + if (isVVPOrVEC(Op.getOpcode())) + return Custom; + return Legal; +} + SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs());); unsigned Opcode = Op.getOpcode(); - if (ISD::isVPOpcode(Opcode)) - return lowerToVVP(Op, DAG); + /// Scalar isel. switch (Opcode) { - default: - llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case ISD::ATOMIC_SWAP: @@ -1720,9 +1905,33 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); + } + /// Vector isel. + LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs());); + if (ISD::isVPOpcode(Opcode)) + return lowerToVVP(Op, DAG); + + switch (Opcode) { + default: + llvm_unreachable("Should not custom lower this!"); + + // Legalize the AVL of this internal node. + case VEISD::VEC_BROADCAST: +#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME: +#include "VVPNodes.def" + // AVL already legalized. + if (getAnnotatedNodeAVL(Op).second) + return Op; + return legalizeInternalVectorOp(Op, DAG); + + // Translate into a VEC_*/VVP_* layer operation. + case ISD::MLOAD: + case ISD::MSTORE: #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME: #include "VVPNodes.def" + if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType())) + return splitMaskArithmetic(Op, DAG); return lowerToVVP(Op, DAG); } } @@ -2667,52 +2876,6 @@ bool VETargetLowering::hasAndNot(SDValue Y) const { return true; } -SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { - // Can we represent this as a VVP node. - const unsigned Opcode = Op->getOpcode(); - auto VVPOpcodeOpt = getVVPOpcode(Opcode); - if (!VVPOpcodeOpt.hasValue()) - return SDValue(); - unsigned VVPOpcode = VVPOpcodeOpt.getValue(); - const bool FromVP = ISD::isVPOpcode(Opcode); - - // The representative and legalized vector type of this operation. - VECustomDAG CDAG(DAG, Op); - MVT MaskVT = MVT::v256i1; // TODO: packed mode. - EVT OpVecVT = Op.getValueType(); - EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT); - - SDValue AVL; - SDValue Mask; - - if (FromVP) { - // All upstream VP SDNodes always have a mask and avl. - auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue(); - auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue(); - Mask = Op->getOperand(MaskIdx); - AVL = Op->getOperand(AVLIdx); - - } else { - // Materialize the VL parameter. 
- AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32); - SDValue ConstTrue = CDAG.getConstant(1, MVT::i32); - Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL); - } - - if (isVVPBinaryOp(VVPOpcode)) { - assert(LegalVecVT.isSimple()); - return CDAG.getNode(VVPOpcode, LegalVecVT, - {Op->getOperand(0), Op->getOperand(1), Mask, AVL}); - } - if (VVPOpcode == VEISD::VVP_SELECT) { - auto Mask = Op->getOperand(0); - auto OnTrue = Op->getOperand(1); - auto OnFalse = Op->getOperand(2); - return CDAG.getNode(VVPOpcode, LegalVecVT, {OnTrue, OnFalse, Mask, AVL}); - } - llvm_unreachable("lowerToVVP called for unexpected SDNode."); -} - SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 09bd19e83717..087b0e215407 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -38,17 +38,30 @@ enum NodeType : unsigned { MEMBARRIER, // Compiler barrier only; generate a no-op. RET_FLAG, // Return with a flag operand. TS1AM, // A TS1AM instruction used for 1/2 bytes swap. - VEC_BROADCAST, // A vector broadcast instruction. - // 0: scalar value, 1: VL + VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector. + VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector. + // 0: v512 vector, 1: AVL + VEC_PACK, // pack a lo and a hi vector into one v512 vector + // 0: v256 lo vector, 1: v256 hi vector, 2: AVL + + VEC_BROADCAST, // A vector broadcast instruction. + // 0: scalar value, 1: VL REPL_I32, REPL_F32, // Replicate subregister to other half. + // Annotation as a wrapper. LEGALAVL(VL) means that VL refers to 64bit of + // data, whereas the raw EVL coming in from VP nodes always refers to number + // of elements, regardless of their size. + LEGALAVL, + // VVP_* nodes. #define ADD_VVP_OP(VVP_NAME, ...) 
VVP_NAME, #include "VVPNodes.def" }; } +class VECustomDAG; + class VETargetLowering : public TargetLowering { const VESubtarget *Subtarget; @@ -105,6 +118,9 @@ public: } /// Custom Lower { + TargetLoweringBase::LegalizeAction + getCustomOperationAction(SDNode &) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; unsigned getJumpTableEncoding() const override; const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, @@ -170,6 +186,15 @@ public: /// VVP Lowering { SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG &) const; + SDValue lowerVVP_GATHER_SCATTER(SDValue Op, VECustomDAG &) const; + + SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const; + SDValue legalizeInternalLoadStoreOp(SDValue Op, VECustomDAG &CDAG) const; + SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const; + SDValue splitPackedLoadStore(SDValue Op, VECustomDAG &CDAG) const; + SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const; + SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const; /// } VVPLowering /// Custom DAGCombine { diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index 7c1bd5201867..94ebb59c4c77 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -811,7 +811,7 @@ static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) { // replace to pvfmk.w.up and pvfmk.w.lo // replace to pvfmk.s.up and pvfmk.s.lo - static std::map> VFMKMap = { + static const std::pair> VFMKMap[] = { {VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}}, {VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}}, {VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}}, @@ -822,8 +822,9 @@ static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) { unsigned Opcode = MI.getOpcode(); - auto Found = VFMKMap.find(Opcode); - if (Found == VFMKMap.end()) + const auto *Found = + llvm::find_if(VFMKMap, [&](auto P) { return P.first == Opcode; }); + if (Found == std::end(VFMKMap)) report_fatal_error("unexpected opcode for pseudo vfmk"); unsigned OpcodeUpper = (*Found).second.first; diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 717427c3f48d..85285749b4fa 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -875,14 +875,14 @@ multiclass BCRm opc, // e.g. LCR let hasSideEffects = 1 in multiclass LOADCRmopc, RegisterClass RC> { - def rr : RR; - let cy = 0 in def ri : RR; - let cz = 0 in def zr : RR; let cy = 0, cz = 0 in - def zi : RR; } @@ -890,17 +890,31 @@ multiclass LOADCRmopc, RegisterClass RC> { // e.g. SCR let hasSideEffects = 1 in multiclass STORECRmopc, RegisterClass RC> { - def rr : RR; - let cy = 0 in def ri : RR; - let cz = 0 in def zr : RR; + let cy = 0 in def irr : RR; + let cz = 0 in def rzr : RR; let cy = 0, cz = 0 in - def zi : RR; + def izr : RR; +} + +let hasSideEffects = 1, Constraints = "$sx = $sx_in", DisableEncoding = "$sx_in" in +multiclass TSCRmopc, RegisterClass RC> { + def rrr : RR; + let cy = 0 in def irr : RR; + let cz = 0 in def rzr : RR; + let cy = 0, cz = 0 in + def izr : RR; } + // Multiclass for communication register instructions. // e.g. 
FIDCR let cz = 0, hasSideEffects = 1 in @@ -1528,7 +1542,7 @@ defm LCR : LOADCRm<"lcr", 0x40, I64>; defm SCR : STORECRm<"scr", 0x50, I64>; // Section 8.19.11 - TSCR (Test & Set Communication Register) -defm TSCR : LOADCRm<"tscr", 0x41, I64>; +defm TSCR : TSCRm<"tscr", 0x41, I64>; // Section 8.19.12 - FIDCR (Fetch & Increment/Decrement CR) defm FIDCR : FIDCRm<"fidcr", 0x51, I64>; @@ -2293,6 +2307,18 @@ class IsVLVT : SDTCisVT; def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, [SDTCisVec<0>, IsVLVT<2>]>>; +///// Packed mode Support ///// +// unpack the lo part of this vector +def vec_unpack_lo : SDNode<"VEISD::VEC_UNPACK_LO", SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>; +// unpack the hipart of this vector +def vec_unpack_hi : SDNode<"VEISD::VEC_UNPACK_HI", SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>; +// re-pack v256i32, v256f32 back into tone v512.32 +def vec_pack : SDNode<"VEISD::VEC_PACK", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, + SDTCisSameNumEltsAs<1,2>, IsVLVT<3>]>>; + // replicate lower 32bit to upper 32bit (f32 scalar replication). def repl_f32 : SDNode<"VEISD::REPL_F32", SDTypeProfile<1, 1, diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td index 9ec10838db05..2ef621ae7477 100644 --- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td @@ -601,6 +601,42 @@ def : Pat<(int_ve_vl_pveqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVEQVrvl i64:$s def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldz_vvl v256f64:$vz, i32:$vl), (VLDZvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vldz_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VLDZvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldz_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VLDZvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzlo_vvl v256f64:$vz, i32:$vl), (PVLDZLOvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvldzlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVLDZLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzup_vvl v256f64:$vz, i32:$vl), (PVLDZUPvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvldzup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVLDZUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldz_vvl v256f64:$vz, i32:$vl), (PVLDZvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvldz_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldz_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVLDZvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vpcnt_vvl v256f64:$vz, i32:$vl), (VPCNTvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vpcnt_vvvl 
v256f64:$vz, v256f64:$pt, i32:$vl), (VPCNTvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vpcnt_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VPCNTvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntlo_vvl v256f64:$vz, i32:$vl), (PVPCNTLOvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvpcntlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntup_vvl v256f64:$vz, i32:$vl), (PVPCNTUPvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvpcntup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcnt_vvl v256f64:$vz, i32:$vl), (PVPCNTvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvpcnt_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcnt_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrv_vvl v256f64:$vz, i32:$vl), (VBRVvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vbrv_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VBRVvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrv_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRVvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvlo_vvl v256f64:$vz, i32:$vl), (PVBRVLOvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrvlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVBRVLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvup_vvl v256f64:$vz, i32:$vl), (PVBRVUPvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrvup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVBRVUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrv_vvl v256f64:$vz, i32:$vl), (PVBRVvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrv_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrv_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRVvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>; def : Pat<(int_ve_vl_vseq_vvl v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>; @@ -1602,3 +1638,21 @@ def : Pat<(int_ve_vl_negm_MM v512i1:$vmy), (NEGMy v512i1:$vmy)>; def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>; def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>; def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>; +def : Pat<(int_ve_vl_lcr_sss i64:$sy, i64:$sz), (LCRrr i64:$sy, i64:$sz)>; +def : Pat<(int_ve_vl_lcr_sss i64:$sy, zero:$Z), (LCRrz i64:$sy, (LO7 $Z))>; +def : Pat<(int_ve_vl_lcr_sss uimm7:$N, i64:$sz), (LCRir (ULO7 $N), i64:$sz)>; +def : Pat<(int_ve_vl_lcr_sss uimm7:$N, zero:$Z), 
(LCRiz (ULO7 $N), (LO7 $Z))>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, i64:$sy, i64:$sz), (SCRrrr i64:$sy, i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, i64:$sy, zero:$Z), (SCRrzr i64:$sy, (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, uimm7:$N, i64:$sz), (SCRirr (ULO7 $N), i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, uimm7:$N, zero:$Z), (SCRizr (ULO7 $N), (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, i64:$sy, i64:$sz), (TSCRrrr i64:$sy, i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, i64:$sy, zero:$Z), (TSCRrzr i64:$sy, (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, uimm7:$N, i64:$sz), (TSCRirr (ULO7 $N), i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, uimm7:$N, zero:$Z), (TSCRizr (ULO7 $N), (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_fidcr_sss i64:$sy, uimm3:$I), (FIDCRri i64:$sy, (LO7 $I))>; +def : Pat<(int_ve_vl_fidcr_sss uimm7:$N, uimm3:$I), (FIDCRii (ULO7 $N), (LO7 $I))>; +def : Pat<(int_ve_vl_fencei ), (FENCEI )>; +def : Pat<(int_ve_vl_fencem_s uimm2:$I), (FENCEM (LO7 $I))>; +def : Pat<(int_ve_vl_fencec_s uimm3:$I), (FENCEC (LO7 $I))>; +def : Pat<(int_ve_vl_svob ), (SVOB )>; diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td index 69ea133ceed0..fca0572cf9b1 100644 --- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td @@ -2,9 +2,6 @@ /// Intrinsic patterns written by hand. -// SVOB pattern. -def : Pat<(int_ve_vl_svob), (SVOB)>; - // Pack patterns. def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)), (ORrr (f2l (LDUrii MEMrii:$addr0)), diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td index 6c5b80315efb..71199717a3a2 100644 --- a/llvm/lib/Target/VE/VEInstrPatternsVec.td +++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td @@ -105,3 +105,46 @@ defm : vbrd_elem64; defm : vbrd_elem64; defm : vbrd_elem64; defm : vbrd_elem64; + +class Mask_Binary : + Pat<(MaskVT (MaskOp MaskVT:$ma, MaskVT:$mb)), (!cast(InstName#"mm") $ma, $mb)>; + +def: Mask_Binary; +def: Mask_Binary; +def: Mask_Binary; + +///// Packing support ///// + +// v256i1 <> v512i1 +def : Pat<(v256i1 (vec_unpack_lo v512i1:$vm, (i32 srcvalue))), + (EXTRACT_SUBREG $vm, sub_vm_odd)>; +def : Pat<(v256i1 (vec_unpack_hi v512i1:$vm, (i32 srcvalue))), + (EXTRACT_SUBREG $vm, sub_vm_even)>; +def : Pat<(v512i1 (vec_pack v256i1:$vlo, v256i1:$vhi, (i32 srcvalue))), + (INSERT_SUBREG (INSERT_SUBREG + (v512i1 (IMPLICIT_DEF)), + $vlo, sub_vm_odd), + $vhi, sub_vm_even)>; + +// v256.32 <> v512.32 +multiclass Packing { + // no-op unpacks + def : Pat<(v256i32 (vec_unpack_lo PackVT:$vp, (i32 srcvalue))), + (COPY_TO_REGCLASS $vp, V64)>; + def : Pat<(v256f32 (vec_unpack_hi PackVT:$vp, (i32 srcvalue))), + (COPY_TO_REGCLASS $vp, V64)>; + + // shuffle unpacks + def : Pat<(v256f32 (vec_unpack_lo PackVT:$vp, i32:$avl)), + (VSHFvvil $vp, $vp, 4, $avl)>; // always pick lo + def : Pat<(v256i32 (vec_unpack_hi PackVT:$vp, i32:$avl)), + (VSHFvvil $vp, $vp, 0, $avl)>; // always pick hi +} + +defm : Packing; +defm : Packing; + +def : Pat<(v512i32 (vec_pack v256i32:$vlo, v256i32:$vhi, i32:$avl)), + (VSHFvvil $vlo, $vhi, 13, $avl)>; +def : Pat<(v512f32 (vec_pack v256f32:$vlo, v256f32:$vhi, i32:$avl)), + (VSHFvvil $vlo, $vhi, 8, $avl)>; diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp index 1addfc7174eb..2ada2581291d 100644 --- a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp +++ 
b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp @@ -11,3 +11,10 @@ using namespace llvm; void VEMachineFunctionInfo::anchor() {} + +MachineFunctionInfo *VEMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const { + return DestMF.cloneInfo<VEMachineFunctionInfo>(*this); +} diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h index 3160f6a552d7..d9d30ad5b8c5 100644 --- a/llvm/lib/Target/VE/VEMachineFunctionInfo.h +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -33,6 +33,11 @@ public: explicit VEMachineFunctionInfo(MachineFunction &MF) : VarArgsFrameOffset(0), IsLeafProc(false) {} + + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const override; + Register getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; } diff --git a/llvm/lib/Target/VE/VERegisterInfo.td b/llvm/lib/Target/VE/VERegisterInfo.td index 70ff104b65b7..cca0ad26b3e9 100644 --- a/llvm/lib/Target/VE/VERegisterInfo.td +++ b/llvm/lib/Target/VE/VERegisterInfo.td @@ -152,8 +152,10 @@ foreach I = 0-15 in def VM#I : VEMaskReg, DwarfRegNum<[!add(128,I)]>; // Aliases of VMs to use as a pair of two VM for packed instructions +def VMP0 : VEMaskReg<0, "vm0", [], ["vm0"]>; + let SubRegIndices = [sub_vm_even, sub_vm_odd], CoveredBySubRegs = 1 in -foreach I = 0-7 in +foreach I = 1-7 in def VMP#I : VEMaskReg<!shl(I,1), "vmp"#I, [!cast<VEMaskReg>("VM"#!shl(I,1)), !cast<VEMaskReg>("VM"#!add(!shl(I,1),1))], diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp index 9f294f15da91..d7c1457fb0a8 100644 --- a/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -61,7 +61,7 @@ static std::string computeDataLayout(const Triple &T) { } static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { - return RM.getValueOr(Reloc::Static); + return RM.value_or(Reloc::Static); } class VEELFTargetObjectFile : public TargetLoweringObjectFileELF { @@ -90,9 +90,10 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT, initAsmInfo(); } -VETargetMachine::~VETargetMachine() {} +VETargetMachine::~VETargetMachine() = default; -TargetTransformInfo VETargetMachine::getTargetTransformInfo(const Function &F) { +TargetTransformInfo +VETargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(VETTIImpl(this, F)); } diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h index 041d3b197ec3..9cf194444aa5 100644 --- a/llvm/lib/Target/VE/VETargetMachine.h +++ b/llvm/lib/Target/VE/VETargetMachine.h @@ -49,7 +49,7 @@ public: bool isMachineVerifierClean() const override { return false; } - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; unsigned getSjLjDataSize() const override { return 64; } }; diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h index 0242fa1b0117..c68844708878 100644 --- a/llvm/lib/Target/VE/VETargetTransformInfo.h +++ b/llvm/lib/Target/VE/VETargetTransformInfo.h @@ -21,6 +21,32 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +static llvm::Type *getVectorElementType(llvm::Type *Ty) { + return llvm::cast<VectorType>(Ty)->getElementType(); +} + +static llvm::Type *getLaneType(llvm::Type *Ty) { + using namespace llvm; + if (!isa<VectorType>(Ty)) + return Ty; + return
getVectorElementType(Ty); +} + +static bool isVectorLaneType(llvm::Type &ElemTy) { + // Check element sizes for vector registers. + if (ElemTy.isIntegerTy()) { + unsigned ScaBits = ElemTy.getScalarSizeInBits(); + return ScaBits == 1 || ScaBits == 32 || ScaBits == 64; + } + if (ElemTy.isPointerTy()) { + return true; + } + if (ElemTy.isFloatTy() || ElemTy.isDoubleTy()) { + return true; + } + return false; +} + namespace llvm { class VETTIImpl : public BasicTTIImplBase<VETTIImpl> { @@ -35,6 +61,25 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> { bool enableVPU() const { return getST()->enableVPU(); } + static bool isSupportedReduction(Intrinsic::ID ReductionID) { +#define VEC_VP_CASE(SUFFIX) \ + case Intrinsic::vp_reduce_##SUFFIX: \ + case Intrinsic::vector_reduce_##SUFFIX: + + switch (ReductionID) { + VEC_VP_CASE(add) + VEC_VP_CASE(and) + VEC_VP_CASE(or) + VEC_VP_CASE(xor) + VEC_VP_CASE(smax) + return true; + + default: + return false; + } +#undef VEC_VP_CASE + } + public: explicit VETTIImpl(const VETargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -86,6 +131,27 @@ public: // output return false; } + + // Load & Store { + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { + return isVectorLaneType(*getLaneType(DataType)); + } + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { + return isVectorLaneType(*getLaneType(DataType)); + } + bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) { + return isVectorLaneType(*getLaneType(DataType)); + } + bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) { + return isVectorLaneType(*getLaneType(DataType)); + } + // } Load & Store + + bool shouldExpandReduction(const IntrinsicInst *II) const { + if (!enableVPU()) + return true; + return !isSupportedReduction(II->getIntrinsicID()); + } }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp new file mode 100644 index 000000000000..330eef4c7c2b --- /dev/null +++ b/llvm/lib/Target/VE/VVPISelLowering.cpp @@ -0,0 +1,443 @@ +//===-- VVPISelLowering.cpp - VE DAG Lowering Implementation --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering and legalization of vector instructions to +// VVP_* layer SDNodes.
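// For example, under the isVectorLaneType rule in the TTI hooks above, masked
// loads and stores of <256 x i32>, <256 x i64>, <256 x float>, <256 x double>
// and <256 x i1> are reported legal, while a <256 x i16> access fails the lane
// check and is left to the generic scalarizing expansion.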
+// +//===----------------------------------------------------------------------===// + +#include "VECustomDAG.h" +#include "VEISelLowering.h" + +using namespace llvm; + +#define DEBUG_TYPE "ve-lower" + +SDValue VETargetLowering::splitMaskArithmetic(SDValue Op, + SelectionDAG &DAG) const { + VECustomDAG CDAG(DAG, Op); + SDValue AVL = + CDAG.getConstant(Op.getValueType().getVectorNumElements(), MVT::i32); + SDValue A = Op->getOperand(0); + SDValue B = Op->getOperand(1); + SDValue LoA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Lo, AVL); + SDValue HiA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Hi, AVL); + SDValue LoB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Lo, AVL); + SDValue HiB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Hi, AVL); + unsigned Opc = Op.getOpcode(); + auto LoRes = CDAG.getNode(Opc, MVT::v256i1, {LoA, LoB}); + auto HiRes = CDAG.getNode(Opc, MVT::v256i1, {HiA, HiB}); + return CDAG.getPack(MVT::v512i1, LoRes, HiRes, AVL); +} + +SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { + // Can we represent this as a VVP node. + const unsigned Opcode = Op->getOpcode(); + auto VVPOpcodeOpt = getVVPOpcode(Opcode); + if (!VVPOpcodeOpt) + return SDValue(); + unsigned VVPOpcode = VVPOpcodeOpt.getValue(); + const bool FromVP = ISD::isVPOpcode(Opcode); + + // The representative and legalized vector type of this operation. + VECustomDAG CDAG(DAG, Op); + // Dispatch to complex lowering functions. + switch (VVPOpcode) { + case VEISD::VVP_LOAD: + case VEISD::VVP_STORE: + return lowerVVP_LOAD_STORE(Op, CDAG); + case VEISD::VVP_GATHER: + case VEISD::VVP_SCATTER: + return lowerVVP_GATHER_SCATTER(Op, CDAG); + } + + EVT OpVecVT = *getIdiomaticVectorType(Op.getNode()); + EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT); + auto Packing = getTypePacking(LegalVecVT.getSimpleVT()); + + SDValue AVL; + SDValue Mask; + + if (FromVP) { + // All upstream VP SDNodes always have a mask and avl. + auto MaskIdx = ISD::getVPMaskIdx(Opcode); + auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode); + if (MaskIdx) + Mask = Op->getOperand(*MaskIdx); + if (AVLIdx) + AVL = Op->getOperand(*AVLIdx); + } + + // Materialize default mask and avl. + if (!AVL) + AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32); + if (!Mask) + Mask = CDAG.getConstantMask(Packing, true); + + assert(LegalVecVT.isSimple()); + if (isVVPUnaryOp(VVPOpcode)) + return CDAG.getNode(VVPOpcode, LegalVecVT, {Op->getOperand(0), Mask, AVL}); + if (isVVPBinaryOp(VVPOpcode)) + return CDAG.getNode(VVPOpcode, LegalVecVT, + {Op->getOperand(0), Op->getOperand(1), Mask, AVL}); + if (isVVPReductionOp(VVPOpcode)) { + auto SrcHasStart = hasReductionStartParam(Op->getOpcode()); + SDValue StartV = SrcHasStart ? Op->getOperand(0) : SDValue(); + SDValue VectorV = Op->getOperand(SrcHasStart ? 1 : 0); + return CDAG.getLegalReductionOpVVP(VVPOpcode, Op.getValueType(), StartV, + VectorV, Mask, AVL, Op->getFlags()); + } + + switch (VVPOpcode) { + default: + llvm_unreachable("lowerToVVP called for unexpected SDNode."); + case VEISD::VVP_FFMA: { + // VE has a swizzled operand order in FMA (compared to LLVM IR and + // SDNodes). 
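// Concretely: ISD::FMA(a, b, c) computes a * b + c, while the VE form takes
// the addend first, i.e. VVP_FFMA(x, y, z) = y * z + x; the reordering below
// therefore maps the generic operands (a, b, c) to (c, a, b).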
+ auto X = Op->getOperand(2); + auto Y = Op->getOperand(0); + auto Z = Op->getOperand(1); + return CDAG.getNode(VVPOpcode, LegalVecVT, {X, Y, Z, Mask, AVL}); + } + case VEISD::VVP_SELECT: { + auto Mask = Op->getOperand(0); + auto OnTrue = Op->getOperand(1); + auto OnFalse = Op->getOperand(2); + return CDAG.getNode(VVPOpcode, LegalVecVT, {OnTrue, OnFalse, Mask, AVL}); + } + case VEISD::VVP_SETCC: { + EVT LegalResVT = getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + auto LHS = Op->getOperand(0); + auto RHS = Op->getOperand(1); + auto Pred = Op->getOperand(2); + return CDAG.getNode(VVPOpcode, LegalResVT, {LHS, RHS, Pred, Mask, AVL}); + } + } +} + +SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op, + VECustomDAG &CDAG) const { + auto VVPOpc = *getVVPOpcode(Op->getOpcode()); + const bool IsLoad = (VVPOpc == VEISD::VVP_LOAD); + + // Shares. + SDValue BasePtr = getMemoryPtr(Op); + SDValue Mask = getNodeMask(Op); + SDValue Chain = getNodeChain(Op); + SDValue AVL = getNodeAVL(Op); + // Store specific. + SDValue Data = getStoredValue(Op); + // Load specific. + SDValue PassThru = getNodePassthru(Op); + + SDValue StrideV = getLoadStoreStride(Op, CDAG); + + auto DataVT = *getIdiomaticVectorType(Op.getNode()); + auto Packing = getTypePacking(DataVT); + + // TODO: Infer lower AVL from mask. + if (!AVL) + AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32); + + // Default to the all-true mask. + if (!Mask) + Mask = CDAG.getConstantMask(Packing, true); + + if (IsLoad) { + MVT LegalDataVT = getLegalVectorType( + Packing, DataVT.getVectorElementType().getSimpleVT()); + + auto NewLoadV = CDAG.getNode(VEISD::VVP_LOAD, {LegalDataVT, MVT::Other}, + {Chain, BasePtr, StrideV, Mask, AVL}); + + if (!PassThru || PassThru->isUndef()) + return NewLoadV; + + // Convert passthru to an explicit select node. + SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, DataVT, + {NewLoadV, PassThru, Mask, AVL}); + SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1); + + // Merge them back into one node. + return CDAG.getMergeValues({DataV, NewLoadChainV}); + } + + // VVP_STORE + assert(VVPOpc == VEISD::VVP_STORE); + return CDAG.getNode(VEISD::VVP_STORE, Op.getNode()->getVTList(), + {Chain, Data, BasePtr, StrideV, Mask, AVL}); +} + +SDValue VETargetLowering::splitPackedLoadStore(SDValue Op, + VECustomDAG &CDAG) const { + auto VVPOC = *getVVPOpcode(Op.getOpcode()); + assert((VVPOC == VEISD::VVP_LOAD) || (VVPOC == VEISD::VVP_STORE)); + + MVT DataVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT(); + assert(getTypePacking(DataVT) == Packing::Dense && + "Can only split packed load/store"); + MVT SplitDataVT = splitVectorType(DataVT); + + assert(!getNodePassthru(Op) && + "Should have been folded in lowering to VVP layer"); + + // Analyze the operation + SDValue PackedMask = getNodeMask(Op); + SDValue PackedAVL = getAnnotatedNodeAVL(Op).first; + SDValue PackPtr = getMemoryPtr(Op); + SDValue PackData = getStoredValue(Op); + SDValue PackStride = getLoadStoreStride(Op, CDAG); + + unsigned ChainResIdx = PackData ? 0 : 1; + + SDValue PartOps[2]; + + SDValue UpperPartAVL; // we will use this for packing things back together + for (PackElem Part : {PackElem::Hi, PackElem::Lo}) { + // VP ops already have an explicit mask and AVL. When expanding from non-VP + // attach those additional inputs here. + auto SplitTM = CDAG.getTargetSplitMask(PackedMask, PackedAVL, Part); + + // Keep track of the (higher) lvl. 
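// Each 64-bit lane of a packed vector holds two 32-bit values, so every
// PackElem half of this access becomes an ordinary strided access below:
// getSplitPtrOffset() biases the base pointer to the half's first element and
// getSplitPtrStride() doubles the stride. The Hi part's AVL is kept so the two
// part results can be re-packed afterwards.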
+ if (Part == PackElem::Hi) + UpperPartAVL = SplitTM.AVL; + + // Attach non-predicating value operands + SmallVector<SDValue> OpVec; + + // Chain + OpVec.push_back(getNodeChain(Op)); + + // Data + if (PackData) { + SDValue PartData = + CDAG.getUnpack(SplitDataVT, PackData, Part, SplitTM.AVL); + OpVec.push_back(PartData); + } + + // Ptr & Stride + // Push (ptr + ElemBytes * <Part>, 2 * ElemBytes) + // Stride info + // EVT DataVT = LegalizeVectorType(getMemoryDataVT(Op), Op, DAG, Mode); + OpVec.push_back(CDAG.getSplitPtrOffset(PackPtr, PackStride, Part)); + OpVec.push_back(CDAG.getSplitPtrStride(PackStride)); + + // Add predicating args and generate part node + OpVec.push_back(SplitTM.Mask); + OpVec.push_back(SplitTM.AVL); + + if (PackData) { + // Store + PartOps[(int)Part] = CDAG.getNode(VVPOC, MVT::Other, OpVec); + } else { + // Load + PartOps[(int)Part] = + CDAG.getNode(VVPOC, {SplitDataVT, MVT::Other}, OpVec); + } + } + + // Merge the chains + SDValue LowChain = SDValue(PartOps[(int)PackElem::Lo].getNode(), ChainResIdx); + SDValue HiChain = SDValue(PartOps[(int)PackElem::Hi].getNode(), ChainResIdx); + SDValue FusedChains = + CDAG.getNode(ISD::TokenFactor, MVT::Other, {LowChain, HiChain}); + + // Chain only [store] + if (PackData) + return FusedChains; + + // Re-pack into full packed vector result + MVT PackedVT = + getLegalVectorType(Packing::Dense, DataVT.getVectorElementType()); + SDValue PackedVals = CDAG.getPack(PackedVT, PartOps[(int)PackElem::Lo], + PartOps[(int)PackElem::Hi], UpperPartAVL); + + return CDAG.getMergeValues({PackedVals, FusedChains}); +} + +SDValue VETargetLowering::lowerVVP_GATHER_SCATTER(SDValue Op, + VECustomDAG &CDAG) const { + EVT DataVT = *getIdiomaticVectorType(Op.getNode()); + auto Packing = getTypePacking(DataVT); + MVT LegalDataVT = + getLegalVectorType(Packing, DataVT.getVectorElementType().getSimpleVT()); + + SDValue AVL = getAnnotatedNodeAVL(Op).first; + SDValue Index = getGatherScatterIndex(Op); + SDValue BasePtr = getMemoryPtr(Op); + SDValue Mask = getNodeMask(Op); + SDValue Chain = getNodeChain(Op); + SDValue Scale = getGatherScatterScale(Op); + SDValue PassThru = getNodePassthru(Op); + SDValue StoredValue = getStoredValue(Op); + if (PassThru && PassThru->isUndef()) + PassThru = SDValue(); + + bool IsScatter = (bool)StoredValue; + + // TODO: Infer lower AVL from mask. + if (!AVL) + AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32); + + // Default to the all-true mask. + if (!Mask) + Mask = CDAG.getConstantMask(Packing, true); + + SDValue AddressVec = + CDAG.getGatherScatterAddress(BasePtr, Scale, Index, Mask, AVL); + if (IsScatter) + return CDAG.getNode(VEISD::VVP_SCATTER, MVT::Other, + {Chain, StoredValue, AddressVec, Mask, AVL}); + + // Gather. + SDValue NewLoadV = CDAG.getNode(VEISD::VVP_GATHER, {LegalDataVT, MVT::Other}, + {Chain, AddressVec, Mask, AVL}); + + if (!PassThru) + return NewLoadV; + + // TODO: Use vvp_select + SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, LegalDataVT, + {NewLoadV, PassThru, Mask, AVL}); + SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1); + return CDAG.getMergeValues({DataV, NewLoadChainV}); +}
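lowerVVP_GATHER_SCATTER above flattens base, scale and index into one vector of absolute addresses before the VVP_GATHER/VVP_SCATTER node is emitted. A plain-C++ sketch of that address computation, with scalar stand-ins for the SDValue operands:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // addr[i] = Base + Scale * Index[i]; the masked strided-load patterns
    // later in this patch build the same form with a VMULUL/VADDUL pair under
    // the operation's own mask and AVL.
    std::vector<uint64_t> gatherAddresses(uint64_t Base, uint64_t Scale,
                                          const std::vector<uint64_t> &Index) {
      std::vector<uint64_t> Addr(Index.size());
      for (std::size_t I = 0; I < Index.size(); ++I)
        Addr[I] = Base + Scale * Index[I];
      return Addr;
    }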
+ +SDValue VETargetLowering::legalizeInternalLoadStoreOp(SDValue Op, + VECustomDAG &CDAG) const { + LLVM_DEBUG(dbgs() << "::legalizeInternalLoadStoreOp\n";); + MVT DataVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT(); + + // TODO: Recognize packable load/store. + if (isPackedVectorType(DataVT)) + return splitPackedLoadStore(Op, CDAG); + + return legalizePackedAVL(Op, CDAG); +} + +SDValue VETargetLowering::legalizeInternalVectorOp(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "::legalizeInternalVectorOp\n";); + VECustomDAG CDAG(DAG, Op); + + // Dispatch to specialized legalization functions. + switch (Op->getOpcode()) { + case VEISD::VVP_LOAD: + case VEISD::VVP_STORE: + return legalizeInternalLoadStoreOp(Op, CDAG); + } + + EVT IdiomVT = Op.getValueType(); + if (isPackedVectorType(IdiomVT) && + !supportsPackedMode(Op.getOpcode(), IdiomVT)) + return splitVectorOp(Op, CDAG); + + // TODO: Implement odd/even splitting. + return legalizePackedAVL(Op, CDAG); +} + +SDValue VETargetLowering::splitVectorOp(SDValue Op, VECustomDAG &CDAG) const { + MVT ResVT = splitVectorType(Op.getValue(0).getSimpleValueType()); + + auto AVLPos = getAVLPos(Op->getOpcode()); + auto MaskPos = getMaskPos(Op->getOpcode()); + + SDValue PackedMask = getNodeMask(Op); + auto AVLPair = getAnnotatedNodeAVL(Op); + SDValue PackedAVL = AVLPair.first; + assert(!AVLPair.second && "Expecting non pack-legalized operation"); + + // Request the parts. + SDValue PartOps[2]; + + SDValue UpperPartAVL; // We will use this for packing things back together. + for (PackElem Part : {PackElem::Hi, PackElem::Lo}) { + // VP ops already have an explicit mask and AVL. When expanding from non-VP + // attach those additional inputs here. + auto SplitTM = CDAG.getTargetSplitMask(PackedMask, PackedAVL, Part); + + if (Part == PackElem::Hi) + UpperPartAVL = SplitTM.AVL; + + // Attach non-predicating value operands + SmallVector<SDValue> OpVec; + for (unsigned i = 0; i < Op.getNumOperands(); ++i) { + if (AVLPos && ((int)i) == *AVLPos) + continue; + if (MaskPos && ((int)i) == *MaskPos) + continue; + + // Value operand + auto PackedOperand = Op.getOperand(i); + auto UnpackedOpVT = splitVectorType(PackedOperand.getSimpleValueType()); + SDValue PartV = + CDAG.getUnpack(UnpackedOpVT, PackedOperand, Part, SplitTM.AVL); + OpVec.push_back(PartV); + } + + // Add predicating args and generate part node. + OpVec.push_back(SplitTM.Mask); + OpVec.push_back(SplitTM.AVL); + // Emit legal VVP nodes. + PartOps[(int)Part] = + CDAG.getNode(Op.getOpcode(), ResVT, OpVec, Op->getFlags()); + } + + // Re-package vectors. + return CDAG.getPack(Op.getValueType(), PartOps[(int)PackElem::Lo], + PartOps[(int)PackElem::Hi], UpperPartAVL); +} + +SDValue VETargetLowering::legalizePackedAVL(SDValue Op, + VECustomDAG &CDAG) const { + LLVM_DEBUG(dbgs() << "::legalizePackedAVL\n";); + // Only required for VEC and VVP ops. + if (!isVVPOrVEC(Op->getOpcode())) + return Op; + + // Operation already has a legal AVL. + auto AVL = getNodeAVL(Op); + if (isLegalAVL(AVL)) + return Op; + + // Halve and round up the AVL for 32-bit element types. + SDValue LegalAVL = AVL; + MVT IdiomVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT(); + if (isPackedVectorType(IdiomVT)) { + assert(maySafelyIgnoreMask(Op) && + "TODO: Shift predication from EVL into Mask"); + + if (auto *ConstAVL = dyn_cast<ConstantSDNode>(AVL)) { + LegalAVL = CDAG.getConstant((ConstAVL->getZExtValue() + 1) / 2, MVT::i32); + } else { + auto ConstOne = CDAG.getConstant(1, MVT::i32); + auto PlusOne = CDAG.getNode(ISD::ADD, MVT::i32, {AVL, ConstOne}); + LegalAVL = CDAG.getNode(ISD::SRL, MVT::i32, {PlusOne, ConstOne}); + } + } + + SDValue AnnotatedLegalAVL = CDAG.annotateLegalAVL(LegalAVL);
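// Worked example: a packed v512i32 operation with AVL = 7 covers seven 32-bit
// elements stored two per 64-bit lane, so the machine vector length is
// ceil(7 / 2) = (7 + 1) >> 1 = 4; the ADD/SRL pair above computes exactly
// that when the AVL is not a compile-time constant.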
+ + // Copy the operand list. + int NumOp = Op->getNumOperands(); + auto AVLPos = getAVLPos(Op->getOpcode()); + std::vector<SDValue> FixedOperands; + for (int i = 0; i < NumOp; ++i) { + if (AVLPos && (i == *AVLPos)) { + FixedOperands.push_back(AnnotatedLegalAVL); + continue; + } + FixedOperands.push_back(Op->getOperand(i)); + } + + // Clone the operation with fixed operands. + auto Flags = Op->getFlags(); + SDValue NewN = + CDAG.getNode(Op->getOpcode(), Op->getVTList(), FixedOperands, Flags); + return NewN; +} diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td index ef9c238066c0..a4e4984e3d12 100644 --- a/llvm/lib/Target/VE/VVPInstrInfo.td +++ b/llvm/lib/Target/VE/VVPInstrInfo.td @@ -18,7 +18,40 @@ // TODO: Explain how VVP nodes relate to VP SDNodes once VP ISel is upstream. //===----------------------------------------------------------------------===// -// Binary Operators { +// vvp_load(ptr, stride, mask, avl) +def SDTLoadVVP : SDTypeProfile<1, 4, [ + SDTCisVec<0>, + SDTCisPtrTy<1>, + SDTCisInt<2>, + SDTCisVec<3>, + IsVLVT<4> +]>; + +// vvp_store(data, ptr, stride, mask, avl) +def SDTStoreVVP: SDTypeProfile<0, 5, [ + SDTCisVec<0>, + SDTCisPtrTy<1>, + SDTCisInt<2>, + SDTCisVec<3>, + IsVLVT<4> +]>; + +// vvp_scatter(chain, data, addr, mask, avl) +def SDTScatterVVP: SDTypeProfile<0, 4, [ + SDTCisVec<0>, + SDTCisVec<1>, + SDTCisVec<2>, + SDTCisSameNumEltsAs<0, 2>, + IsVLVT<3> +]>; + +// vvp_gather(chain, addr, mask, avl) +def SDTGatherVVP: SDTypeProfile<1, 3, [ + SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameNumEltsAs<0, 2>, + IsVLVT<3> +]>; // BinaryOp(x,y,mask,vl) def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc. @@ -29,6 +62,15 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc. IsVLVT<4> ]>; +// UnaryFPOp(x,mask,vl) +def SDTFPUnaryOpVVP : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, + SDTCisFP<0>, + SDTCisInt<2>, + SDTCisSameNumEltsAs<0, 2>, + IsVLVT<3> +]>; + // BinaryFPOp(x,y,mask,vl) def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc. SDTCisSameAs<0, 1>, @@ -39,6 +81,17 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc. IsVLVT<4> ]>; +// TernaryFPOp(x,y,z,mask,vl) +def SDTFPTernaryOpVVP : SDTypeProfile<1, 5, [ + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisFP<0>, + SDTCisInt<4>, + SDTCisSameNumEltsAs<0, 4>, + IsVLVT<5> +]>; + // Select(OnTrue, OnFalse, SelMask, vl) def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge SDTCisVec<0>, @@ -48,6 +101,28 @@ def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge IsVLVT<4> ]>; +// SetCC (lhs, rhs, cc, mask, vl) +def SDTSetCCVVP : SDTypeProfile<1, 5, [ // vp_setcc + SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameNumEltsAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>, + SDTCisInt<4>, + SDTCisSameNumEltsAs<0, 4>, + IsVLVT<5> +]>; + +// vvp_reduce(vector, mask, vl) +def SDTReduceVVP : SDTypeProfile<1, 3, [ + SDTCisVec<1>, + SDTCisInt<2>, + SDTCisVec<2>, + SDTCisSameNumEltsAs<1,2>, + IsVLVT<3> +]>; + + // Binary operator commutative pattern. class vvp_commutative<SDNode RootOp> : PatFrags< @@ -55,6 +130,12 @@ class vvp_commutative<SDNode RootOp> : [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen), (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>; +class vvp_fma_commutative<SDNode RootOp> : + PatFrags< + (ops node:$X, node:$Y, node:$Z, node:$mask, node:$vlen), + [(RootOp node:$X, node:$Y, node:$Z, node:$mask, node:$vlen), + (RootOp node:$X, node:$Z, node:$Y, node:$mask, node:$vlen)]>; + // VVP node definitions.
def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>; def c_vvp_add : vvp_commutative; @@ -80,6 +161,8 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>; def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>; def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>; +def vvp_fneg : SDNode<"VEISD::VVP_FNEG", SDTFPUnaryOpVVP>; + def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>; def c_vvp_fadd : vvp_commutative; def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>; @@ -87,6 +170,30 @@ def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>; def c_vvp_fmul : vvp_commutative; def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>; -// } Binary Operators +def vvp_ffma : SDNode<"VEISD::VVP_FFMA", SDTFPTernaryOpVVP>; +def c_vvp_ffma : vvp_fma_commutative; + +def vvp_scatter : SDNode<"VEISD::VVP_SCATTER", SDTScatterVVP, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def vvp_gather : SDNode<"VEISD::VVP_GATHER", SDTGatherVVP, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def vvp_load : SDNode<"VEISD::VVP_LOAD", SDTLoadVVP, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand ]>; +def vvp_store : SDNode<"VEISD::VVP_STORE", SDTStoreVVP, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Reductions + +// int reductions +def vvp_reduce_add : SDNode<"VEISD::VVP_REDUCE_ADD", SDTReduceVVP>; +def vvp_reduce_and : SDNode<"VEISD::VVP_REDUCE_AND", SDTReduceVVP>; +def vvp_reduce_or : SDNode<"VEISD::VVP_REDUCE_OR", SDTReduceVVP>; +def vvp_reduce_xor : SDNode<"VEISD::VVP_REDUCE_XOR", SDTReduceVVP>; +def vvp_reduce_smax : SDNode<"VEISD::VVP_REDUCE_SMAX", SDTReduceVVP>; + def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>; + +// setcc (lhs, rhs, cc, mask, vl) +def vvp_setcc : SDNode<"VEISD::VVP_SETCC", SDTSetCCVVP>; diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td index 74720fd1f419..33316ad054c6 100644 --- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td +++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td @@ -17,6 +17,167 @@ //===----------------------------------------------------------------------===// include "VVPInstrInfo.td" +multiclass VectorStore { + // Unmasked (imm stride). + def : Pat<(vvp_store + DataVT:$val, PtrVT:$addr, + (i64 simm7:$stride), (MaskVT true_mask), i32:$avl), + (!cast(STNoMask#"irvl") + (LO7 $stride), $addr, $val, $avl)>; + // Unmasked. + def : Pat<(vvp_store + DataVT:$val, PtrVT:$addr, + i64:$stride, (MaskVT true_mask), i32:$avl), + (!cast(STNoMask#"rrvl") + $stride, $addr, $val, $avl)>; + // Masked (imm stride). + def : Pat<(vvp_store + DataVT:$val, PtrVT:$addr, + (i64 simm7:$stride), MaskVT:$mask, i32:$avl), + (!cast(STWithMask#"irvml") + (LO7 $stride), $addr, $val, $mask, $avl)>; + // Masked. + def : Pat<(vvp_store + DataVT:$val, PtrVT:$addr, + i64:$stride, MaskVT:$mask, i32:$avl), + (!cast(STWithMask#"rrvml") + $stride, $addr, $val, $mask, $avl)>; +} + +defm : VectorStore; +defm : VectorStore; +defm : VectorStore; +defm : VectorStore; + +multiclass VectorLoad { + // Unmasked (imm stride). + def : Pat<(DataVT (vvp_load + PtrVT:$addr, (i64 simm7:$stride), + (MaskVT true_mask), i32:$avl)), + (!cast(LDNoMask#"irl") + (LO7 $stride), $addr, $avl)>; + // Unmasked. + def : Pat<(DataVT (vvp_load + PtrVT:$addr, i64:$stride, + (MaskVT true_mask), i32:$avl)), + (!cast(LDNoMask#"rrl") + $stride, PtrVT:$addr, $avl)>; + // Masked (imm stride). 
+ def : Pat<(DataVT (vvp_load + PtrVT:$addr, (i64 simm7:$stride), + MaskVT:$mask, i32:$avl)), + (!cast(GTWithMask#"vizml") + (VADDULrvml $addr, + (VMULULivml (LO7 $stride), (VSEQl $avl), $mask, $avl), + $mask, $avl), + 0, 0, + $mask, + $avl)>; + // Masked. + def : Pat<(DataVT (vvp_load + PtrVT:$addr, i64:$stride, MaskVT:$mask, i32:$avl)), + (!cast(GTWithMask#"vizml") + (VADDULrvml $addr, + (VMULULrvml $stride, (VSEQl $avl), $mask, $avl), + $mask, $avl), + 0, 0, + $mask, + $avl)>; +} + +defm : VectorLoad; +defm : VectorLoad; +defm : VectorLoad; +defm : VectorLoad; + +// Vector Gather and scatter +multiclass VectorGather { + // Unmasked. + def : Pat<(DataVT (vvp_gather + PtrVT:$addr, (MaskVT true_mask), i32:$avl)), + (!cast(GTPrefix#"vizl") $addr, 0, 0, $avl)>; + // Masked. + def : Pat<(DataVT (vvp_gather PtrVT:$addr, MaskVT:$mask, i32:$avl)), + (!cast(GTPrefix#"vizml") $addr, 0, 0, $mask, $avl)>; +} + +defm : VectorGather; +defm : VectorGather; +defm : VectorGather; +defm : VectorGather; + +multiclass VectorScatter { + // Unmasked. + def : Pat<(vvp_scatter + DataVT:$data, PtrVT:$addr, (MaskVT true_mask), i32:$avl), + (!cast(SCPrefix#"vizvl") $addr, 0, 0, $data, $avl)>; + // Masked. + def : Pat<(vvp_scatter + DataVT:$data, PtrVT:$addr, MaskVT:$mask, i32:$avl), + (!cast(SCPrefix#"vizvml") $addr, 0, 0, $data, $mask, $avl)>; +} + +defm : VectorScatter; +defm : VectorScatter; +defm : VectorScatter; +defm : VectorScatter; + + +/// FNEG { +// Directly modify the sign bit to flip the sign. + +// Set sign bits in a pack of <2 x f32>. +def packed_fneg_imm : OutPatFrag<(ins ), + (i64 (SLLri (i64 (ORim 1, (i32 32))), 31))>; + + +multiclass FNeg { + // Masked with select. + def : Pat<(vvp_select (vvp_fneg DataVT:$vx, (v256i1 srcvalue), (i32 srcvalue)), + DataVT:$vfalse, + v256i1:$mask, + i32:$avl), + (VXORmvml_v (i32 1), $vx, $mask, $avl, $vfalse)>; + + // Unmasked. + def : Pat<(vvp_fneg DataVT:$vx, (v256i1 true_mask), i32:$avl), + (VXORmvl (i32 1), $vx, $avl)>; + + // Masked. + def : Pat<(vvp_fneg DataVT:$vx, v256i1:$mask, i32:$avl), + (VXORmvml (i32 1), $vx, $mask, $avl)>; +} + +defm: FNeg; +defm: FNeg; + +///// Packed FNeg ///// + +// Masked with select. +def : Pat<(vvp_select (vvp_fneg v512f32:$vx, (v512i1 srcvalue), (i32 srcvalue)), + v512f32:$vfalse, + v512i1:$mask, + i32:$avl), + (v512f32 (PVXORrvml_v (packed_fneg_imm ), $vx, $mask, $avl, $vfalse))>; + +// Unmasked. +def : Pat<(vvp_fneg v512f32:$vx, (v512i1 true_mask), i32:$avl), + (v512f32 (PVXORrvl (packed_fneg_imm ), $vx, $avl))>; + +// Masked. +def : Pat<(vvp_fneg v512f32:$vx, v512i1:$mask, i32:$avl), + (v512f32 (PVXORrvml (packed_fneg_imm ), $vx, $mask, $avl))>; + +/// } FNEG + multiclass Binary_rv { @@ -237,6 +398,143 @@ defm : Binary_rv_vr_vv_ShortLong; +defm : Binary_rv_vv; +defm : Binary_rv_vv; +defm : Binary_rv_vv; + +defm : Binary_rv_vv; +defm : Binary_rv_vv; +defm : Binary_vr_vv; +defm : Binary_vr_vv; +defm : Binary_vr_vv; + +defm : Binary_rv_vv; +defm : Binary_rv_vv; +defm : Binary_rv_vv; + +multiclass Ternary_vvv< + SDPatternOperator OpNode, ValueType DataVT, + ValueType MaskVT, string OpBaseName> { + // Masked with passthru. + def : Pat<(vvp_select + (OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz, + (MaskVT srcvalue), (i32 srcvalue)), + DataVT:$vfalse, + MaskVT:$mask, + i32:$avl), + (!cast(OpBaseName#"vvvml_v") + $vx, $vy, $vz, $mask, $avl, $vfalse)>; + + // Unmasked. + def : Pat<(OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz, + (MaskVT true_mask), i32:$avl), + (!cast(OpBaseName#"vvvl") + $vx, $vy, $vz, $avl)>; + + // Masked. 
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz, + MaskVT:$mask, i32:$avl), + (!cast(OpBaseName#"vvvml") + $vx, $vy, $vz, $mask, $avl)>; +} + +multiclass Ternary_rvv< + SDPatternOperator OpNode, + ValueType ScalarVT, ValueType DataVT, + ValueType MaskVT, string OpBaseName> { + // Masked with passthru, broadcast first. + def : Pat<(vvp_select + (OpNode + (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz, + (MaskVT srcvalue), (i32 srcvalue)), + DataVT:$vfalse, + MaskVT:$mask, + i32:$avl), + (!cast(OpBaseName#"rvvml_v") + $sx, $vy, $vz, $mask, $avl, $vfalse)>; + + // Unmasked, broadcast first. + def : Pat<(OpNode + (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz, + (MaskVT true_mask), i32:$avl), + (!cast(OpBaseName#"rvvl") + $sx, $vy, $vz, $avl)>; + + // Masked, broadcast first. + def : Pat<(OpNode + (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz, + MaskVT:$mask, i32:$avl), + (!cast(OpBaseName#"rvvml") + $sx, $vy, $vz, $mask, $avl)>; +} + +multiclass Ternary_vrv< + SDPatternOperator OpNode, + ValueType ScalarVT, ValueType DataVT, + ValueType MaskVT, string OpBaseName> { + // Masked with passthru, broadcast second. + def : Pat<(vvp_select + (OpNode + DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz, + (MaskVT srcvalue), (i32 srcvalue)), + DataVT:$vfalse, + MaskVT:$mask, + i32:$avl), + (!cast(OpBaseName#"vrvml_v") + $vx, $sy, $vz, + $mask, $avl, $vfalse)>; + + // Unmasked, broadcast second. + def : Pat<(OpNode + DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz, + (MaskVT true_mask), i32:$avl), + (!cast(OpBaseName#"vrvl") + $vx, $sy, $vz, $avl)>; + + // Masked, broadcast second. + def : Pat<(OpNode + DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz, + MaskVT:$mask, i32:$avl), + (!cast(OpBaseName#"vrvml") + $vx, $sy, $vz, $mask, $avl)>; +} + +multiclass Ternary_rvv_vrv_vvv< + SDPatternOperator OpNode, + ValueType ScalarVT, ValueType DataVT, + ValueType MaskVT, string OpBaseName> { + defm : Ternary_rvv; + defm : Ternary_vrv; + defm : Ternary_vvv; +} + +// Expand both 64bit and 32 bit variant (256 elements) +multiclass Ternary_ShortLong< + SDPatternOperator OpNode, + ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName, + ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> { + defm : Ternary_rvv_vrv_vvv; + defm : Ternary_rvv_vrv_vvv; +} + +defm : Ternary_ShortLong; +defm : Ternary_rvv_vrv_vvv; + multiclass Merge_mvv< SDPatternOperator OpNode, ValueType DataVT, ValueType MaskVT, @@ -268,3 +566,63 @@ defm : Merge_mvv_ShortLong; + +multiclass Set_CC { + // Unmasked. + def : Pat<(v256i1 (vvp_setcc + DataVT:$LHS, DataVT:$RHS, CCMatcher:$cond, (v256i1 true_mask), i32:$vl)), + (!cast(FmkBaseName#"vl") + (CCConv $cond), + (!cast(CmpBaseName#"vvl") + $LHS, $RHS, $vl), + $vl)>; + // Masked. + def : Pat<(v256i1 (vvp_setcc + DataVT:$LHS, DataVT:$RHS, CCMatcher:$cond, v256i1:$vm, i32:$vl)), + (!cast(FmkBaseName#"vml") + (CCConv $cond), + (!cast(CmpBaseName#"vvl") + $LHS, $RHS, $vl), + $vm, $vl)>; +} + +defm : Set_CC; +defm : Set_CC; +defm : Set_CC; + +defm : Set_CC; +defm : Set_CC; +defm : Set_CC; + +multiclass Reduce_GenericInt { + // Unmasked. + def : Pat <(ResVT (!cast("vvp_reduce_"#VVPRedOp) + VectorVT:$vx, (v256i1 true_mask), i32:$vl)), + (COPY_TO_REGCLASS + (!cast("LVSvi") + (!cast(RedInstName#"vl") $vx, $vl), 0), + ResRC)>; + + // Masked. 
+ def : Pat <(ResVT (!cast("vvp_reduce_"#VVPRedOp) + VectorVT:$vx, v256i1:$vm, i32:$vl)), + (COPY_TO_REGCLASS + (!cast("LVSvi") + (!cast(RedInstName#"vml") $vx, $vm, $vl), 0), + ResRC)>; +} + +multiclass IntReduce_ShortLong { + defm: Reduce_GenericInt; + defm: Reduce_GenericInt; + defm: Reduce_GenericInt; + defm: Reduce_GenericInt; + defm: Reduce_GenericInt; +} + +defm: IntReduce_ShortLong; +defm: IntReduce_ShortLong; diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def index 8000f84c5dbe..a60588672293 100644 --- a/llvm/lib/Target/VE/VVPNodes.def +++ b/llvm/lib/Target/VE/VVPNodes.def @@ -24,6 +24,14 @@ #define ADD_VVP_OP(X, Y) #endif +/// ADD_UNARY_VVP_OP(VVPNAME,SDNAME) +/// \p VVPName is a VVP Unary operator. +/// \p SDNAME is the generic SD opcode corresponding to \p VVPName. +#ifndef ADD_UNARY_VVP_OP +#define ADD_UNARY_VVP_OP(VVPNAME,SDNAME) \ + ADD_VVP_OP(VVPNAME,SDNAME) +#endif + /// ADD_BINARY_VVP_OP(VVPNAME,SDNAME) /// \p VVPName is a VVP Binary operator. /// \p SDNAME is the generic SD opcode corresponding to \p VVPName. @@ -33,38 +41,95 @@ HANDLE_VP_TO_VVP(VPNAME, VVPNAME) #endif +/// ADD_TERNARY_VVP_OP(VVPNAME,SDNAME) +/// \p VVPName is a VVP Ternary operator. +/// \p SDNAME is the generic SD opcode corresponding to \p VVPName. +#ifndef ADD_TERNARY_VVP_OP +#define ADD_TERNARY_VVP_OP(VVPNAME,SDNAME) \ + ADD_VVP_OP(VVPNAME,SDNAME) +#endif + #ifndef ADD_BINARY_VVP_OP_COMPACT #define ADD_BINARY_VVP_OP_COMPACT(NAME) \ ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME) #endif +/// REGISTER_PACKED(OPC) +/// \p OPC The VVP opcode of the operation. +#ifndef REGISTER_PACKED +#define REGISTER_PACKED(OPC) +#endif + +/// ADD_REDUCE_VVP_OP(OPC) +/// \p OPC The VVP opcode of the operation. +/// \p SDNAME The standard opcode of the operation. +#ifndef ADD_REDUCE_VVP_OP +#define ADD_REDUCE_VVP_OP(OPC, SDNAME) ADD_VVP_OP(OPC, SDNAME) +#endif + +// Scalar standard ISD to perform this reduction. +#ifndef HANDLE_VVP_REDUCE_TO_SCALAR +#define HANDLE_VVP_REDUCE_TO_SCALAR(VVP_RED_ISD, REDUCE_ISD) +#endif + +/// Reductions. +#define HELPER_REDUCTION(OPC, SCALAR_OPC) \ + ADD_REDUCE_VVP_OP(VVP_REDUCE_##OPC,VECREDUCE_##OPC) \ + HANDLE_VP_TO_VVP(VP_REDUCE_##OPC, VVP_REDUCE_##OPC) \ + HANDLE_VVP_REDUCE_TO_SCALAR(VVP_REDUCE_##OPC, SCALAR_OPC) + +HELPER_REDUCTION(ADD, ADD) +HELPER_REDUCTION(AND, AND) +HELPER_REDUCTION(OR, OR) +HELPER_REDUCTION(XOR, XOR) +HELPER_REDUCTION(SMAX, SMAX) + +#undef HELPER_REDUCTION + +ADD_VVP_OP(VVP_LOAD,LOAD) HANDLE_VP_TO_VVP(VP_LOAD, VVP_LOAD) REGISTER_PACKED(VVP_LOAD) +ADD_VVP_OP(VVP_STORE,STORE) HANDLE_VP_TO_VVP(VP_STORE, VVP_STORE) REGISTER_PACKED(VVP_STORE) + +ADD_VVP_OP(VVP_GATHER, MGATHER) HANDLE_VP_TO_VVP(VP_GATHER, VVP_GATHER) +ADD_VVP_OP(VVP_SCATTER, MSCATTER) HANDLE_VP_TO_VVP(VP_SCATTER, VVP_SCATTER) + // Integer arithmetic. 
-ADD_BINARY_VVP_OP_COMPACT(ADD) -ADD_BINARY_VVP_OP_COMPACT(SUB) +ADD_BINARY_VVP_OP_COMPACT(ADD) REGISTER_PACKED(VVP_ADD) +ADD_BINARY_VVP_OP_COMPACT(SUB) REGISTER_PACKED(VVP_SUB) ADD_BINARY_VVP_OP_COMPACT(MUL) ADD_BINARY_VVP_OP_COMPACT(UDIV) ADD_BINARY_VVP_OP_COMPACT(SDIV) -ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA) -ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL) -ADD_BINARY_VVP_OP_COMPACT(SHL) +ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA) REGISTER_PACKED(VVP_SRA) +ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL) REGISTER_PACKED(VVP_SRL) +ADD_BINARY_VVP_OP_COMPACT(SHL) REGISTER_PACKED(VVP_SHL) -ADD_BINARY_VVP_OP_COMPACT(AND) -ADD_BINARY_VVP_OP_COMPACT(OR) -ADD_BINARY_VVP_OP_COMPACT(XOR) +ADD_BINARY_VVP_OP_COMPACT(AND) REGISTER_PACKED(VVP_AND) +ADD_BINARY_VVP_OP_COMPACT(OR) REGISTER_PACKED(VVP_OR) +ADD_BINARY_VVP_OP_COMPACT(XOR) REGISTER_PACKED(VVP_XOR) // FP arithmetic. -ADD_BINARY_VVP_OP_COMPACT(FADD) -ADD_BINARY_VVP_OP_COMPACT(FSUB) -ADD_BINARY_VVP_OP_COMPACT(FMUL) +ADD_UNARY_VVP_OP(VVP_FNEG, FNEG) HANDLE_VP_TO_VVP(VP_FNEG, VVP_FNEG) REGISTER_PACKED(VVP_FNEG) +ADD_BINARY_VVP_OP_COMPACT(FADD) REGISTER_PACKED(VVP_FADD) +ADD_BINARY_VVP_OP_COMPACT(FSUB) REGISTER_PACKED(VVP_FSUB) +ADD_BINARY_VVP_OP_COMPACT(FMUL) REGISTER_PACKED(VVP_FMUL) ADD_BINARY_VVP_OP_COMPACT(FDIV) +ADD_TERNARY_VVP_OP(VVP_FFMA,FMA) HANDLE_VP_TO_VVP(VP_FMA, VVP_FFMA) REGISTER_PACKED(VVP_FFMA) + +ADD_VVP_OP(VVP_SETCC, SETCC) + // Shuffles. -ADD_VVP_OP(VVP_SELECT,VSELECT) +ADD_VVP_OP(VVP_SELECT,VSELECT) REGISTER_PACKED(VVP_SELECT) HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT) HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT) + #undef ADD_BINARY_VVP_OP +#undef ADD_TERNARY_VVP_OP +#undef ADD_UNARY_VVP_OP #undef ADD_BINARY_VVP_OP_COMPACT +#undef ADD_REDUCE_VVP_OP #undef ADD_VVP_OP #undef HANDLE_VP_TO_VVP +#undef HANDLE_VVP_REDUCE_TO_SCALAR +#undef REGISTER_PACKED diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 56689d3ee06b..7bafa53af2af 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSectionWasm.h" @@ -374,7 +375,7 @@ public: auto Type = WebAssembly::parseType(Lexer.getTok().getString()); if (!Type) return error("unknown type: ", Lexer.getTok()); - Types.push_back(Type.getValue()); + Types.push_back(*Type); Parser.Lex(); if (!isNext(AsmToken::Comma)) break; @@ -670,11 +671,12 @@ public: } else { // Assume this identifier is a label. const MCExpr *Val; + SMLoc Start = Id.getLoc(); SMLoc End; if (Parser.parseExpression(Val, End)) return error("Cannot parse symbol: ", Lexer.getTok()); Operands.push_back(std::make_unique( - WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), + WebAssemblyOperand::Symbol, Start, End, WebAssemblyOperand::SymOp{Val})); if (checkForP2AlignIfLoadStore(Operands, Name)) return true; @@ -815,8 +817,7 @@ public: // Now set this symbol with the correct type. auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); - WasmSym->setGlobalType( - wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable}); + WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(*Type), Mutable}); // And emit the directive again. 
TOut.emitGlobalType(WasmSym); return expect(AsmToken::EndOfStatement, "EOL"); @@ -846,7 +847,7 @@ public: // symbol auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); - wasm::WasmTableType Type = {uint8_t(ElemType.getValue()), Limits}; + wasm::WasmTableType Type = {uint8_t(*ElemType), Limits}; WasmSym->setTableType(Type); TOut.emitTableType(WasmSym); return expect(AsmToken::EndOfStatement, "EOL"); @@ -1016,7 +1017,7 @@ public: Inst.setOpcode(Opc64); } } - if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst)) + if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst, Operands)) return true; Out.emitInstruction(Inst, getSTI()); if (CurrentState == EndFunction) { @@ -1094,14 +1095,15 @@ public: auto *WS = getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group, MCContext::GenericSectionID, nullptr); - getStreamer().SwitchSection(WS); + getStreamer().switchSection(WS); // Also generate DWARF for this section if requested. if (getContext().getGenDwarfForAssembly()) getContext().addGenDwarfSection(WS); } void onEndOfFunction(SMLoc ErrorLoc) { - TC.endOfFunction(ErrorLoc); + if (!SkipTypeCheck) + TC.endOfFunction(ErrorLoc); // Reset the type checker state. TC.Clear(); diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index 128ce5c4fec0..ec72c1de0503 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -86,14 +86,12 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc, Optional EVT) { if (Stack.empty()) { return typeError(ErrorLoc, - EVT.hasValue() - ? StringRef("empty stack while popping ") + - WebAssembly::typeToString(EVT.getValue()) - : StringRef( - "empty stack while popping value")); + EVT ? 
StringRef("empty stack while popping ") + + WebAssembly::typeToString(EVT.getValue()) + : StringRef("empty stack while popping value")); } auto PVT = Stack.pop_back_val(); - if (EVT.hasValue() && EVT.getValue() != PVT) { + if (EVT && EVT.getValue() != PVT) { return typeError( ErrorLoc, StringRef("popped ") + WebAssembly::typeToString(PVT) + ", expected " + @@ -102,6 +100,19 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc, return false; } +bool WebAssemblyAsmTypeCheck::popRefType(SMLoc ErrorLoc) { + if (Stack.empty()) { + return typeError(ErrorLoc, StringRef("empty stack while popping reftype")); + } + auto PVT = Stack.pop_back_val(); + if (!WebAssembly::isRefType(PVT)) { + return typeError(ErrorLoc, StringRef("popped ") + + WebAssembly::typeToString(PVT) + + ", expected reftype"); + } + return false; +} + bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type) { auto Local = static_cast(Inst.getOperand(0).getImm()); @@ -160,7 +171,7 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst, if (getSymRef(ErrorLoc, Inst, SymRef)) return true; auto WasmSym = cast(&SymRef->getSymbol()); - switch (WasmSym->getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA)) { + switch (WasmSym->getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA)) { case wasm::WASM_SYMBOL_TYPE_GLOBAL: Type = static_cast(WasmSym->getGlobalType().Type); break; @@ -182,6 +193,20 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst, return false; } +bool WebAssemblyAsmTypeCheck::getTable(SMLoc ErrorLoc, const MCInst &Inst, + wasm::ValType &Type) { + const MCSymbolRefExpr *SymRef; + if (getSymRef(ErrorLoc, Inst, SymRef)) + return true; + auto WasmSym = cast(&SymRef->getSymbol()); + if (WasmSym->getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA) != + wasm::WASM_SYMBOL_TYPE_TABLE) + return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() + + " missing .tabletype"); + Type = static_cast(WasmSym->getTableType().ElemType); + return false; +} + bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { // Check the return types. 
for (auto RVT : llvm::reverse(ReturnTypes)) { @@ -196,35 +221,58 @@ bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { return false; } -bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) { +bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, + OperandVector &Operands) { auto Opc = Inst.getOpcode(); auto Name = GetMnemonic(Opc); dumpTypeStack("typechecking " + Name + ": "); wasm::ValType Type; if (Name == "local.get") { - if (getLocal(ErrorLoc, Inst, Type)) + if (getLocal(Operands[1]->getStartLoc(), Inst, Type)) return true; Stack.push_back(Type); } else if (Name == "local.set") { - if (getLocal(ErrorLoc, Inst, Type)) + if (getLocal(Operands[1]->getStartLoc(), Inst, Type)) return true; if (popType(ErrorLoc, Type)) return true; } else if (Name == "local.tee") { - if (getLocal(ErrorLoc, Inst, Type)) + if (getLocal(Operands[1]->getStartLoc(), Inst, Type)) return true; if (popType(ErrorLoc, Type)) return true; Stack.push_back(Type); } else if (Name == "global.get") { - if (getGlobal(ErrorLoc, Inst, Type)) + if (getGlobal(Operands[1]->getStartLoc(), Inst, Type)) return true; Stack.push_back(Type); } else if (Name == "global.set") { - if (getGlobal(ErrorLoc, Inst, Type)) + if (getGlobal(Operands[1]->getStartLoc(), Inst, Type)) + return true; + if (popType(ErrorLoc, Type)) + return true; + } else if (Name == "table.get") { + if (getTable(Operands[1]->getStartLoc(), Inst, Type)) + return true; + if (popType(ErrorLoc, wasm::ValType::I32)) + return true; + Stack.push_back(Type); + } else if (Name == "table.set") { + if (getTable(Operands[1]->getStartLoc(), Inst, Type)) return true; if (popType(ErrorLoc, Type)) return true; + if (popType(ErrorLoc, wasm::ValType::I32)) + return true; + } else if (Name == "table.fill") { + if (getTable(Operands[1]->getStartLoc(), Inst, Type)) + return true; + if (popType(ErrorLoc, wasm::ValType::I32)) + return true; + if (popType(ErrorLoc, Type)) + return true; + if (popType(ErrorLoc, wasm::ValType::I32)) + return true; } else if (Name == "drop") { if (popType(ErrorLoc, {})) return true; @@ -245,33 +293,36 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) { return true; } else if (Name == "call" || Name == "return_call") { const MCSymbolRefExpr *SymRef; - if (getSymRef(ErrorLoc, Inst, SymRef)) + if (getSymRef(Operands[1]->getStartLoc(), Inst, SymRef)) return true; auto WasmSym = cast(&SymRef->getSymbol()); auto Sig = WasmSym->getSignature(); if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_FUNCTION) - return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() + - " missing .functype"); + return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") + + WasmSym->getName() + + " missing .functype"); if (checkSig(ErrorLoc, *Sig)) return true; if (Name == "return_call" && endOfFunction(ErrorLoc)) return true; } else if (Name == "catch") { const MCSymbolRefExpr *SymRef; - if (getSymRef(ErrorLoc, Inst, SymRef)) + if (getSymRef(Operands[1]->getStartLoc(), Inst, SymRef)) return true; const auto *WasmSym = cast(&SymRef->getSymbol()); const auto *Sig = WasmSym->getSignature(); if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_TAG) - return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() + - " missing .tagtype"); + return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") + + WasmSym->getName() + + " missing .tagtype"); // catch instruction pushes values whose types are specified in the tag's // "params" part Stack.insert(Stack.end(), 
Sig->Params.begin(), Sig->Params.end()); - } else if (Name == "ref.null") { - auto VT = static_cast(Inst.getOperand(0).getImm()); - Stack.push_back(VT); } else if (Name == "unreachable") { Unreachable = true; + } else if (Name == "ref.is_null") { + if (popRefType(ErrorLoc)) + return true; + Stack.push_back(wasm::ValType::I32); } else { // The current instruction is a stack instruction which doesn't have // explicit operands that indicate push/pop types, so we get those from diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h index 2b07faf67a18..3be966b5739c 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h @@ -16,9 +16,10 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_ASMPARSER_TYPECHECK_H #define LLVM_LIB_TARGET_WEBASSEMBLY_ASMPARSER_TYPECHECK_H -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSymbol.h" namespace llvm { @@ -38,12 +39,14 @@ class WebAssemblyAsmTypeCheck final { void dumpTypeStack(Twine Msg); bool typeError(SMLoc ErrorLoc, const Twine &Msg); bool popType(SMLoc ErrorLoc, Optional EVT); + bool popRefType(SMLoc ErrorLoc); bool getLocal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type); bool checkEnd(SMLoc ErrorLoc, bool PopVals = false); bool checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig); bool getSymRef(SMLoc ErrorLoc, const MCInst &Inst, const MCSymbolRefExpr *&SymRef); bool getGlobal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type); + bool getTable(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type); public: WebAssemblyAsmTypeCheck(MCAsmParser &Parser, const MCInstrInfo &MII, bool is64); @@ -52,7 +55,7 @@ public: void localDecl(const SmallVector &Locals); void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; } bool endOfFunction(SMLoc ErrorLoc); - bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst); + bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst, OperandVector &Operands); void Clear() { Stack.clear(); diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index 5d38145559da..ae65a9dc2a4e 100644 --- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -17,8 +17,8 @@ #include "TargetInfo/WebAssemblyTargetInfo.h" #include "Utils/WebAssemblyTypeUtilities.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index d8122950e061..5727708a84ad 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -52,6 +52,4 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T, // we make sure this info is set correctly. 
if (WebAssembly::WasmEnableEH || WebAssembly::WasmEnableSjLj) ExceptionsType = ExceptionHandling::Wasm; - - // TODO: UseIntegratedAssembler? } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 8f670ec88897..f52545a65dbb 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -62,7 +62,6 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, } static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo & /*MRI*/, MCContext &Ctx) { return createWebAssemblyMCCodeEmitter(MCII); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index 397b9b0ee9da..2da219d54c73 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -58,8 +58,6 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef Types) { } } -void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } - void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) { assert(Sym->isFunction()); OS << "\t.functype\t" << Sym->getName() << " "; @@ -136,10 +134,6 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef Types) { } } -void WebAssemblyTargetWasmStreamer::emitEndFunc() { - llvm_unreachable(".end_func is not needed for direct wasm output"); -} - void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) { llvm_unreachable(".indidx encoding not yet implemented"); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index c0ad63c8dd50..522f6356c28b 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -32,8 +32,6 @@ public: /// .local virtual void emitLocal(ArrayRef Types) = 0; - /// .endfunc - virtual void emitEndFunc() = 0; /// .functype virtual void emitFunctionType(const MCSymbolWasm *Sym) = 0; /// .indidx @@ -66,7 +64,6 @@ public: WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); void emitLocal(ArrayRef Types) override; - void emitEndFunc() override; void emitFunctionType(const MCSymbolWasm *Sym) override; void emitIndIdx(const MCExpr *Value) override; void emitGlobalType(const MCSymbolWasm *Sym) override; @@ -83,7 +80,6 @@ public: explicit WebAssemblyTargetWasmStreamer(MCStreamer &S); void emitLocal(ArrayRef Types) override; - void emitEndFunc() override; void emitFunctionType(const MCSymbolWasm *Sym) override {} void emitIndIdx(const MCExpr *Value) override; void emitGlobalType(const MCSymbolWasm *Sym) override {} @@ -104,7 +100,6 @@ public: : WebAssemblyTargetStreamer(S) {} void emitLocal(ArrayRef) override {} - void emitEndFunc() override {} void emitFunctionType(const MCSymbolWasm *) override {} void emitIndIdx(const MCExpr *) override {} void emitGlobalType(const MCSymbolWasm *) override {} diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h index cdb95d48398d..8fc67d37925c 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h @@ 
-80,6 +80,10 @@ inline bool isRefType(const Type *Ty) { return isFuncrefType(Ty) || isExternrefType(Ty); } +inline bool isRefType(wasm::ValType Type) { + return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF; +} + // Convert StringRef to ValType / HeapType / BlockType Optional<wasm::ValType> parseType(StringRef Type); diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h index 803786e0c9c2..aee8f160f38d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -26,7 +26,6 @@ class FunctionPass; // LLVM IR passes. ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(); -ModulePass *createWebAssemblyLowerGlobalDtors(); ModulePass *createWebAssemblyAddMissingPrototypes(); ModulePass *createWebAssemblyFixFunctionBitcasts(); FunctionPass *createWebAssemblyOptimizeReturned(); @@ -41,7 +40,6 @@ FunctionPass *createWebAssemblySetP2AlignOperands(); // Late passes. FunctionPass *createWebAssemblyReplacePhysRegs(); FunctionPass *createWebAssemblyNullifyDebugValueLists(); -FunctionPass *createWebAssemblyPrepareForLiveIntervals(); FunctionPass *createWebAssemblyOptimizeLiveIntervals(); FunctionPass *createWebAssemblyMemIntrinsicResults(); FunctionPass *createWebAssemblyRegStackify(); @@ -61,14 +59,12 @@ ModulePass *createWebAssemblyMCLowerPrePass(); // PassRegistry initialization declarations. void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &); void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); -void initializeLowerGlobalDtorsPass(PassRegistry &); void initializeFixFunctionBitcastsPass(PassRegistry &); void initializeOptimizeReturnedPass(PassRegistry &); void initializeWebAssemblyArgumentMovePass(PassRegistry &); void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &); void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &); void initializeWebAssemblyNullifyDebugValueListsPass(PassRegistry &); -void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); void initializeWebAssemblyRegStackifyPass(PassRegistry &); diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index a529c6217189..b83dcf3a8e65 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -67,6 +67,10 @@ def FeatureReferenceTypes : SubtargetFeature<"reference-types", "HasReferenceTypes", "true", "Enable reference types">; +def FeatureExtendedConst : + SubtargetFeature<"extended-const", "HasExtendedConst", "true", + "Enable extended const expressions">; + //===----------------------------------------------------------------------===// // Architectures.
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index bf326e5106be..57d51634e849 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -180,30 +180,30 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { MCSymbolWasm *Sym = cast<MCSymbolWasm>(getSymbol(GV)); if (!Sym->getType()) { - const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering(); SmallVector<MVT, 1> VTs; Type *GlobalVT = GV->getValueType(); - computeLegalValueVTs(TLI, GV->getParent()->getContext(), - GV->getParent()->getDataLayout(), GlobalVT, VTs); + if (Subtarget) { + // Subtarget is only set when a function is defined, because + // each function can declare a different subtarget. For example, + // on ARM a compilation unit might have a function on ARM and + // another on Thumb. Therefore only if Subtarget is non-null we + // can actually calculate the legal VTs. + const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering(); + computeLegalValueVTs(TLI, GV->getParent()->getContext(), + GV->getParent()->getDataLayout(), GlobalVT, VTs); + } WebAssembly::wasmSymbolSetType(Sym, GlobalVT, VTs); } - // If the GlobalVariable refers to a table, we handle it here instead of - // in emitExternalDecls if (Sym->isTable()) { - getTargetStreamer()->emitTableType(Sym); - return; - } - emitVisibility(Sym, GV->getVisibility(), !GV->isDeclaration()); + emitSymbolType(Sym); if (GV->hasInitializer()) { assert(getSymbolPreferLocal(*GV) == Sym); emitLinkage(GV, Sym); - getTargetStreamer()->emitGlobalType(Sym); OutStreamer->emitLabel(Sym); // TODO: Actually emit the initializer value. Otherwise the global has the // default value for its type (0, ref.null, etc). - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } } @@ -211,7 +211,7 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) { auto *WasmSym = cast<MCSymbolWasm>(GetExternalSymbolSymbol(Name)); // May be called multiple times, so early out. - if (WasmSym->getType().hasValue()) + if (WasmSym->getType()) return WasmSym; const WebAssemblySubtarget &Subtarget = getSubtarget(); @@ -271,31 +271,52 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) { return WasmSym; } -void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) { +void WebAssemblyAsmPrinter::emitSymbolType(const MCSymbolWasm *Sym) { + Optional<wasm::WasmSymbolType> WasmTy = Sym->getType(); + if (!WasmTy) + return; + + switch (*WasmTy) { + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + getTargetStreamer()->emitGlobalType(Sym); + break; + case wasm::WASM_SYMBOL_TYPE_TAG: + getTargetStreamer()->emitTagType(Sym); + break; + case wasm::WASM_SYMBOL_TYPE_TABLE: + getTargetStreamer()->emitTableType(Sym); + break; + default: + break; // We only handle globals, tags and tables here + } +} + +void WebAssemblyAsmPrinter::emitDecls(const Module &M) { if (signaturesEmitted) return; signaturesEmitted = true; // Normally symbols for globals get discovered as the MI gets lowered, - // but we need to know about them ahead of time. + // but we need to know about them ahead of time. This will, however, + // only find symbols that have been used. Unused symbols from globals will + // not be found here.
MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo(); for (const auto &Name : MMIW.MachineSymbolsUsed) { - getOrCreateWasmSymbol(Name.getKey()); + auto *WasmSym = cast(getOrCreateWasmSymbol(Name.getKey())); + if (WasmSym->isFunction()) { + // TODO(wvo): is there any case where this overlaps with the call to + // emitFunctionType in the loop below? + getTargetStreamer()->emitFunctionType(WasmSym); + } } for (auto &It : OutContext.getSymbols()) { - // Emit .globaltype, .tagtype, or .tabletype declarations. + // Emit .globaltype, .tagtype, or .tabletype declarations for extern + // declarations, i.e. those that have only been declared (but not defined) + // in the current module auto Sym = cast(It.getValue()); - if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL) { - // .globaltype already handled by emitGlobalVariable for defined - // variables; here we make sure the types of external wasm globals get - // written to the file. - if (Sym->isUndefined()) - getTargetStreamer()->emitGlobalType(Sym); - } else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_TAG) - getTargetStreamer()->emitTagType(Sym); - else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_TABLE) - getTargetStreamer()->emitTableType(Sym); + if (!Sym->isDefined()) + emitSymbolType(Sym); } DenseSet InvokeSymbols; @@ -303,55 +324,56 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) { if (F.isIntrinsic()) continue; - // Emit function type info for all undefined functions - if (F.isDeclarationForLinker()) { - SmallVector Results; - SmallVector Params; - computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results); - // At this point these MCSymbols may or may not have been created already - // and thus also contain a signature, but we need to get the signature - // anyway here in case it is an invoke that has not yet been created. We - // will discard it later if it turns out not to be necessary. - auto Signature = signatureFromMVTs(Results, Params); - bool InvokeDetected = false; - auto *Sym = getMCSymbolForFunction( - &F, WebAssembly::WasmEnableEmEH || WebAssembly::WasmEnableEmSjLj, - Signature.get(), InvokeDetected); - - // Multiple functions can be mapped to the same invoke symbol. For - // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32' - // are both mapped to '__invoke_vi'. We keep them in a set once we emit an - // Emscripten EH symbol so we don't emit the same symbol twice. - if (InvokeDetected && !InvokeSymbols.insert(Sym).second) - continue; + // Emit function type info for all functions. This will emit duplicate + // information for defined functions (which already have function type + // info emitted alongside their definition), but this is necessary in + // order to enable the single-pass WebAssemblyAsmTypeCheck to succeed. + SmallVector Results; + SmallVector Params; + computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results); + // At this point these MCSymbols may or may not have been created already + // and thus also contain a signature, but we need to get the signature + // anyway here in case it is an invoke that has not yet been created. We + // will discard it later if it turns out not to be necessary. + auto Signature = signatureFromMVTs(Results, Params); + bool InvokeDetected = false; + auto *Sym = getMCSymbolForFunction( + &F, WebAssembly::WasmEnableEmEH || WebAssembly::WasmEnableEmSjLj, + Signature.get(), InvokeDetected); + + // Multiple functions can be mapped to the same invoke symbol. 
For + // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32' + // are both mapped to '__invoke_vi'. We keep them in a set once we emit an + // Emscripten EH symbol so we don't emit the same symbol twice. + if (InvokeDetected && !InvokeSymbols.insert(Sym).second) + continue; - Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); - if (!Sym->getSignature()) { - Sym->setSignature(Signature.get()); - addSignature(std::move(Signature)); - } else { - // This symbol has already been created and had a signature. Discard it. - Signature.reset(); - } + Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + if (!Sym->getSignature()) { + Sym->setSignature(Signature.get()); + addSignature(std::move(Signature)); + } else { + // This symbol has already been created and had a signature. Discard it. + Signature.reset(); + } - getTargetStreamer()->emitFunctionType(Sym); + getTargetStreamer()->emitFunctionType(Sym); - if (F.hasFnAttribute("wasm-import-module")) { - StringRef Name = - F.getFnAttribute("wasm-import-module").getValueAsString(); - Sym->setImportModule(storeName(Name)); - getTargetStreamer()->emitImportModule(Sym, Name); - } - if (F.hasFnAttribute("wasm-import-name")) { - // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use - // the original function name but the converted symbol name. - StringRef Name = - InvokeDetected - ? Sym->getName() - : F.getFnAttribute("wasm-import-name").getValueAsString(); - Sym->setImportName(storeName(Name)); - getTargetStreamer()->emitImportName(Sym, Name); - } + if (F.hasFnAttribute("wasm-import-module")) { + StringRef Name = + F.getFnAttribute("wasm-import-module").getValueAsString(); + Sym->setImportModule(storeName(Name)); + getTargetStreamer()->emitImportModule(Sym, Name); + } + if (F.hasFnAttribute("wasm-import-name")) { + // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use + // the original function name but the converted symbol name. + StringRef Name = + InvokeDetected + ? Sym->getName() + : F.getFnAttribute("wasm-import-name").getValueAsString(); + Sym->setImportName(storeName(Name)); + getTargetStreamer()->emitImportName(Sym, Name); } if (F.hasFnAttribute("wasm-export-name")) { @@ -362,9 +384,12 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) { } } } - + void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { - emitExternalDecls(M); + // This is required to emit external declarations (like .functypes) when + // no functions are defined in the compilation unit and therefore, + // emitDecls() is not called until now. + emitDecls(M); // When a function's address is taken, a TABLE_INDEX relocation is emitted // against the function symbol at the use site. However the relocation @@ -401,13 +426,13 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { if (!Name || !Contents) continue; - OutStreamer->PushSection(); + OutStreamer->pushSection(); std::string SectionName = (".custom_section." 
+ Name->getString()).str(); MCSectionWasm *MySection = OutContext.getWasmSection(SectionName, SectionKind::getMetadata()); - OutStreamer->SwitchSection(MySection); + OutStreamer->switchSection(MySection); OutStreamer->emitBytes(Contents->getString()); - OutStreamer->PopSection(); + OutStreamer->popSection(); } } @@ -445,8 +470,8 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) { if (FieldCount != 0) { MCSectionWasm *Producers = OutContext.getWasmSection( ".custom_section.producers", SectionKind::getMetadata()); - OutStreamer->PushSection(); - OutStreamer->SwitchSection(Producers); + OutStreamer->pushSection(); + OutStreamer->switchSection(Producers); OutStreamer->emitULEB128IntValue(FieldCount); for (auto &Producers : {std::make_pair("language", &Languages), std::make_pair("processed-by", &Tools)}) { @@ -462,7 +487,7 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) { OutStreamer->emitBytes(Producer.second); } } - OutStreamer->PopSection(); + OutStreamer->popSection(); } } @@ -518,8 +543,8 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) { // Emit features and linkage policies into the "target_features" section MCSectionWasm *FeaturesSection = OutContext.getWasmSection( ".custom_section.target_features", SectionKind::getMetadata()); - OutStreamer->PushSection(); - OutStreamer->SwitchSection(FeaturesSection); + OutStreamer->pushSection(); + OutStreamer->switchSection(FeaturesSection); OutStreamer->emitULEB128IntValue(EmittedFeatures.size()); for (auto &F : EmittedFeatures) { @@ -528,10 +553,11 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) { OutStreamer->emitBytes(F.Name); } - OutStreamer->PopSection(); + OutStreamer->popSection(); } void WebAssemblyAsmPrinter::emitConstantPool() { + emitDecls(*MMI->getModule()); assert(MF->getConstantPool()->getConstants().empty() && "WebAssembly disables constant pools"); } @@ -540,17 +566,6 @@ void WebAssemblyAsmPrinter::emitJumpTableInfo() { // Nothing to do; jump tables are incorporated into the instruction stream. } -void WebAssemblyAsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *Sym) - const { - AsmPrinter::emitLinkage(GV, Sym); - // This gets called before the function label and type are emitted. - // We use it to emit signatures of external functions. - // FIXME casts! - const_cast(this) - ->emitExternalDecls(*MMI->getModule()); -} - - void WebAssemblyAsmPrinter::emitFunctionBodyStart() { const Function &F = MF->getFunction(); SmallVector ResultVTs; @@ -612,7 +627,7 @@ void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) { // function body. 
if (isVerbose()) { OutStreamer->AddComment("fallthrough-return"); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } break; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h index 6b2f2000a0bd..65d6ee415180 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h @@ -66,10 +66,10 @@ public: void emitEndOfAsmFile(Module &M) override; void EmitProducerInfo(Module &M); void EmitTargetFeatures(Module &M); + void emitSymbolType(const MCSymbolWasm *Sym); void emitGlobalVariable(const GlobalVariable *GV) override; void emitJumpTableInfo() override; void emitConstantPool() override; - void emitLinkage(const GlobalValue *, MCSymbol *) const override; void emitFunctionBodyStart() override; void emitInstruction(const MachineInstr *MI) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -84,7 +84,7 @@ public: wasm::WasmSignature *Sig, bool &InvokeDetected); MCSymbol *getOrCreateWasmSymbol(StringRef Name); - void emitExternalDecls(const Module &M); + void emitDecls(const Module &M); }; } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 17e867e4c7d8..02e873a0f9a6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -1716,7 +1716,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { // Rewrite MBB operands to be depth immediates. SmallVector Ops(MI.operands()); while (MI.getNumOperands() > 0) - MI.RemoveOperand(MI.getNumOperands() - 1); + MI.removeOperand(MI.getNumOperands() - 1); for (auto MO : Ops) { if (MO.isMBB()) { if (MI.getOpcode() == WebAssembly::DELEGATE) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp index b94981245f8b..81fe5395a6de 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp @@ -14,6 +14,7 @@ #include "WebAssemblyExceptionInfo.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "Utils/WebAssemblyUtilities.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp index 5bdec89f1125..fa5b4a508fa5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp @@ -130,7 +130,7 @@ MachineBasicBlock *fixBrTableDefault(MachineInstr &MI, MachineBasicBlock *MBB, return nullptr; // Remove the dummy default target and install the real one. 
- MI.RemoveOperand(MI.getNumExplicitOperands() - 1); + MI.removeOperand(MI.getNumExplicitOperands() - 1); MI.addOperand(MF, MachineOperand::CreateMBB(TBB)); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 1ceae59dc993..83e71d731bfa 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -55,6 +55,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -221,10 +222,8 @@ private: assert(!Enterers.count(MBB)); if (Blocks.insert(MBB).second) { for (auto *Pred : MBB->predecessors()) { - if (!AddedToWorkList.count(Pred)) { + if (AddedToWorkList.insert(Pred).second) WorkList.push_back(Pred); - AddedToWorkList.insert(Pred); - } } } } @@ -491,6 +490,46 @@ FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { return new WebAssemblyFixIrreducibleControlFlow(); } +// Test whether the given register has an ARGUMENT def. +static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { + for (const auto &Def : MRI.def_instructions(Reg)) + if (WebAssembly::isArgument(Def.getOpcode())) + return true; + return false; +} + +// Add a register definition with IMPLICIT_DEFs for every register to cover for +// register uses that don't have defs in every possible path. +// TODO: This is fairly heavy-handed; find a better approach. +static void addImplicitDefs(MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &Entry = *MF.begin(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { + Register Reg = Register::index2VirtReg(I); + + // Skip unused registers. + if (MRI.use_nodbg_empty(Reg)) + continue; + + // Skip registers that have an ARGUMENT definition. + if (hasArgumentDef(Reg, MRI)) + continue; + + BuildMI(Entry, Entry.begin(), DebugLoc(), + TII.get(WebAssembly::IMPLICIT_DEF), Reg); + } + + // Move ARGUMENT_* instructions to the top of the entry block, so that their + // liveness reflects the fact that these really are live-in values. + for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) { + if (WebAssembly::isArgument(MI.getOpcode())) { + MI.removeFromParent(); + Entry.insert(Entry.begin(), &MI); + } + } +} + bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" @@ -505,8 +544,15 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { // We rewrote part of the function; recompute relevant things. - MF.getRegInfo().invalidateLiveness(); MF.RenumberBlocks(); + // Now we've inserted dispatch blocks, some register uses can have incoming + // paths without a def. For example, before this pass register %a was + // defined in BB1 and used in BB2, and there was only one path from BB1 and + // BB2. But if this pass inserts a dispatch block having multiple + // predecessors between the two BBs, now there are paths to BB2 without + // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding + // IMPLICIT_DEFs to all regs is one simple way to fix it. 
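The dominance hazard that the comment above describes can be reproduced outside LLVM. Below is a small self-contained C++ sketch (illustrative only: integer ids stand in for basic blocks, and the dominates() helper is invented for this example rather than taken from LLVM's MachineFunction API):

#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <vector>

// A block `Def` dominates a block `Use` iff every path from `Entry` to
// `Use` passes through `Def`. We test that by pretending `Def` has been
// removed and asking whether `Use` is still reachable from `Entry`.
static bool dominates(const std::map<int, std::vector<int>> &CFG,
                      int Entry, int Def, int Use) {
  std::set<int> Seen{Def}; // treat `Def` as deleted
  std::function<bool(int)> Reach = [&](int BB) {
    if (BB == Use)
      return true;
    if (!Seen.insert(BB).second)
      return false;
    auto It = CFG.find(BB);
    if (It == CFG.end())
      return false;
    for (int Succ : It->second)
      if (Reach(Succ))
        return true;
    return false;
  };
  return !Reach(Entry);
}

int main() {
  // Before: entry(0) -> BB1(1) -> BB2(2); %a is defined in BB1, used in BB2.
  std::map<int, std::vector<int>> Before{{0, {1}}, {1, {2}}};
  // After a dispatch block (3) is inserted: entry -> dispatch -> {BB1, BB2}.
  std::map<int, std::vector<int>> After{{0, {3}}, {3, {1, 2}}, {1, {2}}};
  std::cout << dominates(Before, 0, 1, 2) << '\n'; // 1: use is dominated
  std::cout << dominates(After, 0, 1, 2) << '\n';  // 0: bypass via dispatch
}

In the Before graph every path to BB2 runs through BB1, so BB1's def of %a dominates the use; once the dispatch block offers a direct edge to BB2, it no longer does. An IMPLICIT_DEF in the entry block, as arranged by addImplicitDefs() above, puts a def on every path and restores the property.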
+ addImplicitDefs(MF); return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a221f37cfd94..2636acaf1604 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -19,6 +19,8 @@ #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -159,22 +161,17 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setTargetDAGCombine(ISD::VECTOR_SHUFFLE); // Combine extends of extract_subvectors into widening ops - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}); // Combine int_to_fp or fp_extend of extract_vectors and vice versa into // conversion ops - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - setTargetDAGCombine(ISD::FP_EXTEND); - setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); + setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND, + ISD::EXTRACT_SUBVECTOR}); // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa // into conversion ops - setTargetDAGCombine(ISD::FP_TO_SINT_SAT); - setTargetDAGCombine(ISD::FP_TO_UINT_SAT); - setTargetDAGCombine(ISD::FP_ROUND); - setTargetDAGCombine(ISD::CONCAT_VECTORS); + setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, + ISD::FP_ROUND, ISD::CONCAT_VECTORS}); setTargetDAGCombine(ISD::TRUNCATE); @@ -577,7 +574,7 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, // Move the function pointer to the end of the arguments for indirect calls if (IsIndirect) { auto FnPtr = CallParams.getOperand(0); - CallParams.RemoveOperand(0); + CallParams.removeOperand(0); // For funcrefs, call_indirect is done through __funcref_call_table and the // funcref is always installed in slot 0 of the table, therefore instead of having @@ -909,6 +906,30 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const { return TargetLoweringBase::getPreferredVectorAction(VT); } +bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts( + SDValue Op, const TargetLoweringOpt &TLO) const { + // ISel process runs DAGCombiner after legalization; this step is called + // SelectionDAG optimization phase. This post-legalization combining process + // runs DAGCombiner on each node, and if there was a change to be made, + // re-runs legalization again on it and its user nodes to make sure + // everything is in a legalized state. + // + // The legalization calls lowering routines, and we do our custom lowering for + // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements + // into zeros. But there is a set of routines in DAGCombiner that turns unused + // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts + // turns unused vector elements into undefs. But this routine does not work + // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This + // combination can result in an infinite loop, in which undefs are converted to + // zeros in legalization and back to undefs in combining.
+ // + // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from + // running for build_vectors. + if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys) + return false; + return true; +} + //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. //===----------------------------------------------------------------------===// @@ -2110,8 +2131,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, auto GetMostCommon = [](auto &Counts) { auto CommonIt = - std::max_element(Counts.begin(), Counts.end(), - [](auto A, auto B) { return A.second < B.second; }); + std::max_element(Counts.begin(), Counts.end(), llvm::less_second()); assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); return *CommonIt; }; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index f7b460f61dbb..d86f2e59e3d2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -113,6 +113,10 @@ private: report_fatal_error("llvm.clear_cache is not supported on wasm"); } + bool + shouldSimplifyDemandedVectorElts(SDValue Op, + const TargetLoweringOpt &TLO) const override; + // Custom lowering hooks. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 42183d1645e1..ed80ed39f09c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -15,7 +15,7 @@ let UseNamedOperandTable = 1 in multiclass ATOMIC_I pattern_r, string asmstr_r, string asmstr_s, bits<32> atomic_op, - string is64 = "false"> { + bit is64 = false> { defm "" : I, Requires<[HasAtomics]>; @@ -38,13 +38,13 @@ defm MEMORY_ATOMIC_NOTIFY_A32 : (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", - "memory.atomic.notify \t${off}${p2align}", 0x00, "false">; + "memory.atomic.notify \t${off}${p2align}", 0x00, false>; defm MEMORY_ATOMIC_NOTIFY_A64 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$count), (outs), (ins P2Align:$p2align, offset64_op:$off), [], "memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", - "memory.atomic.notify \t${off}${p2align}", 0x00, "true">; + "memory.atomic.notify \t${off}${p2align}", 0x00, true>; let mayLoad = 1 in { defm MEMORY_ATOMIC_WAIT32_A32 : ATOMIC_I<(outs I32:$dst), @@ -52,28 +52,28 @@ defm MEMORY_ATOMIC_WAIT32_A32 : I64:$timeout), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "memory.atomic.wait32 \t${off}${p2align}", 0x01, "false">; + "memory.atomic.wait32 \t${off}${p2align}", 0x01, false>; defm MEMORY_ATOMIC_WAIT32_A64 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset64_op:$off), [], "memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "memory.atomic.wait32 \t${off}${p2align}", 0x01, "true">; + "memory.atomic.wait32 \t${off}${p2align}", 0x01, true>; defm MEMORY_ATOMIC_WAIT64_A32 : ATOMIC_I<(outs I32:$dst), (ins 
P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "memory.atomic.wait64 \t${off}${p2align}", 0x02, "false">; + "memory.atomic.wait64 \t${off}${p2align}", 0x02, false>; defm MEMORY_ATOMIC_WAIT64_A64 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I64:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset64_op:$off), [], "memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "memory.atomic.wait64 \t${off}${p2align}", 0x02, "true">; + "memory.atomic.wait64 \t${off}${p2align}", 0x02, true>; } // mayLoad = 1 } // hasSideEffects = 1 @@ -469,13 +469,13 @@ multiclass WebAssemblyBinRMW; + !strconcat(name, "\t${off}${p2align}"), atomic_op, false>; defm "_A64" : ATOMIC_I<(outs rc:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$val), (outs), (ins P2Align:$p2align, offset64_op:$off), [], !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"), - !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">; + !strconcat(name, "\t${off}${p2align}"), atomic_op, true>; } defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW; @@ -767,14 +767,14 @@ multiclass WebAssemblyTerRMW; + !strconcat(name, "\t${off}${p2align}"), atomic_op, false>; defm "_A64" : ATOMIC_I<(outs rc:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$exp, rc:$new_), (outs), (ins P2Align:$p2align, offset64_op:$off), [], !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"), - !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">; + !strconcat(name, "\t${off}${p2align}"), atomic_op, true>; } defm ATOMIC_RMW_CMPXCHG_I32 : diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 4dc0c9a46c38..f2e73dd19d6b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -14,12 +14,12 @@ // WebAssembly Instruction Format. // We instantiate 2 of these for every actual instruction (register based // and stack based), see below. -class WebAssemblyInst inst, string asmstr, string stack, string is64> +class WebAssemblyInst inst, string asmstr, bit stack, bit is64> : StackRel, RegisterRel, Wasm64Rel, Instruction { bits<32> Inst = inst; // Instruction encoding. - string StackBased = stack; + bit StackBased = stack; string BaseName = NAME; - string IsWasm64 = is64; + bit IsWasm64 = is64; string Wasm32Name = !subst("_A64", "_A32", NAME); let Namespace = "WebAssembly"; let Pattern = []; @@ -30,8 +30,8 @@ class WebAssemblyInst inst, string asmstr, string stack, string is64> } // Normal instructions. Default instantiation of a WebAssemblyInst. -class NI pattern, string stack, - string asmstr = "", bits<32> inst = -1, string is64 = "false"> +class NI pattern, bit stack, + string asmstr = "", bits<32> inst = -1, bit is64 = false> : WebAssemblyInst { dag OutOperandList = oops; dag InOperandList = iops; @@ -54,11 +54,11 @@ class NI pattern, string stack, // there is always an equivalent pair of instructions. multiclass I pattern_r, string asmstr_r = "", string asmstr_s = "", - bits<32> inst = -1, string is64 = "false"> { + bits<32> inst = -1, bit is64 = false> { let isCodeGenOnly = 1 in - def "" : NI; + def "" : NI; let BaseName = NAME in - def _S : NI; + def _S : NI; } // For instructions that have no register ops, so both sets are the same. 
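For context on how the StackBased and IsWasm64 fields just converted to bit are consumed: TableGen compiles each InstrMapping record (see the WebAssemblyInstrInfo.td hunk below, updated for the new 0/1 key encoding) into a generated lookup function in the WebAssembly namespace. A hedged sketch of typical usage follows; toStackForm() is an invented helper, and the exact parameter type of the generated function may differ between LLVM versions:

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // declares the generated mappings
#include <cassert>

// getStackOpcode() is generated from the getStackOpcode InstrMapping and
// returns -1 for instructions that have no stack-based counterpart.
static unsigned toStackForm(unsigned RegOpc) {
  int StackOpc = llvm::WebAssembly::getStackOpcode(RegOpc);
  assert(StackOpc != -1 && "register-based opcode has no stack-based twin");
  return static_cast<unsigned>(StackOpc);
}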
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 3fb0af1d47a0..134a0efc6822 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -66,6 +66,10 @@ def HasReferenceTypes : Predicate<"Subtarget->hasReferenceTypes()">, AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">; +def HasExtendedConst : + Predicate<"Subtarget->hasExtendedConst()">, + AssemblerPredicate<(all_of FeatureExtendedConst), "extended-const">; + //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Node Types. //===----------------------------------------------------------------------===// @@ -221,8 +225,8 @@ def getStackOpcode : InstrMapping { let FilterClass = "StackRel"; let RowFields = ["BaseName"]; let ColFields = ["StackBased"]; - let KeyCol = ["false"]; - let ValueCols = [["true"]]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; } //===----------------------------------------------------------------------===// @@ -234,8 +238,8 @@ def getRegisterOpcode : InstrMapping { let FilterClass = "RegisterRel"; let RowFields = ["BaseName"]; let ColFields = ["StackBased"]; - let KeyCol = ["true"]; - let ValueCols = [["false"]]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; } //===----------------------------------------------------------------------===// @@ -247,8 +251,8 @@ def getWasm64Opcode : InstrMapping { let FilterClass = "Wasm64Rel"; let RowFields = ["Wasm32Name"]; let ColFields = ["IsWasm64"]; - let KeyCol = ["false"]; - let ValueCols = [["true"]]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index a70f62dde845..d5bb9e9e48b4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -47,13 +47,13 @@ multiclass WebAssemblyLoad, + !strconcat(Name, "\t${off}${p2align}"), Opcode, false>, Requires; defm "_A64": I<(outs rc:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), - !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">, + !strconcat(Name, "\t${off}${p2align}"), Opcode, true>, Requires; } } @@ -244,7 +244,7 @@ multiclass WebAssemblyStore, + !strconcat(Name, "\t${off}${p2align}"), Opcode, false>, Requires; let mayStore = 1, UseNamedOperandTable = 1 in defm "_A64" : I<(outs), @@ -252,7 +252,7 @@ multiclass WebAssemblyStore, + !strconcat(Name, "\t${off}${p2align}"), Opcode, true>, Requires; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index 76a88caafc47..608963d58863 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -27,6 +27,12 @@ multiclass REF_I { vt#".select\t$dst, $lhs, $rhs, $cond", vt#".select", 0x1b>, Requires<[HasReferenceTypes]>; + defm REF_IS_NULL_#rc + : I<(outs I32:$dst), (ins rc:$ref), (outs), (ins), + [(set I32:$dst, (!cast("int_wasm_ref_is_null_" # ht) rc:$ref))], + "ref.is_null\t$ref", + "ref.is_null", 0xd1>, + Requires<[HasReferenceTypes]>; } defm "" : REF_I; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td 
b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 5bb12c7fbdc7..ed3cc7ed1c53 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1229,9 +1229,9 @@ def trunc_sat_zero_s : SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>; def trunc_sat_zero_u : SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>; -defm "" : SIMDConvert; -defm "" : SIMDConvert; // Integer to floating point: convert @@ -1307,7 +1307,7 @@ defm "" : SIMDConvert, SDTCisVec<1>]>; def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>; defm "" : SIMDConvert; + "demote_f64x2_zero", 0x5e>; def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>; @@ -1334,7 +1334,37 @@ defm Q15MULR_SAT_S : SIMDBinary; //===----------------------------------------------------------------------===// -// Fused Multiply- Add and Subtract (FMA/FMS) +// Relaxed swizzle +//===----------------------------------------------------------------------===// + +defm RELAXED_SWIZZLE : + RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), + [(set (v16i8 V128:$dst), + (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], + "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 0x100>; + +//===----------------------------------------------------------------------===// +// Relaxed floating-point to int conversions +//===----------------------------------------------------------------------===// + +multiclass RelaxedConvert simdop> { + defm op#_#vec : + RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), + [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], + vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>; +} + +defm "" : RelaxedConvert; +defm "" : RelaxedConvert; +defm "" : RelaxedConvert; +defm "" : RelaxedConvert; + +//===----------------------------------------------------------------------===// +// Relaxed Fused Multiply- Add and Subtract (FMA/FMS) //===----------------------------------------------------------------------===// multiclass SIMDFM simdopA, bits<32> simdopS> { @@ -1342,16 +1372,18 @@ multiclass SIMDFM simdopA, bits<32> simdopS> { RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_fma (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], - vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>; + vec.prefix#".relaxed_fma\t$dst, $a, $b, $c", + vec.prefix#".relaxed_fma", simdopA>; defm FMS_#vec : RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_fms (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], - vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>; + vec.prefix#".relaxed_fms\t$dst, $a, $b, $c", + vec.prefix#".relaxed_fms", simdopS>; } -defm "" : SIMDFM; -defm "" : SIMDFM; +defm "" : SIMDFM; +defm "" : SIMDFM; //===----------------------------------------------------------------------===// // Laneselect @@ -1362,58 +1394,61 @@ multiclass SIMDLANESELECT op> { RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_laneselect (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], - vec.prefix#".laneselect\t$dst, $a, $b, $c", vec.prefix#".laneselect", op>; + vec.prefix#".relaxed_laneselect\t$dst, $a, $b, $c", + vec.prefix#".relaxed_laneselect", op>; } -defm "" : 
SIMDLANESELECT; -defm "" : SIMDLANESELECT; -defm "" : SIMDLANESELECT; -defm "" : SIMDLANESELECT; - - -//===----------------------------------------------------------------------===// -// Relaxed swizzle -//===----------------------------------------------------------------------===// - -defm RELAXED_SWIZZLE : - RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), - [(set (v16i8 V128:$dst), - (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 162>; +defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; //===----------------------------------------------------------------------===// // Relaxed floating-point min and max. //===----------------------------------------------------------------------===// -multiclass SIMD_RELAXED_FMINMAX simdopMin, bits<32> simdopMax> { - defm RELAXED_FMIN_#vec : - RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins), - [(set (vec.vt V128:$dst), (int_wasm_relaxed_min - (vec.vt V128:$a), (vec.vt V128:$b)))], - vec.prefix#".relaxed_min\t$dst, $a, $b", vec.prefix#".relaxed_min", simdopMin>; - defm RELAXED_FMAX_#vec : - RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins), - [(set (vec.vt V128:$dst), (int_wasm_relaxed_max - (vec.vt V128:$a), (vec.vt V128:$b)))], - vec.prefix#".relaxed_max\t$dst, $a, $b", vec.prefix#".relaxed_max", simdopMax>; +multiclass RelaxedBinary simdop> { + defm _#vec : RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + (outs), (ins), + [(set (vec.vt V128:$dst), + (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], + vec.prefix#"."#name#"\t$dst, $lhs, $rhs", + vec.prefix#"."#name, simdop>; } -defm "" : SIMD_RELAXED_FMINMAX; -defm "" : SIMD_RELAXED_FMINMAX; +defm SIMD_RELAXED_FMIN : + RelaxedBinary; +defm SIMD_RELAXED_FMAX : + RelaxedBinary; +defm SIMD_RELAXED_FMIN : + RelaxedBinary; +defm SIMD_RELAXED_FMAX : + RelaxedBinary; //===----------------------------------------------------------------------===// -// Relaxed floating-point to int conversions +// Relaxed rounding q15 multiplication //===----------------------------------------------------------------------===// -multiclass SIMD_RELAXED_CONVERT simdop> { - defm op#_#vec : - RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), - [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], - vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>; -} +defm RELAXED_Q15MULR_S : + RelaxedBinary; -defm "" : SIMD_RELAXED_CONVERT; -defm "" : SIMD_RELAXED_CONVERT; +//===----------------------------------------------------------------------===// +// Relaxed integer dot product +//===----------------------------------------------------------------------===// -defm "" : SIMD_RELAXED_CONVERT; -defm "" : SIMD_RELAXED_CONVERT; +defm RELAXED_DOT : + RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), + [(set (v8i16 V128:$dst), (int_wasm_dot_i8x16_i7x16_signed + (v16i8 V128:$lhs), (v16i8 V128:$rhs)))], + "i16x8.dot_i8x16_i7x16_s\t$dst, $lhs, $rhs", + "i16x8.dot_i8x16_i7x16_s", 0x112>; + +defm RELAXED_DOT_ADD : + RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), + (outs), (ins), + [(set (v4i32 V128:$dst), (int_wasm_dot_i8x16_i7x16_add_signed + (v16i8 V128:$lhs), (v16i8 V128:$rhs), (v4i32 V128:$acc)))], + "i32x4.dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc", + "i32x4.dot_i8x16_i7x16_add_s", 0x113>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index 309fcaf340eb..d16bb6b6648a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -16,6 +16,7 @@ #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -72,9 +73,8 @@ WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) { MachineBasicBlock *EHPad = nullptr; while (!WL.empty()) { MachineBasicBlock *MBB = WL.pop_back_val(); - if (Visited.count(MBB)) + if (!Visited.insert(MBB).second) continue; - Visited.insert(MBB); if (MBB->isEHPad()) { if (EHPad && EHPad != MBB) return nullptr; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index b6c43be03aba..2db4bd822349 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -406,8 +406,9 @@ static bool canThrow(const Value *V) { return true; } -// Get a global variable with the given name. If it doesn't exist declare it, -// which will generate an import and assume that it will exist at link time. +// Get a thread-local global variable with the given name. If it doesn't exist +// declare it, which will generate an import and assume that it will exist at +// link time. static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, WebAssemblyTargetMachine &TM, const char *Name) { @@ -415,16 +416,11 @@ static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, if (!GV) report_fatal_error(Twine("unable to create global: ") + Name); - // If the target supports TLS, make this variable thread-local. We can't just - // unconditionally make it thread-local and depend on - // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has - // the side effect of disallowing the object from being linked into a - // shared-memory module, which we don't want to be responsible for. - auto *Subtarget = TM.getSubtargetImpl(); - auto TLS = Subtarget->hasAtomics() && Subtarget->hasBulkMemory() - ? GlobalValue::LocalExecTLSModel - : GlobalValue::NotThreadLocal; - GV->setThreadLocalMode(TLS); + // Variables created by this function are thread local. If the target does not + // support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade it + // to non-thread-local ones, in which case we don't allow this object to be + // linked with other objects using shared memory. 
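A minimal sketch of the find-or-declare-and-mark-TLS pattern this function implements, through the ordinary IR API (hedged: declareHelper is an invented name, and the real getGlobalVariable() reports a fatal error rather than asserting). The helpers this pass declares this way include __THREW__ and __threwValue:

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include <cassert>

using namespace llvm;

// Find-or-declare a helper global and mark it thread-local up front; on
// targets without atomics + bulk-memory, CoalesceFeaturesAndStripAtomics
// later downgrades the TLS mode again.
static GlobalVariable *declareHelper(Module &M, Type *Ty, const char *Name) {
  auto *GV = dyn_cast_or_null<GlobalVariable>(M.getOrInsertGlobal(Name, Ty));
  assert(GV && "helper symbol already exists with a different kind");
  GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
  return GV;
}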
+ GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); return GV; } @@ -556,7 +552,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { Optional<unsigned> NEltArg; std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs(); SizeArg += 1; - if (NEltArg.hasValue()) + if (NEltArg) NEltArg = NEltArg.getValue() + 1; FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); } @@ -1064,22 +1060,16 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { nullifySetjmp(F); } - if (!Changed) { - // Delete unused global variables and functions - if (ResumeF) - ResumeF->eraseFromParent(); - if (EHTypeIDF) - EHTypeIDF->eraseFromParent(); - if (EmLongjmpF) - EmLongjmpF->eraseFromParent(); - if (SaveSetjmpF) - SaveSetjmpF->eraseFromParent(); - if (TestSetjmpF) - TestSetjmpF->eraseFromParent(); - return false; - } + // Delete unused global variables and functions + for (auto *V : {ThrewGV, ThrewValueGV}) + if (V && V->use_empty()) + V->eraseFromParent(); + for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF, + SaveSetjmpF, TestSetjmpF, WasmLongjmpF, CatchF}) + if (V && V->use_empty()) + V->eraseFromParent(); - return true; + return Changed; } bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { @@ -1324,9 +1314,14 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { BasicBlock *BB = CB->getParent(); if (BB->getParent() != &F) // in other function continue; - if (CB->getOperandBundle(LLVMContext::OB_funclet)) - report_fatal_error( - "setjmp within a catch clause is not supported in Wasm EH"); + if (CB->getOperandBundle(LLVMContext::OB_funclet)) { + std::string S; + raw_string_ostream SS(S); + SS << "In function " + F.getName() + + ": setjmp within a catch clause is not supported in Wasm EH:\n"; + SS << *CB; + report_fatal_error(StringRef(SS.str())); + } CallInst *CI = nullptr; // setjmp cannot throw. So if it is an invoke, lower it to a call @@ -1502,10 +1497,16 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj( for (unsigned I = 0; I < BBs.size(); I++) { BasicBlock *BB = BBs[I]; for (Instruction &I : *BB) { - if (isa<InvokeInst>(&I)) - report_fatal_error("When using Wasm EH with Emscripten SjLj, there is " - "a restriction that `setjmp` function call and " - "exception cannot be used within the same function"); + if (isa<InvokeInst>(&I)) { + std::string S; + raw_string_ostream SS(S); + SS << "In function " << F.getName() + << ": When using Wasm EH with Emscripten SjLj, there is a " + "restriction that `setjmp` function call and exception cannot be " + "used within the same function:\n"; + SS << I; + report_fatal_error(StringRef(SS.str())); + } auto *CI = dyn_cast<CallInst>(&I); if (!CI) continue; @@ -1829,7 +1830,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) { UnwindDest = CPI->getCatchSwitch()->getUnwindDest(); break; - } else if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) { + } + if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) { // getCleanupRetUnwindDest() can return nullptr when // 1. This cleanuppad's matching cleanupret unwinds to caller // 2.
There is no matching cleanupret because it ends with diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp deleted file mode 100644 index ca6f3f194645..000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp +++ /dev/null @@ -1,210 +0,0 @@ -//===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Lower @llvm.global_dtors. -/// -/// WebAssembly doesn't have a builtin way to invoke static destructors. -/// Implement @llvm.global_dtors by creating wrapper functions that are -/// registered in @llvm.global_ctors and which contain a call to -/// `__cxa_atexit` to register their destructor functions. -/// -//===----------------------------------------------------------------------===// - -#include "WebAssembly.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include - -using namespace llvm; - -#define DEBUG_TYPE "wasm-lower-global-dtors" - -namespace { -class LowerGlobalDtors final : public ModulePass { - StringRef getPassName() const override { - return "WebAssembly Lower @llvm.global_dtors"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - ModulePass::getAnalysisUsage(AU); - } - - bool runOnModule(Module &M) override; - -public: - static char ID; - LowerGlobalDtors() : ModulePass(ID) {} -}; -} // End anonymous namespace - -char LowerGlobalDtors::ID = 0; -INITIALIZE_PASS(LowerGlobalDtors, DEBUG_TYPE, - "Lower @llvm.global_dtors for WebAssembly", false, false) - -ModulePass *llvm::createWebAssemblyLowerGlobalDtors() { - return new LowerGlobalDtors(); -} - -bool LowerGlobalDtors::runOnModule(Module &M) { - LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n"); - - GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors"); - if (!GV || !GV->hasInitializer()) - return false; - - const ConstantArray *InitList = dyn_cast(GV->getInitializer()); - if (!InitList) - return false; - - // Validate @llvm.global_dtor's type. - auto *ETy = dyn_cast(InitList->getType()->getElementType()); - if (!ETy || ETy->getNumElements() != 3 || - !ETy->getTypeAtIndex(0U)->isIntegerTy() || - !ETy->getTypeAtIndex(1U)->isPointerTy() || - !ETy->getTypeAtIndex(2U)->isPointerTy()) - return false; // Not (int, ptr, ptr). - - // Collect the contents of @llvm.global_dtors, ordered by priority. Within a - // priority, sequences of destructors with the same associated object are - // recorded so that we can register them as a group. - std::map< - uint16_t, - std::vector>> - > DtorFuncs; - for (Value *O : InitList->operands()) { - auto *CS = dyn_cast(O); - if (!CS) - continue; // Malformed. - - auto *Priority = dyn_cast(CS->getOperand(0)); - if (!Priority) - continue; // Malformed. - uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX); - - Constant *DtorFunc = CS->getOperand(1); - if (DtorFunc->isNullValue()) - break; // Found a null terminator, skip the rest. 
- - Constant *Associated = CS->getOperand(2); - Associated = cast(Associated->stripPointerCasts()); - - auto &AtThisPriority = DtorFuncs[PriorityValue]; - if (AtThisPriority.empty() || AtThisPriority.back().first != Associated) { - std::vector NewList; - NewList.push_back(DtorFunc); - AtThisPriority.push_back(std::make_pair(Associated, NewList)); - } else { - AtThisPriority.back().second.push_back(DtorFunc); - } - } - if (DtorFuncs.empty()) - return false; - - // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); - LLVMContext &C = M.getContext(); - PointerType *VoidStar = Type::getInt8PtrTy(C); - Type *AtExitFuncArgs[] = {VoidStar}; - FunctionType *AtExitFuncTy = - FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs, - /*isVarArg=*/false); - - FunctionCallee AtExit = M.getOrInsertFunction( - "__cxa_atexit", - FunctionType::get(Type::getInt32Ty(C), - {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar}, - /*isVarArg=*/false)); - - // Declare __dso_local. - Constant *DsoHandle = M.getNamedValue("__dso_handle"); - if (!DsoHandle) { - Type *DsoHandleTy = Type::getInt8Ty(C); - GlobalVariable *Handle = new GlobalVariable( - M, DsoHandleTy, /*isConstant=*/true, - GlobalVariable::ExternalWeakLinkage, nullptr, "__dso_handle"); - Handle->setVisibility(GlobalVariable::HiddenVisibility); - DsoHandle = Handle; - } - - // For each unique priority level and associated symbol, generate a function - // to call all the destructors at that level, and a function to register the - // first function with __cxa_atexit. - for (auto &PriorityAndMore : DtorFuncs) { - uint16_t Priority = PriorityAndMore.first; - uint64_t Id = 0; - auto &AtThisPriority = PriorityAndMore.second; - for (auto &AssociatedAndMore : AtThisPriority) { - Constant *Associated = AssociatedAndMore.first; - auto ThisId = Id++; - - Function *CallDtors = Function::Create( - AtExitFuncTy, Function::PrivateLinkage, - "call_dtors" + - (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) - : Twine()) + - (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) - : Twine()) + - (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) - : Twine()), - &M); - BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors); - FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), - /*isVarArg=*/false); - - for (auto Dtor : reverse(AssociatedAndMore.second)) - CallInst::Create(VoidVoid, Dtor, "", BB); - ReturnInst::Create(C, BB); - - Function *RegisterCallDtors = Function::Create( - VoidVoid, Function::PrivateLinkage, - "register_call_dtors" + - (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) - : Twine()) + - (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) - : Twine()) + - (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) - : Twine()), - &M); - BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors); - BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors); - BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors); - - Value *Null = ConstantPointerNull::get(VoidStar); - Value *Args[] = {CallDtors, Null, DsoHandle}; - Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB); - Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res, - Constant::getNullValue(Res->getType())); - BranchInst::Create(FailBB, RetBB, Cmp, EntryBB); - - // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave. - // This should be very rare, because if the process is running out of - // memory before main has even started, something is wrong. 
- CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "", - FailBB); - new UnreachableInst(C, FailBB); - - ReturnInst::Create(C, RetBB); - - // Now register the registration function with @llvm.global_ctors. - appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated); - } - } - - // Now that we've lowered everything, remove @llvm.global_dtors. - GV->eraseFromParent(); - - return true; -} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp index 37ac8e75f4b7..21f6fd37d402 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp @@ -65,6 +65,9 @@ ModulePass *llvm::createWebAssemblyMCLowerPrePass() { // for all functions before AsmPrinter. If this way of doing things is ever // suboptimal, we could opt to make it a MachineFunctionPass and instead use // something like createBarrierNoopPass() to enforce ordering. +// +// The information stored here is essential for emitExternalDecls in the Wasm +// AsmPrinter. bool WebAssemblyMCLowerPrePass::runOnModule(Module &M) { auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); if (!MMIWP) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index ea80e96d50de..96284687971c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -24,6 +24,16 @@ using namespace llvm; WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor. +MachineFunctionInfo *WebAssemblyFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const { + WebAssemblyFunctionInfo *Clone = + DestMF.cloneInfo<WebAssemblyFunctionInfo>(*this); + Clone->MF = &DestMF; + return Clone; +} + void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) { assert(WARegs.empty()); unsigned Reg = UnusedReg; @@ -153,7 +163,7 @@ void WebAssemblyFunctionInfo::initializeBaseYamlFields( addResult(WebAssembly::parseMVT(VT.Value)); if (WasmEHInfo) { for (auto KV : YamlMFI.SrcToUnwindDest) - WasmEHInfo->setUnwindDest(MF.getBlockNumbered(KV.first), - MF.getBlockNumbered(KV.second)); + WasmEHInfo->setUnwindDest(MF->getBlockNumbered(KV.first), - MF->getBlockNumbered(KV.second)); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 413d0d1dc554..619617049bb2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -31,7 +31,7 @@ struct WebAssemblyFunctionInfo; /// This class is derived from MachineFunctionInfo and contains private /// WebAssembly-specific information for each MachineFunction.
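The clone() override added above follows the generic recipe for the MachineFunctionInfo cloning support introduced in this import; sketched for a hypothetical target (the essential step is re-seating the cached back-pointer, which is why the MF member becomes a pointer in the header change below):

    MachineFunctionInfo *MyTargetFunctionInfo::clone(
        BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
        const {
      // cloneInfo<T> copy-constructs this info inside DestMF's allocator.
      auto *Clone = DestMF.cloneInfo<MyTargetFunctionInfo>(*this);
      Clone->MF = &DestMF; // re-point the cached owner at the new function
      return Clone;
    }
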
class WebAssemblyFunctionInfo final : public MachineFunctionInfo { - const MachineFunction &MF; + const MachineFunction *MF; std::vector<MVT> Params; std::vector<MVT> Results; @@ -70,11 +70,16 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { WasmEHFuncInfo *WasmEHInfo = nullptr; public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF) - : MF(MF), WasmEHInfo(MF.getWasmEHFuncInfo()) {} + explicit WebAssemblyFunctionInfo(MachineFunction &MF_) + : MF(&MF_), WasmEHInfo(MF_.getWasmEHFuncInfo()) {} ~WebAssemblyFunctionInfo() override; - const MachineFunction &getMachineFunction() const { return MF; } + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const override; + + const MachineFunction &getMachineFunction() const { return *MF; } void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp index 62fa089a94d4..5d8c58dcc334 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp @@ -16,6 +16,7 @@ #include "WebAssembly.h" #include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; #define DEBUG_TYPE "wasm-nullify-dbg-value-lists" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 6a6cac6d956f..d542ddb45c2e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -49,6 +49,11 @@ class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::TracksLiveness); + } + bool runOnMachineFunction(MachineFunction &MF) override; public: @@ -102,7 +107,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( SplitLIs.clear(); } - // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF + // In FixIrreducibleControlFlow, we conservatively inserted IMPLICIT_DEF // instructions to satisfy LiveIntervals' requirement that all uses be // dominated by defs. Now that LiveIntervals has computed which of these // defs are actually needed and which are dead, remove the dead ones. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp deleted file mode 100644 index 5682cadc1a64..000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ /dev/null @@ -1,126 +0,0 @@ -//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Fix up code to meet LiveInterval's requirements. -/// -/// Some CodeGen passes don't preserve LiveInterval's requirements, because -/// they run after register allocation and it isn't important.
However, -/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code -/// to meet LiveIntervals' requirements; primarily, it ensures that all -/// virtual register uses have definitions (IMPLICIT_DEF definitions if -/// nothing else). -/// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" -#include "Utils/WebAssemblyUtilities.h" -#include "WebAssembly.h" -#include "WebAssemblyMachineFunctionInfo.h" -#include "WebAssemblySubtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "wasm-prepare-for-live-intervals" - -namespace { -class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {} - -private: - StringRef getPassName() const override { - return "WebAssembly Prepare For LiveIntervals"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; -} // end anonymous namespace - -char WebAssemblyPrepareForLiveIntervals::ID = 0; -INITIALIZE_PASS(WebAssemblyPrepareForLiveIntervals, DEBUG_TYPE, - "Fix up code for LiveIntervals", false, false) - -FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { - return new WebAssemblyPrepareForLiveIntervals(); -} - -// Test whether the given register has an ARGUMENT def. -static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { - for (const auto &Def : MRI.def_instructions(Reg)) - if (WebAssembly::isArgument(Def.getOpcode())) - return true; - return false; -} - -bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction( - MachineFunction &MF) { - LLVM_DEBUG({ - dbgs() << "********** Prepare For LiveIntervals **********\n" - << "********** Function: " << MF.getName() << '\n'; - }); - - bool Changed = false; - MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &TII = *MF.getSubtarget().getInstrInfo(); - MachineBasicBlock &Entry = *MF.begin(); - - assert(!mustPreserveAnalysisID(LiveIntervalsID) && - "LiveIntervals shouldn't be active yet!"); - - // We don't preserve SSA form. - MRI.leaveSSA(); - - // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF - // instructions. LiveIntervals requires that all paths to virtual register - // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to - // conservatively satisfy this. - // - // TODO: This is fairly heavy-handed; find a better approach. - // - for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - Register Reg = Register::index2VirtReg(I); - - // Skip unused registers. - if (MRI.use_nodbg_empty(Reg)) - continue; - - // Skip registers that have an ARGUMENT definition. - if (hasArgumentDef(Reg, MRI)) - continue; - - BuildMI(Entry, Entry.begin(), DebugLoc(), - TII.get(WebAssembly::IMPLICIT_DEF), Reg); - Changed = true; - } - - // Move ARGUMENT_* instructions to the top of the entry block, so that their - // liveness reflects the fact that these really are live-in values. 
- for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) { - if (WebAssembly::isArgument(MI.getOpcode())) { - MI.removeFromParent(); - Entry.insert(Entry.begin(), &MI); - } - } - - // Ok, we're now ready to run the LiveIntervals analysis again. - MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); - - return Changed; -} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 71f0bd28e1be..1e2bee7a5c73 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -72,9 +72,6 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { assert(!mustPreserveAnalysisID(LiveIntervalsID) && "LiveIntervals shouldn't be active yet!"); - // We don't preserve SSA or liveness. - MRI.leaveSSA(); - MRI.invalidateLiveness(); for (unsigned PReg = WebAssembly::NoRegister + 1; PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp index 16e05150c64e..74af4c8873f7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -44,7 +44,7 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove( SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val, - SDValue Size, Align Alignment, bool IsVolatile, + SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>(); if (!ST.hasBulkMemory()) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index b553c8150652..780694980523 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -48,6 +48,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasMutableGlobals = false; bool HasTailCall = false; bool HasReferenceTypes = false; + bool HasExtendedConst = false; /// What processor and OS we're targeting.
Triple TargetTriple; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 482837178f3d..76f036358ae8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -25,11 +25,12 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerAtomicPass.h" #include "llvm/Transforms/Utils.h" using namespace llvm; @@ -56,13 +57,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() { auto &PR = *PassRegistry::getPassRegistry(); initializeWebAssemblyAddMissingPrototypesPass(PR); initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR); - initializeLowerGlobalDtorsPass(PR); + initializeLowerGlobalDtorsLegacyPassPass(PR); initializeFixFunctionBitcastsPass(PR); initializeOptimizeReturnedPass(PR); initializeWebAssemblyArgumentMovePass(PR); initializeWebAssemblySetP2AlignOperandsPass(PR); initializeWebAssemblyReplacePhysRegsPass(PR); - initializeWebAssemblyPrepareForLiveIntervalsPass(PR); initializeWebAssemblyOptimizeLiveIntervalsPass(PR); initializeWebAssemblyMemIntrinsicResultsPass(PR); initializeWebAssemblyRegStackifyPass(PR); @@ -87,7 +87,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() { static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM, const Triple &TT) { - if (!RM.hasValue()) { + if (!RM) { // Default to static relocation model. This should always be more optimal // than PIC since the static linker can determine all global addresses and // assume direct function calls. @@ -203,11 +203,12 @@ public: bool StrippedAtomics = false; bool StrippedTLS = false; - if (!Features[WebAssembly::FeatureAtomics]) + if (!Features[WebAssembly::FeatureAtomics]) { StrippedAtomics = stripAtomics(M); - - if (!Features[WebAssembly::FeatureBulkMemory]) StrippedTLS = stripThreadLocals(M); + } else if (!Features[WebAssembly::FeatureBulkMemory]) { + StrippedTLS |= stripThreadLocals(M); + } if (StrippedAtomics && !StrippedTLS) stripThreadLocals(M); @@ -320,6 +321,7 @@ public: FunctionPass *createTargetRegisterAllocator(bool) override; void addIRPasses() override; + void addISelPrepare() override; bool addInstSelector() override; void addPostRegAlloc() override; bool addGCPasses() override { return false; } @@ -335,7 +337,7 @@ public: } // end anonymous namespace TargetTransformInfo -WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) { +WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); } @@ -407,17 +409,11 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) { //===----------------------------------------------------------------------===// void WebAssemblyPassConfig::addIRPasses() { - // Lower atomics and TLS if necessary - addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); - - // This is a no-op if atomics are not used in the module - addPass(createAtomicExpandPass()); - // Add signatures to prototype-less function declarations addPass(createWebAssemblyAddMissingPrototypes()); // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls.
- addPass(createWebAssemblyLowerGlobalDtors()); + addPass(createLowerGlobalDtorsLegacyPass()); // Fix function bitcasts, as WebAssembly requires caller and callee signatures // to match. @@ -455,6 +451,16 @@ void WebAssemblyPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } +void WebAssemblyPassConfig::addISelPrepare() { + // Lower atomics and TLS if necessary + addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); + + // This is a no-op if atomics are not used in the module + addPass(createAtomicExpandPass()); + + TargetPassConfig::addISelPrepare(); +} + bool WebAssemblyPassConfig::addInstSelector() { (void)TargetPassConfig::addInstSelector(); addPass( @@ -517,9 +523,6 @@ void WebAssemblyPassConfig::addPreEmitPass() { // Preparations and optimizations related to register stackification. if (getOptLevel() != CodeGenOpt::None) { - // LiveIntervals isn't commonly run this late. Re-establish preconditions. - addPass(createWebAssemblyPrepareForLiveIntervals()); - // Depend on LiveIntervals and perform some optimizations on it. addPass(createWebAssemblyOptimizeLiveIntervals()); @@ -588,8 +591,7 @@ yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML( bool WebAssemblyTargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { - const auto &YamlMFI = - reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI); + const auto &YamlMFI = static_cast<const yaml::WebAssemblyFunctionInfo &>(MFI); MachineFunction &MF = PFS.MF; MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI); return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index 29e968bfe8eb..5d5378f76567 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -46,7 +46,7 @@ public: return TLOF.get(); } - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool usesPhysRegsForValues() const override { return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index f1ebcbc6fc51..62f7155e794a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -139,3 +139,7 @@ void WebAssemblyTTIImpl::getUnrollingPreferences( // becomes "fall through" to default value of 2.
UP.BEInsns = 2; } + +bool WebAssemblyTTIImpl::supportsTailCalls() const { + return getST()->hasTailCall(); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 50036f7f7e98..fde58a9587b6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -74,6 +74,8 @@ public: bool areInlineCompatible(const Function *Caller, const Function *Callee) const; + + bool supportsTailCalls() const; }; } // end namespace llvm diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index e9ecff3bf514..871b23f80efe 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86IntelInstPrinter.h" #include "MCTargetDesc/X86MCExpr.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86TargetStreamer.h" #include "TargetInfo/X86TargetInfo.h" #include "X86AsmParserCommon.h" @@ -124,12 +125,12 @@ private: bool matchingInlineAsm, unsigned VariantID = 0) { // In Code16GCC mode, match as 32-bit. if (Code16GCC) - SwitchMode(X86::Mode32Bit); + SwitchMode(X86::Is32Bit); unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures, matchingInlineAsm, VariantID); if (Code16GCC) - SwitchMode(X86::Mode16Bit); + SwitchMode(X86::Is16Bit); return rv; } @@ -422,16 +423,18 @@ private: }; class IntelExprStateMachine { - IntelExprState State, PrevState; - unsigned BaseReg, IndexReg, TmpReg, Scale; - int64_t Imm; - const MCExpr *Sym; + IntelExprState State = IES_INIT, PrevState = IES_ERROR; + unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0; + int64_t Imm = 0; + const MCExpr *Sym = nullptr; StringRef SymName; InfixCalculator IC; InlineAsmIdentifierInfo Info; - short BracCount; - bool MemExpr; - bool OffsetOperator; + short BracCount = 0; + bool MemExpr = false; + bool OffsetOperator = false; + bool AttachToOperandIdx = false; + bool IsPIC = false; SMLoc OffsetOperatorLoc; AsmTypeInfo CurType; @@ -446,10 +449,7 @@ private: } public: - IntelExprStateMachine() - : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), - TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0), - MemExpr(false), OffsetOperator(false) {} + IntelExprStateMachine() = default; void addImm(int64_t imm) { Imm += imm; } short getBracCount() const { return BracCount; } @@ -469,9 +469,29 @@ private: bool isValidEndState() const { return State == IES_RBRAC || State == IES_INTEGER; } + + // Is the Intel expression appended after an operand index. + // [OperandIdx][Intel Expression] + // This is necessary for checking if it is an independent + // Intel expression at the back end when parsing inline asm. + void setAppendAfterOperand() { AttachToOperandIdx = true; } + + bool isPIC() const { return IsPIC; } + void setPIC() { IsPIC = true; } + bool hadError() const { return State == IES_ERROR; } const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; } + bool regsUseUpError(StringRef &ErrMsg) { + // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg] + // cannot introduce an additional register in inline asm in PIC model.
+ if (IsPIC && AttachToOperandIdx) + ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!"; + else + ErrMsg = "BaseReg/IndexReg already set!"; + return true; + } + void onOr() { IntelExprState CurrState = State; switch (State) { @@ -655,10 +675,8 @@ private: if (!BaseReg) { BaseReg = TmpReg; } else { - if (IndexReg) { - ErrMsg = "BaseReg/IndexReg already set!"; - return true; - } + if (IndexReg) + return regsUseUpError(ErrMsg); IndexReg = TmpReg; Scale = 0; } @@ -716,10 +734,8 @@ private: if (!BaseReg) { BaseReg = TmpReg; } else { - if (IndexReg) { - ErrMsg = "BaseReg/IndexReg already set!"; - return true; - } + if (IndexReg) + return regsUseUpError(ErrMsg); IndexReg = TmpReg; Scale = 0; } @@ -777,10 +793,8 @@ private: case IES_MULTIPLY: // Index Register - Scale * Register if (PrevState == IES_INTEGER) { - if (IndexReg) { - ErrMsg = "BaseReg/IndexReg already set!"; - return true; - } + if (IndexReg) + return regsUseUpError(ErrMsg); State = IES_REGISTER; IndexReg = Reg; // Get the scale and replace the 'Scale * Register' with '0'. @@ -861,10 +875,8 @@ private: State = IES_INTEGER; if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { // Index Register - Register * Scale - if (IndexReg) { - ErrMsg = "BaseReg/IndexReg already set!"; - return true; - } + if (IndexReg) + return regsUseUpError(ErrMsg); IndexReg = TmpReg; Scale = TmpInt; if (checkScale(Scale, ErrMsg)) @@ -945,7 +957,7 @@ private: BracCount++; return false; } - bool onRBrac() { + bool onRBrac(StringRef &ErrMsg) { IntelExprState CurrState = State; switch (State) { default: @@ -955,8 +967,10 @@ case IES_OFFSET: case IES_REGISTER: case IES_RPAREN: - if (BracCount-- != 1) + if (BracCount-- != 1) { + ErrMsg = "unexpected bracket encountered"; return true; + } State = IES_RBRAC; if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { // If we already have a BaseReg, then assume this is the IndexReg with @@ -964,7 +978,8 @@ if (!BaseReg) { BaseReg = TmpReg; } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); + if (IndexReg) + return regsUseUpError(ErrMsg); IndexReg = TmpReg; Scale = 0; } @@ -1089,9 +1104,9 @@ private: std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst); bool VerifyAndAdjustOperands(OperandVector &OrigOperands, OperandVector &FinalOperands); - bool ParseOperand(OperandVector &Operands); - bool ParseATTOperand(OperandVector &Operands); - bool ParseIntelOperand(OperandVector &Operands); + bool parseOperand(OperandVector &Operands, StringRef Name); + bool parseATTOperand(OperandVector &Operands); + bool parseIntelOperand(OperandVector &Operands, StringRef Name); bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID, InlineAsmIdentifierInfo &Info, SMLoc &End); bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); @@ -1111,6 +1126,8 @@ private: InlineAsmIdentifierInfo &Info, bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator = false); + void tryParseOperandIdx(AsmToken::TokenKind PrevTK, + IntelExprStateMachine &SM); bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, OperandVector &Operands); @@ -1193,19 +1210,19 @@ private: bool is64BitMode() const { // FIXME: Can tablegen auto-generate this? - return getSTI().getFeatureBits()[X86::Mode64Bit]; + return getSTI().getFeatureBits()[X86::Is64Bit]; } bool is32BitMode() const { // FIXME: Can tablegen auto-generate this?
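A reduced sketch of what the Mode*Bit → Is*Bit feature bits just shown are doing in SwitchMode: the three mode bits are mutually exclusive, so switching clears the old one and sets the new one (bit positions here are illustrative, not the real feature numbering):

    #include <bitset>

    enum ModeBit { Is64Bit = 0, Is32Bit = 1, Is16Bit = 2 };

    void switchMode(std::bitset<3> &Modes, ModeBit NewMode) {
      Modes.reset();      // drop whichever mode bit was set
      Modes.set(NewMode); // leave exactly the requested mode
    }
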
- return getSTI().getFeatureBits()[X86::Mode32Bit]; + return getSTI().getFeatureBits()[X86::Is32Bit]; } bool is16BitMode() const { // FIXME: Can tablegen auto-generate this? - return getSTI().getFeatureBits()[X86::Mode16Bit]; + return getSTI().getFeatureBits()[X86::Is16Bit]; } void SwitchMode(unsigned mode) { MCSubtargetInfo &STI = copySTI(); - FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); + FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit}); FeatureBitset OldMode = STI.getFeatureBits() & AllModes; FeatureBitset FB = ComputeAvailableFeatures( STI.ToggleFeature(OldMode.flip(mode))); @@ -1716,11 +1733,11 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, return false; } -bool X86AsmParser::ParseOperand(OperandVector &Operands) { +bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) { if (isParsingIntelSyntax()) - return ParseIntelOperand(Operands); + return parseIntelOperand(Operands, Name); - return ParseATTOperand(Operands); + return parseATTOperand(Operands); } bool X86AsmParser::CreateMemForMSInlineAsm( @@ -1759,8 +1776,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm( // registers in a memory expression, and though inaccessible via rip/eip. if (IsGlobalLV && (BaseReg || IndexReg)) { Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, - End, Size, Identifier, Decl, - FrontendSize)); + End, Size, Identifier, Decl, 0, + BaseReg && IndexReg)); return false; } // Otherwise, we set the base register to a non-zero value @@ -1841,11 +1858,25 @@ bool X86AsmParser::ParseMasmNamedOperator(StringRef Name, return true; } +// Check if the current Intel expression is appended after an operand. +// Like: [Operand][Intel Expression] +void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK, + IntelExprStateMachine &SM) { + if (PrevTK != AsmToken::RBrac) + return; + + SM.setAppendAfterOperand(); +} + bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { MCAsmParser &Parser = getParser(); StringRef ErrMsg; AsmToken::TokenKind PrevTK = AsmToken::Error; + + if (getContext().getObjectFileInfo()->isPositionIndependent()) + SM.setPIC(); + bool Done = false; while (!Done) { // Get a fresh reference on each loop iteration in case the previous @@ -2123,10 +2154,12 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { case AsmToken::LBrac: if (SM.onLBrac()) return Error(Tok.getLoc(), "unexpected bracket encountered"); + tryParseOperandIdx(PrevTK, SM); break; case AsmToken::RBrac: - if (SM.onRBrac()) - return Error(Tok.getLoc(), "unexpected bracket encountered"); + if (SM.onRBrac(ErrMsg)) { + return Error(Tok.getLoc(), ErrMsg); + } break; case AsmToken::LParen: SM.onLParen(); break; case AsmToken::RParen: SM.onRParen(); break; @@ -2477,7 +2510,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { return false; } -bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) { +bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc Start, End; @@ -2552,6 +2585,8 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) { StringRef ErrMsg; unsigned BaseReg = SM.getBaseReg(); unsigned IndexReg = SM.getIndexReg(); + if (IndexReg && BaseReg == X86::RIP) + BaseReg = 0; unsigned Scale = SM.getScale(); if (!PtrInOperand) Size = SM.getElementSize() << 3; @@ -2597,25 +2632,49 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands)
{ // When parsing x64 MS-style assembly, all non-absolute references to a named // variable default to RIP-relative. - if (Parser.isParsingMasm() && is64BitMode() && SM.getElementSize() > 0) { - Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, - BaseReg, IndexReg, Scale, Start, - End, Size, - /*DefaultBaseReg=*/X86::RIP)); - return false; + unsigned DefaultBaseReg = X86::NoRegister; + bool MaybeDirectBranchDest = true; + + if (Parser.isParsingMasm()) { + bool IsUnconditionalBranch = + Name.equals_insensitive("jmp") || Name.equals_insensitive("call"); + if (is64BitMode() && SM.getElementSize() > 0) { + DefaultBaseReg = X86::RIP; + } + if (IsUnconditionalBranch) { + if (PtrInOperand) { + MaybeDirectBranchDest = false; + if (is64BitMode()) + DefaultBaseReg = X86::RIP; + } else if (!BaseReg && !IndexReg && Disp && + Disp->getKind() == MCExpr::SymbolRef) { + if (is64BitMode()) { + if (SM.getSize() == 8) { + MaybeDirectBranchDest = false; + DefaultBaseReg = X86::RIP; + } + } else { + if (SM.getSize() == 4 || SM.getSize() == 2) + MaybeDirectBranchDest = false; + } + } + } } - if ((BaseReg || IndexReg || RegNo)) - Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, - BaseReg, IndexReg, Scale, Start, - End, Size)); + if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister)) - Operands.push_back(X86Operand::CreateMem( + getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End, + Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr, + /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest)); else - Operands.push_back( - X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size)); + Operands.push_back(X86Operand::CreateMem( + getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(), + /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false, + MaybeDirectBranchDest)); return false; } -bool X86AsmParser::ParseATTOperand(OperandVector &Operands) { +bool X86AsmParser::parseATTOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); switch (getLexer().getKind()) { case AsmToken::Dollar: { @@ -2722,7 +2781,7 @@ bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z, if (!getLexer().is(AsmToken::RCurly)) return Error(getLexer().getLoc(), "Expected } at this point"); Parser.Lex(); // Eat '}' - // Assign Z with the {z} mark opernad + // Assign Z with the {z} mark operand Z = X86Operand::CreateToken("{z}", StartLoc); return false; } @@ -3346,7 +3405,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Name = Next; PatchedName = Name; - ForcedDataPrefix = X86::Mode32Bit; + ForcedDataPrefix = X86::Is32Bit; IsPrefix = false; } } @@ -3371,7 +3430,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Read the operands.
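The MaybeDirectBranchDest flag threaded through above encodes a MASM rule for unconditional branches; restated as a small standalone predicate (simplified, and the helper name is mine; sizes in bytes):

    // Returns true when a MASM memory operand on jmp/call must be treated as
    // an indirect branch target rather than a possible direct destination.
    bool mustBeIndirectBranchDest(bool PtrInOperand, bool Is64Bit,
                                  unsigned SizeInBytes) {
      if (PtrInOperand)  // "jmp dword ptr Sym" is always indirect
        return true;
      if (Is64Bit)       // a qword symbol reference is a load, not a label
        return SizeInBytes == 8;
      return SizeInBytes == 4 || SizeInBytes == 2;
    }
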
while (true) { - if (ParseOperand(Operands)) + if (parseOperand(Operands, Name)) return true; if (HandleAVX512Operand(Operands)) return true; @@ -3774,84 +3833,27 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { } bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { + using namespace X86; const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - - switch (Inst.getOpcode()) { - case X86::VGATHERDPDYrm: - case X86::VGATHERDPDrm: - case X86::VGATHERDPSYrm: - case X86::VGATHERDPSrm: - case X86::VGATHERQPDYrm: - case X86::VGATHERQPDrm: - case X86::VGATHERQPSYrm: - case X86::VGATHERQPSrm: - case X86::VPGATHERDDYrm: - case X86::VPGATHERDDrm: - case X86::VPGATHERDQYrm: - case X86::VPGATHERDQrm: - case X86::VPGATHERQDYrm: - case X86::VPGATHERQDrm: - case X86::VPGATHERQQYrm: - case X86::VPGATHERQQrm: { - unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); - unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg()); - unsigned Index = - MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg()); - if (Dest == Mask || Dest == Index || Mask == Index) - return Warning(Ops[0]->getStartLoc(), "mask, index, and destination " - "registers should be distinct"); - break; - } - case X86::VGATHERDPDZ128rm: - case X86::VGATHERDPDZ256rm: - case X86::VGATHERDPDZrm: - case X86::VGATHERDPSZ128rm: - case X86::VGATHERDPSZ256rm: - case X86::VGATHERDPSZrm: - case X86::VGATHERQPDZ128rm: - case X86::VGATHERQPDZ256rm: - case X86::VGATHERQPDZrm: - case X86::VGATHERQPSZ128rm: - case X86::VGATHERQPSZ256rm: - case X86::VGATHERQPSZrm: - case X86::VPGATHERDDZ128rm: - case X86::VPGATHERDDZ256rm: - case X86::VPGATHERDDZrm: - case X86::VPGATHERDQZ128rm: - case X86::VPGATHERDQZ256rm: - case X86::VPGATHERDQZrm: - case X86::VPGATHERQDZ128rm: - case X86::VPGATHERQDZ256rm: - case X86::VPGATHERQDZrm: - case X86::VPGATHERQQZ128rm: - case X86::VPGATHERQQZ256rm: - case X86::VPGATHERQQZrm: { - unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); - unsigned Index = - MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg()); - if (Dest == Index) - return Warning(Ops[0]->getStartLoc(), "index and destination registers " - "should be distinct"); - break; - } - case X86::V4FMADDPSrm: - case X86::V4FMADDPSrmk: - case X86::V4FMADDPSrmkz: - case X86::V4FMADDSSrm: - case X86::V4FMADDSSrmk: - case X86::V4FMADDSSrmkz: - case X86::V4FNMADDPSrm: - case X86::V4FNMADDPSrmk: - case X86::V4FNMADDPSrmkz: - case X86::V4FNMADDSSrm: - case X86::V4FNMADDSSrmk: - case X86::V4FNMADDSSrmkz: - case X86::VP4DPWSSDSrm: - case X86::VP4DPWSSDSrmk: - case X86::VP4DPWSSDSrmkz: - case X86::VP4DPWSSDrm: - case X86::VP4DPWSSDrmk: - case X86::VP4DPWSSDrmkz: { + unsigned Opcode = Inst.getOpcode(); + uint64_t TSFlags = MII.get(Opcode).TSFlags; + if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) || + isVFMADDCSH(Opcode)) { + unsigned Dest = Inst.getOperand(0).getReg(); + for (unsigned i = 2; i < Inst.getNumOperands(); i++) + if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) + return Warning(Ops[0]->getStartLoc(), "Destination register should be " + "distinct from source registers"); + } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) || + isVFMULCSH(Opcode)) { + unsigned Dest = Inst.getOperand(0).getReg(); + for (unsigned i = 1; i < Inst.getNumOperands(); i++) + if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) + return Warning(Ops[0]->getStartLoc(), 
"Destination register should be " + "distinct from source registers"); + } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) || + isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) || + isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) { unsigned Src2 = Inst.getOperand(Inst.getNumOperands() - X86::AddrNumOperands - 1).getReg(); unsigned Src2Enc = MRI->getEncodingValue(Src2); @@ -3865,186 +3867,34 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { RegName.take_front(3) + Twine(GroupEnd) + "' source group"); } - break; - } - case X86::VFCMADDCPHZ128m: - case X86::VFCMADDCPHZ256m: - case X86::VFCMADDCPHZm: - case X86::VFCMADDCPHZ128mb: - case X86::VFCMADDCPHZ256mb: - case X86::VFCMADDCPHZmb: - case X86::VFCMADDCPHZ128mbk: - case X86::VFCMADDCPHZ256mbk: - case X86::VFCMADDCPHZmbk: - case X86::VFCMADDCPHZ128mbkz: - case X86::VFCMADDCPHZ256mbkz: - case X86::VFCMADDCPHZmbkz: - case X86::VFCMADDCPHZ128mk: - case X86::VFCMADDCPHZ256mk: - case X86::VFCMADDCPHZmk: - case X86::VFCMADDCPHZ128mkz: - case X86::VFCMADDCPHZ256mkz: - case X86::VFCMADDCPHZmkz: - case X86::VFCMADDCPHZ128r: - case X86::VFCMADDCPHZ256r: - case X86::VFCMADDCPHZr: - case X86::VFCMADDCPHZ128rk: - case X86::VFCMADDCPHZ256rk: - case X86::VFCMADDCPHZrk: - case X86::VFCMADDCPHZ128rkz: - case X86::VFCMADDCPHZ256rkz: - case X86::VFCMADDCPHZrkz: - case X86::VFCMADDCPHZrb: - case X86::VFCMADDCPHZrbk: - case X86::VFCMADDCPHZrbkz: - case X86::VFCMADDCSHZm: - case X86::VFCMADDCSHZmk: - case X86::VFCMADDCSHZmkz: - case X86::VFCMADDCSHZr: - case X86::VFCMADDCSHZrb: - case X86::VFCMADDCSHZrbk: - case X86::VFCMADDCSHZrbkz: - case X86::VFCMADDCSHZrk: - case X86::VFCMADDCSHZrkz: - case X86::VFMADDCPHZ128m: - case X86::VFMADDCPHZ256m: - case X86::VFMADDCPHZm: - case X86::VFMADDCPHZ128mb: - case X86::VFMADDCPHZ256mb: - case X86::VFMADDCPHZmb: - case X86::VFMADDCPHZ128mbk: - case X86::VFMADDCPHZ256mbk: - case X86::VFMADDCPHZmbk: - case X86::VFMADDCPHZ128mbkz: - case X86::VFMADDCPHZ256mbkz: - case X86::VFMADDCPHZmbkz: - case X86::VFMADDCPHZ128mk: - case X86::VFMADDCPHZ256mk: - case X86::VFMADDCPHZmk: - case X86::VFMADDCPHZ128mkz: - case X86::VFMADDCPHZ256mkz: - case X86::VFMADDCPHZmkz: - case X86::VFMADDCPHZ128r: - case X86::VFMADDCPHZ256r: - case X86::VFMADDCPHZr: - case X86::VFMADDCPHZ128rk: - case X86::VFMADDCPHZ256rk: - case X86::VFMADDCPHZrk: - case X86::VFMADDCPHZ128rkz: - case X86::VFMADDCPHZ256rkz: - case X86::VFMADDCPHZrkz: - case X86::VFMADDCPHZrb: - case X86::VFMADDCPHZrbk: - case X86::VFMADDCPHZrbkz: - case X86::VFMADDCSHZm: - case X86::VFMADDCSHZmk: - case X86::VFMADDCSHZmkz: - case X86::VFMADDCSHZr: - case X86::VFMADDCSHZrb: - case X86::VFMADDCSHZrbk: - case X86::VFMADDCSHZrbkz: - case X86::VFMADDCSHZrk: - case X86::VFMADDCSHZrkz: { - unsigned Dest = Inst.getOperand(0).getReg(); - for (unsigned i = 2; i < Inst.getNumOperands(); i++) - if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) - return Warning(Ops[0]->getStartLoc(), "Destination register should be " - "distinct from source registers"); - break; - } - case X86::VFCMULCPHZ128rm: - case X86::VFCMULCPHZ256rm: - case X86::VFCMULCPHZrm: - case X86::VFCMULCPHZ128rmb: - case X86::VFCMULCPHZ256rmb: - case X86::VFCMULCPHZrmb: - case X86::VFCMULCPHZ128rmbk: - case X86::VFCMULCPHZ256rmbk: - case X86::VFCMULCPHZrmbk: - case X86::VFCMULCPHZ128rmbkz: - case X86::VFCMULCPHZ256rmbkz: - case X86::VFCMULCPHZrmbkz: - case X86::VFCMULCPHZ128rmk: - case X86::VFCMULCPHZ256rmk: - case X86::VFCMULCPHZrmk: - case X86::VFCMULCPHZ128rmkz: - case 
X86::VFCMULCPHZ256rmkz: - case X86::VFCMULCPHZrmkz: - case X86::VFCMULCPHZ128rr: - case X86::VFCMULCPHZ256rr: - case X86::VFCMULCPHZrr: - case X86::VFCMULCPHZ128rrk: - case X86::VFCMULCPHZ256rrk: - case X86::VFCMULCPHZrrk: - case X86::VFCMULCPHZ128rrkz: - case X86::VFCMULCPHZ256rrkz: - case X86::VFCMULCPHZrrkz: - case X86::VFCMULCPHZrrb: - case X86::VFCMULCPHZrrbk: - case X86::VFCMULCPHZrrbkz: - case X86::VFCMULCSHZrm: - case X86::VFCMULCSHZrmk: - case X86::VFCMULCSHZrmkz: - case X86::VFCMULCSHZrr: - case X86::VFCMULCSHZrrb: - case X86::VFCMULCSHZrrbk: - case X86::VFCMULCSHZrrbkz: - case X86::VFCMULCSHZrrk: - case X86::VFCMULCSHZrrkz: - case X86::VFMULCPHZ128rm: - case X86::VFMULCPHZ256rm: - case X86::VFMULCPHZrm: - case X86::VFMULCPHZ128rmb: - case X86::VFMULCPHZ256rmb: - case X86::VFMULCPHZrmb: - case X86::VFMULCPHZ128rmbk: - case X86::VFMULCPHZ256rmbk: - case X86::VFMULCPHZrmbk: - case X86::VFMULCPHZ128rmbkz: - case X86::VFMULCPHZ256rmbkz: - case X86::VFMULCPHZrmbkz: - case X86::VFMULCPHZ128rmk: - case X86::VFMULCPHZ256rmk: - case X86::VFMULCPHZrmk: - case X86::VFMULCPHZ128rmkz: - case X86::VFMULCPHZ256rmkz: - case X86::VFMULCPHZrmkz: - case X86::VFMULCPHZ128rr: - case X86::VFMULCPHZ256rr: - case X86::VFMULCPHZrr: - case X86::VFMULCPHZ128rrk: - case X86::VFMULCPHZ256rrk: - case X86::VFMULCPHZrrk: - case X86::VFMULCPHZ128rrkz: - case X86::VFMULCPHZ256rrkz: - case X86::VFMULCPHZrrkz: - case X86::VFMULCPHZrrb: - case X86::VFMULCPHZrrbk: - case X86::VFMULCPHZrrbkz: - case X86::VFMULCSHZrm: - case X86::VFMULCSHZrmk: - case X86::VFMULCSHZrmkz: - case X86::VFMULCSHZrr: - case X86::VFMULCSHZrrb: - case X86::VFMULCSHZrrbk: - case X86::VFMULCSHZrrbkz: - case X86::VFMULCSHZrrk: - case X86::VFMULCSHZrrkz: { - unsigned Dest = Inst.getOperand(0).getReg(); - for (unsigned i = 1; i < Inst.getNumOperands(); i++) - if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) - return Warning(Ops[0]->getStartLoc(), "Destination register should be " - "distinct from source registers"); - break; - } + } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) || + isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) || + isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) || + isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) { + bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX; + if (HasEVEX) { + unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Index = MRI->getEncodingValue( + Inst.getOperand(4 + X86::AddrIndexReg).getReg()); + if (Dest == Index) + return Warning(Ops[0]->getStartLoc(), "index and destination registers " + "should be distinct"); + } else { + unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + unsigned Index = MRI->getEncodingValue( + Inst.getOperand(3 + X86::AddrIndexReg).getReg()); + if (Dest == Mask || Dest == Index || Mask == Index) + return Warning(Ops[0]->getStartLoc(), "mask, index, and destination " + "registers should be distinct"); + } } - const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to // check this with the legacy encoding, VEX/EVEX/XOP don't use REX. 
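The table-driven rewrite above keeps the old per-opcode constraints intact; the gather check itself, restated as a self-contained function (assuming register encodings compare as the small integers MRI->getEncodingValue returns):

    #include <optional>
    #include <string>

    std::optional<std::string> checkGatherRegs(unsigned DestEnc, unsigned MaskEnc,
                                               unsigned IndexEnc, bool HasEVEX) {
      if (HasEVEX) {
        // EVEX gathers use a k-register mask, so only dest vs. index can clash.
        if (DestEnc == IndexEnc)
          return "index and destination registers should be distinct";
      } else if (DestEnc == MaskEnc || DestEnc == IndexEnc || MaskEnc == IndexEnc) {
        return "mask, index, and destination registers should be distinct";
      }
      return std::nullopt;
    }
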
- if ((MCID.TSFlags & X86II::EncodingMask) == 0) { + if ((TSFlags & X86II::EncodingMask) == 0) { MCPhysReg HReg = X86::NoRegister; - bool UsesRex = MCID.TSFlags & X86II::REX_W; + bool UsesRex = TSFlags & X86II::REX_W; unsigned NumOps = Inst.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MCOperand &MO = Inst.getOperand(i); @@ -4313,15 +4163,15 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode // when matching the instruction. - if (ForcedDataPrefix == X86::Mode32Bit) - SwitchMode(X86::Mode32Bit); + if (ForcedDataPrefix == X86::Is32Bit) + SwitchMode(X86::Is32Bit); // First, try a direct match. FeatureBitset MissingFeatures; unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm, isParsingIntelSyntax()); - if (ForcedDataPrefix == X86::Mode32Bit) { - SwitchMode(X86::Mode16Bit); + if (ForcedDataPrefix == X86::Is32Bit) { + SwitchMode(X86::Is16Bit); ForcedDataPrefix = 0; } switch (OriginalError) { @@ -4840,8 +4690,7 @@ bool X86AsmParser::parseDirectiveNops(SMLoc L) { if (getParser().parseAbsoluteExpression(Control)) return true; } - if (getParser().parseToken(AsmToken::EndOfStatement, - "unexpected token in '.nops' directive")) + if (getParser().parseEOL()) return true; if (NumBytes <= 0) { @@ -4863,7 +4712,7 @@ bool X86AsmParser::parseDirectiveNops(SMLoc L) { /// parseDirectiveEven /// ::= .even bool X86AsmParser::parseDirectiveEven(SMLoc L) { - if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + if (parseEOL()) return false; const MCSection *Section = getStreamer().getCurrentSectionOnly(); @@ -4871,7 +4720,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) { getStreamer().initSections(false, getSTI()); Section = getStreamer().getCurrentSectionOnly(); } - if (Section->UseCodeAlign()) + if (Section->useCodeAlign()) getStreamer().emitCodeAlignment(2, &getSTI(), 0); else getStreamer().emitValueToAlignment(2, 0, 1, 0); @@ -4886,7 +4735,7 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { if (IDVal == ".code16") { Parser.Lex(); if (!is16BitMode()) { - SwitchMode(X86::Mode16Bit); + SwitchMode(X86::Is16Bit); getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); } } else if (IDVal == ".code16gcc") { @@ -4894,19 +4743,19 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { Parser.Lex(); Code16GCC = true; if (!is16BitMode()) { - SwitchMode(X86::Mode16Bit); + SwitchMode(X86::Is16Bit); getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); } } else if (IDVal == ".code32") { Parser.Lex(); if (!is32BitMode()) { - SwitchMode(X86::Mode32Bit); + SwitchMode(X86::Is32Bit); getParser().getStreamer().emitAssemblerFlag(MCAF_Code32); } } else if (IDVal == ".code64") { Parser.Lex(); if (!is64BitMode()) { - SwitchMode(X86::Mode64Bit); + SwitchMode(X86::Is64Bit); getParser().getStreamer().emitAssemblerFlag(MCAF_Code64); } } else { @@ -5035,7 +4884,7 @@ bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFIPushReg(Reg, Loc); + getStreamer().emitWinCFIPushReg(Reg, Loc); return false; } @@ -5055,7 +4904,7 @@ bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); + getStreamer().emitWinCFISetFrame(Reg, Off, Loc); return false; } @@ -5075,7 +4924,7 @@ bool 
X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); + getStreamer().emitWinCFISaveReg(Reg, Off, Loc); return false; } @@ -5095,7 +4944,7 @@ bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); + getStreamer().emitWinCFISaveXMM(Reg, Off, Loc); return false; } @@ -5116,7 +4965,7 @@ bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFIPushFrame(Code, Loc); + getStreamer().emitWinCFIPushFrame(Code, Loc); return false; } diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 67b1244708a8..075b800f9e20 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -17,6 +17,8 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/SMLoc.h" #include <cassert> @@ -35,6 +37,10 @@ struct X86Operand final : public MCParsedAsmOperand { void *OpDecl; bool AddressOf; + /// This is used for inline asm which may specify base reg and index reg for + /// MemOp, e.g. ARR[eax + ecx*4], so no extra reg can be used for MemOp. + bool UseUpRegs = false; + struct TokOp { const char *Data; unsigned Length; @@ -66,6 +72,11 @@ struct X86Operand final : public MCParsedAsmOperand { /// If the memory operand is unsized and there are multiple instruction /// matches, prefer the one with this size. unsigned FrontendSize; + + /// If false, then this operand must be a memory operand for an indirect + /// branch instruction. Otherwise, this operand may belong to either a + /// direct or indirect branch instruction. + bool MaybeDirectBranchDest; }; union { @@ -203,6 +214,10 @@ struct X86Operand final : public MCParsedAsmOperand { assert(Kind == Memory && "Invalid access!"); return Mem.FrontendSize; } + bool isMaybeDirectBranchDest() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.MaybeDirectBranchDest; + } bool isToken() const override {return Kind == Token; } @@ -285,12 +300,6 @@ struct X86Operand final : public MCParsedAsmOperand { bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; } - bool isMemPlaceholder(const MCInstrDesc &Desc) const override { - // Only MS InlineAsm uses global variables with registers rather than - // rip/eip.
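The two booleans added to X86Operand above are easiest to read in isolation (illustrative summary, not the real class layout):

    struct MemOperandFlagsSketch {
      // MS inline asm such as "mov eax, ARR[eax + ecx*4]" already fixes both
      // base and index, so matching may not consume another register for it.
      bool UseUpRegs = false;
      // False only when the operand must be an indirect branch target (e.g.
      // "jmp dword ptr Sym"); true when "jmp Sym" may still be direct.
      bool MaybeDirectBranchDest = true;
    };
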
- return isMem() && !Mem.DefaultBaseReg && Mem.FrontendSize; - } - bool needAddressOf() const override { return AddressOf; } bool isMem() const override { return Kind == Memory; } @@ -374,8 +383,9 @@ struct X86Operand final : public MCParsedAsmOperand { bool isAbsMem() const { return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && - !getMemIndexReg() && getMemScale() == 1; + !getMemIndexReg() && getMemScale() == 1 && isMaybeDirectBranchDest(); } + bool isAVX512RC() const{ return isImm(); } @@ -384,6 +394,8 @@ struct X86Operand final : public MCParsedAsmOperand { return isAbsMem() && Mem.ModeSize == 16; } + bool isMemUseUpRegs() const override { return UseUpRegs; } + bool isSrcIdx() const { return !getMemIndexReg() && getMemScale() == 1 && (getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI || @@ -669,7 +681,8 @@ struct X86Operand final : public MCParsedAsmOperand { static std::unique_ptr<X86Operand> CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(), - void *OpDecl = nullptr, unsigned FrontendSize = 0) { + void *OpDecl = nullptr, unsigned FrontendSize = 0, + bool UseUpRegs = false, bool MaybeDirectBranchDest = true) { auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -680,6 +693,8 @@ struct X86Operand final : public MCParsedAsmOperand { Res->Mem.Size = Size; Res->Mem.ModeSize = ModeSize; Res->Mem.FrontendSize = FrontendSize; + Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest; + Res->UseUpRegs = UseUpRegs; Res->SymName = SymName; Res->OpDecl = OpDecl; Res->AddressOf = false; @@ -693,7 +708,8 @@ struct X86Operand final : public MCParsedAsmOperand { SMLoc EndLoc, unsigned Size = 0, unsigned DefaultBaseReg = X86::NoRegister, StringRef SymName = StringRef(), void *OpDecl = nullptr, - unsigned FrontendSize = 0) { + unsigned FrontendSize = 0, bool UseUpRegs = false, + bool MaybeDirectBranchDest = true) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) && @@ -712,6 +728,8 @@ struct X86Operand final : public MCParsedAsmOperand { Res->Mem.Size = Size; Res->Mem.ModeSize = ModeSize; Res->Mem.FrontendSize = FrontendSize; + Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest; + Res->UseUpRegs = UseUpRegs; Res->SymName = SymName; Res->OpDecl = OpDecl; Res->AddressOf = false; diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 908eb6d1fab1..1da6bf86397e 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -493,16 +493,15 @@ static int readPrefixes(struct InternalInstruction *insn) { insn->displacementSize = (insn->hasAdSize ? 2 : 4); insn->immediateSize = (insn->hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { + insn->displacementSize = 4; if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; insn->addressSize = (insn->hasAdSize ? 4 : 8); - insn->displacementSize = 4; insn->immediateSize = 4; insn->hasOpSize = false; } else { insn->registerSize = (insn->hasOpSize ? 2 : 4); insn->addressSize = (insn->hasAdSize ? 4 : 8); - insn->displacementSize = (insn->hasOpSize ? 2 : 4); insn->immediateSize = (insn->hasOpSize ?
2 : 4); } } @@ -1722,13 +1721,13 @@ X86GenericDisassembler::X86GenericDisassembler( std::unique_ptr<const MCInstrInfo> MII) : MCDisassembler(STI, Ctx), MII(std::move(MII)) { const FeatureBitset &FB = STI.getFeatureBits(); - if (FB[X86::Mode16Bit]) { + if (FB[X86::Is16Bit]) { fMode = MODE_16BIT; return; - } else if (FB[X86::Mode32Bit]) { + } else if (FB[X86::Is32Bit]) { fMode = MODE_32BIT; return; - } else if (FB[X86::Mode64Bit]) { + } else if (FB[X86::Is64Bit]) { fMode = MODE_64BIT; return; } @@ -1801,46 +1800,6 @@ static void translateRegister(MCInst &mcInst, Reg reg) { mcInst.addOperand(MCOperand::createReg(llvmRegnum)); } -/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the -/// immediate Value in the MCInst. -/// -/// @param Value - The immediate Value, has had any PC adjustment made by -/// the caller. -/// @param isBranch - If the instruction is a branch instruction -/// @param Address - The starting address of the instruction -/// @param Offset - The byte offset to this immediate in the instruction -/// @param Width - The byte width of this immediate in the instruction -/// -/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was -/// called then that function is called to get any symbolic information for the -/// immediate in the instruction using the Address, Offset and Width. If that -/// returns non-zero then the symbolic information it returns is used to create -/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() -/// returns zero and isBranch is true then a symbol look up for immediate Value -/// is done and if a symbol is found an MCExpr is created with that, else -/// an MCExpr with the immediate Value is created. This function returns true -/// if it adds an operand to the MCInst and false otherwise. -static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, - uint64_t Address, uint64_t Offset, - uint64_t Width, MCInst &MI, - const MCDisassembler *Dis) { - return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, - Offset, Width); -} - -/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being -/// referenced by a load instruction with the base register that is the rip. -/// These can often be addresses in a literal pool. The Address of the -/// instruction and its immediate Value are used to determine the address -/// being referenced in the literal pool entry. The SymbolLookUp call back will -/// return a pointer to a literal 'C' string if the referenced address is an -/// address into a section with 'C' string literals.
-static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, - const void *Decoder) { - const MCDisassembler *Dis = static_cast<const MCDisassembler *>(Decoder); - Dis->tryAddingPcLoadReferenceComment(Value, Address); -} - static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 0, // SEG_OVERRIDE_NONE X86::CS, @@ -1914,8 +1873,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, uint64_t pcrel = 0; if (type == TYPE_REL) { isBranch = true; - pcrel = insn.startLocation + - insn.immediateOffset + insn.immediateSize; + pcrel = insn.startLocation + insn.length; switch (operand.encoding) { default: break; @@ -1990,9 +1948,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } - if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, - insn.immediateOffset, insn.immediateSize, - mcInst, Dis)) + if (!Dis->tryAddingSymbolicOperand( + mcInst, immediate + pcrel, insn.startLocation, isBranch, + insn.immediateOffset, insn.immediateSize, insn.length)) mcInst.addOperand(MCOperand::createImm(immediate)); if (type == TYPE_MOFFS) { @@ -2129,11 +2087,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, return true; } if (insn.mode == MODE_64BIT){ - pcrel = insn.startLocation + - insn.displacementOffset + insn.displacementSize; - tryAddingPcLoadReferenceComment(insn.startLocation + - insn.displacementOffset, - insn.displacement + pcrel, Dis); + pcrel = insn.startLocation + insn.length; + Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, + insn.startLocation + + insn.displacementOffset); // Section 2.2.1.6 baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP : X86::RIP); @@ -2193,9 +2150,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); + + const uint8_t dispSize = + (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize; + + if (!Dis->tryAddingSymbolicOperand( + mcInst, insn.displacement + pcrel, insn.startLocation, false, + insn.displacementOffset, dispSize, insn.length)) mcInst.addOperand(displacement); mcInst.addOperand(segmentReg); return false; diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h index 24d26751f0a1..61e1b6b27a85 100644 --- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h +++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h @@ -35,7 +35,7 @@ public: X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) : InstrPostProcess(STI, MCII) {} - ~X86InstrPostProcess() {} + ~X86InstrPostProcess() = default; void postProcessInstruction(std::unique_ptr &Inst, const MCInst &MCI) override; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index baacf2f46183..6fd3db4515ec 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -46,7 +46,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (CommentStream) HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII); - printInstFlags(MI, OS); + printInstFlags(MI, OS, STI); // Output CALLpcrel32 as "callq" in 64-bit mode. // In Intel annotation it's always emitted as "call".
@@ -55,7 +55,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address, // InstrInfo.td as soon as Requires clause is supported properly // for InstAlias. if (MI->getOpcode() == X86::CALLpcrel32 && - (STI.getFeatureBits()[X86::Mode64Bit])) { + (STI.getFeatureBits()[X86::Is64Bit])) { OS << "\tcallq\t"; printPCRelImm(MI, Address, 0, OS); } @@ -65,8 +65,8 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address, // 0x66 to be interpreted as "data16" by the asm printer. // Thus we add an adjustment here in order to print the "right" instruction. else if (MI->getOpcode() == X86::DATA16_PREFIX && - STI.getFeatureBits()[X86::Mode16Bit]) { - OS << "\tdata32"; + STI.getFeatureBits()[X86::Is16Bit]) { + OS << "\tdata32"; } // Try to print any aliases first. else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS)) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 3df48b466d07..2d92b8d5b574 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -8,6 +8,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86InstrRelaxTables.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" @@ -222,87 +223,7 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) { static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { unsigned Op = Inst.getOpcode(); - switch (Op) { - default: - return Op; - - // IMUL - case X86::IMUL16rri8: return X86::IMUL16rri; - case X86::IMUL16rmi8: return X86::IMUL16rmi; - case X86::IMUL32rri8: return X86::IMUL32rri; - case X86::IMUL32rmi8: return X86::IMUL32rmi; - case X86::IMUL64rri8: return X86::IMUL64rri32; - case X86::IMUL64rmi8: return X86::IMUL64rmi32; - - // AND - case X86::AND16ri8: return X86::AND16ri; - case X86::AND16mi8: return X86::AND16mi; - case X86::AND32ri8: return X86::AND32ri; - case X86::AND32mi8: return X86::AND32mi; - case X86::AND64ri8: return X86::AND64ri32; - case X86::AND64mi8: return X86::AND64mi32; - - // OR - case X86::OR16ri8: return X86::OR16ri; - case X86::OR16mi8: return X86::OR16mi; - case X86::OR32ri8: return X86::OR32ri; - case X86::OR32mi8: return X86::OR32mi; - case X86::OR64ri8: return X86::OR64ri32; - case X86::OR64mi8: return X86::OR64mi32; - - // XOR - case X86::XOR16ri8: return X86::XOR16ri; - case X86::XOR16mi8: return X86::XOR16mi; - case X86::XOR32ri8: return X86::XOR32ri; - case X86::XOR32mi8: return X86::XOR32mi; - case X86::XOR64ri8: return X86::XOR64ri32; - case X86::XOR64mi8: return X86::XOR64mi32; - - // ADD - case X86::ADD16ri8: return X86::ADD16ri; - case X86::ADD16mi8: return X86::ADD16mi; - case X86::ADD32ri8: return X86::ADD32ri; - case X86::ADD32mi8: return X86::ADD32mi; - case X86::ADD64ri8: return X86::ADD64ri32; - case X86::ADD64mi8: return X86::ADD64mi32; - - // ADC - case X86::ADC16ri8: return X86::ADC16ri; - case X86::ADC16mi8: return X86::ADC16mi; - case X86::ADC32ri8: return X86::ADC32ri; - case X86::ADC32mi8: return X86::ADC32mi; - case X86::ADC64ri8: return X86::ADC64ri32; - case X86::ADC64mi8: return X86::ADC64mi32; - - // SUB - case X86::SUB16ri8: return X86::SUB16ri; - case X86::SUB16mi8: return X86::SUB16mi; - case X86::SUB32ri8: return X86::SUB32ri; - case X86::SUB32mi8: return X86::SUB32mi; - case X86::SUB64ri8: return X86::SUB64ri32; - case X86::SUB64mi8: return X86::SUB64mi32; - - // SBB - 
case X86::SBB16ri8: return X86::SBB16ri; - case X86::SBB16mi8: return X86::SBB16mi; - case X86::SBB32ri8: return X86::SBB32ri; - case X86::SBB32mi8: return X86::SBB32mi; - case X86::SBB64ri8: return X86::SBB64ri32; - case X86::SBB64mi8: return X86::SBB64mi32; - - // CMP - case X86::CMP16ri8: return X86::CMP16ri; - case X86::CMP16mi8: return X86::CMP16mi; - case X86::CMP32ri8: return X86::CMP32ri; - case X86::CMP32mi8: return X86::CMP32mi; - case X86::CMP64ri8: return X86::CMP64ri32; - case X86::CMP64mi8: return X86::CMP64mi32; - - // PUSH - case X86::PUSH32i8: return X86::PUSHi32; - case X86::PUSH16i8: return X86::PUSHi16; - case X86::PUSH64i8: return X86::PUSH64i32; - } + return X86::getRelaxedOpcodeArith(Op); } static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { @@ -372,7 +293,7 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst, /// - If the instruction has a ESP/EBP base register, use SS. /// - Otherwise use DS. uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { - assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) && + assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) && "Prefixes can be added only in 32-bit or 64-bit mode."); const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -413,7 +334,7 @@ uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { if (SegmentReg != 0) return X86::getSegmentOverridePrefixForReg(SegmentReg); - if (STI.hasFeature(X86::Mode64Bit)) + if (STI.hasFeature(X86::Is64Bit)) return X86::CS_Encoding; if (MemoryOperand >= 0) { @@ -572,7 +493,7 @@ bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { return false; // Branches only need to be aligned in 32-bit or 64-bit mode. - if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit))) + if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit))) return false; return true; @@ -834,7 +755,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, void X86AsmBackend::relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. - bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; + bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit]; unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); if (RelaxedOp == Inst.getOpcode()) { @@ -853,7 +774,7 @@ void X86AsmBackend::relaxInstruction(MCInst &Inst, static bool isFullyRelaxed(const MCRelaxableFragment &RF) { auto &Inst = RF.getInst(); auto &STI = *RF.getSubtargetInfo(); - bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; + bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit]; return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); } @@ -1077,9 +998,9 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm, } unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { - if (STI.hasFeature(X86::Mode16Bit)) + if (STI.hasFeature(X86::Is16Bit)) return 4; - if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) + if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit)) return 1; if (STI.getFeatureBits()[X86::TuningFast7ByteNOP]) return 7; @@ -1134,7 +1055,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, }; const char(*Nops)[11] = - STI->getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit; + STI->getFeatureBits()[X86::Is16Bit] ? 
Nops16Bit : Nops32Bit; uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); @@ -1449,7 +1370,6 @@ public: unsigned InstrOffset = 0; unsigned StackAdjust = 0; unsigned StackSize = 0; - unsigned NumDefCFAOffsets = 0; int MinAbsOffset = std::numeric_limits::max(); for (const MCCFIInstruction &Inst : Instrs) { @@ -1457,7 +1377,7 @@ public: default: // Any other CFI directives indicate a frame that we aren't prepared // to represent via compact unwind, so just bail out. - return 0; + return CU::UNWIND_MODE_DWARF; case MCCFIInstruction::OpDefCfaRegister: { // Defines a frame pointer. E.g. // @@ -1471,7 +1391,7 @@ public: // generate a compact unwinding representation, so bail out. if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != (Is64Bit ? X86::RBP : X86::EBP)) - return 0; + return CU::UNWIND_MODE_DWARF; // Reset the counts. memset(SavedRegs, 0, sizeof(SavedRegs)); @@ -1497,7 +1417,6 @@ public: // .cfi_def_cfa_offset 80 // StackSize = Inst.getOffset() / StackDivide; - ++NumDefCFAOffsets; break; } case MCCFIInstruction::OpOffset: { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index 167580ec1ed0..e78e98cfc09e 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -18,10 +18,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Casting.h" -#include +#include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; @@ -349,7 +350,8 @@ void X86InstPrinterCommon::printOptionalSegReg(const MCInst *MI, unsigned OpNo, } } -void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) { +void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O, + const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; unsigned Flags = MI->getFlags(); @@ -379,6 +381,20 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) { O << "\t{disp8}"; else if (Flags & X86::IP_USE_DISP32) O << "\t{disp32}"; + + // Determine where the memory operand starts, if present + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand != -1) + MemoryOperand += X86II::getOperandBias(Desc); + + // Address-Size override prefix + if (Flags & X86::IP_HAS_AD_SIZE && + !X86_MC::needsAddressSizeOverride(*MI, STI, MemoryOperand, TSFlags)) { + if (STI.hasFeature(X86::Is16Bit) || STI.hasFeature(X86::Is64Bit)) + O << "\taddr32\t"; + else if (STI.hasFeature(X86::Is32Bit)) + O << "\taddr16\t"; + } } void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h index fd82bdcd1a23..0cb5bf014b20 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h @@ -33,7 +33,8 @@ public: raw_ostream &O); protected: - void printInstFlags(const MCInst *MI, raw_ostream &O); + void printInstFlags(const MCInst *MI, raw_ostream &O, + const MCSubtargetInfo &STI); void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS); }; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp 
b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp new file mode 100644 index 000000000000..901082ce6cf3 --- /dev/null +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp @@ -0,0 +1,165 @@ +//===- X86InstrRelaxTables.cpp - X86 Instruction Relaxation Tables -*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 instruction relaxation tables. +// +//===----------------------------------------------------------------------===// + +#include "X86InstrRelaxTables.h" +#include "X86InstrInfo.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +// These tables are sorted by their ShortOp value allowing them to be binary +// searched at runtime without the need for additional storage. The enum values +// are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which +// makes sorting these tables a simple matter of alphabetizing the table. +static const X86InstrRelaxTableEntry InstrRelaxTable[] = { + // ADC + { X86::ADC16mi8, X86::ADC16mi }, + { X86::ADC16ri8, X86::ADC16ri }, + { X86::ADC32mi8, X86::ADC32mi }, + { X86::ADC32ri8, X86::ADC32ri }, + { X86::ADC64mi8, X86::ADC64mi32 }, + { X86::ADC64ri8, X86::ADC64ri32 }, + // ADD + { X86::ADD16mi8, X86::ADD16mi }, + { X86::ADD16ri8, X86::ADD16ri }, + { X86::ADD32mi8, X86::ADD32mi }, + { X86::ADD32ri8, X86::ADD32ri }, + { X86::ADD64mi8, X86::ADD64mi32 }, + { X86::ADD64ri8, X86::ADD64ri32 }, + // AND + { X86::AND16mi8, X86::AND16mi }, + { X86::AND16ri8, X86::AND16ri }, + { X86::AND32mi8, X86::AND32mi }, + { X86::AND32ri8, X86::AND32ri }, + { X86::AND64mi8, X86::AND64mi32 }, + { X86::AND64ri8, X86::AND64ri32 }, + // CMP + { X86::CMP16mi8, X86::CMP16mi }, + { X86::CMP16ri8, X86::CMP16ri }, + { X86::CMP32mi8, X86::CMP32mi }, + { X86::CMP32ri8, X86::CMP32ri }, + { X86::CMP64mi8, X86::CMP64mi32 }, + { X86::CMP64ri8, X86::CMP64ri32 }, + // IMUL + { X86::IMUL16rmi8, X86::IMUL16rmi }, + { X86::IMUL16rri8, X86::IMUL16rri }, + { X86::IMUL32rmi8, X86::IMUL32rmi }, + { X86::IMUL32rri8, X86::IMUL32rri }, + { X86::IMUL64rmi8, X86::IMUL64rmi32 }, + { X86::IMUL64rri8, X86::IMUL64rri32 }, + // OR + { X86::OR16mi8, X86::OR16mi }, + { X86::OR16ri8, X86::OR16ri }, + { X86::OR32mi8, X86::OR32mi }, + { X86::OR32ri8, X86::OR32ri }, + { X86::OR64mi8, X86::OR64mi32 }, + { X86::OR64ri8, X86::OR64ri32 }, + // PUSH + { X86::PUSH16i8, X86::PUSHi16 }, + { X86::PUSH32i8, X86::PUSHi32 }, + { X86::PUSH64i8, X86::PUSH64i32 }, + // SBB + { X86::SBB16mi8, X86::SBB16mi }, + { X86::SBB16ri8, X86::SBB16ri }, + { X86::SBB32mi8, X86::SBB32mi }, + { X86::SBB32ri8, X86::SBB32ri }, + { X86::SBB64mi8, X86::SBB64mi32 }, + { X86::SBB64ri8, X86::SBB64ri32 }, + // SUB + { X86::SUB16mi8, X86::SUB16mi }, + { X86::SUB16ri8, X86::SUB16ri }, + { X86::SUB32mi8, X86::SUB32mi }, + { X86::SUB32ri8, X86::SUB32ri }, + { X86::SUB64mi8, X86::SUB64mi32 }, + { X86::SUB64ri8, X86::SUB64ri32 }, + // XOR + { X86::XOR16mi8, X86::XOR16mi }, + { X86::XOR16ri8, X86::XOR16ri }, + { X86::XOR32mi8, X86::XOR32mi }, + { X86::XOR32ri8, X86::XOR32ri }, + { X86::XOR64mi8, X86::XOR64mi32 }, + { X86::XOR64ri8, X86::XOR64ri32 }, +}; + +static const X86InstrRelaxTableEntry * +lookupRelaxTableImpl(ArrayRef Table, + unsigned ShortOp) { +#ifndef NDEBUG + // Make sure the tables are sorted. 
+ static std::atomic RelaxTableChecked(false); + if (!RelaxTableChecked.load(std::memory_order_relaxed)) { + assert(llvm::is_sorted(InstrRelaxTable) && + std::adjacent_find(std::begin(InstrRelaxTable), + std::end(InstrRelaxTable)) == + std::end(InstrRelaxTable) && + "InstrRelaxTable is not sorted and unique!"); + RelaxTableChecked.store(true, std::memory_order_relaxed); + } +#endif + + const X86InstrRelaxTableEntry *Data = llvm::lower_bound(Table, ShortOp); + if (Data != Table.end() && Data->KeyOp == ShortOp) + return Data; + return nullptr; +} + +const X86InstrRelaxTableEntry *llvm::lookupRelaxTable(unsigned ShortOp) { + return lookupRelaxTableImpl(InstrRelaxTable, ShortOp); +} + +namespace { + +// This class stores the short form tables. It is instantiated as a +// ManagedStatic to lazily init the short form table. +struct X86ShortFormTable { + // Stores relaxation table entries sorted by relaxed form opcode. + SmallVector Table; + + X86ShortFormTable() { + for (const X86InstrRelaxTableEntry &Entry : InstrRelaxTable) + Table.push_back({Entry.DstOp, Entry.KeyOp}); + + llvm::sort(Table); + + // Now that it's sorted, ensure its unique. + assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() && + "Short form table is not unique!"); + } +}; +} // namespace + +static ManagedStatic ShortTable; + +const X86InstrRelaxTableEntry *llvm::lookupShortTable(unsigned RelaxOp) { + auto &Table = ShortTable->Table; + auto I = llvm::lower_bound(Table, RelaxOp); + if (I != Table.end() && I->KeyOp == RelaxOp) + return &*I; + return nullptr; +} + +namespace llvm { + +/// Get the short instruction opcode for a given relaxed opcode. +unsigned X86::getShortOpcodeArith(unsigned RelaxOp) { + if (const X86InstrRelaxTableEntry *I = lookupShortTable(RelaxOp)) + return I->DstOp; + return RelaxOp; +} + +/// Get the relaxed instruction opcode for a given short opcode. +unsigned X86::getRelaxedOpcodeArith(unsigned ShortOp) { + if (const X86InstrRelaxTableEntry *I = lookupRelaxTable(ShortOp)) + return I->DstOp; + return ShortOp; +} +} // namespace llvm diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h new file mode 100644 index 000000000000..0551c1861a58 --- /dev/null +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h @@ -0,0 +1,54 @@ +//===-- X86InstrRelaxTables.h - X86 Instruction Relaxation Tables -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the interface to query the X86 instruction relaxation +// tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_X86INSTRRELAXTABLES_H +#define LLVM_LIB_TARGET_X86_X86INSTRRELAXTABLES_H + +#include + +namespace llvm { + +// This struct is used for both the relaxed and short tables. The KeyOp is used +// to determine the sorting order. 
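Both lookup directions above rely on the same pattern: a table statically sorted by its key field, searched with lower_bound, plus a heterogeneous operator< so the probe can be a bare opcode rather than a full entry. A self-contained sketch of that pattern, with made-up opcode numbers:

    #include <algorithm>
    #include <cstdint>
    #include <iterator>

    struct Entry {
      uint16_t KeyOp; // sort key (short-form opcode)
      uint16_t DstOp; // mapped value (relaxed-form opcode)
    };

    // Heterogeneous comparison: lets lower_bound probe with a bare opcode.
    bool operator<(const Entry &E, unsigned Opcode) { return E.KeyOp < Opcode; }

    // Must be sorted by KeyOp; the values here are arbitrary demo numbers.
    static const Entry Table[] = {{3, 103}, {7, 107}, {9, 109}};

    unsigned relax(unsigned ShortOp) {
      const Entry *I =
          std::lower_bound(std::begin(Table), std::end(Table), ShortOp);
      return (I != std::end(Table) && I->KeyOp == ShortOp) ? I->DstOp : ShortOp;
    }

The reverse (relaxed-to-short) direction cannot reuse the static ordering, which is why the code above builds its short-form table lazily, re-sorted by the relaxed opcode, and wraps it in a ManagedStatic so the work happens at most once.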
+struct X86InstrRelaxTableEntry { + uint16_t KeyOp; + uint16_t DstOp; + + bool operator<(const X86InstrRelaxTableEntry &RHS) const { + return KeyOp < RHS.KeyOp; + } + bool operator==(const X86InstrRelaxTableEntry &RHS) const { + return KeyOp == RHS.KeyOp; + } + friend bool operator<(const X86InstrRelaxTableEntry &TE, unsigned Opcode) { + return TE.KeyOp < Opcode; + } +}; + +/// Look up the relaxed form table entry for a given \p ShortOp. +const X86InstrRelaxTableEntry *lookupRelaxTable(unsigned ShortOp); + +/// Look up the short form table entry for a given \p RelaxOp. +const X86InstrRelaxTableEntry *lookupShortTable(unsigned RelaxOp); + +namespace X86 { + +/// Get the short instruction opcode for a given relaxed opcode. +unsigned getShortOpcodeArith(unsigned RelaxOp); + +/// Get the relaxed instruction opcode for a given short opcode. +unsigned getRelaxedOpcodeArith(unsigned ShortOp); +} // namespace X86 +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index 48c335f9a777..2a2afa925a9c 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -40,11 +40,11 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) { - printInstFlags(MI, OS); + printInstFlags(MI, OS, STI); // In 16-bit mode, print data16 as data32. if (MI->getOpcode() == X86::DATA16_PREFIX && - STI.getFeatureBits()[X86::Mode16Bit]) { + STI.getFeatureBits()[X86::Is16Bit]) { OS << "\tdata32"; } else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS)) printInstruction(MI, Address, OS); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 4fa8bc64b245..a21bb6da86de 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -155,65 +156,6 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { return MCFixup::getKindForSize(Size, isPCRel); } -/// \param Op operand # of the memory operand. -/// -/// \returns true if the specified instruction has a 16-bit memory operand. -static bool is16BitMemOperand(const MCInst &MI, unsigned Op, - const MCSubtargetInfo &STI) { - const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg); - const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg); - - unsigned BaseReg = Base.getReg(); - unsigned IndexReg = Index.getReg(); - - if (STI.hasFeature(X86::Mode16Bit) && BaseReg == 0 && IndexReg == 0) - return true; - if ((BaseReg != 0 && - X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) || - (IndexReg != 0 && - X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))) - return true; - return false; -} - -/// \param Op operand # of the memory operand. -/// -/// \returns true if the specified instruction has a 32-bit memory operand. 
-static bool is32BitMemOperand(const MCInst &MI, unsigned Op) { - const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg); - const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg); - - if ((BaseReg.getReg() != 0 && - X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || - (IndexReg.getReg() != 0 && - X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) - return true; - if (BaseReg.getReg() == X86::EIP) { - assert(IndexReg.getReg() == 0 && "Invalid eip-based address."); - return true; - } - if (IndexReg.getReg() == X86::EIZ) - return true; - return false; -} - -/// \param Op operand # of the memory operand. -/// -/// \returns true if the specified instruction has a 64-bit memory operand. -#ifndef NDEBUG -static bool is64BitMemOperand(const MCInst &MI, unsigned Op) { - const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg); - const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg); - - if ((BaseReg.getReg() != 0 && - X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || - (IndexReg.getReg() != 0 && - X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) - return true; - return false; -} -#endif - enum GlobalOffsetTableExprKind { GOT_None, GOT_Normal, GOT_SymDiff }; /// Check if this expression starts with _GLOBAL_OFFSET_TABLE_ and if it is @@ -391,7 +333,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, // Handle %rip relative addressing. if (BaseReg == X86::RIP || BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode - assert(STI.hasFeature(X86::Mode64Bit) && + assert(STI.hasFeature(X86::Is64Bit) && "Rip-relative addressing requires 64-bit mode"); assert(IndexReg.getReg() == 0 && !ForceSIB && "Invalid rip-relative address"); @@ -462,7 +404,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, // 16-bit addressing forms of the ModR/M byte have a different encoding for // the R/M field and are far more limited in which registers can be used. - if (is16BitMemOperand(MI, Op, STI)) { + if (X86_MC::is16BitMemOperand(MI, Op, STI)) { if (BaseReg) { // For 32-bit addressing, the row and column values in Table 2-2 are // basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with @@ -540,7 +482,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). - (!STI.hasFeature(X86::Mode64Bit) || BaseReg != 0)) { + (!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) { if (BaseReg == 0) { // [disp32] in X86-32 mode emitByte(modRMByte(0, RegOpcodeField, 5), OS); @@ -671,75 +613,29 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI, emitByte(0xF2, OS); // Emit the address size opcode prefix as needed. 
- bool NeedAddressOverride; - uint64_t AdSize = TSFlags & X86II::AdSizeMask; - if ((STI.hasFeature(X86::Mode16Bit) && AdSize == X86II::AdSize32) || - (STI.hasFeature(X86::Mode32Bit) && AdSize == X86II::AdSize16) || - (STI.hasFeature(X86::Mode64Bit) && AdSize == X86II::AdSize32)) { - NeedAddressOverride = true; - } else if (MemoryOperand < 0) { - NeedAddressOverride = false; - } else if (STI.hasFeature(X86::Mode64Bit)) { - assert(!is16BitMemOperand(MI, MemoryOperand, STI)); - NeedAddressOverride = is32BitMemOperand(MI, MemoryOperand); - } else if (STI.hasFeature(X86::Mode32Bit)) { - assert(!is64BitMemOperand(MI, MemoryOperand)); - NeedAddressOverride = is16BitMemOperand(MI, MemoryOperand, STI); - } else { - assert(STI.hasFeature(X86::Mode16Bit)); - assert(!is64BitMemOperand(MI, MemoryOperand)); - NeedAddressOverride = !is16BitMemOperand(MI, MemoryOperand, STI); - } - - if (NeedAddressOverride) + if (X86_MC::needsAddressSizeOverride(MI, STI, MemoryOperand, TSFlags) || + Flags & X86::IP_HAS_AD_SIZE) emitByte(0x67, OS); - // Encoding type for this instruction. - uint64_t Encoding = TSFlags & X86II::EncodingMask; - bool HasREX = false; - if (Encoding) - emitVEXOpcodePrefix(MemoryOperand, MI, OS); - else - HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS); - uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { default: break; case X86II::RawFrmDstSrc: { - unsigned siReg = MI.getOperand(1).getReg(); - assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) || - (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) || - (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) && - "SI and DI register sizes do not match"); // Emit segment override opcode prefix as needed (not for %ds). if (MI.getOperand(2).getReg() != X86::DS) emitSegmentOverridePrefix(2, MI, OS); - // Emit AdSize prefix as needed. - if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) || - (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI)) - emitByte(0x67, OS); CurOp += 3; // Consume operands. break; } case X86II::RawFrmSrc: { - unsigned siReg = MI.getOperand(0).getReg(); // Emit segment override opcode prefix as needed (not for %ds). if (MI.getOperand(1).getReg() != X86::DS) emitSegmentOverridePrefix(1, MI, OS); - // Emit AdSize prefix as needed. - if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) || - (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI)) - emitByte(0x67, OS); CurOp += 2; // Consume operands. break; } case X86II::RawFrmDst: { - unsigned siReg = MI.getOperand(0).getReg(); - // Emit AdSize prefix as needed. - if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::EDI) || - (STI.hasFeature(X86::Mode32Bit) && siReg == X86::DI)) - emitByte(0x67, OS); ++CurOp; // Consume operand. break; } @@ -750,6 +646,15 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI, } } + // REX prefix is optional, but if used must be immediately before the opcode + // Encoding type for this instruction. + uint64_t Encoding = TSFlags & X86II::EncodingMask; + bool HasREX = false; + if (Encoding) + emitVEXOpcodePrefix(MemoryOperand, MI, OS); + else + HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS); + return HasREX; } @@ -1347,7 +1252,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI, // Emit the operand size opcode prefix as needed. if ((TSFlags & X86II::OpSizeMask) == - (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16)) + (STI.hasFeature(X86::Is16Bit) ? 
X86II::OpSize32 : X86II::OpSize16)) emitByte(0x66, OS); // Emit the LOCK opcode prefix. @@ -1371,9 +1276,9 @@ bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI, } // Handle REX prefix. - assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) && + assert((STI.hasFeature(X86::Is64Bit) || !(TSFlags & X86II::REX_W)) && "REX.W requires 64bit mode."); - bool HasREX = STI.hasFeature(X86::Mode64Bit) + bool HasREX = STI.hasFeature(X86::Is64Bit) ? emitREXPrefix(MemOperand, MI, STI, OS) : false; @@ -1472,7 +1377,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::RawFrm: emitByte(BaseOpcode + OpcodeOffset, OS); - if (!STI.hasFeature(X86::Mode64Bit) || !isPCRel32Branch(MI, MCII)) + if (!STI.hasFeature(X86::Is64Bit) || !isPCRel32Branch(MI, MCII)) break; const MCOperand &Op = MI.getOperand(CurOp++); @@ -1842,7 +1747,6 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, } MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new X86MCCodeEmitter(MCII, Ctx); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h index 532fecd9951b..cd2baeb1c98e 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h @@ -18,6 +18,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 8913e405539e..49660883ad83 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -72,6 +72,97 @@ bool X86_MC::hasLockPrefix(const MCInst &MI) { return MI.getFlags() & X86::IP_HAS_LOCK; } +static bool isMemOperand(const MCInst &MI, unsigned Op, unsigned RegClassID) { + const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg); + const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg); + const MCRegisterClass &RC = X86MCRegisterClasses[RegClassID]; + + return (Base.isReg() && Base.getReg() != 0 && RC.contains(Base.getReg())) || + (Index.isReg() && Index.getReg() != 0 && RC.contains(Index.getReg())); +} + +bool X86_MC::is16BitMemOperand(const MCInst &MI, unsigned Op, + const MCSubtargetInfo &STI) { + const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg); + const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg); + + if (STI.hasFeature(X86::Is16Bit) && Base.isReg() && Base.getReg() == 0 && + Index.isReg() && Index.getReg() == 0) + return true; + return isMemOperand(MI, Op, X86::GR16RegClassID); +} + +bool X86_MC::is32BitMemOperand(const MCInst &MI, unsigned Op) { + const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg); + const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg); + if (Base.isReg() && Base.getReg() == X86::EIP) { + assert(Index.isReg() && Index.getReg() == 0 && "Invalid eip-based address"); + return true; + } + if (Index.isReg() && Index.getReg() == X86::EIZ) + return true; + return isMemOperand(MI, Op, X86::GR32RegClassID); +} + +#ifndef NDEBUG +bool X86_MC::is64BitMemOperand(const MCInst &MI, unsigned Op) { + return isMemOperand(MI, Op, X86::GR64RegClassID); +} +#endif + +bool X86_MC::needsAddressSizeOverride(const MCInst &MI, + const MCSubtargetInfo &STI, + int MemoryOperand, uint64_t TSFlags) 
{ + uint64_t AdSize = TSFlags & X86II::AdSizeMask; + bool Is16BitMode = STI.hasFeature(X86::Is16Bit); + bool Is32BitMode = STI.hasFeature(X86::Is32Bit); + bool Is64BitMode = STI.hasFeature(X86::Is64Bit); + if ((Is16BitMode && AdSize == X86II::AdSize32) || + (Is32BitMode && AdSize == X86II::AdSize16) || + (Is64BitMode && AdSize == X86II::AdSize32)) + return true; + uint64_t Form = TSFlags & X86II::FormMask; + switch (Form) { + default: + break; + case X86II::RawFrmDstSrc: { + unsigned siReg = MI.getOperand(1).getReg(); + assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) || + (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) || + (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) && + "SI and DI register sizes do not match"); + return (!Is32BitMode && siReg == X86::ESI) || + (Is32BitMode && siReg == X86::SI); + } + case X86II::RawFrmSrc: { + unsigned siReg = MI.getOperand(0).getReg(); + return (!Is32BitMode && siReg == X86::ESI) || + (Is32BitMode && siReg == X86::SI); + } + case X86II::RawFrmDst: { + unsigned siReg = MI.getOperand(0).getReg(); + return (!Is32BitMode && siReg == X86::EDI) || + (Is32BitMode && siReg == X86::DI); + } + } + + // Determine where the memory operand starts, if present. + if (MemoryOperand < 0) + return false; + + if (STI.hasFeature(X86::Is64Bit)) { + assert(!is16BitMemOperand(MI, MemoryOperand, STI)); + return is32BitMemOperand(MI, MemoryOperand); + } + if (STI.hasFeature(X86::Is32Bit)) { + assert(!is64BitMemOperand(MI, MemoryOperand)); + return is16BitMemOperand(MI, MemoryOperand, STI); + } + assert(STI.hasFeature(X86::Is16Bit)); + assert(!is64BitMemOperand(MI, MemoryOperand)); + return !is16BitMemOperand(MI, MemoryOperand, STI); +} + void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) { // FIXME: TableGen these. for (unsigned Reg = X86::NoRegister + 1; Reg < X86::NUM_TARGET_REGS; ++Reg) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 35604cd3ec0a..d0530bd4d650 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -63,6 +63,28 @@ void initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI); /// Returns true if this instruction has a LOCK prefix. bool hasLockPrefix(const MCInst &MI); +/// \param Op operand # of the memory operand. +/// +/// \returns true if the specified instruction has a 16-bit memory operand. +bool is16BitMemOperand(const MCInst &MI, unsigned Op, + const MCSubtargetInfo &STI); + +/// \param Op operand # of the memory operand. +/// +/// \returns true if the specified instruction has a 32-bit memory operand. +bool is32BitMemOperand(const MCInst &MI, unsigned Op); + +/// \param Op operand # of the memory operand. +/// +/// \returns true if the specified instruction has a 64-bit memory operand. +#ifndef NDEBUG +bool is64BitMemOperand(const MCInst &MI, unsigned Op); +#endif + +/// Returns true if this instruction needs an Address-Size override prefix. +bool needsAddressSizeOverride(const MCInst &MI, const MCSubtargetInfo &STI, + int MemoryOperand, uint64_t TSFlags); + /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc. /// do not need to go through TargetRegistry. 
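Once the explicit-AdSize and string-instruction special cases are peeled off, needsAddressSizeOverride condenses to a small decision table: the 0x67 prefix flips the address size to the mode's alternate width, so it is needed exactly when the memory operand's registers are that alternate width. A reduced sketch of just the mode-versus-operand-width part (the real function also honors the AdSize TSFlags and the SI/DI width of string instructions):

    enum class Mode { M16, M32, M64 };

    // Hypothetical reduced predicate: does a memory operand whose
    // base/index registers are MemBits wide need the 0x67 prefix?
    bool needs67(Mode M, unsigned MemBits) {
      switch (M) {
      case Mode::M64: return MemBits == 32; // 64-bit mode: 0x67 selects 32-bit
      case Mode::M32: return MemBits == 16; // 32-bit mode: 0x67 selects 16-bit
      case Mode::M16: return MemBits != 16; // 16-bit mode: 0x67 selects 32-bit
      }
      return false;
    }

This is also what the addr32/addr16 printing added to printInstFlags earlier runs in reverse: when the IP_HAS_AD_SIZE flag is set but the operands alone would not imply the prefix, the printer has to spell the override out.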
MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU, @@ -70,7 +92,6 @@ MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU, } MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createX86_32AsmBackend(const Target &T, @@ -142,4 +163,7 @@ MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned, #define GET_SUBTARGETINFO_ENUM #include "X86GenSubtargetInfo.inc" +#define GET_X86_MNEMONIC_TABLES_H +#include "X86GenMnemonicTables.inc" + #endif diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp new file mode 100644 index 000000000000..39b7f0f4160e --- /dev/null +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp @@ -0,0 +1,16 @@ +//===-- X86MnemonicTables.cpp - X86 Mnemonic Tables -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 mnemonic tables. +// +//===----------------------------------------------------------------------===// + +#include "X86InstrInfo.h" + +#define GET_X86_MNEMONIC_TABLES_CPP +#include "X86GenMnemonicTables.inc" diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index c29211246123..36945d1f6746 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -9,6 +9,7 @@ #include "X86MCTargetDesc.h" #include "X86TargetStreamer.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCWin64EH.h" @@ -25,15 +26,15 @@ public: std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} - void EmitWinEHHandlerData(SMLoc Loc) override; - void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; - void EmitWindowsUnwindTables() override; - void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override; + void emitWinEHHandlerData(SMLoc Loc) override; + void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void emitWindowsUnwindTables() override; + void emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override; void finishImpl() override; }; -void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { - MCStreamer::EmitWinEHHandlerData(Loc); +void X86WinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive // actually switches to the .xdata section. 
@@ -41,17 +42,17 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { EHStreamer.EmitUnwindInfo(*this, CurFrame, /* HandlerData = */ true); } -void X86WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { +void X86WinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) { EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); } -void X86WinCOFFStreamer::EmitWindowsUnwindTables() { +void X86WinCOFFStreamer::emitWindowsUnwindTables() { if (!getNumWinFrameInfos()) return; EHStreamer.Emit(*this); } -void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { +void X86WinCOFFStreamer::emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { X86TargetStreamer *XTS = static_cast(getTargetStreamer()); XTS->emitFPOData(ProcSym, Loc); @@ -59,7 +60,7 @@ void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { void X86WinCOFFStreamer::finishImpl() { emitFrames(nullptr); - EmitWindowsUnwindTables(); + emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp index bf3f4e990ecc..f2827c568109 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 10e1c5d6ed38..7344900f2e31 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -79,6 +79,9 @@ FunctionPass *createX86DynAllocaExpander(); /// Return a pass that config the tile registers. FunctionPass *createX86TileConfigPass(); +/// Return a pass that preconfig the tile registers before fast reg allocation. +FunctionPass *createX86FastPreTileConfigPass(); + /// Return a pass that config the tile registers after fast reg allocation. 
FunctionPass *createX86FastTileConfigPass(); @@ -175,6 +178,7 @@ void initializeX86PartialReductionPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &); void initializeX86PreTileConfigPass(PassRegistry &); +void initializeX86FastPreTileConfigPass(PassRegistry &); void initializeX86FastTileConfigPass(PassRegistry &); void initializeX86TileConfigPass(PassRegistry &); void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 380507308c3d..a5c6b40c493c 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -18,13 +18,13 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // X86 Subtarget state // - -def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", - "64-bit mode (x86_64)">; -def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true", - "32-bit mode (80386)">; -def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true", - "16-bit mode (i8086)">; +// disregarding specific ABI / programming model +def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true", + "64-bit mode (x86_64)">; +def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true", + "32-bit mode (80386)">; +def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true", + "16-bit mode (i8086)">; //===----------------------------------------------------------------------===// // X86 Subtarget ISA features @@ -34,16 +34,16 @@ def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true", - "Enable NOPL instruction">; + "Enable NOPL instruction (generally pentium pro+)">; -def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", +def FeatureCMOV : SubtargetFeature<"cmov","HasCMOV", "true", "Enable conditional move instructions">; -def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true", - "Support CMPXCHG8B instructions">; +def FeatureCX8 : SubtargetFeature<"cx8", "HasCX8", "true", + "Support CMPXCHG8B instructions">; def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true", - "Enable SSE 4.2 CRC32 instruction">; + "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">; def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; @@ -98,11 +98,11 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", // feature, because SSE2 can be disabled (e.g. for compiling OS kernels) // without disabling 64-bit mode. Nothing should imply this feature bit. It // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode. 
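The renames in this file keep every user-visible -mattr string stable ("64bit-mode", "cx8", "sahf", ...) while renaming the TableGen record and the generated C++ identifiers after it. Roughly, and as a hand-written sketch rather than the actual generated code, a SubtargetFeature def fans out like this:

    // def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true", ...>;
    //
    // X86GenSubtargetInfo.inc (generated) gets an enum value named after
    // the record, and the subtarget gets a field named after the second
    // template argument:
    namespace X86 { enum { /* ..., */ Is64Bit /* , ... */ }; }
    // bool X86Subtarget::Is64Bit;  // set when -mattr=+64bit-mode is active
    //
    // Both query styles seen throughout this patch resolve to that bit:
    //   STI.hasFeature(X86::Is64Bit)
    //   STI.getFeatureBits()[X86::Is64Bit]

Renaming the record (Mode64Bit to Is64Bit) is therefore a pure source-level change; encodings and feature strings are untouched.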
-def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", +def FeatureX86_64 : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; -def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", - "64-bit with cmpxchg16b", - [FeatureCMPXCHG8B]>; +def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true", + "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)", + [FeatureCX8]>; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; @@ -119,7 +119,7 @@ def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions", [FeatureAVX]>; -def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", +def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512", "Enable AVX-512 instructions", [FeatureAVX2, FeatureFMA, FeatureF16C]>; def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", @@ -198,7 +198,7 @@ def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", [FeatureFMA4]>; def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true", - "Allow unaligned memory operands with SSE instructions">; + "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions", [FeatureSSE2]>; @@ -228,20 +228,22 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", "Enable SHA instructions", [FeatureSSE2]>; +// Processor supports CET SHSTK - Control-Flow Enforcement Technology +// using Shadow Stack def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true", "Support CET Shadow-Stack instructions">; def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; -def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true", +def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true", "Support LAHF and SAHF instructions in 64-bit mode">; def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true", "Enable MONITORX/MWAITX timer functionality">; def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", "Enable Cache Line Zero">; def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true", - "Enable Cache Demote">; + "Enable Cache Line Demote">; def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true", "Support ptwrite instruction">; def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true", @@ -285,9 +287,9 @@ def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true", def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true", "platform configuration instruction">; def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", - "Support movdiri instruction">; + "Support movdiri instruction (direct store integer)">; def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true", - "Support movdir64b instruction">; + "Support movdir64b instruction (direct store 64 bytes)">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). 
See "REP String Enhancement" in the Intel Software @@ -380,6 +382,17 @@ def FeatureTaggedGlobals "Use an instruction sequence for taking the address of a global " "that allows a memory tag in the upper address bits.">; +// Control codegen mitigation against Straight Line Speculation vulnerability. +def FeatureHardenSlsRet + : SubtargetFeature< + "harden-sls-ret", "HardenSlsRet", "true", + "Harden against straight line speculation across RET instructions.">; + +def FeatureHardenSlsIJmp + : SubtargetFeature< + "harden-sls-ijmp", "HardenSlsIJmp", "true", + "Harden against straight line speculation across indirect JMP instructions.">; + //===----------------------------------------------------------------------===// // X86 Subtarget Tuning features //===----------------------------------------------------------------------===// @@ -388,7 +401,7 @@ def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", - "PMULLD instruction is slow">; + "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">; def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true", @@ -396,27 +409,31 @@ def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", // FIXME: This should not apply to CPUs that do not have SSE. def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", - "IsUAMem16Slow", "true", + "IsUnalignedMem16Slow", "true", "Slow unaligned 16-byte memory access">; def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", - "IsUAMem32Slow", "true", + "IsUnalignedMem32Slow", "true", "Slow unaligned 32-byte memory access">; def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", - "Use LEA for adjusting the stack pointer">; + "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">; +// True if 8-bit divisions are significantly faster than +// 32-bit divisions and should be used when possible. def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", "Use 8-bit divide for positive values less than 256">; +// True if 32-bit divides are significantly faster than +// 64-bit divisions and should be used when possible. def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true", "Use 32-bit divide for positive values less than 2^32">; def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", - "Pad short functions">; + "Pad short functions (to prevent a stall when returning too early)">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands @@ -425,15 +442,21 @@ def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true", "Two memory operand instructions are slow">; -def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", +// True if the LEA instruction inputs have to be ready at address generation +// (AG) time. 
+def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true", "LEA instruction needs inputs at AG stage">; def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; +// True if the LEA instruction has all three source operands: base, index, +// and offset or if the LEA instruction uses base and index registers where +// the base is EBP, RBP,or R13 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", "LEA instruction with 3 ops or certain registers is slow">; +// True if INC and DEC instructions are slow when writing to flags def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; @@ -445,6 +468,31 @@ def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true", "LZCNT/TZCNT have a false dependency on dest register">; +def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc", + "HasMULCFalseDeps", "true", + "VF[C]MULCPH/SH has a false dependency on dest register">; + +def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm", + "HasPERMFalseDeps", "true", + "VPERMD/Q/PS/PD has a false dependency on dest register">; + +def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range", + "HasRANGEFalseDeps", "true", + "VRANGEPD/PS/SD/SS has a false dependency on dest register">; + +def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant", + "HasGETMANTFalseDeps", "true", + "VGETMANTSS/SD/SH and VGETMANDPS/PD(memory version) has a" + " false dependency on dest register">; + +def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq", + "HasMULLQFalseDeps", "true", + "VPMULLQ has a false dependency on dest register">; + +def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking", + "HasSBBDepBreaking", "true", + "SBB with same register has no source dependency">; + // On recent X86 (port bound) processors, its preferable to combine to a single shuffle // using a variable mask over multiple fixed shuffles. def TuningFastVariableCrossLaneShuffle @@ -470,9 +518,14 @@ def TuningInsertVZEROUPPER // vectorized code we should care about the throughput of SQRT operations. // But if the code is scalar that probably means that the code has some kind of // dependency and we should care more about reducing the latency. + +// True if hardware SQRTSS instruction is at least as fast (latency) as +// RSQRTSS followed by a Newton-Raphson iteration. def TuningFastScalarFSQRT : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", "true", "Scalar SQRT is fast (disable Newton-Raphson)">; +// True if hardware SQRTPS/VSQRTPS instructions are at least as fast +// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration. def TuningFastVectorFSQRT : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", "true", "Vector SQRT is fast (disable Newton-Raphson)">; @@ -529,7 +582,7 @@ def TuningMacroFusion // similar to Skylake Server (AVX-512). 
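The fsqrt tuning comments above refer to the classic reciprocal-square-root refinement: RSQRTSS yields roughly 12 bits of precision, one Newton-Raphson step nearly doubles that, and a final multiply recovers the square root. A sketch of the math being traded against the hardware SQRTSS, using a library call as a stand-in for the RSQRTSS estimate:

    #include <cmath>

    float sqrt_nr(float a) {
      float x = 1.0f / std::sqrt(a);     // stand-in for the RSQRTSS estimate
      x = x * (1.5f - 0.5f * a * x * x); // one Newton-Raphson iteration
      return a * x;                      // sqrt(a) = a * rsqrt(a)
    }

When fast-scalar-fsqrt / fast-vector-fsqrt are set, this expansion is not worthwhile and the plain hardware square root is emitted instead.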
def TuningFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", - "Indicates if gather is reasonably fast">; + "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">; def TuningPrefer128Bit : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true", @@ -578,17 +631,13 @@ def TuningUseGLMDivSqrtCosts : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", "Use Goldmont specific floating point div/sqrt costs">; -// Enable use of alias analysis during code generation. -def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", - "Use alias analysis during codegen">; - //===----------------------------------------------------------------------===// // X86 CPU Families // TODO: Remove these - use general tuning features to determine codegen. //===----------------------------------------------------------------------===// // Bonnell -def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; +def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">; //===----------------------------------------------------------------------===// // Register File Description @@ -632,11 +681,11 @@ include "X86SchedIceLake.td" def ProcessorFeatures { // x86-64 and x86-64-v[234] list X86_64V1Features = [ - FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, - FeatureFXSR, FeatureNOPL, Feature64Bit + FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureNOPL, FeatureX86_64, ]; list X86_64V2Features = !listconcat(X86_64V1Features, [ - FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT, + FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT, FeatureSSE42 ]); list X86_64V3Features = !listconcat(X86_64V2Features, [ @@ -862,22 +911,27 @@ def ProcessorFeatures { FeatureMOVDIRI, FeatureMOVDIR64B, FeatureUINTR]; - list SPRTuning = ICXTuning; + list SPRAdditionalTuning = [TuningMULCFalseDeps, + TuningPERMFalseDeps, + TuningRANGEFalseDeps, + TuningGETMANTFalseDeps, + TuningMULLQFalseDeps]; + list SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning); list SPRFeatures = !listconcat(ICXFeatures, SPRAdditionalFeatures); // Atom list AtomFeatures = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, + FeatureX86_64, + FeatureCX16, FeatureMOVBE, - FeatureLAHFSAHF]; + FeatureLAHFSAHF64]; list AtomTuning = [ProcIntelAtom, TuningSlowUAMem16, TuningLEAForSP, @@ -968,25 +1022,26 @@ def ProcessorFeatures { FeatureMOVDIRI, FeatureMOVDIR64B, FeatureWAITPKG]; - list ADLTuning = SKLTuning; + list ADLAdditionalTuning = [TuningPERMFalseDeps]; + list ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning); list ADLFeatures = !listconcat(TRMFeatures, ADLAdditionalFeatures); // Knights Landing list KNLFeatures = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureFXSR, FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, + FeatureX86_64, + FeatureCX16, FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, - FeatureLAHFSAHF, + FeatureLAHFSAHF64, FeatureAES, FeatureRDRAND, FeatureF16C, @@ -1018,41 +1073,43 @@ def ProcessorFeatures { // Barcelona list BarcelonaFeatures = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureNOPL, - FeatureCMPXCHG16B, + FeatureCX16, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, - FeatureLAHFSAHF, + FeatureLAHFSAHF64, FeatureCMOV, - Feature64Bit]; 
+ FeatureX86_64]; list BarcelonaTuning = [TuningFastScalarShiftMasks, TuningSlowSHLD, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; // Bobcat list BtVer1Features = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureSSE4A, FeatureFXSR, FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, + FeatureX86_64, + FeatureCX16, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, - FeatureLAHFSAHF]; + FeatureLAHFSAHF64]; list BtVer1Tuning = [TuningFast15ByteNOP, TuningFastScalarShiftMasks, TuningFastVectorShiftMasks, TuningSlowSHLD, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; // Jaguar @@ -1072,17 +1129,18 @@ def ProcessorFeatures { TuningFastScalarShiftMasks, TuningFastVectorShiftMasks, TuningFastMOVBE, + TuningSBBDepBreaking, TuningSlowSHLD]; list BtVer2Features = !listconcat(BtVer1Features, BtVer2AdditionalFeatures); // Bulldozer list BdVer1Features = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureXOP, - Feature64Bit, - FeatureCMPXCHG16B, + FeatureX86_64, + FeatureCX16, FeatureAES, FeatureCRC32, FeaturePRFCHW, @@ -1094,11 +1152,12 @@ def ProcessorFeatures { FeaturePOPCNT, FeatureXSAVE, FeatureLWP, - FeatureLAHFSAHF]; + FeatureLAHFSAHF64]; list BdVer1Tuning = [TuningSlowSHLD, TuningFast11ByteNOP, TuningFastScalarShiftMasks, TuningBranchFusion, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; // PileDriver @@ -1140,15 +1199,15 @@ def ProcessorFeatures { FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, - Feature64Bit, - FeatureCMPXCHG16B, + FeatureX86_64, + FeatureCX16, FeatureCRC32, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR, FeatureNOPL, - FeatureLAHFSAHF, + FeatureLAHFSAHF64, FeatureLZCNT, FeatureMMX, FeatureMOVBE, @@ -1169,9 +1228,13 @@ def ProcessorFeatures { TuningFastBEXTR, TuningFast15ByteNOP, TuningBranchFusion, + TuningFastScalarFSQRT, + TuningFastVectorFSQRT, TuningFastScalarShiftMasks, + TuningFastVariablePerLaneShuffle, TuningFastMOVBE, TuningSlowSHLD, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; list ZN2AdditionalFeatures = [FeatureCLWB, FeatureRDPID, @@ -1184,11 +1247,9 @@ def ProcessorFeatures { FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ]; - list ZN3AdditionalTuning = - [TuningMacroFusion, - TuningFastVariablePerLaneShuffle]; + list ZN3AdditionalTuning = [TuningMacroFusion]; list ZN3Tuning = - !listconcat(ZNTuning, ZN3AdditionalTuning); + !listconcat(ZN2Tuning, ZN3AdditionalTuning); list ZN3Features = !listconcat(ZN2Features, ZN3AdditionalFeatures); } @@ -1209,39 +1270,43 @@ class ProcModel; def : Proc<"i386", [FeatureX87], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"i486", [FeatureX87], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B], +def : Proc<"i586", [FeatureX87, FeatureCX8], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B], +def : Proc<"pentium", [FeatureX87, FeatureCX8], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX], +def : Proc<"pentium-mmx", [FeatureX87, FeatureCX8, FeatureMMX], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV], +def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, +def : Proc<"pentiumpro", [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentium2", [FeatureX87, 
FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV, +def : Proc<"pentium2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR, FeatureNOPL], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["pentium3", "pentium3m"] in { - def : Proc; } @@ -1257,42 +1322,42 @@ foreach P = ["pentium3", "pentium3m"] in { // changes slightly. def : ProcModel<"pentium-m", GenericPostRAModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2, + [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcModel; } // Intel Quark. -def : Proc<"lakemont", [FeatureCMPXCHG8B], +def : Proc<"lakemont", [FeatureCX8], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; // Intel Core Duo. def : ProcModel<"yonah", SandyBridgeModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, + [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; // NetBurst. def : ProcModel<"prescott", GenericPostRAModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, + [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : ProcModel<"nocona", GenericPostRAModel, [ FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, + FeatureX86_64, + FeatureCX16, ], [ TuningSlowUAMem16, @@ -1302,15 +1367,15 @@ def : ProcModel<"nocona", GenericPostRAModel, [ // Intel Core 2 Solo/Duo. def : ProcModel<"core2", SandyBridgeModel, [ FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, - FeatureLAHFSAHF + FeatureX86_64, + FeatureCX16, + FeatureLAHFSAHF64 ], [ TuningMacroFusion, @@ -1319,15 +1384,15 @@ def : ProcModel<"core2", SandyBridgeModel, [ ]>; def : ProcModel<"penryn", SandyBridgeModel, [ FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE41, FeatureFXSR, FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, - FeatureLAHFSAHF + FeatureX86_64, + FeatureCX16, + FeatureLAHFSAHF64 ], [ TuningMacroFusion, @@ -1416,38 +1481,38 @@ def : ProcModel<"alderlake", SkylakeClientModel, // AMD CPUs. 
-def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX], +def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow], +def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow], +def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { - def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { - def : Proc; + TuningSBBDepBreaking, TuningInsertVZEROUPPER]>; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { - def : Proc; + TuningSBBDepBreaking, TuningInsertVZEROUPPER]>; } foreach P = ["amdfam10", "barcelona"] in { @@ -1482,7 +1547,7 @@ def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features, def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>; -def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA], +def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"winchip-c6", [FeatureX87, FeatureMMX], @@ -1491,7 +1556,7 @@ def : Proc<"winchip2", [FeatureX87, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"c3", [FeatureX87, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, +def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index d48b8e458219..c205395aa084 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -60,8 +61,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SMShadowTracker.startFunction(MF); CodeEmitter.reset(TM.getTarget().createMCCodeEmitter( - *Subtarget->getInstrInfo(), *Subtarget->getRegisterInfo(), - MF.getContext())); + *Subtarget->getInstrInfo(), MF.getContext())); EmitFPOData = Subtarget->isTargetWin32() && MF.getMMI().getModule()->getCodeViewFlag(); @@ -70,12 +70,12 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetCOFF()) { bool Local = MF.getFunction().hasLocalLinkage(); - OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer->EmitCOFFSymbolStorageClass( + OutStreamer->beginCOFFSymbolDef(CurrentFnSym); + OutStreamer->emitCOFFSymbolStorageClass( Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION - << COFF::SCT_COMPLEX_TYPE_SHIFT); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->endCOFFSymbolDef(); } // Emit the rest of the function body. 
@@ -249,7 +249,7 @@ void X86AsmPrinter::PrintOperand(const MachineInstr *MI, unsigned OpNo,
 void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
                                          raw_ostream &O, const char *Modifier) {
   const MachineOperand &MO = MI->getOperand(OpNo);
-  if (!Modifier || MO.getType() != MachineOperand::MO_Register)
+  if (!Modifier || !MO.isReg())
     return PrintOperand(MI, OpNo, O);
   if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
     O << '%';
@@ -336,6 +336,37 @@ void X86AsmPrinter::PrintLeaMemReference(const MachineInstr *MI, unsigned OpNo,
   }
 }
 
+static bool isSimpleReturn(const MachineInstr &MI) {
+  // We exclude all tail calls here which set both isReturn and isCall.
+  return MI.getDesc().isReturn() && !MI.getDesc().isCall();
+}
+
+static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  return MI.getDesc().isIndirectBranch() /*keep the code below in good shape*/ ||
+         Opc == X86::TAILJMPr || Opc == X86::TAILJMPm ||
+         Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 ||
+         Opc == X86::TCRETURNri || Opc == X86::TCRETURNmi ||
+         Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 ||
+         Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX;
+}
+
+void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
+  if (Subtarget->hardenSlsRet() || Subtarget->hardenSlsIJmp()) {
+    auto I = MBB.getLastNonDebugInstr();
+    if (I != MBB.end()) {
+      if ((Subtarget->hardenSlsRet() && isSimpleReturn(*I)) ||
+          (Subtarget->hardenSlsIJmp() && isIndirectBranchOrTailCall(*I))) {
+        MCInst TmpInst;
+        TmpInst.setOpcode(X86::INT3);
+        EmitToStreamer(*OutStreamer, TmpInst);
+      }
+    }
+  }
+  AsmPrinter::emitBasicBlockEnd(MBB);
+  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
+}
+
 void X86AsmPrinter::PrintMemReference(const MachineInstr *MI, unsigned OpNo,
                                       raw_ostream &O, const char *Modifier) {
   assert(isMem(*MI, OpNo) && "Invalid memory reference!");
@@ -363,6 +394,12 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
       BaseReg.getReg() == X86::RIP)
     HasBaseReg = false;
 
+  // If we just want to print out the displacement.
+  if (Modifier && (DispSpec.isGlobal() || DispSpec.isSymbol()) &&
+      !strcmp(Modifier, "disp-only")) {
+    HasBaseReg = false;
+  }
+
   // If this has a segment register, print it.
   if (SegReg.getReg()) {
     PrintOperand(MI, OpNo + X86::AddrSegmentReg, O);
@@ -606,11 +643,14 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
       PrintMemReference(MI, OpNo, O, "H");
     }
     return false;
-  case 'P': // Don't print @PLT, but do print as memory.
+  // Print memory only with displacement. The modifier 'P' is used in inline
+  // asm to reference a call symbol or a global symbol which cannot use a
+  // base reg or index reg.
+  case 'P':
     if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
-      PrintIntelMemReference(MI, OpNo, O, "no-rip");
+      PrintIntelMemReference(MI, OpNo, O, "disp-only");
     } else {
-      PrintMemReference(MI, OpNo, O, "no-rip");
+      PrintMemReference(MI, OpNo, O, "disp-only");
    }
    return false;
  }
@@ -641,7 +681,7 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
       MCSection *Cur = OutStreamer->getCurrentSectionOnly();
       MCSection *Nt = MMI->getContext().getELFSection(
           ".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
-      OutStreamer->SwitchSection(Nt);
+      OutStreamer->switchSection(Nt);
 
       // Emitting note header.
       const int WordSize = TT.isArch64Bit() && !TT.isX32() ? 8 : 4;
@@ -658,21 +698,21 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
       emitAlignment(WordSize == 4 ?
Align(4) : Align(8)); // padding OutStreamer->endSection(Nt); - OutStreamer->SwitchSection(Cur); + OutStreamer->switchSection(Cur); } } if (TT.isOSBinFormatMachO()) - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + OutStreamer->switchSection(getObjFileLowering().getTextSection()); if (TT.isOSBinFormatCOFF()) { // Emit an absolute @feat.00 symbol. This appears to be some kind of // compiler features bitfield read by link.exe. MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00")); - OutStreamer->BeginCOFFSymbolDef(S); - OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(S); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); + OutStreamer->endCOFFSymbolDef(); int64_t Feat00Flags = 0; if (TT.getArch() == Triple::x86) { @@ -739,7 +779,7 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) { // Output stubs for external and common global variables. Stubs = MMIMacho.GetGVStubList(); if (!Stubs.empty()) { - OutStreamer.SwitchSection(MMI->getContext().getMachOSection( + OutStreamer.switchSection(MMI->getContext().getMachOSection( "__IMPORT", "__pointers", MachO::S_NON_LAZY_SYMBOL_POINTERS, SectionKind::getMetadata())); @@ -747,7 +787,7 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) { emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); - OutStreamer.AddBlankLine(); + OutStreamer.addBlankLine(); } } @@ -795,6 +835,22 @@ void X86AsmPrinter::emitEndOfAsmFile(Module &M) { emitStackMaps(SM); FM.serializeToFaultMapSection(); } + + // Emit __morestack address if needed for indirect calls. 
+ if (TT.getArch() == Triple::x86_64 && TM.getCodeModel() == CodeModel::Large) { + if (MCSymbol *AddrSymbol = OutContext.lookupSymbol("__morestack_addr")) { + Align Alignment(1); + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( + getDataLayout(), SectionKind::getReadOnly(), + /*C=*/nullptr, Alignment); + OutStreamer->switchSection(ReadOnlySection); + OutStreamer->emitLabel(AddrSymbol); + + unsigned PtrSize = MAI->getCodePointerSize(); + OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"), + PtrSize); + } + } } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h index 94679e6e3d11..d53c26b729ef 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.h +++ b/llvm/lib/Target/X86/X86AsmPrinter.h @@ -131,10 +131,7 @@ public: void emitInstruction(const MachineInstr *MI) override; - void emitBasicBlockEnd(const MachineBasicBlock &MBB) override { - AsmPrinter::emitBasicBlockEnd(MBB); - SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); - } + void emitBasicBlockEnd(const MachineBasicBlock &MBB) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override; diff --git a/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp b/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp index 0899783d5f60..2ecf49382d29 100644 --- a/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp +++ b/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp @@ -35,6 +35,7 @@ #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #define AVOIDCALL_DESC "X86 avoid trailing call pass" @@ -69,8 +70,8 @@ INITIALIZE_PASS(X86AvoidTrailingCallPass, AVOIDCALL_NAME, AVOIDCALL_DESC, false, // A real instruction is a non-meta, non-pseudo instruction. Some pseudos // expand to nothing, and some expand to code. This logic conservatively assumes // they might expand to nothing. -static bool isRealInstruction(MachineInstr &MI) { - return !MI.isPseudo() && !MI.isMetaInstruction(); +static bool isCallOrRealInstruction(MachineInstr &MI) { + return MI.isCall() || (!MI.isPseudo() && !MI.isMetaInstruction()); } // Return true if this is a call instruction, but not a tail call. @@ -100,7 +101,7 @@ bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) { continue; // Find the last real instruction in this block. - auto LastRealInstr = llvm::find_if(reverse(MBB), isRealInstruction); + auto LastRealInstr = llvm::find_if(reverse(MBB), isCallOrRealInstruction); // If the block is empty or the last real instruction is a call instruction, // insert an int3. If there is a call instruction, insert the int3 between diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp index c80a5d5bb332..ded93fdc011c 100644 --- a/llvm/lib/Target/X86/X86CallingConv.cpp +++ b/llvm/lib/Target/X86/X86CallingConv.cpp @@ -299,7 +299,7 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State) { const MachineFunction &MF = State.getMachineFunction(); size_t ArgCount = State.getMachineFunction().getFunction().arg_size(); - bool Is64Bit = static_cast(MF.getSubtarget()).is64Bit(); + bool Is64Bit = MF.getSubtarget().is64Bit(); unsigned SlotSize = Is64Bit ? 
8 : 4;
   unsigned Offset;
   if (ArgCount == 1 && ValNo == 0) {
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 96d3d1390a59..f32891552a82 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -97,6 +97,11 @@ static cl::opt<bool> ForceMemOperand(
     cl::desc("Convert cmovs to branches whenever they have memory operands."),
     cl::init(true), cl::Hidden);
 
+static cl::opt<bool> ForceAll(
+    "x86-cmov-converter-force-all",
+    cl::desc("Convert all cmovs to branches."),
+    cl::init(false), cl::Hidden);
+
 namespace {
 
 /// Converts X86 cmov instructions into branches when profitable.
@@ -174,11 +179,11 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
   TSchedModel.init(&STI);
 
   // Before we handle the more subtle cases of register-register CMOVs inside
-  // of potentially hot loops, we want to quickly remove all CMOVs with
-  // a memory operand. The CMOV will risk a stall waiting for the load to
-  // complete that speculative execution behind a branch is better suited to
-  // handle on modern x86 chips.
-  if (ForceMemOperand) {
+  // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll)
+  // or the ones with a memory operand (the ForceMemOperand option). The
+  // latter kind of CMOV risks a stall waiting for the load to complete, which
+  // speculative execution behind a branch is better suited to handle on
+  // modern x86 chips.
+  if (ForceMemOperand || ForceAll) {
     CmovGroups AllCmovGroups;
     SmallVector<MachineBasicBlock *, 4> Blocks;
     for (auto &MBB : MF)
@@ -186,7 +191,8 @@
     if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
       for (auto &Group : AllCmovGroups) {
         // Skip any group that doesn't do at least one memory operand cmov.
-        if (llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
+        if (ForceMemOperand && !ForceAll &&
+            llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
           continue;
 
         // For CMOV groups which we can rewrite and which contain a memory load,
@@ -196,12 +202,15 @@
         convertCmovInstsToBranches(Group);
       }
     }
+    // Early return as ForceAll converts all CmovGroups.
+    if (ForceAll)
+      return Changed;
   }
 
   //===--------------------------------------------------------------------===//
   // Register-operand Conversion Algorithm
   // ---------
-  //   For each inner most loop
+  //   For each innermost loop
   //     collectCmovCandidates() {
   //       Find all CMOV-group-candidates.
   //     }
@@ -230,7 +239,7 @@
       Loops.push_back(Child);
 
   for (MachineLoop *CurrLoop : Loops) {
-    // Optimize only inner most loops.
+    // Optimize only innermost loops.
     if (!CurrLoop->getSubLoops().empty())
       continue;
 
@@ -520,7 +529,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
   //===--------------------------------------------------------------------===//
   // Step 3: Check for each CMOV-group-candidate if it is worth optimizing.
   //   Worth-Optimize-Group:
-  //     Iff it worths to optimize all CMOV instructions in the group.
+  //     Iff it is worth optimizing all CMOV instructions in the group.
// // Worth-Optimize-CMOV: // Predicted branch is faster than CMOV by the difference between depth of diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp index 2ff8ee19561b..29668f4b2761 100644 --- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp +++ b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp @@ -16,6 +16,7 @@ #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/ProfileData/SampleProf.h" @@ -159,7 +160,7 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) { } // Since we were able to encode, bump the MemOpDiscriminators. ++MemOpDiscriminators[L]; - DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue()); + DI = DI->cloneWithDiscriminator(*EncodedDiscriminator); assert(DI && "DI should not be nullptr"); updateDebugInfo(&MI, DI); Changed = true; diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 9826bf4bf861..9d4338deca35 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -15,6 +15,7 @@ #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" @@ -86,7 +87,7 @@ protected: public: InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} - virtual ~InstrConverterBase() {} + virtual ~InstrConverterBase() = default; /// \returns true if \p MI is legal to convert. virtual bool isLegal(const MachineInstr *MI, @@ -374,7 +375,7 @@ class X86DomainReassignment : public MachineFunctionPass { const X86InstrInfo *TII = nullptr; /// All edges that are included in some closure - DenseSet EnclosedEdges; + BitVector EnclosedEdges{8, false}; /// All instructions that are included in some closure. DenseMap EnclosedInstrs; @@ -429,10 +430,10 @@ char X86DomainReassignment::ID = 0; void X86DomainReassignment::visitRegister(Closure &C, Register Reg, RegDomain &Domain, SmallVectorImpl &Worklist) { - if (EnclosedEdges.count(Reg)) + if (!Reg.isVirtual()) return; - if (!Reg.isVirtual()) + if (EnclosedEdges.test(Register::virtReg2Index(Reg))) return; if (!MRI->hasOneDef(Reg)) @@ -550,7 +551,7 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { // Register already in this closure. if (!C.insertEdge(CurReg)) continue; - EnclosedEdges.insert(Reg); + EnclosedEdges.set(Register::virtReg2Index(Reg)); MachineInstr *DefMI = MRI->getVRegDef(CurReg); encloseInstr(C, DefMI); @@ -742,6 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; EnclosedEdges.clear(); + EnclosedEdges.resize(MRI->getNumVirtRegs()); EnclosedInstrs.clear(); std::vector Closures; @@ -756,7 +758,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { continue; // Register already in closure. - if (EnclosedEdges.count(Reg)) + if (EnclosedEdges.test(Idx)) continue; // Calculate closure starting with Reg. 
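The switch from DenseSet to BitVector above relies on virtual registers having a dense zero-based index. A minimal sketch of the membership test, assuming the vector was sized to MRI->getNumVirtRegs() beforehand (as runOnMachineFunction does above) and Reg is already known to be virtual:

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// One bit per virtual register; test-and-set by dense index.
static bool markEnclosed(llvm::BitVector &Enclosed, llvm::Register Reg) {
  unsigned Idx = llvm::Register::virtReg2Index(Reg); // dense 0-based index
  if (Enclosed.test(Idx))
    return false; // already part of some closure
  Enclosed.set(Idx);
  return true;
}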
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 6a047838f0b5..aebeec5a6d27 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -19,6 +19,7 @@ #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved. @@ -552,7 +553,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case X86::PTILELOADDV: case X86::PTILELOADDT1V: { for (unsigned i = 2; i > 0; --i) - MI.RemoveOperand(i); + MI.removeOperand(i); unsigned Opc = Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1; MI.setDesc(TII->get(Opc)); @@ -565,7 +566,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case X86::PTDPBF16PSV: { MI.untieRegOperand(4); for (unsigned i = 3; i > 0; --i) - MI.RemoveOperand(i); + MI.removeOperand(i); unsigned Opc; switch (Opcode) { case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break; @@ -581,13 +582,13 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case X86::PTILESTOREDV: { for (int i = 1; i >= 0; --i) - MI.RemoveOperand(i); + MI.removeOperand(i); MI.setDesc(TII->get(X86::TILESTORED)); return true; } case X86::PTILEZEROV: { for (int i = 2; i > 0; --i) // Remove row, col - MI.RemoveOperand(i); + MI.removeOperand(i); MI.setDesc(TII->get(X86::TILEZERO)); return true; } @@ -729,7 +730,7 @@ bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) { } bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); X86FI = MF.getInfo(); diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 1ac998b7ff7e..f2c362eeaa48 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -49,22 +49,11 @@ class X86FastISel final : public FastISel { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 - /// floating point ops. - /// When SSE is available, use it for f32 operations. - /// When SSE2 is available, use it for f64 operations. - bool X86ScalarSSEf64; - bool X86ScalarSSEf32; - bool X86ScalarSSEf16; - public: explicit X86FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo) { Subtarget = &funcInfo.MF->getSubtarget(); - X86ScalarSSEf64 = Subtarget->hasSSE2(); - X86ScalarSSEf32 = Subtarget->hasSSE1(); - X86ScalarSSEf16 = Subtarget->hasFP16(); } bool fastSelectInstruction(const Instruction *I) override; @@ -158,9 +147,8 @@ private: /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. 
bool isScalarFPTypeInSSEReg(EVT VT) const { - return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 - (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1 - (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16 + return (VT == MVT::f64 && Subtarget->hasSSE2()) || + (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16; } bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false); @@ -292,6 +280,11 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, if (I->isTerminator() && llvm::any_of(successors(I), HasPhis)) return false; + // Make sure there are no potentially eflags clobbering constant + // materializations in between. + if (llvm::any_of(I->operands(), [](Value *V) { return isa(V); })) + return false; + CC = TmpCC; return true; } @@ -305,9 +298,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { VT = evt.getSimpleVT(); // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. - if (VT == MVT::f64 && !X86ScalarSSEf64) + if (VT == MVT::f64 && !Subtarget->hasSSE2()) return false; - if (VT == MVT::f32 && !X86ScalarSSEf32) + if (VT == MVT::f32 && !Subtarget->hasSSE1()) return false; // Similarly, no f80 support yet. if (VT == MVT::f80) @@ -325,6 +318,8 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO, unsigned &ResultReg, unsigned Alignment) { + bool HasSSE1 = Subtarget->hasSSE1(); + bool HasSSE2 = Subtarget->hasSSE2(); bool HasSSE41 = Subtarget->hasSSE41(); bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); @@ -354,20 +349,16 @@ bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM, Opc = X86::MOV64rm; break; case MVT::f32: - if (X86ScalarSSEf32) - Opc = HasAVX512 ? X86::VMOVSSZrm_alt : - HasAVX ? X86::VMOVSSrm_alt : - X86::MOVSSrm_alt; - else - Opc = X86::LD_Fp32m; + Opc = HasAVX512 ? X86::VMOVSSZrm_alt + : HasAVX ? X86::VMOVSSrm_alt + : HasSSE1 ? X86::MOVSSrm_alt + : X86::LD_Fp32m; break; case MVT::f64: - if (X86ScalarSSEf64) - Opc = HasAVX512 ? X86::VMOVSDZrm_alt : - HasAVX ? X86::VMOVSDrm_alt : - X86::MOVSDrm_alt; - else - Opc = X86::LD_Fp64m; + Opc = HasAVX512 ? X86::VMOVSDZrm_alt + : HasAVX ? X86::VMOVSDrm_alt + : HasSSE2 ? X86::MOVSDrm_alt + : X86::LD_Fp64m; break; case MVT::f80: // No f80 support yet. @@ -521,7 +512,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM, Opc = (IsNonTemporal && HasSSE2) ? 
X86::MOVNTI_64mr : X86::MOV64mr; break; case MVT::f32: - if (X86ScalarSSEf32) { + if (HasSSE1) { if (IsNonTemporal && HasSSE4A) Opc = X86::MOVNTSS; else @@ -531,7 +522,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM, Opc = X86::ST_Fp32m; break; case MVT::f64: - if (X86ScalarSSEf32) { + if (HasSSE2) { if (IsNonTemporal && HasSSE4A) Opc = X86::MOVNTSD; else @@ -1362,8 +1353,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { bool HasAVX512 = Subtarget->hasAVX512(); bool HasAVX = Subtarget->hasAVX(); - bool X86ScalarSSEf32 = Subtarget->hasSSE1(); - bool X86ScalarSSEf64 = Subtarget->hasSSE2(); + bool HasSSE1 = Subtarget->hasSSE1(); + bool HasSSE2 = Subtarget->hasSSE2(); switch (VT.getSimpleVT().SimpleTy) { default: return 0; @@ -1372,15 +1363,15 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; case MVT::f32: - return X86ScalarSSEf32 - ? (HasAVX512 ? X86::VUCOMISSZrr - : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) - : 0; + return HasAVX512 ? X86::VUCOMISSZrr + : HasAVX ? X86::VUCOMISSrr + : HasSSE1 ? X86::UCOMISSrr + : 0; case MVT::f64: - return X86ScalarSSEf64 - ? (HasAVX512 ? X86::VUCOMISDZrr - : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) - : 0; + return HasAVX512 ? X86::VUCOMISDZrr + : HasAVX ? X86::VUCOMISDrr + : HasSSE2 ? X86::UCOMISDrr + : 0; } } @@ -2036,7 +2027,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { /// the select. bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { // Check if the subtarget supports these instructions. - if (!Subtarget->hasCMov()) + if (!Subtarget->canUseCMOV()) return false; // FIXME: Add support for i8. @@ -2289,12 +2280,13 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { default: return false; case MVT::i8: Opc = X86::CMOV_GR8; break; case MVT::i16: Opc = X86::CMOV_GR16; break; - case MVT::f16: Opc = X86::CMOV_FR16X; break; case MVT::i32: Opc = X86::CMOV_GR32; break; - case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X - : X86::CMOV_FR32; break; - case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X - : X86::CMOV_FR64; break; + case MVT::f16: + Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break; + case MVT::f32: + Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break; + case MVT::f64: + Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break; } const Value *Cond = I->getOperand(0); @@ -2495,7 +2487,7 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I, } bool X86FastISel::X86SelectFPExt(const Instruction *I) { - if (X86ScalarSSEf64 && I->getType()->isDoubleTy() && + if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() && I->getOperand(0)->getType()->isFloatTy()) { bool HasAVX512 = Subtarget->hasAVX512(); // fpext from float to double. @@ -2509,7 +2501,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) { } bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { - if (X86ScalarSSEf64 && I->getType()->isFloatTy() && + if (Subtarget->hasSSE2() && I->getType()->isFloatTy() && I->getOperand(0)->getType()->isDoubleTy()) { bool HasAVX512 = Subtarget->hasAVX512(); // fptrunc from double to float. @@ -3733,25 +3725,23 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { // Get opcode and regclass of the output for the given load instruction. 
 unsigned Opc = 0;
+  bool HasSSE1 = Subtarget->hasSSE1();
+  bool HasSSE2 = Subtarget->hasSSE2();
   bool HasAVX = Subtarget->hasAVX();
   bool HasAVX512 = Subtarget->hasAVX512();
   switch (VT.SimpleTy) {
   default: return 0;
   case MVT::f32:
-    if (X86ScalarSSEf32)
-      Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
-            HasAVX    ? X86::VMOVSSrm_alt :
-                        X86::MOVSSrm_alt;
-    else
-      Opc = X86::LD_Fp32m;
+    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
+          : HasAVX  ? X86::VMOVSSrm_alt
+          : HasSSE1 ? X86::MOVSSrm_alt
+                    : X86::LD_Fp32m;
     break;
   case MVT::f64:
-    if (X86ScalarSSEf64)
-      Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
-            HasAVX    ? X86::VMOVSDrm_alt :
-                        X86::MOVSDrm_alt;
-    else
-      Opc = X86::LD_Fp64m;
+    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
+          : HasAVX  ? X86::VMOVSDrm_alt
+          : HasSSE2 ? X86::MOVSDrm_alt
+                    : X86::LD_Fp64m;
     break;
   case MVT::f80:
     // No f80 support yet.
@@ -3852,11 +3842,11 @@ unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
   default: break;
   case MVT::f32:
-    if (!X86ScalarSSEf32)
+    if (!Subtarget->hasSSE1())
       Opc = X86::LD_Fp032;
     break;
   case MVT::f64:
-    if (!X86ScalarSSEf64)
+    if (!Subtarget->hasSSE2())
       Opc = X86::LD_Fp064;
     break;
   case MVT::f80:
@@ -3907,21 +3897,24 @@ unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
     return 0;
 
   // Get opcode and regclass for the given zero.
+  bool HasSSE1 = Subtarget->hasSSE1();
+  bool HasSSE2 = Subtarget->hasSSE2();
   bool HasAVX512 = Subtarget->hasAVX512();
   unsigned Opc = 0;
   switch (VT.SimpleTy) {
   default: return 0;
+  case MVT::f16:
+    Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
+    break;
   case MVT::f32:
-    if (X86ScalarSSEf32)
-      Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
-    else
-      Opc = X86::LD_Fp032;
+    Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
+          : HasSSE1 ? X86::FsFLD0SS
+                    : X86::LD_Fp032;
     break;
   case MVT::f64:
-    if (X86ScalarSSEf64)
-      Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
-    else
-      Opc = X86::LD_Fp064;
+    Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
+          : HasSSE2 ? X86::FsFLD0SD
+                    : X86::LD_Fp064;
     break;
   case MVT::f80:
     // No f80 support yet.
diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
new file mode 100644
index 000000000000..7e5540022cc8
--- /dev/null
+++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
@@ -0,0 +1,709 @@
+//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Pass to preconfig the shape of physical tile registers.
+/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
+/// walks the instructions of each basic block in reverse order. All tile
+/// registers that live out of the basic block are spilled and reloaded
+/// before their users. It also checks the dependency of the shape to ensure
+/// the shape is defined before ldtilecfg.
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86MachineFunctionInfo.h" +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "fastpretileconfig" + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads, "Number of loads added"); + +namespace { + +class X86FastPreTileConfig : public MachineFunctionPass { + MachineFunction *MF = nullptr; + const X86Subtarget *ST = nullptr; + const TargetInstrInfo *TII = nullptr; + MachineRegisterInfo *MRI = nullptr; + X86MachineFunctionInfo *X86FI = nullptr; + MachineFrameInfo *MFI = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineBasicBlock *MBB = nullptr; + int CfgSS = -1; + struct PHIInfo { + Register Row; + Register Col; + Register StackAddr; + }; + DenseMap VisitedPHIs; + + /// Maps virtual regs to the frame index where these values are spilled. + IndexedMap StackSlotForVirtReg; + + /// Has a bit set for tile virtual register for which it was determined + /// that it is alive across blocks. + BitVector MayLiveAcrossBlocks; + + int getStackSpaceFor(Register VirtReg); + void InitializeTileConfigStackSpace(); + bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI); + void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill); + void reload(MachineBasicBlock::iterator UseMI, Register VirtReg, + MachineOperand *RowMO, MachineOperand *ColMO); + void canonicalizePHIs(MachineBasicBlock &MBB); + void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI); + void convertPHIs(MachineBasicBlock &MBB); + bool configBasicBlock(MachineBasicBlock &MBB); + +public: + X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {} + + /// Return the pass name. + StringRef getPassName() const override { + return "Fast Tile Register Preconfigure"; + } + + /// Perform tile register configure. + bool runOnMachineFunction(MachineFunction &MFunc) override; + + static char ID; +}; + +} // end anonymous namespace + +char X86FastPreTileConfig::ID = 0; + +INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE, + "Fast Tile Register Preconfigure", false, false) +INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE, + "Fast Tile Register Preconfigure", false, false) + +static bool dominates(MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator A, + MachineBasicBlock::const_iterator B) { + auto MBBEnd = MBB.end(); + if (B == MBBEnd) + return true; + + MachineBasicBlock::const_iterator I = MBB.begin(); + for (; &*I != A && &*I != B; ++I) + ; + + return &*I == A; +} + +/// This allocates space for the specified virtual register to be held on the +/// stack. +int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + // Already has space allocated? + if (SS != -1) + return SS; + + // Allocate a new stack object for this spill location... 
+  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+  unsigned Size = TRI->getSpillSize(RC);
+  Align Alignment = TRI->getSpillAlign(RC);
+  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
+
+  // Assign the slot.
+  StackSlotForVirtReg[VirtReg] = FrameIdx;
+  return FrameIdx;
+}
+
+/// Returns false if \p VirtReg is known to not live out of the current config.
+/// If \p VirtReg lives out of the current MBB, it must live out of the current
+/// config.
+bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
+  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
+    return true;
+
+  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
+    if (UseInst.getParent() != MBB) {
+      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+      return true;
+    }
+
+    // The use and def are in the same MBB. If the tile register is
+    // reconfigured, it is clobbered and we need to spill and reload the
+    // tile register.
+    if (CfgMI) {
+      if (dominates(*MBB, *CfgMI, UseInst)) {
+        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
+  MachineBasicBlock &MBB = MF->front();
+  MachineInstr *MI = &*MBB.getFirstNonPHI();
+  DebugLoc DL;
+  if (ST->hasAVX512()) {
+    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
+    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
+        .addReg(Zmm);
+  } else if (ST->hasAVX2()) {
+    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
+    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
+        .addReg(Ymm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
+                      32)
+        .addReg(Ymm);
+  } else {
+    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
+    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
+    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
+        .addReg(Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
+        .addReg(Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
+        .addReg(Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
+        .addReg(Xmm);
+  }
+  // Fill in the palette first.
+  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
+      .addImm(1);
+}
+
+/// Insert spill instruction for \p VirtReg before \p Before.
+/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
+void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
+                                 Register VirtReg, bool Kill) {
+  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
+  int FI = getStackSpaceFor(VirtReg);
+  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
+
+  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+  // Don't need shape information for tile store, because it is adjacent to
+  // the tile def instruction.
+  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
+  ++NumStores;
+
+  // TODO: update DBG_VALUEs
+}
+
+/// Insert reload instruction for \p OrigReg before \p UseMI.
+void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI, + Register OrigReg, MachineOperand *RowMO, + MachineOperand *ColMO) { + int FI = getStackSpaceFor(OrigReg); + const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg); + Register TileReg; + // Fold copy to tileload + // BB1: + // spill src to s + // + // BB2: + // t = copy src + // --> + // t = tileload (s) + if (UseMI->isCopy()) + TileReg = UseMI->getOperand(0).getReg(); + else + TileReg = MRI->createVirtualRegister(&RC); + // Can't use TII->loadRegFromStackSlot(), because we need the shape + // information for reload. + // tileloadd (%sp, %idx), %tmm + unsigned Opc = X86::PTILELOADDV; + Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + // FIXME: MBB is not the parent of UseMI. + MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), + TII->get(X86::MOV64ri), StrideReg) + .addImm(64); + NewMI = addFrameReference( + BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg) + .addReg(RowMO->getReg()) + .addReg(ColMO->getReg()), + FI); + MachineOperand &MO = NewMI->getOperand(5); + MO.setReg(StrideReg); + MO.setIsKill(true); + RowMO->setIsKill(false); + ColMO->setIsKill(false); + // Erase copy instruction after it is folded. + if (UseMI->isCopy()) { + UseMI->eraseFromParent(); + } else { + // Replace the register in the user MI. + for (auto &MO : UseMI->operands()) { + if (MO.isReg() && MO.getReg() == OrigReg) + MO.setReg(TileReg); + } + } + + ++NumLoads; + LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into " + << printReg(TileReg, TRI) << '\n'); +} + +static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { + // The instruction must have 3 operands: tile def, row, col. + if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo()) + return false; + MachineOperand &MO = MI.getOperand(0); + + if (MO.isReg()) { + Register Reg = MO.getReg(); + // FIXME it may be used after Greedy RA and the physical + // register is not rewritten yet. + if (Reg.isVirtual() && + MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) + return true; + if (Reg >= X86::TMM0 && Reg <= X86::TMM7) + return true; + } + + return false; +} + +static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) { + MachineInstr *MI = MRI->getVRegDef(TileReg); + if (isTileDef(MRI, *MI)) { + MachineOperand *RowMO = &MI->getOperand(1); + MachineOperand *ColMO = &MI->getOperand(2); + return ShapeT(RowMO, ColMO, MRI); + } else if (MI->isCopy()) { + TileReg = MI->getOperand(1).getReg(); + return getShape(MRI, TileReg); + } + + // The def should not be PHI node, because we walk the MBB in reverse post + // order. + assert(MI->isPHI() && "Unexpected PHI when get shape."); + llvm_unreachable("Unexpected MI when get shape."); +} + +// BB0: +// spill t0 to s0 +// BB1: +// spill t1 to s1 +// +// BB2: +// t = phi [t0, bb0] [t1, bb1] +// --> +// row = phi [r0, bb0] [r1, bb1] +// col = phi [c0, bb0] [c1, bb1] +// s = phi [s0, bb0] [s1, bb1] +// t = tileload row, col, s +// The new instruction is inserted at the end of the phi node. The order +// of the original phi node is not ensured. +void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB, + MachineInstr &PHI) { + // 1. Create instruction to get stack slot address of each incoming block. + // 2. Create PHI node for the stack address. + // 3. Create PHI node for shape. If one of the incoming shape is immediate + // use the immediate and delete the PHI node. + // 4. Create tileload instruction from the stack address. 
+  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
+                                        TII->get(X86::PHI), StackAddrReg);
+  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
+  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
+                                       TII->get(X86::PHI), RowReg);
+  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
+  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
+                                       TII->get(X86::PHI), ColReg);
+  // Record the mapping of phi node and its row/column information.
+  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};
+
+  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
+    // Get the two incoming values: the tile register and its MBB.
+    Register InTileReg = PHI.getOperand(I).getReg();
+    // Mark it as live out, so that it will be spilled when we visit
+    // the incoming MBB. Otherwise, since the phi will be deleted, the
+    // spill would be missed when visiting the incoming MBB.
+    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
+    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();
+
+    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
+    MachineBasicBlock::iterator InsertPos;
+    if (TileDefMI->isPHI()) {
+      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
+      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
+        //        def t1
+        //       /       \
+        //  def t2       t3 = phi(t1, t4) <--
+        //       \       /                  |
+        //      t4 = phi(t2, t3)-------------
+        //
+        // For each (row, column and stack address) append phi incoming value.
+        // Create r3 = phi(r1, r4)
+        // Create r4 = phi(r2, r3)
+        Register InRowReg = VisitedPHIs[TileDefMI].Row;
+        Register InColReg = VisitedPHIs[TileDefMI].Col;
+        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
+        RowPHI.addReg(InRowReg).addMBB(InMBB);
+        ColPHI.addReg(InColReg).addMBB(InMBB);
+        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
+        continue;
+      } else {
+        // Recursively convert the PHI to a tileload.
+        convertPHI(TileDefMI->getParent(), *TileDefMI);
+        // The PHI node is converted to a tileload instruction. Get the stack
+        // address from the tileload operands.
+        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
+        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
+        Register InRowReg = TileLoad->getOperand(1).getReg();
+        Register InColReg = TileLoad->getOperand(2).getReg();
+        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
+        RowPHI.addReg(InRowReg).addMBB(InMBB);
+        ColPHI.addReg(InColReg).addMBB(InMBB);
+        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
+      }
+    } else {
+      InsertPos = TileDefMI->getIterator();
+
+      // Fill the incoming operand of the row/column phi instruction.
+      ShapeT Shape = getShape(MRI, InTileReg);
+      Shape.getRow()->setIsKill(false);
+      Shape.getCol()->setIsKill(false);
+      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
+      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);
+
+      // The incoming tile register lives out of its def BB, so it will be
+      // spilled.
+      // Create an MI to get the spill stack slot address for the tile
+      // register.
+      int FI = getStackSpaceFor(InTileReg);
+      Register InStackAddrReg =
+          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
+                        TII->get(X86::LEA64r), InStackAddrReg)
+                    .addFrameIndex(FI),
+                0);
+      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
+    }
+  }
+
+  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
+  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
+      .addImm(64);
+  Register TileReg = PHI.getOperand(0).getReg();
+  MachineInstr *NewMI = addDirectMem(
+      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
+          .addReg(RowReg)
+          .addReg(ColReg),
+      StackAddrReg);
+  MachineOperand &MO = NewMI->getOperand(5);
+  MO.setReg(StrideReg);
+  MO.setIsKill(true);
+  PHI.eraseFromParent();
+  VisitedPHIs.erase(&PHI);
+}
+
+static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
+  MachineOperand &MO = MI.getOperand(0);
+  if (MO.isReg() && MO.getReg().isVirtual() &&
+      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
+    return true;
+  return false;
+}
+
+void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
+  SmallVector<MachineInstr *, 8> PHIs;
+
+  for (MachineInstr &MI : MBB) {
+    if (!MI.isPHI())
+      break;
+    if (!isTileRegDef(MRI, MI))
+      continue;
+    PHIs.push_back(&MI);
+  }
+  // Canonicalize the phi nodes first. One tile phi may depend on a previous
+  // phi node. For the case below, we need to convert %t4.
+  //
+  // BB0:
+  // %t3 = phi (t1 BB1, t2 BB0)
+  // %t4 = phi (t5 BB1, t3 BB0)
+  // -->
+  // %t3 = phi (t1 BB1, t2 BB0)
+  // %t4 = phi (t5 BB1, t2 BB0)
+  //
+  while (!PHIs.empty()) {
+    MachineInstr *PHI = PHIs.pop_back_val();
+
+    // Find the operand that is incoming from the same MBB and whose def
+    // is also a phi node.
+    MachineOperand *InMO = nullptr;
+    MachineInstr *DefMI = nullptr;
+    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
+      Register InTileReg = PHI->getOperand(I).getReg();
+      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
+      DefMI = MRI->getVRegDef(InTileReg);
+      if (InMBB != &MBB || !DefMI->isPHI())
+        continue;
+
+      InMO = &PHI->getOperand(I);
+      break;
+    }
+    // If no such operand can be found, do nothing.
+    if (!InMO)
+      continue;
+
+    // The current phi node depends on a previous phi node. Break the
+    // dependency.
+    Register DefTileReg;
+    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
+      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
+      if (InMBB != &MBB)
+        continue;
+      DefTileReg = DefMI->getOperand(I).getReg();
+      InMO->setReg(DefTileReg);
+      break;
+    }
+  }
+}
+
+void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
+  SmallVector<MachineInstr *, 8> PHIs;
+  for (MachineInstr &MI : MBB) {
+    if (!MI.isPHI())
+      break;
+    if (!isTileRegDef(MRI, MI))
+      continue;
+    PHIs.push_back(&MI);
+  }
+  while (!PHIs.empty()) {
+    MachineInstr *MI = PHIs.pop_back_val();
+    VisitedPHIs.clear();
+    convertPHI(&MBB, *MI);
+  }
+}
+
+// PreTileConfig should configure the tile registers based on each basic
+// block.
+bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
+  this->MBB = &MBB;
+  bool Change = false;
+  MachineInstr *LastShapeMI = nullptr;
+  MachineInstr *LastTileCfg = nullptr;
+  bool HasUnconfigTile = false;
+
+  auto Config = [&](MachineInstr &Before) {
+    if (CfgSS == -1)
+      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
+                                     ST->getTileConfigAlignment(), false);
+    LastTileCfg = addFrameReference(
+        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
+    LastShapeMI = nullptr;
+    Change = true;
+  };
+  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
+    for (const MachineOperand &MO : MI.operands()) {
+      if (!MO.isReg())
+        continue;
+      Register Reg = MO.getReg();
+      if (Reg.isVirtual() &&
+          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
+        return true;
+    }
+    return false;
+  };
+  for (MachineInstr &MI : reverse(MBB)) {
+    // We have transformed the phi nodes before configuring the BB.
+    if (MI.isPHI())
+      break;
+    // Don't collect the shape of a used tile; the tile should be defined
+    // before the tile use. Spill and reload would happen if there is only a
+    // tile use after ldtilecfg, so the shape can be collected from the reload.
+    // Take the code below for example: %t would be reloaded before the
+    // tilestore call.
+    // ....
+    // tilestore %r, %c, %t
+    // -->
+    // call
+    // ldtilecfg
+    // %t = tileload %r, %c
+    // tilestore %r, %c, %t
+    if (HasTileOperand(MRI, MI))
+      HasUnconfigTile = true;
+    // According to the AMX ABI, all tile registers including the config
+    // register are volatile. Callers need to save/restore the config register.
+    if (MI.isCall() && HasUnconfigTile) {
+      MachineBasicBlock::iterator I;
+      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
+        I = ++LastShapeMI->getIterator();
+      else
+        I = ++MI.getIterator();
+      Config(*I);
+      HasUnconfigTile = false;
+      continue;
+    }
+    if (!isTileDef(MRI, MI))
+      continue;
+    //
+    //---------------------------------------------------------------------
+    // Don't handle COPY instructions. If the src and dst of the COPY can be
+    // in the same config, as in the case below, we just check the shape of t0.
+    // def row0
+    // def col0
+    // ldtilecfg
+    // t0 = tilezero(row0, col0)
+    // t1 = copy t0
+    // ...
+    // If the src and dst of the COPY can NOT be in the same config, as in the
+    // case below, a reload is generated before the copy instruction.
+    // def row0
+    // def col0
+    // t0 = tilezero(row0, col0)
+    // spill t0
+    // ...
+    // def row1
+    // def col1
+    // ldtilecfg
+    // t1 = tilezero(row1, col1)
+    // reload t0
+    // t1 = copy t0
+    //---------------------------------------------------------------------
+    //
+    // If MI dominates the last shape def instruction, we need to insert
+    // ldtilecfg after LastShapeMI now. The config doesn't include
+    // the current MI.
+    // def row0
+    // def col0
+    // tilezero(row0, col0)  <- MI
+    // def row1
+    // def col1
+    // ldtilecfg             <- insert
+    // tilezero(row1, col1)
+    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
+      Config(*(++LastShapeMI->getIterator()));
+    MachineOperand *RowMO = &MI.getOperand(1);
+    MachineOperand *ColMO = &MI.getOperand(2);
+    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
+    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
+    // If the shape is defined in the current MBB, check the domination.
+    // FIXME: how about loops?
+    if (RowMI->getParent() == &MBB) {
+      if (!LastShapeMI)
+        LastShapeMI = RowMI;
+      else if (dominates(MBB, LastShapeMI, RowMI))
+        LastShapeMI = RowMI;
+    }
+    if (ColMI->getParent() == &MBB) {
+      if (!LastShapeMI)
+        LastShapeMI = ColMI;
+      else if (dominates(MBB, LastShapeMI, ColMI))
+        LastShapeMI = ColMI;
+    }
+    // If a user of the tile lives out of the tilecfg, spill the tile and
+    // reload it before the user.
+    Register TileReg = MI.getOperand(0).getReg();
+    if (mayLiveOut(TileReg, LastTileCfg))
+      spill(++MI.getIterator(), TileReg, false);
+    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
+      if (UseMI.getParent() == &MBB) {
+        // Check that the user does not cross the ldtilecfg.
+        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
+          continue;
+        // Reload before UseMI.
+        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
+      } else {
+        // Don't reload for a phi instruction; we handle phi reloads
+        // separately.
+        // TODO: merge the reloads for the same user MBB.
+        if (!UseMI.isPHI())
+          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
+      }
+    }
+  }
+
+  // Configure tile registers at the head of the MBB.
+  if (HasUnconfigTile) {
+    MachineInstr *Before;
+    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
+      Before = &*MBB.getFirstNonPHI();
+    else
+      Before = &*(++LastShapeMI->getIterator());
+
+    Config(*Before);
+  }
+
+  return Change;
+}
+
+bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
+  MF = &MFunc;
+  MRI = &MFunc.getRegInfo();
+  ST = &MFunc.getSubtarget<X86Subtarget>();
+  TII = ST->getInstrInfo();
+  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
+  MFI = &MFunc.getFrameInfo();
+  TRI = ST->getRegisterInfo();
+  CfgSS = -1;
+
+  unsigned NumVirtRegs = MRI->getNumVirtRegs();
+  // Return early if there is no tile register to configure.
+  bool HasVirtTileReg = false;
+  for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
+    Register VirtReg = Register::index2VirtReg(I);
+    if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
+      HasVirtTileReg = true;
+      break;
+    }
+  }
+  if (!HasVirtTileReg)
+    return false;
+
+  StackSlotForVirtReg.resize(NumVirtRegs);
+  MayLiveAcrossBlocks.clear();
+  // We will create registers during config. The *3 is to make sure
+  // the virtual register number doesn't exceed the size of
+  // the bit vector.
+  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
+  bool Change = false;
+  assert(MRI->isSSA());
+
+  // Canonicalize the phi nodes first.
+  for (MachineBasicBlock &MBB : MFunc)
+    canonicalizePHIs(MBB);
+
+  // Loop over all of the basic blocks in reverse post order and insert
+  // ldtilecfg for tile registers. The reverse post order facilitates the
+  // PHI node conversion.
+ ReversePostOrderTraversal RPOT(MF); + for (MachineBasicBlock *MBB : RPOT) { + convertPHIs(*MBB); + Change |= configBasicBlock(*MBB); + } + + if (Change) + InitializeTileConfigStackSpace(); + + StackSlotForVirtReg.clear(); + return Change; +} + +FunctionPass *llvm::createX86FastPreTileConfigPass() { + return new X86FastPreTileConfig(); +} diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp index 061fff50bcea..2a20cd13791d 100644 --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -40,40 +40,25 @@ namespace { class X86FastTileConfig : public MachineFunctionPass { // context MachineFunction *MF = nullptr; - const X86Subtarget *ST = nullptr; - const TargetRegisterInfo *TRI = nullptr; const TargetInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; + const TargetRegisterInfo *TRI = nullptr; X86MachineFunctionInfo *X86FI = nullptr; - MachineInstr *getTileConfigPoint(); - void tileConfig(); + bool configBasicBlock(MachineBasicBlock &MBB); public: X86FastTileConfig() : MachineFunctionPass(ID) {} - bool fastTileConfig(); - bool isTileLoad(MachineInstr &MI); - bool isTileStore(MachineInstr &MI); - bool isAMXInstr(MachineInstr &MI); - - MachineInstr *getKeyAMXInstr(MachineInstr *MI); - void getTileShapesCfg(MachineInstr *MI, - SmallVector &ShapedTiles); - void getShapeCfgInstrs(MachineInstr *MI, - std::map &RowCfgs, - std::map &ColCfgs); - /// Return the pass name. StringRef getPassName() const override { return "Fast Tile Register Configure"; } - void materializeTileCfg(MachineInstr *MI); - - void rewriteTileCfg(SmallVector &ShapedTiles, - std::map &RowCfgs, - std::map &ColCfgs); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } /// Perform register allocation. bool runOnMachineFunction(MachineFunction &MFunc) override; @@ -95,209 +80,107 @@ INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, "Fast Tile Register Configure", false, false) -static bool isTilePhysReg(MachineOperand &Op) { - if (!Op.isReg()) +static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { + // There is no phi instruction after register allocation. + assert(MI.isPHI() == false); + // The instruction must have 3 operands: tile def, row, col. + // It should be AMX pseudo instruction that have shape operand. + if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || + !MI.isPseudo()) return false; + MachineOperand &MO = MI.getOperand(0); + + if (MO.isReg()) { + Register Reg = MO.getReg(); + // FIXME it may be used after Greedy RA and the physical + // register is not rewritten yet. 
+ if (Reg.isVirtual() && + MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) + return true; + if (Reg >= X86::TMM0 && Reg <= X86::TMM7) + return true; + } - Register Reg = Op.getReg(); - if (Reg >= X86::TMM0 && Reg <= X86::TMM7) - return true; return false; } -static unsigned getTilePhysRegIdx(MachineOperand *Op) { - assert(isTilePhysReg(*Op) && "Tile Operand is invalid"); - return Op->getReg() - X86::TMM0; -} - -static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) { - unsigned Offset = 48 + TIdx; - MI->getOperand(3).ChangeToImmediate(Offset); -} - -static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) { - unsigned Offset = 16 + TIdx * 2; - MI->getOperand(3).ChangeToImmediate(Offset); -} - -bool X86FastTileConfig::isTileLoad(MachineInstr &MI) { - return MI.getOpcode() == X86::PTILELOADDV || - MI.getOpcode() == X86::PTILELOADDT1V; -} -bool X86FastTileConfig::isTileStore(MachineInstr &MI) { - return MI.getOpcode() == X86::PTILESTOREDV; -} -bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) { - // TODO: May need to handle some special non-tile AMX instruction. - if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr()) - return false; - - return llvm::any_of(MI.operands(), isTilePhysReg); -} - -MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) { - auto Cfg = MachineBasicBlock::iterator(MI); - MachineBasicBlock *MBB = MI->getParent(); - MachineInstr *KeyMI = nullptr; - int KeyAMXNum = 0; - - for (auto II = Cfg; II != MBB->end(); II++) { - if (isTileLoad(*II)) { - KeyMI = &*II; +// PreTileConfig should configure the tile registers based on basic +// block. +bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { + bool Change = false; + SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos; + for (MachineInstr &MI : reverse(MBB)) { + if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV) continue; + // AMX instructions that define a tile register. + if (MI.getOpcode() != X86::PLDTILECFGV) { + MachineOperand &Row = MI.getOperand(1); + MachineOperand &Col = MI.getOperand(2); + unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0; + ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)}); + } else { // PLDTILECFGV + // Rewrite the shape information to memory. Stack slot should have + // been initialized to zero in the pre-config pass. + int SS = MI.getOperand(0).getIndex(); // tile config stack slot. + for (auto &ShapeInfo : ShapeInfos) { + DebugLoc DL; + unsigned TMMIdx = ShapeInfo.first; + Register RowReg = ShapeInfo.second.getRow()->getReg(); + Register ColReg = ShapeInfo.second.getCol()->getReg(); + // Here is the data format for the tile config. + // 0 palette + // 1 start_row + // 2-15 reserved, must be zero + // 16-17 tile0.colsb Tile 0 bytes per row. + // 18-19 tile1.colsb Tile 1 bytes per row. + // 20-21 tile2.colsb Tile 2 bytes per row. + // ... (sequence continues) + // 30-31 tile7.colsb Tile 7 bytes per row. + // 32-47 reserved, must be zero + // 48 tile0.rows Tile 0 rows. + // 49 tile1.rows Tile 1 rows. + // 50 tile2.rows Tile 2 rows. + // ... (sequence continues) + // 55 tile7.rows Tile 7 rows.
+ // 56-63 reserved, must be zero + int RowOffset = 48 + TMMIdx; + int ColOffset = 16 + TMMIdx * 2; + + Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit); + BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg); + MachineInstrBuilder StoreRow = + BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); + addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg); + + MachineInstrBuilder StoreCol = + BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); + addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg); + } + ShapeInfos.clear(); + Change = true; } - - if (isTileStore(*II)) { - assert(KeyMI && "Key AMX Should be found before!"); - break; - } - - if (isAMXInstr(*II)) { - assert((KeyAMXNum == 0) && "Too many Key AMX instruction!"); - KeyAMXNum++; - KeyMI = &*II; - } - } - assert(KeyMI && "There must be an AMX instruction."); - return KeyMI; -} - -// Orderly get the tiles in the key AMX instruction, uses before defs. -void X86FastTileConfig::getTileShapesCfg( - MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) { - MachineInstr *KeyMI = getKeyAMXInstr(CfgMI); - - SmallVector<MachineOperand *> DefTiles; - for (MachineOperand &MO : KeyMI->operands()) { - if (!isTilePhysReg(MO)) - continue; - if (MO.isDef()) - DefTiles.push_back(&MO); - else - ShapedTiles.push_back(&MO); - } - ShapedTiles.append(DefTiles); -} - -// We pre-config the shapes at positions named "amx.tmm.N.shape.row*" and -// "amx.shape.N.col*" in the pass "Pre AMX Tile Config". -// The 'N' implies the order of tiles in the key AMX intrinsic. -void X86FastTileConfig::getShapeCfgInstrs( - MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs, - std::map<unsigned, MachineInstr *> &ColCfgs) { - auto Cfg = MachineBasicBlock::iterator(MI); - MachineBasicBlock *MBB = MI->getParent(); - - for (auto II = Cfg; II != MBB->begin(); II--) { - if (isAMXInstr(*II) || II->isTerminator() || II->isCall()) - break; - if (!II->mayStore() || !II->hasOneMemOperand()) - continue; - const Value *MemPtr = II->memoperands()[0]->getValue(); - if (!MemPtr) - continue; - - StringRef Name = MemPtr->getName(); - if (!Name.startswith("amx.tmm.")) - continue; - - // Get the 'N'th tile shape config in the key AMX instruction. - auto N = Name.find(".shape"); - StringRef STileIdx = Name.slice(8, N); - unsigned Idx; - STileIdx.getAsInteger(10, Idx); - - // And relate them with their store instructions. - if (Name.contains("row")) - RowCfgs[Idx] = &*II; - else if (Name.contains("col")) - ColCfgs[Idx] = &*II; - else - llvm_unreachable("Invalid tile shape info!"); } - assert((RowCfgs.size() == ColCfgs.size()) && - "The number of tile row and col must be equal!"); -} - -// Here is the data format for the tile config. -// 0 palette = 1 now. -// 1 start_row = 0 now. -// 2-15 reserved, must be zero -// 16-17 tile0.colsb Tile 0 bytes per row. -// 18-19 tile1.colsb Tile 1 bytes per row. -// 20-21 tile2.colsb Tile 2 bytes per row. -// ... (sequence continues) -// 30-31 tile7.colsb Tile 7 bytes per row. -// 32-47 reserved, must be zero -// 48 tile0.rows Tile 0 rows. -// 49 tile1.rows Tile 1 rows. -// 50 tile2.rows Tile 2 rows. -// ... (sequence continues) -// 55 tile7.rows Tile 7 rows. -// 56-63 reserved, must be zero -void X86FastTileConfig::rewriteTileCfg( - SmallVector<MachineOperand *> &ShapedTiles, - std::map<unsigned, MachineInstr *> &RowCfgs, - std::map<unsigned, MachineInstr *> &ColCfgs) { - assert((RowCfgs.size() == ShapedTiles.size()) && - "The number of tile shapes not equal with the number of tiles!"); - // Orderly get the tiles and adjust the shape config.
- for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) { - MachineOperand *MO = ShapedTiles[I]; - unsigned TmmIdx = getTilePhysRegIdx(MO); - if (I == TmmIdx) - continue; - adjustRowCfg(TmmIdx, RowCfgs[I]); - adjustColCfg(TmmIdx, ColCfgs[I]); - } -} - -// We have already pre-configured the shapes before fast register allocation -// at X86PreAMXConfig::preWriteTileCfg(). Now that fast register allocation is -// done, the shapes written before may not correspond to the correct tmm -// registers anymore, so we need to adjust them. -void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) { - SmallVector<MachineOperand *> ShapedTiles; - std::map<unsigned, MachineInstr *> RowCfgs; - std::map<unsigned, MachineInstr *> ColCfgs; - - // Orderly keep the tile uses and defs in ShapedTiles. - getTileShapesCfg(CfgMI, ShapedTiles); - assert(ShapedTiles.size() && "Not find shapes config!"); - - getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs); - - rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs); -} - -bool X86FastTileConfig::fastTileConfig() { - bool Changed = false; - - for (MachineBasicBlock &MBB : *MF) { - SmallVector<MachineInstr *> CFGs; - for (MachineInstr &MI : MBB) - if (MI.getOpcode() == X86::PLDTILECFGV) - CFGs.push_back(&MI); - for (auto *MI : CFGs) - materializeTileCfg(MI); - if (!CFGs.empty()) - Changed = true; - } - if (Changed) + if (Change) X86FI->setHasVirtualTileReg(true); - return Changed; + + return Change; } bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { MF = &MFunc; MRI = &MFunc.getRegInfo(); - ST = &MFunc.getSubtarget<X86Subtarget>(); + const TargetSubtargetInfo *ST = &MFunc.getSubtarget(); TRI = ST->getRegisterInfo(); TII = MFunc.getSubtarget().getInstrInfo(); X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); + bool Change = false; + + // Loop over all of the basic blocks, configuring the tile registers in each. + for (MachineBasicBlock &MBB : MFunc) + Change |= configBasicBlock(MBB); - return fastTileConfig(); + return Change; } FunctionPass *llvm::createX86FastTileConfigPass() { diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp index 4730b936ec1f..b01145809ac6 100644 --- a/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -229,7 +229,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); bool IsSlowLEA = ST.slowLEA(); bool IsSlow3OpsLEA = ST.slow3OpsLEA(); - bool LEAUsesAG = ST.LEAusesAG(); + bool LEAUsesAG = ST.leaUsesAG(); bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize(); bool UseLEAForSP = ST.useLeaForSP(); @@ -546,7 +546,6 @@ bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I, if (KilledIndex) KilledIndex->setIsKill(false); - MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1); MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1); MBB.erase(I); MBB.erase(AluI); diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 2f0ab4ca9de4..33f5bb365da8 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -99,17 +99,17 @@ namespace { // but the exact mapping of FP registers to stack slots is fixed later. struct LiveBundle { // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. - unsigned Mask; + unsigned Mask = 0; // Number of pre-assigned live registers in FixStack. This is 0 when the // stack order has not yet been fixed. - unsigned FixCount; + unsigned FixCount = 0; // Assigned stack order for live-in registers. // FixStack[i] == getStackEntry(i) for all i < FixCount.
unsigned char FixStack[8]; - LiveBundle() : Mask(0), FixCount(0) {} + LiveBundle() = default; // Have the live registers been assigned a stack order yet? bool isFixed() const { return !Mask || FixCount; } @@ -866,7 +866,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) { if (Opcode != -1) { I->setDesc(TII->get(Opcode)); if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr) - I->RemoveOperand(0); + I->removeOperand(0); MI.dropDebugNumber(); } else { // Insert an explicit pop // If this instruction sets FPSW, which is read in following instruction, @@ -1034,7 +1034,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) { STReturns |= 1 << getFPReg(Op); // Remove the operand so that later passes don't see it. - MI.RemoveOperand(i); + MI.removeOperand(i); --i; --e; } @@ -1098,7 +1098,7 @@ void FPS::handleReturn(MachineBasicBlock::iterator &I) { LiveMask |= (1 << getFPReg(Op)); // Remove the operand so that later passes don't see it. - MI.RemoveOperand(i); + MI.removeOperand(i); --i; --e; } @@ -1162,7 +1162,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { unsigned DestReg = getFPReg(MI.getOperand(0)); // Change from the pseudo instruction to the concrete instruction. - MI.RemoveOperand(0); // Remove the explicit ST(0) operand + MI.removeOperand(0); // Remove the explicit ST(0) operand MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); MI.addOperand( MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true)); @@ -1210,7 +1210,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { } // Convert from the pseudo instruction to the concrete instruction. - MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand + MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); MI.addOperand( MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true)); @@ -1263,8 +1263,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { } // Change from the pseudo instruction to the concrete instruction. - MI.RemoveOperand(1); // Drop the source operand. - MI.RemoveOperand(0); // Drop the destination operand. + MI.removeOperand(1); // Drop the source operand. + MI.removeOperand(0); // Drop the destination operand. MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); MI.dropDebugNumber(); } @@ -1464,7 +1464,7 @@ void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { // Change from the pseudo instruction to the concrete instruction. MI.getOperand(0).setReg(getSTReg(Op1)); - MI.RemoveOperand(1); + MI.removeOperand(1); MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); MI.dropDebugNumber(); @@ -1489,8 +1489,8 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { // Change the second operand to the stack register that the operand is in. // Change from the pseudo instruction to the concrete instruction. 
- MI.RemoveOperand(0); - MI.RemoveOperand(1); + MI.removeOperand(0); + MI.removeOperand(1); MI.getOperand(0).setReg(getSTReg(Op1)); MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); MI.dropDebugNumber(); diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 51f2ced321bb..d524090f902e 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "X86FrameLowering.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" @@ -19,6 +20,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -99,7 +101,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || MFI.hasStackMap() || MFI.hasPatchPoint() || - MFI.hasCopyImplyingStackAdjustment()); + (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); } static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { @@ -435,11 +437,13 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - const MCCFIInstruction &CFIInst) const { + const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag Flag) const { MachineFunction &MF = *MBB.getParent(); unsigned CFIIndex = MF.addFrameInst(CFIInst); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) + .setMIFlag(Flag); } /// Emits Dwarf Info specifying offsets of callee saved registers and @@ -492,6 +496,87 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( } } +void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const { + const MachineFunction &MF = *MBB.getParent(); + + // Insertion point. + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + + // Fake a debug loc. + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + // Zero out FP stack if referenced. Do this outside of the loop below so that + // it's done only once. + const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); + for (MCRegister Reg : RegsToZero.set_bits()) { + if (!X86::RFP80RegClass.contains(Reg)) + continue; + + unsigned NumFPRegs = ST.is64Bit() ? 8 : 7; + for (unsigned i = 0; i != NumFPRegs; ++i) + BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0)); + + for (unsigned i = 0; i != NumFPRegs; ++i) + BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0); + break; + } + + // For GPRs, we only care to clear out the 32-bit register. + BitVector GPRsToZero(TRI->getNumRegs()); + for (MCRegister Reg : RegsToZero.set_bits()) + if (TRI->isGeneralPurposeRegister(MF, Reg)) { + GPRsToZero.set(getX86SubSuperRegisterOrZero(Reg, 32)); + RegsToZero.reset(Reg); + } + + for (MCRegister Reg : GPRsToZero.set_bits()) + BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + + // Zero out registers. + for (MCRegister Reg : RegsToZero.set_bits()) { + if (ST.hasMMX() && X86::VR64RegClass.contains(Reg)) + // FIXME: Ignore MMX registers?
+ continue; + + unsigned XorOp; + if (X86::VR128RegClass.contains(Reg)) { + // XMM# + if (!ST.hasSSE1()) + continue; + XorOp = X86::PXORrr; + } else if (X86::VR256RegClass.contains(Reg)) { + // YMM# + if (!ST.hasAVX()) + continue; + XorOp = X86::VPXORrr; + } else if (X86::VR512RegClass.contains(Reg)) { + // ZMM# + if (!ST.hasAVX512()) + continue; + XorOp = X86::VPXORYrr; + } else if (X86::VK1RegClass.contains(Reg) || + X86::VK2RegClass.contains(Reg) || + X86::VK4RegClass.contains(Reg) || + X86::VK8RegClass.contains(Reg) || + X86::VK16RegClass.contains(Reg)) { + if (!ST.hasVLX()) + continue; + XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr; + } else { + continue; + } + + BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + } +} + void X86FrameLowering::emitStackProbe( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, @@ -1289,6 +1374,9 @@ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); } +/// Return true if we need to use the restricted Windows x64 prologue and +/// epilogue code patterns that can be described with WinCFI (.seh_* +/// directives). bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); } @@ -1558,12 +1646,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Define the current CFA rule to use the provided offset. assert(StackSize); BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth)); + MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth), + MachineInstr::FrameSetup); // Change the rule for the FramePtr to be an "offset" rule. unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset( - nullptr, DwarfFramePtr, 2 * stackGrowth)); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset(nullptr, DwarfFramePtr, + 2 * stackGrowth), + MachineInstr::FrameSetup); } if (NeedsWinCFI) { @@ -1630,7 +1721,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); BuildCFI( MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr), + MachineInstr::FrameSetup); } if (NeedsWinFPO) { @@ -1681,7 +1773,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Define the current CFA rule to use the provided offset. assert(StackSize); BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset)); + MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset), + MachineInstr::FrameSetup); StackOffset += stackGrowth; } @@ -1962,7 +2055,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, assert(StackSize); BuildCFI( MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth)); + MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth), + MachineInstr::FrameSetup); } // Emit DWARF info specifying the offsets of the callee-saved registers. @@ -2145,11 +2239,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned DwarfStackPtr = TRI->getDwarfRegNum(Is64Bit ? 
X86::RSP : X86::ESP, true); BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize)); + MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), + MachineInstr::FrameDestroy); if (!MBB.succ_empty() && !MBB.isReturnBlock()) { unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); BuildCFI(MBB, AfterPop, DL, - MCCFIInstruction::createRestore(nullptr, DwarfFramePtr)); + MCCFIInstruction::createRestore(nullptr, DwarfFramePtr), + MachineInstr::FrameDestroy); --MBBI; --AfterPop; } @@ -2226,7 +2322,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Define the current CFA rule to use the provided offset. BuildCFI(MBB, MBBI, DL, MCCFIInstruction::cfiDefCfaOffset( - nullptr, CSSize + TailCallArgReserveSize + SlotSize)); + nullptr, CSSize + TailCallArgReserveSize + SlotSize), + MachineInstr::FrameDestroy); } --MBBI; } @@ -2252,7 +2349,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (Opc == X86::POP32r || Opc == X86::POP64r) { Offset += SlotSize; BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset)); + MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset), + MachineInstr::FrameDestroy); } } } @@ -2830,17 +2928,8 @@ void X86FrameLowering::adjustForSegmentedStacks( // prologue. StackSize = MFI.getStackSize(); - // Do not generate a prologue for leaf functions with a stack of size zero. - // For non-leaf functions we have to allow for the possibility that the - // call is to a non-split function, as in PR37807. This function could also - // take the address of a non-split function. When the linker tries to adjust - // its non-existent prologue, it would fail with an error. Mark the object - // file so that such failures are not errors. See this Go language bug-report - // https://go-review.googlesource.com/c/go/+/148819/ - if (StackSize == 0 && !MFI.hasTailCall()) { - MF.getMMI().setHasNosplitStack(true); + if (!MFI.needsSplitStackProlog()) return; - } MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); @@ -3023,7 +3112,6 @@ void X86FrameLowering::adjustForSegmentedStacks( .addReg(0) .addExternalSymbol("__morestack_addr") .addReg(0); - MF.getMMI().setUsesMorestackAddr(true); } else { if (Is64Bit) BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 987facbfeae4..9b83fe77d505 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -176,7 +176,8 @@ public: /// Wraps up getting a CFI index and building a MachineInstr for it. void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, const MCCFIInstruction &CFIInst) const; + const DebugLoc &DL, const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; /// Sets up EBP and optionally ESI based on the incoming EBP value. Only /// needed for 32-bit. Used in funclet prologues and at catchret destinations. @@ -233,6 +234,10 @@ private: const DebugLoc &DL, uint64_t Offset, uint64_t Align) const; + /// Emit target zero call-used regs. + void emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const override; + void adjustFrameForMsvcCxxEh(MachineFunction &MF) const; /// Aligns the stack pointer by ANDing it with -MaxAlign.
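Editorial aside, not part of the imported patch: the emitZeroCallUsedRegs hook added above clears general purpose registers with a self-XOR whose operands carry the Undef flag. A minimal sketch of that idiom, assuming the usual X86 backend headers; the helper name emitGPRZero is hypothetical:

  // Zero a 32-bit GPR via the architectural zeroing idiom "xor r32, r32".
  // Flagging both uses as Undef records that the old register value is not
  // actually read, so no false dependency on it is created.
  static void emitGPRZero(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                          const TargetInstrInfo &TII, MCRegister Reg) {
    BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
  }

Clearing only the 32-bit subregister is enough for a full GPR because 32-bit writes zero the upper 32 bits on x86-64, which is why the pass maps each GPR to its 32-bit alias before emitting the XOR.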
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 5b90c67deae6..f88037e95d33 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -59,30 +59,27 @@ namespace { enum { RegBase, FrameIndexBase - } BaseType; + } BaseType = RegBase; // This is really a union, discriminated by BaseType! SDValue Base_Reg; - int Base_FrameIndex; + int Base_FrameIndex = 0; - unsigned Scale; + unsigned Scale = 1; SDValue IndexReg; - int32_t Disp; + int32_t Disp = 0; SDValue Segment; - const GlobalValue *GV; - const Constant *CP; - const BlockAddress *BlockAddr; - const char *ES; - MCSymbol *MCSym; - int JT; + const GlobalValue *GV = nullptr; + const Constant *CP = nullptr; + const BlockAddress *BlockAddr = nullptr; + const char *ES = nullptr; + MCSymbol *MCSym = nullptr; + int JT = -1; Align Alignment; // CP alignment. - unsigned char SymbolFlags; // X86II::MO_* + unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_* bool NegateIndex = false; - X86ISelAddressMode() - : BaseType(RegBase), Base_FrameIndex(0), Scale(1), Disp(0), GV(nullptr), - CP(nullptr), BlockAddr(nullptr), ES(nullptr), MCSym(nullptr), JT(-1), - SymbolFlags(X86II::MO_NO_FLAG) {} + X86ISelAddressMode() = default; bool hasSymbolicDisplacement() const { return GV != nullptr || CP != nullptr || ES != nullptr || @@ -446,6 +443,43 @@ namespace { return getI8Imm(InsertIdx ? 0x02 : 0x30, DL); } + SDValue getSBBZero(SDNode *N) { + SDLoc dl(N); + MVT VT = N->getSimpleValueType(0); + + // Create zero. + SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); + SDValue Zero = + SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); + if (VT == MVT::i64) { + Zero = SDValue( + CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, + CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), + 0); + } + + // Copy flags to the EFLAGS register and glue it to next node. + unsigned Opcode = N->getOpcode(); + assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) && + "Unexpected opcode for SBB materialization"); + unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1; + SDValue EFLAGS = + CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, + N->getOperand(FlagOpIndex), SDValue()); + + // Create a 64-bit instruction if the result is 64-bits otherwise use the + // 32-bit version. + unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; + MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; + VTs = CurDAG->getVTList(SBBVT, MVT::i32); + return SDValue( + CurDAG->getMachineNode(Opc, dl, VTs, + {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}), + 0); + } + // Helper to detect unneeded and instructions on shift amounts. Called // from PatFrags in tablegen. bool isUnneededShiftMask(SDNode *N, unsigned Width) const { @@ -476,6 +510,9 @@ namespace { return Subtarget->getInstrInfo(); } + /// Return a condition code of the given SDNode + X86::CondCode getCondFromNode(SDNode *N) const; + /// Address-mode matching performs shift-of-and to and-of-shift /// reassociation in order to expose more scaled addressing /// opportunities. 
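Editorial aside, not part of the imported patch: the getSBBZero helper added above first materializes a zero with MOV32r0 and then emits SBB with both source operands being that zero, gluing the copied EFLAGS in as the borrow input. With equal operands, sbb computes 0 - 0 - CF, i.e. -CF. A scalar model of the produced value, for illustration only:

  #include <cstdint>

  // "sbb zero, zero" yields 0 - 0 - CF == -CF: all-ones when the carry
  // flag is set, zero otherwise.
  uint32_t sbbZeroResult(bool CarryFlag) {
    return 0u - static_cast<uint32_t>(CarryFlag); // 0xFFFFFFFF or 0x0
  }

Feeding an explicitly zeroed register into SBB, rather than using the SETB_C pseudo that reads the same register it writes, matters on targets where a same-register sbb is not recognized as dependency-breaking.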
@@ -492,7 +529,7 @@ unsigned StoreSize = N->getMemoryVT().getStoreSize(); - if (N->getAlignment() < StoreSize) + if (N->getAlign().value() < StoreSize) return false; switch (StoreSize) { @@ -2391,6 +2428,14 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; break; + case ISD::XOR: + // We want to look through a transform in InstCombine that + // turns 'add' with min_signed_val into 'xor', so we can treat this 'xor' + // exactly like an 'add'. + if (isMinSignedConstant(N.getOperand(1)) && !matchAdd(N, AM, Depth)) + return false; + break; + case ISD::AND: { // Perform some heroic transforms on an and of a constant-count shift // with a constant to enable use of the scaled offset field. @@ -2745,10 +2790,10 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, case X86ISD::SUB: case X86ISD::ADC: case X86ISD::SBB: - /* TODO: These opcodes can be added safely, but we may want to justify - their inclusion for different reasons (better for reg-alloc). case X86ISD::SMUL: case X86ISD::UMUL: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). case X86ISD::OR: case X86ISD::XOR: case X86ISD::AND: @@ -2759,10 +2804,9 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, return false; } }; - // TODO: This could be an 'or' rather than 'and' to make the transform more - // likely to happen. We might want to factor in whether there's a - // load folding opportunity for the math op that disappears with LEA. - if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + // TODO: We might want to factor in whether there's a load folding + // opportunity for the math op that disappears with LEA. + if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1))) Complexity++; } @@ -2891,24 +2935,15 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { CR->getSignedMax().slt(1ull << Width); } -static X86::CondCode getCondFromNode(SDNode *N) { +X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const { assert(N->isMachineOpcode() && "Unexpected node"); - X86::CondCode CC = X86::COND_INVALID; unsigned Opc = N->getMachineOpcode(); - if (Opc == X86::JCC_1) - CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1)); - else if (Opc == X86::SETCCr) - CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0)); - else if (Opc == X86::SETCCm) - CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5)); - else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || - Opc == X86::CMOV64rr) - CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2)); - else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || - Opc == X86::CMOV64rm) - CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6)); - - return CC; + const MCInstrDesc &MCID = getInstrInfo()->get(Opc); + int CondNo = X86::getCondSrcNoFromDesc(MCID); + if (CondNo < 0) + return X86::COND_INVALID; + + return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo)); } /// Test whether the given X86ISD::CMP node has any users that use a flag @@ -3464,7 +3499,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { const bool AllowExtraUsesByDefault = Subtarget->hasBMI2(); auto checkUses = [AllowExtraUsesByDefault](SDValue Op, unsigned NUses, Optional<bool> AllowExtraUses) { - return AllowExtraUses.getValueOr(AllowExtraUsesByDefault) || + return AllowExtraUses.value_or(AllowExtraUsesByDefault) || Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); }; auto checkOneUse = [checkUses](SDValue Op, @@ -5478,7 +5513,7 @@ void X86DAGToDAGISel::Select(SDNode *Node)
{ MVT CmpVT = N0.getSimpleValueType(); // Floating point needs special handling if we don't have FCOMI. - if (Subtarget->hasCMov()) + if (Subtarget->canUseCMOV()) break; bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; @@ -5518,7 +5553,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // Move AH into flags. // Some 64-bit targets lack SAHF support, but they do support FCOMI. - assert(Subtarget->hasLAHFSAHF() && + assert(Subtarget->canUseLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); Chain = AH; @@ -5567,40 +5602,86 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. // Look past the truncate if CMP is the only use of it. - if (N0.getOpcode() == ISD::AND && - N0.getNode()->hasOneUse() && + if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8) { - ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!C) break; - uint64_t Mask = C->getZExtValue(); + auto *MaskC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!MaskC) + break; + + // We may have looked through a truncate so mask off any bits that + // shouldn't be part of the compare. + uint64_t Mask = MaskC->getZExtValue(); Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits()); - // Check if we can replace AND+IMM64 with a shift. This is possible for - // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero - // flag. - if (CmpVT == MVT::i64 && !isInt<32>(Mask) && + // Check if we can replace AND+IMM{32,64} with a shift. This is possible + // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the + // zero flag. + if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) && onlyUsesZeroFlag(SDValue(Node, 0))) { - if (isMask_64(~Mask)) { - unsigned TrailingZeros = countTrailingZeros(Mask); - SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64); - SDValue Shift = - SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32, - N0.getOperand(0), Imm), 0); - MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, - MVT::i32, Shift, Shift); - ReplaceNode(Node, Test); - return; + unsigned ShiftOpcode = ISD::DELETED_NODE; + unsigned ShiftAmt; + unsigned SubRegIdx; + MVT SubRegVT; + unsigned TestOpcode; + unsigned LeadingZeros = countLeadingZeros(Mask); + unsigned TrailingZeros = countTrailingZeros(Mask); + + // With leading/trailing zeros, the transform is profitable if we can + // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without + // incurring any extra register moves. + bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse(); + if (LeadingZeros == 0 && SavesBytes) { + // If the mask covers the most significant bit, then we can replace + // TEST+AND with a SHR and check eflags. + // This emits a redundant TEST which is subsequently eliminated. + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = 0; + TestOpcode = X86::TEST64rr; + } else if (TrailingZeros == 0 && SavesBytes) { + // If the mask covers the least significant bit, then we can replace + // TEST+AND with a SHL and check eflags. + // This emits a redundant TEST which is subsequently eliminated.
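+ // (Editorial worked example, not part of the imported patch: with
+ // Mask == 0x0000FFFF00000000, LeadingZeros == 16 and TrailingZeros == 32,
+ // so the shifted-mask branch below selects SHR64ri with shift amount 32
+ // and a TEST16rr of the sub_16bit subregister, replacing a movabsq of
+ // the 64-bit immediate plus a 64-bit TEST.)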
+ ShiftOpcode = X86::SHL64ri; + ShiftAmt = LeadingZeros; + SubRegIdx = 0; + TestOpcode = X86::TEST64rr; + } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) { + // If the shifted mask extends into the high half and is 8/16/32 bits + // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr. + unsigned PopCount = 64 - LeadingZeros - TrailingZeros; + if (PopCount == 8) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = X86::sub_8bit; + SubRegVT = MVT::i8; + TestOpcode = X86::TEST8rr; + } else if (PopCount == 16) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = X86::sub_16bit; + SubRegVT = MVT::i16; + TestOpcode = X86::TEST16rr; + } else if (PopCount == 32) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = X86::sub_32bit; + SubRegVT = MVT::i32; + TestOpcode = X86::TEST32rr; + } } - if (isMask_64(Mask)) { - unsigned LeadingZeros = countLeadingZeros(Mask); - SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64); - SDValue Shift = - SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32, - N0.getOperand(0), Imm), 0); - MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, - MVT::i32, Shift, Shift); + if (ShiftOpcode != ISD::DELETED_NODE) { + SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64); + SDValue Shift = SDValue( + CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32, + N0.getOperand(0), ShiftC), + 0); + if (SubRegIdx != 0) { + Shift = + CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift); + } + MachineSDNode *Test = + CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift); ReplaceNode(Node, Test); return; } @@ -5769,21 +5850,28 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; case X86ISD::SETCC_CARRY: { - // We have to do this manually because tblgen will put the eflags copy in - // the wrong place if we use an extract_subreg in the pattern. MVT VT = Node->getSimpleValueType(0); + SDValue Result; + if (Subtarget->hasSBBDepBreaking()) { + // We have to do this manually because tblgen will put the eflags copy in + // the wrong place if we use an extract_subreg in the pattern. + // Copy flags to the EFLAGS register and glue it to next node. + SDValue EFLAGS = + CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, + Node->getOperand(1), SDValue()); - // Copy flags to the EFLAGS register and glue it to next node. - SDValue EFLAGS = - CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, - Node->getOperand(1), SDValue()); - - // Create a 64-bit instruction if the result is 64-bits otherwise use the - // 32-bit version. - unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; - MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; - SDValue Result = SDValue( - CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0); + // Create a 64-bit instruction if the result is 64-bits otherwise use the + // 32-bit version. + unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; + MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; + Result = SDValue( + CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), + 0); + } else { + // The target does not recognize sbb with the same reg operand as a + // no-source idiom, so we explicitly zero the input values. + Result = getSBBZero(Node); + } // For less than 32-bits we need to extract from the 32-bit node. 
if (VT == MVT::i8 || VT == MVT::i16) { @@ -5798,35 +5886,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::SBB: { if (isNullConstant(Node->getOperand(0)) && isNullConstant(Node->getOperand(1))) { - MVT VT = Node->getSimpleValueType(0); - - // Create zero. - SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); - SDValue Zero = - SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); - if (VT == MVT::i64) { - Zero = SDValue( - CurDAG->getMachineNode( - TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, - CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, - CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), - 0); - } - - // Copy flags to the EFLAGS register and glue it to next node. - SDValue EFLAGS = - CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, - Node->getOperand(2), SDValue()); - - // Create a 64-bit instruction if the result is 64-bits otherwise use the - // 32-bit version. - unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; - MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; - VTs = CurDAG->getVTList(SBBVT, MVT::i32); - SDValue Result = - SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS, - EFLAGS.getValue(1)}), - 0); + SDValue Result = getSBBZero(Node); // Replace the flag use. ReplaceUses(SDValue(Node, 1), Result.getValue(1)); @@ -5834,6 +5894,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // Replace the result use. if (!SDValue(Node, 0).use_empty()) { // For less than 32-bits we need to extract from the 32-bit node. + MVT VT = Node->getSimpleValueType(0); if (VT == MVT::i8 || VT == MVT::i16) { int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); @@ -6112,6 +6173,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, case InlineAsm::Constraint_v: // not offsetable ?? case InlineAsm::Constraint_m: // memory case InlineAsm::Constraint_X: + case InlineAsm::Constraint_p: // address if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 90753b5b4d33..61c1fd25031d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -108,9 +108,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(STI) { bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); - X86ScalarSSEf64 = Subtarget.hasSSE2(); - X86ScalarSSEf32 = Subtarget.hasSSE1(); - X86ScalarSSEf16 = Subtarget.hasFP16(); MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); // Set up the TargetLowering object. @@ -170,7 +167,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b. // FIXME: Should we be limiting the atomic size on other configs? Default is // 1024. - if (!Subtarget.hasCmpxchg8b()) + if (!Subtarget.canUseCMPXCHG8B()) setMaxAtomicSizeInBitsSupported(32); // Set up the register classes. @@ -200,7 +197,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Integer absolute. 
- if (Subtarget.hasCMov()) { + if (Subtarget.canUseCMOV()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); setOperationAction(ISD::ABS , MVT::i32 , Custom); if (Subtarget.is64Bit()) @@ -314,7 +311,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); // TODO: when we have SSE, these could be more efficient, by using movd/movq. - if (!X86ScalarSSEf64) { + if (!Subtarget.hasSSE2()) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); setOperationAction(ISD::BITCAST , MVT::i32 , Expand); if (Subtarget.is64Bit()) { @@ -415,14 +412,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f128, Expand); } - setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); - setTruncStoreAction(MVT::f32, MVT::f16, Expand); - setTruncStoreAction(MVT::f64, MVT::f16, Expand); - setTruncStoreAction(MVT::f80, MVT::f16, Expand); - setTruncStoreAction(MVT::f128, MVT::f16, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand); + setTruncStoreAction(VT, MVT::f16, Expand); + setTruncStoreAction(VT, MVT::bf16, Expand); + + setOperationAction(ISD::BF16_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_BF16, VT, Expand); + } setOperationAction(ISD::PARITY, MVT::i8, Custom); setOperationAction(ISD::PARITY, MVT::i16, Custom); @@ -497,7 +495,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, VT, Custom); } - if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) + if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); @@ -516,9 +514,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.is64Bit()) setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); - if (Subtarget.hasCmpxchg16b()) { + if (Subtarget.canUseCMPXCHG16B()) setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); - } // FIXME - use subtarget debug flags if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && @@ -535,7 +532,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRAP, MVT::Other, Legal); setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); - if (Subtarget.getTargetTriple().isPS4CPU()) + if (Subtarget.isTargetPS()) setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand); else setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); @@ -556,9 +553,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); - if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { - // f32 and f64 use SSE. + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { + // f16, f32 and f64 use SSE. // Set up the FP register classes. + addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass + : &X86::FR16RegClass); addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass); addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? 
&X86::FR64XRegClass @@ -590,11 +591,54 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSINCOS, VT, Expand); } + // Half type will be promoted by default. + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); + setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); + + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + // Lower this to MOVMSK plus an AND. setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); - } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && + } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() && (UseX87 || Is64Bit)) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. @@ -664,6 +708,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } else // SSE immediates. addLegalFPImmediate(APFloat(+0.0)); // xorpd } + // Support fp16 0 immediate. + if (isTypeLegal(MVT::f16)) + addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf())); + // Handle constrained floating-point operations of scalar. 
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); @@ -673,7 +721,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); @@ -725,7 +772,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal); setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal); setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); + if (isTypeLegal(MVT::f16)) { + setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom); + } else { + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); + } // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten // as Custom. setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal); @@ -877,7 +929,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are // split/scalarized right now. - if (VT.getVectorElementType() == MVT::f16) + if (VT.getVectorElementType() == MVT::f16 || + VT.getVectorElementType() == MVT::bf16) setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); } } @@ -949,6 +1002,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHU, MVT::v8i16, Legal); setOperationAction(ISD::MULHS, MVT::v8i16, Legal); setOperationAction(ISD::MUL, MVT::v8i16, Legal); + setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal); + setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal); setOperationAction(ISD::SMULO, MVT::v16i8, Custom); setOperationAction(ISD::UMULO, MVT::v16i8, Custom); @@ -1067,6 +1122,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v4i16, Custom); setOperationAction(ISD::STORE, MVT::v8i8, Custom); + // Add 32-bit vector stores to help vectorization opportunities. + setOperationAction(ISD::STORE, MVT::v2i16, Custom); + setOperationAction(ISD::STORE, MVT::v4i8, Custom); + setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); @@ -1285,13 +1344,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (VT == MVT::v4i64) continue; setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); + setOperationAction(ISD::FSHL, VT, Custom); + setOperationAction(ISD::FSHR, VT, Custom); } - setOperationAction(ISD::FSHL, MVT::v32i8, Custom); - setOperationAction(ISD::FSHR, MVT::v32i8, Custom); - setOperationAction(ISD::FSHL, MVT::v8i32, Custom); - setOperationAction(ISD::FSHR, MVT::v8i32, Custom); - // These types need custom splitting if their input is a 128-bit vector. setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); @@ -1353,6 +1409,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? 
Legal : Custom); setOperationAction(ISD::MULHU, MVT::v32i8, Custom); setOperationAction(ISD::MULHS, MVT::v32i8, Custom); + setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMULO, MVT::v32i8, Custom); setOperationAction(ISD::UMULO, MVT::v32i8, Custom); @@ -1446,6 +1504,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } + if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) { + setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); + } + // This block controls legalization of the mask vector sizes that are // available with AVX512. 512-bit vectors are in a separate block controlled // by useAVX512Regs. @@ -1652,6 +1717,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom); setOperationAction(ISD::MULHS, MVT::v64i8, Custom); setOperationAction(ISD::MULHU, MVT::v64i8, Custom); + setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom); + setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom); setOperationAction(ISD::SMULO, MVT::v64i8, Custom); setOperationAction(ISD::UMULO, MVT::v64i8, Custom); @@ -1698,6 +1765,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSHL, MVT::v64i8, Custom); setOperationAction(ISD::FSHR, MVT::v64i8, Custom); + setOperationAction(ISD::FSHL, MVT::v32i16, Custom); + setOperationAction(ISD::FSHR, MVT::v32i16, Custom); setOperationAction(ISD::FSHL, MVT::v16i32, Custom); setOperationAction(ISD::FSHR, MVT::v16i32, Custom); @@ -1970,10 +2039,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - if (isTypeLegal(MVT::f80)) { - setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom); - } setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); @@ -2059,9 +2124,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LOAD, MVT::v4f16, Custom); setOperationAction(ISD::STORE, MVT::v4f16, Custom); } - - // Support fp16 0 immediate - addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf())); } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { @@ -2209,55 +2271,55 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); // We have target-specific dag combine patterns for the following nodes: - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); - setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::CONCAT_VECTORS); - setTargetDAGCombine(ISD::INSERT_SUBVECTOR); - setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); - setTargetDAGCombine(ISD::BITCAST); - setTargetDAGCombine(ISD::VSELECT); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::ADD); - 
setTargetDAGCombine(ISD::FADD); - setTargetDAGCombine(ISD::FSUB); - setTargetDAGCombine(ISD::FNEG); - setTargetDAGCombine(ISD::FMA); - setTargetDAGCombine(ISD::STRICT_FMA); - setTargetDAGCombine(ISD::FMINNUM); - setTargetDAGCombine(ISD::FMAXNUM); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::LOAD); - setTargetDAGCombine(ISD::MLOAD); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::MSTORE); - setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); - setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); - setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - setTargetDAGCombine(ISD::STRICT_SINT_TO_FP); - setTargetDAGCombine(ISD::STRICT_UINT_TO_FP); - setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::XOR); - setTargetDAGCombine(ISD::MSCATTER); - setTargetDAGCombine(ISD::MGATHER); - setTargetDAGCombine(ISD::FP16_TO_FP); - setTargetDAGCombine(ISD::FP_EXTEND); - setTargetDAGCombine(ISD::STRICT_FP_EXTEND); - setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine({ISD::VECTOR_SHUFFLE, + ISD::SCALAR_TO_VECTOR, + ISD::INSERT_VECTOR_ELT, + ISD::EXTRACT_VECTOR_ELT, + ISD::CONCAT_VECTORS, + ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, + ISD::BITCAST, + ISD::VSELECT, + ISD::SELECT, + ISD::SHL, + ISD::SRA, + ISD::SRL, + ISD::OR, + ISD::AND, + ISD::ADD, + ISD::FADD, + ISD::FSUB, + ISD::FNEG, + ISD::FMA, + ISD::STRICT_FMA, + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::SUB, + ISD::LOAD, + ISD::MLOAD, + ISD::STORE, + ISD::MSTORE, + ISD::TRUNCATE, + ISD::ZERO_EXTEND, + ISD::ANY_EXTEND, + ISD::SIGN_EXTEND, + ISD::SIGN_EXTEND_INREG, + ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, + ISD::ZERO_EXTEND_VECTOR_INREG, + ISD::SINT_TO_FP, + ISD::UINT_TO_FP, + ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP, + ISD::SETCC, + ISD::MUL, + ISD::XOR, + ISD::MSCATTER, + ISD::MGATHER, + ISD::FP16_TO_FP, + ISD::FP_EXTEND, + ISD::STRICT_FP_EXTEND, + ISD::FP_ROUND}); computeRegisterProperties(Subtarget.getRegisterInfo()); @@ -2568,9 +2630,9 @@ EVT X86TargetLowering::getOptimalMemOpType( bool X86TargetLowering::isSafeMemOpType(MVT VT) const { if (VT == MVT::f32) - return X86ScalarSSEf32; + return Subtarget.hasSSE1(); if (VT == MVT::f64) - return X86ScalarSSEf64; + return Subtarget.hasSSE2(); return true; } @@ -3566,10 +3628,15 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, MFI.setObjectSExt(FI, true); } + MaybeAlign Alignment; + if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && + ValVT != MVT::f80) + Alignment = MaybeAlign(4); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getLoad( ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + Alignment); return ExtendedInMem ? (VA.getValVT().isVector() ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) @@ -3906,7 +3973,7 @@ SDValue X86TargetLowering::LowerFormalArguments( else if (Is64Bit && RegVT == MVT::i64) RC = &X86::GR64RegClass; else if (RegVT == MVT::f16) - RC = &X86::FR16XRegClass; + RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; else if (RegVT == MVT::f32) RC = Subtarget.hasAVX512() ? 
&X86::FR32XRegClass : &X86::FR32RegClass; else if (RegVT == MVT::f64) @@ -4088,9 +4155,14 @@ SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, if (isByVal) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); + MaybeAlign Alignment; + if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && + Arg.getSimpleValueType() != MVT::f80) + Alignment = MaybeAlign(4); return DAG.getStore( Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), + Alignment); } /// Emit a load of return address if tail call @@ -5076,7 +5148,7 @@ bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, // If this is an unaligned vector, make sure the target supports folding it. auto *Ld = cast(Op.getNode()); if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() && - Ld->getValueSizeInBits(0) == 128 && Ld->getAlignment() < 16) + Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16)) return false; // TODO: If this is a non-temporal load and the target has an instruction @@ -5171,13 +5243,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) { } } -static bool isTargetShuffleSplat(SDValue Op) { - unsigned Opcode = Op.getOpcode(); - if (Opcode == ISD::EXTRACT_SUBVECTOR) - return isTargetShuffleSplat(Op.getOperand(0)); - return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD; -} - SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -5429,6 +5494,18 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = Align(1); Info.flags |= MachineMemOperand::MOLoad; return true; + case Intrinsic::x86_atomic_bts: + case Intrinsic::x86_atomic_btc: + case Intrinsic::x86_atomic_btr: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(0); + unsigned Size = I.getType()->getScalarSizeInBits(); + Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size); + Info.align = Align(Size); + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; + return true; + } } return false; } @@ -5643,6 +5720,22 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasLZCNT(); } +bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const { + return VT == MVT::f32 || VT == MVT::f64 || VT.isVector(); +} + +bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const { + // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more + // expensive than a straight movsd. On the other hand, it's important to + // shrink long double fp constant since fldt is very slow. + return !Subtarget.hasSSE2() || VT == MVT::f80; +} + +bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const { + return (VT == MVT::f64 && Subtarget.hasSSE2()) || + (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16; +} + bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const { @@ -5755,6 +5848,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask( (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && "Expected shift-shift mask"); + // TODO: Should we always create i64 masks? Or only folded immediates? 
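  // Illustrative aside (editorial example, not part of the patch): for i32,
  // folding the shift pair (srl (shl X, 4), 4) yields (and X, 0x0FFFFFFF),
  // and (shl (srl X, 4), 4) yields (and X, 0xFFFFFFF0) - one shift plus a
  // mask instead of two dependent shifts.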
EVT VT = N->getValueType(0); if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) || (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) { @@ -6281,7 +6375,8 @@ static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements, // Helper function to collect subvector ops that are concatenated together, // either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series. // The subvectors in Ops are guaranteed to be the same type. -static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { +static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops, + SelectionDAG &DAG) { assert(Ops.empty() && "Expected an empty ops vector"); if (N->getOpcode() == ISD::CONCAT_VECTORS) { @@ -6297,21 +6392,34 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { EVT SubVT = Sub.getValueType(); // TODO - Handle more general insert_subvector chains. - if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && - Idx == (VT.getVectorNumElements() / 2)) { - // insert_subvector(insert_subvector(undef, x, lo), y, hi) - if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && - Src.getOperand(1).getValueType() == SubVT && - isNullConstant(Src.getOperand(2))) { - Ops.push_back(Src.getOperand(1)); + if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2)) { + // insert_subvector(undef, x, lo) + if (Idx == 0 && Src.isUndef()) { Ops.push_back(Sub); + Ops.push_back(DAG.getUNDEF(SubVT)); return true; } - // insert_subvector(x, extract_subvector(x, lo), hi) - if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR && - Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) { - Ops.append(2, Sub); - return true; + if (Idx == (VT.getVectorNumElements() / 2)) { + // insert_subvector(insert_subvector(undef, x, lo), y, hi) + if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(1).getValueType() == SubVT && + isNullConstant(Src.getOperand(2))) { + Ops.push_back(Src.getOperand(1)); + Ops.push_back(Sub); + return true; + } + // insert_subvector(x, extract_subvector(x, lo), hi) + if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR && + Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) { + Ops.append(2, Sub); + return true; + } + // insert_subvector(undef, x, hi) + if (Src.isUndef()) { + Ops.push_back(DAG.getUNDEF(SubVT)); + Ops.push_back(Sub); + return true; + } } } } @@ -6770,7 +6878,7 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { } } SmallVector CatOps; - if (collectConcatOps(V.getNode(), CatOps)) { + if (collectConcatOps(V.getNode(), CatOps, DAG)) { for (SDValue &CatOp : CatOps) { SDValue NotCat = IsNOT(CatOp, DAG); if (!NotCat) return SDValue(); @@ -7934,8 +8042,35 @@ static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl &Mask, } } +// Attempt to create a shuffle mask from a VSELECT/BLENDV condition mask. +static bool createShuffleMaskFromVSELECT(SmallVectorImpl &Mask, + SDValue Cond, bool IsBLENDV = false) { + EVT CondVT = Cond.getValueType(); + unsigned EltSizeInBits = CondVT.getScalarSizeInBits(); + unsigned NumElts = CondVT.getVectorNumElements(); + + APInt UndefElts; + SmallVector EltBits; + if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits, + true, false)) + return false; + + Mask.resize(NumElts, SM_SentinelUndef); + + for (int i = 0; i != (int)NumElts; ++i) { + Mask[i] = i; + // Arbitrarily choose from the 2nd operand if the select condition element + // is undef. + // TODO: Can we do better by matching patterns such as even/odd? 
+ if (UndefElts[i] || (!IsBLENDV && EltBits[i].isZero()) || + (IsBLENDV && EltBits[i].isNonNegative())) + Mask[i] += NumElts; + } + + return true; +} + // Forward declaration (for getFauxShuffleMask recursive check). -// TODO: Use DemandedElts variant. static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, SmallVectorImpl &Mask, const SelectionDAG &DAG, unsigned Depth, @@ -7987,11 +8122,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, uint64_t ZeroMask = IsAndN ? 255 : 0; if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits)) return false; + // We can't assume an undef src element gives an undef dst - the other src + // might be zero. + if (!UndefElts.isZero()) + return false; for (int i = 0, e = (int)EltBits.size(); i != e; ++i) { - if (UndefElts[i]) { - Mask.push_back(SM_SentinelUndef); - continue; - } const APInt &ByteBits = EltBits[i]; if (ByteBits != 0 && ByteBits != 255) return false; @@ -8240,6 +8375,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, } return true; } + case ISD::VSELECT: + case X86ISD::BLENDV: { + SDValue Cond = N.getOperand(0); + if (createShuffleMaskFromVSELECT(Mask, Cond, Opcode == X86ISD::BLENDV)) { + Ops.push_back(N.getOperand(1)); + Ops.push_back(N.getOperand(2)); + return true; + } + return false; + } case X86ISD::VTRUNC: { SDValue Src = N.getOperand(0); EVT SrcVT = Src.getValueType(); @@ -9076,7 +9221,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, // Don't create 256-bit non-temporal aligned loads without AVX2 as these // will lower to regular temporal loads and use the cache. - if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && + if (LDBase->isNonTemporal() && LDBase->getAlign() >= Align(32) && VT.is256BitVector() && !Subtarget.hasInt256()) return SDValue(); @@ -9462,7 +9607,8 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. - if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || + if (ScalarSize == 32 || + (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; @@ -11651,33 +11797,6 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef Mask, return true; } -// Attempt to create a shuffle mask from a VSELECT condition mask. -static bool createShuffleMaskFromVSELECT(SmallVectorImpl &Mask, - SDValue Cond) { - EVT CondVT = Cond.getValueType(); - unsigned EltSizeInBits = CondVT.getScalarSizeInBits(); - unsigned NumElts = CondVT.getVectorNumElements(); - - APInt UndefElts; - SmallVector EltBits; - if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits, - true, false)) - return false; - - Mask.resize(NumElts, SM_SentinelUndef); - - for (int i = 0; i != (int)NumElts; ++i) { - Mask[i] = i; - // Arbitrarily choose from the 2nd operand if the select condition element - // is undef. - // TODO: Can we do better by matching patterns such as even/odd? - if (UndefElts[i] || EltBits[i].isZero()) - Mask[i] += NumElts; - } - - return true; -} - // Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd // instructions. 
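// Illustrative aside (standard x86 semantics, shown for clarity): per 128-bit
// lane of v8i16 inputs A and B, vpunpcklwd interleaves the low words as
// {A0,B0,A1,B1,A2,B2,A3,B3} (shuffle mask {0,8,1,9,2,10,3,11}), and
// vpunpckhwd the high words (mask {4,12,5,13,6,14,7,15}).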
static bool isUnpackWdShuffleMask(ArrayRef Mask, MVT VT) { @@ -13943,8 +14062,8 @@ static SDValue getScalarValueForVectorElement(SDValue V, int Idx, /// This is particularly important because the set of instructions varies /// significantly based on whether the operand is a load or not. static bool isShuffleFoldableLoad(SDValue V) { - V = peekThroughBitcasts(V); - return ISD::isNON_EXTLoad(V.getNode()); + return V->hasOneUse() && + ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode()); } /// Try to lower insertion of a single element into a zero vector. @@ -15796,7 +15915,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, V1 = extract128BitVector(V1V2, 0, DAG, DL); V2 = extract128BitVector(V1V2, 4, DAG, DL); } else { - SmallVector DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32)); + SmallVector DWordClearOps(4, + DAG.getConstant(0, DL, MVT::i32)); for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1)) DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32); SDValue DWordClearMask = @@ -16615,9 +16735,7 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle( // otherwise we're (probably) better off doing a split. if (VT == MVT::v4f64 && !all_of(Mask, [LaneSize](int M) { return M < LaneSize; })) - if (SDValue V = - lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG)) - return V; + return lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG); // If there are only inputs from one 128-bit lane, splitting will in fact be // less expensive. The flags track whether the given lane contains an element @@ -17229,114 +17347,135 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( return SDValue(); // Bail if we already have a repeated lane shuffle mask. - SmallVector RepeatedShuffleMask; - if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask)) + if (is128BitLaneRepeatedShuffleMask(VT, Mask)) return SDValue(); - // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes - // (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes. - int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1; - int NumSubLanes = NumLanes * SubLaneScale; - int NumSubLaneElts = NumLaneElts / SubLaneScale; - - // Check that all the sources are coming from the same lane and see if we can - // form a repeating shuffle mask (local to each sub-lane). At the same time, - // determine the source sub-lane for each destination sub-lane. - int TopSrcSubLane = -1; - SmallVector Dst2SrcSubLanes((unsigned)NumSubLanes, -1); - SmallVector RepeatedSubLaneMasks[2] = { - SmallVector((unsigned)NumSubLaneElts, SM_SentinelUndef), - SmallVector((unsigned)NumSubLaneElts, SM_SentinelUndef)}; - - for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) { - // Extract the sub-lane mask, check that it all comes from the same lane - // and normalize the mask entries to come from the first lane. - int SrcLane = -1; - SmallVector SubLaneMask((unsigned)NumSubLaneElts, -1); - for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { - int M = Mask[(DstSubLane * NumSubLaneElts) + Elt]; - if (M < 0) + // Helper to look for repeated mask in each split sublane, and that those + // sublanes can then be permuted into place. + auto ShuffleSubLanes = [&](int SubLaneScale) { + int NumSubLanes = NumLanes * SubLaneScale; + int NumSubLaneElts = NumLaneElts / SubLaneScale; + + // Check that all the sources are coming from the same lane and see if we + // can form a repeating shuffle mask (local to each sub-lane). 
At the same + // time, determine the source sub-lane for each destination sub-lane. + int TopSrcSubLane = -1; + SmallVector Dst2SrcSubLanes((unsigned)NumSubLanes, -1); + SmallVector> RepeatedSubLaneMasks( + SubLaneScale, + SmallVector((unsigned)NumSubLaneElts, SM_SentinelUndef)); + + for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) { + // Extract the sub-lane mask, check that it all comes from the same lane + // and normalize the mask entries to come from the first lane. + int SrcLane = -1; + SmallVector SubLaneMask((unsigned)NumSubLaneElts, -1); + for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { + int M = Mask[(DstSubLane * NumSubLaneElts) + Elt]; + if (M < 0) + continue; + int Lane = (M % NumElts) / NumLaneElts; + if ((0 <= SrcLane) && (SrcLane != Lane)) + return SDValue(); + SrcLane = Lane; + int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts); + SubLaneMask[Elt] = LocalM; + } + + // Whole sub-lane is UNDEF. + if (SrcLane < 0) continue; - int Lane = (M % NumElts) / NumLaneElts; - if ((0 <= SrcLane) && (SrcLane != Lane)) - return SDValue(); - SrcLane = Lane; - int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts); - SubLaneMask[Elt] = LocalM; - } - // Whole sub-lane is UNDEF. - if (SrcLane < 0) - continue; + // Attempt to match against the candidate repeated sub-lane masks. + for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) { + auto MatchMasks = [NumSubLaneElts](ArrayRef M1, ArrayRef M2) { + for (int i = 0; i != NumSubLaneElts; ++i) { + if (M1[i] < 0 || M2[i] < 0) + continue; + if (M1[i] != M2[i]) + return false; + } + return true; + }; + + auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane]; + if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask)) + continue; - // Attempt to match against the candidate repeated sub-lane masks. - for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) { - auto MatchMasks = [NumSubLaneElts](ArrayRef M1, ArrayRef M2) { + // Merge the sub-lane mask into the matching repeated sub-lane mask. for (int i = 0; i != NumSubLaneElts; ++i) { - if (M1[i] < 0 || M2[i] < 0) + int M = SubLaneMask[i]; + if (M < 0) continue; - if (M1[i] != M2[i]) - return false; + assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) && + "Unexpected mask element"); + RepeatedSubLaneMask[i] = M; } - return true; - }; - auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane]; - if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask)) - continue; + // Track the top most source sub-lane - by setting the remaining to + // UNDEF we can greatly simplify shuffle matching. + int SrcSubLane = (SrcLane * SubLaneScale) + SubLane; + TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane); + Dst2SrcSubLanes[DstSubLane] = SrcSubLane; + break; + } - // Merge the sub-lane mask into the matching repeated sub-lane mask. - for (int i = 0; i != NumSubLaneElts; ++i) { - int M = SubLaneMask[i]; + // Bail if we failed to find a matching repeated sub-lane mask. + if (Dst2SrcSubLanes[DstSubLane] < 0) + return SDValue(); + } + assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes && + "Unexpected source lane"); + + // Create a repeating shuffle mask for the entire vector. 
+ SmallVector RepeatedMask((unsigned)NumElts, -1); + for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) { + int Lane = SubLane / SubLaneScale; + auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale]; + for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { + int M = RepeatedSubLaneMask[Elt]; if (M < 0) continue; - assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) && - "Unexpected mask element"); - RepeatedSubLaneMask[i] = M; + int Idx = (SubLane * NumSubLaneElts) + Elt; + RepeatedMask[Idx] = M + (Lane * NumLaneElts); } - - // Track the top most source sub-lane - by setting the remaining to UNDEF - // we can greatly simplify shuffle matching. - int SrcSubLane = (SrcLane * SubLaneScale) + SubLane; - TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane); - Dst2SrcSubLanes[DstSubLane] = SrcSubLane; - break; } + SDValue RepeatedShuffle = + DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask); - // Bail if we failed to find a matching repeated sub-lane mask. - if (Dst2SrcSubLanes[DstSubLane] < 0) - return SDValue(); - } - assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes && - "Unexpected source lane"); - - // Create a repeating shuffle mask for the entire vector. - SmallVector RepeatedMask((unsigned)NumElts, -1); - for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) { - int Lane = SubLane / SubLaneScale; - auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale]; - for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { - int M = RepeatedSubLaneMask[Elt]; - if (M < 0) + // Shuffle each source sub-lane to its destination. + SmallVector SubLaneMask((unsigned)NumElts, -1); + for (int i = 0; i != NumElts; i += NumSubLaneElts) { + int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts]; + if (SrcSubLane < 0) continue; - int Idx = (SubLane * NumSubLaneElts) + Elt; - RepeatedMask[Idx] = M + (Lane * NumLaneElts); + for (int j = 0; j != NumSubLaneElts; ++j) + SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts); } - } - SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask); - // Shuffle each source sub-lane to its destination. - SmallVector SubLaneMask((unsigned)NumElts, -1); - for (int i = 0; i != NumElts; i += NumSubLaneElts) { - int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts]; - if (SrcSubLane < 0) - continue; - for (int j = 0; j != NumSubLaneElts; ++j) - SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts); - } + return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT), + SubLaneMask); + }; - return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT), - SubLaneMask); + // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes + // (with PERMQ/PERMPD). On AVX2/AVX512BW targets, permuting 32-bit sub-lanes, + // even with a variable shuffle, can be worth it for v32i8/v64i8 vectors. + // Otherwise we can only permute whole 128-bit lanes. + int MinSubLaneScale = 1, MaxSubLaneScale = 1; + if (Subtarget.hasAVX2() && VT.is256BitVector()) { + bool OnlyLowestElts = isUndefOrInRange(Mask, 0, NumLaneElts); + MinSubLaneScale = 2; + MaxSubLaneScale = + (!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 
4 : 2; + } + if (Subtarget.hasBWI() && VT == MVT::v64i8) + MinSubLaneScale = MaxSubLaneScale = 4; + + for (int Scale = MinSubLaneScale; Scale <= MaxSubLaneScale; Scale *= 2) + if (SDValue Shuffle = ShuffleSubLanes(Scale)) + return Shuffle; + + return SDValue(); } static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, @@ -17513,6 +17652,9 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Op; + bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask); + bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask); + // If we have lane crossing shuffles AND they don't all come from the lower // lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)). // TODO: Handle BUILD_VECTOR sources which getVectorShuffle currently @@ -17521,13 +17663,11 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) && (V1.getOpcode() != ISD::BUILD_VECTOR) && (V2.getOpcode() != ISD::BUILD_VECTOR)) - if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2, - Mask, DAG)) - return Op; + return lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2, Mask, DAG); // If we have one input in place, then we can permute the other input and // blend the result. - if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) + if (V1IsInPlace || V2IsInPlace) return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); @@ -17541,8 +17681,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. - if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || - isShuffleMaskInputInPlace(1, Mask)))) + if (!(Subtarget.hasAVX2() && (V1IsInPlace || V2IsInPlace))) if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return V; @@ -17635,9 +17774,12 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) return V; + bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask); + bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask); + // If we have one input in place, then we can permute the other input and // blend the result. - if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) + if (V1IsInPlace || V2IsInPlace) return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG); @@ -17647,12 +17789,16 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return V; + // Try to lower to PERMQ(BLENDD(V1,V2)). + if (SDValue V = + lowerShuffleAsBlendAndPermute(DL, MVT::v4i64, V1, V2, Mask, DAG)) + return V; + // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. 
- if (!isShuffleMaskInputInPlace(0, Mask) && - !isShuffleMaskInputInPlace(1, Mask)) + if (!V1IsInPlace && !V2IsInPlace) if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Result; @@ -18657,20 +18803,34 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return PSHUFB; - // VBMI can use VPERMV/VPERMV3 byte shuffles. - if (Subtarget.hasVBMI()) - return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG); - // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) return V; + if (SDValue Result = lowerShuffleAsLanePermuteAndPermute( + DL, MVT::v64i8, V1, V2, Mask, DAG, Subtarget)) + return Result; + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; + if (!is128BitLaneCrossingShuffleMask(MVT::v64i8, Mask)) { + // Use PALIGNR+Permute if possible - permute might become PSHUFB but the + // PALIGNR will be cheaper than the second PSHUFB+OR. + if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v64i8, V1, V2, + Mask, Subtarget, DAG)) + return V; + + // If we can't directly blend but can use PSHUFB, that will be better as it + // can both shuffle and set up the inefficient blend. + bool V1InUse, V2InUse; + return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v64i8, V1, V2, Mask, Zeroable, + DAG, V1InUse, V2InUse); + } + // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (!V2.isUndef()) @@ -18678,7 +18838,10 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) return Result; - // FIXME: Implement direct support for this type! + // VBMI can use VPERMV/VPERMV3 byte shuffles. + if (Subtarget.hasVBMI()) + return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG); + return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); } @@ -18915,7 +19078,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, Offset += NumElts; // Increment for next iteration. } - + // If we're broadcasting a SETCC result, try to broadcast the ops instead. + // TODO: What other unary shuffles would benefit from this? + if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC && + V1->hasOneUse()) { + SDValue Op0 = V1.getOperand(0); + SDValue Op1 = V1.getOperand(1); + ISD::CondCode CC = cast(V1.getOperand(2))->get(); + EVT OpVT = Op0.getValueType(); + return DAG.getSetCC( + DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), + DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); + } MVT ExtVT; switch (VT.SimpleTy) { @@ -19619,9 +19793,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1); if (IsZeroElt || IsAllOnesElt) { - // Lower insertion of i8 -1 as an 'OR' blend. + // Lower insertion of v16i8/v32i8/v64i16 -1 elts as an 'OR' blend. // We don't deal with i8 0 since it appears to be handled elsewhere. 
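      // Illustrative aside (editorial example, not part of the patch):
      // inserting a -1 element at index k is just N0 | C, where C is all-zero
      // except an all-ones element at k; e.g. for v4i32, k=2: X | {0,0,-1,0}.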
-    if (IsAllOnesElt && EltSizeInBits == 8 && !Subtarget.hasSSE41()) {
+    if (IsAllOnesElt &&
+        ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||
+         ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {
       SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
       SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
       SmallVector<SDValue> CstVectorElts(NumElts, ZeroCst);
@@ -19652,7 +19828,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     // and incur a domain crossing penalty if that's what we'll end up
     // doing anyway after extracting to a 128-bit vector.
     if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
-        (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
+        (Subtarget.hasAVX2() && (EltVT == MVT::i32 || EltVT == MVT::i64))) {
       SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
       return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
                          DAG.getTargetConstant(1, dl, MVT::i8));
@@ -19666,7 +19842,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     // If we are not inserting into the low 128-bit vector chunk,
     // then prefer the broadcast+blend sequence.
     // FIXME: relax the profitability check iff all N1 uses are insertions.
-    if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
+    if (IdxVal >= NumEltsIn128 &&
         ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
         (Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
          X86::mayFoldLoad(N1, Subtarget)))) {
@@ -20617,6 +20793,35 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
   return Cvt;
 }
 
+template <typename T>
+static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) {
+  return VT == MVT::f16 && !Subtarget.hasFP16();
+}
+
+template <typename T>
+bool X86TargetLowering::isSoftFP16(T VT) const {
+  return ::isSoftFP16(VT, Subtarget);
+}
+
+static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+  bool IsStrict = Op->isStrictFPOpcode();
+  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+  SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
+  MVT VT = Op.getSimpleValueType();
+  MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
+  SDLoc dl(Op);
+
+  SDValue Rnd = DAG.getIntPtrConstant(0, dl);
+  if (IsStrict)
+    return DAG.getNode(
+        ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
+        {Chain,
+         DAG.getNode(Op.getOpcode(), dl, {NVT, MVT::Other}, {Chain, Src}),
+         Rnd});
+  return DAG.getNode(ISD::FP_ROUND, dl, VT,
+                     DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd);
+}
+
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
   bool IsStrict = Op->isStrictFPOpcode();
@@ -20627,6 +20832,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
 
+  if (isSoftFP16(VT))
+    return promoteXINT_TO_FP(Op, DAG);
+
   if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
     return LowerWin64_INT128_TO_FP(Op, DAG);
 
@@ -21123,9 +21331,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   MVT DstVT = Op->getSimpleValueType(0);
   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
+  // Bail out when we don't have native conversion instructions.
   if (DstVT == MVT::f128)
     return SDValue();
 
+  if (isSoftFP16(DstVT))
+    return promoteXINT_TO_FP(Op, DAG);
+
   if (DstVT.isVector())
     return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
 
@@ -21158,9 +21370,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   // The transform for i64->f64 isn't correct for 0 when rounding to negative
   // infinity. It produces -0.0, so disable under strictfp.
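  // Illustrative aside (editorial reasoning, sketched for clarity): the
  // lowering ends in an FSUB of two equal values when the input is 0, and
  // IEEE-754 gives x - x the sign -0.0 under round-toward-negative, hence
  // the bail-out under strictfp, where the dynamic rounding mode must be
  // honoured.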
- if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict) + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.hasSSE2() && + !IsStrict) return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); - if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80) + // The transform for i32->f64/f32 isn't correct for 0 when rounding to + // negative infinity. So disable under strictfp. Using FILD instead. + if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80 && + !IsStrict) return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && (DstVT == MVT::f32 || DstVT == MVT::f64)) @@ -21819,27 +22035,25 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { - In = DAG.getBitcast(MVT::v8i32, In); - // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget.hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; + In = DAG.getBitcast(MVT::v8i32, In); In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, DAG.getIntPtrConstant(0, DL)); } - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, - DAG.getIntPtrConstant(4, DL)); + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(2, DL)); static const int ShufMask[] = {0, 2, 4, 6}; - return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); + return DAG.getVectorShuffle(VT, DL, DAG.getBitcast(MVT::v4i32, OpLo), + DAG.getBitcast(MVT::v4i32, OpHi), ShufMask); } if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { - In = DAG.getBitcast(MVT::v32i8, In); - // On AVX2, v8i32 -> v8i16 becomes PSHUFB. 
     if (Subtarget.hasInt256()) {
       // The PSHUFB mask:
@@ -21847,27 +22061,30 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
                                       -1, -1, -1, -1, -1, -1, -1, -1,
                                       16, 17, 20, 21, 24, 25, 28, 29,
                                       -1, -1, -1, -1, -1, -1, -1, -1 };
+      In = DAG.getBitcast(MVT::v32i8, In);
       In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
       In = DAG.getBitcast(MVT::v4i64, In);
 
       static const int ShufMask2[] = {0, 2, -1, -1};
       In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
-      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16,
-                         DAG.getBitcast(MVT::v16i16, In),
-                         DAG.getIntPtrConstant(0, DL));
+      In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+                       DAG.getIntPtrConstant(0, DL));
+      return DAG.getBitcast(MVT::v8i16, In);
     }
 
-    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
+    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                                DAG.getIntPtrConstant(0, DL));
-    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
-                               DAG.getIntPtrConstant(16, DL));
+    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+                               DAG.getIntPtrConstant(4, DL));
 
     // The PSHUFB mask:
-    static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-                                    -1, -1, -1, -1, -1, -1, -1, -1};
+    static const int ShufMask1[] = {0, 2, 4, 6, -1, -1, -1, -1};
 
-    OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
-    OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);
+    OpLo = DAG.getBitcast(MVT::v8i16, OpLo);
+    OpHi = DAG.getBitcast(MVT::v8i16, OpHi);
+
+    OpLo = DAG.getVectorShuffle(MVT::v8i16, DL, OpLo, OpLo, ShufMask1);
+    OpHi = DAG.getVectorShuffle(MVT::v8i16, DL, OpHi, OpHi, ShufMask1);
 
     OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
     OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
@@ -21941,6 +22158,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   SDValue Res;
 
+  if (isSoftFP16(SrcVT)) {
+    MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
+    if (IsStrict)
+      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+                         {Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+                                             {NVT, MVT::Other}, {Chain, Src})});
+    return DAG.getNode(Op.getOpcode(), dl, VT,
+                       DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
+  }
+
   if (VT.isVector()) {
     if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
       MVT ResVT = MVT::v4i32;
@@ -22278,6 +22505,9 @@ SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
   SDValue Src = Op.getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();
 
+  if (SrcVT == MVT::f16)
+    return SDValue();
+
   // If the source is in an SSE register, the node is Legal.
   if (isScalarFPTypeInSSEReg(SrcVT))
     return Op;
@@ -22349,7 +22579,7 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
 
   // This code is only for floats and doubles. Fall back to generic code for
   // anything else.
-  if (!isScalarFPTypeInSSEReg(SrcVT))
+  if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftFP16(SrcVT))
     return SDValue();
 
   EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
@@ -22381,11 +22611,11 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
   // floating-point values.
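  // Illustrative aside (example values assumed): for a signed 32-bit
  // saturation width widened to a 64-bit destination, MinInt =
  // sext(0x80000000) = 0xFFFFFFFF80000000 and MaxInt = sext(0x7FFFFFFF) =
  // 0x000000007FFFFFFF.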
   APInt MinInt, MaxInt;
   if (IsSigned) {
-    MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
-    MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
+    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
   } else {
-    MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
-    MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
+    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
   }
 
   APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
@@ -22484,28 +22714,54 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
   SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   MVT SVT = In.getSimpleValueType();
 
-  if (VT == MVT::f128)
+  if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80))
     return SDValue();
 
-  if (VT == MVT::f80) {
-    if (SVT == MVT::f16) {
-      assert(Subtarget.hasFP16() && "Unexpected features!");
-      RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
-      MakeLibCallOptions CallOptions;
-      std::pair<SDValue, SDValue> Tmp =
-          makeLibCall(DAG, LC, VT, In, CallOptions, DL,
-                      IsStrict ? Op.getOperand(0) : SDValue());
+  if (SVT == MVT::f16) {
+    if (Subtarget.hasFP16())
+      return Op;
+
+    if (VT != MVT::f32) {
       if (IsStrict)
-        return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
-      else
-        return Tmp.first;
+        return DAG.getNode(
+            ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other},
+            {Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, DL,
+                                {MVT::f32, MVT::Other}, {Chain, In})});
+
+      return DAG.getNode(ISD::FP_EXTEND, DL, VT,
+                         DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, In));
     }
-    return Op;
+
+    if (!Subtarget.hasF16C())
+      return SDValue();
+
+    In = DAG.getBitcast(MVT::i16, In);
+    In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
+                     getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
+                     DAG.getIntPtrConstant(0, DL));
+    SDValue Res;
+    if (IsStrict) {
+      Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, DL, {MVT::v4f32, MVT::Other},
+                        {Chain, In});
+      Chain = Res.getValue(1);
+    } else {
+      Res = DAG.getNode(X86ISD::CVTPH2PS, DL, MVT::v4f32, In,
+                        DAG.getTargetConstant(4, DL, MVT::i32));
+    }
+    Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Res,
+                      DAG.getIntPtrConstant(0, DL));
+    if (IsStrict)
+      return DAG.getMergeValues({Res, Chain}, DL);
+    return Res;
   }
 
+  if (!SVT.isVector())
+    return Op;
+
   if (SVT.getVectorElementType() == MVT::f16) {
     assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
     if (SVT == MVT::v2f16)
@@ -22531,15 +22787,65 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   bool IsStrict = Op->isStrictFPOpcode();
+
+  SDLoc DL(Op);
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
   SDValue In = Op.getOperand(IsStrict ? 1 : 0);
+  SDValue Op2 = Op.getOperand(IsStrict ? 2 : 1);
   MVT VT = Op.getSimpleValueType();
   MVT SVT = In.getSimpleValueType();
 
-  // It's legal except when f128 is involved or we're converting f80->f16.
- if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80)) - return Op; + if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80)) + return SDValue(); - return SDValue(); + if (VT == MVT::f16) { + if (Subtarget.hasFP16()) + return Op; + + if (SVT != MVT::f32) { + if (IsStrict) + return DAG.getNode( + ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, + {Chain, + DAG.getNode(ISD::STRICT_FP_ROUND, DL, {MVT::f32, MVT::Other}, + {Chain, In, Op2}), + Op2}); + + return DAG.getNode(ISD::FP_ROUND, DL, VT, + DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, In, Op2), + Op2); + } + + if (!Subtarget.hasF16C()) + return SDValue(); + + SDValue Res; + SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL, + MVT::i32); + if (IsStrict) { + Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4f32, + DAG.getConstantFP(0, DL, MVT::v4f32), In, + DAG.getIntPtrConstant(0, DL)); + Res = DAG.getNode(X86ISD::STRICT_CVTPS2PH, DL, {MVT::v8i16, MVT::Other}, + {Chain, Res, Rnd}); + Chain = Res.getValue(1); + } else { + // FIXME: Should we use zeros for upper elements for non-strict? + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, In); + Res = DAG.getNode(X86ISD::CVTPS2PH, DL, MVT::v8i16, Res, Rnd); + } + + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i16, Res, + DAG.getIntPtrConstant(0, DL)); + Res = DAG.getBitcast(MVT::f16, Res); + + if (IsStrict) + return DAG.getMergeValues({Res, Chain}, DL); + + return Res; + } + + return Op; } static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -22857,6 +23163,47 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { return Res; } +/// Helper for attempting to create a X86ISD::BT node. +static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG) { + // If Src is i8, promote it to i32 with any_extend. There is no i8 BT + // instruction. Since the shift amount is in-range-or-undefined, we know + // that doing a bittest on the i32 value is ok. We extend to i32 because + // the encoding for the i16 version is larger than the i32 version. + // Also promote i16 to i32 for performance / code size reason. + if (Src.getValueType().getScalarSizeInBits() < 32) + Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src); + + // No legal type found, give up. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType())) + return SDValue(); + + // See if we can use the 32-bit instruction instead of the 64-bit one for a + // shorter encoding. Since the former takes the modulo 32 of BitNo and the + // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is + // known to be zero. + if (Src.getValueType() == MVT::i64 && + DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32))) + Src = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src); + + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. + if (Src.getValueType() != BitNo.getValueType()) { + // Peek through a mask/modulo operation. + // TODO: DAGCombine fails to do this as it just checks isTruncateFree, but + // we probably need a better IsDesirableToPromoteOp to handle this as well. 
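  // Illustrative aside (editorial example): with an i64 Src and an i32 amount
  // of the form (and idx, 63), any-extending both AND operands keeps the
  // modulo mask visible, i.e. BT X, (and (anyext idx), (anyext 63)).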
+ if (BitNo.getOpcode() == ISD::AND && BitNo->hasOneUse()) + BitNo = DAG.getNode(ISD::AND, DL, Src.getValueType(), + DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), + BitNo.getOperand(0)), + DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), + BitNo.getOperand(1))); + else + BitNo = DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), BitNo); + } + + return DAG.getNode(X86ISD::BT, DL, MVT::i32, Src, BitNo); +} + /// Helper for creating a X86ISD::SETCC node. static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl, SelectionDAG &DAG) { @@ -23303,7 +23650,7 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { return true; // We never want to use both SQRT and RSQRT instructions for the same input. - if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) + if (DAG.doesNodeExist(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) return false; if (VT.isVector()) @@ -23439,7 +23786,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, // Only perform this transform if CMOV is supported otherwise the select // below will become a branch. - if (!Subtarget.hasCMov()) + if (!Subtarget.canUseCMOV()) return SDValue(); // fold (sdiv X, pow2) @@ -23485,9 +23832,8 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, /// Result of 'and' is compared against zero. Change to a BT node if possible. /// Returns the BT node and the condition code needed to use it. -static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG, - SDValue &X86CC) { +static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, + SelectionDAG &DAG, X86::CondCode &X86CC) { assert(And.getOpcode() == ISD::AND && "Expected AND node!"); SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); @@ -23538,30 +23884,24 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, if (!Src.getNode()) return SDValue(); - // If Src is i8, promote it to i32 with any_extend. There is no i8 BT - // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i32 value is ok. We extend to i32 because - // the encoding for the i16 version is larger than the i32 version. - // Also promote i16 to i32 for performance / code size reason. - if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16) - Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src); + // Remove any bit flip. + if (isBitwiseNot(Src)) { + Src = Src.getOperand(0); + CC = CC == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + } - // See if we can use the 32-bit instruction instead of the 64-bit one for a - // shorter encoding. Since the former takes the modulo 32 of BitNo and the - // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is - // known to be zero. - if (Src.getValueType() == MVT::i64 && - DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32))) - Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + // Attempt to create the X86ISD::BT node. + if (SDValue BT = getBT(Src, BitNo, dl, DAG)) { + X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; + return BT; + } - // If the operand types disagree, extend the shift amount to match. Since - // BT ignores high bits (like shifts) we can use anyextend. - if (Src.getValueType() != BitNo.getValueType()) - BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); + return SDValue(); +} - X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? 
X86::COND_AE : X86::COND_B, - dl, MVT::i8); - return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); +// Check if pre-AVX condcode can be performed by a single FCMP op. +static bool cheapX86FSETCC_SSE(ISD::CondCode SetCCOpcode) { + return (SetCCOpcode != ISD::SETONE) && (SetCCOpcode != ISD::SETUEQ); } /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask @@ -23831,7 +24171,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE), // emit two comparisons and a logic op to tie them together. - if (SSECC >= 8) { + if (!cheapX86FSETCC_SSE(Cond)) { // LLVM predicate is SETUEQ or SETONE. unsigned CC0, CC1; unsigned CombineOpc; @@ -23996,10 +24336,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, if (VT.is256BitVector() && !Subtarget.hasInt256()) return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); - if (VT == MVT::v32i16 || VT == MVT::v64i8) { - assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!"); + // Break 512-bit integer vector compare into smaller ones. + // TODO: Try harder to use VPCMPx + VPMOV2x? + if (VT.is512BitVector()) return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); - } // If we have a limit constant, try to form PCMPGT (signed cmp) to avoid // not-of-PCMPEQ: @@ -24117,12 +24457,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. The lower // compare is always unsigned. - SDValue SB; - if (FlipSigns) { - SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64); - } else { - SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64); - } + SDValue SB = DAG.getConstant(FlipSigns ? 0x8000000080000000ULL + : 0x0000000080000000ULL, + dl, MVT::v2i64); + Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB); Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB); @@ -24261,8 +24599,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, // Lower ((X >>s N) & 1) != 0 to BT(X, N). if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC)) + X86::CondCode X86CondCode; + if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) { + X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8); return BT; + } } // Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0. @@ -24527,6 +24868,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op1.getSimpleValueType(); SDValue CC; + if (isSoftFP16(VT)) + return DAG.getBitcast(MVT::f16, DAG.getNode(ISD::SELECT, DL, MVT::i16, Cond, + DAG.getBitcast(MVT::i16, Op1), + DAG.getBitcast(MVT::i16, Op2))); + // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops // are available or VBLENDV if AVX is available. // Otherwise FP cmovs get lowered into a less efficient branch sequence later. 
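[Editorial sketch, not part of the patch: the CMP/AND/ANDN/OR sequence referred
to above can be written directly with SSE intrinsics; the helper name and the
sample predicate below are assumptions for illustration only.]

#include <xmmintrin.h>

// select(a < b, t, f) without branches: the compare yields an all-ones or
// all-zero mask per element, which then blends t and f bitwise.
static __m128 selectLtViaMask(__m128 a, __m128 b, __m128 t, __m128 f) {
  __m128 mask = _mm_cmplt_ps(a, b);         // all-ones where a < b
  return _mm_or_ps(_mm_and_ps(mask, t),     // take t where mask is set
                   _mm_andnot_ps(mask, f)); // take f where mask is clear
}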
@@ -24591,7 +24937,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } - if (Cond.getOpcode() == ISD::SETCC) { + if (Cond.getOpcode() == ISD::SETCC && + !isSoftFP16(Cond.getOperand(0).getSimpleValueType())) { if (SDValue NewCond = LowerSETCC(Cond, DAG)) { Cond = NewCond; // If the condition was updated, it's possible that the operands of the @@ -24608,6 +24955,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y + // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x + // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x if (Cond.getOpcode() == X86ISD::SETCC && Cond.getOperand(1).getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1).getOperand(1))) { @@ -24624,7 +24973,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() && Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2)); }; - if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) && + if (Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) && ((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) || (CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) { // Keep Cmp. @@ -24652,7 +25001,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Sub.getValue(1)); return DAG.getNode(ISD::OR, DL, VT, SBB, Y); - } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E && + } else if (!Subtarget.canUseCMOV() && CondCode == X86::COND_E && Cmp.getOperand(0).getOpcode() == ISD::AND && isOneConstant(Cmp.getOperand(0).getOperand(1))) { SDValue Src1, Src2; @@ -24688,6 +25037,22 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y } + } else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) && + Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) && + ((CondCode == X86::COND_S) || // smin(x, 0) + (CondCode == X86::COND_G && hasAndNot(Op1)))) { // smax(x, 0) + // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x + // + // If the comparison is testing for a positive value, we have to invert + // the sign bit mask, so only do that transform if the target has a + // bitwise 'and not' instruction (the invert is free). + // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x + unsigned ShCt = VT.getSizeInBits() - 1; + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt); + if (CondCode == X86::COND_G) + Shift = DAG.getNOT(DL, Shift, VT); + return DAG.getNode(ISD::AND, DL, VT, Shift, Op1); } } @@ -24707,7 +25072,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = Cond.getOperand(1); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && - !isScalarFPTypeInSSEReg(VT) && Subtarget.hasCMov()) // FPStack? + !isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV()) // FPStack? 
IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || @@ -24734,9 +25099,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue BTCC; - if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) { - CC = BTCC; + X86::CondCode X86CondCode; + if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) { + CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8); Cond = BT; AddTest = false; } @@ -24788,7 +25153,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // legal, but EmitLoweredSelect() can not deal with these extensions // being inserted between two CMOV's. (in i16 case too TBN) // https://bugs.llvm.org/show_bug.cgi?id=40974 - if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) || + if ((Op.getValueType() == MVT::i8 && Subtarget.canUseCMOV()) || (Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) && !X86::mayFoldLoad(Op2, Subtarget))) { Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); @@ -25153,16 +25518,20 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) && !Subtarget.hasBWI())) { SmallVector CatOps; - if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps)) + if (StoredVal.hasOneUse() && + collectConcatOps(StoredVal.getNode(), CatOps, DAG)) return splitVectorStore(St, DAG); return SDValue(); } + if (StoreVT.is32BitVector()) + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 && - "Unexpected VT"); + assert(StoreVT.is64BitVector() && "Unexpected VT"); assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) == - TargetLowering::TypeWidenVector && "Unexpected type action!"); + TargetLowering::TypeWidenVector && + "Unexpected type action!"); EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT); StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal, @@ -25247,8 +25616,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); SDLoc dl(Op); + // Bail out when we don't have native compare instructions. if (Cond.getOpcode() == ISD::SETCC && - Cond.getOperand(0).getValueType() != MVT::f128) { + Cond.getOperand(0).getValueType() != MVT::f128 && + !isSoftFP16(Cond.getOperand(0).getValueType())) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -25647,116 +26018,116 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, // Fold this packed vector shift into a build vector if SrcOp is a // vector of Constants or UNDEFs. if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { - SmallVector Elts; - unsigned NumElts = SrcOp->getNumOperands(); - + unsigned ShiftOpc; switch (Opc) { default: llvm_unreachable("Unknown opcode!"); case X86ISD::VSHLI: - for (unsigned i = 0; i != NumElts; ++i) { - SDValue CurrentOp = SrcOp->getOperand(i); - if (CurrentOp->isUndef()) { - // Must produce 0s in the correct bits. 
- Elts.push_back(DAG.getConstant(0, dl, ElementType)); - continue; - } - auto *ND = cast(CurrentOp); - const APInt &C = ND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType)); - } + ShiftOpc = ISD::SHL; break; case X86ISD::VSRLI: - for (unsigned i = 0; i != NumElts; ++i) { - SDValue CurrentOp = SrcOp->getOperand(i); - if (CurrentOp->isUndef()) { - // Must produce 0s in the correct bits. - Elts.push_back(DAG.getConstant(0, dl, ElementType)); - continue; - } - auto *ND = cast(CurrentOp); - const APInt &C = ND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType)); - } + ShiftOpc = ISD::SRL; break; case X86ISD::VSRAI: - for (unsigned i = 0; i != NumElts; ++i) { - SDValue CurrentOp = SrcOp->getOperand(i); - if (CurrentOp->isUndef()) { - // All shifted in bits must be the same so use 0. - Elts.push_back(DAG.getConstant(0, dl, ElementType)); - continue; - } - auto *ND = cast(CurrentOp); - const APInt &C = ND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType)); - } + ShiftOpc = ISD::SRA; break; } - return DAG.getBuildVector(VT, dl, Elts); + SDValue Amt = DAG.getConstant(ShiftAmt, dl, VT); + if (SDValue C = DAG.FoldConstantArithmetic(ShiftOpc, dl, VT, {SrcOp, Amt})) + return C; } return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getTargetConstant(ShiftAmt, dl, MVT::i8)); } -/// Handle vector element shifts where the shift amount may or may not be a -/// constant. Takes immediate version of shift as input. -/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes. +/// Handle vector element shifts by a splat shift amount static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, - SDValue SrcOp, SDValue ShAmt, + SDValue SrcOp, SDValue ShAmt, int ShAmtIdx, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - MVT SVT = ShAmt.getSimpleValueType(); - assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); - - // Change opcode to non-immediate version. - Opc = getTargetVShiftUniformOpcode(Opc, true); - - // Need to build a vector containing shift amount. - // SSE/AVX packed shifts only use the lower 64-bit of the shift count. - // +====================+============+=======================================+ - // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as | - // +====================+============+=======================================+ - // | i64 | Yes, No | Use ShAmt as lowest elt | - // | i32 | Yes | zero-extend in-reg | - // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg | - // | (i32 zext(i16/i8)) | No | byte-shift-in-reg | - // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) | - // +====================+============+=======================================+ - - if (SVT == MVT::i64) - ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); - else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND && - ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && - (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 || - ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) { + MVT AmtVT = ShAmt.getSimpleValueType(); + assert(AmtVT.isVector() && "Vector shift type mismatch"); + assert(0 <= ShAmtIdx && ShAmtIdx < (int)AmtVT.getVectorNumElements() && + "Illegal vector splat index"); + + // Move the splat element to the bottom element. 
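  // Illustrative aside (editorial example): for a v4i32 amount vector with
  // ShAmtIdx == 2, the shuffle below uses mask {2, -1, -1, -1}, leaving the
  // chosen amount in lane 0 and the remaining lanes undef.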
+ if (ShAmtIdx != 0) { + SmallVector Mask(AmtVT.getVectorNumElements(), -1); + Mask[0] = ShAmtIdx; + ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask); + } + + // Peek through any zext node if we can get back to a 128-bit source. + if (AmtVT.getScalarSizeInBits() == 64 && + (ShAmt.getOpcode() == ISD::ZERO_EXTEND || + ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && + ShAmt.getOperand(0).getValueType().isSimple() && + ShAmt.getOperand(0).getValueType().is128BitVector()) { ShAmt = ShAmt.getOperand(0); - MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16; - ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt); - if (Subtarget.hasSSE41()) + AmtVT = ShAmt.getSimpleValueType(); + } + + // See if we can mask off the upper elements using the existing source node. + // The shift uses the entire lower 64-bits of the amount vector, so no need to + // do this for vXi64 types. + bool IsMasked = false; + if (AmtVT.getScalarSizeInBits() < 64) { + if (ShAmt.getOpcode() == ISD::BUILD_VECTOR || + ShAmt.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // If the shift amount has come from a scalar, then zero-extend the scalar + // before moving to the vector. + ShAmt = DAG.getZExtOrTrunc(ShAmt.getOperand(0), dl, MVT::i32); + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, ShAmt); + ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, ShAmt); + AmtVT = MVT::v4i32; + IsMasked = true; + } else if (ShAmt.getOpcode() == ISD::AND) { + // See if the shift amount is already masked (e.g. for rotation modulo), + // then we can zero-extend it by setting all the other mask elements to + // zero. + SmallVector MaskElts( + AmtVT.getVectorNumElements(), + DAG.getConstant(0, dl, AmtVT.getScalarType())); + MaskElts[0] = DAG.getAllOnesConstant(dl, AmtVT.getScalarType()); + SDValue Mask = DAG.getBuildVector(AmtVT, dl, MaskElts); + if ((Mask = DAG.FoldConstantArithmetic(ISD::AND, dl, AmtVT, + {ShAmt.getOperand(1), Mask}))) { + ShAmt = DAG.getNode(ISD::AND, dl, AmtVT, ShAmt.getOperand(0), Mask); + IsMasked = true; + } + } + } + + // Extract if the shift amount vector is larger than 128-bits. + if (AmtVT.getSizeInBits() > 128) { + ShAmt = extract128BitVector(ShAmt, 0, DAG, dl); + AmtVT = ShAmt.getSimpleValueType(); + } + + // Zero-extend bottom element to v2i64 vector type, either by extension or + // shuffle masking. 
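A sketch of the masked-amount case above, assuming a v4i32 amount that was already ANDed with a splat of 31 (e.g. rotation modulo): keeping lane 0 of the mask and zeroing the rest makes the upper lanes of the amount vector zero for free, so no extra zero-extension is needed before the 64-bit amount read of the shift.

  // ShAmt = (and X, <31,31,31,31>)  -->  (and X, <31,0,0,0>)
  SmallVector<SDValue, 4> MaskElts(4, DAG.getConstant(0, dl, MVT::i32));
  MaskElts[0] = DAG.getAllOnesConstant(dl, MVT::i32);
  SDValue Mask = DAG.getBuildVector(MVT::v4i32, dl, MaskElts);
  // FoldConstantArithmetic merges the two constant masks; lanes 1-3 of the
  // new AND are then known zero, which is what PSRLD/PSLLD require.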
+ if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) { + if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST || + ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) { + ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt); + } else if (Subtarget.hasSSE41()) { ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, ShAmt); - else { + } else { SDValue ByteShift = DAG.getTargetConstant( - (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); + (128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, ByteShift); ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, ByteShift); } - } else if (Subtarget.hasSSE41() && - ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); - ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), - MVT::v2i64, ShAmt); - } else { - SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT), - DAG.getUNDEF(SVT)}; - ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps); } + // Change opcode to non-immediate version. + Opc = getTargetVShiftUniformOpcode(Opc, true); + // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); @@ -25907,8 +26278,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after // prologue to RBP in the parent function. - const X86Subtarget &Subtarget = - static_cast(DAG.getSubtarget()); + const X86Subtarget &Subtarget = DAG.getSubtarget(); if (Subtarget.is64Bit()) return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset); @@ -26444,6 +26814,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case VSHIFT: { SDValue SrcOp = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); + assert(ShAmt.getValueType() == MVT::i32 && + "Unexpected VSHIFT amount type"); // Catch shift-by-constant. if (auto *CShAmt = dyn_cast(ShAmt)) @@ -26451,8 +26823,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getSimpleValueType(), SrcOp, CShAmt->getZExtValue(), DAG); + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, ShAmt); return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), - SrcOp, ShAmt, Subtarget, DAG); + SrcOp, ShAmt, 0, Subtarget, DAG); } case COMPRESS_EXPAND_IN_REG: { SDValue Mask = Op.getOperand(3); @@ -27411,6 +27784,30 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, Operation.getValue(1)); } + case Intrinsic::x86_atomic_bts: + case Intrinsic::x86_atomic_btc: + case Intrinsic::x86_atomic_btr: { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + SDValue Chain = Op.getOperand(0); + SDValue Op1 = Op.getOperand(2); + SDValue Op2 = Op.getOperand(3); + unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS + : IntNo == Intrinsic::x86_atomic_btc ? 
X86ISD::LBTC + : X86ISD::LBTR; + SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32); + MachineMemOperand *MMO = cast(Op)->getMemOperand(); + SDValue Res = + DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other), + {Chain, Op1, Op2, Size}, VT, MMO); + Chain = Res.getValue(1); + Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT); + unsigned Imm = cast(Op2)->getZExtValue(); + if (Imm) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getShiftAmountConstant(Imm, VT, DL)); + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain); + } } return SDValue(); } @@ -28394,11 +28791,27 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, return SDValue(); } -static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + + // For AVX1 cases, split to use legal ops. + if (VT.is256BitVector() && !Subtarget.hasInt256()) + return splitVectorIntBinary(Op, DAG); + + if (VT == MVT::v32i16 || VT == MVT::v64i8) + return splitVectorIntBinary(Op, DAG); + + // Default to expand. + return SDValue(); +} + +static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); - // For AVX1 cases, split to use legal ops (everything but v4i64). - if (VT.getScalarType() != MVT::i64 && VT.is256BitVector()) + // For AVX1 cases, split to use legal ops. + if (VT.is256BitVector() && !Subtarget.hasInt256()) return splitVectorIntBinary(Op, DAG); if (VT == MVT::v32i16 || VT == MVT::v64i8) @@ -29188,19 +29601,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG, SDValue Amt = Op.getOperand(1); unsigned Opcode = Op.getOpcode(); unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false); - unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true); - if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) { - if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) { - MVT EltVT = VT.getVectorElementType(); - assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!"); - if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt); - else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); - - return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG); - } + int BaseShAmtIdx = -1; + if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) { + if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) + return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx, + Subtarget, DAG); // vXi8 shifts - shift as v8i16 + mask result. if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) || @@ -29212,13 +29618,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG, if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) { unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL); unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false); - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); // Create the mask using vXi16 shifts. For shift-rights we need to move // the upper byte down before splatting the vXi8 mask. 
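An illustrative trace of the new LBTS/LBTC/LBTR lowering above, assuming an i32 bts intrinsic whose bit-index operand (Op2) is the constant 4:

  // lock bts dword ptr [Op1], 4     ; CF = old value of bit 4
  // Res = zext(setcc(X86::COND_B))  ; 0 or 1
  // Imm = 4, so: Res <<= 4          ; reproduces (old & (1 << 4))
  // MERGE_VALUES then pairs Res with the updated memory chain.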
SDValue BitMask = DAG.getConstant(-1, dl, ExtVT); BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask, - BaseShAmt, Subtarget, DAG); + BaseShAmt, BaseShAmtIdx, Subtarget, DAG); if (Opcode != ISD::SHL) BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask, 8, DAG); @@ -29228,7 +29633,7 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG, SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, DAG.getBitcast(ExtVT, R), BaseShAmt, - Subtarget, DAG); + BaseShAmtIdx, Subtarget, DAG); Res = DAG.getBitcast(VT, Res); Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask); @@ -29236,8 +29641,9 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG, // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask) // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW. SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT); - SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask, - BaseShAmt, Subtarget, DAG); + SignMask = + getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask, BaseShAmt, + BaseShAmtIdx, Subtarget, DAG); SignMask = DAG.getBitcast(VT, SignMask); Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask); @@ -29247,23 +29653,6 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG, } } - // Check cases (mainly 32-bit) where i64 is expanded into high and low parts. - if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST && - Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { - Amt = Amt.getOperand(0); - unsigned Ratio = 64 / Amt.getScalarValueSizeInBits(); - std::vector Vals(Ratio); - for (unsigned i = 0; i != Ratio; ++i) - Vals[i] = Amt.getOperand(i); - for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) { - for (unsigned j = 0; j != Ratio; ++j) - if (Vals[j] != Amt.getOperand(i + j)) - return SDValue(); - } - - if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) - return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); - } return SDValue(); } @@ -29843,8 +30232,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, {Op0, Op1, Amt}, DAG, Subtarget); } assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 || - VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v8i32 || - VT == MVT::v16i32) && + VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 || + VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) && "Unexpected funnel shift type!"); // fshl(x,y,z) -> unpack(y,x) << (z & (bw-1))) >> bw. @@ -29867,7 +30256,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, // Split 256-bit integers on XOP/pre-AVX2 targets. // Split 512-bit integers on non 512-bit BWI targets. - if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 32) || + if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) || !Subtarget.hasAVX2())) || (VT.is512BitVector() && !Subtarget.useBWIRegs() && EltSizeInBits < 32)) { @@ -29878,18 +30267,18 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, // Attempt to fold scalar shift as unpack(y,x) << zext(splat(z)) if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) { - if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) { + int ScalarAmtIdx = -1; + if (SDValue ScalarAmt = DAG.getSplatSourceVector(AmtMod, ScalarAmtIdx)) { // Uniform vXi16 funnel shifts can be efficiently handled by default. 
if (EltSizeInBits == 16) return SDValue(); SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); - ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32); - Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt, Subtarget, - DAG); - Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt, Subtarget, - DAG); + Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt, + ScalarAmtIdx, Subtarget, DAG); + Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt, + ScalarAmtIdx, Subtarget, DAG); return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR); } } @@ -30079,18 +30468,20 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // Attempt to fold as unpack(x,x) << zext(splat(y)): // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw. // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))). - // TODO: Handle vXi16 cases on all targets. - if (EltSizeInBits == 8 || EltSizeInBits == 32 || - (IsROTL && EltSizeInBits == 16 && !Subtarget.hasAVX())) { - if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) { + if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) { + int BaseRotAmtIdx = -1; + if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) { + if (EltSizeInBits == 16 && Subtarget.hasSSE41()) { + unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR; + return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt); + } unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI; SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); - BaseRotAmt = DAG.getZExtOrTrunc(BaseRotAmt, DL, MVT::i32); Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, BaseRotAmt, - Subtarget, DAG); + BaseRotAmtIdx, Subtarget, DAG); Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt, - Subtarget, DAG); + BaseRotAmtIdx, Subtarget, DAG); return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); } } @@ -30273,14 +30664,15 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { unsigned OpWidth = MemType->getPrimitiveSizeInBits(); if (OpWidth == 64) - return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit(); + return Subtarget.canUseCMPXCHG8B() && !Subtarget.is64Bit(); if (OpWidth == 128) - return Subtarget.hasCmpxchg16b(); + return Subtarget.canUseCMPXCHG16B(); return false; } -bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { +TargetLoweringBase::AtomicExpansionKind +X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { Type *MemType = SI->getValueOperand()->getType(); bool NoImplicitFloatOps = @@ -30288,9 +30680,10 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && !Subtarget.useSoftFloat() && !NoImplicitFloatOps && (Subtarget.hasSSE1() || Subtarget.hasX87())) - return false; + return AtomicExpansionKind::None; - return needsCmpXchgNb(MemType); + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; } // Note: this turns large loads into lock cmpxchg8b/16b. 
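shouldExpandAtomicStoreInIR now reports an expansion kind instead of a bool; as a rough decision table for a 32-bit target (a sketch, not exhaustive):

  // store atomic i64 %v, ptr %p seq_cst, align 8   -- on i686
  //   SSE1 or x87 usable (no soft-float / noimplicitfloat)
  //       -> AtomicExpansionKind::None    (single 64-bit FP/vector store)
  //   otherwise, needsCmpXchgNb(i64)
  //       -> AtomicExpansionKind::Expand  (lock cmpxchg8b loop)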
@@ -30313,6 +30706,65 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
              : AtomicExpansionKind::None;
 }
 
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // If the atomicrmw's result isn't actually used, we can just add a "lock"
+  // prefix to a normal instruction for these operations.
+  if (AI->use_empty())
+    return AtomicExpansionKind::None;
+
+  // If the atomicrmw's result is used by a single-bit AND, we may be able to
+  // use a bts/btr/btc instruction for these operations.
+  auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+  Instruction *I = AI->user_back();
+  if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+      AI->getParent() != I->getParent())
+    return AtomicExpansionKind::CmpXChg;
+  // The following instruction must be an AND with a single bit.
+  auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+  unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
+  if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+    return AtomicExpansionKind::CmpXChg;
+
+  if (AI->getOperation() == AtomicRMWInst::And)
+    return ~C1->getValue() == C2->getValue()
+               ? AtomicExpansionKind::BitTestIntrinsic
+               : AtomicExpansionKind::CmpXChg;
+
+  return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
+                  : AtomicExpansionKind::CmpXChg;
+}
+
+void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
+  IRBuilder<> Builder(AI);
+  Intrinsic::ID IID = Intrinsic::not_intrinsic;
+  switch (AI->getOperation()) {
+  default:
+    llvm_unreachable("Unknown atomic operation");
+  case AtomicRMWInst::Or:
+    IID = Intrinsic::x86_atomic_bts;
+    break;
+  case AtomicRMWInst::Xor:
+    IID = Intrinsic::x86_atomic_btc;
+    break;
+  case AtomicRMWInst::And:
+    IID = Intrinsic::x86_atomic_btr;
+    break;
+  }
+  Instruction *I = AI->user_back();
+  LLVMContext &Ctx = AI->getContext();
+  unsigned Imm =
+      countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
+  Function *BitTest =
+      Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
+  Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
+                                          Type::getInt8PtrTy(Ctx));
+  Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+  I->replaceAllUsesWith(Result);
+  I->eraseFromParent();
+  AI->eraseFromParent();
+}
+
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -30337,10 +30789,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
-    // If the atomicrmw's result isn't actually used, we can just add a "lock"
-    // prefix to a normal instruction for these operations.
-    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
-                            : AtomicExpansionKind::None;
+    return shouldExpandLogicAtomicRMWInIR(AI);
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -31552,16 +32001,12 @@ SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op,
   // require special handling for these nodes), lower them as literal NOOPs for
   // the time being.
   SmallVector<SDValue, 2> Ops;
-  Ops.push_back(Op.getOperand(0));
   if (Op->getGluedNode())
     Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
 
-  SDLoc OpDL(Op);
   SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
-
-  return NOOP;
+  return SDValue(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
 }
 
 // Custom split CVTPS2PH with wide types.
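A worked IR example of the new BitTestIntrinsic path (an illustrative sketch; the intrinsic name mangling is abbreviated). For or/xor the value operand must equal the single tested bit; for and it must be the inverted mask:

  //   %old = atomicrmw or ptr %p, i32 16 monotonic
  //   %bit = and i32 %old, 16
  // becomes, with Imm = countTrailingZeros(16) = 4:
  //   %bit = call i32 @llvm.x86.atomic.bts.i32(ptr %p, i8 4)
  // which selects to "lock btsl $4, (%p)". The and-form instead needs:
  //   %old = atomicrmw and ptr %p, i32 -17 monotonic   ; ~16
  //   %bit = and i32 %old, 16                          ; -> x86_atomic_btr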
@@ -31710,8 +32155,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: - case ISD::UMIN: return LowerMINMAX(Op, DAG); + case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG); case ISD::ABS: return LowerABS(Op, Subtarget, DAG); + case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); @@ -31807,9 +32253,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } - case X86ISD::VPMADDWD: - case X86ISD::AVG: { - // Legalize types for X86ISD::AVG/VPMADDWD by widening. + case X86ISD::VPMADDWD: { + // Legalize types for X86ISD::VPMADDWD by widening. assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT VT = N->getValueType(0); @@ -32462,7 +32907,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT T = N->getValueType(0); assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); bool Regs64bit = T == MVT::i128; - assert((!Regs64bit || Subtarget.hasCmpxchg16b()) && + assert((!Regs64bit || Subtarget.canUseCMPXCHG16B()) && "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; SDValue cpInL, cpInH; @@ -32821,6 +33266,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(LOR) NODE_NAME_CASE(LXOR) NODE_NAME_CASE(LAND) + NODE_NAME_CASE(LBTS) + NODE_NAME_CASE(LBTC) + NODE_NAME_CASE(LBTR) NODE_NAME_CASE(VZEXT_MOVL) NODE_NAME_CASE(VZEXT_LOAD) NODE_NAME_CASE(VEXTRACT_STORE) @@ -33041,7 +33489,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SCALEF_RND) NODE_NAME_CASE(SCALEFS) NODE_NAME_CASE(SCALEFS_RND) - NODE_NAME_CASE(AVG) NODE_NAME_CASE(MULHRS) NODE_NAME_CASE(SINT_TO_FP_RND) NODE_NAME_CASE(UINT_TO_FP_RND) @@ -33222,7 +33669,6 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const { bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const { switch (Opcode) { // TODO: Add more X86ISD opcodes once we have test coverage. - case X86ISD::AVG: case X86ISD::PCMPEQ: case X86ISD::PMULDQ: case X86ISD::PMULUDQ: @@ -33418,6 +33864,20 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { return !(VT1 == MVT::i32 && VT2 == MVT::i16); } +bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, + EVT VT) const { + // TODO: This is too general. There are cases where pre-AVX512 codegen would + // benefit. The transform may also be profitable for scalar code. + if (!Subtarget.hasAVX512()) + return false; + if (!Subtarget.hasVLX() && !VT.is512BitVector()) + return false; + if (!VT.isVector()) + return false; + + return true; +} + /// Targets can use this to indicate that they only support *some* /// VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values @@ -33460,6 +33920,16 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { return TargetLowering::areJTsAllowed(Fn); } +MVT X86TargetLowering::getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const { + // Avoid 8 and 16 bit types because they increase the chance for unnecessary + // zero-extensions. 
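A usage sketch of the new switch hook (illustrative, not from the patch): an i8 or i16 switch condition is now promoted once, up front, so the lowered compares and jump-table index math reuse one zero-extended i32 value instead of re-extending at each use.

  // switch i8 %c, label %def [ ... ]
  //   --> %cond = zext i8 %c to i32   ; single movzbl
  //       all compares / jump-table indexing then consume %cond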
+ if (ConditionVT.getSizeInBits() < 32) + return MVT::i32; + return TargetLoweringBase::getPreferredSwitchConditionType(Context, + ConditionVT); +} + //===----------------------------------------------------------------------===// // X86 Scheduler Hooks //===----------------------------------------------------------------------===// @@ -33871,6 +34341,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, // conditional jump around it. static bool isCMOVPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { + case X86::CMOV_FR16: case X86::CMOV_FR16X: case X86::CMOV_FR32: case X86::CMOV_FR32X: @@ -34090,7 +34561,7 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, // SinkMBB: // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] - Register DestReg = FirstCMOV.getOperand(0).getReg(); + Register DestReg = SecondCascadedCMOV.getOperand(0).getReg(); Register Op1Reg = FirstCMOV.getOperand(1).getReg(); Register Op2Reg = FirstCMOV.getOperand(2).getReg(); MachineInstrBuilder MIB = @@ -34103,11 +34574,6 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, // The second SecondInsertedMBB provides the same incoming value as the // FirstInsertedMBB (the True operand of the SELECT_CC/CMOV nodes). MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB); - // Copy the PHI result to the register defined by the second CMOV. - BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL, - TII->get(TargetOpcode::COPY), - SecondCascadedCMOV.getOperand(0).getReg()) - .addReg(FirstCMOV.getOperand(0).getReg()); // Now remove the CMOVs. FirstCMOV.eraseFromParent(); @@ -35546,6 +36012,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); + case X86::CMOV_FR16: + case X86::CMOV_FR16X: case X86::CMOV_FR32: case X86::CMOV_FR32X: case X86::CMOV_FR64: @@ -36116,6 +36584,15 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } break; } + case X86ISD::AND: { + if (Op.getResNo() == 0) { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known &= Known2; + } + break; + } case X86ISD::ANDNP: { KnownBits Known2; Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -36257,6 +36734,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.setAllZero(); break; } + case X86ISD::VBROADCAST_LOAD: { + APInt UndefElts; + SmallVector EltBits; + if (getTargetConstantBitsFromNode(Op, BitWidth, UndefElts, EltBits, + /*AllowWholeUndefs*/ false, + /*AllowPartialUndefs*/ false)) { + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned I = 0; I != NumElts; ++I) { + if (!DemandedElts[I]) + continue; + if (UndefElts[I]) { + Known.resetAll(); + break; + } + KnownBits Known2 = KnownBits::makeConstant(EltBits[I]); + Known = KnownBits::commonBits(Known, Known2); + } + return; + } + break; + } } // Handle target shuffles. @@ -37113,9 +37612,10 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumRootElts = RootVT.getVectorNumElements(); // Canonicalize shuffle input op to the requested type. - // TODO: Support cases where Op is smaller than VT. 
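The new X86ISD::AND case above mirrors the generic ISD::AND rule; only result 0 carries a value (result 1 is EFLAGS, where bit tracking is meaningless), hence the getResNo() == 0 guard. As a sketch, `Known &= Known2` expands to:

  Known.Zero |= Known2.Zero;  // zero if known zero in either operand
  Known.One  &= Known2.One;   // one only if known one in both
  // e.g. an AND against constant 0x00FF proves bits 8-31 zero, letting a
  // later zero-extend of the result fold away.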
auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { - if (VT.getSizeInBits() < Op.getValueSizeInBits()) + if (VT.getSizeInBits() > Op.getValueSizeInBits()) + Op = widenSubVector(Op, false, Subtarget, DAG, DL, VT.getSizeInBits()); + else if (VT.getSizeInBits() < Op.getValueSizeInBits()) Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits()); return DAG.getBitcast(VT, Op); }; @@ -37129,8 +37629,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MVT VT1 = V1.getSimpleValueType(); MVT VT2 = V2.getSimpleValueType(); - assert(VT1.getSizeInBits() == RootSizeInBits && - VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch"); + assert((RootSizeInBits % VT1.getSizeInBits()) == 0 && + (RootSizeInBits % VT2.getSizeInBits()) == 0 && "Vector size mismatch"); SDValue Res; @@ -37157,12 +37657,13 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } - // If we are shuffling a broadcast (and not introducing zeros) then - // we can just use the broadcast directly. This works for smaller broadcast - // elements as well as they already repeat across each mask element - if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) && + // If we are shuffling a splat (and not introducing zeros) then we can just + // use it directly. This works for smaller elements as well as they already + // repeat across each mask element. + if (UnaryShuffle && !isAnyZero(BaseMask) && + V1.getValueSizeInBits() >= RootSizeInBits && (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && - V1.getValueSizeInBits() >= RootSizeInBits) { + DAG.isSplatValue(V1, /*AllowUndefs*/ false)) { return CanonicalizeShuffleInput(RootVT, V1); } @@ -37543,7 +38044,11 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (RootVT.is128BitVector() && Subtarget.hasVLX())) && (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) && isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) { - if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE) + // Bail if this was already a truncation or PACK node. + // We sometimes fail to match PACK if we demand known undef elements. + if (Depth == 0 && (Root.getOpcode() == ISD::TRUNCATE || + Root.getOpcode() == X86ISD::PACKSS || + Root.getOpcode() == X86ISD::PACKUS)) return SDValue(); // Nothing to do! ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2); @@ -37852,6 +38357,12 @@ static SDValue combineX86ShuffleChainWithExtract( unsigned RootSizeInBits = RootVT.getSizeInBits(); assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask"); + // Bail if we have any smaller inputs. + if (llvm::any_of(Inputs, [RootSizeInBits](SDValue Input) { + return Input.getValueSizeInBits() < RootSizeInBits; + })) + return SDValue(); + SmallVector WideInputs(Inputs.begin(), Inputs.end()); SmallVector Offsets(NumInputs, 0); @@ -37894,16 +38405,6 @@ static SDValue combineX86ShuffleChainWithExtract( })) return SDValue(); - for (SDValue &NewInput : WideInputs) { - assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 && - "Shuffle vector size mismatch"); - if (WideSizeInBits > NewInput.getValueSizeInBits()) - NewInput = widenSubVector(NewInput, false, Subtarget, DAG, - SDLoc(NewInput), WideSizeInBits); - assert(WideSizeInBits == NewInput.getValueSizeInBits() && - "Unexpected subvector extraction"); - } - // Create new mask for larger type. 
for (unsigned i = 1; i != NumInputs; ++i) Offsets[i] += i * Scale * NumMaskElts; @@ -37928,7 +38429,10 @@ static SDValue combineX86ShuffleChainWithExtract( // Attempt to combine wider chain. // TODO: Can we use a better Root? - SDValue WideRoot = WideInputs[0]; + SDValue WideRoot = WideInputs.front().getValueSizeInBits() > + WideInputs.back().getValueSizeInBits() + ? WideInputs.front() + : WideInputs.back(); if (SDValue WideShuffle = combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth, HasVariableMask, AllowVariableCrossLaneMask, @@ -38267,9 +38771,9 @@ static SDValue combineX86ShufflesRecursively( assert(RootMask.size() > 0 && (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) && "Illegal shuffle root mask"); - assert(Root.getSimpleValueType().isVector() && - "Shuffles operate on vector types!"); - unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits(); + MVT RootVT = Root.getSimpleValueType(); + assert(RootVT.isVector() && "Shuffles operate on vector types!"); + unsigned RootSizeInBits = RootVT.getSizeInBits(); // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. @@ -38298,16 +38802,27 @@ static SDValue combineX86ShufflesRecursively( APInt OpUndef, OpZero; APInt OpDemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode()); - if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef, - OpZero, DAG, Depth, false)) - return SDValue(); - - // Shuffle inputs must not be larger than the shuffle result. - // TODO: Relax this for single input faux shuffles (trunc/extract_subvector). - if (llvm::any_of(OpInputs, [VT](SDValue OpInput) { - return OpInput.getValueSizeInBits() > VT.getSizeInBits(); - })) + if (getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef, + OpZero, DAG, Depth, false)) { + // Shuffle inputs must not be larger than the shuffle result. + // TODO: Relax this for single input faux shuffles (e.g. trunc). + if (llvm::any_of(OpInputs, [VT](SDValue OpInput) { + return OpInput.getValueSizeInBits() > VT.getSizeInBits(); + })) + return SDValue(); + } else if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && + (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 && + !isNullConstant(Op.getOperand(1))) { + SDValue SrcVec = Op.getOperand(0); + int ExtractIdx = Op.getConstantOperandVal(1); + unsigned NumElts = VT.getVectorNumElements(); + OpInputs.assign({SrcVec}); + OpMask.assign(NumElts, SM_SentinelUndef); + std::iota(OpMask.begin(), OpMask.end(), ExtractIdx); + OpZero = OpUndef = APInt::getNullValue(NumElts); + } else { return SDValue(); + } // If the shuffle result was smaller than the root, we need to adjust the // mask indices and pad the mask with undefs. @@ -38467,13 +38982,12 @@ static SDValue combineX86ShufflesRecursively( // Handle the all undef/zero/ones cases early. 
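A sketch of the new EXTRACT_SUBVECTOR fallback above, assuming a v4i32 extract at constant index 4 from a v8i32 source: the extract is handed to the shuffle combiner as a mask over the wider vector (index 0 is excluded, since that case is already just a subvector view):

  // extract_subvector v8i32:X, 4   --as-faux-shuffle-->
  //   OpInputs = { X }
  //   OpMask   = { 4, 5, 6, 7 }    // std::iota from ExtractIdx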
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) - return DAG.getUNDEF(Root.getValueType()); + return DAG.getUNDEF(RootVT); if (all_of(Mask, [](int Idx) { return Idx < 0; })) - return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, - SDLoc(Root)); + return getZeroVector(RootVT, Subtarget, DAG, SDLoc(Root)); if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) && none_of(Mask, [](int M) { return M == SM_SentinelZero; })) - return getOnesVector(Root.getValueType(), DAG, SDLoc(Root)); + return getOnesVector(RootVT, DAG, SDLoc(Root)); assert(!Ops.empty() && "Shuffle with no inputs detected"); HasVariableMask |= IsOpVariableMask; @@ -38533,7 +39047,7 @@ static SDValue combineX86ShufflesRecursively( // NOTE: This will update the Ops and Mask. if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp( Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget)) - return DAG.getBitcast(Root.getValueType(), HOp); + return DAG.getBitcast(RootVT, HOp); // Try to refine our inputs given our knowledge of target shuffle mask. for (auto I : enumerate(Ops)) { @@ -38578,6 +39092,8 @@ static SDValue combineX86ShufflesRecursively( // FIXME: should we rerun resolveTargetShuffleInputsAndMask() now? // Widen any subvector shuffle inputs we've collected. + // TODO: Remove this to avoid generating temporary nodes, we should only + // widen once combineX86ShuffleChain has found a match. if (any_of(Ops, [RootSizeInBits](SDValue Op) { return Op.getValueSizeInBits() < RootSizeInBits; })) { @@ -38823,8 +39339,7 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, SDValue N0 = V.getOperand(0); SDValue N1 = V.getOperand(1); unsigned Imm = V.getConstantOperandVal(2); - const X86Subtarget &Subtarget = - static_cast(DAG.getSubtarget()); + const X86Subtarget &Subtarget = DAG.getSubtarget(); if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) || X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget)) return SDValue(); @@ -38869,21 +39384,24 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT ShuffleVT = N.getValueType(); - auto IsMergeableWithShuffle = [](SDValue Op) { + auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) { // AllZeros/AllOnes constants are freely shuffled and will peek through // bitcasts. Other constant build vectors do not peek through bitcasts. Only // merge with target shuffles if it has one use so shuffle combining is - // likely to kick in. + // likely to kick in. Shuffles of splats are expected to be removed. return ISD::isBuildVectorAllOnes(Op.getNode()) || ISD::isBuildVectorAllZeros(Op.getNode()) || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) || - (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()); + (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) || + (FoldLoad && isShuffleFoldableLoad(Op)) || + DAG.isSplatValue(Op, /*AllowUndefs*/ false); }; auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) { // Ensure we only shuffle whole vector src elements, unless its a logical // binops where we can more aggressively move shuffles from dst to src. 
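An example of what the widened IsMergeableWithShuffle predicate now accepts when sinking a shuffle into a binop (sketch; X, C, M are placeholders):

  // shuffle(and(X, splat(C)), M)  -->  and(shuffle(X, M), splat(C))
  // Splats are free to shuffle (DAG.isSplatValue with no undefs), and for
  // non-PSHUFB shuffles a shuffle-foldable load operand also qualifies,
  // since the new shuffle can be folded into the load's use.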
return BinOp == ISD::AND || BinOp == ISD::OR || BinOp == ISD::XOR || + BinOp == X86ISD::ANDNP || (Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits()); }; @@ -38913,7 +39431,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG, if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) { SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0)); SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1)); - if (IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op01)) { + if (IsMergeableWithShuffle(Op00, Opc != X86ISD::PSHUFB) || + IsMergeableWithShuffle(Op01, Opc != X86ISD::PSHUFB)) { SDValue LHS, RHS; Op00 = DAG.getBitcast(ShuffleVT, Op00); Op01 = DAG.getBitcast(ShuffleVT, Op01); @@ -39054,6 +39573,11 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SmallVector Mask; unsigned Opcode = N.getOpcode(); + // FIXME: Remove this after we support vector FP16 + if (isSoftFP16(peekThroughBitcasts(N.getOperand(0)).getSimpleValueType(), + Subtarget)) + return SDValue(); + if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) return R; @@ -39471,7 +39995,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return SDValue(); SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1)); SmallVector SubOps; - if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2) + if (collectConcatOps(Src.getNode(), SubOps, DAG) && SubOps.size() == 2) return SubOps[Idx & 1]; unsigned NumElts = Src.getValueType().getVectorNumElements(); if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && @@ -39581,7 +40105,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { // No change if element is already zero or the inserted element. continue; - } else if (KnownUndef0[i] || KnownZero0[i]) { + } + + if (KnownUndef0[i] || KnownZero0[i]) { // If the target mask is undef/zero then we must zero the element. InsertPSMask |= (1u << i); Updated = true; @@ -40016,16 +40542,14 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // Simplify source operands based on shuffle mask. // TODO - merge this into combineX86ShufflesRecursively. - APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, - DCI)) + if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, DCI)) return SDValue(N, 0); // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)). // Perform this after other shuffle combines to allow inner shuffles to be // combined away first. - if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, SDLoc(N))) + if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, dl)) return BinOp; } @@ -40212,6 +40736,11 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( Depth + 1)) return true; + // Fold shift(0,x) -> 0 + if (DemandedElts.isSubsetOf(KnownZero)) + return TLO.CombineTo( + Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); + // Aggressively peek through ops to get at the demanded elts. 
if (!DemandedElts.isAllOnes()) if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( @@ -40232,9 +40761,16 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO, Depth + 1)) return true; + + // Fold shift(0,x) -> 0 + if (DemandedElts.isSubsetOf(LHSZero)) + return TLO.CombineTo( + Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); + if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO, Depth + 1)) return true; + KnownZero = LHSZero; break; } @@ -40316,6 +40852,57 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( KnownZero.setHighBits(ShiftAmt); break; } + case X86ISD::ANDNP: { + // ANDNP = (~LHS & RHS); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) { + APInt UndefElts; + SmallVector EltBits; + int NumElts = VT.getVectorNumElements(); + int EltSizeInBits = VT.getScalarSizeInBits(); + APInt OpBits = APInt::getAllOnes(EltSizeInBits); + APInt OpElts = DemandedElts; + if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, + EltBits)) { + OpBits.clearAllBits(); + OpElts.clearAllBits(); + for (int I = 0; I != NumElts; ++I) + if (DemandedElts[I] && ((Invert && !EltBits[I].isAllOnes()) || + (!Invert && !EltBits[I].isZero()))) { + OpBits |= Invert ? ~EltBits[I] : EltBits[I]; + OpElts.setBit(I); + } + } + return std::make_pair(OpBits, OpElts); + }; + std::pair DemandLHS = GetDemandedMasks(RHS); + std::pair DemandRHS = GetDemandedMasks(LHS, true); + + APInt LHSUndef, LHSZero; + APInt RHSUndef, RHSZero; + if (SimplifyDemandedVectorElts(LHS, DemandLHS.second, LHSUndef, LHSZero, + TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(RHS, DemandRHS.second, RHSUndef, RHSZero, + TLO, Depth + 1)) + return true; + + if (!DemandedElts.isAllOnes()) { + SDValue NewLHS = SimplifyMultipleUseDemandedBits( + LHS, DemandLHS.first, DemandLHS.second, TLO.DAG, Depth + 1); + SDValue NewRHS = SimplifyMultipleUseDemandedBits( + RHS, DemandRHS.first, DemandRHS.second, TLO.DAG, Depth + 1); + if (NewLHS || NewRHS) { + NewLHS = NewLHS ? NewLHS : LHS; + NewRHS = NewRHS ? NewRHS : RHS; + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS)); + } + } + break; + } case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: { SDValue Src = Op.getOperand(0); @@ -40620,7 +41207,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( case X86ISD::UNPCKH: case X86ISD::BLENDI: // Integer ops. - case X86ISD::AVG: case X86ISD::PACKSS: case X86ISD::PACKUS: // Horizontal Ops. @@ -40651,10 +41237,10 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } } - // For broadcasts, unless we *only* demand the 0'th element, + // For splats, unless we *only* demand the 0'th element, // stop attempts at simplification here, we aren't going to improve things, // this is better than any potential shuffle. - if (isTargetShuffleSplat(Op) && !DemandedElts.isOne()) + if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/false)) return false; // Get target/faux shuffle mask. @@ -40770,20 +41356,31 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( KnownBits KnownOp; SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); + + // Don't mask bits on 32-bit AVX512 targets which might lose a broadcast. // FIXME: Can we bound this better? 
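A worked example of the GetDemandedMasks logic in the ANDNP case above, for v4i32 ANDNP(C, X) with constant C = <0, -1, 0, -1> (values illustrative):

  // ANDNP(C, X) = ~C & X, and ~C = <-1, 0, -1, 0>:
  //   result lanes 1,3 are zero whatever X holds, so only lanes 0,2 of X
  //   are demanded; symmetrically, known-zero lanes of X would drop the
  //   matching lanes of C from the demand set.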
APInt DemandedMask = APInt::getLowBitsSet(64, 32); - if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp, - TLO, Depth + 1)) + APInt DemandedMaskLHS = APInt::getAllOnes(64); + APInt DemandedMaskRHS = APInt::getAllOnes(64); + + bool Is32BitAVX512 = !Subtarget.is64Bit() && Subtarget.hasAVX512(); + if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(LHS)) + DemandedMaskLHS = DemandedMask; + if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(RHS)) + DemandedMaskRHS = DemandedMask; + + if (SimplifyDemandedBits(LHS, DemandedMaskLHS, OriginalDemandedElts, + KnownOp, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp, - TLO, Depth + 1)) + if (SimplifyDemandedBits(RHS, DemandedMaskRHS, OriginalDemandedElts, + KnownOp, TLO, Depth + 1)) return true; // Aggressively peek through ops to get at the demanded low bits. SDValue DemandedLHS = SimplifyMultipleUseDemandedBits( - LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + LHS, DemandedMaskLHS, OriginalDemandedElts, TLO.DAG, Depth + 1); SDValue DemandedRHS = SimplifyMultipleUseDemandedBits( - RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + RHS, DemandedMaskRHS, OriginalDemandedElts, TLO.DAG, Depth + 1); if (DemandedLHS || DemandedRHS) { DemandedLHS = DemandedLHS ? DemandedLHS : LHS; DemandedRHS = DemandedRHS ? DemandedRHS : RHS; @@ -41084,7 +41681,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( TLO, Depth + 1)) return true; - Known.Zero = KnownZero.zextOrSelf(BitWidth); + Known.Zero = KnownZero.zext(BitWidth); Known.Zero.setHighBits(BitWidth - NumElts); // MOVMSK only uses the MSB from each vector element. @@ -41291,12 +41888,8 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op, switch (Opc) { case X86ISD::VBROADCAST: case X86ISD::VBROADCAST_LOAD: - // TODO: Permit vXi64 types on 32-bit targets. - if (isTypeLegal(Op.getValueType().getVectorElementType())) { - UndefElts = APInt::getNullValue(NumElts); - return true; - } - return false; + UndefElts = APInt::getNullValue(NumElts); + return true; } return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts, @@ -42840,10 +43433,29 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG, return SDValue(); SDLoc DL(ExtElt); + unsigned NumElts = VecVT.getVectorNumElements(); + unsigned EltSizeInBits = VecVT.getScalarSizeInBits(); + + // Extend v4i8/v8i8 vector to v16i8, with undef upper 64-bits. + auto WidenToV16I8 = [&](SDValue V, bool ZeroExtend) { + if (V.getValueType() == MVT::v4i8) { + if (ZeroExtend && Subtarget.hasSSE41()) { + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, + DAG.getConstant(0, DL, MVT::v4i32), + DAG.getBitcast(MVT::i32, V), + DAG.getIntPtrConstant(0, DL)); + return DAG.getBitcast(MVT::v16i8, V); + } + V = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, V, + ZeroExtend ? DAG.getConstant(0, DL, MVT::v4i8) + : DAG.getUNDEF(MVT::v4i8)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V, + DAG.getUNDEF(MVT::v8i8)); + }; // vXi8 mul reduction - promote to vXi16 mul reduction. 
   if (Opc == ISD::MUL) {
-    unsigned NumElts = VecVT.getVectorNumElements();
     if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
       return SDValue();
     if (VecVT.getSizeInBits() >= 128) {
@@ -42858,11 +43470,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
         Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
       }
     } else {
-      if (VecVT == MVT::v4i8)
-        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
-                          DAG.getUNDEF(MVT::v4i8));
-      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
-                        DAG.getUNDEF(MVT::v8i8));
+      Rdx = WidenToV16I8(Rdx, false);
       Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
       Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
     }
@@ -42882,24 +43490,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
 
   // vXi8 add reduction - sub 128-bit vector.
   if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
-    if (VecVT == MVT::v4i8) {
-      // Pad with zero.
-      if (Subtarget.hasSSE41()) {
-        Rdx = DAG.getBitcast(MVT::i32, Rdx);
-        Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
-                          DAG.getConstant(0, DL, MVT::v4i32), Rdx,
-                          DAG.getIntPtrConstant(0, DL));
-        Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
-      } else {
-        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
-                          DAG.getConstant(0, DL, VecVT));
-      }
-    }
-    if (Rdx.getValueType() == MVT::v8i8) {
-      // Pad with undef.
-      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
-                        DAG.getUNDEF(MVT::v8i8));
-    }
+    Rdx = WidenToV16I8(Rdx, true);
     Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                       DAG.getConstant(0, DL, MVT::v16i8));
     Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
@@ -42907,8 +43498,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
   }
 
   // Must be a >=128-bit vector with pow2 elements.
-  if ((VecVT.getSizeInBits() % 128) != 0 ||
-      !isPowerOf2_32(VecVT.getVectorNumElements()))
+  if ((VecVT.getSizeInBits() % 128) != 0 || !isPowerOf2_32(NumElts))
     return SDValue();
 
   // vXi8 add reduction - sum lo/hi halves then use PSADBW.
@@ -42931,6 +43521,48 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
   }
 
+  // See if we can use vXi8 PSADBW add reduction for larger zext types.
+  // If the source vector values are 0-255, then we can use PSADBW to
+  // sum+zext v8i8 subvectors to vXi64, then perform the reduction.
+  // TODO: See if it's worth avoiding vXi16/i32 truncations?
+  if (Opc == ISD::ADD && NumElts >= 4 && EltSizeInBits >= 16 &&
+      DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
+      (EltSizeInBits == 16 || Rdx.getOpcode() == ISD::ZERO_EXTEND ||
+       Subtarget.hasAVX512())) {
+    EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
+    Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
+    if (ByteVT.getSizeInBits() < 128)
+      Rdx = WidenToV16I8(Rdx, true);
+
+    // Build the PSADBW, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
+    auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+                            ArrayRef<SDValue> Ops) {
+      MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);
+      SDValue Zero = DAG.getConstant(0, DL, Ops[0].getValueType());
+      return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops[0], Zero);
+    };
+    MVT SadVT = MVT::getVectorVT(MVT::i64, Rdx.getValueSizeInBits() / 64);
+    Rdx = SplitOpsAndApply(DAG, Subtarget, DL, SadVT, {Rdx}, PSADBWBuilder);
+
+    // TODO: We could truncate to vXi16/vXi32 before performing the reduction.
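An end-to-end sketch of the new PSADBW reduction path above, assuming a v16i16 add reduction whose lanes are provably <= 255 (e.g. each was zero-extended from i8):

  // v16i16 Rdx --trunc--> v16i8            (values fit in a byte)
  // psadbw v16i8, 0      --> v2i64         (two 8-byte partial sums)
  // the while-loop that follows folds any >128-bit result in halves with
  // ADD, a <1,-1> shuffle + ADD combines the two i64 lanes when
  // NumElts > 8, and lane 0 is finally extracted as the scalar result.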
+ while (Rdx.getValueSizeInBits() > 128) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL); + VecVT = Lo.getValueType(); + Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi); + } + assert(Rdx.getValueType() == MVT::v2i64 && "v2i64 reduction expected"); + + if (NumElts > 8) { + SDValue RdxHi = DAG.getVectorShuffle(MVT::v2i64, DL, Rdx, Rdx, {1, -1}); + Rdx = DAG.getNode(ISD::ADD, DL, MVT::v2i64, Rdx, RdxHi); + } + + VecVT = MVT::getVectorVT(VT.getSimpleVT(), 128 / VT.getSizeInBits()); + Rdx = DAG.getBitcast(VecVT, Rdx); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); + } + // Only use (F)HADD opcodes if they aren't microcoded or minimizes codesize. if (!shouldUseHorizontalOp(true, DAG, Subtarget)) return SDValue(); @@ -42994,8 +43626,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, uint64_t Idx = CIdx->getZExtValue(); if (UndefVecElts[Idx]) return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT); - return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()), - dl, VT); + return DAG.getConstant(EltBits[Idx].zext(VT.getScalarSizeInBits()), dl, + VT); } } @@ -43076,29 +43708,32 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, // but not // i1 = extract_vector_elt t0:1, Constant:i64<2> // since the latter would need its own MOVMSK. - if (CIdx && SrcVT.getScalarType() == MVT::i1) { + if (SrcVT.getScalarType() == MVT::i1) { + bool IsVar = !CIdx; SmallVector BoolExtracts; unsigned ResNo = InputVector.getResNo(); - auto IsBoolExtract = [&BoolExtracts, &ResNo](SDNode *Use) { + auto IsBoolExtract = [&BoolExtracts, &ResNo, &IsVar](SDNode *Use) { if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(Use->getOperand(1)) && Use->getOperand(0).getResNo() == ResNo && Use->getValueType(0) == MVT::i1) { BoolExtracts.push_back(Use); + IsVar |= !isa(Use->getOperand(1)); return true; } return false; }; + // TODO: Can we drop the oneuse check for constant extracts? 
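A sketch of the relaxed MOVMSK combine above: a variable extract index no longer blocks it, because the per-use mask is now built with a shift instead of a constant (BCVT is the movmsk integer type from the hunk):

  // extractelement vXi1 V, idx
  //   --> ((movmsk V) & (1 << idx)) == (1 << idx)
  SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8);
  SDValue Mask = DAG.getNode(ISD::SHL, dl, BCVT,
                             DAG.getConstant(1, dl, BCVT), MaskIdx);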
if (all_of(InputVector->uses(), IsBoolExtract) && - BoolExtracts.size() > 1) { + (IsVar || BoolExtracts.size() > 1)) { EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts); if (SDValue BC = combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) { for (SDNode *Use : BoolExtracts) { // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask - unsigned MaskIdx = Use->getConstantOperandVal(1); - APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx); - SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT); + // Mask = 1 << MaskIdx + SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8); + SDValue MaskBit = DAG.getConstant(1, dl, BCVT); + SDValue Mask = DAG.getNode(ISD::SHL, dl, BCVT, MaskBit, MaskIdx); SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask); Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ); DCI.CombineTo(Use, Res); @@ -43123,7 +43758,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, auto *LoadVec = dyn_cast(InputVector); if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() && SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() && - !LikelyUsedAsVector) { + !LikelyUsedAsVector && LoadVec->isSimple()) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue NewPtr = TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx); @@ -43133,16 +43768,111 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, SDValue Load = DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment, LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo()); - SDValue Chain = Load.getValue(1); - SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)}; - SDValue To[] = {Load, Chain}; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - return SDValue(N, 0); + DAG.makeEquivalentMemoryOrdering(LoadVec, Load); + return Load; } return SDValue(); } +// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)). +// This is more or less the reverse of combineBitcastvxi1. +static SDValue combineToExtendBoolVectorInReg( + unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { + if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND && + Opcode != ISD::ANY_EXTEND) + return SDValue(); + if (!DCI.isBeforeLegalizeOps()) + return SDValue(); + if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) + return SDValue(); + + EVT SVT = VT.getScalarType(); + EVT InSVT = N0.getValueType().getScalarType(); + unsigned EltSizeInBits = SVT.getSizeInBits(); + + // Input type must be extending a bool vector (bit-casted from a scalar + // integer) to legal integer types. + if (!VT.isVector()) + return SDValue(); + if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8) + return SDValue(); + if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + EVT SclVT = N00.getValueType(); + if (!SclVT.isScalarInteger()) + return SDValue(); + + SDValue Vec; + SmallVector ShuffleMask; + unsigned NumElts = VT.getVectorNumElements(); + assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size"); + + // Broadcast the scalar integer to the vector elements. + if (NumElts > EltSizeInBits) { + // If the scalar integer is greater than the vector element size, then we + // must split it down into sub-sections for broadcasting. For example: + // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections. 
+ // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. + assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); + unsigned Scale = NumElts / EltSizeInBits; + EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); + Vec = DAG.getBitcast(VT, Vec); + + for (unsigned i = 0; i != Scale; ++i) + ShuffleMask.append(EltSizeInBits, i); + Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); + } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits && + (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) { + // If we have register broadcast instructions, use the scalar size as the + // element type for the shuffle. Then cast to the wider element type. The + // widened bits won't be used, and this might allow the use of a broadcast + // load. + assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale"); + unsigned Scale = EltSizeInBits / NumElts; + EVT BroadcastVT = + EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale); + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); + ShuffleMask.append(NumElts * Scale, 0); + Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask); + Vec = DAG.getBitcast(VT, Vec); + } else { + // For smaller scalar integers, we can simply any-extend it to the vector + // element size (we don't care about the upper bits) and broadcast it to all + // elements. + SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT); + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); + ShuffleMask.append(NumElts, 0); + Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); + } + + // Now, mask the relevant bit in each element. + SmallVector Bits; + for (unsigned i = 0; i != NumElts; ++i) { + int BitIdx = (i % EltSizeInBits); + APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1); + Bits.push_back(DAG.getConstant(Bit, DL, SVT)); + } + SDValue BitMask = DAG.getBuildVector(VT, DL, Bits); + Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask); + + // Compare against the bitmask and extend the result. + EVT CCVT = VT.changeVectorElementType(MVT::i1); + Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ); + Vec = DAG.getSExtOrTrunc(Vec, DL, VT); + + // For SEXT, this is now done, otherwise shift the result down for + // zero-extension. + if (Opcode == ISD::SIGN_EXTEND) + return Vec; + return DAG.getNode(ISD::SRL, DL, VT, Vec, + DAG.getConstant(EltSizeInBits - 1, DL, VT)); +} + /// If a vector select has an operand that is -1 or 0, try to simplify the /// select to a bitwise logic operation. /// TODO: Move to DAGCombiner, possibly using TargetLowering::hasAndNot()? @@ -43270,8 +44000,8 @@ static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, SDValue FVal = N->getOperand(2); SmallVector CatOpsT, CatOpsF; if (!TVal.hasOneUse() || !FVal.hasOneUse() || - !collectConcatOps(TVal.getNode(), CatOpsT) || - !collectConcatOps(FVal.getNode(), CatOpsF)) + !collectConcatOps(TVal.getNode(), CatOpsT, DAG) || + !collectConcatOps(FVal.getNode(), CatOpsF, DAG)) return SDValue(); auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL, @@ -43360,19 +44090,17 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { /// This function will also call SimplifyDemandedBits on already created /// BLENDV to perform additional simplifications. 
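A worked example of the combineToExtendBoolVectorInReg helper above, for sext of (v8i1 bitcast (i8 %b)) to v8i16 (lane values shown per element; an illustrative sketch):

  // broadcast: <b, b, b, b, b, b, b, b>        (any-extended scalar)
  // bit mask:  <1, 2, 4, 8, 16, 32, 64, 128>   (one bit per lane)
  // and + setcc-eq: lane i -> all-ones iff bit i of %b is set
  // SIGN_EXTEND keeps the all-ones lanes; for ZERO_EXTEND each lane is
  // then logically shifted right by 15 to leave a 0/1.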
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); if ((N->getOpcode() != ISD::VSELECT && N->getOpcode() != X86ISD::BLENDV) || ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) return SDValue(); - // Don't optimize before the condition has been transformed to a legal type - // and don't ever optimize vector selects that map to AVX512 mask-registers. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned BitWidth = Cond.getScalarValueSizeInBits(); - if (BitWidth < 8 || BitWidth > 64) - return SDValue(); + EVT VT = N->getValueType(0); // We can only handle the cases where VSELECT is directly legal on the // subtarget. We custom lower VSELECT nodes with constant conditions and @@ -43384,8 +44112,6 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, // Potentially, we should combine constant-condition vselect nodes // pre-legalization into shuffles and not mark as many types as custom // lowered. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = N->getValueType(0); if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) return SDValue(); // FIXME: We don't support i16-element blends currently. We could and @@ -43403,6 +44129,11 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, if (VT.is512BitVector()) return SDValue(); + // Don't optimize before the condition has been transformed to a legal type + // and don't ever optimize vector selects that map to AVX512 mask-registers. + if (BitWidth < 8 || BitWidth > 64) + return SDValue(); + auto OnlyUsedAsSelectCond = [](SDValue Cond) { for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end(); UI != UE; ++UI) @@ -43542,9 +44273,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return V; // Convert vselects with constant condition into shuffles. - if (CondConstantVector && DCI.isBeforeLegalizeOps()) { + if (CondConstantVector && DCI.isBeforeLegalizeOps() && + (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV)) { SmallVector Mask; - if (createShuffleMaskFromVSELECT(Mask, Cond)) + if (createShuffleMaskFromVSELECT(Mask, Cond, + N->getOpcode() == X86ISD::BLENDV)) return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); } @@ -43565,11 +44298,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // getConstVector sets negative shuffle mask values as undef, so ensure // we hardcode SM_SentinelZero values to zero (0x80). if (CondMask[i] < NumElts) { - LHSMask[i] = (LHSMask[i] == SM_SentinelZero) ? 0x80 : LHSMask[i]; + LHSMask[i] = isUndefOrZero(LHSMask[i]) ? 0x80 : LHSMask[i]; RHSMask[i] = 0x80; } else { LHSMask[i] = 0x80; - RHSMask[i] = (RHSMask[i] == SM_SentinelZero) ? 0x80 : RHSMask[i]; + RHSMask[i] = isUndefOrZero(RHSMask[i]) ? 0x80 : RHSMask[i]; } } LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0), @@ -43586,7 +44319,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. 
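// Aside on createShuffleMaskFromVSELECT, used in the constant-condition fold
// below: a VSELECT whose condition is a constant all-ones/all-zeros per-lane
// vector is statically a shuffle. A standalone sketch of the mask
// construction; the helper name and the "-1 selects LHS" encoding are
// illustrative assumptions, not this patch's code:

#include <cassert>
#include <cstdint>
#include <vector>

// Lane I takes LHS[I] when Cond[I] is all-ones (-1), otherwise RHS[I],
// which a two-input shuffle encodes as index I + NumElts.
static std::vector<int> shuffleMaskFromVSelect(const std::vector<int32_t> &Cond) {
  std::vector<int> Mask;
  int NumElts = static_cast<int>(Cond.size());
  for (int I = 0; I != NumElts; ++I)
    Mask.push_back(Cond[I] == -1 ? I : I + NumElts);
  return Mask;
}

int main() {
  std::vector<int> Mask = shuffleMaskFromVSelect({-1, 0, 0, -1});
  assert((Mask == std::vector<int>{0, 5, 6, 3}));
  return 0;
}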
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && VT != MVT::f128 && + VT != MVT::f80 && VT != MVT::f128 && !isSoftFP16(VT, Subtarget) && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget.hasSSE2() || (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { @@ -43880,7 +44613,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // If this an avx512 target we can improve the use of zero masking by // swapping the operands and inverting the condition. if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() && - Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 && ISD::isBuildVectorAllZeros(LHS.getNode()) && !ISD::isBuildVectorAllZeros(RHS.getNode())) { // Invert the cond to not(cond) : xor(op,allones)=not(op) @@ -43889,6 +44622,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(DL, VT, CondNew, RHS, LHS); } + // Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might + // get split by legalization. + if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST && + CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && + TLI.isTypeLegal(VT.getScalarType())) { + EVT ExtCondVT = VT.changeVectorElementTypeToInteger(); + if (SDValue ExtCond = combineToExtendBoolVectorInReg( + ISD::SIGN_EXTEND, DL, ExtCondVT, Cond, DAG, DCI, Subtarget)) { + ExtCond = DAG.getNode(ISD::TRUNCATE, DL, CondVT, ExtCond); + return DAG.getSelect(DL, VT, ExtCond, LHS, RHS); + } + } + // Early exit check if (!TLI.isTypeLegal(VT)) return SDValue(); @@ -44301,14 +45047,15 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) { if (EFLAGS.getOpcode() == X86ISD::ADD) { if (isAllOnesConstant(EFLAGS.getOperand(1))) { + bool FoundAndLSB = false; SDValue Carry = EFLAGS.getOperand(0); while (Carry.getOpcode() == ISD::TRUNCATE || Carry.getOpcode() == ISD::ZERO_EXTEND || - Carry.getOpcode() == ISD::SIGN_EXTEND || - Carry.getOpcode() == ISD::ANY_EXTEND || (Carry.getOpcode() == ISD::AND && - isOneConstant(Carry.getOperand(1)))) + isOneConstant(Carry.getOperand(1)))) { + FoundAndLSB |= Carry.getOpcode() == ISD::AND; Carry = Carry.getOperand(0); + } if (Carry.getOpcode() == X86ISD::SETCC || Carry.getOpcode() == X86ISD::SETCC_CARRY) { // TODO: Merge this code with equivalent in combineAddOrSubToADCOrSBB? @@ -44339,6 +45086,14 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) { CarryOp1.getOpcode() == X86ISD::ADD && isOneConstant(CarryOp1.getOperand(1))) return CarryOp1; + } else if (FoundAndLSB) { + SDLoc DL(Carry); + SDValue BitNo = DAG.getConstant(0, DL, Carry.getValueType()); + if (Carry.getOpcode() == ISD::SRL) { + BitNo = Carry.getOperand(1); + Carry = Carry.getOperand(0); + } + return getBT(Carry, BitNo, DL, DAG); } } } @@ -44533,6 +45288,12 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, if (!IsAnyOf && !IsAllOf) return SDValue(); + // TODO: Check more combining cases for me. + // Here we check the cmp use number to decide do combining or not. + // Currently we only get 2 tests about combining "MOVMSK(CONCAT(..))" + // and "MOVMSK(PCMPEQ(..))" are fit to use this constraint. + bool IsOneUse = CmpOp.getNode()->hasOneUse(); + // See if we can peek through to a vector with a wider element type, if the // signbits extend down to all the sub-elements as well. 
// Calling MOVMSK with the wider type, avoiding the bitcast, helps expose @@ -44561,9 +45322,9 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, // MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)). // MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)). // MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)). - if (VecVT.is256BitVector() && NumElts <= CmpBits) { + if (VecVT.is256BitVector() && NumElts <= CmpBits && IsOneUse) { SmallVector Ops; - if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) && + if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops, DAG) && Ops.size() == 2) { SDLoc DL(EFLAGS); EVT SubVT = Ops[0].getValueType().changeTypeToInteger(); @@ -44582,7 +45343,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X). // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)). // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)). - if (IsAllOf && Subtarget.hasSSE41()) { + if (IsAllOf && Subtarget.hasSSE41() && IsOneUse) { MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; SDValue BC = peekThroughBitcasts(Vec); // Ensure MOVMSK was testing every signbit of BC. @@ -44734,7 +45495,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, if (!(FalseOp.getValueType() == MVT::f80 || (FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) || (FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) || - !Subtarget.hasCMov() || hasFPCMov(CC)) { + !Subtarget.canUseCMOV() || hasFPCMov(CC)) { SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8), Flags}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); @@ -45181,8 +45942,6 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, if (NumElts == 1 || !isPowerOf2_32(NumElts)) return SDValue(); - EVT WVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, 2 * NumElts); - // With AVX512 but without BWI, we would need to split v32i16. if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return SDValue(); @@ -45265,11 +46024,13 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, // Use SplitOpsAndApply to handle AVX splitting. 
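// Aside on the MOVMSK(CONCAT(X,Y)) folds above: the sign bit of each lane of
// OR(X,Y) / AND(X,Y) is the OR / AND of the corresponding sign bits, so the
// all-zero and all-ones tests carry over. A standalone scalar sketch over
// 4-lane "vectors" (movmsk4 is an illustrative stand-in for MOVMSK):

#include <cassert>
#include <cstdint>

static unsigned movmsk4(const int32_t V[4]) {
  unsigned M = 0;
  for (int I = 0; I != 4; ++I)
    M |= (static_cast<uint32_t>(V[I]) >> 31) << I; // gather lane sign bits
  return M;
}

int main() {
  int32_t X[4] = {1, -2, 3, -4}, Y[4] = {-1, 2, 3, -4};
  int32_t Or[4], And[4];
  for (int I = 0; I != 4; ++I) {
    Or[I] = X[I] | Y[I];
    And[I] = X[I] & Y[I];
  }
  unsigned Cat = (movmsk4(Y) << 4) | movmsk4(X); // MOVMSK(CONCAT(X,Y))
  assert((Cat == 0) == (movmsk4(Or) == 0));       // the "== 0" test
  assert((Cat == 0xFF) == (movmsk4(And) == 0xF)); // the "== -1" test
  return 0;
}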
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { - MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); - return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops); + MVT ResVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); + MVT OpVT = MVT::getVectorVT(MVT::i16, Ops[0].getValueSizeInBits() / 16); + return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, + DAG.getBitcast(OpVT, Ops[0]), + DAG.getBitcast(OpVT, Ops[1])); }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) }, + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1}, PMADDWDBuilder); } @@ -45622,12 +46383,11 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SarConst = SarConst - (Size - ShiftSize); if (SarConst == 0) return NN; - else if (SarConst.isNegative()) + if (SarConst.isNegative()) return DAG.getNode(ISD::SHL, DL, VT, NN, DAG.getConstant(-SarConst, DL, CVT)); - else - return DAG.getNode(ISD::SRA, DL, VT, NN, - DAG.getConstant(SarConst, DL, CVT)); + return DAG.getNode(ISD::SRA, DL, VT, NN, + DAG.getConstant(SarConst, DL, CVT)); } return SDValue(); } @@ -46034,11 +46794,9 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG, EltBits[0].getZExtValue(), DAG); } - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); @@ -46461,11 +47219,17 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(VT, FPLogic); } + if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() || + !N1.hasOneUse()) + return SDValue(); + + ISD::CondCode CC0 = cast(N0.getOperand(2))->get(); + ISD::CondCode CC1 = cast(N1.getOperand(2))->get(); + // The vector ISA for FP predicates is incomplete before AVX, so converting // COMIS* to CMPS* may not be a win before AVX. - // TODO: Check types/predicates to see if they are available with SSE/SSE2. - if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || - !N0.hasOneUse() || !N1.hasOneUse()) + if (!Subtarget.hasAVX() && + !(cheapX86FSETCC_SSE(CC0) && cheapX86FSETCC_SSE(CC1))) return SDValue(); // Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*) @@ -46482,10 +47246,8 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01); SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10); SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11); - SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01, - cast(N0.getOperand(2))->get()); - SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11, - cast(N1.getOperand(2))->get()); + SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01, CC0); + SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11, CC1); SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex); } @@ -46891,6 +47653,53 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget)) return R; + // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2)) + // iff c2 is all/no bits mask - i.e. 
a select-with-zero mask. + // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW? + if (VT.isVector() && getTargetConstantFromNode(N1)) { + unsigned Opc0 = N0.getOpcode(); + if ((Opc0 == ISD::MUL || Opc0 == ISD::MULHU || Opc0 == ISD::MULHS) && + getTargetConstantFromNode(N0.getOperand(1)) && + DAG.ComputeNumSignBits(N1) == VT.getScalarSizeInBits() && + N0->hasOneUse() && N0.getOperand(1)->hasOneUse()) { + SDValue MaskMul = DAG.getNode(ISD::AND, dl, VT, N0.getOperand(1), N1); + return DAG.getNode(Opc0, dl, VT, N0.getOperand(0), MaskMul); + } + } + + // Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant + // avoids slow variable shift (moving shift amount to ECX etc.) + if (isOneConstant(N1) && N0->hasOneUse()) { + SDValue Src = N0; + while ((Src.getOpcode() == ISD::ZERO_EXTEND || + Src.getOpcode() == ISD::TRUNCATE) && + Src.getOperand(0)->hasOneUse()) + Src = Src.getOperand(0); + bool ContainsNOT = false; + X86::CondCode X86CC = X86::COND_B; + // Peek through AND(NOT(SRL(X,Y)),1). + if (isBitwiseNot(Src)) { + Src = Src.getOperand(0); + X86CC = X86::COND_AE; + ContainsNOT = true; + } + if (Src.getOpcode() == ISD::SRL && + !isa(Src.getOperand(1))) { + SDValue BitNo = Src.getOperand(1); + Src = Src.getOperand(0); + // Peek through AND(SRL(NOT(X),Y),1). + if (isBitwiseNot(Src)) { + Src = Src.getOperand(0); + X86CC = X86CC == X86::COND_AE ? X86::COND_B : X86::COND_AE; + ContainsNOT = true; + } + // If we have BMI2 then SHRX should be faster for i32/i64 cases. + if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32)) + if (SDValue BT = getBT(Src, BitNo, dl, DAG)) + return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT); + } + } + if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { // Attempt to recursively combine a bitmask AND with shuffles. SDValue Op(N, 0); @@ -46899,32 +47708,44 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, // If either operand is a constant mask, then only the elements that aren't // zero are actually demanded by the other operand. 
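// The AND(SRL(X,Y),1) -> SETCC(BT(X,Y)) fold above relies on the bit-test
// reading of that expression: BT sets CF to bit Y of X, i.e. (X >> Y) & 1.
// A standalone scalar sketch (testBit is illustrative, not a patch helper):

#include <cassert>
#include <cstdint>

static uint32_t testBit(uint32_t X, uint32_t Y) { return (X >> (Y & 31)) & 1u; }

int main() {
  uint32_t X = 0b1010;
  assert(testBit(X, 1) == 1 && testBit(X, 2) == 0);
  // The NOT variants peeked through above flip the expected condition
  // (COND_B <-> COND_AE) instead of materialising the NOT:
  assert(((~X >> 1) & 1u) == 1u - testBit(X, 1));
  return 0;
}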
- auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) { + auto GetDemandedMasks = [&](SDValue Op) { APInt UndefElts; SmallVector EltBits; int NumElts = VT.getVectorNumElements(); int EltSizeInBits = VT.getScalarSizeInBits(); - if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits)) - return false; - - APInt DemandedBits = APInt::getZero(EltSizeInBits); - APInt DemandedElts = APInt::getZero(NumElts); - for (int I = 0; I != NumElts; ++I) - if (!EltBits[I].isZero()) { - DemandedBits |= EltBits[I]; - DemandedElts.setBit(I); - } - - APInt KnownUndef, KnownZero; - return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef, - KnownZero, DCI) || - TLI.SimplifyDemandedBits(OtherOp, DemandedBits, DemandedElts, DCI); + APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); + APInt DemandedElts = APInt::getAllOnes(NumElts); + if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, + EltBits)) { + DemandedBits.clearAllBits(); + DemandedElts.clearAllBits(); + for (int I = 0; I != NumElts; ++I) + if (!EltBits[I].isZero()) { + DemandedBits |= EltBits[I]; + DemandedElts.setBit(I); + } + } + return std::make_pair(DemandedBits, DemandedElts); }; - if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) { + std::pair Demand0 = GetDemandedMasks(N1); + std::pair Demand1 = GetDemandedMasks(N0); + + if (TLI.SimplifyDemandedVectorElts(N0, Demand0.second, DCI) || + TLI.SimplifyDemandedVectorElts(N1, Demand1.second, DCI) || + TLI.SimplifyDemandedBits(N0, Demand0.first, Demand0.second, DCI) || + TLI.SimplifyDemandedBits(N1, Demand1.first, Demand1.second, DCI)) { if (N->getOpcode() != ISD::DELETED_NODE) DCI.AddToWorklist(N); return SDValue(N, 0); } + + SDValue NewN0 = TLI.SimplifyMultipleUseDemandedBits(N0, Demand0.first, + Demand0.second, DAG); + SDValue NewN1 = TLI.SimplifyMultipleUseDemandedBits(N1, Demand1.first, + Demand1.second, DAG); + if (NewN0 || NewN1) + return DAG.getNode(ISD::AND, dl, VT, NewN0 ? NewN0 : N0, + NewN1 ? NewN1 : N1); } // Attempt to combine a scalar bitmask AND with an extracted shuffle. @@ -47127,8 +47948,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, // into: // srl(ctlz x), log2(bitsize(x)) // Input pattern is checked by caller. -static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, - SelectionDAG &DAG) { +static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) { SDValue Cmp = Op.getOperand(1); EVT VT = Cmp.getOperand(0).getValueType(); unsigned Log2b = Log2_32(VT.getSizeInBits()); @@ -47139,7 +47959,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, DAG.getConstant(Log2b, dl, MVT::i8)); - return DAG.getZExtOrTrunc(Scc, dl, ExtTy); + return Scc; } // Try to transform: @@ -47199,11 +48019,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, // or(srl(ctlz),srl(ctlz)). // The dag combiner can then fold it into: // srl(or(ctlz, ctlz)). 
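// The srl(ctlz) rewrite above rests on this identity for a W-bit X:
// (X == 0 ? 1 : 0) == ctlz(X) >> log2(W), since only X == 0 yields
// ctlz(X) == W and thus sets bit log2(W). A standalone C++20 sketch for
// W == 32:

#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t isZeroViaCtlz(uint32_t X) {
  // countl_zero(0) == 32 and 32 >> 5 == 1; any nonzero X has ctlz < 32 -> 0.
  return static_cast<uint32_t>(std::countl_zero(X)) >> 5;
}

int main() {
  assert(isZeroViaCtlz(0) == 1);
  assert(isZeroViaCtlz(1) == 0);
  assert(isZeroViaCtlz(0x80000000u) == 0);
  return 0;
}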
- EVT VT = OR->getValueType(0); - SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); + SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG); SDValue Ret, NewRHS; - if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); + if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG))) + Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS); if (!Ret) return SDValue(); @@ -47216,21 +48035,18 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or). if (RHS->getOpcode() == ISD::OR) std::swap(LHS, RHS); - NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); + NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG); if (!NewRHS) return SDValue(); - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); + Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS); } - if (Ret) - Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); - - return Ret; + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); } static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, - SDValue And1_L, SDValue And1_R, SDLoc DL, - SelectionDAG &DAG) { + SDValue And1_L, SDValue And1_R, + const SDLoc &DL, SelectionDAG &DAG) { if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse()) return SDValue(); SDValue NotOp = And0_L->getOperand(0); @@ -47352,7 +48168,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts); if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL && N1.getConstantOperandAPInt(1) == HalfElts && - DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) { + DAG.MaskedVectorIsZero(N0, UpperElts)) { return DAG.getNode( ISD::CONCAT_VECTORS, dl, VT, extractSubVector(N0, 0, DAG, dl, HalfElts), @@ -47360,7 +48176,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, } if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL && N0.getConstantOperandAPInt(1) == HalfElts && - DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) { + DAG.MaskedVectorIsZero(N1, UpperElts)) { return DAG.getNode( ISD::CONCAT_VECTORS, dl, VT, extractSubVector(N1, 0, DAG, dl, HalfElts), @@ -47389,9 +48205,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, if (!EltBits[I].isAllOnes()) DemandedElts.setBit(I); - APInt KnownUndef, KnownZero; - return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef, - KnownZero, DCI); + return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, DCI); }; if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) { if (N->getOpcode() != ISD::DELETED_NODE) @@ -47618,7 +48432,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, // clip to 0-255. if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && InVT == MVT::v16i32 && VT == MVT::v16i8) { - if (auto USatVal = detectSSatPattern(In, VT, true)) { + if (SDValue USatVal = detectSSatPattern(In, VT, true)) { // Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB. SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal, DL, DAG, Subtarget); @@ -47643,7 +48457,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, VT.getSizeInBits() >= 64 && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { - if (auto USatVal = detectSSatPattern(In, VT, true)) { + if (SDValue USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). 
// Only do this when the result is at least 64 bits or we'll leaving // dangling PACKSSDW nodes. @@ -47660,7 +48474,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG, Subtarget); } - if (auto SSatVal = detectSSatPattern(In, VT)) + if (SDValue SSatVal = detectSSatPattern(In, VT)) return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG, Subtarget); } @@ -47671,10 +48485,10 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, (SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) { unsigned TruncOpc = 0; SDValue SatVal; - if (auto SSatVal = detectSSatPattern(In, VT)) { + if (SDValue SSatVal = detectSSatPattern(In, VT)) { SatVal = SSatVal; TruncOpc = X86ISD::VTRUNCS; - } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) { + } else if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) { SatVal = USatVal; TruncOpc = X86ISD::VTRUNCUS; } @@ -47706,7 +48520,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient -/// X86ISD::AVG instruction. +/// ISD::AVGCEILU (AVG) instruction. static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL) { @@ -47769,7 +48583,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { - return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops); + return DAG.getNode(ISD::AVGCEILU, DL, Ops[0].getValueType(), Ops); }; auto AVGSplitter = [&](std::array Ops) { @@ -47872,7 +48686,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && ((Ld->isNonTemporal() && !Subtarget.hasInt256() && - Ld->getAlignment() >= 16) || + Ld->getAlign() >= Align(16)) || (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, *Ld->getMemOperand(), &Fast) && !Fast))) { @@ -48340,7 +49154,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, // Split under-aligned vector non-temporal stores. if (St->isNonTemporal() && StVT == VT && - St->getAlignment() < VT.getStoreSize()) { + St->getAlign().value() < VT.getStoreSize()) { // ZMM/YMM nt-stores - either it can be stored as a series of shorter // vectors or the legalizer can scalarize it to use MOVNTI. if (VT.is256BitVector() || VT.is512BitVector()) { @@ -48374,9 +49188,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } // Try to fold a VTRUNCUS or VTRUNCS into a truncating store. - if (!St->isTruncatingStore() && StoredVal.hasOneUse() && + if (!St->isTruncatingStore() && (StoredVal.getOpcode() == X86ISD::VTRUNCUS || StoredVal.getOpcode() == X86ISD::VTRUNCS) && + StoredVal.hasOneUse() && TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) { bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS; return EmitTruncSStore(IsSigned, St->getChain(), @@ -48385,15 +49200,15 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } // Try to fold a extract_element(VTRUNC) pattern into a truncating store. 
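// The AVG pattern above, c = (a + b + 1) / 2 on unsigned i8/i16 lanes, can be
// computed without widening; one overflow-safe scalar formulation, checked
// exhaustively for i8 (avgCeilU8 is illustrative, not a patch helper):

#include <cassert>
#include <cstdint>

static uint8_t avgCeilU8(uint8_t A, uint8_t B) {
  // (A | B) - ((A ^ B) >> 1) == (A + B + 1) / 2 without intermediate overflow.
  return static_cast<uint8_t>((A | B) - ((A ^ B) >> 1));
}

int main() {
  for (unsigned A = 0; A != 256; ++A)
    for (unsigned B = 0; B != 256; ++B)
      assert(avgCeilU8(static_cast<uint8_t>(A), static_cast<uint8_t>(B)) ==
             (A + B + 1) / 2);
  return 0;
}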
- if (!St->isTruncatingStore() && StoredVal.hasOneUse()) { + if (!St->isTruncatingStore()) { auto IsExtractedElement = [](SDValue V) { - if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse()) + if (V.getOpcode() == ISD::TRUNCATE && V.hasOneUse()) V = V.getOperand(0); unsigned Opc = V.getOpcode(); - if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) { - if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1))) - return V.getOperand(0); - } + if ((Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) && + isNullConstant(V.getOperand(1)) && V.hasOneUse() && + V.getOperand(0).hasOneUse()) + return V.getOperand(0); return SDValue(); }; if (SDValue Extract = IsExtractedElement(StoredVal)) { @@ -48531,10 +49346,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits(); APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef, - KnownZero, DCI)) { + if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, DCI)) { if (N->getOpcode() != ISD::DELETED_NODE) DCI.AddToWorklist(N); return SDValue(N, 0); @@ -49165,7 +49978,8 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, // PACK should still be worth it for 128-bit vectors if the sources were // originally concatenated from subvectors. SmallVector ConcatOps; - if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps)) + if (VT.getSizeInBits() > 128 || + !collectConcatOps(In.getNode(), ConcatOps, DAG)) return SDValue(); } @@ -49478,9 +50292,9 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG, SDValue In = N->getOperand(0); SDLoc DL(N); - if (auto SSatVal = detectSSatPattern(In, VT)) + if (SDValue SSatVal = detectSSatPattern(In, VT)) return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); - if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) + if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -49567,10 +50381,14 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) { if (!UndefElts[I] && !EltBits[I].isSignMask()) return SDValue(); - return peekThroughBitcasts(Op0); + // Only allow bitcast from correctly-sized constant. + Op0 = peekThroughBitcasts(Op0); + if (Op0.getScalarValueSizeInBits() == ScalarSize) + return Op0; } - } - } + break; + } // case + } // switch return SDValue(); } @@ -50074,10 +50892,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI)) return SDValue(N, 0); // Convert a full vector load into vzload when not all bits are needed. 
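// The VTRUNCS/VTRUNCUS folds above depend on detectSSatPattern /
// detectUSatPattern recognising clamp-then-truncate sequences. A minimal
// standalone scalar sketch of that equivalence (C++17; helper names are
// illustrative assumptions, not this patch's code):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Signed saturating truncate of i32 to i8: clamp to [-128, 127], then truncate.
static int8_t truncSSat8(int32_t X) {
  return static_cast<int8_t>(std::clamp<int32_t>(X, INT8_MIN, INT8_MAX));
}

// Unsigned saturating truncate of i32 to i8: clamp to [0, 255], then truncate.
static uint8_t truncUSat8(int32_t X) {
  return static_cast<uint8_t>(std::clamp<int32_t>(X, 0, UINT8_MAX));
}

int main() {
  assert(truncSSat8(300) == 127 && truncSSat8(-300) == -128);
  assert(truncUSat8(300) == 255 && truncUSat8(-5) == 0);
  return 0;
}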
@@ -50144,26 +50960,70 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); MVT VT = N->getSimpleValueType(0); + // ANDNP(undef, x) -> 0 + // ANDNP(x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + // ANDNP(0, x) -> x - if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) - return N->getOperand(1); + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; // ANDNP(x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) + if (ISD::isBuildVectorAllZeros(N1.getNode())) return DAG.getConstant(0, SDLoc(N), VT); // Turn ANDNP back to AND if input is inverted. - if (SDValue Not = IsNOT(N->getOperand(0), DAG)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), - N->getOperand(1)); + if (SDValue Not = IsNOT(N0, DAG)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1); + + // TODO: Constant fold NOT(N0) to allow us to use AND. + // TODO: Do this in IsNOT with suitable oneuse checks? // Attempt to recursively combine a bitmask ANDNP with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) return Res; + + // If either operand is a constant mask, then only the elements that aren't + // zero are actually demanded by the other operand. + auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) { + APInt UndefElts; + SmallVector EltBits; + int NumElts = VT.getVectorNumElements(); + int EltSizeInBits = VT.getScalarSizeInBits(); + APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); + APInt DemandedElts = APInt::getAllOnes(NumElts); + if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, + EltBits)) { + DemandedBits.clearAllBits(); + DemandedElts.clearAllBits(); + for (int I = 0; I != NumElts; ++I) + if ((Invert && !EltBits[I].isAllOnes()) || + (!Invert && !EltBits[I].isZero())) { + DemandedBits |= Invert ? ~EltBits[I] : EltBits[I]; + DemandedElts.setBit(I); + } + } + return std::make_pair(DemandedBits, DemandedElts); + }; + std::pair Demand0 = GetDemandedMasks(N1); + std::pair Demand1 = GetDemandedMasks(N0, true); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedVectorElts(N0, Demand0.second, DCI) || + TLI.SimplifyDemandedVectorElts(N1, Demand1.second, DCI) || + TLI.SimplifyDemandedBits(N0, Demand0.first, Demand0.second, DCI) || + TLI.SimplifyDemandedBits(N1, Demand1.first, Demand1.second, DCI)) { + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); + return SDValue(N, 0); + } } return SDValue(); @@ -50191,11 +51051,9 @@ static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(IsStrict ? 
1 : 0); if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) { - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedElts = APInt::getLowBitsSet(8, 4); - if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, - DCI)) { + if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, DCI)) { if (N->getOpcode() != ISD::DELETED_NODE) DCI.AddToWorklist(N); return SDValue(N, 0); @@ -50453,110 +51311,6 @@ static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { return Res; } -// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)). -// This is more or less the reverse of combineBitcastvxi1. -static SDValue -combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { - unsigned Opcode = N->getOpcode(); - if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND && - Opcode != ISD::ANY_EXTEND) - return SDValue(); - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) - return SDValue(); - - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT SVT = VT.getScalarType(); - EVT InSVT = N0.getValueType().getScalarType(); - unsigned EltSizeInBits = SVT.getSizeInBits(); - - // Input type must be extending a bool vector (bit-casted from a scalar - // integer) to legal integer types. - if (!VT.isVector()) - return SDValue(); - if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8) - return SDValue(); - if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST) - return SDValue(); - - SDValue N00 = N0.getOperand(0); - EVT SclVT = N0.getOperand(0).getValueType(); - if (!SclVT.isScalarInteger()) - return SDValue(); - - SDLoc DL(N); - SDValue Vec; - SmallVector ShuffleMask; - unsigned NumElts = VT.getVectorNumElements(); - assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size"); - - // Broadcast the scalar integer to the vector elements. - if (NumElts > EltSizeInBits) { - // If the scalar integer is greater than the vector element size, then we - // must split it down into sub-sections for broadcasting. For example: - // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections. - // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. - assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); - unsigned Scale = NumElts / EltSizeInBits; - EVT BroadcastVT = - EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); - Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); - Vec = DAG.getBitcast(VT, Vec); - - for (unsigned i = 0; i != Scale; ++i) - ShuffleMask.append(EltSizeInBits, i); - Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); - } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits && - (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) { - // If we have register broadcast instructions, use the scalar size as the - // element type for the shuffle. Then cast to the wider element type. The - // widened bits won't be used, and this might allow the use of a broadcast - // load. 
- assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale"); - unsigned Scale = EltSizeInBits / NumElts; - EVT BroadcastVT = - EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale); - Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); - ShuffleMask.append(NumElts * Scale, 0); - Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask); - Vec = DAG.getBitcast(VT, Vec); - } else { - // For smaller scalar integers, we can simply any-extend it to the vector - // element size (we don't care about the upper bits) and broadcast it to all - // elements. - SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT); - Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); - ShuffleMask.append(NumElts, 0); - Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); - } - - // Now, mask the relevant bit in each element. - SmallVector Bits; - for (unsigned i = 0; i != NumElts; ++i) { - int BitIdx = (i % EltSizeInBits); - APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1); - Bits.push_back(DAG.getConstant(Bit, DL, SVT)); - } - SDValue BitMask = DAG.getBuildVector(VT, DL, Bits); - Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask); - - // Compare against the bitmask and extend the result. - EVT CCVT = VT.changeVectorElementType(MVT::i1); - Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ); - Vec = DAG.getSExtOrTrunc(Vec, DL, VT); - - // For SEXT, this is now done, otherwise shift the result down for - // zero-extension. - if (Opcode == ISD::SIGN_EXTEND) - return Vec; - return DAG.getNode(ISD::SRL, DL, VT, Vec, - DAG.getConstant(EltSizeInBits - 1, DL, VT)); -} - // Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm // result type. static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, @@ -50636,7 +51390,8 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) + if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0, + DAG, DCI, Subtarget)) return V; if (VT.isVector()) { @@ -50790,7 +51545,8 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) + if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0, + DAG, DCI, Subtarget)) return V; if (VT.isVector()) @@ -50832,7 +51588,7 @@ static bool isOrXorXorTree(SDValue X, bool Root = true) { /// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp /// expansion. -template +template static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, EVT VecVT, EVT CmpVT, bool HasPT, F SToV) { SDValue Op0 = X.getOperand(0); @@ -50845,7 +51601,8 @@ static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, if (HasPT) return DAG.getNode(ISD::OR, DL, VecVT, A, B); return DAG.getNode(ISD::AND, DL, CmpVT, A, B); - } else if (X.getOpcode() == ISD::XOR) { + } + if (X.getOpcode() == ISD::XOR) { SDValue A = SToV(Op0); SDValue B = SToV(Op1); if (VecVT != CmpVT) @@ -51134,6 +51891,16 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, LHS.getValueType() == MVT::v4f32) return LowerVSETCC(SDValue(N, 0), Subtarget, DAG); + // X pred 0.0 --> X pred -X + // If the negation of X already exists, use it in the comparison. 
This removes + // the need to materialize 0.0 and allows matching to SSE's MIN/MAX + // instructions in patterns with a 'select' node. + if (isNullFPScalarOrVectorConst(RHS)) { + SDVTList FNegVT = DAG.getVTList(OpVT); + if (SDNode *FNeg = DAG.getNodeIfExists(ISD::FNEG, FNegVT, {LHS})) + return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC); + } + return SDValue(); } @@ -51145,16 +51912,18 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, MVT VT = N->getSimpleValueType(0); unsigned NumBits = VT.getScalarSizeInBits(); unsigned NumElts = SrcVT.getVectorNumElements(); + unsigned NumBitsPerElt = SrcVT.getScalarSizeInBits(); + assert(VT == MVT::i32 && NumElts <= NumBits && "Unexpected MOVMSK types"); // Perform constant folding. - if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) { - assert(VT == MVT::i32 && "Unexpected result type"); + APInt UndefElts; + SmallVector EltBits; + if (getTargetConstantBitsFromNode(Src, NumBitsPerElt, UndefElts, EltBits)) { APInt Imm(32, 0); - for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) { - if (!Src.getOperand(Idx).isUndef() && - Src.getConstantOperandAPInt(Idx).isNegative()) + for (unsigned Idx = 0; Idx != NumElts; ++Idx) + if (!UndefElts[Idx] && EltBits[Idx].isNegative()) Imm.setBit(Idx); - } + return DAG.getConstant(Imm, SDLoc(N), VT); } @@ -51713,8 +52482,6 @@ static bool needCarryOrOverflowFlag(SDValue Flags) { CC = (X86::CondCode)User->getConstantOperandVal(0); break; case X86ISD::BRCOND: - CC = (X86::CondCode)User->getConstantOperandVal(2); - break; case X86ISD::CMOV: CC = (X86::CondCode)User->getConstantOperandVal(2); break; @@ -51743,10 +52510,14 @@ static bool onlyZeroFlagUsed(SDValue Flags) { default: // Be conservative. return false; - case X86ISD::SETCC: CCOpNo = 0; break; - case X86ISD::SETCC_CARRY: CCOpNo = 0; break; - case X86ISD::BRCOND: CCOpNo = 2; break; - case X86ISD::CMOV: CCOpNo = 2; break; + case X86ISD::SETCC: + case X86ISD::SETCC_CARRY: + CCOpNo = 0; + break; + case X86ISD::BRCOND: + case X86ISD::CMOV: + CCOpNo = 2; + break; } X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); @@ -51757,6 +52528,215 @@ static bool onlyZeroFlagUsed(SDValue Flags) { return true; } +/// If this is an add or subtract where one operand is produced by a cmp+setcc, +/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} +/// with CMP+{ADC, SBB}. +/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}. +static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, + SDValue X, SDValue Y, + SelectionDAG &DAG, + bool ZeroSecondOpOnly = false) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + // Look through a one-use zext. + if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) + Y = Y.getOperand(0); + + X86::CondCode CC; + SDValue EFLAGS; + if (Y.getOpcode() == X86ISD::SETCC && Y.hasOneUse()) { + CC = (X86::CondCode)Y.getConstantOperandVal(0); + EFLAGS = Y.getOperand(1); + } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1)) && + Y.hasOneUse()) { + EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC); + } + + if (!EFLAGS) + return SDValue(); + + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. 
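// The "X pred 0.0 --> X pred -X" fold above is sound because comparing X with
// 0.0 and with -X agree for every lane value, including signed zeros and NaNs
// (both comparisons are false for NaN under ordered predicates). A quick
// standalone check:

#include <cassert>
#include <cmath>

static void checkPred(double X) {
  assert((X < 0.0) == (X < -X));
  assert((X <= 0.0) == (X <= -X));
  assert((X > 0.0) == (X > -X));
  assert((X >= 0.0) == (X >= -X));
}

int main() {
  for (double X : {1.5, -1.5, 0.0, -0.0, double(NAN)})
    checkPred(X);
  return 0;
}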
+ auto *ConstantX = dyn_cast(X); + if (ConstantX && !ZeroSecondOpOnly) { + if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) || + (IsSub && CC == X86::COND_B && ConstantX->isZero())) { + // This is a complicated way to get -1 or 0 from the carry flag: + // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax + // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), + EFLAGS); + } + + if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) || + (IsSub && CC == X86::COND_A && ConstantX->isZero())) { + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa(EFLAGS.getOperand(1))) { + // Swap the operands of a SUB, and we have the same pattern as above. + // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB + // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), + NewEFLAGS); + } + } + } + + if (CC == X86::COND_B) { + // X + SETB Z --> adc X, 0 + // X - SETB Z --> sbb X, 0 + return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(0, DL, VT), EFLAGS); + } + + if (ZeroSecondOpOnly) + return SDValue(); + + if (CC == X86::COND_A) { + // Try to convert COND_A into COND_B in an attempt to facilitate + // materializing "setb reg". + // + // Do not flip "e > c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. + // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa(EFLAGS.getOperand(1))) { + SDValue NewSub = + DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(0, DL, VT), NewEFLAGS); + } + } + + if (CC == X86::COND_AE) { + // X + SETAE --> sbb X, -1 + // X - SETAE --> adc X, -1 + return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(-1, DL, VT), EFLAGS); + } + + if (CC == X86::COND_BE) { + // X + SETBE --> sbb X, -1 + // X - SETBE --> adc X, -1 + // Try to convert COND_BE into COND_AE in an attempt to facilitate + // materializing "setae reg". + // + // Do not flip "e <= c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. + // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa(EFLAGS.getOperand(1))) { + SDValue NewSub = + DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + return DAG.getNode(IsSub ? 
X86ISD::ADC : X86ISD::SBB, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(-1, DL, VT), NewEFLAGS); + } + } + + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + if (EFLAGS.getOpcode() != X86ISD::CMP || !EFLAGS.hasOneUse() || + !X86::isZeroNode(EFLAGS.getOperand(1)) || + !EFLAGS.getOperand(0).getValueType().isInteger()) + return SDValue(); + + SDValue Z = EFLAGS.getOperand(0); + EVT ZVT = Z.getValueType(); + + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. + if (ConstantX) { + // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with + // fake operands: + // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) + // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) + if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) || + (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) { + SDValue Zero = DAG.getConstant(0, DL, ZVT); + SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), + SDValue(Neg.getNode(), 1)); + } + + // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb' + // with fake operands: + // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1) + // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1) + if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) || + (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) { + SDValue One = DAG.getConstant(1, DL, ZVT); + SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); + SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), + Cmp1.getValue(1)); + } + } + + // (cmp Z, 1) sets the carry flag if Z is 0. + SDValue One = DAG.getConstant(1, DL, ZVT); + SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); + SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One); + + // Add the flags type for ADC/SBB nodes. + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) + // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) + if (CC == X86::COND_NE) + return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X, + DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1)); + + // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) + // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) + return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X, + DAG.getConstant(0, DL, VT), Cmp1.getValue(1)); +} + +/// If this is an add or subtract where one operand is produced by a cmp+setcc, +/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} +/// with CMP+{ADC, SBB}. +static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { + bool IsSub = N->getOpcode() == ISD::SUB; + SDValue X = N->getOperand(0); + SDValue Y = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG)) + return ADCOrSBB; + + // Commute and try again (negate the result for subtracts). 
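// The SETB/SETAE rewrites in combineAddOrSubToADCOrSBB follow from the carry
// identities below (CF = carry flag, SETB == CF, SETAE == 1 - CF); the final
// commute negates because X - Y == 0 - (Y - X). A standalone modular sketch:

#include <cassert>
#include <cstdint>

static uint32_t adc(uint32_t A, uint32_t B, uint32_t CF) { return A + B + CF; }
static uint32_t sbb(uint32_t A, uint32_t B, uint32_t CF) { return A - B - CF; }

int main() {
  uint32_t X = 0xDEADBEEF;
  for (uint32_t CF = 0; CF <= 1; ++CF) {
    assert(X + CF == adc(X, 0, CF));                  // X + SETB  --> adc X, 0
    assert(X - CF == sbb(X, 0, CF));                  // X - SETB  --> sbb X, 0
    assert(X + (1 - CF) == sbb(X, uint32_t(-1), CF)); // X + SETAE --> sbb X, -1
    assert(X - (1 - CF) == adc(X, uint32_t(-1), CF)); // X - SETAE --> adc X, -1
  }
  return 0;
}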
+ if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) { + if (IsSub) + ADCOrSBB = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), ADCOrSBB); + return ADCOrSBB; + } + + return SDValue(); +} + static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) { // Only handle test patterns. if (!isNullConstant(N->getOperand(1))) @@ -51792,6 +52772,16 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) { } } + // Peek through any zero-extend if we're only testing for a zero result. + if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.getScalarSizeInBits() >= 8 && + DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src, + DAG.getConstant(0, dl, SrcVT)); + } + // Look for a truncate. if (Op.getOpcode() != ISD::TRUNCATE) return SDValue(); @@ -51867,7 +52857,8 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); MVT VT = LHS.getSimpleValueType(); - unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB; + bool IsSub = X86ISD::SUB == N->getOpcode(); + unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD; // If we don't use the flag result, simplify back to a generic ADD/SUB. if (!N->hasAnyUseOfValue(1)) { @@ -51889,26 +52880,29 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, MatchGeneric(LHS, RHS, false); MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); - return SDValue(); + // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the + // EFLAGS result doesn't change. + return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG, + /*ZeroSecondOpOnly*/ true); } static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { - if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue BorrowIn = N->getOperand(2); + + if (SDValue Flags = combineCarryThroughADD(BorrowIn, DAG)) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); - return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, - N->getOperand(0), N->getOperand(1), - Flags); + return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, LHS, RHS, Flags); } // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry) // iff the flag result is dead. - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) && + if (LHS.getOpcode() == ISD::SUB && isNullConstant(RHS) && !N->hasAnyUseOfValue(1)) - return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), Op0.getOperand(0), - Op0.getOperand(1), N->getOperand(2)); + return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), LHS.getOperand(0), + LHS.getOperand(1), BorrowIn); return SDValue(); } @@ -51916,228 +52910,60 @@ static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue combineADC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + auto *LHSC = dyn_cast(LHS); + auto *RHSC = dyn_cast(RHS); + + // Canonicalize constant to RHS. 
+ if (LHSC && !RHSC) + return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), RHS, LHS, + CarryIn); + // If the LHS and RHS of the ADC node are zero, then it can't overflow and // the result is either zero or one (depending on the input carry bit). // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. - if (X86::isZeroNode(N->getOperand(0)) && - X86::isZeroNode(N->getOperand(1)) && + if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() && // We don't have a good way to replace an EFLAGS use, so only do this when // dead right now. SDValue(N, 1).use_empty()) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); - SDValue Res1 = - DAG.getNode(ISD::AND, DL, VT, - DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), - N->getOperand(2)), - DAG.getConstant(1, DL, VT)); + SDValue Res1 = DAG.getNode( + ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), CarryIn), + DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } - if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { - MVT VT = N->getSimpleValueType(0); - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, - N->getOperand(0), N->getOperand(1), - Flags); - } - - return SDValue(); -} - -/// If this is an add or subtract where one operand is produced by a cmp+setcc, -/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} -/// with CMP+{ADC, SBB}. -static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { - bool IsSub = N->getOpcode() == ISD::SUB; - SDValue X = N->getOperand(0); - SDValue Y = N->getOperand(1); - - // If this is an add, canonicalize a zext operand to the RHS. - // TODO: Incomplete? What if both sides are zexts? - if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND && - Y.getOpcode() != ISD::ZERO_EXTEND) - std::swap(X, Y); - - // Look through a one-use zext. - bool PeekedThroughZext = false; - if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) { - Y = Y.getOperand(0); - PeekedThroughZext = true; - } - - // If this is an add, canonicalize a setcc operand to the RHS. - // TODO: Incomplete? What if both sides are setcc? - // TODO: Should we allow peeking through a zext of the other operand? - if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC && - Y.getOpcode() != X86ISD::SETCC) - std::swap(X, Y); - - if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse()) - return SDValue(); - - SDLoc DL(N); - EVT VT = N->getValueType(0); - X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); - - // If X is -1 or 0, then we have an opportunity to avoid constants required in - // the general case below. - auto *ConstantX = dyn_cast(X); - if (ConstantX) { - if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) || - (IsSub && CC == X86::COND_B && ConstantX->isZero())) { - // This is a complicated way to get -1 or 0 from the carry flag: - // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax - // 0 - SETB --> 0 - (CF) --> CF ? 
-1 : 0 --> SBB %eax, %eax - return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), - Y.getOperand(1)); - } - - if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) || - (IsSub && CC == X86::COND_A && ConstantX->isZero())) { - SDValue EFLAGS = Y->getOperand(1); - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && - EFLAGS.getValueType().isInteger() && - !isa(EFLAGS.getOperand(1))) { - // Swap the operands of a SUB, and we have the same pattern as above. - // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB - // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB - SDValue NewSub = DAG.getNode( - X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); - return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), - NewEFLAGS); - } - } - } - - if (CC == X86::COND_B) { - // X + SETB Z --> adc X, 0 - // X - SETB Z --> sbb X, 0 - return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, - DAG.getVTList(VT, MVT::i32), X, - DAG.getConstant(0, DL, VT), Y.getOperand(1)); - } - - if (CC == X86::COND_A) { - SDValue EFLAGS = Y.getOperand(1); - // Try to convert COND_A into COND_B in an attempt to facilitate - // materializing "setb reg". - // - // Do not flip "e > c", where "c" is a constant, because Cmp instruction - // cannot take an immediate as its first operand. - // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && - EFLAGS.getValueType().isInteger() && - !isa(EFLAGS.getOperand(1))) { - SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), - EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); - return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, - DAG.getVTList(VT, MVT::i32), X, - DAG.getConstant(0, DL, VT), NewEFLAGS); - } - } - - if (CC == X86::COND_AE) { - // X + SETAE --> sbb X, -1 - // X - SETAE --> adc X, -1 - return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, - DAG.getVTList(VT, MVT::i32), X, - DAG.getConstant(-1, DL, VT), Y.getOperand(1)); - } - - if (CC == X86::COND_BE) { - // X + SETBE --> sbb X, -1 - // X - SETBE --> adc X, -1 - SDValue EFLAGS = Y.getOperand(1); - // Try to convert COND_BE into COND_AE in an attempt to facilitate - // materializing "setae reg". - // - // Do not flip "e <= c", where "c" is a constant, because Cmp instruction - // cannot take an immediate as its first operand. - // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && - EFLAGS.getValueType().isInteger() && - !isa(EFLAGS.getOperand(1))) { - SDValue NewSub = DAG.getNode( - X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); - return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, - DAG.getVTList(VT, MVT::i32), X, - DAG.getConstant(-1, DL, VT), NewEFLAGS); - } + // Fold ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry) + // iff the flag result is dead. + // TODO: Allow flag result if C1+C2 doesn't signed/unsigned overflow. 
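// Why the ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry) fold above needs a dead flag
// result: the value is preserved mod 2^32, but the carry-out can change once
// the constants are pre-added. A standalone check:

#include <cassert>
#include <cstdint>

static uint32_t adc(uint32_t A, uint32_t B, uint32_t CF) { return A + B + CF; }

static bool carryOut(uint32_t A, uint32_t B, uint32_t CF) {
  return ((uint64_t(A) + B + CF) >> 32) != 0;
}

int main() {
  uint32_t C1 = 0x80000001u, C2 = 0x90000002u;
  for (uint32_t CF = 0; CF <= 1; ++CF)
    assert(adc(C1, C2, CF) == adc(0, C1 + C2, CF)); // values agree
  // But the flag result differs: C1 + C2 wraps before the ADC.
  assert(carryOut(C1, C2, 0) && !carryOut(0, C1 + C2, 0));
  return 0;
}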
+ if (LHSC && RHSC && !LHSC->isZero() && !N->hasAnyUseOfValue(1)) { + SDLoc DL(N); + APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue(); + return DAG.getNode(X86ISD::ADC, DL, N->getVTList(), + DAG.getConstant(0, DL, LHS.getValueType()), + DAG.getConstant(Sum, DL, LHS.getValueType()), CarryIn); } - if (CC != X86::COND_E && CC != X86::COND_NE) - return SDValue(); - - SDValue Cmp = Y.getOperand(1); - if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() || - !X86::isZeroNode(Cmp.getOperand(1)) || - !Cmp.getOperand(0).getValueType().isInteger()) - return SDValue(); - - SDValue Z = Cmp.getOperand(0); - EVT ZVT = Z.getValueType(); - - // If X is -1 or 0, then we have an opportunity to avoid constants required in - // the general case below. - if (ConstantX) { - // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with - // fake operands: - // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) - // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) - if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) || - (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) { - SDValue Zero = DAG.getConstant(0, DL, ZVT); - SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); - SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); - return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), - SDValue(Neg.getNode(), 1)); - } - - // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb' - // with fake operands: - // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1) - // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1) - if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) || - (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) { - SDValue One = DAG.getConstant(1, DL, ZVT); - SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); - SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One); - return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), - Cmp1.getValue(1)); - } + if (SDValue Flags = combineCarryThroughADD(CarryIn, DAG)) { + MVT VT = N->getSimpleValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, LHS, RHS, Flags); } - // (cmp Z, 1) sets the carry flag if Z is 0. - SDValue One = DAG.getConstant(1, DL, ZVT); - SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); - SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One); - - // Add the flags type for ADC/SBB nodes. - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - - // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) - // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) - if (CC == X86::COND_NE) - return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X, - DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1)); + // Fold ADC(ADD(X,Y),0,Carry) -> ADC(X,Y,Carry) + // iff the flag result is dead. + if (LHS.getOpcode() == ISD::ADD && RHSC && RHSC->isZero() && + !N->hasAnyUseOfValue(1)) + return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), LHS.getOperand(0), + LHS.getOperand(1), CarryIn); - // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) - // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) - return DAG.getNode(IsSub ? 
X86ISD::SBB : X86ISD::ADC, DL, VTs, X, - DAG.getConstant(0, DL, VT), Cmp1.getValue(1)); + return SDValue(); } static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, @@ -52432,7 +53258,8 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1, /// Try to fold those constants into an 'add' instruction to reduce instruction /// count. We do this with CMOV rather the generic 'select' because there are /// earlier folds that may be used to turn select-of-constants into logic hacks. -static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) { +static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // If an operand is zero, add-of-0 gets simplified away, so that's clearly // better because we eliminate 1-2 instructions. This transform is still // an improvement without zero operands because we trade 2 move constants and @@ -52457,6 +53284,11 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) { if (!isSuitableCmov(Cmov)) return SDValue(); + // Don't remove a load folding opportunity for the add. That would neutralize + // any improvements from removing constant materializations. + if (X86::mayFoldLoad(OtherOp, Subtarget)) + return SDValue(); + EVT VT = N->getValueType(0); SDLoc DL(N); SDValue FalseOp = Cmov.getOperand(0); @@ -52499,7 +53331,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); SDLoc DL(N); - if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG)) + if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget)) return Select; if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget)) @@ -52535,6 +53367,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, } } + // Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W) + if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() && + X86::isZeroNode(Op0.getOperand(1))) { + assert(!Op0->hasAnyUseOfValue(1) && "Overflow bit in use"); + return DAG.getNode(X86ISD::ADC, SDLoc(Op0), Op0->getVTList(), Op1, + Op0.getOperand(0), Op0.getOperand(2)); + } + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -52617,6 +53457,25 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; + // Fold SUB(X,ADC(Y,0,W)) -> SBB(X,Y,W) + if (Op1.getOpcode() == X86ISD::ADC && Op1->hasOneUse() && + X86::isZeroNode(Op1.getOperand(1))) { + assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use"); + return DAG.getNode(X86ISD::SBB, SDLoc(Op1), Op1->getVTList(), Op0, + Op1.getOperand(0), Op1.getOperand(2)); + } + + // Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y) + // Don't fold to ADC(0,0,W)/SETCC_CARRY pattern which will prevent more folds. 
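The ADC folds above hold for the value result by plain modular arithmetic; the !N->hasAnyUseOfValue(1) guards exist because the carry flag of the two forms can differ even though the values agree (hence the TODO). A minimal standalone C++ sketch, with a hand-rolled adc model rather than anything from LLVM, checking the 8-bit case exhaustively:

    #include <cassert>
    #include <cstdint>

    // Value-only model of the X86ISD::ADC node: result = a + b + carry (mod 2^8).
    // The flag output is deliberately not modeled; the folds require it dead.
    static uint8_t adc(uint8_t a, uint8_t b, uint8_t carry) {
      return (uint8_t)(a + b + carry);
    }

    int main() {
      for (unsigned c1 = 0; c1 < 256; ++c1)
        for (unsigned c2 = 0; c2 < 256; ++c2)
          for (unsigned w = 0; w < 2; ++w) {
            // ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry)
            assert(adc(c1, c2, w) == adc(0, (uint8_t)(c1 + c2), w));
            // ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)
            assert((uint8_t)(c1 + adc(c2, 0, w)) == adc(c1, c2, w));
          }
      return 0;
    }

The value identity holds unconditionally; it is only the carry/overflow flags that can disagree between the two forms, which is why the fold is gated on the flag result being dead.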
+ if (Op1.getOpcode() == X86ISD::SBB && Op1->hasOneUse() && + !(X86::isZeroNode(Op0) && X86::isZeroNode(Op1.getOperand(1)))) { + assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use"); + SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0, + Op1.getOperand(1), Op1.getOperand(2)); + return DAG.getNode(ISD::SUB, SDLoc(N), Op0.getValueType(), ADC.getValue(0), + Op1.getOperand(0)); + } + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -52745,6 +53604,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Subs.push_back(SubOp.getOperand(I)); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs); }; + auto IsConcatFree = [](MVT VT, ArrayRef SubOps, unsigned Op) { + for (unsigned I = 0, E = SubOps.size(); I != E; ++I) { + SDValue Sub = SubOps[I].getOperand(Op); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (Sub.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Sub.getOperand(0).getValueType() != VT || + Sub.getConstantOperandAPInt(1) != (I * NumSubElts)) + return false; + } + return true; + }; unsigned NumOps = Ops.size(); switch (Op0.getOpcode()) { @@ -52802,6 +53672,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, DAG.getTargetConstant(Idx, DL, MVT::i8)); } break; + case X86ISD::PSHUFB: + if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) || + (VT.is512BitVector() && Subtarget.useBWIRegs()))) { + return DAG.getNode(Op0.getOpcode(), DL, VT, + ConcatSubOperand(VT, Ops, 0), + ConcatSubOperand(VT, Ops, 1)); + } + break; case X86ISD::VPERMV3: if (!IsSplat && NumOps == 2 && VT.is512BitVector()) { MVT OpVT = Op0.getSimpleValueType(); @@ -52920,6 +53798,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); } break; + case ISD::VSELECT: + case X86ISD::BLENDV: + if (!IsSplat && VT.is256BitVector() && Ops.size() == 2 && + (VT.getScalarSizeInBits() >= 32 || Subtarget.hasInt256()) && + IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) { + EVT SelVT = Ops[0].getOperand(0).getValueType(); + SelVT = SelVT.getDoubleNumVectorElementsVT(*DAG.getContext()); + return DAG.getNode(Op0.getOpcode(), DL, VT, + ConcatSubOperand(SelVT.getSimpleVT(), Ops, 0), + ConcatSubOperand(VT, Ops, 1), + ConcatSubOperand(VT, Ops, 2)); + } + break; } } @@ -52937,12 +53828,29 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, } } + // Attempt to fold target constant loads. 
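The two SUB rewrites just above follow the same pattern: modeling the value results as adc(a,b,w) = a + b + w and sbb(a,b,w) = a - b - w (toy helpers, not LLVM code), X - ADC(Y,0,W) equals SBB(X,Y,W), and X - SBB(Y,Z,W) equals ADC(X,Z,W) - Y. A brute-force 8-bit check:

    #include <cassert>
    #include <cstdint>

    static uint8_t adc(uint8_t a, uint8_t b, uint8_t w) { return (uint8_t)(a + b + w); }
    static uint8_t sbb(uint8_t a, uint8_t b, uint8_t w) { return (uint8_t)(a - b - w); }

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y)
          for (unsigned z = 0; z < 256; ++z)
            for (unsigned w = 0; w < 2; ++w) {
              // SUB(X,ADC(Y,0,W)) -> SBB(X,Y,W)
              assert((uint8_t)(x - adc(y, 0, w)) == sbb(x, y, w));
              // SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y)
              assert((uint8_t)(x - sbb(y, z, w)) == (uint8_t)(adc(x, z, w) - y));
            }
      return 0;
    }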
+ if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) { + SmallVector<APInt> EltBits; + APInt UndefElts = APInt::getNullValue(VT.getVectorNumElements()); + for (unsigned I = 0, E = Ops.size(); I != E; ++I) { + APInt OpUndefElts; + SmallVector<APInt> OpEltBits; + if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts, + OpEltBits, true, false)) + break; + EltBits.append(OpEltBits); + UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth()); + } + if (EltBits.size() == VT.getVectorNumElements()) + return getConstVector(EltBits, UndefElts, VT, DAG, DL); + } + return SDValue(); } -static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); EVT SrcVT = N->getOperand(0).getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -52961,9 +53869,9 @@ static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -53044,7 +53952,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // Match concat_vector style patterns. SmallVector<SDValue, 2> SubVectorOps; - if (collectConcatOps(N, SubVectorOps)) { + if (collectConcatOps(N, SubVectorOps, DAG)) { if (SDValue Fold = combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget)) return Fold; @@ -53103,10 +54011,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, /// This function should only be called with legal types (otherwise, the calls /// to get simple value types will assert). static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) { - SDValue Sel = peekThroughBitcasts(Ext->getOperand(0)); + SDValue Sel = Ext->getOperand(0); SmallVector<SDValue, 4> CatOps; if (Sel.getOpcode() != ISD::VSELECT || - !collectConcatOps(Sel.getOperand(0).getNode(), CatOps)) + !collectConcatOps(Sel.getOperand(0).getNode(), CatOps, DAG)) return SDValue(); // Note: We assume simple value types because this should only be called with @@ -53154,9 +54062,9 @@ static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) { return DAG.getBitcast(VT, NarrowSel); } -static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { // For AVX1 only, if we are extracting from a 256-bit and+not (which will // eventually get combined/lowered into ANDNP) with a concatenated operand, // split the 'and' into 128-bit ops to avoid the concatenate and extract.
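The constant-load fold above is pure bookkeeping: append each operand's decoded element payloads and splice its undef mask in at that operand's element offset. A toy model of that arithmetic (plain C++ standing in for the APInt-based EltBits/UndefElts machinery, assuming both operands decode to the same element count):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy stand-in for the EltBits/UndefElts bookkeeping: element payloads are
    // appended, and each operand's undef mask is inserted at its element offset.
    struct ConstVec {
      std::vector<uint32_t> elts; // decoded element bit payloads
      uint64_t undef = 0;         // one undef bit per element
    };

    static ConstVec concat(const ConstVec &lo, const ConstVec &hi) {
      ConstVec out;
      out.elts = lo.elts;
      out.elts.insert(out.elts.end(), hi.elts.begin(), hi.elts.end());
      out.undef = lo.undef | (hi.undef << lo.elts.size()); // UndefElts.insertBits
      return out;
    }

    int main() {
      ConstVec lo{{1, 2, 3, 4}, 0b0010}; // element 1 is undef
      ConstVec hi{{5, 6, 7, 8}, 0b1000}; // element 3 is undef
      ConstVec cat = concat(lo, hi);
      assert(cat.elts.size() == 8);
      assert(cat.undef == 0b10000010);   // undefs land at elements 1 and 7
      return 0;
    }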
@@ -53177,6 +54085,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, EVT InVecVT = InVec.getValueType(); unsigned SizeInBits = VT.getSizeInBits(); unsigned InSizeInBits = InVecVT.getSizeInBits(); + unsigned NumSubElts = VT.getVectorNumElements(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && @@ -53214,22 +54123,24 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, } if (InVec.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getBuildVector( - VT, SDLoc(N), - InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements())); + return DAG.getBuildVector(VT, SDLoc(N), + InVec->ops().slice(IdxVal, NumSubElts)); - // If we are extracting from an insert into a zero vector, replace with a - // smaller insert into zero if we don't access less than the original - // subvector. Don't do this for i1 vectors. + // If we are extracting from an insert into a larger vector, replace with a + // smaller insert if we don't access less than the original subvector. Don't + // do this for i1 vectors. + // TODO: Relax the matching indices requirement? if (VT.getVectorElementType() != MVT::i1 && - InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 && - InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) && - ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) && + InVec.getOpcode() == ISD::INSERT_SUBVECTOR && InVec.hasOneUse() && + IdxVal == InVec.getConstantOperandVal(2) && InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) { SDLoc DL(N); - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), - InVec.getOperand(1), InVec.getOperand(2)); + SDValue NewExt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, + InVec.getOperand(0), N->getOperand(1)); + unsigned NewIdxVal = InVec.getConstantOperandVal(2) - IdxVal; + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, NewExt, + InVec.getOperand(1), + DAG.getVectorIdxConstant(NewIdxVal, DL)); } // If we're extracting an upper subvector from a broadcast we should just @@ -53246,8 +54157,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits); // Attempt to extract from the source of a shuffle vector. - if ((InSizeInBits % SizeInBits) == 0 && - (IdxVal % VT.getVectorNumElements()) == 0) { + if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) { SmallVector ShuffleMask; SmallVector ScaledMask; SmallVector ShuffleInputs; @@ -53255,7 +54165,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, // Decode the shuffle mask and scale it so its shuffling subvectors. 
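The reworked insert/extract fold above is index arithmetic: when the extracted window starts exactly where the subvector was inserted and the inserted value fits inside the window, the insert can be replayed after a narrower extract of the base vector. A small simulation with plain vectors (illustrative insertSub/extractSub helpers, not LLVM's API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    using Vec = std::vector<int>;

    // Illustrative models of INSERT_SUBVECTOR / EXTRACT_SUBVECTOR.
    static Vec insertSub(Vec base, const Vec &sub, size_t idx) {
      for (size_t i = 0; i < sub.size(); ++i)
        base[idx + i] = sub[i];
      return base;
    }
    static Vec extractSub(const Vec &v, size_t idx, size_t n) {
      return Vec(v.begin() + idx, v.begin() + idx + n);
    }

    int main() {
      Vec base = {0, 1, 2, 3, 4, 5, 6, 7};  // v8
      Vec sub = {90, 91};                   // v2 inserted at index 4
      size_t insIdx = 4, extIdx = 4, n = 4; // matching indices, |sub| <= n
      // extract(insert(base, sub, insIdx), extIdx)
      Vec before = extractSub(insertSub(base, sub, insIdx), extIdx, n);
      // -> insert(extract(base, extIdx), sub, insIdx - extIdx)
      Vec after = insertSub(extractSub(base, extIdx, n), sub, insIdx - extIdx);
      assert(before == after);
      return 0;
    }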
if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) && scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) { - unsigned SubVecIdx = IdxVal / VT.getVectorNumElements(); + unsigned SubVecIdx = IdxVal / NumSubElts; if (ScaledMask[SubVecIdx] == SM_SentinelUndef) return DAG.getUNDEF(VT); if (ScaledMask[SubVecIdx] == SM_SentinelZero) @@ -53263,7 +54173,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs]; if (Src.getValueSizeInBits() == InSizeInBits) { unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs; - unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements(); + unsigned SrcEltIdx = SrcSubVecIdx * NumSubElts; return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG, SDLoc(N), SizeInBits); } @@ -53273,8 +54183,8 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, // If we're extracting the lowest subvector and we're the only user, // we may be able to perform this with a smaller vector width. unsigned InOpcode = InVec.getOpcode(); - if (IdxVal == 0 && InVec.hasOneUse()) { - if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) { + if (InVec.hasOneUse()) { + if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) { // v2f64 CVTDQ2PD(v4i32). if (InOpcode == ISD::SINT_TO_FP && InVec.getOperand(0).getValueType() == MVT::v4i32) { @@ -53291,7 +54201,8 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0)); } } - if ((InOpcode == ISD::ANY_EXTEND || + if (IdxVal == 0 && + (InOpcode == ISD::ANY_EXTEND || InOpcode == ISD::ANY_EXTEND_VECTOR_INREG || InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG || @@ -53306,7 +54217,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode); return DAG.getNode(ExtOp, DL, VT, Ext); } - if (InOpcode == ISD::VSELECT && + if (IdxVal == 0 && InOpcode == ISD::VSELECT && InVec.getOperand(0).getValueType().is256BitVector() && InVec.getOperand(1).getValueType().is256BitVector() && InVec.getOperand(2).getValueType().is256BitVector()) { @@ -53316,7 +54227,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128); return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2); } - if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() && + if (IdxVal == 0 && InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() && (VT.is128BitVector() || VT.is256BitVector())) { SDLoc DL(N); SDValue InVecSrc = InVec.getOperand(0); @@ -53324,6 +54235,13 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits); return DAG.getNode(InOpcode, DL, VT, Ext); } + if (InOpcode == X86ISD::MOVDDUP && + (VT.is128BitVector() || VT.is256BitVector())) { + SDLoc DL(N); + SDValue Ext0 = + extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits); + return DAG.getNode(InOpcode, DL, VT, Ext0); + } } // Always split vXi64 logical shifts where we're extracting the upper 32-bits @@ -53476,11 +54394,9 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, ISD::isBuildVectorAllZeros(RHS.getNode())) return DAG.getConstant(0, SDLoc(N), VT); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); - if 
(TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); @@ -53494,6 +54410,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, unsigned Opcode = N->getOpcode(); unsigned InOpcode = In.getOpcode(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL(N); // Try to merge vector loads and extend_inreg to an extload. if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) && @@ -53506,10 +54423,9 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, : ISD::ZEXTLOAD; EVT MemVT = VT.changeVectorElementType(SVT); if (TLI.isLoadExtLegal(Ext, VT, MemVT)) { - SDValue Load = - DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(), - Ld->getMemOperand()->getFlags()); + SDValue Load = DAG.getExtLoad( + Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), + MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); return Load; } @@ -53518,7 +54434,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X). if (Opcode == InOpcode) - return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0)); + return DAG.getNode(Opcode, DL, VT, In.getOperand(0)); // Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0)) // -> EXTEND_VECTOR_INREG(X). @@ -53527,12 +54443,26 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) && In.getOperand(0).getOperand(0).getValueSizeInBits() == In.getValueSizeInBits()) - return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0)); + return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0)); - // Attempt to combine as a shuffle. - // TODO: General ZERO_EXTEND_VECTOR_INREG support. - if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG || - (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasSSE41())) { + // Fold EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0). + // TODO: Move to DAGCombine? + if (!DCI.isBeforeLegalizeOps() && Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && + In.getOpcode() == ISD::BUILD_VECTOR && In.hasOneUse() && + In.getValueSizeInBits() == VT.getSizeInBits()) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits(); + EVT EltVT = In.getOperand(0).getValueType(); + SmallVector Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT)); + for (unsigned I = 0; I != NumElts; ++I) + Elts[I * Scale] = In.getOperand(I); + return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts)); + } + + // Attempt to combine as a shuffle on SSE41+ targets. 
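The BUILD_VECTOR fold above leans on x86's little-endian lane layout: zero-extending each narrow element in place yields exactly the byte image of the narrow elements interleaved with zero elements, so the extended vector can instead be built at the narrow type and bitcast to the wide one. A standalone check (assumes a little-endian host, which x86 is):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // ZERO_EXTEND_VECTOR_INREG: widen the low four u16 lanes to u32.
      uint16_t in[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      uint32_t ext[4];
      for (int i = 0; i < 4; ++i)
        ext[i] = in[i];

      // Equivalent BUILD_VECTOR at the narrow type: interleave with zeros
      // (X,0,Y,0,...), then bitcast to the wide type.
      uint16_t interleaved[8] = {};
      for (int i = 0; i < 4; ++i)
        interleaved[2 * i] = in[i];

      assert(std::memcmp(ext, interleaved, sizeof(ext)) == 0);
      return 0;
    }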
+ if ((Opcode == ISD::ANY_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && + Subtarget.hasSSE41()) { SDValue Op(N, 0); if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType())) if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) return Res; @@ -53549,11 +54479,9 @@ static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) return DAG.getConstant(0, SDLoc(N), VT); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); @@ -53781,11 +54709,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PEXTRB: return combineExtractVectorElt(N, DAG, DCI, Subtarget); case ISD::CONCAT_VECTORS: - return combineConcatVectors(N, DAG, DCI, Subtarget); + return combineCONCAT_VECTORS(N, DAG, DCI, Subtarget); case ISD::INSERT_SUBVECTOR: - return combineInsertSubvector(N, DAG, DCI, Subtarget); + return combineINSERT_SUBVECTOR(N, DAG, DCI, Subtarget); case ISD::EXTRACT_SUBVECTOR: - return combineExtractSubvector(N, DAG, DCI, Subtarget); + return combineEXTRACT_SUBVECTOR(N, DAG, DCI, Subtarget); case ISD::VSELECT: case ISD::SELECT: case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget); @@ -54397,37 +55325,37 @@ TargetLowering::ConstraintWeight weight = CW_Register; break; case 'I': - if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(info.CallOperandVal)) { if (C->getZExtValue() <= 31) weight = CW_Constant; } break; case 'J': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if (C->getZExtValue() <= 63) weight = CW_Constant; } break; case 'K': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) weight = CW_Constant; } break; case 'L': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) weight = CW_Constant; } break; case 'M': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if (C->getZExtValue() <= 3) weight = CW_Constant; } break; case 'N': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if (C->getZExtValue() <= 0xff) weight = CW_Constant; } break; @@ -54439,14 +55367,14 @@ TargetLowering::ConstraintWeight } break; case 'e': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if ((C->getSExtValue() >= -0x80000000LL) && (C->getSExtValue() <= 0x7fffffffLL)) weight = CW_Constant; } break; case 'Z': - if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) { if (C->getZExtValue() <= 0xffffffff) weight = CW_Constant; } break; @@ -54511,7 +55439,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, switch (ConstraintLetter) { default: break; case 'I': - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 31) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'J': -
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 63) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); @@ -54529,7 +55457,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } return; case 'K': - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (isInt<8>(C->getSExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); @@ -54538,7 +55466,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } return; case 'L': - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff || (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) { Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), @@ -54548,7 +55476,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } return; case 'M': - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 3) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); @@ -54557,7 +55485,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } return; case 'N': - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 255) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); @@ -54566,7 +55494,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } return; case 'O': - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 127) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); @@ -54576,7 +55504,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'e': { // 32-bit signed value - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. @@ -54590,7 +55518,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } case 'Z': { // 32-bit unsigned value - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), @@ -54604,7 +55532,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } case 'i': { // Literal immediates are always ok. - if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { + if (auto *CST = dyn_cast<ConstantSDNode>(Op)) { bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1; BooleanContent BCont = getBooleanContents(MVT::i64); ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont) @@ -54617,8 +55545,9 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // In any sort of PIC mode addresses need to be computed at runtime by // adding in a register or some sort of table lookup. These can't - // be used as immediates. - if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC()) + // be used as immediates. BlockAddresses are fine though.
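For reference, the immediate ranges exercised by the two constraint-letter hunks above, gathered into one predicate. This is a simplified sketch of the x86 inline-asm constraints, not an LLVM interface; note that per the lowering code 'L' additionally admits 0xffffffff on 64-bit targets, which the sketch omits:

    #include <cassert>
    #include <cstdint>

    static bool fitsConstraint(char letter, int64_t v) {
      switch (letter) {
      case 'I': return v >= 0 && v <= 31;                // 32-bit shift counts
      case 'J': return v >= 0 && v <= 63;                // 64-bit shift counts
      case 'K': return v >= -0x80 && v <= 0x7f;          // signed 8-bit
      case 'L': return v == 0xff || v == 0xffff;         // zero-extending masks
      case 'M': return v >= 0 && v <= 3;                 // lea scale shifts
      case 'N': return v >= 0 && v <= 0xff;              // unsigned 8-bit (in/out)
      case 'O': return v >= 0 && v <= 0x7f;              // 0..127
      case 'e': return v >= INT32_MIN && v <= INT32_MAX; // sign-extended 32-bit
      case 'Z': return v >= 0 && v <= UINT32_MAX;        // zero-extended 32-bit
      default:  return false;
      }
    }

    int main() {
      assert(fitsConstraint('I', 31) && !fitsConstraint('I', 32));
      assert(fitsConstraint('K', -128) && !fitsConstraint('K', 128));
      assert(fitsConstraint('Z', 0xffffffffLL) && !fitsConstraint('e', 0x80000000LL));
      return 0;
    }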
+ if ((Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC()) && + !isa(Op)) return; // If we are in non-pic codegen mode, we allow the address of a global (with diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3f6d567d3f4d..af110884049b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -249,9 +249,6 @@ namespace llvm { SCALEFS, SCALEFS_RND, - // Unsigned Integer average. - AVG, - /// Integer horizontal add/sub. HADD, HSUB, @@ -790,6 +787,9 @@ namespace llvm { LOR, LXOR, LAND, + LBTS, + LBTC, + LBTR, // Load, scalar_to_vector, and zero extend. VZEXT_LOAD, @@ -1039,10 +1039,7 @@ namespace llvm { bool isCtlzFast() const override; - bool hasBitPreservingFPLogic(EVT VT) const override { - return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() || - (VT == MVT::f16 && X86ScalarSSEf16); - } + bool hasBitPreservingFPLogic(EVT VT) const override; bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { // If the pair to store is a mixture of float and int values, we will @@ -1163,6 +1160,19 @@ namespace llvm { APInt &UndefElts, unsigned Depth) const override; + bool isTargetCanonicalConstantNode(SDValue Op) const override { + // Peek through bitcasts/extracts/inserts to see if we have a broadcast + // vector from memory. + while (Op.getOpcode() == ISD::BITCAST || + Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || + (Op.getOpcode() == ISD::INSERT_SUBVECTOR && + Op.getOperand(0).isUndef())) + Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0); + + return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || + TargetLowering::isTargetCanonicalConstantNode(Op); + } + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; @@ -1288,6 +1298,9 @@ namespace llvm { /// from i32 to i8 but not from i32 to i16. bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, + EVT VT) const override; + /// Given an intrinsic, checks if on the target the intrinsic will need to map /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and stores the intrinsic information into the IntrinsicInfo that was @@ -1316,15 +1329,13 @@ namespace llvm { /// Returns true if lowering to a jump table is allowed. bool areJTsAllowed(const Function *Fn) const override; + MVT getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const override; + /// If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. - bool ShouldShrinkFPConstant(EVT VT) const override { - // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more - // expensive than a straight movsd. On the other hand, it's important to - // shrink long double fp constant since fldt is very slow. - return !X86ScalarSSEf64 || VT == MVT::f80; - } + bool ShouldShrinkFPConstant(EVT VT) const override; /// Return true if we believe it is correct and profitable to reduce the /// load node to a smaller type. @@ -1333,11 +1344,7 @@ namespace llvm { /// Return true if the specified scalar FP type is computed in an SSE /// register, not on the X87 floating point stack. 
- bool isScalarFPTypeInSSEReg(EVT VT) const { - return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 - (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1 - (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16 - } + bool isScalarFPTypeInSSEReg(EVT VT) const; /// Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. @@ -1491,13 +1498,6 @@ namespace llvm { /// make the right decision when generating code for different targets. const X86Subtarget &Subtarget; - /// Select between SSE or x87 floating point ops. - /// When SSE is available, use it for f32 operations. - /// When SSE2 is available, use it for f64 operations. - bool X86ScalarSSEf32; - bool X86ScalarSSEf64; - bool X86ScalarSSEf16; - /// A list of legal FP immediates. std::vector LegalFPImmediates; @@ -1637,9 +1637,13 @@ namespace llvm { TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; + void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; @@ -1649,6 +1653,8 @@ namespace llvm { bool needsCmpXchgNb(Type *MemType) const; + template bool isSoftFP16(T VT) const; + void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp index e08b4b7c03c6..001aa2dcb879 100644 --- a/llvm/lib/Target/X86/X86IndirectThunks.cpp +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -31,6 +31,7 @@ #include "X86Subtarget.h" #include "llvm/CodeGen/IndirectThunks.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 004e6fa5ebf4..08dc514a6476 100644 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -23,6 +23,7 @@ #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/ProfileData/SampleProf.h" diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index ff8710634e89..c098122685be 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -354,10 +354,9 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, // If the shift amount is guaranteed to be in-range we can replace it with a // generic shift. - APInt UpperBits = - APInt::getHighBitsSet(BitWidth, BitWidth - Log2_32(BitWidth)); - if (llvm::MaskedValueIsZero(Amt, UpperBits, - II.getModule()->getDataLayout())) { + KnownBits KnownAmt = + llvm::computeKnownBits(Amt, II.getModule()->getDataLayout()); + if (KnownAmt.getMaxValue().ult(BitWidth)) { return (LogicalShift ? (ShiftLeft ? 
Builder.CreateShl(Vec, Amt) : Builder.CreateLShr(Vec, Amt)) : Builder.CreateAShr(Vec, Amt)); @@ -521,11 +520,10 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II, // %int = bitcast <16 x i1> %cmp to i16 // %res = zext i16 %int to i32 unsigned NumElts = ArgTy->getNumElements(); - Type *IntegerVecTy = VectorType::getInteger(ArgTy); Type *IntegerTy = Builder.getIntNTy(NumElts); - Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy); - Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy)); + Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy)); + Res = Builder.CreateIsNeg(Res); Res = Builder.CreateBitCast(Res, IntegerTy); Res = Builder.CreateZExtOrTrunc(Res, ResTy); return Res; @@ -997,20 +995,18 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return IC.replaceInstUsesWith(II, II.getArgOperand(0)); } - if (MaskC->getValue().isShiftedMask()) { + unsigned MaskIdx, MaskLen; + if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { // any single contingous sequence of 1s anywhere in the mask simply // describes a subset of the input bits shifted to the appropriate // position. Replace with the straight forward IR. - unsigned ShiftAmount = MaskC->getValue().countTrailingZeros(); Value *Input = II.getArgOperand(0); Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1)); - Value *Shifted = IC.Builder.CreateLShr(Masked, - ConstantInt::get(II.getType(), - ShiftAmount)); + Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); + Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt); return IC.replaceInstUsesWith(II, Shifted); } - if (auto *SrcC = dyn_cast(II.getArgOperand(0))) { uint64_t Src = SrcC->getZExtValue(); uint64_t Mask = MaskC->getZExtValue(); @@ -1042,15 +1038,15 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { if (MaskC->isAllOnesValue()) { return IC.replaceInstUsesWith(II, II.getArgOperand(0)); } - if (MaskC->getValue().isShiftedMask()) { + + unsigned MaskIdx, MaskLen; + if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { // any single contingous sequence of 1s anywhere in the mask simply // describes a subset of the input bits shifted to the appropriate // position. Replace with the straight forward IR. - unsigned ShiftAmount = MaskC->getValue().countTrailingZeros(); Value *Input = II.getArgOperand(0); - Value *Shifted = IC.Builder.CreateShl(Input, - ConstantInt::get(II.getType(), - ShiftAmount)); + Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); + Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt); Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1)); return IC.replaceInstUsesWith(II, Masked); } @@ -1934,6 +1930,23 @@ Optional X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( break; } + // General per-element vector operations. 
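The pext/pdep combine above keys off the new two-argument isShiftedMask overload: for a single contiguous run of ones, bit extract and deposit collapse to shift-and-mask. A reference check using hand-rolled software models of the BMI2 PEXT/PDEP semantics (not compiler intrinsics):

    #include <cassert>
    #include <cstdint>

    // Software models of BMI2 PEXT/PDEP semantics.
    static uint32_t pext(uint32_t x, uint32_t m) {
      uint32_t r = 0;
      for (unsigned i = 0, j = 0; i < 32; ++i)
        if (m & (1u << i))
          r |= ((x >> i) & 1u) << j++;
      return r;
    }
    static uint32_t pdep(uint32_t x, uint32_t m) {
      uint32_t r = 0;
      for (unsigned i = 0, j = 0; i < 32; ++i)
        if (m & (1u << i))
          r |= ((x >> j++) & 1u) << i;
      return r;
    }

    int main() {
      uint32_t mask = 0x0ff0; // shifted mask: MaskIdx = 4, MaskLen = 8
      unsigned maskIdx = 4;
      for (uint32_t x = 0; x <= 0xffff; ++x) {
        assert(pext(x, mask) == ((x & mask) >> maskIdx)); // the and+lshr rewrite
        assert(pdep(x, mask) == ((x << maskIdx) & mask)); // the shl+and rewrite
      }
      return 0;
    }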
+ case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_q_256: + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_q_256: + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: { + simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); + simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); + UndefElts &= UndefElts2; + break; + } + case Intrinsic::x86_sse2_packssdw_128: case Intrinsic::x86_sse2_packsswb_128: case Intrinsic::x86_sse2_packuswb_128: diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index d825981a6b36..5da06bc87b06 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -48,18 +48,23 @@ let Predicates = [HasAMXTILE, In64BitMode] in { VEX, T8XD; // Pseduo instruction for RA. - def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), - [(int_x86_ldtilecfg_internal addr:$src)]>; + let isPseudo = true, mayLoad = 1, hasSideEffects = 1, + Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in + def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>; + let isPseudo = true, mayLoad = 1 in def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, opaquemem:$src3), []>; + let isPseudo = true, mayLoad = 1 in def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, opaquemem:$src3), []>; + let isPseudo = true, mayStore = 1 in def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1, GR16:$src2, opaquemem:$src3, TILE:$src4), []>; - let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in + let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1, + canFoldAsLoad = 1 in def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2), [(set TILE:$dst, (int_x86_tilezero_internal GR16:$src1, GR16:$src2))]>; @@ -67,9 +72,12 @@ let Predicates = [HasAMXTILE, In64BitMode] in { let usesCustomInserter = 1 in { // Pseudo instructions, using immediates instead of tile registers. // To be translated to the actual instructions in X86ISelLowering.cpp + let mayLoad = 1 in def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>; + let mayLoad = 1 in def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>; + let mayStore = 1 in def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>; def PTILEZERO : PseudoI<(outs), (ins u8imm:$src), [(int_x86_tilezero timm:$src)]>; @@ -99,7 +107,7 @@ let Predicates = [HasAMXINT8, In64BitMode] in { } // Pseduo instruction for RA. - let Constraints = "$src4 = $dst" in { + let isPseudo = true, Constraints = "$src4 = $dst" in { def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6), @@ -158,7 +166,7 @@ let Predicates = [HasAMXBF16, In64BitMode] in { []>, VEX_4V, T8XS; // Pseduo instruction for RA. 
- let Constraints = "$src4 = $dst" in + let isPseudo = true, Constraints = "$src4 = $dst" in def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1, GR16:$src2, GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6), diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index bc67d1f89d7f..48da7b3ac882 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -476,6 +476,7 @@ let Predicates = [HasAVX512] in { def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>; def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>; def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>; def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; } @@ -508,25 +509,23 @@ let Predicates = [HasAVX512] in { def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>; def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>; def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>; def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>; def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>; def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>; def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; } -let Predicates = [HasFP16] in { -def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>; -def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>; -def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>; -} - // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in { + def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "", + [(set FR16X:$dst, fp16imm0)]>; def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", [(set FR32X:$dst, fp32imm0)]>; def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", @@ -535,12 +534,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, [(set VR128X:$dst, fp128imm0)]>; } -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in { - def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "", - [(set FR16X:$dst, fp16imm0)]>; -} - //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // @@ -678,21 +671,21 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info, - vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>; + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; // Codegen pattern with the alternative types insert VEC128 into VEC512 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info, - vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>; + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; // Codegen pattern with the alternative types insert VEC256 into VEC512 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info, - vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>; + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; multiclass vinsert_for_mask_cast; defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info, - vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>; + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; // Codegen pattern with the alternative types extract VEC128 from VEC512 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, @@ -987,14 +980,14 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info, - vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>; + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; // Codegen pattern with the alternative types extract VEC256 from VEC512 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, 
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info, - vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>; + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; // A 128-bit extract from bits [255:128] of a 512-bit vector should use a @@ -1020,6 +1013,10 @@ def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), (v8i16 (VEXTRACTI128rr (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), (iPTR 1)))>; +def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))), + (v8f16 (VEXTRACTF128rr + (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)), + (iPTR 1)))>; def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), (v16i8 (VEXTRACTI128rr (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), @@ -1049,18 +1046,16 @@ def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), (v8i16 (VEXTRACTI32x4Z256rr (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), (iPTR 1)))>; +def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))), + (v8f16 (VEXTRACTF32x4Z256rr + (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)), + (iPTR 1)))>; def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), (v16i8 (VEXTRACTI32x4Z256rr (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), (iPTR 1)))>; } -let Predicates = [HasFP16, HasVLX] in -def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))), - (v8f16 (VEXTRACTF32x4Z256rr - (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)), - (iPTR 1)))>; - // Additional patterns for handling a bitcast between the vselect and the // extract_subvector. @@ -1478,7 +1473,7 @@ multiclass avx512_subvec_broadcast_rm_dq opc, string OpcodeStr, Sched<[SchedWriteShuffle.YMM.Folded]>, AVX5128IBase, EVEX; } -let Predicates = [HasFP16] in { +let Predicates = [HasBWI] in { def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)), (VPBROADCASTWZrm addr:$src)>; @@ -1487,7 +1482,7 @@ let Predicates = [HasFP16] in { def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))), (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>; } -let Predicates = [HasVLX, HasFP16] in { +let Predicates = [HasVLX, HasBWI] in { def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)), (VPBROADCASTWZ128rm addr:$src)>; def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)), @@ -3763,6 +3758,9 @@ let Predicates = [HasBWI, NoVLX] in { defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; + + defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>; + defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>; } let Predicates = [HasAVX512] in { @@ -3852,7 +3850,7 @@ let Predicates = [HasVLX] in { def : Pat<(store (v32i8 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; } -let Predicates = [HasFP16] in { +let Predicates = [HasBWI] in { def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))), (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)), @@ -3887,7 +3885,7 @@ let Predicates = [HasFP16] in { def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask), (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>; } -let Predicates = [HasFP16, HasVLX] in { +let Predicates = [HasBWI, HasVLX] in { def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))), (VMOVDQU16Z256rrk 
VR256X:$src0, VK16WM:$mask, VR256X:$src1)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)), @@ -4099,14 +4097,14 @@ def : Pat<(f64 (bitconvert VK64:$src)), //===----------------------------------------------------------------------===// multiclass avx512_move_scalar prd = [HasAVX512, OptForSize]> { - let Predicates = prd in + X86VectorVTInfo _, Predicate prd = HasAVX512> { + let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; + let Predicates = [prd] in { def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", @@ -4159,6 +4157,7 @@ multiclass avx512_move_scalar, EVEX, EVEX_K, Sched<[WriteFStore]>, NotMemoryFoldable; + } } defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, @@ -4168,7 +4167,7 @@ defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info, - [HasFP16]>, + HasFP16>, VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>; multiclass avx512_move_scalar_lowering; } -defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>; defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; -defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, - (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; -defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, - (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, @@ -4353,6 +4347,12 @@ defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; +let Predicates = [HasFP16] in { +defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>; +defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, + (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; +defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, + (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, (v32i1 (insert_subvector (v32i1 immAllZerosV), @@ -4360,6 +4360,30 @@ defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, (iPTR 0))), (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), GR8, sub_8bit>; + +defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, + (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; +defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, + (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; +defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, + (v32i1 (insert_subvector + (v32i1 immAllZerosV), + (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), + (iPTR 0))), + (v8i1 
(bitconvert (and GR8:$mask, (i8 1)))), + GR8, sub_8bit>; + +def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))), + (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk + (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)), + VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), + (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; + +def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)), + (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), + (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; +} + defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, (v16i1 (insert_subvector (v16i1 immAllZerosV), @@ -4385,10 +4409,6 @@ defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), (iPTR 0))), GR8, sub_8bit>; -defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, - (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; -defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, - (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, @@ -4396,13 +4416,6 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; -defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, - (v32i1 (insert_subvector - (v32i1 immAllZerosV), - (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), - (iPTR 0))), - (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), - GR8, sub_8bit>; defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, (v16i1 (insert_subvector (v16i1 immAllZerosV), @@ -4428,16 +4441,6 @@ defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), (iPTR 0))), GR8, sub_8bit>; -def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))), - (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk - (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)), - VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), - (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; - -def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)), - (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), - (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; - def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), @@ -5039,7 +5042,7 @@ defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SchedWriteVecIMul, HasBWI, 1>, T8PD; -defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, +defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu, SchedWriteVecALU, HasBWI, 1>; defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, SchedWriteVecIMul, HasAVX512, 1>, T8PD; @@ -11651,6 +11654,14 @@ defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; +// Always select FP16 instructions if available. 
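The fallback patterns below (kept behind the real AVX512-FP16 instructions by the negative AddedComplexity) treat an f16 value purely as a 16-bit payload moved through integer pinsrw/pextrw. A standalone model of that view, deliberately using uint16_t plus memcpy instead of any FP16 type:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // An f16 register modeled by its raw 16-bit payload; no FP16 arithmetic is
    // needed for load/store/bitcast, which is all these patterns cover.
    struct Half { uint16_t bits; };

    static Half loadHalf(const void *p) {    // f16 (load addr) via integer insert
      Half h;
      std::memcpy(&h.bits, p, sizeof h.bits);
      return h;
    }
    static void storeHalf(void *p, Half h) { // store f16 via integer extract
      std::memcpy(p, &h.bits, sizeof h.bits);
    }
    static uint16_t halfToBits(Half h) { return h.bits; }  // bitcast f16 -> i16
    static Half bitsToHalf(uint16_t b) { return Half{b}; } // bitcast i16 -> f16

    int main() {
      uint16_t mem = 0x3C00; // 1.0 in IEEE half precision
      Half h = loadHalf(&mem);
      uint16_t out = 0;
      storeHalf(&out, h);
      assert(out == mem);                               // round-trips bit-exactly
      assert(halfToBits(bitsToHalf(0x7E00)) == 0x7E00); // NaN payload preserved
      return 0;
    }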
+let Predicates = [HasBWI], AddedComplexity = -10 in { + def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>; + def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>; + def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>; + def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>; +} + //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations //===----------------------------------------------------------------------===// @@ -12988,7 +12999,6 @@ def : Pat<(i16 (bitconvert FR16X:$src)), sub_16bit))>; def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>; -} // Allow "vmovw" to use GR64 let hasSideEffects = 0 in { @@ -12997,6 +13007,7 @@ let hasSideEffects = 0 in { def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>; } +} // Convert 16-bit float to i16/u16 multiclass avx512_cvtph2w opc, string OpcodeStr, SDPatternOperator OpNode, diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 8337d2b37383..f08ecdf6afc9 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -541,7 +541,7 @@ class X86TypeInfo { + bit hasREX_W> { /// VT - This is the value type itself. ValueType VT = vt; @@ -596,9 +596,9 @@ class X86TypeInfo>", SDTIntLeaf,[],"<>">; @@ -634,7 +634,7 @@ class ITy opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, // Infer instruction prefixes from type info. let OpSize = typeinfo.OpSize; - let hasREX_WPrefix = typeinfo.HasREX_WPrefix; + let hasREX_W = typeinfo.HasREX_W; } // BinOpRR - Instructions like "add reg, reg, reg". diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td index 330b8c7a8a43..79ac2a2d8019 100644 --- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td +++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td @@ -14,7 +14,7 @@ // CMOV instructions. 
let isCodeGenOnly = 1, ForceDisassemble = 1 in { -let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", +let Uses = [EFLAGS], Predicates = [HasCMOV], Constraints = "$src1 = $dst", isCommutable = 1, SchedRW = [WriteCMOV] in { def CMOV16rr : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond), @@ -35,7 +35,7 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", (X86cmov GR64:$src1, GR64:$src2, timm:$cond, EFLAGS))]>, TB; } -let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", +let Uses = [EFLAGS], Predicates = [HasCMOV], Constraints = "$src1 = $dst", SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in { def CMOV16rm : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond), @@ -52,7 +52,7 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), timm:$cond, EFLAGS))]>, TB; -} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" +} // Uses = [EFLAGS], Predicates = [HasCMOV], Constraints = "$src1 = $dst" } // isCodeGenOnly = 1, ForceDisassemble = 1 def inv_cond_XFORM : SDNodeXForm; def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, timm:$cond, EFLAGS), diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 7288ce812138..a55b95960aa6 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -544,10 +544,10 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in { // i8 register pressure. defm _GR8 : CMOVrr_PSEUDO; - let Predicates = [NoCMov] in { + let Predicates = [NoCMOV] in { defm _GR32 : CMOVrr_PSEUDO; defm _GR16 : CMOVrr_PSEUDO; - } // Predicates = [NoCMov] + } // Predicates = [NoCMOV] // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no // SSE1/SSE2. @@ -562,12 +562,14 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in { let Predicates = [HasMMX] in defm _VR64 : CMOVrr_PSEUDO; - defm _FR16X : CMOVrr_PSEUDO; let Predicates = [HasSSE1,NoAVX512] in defm _FR32 : CMOVrr_PSEUDO; - let Predicates = [HasSSE2,NoAVX512] in + let Predicates = [HasSSE2,NoAVX512] in { + defm _FR16 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; + } let Predicates = [HasAVX512] in { + defm _FR16X : CMOVrr_PSEUDO; defm _FR32X : CMOVrr_PSEUDO; defm _FR64X : CMOVrr_PSEUDO; } @@ -670,7 +672,7 @@ def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero), Requires<[Not64BitMode]>, OpSize32, LOCK, Sched<[WriteALURMW]>; -let hasSideEffects = 1 in +let hasSideEffects = 1, isMeta = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", [(X86MemBarrier)]>, Sched<[WriteLoad]>; @@ -839,6 +841,38 @@ let Predicates = [UseIncDec] in { def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; } +// Atomic bit test. 
+def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, + SDTCisVT<2, i8>, SDTCisVT<3, i32>]>; +def x86bts : SDNode<"X86ISD::LBTS", X86LBTest, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def x86btc : SDNode<"X86ISD::LBTC", X86LBTest, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def x86btr : SDNode<"X86ISD::LBTR", X86LBTest, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; + +multiclass ATOMIC_LOGIC_OP { + let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteBitTestSetRegRMW] in { + def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2), + !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (!cast("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>, + OpSize16, TB, LOCK; + def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2), + !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (!cast("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>, + OpSize32, TB, LOCK; + def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2), + !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (!cast("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>, + TB, LOCK; + } +} + +defm LOCK_BTS : ATOMIC_LOGIC_OP; +defm LOCK_BTC : ATOMIC_LOGIC_OP; +defm LOCK_BTR : ATOMIC_LOGIC_OP; + // Atomic compare and swap. multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag> { @@ -863,7 +897,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in { } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX8], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, usesCustomInserter = 1 in { def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "cmpxchg8b\t$ptr", @@ -871,7 +905,7 @@ def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in { def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), "cmpxchg16b\t$ptr", @@ -898,7 +932,7 @@ def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), // the instruction and we are sure we will have a valid register to restore // the value of RBX. let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0, Constraints = "$rbx_save = $dst" in { @@ -910,7 +944,7 @@ def LCMPXCHG16B_SAVE_RBX : // Pseudo instruction that doesn't read/write RBX. Will be turned into either // LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter. 
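The cmpxchg16b patterns around here change only their gating predicate (HasCmpxchg16b to HasCX16); the operation they implement is a 16-byte compare-and-swap. For reference, the same operation at the C++ level; whether this inlines to cmpxchg16b (expected in RDX:RAX, desired in RCX:RBX) rather than calling libatomic depends on building with -mcx16 and on the standard library:

#include <atomic>
#include <cassert>
#include <cstdint>

// A 16-byte lock-free CAS candidate: trivially copyable, 16-byte aligned.
struct alignas(16) Pair {
  uint64_t Lo, Hi;
};

int main() {
  std::atomic<Pair> P{{1, 2}};
  Pair Expected{1, 2};
  Pair Desired{3, 4};
  // Succeeds because the stored value matches Expected bitwise.
  assert(P.compare_exchange_strong(Expected, Desired));
  Pair Now = P.load();
  assert(Now.Lo == 3 && Now.Hi == 4);
  return 0;
}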
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0, usesCustomInserter = 1 in { @@ -1235,6 +1269,21 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), return true; }]>; +def X86tcret_1reg : PatFrag<(ops node:$ptr, node:$off), + (X86tcret node:$ptr, node:$off), [{ + // X86tcret args: (*chain, ptr, imm, regs..., glue) + unsigned NumRegs = 1; + const SDValue& BasePtr = cast(N->getOperand(1))->getBasePtr(); + if (isa(BasePtr)) + NumRegs = 3; + else if (BasePtr->getNumOperands() && isa(BasePtr->getOperand(0))) + NumRegs = 3; + for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) + if (isa(N->getOperand(i)) && ( NumRegs-- == 0)) + return false; + return true; +}]>; + def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>, Requires<[Not64BitMode, NotUseIndirectThunkCalls]>; @@ -1242,7 +1291,8 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. -def : Pat<(X86tcret (load addr:$dst), timm:$off), +// Similar to X86tcret_6regs, here we only have 1 register left +def : Pat<(X86tcret_1reg (load addr:$dst), timm:$off), (TCRETURNmi addr:$dst, timm:$off)>, Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>; @@ -1466,6 +1516,21 @@ def ADD64ri32_DB : I<0, Pseudo, } } // AddedComplexity, SchedRW +//===----------------------------------------------------------------------===// +// Pattern match XOR as ADD +//===----------------------------------------------------------------------===// + +// Prefer to pattern match XOR with min_signed_value as ADD at isel time. +// ADD can be 3-addressified into an LEA instruction to avoid copies. +let AddedComplexity = 5 in { +def : Pat<(xor GR8:$src1, -128), + (ADD8ri GR8:$src1, -128)>; +def : Pat<(xor GR16:$src1, -32768), + (ADD16ri GR16:$src1, -32768)>; +def : Pat<(xor GR32:$src1, -2147483648), + (ADD32ri GR32:$src1, -2147483648)>; +} + //===----------------------------------------------------------------------===// // Pattern match SUB as XOR //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 6d969962afff..aa89a6f0ff9d 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -147,7 +147,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // Win64 wants indirect jumps leaving the function to have a REX_W prefix. // These are switched from TAILJMPr/m64_REX in MCInstLower. - let isCodeGenOnly = 1, hasREX_WPrefix = 1 in { + let isCodeGenOnly = 1, hasREX_W = 1 in { def JMP64r_REX : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJump]>; let mayLoad = 1 in @@ -384,7 +384,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, []>, Sched<[WriteJumpLd]>; // Win64 wants indirect jumps leaving the function to have a REX_W prefix. 
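Returning to the "Pattern match XOR as ADD" block above: the rewrite rests on a two's-complement identity. Flipping only the sign bit is the same as adding it, because the carry out of the top bit is discarded, and once written as ADD the operation can later be three-addressified into LEA. A standalone check of the 32-bit case:

#include <cassert>
#include <cstdint>

// Verify x ^ 0x80000000 == x + 0x80000000 for a few representative values;
// unsigned wraparound makes the addition well defined.
int main() {
  const uint32_t SignBit = 0x80000000u;
  for (uint32_t X : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xdeadbeefu}) {
    assert((X ^ SignBit) == (X + SignBit));
  }
  return 0;
}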
- let hasREX_WPrefix = 1 in { + let hasREX_W = 1 in { def TAILJMPr64_REX : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), []>, Sched<[WriteJump]>; diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index e310f369be08..a68d61043c5c 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -423,9 +423,9 @@ def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\t$dst">; // Floating point cmovs. class FpIf32CMov pattern> : - FpI_, Requires<[FPStackf32, HasCMov]>; + FpI_, Requires<[FPStackf32, HasCMOV]>; class FpIf64CMov pattern> : - FpI_, Requires<[FPStackf64, HasCMov]>; + FpI_, Requires<[FPStackf64, HasCMOV]>; multiclass FPCMov { def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), @@ -440,7 +440,7 @@ multiclass FPCMov { CondMovFP, [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, cc, EFLAGS))]>, - Requires<[HasCMov]>; + Requires<[HasCMOV]>; } let SchedRW = [WriteFCMOV] in { @@ -455,7 +455,7 @@ defm CMOVNE : FPCMov; defm CMOVNP : FPCMov; } // Uses = [EFLAGS], Constraints = "$src1 = $dst" -let Predicates = [HasCMov] in { +let Predicates = [HasCMOV] in { // These are not factored because there's no clean way to pass DA/DB. def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RSTi:$op), "fcmovb\t{$op, %st|st, $op}">; @@ -473,7 +473,7 @@ def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RSTi:$op), "fcmovne\t{$op, %st|st, $op}">; def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op), "fcmovnu\t{$op, %st|st, $op}">; -} // Predicates = [HasCMov] +} // Predicates = [HasCMOV] } // SchedRW let mayRaiseFPException = 1 in { @@ -664,22 +664,22 @@ let SchedRW = [WriteFCom], mayRaiseFPException = 1 in { let Defs = [EFLAGS, FPSW], Uses = [FPCW] in { def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set EFLAGS, (X86any_fcmp RFP32:$lhs, RFP32:$rhs))]>, - Requires<[FPStackf32, HasCMov]>; + Requires<[FPStackf32, HasCMOV]>; def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, [(set EFLAGS, (X86any_fcmp RFP64:$lhs, RFP64:$rhs))]>, - Requires<[FPStackf64, HasCMov]>; + Requires<[FPStackf64, HasCMOV]>; def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, [(set EFLAGS, (X86any_fcmp RFP80:$lhs, RFP80:$rhs))]>, - Requires<[HasCMov]>; + Requires<[HasCMOV]>; def COM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set EFLAGS, (X86strict_fcmps RFP32:$lhs, RFP32:$rhs))]>, - Requires<[FPStackf32, HasCMov]>; + Requires<[FPStackf32, HasCMOV]>; def COM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, [(set EFLAGS, (X86strict_fcmps RFP64:$lhs, RFP64:$rhs))]>, - Requires<[FPStackf64, HasCMov]>; + Requires<[FPStackf64, HasCMOV]>; def COM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, [(set EFLAGS, (X86strict_fcmps RFP80:$lhs, RFP80:$rhs))]>, - Requires<[HasCMov]>; + Requires<[HasCMOV]>; } let Uses = [ST0, FPCW] in { diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp index 226349485238..27220a8d4d99 100644 --- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -292,8 +292,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { { X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD }, { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, { X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD }, - { X86::MMX_MOVD64from64rr, X86::MMX_MOVD64from64rm, TB_FOLDED_STORE | TB_NO_REVERSE }, - { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE | 
TB_NO_REVERSE }, + { X86::MMX_MOVD64from64rr, X86::MMX_MOVQ64mr, TB_FOLDED_STORE }, + { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 0e7033fc233a..3a44b4570e9b 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -196,7 +196,7 @@ class OpSize32 { OperandSize OpSize = OpSize32; } class AdSize16 { AddressSize AdSize = AdSize16; } class AdSize32 { AddressSize AdSize = AdSize32; } class AdSize64 { AddressSize AdSize = AdSize64; } -class REX_W { bit hasREX_WPrefix = 1; } +class REX_W { bit hasREX_W = 1; } class LOCK { bit hasLockPrefix = 1; } class REP { bit hasREPPrefix = 1; } class TB { Map OpMap = TB; } @@ -316,7 +316,7 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, bits<3> OpPrefixBits = OpPrefix.Value; Map OpMap = OB; // Which opcode map does this inst have? bits<4> OpMapBits = OpMap.Value; - bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix? + bit hasREX_W = 0; // Does this inst require the REX.W prefix? FPFormat FPForm = NotFP; // What flavor of FP instruction is this? bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? Domain ExeDomain = d; @@ -375,7 +375,7 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, // No need for 3rd bit, we don't need to distinguish NoPrfx from PS. let TSFlags{12-11} = OpPrefixBits{1-0}; let TSFlags{16-13} = OpMapBits; - let TSFlags{17} = hasREX_WPrefix; + let TSFlags{17} = hasREX_W; let TSFlags{21-18} = ImmT.Value; let TSFlags{24-22} = FPForm.Value; let TSFlags{25} = hasLockPrefix; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 166f1f8c3251..57ba4683c6a4 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -287,7 +287,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<2, 1>]>; def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>; -def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 4dcd886fa3b2..ec32ac2acad1 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -25,13 +25,16 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -137,298 +140,70 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } bool X86InstrInfo::isDataInvariant(MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - // By default, assume that the instruction is not data invariant. 
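// For context (an illustrative gloss, not text from the patch): "data
// invariant" means the instruction's latency and resource usage do not
// depend on the values of its operands, which is the property
// constant-time code such as crypto relies on. That is why
// value-dependent-latency instructions like DIV never appear below, and
// why the exhaustive opcode switch can be collapsed into the generated
// X86::is<Mnemonic>() predicates that the new body uses.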
+ if (MI.mayLoad() || MI.mayStore()) return false; - // Some target-independent operations that trivially lower to data-invariant - // instructions. - case TargetOpcode::COPY: - case TargetOpcode::INSERT_SUBREG: - case TargetOpcode::SUBREG_TO_REG: + // Some target-independent operations that trivially lower to data-invariant + // instructions. + if (MI.isCopyLike() || MI.isInsertSubreg()) return true; + unsigned Opcode = MI.getOpcode(); + using namespace X86; // On x86 it is believed that imul is constant time w.r.t. the loaded data. // However, they set flags and are perhaps the most surprisingly constant // time operations so we call them out here separately. - case X86::IMUL16rr: - case X86::IMUL16rri8: - case X86::IMUL16rri: - case X86::IMUL32rr: - case X86::IMUL32rri8: - case X86::IMUL32rri: - case X86::IMUL64rr: - case X86::IMUL64rri32: - case X86::IMUL64rri8: - + if (isIMUL(Opcode)) + return true; // Bit scanning and counting instructions that are somewhat surprisingly // constant time as they scan across bits and do other fairly complex // operations like popcnt, but are believed to be constant time on x86. // However, these set flags. - case X86::BSF16rr: - case X86::BSF32rr: - case X86::BSF64rr: - case X86::BSR16rr: - case X86::BSR32rr: - case X86::BSR64rr: - case X86::LZCNT16rr: - case X86::LZCNT32rr: - case X86::LZCNT64rr: - case X86::POPCNT16rr: - case X86::POPCNT32rr: - case X86::POPCNT64rr: - case X86::TZCNT16rr: - case X86::TZCNT32rr: - case X86::TZCNT64rr: - + if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) || + isTZCNT(Opcode)) + return true; // Bit manipulation instructions are effectively combinations of basic // arithmetic ops, and should still execute in constant time. These also // set flags. - case X86::BLCFILL32rr: - case X86::BLCFILL64rr: - case X86::BLCI32rr: - case X86::BLCI64rr: - case X86::BLCIC32rr: - case X86::BLCIC64rr: - case X86::BLCMSK32rr: - case X86::BLCMSK64rr: - case X86::BLCS32rr: - case X86::BLCS64rr: - case X86::BLSFILL32rr: - case X86::BLSFILL64rr: - case X86::BLSI32rr: - case X86::BLSI64rr: - case X86::BLSIC32rr: - case X86::BLSIC64rr: - case X86::BLSMSK32rr: - case X86::BLSMSK64rr: - case X86::BLSR32rr: - case X86::BLSR64rr: - case X86::TZMSK32rr: - case X86::TZMSK64rr: - + if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) || + isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) || + isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) || + isTZMSK(Opcode)) + return true; // Bit extracting and clearing instructions should execute in constant time, // and set flags. - case X86::BEXTR32rr: - case X86::BEXTR64rr: - case X86::BEXTRI32ri: - case X86::BEXTRI64ri: - case X86::BZHI32rr: - case X86::BZHI64rr: - + if (isBEXTR(Opcode) || isBZHI(Opcode)) + return true; // Shift and rotate. 
- case X86::ROL8r1: - case X86::ROL16r1: - case X86::ROL32r1: - case X86::ROL64r1: - case X86::ROL8rCL: - case X86::ROL16rCL: - case X86::ROL32rCL: - case X86::ROL64rCL: - case X86::ROL8ri: - case X86::ROL16ri: - case X86::ROL32ri: - case X86::ROL64ri: - case X86::ROR8r1: - case X86::ROR16r1: - case X86::ROR32r1: - case X86::ROR64r1: - case X86::ROR8rCL: - case X86::ROR16rCL: - case X86::ROR32rCL: - case X86::ROR64rCL: - case X86::ROR8ri: - case X86::ROR16ri: - case X86::ROR32ri: - case X86::ROR64ri: - case X86::SAR8r1: - case X86::SAR16r1: - case X86::SAR32r1: - case X86::SAR64r1: - case X86::SAR8rCL: - case X86::SAR16rCL: - case X86::SAR32rCL: - case X86::SAR64rCL: - case X86::SAR8ri: - case X86::SAR16ri: - case X86::SAR32ri: - case X86::SAR64ri: - case X86::SHL8r1: - case X86::SHL16r1: - case X86::SHL32r1: - case X86::SHL64r1: - case X86::SHL8rCL: - case X86::SHL16rCL: - case X86::SHL32rCL: - case X86::SHL64rCL: - case X86::SHL8ri: - case X86::SHL16ri: - case X86::SHL32ri: - case X86::SHL64ri: - case X86::SHR8r1: - case X86::SHR16r1: - case X86::SHR32r1: - case X86::SHR64r1: - case X86::SHR8rCL: - case X86::SHR16rCL: - case X86::SHR32rCL: - case X86::SHR64rCL: - case X86::SHR8ri: - case X86::SHR16ri: - case X86::SHR32ri: - case X86::SHR64ri: - case X86::SHLD16rrCL: - case X86::SHLD32rrCL: - case X86::SHLD64rrCL: - case X86::SHLD16rri8: - case X86::SHLD32rri8: - case X86::SHLD64rri8: - case X86::SHRD16rrCL: - case X86::SHRD32rrCL: - case X86::SHRD64rrCL: - case X86::SHRD16rri8: - case X86::SHRD32rri8: - case X86::SHRD64rri8: - + if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) || + isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode)) + return true; // Basic arithmetic is constant time on the input but does set flags. - case X86::ADC8rr: - case X86::ADC8ri: - case X86::ADC16rr: - case X86::ADC16ri: - case X86::ADC16ri8: - case X86::ADC32rr: - case X86::ADC32ri: - case X86::ADC32ri8: - case X86::ADC64rr: - case X86::ADC64ri8: - case X86::ADC64ri32: - case X86::ADD8rr: - case X86::ADD8ri: - case X86::ADD16rr: - case X86::ADD16ri: - case X86::ADD16ri8: - case X86::ADD32rr: - case X86::ADD32ri: - case X86::ADD32ri8: - case X86::ADD64rr: - case X86::ADD64ri8: - case X86::ADD64ri32: - case X86::AND8rr: - case X86::AND8ri: - case X86::AND16rr: - case X86::AND16ri: - case X86::AND16ri8: - case X86::AND32rr: - case X86::AND32ri: - case X86::AND32ri8: - case X86::AND64rr: - case X86::AND64ri8: - case X86::AND64ri32: - case X86::OR8rr: - case X86::OR8ri: - case X86::OR16rr: - case X86::OR16ri: - case X86::OR16ri8: - case X86::OR32rr: - case X86::OR32ri: - case X86::OR32ri8: - case X86::OR64rr: - case X86::OR64ri8: - case X86::OR64ri32: - case X86::SBB8rr: - case X86::SBB8ri: - case X86::SBB16rr: - case X86::SBB16ri: - case X86::SBB16ri8: - case X86::SBB32rr: - case X86::SBB32ri: - case X86::SBB32ri8: - case X86::SBB64rr: - case X86::SBB64ri8: - case X86::SBB64ri32: - case X86::SUB8rr: - case X86::SUB8ri: - case X86::SUB16rr: - case X86::SUB16ri: - case X86::SUB16ri8: - case X86::SUB32rr: - case X86::SUB32ri: - case X86::SUB32ri8: - case X86::SUB64rr: - case X86::SUB64ri8: - case X86::SUB64ri32: - case X86::XOR8rr: - case X86::XOR8ri: - case X86::XOR16rr: - case X86::XOR16ri: - case X86::XOR16ri8: - case X86::XOR32rr: - case X86::XOR32ri: - case X86::XOR32ri8: - case X86::XOR64rr: - case X86::XOR64ri8: - case X86::XOR64ri32: + if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) || + isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode)) + return true; // Arithmetic with 
just 32-bit and 64-bit variants and no immediates. - case X86::ADCX32rr: - case X86::ADCX64rr: - case X86::ADOX32rr: - case X86::ADOX64rr: - case X86::ANDN32rr: - case X86::ANDN64rr: + if (isADCX(Opcode) || isADOX(Opcode) || isANDN(Opcode)) + return true; // Unary arithmetic operations. - case X86::DEC8r: - case X86::DEC16r: - case X86::DEC32r: - case X86::DEC64r: - case X86::INC8r: - case X86::INC16r: - case X86::INC32r: - case X86::INC64r: - case X86::NEG8r: - case X86::NEG16r: - case X86::NEG32r: - case X86::NEG64r: - + if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode)) + return true; // Unlike other arithmetic, NOT doesn't set EFLAGS. - case X86::NOT8r: - case X86::NOT16r: - case X86::NOT32r: - case X86::NOT64r: - + if (isNOT(Opcode)) + return true; // Various move instructions used to zero or sign extend things. Note that we // intentionally don't support the _NOREX variants as we can't handle that // register constraint anyways. - case X86::MOVSX16rr8: - case X86::MOVSX32rr8: - case X86::MOVSX32rr16: - case X86::MOVSX64rr8: - case X86::MOVSX64rr16: - case X86::MOVSX64rr32: - case X86::MOVZX16rr8: - case X86::MOVZX32rr8: - case X86::MOVZX32rr16: - case X86::MOVZX64rr8: - case X86::MOVZX64rr16: - case X86::MOV32rr: - + if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode)) + return true; // Arithmetic instructions that are both constant time and don't set flags. - case X86::RORX32ri: - case X86::RORX64ri: - case X86::SARX32rr: - case X86::SARX64rr: - case X86::SHLX32rr: - case X86::SHLX64rr: - case X86::SHRX32rr: - case X86::SHRX64rr: - + if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode)) + return true; // LEA doesn't actually access memory, and its arithmetic is constant time. - case X86::LEA16r: - case X86::LEA32r: - case X86::LEA64_32r: - case X86::LEA64r: + if (isLEA(Opcode)) return true; - } + // By default, assume that the instruction is not data invariant. + return false; } bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) { @@ -990,6 +765,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case X86::AVX_SET0: case X86::FsFLD0SD: case X86::FsFLD0SS: + case X86::FsFLD0SH: case X86::FsFLD0F128: case X86::KSET0D: case X86::KSET0Q: @@ -1192,6 +968,102 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) { return ShAmt < 4 && ShAmt > 0; } +static bool findRedundantFlagInstr(MachineInstr &CmpInstr, + MachineInstr &CmpValDefInstr, + const MachineRegisterInfo *MRI, + MachineInstr **AndInstr, + const TargetRegisterInfo *TRI, + bool &NoSignFlag, bool &ClearsOverflowFlag) { + if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG) + return false; + + if (CmpInstr.getOpcode() != X86::TEST64rr) + return false; + + // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare` + // guarantees that it's analyzable only if two registers are identical. + assert( + (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) && + "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` " + "requires two reg operands are the same."); + + // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that + // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case + // if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is + // redundant. 
+  assert( +      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) && +      "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG."); + +  // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically +  // 0. +  if (CmpValDefInstr.getOperand(1).getImm() != 0) +    return false; + +  // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically +  // sub_32bit or sub_xmm. +  if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit) +    return false; + +  MachineInstr *VregDefInstr = +      MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg()); + +  assert(VregDefInstr && "Must have a definition (SSA)"); + +  // Requires that `CmpValDefInstr` and `VregDefInstr` are from the same MBB +  // to simplify the subsequent analysis. +  // +  // FIXME: If `VregDefInstr->getParent()` is the only predecessor of +  // `CmpValDefInstr.getParent()`, this could be handled. +  if (VregDefInstr->getParent() != CmpValDefInstr.getParent()) +    return false; + +  if (X86::isAND(VregDefInstr->getOpcode())) { +    // Get a sequence of instructions like +    //   %reg = and* ...                    // Set EFLAGS +    //   ...                                // EFLAGS not changed +    //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit +    //   test64rr %extended_reg, %extended_reg, implicit-def $eflags +    // +    // If subsequent readers use a subset of bits that don't change +    // after `and*` instructions, it's likely that the test64rr could +    // be optimized away. +    for (const MachineInstr &Instr : +         make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)), +                    MachineBasicBlock::iterator(CmpValDefInstr))) { +      // Bail out if any instruction between 'VregDefInstr' and +      // 'CmpValDefInstr' modifies EFLAGS. +      if (Instr.modifiesRegister(X86::EFLAGS, TRI)) +        return false; +    } + +    *AndInstr = VregDefInstr; + +    // The AND instruction will essentially update SF and clear OF, so +    // NoSignFlag should be false in the sense that SF is modified by `AND`. +    // +    // However, the implementation artificially sets `NoSignFlag` to true +    // to poison the SF bit; that is to say, if SF is looked at later, the +    // optimization (to erase TEST64rr) will be disabled. +    // +    // The reason to poison the SF bit is that its value could be different +    // in the `AND` and `TEST` operations; the sign bit is not known for +    // `AND`, and is known to be 0 as a result of `TEST64rr`. +    // +    // FIXME: As opposed to poisoning the SF bit directly, consider peeking into +    // the AND instruction and using the static information to guide peephole +    // optimization if possible. For example, it's possible to fold a +    // conditional move into a copy if the relevant EFLAGS bits could be deduced +    // from an immediate operand of the AND operation. +    // +    NoSignFlag = true; +    // ClearsOverflowFlag is true for the AND operation (no surprise).
+ ClearsOverflowFlag = true; + return true; + } + return false; +} + bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned Opc, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, @@ -1314,8 +1186,11 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, case X86::SHL8ri: case X86::SHL16ri: { unsigned ShAmt = MI.getOperand(2).getImm(); - MIB.addReg(0).addImm(1ULL << ShAmt) - .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0); + MIB.addReg(0) + .addImm(1LL << ShAmt) + .addReg(InRegLEA, RegState::Kill) + .addImm(0) + .addReg(0); break; } case X86::INC8r: @@ -1478,7 +1353,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI, NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) .add(Dest) .addReg(0) - .addImm(1ULL << ShAmt) + .addImm(1LL << ShAmt) .add(Src) .addImm(0) .addReg(0); @@ -1502,7 +1377,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI, BuildMI(MF, MI.getDebugLoc(), get(Opc)) .add(Dest) .addReg(0) - .addImm(1ULL << ShAmt) + .addImm(1LL << ShAmt) .addReg(SrcReg, getKillRegState(isKill)) .addImm(0) .addReg(0); @@ -1957,14 +1832,13 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( FMAForms[0] = FMA3Group.get132Opcode(); FMAForms[1] = FMA3Group.get213Opcode(); FMAForms[2] = FMA3Group.get231Opcode(); - unsigned FormIndex; - for (FormIndex = 0; FormIndex < 3; FormIndex++) - if (Opc == FMAForms[FormIndex]) - break; // Everything is ready, just adjust the FMA opcode and return it. - FormIndex = FormMapping[Case][FormIndex]; - return FMAForms[FormIndex]; + for (unsigned FormIndex = 0; FormIndex < 3; FormIndex++) + if (Opc == FMAForms[FormIndex]) + return FMAForms[FormMapping[Case][FormIndex]]; + + llvm_unreachable("Illegal FMA3 format"); } static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, @@ -2141,7 +2015,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, if ((MI.getOperand(3).getImm() ^ Mask) == 1) { auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); - WorkingMI.RemoveOperand(3); + WorkingMI.removeOperand(3); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); @@ -2238,7 +2112,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!"); auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(X86::MOVSDrr)); - WorkingMI.RemoveOperand(3); + WorkingMI.removeOperand(3); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -2813,34 +2687,37 @@ bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const { return false; } +int X86::getCondSrcNoFromDesc(const MCInstrDesc &MCID) { + unsigned Opcode = MCID.getOpcode(); + if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isCMOVCC(Opcode))) + return -1; + // Assume that condition code is always the last use operand. 
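// A worked instance of the operand arithmetic below (illustrative, not
// from the patch): CMOV32rr is (outs GR32:$dst),
// (ins GR32:$src1, GR32:$src2, ccode:$cond), giving NumOperands = 4 and
// NumDefs = 1, so the source operand number returned is 3 - 1 = 2;
// getCondFromMI then adds NumDefs back and reads MI.getOperand(3), the
// ccode immediate. For JCC_1, (outs), (ins brtarget:$dst, ccode:$cond)
// gives 2 - 0 - 1 = 1.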
+ unsigned NumUses = MCID.getNumOperands() - MCID.getNumDefs(); + return NumUses - 1; +} + +X86::CondCode X86::getCondFromMI(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + int CondNo = getCondSrcNoFromDesc(MCID); + if (CondNo < 0) + return X86::COND_INVALID; + CondNo += MCID.getNumDefs(); + return static_cast(MI.getOperand(CondNo).getImm()); +} + X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: return X86::COND_INVALID; - case X86::JCC_1: - return static_cast( - MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); - } + return X86::isJCC(MI.getOpcode()) ? X86::getCondFromMI(MI) + : X86::COND_INVALID; } -/// Return condition code of a SETCC opcode. X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: return X86::COND_INVALID; - case X86::SETCCr: case X86::SETCCm: - return static_cast( - MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); - } + return X86::isSETCC(MI.getOpcode()) ? X86::getCondFromMI(MI) + : X86::COND_INVALID; } -/// Return condition code of a CMov opcode. X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: return X86::COND_INVALID; - case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: - case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: - return static_cast( - MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); - } + return X86::isCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI) + : X86::COND_INVALID; } /// Return the inverse of the specified condition, @@ -3166,8 +3043,7 @@ bool X86InstrInfo::AnalyzeBranchImpl( } // If the block has any instructions after a JMP, delete them. - while (std::next(I) != MBB.end()) - std::next(I)->eraseFromParent(); + MBB.erase(std::next(I), MBB.end()); Cond.clear(); FBB = nullptr; @@ -3464,7 +3340,7 @@ bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { // Not all subtargets have cmov instructions. - if (!Subtarget.hasCMov()) + if (!Subtarget.canUseCMOV()) return false; if (Cond.size() != 1) return false; @@ -3708,10 +3584,6 @@ static unsigned getLoadStoreRegOpcode(Register Reg, case 2: if (X86::VK16RegClass.hasSubClassEq(RC)) return load ? X86::KMOVWkm : X86::KMOVWmk; - if (X86::FR16XRegClass.hasSubClassEq(RC)) { - assert(STI.hasFP16()); - return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr; - } assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); return load ? X86::MOV16rm : X86::MOV16mr; case 4: @@ -3739,6 +3611,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg, X86::VK8PAIRRegClass.hasSubClassEq(RC) || X86::VK16PAIRRegClass.hasSubClassEq(RC)) return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE; + if ((X86::FR16RegClass.hasSubClassEq(RC) || + X86::FR16XRegClass.hasSubClassEq(RC)) && + STI.hasFP16()) + return load ? 
X86::VMOVSHZrm_alt : X86::VMOVSHZmr; llvm_unreachable("Unknown 4-byte regclass"); case 8: if (X86::GR64RegClass.hasSubClassEq(RC)) @@ -3845,6 +3721,35 @@ X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, return AM; } +bool X86InstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + Optional AMOrNone = getAddrModeFromMemoryOp(MI, nullptr); + if (!AMOrNone) + return true; + + ExtAddrMode AM = *AMOrNone; + + if (AM.ScaledReg != X86::NoRegister) { + switch (AM.Scale) { + case 1: + case 2: + case 4: + case 8: + break; + default: + ErrInfo = "Scale factor in address must be 1, 2, 4 or 8"; + return false; + } + } + if (!isInt<32>(AM.Displacement)) { + ErrInfo = "Displacement in address must fit into 32-bit signed " + "integer"; + return false; + } + + return true; +} + bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const { @@ -3949,12 +3854,12 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && "Stack slot too small for store"); if (RC->getID() == X86::TILERegClassID) { unsigned Opc = X86::TILESTORED; // tilestored %tmm, (%sp, %idx) - MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64); MachineInstr *NewMI = @@ -3963,6 +3868,14 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineOperand &MO = NewMI->getOperand(2); MO.setReg(VirtReg); MO.setIsKill(true); + } else if ((RC->getID() == X86::FR16RegClassID || + RC->getID() == X86::FR16XRegClassID) && + !Subtarget.hasFP16()) { + unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZmr + : Subtarget.hasAVX() ? X86::VMOVSSmr + : X86::MOVSSmr; + addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx) + .addReg(SrcReg, getKillRegState(isKill)); } else { unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16); bool isAligned = @@ -3991,6 +3904,14 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineOperand &MO = NewMI->getOperand(3); MO.setReg(VirtReg); MO.setIsKill(true); + } else if ((RC->getID() == X86::FR16RegClassID || + RC->getID() == X86::FR16XRegClassID) && + !Subtarget.hasFP16()) { + unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZrm + : Subtarget.hasAVX() ? X86::VMOVSSrm + : X86::MOVSSrm; + addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), + FrameIdx); } else { const MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -4375,7 +4296,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; } CmpInstr.setDesc(get(NewOpcode)); - CmpInstr.RemoveOperand(0); + CmpInstr.removeOperand(0); // Mutating this instruction invalidates any debug data associated with it. CmpInstr.dropDebugNumber(); // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. @@ -4423,6 +4344,23 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, MI = &Inst; break; } + + // Look back for the following pattern, in which case the test64rr + // instruction could be erased. + // + // Example: + // %reg = and32ri %in_reg, 5 + // ... 
// EFLAGS not changed. + // %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index + // test64rr %src_reg, %src_reg, implicit-def $eflags + MachineInstr *AndInstr = nullptr; + if (IsCmpZero && + findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI, + NoSignFlag, ClearsOverflowFlag)) { + assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode())); + MI = AndInstr; + break; + } // Cannot find other candidates before definition of SrcReg. return false; } @@ -4524,6 +4462,11 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return false; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: + // If SF is used, but the instruction doesn't update the SF, then we + // can't do the optimization. + if (NoSignFlag) + return false; + LLVM_FALLTHROUGH; case X86::COND_O: case X86::COND_NO: // If OF is used, the instruction needs to clear it like CmpZero does. if (!ClearsOverflowFlag) @@ -4811,7 +4754,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm); MIB->setDesc(TII.get(X86::POP32r)); } - MIB->RemoveOperand(1); + MIB->removeOperand(1); MIB->addImplicitDefUseOperands(*MBB.getParent()); // Build CFI if necessary. @@ -4918,7 +4861,7 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { MIB->setDesc(Desc); int64_t ShiftAmt = MIB->getOperand(2).getImm(); // Temporarily remove the immediate so we can add another source register. - MIB->RemoveOperand(2); + MIB->removeOperand(2); // Add the register. Don't copy the kill flag if there is one. MIB.addReg(MIB.getReg(1), getUndefRegState(MIB->getOperand(1).isUndef())); @@ -4949,6 +4892,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: + case X86::FsFLD0SH: case X86::FsFLD0F128: return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); case X86::AVX_SET0: { @@ -5026,7 +4970,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned MaskState = getRegState(MIB->getOperand(1)); unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ? X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz; - MI.RemoveOperand(1); + MI.removeOperand(1); MIB->setDesc(get(Opc)); // VPTERNLOG needs 3 register inputs and an immediate. // 0xff will return 1s for any input. 
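The VPTERNLOG expansion above works because the instruction's 8-bit immediate is a three-input truth table applied independently at every bit position, so an immediate of 0xff produces all-ones whatever the inputs. A standalone sketch of that encoding (the ternlog helper is illustrative):

#include <cassert>
#include <cstdint>

// Per bit position, result = (Imm >> Idx) & 1 with
// Idx = (a << 2) | (b << 1) | c, so Imm = 0xff is all-ones regardless
// of the inputs.
static uint64_t ternlog(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
  uint64_t R = 0;
  for (unsigned Bit = 0; Bit < 64; ++Bit) {
    unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                   ((C >> Bit) & 1);
    R |= uint64_t((Imm >> Idx) & 1) << Bit;
  }
  return R;
}

int main() {
  assert(ternlog(0x1234, 0x5678, 0x9abc, 0xff) == ~uint64_t(0));
  assert(ternlog(0b1100, 0b1010, 0b1001, 0xe8) == 0b1000); // 0xe8 = majority
  return 0;
}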
@@ -5165,6 +5109,255 @@ static bool hasPartialRegUpdate(unsigned Opcode, case X86::SQRTSDr_Int: case X86::SQRTSDm_Int: return true; + case X86::VFCMULCPHZ128rm: + case X86::VFCMULCPHZ128rmb: + case X86::VFCMULCPHZ128rmbkz: + case X86::VFCMULCPHZ128rmkz: + case X86::VFCMULCPHZ128rr: + case X86::VFCMULCPHZ128rrkz: + case X86::VFCMULCPHZ256rm: + case X86::VFCMULCPHZ256rmb: + case X86::VFCMULCPHZ256rmbkz: + case X86::VFCMULCPHZ256rmkz: + case X86::VFCMULCPHZ256rr: + case X86::VFCMULCPHZ256rrkz: + case X86::VFCMULCPHZrm: + case X86::VFCMULCPHZrmb: + case X86::VFCMULCPHZrmbkz: + case X86::VFCMULCPHZrmkz: + case X86::VFCMULCPHZrr: + case X86::VFCMULCPHZrrb: + case X86::VFCMULCPHZrrbkz: + case X86::VFCMULCPHZrrkz: + case X86::VFMULCPHZ128rm: + case X86::VFMULCPHZ128rmb: + case X86::VFMULCPHZ128rmbkz: + case X86::VFMULCPHZ128rmkz: + case X86::VFMULCPHZ128rr: + case X86::VFMULCPHZ128rrkz: + case X86::VFMULCPHZ256rm: + case X86::VFMULCPHZ256rmb: + case X86::VFMULCPHZ256rmbkz: + case X86::VFMULCPHZ256rmkz: + case X86::VFMULCPHZ256rr: + case X86::VFMULCPHZ256rrkz: + case X86::VFMULCPHZrm: + case X86::VFMULCPHZrmb: + case X86::VFMULCPHZrmbkz: + case X86::VFMULCPHZrmkz: + case X86::VFMULCPHZrr: + case X86::VFMULCPHZrrb: + case X86::VFMULCPHZrrbkz: + case X86::VFMULCPHZrrkz: + case X86::VFCMULCSHZrm: + case X86::VFCMULCSHZrmkz: + case X86::VFCMULCSHZrr: + case X86::VFCMULCSHZrrb: + case X86::VFCMULCSHZrrbkz: + case X86::VFCMULCSHZrrkz: + case X86::VFMULCSHZrm: + case X86::VFMULCSHZrmkz: + case X86::VFMULCSHZrr: + case X86::VFMULCSHZrrb: + case X86::VFMULCSHZrrbkz: + case X86::VFMULCSHZrrkz: + return Subtarget.hasMULCFalseDeps(); + case X86::VPERMDYrm: + case X86::VPERMDYrr: + case X86::VPERMQYmi: + case X86::VPERMQYri: + case X86::VPERMPSYrm: + case X86::VPERMPSYrr: + case X86::VPERMPDYmi: + case X86::VPERMPDYri: + case X86::VPERMDZ256rm: + case X86::VPERMDZ256rmb: + case X86::VPERMDZ256rmbkz: + case X86::VPERMDZ256rmkz: + case X86::VPERMDZ256rr: + case X86::VPERMDZ256rrkz: + case X86::VPERMDZrm: + case X86::VPERMDZrmb: + case X86::VPERMDZrmbkz: + case X86::VPERMDZrmkz: + case X86::VPERMDZrr: + case X86::VPERMDZrrkz: + case X86::VPERMQZ256mbi: + case X86::VPERMQZ256mbikz: + case X86::VPERMQZ256mi: + case X86::VPERMQZ256mikz: + case X86::VPERMQZ256ri: + case X86::VPERMQZ256rikz: + case X86::VPERMQZ256rm: + case X86::VPERMQZ256rmb: + case X86::VPERMQZ256rmbkz: + case X86::VPERMQZ256rmkz: + case X86::VPERMQZ256rr: + case X86::VPERMQZ256rrkz: + case X86::VPERMQZmbi: + case X86::VPERMQZmbikz: + case X86::VPERMQZmi: + case X86::VPERMQZmikz: + case X86::VPERMQZri: + case X86::VPERMQZrikz: + case X86::VPERMQZrm: + case X86::VPERMQZrmb: + case X86::VPERMQZrmbkz: + case X86::VPERMQZrmkz: + case X86::VPERMQZrr: + case X86::VPERMQZrrkz: + case X86::VPERMPSZ256rm: + case X86::VPERMPSZ256rmb: + case X86::VPERMPSZ256rmbkz: + case X86::VPERMPSZ256rmkz: + case X86::VPERMPSZ256rr: + case X86::VPERMPSZ256rrkz: + case X86::VPERMPSZrm: + case X86::VPERMPSZrmb: + case X86::VPERMPSZrmbkz: + case X86::VPERMPSZrmkz: + case X86::VPERMPSZrr: + case X86::VPERMPSZrrkz: + case X86::VPERMPDZ256mbi: + case X86::VPERMPDZ256mbikz: + case X86::VPERMPDZ256mi: + case X86::VPERMPDZ256mikz: + case X86::VPERMPDZ256ri: + case X86::VPERMPDZ256rikz: + case X86::VPERMPDZ256rm: + case X86::VPERMPDZ256rmb: + case X86::VPERMPDZ256rmbkz: + case X86::VPERMPDZ256rmkz: + case X86::VPERMPDZ256rr: + case X86::VPERMPDZ256rrkz: + case X86::VPERMPDZmbi: + case X86::VPERMPDZmbikz: + case X86::VPERMPDZmi: + case X86::VPERMPDZmikz: + case X86::VPERMPDZri: + case 
X86::VPERMPDZrikz: + case X86::VPERMPDZrm: + case X86::VPERMPDZrmb: + case X86::VPERMPDZrmbkz: + case X86::VPERMPDZrmkz: + case X86::VPERMPDZrr: + case X86::VPERMPDZrrkz: + return Subtarget.hasPERMFalseDeps(); + case X86::VRANGEPDZ128rmbi: + case X86::VRANGEPDZ128rmbikz: + case X86::VRANGEPDZ128rmi: + case X86::VRANGEPDZ128rmikz: + case X86::VRANGEPDZ128rri: + case X86::VRANGEPDZ128rrikz: + case X86::VRANGEPDZ256rmbi: + case X86::VRANGEPDZ256rmbikz: + case X86::VRANGEPDZ256rmi: + case X86::VRANGEPDZ256rmikz: + case X86::VRANGEPDZ256rri: + case X86::VRANGEPDZ256rrikz: + case X86::VRANGEPDZrmbi: + case X86::VRANGEPDZrmbikz: + case X86::VRANGEPDZrmi: + case X86::VRANGEPDZrmikz: + case X86::VRANGEPDZrri: + case X86::VRANGEPDZrrib: + case X86::VRANGEPDZrribkz: + case X86::VRANGEPDZrrikz: + case X86::VRANGEPSZ128rmbi: + case X86::VRANGEPSZ128rmbikz: + case X86::VRANGEPSZ128rmi: + case X86::VRANGEPSZ128rmikz: + case X86::VRANGEPSZ128rri: + case X86::VRANGEPSZ128rrikz: + case X86::VRANGEPSZ256rmbi: + case X86::VRANGEPSZ256rmbikz: + case X86::VRANGEPSZ256rmi: + case X86::VRANGEPSZ256rmikz: + case X86::VRANGEPSZ256rri: + case X86::VRANGEPSZ256rrikz: + case X86::VRANGEPSZrmbi: + case X86::VRANGEPSZrmbikz: + case X86::VRANGEPSZrmi: + case X86::VRANGEPSZrmikz: + case X86::VRANGEPSZrri: + case X86::VRANGEPSZrrib: + case X86::VRANGEPSZrribkz: + case X86::VRANGEPSZrrikz: + case X86::VRANGESDZrmi: + case X86::VRANGESDZrmikz: + case X86::VRANGESDZrri: + case X86::VRANGESDZrrib: + case X86::VRANGESDZrribkz: + case X86::VRANGESDZrrikz: + case X86::VRANGESSZrmi: + case X86::VRANGESSZrmikz: + case X86::VRANGESSZrri: + case X86::VRANGESSZrrib: + case X86::VRANGESSZrribkz: + case X86::VRANGESSZrrikz: + return Subtarget.hasRANGEFalseDeps(); + case X86::VGETMANTSSZrmi: + case X86::VGETMANTSSZrmikz: + case X86::VGETMANTSSZrri: + case X86::VGETMANTSSZrrib: + case X86::VGETMANTSSZrribkz: + case X86::VGETMANTSSZrrikz: + case X86::VGETMANTSDZrmi: + case X86::VGETMANTSDZrmikz: + case X86::VGETMANTSDZrri: + case X86::VGETMANTSDZrrib: + case X86::VGETMANTSDZrribkz: + case X86::VGETMANTSDZrrikz: + case X86::VGETMANTSHZrmi: + case X86::VGETMANTSHZrmikz: + case X86::VGETMANTSHZrri: + case X86::VGETMANTSHZrrib: + case X86::VGETMANTSHZrribkz: + case X86::VGETMANTSHZrrikz: + case X86::VGETMANTPSZ128rmbi: + case X86::VGETMANTPSZ128rmbikz: + case X86::VGETMANTPSZ128rmi: + case X86::VGETMANTPSZ128rmikz: + case X86::VGETMANTPSZ256rmbi: + case X86::VGETMANTPSZ256rmbikz: + case X86::VGETMANTPSZ256rmi: + case X86::VGETMANTPSZ256rmikz: + case X86::VGETMANTPSZrmbi: + case X86::VGETMANTPSZrmbikz: + case X86::VGETMANTPSZrmi: + case X86::VGETMANTPSZrmikz: + case X86::VGETMANTPDZ128rmbi: + case X86::VGETMANTPDZ128rmbikz: + case X86::VGETMANTPDZ128rmi: + case X86::VGETMANTPDZ128rmikz: + case X86::VGETMANTPDZ256rmbi: + case X86::VGETMANTPDZ256rmbikz: + case X86::VGETMANTPDZ256rmi: + case X86::VGETMANTPDZ256rmikz: + case X86::VGETMANTPDZrmbi: + case X86::VGETMANTPDZrmbikz: + case X86::VGETMANTPDZrmi: + case X86::VGETMANTPDZrmikz: + return Subtarget.hasGETMANTFalseDeps(); + case X86::VPMULLQZ128rm: + case X86::VPMULLQZ128rmb: + case X86::VPMULLQZ128rmbkz: + case X86::VPMULLQZ128rmkz: + case X86::VPMULLQZ128rr: + case X86::VPMULLQZ128rrkz: + case X86::VPMULLQZ256rm: + case X86::VPMULLQZ256rmb: + case X86::VPMULLQZ256rmbkz: + case X86::VPMULLQZ256rmkz: + case X86::VPMULLQZ256rr: + case X86::VPMULLQZ256rrkz: + case X86::VPMULLQZrm: + case X86::VPMULLQZrmb: + case X86::VPMULLQZrmbkz: + case X86::VPMULLQZrmkz: + case X86::VPMULLQZrr: + case 
X86::VPMULLQZrrkz: + return Subtarget.hasMULLQFalseDeps(); // GPR case X86::POPCNT32rm: case X86::POPCNT32rr: @@ -5591,6 +5784,28 @@ void X86InstrInfo::breakPartialRegDependency( .addReg(XReg, RegState::Undef) .addReg(Reg, RegState::ImplicitDefine); MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::VR128XRegClass.contains(Reg)) { + // Only handle VLX targets. + if (!Subtarget.hasVLX()) + return; + // Since vxorps requires AVX512DQ, vpxord should be the best choice. + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::VR256XRegClass.contains(Reg) || + X86::VR512RegClass.contains(Reg)) { + // Only handle VLX targets. + if (!Subtarget.hasVLX()) + return; + // Use vpxord to clear the full ymm/zmm register. + // It wants to read and write the xmm sub-register. + Register XReg = TRI->getSubReg(Reg, X86::sub_xmm); + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), XReg) + .addReg(XReg, RegState::Undef) + .addReg(XReg, RegState::Undef) + .addReg(Reg, RegState::ImplicitDefine); + MI.addRegisterKilled(Reg, TRI, true); } else if (X86::GR64RegClass.contains(Reg)) { // Using XOR32rr because it has shorter encoding and zeros up the upper bits // as well. @@ -6413,6 +6628,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::AVX512_FsFLD0SS: Alignment = Align(4); break; + case X86::FsFLD0SH: case X86::AVX512_FsFLD0SH: Alignment = Align(2); break; @@ -6451,6 +6667,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::AVX512_256_SET0: case X86::AVX512_512_SET0: case X86::AVX512_512_SETALLONES: + case X86::FsFLD0SH: case X86::AVX512_FsFLD0SH: case X86::FsFLD0SD: case X86::AVX512_FsFLD0SD: @@ -6490,7 +6707,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Ty = Type::getDoubleTy(MF.getFunction().getContext()); else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128) Ty = Type::getFP128Ty(MF.getFunction().getContext()); - else if (Opc == X86::AVX512_FsFLD0SH) + else if (Opc == X86::FsFLD0SH || Opc == X86::AVX512_FsFLD0SH) Ty = Type::getHalfTy(MF.getFunction().getContext()); else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), @@ -7170,7 +7387,7 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI, // ENDBR instructions should not be scheduled around. unsigned Opcode = MI.getOpcode(); if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 || - Opcode == X86::LDTILECFG) + Opcode == X86::PLDTILECFGV) return true; return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); @@ -9298,12 +9515,10 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo( // We check to see if CFI Instructions are present, and if they are // we find the number of CFI Instructions in the candidates. 
unsigned CFICount = 0; - MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front(); - for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx(); - Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) { - if (MBBI->isCFIInstruction()) + for (auto &I : make_range(RepeatedSequenceLocs[0].front(), + std::next(RepeatedSequenceLocs[0].back()))) { + if (I.isCFIInstruction()) CFICount++; - MBBI++; } // We compare the number of found CFI Instructions to the number of CFI @@ -9440,7 +9655,7 @@ MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::Candidate &C) const { + outliner::Candidate &C) const { // Is it a tail call? if (C.CallConstructionID == MachineOutlinerTailCall) { // Yes, just insert a JMP. diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 33ce55bbdb2b..4943d2152fd2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -40,13 +40,21 @@ std::pair getX86ConditionCode(CmpInst::Predicate Predicate); /// Return a cmov opcode for the given register size in bytes, and operand type. unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false); -// Turn jCC instruction into condition code. +/// Return the source operand # for condition code by \p MCID. If the +/// instruction doesn't have a condition code, return -1. +int getCondSrcNoFromDesc(const MCInstrDesc &MCID); + +/// Return the condition code of the instruction. If the instruction doesn't +/// have a condition code, return X86::COND_INVALID. +CondCode getCondFromMI(const MachineInstr &MI); + +// Turn JCC instruction into condition code. CondCode getCondFromBranch(const MachineInstr &MI); -// Turn setCC instruction into condition code. +// Turn SETCC instruction into condition code. CondCode getCondFromSETCC(const MachineInstr &MI); -// Turn CMov instruction into condition code. +// Turn CMOV instruction into condition code. 
CondCode getCondFromCMov(const MachineInstr &MI); /// GetOppositeBranchCondition - Return the inverse of the specified cond, @@ -552,8 +560,10 @@ public: MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::Candidate &C) const override; + outliner::Candidate &C) const override; + bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const override; #define GET_INSTRINFO_HELPER_DECLS #include "X86GenInstrInfo.inc" diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index fee9939b8dfc..7f6ef3479d40 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -388,17 +388,19 @@ def X86AbsMemAsmOperand : AsmOperandClass { } class X86MemOperand : Operand { + AsmOperandClass parserMatchClass = X86MemAsmOperand, + int size = 0> : Operand { let PrintMethod = printMethod; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG); let ParserMatchClass = parserMatchClass; let OperandType = "OPERAND_MEMORY"; + int Size = size; } // Gather mem operands class X86VMemOperand - : X86MemOperand { + AsmOperandClass parserMatchClass, int size = 0> + : X86MemOperand { let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, SEGMENT_REG); } @@ -413,48 +415,45 @@ def opaquemem : X86MemOperand<"printMemReference">; def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>; -def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand>; -def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>; -def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>; -def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>; -def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>; -def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>; -def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>; -def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>; -def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>; -def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>; -def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand>; -def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>; -def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>; -def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>; +def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8>; +def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>; +def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>; +def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>; +def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>; +def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>; +def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>; +def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>; +def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>; +def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>; +def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand, 80>; +def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>; +def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>; +def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>; // Gather mem operands -def vx64mem : X86VMemOperand; -def vx128mem : X86VMemOperand; -def vx256mem : X86VMemOperand; -def vy128mem : 
X86VMemOperand; -def vy256mem : X86VMemOperand; - -def vx64xmem : X86VMemOperand; -def vx128xmem : X86VMemOperand; -def vx256xmem : X86VMemOperand; -def vy128xmem : X86VMemOperand; -def vy256xmem : X86VMemOperand; -def vy512xmem : X86VMemOperand; -def vz256mem : X86VMemOperand; -def vz512mem : X86VMemOperand; +def vx64mem : X86VMemOperand; +def vx128mem : X86VMemOperand; +def vx256mem : X86VMemOperand; +def vy128mem : X86VMemOperand; +def vy256mem : X86VMemOperand; + +def vx64xmem : X86VMemOperand; +def vx128xmem : X86VMemOperand; +def vx256xmem : X86VMemOperand; +def vy128xmem : X86VMemOperand; +def vy256xmem : X86VMemOperand; +def vy512xmem : X86VMemOperand; +def vz256mem : X86VMemOperand; +def vz512mem : X86VMemOperand; // A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead // of a plain GPR, so that it doesn't potentially require a REX prefix. def ptr_rc_norex : PointerLikeRegClass<2>; def ptr_rc_norex_nosp : PointerLikeRegClass<3>; -def i8mem_NOREX : Operand { - let PrintMethod = "printbytemem"; +def i8mem_NOREX : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8> { let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm, SEGMENT_REG); - let ParserMatchClass = X86Mem8AsmOperand; - let OperandType = "OPERAND_MEMORY"; } // GPRs available for tailcall. @@ -840,11 +839,11 @@ def VK16Pair : RegisterOperand { // Define X86-specific addressing mode. def addr : ComplexPattern; def lea32addr : ComplexPattern; // In 64-bit mode 32-bit LEAs can use RIP-relative addressing. def lea64_32addr : ComplexPattern; @@ -855,7 +854,7 @@ def tls32baseaddr : ComplexPattern; def lea64addr : ComplexPattern; def tls64addr : ComplexPattern; -def HasCMov : Predicate<"Subtarget->hasCMov()">; -def NoCMov : Predicate<"!Subtarget->hasCMov()">; +def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; +def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">; def HasMMX : Predicate<"Subtarget->hasMMX()">; -def Has3DNow : Predicate<"Subtarget->has3DNow()">; -def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; +def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">; +def Has3DNowA : Predicate<"Subtarget->hasThreeDNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; @@ -981,8 +980,8 @@ def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">; def HasRDPID : Predicate<"Subtarget->hasRDPID()">; def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">; -def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">; -def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; +def HasCX8 : Predicate<"Subtarget->hasCX8()">; +def HasCX16 : Predicate<"Subtarget->hasCX16()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; def HasKL : Predicate<"Subtarget->hasKL()">; @@ -996,25 +995,25 @@ def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; def HasUINTR : Predicate<"Subtarget->hasUINTR()">; def HasCRC32 : Predicate<"Subtarget->hasCRC32()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, - AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">; + AssemblerPredicate<(all_of (not Is64Bit)), "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, - AssemblerPredicate<(all_of Mode64Bit), "64-bit mode">; + AssemblerPredicate<(all_of Is64Bit), "64-bit mode">; def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">; def NotLP64 : 
Predicate<"!Subtarget->isTarget64BitLP64()">; def In16BitMode : Predicate<"Subtarget->is16Bit()">, - AssemblerPredicate<(all_of Mode16Bit), "16-bit mode">; + AssemblerPredicate<(all_of Is16Bit), "16-bit mode">; def Not16BitMode : Predicate<"!Subtarget->is16Bit()">, - AssemblerPredicate<(all_of (not Mode16Bit)), "Not 16-bit mode">; + AssemblerPredicate<(all_of (not Is16Bit)), "Not 16-bit mode">; def In32BitMode : Predicate<"Subtarget->is32Bit()">, - AssemblerPredicate<(all_of Mode32Bit), "32-bit mode">; + AssemblerPredicate<(all_of Is32Bit), "32-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||" "Subtarget->getFrameLowering()->hasFP(*MF)"> { let RecomputePerFunction = 1; } -def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; -def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; +def IsPS : Predicate<"Subtarget->isTargetPS()">; +def NotPS : Predicate<"!Subtarget->isTargetPS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -2229,13 +2228,13 @@ def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), - "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>; + "cmpxchg8b\t$dst", []>, TB, Requires<[HasCX8]>; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in // NOTE: In64BitMode check needed for the AssemblerPredicate. def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, - TB, Requires<[HasCmpxchg16b,In64BitMode]>; + TB, Requires<[HasCX16,In64BitMode]>; } // SchedRW, mayLoad, mayStore, hasSideEffects @@ -2851,7 +2850,7 @@ let SchedRW = [WriteSystem] in { def TPAUSE : I<0xAE, MRM6r, (outs), (ins GR32orGR64:$src), "tpause\t$src", [(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>, - PD, Requires<[HasWAITPKG]>, NotMemoryFoldable; + PD, Requires<[HasWAITPKG]>; } } // SchedRW @@ -2939,7 +2938,7 @@ def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>; let SchedRW = [WriteSystem] in { let Uses = [EAX, EDX] in def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins), - "invlpgb}", []>, + "invlpgb", []>, PS, Requires<[Not64BitMode]>; let Uses = [RAX, EDX] in def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins), @@ -3124,7 +3123,7 @@ def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src), let Predicates = [HasCLWB], SchedRW = [WriteLoad] in def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", - [(int_x86_clwb addr:$src)]>, PD, NotMemoryFoldable; + [(int_x86_clwb addr:$src)]>, PD; let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src", diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index aeecc25ddea2..4196aff240c4 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -211,10 +211,10 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}", (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in -def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem, +def MMX_MOVD64from64mr : MMXRI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, - Sched<[SchedWriteVecMoveLS.MMX.MR]>; + 
Sched<[SchedWriteVecMoveLS.MMX.MR]>, NotMemoryFoldable; let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in { let canFoldAsLoad = 1 in diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 035f139e6f33..06cb280e860a 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -112,6 +112,8 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1, SchedRW = [WriteZero] in { + def FsFLD0SH : I<0, Pseudo, (outs FR16:$dst), (ins), "", + [(set FR16:$dst, fp16imm0)]>, Requires<[HasSSE2, NoAVX512]>; def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -3471,9 +3473,9 @@ defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, +defm PAVGB : PDI_binop_all<0xE0, "pavgb", avgceilu, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, +defm PAVGW : PDI_binop_all<0xE3, "pavgw", avgceilu, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, SchedWriteVecIMul, 1, NoVLX>; @@ -3965,6 +3967,20 @@ defm PINSRW : sse2_pinsrw, PD; } // ExeDomain = SSEPackedInt +// Always select FP16 instructions if available. +let Predicates = [UseSSE2], AddedComplexity = -10 in { + def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (PINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>; + def : Pat<(store f16:$src, addr:$dst), (MOV16mr addr:$dst, (EXTRACT_SUBREG (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0), sub_16bit))>; + def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0), sub_16bit)>; + def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (PINSRWrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16)>; +} + +let Predicates = [HasAVX, NoBWI] in { + def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>; + def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0), sub_16bit)>; + def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16)>; +} + //===---------------------------------------------------------------------===// // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// @@ -3997,7 +4013,10 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), //===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { -let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in +// As VEX does not have separate instruction contexts for address size +// overrides, VMASKMOVDQU and VMASKMOVDQU64 would have a decode conflict. +// Prefer VMASKMODDQU64. 
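+// (The EDI-based form remains reachable in 64-bit mode: the X86MCInstLower
+// change later in this patch sets X86::IP_HAS_AD_SIZE on MASKMOVDQU and
+// VMASKMOVDQU there, i.e. emits a 0x67 address-size override prefix, rather
+// than keeping a separate AdSize32 opcode in the decode tables.)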
+let Uses = [EDI], Predicates = [HasAVX], isAsmParserOnly = 1 in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", @@ -4008,32 +4027,16 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, - VEX, VEX_WIG, AdSize64; -let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in -def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs), - (ins VR128:$src, VR128:$mask), "", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, - VEX, VEX_WIG, AdSize32 { - let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}"; - let AsmVariantName = "NonParsable"; -} + VEX, VEX_WIG; -let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in +let Uses = [EDI], Predicates = [UseSSE2] in def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, - AdSize64; -let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in -def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), - "addr32 maskmovdqu\t{$mask, $src|$src, $mask}", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, - AdSize32 { - let AsmVariantName = "NonParsable"; -} + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; } // ExeDomain = SSEPackedInt @@ -5206,6 +5209,12 @@ let Predicates = [HasAVX, NoBWI] in defm PEXTRW : SS41I_extract16<0x15, "pextrw">; +let Predicates = [UseSSE41] in + def : Pat<(store f16:$src, addr:$dst), (PEXTRWmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>; + +let Predicates = [HasAVX, NoBWI] in + def : Pat<(store f16:$src, addr:$dst), (VPEXTRWmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>; + /// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination multiclass SS41I_extract32 opc, string OpcodeStr> { @@ -7588,6 +7597,21 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { (VPBROADCASTWYrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit))))>; + + def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)), + (VPBROADCASTWYrm addr:$src)>; + + def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128:$src))), + (VPBROADCASTWrr VR128:$src)>; + def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128:$src))), + (VPBROADCASTWYrr VR128:$src)>; + + def : Pat<(v8f16 (X86VBroadcast (f16 FR16:$src))), + (VPBROADCASTWrr (COPY_TO_REGCLASS FR16:$src, VR128))>; + def : Pat<(v16f16 (X86VBroadcast (f16 FR16:$src))), + (VPBROADCASTWYrr (COPY_TO_REGCLASS FR16:$src, VR128))>; } let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v4i32 (X86VBroadcast GR32:$src)), diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index b4dd99d08a62..3a653a56e534 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -25,18 +25,18 @@ let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; def UD1Wm : I<0xB9, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), - "ud1{w} {$src2, $src1|$src1, $src2}", []>, 
TB, OpSize16; + "ud1{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16; def UD1Lm : I<0xB9, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), - "ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32; + "ud1{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32; def UD1Qm : RI<0xB9, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), - "ud1{q} {$src2, $src1|$src1, $src2}", []>, TB; + "ud1{q}\t{$src2, $src1|$src1, $src2}", []>, TB; def UD1Wr : I<0xB9, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), - "ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16; + "ud1{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16; def UD1Lr : I<0xB9, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), - "ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32; + "ud1{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32; def UD1Qr : RI<0xB9, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "ud1{q} {$src2, $src1|$src1, $src2}", []>, TB; + "ud1{q}\t{$src2, $src1|$src1, $src2}", []>, TB; } let isTerminator = 1 in @@ -71,9 +71,9 @@ def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB, } // SchedRW def : Pat<(debugtrap), - (INT3)>, Requires<[NotPS4]>; + (INT3)>, Requires<[NotPS]>; def : Pat<(debugtrap), - (INT (i8 0x41))>, Requires<[IsPS4]>; + (INT (i8 0x41))>, Requires<[IsPS]>; //===----------------------------------------------------------------------===// // Input/Output Instructions. diff --git a/llvm/lib/Target/X86/X86InstrTSX.td b/llvm/lib/Target/X86/X86InstrTSX.td index 28563eeb4484..7671eb4676ee 100644 --- a/llvm/lib/Target/X86/X86InstrTSX.td +++ b/llvm/lib/Target/X86/X86InstrTSX.td @@ -51,6 +51,8 @@ def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), // HLE prefixes let SchedRW = [WriteSystem] in { +// XACQUIRE and XRELEASE reuse REPNE and REP respectively. +// For now, just prefer the REP versions. let isAsmParserOnly = 1 in { def XACQUIRE_PREFIX : I<0xF2, PrefixByte, (outs), (ins), "xacquire", []>; def XRELEASE_PREFIX : I<0xF3, PrefixByte, (outs), (ins), "xrelease", []>; diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td index 2429aa113fb1..e6ecbb652100 100644 --- a/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -17,6 +17,8 @@ let Predicates = [NoAVX512] in { // A vector extract of the first f32/f64 position is a subregister copy + def : Pat<(f16 (extractelt (v8f16 VR128:$src), (iPTR 0))), + (COPY_TO_REGCLASS (v8f16 VR128:$src), FR16)>; def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), @@ -34,8 +36,8 @@ let Predicates = [HasAVX512] in { } let Predicates = [NoVLX] in { - def : Pat<(v8f16 (scalar_to_vector FR16X:$src)), - (COPY_TO_REGCLASS FR16X:$src, VR128)>; + def : Pat<(v8f16 (scalar_to_vector FR16:$src)), + (COPY_TO_REGCLASS FR16:$src, VR128)>; // Implicitly promote a 32-bit scalar to a vector. 
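The UD1 fixes above restore the conventional '\t' between mnemonic and operands. For reference, a minimal stand-alone sketch of how such multi-variant asm strings are rendered, assuming only the TableGen convention that "{a|b}" holds the AT&T spelling before '|' and the Intel spelling after it, and that a group with fewer alternatives than the requested variant renders as empty (which is how "ud1{w}" drops its width suffix in Intel syntax). The helper name is ours, not an LLVM API:

#include <cstdio>
#include <string>

// Render one syntax variant of a TableGen-style asm string. Assumes the
// string is well formed (every '{' has a matching '}').
static std::string renderAsmVariant(const std::string &S, unsigned Variant) {
  std::string Out;
  for (size_t I = 0; I < S.size(); ++I) {
    if (S[I] != '{') { Out += S[I]; continue; }
    size_t End = S.find('}', I);
    std::string Group = S.substr(I + 1, End - I - 1);
    // Alternatives are separated by '|'; a missing alternative is empty.
    size_t Pos = 0;
    for (unsigned V = 0; ; ++V) {
      size_t Bar = Group.find('|', Pos);
      if (V == Variant) {
        Out += Group.substr(Pos, Bar == std::string::npos ? std::string::npos
                                                          : Bar - Pos);
        break;
      }
      if (Bar == std::string::npos) break; // fewer alternatives than Variant
      Pos = Bar + 1;
    }
    I = End; // resume after the closing '}'
  }
  return Out;
}

int main() {
  const std::string S = "ud1{w}\t{$src2, $src1|$src1, $src2}";
  std::printf("AT&T : %s\n", renderAsmVariant(S, 0).c_str()); // ud1w  $src2, $src1
  std::printf("Intel: %s\n", renderAsmVariant(S, 1).c_str()); // ud1   $src1, $src2
}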
def : Pat<(v4f32 (scalar_to_vector FR32:$src)), (COPY_TO_REGCLASS FR32:$src, VR128)>; diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index a5976b7d2d74..d89e481f4522 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -13,11 +13,11 @@ multiclass xop2op opc, string OpcodeStr, Intrinsic Int> { def rr : IXOP, XOP, Sched<[SchedWritePHAdd.XMM]>; + [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWriteVecALU.XMM]>; def rm : IXOP, XOP, - Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>; + Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; } let ExeDomain = SSEPackedInt in { diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp index 28d57ca9ae3c..ff701159b95e 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -21,7 +21,6 @@ #include "X86TargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -31,6 +30,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" @@ -179,6 +179,8 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { return &X86::GR64RegClass; } if (RB.getID() == X86::VECRRegBankID) { + if (Ty.getSizeInBits() == 16) + return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; if (Ty.getSizeInBits() == 32) return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; if (Ty.getSizeInBits() == 64) @@ -516,7 +518,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, // is already on the instruction we're mutating, and thus we don't need to // make any changes. So long as we select an opcode which is capable of // loading or storing the appropriate size atomically, the rest of the - // backend is required to respect the MMO state. + // backend is required to respect the MMO state. 
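The bail-out that follows this comment keys off the memory operand's atomic ordering. A minimal model of that check, assuming a simplified mirror of llvm::AtomicOrdering (the real MachineMemOperand also tracks a separate failure ordering):

// Simplified mirror of llvm::AtomicOrdering; illustration only, the real
// enum lives in llvm/Support/AtomicOrdering.h.
enum class AtomicOrdering {
  NotAtomic, Unordered, Monotonic, Acquire, Release,
  AcquireRelease, SequentiallyConsistent
};

// Unordered accesses need no fences: a naturally aligned MOV of the right
// width is already single-copy atomic on x86, so plain load/store selection
// is sound for them. Anything stronger is rejected (the selector returns
// false below) and left to the fallback path.
constexpr bool selectableAsPlainLoadStore(AtomicOrdering O) {
  return O == AtomicOrdering::NotAtomic || O == AtomicOrdering::Unordered;
}

static_assert(selectableAsPlainLoadStore(AtomicOrdering::Unordered), "");
static_assert(!selectableAsPlainLoadStore(AtomicOrdering::Acquire), "");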
if (!MemOp.isUnordered()) { LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n"); return false; @@ -537,12 +539,12 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); if (Opc == TargetOpcode::G_LOAD) { - I.RemoveOperand(1); + I.removeOperand(1); addFullAddress(MIB, AM); } else { // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) - I.RemoveOperand(1); - I.RemoveOperand(0); + I.removeOperand(1); + I.removeOperand(0); addFullAddress(MIB, AM).addUse(DefReg); } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); @@ -625,7 +627,7 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); - I.RemoveOperand(1); + I.removeOperand(1); addFullAddress(MIB, AM); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); @@ -1412,7 +1414,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - MF.getDataLayout().getPointerSize(), Alignment); + LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment); LoadInst = addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 1edec96bbec3..3c8be95b43e3 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -371,8 +371,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0), - X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0), + X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, ISD::AVGCEILU, 0), + X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, ISD::AVGCEILU, 0), X86_INTRINSIC_DATA(avx2_pblendvb, BLENDV, X86ISD::BLENDV, 0), X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0), @@ -818,8 +818,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, X86ISD::AVG, 0), - X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, X86ISD::AVG, 0), + X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, ISD::AVGCEILU, 0), + X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, ISD::AVGCEILU, 0), X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0), @@ -1281,8 +1281,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0), - X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0), + X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, 
ISD::AVGCEILU, 0), + X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, ISD::AVGCEILU, 0), X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp index 4710e524931c..23976fb1a142 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -558,7 +558,7 @@ int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes( } // Find and eliminate gadget edges that have been mitigated. - int MitigatedGadgets = 0, RemainingGadgets = 0; + int RemainingGadgets = 0; NodeSet ReachableNodes{G}; for (const Node &RootN : G.nodes()) { if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge)) @@ -586,7 +586,6 @@ int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes( // This gadget's sink is reachable ++RemainingGadgets; } else { // This gadget's sink is unreachable, and therefore mitigated - ++MitigatedGadgets; ElimEdges.insert(E); } } diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp index 6b564a0356a6..70964b352b8c 100644 --- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index 6206d8efb3d0..540182cb7911 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -74,6 +74,24 @@ static bool isAMXCast(Instruction *II) { match(II, m_Intrinsic(m_Value())); } +static bool isAMXIntrinsic(Value *I) { + auto *II = dyn_cast(I); + if (!II) + return false; + if (isAMXCast(II)) + return false; + // Check if return type or parameter is x86_amx. If it is x86_amx + // the intrinsic must be x86 amx intrinsics. + if (II->getType()->isX86_AMXTy()) + return true; + for (Value *V : II->args()) { + if (V->getType()->isX86_AMXTy()) + return true; + } + + return false; +} + static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB, Type *Ty) { Function &F = *BB->getParent(); @@ -162,6 +180,36 @@ static std::pair getShape(IntrinsicInst *II, unsigned OpNo) { return std::make_pair(Row, Col); } +static std::pair getShape(PHINode *Phi) { + Use &U = *(Phi->use_begin()); + unsigned OpNo = U.getOperandNo(); + User *V = U.getUser(); + // TODO We don't traverse all users. To make the algorithm simple, here we + // just traverse the first user. If we can find shape, then return the shape, + // otherwise just return nullptr and the optimization for undef/zero will be + // abandoned. 
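+  // For example, given
+  //   %vec = phi <256 x i32> [ zeroinitializer, %bb0 ], [ %v, %bb1 ]
+  //   %t   = call x86_amx @llvm.x86.cast.vector.to.tile(<256 x i32> %vec)
+  //   call void @llvm.x86.tilestored64.internal(i16 %r, i16 %c, ..., x86_amx %t)
+  // the walk below steps from the phi's first user through the cast to the
+  // tilestored64 intrinsic and recovers the shape (%r, %c) from its operands.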
+ while (V) { + if (isAMXCast(dyn_cast(V))) { + if (V->use_empty()) + break; + Use &U = *(V->use_begin()); + OpNo = U.getOperandNo(); + V = U.getUser(); + } else if (isAMXIntrinsic(V)) { + return getShape(cast(V), OpNo); + } else if (isa(V)) { + if (V->use_empty()) + break; + Use &U = *(V->use_begin()); + V = U.getUser(); + } else { + break; + } + } + + return std::make_pair(nullptr, nullptr); +} + namespace { class X86LowerAMXType { Function &Func; @@ -655,6 +703,9 @@ class X86LowerAMXCast { public: X86LowerAMXCast(Function &F) : Func(F) {} + void combineCastStore(IntrinsicInst *Cast, StoreInst *ST); + void combineLoadCast(IntrinsicInst *Cast, LoadInst *LD); + bool combineLdSt(SmallVectorImpl &Casts); bool combineAMXcast(TargetLibraryInfo *TLI); bool transformAMXCast(IntrinsicInst *AMXCast); bool transformAllAMXCast(); @@ -720,11 +771,33 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi( OldPhiNodes.insert(PN); while (!PhiWorklist.empty()) { auto *OldPN = PhiWorklist.pop_back_val(); - for (Value *IncValue : OldPN->incoming_values()) { + for (unsigned I = 0; I < OldPN->getNumOperands(); ++I) { + Value *IncValue = OldPN->getIncomingValue(I); // TODO: currently, We ignore cases where it is a const. In the future, we // might support const. - if (isa(IncValue)) - return false; + if (isa(IncValue)) { + auto *IncConst = dyn_cast(IncValue); + if (!isa(IncValue) && !IncConst->isZeroValue()) + return false; + Value *Row = nullptr, *Col = nullptr; + std::tie(Row, Col) = getShape(OldPN); + // TODO: If it is not constant the Row and Col must domoniate tilezero + // that we are going to create. + if (!Row || !Col || !isa(Row) || !isa(Col)) + return false; + // Create tilezero at the end of incoming block. + auto *Block = OldPN->getIncomingBlock(I); + BasicBlock::iterator Iter = Block->getTerminator()->getIterator(); + Instruction *NewInst = Builder.CreateIntrinsic( + Intrinsic::x86_tilezero_internal, None, {Row, Col}); + NewInst->moveBefore(&*Iter); + NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, + {IncValue->getType()}, {NewInst}); + NewInst->moveBefore(&*Iter); + // Replace InValue with new Value. + OldPN->setIncomingValue(I, NewInst); + IncValue = NewInst; + } if (auto *PNode = dyn_cast(IncValue)) { if (OldPhiNodes.insert(PNode)) @@ -838,6 +911,99 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi( return true; } +// %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %42) +// store <256 x i32> %43, <256 x i32>* %p, align 64 +// --> +// call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p, +// i64 64, x86_amx %42) +void X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) { + Value *Tile = Cast->getOperand(0); + // TODO: If it is cast intrinsic or phi node, we can propagate the + // shape information through def-use chain. + if (!isAMXIntrinsic(Tile)) + return; + auto *II = cast(Tile); + // Tile is output from AMX intrinsic. The first operand of the + // intrinsic is row, the second operand of the intrinsic is column. + Value *Row = II->getOperand(0); + Value *Col = II->getOperand(1); + IRBuilder<> Builder(ST); + // Use the maximum column as stride. It must be the same with load + // stride. 
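+  // (A tile row is at most 64 bytes, so a constant 64-byte stride is always
+  // large enough, and it matches the stride combineLoadCast uses below.)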
+ Value *Stride = Builder.getInt64(64); + Value *I8Ptr = + Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy()); + std::array Args = {Row, Col, I8Ptr, Stride, Tile}; + Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args); +} + +// %65 = load <256 x i32>, <256 x i32>* %p, align 64 +// %66 = call x86_amx @llvm.x86.cast.vector.to.tile(<256 x i32> %65) +// --> +// %66 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, +// i8* %p, i64 64) +void X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) { + Value *Row = nullptr, *Col = nullptr; + Use &U = *(Cast->use_begin()); + unsigned OpNo = U.getOperandNo(); + auto *II = cast(U.getUser()); + // TODO: If it is cast intrinsic or phi node, we can propagate the + // shape information through def-use chain. + if (!isAMXIntrinsic(II)) + return; + std::tie(Row, Col) = getShape(II, OpNo); + IRBuilder<> Builder(LD); + // Use the maximun column as stride. + Value *Stride = Builder.getInt64(64); + Value *I8Ptr = + Builder.CreateBitCast(LD->getOperand(0), Builder.getInt8PtrTy()); + std::array Args = {Row, Col, I8Ptr, Stride}; + + Value *NewInst = + Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, None, Args); + Cast->replaceAllUsesWith(NewInst); +} + +bool X86LowerAMXCast::combineLdSt(SmallVectorImpl &Casts) { + bool Change = false; + for (auto *Cast : Casts) { + auto *II = cast(Cast); + // %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector(x86_amx %42) + // store <256 x i32> %43, <256 x i32>* %p, align 64 + // --> + // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p, + // i64 64, x86_amx %42) + if (II->getIntrinsicID() == Intrinsic::x86_cast_tile_to_vector) { + SmallVector DeadStores; + for (User *U : Cast->users()) { + StoreInst *Store = dyn_cast(U); + if (!Store) + continue; + combineCastStore(cast(Cast), Store); + DeadStores.push_back(Store); + Change = true; + } + for (auto *Store : DeadStores) + Store->eraseFromParent(); + } else { // x86_cast_vector_to_tile + SmallVector DeadLoads; + auto *Load = dyn_cast(Cast->getOperand(0)); + if (!Load || !Load->hasOneUse()) + continue; + // %65 = load <256 x i32>, <256 x i32>* %p, align 64 + // %66 = call x86_amx @llvm.x86.cast.vector.to.tile(<256 x i32> %65) + // --> + // %66 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, + // i8* %p, i64 64) + combineLoadCast(cast(Cast), Load); + // Set the operand is null so that load instruction can be erased. + Cast->setOperand(0, nullptr); + Load->eraseFromParent(); + } + } + return Change; +} + bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) { bool Change = false; // Collect tile cast instruction. @@ -879,17 +1045,22 @@ bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) { Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector); Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile); + SmallVector LiveCasts; auto EraseInst = [&](SmallVectorImpl &Insts) { for (auto *Inst : Insts) { if (Inst->use_empty()) { Inst->eraseFromParent(); Change = true; + } else { + LiveCasts.push_back(Inst); } } }; EraseInst(Vec2TileInsts); EraseInst(Tile2VecInsts); + Change |= combineLdSt(LiveCasts); + EraseInst(LiveCasts); // Handle the A->B->A cast, and there is an intervening PHI node. 
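// For example:
//   %v  = call <256 x i32> @llvm.x86.cast.tile.to.vector(x86_amx %t)    ; A->B
//   %p  = phi <256 x i32> [ %v, %bb1 ], [ %w, %bb2 ]
//   %t2 = call x86_amx @llvm.x86.cast.vector.to.tile(<256 x i32> %p)    ; B->A
// optimizeAMXCastFromPhi rewrites the phi to work on x86_amx directly, after
// which both casts become dead.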
for (BasicBlock &BB : Func) { @@ -947,6 +1118,10 @@ bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) { // i64 60) // call void @llvm.x86.tilestored64.internal(i16 15, i16 60, // i8* %addr3, i64 60, x86_amx %2) + if (AMXCast->use_empty()) { + AMXCast->eraseFromParent(); + return true; + } Use &U = *(AMXCast->use_begin()); unsigned OpNo = U.getOperandNo(); auto *II = dyn_cast(U.getUser()); diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 9044f10ec630..b107de692365 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -501,7 +501,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { for (const MachineOperand &MO : MI->operands()) if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) - OutMI.addOperand(MaybeMCOp.getValue()); + OutMI.addOperand(*MaybeMCOp); // Handle a few special cases to eliminate operand modifiers. switch (OutMI.getOpcode()) { @@ -962,6 +962,12 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // These are not truly commutable so hide them from the default case. break; + case X86::MASKMOVDQU: + case X86::VMASKMOVDQU: + if (AsmPrinter.getSubtarget().is64Bit()) + OutMI.setFlags(X86::IP_HAS_AD_SIZE); + break; + default: { // If the instruction is a commutable arithmetic instruction we might be // able to commute the operands to get a 2 byte VEX prefix. @@ -1311,7 +1317,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, E = FaultingMI.operands_end(); I != E; ++I) if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I)) - MI.addOperand(MaybeOperand.getValue()); + MI.addOperand(*MaybeOperand); OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); OutStreamer->emitInstruction(MI, getSubtargetInfo()); @@ -1347,11 +1353,12 @@ void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset); - std::string Name = AccessInfo.IsWrite ? "store" : "load"; - std::string Op = OrShadowOffset ? "or" : "add"; - std::string SymName = "__asan_check_" + Name + "_" + Op + "_" + - utostr(1ULL << AccessInfo.AccessSizeIndex) + "_" + - TM.getMCRegisterInfo()->getName(Reg.asMCReg()); + StringRef Name = AccessInfo.IsWrite ? "store" : "load"; + StringRef Op = OrShadowOffset ? 
"or" : "add"; + std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + + Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + + TM.getMCRegisterInfo()->getName(Reg.asMCReg())) + .str(); if (OrShadowOffset) report_fatal_error( "OrShadowOffset is not supported with optimized callbacks"); @@ -1375,7 +1382,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, MCI.setOpcode(Opcode); for (auto &MO : drop_begin(MI.operands(), 2)) if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) - MCI.addOperand(MaybeOperand.getValue()); + MCI.addOperand(*MaybeOperand); SmallString<256> Code; SmallVector Fixups; @@ -1751,7 +1758,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, Ret.setOpcode(OpCode); for (auto &MO : drop_begin(MI.operands())) if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) - Ret.addOperand(MaybeOperand.getValue()); + Ret.addOperand(*MaybeOperand); OutStreamer->emitInstruction(Ret, getSubtargetInfo()); emitX86Nops(*OutStreamer, 10, Subtarget); recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); @@ -1790,7 +1797,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, OutStreamer->AddComment("TAILCALL"); for (auto &MO : drop_begin(MI.operands())) if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) - TC.addOperand(MaybeOperand.getValue()); + TC.addOperand(*MaybeOperand); OutStreamer->emitInstruction(TC, getSubtargetInfo()); } @@ -1985,34 +1992,34 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { // Otherwise, use the .seh_ directives for all other Windows platforms. switch (MI->getOpcode()) { case X86::SEH_PushReg: - OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm()); + OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm()); break; case X86::SEH_SaveReg: - OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(), + OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_SaveXMM: - OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(), + OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_StackAlloc: - OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm()); + OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm()); break; case X86::SEH_SetFrame: - OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(), + OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_PushFrame: - OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm()); + OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm()); break; case X86::SEH_EndPrologue: - OutStreamer->EmitWinCFIEndProlog(); + OutStreamer->emitWinCFIEndProlog(); break; default: diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp index 05f846bfb219..2e88e01ce7fd 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -13,6 +13,13 @@ using namespace llvm; +MachineFunctionInfo *X86MachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} + void X86MachineFunctionInfo::anchor() { } void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) { diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 99d1a97380dd..99cc9f525b2c 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h 
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -119,7 +119,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { Optional SwiftAsyncContextFrameIdx; - ValueMap PreallocatedIds; + // Preallocated fields are only used during isel. + // FIXME: Can we find somewhere else to store these? + DenseMap PreallocatedIds; SmallVector PreallocatedStackSizes; SmallVector, 0> PreallocatedArgOffsets; @@ -132,6 +134,12 @@ public: X86MachineFunctionInfo() = default; explicit X86MachineFunctionInfo(MachineFunction &MF) {} + explicit X86MachineFunctionInfo(const X86MachineFunctionInfo &) = default; + + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index 425054cfdd92..aa6e8645e092 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "X86Subtarget.h" #include "llvm/CodeGen/MacroFusion.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp index e92b1b002bb0..bb59cee8badb 100644 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp @@ -37,21 +37,20 @@ STATISTIC(NumBBsPadded, "Number of basic blocks padded"); namespace { struct VisitedBBInfo { // HasReturn - Whether the BB contains a return instruction - bool HasReturn; + bool HasReturn = false; // Cycles - Number of cycles until return if HasReturn is true, otherwise // number of cycles until end of the BB - unsigned int Cycles; + unsigned int Cycles = 0; - VisitedBBInfo() : HasReturn(false), Cycles(0) {} + VisitedBBInfo() = default; VisitedBBInfo(bool HasReturn, unsigned int Cycles) : HasReturn(HasReturn), Cycles(Cycles) {} }; struct PadShortFunc : public MachineFunctionPass { static char ID; - PadShortFunc() : MachineFunctionPass(ID) - , Threshold(4) {} + PadShortFunc() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -82,7 +81,7 @@ namespace { MachineBasicBlock::iterator &MBBI, unsigned int NOOPsToAdd); - const unsigned int Threshold; + const unsigned int Threshold = 4; // ReturnBBs - Maps basic blocks that return to the minimum number of // cycles until the return, starting from the entry block. diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp index 4342ac089cae..7761f7323358 100644 --- a/llvm/lib/Target/X86/X86PartialReduction.cpp +++ b/llvm/lib/Target/X86/X86PartialReduction.cpp @@ -19,8 +19,10 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" #include "llvm/Support/KnownBits.h" @@ -220,16 +222,21 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) { if (!cast(Op->getType())->getElementType()->isIntegerTy(32)) return false; - // Operand should be a select. - auto *SI = dyn_cast(Op); - if (!SI) - return false; - - // Select needs to implement absolute value. 
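trySADReplacement rewrites an absolute-difference reduction into psadbw, and the hunk below extends it to match the llvm.abs intrinsic as well as the select form. For reference, a scalar model of one 64-bit PSADBW lane, assuming nothing beyond the instruction's documented semantics (the helper name is ours):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// One 64-bit lane of PSADBW: sum of absolute differences of 8 byte pairs,
// zero-extended into the lane (the wider forms repeat this per 64-bit lane).
static uint64_t psadbwLane(const uint8_t A[8], const uint8_t B[8]) {
  uint32_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += std::abs(int(A[I]) - int(B[I]));
  return Sum; // at most 8 * 255 = 2040, so it always fits in 16 bits
}

int main() {
  const uint8_t A[8] = {0, 10, 20, 255, 1, 2, 3, 4};
  const uint8_t B[8] = {5, 10, 30, 0, 0, 0, 0, 0};
  std::printf("%llu\n", (unsigned long long)psadbwLane(A, B)); // 280
}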
- Value *LHS, *RHS; - auto SPR = matchSelectPattern(SI, LHS, RHS); - if (SPR.Flavor != SPF_ABS) - return false; + Value *LHS; + if (match(Op, PatternMatch::m_Intrinsic())) { + LHS = Op->getOperand(0); + } else { + // Operand should be a select. + auto *SI = dyn_cast(Op); + if (!SI) + return false; + + Value *RHS; + // Select needs to implement absolute value. + auto SPR = matchSelectPattern(SI, LHS, RHS); + if (SPR.Flavor != SPF_ABS) + return false; + } // Need a subtract of two values. auto *Sub = dyn_cast(LHS); @@ -253,7 +260,7 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) { if (!Op0 || !Op1) return false; - IRBuilder<> Builder(SI); + IRBuilder<> Builder(Op); auto *OpTy = cast(Op->getType()); unsigned NumElts = OpTy->getNumElements(); @@ -271,7 +278,7 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) { IntrinsicNumElts = 16; } - Function *PSADBWFn = Intrinsic::getDeclaration(SI->getModule(), IID); + Function *PSADBWFn = Intrinsic::getDeclaration(Op->getModule(), IID); if (NumElts < 16) { // Pad input with zeroes. @@ -336,8 +343,8 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) { Ops[0] = Builder.CreateShuffleVector(Ops[0], Zero, ConcatMask); } - SI->replaceAllUsesWith(Ops[0]); - SI->eraseFromParent(); + Op->replaceAllUsesWith(Ops[0]); + Op->eraseFromParent(); return true; } diff --git a/llvm/lib/Target/X86/X86PreAMXConfig.cpp b/llvm/lib/Target/X86/X86PreAMXConfig.cpp index d9c6d08ada73..cd0d448238a6 100644 --- a/llvm/lib/Target/X86/X86PreAMXConfig.cpp +++ b/llvm/lib/Target/X86/X86PreAMXConfig.cpp @@ -91,16 +91,17 @@ static bool brokenVolatile(Instruction *I) { namespace { class X86PreAMXConfig { + using PosAndShapesMap = MapVector>; + Function &F; public: X86PreAMXConfig(Function &Func) : F(Func) {} bool preTileConfig(); - bool addTileConfig(Instruction *ModelStart, SmallVector &Shapes); - bool findConfigShapes( - DenseMap> &PosAndShapes); + void addTileConfig(Instruction *ModelStart, SmallVector &Shapes); + bool findConfigShapes(PosAndShapesMap &PosAndShapes); bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector &Shapes); - bool preWriteTileCfg(Value *I8Ptr, Instruction *Pos, + void preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder, SmallVector &Shapes); BasicBlock::iterator getShapesAndConfigPosEnd(BasicBlock::iterator Iter, @@ -149,10 +150,9 @@ public: // %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3) // call void @llvm.x86.tilestored64.internal(... td) area // -------------------------------------------------------------------------- -bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos, +void X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder, SmallVector &Shapes) { - bool Write = false; - LLVMContext &Ctx = Pos->getParent()->getContext(); + LLVMContext &Ctx = Builder.getContext(); Type *I8Ty = Type::getInt8Ty(Ctx); Type *I16Ty = Type::getInt16Ty(Ctx); @@ -160,30 +160,27 @@ bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos, // other value in the future. 
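// (Layout of the 64-byte tile-configuration block filled in here: byte 0 is
// the palette id, bytes 16..47 hold one 16-bit colsb entry per tile register,
// and bytes 48..63 hold one row-count byte per tile register; hence the
// 16 + I * 2 and 48 + I offsets below.)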
Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0); Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); - Value *PalettePos = - GetElementPtrInst::Create(I8Ty, I8Ptr, PaletteOffset, "", Pos); - new StoreInst(PaletteValue, PalettePos, Pos); + Value *PalettePos = Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset); + Builder.CreateStore(PaletteValue, PalettePos); for (int I = 0, E = Shapes.size() / 2; I < E; I++) { Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I); Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2); const std::string ShapeName = "amx.tmm." + itostr(I); - Value *RowPos = GetElementPtrInst::Create(I8Ty, I8Ptr, RowOffset, - ShapeName + ".shape.row", Pos); - Value *ColPos = GetElementPtrInst::Create(I8Ty, I8Ptr, ColOffset, "", Pos); - ColPos = new BitCastInst(ColPos, PointerType::get(I16Ty, 0), - ShapeName + ".shape.col", Pos); + Value *RowPos = Builder.CreateGEP(I8Ty, I8Ptr, RowOffset, + ShapeName + ".shape.row"); + Value *ColPos = Builder.CreateGEP(I8Ty, I8Ptr, ColOffset); + ColPos = Builder.CreateBitCast(ColPos, PointerType::get(I16Ty, 0), + ShapeName + ".shape.col"); Value *Row = Shapes[I * 2]; Value *Col = Shapes[I * 2 + 1]; - Row = new TruncInst(Row, I8Ty, "", Pos); - new StoreInst(Row, RowPos, Pos); - new StoreInst(Col, ColPos, Pos); - Write = true; + Row = Builder.CreateTrunc(Row, I8Ty); + Builder.CreateStore(Row, RowPos); + Builder.CreateStore(Col, ColPos); } - return Write; } -bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart, +void X86PreAMXConfig::addTileConfig(Instruction *ModelStart, SmallVector &Shapes) { Module *M = F.getParent(); IRBuilder<> Builder(ModelStart); @@ -198,17 +195,11 @@ bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart, Addr->setAlignment(Alignment); Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy()); - std::array Args = {I8Ptr}; - Instruction *Cfg = - Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, Args); - - Value *Val0 = Constant::getNullValue(V512Ty); - Instruction *Init0 = new StoreInst(Val0, Addr, false, Alignment, Cfg); - assert(Init0 && "Not Zero initilizate the cfg mem!"); + Builder.CreateAlignedStore(Constant::getNullValue(V512Ty), Addr, Alignment); - preWriteTileCfg(I8Ptr, Cfg, Shapes); + preWriteTileCfg(I8Ptr, Builder, Shapes); - return Init0; + Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, {I8Ptr}); } // Todo: We may need to handle "more than one store" case in the future. @@ -315,8 +306,7 @@ X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter, // %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n) // call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n) // -------------------------------------------------------------------------- -bool X86PreAMXConfig::findConfigShapes( - DenseMap> &PosAndShapes) { +bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) { bool Find = false; for (BasicBlock &BB : F) { for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { @@ -365,7 +355,7 @@ bool X86PreAMXConfig::findConfigShapes( // call void @llvm.x86.tilestored64.internal(... 
td) area // -------------------------------------------------------------------------- bool X86PreAMXConfig::preTileConfig() { - DenseMap> PosAndShapes; + PosAndShapesMap PosAndShapes; bool NeedCfg = findConfigShapes(PosAndShapes); if (!NeedCfg) return false; diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index 5d21f8666ec6..479db8585ca0 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -40,10 +41,15 @@ using namespace llvm; #define DEBUG_TYPE "tile-pre-config" -#define REPORT_CONFIG_FAIL \ - report_fatal_error( \ - MF.getName() + \ - ": Failed to config tile register, please define the shape earlier"); + +static void emitErrorMsg(MachineFunction &MF) { + SmallString<32> Str; + Twine ErrorMsg = + MF.getName() + + ": Failed to config tile register, please define the shape earlier"; + LLVMContext &Context = MF.getMMI().getModule()->getContext(); + Context.emitError(ErrorMsg); +} namespace { @@ -302,12 +308,19 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { SmallVector WorkList; for (auto &I : ShapeBBs) { // TODO: We can hoist shapes across BBs here. - if (BBVisitedInfo[I.first].HasAMXRegLiveIn) - REPORT_CONFIG_FAIL + if (BBVisitedInfo[I.first].HasAMXRegLiveIn) { + // We are not able to config tile registers since the shape to config + // is not defined yet. Emit error message and continue. The function + // would not config tile registers. + emitErrorMsg(MF); + return false; + } if (BBVisitedInfo[I.first].FirstAMX && BBVisitedInfo[I.first].FirstAMX < I.second.back() && - !hoistShapesInBB(I.first, I.second)) - REPORT_CONFIG_FAIL + !hoistShapesInBB(I.first, I.second)) { + emitErrorMsg(MF); + return false; + } WorkList.push_back(I.first); } while (!WorkList.empty()) { @@ -356,7 +369,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { // multi insert. if (VisitedOrInserted.insert(I).second) { auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin(); - addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)), + addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)), SS); } } @@ -367,33 +380,27 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = &*MBB.begin(); if (ST.hasAVX512()) { Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); - BuildMI(MBB, MI, DL, TII->get(X86::VPXORDZrr), Zmm) - .addReg(Zmm, RegState::Undef) - .addReg(Zmm, RegState::Undef); + BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm); addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS) .addReg(Zmm); } else if (ST.hasAVX2()) { Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass); - BuildMI(MBB, MI, DL, TII->get(X86::VPXORYrr), Ymm) - .addReg(Ymm, RegState::Undef) - .addReg(Ymm, RegState::Undef); + BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm); addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS) .addReg(Ymm); addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32) .addReg(Ymm); } else { assert(ST.hasSSE2() && "AMX should assume SSE2 enabled"); + unsigned StoreOpc = ST.hasAVX() ? 
X86::VMOVUPSmr : X86::MOVUPSmr; Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass); - BuildMI(MBB, MI, DL, TII->get(X86::PXORrr), Xmm) - .addReg(Xmm, RegState::Undef) - .addReg(Xmm, RegState::Undef); - addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS) - .addReg(Xmm); - addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 16) + BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS).addReg(Xmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 16) .addReg(Xmm); - addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 32) + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 32) .addReg(Xmm); - addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 48) + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 48) .addReg(Xmm); } // Fill in the palette first. diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp index 9c076d2d6769..c49fc458eab3 100644 --- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp @@ -12,9 +12,9 @@ #include "X86RegisterBankInfo.h" #include "X86InstrInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_TARGET_REGBANK_IMPL @@ -25,8 +25,7 @@ using namespace llvm; #define GET_TARGET_REGBANK_INFO_IMPL #include "X86GenRegisterBankInfo.def" -X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) - : X86GenRegisterBankInfo() { +X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) { // validate RegBank initialization. 
const RegisterBank &RBGPR = getRegBank(X86::GPRRegBankID); diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.h b/llvm/lib/Target/X86/X86RegisterBankInfo.h index d5afd2cae761..fca36a317b58 100644 --- a/llvm/lib/Target/X86/X86RegisterBankInfo.h +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H #define LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "X86GenRegisterBank.inc" diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 130cb61cdde2..f2658f70434b 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -26,6 +26,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TileShapeInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" @@ -618,6 +620,66 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + const X86Subtarget &ST = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); + auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) { + return TRI.isSuperOrSubRegisterEq(RegA, RegB); + }; + + if (!ST.is64Bit()) + return llvm::any_of( + SmallVector{X86::EAX, X86::ECX, X86::EDX}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) || + (ST.hasMMX() && X86::VR64RegClass.contains(Reg)); + + CallingConv::ID CC = MF.getFunction().getCallingConv(); + + if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg)) + return true; + + if (llvm::any_of( + SmallVector{X86::RDX, X86::RCX, X86::R8, X86::R9}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) + return true; + + if (CC != CallingConv::Win64 && + llvm::any_of(SmallVector{X86::RDI, X86::RSI}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) + return true; + + if (ST.hasSSE1() && + llvm::any_of(SmallVector{X86::XMM0, X86::XMM1, X86::XMM2, + X86::XMM3, X86::XMM4, X86::XMM5, + X86::XMM6, X86::XMM7}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) + return true; + + return X86GenRegisterInfo::isArgumentRegister(MF, Reg); +} + +bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + const X86Subtarget &ST = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); + + // Stack pointer. + if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg)) + return true; + + // Don't use the frame pointer if it's being used. + const X86FrameLowering &TFI = *getFrameLowering(MF); + if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg)) + return true; + + return X86GenRegisterInfo::isFixedRegister(MF, PhysReg); +} + +bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const { + return RC->getID() == X86::TILERegClassID; +} + void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { // Check if the EFLAGS register is marked as live-out. 
This shouldn't happen, // because the calling convention defines the EFLAGS register as NOT diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 7fd10ddd1a15..6f4fb405d29f 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -115,6 +115,18 @@ public: /// register scavenger to determine what registers are free. BitVector getReservedRegs(const MachineFunction &MF) const override; + /// isArgumentReg - Returns true if Reg can be used as an argument to a + /// function. + bool isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const override; + + /// Return true if it is tile register class. + bool isTileRegisterClass(const TargetRegisterClass *RC) const; + + /// Returns true if PhysReg is a fixed register. + bool isFixedRegister(const MachineFunction &MF, + MCRegister PhysReg) const override; + void adjustStackMapLiveOutMask(uint32_t *Mask) const override; bool hasBasePointer(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 1b704bcb8e08..6dc51e37d3c2 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -537,6 +537,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>; +def FR16 : RegisterClass<"X86", [f16], 16, (add FR32)> {let Size = 32;} + // FIXME: This sets up the floating point register files as though they are f64 // values, though they really are f80 values. This will cause us to spill @@ -599,7 +601,7 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>; -def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)>; +def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)> {let Size = 32;} // Extended VR128 and VR256 for AVX-512 instructions def VR128X : RegisterClass<"X86", [v4f32, v2f64, v8f16, v16i8, v8i16, v4i32, v2i64, f128], @@ -638,3 +640,14 @@ def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} let CopyCost = -1 in // Don't allow copying of tile registers def TILE : RegisterClass<"X86", [x86amx], 8192, (sequence "TMM%u", 0, 7)> {let Size = 8192;} + +//===----------------------------------------------------------------------===// +// Register categories. +// + +// The TILE and VK*PAIR registers may not be "fixed", but we don't want them +// anyway. 
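+// (These categories appear to drive the TableGen-generated
+// X86GenRegisterInfo::isFixedRegister / isArgumentRegister fallbacks that the
+// C++ overrides added above defer to.)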
+def FixedRegisters : RegisterCategory<[DEBUG_REG, CONTROL_REG, CCR, FPCCR, + DFCCR, TILE, VK1PAIR, VK2PAIR, VK4PAIR, + VK8PAIR, VK16PAIR]>; +def GeneralPurposeRegisters : RegisterCategory<[GR64, GR32, GR16, GR8]>; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 8e317dc22bd6..e4b95cb0807f 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -814,12 +814,26 @@ def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> { def: InstRW<[BWWriteResGroup34], (instregex "CLD")>; def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> { - let Latency = 3; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r(1|i)", - "RCR(8|16|32|64)r(1|i)")>; +def: InstRW<[BWWriteResGroup35], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def BWWriteResGroup36 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 5; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[BWWriteResGroup36], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def BWWriteResGroup36b : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 6; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[BWWriteResGroup36b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> { let Latency = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 1cd0b3379684..7b1a31d2a4df 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1299,12 +1299,26 @@ def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> { def: InstRW<[HWWriteResGroup58], (instregex "CLD")>; def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> { - let Latency = 3; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r(1|i)", - "RCR(8|16|32|64)r(1|i)")>; +def: InstRW<[HWWriteResGroup59], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def HWWriteResGroup60 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { + let Latency = 5; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[HWWriteResGroup60], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def HWWriteResGroup60b : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { + let Latency = 6; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[HWWriteResGroup60b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> { let Latency = 4; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 9fd986e34181..b66db7e7e73a 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -923,12 +923,26 @@ def ICXWriteResGroup43 : SchedWriteRes<[ICXPort237,ICXPort0156]> { def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>; def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> { - let Latency = 3; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[ICXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)", - "RCR(8|16|32|64)r(1|i)")>; +def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def ICXWriteResGroup44b : 
SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> { + let Latency = 5; + let NumMicroOps = 7; + let ResourceCycles = [2,3,2]; +} +def: InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> { + let Latency = 6; + let NumMicroOps = 7; + let ResourceCycles = [2,3,2]; +} +def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237]> { let Latency = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 7e619a3a8722..49858ca0a800 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -111,8 +111,17 @@ def : WriteRes; def : WriteRes; def : WriteRes { let Latency = 5; } def : WriteRes; + +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. def : WriteRes; -def : WriteRes { let Latency = 5; let NumMicroOps = 0; } + +// Model the effect of clobbering the read-write mask operand of the GATHER operation. +// Does not cost anything by itself, only has latency, matching that of the WriteLoad, +defm : X86WriteRes; // Arithmetic. defm : SBWriteResPair; @@ -678,13 +687,27 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> { } def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>; -def SBWriteResGroup23 : SchedWriteRes<[SBPort05]> { +def SBWriteResGroup23 : SchedWriteRes<[SBPort05,SBPort015]> { let Latency = 2; let NumMicroOps = 3; - let ResourceCycles = [3]; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def SBWriteResGroup24 : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> { + let Latency = 3; + let NumMicroOps = 8; + let ResourceCycles = [1,1,4,2]; +} +def: InstRW<[SBWriteResGroup24], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def SBWriteResGroup24b : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> { + let Latency = 4; + let NumMicroOps = 8; + let ResourceCycles = [1,1,4,2]; } -def: InstRW<[SBWriteResGroup23], (instregex "RCL(8|16|32|64)r1", - "RCR(8|16|32|64)r1")>; +def: InstRW<[SBWriteResGroup24b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> { let Latency = 7; @@ -727,8 +750,8 @@ def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> { let NumMicroOps = 8; let ResourceCycles = [8]; } -def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)r(i|CL)", - "RCR(8|16|32|64)r(i|CL)")>; +def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL", + "RCR(8|16|32|64)rCL")>; def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> { let Latency = 5; @@ -802,8 +825,7 @@ def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup48], (instrs MMX_MOVD64from64rm, - VBROADCASTSSrm)>; +def: InstRW<[SBWriteResGroup48], (instrs VBROADCASTSSrm)>; def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r", "(V?)MOV64toPQIrm", "(V?)MOVDDUPrm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 0a88bac5aa66..05364e3434e4 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -836,12 +836,26 @@ def 
SKLWriteResGroup41 : SchedWriteRes<[SKLPort237,SKLPort0156]> { def: InstRW<[SKLWriteResGroup41], (instrs MFENCE)>; def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> { - let Latency = 3; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r(1|i)", - "RCR(8|16|32|64)r(1|i)")>; +def: InstRW<[SKLWriteResGroup42], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def SKLWriteResGroup42b : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { + let Latency = 5; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[SKLWriteResGroup42b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def SKLWriteResGroup42c : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { + let Latency = 6; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[SKLWriteResGroup42c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> { let Latency = 3; @@ -921,8 +935,7 @@ def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)", - "(V?)MOVDDUPrm")>; // TODO: Should this be SKLWriteResGroup67? + "MOVZX(16|32|64)rm(8|16)")>; def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 5; @@ -979,7 +992,8 @@ def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm, VPBROADCASTDrm, VPBROADCASTQrm)>; def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm", - "(V?)MOVSLDUPrm")>; + "(V?)MOVSLDUPrm", + "(V?)MOVDDUPrm")>; def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> { let Latency = 6; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index b28a18f0dcd7..b682b51c298a 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -905,12 +905,26 @@ def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> { def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>; def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> { - let Latency = 3; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)", - "RCR(8|16|32|64)r(1|i)")>; +def: InstRW<[SKXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 6; + let NumMicroOps = 8; + let ResourceCycles = [2,4,2]; +} +def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> { let Latency = 3; @@ -1041,8 +1055,7 @@ def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)", - "(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71? 
+ "MOVZX(16|32|64)rm(8|16)")>; def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 5; @@ -1145,11 +1158,10 @@ def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> { } def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm, VPBROADCASTDrm, - VPBROADCASTQrm, - VMOVSHDUPrm, - VMOVSLDUPrm, - MOVSHDUPrm, - MOVSLDUPrm)>; + VPBROADCASTQrm)>; +def: InstRW<[SKXWriteResGroup71], (instregex "(V?)MOVSHDUPrm", + "(V?)MOVSLDUPrm", + "(V?)MOVDDUPrm")>; def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> { let Latency = 6; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 4b2fa87a25b5..1e9fcf6cc8cf 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -840,8 +840,8 @@ def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JAL let ResourceCycles = [1, 1, 2, 2, 2, 16, 42]; let NumMicroOps = 63; } -def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32, - VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>; +def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, + VMASKMOVDQU, VMASKMOVDQU64)>; /////////////////////////////////////////////////////////////////////////////// // SchedWriteVariant definitions. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 52605c031617..de4e7dd3cb90 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -377,10 +377,8 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; -// FIXME: The below is closer to correct, but caused some perf regressions. -//defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index fe0484afd227..aada3e0bd906 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -189,15 +189,6 @@ defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; -//defm : ZnWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -227,12 +218,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -//defm : X86WriteRes; -//defm : X86WriteRes; // Bit counts. -defm : ZnWriteResPair; -defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; @@ -240,9 +229,8 @@ defm : ZnWriteResPair; // Treat misc copies as a move. 
def : InstRW<[WriteMove], (instrs COPY)>; -// BMI1 BEXTR/BLS, BMI2 BZHI +// BMI1 BEXTR, BMI2 BZHI defm : ZnWriteResPair; -//defm : ZnWriteResPair; defm : ZnWriteResPair; // IDIV @@ -271,13 +259,13 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; - defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -288,24 +276,24 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteResUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; @@ -346,8 +334,8 @@ defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -410,20 +398,23 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -448,7 +439,7 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -456,11 +447,6 @@ defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; -// Vector Shift Operations -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : X86WriteResPairUnsupported; - // Vector insert/extract operations. defm : ZnWriteResFpuPair; @@ -623,15 +609,14 @@ def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { def : SchedAlias; def : SchedAlias; // TODO: is this right? def : SchedAlias; // TODO: is this right? -def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. 
-def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. // m16. def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } def : SchedAlias; - +def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. // r32. def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; @@ -639,14 +624,14 @@ def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { def : SchedAlias; def : SchedAlias; // TODO: is this right? def : SchedAlias; // TODO: is this right? -def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. -def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. // m32. def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } def : SchedAlias; +def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. // r64. def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { @@ -656,8 +641,6 @@ def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { def : SchedAlias; def : SchedAlias; // TODO: is this right? def : SchedAlias; // TODO: is this right? -def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. -def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. // m64. def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { @@ -665,6 +648,8 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let NumMicroOps = 2; } def : SchedAlias; +def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias; // TODO: this is definitely wrong but matches what the instregex did. // MULX // Numbers are based on the AMD SOG for Family 17h - Instruction Latencies. @@ -1101,12 +1086,11 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>; // HADD, HSUB PS/PD // PHADD|PHSUB (S) W/D. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; // PCMPGTQ. def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>; @@ -1446,12 +1430,6 @@ def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>; //-- Arithmetic instructions --// -// HADD, HSUB PS/PD -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - // VDIVPS. // TODO - convert to ZnWriteResFpuPair // y,y,y. diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 38908a987595..c47d235eab9b 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -195,7 +195,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : Zn2WriteResPair; +defm : Zn2WriteResPair; defm : Zn2WriteResPair; defm : Zn2WriteResPair; defm : Zn2WriteResPair; @@ -219,8 +219,8 @@ defm : X86WriteRes; defm : X86WriteRes; // Bit counts. 
-defm : Zn2WriteResPair; -defm : Zn2WriteResPair; +defm : Zn2WriteResPair; +defm : Zn2WriteResPair; defm : Zn2WriteResPair; defm : Zn2WriteResPair; defm : Zn2WriteResPair; @@ -230,7 +230,7 @@ def : InstRW<[WriteMove], (instrs COPY)>; // BMI1 BEXTR, BMI2 BZHI defm : Zn2WriteResPair; -defm : Zn2WriteResPair; +defm : Zn2WriteResPair; // IDIV defm : Zn2WriteResPair; @@ -247,23 +247,17 @@ def Zn2WriteIMulH : WriteRes{ let Latency = 3; let NumMicroOps = 0; } - def : WriteRes{ let Latency = !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency); let NumMicroOps = Zn2WriteIMulH.NumMicroOps; } - // Floating point operations defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -271,29 +265,34 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteResUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; @@ -332,8 +331,8 @@ defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -394,20 +393,23 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -440,11 +442,6 @@ defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; -// Vector Shift Operations -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : X86WriteResPairUnsupported; - 
 // Vector insert/extract operations.
 defm : Zn2WriteResFpuPair;
@@ -486,12 +483,6 @@ defm : Zn2WriteResFpuPair;
 def Zn2WriteMicrocoded : SchedWriteRes<[]> {
   let Latency = 100;
 }
-defm : Zn2WriteResPair;
-defm : Zn2WriteResPair;
-defm : Zn2WriteResPair;
-defm : Zn2WriteResPair;
-defm : Zn2WriteResPair;
-defm : Zn2WriteResPair;
 def : SchedAlias;
 def : SchedAlias;
@@ -1109,6 +1100,14 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
 //-- Arithmetic instructions --//
+// HADD, HSUB PS/PD
+// PHADD|PHSUB (S) W/D.
+defm : Zn2WriteResFpuPair;
+defm : Zn2WriteResFpuPair;
+defm : Zn2WriteResFpuPair;
+defm : Zn2WriteResFpuPair;
+defm : Zn2WriteResFpuPair;
+
 // PCMPGTQ.
 def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
 def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
@@ -1479,6 +1478,7 @@ def : SchedAlias;
 // DPPS.
 // x,x,i / v,v,v,i.
+defm : Zn2WriteResPair;
 def : SchedAlias;
 // x,m,i / v,v,m,i.
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 5e59081c63b0..78a286ae5b28 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -46,7 +46,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
-    SDValue Size, Align Alignment, bool isVolatile,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
     MachinePointerInfo DstPtrInfo) const {
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
   const X86Subtarget &Subtarget =
@@ -67,40 +67,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   // The libc version is likely to be faster for these cases. It can use the
   // address value and run time information about the CPU.
   if (Alignment < Align(4) || !ConstantSize ||
-      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
-    // Check to see if there is a specialized entry-point for memory zeroing.
-    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
-
-    if (const char *bzeroName =
-            (ValC && ValC->isZero())
-                ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
-                : nullptr) {
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
-      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
-      TargetLowering::ArgListTy Args;
-      TargetLowering::ArgListEntry Entry;
-      Entry.Node = Dst;
-      Entry.Ty = IntPtrTy;
-      Args.push_back(Entry);
-      Entry.Node = Size;
-      Args.push_back(Entry);
-
-      TargetLowering::CallLoweringInfo CLI(DAG);
-      CLI.setDebugLoc(dl)
-          .setChain(Chain)
-          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-                        DAG.getExternalSymbol(bzeroName, IntPtr),
-                        std::move(Args))
-          .setDiscardResult();
-
-      std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-      return CallResult.second;
-    }
-
-    // Otherwise have the target-independent code call memset.
+      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
     return SDValue();
-  }
 
   uint64_t SizeVal = ConstantSize->getZExtValue();
   SDValue InFlag;
@@ -175,7 +143,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
         DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                     DAG.getConstant(Offset, dl, AddrVT)),
         Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
-        isVolatile, false, DstPtrInfo.getWithOffset(Offset));
+        isVolatile, AlwaysInline,
+        /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
   }
 
   // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
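// The memset hunk above makes two changes: the hand-rolled bzero call
// lowering is dropped (returning SDValue() now delegates oversized or
// under-aligned memsets to the generic SelectionDAG expansion), and the
// memset emitted for the trailing BytesLeft remainder forwards the new
// AlwaysInline flag instead of hardcoding false. A minimal sketch of that
// chunk-plus-remainder split follows; expandMemset, emitWideStores, and
// emitTailMemset are illustrative stand-ins, not LLVM APIs.
#include <cstdint>
#include <cstring>

// Models the REP STOS-style bulk stores the X86 expansion emits.
static void emitWideStores(char *Dst, uint8_t Val, uint64_t Bytes) {
  std::memset(Dst, Val, Bytes);
}

// Models the follow-up memset for the tail; the AlwaysInline request must be
// forwarded here, which is what the last hunk above fixes.
static void emitTailMemset(char *Dst, uint8_t Val, uint64_t Bytes,
                           bool AlwaysInline) {
  (void)AlwaysInline; // forwarded, never hardcoded to false
  std::memset(Dst, Val, Bytes);
}

void expandMemset(char *Dst, uint8_t Val, uint64_t Size, bool AlwaysInline) {
  const uint64_t ChunkBytes = 8; // e.g. one REP STOSQ element
  uint64_t Bulk = Size / ChunkBytes * ChunkBytes;
  emitWideStores(Dst, Val, Bulk);
  if (uint64_t BytesLeft = Size - Bulk)
    emitTailMemset(Dst + Bulk, Val, BytesLeft, AlwaysInline);
}
// Forwarding matters because AlwaysInline is a promise that no libcall will
// be materialized; dropping it on the tail could reintroduce a memset call
// for the last few bytes.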
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h index dac62973636c..19136ca4f6f5 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h @@ -29,7 +29,7 @@ public: SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, - bool isVolatile, + bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp index dba11e8b4000..3317db891cf0 100644 --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -181,17 +181,18 @@ private: void tracePredStateThroughBlocksAndHarden(MachineFunction &MF); unsigned saveEFLAGS(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertPt, DebugLoc Loc); + MachineBasicBlock::iterator InsertPt, + const DebugLoc &Loc); void restoreEFLAGS(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertPt, DebugLoc Loc, + MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc, Register Reg); void mergePredStateIntoSP(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertPt, DebugLoc Loc, - unsigned PredStateReg); + MachineBasicBlock::iterator InsertPt, + const DebugLoc &Loc, unsigned PredStateReg); unsigned extractPredStateFromSP(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, - DebugLoc Loc); + const DebugLoc &Loc); void hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO, @@ -203,7 +204,7 @@ private: bool canHardenRegister(Register Reg); unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, - DebugLoc Loc); + const DebugLoc &Loc); unsigned hardenPostLoad(MachineInstr &MI); void hardenReturnInstr(MachineInstr &MI); void tracePredStateThroughCall(MachineInstr &MI); @@ -356,8 +357,8 @@ static void canonicalizePHIOperands(MachineFunction &MF) { int OpIdx = DupIndices.pop_back_val(); // Remove both the block and value operand, again in reverse order to // preserve indices. - MI.RemoveOperand(OpIdx + 1); - MI.RemoveOperand(OpIdx); + MI.removeOperand(OpIdx + 1); + MI.removeOperand(OpIdx); } Preds.clear(); @@ -1500,7 +1501,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( /// as the save so that no PHI nodes are inserted. unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, - DebugLoc Loc) { + const DebugLoc &Loc) { // FIXME: Hard coding this to a 32-bit register class seems weird, but matches // what instruction selection does. Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass); @@ -1517,8 +1518,8 @@ unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( /// This must be done within the same basic block as the save in order to /// reliably lower. 
void X86SpeculativeLoadHardeningPass::restoreEFLAGS( - MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc, - Register Reg) { + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, + const DebugLoc &Loc, Register Reg) { BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg); ++NumInstsInserted; } @@ -1528,8 +1529,8 @@ void X86SpeculativeLoadHardeningPass::restoreEFLAGS( /// a way that won't form non-canonical pointers and also will be preserved /// across normal stack adjustments. void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( - MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc, - unsigned PredStateReg) { + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, + const DebugLoc &Loc, unsigned PredStateReg) { Register TmpReg = MRI->createVirtualRegister(PS->RC); // FIXME: This hard codes a shift distance based on the number of bits needed // to stay canonical on 64-bit. We should compute this somehow and support @@ -1549,7 +1550,7 @@ void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( /// Extracts the predicate state stored in the high bits of the stack pointer. unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, - DebugLoc Loc) { + const DebugLoc &Loc) { Register PredStateReg = MRI->createVirtualRegister(PS->RC); Register TmpReg = MRI->createVirtualRegister(PS->RC); @@ -1907,7 +1908,7 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) { /// register class as `Reg`. unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, - DebugLoc Loc) { + const DebugLoc &Loc) { assert(canHardenRegister(Reg) && "Cannot harden this register!"); assert(Reg.isVirtual() && "Cannot harden a physical register!"); diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index a3d4d04b1e0d..0d091adc8e77 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -21,6 +21,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -247,7 +249,7 @@ bool X86Subtarget::isLegalToCallImmediateAddr() const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does, // the following check for Win32 should be removed. - if (In64BitMode || isTargetWin32()) + if (Is64Bit || isTargetWin32()) return false; return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } @@ -274,12 +276,12 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, // introduced with Intel's Nehalem/Silvermont and AMD's Family10h // micro-architectures respectively. 
if (hasSSE42() || hasSSE4A()) - IsUAMem16Slow = false; + IsUnalignedMem16Slow = false; LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); - if (In64BitMode && !HasX86_64) + if (Is64Bit && !HasX86_64) report_fatal_error("64-bit code requested on a subtarget that doesn't " "support it!"); @@ -289,7 +291,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, if (StackAlignOverride) stackAlignment = *StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetKFreeBSD() || - isTargetNaCl() || In64BitMode) + isTargetNaCl() || Is64Bit) stackAlignment = Align(16); // Consume the vector width attribute or apply any target specific limit. @@ -357,7 +359,7 @@ const RegisterBankInfo *X86Subtarget::getRegBankInfo() const { } bool X86Subtarget::enableEarlyIfConversion() const { - return hasCMov() && X86EarlyIfConv; + return canUseCMOV() && X86EarlyIfConv; } void X86Subtarget::getPostRAMutations( diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 5d773f0c57df..09a8b1f1aafb 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -50,24 +50,14 @@ enum class Style { } // end namespace PICStyles class X86Subtarget final : public X86GenSubtargetInfo { - // NOTE: Do not add anything new to this list. Coarse, CPU name based flags - // are not a good idea. We should be migrating away from these. - enum X86ProcFamilyEnum { - Others, - IntelAtom - }; - enum X86SSEEnum { - NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F + NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512 }; enum X863DNowEnum { NoThreeDNow, MMX, ThreeDNow, ThreeDNowA }; - /// X86 processor family: Intel Atom, and others - X86ProcFamilyEnum X86ProcFamily = Others; - /// Which PIC style to use PICStyles::Style PICStyle; @@ -79,412 +69,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// MMX, 3DNow, 3DNow Athlon, or none supported. X863DNowEnum X863DNowLevel = NoThreeDNow; - /// True if the processor supports X87 instructions. - bool HasX87 = false; - - /// True if the processor supports CMPXCHG8B. - bool HasCmpxchg8b = false; - - /// True if this processor has NOPL instruction - /// (generally pentium pro+). - bool HasNOPL = false; - - /// True if this processor has conditional move instructions - /// (generally pentium pro+). - bool HasCMov = false; - - /// True if the processor supports X86-64 instructions. - bool HasX86_64 = false; - - /// True if the processor supports POPCNT. - bool HasPOPCNT = false; - - /// True if the processor supports SSE4A instructions. 
- bool HasSSE4A = false; - - /// Target has AES instructions - bool HasAES = false; - bool HasVAES = false; - - /// Target has FXSAVE/FXRESTOR instructions - bool HasFXSR = false; - - /// Target has XSAVE instructions - bool HasXSAVE = false; - - /// Target has XSAVEOPT instructions - bool HasXSAVEOPT = false; - - /// Target has XSAVEC instructions - bool HasXSAVEC = false; - - /// Target has XSAVES instructions - bool HasXSAVES = false; - - /// Target has carry-less multiplication - bool HasPCLMUL = false; - bool HasVPCLMULQDQ = false; - - /// Target has Galois Field Arithmetic instructions - bool HasGFNI = false; - - /// Target has 3-operand fused multiply-add - bool HasFMA = false; - - /// Target has 4-operand fused multiply-add - bool HasFMA4 = false; - - /// Target has XOP instructions - bool HasXOP = false; - - /// Target has TBM instructions. - bool HasTBM = false; - - /// Target has LWP instructions - bool HasLWP = false; - - /// True if the processor has the MOVBE instruction. - bool HasMOVBE = false; - - /// True if the processor has the RDRAND instruction. - bool HasRDRAND = false; - - /// Processor has 16-bit floating point conversion instructions. - bool HasF16C = false; - - /// Processor has FS/GS base insturctions. - bool HasFSGSBase = false; - - /// Processor has LZCNT instruction. - bool HasLZCNT = false; - - /// Processor has BMI1 instructions. - bool HasBMI = false; - - /// Processor has BMI2 instructions. - bool HasBMI2 = false; - - /// Processor has VBMI instructions. - bool HasVBMI = false; - - /// Processor has VBMI2 instructions. - bool HasVBMI2 = false; - - /// Processor has Integer Fused Multiply Add - bool HasIFMA = false; - - /// Processor has RTM instructions. - bool HasRTM = false; - - /// Processor has ADX instructions. - bool HasADX = false; - - /// Processor has SHA instructions. - bool HasSHA = false; - - /// Processor has PRFCHW instructions. - bool HasPRFCHW = false; - - /// Processor has RDSEED instructions. - bool HasRDSEED = false; - - /// Processor has LAHF/SAHF instructions in 64-bit mode. - bool HasLAHFSAHF64 = false; - - /// Processor has MONITORX/MWAITX instructions. - bool HasMWAITX = false; - - /// Processor has Cache Line Zero instruction - bool HasCLZERO = false; - - /// Processor has Cache Line Demote instruction - bool HasCLDEMOTE = false; - - /// Processor has MOVDIRI instruction (direct store integer). - bool HasMOVDIRI = false; - - /// Processor has MOVDIR64B instruction (direct store 64 bytes). - bool HasMOVDIR64B = false; - - /// Processor has ptwrite instruction. - bool HasPTWRITE = false; - - /// Processor has Prefetch with intent to Write instruction - bool HasPREFETCHWT1 = false; - - /// True if SHLD instructions are slow. - bool IsSHLDSlow = false; - - /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and - // PMULUDQ. - bool IsPMULLDSlow = false; - - /// True if the PMADDWD instruction is slow compared to PMULLD. - bool IsPMADDWDSlow = false; - - /// True if unaligned memory accesses of 16-bytes are slow. - bool IsUAMem16Slow = false; - - /// True if unaligned memory accesses of 32-bytes are slow. - bool IsUAMem32Slow = false; - - /// True if SSE operations can have unaligned memory operands. - /// This may require setting a configuration bit in the processor. - bool HasSSEUnalignedMem = false; - - /// True if this processor has the CMPXCHG16B instruction; - /// this is true for most x86-64 chips, but not the first AMD chips. 
- bool HasCmpxchg16b = false; - - /// True if the LEA instruction should be used for adjusting - /// the stack pointer. This is an optimization for Intel Atom processors. - bool UseLeaForSP = false; - - /// True if POPCNT instruction has a false dependency on the destination register. - bool HasPOPCNTFalseDeps = false; - - /// True if LZCNT/TZCNT instructions have a false dependency on the destination register. - bool HasLZCNTFalseDeps = false; - - /// True if its preferable to combine to a single cross-lane shuffle - /// using a variable mask over multiple fixed shuffles. - bool HasFastVariableCrossLaneShuffle = false; - - /// True if its preferable to combine to a single per-lane shuffle - /// using a variable mask over multiple fixed shuffles. - bool HasFastVariablePerLaneShuffle = false; - - /// True if vzeroupper instructions should be inserted after code that uses - /// ymm or zmm registers. - bool InsertVZEROUPPER = false; - - /// True if there is no performance penalty for writing NOPs with up to - /// 7 bytes. - bool HasFast7ByteNOP = false; - - /// True if there is no performance penalty for writing NOPs with up to - /// 11 bytes. - bool HasFast11ByteNOP = false; - - /// True if there is no performance penalty for writing NOPs with up to - /// 15 bytes. - bool HasFast15ByteNOP = false; - - /// True if gather is reasonably fast. This is true for Skylake client and - /// all AVX-512 CPUs. - bool HasFastGather = false; - - /// True if hardware SQRTSS instruction is at least as fast (latency) as - /// RSQRTSS followed by a Newton-Raphson iteration. - bool HasFastScalarFSQRT = false; - - /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast - /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration. - bool HasFastVectorFSQRT = false; - - /// True if 8-bit divisions are significantly faster than - /// 32-bit divisions and should be used when possible. - bool HasSlowDivide32 = false; - - /// True if 32-bit divides are significantly faster than - /// 64-bit divisions and should be used when possible. - bool HasSlowDivide64 = false; - - /// True if LZCNT instruction is fast. - bool HasFastLZCNT = false; - - /// True if SHLD based rotate is fast. - bool HasFastSHLDRotate = false; - - /// True if the processor supports macrofusion. - bool HasMacroFusion = false; - - /// True if the processor supports branch fusion. - bool HasBranchFusion = false; - - /// True if the processor has enhanced REP MOVSB/STOSB. - bool HasERMSB = false; - - /// True if the processor has fast short REP MOV. - bool HasFSRM = false; - - /// True if the short functions should be padded to prevent - /// a stall when returning too early. - bool PadShortFunctions = false; - - /// True if two memory operand instructions should use a temporary register - /// instead. - bool SlowTwoMemOps = false; - - /// True if the LEA instruction inputs have to be ready at address generation - /// (AG) time. 
- bool LEAUsesAG = false; - - /// True if the LEA instruction with certain arguments is slow - bool SlowLEA = false; - - /// True if the LEA instruction has all three source operands: base, index, - /// and offset or if the LEA instruction uses base and index registers where - /// the base is EBP, RBP,or R13 - bool Slow3OpsLEA = false; - - /// True if INC and DEC instructions are slow when writing to flags - bool SlowIncDec = false; - - /// Processor has AVX-512 PreFetch Instructions - bool HasPFI = false; - - /// Processor has AVX-512 Exponential and Reciprocal Instructions - bool HasERI = false; - - /// Processor has AVX-512 Conflict Detection Instructions - bool HasCDI = false; - - /// Processor has AVX-512 population count Instructions - bool HasVPOPCNTDQ = false; - - /// Processor has AVX-512 Doubleword and Quadword instructions - bool HasDQI = false; - - /// Processor has AVX-512 Byte and Word instructions - bool HasBWI = false; - - /// Processor has AVX-512 Vector Length eXtenstions - bool HasVLX = false; - - /// Processor has AVX-512 16 bit floating-point extenstions - bool HasFP16 = false; - - /// Processor has PKU extenstions - bool HasPKU = false; - - /// Processor has AVX-512 Vector Neural Network Instructions - bool HasVNNI = false; - - /// Processor has AVX Vector Neural Network Instructions - bool HasAVXVNNI = false; - - /// Processor has AVX-512 bfloat16 floating-point extensions - bool HasBF16 = false; - - /// Processor supports ENQCMD instructions - bool HasENQCMD = false; - - /// Processor has AVX-512 Bit Algorithms instructions - bool HasBITALG = false; - - /// Processor has AVX-512 vp2intersect instructions - bool HasVP2INTERSECT = false; - - /// Processor supports CET SHSTK - Control-Flow Enforcement Technology - /// using Shadow Stack - bool HasSHSTK = false; - - /// Processor supports Invalidate Process-Context Identifier - bool HasINVPCID = false; - - /// Processor has Software Guard Extensions - bool HasSGX = false; - - /// Processor supports Flush Cache Line instruction - bool HasCLFLUSHOPT = false; - - /// Processor supports Cache Line Write Back instruction - bool HasCLWB = false; - - /// Processor supports Write Back No Invalidate instruction - bool HasWBNOINVD = false; - - /// Processor support RDPID instruction - bool HasRDPID = false; - - /// Processor supports WaitPKG instructions - bool HasWAITPKG = false; - - /// Processor supports PCONFIG instruction - bool HasPCONFIG = false; - - /// Processor support key locker instructions - bool HasKL = false; - - /// Processor support key locker wide instructions - bool HasWIDEKL = false; - - /// Processor supports HRESET instruction - bool HasHRESET = false; - - /// Processor supports SERIALIZE instruction - bool HasSERIALIZE = false; - - /// Processor supports TSXLDTRK instruction - bool HasTSXLDTRK = false; - - /// Processor has AMX support - bool HasAMXTILE = false; - bool HasAMXBF16 = false; - bool HasAMXINT8 = false; - - /// Processor supports User Level Interrupt instructions - bool HasUINTR = false; - - /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but - /// function is GPR only) - bool HasCRC32 = false; - - /// Processor has a single uop BEXTR implementation. - bool HasFastBEXTR = false; - - /// Try harder to combine to horizontal vector ops if they are fast. - bool HasFastHorizontalOps = false; - - /// Prefer a left/right scalar logical shifts pair over a shift+and pair. 
- bool HasFastScalarShiftMasks = false; - - /// Prefer a left/right vector logical shifts pair over a shift+and pair. - bool HasFastVectorShiftMasks = false; - - /// Prefer a movbe over a single-use load + bswap / single-use bswap + store. - bool HasFastMOVBE = false; - - /// Use a retpoline thunk rather than indirect calls to block speculative - /// execution. - bool UseRetpolineIndirectCalls = false; - - /// Use a retpoline thunk or remove any indirect branch to block speculative - /// execution. - bool UseRetpolineIndirectBranches = false; - - /// Deprecated flag, query `UseRetpolineIndirectCalls` and - /// `UseRetpolineIndirectBranches` instead. - bool DeprecatedUseRetpoline = false; - - /// When using a retpoline thunk, call an externally provided thunk rather - /// than emitting one inside the compiler. - bool UseRetpolineExternalThunk = false; - - /// Prevent generation of indirect call/branch instructions from memory, - /// and force all indirect call/branch instructions from a register to be - /// preceded by an LFENCE. Also decompose RET instructions into a - /// POP+LFENCE+JMP sequence. - bool UseLVIControlFlowIntegrity = false; - - /// Enable Speculative Execution Side Effect Suppression - bool UseSpeculativeExecutionSideEffectSuppression = false; - - /// Insert LFENCE instructions to prevent data speculatively injected into - /// loads from being used maliciously. - bool UseLVILoadHardening = false; - - /// Use an instruction sequence for taking the address of a global that allows - /// a memory tag in the upper address bits. - bool AllowTaggedGlobals = false; - - /// Use software floating point for code generation. - bool UseSoftFloat = false; - - /// Use alias analysis during code generation. - bool UseAA = false; - +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "X86GenSubtargetInfo.inc" /// The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. Align stackAlignment = Align(4); @@ -496,21 +83,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { // FIXME: this is a known good value for Yonah. How about others? unsigned MaxInlineSizeThreshold = 128; - /// Indicates target prefers 128 bit instructions. - bool Prefer128Bit = false; - - /// Indicates target prefers 256 bit instructions. - bool Prefer256Bit = false; - - /// Indicates target prefers AVX512 mask registers. - bool PreferMaskRegisters = false; - - /// Use Silvermont specific arithmetic costs. - bool UseSLMArithCosts = false; - - /// Use Goldmont specific floating point div/sqrt costs. - bool UseGLMDivSqrtCosts = false; - /// What processor and OS we're targeting. Triple TargetTriple; @@ -520,7 +92,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { std::unique_ptr RegBankInfo; std::unique_ptr InstSelector; -private: /// Override the stack alignment. MaybeAlign StackAlignOverride; @@ -534,15 +105,6 @@ private: /// Required vector width from function attribute. unsigned RequiredVectorWidth; - /// True if compiling for 64-bit, false for 16-bit or 32-bit. - bool In64BitMode = false; - - /// True if compiling for 32-bit, false for 16-bit or 64-bit. - bool In32BitMode = false; - - /// True if compiling for 16-bit, false for 32-bit or 64-bit. - bool In16BitMode = false; - X86SelectionDAGInfo TSInfo; // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which // X86TargetLowering needs. 
@@ -608,38 +170,32 @@ private: void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); public: - /// Is this x86_64? (disregarding specific ABI / programming model) - bool is64Bit() const { - return In64BitMode; - } - bool is32Bit() const { - return In32BitMode; - } - - bool is16Bit() const { - return In16BitMode; - } +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "X86GenSubtargetInfo.inc" /// Is this x86_64 with the ILP32 programming model (x32 ABI)? bool isTarget64BitILP32() const { - return In64BitMode && (TargetTriple.isX32() || TargetTriple.isOSNaCl()); + return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl()); } /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? bool isTarget64BitLP64() const { - return In64BitMode && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl()); + return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl()); } PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } - bool hasX87() const { return HasX87; } - bool hasCmpxchg8b() const { return HasCmpxchg8b; } - bool hasNOPL() const { return HasNOPL; } + bool canUseCMPXCHG8B() const { return hasCX8(); } + bool canUseCMPXCHG16B() const { + // CX16 is just the CPUID bit, instruction requires 64-bit mode too. + return hasCX16() && is64Bit(); + } // SSE codegen depends on cmovs, and all SSE1+ processors support them. // All 64-bit processors support cmov. - bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); } + bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } @@ -648,146 +204,26 @@ public: bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasAVX() const { return X86SSELevel >= AVX; } bool hasAVX2() const { return X86SSELevel >= AVX2; } - bool hasAVX512() const { return X86SSELevel >= AVX512F; } + bool hasAVX512() const { return X86SSELevel >= AVX512; } bool hasInt256() const { return hasAVX2(); } - bool hasSSE4A() const { return HasSSE4A; } bool hasMMX() const { return X863DNowLevel >= MMX; } - bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } - bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } - bool hasPOPCNT() const { return HasPOPCNT; } - bool hasAES() const { return HasAES; } - bool hasVAES() const { return HasVAES; } - bool hasFXSR() const { return HasFXSR; } - bool hasXSAVE() const { return HasXSAVE; } - bool hasXSAVEOPT() const { return HasXSAVEOPT; } - bool hasXSAVEC() const { return HasXSAVEC; } - bool hasXSAVES() const { return HasXSAVES; } - bool hasPCLMUL() const { return HasPCLMUL; } - bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } - bool hasGFNI() const { return HasGFNI; } - // Prefer FMA4 to FMA - its better for commutation/memory folding and - // has equal or better performance on all supported targets. 
- bool hasFMA() const { return HasFMA; } - bool hasFMA4() const { return HasFMA4; } + bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; } + bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } - bool hasXOP() const { return HasXOP; } - bool hasTBM() const { return HasTBM; } - bool hasLWP() const { return HasLWP; } - bool hasMOVBE() const { return HasMOVBE; } - bool hasRDRAND() const { return HasRDRAND; } - bool hasF16C() const { return HasF16C; } - bool hasFSGSBase() const { return HasFSGSBase; } - bool hasLZCNT() const { return HasLZCNT; } - bool hasBMI() const { return HasBMI; } - bool hasBMI2() const { return HasBMI2; } - bool hasVBMI() const { return HasVBMI; } - bool hasVBMI2() const { return HasVBMI2; } - bool hasIFMA() const { return HasIFMA; } - bool hasRTM() const { return HasRTM; } - bool hasADX() const { return HasADX; } - bool hasSHA() const { return HasSHA; } - bool hasPRFCHW() const { return HasPRFCHW; } - bool hasPREFETCHWT1() const { return HasPREFETCHWT1; } bool hasPrefetchW() const { // The PREFETCHW instruction was added with 3DNow but later CPUs gave it // its own CPUID bit as part of deprecating 3DNow. Intel eventually added // it and KNL has another that prefetches to L2 cache. We assume the // L1 version exists if the L2 version does. - return has3DNow() || hasPRFCHW() || hasPREFETCHWT1(); + return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1(); } bool hasSSEPrefetch() const { // We implicitly enable these when we have a write prefix supporting cache // level OR if we have prfchw, but don't already have a read prefetch from // 3dnow. - return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1(); - } - bool hasRDSEED() const { return HasRDSEED; } - bool hasLAHFSAHF() const { return HasLAHFSAHF64 || !is64Bit(); } - bool hasMWAITX() const { return HasMWAITX; } - bool hasCLZERO() const { return HasCLZERO; } - bool hasCLDEMOTE() const { return HasCLDEMOTE; } - bool hasMOVDIRI() const { return HasMOVDIRI; } - bool hasMOVDIR64B() const { return HasMOVDIR64B; } - bool hasPTWRITE() const { return HasPTWRITE; } - bool isSHLDSlow() const { return IsSHLDSlow; } - bool isPMULLDSlow() const { return IsPMULLDSlow; } - bool isPMADDWDSlow() const { return IsPMADDWDSlow; } - bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } - bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } - bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } - bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); } - bool useLeaForSP() const { return UseLeaForSP; } - bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; } - bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; } - bool hasFastVariableCrossLaneShuffle() const { - return HasFastVariableCrossLaneShuffle; - } - bool hasFastVariablePerLaneShuffle() const { - return HasFastVariablePerLaneShuffle; + return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1(); } - bool insertVZEROUPPER() const { return InsertVZEROUPPER; } - bool hasFastGather() const { return HasFastGather; } - bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; } - bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } - bool hasFastLZCNT() const { return HasFastLZCNT; } - bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } - bool hasFastBEXTR() const { return HasFastBEXTR; } - bool hasFastHorizontalOps() const { return HasFastHorizontalOps; } - bool hasFastScalarShiftMasks() const { return 
HasFastScalarShiftMasks; } - bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; } - bool hasFastMOVBE() const { return HasFastMOVBE; } - bool hasMacroFusion() const { return HasMacroFusion; } - bool hasBranchFusion() const { return HasBranchFusion; } - bool hasERMSB() const { return HasERMSB; } - bool hasFSRM() const { return HasFSRM; } - bool hasSlowDivide32() const { return HasSlowDivide32; } - bool hasSlowDivide64() const { return HasSlowDivide64; } - bool padShortFunctions() const { return PadShortFunctions; } - bool slowTwoMemOps() const { return SlowTwoMemOps; } - bool LEAusesAG() const { return LEAUsesAG; } - bool slowLEA() const { return SlowLEA; } - bool slow3OpsLEA() const { return Slow3OpsLEA; } - bool slowIncDec() const { return SlowIncDec; } - bool hasCDI() const { return HasCDI; } - bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } - bool hasPFI() const { return HasPFI; } - bool hasERI() const { return HasERI; } - bool hasDQI() const { return HasDQI; } - bool hasBWI() const { return HasBWI; } - bool hasVLX() const { return HasVLX; } - bool hasFP16() const { return HasFP16; } - bool hasPKU() const { return HasPKU; } - bool hasVNNI() const { return HasVNNI; } - bool hasBF16() const { return HasBF16; } - bool hasVP2INTERSECT() const { return HasVP2INTERSECT; } - bool hasBITALG() const { return HasBITALG; } - bool hasSHSTK() const { return HasSHSTK; } - bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } - bool hasCLWB() const { return HasCLWB; } - bool hasWBNOINVD() const { return HasWBNOINVD; } - bool hasRDPID() const { return HasRDPID; } - bool hasWAITPKG() const { return HasWAITPKG; } - bool hasPCONFIG() const { return HasPCONFIG; } - bool hasSGX() const { return HasSGX; } - bool hasINVPCID() const { return HasINVPCID; } - bool hasENQCMD() const { return HasENQCMD; } - bool hasKL() const { return HasKL; } - bool hasWIDEKL() const { return HasWIDEKL; } - bool hasHRESET() const { return HasHRESET; } - bool hasSERIALIZE() const { return HasSERIALIZE; } - bool hasTSXLDTRK() const { return HasTSXLDTRK; } - bool hasUINTR() const { return HasUINTR; } - bool hasCRC32() const { return HasCRC32; } - bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; } - bool useRetpolineIndirectBranches() const { - return UseRetpolineIndirectBranches; - } - bool hasAVXVNNI() const { return HasAVXVNNI; } - bool hasAMXTILE() const { return HasAMXTILE; } - bool hasAMXBF16() const { return HasAMXBF16; } - bool hasAMXINT8() const { return HasAMXINT8; } - bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } - + bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); } // These are generic getters that OR together all of the thunk types // supported by the subtarget. Therefore useIndirectThunk*() will return true // if any respective thunk feature is enabled. 
@@ -798,16 +234,6 @@ public: return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity(); } - bool preferMaskRegisters() const { return PreferMaskRegisters; } - bool useSLMArithCosts() const { return UseSLMArithCosts; } - bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; } - bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; } - bool allowTaggedGlobals() const { return AllowTaggedGlobals; } - bool useLVILoadHardening() const { return UseLVILoadHardening; } - bool useSpeculativeExecutionSideEffectSuppression() const { - return UseSpeculativeExecutionSideEffectSuppression; - } - unsigned getPreferVectorWidth() const { return PreferVectorWidth; } unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; } @@ -834,11 +260,6 @@ public: bool isXRaySupported() const override { return is64Bit(); } - /// TODO: to be removed later and replaced with suitable properties - bool isAtom() const { return X86ProcFamily == IntelAtom; } - bool useSoftFloat() const { return UseSoftFloat; } - bool useAA() const override { return UseAA; } - /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for /// no-sse2). There isn't any reason to disable it if the target processor /// supports it. @@ -850,7 +271,7 @@ public: bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } - bool isTargetPS4() const { return TargetTriple.isPS4CPU(); } + bool isTargetPS() const { return TargetTriple.isPS(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } @@ -890,9 +311,9 @@ public: bool isOSWindows() const { return TargetTriple.isOSWindows(); } - bool isTargetWin64() const { return In64BitMode && isOSWindows(); } + bool isTargetWin64() const { return Is64Bit && isOSWindows(); } - bool isTargetWin32() const { return !In64BitMode && isOSWindows(); } + bool isTargetWin32() const { return !Is64Bit && isOSWindows(); } bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; } @@ -990,8 +411,6 @@ public: AntiDepBreakMode getAntiDepBreakMode() const override { return TargetSubtargetInfo::ANTIDEP_CRITICAL; } - - bool enableAdvancedRASplitCost() const override { return false; } }; } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index e3d0128dd73d..4249788e3540 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -27,13 +27,16 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDomainFix.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" @@ -56,6 +59,11 @@ static cl::opt EnableMachineCombinerPass("x86-machine-combiner", 
cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); +static cl::opt + EnableTileRAPass("x86-tile-ra", + cl::desc("Enable the tile register allocation pass"), + cl::init(true), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine X(getTheX86_32Target()); @@ -65,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeX86LowerAMXIntrinsicsLegacyPassPass(PR); initializeX86LowerAMXTypeLegacyPassPass(PR); initializeX86PreAMXConfigPassPass(PR); + initializeX86PreTileConfigPass(PR); initializeGlobalISel(PR); initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); @@ -75,6 +84,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeX86CallFrameOptimizationPass(PR); initializeX86CmovConverterPassPass(PR); initializeX86TileConfigPass(PR); + initializeX86FastPreTileConfigPass(PR); initializeX86FastTileConfigPass(PR); initializeX86LowerTileCopyPass(PR); initializeX86ExpandPseudoPass(PR); @@ -154,7 +164,7 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT, Optional RM) { bool is64Bit = TT.getArch() == Triple::x86_64; - if (!RM.hasValue()) { + if (!RM) { // JIT codegen should use static relocations by default, since it's // typically executed in process and not relocatable. if (JIT) @@ -218,9 +228,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL), TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) { - // On PS4, the "return address" of a 'noreturn' call must still be within + // On PS4/PS5, the "return address" of a 'noreturn' call must still be within // the calling function, and TrapUnreachable is an easy way to get that. 
- if (TT.isPS4() || TT.isOSBinFormatMachO()) { + if (TT.isPS() || TT.isOSBinFormatMachO()) { this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO(); } @@ -333,7 +343,7 @@ bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, //===----------------------------------------------------------------------===// TargetTransformInfo -X86TargetMachine::getTargetTransformInfo(const Function &F) { +X86TargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(X86TTIImpl(this, F)); } @@ -382,7 +392,7 @@ public: void addPreEmitPass() override; void addPreEmitPass2() override; void addPreSched2() override; - bool addPreRewrite() override; + bool addRegAssignAndRewriteOptimized() override; std::unique_ptr getCSEConfig() const override; }; @@ -417,9 +427,6 @@ void X86PassConfig::addIRPasses() { addPass(createX86LowerAMXIntrinsicsPass()); addPass(createX86LowerAMXTypePass()); - if (TM->getOptLevel() == CodeGenOpt::None) - addPass(createX86PreAMXConfigPass()); - TargetPassConfig::addIRPasses(); if (TM->getOptLevel() != CodeGenOpt::None) { @@ -441,6 +448,9 @@ void X86PassConfig::addIRPasses() { addPass(createCFGuardCheckPass()); } } + + if (TM->Options.JMCInstrument) + addPass(createJMCInstrumenterPass()); } bool X86PassConfig::addInstSelector() { @@ -505,9 +515,10 @@ void X86PassConfig::addPreRegAlloc() { addPass(createX86FlagsCopyLoweringPass()); addPass(createX86DynAllocaExpander()); - if (getOptLevel() != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) addPass(createX86PreTileConfigPass()); - } + else + addPass(createX86FastPreTileConfigPass()); } void X86PassConfig::addMachineSSAOptimization() { @@ -607,11 +618,21 @@ bool X86PassConfig::addPostFastRegAllocRewrite() { return true; } -bool X86PassConfig::addPreRewrite() { - addPass(createX86TileConfigPass()); - return true; -} - std::unique_ptr X86PassConfig::getCSEConfig() const { return getStandardCSEConfigForOpt(TM->getOptLevel()); } + +static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) { + return static_cast(TRI).isTileRegisterClass(&RC); +} + +bool X86PassConfig::addRegAssignAndRewriteOptimized() { + // Don't support tile RA when RA is specified by command line "-regalloc". + if (!isCustomizedRegAlloc() && EnableTileRAPass) { + // Allocate tile register first. + addPass(createGreedyRegisterAllocator(onlyAllocateTileRegisters)); + addPass(createX86TileConfigPass()); + } + return TargetPassConfig::addRegAssignAndRewriteOptimized(); +} diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h index 69d7e48b8977..70df8da77641 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ b/llvm/lib/Target/X86/X86TargetMachine.h @@ -44,7 +44,7 @@ public: // attributes of each function. const X86Subtarget *getSubtargetImpl() const = delete; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; // Set up the pass pipeline. 
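// Editor's sketch of the two-round allocation pattern that
// addRegAssignAndRewriteOptimized() above relies on: run the greedy allocator
// restricted to one register class via a filter functor, insert a fix-up pass
// between the rounds, then let the default pipeline allocate everything else.
// The 'Foo*' names are hypothetical placeholders; the usual CodeGen includes
// are assumed.
static bool onlyAllocateFooRegisters(const TargetRegisterInfo &TRI,
                                     const TargetRegisterClass &RC) {
  // Claim only the class the first round should assign.
  return StringRef(TRI.getRegClassName(&RC)) == "FOO";
}
bool FooPassConfig::addRegAssignAndRewriteOptimized() {
  addPass(createGreedyRegisterAllocator(onlyAllocateFooRegisters));
  addPass(createFooConfigPass()); // hypothetical fix-up between the rounds
  return TargetPassConfig::addRegAssignAndRewriteOptimized(); // the rest
}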
TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 5b95c10332dc..b36f8a3d06d0 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1085,7 +1085,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef Mask, int Index, - VectorType *SubTp) { + VectorType *SubTp, + ArrayRef Args) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are widened to type v4i32. std::pair LT = TLI->getTypeLegalizationCost(DL, BaseTp); @@ -1223,6 +1224,63 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(), LegalVT.getVectorNumElements()); + if (!Mask.empty() && NumOfDests.isValid()) { + // Try to perform better estimation of the permutation. + // 1. Split the source/destination vectors into real registers. + // 2. Do the mask analysis to identify which real registers are + // permuted. If more than 1 source registers are used for the + // destination register building, the cost for this destination register + // is (Number_of_source_register - 1) * Cost_PermuteTwoSrc. If only one + // source register is used, build mask and calculate the cost as a cost + // of PermuteSingleSrc. + // Also, for the single register permute we try to identify if the + // destination register is just a copy of the source register or the + // copy of the previous destination register (the cost is + // TTI::TCC_Basic). If the source register is just reused, the cost for + // this operation is 0. + unsigned E = *NumOfDests.getValue(); + unsigned NormalizedVF = + LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E); + unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements(); + unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements(); + SmallVector NormalizedMask(NormalizedVF, UndefMaskElem); + copy(Mask, NormalizedMask.begin()); + unsigned PrevSrcReg = 0; + ArrayRef PrevRegMask; + InstructionCost Cost = 0; + processShuffleMasks( + NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {}, + [this, SingleOpTy, &PrevSrcReg, &PrevRegMask, + &Cost](ArrayRef RegMask, unsigned SrcReg, unsigned DestReg) { + if (!ShuffleVectorInst::isIdentityMask(RegMask)) { + // Check if the previous register can be just copied to the next + // one. + if (PrevRegMask.empty() || PrevSrcReg != SrcReg || + PrevRegMask != RegMask) + Cost += getShuffleCost(TTI::SK_PermuteSingleSrc, SingleOpTy, + RegMask, 0, nullptr); + else + // Just a copy of previous destination register. + Cost += TTI::TCC_Basic; + return; + } + if (SrcReg != DestReg && + any_of(RegMask, [](int I) { return I != UndefMaskElem; })) { + // Just a copy of the source register. 
+ Cost += TTI::TCC_Basic; + } + PrevSrcReg = SrcReg; + PrevRegMask = RegMask; + }, + [this, SingleOpTy, &Cost](ArrayRef RegMask, + unsigned /*Unused*/, + unsigned /*Unused*/) { + Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask, + 0, nullptr); + }); + return Cost; + } + InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, None, 0, nullptr); @@ -1545,9 +1603,25 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute }; - if (ST->hasSSE2()) + static const CostTblEntry SSE3BroadcastLoadTbl[] = { + {TTI::SK_Broadcast, MVT::v2f64, 0}, // broadcast handled by movddup + }; + + if (ST->hasSSE2()) { + bool IsLoad = + llvm::any_of(Args, [](const auto &V) { return isa(V); }); + if (ST->hasSSE3() && IsLoad) + if (const auto *Entry = + CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) { + assert(isLegalBroadcastLoad(BaseTp->getElementType(), + LT.second.getVectorElementCount()) && + "Table entry missing from isLegalBroadcastLoad()"); + return LT.first * Entry->Cost; + } + if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; + } static const CostTblEntry SSE1ShuffleTbl[] = { { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps @@ -2444,6 +2518,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + // If we're truncating to the same legalized type - just assume its free. + if (ISD == ISD::TRUNCATE && LTSrc.second == LTDest.second) + return TTI::TCC_Free; + if (ST->useAVX512Regs()) { if (ST->hasBWI()) if (const auto *Entry = ConvertCostTableLookup( @@ -2545,7 +2623,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - unsigned ExtraCost = 0; + InstructionCost ExtraCost = 0; if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) { // Some vector comparison predicates cost extra instructions. 
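// Editor's illustration (hypothetical helper, not part of the patch): the
// source shape the SSE3BroadcastLoadTbl entry above makes free. When the
// scalar being splatted is itself a load, x86 folds load and broadcast into a
// single movddup, so the shuffle costs nothing extra - hence the gate on Args
// containing a LoadInst.
#include <immintrin.h>
__m128d splat_from_memory(const double *P) {
  return _mm_loaddup_pd(P); // movddup (%rdi), %xmm0: load + broadcast in one
}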
// TODO: Should we invert this and assume worst case cmp costs @@ -2619,15 +2697,29 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16f32, 1 }, { ISD::SELECT, MVT::v8i64, 1 }, + { ISD::SELECT, MVT::v4i64, 1 }, + { ISD::SELECT, MVT::v2i64, 1 }, { ISD::SELECT, MVT::v16i32, 1 }, + { ISD::SELECT, MVT::v8i32, 1 }, + { ISD::SELECT, MVT::v4i32, 1 }, { ISD::SELECT, MVT::v8f64, 1 }, + { ISD::SELECT, MVT::v4f64, 1 }, + { ISD::SELECT, MVT::v2f64, 1 }, + { ISD::SELECT, MVT::f64, 1 }, { ISD::SELECT, MVT::v16f32, 1 }, + { ISD::SELECT, MVT::v8f32 , 1 }, + { ISD::SELECT, MVT::v4f32, 1 }, + { ISD::SELECT, MVT::f32 , 1 }, { ISD::SETCC, MVT::v32i16, 2 }, // FIXME: should probably be 4 { ISD::SETCC, MVT::v64i8, 2 }, // FIXME: should probably be 4 - { ISD::SELECT, MVT::v32i16, 2 }, // FIXME: should be 3 - { ISD::SELECT, MVT::v64i8, 2 }, // FIXME: should be 3 + { ISD::SELECT, MVT::v32i16, 2 }, + { ISD::SELECT, MVT::v16i16, 1 }, + { ISD::SELECT, MVT::v8i16, 1 }, + { ISD::SELECT, MVT::v64i8, 2 }, + { ISD::SELECT, MVT::v32i8, 1 }, + { ISD::SELECT, MVT::v16i8, 1 }, }; static const CostTblEntry AVX2CostTbl[] = { @@ -2636,10 +2728,12 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i16, 1 }, { ISD::SETCC, MVT::v32i8, 1 }, - { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb - { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb - { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb - { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb + { ISD::SELECT, MVT::v4f64, 2 }, // vblendvpd + { ISD::SELECT, MVT::v8f32, 2 }, // vblendvps + { ISD::SELECT, MVT::v4i64, 2 }, // pblendvb + { ISD::SELECT, MVT::v8i32, 2 }, // pblendvb + { ISD::SELECT, MVT::v16i16, 2 }, // pblendvb + { ISD::SELECT, MVT::v32i8, 2 }, // pblendvb }; static const CostTblEntry AVX1CostTbl[] = { @@ -2651,49 +2745,54 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i16, 4 }, { ISD::SETCC, MVT::v32i8, 4 }, - { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd - { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps - { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd - { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps + { ISD::SELECT, MVT::v4f64, 3 }, // vblendvpd + { ISD::SELECT, MVT::v8f32, 3 }, // vblendvps + { ISD::SELECT, MVT::v4i64, 3 }, // vblendvpd + { ISD::SELECT, MVT::v8i32, 3 }, // vblendvps { ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps { ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps }; static const CostTblEntry SSE42CostTbl[] = { - { ISD::SETCC, MVT::v2f64, 1 }, - { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, }; static const CostTblEntry SSE41CostTbl[] = { - { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd - { ISD::SELECT, MVT::v4f32, 1 }, // blendvps - { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb - { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb - { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb - { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + + { ISD::SELECT, MVT::v2f64, 2 }, // blendvpd + { ISD::SELECT, MVT::f64, 2 }, // blendvpd + { ISD::SELECT, MVT::v4f32, 2 }, // blendvps + { ISD::SELECT, MVT::f32 , 2 }, // blendvps + { ISD::SELECT, MVT::v2i64, 2 }, // pblendvb + { ISD::SELECT, MVT::v4i32, 2 }, // pblendvb + { ISD::SELECT, MVT::v8i16, 2 }, // pblendvb + { ISD::SELECT, MVT::v16i8, 2 }, // pblendvb }; static const CostTblEntry SSE2CostTbl[] = { { ISD::SETCC, MVT::v2f64, 2 }, { ISD::SETCC, MVT::f64, 1 }, - { ISD::SETCC, MVT::v2i64, 8 }, + { ISD::SETCC, 
MVT::v2i64, 5 }, // pcmpeqd/pcmpgtd expansion { ISD::SETCC, MVT::v4i32, 1 }, { ISD::SETCC, MVT::v8i16, 1 }, { ISD::SETCC, MVT::v16i8, 1 }, - { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd - { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por - { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por - { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por - { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por + { ISD::SELECT, MVT::v2f64, 2 }, // andpd + andnpd + orpd + { ISD::SELECT, MVT::f64, 2 }, // andpd + andnpd + orpd + { ISD::SELECT, MVT::v2i64, 2 }, // pand + pandn + por + { ISD::SELECT, MVT::v4i32, 2 }, // pand + pandn + por + { ISD::SELECT, MVT::v8i16, 2 }, // pand + pandn + por + { ISD::SELECT, MVT::v16i8, 2 }, // pand + pandn + por }; static const CostTblEntry SSE1CostTbl[] = { { ISD::SETCC, MVT::v4f32, 2 }, { ISD::SETCC, MVT::f32, 1 }, - { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps + { ISD::SELECT, MVT::v4f32, 2 }, // andps + andnps + orps + { ISD::SELECT, MVT::f32, 2 }, // andps + andnps + orps }; if (ST->useSLMArithCosts()) @@ -3555,7 +3654,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, assert(Val->isVectorTy() && "This must be a vector type"); Type *ScalarType = Val->getScalarType(); - int RegisterFileMoveCost = 0; + InstructionCost RegisterFileMoveCost = 0; // Non-immediate extraction/insertion can be handled as a sequence of // aliased loads+stores via the stack. @@ -3589,6 +3688,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (Index != -1U && (Opcode == Instruction::ExtractElement || Opcode == Instruction::InsertElement)) { + // Extraction of vXi1 elements are now efficiently handled by MOVMSK. + if (Opcode == Instruction::ExtractElement && + ScalarType->getScalarSizeInBits() == 1 && + cast(Val)->getNumElements() > 1) + return 1; + // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Val); @@ -3597,15 +3702,16 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, return 0; // The type may be split. Normalize the index to the new type. + unsigned SizeInBits = LT.second.getSizeInBits(); unsigned NumElts = LT.second.getVectorNumElements(); unsigned SubNumElts = NumElts; Index = Index % NumElts; // For >128-bit vectors, we need to extract higher 128-bit subvectors. // For inserts, we also need to insert the subvector back. - if (LT.second.getSizeInBits() > 128) { - assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector"); - unsigned NumSubVecs = LT.second.getSizeInBits() / 128; + if (SizeInBits > 128) { + assert((SizeInBits % 128) == 0 && "Illegal vector"); + unsigned NumSubVecs = SizeInBits / 128; SubNumElts = NumElts / NumSubVecs; if (SubNumElts <= Index) { RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1); @@ -3673,20 +3779,25 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) { + assert(DemandedElts.getBitWidth() == + cast(Ty)->getNumElements() && + "Vector size mismatch"); + + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + MVT MScalarTy = LT.second.getScalarType(); + unsigned SizeInBits = LT.second.getSizeInBits(); + InstructionCost Cost = 0; // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. 
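// Editor's illustration (hypothetical helper): why the vXi1 extractelement
// above is modeled as cost 1. A vector compare leaves each lane's result in
// its sign bit, and MOVMSK transfers all sign bits to a GPR in one
// instruction; any single i1 is then a shift-and-mask away.
#include <immintrin.h>
bool lane2_less(__m128 A, __m128 B) {
  int M = _mm_movemask_ps(_mm_cmplt_ps(A, B)); // movmskps: 4 sign bits -> GPR
  return (M >> 2) & 1;                         // extractelement ..., i32 2
}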
if (Insert) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - MVT MScalarTy = LT.second.getScalarType(); - if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || (MScalarTy.isInteger() && ST->hasSSE41()) || (MScalarTy == MVT::f32 && ST->hasSSE41())) { // For types we can insert directly, insertion into 128-bit sub vectors is // cheap, followed by a cheap chain of concatenations. - if (LT.second.getSizeInBits() <= 128) { + if (SizeInBits <= 128) { Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false); } else { @@ -3704,9 +3815,9 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128. const int CostValue = *LT.first.getValue(); assert(CostValue >= 0 && "Negative cost!"); - unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue; + unsigned Num128Lanes = SizeInBits / 128 * CostValue; unsigned NumElts = LT.second.getVectorNumElements() * CostValue; - APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts); + APInt WidenedDemandedElts = DemandedElts.zext(NumElts); unsigned Scale = NumElts / Num128Lanes; // We iterate each 128-lane, and check if we need a // extracti128/inserti128 for this 128-lane. @@ -3747,10 +3858,59 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, } } - // TODO: Use default extraction for now, but we should investigate extending this - // to handle repeated subvector extraction. - if (Extract) + if (Extract) { + // vXi1 can be efficiently extracted with MOVMSK. + // TODO: AVX512 predicate mask handling. + // NOTE: This doesn't work well for roundtrip scalarization. + if (!Insert && Ty->getScalarSizeInBits() == 1 && !ST->hasAVX512()) { + unsigned NumElts = cast(Ty)->getNumElements(); + unsigned MaxElts = ST->hasAVX2() ? 32 : 16; + unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts; + return MOVMSKCost; + } + + if (LT.second.isVector()) { + int CostValue = *LT.first.getValue(); + assert(CostValue >= 0 && "Negative cost!"); + + unsigned NumElts = LT.second.getVectorNumElements() * CostValue; + assert(NumElts >= DemandedElts.getBitWidth() && + "Vector has been legalized to smaller element count"); + + // If we're extracting elements from a 128-bit subvector lane, we only need + // to extract each lane once, not for every element. + if (SizeInBits > 128) { + assert((SizeInBits % 128) == 0 && "Illegal vector"); + unsigned NumLegal128Lanes = SizeInBits / 128; + unsigned Num128Lanes = NumLegal128Lanes * CostValue; + APInt WidenedDemandedElts = DemandedElts.zext(NumElts); + unsigned Scale = NumElts / Num128Lanes; + + // Add cost for each demanded 128-bit subvector extraction. + // Luckily this is a lot easier than for insertion. + APInt DemandedUpper128Lanes = + APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes); + auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale); + for (unsigned I = 0; I != Num128Lanes; ++I) + if (DemandedUpper128Lanes[I]) + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, + I * Scale, Ty128); + + // Add all the demanded element extractions together, but adjust the + // index to use the equivalent of the bottom 128 bit lane. + for (unsigned I = 0; I != NumElts; ++I) + if (WidenedDemandedElts[I]) { + unsigned Idx = I % Scale; + Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx); + } + + return Cost; + } + } + + // Fallback to default extraction. 
Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); + } return Cost; } @@ -3855,8 +4015,7 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, // if all elements that will form a single Dst vector aren't demanded, // then we won't need to do that shuffle, so adjust the cost accordingly. APInt DemandedDstVectors = APIntOps::ScaleBitMask( - DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec), - NumDstVectors); + DemandedDstElts.zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors); unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation(); InstructionCost SingleShuffleCost = @@ -5029,8 +5188,8 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost( return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); } -bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) { +bool X86TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { // X86 specific here are "instruction number 1st priority". return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, @@ -5110,6 +5269,14 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) { return true; } +bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy, + ElementCount NumElements) const { + // movddup + return ST->hasSSE3() && !NumElements.isScalable() && + NumElements.getFixedValue() == 2 && + ElementTy == Type::getDoubleTy(ElementTy->getContext()); +} + bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { if (!isa(DataTy)) return false; @@ -5174,6 +5341,39 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) { return IntWidth == 32 || IntWidth == 64; } +bool X86TTIImpl::isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask) const { + // ADDSUBPS 4xf32 SSE3 + // VADDSUBPS 4xf32 AVX + // VADDSUBPS 8xf32 AVX2 + // ADDSUBPD 2xf64 SSE3 + // VADDSUBPD 2xf64 AVX + // VADDSUBPD 4xf64 AVX2 + + unsigned NumElements = cast(VecTy)->getNumElements(); + assert(OpcodeMask.size() == NumElements && "Mask and VecTy are incompatible"); + if (!isPowerOf2_32(NumElements)) + return false; + // Check the opcode pattern. We apply the mask on the opcode arguments and + // then check if it is what we expect. + for (int Lane : seq(0, NumElements)) { + unsigned Opc = OpcodeMask.test(Lane) ? Opcode1 : Opcode0; + // We expect FSub for even lanes and FAdd for odd lanes. + if (Lane % 2 == 0 && Opc != Instruction::FSub) + return false; + if (Lane % 2 == 1 && Opc != Instruction::FAdd) + return false; + } + // Now check that the pattern is supported by the target ISA. + Type *ElemTy = cast(VecTy)->getElementType(); + if (ElemTy->isFloatTy()) + return ST->hasSSE3() && NumElements % 4 == 0; + if (ElemTy->isDoubleTy()) + return ST->hasSSE3() && NumElements % 2 == 0; + return false; +} + bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) { // AVX2 doesn't support scatter if (!ST->hasAVX512()) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 69715072426f..bd3c3fb1bb2f 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -38,12 +38,12 @@ class X86TTIImpl : public BasicTTIImplBase { const FeatureBitset InlineFeatureIgnoreList = { // This indicates the CPU is 64 bit capable not that we are in 64-bit // mode. 
- X86::Feature64Bit, + X86::FeatureX86_64, // These features don't have any intrinsics or ABI effect. X86::FeatureNOPL, - X86::FeatureCMPXCHG16B, - X86::FeatureLAHFSAHF, + X86::FeatureCX16, + X86::FeatureLAHFSAHF64, // Some older targets can be setup to fold unaligned loads. X86::FeatureSSEUnalignedMem, @@ -68,6 +68,11 @@ class X86TTIImpl : public BasicTTIImplBase { X86::TuningMacroFusion, X86::TuningPadShortFunctions, X86::TuningPOPCNTFalseDeps, + X86::TuningMULCFalseDeps, + X86::TuningPERMFalseDeps, + X86::TuningRANGEFalseDeps, + X86::TuningGETMANTFalseDeps, + X86::TuningMULLQFalseDeps, X86::TuningSlow3OpsLEA, X86::TuningSlowDivide32, X86::TuningSlowDivide64, @@ -131,7 +136,8 @@ public: const Instruction *CxtI = nullptr); InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, - VectorType *SubTp); + VectorType *SubTp, + ArrayRef Args = None); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, @@ -219,13 +225,14 @@ public: InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2); + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); bool canMacroFuseCmp(); bool isLegalMaskedLoad(Type *DataType, Align Alignment); bool isLegalMaskedStore(Type *DataType, Align Alignment); bool isLegalNTLoad(Type *DataType, Align Alignment); bool isLegalNTStore(Type *DataType, Align Alignment); + bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const; bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment); bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) { return forceScalarizeMaskedGather(VTy, Alignment); @@ -234,6 +241,8 @@ public: bool isLegalMaskedScatter(Type *DataType, Align Alignment); bool isLegalMaskedExpandLoad(Type *DataType); bool isLegalMaskedCompressStore(Type *DataType); + bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask) const; bool hasDivRemOp(Type *DataType, bool IsSigned); bool isFCmpOrdCheaperThanFCmpZero(Type *Ty); bool areInlineCompatible(const Function *Caller, diff --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp index 8114a0b2d423..5cada924e006 100644 --- a/llvm/lib/Target/X86/X86TileConfig.cpp +++ b/llvm/lib/Target/X86/X86TileConfig.cpp @@ -36,7 +36,7 @@ using namespace llvm; -#define DEBUG_TYPE "tile-config" +#define DEBUG_TYPE "tileconfig" namespace { @@ -70,11 +70,11 @@ struct X86TileConfig : public MachineFunctionPass { char X86TileConfig::ID = 0; -INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure", +INITIALIZE_PASS_BEGIN(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", false, false) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure", - false, false) +INITIALIZE_PASS_END(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", false, + false) bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { const X86Subtarget &ST = MF.getSubtarget(); @@ -90,7 +90,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { int SS = INT_MAX; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == X86::LDTILECFG) { + if (MI.getOpcode() == X86::PLDTILECFGV) { SS = 
MI.getOperand(0).getIndex(); break; } @@ -98,6 +98,9 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { if (SS != INT_MAX) break; } + // Didn't find PLDTILECFGV, just return false; + if (SS == INT_MAX) + return false; // Try to find a point to insert MIs for constant shapes. // Here we are leveraging the palette id inserted in PreRA pass. @@ -120,6 +123,8 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { continue; if (MRI.getRegClass(VirtReg)->getID() != X86::TILERegClassID) continue; + if (VRM.getPhys(VirtReg) == VirtRegMap::NO_PHYS_REG) + continue; unsigned Index = VRM.getPhys(VirtReg) - X86::TMM0; if (!Phys2Virt[Index]) Phys2Virt[Index] = VirtReg; diff --git a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index f6b97e9e84b3..57801752f170 100644 --- a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -15,8 +15,8 @@ #include "XCore.h" #include "XCoreRegisterInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" @@ -66,140 +66,116 @@ static bool readInstruction32(ArrayRef Bytes, uint64_t Address, return true; } -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const XCoreDisassembler *Dis = static_cast(D); - const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); +static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) { + const MCRegisterInfo *RegInfo = D->getContext().getRegisterInfo(); return *(RegInfo->getRegClass(RC).begin() + RegNo); } -static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus Decode2RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus Decode2RImmInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeR2RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeRUSInstruction(MCInst &Inst, - 
unsigned Insn, +static DecodeStatus DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus +DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeL2RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus Decode3RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus Decode3RImmInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus Decode2RUSInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL3RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL6RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL5RInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); -static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const 
MCDisassembler *Decoder); -static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +static DecodeStatus +DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); #include "XCoreGenDisassemblerTables.inc" -static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) -{ + const MCDisassembler *Decoder) { if (RegNo > 11) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo); @@ -207,11 +183,9 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, - unsigned RegNo, +static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) -{ + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo); @@ -220,7 +194,8 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, } static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val > 11) return MCDisassembler::Fail; static const unsigned Values[] = { @@ -231,7 +206,8 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(-(int64_t)Val)); return MCDisassembler::Success; } @@ -270,9 +246,9 @@ Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2, return MCDisassembler::Success; } -static DecodeStatus -Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { // Try and decode as a 3R instruction. 
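// Editor's sketch (hypothetical decoder, not part of the patch): what the
// file-wide 'const void *' -> 'const MCDisassembler *' migration buys. A
// decoder callback can now reach MCContext services type-safely, without the
// unchecked static_cast the old getReg() helper needed.
static DecodeStatus DecodeFooRegsRegisterClass(MCInst &Inst, unsigned RegNo,
                                               uint64_t Address,
                                               const MCDisassembler *Decoder) {
  if (RegNo > 11) // bounds check, as in the real decoders above
    return MCDisassembler::Fail;
  const MCRegisterInfo *RI = Decoder->getContext().getRegisterInfo();
  Inst.addOperand(MCOperand::createReg( // class ID reused for illustration
      RI->getRegClass(XCore::GRRegsRegClassID).getRegister(RegNo)));
  return MCDisassembler::Success;
}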
unsigned Opcode = fieldFromInstruction(Insn, 11, 5); switch (Opcode) { @@ -340,9 +316,9 @@ Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; } -static DecodeStatus -Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); if (S != MCDisassembler::Success) @@ -353,9 +329,9 @@ Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode2RImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); if (S != MCDisassembler::Success) @@ -366,9 +342,9 @@ Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1); if (S != MCDisassembler::Success) @@ -379,9 +355,9 @@ DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); if (S != MCDisassembler::Success) @@ -393,9 +369,9 @@ Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeRUSInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); if (S != MCDisassembler::Success) @@ -406,9 +382,9 @@ DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); if (S != MCDisassembler::Success) @@ -421,7 +397,7 @@ DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); if (S != MCDisassembler::Success) @@ -433,9 +409,9 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { // Try and decode as 
a L3R / L2RUS instruction. unsigned Opcode = fieldFromInstruction(Insn, 16, 4) | fieldFromInstruction(Insn, 27, 5) << 4; @@ -504,9 +480,9 @@ DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; } -static DecodeStatus -DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL2RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2); @@ -518,9 +494,9 @@ DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2); @@ -532,9 +508,9 @@ DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode3RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); if (S == MCDisassembler::Success) { @@ -545,9 +521,9 @@ Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode3RImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); if (S == MCDisassembler::Success) { @@ -558,9 +534,9 @@ Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode2RUSInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); if (S == MCDisassembler::Success) { @@ -571,9 +547,9 @@ Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); if (S == MCDisassembler::Success) { @@ -584,9 +560,9 @@ Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL3RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); @@ -598,9 +574,9 @@ DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - 
const void *Decoder) { +static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); @@ -613,9 +589,9 @@ DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); @@ -627,9 +603,9 @@ DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; DecodeStatus S = Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); @@ -641,9 +617,9 @@ DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL6RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3, Op4, Op5, Op6; DecodeStatus S = Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); @@ -661,9 +637,9 @@ DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { // Try and decode as a L6R instruction. 
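// Editor's note: fieldFromInstruction(Insn, Start, Size), used heavily above,
// is a plain bitfield extract. A minimal equivalent (assuming Size < 32):
static unsigned fieldFromInstructionSketch(unsigned Insn, unsigned Start,
                                           unsigned Size) {
  return (Insn >> Start) & ((1u << Size) - 1); // bits [Start, Start+Size)
}
// e.g. the L6R check reads fieldFromInstruction(Insn, 27, 5), the top five
// bits of the 32-bit instruction word.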
Inst.clear(); unsigned Opcode = fieldFromInstruction(Insn, 27, 5); @@ -675,9 +651,9 @@ DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; } -static DecodeStatus -DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL5RInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3, Op4, Op5; DecodeStatus S = Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); @@ -695,9 +671,9 @@ DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus -DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; unsigned Op4 = fieldFromInstruction(Insn, 16, 4); DecodeStatus S = @@ -716,7 +692,7 @@ DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { unsigned Op1, Op2, Op3; unsigned Op4 = fieldFromInstruction(Insn, 16, 4); DecodeStatus S = diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h index 0ea47106434c..a8801fc2c5bc 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h @@ -15,10 +15,10 @@ #ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H #define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { +class StringRef; class XCoreInstPrinter : public MCInstPrinter { public: @@ -39,7 +39,6 @@ private: void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); }; } // end namespace llvm diff --git a/llvm/lib/Target/XCore/XCore.h b/llvm/lib/Target/XCore/XCore.h index d31c34910ef6..6118775d16fe 100644 --- a/llvm/lib/Target/XCore/XCore.h +++ b/llvm/lib/Target/XCore/XCore.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_XCORE_XCORE_H #include "MCTargetDesc/XCoreMCTargetDesc.h" +#include "llvm/PassRegistry.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 38b613700674..8fea61d125d2 100644 --- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -110,7 +110,7 @@ void XCoreAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { return; const DataLayout &DL = getDataLayout(); - OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(GV, TM)); + OutStreamer->switchSection(getObjFileLowering().SectionForGlobal(GV, TM)); MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 7c86262269fc..70a1901bb04f 100644 --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -167,10 +167,8 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine 
&TM, = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2; // We have target-specific dag combine patterns for the following nodes: - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); + setTargetDAGCombine( + {ISD::STORE, ISD::ADD, ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN}); setMinFunctionAlignment(Align(2)); setPrefFunctionAlignment(Align(4)); @@ -442,7 +440,7 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } } - if (LD->getAlignment() == 2) { + if (LD->getAlign() == Align(2)) { SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, LD->getPointerInfo(), MVT::i16, Align(2), LD->getMemOperand()->getFlags()); @@ -497,7 +495,7 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = ST->getValue(); SDLoc dl(Op); - if (ST->getAlignment() == 2) { + if (ST->getAlign() == Align(2)) { SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, DAG.getConstant(16, dl, MVT::i32)); @@ -941,25 +939,25 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { N->getSuccessOrdering() == AtomicOrdering::Monotonic) && "setInsertFencesForAtomic(true) expects unordered / monotonic"); if (N->getMemoryVT() == MVT::i32) { - if (N->getAlignment() < 4) + if (N->getAlign() < Align(4)) report_fatal_error("atomic load must be aligned"); return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op), N->getChain(), N->getBasePtr(), N->getPointerInfo(), - N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAlign(), N->getMemOperand()->getFlags(), N->getAAInfo(), N->getRanges()); } if (N->getMemoryVT() == MVT::i16) { - if (N->getAlignment() < 2) + if (N->getAlign() < Align(2)) report_fatal_error("atomic load must be aligned"); return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), N->getBasePtr(), N->getPointerInfo(), MVT::i16, - N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); } if (N->getMemoryVT() == MVT::i8) return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), N->getBasePtr(), N->getPointerInfo(), MVT::i8, - N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); return SDValue(); } @@ -972,24 +970,24 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { N->getSuccessOrdering() == AtomicOrdering::Monotonic) && "setInsertFencesForAtomic(true) expects unordered / monotonic"); if (N->getMemoryVT() == MVT::i32) { - if (N->getAlignment() < 4) + if (N->getAlign() < Align(4)) report_fatal_error("atomic store must be aligned"); return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), - N->getPointerInfo(), N->getAlignment(), + N->getPointerInfo(), N->getAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); } if (N->getMemoryVT() == MVT::i16) { - if (N->getAlignment() < 2) + if (N->getAlign() < Align(2)) report_fatal_error("atomic store must be aligned"); return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), N->getPointerInfo(), MVT::i16, - N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); } if (N->getMemoryVT() == MVT::i8) return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), N->getPointerInfo(), MVT::i8, - N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAlign(), 
N->getMemOperand()->getFlags(), N->getAAInfo()); return SDValue(); } @@ -1791,17 +1789,17 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits(); assert((StoreBits % 8) == 0 && "Store size in bits must be a multiple of 8"); - unsigned Alignment = ST->getAlignment(); + Align Alignment = ST->getAlign(); if (LoadSDNode *LD = dyn_cast(ST->getValue())) { if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() && - LD->getAlignment() == Alignment && + LD->getAlign() == Alignment && !LD->isVolatile() && !LD->isIndexed() && Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { bool isTail = isInTailCallPosition(DAG, ST, Chain); return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits / 8, dl, MVT::i32), - Align(Alignment), false, isTail, + Alignment, false, isTail, ST->getPointerInfo(), LD->getPointerInfo()); } } diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.td b/llvm/lib/Target/XCore/XCoreInstrInfo.td index aa3739d0335e..23f80b126404 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -363,7 +363,7 @@ let usesCustomInserter = 1 in { (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>; } -let hasSideEffects = 1 in +let hasSideEffects = 1, isMeta = 1 in def Int_MemBarrier : PseudoInstXCore<(outs), (ins), "#MEMBARRIER", [(XCoreMemBarrier)]>; diff --git a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index ec44d2899dd5..f039f4f67955 100644 --- a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -15,6 +15,13 @@ using namespace llvm; void XCoreFunctionInfo::anchor() { } +MachineFunctionInfo *XCoreFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const { + return DestMF.cloneInfo(*this); +} + bool XCoreFunctionInfo::isLargeFrame(const MachineFunction &MF) const { if (CachedEStackSize == -1) { CachedEStackSize = MF.getFrameInfo().estimateStackSize(MF); diff --git a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h index aebe11b15b54..6cdb1239750a 100644 --- a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -45,6 +45,11 @@ public: explicit XCoreFunctionInfo(MachineFunction &MF) {} + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap &Src2DstMBB) + const override; + ~XCoreFunctionInfo() override = default; void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; } diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 2e49627a19bf..3c27fcd9ba53 100644 --- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -26,7 +26,7 @@ using namespace llvm; static Reloc::Model getEffectiveRelocModel(Optional RM) { - return RM.getValueOr(Reloc::Static); + return RM.value_or(Reloc::Static); } static CodeModel::Model @@ -108,6 +108,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreTarget() { } TargetTransformInfo -XCoreTargetMachine::getTargetTransformInfo(const Function &F) { +XCoreTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(XCoreTTIImpl(this, F)); } diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.h b/llvm/lib/Target/XCore/XCoreTargetMachine.h index 
9c3bdcf78f9c..a4754fd77e65 100644 --- a/llvm/lib/Target/XCore/XCoreTargetMachine.h +++ b/llvm/lib/Target/XCore/XCoreTargetMachine.h @@ -15,13 +15,13 @@ #include "XCoreSubtarget.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include namespace llvm { +class StringRef; class XCoreTargetMachine : public LLVMTargetMachine { std::unique_ptr TLOF; @@ -42,7 +42,7 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); diff --git a/llvm/lib/Testing/Support/Annotations.cpp b/llvm/lib/Testing/Support/Annotations.cpp index 44d3acccfdb2..557b6cdf98ce 100644 --- a/llvm/lib/Testing/Support/Annotations.cpp +++ b/llvm/lib/Testing/Support/Annotations.cpp @@ -33,12 +33,12 @@ Annotations::Annotations(llvm::StringRef Text) { Code.reserve(Text.size()); while (!Text.empty()) { if (Text.consume_front("^")) { - Points[Name.getValueOr("")].push_back(Code.size()); + Points[Name.value_or("")].push_back(Code.size()); Name = llvm::None; continue; } if (Text.consume_front("[[")) { - OpenRanges.emplace_back(Name.getValueOr(""), Code.size()); + OpenRanges.emplace_back(Name.value_or(""), Code.size()); Name = llvm::None; continue; } diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 8f69282d3443..5f4d0cdf2b57 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -77,7 +77,7 @@ static std::vector getSearchPaths(opt::InputArgList *Args, // Add $LIB. Optional EnvOpt = sys::Process::GetEnv("LIB"); - if (!EnvOpt.hasValue()) + if (!EnvOpt) return Ret; StringRef Env = Saver.save(*EnvOpt); while (!Env.empty()) { @@ -229,10 +229,11 @@ static void appendFile(std::vector &Members, (Magic == file_magic::coff_object) ? 
getCOFFFileMachine(MB) : getBitcodeFileMachine(MB); if (!MaybeFileMachine) { - handleAllErrors(MaybeFileMachine.takeError(), [&](const ErrorInfoBase &EIB) { - llvm::errs() << MB.getBufferIdentifier() << ": " << EIB.message() - << "\n"; - }); + handleAllErrors(MaybeFileMachine.takeError(), + [&](const ErrorInfoBase &EIB) { + llvm::errs() << MB.getBufferIdentifier() << ": " + << EIB.message() << "\n"; + }); exit(1); } COFF::MachineTypes FileMachine = *MaybeFileMachine; @@ -291,10 +292,25 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) { return 0; } + // Parse /ignore: + llvm::StringSet<> IgnoredWarnings; + for (auto *Arg : Args.filtered(OPT_ignore)) + IgnoredWarnings.insert(Arg->getValue()); + // If no input files and not told otherwise, silently do nothing to match // lib.exe - if (!Args.hasArgNoClaim(OPT_INPUT) && !Args.hasArg(OPT_llvmlibempty)) + if (!Args.hasArgNoClaim(OPT_INPUT) && !Args.hasArg(OPT_llvmlibempty)) { + if (!IgnoredWarnings.contains("emptyoutput")) { + llvm::errs() << "warning: no input files, not writing output file\n"; + llvm::errs() << " pass /llvmlibempty to write empty .lib file,\n"; + llvm::errs() << " pass /ignore:emptyoutput to suppress warning\n"; + if (Args.hasFlag(OPT_WX, OPT_WX_no, false)) { + llvm::errs() << "treating warning as error due to /WX\n"; + return 1; + } + } return 0; + } if (Args.hasArg(OPT_lst)) { doList(Args); diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td index 5891e238a328..0d97f77e525f 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/Options.td +++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td @@ -9,6 +9,14 @@ class F<string name> : Flag<["/", "-", "/?", "-?"], name>; class P<string name, string help> : Joined<["/", "-", "/?", "-?"], name#":">, HelpText<help>; +// Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the +// flag on and using it suffixed by ":no" turns it off. +multiclass B<string name, string help_on, string help_off> { + def "" : F<name>, HelpText<help_on>; + def _no : F<name#":no">, HelpText<help_off>; +} + +def ignore : P<"ignore", "Specify warning codes to ignore">; def libpath: P<"libpath", "Object file search path">; // Can't be called "list" since that's a keyword. @@ -23,6 +31,9 @@ def llvmlibempty : F<"llvmlibempty">, def machine: P<"machine", "Specify target platform">; +defm WX : B<"WX", "Treat warnings as errors", + "Don't treat warnings as errors (default)">; + def help : F<"help">; // /?? and -?? must be before /? and -? to not confuse lib/Options. @@ -32,7 +43,6 @@ def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>; // The flags below do nothing. They are defined only for lib.exe compatibility.
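// Editor's usage note, inferred from the libDriverMain() change above
// (illustrative invocations, not from the patch):
//   llvm-lib /out:x.lib                     -> warns, writes nothing, exit 0
//   llvm-lib /WX /out:x.lib                 -> warning becomes error, exit 1
//   llvm-lib /ignore:emptyoutput /out:x.lib -> silent, writes nothing, exit 0
//   llvm-lib /llvmlibempty /out:x.lib       -> writes an empty archive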
//============================================================================== -class QF : Joined<["/", "-", "/?", "-?"], name#":">; - -def ignore : QF<"ignore">; +def ltcg : F<"ltcg">; def nologo : F<"nologo">; +def subsystem : P<"subsystem", "">; diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 7243e39c9029..1fd8b88dd776 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -22,8 +22,8 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -36,6 +36,10 @@ using namespace llvm; using namespace PatternMatch; +namespace llvm { +class DataLayout; +} + #define DEBUG_TYPE "aggressive-instcombine" STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded"); @@ -200,14 +204,13 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) { /// of 'and' ops, then we also need to capture the fact that we saw an /// "and X, 1", so that's an extra return value for that case. struct MaskOps { - Value *Root; + Value *Root = nullptr; APInt Mask; bool MatchAndChain; - bool FoundAnd1; + bool FoundAnd1 = false; MaskOps(unsigned BitWidth, bool MatchAnds) - : Root(nullptr), Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds), - FoundAnd1(false) {} + : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {} }; /// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a @@ -363,10 +366,72 @@ static bool tryToRecognizePopCount(Instruction &I) { return false; } +/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and +/// C2 saturate the value of the fp conversion. The transform is not +/// reversible, as the fptosi.sat is more defined than the input: all inputs +/// produce a valid value for the fptosi.sat, whereas inputs that were out of +/// range of the integer conversion produce poison in the original. The +/// reversed pattern may use fmax and fmin instead. As we cannot directly +/// reverse the transform, and it is not always profitable, we make it +/// conditional on the cost being reported as lower by TTI. +static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { + // Look for smin(smax(fptosi(x), C1), C2), converting to fptosi_sat. + Value *In; + const APInt *MinC, *MaxC; + if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))), + m_APInt(MinC))), + m_APInt(MaxC))) && + !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))), + m_APInt(MaxC))), + m_APInt(MinC)))) + return false; + + // Check that the constants clamp to a saturating range.
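// [Editor's note, a worked example not present in the patch:] for the i8
// saturate smax(smin(fptosi(x), 127), -128), MinC is 127 and MaxC is -128.
// Then *MinC + 1 == 128 == 2^7 is a power of two and -*MaxC == 128 == *MinC + 1,
// so the checks below pass, and SatTy is built with bit width
// exactLogBase2(128) + 1 == 8; the fold then emits llvm.fptosi.sat.i8 and
// sign-extends the result back to the original integer type.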
+ if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1) + return false; + + Type *IntTy = I.getType(); + Type *FpTy = In->getType(); + Type *SatTy = + IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1); + if (auto *VecTy = dyn_cast(IntTy)) + SatTy = VectorType::get(SatTy, VecTy->getElementCount()); + + // Get the cost of the intrinsic, and check that against the cost of + // fptosi+smin+smax + InstructionCost SatCost = TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}), + TTI::TCK_RecipThroughput); + SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy, + TTI::CastContextHint::None, + TTI::TCK_RecipThroughput); + + InstructionCost MinMaxCost = TTI.getCastInstrCost( + Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None, + TTI::TCK_RecipThroughput); + MinMaxCost += TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}), + TTI::TCK_RecipThroughput); + MinMaxCost += TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}), + TTI::TCK_RecipThroughput); + + if (SatCost >= MinMaxCost) + return false; + + IRBuilder<> Builder(&I); + Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat, + {SatTy, FpTy}); + Value *Sat = Builder.CreateCall(Fn, In); + I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy)); + return true; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. -static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { +static bool foldUnusualPatterns(Function &F, DominatorTree &DT, + TargetTransformInfo &TTI) { bool MadeChange = false; for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. @@ -382,6 +447,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); + MadeChange |= tryToFPToSat(I, TTI); } } @@ -395,13 +461,13 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. 
-static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT) { +static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, + TargetLibraryInfo &TLI, DominatorTree &DT) { bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT); + MadeChange |= foldUnusualPatterns(F, DT, TTI); return MadeChange; } @@ -411,6 +477,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage( AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -421,7 +488,8 @@ bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) { auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); - return runImpl(F, AC, TLI, DT); + auto &TTI = getAnalysis().getTTI(F); + return runImpl(F, AC, TTI, TLI, DT); } PreservedAnalyses AggressiveInstCombinePass::run(Function &F, @@ -429,7 +497,8 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F, auto &AC = AM.getResult(F); auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); - if (!runImpl(F, AC, TLI, DT)) { + auto &TTI = AM.getResult(F); + if (!runImpl(F, AC, TTI, TLI, DT)) { // No changes, all analyses are preserved. return PreservedAnalyses::all(); } @@ -446,6 +515,7 @@ INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass, INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine", "Combine pattern based expressions", false, false) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h index 5d69e26d6ecc..9fc103d45d98 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h @@ -23,14 +23,14 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// TruncInstCombine - looks for expression dags dominated by trunc instructions -// and for each eligible dag, it will create a reduced bit-width expression and -// replace the old expression with this new one and remove the old one. -// Eligible expression dag is such that: +// TruncInstCombine - looks for expression graphs dominated by trunc +// instructions and for each eligible graph, it will create a reduced bit-width +// expression and replace the old expression with this new one and remove the +// old one. Eligible expression graph is such that: // 1. Contains only supported instructions. // 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value. // 3. Can be evaluated into type with reduced legal bit-width (or Trunc type). -// 4. All instructions in the dag must not have users outside the dag. +// 4. All instructions in the graph must not have users outside the graph. // Only exception is for {ZExt, SExt}Inst with operand type equal to the // new reduced type chosen in (3). // @@ -61,9 +61,9 @@ class TruncInstCombine { SmallVector Worklist; /// Current processed TruncInst instruction. 
- TruncInst *CurrentTruncInst; + TruncInst *CurrentTruncInst = nullptr; - /// Information per each instruction in the expression dag. + /// Information per each instruction in the expression graph. struct Info { /// Number of LSBs that are needed to generate a valid expression. unsigned ValidBitWidth = 0; @@ -72,26 +72,26 @@ class TruncInstCombine { /// The reduced value generated to replace the old instruction. Value *NewValue = nullptr; }; - /// An ordered map representing expression dag post-dominated by current - /// processed TruncInst. It maps each instruction in the dag to its Info + /// An ordered map representing expression graph post-dominated by current + /// processed TruncInst. It maps each instruction in the graph to its Info /// structure. The map is ordered such that each instruction appears before - /// all other instructions in the dag that uses it. + /// all other instructions in the graph that uses it. MapVector InstInfoMap; public: TruncInstCombine(AssumptionCache &AC, TargetLibraryInfo &TLI, const DataLayout &DL, const DominatorTree &DT) - : AC(AC), TLI(TLI), DL(DL), DT(DT), CurrentTruncInst(nullptr) {} + : AC(AC), TLI(TLI), DL(DL), DT(DT) {} /// Perform TruncInst pattern optimization on given function. bool run(Function &F); private: - /// Build expression dag dominated by the /p CurrentTruncInst and append it to - /// the InstInfoMap container. + /// Build expression graph dominated by the /p CurrentTruncInst and append it + /// to the InstInfoMap container. /// - /// \return true only if succeed to generate an eligible sub expression dag. - bool buildTruncExpressionDag(); + /// \return true only if succeed to generate an eligible sub expression graph. + bool buildTruncExpressionGraph(); /// Calculate the minimal allowed bit-width of the chain ending with the /// currently visited truncate's operand. @@ -100,12 +100,12 @@ private: /// truncate's operand can be shrunk to. unsigned getMinBitWidth(); - /// Build an expression dag dominated by the current processed TruncInst and + /// Build an expression graph dominated by the current processed TruncInst and /// Check if it is eligible to be reduced to a smaller type. /// /// \return the scalar version of the new type to be used for the reduced - /// expression dag, or nullptr if the expression dag is not eligible - /// to be reduced. + /// expression graph, or nullptr if the expression graph is not + /// eligible to be reduced. Type *getBestTruncatedType(); KnownBits computeKnownBits(const Value *V) const { @@ -128,12 +128,12 @@ private: /// \return the new reduced value. Value *getReducedOperand(Value *V, Type *SclTy); - /// Create a new expression dag using the reduced /p SclTy type and replace - /// the old expression dag with it. Also erase all instructions in the old - /// dag, except those that are still needed outside the dag. + /// Create a new expression graph using the reduced /p SclTy type and replace + /// the old expression graph with it. Also erase all instructions in the old + /// graph, except those that are still needed outside the graph. /// - /// \param SclTy scalar version of new type to reduce expression dag into. - void ReduceExpressionDag(Type *SclTy); + /// \param SclTy scalar version of new type to reduce expression graph into. + void ReduceExpressionGraph(Type *SclTy); }; } // end namespace llvm. 
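The InstInfoMap declared above is an llvm::MapVector precisely because insertion order is preserved: each instruction is inserted before the instructions that use it, so the erase loop later in TruncInstCombine.cpp can walk the map in reverse and always delete users before their operands. A minimal standalone sketch of that ordering property, assuming only llvm/ADT and llvm/Support headers (the integer keys are arbitrary placeholders):

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::MapVector<int, const char *> InstInfo;
  InstInfo.insert({42, "operand"});        // inserted first
  InstInfo.insert({7, "user-of-operand"}); // inserted second
  // Reverse iteration visits the user before the operand it depends on,
  // regardless of key values -- only insertion order matters.
  for (auto &KV : llvm::reverse(InstInfo))
    llvm::errs() << KV.second << "\n"; // prints "user-of-operand", "operand"
  return 0;
}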
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp index 4624b735bef8..70ea68587b8e 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// // -// TruncInstCombine - looks for expression dags post-dominated by TruncInst and -// for each eligible dag, it will create a reduced bit-width expression, replace -// the old expression with this new one and remove the old expression. -// Eligible expression dag is such that: +// TruncInstCombine - looks for expression graphs post-dominated by TruncInst +// and for each eligible graph, it will create a reduced bit-width expression, +// replace the old expression with this new one and remove the old expression. +// Eligible expression graph is such that: // 1. Contains only supported instructions. // 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value. // 3. Can be evaluated into type with reduced legal bit-width. -// 4. All instructions in the dag must not have users outside the dag. +// 4. All instructions in the graph must not have users outside the graph. // The only exception is for {ZExt, SExt}Inst with operand type equal to // the new reduced type evaluated in (3). // @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -39,14 +38,13 @@ using namespace llvm; #define DEBUG_TYPE "aggressive-instcombine" -STATISTIC( - NumDAGsReduced, - "Number of truncations eliminated by reducing bit width of expression DAG"); +STATISTIC(NumExprsReduced, "Number of truncations eliminated by reducing bit " + "width of expression graph"); STATISTIC(NumInstrsReduced, "Number of instructions whose bit width was reduced"); /// Given an instruction and a container, it fills all the relevant operands of -/// that instruction, with respect to the Trunc expression dag optimizaton. +/// that instruction, with respect to the Trunc expression graph optimizaton. static void getRelevantOperands(Instruction *I, SmallVectorImpl &Ops) { unsigned Opc = I->getOpcode(); switch (Opc) { @@ -78,15 +76,19 @@ static void getRelevantOperands(Instruction *I, SmallVectorImpl &Ops) { Ops.push_back(I->getOperand(1)); Ops.push_back(I->getOperand(2)); break; + case Instruction::PHI: + for (Value *V : cast(I)->incoming_values()) + Ops.push_back(V); + break; default: llvm_unreachable("Unreachable!"); } } -bool TruncInstCombine::buildTruncExpressionDag() { +bool TruncInstCombine::buildTruncExpressionGraph() { SmallVector Worklist; SmallVector Stack; - // Clear old expression dag. + // Clear old instructions info. InstInfoMap.clear(); Worklist.push_back(CurrentTruncInst->getOperand(0)); @@ -150,11 +152,19 @@ bool TruncInstCombine::buildTruncExpressionDag() { append_range(Worklist, Operands); break; } + case Instruction::PHI: { + SmallVector Operands; + getRelevantOperands(I, Operands); + // Add only operands not in Stack to prevent cycle + for (auto *Op : Operands) + if (all_of(Stack, [Op](Value *V) { return Op != V; })) + Worklist.push_back(Op); + break; + } default: // TODO: Can handle more cases here: // 1. shufflevector // 2. sdiv, srem - // 3. phi node(and loop handling) // ... 
return false; } @@ -254,7 +264,7 @@ unsigned TruncInstCombine::getMinBitWidth() { } Type *TruncInstCombine::getBestTruncatedType() { - if (!buildTruncExpressionDag()) + if (!buildTruncExpressionGraph()) return nullptr; // We don't want to duplicate instructions, which isn't profitable. Thus, we @@ -367,8 +377,10 @@ Value *TruncInstCombine::getReducedOperand(Value *V, Type *SclTy) { return Entry.NewValue; } -void TruncInstCombine::ReduceExpressionDag(Type *SclTy) { +void TruncInstCombine::ReduceExpressionGraph(Type *SclTy) { NumInstrsReduced += InstInfoMap.size(); + // Pairs of old and new phi-nodes + SmallVector, 2> OldNewPHINodes; for (auto &Itr : InstInfoMap) { // Forward Instruction *I = Itr.first; TruncInstCombine::Info &NodeInfo = Itr.second; @@ -451,6 +463,12 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) { Res = Builder.CreateSelect(Op0, LHS, RHS); break; } + case Instruction::PHI: { + Res = Builder.CreatePHI(getReducedType(I, SclTy), I->getNumOperands()); + OldNewPHINodes.push_back( + std::make_pair(cast(I), cast(Res))); + break; + } default: llvm_unreachable("Unhandled instruction"); } @@ -460,6 +478,14 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) { ResI->takeName(I); } + for (auto &Node : OldNewPHINodes) { + PHINode *OldPN = Node.first; + PHINode *NewPN = Node.second; + for (auto Incoming : zip(OldPN->incoming_values(), OldPN->blocks())) + NewPN->addIncoming(getReducedOperand(std::get<0>(Incoming), SclTy), + std::get<1>(Incoming)); + } + Value *Res = getReducedOperand(CurrentTruncInst->getOperand(0), SclTy); Type *DstTy = CurrentTruncInst->getType(); if (Res->getType() != DstTy) { @@ -470,17 +496,29 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) { } CurrentTruncInst->replaceAllUsesWith(Res); - // Erase old expression dag, which was replaced by the reduced expression dag. - // We iterate backward, which means we visit the instruction before we visit - // any of its operands, this way, when we get to the operand, we already - // removed the instructions (from the expression dag) that uses it. + // Erase old expression graph, which was replaced by the reduced expression + // graph. CurrentTruncInst->eraseFromParent(); + // First, erase old phi-nodes and its uses + for (auto &Node : OldNewPHINodes) { + PHINode *OldPN = Node.first; + OldPN->replaceAllUsesWith(PoisonValue::get(OldPN->getType())); + InstInfoMap.erase(OldPN); + OldPN->eraseFromParent(); + } + // Now we have expression graph turned into dag. + // We iterate backward, which means we visit the instruction before we + // visit any of its operands, this way, when we get to the operand, we already + // removed the instructions (from the expression dag) that uses it. for (auto &I : llvm::reverse(InstInfoMap)) { // We still need to check that the instruction has no users before we erase // it, because {SExt, ZExt}Inst Instruction might have other users that was // not reduced, in such case, we need to keep that instruction. if (I.first->use_empty()) I.first->eraseFromParent(); + else + assert((isa(I.first) || isa(I.first)) && + "Only {SExt, ZExt}Inst might have unreduced users"); } } @@ -498,18 +536,18 @@ bool TruncInstCombine::run(Function &F) { } // Process all TruncInst in the Worklist, for each instruction: - // 1. Check if it dominates an eligible expression dag to be reduced. - // 2. Create a reduced expression dag and replace the old one with it. + // 1. Check if it dominates an eligible expression graph to be reduced. + // 2. 
Create a reduced expression graph and replace the old one with it. while (!Worklist.empty()) { CurrentTruncInst = Worklist.pop_back_val(); if (Type *NewDstSclTy = getBestTruncatedType()) { LLVM_DEBUG( - dbgs() << "ICE: TruncInstCombine reducing type of expression dag " + dbgs() << "ICE: TruncInstCombine reducing type of expression graph " "dominated by: " << CurrentTruncInst << '\n'); - ReduceExpressionDag(NewDstSclTy); - ++NumDAGsReduced; + ReduceExpressionGraph(NewDstSclTy); + ++NumExprsReduced; MadeIRChange = true; } } diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index 67f8828e4c75..f7bbdcffd2ec 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -10,9 +10,9 @@ #include "CoroInternal.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Function.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" using namespace llvm; @@ -23,19 +23,10 @@ namespace { struct Lowerer : coro::LowererBase { IRBuilder<> Builder; Lowerer(Module &M) : LowererBase(M), Builder(Context) {} - bool lowerRemainingCoroIntrinsics(Function &F); + bool lower(Function &F); }; } -static void simplifyCFG(Function &F) { - llvm::legacy::FunctionPassManager FPM(F.getParent()); - FPM.add(createCFGSimplificationPass()); - - FPM.doInitialization(); - FPM.run(F); - FPM.doFinalization(); -} - static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) { Builder.SetInsertPoint(SubFn); Value *FrameRaw = SubFn->getFrame(); @@ -53,12 +44,10 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) { SubFn->replaceAllUsesWith(Load); } -bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) { +bool Lowerer::lower(Function &F) { + bool IsPrivateAndUnprocessed = F.isPresplitCoroutine() && F.hasLocalLinkage(); bool Changed = false; - bool IsPrivateAndUnprocessed = - F.hasFnAttribute(CORO_PRESPLIT_ATTR) && F.hasLocalLinkage(); - for (Instruction &I : llvm::make_early_inc_range(instructions(F))) { if (auto *II = dyn_cast(&I)) { switch (II->getIntrinsicID()) { @@ -116,11 +105,6 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) { } } - if (Changed) { - // After replacement were made we can cleanup the function body a little. - simplifyCFG(F); - } - return Changed; } @@ -132,50 +116,21 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) { "llvm.coro.async.resume"}); } -PreservedAnalyses CoroCleanupPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &M = *F.getParent(); - if (!declaresCoroCleanupIntrinsics(M) || - !Lowerer(M).lowerRemainingCoroIntrinsics(F)) +PreservedAnalyses CoroCleanupPass::run(Module &M, + ModuleAnalysisManager &MAM) { + if (!declaresCoroCleanupIntrinsics(M)) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); -} - -namespace { - -struct CoroCleanupLegacy : FunctionPass { - static char ID; // Pass identification, replacement for typeid + FunctionAnalysisManager &FAM = + MAM.getResult(M).getManager(); - CoroCleanupLegacy() : FunctionPass(ID) { - initializeCoroCleanupLegacyPass(*PassRegistry::getPassRegistry()); - } + FunctionPassManager FPM; + FPM.addPass(SimplifyCFGPass()); - std::unique_ptr L; + Lowerer L(M); + for (auto &F : M) + if (L.lower(F)) + FPM.run(F, FAM); - // This pass has work to do only if we find intrinsics we are going to lower - // in the module. 
- bool doInitialization(Module &M) override { - if (declaresCoroCleanupIntrinsics(M)) - L = std::make_unique(M); - return false; - } - - bool runOnFunction(Function &F) override { - if (L) - return L->lowerRemainingCoroIntrinsics(F); - return false; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - if (!L) - AU.setPreservesAll(); - } - StringRef getPassName() const override { return "Coroutine Cleanup"; } -}; + return PreservedAnalyses::none(); } - -char CoroCleanupLegacy::ID = 0; -INITIALIZE_PASS(CoroCleanupLegacy, "coro-cleanup", - "Lower all coroutine related intrinsics", false, false) - -Pass *llvm::createCoroCleanupLegacyPass() { return new CoroCleanupLegacy(); } diff --git a/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp b/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp new file mode 100644 index 000000000000..3d26a43ceba7 --- /dev/null +++ b/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp @@ -0,0 +1,24 @@ +//===- CoroConditionalWrapper.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" +#include "CoroInternal.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +CoroConditionalWrapper::CoroConditionalWrapper(ModulePassManager &&PM) + : PM(std::move(PM)) {} + +PreservedAnalyses CoroConditionalWrapper::run(Module &M, + ModuleAnalysisManager &AM) { + if (!coro::declaresAnyIntrinsic(M)) + return PreservedAnalyses::all(); + + return PM.run(M, AM); +} diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 1533e1805f17..dd7cb23f3f3d 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -8,10 +8,10 @@ #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "CoroInternal.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" using namespace llvm; @@ -35,7 +35,7 @@ public: AnyResumeFnPtrTy(FunctionType::get(Type::getVoidTy(Context), Int8Ptr, /*isVarArg=*/false) ->getPointerTo()) {} - bool lowerEarlyIntrinsics(Function &F); + void lowerEarlyIntrinsics(Function &F); }; } @@ -145,14 +145,16 @@ static void setCannotDuplicate(CoroIdInst *CoroId) { CB->setCannotDuplicate(); } -bool Lowerer::lowerEarlyIntrinsics(Function &F) { - bool Changed = false; +void Lowerer::lowerEarlyIntrinsics(Function &F) { CoroIdInst *CoroId = nullptr; SmallVector CoroFrees; bool HasCoroSuspend = false; for (Instruction &I : llvm::make_early_inc_range(instructions(F))) { - if (auto *CB = dyn_cast(&I)) { - switch (CB->getIntrinsicID()) { + auto *CB = dyn_cast(&I); + if (!CB) + continue; + + switch (CB->getIntrinsicID()) { default: continue; case Intrinsic::coro_free: @@ -178,12 +180,9 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { case Intrinsic::coro_id: if (auto *CII = cast(&I)) { if (CII->getInfo().isPreSplit()) { - assert(F.hasFnAttribute(CORO_PRESPLIT_ATTR) && - F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString() == - UNPREPARED_FOR_SPLIT && + assert(F.isPresplitCoroutine() && "The frontend uses Swtich-Resumed ABI should emit " - "\"coroutine.presplit\" attribute with value \"0\" for the " - 
"coroutine."); + "\"coroutine.presplit\" attribute for the coroutine."); setCannotDuplicate(CII); CII->setCoroutineSelf(); CoroId = cast(&I); @@ -193,9 +192,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: case Intrinsic::coro_id_async: - // TODO: Remove the line once we support it in the corresponding - // frontend. - F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); + F.setPresplitCoroutine(); break; case Intrinsic::coro_resume: lowerResumeOrDestroy(*CB, CoroSubFnInst::ResumeIndex); @@ -209,16 +206,16 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { case Intrinsic::coro_done: lowerCoroDone(cast(&I)); break; - } - Changed = true; } } + // Make sure that all CoroFree reference the coro.id intrinsic. // Token type is not exposed through coroutine C/C++ builtins to plain C, so // we allow specifying none and fixing it up here. if (CoroId) for (CoroFreeInst *CF : CoroFrees) CF->setArgOperand(0, CoroId); + // Coroutine suspention could potentially lead to any argument modified // outside of the function, hence arguments should not have noalias // attributes. @@ -226,7 +223,6 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { for (Argument &A : F.args()) if (A.hasNoAliasAttr()) A.removeAttr(Attribute::NoAlias); - return Changed; } static bool declaresCoroEarlyIntrinsics(const Module &M) { @@ -238,52 +234,15 @@ static bool declaresCoroEarlyIntrinsics(const Module &M) { "llvm.coro.suspend"}); } -PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) { - Module &M = *F.getParent(); - if (!declaresCoroEarlyIntrinsics(M) || !Lowerer(M).lowerEarlyIntrinsics(F)) +PreservedAnalyses CoroEarlyPass::run(Module &M, ModuleAnalysisManager &) { + if (!declaresCoroEarlyIntrinsics(M)) return PreservedAnalyses::all(); + Lowerer L(M); + for (auto &F : M) + L.lowerEarlyIntrinsics(F); + PreservedAnalyses PA; PA.preserveSet(); return PA; } - -namespace { - -struct CoroEarlyLegacy : public FunctionPass { - static char ID; // Pass identification, replacement for typeid. - CoroEarlyLegacy() : FunctionPass(ID) { - initializeCoroEarlyLegacyPass(*PassRegistry::getPassRegistry()); - } - - std::unique_ptr L; - - // This pass has work to do only if we find intrinsics we are going to lower - // in the module. 
- bool doInitialization(Module &M) override { - if (declaresCoroEarlyIntrinsics(M)) - L = std::make_unique(M); - return false; - } - - bool runOnFunction(Function &F) override { - if (!L) - return false; - - return L->lowerEarlyIntrinsics(F); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - } - StringRef getPassName() const override { - return "Lower early coroutine intrinsics"; - } -}; -} - -char CoroEarlyLegacy::ID = 0; -INITIALIZE_PASS(CoroEarlyLegacy, "coro-early", - "Lower early coroutine intrinsics", false, false) - -Pass *llvm::createCoroEarlyLegacyPass() { return new CoroEarlyLegacy(); } diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 84bebb7bf42d..6f78fc8db311 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -14,8 +14,6 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -103,21 +101,12 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { // Given a resume function @f.resume(%f.frame* %frame), returns the size // and expected alignment of %f.frame type. -static std::pair getFrameLayout(Function *Resume) { - // Prefer to pull information from the function attributes. +static Optional> getFrameLayout(Function *Resume) { + // Pull information from the function attributes. auto Size = Resume->getParamDereferenceableBytes(0); - auto Align = Resume->getParamAlign(0); - - // If those aren't given, extract them from the type. - if (Size == 0 || !Align) { - auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType(); - - const DataLayout &DL = Resume->getParent()->getDataLayout(); - if (!Size) Size = DL.getTypeAllocSize(FrameTy); - if (!Align) Align = DL.getABITypeAlign(FrameTy); - } - - return std::make_pair(Size, *Align); + if (!Size) + return None; + return std::make_pair(Size, Resume->getParamAlign(0).valueOrOne()); } // Finds first non alloca instruction in the entry block of a function. @@ -347,56 +336,37 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, assert(Resumers && "PostSplit coro.id Info argument must refer to an array" "of coroutine subfunctions"); auto *ResumeAddrConstant = - ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex); + Resumers->getAggregateElement(CoroSubFnInst::ResumeIndex); replaceWithConstant(ResumeAddrConstant, ResumeAddr); bool ShouldElide = shouldElide(CoroId->getFunction(), DT); - auto *DestroyAddrConstant = ConstantExpr::getExtractValue( - Resumers, + auto *DestroyAddrConstant = Resumers->getAggregateElement( ShouldElide ? 
CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex); for (auto &It : DestroyAddr) replaceWithConstant(DestroyAddrConstant, It.second); if (ShouldElide) { - auto FrameSizeAndAlign = getFrameLayout(cast(ResumeAddrConstant)); - elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, - FrameSizeAndAlign.second, AA); - coro::replaceCoroFree(CoroId, /*Elide=*/true); - NumOfCoroElided++; + if (auto FrameSizeAndAlign = + getFrameLayout(cast(ResumeAddrConstant))) { + elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign->first, + FrameSizeAndAlign->second, AA); + coro::replaceCoroFree(CoroId, /*Elide=*/true); + NumOfCoroElided++; #ifndef NDEBUG - if (!CoroElideInfoOutputFilename.empty()) - *getOrCreateLogFile() - << "Elide " << CoroId->getCoroutine()->getName() << " in " - << CoroId->getFunction()->getName() << "\n"; + if (!CoroElideInfoOutputFilename.empty()) + *getOrCreateLogFile() + << "Elide " << CoroId->getCoroutine()->getName() << " in " + << CoroId->getFunction()->getName() << "\n"; #endif + } } return true; } -// See if there are any coro.subfn.addr instructions referring to coro.devirt -// trigger, if so, replace them with a direct call to devirt trigger function. -static bool replaceDevirtTrigger(Function &F) { - SmallVector DevirtAddr; - for (auto &I : instructions(F)) - if (auto *SubFn = dyn_cast(&I)) - if (SubFn->getIndex() == CoroSubFnInst::RestartTrigger) - DevirtAddr.push_back(SubFn); - - if (DevirtAddr.empty()) - return false; - - Module &M = *F.getParent(); - Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN); - assert(DevirtFn && "coro.devirt.fn not found"); - replaceWithConstant(DevirtFn, DevirtAddr); - - return true; -} - static bool declaresCoroElideIntrinsics(Module &M) { return coro::declaresIntrinsics(M, {"llvm.coro.id", "llvm.coro.id.async"}); } @@ -422,62 +392,3 @@ PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) { return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } - -namespace { -struct CoroElideLegacy : FunctionPass { - static char ID; - CoroElideLegacy() : FunctionPass(ID) { - initializeCoroElideLegacyPass(*PassRegistry::getPassRegistry()); - } - - std::unique_ptr L; - - bool doInitialization(Module &M) override { - if (declaresCoroElideIntrinsics(M)) - L = std::make_unique(M); - return false; - } - - bool runOnFunction(Function &F) override { - if (!L) - return false; - - bool Changed = false; - - if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) - Changed = replaceDevirtTrigger(F); - - L->CoroIds.clear(); - L->collectPostSplitCoroIds(&F); - // If we did not find any coro.id, there is nothing to do. 
- if (L->CoroIds.empty()) - return Changed; - - AAResults &AA = getAnalysis().getAAResults(); - DominatorTree &DT = getAnalysis().getDomTree(); - - for (auto *CII : L->CoroIds) - Changed |= L->processCoroId(CII, AA, DT); - - return Changed; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - } - StringRef getPassName() const override { return "Coroutine Elision"; } -}; -} - -char CoroElideLegacy::ID = 0; -INITIALIZE_PASS_BEGIN( - CoroElideLegacy, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END( - CoroElideLegacy, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) - -Pass *llvm::createCoroElideLegacyPass() { return new CoroElideLegacy(); } diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 9c16d3750998..d09607bb1c4c 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -27,7 +27,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/OptimizedStructLayout.h" @@ -44,13 +44,6 @@ using namespace llvm; // "coro-frame", which results in leaner debug spew. #define DEBUG_TYPE "coro-suspend-crossing" -static cl::opt EnableReuseStorageInFrame( - "reuse-storage-in-coroutine-frame", cl::Hidden, - cl::desc( - "Enable the optimization which would reuse the storage in the coroutine \ - frame for allocas whose liferanges are not overlapped, for testing purposes"), - llvm::cl::init(false)); - enum { SmallVectorThreshold = 32 }; // Provides two way mapping between the blocks and numbers. @@ -347,15 +340,26 @@ struct FrameDataInfo { FieldIndexMap[V] = Index; } - uint64_t getAlign(Value *V) const { + Align getAlign(Value *V) const { auto Iter = FieldAlignMap.find(V); assert(Iter != FieldAlignMap.end()); return Iter->second; } - void setAlign(Value *V, uint64_t Align) { + void setAlign(Value *V, Align AL) { assert(FieldAlignMap.count(V) == 0); - FieldAlignMap.insert({V, Align}); + FieldAlignMap.insert({V, AL}); + } + + uint64_t getDynamicAlign(Value *V) const { + auto Iter = FieldDynamicAlignMap.find(V); + assert(Iter != FieldDynamicAlignMap.end()); + return Iter->second; + } + + void setDynamicAlign(Value *V, uint64_t Align) { + assert(FieldDynamicAlignMap.count(V) == 0); + FieldDynamicAlignMap.insert({V, Align}); } uint64_t getOffset(Value *V) const { @@ -382,7 +386,8 @@ private: DenseMap FieldIndexMap; // Map from values to their alignment on the frame. They would be set after // the frame is built. - DenseMap FieldAlignMap; + DenseMap FieldAlignMap; + DenseMap FieldDynamicAlignMap; // Map from values to their offset on the frame. They would be set after // the frame is built. DenseMap FieldOffsetMap; @@ -423,6 +428,7 @@ private: FieldIDType LayoutFieldIndex; Align Alignment; Align TyAlignment; + uint64_t DynamicAlignBuffer; }; const DataLayout &DL; @@ -489,7 +495,7 @@ public: coro::Shape &Shape); /// Add a field to this structure. 
- LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, + LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment, bool IsHeader = false, bool IsSpillOfValue = false) { assert(!IsFinished && "adding fields to a finished builder"); @@ -508,13 +514,21 @@ public: // to remember the type alignment anyway to build the type. // If we are spilling values we don't need to worry about ABI alignment // concerns. - auto ABIAlign = DL.getABITypeAlign(Ty); - Align TyAlignment = - (IsSpillOfValue && MaxFrameAlignment) - ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign) - : ABIAlign; - if (!FieldAlignment) { - FieldAlignment = TyAlignment; + Align ABIAlign = DL.getABITypeAlign(Ty); + Align TyAlignment = ABIAlign; + if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign) + TyAlignment = *MaxFrameAlignment; + Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment); + + // The field alignment could be bigger than the max frame case, in that case + // we request additional storage to be able to dynamically align the + // pointer. + uint64_t DynamicAlignBuffer = 0; + if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) { + DynamicAlignBuffer = + offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment); + FieldAlignment = *MaxFrameAlignment; + FieldSize = FieldSize + DynamicAlignBuffer; } // Lay out header fields immediately. @@ -523,12 +537,13 @@ public: Offset = alignTo(StructSize, FieldAlignment); StructSize = Offset + FieldSize; - // Everything else has a flexible offset. + // Everything else has a flexible offset. } else { Offset = OptimizedStructLayoutField::FlexibleOffset; } - Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment}); + Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment, + DynamicAlignBuffer}); return Fields.size() - 1; } @@ -561,7 +576,12 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) { auto Updater = [&](Value *I) { auto Field = B.getLayoutField(getFieldIndex(I)); setFieldIndex(I, Field.LayoutFieldIndex); - setAlign(I, Field.Alignment.value()); + setAlign(I, Field.Alignment); + uint64_t dynamicAlign = + Field.DynamicAlignBuffer + ? 
Field.DynamicAlignBuffer + Field.Alignment.value() + : 0; + setDynamicAlign(I, dynamicAlign); setOffset(I, Field.Offset); }; LayoutIndexUpdateStarted = true; @@ -588,7 +608,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F, } }); - if (!Shape.OptimizeFrame && !EnableReuseStorageInFrame) { + if (!Shape.OptimizeFrame) { for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); @@ -755,6 +775,10 @@ void FrameTypeBuilder::finish(StructType *Ty) { F.LayoutFieldIndex = FieldTypes.size(); FieldTypes.push_back(F.Ty); + if (F.DynamicAlignBuffer) { + FieldTypes.push_back( + ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer)); + } LastOffset = Offset + F.Size; } @@ -807,9 +831,10 @@ static StringRef solveTypeName(Type *Ty) { return "__floating_type_"; } - if (Ty->isPointerTy()) { - auto *PtrTy = cast(Ty); - Type *PointeeTy = PtrTy->getPointerElementType(); + if (auto *PtrTy = dyn_cast(Ty)) { + if (PtrTy->isOpaque()) + return "PointerType"; + Type *PointeeTy = PtrTy->getNonOpaquePointerElementType(); auto Name = solveTypeName(PointeeTy); if (Name == "UnknownType") return "PointerType"; @@ -826,10 +851,9 @@ static StringRef solveTypeName(Type *Ty) { auto Name = Ty->getStructName(); SmallString<16> Buffer(Name); - for_each(Buffer, [](auto &Iter) { + for (auto &Iter : Buffer) if (Iter == '.' || Iter == ':') Iter = '_'; - }); auto *MDName = MDString::get(Ty->getContext(), Buffer.str()); return MDName->getString(); } @@ -1012,7 +1036,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, auto Index = FrameData.getFieldIndex(V); OffsetCache.insert( - {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}}); + {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}}); } DenseMap DITypeCache; @@ -1078,7 +1102,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar, DBuilder.createExpression(), DILoc, - Shape.FramePtr->getNextNode()); + Shape.getInsertPtAfterFramePtr()); } // Build a struct that will keep state for an active coroutine. @@ -1367,7 +1391,7 @@ struct AllocaUseVisitor : PtrUseVisitor { bool getShouldLiveOnFrame() const { if (!ShouldLiveOnFrame) ShouldLiveOnFrame = computeShouldLiveOnFrame(); - return ShouldLiveOnFrame.getValue(); + return *ShouldLiveOnFrame; } bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } @@ -1455,7 +1479,7 @@ private: auto Itr = AliasOffetMap.find(&I); if (Itr == AliasOffetMap.end()) { AliasOffetMap[&I] = Offset; - } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) { + } else if (Itr->second && *Itr->second != Offset) { // If we have seen two different possible values for this alias, we set // it to empty. 
AliasOffetMap[&I].reset(); @@ -1517,13 +1541,12 @@ static void createFramePtr(coro::Shape &Shape) { // whatever // // -static Instruction *insertSpills(const FrameDataInfo &FrameData, - coro::Shape &Shape) { +static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { auto *CB = Shape.CoroBegin; LLVMContext &C = CB->getContext(); IRBuilder<> Builder(C); StructType *FrameTy = Shape.FrameTy; - Instruction *FramePtr = Shape.FramePtr; + Value *FramePtr = Shape.FramePtr; DominatorTree DT(*CB->getFunction()); SmallDenseMap DbgPtrAllocaCache; @@ -1550,7 +1573,18 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, auto GEP = cast( Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices)); - if (isa(Orig)) { + if (auto *AI = dyn_cast(Orig)) { + if (FrameData.getDynamicAlign(Orig) != 0) { + assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value()); + auto *M = AI->getModule(); + auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType()); + auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy); + auto *AlignMask = + ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1); + PtrValue = Builder.CreateAdd(PtrValue, AlignMask); + PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask)); + return Builder.CreateIntToPtr(PtrValue, AI->getType()); + } // If the type of GEP is not equal to the type of AllocaInst, it implies // that the AllocaInst may be reused in the Frame slot of other // AllocaInst. So We cast GEP to the AllocaInst here to re-use @@ -1571,20 +1605,19 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, // Create a store instruction storing the value into the // coroutine frame. Instruction *InsertPt = nullptr; - bool NeedToCopyArgPtrValue = false; + Type *ByValTy = nullptr; if (auto *Arg = dyn_cast(Def)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // coro.begin from i8* to %f.frame*. - InsertPt = FramePtr->getNextNode(); + InsertPt = Shape.getInsertPtAfterFramePtr(); // If we're spilling an Argument, make sure we clear 'nocapture' // from the coroutine function. Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); if (Arg->hasByValAttr()) - NeedToCopyArgPtrValue = true; - + ByValTy = Arg->getParamByValType(); } else if (auto *CSI = dyn_cast(Def)) { // Don't spill immediately after a suspend; splitting assumes // that the suspend will be followed by a branch. @@ -1594,7 +1627,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, if (!DT.dominates(CB, I)) { // If it is not dominated by CoroBegin, then spill should be // inserted immediately after CoroFrame is computed. - InsertPt = FramePtr->getNextNode(); + InsertPt = Shape.getInsertPtAfterFramePtr(); } else if (auto *II = dyn_cast(I)) { // If we are spilling the result of the invoke instruction, split // the normal edge and insert the spill in the new block. @@ -1619,11 +1652,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, Builder.SetInsertPoint(InsertPt); auto *G = Builder.CreateConstInBoundsGEP2_32( FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); - if (NeedToCopyArgPtrValue) { + if (ByValTy) { // For byval arguments, we need to store the pointed value in the frame, // instead of the pointer itself. 
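// [Editor's sketch, not part of the patch:] the ptrtoint/add/and/inttoptr
// sequence emitted by GetFramePointer above is the standard
// round-up-to-alignment trick. In plain C++ terms, for a power-of-two Align:

#include <cassert>
#include <cstdint>

inline std::uintptr_t alignUp(std::uintptr_t Ptr, std::uintptr_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "power of two required");
  std::uintptr_t Mask = Align - 1; // AlignMask in the IR above
  return (Ptr + Mask) & ~Mask;     // add, then clear the low bits
}
// e.g. alignUp(0x1003, 16) == 0x1010 and alignUp(0x1010, 16) == 0x1010; the
// extra DynamicAlignBuffer bytes requested at field-layout time are what
// guarantee the rounded-up pointer still lands inside the frame.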
- auto *Value = - Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); + auto *Value = Builder.CreateLoad(ByValTy, Def); Builder.CreateAlignedStore(Value, G, SpillAlignment); } else { Builder.CreateAlignedStore(Def, G, SpillAlignment); @@ -1641,7 +1673,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, auto *GEP = GetFramePointer(E.first); GEP->setName(E.first->getName() + Twine(".reload.addr")); - if (NeedToCopyArgPtrValue) + if (ByValTy) CurrentReload = GEP; else CurrentReload = Builder.CreateAlignedLoad( @@ -1664,6 +1696,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, } } + // Salvage debug info on any dbg.addr that we see. We do not insert them + // into each block where we have a use though. + if (auto *DI = dyn_cast(U)) { + coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame); + } + // If we have a single edge PHINode, remove it and replace it with a // reload from the coroutine frame. (We already took care of multi edge // PHINodes by rewriting them in the rewritePHIs function). @@ -1682,10 +1720,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, } } - BasicBlock *FramePtrBB = FramePtr->getParent(); + BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent(); - auto SpillBlock = - FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); + auto SpillBlock = FramePtrBB->splitBasicBlock( + Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB"); SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); Shape.AllocaSpillBlock = SpillBlock; @@ -1704,7 +1742,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, Alloca->replaceAllUsesWith(G); Alloca->eraseFromParent(); } - return FramePtr; + return; } // If we found any alloca, replace all of their remaining uses with GEP @@ -1735,7 +1773,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, for (Instruction *I : UsersToUpdate) I->replaceUsesOfWith(Alloca, G); } - Builder.SetInsertPoint(FramePtr->getNextNode()); + Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr()); for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; if (A.MayWriteBeforeCoroBegin) { @@ -1755,16 +1793,16 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, auto *FramePtr = GetFramePointer(Alloca); auto *FramePtrRaw = Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); - auto *AliasPtr = Builder.CreateGEP( - Type::getInt8Ty(C), FramePtrRaw, - ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue())); + auto &Value = *Alias.second; + auto ITy = IntegerType::get(C, Value.getBitWidth()); + auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtrRaw, + ConstantInt::get(ITy, Value)); auto *AliasPtrTyped = Builder.CreateBitCast(AliasPtr, Alias.first->getType()); Alias.first->replaceUsesWithIf( AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); } } - return FramePtr; } // Moves the values in the PHIs in SuccBB that correspong to PredBB into a new @@ -2130,7 +2168,7 @@ static void lowerLocalAllocas(ArrayRef LocalAllocas, // Allocate memory. auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); - Alloca->setAlignment(Align(AI->getAlignment())); + Alloca->setAlignment(AI->getAlignment()); for (auto U : AI->users()) { // Replace gets with the allocation. 
@@ -2279,7 +2317,10 @@ static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); auto ArgTy = cast(Arg.getType()); - auto ValueTy = ArgTy->getPointerElementType(); + // swifterror arguments are required to have pointer-to-pointer type, + // so create a pointer-typed alloca with opaque pointers. + auto ValueTy = ArgTy->isOpaque() ? PointerType::getUnqual(F.getContext()) + : ArgTy->getNonOpaquePointerElementType(); // Reduce to the alloca case: @@ -2520,6 +2561,7 @@ void coro::salvageDebugInfo( bool SkipOutermostLoad = !isa(DVI); Value *Storage = DVI->getVariableLocationOp(0); Value *OriginalStorage = Storage; + while (auto *Inst = dyn_cast_or_null(Storage)) { if (auto *LdInst = dyn_cast(Inst)) { Storage = LdInst->getOperand(0); @@ -2559,7 +2601,7 @@ void coro::salvageDebugInfo( // // Avoid to create the alloca would be eliminated by optimization // passes and the corresponding dbg.declares would be invalid. - if (!OptimizeFrame && !EnableReuseStorageInFrame) + if (!OptimizeFrame) if (auto *Arg = dyn_cast(Storage)) { auto &Cached = DbgPtrAllocaCache[Storage]; if (!Cached) { @@ -2575,14 +2617,15 @@ void coro::salvageDebugInfo( // expression, we need to add a DW_OP_deref at the *start* of the // expression to first load the contents of the alloca before // adjusting it with the expression. - if (Expr && Expr->isComplex()) - Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); } DVI->replaceVariableLocationOp(OriginalStorage, Storage); DVI->setExpression(Expr); - /// It makes no sense to move the dbg.value intrinsic. - if (!isa(DVI)) { + // We only hoist dbg.declare today since it doesn't make sense to hoist + // dbg.value or dbg.addr since they do not have the same function wide + // guarantees that dbg.declare does. + if (!isa(DVI) && !isa(DVI)) { if (auto *II = dyn_cast(Storage)) DVI->moveBefore(II->getNormalDest()->getFirstNonPHI()); else if (auto *CBI = dyn_cast(Storage)) @@ -2661,13 +2704,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) Spills[&I].push_back(cast(U)); - - // Manually add dbg.value metadata uses of I. 
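Shape.FramePtr is now a plain Value * rather than an Instruction *, because in the split funclets it can be a function Argument (note the cast<Argument> in getInsertPtAfterFramePtr() below). For the same reason, the CoroSplit.cpp hunks further below guard replaceAllUsesWith with a TrackingVH so the member stays valid across RAUW. A minimal sketch of what TrackingVH provides, assuming llvm/IR headers; the function and value names are illustrative only:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include <cassert>

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("demo", Ctx);
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
  auto *F =
      llvm::Function::Create(FT, llvm::Function::ExternalLinkage, "f", M);
  llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", F));
  llvm::Value *Old = B.CreateAlloca(B.getInt32Ty(), nullptr, "old");
  llvm::Value *New = B.CreateAlloca(B.getInt32Ty(), nullptr, "new");
  llvm::TrackingVH<llvm::Value> Handle(Old);
  Old->replaceAllUsesWith(New); // the handle follows the RAUW
  assert(Handle == New && "TrackingVH now points at the replacement");
  return 0;
}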
- SmallVector DVIs; - findDbgValues(DVIs, &I); - for (auto *DVI : DVIs) - if (Checker.isDefinitionAcrossSuspend(I, DVI)) - Spills[&I].push_back(DVI); } if (Spills.empty()) @@ -2754,10 +2790,9 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { auto *V = Iter.first; SmallVector DVIs; findDbgValues(DVIs, V); - llvm::for_each(DVIs, [&](DbgValueInst *DVI) { + for (DbgValueInst *DVI : DVIs) if (Checker.isDefinitionAcrossSuspend(*V, DVI)) FrameData.Spills[V].push_back(DVI); - }); } LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills)); diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 9a17068df3a9..5557370c82ba 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -13,7 +13,6 @@ #include "CoroInstr.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Transforms/Coroutines.h" namespace llvm { @@ -21,40 +20,13 @@ class CallGraph; class CallGraphSCC; class PassRegistry; -void initializeCoroEarlyLegacyPass(PassRegistry &); -void initializeCoroSplitLegacyPass(PassRegistry &); -void initializeCoroElideLegacyPass(PassRegistry &); -void initializeCoroCleanupLegacyPass(PassRegistry &); - -// CoroEarly pass marks every function that has coro.begin with a string -// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine -// twice. First, it lets it go through complete IPO optimization pipeline as a -// single function. It forces restart of the pipeline by inserting an indirect -// call to an empty function "coro.devirt.trigger" which is devirtualized by -// CoroElide pass that triggers a restart of the pipeline by CGPassManager. -// When CoroSplit pass sees the same coroutine the second time, it splits it up, -// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. -// Async lowering similarily triggers a restart of the pipeline after it has -// split the coroutine. -// -// FIXME: Refactor these attributes as LLVM attributes instead of string -// attributes since these attributes are already used outside LLVM's -// coroutine module. -// FIXME: Remove these values once we remove the Legacy PM. -#define CORO_PRESPLIT_ATTR "coroutine.presplit" -#define UNPREPARED_FOR_SPLIT "0" -#define PREPARED_FOR_SPLIT "1" -#define ASYNC_RESTART_AFTER_SPLIT "2" - -#define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger" - namespace coro { +bool declaresAnyIntrinsic(const Module &M); bool declaresIntrinsics(const Module &M, const std::initializer_list); void replaceCoroFree(CoroIdInst *CoroId, bool Elide); -void updateCallGraph(Function &Caller, ArrayRef Funcs, - CallGraph &CG, CallGraphSCC &SCC); + /// Recover a dbg.declare prepared by the frontend and emit an alloca /// holding a pointer to the coroutine frame. void salvageDebugInfo( @@ -128,7 +100,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape { StructType *FrameTy; Align FrameAlign; uint64_t FrameSize; - Instruction *FramePtr; + Value *FramePtr; BasicBlock *AllocaSpillBlock; /// This would only be true if optimization are enabled. 
@@ -210,10 +182,9 @@ struct LLVM_LIBRARY_VISIBILITY Shape { FunctionType *getResumeFunctionType() const { switch (ABI) { - case coro::ABI::Switch: { - auto *FnPtrTy = getSwitchResumePointerType(); - return cast(FnPtrTy->getPointerElementType()); - } + case coro::ABI::Switch: + return FunctionType::get(Type::getVoidTy(FrameTy->getContext()), + FrameTy->getPointerTo(), /*IsVarArg*/false); case coro::ABI::Retcon: case coro::ABI::RetconOnce: return RetconLowering.ResumePrototype->getFunctionType(); @@ -267,6 +238,12 @@ struct LLVM_LIBRARY_VISIBILITY Shape { return nullptr; } + Instruction *getInsertPtAfterFramePtr() const { + if (auto *I = dyn_cast(FramePtr)) + return I->getNextNode(); + return &cast(FramePtr)->getParent()->getEntryBlock().front(); + } + /// Allocate memory according to the rules of the active lowering. /// /// \param CG - if non-null, will be updated for the new call diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index b5129809c6a6..ead552d9be4e 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -22,15 +22,17 @@ #include "CoroInstr.h" #include "CoroInternal.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -50,13 +52,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PrettyStackTrace.h" @@ -869,11 +868,16 @@ void CoroCloner::create() { OrigF.getParent()->end(), ActiveSuspend); } - // Replace all args with undefs. The buildCoroutineFrame algorithm already - // rewritten access to the args that occurs after suspend points with loads - // and stores to/from the coroutine frame. - for (Argument &A : OrigF.args()) - VMap[&A] = UndefValue::get(A.getType()); + // Replace all args with dummy instructions. If an argument is the old frame + // pointer, the dummy will be replaced by the new frame pointer once it is + // computed below. Uses of all other arguments should have already been + // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine + // frame. + SmallVector DummyArgs; + for (Argument &A : OrigF.args()) { + DummyArgs.push_back(new FreezeInst(UndefValue::get(A.getType()))); + VMap[&A] = DummyArgs.back(); + } SmallVector Returns; @@ -923,6 +927,12 @@ void CoroCloner::create() { NewF->setVisibility(savedVisibility); NewF->setUnnamedAddr(savedUnnamedAddr); NewF->setDLLStorageClass(savedDLLStorageClass); + // The function sanitizer metadata needs to match the signature of the + // function it is being attached to. However this does not hold for split + // functions here. Thus remove the metadata for split functions. 
+ if (Shape.ABI == coro::ABI::Switch && + NewF->hasMetadata(LLVMContext::MD_func_sanitize)) + NewF->eraseMetadata(LLVMContext::MD_func_sanitize); // Replace the attributes of the new function: auto OrigAttrs = NewF->getAttributes(); @@ -932,7 +942,8 @@ void CoroCloner::create() { case coro::ABI::Switch: // Bootstrap attributes by copying function attributes from the // original function. This should include optimization settings and so on. - NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, OrigAttrs.getFnAttrs())); + NewAttrs = NewAttrs.addFnAttributes( + Context, AttrBuilder(Context, OrigAttrs.getFnAttrs())); addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize, Shape.FrameAlign); @@ -1013,7 +1024,15 @@ void CoroCloner::create() { auto *NewVFrame = Builder.CreateBitCast( NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame"); Value *OldVFrame = cast(VMap[Shape.CoroBegin]); - OldVFrame->replaceAllUsesWith(NewVFrame); + if (OldVFrame != NewVFrame) + OldVFrame->replaceAllUsesWith(NewVFrame); + + // All uses of the arguments should have been resolved by this point, + // so we can safely remove the dummy values. + for (Instruction *DummyArg : DummyArgs) { + DummyArg->replaceAllUsesWith(UndefValue::get(DummyArg->getType())); + DummyArg->deleteValue(); + } switch (Shape.ABI) { case coro::ABI::Switch: @@ -1063,13 +1082,6 @@ static Function *createClone(Function &F, const Twine &Suffix, return Cloner.getFunction(); } -/// Remove calls to llvm.coro.end in the original function. -static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) { - for (auto End : Shape.CoroEnds) { - replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG); - } -} - static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { assert(Shape.ABI == coro::ABI::Async); @@ -1150,7 +1162,8 @@ static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, Function *DestroyFn, Function *CleanupFn) { assert(Shape.ABI == coro::ABI::Switch); - IRBuilder<> Builder(Shape.FramePtr->getNextNode()); + IRBuilder<> Builder(Shape.getInsertPtAfterFramePtr()); + auto *ResumeAddr = Builder.CreateStructGEP( Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, "resume.addr"); @@ -1559,7 +1572,8 @@ static void simplifySuspendPoints(coro::Shape &Shape) { } static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, - SmallVectorImpl &Clones) { + SmallVectorImpl &Clones, + TargetTransformInfo &TTI) { assert(Shape.ABI == coro::ABI::Switch); createResumeEntryBlock(F, Shape); @@ -1574,7 +1588,13 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, postSplitCleanup(*DestroyClone); postSplitCleanup(*CleanupClone); - addMustTailToCoroResumes(*ResumeClone); + // Adding musttail call to support symmetric transfer. + // Skip targets which don't support tail call. + // + // FIXME: Could we support symmetric transfer effectively without musttail + // call? + if (TTI.supportsTailCalls()) + addMustTailToCoroResumes(*ResumeClone); // Store addresses resume/destroy/cleanup functions in the coroutine frame. updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); @@ -1661,7 +1681,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, // Map all uses of llvm.coro.begin to the allocated frame pointer. { // Make sure we don't invalidate Shape.FramePtr. 
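The hunk that follows (and its twin in the retcon path further down) wraps Shape.FramePtr in a TrackingVH across the replaceAllUsesWith, because the frame pointer may itself be the coro.begin being replaced. The guard pattern in isolation; rauwKeepingCached and its parameter names are illustrative, not LLVM API:

  #include "llvm/IR/Value.h"
  #include "llvm/IR/ValueHandle.h"

  // Sketch: keep a cached Value* usable across RAUW. If Cached is among the
  // values replaced, the tracking handle is redirected to the replacement.
  llvm::Value *rauwKeepingCached(llvm::Value *Cached, llvm::Value *Old,
                                 llvm::Value *New) {
    llvm::TrackingVH<llvm::Value> Handle(Cached);
    Old->replaceAllUsesWith(New);
    return Handle.getValPtr(); // possibly no longer equal to Cached
  }

Note that the TrackingVH declarations below have lost their template arguments in this text; given the FramePtr type change above, the natural reading is TrackingVH&lt;Instruction&gt; before the patch and TrackingVH&lt;Value&gt; after.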
- TrackingVH Handle(Shape.FramePtr); + TrackingVH Handle(Shape.FramePtr); Shape.CoroBegin->replaceAllUsesWith(FramePtr); Shape.FramePtr = Handle.getValPtr(); } @@ -1773,7 +1793,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape, // Map all uses of llvm.coro.begin to the allocated frame pointer. { // Make sure we don't invalidate Shape.FramePtr. - TrackingVH Handle(Shape.FramePtr); + TrackingVH Handle(Shape.FramePtr); Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); Shape.FramePtr = Handle.getValPtr(); } @@ -1879,6 +1899,7 @@ namespace { static coro::Shape splitCoroutine(Function &F, SmallVectorImpl &Clones, + TargetTransformInfo &TTI, bool OptimizeFrame) { PrettyStackTraceFunction prettyStackTrace(F); @@ -1901,7 +1922,7 @@ static coro::Shape splitCoroutine(Function &F, } else { switch (Shape.ABI) { case coro::ABI::Switch: - splitSwitchCoroutine(F, Shape, Clones); + splitSwitchCoroutine(F, Shape, Clones, TTI); break; case coro::ABI::Async: splitAsyncCoroutine(F, Shape, Clones); @@ -1917,21 +1938,27 @@ static coro::Shape splitCoroutine(Function &F, // This invalidates SwiftErrorOps in the Shape. replaceSwiftErrorOps(F, Shape, nullptr); - return Shape; -} - -static void -updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape, - const SmallVectorImpl &Clones, - CallGraph &CG, CallGraphSCC &SCC) { - if (!Shape.CoroBegin) - return; - - removeCoroEnds(Shape, &CG); - postSplitCleanup(F); + // Finally, salvage the llvm.dbg.{declare,addr} in our original function that + // point into the coroutine frame. We only do this for the current function + // since the Cloner salvaged debug info for us in the new coroutine funclets. + SmallVector Worklist; + SmallDenseMap DbgPtrAllocaCache; + for (auto &BB : F) { + for (auto &I : BB) { + if (auto *DDI = dyn_cast(&I)) { + Worklist.push_back(DDI); + continue; + } + if (auto *DDI = dyn_cast(&I)) { + Worklist.push_back(DDI); + continue; + } + } + } + for (auto *DDI : Worklist) + coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame); - // Update call graph and add the functions we created to the SCC. - coro::updateCallGraph(F, Clones, CG, SCC); + return Shape; } static void updateCallGraphAfterCoroutineSplit( @@ -1976,70 +2003,6 @@ static void updateCallGraphAfterCoroutineSplit( updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM); } -// When we see the coroutine the first time, we insert an indirect call to a -// devirt trigger function and mark the coroutine that it is now ready for -// split. -// Async lowering uses this after it has split the function to restart the -// pipeline. -static void prepareForSplit(Function &F, CallGraph &CG, - bool MarkForAsyncRestart = false) { - Module &M = *F.getParent(); - LLVMContext &Context = F.getContext(); -#ifndef NDEBUG - Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN); - assert(DevirtFn && "coro.devirt.trigger function not found"); -#endif - - F.addFnAttr(CORO_PRESPLIT_ATTR, MarkForAsyncRestart - ? ASYNC_RESTART_AFTER_SPLIT - : PREPARED_FOR_SPLIT); - - // Insert an indirect call sequence that will be devirtualized by CoroElide - // pass: - // %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1) - // %1 = bitcast i8* %0 to void(i8*)* - // call void %1(i8* null) - coro::LowererBase Lowerer(M); - Instruction *InsertPt = - MarkForAsyncRestart ? 
F.getEntryBlock().getFirstNonPHIOrDbgOrLifetime() - : F.getEntryBlock().getTerminator(); - auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context)); - auto *DevirtFnAddr = - Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt); - FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context), - {Type::getInt8PtrTy(Context)}, false); - auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt); - - // Update CG graph with an indirect call we just added. - CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode()); -} - -// Make sure that there is a devirtualization trigger function that the -// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt -// trigger function is not found, we will create one and add it to the current -// SCC. -static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) { - Module &M = CG.getModule(); - if (M.getFunction(CORO_DEVIRT_TRIGGER_FN)) - return; - - LLVMContext &C = M.getContext(); - auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C), - /*isVarArg=*/false); - Function *DevirtFn = - Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage, - CORO_DEVIRT_TRIGGER_FN, &M); - DevirtFn->addFnAttr(Attribute::AlwaysInline); - auto *Entry = BasicBlock::Create(C, "entry", DevirtFn); - ReturnInst::Create(C, Entry); - - auto *Node = CG.getOrInsertFunction(DevirtFn); - - SmallVector Nodes(SCC.begin(), SCC.end()); - Nodes.push_back(Node); - SCC.initialize(Nodes); -} - /// Replace a call to llvm.coro.prepare.retcon. static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG, LazyCallGraph::SCC &C) { @@ -2076,59 +2039,6 @@ static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG, Cast->eraseFromParent(); } } -/// Replace a call to llvm.coro.prepare.retcon. -static void replacePrepare(CallInst *Prepare, CallGraph &CG) { - auto CastFn = Prepare->getArgOperand(0); // as an i8* - auto Fn = CastFn->stripPointerCasts(); // as its original type - - // Find call graph nodes for the preparation. - CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr; - if (auto ConcreteFn = dyn_cast(Fn)) { - PrepareUserNode = CG[Prepare->getFunction()]; - FnNode = CG[ConcreteFn]; - } - - // Attempt to peephole this pattern: - // %0 = bitcast [[TYPE]] @some_function to i8* - // %1 = call @llvm.coro.prepare.retcon(i8* %0) - // %2 = bitcast %1 to [[TYPE]] - // ==> - // %2 = @some_function - for (Use &U : llvm::make_early_inc_range(Prepare->uses())) { - // Look for bitcasts back to the original function type. - auto *Cast = dyn_cast(U.getUser()); - if (!Cast || Cast->getType() != Fn->getType()) continue; - - // Check whether the replacement will introduce new direct calls. - // If so, we'll need to update the call graph. - if (PrepareUserNode) { - for (auto &Use : Cast->uses()) { - if (auto *CB = dyn_cast(Use.getUser())) { - if (!CB->isCallee(&Use)) - continue; - PrepareUserNode->removeCallEdgeFor(*CB); - PrepareUserNode->addCalledFunction(CB, FnNode); - } - } - } - - // Replace and remove the cast. - Cast->replaceAllUsesWith(Fn); - Cast->eraseFromParent(); - } - - // Replace any remaining uses with the function as an i8*. - // This can never directly be a callee, so we don't need to update CG. - Prepare->replaceAllUsesWith(CastFn); - Prepare->eraseFromParent(); - - // Kill dead bitcasts. 
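Context for the legacy replacePrepare being deleted around this point (its new-PM twin survives earlier in the file): llvm.coro.prepare.retcon is an IPO barrier, and once splitting is done the pass peels it away, folding bitcast(prepare(bitcast @f)) back to @f. A compact sketch of the fold without the call-graph bookkeeping; foldPrepare is an illustrative name:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Support/Casting.h"

  // Sketch: rewrite bitcast(prepare(bitcast @f)) to @f, then forward any
  // remaining i8* uses of the barrier to the casted function.
  void foldPrepare(llvm::CallInst *Prepare) {
    llvm::Value *CastFn = Prepare->getArgOperand(0); // @f as i8*
    llvm::Value *Fn = CastFn->stripPointerCasts();   // @f as its own type
    for (llvm::Use &U : llvm::make_early_inc_range(Prepare->uses())) {
      auto *Cast = llvm::dyn_cast<llvm::BitCastInst>(U.getUser());
      if (!Cast || Cast->getType() != Fn->getType())
        continue;
      Cast->replaceAllUsesWith(Fn);
      Cast->eraseFromParent();
    }
    Prepare->replaceAllUsesWith(CastFn);
    Prepare->eraseFromParent();
  }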
- while (auto *Cast = dyn_cast(CastFn)) { - if (!Cast->use_empty()) break; - CastFn = Cast->getOperand(0); - Cast->eraseFromParent(); - } -} static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, LazyCallGraph::SCC &C) { @@ -2143,30 +2053,6 @@ static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, return Changed; } -/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent -/// IPO from operating on calls to a retcon coroutine before it's been -/// split. This is only safe to do after we've split all retcon -/// coroutines in the module. We can do that this in this pass because -/// this pass does promise to split all retcon coroutines (as opposed to -/// switch coroutines, which are lowered in multiple stages). -static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { - bool Changed = false; - for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) { - // Intrinsics can only be used in calls. - auto *Prepare = cast(P.getUser()); - replacePrepare(Prepare, CG); - Changed = true; - } - - return Changed; -} - -static bool declaresCoroSplitIntrinsics(const Module &M) { - return coro::declaresIntrinsics(M, {"llvm.coro.begin", - "llvm.coro.prepare.retcon", - "llvm.coro.prepare.async"}); -} - static void addPrepareFunction(const Module &M, SmallVectorImpl &Fns, StringRef Name) { @@ -2185,18 +2071,15 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, auto &FAM = AM.getResult(C, CG).getManager(); - if (!declaresCoroSplitIntrinsics(M)) - return PreservedAnalyses::all(); - // Check for uses of llvm.coro.prepare.retcon/async. SmallVector PrepareFns; addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async"); // Find coroutines for processing. - SmallVector Coroutines; + SmallVector Coroutines; for (LazyCallGraph::Node &N : C) - if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR)) + if (N.getFunction().isPresplitCoroutine()) Coroutines.push_back(&N); if (Coroutines.empty() && PrepareFns.empty()) @@ -2212,13 +2095,12 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, for (LazyCallGraph::Node *N : Coroutines) { Function &F = N->getFunction(); LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() - << "' state: " - << F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString() << "\n"); - F.removeFnAttr(CORO_PRESPLIT_ATTR); + F.setSplittedCoroutine(); SmallVector Clones; - const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame); + const coro::Shape Shape = splitCoroutine( + F, Clones, FAM.getResult(F), OptimizeFrame); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); if (!Shape.CoroSuspends.empty()) { @@ -2237,122 +2119,3 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, return PreservedAnalyses::none(); } - -namespace { - -// We present a coroutine to LLVM as an ordinary function with suspension -// points marked up with intrinsics. We let the optimizer party on the coroutine -// as a single function for as long as possible. Shortly before the coroutine is -// eligible to be inlined into its callers, we split up the coroutine into parts -// corresponding to initial, resume and destroy invocations of the coroutine, -// add them to the current SCC and restart the IPO pipeline to optimize the -// coroutine subfunctions we extracted before proceeding to the caller of the -// coroutine. 
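In the CoroSplitPass::run hunk above, the string-attribute protocol gives way to first-class Function helpers: isPresplitCoroutine() selects candidates and setSplittedCoroutine() clears the marker before splitting, so no string constants survive. Usage in miniature; claimForSplit is an illustrative wrapper around the two calls shown in the hunk:

  #include "llvm/IR/Function.h"

  // Sketch: claim a presplit coroutine for splitting. Returns false both
  // for ordinary functions and for coroutines that were already split.
  bool claimForSplit(llvm::Function &F) {
    if (!F.isPresplitCoroutine())
      return false;
    F.setSplittedCoroutine(); // drop the presplit marker
    return true;
  }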
-struct CoroSplitLegacy : public CallGraphSCCPass { - static char ID; // Pass identification, replacement for typeid - - CoroSplitLegacy(bool OptimizeFrame = false) - : CallGraphSCCPass(ID), OptimizeFrame(OptimizeFrame) { - initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry()); - } - - bool Run = false; - bool OptimizeFrame; - - // A coroutine is identified by the presence of coro.begin intrinsic, if - // we don't have any, this pass has nothing to do. - bool doInitialization(CallGraph &CG) override { - Run = declaresCoroSplitIntrinsics(CG.getModule()); - return CallGraphSCCPass::doInitialization(CG); - } - - bool runOnSCC(CallGraphSCC &SCC) override { - if (!Run) - return false; - - // Check for uses of llvm.coro.prepare.retcon. - SmallVector PrepareFns; - auto &M = SCC.getCallGraph().getModule(); - addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); - addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async"); - - // Find coroutines for processing. - SmallVector Coroutines; - for (CallGraphNode *CGN : SCC) - if (auto *F = CGN->getFunction()) - if (F->hasFnAttribute(CORO_PRESPLIT_ATTR)) - Coroutines.push_back(F); - - if (Coroutines.empty() && PrepareFns.empty()) - return false; - - CallGraph &CG = getAnalysis().getCallGraph(); - - if (Coroutines.empty()) { - bool Changed = false; - for (auto *PrepareFn : PrepareFns) - Changed |= replaceAllPrepares(PrepareFn, CG); - return Changed; - } - - createDevirtTriggerFunc(CG, SCC); - - // Split all the coroutines. - for (Function *F : Coroutines) { - Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR); - StringRef Value = Attr.getValueAsString(); - LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName() - << "' state: " << Value << "\n"); - // Async lowering marks coroutines to trigger a restart of the pipeline - // after it has split them. - if (Value == ASYNC_RESTART_AFTER_SPLIT) { - F->removeFnAttr(CORO_PRESPLIT_ATTR); - continue; - } - if (Value == UNPREPARED_FOR_SPLIT) { - prepareForSplit(*F, CG); - continue; - } - F->removeFnAttr(CORO_PRESPLIT_ATTR); - - SmallVector Clones; - const coro::Shape Shape = splitCoroutine(*F, Clones, OptimizeFrame); - updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC); - if (Shape.ABI == coro::ABI::Async) { - // Restart SCC passes. - // Mark function for CoroElide pass. It will devirtualize causing a - // restart of the SCC pipeline. 
- prepareForSplit(*F, CG, true /*MarkForAsyncRestart*/); - } - } - - for (auto *PrepareFn : PrepareFns) - replaceAllPrepares(PrepareFn, CG); - - return true; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - CallGraphSCCPass::getAnalysisUsage(AU); - } - - StringRef getPassName() const override { return "Coroutine Splitting"; } -}; - -} // end anonymous namespace - -char CoroSplitLegacy::ID = 0; - -INITIALIZE_PASS_BEGIN( - CoroSplitLegacy, "coro-split", - "Split coroutine into a set of functions driving its state machine", false, - false) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_END( - CoroSplitLegacy, "coro-split", - "Split coroutine into a set of functions driving its state machine", false, - false) - -Pass *llvm::createCoroSplitLegacyPass(bool OptimizeFrame) { - return new CoroSplitLegacy(OptimizeFrame); -} diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 965a146c143f..1742e9319c3b 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -10,14 +10,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Coroutines.h" #include "CoroInstr.h" #include "CoroInternal.h" -#include "llvm-c/Transforms/Coroutines.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -26,14 +23,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/Local.h" #include #include @@ -41,55 +34,6 @@ using namespace llvm; -void llvm::initializeCoroutines(PassRegistry &Registry) { - initializeCoroEarlyLegacyPass(Registry); - initializeCoroSplitLegacyPass(Registry); - initializeCoroElideLegacyPass(Registry); - initializeCoroCleanupLegacyPass(Registry); -} - -static void addCoroutineOpt0Passes(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - PM.add(createCoroSplitLegacyPass()); - PM.add(createCoroElideLegacyPass()); - - PM.add(createBarrierNoopPass()); - PM.add(createCoroCleanupLegacyPass()); -} - -static void addCoroutineEarlyPasses(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - PM.add(createCoroEarlyLegacyPass()); -} - -static void addCoroutineScalarOptimizerPasses(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - PM.add(createCoroElideLegacyPass()); -} - -static void addCoroutineSCCPasses(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - PM.add(createCoroSplitLegacyPass(Builder.OptLevel != 0)); -} - -static void addCoroutineOptimizerLastPasses(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - PM.add(createCoroCleanupLegacyPass()); -} - -void llvm::addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder) { - Builder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, - addCoroutineEarlyPasses); - Builder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, - addCoroutineOpt0Passes); - 
Builder.addExtension(PassManagerBuilder::EP_CGSCCOptimizerLate, - addCoroutineSCCPasses); - Builder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, - addCoroutineScalarOptimizerPasses); - Builder.addExtension(PassManagerBuilder::EP_OptimizerLast, - addCoroutineOptimizerLastPasses); -} - // Construct the lowerer base class and initialize its members. coro::LowererBase::LowererBase(Module &M) : TheModule(M), Context(M.getContext()), @@ -119,44 +63,55 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index, return Bitcast; } +// NOTE: Must be sorted! +static const char *const CoroIntrinsics[] = { + "llvm.coro.align", + "llvm.coro.alloc", + "llvm.coro.async.context.alloc", + "llvm.coro.async.context.dealloc", + "llvm.coro.async.resume", + "llvm.coro.async.size.replace", + "llvm.coro.async.store_resume", + "llvm.coro.begin", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.end.async", + "llvm.coro.frame", + "llvm.coro.free", + "llvm.coro.id", + "llvm.coro.id.async", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.noop", + "llvm.coro.prepare.async", + "llvm.coro.prepare.retcon", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.save", + "llvm.coro.size", + "llvm.coro.subfn.addr", + "llvm.coro.suspend", + "llvm.coro.suspend.async", + "llvm.coro.suspend.retcon", +}; + #ifndef NDEBUG static bool isCoroutineIntrinsicName(StringRef Name) { - // NOTE: Must be sorted! - static const char *const CoroIntrinsics[] = { - "llvm.coro.align", - "llvm.coro.alloc", - "llvm.coro.async.context.alloc", - "llvm.coro.async.context.dealloc", - "llvm.coro.async.resume", - "llvm.coro.async.size.replace", - "llvm.coro.async.store_resume", - "llvm.coro.begin", - "llvm.coro.destroy", - "llvm.coro.done", - "llvm.coro.end", - "llvm.coro.end.async", - "llvm.coro.frame", - "llvm.coro.free", - "llvm.coro.id", - "llvm.coro.id.async", - "llvm.coro.id.retcon", - "llvm.coro.id.retcon.once", - "llvm.coro.noop", - "llvm.coro.prepare.async", - "llvm.coro.prepare.retcon", - "llvm.coro.promise", - "llvm.coro.resume", - "llvm.coro.save", - "llvm.coro.size", - "llvm.coro.subfn.addr", - "llvm.coro.suspend", - "llvm.coro.suspend.async", - "llvm.coro.suspend.retcon", - }; return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1; } #endif +bool coro::declaresAnyIntrinsic(const Module &M) { + for (StringRef Name : CoroIntrinsics) { + assert(isCoroutineIntrinsicName(Name) && "not a coroutine intrinsic"); + if (M.getNamedValue(Name)) + return true; + } + + return false; +} + // Verifies if a module has named values listed. Also, in debug mode verifies // that names are intrinsic names. bool coro::declaresIntrinsics(const Module &M, @@ -191,46 +146,6 @@ void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) { } } -// FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which -// happens to be private. It is better for this functionality exposed by the -// CallGraph. -static void buildCGN(CallGraph &CG, CallGraphNode *Node) { - Function *F = Node->getFunction(); - - // Look for calls by this function. - for (Instruction &I : instructions(F)) - if (auto *Call = dyn_cast(&I)) { - const Function *Callee = Call->getCalledFunction(); - if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID())) - // Indirect calls of intrinsics are not allowed so no need to check. - // We can be more precise here by using TargetArg returned by - // Intrinsic::isLeaf. 
- Node->addCalledFunction(Call, CG.getCallsExternalNode()); - else if (!Callee->isIntrinsic()) - Node->addCalledFunction(Call, CG.getOrInsertFunction(Callee)); - } -} - -// Rebuild CGN after we extracted parts of the code from ParentFunc into -// NewFuncs. Builds CGNs for the NewFuncs and adds them to the current SCC. -void coro::updateCallGraph(Function &ParentFunc, ArrayRef NewFuncs, - CallGraph &CG, CallGraphSCC &SCC) { - // Rebuild CGN from scratch for the ParentFunc - auto *ParentNode = CG[&ParentFunc]; - ParentNode->removeAllCalledFunctions(); - buildCGN(CG, ParentNode); - - SmallVector Nodes(SCC.begin(), SCC.end()); - - for (Function *F : NewFuncs) { - CallGraphNode *Callee = CG.getOrInsertFunction(F); - Nodes.push_back(Callee); - buildCGN(CG, Callee); - } - - SCC.initialize(Nodes); -} - static void clear(coro::Shape &Shape) { Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); @@ -735,25 +650,3 @@ void CoroAsyncEndInst::checkWellFormed() const { "match the tail arguments", MustTailCallFunc); } - -void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCoroEarlyLegacyPass()); -} - -void LLVMAddCoroSplitPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCoroSplitLegacyPass()); -} - -void LLVMAddCoroElidePass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCoroElideLegacyPass()); -} - -void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCoroCleanupLegacyPass()); -} - -void -LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(LLVMPassManagerBuilderRef PMB) { - PassManagerBuilder *Builder = unwrap(PMB); - addCoroutinePassesToExtensionPoints(*Builder); -} diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index a6d9ce1033f3..58cea7ebb749 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -1,4 +1,4 @@ -//===- InlineAlways.cpp - Code to inline always_inline functions ----------===// +//===- AlwaysInliner.cpp - Code to inline always_inline functions ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -16,15 +16,10 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -60,31 +55,38 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, for (User *U : F.users()) if (auto *CB = dyn_cast(U)) if (CB->getCalledFunction() == &F && - CB->hasFnAttr(Attribute::AlwaysInline)) - Calls.insert(CB); + CB->hasFnAttr(Attribute::AlwaysInline) && + !CB->getAttributes().hasFnAttr(Attribute::NoInline)) + Calls.insert(CB); for (CallBase *CB : Calls) { Function *Caller = CB->getCaller(); OptimizationRemarkEmitter ORE(Caller); - auto OIC = shouldInline( - *CB, - [&](CallBase &CB) { - return InlineCost::getAlways("always inline attribute"); - }, - ORE); - assert(OIC); - emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F, - *Caller, *OIC, false, DEBUG_TYPE); + DebugLoc DLoc = CB->getDebugLoc(); + BasicBlock *Block = CB->getParent(); InlineFunctionInfo IFI( /*cg=*/nullptr, GetAssumptionCache, &PSI, - &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(*Caller), &FAM.getResult(F)); InlineResult Res = InlineFunction( *CB, IFI, &FAM.getResult(F), InsertLifetime); - assert(Res.isSuccess() && "unexpected failure to inline"); - (void)Res; + if (!Res.isSuccess()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, + Block) + << "'" << ore::NV("Callee", &F) << "' is not inlined into '" + << ore::NV("Caller", Caller) + << "': " << ore::NV("Reason", Res.getFailureReason()); + }); + continue; + } + + emitInlinedIntoBasedOnCost( + ORE, DLoc, Block, F, *Caller, + InlineCost::getAlways("always inline attribute"), + /*ForProfileContext=*/false, DEBUG_TYPE); // Merge the attributes based on the inlining. 
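Two behavioral changes land in the AlwaysInlinerPass hunk above: a call site carrying noinline is no longer force-inlined even when the callee is always_inline, and a failed mandatory inline now emits an optimization remark instead of tripping an assert. The call-site filter on its own; isMandatoryInlineSite is an illustrative name for the condition in the hunk:

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/InstrTypes.h"

  // Sketch: the call-site noinline attribute beats the callee's
  // always_inline, so such sites are skipped by the mandatory inliner.
  bool isMandatoryInlineSite(const llvm::CallBase &CB,
                             const llvm::Function &F) {
    return CB.getCalledFunction() == &F &&
           CB.hasFnAttr(llvm::Attribute::AlwaysInline) &&
           !CB.getAttributes().hasFnAttr(llvm::Attribute::NoInline);
  }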
AttributeFuncs::mergeAttributesForInlining(*Caller, F); @@ -210,6 +212,9 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) { if (!CB.hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getNever("no alwaysinline attribute"); + if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline()) + return InlineCost::getNever("noinline call site attribute"); + auto IsViable = isInlineViable(*Callee); if (!IsViable.isSuccess()) return InlineCost::getNever(IsViable.getFailureReason()); diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index e6a542385662..62cfc3294968 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,9 +29,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/ArgumentPromotion.h" + #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" @@ -40,15 +39,11 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -56,33 +51,26 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include #include -#include -#include -#include -#include #include #include @@ -91,43 +79,81 @@ using namespace llvm; #define DEBUG_TYPE "argpromotion" STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); -STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); -STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); -/// A vector used to hold the indices of a single GEP instruction -using IndicesVector = std::vector; +namespace { + +struct ArgPart { + Type *Ty; + Align Alignment; + /// A representative guaranteed-executed load or store instruction for use by + /// metadata transfer. 
+ Instruction *MustExecInstr; +}; + +using OffsetAndArgPart = std::pair; + +} // end anonymous namespace + +static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, + Value *Ptr, Type *ResElemTy, int64_t Offset) { + // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise + // fall back to an i8 GEP to a specific offset. + unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); + APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset); + if (!Ptr->getType()->isOpaquePointerTy()) { + Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType(); + if (OrigOffset == 0 && OrigElemTy == ResElemTy) + return Ptr; + + if (OrigElemTy->isSized()) { + APInt TmpOffset = OrigOffset; + Type *TmpTy = OrigElemTy; + SmallVector IntIndices = + DL.getGEPIndicesForOffset(TmpTy, TmpOffset); + if (TmpOffset == 0) { + // Try to add trailing zero indices to reach the right type. + while (TmpTy != ResElemTy) { + Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0); + if (!NextTy) + break; + + IntIndices.push_back(APInt::getZero( + isa(TmpTy) ? 32 : OrigOffset.getBitWidth())); + TmpTy = NextTy; + } + + SmallVector Indices; + for (const APInt &Index : IntIndices) + Indices.push_back(IRB.getInt(Index)); + + if (OrigOffset != 0 || TmpTy == ResElemTy) { + Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices); + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); + } + } + } + } + + if (OrigOffset != 0) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace)); + Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset)); + } + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); +} /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * -doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform, - Optional> - ReplaceCallSite) { +doPromotion(Function *F, FunctionAnalysisManager &FAM, + const DenseMap> + &ArgsToPromote) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector Params; - using ScalarizeTable = std::set>; - - // ScalarizedElements - If we are promoting a pointer that has elements - // accessed out of it, keep track of which elements are accessed so that we - // can add one argument for each. - // - // Arguments that are directly loaded will have a zero element value here, to - // handle cases where there are both a direct load and GEP accesses. - std::map ScalarizedElements; - - // OriginalLoads - Keep track of a representative load instruction from the - // original function so that we can tell the alias analysis implementation - // what the new GEP/Load instructions we are inserting look like. - // We need to keep the original loads for each argument and the elements - // of the argument that are accessed. - std::map, LoadInst *> OriginalLoads; - // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost @@ -138,15 +164,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { - if (ByValArgsToTransform.count(&*I)) { - // Simple byval argument? 
Just add all the struct element types. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast(AgTy); - llvm::append_range(Params, STy->elements()); - ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), - AttributeSet()); - ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(&*I)) { + if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo)); @@ -154,58 +172,12 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { - // Okay, this is being promoted. This means that the only uses are loads - // or GEPs which are only used by loads - - // In this table, we will track which indices are loaded from the argument - // (where direct loads are tracked as no indices). - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - for (User *U : make_early_inc_range(I->users())) { - Instruction *UI = cast(U); - Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) - SrcTy = L->getType(); - else - SrcTy = cast(UI)->getSourceElementType(); - // Skip dead GEPs and remove them. - if (isa(UI) && UI->use_empty()) { - UI->eraseFromParent(); - continue; - } - - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (const Use &I : llvm::drop_begin(UI->operands())) - Indices.push_back(cast(I)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); - ArgIndices.insert(std::make_pair(SrcTy, Indices)); - LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) - OrigLoad = L; - else - // Take any load, we will use it only to update Alias Analysis - OrigLoad = cast(UI->user_back()); - OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; - } - - // Add a parameter to the function for each element passed in. - for (const auto &ArgIndex : ArgIndices) { - // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType( - I->getType()->getPointerElementType(), ArgIndex.second)); + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + Params.push_back(Pair.second.Ty); ArgAttrVec.push_back(AttributeSet()); - assert(Params.back()); } - - if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) - ++NumArgumentsPromoted; - else - ++NumAggregatesPromoted; + ++NumArgumentsPromoted; } } @@ -222,24 +194,30 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // The new function will have the !dbg metadata copied from the original // function. The original function may not be deleted, and dbg metadata need - // to be unique so we need to drop it. + // to be unique, so we need to drop it. F->setSubprogram(nullptr); LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); + uint64_t LargestVectorWidth = 0; + for (auto *I : Params) + if (auto *VT = dyn_cast(I)) + LargestVectorWidth = std::max( + LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); + // Recompute the parameter attributes list based on the new arguments for // the function. 
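The LargestVectorWidth loop above (where this text has lost the template argument of dyn_cast; VectorType is the intended reading) exists because promoted parts may be vectors passed in registers, and the "min-legal-vector-width" attribute must cover the widest such type or the backend may assume a narrower ABI. The computation as a free function, under that assumption:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/Support/Casting.h"
  #include <algorithm>
  #include <cstdint>

  // Sketch: widest vector type among the new parameters, in bits; feeds
  // AttributeFuncs::updateMinLegalVectorWidthAttr on callee and callers.
  uint64_t widestVectorBits(llvm::ArrayRef<llvm::Type *> Params) {
    uint64_t Largest = 0;
    for (llvm::Type *T : Params)
      if (auto *VT = llvm::dyn_cast<llvm::VectorType>(T))
        Largest = std::max(Largest,
                           VT->getPrimitiveSizeInBits().getKnownMinSize());
    return Largest;
  }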
NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrVec)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Loop over all of the callers of the function, transforming the call sites - // to pass in the loaded pointers. - // + // Loop over all the callers of the function, transforming the call sites to + // pass in the loaded pointers. SmallVector Args; const DataLayout &DL = F->getParent()->getDataLayout(); while (!F->use_empty()) { @@ -250,74 +228,34 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // Loop over the operands, inserting GEP and loads in the caller as // appropriate. - auto AI = CB.arg_begin(); + auto *AI = CB.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgNo) - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + ++I, ++AI, ++ArgNo) { + if (!ArgsToPromote.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); - } else if (ByValArgsToTransform.count(&*I)) { - // Emit a GEP and load for each element of the struct. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast(AgTy); - Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - Align StructAlign = *I->getParamAlign(); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - auto *Idx = - IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i)); - // TODO: Tell AA about the new values? - Align Alignment = - commonAlignment(StructAlign, SL->getElementOffset(i)); - Args.push_back(IRB.CreateAlignedLoad( - STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val")); - ArgAttrVec.push_back(AttributeSet()); - } } else if (!I->use_empty()) { - // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - // Store the Value* version of the indices in here, but declare it now - // for reuse. - std::vector Ops; - for (const auto &ArgIndex : ArgIndices) { - Value *V = *AI; - LoadInst *OrigLoad = - OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; - if (!ArgIndex.second.empty()) { - Ops.reserve(ArgIndex.second.size()); - Type *ElTy = V->getType(); - for (auto II : ArgIndex.second) { - // Use i32 to index structs, and i64 for others (pointers/arrays). - // This satisfies GEP constraints. - Type *IdxTy = - (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) - : Type::getInt64Ty(F->getContext())); - Ops.push_back(ConstantInt::get(IdxTy, II)); - // Keep track of the type we're currently indexing. - if (auto *ElPTy = dyn_cast(ElTy)) - ElTy = ElPTy->getPointerElementType(); - else - ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II); - } - // And create a GEP to extract those indices. 
- V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); - Ops.clear(); + Value *V = *AI; + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + LoadInst *LI = IRB.CreateAlignedLoad( + Pair.second.Ty, + createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first), + Pair.second.Alignment, V->getName() + ".val"); + if (Pair.second.MustExecInstr) { + LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata()); + LI->copyMetadata(*Pair.second.MustExecInstr, + {LLVMContext::MD_range, LLVMContext::MD_nonnull, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_align, LLVMContext::MD_noundef}); } - // Since we're replacing a load make sure we take the alignment - // of the previous load. - LoadInst *newLoad = - IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlign()); - // Transfer the AA info too. - newLoad->setAAMetadata(OrigLoad->getAAMetadata()); - - Args.push_back(newLoad); + Args.push_back(LI); ArgAttrVec.push_back(AttributeSet()); } } + } // Push any varargs arguments on the list. for (; AI != CB.arg_end(); ++AI, ++ArgNo) { @@ -345,9 +283,8 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, Args.clear(); ArgAttrVec.clear(); - // Update the callgraph to know that the callsite has been transformed. - if (ReplaceCallSite) - (*ReplaceCallSite)(CB, *NewCS); + AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(), + LargestVectorWidth); if (!CB.use_empty()) { CB.replaceAllUsesWith(NewCS); @@ -364,11 +301,15 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + // We will collect all the new created allocas to promote them into registers + // after the following loop + SmallVector Allocas; + // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. Function::arg_iterator I2 = NF->arg_begin(); for (Argument &Arg : F->args()) { - if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) { + if (!ArgsToPromote.count(&Arg)) { // If this is an unmodified argument, move the name and users over to the // new version. Arg.replaceAllUsesWith(&*I2); @@ -377,37 +318,6 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, continue; } - if (ByValArgsToTransform.count(&Arg)) { - // In the callee, we create an alloca, and store each of the new incoming - // arguments into the alloca. - Instruction *InsertPt = &NF->begin()->front(); - - // Just add all the struct element types. - Type *AgTy = Arg.getParamByValType(); - Align StructAlign = *Arg.getParamAlign(); - Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, - StructAlign, "", InsertPt); - StructType *STy = cast(AgTy); - Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), - nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create( - AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), - InsertPt); - I2->setName(Arg.getName() + "." + Twine(i)); - Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i)); - new StoreInst(&*I2++, Idx, false, Alignment, InsertPt); - } - - // Anything that used the arg should now use the alloca. 
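A note on the caller-side rewrite above: each promoted part becomes an aligned load through createByteGEP, and metadata is copied only from the representative access that was guaranteed to execute in the callee, because hoisting range/nonnull/noundef facts from a conditional access could introduce UB at the call site. The transfer step in isolation, with MustExec standing in for ArgPart::MustExecInstr:

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"

  // Sketch: take both AA and value metadata from a guaranteed-executed
  // representative access, if one exists; otherwise copy nothing.
  void transferLoadMetadata(llvm::LoadInst *NewLI,
                            llvm::Instruction *MustExec) {
    if (!MustExec)
      return;
    NewLI->setAAMetadata(MustExec->getAAMetadata());
    NewLI->copyMetadata(
        *MustExec,
        {llvm::LLVMContext::MD_range, llvm::LLVMContext::MD_nonnull,
         llvm::LLVMContext::MD_dereferenceable,
         llvm::LLVMContext::MD_dereferenceable_or_null,
         llvm::LLVMContext::MD_align, llvm::LLVMContext::MD_noundef});
  }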
- Arg.replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(&Arg); - continue; - } - // There potentially are metadata uses for things like llvm.dbg.value. // Replace them with undef, after handling the other regular uses. auto RauwUndefMetadata = make_scope_exit( @@ -416,57 +326,95 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, if (Arg.use_empty()) continue; - // Otherwise, if we promoted this argument, then all users are load - // instructions (or GEPs with only load users), and all loads should be - // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[&Arg]; - - while (!Arg.use_empty()) { - if (LoadInst *LI = dyn_cast(Arg.user_back())) { - assert(ArgIndices.begin()->second.empty() && - "Load element should sort to front!"); - I2->setName(Arg.getName() + ".val"); - LI->replaceAllUsesWith(&*I2); - LI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName() - << "' in function '" << F->getName() << "'\n"); - } else { - GetElementPtrInst *GEP = cast(Arg.user_back()); - assert(!GEP->use_empty() && - "GEPs without uses should be cleaned up already"); - IndicesVector Operands; - Operands.reserve(GEP->getNumIndices()); - for (const Use &Idx : GEP->indices()) - Operands.push_back(cast(Idx)->getSExtValue()); - - // GEPs with a single 0 index can be merged with direct loads - if (Operands.size() == 1 && Operands.front() == 0) - Operands.clear(); - - Function::arg_iterator TheArg = I2; - for (ScalarizeTable::iterator It = ArgIndices.begin(); - It->second != Operands; ++It, ++TheArg) { - assert(It != ArgIndices.end() && "GEP not handled??"); - } + // Otherwise, if we promoted this argument, we have to create an alloca in + // the callee for every promotable part and store each of the new incoming + // arguments into the corresponding alloca, what lets the old code (the + // store instructions if they are allowed especially) a chance to work as + // before. + assert(Arg.getType()->isPointerTy() && + "Only arguments with a pointer type are promotable"); - TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(), - make_range(Operands.begin(), Operands.end()))); + IRBuilder IRB(&NF->begin()->front()); - LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"); + // Add only the promoted elements, so parts from ArgsToPromote + SmallDenseMap OffsetToAlloca; + for (const auto &Pair : ArgsToPromote.find(&Arg)->second) { + int64_t Offset = Pair.first; + const ArgPart &Part = Pair.second; - // All of the uses must be load instructions. Replace them all with - // the argument specified by ArgNo. - while (!GEP->use_empty()) { - LoadInst *L = cast(GEP->user_back()); - L->replaceAllUsesWith(&*TheArg); - L->eraseFromParent(); - } - GEP->eraseFromParent(); + Argument *NewArg = I2++; + NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val"); + + AllocaInst *NewAlloca = IRB.CreateAlloca( + Part.Ty, nullptr, Arg.getName() + "." 
+ Twine(Offset) + ".allc"); + NewAlloca->setAlignment(Pair.second.Alignment); + IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment); + + // Collect the alloca to retarget the users to + OffsetToAlloca.insert({Offset, NewAlloca}); + } + + auto GetAlloca = [&](Value *Ptr) { + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + assert(Ptr == &Arg && "Not constant offset from arg?"); + return OffsetToAlloca.lookup(Offset.getSExtValue()); + }; + + // Cleanup the code from the dead instructions: GEPs and BitCasts in between + // the original argument and its users: loads and stores. Retarget every + // user to the new created alloca. + SmallVector Worklist; + SmallVector DeadInsts; + append_range(Worklist, Arg.users()); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (isa(V) || isa(V)) { + DeadInsts.push_back(cast(V)); + append_range(Worklist, V->users()); + continue; + } + + if (auto *LI = dyn_cast(V)) { + Value *Ptr = LI->getPointerOperand(); + LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr)); + continue; } + + if (auto *SI = dyn_cast(V)) { + assert(!SI->isVolatile() && "Volatile operations can't be promoted."); + Value *Ptr = SI->getPointerOperand(); + SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr)); + continue; + } + + llvm_unreachable("Unexpected user"); + } + + for (Instruction *I : DeadInsts) { + I->replaceAllUsesWith(PoisonValue::get(I->getType())); + I->eraseFromParent(); } - // Increment I2 past all of the arguments added for this promoted pointer. - std::advance(I2, ArgIndices.size()); + + // Collect the allocas for promotion + for (const auto &Pair : OffsetToAlloca) { + assert(isAllocaPromotable(Pair.second) && + "By design, only promotable allocas should be produced."); + Allocas.push_back(Pair.second); + } + } + + LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size() + << " alloca(s) are promotable by Mem2Reg\n"); + + if (!Allocas.empty()) { + // And we are able to call the `promoteMemoryToRegister()` function. + // Our earlier checks have ensured that PromoteMemToReg() will + // succeed. + auto &DT = FAM.getResult(*NF); + auto &AC = FAM.getResult(*NF); + PromoteMemToReg(Allocas, DT, &AC); } return NF; @@ -474,100 +422,37 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, /// Return true if we can prove that all callees pass in a valid pointer for the /// specified function argument. -static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) { +static bool allCallersPassValidPointerForArgument(Argument *Arg, + Align NeededAlign, + uint64_t NeededDerefBytes) { Function *Callee = Arg->getParent(); const DataLayout &DL = Callee->getParent()->getDataLayout(); + APInt Bytes(64, NeededDerefBytes); - unsigned ArgNo = Arg->getArgNo(); + // Check if the argument itself is marked dereferenceable and aligned. + if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL)) + return true; // Look at all call sites of the function. At this point we know we only have // direct callees. - for (User *U : Callee->users()) { + return all_of(Callee->users(), [&](User *U) { CallBase &CB = cast(*U); - - if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL)) - return false; - } - return true; + return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()), + NeededAlign, Bytes, DL); + }); } -/// Returns true if Prefix is a prefix of longer. 
That means, Longer has a size -/// that is greater than or equal to the size of prefix, and each of the -/// elements in Prefix is the same as the corresponding elements in Longer. -/// -/// This means it also returns true when Prefix and Longer are equal! -static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { - if (Prefix.size() > Longer.size()) - return false; - return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); -} - -/// Checks if Indices, or a prefix of Indices, is in Set. -static bool prefixIn(const IndicesVector &Indices, - std::set &Set) { - std::set::iterator Low; - Low = Set.upper_bound(Indices); - if (Low != Set.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This means - // it points to a prefix of Indices (possibly Indices itself), if such - // prefix exists. - // - // This load is safe if any prefix of its operands is safe to load. - return Low != Set.end() && isPrefix(*Low, Indices); -} - -/// Mark the given indices (ToMark) as safe in the given set of indices -/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there -/// is already a prefix of Indices in Safe, Indices are implicitely marked safe -/// already. Furthermore, any indices that Indices is itself a prefix of, are -/// removed from Safe (since they are implicitely safe because of Indices now). -static void markIndicesSafe(const IndicesVector &ToMark, - std::set &Safe) { - std::set::iterator Low; - Low = Safe.upper_bound(ToMark); - // Guard against the case where Safe is empty - if (Low != Safe.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This - // means it points to a prefix of Indices (possibly Indices itself), if - // such prefix exists. - if (Low != Safe.end()) { - if (isPrefix(*Low, ToMark)) - // If there is already a prefix of these indices (or exactly these - // indices) marked a safe, don't bother adding these indices - return; - - // Increment Low, so we can use it as a "insert before" hint - ++Low; - } - // Insert - Low = Safe.insert(Low, ToMark); - ++Low; - // If there we're a prefix of longer index list(s), remove those - std::set::iterator End = Safe.end(); - while (Low != End && isPrefix(ToMark, *Low)) { - std::set::iterator Remove = Low; - ++Low; - Safe.erase(Remove); - } -} - -/// isSafeToPromoteArgument - As you might guess from the name of this method, -/// it checks to see if it is both safe and useful to promote the argument. -/// This method limits promotion of aggregates to only promote up to three -/// elements of the aggregate in order to avoid exploding the number of -/// arguments passed in. -static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR, - unsigned MaxElements) { - using GEPIndicesSet = std::set; - +/// Determine that this argument is safe to promote, and find the argument +/// parts it can be promoted into. +static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, + unsigned MaxElements, bool IsRecursive, + SmallVectorImpl &ArgPartsVec) { // Quick exit for unused arguments if (Arg->use_empty()) return true; - // We can only promote this argument if all of the uses are loads, or are GEP - // instructions (with constant indices) that are subsequently loaded. + // We can only promote this argument if all the uses are loads at known + // offsets. // // Promoting the argument causes it to be loaded in the caller // unconditionally. 
This is only safe if we can prove that either the load @@ -578,157 +463,193 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR // anyway, in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. - // - // This set will contain all sets of indices that are loaded in the entry - // block, and thus are safe to unconditionally load in the caller. - GEPIndicesSet SafeToUnconditionallyLoad; - - // This set contains all the sets of indices that we are planning to promote. - // This makes it possible to limit the number of arguments added. - GEPIndicesSet ToPromote; - - // If the pointer is always valid, any load with first index 0 is valid. - - if (ByValTy) - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); - - // Whenever a new underlying type for the operand is found, make sure it's - // consistent with the GEPs and loads we've already seen and, if necessary, - // use it to see if all incoming pointers are valid (which implies the 0-index - // is safe). - Type *BaseTy = ByValTy; - auto UpdateBaseTy = [&](Type *NewBaseTy) { - if (BaseTy) - return BaseTy == NewBaseTy; - - BaseTy = NewBaseTy; - if (allCallersPassValidPointerForArgument(Arg, BaseTy)) { - assert(SafeToUnconditionallyLoad.empty()); - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + + SmallDenseMap ArgParts; + Align NeededAlign(1); + uint64_t NeededDerefBytes = 0; + + // And if this is a byval argument we also allow to have store instructions. + // Only handle in such way arguments with specified alignment; + // if it's unspecified, the actual alignment of the argument is + // target-specific. + bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); + + // An end user of a pointer argument is a load or store instruction. + // Returns None if this load or store is not based on the argument. Return + // true if we can promote the instruction, false otherwise. + auto HandleEndUser = [&](auto *I, Type *Ty, + bool GuaranteedToExecute) -> Optional { + // Don't promote volatile or atomic instructions. + if (!I->isSimple()) + return false; + + Value *Ptr = I->getPointerOperand(); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + if (Ptr != Arg) + return None; + + if (Offset.getSignificantBits() >= 64) + return false; + + TypeSize Size = DL.getTypeStoreSize(Ty); + // Don't try to promote scalable types. + if (Size.isScalable()) + return false; + + // If this is a recursive function and one of the types is a pointer, + // then promoting it might lead to recursive promotion. + if (IsRecursive && Ty->isPointerTy()) + return false; + + int64_t Off = Offset.getSExtValue(); + auto Pair = ArgParts.try_emplace( + Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr}); + ArgPart &Part = Pair.first->second; + bool OffsetNotSeenBefore = Pair.second; + + // We limit promotion to only promoting up to a fixed number of elements of + // the aggregate. + if (MaxElements > 0 && ArgParts.size() > MaxElements) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "more than " << MaxElements << " parts\n"); + return false; } - return true; - }; + // For now, we only support loading/storing one specific type at a given + // offset. 
+ if (Part.Ty != Ty) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "accessed as both " << *Part.Ty << " and " << *Ty + << " at offset " << Off << "\n"); + return false; + } - // First, iterate functions that are guaranteed to execute on function - // entry and mark loads of (geps of) arguments as safe. - BasicBlock &EntryBlock = Arg->getParent()->front(); - // Declare this here so we can reuse it - IndicesVector Indices; - for (Instruction &I : EntryBlock) { - if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - Value *V = LI->getPointerOperand(); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { - V = GEP->getPointerOperand(); - if (V == Arg) { - // This load actually loads (part of) Arg? Check the indices then. - Indices.reserve(GEP->getNumIndices()); - for (Use &Idx : GEP->indices()) - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) - Indices.push_back(CI->getSExtValue()); - else - // We found a non-constant GEP index for this argument? Bail out - // right away, can't promote this argument at all. - return false; - - if (!UpdateBaseTy(GEP->getSourceElementType())) - return false; - - // Indices checked out, mark them as safe - markIndicesSafe(Indices, SafeToUnconditionallyLoad); - Indices.clear(); - } - } else if (V == Arg) { - // Direct loads are equivalent to a GEP with a single 0 index. - markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + // If this instruction is not guaranteed to execute, and we haven't seen a + // load or store at this offset before (or it had lower alignment), then we + // need to remember that requirement. + // Note that skipping instructions of previously seen offsets is only + // correct because we only allow a single type for a given offset, which + // also means that the number of accessed bytes will be the same. + if (!GuaranteedToExecute && + (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { + // We won't be able to prove dereferenceability for negative offsets. + if (Off < 0) + return false; - if (BaseTy && LI->getType() != BaseTy) - return false; + // If the offset is not aligned, an aligned base pointer won't help. + if (!isAligned(I->getAlign(), Off)) + return false; - BaseTy = LI->getType(); - } + NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue()); + NeededAlign = std::max(NeededAlign, I->getAlign()); } + Part.Alignment = std::max(Part.Alignment, I->getAlign()); + return true; + }; + + // Look for loads and stores that are guaranteed to execute on entry. + for (Instruction &I : Arg->getParent()->getEntryBlock()) { + Optional<bool> Res{}; + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) + Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true); + else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + Res = HandleEndUser(SI, SI->getValueOperand()->getType(), + /* GuaranteedToExecute */ true); + if (Res && !*Res) + return false; + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) break; } - // Now, iterate all uses of the argument to see if there are any uses that are - // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + // Now look at all loads of the argument. Remember the load instructions + // for the aliasing check below. + SmallVector<const Use *, 16> Worklist; + SmallPtrSet<const Use *, 16> Visited; SmallVector<LoadInst *, 16> Loads; - IndicesVector Operands; - for (Use &U : Arg->uses()) { - User *UR = U.getUser(); - Operands.clear(); - if (LoadInst *LI = dyn_cast<LoadInst>(UR)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - // Direct loads are equivalent to a GEP with a zero index and then a load. 
- Operands.push_back(0); + auto AppendUses = [&](const Value *V) { + for (const Use &U : V->uses()) + if (Visited.insert(&U).second) + Worklist.push_back(&U); + }; + AppendUses(Arg); + while (!Worklist.empty()) { + const Use *U = Worklist.pop_back_val(); + Value *V = U->getUser(); + if (isa<BitCastInst>(V)) { + AppendUses(V); + continue; + } - if (!UpdateBaseTy(LI->getType())) + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + if (!GEP->hasAllConstantIndices()) return false; - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) { - if (GEP->use_empty()) { - // Dead GEPs cause trouble later. Just remove them if we run into - // them. - continue; - } + AppendUses(V); + continue; + } - if (!UpdateBaseTy(GEP->getSourceElementType())) + if (auto *LI = dyn_cast<LoadInst>(V)) { + if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) return false; + Loads.push_back(LI); + continue; + } - // Ensure that all of the indices are constants. - for (Use &Idx : GEP->indices()) - if (ConstantInt *C = dyn_cast<ConstantInt>(Idx)) - Operands.push_back(C->getSExtValue()); - else - return false; // Not a constant operand GEP! - - // Ensure that the only users of the GEP are load instructions. - for (User *GEPU : GEP->users()) - if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - } else { - // Other uses than load? - return false; - } - } else { - return false; // Not a load or a GEP. + // Stores are allowed for byval arguments. + auto *SI = dyn_cast<StoreInst>(V); + if (AreStoresAllowed && SI && + U->getOperandNo() == StoreInst::getPointerOperandIndex()) { + if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), + /* GuaranteedToExecute */ false)) + return false; + continue; + // Only stores TO the argument are allowed; all other stores are + // unknown users. } - // Now, see if it is safe to promote this load / loads of this GEP. Loading - // is safe if Operands, or a prefix of Operands, is marked as safe. - if (!prefixIn(Operands, SafeToUnconditionallyLoad)) - return false; + // Unknown user. + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "unknown user " << *V << "\n"); + return false; + } - // See if we are already promoting a load with these indices. If not, check - // to make sure that we aren't promoting too many elements. If so, nothing - // to do. - if (ToPromote.find(Operands) == ToPromote.end()) { - if (MaxElements > 0 && ToPromote.size() == MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '" - << Arg->getName() - << "' because it would require adding more " - << "than " << MaxElements - << " arguments to the function.\n"); - // We limit aggregate promotion to only promoting up to a fixed number - // of elements of the aggregate. - return false; - } - ToPromote.insert(std::move(Operands)); + if (NeededDerefBytes || NeededAlign > 1) { + // Try to prove the needed dereferenceability / alignment requirement. + if (!allCallersPassValidPointerForArgument(Arg, NeededAlign, + NeededDerefBytes)) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "not dereferenceable or aligned\n"); + return false; } } - if (Loads.empty()) + if (ArgParts.empty()) return true; // No users, this is a dead argument. - // Okay, now we know that the argument is only used by load instructions and + // Sort parts by offset. + append_range(ArgPartsVec, ArgParts); + sort(ArgPartsVec, + [](const auto &A, const auto &B) { return A.first < B.first; }); + + // Make sure the parts are non-overlapping. 
+ int64_t Offset = ArgPartsVec[0].first; + for (const auto &Pair : ArgPartsVec) { + if (Pair.first < Offset) + return false; // Overlap with previous part. + + Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty); + } + + // If store instructions are allowed, the path from the entry of the function + // to each load may not be free of instructions that potentially invalidate + // the load; this is an admissible situation. + if (AreStoresAllowed) + return true; + + // Okay, now we know that the argument is only used by load instructions, and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. @@ -762,118 +683,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR return true; } -bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) { - // There is no size information, so be conservative. - if (!type->isSized()) - return false; - - // If the alloc size is not equal to the storage size, then there are padding - // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) - return false; - - // FIXME: This isn't the right way to check for padding in vectors with - // non-byte-size elements. - if (VectorType *seqTy = dyn_cast<VectorType>(type)) - return isDenselyPacked(seqTy->getElementType(), DL); - - // For array types, check for padding within members. - if (ArrayType *seqTy = dyn_cast<ArrayType>(type)) - return isDenselyPacked(seqTy->getElementType(), DL); - - if (!isa<StructType>(type)) - return true; - - // Check for padding within and between elements of a struct. - StructType *StructTy = cast<StructType>(type); - const StructLayout *Layout = DL.getStructLayout(StructTy); - uint64_t StartPos = 0; - for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { - Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy, DL)) - return false; - if (StartPos != Layout->getElementOffsetInBits(i)) - return false; - StartPos += DL.getTypeAllocSizeInBits(ElTy); - } - - return true; -} - -/// Checks if the padding bytes of an argument could be accessed. -static bool canPaddingBeAccessed(Argument *arg) { - assert(arg->hasByValAttr()); - - // Track all the pointers to the argument to make sure they are not captured. - SmallPtrSet<Value *, 16> PtrValues; - PtrValues.insert(arg); - - // Track all of the stores. - SmallVector<StoreInst *, 16> Stores; - - // Scan through the uses recursively to make sure the pointer is always used - // sanely. - SmallVector<Value *, 16> WorkList(arg->users()); - while (!WorkList.empty()) { - Value *V = WorkList.pop_back_val(); - if (isa<GetElementPtrInst>(V) || isa<BitCastInst>(V)) { - if (PtrValues.insert(V).second) - llvm::append_range(WorkList, V->users()); - } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { - Stores.push_back(Store); - } else if (!isa<LoadInst>(V)) { - return true; - } - } - - // Check to make sure the pointers aren't captured - for (StoreInst *Store : Stores) - if (PtrValues.count(Store->getValueOperand())) - return true; - - return false; -} - -/// Check if callers and the callee \p F agree on how promoted arguments would be -/// passed. The ones that they do not agree on are eliminated from the sets but -/// the return value has to be observed as well. -static bool areFunctionArgsABICompatible( - const Function &F, const TargetTransformInfo &TTI, - SmallPtrSetImpl<Argument *> &ArgsToPromote, - SmallPtrSetImpl<Argument *> &ByValArgsToTransform) { - // TODO: Check individual arguments so we can promote a subset? 
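An editorial aside, not part of the patch: findArgParts above records every constant-offset load or store as one "argument part". A minimal stand-alone sketch of that bookkeeping, with std::map and an integer type id standing in for LLVM's SmallDenseMap and Type *, and recordAccess as a hypothetical name:

// Sketch only; mirrors the rules of HandleEndUser above: a bounded number
// of distinct parts, a single type per offset, and merged (max) alignment.
#include <algorithm>
#include <cstdint>
#include <map>

struct Part {
  int TypeId;         // stand-in for llvm::Type *
  unsigned Alignment; // stand-in for llvm::Align
};

bool recordAccess(std::map<int64_t, Part> &Parts, int64_t Off, int TypeId,
                  unsigned Alignment, unsigned MaxElements) {
  auto [It, Inserted] = Parts.try_emplace(Off, Part{TypeId, Alignment});
  (void)Inserted; // the offset-seen-before logic is omitted in this sketch
  if (MaxElements > 0 && Parts.size() > MaxElements)
    return false; // too many distinct parts: give up on promotion
  if (It->second.TypeId != TypeId)
    return false; // two different types at one offset: unsupported
  It->second.Alignment = std::max(It->second.Alignment, Alignment);
  return true;
}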
- SmallVector<Type *, 32> Types; - for (Argument *Arg : ArgsToPromote) - Types.push_back(Arg->getType()->getPointerElementType()); - for (Argument *Arg : ByValArgsToTransform) - Types.push_back(Arg->getParamByValType()); - - for (const Use &U : F.uses()) { +/// Check if callers and callee agree on how promoted arguments would be +/// passed. +static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, + const TargetTransformInfo &TTI) { + return all_of(F.uses(), [&](const Use &U) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); if (!CB) return false; + const Function *Caller = CB->getCaller(); const Function *Callee = CB->getCalledFunction(); - if (!TTI.areTypesABICompatible(Caller, Callee, Types)) - return false; - } - return true; + return TTI.areTypesABICompatible(Caller, Callee, Types); + }); } /// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. -static Function * -promoteArguments(Function *F, function_ref<AAResults &(Function &)> AARGetter, - unsigned MaxElements, - Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> - ReplaceCallSite, - const TargetTransformInfo &TTI) { +static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, + unsigned MaxElements, bool IsRecursive) { // Don't perform argument promotion for naked functions; otherwise we can end // up removing parameters that are seemingly 'not used' as they are referred // to in the assembly. - if(F->hasFnAttribute(Attribute::Naked)) + if (F->hasFnAttribute(Attribute::Naked)) return nullptr; // Make sure that it is local to this module. @@ -903,20 +737,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &)> AARGetter, // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function - // is self-recursive and check that target features are compatible. - bool isSelfRecursive = false; + // is self-recursive. for (Use &U : F->uses()) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); // Must be a direct call. - if (CB == nullptr || !CB->isCallee(&U)) + if (CB == nullptr || !CB->isCallee(&U) || + CB->getFunctionType() != F->getFunctionType()) return nullptr; // Can't change signature of musttail callee if (CB->isMustTailCall()) return nullptr; - if (CB->getParent()->getParent() == F) - isSelfRecursive = true; + if (CB->getFunction() == F) + IsRecursive = true; } // Can't change signature of musttail caller @@ -926,16 +760,13 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &)> AARGetter, return nullptr; const DataLayout &DL = F->getParent()->getDataLayout(); - - AAResults &AAR = AARGetter(*F); + auto &AAR = FAM.getResult<AAManager>(*F); + const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. - SmallPtrSet<Argument *, 8> ArgsToPromote; - SmallPtrSet<Argument *, 8> ByValArgsToTransform; + DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; for (Argument *PtrArg : PointerArgs) { - Type *AgTy = PtrArg->getType()->getPointerElementType(); - // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { @@ -949,72 +780,25 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &)> AARGetter, } } - // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe, if the passed value is densely - // packed or if we can prove the padding bytes are never accessed. 
- // - // Only handle arguments with specified alignment; if it's unspecified, the - // actual alignment of the argument is target-specific. - bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() && - (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || - !canPaddingBeAccessed(PtrArg)); - if (isSafeToPromote) { - if (StructType *STy = dyn_cast<StructType>(AgTy)) { - if (MaxElements > 0 && STy->getNumElements() > MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '" - << PtrArg->getName() - << "' because it would require adding more" - << " than " << MaxElements - << " arguments to the function.\n"); - continue; - } - - // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (const auto *EltTy : STy->elements()) { - if (!EltTy->isSingleValueType()) { - AllSimple = false; - break; - } - } + // See if we can promote the pointer to its value. + SmallVector<OffsetAndArgPart, 4> ArgParts; - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow sroa to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; - } - } - } + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + SmallVector<Type *, 32> Types; + for (const auto &Pair : ArgParts) + Types.push_back(Pair.second.Ty); - // If the argument is a recursive type and we're in a recursive - // function, we could end up infinitely peeling the function argument. - if (isSelfRecursive) { - if (StructType *STy = dyn_cast<StructType>(AgTy)) { - bool RecursiveType = - llvm::is_contained(STy->elements(), PtrArg->getType()); - if (RecursiveType) - continue; + if (areTypesABICompatible(Types, *F, TTI)) { + ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); } } - - // Otherwise, see if we can promote the pointer to its value. - Type *ByValTy = - PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr; - if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements)) - ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + if (ArgsToPromote.empty()) return nullptr; - if (!areFunctionArgsABICompatible( - *F, TTI, ArgsToPromote, ByValArgsToTransform)) - return nullptr; - - return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); + return doPromotion(F, FAM, ArgsToPromote); } PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, @@ -1030,19 +814,10 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + bool IsRecursive = C.size() > 1; for (LazyCallGraph::Node &N : C) { Function &OldF = N.getFunction(); - - // FIXME: This lambda must only be used with this function. We should - // skip the lambda and just get the AA results directly. - auto AARGetter = [&](Function &F) -> AAResults & { - assert(&F == &OldF && "Called with an unexpected function!"); - return FAM.getResult<AAManager>(F); - }; - - const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF); - Function *NewF = - promoteArguments(&OldF, AARGetter, MaxElements, None, TTI); + Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive); if (!NewF) continue; LocalChange = true; @@ -1077,111 +852,3 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, PA.preserveSet<AllAnalysesOn<Function>>(); return PA; } - -namespace { - -/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
-struct ArgPromotion : public CallGraphSCCPass { - // Pass identification, replacement for typeid - static char ID; - - explicit ArgPromotion(unsigned MaxElements = 3) - : CallGraphSCCPass(ID), MaxElements(MaxElements) { - initializeArgPromotionPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - getAAResultsAnalysisUsage(AU); - CallGraphSCCPass::getAnalysisUsage(AU); - } - - bool runOnSCC(CallGraphSCC &SCC) override; - -private: - using llvm::Pass::doInitialization; - - bool doInitialization(CallGraph &CG) override; - - /// The maximum number of elements to expand, or 0 for unlimited. - unsigned MaxElements; -}; - -} // end anonymous namespace - -char ArgPromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, - false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false) - -Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) { - return new ArgPromotion(MaxElements); -} - -bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { - if (skipSCC(SCC)) - return false; - - // Get the callgraph information that we need to update to reflect our - // changes. - CallGraph &CG = getAnalysis().getCallGraph(); - - LegacyAARGetter AARGetter(*this); - - bool Changed = false, LocalChange; - - // Iterate until we stop promoting from this SCC. - do { - LocalChange = false; - // Attempt to promote arguments from all functions in this SCC. - for (CallGraphNode *OldNode : SCC) { - Function *OldF = OldNode->getFunction(); - if (!OldF) - continue; - - auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) { - Function *Caller = OldCS.getParent()->getParent(); - CallGraphNode *NewCalleeNode = - CG.getOrInsertFunction(NewCS.getCalledFunction()); - CallGraphNode *CallerNode = CG[Caller]; - CallerNode->replaceCallEdge(cast(OldCS), - cast(NewCS), NewCalleeNode); - }; - - const TargetTransformInfo &TTI = - getAnalysis().getTTI(*OldF); - if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements, - {ReplaceCallSite}, TTI)) { - LocalChange = true; - - // Update the call graph for the newly promoted function. - CallGraphNode *NewNode = CG.getOrInsertFunction(NewF); - NewNode->stealCalledFunctionsFrom(OldNode); - if (OldNode->getNumReferences() == 0) - delete CG.removeFunctionFromModule(OldNode); - else - OldF->setLinkage(Function::ExternalLinkage); - - // And updat ethe SCC we're iterating as well. - SCC.ReplaceNode(OldNode, NewNode); - } - } - // Remember that we changed something. 
- Changed |= LocalChange; - } while (LocalChange); - - return Changed; -} - -bool ArgPromotion::doInitialization(CallGraph &CG) { - return CallGraphSCCPass::doInitialization(CG); -} diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index d66140a726f6..b05b7990e3f0 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -15,29 +15,25 @@ #include "llvm/Transforms/IPO/Attributor.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/NoFolder.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -50,6 +46,10 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#ifdef EXPENSIVE_CHECKS +#include "llvm/IR/Verifier.h" +#endif + #include #include @@ -123,13 +123,13 @@ static cl::list SeedAllowList("attributor-seed-allow-list", cl::Hidden, cl::desc("Comma seperated list of attribute names that are " "allowed to be seeded."), - cl::ZeroOrMore, cl::CommaSeparated); + cl::CommaSeparated); static cl::list FunctionSeedAllowList( "attributor-function-seed-allow-list", cl::Hidden, cl::desc("Comma seperated list of function names that are " "allowed to be seeded."), - cl::ZeroOrMore, cl::CommaSeparated); + cl::CommaSeparated); #endif static cl::opt @@ -209,33 +209,25 @@ bool AA::isNoSyncInst(Attributor &A, const Instruction &I, } bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, - const Value &V) { - if (auto *C = dyn_cast(&V)) - return !C->isThreadDependent(); - // TODO: Inspect and cache more complex instructions. - if (auto *CB = dyn_cast(&V)) - return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() && - !CB->mayReadFromMemory(); - const Function *Scope = nullptr; - if (auto *I = dyn_cast(&V)) - Scope = I->getFunction(); - if (auto *A = dyn_cast(&V)) - Scope = A->getParent(); - if (!Scope) + const Value &V, bool ForAnalysisOnly) { + // TODO: See the AAInstanceInfo class comment. 
+ if (!ForAnalysisOnly) return false; - auto &NoRecurseAA = A.getAAFor( - QueryingAA, IRPosition::function(*Scope), DepClassTy::OPTIONAL); - return NoRecurseAA.isAssumedNoRecurse(); + auto &InstanceInfoAA = A.getAAFor( + QueryingAA, IRPosition::value(V), DepClassTy::OPTIONAL); + return InstanceInfoAA.isAssumedUniqueForAnalysis(); } Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty, const TargetLibraryInfo *TLI) { if (isa(Obj)) return UndefValue::get(&Ty); - if (isAllocationFn(&Obj, TLI)) - return getInitialValueOfAllocation(&cast(Obj), TLI, &Ty); + if (Constant *Init = getInitialValueOfAllocation(&Obj, TLI, &Ty)) + return Init; auto *GV = dyn_cast(&Obj); - if (!GV || !GV->hasLocalLinkage()) + if (!GV) + return nullptr; + if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer())) return nullptr; if (!GV->hasInitializer()) return UndefValue::get(&Ty); @@ -252,19 +244,29 @@ bool AA::isValidInScope(const Value &V, const Function *Scope) { return false; } -bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI, +bool AA::isValidAtPosition(const AA::ValueAndContext &VAC, InformationCache &InfoCache) { - if (isa(V)) + if (isa(VAC.getValue()) || VAC.getValue() == VAC.getCtxI()) return true; - const Function *Scope = CtxI.getFunction(); - if (auto *A = dyn_cast(&V)) + const Function *Scope = nullptr; + const Instruction *CtxI = VAC.getCtxI(); + if (CtxI) + Scope = CtxI->getFunction(); + if (auto *A = dyn_cast(VAC.getValue())) return A->getParent() == Scope; - if (auto *I = dyn_cast(&V)) + if (auto *I = dyn_cast(VAC.getValue())) { if (I->getFunction() == Scope) { - const DominatorTree *DT = - InfoCache.getAnalysisResultForFunction(*Scope); - return DT && DT->dominates(I, &CtxI); + if (const DominatorTree *DT = + InfoCache.getAnalysisResultForFunction( + *Scope)) + return DT->dominates(I, CtxI); + // Local dominance check mostly for the old PM passes. + if (CtxI && I->getParent() == CtxI->getParent()) + return llvm::any_of( + make_range(I->getIterator(), I->getParent()->end()), + [&](const Instruction &AfterI) { return &AfterI == CtxI; }); } + } return false; } @@ -295,11 +297,11 @@ AA::combineOptionalValuesInAAValueLatice(const Optional &A, const Optional &B, Type *Ty) { if (A == B) return A; - if (!B.hasValue()) + if (!B) return A; if (*B == nullptr) return nullptr; - if (!A.hasValue()) + if (!A) return Ty ? 
getWithType(**B, *Ty) : nullptr; if (*A == nullptr) return nullptr; @@ -314,21 +316,33 @@ AA::combineOptionalValuesInAAValueLatice(const Optional &A, return nullptr; } -bool AA::getPotentialCopiesOfStoredValue( - Attributor &A, StoreInst &SI, SmallSetVector &PotentialCopies, - const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) { +template +static bool getPotentialCopiesOfMemoryValue( + Attributor &A, Ty &I, SmallSetVector &PotentialCopies, + SmallSetVector &PotentialValueOrigins, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I + << " (only exact: " << OnlyExact << ")\n";); - Value &Ptr = *SI.getPointerOperand(); + Value &Ptr = *I.getPointerOperand(); SmallVector Objects; - if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI)) { + if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I, + UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "Underlying objects stored into could not be determined\n";); return false; } + // Containers to remember the pointer infos and new copies while we are not + // sure that we can find all of them. If we abort we want to avoid spurious + // dependences and potential copies in the provided container. SmallVector PIs; SmallVector NewCopies; + SmallVector NewCopyOrigins; + const auto *TLI = + A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction()); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa(Obj)) @@ -336,7 +350,7 @@ bool AA::getPotentialCopiesOfStoredValue( if (isa(Obj)) { // A null pointer access can be undefined but any offset from null may // be OK. We do not try to optimize the latter. - if (!NullPointerIsDefined(SI.getFunction(), + if (!NullPointerIsDefined(I.getFunction(), Ptr.getType()->getPointerAddressSpace()) && A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) == Obj) @@ -345,37 +359,74 @@ bool AA::getPotentialCopiesOfStoredValue( dbgs() << "Underlying object is a valid nullptr, giving up.\n";); return false; } + // TODO: Use assumed noalias return. if (!isa(Obj) && !isa(Obj) && - !isNoAliasCall(Obj)) { + !(IsLoad ? 
isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) { LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj << "\n";); return false; } if (auto *GV = dyn_cast<GlobalVariable>(Obj)) - if (!GV->hasLocalLinkage()) { + if (!GV->hasLocalLinkage() && + !(GV->isConstant() && GV->hasInitializer())) { LLVM_DEBUG(dbgs() << "Underlying object is global with external " "linkage, not supported yet: " << *Obj << "\n";); return false; } + if (IsLoad) { + Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); + if (!InitialValue) + return false; + NewCopies.push_back(InitialValue); + NewCopyOrigins.push_back(nullptr); + } + auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { - if (!Acc.isRead()) + if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) + return true; + if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; - auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst()); - if (!LI) { - LLVM_DEBUG(dbgs() << "Underlying object read through a non-load " - "instruction not supported yet: " - << *Acc.getRemoteInst() << "\n";); + if (OnlyExact && !IsExact && + !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) { + LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() + << ", abort!\n"); return false; } - NewCopies.push_back(LI); + if (IsLoad) { + assert(isa<LoadInst>(I) && "Expected load or store instruction only!"); + if (!Acc.isWrittenValueUnknown()) { + NewCopies.push_back(Acc.getWrittenValue()); + NewCopyOrigins.push_back(Acc.getRemoteInst()); + return true; + } + auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst()); + if (!SI) { + LLVM_DEBUG(dbgs() << "Underlying object written through a non-store " + "instruction not supported yet: " + << *Acc.getRemoteInst() << "\n";); + return false; + } + NewCopies.push_back(SI->getValueOperand()); + NewCopyOrigins.push_back(SI); + } else { + assert(isa<StoreInst>(I) && "Expected load or store instruction only!"); + auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst()); + if (!LI && OnlyExact) { + LLVM_DEBUG(dbgs() << "Underlying object read through a non-load " + "instruction not supported yet: " + << *Acc.getRemoteInst() << "\n";); + return false; + } + NewCopies.push_back(Acc.getRemoteInst()); + } return true; }; auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); - if (!PI.forallInterferingAccesses(SI, CheckAccess)) { + if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " @@ -385,16 +436,40 @@ bool AA::getPotentialCopiesOfStoredValue( PIs.push_back(&PI); } + // Only if we were successful in collecting all potential copies do we + // record dependences (on non-fixed AAPointerInfo AAs). We also only then + // modify the given PotentialCopies container. 
for (auto *PI : PIs) { if (!PI->getState().isAtFixpoint()) UsedAssumedInformation = true; A.recordDependence(*PI, QueryingAA, DepClassTy::OPTIONAL); } PotentialCopies.insert(NewCopies.begin(), NewCopies.end()); + PotentialValueOrigins.insert(NewCopyOrigins.begin(), NewCopyOrigins.end()); return true; } +bool AA::getPotentiallyLoadedValues( + Attributor &A, LoadInst &LI, SmallSetVector &PotentialValues, + SmallSetVector &PotentialValueOrigins, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + return getPotentialCopiesOfMemoryValue( + A, LI, PotentialValues, PotentialValueOrigins, QueryingAA, + UsedAssumedInformation, OnlyExact); +} + +bool AA::getPotentialCopiesOfStoredValue( + Attributor &A, StoreInst &SI, SmallSetVector &PotentialCopies, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + SmallSetVector PotentialValueOrigins; + return getPotentialCopiesOfMemoryValue( + A, SI, PotentialCopies, PotentialValueOrigins, QueryingAA, + UsedAssumedInformation, OnlyExact); +} + static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP, const AbstractAttribute &QueryingAA, bool RequireReadNone, bool &IsKnown) { @@ -449,6 +524,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, SmallVector Worklist; Worklist.push_back(&FromI); + const auto &NoRecurseAA = A.getAAFor( + QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); while (!Worklist.empty()) { const Instruction *CurFromI = Worklist.pop_back_val(); if (!Visited.insert(CurFromI).second) @@ -468,7 +545,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << *ToI << " [Intra]\n"); if (Result) return true; - continue; + if (NoRecurseAA.isAssumedNoRecurse()) + continue; } // TODO: If we can go arbitrarily backwards we will eventually reach an @@ -514,10 +592,10 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, return true; }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; Result = !A.checkForAllCallSites(CheckCallSite, *FromFn, /* RequireAllCallSites */ true, - &QueryingAA, AllCallSitesKnown); + &QueryingAA, UsedAssumedInformation); if (Result) { LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI << " in @" << FromFn->getName() @@ -631,7 +709,7 @@ Argument *IRPosition::getAssociatedArgument() const { assert(ACS.getCalledFunction()->arg_size() > u && "ACS mapped into var-args arguments!"); - if (CBCandidateArg.hasValue()) { + if (CBCandidateArg) { CBCandidateArg = nullptr; break; } @@ -640,7 +718,7 @@ Argument *IRPosition::getAssociatedArgument() const { } // If we found a unique callback candidate argument, return it. - if (CBCandidateArg.hasValue() && CBCandidateArg.getValue()) + if (CBCandidateArg && CBCandidateArg.getValue()) return CBCandidateArg.getValue(); // If no callbacks were found, or none used the underlying call site operand @@ -949,22 +1027,24 @@ Attributor::getAssumedConstant(const IRPosition &IRP, bool &UsedAssumedInformation) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we - // assume it's simpliied. + // assume it's simplified. 
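An editorial aside, not part of the patch: the simplification queries in this file use Optional<Value *> as a tri-state, where None means "not known yet", a present nullptr means "no single simplified value", and a present non-null pointer is the simplified value. A compact model of the combine rule from combineOptionalValuesInAAValueLatice above, using std::optional and omitting the type-adjustment step (all names here are hypothetical):

#include <optional>

struct Val {}; // stand-in for llvm::Value

std::optional<Val *> combine(std::optional<Val *> A, std::optional<Val *> B) {
  if (A == B)
    return A;       // identical states combine to themselves
  if (!B)
    return A;       // B unknown: keep A
  if (*B == nullptr)
    return nullptr; // B known-invalid: result invalid
  if (!A)
    return B;       // A unknown: take B (type adjustment omitted here)
  if (*A == nullptr)
    return nullptr; // A known-invalid: result invalid
  return *A == *B ? A : std::optional<Val *>(nullptr);
}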
for (auto &CB : SimplificationCallbacks.lookup(IRP)) { Optional SimplifiedV = CB(IRP, &AA, UsedAssumedInformation); - if (!SimplifiedV.hasValue()) + if (!SimplifiedV) return llvm::None; if (isa_and_nonnull(*SimplifiedV)) return cast(*SimplifiedV); return nullptr; } + if (auto *C = dyn_cast(&IRP.getAssociatedValue())) + return C; const auto &ValueSimplifyAA = getAAFor(AA, IRP, DepClassTy::NONE); Optional SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(*this); bool IsKnown = ValueSimplifyAA.isAtFixpoint(); UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV.hasValue()) { + if (!SimplifiedV) { recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); return llvm::None; } @@ -987,18 +1067,18 @@ Attributor::getAssumedSimplified(const IRPosition &IRP, bool &UsedAssumedInformation) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we - // assume it's simpliied. + // assume it's simplified. for (auto &CB : SimplificationCallbacks.lookup(IRP)) return CB(IRP, AA, UsedAssumedInformation); - // If no high-level/outside simplification occured, use AAValueSimplify. + // If no high-level/outside simplification occurred, use AAValueSimplify. const auto &ValueSimplifyAA = getOrCreateAAFor(IRP, AA, DepClassTy::NONE); Optional SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(*this); bool IsKnown = ValueSimplifyAA.isAtFixpoint(); UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV.hasValue()) { + if (!SimplifiedV) { if (AA) recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); return llvm::None; @@ -1017,7 +1097,7 @@ Attributor::getAssumedSimplified(const IRPosition &IRP, Optional Attributor::translateArgumentToCallSiteContent( Optional V, CallBase &CB, const AbstractAttribute &AA, bool &UsedAssumedInformation) { - if (!V.hasValue()) + if (!V) return V; if (*V == nullptr || isa(*V)) return V; @@ -1078,6 +1158,19 @@ bool Attributor::isAssumedDead(const Use &U, BasicBlock *IncomingBB = PHI->getIncomingBlock(U); return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA, UsedAssumedInformation, CheckBBLivenessOnly, DepClass); + } else if (StoreInst *SI = dyn_cast(UserI)) { + if (!CheckBBLivenessOnly && SI->getPointerOperand() != U.get()) { + const IRPosition IRP = IRPosition::inst(*SI); + const AAIsDead &IsDeadAA = + getOrCreateAAFor(IRP, QueryingAA, DepClassTy::NONE); + if (IsDeadAA.isRemovableStore()) { + if (QueryingAA) + recordDependence(IsDeadAA, *QueryingAA, DepClass); + if (!IsDeadAA.isKnown(AAIsDead::IS_REMOVABLE)) + UsedAssumedInformation = true; + return true; + } + } } return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA, @@ -1191,6 +1284,7 @@ bool Attributor::checkForAllUses( function_ref Pred, const AbstractAttribute &QueryingAA, const Value &V, bool CheckBBLivenessOnly, DepClassTy LivenessDepClass, + bool IgnoreDroppableUses, function_ref EquivalentUseCB) { // Check the trivial case first as it catches void values. 
@@ -1231,7 +1325,7 @@ bool Attributor::checkForAllUses( LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); continue; } - if (U->getUser()->isDroppable()) { + if (IgnoreDroppableUses && U->getUser()->isDroppable()) { LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n"); continue; } @@ -1241,9 +1335,9 @@ bool Attributor::checkForAllUses( if (!Visited.insert(U).second) continue; SmallSetVector PotentialCopies; - if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies, - QueryingAA, - UsedAssumedInformation)) { + if (AA::getPotentialCopiesOfStoredValue( + *this, *SI, PotentialCopies, QueryingAA, UsedAssumedInformation, + /* OnlyExact */ true)) { LLVM_DEBUG(dbgs() << "[Attributor] Value is stored, continue with " << PotentialCopies.size() << " potential copies instead!\n"); @@ -1277,7 +1371,7 @@ bool Attributor::checkForAllUses( bool Attributor::checkForAllCallSites(function_ref Pred, const AbstractAttribute &QueryingAA, bool RequireAllCallSites, - bool &AllCallSitesKnown) { + bool &UsedAssumedInformation) { // We can try to determine information from // the call sites. However, this is only possible all call sites are known, // hence the function has internal linkage. @@ -1286,31 +1380,26 @@ bool Attributor::checkForAllCallSites(function_ref Pred, if (!AssociatedFunction) { LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP << "\n"); - AllCallSitesKnown = false; return false; } return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites, - &QueryingAA, AllCallSitesKnown); + &QueryingAA, UsedAssumedInformation); } bool Attributor::checkForAllCallSites(function_ref Pred, const Function &Fn, bool RequireAllCallSites, const AbstractAttribute *QueryingAA, - bool &AllCallSitesKnown) { + bool &UsedAssumedInformation) { if (RequireAllCallSites && !Fn.hasLocalLinkage()) { LLVM_DEBUG( dbgs() << "[Attributor] Function " << Fn.getName() << " has no internal linkage, hence not all call sites are known\n"); - AllCallSitesKnown = false; return false; } - // If we do not require all call sites we might not see all. - AllCallSitesKnown = RequireAllCallSites; - SmallVector Uses(make_pointer_range(Fn.uses())); for (unsigned u = 0; u < Uses.size(); ++u) { const Use &U = *Uses[u]; @@ -1322,15 +1411,13 @@ bool Attributor::checkForAllCallSites(function_ref Pred, dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser() << "\n"; }); - bool UsedAssumedInformation = false; if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation, /* CheckBBLivenessOnly */ true)) { LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); continue; } if (ConstantExpr *CE = dyn_cast(U.getUser())) { - if (CE->isCast() && CE->getType()->isPointerTy() && - CE->getType()->getPointerElementType()->isFunctionTy()) { + if (CE->isCast() && CE->getType()->isPointerTy()) { LLVM_DEBUG( dbgs() << "[Attributor] Use, is constant cast expression, add " << CE->getNumUses() @@ -1477,30 +1564,24 @@ static bool checkForAllInstructionsImpl( } bool Attributor::checkForAllInstructions(function_ref Pred, + const Function *Fn, const AbstractAttribute &QueryingAA, const ArrayRef &Opcodes, bool &UsedAssumedInformation, bool CheckBBLivenessOnly, bool CheckPotentiallyDead) { - - const IRPosition &IRP = QueryingAA.getIRPosition(); // Since we need to provide instructions we have to have an exact definition. 
- const Function *AssociatedFunction = IRP.getAssociatedFunction(); - if (!AssociatedFunction) - return false; - - if (AssociatedFunction->isDeclaration()) + if (!Fn || Fn->isDeclaration()) return false; // TODO: use the function scope once we have call site AAReturnedValues. - const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const IRPosition &QueryIRP = IRPosition::function(*Fn); const auto *LivenessAA = (CheckBBLivenessOnly || CheckPotentiallyDead) ? nullptr : &(getAAFor(QueryingAA, QueryIRP, DepClassTy::NONE)); - auto &OpcodeInstMap = - InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); + auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA, LivenessAA, Opcodes, UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead)) @@ -1509,6 +1590,19 @@ bool Attributor::checkForAllInstructions(function_ref Pred, return true; } +bool Attributor::checkForAllInstructions(function_ref Pred, + const AbstractAttribute &QueryingAA, + const ArrayRef &Opcodes, + bool &UsedAssumedInformation, + bool CheckBBLivenessOnly, + bool CheckPotentiallyDead) { + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + return checkForAllInstructions(Pred, AssociatedFunction, QueryingAA, Opcodes, + UsedAssumedInformation, CheckBBLivenessOnly, + CheckPotentiallyDead); +} + bool Attributor::checkForAllReadWriteInstructions( function_ref Pred, AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) { @@ -1547,11 +1641,8 @@ void Attributor::runTillFixpoint() { // the abstract analysis. unsigned IterationCounter = 1; - unsigned MaxFixedPointIterations; - if (MaxFixpointIterations) - MaxFixedPointIterations = MaxFixpointIterations.getValue(); - else - MaxFixedPointIterations = SetFixpointIterations; + unsigned MaxIterations = + Configuration.MaxFixpointIterations.value_or(SetFixpointIterations); SmallVector ChangedAAs; SetVector Worklist, InvalidAAs; @@ -1636,21 +1727,20 @@ void Attributor::runTillFixpoint() { QueryAAsAwaitingUpdate.end()); QueryAAsAwaitingUpdate.clear(); - } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations || - VerifyMaxFixpointIterations)); + } while (!Worklist.empty() && + (IterationCounter++ < MaxIterations || VerifyMaxFixpointIterations)); - if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) { + if (IterationCounter > MaxIterations && !Functions.empty()) { auto Remark = [&](OptimizationRemarkMissed ORM) { return ORM << "Attributor did not reach a fixpoint after " - << ore::NV("Iterations", MaxFixedPointIterations) - << " iterations."; + << ore::NV("Iterations", MaxIterations) << " iterations."; }; - Function *F = Worklist.front()->getIRPosition().getAssociatedFunction(); + Function *F = Functions.front(); emitRemark(F, "FixedPoint", Remark); } LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " - << IterationCounter << "/" << MaxFixpointIterations + << IterationCounter << "/" << MaxIterations << " iterations\n"); // Reset abstract arguments not settled in a sound fixpoint by now. 
This @@ -1684,11 +1774,9 @@ void Attributor::runTillFixpoint() { << " abstract attributes.\n"; }); - if (VerifyMaxFixpointIterations && - IterationCounter != MaxFixedPointIterations) { + if (VerifyMaxFixpointIterations && IterationCounter != MaxIterations) { errs() << "\n[Attributor] Fixpoint iteration done after: " - << IterationCounter << "/" << MaxFixedPointIterations - << " iterations\n"; + << IterationCounter << "/" << MaxIterations << " iterations\n"; llvm_unreachable("The fixpoint was not reached with exactly the number of " "specified iterations!"); } @@ -1725,6 +1813,9 @@ ChangeStatus Attributor::manifestAttributes() { if (!State.isValidState()) continue; + if (AA->getCtxI() && !isRunOn(*AA->getAnchorScope())) + continue; + // Skip dead code. bool UsedAssumedInformation = false; if (isAssumedDead(*AA, nullptr, UsedAssumedInformation, @@ -1774,7 +1865,7 @@ ChangeStatus Attributor::manifestAttributes() { void Attributor::identifyDeadInternalFunctions() { // Early exit if we don't intend to delete functions. - if (!DeleteFns) + if (!Configuration.DeleteFns) return; // Identify dead internal functions and delete them. This happens outside @@ -1795,7 +1886,7 @@ void Attributor::identifyDeadInternalFunctions() { if (!F) continue; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (checkForAllCallSites( [&](AbstractCallSite ACS) { Function *Callee = ACS.getInstruction()->getFunction(); @@ -1803,7 +1894,7 @@ void Attributor::identifyDeadInternalFunctions() { (Functions.count(Callee) && Callee->hasLocalLinkage() && !LiveInternalFns.count(Callee)); }, - *F, true, nullptr, AllCallSitesKnown)) { + *F, true, nullptr, UsedAssumedInformation)) { continue; } @@ -1826,7 +1917,8 @@ ChangeStatus Attributor::cleanupIR() { << ToBeDeletedBlocks.size() << " blocks and " << ToBeDeletedInsts.size() << " instructions and " << ToBeChangedValues.size() << " values and " - << ToBeChangedUses.size() << " uses. " + << ToBeChangedUses.size() << " uses. To insert " + << ToBeChangedToUnreachableInsts.size() << " unreachables." << "Preserve manifest added " << ManifestAddedBlocks.size() << " blocks\n"); @@ -1844,12 +1936,15 @@ ChangeStatus Attributor::cleanupIR() { NewV = Entry.first; } while (true); + Instruction *I = dyn_cast(U->getUser()); + assert((!I || isRunOn(*I->getFunction())) && + "Cannot replace an instruction outside the current SCC!"); + // Do not replace uses in returns if the value is a must-tail call we will // not delete. - if (auto *RI = dyn_cast(U->getUser())) { + if (auto *RI = dyn_cast_or_null(I)) { if (auto *CI = dyn_cast(OldV->stripPointerCasts())) - if (CI->isMustTailCall() && - (!ToBeDeletedInsts.count(CI) || !isRunOn(*CI->getCaller()))) + if (CI->isMustTailCall() && !ToBeDeletedInsts.count(CI)) return; // If we rewrite a return and the new value is not an argument, strip the // `returned` attribute as it is wrong now. @@ -1859,8 +1954,8 @@ ChangeStatus Attributor::cleanupIR() { } // Do not perform call graph altering changes outside the SCC. 
- if (auto *CB = dyn_cast(U->getUser())) - if (CB->isCallee(U) && !isRunOn(*CB->getCaller())) + if (auto *CB = dyn_cast_or_null(I)) + if (CB->isCallee(U)) return; LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser() @@ -1908,8 +2003,12 @@ ChangeStatus Attributor::cleanupIR() { for (auto &U : OldV->uses()) if (Entry.second || !U.getUser()->isDroppable()) Uses.push_back(&U); - for (Use *U : Uses) + for (Use *U : Uses) { + if (auto *I = dyn_cast(U->getUser())) + if (!isRunOn(*I->getFunction())) + continue; ReplaceUse(U, NewV); + } } for (auto &V : InvokeWithDeadSuccessor) @@ -1940,15 +2039,15 @@ ChangeStatus Attributor::cleanupIR() { } } for (Instruction *I : TerminatorsToFold) { - if (!isRunOn(*I->getFunction())) - continue; + assert(isRunOn(*I->getFunction()) && + "Cannot replace a terminator outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); ConstantFoldTerminator(I->getParent()); } for (auto &V : ToBeChangedToUnreachableInsts) if (Instruction *I = dyn_cast_or_null(V)) { - if (!isRunOn(*I->getFunction())) - continue; + assert(isRunOn(*I->getFunction()) && + "Cannot replace an instruction outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); changeToUnreachable(I); } @@ -1956,10 +2055,10 @@ ChangeStatus Attributor::cleanupIR() { for (auto &V : ToBeDeletedInsts) { if (Instruction *I = dyn_cast_or_null(V)) { if (auto *CB = dyn_cast(I)) { - if (!isRunOn(*I->getFunction())) - continue; + assert(isRunOn(*I->getFunction()) && + "Cannot delete an instruction outside the current SCC!"); if (!isa(CB)) - CGUpdater.removeCallSite(*CB); + Configuration.CGUpdater.removeCallSite(*CB); } I->dropDroppableUses(); CGModifiedFunctions.insert(I->getFunction()); @@ -1972,9 +2071,7 @@ ChangeStatus Attributor::cleanupIR() { } } - llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) { - return !I || !isRunOn(*cast(I)->getFunction()); - }); + llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) { return !I; }); LLVM_DEBUG({ dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() << "\n"; @@ -2010,12 +2107,12 @@ ChangeStatus Attributor::cleanupIR() { for (Function *Fn : CGModifiedFunctions) if (!ToBeDeletedFunctions.count(Fn) && Functions.count(Fn)) - CGUpdater.reanalyzeFunction(*Fn); + Configuration.CGUpdater.reanalyzeFunction(*Fn); for (Function *Fn : ToBeDeletedFunctions) { if (!Functions.count(Fn)) continue; - CGUpdater.removeFunction(*Fn); + Configuration.CGUpdater.removeFunction(*Fn); } if (!ToBeChangedUses.empty()) @@ -2254,7 +2351,7 @@ bool Attributor::internalizeFunctions(SmallPtrSetImpl &FnSet, bool Attributor::isValidFunctionSignatureRewrite( Argument &Arg, ArrayRef ReplacementTypes) { - if (!RewriteSignatures) + if (!Configuration.RewriteSignatures) return false; Function *Fn = Arg.getParent(); @@ -2290,9 +2387,9 @@ bool Attributor::isValidFunctionSignatureRewrite( } // Avoid callbacks for now. - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr, - AllCallSitesKnown)) { + UsedAssumedInformation)) { LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n"); return false; } @@ -2305,7 +2402,6 @@ bool Attributor::isValidFunctionSignatureRewrite( // Forbid must-tail calls for now. 
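An editorial aside, not part of the patch: the AddToAssumeUsesMap helper introduced below (in initializeInformationCache) counts, per instruction, how many uses remain outside already-visited llvm.assume operands; once the count reaches zero, the instruction is recorded as assume-only and its operands are visited in turn. A stand-alone sketch of that counting over a toy use-graph (all names here are hypothetical):

#include <map>
#include <set>
#include <vector>

struct Inst {
  int NumUses;                  // total number of uses of this instruction
  std::vector<Inst *> Operands; // instructions this one depends on
};

// Visit one assume condition: decrement the outside-use count of the
// condition and, transitively, of operands that became assume-only.
void addToAssumeUses(Inst *Cond, std::map<Inst *, int> &RemainingUses,
                     std::set<Inst *> &AssumeOnlyValues) {
  std::vector<Inst *> Worklist{Cond};
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    auto It = RemainingUses.try_emplace(I, I->NumUses).first;
    if (--It->second != 0)
      continue; // still used outside the visited assumes
    AssumeOnlyValues.insert(I);
    for (Inst *Op : I->Operands)
      Worklist.push_back(Op);
  }
}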
// TODO: - bool UsedAssumedInformation = false; auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr, nullptr, {Instruction::Call}, @@ -2370,7 +2466,7 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) { } ChangeStatus Attributor::rewriteFunctionSignatures( - SmallPtrSetImpl<Function *> &ModifiedFns) { + SmallSetVector<Function *, 8> &ModifiedFns) { ChangeStatus Changed = ChangeStatus::UNCHANGED; for (auto &It : ArgumentReplacementMap) { @@ -2403,6 +2499,12 @@ ChangeStatus Attributor::rewriteFunctionSignatures( } } + uint64_t LargestVectorWidth = 0; + for (auto *I : NewArgumentTypes) + if (auto *VT = dyn_cast<VectorType>(I)) + LargestVectorWidth = std::max( + LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); + FunctionType *OldFnTy = OldFn->getFunctionType(); Type *RetTy = OldFnTy->getReturnType(); @@ -2432,6 +2534,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures( NewFn->setAttributes(AttributeList::get( Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(), NewArgumentAttributes)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NewFn, LargestVectorWidth); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the @@ -2509,14 +2612,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures( Ctx, OldCallAttributeList.getFnAttrs(), OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NewCB->getCaller(), + LargestVectorWidth); + CallSitePairs.push_back({OldCB, NewCB}); return true; }; // Use the CallSiteReplacementCreator to create replacement call sites. - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn, - true, nullptr, AllCallSitesKnown); + true, nullptr, UsedAssumedInformation); (void)Success; assert(Success && "Assumed call site replacement to succeed!"); @@ -2529,6 +2635,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures( ARIs[OldArgNum]) { if (ARI->CalleeRepairCB) ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt); + if (ARI->ReplacementTypes.empty()) + OldFnArgIt->replaceAllUsesWith( + PoisonValue::get(OldFnArgIt->getType())); NewFnArgIt += ARI->ReplacementTypes.size(); } else { NewFnArgIt->takeName(&*OldFnArgIt); @@ -2544,17 +2653,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures( assert(OldCB.getType() == NewCB.getType() && "Cannot handle call sites with different types!"); ModifiedFns.insert(OldCB.getFunction()); - CGUpdater.replaceCallSite(OldCB, NewCB); + Configuration.CGUpdater.replaceCallSite(OldCB, NewCB); OldCB.replaceAllUsesWith(&NewCB); OldCB.eraseFromParent(); } // Replace the function in the call graph (if any). - CGUpdater.replaceFunctionWith(*OldFn, *NewFn); + Configuration.CGUpdater.replaceFunctionWith(*OldFn, *NewFn); // If the old function was modified and needed to be reanalyzed, the new one // does now. - if (ModifiedFns.erase(OldFn)) + if (ModifiedFns.remove(OldFn)) ModifiedFns.insert(NewFn); Changed = ChangeStatus::CHANGED; @@ -2574,6 +2683,30 @@ void InformationCache::initializeInformationCache(const Function &CF, // queried by abstract attributes during their initialization or update. // This has to happen before we create attributes. + DenseMap<const Instruction *, Optional<unsigned>> AssumeUsesMap; + + // Add \p V to the assume uses map which tracks the number of uses outside of + // "visited" assumes. 
If no outside uses are left the value is added to the + // assume only use vector. + auto AddToAssumeUsesMap = [&](const Value &V) -> void { + SmallVector Worklist; + if (auto *I = dyn_cast(&V)) + Worklist.push_back(I); + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + Optional &NumUses = AssumeUsesMap[I]; + if (!NumUses) + NumUses = I->getNumUses(); + NumUses = NumUses.getValue() - /* this assume */ 1; + if (NumUses.getValue() != 0) + continue; + AssumeOnlyValues.insert(I); + for (const Value *Op : I->operands()) + if (auto *OpI = dyn_cast(Op)) + Worklist.push_back(OpI); + } + }; + for (Instruction &I : instructions(&F)) { bool IsInterestingOpcode = false; @@ -2594,6 +2727,7 @@ void InformationCache::initializeInformationCache(const Function &CF, // For `must-tail` calls we remember the caller and callee. if (auto *Assume = dyn_cast(&I)) { fillMapFromAssume(*Assume, KnowledgeMap); + AddToAssumeUsesMap(*Assume->getArgOperand(0)); } else if (cast(I).isMustTailCall()) { FI.ContainsMustTailCall = true; if (const Function *Callee = cast(I).getCalledFunction()) @@ -2742,7 +2876,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { getOrCreateAAFor(RetPos); // Every function might be simplified. - getOrCreateAAFor(RetPos); + bool UsedAssumedInformation = false; + getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation); // Every returned value might be marked noundef. getOrCreateAAFor(RetPos); @@ -2834,7 +2969,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) { IRPosition CBRetPos = IRPosition::callsite_returned(CB); - getOrCreateAAFor(CBRetPos); + bool UsedAssumedInformation = false; + getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation); } for (int I = 0, E = CB.arg_size(); I < E; ++I) { @@ -2897,10 +3033,15 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { getOrCreateAAFor( IRPosition::value(*cast(I).getPointerOperand())); if (SimplifyAllLoads) - getOrCreateAAFor(IRPosition::value(I)); - } else - getOrCreateAAFor( - IRPosition::value(*cast(I).getPointerOperand())); + getAssumedSimplified(IRPosition::value(I), nullptr, + UsedAssumedInformation); + } else { + auto &SI = cast(I); + getOrCreateAAFor(IRPosition::inst(I)); + getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr, + UsedAssumedInformation); + getOrCreateAAFor(IRPosition::value(*SI.getPointerOperand())); + } return true; }; Success = checkForAllInstructionsImpl( @@ -2975,8 +3116,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, if (!S.isValidState()) OS << "full-set"; else { - for (auto &it : S.getAssumedSet()) - OS << it << ", "; + for (auto &It : S.getAssumedSet()) + OS << It << ", "; if (S.undefIsContained()) OS << "undef "; } @@ -3018,8 +3159,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, OS << " [" << Acc.getKind() << "] " << *Acc.getRemoteInst(); if (Acc.getLocalInst() != Acc.getRemoteInst()) OS << " via " << *Acc.getLocalInst(); - if (Acc.getContent().hasValue()) - OS << " [" << *Acc.getContent() << "]"; + if (Acc.getContent()) { + if (*Acc.getContent()) + OS << " [" << **Acc.getContent() << "]"; + else + OS << " [ ]"; + } return OS; } ///} @@ -3032,7 +3177,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, SetVector &Functions, AnalysisGetter &AG, CallGraphUpdater &CGUpdater, - bool DeleteFns) { + bool DeleteFns, bool IsModulePass) { if (Functions.empty()) return false; @@ -3045,8 +3190,10 @@ static bool 
runAttributorOnFunctions(InformationCache &InfoCache, // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - Attributor A(Functions, InfoCache, CGUpdater, /* Allowed */ nullptr, - DeleteFns); + AttributorConfig AC(CGUpdater); + AC.IsModulePass = IsModulePass; + AC.DeleteFns = DeleteFns; + Attributor A(Functions, InfoCache, AC); // Create shallow wrappers for all functions that are not IPO amendable if (AllowShallowWrappers) @@ -3151,7 +3298,7 @@ PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr); if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns */ true)) { + /* DeleteFns */ true, /* IsModulePass */ true)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -3179,7 +3326,8 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C, BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions); if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns */ false)) { + /* DeleteFns */ false, + /* IsModulePass */ false)) { // FIXME: Think about passes we will preserve and add them here. PreservedAnalyses PA; PA.preserve(); @@ -3255,7 +3403,8 @@ struct AttributorLegacyPass : public ModulePass { BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr); return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns*/ true); + /* DeleteFns*/ true, + /* IsModulePass */ true); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3292,7 +3441,8 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass { BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions); return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns */ false); + /* DeleteFns */ false, + /* IsModulePass */ false); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 2d88e329e093..4d99ce7e3175 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -14,9 +14,11 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -30,21 +32,29 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/NoFolder.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include 
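// A sketch of the configuration-object idiom the runAttributorOnFunctions
// hunk above adopts: rather than growing the Attributor's positional
// constructor, the required CGUpdater is bound once and the optional knobs
// (IsModulePass, DeleteFns) become named fields. All types below are invented
// stand-ins, not the real Attributor API:
#include <cassert>

struct CallGraphUpdaterStub {};

struct ConfigSketch {
  explicit ConfigSketch(CallGraphUpdaterStub &CGU) : CGUpdater(CGU) {}
  CallGraphUpdaterStub &CGUpdater; // required collaborator
  bool IsModulePass = true;        // optional knobs with defaults
  bool DeleteFns = true;
};

struct AttributorSketch {
  explicit AttributorSketch(const ConfigSketch &C) : Configuration(C) {}
  ConfigSketch Configuration;
};

int main() {
  CallGraphUpdaterStub CGU;
  ConfigSketch AC(CGU);    // mirrors: AttributorConfig AC(CGUpdater);
  AC.IsModulePass = false; // mirrors: AC.IsModulePass = IsModulePass;
  AC.DeleteFns = false;    // mirrors: AC.DeleteFns = DeleteFns;
  AttributorSketch A(AC);
  assert(!A.Configuration.DeleteFns && !A.Configuration.IsModulePass);
}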
"llvm/Support/FileSystem.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include using namespace llvm; @@ -69,11 +79,11 @@ static cl::opt MaxPotentialValues( cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); -static cl::opt - MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden, - cl::desc("Maximum number of interfering writes to " - "check before assuming all might interfere."), - cl::init(6)); +static cl::opt MaxInterferingAccesses( + "attributor-max-interfering-accesses", cl::Hidden, + cl::desc("Maximum number of interfering accesses to " + "check before assuming all might interfere."), + cl::init(6)); STATISTIC(NumAAs, "Number of abstract attributes created"); @@ -140,6 +150,7 @@ PIPE_OPERATOR(AANonNull) PIPE_OPERATOR(AANoAlias) PIPE_OPERATOR(AADereferenceable) PIPE_OPERATOR(AAAlign) +PIPE_OPERATOR(AAInstanceInfo) PIPE_OPERATOR(AANoCapture) PIPE_OPERATOR(AAValueSimplify) PIPE_OPERATOR(AANoFree) @@ -150,7 +161,7 @@ PIPE_OPERATOR(AAMemoryLocation) PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) -PIPE_OPERATOR(AAPotentialValues) +PIPE_OPERATOR(AAPotentialConstantValues) PIPE_OPERATOR(AANoUndef) PIPE_OPERATOR(AACallEdges) PIPE_OPERATOR(AAFunctionReachability) @@ -170,6 +181,45 @@ ChangeStatus clampStateAndIndicateChange(DerefState &S, } // namespace llvm +/// Checks if a type could have padding bytes. +static bool isDenselyPacked(Type *Ty, const DataLayout &DL) { + // There is no size information, so be conservative. + if (!Ty->isSized()) + return false; + + // If the alloc size is not equal to the storage size, then there are padding + // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. + if (DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty)) + return false; + + // FIXME: This isn't the right way to check for padding in vectors with + // non-byte-size elements. + if (VectorType *SeqTy = dyn_cast(Ty)) + return isDenselyPacked(SeqTy->getElementType(), DL); + + // For array types, check for padding within members. + if (ArrayType *SeqTy = dyn_cast(Ty)) + return isDenselyPacked(SeqTy->getElementType(), DL); + + if (!isa(Ty)) + return true; + + // Check for padding within and between elements of a struct. + StructType *StructTy = cast(Ty); + const StructLayout *Layout = DL.getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned I = 0, E = StructTy->getNumElements(); I < E; ++I) { + Type *ElTy = StructTy->getElementType(I); + if (!isDenselyPacked(ElTy, DL)) + return false; + if (StartPos != Layout->getElementOffsetInBits(I)) + return false; + StartPos += DL.getTypeAllocSizeInBits(ElTy); + } + + return true; +} + /// Get pointer operand of memory accessing instruction. If \p I is /// not a memory accessing instruction, return nullptr. If \p AllowVolatile, /// is set to false and the instruction is volatile, return nullptr. @@ -236,7 +286,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, } // Ensure the result has the requested type. 
- Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast"); + Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy, + Ptr->getName() + ".cast"); LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); return Ptr; @@ -251,25 +302,32 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, /// once. Note that the value used for the callback may still be the value /// associated with \p IRP (due to PHIs). To limit how much effort is invested, /// we will never visit more values than specified by \p MaxValues. -/// If \p Intraprocedural is set to true only values valid in the scope of -/// \p CtxI will be visited and simplification into other scopes is prevented. +/// If \p VS does not contain the Interprocedural bit, only values valid in the +/// scope of \p CtxI will be visited and simplification into other scopes is +/// prevented. template static bool genericValueTraversal( Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA, StateTy &State, function_ref VisitValueCB, - const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16, + const Instruction *CtxI, bool &UsedAssumedInformation, + bool UseValueSimplify = true, int MaxValues = 16, function_ref StripCB = nullptr, - bool Intraprocedural = false) { + AA::ValueScope VS = AA::Interprocedural) { - const AAIsDead *LivenessAA = nullptr; - if (IRP.getAnchorScope()) - LivenessAA = &A.getAAFor( - QueryingAA, - IRPosition::function(*IRP.getAnchorScope(), IRP.getCallBaseContext()), - DepClassTy::NONE); - bool AnyDead = false; + struct LivenessInfo { + const AAIsDead *LivenessAA = nullptr; + bool AnyDead = false; + }; + SmallMapVector LivenessAAs; + auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { + LivenessInfo &LI = LivenessAAs[&F]; + if (!LI.LivenessAA) + LI.LivenessAA = &A.getAAFor(QueryingAA, IRPosition::function(F), + DepClassTy::NONE); + return LI; + }; Value *InitialV = &IRP.getAssociatedValue(); using Item = std::pair; @@ -319,10 +377,9 @@ static bool genericValueTraversal( // Look through select instructions, visit assumed potential values. if (auto *SI = dyn_cast(V)) { - bool UsedAssumedInformation = false; Optional C = A.getAssumedConstant( *SI->getCondition(), QueryingAA, UsedAssumedInformation); - bool NoValueYet = !C.hasValue(); + bool NoValueYet = !C; if (NoValueYet || isa_and_nonnull(*C)) continue; if (auto *CI = dyn_cast_or_null(*C)) { @@ -340,12 +397,12 @@ static bool genericValueTraversal( // Look through phi nodes, visit all live operands. 
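// The LivenessInfo map introduced above replaces the single anchor-scope
// AAIsDead: since the traversal may now cross function boundaries, one
// liveness record per touched function is materialized lazily and reused, as
// the PHI handling below does via GetLivenessInfo. The caching idiom in
// isolation (std::map standing in for SmallMapVector, a flag and counter for
// the AAIsDead lookup; names invented):
#include <cassert>
#include <map>
#include <string>

struct LivenessInfoSketch {
  bool Materialized = false; // stands in for: const AAIsDead *LivenessAA
  bool AnyDead = false;
};

int main() {
  std::map<std::string, LivenessInfoSketch> LivenessAAs;
  int ExpensiveLookups = 0;
  auto GetLivenessInfo = [&](const std::string &Fn) -> LivenessInfoSketch & {
    LivenessInfoSketch &LI = LivenessAAs[Fn];
    if (!LI.Materialized) { // first touch: would call A.getAAFor<AAIsDead>()
      LI.Materialized = true;
      ++ExpensiveLookups;
    }
    return LI;
  };
  GetLivenessInfo("f").AnyDead = true; // later: one recorded dependence per entry
  GetLivenessInfo("f");                // cache hit
  assert(ExpensiveLookups == 1 && LivenessAAs.size() == 1);
}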
if (auto *PHI = dyn_cast(V)) { - assert(LivenessAA && - "Expected liveness in the presence of instructions!"); + LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction()); for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { - AnyDead = true; + if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { + LI.AnyDead = true; + UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint(); continue; } Worklist.push_back( @@ -355,9 +412,9 @@ static bool genericValueTraversal( } if (auto *Arg = dyn_cast(V)) { - if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) { + if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) { SmallVector CallSiteValues; - bool AllCallSitesKnown = true; + bool UsedAssumedInformation = false; if (A.checkForAllCallSites( [&](AbstractCallSite ACS) { // Callbacks might not have a corresponding call site operand, @@ -368,7 +425,7 @@ static bool genericValueTraversal( CallSiteValues.push_back({CSOp, ACS.getInstruction()}); return true; }, - *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) { + *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) { Worklist.append(CallSiteValues); continue; } @@ -376,14 +433,13 @@ static bool genericValueTraversal( } if (UseValueSimplify && !isa(V)) { - bool UsedAssumedInformation = false; Optional SimpleV = A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); - if (!SimpleV.hasValue()) + if (!SimpleV) continue; Value *NewV = SimpleV.getValue(); if (NewV && NewV != V) { - if (!Intraprocedural || !CtxI || + if ((VS & AA::Interprocedural) || !CtxI || AA::isValidInScope(*NewV, CtxI->getFunction())) { Worklist.push_back({NewV, CtxI}); continue; @@ -391,6 +447,37 @@ static bool genericValueTraversal( } } + if (auto *LI = dyn_cast(V)) { + bool UsedAssumedInformation = false; + // If we ask for the potentially loaded values from the initial pointer we + // will simply end up here again. The load is as far as we can make it. + if (LI->getPointerOperand() != InitialV) { + SmallSetVector PotentialCopies; + SmallSetVector PotentialValueOrigins; + if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, + PotentialValueOrigins, QueryingAA, + UsedAssumedInformation, + /* OnlyExact */ true)) { + // Values have to be dynamically unique or we loose the fact that a + // single llvm::Value might represent two runtime values (e.g., stack + // locations in different recursive calls). + bool DynamicallyUnique = + llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) { + return AA::isDynamicallyUnique(A, QueryingAA, *PC); + }); + if (DynamicallyUnique && + ((VS & AA::Interprocedural) || !CtxI || + llvm::all_of(PotentialCopies, [CtxI](Value *PC) { + return AA::isValidInScope(*PC, CtxI->getFunction()); + }))) { + for (auto *PotentialCopy : PotentialCopies) + Worklist.push_back({PotentialCopy, CtxI}); + continue; + } + } + } + } + // Once a leaf is reached we inform the user through the callback. if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " @@ -400,8 +487,10 @@ static bool genericValueTraversal( } while (!Worklist.empty()); // If we actually used liveness information so we have to record a dependence. 
- if (AnyDead) - A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL); + for (auto &It : LivenessAAs) + if (It.second.AnyDead) + A.recordDependence(*It.second.LivenessAA, QueryingAA, + DepClassTy::OPTIONAL); // All values have been visited. return true; @@ -411,7 +500,8 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, SmallVectorImpl &Objects, const AbstractAttribute &QueryingAA, const Instruction *CtxI, - bool Intraprocedural) { + bool &UsedAssumedInformation, + AA::ValueScope VS) { auto StripCB = [&](Value *V) { return getUnderlyingObject(V); }; SmallPtrSet SeenObjects; auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *, @@ -423,15 +513,16 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, }; if (!genericValueTraversal( A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI, - true, 32, StripCB, Intraprocedural)) + UsedAssumedInformation, true, 32, StripCB, VS)) return false; return true; } -const Value *stripAndAccumulateMinimalOffsets( - Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val, - const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, - bool UseAssumed = false) { +static const Value * +stripAndAccumulateOffsets(Attributor &A, const AbstractAttribute &QueryingAA, + const Value *Val, const DataLayout &DL, APInt &Offset, + bool GetMinOffset, bool AllowNonInbounds, + bool UseAssumed = false) { auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool { const IRPosition &Pos = IRPosition::value(V); @@ -442,14 +533,20 @@ const Value *stripAndAccumulateMinimalOffsets( : DepClassTy::NONE); ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed() : ValueConstantRangeAA.getKnown(); + if (Range.isFullSet()) + return false; + // We can only use the lower part of the range because the upper part can // be higher than what the value can really be. 
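// The GetMinOffset switch introduced below generalizes the old
// "signed minimum only" behavior: for a value known to lie in [Lo, Hi], a
// conservative *base* offset takes Lo, while a conservative *maximal* access
// offset (e.g. for a dereferenceability query) takes Hi. Tiny illustration
// with plain integers rather than llvm::ConstantRange:
#include <cstdint>
#include <iostream>

int64_t pickOffset(int64_t Lo, int64_t Hi, bool GetMinOffset) {
  return GetMinOffset ? Lo : Hi;
}

int main() {
  // Index known to be in [4, 8]:
  std::cout << pickOffset(4, 8, /*GetMinOffset=*/true) << "\n";  // base: 4
  std::cout << pickOffset(4, 8, /*GetMinOffset=*/false) << "\n"; // extent: 8
}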
- ROffset = Range.getSignedMin(); + if (GetMinOffset) + ROffset = Range.getSignedMin(); + else + ROffset = Range.getSignedMax(); return true; }; return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds, - /* AllowInvariant */ false, + /* AllowInvariant */ true, AttributorAnalysis); } @@ -458,8 +555,9 @@ getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA, const Value *Ptr, int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) { APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - const Value *Base = stripAndAccumulateMinimalOffsets( - A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds); + const Value *Base = + stripAndAccumulateOffsets(A, QueryingAA, Ptr, DL, OffsetAPInt, + /* GetMinOffset */ true, AllowNonInbounds); BytesOffset = OffsetAPInt.getSExtValue(); return Base; @@ -493,10 +591,9 @@ static void clampReturnedValueStates( LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr() << " @ " << RVPos << "\n"); const StateType &AAS = AA.getState(); - if (T.hasValue()) - *T &= AAS; - else - T = AAS; + if (!T) + T = StateType::getBestState(AAS); + *T &= AAS; LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T << "\n"); return T->isValidState(); @@ -504,7 +601,7 @@ static void clampReturnedValueStates( if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA)) S.indicatePessimisticFixpoint(); - else if (T.hasValue()) + else if (T) S ^= *T; } @@ -560,20 +657,19 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); const StateType &AAS = AA.getState(); - if (T.hasValue()) - *T &= AAS; - else - T = AAS; + if (!T) + T = StateType::getBestState(AAS); + *T &= AAS; LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T << "\n"); return T->isValidState(); }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true, - AllCallSitesKnown)) + UsedAssumedInformation)) S.indicatePessimisticFixpoint(); - else if (T.hasValue()) + else if (T) S ^= *T; } @@ -667,7 +763,6 @@ struct AACallSiteReturnedFromReturned : public BaseType { return clampStateAndIndicateChange(S, AA.getState()); } }; -} // namespace /// Helper function to accumulate uses. template @@ -779,6 +874,7 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, S += ParentState; } } +} // namespace /// ------------------------ PointerInfo --------------------------------------- @@ -786,9 +882,6 @@ namespace llvm { namespace AA { namespace PointerInfo { -/// An access kind description as used by AAPointerInfo. -struct OffsetAndSize; - struct State; } // namespace PointerInfo @@ -806,7 +899,7 @@ struct DenseMapInfo : DenseMapInfo { /// Helper that allows OffsetAndSize as a key in a DenseMap. template <> -struct DenseMapInfo +struct DenseMapInfo : DenseMapInfo> {}; /// Helper for AA::PointerInfo::Acccess DenseMap/Set usage ignoring everythign @@ -822,90 +915,15 @@ struct AccessAsInstructionInfo : DenseMapInfo { } // namespace llvm -/// Helper to represent an access offset and size, with logic to deal with -/// uncertainty and check for overlapping accesses. 
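// The OffsetAndSize helper removed below (the patch moves it into the
// AAPointerInfo interface) treats accesses as half-open byte ranges:
// [O1, O1+S1) and [O2, O2+S2) may overlap iff each starts before the other
// ends, and any unknown offset or size conservatively reports an overlap.
// Standalone rendering of that check:
#include <cstdint>
#include <iostream>

constexpr int64_t Unknown = 1 << 31;

bool mayOverlap(int64_t O1, int64_t S1, int64_t O2, int64_t S2) {
  if (O1 == Unknown || S1 == Unknown || O2 == Unknown || S2 == Unknown)
    return true; // giving up -> overlap
  return O2 + S2 > O1 && O2 < O1 + S1;
}

int main() {
  std::cout << mayOverlap(0, 4, 4, 4) << "\n";       // 0: [0,4) vs [4,8)
  std::cout << mayOverlap(0, 8, 4, 4) << "\n";       // 1: [0,8) vs [4,8)
  std::cout << mayOverlap(0, 4, Unknown, 4) << "\n"; // 1: unknown offset
}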
-struct AA::PointerInfo::OffsetAndSize : public std::pair<int64_t, int64_t> {
-  using BaseTy = std::pair<int64_t, int64_t>;
-  OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
-  OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
-  int64_t getOffset() const { return first; }
-  int64_t getSize() const { return second; }
-  static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); }
-
-  /// Return true if offset or size are unknown.
-  bool offsetOrSizeAreUnknown() const {
-    return getOffset() == OffsetAndSize::Unknown ||
-           getSize() == OffsetAndSize::Unknown;
-  }
-
-  /// Return true if this offset and size pair might describe an address that
-  /// overlaps with \p OAS.
-  bool mayOverlap(const OffsetAndSize &OAS) const {
-    // Any unknown value and we are giving up -> overlap.
-    if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
-      return true;
-
-    // Check if one offset point is in the other interval [offset, offset+size].
-    return OAS.getOffset() + OAS.getSize() > getOffset() &&
-           OAS.getOffset() < getOffset() + getSize();
-  }
-
-  /// Constant used to represent unknown offset or sizes.
-  static constexpr int64_t Unknown = 1 << 31;
-};
-
-/// Implementation of the DenseMapInfo.
-///
-///{
-inline llvm::AccessAsInstructionInfo::Access
-llvm::AccessAsInstructionInfo::getEmptyKey() {
-  return Access(Base::getEmptyKey(), nullptr, AAPointerInfo::AK_READ, nullptr);
-}
-inline llvm::AccessAsInstructionInfo::Access
-llvm::AccessAsInstructionInfo::getTombstoneKey() {
-  return Access(Base::getTombstoneKey(), nullptr, AAPointerInfo::AK_READ,
-                nullptr);
-}
-unsigned llvm::AccessAsInstructionInfo::getHashValue(
-    const llvm::AccessAsInstructionInfo::Access &A) {
-  return Base::getHashValue(A.getRemoteInst());
-}
-bool llvm::AccessAsInstructionInfo::isEqual(
-    const llvm::AccessAsInstructionInfo::Access &LHS,
-    const llvm::AccessAsInstructionInfo::Access &RHS) {
-  return LHS.getRemoteInst() == RHS.getRemoteInst();
-}
-inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
-llvm::DenseMapInfo<AAPointerInfo::Access>::getEmptyKey() {
-  return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_READ,
-                               nullptr);
-}
-inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
-llvm::DenseMapInfo<AAPointerInfo::Access>::getTombstoneKey() {
-  return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_WRITE,
-                               nullptr);
-}
-
-unsigned llvm::DenseMapInfo<AAPointerInfo::Access>::getHashValue(
-    const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &A) {
-  return detail::combineHashValue(
-             DenseMapInfo<Instruction *>::getHashValue(A.getRemoteInst()),
-             (A.isWrittenValueYetUndetermined()
-                  ? ~0
-                  : DenseMapInfo<Value *>::getHashValue(A.getWrittenValue()))) +
-         A.getKind();
-}
-
-bool llvm::DenseMapInfo<AAPointerInfo::Access>::isEqual(
-    const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &LHS,
-    const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &RHS) {
-  return LHS == RHS;
-}
-///}
-
 /// A type to track pointer/struct usage and accesses for AAPointerInfo.
 struct AA::PointerInfo::State : public AbstractState {
+
+  ~State() {
+    // We do not delete the Accesses objects but need to destroy them still.
+    for (auto &It : AccessBins)
+      It.second->~Accesses();
+  }
+
   /// Return the best possible representable state.
static State getBestState(const State &SIS) { return State(); } @@ -916,9 +934,10 @@ struct AA::PointerInfo::State : public AbstractState { return R; } - State() {} - State(const State &SIS) : AccessBins(SIS.AccessBins) {} - State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {} + State() = default; + State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) { + SIS.AccessBins.clear(); + } const State &getAssumed() const { return *this; } @@ -967,15 +986,11 @@ struct AA::PointerInfo::State : public AbstractState { return false; auto &Accs = It->getSecond(); auto &RAccs = RIt->getSecond(); - if (Accs.size() != RAccs.size()) + if (Accs->size() != RAccs->size()) return false; - auto AccIt = Accs.begin(), RAccIt = RAccs.begin(), AccE = Accs.end(); - while (AccIt != AccE) { - if (*AccIt != *RAccIt) + for (const auto &ZipIt : llvm::zip(*Accs, *RAccs)) + if (std::get<0>(ZipIt) != std::get<1>(ZipIt)) return false; - ++AccIt; - ++RAccIt; - } ++It; ++RIt; } @@ -984,42 +999,88 @@ struct AA::PointerInfo::State : public AbstractState { bool operator!=(const State &R) const { return !(*this == R); } /// We store accesses in a set with the instruction as key. - using Accesses = DenseSet; + struct Accesses { + SmallVector Accesses; + DenseMap Map; + + unsigned size() const { return Accesses.size(); } + + using vec_iterator = decltype(Accesses)::iterator; + vec_iterator begin() { return Accesses.begin(); } + vec_iterator end() { return Accesses.end(); } + + using iterator = decltype(Map)::const_iterator; + iterator find(AAPointerInfo::Access &Acc) { + return Map.find(Acc.getRemoteInst()); + } + iterator find_end() { return Map.end(); } + + AAPointerInfo::Access &get(iterator &It) { + return Accesses[It->getSecond()]; + } + + void insert(AAPointerInfo::Access &Acc) { + Map[Acc.getRemoteInst()] = Accesses.size(); + Accesses.push_back(Acc); + } + }; /// We store all accesses in bins denoted by their offset and size. - using AccessBinsTy = DenseMap; + using AccessBinsTy = DenseMap; AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); } AccessBinsTy::const_iterator end() const { return AccessBins.end(); } protected: /// The bins with all the accesses for the associated pointer. - DenseMap AccessBins; + AccessBinsTy AccessBins; /// Add a new access to the state at offset \p Offset and with size \p Size. /// The access is associated with \p I, writes \p Content (if anything), and /// is of kind \p Kind. /// \Returns CHANGED, if the state changed, UNCHANGED otherwise. - ChangeStatus addAccess(int64_t Offset, int64_t Size, Instruction &I, - Optional Content, + ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size, + Instruction &I, Optional Content, AAPointerInfo::AccessKind Kind, Type *Ty, Instruction *RemoteI = nullptr, Accesses *BinPtr = nullptr) { - OffsetAndSize Key{Offset, Size}; - Accesses &Bin = BinPtr ? *BinPtr : AccessBins[Key]; + AAPointerInfo::OffsetAndSize Key{Offset, Size}; + Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key]; + if (!Bin) + Bin = new (A.Allocator) Accesses; AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty); // Check if we have an access for this instruction in this bin, if not, // simply add it. - auto It = Bin.find(Acc); - if (It == Bin.end()) { - Bin.insert(Acc); + auto It = Bin->find(Acc); + if (It == Bin->find_end()) { + Bin->insert(Acc); return ChangeStatus::CHANGED; } // If the existing access is the same as then new one, nothing changed. 
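// The Accesses container defined above pairs a vector with an index map: the
// vector keeps payloads in insertion order for iteration, while the map still
// gives O(1) lookup by remote instruction. The same idiom with STL stand-ins
// (string keys in place of Instruction pointers; a sketch, not the patch's
// code):
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

struct IndexedBin {
  std::vector<std::string> Accesses;           // payloads, insertion order
  std::unordered_map<std::string, size_t> Map; // key -> vector index

  void insert(const std::string &Key) {
    Map[Key] = Accesses.size();
    Accesses.push_back(Key);
  }
  std::string *find(const std::string &Key) {
    auto It = Map.find(Key);
    return It == Map.end() ? nullptr : &Accesses[It->second];
  }
};

int main() {
  IndexedBin Bin;
  Bin.insert("store @g");
  Bin.insert("load @g");
  assert(Bin.find("load @g") && !Bin.find("load @h"));
  assert(Bin.Accesses.front() == "store @g"); // order preserved
}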
- AAPointerInfo::Access Before = *It; + AAPointerInfo::Access &Current = Bin->get(It); + AAPointerInfo::Access Before = Current; // The new one will be combined with the existing one. - *It &= Acc; - return *It == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + Current &= Acc; + return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + } + + /// See AAPointerInfo::forallInterferingAccesses. + bool forallInterferingAccesses( + AAPointerInfo::OffsetAndSize OAS, + function_ref CB) const { + if (!isValidState()) + return false; + + for (auto &It : AccessBins) { + AAPointerInfo::OffsetAndSize ItOAS = It.getFirst(); + if (!OAS.mayOverlap(ItOAS)) + continue; + bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); + for (auto &Access : *It.getSecond()) + if (!CB(Access, IsExact)) + return false; + } + return true; } /// See AAPointerInfo::forallInterferingAccesses. @@ -1028,10 +1089,11 @@ protected: function_ref CB) const { if (!isValidState()) return false; + // First find the offset and size of I. - OffsetAndSize OAS(-1, -1); + AAPointerInfo::OffsetAndSize OAS(-1, -1); for (auto &It : AccessBins) { - for (auto &Access : It.getSecond()) { + for (auto &Access : *It.getSecond()) { if (Access.getRemoteInst() == &I) { OAS = It.getFirst(); break; @@ -1040,21 +1102,13 @@ protected: if (OAS.getSize() != -1) break; } + // No access for I was found, we are done. if (OAS.getSize() == -1) return true; // Now that we have an offset and size, find all overlapping ones and use // the callback on the accesses. - for (auto &It : AccessBins) { - OffsetAndSize ItOAS = It.getFirst(); - if (!OAS.mayOverlap(ItOAS)) - continue; - bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); - for (auto &Access : It.getSecond()) - if (!CB(Access, IsExact)) - return false; - } - return true; + return forallInterferingAccesses(OAS, CB); } private: @@ -1062,6 +1116,7 @@ private: BooleanState BS; }; +namespace { struct AAPointerInfoImpl : public StateWrapper { using BaseTy = StateWrapper; @@ -1084,22 +1139,18 @@ struct AAPointerInfoImpl } bool forallInterferingAccesses( - LoadInst &LI, function_ref CB) + OffsetAndSize OAS, + function_ref CB) const override { - return State::forallInterferingAccesses(LI, CB); + return State::forallInterferingAccesses(OAS, CB); } bool forallInterferingAccesses( - StoreInst &SI, function_ref CB) - const override { - return State::forallInterferingAccesses(SI, CB); - } - bool forallInterferingWrites( - Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI, + Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref UserCB) const override { SmallPtrSet DominatingWrites; - SmallVector, 8> InterferingWrites; + SmallVector, 8> InterferingAccesses; - Function &Scope = *LI.getFunction(); + Function &Scope = *I.getFunction(); const auto &NoSyncAA = A.getAAFor( QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor( @@ -1127,13 +1178,15 @@ struct AAPointerInfoImpl // TODO: Use inter-procedural reachability and dominance. 
const auto &NoRecurseAA = A.getAAFor( - QueryingAA, IRPosition::function(*LI.getFunction()), - DepClassTy::OPTIONAL); + QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); - const bool CanUseCFGResoning = CanIgnoreThreading(LI); + const bool FindInterferingWrites = I.mayReadFromMemory(); + const bool FindInterferingReads = I.mayWriteToMemory(); + const bool UseDominanceReasoning = FindInterferingWrites; + const bool CanUseCFGResoning = CanIgnoreThreading(I); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - NoRecurseAA.isKnownNoRecurse() + NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning ? InfoCache.getAnalysisResultForFunction( Scope) : nullptr; @@ -1189,33 +1242,37 @@ struct AAPointerInfoImpl } auto AccessCB = [&](const Access &Acc, bool Exact) { - if (!Acc.isWrite()) + if ((!FindInterferingWrites || !Acc.isWrite()) && + (!FindInterferingReads || !Acc.isRead())) return true; // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. if (CanUseCFGResoning) { - if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA, - IsLiveInCalleeCB)) + if ((!Acc.isWrite() || + !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, + IsLiveInCalleeCB)) && + (!Acc.isRead() || + !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, + IsLiveInCalleeCB))) return true; - if (DT && Exact && - (Acc.getLocalInst()->getFunction() == LI.getFunction()) && + if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) && IsSameThreadAsLoad(Acc)) { - if (DT->dominates(Acc.getLocalInst(), &LI)) + if (DT->dominates(Acc.getLocalInst(), &I)) DominatingWrites.insert(&Acc); } } - InterferingWrites.push_back({&Acc, Exact}); + InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(LI, AccessCB)) + if (!State::forallInterferingAccesses(I, AccessCB)) return false; // If we cannot use CFG reasoning we only filter the non-write accesses // and are done here. if (!CanUseCFGResoning) { - for (auto &It : InterferingWrites) + for (auto &It : InterferingAccesses) if (!UserCB(*It.first, It.second)) return false; return true; @@ -1242,47 +1299,52 @@ struct AAPointerInfoImpl return false; }; - // Run the user callback on all writes we cannot skip and return if that + // Run the user callback on all accesses we cannot skip and return if that // succeeded for all or not. 
- unsigned NumInterferingWrites = InterferingWrites.size(); - for (auto &It : InterferingWrites) - if (!DT || NumInterferingWrites > MaxInterferingWrites || - !CanSkipAccess(*It.first, It.second)) + unsigned NumInterferingAccesses = InterferingAccesses.size(); + for (auto &It : InterferingAccesses) { + if (!DT || NumInterferingAccesses > MaxInterferingAccesses || + !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; + } + } return true; } - ChangeStatus translateAndAddCalleeState(Attributor &A, - const AAPointerInfo &CalleeAA, - int64_t CallArgOffset, CallBase &CB) { + ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA, + int64_t Offset, CallBase &CB, + bool FromCallee = false) { using namespace AA::PointerInfo; - if (!CalleeAA.getState().isValidState() || !isValidState()) + if (!OtherAA.getState().isValidState() || !isValidState()) return indicatePessimisticFixpoint(); - const auto &CalleeImplAA = static_cast(CalleeAA); - bool IsByval = CalleeImplAA.getAssociatedArgument()->hasByValAttr(); + const auto &OtherAAImpl = static_cast(OtherAA); + bool IsByval = + FromCallee && OtherAAImpl.getAssociatedArgument()->hasByValAttr(); // Combine the accesses bin by bin. ChangeStatus Changed = ChangeStatus::UNCHANGED; - for (auto &It : CalleeImplAA.getState()) { + for (auto &It : OtherAAImpl.getState()) { OffsetAndSize OAS = OffsetAndSize::getUnknown(); - if (CallArgOffset != OffsetAndSize::Unknown) - OAS = OffsetAndSize(It.first.getOffset() + CallArgOffset, - It.first.getSize()); - Accesses &Bin = AccessBins[OAS]; - for (const AAPointerInfo::Access &RAcc : It.second) { + if (Offset != OffsetAndSize::Unknown) + OAS = OffsetAndSize(It.first.getOffset() + Offset, It.first.getSize()); + Accesses *Bin = AccessBins.lookup(OAS); + for (const AAPointerInfo::Access &RAcc : *It.second) { if (IsByval && !RAcc.isRead()) continue; bool UsedAssumedInformation = false; - Optional Content = A.translateArgumentToCallSiteContent( - RAcc.getContent(), CB, *this, UsedAssumedInformation); - AccessKind AK = - AccessKind(RAcc.getKind() & (IsByval ? AccessKind::AK_READ - : AccessKind::AK_READ_WRITE)); + AccessKind AK = RAcc.getKind(); + Optional Content = RAcc.getContent(); + if (FromCallee) { + Content = A.translateArgumentToCallSiteContent( + RAcc.getContent(), CB, *this, UsedAssumedInformation); + AK = AccessKind( + AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE)); + } Changed = - Changed | addAccess(OAS.getOffset(), OAS.getSize(), CB, Content, AK, - RAcc.getType(), RAcc.getRemoteInst(), &Bin); + Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content, + AK, RAcc.getType(), RAcc.getRemoteInst(), Bin); } } return Changed; @@ -1305,7 +1367,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { bool handleAccess(Attributor &A, Instruction &I, Value &Ptr, Optional Content, AccessKind Kind, int64_t Offset, ChangeStatus &Changed, Type *Ty, - int64_t Size = AA::PointerInfo::OffsetAndSize::Unknown) { + int64_t Size = OffsetAndSize::Unknown) { using namespace AA::PointerInfo; // No need to find a size if one is given or the offset is unknown. 
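// translateAndAddState above imports another attribute's access bins into
// this one: each known (offset, size) bin is shifted by the byte offset the
// call site applies to the pointer, while an unknown offset collapses the bin
// into the unknown bin. Sketch of that translation over a plain map, with
// access payloads reduced to counts (names invented):
#include <cstdint>
#include <iostream>
#include <map>
#include <utility>

constexpr int64_t Unknown = 1 << 31;
using OffsetAndSize = std::pair<int64_t, int64_t>; // {offset, size}

void translateBins(const std::map<OffsetAndSize, int> &Callee,
                   std::map<OffsetAndSize, int> &Caller, int64_t Offset) {
  for (const auto &It : Callee) {
    OffsetAndSize OAS{Unknown, Unknown};
    if (Offset != Unknown)
      OAS = {It.first.first + Offset, It.first.second};
    Caller[OAS] += It.second; // merge into the (possibly shared) target bin
  }
}

int main() {
  std::map<OffsetAndSize, int> Callee{{{0, 4}, 1}, {{8, 4}, 2}}, Caller;
  translateBins(Callee, Caller, /*Offset=*/16);
  for (const auto &It : Caller) // prints [16,4]:1 and [24,4]:2
    std::cout << "[" << It.first.first << "," << It.first.second
              << "]:" << It.second << "\n";
}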
if (Offset != OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown && @@ -1315,13 +1377,13 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { if (!AccessSize.isScalable()) Size = AccessSize.getFixedSize(); } - Changed = Changed | addAccess(Offset, Size, I, Content, Kind, Ty); + Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, Ty); return true; }; /// Helper struct, will support ranges eventually. struct OffsetInfo { - int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown; + int64_t Offset = OffsetAndSize::Unknown; bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; } }; @@ -1329,7 +1391,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { using namespace AA::PointerInfo; - State S = getState(); ChangeStatus Changed = ChangeStatus::UNCHANGED; Value &AssociatedValue = getAssociatedValue(); @@ -1337,7 +1398,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { DenseMap OffsetInfoMap; OffsetInfoMap[&AssociatedValue] = OffsetInfo{0}; - auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI, + auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI, bool &Follow) { OffsetInfo &UsrOI = OffsetInfoMap[Usr]; UsrOI = PtrOI; @@ -1475,8 +1536,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { const auto &CSArgPI = A.getAAFor( *this, IRPosition::callsite_argument(*CB, ArgNo), DepClassTy::REQUIRED); - Changed = translateAndAddCalleeState( - A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) | + Changed = translateAndAddState(A, CSArgPI, + OffsetInfoMap[CurPtr].Offset, *CB) | Changed; return true; } @@ -1497,7 +1558,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { }; if (!A.checkForAllUses(UsePred, *this, AssociatedValue, /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, - EquivalentUseCB)) + /* IgnoreDroppableUses */ true, EquivalentUseCB)) return indicatePessimisticFixpoint(); LLVM_DEBUG({ @@ -1505,15 +1566,19 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { for (auto &It : AccessBins) { dbgs() << "[" << It.first.getOffset() << "-" << It.first.getOffset() + It.first.getSize() - << "] : " << It.getSecond().size() << "\n"; - for (auto &Acc : It.getSecond()) { + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; if (Acc.getLocalInst() != Acc.getRemoteInst()) dbgs() << " --> " << *Acc.getRemoteInst() << "\n"; - if (!Acc.isWrittenValueYetUndetermined()) - dbgs() << " - " << Acc.getWrittenValue() << "\n"; + if (!Acc.isWrittenValueYetUndetermined()) { + if (Acc.getWrittenValue()) + dbgs() << " - c: " << *Acc.getWrittenValue() << "\n"; + else + dbgs() << " - c: \n"; + } } } }); @@ -1576,7 +1641,7 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { LengthVal = Length->getSExtValue(); Value &Ptr = getAssociatedValue(); unsigned ArgNo = getIRPosition().getCallSiteArgNo(); - ChangeStatus Changed; + ChangeStatus Changed = ChangeStatus::UNCHANGED; if (ArgNo == 0) { handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed, nullptr, LengthVal); @@ -1601,7 +1666,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { const IRPosition &ArgPos = IRPosition::argument(*Arg); auto &ArgAA = A.getAAFor(*this, ArgPos, DepClassTy::REQUIRED); - return translateAndAddCalleeState(A, ArgAA, 0, *cast(getCtxI())); + return translateAndAddState(A, ArgAA, 0, 
*cast(getCtxI()), + /* FromCallee */ true); } /// See AbstractAttribute::trackStatistics() @@ -1619,9 +1685,11 @@ struct AAPointerInfoCallSiteReturned final : AAPointerInfoFloating { AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition()); } }; +} // namespace /// -----------------------NoUnwind Function Attribute-------------------------- +namespace { struct AANoUnwindImpl : AANoUnwind { AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {} @@ -1693,9 +1761,11 @@ struct AANoUnwindCallSite final : AANoUnwindImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } }; +} // namespace /// --------------------- Function Return Values ------------------------------- +namespace { /// "Attribute" that collects all potential returned values and the return /// instructions that they arise from. /// @@ -1821,7 +1891,7 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { // Check if we have an assumed unique return value that we could manifest. Optional UniqueRV = getAssumedUniqueReturnValue(A); - if (!UniqueRV.hasValue() || !UniqueRV.getValue()) + if (!UniqueRV || !UniqueRV.getValue()) return Changed; // Bookkeeping. @@ -1893,17 +1963,18 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { return true; }; + bool UsedAssumedInformation = false; auto ReturnInstCB = [&](Instruction &I) { ReturnInst &Ret = cast(I); return genericValueTraversal( A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB, - &I, /* UseValueSimplify */ true, /* MaxValues */ 16, - /* StripCB */ nullptr, /* Intraprocedural */ true); + &I, UsedAssumedInformation, /* UseValueSimplify */ true, + /* MaxValues */ 16, + /* StripCB */ nullptr, AA::Intraprocedural); }; // Discover returned values from all live returned instructions in the // associated function. - bool UsedAssumedInformation = false; if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret}, UsedAssumedInformation)) return indicatePessimisticFixpoint(); @@ -1941,20 +2012,10 @@ struct AAReturnedValuesCallSite final : AAReturnedValuesImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override {} }; +} // namespace /// ------------------------ NoSync Function Attribute ------------------------- -struct AANoSyncImpl : AANoSync { - AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {} - - const std::string getAsStr() const override { - return getAssumed() ? "nosync" : "may-sync"; - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; -}; - bool AANoSync::isNonRelaxedAtomic(const Instruction *I) { if (!I->isAtomic()) return false; @@ -1997,6 +2058,18 @@ bool AANoSync::isNoSyncIntrinsic(const Instruction *I) { return false; } +namespace { +struct AANoSyncImpl : AANoSync { + AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {} + + const std::string getAsStr() const override { + return getAssumed() ? "nosync" : "may-sync"; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override; +}; + ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { auto CheckRWInstForNoSync = [&](Instruction &I) { @@ -2059,9 +2132,11 @@ struct AANoSyncCallSite final : AANoSyncImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } }; +} // namespace /// ------------------------ No-Free Attributes ---------------------------- +namespace { struct AANoFreeImpl : public AANoFree { AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {} @@ -2243,8 +2318,10 @@ struct AANoFreeCallSiteReturned final : AANoFreeFloating { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) } }; +} // namespace /// ------------------------ NonNull Argument Attribute ------------------------ +namespace { static int64_t getKnownNonNullAndDerefBytesForUse( Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { @@ -2332,7 +2409,7 @@ struct AANonNullImpl : AANonNull { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - Value &V = getAssociatedValue(); + Value &V = *getAssociatedValue().stripPointerCasts(); if (!NullIsDefined && hasAttr({Attribute::NonNull, Attribute::Dereferenceable}, /* IgnoreSubsumingPositions */ false, &A)) { @@ -2356,7 +2433,7 @@ struct AANonNullImpl : AANonNull { } } - if (isa(&getAssociatedValue())) { + if (isa(V)) { indicatePessimisticFixpoint(); return; } @@ -2419,8 +2496,10 @@ struct AANonNullFloating : public AANonNullImpl { }; StateType T; + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); @@ -2472,9 +2551,11 @@ struct AANonNullCallSiteReturned final /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } }; +} // namespace /// ------------------------ No-Recurse Attributes ---------------------------- +namespace { struct AANoRecurseImpl : public AANoRecurse { AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {} @@ -2498,14 +2579,15 @@ struct AANoRecurseFunction final : AANoRecurseImpl { DepClassTy::NONE); return NoRecurseAA.isKnownNoRecurse(); }; - bool AllCallSitesKnown; - if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) { + bool UsedAssumedInformation = false; + if (A.checkForAllCallSites(CallSitePred, *this, true, + UsedAssumedInformation)) { // If we know all call sites and all are known no-recurse, we are done. // If all known call sites, which might not be all that exist, are known // to be no-recurse, we are not done but we can continue to assume // no-recurse. If one of the call sites we have not visited will become // live, another update is triggered. 
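// The change right below is one instance of a migration running through this
// whole patch: the AllCallSitesKnown out-parameter becomes
// UsedAssumedInformation, i.e. "could this answer still change?". An
// optimistic fixpoint may only be locked in when the positive answer rested
// on no assumed facts. Toy model of that rule (invented types, not the
// Attributor API):
#include <iostream>
#include <vector>

struct Answer {
  bool Holds;   // does the predicate hold at this call site?
  bool Assumed; // did the answer rely on information that may still change?
};

bool checkAllCallSites(const std::vector<Answer> &As,
                       bool &UsedAssumedInformation) {
  for (const Answer &A : As) {
    if (!A.Holds)
      return false;
    UsedAssumedInformation |= A.Assumed;
  }
  return true;
}

int main() {
  bool UsedAssumed = false;
  std::vector<Answer> As{{true, false}, {true, true}};
  if (checkAllCallSites(As, UsedAssumed) && !UsedAssumed)
    std::cout << "indicateOptimisticFixpoint()\n"; // not taken here
  else
    std::cout << "stay optimistic, keep iterating\n"; // one answer was assumed
}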
- if (AllCallSitesKnown) + if (!UsedAssumedInformation) indicateOptimisticFixpoint(); return ChangeStatus::UNCHANGED; } @@ -2549,9 +2631,11 @@ struct AANoRecurseCallSite final : AANoRecurseImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } }; +} // namespace /// -------------------- Undefined-Behavior Attributes ------------------------ +namespace { struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A) : AAUndefinedBehavior(IRP, A) {} @@ -2582,7 +2666,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // Either we stopped and the appropriate action was taken, // or we got back a simplified value to continue. Optional SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I); - if (!SimplifiedPtrOp.hasValue() || !SimplifiedPtrOp.getValue()) + if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue()) return true; const Value *PtrOpVal = SimplifiedPtrOp.getValue(); @@ -2627,7 +2711,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // or we got back a simplified value to continue. Optional SimplifiedCond = stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst); - if (!SimplifiedCond.hasValue() || !SimplifiedCond.getValue()) + if (!SimplifiedCond || !*SimplifiedCond) return true; AssumedNoUBInsts.insert(&I); return true; @@ -2673,10 +2757,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { IRPosition::value(*ArgVal), *this, UsedAssumedInformation); if (UsedAssumedInformation) continue; - if (SimplifiedVal.hasValue() && !SimplifiedVal.getValue()) + if (SimplifiedVal && !SimplifiedVal.getValue()) return true; - if (!SimplifiedVal.hasValue() || - isa(*SimplifiedVal.getValue())) { + if (!SimplifiedVal || isa(*SimplifiedVal.getValue())) { KnownUBInsts.insert(&I); continue; } @@ -2691,40 +2774,38 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { return true; }; - auto InspectReturnInstForUB = - [&](Value &V, const SmallSetVector RetInsts) { - // Check if a return instruction always cause UB or not - // Note: It is guaranteed that the returned position of the anchor - // scope has noundef attribute when this is called. - // We also ensure the return position is not "assumed dead" - // because the returned value was then potentially simplified to - // `undef` in AAReturnedValues without removing the `noundef` - // attribute yet. - - // When the returned position has noundef attriubte, UB occur in the - // following cases. - // (1) Returned value is known to be undef. - // (2) The value is known to be a null pointer and the returned - // position has nonnull attribute (because the returned value is - // poison). - bool FoundUB = false; - if (isa(V)) { - FoundUB = true; - } else { - if (isa(V)) { - auto &NonNullAA = A.getAAFor( - *this, IRPosition::returned(*getAnchorScope()), - DepClassTy::NONE); - if (NonNullAA.isKnownNonNull()) - FoundUB = true; - } - } + auto InspectReturnInstForUB = [&](Instruction &I) { + auto &RI = cast(I); + // Either we stopped and the appropriate action was taken, + // or we got back a simplified return value to continue. 
+ Optional SimplifiedRetValue = + stopOnUndefOrAssumed(A, RI.getReturnValue(), &I); + if (!SimplifiedRetValue || !*SimplifiedRetValue) + return true; - if (FoundUB) - for (ReturnInst *RI : RetInsts) - KnownUBInsts.insert(RI); - return true; - }; + // Check if a return instruction always cause UB or not + // Note: It is guaranteed that the returned position of the anchor + // scope has noundef attribute when this is called. + // We also ensure the return position is not "assumed dead" + // because the returned value was then potentially simplified to + // `undef` in AAReturnedValues without removing the `noundef` + // attribute yet. + + // When the returned position has noundef attriubte, UB occurs in the + // following cases. + // (1) Returned value is known to be undef. + // (2) The value is known to be a null pointer and the returned + // position has nonnull attribute (because the returned value is + // poison). + if (isa(*SimplifiedRetValue)) { + auto &NonNullAA = A.getAAFor( + *this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE); + if (NonNullAA.isKnownNonNull()) + KnownUBInsts.insert(&I); + } + + return true; + }; bool UsedAssumedInformation = false; A.checkForAllInstructions(InspectMemAccessInstForUB, *this, @@ -2747,8 +2828,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { auto &RetPosNoUndefAA = A.getAAFor(*this, ReturnIRP, DepClassTy::NONE); if (RetPosNoUndefAA.isKnownNoUndef()) - A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB, - *this); + A.checkForAllInstructions(InspectReturnInstForUB, *this, + {Instruction::Ret}, UsedAssumedInformation, + /* CheckBBLivenessOnly */ true); } } @@ -2776,7 +2858,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { case Instruction::AtomicRMW: return !AssumedNoUBInsts.count(I); case Instruction::Br: { - auto BrInst = cast(I); + auto *BrInst = cast(I); if (BrInst->isUnconditional()) return false; return !AssumedNoUBInsts.count(I); @@ -2847,13 +2929,13 @@ private: IRPosition::value(*V), *this, UsedAssumedInformation); if (!UsedAssumedInformation) { // Don't depend on assumed values. - if (!SimplifiedV.hasValue()) { + if (!SimplifiedV) { // If it is known (which we tested above) but it doesn't have a value, // then we can assume `undef` and hence the instruction is UB. KnownUBInsts.insert(I); return llvm::None; } - if (!SimplifiedV.getValue()) + if (!*SimplifiedV) return nullptr; V = *SimplifiedV; } @@ -2877,9 +2959,11 @@ struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl { KnownUBInsts.size(); } }; +} // namespace /// ------------------------ Will-Return Attributes ---------------------------- +namespace { // Helper function that checks whether a function has any cycle which we don't // know if it is bounded or not. // Loops with maximum trip count are considered bounded, any other cycle not. @@ -3018,9 +3102,11 @@ struct AAWillReturnCallSite final : AAWillReturnImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } }; +} // namespace /// -------------------AAReachability Attribute-------------------------- +namespace { struct AAReachabilityImpl : AAReachability { AAReachabilityImpl(const IRPosition &IRP, Attributor &A) : AAReachability(IRP, A) {} @@ -3032,10 +3118,6 @@ struct AAReachabilityImpl : AAReachability { /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { - const auto &NoRecurseAA = A.getAAFor( - *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); - if (!NoRecurseAA.isAssumedNoRecurse()) - return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; } }; @@ -3047,9 +3129,11 @@ struct AAReachabilityFunction final : public AAReachabilityImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); } }; +} // namespace /// ------------------------ NoAlias Argument Attribute ------------------------ +namespace { struct AANoAliasImpl : AANoAlias { AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) { assert(getAssociatedType()->isPointerTy() && @@ -3146,10 +3230,10 @@ struct AANoAliasArgument final // If the argument is never passed through callbacks, no-alias cannot break // synchronization. - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (A.checkForAllCallSites( [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this, - true, AllCallSitesKnown)) + true, UsedAssumedInformation)) return Base::updateImpl(A); // TODO: add no-alias but make sure it doesn't break synchronization by @@ -3246,14 +3330,20 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { return false; } + auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) { + const auto &DerefAA = A.getAAFor( + *this, IRPosition::value(*O), DepClassTy::OPTIONAL); + return DerefAA.getAssumedDereferenceableBytes(); + }; + A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL); const IRPosition &VIRP = IRPosition::value(getAssociatedValue()); const Function *ScopeFn = VIRP.getAnchorScope(); auto &NoCaptureAA = A.getAAFor(*this, VIRP, DepClassTy::NONE); // Check whether the value is captured in the scope using AANoCapture. - // Look at CFG and check only uses possibly executed before this - // callsite. + // Look at CFG and check only uses possibly executed before this + // callsite. auto UsePred = [&](const Use &U, bool &Follow) -> bool { Instruction *UserI = cast(U.getUser()); @@ -3265,12 +3355,6 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { return true; if (ScopeFn) { - const auto &ReachabilityAA = A.getAAFor( - *this, IRPosition::function(*ScopeFn), DepClassTy::OPTIONAL); - - if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI())) - return true; - if (auto *CB = dyn_cast(UserI)) { if (CB->isArgOperand(&U)) { @@ -3284,17 +3368,26 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { return true; } } + + if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this)) + return true; } - // For cases which can potentially have more users - if (isa(U) || isa(U) || isa(U) || - isa(U)) { + // TODO: We should track the capturing uses in AANoCapture but the problem + // is CGSCC runs. For those we would need to "allow" AANoCapture for + // a value in the module slice. 
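// The switch below replaces an ad-hoc isa<> list with the three-way
// DetermineUseCaptureKind classification. The control-flow pattern in
// isolation, with a toy classifier standing in for LLVM's capture tracking:
#include <iostream>

enum class UseCaptureKind { NO_CAPTURE, MAY_CAPTURE, PASSTHROUGH };

// 'c' = comparison-like benign use, 'g' = gep/cast/phi-like passthrough,
// anything else may capture. (Invented encoding, for the sketch only.)
UseCaptureKind classifyUse(char UseKind) {
  switch (UseKind) {
  case 'c': return UseCaptureKind::NO_CAPTURE;
  case 'g': return UseCaptureKind::PASSTHROUGH;
  default:  return UseCaptureKind::MAY_CAPTURE;
  }
}

// Visit one use: true = keep scanning, false = the value may be captured;
// Follow requests traversal of the user's own uses.
bool visitUse(char UseKind, bool &Follow) {
  switch (classifyUse(UseKind)) {
  case UseCaptureKind::NO_CAPTURE:
    return true;
  case UseCaptureKind::MAY_CAPTURE:
    return false;
  case UseCaptureKind::PASSTHROUGH:
    Follow = true;
    return true;
  }
  return false; // unreachable; silences -Wreturn-type
}

int main() {
  bool Follow = false;
  std::cout << visitUse('g', Follow) << Follow << "\n"; // prints 11
}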
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { + case UseCaptureKind::NO_CAPTURE: + return true; + case UseCaptureKind::MAY_CAPTURE: + LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *UserI + << "\n"); + return false; + case UseCaptureKind::PASSTHROUGH: Follow = true; return true; } - - LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n"); - return false; + llvm_unreachable("unknown UseCaptureKind"); }; if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { @@ -3423,12 +3516,21 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } }; +} // namespace /// -------------------AAIsDead Function Attribute----------------------- +namespace { struct AAIsDeadValueImpl : public AAIsDead { AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (auto *Scope = getAnchorScope()) + if (!A.isRunOn(*Scope)) + indicatePessimisticFixpoint(); + } + /// See AAIsDead::isAssumedDead(). bool isAssumedDead() const override { return isAssumed(IS_DEAD); } @@ -3452,22 +3554,25 @@ struct AAIsDeadValueImpl : public AAIsDead { } /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { + virtual const std::string getAsStr() const override { return isAssumedDead() ? "assumed-dead" : "assumed-live"; } /// Check if all uses are assumed dead. bool areAllUsesAssumedDead(Attributor &A, Value &V) { // Callers might not check the type, void has no uses. - if (V.getType()->isVoidTy()) + if (V.getType()->isVoidTy() || V.use_empty()) return true; // If we replace a value with a constant there are no uses left afterwards. if (!isa(V)) { + if (auto *I = dyn_cast(&V)) + if (!A.isRunOn(*I->getFunction())) + return false; bool UsedAssumedInformation = false; Optional C = A.getAssumedConstant(V, *this, UsedAssumedInformation); - if (!C.hasValue() || *C) + if (!C || *C) return true; } @@ -3477,7 +3582,8 @@ struct AAIsDeadValueImpl : public AAIsDead { // without going through N update cycles. This is not required for // correctness. return A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ false, - DepClassTy::REQUIRED); + DepClassTy::REQUIRED, + /* IgnoreDroppableUses */ false); } /// Determine if \p I is assumed to be side-effect free. @@ -3508,6 +3614,8 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadValueImpl::initialize(A); + if (isa(getAssociatedValue())) { indicatePessimisticFixpoint(); return; @@ -3538,6 +3646,15 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { }); } + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + Instruction *I = dyn_cast(&getAssociatedValue()); + if (isa_and_nonnull(I)) + if (isValidState()) + return "assumed-dead-store"; + return AAIsDeadValueImpl::getAsStr(); + } + /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { Instruction *I = dyn_cast(&getAssociatedValue()); @@ -3553,6 +3670,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { return ChangeStatus::UNCHANGED; } + bool isRemovableStore() const override { + return isAssumed(IS_REMOVABLE) && isa(&getAssociatedValue()); + } + /// See AbstractAttribute::manifest(...). 
ChangeStatus manifest(Attributor &A) override { Value &V = getAssociatedValue(); @@ -3567,21 +3688,7 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { return ChangeStatus::CHANGED; } } - if (V.use_empty()) - return ChangeStatus::UNCHANGED; - - bool UsedAssumedInformation = false; - Optional C = - A.getAssumedConstant(V, *this, UsedAssumedInformation); - if (C.hasValue() && C.getValue()) - return ChangeStatus::UNCHANGED; - - // Replace the value with undef as it is dead but keep droppable uses around - // as they provide information we don't want to give up on just yet. - UndefValue &UV = *UndefValue::get(V.getType()); - bool AnyChange = - A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false); - return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; + return ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() @@ -3596,23 +3703,22 @@ struct AAIsDeadArgument : public AAIsDeadFloating { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadFloating::initialize(A); if (!A.isFunctionIPOAmendable(*getAnchorScope())) indicatePessimisticFixpoint(); } /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = AAIsDeadFloating::manifest(A); Argument &Arg = *getAssociatedArgument(); if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {})) if (A.registerFunctionSignatureRewrite( Arg, /* ReplacementTypes */ {}, Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{}, Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) { - Arg.dropDroppableUses(); return ChangeStatus::CHANGED; } - return Changed; + return ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() @@ -3625,6 +3731,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadValueImpl::initialize(A); if (isa(getAssociatedValue())) indicatePessimisticFixpoint(); } @@ -3661,7 +3768,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A) - : AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {} + : AAIsDeadFloating(IRP, A) {} /// See AAIsDead::isAssumedDead(). bool isAssumedDead() const override { @@ -3670,6 +3777,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadFloating::initialize(A); if (isa(getAssociatedValue())) { indicatePessimisticFixpoint(); return; @@ -3707,7 +3815,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { } private: - bool IsAssumedSideEffectFree; + bool IsAssumedSideEffectFree = true; }; struct AAIsDeadReturned : public AAIsDeadValueImpl { @@ -3727,9 +3835,8 @@ struct AAIsDeadReturned : public AAIsDeadValueImpl { return areAllUsesAssumedDead(A, *ACS.getInstruction()); }; - bool AllCallSitesKnown; if (!A.checkForAllCallSites(PredForCallSite, *this, true, - AllCallSitesKnown)) + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; @@ -3761,17 +3868,13 @@ struct AAIsDeadFunction : public AAIsDead { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - const Function *F = getAnchorScope(); - if (F && !F->isDeclaration()) { - // We only want to compute liveness once. If the function is not part of - // the SCC, skip it. 
-    if (A.isRunOn(*const_cast<Function *>(F))) {
-      ToBeExploredFrom.insert(&F->getEntryBlock().front());
-      assumeLive(A, F->getEntryBlock());
-    } else {
-      indicatePessimisticFixpoint();
-    }
+    Function *F = getAnchorScope();
+    if (!F || F->isDeclaration() || !A.isRunOn(*F)) {
+      indicatePessimisticFixpoint();
+      return;
     }
+    ToBeExploredFrom.insert(&F->getEntryBlock().front());
+    assumeLive(A, F->getEntryBlock());
   }
 
   /// See AbstractAttribute::getAsStr().
@@ -3834,6 +3937,9 @@ struct AAIsDeadFunction : public AAIsDead {
   ChangeStatus updateImpl(Attributor &A) override;
 
   bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
+    assert(From->getParent() == getAnchorScope() &&
+           To->getParent() == getAnchorScope() &&
+           "Used AAIsDead of the wrong function");
     return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
   }
 
@@ -3973,7 +4079,7 @@ identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
   } else {
     Optional<Constant *> C =
         A.getAssumedConstant(*BI.getCondition(), AA, UsedAssumedInformation);
-    if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
+    if (!C || isa_and_nonnull<UndefValue>(*C)) {
       // No value yet, assume both edges are dead.
     } else if (isa_and_nonnull<ConstantInt>(*C)) {
       const BasicBlock *SuccBB =
@@ -3995,7 +4101,7 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
   bool UsedAssumedInformation = false;
   Optional<Constant *> C =
       A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
-  if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
+  if (!C || isa_and_nonnull<UndefValue>(C.getValue())) {
     // No value yet, assume all edges are dead.
   } else if (isa_and_nonnull<ConstantInt>(C.getValue())) {
     for (auto &CaseIt : SI.cases()) {
@@ -4142,9 +4248,11 @@ struct AAIsDeadCallSite final : AAIsDeadFunction {
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {}
 };
+} // namespace
 
 /// -------------------- Dereferenceable Argument Attribute --------------------
 
+namespace {
 struct AADereferenceableImpl : AADereferenceable {
   AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
       : AADereferenceable(IRP, A) {}
 
   /// See AbstractAttribute::initialize(...).
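// Illustrative sketch, not part of the vendored diff: AAIsDeadFunction seeds
// its search at the entry instruction and only explores edges it has proven
// may execute; blocks never reached stay "assumed dead". The exploration in
// std-only form, with ints standing in for basic blocks:
#include <map>
#include <set>
#include <vector>

using SketchBlock = int;
// Map from block to the successors currently assumed live.
using SketchCFG = std::map<SketchBlock, std::vector<SketchBlock>>;

inline std::set<SketchBlock> assumedLiveBlocks(const SketchCFG &LiveSucc,
                                               SketchBlock Entry) {
  std::set<SketchBlock> Live{Entry};
  std::vector<SketchBlock> Worklist{Entry};
  while (!Worklist.empty()) {
    SketchBlock B = Worklist.back();
    Worklist.pop_back();
    auto It = LiveSucc.find(B);
    if (It == LiveSucc.end())
      continue;
    for (SketchBlock Succ : It->second)
      if (Live.insert(Succ).second) // first time this block is reached
        Worklist.push_back(Succ);
  }
  return Live;
}
// End of sketch.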
void initialize(Attributor &A) override { + Value &V = *getAssociatedValue().stripPointerCasts(); SmallVector Attrs; getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, Attrs, /* IgnoreSubsumingPositions */ false, &A); @@ -4162,9 +4271,8 @@ struct AADereferenceableImpl : AADereferenceable { NonNullAA = &A.getAAFor(*this, IRP, DepClassTy::NONE); bool CanBeNull, CanBeFreed; - takeKnownDerefBytesMaximum( - IRP.getAssociatedValue().getPointerDereferenceableBytes( - A.getDataLayout(), CanBeNull, CanBeFreed)); + takeKnownDerefBytesMaximum(V.getPointerDereferenceableBytes( + A.getDataLayout(), CanBeNull, CanBeFreed)); bool IsFnInterface = IRP.isFnInterfaceKind(); Function *FnScope = IRP.getAnchorScope(); @@ -4263,8 +4371,9 @@ struct AADereferenceableFloating : AADereferenceableImpl { unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); - const Value *Base = - stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false); + const Value *Base = stripAndAccumulateOffsets( + A, *this, &V, DL, Offset, /* GetMinOffset */ false, + /* AllowNonInbounds */ true); const auto &AA = A.getAAFor( *this, IRPosition::value(*Base), DepClassTy::REQUIRED); @@ -4312,8 +4421,10 @@ struct AADereferenceableFloating : AADereferenceableImpl { }; DerefState T; + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); @@ -4377,9 +4488,11 @@ struct AADereferenceableCallSiteReturned final STATS_DECLTRACK_CS_ATTR(dereferenceable); } }; +} // namespace // ------------------------ Align Argument Attribute ------------------------ +namespace { static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &TrackUse) { @@ -4450,14 +4563,8 @@ struct AAAlignImpl : AAAlign { for (const Attribute &Attr : Attrs) takeKnownMaximum(Attr.getValueAsInt()); - Value &V = getAssociatedValue(); - // TODO: This is a HACK to avoid getPointerAlignment to introduce a ptr2int - // use of the function pointer. This was caused by D73131. We want to - // avoid this for function pointers especially because we iterate - // their uses and int2ptr is not handled. It is not a correctness - // problem though! 
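// Illustrative sketch, not part of the vendored diff: the dereferenceable
// hunk above walks from a pointer through casts and constant-offset GEPs,
// summing byte offsets until the underlying base is reached (the job of
// stripAndAccumulateOffsets). The same walk over a toy node type:
#include <cstdint>
#include <utility>

struct SketchPtr {
  const SketchPtr *Base = nullptr; // non-null for casts and constant GEPs
  int64_t ByteOffset = 0;          // zero for pure casts
};

inline std::pair<const SketchPtr *, int64_t>
stripAndAccumulateSketch(const SketchPtr *V) {
  int64_t Offset = 0;
  while (V->Base) { // peel one cast/GEP layer per iteration
    Offset += V->ByteOffset;
    V = V->Base;
  }
  return {V, Offset};
}
// End of sketch.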
- if (!V.getType()->getPointerElementType()->isFunctionTy()) - takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); + Value &V = *getAssociatedValue().stripPointerCasts(); + takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); if (getIRPosition().isFnInterfaceKind() && (!getAnchorScope() || @@ -4479,16 +4586,16 @@ struct AAAlignImpl : AAAlign { for (const Use &U : AssociatedValue.uses()) { if (auto *SI = dyn_cast(U.getUser())) { if (SI->getPointerOperand() == &AssociatedValue) - if (SI->getAlignment() < getAssumedAlign()) { + if (SI->getAlign() < getAssumedAlign()) { STATS_DECLTRACK(AAAlign, Store, "Number of times alignment added to a store"); - SI->setAlignment(Align(getAssumedAlign())); + SI->setAlignment(getAssumedAlign()); LoadStoreChanged = ChangeStatus::CHANGED; } } else if (auto *LI = dyn_cast(U.getUser())) { if (LI->getPointerOperand() == &AssociatedValue) - if (LI->getAlignment() < getAssumedAlign()) { - LI->setAlignment(Align(getAssumedAlign())); + if (LI->getAlign() < getAssumedAlign()) { + LI->setAlignment(getAssumedAlign()); STATS_DECLTRACK(AAAlign, Load, "Number of times alignment added to a load"); LoadStoreChanged = ChangeStatus::CHANGED; @@ -4532,9 +4639,8 @@ struct AAAlignImpl : AAAlign { /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { - return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) + - "-" + std::to_string(getAssumedAlign()) + ">") - : "unknown-align"; + return "align<" + std::to_string(getKnownAlign().value()) + "-" + + std::to_string(getAssumedAlign().value()) + ">"; } }; @@ -4548,6 +4654,8 @@ struct AAAlignFloating : AAAlignImpl { auto VisitValueCB = [&](Value &V, const Instruction *, AAAlign::StateType &T, bool Stripped) -> bool { + if (isa(V) || isa(V)) + return true; const auto &AA = A.getAAFor(*this, IRPosition::value(V), DepClassTy::REQUIRED); if (!Stripped && this == &AA) { @@ -4555,6 +4663,7 @@ struct AAAlignFloating : AAAlignImpl { unsigned Alignment = 1; if (const Value *Base = GetPointerBaseWithConstantOffset(&V, Offset, DL)) { + // TODO: Use AAAlign for the base too. Align PA = Base->getPointerAlignment(DL); // BasePointerAddr + Offset = Alignment * Q for some integer Q. // So we can say that the maximum power of two which is a divisor of @@ -4578,8 +4687,10 @@ struct AAAlignFloating : AAAlignImpl { }; StateType T; + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); // TODO: If we know we visited all incoming values, thus no are assumed @@ -4657,7 +4768,7 @@ struct AAAlignCallSiteArgument final : AAAlignFloating { // so we do not need to track a dependence. 
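// Illustrative sketch, not part of the vendored diff: the comment in the
// AAAlignFloating hunk ("BasePointerAddr + Offset = Alignment * Q") boils
// down to this rule: the guaranteed alignment of Base+Offset is the base
// alignment capped by the lowest set bit of the offset. As a checked helper:
#include <cassert>
#include <cstdint>

inline uint64_t alignAfterOffset(uint64_t BaseAlign, int64_t Offset) {
  if (Offset == 0)
    return BaseAlign; // the full base alignment survives
  uint64_t Mag = uint64_t(Offset < 0 ? -Offset : Offset);
  uint64_t OffsetAlign = Mag & (~Mag + 1); // isolate the lowest set bit
  return OffsetAlign < BaseAlign ? OffsetAlign : BaseAlign;
}

inline void alignAfterOffsetDemo() {
  assert(alignAfterOffset(16, 4) == 4);   // 16-aligned base, +4 => 4-aligned
  assert(alignAfterOffset(16, 32) == 16); // +32 keeps the 16-byte guarantee
}
// End of sketch.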
const auto &ArgAlignAA = A.getAAFor( *this, IRPosition::argument(*Arg), DepClassTy::NONE); - takeKnownMaximum(ArgAlignAA.getKnownAlign()); + takeKnownMaximum(ArgAlignAA.getKnownAlign().value()); } return Changed; } @@ -4684,8 +4795,10 @@ struct AAAlignCallSiteReturned final /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } }; +} // namespace /// ------------------ Function No-Return Attribute ---------------------------- +namespace { struct AANoReturnImpl : public AANoReturn { AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {} @@ -4712,31 +4825,175 @@ struct AANoReturnImpl : public AANoReturn { return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; } -}; - -struct AANoReturnFunction final : AANoReturnImpl { - AANoReturnFunction(const IRPosition &IRP, Attributor &A) - : AANoReturnImpl(IRP, A) {} +}; + +struct AANoReturnFunction final : AANoReturnImpl { + AANoReturnFunction(const IRPosition &IRP, Attributor &A) + : AANoReturnImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) } +}; + +/// NoReturn attribute deduction for a call sites. +struct AANoReturnCallSite final : AANoReturnImpl { + AANoReturnCallSite(const IRPosition &IRP, Attributor &A) + : AANoReturnImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoReturnImpl::initialize(A); + if (Function *F = getAssociatedFunction()) { + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos, DepClassTy::REQUIRED); + if (!FnAA.isAssumedNoReturn()) + indicatePessimisticFixpoint(); + } + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos, DepClassTy::REQUIRED); + return clampStateAndIndicateChange(getState(), FnAA.getState()); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } +}; +} // namespace + +/// ----------------------- Instance Info --------------------------------- + +namespace { +/// A class to hold the state of for no-capture attributes. +struct AAInstanceInfoImpl : public AAInstanceInfo { + AAInstanceInfoImpl(const IRPosition &IRP, Attributor &A) + : AAInstanceInfo(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Value &V = getAssociatedValue(); + if (auto *C = dyn_cast(&V)) { + if (C->isThreadDependent()) + indicatePessimisticFixpoint(); + else + indicateOptimisticFixpoint(); + return; + } + if (auto *CB = dyn_cast(&V)) + if (CB->arg_size() == 0 && !CB->mayHaveSideEffects() && + !CB->mayReadFromMemory()) { + indicateOptimisticFixpoint(); + return; + } + } + + /// See AbstractAttribute::updateImpl(...). 
+  ChangeStatus updateImpl(Attributor &A) override {
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+    Value &V = getAssociatedValue();
+    const Function *Scope = nullptr;
+    if (auto *I = dyn_cast<Instruction>(&V))
+      Scope = I->getFunction();
+    if (auto *A = dyn_cast<Argument>(&V)) {
+      Scope = A->getParent();
+      if (!Scope->hasLocalLinkage())
+        return Changed;
+    }
+    if (!Scope)
+      return indicateOptimisticFixpoint();
+
+    auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+        *this, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
+    if (NoRecurseAA.isAssumedNoRecurse())
+      return Changed;
+
+    auto UsePred = [&](const Use &U, bool &Follow) {
+      const Instruction *UserI = dyn_cast<Instruction>(U.getUser());
+      if (!UserI || isa<CastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
+          isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+        Follow = true;
+        return true;
+      }
+      if (isa<LoadInst>(UserI) || isa<CmpInst>(UserI) ||
+          (isa<StoreInst>(UserI) &&
+           cast<StoreInst>(UserI)->getValueOperand() != U.get()))
+        return true;
+      if (auto *CB = dyn_cast<CallBase>(UserI)) {
+        // This check is not guaranteeing uniqueness but for now that we cannot
+        // end up with two versions of \p U thinking it was one.
+        if (!CB->getCalledFunction() ||
+            !CB->getCalledFunction()->hasLocalLinkage())
+          return true;
+        if (!CB->isArgOperand(&U))
+          return false;
+        const auto &ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+            *this, IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)),
+            DepClassTy::OPTIONAL);
+        if (!ArgInstanceInfoAA.isAssumedUniqueForAnalysis())
+          return false;
+        // If this call base might reach the scope again we might forward the
+        // argument back here. This is very conservative.
+        if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr))
+          return false;
+        return true;
+      }
+      return false;
+    };
+
+    auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
+      if (auto *SI = dyn_cast<StoreInst>(OldU.getUser())) {
+        auto *Ptr = SI->getPointerOperand()->stripPointerCasts();
+        if (isa<AllocaInst>(Ptr) && AA::isDynamicallyUnique(A, *this, *Ptr))
+          return true;
+        auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(
+            *SI->getFunction());
+        if (isAllocationFn(Ptr, TLI) && AA::isDynamicallyUnique(A, *this, *Ptr))
+          return true;
+      }
+      return false;
+    };
+
+    if (!A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ true,
+                           DepClassTy::OPTIONAL,
+                           /* IgnoreDroppableUses */ true, EquivalentUseCB))
+      return indicatePessimisticFixpoint();
+
+    return Changed;
+  }
+
+  /// See AbstractState::getAsStr().
+  const std::string getAsStr() const override {
+    return isAssumedUniqueForAnalysis() ? "<unique [fAa]>" : "<unknown>";
+  }
 
   /// See AbstractAttribute::trackStatistics()
-  void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
+  void trackStatistics() const override {}
 };
 
-/// NoReturn attribute deduction for a call sites.
-struct AANoReturnCallSite final : AANoReturnImpl {
-  AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
-      : AANoReturnImpl(IRP, A) {}
+/// InstanceInfo attribute for floating values.
+struct AAInstanceInfoFloating : AAInstanceInfoImpl {
+  AAInstanceInfoFloating(const IRPosition &IRP, Attributor &A)
+      : AAInstanceInfoImpl(IRP, A) {}
+};
 
-  /// See AbstractAttribute::initialize(...).
-  void initialize(Attributor &A) override {
-    AANoReturnImpl::initialize(A);
-    if (Function *F = getAssociatedFunction()) {
-      const IRPosition &FnPos = IRPosition::function(*F);
-      auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
-      if (!FnAA.isAssumedNoReturn())
-        indicatePessimisticFixpoint();
-    }
-  }
+/// NoCapture attribute for function arguments.
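// Illustrative sketch, not part of the vendored diff: all of these abstract
// attributes plug into one driver that re-runs updateImpl until nothing
// changes. Stripped of dependence tracking, the fixpoint loop is just:
#include <vector>

enum class SketchStatus { UNCHANGED, CHANGED };

struct SketchAA {
  virtual ~SketchAA() = default;
  virtual SketchStatus update() = 0; // one monotone step per round
};

// Iterate all attributes until a full round makes no progress; the real
// Attributor also bounds the iteration count and only re-runs attributes
// whose recorded dependences changed.
inline void runToFixpoint(std::vector<SketchAA *> &AAs, unsigned MaxRounds) {
  for (unsigned Round = 0; Round < MaxRounds; ++Round) {
    bool Progress = false;
    for (SketchAA *AA : AAs)
      Progress |= AA->update() == SketchStatus::CHANGED;
    if (!Progress)
      break;
  }
}
// End of sketch.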
+struct AAInstanceInfoArgument final : AAInstanceInfoFloating { + AAInstanceInfoArgument(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoFloating(IRP, A) {} +}; + +/// InstanceInfo attribute for call site arguments. +struct AAInstanceInfoCallSiteArgument final : AAInstanceInfoImpl { + AAInstanceInfoCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoImpl(IRP, A) {} /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { @@ -4744,18 +5001,44 @@ struct AANoReturnCallSite final : AANoReturnImpl { // call site specific liveness information and then it makes // sense to specialize attributes for call sites arguments instead of // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor(*this, FnPos, DepClassTy::REQUIRED); - return clampStateAndIndicateChange(getState(), FnAA.getState()); + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = + A.getAAFor(*this, ArgPos, DepClassTy::REQUIRED); + return clampStateAndIndicateChange(getState(), ArgAA.getState()); + } +}; + +/// InstanceInfo attribute for function return value. +struct AAInstanceInfoReturned final : AAInstanceInfoImpl { + AAInstanceInfoReturned(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoImpl(IRP, A) { + llvm_unreachable("InstanceInfo is not applicable to function returns!"); } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + llvm_unreachable("InstanceInfo is not applicable to function returns!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("InstanceInfo is not applicable to function returns!"); + } +}; + +/// InstanceInfo attribute deduction for a call site return value. +struct AAInstanceInfoCallSiteReturned final : AAInstanceInfoFloating { + AAInstanceInfoCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoFloating(IRP, A) {} }; +} // namespace /// ----------------------- Variable Capturing --------------------------------- +namespace { /// A class to hold the state of for no-capture attributes. struct AANoCaptureImpl : public AANoCapture { AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {} @@ -4863,143 +5146,69 @@ struct AANoCaptureImpl : public AANoCapture { return "assumed not-captured-maybe-returned"; return "assumed-captured"; } -}; - -/// Attributor-aware capture tracker. -struct AACaptureUseTracker final : public CaptureTracker { - - /// Create a capture tracker that can lookup in-flight abstract attributes - /// through the Attributor \p A. - /// - /// If a use leads to a potential capture, \p CapturedInMemory is set and the - /// search is stopped. If a use leads to a return instruction, - /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed. - /// If a use leads to a ptr2int which may capture the value, - /// \p CapturedInInteger is set. If a use is found that is currently assumed - /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies - /// set. All values in \p PotentialCopies are later tracked as well. For every - /// explored use we decrement \p RemainingUsesToExplore. 
Once it reaches 0, - /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger - /// conservatively set to true. - AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA, - const AAIsDead &IsDeadAA, AANoCapture::StateType &State, - SmallSetVector &PotentialCopies, - unsigned &RemainingUsesToExplore) - : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State), - PotentialCopies(PotentialCopies), - RemainingUsesToExplore(RemainingUsesToExplore) {} - - /// Determine if \p V maybe captured. *Also updates the state!* - bool valueMayBeCaptured(const Value *V) { - if (V->getType()->isPointerTy()) { - PointerMayBeCaptured(V, this); - } else { - State.indicatePessimisticFixpoint(); - } - return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); - } - - /// See CaptureTracker::tooManyUses(). - void tooManyUses() override { - State.removeAssumedBits(AANoCapture::NO_CAPTURE); - } - - bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override { - if (CaptureTracker::isDereferenceableOrNull(O, DL)) - return true; - const auto &DerefAA = A.getAAFor( - NoCaptureAA, IRPosition::value(*O), DepClassTy::OPTIONAL); - return DerefAA.getAssumedDereferenceableBytes(); - } - - /// See CaptureTracker::captured(...). - bool captured(const Use *U) override { - Instruction *UInst = cast(U->getUser()); - LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst - << "\n"); - // Because we may reuse the tracker multiple times we keep track of the - // number of explored uses ourselves as well. - if (RemainingUsesToExplore-- == 0) { - LLVM_DEBUG(dbgs() << " - too many uses to explore!\n"); - return isCapturedIn(/* Memory */ true, /* Integer */ true, - /* Return */ true); - } + /// Check the use \p U and update \p State accordingly. Return true if we + /// should continue to update the state. + bool checkUse(Attributor &A, AANoCapture::StateType &State, const Use &U, + bool &Follow) { + Instruction *UInst = cast(U.getUser()); + LLVM_DEBUG(dbgs() << "[AANoCapture] Check use: " << *U.get() << " in " + << *UInst << "\n"); // Deal with ptr2int by following uses. if (isa(UInst)) { LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n"); - return valueMayBeCaptured(UInst); + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, + /* Return */ true); } - // For stores we check if we can follow the value through memory or not. - if (auto *SI = dyn_cast(UInst)) { - if (SI->isVolatile()) - return isCapturedIn(/* Memory */ true, /* Integer */ false, - /* Return */ false); - bool UsedAssumedInformation = false; - if (!AA::getPotentialCopiesOfStoredValue( - A, *SI, PotentialCopies, NoCaptureAA, UsedAssumedInformation)) - return isCapturedIn(/* Memory */ true, /* Integer */ false, - /* Return */ false); - // Not captured directly, potential copies will be checked. - return isCapturedIn(/* Memory */ false, /* Integer */ false, + // For stores we already checked if we can follow them, if they make it + // here we give up. + if (isa(UInst)) + return isCapturedIn(State, /* Memory */ true, /* Integer */ false, /* Return */ false); - } // Explicitly catch return instructions. 
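// Illustrative sketch, not part of the vendored diff: the rewritten checkUse
// above classifies each user into the three capture routes (memory, integer,
// return). The same decision table over a stand-in enum; the real code
// consults the callee argument's AANoCapture state instead of a plain bool:
struct SketchCaps { bool Mem = false, Int = false, Ret = false; };

enum class SketchUserKind { PtrToInt, Store, ReturnInOwnFn, CallArg, Other };

inline SketchCaps capturesOf(SketchUserKind K, bool CalleeArgNoCapture) {
  switch (K) {
  case SketchUserKind::PtrToInt:      // the address escapes as an integer
    return {true, true, true};
  case SketchUserKind::Store:         // conservatively captured in memory
    return {true, false, false};
  case SketchUserKind::ReturnInOwnFn: // only "captured" via the return value
    return {false, false, true};
  case SketchUserKind::CallArg:       // defer to the callee argument's state
    return CalleeArgNoCapture ? SketchCaps{} : SketchCaps{true, true, true};
  case SketchUserKind::Other:
    return {true, true, true};
  }
  return {true, true, true};
}
// End of sketch.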
if (isa(UInst)) { - if (UInst->getFunction() == NoCaptureAA.getAnchorScope()) - return isCapturedIn(/* Memory */ false, /* Integer */ false, + if (UInst->getFunction() == getAnchorScope()) + return isCapturedIn(State, /* Memory */ false, /* Integer */ false, /* Return */ true); - return isCapturedIn(/* Memory */ true, /* Integer */ true, + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, /* Return */ true); } // For now we only use special logic for call sites. However, the tracker // itself knows about a lot of other non-capturing cases already. auto *CB = dyn_cast(UInst); - if (!CB || !CB->isArgOperand(U)) - return isCapturedIn(/* Memory */ true, /* Integer */ true, + if (!CB || !CB->isArgOperand(&U)) + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, /* Return */ true); - unsigned ArgNo = CB->getArgOperandNo(U); + unsigned ArgNo = CB->getArgOperandNo(&U); const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo); // If we have a abstract no-capture attribute for the argument we can use // it to justify a non-capture attribute here. This allows recursion! auto &ArgNoCaptureAA = - A.getAAFor(NoCaptureAA, CSArgPos, DepClassTy::REQUIRED); + A.getAAFor(*this, CSArgPos, DepClassTy::REQUIRED); if (ArgNoCaptureAA.isAssumedNoCapture()) - return isCapturedIn(/* Memory */ false, /* Integer */ false, + return isCapturedIn(State, /* Memory */ false, /* Integer */ false, /* Return */ false); if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - addPotentialCopy(*CB); - return isCapturedIn(/* Memory */ false, /* Integer */ false, + Follow = true; + return isCapturedIn(State, /* Memory */ false, /* Integer */ false, /* Return */ false); } // Lastly, we could not find a reason no-capture can be assumed so we don't. - return isCapturedIn(/* Memory */ true, /* Integer */ true, + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, /* Return */ true); } - /// Register \p CS as potential copy of the value we are checking. - void addPotentialCopy(CallBase &CB) { PotentialCopies.insert(&CB); } - - /// See CaptureTracker::shouldExplore(...). - bool shouldExplore(const Use *U) override { - // Check liveness and ignore droppable users. - bool UsedAssumedInformation = false; - return !U->getUser()->isDroppable() && - !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA, - UsedAssumedInformation); - } - - /// Update the state according to \p CapturedInMem, \p CapturedInInt, and - /// \p CapturedInRet, then return the appropriate value for use in the - /// CaptureTracker::captured() interface. - bool isCapturedIn(bool CapturedInMem, bool CapturedInInt, - bool CapturedInRet) { + /// Update \p State according to \p CapturedInMem, \p CapturedInInt, and + /// \p CapturedInRet, then return true if we should continue updating the + /// state. + static bool isCapturedIn(AANoCapture::StateType &State, bool CapturedInMem, + bool CapturedInInt, bool CapturedInRet) { LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int " << CapturedInInt << "|Ret " << CapturedInRet << "]\n"); if (CapturedInMem) @@ -5008,27 +5217,8 @@ struct AACaptureUseTracker final : public CaptureTracker { State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT); if (CapturedInRet) State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET); - return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); } - -private: - /// The attributor providing in-flight abstract attributes. 
- Attributor &A; - - /// The abstract attribute currently updated. - AANoCapture &NoCaptureAA; - - /// The abstract liveness state. - const AAIsDead &IsDeadAA; - - /// The state currently updated. - AANoCapture::StateType &State; - - /// Set of potential copies of the tracked value. - SmallSetVector &PotentialCopies; - - /// Global counter to limit the number of explored uses. - unsigned &RemainingUsesToExplore; }; ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { @@ -5042,7 +5232,6 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); assert(F && "Expected a function!"); const IRPosition &FnPos = IRPosition::function(*F); - const auto &IsDeadAA = A.getAAFor(*this, FnPos, DepClassTy::NONE); AANoCapture::StateType T; @@ -5059,6 +5248,8 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { // AAReturnedValues, e.g., track all values that escape through returns // directly somehow. auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { + if (!RVAA.getState().isValidState()) + return false; bool SeenConstant = false; for (auto &It : RVAA.returned_values()) { if (isa(It.first)) { @@ -5094,21 +5285,27 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { } } - // Use the CaptureTracker interface and logic with the specialized tracker, - // defined in AACaptureUseTracker, that can look at in-flight abstract - // attributes and directly updates the assumed state. - SmallSetVector PotentialCopies; - unsigned RemainingUsesToExplore = - getDefaultMaxUsesToExploreForCaptureTracking(); - AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies, - RemainingUsesToExplore); + auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) { + const auto &DerefAA = A.getAAFor( + *this, IRPosition::value(*O), DepClassTy::OPTIONAL); + return DerefAA.getAssumedDereferenceableBytes(); + }; + + auto UseCheck = [&](const Use &U, bool &Follow) -> bool { + switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { + case UseCaptureKind::NO_CAPTURE: + return true; + case UseCaptureKind::MAY_CAPTURE: + return checkUse(A, T, U, Follow); + case UseCaptureKind::PASSTHROUGH: + Follow = true; + return true; + } + llvm_unreachable("Unexpected use capture kind!"); + }; - // Check all potential copies of the associated value until we can assume - // none will be captured or we have to assume at least one might be. 
- unsigned Idx = 0; - PotentialCopies.insert(V); - while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size()) - Tracker.valueMayBeCaptured(PotentialCopies[Idx++]); + if (!A.checkForAllUses(UseCheck, *this, *V)) + return indicatePessimisticFixpoint(); AANoCapture::StateType &S = getState(); auto Assumed = S.getAssumed(); @@ -5208,6 +5405,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { STATS_DECLTRACK_CSRET_ATTR(nocapture) } }; +} // namespace /// ------------------ Value Simplify Attribute ---------------------------- @@ -5219,7 +5417,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional Other) { return false; LLVM_DEBUG({ - if (SimplifiedAssociatedValue.hasValue()) + if (SimplifiedAssociatedValue) dbgs() << "[ValueSimplify] is assumed to be " << **SimplifiedAssociatedValue << "\n"; else @@ -5228,6 +5426,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional Other) { return true; } +namespace { struct AAValueSimplifyImpl : AAValueSimplify { AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) : AAValueSimplify(IRP, A) {} @@ -5243,9 +5442,9 @@ struct AAValueSimplifyImpl : AAValueSimplify { /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { LLVM_DEBUG({ - errs() << "SAV: " << SimplifiedAssociatedValue << " "; + dbgs() << "SAV: " << (bool)SimplifiedAssociatedValue << " "; if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue) - errs() << "SAV: " << **SimplifiedAssociatedValue << " "; + dbgs() << "SAV: " << **SimplifiedAssociatedValue << " "; }); return isValidState() ? (isAtFixpoint() ? "simplified" : "maybe-simple") : "not-simple"; @@ -5259,24 +5458,101 @@ struct AAValueSimplifyImpl : AAValueSimplify { return SimplifiedAssociatedValue; } + /// Ensure the return value is \p V with type \p Ty, if not possible return + /// nullptr. If \p Check is true we will only verify such an operation would + /// suceed and return a non-nullptr value if that is the case. No IR is + /// generated or modified. + static Value *ensureType(Attributor &A, Value &V, Type &Ty, Instruction *CtxI, + bool Check) { + if (auto *TypedV = AA::getWithType(V, Ty)) + return TypedV; + if (CtxI && V.getType()->canLosslesslyBitCastTo(&Ty)) + return Check ? &V + : BitCastInst::CreatePointerBitCastOrAddrSpaceCast(&V, &Ty, + "", CtxI); + return nullptr; + } + + /// Reproduce \p I with type \p Ty or return nullptr if that is not posisble. + /// If \p Check is true we will only verify such an operation would suceed and + /// return a non-nullptr value if that is the case. No IR is generated or + /// modified. + static Value *reproduceInst(Attributor &A, + const AbstractAttribute &QueryingAA, + Instruction &I, Type &Ty, Instruction *CtxI, + bool Check, ValueToValueMapTy &VMap) { + assert(CtxI && "Cannot reproduce an instruction without context!"); + if (Check && (I.mayReadFromMemory() || + !isSafeToSpeculativelyExecute(&I, CtxI, /* DT */ nullptr, + /* TLI */ nullptr))) + return nullptr; + for (Value *Op : I.operands()) { + Value *NewOp = reproduceValue(A, QueryingAA, *Op, Ty, CtxI, Check, VMap); + if (!NewOp) { + assert(Check && "Manifest of new value unexpectedly failed!"); + return nullptr; + } + if (!Check) + VMap[Op] = NewOp; + } + if (Check) + return &I; + + Instruction *CloneI = I.clone(); + // TODO: Try to salvage debug information here. 
+ CloneI->setDebugLoc(DebugLoc()); + VMap[&I] = CloneI; + CloneI->insertBefore(CtxI); + RemapInstruction(CloneI, VMap); + return CloneI; + } + + /// Reproduce \p V with type \p Ty or return nullptr if that is not posisble. + /// If \p Check is true we will only verify such an operation would suceed and + /// return a non-nullptr value if that is the case. No IR is generated or + /// modified. + static Value *reproduceValue(Attributor &A, + const AbstractAttribute &QueryingAA, Value &V, + Type &Ty, Instruction *CtxI, bool Check, + ValueToValueMapTy &VMap) { + if (const auto &NewV = VMap.lookup(&V)) + return NewV; + bool UsedAssumedInformation = false; + Optional SimpleV = + A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation); + if (!SimpleV) + return PoisonValue::get(&Ty); + Value *EffectiveV = &V; + if (SimpleV.getValue()) + EffectiveV = SimpleV.getValue(); + if (auto *C = dyn_cast(EffectiveV)) + if (!C->canTrap()) + return C; + if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI), + A.getInfoCache())) + return ensureType(A, *EffectiveV, Ty, CtxI, Check); + if (auto *I = dyn_cast(EffectiveV)) + if (Value *NewV = reproduceInst(A, QueryingAA, *I, Ty, CtxI, Check, VMap)) + return ensureType(A, *NewV, Ty, CtxI, Check); + return nullptr; + } + /// Return a value we can use as replacement for the associated one, or /// nullptr if we don't have one that makes sense. - Value *getReplacementValue(Attributor &A) const { - Value *NewV; - NewV = SimplifiedAssociatedValue.hasValue() - ? SimplifiedAssociatedValue.getValue() - : UndefValue::get(getAssociatedType()); - if (!NewV) - return nullptr; - NewV = AA::getWithType(*NewV, *getAssociatedType()); - if (!NewV || NewV == &getAssociatedValue()) - return nullptr; - const Instruction *CtxI = getCtxI(); - if (CtxI && !AA::isValidAtPosition(*NewV, *CtxI, A.getInfoCache())) - return nullptr; - if (!CtxI && !AA::isValidInScope(*NewV, getAnchorScope())) - return nullptr; - return NewV; + Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const { + Value *NewV = SimplifiedAssociatedValue + ? SimplifiedAssociatedValue.getValue() + : UndefValue::get(getAssociatedType()); + if (NewV && NewV != &getAssociatedValue()) { + ValueToValueMapTy VMap; + // First verify we can reprduce the value with the required type at the + // context location before we actually start modifying the IR. + if (reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI, + /* CheckOnly */ true, VMap)) + return reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI, + /* CheckOnly */ false, VMap); + } + return nullptr; } /// Helper function for querying AAValueSimplify and updating candicate. 
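// Illustrative sketch, not part of the vendored diff: reproduceInst above
// clones an instruction after its (possibly cloned) operands and records the
// clone in a value map so later users pick it up. The shape of that recursion
// over a toy expression node:
#include <map>
#include <memory>
#include <vector>

struct SketchExpr { std::vector<const SketchExpr *> Ops; };

inline const SketchExpr *
reproduceSketch(const SketchExpr *E,
                std::map<const SketchExpr *, const SketchExpr *> &VMap,
                std::vector<std::unique_ptr<SketchExpr>> &Storage) {
  if (auto It = VMap.find(E); It != VMap.end())
    return It->second; // already materialized for an earlier user
  auto Clone = std::make_unique<SketchExpr>();
  for (const SketchExpr *Op : E->Ops) // operands are reproduced first
    Clone->Ops.push_back(reproduceSketch(Op, VMap, Storage));
  const SketchExpr *Res = Clone.get();
  VMap[E] = Res;
  Storage.push_back(std::move(Clone));
  return Res;
}
// Note: the real code runs the same walk twice, first in a check-only mode
// that refuses memory-reading or unsafe-to-speculate instructions, and only
// then materializes IR.
// End of sketch.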
@@ -5300,14 +5576,14 @@ struct AAValueSimplifyImpl : AAValueSimplify { const auto &AA = A.getAAFor(*this, getIRPosition(), DepClassTy::NONE); - Optional COpt = AA.getAssumedConstantInt(A); + Optional COpt = AA.getAssumedConstant(A); - if (!COpt.hasValue()) { + if (!COpt) { SimplifiedAssociatedValue = llvm::None; A.recordDependence(AA, *this, DepClassTy::OPTIONAL); return true; } - if (auto *C = COpt.getValue()) { + if (auto *C = *COpt) { SimplifiedAssociatedValue = C; A.recordDependence(AA, *this, DepClassTy::OPTIONAL); return true; @@ -5318,7 +5594,7 @@ struct AAValueSimplifyImpl : AAValueSimplify { bool askSimplifiedValueForOtherAAs(Attributor &A) { if (askSimplifiedValueFor(A)) return true; - if (askSimplifiedValueFor(A)) + if (askSimplifiedValueFor(A)) return true; return false; } @@ -5326,14 +5602,18 @@ struct AAValueSimplifyImpl : AAValueSimplify { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; - if (getAssociatedValue().user_empty()) - return Changed; - - if (auto *NewV = getReplacementValue(A)) { - LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() << " -> " - << *NewV << " :: " << *this << "\n"); - if (A.changeValueAfterManifest(getAssociatedValue(), *NewV)) - Changed = ChangeStatus::CHANGED; + for (auto &U : getAssociatedValue().uses()) { + // Check if we need to adjust the insertion point to make sure the IR is + // valid. + Instruction *IP = dyn_cast(U.getUser()); + if (auto *PHI = dyn_cast_or_null(IP)) + IP = PHI->getIncomingBlock(U)->getTerminator(); + if (auto *NewV = manifestReplacementValue(A, IP)) { + LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() + << " -> " << *NewV << " :: " << *this << "\n"); + if (A.changeUseAfterManifest(U, *NewV)) + Changed = ChangeStatus::CHANGED; + } } return Changed | AAValueSimplify::manifest(A); @@ -5344,73 +5624,6 @@ struct AAValueSimplifyImpl : AAValueSimplify { SimplifiedAssociatedValue = &getAssociatedValue(); return AAValueSimplify::indicatePessimisticFixpoint(); } - - static bool handleLoad(Attributor &A, const AbstractAttribute &AA, - LoadInst &L, function_ref Union) { - auto UnionWrapper = [&](Value &V, Value &Obj) { - if (isa(Obj)) - return Union(V); - if (!AA::isDynamicallyUnique(A, AA, V)) - return false; - if (!AA::isValidAtPosition(V, L, A.getInfoCache())) - return false; - return Union(V); - }; - - Value &Ptr = *L.getPointerOperand(); - SmallVector Objects; - if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L)) - return false; - - const auto *TLI = - A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction()); - for (Value *Obj : Objects) { - LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); - if (isa(Obj)) - continue; - if (isa(Obj)) { - // A null pointer access can be undefined but any offset from null may - // be OK. We do not try to optimize the latter. 
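// Illustrative sketch, not part of the vendored diff: the manifest hunk above
// adjusts the insertion point for uses inside PHI nodes, because a PHI's
// incoming value is only evaluated at the end of the matching predecessor
// block. Assuming LLVM's Use/PHINode API, the adjustment in isolation:
#include "llvm/IR/Instructions.h"

static llvm::Instruction *insertionPointForUse(llvm::Use &U) {
  auto *UserI = llvm::cast<llvm::Instruction>(U.getUser());
  if (auto *PHI = llvm::dyn_cast<llvm::PHINode>(UserI))
    return PHI->getIncomingBlock(U)->getTerminator();
  return UserI; // ordinary users: materialize right before the user
}
// End of sketch.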
- bool UsedAssumedInformation = false; - if (!NullPointerIsDefined(L.getFunction(), - Ptr.getType()->getPointerAddressSpace()) && - A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj) - continue; - return false; - } - Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI); - if (!InitialVal || !Union(*InitialVal)) - return false; - - LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store " - "propagation, checking accesses next.\n"); - - auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { - LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n"); - if (Acc.isWrittenValueYetUndetermined()) - return true; - Value *Content = Acc.getWrittenValue(); - if (!Content) - return false; - Value *CastedContent = - AA::getWithType(*Content, *AA.getAssociatedType()); - if (!CastedContent) - return false; - if (IsExact) - return UnionWrapper(*CastedContent, *Obj); - if (auto *C = dyn_cast(CastedContent)) - if (C->isNullValue() || C->isAllOnesValue() || isa(C)) - return UnionWrapper(*CastedContent, *Obj); - return false; - }; - - auto &PI = A.getAAFor(AA, IRPosition::value(*Obj), - DepClassTy::REQUIRED); - if (!PI.forallInterferingWrites(A, AA, L, CheckAccess)) - return false; - } - return true; - } }; struct AAValueSimplifyArgument final : AAValueSimplifyImpl { @@ -5425,15 +5638,6 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { Attribute::StructRet, Attribute::Nest, Attribute::ByVal}, /* IgnoreSubsumingPositions */ true)) indicatePessimisticFixpoint(); - - // FIXME: This is a hack to prevent us from propagating function poiner in - // the new pass manager CGSCC pass as it creates call edges the - // CallGraphUpdater cannot handle yet. - Value &V = getAssociatedValue(); - if (V.getType()->isPointerTy() && - V.getType()->getPointerElementType()->isFunctionTy() && - !A.isModulePass()) - indicatePessimisticFixpoint(); } /// See AbstractAttribute::updateImpl(...). @@ -5466,7 +5670,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { bool UsedAssumedInformation = false; Optional SimpleArgOp = A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation); - if (!SimpleArgOp.hasValue()) + if (!SimpleArgOp) return true; if (!SimpleArgOp.getValue()) return false; @@ -5477,14 +5681,14 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { // Generate a answer specific to a call site context. 
bool Success; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (hasCallBaseContext() && getCallBaseContext()->getCalledFunction() == Arg->getParent()) Success = PredForCallSite( AbstractCallSite(&getCallBaseContext()->getCalledOperandUse())); else Success = A.checkForAllCallSites(PredForCallSite, *this, true, - AllCallSitesKnown); + UsedAssumedInformation); if (!Success) if (!askSimplifiedValueForOtherAAs(A)) @@ -5516,12 +5720,16 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl { ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; - auto PredForReturned = [&](Value &V) { - return checkAndUpdate(A, *this, - IRPosition::value(V, getCallBaseContext())); + auto ReturnInstCB = [&](Instruction &I) { + auto &RI = cast(I); + return checkAndUpdate( + A, *this, + IRPosition::value(*RI.getReturnValue(), getCallBaseContext())); }; - if (!A.checkForAllReturnedValues(PredForReturned, *this)) + bool UsedAssumedInformation = false; + if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret}, + UsedAssumedInformation)) if (!askSimplifiedValueForOtherAAs(A)) return indicatePessimisticFixpoint(); @@ -5531,29 +5739,9 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl { } ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - if (auto *NewV = getReplacementValue(A)) { - auto PredForReturned = - [&](Value &, const SmallSetVector &RetInsts) { - for (ReturnInst *RI : RetInsts) { - Value *ReturnedVal = RI->getReturnValue(); - if (ReturnedVal == NewV || isa(ReturnedVal)) - return true; - assert(RI->getFunction() == getAnchorScope() && - "ReturnInst in wrong function!"); - LLVM_DEBUG(dbgs() - << "[ValueSimplify] " << *ReturnedVal << " -> " - << *NewV << " in " << *RI << " :: " << *this << "\n"); - if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV)) - Changed = ChangeStatus::CHANGED; - } - return true; - }; - A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this); - } - - return Changed | AAValueSimplify::manifest(A); + // We queried AAValueSimplify for the returned values so they will be + // replaced if a simplified form was found. Nothing to do here. + return ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() @@ -5597,7 +5785,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return true; if (!SimplifiedLHS.getValue()) return false; @@ -5606,7 +5794,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return true; if (!SimplifiedRHS.getValue()) return false; @@ -5662,15 +5850,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { return true; } - bool updateWithLoad(Attributor &A, LoadInst &L) { - auto Union = [&](Value &V) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, &V, L.getType()); - return SimplifiedAssociatedValue != Optional(nullptr); - }; - return handleLoad(A, *this, L, Union); - } - /// Use the generic, non-optimistic InstSimplfy functionality if we managed to /// simplify any operand of the instruction \p I. Return true if successful, /// in that case SimplifiedAssociatedValue will be updated. 
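// Illustrative sketch, not part of the vendored diff: the simplification
// lattice behind combineOptionalValuesInAAValueLatice has three levels,
// "nothing seen yet", a single agreed-on value, and "conflict" (no single
// simplified value exists). A std-only model with ints standing in for
// llvm::Value pointers:
#include <optional>

struct SketchSimplifyState {
  std::optional<int> Candidate; // nullopt: nothing contributed yet
  bool Conflict = false;

  // Returns true while a single simplified value is still possible.
  bool unionAssumed(int V) {
    if (Conflict)
      return false;
    if (!Candidate) {
      Candidate = V;
      return true;
    }
    if (*Candidate == V)
      return true;
    Conflict = true; // two contributions disagree
    return false;
  }
};
// End of sketch.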
@@ -5686,7 +5865,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { *this, UsedAssumedInformation); // If we are not sure about any operand we are not sure about the entire // instruction, we'll wait. - if (!SimplifiedOp.hasValue()) + if (!SimplifiedOp) return true; if (SimplifiedOp.getValue()) @@ -5714,7 +5893,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { const DataLayout &DL = I.getModule()->getDataLayout(); SimplifyQuery Q(DL, TLI, DT, AC, &I); if (Value *SimplifiedI = - SimplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { + simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( SimplifiedAssociatedValue, SimplifiedI, I.getType()); return SimplifiedAssociatedValue != Optional(nullptr); @@ -5726,6 +5905,36 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; + // Do not simplify loads that are only used in llvm.assume if we cannot also + // remove all stores that may feed into the load. The reason is that the + // assume is probably worth something as long as the stores are around. + if (auto *LI = dyn_cast(&getAssociatedValue())) { + InformationCache &InfoCache = A.getInfoCache(); + if (InfoCache.isOnlyUsedByAssume(*LI)) { + SmallSetVector PotentialCopies; + SmallSetVector PotentialValueOrigins; + bool UsedAssumedInformation = false; + if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, + PotentialValueOrigins, *this, + UsedAssumedInformation, + /* OnlyExact */ true)) { + if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { + if (!I) + return true; + if (auto *SI = dyn_cast(I)) + return A.isAssumedDead(SI->getOperandUse(0), this, + /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + })) + return indicatePessimisticFixpoint(); + } + } + } + auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &, bool Stripped) -> bool { auto &AA = A.getAAFor( @@ -5734,9 +5943,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { if (!Stripped && this == &AA) { if (auto *I = dyn_cast(&V)) { - if (auto *LI = dyn_cast(&V)) - if (updateWithLoad(A, *LI)) - return true; if (auto *Cmp = dyn_cast(&V)) if (handleCmp(A, *Cmp)) return true; @@ -5754,8 +5960,10 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { }; bool Dummy = false; + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(), + UsedAssumedInformation, /* UseValueSimplify */ false)) if (!askSimplifiedValueForOtherAAs(A)) return indicatePessimisticFixpoint(); @@ -5806,8 +6014,23 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl { void initialize(Attributor &A) override { AAValueSimplifyImpl::initialize(A); - if (!getAssociatedFunction()) + Function *Fn = getAssociatedFunction(); + if (!Fn) { indicatePessimisticFixpoint(); + return; + } + for (Argument &Arg : Fn->args()) { + if (Arg.hasReturnedAttr()) { + auto IRP = IRPosition::callsite_argument(*cast(getCtxI()), + Arg.getArgNo()); + if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT && + checkAndUpdate(A, *this, IRP)) + indicateOptimisticFixpoint(); + else + indicatePessimisticFixpoint(); + return; + } + } } /// See AbstractAttribute::updateImpl(...). 
@@ -5845,8 +6068,13 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; + // TODO: We should avoid simplification duplication to begin with. + auto *FloatAA = A.lookupAAFor( + IRPosition::value(getAssociatedValue()), this, DepClassTy::NONE); + if (FloatAA && FloatAA->getState().isValidState()) + return Changed; - if (auto *NewV = getReplacementValue(A)) { + if (auto *NewV = manifestReplacementValue(A, getCtxI())) { Use &U = cast(&getAnchorValue()) ->getArgOperandUse(getCallSiteArgNo()); if (A.changeUseAfterManifest(U, *NewV)) @@ -5860,8 +6088,10 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { STATS_DECLTRACK_CSARG_ATTR(value_simplify) } }; +} // namespace /// ----------------------- Heap-To-Stack Conversion --------------------------- +namespace { struct AAHeapToStackFunction final : public AAHeapToStack { struct AllocationInfo { @@ -5883,7 +6113,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool HasPotentiallyFreeingUnknownUses = false; /// The set of free calls that use this allocation. - SmallPtrSet PotentialFreeCalls{}; + SmallSetVector PotentialFreeCalls{}; }; struct DeallocationInfo { @@ -5895,7 +6125,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool MightFreeUnknownObjects = false; /// The set of allocation calls that are potentially freed. - SmallPtrSet PotentialAllocationCalls{}; + SmallSetVector PotentialAllocationCalls{}; }; AAHeapToStackFunction(const IRPosition &IRP, Attributor &A) @@ -5905,9 +6135,9 @@ struct AAHeapToStackFunction final : public AAHeapToStack { // Ensure we call the destructor so we release any memory allocated in the // sets. 
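// Illustrative sketch, not part of the vendored diff: the hunks above switch
// DenseMap/SmallPtrSet to MapVector/SmallSetVector so iteration follows
// insertion order and the transformation is deterministic across runs. The
// MapVector idea in std-only form:
#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

template <typename K, typename V> struct SketchOrderedMap {
  std::unordered_map<K, std::size_t> Index; // key -> slot in Entries
  std::vector<std::pair<K, V>> Entries;     // iteration = insertion order

  V &operator[](const K &Key) {
    auto It = Index.find(Key);
    if (It != Index.end())
      return Entries[It->second].second;
    Index.emplace(Key, Entries.size());
    Entries.emplace_back(Key, V{});
    return Entries.back().second;
  }
};
// Iterating Entries visits keys in first-insertion order, which keeps the
// emitted IR and remarks stable run to run, unlike a hash map's order.
// End of sketch.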
for (auto &It : AllocationInfos) - It.getSecond()->~AllocationInfo(); + It.second->~AllocationInfo(); for (auto &It : DeallocationInfos) - It.getSecond()->~DeallocationInfo(); + It.second->~DeallocationInfo(); } void initialize(Attributor &A) override { @@ -5932,7 +6162,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) { AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB}; AllocationInfos[CB] = AI; - TLI->getLibFunc(*CB, AI->LibraryFunctionId); + if (TLI) + TLI->getLibFunc(*CB, AI->LibraryFunctionId); } } return true; @@ -5945,6 +6176,16 @@ struct AAHeapToStackFunction final : public AAHeapToStack { /* CheckPotentiallyDead */ true); (void)Success; assert(Success && "Did not expect the call base visit callback to fail!"); + + Attributor::SimplifictionCallbackTy SCB = + [](const IRPosition &, const AbstractAttribute *, + bool &) -> Optional { return nullptr; }; + for (const auto &It : AllocationInfos) + A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first), + SCB); + for (const auto &It : DeallocationInfos) + A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first), + SCB); } const std::string getAsStr() const override { @@ -5971,7 +6212,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool isAssumedHeapToStack(const CallBase &CB) const override { if (isValidState()) - if (AllocationInfo *AI = AllocationInfos.lookup(&CB)) + if (AllocationInfo *AI = + AllocationInfos.lookup(const_cast(&CB))) return AI->Status != AllocationInfo::INVALID; return false; } @@ -6000,6 +6242,17 @@ struct AAHeapToStackFunction final : public AAHeapToStack { Function *F = getAnchorScope(); const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + LoopInfo *LI = + A.getInfoCache().getAnalysisResultForFunction(*F); + Optional MayContainIrreducibleControl; + auto IsInLoop = [&](BasicBlock &BB) { + if (!MayContainIrreducibleControl.has_value()) + MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI); + if (MayContainIrreducibleControl.value()) + return true; + return LI->getLoopFor(&BB) != nullptr; + }; + for (auto &It : AllocationInfos) { AllocationInfo &AI = *It.second; if (AI.Status == AllocationInfo::INVALID) @@ -6026,13 +6279,13 @@ struct AAHeapToStackFunction final : public AAHeapToStack { else A.emitRemark(AI.CB, "HeapToStack", Remark); + const DataLayout &DL = A.getInfoCache().getDL(); Value *Size; Optional SizeAPI = getSize(A, *this, AI); - if (SizeAPI.hasValue()) { + if (SizeAPI) { Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI); } else { LLVMContext &Ctx = AI.CB->getContext(); - auto &DL = A.getInfoCache().getDL(); ObjectSizeOpts Opts; ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts); SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB); @@ -6041,32 +6294,36 @@ struct AAHeapToStackFunction final : public AAHeapToStack { Size = SizeOffsetPair.first; } + Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent())) + ? 
AI.CB + : &F->getEntryBlock().front(); + Align Alignment(1); if (MaybeAlign RetAlign = AI.CB->getRetAlign()) - Alignment = max(Alignment, RetAlign); + Alignment = std::max(Alignment, *RetAlign); if (Value *Align = getAllocAlignment(AI.CB, TLI)) { Optional AlignmentAPI = getAPInt(A, *this, *Align); - assert(AlignmentAPI.hasValue() && + assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 && "Expected an alignment during manifest!"); - Alignment = - max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue())); + Alignment = std::max( + Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue())); } - unsigned AS = cast(AI.CB->getType())->getAddressSpace(); - Instruction *Alloca = - new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment, - "", AI.CB->getNextNode()); + // TODO: Hoist the alloca towards the function entry. + unsigned AS = DL.getAllocaAddrSpace(); + Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, + Size, Alignment, "", IP); if (Alloca->getType() != AI.CB->getType()) - Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc", - Alloca->getNextNode()); + Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( + Alloca, AI.CB->getType(), "malloc_cast", AI.CB); auto *I8Ty = Type::getInt8Ty(F->getContext()); auto *InitVal = getInitialValueOfAllocation(AI.CB, TLI, I8Ty); assert(InitVal && "Must be able to materialize initial memory state of allocation"); - A.changeValueAfterManifest(*AI.CB, *Alloca); + A.changeAfterManifest(IRPosition::inst(*AI.CB), *Alloca); if (auto *II = dyn_cast(AI.CB)) { auto *NBB = II->getNormalDest(); @@ -6095,7 +6352,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool UsedAssumedInformation = false; Optional SimpleV = A.getAssumedConstant(V, AA, UsedAssumedInformation); - if (!SimpleV.hasValue()) + if (!SimpleV) return APInt(64, 0); if (auto *CI = dyn_cast_or_null(SimpleV.getValue())) return CI->getValue(); @@ -6120,11 +6377,11 @@ struct AAHeapToStackFunction final : public AAHeapToStack { /// Collection of all malloc-like calls in a function with associated /// information. - DenseMap AllocationInfos; + MapVector AllocationInfos; /// Collection of all free-like calls in a function with associated /// information. - DenseMap DeallocationInfos; + MapVector DeallocationInfos; ChangeStatus updateImpl(Attributor &A) override; }; @@ -6167,7 +6424,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { // branches etc. 
SmallVector Objects; if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects, - *this, DI.CB)) { + *this, DI.CB, + UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n"); @@ -6239,6 +6497,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { dbgs() << "[H2S] unique free call might free unknown allocations\n"); return false; } + if (DI->PotentialAllocationCalls.empty()) + return true; if (DI->PotentialAllocationCalls.size() > 1) { LLVM_DEBUG(dbgs() << "[H2S] unique free call might free " << DI->PotentialAllocationCalls.size() @@ -6316,7 +6576,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { if (ValidUsesOnly && AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared) - A.emitRemark(AI.CB, "OMP113", Remark); + A.emitRemark(CB, "OMP113", Remark); LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); ValidUsesOnly = false; @@ -6348,7 +6608,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { continue; if (Value *Align = getAllocAlignment(AI.CB, TLI)) { - if (!getAPInt(A, *this, *Align)) { + Optional APAlign = getAPInt(A, *this, *Align); + if (!APAlign) { // Can't generate an alloca which respects the required alignment // on the allocation. LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB @@ -6356,14 +6617,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { AI.Status = AllocationInfo::INVALID; Changed = ChangeStatus::CHANGED; continue; + } else { + if (APAlign->ugt(llvm::Value::MaximumAlignment) || + !APAlign->isPowerOf2()) { + LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign + << "\n"); + AI.Status = AllocationInfo::INVALID; + Changed = ChangeStatus::CHANGED; + continue; + } } } if (MaxHeapToStackSize != -1) { Optional Size = getSize(A, *this, AI); - if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) { + if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) { LLVM_DEBUG({ - if (!Size.hasValue()) + if (!Size) dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n"; else dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. " @@ -6395,8 +6665,10 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { return Changed; } +} // namespace /// ----------------------- Privatizable Pointers ------------------------------ +namespace { struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A) : AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {} @@ -6414,9 +6686,9 @@ struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { /// Return a privatizable type that encloses both T0 and T1. /// TODO: This is merely a stub for now as we should manage a mapping as well. Optional combineTypes(Optional T0, Optional T1) { - if (!T0.hasValue()) + if (!T0) return T1; - if (!T1.hasValue()) + if (!T1) return T0; if (T0 == T1) return T0; @@ -6445,11 +6717,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Optional identifyPrivatizableType(Attributor &A) override { // If this is a byval argument and we know all the call sites (so we can // rewrite them), there is no need to check them explicitly. 
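// Illustrative sketch, not part of the vendored diff: the new rejection path
// above refuses alignments that are not a power of two or that exceed
// llvm::Value::MaximumAlignment (1 << 32 at the time of this import). The
// same predicate in miniature:
#include <cassert>
#include <cstdint>

inline bool isUsableAlignment(uint64_t Alignment) {
  const uint64_t MaximumAlignment = 1ULL << 32; // mirrors Value::MaximumAlignment
  bool IsPowerOfTwo = Alignment != 0 && (Alignment & (Alignment - 1)) == 0;
  return IsPowerOfTwo && Alignment <= MaximumAlignment;
}

inline void alignmentDemo() {
  assert(isUsableAlignment(16));
  assert(!isUsableAlignment(24));         // not a power of two
  assert(!isUsableAlignment(1ULL << 33)); // above the maximum
}
// End of sketch.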
- bool AllCallSitesKnown; - if (getIRPosition().hasAttr(Attribute::ByVal) && + bool UsedAssumedInformation = false; + SmallVector Attrs; + getAttrs({Attribute::ByVal}, Attrs, /* IgnoreSubsumingPositions */ true); + if (!Attrs.empty() && A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this, - true, AllCallSitesKnown)) - return getAssociatedValue().getType()->getPointerElementType(); + true, UsedAssumedInformation)) + return Attrs[0].getValueAsType(); Optional Ty; unsigned ArgNo = getIRPosition().getCallSiteArgNo(); @@ -6474,9 +6748,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { LLVM_DEBUG({ dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: "; - if (CSTy.hasValue() && CSTy.getValue()) + if (CSTy && CSTy.getValue()) CSTy.getValue()->print(dbgs()); - else if (CSTy.hasValue()) + else if (CSTy) dbgs() << ""; else dbgs() << ""; @@ -6486,19 +6760,20 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { LLVM_DEBUG({ dbgs() << " : New Type: "; - if (Ty.hasValue() && Ty.getValue()) + if (Ty && Ty.getValue()) Ty.getValue()->print(dbgs()); - else if (Ty.hasValue()) + else if (Ty) dbgs() << ""; else dbgs() << ""; dbgs() << "\n"; }); - return !Ty.hasValue() || Ty.getValue(); + return !Ty || Ty.getValue(); }; - if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown)) + if (!A.checkForAllCallSites(CallSiteCheck, *this, true, + UsedAssumedInformation)) return nullptr; return Ty; } @@ -6506,7 +6781,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { PrivatizableType = identifyPrivatizableType(A); - if (!PrivatizableType.hasValue()) + if (!PrivatizableType) return ChangeStatus::UNCHANGED; if (!PrivatizableType.getValue()) return indicatePessimisticFixpoint(); @@ -6518,8 +6793,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Avoid arguments with padding for now. if (!getIRPosition().hasAttr(Attribute::ByVal) && - !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(), - A.getInfoCache().getDL())) { + !isDenselyPacked(*PrivatizableType, A.getInfoCache().getDL())) { LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n"); return indicatePessimisticFixpoint(); } @@ -6527,7 +6801,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Collect the types that will replace the privatizable type in the function // signature. SmallVector ReplacementTypes; - identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + identifyReplacementTypes(*PrivatizableType, ReplacementTypes); // Verify callee and caller agree on how the promoted argument would be // passed. 
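// Illustrative sketch, not part of the vendored diff: the byval hunk above
// stops reading the pointee type off the pointer (getPointerElementType) and
// takes it from the attribute instead, which is what the opaque-pointer
// transition requires. Assuming LLVM's Argument API, the lookup boils down to:
#include "llvm/IR/Function.h"

static llvm::Type *byValElementType(const llvm::Argument &Arg) {
  // Non-null only for byval arguments; the type is stored on the attribute.
  return Arg.hasByValAttr() ? Arg.getParamByValType() : nullptr;
}
// End of sketch.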
@@ -6545,9 +6819,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return TTI->areTypesABICompatible( CB->getCaller(), CB->getCalledFunction(), ReplacementTypes); }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (!A.checkForAllCallSites(CallSiteCheck, *this, true, - AllCallSitesKnown)) { + UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for " << Fn.getName() << "\n"); @@ -6595,7 +6869,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { *this, IRPosition::argument(CBArg), DepClassTy::REQUIRED); if (CBArgPrivAA.isValidState()) { auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType(); - if (!CBArgPrivTy.hasValue()) + if (!CBArgPrivTy) continue; if (CBArgPrivTy.getValue() == PrivatizableType) continue; @@ -6642,7 +6916,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { DepClassTy::REQUIRED); if (DCArgPrivAA.isValidState()) { auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType(); - if (!DCArgPrivTy.hasValue()) + if (!DCArgPrivTy) return true; if (DCArgPrivTy.getValue() == PrivatizableType) return true; @@ -6674,7 +6948,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { }; if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true, - AllCallSitesKnown)) + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; @@ -6749,8 +7023,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Type *PrivPtrType = PrivType->getPointerTo(); if (Base->getType() != PrivPtrType) - Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "", - ACS.getInstruction()); + Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( + Base, PrivPtrType, "", ACS.getInstruction()); // Traverse the type, build GEPs and loads. if (auto *PrivStructType = dyn_cast(PrivType)) { @@ -6784,7 +7058,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { /// See AbstractAttribute::manifest(...) ChangeStatus manifest(Attributor &A) override { - if (!PrivatizableType.hasValue()) + if (!PrivatizableType) return ChangeStatus::UNCHANGED; assert(PrivatizableType.getValue() && "Expected privatizable type!"); @@ -6817,14 +7091,16 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Function &ReplacementFn, Function::arg_iterator ArgIt) { BasicBlock &EntryBB = ReplacementFn.getEntryBlock(); Instruction *IP = &*EntryBB.getFirstInsertionPt(); - Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0, + const DataLayout &DL = IP->getModule()->getDataLayout(); + unsigned AS = DL.getAllocaAddrSpace(); + Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS, Arg->getName() + ".priv", IP); createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, ArgIt->getArgNo(), *IP); if (AI->getType() != Arg->getType()) - AI = - BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP); + AI = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( + AI, Arg->getType(), "", IP); Arg->replaceAllUsesWith(AI); for (CallInst *CI : TailCalls) @@ -6841,8 +7117,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // When no alignment is specified for the load instruction, // natural alignment is assumed. 
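
The manifest changes above make the privatization alloca address-space correct: the slot is now created in the DataLayout's alloca address space, and any pointer-type mismatch is bridged with an addrspacecast-aware cast rather than a plain bitcast. A sketch of that pattern against LLVM 15-era APIs (createStackSlot is a hypothetical helper, not part of the pass):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Create a stack slot of type 'Ty' before 'IP' and return it with pointer
// type 'ExpectedTy'. On targets whose allocas live in a non-zero address
// space (e.g. AMDGPU), a plain bitcast between the two pointer types would
// be invalid; CreatePointerBitCastOrAddrSpaceCast picks the legal cast.
static Value *createStackSlot(Type *Ty, Type *ExpectedTy, Instruction *IP) {
  const DataLayout &DL = IP->getModule()->getDataLayout();
  Instruction *AI = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "priv", IP);
  if (AI->getType() != ExpectedTy)
    return CastInst::CreatePointerBitCastOrAddrSpaceCast(AI, ExpectedTy, "", IP);
  return AI;
}
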
createReplacementValues( - assumeAligned(AlignAA.getAssumedAlign()), - PrivatizableType.getValue(), ACS, + AlignAA.getAssumedAlign(), *PrivatizableType, ACS, ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), NewArgOperands); }; @@ -6850,7 +7125,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Collect the types that will replace the privatizable type in the function // signature. SmallVector ReplacementTypes; - identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + identifyReplacementTypes(*PrivatizableType, ReplacementTypes); // Register a rewrite of the argument. if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes, @@ -6897,7 +7172,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { auto &PrivArgAA = A.getAAFor( *this, IRPosition::argument(*Arg), DepClassTy::REQUIRED); if (PrivArgAA.isAssumedPrivatizablePtr()) - return Obj->getType()->getPointerElementType(); + return PrivArgAA.getPrivatizableType(); } LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid " @@ -6926,7 +7201,7 @@ struct AAPrivatizablePtrCallSiteArgument final /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { PrivatizableType = identifyPrivatizableType(A); - if (!PrivatizableType.hasValue()) + if (!PrivatizableType) return ChangeStatus::UNCHANGED; if (!PrivatizableType.getValue()) return indicatePessimisticFixpoint(); @@ -6992,10 +7267,12 @@ struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating { STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); } }; +} // namespace /// -------------------- Memory Behavior Attributes ---------------------------- /// Includes read-none, read-only, and write-only. /// ---------------------------------------------------------------------------- +namespace { struct AAMemoryBehaviorImpl : public AAMemoryBehavior { AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A) : AAMemoryBehavior(IRP, A) {} @@ -7495,6 +7772,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U, if (UserI->mayWriteToMemory()) removeAssumedBits(NO_WRITES); } +} // namespace /// -------------------- Memory Locations Attributes --------------------------- /// Includes read-none, argmemonly, inaccessiblememonly, @@ -7528,6 +7806,7 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr( return S; } +namespace { struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) @@ -7772,8 +8051,10 @@ void AAMemoryLocationImpl::categorizePtrValue( << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); SmallVector Objects; + bool UsedAssumedInformation = false; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I, - /* Intraprocedural */ true)) { + UsedAssumedInformation, + AA::Intraprocedural)) { LLVM_DEBUG( dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n"); updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed, @@ -8042,9 +8323,11 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl { STATS_DECLTRACK_CS_ATTR(readnone) } }; +} // namespace /// ------------------ Value Constant Range Attribute ------------------------- +namespace { struct AAValueConstantRangeImpl : AAValueConstantRange { using StateType = IntegerRangeState; AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A) @@ -8379,7 +8662,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedLHS = 
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return true; if (!SimplifiedLHS.getValue()) return false; @@ -8388,7 +8671,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return true; if (!SimplifiedRHS.getValue()) return false; @@ -8432,7 +8715,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedOpV = A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedOpV.hasValue()) + if (!SimplifiedOpV) return true; if (!SimplifiedOpV.getValue()) return false; @@ -8462,7 +8745,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return true; if (!SimplifiedLHS.getValue()) return false; @@ -8471,7 +8754,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return true; if (!SimplifiedRHS.getValue()) return false; @@ -8536,7 +8819,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedOpV = A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedOpV.hasValue()) + if (!SimplifiedOpV) return true; if (!SimplifiedOpV.getValue()) return false; @@ -8588,8 +8871,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { IntegerRangeState T(getBitWidth()); + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, getIRPosition(), *this, T, VisitValueCB, getCtxI(), + UsedAssumedInformation, /* UseValueSimplify */ false)) return indicatePessimisticFixpoint(); @@ -8683,21 +8968,23 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating { STATS_DECLTRACK_CSARG_ATTR(value_range) } }; +} // namespace /// ------------------ Potential Values Attribute ------------------------- -struct AAPotentialValuesImpl : AAPotentialValues { +namespace { +struct AAPotentialConstantValuesImpl : AAPotentialConstantValues { using StateType = PotentialConstantIntValuesState; - AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A) - : AAPotentialValues(IRP, A) {} + AAPotentialConstantValuesImpl(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValues(IRP, A) {} /// See AbstractAttribute::initialize(..). void initialize(Attributor &A) override { if (A.hasSimplificationCallback(getIRPosition())) indicatePessimisticFixpoint(); else - AAPotentialValues::initialize(A); + AAPotentialConstantValues::initialize(A); } /// See AbstractAttribute::getAsStr(). 
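
Most of the churn in these hunks is mechanical: llvm::Optional gained a std::optional-compatible surface in this release cycle, so O.hasValue() becomes O.has_value() or a plain boolean test, and O.getValueOr(X) becomes O.value_or(X). The tri-state convention the Attributor layers on top is worth spelling out; a standard-C++ model:

#include <optional>

// Attributor simplification convention: nullopt = "nothing known yet, keep
// iterating"; an engaged value holding nullptr = "cannot simplify, give
// up"; a non-null pointer = the simplified value.
enum class Action { Wait, GiveUp, Use };

static Action classify(std::optional<int *> Simplified) {
  if (!Simplified) // identical to !Simplified.has_value()
    return Action::Wait;
  if (!*Simplified)
    return Action::GiveUp;
  return Action::Use;
}
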
@@ -8714,13 +9001,14 @@ struct AAPotentialValuesImpl : AAPotentialValues { } }; -struct AAPotentialValuesArgument final - : AAArgumentFromCallSiteArguments { - using Base = - AAArgumentFromCallSiteArguments; - AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A) + using Base = AAArgumentFromCallSiteArguments; + AAPotentialConstantValuesArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} /// See AbstractAttribute::initialize(..). @@ -8738,11 +9026,12 @@ struct AAPotentialValuesArgument final } }; -struct AAPotentialValuesReturned - : AAReturnedFromReturnedValues { - using Base = - AAReturnedFromReturnedValues; - AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A) +struct AAPotentialConstantValuesReturned + : AAReturnedFromReturnedValues { + using Base = AAReturnedFromReturnedValues; + AAPotentialConstantValuesReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} /// See AbstractAttribute::trackStatistics() @@ -8751,13 +9040,13 @@ struct AAPotentialValuesReturned } }; -struct AAPotentialValuesFloating : AAPotentialValuesImpl { - AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesImpl(IRP, A) {} +struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { + AAPotentialConstantValuesFloating(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValuesImpl(IRP, A) {} /// See AbstractAttribute::initialize(..). void initialize(Attributor &A) override { - AAPotentialValuesImpl::initialize(A); + AAPotentialConstantValuesImpl::initialize(A); if (isAtFixpoint()) return; @@ -8783,7 +9072,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { indicatePessimisticFixpoint(); - LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: " + LLVM_DEBUG(dbgs() << "[AAPotentialConstantValues] We give up: " << getAssociatedValue() << "\n"); } @@ -8891,7 +9180,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; if (!SimplifiedLHS.getValue()) return indicatePessimisticFixpoint(); @@ -8900,7 +9189,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; if (!SimplifiedRHS.getValue()) return indicatePessimisticFixpoint(); @@ -8909,18 +9198,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return indicatePessimisticFixpoint(); - auto &LHSAA = A.getAAFor(*this, IRPosition::value(*LHS), - DepClassTy::REQUIRED); + auto &LHSAA = A.getAAFor( + *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); if (!LHSAA.isValidState()) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); + auto &RHSAA = A.getAAFor( + *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); if (!RHSAA.isValidState()) return indicatePessimisticFixpoint(); - const DenseSet &LHSAAPVS = LHSAA.getAssumedSet(); - const DenseSet &RHSAAPVS = RHSAA.getAssumedSet(); + const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); + const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); // TODO: make use of undef flag to limit potential values aggressively. 
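
Renames aside, the binary-operator handling that follows works on bounded sets of candidate constants: every pairing of a potential LHS and RHS value is folded and unioned into the result, and the state degrades to overdefined once the set outgrows its cap. A standalone model with int64_t in place of APInt (the cap of 7 mirrors the pass's default limit but is illustrative here):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <set>

// Folds '+' over every pairing of potential operand values; nullopt means
// the result set exceeded the cap and the state is overdefined.
static std::optional<std::set<int64_t>>
addPotentialValues(const std::set<int64_t> &L, const std::set<int64_t> &R,
                   std::size_t Cap = 7) {
  std::set<int64_t> Out;
  for (int64_t A : L)
    for (int64_t B : R) {
      Out.insert(A + B);
      if (Out.size() > Cap)
        return std::nullopt;
    }
  return Out;
}
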
bool MaybeTrue = false, MaybeFalse = false; @@ -8974,7 +9263,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; if (!SimplifiedLHS.getValue()) return indicatePessimisticFixpoint(); @@ -8983,7 +9272,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; if (!SimplifiedRHS.getValue()) return indicatePessimisticFixpoint(); @@ -8997,21 +9286,21 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { // Check if we only need one operand. bool OnlyLeft = false, OnlyRight = false; - if (C.hasValue() && *C && (*C)->isOneValue()) + if (C && *C && (*C)->isOneValue()) OnlyLeft = true; - else if (C.hasValue() && *C && (*C)->isZeroValue()) + else if (C && *C && (*C)->isZeroValue()) OnlyRight = true; - const AAPotentialValues *LHSAA = nullptr, *RHSAA = nullptr; + const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr; if (!OnlyRight) { - LHSAA = &A.getAAFor(*this, IRPosition::value(*LHS), - DepClassTy::REQUIRED); + LHSAA = &A.getAAFor( + *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); if (!LHSAA->isValidState()) return indicatePessimisticFixpoint(); } if (!OnlyLeft) { - RHSAA = &A.getAAFor(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); + RHSAA = &A.getAAFor( + *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); if (!RHSAA->isValidState()) return indicatePessimisticFixpoint(); } @@ -9049,17 +9338,17 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedSrc = A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedSrc.hasValue()) + if (!SimplifiedSrc) return ChangeStatus::UNCHANGED; if (!SimplifiedSrc.getValue()) return indicatePessimisticFixpoint(); Src = *SimplifiedSrc; - auto &SrcAA = A.getAAFor(*this, IRPosition::value(*Src), - DepClassTy::REQUIRED); + auto &SrcAA = A.getAAFor( + *this, IRPosition::value(*Src), DepClassTy::REQUIRED); if (!SrcAA.isValidState()) return indicatePessimisticFixpoint(); - const DenseSet &SrcAAPVS = SrcAA.getAssumedSet(); + const SetTy &SrcAAPVS = SrcAA.getAssumedSet(); if (SrcAA.undefIsContained()) unionAssumedWithUndef(); else { @@ -9082,7 +9371,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; if (!SimplifiedLHS.getValue()) return indicatePessimisticFixpoint(); @@ -9091,7 +9380,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; if (!SimplifiedRHS.getValue()) return indicatePessimisticFixpoint(); @@ -9100,18 +9389,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return indicatePessimisticFixpoint(); - auto &LHSAA = A.getAAFor(*this, IRPosition::value(*LHS), - 
DepClassTy::REQUIRED); + auto &LHSAA = A.getAAFor( + *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); if (!LHSAA.isValidState()) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); + auto &RHSAA = A.getAAFor( + *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); if (!RHSAA.isValidState()) return indicatePessimisticFixpoint(); - const DenseSet &LHSAAPVS = LHSAA.getAssumedSet(); - const DenseSet &RHSAAPVS = RHSAA.getAssumedSet(); + const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); + const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0); // TODO: make use of undef flag to limit potential values aggressively. @@ -9150,13 +9439,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedIncomingValue = A.getAssumedSimplified( IRPosition::value(*IncomingValue, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedIncomingValue.hasValue()) + if (!SimplifiedIncomingValue) continue; if (!SimplifiedIncomingValue.getValue()) return indicatePessimisticFixpoint(); IncomingValue = *SimplifiedIncomingValue; - auto &PotentialValuesAA = A.getAAFor( + auto &PotentialValuesAA = A.getAAFor( *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED); if (!PotentialValuesAA.isValidState()) return indicatePessimisticFixpoint(); @@ -9169,30 +9458,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { : ChangeStatus::CHANGED; } - ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) { - if (!L.getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto Union = [&](Value &V) { - if (isa(V)) { - unionAssumedWithUndef(); - return true; - } - if (ConstantInt *CI = dyn_cast(&V)) { - unionAssumed(CI->getValue()); - return true; - } - return false; - }; - auto AssumedBefore = getAssumed(); - - if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union)) - return indicatePessimisticFixpoint(); - - return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); @@ -9213,9 +9478,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (auto *PHI = dyn_cast(I)) return updateWithPHINode(A, PHI); - if (auto *L = dyn_cast(I)) - return updateWithLoad(A, *L); - return indicatePessimisticFixpoint(); } @@ -9225,14 +9487,15 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { } }; -struct AAPotentialValuesFunction : AAPotentialValuesImpl { - AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesImpl(IRP, A) {} +struct AAPotentialConstantValuesFunction : AAPotentialConstantValuesImpl { + AAPotentialConstantValuesFunction(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValuesImpl(IRP, A) {} /// See AbstractAttribute::initialize(...). 
ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will " - "not be called"); + llvm_unreachable( + "AAPotentialConstantValues(Function|CallSite)::updateImpl will " + "not be called"); } /// See AbstractAttribute::trackStatistics() @@ -9241,9 +9504,9 @@ struct AAPotentialValuesFunction : AAPotentialValuesImpl { } }; -struct AAPotentialValuesCallSite : AAPotentialValuesFunction { - AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesFunction(IRP, A) {} +struct AAPotentialConstantValuesCallSite : AAPotentialConstantValuesFunction { + AAPotentialConstantValuesCallSite(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValuesFunction(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { @@ -9251,11 +9514,13 @@ struct AAPotentialValuesCallSite : AAPotentialValuesFunction { } }; -struct AAPotentialValuesCallSiteReturned - : AACallSiteReturnedFromReturned { - AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A) - : AACallSiteReturnedFromReturned(IRP, A) {} +struct AAPotentialConstantValuesCallSiteReturned + : AACallSiteReturnedFromReturned { + AAPotentialConstantValuesCallSiteReturned(const IRPosition &IRP, + Attributor &A) + : AACallSiteReturnedFromReturned(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { @@ -9263,13 +9528,15 @@ struct AAPotentialValuesCallSiteReturned } }; -struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { - AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesFloating(IRP, A) {} +struct AAPotentialConstantValuesCallSiteArgument + : AAPotentialConstantValuesFloating { + AAPotentialConstantValuesCallSiteArgument(const IRPosition &IRP, + Attributor &A) + : AAPotentialConstantValuesFloating(IRP, A) {} /// See AbstractAttribute::initialize(..). void initialize(Attributor &A) override { - AAPotentialValuesImpl::initialize(A); + AAPotentialConstantValuesImpl::initialize(A); if (isAtFixpoint()) return; @@ -9292,8 +9559,8 @@ struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); auto AssumedBefore = getAssumed(); - auto &AA = A.getAAFor(*this, IRPosition::value(V), - DepClassTy::REQUIRED); + auto &AA = A.getAAFor( + *this, IRPosition::value(V), DepClassTy::REQUIRED); const auto &S = AA.getAssumed(); unionAssumed(S); return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED @@ -9365,7 +9632,7 @@ struct AANoUndefImpl : AANoUndef { // considered to be dead. We don't manifest noundef in such positions for // the same reason above. if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation) - .hasValue()) + .has_value()) return ChangeStatus::UNCHANGED; return AANoUndef::manifest(A); } @@ -9400,8 +9667,10 @@ struct AANoUndefFloating : public AANoUndefImpl { }; StateType T; + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); @@ -9518,9 +9787,10 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { // Process any value that we might call. 
auto ProcessCalledOperand = [&](Value *V) { bool DummyValue = false; + bool UsedAssumedInformation = false; if (!genericValueTraversal(A, IRPosition::value(*V), *this, DummyValue, VisitValue, nullptr, - false)) { + UsedAssumedInformation, false)) { // If we haven't gone through all values, assume that there are unknown // callees. setHasUnknownCallee(true, Change); @@ -9530,7 +9800,9 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { CallBase *CB = cast(getCtxI()); if (CB->isInlineAsm()) { - setHasUnknownCallee(false, Change); + if (!hasAssumption(*CB->getCaller(), "ompx_no_call_asm") && + !hasAssumption(*CB, "ompx_no_call_asm")) + setHasUnknownCallee(false, Change); return Change; } @@ -9584,7 +9856,8 @@ struct AACallEdgesFunction : public AACallEdgesImpl { // Visit all callable instructions. bool UsedAssumedInformation = false; if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this, - UsedAssumedInformation)) { + UsedAssumedInformation, + /* CheckBBLivenessOnly */ true)) { // If we haven't looked at all call like instructions, assume that there // are unknown callees. setHasUnknownCallee(true, Change); @@ -9656,7 +9929,7 @@ private: ArrayRef AAEdgesList, const Function &Fn) { Optional Cached = isCachedReachable(Fn); - if (Cached.hasValue()) + if (Cached) return Cached.getValue(); // The query was not cached, thus it is new. We need to request an update @@ -9691,6 +9964,10 @@ private: const SetVector &Edges = AAEdges->getOptimisticEdges(); for (Function *Edge : Edges) { + // Functions that do not call back into the module can be ignored. + if (Edge->hasFnAttribute(Attribute::NoCallback)) + continue; + // We don't need a dependency if the result is reachable. const AAFunctionReachability &EdgeReachability = A.getAAFor( @@ -9820,22 +10097,21 @@ public: } // Update the Instruction queries. - const AAReachability *Reachability; if (!InstQueries.empty()) { - Reachability = &A.getAAFor( + const AAReachability *Reachability = &A.getAAFor( *this, IRPosition::function(*getAssociatedFunction()), DepClassTy::REQUIRED); - } - // Check for local callbases first. - for (auto &InstPair : InstQueries) { - SmallVector CallEdges; - bool AllKnown = - getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); - // Update will return change if we this effects any queries. - if (!AllKnown) - InstPair.second.CanReachUnknownCallee = true; - Change |= InstPair.second.update(A, *this, CallEdges); + // Check for local callbases first. + for (auto &InstPair : InstQueries) { + SmallVector CallEdges; + bool AllKnown = + getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); + // Update will return change if we this effects any queries. + if (!AllKnown) + InstPair.second.CanReachUnknownCallee = true; + Change |= InstPair.second.update(A, *this, CallEdges); + } } return Change; @@ -9862,13 +10138,15 @@ private: /// Used to answer if a call base inside this function can reach a specific /// function. - DenseMap CBQueries; + MapVector CBQueries; /// This is for instruction queries than scan "forward". 
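
The inline-asm hunk above changes when a call edge to an unknown callee is recorded: inline assembly is now trusted not to call anything if either the caller or the call site carries the "ompx_no_call_asm" assumption (queried through llvm::hasAssumption in the real code). A plain-C++ model of the decision:

#include <algorithm>
#include <string>
#include <vector>

// True if the inline asm must be assumed to reach an unknown callee.
static bool asmMayCall(const std::vector<std::string> &CallerAssumptions,
                       const std::vector<std::string> &SiteAssumptions) {
  auto Has = [](const std::vector<std::string> &A) {
    return std::find(A.begin(), A.end(), "ompx_no_call_asm") != A.end();
  };
  return !Has(CallerAssumptions) && !Has(SiteAssumptions);
}
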
- DenseMap InstQueries; + MapVector InstQueries; }; +} // namespace /// ---------------------- Assumption Propagation ------------------------------ +namespace { struct AAAssumptionInfoImpl : public AAAssumptionInfo { AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A, const DenseSet &Known) @@ -9938,12 +10216,13 @@ struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl { return !getAssumed().empty() || !getKnown().empty(); }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; // Get the intersection of all assumptions held by this node's predecessors. // If we don't know all the call sites then this is either an entry into the // call graph or an empty node. This node is known to only contain its own // assumptions and can be propagated to its successors. - if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) + if (!A.checkForAllCallSites(CallSitePred, *this, true, + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; @@ -10001,6 +10280,7 @@ private: return Assumptions; } }; +} // namespace AACallGraphNode *AACallEdgeIterator::operator*() const { return static_cast(const_cast( @@ -10023,6 +10303,7 @@ const char AANoReturn::ID = 0; const char AAIsDead::ID = 0; const char AADereferenceable::ID = 0; const char AAAlign::ID = 0; +const char AAInstanceInfo::ID = 0; const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; @@ -10030,7 +10311,7 @@ const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; -const char AAPotentialValues::ID = 0; +const char AAPotentialConstantValues::ID = 0; const char AANoUndef::ID = 0; const char AACallEdges::ID = 0; const char AAFunctionReachability::ID = 0; @@ -10145,9 +10426,10 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 7c178f9a9834..9e27ae49a901 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -135,7 +135,8 @@ void BlockExtractor::loadFile() { if (LineSplit.empty()) continue; if (LineSplit.size()!=2) - report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'"); + report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'", + /*GenCrashDiag=*/false); SmallVector BBNames; LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false); @@ -194,13 +195,15 @@ bool BlockExtractor::runOnModule(Module &M) { for (const auto &BInfo : BlocksByName) { Function *F = M.getFunction(BInfo.first); if (!F) - report_fatal_error("Invalid function name specified in the input file"); + report_fatal_error("Invalid function name specified in the input file", + 
                         /*GenCrashDiag=*/false);
     for (const auto &BBInfo : BInfo.second) {
       auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
         return BB.getName().equals(BBInfo);
       });
       if (Res == F->end())
-        report_fatal_error("Invalid block name specified in the input file");
+        report_fatal_error("Invalid block name specified in the input file",
+                           /*GenCrashDiag=*/false);
       GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
     }
     ++NextGroupIdx;
@@ -212,7 +215,7 @@ bool BlockExtractor::runOnModule(Module &M) {
     for (BasicBlock *BB : BBs) {
       // Check if the module contains BB.
       if (BB->getParent()->getParent() != &M)
-        report_fatal_error("Invalid basic block");
+        report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
       LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
                         << BB->getParent()->getName() << ":" << BB->getName()
                         << "\n");
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 927dceec8865..64bfcb2a9a9f 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -19,11 +19,13 @@
 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
 #include "llvm/Analysis/SparsePropagation.h"
 #include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/IPO.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "called-value-propagation"
@@ -68,7 +70,7 @@ public:
     }
   };
 
-  CVPLatticeVal() : LatticeState(Undefined) {}
+  CVPLatticeVal() = default;
   CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {}
   CVPLatticeVal(std::vector &&Functions)
       : LatticeState(FunctionSet), Functions(std::move(Functions)) {
@@ -94,7 +96,7 @@ public:
 
 private:
   /// Holds the state this lattice value is in.
-  CVPLatticeStateTy LatticeState;
+  CVPLatticeStateTy LatticeState = Undefined;
 
   /// Holds functions indicating the possible targets of call sites.
This set /// is empty for lattice values in the undefined, overdefined, and untracked diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 178d3f41963e..73af30ece47c 100644 --- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -85,7 +85,7 @@ static void copyDebugLocMetadata(const GlobalVariable *From, } static Align getAlign(GlobalVariable *GV) { - return GV->getAlign().getValueOr( + return GV->getAlign().value_or( GV->getParent()->getDataLayout().getPreferredAlign(GV)); } diff --git a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp index 2fe9a59ad210..dfe33ac9da0d 100644 --- a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -15,21 +15,16 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" using namespace llvm; diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 2a6e38b0437f..99fa4baf355d 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -16,18 +16,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -44,9 +43,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include -#include #include #include @@ -55,36 +54,36 @@ using namespace llvm; #define DEBUG_TYPE "deadargelim" STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); -STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); -STATISTIC(NumArgumentsReplacedWithUndef, - "Number of unread args replaced with undef"); +STATISTIC(NumRetValsEliminated, "Number of unused return values removed"); +STATISTIC(NumArgumentsReplacedWithPoison, + "Number of unread args replaced with poison"); namespace { - /// DAE - The dead argument elimination pass. - class DAE : public ModulePass { - protected: - // DAH uses this to specify a different ID. - explicit DAE(char &ID) : ModulePass(ID) {} +/// The dead argument elimination pass. +class DAE : public ModulePass { +protected: + // DAH uses this to specify a different ID. 
+ explicit DAE(char &ID) : ModulePass(ID) {} - public: - static char ID; // Pass identification, replacement for typeid +public: + static char ID; // Pass identification, replacement for typeid - DAE() : ModulePass(ID) { - initializeDAEPass(*PassRegistry::getPassRegistry()); - } + DAE() : ModulePass(ID) { + initializeDAEPass(*PassRegistry::getPassRegistry()); + } - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - DeadArgumentEliminationPass DAEP(ShouldHackArguments()); - ModuleAnalysisManager DummyMAM; - PreservedAnalyses PA = DAEP.run(M, DummyMAM); - return !PA.areAllPreserved(); - } + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + DeadArgumentEliminationPass DAEP(shouldHackArguments()); + ModuleAnalysisManager DummyMAM; + PreservedAnalyses PA = DAEP.run(M, DummyMAM); + return !PA.areAllPreserved(); + } - virtual bool ShouldHackArguments() const { return false; } - }; + virtual bool shouldHackArguments() const { return false; } +}; } // end anonymous namespace @@ -94,51 +93,51 @@ INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false) namespace { - /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but - /// deletes arguments to functions which are external. This is only for use - /// by bugpoint. - struct DAH : public DAE { - static char ID; +/// The DeadArgumentHacking pass, same as dead argument elimination, but deletes +/// arguments to functions which are external. This is only for use by bugpoint. +struct DAH : public DAE { + static char ID; - DAH() : DAE(ID) {} + DAH() : DAE(ID) {} - bool ShouldHackArguments() const override { return true; } - }; + bool shouldHackArguments() const override { return true; } +}; } // end anonymous namespace char DAH::ID = 0; INITIALIZE_PASS(DAH, "deadarghaX0r", - "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", - false, false) + "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", false, + false) -/// createDeadArgEliminationPass - This pass removes arguments from functions -/// which are not used by the body of the function. +/// This pass removes arguments from functions which are not used by the body of +/// the function. ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } -/// DeleteDeadVarargs - If this is an function that takes a ... list, and if -/// llvm.vastart is never called, the varargs list is dead for the function. -bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { - assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); - if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; +/// If this is an function that takes a ... list, and if llvm.vastart is never +/// called, the varargs list is dead for the function. +bool DeadArgumentEliminationPass::deleteDeadVarargs(Function &F) { + assert(F.getFunctionType()->isVarArg() && "Function isn't varargs!"); + if (F.isDeclaration() || !F.hasLocalLinkage()) + return false; // Ensure that the function is only directly called. - if (Fn.hasAddressTaken()) + if (F.hasAddressTaken()) return false; // Don't touch naked functions. The assembly might be using an argument, or // otherwise rely on the frame layout in a way that this analysis will not // see. - if (Fn.hasFnAttribute(Attribute::Naked)) { + if (F.hasFnAttribute(Attribute::Naked)) { return false; } // Okay, we know we can transform this function if safe. 
Scan its body // looking for calls marked musttail or calls to llvm.vastart. - for (BasicBlock &BB : Fn) { + for (BasicBlock &BB : F) { for (Instruction &I : BB) { CallInst *CI = dyn_cast(&I); if (!CI) @@ -157,25 +156,24 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. - FunctionType *FTy = Fn.getFunctionType(); + FunctionType *FTy = F.getFunctionType(); std::vector Params(FTy->param_begin(), FTy->param_end()); - FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), - Params, false); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... - Function *NF = Function::Create(NFTy, Fn.getLinkage(), Fn.getAddressSpace()); - NF->copyAttributesFrom(&Fn); - NF->setComdat(Fn.getComdat()); - Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); - NF->takeName(&Fn); + Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace()); + NF->copyAttributesFrom(&F); + NF->setComdat(F.getComdat()); + F.getParent()->getFunctionList().insert(F.getIterator(), NF); + NF->takeName(&F); - // Loop over all of the callers of the function, transforming the call sites + // Loop over all the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. // std::vector Args; - for (User *U : llvm::make_early_inc_range(Fn.users())) { + for (User *U : llvm::make_early_inc_range(F.users())) { CallBase *CB = dyn_cast(U); if (!CB) continue; @@ -189,7 +187,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { SmallVector ArgAttrs; for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) ArgAttrs.push_back(PAL.getParamAttrs(ArgNo)); - PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(), + PAL = AttributeList::get(F.getContext(), PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs); } @@ -224,64 +222,67 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. - NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); + NF->getBasicBlockList().splice(NF->begin(), F.getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to - // the new arguments, also transferring over the names as well. While we're at - // it, remove the dead arguments from the DeadArguments list. - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), - I2 = NF->arg_begin(); I != E; ++I, ++I2) { + // the new arguments, also transferring over the names as well. While we're + // at it, remove the dead arguments from the DeadArguments list. + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(), + I2 = NF->arg_begin(); + I != E; ++I, ++I2) { // Move the name and users over to the new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); } - // Clone metadatas from the old function, including debug info descriptor. + // Clone metadata from the old function, including debug info descriptor. SmallVector, 1> MDs; - Fn.getAllMetadata(MDs); + F.getAllMetadata(MDs); for (auto MD : MDs) NF->addMetadata(MD.first, *MD.second); // Fix up any BlockAddresses that refer to the function. 
- Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); + F.replaceAllUsesWith(ConstantExpr::getBitCast(NF, F.getType())); // Delete the bitcast that we just created, so that NF does not // appear to be address-taken. NF->removeDeadConstantUsers(); // Finally, nuke the old function. - Fn.eraseFromParent(); + F.eraseFromParent(); return true; } -/// RemoveDeadArgumentsFromCallers - Checks if the given function has any -/// arguments that are unused, and changes the caller parameters to be undefined -/// instead. -bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { +/// Checks if the given function has any arguments that are unused, and changes +/// the caller parameters to be poison instead. +bool DeadArgumentEliminationPass::removeDeadArgumentsFromCallers(Function &F) { // We cannot change the arguments if this TU does not define the function or // if the linker may choose a function body from another TU, even if the // nominal linkage indicates that other copies of the function have the same // semantics. In the below example, the dead load from %p may not have been - // eliminated from the linker-chosen copy of f, so replacing %p with undef + // eliminated from the linker-chosen copy of f, so replacing %p with poison // in callers may introduce undefined behavior. // // define linkonce_odr void @f(i32* %p) { // %v = load i32 %p // ret void // } - if (!Fn.hasExactDefinition()) + if (!F.hasExactDefinition()) return false; - // Functions with local linkage should already have been handled, except the - // fragile (variadic) ones which we can improve here. - if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) + // Functions with local linkage should already have been handled, except if + // they are fully alive (e.g., called indirectly) and except for the fragile + // (variadic) ones. In these cases, we may still be able to improve their + // statically known call sites. + if ((F.hasLocalLinkage() && !LiveFunctions.count(&F)) && + !F.getFunctionType()->isVarArg()) return false; // Don't touch naked functions. The assembly might be using an argument, or // otherwise rely on the frame layout in a way that this analysis will not // see. - if (Fn.hasFnAttribute(Attribute::Naked)) + if (F.hasFnAttribute(Attribute::Naked)) return false; - if (Fn.use_empty()) + if (F.use_empty()) return false; SmallVector UnusedArgs; @@ -289,35 +290,36 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { AttributeMask UBImplyingAttributes = AttributeFuncs::getUBImplyingAttributes(); - for (Argument &Arg : Fn.args()) { + for (Argument &Arg : F.args()) { if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasPassPointeeByValueCopyAttr()) { if (Arg.isUsedByMetadata()) { - Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); + Arg.replaceAllUsesWith(PoisonValue::get(Arg.getType())); Changed = true; } UnusedArgs.push_back(Arg.getArgNo()); - Fn.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes); + F.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes); } } if (UnusedArgs.empty()) return false; - for (Use &U : Fn.uses()) { + for (Use &U : F.uses()) { CallBase *CB = dyn_cast(U.getUser()); - if (!CB || !CB->isCallee(&U)) + if (!CB || !CB->isCallee(&U) || + CB->getFunctionType() != F.getFunctionType()) continue; - // Now go through all unused args and replace them with "undef". + // Now go through all unused args and replace them with poison. 
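
The loop that follows replaces each dead argument with poison rather than undef; poison is the stronger "don't care" value and cannot be refined back into a concrete one by later passes. Condensed to its core (a sketch only; the full loop also strips UB-implying parameter attributes and bumps the statistic):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Overwrite each dead argument slot of the call with poison.
static void poisonUnusedArgs(CallBase &CB, ArrayRef<unsigned> UnusedArgNos) {
  for (unsigned ArgNo : UnusedArgNos) {
    Value *Old = CB.getArgOperand(ArgNo);
    CB.setArgOperand(ArgNo, PoisonValue::get(Old->getType()));
  }
}
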
for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) { unsigned ArgNo = UnusedArgs[I]; Value *Arg = CB->getArgOperand(ArgNo); - CB->setArgOperand(ArgNo, UndefValue::get(Arg->getType())); + CB->setArgOperand(ArgNo, PoisonValue::get(Arg->getType())); CB->removeParamAttrs(ArgNo, UBImplyingAttributes); - ++NumArgumentsReplacedWithUndef; + ++NumArgumentsReplacedWithPoison; Changed = true; } } @@ -328,16 +330,15 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { /// Convenience function that returns the number of return values. It returns 0 /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. -static unsigned NumRetVals(const Function *F) { +static unsigned numRetVals(const Function *F) { Type *RetTy = F->getReturnType(); if (RetTy->isVoidTy()) return 0; - else if (StructType *STy = dyn_cast(RetTy)) + if (StructType *STy = dyn_cast(RetTy)) return STy->getNumElements(); - else if (ArrayType *ATy = dyn_cast(RetTy)) + if (ArrayType *ATy = dyn_cast(RetTy)) return ATy->getNumElements(); - else - return 1; + return 1; } /// Returns the sub-type a function will return at a given Idx. Should @@ -349,20 +350,18 @@ static Type *getRetComponentType(const Function *F, unsigned Idx) { if (StructType *STy = dyn_cast(RetTy)) return STy->getElementType(Idx); - else if (ArrayType *ATy = dyn_cast(RetTy)) + if (ArrayType *ATy = dyn_cast(RetTy)) return ATy->getElementType(); - else - return RetTy; + return RetTy; } -/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not -/// live, it adds Use to the MaybeLiveUses argument. Returns the determined -/// liveness of Use. +/// Checks Use for liveness in LiveValues. If Use is not live, it adds Use to +/// the MaybeLiveUses argument. Returns the determined liveness of Use. DeadArgumentEliminationPass::Liveness -DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, +DeadArgumentEliminationPass::markIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { // We're live if our use or its Function is already marked as live. - if (IsLive(Use)) + if (isLive(Use)) return Live; // We're maybe live otherwise, but remember that we must become live if @@ -371,127 +370,127 @@ DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, return MaybeLive; } -/// SurveyUse - This looks at a single use of an argument or return value -/// and determines if it should be alive or not. Adds this use to MaybeLiveUses -/// if it causes the used value to become MaybeLive. +/// Looks at a single use of an argument or return value and determines if it +/// should be alive or not. Adds this use to MaybeLiveUses if it causes the +/// used value to become MaybeLive. /// /// RetValNum is the return value number to use when this use is used in a /// return instruction. This is used in the recursion, you should always leave /// it at 0. DeadArgumentEliminationPass::Liveness -DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses, +DeadArgumentEliminationPass::surveyUse(const Use *U, UseVector &MaybeLiveUses, unsigned RetValNum) { - const User *V = U->getUser(); - if (const ReturnInst *RI = dyn_cast(V)) { - // The value is returned from a function. It's only live when the - // function's return value is live. We use RetValNum here, for the case - // that U is really a use of an insertvalue instruction that uses the - // original Use. 
- const Function *F = RI->getParent()->getParent(); - if (RetValNum != -1U) { - RetOrArg Use = CreateRet(F, RetValNum); - // We might be live, depending on the liveness of Use. - return MarkIfNotLive(Use, MaybeLiveUses); - } else { - DeadArgumentEliminationPass::Liveness Result = MaybeLive; - for (unsigned Ri = 0; Ri < NumRetVals(F); ++Ri) { - RetOrArg Use = CreateRet(F, Ri); - // We might be live, depending on the liveness of Use. If any - // sub-value is live, then the entire value is considered live. This - // is a conservative choice, and better tracking is possible. - DeadArgumentEliminationPass::Liveness SubResult = - MarkIfNotLive(Use, MaybeLiveUses); - if (Result != Live) - Result = SubResult; - } - return Result; - } + const User *V = U->getUser(); + if (const ReturnInst *RI = dyn_cast(V)) { + // The value is returned from a function. It's only live when the + // function's return value is live. We use RetValNum here, for the case + // that U is really a use of an insertvalue instruction that uses the + // original Use. + const Function *F = RI->getParent()->getParent(); + if (RetValNum != -1U) { + RetOrArg Use = createRet(F, RetValNum); + // We might be live, depending on the liveness of Use. + return markIfNotLive(Use, MaybeLiveUses); } - if (const InsertValueInst *IV = dyn_cast(V)) { - if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() - && IV->hasIndices()) - // The use we are examining is inserted into an aggregate. Our liveness - // depends on all uses of that aggregate, but if it is used as a return - // value, only index at which we were inserted counts. - RetValNum = *IV->idx_begin(); - - // Note that if we are used as the aggregate operand to the insertvalue, - // we don't change RetValNum, but do survey all our uses. - - Liveness Result = MaybeLive; - for (const Use &UU : IV->uses()) { - Result = SurveyUse(&UU, MaybeLiveUses, RetValNum); - if (Result == Live) - break; - } - return Result; + + DeadArgumentEliminationPass::Liveness Result = MaybeLive; + for (unsigned Ri = 0; Ri < numRetVals(F); ++Ri) { + RetOrArg Use = createRet(F, Ri); + // We might be live, depending on the liveness of Use. If any + // sub-value is live, then the entire value is considered live. This + // is a conservative choice, and better tracking is possible. + DeadArgumentEliminationPass::Liveness SubResult = + markIfNotLive(Use, MaybeLiveUses); + if (Result != Live) + Result = SubResult; + } + return Result; + } + + if (const InsertValueInst *IV = dyn_cast(V)) { + if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() && + IV->hasIndices()) + // The use we are examining is inserted into an aggregate. Our liveness + // depends on all uses of that aggregate, but if it is used as a return + // value, only index at which we were inserted counts. + RetValNum = *IV->idx_begin(); + + // Note that if we are used as the aggregate operand to the insertvalue, + // we don't change RetValNum, but do survey all our uses. + + Liveness Result = MaybeLive; + for (const Use &UU : IV->uses()) { + Result = surveyUse(&UU, MaybeLiveUses, RetValNum); + if (Result == Live) + break; } + return Result; + } - if (const auto *CB = dyn_cast(V)) { - const Function *F = CB->getCalledFunction(); - if (F) { - // Used in a direct call. + if (const auto *CB = dyn_cast(V)) { + const Function *F = CB->getCalledFunction(); + if (F) { + // Used in a direct call. - // The function argument is live if it is used as a bundle operand. 
- if (CB->isBundleOperand(U)) - return Live; + // The function argument is live if it is used as a bundle operand. + if (CB->isBundleOperand(U)) + return Live; - // Find the argument number. We know for sure that this use is an - // argument, since if it was the function argument this would be an - // indirect call and the we know can't be looking at a value of the - // label type (for the invoke instruction). - unsigned ArgNo = CB->getArgOperandNo(U); + // Find the argument number. We know for sure that this use is an + // argument, since if it was the function argument this would be an + // indirect call and that we know can't be looking at a value of the + // label type (for the invoke instruction). + unsigned ArgNo = CB->getArgOperandNo(U); - if (ArgNo >= F->getFunctionType()->getNumParams()) - // The value is passed in through a vararg! Must be live. - return Live; + if (ArgNo >= F->getFunctionType()->getNumParams()) + // The value is passed in through a vararg! Must be live. + return Live; - assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) && - "Argument is not where we expected it"); + assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) && + "Argument is not where we expected it"); - // Value passed to a normal call. It's only live when the corresponding - // argument to the called function turns out live. - RetOrArg Use = CreateArg(F, ArgNo); - return MarkIfNotLive(Use, MaybeLiveUses); - } + // Value passed to a normal call. It's only live when the corresponding + // argument to the called function turns out live. + RetOrArg Use = createArg(F, ArgNo); + return markIfNotLive(Use, MaybeLiveUses); } - // Used in any other way? Value must be live. - return Live; + } + // Used in any other way? Value must be live. + return Live; } -/// SurveyUses - This looks at all the uses of the given value +/// Looks at all the uses of the given value /// Returns the Liveness deduced from the uses of this value. /// /// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If /// the result is Live, MaybeLiveUses might be modified but its content should /// be ignored (since it might not be complete). DeadArgumentEliminationPass::Liveness -DeadArgumentEliminationPass::SurveyUses(const Value *V, +DeadArgumentEliminationPass::surveyUses(const Value *V, UseVector &MaybeLiveUses) { // Assume it's dead (which will only hold if there are no uses at all..). Liveness Result = MaybeLive; // Check each use. for (const Use &U : V->uses()) { - Result = SurveyUse(&U, MaybeLiveUses); + Result = surveyUse(&U, MaybeLiveUses); if (Result == Live) break; } return Result; } -// SurveyFunction - This performs the initial survey of the specified function, -// checking out whether or not it uses any of its incoming arguments or whether -// any callers use the return value. This fills in the LiveValues set and Uses -// map. -// -// We consider arguments of non-internal functions to be intrinsically alive as -// well as arguments to functions which have their "address taken". -void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { +/// Performs the initial survey of the specified function, checking out whether +/// it uses any of its incoming arguments or whether any callers use the return +/// value. This fills in the LiveValues set and Uses map. +/// +/// We consider arguments of non-internal functions to be intrinsically alive as +/// well as arguments to functions which have their "address taken". 
+void DeadArgumentEliminationPass::surveyFunction(const Function &F) { // Functions with inalloca/preallocated parameters are expecting args in a // particular register and memory layout. if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { - MarkLive(F); + markLive(F); return; } @@ -499,11 +498,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // otherwise rely on the frame layout in a way that this analysis will not // see. if (F.hasFnAttribute(Attribute::Naked)) { - MarkLive(F); + markLive(F); return; } - unsigned RetCount = NumRetVals(&F); + unsigned RetCount = numRetVals(&F); // Assume all return values are dead using RetVals = SmallVector; @@ -518,20 +517,10 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { RetUses MaybeLiveRetUses(RetCount); bool HasMustTailCalls = false; - - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (const ReturnInst *RI = dyn_cast(BB->getTerminator())) { - if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() - != F.getFunctionType()->getReturnType()) { - // We don't support old style multiple return values. - MarkLive(F); - return; - } - } - + for (const BasicBlock &BB : F) { // If we have any returns of `musttail` results - the signature can't // change - if (BB->getTerminatingMustTailCall() != nullptr) + if (BB.getTerminatingMustTailCall() != nullptr) HasMustTailCalls = true; } @@ -541,7 +530,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) { - MarkLive(F); + markLive(F); return; } @@ -559,8 +548,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // If the function is PASSED IN as an argument, its address has been // taken. const auto *CB = dyn_cast(U.getUser()); - if (!CB || !CB->isCallee(&U)) { - MarkLive(F); + if (!CB || !CB->isCallee(&U) || + CB->getFunctionType() != F.getFunctionType()) { + markLive(F); return; } @@ -577,13 +567,13 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { continue; // Check all uses of the return value. - for (const Use &U : CB->uses()) { - if (ExtractValueInst *Ext = dyn_cast(U.getUser())) { + for (const Use &UU : CB->uses()) { + if (ExtractValueInst *Ext = dyn_cast(UU.getUser())) { // This use uses a part of our return value, survey the uses of // that part and store the results for this index only. unsigned Idx = *Ext->idx_begin(); if (RetValLiveness[Idx] != Live) { - RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]); + RetValLiveness[Idx] = surveyUses(Ext, MaybeLiveRetUses[Idx]); if (RetValLiveness[Idx] == Live) NumLiveRetVals++; } @@ -591,16 +581,16 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Used by something else than extractvalue. Survey, but assume that the // result applies to all sub-values. 
UseVector MaybeLiveAggregateUses; - if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) { + if (surveyUse(&UU, MaybeLiveAggregateUses) == Live) { NumLiveRetVals = RetCount; RetValLiveness.assign(RetCount, Live); break; - } else { - for (unsigned Ri = 0; Ri != RetCount; ++Ri) { - if (RetValLiveness[Ri] != Live) - MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(), - MaybeLiveAggregateUses.end()); - } + } + + for (unsigned Ri = 0; Ri != RetCount; ++Ri) { + if (RetValLiveness[Ri] != Live) + MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(), + MaybeLiveAggregateUses.end()); } } } @@ -613,7 +603,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Now we've inspected all callers, record the liveness of our return values. for (unsigned Ri = 0; Ri != RetCount; ++Ri) - MarkValue(CreateRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]); + markValue(createRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]); LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: " << F.getName() << "\n"); @@ -641,81 +631,77 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } else { // See what the effect of this use is (recording any uses that cause // MaybeLive in MaybeLiveArgUses). - Result = SurveyUses(&*AI, MaybeLiveArgUses); + Result = surveyUses(&*AI, MaybeLiveArgUses); } // Mark the result. - MarkValue(CreateArg(&F, ArgI), Result, MaybeLiveArgUses); + markValue(createArg(&F, ArgI), Result, MaybeLiveArgUses); // Clear the vector again for the next iteration. MaybeLiveArgUses.clear(); } } -/// MarkValue - This function marks the liveness of RA depending on L. If L is -/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses, -/// such that RA will be marked live if any use in MaybeLiveUses gets marked -/// live later on. -void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L, +/// Marks the liveness of RA depending on L. If L is MaybeLive, it also takes +/// all uses in MaybeLiveUses and records them in Uses, such that RA will be +/// marked live if any use in MaybeLiveUses gets marked live later on. +void DeadArgumentEliminationPass::markValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses) { switch (L) { - case Live: - MarkLive(RA); - break; - case MaybeLive: - assert(!IsLive(RA) && "Use is already live!"); - for (const auto &MaybeLiveUse : MaybeLiveUses) { - if (IsLive(MaybeLiveUse)) { - // A use is live, so this value is live. - MarkLive(RA); - break; - } else { - // Note any uses of this value, so this value can be - // marked live whenever one of the uses becomes live. - Uses.insert(std::make_pair(MaybeLiveUse, RA)); - } + case Live: + markLive(RA); + break; + case MaybeLive: + assert(!isLive(RA) && "Use is already live!"); + for (const auto &MaybeLiveUse : MaybeLiveUses) { + if (isLive(MaybeLiveUse)) { + // A use is live, so this value is live. + markLive(RA); + break; } - break; + // Note any uses of this value, so this value can be + // marked live whenever one of the uses becomes live. + Uses.emplace(MaybeLiveUse, RA); + } + break; } } -/// MarkLive - Mark the given Function as alive, meaning that it cannot be -/// changed in any way. Additionally, -/// mark any values that are used as this function's parameters or by its return -/// values (according to Uses) live as well. -void DeadArgumentEliminationPass::MarkLive(const Function &F) { +/// Mark the given Function as alive, meaning that it cannot be changed in any +/// way. 
Additionally, mark any values that are used as this function's +/// parameters or by its return values (according to Uses) live as well. +void DeadArgumentEliminationPass::markLive(const Function &F) { LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Intrinsically live fn: " << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. for (unsigned ArgI = 0, E = F.arg_size(); ArgI != E; ++ArgI) - PropagateLiveness(CreateArg(&F, ArgI)); + propagateLiveness(createArg(&F, ArgI)); // Mark all return values as live. - for (unsigned Ri = 0, E = NumRetVals(&F); Ri != E; ++Ri) - PropagateLiveness(CreateRet(&F, Ri)); + for (unsigned Ri = 0, E = numRetVals(&F); Ri != E; ++Ri) + propagateLiveness(createRet(&F, Ri)); } -/// MarkLive - Mark the given return value or argument as live. Additionally, -/// mark any values that are used by this value (according to Uses) live as -/// well. -void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) { - if (IsLive(RA)) +/// Mark the given return value or argument as live. Additionally, mark any +/// values that are used by this value (according to Uses) live as well. +void DeadArgumentEliminationPass::markLive(const RetOrArg &RA) { + if (isLive(RA)) return; // Already marked Live. LiveValues.insert(RA); LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking " << RA.getDescription() << " live\n"); - PropagateLiveness(RA); + propagateLiveness(RA); } -bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) { +bool DeadArgumentEliminationPass::isLive(const RetOrArg &RA) { return LiveFunctions.count(RA.F) || LiveValues.count(RA); } -/// PropagateLiveness - Given that RA is a live value, propagate it's liveness -/// to any other values it uses (according to Uses). -void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) { +/// Given that RA is a live value, propagate it's liveness to any other values +/// it uses (according to Uses). +void DeadArgumentEliminationPass::propagateLiveness(const RetOrArg &RA) { // We don't use upper_bound (or equal_range) here, because our recursive call // to ourselves is likely to cause the upper_bound (which is the first value // not belonging to RA) to become erased and the iterator invalidated. @@ -723,18 +709,17 @@ void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) { UseMap::iterator E = Uses.end(); UseMap::iterator I; for (I = Begin; I != E && I->first == RA; ++I) - MarkLive(I->second); + markLive(I->second); // Erase RA from the Uses map (from the lower bound to wherever we ended up // after the loop). Uses.erase(Begin, I); } -// RemoveDeadStuffFromFunction - Remove any arguments and return values from F -// that are not in LiveValues. Transform the function and all of the callees of -// the function to not have these arguments and return values. -// -bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { +/// Remove any arguments and return values from F that are not in LiveValues. +/// Transform the function and all the callees of the function to not have these +/// arguments and return values. +bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) { // Don't modify fully live functions if (LiveFunctions.count(F)) return false; @@ -742,7 +727,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. 
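// A standalone sketch (plain C++ with a stand-in key type, not the code from
// this patch) of the propagation scheme above: Uses maps a maybe-live value
// to the values whose liveness depends on it. Marking a value live drains
// its bucket and recurses; the loop re-checks against end() on each step
// instead of caching an upper bound that erase() could invalidate, which is
// exactly the hazard the comment above describes.
#include <map>
#include <set>

using RetOrArg = int; // stand-in identifying a return index or argument

inline void markLive(RetOrArg RA, std::set<RetOrArg> &LiveValues,
                     std::multimap<RetOrArg, RetOrArg> &Uses) {
  if (!LiveValues.insert(RA).second)
    return; // already live, nothing to propagate
  auto It = Uses.lower_bound(RA);
  while (It != Uses.end() && It->first == RA) {
    RetOrArg Dependent = It->second;
    It = Uses.erase(It); // erase first; recursion never touches key RA again
    markLive(Dependent, LiveValues, Uses);
  }
}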
FunctionType *FTy = F->getFunctionType(); - std::vector Params; + std::vector Params; // Keep track of if we have a live 'returned' argument bool HasLiveReturnedArg = false; @@ -759,7 +744,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { unsigned ArgI = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgI) { - RetOrArg Arg = CreateArg(F, ArgI); + RetOrArg Arg = createArg(F, ArgI); if (LiveValues.erase(Arg)) { Params.push_back(I->getType()); ArgAlive[ArgI] = true; @@ -776,11 +761,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. Type *RetTy = FTy->getReturnType(); Type *NRetTy = nullptr; - unsigned RetCount = NumRetVals(F); + unsigned RetCount = numRetVals(F); // -1 means unused, other numbers are the new index SmallVector NewRetIdxs(RetCount, -1); - std::vector RetTypes; + std::vector RetTypes; // If there is a function with a live 'returned' argument but a dead return // value, then there are two possible actions: @@ -792,9 +777,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // It's not clear in the general case which option is more profitable because, // even in the absence of explicit uses of the return value, code generation // is free to use the 'returned' attribute to do things like eliding - // save/restores of registers across calls. Whether or not this happens is - // target and ABI-specific as well as depending on the amount of register - // pressure, so there's no good way for an IR-level pass to figure this out. + // save/restores of registers across calls. Whether this happens is target and + // ABI-specific as well as depending on the amount of register pressure, so + // there's no good way for an IR-level pass to figure this out. // // Fortunately, the only places where 'returned' is currently generated by // the FE are places where 'returned' is basically free and almost always a @@ -806,7 +791,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { } else { // Look at each of the original return values individually. for (unsigned Ri = 0; Ri != RetCount; ++Ri) { - RetOrArg Ret = CreateRet(F, Ri); + RetOrArg Ret = createRet(F, Ri); if (LiveValues.erase(Ret)) { RetTypes.push_back(getRetComponentType(F, Ri)); NewRetIdxs[Ri] = RetTypes.size() - 1; @@ -879,9 +864,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Loop over all of the callers of the function, transforming the call sites - // to pass in a smaller number of arguments into the new function. - std::vector Args; + // Loop over all the callers of the function, transforming the call sites to + // pass in a smaller number of arguments into the new function. + std::vector Args; while (!F->use_empty()) { CallBase &CB = cast(*F->user_back()); @@ -896,7 +881,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. - auto I = CB.arg_begin(); + auto *I = CB.arg_begin(); unsigned Pi = 0; // Loop over those operands, corresponding to the normal arguments to the // original function, and add those that are still alive. 
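// A standalone sketch (plain C++, with a string standing in for llvm::Type *)
// of the signature shrinking performed above: the liveness mask computed
// during the survey selects which of the original parameter types survive
// into the new prototype; dead arguments are simply not carried over.
#include <cstddef>
#include <vector>
#include <string>

using TypeId = std::string; // stand-in for llvm::Type *

inline std::vector<TypeId>
buildLiveParams(const std::vector<TypeId> &OldParams,
                const std::vector<bool> &ArgAlive) {
  std::vector<TypeId> Params;
  for (std::size_t I = 0; I != OldParams.size(); ++I)
    if (ArgAlive[I])
      Params.push_back(OldParams[I]);
  return Params;
}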
@@ -909,11 +894,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // If the return type has changed, then get rid of 'returned' on the // call site. The alternative is to make all 'returned' attributes on // call sites keep the return value alive just like 'returned' - // attributes on function declaration but it's less clearly a win and + // attributes on function declaration, but it's less clearly a win and // this is not an expected case anyway ArgAttrVec.push_back(AttributeSet::get( - F->getContext(), - AttrBuilder(F->getContext(), Attrs).removeAttribute(Attribute::Returned))); + F->getContext(), AttrBuilder(F->getContext(), Attrs) + .removeAttribute(Attribute::Returned))); } else { // Otherwise, use the original attributes. ArgAttrVec.push_back(Attrs); @@ -921,7 +906,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { } // Push any varargs arguments on the list. Don't forget their attributes. - for (auto E = CB.arg_end(); I != E; ++I, ++Pi) { + for (auto *E = CB.arg_end(); I != E; ++I, ++Pi) { Args.push_back(*I); ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi)); } @@ -934,8 +919,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute( F->getContext(), Attribute::AllocSize); - AttributeList NewCallPAL = AttributeList::get( - F->getContext(), FnAttrs, RetAttrs, ArgAttrVec); + AttributeList NewCallPAL = + AttributeList::get(F->getContext(), FnAttrs, RetAttrs, ArgAttrVec); SmallVector OpBundles; CB.getOperandBundlesAsDefs(OpBundles); @@ -961,10 +946,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { CB.replaceAllUsesWith(NewCB); NewCB->takeName(&CB); } else if (NewCB->getType()->isVoidTy()) { - // If the return value is dead, replace any uses of it with undef + // If the return value is dead, replace any uses of it with poison // (any non-debug value uses will get removed later on). if (!CB.getType()->isX86_MMXTy()) - CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } else { assert((RetTy->isStructTy() || RetTy->isArrayTy()) && "Return type changed, but not into a void. The old return type" @@ -980,8 +965,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // with all the uses, we will just rebuild it using extract/insertvalue // chaining and let instcombine clean that up. // - // Start out building up our return value from undef - Value *RetVal = UndefValue::get(RetTy); + // Start out building up our return value from poison + Value *RetVal = PoisonValue::get(RetTy); for (unsigned Ri = 0; Ri != RetCount; ++Ri) if (NewRetIdxs[Ri] != -1) { Value *V; @@ -1026,10 +1011,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { I2->takeName(&*I); ++I2; } else { - // If this argument is dead, replace any uses of it with undef + // If this argument is dead, replace any uses of it with poison // (any non-debug value uses will get removed later on). if (!I->getType()->isX86_MMXTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); } // If we change the return value of the function we must rewrite any return @@ -1048,8 +1033,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // This does generate messy code, but we'll let it to instcombine to // clean that up. 
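// A standalone sketch (plain C++) of the index remapping used when rebuilding
// aggregate returns above: each original return index either maps to its
// position in the compacted return struct or to -1 when that sub-value was
// found dead, matching the NewRetIdxs convention in the hunks.
#include <cstddef>
#include <vector>

inline std::vector<int> buildRetIndexMap(const std::vector<bool> &RetAlive) {
  std::vector<int> NewRetIdxs(RetAlive.size(), -1);
  int Next = 0;
  for (std::size_t Ri = 0; Ri != RetAlive.size(); ++Ri)
    if (RetAlive[Ri])
      NewRetIdxs[Ri] = Next++; // live sub-values keep a slot, shifted down
  return NewRetIdxs;
}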
Value *OldRet = RI->getOperand(0); - // Start out building up our return value from undef - RetVal = UndefValue::get(NRetTy); + // Start out building up our return value from poison + RetVal = PoisonValue::get(NRetTy); for (unsigned RetI = 0; RetI != RetCount; ++RetI) if (NewRetIdxs[RetI] != -1) { Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret"); @@ -1074,12 +1059,22 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { BB.getInstList().erase(RI); } - // Clone metadatas from the old function, including debug info descriptor. + // Clone metadata from the old function, including debug info descriptor. SmallVector, 1> MDs; F->getAllMetadata(MDs); for (auto MD : MDs) NF->addMetadata(MD.first, *MD.second); + // If either the return value(s) or argument(s) are removed, then probably the + // function does not follow standard calling conventions anymore. Hence, add + // DW_CC_nocall to DISubroutineType to inform debugger that it may not be safe + // to call this function or try to interpret the return value. + if (NFTy != FTy && NF->getSubprogram()) { + DISubprogram *SP = NF->getSubprogram(); + auto Temp = SP->getType()->cloneWithCC(llvm::dwarf::DW_CC_nocall); + SP->replaceType(MDNode::replaceWithPermanent(std::move(Temp))); + } + // Now that the old function is dead, delete it. F->eraseFromParent(); @@ -1097,26 +1092,25 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M, LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n"); for (Function &F : llvm::make_early_inc_range(M)) if (F.getFunctionType()->isVarArg()) - Changed |= DeleteDeadVarargs(F); + Changed |= deleteDeadVarargs(F); - // Second phase:loop through the module, determining which arguments are live. - // We assume all arguments are dead unless proven otherwise (allowing us to - // determine that dead arguments passed into recursive functions are dead). - // + // Second phase: Loop through the module, determining which arguments are + // live. We assume all arguments are dead unless proven otherwise (allowing us + // to determine that dead arguments passed into recursive functions are dead). LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Determining liveness\n"); for (auto &F : M) - SurveyFunction(F); + surveyFunction(F); // Now, remove all dead arguments and return values from each function in // turn. We use make_early_inc_range here because functions will probably get // removed (i.e. replaced by new ones). for (Function &F : llvm::make_early_inc_range(M)) - Changed |= RemoveDeadStuffFromFunction(&F); + Changed |= removeDeadStuffFromFunction(&F); // Finally, look for any unused parameters in functions with non-local - // linkage and replace the passed in parameters with undef. + // linkage and replace the passed in parameters with poison. 
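// A standalone skeleton (plain C++ with trivial stubs) of the four-phase
// pipeline the run() hunks above describe. The phase names match the
// functions in this patch; everything else is a stand-in. The real pass
// iterates with make_early_inc_range because rewritten functions are
// replaced by new ones mid-walk; the stub module below sidesteps that.
#include <vector>

struct Function {};
struct Module { std::vector<Function> Fns; };

// Trivial stubs standing in for the phases named above.
inline bool deleteDeadVarargs(Function &) { return false; }
inline void surveyFunction(const Function &) {}
inline bool removeDeadStuffFromFunction(Function *) { return false; }
inline bool removeDeadArgumentsFromCallers(Function &) { return false; }

inline bool runDeadArgElim(Module &M) {
  bool Changed = false;
  for (Function &F : M.Fns)
    Changed |= deleteDeadVarargs(F);              // phase 1: dead varargs
  for (Function &F : M.Fns)
    surveyFunction(F);                            // phase 2: liveness survey
  for (Function &F : M.Fns)
    Changed |= removeDeadStuffFromFunction(&F);   // phase 3: rewrite
  for (Function &F : M.Fns)
    Changed |= removeDeadArgumentsFromCallers(F); // phase 4: caller cleanup
  return Changed;
}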
for (auto &F : M) - Changed |= RemoveDeadArgumentsFromCallers(F); + Changed |= removeDeadArgumentsFromCallers(F); if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp index 387f114f6ffa..84280781ee70 100644 --- a/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 16d00a0c89e1..b10c2ea13469 100644 --- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -8,9 +8,9 @@ #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 213a998d5bba..49077f92884f 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -30,7 +30,6 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -45,6 +44,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -69,6 +69,7 @@ using namespace llvm; #define DEBUG_TYPE "function-attrs" +STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly"); STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumWriteOnly, "Number of functions marked writeonly"); @@ -121,28 +122,28 @@ using SCCNodeSet = SmallSetVector; /// result will be based only on AA results for the function declaration; it /// will be assumed that some other (perhaps less optimized) version of the /// function may be selected at link time. -static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, - AAResults &AAR, - const SCCNodeSet &SCCNodes) { +static FunctionModRefBehavior +checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, + const SCCNodeSet &SCCNodes) { FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); if (MRB == FMRB_DoesNotAccessMemory) // Already perfect! - return MAK_ReadNone; + return MRB; - if (!ThisBody) { - if (AliasAnalysis::onlyReadsMemory(MRB)) - return MAK_ReadOnly; - - if (AliasAnalysis::onlyWritesMemory(MRB)) - return MAK_WriteOnly; - - // Conservatively assume it reads and writes to memory. - return MAK_MayWrite; - } + if (!ThisBody) + return MRB; // Scan the function body for instructions that may read or write memory. bool ReadsMemory = false; bool WritesMemory = false; + // Track if the function accesses memory not based on pointer arguments or + // allocas. 
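// A standalone sketch (plain C++, simplified object model, not the LLVM code)
// of the classification the FunctionAttrs hunks below introduce: an access
// counts as "argument or alloca memory" when the pointer, traced to its
// underlying object, is a function argument or a local allocation.
enum class ObjKind { Argument, Alloca, Global, Unknown };
struct Obj { ObjKind Kind = ObjKind::Unknown; const Obj *Base = nullptr; };

// Walk simple derivation chains, a stand-in for getUnderlyingObject.
inline const Obj *underlyingObject(const Obj *P) {
  while (P->Base)
    P = P->Base;
  return P;
}

inline bool isArgumentOrAllocaMemory(const Obj *Ptr) {
  ObjKind K = underlyingObject(Ptr)->Kind;
  return K == ObjKind::Argument || K == ObjKind::Alloca;
}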
+ bool AccessesNonArgsOrAlloca = false; + // Returns true if Ptr is not based on a function argument. + auto IsArgumentOrAlloca = [](const Value *Ptr) { + const Value *UO = getUnderlyingObject(Ptr); + return isa(UO) || isa(UO); + }; for (Instruction &I : instructions(F)) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. @@ -175,6 +176,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, // If it reads, note it. if (isRefSet(MRI)) ReadsMemory = true; + AccessesNonArgsOrAlloca = true; continue; } @@ -187,12 +189,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata()); - // Skip accesses to local or constant memory as they don't impact the // externally visible mod/ref behavior. if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); + if (isModSet(MRI)) // Writes non-local memory. WritesMemory = true; @@ -202,24 +205,29 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, } continue; } else if (LoadInst *LI = dyn_cast(&I)) { + MemoryLocation Loc = MemoryLocation::get(LI); // Ignore non-volatile loads from local memory. (Atomic is okay here.) - if (!LI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(LI); - if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } + if (!LI->isVolatile() && + AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); } else if (StoreInst *SI = dyn_cast(&I)) { + MemoryLocation Loc = MemoryLocation::get(SI); // Ignore non-volatile stores to local memory. (Atomic is okay here.) - if (!SI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(SI); - if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } + if (!SI->isVolatile() && + AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); } else if (VAArgInst *VI = dyn_cast(&I)) { // Ignore vaargs on local memory. MemoryLocation Loc = MemoryLocation::get(VI); if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); + } else { + // If AccessesNonArgsOrAlloca has not been updated above, set it + // conservatively. + AccessesNonArgsOrAlloca |= I.mayReadOrWriteMemory(); } // Any remaining instructions need to be taken seriously! Check if they @@ -232,61 +240,74 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, ReadsMemory |= I.mayReadFromMemory(); } - if (WritesMemory) { - if (!ReadsMemory) - return MAK_WriteOnly; - else - return MAK_MayWrite; - } - - return ReadsMemory ? 
MAK_ReadOnly : MAK_ReadNone;
+  if (!WritesMemory && !ReadsMemory)
+    return FMRB_DoesNotAccessMemory;
+
+  FunctionModRefBehavior Result = FunctionModRefBehavior(FMRL_Anywhere);
+  if (!AccessesNonArgsOrAlloca)
+    Result = FunctionModRefBehavior(FMRL_ArgumentPointees);
+  if (WritesMemory)
+    Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Mod));
+  if (ReadsMemory)
+    Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Ref));
+  return Result;
 }

-MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
-                                                       AAResults &AAR) {
+FunctionModRefBehavior llvm::computeFunctionBodyMemoryAccess(Function &F,
+                                                             AAResults &AAR) {
   return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
 }

-/// Deduce readonly/readnone attributes for the SCC.
+/// Deduce readonly/readnone/writeonly attributes for the SCC.
 template <typename AARGetterT>
-static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
-                         SmallSet<Function *, 8> &Changed) {
+static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+                           SmallSet<Function *, 8> &Changed) {
   // Check if any of the functions in the SCC read or write memory. If they
   // write memory then they can't be marked readnone or readonly.
   bool ReadsMemory = false;
   bool WritesMemory = false;
+  // Check if all functions only access memory through their arguments.
+  bool ArgMemOnly = true;
   for (Function *F : SCCNodes) {
     // Call the callable parameter to look up AA results for this function.
     AAResults &AAR = AARGetter(*F);
-
     // Non-exact function definitions may not be selected at link time, and an
     // alternative version that writes to memory may be selected. See the
     // comment on GlobalValue::isDefinitionExact for more details.
-    switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
-                                      AAR, SCCNodes)) {
-    case MAK_MayWrite:
+    FunctionModRefBehavior FMRB =
+        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
+    if (FMRB == FMRB_DoesNotAccessMemory)
+      continue;
+    ModRefInfo MR = createModRefInfo(FMRB);
+    ReadsMemory |= isRefSet(MR);
+    WritesMemory |= isModSet(MR);
+    ArgMemOnly &= AliasAnalysis::onlyAccessesArgPointees(FMRB);
+    // At this point, neither readnone, readonly, writeonly nor argmemonly can
+    // be inferred. Exit.
+    if (ReadsMemory && WritesMemory && !ArgMemOnly)
       return;
-    case MAK_ReadOnly:
-      ReadsMemory = true;
-      break;
-    case MAK_WriteOnly:
-      WritesMemory = true;
-      break;
-    case MAK_ReadNone:
-      // Nothing to do!
-      break;
-    }
   }

-  // If the SCC contains both functions that read and functions that write, then
-  // we cannot add readonly attributes.
-  if (ReadsMemory && WritesMemory)
-    return;
-
-  // Success! Functions in this SCC do not access memory, or only read memory.
-  // Give them the appropriate attribute.
+  assert((!ReadsMemory || !WritesMemory || ArgMemOnly) &&
+         "no memory attributes can be added for this SCC, should have exited "
+         "earlier");
+  // Success! Functions in this SCC do not access memory, only read memory,
+  // only write memory, or only access memory through their arguments. Give
+  // them the appropriate attribute.
   for (Function *F : SCCNodes) {
+    // If possible, add the argmemonly attribute to F if it accesses memory.
+    if (ArgMemOnly && !F->onlyAccessesArgMemory() &&
+        (ReadsMemory || WritesMemory)) {
+      NumArgMemOnly++;
+      F->addFnAttr(Attribute::ArgMemOnly);
+      Changed.insert(F);
+    }
+
+    // The SCC contains functions both writing and reading from memory. We
+    // cannot add readonly or writeonly attributes.
+    if (ReadsMemory && WritesMemory)
+      continue;
     if (F->doesNotAccessMemory())
       // Already perfect!
continue; @@ -1614,6 +1635,26 @@ static bool basicBlockCanReturn(BasicBlock &BB) { return none_of(BB, instructionDoesNotReturn); } +// FIXME: this doesn't handle recursion. +static bool canReturn(Function &F) { + SmallVector Worklist; + SmallPtrSet Visited; + + Visited.insert(&F.front()); + Worklist.push_back(&F.front()); + + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (basicBlockCanReturn(*BB)) + return true; + for (BasicBlock *Succ : successors(BB)) + if (Visited.insert(Succ).second) + Worklist.push_back(Succ); + } while (!Worklist.empty()); + + return false; +} + // Set the noreturn function attribute if possible. static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, SmallSet &Changed) { @@ -1622,9 +1663,7 @@ static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, F->doesNotReturn()) continue; - // The function can return if any basic blocks can return. - // FIXME: this doesn't handle recursion or unreachable blocks. - if (none_of(*F, basicBlockCanReturn)) { + if (!canReturn(*F)) { F->setDoesNotReturn(); Changed.insert(F); } @@ -1792,7 +1831,7 @@ deriveAttrsInPostOrder(ArrayRef Functions, AARGetterT &&AARGetter) { SmallSet Changed; addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); - addReadAttrs(Nodes.SCCNodes, AARGetter, Changed); + addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); addArgumentAttrs(Nodes.SCCNodes, Changed); inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); @@ -1896,6 +1935,7 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { char PostOrderFunctionAttrsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs", "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs", @@ -1975,12 +2015,13 @@ static bool addNoRecurseAttrsTopDown(Function &F) { // this function could be recursively (indirectly) called. Note that this // also detects if F is directly recursive as F is not yet marked as // a norecurse function. 
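// A standalone sketch (plain C++) of the reachability walk the canReturn
// hunk above adds: starting from the entry block, visit each successor once
// and report whether any *reachable* block can return. This is the point of
// the fix, since scanning all blocks would count unreachable returns too.
#include <unordered_set>
#include <vector>

struct Block { std::vector<Block *> Succs; bool CanReturn = false; };

inline bool anyReachableCanReturn(Block *Entry) {
  std::vector<Block *> Worklist{Entry};
  std::unordered_set<Block *> Visited{Entry};
  while (!Worklist.empty()) {
    Block *B = Worklist.back();
    Worklist.pop_back();
    if (B->CanReturn)
      return true;
    for (Block *Succ : B->Succs)
      if (Visited.insert(Succ).second) // enqueue on first visit only
        Worklist.push_back(Succ);
  }
  return false;
}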
- for (auto *U : F.users()) { - auto *I = dyn_cast(U); + for (auto &U : F.uses()) { + auto *I = dyn_cast(U.getUser()); if (!I) return false; CallBase *CB = dyn_cast(I); - if (!CB || !CB->getParent()->getParent()->doesNotRecurse()) + if (!CB || !CB->isCallee(&U) || + !CB->getParent()->getParent()->doesNotRecurse()) return false; } F.setDoesNotRecurse(); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index d9b43109f629..56e2df14ff38 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" @@ -33,8 +32,6 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/InitializePasses.h" #include "llvm/Linker/IRMover.h" -#include "llvm/Object/ModuleSymbolTable.h" -#include "llvm/Object/SymbolicFile.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1112,12 +1109,13 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, llvm_unreachable("Expected GV to be converted"); } else { // If all copies of the original symbol had global unnamed addr and - // linkonce_odr linkage, it should be an auto hide symbol. In that case - // the thin link would have marked it as CanAutoHide. Add hidden visibility - // to the symbol to preserve the property. + // linkonce_odr linkage, or if all of them had local unnamed addr linkage + // and are constants, then it should be an auto hide symbol. In that case + // the thin link would have marked it as CanAutoHide. Add hidden + // visibility to the symbol to preserve the property. if (NewLinkage == GlobalValue::WeakODRLinkage && GS->second->canAutoHide()) { - assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr()); + assert(GV.canBeOmittedFromSymbolTable()); GV.setVisibility(GlobalValue::HiddenVisibility); } @@ -1330,10 +1328,9 @@ Expected FunctionImporter::importFunctions( << " from " << SrcModule->getSourceFileName() << "\n"; } - if (Error Err = Mover.move( - std::move(SrcModule), GlobalsToImport.getArrayRef(), - [](GlobalValue &, IRMover::ValueAdder) {}, - /*IsPerformingImport=*/true)) + if (Error Err = Mover.move(std::move(SrcModule), + GlobalsToImport.getArrayRef(), nullptr, + /*IsPerformingImport=*/true)) report_fatal_error(Twine("Function Import: link error: ") + toString(std::move(Err))); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 6c3cc3914337..dafd0dc865a2 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -19,11 +19,8 @@ // Current limitations: // - It does not yet handle integer ranges. We do support "literal constants", // but that's off by default under an option. -// - Only 1 argument per function is specialised, // - The cost-model could be further looked into (it mainly focuses on inlining // benefits), -// - We are not yet caching analysis results, but profiling and checking where -// extra compile time is spent didn't suggest this to be a problem. 
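// A standalone sketch (plain C++, simplified stand-in types) of the
// strengthened norecurse check above: a function may be marked norecurse
// only if every use is the callee operand of a call (so its address never
// escapes) and every caller is itself already known not to recurse.
#include <vector>

struct Fn;
struct UseSite { const Fn *Caller; bool IsCalleeOperand; };
struct Fn { bool DoesNotRecurse = false; std::vector<UseSite> Uses; };

inline bool canMarkNoRecurse(const Fn &F) {
  for (const UseSite &U : F.Uses)
    // A non-callee use lets the address escape; a possibly recursive caller
    // could reach F again through itself.
    if (!U.IsCalleeOperand || !U.Caller->DoesNotRecurse)
      return false;
  return true;
}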
// // Ideas: // - With a function specialization attribute for arguments, we could have @@ -49,15 +46,16 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/SCCPSolver.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include @@ -98,8 +96,13 @@ static cl::opt SpecializeOnAddresses( "func-specialization-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values")); -// TODO: This needs checking to see the impact on compile-times, which is why -// this is off by default for now. +// Disabled by default as it can significantly increase compilation times. +// Running nikic's compile time tracker on x86 with instruction count as the +// metric shows 3-4% regression for SPASS while being neutral for all other +// benchmarks of the llvm test suite. +// +// https://llvm-compile-time-tracker.com +// https://github.com/nikic/llvm-compile-time-tracker static cl::opt EnableSpecializationForLiteralConstant( "function-specialization-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant " @@ -108,24 +111,18 @@ static cl::opt EnableSpecializationForLiteralConstant( namespace { // Bookkeeping struct to pass data from the analysis and profitability phase // to the actual transform helper functions. -struct ArgInfo { - Function *Fn; // The function to perform specialisation on. - Argument *Arg; // The Formal argument being analysed. - Constant *Const; // A corresponding actual constant argument. - InstructionCost Gain; // Profitability: Gain = Bonus - Cost. - - // Flag if this will be a partial specialization, in which case we will need - // to keep the original function around in addition to the added - // specializations. - bool Partial = false; - - ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G) - : Fn(F), Arg(A), Const(C), Gain(G){}; +struct SpecializationInfo { + SmallVector Args; // Stores the {formal,actual} argument pairs. + InstructionCost Gain; // Profitability: Gain = Bonus - Cost. }; } // Anonymous namespace using FuncList = SmallVectorImpl; -using ConstList = SmallVectorImpl; +using CallArgBinding = std::pair; +using CallSpecBinding = std::pair; +// We are using MapVector because it guarantees deterministic iteration +// order across executions. +using SpecializationMap = SmallMapVector; // Helper to check if \p LV is either a constant or a constant // range with a single element. This should cover exactly the same cases as the @@ -204,41 +201,45 @@ static Constant *getConstantStackValue(CallInst *Call, Value *Val, // ret void // } // -static void constantArgPropagation(FuncList &WorkList, - Module &M, SCCPSolver &Solver) { +static void constantArgPropagation(FuncList &WorkList, Module &M, + SCCPSolver &Solver) { // Iterate over the argument tracked functions see if there // are any new constant values for the call instruction via // stack variables. 
for (auto *F : WorkList) { - // TODO: Generalize for any read only arguments. - if (F->arg_size() != 1) - continue; - - auto &Arg = *F->arg_begin(); - if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy()) - continue; for (auto *User : F->users()) { + auto *Call = dyn_cast(User); if (!Call) - break; - auto *ArgOp = Call->getArgOperand(0); - auto *ArgOpType = ArgOp->getType(); - auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); - if (!ConstVal) - break; + continue; - Value *GV = new GlobalVariable(M, ConstVal->getType(), true, - GlobalValue::InternalLinkage, ConstVal, - "funcspec.arg"); + bool Changed = false; + for (const Use &U : Call->args()) { + unsigned Idx = Call->getArgOperandNo(&U); + Value *ArgOp = Call->getArgOperand(Idx); + Type *ArgOpType = ArgOp->getType(); - if (ArgOpType != ConstVal->getType()) - GV = ConstantExpr::getBitCast(cast(GV), ArgOp->getType()); + if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) + continue; - Call->setArgOperand(0, GV); + auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); + if (!ConstVal) + continue; + + Value *GV = new GlobalVariable(M, ConstVal->getType(), true, + GlobalValue::InternalLinkage, ConstVal, + "funcspec.arg"); + if (ArgOpType != ConstVal->getType()) + GV = ConstantExpr::getBitCast(cast(GV), ArgOpType); + + Call->setArgOperand(Idx, GV); + Changed = true; + } // Add the changed CallInst to Solver Worklist - Solver.visitCall(*Call); + if (Changed) + Solver.visitCall(*Call); } } } @@ -275,7 +276,10 @@ class FunctionSpecializer { std::function GetTTI; std::function GetTLI; - SmallPtrSet SpecializedFuncs; + SmallPtrSet SpecializedFuncs; + SmallPtrSet FullySpecialized; + SmallVector ReplacedWithConstant; + DenseMap FunctionMetrics; public: FunctionSpecializer(SCCPSolver &Solver, @@ -284,42 +288,66 @@ public: std::function GetTLI) : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {} + ~FunctionSpecializer() { + // Eliminate dead code. + removeDeadInstructions(); + removeDeadFunctions(); + } + /// Attempt to specialize functions in the module to enable constant /// propagation across function boundaries. /// /// \returns true if at least one function is specialized. 
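// A standalone sketch (plain C++, heavily simplified) of the generalization
// in constantArgPropagation above: instead of special-casing a single
// argument, every operand is examined, each read-only pointer argument with
// a solver-proven constant pointee is redirected to an interned constant,
// and the call is revisited only when something actually changed.
#include <optional>
#include <vector>

struct CallArg {
  bool ReadOnlyPointer = false;       // attribute on the call operand
  std::optional<int> ConstantPointee; // what the solver proved, if anything
  int Operand = 0;                    // stand-in for the operand value
};

inline bool propagateConstantStackArgs(std::vector<CallArg> &Args) {
  bool Changed = false;
  for (CallArg &A : Args) {
    if (!A.ReadOnlyPointer || !A.ConstantPointee)
      continue; // only provably constant, read-only pointees qualify
    A.Operand = *A.ConstantPointee; // stand-in for the new constant global
    Changed = true;
  }
  return Changed; // caller re-runs the solver on the call only when true
}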
- bool - specializeFunctions(FuncList &FuncDecls, - FuncList &CurrentSpecializations) { + bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) { bool Changed = false; - for (auto *F : FuncDecls) { - if (!isCandidateFunction(F, CurrentSpecializations)) + for (auto *F : Candidates) { + if (!isCandidateFunction(F)) continue; auto Cost = getSpecializationCost(F); if (!Cost.isValid()) { LLVM_DEBUG( - dbgs() << "FnSpecialization: Invalid specialisation cost.\n"); + dbgs() << "FnSpecialization: Invalid specialization cost.\n"); continue; } - auto ConstArgs = calculateGains(F, Cost); - if (ConstArgs.empty()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " + << F->getName() << " is " << Cost << "\n"); + + SmallVector Specializations; + if (!calculateGains(F, Cost, Specializations)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n"); continue; } - for (auto &CA : ConstArgs) { - specializeFunction(CA, CurrentSpecializations); - Changed = true; - } + Changed = true; + for (auto &Entry : Specializations) + specializeFunction(F, Entry.second, WorkList); } - updateSpecializedFuncs(FuncDecls, CurrentSpecializations); + updateSpecializedFuncs(Candidates, WorkList); NumFuncSpecialized += NbFunctionsSpecialized; return Changed; } + void removeDeadInstructions() { + for (auto *I : ReplacedWithConstant) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I + << "\n"); + I->eraseFromParent(); + } + ReplacedWithConstant.clear(); + } + + void removeDeadFunctions() { + for (auto *F : FullySpecialized) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " + << F->getName() << "\n"); + F->eraseFromParent(); + } + FullySpecialized.clear(); + } + bool tryToReplaceWithConstant(Value *V) { if (!V->getType()->isSingleValueType() || isa(V) || V->user_empty()) @@ -330,17 +358,26 @@ public: return false; auto *Const = isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType()); - V->replaceAllUsesWith(Const); - for (auto *U : Const->users()) + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V + << "\nFnSpecialization: with " << *Const << "\n"); + + // Record uses of V to avoid visiting irrelevant uses of const later. + SmallVector UseInsts; + for (auto *U : V->users()) if (auto *I = dyn_cast(U)) if (Solver.isBlockExecutable(I->getParent())) - Solver.visit(I); + UseInsts.push_back(I); + + V->replaceAllUsesWith(Const); + + for (auto *I : UseInsts) + Solver.visit(I); // Remove the instruction from Block and Solver. if (auto *I = dyn_cast(V)) { if (I->isSafeToRemove()) { - I->eraseFromParent(); + ReplacedWithConstant.push_back(I); Solver.removeLatticeValueFor(I); } } @@ -352,92 +389,108 @@ private: // also in the cost model. unsigned NbFunctionsSpecialized = 0; + // Compute the code metrics for function \p F. + CodeMetrics &analyzeFunction(Function *F) { + auto I = FunctionMetrics.insert({F, CodeMetrics()}); + CodeMetrics &Metrics = I.first->second; + if (I.second) { + // The code metrics were not cached. 
+ SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); + for (BasicBlock &BB : *F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " + << F->getName() << " is " << Metrics.NumInsts + << " instructions\n"); + } + return Metrics; + } + /// Clone the function \p F and remove the ssa_copy intrinsics added by /// the SCCPSolver in the cloned version. - Function *cloneCandidateFunction(Function *F) { - ValueToValueMapTy EmptyMap; - Function *Clone = CloneFunction(F, EmptyMap); + Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { + Function *Clone = CloneFunction(F, Mappings); removeSSACopy(*Clone); return Clone; } - /// This function decides whether it's worthwhile to specialize function \p F - /// based on the known constant values its arguments can take on, i.e. it - /// calculates a gain and returns a list of actual arguments that are deemed - /// profitable to specialize. Specialization is performed on the first - /// interesting argument. Specializations based on additional arguments will - /// be evaluated on following iterations of the main IPSCCP solve loop. - SmallVector calculateGains(Function *F, InstructionCost Cost) { - SmallVector Worklist; + /// This function decides whether it's worthwhile to specialize function + /// \p F based on the known constant values its arguments can take on. It + /// only discovers potential specialization opportunities without actually + /// applying them. + /// + /// \returns true if any specializations have been found. + bool calculateGains(Function *F, InstructionCost Cost, + SmallVectorImpl &WorkList) { + SpecializationMap Specializations; // Determine if we should specialize the function based on the values the // argument can take on. If specialization is not profitable, we continue // on to the next argument. for (Argument &FormalArg : F->args()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: " - << FormalArg.getName() << "\n"); // Determine if this argument is interesting. If we know the argument can - // take on any constant values, they are collected in Constants. If the - // argument can only ever equal a constant value in Constants, the - // function will be completely specialized, and the IsPartial flag will - // be set to false by isArgumentInteresting (that function only adds - // values to the Constants list that are deemed profitable). - bool IsPartial = true; - SmallVector ActualConstArg; - if (!isArgumentInteresting(&FormalArg, ActualConstArg, IsPartial)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n"); + // take on any constant values, they are collected in Constants. + SmallVector ActualArgs; + if (!isArgumentInteresting(&FormalArg, ActualArgs)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " + << FormalArg.getNameOrAsOperand() + << " is not interesting\n"); continue; } - for (auto *ActualArg : ActualConstArg) { - InstructionCost Gain = - ForceFunctionSpecialization - ? 1 - : getSpecializationBonus(&FormalArg, ActualArg) - Cost; - - if (Gain <= 0) - continue; - Worklist.push_back({F, &FormalArg, ActualArg, Gain}); - } + for (const auto &Entry : ActualArgs) { + CallBase *Call = Entry.first; + Constant *ActualArg = Entry.second; - if (Worklist.empty()) - continue; + auto I = Specializations.insert({Call, SpecializationInfo()}); + SpecializationInfo &S = I.first->second; - // Sort the candidates in descending order. 
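// A standalone sketch (plain C++) of the per-call-site gain bookkeeping in
// calculateGains above: candidates are grouped by call site, the one-time
// clone cost is charged when an entry is first created, and each
// (formal, actual) constant pair then adds its bonus. The patch keeps these
// in a MapVector so iteration order stays deterministic; a plain std::map
// is used here only to keep the sketch self-contained.
#include <map>
#include <utility>
#include <vector>

using CallSite = int;                // stand-in for CallBase *
using ArgPair = std::pair<int, int>; // {formal index, constant id}

struct SpecializationInfo {
  std::vector<ArgPair> Args;
  long Gain = 0;
};

inline void addCandidate(std::map<CallSite, SpecializationInfo> &Specs,
                         CallSite CS, ArgPair Arg, long Bonus, long Cost) {
  auto [It, Inserted] = Specs.try_emplace(CS);
  if (Inserted)
    It->second.Gain = -Cost; // one-time cost per prospective clone
  It->second.Gain += Bonus;  // per-argument benefit
  It->second.Args.push_back(Arg);
}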
- llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) { - return L.Gain > R.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if - // necessary. - if (Worklist.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "Truncating worklist to " << MaxClonesThreshold - << " candidates.\n"); - Worklist.erase(Worklist.begin() + MaxClonesThreshold, - Worklist.end()); + if (I.second) + S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost; + if (!ForceFunctionSpecialization) + S.Gain += getSpecializationBonus(&FormalArg, ActualArg); + S.Args.push_back({&FormalArg, ActualArg}); } + } - if (IsPartial || Worklist.size() < ActualConstArg.size()) - for (auto &ActualArg : Worklist) - ActualArg.Partial = true; - - LLVM_DEBUG(dbgs() << "Sorted list of candidates by gain:\n"; - for (auto &C - : Worklist) { - dbgs() << "- Function = " << C.Fn->getName() << ", "; - dbgs() << "FormalArg = " << C.Arg->getName() << ", "; - dbgs() << "ActualArg = " << C.Const->getName() << ", "; - dbgs() << "Gain = " << C.Gain << "\n"; - }); - - // FIXME: Only one argument per function. - break; + // Remove unprofitable specializations. + Specializations.remove_if( + [](const auto &Entry) { return Entry.second.Gain <= 0; }); + + // Clear the MapVector and return the underlying vector. + WorkList = Specializations.takeVector(); + + // Sort the candidates in descending order. + llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { + return L.second.Gain > R.second.Gain; + }); + + // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. + if (WorkList.size() > MaxClonesThreshold) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " + << "the maximum number of clones threshold.\n" + << "FnSpecialization: Truncating worklist to " + << MaxClonesThreshold << " candidates.\n"); + WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); } - return Worklist; + + LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " + << F->getName() << "\n"; + for (const auto &Entry + : WorkList) { + dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain + << "\n"; + for (const ArgInfo &Arg : Entry.second.Args) + dbgs() << "FnSpecialization: FormalArg = " + << Arg.Formal->getNameOrAsOperand() + << ", ActualArg = " + << Arg.Actual->getNameOrAsOperand() << "\n"; + }); + + return !WorkList.empty(); } - bool isCandidateFunction(Function *F, FuncList &Specializations) { + bool isCandidateFunction(Function *F) { // Do not specialize the cloned function again. if (SpecializedFuncs.contains(F)) return false; @@ -461,44 +514,45 @@ private: return true; } - void specializeFunction(ArgInfo &AI, FuncList &Specializations) { - Function *Clone = cloneCandidateFunction(AI.Fn); - Argument *ClonedArg = Clone->getArg(AI.Arg->getArgNo()); + void specializeFunction(Function *F, SpecializationInfo &S, + FuncList &WorkList) { + ValueToValueMapTy Mappings; + Function *Clone = cloneCandidateFunction(F, Mappings); // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Const); + rewriteCallSites(Clone, S.Args, Mappings); // Initialize the lattice state of the arguments of the function clone, // marking the argument on which we specialized the function constant // with the given value. 
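// A standalone sketch (plain C++) of the ranking step above: order the
// surviving candidates by descending gain with a stable sort, so that ties
// keep their discovery order, then cap the number of clones at a threshold.
#include <algorithm>
#include <cstddef>
#include <vector>

struct Candidate { long Gain = 0; };

inline void rankAndTruncate(std::vector<Candidate> &WorkList,
                            std::size_t MaxClones) {
  std::stable_sort(WorkList.begin(), WorkList.end(),
                   [](const Candidate &L, const Candidate &R) {
                     return L.Gain > R.Gain;
                   });
  if (WorkList.size() > MaxClones)
    WorkList.resize(MaxClones); // drop the least profitable candidates
}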
- Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Const); + Solver.markArgInFuncSpecialization(Clone, S.Args); // Mark all the specialized functions - Specializations.push_back(Clone); + WorkList.push_back(Clone); NbFunctionsSpecialized++; // If the function has been completely specialized, the original function // is no longer needed. Mark it unreachable. - if (!AI.Partial) - Solver.markFunctionUnreachable(AI.Fn); + if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { + if (auto *CS = dyn_cast(U)) + return CS->getFunction() == F; + return false; + })) { + Solver.markFunctionUnreachable(F); + FullySpecialized.insert(F); + } } /// Compute and return the cost of specializing function \p F. InstructionCost getSpecializationCost(Function *F) { - // Compute the code metrics for the function. - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - CodeMetrics Metrics; - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - + CodeMetrics &Metrics = analyzeFunction(F); // If the code metrics reveal that we shouldn't duplicate the function, we // shouldn't specialize it. Set the specialization cost to Invalid. // Or if the lines of codes implies that this function is easy to get // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || + if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || (!ForceFunctionSpecialization && - Metrics.NumInsts < SmallFunctionThreshold)) { + *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) { InstructionCost C{}; C.setInvalid(); return C; @@ -539,31 +593,20 @@ private: DominatorTree DT(*F); LoopInfo LI(DT); auto &TTI = (GetTTI)(*F); - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for: " << *A - << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << C->getNameOrAsOperand() << "\n"); InstructionCost TotalCost = 0; for (auto *U : A->users()) { TotalCost += getUserBonus(U, TTI, LI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; + LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); } // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a - // function pointer, give up. - if (!isa(A->getType()) || - !isa(A->getType()->getPointerElementType())) - return TotalCost; - - // Since the argument is a function pointer, its incoming constant values - // should be functions or constant expressions. The code below attempts to - // look through cast expressions to find the function that will be called. - Value *CalledValue = C; - while (isa(CalledValue) && - cast(CalledValue)->isCast()) - CalledValue = cast(CalledValue)->getOperand(0); - Function *CalledFunction = dyn_cast(CalledValue); + // (potentially casted) function pointer, give up. + Function *CalledFunction = dyn_cast(C->stripPointerCasts()); if (!CalledFunction) return TotalCost; @@ -603,6 +646,9 @@ private: Bonus += Params.DefaultThreshold; else if (IC.isVariable() && IC.getCostDelta() > 0) Bonus += IC.getCostDelta(); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus + << " for user " << *U << "\n"); } return TotalCost + Bonus; @@ -615,15 +661,12 @@ private: /// specializing the function based on the incoming values of argument \p A /// would result in any significant optimization opportunities. 
If /// optimization opportunities exist, the constant values of \p A on which to - /// specialize the function are collected in \p Constants. If the values in - /// \p Constants represent the complete set of values that \p A can take on, - /// the function will be completely specialized, and the \p IsPartial flag is - /// set to false. + /// specialize the function are collected in \p Constants. /// /// \returns true if the function should be specialized on the given /// argument. - bool isArgumentInteresting(Argument *A, ConstList &Constants, - bool &IsPartial) { + bool isArgumentInteresting(Argument *A, + SmallVectorImpl &Constants) { // For now, don't attempt to specialize functions based on the values of // composite types. if (!A->getType()->isSingleValueType() || A->user_empty()) @@ -632,8 +675,9 @@ private: // If the argument isn't overdefined, there's nothing to do. It should // already be constant. if (!Solver.getLatticeValueFor(A).isOverdefined()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: nothing to do, arg is already " - << "constant?\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " + << A->getNameOrAsOperand() + << " is already constant?\n"); return false; } @@ -650,20 +694,26 @@ private: // // TODO 2: this currently does not support constants, i.e. integer ranges. // - IsPartial = !getPossibleConstants(A, Constants); - LLVM_DEBUG(dbgs() << "FnSpecialization: interesting arg: " << *A << "\n"); + getPossibleConstants(A, Constants); + + if (Constants.empty()) + return false; + + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " + << A->getNameOrAsOperand() << "\n"); return true; } /// Collect in \p Constants all the constant values that argument \p A can /// take on. - /// - /// \returns true if all of the values the argument can take on are constant - /// (e.g., the argument's parent function cannot be called with an - /// overdefined value). - bool getPossibleConstants(Argument *A, ConstList &Constants) { + void getPossibleConstants(Argument *A, + SmallVectorImpl &Constants) { Function *F = A->getParent(); - bool AllConstant = true; + + // SCCP solver does not record an argument that will be constructed on + // stack. + if (A->hasByValAttr() && !F->onlyReadsMemory()) + return; // Iterate over all the call sites of the argument's parent function. for (User *U : F->users()) { @@ -672,10 +722,8 @@ private: auto &CS = *cast(U); // If the call site has attribute minsize set, that callsite won't be // specialized. - if (CS.hasFnAttr(Attribute::MinSize)) { - AllConstant = false; + if (CS.hasFnAttr(Attribute::MinSize)) continue; - } // If the parent of the call site will never be executed, we don't need // to worry about the passed value. @@ -684,13 +732,7 @@ private: auto *V = CS.getArgOperand(A->getArgNo()); if (isa(V)) - return false; - - // For now, constant expressions are fine but only if they are function - // calls. - if (auto *CE = dyn_cast(V)) - if (!isa(CE->getOperand(0))) - return false; + return; // TrackValueOfGlobalVariable only tracks scalar global variables. if (auto *GV = dyn_cast(V)) { @@ -698,36 +740,32 @@ private: // global values. 
if (!GV->isConstant()) if (!SpecializeOnAddresses) - return false; + return; if (!GV->getValueType()->isSingleValueType()) - return false; + return; } if (isa(V) && (Solver.getLatticeValueFor(V).isConstant() || EnableSpecializationForLiteralConstant)) - Constants.push_back(cast(V)); - else - AllConstant = false; + Constants.push_back({&CS, cast(V)}); } - - // If the argument can only take on constant values, AllConstant will be - // true. - return AllConstant; } /// Rewrite calls to function \p F to call function \p Clone instead. /// - /// This function modifies calls to function \p F whose argument at index \p - /// ArgNo is equal to constant \p C. The calls are rewritten to call function - /// \p Clone instead. + /// This function modifies calls to function \p F as long as the actual + /// arguments match those in \p Args. Note that for recursive calls we + /// need to compare against the cloned formal arguments. /// /// Callsites that have been marked with the MinSize function attribute won't /// be specialized and rewritten. - void rewriteCallSites(Function *F, Function *Clone, Argument &Arg, - Constant *C) { - unsigned ArgNo = Arg.getArgNo(); - SmallVector CallSitesToRewrite; + void rewriteCallSites(Function *Clone, const SmallVectorImpl &Args, + ValueToValueMapTy &Mappings) { + assert(!Args.empty() && "Specialization without arguments"); + Function *F = Args[0].Formal->getParent(); + + SmallVector CallSitesToRewrite; for (auto *U : F->users()) { if (!isa(U) && !isa(U)) continue; @@ -736,35 +774,50 @@ private: continue; CallSitesToRewrite.push_back(&CS); } + + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " + << F->getName() << " with " << Clone->getName() << "\n"); + for (auto *CS : CallSitesToRewrite) { - if ((CS->getFunction() == Clone && CS->getArgOperand(ArgNo) == &Arg) || - CS->getArgOperand(ArgNo) == C) { + LLVM_DEBUG(dbgs() << "FnSpecialization: " + << CS->getFunction()->getName() << " ->" << *CS + << "\n"); + if (/* recursive call */ + (CS->getFunction() == Clone && + all_of(Args, + [CS, &Mappings](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; + })) || + /* normal call */ + all_of(Args, [CS](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Arg.Actual; + })) { CS->setCalledFunction(Clone); Solver.markOverdefined(CS); } } } - void updateSpecializedFuncs(FuncList &FuncDecls, - FuncList &CurrentSpecializations) { - for (auto *SpecializedFunc : CurrentSpecializations) { - SpecializedFuncs.insert(SpecializedFunc); + void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) { + for (auto *F : WorkList) { + SpecializedFuncs.insert(F); // Initialize the state of the newly created functions, marking them // argument-tracked and executable. - if (SpecializedFunc->hasExactDefinition() && - !SpecializedFunc->hasFnAttribute(Attribute::Naked)) - Solver.addTrackedFunction(SpecializedFunc); + if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) + Solver.addTrackedFunction(F); - Solver.addArgumentTrackedFunction(SpecializedFunc); - FuncDecls.push_back(SpecializedFunc); - Solver.markBlockExecutable(&SpecializedFunc->front()); + Solver.addArgumentTrackedFunction(F); + Candidates.push_back(F); + Solver.markBlockExecutable(&F->front()); // Replace the function arguments for the specialized functions. 
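// A standalone sketch (plain C++) of the call-site filter in rewriteCallSites
// above: a call is redirected to the clone only when every specialized
// argument position carries exactly the constant the clone was built for;
// unmatched calls keep targeting the original function.
#include <algorithm>
#include <cstddef>
#include <vector>

struct ArgSpec { std::size_t ArgNo; int Expected; };
struct Call { std::vector<int> Operands; int Target = 0; };

inline void rewriteMatchingCalls(std::vector<Call> &Calls,
                                 const std::vector<ArgSpec> &Specs,
                                 int CloneId) {
  for (Call &C : Calls) {
    bool AllMatch =
        std::all_of(Specs.begin(), Specs.end(), [&](const ArgSpec &S) {
          return C.Operands[S.ArgNo] == S.Expected;
        });
    if (AllMatch)
      C.Target = CloneId;
  }
}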
- for (Argument &Arg : SpecializedFunc->args()) + for (Argument &Arg : F->args()) if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg)) LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " - << Arg.getName() << "\n"); + << Arg.getNameOrAsOperand() << "\n"); } } }; @@ -871,22 +924,26 @@ bool llvm::runFunctionSpecialization( // Initially resolve the constants in all the argument tracked functions. RunSCCPSolver(FuncDecls); - SmallVector CurrentSpecializations; + SmallVector WorkList; unsigned I = 0; while (FuncSpecializationMaxIters != I++ && - FS.specializeFunctions(FuncDecls, CurrentSpecializations)) { + FS.specializeFunctions(FuncDecls, WorkList)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n"); // Run the solver for the specialized functions. - RunSCCPSolver(CurrentSpecializations); + RunSCCPSolver(WorkList); // Replace some unresolved constant arguments. constantArgPropagation(FuncDecls, M, Solver); - CurrentSpecializations.clear(); + WorkList.clear(); Changed = true; } - // Clean up the IR by removing ssa_copy intrinsics. + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = " + << NumFuncSpecialized << "\n"); + + // Remove any ssa_copy intrinsics that may have been introduced. removeSSACopy(M); return Changed; } diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 5e5d2086adc2..f35827220bb6 100644 --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -34,7 +33,7 @@ using namespace llvm; #define DEBUG_TYPE "globaldce" static cl::opt - ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore, + ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); STATISTIC(NumAliases , "Number of global aliases removed"); @@ -86,6 +85,9 @@ ModulePass *llvm::createGlobalDCEPass() { /// Returns true if F is effectively empty. static bool isEmptyFunction(Function *F) { + // Skip external functions. + if (F->isDeclaration()) + return false; BasicBlock &Entry = F->getEntryBlock(); for (auto &I : Entry) { if (I.isDebugOrPseudoInst()) @@ -214,14 +216,14 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId, if (!Ptr) { LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); VFESafeVTables.erase(VTable); - return; + continue; } auto Callee = dyn_cast(Ptr->stripPointerCasts()); if (!Callee) { LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); VFESafeVTables.erase(VTable); - return; + continue; } LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> " @@ -298,7 +300,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // marked as alive are discarded. // Remove empty functions from the global ctors list. - Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + Changed |= optimizeGlobalCtorsList( + M, [](uint32_t, Function *F) { return isEmptyFunction(F); }); // Collect the set of members for each comdat. for (Function &F : M) @@ -317,7 +320,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Loop over the module, adding globals which are obviously necessary. 
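// Aside: a sketch of the behavioral fix in ScanVTableLoad above (return ->
// continue). One unanalyzable slot must pessimize only its own vtable, not
// abort the scan of every remaining candidate. Names here are illustrative
// stand-ins, not the pass's API.
#include <functional>
#include <set>
#include <string>
#include <vector>

static void scanVTables(const std::vector<std::string> &Candidates,
                        std::set<std::string> &VFESafeVTables,
                        const std::function<bool(const std::string &)> &SlotOk) {
  for (const std::string &VTable : Candidates) {
    if (!SlotOk(VTable)) {
      VFESafeVTables.erase(VTable); // this vtable is no longer VFE-safe ...
      continue;                     // ... but keep scanning the others
    }
    // otherwise: record the virtual-function dependency for VTable
  }
}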
for (GlobalObject &GO : M.global_objects()) { - Changed |= RemoveUnusedGlobalValue(GO); + GO.removeDeadConstantUsers(); // Functions with external linkage are needed if they have a body. // Externally visible & appending globals are needed, if they have an // initializer. @@ -330,7 +333,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Compute direct dependencies of aliases. for (GlobalAlias &GA : M.aliases()) { - Changed |= RemoveUnusedGlobalValue(GA); + GA.removeDeadConstantUsers(); // Externally visible aliases are needed. if (!GA.isDiscardableIfUnused()) MarkLive(GA); @@ -340,7 +343,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Compute direct dependencies of ifuncs. for (GlobalIFunc &GIF : M.ifuncs()) { - Changed |= RemoveUnusedGlobalValue(GIF); + GIF.removeDeadConstantUsers(); // Externally visible ifuncs are needed. if (!GIF.isDiscardableIfUnused()) MarkLive(GIF); @@ -403,7 +406,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Now that all interferences have been dropped, delete the actual objects // themselves. auto EraseUnusedGlobalValue = [&](GlobalValue *GV) { - RemoveUnusedGlobalValue(*GV); + GV->removeDeadConstantUsers(); GV->eraseFromParent(); Changed = true; }; @@ -455,16 +458,3 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { return PreservedAnalyses::none(); return PreservedAnalyses::all(); } - -// RemoveUnusedGlobalValue - Loop over all of the uses of the specified -// GlobalValue, looking for the constant pointer ref that may be pointing to it. -// If found, check to see if the constant pointer ref is safe to destroy, and if -// so, nuke it. This will reduce the reference count on the global value, which -// might make it deader. 
-// -bool GlobalDCEPass::RemoveUnusedGlobalValue(GlobalValue &GV) { - if (GV.use_empty()) - return false; - GV.removeDeadConstantUsers(); - return GV.use_empty(); -} diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1cb32e32c895..1a1bde4f0668 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" @@ -37,7 +38,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -60,7 +60,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" @@ -100,7 +99,7 @@ static cl::opt cl::init(false), cl::Hidden); static cl::opt ColdCCRelFreq( - "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, + "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::desc( "Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a call site to be considered cold for enabling" @@ -232,7 +231,7 @@ CleanupPointerRootUsers(GlobalVariable *GV, if (MemSrc && MemSrc->isConstant()) { Changed = true; MTI->eraseFromParent(); - } else if (Instruction *I = dyn_cast(MemSrc)) { + } else if (Instruction *I = dyn_cast(MTI->getSource())) { if (I->hasOneUse()) Dead.push_back(std::make_pair(I, MTI)); } @@ -405,9 +404,37 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, for (auto *GVE : GVs) { DIVariable *Var = GVE->getVariable(); DIExpression *Expr = GVE->getExpression(); + int64_t CurVarOffsetInBytes = 0; + uint64_t CurVarOffsetInBits = 0; + + // Calculate the offset (Bytes), Continue if unknown. + if (!Expr->extractIfOffset(CurVarOffsetInBytes)) + continue; + + // Ignore negative offset. + if (CurVarOffsetInBytes < 0) + continue; + + // Convert offset to bits. + CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes; + + // Current var starts after the fragment, ignore. + if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits)) + continue; + + uint64_t CurVarSize = Var->getType()->getSizeInBits(); + // Current variable ends before start of fragment, ignore. + if (CurVarSize != 0 && + (CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits) + continue; + + // Current variable fits in the fragment. + if (CurVarOffsetInBits == FragmentOffsetInBits && + CurVarSize == FragmentSizeInBits) + Expr = DIExpression::get(Expr->getContext(), {}); // If the FragmentSize is smaller than the variable, // emit a fragment expression. - if (FragmentSizeInBits < VarSize) { + else if (FragmentSizeInBits < VarSize) { if (auto E = DIExpression::createFragmentExpression( Expr, FragmentOffsetInBits, FragmentSizeInBits)) Expr = *E; @@ -581,17 +608,14 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, // Will trap. } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. 
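// Aside: the interval arithmetic behind the new checks in transferSRADebugInfo
// above, as a self-contained sketch. A variable at bit offset VarOff with size
// VarSize (0 meaning unknown) matters to a fragment
// [FragOff, FragOff + FragSize) only if the two bit ranges intersect, and an
// exact fit drops the fragment expression entirely.
#include <cstdint>

enum class FragmentUse { Ignore, ExactFit, PartialOverlap };

static FragmentUse classify(uint64_t VarOff, uint64_t VarSize,
                            uint64_t FragOff, uint64_t FragSize) {
  if (VarOff >= FragOff + FragSize)
    return FragmentUse::Ignore;   // variable starts after the fragment
  if (VarSize != 0 && VarOff + VarSize <= FragOff)
    return FragmentUse::Ignore;   // variable ends before the fragment
  if (VarOff == FragOff && VarSize == FragSize)
    return FragmentUse::ExactFit; // use an empty expression
  return FragmentUse::PartialOverlap; // emit a fragment expression
}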
 }
 } else if (const CallInst *CI = dyn_cast(U)) {
 if (CI->getCalledOperand() != V) {
- //cerr << "NONTRAPPING USE: " << *U;
 return false; // Not calling the ptr
 }
 } else if (const InvokeInst *II = dyn_cast(U)) {
 if (II->getCalledOperand() != V) {
- //cerr << "NONTRAPPING USE: " << *U;
 return false; // Not calling the ptr
 }
 } else if (const BitCastInst *CI = dyn_cast(U)) {
@@ -615,7 +639,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
 // the comparing of the value of the created global init bool later in
 // optimizeGlobalAddressOfAllocation for the global variable.
 } else {
- //cerr << "NONTRAPPING USE: " << *U;
 return false;
 }
 }
@@ -878,7 +901,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
 }
 }

- SmallPtrSet RepValues;
+ SmallSetVector RepValues;
 RepValues.insert(NewGV);

 // If there is a comparison against null, we will insert a global bool to
@@ -1015,7 +1038,6 @@ valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
 /// accessing the data, and exposes the resultant global to further GlobalOpt.
 static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
 CallInst *CI,
- AtomicOrdering Ordering,
 const DataLayout &DL,
 TargetLibraryInfo *TLI) {
 if (!isAllocRemovable(CI, TLI))
@@ -1062,7 +1084,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
 // its initializer) is ever stored to the global.
 static bool
 optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
- AtomicOrdering Ordering, const DataLayout &DL,
+ const DataLayout &DL,
 function_ref GetTLI) {
 // Ignore no-op GEPs and bitcasts.
 StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1087,7 +1109,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
 } else if (isAllocationFn(StoredOnceVal, GetTLI)) {
 if (auto *CI = dyn_cast(StoredOnceVal)) {
 auto *TLI = &GetTLI(*CI->getFunction());
- if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, Ordering, DL, TLI))
+ if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
 return true;
 }
 }
@@ -1257,8 +1279,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
 return true;
 }

-static bool deleteIfDead(
- GlobalValue &GV, SmallPtrSetImpl &NotDiscardableComdats) {
+static bool
+deleteIfDead(GlobalValue &GV,
+ SmallPtrSetImpl &NotDiscardableComdats,
+ function_ref DeleteFnCallback = nullptr) {
 GV.removeDeadConstantUsers();

 if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
@@ -1277,6 +1301,10 @@ static bool deleteIfDead(
 return false;

 LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ if (auto *F = dyn_cast(&GV)) {
+ if (DeleteFnCallback)
+ DeleteFnCallback(*F);
+ }
 GV.eraseFromParent();
 ++NumDeleted;
 return true;
@@ -1416,6 +1444,42 @@ static void makeAllConstantUsesInstructions(Constant *C) {
 }
 }

+// For a global variable with one store, if the store dominates any loads,
+// those loads will always load the stored value (as opposed to the
+// initializer), even in the presence of recursion.
+static bool forwardStoredOnceStore(
+ GlobalVariable *GV, const StoreInst *StoredOnceStore,
+ function_ref LookupDomTree) {
+ const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
+ // We can do this optimization for non-constants in nosync + norecurse
+ // functions, but globals used in exactly one norecurse function are already
+ // promoted to an alloca.
+ if (!isa(StoredOnceValue)) + return false; + const Function *F = StoredOnceStore->getFunction(); + SmallVector Loads; + for (User *U : GV->users()) { + if (auto *LI = dyn_cast(U)) { + if (LI->getFunction() == F && + LI->getType() == StoredOnceValue->getType() && LI->isSimple()) + Loads.push_back(LI); + } + } + // Only compute DT if we have any loads to examine. + bool MadeChange = false; + if (!Loads.empty()) { + auto &DT = LookupDomTree(*const_cast(F)); + for (auto *LI : Loads) { + if (DT.dominates(StoredOnceStore, LI)) { + LI->replaceAllUsesWith(const_cast(StoredOnceValue)); + LI->eraseFromParent(); + MadeChange = true; + } + } + } + return MadeChange; +} + /// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. static bool @@ -1572,9 +1636,15 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI)) + if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI)) return true; + // Try to forward the store to any loads. If we have more than one store, we + // may have a store of the initializer between StoredOnceStore and a load. + if (GS.NumStores == 1) + if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree)) + return true; + // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. Skip this optimization for AS that doesn't allow an initializer. if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic && @@ -1755,7 +1825,7 @@ hasOnlyColdCalls(Function &F, return false; if (!CalledFn->hasLocalLinkage()) return false; - // Skip over instrinsics since they won't remain as function calls. + // Skip over intrinsics since they won't remain as function calls. if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) continue; // Check if it's valid to use coldcc calling convention. @@ -1884,7 +1954,9 @@ OptimizeFunctions(Module &M, function_ref GetTTI, function_ref GetBFI, function_ref LookupDomTree, - SmallPtrSetImpl &NotDiscardableComdats) { + SmallPtrSetImpl &NotDiscardableComdats, + function_ref ChangedCFGCallback, + function_ref DeleteFnCallback) { bool Changed = false; @@ -1904,7 +1976,7 @@ OptimizeFunctions(Module &M, if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage()) F.setLinkage(GlobalValue::InternalLinkage); - if (deleteIfDead(F, NotDiscardableComdats)) { + if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) { Changed = true; continue; } @@ -1917,13 +1989,11 @@ OptimizeFunctions(Module &M, // So, remove unreachable blocks from the function, because a) there's // no point in analyzing them and b) GlobalOpt should otherwise grow // some more complicated logic to break these cycles. - // Removing unreachable blocks might invalidate the dominator so we - // recalculate it. + // Notify the analysis manager that we've modified the function's CFG. if (!F.isDeclaration()) { if (removeUnreachableBlocks(F)) { - auto &DT = LookupDomTree(F); - DT.recalculate(F); Changed = true; + ChangedCFGCallback(F); } } @@ -2031,6 +2101,9 @@ OptimizeGlobalVars(Module &M, /// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, TargetLibraryInfo *TLI) { + // Skip external functions. + if (F->isDeclaration()) + return false; // Call the function. 
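// Aside: the source-level situation forwardStoredOnceStore (above) handles,
// as a tiny self-contained example (hypothetical code, not from the patch):
int *G;        // a global with exactly one store, in init() below
void use(int);

void init(int *P) {
  G = P;       // the single store to G
  use(*G);     // this load is dominated by the store, so it can only observe
               // P (never G's initializer, even under recursion) and may be
               // rewritten to use(*P), deleting the load
}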
Evaluator Eval(DL, TLI); Constant *RetValDummy; @@ -2383,15 +2456,19 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { return Changed; } -static bool optimizeGlobalsInModule( - Module &M, const DataLayout &DL, - function_ref GetTLI, - function_ref GetTTI, - function_ref GetBFI, - function_ref LookupDomTree) { +static bool +optimizeGlobalsInModule(Module &M, const DataLayout &DL, + function_ref GetTLI, + function_ref GetTTI, + function_ref GetBFI, + function_ref LookupDomTree, + function_ref ChangedCFGCallback, + function_ref DeleteFnCallback) { SmallPtrSet NotDiscardableComdats; bool Changed = false; bool LocalChange = true; + Optional FirstNotFullyEvaluatedPriority; + while (LocalChange) { LocalChange = false; @@ -2411,12 +2488,20 @@ static bool optimizeGlobalsInModule( // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, - NotDiscardableComdats); + NotDiscardableComdats, ChangedCFGCallback, + DeleteFnCallback); // Optimize global_ctors list. - LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { - return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); - }); + LocalChange |= + optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) { + if (FirstNotFullyEvaluatedPriority && + *FirstNotFullyEvaluatedPriority != Priority) + return false; + bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F)); + if (!Evaluated) + FirstNotFullyEvaluatedPriority = Priority; + return Evaluated; + }); // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree, @@ -2457,10 +2542,23 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { return FAM.getResult(F); }; + auto ChangedCFGCallback = [&FAM](Function &F) { + FAM.invalidate(F, PreservedAnalyses::none()); + }; + auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); }; - if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) + if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree, + ChangedCFGCallback, DeleteFnCallback)) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); + + PreservedAnalyses PA = PreservedAnalyses::none(); + // We made sure to clear analyses for deleted functions. + PA.preserve(); + // The only place we modify the CFG is when calling + // removeUnreachableBlocks(), but there we make sure to invalidate analyses + // for modified functions. + PA.preserveSet(); + return PA; } namespace { @@ -2491,8 +2589,13 @@ struct GlobalOptLegacyPass : public ModulePass { return this->getAnalysis(F).getBFI(); }; - return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, - LookupDomTree); + auto ChangedCFGCallback = [&LookupDomTree](Function &F) { + auto &DT = LookupDomTree(F); + DT.recalculate(F); + }; + + return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree, + ChangedCFGCallback, nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp index e7d698c42fcf..7d9e6135b2eb 100644 --- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp +++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp @@ -134,9 +134,9 @@ static bool splitGlobal(GlobalVariable &GV) { } // Finally, remove the original global. Any remaining uses refer to invalid - // elements of the global, so replace with undef. 
+ // elements of the global, so replace with poison. if (!GV.use_empty()) - GV.replaceAllUsesWith(UndefValue::get(GV.getType())); + GV.replaceAllUsesWith(PoisonValue::get(GV.getType())); GV.eraseFromParent(); return true; } diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index a964fcde0396..95e8ae0fd22f 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -29,46 +29,33 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #include -#include #include +#include #include #define DEBUG_TYPE "hotcoldsplit" @@ -126,7 +113,8 @@ bool unlikelyExecuted(BasicBlock &BB) { // mark sanitizer traps as cold. for (Instruction &I : BB) if (auto *CB = dyn_cast(&I)) - if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize")) + if (CB->hasFnAttr(Attribute::Cold) && + !CB->getMetadata(LLVMContext::MD_nosanitize)) return true; // The block is cold if it has an unreachable terminator, unless it's @@ -352,7 +340,7 @@ Function *HotColdSplitting::extractColdRegion( // TODO: Pass BFI and BPI to update profile information. CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr, /* BPI */ nullptr, AC, /* AllowVarArgs */ false, - /* AllowAlloca */ false, + /* AllowAlloca */ false, /* AllocaBlock */ nullptr, /* Suffix */ "cold." 
+ std::to_string(Count)); // Perform a simple cost/benefit analysis to decide whether or not to permit @@ -740,7 +728,7 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) { std::function GetORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; auto LookupAC = [this](Function &F) -> AssumptionCache * { if (auto *ACT = getAnalysisIfAvailable()) @@ -772,7 +760,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) { std::function GetORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index de1c1d379502..ec2b80012ed6 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -24,7 +24,6 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeOpenMPOptCGSCCLegacyPassPass(Registry); - initializeArgPromotionPass(Registry); initializeAnnotation2MetadataLegacyPass(Registry); initializeCalledValuePropagationLegacyPassPass(Registry); initializeConstantMergeLegacyPassPass(Registry); @@ -70,10 +69,6 @@ void LLVMInitializeIPO(LLVMPassRegistryRef R) { initializeIPO(*unwrap(R)); } -void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createArgumentPromotionPass()); -} - void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCalledValuePropagationPass()); } diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index faf7cb7d566a..d75d99e307fd 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -16,8 +16,9 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/PassManager.h" @@ -25,8 +26,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" -#include -#include #include #define DEBUG_TYPE "iroutliner" @@ -183,11 +182,24 @@ static void getSortedConstantKeys(std::vector &SortedKeys, Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other, Value *V) { Optional GVN = Candidate->getGVN(V); - assert(GVN.hasValue() && "No GVN for incoming value"); + assert(GVN && "No GVN for incoming value"); Optional CanonNum = Candidate->getCanonicalNum(*GVN); Optional FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum); Optional FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN); - return FoundValueOpt.getValueOr(nullptr); + return FoundValueOpt.value_or(nullptr); +} + +BasicBlock * +OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other, + BasicBlock *BB) { + Instruction *FirstNonPHI = BB->getFirstNonPHI(); + assert(FirstNonPHI && "block is empty?"); + Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI); + if (!CorrespondingVal) + return nullptr; + BasicBlock *CorrespondingBlock = + cast(CorrespondingVal)->getParent(); + return CorrespondingBlock; } /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found @@ -264,13 +276,33 @@ void OutlinableRegion::splitCandidate() { // We iterate 
over the instructions in the region, if we find a PHINode, we
 // check if there are predecessors outside of the region, if there are,
 // we ignore this region since we are unable to handle the severing of the
- // phi node right now.
+ // phi node right now.
+
+ // TODO: Handle extraneous inputs for PHINodes through variable number of
+ // inputs, similar to how outputs are handled.
 BasicBlock::iterator It = StartInst->getIterator();
+ EndBB = BackInst->getParent();
+ BasicBlock *IBlock;
+ BasicBlock *PHIPredBlock = nullptr;
+ bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
 while (PHINode *PN = dyn_cast(&*It)) {
 unsigned NumPredsOutsideRegion = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!BBSet.contains(PN->getIncomingBlock(i)))
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!BBSet.contains(PN->getIncomingBlock(i))) {
+ PHIPredBlock = PN->getIncomingBlock(i);
+ ++NumPredsOutsideRegion;
+ continue;
+ }
+
+ // We must consider the case where the incoming block to the PHINode is
+ // the same as the final block of the OutlinableRegion. If this is the
+ // case, the branch from this block must also be outlined to be valid.
+ IBlock = PN->getIncomingBlock(i);
+ if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
+ PHIPredBlock = PN->getIncomingBlock(i);
 ++NumPredsOutsideRegion;
+ }
+ }

 if (NumPredsOutsideRegion > 1)
 return;
@@ -285,11 +317,9 @@ void OutlinableRegion::splitCandidate() {
 // If the region ends with a PHINode, but does not contain all of the phi node
 // instructions of the region, we ignore it for now.
- if (isa(BackInst)) {
- EndBB = BackInst->getParent();
- if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
- return;
- }
+ if (isa(BackInst) &&
+ BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+ return;

 // The basic block gets split like so:
 // block: block:
@@ -310,6 +340,10 @@ void OutlinableRegion::splitCandidate() {
 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
+ // If there was a PHINode with an incoming block outside the region,
+ // make sure it is correctly updated in the newly split block.
+ if (PHIPredBlock)
+ PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB);

 CandidateSplit = true;
 if (!BackInst->isTerminator()) {
@@ -353,6 +387,25 @@ void OutlinableRegion::reattachCandidate() {
 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+ // Make sure PHINode references to the block we are merging into are
+ // updated to be incoming blocks from the predecessor to the current block.
+
+ // NOTE: If this is updated such that the outlined block can have more than
+ // one incoming block to a PHINode, this logic will have to be updated
+ // to handle multiple predecessors instead.
+
+ // We only need to update this if the outlined section contains a PHINode, if
+ // it does not, then the incoming block was never changed in the first place.
+ // On the other hand, if PrevBB has no predecessors, it means that all
+ // incoming blocks to the first block are contained in the region, and there
+ // will be nothing to update.
+ Instruction *StartInst = (*Candidate->begin()).Inst;
+ if (isa(StartInst) && !PrevBB->hasNPredecessors(0)) {
+ assert(!PrevBB->hasNPredecessorsOrMore(2) &&
+ "PrevBB has more than one predecessor. Should be 0 or 1.");
+ BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
+ PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB);
+ }

 PrevBB->getTerminator()->eraseFromParent();

 // If we are reattaching after outlining, we iterate over the phi nodes to
@@ -501,7 +554,7 @@ collectRegionsConstants(OutlinableRegion &Region,
 // the number has been found to be not the same value in each instance.
 for (Value *V : ID.OperVals) {
 Optional GVNOpt = C.getGVN(V);
- assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
+ assert(GVNOpt && "Expected a GVN for operand?");
 unsigned GVN = GVNOpt.getValue();

 // Check if this global value has been found to not be the same already.
@@ -516,7 +569,7 @@ collectRegionsConstants(OutlinableRegion &Region,
 // global value number. If the global value does not map to a Constant,
 // it is considered to not be the same value.
 Optional ConstantMatches = constantMatches(V, GVN, GVNToConstant);
- if (ConstantMatches.hasValue()) {
+ if (ConstantMatches) {
 if (ConstantMatches.getValue())
 continue;
 else
@@ -597,7 +650,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);

 // Transfer the swifterr attribute to the correct function parameter.
- if (Group.SwiftErrorArgument.hasValue())
+ if (Group.SwiftErrorArgument)
 Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
 Attribute::SwiftError);
@@ -666,6 +719,18 @@ static void moveFunctionData(Function &Old, Function &New,
 if (!isa(&Val)) {
 // Remove the debug information for outlined functions.
 Val.setDebugLoc(DebugLoc());
+
+ // Loop info metadata may contain line locations. Update them to have no
+ // value in the new subprogram since the outlined code could be from
+ // several locations.
+ auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
+ if (DISubprogram *SP = New.getSubprogram())
+ if (auto *Loc = dyn_cast_or_null(MD))
+ return DILocation::get(New.getContext(), Loc->getLine(),
+ Loc->getColumn(), SP, nullptr);
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
 continue;
 }
@@ -691,8 +756,6 @@ static void moveFunctionData(Function &Old, Function &New,
 for (Instruction *I : DebugInsts)
 I->eraseFromParent();
 }
-
- assert(NewEnds.size() > 0 && "No return instruction for new function?");
 }

 /// Find the constants that will need to be lifted into arguments
@@ -714,7 +777,7 @@ static void findConstants(IRSimilarityCandidate &C, DenseSet &NotSame,
 for (Value *V : (*IDIt).OperVals) {
 // Since these are stored before any outlining, they will be in the
 // global value numbering.
- unsigned GVN = C.getGVN(V).getValue();
+ unsigned GVN = *C.getGVN(V);
 if (isa(V))
 if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
 Inputs.push_back(GVN);
@@ -745,8 +808,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C,
 assert(Input && "Have a nullptr as an input");
 if (OutputMappings.find(Input) != OutputMappings.end())
 Input = OutputMappings.find(Input)->second;
- assert(C.getGVN(Input).hasValue() &&
- "Could not find a numbering for the given input");
+ assert(C.getGVN(Input) && "Could not find a numbering for the given input");
 EndInputNumbers.push_back(C.getGVN(Input).getValue());
 }
 }
@@ -885,11 +947,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
 // numbering overrides any discovered location for the extracted code.
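// Aside: a plain-C++ model of mapInputsToGVNs above (hypothetical stand-in
// types). Each input is first remapped through OutputMappings, because an
// output of an earlier extraction may now be flowing in as an input, and only
// then is its global value number looked up.
#include <cstdint>
#include <map>
#include <optional>

using ValueKey = std::uintptr_t; // stand-in for Value *

static std::optional<unsigned>
inputToGVN(ValueKey Input, const std::map<ValueKey, ValueKey> &OutputMappings,
           const std::map<ValueKey, unsigned> &GVNs) {
  if (auto It = OutputMappings.find(Input); It != OutputMappings.end())
    Input = It->second; // follow the output-to-original mapping first
  if (auto It = GVNs.find(Input); It != GVNs.end())
    return It->second;
  return std::nullopt;  // the pass asserts this cannot happen
}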
for (unsigned InputVal : InputGVNs) { Optional CanonicalNumberOpt = C.getCanonicalNum(InputVal); - assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?"); + assert(CanonicalNumberOpt && "Canonical number not found?"); unsigned CanonicalNumber = CanonicalNumberOpt.getValue(); Optional InputOpt = C.fromGVN(InputVal); - assert(InputOpt.hasValue() && "Global value number not found?"); + assert(InputOpt && "Global value number not found?"); Value *Input = InputOpt.getValue(); DenseMap::iterator AggArgIt = @@ -901,7 +963,7 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region, // argument in the overall function. if (Input->isSwiftError()) { assert( - !Group.SwiftErrorArgument.hasValue() && + !Group.SwiftErrorArgument && "Argument already marked with swifterr for this OutlinableGroup!"); Group.SwiftErrorArgument = TypeIndex; } @@ -969,12 +1031,11 @@ static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, // We check to see if the value is used by the PHINode from some other // predecessor not included in the region. If it is, we make sure // to keep it as an output. - SmallVector IncomingNumbers(PN.getNumIncomingValues()); - std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0); - if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { - return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && - !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); - })) + if (any_of(llvm::seq(0, PN.getNumIncomingValues()), + [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { + return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && + !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); + })) return true; // Check if the value is used by any other instructions outside the region. @@ -1098,30 +1159,72 @@ static hash_code encodePHINodeData(PHINodeData &PND) { /// /// \param Region - The region that \p PN is an output for. /// \param PN - The PHINode we are analyzing. +/// \param Blocks - The blocks for the region we are analyzing. /// \param AggArgIdx - The argument \p PN will be stored into. /// \returns An optional holding the assigned canonical number, or None if /// there is some attribute of the PHINode blocking it from being used. static Optional getGVNForPHINode(OutlinableRegion &Region, - PHINode *PN, unsigned AggArgIdx) { + PHINode *PN, + DenseSet &Blocks, + unsigned AggArgIdx) { OutlinableGroup &Group = *Region.Parent; IRSimilarityCandidate &Cand = *Region.Candidate; BasicBlock *PHIBB = PN->getParent(); CanonList PHIGVNs; - for (Value *Incoming : PN->incoming_values()) { - // If we cannot find a GVN, this means that the input to the PHINode is - // not included in the region we are trying to analyze, meaning, that if - // it was outlined, we would be adding an extra input. We ignore this - // case for now, and so ignore the region. + Value *Incoming; + BasicBlock *IncomingBlock; + for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { + Incoming = PN->getIncomingValue(Idx); + IncomingBlock = PN->getIncomingBlock(Idx); + // If we cannot find a GVN, and the incoming block is included in the region + // this means that the input to the PHINode is not included in the region we + // are trying to analyze, meaning, that if it was outlined, we would be + // adding an extra input. We ignore this case for now, and so ignore the + // region. 
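// Aside: a sketch of the split-block fallback implemented just below. When the
// incoming block is the candidate's (split) start block it has no numbering of
// its own, so the numbering of the first predecessor outside the region (the
// block it was split from) is used instead. Stand-in types, not the pass's API.
#include <set>
#include <vector>

template <typename Block>
static const Block *blockToNumber(const Block *Incoming,
                                  const std::set<const Block *> &RegionBlocks,
                                  const std::vector<const Block *> &Preds) {
  for (const Block *Pred : Preds)
    if (RegionBlocks.count(Pred) == 0)
      return Pred;   // the block Incoming was split from
  return Incoming;   // not split: Incoming already has a valid numbering
}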
Optional OGVN = Cand.getGVN(Incoming);
- if (!OGVN.hasValue()) {
+ if (!OGVN && Blocks.contains(IncomingBlock)) {
 Region.IgnoreRegion = true;
 return None;
 }
+
+ // If the incoming block isn't in the region, we don't have to worry about
+ // this incoming value.
+ if (!Blocks.contains(IncomingBlock))
+ continue;
+
 // Collect the canonical numbers of the values in the PHINode.
- unsigned GVN = OGVN.getValue();
+ unsigned GVN = *OGVN;
 OGVN = Cand.getCanonicalNum(GVN);
- assert(OGVN.hasValue() && "No GVN found for incoming value?");
+ assert(OGVN && "No GVN found for incoming value?");
+ PHIGVNs.push_back(*OGVN);
+
+ // Find the incoming block and use the canonical numbering as well to define
+ // the hash for the PHINode.
+ OGVN = Cand.getGVN(IncomingBlock);
+
+ // If there is no number for the incoming block, it is because we have
+ // split the candidate basic blocks. So we use the previous block that it
+ // was split from to find the valid global value numbering for the PHINode.
+ if (!OGVN) {
+ assert(Cand.getStartBB() == IncomingBlock &&
+ "Unknown basic block used in exit path PHINode.");
+
+ BasicBlock *PrevBlock = nullptr;
+ // Iterate over the predecessors to the incoming block of the
+ // PHINode; when we find a block that is not contained in the region,
+ // we know that this is the first block that we split from, and should
+ // have a valid global value numbering.
+ for (BasicBlock *Pred : predecessors(IncomingBlock))
+ if (!Blocks.contains(Pred)) {
+ PrevBlock = Pred;
+ break;
+ }
+ assert(PrevBlock && "Expected a predecessor not in the region!");
+ OGVN = Cand.getGVN(PrevBlock);
+ }
+ GVN = *OGVN;
+ OGVN = Cand.getCanonicalNum(GVN);
+ assert(OGVN && "No GVN found for incoming block?");
 PHIGVNs.push_back(*OGVN);
 }
@@ -1131,11 +1234,10 @@ static Optional getGVNForPHINode(OutlinableRegion &Region,
 DenseMap::iterator GVNToPHIIt;
 DenseMap::iterator PHIToGVNIt;
 Optional BBGVN = Cand.getGVN(PHIBB);
- assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
+ assert(BBGVN && "Could not find GVN for the incoming block!");

 BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
- assert(BBGVN.hasValue() &&
- "Could not find canonical number for the incoming block!");
+ assert(BBGVN && "Could not find canonical number for the incoming block!");
 // Create a pair of the exit block canonical value, and the aggregate
 // argument location, connected to the canonical numbers stored in the
 // PHINode.
@@ -1262,9 +1364,9 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
 // If two PHINodes have the same canonical values, but different aggregate
 // argument locations, then they will have distinct Canonical Values.
- GVN = getGVNForPHINode(Region, PN, AggArgIdx);
- if (!GVN.hasValue())
- return;
+ GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
+ if (!GVN)
+ return;
 } else {
 // If we do not have a PHINode we use the global value numbering for the
 // output value, to find the canonical number to add to the set of stored
@@ -1413,7 +1515,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {

 // Make sure that the argument in the new function has the SwiftError
 // argument.
- if (Group.SwiftErrorArgument.hasValue())
+ if (Group.SwiftErrorArgument)
 Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
 Attribute::SwiftError);
@@ -1520,17 +1622,18 @@ getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
 /// \param OutputMappings [in] - The mapping of output values from outlined
 /// region to their original values.
/// \param CanonNums [out] - The canonical numbering for the incoming values to
-/// \p PN.
+/// \p PN paired with their incoming block.
 /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
 /// of \p Region rather than the overall function's call.
-static void
-findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
- const DenseMap &OutputMappings,
- DenseSet &CanonNums,
- bool ReplacedWithOutlinedCall = true) {
+static void findCanonNumsForPHI(
+ PHINode *PN, OutlinableRegion &Region,
+ const DenseMap &OutputMappings,
+ SmallVector> &CanonNums,
+ bool ReplacedWithOutlinedCall = true) {
 // Iterate over the incoming values.
 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
 Value *IVal = PN->getIncomingValue(Idx);
+ BasicBlock *IBlock = PN->getIncomingBlock(Idx);
 // If we have an argument as incoming value, we need to grab the passed
 // value from the call itself.
 if (Argument *A = dyn_cast(IVal)) {
@@ -1545,10 +1648,10 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,

 // Find and add the canonical number for the incoming value.
 Optional GVN = Region.Candidate->getGVN(IVal);
- assert(GVN.hasValue() && "No GVN for incoming value");
+ assert(GVN && "No GVN for incoming value");
 Optional CanonNum = Region.Candidate->getCanonicalNum(*GVN);
- assert(CanonNum.hasValue() && "No Canonical Number for GVN");
- CanonNums.insert(*CanonNum);
+ assert(CanonNum && "No Canonical Number for GVN");
+ CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
 }
 }
@@ -1557,19 +1660,26 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
 /// function.
 ///
 /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
-/// \param Region [in] - The OutlinableRegion containing \p PN.
+/// \param Region [in] - The OutlinableRegion containing \p PN.
 /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
 /// \p PN in.
 /// \param OutputMappings [in] - The mapping of output values from outlined
 /// region to their original values.
+/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
+/// matched.
 /// \return the newly found or created PHINode in \p OverallPhiBlock.
 static PHINode*
 findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
 BasicBlock *OverallPhiBlock,
- const DenseMap &OutputMappings) {
+ const DenseMap &OutputMappings,
+ DenseSet &UsedPHIs) {
 OutlinableGroup &Group = *Region.Parent;
- DenseSet PNCanonNums;
+
+ // A list of the canonical numbering assigned to each incoming value, paired
+ // with the incoming block for the PHINode passed into this function.
+ SmallVector> PNCanonNums;
+
 // We have to use the extracted function since we have not merged this region
 // into the overall function yet. We make sure to reassign the argument numbering
 // since it is possible that the argument ordering is different between the
@@ -1578,18 +1688,61 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
 /* ReplacedWithOutlinedCall = */ false);
 OutlinableRegion *FirstRegion = Group.Regions[0];
- DenseSet CurrentCanonNums;
+
+ // A list of the canonical numbering assigned to each incoming value, paired
+ // with the incoming block for the PHINode that we are currently comparing
+ // the passed PHINode to.
+ SmallVector> CurrentCanonNums;
+
 // Find the Canonical Numbering for each PHINode, if it matches, we replace
 // the uses of the PHINode we are searching for, with the found PHINode.
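// Aside: a model of the strengthened matching in the loop that follows. Two
// PHINodes may be merged only if they have the same number of incoming values
// and every (canonical value, incoming block) pair agrees positionally, with
// blocks compared after translation into the first region via
// findCorrespondingBlockIn. Plain C++ with stand-in types.
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

template <typename Block>
static bool phiCanonListsMatch(
    const std::vector<std::pair<unsigned, const Block *>> &Candidate,
    const std::vector<std::pair<unsigned, const Block *>> &Existing,
    const std::function<const Block *(const Block *)> &CorrespondingBlock) {
  if (Candidate.size() != Existing.size())
    return false; // no analogue for some incoming value
  for (std::size_t I = 0; I != Candidate.size(); ++I)
    if (Candidate[I].first != Existing[I].first ||
        CorrespondingBlock(Candidate[I].second) != Existing[I].second)
      return false;
  return true;
}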
for (PHINode &CurrPN : OverallPhiBlock->phis()) { + // If this PHINode has already been matched to another PHINode to be merged, + // we skip it. + if (UsedPHIs.contains(&CurrPN)) + continue; + CurrentCanonNums.clear(); findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, /* ReplacedWithOutlinedCall = */ true); - if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { - return CurrentCanonNums.contains(CanonNum); - })) + // If the list of incoming values is not the same length, then they cannot + // match since there is not an analogue for each incoming value. + if (PNCanonNums.size() != CurrentCanonNums.size()) + continue; + + bool FoundMatch = true; + + // We compare the canonical value for each incoming value in the passed + // in PHINode to one already present in the outlined region. If the + // incoming values do not match, then the PHINodes do not match. + + // We also check to make sure that the incoming block matches as well by + // finding the corresponding incoming block in the combined outlined region + // for the current outlined region. + for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) { + std::pair ToCompareTo = CurrentCanonNums[Idx]; + std::pair ToAdd = PNCanonNums[Idx]; + if (ToCompareTo.first != ToAdd.first) { + FoundMatch = false; + break; + } + + BasicBlock *CorrespondingBlock = + Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second); + assert(CorrespondingBlock && "Found block is nullptr"); + if (CorrespondingBlock != ToCompareTo.second) { + FoundMatch = false; + break; + } + } + + // If all incoming values and branches matched, then we can merge + // into the found PHINode. + if (FoundMatch) { + UsedPHIs.insert(&CurrPN); return &CurrPN; + } } // If we've made it here, it means we weren't able to replace the PHINode, so @@ -1603,12 +1756,8 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, // Find corresponding basic block in the overall function for the incoming // block. 
- Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI(); - assert(FirstNonPHI && "Incoming block is empty?"); - Value *CorrespondingVal = - Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI); - assert(CorrespondingVal && "Value is nullptr?"); - BasicBlock *BlockToUse = cast(CorrespondingVal)->getParent(); + BasicBlock *BlockToUse = + Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock); NewPN->setIncomingBlock(Idx, BlockToUse); // If we have an argument we make sure we replace using the argument from @@ -1623,6 +1772,10 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, IncomingVal = findOutputMapping(OutputMappings, IncomingVal); Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal); assert(Val && "Value is nullptr?"); + DenseMap::iterator RemappedIt = + FirstRegion->RemappedArguments.find(Val); + if (RemappedIt != FirstRegion->RemappedArguments.end()) + Val = RemappedIt->second; NewPN->setIncomingValue(Idx, Val); } return NewPN; @@ -1649,6 +1802,7 @@ replaceArgumentUses(OutlinableRegion &Region, if (FirstFunction) DominatingFunction = Group.OutlinedFunction; DominatorTree DT(*DominatingFunction); + DenseSet UsedPHIs; for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size(); ArgIdx++) { @@ -1665,6 +1819,8 @@ replaceArgumentUses(OutlinableRegion &Region, << *Region.ExtractedFunction << " with " << *AggArg << " in function " << *Group.OutlinedFunction << "\n"); Arg->replaceAllUsesWith(AggArg); + Value *V = Region.Call->getArgOperand(ArgIdx); + Region.RemappedArguments.insert(std::make_pair(V, AggArg)); continue; } @@ -1713,7 +1869,7 @@ replaceArgumentUses(OutlinableRegion &Region, // If this is storing a PHINode, we must make sure it is included in the // overall function. if (!isa(ValueOperand) || - Region.Candidate->getGVN(ValueOperand).hasValue()) { + Region.Candidate->getGVN(ValueOperand).has_value()) { if (FirstFunction) continue; Value *CorrVal = @@ -1725,7 +1881,7 @@ replaceArgumentUses(OutlinableRegion &Region, PHINode *PN = cast(SI->getValueOperand()); // If it has a value, it was not split by the code extractor, which // is what we are looking for. - if (Region.Candidate->getGVN(PN).hasValue()) + if (Region.Candidate->getGVN(PN)) continue; // We record the parent block for the PHINode in the Region so that @@ -1748,8 +1904,8 @@ replaceArgumentUses(OutlinableRegion &Region, // For our PHINode, we find the combined canonical numbering, and // attempt to find a matching PHINode in the overall PHIBlock. If we // cannot, we copy the PHINode and move it into this new block. - PHINode *NewPN = - findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); + PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, + OutputMappings, UsedPHIs); NewI->setOperand(0, NewPN); } @@ -1923,7 +2079,7 @@ static void alignOutputBlockWithAggFunc( // If there is, we remove the new output blocks. If it does not, // we add it to our list of sets of output blocks. 
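// Aside: the reuse-or-remember step described just above, as a small sketch
// with stand-in types. If an equivalent set of output blocks already exists,
// the freshly created blocks are discarded and the existing set is reused;
// otherwise the new set is recorded.
#include <cstddef>
#include <set>
#include <vector>

using OutputBlockSet = std::set<int>; // stand-in for a set of basic blocks

static std::size_t findOrAddOutputSet(std::vector<OutputBlockSet> &Known,
                                      OutputBlockSet Fresh) {
  for (std::size_t I = 0; I != Known.size(); ++I)
    if (Known[I] == Fresh)
      return I;            // match found: caller deletes the fresh blocks
  Known.push_back(std::move(Fresh));
  return Known.size() - 1; // no match: keep the new output block set
}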
- if (MatchingBB.hasValue()) { + if (MatchingBB) { LLVM_DEBUG(dbgs() << "Set output block for region in function" << Region.ExtractedFunction << " to " << MatchingBB.getValue()); @@ -2279,6 +2435,9 @@ void IROutliner::pruneIncompatibleRegions( if (BBHasAddressTaken) continue; + if (IRSC.getFunction()->hasOptNone()) + continue; + if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && !OutlineFromLinkODRs) continue; @@ -2343,9 +2502,9 @@ static Value *findOutputValueInRegion(OutlinableRegion &Region, OutputCanon = *It->second.second.begin(); } Optional OGVN = Region.Candidate->fromCanonicalNum(OutputCanon); - assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?"); + assert(OGVN && "Could not find GVN for Canonical Number?"); Optional OV = Region.Candidate->fromGVN(*OGVN); - assert(OV.hasValue() && "Could not find value for GVN?"); + assert(OV && "Could not find value for GVN?"); return *OV; } @@ -2400,11 +2559,8 @@ static InstructionCost findCostForOutputBlocks(Module &M, for (Value *V : ID.OperVals) { BasicBlock *BB = static_cast(V); - DenseSet::iterator CBIt = CandidateBlocks.find(BB); - if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB)) - continue; - FoundBlocks.insert(BB); - NumOutputBranches++; + if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second) + NumOutputBranches++; } } @@ -2520,7 +2676,7 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region, // If we found an output register, place a mapping of the new value // to the original in the mapping. - if (!OutputIdx.hasValue()) + if (!OutputIdx) return; if (OutputMappings.find(Outputs[OutputIdx.getValue()]) == @@ -2680,7 +2836,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, "outlined"); + false, nullptr, "outlined"); findAddInputsOutputs(M, *OS, NotSame); if (!OS->IgnoreRegion) OutlinedRegions.push_back(OS); @@ -2791,7 +2947,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, "outlined"); + false, nullptr, "outlined"); bool FunctionOutlined = extractSection(*OS); if (FunctionOutlined) { unsigned StartIdx = OS->Candidate->getStartIdx(); @@ -2874,7 +3030,7 @@ bool IROutlinerLegacyPass::runOnModule(Module &M) { std::unique_ptr ORE; auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; auto GTTI = [this](Function &F) -> TargetTransformInfo & { @@ -2905,7 +3061,7 @@ PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) { std::function GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; if (IROutliner(GTTI, GIRSI, GORE).run(M)) diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp index c32e09875a12..76f8f1a7a482 100644 --- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -9,11 +9,8 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" -#include 
"llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -32,7 +29,7 @@ static bool inferAllPrototypeAttributes( // explicitly visited by CGSCC passes in the new pass manager.) if (F.isDeclaration() && !F.hasOptNone()) { if (!F.hasFnAttribute(Attribute::NoBuiltin)) - Changed |= inferLibFuncAttributes(F, GetTLI(F)); + Changed |= inferNonMandatoryLibFuncAttrs(F, GetTLI(F)); Changed |= inferAttributesFromOthers(F); } diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp index 76f1d0c54d08..2143e39d488d 100644 --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -12,14 +12,8 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Inliner.h" diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 49babc24cb82..4d32266eb9ea 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -14,21 +14,21 @@ #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" @@ -37,11 +37,9 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -67,8 +65,6 @@ #include #include #include -#include -#include #include #include @@ -92,11 +88,28 @@ static cl::opt DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); +static cl::opt IntraSCCCostMultiplier( + "intra-scc-cost-multiplier", cl::init(2), cl::Hidden, + cl::desc( + "Cost multiplier to multiply onto inlined call sites where the " + "new call was previously an intra-SCC call (not relevant when the " + "original call was already intra-SCC). This can accumulate over " + "multiple inlinings (e.g. 
if a call site already had a cost "
 "multiplier and one of its inlined calls was also subject to "
 "this, the inlined call would have the original multiplier "
 "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
 "inlining through a child SCC which can cause terrible compile times"));
+
 /// A flag for test, so we can print the content of the advisor when running it
 /// as part of the default (e.g. -O3) pipeline.
 static cl::opt KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
 cl::init(false), cl::Hidden);

+/// Allows printing the contents of the advisor after each SCC inliner pass.
+static cl::opt
+ EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing",
+ cl::init(false), cl::Hidden);
+
 extern cl::opt InlinerFunctionImportStats;

 static cl::opt CGSCCInlineReplayFile(
@@ -150,10 +163,6 @@ static cl::opt CGSCCInlineReplayFormat(
 ":. (default)")),
 cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);

-static cl::opt InlineEnablePriorityOrder(
- "inline-enable-priority-order", cl::Hidden, cl::init(false),
- cl::desc("Enable the priority inline order for the inliner"));
-
 LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}

 LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -708,8 +717,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
 // duration of the inliner pass, and thus the lifetime of the owned advisor.
 // The one we would get from the MAM can be invalidated as a result of the
 // inliner's activity.
- OwnedAdvisor =
- std::make_unique(M, FAM, getInlineParams());
+ OwnedAdvisor = std::make_unique(
+ M, FAM, getInlineParams(),
+ InlineContext{LTOPhase, InlinePass::CGSCCInliner});

 if (!CGSCCInlineReplayFile.empty())
 OwnedAdvisor = getReplayInlineAdvisor(
@@ -718,7 +728,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
 CGSCCInlineReplayScope,
 CGSCCInlineReplayFallback,
 {CGSCCInlineReplayFormat}},
- /*EmitRemarks=*/true);
+ /*EmitRemarks=*/true,
+ InlineContext{LTOPhase,
+ InlinePass::ReplayCGSCCInliner});

 return *OwnedAdvisor;
 }
@@ -744,7 +756,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
 .getManager();

 InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
- Advisor.onPassEntry();
+ Advisor.onPassEntry(&InitialC);

 auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); });
@@ -773,12 +785,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
 // this model, but it is uniformly spread across all the functions in the SCC
 // and eventually they all become too large to inline, rather than
 // incrementally making a single function grow in a super linear fashion.
- std::unique_ptr>> Calls;
- if (InlineEnablePriorityOrder)
- Calls = std::make_unique>();
- else
- Calls = std::make_unique>>();
- assert(Calls != nullptr && "Expected an initialized InlineOrder");
+ DefaultInlineOrder> Calls;

 // Populate the initial list of calls in this SCC.
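// Aside: the accumulation described by the intra-scc-cost-multiplier option
// above, sketched in plain C++ (hypothetical helpers, not the pass's API).
// The multiplier is stored on the call as a string attribute, read back with
// value_or(1), and scaled each time a newly exposed call would keep chewing
// through a child SCC, so repeated inlining gets exponentially costlier.
#include <cstdint>

template <typename SCC>
static bool shouldBumpMultiplier(const SCC *CurrentSCC, const SCC *CalleeSCC,
                                 const SCC *NewCalleeSCC) {
  // Bump only when the inlined call had crossed into another SCC and the new
  // call stays within that same SCC; self-recursion inside the current SCC is
  // already handled by the inliner itself.
  return CalleeSCC != CurrentSCC && CalleeSCC == NewCalleeSCC;
}

static std::int64_t bumpedMultiplier(std::int64_t Existing /* value_or(1) */,
                                     std::int64_t Factor /* default 2 */) {
  return Existing * Factor; // 1 -> 2 -> 4 -> 8 -> ...
}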
for (auto &N : InitialC) { @@ -793,7 +800,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (auto *CB = dyn_cast(&I)) if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) - Calls->push({CB, -1}); + Calls.push({CB, -1}); else if (!isa(I)) { using namespace ore; setInlineRemark(*CB, "unavailable definition"); @@ -807,7 +814,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, } } } - if (Calls->empty()) + if (Calls.empty()) return PreservedAnalyses::all(); // Capture updatable variable for the current SCC. @@ -833,15 +840,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, SmallVector DeadFunctionsInComdats; // Loop forward over all of the calls. - while (!Calls->empty()) { + while (!Calls.empty()) { // We expect the calls to typically be batched with sequences of calls that // have the same caller, so we first set up some shared infrastructure for // this caller. We also do any pruning we can at this layer on the caller // alone. - Function &F = *Calls->front().first->getCaller(); + Function &F = *Calls.front().first->getCaller(); LazyCallGraph::Node &N = *CG.lookup(F); if (CG.lookupSCC(N) != C) { - Calls->pop(); + Calls.pop(); continue; } @@ -857,8 +864,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; - while (!Calls->empty() && Calls->front().first->getCaller() == &F) { - auto P = Calls->pop(); + while (!Calls.empty() && Calls.front().first->getCaller() == &F) { + auto P = Calls.pop(); CallBase *CB = P.first; const int InlineHistoryID = P.second; Function &Callee = *CB->getCalledFunction(); @@ -876,8 +883,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // trigger infinite inlining, much like is prevented within the inliner // itself by the InlineHistory above, but spread across CGSCC iterations // and thus hidden from the full inline history. - if (CG.lookupSCC(*CG.lookup(Callee)) == C && - UR.InlinedInternalEdges.count({&N, C})) { + LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee)); + if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) { LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " << F.getName() << " -> " << Callee.getName() << "\n"); @@ -897,6 +904,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, continue; } + int CBCostMult = + getStringFnAttrAsInt( + *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName) + .value_or(1); + // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( @@ -935,9 +947,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (tryPromoteCall(*ICB)) NewCallee = ICB->getCalledFunction(); } - if (NewCallee) - if (!NewCallee->isDeclaration()) - Calls->push({ICB, NewHistoryID}); + if (NewCallee) { + if (!NewCallee->isDeclaration()) { + Calls.push({ICB, NewHistoryID}); + // Continually inlining through an SCC can result in huge compile + // times and bloated code since we arbitrarily stop at some point + // when the inliner decides it's not profitable to inline anymore. + // We attempt to mitigate this by making these calls exponentially + // more expensive. 
+ // This doesn't apply to calls in the same SCC since if we do + // inline through the SCC the function will end up being + // self-recursive which the inliner bails out on, and inlining + // within an SCC is necessary for performance. + if (CalleeSCC != C && + CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) { + Attribute NewCBCostMult = Attribute::get( + M.getContext(), + InlineConstants::FunctionInlineCostMultiplierAttributeName, + itostr(CBCostMult * IntraSCCCostMultiplier)); + ICB->addFnAttr(NewCBCostMult); + } + } + } } } @@ -953,7 +984,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() && !CG.isLibFunction(Callee)) { if (Callee.hasLocalLinkage() || !Callee.hasComdat()) { - Calls->erase_if([&](const std::pair &Call) { + Calls.erase_if([&](const std::pair &Call) { return Call.first->getCaller() == &Callee; }); // Clear the body and queue the function itself for deletion when we @@ -1083,17 +1114,24 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, bool MandatoryFirst, + InlineContext IC, InliningAdvisorMode Mode, unsigned MaxDevirtIterations) - : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) { + : Params(Params), IC(IC), Mode(Mode), + MaxDevirtIterations(MaxDevirtIterations) { // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. - if (MandatoryFirst) + if (MandatoryFirst) { PM.addPass(InlinerPass(/*OnlyMandatory*/ true)); + if (EnablePostSCCAdvisorPrinting) + PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs())); + } PM.addPass(InlinerPass()); + if (EnablePostSCCAdvisorPrinting) + PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs())); } PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, @@ -1103,7 +1141,8 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, {CGSCCInlineReplayFile, CGSCCInlineReplayScope, CGSCCInlineReplayFallback, - {CGSCCInlineReplayFormat}})) { + {CGSCCInlineReplayFormat}}, + IC)) { M.getContext().emitError( "Could not setup Inlining Advisor for the requested " "mode and/or options"); diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 692e445cb7cb..5aa5b905f06c 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -19,7 +19,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/Internalize.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" @@ -33,8 +32,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; #define DEBUG_TYPE "internalize" diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp index d9a59dd35fde..ad1927c09803 100644 --- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -23,14 +23,9 @@ #include "llvm/IR/PassManager.h" #include 
"llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include -#include using namespace llvm; #define DEBUG_TYPE "loop-extract" diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 8e83d7bcb6c2..d5f1d291f41f 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1223,6 +1223,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { static const unsigned kX86JumpTableEntrySize = 8; static const unsigned kARMJumpTableEntrySize = 4; static const unsigned kARMBTIJumpTableEntrySize = 8; +static const unsigned kRISCVJumpTableEntrySize = 8; unsigned LowerTypeTestsModule::getJumpTableEntrySize() { switch (Arch) { @@ -1238,6 +1239,9 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { if (BTE->getZExtValue()) return kARMBTIJumpTableEntrySize; return kARMJumpTableEntrySize; + case Triple::riscv32: + case Triple::riscv64: + return kRISCVJumpTableEntrySize; default: report_fatal_error("Unsupported architecture for jump tables"); } @@ -1265,6 +1269,9 @@ void LowerTypeTestsModule::createJumpTableEntry( AsmOS << "b $" << ArgIndex << "\n"; } else if (JumpTableArch == Triple::thumb) { AsmOS << "b.w $" << ArgIndex << "\n"; + } else if (JumpTableArch == Triple::riscv32 || + JumpTableArch == Triple::riscv64) { + AsmOS << "tail $" << ArgIndex << "@plt\n"; } else { report_fatal_error("Unsupported architecture for jump tables"); } @@ -1282,7 +1289,8 @@ Type *LowerTypeTestsModule::getJumpTableEntryType() { void LowerTypeTestsModule::buildBitSetsFromFunctions( ArrayRef TypeIds, ArrayRef Functions) { if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || - Arch == Triple::thumb || Arch == Triple::aarch64) + Arch == Triple::thumb || Arch == Triple::aarch64 || + Arch == Triple::riscv32 || Arch == Triple::riscv64) buildBitSetsFromFunctionsNative(TypeIds, Functions); else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) buildBitSetsFromFunctionsWASM(TypeIds, Functions); @@ -1427,6 +1435,11 @@ void LowerTypeTestsModule::createJumpTable( F->addFnAttr("branch-target-enforcement", "false"); F->addFnAttr("sign-return-address", "none"); } + if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) { + // Make sure the jump table assembly is not modified by the assembler or + // the linker. + F->addFnAttr("target-features", "-c,-relax"); + } // Make sure we don't emit .eh_frame for this function. F->addFnAttr(Attribute::NoUnwind); @@ -2187,11 +2200,7 @@ bool LowerTypeTestsModule::lower() { } Sets.emplace_back(I, MaxUniqueId); } - llvm::sort(Sets, - [](const std::pair &S1, - const std::pair &S2) { - return S1.second < S2.second; - }); + llvm::sort(Sets, llvm::less_second()); // For each disjoint set we found... 
for (const auto &S : Sets) { diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 97ef872c5499..b850591b4aa6 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -88,12 +88,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -113,7 +112,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/IR/ValueMap.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -121,8 +119,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include #include #include @@ -139,10 +137,10 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); -static cl::opt NumFunctionsForSanityCheck( - "mergefunc-sanity", - cl::desc("How many functions in module could be used for " - "MergeFunctions pass sanity check. " +static cl::opt NumFunctionsForVerificationCheck( + "mergefunc-verify", + cl::desc("How many functions in a module could be used for " + "MergeFunctions to pass a basic correctness check. " "'0' disables this check. Works only with '-debug' key."), cl::init(0), cl::Hidden); @@ -228,10 +226,13 @@ private: /// analyzed again. std::vector Deferred; + /// Set of values marked as used in llvm.used and llvm.compiler.used. + SmallPtrSet Used; + #ifndef NDEBUG /// Checks the rules of order relation introduced among functions set. - /// Returns true, if sanity check has been passed, and false if failed. - bool doSanityCheck(std::vector &Worklist); + /// Returns true, if check has been passed, and false if failed. + bool doFunctionalCheck(std::vector &Worklist); #endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's @@ -330,12 +331,12 @@ PreservedAnalyses MergeFunctionsPass::run(Module &M, } #ifndef NDEBUG -bool MergeFunctions::doSanityCheck(std::vector &Worklist) { - if (const unsigned Max = NumFunctionsForSanityCheck) { +bool MergeFunctions::doFunctionalCheck(std::vector &Worklist) { + if (const unsigned Max = NumFunctionsForVerificationCheck) { unsigned TripleNumber = 0; bool Valid = true; - dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + dbgs() << "MERGEFUNC-VERIFY: Started for first " << Max << " functions.\n"; unsigned i = 0; for (std::vector::iterator I = Worklist.begin(), @@ -351,7 +352,7 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { // If F1 <= F2, then F2 >= F1, otherwise report failure. 
if (Res1 != -Res2) { - dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + dbgs() << "MERGEFUNC-VERIFY: Non-symmetric; triple: " << TripleNumber << "\n"; dbgs() << *F1 << '\n' << *F2 << '\n'; Valid = false; @@ -384,7 +385,7 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { } if (!Transitive) { - dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + dbgs() << "MERGEFUNC-VERIFY: Non-transitive; triple: " << TripleNumber << "\n"; dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " << Res4 << "\n"; @@ -395,7 +396,7 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { } } - dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; + dbgs() << "MERGEFUNC-VERIFY: " << (Valid ? "Passed." : "Failed.") << "\n"; return Valid; } return true; @@ -410,6 +411,11 @@ static bool isEligibleForMerging(Function &F) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; + SmallVector UsedV; + collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false); + collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true); + Used.insert(UsedV.begin(), UsedV.end()); + // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. std::vector> @@ -436,7 +442,7 @@ bool MergeFunctions::runOnModule(Module &M) { std::vector Worklist; Deferred.swap(Worklist); - LLVM_DEBUG(doSanityCheck(Worklist)); + LLVM_DEBUG(doFunctionalCheck(Worklist)); LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n'); LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); @@ -456,6 +462,7 @@ bool MergeFunctions::runOnModule(Module &M) { FnTree.clear(); FNodesInTree.clear(); GlobalNumbers.clear(); + Used.clear(); return Changed; } @@ -484,7 +491,7 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { if (SrcTy->isStructTy()) { assert(DestTy->isStructTy()); assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); - Value *Result = UndefValue::get(DestTy); + Value *Result = PoisonValue::get(DestTy); for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { Value *Element = createCast( Builder, Builder.CreateExtractValue(V, makeArrayRef(I)), @@ -828,7 +835,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { // For better debugability, under MergeFunctionsPDI, we do not modify G's // call sites to point to F even when within the same translation unit. if (!G->isInterposable() && !MergeFunctionsPDI) { - if (G->hasGlobalUnnamedAddr()) { + // Functions referred to by llvm.used/llvm.compiler.used are special: + // there are uses of the symbol name that are not visible to LLVM, + // usually from inline asm. + if (G->hasGlobalUnnamedAddr() && !Used.contains(G)) { // G might have been a key in our GlobalNumberState, and it's illegal // to replace a key in ValueMap with a non-global. 
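
The Used set threaded through MergeFunctions above exists because unnamed_addr alone no longer justifies replacing G: a symbol named in llvm.used or llvm.compiler.used may still be referenced from inline asm that LLVM cannot see. A hedged sketch of the same guard in isolation; the helper name is invented, while the library calls are the ones the patch itself uses:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"
    using namespace llvm;

    // Hypothetical helper: may G's direct callers be redirected and G's
    // body dropped? Collect llvm.used and llvm.compiler.used first, as the
    // patched runOnModule does.
    static bool mayDropBody(const Module &M, const Function &G) {
      SmallVector<GlobalValue *, 8> UsedV;
      collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
      collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true);
      SmallPtrSet<const GlobalValue *, 8> Used(UsedV.begin(), UsedV.end());
      // Same condition as the patched mergeTwoFunctions.
      return G.hasGlobalUnnamedAddr() && !Used.contains(&G);
    }
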
GlobalNumbers.erase(G); diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index d515303e4911..143715006512 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -14,43 +14,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/ModuleInliner.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include -#include using namespace llvm; @@ -94,7 +84,9 @@ InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM, // inliner pass, and thus the lifetime of the owned advisor. The one we // would get from the MAM can be invalidated as a result of the inliner's // activity. - OwnedAdvisor = std::make_unique(M, FAM, Params); + OwnedAdvisor = std::make_unique( + M, FAM, Params, + InlineContext{LTOPhase, InlinePass::ModuleInliner}); return *OwnedAdvisor; } @@ -119,7 +111,9 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n"); auto &IAA = MAM.getResult(M); - if (!IAA.tryCreate(Params, Mode, {})) { + if (!IAA.tryCreate( + Params, Mode, {}, + InlineContext{LTOPhase, InlinePass::ModuleInliner})) { M.getContext().emitError( "Could not setup Inlining Advisor for the requested " "mode and/or options"); @@ -153,7 +147,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // the SCC inliner, which need some refactoring. 
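
Several hunks in this patch thread an InlineContext, the LTO phase plus the requesting inliner pass, into every advisor constructor so remarks and replay can tell the pipelines apart. A minimal sketch of building one, assuming the aggregate has exactly the two fields the braced initializers above supply; the helper is hypothetical:

    #include "llvm/Analysis/InlineAdvisor.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    // Hypothetical helper mirroring InlineContext{LTOPhase, InlinePass::...}
    // as used by the CGSCC and module inliners in this diff.
    static InlineContext makeContext(ThinOrFullLTOPhase Phase, InlinePass P) {
      return InlineContext{Phase, P};
    }
    // e.g. makeContext(ThinOrFullLTOPhase::None, InlinePass::ModuleInliner)
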
  std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
  if (InlineEnablePriorityOrder)
-    Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
+    Calls = std::make_unique<PriorityInlineOrder>(
+        std::make_unique<SizePriority>());
  else
    Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
  assert(Calls != nullptr && "Expected an initialized InlineOrder");
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 2d765fb6ce6d..227ad8501f25 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -49,7 +49,6 @@
 #include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
 #include
@@ -59,17 +58,16 @@ using namespace omp;
 
 #define DEBUG_TYPE "openmp-opt"
 
 static cl::opt<bool> DisableOpenMPOptimizations(
-    "openmp-opt-disable", cl::ZeroOrMore,
-    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
-    cl::init(false));
+    "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
+    cl::Hidden, cl::init(false));
 
 static cl::opt<bool> EnableParallelRegionMerging(
-    "openmp-opt-enable-merging", cl::ZeroOrMore,
+    "openmp-opt-enable-merging",
     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
     cl::init(false));
 
 static cl::opt<bool>
-    DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+    DisableInternalization("openmp-opt-disable-internalization",
                            cl::desc("Disable function internalization."),
                            cl::Hidden, cl::init(false));
 
@@ -85,42 +83,47 @@ static cl::opt<bool> HideMemoryTransferLatency(
     cl::Hidden, cl::init(false));
 
 static cl::opt<bool> DisableOpenMPOptDeglobalization(
-    "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+    "openmp-opt-disable-deglobalization",
     cl::desc("Disable OpenMP optimizations involving deglobalization."),
     cl::Hidden, cl::init(false));
 
 static cl::opt<bool> DisableOpenMPOptSPMDization(
-    "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+    "openmp-opt-disable-spmdization",
     cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
     cl::Hidden, cl::init(false));
 
 static cl::opt<bool> DisableOpenMPOptFolding(
-    "openmp-opt-disable-folding", cl::ZeroOrMore,
+    "openmp-opt-disable-folding",
     cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
     cl::init(false));
 
 static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
-    "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+    "openmp-opt-disable-state-machine-rewrite",
     cl::desc("Disable OpenMP optimizations that replace the state machine."),
     cl::Hidden, cl::init(false));
 
 static cl::opt<bool> DisableOpenMPOptBarrierElimination(
-    "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+    "openmp-opt-disable-barrier-elimination",
     cl::desc("Disable OpenMP optimizations that eliminate barriers."),
     cl::Hidden, cl::init(false));
 
 static cl::opt<bool> PrintModuleAfterOptimizations(
-    "openmp-opt-print-module", cl::ZeroOrMore,
+    "openmp-opt-print-module-after",
     cl::desc("Print the current module after OpenMP optimizations."),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> PrintModuleBeforeOptimizations(
+    "openmp-opt-print-module-before",
+    cl::desc("Print the current module before OpenMP optimizations."),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<bool> AlwaysInlineDeviceFunctions(
-    "openmp-opt-inline-device", cl::ZeroOrMore,
+    "openmp-opt-inline-device",
     cl::desc("Inline all applicable functions on the device."), cl::Hidden,
     cl::init(false));
 
 static cl::opt<bool>
-    EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+    EnableVerboseRemarks("openmp-opt-verbose-remarks",
                          cl::desc("Enables more
verbose remarks."), cl::Hidden, cl::init(false)); @@ -129,6 +132,11 @@ static cl::opt cl::desc("Maximal number of attributor iterations."), cl::init(256)); +static cl::opt + SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden, + cl::desc("Maximum amount of shared memory to use."), + cl::init(std::numeric_limits::max())); + STATISTIC(NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated"); STATISTIC(NumOpenMPParallelRegionsDeleted, @@ -493,11 +501,14 @@ struct OMPInformationCache : public InformationCache { // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` // functions, except if `optnone` is present. - for (Function &F : M) { - for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) - if (F.getName().startswith(Prefix) && - !F.hasFnAttribute(Attribute::OptimizeNone)) - F.removeFnAttr(Attribute::NoInline); + if (isOpenMPDevice(M)) { + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.hasFnAttribute(Attribute::NoInline) && + F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } } // TODO: We should attach the attributes defined in OMPKinds.def. @@ -591,7 +602,7 @@ struct KernelInfoState : AbstractState { /// Abstract State interface ///{ - KernelInfoState() {} + KernelInfoState() = default; KernelInfoState(bool BestState) { if (!BestState) indicatePessimisticFixpoint(); @@ -926,8 +937,7 @@ private: SmallDenseMap> BB2PRMap; BasicBlock *StartBB = nullptr, *EndBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -966,8 +976,7 @@ private: const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -1107,10 +1116,8 @@ private: // callbacks. 
SmallVector Args; for (auto *CI : MergableCIs) { - Value *Callee = - CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); - FunctionType *FT = - cast(Callee->getType()->getPointerElementType()); + Value *Callee = CI->getArgOperand(CallbackCalleeOperand); + FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask; Args.clear(); Args.push_back(OutlinedFn->getArg(0)); Args.push_back(OutlinedFn->getArg(1)); @@ -1458,7 +1465,6 @@ private: case Intrinsic::nvvm_barrier0_and: case Intrinsic::nvvm_barrier0_or: case Intrinsic::nvvm_barrier0_popc: - case Intrinsic::amdgcn_s_barrier: return true; default: break; @@ -2120,6 +2126,8 @@ private: OMPRTL___kmpc_barrier_simple_generic); ExternalizationRAII ThreadId(OMPInfoCache, OMPRTL___kmpc_get_hardware_thread_id_in_block); + ExternalizationRAII NumThreads( + OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block); ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size); registerAAs(IsModulePass); @@ -2407,8 +2415,7 @@ struct AAICVTrackerFunction : public AAICVTracker { auto CallCheck = [&](Instruction &I) { Optional ReplVal = getValueForCall(A, I, ICV); - if (ReplVal.hasValue() && - ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) + if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) HasChanged = ChangeStatus::CHANGED; return true; @@ -2468,7 +2475,8 @@ struct AAICVTrackerFunction : public AAICVTracker { if (ICVTrackingAA.isAssumedTracked()) { Optional URV = ICVTrackingAA.getUniqueReplacementValue(ICV); - if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache))) + if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I), + OMPInfoCache))) return URV; } @@ -2509,13 +2517,13 @@ struct AAICVTrackerFunction : public AAICVTracker { if (ValuesMap.count(CurrInst)) { Optional NewReplVal = ValuesMap.lookup(CurrInst); // Unknown value, track new. - if (!ReplVal.hasValue()) { + if (!ReplVal) { ReplVal = NewReplVal; break; } // If we found a new value, we can't know the icv value anymore. - if (NewReplVal.hasValue()) + if (NewReplVal) if (ReplVal != NewReplVal) return nullptr; @@ -2523,11 +2531,11 @@ struct AAICVTrackerFunction : public AAICVTracker { } Optional NewReplVal = getValueForCall(A, *CurrInst, ICV); - if (!NewReplVal.hasValue()) + if (!NewReplVal) continue; // Unknown value, track new. - if (!ReplVal.hasValue()) { + if (!ReplVal) { ReplVal = NewReplVal; break; } @@ -2539,7 +2547,7 @@ struct AAICVTrackerFunction : public AAICVTracker { } // If we are in the same BB and we have a value, we are done. - if (CurrBB == I->getParent() && ReplVal.hasValue()) + if (CurrBB == I->getParent() && ReplVal) return ReplVal; // Go through all predecessors and add terminators for analysis. @@ -2597,7 +2605,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker { ICVTrackingAA.getReplacementValue(ICV, &I, A); // If we found a second ICV value there is no unique returned value. 
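
The hasValue()/getValueOr() cleanups running through the hunks around this point, including the UniqueICVValue change just below, all follow one pattern: rely on the Optional's contextual bool and value_or(). A standalone equivalent with std::optional, whose interface llvm::Optional mirrors:

    #include <cstdio>
    #include <optional>

    int main() {
      std::optional<int> ReplVal;               // "no value tracked yet"
      if (!ReplVal)                             // was: !ReplVal.hasValue()
        ReplVal = 42;                           // track the newly found value
      std::printf("%d\n", ReplVal.value_or(0)); // was: ReplVal.getValueOr(0)
    }
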
- if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) + if (UniqueICVValue && UniqueICVValue != NewReplVal) return false; UniqueICVValue = NewReplVal; @@ -2648,10 +2656,10 @@ struct AAICVTrackerCallSite : AAICVTracker { } ChangeStatus manifest(Attributor &A) override { - if (!ReplVal.hasValue() || !ReplVal.getValue()) + if (!ReplVal || !*ReplVal) return ChangeStatus::UNCHANGED; - A.changeValueAfterManifest(*getCtxI(), **ReplVal); + A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal); A.deleteAfterManifest(*getCtxI()); return ChangeStatus::CHANGED; @@ -2789,7 +2797,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { SmallSetVector SingleThreadedBBs; /// Total number of basic blocks in this function. - long unsigned NumBBs; + long unsigned NumBBs = 0; }; ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { @@ -2952,12 +2960,23 @@ struct AAHeapToSharedFunction : public AAHeapToShared { } void initialize(Attributor &A) override { + if (DisableOpenMPOptDeglobalization) { + indicatePessimisticFixpoint(); + return; + } + auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; + Attributor::SimplifictionCallbackTy SCB = + [](const IRPosition &, const AbstractAttribute *, + bool &) -> Optional { return nullptr; }; for (User *U : RFI.Declaration->users()) - if (CallBase *CB = dyn_cast(U)) + if (CallBase *CB = dyn_cast(U)) { MallocCalls.insert(CB); + A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), + SCB); + } findPotentialRemovedFreeCalls(A); } @@ -2999,6 +3018,14 @@ struct AAHeapToSharedFunction : public AAHeapToShared { auto *AllocSize = cast(CB->getArgOperand(0)); + if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) { + LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB + << " with shared memory." + << " Shared memory usage is limited to " + << SharedMemoryLimit << " bytes\n"); + continue; + } + LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue() << " bytes of shared memory\n"); @@ -3029,11 +3056,12 @@ struct AAHeapToSharedFunction : public AAHeapToShared { "HeapToShared on allocation without alignment attribute"); SharedMem->setAlignment(MaybeAlign(Alignment)); - A.changeValueAfterManifest(*CB, *NewBuffer); + A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer); A.deleteAfterManifest(*CB); A.deleteAfterManifest(*FreeCalls.front()); - NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); + SharedMemoryUsed += AllocSize->getZExtValue(); + NumBytesMovedToSharedMemory = SharedMemoryUsed; Changed = ChangeStatus::CHANGED; } @@ -3069,6 +3097,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared { SmallSetVector MallocCalls; /// Collection of potentially removed free calls in a function. SmallPtrSet PotentialRemovedFreeCalls; + /// The total amount of shared memory that has been used for HeapToShared. + unsigned SharedMemoryUsed = 0; }; struct AAKernelInfo : public StateWrapper { @@ -3137,12 +3167,6 @@ struct AAKernelInfoFunction : AAKernelInfo { auto &OMPInfoCache = static_cast(A.getInfoCache()); Function *Fn = getAnchorScope(); - if (!OMPInfoCache.Kernels.count(Fn)) - return; - - // Add itself to the reaching kernel and set IsKernelEntry. 
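
The SharedMemoryUsed bookkeeping added to AAHeapToShared above enforces a cumulative budget rather than a per-allocation one: once the running total would exceed -openmp-opt-shared-limit, further promotions are skipped. A standalone sketch with invented sizes:

    #include <cstdio>

    int main() {
      const unsigned SharedMemoryLimit = 1024; // bytes, from the new cl::opt
      unsigned SharedMemoryUsed = 0;
      for (unsigned AllocSize : {256u, 512u, 512u, 128u}) {
        if (AllocSize + SharedMemoryUsed > SharedMemoryLimit) {
          std::printf("skip %u-byte alloc: over the %u-byte limit\n",
                      AllocSize, SharedMemoryLimit);
          continue;
        }
        SharedMemoryUsed += AllocSize; // mirrors the patched manifest() logic
        std::printf("moved %u bytes to shared memory (total %u)\n", AllocSize,
                    SharedMemoryUsed);
      }
    }
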
- ReachingKernelEntries.insert(Fn); - IsKernelEntry = true; OMPInformationCache::RuntimeFunctionInfo &InitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; @@ -3176,10 +3200,12 @@ struct AAKernelInfoFunction : AAKernelInfo { Fn); // Ignore kernels without initializers such as global constructors. - if (!KernelInitCB || !KernelDeinitCB) { - indicateOptimisticFixpoint(); + if (!KernelInitCB || !KernelDeinitCB) return; - } + + // Add itself to the reaching kernel and set IsKernelEntry. + ReachingKernelEntries.insert(Fn); + IsKernelEntry = true; // For kernels we might need to initialize/finalize the IsSPMD state and // we need to register a simplification callback so that the Attributor @@ -3345,8 +3371,17 @@ struct AAKernelInfoFunction : AAKernelInfo { return false; } - // Check if the kernel is already in SPMD mode, if so, return success. + // Get the actual kernel, could be the caller of the anchor scope if we have + // a debug wrapper. Function *Kernel = getAnchorScope(); + if (Kernel->hasLocalLinkage()) { + assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper."); + auto *CB = cast(Kernel->user_back()); + Kernel = CB->getCaller(); + } + assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!"); + + // Check if the kernel is already in SPMD mode, if so, return success. GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( (Kernel->getName() + "_exec_mode").str()); assert(ExecMode && "Kernel without exec mode?"); @@ -3711,9 +3746,9 @@ struct AAKernelInfoFunction : AAKernelInfo { // __kmpc_get_hardware_num_threads_in_block(); // WarpSize = __kmpc_get_warp_size(); // BlockSize = BlockHwSize - WarpSize; - // if (InitCB >= BlockSize) return; - // IsWorkerCheckBB: bool IsWorker = InitCB >= 0; + // IsWorkerCheckBB: bool IsWorker = InitCB != -1; // if (IsWorker) { + // if (InitCB >= BlockSize) return; // SMBeginBB: __kmpc_barrier_simple_generic(...); // void *WorkFn; // bool Active = __kmpc_kernel_parallel(&WorkFn); @@ -3770,6 +3805,13 @@ struct AAKernelInfoFunction : AAKernelInfo { ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); InitBB->getTerminator()->eraseFromParent(); + Instruction *IsWorker = + ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, + ConstantInt::get(KernelInitCB->getType(), -1), + "thread.is_worker", InitBB); + IsWorker->setDebugLoc(DLoc); + BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); + Module &M = *Kernel->getParent(); auto &OMPInfoCache = static_cast(A.getInfoCache()); FunctionCallee BlockHwSizeFn = @@ -3779,29 +3821,22 @@ struct AAKernelInfoFunction : AAKernelInfo { OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_get_warp_size); CallInst *BlockHwSize = - CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB); + CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB); OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize); BlockHwSize->setDebugLoc(DLoc); - CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB); + CallInst *WarpSize = + CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB); OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize); WarpSize->setDebugLoc(DLoc); - Instruction *BlockSize = - BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB); + Instruction *BlockSize = BinaryOperator::CreateSub( + BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB); BlockSize->setDebugLoc(DLoc); - Instruction *IsMainOrWorker = - ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, 
KernelInitCB, - BlockSize, "thread.is_main_or_worker", InitBB); + Instruction *IsMainOrWorker = ICmpInst::Create( + ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize, + "thread.is_main_or_worker", IsWorkerCheckBB); IsMainOrWorker->setDebugLoc(DLoc); - BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker, - InitBB); - - Instruction *IsWorker = - ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, - ConstantInt::get(KernelInitCB->getType(), -1), - "thread.is_worker", IsWorkerCheckBB); - IsWorker->setDebugLoc(DLoc); - BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, - IsWorkerCheckBB); + BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB, + IsMainOrWorker, IsWorkerCheckBB); // Create local storage for the work function pointer. const DataLayout &DL = M.getDataLayout(); @@ -4241,10 +4276,10 @@ struct AAKernelInfoCallSite : AAKernelInfo { unsigned ScheduleTypeVal = ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; switch (OMPScheduleType(ScheduleTypeVal)) { - case OMPScheduleType::Static: - case OMPScheduleType::StaticChunked: - case OMPScheduleType::Distribute: - case OMPScheduleType::DistributeChunked: + case OMPScheduleType::UnorderedStatic: + case OMPScheduleType::UnorderedStaticChunked: + case OMPScheduleType::OrderedDistribute: + case OMPScheduleType::OrderedDistributeChunked: break; default: SPMDCompatibilityTracker.indicatePessimisticFixpoint(); @@ -4390,7 +4425,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { std::string Str("simplified value: "); - if (!SimplifiedValue.hasValue()) + if (!SimplifiedValue) return Str + std::string("none"); if (!SimplifiedValue.getValue()) @@ -4420,8 +4455,8 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { IRPosition::callsite_returned(CB), [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional { - assert((isValidState() || (SimplifiedValue.hasValue() && - SimplifiedValue.getValue() == nullptr)) && + assert((isValidState() || + (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) && "Unexpected invalid state!"); if (!isAtFixpoint()) { @@ -4461,9 +4496,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; - if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) { + if (SimplifiedValue && *SimplifiedValue) { Instruction &I = *getCtxI(); - A.changeValueAfterManifest(I, **SimplifiedValue); + A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue); A.deleteAfterManifest(I); CallBase *CB = dyn_cast(&I); @@ -4549,7 +4584,7 @@ private: // We have empty reaching kernels, therefore we cannot tell if the // associated call site can be folded. At this moment, SimplifiedValue // must be none. - assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none"); + assert(!SimplifiedValue && "SimplifiedValue should be none"); } return SimplifiedValue == SimplifiedValueBefore ? 
ChangeStatus::UNCHANGED @@ -4592,7 +4627,7 @@ private: return indicatePessimisticFixpoint(); if (CallerKernelInfoAA.ReachingKernelEntries.empty()) { - assert(!SimplifiedValue.hasValue() && + assert(!SimplifiedValue && "SimplifiedValue should keep none at this point"); return ChangeStatus::UNCHANGED; } @@ -4700,18 +4735,23 @@ void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { void OpenMPOpt::registerAAs(bool IsModulePass) { if (SCC.empty()) - return; + if (IsModulePass) { // Ensure we create the AAKernelInfo AAs first and without triggering an // update. This will make sure we register all value simplification // callbacks before any other AA has the chance to create an AAValueSimplify // or similar. - for (Function *Kernel : OMPInfoCache.Kernels) + auto CreateKernelInfoCB = [&](Use &, Function &Kernel) { A.getOrCreateAAFor( - IRPosition::function(*Kernel), /* QueryingAA */ nullptr, + IRPosition::function(Kernel), /* QueryingAA */ nullptr, DepClassTy::NONE, /* ForceUpdate */ false, /* UpdateAfterInit */ false); + return false; + }; + OMPInformationCache::RuntimeFunctionInfo &InitRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; + InitRFI.foreachUse(SCC, CreateKernelInfoCB); registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id); registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); @@ -4899,6 +4939,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { AM.getResult(M).getManager(); KernelSet Kernels = getDeviceKernels(M); + if (PrintModuleBeforeOptimizations) + LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M); + auto IsCalled = [&](Function &F) { if (Kernels.contains(&F)) return true; @@ -4958,8 +5001,15 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, - MaxFixpointIterations, OREGetter, DEBUG_TYPE); + + AttributorConfig AC(CGUpdater); + AC.DefaultInitializeLiveInternals = false; + AC.RewriteSignatures = false; + AC.MaxFixpointIterations = MaxFixpointIterations; + AC.OREGetter = OREGetter; + AC.PassName = DEBUG_TYPE; + + Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); @@ -5001,6 +5051,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, Module &M = *C.begin()->getFunction().getParent(); + if (PrintModuleBeforeOptimizations) + LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M); + KernelSet Kernels = getDeviceKernels(M); FunctionAnalysisManager &FAM = @@ -5022,8 +5075,16 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, - MaxFixpointIterations, OREGetter, DEBUG_TYPE); + + AttributorConfig AC(CGUpdater); + AC.DefaultInitializeLiveInternals = false; + AC.IsModulePass = false; + AC.RewriteSignatures = false; + AC.MaxFixpointIterations = MaxFixpointIterations; + AC.OREGetter = OREGetter; + AC.PassName = DEBUG_TYPE; + + Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); @@ -5093,8 +5154,16 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 
SetFixpointIterations : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, - MaxFixpointIterations, OREGetter, DEBUG_TYPE); + + AttributorConfig AC(CGUpdater); + AC.DefaultInitializeLiveInternals = false; + AC.IsModulePass = false; + AC.RewriteSignatures = false; + AC.MaxFixpointIterations = MaxFixpointIterations; + AC.OREGetter = OREGetter; + AC.PassName = DEBUG_TYPE; + + Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Result = OMPOpt.run(false); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 5f2223e4047e..54c72bdbb203 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -14,7 +14,6 @@ #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -40,6 +39,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -55,8 +55,6 @@ #include #include #include -#include -#include #include #include #include @@ -99,7 +97,7 @@ static cl::opt // This is an option used by testing: static cl::opt SkipCostAnalysis("skip-partial-inlining-cost-analysis", - cl::init(false), cl::ZeroOrMore, + cl::ReallyHidden, cl::desc("Skip Cost Analysis")); // Used to determine if a cold region is worth outlining based on @@ -129,7 +127,7 @@ static cl::opt MaxNumInlineBlocks( // Command line option to set the maximum number of partial inlining allowed // for the module. The default value of -1 means no limit. static cl::opt MaxNumPartialInlining( - "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, + "max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited")); // Used only when PGO or user annotated branch data is absent. It is @@ -137,7 +135,7 @@ static cl::opt MaxNumPartialInlining( // produces larger value, the BFI value will be used. 
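
The AttributorConfig rewrite above replaces a long positional constructor with named fields, which is why the three call sites in this file read almost identically. A sketch of packaging the configuration used for the CGSCC runs; the helper name is invented, and OptimizationRemarkGetter is assumed to be the callback typedef Attributor.h declares:

    #include "llvm/Transforms/IPO/Attributor.h"
    using namespace llvm;

    // Hypothetical helper assembling the field values this diff sets for
    // OpenMPOpt's CGSCC flavor.
    static AttributorConfig
    makeOpenMPOptConfig(CallGraphUpdater &CGUpdater,
                        OptimizationRemarkGetter OREGetter,
                        unsigned MaxFixpointIterations) {
      AttributorConfig AC(CGUpdater);
      AC.DefaultInitializeLiveInternals = false;
      AC.IsModulePass = false; // CGSCC, not module, pass
      AC.RewriteSignatures = false;
      AC.MaxFixpointIterations = MaxFixpointIterations;
      AC.OREGetter = OREGetter;
      AC.PassName = "openmp-opt";
      return AC;
    }

Named fields make it obvious which knobs differ between the module and CGSCC instantiations, something the old nine-argument constructor hid.
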
static cl::opt
    OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
-                             cl::Hidden, cl::ZeroOrMore,
+                             cl::Hidden,
                              cl::desc("Relative frequency of outline region to "
                                       "the entry block"));
 
@@ -169,7 +167,7 @@ struct FunctionOutliningInfo {
 };
 
 struct FunctionOutliningMultiRegionInfo {
-  FunctionOutliningMultiRegionInfo() {}
+  FunctionOutliningMultiRegionInfo() = default;
 
   // Container for outline regions
   struct OutlineRegionInfo {
@@ -440,7 +438,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
   };
 
   auto BBProfileCount = [BFI](BasicBlock *BB) {
-    return BFI->getBlockProfileCount(BB).getValueOr(0);
+    return BFI->getBlockProfileCount(BB).value_or(0);
   };
 
   // Use the same computeBBInlineCost function to compute the cost savings of
@@ -741,7 +739,7 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
   auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
       OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
 
-  if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
+  if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
     return OutlineRegionRelFreq;
 
   // When profile data is not available, we need to be conservative in
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 74f68531b89a..ae787be40c55 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -15,19 +15,13 @@
 #include "llvm-c/Transforms/PassManagerBuilder.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Target/CGPassBuilderOption.h"
@@ -41,22 +35,16 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
 #include "llvm/Transforms/Scalar/LICM.h"
 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
-#include "llvm/Transforms/Scalar/SCCP.h"
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Vectorize.h"
-#include "llvm/Transforms/Vectorize/LoopVectorize.h"
-#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
-#include "llvm/Transforms/Vectorize/VectorCombine.h"
 
 using namespace llvm;
 
 namespace llvm {
-cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
-                                 cl::Hidden, cl::ZeroOrMore,
+cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::Hidden,
                                  cl::desc("Run Partial inlining pass"));
 
 static cl::opt<bool>
@@ -111,8 +99,8 @@ static cl::opt<bool>
     EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
                          cl::desc("Enable performing ThinLTO."));
 
-cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
-                                 cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
+cl::opt<bool> EnableHotColdSplit("hot-cold-split",
+                                 cl::desc("Enable hot-cold splitting pass"));
 
 cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
cl::Hidden, cl::desc("Enable ir outliner pass")); @@ -126,12 +114,12 @@ cl::opt cl::desc("Disable pre-instrumentation inliner")); cl::opt PreInlineThreshold( - "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + "preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); cl::opt - EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore, + EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt @@ -139,13 +127,8 @@ static cl::opt cl::Hidden, cl::desc("Disable shrink-wrap library calls")); -static cl::opt EnableSimpleLoopUnswitch( - "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, - cl::desc("Enable the simple loop unswitch pass. Also enables independent " - "cleanup passes integrated into the loop pass manager pipeline.")); - cl::opt - EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore, + EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)")); // This option is used in simplifying testing SampleFDO optimizations for @@ -336,59 +319,6 @@ void PassManagerBuilder::populateFunctionPassManager( FPM.add(createEarlyCSEPass()); } -// Do PGO instrumentation generation or use pass as the option specified. -void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, - bool IsCS = false) { - if (IsCS) { - if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) - return; - } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) - return; - - // Perform the preinline and cleanup passes for O1 and above. - // We will not do this inline for context sensitive PGO (when IsCS is true). - if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) { - // Create preinline pass. We construct an InlineParams object and specify - // the threshold here to avoid the command line options of the regular - // inliner to influence pre-inlining. The only fields of InlineParams we - // care about are DefaultThreshold and HintThreshold. - InlineParams IP; - IP.DefaultThreshold = PreInlineThreshold; - // FIXME: The hint threshold has the same value used by the regular inliner - // when not optimzing for size. This should probably be lowered after - // performance testing. - // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes - // the instrumented binary unusably large. Even if PreInlineThreshold is not - // correct thresold for -Oz, it is better than not running preinliner. - IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325; - - MPM.add(createFunctionInliningPass(IP)); - MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Combine silly seq's - addExtensionsToPM(EP_Peephole, MPM); - } - if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { - MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); - // Add the profile lowering pass. - InstrProfOptions Options; - if (!PGOInstrGen.empty()) - Options.InstrProfileOutput = PGOInstrGen; - Options.DoCounterPromotion = true; - Options.UseBFIInPromotion = IsCS; - MPM.add(createLoopRotatePass()); - MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); - } - if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); - // Indirect call promotion that promotes intra-module targets only. 
- // For ThinLTO this is done earlier due to interactions with globalopt - // for imported functions. We don't run this at -O0. - if (OptLevel > 0 && !IsCS) - MPM.add( - createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); -} void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. @@ -404,7 +334,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createGVNHoistPass()); if (EnableGVNSink) { MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); } } @@ -418,7 +349,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals } - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) MPM.add(createAggressiveInstCombinerPass()); @@ -427,14 +360,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLibCallsShrinkWrapPass()); addExtensionsToPM(EP_Peephole, MPM); - // Optimize memory intrinsic calls based on the profiled size information. - if (SizeLevel == 0) - MPM.add(createPGOMemOPSizeOptLegacyPass()); - // TODO: Investigate the cost/benefit of tail call elimination on debugging. if (OptLevel > 1) MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // The matrix extension can introduce large vector operations early, which can @@ -443,29 +374,32 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createVectorCombinePass()); // Begin the loop pass pipeline. - if (EnableSimpleLoopUnswitch) { - // The simple loop unswitch pass relies on separate cleanup passes. Schedule - // them first so when we re-process a loop they run before other loop - // passes. - MPM.add(createLoopInstSimplifyPass()); - MPM.add(createLoopSimplifyCFGPass()); - } + + // The simple loop unswitch pass relies on separate cleanup passes. Schedule + // them first so when we re-process a loop they run before other loop + // passes. + MPM.add(createLoopInstSimplifyPass()); + MPM.add(createLoopSimplifyCFGPass()); + // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. 
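
The reshaped LICM scheduling above runs hoisting twice: with speculation off before loop rotation, so rotation-relevant metadata survives, and with speculation on afterwards (the second, speculation-enabled run follows just below). Summarized in isolation as a hedged sketch; the helper name and the caps being plain unsigneds are assumptions:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    // Hypothetical helper showing the ordering this patch establishes.
    static void addRotatedLICM(legacy::PassManagerBase &MPM, unsigned OptCap,
                               unsigned PromotionCap) {
      MPM.add(createLICMPass(OptCap, PromotionCap,
                             /*AllowSpeculation=*/false)); // pre-rotation
      MPM.add(createLoopRotatePass());                     // duplicate header
      MPM.add(createLICMPass(OptCap, PromotionCap,
                             /*AllowSpeculation=*/true));  // post-rotation
    }
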
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - if (EnableSimpleLoopUnswitch) - MPM.add(createSimpleLoopUnswitchLegacyPass()); - else - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3)); // FIXME: We break the loop pass pipeline here in order to do full // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the // need for this. - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. if (EnableLoopFlatten) { @@ -521,7 +455,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // TODO: Investigate if this is too expensive at O1. if (OptLevel > 1) { MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -580,9 +515,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, PM.add(createEarlyCSEPass()); PM.add(createCorrelatedValuePropagationPass()); PM.add(createInstructionCombiningPass()); - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); - PM.add(createCFGSimplificationPass()); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + PM.add(createSimpleLoopUnswitchLegacyPass()); + PM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); PM.add(createInstructionCombiningPass()); } @@ -597,6 +534,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // before SLP vectorization. PM.add(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) @@ -641,7 +579,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } PM.add(createWarnMissedTransformationsPass()); @@ -657,10 +596,6 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { - // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link - // is handled separately, so just check this is not the ThinLTO post-link. - bool DefaultOrPreLinkPipeline = !PerformThinLTO; - MPM.add(createAnnotation2MetadataLegacyPass()); if (!PGOSampleUse.empty()) { @@ -678,7 +613,6 @@ void PassManagerBuilder::populateModulePassManager( // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. 
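
Every createCFGSimplificationPass call site in this file now spells out its options through the SimplifyCFGOptions fluent builder rather than relying on defaults. A sketch reproducing the most elaborate option set used above, the pre-SLP cleanup; the helper name is invented:

    #include "llvm/Transforms/Scalar.h"
    #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
    using namespace llvm;

    static FunctionPass *makeAggressiveSimplifyCFG() {
      // Same chained setters the patch uses before SLP vectorization.
      return createCFGSimplificationPass(SimplifyCFGOptions()
                                             .forwardSwitchCondToPhi(true)
                                             .convertSwitchRangeToICmp(true)
                                             .convertSwitchToLookupTable(true)
                                             .needCanonicalLoops(false)
                                             .hoistCommonInsts(true)
                                             .sinkCommonInsts(true));
    }

Each setter returns the options object itself, which is what lets a pass pipeline state its intent inline instead of through a separately named configuration.
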
if (OptLevel == 0) { - addPGOInstrPasses(MPM); if (Inliner) { MPM.add(Inliner); Inliner = nullptr; @@ -732,8 +666,6 @@ void PassManagerBuilder::populateModulePassManager( // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. if (PerformThinLTO) { - MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, - !PGOSampleUse.empty())); MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); } @@ -772,20 +704,9 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - - // For SamplePGO in ThinLTO compile phase, we do not want to do indirect - // call promotion as it will change the CFG too much to make the 2nd - // profile annotation in backend more difficult. - // PGO instrumentation is added during the compile phase for ThinLTO, do - // not run it a second time - if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) - addPGOInstrPasses(MPM); - - // Create profile COMDAT variables. Lld linker wants to see all variables - // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. - if (!PerformThinLTO && EnablePGOCSInstrGen) - MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Clean up after IPCP & DAE // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive @@ -811,8 +732,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createOpenMPOptCGSCCLegacyPass()); MPM.add(createPostOrderFunctionAttrsLegacyPass()); - if (OptLevel > 2) - MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); addFunctionSimplificationPasses(MPM); @@ -837,14 +756,6 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); - // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass - // for LTO and ThinLTO -- The actual pass will be called after all inlines - // are performed. - // Need to do this after COMDAT variables have been eliminated, - // (i.e. after EliminateAvailableExternallyPass). - if (!(PrepareForLTO || PrepareForThinLTO)) - addPGOInstrPasses(MPM, /* IsCS */ true); - if (EnableOrderFileInstrumentation) MPM.add(createInstrOrderFilePass()); @@ -886,7 +797,8 @@ void PassManagerBuilder::populateModulePassManager( // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -972,7 +884,8 @@ void PassManagerBuilder::populateModulePassManager( // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
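For orientation while reading these hunks: populateModulePassManager() is the entry point a legacy-pass-manager driver invokes after configuring the builder. A minimal sketch of that usage (driver code assumed for illustration, not part of this patch; the new pass manager is the supported path upstream):

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/IPO/PassManagerBuilder.h"
  using namespace llvm;

  // Configure the builder, populate a legacy module pass manager, run it.
  void runDefaultPipeline(Module &M) {
    PassManagerBuilder Builder;
    Builder.OptLevel = 2;  // corresponds to -O2
    Builder.SizeLevel = 0; // not -Os/-Oz
    legacy::PassManager MPM;
    Builder.populateModulePassManager(MPM);
    MPM.run(M);
  }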
- MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); addExtensionsToPM(EP_OptimizerLast, MPM); @@ -1009,13 +922,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Split call-site with more constrained arguments. PM.add(createCallSiteSplittingPass()); - // Indirect call promotion. This should promote all the targets that are - // left by the earlier promotion pass that promotes intra-module targets. - // This two-step promotion is to save the compile time. For LTO, it should - // produce the same result as if we only do promotion here. - PM.add( - createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); - // Propage constant function arguments by specializing the functions. if (EnableFunctionSpecialization && OptLevel > 2) PM.add(createFunctionSpecializationPass()); @@ -1081,9 +987,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createPruneEHPass()); // Remove dead EH info. - // CSFDO instrumentation and use pass. - addPGOInstrPasses(PM, /* IsCS */ true); - // Infer attributes on declarations, call sites, arguments, etc. for an SCC. if (AttributorRun & AttributorRunOption::CGSCC) PM.add(createAttributorCGSCCLegacyPass()); @@ -1098,14 +1001,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createGlobalOptimizerPass()); PM.add(createGlobalDCEPass()); // Remove dead functions. - // If we didn't decide to inline a function, check to see if we can - // transform it to pass arguments by value instead of by reference. - PM.add(createArgumentPromotionPass()); - // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); + PM.add(createJumpThreadingPass()); // Break up allocas PM.add(createSROAPass()); @@ -1120,7 +1019,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. @@ -1149,7 +1049,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); + PM.add(createJumpThreadingPass()); } void PassManagerBuilder::addLateLTOOptimizationPasses( @@ -1175,80 +1075,6 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( PM.add(createMergeFunctionsPass()); } -void PassManagerBuilder::populateThinLTOPassManager( - legacy::PassManagerBase &PM) { - PerformThinLTO = true; - if (LibraryInfo) - PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - - if (VerifyInput) - PM.add(createVerifierPass()); - - if (ImportSummary) { - // This pass imports type identifier resolutions for whole-program - // devirtualization and CFI. It must run early because other passes may - // disturb the specific instruction patterns that these passes look for, - // creating dependencies on resolutions that may not appear in the summary. 
- // - // For example, GVN may transform the pattern assume(type.test) appearing in - // two basic blocks into assume(phi(type.test, type.test)), which would - // transform a dependency on a WPD resolution into a dependency on a type - // identifier resolution for CFI. - // - // Also, WPD has access to more precise information than ICP and can - // devirtualize more effectively, so it should operate on the IR first. - PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary)); - PM.add(createLowerTypeTestsPass(nullptr, ImportSummary)); - } - - populateModulePassManager(PM); - - if (VerifyOutput) - PM.add(createVerifierPass()); - PerformThinLTO = false; -} - -void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { - if (LibraryInfo) - PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - - if (VerifyInput) - PM.add(createVerifierPass()); - - addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM); - - if (OptLevel != 0) - addLTOOptimizationPasses(PM); - else { - // The whole-program-devirt pass needs to run at -O0 because only it knows - // about the llvm.type.checked.load intrinsic: it needs to both lower the - // intrinsic itself and handle it in the summary. - PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); - } - - // Create a function that performs CFI checks for cross-DSO calls with targets - // in the current module. - PM.add(createCrossDSOCFIPass()); - - // Lower type metadata and the type.test intrinsic. This pass supports Clang's - // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at - // link time if CFI is enabled. The pass does nothing if CFI is disabled. - PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); - // Run a second time to clean up any type tests left behind by WPD for use - // in ICP (which is performed earlier than this in the regular LTO pipeline). - PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - - if (OptLevel != 0) - addLateLTOOptimizationPasses(PM); - - addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM); - - PM.add(createAnnotationRemarksLegacyPass()); - - if (VerifyOutput) - PM.add(createVerifierPass()); -} - LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); @@ -1314,18 +1140,3 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, legacy::PassManagerBase *MPM = unwrap(PM); Builder->populateModulePassManager(*MPM); } - -void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, - LLVMPassManagerRef PM, - LLVMBool Internalize, - LLVMBool RunInliner) { - PassManagerBuilder *Builder = unwrap(PMB); - legacy::PassManagerBase *LPM = unwrap(PM); - - // A small backwards compatibility hack. populateLTOPassManager used to take - // an RunInliner option. 
- if (RunInliner && !Builder->Inliner) - Builder->Inliner = createFunctionInliningPass(); - - Builder->populateLTOPassManager(*LPM); -} diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp index 39de19ca9e9d..e0836a9fd699 100644 --- a/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -24,9 +23,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Local.h" @@ -246,7 +243,7 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) { } if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); } if (TokenInst) { diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index 5779553ee732..26fb7d676429 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -18,6 +18,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Utils/SCCPSolver.h" using namespace llvm; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 7334bf695b67..6859953de962 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -14,7 +14,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/ProfileData/SampleProf.h" #include #include @@ -62,23 +63,24 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { return ChildNodeRet; } -ContextTrieNode &ContextTrieNode::moveToChildContext( - const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode) { +ContextTrieNode & +SampleContextTracker::moveContextSamples(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove) { uint64_t Hash = FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); + std::map &AllChildContext = + ToNodeParent.getAllChildContext(); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); - LineLocation OldCallSite = NodeToMove.CallSiteLoc; - ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); AllChildContext[Hash] = NodeToMove; ContextTrieNode &NewNode = AllChildContext[Hash]; - NewNode.CallSiteLoc = CallSite; + NewNode.setCallSiteLoc(CallSite); // Walk through nodes in the moved subtree, and update // FunctionSamples' context as part of the context promotion. // We also need to set the new parent link for all children.
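That walk is a plain breadth-first traversal with parent-link fixup; in isolation, over a deliberately simplified node type, it looks like the sketch below (the real code follows immediately after):

  #include <cstdint>
  #include <map>
  #include <queue>

  struct TrieNodeSketch {
    TrieNodeSketch *Parent = nullptr;
    std::map<uint64_t, TrieNodeSketch> Children; // keyed by call-site hash
  };

  // After a node is copied under a new parent, relink every node in the
  // moved subtree to its (possibly new) parent, breadth-first.
  inline void relinkSubtree(TrieNodeSketch &Moved, TrieNodeSketch &NewParent) {
    Moved.Parent = &NewParent;
    std::queue<TrieNodeSketch *> Work;
    Work.push(&Moved);
    while (!Work.empty()) {
      TrieNodeSketch *Cur = Work.front();
      Work.pop();
      for (auto &It : Cur->Children) {
        It.second.Parent = Cur;
        Work.push(&It.second);
      }
    }
  }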
std::queue NodeToUpdate; - NewNode.setParentContext(this); + NewNode.setParentContext(&ToNodeParent); NodeToUpdate.push(&NewNode); while (!NodeToUpdate.empty()) { @@ -87,10 +89,8 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { - FSamples->getContext().promoteOnPath(ContextFramesToRemove); + setContextNode(FSamples, Node); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " - << FSamples->getContext().toString() << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -100,10 +100,6 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( } } - // Original context no longer needed, destroy if requested. - if (DeleteNode) - OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); - return NewNode; } @@ -131,7 +127,7 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) { Optional ContextTrieNode::getFunctionSize() const { return FuncSize; } void ContextTrieNode::addFunctionSize(uint32_t FSize) { - if (!FuncSize.hasValue()) + if (!FuncSize) FuncSize = 0; FuncSize = FuncSize.getValue() + FSize; @@ -147,6 +143,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) { ParentContext = Parent; } +void ContextTrieNode::setCallSiteLoc(const LineLocation &Loc) { + CallSiteLoc = Loc; +} + void ContextTrieNode::dumpNode() { dbgs() << "Node: " << FuncName << "\n" << " Callsite: " << CallSiteLoc << "\n" @@ -202,13 +202,23 @@ SampleContextTracker::SampleContextTracker( SampleContext Context = FuncSample.first; LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() << "\n"); - if (!Context.isBaseContext()) - FuncToCtxtProfiles[Context.getName()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); NewNode->setFunctionSamples(FSamples); } + populateFuncToCtxtMap(); +} + +void SampleContextTracker::populateFuncToCtxtMap() { + for (auto *Node : *this) { + FunctionSamples *FSamples = Node->getFunctionSamples(); + if (FSamples) { + FSamples->getContext().setState(RawContext); + setContextNode(FSamples, Node); + FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples); + } + } } FunctionSamples * @@ -231,7 +241,7 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext().toString() + dbgs() << " Callee context found: " << getContextString(CalleeContext) << "\n"; }); return FSamples; @@ -333,7 +343,7 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) continue; - ContextTrieNode *FromNode = getContextFor(Context); + ContextTrieNode *FromNode = getContextNodeForProfile(CSamples); if (FromNode == Node) continue; @@ -354,7 +364,7 @@ void SampleContextTracker::markContextSamplesInlined( const FunctionSamples *InlinedSamples) { assert(InlinedSamples && "Expect non-null inlined samples"); LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " - << InlinedSamples->getContext().toString() << "\n"); + << getContextString(*InlinedSamples) << "\n"); InlinedSamples->getContext().setState(InlinedContext); } @@ -405,17 +415,43 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( // the context profile in the 
base (context-less) profile. FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); + (void)FromSamples; // Unused in release build. + LLVM_DEBUG(dbgs() << " Found context tree root to promote: " - << FromSamples->getContext().toString() << "\n"); + << getContextString(&NodeToPromo) << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); - uint32_t ContextFramesToRemove = - FromSamples->getContext().getContextFrames().size() - 1; - return promoteMergeContextSamplesTree(NodeToPromo, RootContext, - ContextFramesToRemove); + return promoteMergeContextSamplesTree(NodeToPromo, RootContext); +} + +#ifndef NDEBUG +std::string +SampleContextTracker::getContextString(const FunctionSamples &FSamples) const { + return getContextString(getContextNodeForProfile(&FSamples)); } +std::string +SampleContextTracker::getContextString(ContextTrieNode *Node) const { + SampleContextFrameVector Res; + if (Node == &RootContext) + return std::string(); + Res.emplace_back(Node->getFuncName(), LineLocation(0, 0)); + + ContextTrieNode *PreNode = Node; + Node = Node->getParentContext(); + while (Node && Node != &RootContext) { + Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc()); + PreNode = Node; + Node = Node->getParentContext(); + } + + std::reverse(Res.begin(), Res.end()); + + return SampleContext::getContextString(Res); +} +#endif + void SampleContextTracker::dump() { RootContext.dumpTree(); } StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const { @@ -526,8 +562,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) { } void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, - ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove) { + ContextTrieNode &ToNode) { FunctionSamples *FromSamples = FromNode.getFunctionSamples(); FunctionSamples *ToSamples = ToNode.getFunctionSamples(); if (FromSamples && ToSamples) { @@ -540,16 +575,13 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, } else if (FromSamples) { // Transfer FromSamples from FromNode to ToNode ToNode.setFunctionSamples(FromSamples); + setContextNode(FromSamples, &ToNode); FromSamples->getContext().setState(SyntheticContext); - FromSamples->getContext().promoteOnPath(ContextFramesToRemove); - FromNode.setFunctionSamples(nullptr); } } ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( - ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove && "Context to remove can't be empty"); + ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) { // Ignore call site location if destination is top level under root LineLocation NewCallSiteLoc = LineLocation(0, 0); @@ -566,22 +598,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( if (!ToNode) { // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. 
- ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); + ToNode = + &moveContextSamples(ToNodeParent, NewCallSiteLoc, std::move(FromNode)); + LLVM_DEBUG({ + dbgs() << " Context promoted and merged to: " << getContextString(ToNode) + << "\n"; + }); } else { // Destination node exists, merge samples for the context tree - mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); + mergeContextNode(FromNode, *ToNode); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext().toString() << "\n"; + << getContextString(ToNode) << "\n"; }); // Recursively promote and merge children for (auto &It : FromNode.getAllChildContext()) { ContextTrieNode &FromChildNode = It.second; - promoteMergeContextSamplesTree(FromChildNode, *ToNode, - ContextFramesToRemove); + promoteMergeContextSamplesTree(FromChildNode, *ToNode); } // Remove children once they're all merged @@ -594,4 +629,14 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( return *ToNode; } + +void SampleContextTracker::createContextLessProfileMap( + SampleProfileMap &ContextLessProfiles) { + for (auto *Node : *this) { + FunctionSamples *FProfile = Node->getFunctionSamples(); + // Profile's context can be empty, use ContextNode's func name. + if (FProfile) + ContextLessProfiles[Node->getFuncName()].merge(*FProfile); + } +} } // namespace llvm diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index bc6051de90c4..40de69bbf2cf 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -25,11 +25,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" @@ -38,22 +35,16 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" @@ -64,6 +55,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -73,9 +65,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include 
"llvm/Transforms/IPO/ProfiledCallGraph.h" @@ -84,7 +74,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include @@ -151,8 +140,7 @@ static cl::opt ProfileSampleBlockAccurate( "them conservatively as unknown. ")); static cl::opt ProfileAccurateForSymsInList( - "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, - cl::init(true), + "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. ")); @@ -183,6 +171,15 @@ static cl::opt ProfileSizeInline( cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size.")); +// Since profiles are consumed by many passes, turning on this option has +// side effects. For instance, pre-link SCC inliner would see merged profiles +// and inline the hot functions (that are skipped in this pass). +static cl::opt DisableSampleLoaderInlining( + "disable-sample-loader-inlining", cl::Hidden, cl::init(false), + cl::desc("If true, artifically skip inline transformation in sample-loader " + "pass, and merge (or scale) profiles (as configured by " + "--sample-profile-merge-inlinee).")); + cl::opt ProfileInlineGrowthLimit( "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " @@ -219,19 +216,19 @@ static cl::opt ProfileICPRelativeHotnessSkip( "Skip relative hotness check for ICP up to given number of targets.")); static cl::opt CallsitePrioritizedInline( - "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "sample-profile-prioritized-inline", cl::Hidden, + cl::desc("Use call site prioritized inlining for sample profile loader." 
"Currently only CSSPGO is supported.")); static cl::opt UsePreInlinerDecision( - "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "sample-profile-use-preinliner", cl::Hidden, + cl::desc("Use the preinliner decisions stored in profile context.")); static cl::opt AllowRecursiveInline( - "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "sample-profile-recursive-inline", cl::Hidden, + cl::desc("Allow sample loader inliner to inline recursive calls.")); static cl::opt ProfileInlineReplayFile( @@ -287,7 +284,6 @@ static cl::opt ProfileInlineReplayFormat( static cl::opt MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, - cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader")); @@ -295,6 +291,13 @@ static cl::opt OverwriteExistingWeights( "overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite.")); +static cl::opt AnnotateSampleProfileInlinePhase( + "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), + cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " + "sample-profile inline pass name.")); + +extern cl::opt EnableExtTspBlockPlacement; + namespace { using BlockWeightMap = DenseMap; @@ -425,7 +428,11 @@ public: : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), - LTOPhase(LTOPhase) {} + LTOPhase(LTOPhase), + AnnotatedPassName(AnnotateSampleProfileInlinePhase + ? llvm::AnnotateInlinePassName(InlineContext{ + LTOPhase, InlinePass::SampleProfileInliner}) + : CSINLINE_DEBUG) {} bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -487,15 +494,13 @@ protected: /// Profile tracker for different context. std::unique_ptr ContextTracker; - /// Flag indicating whether input profile is context-sensitive - bool ProfileIsCSFlat = false; - /// Flag indicating which LTO/ThinLTO phase the pass is invoked in. /// /// We need to know the LTO phase because for example in ThinLTOPrelink /// phase, in annotation, we should not promote indirect calls. Instead, /// we will mark GUIDs that needs to be annotated to the function. - ThinOrFullLTOPhase LTOPhase; + const ThinOrFullLTOPhase LTOPhase; + const std::string AnnotatedPassName; /// Profle Symbol list tells whether a function name appears in the binary /// used to generate the current profile. @@ -535,6 +540,11 @@ protected: // A pseudo probe helper to correlate the imported sample counts. std::unique_ptr ProbeManager; + +private: + const char *getAnnotatedRemarkPassName() const { + return AnnotatedPassName.c_str(); + } }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -605,7 +615,7 @@ ErrorOr SampleProfileLoader::getInstWeight(const Instruction &Inst) { // call instruction should have 0 count. // For CS profile, the callsite count of previously inlined callees is // populated with the entry count of the callees. - if (!ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) if (const auto *CB = dyn_cast(&Inst)) if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) return 0; @@ -644,7 +654,7 @@ ErrorOr SampleProfileLoader::getProbeWeight(const Instruction &Inst) { // call instruction should have 0 count. 
// For CS profile, the callsite count of previously inlined callees is // populated with the entry count of the callees. - if (!ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) if (const auto *CB = dyn_cast(&Inst)) if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) return 0; @@ -698,7 +708,7 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { if (Function *Callee = Inst.getCalledFunction()) CalleeName = Callee->getName(); - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); const FunctionSamples *FS = findFunctionSamples(Inst); @@ -730,7 +740,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( FunctionSamples::getGUID(R->getName()); }; - if (ProfileIsCSFlat) { + if (FunctionSamples::ProfileIsCS) { auto CalleeSamples = ContextTracker->getIndirectCalleeContextSamplesFor(DIL); if (CalleeSamples.empty()) @@ -783,7 +793,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); if (it.second) { - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) it.first->second = ContextTracker->getContextSamplesFor(DIL); else it.first->second = @@ -839,6 +849,13 @@ static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl &CallTargets, uint64_t Sum) { + // Bail out early if MaxNumPromotions is zero. + // This prevents allocating an array of zero length below. + // + // Note `updateIDTMetaData` is called in two places so check + // `MaxNumPromotions` inside it. + if (MaxNumPromotions == 0) + return; uint32_t NumVals = 0; // OldSum is the existing total count in the value profile data. uint64_t OldSum = 0; @@ -922,6 +939,14 @@ updateIDTMetaData(Instruction &Inst, bool SampleProfileLoader::tryPromoteAndInlineCandidate( Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, SmallVector *InlinedCallSite) { + // Bail out early if sample-loader inliner is disabled. + if (DisableSampleLoaderInlining) + return false; + + // Bail out early if MaxNumPromotions is zero. + // This prevents allocating an array of zero length in callees below. + if (MaxNumPromotions == 0) + return false; auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName(); auto R = SymbolMap.find(CalleeFunctionName); if (R == SymbolMap.end() || !R->getValue()) @@ -1009,8 +1034,9 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( for (auto I : Candidates) { Function *CalledFunction = I->getCalledFunction(); if (CalledFunction) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", - I->getDebugLoc(), I->getParent()) + ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), + "InlineAttempt", I->getDebugLoc(), + I->getParent()) << "previous inlining reattempted for " << (Hot ? "hotness: '" : "size: '") << ore::NV("Callee", CalledFunction) << "' into '" @@ -1042,13 +1068,12 @@ void SampleProfileLoader::findExternalInlineCandidate( // For AutoFDO profile, retrieve candidate profiles by walking over // the nested inlinee profiles. 
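Stepping back from the hunks above: several of them swap the fixed CSINLINE_DEBUG pass name for getAnnotatedRemarkPassName() when emitting remarks, so the reported pass name can reflect the LTO phase. The emission pattern itself is unchanged; a minimal sketch of it (helper name and message text are illustrative, not from this patch):

  #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  #include "llvm/IR/DiagnosticInfo.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Emit an analysis remark against a call site, tagging it with whatever
  // pass name the loader computed for the current LTO phase.
  void emitInlineAttemptRemark(OptimizationRemarkEmitter &ORE,
                               const char *PassName, const CallBase &CB,
                               const Function *Callee) {
    ORE.emit(OptimizationRemarkAnalysis(PassName, "InlineAttempt",
                                        CB.getDebugLoc(), CB.getParent())
             << "attempting to inline " << ore::NV("Callee", Callee));
  }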
- if (!ProfileIsCSFlat) { + if (!FunctionSamples::ProfileIsCS) { Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold); return; } - ContextTrieNode *Caller = - ContextTracker->getContextFor(Samples->getContext()); + ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples); std::queue CalleeList; CalleeList.push(Caller); while (!CalleeList.empty()) { @@ -1098,11 +1123,20 @@ void SampleProfileLoader::findExternalInlineCandidate( /// Iteratively inline hot callsites of a function. /// -/// Iteratively traverse all callsites of the function \p F, and find if -/// the corresponding inlined instance exists and is hot in profile. If -/// it is hot enough, inline the callsites and adds new callsites of the -/// callee into the caller. If the call is an indirect call, first promote -/// it to direct call. Each indirect call is limited with a single target. +/// Iteratively traverse all callsites of the function \p F to find the +/// callsites that have corresponding inlined instances. +/// +/// For such callsites, +/// - If it is hot enough, inline the callsite and add new callsites of the +/// callee into the caller. If the call is an indirect call, first promote +/// it to a direct call. Each indirect call is limited to a single target. +/// +/// - If a callsite is not inlined, merge its profile into the outline +/// version (if --sample-profile-merge-inlinee is true), or scale the +/// counters of the standalone function based on the profile of inlined +/// instances (if --sample-profile-merge-inlinee is false). +/// +/// Later passes may consume the updated profiles. /// /// \param F function to perform iterative inlining. /// \param InlinedGUIDs a set to be updated to include all GUIDs that are @@ -1137,7 +1171,7 @@ bool SampleProfileLoader::inlineHotFunctions( assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || ProfileIsCSFlat) + if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1200,13 +1234,17 @@ bool SampleProfileLoader::inlineHotFunctions( // For CS profile, profile for not inlined context will be merged when // base profile is being retrieved. - if (!FunctionSamples::ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F); return Changed; } bool SampleProfileLoader::tryInlineCandidate( InlineCandidate &Candidate, SmallVector *InlinedCallSites) { + // Do not attempt to inline a candidate if + // --disable-sample-loader-inlining is true. + if (DisableSampleLoaderInlining) + return false; CallBase &CB = *Candidate.CallInstr; Function *CalledFunction = CB.getCalledFunction(); @@ -1216,7 +1254,8 @@ bool SampleProfileLoader::tryInlineCandidate( InlineCost Cost = shouldInlineCandidate(Candidate); if (Cost.isNever()) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) + ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), + "InlineFail", DLoc, BB) << "incompatible inlining"); return false; } @@ -1226,45 +1265,45 @@ bool SampleProfileLoader::tryInlineCandidate( InlineFunctionInfo IFI(nullptr, GetAC); IFI.UpdateProfile = false; - if (InlineFunction(CB, IFI).isSuccess()) { - // Merge the attributes based on the inlining.
- AttributeFuncs::mergeAttributesForInlining(*BB->getParent(), - *CalledFunction); - - // The call to InlineFunction erases I, so we can't pass it here. - emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, - *BB->getParent(), Cost, true, CSINLINE_DEBUG); - - // Now populate the list of newly exposed call sites. - if (InlinedCallSites) { - InlinedCallSites->clear(); - for (auto &I : IFI.InlinedCallSites) - InlinedCallSites->push_back(I); - } + if (!InlineFunction(CB, IFI).isSuccess()) + return false; - if (ProfileIsCSFlat) - ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); - ++NumCSInlined; - - // Prorate inlined probes for a duplicated inlining callsite which probably - // has a distribution less than 100%. Samples for an inlinee should be - // distributed among the copies of the original callsite based on each - // callsite's distribution factor for counts accuracy. Note that an inlined - // probe may come with its own distribution factor if it has been duplicated - // in the inlinee body. The two factor are multiplied to reflect the - // aggregation of duplication. - if (Candidate.CallsiteDistribution < 1) { - for (auto &I : IFI.InlinedCallSites) { - if (Optional Probe = extractProbe(*I)) - setProbeDistributionFactor(*I, Probe->Factor * - Candidate.CallsiteDistribution); - } - NumDuplicatedInlinesite++; - } + // Merge the attributes based on the inlining. + AttributeFuncs::mergeAttributesForInlining(*BB->getParent(), + *CalledFunction); - return true; + // The call to InlineFunction erases I, so we can't pass it here. + emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), + Cost, true, getAnnotatedRemarkPassName()); + + // Now populate the list of newly exposed call sites. + if (InlinedCallSites) { + InlinedCallSites->clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites->push_back(I); } - return false; + + if (FunctionSamples::ProfileIsCS) + ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); + ++NumCSInlined; + + // Prorate inlined probes for a duplicated inlining callsite which probably + // has a distribution less than 100%. Samples for an inlinee should be + // distributed among the copies of the original callsite based on each + // callsite's distribution factor for counts accuracy. Note that an inlined + // probe may come with its own distribution factor if it has been duplicated + // in the inlinee body. The two factors are multiplied to reflect the + // aggregation of duplication. + if (Candidate.CallsiteDistribution < 1) { + for (auto &I : IFI.InlinedCallSites) { + if (Optional Probe = extractProbe(*I)) + setProbeDistributionFactor(*I, Probe->Factor * + Candidate.CallsiteDistribution); + } + NumDuplicatedInlinesite++; + } + + return true; } bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, @@ -1285,14 +1324,8 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, if (Optional Probe = extractProbe(*CB)) Factor = Probe->Factor; - uint64_t CallsiteCount = 0; - ErrorOr Weight = getBlockWeight(CB->getParent()); - if (Weight) - CallsiteCount = Weight.get(); - if (CalleeSamples) - CallsiteCount = std::max( - CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); - + uint64_t CallsiteCount = + CalleeSamples ?
CalleeSamples->getEntrySamples() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1387,7 +1420,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { bool SampleProfileLoader::inlineHotFunctionsWithPriority( Function &F, DenseSet &InlinedGUIDs) { - // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. assert((!ProfAccForSymsInList || @@ -1513,7 +1545,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( // For CS profile, profile for not inlined context will be merged when // base profile is being retrieved. - if (!FunctionSamples::ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F); return Changed; } @@ -1528,11 +1560,11 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( if (!Callee || Callee->isDeclaration()) continue; - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline", - I->getDebugLoc(), I->getParent()) - << "previous inlining not repeated: '" - << ore::NV("Callee", Callee) << "' into '" - << ore::NV("Caller", &F) << "'"); + ORE->emit( + OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline", + I->getDebugLoc(), I->getParent()) + << "previous inlining not repeated: '" << ore::NV("Callee", Callee) + << "' into '" << ore::NV("Caller", &F) << "'"); ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); @@ -1540,6 +1572,10 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( continue; } + // Do not merge a context that is already duplicated into the base profile. + if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase)) + continue; + if (ProfileMergeInlinee) { // A function call can be replicated by optimizations like callsite // splitting or jump threading and the replicates end up sharing the @@ -1623,7 +1659,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { // With CSSPGO all indirect call targets are counted torwards the // original indirect call site in the profile, including both // inlined and non-inlined targets. - if (!FunctionSamples::ProfileIsCSFlat) { + if (!FunctionSamples::ProfileIsCS) { if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) @@ -1714,6 +1750,11 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { } } + // FIXME: Re-enable for sample profiling after investigating why the sum + // of branch weights can be 0 + // + // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. @@ -1798,7 +1839,7 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", std::unique_ptr SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { std::unique_ptr ProfiledCG; - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) ProfiledCG = std::make_unique(*ContextTracker); else ProfiledCG = std::make_unique(Reader->getProfiles()); @@ -1843,8 +1884,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { assert(&CG->getModule() == &M); - if (UseProfiledCallGraph || - (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) { + if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS && + !UseProfiledCallGraph.getNumOccurrences())) { // Use profiled call edges to augment the top-down order. 
There are cases // that the top-down order computed based on the static call graph doesn't // reflect real execution order. For example @@ -1973,40 +2014,50 @@ bool SampleProfileLoader::doInitialization(Module &M, ProfileInlineReplayScope, ProfileInlineReplayFallback, {ProfileInlineReplayFormat}}, - /*EmitRemarks=*/false); + /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner}); } - // Apply tweaks if context-sensitive profile is available. - if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) { - ProfileIsCSFlat = Reader->profileIsCSFlat(); + // Apply tweaks if context-sensitive or probe-based profile is available. + if (Reader->profileIsCS() || Reader->profileIsPreInlined() || + Reader->profileIsProbeBased()) { + if (!UseIterativeBFIInference.getNumOccurrences()) + UseIterativeBFIInference = true; + if (!SampleProfileUseProfi.getNumOccurrences()) + SampleProfileUseProfi = true; + if (!EnableExtTspBlockPlacement.getNumOccurrences()) + EnableExtTspBlockPlacement = true; // Enable priority-based inliner and size inline by default for CSSPGO. if (!ProfileSizeInline.getNumOccurrences()) ProfileSizeInline = true; if (!CallsitePrioritizedInline.getNumOccurrences()) CallsitePrioritizedInline = true; - - // For CSSPGO, use preinliner decision by default when available. - if (!UsePreInlinerDecision.getNumOccurrences()) - UsePreInlinerDecision = true; - // For CSSPGO, we also allow recursive inline to best use context profile. if (!AllowRecursiveInline.getNumOccurrences()) AllowRecursiveInline = true; - // Enable iterative-BFI by default for CSSPGO. - if (!UseIterativeBFIInference.getNumOccurrences()) - UseIterativeBFIInference = true; - // Enable Profi by default for CSSPGO. - if (!SampleProfileUseProfi.getNumOccurrences()) - SampleProfileUseProfi = true; + if (Reader->profileIsPreInlined()) { + if (!UsePreInlinerDecision.getNumOccurrences()) + UsePreInlinerDecision = true; + } - if (FunctionSamples::ProfileIsCSFlat) { - // Tracker for profiles under different context - ContextTracker = std::make_unique( - Reader->getProfiles(), &GUIDToFuncNameMap); + if (!Reader->profileIsCS()) { + // Non-CS profile should be fine without a function size budget for the + // inliner since the contexts in the profile are either all from inlining + // in the previous build or pre-computed by the preinliner with a size + // cap, thus they are bounded. + if (!ProfileInlineLimitMin.getNumOccurrences()) + ProfileInlineLimitMin = std::numeric_limits::max(); + if (!ProfileInlineLimitMax.getNumOccurrences()) + ProfileInlineLimitMax = std::numeric_limits::max(); } } + if (Reader->profileIsCS()) { + // Tracker for profiles under different contexts + ContextTracker = std::make_unique( + Reader->getProfiles(), &GUIDToFuncNameMap); + } + // Load pseudo probe descriptors for probe-based function samples. if (Reader->profileIsProbeBased()) { ProbeManager = std::make_unique(M); @@ -2082,7 +2133,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, } // Account for cold calls not inlined.... - if (!ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) for (const std::pair &pair : notInlinedCallInfo) updateProfileCallee(pair.first, pair.second.entryCount); @@ -2145,7 +2196,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) // Initialize entry count when the function has no existing entry // count value.
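The doInitialization() hunks above repeatedly guard default-flips with getNumOccurrences(). Reduced to a single flag, the idiom is the following (option name hypothetical, for illustration only):

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  static cl::opt<bool> ExampleTunable("example-tunable", cl::init(false),
                                      cl::desc("Hypothetical tunable flag"));

  // Flip the default only when the user did not pass the flag explicitly;
  // an explicit command-line setting always wins over profile-driven tweaks.
  void applyProfileDrivenDefault(bool ProfileSuggestsOn) {
    if (ProfileSuggestsOn && !ExampleTunable.getNumOccurrences())
      ExampleTunable = true;
  }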
- if (!F.getEntryCount().hasValue()) + if (!F.getEntryCount()) F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { @@ -2158,7 +2209,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) ORE = OwnedORE.get(); } - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) Samples = ContextTracker->getBaseSamplesFor(F); else Samples = Reader->getSamplesFor(F); diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index e104ae00e916..d1ab2649ee2e 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -13,21 +13,19 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include @@ -416,7 +414,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F, FunctionAnalysisManager &FAM) { BlockFrequencyInfo &BFI = FAM.getResult(F); auto BBProfileCount = [&BFI](BasicBlock *BB) { - return BFI.getBlockProfileCount(BB).getValueOr(0); + return BFI.getBlockProfileCount(BB).value_or(0); }; // Collect the sum of execution weight for each probe. diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp index 95393d9476e0..c7d54b8cdeb0 100644 --- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp +++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -25,18 +25,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/SyntheticCountsUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; using Scaled64 = ScaledNumber; @@ -47,18 +42,17 @@ using ProfileCount = Function::ProfileCount; namespace llvm { cl::opt InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), - cl::ZeroOrMore, cl::desc("Initial value of synthetic entry count")); } // namespace llvm /// Initial synthetic count assigned to inline functions. static cl::opt InlineSyntheticCount( - "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, + "inline-synthetic-count", cl::Hidden, cl::init(15), cl::desc("Initial synthetic entry count for inline functions.")); /// Initial synthetic count assigned to cold functions. 
static cl::opt ColdSyntheticCount( - "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, + "cold-synthetic-count", cl::Hidden, cl::init(5), cl::desc("Initial synthetic entry count for cold functions.")); // Assign initial synthetic entry counts to functions. diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 52708ff2f226..a360a768a2bc 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -21,7 +21,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Pass.h" -#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -311,7 +310,8 @@ void splitAndWriteThinLTOBitcode( return; } if (!F->isDeclaration() && - computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) + computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == + FMRB_DoesNotAccessMemory) EligibleVirtualFns.insert(F); }); } @@ -542,11 +542,11 @@ class WriteThinLTOBitcode : public ModulePass { raw_ostream &OS; // raw_ostream to print on // The output stream on which to emit a minimized module for use // just in the thin link, if requested. - raw_ostream *ThinLinkOS; + raw_ostream *ThinLinkOS = nullptr; public: static char ID; // Pass identification, replacement for typeid - WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { + WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 8b30f0e989a1..898a213d0849 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -57,6 +57,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" @@ -79,6 +80,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndexYAML.h" @@ -95,6 +97,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Evaluator.h" #include #include @@ -107,6 +110,15 @@ using namespace wholeprogramdevirt; #define DEBUG_TYPE "wholeprogramdevirt" +STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets"); +STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations"); +STATISTIC(NumBranchFunnel, "Number of branch funnels"); +STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations"); +STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations"); +STATISTIC(NumVirtConstProp1Bit, + "Number of 1 bit virtual constant propagations"); +STATISTIC(NumVirtConstProp, "Number of virtual constant propagations"); + static cl::opt ClSummaryAction( "wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), @@ -132,13 +144,12 @@ static cl::opt ClWriteSummary( static cl::opt 
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, - cl::init(10), cl::ZeroOrMore, + cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels")); static cl::opt PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, - cl::init(false), cl::ZeroOrMore, cl::desc("Print index-based devirtualization messages")); /// Provide a way to force enable whole program visibility in tests. @@ -146,30 +157,34 @@ static cl::opt /// !vcall_visibility metadata (the mere presense of type tests /// previously implied hidden visibility). static cl::opt - WholeProgramVisibility("whole-program-visibility", cl::init(false), - cl::Hidden, cl::ZeroOrMore, + WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility")); /// Provide a way to force disable whole program for debugging or workarounds, /// when enabled via the linker. static cl::opt DisableWholeProgramVisibility( - "disable-whole-program-visibility", cl::init(false), cl::Hidden, - cl::ZeroOrMore, + "disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)")); /// Provide way to prevent certain function from being devirtualized static cl::list SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), - cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated); - -/// Mechanism to add runtime checking of devirtualization decisions, trapping on -/// any that are not correct. Useful for debugging undefined behavior leading to -/// failures with WPD. -static cl::opt - CheckDevirt("wholeprogramdevirt-check", cl::init(false), cl::Hidden, - cl::ZeroOrMore, - cl::desc("Add code to trap on incorrect devirtualizations")); + cl::Hidden, cl::CommaSeparated); + +/// Mechanism to add runtime checking of devirtualization decisions, optionally +/// trapping or falling back to indirect call on any that are not correct. +/// Trapping mode is useful for debugging undefined behavior leading to failures +/// with WPD. Fallback mode is useful for ensuring safety when whole program +/// visibility may be compromised. +enum WPDCheckMode { None, Trap, Fallback }; +static cl::opt DevirtCheckMode( + "wholeprogramdevirt-check", cl::Hidden, + cl::desc("Type of checking for incorrect devirtualizations"), + cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), + clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), + clEnumValN(WPDCheckMode::Fallback, "fallback", + "Fallback to indirect when incorrect"))); namespace { struct PatternList { @@ -866,13 +881,14 @@ void updateVCallVisibilityInIndex( if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) return; for (auto &P : Index) { + // Don't upgrade the visibility for symbols exported to the dynamic + // linker, as we have no information on their eventual use. + if (DynamicExportSymbols.count(P.first)) + continue; for (auto &S : P.second.SummaryList) { auto *GVar = dyn_cast(S.get()); if (!GVar || - GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic || - // Don't upgrade the visibility for symbols exported to the dynamic - // linker, as we have no information on their eventual use. 
- DynamicExportSymbols.count(P.first)) + GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) continue; GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); } @@ -1133,16 +1149,17 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, if (RemarksEnabled) VCallSite.emitRemark("single-impl", TheFn->stripPointerCasts()->getName(), OREGetter); + NumSingleImpl++; auto &CB = VCallSite.CB; assert(!CB.getCalledFunction() && "devirtualizing direct call?"); IRBuilder<> Builder(&CB); Value *Callee = Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType()); - // If checking is enabled, add support to compare the virtual function - // pointer to the devirtualized target. In case of a mismatch, perform a - // debug trap. - if (CheckDevirt) { + // If trap checking is enabled, add support to compare the virtual + // function pointer to the devirtualized target. In case of a mismatch, + // perform a debug trap. + if (DevirtCheckMode == WPDCheckMode::Trap) { auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee); Instruction *ThenTerm = SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false); @@ -1152,8 +1169,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, CallTrap->setDebugLoc(CB.getDebugLoc()); } - // Devirtualize. - CB.setCalledOperand(Callee); + // If fallback checking is enabled, add support to compare the virtual + // function pointer to the devirtualized target. In case of a mismatch, + // fall back to indirect call. + if (DevirtCheckMode == WPDCheckMode::Fallback) { + MDNode *Weights = + MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1); + // Version the indirect call site. If the called value is equal to the + // given callee, 'NewInst' will be executed, otherwise the original call + // site will be executed. + CallBase &NewInst = versionCallSite(CB, Callee, Weights); + NewInst.setCalledOperand(Callee); + // Since the new call site is direct, we must clear metadata that + // is only appropriate for indirect calls. This includes !prof and + // !callees metadata. + NewInst.setMetadata(LLVMContext::MD_prof, nullptr); + NewInst.setMetadata(LLVMContext::MD_callees, nullptr); + // Additionally, we should remove them from the fallback indirect call, + // so that we don't attempt to perform indirect call promotion later. + CB.setMetadata(LLVMContext::MD_prof, nullptr); + CB.setMetadata(LLVMContext::MD_callees, nullptr); + } + + // In either trapping or non-checking mode, devirtualize original call. + else { + // Devirtualize unconditionally. + CB.setCalledOperand(Callee); + // Since the call site is now direct, we must clear metadata that + // is only appropriate for indirect calls. This includes !prof and + // !callees metadata. + CB.setMetadata(LLVMContext::MD_prof, nullptr); + CB.setMetadata(LLVMContext::MD_callees, nullptr); + } // This use is no longer unsafe. if (VCallSite.NumUnsafeUses) @@ -1208,7 +1255,7 @@ bool DevirtModule::trySingleImplDevirt( return false; // If so, update each call site to call that implementation directly. - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) TargetsForSlot[0].WasDevirt = true; bool IsExported = false; @@ -1279,7 +1326,7 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef TargetsForSlot, return false; // Collect functions devirtualized at least for one call site for stats. 
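The new STATISTIC counters and the widened RemarksEnabled || AreStatisticsEnabled() guards in these hunks follow the stock llvm/ADT/Statistic.h pattern; reduced to essentials (names illustrative, not from this patch):

  #define DEBUG_TYPE "example"
  #include "llvm/ADT/Statistic.h"
  using namespace llvm;

  STATISTIC(NumExampleDevirt, "Number of example devirtualizations");

  void recordDevirt(bool RemarksEnabled) {
    ++NumExampleDevirt; // counters are cheap; bump unconditionally
    // Extra bookkeeping done only for reporting is gated, as in the
    // WasDevirt / DevirtTargets updates above.
    if (RemarksEnabled || AreStatisticsEnabled()) {
      // ... populate the per-target map used for remarks and final stats ...
    }
  }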
- if (PrintSummaryDevirt) + if (PrintSummaryDevirt || AreStatisticsEnabled()) DevirtTargets.insert(TheFn); auto &S = TheFn.getSummaryList()[0]; @@ -1385,6 +1432,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, !FSAttr.getValueAsString().contains("+retpoline")) continue; + NumBranchFunnel++; if (RemarksEnabled) VCallSite.emitRemark("branch-funnel", JT->stripPointerCasts()->getName(), OREGetter); @@ -1476,6 +1524,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, for (auto Call : CSInfo.CallSites) { if (!OptimizedCalls.insert(&Call.CB).second) continue; + NumUniformRetVal++; Call.replaceAndErase( "uniform-ret-val", FnName, RemarksEnabled, OREGetter, ConstantInt::get(cast(Call.CB.getType()), TheRetVal)); @@ -1499,7 +1548,7 @@ bool DevirtModule::tryUniformRetValOpt( } applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal); - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (auto &&Target : TargetsForSlot) Target.WasDevirt = true; return true; @@ -1592,6 +1641,7 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable, B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType())); Cmp = B.CreateZExt(Cmp, Call.CB.getType()); + NumUniqueRetVal++; Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter, Cmp); } @@ -1636,7 +1686,7 @@ bool DevirtModule::tryUniqueRetValOpt( UniqueMemberAddr); // Update devirtualization statistics for targets. - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (auto &&Target : TargetsForSlot) Target.WasDevirt = true; @@ -1665,11 +1715,13 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName, Value *Bits = B.CreateLoad(Int8Ty, Addr); Value *BitsAndBit = B.CreateAnd(Bits, Bit); auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0)); + NumVirtConstProp1Bit++; Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled, OREGetter, IsBitSet); } else { Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo()); Value *Val = B.CreateLoad(RetType, ValAddr); + NumVirtConstProp++; Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, OREGetter, Val); } @@ -1701,7 +1753,7 @@ bool DevirtModule::tryVirtualConstProp( for (VirtualCallTarget &Target : TargetsForSlot) { if (Target.Fn->isDeclaration() || computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) != - MAK_ReadNone || + FMRB_DoesNotAccessMemory || Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() || Target.Fn->getReturnType() != RetType) return false; @@ -1755,7 +1807,7 @@ bool DevirtModule::tryVirtualConstProp( setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte, OffsetBit); - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (auto &&Target : TargetsForSlot) Target.WasDevirt = true; @@ -1963,7 +2015,7 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { // (although this is unlikely). In that case, explicitly build a pair and // RAUW it. 
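// The hunk just below seeds its insertvalue chain with poison instead of
// undef: every element is overwritten before the aggregate is used, and
// poison is the stronger placeholder. A minimal standalone sketch of the same
// pattern, assuming LLVM's IRBuilder (buildPair and PairTy are hypothetical
// names used only for this example):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *buildPair(llvm::IRBuilder<> &B, llvm::StructType *PairTy,
                              llvm::Value *V0, llvm::Value *V1) {
  // The seed aggregate is never read; both elements are inserted below.
  llvm::Value *Pair = llvm::PoisonValue::get(PairTy);
  Pair = B.CreateInsertValue(Pair, V0, {0});
  Pair = B.CreateInsertValue(Pair, V1, {1});
  return Pair;
}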
if (!CI->use_empty()) { - Value *Pair = UndefValue::get(CI->getType()); + Value *Pair = PoisonValue::get(CI->getType()); IRBuilder<> B(CI); Pair = B.CreateInsertValue(Pair, LoadedValue, {0}); Pair = B.CreateInsertValue(Pair, TypeTestCall, {1}); @@ -2151,9 +2203,9 @@ bool DevirtModule::run() { removeRedundantTypeTests(); - // We have lowered or deleted the type instrinsics, so we will no - // longer have enough information to reason about the liveness of virtual - // function pointers in GlobalDCE. + // We have lowered or deleted the type intrinsics, so we will no longer have + // enough information to reason about the liveness of virtual function + // pointers in GlobalDCE. for (GlobalVariable &GV : M.globals()) GV.eraseMetadata(LLVMContext::MD_vcall_visibility); @@ -2243,7 +2295,7 @@ bool DevirtModule::run() { } // Collect functions devirtualized at least for one call site for stats. - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (const auto &T : TargetsForSlot) if (T.WasDevirt) DevirtTargets[std::string(T.Fn->getName())] = T.Fn; @@ -2276,6 +2328,8 @@ bool DevirtModule::run() { } } + NumDevirtTargets += DevirtTargets.size(); + removeRedundantTypeTests(); // Rebuild each global we touched as part of virtual constant propagation to @@ -2284,9 +2338,9 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); - // We have lowered or deleted the type instrinsics, so we will no - // longer have enough information to reason about the liveness of virtual - // function pointers in GlobalDCE. + // We have lowered or deleted the type intrinsics, so we will no longer have + // enough information to reason about the liveness of virtual function + // pointers in GlobalDCE. for (GlobalVariable &GV : M.globals()) GV.eraseMetadata(LLVMContext::MD_vcall_visibility); @@ -2367,4 +2421,6 @@ void DevirtIndex::run() { if (PrintSummaryDevirt) for (const auto &DT : DevirtTargets) errs() << "Devirtualized call to " << DT << "\n"; + + NumDevirtTargets += DevirtTargets.size(); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 0598f751febe..f4d8b79a5311 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -693,9 +693,6 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { unsigned OpndNum = Opnds.size(); unsigned InstrNeeded = OpndNum - 1; - // The number of addends in the form of "(-1)*x". - unsigned NegOpndNum = 0; - // Adjust the number of instructions needed to emit the N-ary add. for (const FAddend *Opnd : Opnds) { if (Opnd->isConstant()) @@ -707,9 +704,6 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { continue; const FAddendCoef &CE = Opnd->getCoef(); - if (CE.isMinusOne() || CE.isMinusTwo()) - NegOpndNum++; - // Let the addend be "c * x". If "c == +/-1", the value of the addend // is immediately available; otherwise, it needs exactly one instruction // to evaluate the value. 
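// Standalone arithmetic checks (illustration only) for two of the sub folds
// introduced in the visitSub hunk below; unsigned wraparound in C++ mirrors
// the IR semantics, and usub_sat() stands in for @llvm.usub.sat:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t usub_sat(uint32_t A, uint32_t B) { return A > B ? A - B : 0; }

int main() {
  for (uint32_t X : {0u, 1u, 7u, 0xFFFFFFF0u})
    for (uint32_t Y : {0u, 3u, 100u})
      for (uint32_t Z : {0u, 5u, 200u}) {
        // sub(add(X,Y), umin(Y,Z)) --> add(X, usub.sat(Y,Z))
        assert(X + Y - std::min(Y, Z) == X + usub_sat(Y, Z));
        // sub(add(X,Y), umin(X,Y)) --> umax(X,Y)
        assert(X + Y - std::min(X, Y) == std::max(X, Y));
      }
  return 0;
}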
@@ -1277,7 +1271,7 @@ static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
 }
 
 Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
-  if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
+  if (Value *V = simplifyAddInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
@@ -1375,6 +1369,13 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     }
   }
 
+  // (A & 2^C1) + A => A & (2^C1 - 1) iff bit C1 in A is a sign bit
+  if (match(&I, m_c_Add(m_And(m_Value(A), m_APInt(C1)), m_Deferred(A))) &&
+      C1->isPowerOf2() && (ComputeNumSignBits(A) > C1->countLeadingZeros())) {
+    Constant *NewMask = ConstantInt::get(RHS->getType(), *C1 - 1);
+    return BinaryOperator::CreateAnd(A, NewMask);
+  }
+
   // A+B --> A|B iff A and B have no bits set in common.
   if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
     return BinaryOperator::CreateOr(LHS, RHS);
@@ -1528,7 +1529,7 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I,
 }
 
 Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
-  if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1),
+  if (Value *V = simplifyFAddInst(I.getOperand(0), I.getOperand(1),
                                   I.getFastMathFlags(),
                                   SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
@@ -1687,7 +1688,8 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
   // Require at least one GEP with a common base pointer on both sides.
   if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
     // (gep X, ...) - X
-    if (LHSGEP->getOperand(0) == RHS) {
+    if (LHSGEP->getOperand(0)->stripPointerCasts() ==
+            RHS->stripPointerCasts()) {
       GEP1 = LHSGEP;
     } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
       // (gep X, ...) - (gep X, ...)
@@ -1749,7 +1751,7 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
 }
 
 Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
-  if (Value *V = SimplifySubInst(I.getOperand(0), I.getOperand(1),
+  if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
@@ -2014,6 +2016,37 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     }
   }
 
+  if (auto *II = dyn_cast<MinMaxIntrinsic>(Op1)) {
+    {
+      // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
+      // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
+      Value *X = II->getLHS();
+      Value *Y = II->getRHS();
+      if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
+          (Op0->hasOneUse() || Op1->hasOneUse())) {
+        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
+        return replaceInstUsesWith(I, InvMaxMin);
+      }
+    }
+
+    {
+      // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
+      // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
+      Value *X, *Y, *Z;
+      if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
+        if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X)))))
+          return BinaryOperator::CreateAdd(
+              X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
+                                         {Y, Z}));
+        if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X)))))
+          return BinaryOperator::CreateAdd(
+              X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
+                                         {Z, Y}));
+      }
+    }
+  }
+
   {
     // If we have a subtraction between some value and a select between
     // said value and something else, sink subtraction into select hands, i.e.:
@@ -2089,36 +2122,6 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
       return BinaryOperator::CreateSub(X,
Not); } - // TODO: This is the same logic as above but handles the cmp-select idioms - // for min/max, so the use checks are increased to account for the - // extra instructions. If we canonicalize to intrinsics, this block - // can likely be removed. - { - Value *LHS, *RHS, *A; - Value *NotA = Op0, *MinMax = Op1; - SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor; - if (!SelectPatternResult::isMinOrMax(SPF)) { - NotA = Op1; - MinMax = Op0; - SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor; - } - if (SelectPatternResult::isMinOrMax(SPF) && - match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) { - if (NotA == LHS) - std::swap(LHS, RHS); - // LHS is now Y above and expected to have at least 2 uses (the min/max) - // NotA is expected to have 2 uses from the min/max and 1 from the sub. - if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && - !NotA->hasNUsesOrMore(4)) { - Value *Not = Builder.CreateNot(MinMax); - if (NotA == Op0) - return BinaryOperator::CreateSub(Not, A); - else - return BinaryOperator::CreateSub(A, Not); - } - } - } - // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". Value *LHSOp, *RHSOp; @@ -2149,11 +2152,11 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { // B = ashr i32 A, 31 ; smear the sign bit // sub (xor A, B), B ; flip bits if negative and subtract -1 (add 1) // --> (A < 0) ? -A : A - Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty)); + Value *IsNeg = Builder.CreateIsNeg(A); // Copy the nuw/nsw flags from the sub to the negate. - Value *Neg = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(), - I.hasNoSignedWrap()); - return SelectInst::Create(Cmp, Neg, A); + Value *NegA = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(), + I.hasNoSignedWrap()); + return SelectInst::Create(IsNeg, NegA, A); } // If we are subtracting a low-bit masked subset of some value from an add @@ -2187,12 +2190,23 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return replaceInstUsesWith( I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1})); + // Op0 - umin(X, Op0) --> usub.sat(Op0, X) + if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0))))) + return replaceInstUsesWith( + I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X})); + // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0) if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) { Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0}); return BinaryOperator::CreateNeg(USub); } + // umin(X, Op1) - Op1 --> 0 - usub.sat(Op1, X) + if (match(Op0, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op1))))) { + Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op1, X}); + return BinaryOperator::CreateNeg(USub); + } + // C - ctpop(X) => ctpop(~X) if C is bitwidth if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) && match(Op1, m_OneUse(m_Intrinsic(m_Value(X))))) @@ -2264,7 +2278,7 @@ static Instruction *hoistFNegAboveFMulFDiv(Instruction &I, Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { Value *Op = I.getOperand(0); - if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(), + if (Value *V = simplifyFNegInst(Op, I.getFastMathFlags(), getSimplifyQuery().getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -2287,10 +2301,11 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { // Unlike most transforms, this one is not safe to propagate nsz unless // it is present on the original select. 
(We are conservatively intersecting // the nsz flags from the select and root fneg instruction.) - auto propagateSelectFMF = [&](SelectInst *S) { + auto propagateSelectFMF = [&](SelectInst *S, bool CommonOperand) { S->copyFastMathFlags(&I); if (auto *OldSel = dyn_cast(Op)) - if (!OldSel->hasNoSignedZeros()) + if (!OldSel->hasNoSignedZeros() && !CommonOperand && + !isGuaranteedNotToBeUndefOrPoison(OldSel->getCondition())) S->setHasNoSignedZeros(false); }; // -(Cond ? -P : Y) --> Cond ? P : -Y @@ -2298,14 +2313,14 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { if (match(X, m_FNeg(m_Value(P)))) { Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg"); SelectInst *NewSel = SelectInst::Create(Cond, P, NegY); - propagateSelectFMF(NewSel); + propagateSelectFMF(NewSel, P == Y); return NewSel; } // -(Cond ? X : -P) --> Cond ? -X : P if (match(Y, m_FNeg(m_Value(P)))) { Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg"); SelectInst *NewSel = SelectInst::Create(Cond, NegX, P); - propagateSelectFMF(NewSel); + propagateSelectFMF(NewSel, P == X); return NewSel; } } @@ -2314,7 +2329,7 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { } Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { - if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyFSubInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), getSimplifyQuery().getWithInstruction(&I))) return replaceInstUsesWith(I, V); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 6bbb0251f2bc..ae8865651ece 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -24,32 +24,6 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into -/// a four bit mask. -static unsigned getFCmpCode(FCmpInst::Predicate CC) { - assert(FCmpInst::FCMP_FALSE <= CC && CC <= FCmpInst::FCMP_TRUE && - "Unexpected FCmp predicate!"); - // Take advantage of the bit pattern of FCmpInst::Predicate here. - // U L G E - static_assert(FCmpInst::FCMP_FALSE == 0, ""); // 0 0 0 0 - static_assert(FCmpInst::FCMP_OEQ == 1, ""); // 0 0 0 1 - static_assert(FCmpInst::FCMP_OGT == 2, ""); // 0 0 1 0 - static_assert(FCmpInst::FCMP_OGE == 3, ""); // 0 0 1 1 - static_assert(FCmpInst::FCMP_OLT == 4, ""); // 0 1 0 0 - static_assert(FCmpInst::FCMP_OLE == 5, ""); // 0 1 0 1 - static_assert(FCmpInst::FCMP_ONE == 6, ""); // 0 1 1 0 - static_assert(FCmpInst::FCMP_ORD == 7, ""); // 0 1 1 1 - static_assert(FCmpInst::FCMP_UNO == 8, ""); // 1 0 0 0 - static_assert(FCmpInst::FCMP_UEQ == 9, ""); // 1 0 0 1 - static_assert(FCmpInst::FCMP_UGT == 10, ""); // 1 0 1 0 - static_assert(FCmpInst::FCMP_UGE == 11, ""); // 1 0 1 1 - static_assert(FCmpInst::FCMP_ULT == 12, ""); // 1 1 0 0 - static_assert(FCmpInst::FCMP_ULE == 13, ""); // 1 1 0 1 - static_assert(FCmpInst::FCMP_UNE == 14, ""); // 1 1 1 0 - static_assert(FCmpInst::FCMP_TRUE == 15, ""); // 1 1 1 1 - return CC; -} - /// This is the complement of getICmpCode, which turns an opcode and two /// operands into either a constant true or false, or a brand new ICmp /// instruction. The sign is passed in to determine which kind of predicate to @@ -66,14 +40,10 @@ static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS, /// operands into either a FCmp instruction, or a true/false constant. 
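// The static_asserts removed above document the 4-bit "U L G E" encoding that
// the shared getFCmpCode/getPredForFCmpCode helpers (used below) still rely
// on. A standalone spot-check of two compositions, with enumerator values
// copied from that table (illustration only):

#include <cassert>

int main() {
  enum { FCMP_OEQ = 1, FCMP_OGT = 2, FCMP_OGE = 3, FCMP_OLT = 4,
         FCMP_OLE = 5, FCMP_ONE = 6 };
  // OR of codes composes predicates joined by '|':
  assert((FCMP_OLT | FCMP_OGT) == FCMP_ONE); // (x < y) | (x > y) -> x != y
  // AND of codes composes predicates joined by '&':
  assert((FCMP_OLE & FCMP_OGE) == FCMP_OEQ); // (x <= y) & (x >= y) -> x == y
  return 0;
}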
 static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
                            InstCombiner::BuilderTy &Builder) {
-  const auto Pred = static_cast<FCmpInst::Predicate>(Code);
-  assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
-         "Unexpected FCmp predicate!");
-  if (Pred == FCmpInst::FCMP_FALSE)
-    return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
-  if (Pred == FCmpInst::FCMP_TRUE)
-    return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
-  return Builder.CreateFCmp(Pred, LHS, RHS);
+  FCmpInst::Predicate NewPred;
+  if (Constant *TorF = getPredForFCmpCode(Code, LHS->getType(), NewPred))
+    return TorF;
+  return Builder.CreateFCmp(NewPred, LHS, RHS);
 }
 
 /// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
@@ -395,6 +365,7 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
 /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
 /// and the right hand side is of type BMask_Mixed. For example,
 /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
+/// Also used for logical and/or, must be poison safe.
 static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
     ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
     Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
@@ -409,9 +380,9 @@
   //
   // We currently handle the case of B, C, D, E are constant.
   //
-  ConstantInt *BCst, *CCst, *DCst, *ECst;
-  if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) ||
-      !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst)))
+  const APInt *BCst, *CCst, *DCst, *OrigECst;
+  if (!match(B, m_APInt(BCst)) || !match(C, m_APInt(CCst)) ||
+      !match(D, m_APInt(DCst)) || !match(E, m_APInt(OrigECst)))
     return nullptr;
 
   ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
 
@@ -420,19 +391,20 @@
   // canonicalized as,
   // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
   // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
+  APInt ECst = *OrigECst;
   if (PredR != NewCC)
-    ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+    ECst ^= *DCst;
 
   // If B or D is zero, skip because if LHS or RHS can be trivially folded by
   // other folding rules and this pattern won't apply any more.
-  if (BCst->getValue() == 0 || DCst->getValue() == 0)
+  if (*BCst == 0 || *DCst == 0)
     return nullptr;
 
   // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't
   // deduce anything from it.
   // For example,
   // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
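// Exhaustive spot-check (illustration only) of the mixed-mask fold this
// function implements, using an example quoted in the comments below:
//   (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)

#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    assert((((A & 12) != 0) && ((A & 7) == 1)) == ((A & 15) == 9));
  return 0;
}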
- if ((BCst->getValue() & DCst->getValue()) == 0) + if ((*BCst & *DCst) == 0) return nullptr; // If the following two conditions are met: @@ -451,22 +423,21 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // For example, // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) - if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) && - (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) { - APInt BorD = BCst->getValue() | DCst->getValue(); - APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) | - ECst->getValue(); - Value *NewMask = ConstantInt::get(BCst->getType(), BorD); - Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE); + if ((((*BCst & *DCst) & ECst) == 0) && + (*BCst & (*BCst ^ *DCst)).isPowerOf2()) { + APInt BorD = *BCst | *DCst; + APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst; + Value *NewMask = ConstantInt::get(A->getType(), BorD); + Value *NewMaskedValue = ConstantInt::get(A->getType(), BandBxorDorE); Value *NewAnd = Builder.CreateAnd(A, NewMask); return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue); } - auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C1->getValue(); + auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) { + return (*C1 & *C2) == *C1; }; - auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C2->getValue(); + auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) { + return (*C1 & *C2) == *C2; }; // In the following, we consider only the cases where B is a superset of D, B @@ -486,7 +457,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // For example, // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. - if (ECst->isZero()) { + if (ECst.isZero()) { if (IsSubSetOrEqual(BCst, DCst)) return ConstantInt::get(LHS->getType(), !IsAnd); return nullptr; @@ -504,7 +475,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // ie. (B & E) != 0, then LHS is subsumed by RHS. For example. // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code"); - if ((BCst->getValue() & ECst->getValue()) != 0) + if ((*BCst & ECst) != 0) return RHS; // Otherwise, LHS and RHS contradict and the whole expression becomes false // (or true if negated.) For example, @@ -516,6 +487,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( /// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side /// aren't of the common mask pattern type. +/// Also used for logical and/or, must be poison safe. static Value *foldLogOpOfMaskedICmpsAsymmetric( ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, @@ -550,6 +522,7 @@ static Value *foldLogOpOfMaskedICmpsAsymmetric( /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). 
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, + bool IsLogical, InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); @@ -594,6 +567,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & Mask_AllZeros) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) // -> (icmp eq (A & (B|D)), 0) + if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) + return nullptr; // TODO: Use freeze? Value *NewOr = Builder.CreateOr(B, D); Value *NewAnd = Builder.CreateAnd(A, NewOr); // We can't use C as zero because we might actually handle @@ -605,6 +580,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & BMask_AllOnes) { // (icmp eq (A & B), B) & (icmp eq (A & D), D) // -> (icmp eq (A & (B|D)), (B|D)) + if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) + return nullptr; // TODO: Use freeze? Value *NewOr = Builder.CreateOr(B, D); Value *NewAnd = Builder.CreateAnd(A, NewOr); return Builder.CreateICmp(NewCC, NewAnd, NewOr); @@ -612,6 +589,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & AMask_AllOnes) { // (icmp eq (A & B), A) & (icmp eq (A & D), A) // -> (icmp eq (A & (B&D)), A) + if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) + return nullptr; // TODO: Use freeze? Value *NewAnd1 = Builder.CreateAnd(B, D); Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1); return Builder.CreateICmp(NewCC, NewAnd2, A); @@ -736,47 +715,6 @@ Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, return Builder.CreateICmp(NewPred, Input, RangeEnd); } -static Value * -foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, - bool JoinedByAnd, - InstCombiner::BuilderTy &Builder) { - Value *X = LHS->getOperand(0); - if (X != RHS->getOperand(0)) - return nullptr; - - const APInt *C1, *C2; - if (!match(LHS->getOperand(1), m_APInt(C1)) || - !match(RHS->getOperand(1), m_APInt(C2))) - return nullptr; - - // We only handle (X != C1 && X != C2) and (X == C1 || X == C2). - ICmpInst::Predicate Pred = LHS->getPredicate(); - if (Pred != RHS->getPredicate()) - return nullptr; - if (JoinedByAnd && Pred != ICmpInst::ICMP_NE) - return nullptr; - if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ) - return nullptr; - - // The larger unsigned constant goes on the right. - if (C1->ugt(*C2)) - std::swap(C1, C2); - - APInt Xor = *C1 ^ *C2; - if (Xor.isPowerOf2()) { - // If LHSC and RHSC differ by only one bit, then set that bit in X and - // compare against the larger constant: - // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == C2 - // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2 - // We choose an 'or' with a Pow2 constant rather than the inverse mask with - // 'and' because that may lead to smaller codegen from a smaller constant. 
-    Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor));
-    return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
-  }
-
-  return nullptr;
-}
-
 // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
 // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
 Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
@@ -941,7 +879,29 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
                            CxtI.getName() + ".simplified");
 }
 
+/// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and
+/// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1).
+/// Also used for logical and/or, must be poison safe.
+static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
+                                   InstCombiner::BuilderTy &Builder) {
+  CmpInst::Predicate Pred0, Pred1;
+  Value *X;
+  if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
+                          m_SpecificInt(1))) ||
+      !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())))
    return nullptr;
+
+  Value *CtPop = Cmp0->getOperand(0);
+  if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE)
+    return Builder.CreateICmpUGT(CtPop, ConstantInt::get(CtPop->getType(), 1));
+  if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ)
+    return Builder.CreateICmpULT(CtPop, ConstantInt::get(CtPop->getType(), 2));
+
+  return nullptr;
+}
+
 /// Reduce a pair of compares that check if a value has exactly 1 bit set.
+/// Also used for logical and/or, must be poison safe.
 static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
                              InstCombiner::BuilderTy &Builder) {
   // Handle 'and' / 'or' commutation: make the equality check the first operand.
@@ -1001,22 +961,13 @@ static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
   };
 
   // Given  ZeroCmpOp = (A + B)
-  //   ZeroCmpOp <=  A && ZeroCmpOp != 0  -->  (0-B) <  A
-  //   ZeroCmpOp  >  A || ZeroCmpOp == 0  -->  (0-B) >= A
-  //
   //   ZeroCmpOp <   A && ZeroCmpOp != 0  -->  (0-X) <  Y  iff
   //   ZeroCmpOp >=  A || ZeroCmpOp == 0  -->  (0-X) >= Y  iff
   //   with X being the value (A/B) that is known to be non-zero,
   //   and Y being remaining value.
-  if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
-      IsAnd)
-    return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
   if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE &&
       IsAnd && GetKnownNonZeroAndOther(B, A))
     return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
-  if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
-      !IsAnd)
-    return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
   if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ &&
       !IsAnd && GetKnownNonZeroAndOther(B, A))
     return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
@@ -1143,12 +1094,9 @@ Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
 /// common operand with the constant. Callers are expected to call this with
 /// Cmp0/Cmp1 switched to handle logic op commutativity.
 static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
-                                          BinaryOperator &Logic,
+                                          bool IsAnd,
                                           InstCombiner::BuilderTy &Builder,
                                           const SimplifyQuery &Q) {
-  bool IsAnd = Logic.getOpcode() == Instruction::And;
-  assert((IsAnd || Logic.getOpcode() == Instruction::Or) && "Wrong logic op");
-
   // Match an equality compare with a non-poison constant as Cmp0.
   // Also, give up if the compare can be constant-folded to avoid looping.
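// Standalone check (illustration only) of the ctpop fold added in
// foldIsPowerOf2OrZero above; std::popcount stands in for @llvm.ctpop:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < (1u << 16); ++X) {
    unsigned P = std::popcount(X);
    // (ctpop(X) == 1) | (X == 0)  -->  ctpop(X) u< 2
    assert(((P == 1) || (X == 0)) == (P < 2));
    // (ctpop(X) != 1) & (X != 0)  -->  ctpop(X) u> 1
    assert(((P != 1) && (X != 0)) == (P > 1));
  }
  return 0;
}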
ICmpInst::Predicate Pred0; @@ -1174,7 +1122,7 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C) // Can think of the 'or' substitution with the 'and' bool equivalent: // A || B --> A || (!A && B) - Value *SubstituteCmp = SimplifyICmpInst(Pred1, Y, C, Q); + Value *SubstituteCmp = simplifyICmpInst(Pred1, Y, C, Q); if (!SubstituteCmp) { // If we need to create a new instruction, require that the old compare can // be removed. @@ -1182,16 +1130,24 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; SubstituteCmp = Builder.CreateICmp(Pred1, Y, C); } - return Builder.CreateBinOp(Logic.getOpcode(), Cmp0, SubstituteCmp); + return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0, + SubstituteCmp); } /// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2) /// or (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2) /// into a single comparison using range-based reasoning. -static Value *foldAndOrOfICmpsUsingRanges( - ICmpInst::Predicate Pred1, Value *V1, const APInt &C1, - ICmpInst::Predicate Pred2, Value *V2, const APInt &C2, - IRBuilderBase &Builder, bool IsAnd) { +/// NOTE: This is also used for logical and/or, must be poison-safe! +Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, + ICmpInst *ICmp2, + bool IsAnd) { + ICmpInst::Predicate Pred1, Pred2; + Value *V1, *V2; + const APInt *C1, *C2; + if (!match(ICmp1, m_ICmp(Pred1, m_Value(V1), m_APInt(C1))) || + !match(ICmp2, m_ICmp(Pred2, m_Value(V2), m_APInt(C2)))) + return nullptr; + // Look through add of a constant offset on V1, V2, or both operands. This // allows us to interpret the V + C' < C'' range idiom into a proper range. const APInt *Offset1 = nullptr, *Offset2 = nullptr; @@ -1206,152 +1162,51 @@ static Value *foldAndOrOfICmpsUsingRanges( if (V1 != V2) return nullptr; - ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, C1); + ConstantRange CR1 = ConstantRange::makeExactICmpRegion( + IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, *C1); if (Offset1) CR1 = CR1.subtract(*Offset1); - ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred2, C2); + ConstantRange CR2 = ConstantRange::makeExactICmpRegion( + IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, *C2); if (Offset2) CR2 = CR2.subtract(*Offset2); - Optional CR = - IsAnd ? CR1.exactIntersectWith(CR2) : CR1.exactUnionWith(CR2); - if (!CR) - return nullptr; - - CmpInst::Predicate NewPred; - APInt NewC, Offset; - CR->getEquivalentICmp(NewPred, NewC, Offset); - Type *Ty = V1->getType(); Value *NewV = V1; - if (Offset != 0) - NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset)); - return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC)); -} - -/// Fold (icmp)&(icmp) if possible. -Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, - BinaryOperator &And) { - const SimplifyQuery Q = SQ.getWithInstruction(&And); - - // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) - // if K1 and K2 are a one-bit mask. 
- if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &And, - /* IsAnd */ true)) - return V; - - ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - - // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) - if (predicatesFoldable(PredL, PredR)) { - if (LHS->getOperand(0) == RHS->getOperand(1) && - LHS->getOperand(1) == RHS->getOperand(0)) - LHS->swapOperands(); - if (LHS->getOperand(0) == RHS->getOperand(0) && - LHS->getOperand(1) == RHS->getOperand(1)) { - Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); - unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); - bool IsSigned = LHS->isSigned() || RHS->isSigned(); - return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder); - } - } - - // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder)) - return V; - - if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, And, Builder, Q)) - return V; - if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, And, Builder, Q)) - return V; - - // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n - if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/false)) - return V; - - // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n - if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false)) - return V; - - if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder)) - return V; - - if (Value *V = foldSignedTruncationCheck(LHS, RHS, And, Builder)) - return V; - - if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder)) - return V; - - if (Value *X = - foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/true, Q, Builder)) - return X; - if (Value *X = - foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder)) - return X; - - if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true)) - return X; + Optional CR = CR1.exactUnionWith(CR2); + if (!CR) { + if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() || + CR2.isWrappedSet()) + return nullptr; - // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). - Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); + // Check whether we have equal-size ranges that only differ by one bit. + // In that case we can apply a mask to map one range onto the other. + APInt LowerDiff = CR1.getLower() ^ CR2.getLower(); + APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1); + APInt CR1Size = CR1.getUpper() - CR1.getLower(); + if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff || + CR1Size != CR2.getUpper() - CR2.getLower()) + return nullptr; - // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) - // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs. - if (PredL == ICmpInst::ICMP_EQ && match(LHS->getOperand(1), m_ZeroInt()) && - PredR == ICmpInst::ICMP_EQ && match(RHS->getOperand(1), m_ZeroInt()) && - LHS0->getType() == RHS0->getType()) { - Value *NewOr = Builder.CreateOr(LHS0, RHS0); - return Builder.CreateICmp(PredL, NewOr, - Constant::getNullValue(NewOr->getType())); + CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2; + NewV = Builder.CreateAnd(NewV, ConstantInt::get(Ty, ~LowerDiff)); } - const APInt *LHSC, *RHSC; - if (!match(LHS->getOperand(1), m_APInt(LHSC)) || - !match(RHS->getOperand(1), m_APInt(RHSC))) - return nullptr; - - // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 - // where CMAX is the all ones value for the truncated type, - // iff the lower bits of C2 and CA are zero. 
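// Worked example (illustration only; constants chosen for the demo) of the
// "equal-size ranges that only differ by one bit" mapping added above: for
// x u< 8 or x in [16,24), LowerDiff = 0 ^ 16 and UpperDiff = 7 ^ 23 are both
// 16, so clearing that bit maps one range onto the other:

#include <cassert>

int main() {
  for (unsigned X = 0; X < 64; ++X) {
    bool TwoRanges = (X < 8) || (X >= 16 && X < 24); // [0,8) u [16,24)
    bool Masked = (X & ~16u) < 8;                    // X & ~LowerDiff
    assert(TwoRanges == Masked);
  }
  return 0;
}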
- if (PredL == ICmpInst::ICMP_EQ && PredL == PredR && LHS->hasOneUse() && - RHS->hasOneUse()) { - Value *V; - const APInt *AndC, *SmallC = nullptr, *BigC = nullptr; - - // (trunc x) == C1 & (and x, CA) == C2 - // (and x, CA) == C2 & (trunc x) == C1 - if (match(RHS0, m_Trunc(m_Value(V))) && - match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) { - SmallC = RHSC; - BigC = LHSC; - } else if (match(LHS0, m_Trunc(m_Value(V))) && - match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) { - SmallC = LHSC; - BigC = RHSC; - } - - if (SmallC && BigC) { - unsigned BigBitSize = BigC->getBitWidth(); - unsigned SmallBitSize = SmallC->getBitWidth(); + if (IsAnd) + CR = CR->inverse(); - // Check that the low bits are zero. - APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); - if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) { - Value *NewAnd = Builder.CreateAnd(V, Low | *AndC); - APInt N = SmallC->zext(BigBitSize) | *BigC; - Value *NewVal = ConstantInt::get(NewAnd->getType(), N); - return Builder.CreateICmp(PredL, NewAnd, NewVal); - } - } - } + CmpInst::Predicate NewPred; + APInt NewC, Offset; + CR->getEquivalentICmp(NewPred, NewC, Offset); - return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC, - Builder, /* IsAnd */ true); + if (Offset != 0) + NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset)); + return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC)); } Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, - bool IsAnd) { + bool IsAnd, bool IsLogicalSelect) { Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); @@ -1380,11 +1235,22 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, unsigned FCmpCodeL = getFCmpCode(PredL); unsigned FCmpCodeR = getFCmpCode(PredR); unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR; + + // Intersect the fast math flags. + // TODO: We can union the fast math flags unless this is a logical select. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF = LHS->getFastMathFlags(); + FMF &= RHS->getFastMathFlags(); + Builder.setFastMathFlags(FMF); + return getFCmpValue(NewPred, LHS0, LHS1, Builder); } - if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) || - (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) { + // This transform is not valid for a logical select. + if (!IsLogicalSelect && + ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) || + (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && + !IsAnd))) { if (LHS0->getType() != RHS0->getType()) return nullptr; @@ -1574,9 +1440,10 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { Value *Cast1Src = Cast1->getOperand(0); // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) - if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { + if ((Cast0->hasOneUse() || Cast1->hasOneUse()) && + shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, - I.getName()); + I.getName()); return CastInst::Create(CastOpcode, NewOp, DestTy); } @@ -1589,9 +1456,8 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { ICmpInst *ICmp0 = dyn_cast(Cast0Src); ICmpInst *ICmp1 = dyn_cast(Cast1Src); if (ICmp0 && ICmp1) { - Value *Res = LogicOpc == Instruction::And ? 
foldAndOfICmps(ICmp0, ICmp1, I) - : foldOrOfICmps(ICmp0, ICmp1, I); - if (Res) + if (Value *Res = + foldAndOrOfICmps(ICmp0, ICmp1, I, LogicOpc == Instruction::And)) return CastInst::Create(CastOpcode, Res, DestTy); return nullptr; } @@ -1862,7 +1728,7 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I, Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { Type *Ty = I.getType(); - if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyAndInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1930,25 +1796,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together)); } - // If the mask is only needed on one incoming arm, push the 'and' op up. - if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) || - match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { - APInt NotAndMask(~(*C)); - BinaryOperator::BinaryOps BinOp = cast(Op0)->getOpcode(); - if (MaskedValueIsZero(X, NotAndMask, 0, &I)) { - // Not masking anything out for the LHS, move mask to RHS. - // and ({x}or X, Y), C --> {x}or X, (and Y, C) - Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked"); - return BinaryOperator::Create(BinOp, X, NewRHS); - } - if (!isa(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) { - // Not masking anything out for the RHS, move mask to LHS. - // and ({x}or X, Y), C --> {x}or (and X, C), Y - Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked"); - return BinaryOperator::Create(BinOp, NewLHS, Y); - } - } - unsigned Width = Ty->getScalarSizeInBits(); const APInt *ShiftC; if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) { @@ -1961,6 +1808,12 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { } } + // If this 'and' clears the sign-bits added by ashr, replace with lshr: + // and (ashr X, ShiftC), C --> lshr X, ShiftC + if (match(Op0, m_AShr(m_Value(X), m_APInt(ShiftC))) && ShiftC->ult(Width) && + C->isMask(Width - ShiftC->getZExtValue())) + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, *ShiftC)); + const APInt *AddC; if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) { // If we add zeros to every bit below a mask, the add has no effect: @@ -1983,7 +1836,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the // bitwidth of X and OP behaves well when given trunc(C1) and X. - auto isSuitableBinOpcode = [](BinaryOperator *B) { + auto isNarrowableBinOpcode = [](BinaryOperator *B) { switch (B->getOpcode()) { case Instruction::Xor: case Instruction::Or: @@ -1996,22 +1849,125 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { } }; BinaryOperator *BO; - if (match(Op0, m_OneUse(m_BinOp(BO))) && isSuitableBinOpcode(BO)) { + if (match(Op0, m_OneUse(m_BinOp(BO))) && isNarrowableBinOpcode(BO)) { + Instruction::BinaryOps BOpcode = BO->getOpcode(); Value *X; const APInt *C1; // TODO: The one-use restrictions could be relaxed a little if the AND // is going to be removed. + // Try to narrow the 'and' and a binop with constant operand: + // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC) if (match(BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))), m_APInt(C1))) && C->isIntN(X->getType()->getScalarSizeInBits())) { unsigned XWidth = X->getType()->getScalarSizeInBits(); Constant *TruncC1 = ConstantInt::get(X->getType(), C1->trunc(XWidth)); Value *BinOp = isa(BO->getOperand(0)) - ? 
Builder.CreateBinOp(BO->getOpcode(), X, TruncC1) - : Builder.CreateBinOp(BO->getOpcode(), TruncC1, X); + ? Builder.CreateBinOp(BOpcode, X, TruncC1) + : Builder.CreateBinOp(BOpcode, TruncC1, X); Constant *TruncC = ConstantInt::get(X->getType(), C->trunc(XWidth)); Value *And = Builder.CreateAnd(BinOp, TruncC); return new ZExtInst(And, Ty); } + + // Similar to above: if the mask matches the zext input width, then the + // 'and' can be eliminated, so we can truncate the other variable op: + // and (bo (zext X), Y), C --> zext (bo X, (trunc Y)) + if (isa(BO->getOperand(0)) && + match(BO->getOperand(0), m_OneUse(m_ZExt(m_Value(X)))) && + C->isMask(X->getType()->getScalarSizeInBits())) { + Y = BO->getOperand(1); + Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr"); + Value *NewBO = + Builder.CreateBinOp(BOpcode, X, TrY, BO->getName() + ".narrow"); + return new ZExtInst(NewBO, Ty); + } + // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X) + if (isa(BO->getOperand(1)) && + match(BO->getOperand(1), m_OneUse(m_ZExt(m_Value(X)))) && + C->isMask(X->getType()->getScalarSizeInBits())) { + Y = BO->getOperand(0); + Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr"); + Value *NewBO = + Builder.CreateBinOp(BOpcode, TrY, X, BO->getName() + ".narrow"); + return new ZExtInst(NewBO, Ty); + } + } + + // This is intentionally placed after the narrowing transforms for + // efficiency (transform directly to the narrow logic op if possible). + // If the mask is only needed on one incoming arm, push the 'and' op up. + if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) || + match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { + APInt NotAndMask(~(*C)); + BinaryOperator::BinaryOps BinOp = cast(Op0)->getOpcode(); + if (MaskedValueIsZero(X, NotAndMask, 0, &I)) { + // Not masking anything out for the LHS, move mask to RHS. + // and ({x}or X, Y), C --> {x}or X, (and Y, C) + Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked"); + return BinaryOperator::Create(BinOp, X, NewRHS); + } + if (!isa(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) { + // Not masking anything out for the RHS, move mask to LHS. + // and ({x}or X, Y), C --> {x}or (and X, C), Y + Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked"); + return BinaryOperator::Create(BinOp, NewLHS, Y); + } + } + + // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 + // constant, test if the shift amount equals the offset bit index: + // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0 + // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 + if (C->isPowerOf2() && + match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) { + int Log2ShiftC = ShiftC->exactLogBase2(); + int Log2C = C->exactLogBase2(); + bool IsShiftLeft = + cast(Op0)->getOpcode() == Instruction::Shl; + int BitNum = IsShiftLeft ? 
Log2C - Log2ShiftC : Log2ShiftC - Log2C; + assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask"); + Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum)); + return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C), + ConstantInt::getNullValue(Ty)); + } + + Constant *C1, *C2; + const APInt *C3 = C; + Value *X; + if (C3->isPowerOf2()) { + Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros()); + if (match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)), + m_ImmConstant(C2)))) && + match(C1, m_Power2())) { + Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1); + Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3); + KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr); + if (KnownLShrc.getMaxValue().ult(Width)) { + // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth: + // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0 + Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1); + Value *Cmp = Builder.CreateICmpEQ(X, CmpC); + return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3), + ConstantInt::getNullValue(Ty)); + } + } + + if (match(Op0, m_OneUse(m_Shl(m_LShr(m_ImmConstant(C1), m_Value(X)), + m_ImmConstant(C2)))) && + match(C1, m_Power2())) { + Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1); + Constant *Cmp = + ConstantExpr::getCompare(ICmpInst::ICMP_ULT, Log2C3, C2); + if (Cmp->isZeroValue()) { + // iff C1,C3 is pow2 and Log2(C3) >= C2: + // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0 + Constant *ShlC = ConstantExpr::getAdd(C2, Log2C1); + Constant *CmpC = ConstantExpr::getSub(ShlC, Log2C3); + Value *Cmp = Builder.CreateICmpEQ(X, CmpC); + return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3), + ConstantInt::getNullValue(Ty)); + } + } } } @@ -2121,32 +2077,50 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { ICmpInst *LHS = dyn_cast(Op0); ICmpInst *RHS = dyn_cast(Op1); if (LHS && RHS) - if (Value *Res = foldAndOfICmps(LHS, RHS, I)) + if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ true)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary // number of 'and' instructions might have to be created. - if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { + if (LHS && match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op1); + // LHS & (X && Y) --> (LHS && X) && Y if (auto *Cmp = dyn_cast(X)) - if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); + if (Value *Res = + foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // LHS & (X && Y) --> X && (LHS & Y) if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); - } - if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { + if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical + ? 
Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); + } + if (RHS && match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op0); + // (X && Y) & RHS --> (X && RHS) && Y if (auto *Cmp = dyn_cast(X)) - if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); + if (Value *Res = + foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // (X && Y) & RHS --> X && (Y & RHS) if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); + if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); } } if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Value *Res = foldLogicOfFCmps(LHS, RHS, true)) + if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true)) return replaceInstUsesWith(I, Res); if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) @@ -2175,18 +2149,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { unsigned FullShift = Ty->getScalarSizeInBits() - 1; if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))), m_Value(Y)))) { - Constant *Zero = ConstantInt::getNullValue(Ty); - Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg"); - return SelectInst::Create(Cmp, Y, Zero); + Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); + return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty)); } // If there's a 'not' of the shifted value, swap the select operands: // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y if (match(&I, m_c_And(m_OneUse(m_Not( m_AShr(m_Value(X), m_SpecificInt(FullShift)))), m_Value(Y)))) { - Constant *Zero = ConstantInt::getNullValue(Ty); - Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg"); - return SelectInst::Create(Cmp, Zero, Y); + Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); + return SelectInst::Create(IsNeg, ConstantInt::getNullValue(Ty), Y); } // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions @@ -2482,8 +2454,12 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B, // not create unnecessary casts if the types already match. Type *SelTy = A->getType(); if (auto *VecTy = dyn_cast(Cond->getType())) { + // For a fixed or scalable vector get N from <{vscale x} N x iM> unsigned Elts = VecTy->getElementCount().getKnownMinValue(); - Type *EltTy = Builder.getIntNTy(SelTy->getPrimitiveSizeInBits() / Elts); + // For a fixed or scalable vector, get the size in bits of N x iM; for a + // scalar this is just M. + unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinSize(); + Type *EltTy = Builder.getIntNTy(SelEltSize / Elts); SelTy = VectorType::get(EltTy, VecTy->getElementCount()); } Value *BitcastC = Builder.CreateBitCast(C, SelTy); @@ -2495,15 +2471,46 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B, return nullptr; } -/// Fold (icmp)|(icmp) if possible. 
-Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, - BinaryOperator &Or) { - const SimplifyQuery Q = SQ.getWithInstruction(&Or); +// (icmp eq X, 0) | (icmp ult Other, X) -> (icmp ule Other, X-1) +// (icmp ne X, 0) & (icmp uge Other, X) -> (icmp ugt Other, X-1) +Value *foldAndOrOfICmpEqZeroAndICmp(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, + IRBuilderBase &Builder) { + ICmpInst::Predicate LPred = + IsAnd ? LHS->getInversePredicate() : LHS->getPredicate(); + ICmpInst::Predicate RPred = + IsAnd ? RHS->getInversePredicate() : RHS->getPredicate(); + Value *LHS0 = LHS->getOperand(0); + if (LPred != ICmpInst::ICMP_EQ || !match(LHS->getOperand(1), m_Zero()) || + !LHS0->getType()->isIntOrIntVectorTy() || + !(LHS->hasOneUse() || RHS->hasOneUse())) + return nullptr; + + Value *Other; + if (RPred == ICmpInst::ICMP_ULT && RHS->getOperand(1) == LHS0) + Other = RHS->getOperand(0); + else if (RPred == ICmpInst::ICMP_UGT && RHS->getOperand(0) == LHS0) + Other = RHS->getOperand(1); + else + return nullptr; + + return Builder.CreateICmp( + IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE, + Builder.CreateAdd(LHS0, Constant::getAllOnesValue(LHS0->getType())), + Other); +} + +/// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible. +/// If IsLogical is true, then the and/or is in select form and the transform +/// must be poison-safe. +Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, + Instruction &I, bool IsAnd, + bool IsLogical) { + const SimplifyQuery Q = SQ.getWithInstruction(&I); // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) + // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) // if K1 and K2 are a one-bit mask. - if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &Or, - /* IsAnd */ false)) + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &I, IsAnd, IsLogical)) return V; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); @@ -2513,64 +2520,16 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, match(LHS1, m_APInt(LHSC)); match(RHS1, m_APInt(RHSC)); - // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) - // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) - // The original condition actually refers to the following two ranges: - // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3] - // We can fold these two ranges if: - // 1) C1 and C2 is unsigned greater than C3. - // 2) The two ranges are separated. - // 3) C1 ^ C2 is one-bit mask. - // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask. - // This implies all values in the two ranges differ by exactly one bit. 
- if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) && - PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() && - LHSC->getBitWidth() == RHSC->getBitWidth() && *LHSC == *RHSC) { - - Value *AddOpnd; - const APInt *LAddC, *RAddC; - if (match(LHS0, m_Add(m_Value(AddOpnd), m_APInt(LAddC))) && - match(RHS0, m_Add(m_Specific(AddOpnd), m_APInt(RAddC))) && - LAddC->ugt(*LHSC) && RAddC->ugt(*LHSC)) { - - APInt DiffC = *LAddC ^ *RAddC; - if (DiffC.isPowerOf2()) { - const APInt *MaxAddC = nullptr; - if (LAddC->ult(*RAddC)) - MaxAddC = RAddC; - else - MaxAddC = LAddC; - - APInt RRangeLow = -*RAddC; - APInt RRangeHigh = RRangeLow + *LHSC; - APInt LRangeLow = -*LAddC; - APInt LRangeHigh = LRangeLow + *LHSC; - APInt LowRangeDiff = RRangeLow ^ LRangeLow; - APInt HighRangeDiff = RRangeHigh ^ LRangeHigh; - APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow - : RRangeLow - LRangeLow; - - if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff && - RangeDiff.ugt(*LHSC)) { - Type *Ty = AddOpnd->getType(); - Value *MaskC = ConstantInt::get(Ty, ~DiffC); - - Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC); - Value *NewAdd = Builder.CreateAdd(NewAnd, - ConstantInt::get(Ty, *MaxAddC)); - return Builder.CreateICmp(LHS->getPredicate(), NewAdd, - ConstantInt::get(Ty, *LHSC)); - } - } - } - } - // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) + // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) if (predicatesFoldable(PredL, PredR)) { - if (LHS0 == RHS1 && LHS1 == RHS0) - LHS->swapOperands(); + if (LHS0 == RHS1 && LHS1 == RHS0) { + PredL = ICmpInst::getSwappedPredicate(PredL); + std::swap(LHS0, LHS1); + } if (LHS0 == RHS0 && LHS1 == RHS1) { - unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); + unsigned Code = IsAnd ? getICmpCode(PredL) & getICmpCode(PredR) + : getICmpCode(PredL) | getICmpCode(PredR); bool IsSigned = LHS->isSigned() || RHS->isSigned(); return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder); } @@ -2578,68 +2537,70 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // handle (roughly): // (icmp ne (A & B), C) | (icmp ne (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder)) + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder)) return V; - if (LHS->hasOneUse() || RHS->hasOneUse()) { - // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) - // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) - Value *A = nullptr, *B = nullptr; - if (PredL == ICmpInst::ICMP_EQ && match(LHS1, m_Zero())) { - B = LHS0; - if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS1) - A = RHS0; - else if (PredR == ICmpInst::ICMP_UGT && LHS0 == RHS0) - A = RHS1; - } - // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1) - // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1) - else if (PredR == ICmpInst::ICMP_EQ && match(RHS1, m_Zero())) { - B = RHS0; - if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS1) - A = LHS0; - else if (PredL == ICmpInst::ICMP_UGT && RHS0 == LHS0) - A = LHS1; - } - if (A && B && B->getType()->isIntOrIntVectorTy()) - return Builder.CreateICmp( - ICmpInst::ICMP_UGE, - Builder.CreateAdd(B, Constant::getAllOnesValue(B->getType())), A); - } - - if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, Or, Builder, Q)) + // TODO: One of these directions is fine with logical and/or, the other could + // be supported by inserting freeze. 
+ if (!IsLogical) { + if (Value *V = foldAndOrOfICmpEqZeroAndICmp(LHS, RHS, IsAnd, Builder)) + return V; + if (Value *V = foldAndOrOfICmpEqZeroAndICmp(RHS, LHS, IsAnd, Builder)) + return V; + } + + // TODO: Verify whether this is safe for logical and/or. + if (!IsLogical) { + if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, Builder, Q)) + return V; + if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, Builder, Q)) + return V; + } + + if (Value *V = foldIsPowerOf2OrZero(LHS, RHS, IsAnd, Builder)) return V; - if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, Or, Builder, Q)) + if (Value *V = foldIsPowerOf2OrZero(RHS, LHS, IsAnd, Builder)) return V; - // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n - if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/true)) - return V; + // TODO: One of these directions is fine with logical and/or, the other could + // be supported by inserting freeze. + if (!IsLogical) { + // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n + // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n + if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd)) + return V; - // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n - if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true)) - return V; + // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n + // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n + if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd)) + return V; + } - if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder)) - return V; + // TODO: Add conjugated or fold, check whether it is safe for logical and/or. + if (IsAnd && !IsLogical) + if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder)) + return V; - if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder)) + if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder)) return V; - if (Value *X = - foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/false, Q, Builder)) - return X; - if (Value *X = - foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder)) - return X; + // TODO: Verify whether this is safe for logical and/or. + if (!IsLogical) { + if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder)) + return X; + if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder)) + return X; + } - if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false)) + if (Value *X = foldEqOfParts(LHS, RHS, IsAnd)) return X; // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs. - if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_ZeroInt()) && - PredR == ICmpInst::ICMP_NE && match(RHS1, m_ZeroInt()) && + if (!IsLogical && PredL == (IsAnd ? 
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && + PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) && LHS0->getType() == RHS0->getType()) { Value *NewOr = Builder.CreateOr(LHS0, RHS0); return Builder.CreateICmp(PredL, NewOr, @@ -2650,15 +2611,83 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (!LHSC || !RHSC) return nullptr; - return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC, - Builder, /* IsAnd */ false); + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. + if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && + PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + const APInt *AndC, *SmallC = nullptr, *BigC = nullptr; + + // (trunc x) == C1 & (and x, CA) == C2 + // (and x, CA) == C2 & (trunc x) == C1 + if (match(RHS0, m_Trunc(m_Value(V))) && + match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) { + SmallC = RHSC; + BigC = LHSC; + } else if (match(LHS0, m_Trunc(m_Value(V))) && + match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) { + SmallC = LHSC; + BigC = RHSC; + } + + if (SmallC && BigC) { + unsigned BigBitSize = BigC->getBitWidth(); + unsigned SmallBitSize = SmallC->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) { + Value *NewAnd = Builder.CreateAnd(V, Low | *AndC); + APInt N = SmallC->zext(BigBitSize) | *BigC; + Value *NewVal = ConstantInt::get(NewAnd->getType(), N); + return Builder.CreateICmp(PredL, NewAnd, NewVal); + } + } + } + + // Match naive pattern (and its inverted form) for checking if two values + // share same sign. An example of the pattern: + // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1) + // Inverted form (example): + // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) + bool TrueIfSignedL, TrueIfSignedR; + if (InstCombiner::isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && + InstCombiner::isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && + (RHS->hasOneUse() || LHS->hasOneUse())) { + Value *X, *Y; + if (IsAnd) { + if ((TrueIfSignedL && !TrueIfSignedR && + match(LHS0, m_Or(m_Value(X), m_Value(Y))) && + match(RHS0, m_c_And(m_Specific(X), m_Specific(Y)))) || + (!TrueIfSignedL && TrueIfSignedR && + match(LHS0, m_And(m_Value(X), m_Value(Y))) && + match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y))))) { + Value *NewXor = Builder.CreateXor(X, Y); + return Builder.CreateIsNeg(NewXor); + } + } else { + if ((TrueIfSignedL && !TrueIfSignedR && + match(LHS0, m_And(m_Value(X), m_Value(Y))) && + match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y)))) || + (!TrueIfSignedL && TrueIfSignedR && + match(LHS0, m_Or(m_Value(X), m_Value(Y))) && + match(RHS0, m_c_And(m_Specific(X), m_Specific(Y))))) { + Value *NewXor = Builder.CreateXor(X, Y); + return Builder.CreateIsNotNeg(NewXor); + } + } + } + + return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd); } // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. 
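// The shared-sign pattern added above is easy to sanity-check outside the
// compiler. A minimal standalone C++ check (illustrative sketch only, not part
// of the patch; i8 stands in for any integer width):

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X < 128; ++X) {
    for (int Y = -128; Y < 128; ++Y) {
      int8_t A = (int8_t)X, B = (int8_t)Y;
      // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1)
      bool Orig = ((int8_t)(A & B) < 0) || ((int8_t)(A | B) > -1);
      // (icmp sgt (X ^ Y), -1)
      bool Folded = (int8_t)(A ^ B) > -1;
      assert(Orig == Folded && "same-sign fold must be equivalent");
    }
  }
  return 0;
}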
Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
-  if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
+  if (Value *V = simplifyOrInst(I.getOperand(0), I.getOperand(1),
                                 SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
@@ -2824,6 +2853,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
       return BinaryOperator::CreateOr(Op1, C);
 
+  // ((A & B) ^ C) | B -> C | B
+  if (match(Op0, m_c_Xor(m_c_And(m_Value(A), m_Specific(Op1)), m_Value(C))))
+    return BinaryOperator::CreateOr(C, Op1);
+
+  // B | ((A & B) ^ C) -> B | C
+  if (match(Op1, m_c_Xor(m_c_And(m_Value(A), m_Specific(Op0)), m_Value(C))))
+    return BinaryOperator::CreateOr(Op0, C);
+
   // ((B | C) & A) | B -> B | (A & C)
   if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
     return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
@@ -2885,33 +2922,51 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
   ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
   if (LHS && RHS)
-    if (Value *Res = foldOrOfICmps(LHS, RHS, I))
+    if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ false))
       return replaceInstUsesWith(I, Res);
 
   // TODO: Make this recursive; it's a little tricky because an arbitrary
   // number of 'or' instructions might have to be created.
   Value *X, *Y;
-  if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+  if (LHS && match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) {
+    bool IsLogical = isa<SelectInst>(Op1);
+    // LHS | (X || Y) --> (LHS || X) || Y
     if (auto *Cmp = dyn_cast<ICmpInst>(X))
-      if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
-        return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
+      if (Value *Res =
+              foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, IsLogical))
+        return replaceInstUsesWith(I, IsLogical
+                                          ? Builder.CreateLogicalOr(Res, Y)
+                                          : Builder.CreateOr(Res, Y));
+    // LHS | (X || Y) --> X || (LHS | Y)
     if (auto *Cmp = dyn_cast<ICmpInst>(Y))
-      if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
-        return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
-  }
-  if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+      if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false,
+                                        /* IsLogical */ false))
+        return replaceInstUsesWith(I, IsLogical
+                                          ? Builder.CreateLogicalOr(X, Res)
+                                          : Builder.CreateOr(X, Res));
+  }
+  if (RHS && match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) {
+    bool IsLogical = isa<SelectInst>(Op0);
+    // (X || Y) | RHS --> (X || RHS) || Y
    if (auto *Cmp = dyn_cast<ICmpInst>(X))
-      if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
-        return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
+      if (Value *Res =
+              foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, IsLogical))
+        return replaceInstUsesWith(I, IsLogical
+                                          ? Builder.CreateLogicalOr(Res, Y)
+                                          : Builder.CreateOr(Res, Y));
+    // (X || Y) | RHS --> X || (Y | RHS)
    if (auto *Cmp = dyn_cast<ICmpInst>(Y))
-      if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
-        return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
+      if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false,
+                                        /* IsLogical */ false))
+        return replaceInstUsesWith(I, IsLogical
+                                          ?
+                                          Builder.CreateLogicalOr(X, Res)
+                                          : Builder.CreateOr(X, Res));
   }
 }
 
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
+      if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false))
        return replaceInstUsesWith(I, Res);
 
   if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
@@ -3025,6 +3080,36 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
     return replaceInstUsesWith(I, Builder.CreateOr(Start, Step));
 
+  // (A & B) | (C | D) or (C | D) | (A & B)
+  // Can be combined if C or D is of type (A/B & X)
+  if (match(&I, m_c_Or(m_OneUse(m_And(m_Value(A), m_Value(B))),
+                       m_OneUse(m_Or(m_Value(C), m_Value(D)))))) {
+    // (A & B) | (C | ?) -> C | (? | (A & B))
+    // (A & B) | (C | ?) -> C | (? | (A & B))
+    // (A & B) | (C | ?) -> C | (? | (A & B))
+    // (A & B) | (C | ?) -> C | (? | (A & B))
+    // (C | ?) | (A & B) -> C | (? | (A & B))
+    // (C | ?) | (A & B) -> C | (? | (A & B))
+    // (C | ?) | (A & B) -> C | (? | (A & B))
+    // (C | ?) | (A & B) -> C | (? | (A & B))
+    if (match(D, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
+        match(D, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
+      return BinaryOperator::CreateOr(
+          C, Builder.CreateOr(D, Builder.CreateAnd(A, B)));
+    // (A & B) | (? | D) -> (? | (A & B)) | D
+    // (A & B) | (? | D) -> (? | (A & B)) | D
+    // (A & B) | (? | D) -> (? | (A & B)) | D
+    // (A & B) | (? | D) -> (? | (A & B)) | D
+    // (? | D) | (A & B) -> (? | (A & B)) | D
+    // (? | D) | (A & B) -> (? | (A & B)) | D
+    // (? | D) | (A & B) -> (? | (A & B)) | D
+    // (? | D) | (A & B) -> (? | (A & B)) | D
+    if (match(C, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
+        match(C, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
+      return BinaryOperator::CreateOr(
+          Builder.CreateOr(C, Builder.CreateAnd(A, B)), D);
+  }
+
   return nullptr;
 }
 
@@ -3086,26 +3171,26 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
          I.getOperand(1) == RHS && "Should be 'xor' with these operands");
 
-  if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
-    if (LHS->getOperand(0) == RHS->getOperand(1) &&
-        LHS->getOperand(1) == RHS->getOperand(0))
-      LHS->swapOperands();
-    if (LHS->getOperand(0) == RHS->getOperand(0) &&
-        LHS->getOperand(1) == RHS->getOperand(1)) {
+  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+
+  if (predicatesFoldable(PredL, PredR)) {
+    if (LHS0 == RHS1 && LHS1 == RHS0) {
+      std::swap(LHS0, LHS1);
+      PredL = ICmpInst::getSwappedPredicate(PredL);
+    }
+    if (LHS0 == RHS0 && LHS1 == RHS1) {
       // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
-      Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
-      unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+      unsigned Code = getICmpCode(PredL) ^ getICmpCode(PredR);
       bool IsSigned = LHS->isSigned() || RHS->isSigned();
-      return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
+      return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
     }
   }
 
   // TODO: This can be generalized to compares of non-signbits using
   // decomposeBitTestICmp(). It could be enhanced more by using (something like)
   // foldLogOpOfMaskedICmps().
-  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
-  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
-  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
   if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
       LHS0->getType() == RHS0->getType() &&
       LHS0->getType()->isIntOrIntVectorTy()) {
@@ -3114,19 +3199,17 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
     if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
          PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
         (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
-         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) {
-      Value *Zero = ConstantInt::getNullValue(LHS0->getType());
-      return Builder.CreateICmpSLT(Builder.CreateXor(LHS0, RHS0), Zero);
-    }
+         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())))
+      return Builder.CreateIsNeg(Builder.CreateXor(LHS0, RHS0));
+
     // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
     // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
     if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
          PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
         (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
-         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) {
-      Value *MinusOne = ConstantInt::getAllOnesValue(LHS0->getType());
-      return Builder.CreateICmpSGT(Builder.CreateXor(LHS0, RHS0), MinusOne);
-    }
+         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())))
+      return Builder.CreateIsNotNeg(Builder.CreateXor(LHS0, RHS0));
+
   }
 
   // Instead of trying to imitate the folds for and/or, decompose this 'xor'
@@ -3135,10 +3218,10 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   //
   // This is based on a truth table definition of xor:
   // X ^ Y --> (X | Y) & !(X & Y)
-  if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
+  if (Value *OrICmp = simplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
     // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
     // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
-    if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
+    if (Value *AndICmp = simplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
       // TODO: Independently handle cases where the 'and' side is a constant.
       ICmpInst *X = nullptr, *Y = nullptr;
       if (OrICmp == LHS && AndICmp == RHS) {
@@ -3274,12 +3357,12 @@ static Instruction *canonicalizeAbs(BinaryOperator &Xor,
   // Op1 = ashr i32 A, 31                   ; smear the sign bit
   // xor (add A, Op1), Op1                  ; add -1 and flip bits if negative
   // --> (A < 0) ? -A : A
-    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+    Value *IsNeg = Builder.CreateIsNeg(A);
   // Copy the nuw/nsw flags from the add to the negate.
     auto *Add = cast<BinaryOperator>(Op0);
-    Value *Neg = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
+    Value *NegA = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
                                    Add->hasNoSignedWrap());
-    return SelectInst::Create(Cmp, Neg, A);
+    return SelectInst::Create(IsNeg, NegA, A);
   }
   return nullptr;
 }
@@ -3465,51 +3548,7 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
     }
   }
 
-  // TODO: Remove folds if we canonicalize to intrinsics (see above).
-  // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
-  //
-  //   %notx = xor i32 %x, -1
-  //   %cmp1 = icmp sgt i32 %notx, %y
-  //   %smax = select i1 %cmp1, i32 %notx, i32 %y
-  //   %res = xor i32 %smax, -1
-  // =>
-  //   %noty = xor i32 %y, -1
-  //   %cmp2 = icmp slt %x, %noty
-  //   %res = select i1 %cmp2, i32 %x, i32 %noty
-  //
-  // Same is applicable for smin/umax/umin.
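// The two xor-of-sign-test rewrites above reduce to a single sign test of
// X ^ Y. A brute-force check of both identities over i8 (illustrative sketch
// only, not patch content):

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X < 128; ++X) {
    for (int Y = -128; Y < 128; ++Y) {
      int8_t A = (int8_t)X, B = (int8_t)Y;
      int8_t AxB = (int8_t)(A ^ B);
      // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0, i.e. CreateIsNeg on the xor.
      assert(((A > -1) != (B > -1)) == (AxB < 0));
      // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1, i.e. CreateIsNotNeg on the xor.
      assert(((A > -1) != (B < 0)) == (AxB > -1));
    }
  }
  return 0;
}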
if (NotOp->hasOneUse()) { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(NotOp, LHS, RHS).Flavor; - if (SelectPatternResult::isMinOrMax(SPF)) { - // It's possible we get here before the not has been simplified, so make - // sure the input to the not isn't freely invertible. - if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) { - Value *NotY = Builder.CreateNot(RHS); - return SelectInst::Create( - Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY); - } - - // It's possible we get here before the not has been simplified, so make - // sure the input to the not isn't freely invertible. - if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) { - Value *NotX = Builder.CreateNot(LHS); - return SelectInst::Create( - Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y); - } - - // If both sides are freely invertible, then we can get rid of the xor - // completely. - if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && - isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) { - Value *NotLHS = Builder.CreateNot(LHS); - Value *NotRHS = Builder.CreateNot(RHS); - return SelectInst::Create( - Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS), - NotLHS, NotRHS); - } - } - // Pull 'not' into operands of select if both operands are one-use compares // or one is one-use compare and the other one is a constant. // Inverting the predicates eliminates the 'not' operation. @@ -3549,7 +3588,7 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) { // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { - if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyXorInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -3596,8 +3635,20 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { Value *X, *Y; Constant *C1; if (match(Op1, m_Constant(C1))) { - // Use DeMorgan and reassociation to eliminate a 'not' op. Constant *C2; + + if (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C2)))) && + match(C1, m_ImmConstant())) { + // (X | C2) ^ C1 --> (X & ~C2) ^ (C1^C2) + C2 = Constant::replaceUndefsWith( + C2, Constant::getAllOnesValue(C2->getType()->getScalarType())); + Value *And = Builder.CreateAnd( + X, Constant::mergeUndefsWith(ConstantExpr::getNot(C2), C1)); + return BinaryOperator::CreateXor( + And, Constant::mergeUndefsWith(ConstantExpr::getXor(C1, C2), C1)); + } + + // Use DeMorgan and reassociation to eliminate a 'not' op. 
     if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
       // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
       Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
@@ -3619,9 +3670,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
         *CA == X->getType()->getScalarSizeInBits() - 1 &&
         !match(C1, m_AllOnes())) {
       assert(!C1->isZeroValue() && "Unexpected xor with 0");
-      Value *ICmp =
-          Builder.CreateICmpSGT(X, Constant::getAllOnesValue(X->getType()));
-      return SelectInst::Create(ICmp, Op1, Builder.CreateNot(Op1));
+      Value *IsNotNeg = Builder.CreateIsNotNeg(X);
+      return SelectInst::Create(IsNotNeg, Op1, Builder.CreateNot(Op1));
     }
   }
 
@@ -3677,9 +3727,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
       APInt FoldConst = C1->getValue().lshr(C2->getValue());
       FoldConst ^= C3->getValue();
       // Prepare the two operands.
-      auto *Opnd0 = cast<Instruction>(Builder.CreateLShr(X, C2));
-      Opnd0->takeName(cast<Instruction>(Op0));
-      Opnd0->setDebugLoc(I.getDebugLoc());
+      auto *Opnd0 = Builder.CreateLShr(X, C2);
+      Opnd0->takeName(Op0);
       return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
     }
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 495493aab4b5..2540e545ae4d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -12,7 +12,6 @@
 
 #include "InstCombineInternal.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Transforms/InstCombine/InstCombiner.h"
 
 using namespace llvm;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 05b28328afbf..67ef2e895b6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -15,21 +15,18 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Attributes.h"
@@ -74,7 +71,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
@@ -108,6 +104,19 @@ static Type *getPromotedType(Type *Ty) {
   return Ty;
 }
 
+/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
+/// TODO: This should probably be integrated with visitAllocSites, but that
+/// requires a deeper change to allow either unread or unwritten objects.
+static bool hasUndefSource(AnyMemTransferInst *MI) {
+  auto *Src = MI->getRawSource();
+  while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
+    if (!Src->hasOneUse())
+      return false;
+    Src = cast<Instruction>(Src)->getOperand(0);
+  }
+  return isa<AllocaInst>(Src) && Src->hasOneUse();
+}
+
 Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
   Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
   MaybeAlign CopyDstAlign = MI->getDestAlign();
@@ -132,6 +141,14 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
     return MI;
   }
 
+  // If the source is provably undef, the memcpy/memmove doesn't do anything
+  // (unless the transfer is volatile).
+  if (hasUndefSource(MI) && !MI->isVolatile()) {
+    // Set the size of the copy to 0, it will be deleted on the next iteration.
+    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+    return MI;
+  }
+
   // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
   // load/store.
   ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
@@ -241,6 +258,15 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
     return MI;
   }
 
+  // Remove memset with an undef value.
+  // FIXME: This is technically incorrect because it might overwrite a poison
+  // value. Change to PoisonValue once #52930 is resolved.
+  if (isa<UndefValue>(MI->getValue())) {
+    // Set the size of the copy to 0, it will be deleted on the next iteration.
+    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+    return MI;
+  }
+
   // Extract the length and alignment and fill if they are constant.
   ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
   ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -248,7 +274,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
     return nullptr;
   const uint64_t Len = LenC->getLimitedValue();
   assert(Len && "0-sized memory setting should be removed already.");
-  const Align Alignment = assumeAligned(MI->getDestAlignment());
+  const Align Alignment = MI->getDestAlign().valueOrOne();
 
   // If it is an atomic and alignment is less than the size then we will
   // introduce the unaligned memory access which will be later transformed
@@ -769,7 +795,7 @@ static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
 /// \p Result and a constant \p Overflow value.
 static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
                                         Constant *Overflow) {
-  Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
+  Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
   StructType *ST = cast<StructType>(II->getType());
   Constant *Struct = ConstantStruct::get(ST, V);
   return InsertValueInst::Create(Struct, Result, 0);
@@ -795,6 +821,10 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
   if (Known.isNegative())
     return true;
 
+  Value *X, *Y;
+  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
+    return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);
+
   return isImpliedByDomCondition(
       ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
 }
@@ -837,6 +867,67 @@ static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
   return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
                   : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
 }
+
+/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
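// For reference, the clamp shape matchSAddSubSat (defined next) looks for,
// written out in plain C++ at assumed i8/i16 widths (an illustrative sketch,
// not part of the patch): widen, add, clamp to [INT8_MIN, INT8_MAX], then
// truncate -- which is exactly a saturating i8 add.

#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>

// smax(INT_MIN, smin(INT_MAX, add(sext(A), sext(B)))), then trunc.
static int8_t clampedAdd(int8_t A, int8_t B) {
  int16_t Wide = (int16_t)A + (int16_t)B;    // add(sext(A), sext(B))
  Wide = std::min<int16_t>(Wide, INT8_MAX);  // smin(..., MaxValue)
  Wide = std::max<int16_t>(Wide, INT8_MIN);  // smax(..., MinValue)
  return (int8_t)Wide;                       // trunc to the new type
}

int main() {
  // Spot-check saturation at both rails and a plain in-range add.
  assert(clampedAdd(100, 100) == 127);
  assert(clampedAdd(-100, -100) == -128);
  assert(clampedAdd(1, 2) == 3);
  return 0;
}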
+Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) { + Type *Ty = MinMax1.getType(); + + // We are looking for a tree of: + // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) + // Where the min and max could be reversed + Instruction *MinMax2; + BinaryOperator *AddSub; + const APInt *MinValue, *MaxValue; + if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) { + if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) + return nullptr; + } else if (match(&MinMax1, + m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) { + if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue)))) + return nullptr; + } else + return nullptr; + + // Check that the constants clamp a saturate, and that the new type would be + // sensible to convert to. + if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) + return nullptr; + // In what bitwidth can this be treated as saturating arithmetics? + unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; + // FIXME: This isn't quite right for vectors, but using the scalar type is a + // good first approximation for what should be done there. + if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) + return nullptr; + + // Also make sure that the inner min/max and the add/sub have one use. + if (!MinMax2->hasOneUse() || !AddSub->hasOneUse()) + return nullptr; + + // Create the new type (which can be a vector type) + Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); + + Intrinsic::ID IntrinsicID; + if (AddSub->getOpcode() == Instruction::Add) + IntrinsicID = Intrinsic::sadd_sat; + else if (AddSub->getOpcode() == Instruction::Sub) + IntrinsicID = Intrinsic::ssub_sat; + else + return nullptr; + + // The two operands of the add/sub must be nsw-truncatable to the NewTy. This + // is usually achieved via a sext from a smaller type. + if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) > + NewBitWidth || + ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) + return nullptr; + + // Finally create and return the sat intrinsic, truncated to the new type + Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); + Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy); + Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy); + Value *Sat = Builder.CreateCall(F, {AT, BT}); + return CastInst::Create(Instruction::SExt, Sat, Ty); +} + /// If we have a clamp pattern like max (min X, 42), 41 -- where the output /// can only be one of two possible constant values -- turn that into a select @@ -879,6 +970,59 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II, return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1); } +/// If this min/max has a constant operand and an operand that is a matching +/// min/max with a constant operand, constant-fold the 2 constant operands. 
+static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
+  Intrinsic::ID MinMaxID = II->getIntrinsicID();
+  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+  if (!LHS || LHS->getIntrinsicID() != MinMaxID)
+    return nullptr;
+
+  Constant *C0, *C1;
+  if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
+      !match(II->getArgOperand(1), m_ImmConstant(C1)))
+    return nullptr;
+
+  // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
+  ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
+  Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
+  Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
+
+  Module *Mod = II->getModule();
+  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+  return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
+}
+
+/// If this min/max has a matching min/max operand with a constant, try to push
+/// the constant operand into this instruction. This can enable more folds.
+static Instruction *
+reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
+                                       InstCombiner::BuilderTy &Builder) {
+  // Match and capture a min/max operand candidate.
+  Value *X, *Y;
+  Constant *C;
+  Instruction *Inner;
+  if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
+                                  m_Instruction(Inner),
+                                  m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
+                              m_Value(Y))))
+    return nullptr;
+
+  // The inner op must match. Check for constants to avoid infinite loops.
+  Intrinsic::ID MinMaxID = II->getIntrinsicID();
+  auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
+  if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
+      match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
+    return nullptr;
+
+  // max (max X, C), Y --> max (max X, Y), C
+  Function *MinMax =
+      Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
+  Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
+  NewInner->takeName(Inner);
+  return CallInst::Create(MinMax, {NewInner, C});
+}
+
 /// Reduce a sequence of min/max intrinsics with a common operand.
 static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
   // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
@@ -936,6 +1080,56 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
   return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
 }
 
+/// If all arguments of the intrinsic are unary shuffles with the same mask,
+/// try to shuffle after the intrinsic.
+static Instruction *
+foldShuffledIntrinsicOperands(IntrinsicInst *II,
+                              InstCombiner::BuilderTy &Builder) {
+  // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
+  //       etc. Use llvm::isTriviallyVectorizable() and related to determine
+  //       which intrinsics are safe to shuffle?
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::smax:
+  case Intrinsic::smin:
+  case Intrinsic::umax:
+  case Intrinsic::umin:
+  case Intrinsic::fma:
+  case Intrinsic::fshl:
+  case Intrinsic::fshr:
+    break;
+  default:
+    return nullptr;
+  }
+
+  Value *X;
+  ArrayRef<int> Mask;
+  if (!match(II->getArgOperand(0),
+             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+    return nullptr;
+
+  // At least 1 operand must have 1 use because we are creating 2 instructions.
+  if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
+    return nullptr;
+
+  // See if all arguments are shuffled with the same mask.
+  SmallVector<Value *, 4> NewArgs(II->arg_size());
+  NewArgs[0] = X;
+  Type *SrcTy = X->getType();
+  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
+    if (!match(II->getArgOperand(i),
+               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
+        X->getType() != SrcTy)
+      return nullptr;
+    NewArgs[i] = X;
+  }
+
+  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
+  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+  Value *NewIntrinsic =
+      Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
+  return new ShuffleVectorInst(NewIntrinsic, Mask);
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -943,14 +1137,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   // Don't try to simplify calls without uses. It will not do anything useful,
   // but will result in the following folds being skipped.
   if (!CI.use_empty())
-    if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
+    if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
       return replaceInstUsesWith(CI, V);
 
   if (isFreeCall(&CI, &TLI))
     return visitFree(CI);
 
-  // If the caller function is nounwind, mark the call as nounwind, even if the
-  // callee isn't.
+  // If the caller function (i.e. us, the function that contains this CallInst)
+  // is nounwind, mark the call as nounwind, even if the callee isn't.
   if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
     CI.setDoesNotThrow();
     return &CI;
@@ -980,13 +1174,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
       if (NumBytes->isNullValue())
         return eraseInstFromFunction(CI);
-
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
-        if (CI->getZExtValue() == 1) {
-          // Replace the instruction with just byte operations.  We would
-          // transform other cases to loads/stores, but we don't know if
-          // alignment is sufficient.
-        }
     }
 
     // No other transformations apply to volatile transfers.
@@ -1050,10 +1237,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     return NewCall;
   }
 
+  // Unused constrained FP intrinsic calls may have declared side effect, which
+  // prevents it from being removed. In some cases however the side effect is
+  // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
+  // returns a replacement, the call may be removed.
+  if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
+    if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
+      return eraseInstFromFunction(CI);
+  }
+
   Intrinsic::ID IID = II->getIntrinsicID();
   switch (IID) {
   case Intrinsic::objectsize:
-    if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+    if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
       return replaceInstUsesWith(CI, V);
     return nullptr;
   case Intrinsic::abs: {
@@ -1224,6 +1420,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (Instruction *R = FoldOpIntoSelect(*II, Sel))
       return R;
 
+    if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
+      return NewMinMax;
+
+    if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
+      return R;
+
     if (Instruction *NewMinMax = factorizeMinMaxTree(II))
        return NewMinMax;
 
@@ -1231,14 +1433,35 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::bswap: {
     Value *IIOperand = II->getArgOperand(0);
-    Value *X = nullptr;
+
+    // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+    // inverse-shift-of-bswap:
+    // bswap (shl X, Y) --> lshr (bswap X), Y
+    // bswap (lshr X, Y) --> shl (bswap X), Y
+    Value *X, *Y;
+    if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
+      // The transform allows undef vector elements, so try a constant match
+      // first. If knownbits can handle that case, that clause could be removed.
+      unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
+      const APInt *C;
+      if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
+          MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
+        Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
+        BinaryOperator::BinaryOps InverseShift =
+            cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
+                ? Instruction::LShr
+                : Instruction::Shl;
+        return BinaryOperator::Create(InverseShift, NewSwap, Y);
+      }
+    }
 
     KnownBits Known = computeKnownBits(IIOperand, 0, II);
     uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
     uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+    unsigned BW = Known.getBitWidth();
 
     // bswap(x) -> shift(x) if x has exactly one "active byte"
-    if (Known.getBitWidth() - LZ - TZ == 8) {
+    if (BW - LZ - TZ == 8) {
       assert(LZ != TZ && "active byte cannot be in the middle");
       if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
         return BinaryOperator::CreateNUWShl(
@@ -1250,8 +1473,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
-      unsigned C = X->getType()->getScalarSizeInBits() -
-                   IIOperand->getType()->getScalarSizeInBits();
+      unsigned C = X->getType()->getScalarSizeInBits() - BW;
       Value *CV = ConstantInt::get(X->getType(), C);
       Value *V = Builder.CreateLShr(X, CV);
       return new TruncInst(V, IIOperand->getType());
@@ -1618,7 +1840,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
 
     // Try to simplify the underlying FMul.
-    if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+    if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
                                     II->getFastMathFlags(),
                                     SQ.getWithInstruction(II))) {
       auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
@@ -1649,7 +1871,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
     // Try to simplify the underlying FMul. We can only apply simplifications
     // that do not require rounding.
-    if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
+    if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
                                    II->getFastMathFlags(),
                                    SQ.getWithInstruction(II))) {
       auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
@@ -2135,7 +2357,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
-  case Intrinsic::experimental_vector_insert: {
+  case Intrinsic::vector_insert: {
     Value *Vec = II->getArgOperand(0);
     Value *SubVec = II->getArgOperand(1);
     Value *Idx = II->getArgOperand(2);
@@ -2181,7 +2403,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
-  case Intrinsic::experimental_vector_extract: {
+  case Intrinsic::vector_extract: {
     Value *Vec = II->getArgOperand(0);
     Value *Idx = II->getArgOperand(1);
@@ -2456,11 +2678,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   default: {
     // Handle target specific intrinsics
     Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
-    if (V.hasValue())
+    if (V)
       return V.getValue();
     break;
   }
   }
+
+  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+    return Shuf;
+
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
   // context, so it is handled in visitCallBase and we should trigger it.
   return visitCallBase(*II);
@@ -2648,47 +2874,56 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
   return nullptr;
 }
 
-void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
+bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
+                                            const TargetLibraryInfo *TLI) {
   // Note: We only handle cases which can't be driven from generic attributes
   // here.  So, for example, nonnull and noalias (which are common properties
   // of some allocation functions) are expected to be handled via annotation
   // of the respective allocator declaration with generic attributes.
+  bool Changed = false;
 
-  uint64_t Size;
-  ObjectSizeOpts Opts;
-  if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
-    // TODO: We really should just emit deref_or_null here and then
-    // let the generic inference code combine that with nonnull.
-    if (Call.hasRetAttr(Attribute::NonNull))
-      Call.addRetAttr(Attribute::getWithDereferenceableBytes(
-          Call.getContext(), Size));
-    else
-      Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
-          Call.getContext(), Size));
+  if (isAllocationFn(&Call, TLI)) {
+    uint64_t Size;
+    ObjectSizeOpts Opts;
+    if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
+      // TODO: We really should just emit deref_or_null here and then
+      // let the generic inference code combine that with nonnull.
+      if (Call.hasRetAttr(Attribute::NonNull)) {
+        Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
+        Call.addRetAttr(
+            Attribute::getWithDereferenceableBytes(Call.getContext(), Size));
+      } else {
+        Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
+        Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+            Call.getContext(), Size));
+      }
+    }
   }
 
   // Add alignment attribute if alignment is a power of two constant.
   Value *Alignment = getAllocAlignment(&Call, TLI);
   if (!Alignment)
-    return;
+    return Changed;
 
   ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
   if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
     uint64_t AlignmentVal = AlignOpC->getZExtValue();
     if (llvm::isPowerOf2_64(AlignmentVal)) {
-      Call.removeRetAttr(Attribute::Alignment);
-      Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
-                                                  Align(AlignmentVal)));
+      Align ExistingAlign = Call.getRetAlign().valueOrOne();
+      Align NewAlign = Align(AlignmentVal);
+      if (NewAlign > ExistingAlign) {
+        Call.addRetAttr(
+            Attribute::getWithAlignment(Call.getContext(), NewAlign));
+        Changed = true;
+      }
     }
   }
+  return Changed;
 }
 
 /// Improvements for call, callbr and invoke instructions.
 Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
-  if (isAllocationFn(&Call, &TLI))
-    annotateAnyAllocSite(Call, &TLI);
-
-  bool Changed = false;
+  bool Changed = annotateAnyAllocSite(Call, &TLI);
 
   // Mark any parameters that are known to be non-null with the nonnull
   // attribute.  This is helpful for inlining calls to functions with null
@@ -2718,10 +2953,12 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
   // If the callee is a pointer to a function, attempt to move any casts to the
   // arguments of the call/callbr/invoke.
   Value *Callee = Call.getCalledOperand();
-  if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
+  Function *CalleeF = dyn_cast<Function>(Callee);
+  if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
+      transformConstExprCastCall(Call))
     return nullptr;
 
-  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
+  if (CalleeF) {
     // Remove the convergent attr on calls when the callee is not convergent.
     if (Call.isConvergent() && !CalleeF->isConvergent() &&
         !CalleeF->isIntrinsic()) {
@@ -2905,7 +3142,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
       Optional<OperandBundleUse> Bundle =
           GCSP.getOperandBundle(LLVMContext::OB_gc_live);
       unsigned NumOfGCLives = LiveGcValues.size();
-      if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
+      if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
        break;
       // We can reduce the size of gc live bundle.
       DenseMap<Value *, unsigned> Val2Idx;
@@ -3026,8 +3263,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
   //
   // Similarly, avoid folding away bitcasts of byval calls.
   if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
-      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
-      Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
+      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
     return false;
 
   auto AI = Call.arg_begin();
@@ -3038,12 +3274,15 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
       return false;   // Cannot transform this parameter value.
 
+    // Check if there are any incompatible attributes we cannot drop safely.
     if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
-            .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
+            .overlaps(AttributeFuncs::typeIncompatible(
+                ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
       return false;   // Attribute not compatible with transformed value.
 
-    if (Call.isInAllocaArgument(i))
-      return false;   // Cannot transform to and from inalloca.
+    if (Call.isInAllocaArgument(i) ||
+        CallerPAL.hasParamAttr(i, Attribute::Preallocated))
+      return false; // Cannot transform to and from inalloca/preallocated.
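// The alignment-attribute update in annotateAnyAllocSite above is deliberately
// monotonic: it never weakens an existing guarantee. In isolation the decision
// looks like this (a sketch under assumed semantics; the names are
// illustrative, not LLVM API):

#include <cassert>
#include <cstdint>

// Only a power-of-two constant align argument is usable, and an existing
// stronger align attribute must never be replaced by a weaker one.
static uint64_t mergeRetAlign(uint64_t Existing /* getRetAlign().valueOrOne() */,
                              uint64_t Requested) {
  bool IsPow2 = Requested != 0 && (Requested & (Requested - 1)) == 0;
  if (!IsPow2)
    return Existing;  // not representable as an Align
  return Requested > Existing ? Requested : Existing;
}

int main() {
  assert(mergeRetAlign(16, 8) == 16);  // never weaken
  assert(mergeRetAlign(8, 32) == 32);  // strengthen when larger
  assert(mergeRetAlign(8, 24) == 8);   // 24 is not a power of two
  return 0;
}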
     if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
       return false;
 
@@ -3052,13 +3291,18 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     //  sized type and the sized type has to have the same size as the old type.
     if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
-      if (!ParamPTy || !ParamPTy->getPointerElementType()->isSized())
+      if (!ParamPTy)
         return false;
 
-      Type *CurElTy = Call.getParamByValType(i);
-      if (DL.getTypeAllocSize(CurElTy) !=
-          DL.getTypeAllocSize(ParamPTy->getPointerElementType()))
-        return false;
+      if (!ParamPTy->isOpaque()) {
+        Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
+        if (!ParamElTy->isSized())
+          return false;
+
+        Type *CurElTy = Call.getParamByValType(i);
+        if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
+          return false;
+      }
     }
   }
 
@@ -3116,13 +3360,20 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
       NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
     Args.push_back(NewArg);
 
-    // Add any parameter attributes.
-    if (CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
-      AttrBuilder AB(FT->getContext(), CallerPAL.getParamAttrs(i));
-      AB.addByValAttr(NewArg->getType()->getPointerElementType());
+    // Add any parameter attributes except the ones incompatible with the new
+    // type. Note that we made sure all incompatible ones are safe to drop.
+    AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
+        ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
+    if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
+        !ParamTy->isOpaquePointerTy()) {
+      AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
+                              Ctx, IncompatibleAttrs));
+      AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
       ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
-    } else
-      ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
+    } else {
+      ArgAttrs.push_back(
+          CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
+    }
   }
 
   // If the function takes more arguments than the call was taking, add them
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f11ba8772f3c..e9e779b8619b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -13,13 +13,10 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include <numeric>
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -39,8 +36,10 @@ static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
 
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
     // Cannot look past anything that might overflow.
+    // We specifically require nuw because we store the Scale in an unsigned
+    // and perform an unsigned divide on it.
     OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
-    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
+    if (OBI && !OBI->hasNoUnsignedWrap()) {
       Scale = 1;
       Offset = 0;
       return Val;
@@ -639,10 +638,12 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
 
 /// Try to narrow the width of math or bitwise logic instructions by pulling a
 /// truncate ahead of binary operators.
-/// TODO: Transforms for truncated shifts should be moved into here.
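// A quick numeric spot-check of the trunc-of-shift fold that narrowBinOp now
// hosts (trunc (*shr (trunc A), C) --> trunc (*shr A, C); see the LShr/AShr
// case in the function below). Illustrative sketch at assumed i32/i16/i8
// widths, not patch content:

#include <cassert>
#include <cstdint>

int main() {
  // trunc i8 (lshr i16 (trunc i16 A), C) == trunc i8 (lshr i32 A, C)
  // whenever C <= SrcWidth - DestWidth = 16 - 8.
  for (uint32_t A = 0; A < (1u << 17); ++A) {
    for (unsigned C = 0; C <= 8; ++C) {
      uint8_t Narrow = (uint8_t)((uint16_t)((uint16_t)A >> C));
      uint8_t Wide = (uint8_t)(A >> C);
      assert(Narrow == Wide && "narrowed shift must match the wide shift");
    }
  }
  return 0;
}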
 Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
   Type *SrcTy = Trunc.getSrcTy();
   Type *DestTy = Trunc.getType();
+  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
+  unsigned DestWidth = DestTy->getScalarSizeInBits();
+
   if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
     return nullptr;
 
@@ -685,7 +686,30 @@ Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
     }
     break;
   }
-
+  case Instruction::LShr:
+  case Instruction::AShr: {
+    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
+    Value *A;
+    Constant *C;
+    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
+      unsigned MaxShiftAmt = SrcWidth - DestWidth;
+      // If the shift is small enough, all zero/sign bits created by the shift
+      // are removed by the trunc.
+      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
+                                      APInt(SrcWidth, MaxShiftAmt)))) {
+        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
+        bool IsExact = OldShift->isExact();
+        auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
+        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
+        Value *Shift =
+            OldShift->getOpcode() == Instruction::AShr
+                ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
+                : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
+        return CastInst::CreateTruncOrBitCast(Shift, DestTy);
+      }
+    }
+    break;
+  }
   default: break;
   }
 
@@ -873,26 +897,6 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
     // TODO: Mask high bits with 'and'.
   }
 
-  // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
-  if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
-    unsigned MaxShiftAmt = SrcWidth - DestWidth;
-
-    // If the shift is small enough, all zero/sign bits created by the shift are
-    // removed by the trunc.
-    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
-                                    APInt(SrcWidth, MaxShiftAmt)))) {
-      auto *OldShift = cast<Instruction>(Src);
-      bool IsExact = OldShift->isExact();
-      auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
-      ShAmt = Constant::mergeUndefsWith(ShAmt, C);
-      Value *Shift =
-          OldShift->getOpcode() == Instruction::AShr
-              ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
-              : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
-      return CastInst::CreateTruncOrBitCast(Shift, DestTy);
-    }
-  }
-
   if (Instruction *I = narrowBinOp(Trunc))
     return I;
 
@@ -971,7 +975,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
     Attribute Attr = Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
     if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
-      if (Log2_32(MaxVScale.getValue()) < DestWidth) {
+      if (Log2_32(*MaxVScale) < DestWidth) {
        Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
        return replaceInstUsesWith(Trunc, VScale);
      }
@@ -986,13 +990,18 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
   // If we are just checking for a icmp eq of a single bit and zext'ing it
   // to an integer, then shift the bit to the appropriate place and then
   // cast to integer to avoid the comparison.
+
+  // FIXME: This set of transforms does not check for extra uses and/or creates
+  //        an extra instruction (an optional final cast is not included
+  //        in the transform comments). We may also want to favor icmp over
+  //        shifts in cases of equal instructions because icmp has better
+  //        analysis in general (invert the transform).
+
   const APInt *Op1CV;
   if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
 
     // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
-    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-    if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
-        (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
+    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
       Value *In = Cmp->getOperand(0);
       Value *Sh = ConstantInt::get(In->getType(),
                                    In->getType()->getScalarSizeInBits() - 1);
@@ -1000,11 +1009,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
       if (In->getType() != Zext.getType())
         In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
 
-      if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
-        Constant *One = ConstantInt::get(In->getType(), 1);
-        In = Builder.CreateXor(In, One, In->getName() + ".not");
-      }
-
       return replaceInstUsesWith(Zext, In);
     }
 
@@ -1080,7 +1084,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
       KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
       KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
 
-      if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
+      if (KnownLHS == KnownRHS) {
         APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
         APInt UnknownBit = ~KnownBits;
         if (UnknownBit.countPopulation() == 1) {
@@ -1343,7 +1347,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
     if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
       unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
-      if (Log2_32(MaxVScale.getValue()) < TypeWidth) {
+      if (Log2_32(*MaxVScale) < TypeWidth) {
         Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
         return replaceInstUsesWith(CI, VScale);
       }
@@ -1506,10 +1510,8 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
-  // If we know that the value being extended is positive, we can use a zext
-  // instead.
-  KnownBits Known = computeKnownBits(Src, 0, &CI);
-  if (Known.isNonNegative())
+  // If the value being extended is zero or positive, use a zext instead.
+  if (isKnownNonNegative(Src, DL, 0, &AC, &CI, &DT))
     return CastInst::Create(Instruction::ZExt, Src, DestTy);
 
   // Try to extend the entire expression tree to the wide destination type.
@@ -1597,14 +1599,20 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
 
   // Splatting a bit of constant-index across a value:
   // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
-  // TODO: If the dest type is different, use a cast (adjust use check).
+  // If the dest type is different, use a cast (adjust use check).
   if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
-                                 m_SpecificInt(SrcBitSize - 1)))) &&
-      X->getType() == DestTy) {
-    Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
-    Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
-    Value *Shl = Builder.CreateShl(X, ShlAmtC);
-    return BinaryOperator::CreateAShr(Shl, AshrAmtC);
+                                 m_SpecificInt(SrcBitSize - 1))))) {
+    Type *XTy = X->getType();
+    unsigned XBitSize = XTy->getScalarSizeInBits();
+    Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
+    Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
+    if (XTy == DestTy)
+      return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
+                                        AshrAmtC);
+    if (cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+      Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
+      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
+    }
   }
 
   if (match(Src, m_VScale(DL))) {
@@ -1612,7 +1620,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
         CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
       Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
       if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
-        if (Log2_32(MaxVScale.getValue()) < (SrcBitSize - 1)) {
+        if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
           Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
           return replaceInstUsesWith(CI, VScale);
         }
@@ -1712,7 +1720,7 @@ static Type *getMinimumFPType(Value *V) {
 
 /// Return true if the cast from integer to FP can be proven to be exact for all
 /// possible inputs (the conversion does not lose any precision).
-static bool isKnownExactCastIntToFP(CastInst &I) {
+static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
   CastInst::CastOps Opcode = I.getOpcode();
   assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
          "Unexpected cast");
@@ -1749,6 +1757,12 @@ static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
   // TODO:
   // Try harder to find if the source integer type has less significant bits.
   // For example, compute number of sign bits or compute low bit mask.
+  KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
+  int LowBits =
+      (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
+  if (LowBits <= DestNumSigBits)
+    return true;
+
   return false;
 }
 
@@ -1929,7 +1943,7 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
   Value *Src = FPT.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1943,7 +1957,7 @@ Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
   Value *Src = FPExt.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1970,13 +1984,13 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
   // This means this is also safe for a signed input and unsigned output, since
   // a negative input would lead to undefined behavior.
-  if (!isKnownExactCastIntToFP(*OpI)) {
+  if (!isKnownExactCastIntToFP(*OpI, *this)) {
     // The first cast may not round exactly based on the source integer width
     // and FP width, but the overflow UB rules can still allow this to fold.
@@ -1929,7 +1943,7 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
   Value *Src = FPT.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1943,7 +1957,7 @@ Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
   Value *Src = FPExt.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1970,13 +1984,13 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
   // This means this is also safe for a signed input and unsigned output, since
   // a negative input would lead to undefined behavior.
-  if (!isKnownExactCastIntToFP(*OpI)) {
+  if (!isKnownExactCastIntToFP(*OpI, *this)) {
     // The first cast may not round exactly based on the source integer width
     // and FP width, but the overflow UB rules can still allow this to fold.
     // If the destination type is narrow, that means the intermediate FP value
     // must be large enough to hold the source value exactly.
     // For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
-    int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
+    int OutputSize = (int)DestType->getScalarSizeInBits();
     if (OutputSize > OpI->getType()->getFPMantissaWidth())
       return nullptr;
   }
@@ -2150,14 +2164,10 @@ optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
   // Now that the element types match, get the shuffle mask and RHS of the
   // shuffle to use, which depends on whether we're increasing or decreasing the
   // size of the input.
-  SmallVector<int, 16> ShuffleMaskStorage;
+  auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
   ArrayRef<int> ShuffleMask;
   Value *V2;
 
-  // Produce an identify shuffle mask for the src vector.
-  ShuffleMaskStorage.resize(SrcElts);
-  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
-
   if (SrcElts > DestElts) {
     // If we're shrinking the number of elements (rewriting an integer
     // truncate), just shuffle in the elements corresponding to the least
@@ -2278,6 +2288,8 @@ static bool collectInsertionElements(Value *V, unsigned Shift,
   switch (I->getOpcode()) {
   default: return false; // Unhandled case.
   case Instruction::BitCast:
+    if (I->getOperand(0)->getType()->isVectorTy())
+      return false;
     return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                     isBigEndian);
   case Instruction::ZExt:
@@ -2351,21 +2363,28 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
 /// usually not type-specific like scalar integer or scalar floating-point.
 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
                                               InstCombinerImpl &IC) {
-  // TODO: Create and use a pattern matcher for ExtractElementInst.
-  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
-  if (!ExtElt || !ExtElt->hasOneUse())
+  Value *VecOp, *Index;
+  if (!match(BitCast.getOperand(0),
+             m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
     return nullptr;
 
   // The bitcast must be to a vectorizable type, otherwise we can't make a new
   // type to extract from.
   Type *DestType = BitCast.getType();
-  if (!VectorType::isValidElementType(DestType))
-    return nullptr;
+  VectorType *VecType = cast<VectorType>(VecOp->getType());
+  if (VectorType::isValidElementType(DestType)) {
+    auto *NewVecType = VectorType::get(DestType, VecType);
+    auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
+    return ExtractElementInst::Create(NewBC, Index);
+  }
+
+  // Only solve DestType is vector to avoid inverse transform in visitBitCast.
+  // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
+  auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
+  if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
+    return CastInst::Create(Instruction::BitCast, VecOp, DestType);
 
-  auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
-  auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
-                                         NewVecType, "bc");
-  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
+  return nullptr;
 }
 
 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
@@ -2373,8 +2392,8 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
                                             InstCombiner::BuilderTy &Builder) {
   Type *DestTy = BitCast.getType();
   BinaryOperator *BO;
-  if (!DestTy->isIntOrIntVectorTy() ||
-      !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
+
+  if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
       !BO->isBitwiseLogicOp())
     return nullptr;
 
@@ -2384,6 +2403,32 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
   if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
     return nullptr;
 
+  if (DestTy->isFPOrFPVectorTy()) {
+    Value *X, *Y;
+    // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
+    if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
+        match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
+      if (X->getType()->isFPOrFPVectorTy() &&
+          Y->getType()->isIntOrIntVectorTy()) {
+        Value *CastedOp =
+            Builder.CreateBitCast(BO->getOperand(0), Y->getType());
+        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
+        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
+      }
+      if (X->getType()->isIntOrIntVectorTy() &&
+          Y->getType()->isFPOrFPVectorTy()) {
+        Value *CastedOp =
+            Builder.CreateBitCast(BO->getOperand(1), X->getType());
+        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
+        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
+      }
+    }
+    return nullptr;
+  }
+
+  if (!DestTy->isIntOrIntVectorTy())
+    return nullptr;
+
   Value *X;
   if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
       X->getType() == DestTy && !isa<Constant>(X)) {
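// ---------------------------------------------------------------------------
// A standalone model, not part of the patch: the new FP branch of
// foldBitCastBitwiseLogic keeps the bitwise logic in the integer domain with
// a single round-trip bitcast. The C equivalent of that shape, clearing a
// float's sign bit through an integer AND (AndBits is a made-up name):
#include <cassert>
#include <cstdint>
#include <cstring>

static float AndBits(float F, uint32_t Mask) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast float -> i32
  Bits &= Mask;                         // the bitwise logic op
  std::memcpy(&F, &Bits, sizeof(F));    // bitcast i32 -> float
  return F;
}

int main() {
  assert(AndBits(-2.5f, 0x7fffffffu) == 2.5f); // fabs via sign-bit clear
  return 0;
}
// ---------------------------------------------------------------------------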
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e45be5745fcc..d1f89973caa1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,13 +17,11 @@
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 
@@ -105,10 +103,14 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
 ///
 /// If AndCst is non-null, then the loaded value is masked with that constant
 /// before doing the comparison. This handles cases like "A[i]&4 == 0".
-Instruction *
-InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
-                                               GlobalVariable *GV, CmpInst &ICI,
-                                               ConstantInt *AndCst) {
+Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
+    LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
+    ConstantInt *AndCst) {
+  if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
+      GV->getValueType() != GEP->getSourceElementType() ||
+      !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return nullptr;
+
   Constant *Init = GV->getInitializer();
   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
     return nullptr;
@@ -188,8 +190,11 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
     if (!Elt)
      return nullptr;
 
     // If this is indexing an array of structures, get the structure element.
-    if (!LaterIndices.empty())
-      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
+    if (!LaterIndices.empty()) {
+      Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
+      if (!Elt)
+        return nullptr;
+    }
 
     // If the element is masked, handle it.
     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
@@ -757,7 +762,7 @@ getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) {
       V = GEP->getOperand(0);
       Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
       Index = ConstantExpr::getAdd(
-          Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
+          Index, ConstantExpr::getSExtOrTrunc(GEPIndex, IndexType));
       continue;
     }
     break;
@@ -887,7 +892,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     if (PtrBase != GEPRHS->getOperand(0)) {
       bool IndicesTheSame =
           GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
-          GEPLHS->getType() == GEPRHS->getType() &&
+          GEPLHS->getPointerOperand()->getType() ==
+              GEPRHS->getPointerOperand()->getType() &&
           GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType();
       if (IndicesTheSame)
         for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
@@ -950,7 +956,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
       return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
 
     bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
-    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
+        GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) {
       // If the GEPs only differ by one index, compare it.
       unsigned NumDifferences = 0; // Keep track of # differences.
       unsigned DiffOperand = 0;    // The operand that differs.
@@ -1001,8 +1008,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
 }
 
 Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
-                                             const AllocaInst *Alloca,
-                                             const Value *Other) {
+                                             const AllocaInst *Alloca) {
   assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
 
   // It would be tempting to fold away comparisons between allocas and any
@@ -1071,10 +1077,9 @@ Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
     }
   }
 
-  Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
-  return replaceInstUsesWith(
-      ICI,
-      ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
+  auto *Res = ConstantInt::get(ICI.getType(),
+                               !CmpInst::isTrueWhenEqual(ICI.getPredicate()));
+  return replaceInstUsesWith(ICI, Res);
 }
 
 /// Fold "icmp pred (X+C), X".
@@ -1376,8 +1381,7 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
   // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
   if (Pred == ICmpInst::ICMP_SGT) {
     Value *A, *B;
-    SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
-    if (SPR.Flavor == SPF_SMIN) {
+    if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) {
       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
         return new ICmpInst(Pred, B, Cmp.getOperand(1));
       if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
@@ -1530,7 +1534,7 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
   return nullptr;
 }
 
-/// Fold icmp (trunc X, Y), C.
+/// Fold icmp (trunc X), C.
 Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
                                                      TruncInst *Trunc,
                                                      const APInt &C) {
@@ -1547,6 +1551,16 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
   unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
            SrcBits = X->getType()->getScalarSizeInBits();
   if (Cmp.isEquality() && Trunc->hasOneUse()) {
+    // Canonicalize to a mask and wider compare if the wide type is suitable:
+    // (trunc X to i8) == C --> (X & 0xff) == (zext C)
+    if (!X->getType()->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
+      Constant *Mask = ConstantInt::get(X->getType(),
+                                        APInt::getLowBitsSet(SrcBits, DstBits));
+      Value *And = Builder.CreateAnd(X, Mask);
+      Constant *WideC = ConstantInt::get(X->getType(), C.zext(SrcBits));
+      return new ICmpInst(Pred, And, WideC);
+    }
+
     // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
     // of the high bits truncated out of x are known.
     KnownBits Known = computeKnownBits(X, 0, &Cmp);
@@ -1865,15 +1879,13 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
   // Try to optimize things like "A[i] & 42 == 0" to index computations.
   Value *X = And->getOperand(0);
   Value *Y = And->getOperand(1);
-  if (auto *LI = dyn_cast<LoadInst>(X))
-    if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
-      if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
-        if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
-            !LI->isVolatile() && isa<ConstantInt>(Y)) {
-          ConstantInt *C2 = cast<ConstantInt>(Y);
-          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
+  if (auto *C2 = dyn_cast<ConstantInt>(Y))
+    if (auto *LI = dyn_cast<LoadInst>(X))
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+          if (Instruction *Res =
+                  foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
             return Res;
-        }
 
   if (!Cmp.isEquality())
     return nullptr;
@@ -2216,22 +2228,41 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
   if (Cmp.isEquality() && Shr->isExact() && C.isZero())
     return new ICmpInst(Pred, X, Cmp.getOperand(1));
 
-  const APInt *ShiftVal;
-  if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
-    return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
-
-  const APInt *ShiftAmt;
-  if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
+  bool IsAShr = Shr->getOpcode() == Instruction::AShr;
+  const APInt *ShiftValC;
+  if (match(Shr->getOperand(0), m_APInt(ShiftValC))) {
+    if (Cmp.isEquality())
+      return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC);
+
+    // If the shifted constant is a power-of-2, test the shift amount directly:
+    // (ShiftValC >> X) >u C --> X <u (LZ(C) - LZ(ShiftValC))
+    // (ShiftValC >> X) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC))
+    if (!IsAShr && ShiftValC->isPowerOf2() &&
+        (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) {
+      bool IsUGT = Pred == CmpInst::ICMP_UGT;
+      assert(ShiftValC->uge(C) && "Expected simplify of compare");
+      assert((IsUGT || !C.isZero()) && "Expected X u< 0 to simplify");
+
+      unsigned CmpLZ =
+          IsUGT ? C.countLeadingZeros() : (C - 1).countLeadingZeros();
+      unsigned ShiftLZ = ShiftValC->countLeadingZeros();
+      Constant *NewC = ConstantInt::get(Shr->getType(), CmpLZ - ShiftLZ);
+      auto NewPred = IsUGT ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+      return new ICmpInst(NewPred, Shr->getOperand(1), NewC);
+    }
+  }
+
+  const APInt *ShiftAmtC;
+  if (!match(Shr->getOperand(1), m_APInt(ShiftAmtC)))
     return nullptr;
 
   // Check that the shift amount is in range. If not, don't perform undefined
   // shifts. When the shift is visited it will be simplified.
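// ---------------------------------------------------------------------------
// A standalone check, not part of the patch, of the shifted-power-of-2 fold
// above at 32-bit width: (32 >> X) >u 3 must agree with
// X <u (LZ(3) - LZ(32)) = 30 - 26 = 4 for every shift amount.
#include <cassert>
#include <bit> // std::countl_zero, C++20

int main() {
  const unsigned ShiftValC = 32, C = 3;
  const unsigned Bound = std::countl_zero(C) - std::countl_zero(ShiftValC);
  assert(Bound == 4);
  for (unsigned X = 0; X < 32; ++X)
    assert(((ShiftValC >> X) > C) == (X < Bound));
  return 0;
}
// ---------------------------------------------------------------------------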
   unsigned TypeBits = C.getBitWidth();
-  unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
+  unsigned ShAmtVal = ShiftAmtC->getLimitedValue(TypeBits);
   if (ShAmtVal >= TypeBits || ShAmtVal == 0)
     return nullptr;
 
-  bool IsAShr = Shr->getOpcode() == Instruction::AShr;
   bool IsExact = Shr->isExact();
   Type *ShrTy = Shr->getType();
   // TODO: If we could guarantee that InstSimplify would handle all of the
@@ -2256,8 +2287,11 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
     }
     if (Pred == CmpInst::ICMP_UGT) {
       // icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
+      // 'C + 1 << ShAmtC' can overflow as a signed number, so the 2nd
+      // clause accounts for that pattern.
       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
-      if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
+      if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1) ||
+          (C + 1).shl(ShAmtVal).isMinSignedValue())
         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
     }
 
@@ -2337,7 +2371,8 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
   // constant power-of-2 value:
   // (X % pow2C) sgt/slt 0
   const ICmpInst::Predicate Pred = Cmp.getPredicate();
-  if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT)
+  if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT &&
+      Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
     return nullptr;
 
   // TODO: The one-use check is standard because we do not typically want to
@@ -2347,7 +2382,15 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
     return nullptr;
 
   const APInt *DivisorC;
-  if (!C.isZero() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
+  if (!match(SRem->getOperand(1), m_Power2(DivisorC)))
+    return nullptr;
+
+  // For cmp_sgt/cmp_slt only zero valued C is handled.
+  // For cmp_eq/cmp_ne only positive valued C is handled.
+  if (((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT) &&
+       !C.isZero()) ||
+      ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+       !C.isStrictlyPositive()))
     return nullptr;
 
   // Mask off the sign bit and the modulo bits (low-bits).
@@ -2356,6 +2399,9 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
   Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
   Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
 
+  if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)
+    return new ICmpInst(Pred, And, ConstantInt::get(Ty, C));
+
   // For 'is positive?' check that the sign-bit is clear and at least 1 masked
   // bit is set. Example:
   // (i8 X % 32) s> 0 --> (X & 159) s> 0
@@ -2372,26 +2418,30 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
 Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
                                                     BinaryOperator *UDiv,
                                                     const APInt &C) {
+  ICmpInst::Predicate Pred = Cmp.getPredicate();
+  Value *X = UDiv->getOperand(0);
+  Value *Y = UDiv->getOperand(1);
+  Type *Ty = UDiv->getType();
+
   const APInt *C2;
-  if (!match(UDiv->getOperand(0), m_APInt(C2)))
+  if (!match(X, m_APInt(C2)))
     return nullptr;
 
   assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
 
   // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
-  Value *Y = UDiv->getOperand(1);
-  if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
+  if (Pred == ICmpInst::ICMP_UGT) {
     assert(!C.isMaxValue() &&
            "icmp ugt X, UINT_MAX should have been simplified already.");
     return new ICmpInst(ICmpInst::ICMP_ULE, Y,
-                        ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
+                        ConstantInt::get(Ty, C2->udiv(C + 1)));
   }
 
   // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
-  if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
+  if (Pred == ICmpInst::ICMP_ULT) {
     assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
     return new ICmpInst(ICmpInst::ICMP_UGT, Y,
-                        ConstantInt::get(Y->getType(), C2->udiv(C)));
+                        ConstantInt::get(Ty, C2->udiv(C)));
   }
 
   return nullptr;
@@ -2401,6 +2451,28 @@ Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
 Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
                                                    BinaryOperator *Div,
                                                    const APInt &C) {
+  ICmpInst::Predicate Pred = Cmp.getPredicate();
+  Value *X = Div->getOperand(0);
+  Value *Y = Div->getOperand(1);
+  Type *Ty = Div->getType();
+  bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
+
+  // If unsigned division and the compare constant is bigger than
+  // UMAX/2 (negative), there's only one pair of values that satisfies an
+  // equality check, so eliminate the division:
+  // (X u/ Y) == C --> (X == C) && (Y == 1)
+  // (X u/ Y) != C --> (X != C) || (Y != 1)
+  // Similarly, if signed division and the compare constant is exactly SMIN:
+  // (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1)
+  // (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1)
+  if (Cmp.isEquality() && Div->hasOneUse() && C.isSignBitSet() &&
+      (!DivIsSigned || C.isMinSignedValue())) {
+    Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C));
+    Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1));
+    auto Logic = Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+    return BinaryOperator::Create(Logic, XBig, YOne);
+  }
+
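// ---------------------------------------------------------------------------
// A standalone exhaustive check, not part of the patch, of the new equality
// elimination above at 8 bits: with a compare constant whose sign bit is set,
// X u/ Y can only equal it when Y is 1 and X is the constant itself.
#include <cassert>

int main() {
  const unsigned C = 0x90; // > UMAX/2 for i8
  for (unsigned X = 0; X <= 255; ++X)
    for (unsigned Y = 1; Y <= 255; ++Y) // Y == 0 is UB for udiv; skipped
      assert(((X / Y) == C) == (X == C && Y == 1));
  return 0;
}
// ---------------------------------------------------------------------------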
   // Fold: icmp pred ([us]div X, C2), C -> range test
   // Fold this div into the comparison, producing a range check.
   // Determine, based on the divide type, what the range is being
   // checked.  If there is an overflow on the low or high side, remember
   // it, otherwise compute the range [low, hi) bounding the new value.
   // See: InsertRangeTest above for the kinds of replacements possible.
   const APInt *C2;
-  if (!match(Div->getOperand(1), m_APInt(C2)))
+  if (!match(Y, m_APInt(C2)))
     return nullptr;
 
   // FIXME: If the operand types don't match the type of the divide
@@ -2419,7 +2491,6 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
   // (x /u C2) <u C.  Simply casting the operands and result won't
   // work. :(  The if statement below tests that condition and bails
   // if it finds it.
-  bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
   if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
     return nullptr;
@@ -2441,8 +2512,6 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
   // instruction that we're folding.
   bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
 
-  ICmpInst::Predicate Pred = Cmp.getPredicate();
-
   // If the division is known to be exact, then there is no remainder from the
   // divide, so the covered range size is unit, otherwise it is the divisor.
   APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
@@ -2457,7 +2526,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
   int LoOverflow = 0, HiOverflow = 0;
   APInt LoBound, HiBound;
 
-  if (!DivIsSigned) {  // udiv
+  if (!DivIsSigned) { // udiv
     // e.g. X/5 op 3  --> [15, 20)
     LoBound = Prod;
     HiOverflow = LoOverflow = ProdOV;
@@ -2472,7 +2541,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
       LoBound = -(RangeSize - 1);
       HiBound = RangeSize;
     } else if (C.isStrictlyPositive()) { // (X / pos) op pos
-      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      LoBound = Prod; // e.g.   X/5 op 3 --> [15, 20)
       HiOverflow = LoOverflow = ProdOV;
       if (!HiOverflow)
         HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
@@ -2492,18 +2561,19 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
       // e.g. X/-5 op 0  --> [-4, 5)
       LoBound = RangeSize + 1;
       HiBound = -RangeSize;
-      if (HiBound == *C2) {        // -INTMIN = INTMIN
-        HiOverflow = 1;            // [INTMIN+1, overflow)
-        HiBound = APInt();         // e.g. X/INTMIN = 0 --> X > INTMIN
+      if (HiBound == *C2) { // -INTMIN = INTMIN
+        HiOverflow = 1;     // [INTMIN+1, overflow)
+        HiBound = APInt();  // e.g. X/INTMIN = 0 --> X > INTMIN
       }
     } else if (C.isStrictlyPositive()) { // (X / neg) op pos
       // e.g. X/-5 op 3  --> [-19, -14)
       HiBound = Prod + 1;
       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow)
-        LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
-    } else {                       // (X / neg) op neg
-      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
+        LoOverflow =
+            addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1 : 0;
+    } else { // (X / neg) op neg
+      LoBound = Prod; // e.g. X/-5 op -3  --> [15, 20)
       LoOverflow = HiOverflow = ProdOV;
       if (!HiOverflow)
         HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
@@ -2513,54 +2583,47 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
     Pred = ICmpInst::getSwappedPredicate(Pred);
   }
 
-  Value *X = Div->getOperand(0);
   switch (Pred) {
-  default: llvm_unreachable("Unhandled icmp opcode!");
-  case ICmpInst::ICMP_EQ:
-    if (LoOverflow && HiOverflow)
-      return replaceInstUsesWith(Cmp, Builder.getFalse());
-    if (HiOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
-                          ICmpInst::ICMP_UGE, X,
-                          ConstantInt::get(Div->getType(), LoBound));
-    if (LoOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
-                          ICmpInst::ICMP_ULT, X,
-                          ConstantInt::get(Div->getType(), HiBound));
-    return replaceInstUsesWith(
-        Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
-  case ICmpInst::ICMP_NE:
-    if (LoOverflow && HiOverflow)
-      return replaceInstUsesWith(Cmp, Builder.getTrue());
-    if (HiOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
-                          ICmpInst::ICMP_ULT, X,
-                          ConstantInt::get(Div->getType(), LoBound));
-    if (LoOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
-                          ICmpInst::ICMP_UGE, X,
-                          ConstantInt::get(Div->getType(), HiBound));
-    return replaceInstUsesWith(Cmp,
-                               insertRangeTest(X, LoBound, HiBound,
-                                               DivIsSigned, false));
-  case ICmpInst::ICMP_ULT:
-  case ICmpInst::ICMP_SLT:
-    if (LoOverflow == +1)   // Low bound is greater than input range.
-      return replaceInstUsesWith(Cmp, Builder.getTrue());
-    if (LoOverflow == -1)   // Low bound is less than input range.
-      return replaceInstUsesWith(Cmp, Builder.getFalse());
-    return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
-  case ICmpInst::ICMP_UGT:
-  case ICmpInst::ICMP_SGT:
-    if (HiOverflow == +1)       // High bound greater than input range.
-      return replaceInstUsesWith(Cmp, Builder.getFalse());
-    if (HiOverflow == -1)       // High bound less than input range.
-      return replaceInstUsesWith(Cmp, Builder.getTrue());
-    if (Pred == ICmpInst::ICMP_UGT)
-      return new ICmpInst(ICmpInst::ICMP_UGE, X,
-                          ConstantInt::get(Div->getType(), HiBound));
-    return new ICmpInst(ICmpInst::ICMP_SGE, X,
-                        ConstantInt::get(Div->getType(), HiBound));
+  default:
+    llvm_unreachable("Unhandled icmp predicate!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return replaceInstUsesWith(Cmp, Builder.getFalse());
+    if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
+                          X, ConstantInt::get(Ty, LoBound));
+    if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+                          X, ConstantInt::get(Ty, HiBound));
+    return replaceInstUsesWith(
+        Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return replaceInstUsesWith(Cmp, Builder.getTrue());
+    if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+                          X, ConstantInt::get(Ty, LoBound));
+    if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
+                          X, ConstantInt::get(Ty, HiBound));
+    return replaceInstUsesWith(
+        Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, false));
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1) // Low bound is greater than input range.
+      return replaceInstUsesWith(Cmp, Builder.getTrue());
+    if (LoOverflow == -1) // Low bound is less than input range.
+      return replaceInstUsesWith(Cmp, Builder.getFalse());
+    return new ICmpInst(Pred, X, ConstantInt::get(Ty, LoBound));
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1) // High bound greater than input range.
+      return replaceInstUsesWith(Cmp, Builder.getFalse());
+    if (HiOverflow == -1) // High bound less than input range.
+      return replaceInstUsesWith(Cmp, Builder.getTrue());
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, HiBound));
+    return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, HiBound));
   }
 
   return nullptr;
@@ -2593,18 +2656,24 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
       !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
     return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult));
 
+  // X - Y == 0 --> X == Y.
+  // X - Y != 0 --> X != Y.
+  // TODO: We allow this with multiple uses as long as the other uses are not
+  //       in phis. The phi use check is guarding against a codegen regression
+  //       for a loop test. If the backend could undo this (and possibly
+  //       subsequent transforms), we would not need this hack.
+  if (Cmp.isEquality() && C.isZero() &&
+      none_of((Sub->users()), [](const User *U) { return isa<PHINode>(U); }))
+    return new ICmpInst(Pred, X, Y);
+
   // The following transforms are only worth it if the only user of the subtract
   // is the icmp.
   // TODO: This is an artificial restriction for all of the transforms below
-  //       that only need a single replacement icmp.
+  //       that only need a single replacement icmp. Can these use the phi test
+  //       like the transform above here?
   if (!Sub->hasOneUse())
     return nullptr;
 
-  // X - Y == 0 --> X == Y.
-  // X - Y != 0 --> X != Y.
-  if (Cmp.isEquality() && C.isZero())
-    return new ICmpInst(Pred, X, Y);
-
   if (Sub->hasNoSignedWrap()) {
     // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
     if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
@@ -2855,10 +2924,13 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
   ICmpInst::Predicate Pred = Cmp.getPredicate();
   Value *Op1 = Cmp.getOperand(1);
   Value *BCSrcOp = Bitcast->getOperand(0);
+  Type *SrcType = Bitcast->getSrcTy();
+  Type *DstType = Bitcast->getType();
 
-  // Make sure the bitcast doesn't change the number of vector elements.
-  if (Bitcast->getSrcTy()->getScalarSizeInBits() ==
-      Bitcast->getDestTy()->getScalarSizeInBits()) {
+  // Make sure the bitcast doesn't change between scalar and vector and
+  // doesn't change the number of vector elements.
+  if (SrcType->isVectorTy() == DstType->isVectorTy() &&
+      SrcType->getScalarSizeInBits() == DstType->getScalarSizeInBits()) {
     // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
     Value *X;
     if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
@@ -2903,8 +2975,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
       Type *XType = X->getType();
 
       // We can't currently handle Power style floating point operations here.
-      if (!(XType->isPPC_FP128Ty() || BCSrcOp->getType()->isPPC_FP128Ty())) {
-
+      if (!(XType->isPPC_FP128Ty() || SrcType->isPPC_FP128Ty())) {
         Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits());
         if (auto *XVTy = dyn_cast<VectorType>(XType))
           NewType = VectorType::get(NewType, XVTy->getElementCount());
@@ -2922,21 +2993,19 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
 
   // Test to see if the operands of the icmp are casted versions of other
   // values. If the ptr->ptr cast can be stripped off both arguments, do so.
-  if (Bitcast->getType()->isPointerTy() &&
-      (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+  if (DstType->isPointerTy() && (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
     // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
     // so eliminate it as well.
     if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
       Op1 = BC2->getOperand(0);
 
-    Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType());
+    Op1 = Builder.CreateBitCast(Op1, SrcType);
     return new ICmpInst(Pred, BCSrcOp, Op1);
   }
 
   const APInt *C;
-  if (!match(Cmp.getOperand(1), m_APInt(C)) ||
-      !Bitcast->getType()->isIntegerTy() ||
-      !Bitcast->getSrcTy()->isIntOrIntVectorTy())
+  if (!match(Cmp.getOperand(1), m_APInt(C)) || !DstType->isIntegerTy() ||
+      !SrcType->isIntOrIntVectorTy())
    return nullptr;
 
   // If this is checking if all elements of a vector compare are set or not,
@@ -2948,9 +3017,8 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
   // TODO: Try harder to reduce compare of 2 freely invertible operands?
   if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() &&
       isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) {
-    Type *ScalarTy = Bitcast->getType();
-    Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), ScalarTy);
-    return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
+    Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), DstType);
+    return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
   }
 
   // If this is checking if all elements of an extended vector are clear or not,
@@ -2978,7 +3046,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
   if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
     // Check whether every element of Mask is the same constant
     if (is_splat(Mask)) {
-      auto *VecTy = cast<VectorType>(BCSrcOp->getType());
+      auto *VecTy = cast<VectorType>(SrcType);
       auto *EltTy = cast<IntegerType>(VecTy->getElementType());
       if (C->isSplat(EltTy->getBitWidth())) {
         // Fold the icmp based on the value of C
@@ -3000,83 +3068,31 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
 /// where X is some kind of instruction.
 Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) {
   const APInt *C;
-  if (!match(Cmp.getOperand(1), m_APInt(C)))
-    return nullptr;
 
-  if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
-    switch (BO->getOpcode()) {
-    case Instruction::Xor:
-      if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::And:
-      if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::Or:
-      if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::Mul:
-      if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::Shl:
-      if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::LShr:
-    case Instruction::AShr:
-      if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::SRem:
-      if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::UDiv:
-      if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
-        return I;
-      LLVM_FALLTHROUGH;
-    case Instruction::SDiv:
-      if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
+  if (match(Cmp.getOperand(1), m_APInt(C))) {
+    if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0)))
+      if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C))
         return I;
-      break;
-    case Instruction::Sub:
-      if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
-        return I;
-      break;
-    case Instruction::Add:
-      if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
-        return I;
-      break;
-    default:
-      break;
-    }
 
-    // TODO: These folds could be refactored to be part of the above calls.
-    if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
-      return I;
-  }
-
-  // Match against CmpInst LHS being instructions other than binary operators.
+    if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0)))
+      // For now, we only support constant integers while folding the
+      // ICMP(SELECT)) pattern. We can extend this to support vector of integers
+      // similar to the cases handled by binary ops above.
+      if (auto *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
+        if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
+          return I;
 
-  if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
-    // For now, we only support constant integers while folding the
-    // ICMP(SELECT)) pattern. We can extend this to support vector of integers
-    // similar to the cases handled by binary ops above.
-    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
-      if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
+    if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0)))
+      if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
         return I;
-  }
 
-  if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
-    if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
-      return I;
+    if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+      if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
+        return I;
   }
 
-  if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
-    if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
-      return I;
+  if (match(Cmp.getOperand(1), m_APIntAllowUndef(C)))
+    return foldICmpInstWithConstantAllowUndef(Cmp, *C);
 
   return nullptr;
 }
@@ -3233,12 +3249,6 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
   case Intrinsic::fshl:
   case Intrinsic::fshr:
     if (II->getArgOperand(0) == II->getArgOperand(1)) {
-      // (rot X, ?) == 0/-1 --> X == 0/-1
-      // TODO: This transform is safe to re-use undef elts in a vector, but
-      //       the constant value passed in by the caller doesn't allow that.
-      if (C.isZero() || C.isAllOnes())
-        return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
-
       const APInt *RotAmtC;
       // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC)
       // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC)
@@ -3311,6 +3321,89 @@ static Instruction *foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp) {
   return nullptr;
 }
 
+/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
+/// where X is some kind of instruction and C is AllowUndef.
+/// TODO: Move more folds which allow undef to this function.
+Instruction *
+InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
+                                                     const APInt &C) {
+  const ICmpInst::Predicate Pred = Cmp.getPredicate();
+  if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) {
+    switch (II->getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::fshl:
+    case Intrinsic::fshr:
+      if (Cmp.isEquality() && II->getArgOperand(0) == II->getArgOperand(1)) {
+        // (rot X, ?) == 0/-1 --> X == 0/-1
+        if (C.isZero() || C.isAllOnes())
+          return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
+      }
+      break;
+    }
+  }
+
+  return nullptr;
+}
+
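// ---------------------------------------------------------------------------
// A standalone check, not part of the patch: the rotate fold moved into
// foldICmpInstWithConstantAllowUndef relies on rotation being a permutation
// of bits, so all-zeros and all-ones are the only compare constants that are
// fixed points for every rotate amount. Exhaustive at 8 bits (Rol8 is a
// made-up helper):
#include <cassert>
#include <cstdint>

static uint8_t Rol8(uint8_t X, unsigned R) {
  R &= 7;
  return (uint8_t)((X << R) | (X >> ((8 - R) & 7)));
}

int main() {
  for (unsigned X = 0; X <= 255; ++X)
    for (unsigned R = 0; R < 8; ++R) {
      assert((Rol8((uint8_t)X, R) == 0x00) == (X == 0x00));
      assert((Rol8((uint8_t)X, R) == 0xff) == (X == 0xff));
    }
  return 0;
}
// ---------------------------------------------------------------------------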
+/// Fold an icmp with BinaryOp and constant operand: icmp Pred BO, C.
+Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
+                                                         BinaryOperator *BO,
+                                                         const APInt &C) {
+  switch (BO->getOpcode()) {
+  case Instruction::Xor:
+    if (Instruction *I = foldICmpXorConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::And:
+    if (Instruction *I = foldICmpAndConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::Or:
+    if (Instruction *I = foldICmpOrConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::Mul:
+    if (Instruction *I = foldICmpMulConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::Shl:
+    if (Instruction *I = foldICmpShlConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::LShr:
+  case Instruction::AShr:
+    if (Instruction *I = foldICmpShrConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::SRem:
+    if (Instruction *I = foldICmpSRemConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::UDiv:
+    if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C))
+      return I;
+    LLVM_FALLTHROUGH;
+  case Instruction::SDiv:
+    if (Instruction *I = foldICmpDivConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::Sub:
+    if (Instruction *I = foldICmpSubConstant(Cmp, BO, C))
+      return I;
+    break;
+  case Instruction::Add:
+    if (Instruction *I = foldICmpAddConstant(Cmp, BO, C))
+      return I;
+    break;
+  default:
+    break;
+  }
+
+  // TODO: These folds could be refactored to be part of the above calls.
+  return foldICmpBinOpEqualityWithConstant(Cmp, BO, C);
+}
+
 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
 Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
                                                              IntrinsicInst *II,
@@ -3406,64 +3499,6 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
     if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
       return NV;
     break;
-  case Instruction::Select: {
-    // If either operand of the select is a constant, we can fold the
-    // comparison into the select arms, which will cause one to be
-    // constant folded and the select turned into a bitwise or.
-    Value *Op1 = nullptr, *Op2 = nullptr;
-    ConstantInt *CI = nullptr;
-
-    auto SimplifyOp = [&](Value *V) {
-      Value *Op = nullptr;
-      if (Constant *C = dyn_cast<Constant>(V)) {
-        Op = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
-      } else if (RHSC->isNullValue()) {
-        // If null is being compared, check if it can be further simplified.
-        Op = SimplifyICmpInst(I.getPredicate(), V, RHSC, SQ);
-      }
-      return Op;
-    };
-    Op1 = SimplifyOp(LHSI->getOperand(1));
-    if (Op1)
-      CI = dyn_cast<ConstantInt>(Op1);
-
-    Op2 = SimplifyOp(LHSI->getOperand(2));
-    if (Op2)
-      CI = dyn_cast<ConstantInt>(Op2);
-
-    // We only want to perform this transformation if it will not lead to
-    // additional code. This is true if either both sides of the select
-    // fold to a constant (in which case the icmp is replaced with a select
-    // which will usually simplify) or this is the only user of the
-    // select (in which case we are trading a select+icmp for a simpler
-    // select+icmp) or all uses of the select can be replaced based on
-    // dominance information ("Global cases").
-    bool Transform = false;
-    if (Op1 && Op2)
-      Transform = true;
-    else if (Op1 || Op2) {
-      // Local case
-      if (LHSI->hasOneUse())
-        Transform = true;
-      // Global cases
-      else if (CI && !CI->isZero())
-        // When Op1 is constant try replacing select with second operand.
-        // Otherwise Op2 is constant and try replacing select with first
-        // operand.
-        Transform =
-            replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
-    }
-    if (Transform) {
-      if (!Op1)
-        Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
-                                 I.getName());
-      if (!Op2)
-        Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
-                                 I.getName());
-      return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
-    }
-    break;
-  }
   case Instruction::IntToPtr:
     // icmp pred inttoptr(X), null -> icmp pred X, 0
     if (RHSC->isNullValue() &&
@@ -3476,19 +3511,72 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
   case Instruction::Load:
     // Try to optimize things like "A[i] > 4" to index computations.
     if (GetElementPtrInst *GEP =
-            dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+            dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
-        if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
-            !cast<LoadInst>(LHSI)->isVolatile())
-          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
-            return Res;
-    }
+        if (Instruction *Res =
+                foldCmpLoadFromIndexedGlobal(cast<LoadInst>(LHSI), GEP, GV, I))
+          return Res;
     break;
   }
 
   return nullptr;
 }
 
+Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
+                                              SelectInst *SI, Value *RHS,
+                                              const ICmpInst &I) {
+  // Try to fold the comparison into the select arms, which will cause the
+  // select to be converted into a logical and/or.
+  auto SimplifyOp = [&](Value *Op, bool SelectCondIsTrue) -> Value * {
+    if (Value *Res = simplifyICmpInst(Pred, Op, RHS, SQ))
+      return Res;
+    if (Optional<bool> Impl = isImpliedCondition(SI->getCondition(), Pred, Op,
+                                                 RHS, DL, SelectCondIsTrue))
+      return ConstantInt::get(I.getType(), *Impl);
+    return nullptr;
+  };
+
+  ConstantInt *CI = nullptr;
+  Value *Op1 = SimplifyOp(SI->getOperand(1), true);
+  if (Op1)
+    CI = dyn_cast<ConstantInt>(Op1);
+
+  Value *Op2 = SimplifyOp(SI->getOperand(2), false);
+  if (Op2)
+    CI = dyn_cast<ConstantInt>(Op2);
+
+  // We only want to perform this transformation if it will not lead to
+  // additional code. This is true if either both sides of the select
+  // fold to a constant (in which case the icmp is replaced with a select
+  // which will usually simplify) or this is the only user of the
+  // select (in which case we are trading a select+icmp for a simpler
+  // select+icmp) or all uses of the select can be replaced based on
+  // dominance information ("Global cases").
+  bool Transform = false;
+  if (Op1 && Op2)
+    Transform = true;
+  else if (Op1 || Op2) {
+    // Local case
+    if (SI->hasOneUse())
+      Transform = true;
+    // Global cases
+    else if (CI && !CI->isZero())
+      // When Op1 is constant try replacing select with second operand.
+      // Otherwise Op2 is constant and try replacing select with first
+      // operand.
+      Transform = replacedSelectWithOperand(SI, &I, Op1 ? 2 : 1);
+  }
+  if (Transform) {
+    if (!Op1)
+      Op1 = Builder.CreateICmp(Pred, SI->getOperand(1), RHS, I.getName());
+    if (!Op2)
+      Op2 = Builder.CreateICmp(Pred, SI->getOperand(2), RHS, I.getName());
+    return SelectInst::Create(SI->getOperand(0), Op1, Op2);
+  }
+
+  return nullptr;
+}
+
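// ---------------------------------------------------------------------------
// A standalone model, not part of the patch, of what foldSelectICmp achieves
// when both select arms fold against the compare operand: the icmp collapses
// onto the select condition. Scalar analogue of
//   icmp ult (select C, 10, 20), 15  -->  C
// (Before/After are illustrative names):
#include <cassert>

static bool Before(bool C) { return (C ? 10 : 20) < 15; }
static bool After(bool C) { return C; }

int main() {
  assert(Before(false) == After(false));
  assert(Before(true) == After(true));
  return 0;
}
// ---------------------------------------------------------------------------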
 /// Some comparisons can be simplified.
 /// In this case, we are looking for comparisons that look like
 /// a check for a lossy truncation.
@@ -3756,7 +3844,7 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
 
   // Can we fold (XShAmt+YShAmt) ?
   auto *NewShAmt = dyn_cast_or_null<Constant>(
-      SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
+      simplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
                       /*isNUW=*/false, SQ.getWithInstruction(&I)));
   if (!NewShAmt)
     return nullptr;
@@ -3956,6 +4044,24 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
       (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
     return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
 
+  {
+    // (Op1 + X) + C u</u>= Op1 --> ~C - X u</u>= Op1
+    Constant *C;
+    if (match(Op0, m_OneUse(m_Add(m_c_Add(m_Specific(Op1), m_Value(X)),
+                                  m_ImmConstant(C)))) &&
+        (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
+      Constant *C2 = ConstantExpr::getNot(C);
+      return new ICmpInst(Pred, Builder.CreateSub(C2, X), Op1);
+    }
+    // Op0 u>/u<= (Op0 + X) + C --> Op0 u>/u<= ~C - X
+    if (match(Op1, m_OneUse(m_Add(m_c_Add(m_Specific(Op0), m_Value(X)),
+                                  m_ImmConstant(C)))) &&
+        (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) {
+      Constant *C2 = ConstantExpr::getNot(C);
+      return new ICmpInst(Pred, Op0, Builder.CreateSub(C2, X));
+    }
+  }
+
   {
     // Similar to above: an unsigned overflow comparison may use offset + mask:
     // ((Op1 + C) & C) u<  Op1 --> Op1 != 0
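// ---------------------------------------------------------------------------
// A standalone exhaustive check, not part of the patch, of the new offset
// rewrite above at 8 bits with C fixed to 5: (Op1 + X) + C u< Op1 must agree
// with (~C - X) u< Op1 for all inputs (everything wraps mod 256, matching
// LLVM's unsigned i8 semantics).
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 5;
  for (unsigned Op1 = 0; Op1 <= 255; ++Op1)
    for (unsigned X = 0; X <= 255; ++X) {
      uint8_t Lhs = (uint8_t)((uint8_t)(Op1 + X) + C);
      uint8_t Rhs = (uint8_t)((uint8_t)~C - (uint8_t)X);
      assert((Lhs < (uint8_t)Op1) == (Rhs < (uint8_t)Op1));
    }
  return 0;
}
// ---------------------------------------------------------------------------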
@@ -4114,29 +4220,38 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
   // icmp (A + C1), (C + C2) -> icmp A, (C + C3)
   // s.t. C3 = C2 - C1
   if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
-      (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
-    if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
-      if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
-        const APInt &AP1 = C1->getValue();
-        const APInt &AP2 = C2->getValue();
-        if (AP1.isNegative() == AP2.isNegative()) {
-          APInt AP1Abs = C1->getValue().abs();
-          APInt AP2Abs = C2->getValue().abs();
-          if (AP1Abs.uge(AP2Abs)) {
-            ConstantInt *C3 = Builder.getInt(AP1 - AP2);
-            bool HasNUW = BO0->hasNoUnsignedWrap() && C3->getValue().ule(AP1);
-            bool HasNSW = BO0->hasNoSignedWrap();
-            Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
-            return new ICmpInst(Pred, NewAdd, C);
-          } else {
-            ConstantInt *C3 = Builder.getInt(AP2 - AP1);
-            bool HasNUW = BO1->hasNoUnsignedWrap() && C3->getValue().ule(AP2);
-            bool HasNSW = BO1->hasNoSignedWrap();
-            Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
-            return new ICmpInst(Pred, A, NewAdd);
-          }
-        }
+      (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) {
+    const APInt *AP1, *AP2;
+    // TODO: Support non-uniform vectors.
+    // TODO: Allow undef passthrough if B AND D's element is undef.
+    if (match(B, m_APIntAllowUndef(AP1)) && match(D, m_APIntAllowUndef(AP2)) &&
+        AP1->isNegative() == AP2->isNegative()) {
+      APInt AP1Abs = AP1->abs();
+      APInt AP2Abs = AP2->abs();
+      if (AP1Abs.uge(AP2Abs)) {
+        APInt Diff = *AP1 - *AP2;
+        bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
+        bool HasNSW = BO0->hasNoSignedWrap();
+        Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
+        Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+        return new ICmpInst(Pred, NewAdd, C);
+      } else {
+        APInt Diff = *AP2 - *AP1;
+        bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
+        bool HasNSW = BO1->hasNoSignedWrap();
+        Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
+        Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+        return new ICmpInst(Pred, A, NewAdd);
       }
+    }
+    Constant *Cst1, *Cst2;
+    if (match(B, m_ImmConstant(Cst1)) && match(D, m_ImmConstant(Cst2)) &&
+        ICmpInst::isEquality(Pred)) {
+      Constant *Diff = ConstantExpr::getSub(Cst2, Cst1);
+      Value *NewAdd = Builder.CreateAdd(C, Diff);
+      return new ICmpInst(Pred, A, NewAdd);
+    }
+  }
 
   // Analyze the case when either Op0 or Op1 is a sub instruction.
   // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
@@ -4524,18 +4639,21 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
 
   // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
   // For lshr and ashr pairs.
-  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
-       match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
-      (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
-       match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
-    unsigned TypeBits = Cst1->getBitWidth();
-    unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+  const APInt *AP1, *AP2;
+  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
+       match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) ||
+      (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
+       match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) {
+    if (AP1 != AP2)
+      return nullptr;
+    unsigned TypeBits = AP1->getBitWidth();
+    unsigned ShAmt = AP1->getLimitedValue(TypeBits);
     if (ShAmt < TypeBits && ShAmt != 0) {
       ICmpInst::Predicate NewPred =
          Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
       APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
-      return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
+      return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal));
     }
   }
 
@@ -4665,8 +4783,7 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
   return nullptr;
 }
 
-static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
-                                           InstCombiner::BuilderTy &Builder) {
+Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
   assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0");
   auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0));
   Value *X;
@@ -4675,25 +4792,37 @@ static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
   bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt;
   bool IsSignedCmp = ICmp.isSigned();
 
-  if (auto *CastOp1 = dyn_cast<CastInst>(ICmp.getOperand(1))) {
-    // If the signedness of the two casts doesn't agree (i.e. one is a sext
-    // and the other is a zext), then we can't handle this.
-    // TODO: This is too strict. We can handle some predicates (equality?).
-    if (CastOp0->getOpcode() != CastOp1->getOpcode())
-      return nullptr;
+
+  // icmp Pred (ext X), (ext Y)
+  Value *Y;
+  if (match(ICmp.getOperand(1), m_ZExtOrSExt(m_Value(Y)))) {
+    bool IsZext0 = isa<ZExtInst>(ICmp.getOperand(0));
+    bool IsZext1 = isa<ZExtInst>(ICmp.getOperand(1));
+
+    // If we have mismatched casts, treat the zext of a non-negative source as
+    // a sext to simulate matching casts. Otherwise, we are done.
+    // TODO: Can we handle some predicates (equality) without non-negative?
+    if (IsZext0 != IsZext1) {
+      if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
+          (IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
+        IsSignedExt = true;
+      else
+        return nullptr;
+    }
 
     // Not an extension from the same type?
-    Value *Y = CastOp1->getOperand(0);
     Type *XTy = X->getType(), *YTy = Y->getType();
     if (XTy != YTy) {
       // One of the casts must have one use because we are creating a new cast.
-      if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse())
+      if (!ICmp.getOperand(0)->hasOneUse() && !ICmp.getOperand(1)->hasOneUse())
         return nullptr;
 
       // Extend the narrower operand to the type of the wider operand.
+      CastInst::CastOps CastOpcode =
+          IsSignedExt ? Instruction::SExt : Instruction::ZExt;
       if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits())
-        X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy);
+        X = Builder.CreateCast(CastOpcode, X, YTy);
       else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits())
-        Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy);
+        Y = Builder.CreateCast(CastOpcode, Y, XTy);
       else
         return nullptr;
     }
@@ -4742,7 +4871,7 @@ static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
   // or could not be determined to be equal (in the case of a constant
   // expression), so the constant cannot be represented in the shorter type.
   // All the cases that fold to true or false will have already been handled
-  // by SimplifyICmpInst, so only deal with the tricky case.
+  // by simplifyICmpInst, so only deal with the tricky case.
   if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C))
     return nullptr;
 
@@ -4811,7 +4940,7 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
   if (Instruction *R = foldICmpWithTrunc(ICmp, Builder))
     return R;
 
-  return foldICmpWithZextOrSext(ICmp, Builder);
+  return foldICmpWithZextOrSext(ICmp);
 }
 
 static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
@@ -5449,35 +5578,23 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
           LHS = Op0;
 
         Value *X;
-        if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
-          APInt ValToCheck = Op0KnownZeroInverted;
+        const APInt *C1;
+        if (match(LHS, m_Shl(m_Power2(C1), m_Value(X)))) {
           Type *XTy = X->getType();
-          if (ValToCheck.isPowerOf2()) {
-            // ((1 << X) & 8) == 0 -> X != 3
-            // ((1 << X) & 8) != 0 -> X == 3
-            auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
-            auto NewPred = ICmpInst::getInversePredicate(Pred);
-            return new ICmpInst(NewPred, X, CmpC);
-          } else if ((++ValToCheck).isPowerOf2()) {
-            // ((1 << X) & 7) == 0 -> X >= 3
-            // ((1 << X) & 7) != 0 -> X < 3
-            auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
+          unsigned Log2C1 = C1->countTrailingZeros();
+          APInt C2 = Op0KnownZeroInverted;
+          APInt C2Pow2 = (C2 & ~(*C1 - 1)) + *C1;
+          if (C2Pow2.isPowerOf2()) {
+            // iff (C1 is pow2) & ((C2 & ~(C1-1)) + C1) is pow2):
+            // ((C1 << X) & C2) == 0 -> X >= (Log2(C2+C1) - Log2(C1))
+            // ((C1 << X) & C2) != 0 -> X < (Log2(C2+C1) - Log2(C1))
+            unsigned Log2C2 = C2Pow2.countTrailingZeros();
+            auto *CmpC = ConstantInt::get(XTy, Log2C2 - Log2C1);
             auto NewPred =
                 Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
             return new ICmpInst(NewPred, X, CmpC);
           }
         }
-
-        // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
-        const APInt *CI;
-        if (Op0KnownZeroInverted.isOne() &&
-            match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
-          // ((8 >>u X) & 1) == 0 -> X != 3
-          // ((8 >>u X) & 1) != 0 -> X == 3
-          unsigned CmpVal = CI->countTrailingZeros();
-          auto NewPred = ICmpInst::getInversePredicate(Pred);
-          return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
-        }
       }
       break;
     }
@@ -5557,6 +5674,28 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
   return nullptr;
 }
 
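// ---------------------------------------------------------------------------
// A standalone check, not part of the patch, of the generalized shl-mask fold
// above with C1 = 2 and C2 = 14: C1 is a power of 2 and
// (C2 & ~(C1 - 1)) + C1 = 16 is a power of 2, so ((C1 << X) & C2) == 0 must
// agree with X >= log2(16) - log2(2) = 3.
#include <cassert>

int main() {
  const unsigned C1 = 2, C2 = 14;
  for (unsigned X = 0; X < 8; ++X)
    assert((((C1 << X) & C2) == 0) == (X >= 3));
  return 0;
}
// ---------------------------------------------------------------------------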
+/// If one operand of an icmp is effectively a bool (value range of {0,1}),
+/// then try to reduce patterns based on that limit.
+static Instruction *foldICmpUsingBoolRange(ICmpInst &I,
+                                           InstCombiner::BuilderTy &Builder) {
+  Value *X, *Y;
+  ICmpInst::Predicate Pred;
+
+  // X must be 0 and bool must be true for "ULT":
+  // X <u (zext i1 Y) --> (X == 0) & Y
+  if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_ZExt(m_Value(Y))))) &&
+      Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULT)
+    return BinaryOperator::CreateAnd(Builder.CreateIsNull(X), Y);
+
+  // X must be 0 or bool must be true for "ULE":
+  // X <=u (sext i1 Y) --> (X == 0) | Y
+  if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_SExt(m_Value(Y))))) &&
+      Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
+    return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
+
+  return nullptr;
+}
+
 llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
 InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
                                                        Constant *C) {
@@ -5948,7 +6087,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
     Changed = true;
   }
 
-  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
+  if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
     return replaceInstUsesWith(I, V);
 
   // Comparing -val or val with non-zero is the same as just comparing val
@@ -5984,6 +6123,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpWithDominatingICmp(I))
     return Res;
 
+  if (Instruction *Res = foldICmpUsingBoolRange(I, Builder))
+    return Res;
+
   if (Instruction *Res = foldICmpUsingKnownBits(I))
     return Res;
 
@@ -6057,14 +6199,21 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
       if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
         return NI;
 
+  if (auto *SI = dyn_cast<SelectInst>(Op0))
+    if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I))
+      return NI;
+  if (auto *SI = dyn_cast<SelectInst>(Op1))
+    if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I))
+      return NI;
+
   // Try to optimize equality comparisons against alloca-based pointers.
   if (Op0->getType()->isPointerTy() && I.isEquality()) {
     assert(Op1->getType()->isPointerTy() &&
            "Comparing pointer with non-pointer?");
     if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
-      if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
+      if (Instruction *New = foldAllocaCmp(I, Alloca))
        return New;
     if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
-      if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
+      if (Instruction *New = foldAllocaCmp(I, Alloca))
        return New;
   }
 
@@ -6529,6 +6678,25 @@ static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
   }
 }
 
+static Instruction *foldFCmpFNegCommonOp(FCmpInst &I) {
+  CmpInst::Predicate Pred = I.getPredicate();
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Canonicalize fneg as Op1.
+  if (match(Op0, m_FNeg(m_Value())) && !match(Op1, m_FNeg(m_Value()))) {
+    std::swap(Op0, Op1);
+    Pred = I.getSwappedPredicate();
+  }
+
+  if (!match(Op1, m_FNeg(m_Specific(Op0))))
+    return nullptr;
+
+  // Replace the negated operand with 0.0:
+  // fcmp Pred Op0, -Op0 --> fcmp Pred Op0, 0.0
+  Constant *Zero = ConstantFP::getNullValue(Op0->getType());
+  return new FCmpInst(Pred, Op0, Zero, "", &I);
+}
+
 Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
   bool Changed = false;
 
@@ -6542,7 +6710,7 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
   const CmpInst::Predicate Pred = I.getPredicate();
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Value *V = SimplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
+  if (Value *V = simplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
                                   SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
@@ -6587,6 +6755,9 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
   if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
     return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
 
+  if (Instruction *R = foldFCmpFNegCommonOp(I))
+    return R;
+
   // Test if the FCmpInst instruction is used exclusively by a select as
   // part of a minimum or maximum operation. If so, refrain from doing
   // any other folding. This helps out other analyses which understand
@@ -6632,10 +6803,9 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
       case Instruction::Load:
         if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
           if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
-            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
-                !cast<LoadInst>(LHSI)->isVolatile())
-              if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
-                return Res;
+            if (Instruction *Res = foldCmpLoadFromIndexedGlobal(
+                    cast<LoadInst>(LHSI), GEP, GV, I))
+              return Res;
         break;
       }
   }
@@ -6657,7 +6827,6 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
     if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
       return new FCmpInst(Pred, X, Y, "", &I);
 
-    // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
     const APFloat *C;
     if (match(Op1, m_APFloat(C))) {
       const fltSemantics &FPSem =
@@ -6666,6 +6835,31 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
       APFloat TruncC = *C;
       TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);
 
+      if (Lossy) {
+        // X can't possibly equal the higher-precision constant, so reduce any
+        // equality comparison.
+        // TODO: Other predicates can be handled via getFCmpCode().
+        switch (Pred) {
+        case FCmpInst::FCMP_OEQ:
+          // X is ordered and equal to an impossible constant --> false
+          return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+        case FCmpInst::FCMP_ONE:
+          // X is ordered and not equal to an impossible constant --> ordered
+          return new FCmpInst(FCmpInst::FCMP_ORD, X,
+                              ConstantFP::getNullValue(X->getType()));
+        case FCmpInst::FCMP_UEQ:
+          // X is unordered or equal to an impossible constant --> unordered
+          return new FCmpInst(FCmpInst::FCMP_UNO, X,
+                              ConstantFP::getNullValue(X->getType()));
+        case FCmpInst::FCMP_UNE:
+          // X is unordered or not equal to an impossible constant --> true
+          return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+        default:
+          break;
+        }
+      }
+
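// ---------------------------------------------------------------------------
// A standalone illustration, not part of the patch, of the lossy-constant
// reasoning above: a double constant that does not survive truncation to
// float cannot equal any float extended to double, so the OEQ form folds to
// false. This only spot-checks the nearest float candidate.
#include <cassert>

int main() {
  const double C = 0.1;          // lossy when truncated to float
  assert((double)(float)C != C); // confirms the conversion is lossy
  float Nearest = (float)C;      // closest float to C
  assert(!((double)Nearest == C));
  return 0;
}
// ---------------------------------------------------------------------------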
APFloat Fabs = TruncC; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 7743b4c41555..271154bb3f5a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -71,7 +71,7 @@ public: : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, BFI, PSI, DL, LI) {} - virtual ~InstCombinerImpl() {} + virtual ~InstCombinerImpl() = default; /// Run the combiner over the entire worklist until it is empty. /// @@ -172,7 +172,8 @@ public: Instruction *visitLandingPadInst(LandingPadInst &LI); Instruction *visitVAEndInst(VAEndInst &I); Value *pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI); - bool freezeDominatedUses(FreezeInst &FI); + bool freezeOtherUses(FreezeInst &FI); + Instruction *foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN); Instruction *visitFreeze(FreezeInst &I); /// Specify what to return for unhandled instructions. @@ -192,7 +193,7 @@ public: const Twine &Suffix = ""); private: - void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI); + bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI); bool isDesirableIntType(unsigned BitWidth) const; bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const; bool shouldChangeType(Type *From, Type *To) const; @@ -325,7 +326,7 @@ private: Instruction *narrowMathIfNoOverflow(BinaryOperator &I); Instruction *narrowFunnelShift(TruncInst &Trunc); Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN); - Instruction *matchSAddSubSat(Instruction &MinMax1); + Instruction *matchSAddSubSat(IntrinsicInst &MinMax1); Instruction *foldNot(BinaryOperator &I); void freelyInvertAllUsersOf(Value *V); @@ -344,16 +345,20 @@ private: const CastInst *CI2); Value *simplifyIntToPtrRoundTripCast(Value *Val); - Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &And); - Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or); + Value *foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &I, + bool IsAnd, bool IsLogical = false); Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor); Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd); + Value *foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, ICmpInst *ICmp2, + bool IsAnd); + /// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp). /// NOTE: Unlike most of instcombine, this returns a Value which should /// already be inserted into the function. - Value *foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd); + Value *foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd, + bool IsLogicalSelect = false); Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI, bool IsAnd, @@ -407,7 +412,7 @@ public: // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. if (&I == V) - V = UndefValue::get(I.getType()); + V = PoisonValue::get(I.getType()); LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n" << " with " << *V << '\n'); @@ -435,7 +440,7 @@ public: void CreateNonTerminatorUnreachable(Instruction *InsertAt) { auto &Ctx = InsertAt->getContext(); new StoreInst(ConstantInt::getTrue(Ctx), - UndefValue::get(Type::getInt1PtrTy(Ctx)), + PoisonValue::get(Type::getInt1PtrTy(Ctx)), InsertAt); } @@ -621,7 +626,8 @@ public: /// other operand, try to fold the binary operator into the select arguments. 
/// This also works for Cast instructions, which obviously do not have a /// second operand. - Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); + Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, + bool FoldWithMultiUse = false); /// This is a convenience wrapper function for the above two functions. Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I); @@ -650,22 +656,27 @@ public: Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); - Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca, - const Value *Other); - Instruction *foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, + Instruction *foldSelectICmp(ICmpInst::Predicate Pred, SelectInst *SI, + Value *RHS, const ICmpInst &I); + Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca); + Instruction *foldCmpLoadFromIndexedGlobal(LoadInst *LI, + GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst = nullptr); Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, Constant *RHSC); Instruction *foldICmpAddOpConst(Value *X, const APInt &C, ICmpInst::Predicate Pred); - Instruction *foldICmpWithCastOp(ICmpInst &ICI); + Instruction *foldICmpWithCastOp(ICmpInst &ICmp); + Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp); Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp); Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp); Instruction *foldICmpWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp); + Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, + const APInt &C); Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); @@ -674,6 +685,8 @@ public: Value *foldMultiplicationOverflowCheck(ICmpInst &Cmp); + Instruction *foldICmpBinOpWithConstant(ICmpInst &Cmp, BinaryOperator *BO, + const APInt &C); Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select, ConstantInt *C); Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 756792918dba..e03b7026f802 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -16,15 +16,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Loads.h" -#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -775,7 +772,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); // Make sure that, even if the multiplication below would wrap as an // uint64_t, we still do the right thing. 
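// For example (hypothetical): an alloca with element count 2^61 and an 8-byte element // type would make the 64-bit product wrap to 0 and falsely pass the size check; // doing the multiply in 128 bits keeps the ugt(MaxSize) comparison sound.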
- if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) + if ((CS->getValue().zext(128) * APInt(128, TypeSize)).ugt(MaxSize)) return false; continue; } @@ -1395,8 +1392,10 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { + if (PrevSI->isUnordered() && + equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) && + PrevSI->getValueOperand()->getType() == + SI.getValueOperand()->getType()) { ++NumDeadStore; // Manually add back the original store to the worklist now, so it will // be processed after the operands of the removed store, as this may @@ -1436,6 +1435,8 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { } // store undef, Ptr -> noop + // FIXME: This is technically incorrect because it might overwrite a poison + // value. Change to PoisonValue once #52930 is resolved. if (isa<UndefValue>(Val)) return eraseInstFromFunction(SI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 1aa10b550fc4..2a34edbf6cb8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -30,13 +29,9 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include -#include -#include -#include #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Utils/InstructionWorklist.h" @@ -145,7 +140,7 @@ static Value *foldMulSelectToNegate(BinaryOperator &I, } Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { - if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyMulInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -297,15 +292,24 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem : Instruction::SRem; - Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1); + // X must be frozen because we are increasing its number of uses. + Value *XFreeze = Builder.CreateFreeze(X, X->getName() + ".fr"); + Value *Rem = Builder.CreateBinOp(RemOpc, XFreeze, DivOp1); if (DivOp1 == Y) - return BinaryOperator::CreateSub(X, Rem); - return BinaryOperator::CreateSub(Rem, X); + return BinaryOperator::CreateSub(XFreeze, Rem); + return BinaryOperator::CreateSub(Rem, XFreeze); } } - /// i1 mul -> i1 and. - if (I.getType()->isIntOrIntVectorTy(1)) + // Fold the following two scenarios: + // 1) i1 mul -> i1 and. + // 2) X * Y --> X & Y, iff X, Y can be only {0,1}. 
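+ // For instance (hypothetical IR, not part of this change): with + // %p = and i32 %x, 1 and %q = and i32 %y, 1, mul i32 %p, %q folds to + // and i32 %p, %q, since a product of values in {0,1} is their logical AND.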
+ // Note: We could use known bits to generalize this and related patterns with + // shifts/truncs + Type *Ty = I.getType(); + if (Ty->isIntOrIntVectorTy(1) || + (match(Op0, m_And(m_Value(), m_One())) && + match(Op1, m_And(m_Value(), m_One())))) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -338,7 +342,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() && (Op0->hasOneUse() || Op1->hasOneUse() || X == Y)) { Value *And = Builder.CreateAnd(X, Y, "mulbool"); - return CastInst::Create(Instruction::ZExt, And, I.getType()); + return CastInst::Create(Instruction::ZExt, And, Ty); } // (sext bool X) * (zext bool Y) --> sext (and X, Y) // (zext bool X) * (sext bool Y) --> sext (and X, Y) @@ -348,42 +352,56 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() && (Op0->hasOneUse() || Op1->hasOneUse())) { Value *And = Builder.CreateAnd(X, Y, "mulbool"); - return CastInst::Create(Instruction::SExt, And, I.getType()); + return CastInst::Create(Instruction::SExt, And, Ty); } // (zext bool X) * Y --> X ? Y : 0 // Y * (zext bool X) --> X ? Y : 0 if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(X, Op1, ConstantInt::get(I.getType(), 0)); + return SelectInst::Create(X, Op1, ConstantInt::getNullValue(Ty)); if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(X, Op0, ConstantInt::get(I.getType(), 0)); + return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty)); - // (sext bool X) * C --> X ? -C : 0 Constant *ImmC; - if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1) && - match(Op1, m_ImmConstant(ImmC))) { - Constant *NegC = ConstantExpr::getNeg(ImmC); - return SelectInst::Create(X, NegC, ConstantInt::getNullValue(I.getType())); + if (match(Op1, m_ImmConstant(ImmC))) { + // (sext bool X) * C --> X ? -C : 0 + if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { + Constant *NegC = ConstantExpr::getNeg(ImmC); + return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty)); + } + + // (ashr i32 X, 31) * C --> (X < 0) ? -C : 0 + const APInt *C; + if (match(Op0, m_OneUse(m_AShr(m_Value(X), m_APInt(C)))) && + *C == C->getBitWidth() - 1) { + Constant *NegC = ConstantExpr::getNeg(ImmC); + Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); + return SelectInst::Create(IsNeg, NegC, ConstantInt::getNullValue(Ty)); + } } - // (lshr X, 31) * Y --> (ashr X, 31) & Y - // Y * (lshr X, 31) --> (ashr X, 31) & Y + // (lshr X, 31) * Y --> (X < 0) ? Y : 0 // TODO: We are not checking one-use because the elimination of the multiply // is better for analysis? - // TODO: Should we canonicalize to '(X < 0) ? Y : 0' instead? That would be - // more similar to what we're doing above. const APInt *C; - if (match(Op0, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1) - return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op1); - if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1) - return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0); + if (match(&I, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) && + *C == C->getBitWidth() - 1) { + Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); + return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty)); + } + + // (and X, 1) * Y --> (trunc X) ? 
Y : 0 + if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) { + Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty)); + return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty)); + } // ((ashr X, 31) | 1) * X --> abs(X) // X * ((ashr X, 31) | 1) --> abs(X) if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X), - m_SpecificIntAllowUndef(BitWidth - 1)), - m_One()), - m_Deferred(X)))) { + m_SpecificIntAllowUndef(BitWidth - 1)), + m_One()), + m_Deferred(X)))) { Value *Abs = Builder.CreateBinaryIntrinsic( Intrinsic::abs, X, ConstantInt::getBool(I.getContext(), I.hasNoSignedWrap())); @@ -442,7 +460,7 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { } Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { - if (Value *V = SimplifyFMulInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -532,9 +550,8 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { // sqrt(X) * sqrt(Y) -> sqrt(X * Y) // nnan disallows the possibility of returning a number if both operands are // negative (in that case, we should return NaN). - if (I.hasNoNaNs() && - match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) && - match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) { + if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) && + match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) { Value *XY = Builder.CreateFMulFMF(X, Y, &I); Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I); return replaceInstUsesWith(I, Sqrt); @@ -548,11 +565,11 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { // has the necessary (reassoc) fast-math-flags. if (I.hasNoSignedZeros() && match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op1 == X) + match(Y, m_Sqrt(m_Value(X))) && Op1 == X) return BinaryOperator::CreateFDivFMF(X, Y, &I); if (I.hasNoSignedZeros() && match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op0 == X) + match(Y, m_Sqrt(m_Value(X))) && Op0 == X) return BinaryOperator::CreateFDivFMF(X, Y, &I); // Like the similar transform in instsimplify, this requires 'nsz' because @@ -561,14 +578,12 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { Op0->hasNUses(2)) { // Peek through fdiv to find squaring of square root: // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y - if (match(Op0, m_FDiv(m_Value(X), - m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) { + if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) { Value *XX = Builder.CreateFMulFMF(X, X, &I); return BinaryOperator::CreateFDivFMF(XX, Y, &I); } // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X) - if (match(Op0, m_FDiv(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y)), - m_Value(X)))) { + if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) { Value *XX = Builder.CreateFMulFMF(X, X, &I); return BinaryOperator::CreateFDivFMF(Y, XX, &I); } @@ -777,7 +792,8 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { // TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds. 
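// For example (hypothetical): udiv i32 12, (select i1 %c, i32 4, i32 6) // becomes select i1 %c, i32 3, i32 2, evaluating the udiv on both constant arms.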
if (match(Op0, m_ImmConstant()) && match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) { - if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1))) + if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1), + /*FoldWithMultiUse*/ true)) return R; } @@ -853,12 +869,13 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { if (match(Op0, m_One())) { assert(!Ty->isIntOrIntVectorTy(1) && "i1 divide not removed?"); if (IsSigned) { - // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the - // result is one, if Op1 is -1 then the result is minus one, otherwise - // it's zero. - Value *Inc = Builder.CreateAdd(Op1, Op0); + // 1 / 0 --> undef ; 1 / 1 --> 1 ; 1 / -1 --> -1 ; 1 / anything else --> 0 + // (Op1 + 1) u< 3 ? Op1 : 0 + // Op1 must be frozen because we are increasing its number of uses. + Value *F1 = Builder.CreateFreeze(Op1, Op1->getName() + ".fr"); + Value *Inc = Builder.CreateAdd(F1, Op0); Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(Ty, 3)); - return SelectInst::Create(Cmp, Op1, ConstantInt::get(Ty, 0)); + return SelectInst::Create(Cmp, F1, ConstantInt::get(Ty, 0)); } else { // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the // result is one, otherwise it's zero. @@ -900,113 +917,69 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { static const unsigned MaxDepth = 6; -namespace { - -using FoldUDivOperandCb = Instruction *(*)(Value *Op0, Value *Op1, - const BinaryOperator &I, - InstCombinerImpl &IC); - -/// Used to maintain state for visitUDivOperand(). -struct UDivFoldAction { - /// Informs visitUDiv() how to fold this operand. This can be zero if this - /// action joins two actions together. - FoldUDivOperandCb FoldAction; - - /// Which operand to fold. - Value *OperandToFold; - - union { - /// The instruction returned when FoldAction is invoked. - Instruction *FoldResult; - - /// Stores the LHS action index if this action joins two actions together. - size_t SelectLHSIdx; +// Take the exact integer log2 of the value. If DoFold is true, create the +// actual instructions, otherwise return a non-null dummy value. Return nullptr +// on failure. 
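+// For example (hypothetical): takeLog2 of (shl i32 4, %n) yields (add i32 2, %n), +// and takeLog2 of (select i1 %c, i32 8, i32 16) yields (select i1 %c, i32 3, i32 4).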
+static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth, + bool DoFold) { + auto IfFold = [DoFold](function_ref<Value *()> Fn) { + if (!DoFold) + return reinterpret_cast<Value *>(-1); + return Fn(); }; - UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand) : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {} - UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS) : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {} -}; - -} // end anonymous namespace - -// X udiv 2^C -> X >> C -static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, - const BinaryOperator &I, - InstCombinerImpl &IC) { - Constant *C1 = ConstantExpr::getExactLogBase2(cast<Constant>(Op1)); - if (!C1) - llvm_unreachable("Failed to constant fold udiv -> logbase2"); - BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, C1); - if (I.isExact()) - LShr->setIsExact(); - return LShr; -} - -// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) -// X udiv (zext (C1 << N)), where C1 is "1<<C2" --> X >> (N+C2) -static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, - InstCombinerImpl &IC) { - Value *ShiftLeft; - if (!match(Op1, m_ZExt(m_Value(ShiftLeft)))) - ShiftLeft = Op1; - - Constant *CI; - Value *N; - if (!match(ShiftLeft, m_Shl(m_Constant(CI), m_Value(N)))) - llvm_unreachable("match should never fail here!"); - Constant *Log2Base = ConstantExpr::getExactLogBase2(CI); - if (!Log2Base) - llvm_unreachable("getLogBase2 should never fail here!"); - N = IC.Builder.CreateAdd(N, Log2Base); - if (Op1 != ShiftLeft) - N = IC.Builder.CreateZExt(N, Op1->getType()); - BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); - if (I.isExact()) - LShr->setIsExact(); - return LShr; -} - -// Recursively visits the possible right hand operands of a udiv -// instruction, seeing through select instructions, to determine if we can -// replace the udiv with something simpler. If we find that an operand is not -// able to simplify the udiv, we abort the entire transformation. -static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, - SmallVectorImpl<UDivFoldAction> &Actions, - unsigned Depth = 0) { // FIXME: assert that Op1 isn't/doesn't contain undef. - // Check to see if this is an unsigned division with an exact power of 2, - // if so, convert to a right shift. - if (match(Op1, m_Power2())) { - Actions.push_back(UDivFoldAction(foldUDivPow2Cst, Op1)); - return Actions.size(); - } - - // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) - if (match(Op1, m_Shl(m_Power2(), m_Value())) || - match(Op1, m_ZExt(m_Shl(m_Power2(), m_Value())))) { - Actions.push_back(UDivFoldAction(foldUDivShl, Op1)); - return Actions.size(); - } + // log2(2^C) -> C + if (match(Op, m_Power2())) + return IfFold([&]() { + Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op)); + if (!C) + llvm_unreachable("Failed to constant fold udiv -> logbase2"); + return C; + }); // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth++ == MaxDepth) - return 0; - - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - // FIXME: missed optimization: if one of the hands of select is/contains - // undef, just directly pick the other one. - // FIXME: can both hands contain undef? - if (size_t LHSIdx = - visitUDivOperand(Op0, SI->getOperand(1), I, Actions, Depth)) - if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions, Depth)) { - Actions.push_back(UDivFoldAction(nullptr, Op1, LHSIdx - 1)); - return Actions.size(); - } + return nullptr; + + // log2(zext X) -> zext log2(X) + // FIXME: Require one use? 
+ Value *X, *Y; + if (match(Op, m_ZExt(m_Value(X)))) + if (Value *LogX = takeLog2(Builder, X, Depth, DoFold)) + return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); }); + + // log2(X << Y) -> log2(X) + Y + // FIXME: Require one use unless X is 1? + if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) + if (Value *LogX = takeLog2(Builder, X, Depth, DoFold)) + return IfFold([&]() { return Builder.CreateAdd(LogX, Y); }); + + // log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y) + // FIXME: missed optimization: if one of the hands of select is/contains + // undef, just directly pick the other one. + // FIXME: can both hands contain undef? + // FIXME: Require one use? + if (SelectInst *SI = dyn_cast<SelectInst>(Op)) + if (Value *LogX = takeLog2(Builder, SI->getOperand(1), Depth, DoFold)) + if (Value *LogY = takeLog2(Builder, SI->getOperand(2), Depth, DoFold)) + return IfFold([&]() { + return Builder.CreateSelect(SI->getOperand(0), LogX, LogY); + }); + + // log2(umin(X, Y)) -> umin(log2(X), log2(Y)) + // log2(umax(X, Y)) -> umax(log2(X), log2(Y)) + auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op); + if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned()) + if (Value *LogX = takeLog2(Builder, MinMax->getLHS(), Depth, DoFold)) + if (Value *LogY = takeLog2(Builder, MinMax->getRHS(), Depth, DoFold)) + return IfFold([&]() { + return Builder.CreateBinaryIntrinsic( + MinMax->getIntrinsicID(), LogX, LogY); + }); - return 0; + return nullptr; } /// If we have zero-extended operands of an unsigned div or rem, we may be able @@ -1047,7 +1020,7 @@ static Instruction *narrowUDivURem(BinaryOperator &I, } Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) { - if (Value *V = SimplifyUDivInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyUDivInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1106,42 +1079,18 @@ Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) { return BinaryOperator::CreateUDiv(A, X); } - // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) - SmallVector<UDivFoldAction, 6> UDivActions; - if (visitUDivOperand(Op0, Op1, I, UDivActions)) - for (unsigned i = 0, e = UDivActions.size(); i != e; ++i) { - FoldUDivOperandCb Action = UDivActions[i].FoldAction; - Value *ActionOp1 = UDivActions[i].OperandToFold; - Instruction *Inst; - if (Action) - Inst = Action(Op0, ActionOp1, I, *this); - else { - // This action joins two actions together. The RHS of this action is - // simply the last action we processed, we saved the LHS action index in - // the joining action. - size_t SelectRHSIdx = i - 1; - Value *SelectRHS = UDivActions[SelectRHSIdx].FoldResult; - size_t SelectLHSIdx = UDivActions[i].SelectLHSIdx; - Value *SelectLHS = UDivActions[SelectLHSIdx].FoldResult; - Inst = SelectInst::Create(cast<SelectInst>(ActionOp1)->getCondition(), - SelectLHS, SelectRHS); - } - - // If this is the last action to process, return it to the InstCombiner. - // Otherwise, we insert it before the UDiv and record it so that we may - // use it as part of a joining action (i.e., a SelectInst). - if (e - i != 1) { - Inst->insertBefore(&I); - UDivActions[i].FoldResult = Inst; - } else - return Inst; - } + // Op1 udiv Op2 -> Op1 lshr log2(Op2), if log2() folds away. 
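+ // For example (hypothetical IR): udiv i32 %x, (shl i32 2, %n) becomes + // lshr i32 %x, (add i32 1, %n), since log2(2 << %n) is 1 + %n.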
+ if (takeLog2(Builder, Op1, /*Depth*/0, /*DoFold*/false)) { + Value *Res = takeLog2(Builder, Op1, /*Depth*/0, /*DoFold*/true); + return replaceInstUsesWith( + I, Builder.CreateLShr(Op0, Res, I.getName(), I.isExact())); + } return nullptr; } Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { - if (Value *V = SimplifySDivInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifySDivInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1223,9 +1172,9 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { if (match(&I, m_c_BinOp( m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())), m_Deferred(X)))) { - Constant *NegOne = ConstantInt::getAllOnesValue(Ty); - Value *Cond = Builder.CreateICmpSGT(X, NegOne); - return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), NegOne); + Value *Cond = Builder.CreateIsNotNeg(X); + return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), + ConstantInt::getAllOnesValue(Ty)); } // If the sign bits of both operands are zero (i.e. we can prove they are @@ -1242,8 +1191,10 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { if (match(Op1, m_NegatedPower2())) { // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) -> // -> -(X udiv (1 << C)) -> -(X u>> C) - return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst( - Op0, ConstantExpr::getNeg(cast<Constant>(Op1)), I, *this))); + Constant *CNegLog2 = ConstantExpr::getExactLogBase2( + ConstantExpr::getNeg(cast<Constant>(Op1))); + Value *Shr = Builder.CreateLShr(Op0, CNegLog2, I.getName(), I.isExact()); + return BinaryOperator::CreateNeg(Shr); } if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { @@ -1368,7 +1319,9 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I, } Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { - if (Value *V = SimplifyFDivInst(I.getOperand(0), I.getOperand(1), + Module *M = I.getModule(); + + if (Value *V = simplifyFDivInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1433,8 +1386,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { !IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) && match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X))); - if ((IsTan || IsCot) && - hasFloatFn(&TLI, I.getType(), LibFunc_tan, LibFunc_tanf, LibFunc_tanl)) { + if ((IsTan || IsCot) && hasFloatFn(M, &TLI, I.getType(), LibFunc_tan, + LibFunc_tanf, LibFunc_tanl)) { IRBuilder<> B(&I); IRBuilder<>::FastMathFlagGuard FMFGuard(B); B.setFastMathFlags(I.getFastMathFlags()); @@ -1498,7 +1451,8 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) { // TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds. if (match(Op0, m_ImmConstant()) && match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) { - if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1))) + if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1), + /*FoldWithMultiUse*/ true)) return R; } @@ -1530,7 +1484,7 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) { } Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) { - if (Value *V = SimplifyURemInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyURemInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1560,11 +1514,13 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) { return CastInst::CreateZExtOrBitCast(Cmp, Ty); } - // X urem C -> X < C ? X : X - C, where C >= signbit. + // Op0 urem C -> Op0 < C ? 
Op0 : Op0 - C, where C >= signbit. + // Op0 must be frozen because we are increasing its number of uses. if (match(Op1, m_Negative())) { - Value *Cmp = Builder.CreateICmpULT(Op0, Op1); - Value *Sub = Builder.CreateSub(Op0, Op1); - return SelectInst::Create(Cmp, Op0, Sub); + Value *F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr"); + Value *Cmp = Builder.CreateICmpULT(F0, Op1); + Value *Sub = Builder.CreateSub(F0, Op1); + return SelectInst::Create(Cmp, F0, Sub); } // If the divisor is a sext of a boolean, then the divisor must be max @@ -1581,7 +1537,7 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) { } Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { - if (Value *V = SimplifySRemInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifySRemInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1653,7 +1609,7 @@ Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { } Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) { - if (Value *V = SimplifyFRemInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyFRemInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index 42ba4a34a5a9..c573b03f31a6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -248,6 +248,20 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { return nullptr; switch (I->getOpcode()) { + case Instruction::And: { + Constant *ShAmt; + // sub(y,and(lshr(x,C),1)) --> add(ashr(shl(x,(BW-1)-C),BW-1),y) + if (match(I, m_c_And(m_OneUse(m_TruncOrSelf( + m_LShr(m_Value(X), m_ImmConstant(ShAmt)))), + m_One()))) { + unsigned BW = X->getType()->getScalarSizeInBits(); + Constant *BWMinusOne = ConstantInt::get(X->getType(), BW - 1); + Value *R = Builder.CreateShl(X, Builder.CreateSub(BWMinusOne, ShAmt)); + R = Builder.CreateAShr(R, BWMinusOne); + return Builder.CreateTruncOrBitCast(R, I->getType()); + } + break; + } case Instruction::SDiv: // `sdiv` is negatible if divisor is not undef/INT_MIN/1. // While this is normally not behind a use-check, diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 09694d50468f..90a796a0939e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -511,7 +511,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { // Scan to see if all operands are the same opcode, and all have one user. for (Value *V : drop_begin(PN.incoming_values())) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V); - if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() || + if (!GEP || !GEP->hasOneUser() || + GEP->getSourceElementType() != FirstInst->getSourceElementType() || GEP->getNumOperands() != FirstInst->getNumOperands()) return nullptr; @@ -657,6 +658,10 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); + // Can't forward swifterror through a phi. + if (FirstLI->getOperand(0)->isSwiftError()) + return nullptr; + // FIXME: This is overconservative; this transform is allowed in some cases // for atomic operations. 
if (FirstLI->isAtomic()) @@ -693,6 +698,10 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { LI->getPointerAddressSpace() != LoadAddrSpace) return nullptr; + // Can't forward swifterror through a phi. + if (LI->getOperand(0)->isSwiftError()) + return nullptr; + // We can't sink the load if the loaded value could be modified between // the load and the PHI. if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI)) @@ -1112,6 +1121,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { return nullptr; } + // If the incoming value is a PHI node before a catchswitch, we cannot + // extract the value within that BB because we cannot insert any non-PHI + // instructions in the BB. + for (auto *Pred : PN->blocks()) + if (Pred->getFirstInsertionPt() == Pred->end()) + return nullptr; + for (User *U : PN->users()) { Instruction *UserI = cast<Instruction>(U); @@ -1260,12 +1276,12 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, // ... ... // \ / // phi [true] [false] - if (!PN.getType()->isIntegerTy(1)) - return nullptr; - - if (PN.getNumOperands() != 2) - return nullptr; - + // and + // switch (cond) + // case v1: / \ case v2: + // ... ... + // \ / + // phi [v1] [v2] // Make sure all inputs are constants. if (!all_of(PN.operands(), [](Value *V) { return isa<ConstantInt>(V); })) return nullptr; @@ -1275,50 +1291,77 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, if (!DT.isReachableFromEntry(BB)) return nullptr; - // Same inputs. - if (PN.getOperand(0) == PN.getOperand(1)) - return PN.getOperand(0); + // Determine which value the condition of the idom has for which successor. + LLVMContext &Context = PN.getContext(); + auto *IDom = DT.getNode(BB)->getIDom()->getBlock(); + Value *Cond; + SmallDenseMap<ConstantInt *, BasicBlock *, 8> SuccForValue; + SmallDenseMap<BasicBlock *, unsigned> SuccCount; + auto AddSucc = [&](ConstantInt *C, BasicBlock *Succ) { + SuccForValue[C] = Succ; + ++SuccCount[Succ]; + }; + if (auto *BI = dyn_cast<BranchInst>(IDom->getTerminator())) { + if (BI->isUnconditional()) + return nullptr; - BasicBlock *TruePred = nullptr, *FalsePred = nullptr; - for (auto *Pred : predecessors(BB)) { - auto *Input = cast<ConstantInt>(PN.getIncomingValueForBlock(Pred)); - if (Input->isAllOnesValue()) - TruePred = Pred; - else - FalsePred = Pred; + Cond = BI->getCondition(); + AddSucc(ConstantInt::getTrue(Context), BI->getSuccessor(0)); + AddSucc(ConstantInt::getFalse(Context), BI->getSuccessor(1)); + } else if (auto *SI = dyn_cast<SwitchInst>(IDom->getTerminator())) { + Cond = SI->getCondition(); + ++SuccCount[SI->getDefaultDest()]; + for (auto Case : SI->cases()) + AddSucc(Case.getCaseValue(), Case.getCaseSuccessor()); + } else { + return nullptr; } - assert(TruePred && FalsePred && "Must be!"); - // Check which edge of the dominator dominates the true input. If it is the - // false edge, we should invert the condition. - auto *IDom = DT.getNode(BB)->getIDom()->getBlock(); - auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); - if (!BI || BI->isUnconditional()) + if (Cond->getType() != PN.getType()) return nullptr; // Check that edges outgoing from the idom's terminators dominate respective // inputs of the Phi. - BasicBlockEdge TrueOutEdge(IDom, BI->getSuccessor(0)); - BasicBlockEdge FalseOutEdge(IDom, BI->getSuccessor(1)); + Optional<bool> Invert; + for (auto Pair : zip(PN.incoming_values(), PN.blocks())) { + auto *Input = cast<ConstantInt>(std::get<0>(Pair)); + BasicBlock *Pred = std::get<1>(Pair); + auto IsCorrectInput = [&](ConstantInt *Input) { + // The input needs to be dominated by the corresponding edge of the idom. 
+ // This edge cannot be a multi-edge, as that would imply that multiple + // different condition values follow the same edge. + auto It = SuccForValue.find(Input); + return It != SuccForValue.end() && SuccCount[It->second] == 1 && + DT.dominates(BasicBlockEdge(IDom, It->second), + BasicBlockEdge(Pred, BB)); + }; + + // Depending on the constant, the condition may need to be inverted. + bool NeedsInvert; + if (IsCorrectInput(Input)) + NeedsInvert = false; + else if (IsCorrectInput(cast<ConstantInt>(ConstantExpr::getNot(Input)))) + NeedsInvert = true; + else + return nullptr; + + // Make sure the inversion requirement is always the same. + if (Invert && *Invert != NeedsInvert) + return nullptr; - BasicBlockEdge TrueIncEdge(TruePred, BB); - BasicBlockEdge FalseIncEdge(FalsePred, BB); + Invert = NeedsInvert; + } - auto *Cond = BI->getCondition(); - if (DT.dominates(TrueOutEdge, TrueIncEdge) && - DT.dominates(FalseOutEdge, FalseIncEdge)) - // This Phi is actually equivalent to branching condition of IDom. + if (!*Invert) return Cond; - if (DT.dominates(TrueOutEdge, FalseIncEdge) && - DT.dominates(FalseOutEdge, TrueIncEdge)) { - // This Phi is actually opposite to branching condition of IDom. We invert - // the condition that will potentially open up some opportunities for - // sinking. - auto InsertPt = BB->getFirstInsertionPt(); - if (InsertPt != BB->end()) { - Self.Builder.SetInsertPoint(&*InsertPt); - return Self.Builder.CreateNot(Cond); - } + + // This Phi is actually opposite to branching condition of IDom. We invert + // the condition that will potentially open up some opportunities for + // sinking. + auto InsertPt = BB->getFirstInsertionPt(); + if (InsertPt != BB->end()) { + Self.Builder.SetInsertPoint(&*InsertPt); + return Self.Builder.CreateNot(Cond); } return nullptr; @@ -1327,7 +1370,7 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, // PHINode simplification // Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN))) + if (Value *V = simplifyInstruction(&PN, SQ.getWithInstruction(&PN))) return replaceInstUsesWith(PN, V); if (Instruction *Result = foldPHIArgZextsIntoPHI(PN)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 65e60498ff95..ad96a5f475f1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -49,13 +50,6 @@ using namespace llvm; using namespace PatternMatch; -static Value *createMinMax(InstCombiner::BuilderTy &Builder, - SelectPatternFlavor SPF, Value *A, Value *B) { - CmpInst::Predicate Pred = getMinMaxPred(SPF); - assert(CmpInst::isIntPredicate(Pred) && "Expected integer predicate"); - return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); -} - /// Replace a select operand based on an equality comparison with the identity /// constant of a binop. static Instruction *foldSelectBinOpIdentity(SelectInst &Sel, @@ -370,6 +364,7 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, // one-use constraint, but that needs to be examined carefully since it may not // reduce the total number of instructions. 
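// For example (a hypothetical sketch): select i1 %c, (add i32 %x, 1), (add i32 %y, 1) // can become %s = select i1 %c, i32 %x, i32 %y followed by add i32 %s, 1.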
if (TI->getNumOperands() != 2 || FI->getNumOperands() != 2 || + !TI->isSameOperationAs(FI) || (!isa<BinaryOperator>(TI) && !isa<GetElementPtrInst>(TI)) || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; @@ -444,69 +439,56 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the // transformation we are doing here. - if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) { - if (TVI->hasOneUse() && !isa<Constant>(FalseVal)) { - if (unsigned SFO = getSelectFoldableOperands(TVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { - OpToFold = 2; - } - - if (OpToFold) { - Constant *C = ConstantExpr::getBinOpIdentity(TVI->getOpcode(), - TVI->getType(), true); - Value *OOp = TVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0, 1 and -1. - const APInt *OOpC; - bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); - if (!isa<Constant>(OOp) || - (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { - Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); - NewSel->takeName(TVI); - BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), - FalseVal, NewSel); - BO->copyIRFlags(TVI); - return BO; + auto TryFoldSelectIntoOp = [&](SelectInst &SI, Value *TrueVal, + Value *FalseVal, + bool Swapped) -> Instruction * { + if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) { + if (TVI->hasOneUse() && !isa<Constant>(FalseVal)) { + if (unsigned SFO = getSelectFoldableOperands(TVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && FalseVal == TVI->getOperand(0)) + OpToFold = 1; + else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) + OpToFold = 2; + + if (OpToFold) { + FastMathFlags FMF; + // TODO: We probably ought to revisit cases where the select and FP + // instructions have different flags and add tests to ensure the + // behaviour is correct. + if (isa<FPMathOperator>(&SI)) + FMF = SI.getFastMathFlags(); + Constant *C = ConstantExpr::getBinOpIdentity( + TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros()); + Value *OOp = TVI->getOperand(2 - OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0, 1 and -1. + const APInt *OOpC; + bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); + if (!isa<Constant>(OOp) || + (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { + Value *NewSel = Builder.CreateSelect( + SI.getCondition(), Swapped ? C : OOp, Swapped ? OOp : C); + if (isa<FPMathOperator>(&SI)) + cast<Instruction>(NewSel)->setFastMathFlags(FMF); + NewSel->takeName(TVI); + BinaryOperator *BO = + BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); + BO->copyIRFlags(TVI); + return BO; + } } } } } - } + return nullptr; + }; - if (auto *FVI = dyn_cast<BinaryOperator>(FalseVal)) { - if (FVI->hasOneUse() && !isa<Constant>(TrueVal)) { - if (unsigned SFO = getSelectFoldableOperands(FVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { - OpToFold = 2; - } + if (Instruction *R = TryFoldSelectIntoOp(SI, TrueVal, FalseVal, false)) + return R; - if (OpToFold) { - Constant *C = ConstantExpr::getBinOpIdentity(FVI->getOpcode(), - FVI->getType(), true); - Value *OOp = FVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0, 1 and -1. 
- const APInt *OOpC; - bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); - if (!isa<Constant>(OOp) || - (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { - Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); - NewSel->takeName(FVI); - BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), - TrueVal, NewSel); - BO->copyIRFlags(FVI); - return BO; - } - } - } - } + if (Instruction *R = TryFoldSelectIntoOp(SI, FalseVal, TrueVal, true)) + return R; return nullptr; } @@ -535,6 +517,16 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp, // Where %B may be optionally shifted: lshr %X, %Z. Value *X, *Z; const bool HasShift = match(B, m_OneUse(m_LShr(m_Value(X), m_Value(Z)))); + + // The shift must be valid. + // TODO: This restricts the fold to constant shift amounts. Is there a way to + // handle variable shifts safely? PR47012 + if (HasShift && + !match(Z, m_SpecificInt_ICMP(CmpInst::ICMP_ULT, + APInt(SelType->getScalarSizeInBits(), + SelType->getScalarSizeInBits())))) + return nullptr; + if (!HasShift) X = B; @@ -1096,74 +1088,55 @@ static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) { return true; } -/// If this is an integer min/max (icmp + select) with a constant operand, -/// create the canonical icmp for the min/max operation and canonicalize the -/// constant to the 'false' operand of the select: -/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2 -/// Note: if C1 != C2, this will change the icmp constant to the existing -/// constant operand of the select. -static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel, - ICmpInst &Cmp, - InstCombinerImpl &IC) { - if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1))) - return nullptr; - - // Canonicalize the compare predicate based on whether we have min or max. +static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, + InstCombinerImpl &IC) { Value *LHS, *RHS; - SelectPatternResult SPR = matchSelectPattern(&Sel, LHS, RHS); - if (!SelectPatternResult::isMinOrMax(SPR.Flavor)) + // TODO: What to do with pointer min/max patterns? + if (!Sel.getType()->isIntOrIntVectorTy()) return nullptr; - // Is this already canonical? - ICmpInst::Predicate CanonicalPred = getMinMaxPred(SPR.Flavor); - if (Cmp.getOperand(0) == LHS && Cmp.getOperand(1) == RHS && - Cmp.getPredicate() == CanonicalPred) - return nullptr; - - // Bail out on unsimplified X-0 operand (due to some worklist management bug), - // as this may cause an infinite combine loop. Let the sub be folded first. - if (match(LHS, m_Sub(m_Value(), m_Zero())) || - match(RHS, m_Sub(m_Value(), m_Zero()))) - return nullptr; - - // Create the canonical compare and plug it into the select. - IC.replaceOperand(Sel, 0, IC.Builder.CreateICmp(CanonicalPred, LHS, RHS)); - - // If the select operands did not change, we're done. - if (Sel.getTrueValue() == LHS && Sel.getFalseValue() == RHS) - return &Sel; - - // If we are swapping the select operands, swap the metadata too. 
- assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS && - "Unexpected results from matchSelectPattern"); - Sel.swapValues(); - Sel.swapProfMetadata(); - return &Sel; -} - -static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { - if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1))) - return nullptr; - - Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; - if (SPF != SelectPatternFlavor::SPF_ABS && - SPF != SelectPatternFlavor::SPF_NABS) - return nullptr; - - // Note that NSW flag can only be propagated for normal, non-negated abs! - bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && - match(RHS, m_NSWNeg(m_Specific(LHS))); - Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); - Instruction *Abs = - IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); - - if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + if (SPF == SelectPatternFlavor::SPF_ABS || + SPF == SelectPatternFlavor::SPF_NABS) { + if (!Cmp.hasOneUse() && !RHS->hasOneUse()) + return nullptr; // TODO: Relax this restriction. + + // Note that NSW flag can only be propagated for normal, non-negated abs! + bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && + match(RHS, m_NSWNeg(m_Specific(LHS))); + Constant *IntMinIsPoisonC = + ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + Instruction *Abs = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); + + if (SPF == SelectPatternFlavor::SPF_NABS) + return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + return IC.replaceInstUsesWith(Sel, Abs); + } + + if (SelectPatternResult::isMinOrMax(SPF)) { + Intrinsic::ID IntrinsicID; + switch (SPF) { + case SelectPatternFlavor::SPF_UMIN: + IntrinsicID = Intrinsic::umin; + break; + case SelectPatternFlavor::SPF_UMAX: + IntrinsicID = Intrinsic::umax; + break; + case SelectPatternFlavor::SPF_SMIN: + IntrinsicID = Intrinsic::smin; + break; + case SelectPatternFlavor::SPF_SMAX: + IntrinsicID = Intrinsic::smax; + break; + default: + llvm_unreachable("Unexpected SPF"); + } + return IC.replaceInstUsesWith( + Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS)); + } - return IC.replaceInstUsesWith(Sel, Abs); + return nullptr; } /// If we have a select with an equality comparison, then we know the value in @@ -1336,6 +1309,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, ICmpInst::Predicate::ICMP_NE, APInt::getAllOnes(C0->getType()->getScalarSizeInBits())))) return nullptr; // Can't do, have all-ones element[s]. + Pred0 = ICmpInst::getFlippedStrictnessPredicate(Pred0); C0 = InstCombiner::AddOne(C0); break; default: @@ -1401,15 +1375,22 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, case ICmpInst::Predicate::ICMP_SGE: // Also non-canonical, but here we don't need to change C2, // so we don't have any restrictions on C2, so we can just handle it. + Pred1 = ICmpInst::Predicate::ICMP_SLT; std::swap(ReplacementLow, ReplacementHigh); break; default: return nullptr; // Unknown predicate. } + assert(Pred1 == ICmpInst::Predicate::ICMP_SLT && + "Unexpected predicate type."); // The thresholds of this clamp-like pattern. 
auto *ThresholdLowIncl = ConstantExpr::getNeg(C1); auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1); + + assert((Pred0 == ICmpInst::Predicate::ICMP_ULT || + Pred0 == ICmpInst::Predicate::ICMP_UGE) && + "Unexpected predicate type."); if (Pred0 == ICmpInst::Predicate::ICMP_UGE) std::swap(ThresholdLowIncl, ThresholdHighExcl); @@ -1530,17 +1511,71 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, return &Sel; } +static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal, + Value *FVal, + InstCombiner::BuilderTy &Builder) { + if (!Cmp->hasOneUse()) + return nullptr; + + const APInt *CmpC; + if (!match(Cmp->getOperand(1), m_APIntAllowUndef(CmpC))) + return nullptr; + + // (X u< 2) ? -X : -1 --> sext (X != 0) + Value *X = Cmp->getOperand(0); + if (Cmp->getPredicate() == ICmpInst::ICMP_ULT && *CmpC == 2 && + match(TVal, m_Neg(m_Specific(X))) && match(FVal, m_AllOnes())) + return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType()); + + // (X u> 1) ? -1 : -X --> sext (X != 0) + if (Cmp->getPredicate() == ICmpInst::ICMP_UGT && *CmpC == 1 && + match(FVal, m_Neg(m_Specific(X))) && match(TVal, m_AllOnes())) + return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType()); + + return nullptr; +} + +static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI) { + const APInt *CmpC; + Value *V; + CmpInst::Predicate Pred; + if (!match(ICI, m_ICmp(Pred, m_Value(V), m_APInt(CmpC)))) + return nullptr; + + BinaryOperator *BO; + const APInt *C; + CmpInst::Predicate CPred; + if (match(&SI, m_Select(m_Specific(ICI), m_APInt(C), m_BinOp(BO)))) + CPred = ICI->getPredicate(); + else if (match(&SI, m_Select(m_Specific(ICI), m_BinOp(BO), m_APInt(C)))) + CPred = ICI->getInversePredicate(); + else + return nullptr; + + const APInt *BinOpC; + if (!match(BO, m_BinOp(m_Specific(V), m_APInt(BinOpC)))) + return nullptr; + + ConstantRange R = ConstantRange::makeExactICmpRegion(CPred, *CmpC) + .binaryOp(BO->getOpcode(), *BinOpC); + if (R == *C) { + BO->dropPoisonGeneratingFlags(); + return BO; + } + return nullptr; +} + /// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) return NewSel; - if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) - return NewSel; + if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this)) + return NewSPF; - if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this)) - return NewAbs; + if (Value *V = foldSelectInstWithICmpConst(SI, ICI)) + return replaceInstUsesWith(SI, V); if (Value *V = canonicalizeClampLike(SI, *ICI, Builder)) return replaceInstUsesWith(SI, V); @@ -1572,6 +1607,22 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, } } + // Canonicalize a signbit condition to use zero constant by swapping: + // (CmpLHS > -1) ? TV : FV --> (CmpLHS < 0) ? FV : TV + // To avoid conflicts (infinite loops) with other canonicalizations, this is + // not applied with any constant select arm. 
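+ // For example (hypothetical): select (icmp sgt i32 %x, -1), %a, %b becomes + // select (icmp slt i32 %x, 0), %b, %a, with profile metadata swapped to match.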
+ if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes()) && + !match(TrueVal, m_Constant()) && !match(FalseVal, m_Constant()) && + ICI->hasOneUse()) { + InstCombiner::BuilderTy::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&SI); + Value *IsNeg = Builder.CreateIsNeg(CmpLHS, ICI->getName()); + replaceOperand(SI, 0, IsNeg); + SI.swapValues(); + SI.swapProfMetadata(); + return &SI; + } + // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring // decomposeBitTestICmp() might help. { @@ -1629,6 +1680,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder)) return V; + if (Instruction *V = foldSelectZeroOrOnes(ICI, TrueVal, FalseVal, Builder)) + return V; + if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); @@ -1698,114 +1752,6 @@ Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner, // TODO: This could be done in instsimplify. if (SPF1 == SPF2 && SelectPatternResult::isMinOrMax(SPF1)) return replaceInstUsesWith(Outer, Inner); - - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - // TODO: This could be done in instsimplify. - if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || - (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || - (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || - (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) - return replaceInstUsesWith(Outer, C); - } - - if (SPF1 == SPF2) { - const APInt *CB, *CC; - if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) { - // MIN(MIN(A, 23), 97) -> MIN(A, 23) - // MAX(MAX(A, 97), 23) -> MAX(A, 97) - // TODO: This could be done in instsimplify. - if ((SPF1 == SPF_UMIN && CB->ule(*CC)) || - (SPF1 == SPF_SMIN && CB->sle(*CC)) || - (SPF1 == SPF_UMAX && CB->uge(*CC)) || - (SPF1 == SPF_SMAX && CB->sge(*CC))) - return replaceInstUsesWith(Outer, Inner); - - // MIN(MIN(A, 97), 23) -> MIN(A, 23) - // MAX(MAX(A, 23), 97) -> MAX(A, 97) - if ((SPF1 == SPF_UMIN && CB->ugt(*CC)) || - (SPF1 == SPF_SMIN && CB->sgt(*CC)) || - (SPF1 == SPF_UMAX && CB->ult(*CC)) || - (SPF1 == SPF_SMAX && CB->slt(*CC))) { - Outer.replaceUsesOfWith(Inner, A); - return &Outer; - } - } - } - - // max(max(A, B), min(A, B)) --> max(A, B) - // min(min(A, B), max(A, B)) --> min(A, B) - // TODO: This could be done in instsimplify. - if (SPF1 == SPF2 && - ((SPF1 == SPF_UMIN && match(C, m_c_UMax(m_Specific(A), m_Specific(B)))) || - (SPF1 == SPF_SMIN && match(C, m_c_SMax(m_Specific(A), m_Specific(B)))) || - (SPF1 == SPF_UMAX && match(C, m_c_UMin(m_Specific(A), m_Specific(B)))) || - (SPF1 == SPF_SMAX && match(C, m_c_SMin(m_Specific(A), m_Specific(B)))))) - return replaceInstUsesWith(Outer, Inner); - - // ABS(ABS(X)) -> ABS(X) - // NABS(NABS(X)) -> NABS(X) - // TODO: This could be done in instsimplify. - if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) { - return replaceInstUsesWith(Outer, Inner); - } - - // ABS(NABS(X)) -> ABS(X) - // NABS(ABS(X)) -> NABS(X) - if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) || - (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { - SelectInst *SI = cast<SelectInst>(Inner); - Value *NewSI = - Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), - SI->getTrueValue(), SI->getName(), SI); - return replaceInstUsesWith(Outer, NewSI); - } - - auto IsFreeOrProfitableToInvert = - [&](Value *V, Value *&NotV, bool &ElidesXor) { - if (match(V, m_Not(m_Value(NotV)))) { - // If V has at most 2 uses then we can get rid of the xor operation - // entirely. 
- ElidesXor |= !V->hasNUsesOrMore(3); - return true; - } - - if (isFreeToInvert(V, !V->hasNUsesOrMore(3))) { - NotV = nullptr; - return true; - } - - return false; - }; - - Value *NotA, *NotB, *NotC; - bool ElidesXor = false; - - // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C) - // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C) - // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C) - // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C) - // - // This transform is performance neutral if we can elide at least one xor from - // the set of three operands, since we'll be tacking on an xor at the very - // end. - if (SelectPatternResult::isMinOrMax(SPF1) && - SelectPatternResult::isMinOrMax(SPF2) && - IsFreeOrProfitableToInvert(A, NotA, ElidesXor) && - IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && - IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { - if (!NotA) - NotA = Builder.CreateNot(A); - if (!NotB) - NotB = Builder.CreateNot(B); - if (!NotC) - NotC = Builder.CreateNot(C); - - Value *NewInner = createMinMax(Builder, getInverseMinMaxFlavor(SPF1), NotA, - NotB); - Value *NewOuter = Builder.CreateNot( - createMinMax(Builder, getInverseMinMaxFlavor(SPF2), NewInner, NotC)); - return replaceInstUsesWith(Outer, NewOuter); } return nullptr; @@ -2238,163 +2184,6 @@ static Value *foldSelectCmpXchg(SelectInst &SI) { return nullptr; } -static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, - Value *Y, - InstCombiner::BuilderTy &Builder) { - assert(SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern"); - bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN || - SPF == SelectPatternFlavor::SPF_UMAX; - // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change - // the constant value check to an assert. - Value *A; - const APInt *C1, *C2; - if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) && - match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) { - // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1 - // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1 - Value *NewMinMax = createMinMax(Builder, SPF, A, - ConstantInt::get(X->getType(), *C2 - *C1)); - return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax, - ConstantInt::get(X->getType(), *C1)); - } - - if (!IsUnsigned && match(X, m_NSWAdd(m_Value(A), m_APInt(C1))) && - match(Y, m_APInt(C2)) && X->hasNUses(2)) { - bool Overflow; - APInt Diff = C2->ssub_ov(*C1, Overflow); - if (!Overflow) { - // smin (add nsw A, C1), C2 --> add nsw (smin A, C2 - C1), C1 - // smax (add nsw A, C1), C2 --> add nsw (smax A, C2 - C1), C1 - Value *NewMinMax = createMinMax(Builder, SPF, A, - ConstantInt::get(X->getType(), Diff)); - return BinaryOperator::CreateNSW(BinaryOperator::Add, NewMinMax, - ConstantInt::get(X->getType(), *C1)); - } - } - - return nullptr; -} - -/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. 
-Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) { - Type *Ty = MinMax1.getType(); - - // We are looking for a tree of: - // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) - // Where the min and max could be reversed - Instruction *MinMax2; - BinaryOperator *AddSub; - const APInt *MinValue, *MaxValue; - if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) { - if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) - return nullptr; - } else if (match(&MinMax1, - m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) { - if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue)))) - return nullptr; - } else - return nullptr; - - // Check that the constants clamp a saturate, and that the new type would be - // sensible to convert to. - if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) - return nullptr; - // In what bitwidth can this be treated as saturating arithmetics? - unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; - // FIXME: This isn't quite right for vectors, but using the scalar type is a - // good first approximation for what should be done there. - if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) - return nullptr; - - // Also make sure that the number of uses is as expected. The 3 is for the - // the two items of the compare and the select, or 2 from a min/max. - unsigned ExpUses = isa(MinMax1) ? 2 : 3; - if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses)) - return nullptr; - - // Create the new type (which can be a vector type) - Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); - - Intrinsic::ID IntrinsicID; - if (AddSub->getOpcode() == Instruction::Add) - IntrinsicID = Intrinsic::sadd_sat; - else if (AddSub->getOpcode() == Instruction::Sub) - IntrinsicID = Intrinsic::ssub_sat; - else - return nullptr; - - // The two operands of the add/sub must be nsw-truncatable to the NewTy. This - // is usually achieved via a sext from a smaller type. - if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) > - NewBitWidth || - ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) - return nullptr; - - // Finally create and return the sat intrinsic, truncated to the new type - Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); - Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy); - Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy); - Value *Sat = Builder.CreateCall(F, {AT, BT}); - return CastInst::Create(Instruction::SExt, Sat, Ty); -} - -/// Reduce a sequence of min/max with a common operand. -static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, - Value *RHS, - InstCombiner::BuilderTy &Builder) { - assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max"); - // TODO: Allow FP min/max with nnan/nsz. - if (!LHS->getType()->isIntOrIntVectorTy()) - return nullptr; - - // Match 3 of the same min/max ops. Example: umin(umin(), umin()). - Value *A, *B, *C, *D; - SelectPatternResult L = matchSelectPattern(LHS, A, B); - SelectPatternResult R = matchSelectPattern(RHS, C, D); - if (SPF != L.Flavor || L.Flavor != R.Flavor) - return nullptr; - - // Look for a common operand. The use checks are different than usual because - // a min/max pattern typically has 2 uses of each op: 1 by the cmp and 1 by - // the select. 
- Value *MinMaxOp = nullptr; - Value *ThirdOp = nullptr; - if (!LHS->hasNUsesOrMore(3) && RHS->hasNUsesOrMore(3)) { - // If the LHS is only used in this chain and the RHS is used outside of it, - // reuse the RHS min/max because that will eliminate the LHS. - if (D == A || C == A) { - // min(min(a, b), min(c, a)) --> min(min(c, a), b) - // min(min(a, b), min(a, d)) --> min(min(a, d), b) - MinMaxOp = RHS; - ThirdOp = B; - } else if (D == B || C == B) { - // min(min(a, b), min(c, b)) --> min(min(c, b), a) - // min(min(a, b), min(b, d)) --> min(min(b, d), a) - MinMaxOp = RHS; - ThirdOp = A; - } - } else if (!RHS->hasNUsesOrMore(3)) { - // Reuse the LHS. This will eliminate the RHS. - if (D == A || D == B) { - // min(min(a, b), min(c, a)) --> min(min(a, b), c) - // min(min(a, b), min(c, b)) --> min(min(a, b), c) - MinMaxOp = LHS; - ThirdOp = C; - } else if (C == A || C == B) { - // min(min(a, b), min(b, d)) --> min(min(a, b), d) - // min(min(a, b), min(c, b)) --> min(min(a, b), d) - MinMaxOp = LHS; - ThirdOp = D; - } - } - if (!MinMaxOp || !ThirdOp) - return nullptr; - - CmpInst::Predicate P = getMinMaxPred(SPF); - Value *CmpABC = Builder.CreateICmp(P, MinMaxOp, ThirdOp); - return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp); -} - /// Try to reduce a funnel/rotate pattern that includes a compare and select /// into a funnel shift intrinsic. Example: /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b))) @@ -2484,7 +2273,8 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, // Match select ?, TC, FC where the constants are equal but negated. // TODO: Generalize to handle a negated variable operand? const APFloat *TC, *FC; - if (!match(TVal, m_APFloat(TC)) || !match(FVal, m_APFloat(FC)) || + if (!match(TVal, m_APFloatAllowUndef(TC)) || + !match(FVal, m_APFloatAllowUndef(FC)) || !abs(*TC).bitwiseIsEqual(abs(*FC))) return nullptr; @@ -2504,17 +2294,16 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, // (bitcast X) < 0 ? TC : -TC --> copysign(TC, -X) // (bitcast X) >= 0 ? -TC : TC --> copysign(TC, -X) // (bitcast X) >= 0 ? TC : -TC --> copysign(TC, X) + // Note: FMF from the select can not be propagated to the new instructions. if (IsTrueIfSignSet ^ TC->isNegative()) - X = Builder.CreateFNegFMF(X, &Sel); + X = Builder.CreateFNeg(X); // Canonicalize the magnitude argument as the positive constant since we do // not care about its sign. - Value *MagArg = TC->isNegative() ? FVal : TVal; + Value *MagArg = ConstantFP::get(SelType, abs(*TC)); Function *F = Intrinsic::getDeclaration(Sel.getModule(), Intrinsic::copysign, Sel.getType()); - Instruction *CopySign = CallInst::Create(F, { MagArg, X }); - CopySign->setFastMathFlags(Sel.getFastMathFlags()); - return CopySign; + return CallInst::Create(F, { MagArg, X }); } Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { @@ -2715,29 +2504,144 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op, } } -Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { +// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need +// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. +static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI, + InstCombinerImpl &IC) { Value *CondVal = SI.getCondition(); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - Type *SelType = SI.getType(); - // FIXME: Remove this workaround when freeze related patches are done. 
- // For select with undef operand which feeds into an equality comparison,
- // don't simplify it so loop unswitch can know the equality comparison
- // may have an undef operand. This is a workaround for PR31652 caused by
- // discrepancy about branch on undef between LoopUnswitch and GVN.
- if (match(TrueVal, m_Undef()) || match(FalseVal, m_Undef())) {
- if (llvm::any_of(SI.users(), [&](User *U) {
- ICmpInst *CI = dyn_cast<ICmpInst>(U);
- if (CI && CI->isEquality())
- return true;
- return false;
- })) {
+ for (bool Swap : {false, true}) {
+ Value *TrueVal = SI.getTrueValue();
+ Value *X = SI.getFalseValue();
+ CmpInst::Predicate Pred;
+
+ if (Swap)
+ std::swap(TrueVal, X);
+
+ if (!match(CondVal, m_FCmp(Pred, m_Specific(X), m_AnyZeroFP())))
+ continue;
+
+ // fold (X <= +/-0.0) ? (0.0 - X) : X to fabs(X), when 'Swap' is false
+ // fold (X > +/-0.0) ? X : (0.0 - X) to fabs(X), when 'Swap' is true
+ if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) {
+ if (!Swap && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ return IC.replaceInstUsesWith(SI, Fabs);
+ }
+ if (Swap && (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ return IC.replaceInstUsesWith(SI, Fabs);
+ }
+ }
+
+ // With nsz, when 'Swap' is false:
+ // fold (X < +/-0.0) ? -X : X or (X <= +/-0.0) ? -X : X to fabs(X)
+ // fold (X > +/-0.0) ? -X : X or (X >= +/-0.0) ? -X : X to -fabs(X)
+ // when 'Swap' is true:
+ // fold (X > +/-0.0) ? X : -X or (X >= +/-0.0) ? X : -X to fabs(X)
+ // fold (X < +/-0.0) ? X : -X or (X <= +/-0.0) ? X : -X to -fabs(X)
+ if (!match(TrueVal, m_FNeg(m_Specific(X))) || !SI.hasNoSignedZeros())
 return nullptr;
+
+ if (Swap)
+ Pred = FCmpInst::getSwappedPredicate(Pred);
+
+ bool IsLTOrLE = Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
+ Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE;
+ bool IsGTOrGE = Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
+ Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE;
+
+ if (IsLTOrLE) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ return IC.replaceInstUsesWith(SI, Fabs);
+ }
+ if (IsGTOrGE) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ Instruction *NewFNeg = UnaryOperator::CreateFNeg(Fabs);
+ NewFNeg->setFastMathFlags(SI.getFastMathFlags());
+ return NewFNeg;
 }
 }
- if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal,
+ return nullptr;
+}
+
+// Match the following IR pattern:
+// %x.lowbits = and i8 %x, %lowbitmask
+// %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
+// %x.biased = add i8 %x, %bias
+// %x.biased.highbits = and i8 %x.biased, %highbitmask
+// %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
+// Define:
+// %alignment = add i8 %lowbitmask, 1
+// Iff 1. %alignment is a power-of-two (aka, %lowbitmask is a low bit mask)
+// and 2. %bias is equal to either %lowbitmask or %alignment,
+// and 3.
%highbitmask is equal to ~%lowbitmask (aka, to -%alignment)
+// then this pattern can be transformed into:
+// %x.offset = add i8 %x, %lowbitmask
+// %x.roundedup = and i8 %x.offset, %highbitmask
+static Value *
+foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Cond = SI.getCondition();
+ Value *X = SI.getTrueValue();
+ Value *XBiasedHighBits = SI.getFalseValue();
+
+ ICmpInst::Predicate Pred;
+ Value *XLowBits;
+ if (!match(Cond, m_ICmp(Pred, m_Value(XLowBits), m_ZeroInt())) ||
+ !ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ if (Pred == ICmpInst::Predicate::ICMP_NE)
+ std::swap(X, XBiasedHighBits);
+
+ // FIXME: we could support non-splats here.
+
+ const APInt *LowBitMaskCst;
+ if (!match(XLowBits, m_And(m_Specific(X), m_APIntAllowUndef(LowBitMaskCst))))
+ return nullptr;
+
+ const APInt *BiasCst, *HighBitMaskCst;
+ if (!match(XBiasedHighBits,
+ m_And(m_Add(m_Specific(X), m_APIntAllowUndef(BiasCst)),
+ m_APIntAllowUndef(HighBitMaskCst))))
+ return nullptr;
+
+ if (!LowBitMaskCst->isMask())
+ return nullptr;
+
+ APInt InvertedLowBitMaskCst = ~*LowBitMaskCst;
+ if (InvertedLowBitMaskCst != *HighBitMaskCst)
+ return nullptr;
+
+ APInt AlignmentCst = *LowBitMaskCst + 1;
+
+ if (*BiasCst != AlignmentCst && *BiasCst != *LowBitMaskCst)
+ return nullptr;
+
+ if (!XBiasedHighBits->hasOneUse()) {
+ if (*BiasCst == *LowBitMaskCst)
+ return XBiasedHighBits;
+ return nullptr;
+ }
+
+ // FIXME: could we preserve undefs here?
+ Type *Ty = X->getType();
+ Value *XOffset = Builder.CreateAdd(X, ConstantInt::get(Ty, *LowBitMaskCst),
+ X->getName() + ".biased");
+ Value *R = Builder.CreateAnd(XOffset, ConstantInt::get(Ty, *HighBitMaskCst));
+ R->takeName(&SI);
+ return R;
+}
+
+Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+ Type *SelType = SI.getType();
+
+ if (Value *V = simplifySelectInst(CondVal, TrueVal, FalseVal,
 SQ.getWithInstruction(&SI)))
 return replaceInstUsesWith(SI, V);
@@ -2747,8 +2651,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
 if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this))
 return I;
- CmpInst::Predicate Pred;
-
 // Avoid potential infinite loops by checking for non-constant condition.
 // TODO: Can we assert instead by improving canonicalizeSelectToShuffle()?
 // Scalar select must have simplified?
@@ -2757,13 +2659,29 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
 // Folding select to and/or i1 isn't poison safe in general. impliesPoison
 // checks whether folding it does not convert a well-defined value into
 // poison.
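 // Example of the hazard (an illustrative sketch, not from the source): in
 // "%s = select i1 %b, i1 true, i1 %c", the select never observes %c when
 // %b is true, but "or i1 %b, %c" always does; if %c were poison on that
 // path, the fold would turn a well-defined 'true' into poison.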
- if (match(TrueVal, m_One()) && impliesPoison(FalseVal, CondVal)) { - // Change: A = select B, true, C --> A = or B, C - return BinaryOperator::CreateOr(CondVal, FalseVal); + if (match(TrueVal, m_One())) { + if (impliesPoison(FalseVal, CondVal)) { + // Change: A = select B, true, C --> A = or B, C + return BinaryOperator::CreateOr(CondVal, FalseVal); + } + + if (auto *LHS = dyn_cast(CondVal)) + if (auto *RHS = dyn_cast(FalseVal)) + if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false, + /*IsSelectLogical*/ true)) + return replaceInstUsesWith(SI, V); } - if (match(FalseVal, m_Zero()) && impliesPoison(TrueVal, CondVal)) { - // Change: A = select B, C, false --> A = and B, C - return BinaryOperator::CreateAnd(CondVal, TrueVal); + if (match(FalseVal, m_Zero())) { + if (impliesPoison(TrueVal, CondVal)) { + // Change: A = select B, C, false --> A = and B, C + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } + + if (auto *LHS = dyn_cast(CondVal)) + if (auto *RHS = dyn_cast(TrueVal)) + if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true, + /*IsSelectLogical*/ true)) + return replaceInstUsesWith(SI, V); } auto *One = ConstantInt::getTrue(SelType); @@ -2821,6 +2739,20 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero())) return replaceOperand(SI, 0, A); + Value *C; + // select (~a | c), a, b -> and a, (or c, freeze(b)) + if (match(CondVal, m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))) && + CondVal->hasOneUse()) { + FalseVal = Builder.CreateFreeze(FalseVal); + return BinaryOperator::CreateAnd(TrueVal, Builder.CreateOr(C, FalseVal)); + } + // select (~c & b), a, b -> and b, (or freeze(a), c) + if (match(CondVal, m_c_And(m_Not(m_Value(C)), m_Specific(FalseVal))) && + CondVal->hasOneUse()) { + TrueVal = Builder.CreateFreeze(TrueVal); + return BinaryOperator::CreateAnd(FalseVal, Builder.CreateOr(C, TrueVal)); + } + if (!SelType->isVectorTy()) { if (Value *S = simplifyWithOpReplaced(TrueVal, CondVal, One, SQ, /* AllowRefinement */ true)) @@ -2846,16 +2778,11 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { /* IsAnd */ IsAnd)) return I; - if (auto *ICmp0 = dyn_cast(CondVal)) { - if (auto *ICmp1 = dyn_cast(Op1)) { - if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd, - /* IsLogical */ true)) + if (auto *ICmp0 = dyn_cast(CondVal)) + if (auto *ICmp1 = dyn_cast(Op1)) + if (auto *V = foldAndOrOfICmps(ICmp0, ICmp1, SI, IsAnd, + /* IsLogical */ true)) return replaceInstUsesWith(SI, V); - - if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd)) - return replaceInstUsesWith(SI, V); - } - } } // select (select a, true, b), c, false -> select a, c, false @@ -2959,42 +2886,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { } } - // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need - // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. - // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && - match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) && - (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // (X > +/-0.0) ? 
X : (0.0 - X) --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) && - (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // With nnan and nsz: - // (X < +/-0.0) ? -X : X --> fabs(X) - // (X <= +/-0.0) ? -X : X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && - match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() && - (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE || - Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // With nnan and nsz: - // (X > +/-0.0) ? X : -X --> fabs(X) - // (X >= +/-0.0) ? X : -X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() && - (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE || - Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } + // Fold selecting to fabs. + if (Instruction *Fabs = foldSelectWithFCmpToFabs(SI, *this)) + return Fabs; // See if we are selecting two values based on a comparison of the two values. if (ICmpInst *ICI = dyn_cast(CondVal)) @@ -3066,8 +2960,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *R = foldSPFofSPF(cast(RHS), SPF2, LHS2, RHS2, SI, SPF, LHS)) return R; - // TODO. - // ABS(-X) -> ABS(X) } if (SelectPatternResult::isMinOrMax(SPF)) { @@ -3102,46 +2994,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType); return replaceInstUsesWith(SI, NewCast); } - - // MAX(~a, ~b) -> ~MIN(a, b) - // MAX(~a, C) -> ~MIN(a, ~C) - // MIN(~a, ~b) -> ~MAX(a, b) - // MIN(~a, C) -> ~MAX(a, ~C) - auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { - Value *A; - if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) && - !isFreeToInvert(A, A->hasOneUse()) && - // Passing false to only consider m_Not and constants. - isFreeToInvert(Y, false)) { - Value *B = Builder.CreateNot(Y); - Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF), - A, B); - // Copy the profile metadata. - if (MDNode *MD = SI.getMetadata(LLVMContext::MD_prof)) { - cast(NewMinMax)->setMetadata(LLVMContext::MD_prof, MD); - // Swap the metadata if the operands are swapped. 
- if (X == SI.getFalseValue() && Y == SI.getTrueValue()) - cast(NewMinMax)->swapProfMetadata(); - } - - return BinaryOperator::CreateNot(NewMinMax); - } - - return nullptr; - }; - - if (Instruction *I = moveNotAfterMinMax(LHS, RHS)) - return I; - if (Instruction *I = moveNotAfterMinMax(RHS, LHS)) - return I; - - if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder)) - return I; - - if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) - return I; - if (Instruction *I = matchSAddSubSat(SI)) - return I; } } @@ -3307,35 +3159,42 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder)) return replaceInstUsesWith(SI, Fr); + if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder)) + return replaceInstUsesWith(SI, V); + // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0) // Load inst is intentionally not checked for hasOneUse() if (match(FalseVal, m_Zero()) && - match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal), - m_CombineOr(m_Undef(), m_Zero())))) { - auto *MaskedLoad = cast(TrueVal); - if (isa(MaskedLoad->getArgOperand(3))) - MaskedLoad->setArgOperand(3, FalseVal /* Zero */); - return replaceInstUsesWith(SI, MaskedLoad); + (match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal), + m_CombineOr(m_Undef(), m_Zero()))) || + match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal), + m_CombineOr(m_Undef(), m_Zero()))))) { + auto *MaskedInst = cast(TrueVal); + if (isa(MaskedInst->getArgOperand(3))) + MaskedInst->setArgOperand(3, FalseVal /* Zero */); + return replaceInstUsesWith(SI, MaskedInst); } Value *Mask; if (match(TrueVal, m_Zero()) && - match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), - m_CombineOr(m_Undef(), m_Zero()))) && + (match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), + m_CombineOr(m_Undef(), m_Zero()))) || + match(FalseVal, m_MaskedGather(m_Value(), m_Value(), m_Value(Mask), + m_CombineOr(m_Undef(), m_Zero())))) && (CondVal->getType() == Mask->getType())) { // We can remove the select by ensuring the load zeros all lanes the // select would have. We determine this by proving there is no overlap // between the load and select masks. 
// (i.e (load_mask & select_mask) == 0 == no overlap) bool CanMergeSelectIntoLoad = false; - if (Value *V = SimplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI))) + if (Value *V = simplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI))) CanMergeSelectIntoLoad = match(V, m_Zero()); if (CanMergeSelectIntoLoad) { - auto *MaskedLoad = cast(FalseVal); - if (isa(MaskedLoad->getArgOperand(3))) - MaskedLoad->setArgOperand(3, TrueVal /* Zero */); - return replaceInstUsesWith(SI, MaskedLoad); + auto *MaskedInst = cast(FalseVal); + if (isa(MaskedInst->getArgOperand(3))) + MaskedInst->setArgOperand(3, TrueVal /* Zero */); + return replaceInstUsesWith(SI, MaskedInst); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 17f0c5c4cff0..f4e2d1239f0f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" @@ -108,7 +107,7 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts( // Can we fold (ShAmt0+ShAmt1) ? auto *NewShAmt = dyn_cast_or_null( - SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false, + simplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false, SQ.getWithInstruction(Sh0))); if (!NewShAmt) return nullptr; // Did not simplify. @@ -232,7 +231,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, return nullptr; // Can we simplify (MaskShAmt+ShiftShAmt) ? - auto *SumOfShAmts = dyn_cast_or_null(SimplifyAddInst( + auto *SumOfShAmts = dyn_cast_or_null(simplifyAddInst( MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); if (!SumOfShAmts) return nullptr; // Did not simplify. @@ -264,7 +263,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, return nullptr; // Can we simplify (ShiftShAmt-MaskShAmt) ? - auto *ShAmtsDiff = dyn_cast_or_null(SimplifySubInst( + auto *ShAmtsDiff = dyn_cast_or_null(simplifySubInst( ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); if (!ShAmtsDiff) return nullptr; // Did not simplify. @@ -374,11 +373,12 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); + Type *Ty = I.getType(); // If the shift amount is a one-use `sext`, we can demote it to `zext`. Value *Y; if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) { - Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName()); + Value *NewExt = Builder.CreateZExt(Y, Ty, Op1->getName()); return BinaryOperator::Create(I.getOpcode(), Op0, NewExt); } @@ -400,15 +400,56 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))) return NewShift; - // (C1 shift (A add C2)) -> (C1 shift C2) shift A) - // iff A and C2 are both positive. 
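+ // Illustrative sketch (not part of this change): with C = 1 and C1 = 3,
+ // "shl i32 1, (add nuw i32 %A, 3)" becomes "shl i32 8, %A". The nuw on
+ // the add is what guarantees the combined shift amount never wrapped,
+ // replacing the old requirement that A and C2 both be non-negative.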
+ // Pre-shift a constant shifted by a variable amount with constant offset: + // C shift (A add nuw C1) --> (C shift C1) shift A Value *A; - Constant *C; - if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C)))) - if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && - isKnownNonNegative(C, DL, 0, &AC, &I, &DT)) - return BinaryOperator::Create( - I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A); + Constant *C, *C1; + if (match(Op0, m_Constant(C)) && + match(Op1, m_NUWAdd(m_Value(A), m_Constant(C1)))) { + Value *NewC = Builder.CreateBinOp(I.getOpcode(), C, C1); + return BinaryOperator::Create(I.getOpcode(), NewC, A); + } + + unsigned BitWidth = Ty->getScalarSizeInBits(); + + const APInt *AC, *AddC; + // Try to pre-shift a constant shifted by a variable amount added with a + // negative number: + // C << (X - AddC) --> (C >> AddC) << X + // and + // C >> (X - AddC) --> (C << AddC) >> X + if (match(Op0, m_APInt(AC)) && match(Op1, m_Add(m_Value(A), m_APInt(AddC))) && + AddC->isNegative() && (-*AddC).ult(BitWidth)) { + assert(!AC->isZero() && "Expected simplify of shifted zero"); + unsigned PosOffset = (-*AddC).getZExtValue(); + + auto isSuitableForPreShift = [PosOffset, &I, AC]() { + switch (I.getOpcode()) { + default: + return false; + case Instruction::Shl: + return (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) && + AC->eq(AC->lshr(PosOffset).shl(PosOffset)); + case Instruction::LShr: + return I.isExact() && AC->eq(AC->shl(PosOffset).lshr(PosOffset)); + case Instruction::AShr: + return I.isExact() && AC->eq(AC->shl(PosOffset).ashr(PosOffset)); + } + }; + if (isSuitableForPreShift()) { + Constant *NewC = ConstantInt::get(Ty, I.getOpcode() == Instruction::Shl + ? AC->lshr(PosOffset) + : AC->shl(PosOffset)); + BinaryOperator *NewShiftOp = + BinaryOperator::Create(I.getOpcode(), NewC, A); + if (I.getOpcode() == Instruction::Shl) { + NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + } else { + NewShiftOp->setIsExact(); + } + return NewShiftOp; + } + } // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2. // Because shifts by negative values (which could occur if A were negative) @@ -417,7 +458,7 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { match(C, m_Power2())) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? 
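 // Worked sketch (illustrative): for A = -3 and C = 8, "A srem C" is -3,
 // so the original shift amount is negative and the shift is undefined;
 // rewriting it as "A and 7" (= 5) only refines that undefined behavior,
 // and for non-negative A the two forms agree exactly.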
- Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(I.getType(), 1)); + Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(Ty, 1)); Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName()); return replaceOperand(I, 1, Rem); } @@ -661,10 +702,18 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift, } } -Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, +Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1, BinaryOperator &I) { + // (C2 << X) << C1 --> (C2 << C1) << X + // (C2 >> X) >> C1 --> (C2 >> C1) >> X + Constant *C2; + Value *X; + if (match(Op0, m_BinOp(I.getOpcode(), m_Constant(C2), m_Value(X)))) + return BinaryOperator::Create( + I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X); + const APInt *Op1C; - if (!match(Op1, m_APInt(Op1C))) + if (!match(C1, m_APInt(Op1C))) return nullptr; // See if we can propagate this shift into the input, this covers the trivial @@ -701,11 +750,11 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, const APInt *Op0C; if (match(Op0BO->getOperand(1), m_APInt(Op0C))) { if (canShiftBinOpWithConstantRHS(I, Op0BO)) { - Constant *NewRHS = ConstantExpr::get( - I.getOpcode(), cast(Op0BO->getOperand(1)), Op1); + Value *NewRHS = + Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(1), C1); Value *NewShift = - Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), C1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS); @@ -730,10 +779,10 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, if (!isa(FalseVal) && TBO->getOperand(0) == FalseVal && match(TBO->getOperand(1), m_APInt(C)) && canShiftBinOpWithConstantRHS(I, TBO)) { - Constant *NewRHS = ConstantExpr::get( - I.getOpcode(), cast(TBO->getOperand(1)), Op1); + Value *NewRHS = + Builder.CreateBinOp(I.getOpcode(), TBO->getOperand(1), C1); - Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1); + Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, C1); Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS); return SelectInst::Create(Cond, NewOp, NewShift); } @@ -747,10 +796,10 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, if (!isa(TrueVal) && FBO->getOperand(0) == TrueVal && match(FBO->getOperand(1), m_APInt(C)) && canShiftBinOpWithConstantRHS(I, FBO)) { - Constant *NewRHS = ConstantExpr::get( - I.getOpcode(), cast(FBO->getOperand(1)), Op1); + Value *NewRHS = + Builder.CreateBinOp(I.getOpcode(), FBO->getOperand(1), C1); - Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1); + Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, C1); Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS); return SelectInst::Create(Cond, NewShift, NewOp); } @@ -762,7 +811,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { const SimplifyQuery Q = SQ.getWithInstruction(&I); - if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), + if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q)) return replaceInstUsesWith(I, V); @@ -968,10 +1017,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { if (match(Op1, m_Constant(C1))) { Constant *C2; Value *X; - // (C2 << X) << C1 --> (C2 << C1) << X - if 
(match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X)))))
- return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X);
-
 // (X * C2) << C1 --> X * (C2 << C1)
 if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))
 return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
@@ -993,7 +1038,7 @@
 }
 Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
- if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
+ if (Value *V = simplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
 SQ.getWithInstruction(&I)))
 return replaceInstUsesWith(I, V);
@@ -1164,15 +1209,54 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
 }
 }
- // Look for a "splat" mul pattern - it replicates bits across each half of
- // a value, so a right shift is just a mask of the low bits:
- // lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
- // TODO: Generalize to allow more than just half-width shifts?
 const APInt *MulC;
- if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
- ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
- MulC->logBase2() == ShAmtC)
- return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+ if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
+ // Look for a "splat" mul pattern - it replicates bits across each half of
+ // a value, so a right shift is just a mask of the low bits:
+ // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
+ // TODO: Generalize to allow more than just half-width shifts?
+ if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
+ MulC->logBase2() == ShAmtC)
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+
+ // The one-use check is not strictly necessary, but codegen may not be
+ // able to invert the transform and perf may suffer with an extra mul
+ // instruction.
+ if (Op0->hasOneUse()) {
+ APInt NewMulC = MulC->lshr(ShAmtC);
+ // if c is divisible by (1 << ShAmtC):
+ // lshr (mul nuw x, MulC), ShAmtC -> mul nuw x, (MulC >> ShAmtC)
+ if (MulC->eq(NewMulC.shl(ShAmtC))) {
+ auto *NewMul =
+ BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
+ BinaryOperator *OrigMul = cast<BinaryOperator>(Op0);
+ NewMul->setHasNoSignedWrap(OrigMul->hasNoSignedWrap());
+ return NewMul;
+ }
+ }
+ }
+
+ // Try to narrow bswap.
+ // In the case where the shift amount equals the bitwidth difference, the
+ // shift is eliminated.
+ if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::bswap>(
+ m_OneUse(m_ZExt(m_Value(X))))))) {
+ unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+ unsigned WidthDiff = BitWidth - SrcWidth;
+ if (SrcWidth % 16 == 0) {
+ Value *NarrowSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
+ if (ShAmtC >= WidthDiff) {
+ // (bswap (zext X)) >> C --> zext (bswap X >> C')
+ Value *NewShift = Builder.CreateLShr(NarrowSwap, ShAmtC - WidthDiff);
+ return new ZExtInst(NewShift, Ty);
+ } else {
+ // (bswap (zext X)) >> C --> (zext (bswap X)) << C'
+ Value *NewZExt = Builder.CreateZExt(NarrowSwap, Ty);
+ Constant *ShiftDiff = ConstantInt::get(Ty, WidthDiff - ShAmtC);
+ return BinaryOperator::CreateShl(NewZExt, ShiftDiff);
+ }
+ }
+ }
 // If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() && @@ -1263,7 +1347,7 @@ InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract( } Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { - if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), + if (Value *V = simplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 3f064cfda712..9d4c01ac03e2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" @@ -154,6 +154,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (Depth == 0 && !V->hasOneUse()) DemandedMask.setAllBits(); + // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care + // about the high bits of the operands. + auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) { + unsigned NLZ = DemandedMask.countLeadingZeros(); + // Right fill the mask of bits for the operands to demand the most + // significant bit and all those below it. + DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ); + if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || + SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) || + ShrinkDemandedConstant(I, 1, DemandedFromOps) || + SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) { + if (NLZ > 0) { + // Disable the nsw and nuw flags here: We can no longer guarantee that + // we won't wrap after simplification. Removing the nsw/nuw flags is + // legal here because the top bit is not demanded. + I->setHasNoSignedWrap(false); + I->setHasNoUnsignedWrap(false); + } + return true; + } + return false; + }; + switch (I->getOpcode()) { default: computeKnownBits(I, Known, Depth, CxtI); @@ -297,13 +320,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); - Constant *AndC = - ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue()); Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); InsertNewInstWith(NewAnd, *I); - Constant *XorC = - ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue()); Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); return InsertNewInstWith(NewXor, *I); } @@ -311,33 +332,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::Select: { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; - if (SPF == SPF_UMAX) { - // UMax(A, C) == A if ... - // The lowest non-zero bit of DemandMask is higher than the highest - // non-zero bit of C. 
- const APInt *C; - unsigned CTZ = DemandedMask.countTrailingZeros(); - if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits()) - return LHS; - } else if (SPF == SPF_UMIN) { - // UMin(A, C) == A if ... - // The lowest non-zero bit of DemandMask is higher than the highest - // non-one bit of C. - // This comes from using DeMorgans on the above umax example. - const APInt *C; - unsigned CTZ = DemandedMask.countTrailingZeros(); - if (match(RHS, m_APInt(C)) && - CTZ >= C->getBitWidth() - C->countLeadingOnes()) - return LHS; - } - - // If this is a select as part of any other min/max pattern, don't simplify - // any further in case we break the structure. - if (SPF != SPF_UNKNOWN) - return nullptr; - if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) return I; @@ -393,12 +387,12 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) { // The shift amount must be valid (not poison) in the narrow type, and // it must not be greater than the high bits demanded of the result. - if (C->ult(I->getType()->getScalarSizeInBits()) && + if (C->ult(VTy->getScalarSizeInBits()) && C->ule(DemandedMask.countLeadingZeros())) { // trunc (lshr X, C) --> lshr (trunc X), C IRBuilderBase::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(I); - Value *Trunc = Builder.CreateTrunc(X, I->getType()); + Value *Trunc = Builder.CreateTrunc(X, VTy); return Builder.CreateLShr(Trunc, C->getZExtValue()); } } @@ -420,9 +414,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return nullptr; // vector->int or fp->int? - if (VectorType *DstVTy = dyn_cast(I->getType())) { - if (VectorType *SrcVTy = - dyn_cast(I->getOperand(0)->getType())) { + if (auto *DstVTy = dyn_cast(VTy)) { + if (auto *SrcVTy = dyn_cast(I->getOperand(0)->getType())) { if (cast(DstVTy)->getNumElements() != cast(SrcVTy)->getNumElements()) // Don't touch a bitcast between vectors of different element counts. @@ -507,26 +500,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } LLVM_FALLTHROUGH; case Instruction::Sub: { - /// If the high-bits of an ADD/SUB are not demanded, then we do not care - /// about the high bits of the operands. - unsigned NLZ = DemandedMask.countLeadingZeros(); - // Right fill the mask of bits for this ADD/SUB to demand the most - // significant bit and all those below it. - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || - SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) || - ShrinkDemandedConstant(I, 1, DemandedFromOps) || - SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) { - if (NLZ > 0) { - // Disable the nsw and nuw flags here: We can no longer guarantee that - // we won't wrap after simplification. Removing the nsw/nuw flags is - // legal here because the top bit is not demanded. - BinaryOperator &BinOP = *cast(I); - BinOP.setHasNoSignedWrap(false); - BinOP.setHasNoUnsignedWrap(false); - } + APInt DemandedFromOps; + if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps)) return I; - } // If we are known to be adding/subtracting zeros to every bit below // the highest demanded bit, we just return the other side. 
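 // Worked sketch (illustrative, not from this change): if DemandedMask for
 // "add i32 %X, 32" is 0x1F, the constant is zero in bit 4 and below, so
 // no carry can reach the demanded bits and %X can be returned directly.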
@@ -544,6 +520,36 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 NSW, LHSKnown, RHSKnown);
 break;
 }
+ case Instruction::Mul: {
+ APInt DemandedFromOps;
+ if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
+ return I;
+
+ if (DemandedMask.isPowerOf2()) {
+ // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
+ // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
+ // odd (has LSB set), then the left-shifted low bit of X is the answer.
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ const APInt *C;
+ if (match(I->getOperand(1), m_APInt(C)) &&
+ C->countTrailingZeros() == CTZ) {
+ Constant *ShiftC = ConstantInt::get(VTy, CTZ);
+ Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
+ return InsertNewInstWith(Shl, *I);
+ }
+ }
+ // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
+ // X * X is odd iff X is odd.
+ // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
+ if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
+ Constant *One = ConstantInt::get(VTy, 1);
+ Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
+ return InsertNewInstWith(And1, *I);
+ }
+
+ computeKnownBits(I, Known, Depth, CxtI);
+ break;
+ }
 case Instruction::Shl: {
 const APInt *SA;
 if (match(I->getOperand(1), m_APInt(SA))) {
@@ -554,7 +560,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 DemandedMask, Known))
 return R;
+ // TODO: If we only want bits that already match the signbit then we don't
+ // need to shift.
+
+ // If we can pre-shift a right-shifted constant to the left without
+ // losing any high bits and we don't demand the low bits, then eliminate
+ // the left-shift:
+ // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X
 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ Value *X;
+ Constant *C;
+ if (DemandedMask.countTrailingZeros() >= ShiftAmt &&
+ match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
+ Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
+ Constant *NewC = ConstantExpr::getShl(C, LeftShiftAmtC);
+ if (ConstantExpr::getLShr(NewC, LeftShiftAmtC) == C) {
+ Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
+ return InsertNewInstWith(Lshr, *I);
+ }
+ }
+
 APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
 // If the shift is NUW/NSW, then it does demand the high bits.
@@ -584,7 +609,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 else if (SignBitOne)
 Known.One.setSignBit();
 if (Known.hasConflict())
- return UndefValue::get(I->getType());
+ return UndefValue::get(VTy);
 }
 } else {
 // This is a variable shift, so we can't shift the demand mask by a known
@@ -607,6 +632,34 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 if (match(I->getOperand(1), m_APInt(SA))) {
 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ // If we are just demanding the shifted sign bit and below, then this can
+ // be treated as an ASHR in disguise.
+ if (DemandedMask.countLeadingZeros() >= ShiftAmt) {
+ // If we only want bits that already match the signbit then we don't
+ // need to shift.
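+ // Sketch (illustrative): take "lshr i8 %X, 2" where only bits 5..4 are
+ // demanded and %X is known to have at least 4 sign bits. Bits 7..4 of %X
+ // are then all copies of the sign, so the shifted bits equal the
+ // unshifted ones and %X can be returned unchanged.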
+ unsigned NumHiDemandedBits = + BitWidth - DemandedMask.countTrailingZeros(); + unsigned SignBits = + ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); + if (SignBits >= NumHiDemandedBits) + return I->getOperand(0); + + // If we can pre-shift a left-shifted constant to the right without + // losing any low bits (we already know we don't demand the high bits), + // then eliminate the right-shift: + // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X + Value *X; + Constant *C; + if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) { + Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt); + Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC); + if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) { + Instruction *Shl = BinaryOperator::CreateShl(NewC, X); + return InsertNewInstWith(Shl, *I); + } + } + } + // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); @@ -628,6 +681,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::AShr: { + unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); + + // If we only want bits that already match the signbit then we don't need + // to shift. + unsigned NumHiDemandedBits = BitWidth - DemandedMask.countTrailingZeros(); + if (SignBits >= NumHiDemandedBits) + return I->getOperand(0); + // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless @@ -639,11 +700,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return InsertNewInstWith(NewVal, *I); } - // If the sign bit is the only bit demanded by this ashr, then there is no - // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignMask()) - return I->getOperand(0); - const APInt *SA; if (match(I->getOperand(1), m_APInt(SA))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -663,8 +719,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now plus sign bits. APInt HighBits(APInt::getHighBitsSet( @@ -713,13 +767,13 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::SRem: { - ConstantInt *Rem; - if (match(I->getOperand(1), m_ConstantInt(Rem))) { + const APInt *Rem; + if (match(I->getOperand(1), m_APInt(Rem))) { // X % -1 demands all the bits because we don't want to introduce // INT_MIN % -1 (== undef) by accident. 
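 // Worked sketch (illustrative): "srem i32 %X, 8" subtracts a multiple of
 // 8 from %X, so bits 2..0 of the result always equal bits 2..0 of %X;
 // e.g. -13 srem 8 = -5, and both values end in 0b011. If only those bits
 // are demanded, %X can be used directly.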
- if (Rem->isMinusOne()) + if (Rem->isAllOnes()) break; - APInt RA = Rem->getValue().abs(); + APInt RA = Rem->abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits return I->getOperand(0); @@ -786,7 +840,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 && match(II->getArgOperand(0), m_Not(m_Value(X)))) { Function *Ctpop = Intrinsic::getDeclaration( - II->getModule(), Intrinsic::ctpop, II->getType()); + II->getModule(), Intrinsic::ctpop, VTy); return InsertNewInstWith(CallInst::Create(Ctpop, {X}), *I); } break; @@ -809,12 +863,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *NewVal; if (NLZ > NTZ) NewVal = BinaryOperator::CreateLShr( - II->getArgOperand(0), - ConstantInt::get(I->getType(), NLZ - NTZ)); + II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ)); else NewVal = BinaryOperator::CreateShl( - II->getArgOperand(0), - ConstantInt::get(I->getType(), NTZ - NLZ)); + II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ)); NewVal->takeName(I); return InsertNewInstWith(NewVal, *I); } @@ -872,7 +924,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Handle target specific intrinsics Optional V = targetSimplifyDemandedUseBitsIntrinsic( *II, DemandedMask, Known, KnownBitsComputed); - if (V.hasValue()) + if (V) return V.getValue(); break; } @@ -1583,7 +1635,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, Optional V = targetSimplifyDemandedVectorEltsIntrinsic( *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, simplifyAndSetOp); - if (V.hasValue()) + if (V) return V.getValue(); break; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 736cf9c825d5..22659a8e4951 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -42,7 +42,6 @@ #include #define DEBUG_TYPE "instcombine" -#include "llvm/Transforms/Utils/InstructionWorklist.h" using namespace llvm; using namespace PatternMatch; @@ -378,7 +377,7 @@ ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) { Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { Value *SrcVec = EI.getVectorOperand(); Value *Index = EI.getIndexOperand(); - if (Value *V = SimplifyExtractElementInst(SrcVec, Index, + if (Value *V = simplifyExtractElementInst(SrcVec, Index, SQ.getWithInstruction(&EI))) return replaceInstUsesWith(EI, V); @@ -879,7 +878,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // of an aggregate. If we did, that means the CurrIVI will later be // overwritten with the already-recorded value. But if not, let's record it! Optional &Elt = AggElts[Indices.front()]; - Elt = Elt.getValueOr(InsertedValue); + Elt = Elt.value_or(InsertedValue); // FIXME: should we handle chain-terminating undef base operand? 
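 // Behavior note (a sketch of the renamed API): Optional::getValueOr was
 // renamed to value_or for std::optional parity, so "Elt.value_or(V)"
 // keeps an already-recorded element and only stores V when none was seen.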
} @@ -1489,7 +1488,7 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); - if (auto *V = SimplifyInsertElementInst( + if (auto *V = simplifyInsertElementInst( VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) return replaceInstUsesWith(IE, V); @@ -1919,24 +1918,29 @@ static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1); Type *Ty = BO->getType(); switch (BO->getOpcode()) { - case Instruction::Shl: { - // shl X, C --> mul X, (1 << C) - Constant *C; - if (match(BO1, m_Constant(C))) { - Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C); - return { Instruction::Mul, BO0, ShlOne }; - } - break; - } - case Instruction::Or: { - // or X, C --> add X, C (when X and C have no common bits set) - const APInt *C; - if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL)) - return { Instruction::Add, BO0, BO1 }; - break; + case Instruction::Shl: { + // shl X, C --> mul X, (1 << C) + Constant *C; + if (match(BO1, m_Constant(C))) { + Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C); + return {Instruction::Mul, BO0, ShlOne}; } - default: - break; + break; + } + case Instruction::Or: { + // or X, C --> add X, C (when X and C have no common bits set) + const APInt *C; + if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL)) + return {Instruction::Add, BO0, BO1}; + break; + } + case Instruction::Sub: + // sub 0, X --> mul X, -1 + if (match(BO0, m_ZeroInt())) + return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)}; + break; + default: + break; } return {}; } @@ -2053,15 +2057,20 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) { !match(Shuf.getOperand(1), m_BinOp(B1))) return nullptr; + // If one operand is "0 - X", allow that to be viewed as "X * -1" + // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired + // with a multiply, we will exit because C0/C1 will not be set. Value *X, *Y; - Constant *C0, *C1; + Constant *C0 = nullptr, *C1 = nullptr; bool ConstantsAreOp1; - if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) && - match(B1, m_BinOp(m_Value(Y), m_Constant(C1)))) - ConstantsAreOp1 = true; - else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) && - match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))) + if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) && + match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))) ConstantsAreOp1 = false; + else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)), + m_Neg(m_Value(X)))) && + match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)), + m_Neg(m_Value(Y))))) + ConstantsAreOp1 = true; else return nullptr; @@ -2086,7 +2095,7 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) { } } - if (Opc0 != Opc1) + if (Opc0 != Opc1 || !C0 || !C1) return nullptr; // The opcodes must be the same. Use a new name to make that clear. @@ -2233,6 +2242,88 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, return SelectInst::Create(NarrowCond, NarrowX, NarrowY); } +/// Canonicalize FP negate after shuffle. 
+static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ Instruction *FNeg0;
+ Value *X;
+ if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
+ m_FNeg(m_Value(X)))))
+ return nullptr;
+
+ // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
+ if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+ Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
+ return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
+ }
+
+ Instruction *FNeg1;
+ Value *Y;
+ if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
+ m_FNeg(m_Value(Y)))))
+ return nullptr;
+
+ // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
+ if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
+ Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+ Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
+ NewFNeg->copyIRFlags(FNeg0);
+ NewFNeg->andIRFlags(FNeg1);
+ return NewFNeg;
+ }
+
+ return nullptr;
+}
+
+/// Canonicalize casts after shuffle.
+static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ // Do we have 2 matching cast operands?
+ auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
+ auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
+ if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
+ Cast0->getSrcTy() != Cast1->getSrcTy())
+ return nullptr;
+
+ // TODO: Allow other opcodes? That would require easing the type restrictions
+ // below here.
+ CastInst::CastOps CastOpcode = Cast0->getOpcode();
+ switch (CastOpcode) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ break;
+ default:
+ return nullptr;
+ }
+
+ VectorType *ShufTy = Shuf.getType();
+ VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
+ VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
+
+ // TODO: Allow length-increasing shuffles?
+ if (ShufTy->getElementCount().getKnownMinValue() >
+ ShufOpTy->getElementCount().getKnownMinValue())
+ return nullptr;
+
+ // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
+ assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
+ "Expected fixed vector operands for casts and binary shuffle");
+ if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
+ return nullptr;
+
+ // At least one of the operands must have only one use (the shuffle).
+ if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
+ return nullptr;
+
+ // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
+ Value *X = Cast0->getOperand(0);
+ Value *Y = Cast1->getOperand(0);
+ Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+ return CastInst::Create(CastOpcode, NewShuf, ShufTy);
+}
+
 /// Try to fold an extract subvector operation.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
@@ -2442,7 +2533,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 Value *LHS = SVI.getOperand(0);
 Value *RHS = SVI.getOperand(1);
 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
- if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
+ if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
 SVI.getType(), ShufQuery))
 return replaceInstUsesWith(SVI, V);
@@ -2497,7 +2588,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 if (!ScaledMask.empty()) {
 // If the shuffled source vector simplifies, cast that value to this
 // shuffle's type.
- if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
+ if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
 ScaledMask, XType, ShufQuery))
 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
 }
@@ -2528,6 +2619,12 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 if (Instruction *I = narrowVectorSelect(SVI, Builder))
 return I;
+ if (Instruction *I = foldFNegShuffle(SVI, Builder))
+ return I;
+
+ if (Instruction *I = foldCastShuffle(SVI, Builder))
+ return I;
+
 APInt UndefElts(VWidth, 0);
 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3091905ca534..0816a4a575d9 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,7 +42,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -60,6 +59,7 @@
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/BasicBlock.h"
@@ -90,8 +90,6 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -140,6 +138,10 @@
 static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true));
+static cl::opt<unsigned> MaxSinkNumUsers(
+ "instcombine-max-sink-users", cl::init(32),
+ cl::desc("Maximum number of undroppable users for instruction sinking"));
+
 static cl::opt<unsigned> LimitMaxIterations(
 "instcombine-max-iterations",
 cl::desc("Limit the maximum number of instruction combining iterations"),
@@ -424,7 +426,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
 Value *C = I.getOperand(1);
 // Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
+ if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
 // It simplifies to V. Form "A op V".
replaceOperand(I, 0, A); replaceOperand(I, 1, V); @@ -457,7 +459,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { + if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op C". replaceOperand(I, 0, V); replaceOperand(I, 1, C); @@ -485,7 +487,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { + if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op B". replaceOperand(I, 0, V); replaceOperand(I, 1, B); @@ -505,7 +507,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { + if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "B op V". replaceOperand(I, 0, B); replaceOperand(I, 1, V); @@ -652,7 +654,7 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); + V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -671,7 +673,7 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); + V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -780,8 +782,8 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // Disable the use of undef because it's not safe to distribute undef. auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); - Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive); - Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive); + Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive); + Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive); // Do "A op C" and "B op C" both simplify? if (L && R) { @@ -819,8 +821,8 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // Disable the use of undef because it's not safe to distribute undef. auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); - Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive); - Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive); + Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive); + Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive); // Do "A op B" and "A op C" both simplify? if (L && R) { @@ -876,8 +878,8 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, if (LHSIsSelect && RHSIsSelect && A == D) { // (A ? B : C) op (A ? E : F) -> A ? 
(B op E) : (C op F) Cond = A; - True = SimplifyBinOp(Opcode, B, E, FMF, Q); - False = SimplifyBinOp(Opcode, C, F, FMF, Q); + True = simplifyBinOp(Opcode, B, E, FMF, Q); + False = simplifyBinOp(Opcode, C, F, FMF, Q); if (LHS->hasOneUse() && RHS->hasOneUse()) { if (False && !True) @@ -888,13 +890,13 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, } else if (LHSIsSelect && LHS->hasOneUse()) { // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) Cond = A; - True = SimplifyBinOp(Opcode, B, RHS, FMF, Q); - False = SimplifyBinOp(Opcode, C, RHS, FMF, Q); + True = simplifyBinOp(Opcode, B, RHS, FMF, Q); + False = simplifyBinOp(Opcode, C, RHS, FMF, Q); } else if (RHSIsSelect && RHS->hasOneUse()) { // X op (D ? E : F) -> D ? (X op E) : (X op F) Cond = D; - True = SimplifyBinOp(Opcode, LHS, E, FMF, Q); - False = SimplifyBinOp(Opcode, LHS, F, FMF, Q); + True = simplifyBinOp(Opcode, LHS, E, FMF, Q); + False = simplifyBinOp(Opcode, LHS, F, FMF, Q); } if (!True || !False) @@ -986,8 +988,8 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) Constant *Ones = ConstantInt::getAllOnesValue(BO.getType()); Constant *Zero = ConstantInt::getNullValue(BO.getType()); - Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C); - Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C); + Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); + Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); return SelectInst::Create(X, TVal, FVal); } @@ -1018,12 +1020,6 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, bool ConstIsRHS = isa(I.getOperand(1)); Constant *ConstOperand = cast(I.getOperand(ConstIsRHS)); - if (auto *SOC = dyn_cast(SO)) { - if (ConstIsRHS) - return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); - return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); - } - Value *Op0 = SO, *Op1 = ConstOperand; if (!ConstIsRHS) std::swap(Op0, Op1); @@ -1035,10 +1031,10 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, return NewBO; } -Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, - SelectInst *SI) { - // Don't modify shared select instructions. - if (!SI->hasOneUse()) +Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, + bool FoldWithMultiUse) { + // Don't modify shared select instructions unless set FoldWithMultiUse + if (!SI->hasOneUse() && !FoldWithMultiUse) return nullptr; Value *TV = SI->getTrueValue(); @@ -1114,12 +1110,6 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, bool ConstIsRHS = isa(I->getOperand(1)); Constant *C = cast(I->getOperand(ConstIsRHS)); - if (auto *InC = dyn_cast(InV)) { - if (ConstIsRHS) - return ConstantExpr::get(I->getOpcode(), InC, C); - return ConstantExpr::get(I->getOpcode(), C, InC); - } - Value *Op0 = InV, *Op1 = C; if (!ConstIsRHS) std::swap(Op0, Op1); @@ -1175,10 +1165,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (cast(InVal)->getParent() == NonConstBB) return nullptr; - // If the incoming non-constant value is in I's block, we will remove one - // instruction, but insert another equivalent one, leading to infinite - // instcombine. - if (isPotentiallyReachable(I.getParent(), NonConstBB, nullptr, &DT, LI)) + // If the incoming non-constant value is reachable from the phis block, + // we'll push the operation across a loop backedge. 
This could result in + // an infinite combine loop, and is generally non-profitable (especially + // if the operation was originally outside the loop). + if (isPotentiallyReachable(PN->getParent(), NonConstBB, nullptr, &DT, LI)) return nullptr; } @@ -1941,10 +1932,8 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, SmallVector IndexC(GEP.indices()); bool IsInBounds = GEP.isInBounds(); Type *Ty = GEP.getSourceElementType(); - Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, TrueC, IndexC) - : Builder.CreateGEP(Ty, TrueC, IndexC); - Value *NewFalseC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, FalseC, IndexC) - : Builder.CreateGEP(Ty, FalseC, IndexC); + Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds); + Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds); return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel); } @@ -1953,13 +1942,11 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction with matching element type, combine the // indices of the two getelementptr instructions into a single instruction. - if (Src->getResultElementType() != GEP.getSourceElementType()) - return nullptr; - if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; - if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && + if (Src->getResultElementType() == GEP.getSourceElementType() && + Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && Src->hasOneUse()) { Value *GO1 = GEP.getOperand(1); Value *SO1 = Src->getOperand(1); @@ -1971,45 +1958,21 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { - // We have to be careful here. - // We have something like: - // %src = getelementptr , * %base, %idx - // %gep = getelementptr , * %src, %idx2 - // If we just swap idx & idx2 then we could inadvertantly - // change %src from a vector to a scalar, or vice versa. - // Cases: - // 1) %base a scalar & idx a scalar & idx2 a vector - // => Swapping idx & idx2 turns %src into a vector type. - // 2) %base a scalar & idx a vector & idx2 a scalar - // => Swapping idx & idx2 turns %src in a scalar type - // 3) %base, %idx, and %idx2 are scalars - // => %src & %gep are scalars - // => swapping idx & idx2 is safe - // 4) %base a vector - // => %src is a vector - // => swapping idx & idx2 is safe. - auto *SO0 = Src->getOperand(0); - auto *SO0Ty = SO0->getType(); - if (!isa(GEP.getType()) || // case 3 - isa(SO0Ty)) { // case 4 - Src->setOperand(1, GO1); - GEP.setOperand(1, SO1); - return &GEP; - } else { - // Case 1 or 2 - // -- have to recreate %src & %gep - // put NewSrc at same location as %src - Builder.SetInsertPoint(cast(Src)); - Value *NewSrc = Builder.CreateGEP( - GEP.getSourceElementType(), SO0, GO1, Src->getName()); - // Propagate 'inbounds' if the new source was not constant-folded. - if (auto *NewSrcGEPI = dyn_cast(NewSrc)) - NewSrcGEPI->setIsInBounds(Src->isInBounds()); - GetElementPtrInst *NewGEP = GetElementPtrInst::Create( - GEP.getSourceElementType(), NewSrc, {SO1}); - NewGEP->setIsInBounds(GEP.isInBounds()); - return NewGEP; - } + // The swapped GEPs are inbounds if both original GEPs are inbounds + // and the sign of the offsets is the same. 
For simplicity, only
+        // handle both offsets being non-negative.
+        bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
+                          isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
+                          isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
+        // Put NewSrc at same location as %src.
+        Builder.SetInsertPoint(cast<Instruction>(Src));
+        Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
+                                          Src->getPointerOperand(), GO1,
+                                          Src->getName(), IsInBounds);
+        GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+            GEP.getSourceElementType(), NewSrc, {SO1});
+        NewGEP->setIsInBounds(IsInBounds);
+        return NewGEP;
       }
     }
   }
@@ -2022,6 +1985,87 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
     return nullptr; // Wait until our source is folded to completion.
 
+  // For constant GEPs, use a more general offset-based folding approach.
+  // Only do this for opaque pointers, as the result element type may change.
+  Type *PtrTy = Src->getType()->getScalarType();
+  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
+      (Src->hasOneUse() || Src->hasAllConstantIndices())) {
+    // Split Src into a variable part and a constant suffix.
+    gep_type_iterator GTI = gep_type_begin(*Src);
+    Type *BaseType = GTI.getIndexedType();
+    bool IsFirstType = true;
+    unsigned NumVarIndices = 0;
+    for (auto Pair : enumerate(Src->indices())) {
+      if (!isa<ConstantInt>(Pair.value())) {
+        BaseType = GTI.getIndexedType();
+        IsFirstType = false;
+        NumVarIndices = Pair.index() + 1;
+      }
+      ++GTI;
+    }
+
+    // Determine the offset for the constant suffix of Src.
+    APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
+    if (NumVarIndices != Src->getNumIndices()) {
+      // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
+      if (isa<ScalableVectorType>(BaseType))
+        return nullptr;
+
+      SmallVector<Value *> ConstantIndices;
+      if (!IsFirstType)
+        ConstantIndices.push_back(
+            Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
+      append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
+      Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
+    }
+
+    // Add the offset for GEP (which is fully constant).
+    if (!GEP.accumulateConstantOffset(DL, Offset))
+      return nullptr;
+
+    APInt OffsetOld = Offset;
+    // Convert the total offset back into indices.
+    SmallVector<APInt> ConstIndices =
+        DL.getGEPIndicesForOffset(BaseType, Offset);
+    if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
+      // If both GEPs are constant-indexed, and cannot be merged in either way,
+      // convert them to a GEP of i8.
+      if (Src->hasAllConstantIndices())
+        return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
+                   ? GetElementPtrInst::CreateInBounds(
+                         Builder.getInt8Ty(), Src->getOperand(0),
+                         Builder.getInt(OffsetOld), GEP.getName())
+                   : GetElementPtrInst::Create(
+                         Builder.getInt8Ty(), Src->getOperand(0),
+                         Builder.getInt(OffsetOld), GEP.getName());
+      return nullptr;
+    }
+
+    bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
+    SmallVector<Value *> Indices;
+    append_range(Indices, drop_end(Src->indices(),
+                                   Src->getNumIndices() - NumVarIndices));
+    for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
+      Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
+      // Even if the total offset is inbounds, we may end up representing it
+      // by first performing a larger negative offset, and then a smaller
+      // positive one. The large negative offset might go out of bounds. Only
+      // preserve inbounds if all signs are the same.
+ IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative(); + } + + return IsInBounds + ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()) + : GetElementPtrInst::Create(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()); + } + + if (Src->getResultElementType() != GEP.getSourceElementType()) + return nullptr; + SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. @@ -2045,7 +2089,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, return nullptr; Value *Sum = - SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); + simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) @@ -2116,9 +2160,8 @@ Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI, // existing GEP Value. Causing issues if this Value is accessed when // constructing an AddrSpaceCastInst SmallVector Indices(GEP.indices()); - Value *NGEP = GEP.isInBounds() - ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices) - : Builder.CreateGEP(SrcEltType, SrcOp, Indices); + Value *NGEP = + Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds()); NGEP->takeName(&GEP); // Preserve GEP address space to satisfy users @@ -2169,12 +2212,10 @@ Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI, // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. - SmallVector NewIndices; + SmallVector NewIndices; if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) { - Value *NGEP = - GEP.isInBounds() - ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices) - : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices); + Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "", + GEP.isInBounds()); if (NGEP->getType() == GEP.getType()) return replaceInstUsesWith(GEP, NGEP); @@ -2195,7 +2236,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Type *GEPType = GEP.getType(); Type *GEPEltType = GEP.getSourceElementType(); bool IsGEPSrcEleScalable = isa(GEPEltType); - if (Value *V = SimplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(), + if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(), SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); @@ -2280,7 +2321,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { auto *Op2 = dyn_cast(*I); - if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) + if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || + Op1->getSourceElementType() != Op2->getSourceElementType()) return nullptr; // As for Op1 above, don't try to fold a GEP into itself. @@ -2476,11 +2518,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector Idx(GEP.indices()); Value *NewGEP = - GEP.isInBounds() - ? 
Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, - Idx, GEP.getName()) - : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx, - GEP.getName()); + Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx, + GEP.getName(), GEP.isInBounds()); return new AddrSpaceCastInst(NewGEP, GEPType); } } @@ -2495,13 +2534,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) == DL.getTypeAllocSize(GEPEltType)) { Type *IdxType = DL.getIndexType(GEPType); - Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; - Value *NewGEP = - GEP.isInBounds() - ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx, - GEP.getName()) - : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx, - GEP.getName()); + Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)}; + Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx, + GEP.getName(), GEP.isInBounds()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType); @@ -2533,11 +2568,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Value *NewGEP = - GEP.isInBounds() && NSW - ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, - NewIdx, GEP.getName()) - : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx, - GEP.getName()); + Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx, + GEP.getName(), GEP.isInBounds() && NSW); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -2578,11 +2610,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx}; Value *NewGEP = - GEP.isInBounds() && NSW - ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, - Off, GEP.getName()) - : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off, - GEP.getName()); + Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off, + GEP.getName(), GEP.isInBounds() && NSW); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType); @@ -2672,6 +2701,7 @@ static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo &TLI) { SmallVector Worklist; + const Optional Family = getAllocationFamily(AI, &TLI); Worklist.push_back(AI); do { @@ -2740,12 +2770,15 @@ static bool isAllocSiteRemovable(Instruction *AI, continue; } - if (isFreeCall(I, &TLI)) { + if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) { + assert(Family); Users.emplace_back(I); continue; } - if (isReallocLikeFn(I, &TLI)) { + if (isReallocLikeFn(I, &TLI) && + getAllocationFamily(I, &TLI) == Family) { + assert(Family); Users.emplace_back(I); Worklist.push_back(I); continue; @@ -2803,7 +2836,7 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::objectsize) { Value *Result = - lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true); + lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true); replaceInstUsesWith(*I, Result); eraseInstFromFunction(*I); Users[i] = nullptr; // Skip examining in the next loop. 
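[Editor's note: illustration only, not part of the vendored patch. The isAllocSiteRemovable() hunk above records the allocation family of the site via getAllocationFamily() and only accepts a free/realloc user from the same family. A source-level sketch of the case that check guards against; matched and mismatched are hypothetical names.]

    #include <cstdlib>

    // Same family: malloc paired with free. The whole site may be removed.
    void matched() {
      void *P = std::malloc(16);
      std::free(P);
    }

    // Mixed families: malloc paired with operator delete. This is undefined
    // behavior in the source program, so the optimizer must not treat the
    // pair as a removable allocation site.
    void mismatched() {
      int *P = static_cast<int *>(std::malloc(sizeof(int)));
      delete P;
    }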
@@ -3192,7 +3225,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
   if (!EV.hasIndices())
     return replaceInstUsesWith(EV, Agg);
 
-  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
+  if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
                                           SQ.getWithInstruction(&EV)))
     return replaceInstUsesWith(EV, V);
 
@@ -3248,6 +3281,15 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
                                      makeArrayRef(exti, exte));
   }
   if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
+    // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
+    Intrinsic::ID OvID = WO->getIntrinsicID();
+    if (*EV.idx_begin() == 0 &&
+        (OvID == Intrinsic::smul_with_overflow ||
+         OvID == Intrinsic::umul_with_overflow) &&
+        match(WO->getArgOperand(1), m_AllOnes())) {
+      return BinaryOperator::CreateNeg(WO->getArgOperand(0));
+    }
+
     // We're extracting from an overflow intrinsic, see if we're the only user,
     // which allows us to simplify multiple result intrinsics to simpler
     // things that just get one value.
@@ -3723,21 +3765,116 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
   if (!MaybePoisonOperand)
     return OrigOp;
 
-  auto *FrozenMaybePoisonOperand = new FreezeInst(
+  Builder.SetInsertPoint(OrigOpInst);
+  auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
       MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
   replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
-  FrozenMaybePoisonOperand->insertBefore(OrigOpInst);
   return OrigOp;
 }
 
-bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) {
+Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
+                                                        PHINode *PN) {
+  // Detect whether this is a recurrence with a start value and some number of
+  // backedge values. We'll check whether we can push the freeze through the
+  // backedge values (possibly dropping poison flags along the way) until we
+  // reach the phi again. In that case, we can move the freeze to the start
+  // value.
+  Use *StartU = nullptr;
+  SmallVector<Value *> Worklist;
+  for (Use &U : PN->incoming_values()) {
+    if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
+      // Add backedge value to worklist.
+      Worklist.push_back(U.get());
+      continue;
+    }
+
+    // Don't bother handling multiple start values.
+    if (StartU)
+      return nullptr;
+    StartU = &U;
+  }
+
+  if (!StartU || Worklist.empty())
+    return nullptr; // Not a recurrence.
+
+  Value *StartV = StartU->get();
+  BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
+  bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
+  // We can't insert freeze if the start value is the result of the
+  // terminator (e.g. an invoke).
+  if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
+    return nullptr;
+
+  SmallPtrSet<Value *, 32> Visited;
+  SmallVector<Instruction *> DropFlags;
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    if (Visited.size() > 32)
+      return nullptr; // Limit the total number of values we inspect.
+
+    // Assume that PN is non-poison, because it will be after the transform.
+ if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) + continue; + + Instruction *I = dyn_cast(V); + if (!I || canCreateUndefOrPoison(cast(I), + /*ConsiderFlags*/ false)) + return nullptr; + + DropFlags.push_back(I); + append_range(Worklist, I->operands()); + } + + for (Instruction *I : DropFlags) + I->dropPoisonGeneratingFlags(); + + if (StartNeedsFreeze) { + Builder.SetInsertPoint(StartBB->getTerminator()); + Value *FrozenStartV = Builder.CreateFreeze(StartV, + StartV->getName() + ".fr"); + replaceUse(*StartU, FrozenStartV); + } + return replaceInstUsesWith(FI, PN); +} + +bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { Value *Op = FI.getOperand(0); - if (isa(Op)) + if (isa(Op) || Op->hasOneUse()) return false; + // Move the freeze directly after the definition of its operand, so that + // it dominates the maximum number of uses. Note that it may not dominate + // *all* uses if the operand is an invoke/callbr and the use is in a phi on + // the normal/default destination. This is why the domination check in the + // replacement below is still necessary. + Instruction *MoveBefore = nullptr; + if (isa(Op)) { + MoveBefore = &FI.getFunction()->getEntryBlock().front(); + while (isa(MoveBefore)) + MoveBefore = MoveBefore->getNextNode(); + } else if (auto *PN = dyn_cast(Op)) { + MoveBefore = PN->getParent()->getFirstNonPHI(); + } else if (auto *II = dyn_cast(Op)) { + MoveBefore = II->getNormalDest()->getFirstNonPHI(); + } else if (auto *CB = dyn_cast(Op)) { + MoveBefore = CB->getDefaultDest()->getFirstNonPHI(); + } else { + auto *I = cast(Op); + assert(!I->isTerminator() && "Cannot be a terminator"); + MoveBefore = I->getNextNode(); + } + bool Changed = false; + if (&FI != MoveBefore) { + FI.moveBefore(MoveBefore); + Changed = true; + } + Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool { bool Dominates = DT.dominates(&FI, U); Changed |= Dominates; @@ -3750,48 +3887,63 @@ bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) { Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { Value *Op0 = I.getOperand(0); - if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I))) + if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // freeze (phi const, x) --> phi const, (freeze x) if (auto *PN = dyn_cast(Op0)) { if (Instruction *NV = foldOpIntoPhi(I, PN)) return NV; + if (Instruction *NV = foldFreezeIntoRecurrence(I, PN)) + return NV; } if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I)) return replaceInstUsesWith(I, NI); - if (match(Op0, m_Undef())) { - // If I is freeze(undef), see its uses and fold it to the best constant. - // - or: pick -1 - // - select's condition: pick the value that leads to choosing a constant - // - other ops: pick 0 + // If I is freeze(undef), check its uses and fold it to a fixed constant. + // - or: pick -1 + // - select's condition: if the true value is constant, choose it by making + // the condition true. + // - default: pick 0 + // + // Note that this transform is intentionally done here rather than + // via an analysis in InstSimplify or at individual user sites. That is + // because we must produce the same value for all uses of the freeze - + // it's the reason "freeze" exists! + // + // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid + // duplicating logic for binops at least. 
+ auto getUndefReplacement = [&I](Type *Ty) { Constant *BestValue = nullptr; - Constant *NullValue = Constant::getNullValue(I.getType()); + Constant *NullValue = Constant::getNullValue(Ty); for (const auto *U : I.users()) { Constant *C = NullValue; - if (match(U, m_Or(m_Value(), m_Value()))) - C = Constant::getAllOnesValue(I.getType()); - else if (const auto *SI = dyn_cast(U)) { - if (SI->getCondition() == &I) { - APInt CondVal(1, isa(SI->getFalseValue()) ? 0 : 1); - C = Constant::getIntegerValue(I.getType(), CondVal); - } - } + C = ConstantInt::getAllOnesValue(Ty); + else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value()))) + C = ConstantInt::getTrue(Ty); if (!BestValue) BestValue = C; else if (BestValue != C) BestValue = NullValue; } + assert(BestValue && "Must have at least one use"); + return BestValue; + }; - return replaceInstUsesWith(I, BestValue); + if (match(Op0, m_Undef())) + return replaceInstUsesWith(I, getUndefReplacement(I.getType())); + + Constant *C; + if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) { + Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType()); + return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC)); } - // Replace all dominated uses of Op to freeze(Op). - if (freezeDominatedUses(I)) + // Replace uses of Op with freeze(Op). + if (freezeOtherUses(I)) return &I; return nullptr; @@ -3847,7 +3999,6 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { /// block. static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock, TargetLibraryInfo &TLI) { - assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!"); BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. @@ -4014,48 +4165,68 @@ bool InstCombinerImpl::run() { [this](Instruction *I) -> Optional { if (!EnableCodeSinking) return None; - auto *UserInst = cast_or_null(I->getUniqueUndroppableUser()); - if (!UserInst) - return None; BasicBlock *BB = I->getParent(); BasicBlock *UserParent = nullptr; + unsigned NumUsers = 0; - // Special handling for Phi nodes - get the block the use occurs in. - if (PHINode *PN = dyn_cast(UserInst)) { - for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { - if (PN->getIncomingValue(i) == I) { - // Bail out if we have uses in different blocks. We don't do any - // sophisticated analysis (i.e finding NearestCommonDominator of these - // use blocks). - if (UserParent && UserParent != PN->getIncomingBlock(i)) - return None; - UserParent = PN->getIncomingBlock(i); + for (auto *U : I->users()) { + if (U->isDroppable()) + continue; + if (NumUsers > MaxSinkNumUsers) + return None; + + Instruction *UserInst = cast(U); + // Special handling for Phi nodes - get the block the use occurs in. + if (PHINode *PN = dyn_cast(UserInst)) { + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (PN->getIncomingValue(i) == I) { + // Bail out if we have uses in different blocks. We don't do any + // sophisticated analysis (i.e finding NearestCommonDominator of + // these use blocks). + if (UserParent && UserParent != PN->getIncomingBlock(i)) + return None; + UserParent = PN->getIncomingBlock(i); + } } + assert(UserParent && "expected to find user block!"); + } else { + if (UserParent && UserParent != UserInst->getParent()) + return None; + UserParent = UserInst->getParent(); } - assert(UserParent && "expected to find user block!"); - } else - UserParent = UserInst->getParent(); - // Try sinking to another block. 
If that block is unreachable, then do - // not bother. SimplifyCFG should handle it. - if (UserParent == BB || !DT.isReachableFromEntry(UserParent)) - return None; + // Make sure these checks are done only once, naturally we do the checks + // the first time we get the userparent, this will save compile time. + if (NumUsers == 0) { + // Try sinking to another block. If that block is unreachable, then do + // not bother. SimplifyCFG should handle it. + if (UserParent == BB || !DT.isReachableFromEntry(UserParent)) + return None; + + auto *Term = UserParent->getTerminator(); + // See if the user is one of our successors that has only one + // predecessor, so that we don't have to split the critical edge. + // Another option where we can sink is a block that ends with a + // terminator that does not pass control to other block (such as + // return or unreachable or resume). In this case: + // - I dominates the User (by SSA form); + // - the User will be executed at most once. + // So sinking I down to User is always profitable or neutral. + if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term)) + return None; + + assert(DT.dominates(BB, UserParent) && "Dominance relation broken?"); + } - auto *Term = UserParent->getTerminator(); - // See if the user is one of our successors that has only one - // predecessor, so that we don't have to split the critical edge. - // Another option where we can sink is a block that ends with a - // terminator that does not pass control to other block (such as - // return or unreachable or resume). In this case: - // - I dominates the User (by SSA form); - // - the User will be executed at most once. - // So sinking I down to User is always profitable or neutral. - if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) { - assert(DT.dominates(BB, UserParent) && "Dominance relation broken?"); - return UserParent; + NumUsers++; } - return None; + + // No user or only has droppable users. 
+ if (!UserParent) + return None; + + return UserParent; }; auto OptBB = getOptionalSinkBlockForInst(I); diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8f94172a6402..7a5a74aa4fff 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -42,14 +43,12 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -63,15 +62,12 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" @@ -87,7 +83,6 @@ #include #include #include -#include #include #include #include @@ -116,7 +111,7 @@ static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000; static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000; -static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40; +static const uint64_t kPS_ShadowOffset64 = 1ULL << 40; static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; static const uint64_t kEmscriptenShadowOffset = 0; @@ -335,6 +330,11 @@ static cl::opt ClMemoryAccessCallbackPrefix( cl::desc("Prefix for memory access callbacks"), cl::Hidden, cl::init("__asan_")); +static cl::opt ClKasanMemIntrinCallbackPrefix( + "asan-kernel-mem-intrinsic-prefix", + cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden, + cl::init(false)); + static cl::opt ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas", cl::desc("instrument dynamic allocas"), @@ -465,11 +465,12 @@ struct ShadowMapping { static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsKasan) { bool IsAndroid = TargetTriple.isAndroid(); - bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS(); + bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS() || + TargetTriple.isDriverKit(); bool IsMacOS = TargetTriple.isMacOSX(); bool IsFreeBSD = TargetTriple.isOSFreeBSD(); bool IsNetBSD = TargetTriple.isOSNetBSD(); - bool IsPS4CPU = TargetTriple.isPS4CPU(); + bool IsPS = TargetTriple.isPS(); bool IsLinux = TargetTriple.isOSLinux(); bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le; @@ -528,8 +529,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, 
Mapping.Offset = kNetBSDKasan_ShadowOffset64; else Mapping.Offset = kNetBSD_ShadowOffset64; - } else if (IsPS4CPU) - Mapping.Offset = kPS4CPU_ShadowOffset64; + } else if (IsPS) + Mapping.Offset = kPS_ShadowOffset64; else if (IsLinux && IsX86_64) { if (IsKasan) Mapping.Offset = kLinuxKasan_ShadowOffset64; @@ -568,7 +569,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, // offset is not necessary 1/8-th of the address space. On SystemZ, // we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. - Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU && + Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS && !IsRISCV64 && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; @@ -621,41 +622,9 @@ static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { namespace { -/// Module analysis for getting various metadata about the module. -class ASanGlobalsMetadataWrapperPass : public ModulePass { -public: - static char ID; - - ASanGlobalsMetadataWrapperPass() : ModulePass(ID) { - initializeASanGlobalsMetadataWrapperPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - GlobalsMD = GlobalsMetadata(M); - return false; - } - - StringRef getPassName() const override { - return "ASanGlobalsMetadataWrapperPass"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - GlobalsMetadata &getGlobalsMD() { return GlobalsMD; } - -private: - GlobalsMetadata GlobalsMD; -}; - -char ASanGlobalsMetadataWrapperPass::ID = 0; - /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer { - AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, - const StackSafetyGlobalInfo *SSGI, + AddressSanitizer(Module &M, const StackSafetyGlobalInfo *SSGI, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false, AsanDetectStackUseAfterReturnMode UseAfterReturn = @@ -666,7 +635,7 @@ struct AddressSanitizer { UseAfterScope(UseAfterScope || ClUseAfterScope), UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? 
ClUseAfterReturn : UseAfterReturn), - GlobalsMD(*GlobalsMD), SSGI(SSGI) { + SSGI(SSGI) { C = &(M.getContext()); LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -779,7 +748,6 @@ private: FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; Value *LocalDynamicShadow = nullptr; - const GlobalsMetadata &GlobalsMD; const StackSafetyGlobalInfo *SSGI; DenseMap ProcessedAllocas; @@ -787,60 +755,13 @@ private: FunctionCallee AMDGPUAddressPrivate; }; -class AddressSanitizerLegacyPass : public FunctionPass { -public: - static char ID; - - explicit AddressSanitizerLegacyPass( - bool CompileKernel = false, bool Recover = false, - bool UseAfterScope = false, - AsanDetectStackUseAfterReturnMode UseAfterReturn = - AsanDetectStackUseAfterReturnMode::Runtime) - : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover), - UseAfterScope(UseAfterScope), UseAfterReturn(UseAfterReturn) { - initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "AddressSanitizerFunctionPass"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - if (ClUseStackSafety) - AU.addRequired(); - AU.addRequired(); - } - - bool runOnFunction(Function &F) override { - GlobalsMetadata &GlobalsMD = - getAnalysis().getGlobalsMD(); - const StackSafetyGlobalInfo *const SSGI = - ClUseStackSafety - ? &getAnalysis().getResult() - : nullptr; - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); - AddressSanitizer ASan(*F.getParent(), &GlobalsMD, SSGI, CompileKernel, - Recover, UseAfterScope, UseAfterReturn); - return ASan.instrumentFunction(F, TLI); - } - -private: - bool CompileKernel; - bool Recover; - bool UseAfterScope; - AsanDetectStackUseAfterReturnMode UseAfterReturn; -}; - class ModuleAddressSanitizer { public: - ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, - bool CompileKernel = false, bool Recover = false, - bool UseGlobalsGC = true, bool UseOdrIndicator = false, + ModuleAddressSanitizer(Module &M, bool CompileKernel = false, + bool Recover = false, bool UseGlobalsGC = true, + bool UseOdrIndicator = false, AsanDtorKind DestructorKind = AsanDtorKind::Global) - : GlobalsMD(*GlobalsMD), - CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan + : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel), Recover(ClRecover.getNumOccurrences() > 0 ? 
ClRecover : Recover), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel), @@ -906,7 +827,6 @@ private: uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const; int GetAsanVersion(const Module &M) const; - const GlobalsMetadata &GlobalsMD; bool CompileKernel; bool Recover; bool UseGlobalsGC; @@ -931,44 +851,6 @@ private: Function *AsanDtorFunction = nullptr; }; -class ModuleAddressSanitizerLegacyPass : public ModulePass { -public: - static char ID; - - explicit ModuleAddressSanitizerLegacyPass( - bool CompileKernel = false, bool Recover = false, bool UseGlobalGC = true, - bool UseOdrIndicator = false, - AsanDtorKind DestructorKind = AsanDtorKind::Global) - : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover), - UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator), - DestructorKind(DestructorKind) { - initializeModuleAddressSanitizerLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "ModuleAddressSanitizer"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - } - - bool runOnModule(Module &M) override { - GlobalsMetadata &GlobalsMD = - getAnalysis().getGlobalsMD(); - ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover, - UseGlobalGC, UseOdrIndicator, - DestructorKind); - return ASanModule.instrumentModule(M); - } - -private: - bool CompileKernel; - bool Recover; - bool UseGlobalGC; - bool UseOdrIndicator; - AsanDtorKind DestructorKind; -}; - // Stack poisoning does not play well with exception handling. // When an exception is thrown, we essentially bypass the code // that unpoisones the stack. This is why the run-time library has @@ -1221,85 +1103,6 @@ struct FunctionStackPoisoner : public InstVisitor { } // end anonymous namespace -void LocationMetadata::parse(MDNode *MDN) { - assert(MDN->getNumOperands() == 3); - MDString *DIFilename = cast(MDN->getOperand(0)); - Filename = DIFilename->getString(); - LineNo = mdconst::extract(MDN->getOperand(1))->getLimitedValue(); - ColumnNo = - mdconst::extract(MDN->getOperand(2))->getLimitedValue(); -} - -// FIXME: It would be cleaner to instead attach relevant metadata to the globals -// we want to sanitize instead and reading this metadata on each pass over a -// function instead of reading module level metadata at first. -GlobalsMetadata::GlobalsMetadata(Module &M) { - NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); - if (!Globals) - return; - for (auto MDN : Globals->operands()) { - // Metadata node contains the global and the fields of "Entry". - assert(MDN->getNumOperands() == 5); - auto *V = mdconst::extract_or_null(MDN->getOperand(0)); - // The optimizer may optimize away a global entirely. - if (!V) - continue; - auto *StrippedV = V->stripPointerCasts(); - auto *GV = dyn_cast(StrippedV); - if (!GV) - continue; - // We can already have an entry for GV if it was merged with another - // global. 
- Entry &E = Entries[GV]; - if (auto *Loc = cast_or_null(MDN->getOperand(1))) - E.SourceLoc.parse(Loc); - if (auto *Name = cast_or_null(MDN->getOperand(2))) - E.Name = Name->getString(); - ConstantInt *IsDynInit = mdconst::extract(MDN->getOperand(3)); - E.IsDynInit |= IsDynInit->isOne(); - ConstantInt *IsExcluded = - mdconst::extract(MDN->getOperand(4)); - E.IsExcluded |= IsExcluded->isOne(); - } -} - -AnalysisKey ASanGlobalsMetadataAnalysis::Key; - -GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M, - ModuleAnalysisManager &AM) { - return GlobalsMetadata(M); -} - -PreservedAnalyses AddressSanitizerPass::run(Function &F, - AnalysisManager &AM) { - auto &MAMProxy = AM.getResult(F); - Module &M = *F.getParent(); - if (auto *R = MAMProxy.getCachedResult(M)) { - const TargetLibraryInfo *TLI = &AM.getResult(F); - AddressSanitizer Sanitizer(M, R, nullptr, Options.CompileKernel, - Options.Recover, Options.UseAfterScope, - Options.UseAfterReturn); - if (Sanitizer.instrumentFunction(F, TLI)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); - } - - report_fatal_error( - "The ASanGlobalsMetadataAnalysis is required to run before " - "AddressSanitizer can run"); - return PreservedAnalyses::all(); -} - -void AddressSanitizerPass::printPipeline( - raw_ostream &OS, function_ref MapClassName2PassName) { - static_cast *>(this)->printPipeline( - OS, MapClassName2PassName); - OS << "<"; - if (Options.CompileKernel) - OS << "kernel"; - OS << ">"; -} - void ModuleAddressSanitizerPass::printPipeline( raw_ostream &OS, function_ref MapClassName2PassName) { static_cast *>(this)->printPipeline( @@ -1318,8 +1121,7 @@ ModuleAddressSanitizerPass::ModuleAddressSanitizerPass( PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) { - GlobalsMetadata &GlobalsMD = MAM.getResult(M); - ModuleAddressSanitizer ModuleSanitizer(M, &GlobalsMD, Options.CompileKernel, + ModuleAddressSanitizer ModuleSanitizer(M, Options.CompileKernel, Options.Recover, UseGlobalGC, UseOdrIndicator, DestructorKind); bool Modified = false; @@ -1327,9 +1129,9 @@ PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, const StackSafetyGlobalInfo *const SSGI = ClUseStackSafety ? &MAM.getResult(M) : nullptr; for (Function &F : M) { - AddressSanitizer FunctionSanitizer( - M, &GlobalsMD, SSGI, Options.CompileKernel, Options.Recover, - Options.UseAfterScope, Options.UseAfterReturn); + AddressSanitizer FunctionSanitizer(M, SSGI, Options.CompileKernel, + Options.Recover, Options.UseAfterScope, + Options.UseAfterReturn); const TargetLibraryInfo &TLI = FAM.getResult(F); Modified |= FunctionSanitizer.instrumentFunction(F, &TLI); } @@ -1337,75 +1139,20 @@ PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, return Modified ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } -INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md", - "Read metadata to mark which globals should be instrumented " - "when running ASan.", - false, true) - -char AddressSanitizerLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN( - AddressSanitizerLegacyPass, "asan", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, - false) -INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass) -INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END( - AddressSanitizerLegacyPass, "asan", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, - false) - -FunctionPass *llvm::createAddressSanitizerFunctionPass( - bool CompileKernel, bool Recover, bool UseAfterScope, - AsanDetectStackUseAfterReturnMode UseAfterReturn) { - assert(!CompileKernel || Recover); - return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope, - UseAfterReturn); -} - -char ModuleAddressSanitizerLegacyPass::ID = 0; - -INITIALIZE_PASS( - ModuleAddressSanitizerLegacyPass, "asan-module", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs." - "ModulePass", - false, false) - -ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass( - bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator, - AsanDtorKind Destructor) { - assert(!CompileKernel || Recover); - return new ModuleAddressSanitizerLegacyPass( - CompileKernel, Recover, UseGlobalsGC, UseOdrIndicator, Destructor); -} - static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { size_t Res = countTrailingZeros(TypeSize / 8); assert(Res < kNumberOfAccessSizes); return Res; } -/// Create a global describing a source location. -static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M, - LocationMetadata MD) { - Constant *LocData[] = { - createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix), - ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo), - ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo), - }; - auto LocStruct = ConstantStruct::getAnon(LocData); - auto GV = new GlobalVariable(M, LocStruct->getType(), true, - GlobalValue::PrivateLinkage, LocStruct, - kAsanGenPrefix); - GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - return GV; -} - /// Check if \p G has been created by a trusted compiler pass. static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { // Do not instrument @llvm.global_ctors, @llvm.used, etc. - if (G->getName().startswith("llvm.")) + if (G->getName().startswith("llvm.") || + // Do not instrument gcov counter arrays. + G->getName().startswith("__llvm_gcov_ctr") || + // Do not instrument rtti proxy symbols for function sanitizer. + G->getName().startswith("__llvm_rtti_proxy")) return true; // Do not instrument asan globals. @@ -1414,10 +1161,6 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { G->getName().startswith(kODRGenPrefix)) return true; - // Do not instrument gcov counter arrays. - if (G->getName() == "__llvm_gcov_ctr") - return true; - return false; } @@ -1518,10 +1261,6 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { void AddressSanitizer::getInterestingMemoryOperands( Instruction *I, SmallVectorImpl &Interesting) { - // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) - return; - // Do not instrument the load fetching the dynamic shadow address. 
if (LocalDynamicShadow == I) return; @@ -1613,10 +1352,13 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { // If a global variable does not have dynamic initialization we don't // have to instrument it. However, if a global does not have initializer // at all, we assume it has dynamic initializer (in other TU). - // - // FIXME: Metadata should be attched directly to the global directly instead - // of being added to llvm.asan.globals. - return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit; + if (!G->hasInitializer()) + return false; + + if (G->hasSanitizerMetadata() && G->getSanitizerMetadata().IsDynInit) + return false; + + return true; } void AddressSanitizer::instrumentPointerComparisonOrSubtraction( @@ -1977,9 +1719,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { Type *Ty = G->getValueType(); LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); - // FIXME: Metadata should be attched directly to the global directly instead - // of being added to llvm.asan.globals. - if (GlobalsMD.get(G).IsExcluded) return false; + if (G->hasSanitizerMetadata() && G->getSanitizerMetadata().NoAddress) + return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; // Globals in address space 1 and 4 are supported for AMDGPU. @@ -2125,6 +1866,8 @@ bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const { return true; if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2)) return true; + if (TargetTriple.isDriverKit()) + return true; return false; } @@ -2136,7 +1879,9 @@ StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const { case Triple::MachO: return "__DATA,__asan_globals,regular"; case Triple::Wasm: case Triple::GOFF: + case Triple::SPIRV: case Triple::XCOFF: + case Triple::DXContainer: report_fatal_error( "ModuleAddressSanitizer not implemented for object file format"); case Triple::UnknownObjectFormat: @@ -2470,7 +2215,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, // const char *name; // const char *module_name; // size_t has_dynamic_init; - // void *source_location; + // size_t padding_for_windows_msvc_incremental_link; // size_t odr_indicator; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = @@ -2489,15 +2234,16 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, for (size_t i = 0; i < n; i++) { GlobalVariable *G = GlobalsToChange[i]; - // FIXME: Metadata should be attched directly to the global directly instead - // of being added to llvm.asan.globals. - auto MD = GlobalsMD.get(G); - StringRef NameForGlobal = G->getName(); - // Create string holding the global name (use global name from metadata - // if it's available, otherwise just write the name of global variable). - GlobalVariable *Name = createPrivateGlobalForString( - M, MD.Name.empty() ? NameForGlobal : MD.Name, - /*AllowMerging*/ true, kAsanGenPrefix); + GlobalValue::SanitizerMetadata MD; + if (G->hasSanitizerMetadata()) + MD = G->getSanitizerMetadata(); + + // TODO: Symbol names in the descriptor can be demangled by the runtime + // library. This could save ~0.4% of VM size for a private large binary. 
+ std::string NameForGlobal = llvm::demangle(G->getName().str()); + GlobalVariable *Name = + createPrivateGlobalForString(M, NameForGlobal, + /*AllowMerging*/ true, kAsanGenPrefix); Type *Ty = G->getValueType(); const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); @@ -2545,14 +2291,6 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, G->eraseFromParent(); NewGlobals[i] = NewGlobal; - Constant *SourceLoc; - if (!MD.SourceLoc.empty()) { - auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc); - SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy); - } else { - SourceLoc = ConstantInt::get(IntptrTy, 0); - } - Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy()); GlobalValue *InstrumentedGlobal = NewGlobal; @@ -2593,10 +2331,12 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), - ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, + ConstantInt::get(IntptrTy, MD.IsDynInit), + Constant::getNullValue(IntptrTy), ConstantExpr::getPointerCast(ODRIndicator, IntptrTy)); - if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; + if (ClInitializers && MD.IsDynInit) + HasDynamicallyInitializedGlobals = true; LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); @@ -2759,7 +2499,9 @@ void AddressSanitizer::initializeCallbacks(Module &M) { } const std::string MemIntrinCallbackPrefix = - CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix; + (CompileKernel && !ClKasanMemIntrinCallbackPrefix) + ? std::string("") + : ClMemoryAccessCallbackPrefix; AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); @@ -2888,6 +2630,9 @@ bool AddressSanitizer::instrumentFunction(Function &F, // Leave if the function doesn't need instrumentation. if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified; + if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) + return FunctionModified; + LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); @@ -2908,7 +2653,6 @@ bool AddressSanitizer::instrumentFunction(Function &F, SmallVector NoReturnCalls; SmallVector AllBlocks; SmallVector PointerComparisonsOrSubtracts; - int NumAllocas = 0; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -2917,6 +2661,9 @@ bool AddressSanitizer::instrumentFunction(Function &F, int NumInsnsPerBB = 0; for (auto &Inst : BB) { if (LooksLikeCodeInBug11395(&Inst)) return false; + // Skip instructions inserted by another instrumentation. + if (Inst.hasMetadata(LLVMContext::MD_nosanitize)) + continue; SmallVector InterestingOperands; getInterestingMemoryOperands(&Inst, InterestingOperands); @@ -2948,11 +2695,10 @@ bool AddressSanitizer::instrumentFunction(Function &F, IntrinToInstrument.push_back(MI); NumInsnsPerBB++; } else { - if (isa(Inst)) NumAllocas++; if (auto *CB = dyn_cast(&Inst)) { // A call inside BB. 
TempsToInstrument.clear(); - if (CB->doesNotReturn() && !CB->hasMetadata("nosanitize")) + if (CB->doesNotReturn()) NoReturnCalls.push_back(CB); } if (CallInst *CI = dyn_cast(&Inst)) @@ -3347,7 +3093,7 @@ void FunctionStackPoisoner::processStaticAllocas() { ASanStackVariableDescription D = {AI->getName().data(), ASan.getAllocaSizeInBytes(*AI), 0, - AI->getAlignment(), + AI->getAlign().value(), AI, 0, 0}; @@ -3611,7 +3357,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { IRBuilder<> IRB(AI); - const uint64_t Alignment = std::max(kAllocaRzSize, AI->getAlignment()); + const Align Alignment = std::max(Align(kAllocaRzSize), AI->getAlign()); const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1; Value *Zero = Constant::getNullValue(IntptrTy); @@ -3642,17 +3388,19 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { // Alignment is added to locate left redzone, PartialPadding for possible // partial redzone and kAllocaRzSize for right redzone respectively. Value *AdditionalChunkSize = IRB.CreateAdd( - ConstantInt::get(IntptrTy, Alignment + kAllocaRzSize), PartialPadding); + ConstantInt::get(IntptrTy, Alignment.value() + kAllocaRzSize), + PartialPadding); Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize); // Insert new alloca with new NewSize and Alignment params. AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize); - NewAlloca->setAlignment(Align(Alignment)); + NewAlloca->setAlignment(Alignment); // NewAddress = Address + Alignment - Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), - ConstantInt::get(IntptrTy, Alignment)); + Value *NewAddress = + IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), + ConstantInt::get(IntptrTy, Alignment.value())); // Insert __asan_alloca_poison call for new created alloca. 
IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index 4ad07cab001a..1eadafb4e4b4 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" @@ -29,7 +28,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -142,6 +140,9 @@ static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) { static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE) { + if (F.hasFnAttribute(Attribute::NoSanitizeBounds)) + return false; + const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOpts EvalOpts; EvalOpts.RoundToAlign = true; diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index 1a7f7a365ce4..b11b84d65d23 100644 --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -13,15 +13,12 @@ #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Transforms/Instrumentation.h" -#include - using namespace llvm; static bool diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 497aac30c3f6..e5c0705b916e 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" @@ -145,27 +146,27 @@ FunctionPass *llvm::createControlHeightReductionLegacyPass() { namespace { struct CHRStats { - CHRStats() : NumBranches(0), NumBranchesDelta(0), - WeightedNumBranchesDelta(0) {} + CHRStats() = default; void print(raw_ostream &OS) const { OS << "CHRStats: NumBranches " << NumBranches << " NumBranchesDelta " << NumBranchesDelta << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; } - uint64_t NumBranches; // The original number of conditional branches / - // selects - uint64_t NumBranchesDelta; // The decrease of the number of conditional - // branches / selects in the hot paths due to CHR. - uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile - // count at the scope entry. + // The original number of conditional branches / selects + uint64_t NumBranches = 0; + // The decrease of the number of conditional branches / selects in the hot + // paths due to CHR. + uint64_t NumBranchesDelta = 0; + // NumBranchesDelta weighted by the profile count at the scope entry. + uint64_t WeightedNumBranchesDelta = 0; }; // RegInfo - some properties of a Region. 
struct RegInfo { - RegInfo() : R(nullptr), HasBranch(false) {} - RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} - Region *R; - bool HasBranch; + RegInfo() = default; + RegInfo(Region *RegionIn) : R(RegionIn) {} + Region *R = nullptr; + bool HasBranch = false; SmallVector Selects; }; @@ -769,9 +770,21 @@ CHRScope * CHR::findScope(Region *R) { return nullptr; // If any of the basic blocks have their address taken, we must skip this region // because we cannot clone basic blocks whose address is taken. - for (BasicBlock *BB : R->blocks()) + for (BasicBlock *BB : R->blocks()) { if (BB->hasAddressTaken()) return nullptr; + // If we encounter llvm.coro.id, skip this region because if the basic block + // is cloned, we end up inserting a token-type PHI node into the block with + // llvm.coro.begin. + // FIXME: This could lead to less optimal codegen: because the region is + // excluded, it can prevent CHR from merging adjacent regions into a bigger + // scope and hoisting more branches. + for (Instruction &I : *BB) + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::coro_id) + return nullptr; + } + if (Exit) { // Try to find an if-then block (check if R is an if-then). // if (cond) { @@ -1752,7 +1765,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs) { // Create the combined branch condition and constant-fold the branches/selects // in the hot path. fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, - ProfileCount.getValueOr(0)); + ProfileCount.value_or(0)); } // A helper for transformScopes. Clone the blocks in the scope (excluding the @@ -1949,28 +1962,27 @@ void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged // condition. void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, - Instruction *BranchOrSelect, - CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition) { - if (IsTrueBiased) { - MergedCondition = IRB.CreateAnd(MergedCondition, Cond); - } else { + Instruction *BranchOrSelect, CHRScope *Scope, + IRBuilder<> &IRB, Value *&MergedCondition) { + if (!IsTrueBiased) { // If Cond is an icmp and all users of V except for BranchOrSelect are // branches, negate the icmp predicate and swap the branch targets and avoid // inserting an Xor to negate Cond. - bool Done = false; - if (auto *ICmp = dyn_cast(Cond)) - if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { - MergedCondition = IRB.CreateAnd(MergedCondition, Cond); - Done = true; - } - if (!Done) { - Value *Negate = IRB.CreateXor( - ConstantInt::getTrue(F.getContext()), Cond); - MergedCondition = IRB.CreateAnd(MergedCondition, Negate); - } + auto *ICmp = dyn_cast(Cond); + if (!ICmp || + !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) + Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); } + + // Select conditions can be poison, while branching on poison is immediate + // undefined behavior. As such, we need to freeze potentially poisonous + // conditions derived from selects. + if (isa(BranchOrSelect) && + !isGuaranteedNotToBeUndefOrPoison(Cond)) + Cond = IRB.CreateFreeze(Cond); + + // Use logical and to avoid propagating poison from later conditions.
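// A three-valued sketch, not from this patch, of why the logical form used
// below matters: CreateLogicalAnd(A, B) emits `select i1 A, i1 B, i1 false`,
// so a false A hides poison in B, whereas a bitwise `and` would propagate it.
enum class Tri { False, True, Poison };
static Tri logicalAnd(Tri A, Tri B) {
  if (A == Tri::Poison) return Tri::Poison; // selecting on poison stays poison
  return A == Tri::True ? B : Tri::False;   // B is never observed when A is false
}
static Tri bitwiseAnd(Tri A, Tri B) {
  if (A == Tri::Poison || B == Tri::Poison) return Tri::Poison; // poison infects
  return (A == Tri::True && B == Tri::True) ? Tri::True : Tri::False;
}
// logicalAnd(Tri::False, Tri::Poison) == Tri::False, whereas
// bitwiseAnd(Tri::False, Tri::Poison) == Tri::Poison.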
+ MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); } void CHR::transformScopes(SmallVectorImpl &CHRScopes) { @@ -2080,7 +2092,7 @@ bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { RegionInfo &RI = getAnalysis().getRegionInfo(); std::unique_ptr OwnedORE = std::make_unique(&F); - return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); + return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run(); } namespace llvm { diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index ff3aa14a2a83..6815688827d2 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -66,8 +66,8 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator.h" #include "llvm/Analysis/ValueTracking.h" @@ -84,13 +84,11 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -112,7 +110,6 @@ #include #include #include -#include #include #include #include @@ -187,6 +184,15 @@ static cl::opt ClCombineOffsetLabelsOnGEP( "doing pointer arithmetic."), cl::Hidden, cl::init(true)); +static cl::list ClCombineTaintLookupTables( + "dfsan-combine-taint-lookup-table", + cl::desc( + "When dfsan-combine-offset-labels-on-gep and/or " + "dfsan-combine-pointer-labels-on-load are false, this flag can " + "be used to re-enable combining offset and/or pointer taint when " + "loading specific constant global variables (i.e. lookup tables)."), + cl::Hidden); + static cl::opt ClDebugNonzeroLabels( "dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " @@ -433,6 +439,7 @@ class DataFlowSanitizer { FunctionType *DFSanUnionLoadFnTy; FunctionType *DFSanLoadLabelAndOriginFnTy; FunctionType *DFSanUnimplementedFnTy; + FunctionType *DFSanWrapperExternWeakNullFnTy; FunctionType *DFSanSetLabelFnTy; FunctionType *DFSanNonzeroLabelFnTy; FunctionType *DFSanVarargWrapperFnTy; @@ -448,6 +455,7 @@ class DataFlowSanitizer { FunctionCallee DFSanUnionLoadFn; FunctionCallee DFSanLoadLabelAndOriginFn; FunctionCallee DFSanUnimplementedFn; + FunctionCallee DFSanWrapperExternWeakNullFn; FunctionCallee DFSanSetLabelFn; FunctionCallee DFSanNonzeroLabelFn; FunctionCallee DFSanVarargWrapperFn; @@ -467,6 +475,7 @@ class DataFlowSanitizer { DFSanABIList ABIList; DenseMap UnwrappedFnMap; AttributeMask ReadOnlyNoneAttrs; + StringSet<> CombineTaintLookupTableNames; /// Memory map parameters used in calculation mapping application addresses /// to shadow addresses and origin addresses. 
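// A sketch, not from this patch, of the pattern the new
// -dfsan-combine-taint-lookup-table flag introduced above is aimed at. With
// pointer/offset label combining disabled, the taint of the index would
// otherwise not reach the loaded value (identifiers are invented):
static const int kTable[256] = {0 /* ... */};
static int lookupTainted(unsigned char TaintedIdx) {
  // The result inherits TaintedIdx's label only if kTable is listed via
  // -dfsan-combine-taint-lookup-table=kTable (or combining is on globally).
  return kTable[TaintedIdx];
}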
@@ -480,14 +489,13 @@ class DataFlowSanitizer { bool isInstrumented(const Function *F); bool isInstrumented(const GlobalAlias *GA); bool isForceZeroLabels(const Function *F); - FunctionType *getTrampolineFunctionType(FunctionType *T); TransformedFunction getCustomFunctionType(FunctionType *T); WrapperKind getWrapperKind(Function *F); void addGlobalNameSuffix(GlobalValue *GV); + void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F); Function *buildWrapperFunction(Function *F, StringRef NewFName, GlobalValue::LinkageTypes NewFLink, FunctionType *NewFT); - Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); void initializeCallbackFunctions(Module &M); void initializeRuntimeFunctions(Module &M); void injectMetadataGlobals(Module &M); @@ -658,6 +666,8 @@ struct DFSanFunction { // branch instruction using the given conditional expression. void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition); + bool isLookupTableConstant(Value *P); + private: /// Collapses the shadow with aggregate type into a single primitive shadow /// value. @@ -792,25 +802,9 @@ DataFlowSanitizer::DataFlowSanitizer( // FIXME: should we propagate vfs::FileSystem to this constructor? ABIList.set( SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem())); -} -FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { - assert(!T->isVarArg()); - SmallVector ArgTypes; - ArgTypes.push_back(T->getPointerTo()); - ArgTypes.append(T->param_begin(), T->param_end()); - ArgTypes.append(T->getNumParams(), PrimitiveShadowTy); - Type *RetType = T->getReturnType(); - if (!RetType->isVoidTy()) - ArgTypes.push_back(PrimitiveShadowPtrTy); - - if (shouldTrackOrigins()) { - ArgTypes.append(T->getNumParams(), OriginTy); - if (!RetType->isVoidTy()) - ArgTypes.push_back(OriginPtrTy); - } - - return FunctionType::get(T->getReturnType(), ArgTypes, false); + for (StringRef v : ClCombineTaintLookupTables) + CombineTaintLookupTableNames.insert(v); } TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { @@ -823,16 +817,8 @@ TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { std::vector ArgumentIndexMapping; for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) { Type *ParamType = T->getParamType(I); - FunctionType *FT; - if (isa(ParamType) && - (FT = dyn_cast(ParamType->getPointerElementType()))) { - ArgumentIndexMapping.push_back(ArgTypes.size()); - ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); - ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); - } else { - ArgumentIndexMapping.push_back(ArgTypes.size()); - ArgTypes.push_back(ParamType); - } + ArgumentIndexMapping.push_back(ArgTypes.size()); + ArgTypes.push_back(ParamType); } for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) ArgTypes.push_back(PrimitiveShadowTy); @@ -1058,6 +1044,10 @@ bool DataFlowSanitizer::initializeModule(Module &M) { /*isVarArg=*/false); DFSanUnimplementedFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr}; + DFSanWrapperExternWeakNullFnTy = + FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs, + /*isVarArg=*/false); Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy, Type::getInt8PtrTy(*Ctx), IntptrTy}; DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), @@ -1149,6 +1139,23 @@ void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) { } } +void 
DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, + Function *F) { + // If the function we are wrapping was ExternWeak, it may be null. + // The original code before calling this wrapper may have checked for null, + // but replacing with a known-to-not-be-null wrapper can break this check. + // When replacing uses of the extern weak function with the wrapper we try + // to avoid replacing uses in conditionals, but this is not perfect. + // In the case where we fail and accidentally optimize out a null check + // for an extern weak function, add a check here to help identify the issue. + if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) { + std::vector Args; + Args.push_back(IRB.CreatePointerCast(F, IRB.getInt8PtrTy())); + Args.push_back(IRB.CreateGlobalStringPtr(F->getName())); + IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args); + } +} + Function * DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, GlobalValue::LinkageTypes NewFLink, @@ -1181,61 +1188,6 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, return NewF; } -Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, - StringRef FName) { - FunctionType *FTT = getTrampolineFunctionType(FT); - FunctionCallee C = Mod->getOrInsertFunction(FName, FTT); - Function *F = dyn_cast(C.getCallee()); - if (F && F->isDeclaration()) { - F->setLinkage(GlobalValue::LinkOnceODRLinkage); - BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); - std::vector Args; - Function::arg_iterator AI = F->arg_begin() + 1; - for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) - Args.push_back(&*AI); - CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB); - Type *RetType = FT->getReturnType(); - ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB) - : ReturnInst::Create(*Ctx, CI, BB); - - // F is called by a wrapped custom function with primitive shadows. So - // its arguments and return value need conversion.
- DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true, - /*IsForceZeroLabels=*/false); - Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; - ++ValAI; - for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) { - Value *Shadow = - DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI); - DFSF.ValShadowMap[&*ValAI] = Shadow; - } - Function::arg_iterator RetShadowAI = ShadowAI; - const bool ShouldTrackOrigins = shouldTrackOrigins(); - if (ShouldTrackOrigins) { - ValAI = F->arg_begin(); - ++ValAI; - Function::arg_iterator OriginAI = ShadowAI; - if (!RetType->isVoidTy()) - ++OriginAI; - for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) { - DFSF.ValOriginMap[&*ValAI] = &*OriginAI; - } - } - DFSanVisitor(DFSF).visitCallInst(*CI); - if (!RetType->isVoidTy()) { - Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow( - DFSF.getShadow(RI->getReturnValue()), RI); - new StoreInst(PrimitiveShadow, &*RetShadowAI, RI); - if (ShouldTrackOrigins) { - Value *Origin = DFSF.getOrigin(RI->getReturnValue()); - new StoreInst(Origin, &*std::prev(F->arg_end()), RI); - } - } - } - - return cast(C.getCallee()); -} - // Initialize DataFlowSanitizer runtime functions and declare them in the module void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { { @@ -1256,6 +1208,8 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { } DFSanUnimplementedFn = Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); + DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction( + "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy); { AttributeList AL; AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); @@ -1299,6 +1253,8 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanUnimplementedFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( + DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanSetLabelFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( @@ -1500,7 +1456,40 @@ bool DataFlowSanitizer::runImpl(Module &M) { Value *WrappedFnCst = ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); - F.replaceAllUsesWith(WrappedFnCst); + + // Extern weak functions can sometimes be null at execution time. + // Code will sometimes check if an extern weak function is null. + // This could look something like: + // declare extern_weak i8 @my_func(i8) + // br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func, + // label %avoid_my_func + // The @"dfsw$my_func" wrapper is never null, so if we replace this use + // in the comparison, the icmp will simplify to false and we have + // accidentally optimized away a null check that is necessary. + // This can lead to a crash when the null extern_weak my_func is called. + // + // To prevent (the most common pattern of) this problem, + // do not replace uses in comparisons with the wrapper. + // We definitely want to replace uses in call instructions. + // Other uses (e.g. storing the function address somewhere) might be + // called or compared or both - this case may not be handled correctly. + // We will default to replacing with the wrapper in cases where we are unsure.
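// A source-level sketch, not from this patch, of the pattern the comment
// above describes; the icmp that must survive comes from code like this:
extern "C" int my_func(int) __attribute__((weak)); // extern_weak declaration
static int callIfPresent(int X) {
  // If this use of `my_func` were rewritten to the never-null
  // "dfsw$my_func" wrapper, the null check would fold away and a truly
  // absent my_func would be called, crashing at run time.
  return my_func ? my_func(X) : 0;
}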
+ auto IsNotCmpUse = [](Use &U) -> bool { + User *Usr = U.getUser(); + if (ConstantExpr *CE = dyn_cast(Usr)) { + // This is the most common case for icmp ne null + if (CE->getOpcode() == Instruction::ICmp) { + return false; + } + } + if (Instruction *I = dyn_cast(Usr)) { + if (I->getOpcode() == Instruction::ICmp) { + return false; + } + } + return true; + }; + F.replaceUsesWithIf(WrappedFnCst, IsNotCmpUse); UnwrappedFnMap[WrappedFnCst] = &F; *FI = NewF; @@ -1919,6 +1908,14 @@ Align DFSanFunction::getOriginAlign(Align InstAlignment) { return Align(std::max(MinOriginAlignment, Alignment)); } +bool DFSanFunction::isLookupTableConstant(Value *P) { + if (GlobalVariable *GV = dyn_cast(P->stripPointerCasts())) + if (GV->isConstant() && GV->hasName()) + return DFS.CombineTaintLookupTableNames.count(GV->getName()); + + return false; +} + bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment) { // When enabling tracking load instructions, we always use @@ -2172,6 +2169,29 @@ static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) { llvm_unreachable("Unknown ordering"); } +Value *StripPointerGEPsAndCasts(Value *V) { + if (!V->getType()->isPointerTy()) + return V; + + // DFSan pass should be running on valid IR, but we'll + // keep a seen set to ensure there are no issues. + SmallPtrSet Visited; + Visited.insert(V); + do { + if (auto *GEP = dyn_cast(V)) { + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + if (!V->getType()->isPointerTy()) + return V; + } else if (isa(V)) { + V = cast(V)->getAliasee(); + } + } while (Visited.insert(V).second); + + return V; +} + void DFSanVisitor::visitLoadInst(LoadInst &LI) { auto &DL = LI.getModule()->getDataLayout(); uint64_t Size = DL.getTypeStoreSize(LI.getType()); @@ -2200,7 +2220,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { Shadows.push_back(PrimitiveShadow); Origins.push_back(Origin); } - if (ClCombinePointerLabelsOnLoad) { + if (ClCombinePointerLabelsOnLoad || + DFSF.isLookupTableConstant( + StripPointerGEPsAndCasts(LI.getPointerOperand()))) { Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos); if (ShouldTrackOrigins) { @@ -2562,7 +2584,9 @@ void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) { } void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { - if (ClCombineOffsetLabelsOnGEP) { + if (ClCombineOffsetLabelsOnGEP || + DFSF.isLookupTableConstant( + StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) { visitInstOperands(GEPI); return; } @@ -2722,13 +2746,8 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { auto *MTI = cast( IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); - if (ClPreserveAlignment) { - MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes); - MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes); - } else { - MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes)); - MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes)); - } + MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne())); + MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne())); if (ClEventCallbacks) { IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn, {RawDestShadow, @@ -2864,16 +2883,19 @@ bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) { CB.setCalledFunction(&F); 
IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, IRB.CreateGlobalStringPtr(F.getName())); + DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F); DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin); return true; case DataFlowSanitizer::WK_Discard: CB.setCalledFunction(&F); + DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F); DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin); return true; case DataFlowSanitizer::WK_Functional: CB.setCalledFunction(&F); + DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F); visitInstOperands(CB); return true; case DataFlowSanitizer::WK_Custom: @@ -2905,22 +2927,7 @@ bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) { // Adds non-variable arguments. auto *I = CB.arg_begin(); for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) { - Type *T = (*I)->getType(); - FunctionType *ParamFT; - if (isa(T) && - (ParamFT = dyn_cast(T->getPointerElementType()))) { - std::string TName = "dfst"; - TName += utostr(FT->getNumParams() - N); - TName += "$"; - TName += F.getName(); - Constant *Trampoline = - DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName); - Args.push_back(Trampoline); - Args.push_back( - IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx))); - } else { - Args.push_back(*I); - } + Args.push_back(*I); } // Adds shadow arguments. diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 325089fc4402..ac4a1fd6bb7e 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -14,19 +14,15 @@ //===----------------------------------------------------------------------===// #include "CFGMST.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" @@ -34,8 +30,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -87,7 +81,7 @@ GCOVOptions GCOVOptions::getDefault() { if (DefaultGCOVVersion.size() != 4) { llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") + - DefaultGCOVVersion); + DefaultGCOVVersion, /*GenCrashDiag=*/false); } memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); return Options; @@ -169,39 +163,6 @@ private: StringMap InstrumentedFiles; }; -class GCOVProfilerLegacyPass : public ModulePass { -public: - static char ID; - GCOVProfilerLegacyPass() - : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {} - GCOVProfilerLegacyPass(const GCOVOptions &Opts) - : ModulePass(ID), Profiler(Opts) { - initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry()); - } - StringRef getPassName() const override { return "GCOV Profiler"; } - - bool runOnModule(Module &M) override { - auto GetBFI = [this](Function &F) { - return &this->getAnalysis(F).getBFI(); - }; - auto GetBPI = [this](Function &F) { - return 
&this->getAnalysis(F).getBPI(); - }; - auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - } - -private: - GCOVProfiler Profiler; -}; - struct BBInfo { BBInfo *Group; uint32_t Index; @@ -237,21 +198,6 @@ struct Edge { }; } -char GCOVProfilerLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN( - GCOVProfilerLegacyPass, "insert-gcov-profiling", - "Insert instrumentation for GCOV profiling", false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END( - GCOVProfilerLegacyPass, "insert-gcov-profiling", - "Insert instrumentation for GCOV profiling", false, false) - -ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { - return new GCOVProfilerLegacyPass(Options); -} - static StringRef getFunctionName(const DISubprogram *SP) { if (!SP->getLinkageName().empty()) return SP->getLinkageName(); @@ -862,7 +808,8 @@ bool GCOVProfiler::emitProfileNotes( // Split indirectbr critical edges here before computing the MST rather // than later in getInstrBB() to avoid invalidating it. - SplitIndirectBrCriticalEdges(F, BPI, BFI); + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, + BFI); CFGMST MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 7b3741d19a1b..218b4bbfb6c0 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -13,14 +13,15 @@ #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -33,7 +34,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -43,19 +44,15 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/MemoryTaggingSupport.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include using namespace llvm; @@ -83,6 +80,11 @@ static cl::opt cl::desc("Prefix for memory access callbacks"), cl::Hidden, cl::init("__hwasan_")); +static cl::opt 
ClKasanMemIntrinCallbackPrefix( + "hwasan-kernel-mem-intrinsic-prefix", + cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden, + cl::init(false)); + static cl::opt ClInstrumentWithCalls( "hwasan-instrument-with-calls", cl::desc("instrument reads and writes with callbacks"), cl::Hidden, @@ -145,7 +147,7 @@ static cl::opt ClGenerateTagsWithCalls( cl::init(false)); static cl::opt ClGlobals("hwasan-globals", cl::desc("Instrument globals"), - cl::Hidden, cl::init(false), cl::ZeroOrMore); + cl::Hidden, cl::init(false)); static cl::opt ClMatchAllTag( "hwasan-match-all-tag", @@ -191,17 +193,16 @@ static cl::opt static cl::opt ClInstrumentLandingPads("hwasan-instrument-landing-pads", cl::desc("instrument landing pads"), cl::Hidden, - cl::init(false), cl::ZeroOrMore); + cl::init(false)); static cl::opt ClUseShortGranules( "hwasan-use-short-granules", cl::desc("use short granules in allocas and outlined checks"), cl::Hidden, - cl::init(false), cl::ZeroOrMore); + cl::init(false)); static cl::opt ClInstrumentPersonalityFunctions( "hwasan-instrument-personality-functions", - cl::desc("instrument personality functions"), cl::Hidden, cl::init(false), - cl::ZeroOrMore); + cl::desc("instrument personality functions"), cl::Hidden); static cl::opt ClInlineAllChecks("hwasan-inline-all-checks", cl::desc("inline all checks"), @@ -244,13 +245,6 @@ bool shouldDetectUseAfterScope(const Triple &TargetTriple) { /// An instrumentation pass implementing detection of addressability bugs /// using tagged pointers. class HWAddressSanitizer { -private: - struct AllocaInfo { - AllocaInst *AI; - SmallVector LifetimeStart; - SmallVector LifetimeEnd; - }; - public: HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover, const StackSafetyGlobalInfo *SSI) @@ -265,11 +259,7 @@ public: void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; } - DenseMap padInterestingAllocas( - const MapVector &AllocasToInstrument); - bool sanitizeFunction(Function &F, - llvm::function_ref GetDT, - llvm::function_ref GetPDT); + bool sanitizeFunction(Function &F, FunctionAnalysisManager &FAM); void initializeModule(); void createHwasanCtorComdat(); @@ -301,16 +291,9 @@ public: void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size); Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); - static bool isStandardLifetime(const AllocaInfo &AllocaInfo, - const DominatorTree &DT); - bool instrumentStack( - bool ShouldDetectUseAfterScope, - MapVector &AllocasToInstrument, - SmallVector &UnrecognizedLifetimes, - DenseMap> &AllocaDbgMap, - SmallVectorImpl &RetVec, Value *StackTag, - llvm::function_ref GetDT, - llvm::function_ref GetPDT); + bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, + const DominatorTree &DT, const PostDominatorTree &PDT, + const LoopInfo &LI); Value *readRegister(IRBuilder<> &IRB, StringRef Name); bool instrumentLandingPads(SmallVectorImpl &RetVec); Value *getNextTagWithCall(IRBuilder<> &IRB); @@ -328,6 +311,9 @@ public: void instrumentGlobal(GlobalVariable *GV, uint8_t Tag); void instrumentGlobals(); + Value *getPC(IRBuilder<> &IRB); + Value *getSP(IRBuilder<> &IRB); + void instrumentPersonalityFunctions(); private: @@ -397,96 +383,12 @@ private: Value *ShadowBase = nullptr; Value *StackBaseTag = nullptr; + Value *CachedSP = nullptr; GlobalValue *ThreadPtrGlobal = nullptr; }; -class HWAddressSanitizerLegacyPass : public FunctionPass { -public: - // Pass identification, replacement for typeid. 
- static char ID; - - explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false, - bool Recover = false, - bool DisableOptimization = false) - : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover), - DisableOptimization(DisableOptimization) { - initializeHWAddressSanitizerLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "HWAddressSanitizer"; } - - bool doInitialization(Module &M) override { - HWASan = std::make_unique(M, CompileKernel, Recover, - /*SSI=*/nullptr); - return true; - } - - bool runOnFunction(Function &F) override { - auto TargetTriple = Triple(F.getParent()->getTargetTriple()); - if (shouldUseStackSafetyAnalysis(TargetTriple, DisableOptimization)) { - // We cannot call getAnalysis in doInitialization, that would cause a - // crash as the required analyses are not initialized yet. - HWASan->setSSI( - &getAnalysis().getResult()); - } - return HWASan->sanitizeFunction( - F, - [&]() -> const DominatorTree & { - return getAnalysis().getDomTree(); - }, - [&]() -> const PostDominatorTree & { - return getAnalysis().getPostDomTree(); - }); - } - - bool doFinalization(Module &M) override { - HWASan.reset(); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - // This is an over-estimation of, in case we are building for an - // architecture that doesn't allow stack tagging we will still load the - // analysis. - // This is so we don't need to plumb TargetTriple all the way to here. - if (mightUseStackSafetyAnalysis(DisableOptimization)) - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - -private: - std::unique_ptr HWASan; - bool CompileKernel; - bool Recover; - bool DisableOptimization; -}; - } // end anonymous namespace -char HWAddressSanitizerLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN( - HWAddressSanitizerLegacyPass, "hwasan", - "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, - false) -INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END( - HWAddressSanitizerLegacyPass, "hwasan", - "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, - false) - -FunctionPass * -llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover, - bool DisableOptimization) { - assert(!CompileKernel || Recover); - return new HWAddressSanitizerLegacyPass(CompileKernel, Recover, - DisableOptimization); -} - PreservedAnalyses HWAddressSanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) { const StackSafetyGlobalInfo *SSI = nullptr; @@ -497,16 +399,8 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M, HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI); bool Modified = false; auto &FAM = MAM.getResult(M).getManager(); - for (Function &F : M) { - Modified |= HWASan.sanitizeFunction( - F, - [&]() -> const DominatorTree & { - return FAM.getResult(F); - }, - [&]() -> const PostDominatorTree & { - return FAM.getResult(F); - }); - } + for (Function &F : M) + Modified |= HWASan.sanitizeFunction(F, FAM); if (Modified) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -739,7 +633,9 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) { ArrayType::get(IRB.getInt8Ty(), 0)); const std::string MemIntrinCallbackPrefix = - CompileKernel ? 
std::string("") : ClMemoryAccessCallbackPrefix; + (CompileKernel && !ClKasanMemIntrinCallbackPrefix) + ? std::string("") + : ClMemoryAccessCallbackPrefix; HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); @@ -812,7 +708,7 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { void HWAddressSanitizer::getInterestingMemoryOperands( Instruction *I, SmallVectorImpl &Interesting) { // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) + if (I->hasMetadata(LLVMContext::MD_nosanitize)) return; // Do not instrument the load fetching the dynamic shadow address. @@ -1056,18 +952,6 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { return true; } -static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { - uint64_t ArraySize = 1; - if (AI.isArrayAllocation()) { - const ConstantInt *CI = dyn_cast(AI.getArraySize()); - assert(CI && "non-constant array size"); - ArraySize = CI->getZExtValue(); - } - Type *Ty = AI.getAllocatedType(); - uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty); - return SizeInBytes * ArraySize; -} - void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size) { size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); @@ -1141,19 +1025,10 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { return getNextTagWithCall(IRB); if (StackBaseTag) return StackBaseTag; - // FIXME: use addressofreturnaddress (but implement it in aarch64 backend - // first). - Module *M = IRB.GetInsertBlock()->getParent()->getParent(); - auto GetStackPointerFn = Intrinsic::getDeclaration( - M, Intrinsic::frameaddress, - IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); - Value *StackPointer = IRB.CreateCall( - GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}); - // Extract some entropy from the stack pointer for the tags. // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ // between functions). - Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy); + Value *StackPointerLong = getSP(IRB); Value *StackTag = applyTagMask(IRB, IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20))); @@ -1233,6 +1108,30 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) { return nullptr; } +Value *HWAddressSanitizer::getPC(IRBuilder<> &IRB) { + if (TargetTriple.getArch() == Triple::aarch64) + return readRegister(IRB, "pc"); + else + return IRB.CreatePtrToInt(IRB.GetInsertBlock()->getParent(), IntptrTy); +} + +Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) { + if (!CachedSP) { + // FIXME: use addressofreturnaddress (but implement it in aarch64 backend + // first). + Function *F = IRB.GetInsertBlock()->getParent(); + Module *M = F->getParent(); + auto GetStackPointerFn = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); + CachedSP = IRB.CreatePtrToInt( + IRB.CreateCall(GetStackPointerFn, + {Constant::getNullValue(IRB.getInt32Ty())}), + IntptrTy); + } + return CachedSP; +} + void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { if (!Mapping.InTls) ShadowBase = getShadowNonTls(IRB); @@ -1251,23 +1150,12 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { TargetTriple.isAArch64() ? 
ThreadLong : untagPointer(IRB, ThreadLong); if (WithFrameRecord) { - Function *F = IRB.GetInsertBlock()->getParent(); StackBaseTag = IRB.CreateAShr(ThreadLong, 3); // Prepare ring buffer data. - Value *PC; - if (TargetTriple.getArch() == Triple::aarch64) - PC = readRegister(IRB, "pc"); - else - PC = IRB.CreatePtrToInt(F, IntptrTy); - Module *M = F->getParent(); - auto GetStackPointerFn = Intrinsic::getDeclaration( - M, Intrinsic::frameaddress, - IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); - Value *SP = IRB.CreatePtrToInt( - IRB.CreateCall(GetStackPointerFn, - {Constant::getNullValue(IRB.getInt32Ty())}), - IntptrTy); + Value *PC = getPC(IRB); + Value *SP = getSP(IRB); + // Mix SP and PC. // Assumptions: // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) @@ -1330,43 +1218,16 @@ bool HWAddressSanitizer::instrumentLandingPads( return true; } -static bool -maybeReachableFromEachOther(const SmallVectorImpl &Insts, - const DominatorTree &DT) { - // If we have too many lifetime ends, give up, as the algorithm below is N^2. - if (Insts.size() > ClMaxLifetimes) - return true; - for (size_t I = 0; I < Insts.size(); ++I) { - for (size_t J = 0; J < Insts.size(); ++J) { - if (I == J) - continue; - if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, &DT)) - return true; - } - } - return false; -} - -// static -bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo, - const DominatorTree &DT) { - // An alloca that has exactly one start and end in every possible execution. - // If it has multiple ends, they have to be unreachable from each other, so - // at most one of them is actually used for each execution of the function. - return AllocaInfo.LifetimeStart.size() == 1 && - (AllocaInfo.LifetimeEnd.size() == 1 || - (AllocaInfo.LifetimeEnd.size() > 0 && - !maybeReachableFromEachOther(AllocaInfo.LifetimeEnd, DT))); +static bool isLifetimeIntrinsic(Value *V) { + auto *II = dyn_cast(V); + return II && II->isLifetimeStartOrEnd(); } -bool HWAddressSanitizer::instrumentStack( - bool ShouldDetectUseAfterScope, - MapVector &AllocasToInstrument, - SmallVector &UnrecognizedLifetimes, - DenseMap> &AllocaDbgMap, - SmallVectorImpl &RetVec, Value *StackTag, - llvm::function_ref GetDT, - llvm::function_ref GetPDT) { +bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, + Value *StackTag, + const DominatorTree &DT, + const PostDominatorTree &PDT, + const LoopInfo &LI) { // Ideally, we want to calculate tagged stack base pointer, and rewrite all // alloca addresses using that. Unfortunately, offsets are not known yet // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a @@ -1374,10 +1235,10 @@ bool HWAddressSanitizer::instrumentStack( // This generates one extra instruction per alloca use. unsigned int I = 0; - for (auto &KV : AllocasToInstrument) { + for (auto &KV : SInfo.AllocasToInstrument) { auto N = I++; auto *AI = KV.first; - AllocaInfo &Info = KV.second; + memtag::AllocaInfo &Info = KV.second; IRBuilder<> IRB(AI->getNextNode()); // Replace uses of the alloca with tagged address. @@ -1388,10 +1249,34 @@ bool HWAddressSanitizer::instrumentStack( AI->hasName() ? AI->getName().str() : "alloca." 
+ itostr(N); Replacement->setName(Name + ".hwasan"); - AI->replaceUsesWithIf(Replacement, - [AILong](Use &U) { return U.getUser() != AILong; }); + size_t Size = memtag::getAllocaSizeInBytes(*AI); + size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); + + Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy); + + auto HandleLifetime = [&](IntrinsicInst *II) { + // Set the lifetime intrinsic to cover the whole alloca. This reduces the + // set of assumptions we need to make about the lifetime. Without this we + // would need to ensure that we can track the lifetime pointer to a + // constant offset from the alloca, and would still need to change the + // size to include the extra alignment we use for the untagging to make + // the size consistent. + // + // The check for standard lifetime below makes sure that we have exactly + // one set of start / end in any execution (i.e. the ends are not + // reachable from each other), so this will not cause any problems. + II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize)); + II->setArgOperand(1, AICast); + }; + llvm::for_each(Info.LifetimeStart, HandleLifetime); + llvm::for_each(Info.LifetimeEnd, HandleLifetime); - for (auto *DDI : AllocaDbgMap.lookup(AI)) { + AI->replaceUsesWithIf(Replacement, [AICast, AILong](Use &U) { + auto *User = U.getUser(); + return User != AILong && User != AICast && !isLifetimeIntrinsic(User); + }); + + for (auto *DDI : Info.DbgVariableIntrinsics) { // Prepend "tag_offset, N" to the dwarf expression. // Tag offset logically applies to the alloca pointer, and it makes sense // to put it at the beginning of the expression. @@ -1403,37 +1288,47 @@ bool HWAddressSanitizer::instrumentStack( NewOps, LocNo)); } - size_t Size = getAllocaSizeInBytes(*AI); - size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); auto TagEnd = [&](Instruction *Node) { IRB.SetInsertPoint(Node); Value *UARTag = getUARTag(IRB, StackTag); + // When untagging, use the `AlignedSize` because we need to set the tags + // for the entire alloca to zero. If we used `Size` here, we would + // keep the last granule tagged, and store zero in the last byte of the + // last granule, due to how short granules are implemented. tagAlloca(IRB, AI, UARTag, AlignedSize); }; + // Calls to functions that may return twice (e.g. setjmp) confuse the + // postdominator analysis, and will leave us to keep memory tagged after + // function return. Work around this by always untagging at every return + // statement if return_twice functions are called. 
bool StandardLifetime = - UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT()); - if (ShouldDetectUseAfterScope && StandardLifetime) { + SInfo.UnrecognizedLifetimes.empty() && + memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT, + &LI, ClMaxLifetimes) && + !SInfo.CallsReturnTwice; + if (DetectUseAfterScope && StandardLifetime) { IntrinsicInst *Start = Info.LifetimeStart[0]; IRB.SetInsertPoint(Start->getNextNode()); tagAlloca(IRB, AI, Tag, Size); - if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd, - RetVec, TagEnd)) { + if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd, + SInfo.RetVec, TagEnd)) { for (auto *End : Info.LifetimeEnd) End->eraseFromParent(); } } else { tagAlloca(IRB, AI, Tag, Size); - for (auto *RI : RetVec) + for (auto *RI : SInfo.RetVec) TagEnd(RI); - if (!StandardLifetime) { - for (auto &II : Info.LifetimeStart) - II->eraseFromParent(); - for (auto &II : Info.LifetimeEnd) - II->eraseFromParent(); - } + // We inserted tagging outside of the lifetimes, so we have to remove + // them. + for (auto &II : Info.LifetimeStart) + II->eraseFromParent(); + for (auto &II : Info.LifetimeEnd) + II->eraseFromParent(); } + memtag::alignAndPadAlloca(Info, Align(Mapping.getObjectAlignment())); } - for (auto &I : UnrecognizedLifetimes) + for (auto &I : SInfo.UnrecognizedLifetimes) I->eraseFromParent(); return true; } @@ -1443,7 +1338,7 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { // FIXME: instrument dynamic allocas, too AI.isStaticAlloca() && // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(AI) > 0 && + memtag::getAllocaSizeInBytes(AI) > 0 && // We are only interested in allocas not promotable to registers. // Promotable allocas are common under -O0. 
!isAllocaPromotable(&AI) && @@ -1456,42 +1351,8 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { !(SSI && SSI->isSafe(AI)); } -DenseMap HWAddressSanitizer::padInterestingAllocas( - const MapVector &AllocasToInstrument) { - DenseMap AllocaToPaddedAllocaMap; - for (auto &KV : AllocasToInstrument) { - AllocaInst *AI = KV.first; - uint64_t Size = getAllocaSizeInBytes(*AI); - uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); - AI->setAlignment( - Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment()))); - if (Size != AlignedSize) { - Type *AllocatedType = AI->getAllocatedType(); - if (AI->isArrayAllocation()) { - uint64_t ArraySize = - cast(AI->getArraySize())->getZExtValue(); - AllocatedType = ArrayType::get(AllocatedType, ArraySize); - } - Type *TypeWithPadding = StructType::get( - AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size)); - auto *NewAI = new AllocaInst( - TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI); - NewAI->takeName(AI); - NewAI->setAlignment(AI->getAlign()); - NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca()); - NewAI->setSwiftError(AI->isSwiftError()); - NewAI->copyMetadata(*AI); - auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI); - AI->replaceAllUsesWith(Bitcast); - AllocaToPaddedAllocaMap[AI] = NewAI; - } - } - return AllocaToPaddedAllocaMap; -} - -bool HWAddressSanitizer::sanitizeFunction( - Function &F, llvm::function_ref GetDT, - llvm::function_ref GetPDT) { +bool HWAddressSanitizer::sanitizeFunction(Function &F, + FunctionAnalysisManager &FAM) { if (&F == HwasanCtorFunction) return false; @@ -1502,72 +1363,27 @@ bool HWAddressSanitizer::sanitizeFunction( SmallVector OperandsToInstrument; SmallVector IntrinToInstrument; - MapVector AllocasToInstrument; - SmallVector RetVec; SmallVector LandingPadVec; - SmallVector UnrecognizedLifetimes; - DenseMap> AllocaDbgMap; - bool CallsReturnTwice = false; - for (auto &BB : F) { - for (auto &Inst : BB) { - if (CallInst *CI = dyn_cast(&Inst)) { - if (CI->canReturnTwice()) { - CallsReturnTwice = true; - } - } - if (InstrumentStack) { - if (AllocaInst *AI = dyn_cast(&Inst)) { - if (isInterestingAlloca(*AI)) - AllocasToInstrument.insert({AI, {}}); - continue; - } - auto *II = dyn_cast(&Inst); - if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end)) { - AllocaInst *AI = findAllocaForValue(II->getArgOperand(1)); - if (!AI) { - UnrecognizedLifetimes.push_back(&Inst); - continue; - } - if (!isInterestingAlloca(*AI)) - continue; - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - AllocasToInstrument[AI].LifetimeStart.push_back(II); - else - AllocasToInstrument[AI].LifetimeEnd.push_back(II); - continue; - } - } - if (isa(Inst)) { - if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall()) - RetVec.push_back(CI); - else - RetVec.push_back(&Inst); - } else if (isa(Inst)) { - RetVec.push_back(&Inst); - } - - if (auto *DVI = dyn_cast(&Inst)) { - for (Value *V : DVI->location_ops()) { - if (auto *Alloca = dyn_cast_or_null(V)) - if (!AllocaDbgMap.count(Alloca) || - AllocaDbgMap[Alloca].back() != DVI) - AllocaDbgMap[Alloca].push_back(DVI); - } - } + memtag::StackInfoBuilder SIB( + [this](const AllocaInst &AI) { return isInterestingAlloca(AI); }); + for (auto &Inst : instructions(F)) { + if (InstrumentStack) { + SIB.visit(Inst); + } - if (InstrumentLandingPads && isa(Inst)) - LandingPadVec.push_back(&Inst); + if (InstrumentLandingPads && isa(Inst)) + 
LandingPadVec.push_back(&Inst); - getInterestingMemoryOperands(&Inst, OperandsToInstrument); + getInterestingMemoryOperands(&Inst, OperandsToInstrument); - if (MemIntrinsic *MI = dyn_cast(&Inst)) - if (!ignoreMemIntrinsic(MI)) - IntrinToInstrument.push_back(MI); - } + if (MemIntrinsic *MI = dyn_cast(&Inst)) + if (!ignoreMemIntrinsic(MI)) + IntrinToInstrument.push_back(MI); } + memtag::StackInfo &SInfo = SIB.get(); + initializeCallbacks(*F.getParent()); bool Changed = false; @@ -1575,7 +1391,7 @@ bool HWAddressSanitizer::sanitizeFunction( if (!LandingPadVec.empty()) Changed |= instrumentLandingPads(LandingPadVec); - if (AllocasToInstrument.empty() && F.hasPersonalityFn() && + if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() && F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) { // __hwasan_personality_thunk is a no-op for functions without an // instrumented stack, so we can drop it. @@ -1583,7 +1399,7 @@ bool HWAddressSanitizer::sanitizeFunction( Changed = true; } - if (AllocasToInstrument.empty() && OperandsToInstrument.empty() && + if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() && IntrinToInstrument.empty()) return Changed; @@ -1593,42 +1409,16 @@ bool HWAddressSanitizer::sanitizeFunction( IRBuilder<> EntryIRB(InsertPt); emitPrologue(EntryIRB, /*WithFrameRecord*/ ClRecordStackHistory && - Mapping.WithFrameRecord && !AllocasToInstrument.empty()); + Mapping.WithFrameRecord && + !SInfo.AllocasToInstrument.empty()); - if (!AllocasToInstrument.empty()) { + if (!SInfo.AllocasToInstrument.empty()) { + const DominatorTree &DT = FAM.getResult(F); + const PostDominatorTree &PDT = FAM.getResult(F); + const LoopInfo &LI = FAM.getResult(F); Value *StackTag = ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); - // Calls to functions that may return twice (e.g. setjmp) confuse the - // postdominator analysis, and will leave us to keep memory tagged after - // function return. Work around this by always untagging at every return - // statement if return_twice functions are called. - instrumentStack(DetectUseAfterScope && !CallsReturnTwice, - AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap, - RetVec, StackTag, GetDT, GetPDT); - } - // Pad and align each of the allocas that we instrumented to stop small - // uninteresting allocas from hiding in instrumented alloca's padding and so - // that we have enough space to store real tags for short granules. 
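// A minimal model, not from this patch, of the padding rule described in the
// comment above, which the removed code below implemented and
// memtag::alignAndPadAlloca now provides. Sizes round up to the tag granule
// (16 bytes for AArch64 HWASan) so a short granule still has room to store
// the real tag in its last byte:
#include <cstdint>
static uint64_t paddedAllocaSize(uint64_t Size, uint64_t Granule = 16) {
  const uint64_t AlignedSize = (Size + Granule - 1) & ~(Granule - 1);
  // e.g. Size = 5 -> 16; the alloca is rebuilt as
  // { OriginalType, [AlignedSize - Size] x i8 } at >= Granule alignment.
  return AlignedSize;
}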
- DenseMap AllocaToPaddedAllocaMap = - padInterestingAllocas(AllocasToInstrument); - - if (!AllocaToPaddedAllocaMap.empty()) { - for (auto &BB : F) { - for (auto &Inst : BB) { - if (auto *DVI = dyn_cast(&Inst)) { - SmallDenseSet LocationOps(DVI->location_ops().begin(), - DVI->location_ops().end()); - for (Value *V : LocationOps) { - if (auto *AI = dyn_cast_or_null(V)) { - if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI)) - DVI->replaceVariableLocationOp(V, NewAI); - } - } - } - } - } - for (auto &P : AllocaToPaddedAllocaMap) - P.first->eraseFromParent(); + instrumentStack(SInfo, StackTag, DT, PDT, LI); } // If we split the entry block, move any allocas that were originally in the @@ -1654,6 +1444,7 @@ bool HWAddressSanitizer::sanitizeFunction( ShadowBase = nullptr; StackBaseTag = nullptr; + CachedSP = nullptr; return true; } @@ -1735,34 +1526,10 @@ void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) { GV->eraseFromParent(); } -static DenseSet getExcludedGlobals(Module &M) { - NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); - if (!Globals) - return DenseSet(); - DenseSet Excluded(Globals->getNumOperands()); - for (auto MDN : Globals->operands()) { - // Metadata node contains the global and the fields of "Entry". - assert(MDN->getNumOperands() == 5); - auto *V = mdconst::extract_or_null(MDN->getOperand(0)); - // The optimizer may optimize away a global entirely. - if (!V) - continue; - auto *StrippedV = V->stripPointerCasts(); - auto *GV = dyn_cast(StrippedV); - if (!GV) - continue; - ConstantInt *IsExcluded = mdconst::extract(MDN->getOperand(4)); - if (IsExcluded->isOne()) - Excluded.insert(GV); - } - return Excluded; -} - void HWAddressSanitizer::instrumentGlobals() { std::vector Globals; - auto ExcludedGlobals = getExcludedGlobals(M); for (GlobalVariable &GV : M.globals()) { - if (ExcludedGlobals.count(&GV)) + if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress) continue; if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") || diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 9a3afa9cc924..3ef06907dfee 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -13,30 +13,20 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -45,7 +35,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include 
"llvm/Transforms/Instrumentation/PGOInstrumentation.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include #include @@ -71,13 +60,13 @@ static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden, // value. // For debug use only. static cl::opt - ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, + ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation")); // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. // For debug use only. static cl::opt - ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, + ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation")); // Set if the pass is called in LTO optimization. The difference for LTO mode @@ -115,55 +104,6 @@ static cl::opt namespace { -class PGOIndirectCallPromotionLegacyPass : public ModulePass { -public: - static char ID; - - PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false) - : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) { - initializePGOIndirectCallPromotionLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - } - - StringRef getPassName() const override { return "PGOIndirectCallPromotion"; } - -private: - bool runOnModule(Module &M) override; - - // If this pass is called in LTO. We need to special handling the PGOFuncName - // for the static variables due to LTO's internalization. - bool InLTO; - - // If this pass is called in SamplePGO. We need to add the prof metadata to - // the promoted direct call. - bool SamplePGO; -}; - -} // end anonymous namespace - -char PGOIndirectCallPromotionLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", - "Use PGO instrumentation profile to promote indirect " - "calls to direct calls.", - false, false) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", - "Use PGO instrumentation profile to promote indirect " - "calls to direct calls.", - false, false) - -ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, - bool SamplePGO) { - return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); -} - -namespace { - // The class for main data structure to promote indirect calls to conditional // direct calls. class ICallPromotionFunc { @@ -428,15 +368,6 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, return Changed; } -bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) { - ProfileSummaryInfo *PSI = - &getAnalysis().getPSI(); - - // Command-line option has the priority for InLTO. 
- return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, - SamplePGO | ICPSamplePGOMode); -} - PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, ModuleAnalysisManager &AM) { ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp index 3ea314329079..2091881c29fe 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -9,29 +9,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" -#include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include -#include #include -#include #include using namespace llvm; @@ -61,7 +54,7 @@ private: ArrayType *MapTy; public: - InstrOrderFile() {} + InstrOrderFile() = default; void createOrderFileData(Module &M) { LLVMContext &Ctx = M.getContext(); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 6868408ef5f5..7843b1522830 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -47,12 +47,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include #include -#include #include #include @@ -62,7 +60,7 @@ using namespace llvm; namespace llvm { cl::opt - DebugInfoCorrelate("debug-info-correlate", cl::ZeroOrMore, + DebugInfoCorrelate("debug-info-correlate", cl::desc("Use debug info to correlate profiles."), cl::init(false)); } // namespace llvm @@ -95,18 +93,18 @@ cl::opt NumCountersPerValueSite( cl::init(1.0)); cl::opt AtomicCounterUpdateAll( - "instrprof-atomic-counter-update-all", cl::ZeroOrMore, + "instrprof-atomic-counter-update-all", cl::desc("Make all profile counter updates atomic (for testing only)"), cl::init(false)); cl::opt AtomicCounterUpdatePromoted( - "atomic-counter-update-promoted", cl::ZeroOrMore, + "atomic-counter-update-promoted", cl::desc("Do counter update using atomic fetch add " " for promoted counters only"), cl::init(false)); cl::opt AtomicFirstCounter( - "atomic-first-counter", cl::ZeroOrMore, + "atomic-first-counter", cl::desc("Use atomic fetch add for first counter in a function (usually " "the entry counter)"), cl::init(false)); @@ -116,37 +114,37 @@ cl::opt AtomicFirstCounter( // pipeline is setup, i.e., the default value of true of this option // does not mean the promotion will be done by default. Explicitly // setting this option can override the default behavior. 
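With the legacy wrapper deleted above, only the new-pass-manager entry point (PGOIndirectCallPromotion::run) remains. For reference, the minimal shape such a replacement takes; ExampleModulePass is hypothetical, the run() signature is the real API:

// Sketch: the new-PM shape that replaces a deleted ModulePass wrapper.
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct ExampleModulePass : PassInfoMixin<ExampleModulePass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
    bool Changed = false;
    // Analyses are pulled from AM here instead of being declared through
    // getAnalysisUsage(); there is no INITIALIZE_PASS registration at all.
    return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
};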
-cl::opt DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, +cl::opt DoCounterPromotion("do-counter-promotion", cl::desc("Do counter register promotion"), cl::init(false)); cl::opt MaxNumOfPromotionsPerLoop( - cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), + "max-counter-promotions-per-loop", cl::init(20), cl::desc("Max number counter promotions per loop to avoid" " increasing register pressure too much")); // A debug option cl::opt - MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), + MaxNumOfPromotions("max-counter-promotions", cl::init(-1), cl::desc("Max number of allowed counter promotions")); cl::opt SpeculativeCounterPromotionMaxExiting( - cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), + "speculative-counter-promotion-max-exiting", cl::init(3), cl::desc("The max number of exiting blocks of a loop to allow " " speculative counter promotion")); cl::opt SpeculativeCounterPromotionToLoop( - cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), + "speculative-counter-promotion-to-loop", cl::desc("When the option is false, if the target block is in a loop, " "the promotion will be disallowed unless the promoted counter " " update can be further/iteratively promoted into an acyclic " " region.")); cl::opt IterativeCounterPromotion( - cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), + "iterative-counter-promotion", cl::init(true), cl::desc("Allow counter promotion across the whole loop nest.")); cl::opt SkipRetExitBlock( - cl::ZeroOrMore, "skip-ret-exit-block", cl::init(true), + "skip-ret-exit-block", cl::init(true), cl::desc("Suppress counter promotion if exit blocks contain ret.")); class InstrProfilingLegacyPass : public ModulePass { @@ -211,6 +209,18 @@ public: Value *Addr = cast(Store)->getPointerOperand(); Type *Ty = LiveInValue->getType(); IRBuilder<> Builder(InsertPos); + if (auto *AddrInst = dyn_cast_or_null(Addr)) { + // If isRuntimeCounterRelocationEnabled() is true then the address of + // the store instruction is computed with two instructions in + // InstrProfiling::getCounterAddress(). We need to copy those + // instructions to this block to compute Addr correctly. + // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias> + // %Addr = inttoptr i64 %BiasAdd to i64* + auto *OrigBiasInst = dyn_cast(AddrInst->getOperand(0)); + assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add); + Value *BiasInst = Builder.Insert(OrigBiasInst->clone()); + Addr = Builder.CreateIntToPtr(BiasInst, Ty->getPointerTo()); + } if (AtomicCounterUpdatePromoted) // automic update currently can only be promoted across the current // loop, not the whole loop nest. @@ -303,8 +313,7 @@ public: auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); // If the average loop trip count is not greater than 1.5, we skip // promotion. 
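The PGOCounterPromoter hunk above clones the bias computation (ptrtoint, add, inttoptr) so a promoted store still targets the relocated counter. A standalone sketch of the arithmetic being rematerialized, with plain C++ stand-ins rather than the pass itself:

// Standalone sketch of the address computation the promoter copies:
// counter address = (intptr)&__profc_<fn> + __llvm_profile_counter_bias.
#include <cstdint>
#include <cstdio>

static uint64_t ProfileCounters[4]; // stands in for __profc_<fn>
static int64_t CounterBias = 0;     // stands in for __llvm_profile_counter_bias

static uint64_t *relocatedCounter(unsigned Idx) {
  uint64_t Base = reinterpret_cast<uint64_t>(&ProfileCounters[Idx]);
  return reinterpret_cast<uint64_t *>(Base + CounterBias); // add + inttoptr
}

int main() {
  ++*relocatedCounter(0); // with a zero bias this hits the static buffer
  std::printf("%llu\n", (unsigned long long)ProfileCounters[0]);
}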
- if (PreheaderCount && - (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) + if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2)) continue; } @@ -705,10 +714,9 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { Type *Int64Ty = Type::getInt64Ty(M->getContext()); Function *Fn = I->getParent()->getParent(); - Instruction &EntryI = Fn->getEntryBlock().front(); - LoadInst *LI = dyn_cast(&EntryI); - if (!LI) { - IRBuilder<> EntryBuilder(&EntryI); + LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn]; + if (!BiasLI) { + IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front()); auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); if (!Bias) { // Compiler must define this variable when runtime counter relocation @@ -725,9 +733,9 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { if (TT.supportsCOMDAT()) Bias->setComdat(M->getOrInsertComdat(Bias->getName())); } - LI = EntryBuilder.CreateLoad(Int64Ty, Bias); + BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias); } - auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); + auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI); return Builder.CreateIntToPtr(Add, Addr->getType()); } @@ -769,7 +777,8 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { Name->setLinkage(GlobalValue::PrivateLinkage); ReferencedNames.push_back(Name); - NC->dropAllReferences(); + if (isa(NC)) + NC->dropAllReferences(); } CoverageNamesVar->eraseFromParent(); } @@ -856,8 +865,8 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { if (TT.isOSDarwin()) return false; // Use linker script magic to get data/cnts/name start/end. - if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || - TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows()) + if (TT.isOSAIX() || TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || + TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS() || TT.isOSWindows()) return false; return true; @@ -1236,7 +1245,7 @@ bool InstrProfiling::emitRuntimeHook() { new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, getInstrProfRuntimeHookVarName()); - if (TT.isOSBinFormatELF()) { + if (TT.isOSBinFormatELF() && !TT.isPS()) { // Mark the user variable as used so that it isn't stripped out. CompilerUsedVars.push_back(Var); } else { diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index dda242492391..9ff0e632bd7f 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -91,23 +91,13 @@ Comdat *llvm::getOrCreateFunctionComdat(Function &F, Triple &T) { /// initializeInstrumentation - Initialize all passes in the TransformUtils /// library. 
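Two things happen in the counter-promotion guard a little further up: Optional::getValue() gives way to operator*, and the condition keeps encoding "average trip count no greater than 1.5" as a cross-multiplied integer compare, avoiding a division. A standalone sketch under those assumptions:

// Skip counter promotion when the average trip count (header count divided
// by preheader count) is <= 1.5; std::optional mirrors llvm::Optional here.
#include <cstdint>
#include <cstdio>
#include <optional>

static bool skipPromotion(std::optional<uint64_t> PreheaderCount,
                          std::optional<uint64_t> InstrCount) {
  return PreheaderCount && InstrCount &&
         (*PreheaderCount * 3) >= (*InstrCount * 2);
}

int main() {
  std::printf("%d\n", skipPromotion(100, 140)); // 1.4 avg trips -> skip (1)
  std::printf("%d\n", skipPromotion(100, 400)); // 4.0 avg trips -> promote (0)
}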
void llvm::initializeInstrumentation(PassRegistry &Registry) { - initializeAddressSanitizerLegacyPassPass(Registry); - initializeModuleAddressSanitizerLegacyPassPass(Registry); initializeMemProfilerLegacyPassPass(Registry); initializeModuleMemProfilerLegacyPassPass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); initializeControlHeightReductionLegacyPassPass(Registry); - initializeGCOVProfilerLegacyPassPass(Registry); - initializePGOInstrumentationGenLegacyPassPass(Registry); - initializePGOInstrumentationUseLegacyPassPass(Registry); - initializePGOIndirectCallPromotionLegacyPassPass(Registry); - initializePGOMemOPSizeOptLegacyPassPass(Registry); initializeCGProfileLegacyPassPass(Registry); initializeInstrOrderFileLegacyPassPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); - initializeMemorySanitizerLegacyPassPass(Registry); - initializeHWAddressSanitizerLegacyPassPass(Registry); - initializeThreadSanitizerLegacyPassPass(Registry); initializeModuleSanitizerCoverageLegacyPassPass(Registry); initializeDataFlowSanitizerLegacyPassPass(Registry); } diff --git a/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h deleted file mode 100644 index 892a6a26da91..000000000000 --- a/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ /dev/null @@ -1,109 +0,0 @@ -//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This module provides means for calculating a maximum spanning tree for a -// given set of weighted edges. The type parameter T is the type of a node. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H -#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H - -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/IR/BasicBlock.h" -#include -#include - -namespace llvm { - - /// MaximumSpanningTree - A MST implementation. - /// The type parameter T determines the type of the nodes of the graph. - template - class MaximumSpanningTree { - public: - typedef std::pair Edge; - typedef std::pair EdgeWeight; - typedef std::vector EdgeWeights; - protected: - typedef std::vector MaxSpanTree; - - MaxSpanTree MST; - - private: - // A comparing class for comparing weighted edges. - struct EdgeWeightCompare { - static bool getBlockSize(const T *X) { - const BasicBlock *BB = dyn_cast_or_null(X); - return BB ? BB->size() : 0; - } - - bool operator()(EdgeWeight X, EdgeWeight Y) const { - if (X.second > Y.second) return true; - if (X.second < Y.second) return false; - - // Equal edge weights: break ties by comparing block sizes. - size_t XSizeA = getBlockSize(X.first.first); - size_t YSizeA = getBlockSize(Y.first.first); - if (XSizeA > YSizeA) return true; - if (XSizeA < YSizeA) return false; - - size_t XSizeB = getBlockSize(X.first.second); - size_t YSizeB = getBlockSize(Y.first.second); - if (XSizeB > YSizeB) return true; - if (XSizeB < YSizeB) return false; - - return false; - } - }; - - public: - static char ID; // Class identification, replacement for typeinfo - - /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a - /// spanning tree. 
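The MaximumSpanningTree.h header being deleted here (its body continues below) implemented Kruskal's algorithm: stable-sort the edges by descending weight, then accept an edge only when union-find shows its endpoints in different components. A standalone sketch of that computation:

// Standalone maximum spanning tree in the style of the deleted header:
// descending-weight Kruskal with a tiny union-find in place of
// EquivalenceClasses.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

struct DSU {
  std::vector<int> Parent;
  explicit DSU(int N) : Parent(N) { std::iota(Parent.begin(), Parent.end(), 0); }
  int find(int X) { return Parent[X] == X ? X : Parent[X] = find(Parent[X]); }
  bool unite(int A, int B) {
    A = find(A); B = find(B);
    if (A == B) return false; // already in a common subtree
    Parent[A] = B;
    return true;
  }
};

struct Edge { int A, B; unsigned W; };

static std::vector<Edge> maxSpanningTree(int N, std::vector<Edge> Edges) {
  std::stable_sort(Edges.begin(), Edges.end(),
                   [](const Edge &X, const Edge &Y) { return X.W > Y.W; });
  DSU Forest(N);
  std::vector<Edge> MST;
  for (const Edge &E : Edges)
    if (Forest.unite(E.A, E.B)) // biggest weight first, no cycles
      MST.push_back(E);
  return MST;
}

int main() {
  auto MST = maxSpanningTree(4, {{0, 1, 5}, {1, 2, 3}, {0, 2, 9}, {2, 3, 1}});
  for (const Edge &E : MST)
    std::printf("%d-%d (%u)\n", E.A, E.B, E.W);
}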
- MaximumSpanningTree(EdgeWeights &EdgeVector) { - llvm::stable_sort(EdgeVector, EdgeWeightCompare()); - - // Create spanning tree, Forest contains a special data structure - // that makes checking if two nodes are already in a common (sub-)tree - // fast and cheap. - EquivalenceClasses Forest; - for (typename EdgeWeights::iterator EWi = EdgeVector.begin(), - EWe = EdgeVector.end(); EWi != EWe; ++EWi) { - Edge e = (*EWi).first; - - Forest.insert(e.first); - Forest.insert(e.second); - } - - // Iterate over the sorted edges, biggest first. - for (typename EdgeWeights::iterator EWi = EdgeVector.begin(), - EWe = EdgeVector.end(); EWi != EWe; ++EWi) { - Edge e = (*EWi).first; - - if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) { - Forest.unionSets(e.first, e.second); - // So we know now that the edge is not already in a subtree, so we push - // the edge to the MST. - MST.push_back(e); - } - } - } - - typename MaxSpanTree::iterator begin() { - return MST.begin(); - } - - typename MaxSpanTree::iterator end() { - return MST.end(); - } - }; - -} // End llvm namespace - -#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 5e078f2c4212..01e3b2c20218 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -27,15 +27,14 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -156,7 +155,6 @@ static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) { struct InterestingMemoryAccess { Value *Addr = nullptr; bool IsWrite; - unsigned Alignment; Type *AccessTy; uint64_t TypeSize; Value *MaybeMask = nullptr; @@ -182,8 +180,7 @@ public: void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite); void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, - Instruction *I, Value *Addr, - unsigned Alignment, Type *AccessTy, + Instruction *I, Value *Addr, Type *AccessTy, bool IsWrite); void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); @@ -255,7 +252,7 @@ public: } // end anonymous namespace -MemProfilerPass::MemProfilerPass() {} +MemProfilerPass::MemProfilerPass() = default; PreservedAnalyses MemProfilerPass::run(Function &F, AnalysisManager &AM) { @@ -266,7 +263,7 @@ PreservedAnalyses MemProfilerPass::run(Function &F, return PreservedAnalyses::all(); } -ModuleMemProfilerPass::ModuleMemProfilerPass() {} +ModuleMemProfilerPass::ModuleMemProfilerPass() = default; PreservedAnalyses ModuleMemProfilerPass::run(Module &M, AnalysisManager &AM) { @@ -341,28 +338,24 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const { return None; Access.IsWrite = false; Access.AccessTy = LI->getType(); - Access.Alignment = LI->getAlignment(); Access.Addr = LI->getPointerOperand(); } else if (StoreInst *SI = dyn_cast(I)) { if (!ClInstrumentWrites) return None; Access.IsWrite = true; Access.AccessTy = 
SI->getValueOperand()->getType(); - Access.Alignment = SI->getAlignment(); Access.Addr = SI->getPointerOperand(); } else if (AtomicRMWInst *RMW = dyn_cast(I)) { if (!ClInstrumentAtomics) return None; Access.IsWrite = true; Access.AccessTy = RMW->getValOperand()->getType(); - Access.Alignment = 0; Access.Addr = RMW->getPointerOperand(); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics) return None; Access.IsWrite = true; Access.AccessTy = XCHG->getCompareOperand()->getType(); - Access.Alignment = 0; Access.Addr = XCHG->getPointerOperand(); } else if (auto *CI = dyn_cast(I)) { auto *F = CI->getCalledFunction(); @@ -384,11 +377,6 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const { } auto *BasePtr = CI->getOperand(0 + OpOffset); - if (auto *AlignmentConstant = - dyn_cast(CI->getOperand(1 + OpOffset))) - Access.Alignment = (unsigned)AlignmentConstant->getZExtValue(); - else - Access.Alignment = 1; // No alignment guarantees. We probably got Undef Access.MaybeMask = CI->getOperand(2 + OpOffset); Access.Addr = BasePtr; } @@ -410,6 +398,25 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const { if (Access.Addr->isSwiftError()) return None; + // Peel off GEPs and BitCasts. + auto *Addr = Access.Addr->stripInBoundsOffsets(); + + if (GlobalVariable *GV = dyn_cast(Addr)) { + // Do not instrument PGO counter updates. + if (GV->hasSection()) { + StringRef SectionName = GV->getSection(); + // Check if the global is in the PGO counters section. + auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat(); + if (SectionName.endswith( + getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) + return None; + } + + // Do not instrument accesses to LLVM internal variables. + if (GV->getName().startswith("__llvm")) + return None; + } + const DataLayout &DL = I->getModule()->getDataLayout(); Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy); return Access; @@ -417,7 +424,6 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const { void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, Instruction *I, Value *Addr, - unsigned Alignment, Type *AccessTy, bool IsWrite) { auto *VTy = cast(AccessTy); uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); @@ -468,8 +474,7 @@ void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL, if (Access.MaybeMask) { instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr, - Access.Alignment, Access.AccessTy, - Access.IsWrite); + Access.AccessTy, Access.IsWrite); } else { // Since the access counts will be accumulated across the entire allocation, // we only update the shadow access count for the first location and thus @@ -615,8 +620,6 @@ bool MemProfiler::instrumentFunction(Function &F) { initializeCallbacks(*F.getParent()); - FunctionModified |= insertDynamicShadowAtFunctionEntry(F); - SmallVector ToInstrument; // Fill the set of memory operations to instrument. 
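The isInterestingMemoryAccess() additions above peel GEPs and casts off the address and then refuse to profile PGO counter storage and other LLVM-internal globals. A hedged sketch of that filter (LLVM 15 API; the helper name is illustrative):

// Sketch: detect accesses whose base object is profiling bookkeeping.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/InstrProf.h"

using namespace llvm;

static bool isProfilingBookkeeping(const Instruction *I, Value *Addr) {
  auto *GV = dyn_cast<GlobalVariable>(Addr->stripInBoundsOffsets());
  if (!GV)
    return false;
  if (GV->getName().startswith("__llvm")) // LLVM-internal variables
    return true;
  if (!GV->hasSection())
    return false;
  // Compare against the object-format-specific PGO counters section name.
  auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
  StringRef Section = GV->getSection();
  return Section.endswith(
      getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false));
}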
@@ -627,6 +630,15 @@ bool MemProfiler::instrumentFunction(Function &F) { } } + if (ToInstrument.empty()) { + LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified + << " " << F << "\n"); + + return FunctionModified; + } + + FunctionModified |= insertDynamicShadowAtFunctionEntry(F); + int NumInstrumented = 0; for (auto *Inst : ToInstrument) { if (ClDebugMin < 0 || ClDebugMax < 0 || diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c51acdf52f14..4d72f6c3d1a9 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -174,24 +174,19 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsX86.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueMap.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -634,33 +629,6 @@ void insertModuleCtor(Module &M) { }); } -/// A legacy function pass for msan instrumentation. -/// -/// Instruments functions to detect uninitialized reads. -struct MemorySanitizerLegacyPass : public FunctionPass { - // Pass identification, replacement for typeid. - static char ID; - - MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {}) - : FunctionPass(ID), Options(Options) { - initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); - } - StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - } - - bool runOnFunction(Function &F) override { - return MSan->sanitizeFunction( - F, getAnalysis().getTLI(F)); - } - bool doInitialization(Module &M) override; - - Optional MSan; - MemorySanitizerOptions Options; -}; - template T getOptOrDefault(const cl::opt &Opt, T Default) { return (Opt.getNumOccurrences() > 0) ? Opt : Default; } @@ -705,21 +673,6 @@ void MemorySanitizerPass::printPipeline( OS << ">"; } -char MemorySanitizerLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan", - "MemorySanitizer: detects uninitialized reads.", false, - false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan", - "MemorySanitizer: detects uninitialized reads.", false, - false) - -FunctionPass * -llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) { - return new MemorySanitizerLegacyPass(Options); -} - /// Create a non-const global initialized with the given string. 
/// /// Creates a writable global for Str so that we can pass it to the @@ -1017,13 +970,6 @@ void MemorySanitizer::initializeModule(Module &M) { } } -bool MemorySanitizerLegacyPass::doInitialization(Module &M) { - if (!Options.Kernel) - insertModuleCtor(M); - MSan.emplace(M, Options); - return true; -} - namespace { /// A helper class that handles instrumentation of VarArg @@ -1674,7 +1620,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// or extracts if from ParamTLS (for function arguments). Value *getShadow(Value *V) { if (Instruction *I = dyn_cast(V)) { - if (!PropagateShadow || I->getMetadata("nosanitize")) + if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize)) return getCleanShadow(V); // For instructions the shadow is already stored in the map. Value *Shadow = ShadowMap[V]; @@ -1694,9 +1640,9 @@ struct MemorySanitizerVisitor : public InstVisitor { } if (Argument *A = dyn_cast(V)) { // For arguments we compute the shadow on demand and store it in the map. - Value **ShadowPtr = &ShadowMap[V]; - if (*ShadowPtr) - return *ShadowPtr; + Value *&ShadowPtr = ShadowMap[V]; + if (ShadowPtr) + return ShadowPtr; Function *F = A->getParent(); IRBuilder<> EntryIRB(FnPrologueEnd); unsigned ArgOffset = 0; @@ -1753,12 +1699,12 @@ struct MemorySanitizerVisitor : public InstVisitor { if (!PropagateShadow || Overflow || FArg.hasByValAttr() || (MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) { - *ShadowPtr = getCleanShadow(V); + ShadowPtr = getCleanShadow(V); setOrigin(A, getCleanOrigin()); } else { // Shadow over TLS Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); - *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base, + ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base, kShadowTLSAlignment); if (MS.TrackOrigins) { Value *OriginPtr = @@ -1767,14 +1713,14 @@ struct MemorySanitizerVisitor : public InstVisitor { } } LLVM_DEBUG(dbgs() - << " ARG: " << FArg << " ==> " << **ShadowPtr << "\n"); + << " ARG: " << FArg << " ==> " << *ShadowPtr << "\n"); break; } ArgOffset += alignTo(Size, kShadowTLSAlignment); } - assert(*ShadowPtr && "Could not find shadow for an argument"); - return *ShadowPtr; + assert(ShadowPtr && "Could not find shadow for an argument"); + return ShadowPtr; } // For everything else the shadow is zero. return getCleanShadow(V); @@ -1793,7 +1739,7 @@ struct MemorySanitizerVisitor : public InstVisitor { assert((isa(V) || isa(V)) && "Unexpected value type in getOrigin()"); if (Instruction *I = dyn_cast(V)) { - if (I->getMetadata("nosanitize")) + if (I->getMetadata(LLVMContext::MD_nosanitize)) return getCleanOrigin(); } Value *Origin = OriginMap[V]; @@ -1916,7 +1862,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // ------------------- Visitors. using InstVisitor::visit; void visit(Instruction &I) { - if (I.getMetadata("nosanitize")) + if (I.getMetadata(LLVMContext::MD_nosanitize)) return; // Don't want to visit if we're in the prologue if (isInPrologue(I)) @@ -1930,12 +1876,12 @@ struct MemorySanitizerVisitor : public InstVisitor { /// Optionally, checks that the load address is fully defined. 
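In the getShadow() hunk above, the code now binds a Value *& to the ShadowMap slot instead of holding a Value **; either way the point is a single hash lookup whose slot is later written through. A standalone illustration of the idiom with a plain map (cache and key types are arbitrary):

// One operator[] call default-constructs the slot; the reference lets the
// code test, fill, and return the cached value without a second lookup.
#include <cstdio>
#include <string>
#include <unordered_map>

static std::unordered_map<int, std::string> Cache;

static const std::string &getOrCompute(int Key) {
  std::string &Slot = Cache[Key]; // single lookup, inserts "" if absent
  if (Slot.empty())
    Slot = "computed:" + std::to_string(Key); // fill through the reference
  return Slot;
}

int main() {
  std::printf("%s\n", getOrCompute(7).c_str());
  std::printf("%s\n", getOrCompute(7).c_str()); // second call hits the cache
}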
void visitLoadInst(LoadInst &I) { assert(I.getType()->isSized() && "Load type must have size"); - assert(!I.getMetadata("nosanitize")); + assert(!I.getMetadata(LLVMContext::MD_nosanitize)); IRBuilder<> IRB(I.getNextNode()); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); Value *ShadowPtr = nullptr, *OriginPtr = nullptr; - const Align Alignment = assumeAligned(I.getAlignment()); + const Align Alignment = I.getAlign(); if (PropagateShadow) { std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); @@ -2573,6 +2519,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// Similar situation exists for memcpy and memset. void visitMemMoveInst(MemMoveInst &I) { + getShadow(I.getArgOperand(1)); // Ensure shadow initialized IRBuilder<> IRB(&I); IRB.CreateCall( MS.MemmoveFn, @@ -2587,6 +2534,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // FIXME: consider doing manual inline for small constant sizes and proper // alignment. void visitMemCpyInst(MemCpyInst &I) { + getShadow(I.getArgOperand(1)); // Ensure shadow initialized IRBuilder<> IRB(&I); IRB.CreateCall( MS.MemcpyFn, @@ -3252,27 +3200,37 @@ struct MemorySanitizerVisitor : public InstVisitor { SOC.Done(&I); } - // Instrument _mm_*_sd intrinsics - void handleUnarySdIntrinsic(IntrinsicInst &I) { + // Instrument _mm_*_sd|ss intrinsics + void handleUnarySdSsIntrinsic(IntrinsicInst &I) { IRBuilder<> IRB(&I); + unsigned Width = + cast(I.getArgOperand(0)->getType())->getNumElements(); Value *First = getShadow(&I, 0); Value *Second = getShadow(&I, 1); - // High word of first operand, low word of second - Value *Shadow = - IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef({2, 1})); + // First element of second operand, remaining elements of first operand + SmallVector Mask; + Mask.push_back(Width); + for (unsigned i = 1; i < Width; i++) + Mask.push_back(i); + Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask); setShadow(&I, Shadow); setOriginForNaryOp(I); } - void handleBinarySdIntrinsic(IntrinsicInst &I) { + void handleBinarySdSsIntrinsic(IntrinsicInst &I) { IRBuilder<> IRB(&I); + unsigned Width = + cast(I.getArgOperand(0)->getType())->getNumElements(); Value *First = getShadow(&I, 0); Value *Second = getShadow(&I, 1); Value *OrShadow = IRB.CreateOr(First, Second); - // High word of first operand, low word of both OR'd together - Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, - llvm::makeArrayRef({2, 1})); + // First element of both OR'd together, remaining elements of first operand + SmallVector Mask; + Mask.push_back(Width); + for (unsigned i = 1; i < Width; i++) + Mask.push_back(i); + Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask); setShadow(&I, Shadow); setOriginForNaryOp(I); @@ -3547,11 +3505,14 @@ struct MemorySanitizerVisitor : public InstVisitor { break; case Intrinsic::x86_sse41_round_sd: - handleUnarySdIntrinsic(I); + case Intrinsic::x86_sse41_round_ss: + handleUnarySdSsIntrinsic(I); break; case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse_max_ss: case Intrinsic::x86_sse2_min_sd: - handleBinarySdIntrinsic(I); + case Intrinsic::x86_sse_min_ss: + handleBinarySdSsIntrinsic(I); break; case Intrinsic::fshl: @@ -3630,7 +3591,7 @@ struct MemorySanitizerVisitor : public InstVisitor { } void visitCallBase(CallBase &CB) { - assert(!CB.getMetadata("nosanitize")); + assert(!CB.getMetadata(LLVMContext::MD_nosanitize)); if (CB.isInlineAsm()) { // For inline asm (either a call to asm function, or callbr instruction), 
// do the usual thing: check argument shadow and mark all outputs as @@ -4083,8 +4044,9 @@ struct MemorySanitizerVisitor : public InstVisitor { // Nothing to do here. } - void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB, - const DataLayout &DL, bool isOutput) { + void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I, + IRBuilder<> &IRB, const DataLayout &DL, + bool isOutput) { // For each assembly argument, we check its value for being initialized. // If the argument is a pointer, we assume it points to a single element // of the corresponding type (or to a 8-byte word, if the type is unsized). @@ -4096,10 +4058,9 @@ struct MemorySanitizerVisitor : public InstVisitor { assert(!isOutput); return; } - Type *ElType = OpType->getPointerElementType(); - if (!ElType->isSized()) + if (!ElemTy->isSized()) return; - int Size = DL.getTypeStoreSize(ElType); + int Size = DL.getTypeStoreSize(ElemTy); Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy()); Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size); IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal}); @@ -4159,14 +4120,16 @@ struct MemorySanitizerVisitor : public InstVisitor { // that we won't overwrite uninit values before checking them. for (int i = OutputArgs; i < NumOperands; i++) { Value *Operand = CB->getOperand(i); - instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false); + instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL, + /*isOutput*/ false); } // Unpoison output arguments. This must happen before the actual InlineAsm // call, so that the shadow for memory published in the asm() statement // remains valid. for (int i = 0; i < OutputArgs; i++) { Value *Operand = CB->getOperand(i); - instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true); + instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL, + /*isOutput*/ true); } setShadow(&I, getCleanShadow(&I)); @@ -4885,8 +4848,8 @@ struct VarArgPowerPC64Helper : public VarArgHelper { assert(A->getType()->isPointerTy()); Type *RealTy = CB.getParamByValType(ArgNo); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - MaybeAlign ArgAlign = CB.getParamAlign(ArgNo); - if (!ArgAlign || *ArgAlign < Align(8)) + Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8)); + if (ArgAlign < 8) ArgAlign = Align(8); VAArgOffset = alignTo(VAArgOffset, ArgAlign); if (!IsFixed) { @@ -4902,27 +4865,27 @@ struct VarArgPowerPC64Helper : public VarArgHelper { kShadowTLSAlignment, ArgSize); } } - VAArgOffset += alignTo(ArgSize, 8); + VAArgOffset += alignTo(ArgSize, Align(8)); } else { Value *Base; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); - uint64_t ArgAlign = 8; + Align ArgAlign = Align(8); if (A->getType()->isArrayTy()) { // Arrays are aligned to element size, except for long double // arrays, which are aligned to 8 bytes. Type *ElementTy = A->getType()->getArrayElementType(); if (!ElementTy->isPPC_FP128Ty()) - ArgAlign = DL.getTypeAllocSize(ElementTy); + ArgAlign = Align(DL.getTypeAllocSize(ElementTy)); } else if (A->getType()->isVectorTy()) { // Vectors are naturally aligned. 
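The handleUnarySdSsIntrinsic/handleBinarySdSsIntrinsic hunks further up generalize a hard-coded {2, 1} mask to {Width, 1, ..., Width-1}: lane 0 of the shadow comes from the second vector, all other lanes from the first, which is what lets one handler serve both the _sd and _ss intrinsics. A standalone simulation of that shufflevector mask:

// Indices >= Width address the second operand, matching LLVM shufflevector
// semantics; here Width elements are simulated with plain vectors.
#include <cstdio>
#include <vector>

static std::vector<int> shuffle(const std::vector<int> &First,
                                const std::vector<int> &Second) {
  unsigned Width = First.size();
  std::vector<int> Mask;
  Mask.push_back(Width);            // lane 0 <- Second[0]
  for (unsigned I = 1; I < Width; ++I)
    Mask.push_back(I);              // lane I <- First[I]
  std::vector<int> Out;
  for (int Idx : Mask)
    Out.push_back(Idx < (int)Width ? First[Idx] : Second[Idx - Width]);
  return Out;
}

int main() {
  auto R = shuffle({10, 11, 12, 13}, {20, 21, 22, 23}); // ss case, Width = 4
  for (int V : R)
    std::printf("%d ", V); // prints: 20 11 12 13
  std::printf("\n");
}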
- ArgAlign = DL.getTypeAllocSize(A->getType()); + ArgAlign = Align(ArgSize); } if (ArgAlign < 8) - ArgAlign = 8; + ArgAlign = Align(8); VAArgOffset = alignTo(VAArgOffset, ArgAlign); if (DL.isBigEndian()) { - // Adjusting the shadow for argument with size < 8 to match the placement - // of bits in big endian system + // Adjusting the shadow for argument with size < 8 to match the + // placement of bits in big endian system if (ArgSize < 8) VAArgOffset += (8 - ArgSize); } @@ -4933,7 +4896,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } VAArgOffset += ArgSize; - VAArgOffset = alignTo(VAArgOffset, 8); + VAArgOffset = alignTo(VAArgOffset, Align(8)); } if (IsFixed) VAArgBase = VAArgOffset; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 0902a94452e3..3a29cd70e42e 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -52,7 +52,6 @@ #include "ValueProfileCollector.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -68,6 +67,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -94,8 +94,6 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/BranchProbability.h" @@ -110,6 +108,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -173,14 +172,14 @@ static cl::opt DisableValueProfiling("disable-vp", cl::init(false), // Command line option to set the maximum number of VP annotations to write to // the metadata for a single indirect call callsite. static cl::opt MaxNumAnnotations( - "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, + "icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite")); // Command line option to set the maximum number of value annotations // to write to the metadata for a single memop intrinsic. 
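The VarArgPowerPC64Helper hunks above move the shadow layout onto the Align type: each slot is rounded up to an at-least-8-byte alignment, and on big-endian targets a sub-8-byte argument is shifted to the high end of its 8-byte slot. A standalone sketch of those two rules:

// alignTo rounds an offset up to a boundary; slotOffset applies the PPC64
// va_arg shadow rules from the hunk above.
#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

static uint64_t slotOffset(uint64_t Offset, uint64_t ArgSize,
                           uint64_t ArgAlign, bool BigEndian) {
  if (ArgAlign < 8)
    ArgAlign = 8;
  Offset = alignTo(Offset, ArgAlign);
  if (BigEndian && ArgSize < 8)
    Offset += 8 - ArgSize; // bits sit in the high bytes of the slot
  return Offset;
}

int main() {
  std::printf("%llu\n", (unsigned long long)slotOffset(12, 4, 4, true));  // 20
  std::printf("%llu\n", (unsigned long long)slotOffset(12, 4, 4, false)); // 16
}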
static cl::opt MaxNumMemOPAnnotations( - "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, + "memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic")); @@ -256,7 +255,7 @@ static cl::opt PGOInstrumentEntry( cl::desc("Force to instrument function entry basicblock.")); static cl::opt PGOFunctionEntryCoverage( - "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, + "pgo-function-entry-coverage", cl::Hidden, cl::desc( "Use this option to enable function entry coverage instrumentation.")); @@ -431,125 +430,8 @@ struct SelectInstVisitor : public InstVisitor { unsigned getNumOfSelectInsts() const { return NSIs; } }; - -class PGOInstrumentationGenLegacyPass : public ModulePass { -public: - static char ID; - - PGOInstrumentationGenLegacyPass(bool IsCS = false) - : ModulePass(ID), IsCS(IsCS) { - initializePGOInstrumentationGenLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } - -private: - // Is this is context-sensitive instrumentation. - bool IsCS; - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - } -}; - -class PGOInstrumentationUseLegacyPass : public ModulePass { -public: - static char ID; - - // Provide the profile filename as the parameter. - PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) - : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { - if (!PGOTestProfileFile.empty()) - ProfileFileName = PGOTestProfileFile; - initializePGOInstrumentationUseLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } - -private: - std::string ProfileFileName; - // Is this is context-sensitive instrumentation use. - bool IsCS; - - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } -}; - -class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { -public: - static char ID; - StringRef getPassName() const override { - return "PGOInstrumentationGenCreateVarPass"; - } - PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") - : ModulePass(ID), InstrProfileOutput(CSInstrName) { - initializePGOInstrumentationGenCreateVarLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - -private: - bool runOnModule(Module &M) override { - createProfileFileNameVar(M, InstrProfileOutput); - // The variable in a comdat may be discarded by LTO. Ensure the - // declaration will be retained. 
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); - return false; - } - std::string InstrProfileOutput; -}; - } // end anonymous namespace -char PGOInstrumentationGenLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", - "PGO instrumentation.", false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", - "PGO instrumentation.", false, false) - -ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { - return new PGOInstrumentationGenLegacyPass(IsCS); -} - -char PGOInstrumentationUseLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", - "Read PGO instrumentation profile.", false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", - "Read PGO instrumentation profile.", false, false) - -ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, - bool IsCS) { - return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); -} - -char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; - -INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, - "pgo-instr-gen-create-var", - "Create PGO instrumentation version variable for CSPGO.", false, - false) - -ModulePass * -llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { - return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); -} - namespace { /// An MST based instrumentation for PGO @@ -940,7 +822,7 @@ static void instrumentOneFunc( bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. - SplitIndirectBrCriticalEdges(F, BPI, BFI); + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); FuncPGOInstrumentation FuncInfo( F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); @@ -1457,6 +1339,7 @@ void PGOUseFunc::populateCounters() { } LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); + (void) NumPasses; #ifndef NDEBUG // Assert every BB has a valid counter. 
for (auto &BB : F) { @@ -1697,22 +1580,6 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::all(); } -bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - auto LookupBPI = [this](Function &F) { - return &this->getAnalysis(F).getBPI(); - }; - auto LookupBFI = [this](Function &F) { - return &this->getAnalysis(F).getBFI(); - }; - return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); -} - PreservedAnalyses PGOInstrumentationGen::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); @@ -1740,7 +1607,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BlockFrequencyInfo NBFI(F, NBPI, LI); #ifndef NDEBUG auto BFIEntryCount = F.getEntryCount(); - assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) && + assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && "Invalid BFI Entrycount"); #endif auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); @@ -1752,7 +1619,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, continue; auto BFICount = NBFI.getBlockProfileCount(&BBI); CountValue = Func.getBBInfo(&BBI).CountValue; - BFICountValue = BFICount.getValue(); + BFICountValue = *BFICount; SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); } @@ -1805,7 +1672,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, NonZeroBBNum++; auto BFICount = NBFI.getBlockProfileCount(&BBI); if (BFICount) - BFICountValue = BFICount.getValue(); + BFICountValue = *BFICount; if (HotBBOnly) { bool rawIsHot = CountValue >= HotCountThreshold; @@ -1929,7 +1796,7 @@ static bool annotateAllFunctions( auto *BFI = LookupBFI(F); // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. 
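In the populateCounters() hunk further up, NumPasses is now only read inside LLVM_DEBUG, so a (void) cast was added to keep release builds from warning about an unused variable. A standalone illustration; the NDEBUG macro below merely stands in for the LLVM_DEBUG machinery:

// The void cast is a no-op "use" that silences -Wunused-variable when the
// only real use is compiled out.
#include <cstdio>

#ifndef NDEBUG
#define DEBUG_ONLY(X) do { X; } while (false)
#else
#define DEBUG_ONLY(X) do { } while (false)
#endif

int main() {
  unsigned NumPasses = 0;
  for (int I = 0; I < 3; ++I)
    ++NumPasses;
  DEBUG_ONLY(std::printf("Populate counts in %u passes.\n", NumPasses));
  (void)NumPasses; // keeps NDEBUG builds warning-free
  return 0;
}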
- SplitIndirectBrCriticalEdges(F, BPI, BFI); + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry); // When AllMinusOnes is true, it means the profile for the function @@ -2073,25 +1940,6 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, return PreservedAnalyses::none(); } -bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - auto LookupBPI = [this](Function &F) { - return &this->getAnalysis(F).getBPI(); - }; - auto LookupBFI = [this](Function &F) { - return &this->getAnalysis(F).getBFI(); - }; - - auto *PSI = &getAnalysis().getPSI(); - return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, - LookupBFI, PSI, IsCS); -} - static std::string getSimpleNodeName(const BasicBlock *Node) { if (!Node->getName().empty()) return std::string(Node->getName()); @@ -2117,6 +1965,8 @@ void llvm::setProfMetadata(Module *M, Instruction *TI, dbgs() << W << " "; } dbgs() << "\n";); + misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); if (EmitBranchProbability) { std::string BrCondStr = getBranchCondString(TI); diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index d4b78f2c14b0..b11f16894669 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" @@ -29,15 +28,11 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include "llvm/ProfileData/InstrProf.h" #define INSTR_PROF_VALUE_PROF_MEMOP_API #include "llvm/ProfileData/InstrProfData.inc" @@ -46,8 +41,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include @@ -63,8 +56,7 @@ STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); // The minimum call count to optimize memory intrinsic calls. static cl::opt - MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, - cl::init(1000), + MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls")); @@ -76,14 +68,13 @@ static cl::opt DisableMemOPOPT("disable-memop-opt", cl::init(false), // The percent threshold to optimize memory intrinsic calls. 
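setProfMetadata() above now runs the misexpect check before attaching branch weights exactly as before. A hedged sketch of the metadata attachment at its core, assuming LLVM 15; TI and Weights would come from the surrounding pass:

// Sketch: attach "branch_weights" profile metadata to a terminator.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

static void setBranchWeights(Instruction *TI, ArrayRef<uint32_t> Weights) {
  MDBuilder MDB(TI->getContext());
  // MD_prof carries the branch_weights node later consumed by BPI and PGO.
  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}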
static cl::opt MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), - cl::Hidden, cl::ZeroOrMore, + cl::Hidden, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization")); // Maximum number of versions for optimizing memory intrinsic call. static cl::opt MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, - cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls")); @@ -102,43 +93,6 @@ static cl::opt MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value")); -namespace { -class PGOMemOPSizeOptLegacyPass : public FunctionPass { -public: - static char ID; - - PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { - initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "PGOMemOPSize"; } - -private: - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); - } -}; -} // end anonymous namespace - -char PGOMemOPSizeOptLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) - -FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { - return new PGOMemOPSizeOptLegacyPass(); -} - namespace { static const char *getMIName(const MemIntrinsic *MI) { @@ -517,20 +471,6 @@ static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, return MemOPSizeOpt.isChanged(); } -bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis().getBFI(); - auto &ORE = getAnalysis().getORE(); - auto *DTWP = getAnalysisIfAvailable(); - DominatorTree *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - TargetLibraryInfo &TLI = - getAnalysis().getTLI(F); - return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); -} - -namespace llvm { -char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; - PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, FunctionAnalysisManager &FAM) { auto &BFI = FAM.getResult(F); @@ -544,4 +484,3 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, PA.preserve(); return PA; } -} // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp index fc5267261851..0e39fe266369 100644 --- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -60,15 +60,9 @@ #include "llvm/Transforms/Instrumentation/PoisonChecking.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index d3b60c7add34..d9d11cc90d3d 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -13,30 +13,24 @@ #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/SpecialCaseList.h" #include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -247,8 +241,7 @@ private: Type *Ty); void SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(I->getModule()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + I->setMetadata(LLVMContext::MD_nosanitize, MDNode::get(*C, None)); } std::string getSectionName(const std::string &Section) const; @@ -694,7 +687,7 @@ void ModuleSanitizerCoverage::instrumentFunction( for (auto &Inst : BB) { if (Options.IndirectCalls) { CallBase *CB = dyn_cast(&Inst); - if (CB && !CB->getCalledFunction()) + if (CB && CB->isIndirectCall()) IndirCalls.push_back(&Inst); } if (Options.TraceCmp) { @@ -996,15 +989,11 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, // if we aren't splitting the block, it's nice for allocas to be before // calls. 
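Two small migrations sit in the SanitizerCoverage hunks above: nosanitize metadata is set through the fixed LLVMContext::MD_nosanitize kind ID instead of a string lookup, and indirect calls are found with CallBase::isIndirectCall(), which, unlike a null called function, is not fooled by inline asm or other non-function callees. A sketch of both, assuming LLVM 15:

// Sketch: fixed metadata kind ID plus the tighter indirect-call test.
#include "llvm/ADT/None.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

static void markNoSanitize(Instruction *I) {
  I->setMetadata(LLVMContext::MD_nosanitize,
                 MDNode::get(I->getContext(), None));
}

static bool isInterestingIndirectCall(Instruction &Inst) {
  auto *CB = dyn_cast<CallBase>(&Inst);
  return CB && CB->isIndirectCall(); // excludes inline asm, direct calls
}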
IP = PrepareToSplitEntryBlock(BB, IP); - } else { - EntryLoc = IP->getDebugLoc(); - if (!EntryLoc) - if (auto *SP = F.getSubprogram()) - EntryLoc = DILocation::get(SP->getContext(), 0, 0, SP); } - IRBuilder<> IRB(&*IP); - IRB.SetCurrentDebugLocation(EntryLoc); + InstrumentationIRBuilder IRB(&*IP); + if (EntryLoc) + IRB.SetCurrentDebugLocation(EntryLoc); if (Options.TracePC) { IRB.CreateCall(SanCovTracePC) ->setCannotMerge(); // gets the PC using GET_CALLER_PC. diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 180012198c42..c33b1b3b1a5c 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -38,7 +38,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -174,19 +173,6 @@ private: FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; }; -struct ThreadSanitizerLegacyPass : FunctionPass { - ThreadSanitizerLegacyPass() : FunctionPass(ID) { - initializeThreadSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); - } - StringRef getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; - bool doInitialization(Module &M) override; - static char ID; // Pass identification, replacement for typeid. -private: - Optional TSan; -}; - void insertModuleCtor(Module &M) { getOrCreateSanitizerCtorAndInitFunctions( M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, @@ -195,7 +181,6 @@ void insertModuleCtor(Module &M) { // time. Hook them into the global ctors list in that case: [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); } - } // namespace PreservedAnalyses ThreadSanitizerPass::run(Function &F, @@ -211,38 +196,6 @@ PreservedAnalyses ModuleThreadSanitizerPass::run(Module &M, insertModuleCtor(M); return PreservedAnalyses::none(); } - -char ThreadSanitizerLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan", - "ThreadSanitizer: detects data races.", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan", - "ThreadSanitizer: detects data races.", false, false) - -StringRef ThreadSanitizerLegacyPass::getPassName() const { - return "ThreadSanitizerLegacyPass"; -} - -void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); -} - -bool ThreadSanitizerLegacyPass::doInitialization(Module &M) { - insertModuleCtor(M); - TSan.emplace(); - return true; -} - -bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(F); - TSan->sanitizeFunction(F, TLI); - return true; -} - -FunctionPass *llvm::createThreadSanitizerLegacyPassPass() { - return new ThreadSanitizerLegacyPass(); -} - void ThreadSanitizer::initialize(Module &M) { const DataLayout &DL = M.getDataLayout(); IntptrTy = DL.getIntPtrType(M.getContext()); @@ -527,26 +480,22 @@ void ThreadSanitizer::chooseInstructionsToInstrument( Local.clear(); } -static bool isAtomic(Instruction *I) { +static bool isTsanAtomic(const Instruction *I) { // TODO: Ask TTI whether synchronization scope is between threads. 
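The isTsanAtomic() rewrite that continues below folds a chain of per-opcode dyn_casts into one query: getAtomicSyncScopeID() returns None for non-atomic instructions, and atomic loads and stores in single-thread scope are still skipped. A sketch of the resulting predicate, assuming LLVM 15:

// Sketch of the new predicate; getAtomicSyncScopeID is the real LLVM API.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static bool isTsanAtomicSketch(const Instruction *I) {
  auto SSID = getAtomicSyncScopeID(I); // None => not atomic at all
  if (!SSID)
    return false;
  if (isa<LoadInst>(I) || isa<StoreInst>(I))
    return *SSID != SyncScope::SingleThread; // skip single-thread scope
  return true; // RMW, cmpxchg, and fences are always interesting
}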
- if (LoadInst *LI = dyn_cast(I)) - return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; - if (StoreInst *SI = dyn_cast(I)) - return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; - if (isa(I)) - return true; - if (isa(I)) - return true; - if (isa(I)) - return true; - return false; + auto SSID = getAtomicSyncScopeID(I); + if (!SSID) + return false; + if (isa(I) || isa(I)) + return SSID.getValue() != SyncScope::SingleThread; + return true; } void ThreadSanitizer::InsertRuntimeIgnores(Function &F) { - IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); + InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI()); IRB.CreateCall(TsanIgnoreBegin); EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions); while (IRBuilder<> *AtExit = EE.Next()) { + InstrumentationIRBuilder::ensureDebugInfo(*AtExit, F); AtExit->CreateCall(TsanIgnoreEnd); } } @@ -581,7 +530,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, // Traverse all instructions, collect loads/stores/returns, check for calls. for (auto &BB : F) { for (auto &Inst : BB) { - if (isAtomic(&Inst)) + if (isTsanAtomic(&Inst)) AtomicAccesses.push_back(&Inst); else if (isa(Inst) || isa(Inst)) LocalLoadsAndStores.push_back(&Inst); @@ -629,7 +578,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, // Instrument function entry/exit points if there were instrumented accesses. if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { - IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); + InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI()); Value *ReturnAddress = IRB.CreateCall( Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), IRB.getInt32(0)); @@ -637,6 +586,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions); while (IRBuilder<> *AtExit = EE.Next()) { + InstrumentationIRBuilder::ensureDebugInfo(*AtExit, F); AtExit->CreateCall(TsanFuncExit, {}); } Res = true; @@ -646,7 +596,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II, const DataLayout &DL) { - IRBuilder<> IRB(II.Inst); + InstrumentationIRBuilder IRB(II.Inst); const bool IsWrite = isa(*II.Inst); Value *Addr = IsWrite ? cast(II.Inst)->getPointerOperand() : cast(II.Inst)->getPointerOperand(); @@ -686,8 +636,8 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II, return true; } - const unsigned Alignment = IsWrite ? cast(II.Inst)->getAlignment() - : cast(II.Inst)->getAlignment(); + const Align Alignment = IsWrite ? 
cast(II.Inst)->getAlign() + : cast(II.Inst)->getAlign(); const bool IsCompoundRW = ClCompoundReadBeforeWrite && (II.Flags & InstructionInfo::kCompoundRW); const bool IsVolatile = ClDistinguishVolatile && @@ -697,7 +647,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II, const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); FunctionCallee OnAccessFunc = nullptr; - if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) { + if (Alignment >= Align(8) || (Alignment.value() % (TypeSize / 8)) == 0) { if (IsCompoundRW) OnAccessFunc = TsanCompoundRW[Idx]; else if (IsVolatile) @@ -775,7 +725,7 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { // http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { - IRBuilder<> IRB(I); + InstrumentationIRBuilder IRB(I); if (LoadInst *LI = dyn_cast(I)) { Value *Addr = LI->getPointerOperand(); Type *OrigTy = LI->getType(); diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp index fb6216bb2177..32633bbc941b 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp +++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp @@ -10,12 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "ValueProfileCollector.h" #include "ValueProfilePlugins.inc" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/InitializePasses.h" -#include +#include "llvm/ProfileData/InstrProf.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h index 584a60ab451e..10e5e4d128b1 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h +++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h @@ -16,7 +16,6 @@ #ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H #define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ProfileData/InstrProf.h" #include #include @@ -25,6 +24,7 @@ namespace llvm { class Function; class Instruction; +class TargetLibraryInfo; class Value; /// Utility analysis that determines what values are worth profiling. 
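// --- Illustrative aside -----------------------------------------------------
// Several hunks above (ValueProfileCollector.h, ThreadSanitizer.cpp, etc.)
// swap #includes for forward declarations. A minimal sketch of why that is
// enough, with invented names: a header that mentions a type only by pointer
// or reference never needs the type's definition, so dropping the include
// cuts rebuild cascades for every file that includes the header.

// report.h (hypothetical)
class Widget;                  // forward declaration suffices here
void report(const Widget &W);  // only names the type, never peeks inside

// report.cpp (hypothetical) would then do:
//   #include "widget.h"       // full definition needed only where members
//   ...                       // of Widget are actually accessed
// -----------------------------------------------------------------------------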
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 6a2c473a596a..3a129de1acd0 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -15,6 +15,7 @@ #include "ValueProfileCollector.h" #include "llvm/Analysis/IndirectCallVisitor.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/InstVisitor.h" using namespace llvm; diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp index 126845bb3308..70f150c9461a 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -16,7 +16,6 @@ #include "llvm-c/Initialization.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h index 62f88a8cc02b..2bc0c8f87d77 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h +++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h @@ -22,7 +22,6 @@ #ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H #define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H -#include "ARCRuntimeEntryPoints.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/Analysis/ObjCARCUtil.h" diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp index 210ec60f2f87..03e5fb18d5ac 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -23,11 +23,14 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/ObjCARC.h" diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 2985ae004d3c..f64c26ef2bed 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -102,11 +102,8 @@ public: }; class ObjCARCContractLegacyPass : public FunctionPass { - ObjCARCContract OCARCC; - public: void getAnalysisUsage(AnalysisUsage &AU) const override; - bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; static char ID; @@ -737,11 +734,9 @@ Pass *llvm::createObjCARCContractPass() { return new ObjCARCContractLegacyPass(); } -bool ObjCARCContractLegacyPass::doInitialization(Module &M) { - return OCARCC.init(M); -} - bool ObjCARCContractLegacyPass::runOnFunction(Function &F) { + ObjCARCContract OCARCC; + OCARCC.init(*F.getParent()); auto *AA = &getAnalysis().getAAResults(); auto *DT = &getAnalysis().getDomTree(); return OCARCC.run(F, AA, DT); diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp index 6b074ac5adab..efcdc51ef5e3 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp @@ -22,7 +22,7 @@ /// 
//===----------------------------------------------------------------------===// -#include "ObjCARC.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp index 1cda206a7e14..cdf9de8d78d5 100644 --- a/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -35,7 +35,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index e4ec5f266eb8..9571e99dfb19 100644 --- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -15,8 +15,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" -#include "llvm/InitializePasses.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -26,12 +24,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" diff --git a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp index a5e65ffc45fe..155f47b49357 100644 --- a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp +++ b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp @@ -16,11 +16,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/MemoryOpRemark.h" diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp index 95de59fa8262..cc12033fb677 100644 --- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -57,6 +57,7 @@ #include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IntrinsicInst.h" @@ -65,7 +66,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -123,8 +123,8 @@ static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallBase &CB) { return false; } -typedef std::pair ConditionTy; -typedef SmallVector ConditionsTy; +using ConditionTy = std::pair; +using ConditionsTy = SmallVector; /// If From has a conditional jump to To, add the 
condition to Conditions, /// if it is relevant to any argument at CB. @@ -301,10 +301,9 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI, /// Note that in case any arguments at the call-site are constrained by its /// predecessors, new call-sites with more constrained arguments will be /// created in createCallSitesOnPredicatedArgument(). -static void splitCallSite( - CallBase &CB, - const SmallVectorImpl> &Preds, - DomTreeUpdater &DTU) { +static void splitCallSite(CallBase &CB, + ArrayRef> Preds, + DomTreeUpdater &DTU) { BasicBlock *TailBB = CB.getParent(); bool IsMustTailCall = CB.isMustTailCall(); diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 25e8c3ef3b48..8a1761505d59 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 13963657d183..6dfa2440023f 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -19,15 +19,16 @@ #include "llvm/Analysis/ConstraintSystem.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Scalar.h" #include @@ -42,48 +43,129 @@ DEBUG_COUNTER(EliminatedCounter, "conds-eliminated", "Controls which conditions are eliminated"); static int64_t MaxConstraintValue = std::numeric_limits::max(); +static int64_t MinSignedConstraintValue = std::numeric_limits::min(); namespace { -struct ConstraintTy { - SmallVector Coefficients; - ConstraintTy(SmallVector Coefficients) - : Coefficients(Coefficients) {} +class ConstraintInfo; - unsigned size() const { return Coefficients.size(); } +struct StackEntry { + unsigned NumIn; + unsigned NumOut; + bool IsNot; + bool IsSigned = false; + /// Variables that can be removed from the system once the stack entry gets + /// removed. + SmallVector ValuesToRelease; + + StackEntry(unsigned NumIn, unsigned NumOut, bool IsNot, bool IsSigned, + SmallVector ValuesToRelease) + : NumIn(NumIn), NumOut(NumOut), IsNot(IsNot), IsSigned(IsSigned), + ValuesToRelease(ValuesToRelease) {} }; -/// Struct to manage a list of constraints. -struct ConstraintListTy { - SmallVector Constraints; +/// Struct to express a pre-condition of the form %Op0 Pred %Op1. 
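// --- Illustrative aside -----------------------------------------------------
// StackEntry above records dominator-tree DFS numbers (NumIn/NumOut) so that
// facts can be popped once the traversal leaves their scope. A standalone
// model of that interval check (invented names, not the LLVM API): a fact
// recorded at a node with DFS interval [In, Out] applies exactly to the nodes
// whose own interval nests inside it.
#include <cassert>

struct DFSInterval { unsigned In, Out; };

bool factInScope(DFSInterval Fact, DFSInterval Node) {
  return Fact.In <= Node.In && Node.Out <= Fact.Out;
}

int main() {
  DFSInterval Root{1, 10}, Then{2, 5}, Else{6, 9}; // a diamond CFG's intervals
  assert(factInScope(Then, {3, 4}));  // fact from Then: valid in nested block
  assert(!factInScope(Then, Else));   // sibling branch: fact must be popped
  assert(factInScope(Root, Else));    // fact from the root: still in scope
}
// -----------------------------------------------------------------------------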
+struct PreconditionTy { + CmpInst::Predicate Pred; + Value *Op0; + Value *Op1; - ConstraintListTy() {} + PreconditionTy(CmpInst::Predicate Pred, Value *Op0, Value *Op1) + : Pred(Pred), Op0(Op0), Op1(Op1) {} +}; - ConstraintListTy(const SmallVector &Constraints) - : Constraints(Constraints) {} +struct ConstraintTy { + SmallVector Coefficients; + SmallVector Preconditions; - void mergeIn(const ConstraintListTy &Other) { - append_range(Constraints, Other.Constraints); - } + bool IsSigned = false; + bool IsEq = false; + + ConstraintTy() = default; - unsigned size() const { return Constraints.size(); } + ConstraintTy(SmallVector Coefficients, bool IsSigned) + : Coefficients(Coefficients), IsSigned(IsSigned) {} + + unsigned size() const { return Coefficients.size(); } - unsigned empty() const { return Constraints.empty(); } + unsigned empty() const { return Coefficients.empty(); } /// Returns true if any constraint has a non-zero coefficient for any of the /// newly added indices. Zero coefficients for new indices are removed. If it /// returns true, no new variable need to be added to the system. bool needsNewIndices(const DenseMap &NewIndices) { - assert(size() == 1); for (unsigned I = 0; I < NewIndices.size(); ++I) { - int64_t Last = get(0).Coefficients.pop_back_val(); + int64_t Last = Coefficients.pop_back_val(); if (Last != 0) return true; } return false; } - ConstraintTy &get(unsigned I) { return Constraints[I]; } + /// Returns true if all preconditions for this list of constraints are + /// satisfied given \p CS and the corresponding \p Value2Index mapping. + bool isValid(const ConstraintInfo &Info) const; +}; + +/// Wrapper encapsulating separate constraint systems and corresponding value +/// mappings for both unsigned and signed information. Facts are added to and +/// conditions are checked against the corresponding system depending on the +/// signed-ness of their predicates. While the information is kept separate +/// based on signed-ness, certain conditions can be transferred between the two +/// systems. +class ConstraintInfo { + DenseMap UnsignedValue2Index; + DenseMap SignedValue2Index; + + ConstraintSystem UnsignedCS; + ConstraintSystem SignedCS; + +public: + DenseMap &getValue2Index(bool Signed) { + return Signed ? SignedValue2Index : UnsignedValue2Index; + } + const DenseMap &getValue2Index(bool Signed) const { + return Signed ? SignedValue2Index : UnsignedValue2Index; + } + + ConstraintSystem &getCS(bool Signed) { + return Signed ? SignedCS : UnsignedCS; + } + const ConstraintSystem &getCS(bool Signed) const { + return Signed ? SignedCS : UnsignedCS; + } + + void popLastConstraint(bool Signed) { getCS(Signed).popLastConstraint(); } + void popLastNVariables(bool Signed, unsigned N) { + getCS(Signed).popLastNVariables(N); + } + + bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const; + + void addFact(CmpInst::Predicate Pred, Value *A, Value *B, bool IsNegated, + unsigned NumIn, unsigned NumOut, + SmallVectorImpl &DFSInStack); + + /// Turn a comparison of the form \p Op0 \p Pred \p Op1 into a vector of + /// constraints, using indices from the corresponding constraint system. + /// Additional indices for newly discovered values are added to \p NewIndices. + ConstraintTy getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, + DenseMap &NewIndices) const; + + /// Turn a condition \p CmpI into a vector of constraints, using indices from + /// the corresponding constraint system. Additional indices for newly + /// discovered values are added to \p NewIndices. 
+ ConstraintTy getConstraint(CmpInst *Cmp, + DenseMap &NewIndices) const { + return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0), + Cmp->getOperand(1), NewIndices); + } + + /// Try to add information from \p A \p Pred \p B to the unsigned/signed + /// system if \p Pred is signed/unsigned. + void transferToOtherSystem(CmpInst::Predicate Pred, Value *A, Value *B, + bool IsNegated, unsigned NumIn, unsigned NumOut, + SmallVectorImpl &DFSInStack); }; } // namespace @@ -92,11 +174,28 @@ struct ConstraintListTy { // sum of the pairs equals \p V. The first pair is the constant-factor and X // must be nullptr. If the expression cannot be decomposed, returns an empty // vector. -static SmallVector, 4> decompose(Value *V) { +static SmallVector, 4> +decompose(Value *V, SmallVector &Preconditions, + bool IsSigned) { + + auto CanUseSExt = [](ConstantInt *CI) { + const APInt &Val = CI->getValue(); + return Val.sgt(MinSignedConstraintValue) && Val.slt(MaxConstraintValue); + }; + // Decompose \p V used with a signed predicate. + if (IsSigned) { + if (auto *CI = dyn_cast(V)) { + if (CanUseSExt(CI)) + return {{CI->getSExtValue(), nullptr}}; + } + + return {{0, nullptr}, {1, V}}; + } + if (auto *CI = dyn_cast(V)) { - if (CI->isNegative() || CI->uge(MaxConstraintValue)) + if (CI->uge(MaxConstraintValue)) return {}; - return {{CI->getSExtValue(), nullptr}}; + return {{CI->getZExtValue(), nullptr}}; } auto *GEP = dyn_cast(V); if (GEP && GEP->getNumOperands() == 2 && GEP->isInBounds()) { @@ -106,11 +205,13 @@ static SmallVector, 4> decompose(Value *V) { // If the index is zero-extended, it is guaranteed to be positive. if (match(GEP->getOperand(GEP->getNumOperands() - 1), m_ZExt(m_Value(Op0)))) { - if (match(Op0, m_NUWShl(m_Value(Op1), m_ConstantInt(CI)))) + if (match(Op0, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))) && + CanUseSExt(CI)) return {{0, nullptr}, {1, GEP->getPointerOperand()}, {std::pow(int64_t(2), CI->getSExtValue()), Op1}}; - if (match(Op0, m_NSWAdd(m_Value(Op1), m_ConstantInt(CI)))) + if (match(Op0, m_NSWAdd(m_Value(Op1), m_ConstantInt(CI))) && + CanUseSExt(CI)) return {{CI->getSExtValue(), nullptr}, {1, GEP->getPointerOperand()}, {1, Op1}}; @@ -118,17 +219,19 @@ static SmallVector, 4> decompose(Value *V) { } if (match(GEP->getOperand(GEP->getNumOperands() - 1), m_ConstantInt(CI)) && - !CI->isNegative()) + !CI->isNegative() && CanUseSExt(CI)) return {{CI->getSExtValue(), nullptr}, {1, GEP->getPointerOperand()}}; SmallVector, 4> Result; if (match(GEP->getOperand(GEP->getNumOperands() - 1), - m_NUWShl(m_Value(Op0), m_ConstantInt(CI)))) + m_NUWShl(m_Value(Op0), m_ConstantInt(CI))) && + CanUseSExt(CI)) Result = {{0, nullptr}, {1, GEP->getPointerOperand()}, {std::pow(int64_t(2), CI->getSExtValue()), Op0}}; else if (match(GEP->getOperand(GEP->getNumOperands() - 1), - m_NSWAdd(m_Value(Op0), m_ConstantInt(CI)))) + m_NSWAdd(m_Value(Op0), m_ConstantInt(CI))) && + CanUseSExt(CI)) Result = {{CI->getSExtValue(), nullptr}, {1, GEP->getPointerOperand()}, {1, Op0}}; @@ -136,6 +239,10 @@ static SmallVector, 4> decompose(Value *V) { Op0 = GEP->getOperand(GEP->getNumOperands() - 1); Result = {{0, nullptr}, {1, GEP->getPointerOperand()}, {1, Op0}}; } + // If Op0 is signed non-negative, the GEP is increasing monotonically and + // can be de-composed. 
+ Preconditions.emplace_back(CmpInst::ICMP_SGE, Op0, + ConstantInt::get(Op0->getType(), 0)); return Result; } @@ -145,12 +252,20 @@ static SmallVector, 4> decompose(Value *V) { Value *Op1; ConstantInt *CI; - if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI)))) + if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))) && + !CI->uge(MaxConstraintValue)) + return {{CI->getZExtValue(), nullptr}, {1, Op0}}; + if (match(V, m_Add(m_Value(Op0), m_ConstantInt(CI))) && CI->isNegative() && + CanUseSExt(CI)) { + Preconditions.emplace_back( + CmpInst::ICMP_UGE, Op0, + ConstantInt::get(Op0->getType(), CI->getSExtValue() * -1)); return {{CI->getSExtValue(), nullptr}, {1, Op0}}; + } if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1)))) return {{0, nullptr}, {1, Op0}, {1, Op1}}; - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI)))) + if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && CanUseSExt(CI)) return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}}; if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) return {{0, nullptr}, {1, Op0}, {-1, Op1}}; @@ -158,73 +273,73 @@ static SmallVector, 4> decompose(Value *V) { return {{0, nullptr}, {1, V}}; } -/// Turn a condition \p CmpI into a vector of constraints, using indices from \p -/// Value2Index. Additional indices for newly discovered values are added to \p -/// NewIndices. -static ConstraintListTy -getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, - const DenseMap &Value2Index, - DenseMap &NewIndices) { - int64_t Offset1 = 0; - int64_t Offset2 = 0; - - // First try to look up \p V in Value2Index and NewIndices. Otherwise add a - // new entry to NewIndices. - auto GetOrAddIndex = [&Value2Index, &NewIndices](Value *V) -> unsigned { - auto V2I = Value2Index.find(V); - if (V2I != Value2Index.end()) - return V2I->second; - auto NewI = NewIndices.find(V); - if (NewI != NewIndices.end()) - return NewI->second; - auto Insert = - NewIndices.insert({V, Value2Index.size() + NewIndices.size() + 1}); - return Insert.first->second; - }; - - if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE) - return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0, - Value2Index, NewIndices); - - if (Pred == CmpInst::ICMP_EQ) { - if (match(Op1, m_Zero())) - return getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index, - NewIndices); - - auto A = - getConstraint(CmpInst::ICMP_UGE, Op0, Op1, Value2Index, NewIndices); - auto B = - getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index, NewIndices); - A.mergeIn(B); - return A; +ConstraintTy +ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, + DenseMap &NewIndices) const { + bool IsEq = false; + // Try to convert Pred to one of ULE/ULT/SLE/SLT. + switch (Pred) { + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_SGE: { + Pred = CmpInst::getSwappedPredicate(Pred); + std::swap(Op0, Op1); + break; } - - if (Pred == CmpInst::ICMP_NE && match(Op1, m_Zero())) { - return getConstraint(CmpInst::ICMP_UGT, Op0, Op1, Value2Index, NewIndices); + case CmpInst::ICMP_EQ: + if (match(Op1, m_Zero())) { + Pred = CmpInst::ICMP_ULE; + } else { + IsEq = true; + Pred = CmpInst::ICMP_ULE; + } + break; + case CmpInst::ICMP_NE: + if (!match(Op1, m_Zero())) + return {}; + Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT); + std::swap(Op0, Op1); + break; + default: + break; } // Only ULE and ULT predicates are supported at the moment.
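// --- Illustrative aside -----------------------------------------------------
// A standalone sketch of the canonicalization the rewritten getConstraint()
// performs above, using a plain enum instead of the LLVM API: >-flavored
// predicates are mirrored into their <-flavored forms by swapping operands,
// so the solver only ever has to handle ULE/ULT/SLE/SLT.
#include <cassert>
#include <utility>

enum Pred { ULT, ULE, UGT, UGE, SLT, SLE, SGT, SGE };

Pred swapped(Pred P) { // mirror the relation: (a P b) == (b swapped(P) a)
  switch (P) {
  case UGT: return ULT;
  case UGE: return ULE;
  case SGT: return SLT;
  case SGE: return SLE;
  case ULT: return UGT;
  case ULE: return UGE;
  case SLT: return SGT;
  case SLE: return SGE;
  }
  return P;
}

void canonicalize(Pred &P, int &A, int &B) {
  if (P == UGT || P == UGE || P == SGT || P == SGE) {
    P = swapped(P);
    std::swap(A, B);
  }
}

int main() {
  Pred P = SGT;
  int A = 7, B = 3;      // 7 s> 3
  canonicalize(P, A, B); // becomes 3 s< 7
  assert(P == SLT && A == 3 && B == 7);
}
// -----------------------------------------------------------------------------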
- if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT) + if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT && + Pred != CmpInst::ICMP_SLE && Pred != CmpInst::ICMP_SLT) return {}; - auto ADec = decompose(Op0->stripPointerCastsSameRepresentation()); - auto BDec = decompose(Op1->stripPointerCastsSameRepresentation()); + SmallVector Preconditions; + bool IsSigned = CmpInst::isSigned(Pred); + auto &Value2Index = getValue2Index(IsSigned); + auto ADec = decompose(Op0->stripPointerCastsSameRepresentation(), + Preconditions, IsSigned); + auto BDec = decompose(Op1->stripPointerCastsSameRepresentation(), + Preconditions, IsSigned); // Skip if decomposing either of the values failed. if (ADec.empty() || BDec.empty()) return {}; - // Skip trivial constraints without any variables. - if (ADec.size() == 1 && BDec.size() == 1) - return {}; - - Offset1 = ADec[0].first; - Offset2 = BDec[0].first; + int64_t Offset1 = ADec[0].first; + int64_t Offset2 = BDec[0].first; Offset1 *= -1; // Create iterator ranges that skip the constant-factor. auto VariablesA = llvm::drop_begin(ADec); auto VariablesB = llvm::drop_begin(BDec); + // First try to look up \p V in Value2Index and NewIndices. Otherwise add a + // new entry to NewIndices. + auto GetOrAddIndex = [&Value2Index, &NewIndices](Value *V) -> unsigned { + auto V2I = Value2Index.find(V); + if (V2I != Value2Index.end()) + return V2I->second; + auto Insert = + NewIndices.insert({V, Value2Index.size() + NewIndices.size() + 1}); + return Insert.first->second; + }; + // Make sure all variables have entries in Value2Index or NewIndices. for (const auto &KV : concat>(VariablesA, VariablesB)) @@ -232,22 +347,85 @@ getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, // Build result constraint, by first adding all coefficients from A and then // subtracting all coefficients from B. - SmallVector R(Value2Index.size() + NewIndices.size() + 1, 0); + ConstraintTy Res( + SmallVector(Value2Index.size() + NewIndices.size() + 1, 0), + IsSigned); + Res.IsEq = IsEq; + auto &R = Res.Coefficients; for (const auto &KV : VariablesA) R[GetOrAddIndex(KV.second)] += KV.first; for (const auto &KV : VariablesB) R[GetOrAddIndex(KV.second)] -= KV.first; - R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0); - return {{R}}; + int64_t OffsetSum; + if (AddOverflow(Offset1, Offset2, OffsetSum)) + return {}; + if (Pred == (IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT)) + if (AddOverflow(OffsetSum, int64_t(-1), OffsetSum)) + return {}; + R[0] = OffsetSum; + Res.Preconditions = std::move(Preconditions); + return Res; +} + +bool ConstraintTy::isValid(const ConstraintInfo &Info) const { + return Coefficients.size() > 0 && + all_of(Preconditions, [&Info](const PreconditionTy &C) { + return Info.doesHold(C.Pred, C.Op0, C.Op1); + }); +} + +bool ConstraintInfo::doesHold(CmpInst::Predicate Pred, Value *A, + Value *B) const { + DenseMap NewIndices; + auto R = getConstraint(Pred, A, B, NewIndices); + + if (!NewIndices.empty()) + return false; + + // TODO: properly check NewIndices. 
+ return NewIndices.empty() && R.Preconditions.empty() && !R.IsEq && + !R.empty() && + getCS(CmpInst::isSigned(Pred)).isConditionImplied(R.Coefficients); } -static ConstraintListTy -getConstraint(CmpInst *Cmp, const DenseMap &Value2Index, - DenseMap &NewIndices) { - return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0), - Cmp->getOperand(1), Value2Index, NewIndices); +void ConstraintInfo::transferToOtherSystem( + CmpInst::Predicate Pred, Value *A, Value *B, bool IsNegated, unsigned NumIn, + unsigned NumOut, SmallVectorImpl &DFSInStack) { + // Check if we can combine facts from the signed and unsigned systems to + // derive additional facts. + if (!A->getType()->isIntegerTy()) + return; + // FIXME: This currently depends on the order we add facts. Ideally we + // would first add all known facts and only then try to add additional + // facts. + switch (Pred) { + default: + break; + case CmpInst::ICMP_ULT: + // If B is a signed positive constant, A >=s 0 and A getType(), 0))) { + addFact(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0), + IsNegated, NumIn, NumOut, DFSInStack); + addFact(CmpInst::ICMP_SLT, A, B, IsNegated, NumIn, NumOut, DFSInStack); + } + break; + case CmpInst::ICMP_SLT: + if (doesHold(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0))) + addFact(CmpInst::ICMP_ULT, A, B, IsNegated, NumIn, NumOut, DFSInStack); + break; + case CmpInst::ICMP_SGT: + if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1))) + addFact(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0), + IsNegated, NumIn, NumOut, DFSInStack); + break; + case CmpInst::ICMP_SGE: + if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) { + addFact(CmpInst::ICMP_UGE, A, B, IsNegated, NumIn, NumOut, DFSInStack); + } + break; + } } namespace { @@ -271,134 +449,253 @@ struct ConstraintOrBlock { Not(Not), Condition(Condition) {} }; -struct StackEntry { - unsigned NumIn; - unsigned NumOut; - CmpInst *Condition; - bool IsNot; +/// Keep state required to build worklist. +struct State { + DominatorTree &DT; + SmallVector WorkList; - StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot) - : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {} + State(DominatorTree &DT) : DT(DT) {} + + /// Process block \p BB and add known facts to work-list. + void addInfoFor(BasicBlock &BB); + + /// Returns true if we can add a known condition from BB to its successor + /// block Succ. Each predecessor of Succ can either be BB or be dominated + /// by Succ (e.g. the case when adding a condition from a pre-header to a + /// loop header). 
+ bool canAddSuccessor(BasicBlock &BB, BasicBlock *Succ) const { + if (BB.getSingleSuccessor()) { + assert(BB.getSingleSuccessor() == Succ); + return DT.properlyDominates(&BB, Succ); + } + return any_of(successors(&BB), + [Succ](const BasicBlock *S) { return S != Succ; }) && + all_of(predecessors(Succ), [&BB, Succ, this](BasicBlock *Pred) { + return Pred == &BB || DT.dominates(Succ, Pred); + }); + } }; + } // namespace #ifndef NDEBUG -static void dumpWithNames(ConstraintTy &C, +static void dumpWithNames(const ConstraintSystem &CS, DenseMap &Value2Index) { SmallVector Names(Value2Index.size(), ""); for (auto &KV : Value2Index) { Names[KV.second - 1] = std::string("%") + KV.first->getName().str(); } - ConstraintSystem CS; - CS.addVariableRowFill(C.Coefficients); CS.dump(Names); } -#endif -static bool eliminateConstraints(Function &F, DominatorTree &DT) { - bool Changed = false; - DT.updateDFSNumbers(); +static void dumpWithNames(ArrayRef C, + DenseMap &Value2Index) { ConstraintSystem CS; + CS.addVariableRowFill(C); + dumpWithNames(CS, Value2Index); +} +#endif - SmallVector WorkList; - - // First, collect conditions implied by branches and blocks with their - // Dominator DFS in and out numbers. - for (BasicBlock &BB : F) { - if (!DT.getNode(&BB)) - continue; - WorkList.emplace_back(DT.getNode(&BB)); - - // True as long as long as the current instruction is guaranteed to execute. - bool GuaranteedToExecute = true; - // Scan BB for assume calls. - // TODO: also use this scan to queue conditions to simplify, so we can - // interleave facts from assumes and conditions to simplify in a single - // basic block. And to skip another traversal of each basic block when - // simplifying. - for (Instruction &I : BB) { - Value *Cond; - // For now, just handle assumes with a single compare as condition. - if (match(&I, m_Intrinsic(m_Value(Cond))) && - isa(Cond)) { - if (GuaranteedToExecute) { - // The assume is guaranteed to execute when BB is entered, hence Cond - // holds on entry to BB. - WorkList.emplace_back(DT.getNode(&BB), cast(Cond), false); - } else { - // Otherwise the condition only holds in the successors. - for (BasicBlock *Succ : successors(&BB)) - WorkList.emplace_back(DT.getNode(Succ), cast(Cond), false); +void State::addInfoFor(BasicBlock &BB) { + WorkList.emplace_back(DT.getNode(&BB)); + + // True as long as the current instruction is guaranteed to execute. + bool GuaranteedToExecute = true; + // Scan BB for assume calls. + // TODO: also use this scan to queue conditions to simplify, so we can + // interleave facts from assumes and conditions to simplify in a single + // basic block. And to skip another traversal of each basic block when + // simplifying. + for (Instruction &I : BB) { + Value *Cond; + // For now, just handle assumes with a single compare as condition. + if (match(&I, m_Intrinsic(m_Value(Cond))) && + isa(Cond)) { + if (GuaranteedToExecute) { + // The assume is guaranteed to execute when BB is entered, hence Cond + // holds on entry to BB. + WorkList.emplace_back(DT.getNode(&BB), cast(Cond), false); + } else { + // Otherwise the condition only holds in the successors.
+ for (BasicBlock *Succ : successors(&BB)) { + if (!canAddSuccessor(BB, Succ)) + continue; + WorkList.emplace_back(DT.getNode(Succ), cast(Cond), false); } } - GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); } + GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); + } - auto *Br = dyn_cast(BB.getTerminator()); - if (!Br || !Br->isConditional()) - continue; + auto *Br = dyn_cast(BB.getTerminator()); + if (!Br || !Br->isConditional()) + return; + + // If the condition is an OR of 2 compares and the false successor only has + // the current block as predecessor, queue both negated conditions for the + // false successor. + Value *Op0, *Op1; + if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) && + isa(Op0) && isa(Op1)) { + BasicBlock *FalseSuccessor = Br->getSuccessor(1); + if (canAddSuccessor(BB, FalseSuccessor)) { + WorkList.emplace_back(DT.getNode(FalseSuccessor), cast(Op0), + true); + WorkList.emplace_back(DT.getNode(FalseSuccessor), cast(Op1), + true); + } + return; + } - // Returns true if we can add a known condition from BB to its successor - // block Succ. Each predecessor of Succ can either be BB or be dominated by - // Succ (e.g. the case when adding a condition from a pre-header to a loop - // header). - auto CanAdd = [&BB, &DT](BasicBlock *Succ) { - return all_of(predecessors(Succ), [&BB, &DT, Succ](BasicBlock *Pred) { - return Pred == &BB || DT.dominates(Succ, Pred); - }); - }; - // If the condition is an OR of 2 compares and the false successor only has - // the current block as predecessor, queue both negated conditions for the - // false successor. - Value *Op0, *Op1; - if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) && - match(Op0, m_Cmp()) && match(Op1, m_Cmp())) { - BasicBlock *FalseSuccessor = Br->getSuccessor(1); - if (CanAdd(FalseSuccessor)) { - WorkList.emplace_back(DT.getNode(FalseSuccessor), cast(Op0), - true); - WorkList.emplace_back(DT.getNode(FalseSuccessor), cast(Op1), - true); - } - continue; + // If the condition is an AND of 2 compares and the true successor only has + // the current block as predecessor, queue both conditions for the true + // successor. + if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) && + isa(Op0) && isa(Op1)) { + BasicBlock *TrueSuccessor = Br->getSuccessor(0); + if (canAddSuccessor(BB, TrueSuccessor)) { + WorkList.emplace_back(DT.getNode(TrueSuccessor), cast(Op0), + false); + WorkList.emplace_back(DT.getNode(TrueSuccessor), cast(Op1), + false); } + return; + } - // If the condition is an AND of 2 compares and the true successor only has - // the current block as predecessor, queue both conditions for the true - // successor. 
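// --- Illustrative aside -----------------------------------------------------
// A standalone check (plain booleans, not the LLVM API) of the reasoning in
// the two comments above: for "br (c1 || c2) T, F", reaching F implies both
// !c1 and !c2; for "br (c1 && c2) T, F", reaching T implies both c1 and c2.
// On the opposite edges nothing is known about the individual conjuncts,
// which is why addInfoFor() queues facts for only one successor each.
#include <cassert>

int main() {
  for (bool C1 : {false, true})
    for (bool C2 : {false, true}) {
      if (!(C1 || C2)) // the OR's false edge
        assert(!C1 && !C2);
      if (C1 && C2)    // the AND's true edge
        assert(C1 && C2);
    }
  // Counterexample for the other edges: c1=false, c2=true reaches the OR's
  // true edge with c1 false, and the AND's false edge with c2 true.
}
// -----------------------------------------------------------------------------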
- if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) && - match(Op0, m_Cmp()) && match(Op1, m_Cmp())) { - BasicBlock *TrueSuccessor = Br->getSuccessor(0); - if (CanAdd(TrueSuccessor)) { - WorkList.emplace_back(DT.getNode(TrueSuccessor), cast(Op0), - false); - WorkList.emplace_back(DT.getNode(TrueSuccessor), cast(Op1), - false); + auto *CmpI = dyn_cast(Br->getCondition()); + if (!CmpI) + return; + if (canAddSuccessor(BB, Br->getSuccessor(0))) + WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false); + if (canAddSuccessor(BB, Br->getSuccessor(1))) + WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true); +} + +void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B, + bool IsNegated, unsigned NumIn, unsigned NumOut, + SmallVectorImpl &DFSInStack) { + // If the constraint has a pre-condition, skip the constraint if it does not + // hold. + DenseMap NewIndices; + auto R = getConstraint(Pred, A, B, NewIndices); + if (!R.isValid(*this)) + return; + + //LLVM_DEBUG(dbgs() << "Adding " << *Condition << " " << IsNegated << "\n"); + bool Added = false; + assert(CmpInst::isSigned(Pred) == R.IsSigned && + "condition and constraint signs must match"); + auto &CSToUse = getCS(R.IsSigned); + if (R.Coefficients.empty()) + return; + + Added |= CSToUse.addVariableRowFill(R.Coefficients); + + // If R has been added to the system, queue it for removal once it goes + // out-of-scope. + if (Added) { + SmallVector ValuesToRelease; + for (auto &KV : NewIndices) { + getValue2Index(R.IsSigned).insert(KV); + ValuesToRelease.push_back(KV.first); + } + + LLVM_DEBUG({ + dbgs() << " constraint: "; + dumpWithNames(R.Coefficients, getValue2Index(R.IsSigned)); + }); + + DFSInStack.emplace_back(NumIn, NumOut, IsNegated, R.IsSigned, + ValuesToRelease); + + if (R.IsEq) { + // Also add the inverted constraint for equality constraints. + for (auto &Coeff : R.Coefficients) + Coeff *= -1; + CSToUse.addVariableRowFill(R.Coefficients); + + DFSInStack.emplace_back(NumIn, NumOut, IsNegated, R.IsSigned, + SmallVector()); + } + } +} + +static void +tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info, + SmallVectorImpl &ToRemove) { + auto DoesConditionHold = [](CmpInst::Predicate Pred, Value *A, Value *B, + ConstraintInfo &Info) { + DenseMap NewIndices; + auto R = Info.getConstraint(Pred, A, B, NewIndices); + if (R.size() < 2 || R.needsNewIndices(NewIndices) || !R.isValid(Info)) + return false; + + auto &CSToUse = Info.getCS(CmpInst::isSigned(Pred)); + return CSToUse.isConditionImplied(R.Coefficients); + }; + + if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { + // If A s>= B && B s>= 0, ssub.with.overflow(a, b) should not overflow and + // can be simplified to a regular sub. 
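// --- Illustrative aside -----------------------------------------------------
// A standalone sketch (using the GCC/Clang __builtin_sub_overflow intrinsic,
// not the LLVM API) of why the proven facts A s>= B and B s>= 0 let
// tryToSimplifyOverflowMath() below drop the overflow check: the difference
// then lies in [0, A], which is always representable, so the intrinsic's
// overflow bit is statically false and a plain sub suffices.
#include <cassert>
#include <cstdint>

bool ssubOverflows(int64_t A, int64_t B) {
  int64_t R;
  return __builtin_sub_overflow(A, B, &R); // the flag ssub.with.overflow yields
}

int main() {
  // Under the preconditions (A >= B >= 0) no overflow is possible:
  assert(!ssubOverflows(INT64_MAX, 5));
  assert(!ssubOverflows(7, 7));
  // Dropping the B >= 0 precondition would make the rewrite unsound:
  assert(ssubOverflows(INT64_MAX, -1));
}
// -----------------------------------------------------------------------------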
+ Value *A = II->getArgOperand(0); + Value *B = II->getArgOperand(1); + if (!DoesConditionHold(CmpInst::ICMP_SGE, A, B, Info) || + !DoesConditionHold(CmpInst::ICMP_SGE, B, + ConstantInt::get(A->getType(), 0), Info)) + return; + + IRBuilder<> Builder(II->getParent(), II->getIterator()); + Value *Sub = nullptr; + for (User *U : make_early_inc_range(II->users())) { + if (match(U, m_ExtractValue<0>(m_Value()))) { + if (!Sub) + Sub = Builder.CreateSub(A, B); + U->replaceAllUsesWith(Sub); + } else if (match(U, m_ExtractValue<1>(m_Value()))) + U->replaceAllUsesWith(Builder.getFalse()); + else + continue; + + if (U->use_empty()) { + auto *I = cast(U); + ToRemove.push_back(I); + I->setOperand(0, PoisonValue::get(II->getType())); } - continue; } - auto *CmpI = dyn_cast(Br->getCondition()); - if (!CmpI) + if (II->use_empty()) + II->eraseFromParent(); + } +} + +static bool eliminateConstraints(Function &F, DominatorTree &DT) { + bool Changed = false; + DT.updateDFSNumbers(); + + ConstraintInfo Info; + State S(DT); + + // First, collect conditions implied by branches and blocks with their + // Dominator DFS in and out numbers. + for (BasicBlock &BB : F) { + if (!DT.getNode(&BB)) continue; - if (CanAdd(Br->getSuccessor(0))) - WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false); - if (CanAdd(Br->getSuccessor(1))) - WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true); + S.addInfoFor(BB); } // Next, sort worklist by dominance, so that dominating blocks and conditions // come before blocks and conditions dominated by them. If a block and a // condition have the same numbers, the condition comes before the block, as // it holds on entry to the block. - sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) { + stable_sort(S.WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) { return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock); }); + SmallVector ToRemove; + // Finally, process ordered worklist and eliminate implied conditions. SmallVector DFSInStack; - DenseMap Value2Index; - for (ConstraintOrBlock &CB : WorkList) { + for (ConstraintOrBlock &CB : S.WorkList) { // First, pop entries from the stack that are out-of-scope for CB. Remove // the corresponding entry from the constraint system. while (!DFSInStack.empty()) { @@ -409,10 +706,20 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { assert(E.NumIn <= CB.NumIn); if (CB.NumOut <= E.NumOut) break; - LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot - << "\n"); + LLVM_DEBUG({ + dbgs() << "Removing "; + dumpWithNames(Info.getCS(E.IsSigned).getLastConstraint(), + Info.getValue2Index(E.IsSigned)); + dbgs() << "\n"; + }); + + Info.popLastConstraint(E.IsSigned); + // Remove variables in the system that went out of scope. + auto &Mapping = Info.getValue2Index(E.IsSigned); + for (Value *V : E.ValuesToRelease) + Mapping.erase(V); + Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size()); DFSInStack.pop_back(); - CS.popLastConstraint(); } LLVM_DEBUG({ @@ -427,28 +734,30 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { // For a block, check if any CmpInsts become known based on the current set // of constraints. 
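// --- Illustrative aside -----------------------------------------------------
// A simplified model of what "implied by dominating constraints" means below
// (ConstraintSystem itself uses Fourier-Motzkin elimination; this shows only
// the underlying arithmetic): facts are rows {c0, cx, cy} encoding
// cx*x + cy*y <= c0, and any sum of valid rows is again valid, so deriving
// the row of a later compare proves that compare true.
#include <cassert>
#include <cstddef>
#include <vector>

using Row = std::vector<long>;

Row addRows(const Row &A, const Row &B) {
  Row R(A.size());
  for (std::size_t I = 0; I < A.size(); ++I)
    R[I] = A[I] + B[I];
  return R;
}

int main() {
  Row XleY = {0, 1, -1}; // fact: x - y <= 0, i.e. x <= y
  Row Yle5 = {5, 0, 1};  // fact: y <= 5
  // Their sum is {5, 1, 0}, i.e. x <= 5: a dominated "x <= 5" icmp folds to
  // true.
  assert((addRows(XleY, Yle5) == Row{5, 1, 0}));
}
// -----------------------------------------------------------------------------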
if (CB.IsBlock) { - for (Instruction &I : *CB.BB) { - auto *Cmp = dyn_cast(&I); + for (Instruction &I : make_early_inc_range(*CB.BB)) { + if (auto *II = dyn_cast(&I)) { + tryToSimplifyOverflowMath(II, Info, ToRemove); + continue; + } + auto *Cmp = dyn_cast(&I); if (!Cmp) continue; DenseMap NewIndices; - auto R = getConstraint(Cmp, Value2Index, NewIndices); - if (R.size() != 1) - continue; - - if (R.needsNewIndices(NewIndices)) + auto R = Info.getConstraint(Cmp, NewIndices); + if (R.IsEq || R.empty() || R.needsNewIndices(NewIndices) || + !R.isValid(Info)) continue; - if (CS.isConditionImplied(R.get(0).Coefficients)) { + auto &CSToUse = Info.getCS(R.IsSigned); + if (CSToUse.isConditionImplied(R.Coefficients)) { if (!DebugCounter::shouldExecute(EliminatedCounter)) continue; - LLVM_DEBUG(dbgs() << "Condition " << *Cmp - << " implied by dominating constraints\n"); LLVM_DEBUG({ - for (auto &E : reverse(DFSInStack)) - dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; + dbgs() << "Condition " << *Cmp + << " implied by dominating constraints\n"; + dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); Cmp->replaceUsesWithIf( ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) { @@ -460,16 +769,15 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { NumCondsRemoved++; Changed = true; } - if (CS.isConditionImplied( - ConstraintSystem::negate(R.get(0).Coefficients))) { + if (CSToUse.isConditionImplied( + ConstraintSystem::negate(R.Coefficients))) { if (!DebugCounter::shouldExecute(EliminatedCounter)) continue; - LLVM_DEBUG(dbgs() << "Condition !" << *Cmp - << " implied by dominating constraints\n"); LLVM_DEBUG({ - for (auto &E : reverse(DFSInStack)) - dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n"; + dbgs() << "Condition !" << *Cmp + << " implied by dominating constraints\n"; + dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); Cmp->replaceAllUsesWith( ConstantInt::getFalse(F.getParent()->getContext())); @@ -482,7 +790,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { // Set up a function to restore the predicate at the end of the scope if it // has been negated. Negate the predicate in-place, if required. - auto *CI = dyn_cast(CB.Condition); + auto *CI = dyn_cast(CB.Condition); auto PredicateRestorer = make_scope_exit([CI, &CB]() { if (CB.Not && CI) CI->setPredicate(CI->getInversePredicate()); @@ -496,34 +804,28 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) { } } - // Otherwise, add the condition to the system and stack, if we can transform - // it into a constraint. - DenseMap NewIndices; - auto R = getConstraint(CB.Condition, Value2Index, NewIndices); - if (R.empty()) - continue; - - for (auto &KV : NewIndices) - Value2Index.insert(KV); - - LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n"); - bool Added = false; - for (auto &C : R.Constraints) { - auto Coeffs = C.Coefficients; - LLVM_DEBUG({ - dbgs() << " constraint: "; - dumpWithNames(C, Value2Index); - }); - Added |= CS.addVariableRowFill(Coeffs); - // If R has been added to the system, queue it for removal once it goes - // out-of-scope. - if (Added) - DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not); + ICmpInst::Predicate Pred; + Value *A, *B; + if (match(CB.Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { + // Otherwise, add the condition to the system and stack, if we can + // transform it into a constraint. 
+ Info.addFact(Pred, A, B, CB.Not, CB.NumIn, CB.NumOut, DFSInStack); + Info.transferToOtherSystem(Pred, A, B, CB.Not, CB.NumIn, CB.NumOut, + DFSInStack); } } - assert(CS.size() == DFSInStack.size() && +#ifndef NDEBUG + unsigned SignedEntries = + count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; }); + assert(Info.getCS(false).size() == DFSInStack.size() - SignedEntries && + "updates to CS and DFSInStack are out of sync"); + assert(Info.getCS(true).size() == SignedEntries && "updates to CS and DFSInStack are out of sync"); +#endif + + for (Instruction *I : ToRemove) + I->eraseFromParent(); return Changed; } diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index a3fd97079b1d..64bd4241f37c 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -41,8 +41,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -215,6 +213,53 @@ static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI, return true; } +static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming, + BasicBlock *From, BasicBlock *To, + Instruction *CxtI) { + if (Constant *C = LVI->getConstantOnEdge(Incoming, From, To, CxtI)) + return C; + + // Look if the incoming value is a select with a scalar condition for which + // LVI can tell us the value. In that case replace the incoming value with + // the appropriate value of the select. This often allows us to remove the + // select later. + auto *SI = dyn_cast(Incoming); + if (!SI) + return nullptr; + + // Once LVI learns to handle vector types, we could also add support + // for vector type constants that are not all zeroes or all ones. + Value *Condition = SI->getCondition(); + if (!Condition->getType()->isVectorTy()) { + if (Constant *C = LVI->getConstantOnEdge(Condition, From, To, CxtI)) { + if (C->isOneValue()) + return SI->getTrueValue(); + if (C->isZeroValue()) + return SI->getFalseValue(); + } + } + + // Look if the select has a constant but LVI tells us that the incoming + // value can never be that constant. In that case replace the incoming + // value with the other value of the select. This often allows us to + // remove the select later. + + // The "false" case + if (auto *C = dyn_cast(SI->getFalseValue())) + if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) == + LazyValueInfo::False) + return SI->getTrueValue(); + + // The "true" case, + // similar to the select "false" case, but try the select "true" value + if (auto *C = dyn_cast(SI->getTrueValue())) + if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) == + LazyValueInfo::False) + return SI->getFalseValue(); + + return nullptr; +} + static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT, const SimplifyQuery &SQ) { bool Changed = false; @@ -224,53 +269,14 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT, Value *Incoming = P->getIncomingValue(i); if (isa(Incoming)) continue; - Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P); - - // Look if the incoming value is a select with a scalar condition for which - // LVI can tells us the value. In that case replace the incoming value with - the appropriate value of the select.
This often allows us to remove the - // select later. - if (!V) { - SelectInst *SI = dyn_cast(Incoming); - if (!SI) continue; - - Value *Condition = SI->getCondition(); - if (!Condition->getType()->isVectorTy()) { - if (Constant *C = LVI->getConstantOnEdge( - Condition, P->getIncomingBlock(i), BB, P)) { - if (C->isOneValue()) { - V = SI->getTrueValue(); - } else if (C->isZeroValue()) { - V = SI->getFalseValue(); - } - // Once LVI learns to handle vector types, we could also add support - // for vector type constants that are not all zeroes or all ones. - } - } - - // Look if the select has a constant but LVI tells us that the incoming - // value can never be that constant. In that case replace the incoming - // value with the other value of the select. This often allows us to - // remove the select later. - if (!V) { - Constant *C = dyn_cast(SI->getFalseValue()); - if (!C) continue; - - if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, - P->getIncomingBlock(i), BB, P) != - LazyValueInfo::False) - continue; - V = SI->getTrueValue(); - } - - LLVM_DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n'); + Value *V = getValueOnEdge(LVI, Incoming, P->getIncomingBlock(i), BB, P); + if (V) { + P->setIncomingValue(i, V); + Changed = true; } - - P->setIncomingValue(i, V); - Changed = true; } - if (Value *V = SimplifyInstruction(P, SQ)) { + if (Value *V = simplifyInstruction(P, SQ)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -575,7 +581,7 @@ static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) { StructType *ST = cast(WO->getType()); Constant *Struct = ConstantStruct::get(ST, - { UndefValue::get(ST->getElementType(0)), + { PoisonValue::get(ST->getElementType(0)), ConstantInt::getFalse(ST->getElementType(1)) }); Value *NewI = B.CreateInsertValue(Struct, NewOp, 0); WO->replaceAllUsesWith(NewI); @@ -735,8 +741,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can // prove that such a combination is impossible, we need to bump the bitwidth. if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) && - CRs[0]->contains( - APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth))) + CRs[0]->contains(APInt::getSignedMinValue(MinSignedBits).sext(OrigWidth))) ++MinSignedBits; // Don't shrink below 8 bits wide. 
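// --- Illustrative aside -----------------------------------------------------
// A standalone illustration (plain integers, not the LLVM API) of the
// INT_MIN / -1 hazard handled in the narrowSDivOrSRem() hunk above: at width
// N, signed division overflows only for SMIN / -1, since |SMIN| = 2^(N-1)
// exceeds SMAX = 2^(N-1) - 1. When value analysis cannot rule that pair out,
// the pass bumps MinSignedBits by one, which always makes the quotient
// representable.
#include <cassert>
#include <cstdint>

int64_t signedMin(unsigned Bits) { return -(int64_t(1) << (Bits - 1)); }
int64_t signedMax(unsigned Bits) { return (int64_t(1) << (Bits - 1)) - 1; }

int main() {
  // Computed at 64 bits to sidestep UB: the 8-bit SMIN / -1 quotient...
  int64_t Q = signedMin(8) / -1;
  assert(Q == 128 && Q > signedMax(8)); // ...does not fit in 8 signed bits,
  assert(Q <= signedMax(9));            // but always fits with one extra bit.
}
// -----------------------------------------------------------------------------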
@@ -955,7 +960,8 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { ++NumAShrsConverted; auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1), - SDI->getName(), SDI); + "", SDI); + BO->takeName(SDI); BO->setDebugLoc(SDI->getDebugLoc()); BO->setIsExact(SDI->isExact()); SDI->replaceAllUsesWith(BO); @@ -974,8 +980,8 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { return false; ++NumSExt; - auto *ZExt = - CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI); + auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI); + ZExt->takeName(SDI); ZExt->setDebugLoc(SDI->getDebugLoc()); SDI->replaceAllUsesWith(ZExt); SDI->eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 143a78f604fc..5667eefabad5 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -60,30 +60,31 @@ #include "llvm/Transforms/Scalar/DFAJumpThreading.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include +#ifdef EXPENSIVE_CHECKS +#include "llvm/IR/Verifier.h" +#endif + using namespace llvm; #define DEBUG_TYPE "dfa-jump-threading" @@ -102,6 +103,11 @@ static cl::opt MaxPathLength( cl::desc("Max number of blocks searched to find a threading path"), cl::Hidden, cl::init(20)); +static cl::opt MaxNumPaths( + "dfa-max-num-paths", + cl::desc("Max number of paths enumerated around a switch"), + cl::Hidden, cl::init(200)); + static cl::opt CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), @@ -414,7 +420,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { struct MainSwitch { MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { - if (isPredictable(SI)) { + if (isCandidate(SI)) { Instr = SI; } else { ORE->emit([&]() { @@ -432,83 +438,60 @@ struct MainSwitch { } private: - /// Do a use-def chain traversal. Make sure the value of the switch variable - /// is always a known constant. This means that all conditional jumps based on - /// switch variable can be converted to unconditional jumps. - bool isPredictable(const SwitchInst *SI) { - std::deque Q; + /// Do a use-def chain traversal starting from the switch condition to see if + /// \p SI is a potential candidate. + /// + /// Also, collect select instructions to unfold.
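// --- Illustrative aside -----------------------------------------------------
// A simplified model (invented Node type, not the LLVM API) of the
// isCandidate() walk defined just below: starting from the switch condition
// (which must be a phi), traverse phi/select operands breadth-first.
// Constants are fine, and, unlike the stricter isPredictable() this replaces,
// unknown operands no longer reject the switch; getStateDefMap() re-checks
// them later once the paths are known.
#include <cassert>
#include <deque>
#include <set>
#include <vector>

enum class Kind { Phi, Select, Const, Other };
struct Node {
  Kind K;
  std::vector<const Node *> Ops;
};

bool isCandidateSketch(const Node &Cond) {
  if (Cond.K != Kind::Phi)
    return false;
  std::deque<const Node *> Q{&Cond};
  std::set<const Node *> Seen{&Cond};
  while (!Q.empty()) {
    const Node *N = Q.front();
    Q.pop_front();
    if (N->K == Kind::Phi || N->K == Kind::Select)
      for (const Node *Op : N->Ops)
        if (Seen.insert(Op).second)
          Q.push_back(Op);
    // Kind::Const and Kind::Other are tolerated but not traversed further.
  }
  return true;
}

int main() {
  Node C{Kind::Const, {}}, Unknown{Kind::Other, {}};
  Node Sel{Kind::Select, {&C, &Unknown}};
  Node Phi{Kind::Phi, {&C, &Sel}};
  assert(isCandidateSketch(Phi)); // an unknown operand no longer disqualifies
  assert(!isCandidateSketch(C));  // a non-phi switch condition still does
}
// -----------------------------------------------------------------------------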
+ bool isCandidate(const SwitchInst *SI) { + std::deque Q; SmallSet SeenValues; SelectInsts.clear(); - Value *FirstDef = SI->getOperand(0); - auto *Inst = dyn_cast(FirstDef); - - // If this is a function argument or another non-instruction, then give up. - // We are interested in loop local variables. - if (!Inst) - return false; - - // Require the first definition to be a PHINode - if (!isa(Inst)) + Value *SICond = SI->getCondition(); + LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); + if (!isa(SICond)) return false; - LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); - - Q.push_back(Inst); - SeenValues.insert(FirstDef); + addToQueue(SICond, Q, SeenValues); while (!Q.empty()) { - Instruction *Current = Q.front(); + Value *Current = Q.front(); Q.pop_front(); if (auto *Phi = dyn_cast(Current)) { for (Value *Incoming : Phi->incoming_values()) { - if (!isPredictableValue(Incoming, SeenValues)) - return false; - addInstToQueue(Incoming, Q, SeenValues); + addToQueue(Incoming, Q, SeenValues); } - LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); } else if (SelectInst *SelI = dyn_cast(Current)) { if (!isValidSelectInst(SelI)) return false; - if (!isPredictableValue(SelI->getTrueValue(), SeenValues) || - !isPredictableValue(SelI->getFalseValue(), SeenValues)) { - return false; - } - addInstToQueue(SelI->getTrueValue(), Q, SeenValues); - addInstToQueue(SelI->getFalseValue(), Q, SeenValues); - LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); + addToQueue(SelI->getTrueValue(), Q, SeenValues); + addToQueue(SelI->getFalseValue(), Q, SeenValues); + LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); if (auto *SelIUse = dyn_cast(SelI->user_back())) SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); + } else if (isa(Current)) { + LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); + continue; } else { - // If it is neither a phi nor a select, then we give up. - return false; + LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); + // Allow unpredictable values. The hope is that those will be the + // initial switch values that can be ignored (they will hit the + // unthreaded switch) but this assumption will get checked later after + // paths have been enumerated (in function getStateDefMap). + continue; } } return true; } - bool isPredictableValue(Value *InpVal, SmallSet &SeenValues) { - if (SeenValues.contains(InpVal)) - return true; - - if (isa(InpVal)) - return true; - - // If this is a function argument or another non-instruction, then give up. 
- if (!isa(InpVal)) - return false; - - return true; - } - - void addInstToQueue(Value *Val, std::deque &Q, - SmallSet &SeenValues) { + void addToQueue(Value *Val, std::deque &Q, + SmallSet &SeenValues) { if (SeenValues.contains(Val)) return; - if (Instruction *I = dyn_cast(Val)) - Q.push_back(I); + Q.push_back(Val); SeenValues.insert(Val); } @@ -562,7 +545,16 @@ struct AllSwitchPaths { void run() { VisitedBlocks Visited; PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); - StateDefMap StateDef = getStateDefMap(); + StateDefMap StateDef = getStateDefMap(LoopPaths); + + if (StateDef.empty()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", + Switch) + << "Switch instruction is not predictable."; + }); + return; + } for (PathType Path : LoopPaths) { ThreadingPath TPath; @@ -637,6 +629,9 @@ private: PathType NewPath(Path); NewPath.push_front(BB); Res.push_back(NewPath); + if (Res.size() >= MaxNumPaths) { + return Res; + } } } // This block could now be visited again from a different predecessor. Note @@ -647,14 +642,22 @@ private: } /// Walk the use-def chain and collect all the state-defining instructions. - StateDefMap getStateDefMap() const { + /// + /// Return an empty map if unpredictable values encountered inside the basic + /// blocks of \p LoopPaths. + StateDefMap getStateDefMap(const PathsType &LoopPaths) const { StateDefMap Res; + // Basic blocks belonging to any of the loops around the switch statement. + SmallPtrSet LoopBBs; + for (const PathType &Path : LoopPaths) { + for (BasicBlock *BB : Path) + LoopBBs.insert(BB); + } + Value *FirstDef = Switch->getOperand(0); - assert(isa(FirstDef) && "After select unfolding, all state " - "definitions are expected to be phi " - "nodes."); + assert(isa(FirstDef) && "The first definition must be a phi."); SmallVector Stack; Stack.push_back(dyn_cast(FirstDef)); @@ -666,15 +669,17 @@ private: Res[CurPhi->getParent()] = CurPhi; SeenValues.insert(CurPhi); - for (Value *Incoming : CurPhi->incoming_values()) { + for (BasicBlock *IncomingBB : CurPhi->blocks()) { + Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB); + bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0; if (Incoming == FirstDef || isa(Incoming) || - SeenValues.contains(Incoming)) { + SeenValues.contains(Incoming) || IsOutsideLoops) { continue; } - assert(isa(Incoming) && "After select unfolding, all state " - "definitions are expected to be phi " - "nodes."); + // Any unpredictable value inside the loops means we must bail out. + if (!isa(Incoming)) + return StateDefMap(); Stack.push_back(cast(Incoming)); } @@ -823,6 +828,16 @@ private: }); return false; } + + if (!Metrics.NumInsts.isValid()) { + LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " + << "instructions with invalid cost.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) + << "Contains instructions with invalid cost."; + }); + return false; + } } unsigned DuplicationCost = 0; @@ -836,7 +851,7 @@ private: // using binary search, hence the LogBase2(). unsigned CondBranches = APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); - DuplicationCost = Metrics.NumInsts / CondBranches; + DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches; } else { // Compared with jump tables, the DFA optimizer removes an indirect branch // on each loop iteration, thus making branch prediction more precise. 
The @@ -844,7 +859,7 @@ private: // predictor to make a mistake, and the more benefit there is in the DFA // optimizer. Thus, the more branch targets there are, the lower is the // cost of the DFA opt. - DuplicationCost = Metrics.NumInsts / JumpTableSize; + DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize; } LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " @@ -1197,7 +1212,7 @@ private: PhiToRemove.push_back(Phi); } for (PHINode *PN : PhiToRemove) { - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); PN->eraseFromParent(); } return; @@ -1246,7 +1261,7 @@ private: /// Returns true if IncomingBB is a predecessor of BB. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { - return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB); + return llvm::is_contained(predecessors(BB), IncomingBB); } AllSwitchPaths *SwitchPaths; @@ -1278,7 +1293,7 @@ bool DFAJumpThreading::run(Function &F) { continue; LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() - << " is predictable\n"); + << " is a candidate\n"); MainSwitch Switch(SI, ORE); if (!Switch.getInstr()) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index ae636e7b61f7..4c42869dbd58 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -38,7 +38,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -62,8 +64,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" @@ -75,7 +75,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" @@ -83,7 +82,6 @@ #include "llvm/Transforms/Utils/Local.h" #include #include -#include #include #include #include @@ -766,20 +764,27 @@ struct DSEState { // Post-order numbers for each basic block. Used to figure out if memory // accesses are executed before another access. DenseMap PostOrderNumbers; + // Values that are only used with assumes. Used to refine pointer escape + // analysis. + SmallPtrSet EphValues; /// Keep track of instructions (partly) overlapping with killing MemoryDefs per /// basic block. MapVector IOLs; + // Check if there are root nodes that are terminated by UnreachableInst. + // Those roots pessimize post-dominance queries. If there are such roots, + // fall back to CFG scan starting from all non-unreachable roots. + bool AnyUnreachableExit; // Class contains self-reference, make sure it's not copied/moved. 
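// The EphValues member added above caches "ephemeral" values: values that
// only feed llvm.assume and therefore should not make a pointer count as
// escaped. A minimal sketch of collecting and consulting them; the wrapper
// name mayBeCaptured is illustrative, and the EphValues-taking overload of
// PointerMayBeCaptured is the one this patch threads through DSE.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CodeMetrics.h"

static bool mayBeCaptured(llvm::Function &F, llvm::AssumptionCache &AC,
                          const llvm::Value *Ptr) {
  llvm::SmallPtrSet<const llvm::Value *, 16> EphValues;
  llvm::CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
  return llvm::PointerMayBeCaptured(Ptr, /*ReturnCaptures=*/true,
                                    /*StoreCaptures=*/false, EphValues);
}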
DSEState(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete; DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, - PostDominatorTree &PDT, const TargetLibraryInfo &TLI, - const LoopInfo &LI) - : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT), - PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) { + PostDominatorTree &PDT, AssumptionCache &AC, + const TargetLibraryInfo &TLI, const LoopInfo &LI) + : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA), + DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) { // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; @@ -805,6 +810,12 @@ struct DSEState { // Collect whether there is any irreducible control flow in the function. ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI); + + AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) { + return isa(E->getTerminator()); + }); + + CodeMetrics::collectEphemeralValues(&F, &AC, EphValues); } /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p @@ -951,7 +962,7 @@ struct DSEState { if (!isInvisibleToCallerOnUnwind(V)) { I.first->second = false; } else if (isNoAliasCall(V)) { - I.first->second = !PointerMayBeCaptured(V, true, false); + I.first->second = !PointerMayBeCaptured(V, true, false, EphValues); } } return I.first->second; @@ -970,7 +981,7 @@ struct DSEState { // with the killing MemoryDef. But we refrain from doing so for now to // limit compile-time and this does not cause any changes to the number // of stores removed on a large test set in practice. - I.first->second = PointerMayBeCaptured(V, false, true); + I.first->second = PointerMayBeCaptured(V, false, true, EphValues); return !I.first->second; } @@ -1003,7 +1014,8 @@ struct DSEState { if (CB->isLifetimeStartOrEnd()) return false; - return CB->use_empty() && CB->willReturn() && CB->doesNotThrow(); + return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() && + !CB->isTerminator(); } return false; @@ -1233,6 +1245,9 @@ struct DSEState { // Reached TOP. if (MSSA.isLiveOnEntryDef(Current)) { LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n"); + if (CanOptimize && Current != KillingDef->getDefiningAccess()) + // The first clobbering def is... none. + KillingDef->setOptimized(Current); return None; } @@ -1309,7 +1324,6 @@ struct DSEState { // memory location and not located in different loops. if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) { LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n"); - WalkerStepLimit -= 1; CanOptimize = false; continue; } @@ -1508,54 +1522,56 @@ struct DSEState { CommonPred = PDT.findNearestCommonDominator(CommonPred, BB); } - // If CommonPred is in the set of killing blocks, just check if it - // post-dominates MaybeDeadAccess. - if (KillingBlocks.count(CommonPred)) { - if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) - return {MaybeDeadAccess}; - return None; - } - // If the common post-dominator does not post-dominate MaybeDeadAccess, // there is a path from MaybeDeadAccess to an exit not going through a // killing block. - if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) { - SetVector WorkList; - - // If CommonPred is null, there are multiple exits from the function. - // They all have to be added to the worklist. 
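// The worklist scan being restructured around this hunk, in isolation:
// starting from the function's exit blocks, walk predecessors backwards; if
// the dead-store candidate's block is reachable without first crossing a
// killing block, some path escapes and the store must be kept. Names and the
// limit below are illustrative, not the pass's real constants.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CFG.h"

static bool allExitPathsKilled(
    llvm::ArrayRef<llvm::BasicBlock *> Exits,
    const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &KillingBlocks,
    llvm::BasicBlock *DeadAccessBB, unsigned Limit = 50) {
  llvm::SetVector<llvm::BasicBlock *> WorkList(Exits.begin(), Exits.end());
  for (unsigned I = 0; I < WorkList.size(); ++I) {
    llvm::BasicBlock *Current = WorkList[I];
    if (KillingBlocks.count(Current))
      continue; // This path is already blocked by a killing store.
    if (Current == DeadAccessBB)
      return false; // Reached the candidate without crossing a kill.
    for (llvm::BasicBlock *Pred : llvm::predecessors(Current))
      WorkList.insert(Pred);
    if (WorkList.size() >= Limit)
      return false; // Give up conservatively on large CFGs.
  }
  return true;
}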
- if (CommonPred) - WorkList.insert(CommonPred); - else - for (BasicBlock *R : PDT.roots()) + if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) { + if (!AnyUnreachableExit) + return None; + + // Fall back to CFG scan starting at all non-unreachable roots if not + // all paths to the exit go through CommonPred. + CommonPred = nullptr; + } + + // If CommonPred itself is in the set of killing blocks, we're done. + if (KillingBlocks.count(CommonPred)) + return {MaybeDeadAccess}; + + SetVector WorkList; + // If CommonPred is null, there are multiple exits from the function. + // They all have to be added to the worklist. + if (CommonPred) + WorkList.insert(CommonPred); + else + for (BasicBlock *R : PDT.roots()) { + if (!isa(R->getTerminator())) WorkList.insert(R); + } - NumCFGTries++; - // Check if all paths starting from an exit node go through one of the - // killing blocks before reaching MaybeDeadAccess. - for (unsigned I = 0; I < WorkList.size(); I++) { - NumCFGChecks++; - BasicBlock *Current = WorkList[I]; - if (KillingBlocks.count(Current)) - continue; - if (Current == MaybeDeadAccess->getBlock()) - return None; + NumCFGTries++; + // Check if all paths starting from an exit node go through one of the + // killing blocks before reaching MaybeDeadAccess. + for (unsigned I = 0; I < WorkList.size(); I++) { + NumCFGChecks++; + BasicBlock *Current = WorkList[I]; + if (KillingBlocks.count(Current)) + continue; + if (Current == MaybeDeadAccess->getBlock()) + return None; - // MaybeDeadAccess is reachable from the entry, so we don't have to - // explore unreachable blocks further. - if (!DT.isReachableFromEntry(Current)) - continue; + // MaybeDeadAccess is reachable from the entry, so we don't have to + // explore unreachable blocks further. + if (!DT.isReachableFromEntry(Current)) + continue; - for (BasicBlock *Pred : predecessors(Current)) - WorkList.insert(Pred); + for (BasicBlock *Pred : predecessors(Current)) + WorkList.insert(Pred); - if (WorkList.size() >= MemorySSAPathCheckLimit) - return None; - } - NumCFGSuccess++; - return {MaybeDeadAccess}; + if (WorkList.size() >= MemorySSAPathCheckLimit) + return None; } - return None; + NumCFGSuccess++; } // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is @@ -1780,10 +1796,9 @@ struct DSEState { if (!isRemovable(DefI)) return false; - if (StoredConstant && isAllocationFn(DefUO, &TLI)) { - auto *CB = cast(DefUO); - auto *InitC = getInitialValueOfAllocation(CB, &TLI, - StoredConstant->getType()); + if (StoredConstant) { + Constant *InitC = + getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType()); // If the clobbering access is LiveOnEntry, no instructions between them // can modify the memory location. 
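// The StoredConstant path above in isolation: a store is removable when it
// merely re-writes the value the allocation already starts with (for
// example, storing zero into calloc'ed memory), provided nothing clobbers
// the location in between. A sketch of the core test; the helper name
// storesInitialValue is illustrative.
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"

static bool storesInitialValue(const llvm::Value *Alloc,
                               const llvm::Constant *StoredC,
                               const llvm::TargetLibraryInfo &TLI) {
  llvm::Constant *InitC =
      llvm::getInitialValueOfAllocation(Alloc, &TLI, StoredC->getType());
  return InitC && InitC == StoredC;
}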
if (InitC && InitC == StoredConstant) @@ -1921,11 +1936,13 @@ struct DSEState { static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, + AssumptionCache &AC, const TargetLibraryInfo &TLI, const LoopInfo &LI) { bool MadeChange = false; - DSEState State(F, AA, MSSA, DT, PDT, TLI, LI); + MSSA.ensureOptimizedUses(); + DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI); // For each store: for (unsigned I = 0; I < State.MemDefs.size(); I++) { MemoryDef *KillingDef = State.MemDefs[I]; @@ -2105,9 +2122,10 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); MemorySSA &MSSA = AM.getResult(F).getMSSA(); PostDominatorTree &PDT = AM.getResult(F); + AssumptionCache &AC = AM.getResult(F); LoopInfo &LI = AM.getResult(F); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2147,9 +2165,11 @@ public: MemorySSA &MSSA = getAnalysis().getMSSA(); PostDominatorTree &PDT = getAnalysis().getPostDomTree(); + AssumptionCache &AC = + getAnalysis().getAssumptionCache(F); LoopInfo &LI = getAnalysis().getLoopInfo(); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2173,6 +2193,7 @@ public: AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); } }; @@ -2190,6 +2211,7 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, false) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 59b934c16c8a..cf2824954122 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -30,19 +29,16 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -55,7 +51,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" -#include "llvm/Transforms/Utils/GuardUtils.h" #include "llvm/Transforms/Utils/Local.h" #include #include @@ -781,6 +776,21 @@ private: return getLoadStorePointerOperand(Inst); } + Type *getValueType() const { + // TODO: handle target-specific intrinsics. 
+ if (IntrinsicInst *II = dyn_cast(Inst)) { + switch (II->getIntrinsicID()) { + case Intrinsic::masked_load: + return II->getType(); + case Intrinsic::masked_store: + return II->getArgOperand(0)->getType(); + default: + return nullptr; + } + } + return getLoadStoreType(Inst); + } + bool mayReadFromMemory() const { if (IntrID != 0) return Info.ReadMem; @@ -1162,6 +1172,9 @@ bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier, "Violated invariant"); if (Earlier.getPointerOperand() != Later.getPointerOperand()) return false; + if (!Earlier.getValueType() || !Later.getValueType() || + Earlier.getValueType() != Later.getValueType()) + return false; if (Earlier.getMatchingId() != Later.getMatchingId()) return false; // At the moment, we don't remove ordered stores, but do remove @@ -1334,7 +1347,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(&Inst, SQ)) { + if (Value *V = simplifyInstruction(&Inst, SQ)) { LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V << '\n'); if (!DebugCounter::shouldExecute(CSECounter)) { diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp index 44017b555769..ad2041cd4253 100644 --- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,8 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp index a98bb8358aef..56f2a3b3004d 100644 --- a/llvm/lib/Transforms/Scalar/Float2Int.cpp +++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp @@ -11,24 +11,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include -#include // For std::function #define DEBUG_TYPE "float2int" @@ -236,116 +234,111 @@ void Float2IntPass::walkBackwards() { } } -// Walk forwards down the list of seen instructions, so we visit defs before -// uses. -void Float2IntPass::walkForwards() { - for (auto &It : reverse(SeenInsts)) { - if (It.second != unknownRange()) - continue; +// Calculate result range from operand ranges. +// Return None if the range cannot be calculated yet. +Optional Float2IntPass::calcRange(Instruction *I) { + SmallVector OpRanges; + for (Value *O : I->operands()) { + if (Instruction *OI = dyn_cast(O)) { + auto OpIt = SeenInsts.find(OI); + assert(OpIt != SeenInsts.end() && "def not seen before use!"); + if (OpIt->second == unknownRange()) + return None; // Wait until operand range has been calculated. 
+ OpRanges.push_back(OpIt->second); + } else if (ConstantFP *CF = dyn_cast(O)) { + // Work out if the floating point number can be losslessly represented + // as an integer. + // APFloat::convertToInteger(&Exact) purports to do what we want, but + // the exactness can be too precise. For example, negative zero can + // never be exactly converted to an integer. + // + // Instead, we ask APFloat to round itself to an integral value - this + // preserves sign-of-zero - then compare the result with the original. + // + const APFloat &F = CF->getValueAPF(); + + // First, weed out obviously incorrect values. Non-finite numbers + // can't be represented and neither can negative zero, unless + // we're in fast math mode. + if (!F.isFinite() || + (F.isZero() && F.isNegative() && isa(I) && + !I->hasNoSignedZeros())) + return badRange(); + + APFloat NewF = F; + auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven); + if (Res != APFloat::opOK || NewF != F) + return badRange(); + + // OK, it's representable. Now get it. + APSInt Int(MaxIntegerBW+1, false); + bool Exact; + CF->getValueAPF().convertToInteger(Int, + APFloat::rmNearestTiesToEven, + &Exact); + OpRanges.push_back(ConstantRange(Int)); + } else { + llvm_unreachable("Should have already marked this as badRange!"); + } + } - Instruction *I = It.first; - std::function)> Op; - switch (I->getOpcode()) { - // FIXME: Handle select and phi nodes. - default: - case Instruction::UIToFP: - case Instruction::SIToFP: - llvm_unreachable("Should have been handled in walkForwards!"); + switch (I->getOpcode()) { + // FIXME: Handle select and phi nodes. + default: + case Instruction::UIToFP: + case Instruction::SIToFP: + llvm_unreachable("Should have been handled in walkForwards!"); - case Instruction::FNeg: - Op = [](ArrayRef Ops) { - assert(Ops.size() == 1 && "FNeg is a unary operator!"); - unsigned Size = Ops[0].getBitWidth(); - auto Zero = ConstantRange(APInt::getZero(Size)); - return Zero.sub(Ops[0]); - }; - break; + case Instruction::FNeg: { + assert(OpRanges.size() == 1 && "FNeg is a unary operator!"); + unsigned Size = OpRanges[0].getBitWidth(); + auto Zero = ConstantRange(APInt::getZero(Size)); + return Zero.sub(OpRanges[0]); + } - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - Op = [I](ArrayRef Ops) { - assert(Ops.size() == 2 && "its a binary operator!"); - auto BinOp = (Instruction::BinaryOps) I->getOpcode(); - return Ops[0].binaryOp(BinOp, Ops[1]); - }; - break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: { + assert(OpRanges.size() == 2 && "its a binary operator!"); + auto BinOp = (Instruction::BinaryOps) I->getOpcode(); + return OpRanges[0].binaryOp(BinOp, OpRanges[1]); + } - // - // Root-only instructions - we'll only see these if they're the - // first node in a walk. - // - case Instruction::FPToUI: - case Instruction::FPToSI: - Op = [I](ArrayRef Ops) { - assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!"); - // Note: We're ignoring the casts output size here as that's what the - // caller expects. - auto CastOp = (Instruction::CastOps)I->getOpcode(); - return Ops[0].castOp(CastOp, MaxIntegerBW+1); - }; - break; + // + // Root-only instructions - we'll only see these if they're the + // first node in a walk. + // + case Instruction::FPToUI: + case Instruction::FPToSI: { + assert(OpRanges.size() == 1 && "FPTo[US]I is a unary operator!"); + // Note: We're ignoring the casts output size here as that's what the + // caller expects. 
+ auto CastOp = (Instruction::CastOps)I->getOpcode(); + return OpRanges[0].castOp(CastOp, MaxIntegerBW+1); + } - case Instruction::FCmp: - Op = [](ArrayRef Ops) { - assert(Ops.size() == 2 && "FCmp is a binary operator!"); - return Ops[0].unionWith(Ops[1]); - }; - break; - } + case Instruction::FCmp: + assert(OpRanges.size() == 2 && "FCmp is a binary operator!"); + return OpRanges[0].unionWith(OpRanges[1]); + } +} - bool Abort = false; - SmallVector OpRanges; - for (Value *O : I->operands()) { - if (Instruction *OI = dyn_cast(O)) { - assert(SeenInsts.find(OI) != SeenInsts.end() && - "def not seen before use!"); - OpRanges.push_back(SeenInsts.find(OI)->second); - } else if (ConstantFP *CF = dyn_cast(O)) { - // Work out if the floating point number can be losslessly represented - // as an integer. - // APFloat::convertToInteger(&Exact) purports to do what we want, but - // the exactness can be too precise. For example, negative zero can - // never be exactly converted to an integer. - // - // Instead, we ask APFloat to round itself to an integral value - this - // preserves sign-of-zero - then compare the result with the original. - // - const APFloat &F = CF->getValueAPF(); - - // First, weed out obviously incorrect values. Non-finite numbers - // can't be represented and neither can negative zero, unless - // we're in fast math mode. - if (!F.isFinite() || - (F.isZero() && F.isNegative() && isa(I) && - !I->hasNoSignedZeros())) { - seen(I, badRange()); - Abort = true; - break; - } +// Walk forwards down the list of seen instructions, so we visit defs before +// uses. +void Float2IntPass::walkForwards() { + std::deque Worklist; + for (const auto &Pair : SeenInsts) + if (Pair.second == unknownRange()) + Worklist.push_back(Pair.first); - APFloat NewF = F; - auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven); - if (Res != APFloat::opOK || NewF != F) { - seen(I, badRange()); - Abort = true; - break; - } - // OK, it's representable. Now get it. - APSInt Int(MaxIntegerBW+1, false); - bool Exact; - CF->getValueAPF().convertToInteger(Int, - APFloat::rmNearestTiesToEven, - &Exact); - OpRanges.push_back(ConstantRange(Int)); - } else { - llvm_unreachable("Should have already marked this as badRange!"); - } - } + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + Worklist.pop_back(); - // Reduce the operands' ranges to a single range and return. - if (!Abort) - seen(I, Op(OpRanges)); + if (Optional Range = calcRange(I)) + seen(I, *Range); + else + Worklist.push_front(I); // Reprocess later. 
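// The control shape of the rewritten walkForwards() above, in isolation:
// take an instruction from the back of the deque; if calcRange() can already
// answer, record the range, otherwise park the instruction at the front and
// revisit it once its operands have resolved. Termination is assumed because
// every chain bottoms out in roots seeded by walkBackwards(). The helper name
// propagateRanges is illustrative.
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include <deque>

static void propagateRanges(
    std::deque<llvm::Instruction *> &Worklist,
    llvm::function_ref<llvm::Optional<llvm::ConstantRange>(llvm::Instruction *)>
        Calc,
    llvm::function_ref<void(llvm::Instruction *, const llvm::ConstantRange &)>
        Record) {
  while (!Worklist.empty()) {
    llvm::Instruction *I = Worklist.back();
    Worklist.pop_back();
    if (llvm::Optional<llvm::ConstantRange> R = Calc(I))
      Record(I, *R);
    else
      Worklist.push_front(I); // Operand ranges not known yet; retry later.
  }
}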
} } diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 398c93e8758c..783301fe589e 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -32,6 +31,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -42,12 +42,10 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -55,11 +53,9 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -72,7 +68,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -112,16 +107,16 @@ static cl::opt GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", cl::init(true)); static cl::opt GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", - cl::init(true)); + cl::init(false)); static cl::opt GVNEnableMemDep("enable-gvn-memdep", cl::init(true)); static cl::opt MaxNumDeps( - "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore, + "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::desc("Max number of dependences to attempt Load PRE (default = 100)")); // This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat. static cl::opt MaxBBSpeculations( - "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore, + "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::desc("Max number of blocks we're willing to speculate on (and recurse " "into) when deducing if a value is fully available or not in GVN " "(default = 600)")); @@ -129,6 +124,8 @@ static cl::opt MaxBBSpeculations( struct llvm::GVNPass::Expression { uint32_t opcode; bool commutative = false; + // The type is not necessarily the result type of the expression, it may be + // any additional type needed to disambiguate the expression. Type *type = nullptr; SmallVector varargs; @@ -178,70 +175,88 @@ template <> struct DenseMapInfo { /// implicitly associated with a rematerialization point which is the /// location of the instruction from which it was formed. struct llvm::gvn::AvailableValue { - enum ValType { + enum class ValType { SimpleVal, // A simple offsetted value that is accessed. LoadVal, // A value produced by a load. MemIntrin, // A memory intrinsic which is loaded from. 
- UndefVal // A UndefValue representing a value from dead block (which + UndefVal, // A UndefValue representing a value from dead block (which // is not yet physically removed from the CFG). + SelectVal, // A pointer select which is loaded from and for which the load + // can be replace by a value select. }; - /// V - The value that is live out of the block. - PointerIntPair Val; + /// Val - The value that is live out of the block. + Value *Val; + /// Kind of the live-out value. + ValType Kind; /// Offset - The byte offset in Val that is interesting for the load query. unsigned Offset = 0; static AvailableValue get(Value *V, unsigned Offset = 0) { AvailableValue Res; - Res.Val.setPointer(V); - Res.Val.setInt(SimpleVal); + Res.Val = V; + Res.Kind = ValType::SimpleVal; Res.Offset = Offset; return Res; } static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) { AvailableValue Res; - Res.Val.setPointer(MI); - Res.Val.setInt(MemIntrin); + Res.Val = MI; + Res.Kind = ValType::MemIntrin; Res.Offset = Offset; return Res; } static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) { AvailableValue Res; - Res.Val.setPointer(Load); - Res.Val.setInt(LoadVal); + Res.Val = Load; + Res.Kind = ValType::LoadVal; Res.Offset = Offset; return Res; } static AvailableValue getUndef() { AvailableValue Res; - Res.Val.setPointer(nullptr); - Res.Val.setInt(UndefVal); + Res.Val = nullptr; + Res.Kind = ValType::UndefVal; Res.Offset = 0; return Res; } - bool isSimpleValue() const { return Val.getInt() == SimpleVal; } - bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } - bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } - bool isUndefValue() const { return Val.getInt() == UndefVal; } + static AvailableValue getSelect(SelectInst *Sel) { + AvailableValue Res; + Res.Val = Sel; + Res.Kind = ValType::SelectVal; + Res.Offset = 0; + return Res; + } + + bool isSimpleValue() const { return Kind == ValType::SimpleVal; } + bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; } + bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; } + bool isUndefValue() const { return Kind == ValType::UndefVal; } + bool isSelectValue() const { return Kind == ValType::SelectVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); - return Val.getPointer(); + return Val; } LoadInst *getCoercedLoadValue() const { assert(isCoercedLoadValue() && "Wrong accessor"); - return cast(Val.getPointer()); + return cast(Val); } MemIntrinsic *getMemIntrinValue() const { assert(isMemIntrinValue() && "Wrong accessor"); - return cast(Val.getPointer()); + return cast(Val); + } + + SelectInst *getSelectValue() const { + assert(isSelectValue() && "Wrong accessor"); + return cast(Val); } /// Emit code at the specified insertion point to adjust the value defined @@ -275,6 +290,10 @@ struct llvm::gvn::AvailableValueInBlock { return get(BB, AvailableValue::getUndef()); } + static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel) { + return get(BB, AvailableValue::getSelect(Sel)); + } + /// Emit code at the end of this block to adjust the value defined here to /// the specified type. This handles various coercion cases. 
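// A note on the representation change above: the old
// PointerIntPair<Value *, 2, ValType> packed the tag into the two low
// alignment bits of the pointer, which allows at most four tag states.
// SelectVal is a fifth state, so the tag presumably had to move into a plain
// field. A minimal illustration of the old packing limit:
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/IR/Value.h"

enum OldValType { SimpleVal, LoadVal, MemIntrin, UndefVal }; // four states
// Two tag bits hold the values 0-3; a fifth enumerator would overflow the
// bits PointerIntPair reserves and assert at runtime.
using PackedVal = llvm::PointerIntPair<llvm::Value *, 2, OldValType>;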
Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const { @@ -379,6 +398,39 @@ GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { return e; } +GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) { + Expression E; + Type *PtrTy = GEP->getType()->getScalarType(); + const DataLayout &DL = GEP->getModule()->getDataLayout(); + unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy); + MapVector VariableOffsets; + APInt ConstantOffset(BitWidth, 0); + if (PtrTy->isOpaquePointerTy() && + GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) { + // For opaque pointers, convert into offset representation, to recognize + // equivalent address calculations that use different type encoding. + LLVMContext &Context = GEP->getContext(); + E.opcode = GEP->getOpcode(); + E.type = nullptr; + E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand())); + for (const auto &Pair : VariableOffsets) { + E.varargs.push_back(lookupOrAdd(Pair.first)); + E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second))); + } + if (!ConstantOffset.isZero()) + E.varargs.push_back( + lookupOrAdd(ConstantInt::get(Context, ConstantOffset))); + } else { + // If converting to offset representation fails (for typed pointers and + // scalable vectors), fall back to type-based implementation: + E.opcode = GEP->getOpcode(); + E.type = GEP->getSourceElementType(); + for (Use &Op : GEP->operands()) + E.varargs.push_back(lookupOrAdd(Op)); + } + return E; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -562,9 +614,11 @@ uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) { case Instruction::InsertElement: case Instruction::ShuffleVector: case Instruction::InsertValue: - case Instruction::GetElementPtr: exp = createExpr(I); break; + case Instruction::GetElementPtr: + exp = createGEPExpr(cast(I)); + break; case Instruction::ExtractValue: exp = createExtractvalueExpr(cast(I)); break; @@ -639,24 +693,24 @@ void GVNPass::ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// bool GVNPass::isPREEnabled() const { - return Options.AllowPRE.getValueOr(GVNEnablePRE); + return Options.AllowPRE.value_or(GVNEnablePRE); } bool GVNPass::isLoadPREEnabled() const { - return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE); + return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE); } bool GVNPass::isLoadInLoopPREEnabled() const { - return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE); + return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE); } bool GVNPass::isLoadPRESplitBackedgeEnabled() const { - return Options.AllowLoadPRESplitBackedge.getValueOr( + return Options.AllowLoadPRESplitBackedge.value_or( GVNEnableSplitBackedgeInLoadPRE); } bool GVNPass::isMemDepEnabled() const { - return Options.AllowMemDep.getValueOr(GVNEnableMemDep); + return Options.AllowMemDep.value_or(GVNEnableMemDep); } PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { @@ -897,6 +951,17 @@ ConstructSSAForLoadSet(LoadInst *Load, return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent()); } +static LoadInst *findDominatingLoad(Value *Ptr, Type *LoadTy, SelectInst *Sel, + DominatorTree &DT) { + for (Value *U : Ptr->users()) { + auto *LI = dyn_cast(U); + if (LI && LI->getType() == LoadTy && LI->getParent() == Sel->getParent() && + 
DT.dominates(LI, Sel)) + return LI; + } + return nullptr; +} + Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt, GVNPass &gvn) const { @@ -937,6 +1002,17 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load, << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); + } else if (isSelectValue()) { + // Introduce a new value select for a load from an eligible pointer select. + SelectInst *Sel = getSelectValue(); + LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, + gvn.getDominatorTree()); + LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, + gvn.getDominatorTree()); + assert(L1 && L2 && + "must be able to obtain dominating loads for both value operands of " + "the select"); + Res = SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel); } else { llvm_unreachable("Should not materialize value from dead block"); } @@ -1023,8 +1099,54 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo, ORE->emit(R); } +/// Check if a load from pointer-select \p Address in \p DepBB can be converted +/// to a value select. The following conditions need to be satisfied: +/// 1. The pointer select (\p Address) must be defined in \p DepBB. +/// 2. Both value operands of the pointer select must be loaded in the same +/// basic block, before the pointer select. +/// 3. There must be no instructions between the found loads and \p End that may +/// clobber the loads. +static Optional +tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End, + Value *Address, Type *LoadTy, DominatorTree &DT, + AAResults *AA) { + + auto *Sel = dyn_cast_or_null(Address); + if (!Sel || DepBB != Sel->getParent()) + return None; + + LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT); + LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT); + if (!L1 || !L2) + return None; + + // Ensure there are no accesses that may modify the locations referenced by + // either L1 or L2 between L1, L2 and the specified End iterator. + Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2; + MemoryLocation L1Loc = MemoryLocation::get(L1); + MemoryLocation L2Loc = MemoryLocation::get(L2); + if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) { + return isModSet(AA->getModRefInfo(&I, L1Loc)) || + isModSet(AA->getModRefInfo(&I, L2Loc)); + })) + return None; + + return AvailableValue::getSelect(Sel); +} + bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, Value *Address, AvailableValue &Res) { + if (!DepInfo.isDef() && !DepInfo.isClobber()) { + assert(isa(Address)); + if (auto R = tryToConvertLoadOfPtrSelect( + Load->getParent(), Load->getIterator(), Address, Load->getType(), + getDominatorTree(), getAliasAnalysis())) { + Res = *R; + return true; + } + return false; + } + assert((DepInfo.isDef() || DepInfo.isClobber()) && "expected a local dependence"); assert(Load->isUnordered() && "rules below are incorrect for ordered access"); @@ -1066,9 +1188,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) { const auto ClobberOff = MD->getClobberOffset(DepLoad); // GVN has no deal with a negative offset. - Offset = (ClobberOff == None || ClobberOff.getValue() < 0) - ? -1 - : ClobberOff.getValue(); + Offset = (ClobberOff == None || *ClobberOff < 0) ? 
-1 : *ClobberOff; } if (Offset == -1) Offset = @@ -1092,6 +1212,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, } } } + // Nothing known about this clobber, have to be conservative LLVM_DEBUG( // fast print dep, using operator<< on instruction is too slow. @@ -1111,12 +1232,11 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, return true; } - if (isAllocationFn(DepInst, TLI)) - if (auto *InitVal = getInitialValueOfAllocation(cast(DepInst), - TLI, Load->getType())) { - Res = AvailableValue::get(InitVal); - return true; - } + if (Constant *InitVal = + getInitialValueOfAllocation(DepInst, TLI, Load->getType())) { + Res = AvailableValue::get(InitVal); + return true; + } if (StoreInst *S = dyn_cast(DepInst)) { // Reject loads and stores that are to the same address but are of @@ -1176,16 +1296,23 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, continue; } - if (!DepInfo.isDef() && !DepInfo.isClobber()) { - UnavailableBlocks.push_back(DepBB); - continue; - } - // The address being loaded in this non-local block may not be the same as // the pointer operand of the load if PHI translation occurs. Make sure // to consider the right address. Value *Address = Deps[i].getAddress(); + if (!DepInfo.isDef() && !DepInfo.isClobber()) { + if (auto R = tryToConvertLoadOfPtrSelect( + DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(), + getAliasAnalysis())) { + ValuesPerBlock.push_back( + AvailableValueInBlock::get(DepBB, std::move(*R))); + continue; + } + UnavailableBlocks.push_back(DepBB); + continue; + } + AvailableValue AV; if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) { // subtlety: because we know this was a non-local dependency, we know @@ -1923,8 +2050,9 @@ bool GVNPass::processLoad(LoadInst *L) { if (Dep.isNonLocal()) return processNonLocalLoad(L); + Value *Address = L->getPointerOperand(); // Only handle the local case below - if (!Dep.isDef() && !Dep.isClobber()) { + if (!Dep.isDef() && !Dep.isClobber() && !isa(Address)) { // This might be a NonFuncLocal or an Unknown LLVM_DEBUG( // fast print dep, using operator<< on instruction is too slow. @@ -1934,7 +2062,7 @@ bool GVNPass::processLoad(LoadInst *L) { } AvailableValue AV; - if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) { + if (AnalyzeLoadAvailability(L, Dep, Address, AV)) { Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this); // Replace the load! @@ -2324,7 +2452,7 @@ bool GVNPass::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { + if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) { bool Changed = false; if (!I->use_empty()) { // Simplification can cause a special instruction to become not special. 
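// The new select handling above, reduced to its essence: when both pointer
// operands of a select are already loaded before the select and nothing
// clobbers them in between, a load of the select becomes a select of the
// loaded values. Assuming L1/L2 are the dominating loads found by
// findDominatingLoad():
//
//   %va = load %a ; %vb = load %b ; %p = select i1 %c, ptr %a, ptr %b
//   %v  = load ptr %p    ==>    %v = select i1 %c, %va, %vb
#include "llvm/IR/Instructions.h"

static llvm::Value *rewriteLoadOfSelect(llvm::SelectInst *Sel,
                                        llvm::LoadInst *L1,
                                        llvm::LoadInst *L2) {
  return llvm::SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel);
}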
@@ -2491,6 +2619,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, unsigned Iteration = 0; while (ShouldContinue) { LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); + (void) Iteration; ShouldContinue = iterateOnFunction(F); Changed |= ShouldContinue; ++Iteration; diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp index fdc3afd9348a..6cdc671ddb64 100644 --- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -54,11 +54,9 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Use.h" @@ -126,7 +124,7 @@ using HoistingPointInfo = std::pair; using HoistingPointList = SmallVector; // A map from a pair of VNs to all the instructions with those VNs. -using VNType = std::pair; +using VNType = std::pair; using VNtoInsns = DenseMap>; @@ -161,7 +159,7 @@ using InValuesType = // An invalid value number Used when inserting a single value number into // VNtoInsns. -enum : unsigned { InvalidVN = ~2U }; +enum : uintptr_t { InvalidVN = ~(uintptr_t)2 }; // Records all scalar instructions candidate for code hoisting. class InsnInfo { @@ -187,7 +185,9 @@ public: void insert(LoadInst *Load, GVNPass::ValueTable &VN) { if (Load->isSimple()) { unsigned V = VN.lookupOrAdd(Load->getPointerOperand()); - VNtoLoads[{V, InvalidVN}].push_back(Load); + // With opaque pointers we may have loads from the same pointer with + // different result types, which should be disambiguated. + VNtoLoads[{V, (uintptr_t)Load->getType()}].push_back(Load); } } @@ -261,7 +261,9 @@ public: GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), - MSSAUpdater(std::make_unique(MSSA)) {} + MSSAUpdater(std::make_unique(MSSA)) { + MSSA->ensureOptimizedUses(); + } bool run(Function &F); @@ -1147,6 +1149,8 @@ std::pair GVNHoist::hoist(HoistingPointList &HPL) { DFSNumber[Repl] = DFSNumber[Last]++; } + // Drop debug location as per debug info update guide. + Repl->dropLocation(); NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess); if (isa(Repl)) diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index e612a82fc89a..720b8e71fd56 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -35,7 +35,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" @@ -45,7 +44,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -383,6 +381,8 @@ public: } }; +using BasicBlocksSet = SmallPtrSet; + class ValueTable { DenseMap ValueNumbering; DenseMap ExpressionNumbering; @@ -390,6 +390,7 @@ class ValueTable { BumpPtrAllocator Allocator; ArrayRecycler Recycler; uint32_t nextValueNumber = 1; + BasicBlocksSet ReachableBBs; /// Create an expression for I based on its opcode and its uses. 
If I /// touches or reads memory, the expression is also based upon its memory @@ -421,6 +422,11 @@ class ValueTable { public: ValueTable() = default; + /// Set basic blocks reachable from entry block. + void setReachableBBs(const BasicBlocksSet &ReachableBBs) { + this->ReachableBBs = ReachableBBs; + } + /// Returns the value number for the specified value, assigning /// it a new number if it did not have one before. uint32_t lookupOrAdd(Value *V) { @@ -434,6 +440,9 @@ public: } Instruction *I = cast(V); + if (!ReachableBBs.contains(I->getParent())) + return ~0U; + InstructionUseExpr *exp = nullptr; switch (I->getOpcode()) { case Instruction::Load: @@ -570,6 +579,7 @@ public: unsigned NumSunk = 0; ReversePostOrderTraversal RPOT(&F); + VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end())); for (auto *N : RPOT) NumSunk += sinkBB(N); @@ -648,12 +658,7 @@ Optional GVNSink::analyzeInstructionForSinking( VNums[N]++; } unsigned VNumToSink = - std::max_element(VNums.begin(), VNums.end(), - [](const std::pair &I, - const std::pair &J) { - return I.second < J.second; - }) - ->first; + std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first; if (VNums[VNumToSink] == 1) // Can't sink anything! @@ -776,12 +781,9 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { unsigned NumOrigPreds = Preds.size(); // We can only sink instructions through unconditional branches. - for (auto I = Preds.begin(); I != Preds.end();) { - if ((*I)->getTerminator()->getNumSuccessors() != 1) - I = Preds.erase(I); - else - ++I; - } + llvm::erase_if(Preds, [](BasicBlock *BB) { + return BB->getTerminator()->getNumSuccessors() != 1; + }); LockstepReverseIterator LRI(Preds); SmallVector Candidates; diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 82b81003ef21..af6062d142f0 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -42,7 +42,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -496,6 +495,8 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const { makeAvailableAt(Op, Loc); Inst->moveBefore(Loc); + // If we moved instruction before guard we must clean poison generating flags. 
+ Inst->dropPoisonGeneratingFlags(); } bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, diff --git a/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp b/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp index e2022aba97c4..26f2db183fbf 100644 --- a/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp +++ b/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp @@ -8,7 +8,6 @@ #include "llvm/Transforms/Scalar/IVUsersPrinter.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "iv-users" diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index ceb03eb17f6d..e977dd18be9f 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -25,10 +25,7 @@ #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -74,11 +71,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -387,7 +382,7 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); // Delete the old floating point increment. - Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); + Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); // If the FP induction variable still has uses, this is because something else @@ -605,10 +600,10 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L, Intrinsic::getName(Intrinsic::experimental_guard)); bool HasGuards = GuardDecl && !GuardDecl->use_empty(); - SmallVector LoopPhis; - for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - LoopPhis.push_back(cast(I)); - } + SmallVector LoopPhis; + for (PHINode &PN : L->getHeader()->phis()) + LoopPhis.push_back(&PN); + // Each round of simplification iterates through the SimplifyIVUsers worklist // for all current phis, then determines whether any IVs can be // widened. 
Widening adds new phis to LoopPhis, inducing another round of diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 0e5653eeb7d5..799669a19796 100644 --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -56,8 +56,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/BasicBlock.h" @@ -1411,12 +1409,12 @@ bool LoopConstrainer::run() { bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate; Optional MaybeSR = calculateSubRanges(IsSignedPredicate); - if (!MaybeSR.hasValue()) { + if (!MaybeSR) { LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n"); return false; } - SubRanges SR = MaybeSR.getValue(); + SubRanges SR = *MaybeSR; bool Increasing = MainLoopStructure.IndVarIncreasing; IntegerType *IVTy = cast(Range.getBegin()->getType()); @@ -1429,9 +1427,9 @@ bool LoopConstrainer::run() { // constructor. ClonedLoop PreLoop, PostLoop; bool NeedsPreLoop = - Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue(); + Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value(); bool NeedsPostLoop = - Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue(); + Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value(); Value *ExitPreLoopAt = nullptr; Value *ExitMainLoopAt = nullptr; @@ -1710,7 +1708,7 @@ IntersectSignedRange(ScalarEvolution &SE, const InductiveRangeCheck::Range &R2) { if (R2.isEmpty(SE, /* IsSigned */ true)) return None; - if (!R1.hasValue()) + if (!R1) return R2; auto &R1Value = R1.getValue(); // We never return empty ranges from this function, and R1 is supposed to be @@ -1739,7 +1737,7 @@ IntersectUnsignedRange(ScalarEvolution &SE, const InductiveRangeCheck::Range &R2) { if (R2.isEmpty(SE, /* IsSigned */ false)) return None; - if (!R1.hasValue()) + if (!R1) return R2; auto &R1Value = R1.getValue(); // We never return empty ranges from this function, and R1 is supposed to be @@ -1763,10 +1761,14 @@ IntersectUnsignedRange(ScalarEvolution &SE, } PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &SE = AM.getResult(F); auto &DT = AM.getResult(F); - auto &BPI = AM.getResult(F); LoopInfo &LI = AM.getResult(F); + // There are no loops in the function. Return before computing other expensive + // analyses. + if (LI.empty()) + return PreservedAnalyses::all(); + auto &SE = AM.getResult(F); + auto &BPI = AM.getResult(F); // Get BFI analysis result on demand. Please note that modification of // CFG invalidates this analysis and we should handle it. 
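// Preview of the profitability hunk that follows: IRCE compares how often
// the loop header runs against its preheader; the ratio approximates the
// average trip count, and cold or short-running loops are not worth the
// transform. A sketch under that reading; MinRatio is an illustrative
// threshold, not the pass's real constant.
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include <cstdint>

static bool headerLooksHot(llvm::BlockFrequencyInfo &BFI,
                           llvm::BasicBlock *Header,
                           llvm::BasicBlock *Preheader,
                           uint64_t MinRatio = 2) {
  uint64_t HFreq = BFI.getBlockFreq(Header).getFrequency();
  uint64_t PHFreq = BFI.getBlockFreq(Preheader).getFrequency();
  return PHFreq != 0 && HFreq / PHFreq >= MinRatio;
}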
@@ -1854,7 +1856,7 @@ InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L, LoopStructure &LS) { if (SkipProfitabilityChecks) return true; - if (GetBFI.hasValue()) { + if (GetBFI) { BlockFrequencyInfo &BFI = (*GetBFI)(); uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency(); uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency(); @@ -1920,12 +1922,12 @@ bool InductiveRangeCheckElimination::run( const char *FailureReason = nullptr; Optional MaybeLoopStructure = LoopStructure::parseLoopStructure(SE, *L, FailureReason); - if (!MaybeLoopStructure.hasValue()) { + if (!MaybeLoopStructure) { LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason << "\n";); return false; } - LoopStructure LS = MaybeLoopStructure.getValue(); + LoopStructure LS = *MaybeLoopStructure; if (!isProfitableToTransform(*L, LS)) return false; const SCEVAddRecExpr *IndVar = @@ -1946,10 +1948,10 @@ bool InductiveRangeCheckElimination::run( for (InductiveRangeCheck &IRC : RangeChecks) { auto Result = IRC.computeSafeIterationSpace(SE, IndVar, LS.IsSignedPredicate); - if (Result.hasValue()) { + if (Result) { auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, Result.getValue()); - if (MaybeSafeIterRange.hasValue()) { + if (MaybeSafeIterRange) { assert( !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) && "We should never return empty ranges!"); @@ -1959,7 +1961,7 @@ bool InductiveRangeCheckElimination::run( } } - if (!SafeIterRange.hasValue()) + if (!SafeIterRange) return false; LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 8f5933b7bd71..5eefde2e37a1 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -92,8 +92,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumptionCache.h" @@ -182,7 +180,7 @@ public: class InferAddressSpacesImpl { AssumptionCache &AC; - DominatorTree *DT = nullptr; + const DominatorTree *DT = nullptr; const TargetTransformInfo *TTI = nullptr; const DataLayout *DL = nullptr; @@ -213,10 +211,11 @@ class InferAddressSpacesImpl { // Changes the flat address expressions in function F to point to specific // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. 
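// Context for the isNoopPtrIntCastPair change a little further below: a
// ptrtoint/inttoptr round-trip can be dropped only when both casts preserve
// the bit pattern and the two address spaces are either identical or
// declared interchangeable by the target. The extracted predicate, with the
// two no-op-cast checks left out of the sketch:
#include "llvm/Analysis/TargetTransformInfo.h"

static bool addrSpacesInterchangeable(const llvm::TargetTransformInfo &TTI,
                                      unsigned SrcAS, unsigned DstAS) {
  return SrcAS == DstAS || TTI.isNoopAddrSpaceCast(SrcAS, DstAS);
}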
- bool rewriteWithNewAddressSpaces( - const TargetTransformInfo &TTI, ArrayRef Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, - const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const; + bool + rewriteWithNewAddressSpaces(ArrayRef Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, + const PredicatedAddrSpaceMapTy &PredicatedAS, + Function *F) const; void appendsFlatAddressExpressionToPostorderStack( Value *V, PostorderStackTy &PostorderStack, @@ -240,7 +239,7 @@ class InferAddressSpacesImpl { unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const; public: - InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT, + InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT, const TargetTransformInfo *TTI, unsigned FlatAddrSpace) : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} bool run(Function &F); @@ -280,15 +279,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL, // arithmetic may also be undefined after invalid pointer reinterpret cast. // However, as we confirm through the target hooks that it's a no-op // addrspacecast, it doesn't matter since the bits should be the same. + unsigned P2IOp0AS = P2I->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned I2PAS = I2P->getType()->getPointerAddressSpace(); return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()), I2P->getOperand(0)->getType(), I2P->getType(), DL) && CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()), P2I->getOperand(0)->getType(), P2I->getType(), DL) && - TTI->isNoopAddrSpaceCast( - P2I->getOperand(0)->getType()->getPointerAddressSpace(), - I2P->getType()->getPointerAddressSpace()); + (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(P2IOp0AS, I2PAS)); } // Returns true if V is an address expression. @@ -332,8 +331,7 @@ getPointerOperands(const Value &V, const DataLayout &DL, switch (Op.getOpcode()) { case Instruction::PHI: { auto IncomingValues = cast(Op).incoming_values(); - return SmallVector(IncomingValues.begin(), - IncomingValues.end()); + return {IncomingValues.begin(), IncomingValues.end()}; } case Instruction::BitCast: case Instruction::AddrSpaceCast: @@ -655,10 +653,13 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( case Instruction::IntToPtr: { assert(isNoopPtrIntCastPair(cast(I), *DL, TTI)); Value *Src = cast(I->getOperand(0))->getOperand(0); - assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); - if (Src->getType() != NewPtrType) - return new BitCastInst(Src, NewPtrType); - return Src; + if (Src->getType() == NewPtrType) + return Src; + + // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a + // source address space from a generic pointer source need to insert a cast + // back. + return CastInst::CreatePointerBitCastOrAddrSpaceCast(Src, NewPtrType); } default: llvm_unreachable("Unexpected opcode"); @@ -726,7 +727,7 @@ static Value *cloneConstantExprWithNewAddressSpace( NewOperands.push_back(cast(NewOperand)); continue; } - if (auto CExpr = dyn_cast(Operand)) + if (auto *CExpr = dyn_cast(Operand)) if (Value *NewOperand = cloneConstantExprWithNewAddressSpace( CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) { IsNew = true; @@ -738,7 +739,7 @@ static Value *cloneConstantExprWithNewAddressSpace( } // If !IsNew, we will replace the Value with itself. However, replaced values - // are assumed to wrapped in a addrspace cast later so drop it now. + // are assumed to wrapped in an addrspacecast cast later so drop it now. 
if (!IsNew) return nullptr; @@ -821,8 +822,8 @@ bool InferAddressSpacesImpl::run(Function &F) { // Changes the address spaces of the flat address expressions who are inferred // to point to a specific address space. - return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, - PredicatedAS, &F); + return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS, + &F); } // Constants need to be tracked through RAUW to handle cases with nested @@ -1010,7 +1011,7 @@ static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI, } /// Update memory intrinsic uses that require more complex processing than -/// simple memory instructions. Thse require re-mangling and may have multiple +/// simple memory instructions. These require re-mangling and may have multiple /// pointer operands. static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, Value *NewV) { @@ -1020,8 +1021,7 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias); if (auto *MSI = dyn_cast(MI)) { - B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(), - MaybeAlign(MSI->getDestAlignment()), + B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(), MSI->getDestAlign(), false, // isVolatile TBAA, ScopeMD, NoAliasMD); } else if (auto *MTI = dyn_cast(MI)) { @@ -1104,7 +1104,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( - const TargetTransformInfo &TTI, ArrayRef Postorder, + ArrayRef Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const { // For each address expression to be modified, creates a clone of it with its @@ -1178,7 +1178,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( I = skipToNextUser(I, E); if (isSimplePointerUseValidToReplace( - TTI, U, V->getType()->getPointerAddressSpace())) { + *TTI, U, V->getType()->getPointerAddressSpace())) { // If V is used as the pointer operand of a compatible memory operation, // sets the pointer operand to NewV. This replacement does not change // the element type, so the resultant load/store is still valid. @@ -1239,8 +1239,16 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( if (!cast(ASC->getType()) ->hasSameElementTypeAs( cast(NewV->getType()))) { + BasicBlock::iterator InsertPos; + if (Instruction *NewVInst = dyn_cast(NewV)) + InsertPos = std::next(NewVInst->getIterator()); + else if (Instruction *VInst = dyn_cast(V)) + InsertPos = std::next(VInst->getIterator()); + else + InsertPos = ASC->getIterator(); + NewV = CastInst::Create(Instruction::BitCast, NewV, - ASC->getType(), "", ASC); + ASC->getType(), "", &*InsertPos); } ASC->replaceAllUsesWith(NewV); DeadInstructions.push_back(ASC); @@ -1249,12 +1257,18 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( } // Otherwise, replaces the use with flat(NewV). - if (Instruction *Inst = dyn_cast(V)) { + if (Instruction *VInst = dyn_cast(V)) { // Don't create a copy of the original addrspacecast. if (U == V && isa(V)) continue; - BasicBlock::iterator InsertPos = std::next(Inst->getIterator()); + // Insert the addrspacecast after NewV. 
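The insertion-point logic added above, a variant of which appears again just below, picks the earliest point that is dominated by the new value: right after `NewV` when it is an instruction, after `V` as a fallback, and at the old addrspacecast only when both are constants. A condensed sketch of that selection, using the same iterator idiom as the patch:

    #include "llvm/IR/Instructions.h"
    #include <iterator>

    using namespace llvm;

    // Where should a cast of NewV (replacing uses of the addrspacecast
    // ASC) be inserted? Mirrors the selection order in the hunk above.
    BasicBlock::iterator pickInsertPos(Value *NewV, Value *V,
                                       Instruction *ASC) {
      if (auto *NewVInst = dyn_cast<Instruction>(NewV))
        return std::next(NewVInst->getIterator()); // just after NewV's def
      if (auto *VInst = dyn_cast<Instruction>(V))
        return std::next(VInst->getIterator());    // fall back to after V
      return ASC->getIterator();                   // constants: at the cast
    }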
+ BasicBlock::iterator InsertPos; + if (Instruction *NewVInst = dyn_cast(NewV)) + InsertPos = std::next(NewVInst->getIterator()); + else + InsertPos = std::next(VInst->getIterator()); + while (isa(InsertPos)) ++InsertPos; U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos)); diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp index c11d2e4c1d6b..4644905adba3 100644 --- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -7,21 +7,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/InstSimplifyPass.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -55,7 +51,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ, DeadInstsInBB.push_back(&I); Changed = true; } else if (!I.use_empty()) { - if (Value *V = SimplifyInstruction(&I, SQ, ORE)) { + if (Value *V = simplifyInstruction(&I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. for (User *U : I.users()) Next->insert(cast(U)); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index a3efad104ca6..5caefc422921 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -56,7 +56,6 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" -#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -74,7 +73,6 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include #include -#include #include #include #include @@ -106,11 +104,6 @@ static cl::opt PrintLVIAfterJumpThreading( cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden); -static cl::opt JumpThreadingFreezeSelectCond( - "jump-threading-freeze-select-cond", - cl::desc("Freeze the condition when unfolding select"), cl::init(false), - cl::Hidden); - static cl::opt ThreadAcrossLoopHeaders( "jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), @@ -140,8 +133,7 @@ namespace { public: static char ID; // Pass identification - JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1) - : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) { + JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) { initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); } @@ -175,12 +167,11 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) // Public interface to the Jump Threading pass -FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) { - return new JumpThreading(InsertFr, Threshold); +FunctionPass *llvm::createJumpThreadingPass(int Threshold) { + return new JumpThreading(Threshold); } -JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) { - 
InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr; +JumpThreadingPass::JumpThreadingPass(int T) { DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T); } @@ -326,7 +317,7 @@ bool JumpThreading::runOnFunction(Function &F) { std::unique_ptr BFI; std::unique_ptr BPI; if (F.hasProfileData()) { - LoopInfo LI{DominatorTree(F)}; + LoopInfo LI{*DT}; BPI.reset(new BranchProbabilityInfo(F, LI, TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -491,14 +482,16 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // at the end of block. RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. -static void replaceFoldableUses(Instruction *Cond, Value *ToVal) { +static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, + BasicBlock *KnownAtEndOfBB) { + bool Changed = false; assert(Cond->getType() == ToVal->getType()); - auto *BB = Cond->getParent(); // We can unconditionally replace all uses in non-local blocks (i.e. uses // strictly dominated by BB), since LVI information is true from the // terminator of BB. - replaceNonLocalUsesWith(Cond, ToVal); - for (Instruction &I : reverse(*BB)) { + if (Cond->getParent() == KnownAtEndOfBB) + Changed |= replaceNonLocalUsesWith(Cond, ToVal); + for (Instruction &I : reverse(*KnownAtEndOfBB)) { // Reached the Cond whose uses we are trying to replace, so there are no // more uses. if (&I == Cond) @@ -507,10 +500,13 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) { // of BB, where we know Cond is ToVal. if (!isGuaranteedToTransferExecutionToSuccessor(&I)) break; - I.replaceUsesOfWith(Cond, ToVal); + Changed |= I.replaceUsesOfWith(Cond, ToVal); } - if (Cond->use_empty() && !Cond->mayHaveSideEffects()) + if (Cond->use_empty() && !Cond->mayHaveSideEffects()) { Cond->eraseFromParent(); + Changed = true; + } + return Changed; } /// Return the cost of duplicating a piece of this block from first non-phi @@ -792,6 +788,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( if (Preference != WantInteger) return false; if (ConstantInt *CI = dyn_cast(BO->getOperand(1))) { + const DataLayout &DL = BO->getModule()->getDataLayout(); PredValueInfoTy LHSVals; computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals, WantInteger, RecursionSet, CxtI); @@ -799,7 +796,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( // Try to use constant folding to simplify the binary operator. for (const auto &LHSVal : LHSVals) { Constant *V = LHSVal.first; - Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); + Constant *Folded = + ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL); if (Constant *KC = getKnownConstant(Folded, WantInteger)) Result.emplace_back(KC, LHSVal.second); @@ -835,7 +833,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( LHS = CmpLHS->DoPHITranslation(BB, PredBB); RHS = PN->getIncomingValue(i); } - Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL}); + Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL}); if (!Res) { if (!isa(RHS)) continue; @@ -1135,34 +1133,21 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { return ConstantFolded; } - if (CmpInst *CondCmp = dyn_cast(CondInst)) { + // Some of the following optimization can safely work on the unfrozen cond. 
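In `computeValueKnownInPredecessorsImpl` the patch stops building a `ConstantExpr` and asks the constant folder directly. `ConstantFoldBinaryOpOperands` takes the `DataLayout` and may return `nullptr` when the operands do not fold, which the existing `getKnownConstant` check already tolerates. A sketch of the call shape (a hypothetical wrapper; `BO`, `V`, `CI` play the same roles as in the hunk):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Fold "V <op> CI" if the folder can; nullptr means "did not fold",
    // unlike the old ConstantExpr::get path, which always built a node.
    Constant *foldBinOp(BinaryOperator *BO, Constant *V, ConstantInt *CI) {
      const DataLayout &DL = BO->getModule()->getDataLayout();
      return ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
    }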
+ Value *CondWithoutFreeze = CondInst; + if (auto *FI = dyn_cast(CondInst)) + CondWithoutFreeze = FI->getOperand(0); + + if (CmpInst *CondCmp = dyn_cast(CondWithoutFreeze)) { // If we're branching on a conditional, LVI might be able to determine // it's value at the branch instruction. We only handle comparisons // against a constant at this time. - // TODO: This should be extended to handle switches as well. - BranchInst *CondBr = dyn_cast(BB->getTerminator()); - Constant *CondConst = dyn_cast(CondCmp->getOperand(1)); - if (CondBr && CondConst) { - // We should have returned as soon as we turn a conditional branch to - // unconditional. Because its no longer interesting as far as jump - // threading is concerned. - assert(CondBr->isConditional() && "Threading on unconditional terminator"); - + if (Constant *CondConst = dyn_cast(CondCmp->getOperand(1))) { LazyValueInfo::Tristate Ret = LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0), - CondConst, CondBr, /*UseBlockValue=*/false); + CondConst, BB->getTerminator(), + /*UseBlockValue=*/false); if (Ret != LazyValueInfo::Unknown) { - unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0; - unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1; - BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); - ToRemoveSucc->removePredecessor(BB, true); - BranchInst *UncondBr = - BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); - UncondBr->setDebugLoc(CondBr->getDebugLoc()); - ++NumFolds; - CondBr->eraseFromParent(); - if (CondCmp->use_empty()) - CondCmp->eraseFromParent(); // We can safely replace *some* uses of the CondInst if it has // exactly one value as returned by LVI. RAUW is incorrect in the // presence of guards and assumes, that have the `Cond` as the use. This @@ -1170,17 +1155,11 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // at the end of block, but RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. - else if (CondCmp->getParent() == BB) { - auto *CI = Ret == LazyValueInfo::True ? - ConstantInt::getTrue(CondCmp->getType()) : - ConstantInt::getFalse(CondCmp->getType()); - replaceFoldableUses(CondCmp, CI); - } - DTU->applyUpdatesPermissive( - {{DominatorTree::Delete, BB, ToRemoveSucc}}); - if (HasProfileData) - BPI->eraseBlock(BB); - return true; + auto *CI = Ret == LazyValueInfo::True ? + ConstantInt::getTrue(CondCmp->getType()) : + ConstantInt::getFalse(CondCmp->getType()); + if (replaceFoldableUses(CondCmp, CI, BB)) + return true; } // We did not manage to simplify this branch, try to see whether @@ -1198,11 +1177,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // for loads that are used by a switch or by the condition for the branch. If // we see one, check to see if it's partially redundant. If so, insert a PHI // which can then be used to thread the values. - Value *SimplifyValue = CondInst; - - if (auto *FI = dyn_cast(SimplifyValue)) - // Look into freeze's operand - SimplifyValue = FI->getOperand(0); + Value *SimplifyValue = CondWithoutFreeze; if (CmpInst *CondCmp = dyn_cast(SimplifyValue)) if (isa(CondCmp->getOperand(1))) @@ -1227,10 +1202,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) { // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in // the current block, see if we can simplify. - PHINode *PN = dyn_cast( - isa(CondInst) ? 
cast(CondInst)->getOperand(0) - : CondInst); - + PHINode *PN = dyn_cast(CondWithoutFreeze); if (PN && PN->getParent() == BB && isa(BB->getTerminator())) return processBranchOnPHI(PN); @@ -1253,6 +1225,17 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { return false; Value *Cond = BI->getCondition(); + // Assuming that predecessor's branch was taken, if pred's branch condition + // (V) implies Cond, Cond can be either true, undef, or poison. In this case, + // freeze(Cond) is either true or a nondeterministic value. + // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true + // without affecting other instructions. + auto *FICond = dyn_cast(Cond); + if (FICond && FICond->hasOneUse()) + Cond = FICond->getOperand(0); + else + FICond = nullptr; + BasicBlock *CurrentBB = BB; BasicBlock *CurrentPred = BB->getSinglePredecessor(); unsigned Iter = 0; @@ -1269,6 +1252,15 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB; Optional Implication = isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); + + // If the branch condition of BB (which is Cond) and CurrentPred are + // exactly the same freeze instruction, Cond can be folded into CondIsTrue. + if (!Implication && FICond && isa(PBI->getCondition())) { + if (cast(PBI->getCondition())->getOperand(0) == + FICond->getOperand(0)) + Implication = CondIsTrue; + } + if (Implication) { BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1); BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0); @@ -1277,6 +1269,9 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { UncondBI->setDebugLoc(BI->getDebugLoc()); ++NumFolds; BI->eraseFromParent(); + if (FICond) + FICond->eraseFromParent(); + DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}}); if (HasProfileData) BPI->eraseBlock(BB); @@ -1338,10 +1333,10 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { combineMetadataForCSE(NLoadI, LoadI, false); }; - // If the returned value is the load itself, replace with an undef. This can + // If the returned value is the load itself, replace with poison. This can // only happen in dead loops. if (AvailableVal == LoadI) - AvailableVal = UndefValue::get(LoadI->getType()); + AvailableVal = PoisonValue::get(LoadI->getType()); if (AvailableVal->getType() != LoadI->getType()) AvailableVal = CastInst::CreateBitOrPointerCast( AvailableVal, LoadI->getType(), "", LoadI); @@ -1566,10 +1561,8 @@ findMostPopularDest(BasicBlock *BB, DestPopularity[PredToDest.second]++; // Find the most popular dest. - using VT = decltype(DestPopularity)::value_type; auto MostPopular = std::max_element( - DestPopularity.begin(), DestPopularity.end(), - [](const VT &L, const VT &R) { return L.second < R.second; }); + DestPopularity.begin(), DestPopularity.end(), llvm::less_second()); // Okay, we have finally picked the most popular destination. return MostPopular->first; @@ -1742,9 +1735,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, // at the end of block, but RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. 
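Several of the hunks above converge on a single idiom: compute `CondWithoutFreeze` once by looking through a `freeze`, then run the comparison, load, and PHI analyses on the unfrozen value. That is sound for analysis purposes because freeze only blocks undef and poison propagation; it never changes a well-defined value. The peeling step, as a minimal sketch:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Return the value a branch condition is really computed from,
    // looking through at most one freeze, as processBlock now does.
    Value *peelFreeze(Value *Cond) {
      if (auto *FI = dyn_cast<FreezeInst>(Cond))
        return FI->getOperand(0);
      return Cond;
    }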
- else if (OnlyVal && OnlyVal != MultipleVal && - CondInst->getParent() == BB) - replaceFoldableUses(CondInst, OnlyVal); + else if (OnlyVal && OnlyVal != MultipleVal) + replaceFoldableUses(CondInst, OnlyVal, BB); } return true; } @@ -2672,7 +2664,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = SimplifyInstruction( + if (Value *IV = simplifyInstruction( New, {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; @@ -2912,9 +2904,7 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { continue; // Expand the select. Value *Cond = SI->getCondition(); - if (InsertFreezeWhenUnfoldingSelect && - !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI, - &DTU->getDomTree())) + if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI)) Cond = new FreezeInst(Cond, "cond.fr", SI); Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false); BasicBlock *SplitBB = SI->getParent(); diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 7fb1a25bdf13..492f4e40395a 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -37,29 +37,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -78,7 +76,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -88,6 +85,11 @@ #include using namespace llvm; +namespace llvm { +class BlockFrequencyInfo; +class LPMUpdater; +} // namespace llvm + #define DEBUG_TYPE "licm" STATISTIC(NumCreatedBlocks, "Number of blocks created"); @@ -114,8 +116,7 @@ static cl::opt MaxNumUsesTraversed( // Experimental option to allow imprecision in LICM in pathological cases, in // exchange for faster compile. This is to be removed if MemorySSA starts to -// address the same issue. This flag applies only when LICM uses MemorySSA -// instead on AliasSetTracker. 
LICM calls MemorySSAWalker's +// address the same issue. LICM calls MemorySSAWalker's // getClobberingMemoryAccess, up to the value of the Cap, getting perfect // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess, // which may not be precise, since optimizeUses is capped. The result is @@ -143,37 +144,32 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, - MemorySSAUpdater *MSSAU, ScalarEvolution *SE, + MemorySSAUpdater &MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, const Loop *CurLoop, - ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE); -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI = nullptr); -static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, - AliasSetTracker *CurAST, Loop *CurLoop, - AAResults *AA); -static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, Instruction &I, - SinkAndHoistLICMFlags &Flags); -static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, - MemoryUse &MU); +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, + bool AllowSpeculation); +static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, + Loop *CurLoop, Instruction &I, + SinkAndHoistLICMFlags &Flags); +static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, - const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); + const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU); static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater *MSSAU); + MemorySSAUpdater &MSSAU); static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater *MSSAU, ScalarEvolution *SE); + MemorySSAUpdater &MSSAU, ScalarEvolution *SE); static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref Fn); @@ -188,21 +184,26 @@ struct LoopInvariantCodeMotion { OptimizationRemarkEmitter *ORE, bool LoopNestMode = false); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) + unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmAllowSpeculation(LicmAllowSpeculation) {} private: unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmAllowSpeculation; }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass( unsigned LicmMssaOptCap = SetLicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) - : LoopPass(ID), 
LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { + unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation = true) + : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation) { initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } @@ -265,7 +266,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap, + Opts.AllowSpeculation); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE)) return PreservedAnalyses::all(); @@ -279,6 +281,16 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, return PA; } +void LICMPass::printPipeline( + raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + + OS << "<"; + OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation"; + OS << ">"; +} + PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { @@ -290,7 +302,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(LN.getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap, + Opts.AllowSpeculation); Loop &OutermostLoop = LN.getOutermostLoop(); bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI, @@ -308,6 +321,16 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, return PA; } +void LNICMPass::printPipeline( + raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + + OS << "<"; + OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation"; + OS << ">"; +} + char LegacyLICMPass::ID = 0; INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -321,8 +344,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) { - return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) { + return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); } llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, @@ -365,6 +390,7 @@ bool LoopInvariantCodeMotion::runOnLoop( bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); + MSSA->ensureOptimizedUses(); // If this loop has metadata indicating that LICM is not to be performed then // just exit. @@ -411,14 +437,15 @@ bool LoopInvariantCodeMotion::runOnLoop( if (L->hasDedicatedExits()) Changed |= LoopNestMode ? 
sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, - DT, BFI, TLI, TTI, L, &MSSAU, + DT, BFI, TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE) : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, - TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE); + TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE); Flags.setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode); + MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, + LicmAllowSpeculation); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -451,8 +478,7 @@ bool LoopInvariantCodeMotion::runOnLoop( PredIteratorCache PIC; // Promoting one set of accesses may make the pointers for another set - // loop invariant, so run this in a loop (with the MaybePromotable set - // decreasing in size over time). + // loop invariant, so run this in a loop. bool Promoted = false; bool LocalPromoted; do { @@ -460,8 +486,8 @@ bool LoopInvariantCodeMotion::runOnLoop( for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, - LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -502,17 +528,17 @@ bool LoopInvariantCodeMotion::runOnLoop( bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - Loop *CurLoop, MemorySSAUpdater *MSSAU, + Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion."); - // We want to visit children before parents. We will enque all the parents + // We want to visit children before parents. We will enqueue all the parents // before their children in the worklist and process the worklist in reverse // order. SmallVector Worklist = collectChildrenInLoop(N, CurLoop); @@ -550,8 +576,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, if (!I.mayHaveSideEffects() && isNotUsedOrFreeInLoop(I, LoopNestMode ? 
OutermostLoop : CurLoop, SafetyInfo, TTI, FreeInLoop, LoopNestMode) && - canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true, - &Flags, ORE)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) { if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { if (!FreeInLoop) { ++II; @@ -564,14 +589,14 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, } } if (VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); + MSSAU.getMemorySSA()->verifyMemorySSA(); return Changed; } bool llvm::sinkRegionForLoopNest( DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, + Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { bool Changed = false; @@ -600,7 +625,7 @@ private: LoopInfo *LI; DominatorTree *DT; Loop *CurLoop; - MemorySSAUpdater *MSSAU; + MemorySSAUpdater &MSSAU; // A map of blocks in the loop to the block their instructions will be hoisted // to. @@ -612,7 +637,7 @@ private: public: ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop, - MemorySSAUpdater *MSSAU) + MemorySSAUpdater &MSSAU) : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {} void registerPossiblyHoistableBranch(BranchInst *BI) { @@ -788,7 +813,7 @@ public: if (HoistTarget == InitialPreheader) { // Phis in the loop header now need to use the new preheader. InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); - MSSAU->wireOldPredecessorsToNewImmediatePredecessor( + MSSAU.wireOldPredecessorsToNewImmediatePredecessor( HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget}); // The new preheader dominates the loop header. DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); @@ -822,13 +847,14 @@ public: bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, Loop *CurLoop, - MemorySSAUpdater *MSSAU, ScalarEvolution *SE, + MemorySSAUpdater &MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE, bool LoopNestMode) { + OptimizationRemarkEmitter *ORE, bool LoopNestMode, + bool AllowSpeculation) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion."); ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); @@ -873,11 +899,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // and we have accurately duplicated the control flow from the loop header // to that block. 
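`hoistRegion` now threads an `AllowSpeculation` flag down to `isSafeToExecuteUnconditionally`, so a pipeline can run LICM with speculative hoisting disabled (spelled `licm<no-allowspeculation>`, going by the `printPipeline` hunks earlier). The gate itself is a one-line change; a reduced sketch with stand-in predicates for the two existing queries:

    #include "llvm/IR/Instruction.h"

    // Stand-ins for isSafeToSpeculativelyExecute and the loop-safety
    // query; assume their usual semantics, conservatively stubbed here.
    static bool isSafeToSpeculate(const llvm::Instruction &) { return false; }
    static bool guaranteedToExecuteInLoop(const llvm::Instruction &) {
      return false;
    }

    // With AllowSpeculation off, only instructions that provably run on
    // every iteration may be hoisted; speculation becomes an opt-in.
    bool safeToExecuteUnconditionally(const llvm::Instruction &I,
                                      bool AllowSpeculation) {
      if (AllowSpeculation && isSafeToSpeculate(I))
        return true;
      return guaranteedToExecuteInLoop(I);
    }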
if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU, - true, &Flags, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) && isSafeToExecuteUnconditionally( I, DT, TLI, CurLoop, SafetyInfo, ORE, - CurLoop->getLoopPreheader()->getTerminator())) { + CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); HoistedInstructions.push_back(&I); @@ -982,7 +1007,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, } } if (VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); + MSSAU.getMemorySSA()->verifyMemorySSA(); // Now that we've finished hoisting make sure that LI and DT are still // valid. @@ -1083,30 +1108,19 @@ bool isHoistableAndSinkableInst(Instruction &I) { isa(I) || isa(I) || isa(I) || isa(I)); } -/// Return true if all of the alias sets within this AST are known not to -/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop. -bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU, - const Loop *L) { - if (CurAST) { - for (AliasSet &AS : *CurAST) { - if (!AS.isForwardingAliasSet() && AS.isMod()) { - return false; - } - } - return true; - } else { /*MSSAU*/ - for (auto *BB : L->getBlocks()) - if (MSSAU->getMemorySSA()->getBlockDefs(BB)) - return false; - return true; - } +/// Return true if MSSA knows there are no MemoryDefs in the loop. +bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) { + for (auto *BB : L->getBlocks()) + if (MSSAU.getMemorySSA()->getBlockDefs(BB)) + return false; + return true; } /// Return true if I is the only Instruction with a MemoryAccess in L. bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, - const MemorySSAUpdater *MSSAU) { + const MemorySSAUpdater &MSSAU) { for (auto *BB : L->getBlocks()) - if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) { + if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) { int NotAPhi = 0; for (const auto &Acc : *Accs) { if (isa(&Acc)) @@ -1121,22 +1135,15 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, } bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, - Loop *CurLoop, AliasSetTracker *CurAST, - MemorySSAUpdater *MSSAU, + Loop *CurLoop, MemorySSAUpdater &MSSAU, bool TargetExecutesOncePerLoop, - SinkAndHoistLICMFlags *Flags, + SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { - assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && - "Either AliasSetTracker or MemorySSA should be initialized."); - // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) return false; - MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; - if (MSSA) - assert(Flags != nullptr && "Flags cannot be null."); - + MemorySSA *MSSA = MSSAU.getMemorySSA(); // Loads have extra constraints we have to verify before we can hoist them. 
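With the AliasSetTracker arm deleted, `isReadOnly` collapses to a pure MemorySSA query: a loop is read-only exactly when none of its blocks carries a defs list. The equivalent walk, spelled out as a standalone helper (same calls as the hunk):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/MemorySSA.h"

    using namespace llvm;

    // A non-null defs list means the block holds a MemoryDef (or a
    // MemoryPhi merging one), i.e. something in the loop may write.
    bool loopIsReadOnly(const MemorySSA &MSSA, const Loop *L) {
      for (BasicBlock *BB : L->getBlocks())
        if (MSSA.getBlockDefs(BB))
          return false;
      return true;
    }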
if (LoadInst *LI = dyn_cast(&I)) { if (!LI->isUnordered()) @@ -1156,13 +1163,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; - bool Invalidated; - if (CurAST) - Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST, - CurLoop, AA); - else - Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); + bool Invalidated = pointerInvalidatedByLoop( + MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1210,24 +1212,17 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, if (AAResults::onlyAccessesArgPointees(Behavior)) { // TODO: expand to writeable arguments for (Value *Op : CI->args()) - if (Op->getType()->isPointerTy()) { - bool Invalidated; - if (CurAST) - Invalidated = pointerInvalidatedByLoop( - MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA); - else - Invalidated = pointerInvalidatedByLoopWithMSSA( + if (Op->getType()->isPointerTy() && + pointerInvalidatedByLoop( MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, I, - *Flags); - if (Invalidated) - return false; - } + Flags)) + return false; return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. - if (isReadOnly(CurAST, MSSAU, CurLoop)) + if (isReadOnly(MSSAU, CurLoop)) return true; } @@ -1238,21 +1233,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, } else if (auto *FI = dyn_cast(&I)) { // Fences alias (most) everything to provide ordering. For the moment, // just give up if there are any other memory operations in the loop. - if (CurAST) { - auto Begin = CurAST->begin(); - assert(Begin != CurAST->end() && "must contain FI"); - if (std::next(Begin) != CurAST->end()) - // constant memory for instance, TODO: handle better - return false; - auto *UniqueI = Begin->getUniqueInstruction(); - if (!UniqueI) - // other memory op, give up - return false; - (void)FI; // suppress unused variable warning - assert(UniqueI == FI && "AS must contain FI"); - return true; - } else // MSSAU - return isOnlyMemoryAccess(FI, CurLoop, MSSAU); + return isOnlyMemoryAccess(FI, CurLoop, MSSAU); } else if (auto *SI = dyn_cast(&I)) { if (!SI->isUnordered()) return false; // Don't sink/hoist volatile or ordered atomic store! @@ -1262,68 +1243,54 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // load store promotion instead. TODO: We can extend this to cases where // there is exactly one write to the location and that write dominates an // arbitrary number of reads in the loop. - if (CurAST) { - auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); - - if (AS.isRef() || !AS.isMustAlias()) - // Quick exit test, handled by the full path below as well. 
- return false; - auto *UniqueI = AS.getUniqueInstruction(); - if (!UniqueI) - // other memory op, give up - return false; - assert(UniqueI == SI && "AS must contain SI"); + if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) return true; - } else { // MSSAU - if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) - return true; - // If there are more accesses than the Promotion cap or no "quota" to - // check clobber, then give up as we're not walking a list that long. - if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls()) - return false; - // If there are interfering Uses (i.e. their defining access is in the - // loop), or ordered loads (stored as Defs!), don't move this store. - // Could do better here, but this is conservatively correct. - // TODO: Cache set of Uses on the first walk in runOnLoop, update when - // moving accesses. Can also extend to dominating uses. - auto *SIMD = MSSA->getMemoryAccess(SI); - for (auto *BB : CurLoop->getBlocks()) - if (auto *Accesses = MSSA->getBlockAccesses(BB)) { - for (const auto &MA : *Accesses) - if (const auto *MU = dyn_cast(&MA)) { - auto *MD = MU->getDefiningAccess(); - if (!MSSA->isLiveOnEntryDef(MD) && - CurLoop->contains(MD->getBlock())) - return false; - // Disable hoisting past potentially interfering loads. Optimized - // Uses may point to an access outside the loop, as getClobbering - // checks the previous iteration when walking the backedge. - // FIXME: More precise: no Uses that alias SI. - if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU)) - return false; - } else if (const auto *MD = dyn_cast(&MA)) { - if (auto *LI = dyn_cast(MD->getMemoryInst())) { - (void)LI; // Silence warning. - assert(!LI->isUnordered() && "Expected unordered load"); + // If there are more accesses than the Promotion cap or no "quota" to + // check clobber, then give up as we're not walking a list that long. + if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls()) + return false; + // If there are interfering Uses (i.e. their defining access is in the + // loop), or ordered loads (stored as Defs!), don't move this store. + // Could do better here, but this is conservatively correct. + // TODO: Cache set of Uses on the first walk in runOnLoop, update when + // moving accesses. Can also extend to dominating uses. + auto *SIMD = MSSA->getMemoryAccess(SI); + for (auto *BB : CurLoop->getBlocks()) + if (auto *Accesses = MSSA->getBlockAccesses(BB)) { + for (const auto &MA : *Accesses) + if (const auto *MU = dyn_cast(&MA)) { + auto *MD = MU->getDefiningAccess(); + if (!MSSA->isLiveOnEntryDef(MD) && + CurLoop->contains(MD->getBlock())) + return false; + // Disable hoisting past potentially interfering loads. Optimized + // Uses may point to an access outside the loop, as getClobbering + // checks the previous iteration when walking the backedge. + // FIXME: More precise: no Uses that alias SI. + if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU)) + return false; + } else if (const auto *MD = dyn_cast(&MA)) { + if (auto *LI = dyn_cast(MD->getMemoryInst())) { + (void)LI; // Silence warning. + assert(!LI->isUnordered() && "Expected unordered load"); + return false; + } + // Any call, while it may not be clobbering SI, it may be a use. + if (auto *CI = dyn_cast(MD->getMemoryInst())) { + // Check if the call may read from the memory location written + // to by SI. Check CI's attributes and arguments; the number of + // such checks performed is limited above by NoOfMemAccTooLarge. 
+ ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); + if (isModOrRefSet(MRI)) return false; - } - // Any call, while it may not be clobbering SI, it may be a use. - if (auto *CI = dyn_cast(MD->getMemoryInst())) { - // Check if the call may read from the memory location written - // to by SI. Check CI's attributes and arguments; the number of - // such checks performed is limited above by NoOfMemAccTooLarge. - ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); - if (isModOrRefSet(MRI)) - return false; - } } - } - auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); - Flags->incrementClobberingCalls(); - // If there are no clobbering Defs in the loop, store is safe to hoist. - return MSSA->isLiveOnEntryDef(Source) || - !CurLoop->contains(Source->getBlock()); - } + } + } + auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); + Flags.incrementClobberingCalls(); + // If there are no clobbering Defs in the loop, store is safe to hoist. + return MSSA->isLiveOnEntryDef(Source) || + !CurLoop->contains(Source->getBlock()); } assert(!I.mayReadOrWriteMemory() && "unhandled aliasing"); @@ -1421,7 +1388,7 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, - const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { + const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) { Instruction *New; if (auto *CI = dyn_cast(&I)) { const auto &BlockColors = SafetyInfo->getBlockColors(); @@ -1457,16 +1424,16 @@ static Instruction *cloneInstructionInExitBlock( if (!I.getName().empty()) New->setName(I.getName() + ".le"); - if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. 
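The re-indented store case ends with the decisive MemorySSA query: ask the skip-self walker for the store's clobbering access and hoist only if that access is live-on-entry or defined outside the loop. That final test, condensed into a helper (the API calls are the ones used in the hunk):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // A store is hoistable w.r.t. memory if nothing in the loop clobbers
    // it; the skip-self walker ignores the store's own access.
    bool storeHasNoClobberInLoop(MemorySSA &MSSA, const Loop *CurLoop,
                                 StoreInst *SI) {
      MemoryAccess *Source =
          MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(SI);
      return MSSA.isLiveOnEntryDef(Source) ||
             !CurLoop->contains(Source->getBlock());
    }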
- MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( + MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB( New, nullptr, New->getParent(), MemorySSA::Beginning); if (NewMemAcc) { if (auto *MemDef = dyn_cast(NewMemAcc)) - MSSAU->insertDef(MemDef, /*RenameUses=*/true); + MSSAU.insertDef(MemDef, /*RenameUses=*/true); else { auto *MemUse = cast(NewMemAcc); - MSSAU->insertUse(MemUse, /*RenameUses=*/true); + MSSAU.insertUse(MemUse, /*RenameUses=*/true); } } } @@ -1492,25 +1459,22 @@ static Instruction *cloneInstructionInExitBlock( } static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater *MSSAU) { - if (MSSAU) - MSSAU->removeMemoryAccess(&I); + MemorySSAUpdater &MSSAU) { + MSSAU.removeMemoryAccess(&I); SafetyInfo.removeInstruction(&I); I.eraseFromParent(); } static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater *MSSAU, + MemorySSAUpdater &MSSAU, ScalarEvolution *SE) { SafetyInfo.removeInstruction(&I); SafetyInfo.insertInstructionTo(&I, Dest.getParent()); I.moveBefore(&Dest); - if (MSSAU) - if (MemoryUseOrDef *OldMemAcc = cast_or_null( - MSSAU->getMemorySSA()->getMemoryAccess(&I))) - MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), - MemorySSA::BeforeTerminator); + if (MemoryUseOrDef *OldMemAcc = cast_or_null( + MSSAU.getMemorySSA()->getMemoryAccess(&I))) + MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator); if (SE) SE->forgetValue(&I); } @@ -1519,7 +1483,7 @@ static Instruction *sinkThroughTriviallyReplaceablePHI( PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater &MSSAU) { assert(isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI"); BasicBlock *ExitBlock = TPN->getParent(); @@ -1625,7 +1589,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, const Loop *CurLoop, - ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) { bool Changed = false; LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); @@ -1642,7 +1606,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, continue; if (!DT->isReachableFromEntry(User->getParent())) { - U = UndefValue::get(I.getType()); + U = PoisonValue::get(I.getType()); Changed = true; continue; } @@ -1655,7 +1619,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, // unreachable. BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { - U = UndefValue::get(I.getType()); + U = PoisonValue::get(I.getType()); Changed = true; continue; } @@ -1669,7 +1633,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, // Split predecessors of the PHI so that we can make users trivially // replaceable. - splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU); + splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU); // Should rebuild the iterators, as they may be invalidated by // splitPredecessorsOfLoopExit(). 
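The pointer-to-reference migration pays off in helpers like `eraseInstruction` above: with the AliasSetTracker path gone, the updater can never be null, so the `if (MSSAU)` guards disappear along with it. The before/after shape on a reduced helper (the real one also notifies `SafetyInfo`):

    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Instruction.h"

    // Before: void erase(Instruction &I, MemorySSAUpdater *MSSAU) {
    //           if (MSSAU) MSSAU->removeMemoryAccess(&I); ...
    //         }
    // After: the reference documents the invariant and deletes the branch.
    void erase(llvm::Instruction &I, llvm::MemorySSAUpdater &MSSAU) {
      MSSAU.removeMemoryAccess(&I); // always valid now
      I.eraseFromParent();
    }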
@@ -1720,7 +1684,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Instruction *New = sinkThroughTriviallyReplaceablePHI( PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); PN->replaceAllUsesWith(New); - eraseInstruction(*PN, *SafetyInfo, nullptr); + eraseInstruction(*PN, *SafetyInfo, MSSAU); Changed = true; } return Changed; @@ -1731,7 +1695,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, /// static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, - MemorySSAUpdater *MSSAU, ScalarEvolution *SE, + MemorySSAUpdater &MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) { LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": " << I << "\n"); @@ -1774,14 +1738,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, /// Only sink or hoist an instruction if it is not a trapping instruction, /// or if the instruction is known not to trap when moved to the preheader. /// or if it is a trapping instruction and is guaranteed to execute. -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI) { - if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, + bool AllowSpeculation) { + if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) return true; bool GuaranteedToExecute = @@ -1809,7 +1771,7 @@ class LoopPromoter : public LoadAndStorePromoter { SmallVectorImpl &LoopInsertPts; SmallVectorImpl &MSSAInsertPts; PredIteratorCache &PredCache; - MemorySSAUpdater *MSSAU; + MemorySSAUpdater &MSSAU; LoopInfo &LI; DebugLoc DL; Align Alignment; @@ -1841,7 +1803,7 @@ public: SmallVectorImpl &LEB, SmallVectorImpl &LIP, SmallVectorImpl &MSSAIP, PredIteratorCache &PIC, - MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, + MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl, Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), @@ -1883,14 +1845,14 @@ public: MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; MemoryAccess *NewMemAcc; if (!MSSAInsertPoint) { - NewMemAcc = MSSAU->createMemoryAccessInBB( + NewMemAcc = MSSAU.createMemoryAccessInBB( NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); } else { NewMemAcc = - MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); + MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); } MSSAInsertPts[i] = NewMemAcc; - MSSAU->insertDef(cast(NewMemAcc), true); + MSSAU.insertDef(cast(NewMemAcc), true); // FIXME: true for safety, false may still be correct. 
} } @@ -1902,7 +1864,7 @@ public: void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); - MSSAU->removeMemoryAccess(I); + MSSAU.removeMemoryAccess(I); } bool shouldDelete(Instruction *I) const override { @@ -1948,8 +1910,8 @@ bool llvm::promoteLoopAccessesToScalars( SmallVectorImpl &InsertPts, SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -1997,6 +1959,7 @@ bool llvm::promoteLoopAccessesToScalars( bool DereferenceableInPH = false; bool SafeToInsertStore = false; + bool StoreIsGuanteedToExecute = false; bool FoundLoadToPromote = false; SmallVector LoopUses; @@ -2031,9 +1994,9 @@ bool llvm::promoteLoopAccessesToScalars( // different sizes. While we are at it, collect alignment and AA info. Type *AccessTy = nullptr; for (Value *ASIV : PointerMustAliases) { - for (User *U : ASIV->users()) { + for (Use &U : ASIV->uses()) { // Ignore instructions that are outside the loop. - Instruction *UI = dyn_cast(U); + Instruction *UI = dyn_cast(U.getUser()); if (!UI || !CurLoop->contains(UI)) continue; @@ -2054,16 +2017,16 @@ bool llvm::promoteLoopAccessesToScalars( // to execute does as well. Thus we can increase our guaranteed // alignment as well. if (!DereferenceableInPH || (InstAlignment > Alignment)) - if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, - SafetyInfo, ORE, - Preheader->getTerminator())) { + if (isSafeToExecuteUnconditionally( + *Load, DT, TLI, CurLoop, SafetyInfo, ORE, + Preheader->getTerminator(), AllowSpeculation)) { DereferenceableInPH = true; Alignment = std::max(Alignment, InstAlignment); } } else if (const StoreInst *Store = dyn_cast(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. - if (UI->getOperand(1) != ASIV) + if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) continue; if (!Store->isUnordered()) return false; @@ -2077,10 +2040,12 @@ bool llvm::promoteLoopAccessesToScalars( // alignment than any other guaranteed stores, in which case we can // raise the alignment on the promoted store. Align InstAlignment = Store->getAlign(); - + bool GuaranteedToExecute = + SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop); + StoreIsGuanteedToExecute |= GuaranteedToExecute; if (!DereferenceableInPH || !SafeToInsertStore || (InstAlignment > Alignment)) { - if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) { + if (GuaranteedToExecute) { DereferenceableInPH = true; SafeToInsertStore = true; Alignment = std::max(Alignment, InstAlignment); @@ -2194,32 +2159,37 @@ bool llvm::promoteLoopAccessesToScalars( // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
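The promotion loop above switches from iterating `users()` to iterating `uses()`: with a `Use` in hand, `U.getOperandNo()` distinguishes a store *to* the promoted pointer from a store *of* it, which matters when the same value appears as both the stored value and the address. A sketch of the per-use test (hypothetical helper name):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Use.h"

    using namespace llvm;

    // Only the address operand of a store is interesting for promotion;
    // visiting uses (not users) lets each operand position be checked.
    bool isStoreToPointer(const Use &U) {
      auto *Store = dyn_cast<StoreInst>(U.getUser());
      return Store &&
             U.getOperandNo() == StoreInst::getPointerOperandIndex();
    }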
- LoadInst *PreheaderLoad = new LoadInst( - AccessTy, SomePtr, SomePtr->getName() + ".promoted", - Preheader->getTerminator()); - if (SawUnorderedAtomic) - PreheaderLoad->setOrdering(AtomicOrdering::Unordered); - PreheaderLoad->setAlignment(Alignment); - PreheaderLoad->setDebugLoc(DebugLoc()); - if (AATags) - PreheaderLoad->setAAMetadata(AATags); - SSA.AddAvailableValue(Preheader, PreheaderLoad); - - MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( - PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); - MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); - MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); + LoadInst *PreheaderLoad = nullptr; + if (FoundLoadToPromote || !StoreIsGuanteedToExecute) { + PreheaderLoad = + new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted", + Preheader->getTerminator()); + if (SawUnorderedAtomic) + PreheaderLoad->setOrdering(AtomicOrdering::Unordered); + PreheaderLoad->setAlignment(Alignment); + PreheaderLoad->setDebugLoc(DebugLoc()); + if (AATags) + PreheaderLoad->setAAMetadata(AATags); + + MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB( + PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); + MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); + MSSAU.insertUse(NewMemUse, /*RenameUses=*/true); + SSA.AddAvailableValue(Preheader, PreheaderLoad); + } else { + SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy)); + } if (VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); + MSSAU.getMemorySSA()->verifyMemorySSA(); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); if (VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); + MSSAU.getMemorySSA()->verifyMemorySSA(); // If the SSAUpdater didn't use the load in the preheader, just zap it now. - if (PreheaderLoad->use_empty()) + if (PreheaderLoad && PreheaderLoad->use_empty()) eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU); return true; @@ -2246,8 +2216,7 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { return false; }; - // Populate AST with potentially promotable accesses and remove them from - // MaybePromotable, so they will not be checked again on the next iteration. + // Populate AST with potentially promotable accesses. SmallPtrSet AttemptingPromotion; foreachMemoryAccess(MSSA, L, [&](Instruction *I) { if (IsPotentiallyPromotable(I)) { @@ -2286,15 +2255,9 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { return Result; } -static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, - AliasSetTracker *CurAST, Loop *CurLoop, - AAResults *AA) { - return CurAST->getAliasSetFor(MemLoc).isMod(); -} - -bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, Instruction &I, - SinkAndHoistLICMFlags &Flags) { +static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, + Loop *CurLoop, Instruction &I, + SinkAndHoistLICMFlags &Flags) { // For hoisting, use the walker to determine safety if (!Flags.getIsSink()) { MemoryAccess *Source; @@ -2329,17 +2292,16 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, if (Flags.tooManyMemoryAccesses()) return true; for (auto *BB : CurLoop->getBlocks()) - if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) + if (pointerInvalidatedByBlock(*BB, *MSSA, *MU)) return true; // When sinking, the source block may not be part of the loop so check it. 
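The final promotion hunk makes the preheader load conditional: when there are no promotable loads and some store is guaranteed to execute (tracked in the flag the hunk spells `StoreIsGuanteedToExecute`), every value leaving the loop comes from one of the stores, so the pre-loop memory contents are never observed and the SSAUpdater can be seeded with `poison` instead. The decision, reduced to its core (stand-in names for the two accumulated flags):

    #include "llvm/IR/Constants.h"

    // Only materialize a preheader load when the initial value can
    // actually be observed inside or after the loop.
    bool needsPreheaderLoad(bool FoundLoadToPromote,
                            bool StoreIsGuaranteedToExecute) {
      return FoundLoadToPromote || !StoreIsGuaranteedToExecute;
    }

    // Otherwise the seed is never read and may be poison, as in the hunk:
    //   SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));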
if (!CurLoop->contains(&I)) - return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); + return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU); return false; } -bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, - MemoryUse &MU) { +bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) { if (const auto *Accesses = MSSA.getBlockDefs(&BB)) for (const auto &MA : *Accesses) if (const auto *MD = dyn_cast(&MA)) diff --git a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp index 1c3ff1a61b7e..c063c0d3c88a 100644 --- a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp +++ b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp @@ -8,6 +8,7 @@ #include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" using namespace llvm; #define DEBUG_TYPE "loop-accesses" diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp index d438d56e38ca..2b9800f11912 100644 --- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp +++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp @@ -8,20 +8,15 @@ #include "llvm/Transforms/Scalar/LoopBoundSplit.h" #include "llvm/ADT/Sequence.h" -#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #define DEBUG_TYPE "loop-bound-split" @@ -33,26 +28,23 @@ using namespace PatternMatch; namespace { struct ConditionInfo { /// Branch instruction with this condition - BranchInst *BI; + BranchInst *BI = nullptr; /// ICmp instruction with this condition - ICmpInst *ICmp; + ICmpInst *ICmp = nullptr; /// Preciate info - ICmpInst::Predicate Pred; + ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; /// AddRec llvm value - Value *AddRecValue; + Value *AddRecValue = nullptr; /// Non PHI AddRec llvm value Value *NonPHIAddRecValue; /// Bound llvm value - Value *BoundValue; + Value *BoundValue = nullptr; /// AddRec SCEV - const SCEVAddRecExpr *AddRecSCEV; + const SCEVAddRecExpr *AddRecSCEV = nullptr; /// Bound SCEV - const SCEV *BoundSCEV; + const SCEV *BoundSCEV = nullptr; - ConditionInfo() - : BI(nullptr), ICmp(nullptr), Pred(ICmpInst::BAD_ICMP_PREDICATE), - AddRecValue(nullptr), BoundValue(nullptr), AddRecSCEV(nullptr), - BoundSCEV(nullptr) {} + ConditionInfo() = default; }; } // namespace diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 57e36e5b9b90..9590fbbb1994 100644 --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include 
"llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -30,9 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "loop-data-prefetch" @@ -236,15 +233,14 @@ struct Prefetch { /// The address formula for this prefetch as returned by ScalarEvolution. const SCEVAddRecExpr *LSCEVAddRec; /// The point of insertion for the prefetch instruction. - Instruction *InsertPt; + Instruction *InsertPt = nullptr; /// True if targeting a write memory access. - bool Writes; + bool Writes = false; /// The (first seen) prefetched instruction. - Instruction *MemI; + Instruction *MemI = nullptr; /// Constructor to create a new Prefetch for \p I. - Prefetch(const SCEVAddRecExpr *L, Instruction *I) - : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) { + Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) { addInstruction(I); }; @@ -303,7 +299,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { } Metrics.analyzeBasicBlock(BB, *TTI, EphValues); } - unsigned LoopSize = Metrics.NumInsts; + + if (!Metrics.NumInsts.isValid()) + return MadeChange; + + unsigned LoopSize = *Metrics.NumInsts.getValue(); if (!LoopSize) LoopSize = 1; diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index 361d6c0d9381..93f3cd704196 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -17,12 +17,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PatternMatch.h" @@ -192,13 +192,13 @@ getValueOnFirstIteration(Value *V, DenseMap &FirstIterValue, getValueOnFirstIteration(BO->getOperand(0), FirstIterValue, SQ); Value *RHS = getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ); - FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ); + FirstIterV = simplifyBinOp(BO->getOpcode(), LHS, RHS, SQ); } else if (auto *Cmp = dyn_cast(V)) { Value *LHS = getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ); Value *RHS = getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ); - FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ); + FirstIterV = simplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ); } else if (auto *Select = dyn_cast(V)) { Value *Cond = getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ); @@ -458,13 +458,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, if (ExitBlock && isLoopNeverExecuted(L)) { LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); // We need to forget the loop before setting the incoming values of the exit - // phis to undef, so we properly invalidate the SCEV expressions for those + // phis to poison, so we properly invalidate the SCEV expressions for those // phis. SE.forgetLoop(L); - // Set incoming value to undef for phi nodes in the exit block. + // Set incoming value to poison for phi nodes in the exit block. 
for (PHINode &P : ExitBlock->phis()) { std::fill(P.incoming_values().begin(), P.incoming_values().end(), - UndefValue::get(P.getType())); + PoisonValue::get(P.getType())); } ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(), diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index 0f4c767c1e4c..03a10cb36bb6 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -47,7 +47,6 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" @@ -231,7 +230,7 @@ public: // having to update as many def-use and use-def chains. for (auto *Inst : reverse(Unused)) { if (!Inst->use_empty()) - Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType())); Inst->eraseFromParent(); } } @@ -601,7 +600,7 @@ private: {LLVMLoopDistributeFollowupAll, Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential : LLVMLoopDistributeFollowupCoincident}); - if (PartitionID.hasValue()) { + if (PartitionID) { Loop *NewLoop = Part->getDistributedLoop(); NewLoop->setLoopID(PartitionID.getValue()); } @@ -770,19 +769,19 @@ public: // Don't distribute the loop if we need too many SCEV run-time checks, or // any if it's illegal. - const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate(); + const SCEVPredicate &Pred = LAI->getPSE().getPredicate(); if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) { return fail("RuntimeCheckWithConvergent", "may not insert runtime check with convergent operation"); } - if (Pred.getComplexity() > (IsForced.getValueOr(false) + if (Pred.getComplexity() > (IsForced.value_or(false) ? PragmaDistributeSCEVCheckThreshold : DistributeSCEVCheckThreshold)) return fail("TooManySCEVRuntimeChecks", "too many SCEV run-time checks needed.\n"); - if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L)) + if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L)) return fail("HeuristicDisabled", "distribution heuristic disabled"); LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n"); @@ -859,7 +858,7 @@ public: /// Provide diagnostics then \return with false. bool fail(StringRef RemarkName, StringRef Message) { LLVMContext &Ctx = F->getContext(); - bool Forced = isForced().getValueOr(false); + bool Forced = isForced().value_or(false); LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n"); @@ -991,7 +990,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, // If distribution was forced for the specific loop to be // enabled/disabled, follow that. Otherwise use the global flag. 
- if (LDL.isForced().getValueOr(EnableLoopDistribute)) + if (LDL.isForced().value_or(EnableLoopDistribute)) Changed |= LDL.processLoop(GetLAA); } diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index c46db4e63bfe..f36193fc468e 100644 --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -54,6 +54,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -64,12 +65,12 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" @@ -210,8 +211,9 @@ struct FlattenInfo { if (!MatchedItCount) return false; - // Look through extends if the IV has been widened. - if (Widened && + // Look through extends if the IV has been widened. Don't look through + // extends if we already looked through a trunc. + if (Widened && IsAdd && (isa(MatchedItCount) || isa(MatchedItCount))) { assert(MatchedItCount->getType() == InnerInductionPHI->getType() && "Unexpected type mismatch in types after widening"); @@ -410,7 +412,7 @@ static bool findLoopComponents( // pre-header and one from the latch. The incoming latch value is the // increment variable. Increment = - dyn_cast(InductionPHI->getIncomingValueForBlock(Latch)); + cast(InductionPHI->getIncomingValueForBlock(Latch)); if (Increment->hasNUsesOrMore(3)) { LLVM_DEBUG(dbgs() << "Could not find valid increment\n"); return false; @@ -921,7 +923,7 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM, // this pass will simplify all loops that contain inner loops, // regardless of whether anything ends up being flattened. Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr); + MSSAU ? MSSAU.getPointer() : nullptr); if (!Changed) return PreservedAnalyses::all(); @@ -987,7 +989,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) { for (Loop *L : *LI) { auto LN = LoopNest::getLoopNest(*L, *SE); Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr); + MSSAU ? 
MSSAU.getPointer() : nullptr); } return Changed; } diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index bf4d275e04ba..d94b767c7b63 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -117,7 +117,7 @@ static cl::opt FusionDependenceAnalysis( "Use the dependence analysis interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all", "Use all available analyses")), - cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore); + cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL)); static cl::opt FusionPeelMaxCount( "loop-fusion-peel-max-count", cl::init(0), cl::Hidden, @@ -128,7 +128,7 @@ static cl::opt FusionPeelMaxCount( static cl::opt VerboseFusionDebugging("loop-fusion-verbose-debug", cl::desc("Enable verbose debugging for Loop Fusion"), - cl::Hidden, cl::init(false), cl::ZeroOrMore); + cl::Hidden, cl::init(false)); #endif namespace { @@ -178,12 +178,12 @@ struct FusionCandidate { /// FusionCandidateCompare function, required by FusionCandidateSet to /// determine where the FusionCandidate should be inserted into the set. These /// are used to establish ordering of the FusionCandidates based on dominance. - const DominatorTree *DT; + DominatorTree &DT; const PostDominatorTree *PDT; OptimizationRemarkEmitter &ORE; - FusionCandidate(Loop *L, const DominatorTree *DT, + FusionCandidate(Loop *L, DominatorTree &DT, const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE, TTI::PeelingPreferences PP) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), @@ -192,7 +192,6 @@ struct FusionCandidate { GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { - assert(DT && "Expected non-null DT!"); // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect // the memory reads and writes If any instructions that prevent fusion are @@ -391,7 +390,7 @@ struct FusionCandidateCompare { /// IF RHS dominates LHS and LHS post-dominates RHS, return false; bool operator()(const FusionCandidate &LHS, const FusionCandidate &RHS) const { - const DominatorTree *DT = LHS.DT; + const DominatorTree *DT = &(LHS.DT); BasicBlock *LHSEntryBlock = LHS.getEntryBlock(); BasicBlock *RHSEntryBlock = RHS.getEntryBlock(); @@ -646,7 +645,7 @@ private: for (Loop *L : LV) { TTI::PeelingPreferences PP = gatherPeelingPreferences(L, SE, TTI, None, None); - FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP); + FusionCandidate CurrCand(L, DT, &PDT, ORE, PP); if (!CurrCand.isEligibleForFusion(SE)) continue; @@ -991,7 +990,7 @@ private: FuseCounter); FusionCandidate FusedCand( - performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE, + performFusion((Peel ? 
FC0Copy : *FC0), *FC1), DT, &PDT, ORE, FC0Copy.PP); FusedCand.verify(); assert(FusedCand.isEligibleForFusion(SE) && diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 318c4c06f0f7..88d6a7aff3c9 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -61,7 +61,6 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -346,7 +345,7 @@ INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom", Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); } static void deleteDeadInstruction(Instruction *I) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); I->eraseFromParent(); } @@ -798,7 +797,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl &SL, } /// processLoopMemIntrinsic - Template function for calling different processor -/// functions based on mem instrinsic type. +/// functions based on mem intrinsic type. template bool LoopIdiomRecognize::processLoopMemIntrinsic( BasicBlock *BB, @@ -995,9 +994,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, SmallPtrSet MSIs; MSIs.insert(MSI); return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()), - MaybeAlign(MSI->getDestAlignment()), - SplatValue, MSI, MSIs, Ev, BECount, - IsNegStride, /*IsLoopMemset=*/true); + MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev, + BECount, IsNegStride, /*IsLoopMemset=*/true); } /// mayLoopAccessLocation - Return true if the specified loop might access the @@ -1101,6 +1099,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *StoredVal, Instruction *TheStore, SmallPtrSetImpl &Stores, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) { + Module *M = TheStore->getModule(); Value *SplatValue = isBytewiseValue(StoredVal, *DL); Constant *PatternValue = nullptr; @@ -1173,6 +1172,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( CallInst *NewCall; if (SplatValue) { AAMDNodes AATags = TheStore->getAAMetadata(); + for (Instruction *Store : Stores) + AATags = AATags.merge(Store->getAAMetadata()); if (auto CI = dyn_cast(NumBytes)) AATags = AATags.extendTo(CI->getZExtValue()); else @@ -1181,15 +1182,14 @@ bool LoopIdiomRecognize::processLoopStridedStore( NewCall = Builder.CreateMemSet( BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); - } else { + } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) { // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; - Module *M = TheStore->getModule(); StringRef FuncName = "memset_pattern16"; - FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(), - Int8PtrTy, Int8PtrTy, IntIdxTy); - inferLibFuncAttributes(M, FuncName, *TLI); + FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, + Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); + inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. 
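The processLoopStridedStore hunks above change the non-splat path so that memset_pattern16 is only formed when isLibFuncEmittable says the target library actually provides it; otherwise the routine bails out, keeping whatever progress was already made. The following standalone sketch illustrates that control flow; TargetLibrary and the function names are hypothetical stand-ins, not the LLVM TargetLibraryInfo API.

// Minimal sketch of the "check the libcall is emittable before forming it"
// guard introduced above. All types here are toy stand-ins.
#include <iostream>
#include <set>
#include <string>

struct TargetLibrary {
  std::set<std::string> Available;
  bool isEmittable(const std::string &Fn) const {
    return Available.count(Fn) != 0;
  }
};

// Returns true if a call was emitted; otherwise returns the progress made
// so far (mirroring the `return Changed;` added in the hunk).
bool emitStridedStore(const TargetLibrary &TLI, bool HaveSplatValue,
                      bool Changed) {
  if (HaveSplatValue) {
    std::cout << "emit memset\n"; // always available
  } else if (TLI.isEmittable("memset_pattern16")) {
    std::cout << "emit memset_pattern16\n"; // target provides it
  } else {
    return Changed; // no legal lowering; keep only earlier progress
  }
  return true;
}

int main() {
  TargetLibrary WithPattern{{"memset_pattern16"}};
  TargetLibrary Without{};
  std::cout << emitStridedStore(WithPattern, false, false) << '\n'; // 1
  std::cout << emitStridedStore(Without, false, false) << '\n';     // 0
}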
@@ -1200,7 +1200,9 @@ bool LoopIdiomRecognize::processLoopStridedStore( GV->setAlignment(Align(16)); Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); - } + } else + return Changed; + NewCall->setDebugLoc(TheStore->getDebugLoc()); if (MSSAU) { @@ -1275,9 +1277,8 @@ class MemmoveVerifier { public: explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr, const DataLayout &DL) - : DL(DL), LoadOff(0), StoreOff(0), - BP1(llvm::GetPointerBaseWithConstantOffset( - LoadBasePtr.stripPointerCasts(), LoadOff, DL)), + : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset( + LoadBasePtr.stripPointerCasts(), LoadOff, DL)), BP2(llvm::GetPointerBaseWithConstantOffset( StoreBasePtr.stripPointerCasts(), StoreOff, DL)), IsSameObject(BP1 == BP2) {} @@ -1307,8 +1308,8 @@ public: private: const DataLayout &DL; - int64_t LoadOff; - int64_t StoreOff; + int64_t LoadOff = 0; + int64_t StoreOff = 0; const Value *BP1; const Value *BP2; @@ -1420,26 +1421,19 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // If the store is a memcpy instruction, we must check if it will write to // the load memory locations. So remove it from the ignored stores. - if (IsMemCpy) - IgnoredInsts.erase(TheStore); MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL); + if (IsMemCpy && !Verifier.IsSameObject) + IgnoredInsts.erase(TheStore); if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, StoreSizeSCEV, *AA, IgnoredInsts)) { - if (!IsMemCpy) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", - TheLoad) - << ore::NV("Inst", InstRemark) << " in " - << ore::NV("Function", TheStore->getFunction()) - << " function will not be hoisted: " - << ore::NV("Reason", "The loop may access load location"); - }); - return Changed; - } - // At this point loop may access load only for memcpy in same underlying - // object. If that's not the case bail out. - if (!Verifier.IsSameObject) - return Changed; + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad) + << ore::NV("Inst", InstRemark) << " in " + << ore::NV("Function", TheStore->getFunction()) + << " function will not be hoisted: " + << ore::NV("Reason", "The loop may access load location"); + }); + return Changed; } bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore; @@ -1487,7 +1481,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( return Changed; // We cannot allow unaligned ops for unordered load/store, so reject // anything where the alignment isn't at least the element size. 
- assert((StoreAlign.hasValue() && LoadAlign.hasValue()) && + assert((StoreAlign && LoadAlign) && "Expect unordered load/store to have align."); if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize) return Changed; diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp index b9e63a4bc06f..4249512ea0f8 100644 --- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopInstSimplify.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -25,21 +24,17 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include #include using namespace llvm; @@ -101,7 +96,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI, if (!IsFirstIteration && !ToSimplify->count(&I)) continue; - Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I)); + Value *V = simplifyInstruction(&I, SQ.getWithInstruction(&I)); if (!V || !LI.replacementPreservesLCSSAForm(&I, V)) continue; @@ -109,6 +104,10 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI, auto *UserI = cast(U.getUser()); U.set(V); + // Do not bother dealing with unreachable code. + if (!DT.isReachableFromEntry(UserI->getParent())) + continue; + // If the instruction is used by a PHI node we have already processed // we'll need to iterate on the loop body to converge, so add it to // the next set. @@ -222,7 +221,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, AR.MSSA->verifyMemorySSA(); } if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr)) + MSSAU ? 
MSSAU.getPointer() : nullptr)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index c2b065c4eb31..1d3023d04463 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/LoopPass.h" @@ -33,7 +34,6 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -44,7 +44,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include @@ -120,8 +119,6 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, std::vector Dep; Instruction *Src = cast(*I); Instruction *Dst = cast(*J); - if (Src == Dst) - continue; // Ignore Input dependencies. if (isa(Src) && isa(Dst)) continue; @@ -270,26 +267,28 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, return true; } -static LoopVector populateWorklist(Loop &L) { +static void populateWorklist(Loop &L, LoopVector &LoopList) { LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n'); - LoopVector LoopList; + assert(LoopList.empty() && "LoopList should initially be empty!"); Loop *CurrentLoop = &L; const std::vector *Vec = &CurrentLoop->getSubLoops(); while (!Vec->empty()) { // The current loop has multiple subloops in it hence it is not tightly // nested. // Discard all loops above it added into Worklist. - if (Vec->size() != 1) - return {}; + if (Vec->size() != 1) { + LoopList = {}; + return; + } LoopList.push_back(CurrentLoop); CurrentLoop = Vec->front(); Vec = &CurrentLoop->getSubLoops(); } LoopList.push_back(CurrentLoop); - return LoopList; + return; } namespace { @@ -360,8 +359,10 @@ public: : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loop interchange is profitable. - bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, - CharMatrix &DepMatrix); + bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, + unsigned InnerLoopId, unsigned OuterLoopId, + CharMatrix &DepMatrix, + const DenseMap &CostMap); private: int getInstrOrderCost(); @@ -412,23 +413,26 @@ struct LoopInterchange { LoopInfo *LI = nullptr; DependenceInfo *DI = nullptr; DominatorTree *DT = nullptr; + std::unique_ptr CC = nullptr; /// Interface to emit optimization remarks. 
OptimizationRemarkEmitter *ORE; LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, - DominatorTree *DT, OptimizationRemarkEmitter *ORE) - : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {} + DominatorTree *DT, std::unique_ptr<CacheCost> &CC, + OptimizationRemarkEmitter *ORE) + : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {} bool run(Loop *L) { if (L->getParentLoop()) return false; - - return processLoopList(populateWorklist(*L)); + SmallVector LoopList; + populateWorklist(*L, LoopList); + return processLoopList(LoopList); } bool run(LoopNest &LN) { - const auto &LoopList = LN.getLoops(); + SmallVector LoopList(LN.getLoops().begin(), LN.getLoops().end()); for (unsigned I = 1; I < LoopList.size(); ++I) if (LoopList[I]->getParentLoop() != LoopList[I - 1]) return false; @@ -460,7 +464,7 @@ struct LoopInterchange { return LoopList.size() - 1; } - bool processLoopList(ArrayRef<Loop *> LoopList) { + bool processLoopList(SmallVectorImpl<Loop *> &LoopList) { bool Changed = false; unsigned LoopNestDepth = LoopList.size(); if (LoopNestDepth < 2) { @@ -500,27 +504,55 @@ struct LoopInterchange { } unsigned SelecLoopId = selectLoopForInterchange(LoopList); - // Move the selected loop outwards to the best possible position. - Loop *LoopToBeInterchanged = LoopList[SelecLoopId]; - for (unsigned i = SelecLoopId; i > 0; i--) { - bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i, - i - 1, DependencyMatrix); - if (!Interchanged) - return Changed; - // Update the DependencyMatrix - interChangeDependencies(DependencyMatrix, i, i - 1); + // Obtain the loop vector returned from loop cache analysis beforehand, + // and put each <Loop, index> pair into a map for constant time query + // later. Indices in loop vector represent the optimal order of the + // corresponding loop, e.g., given a loopnest with depth N, index 0 + // indicates the loop should be placed as the outermost loop and index N + // indicates the loop should be placed as the innermost loop. + // + // For the old pass manager CacheCost would be null. + DenseMap<const Loop *, unsigned> CostMap; + if (CC != nullptr) { + const auto &LoopCosts = CC->getLoopCosts(); + for (unsigned i = 0; i < LoopCosts.size(); i++) { + CostMap[LoopCosts[i].first] = i; + } + } + // We try to achieve the globally optimal memory access for the loopnest, + // and do interchange based on a bubble-sort fashion. We start from + // the innermost loop, move it outwards to the best possible position + // and repeat this process. + for (unsigned j = SelecLoopId; j > 0; j--) { + bool ChangedPerIter = false; + for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) { + bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1, + DependencyMatrix, CostMap); + if (!Interchanged) + continue; + // Loops interchanged, update LoopList accordingly. + std::swap(LoopList[i - 1], LoopList[i]); + // Update the DependencyMatrix + interChangeDependencies(DependencyMatrix, i, i - 1); #ifdef DUMP_DEP_MATRICIES - LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); - printDepMatrix(DependencyMatrix); + LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); + printDepMatrix(DependencyMatrix); #endif - Changed |= Interchanged; + ChangedPerIter |= Interchanged; + Changed |= Interchanged; + } + // Early abort if there was no interchange during an entire round of + // moving loops outwards.
+ if (!ChangedPerIter) + break; } return Changed; } bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, unsigned OuterLoopId, - std::vector> &DependencyMatrix) { + std::vector> &DependencyMatrix, + const DenseMap &CostMap) { LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); @@ -530,7 +562,8 @@ struct LoopInterchange { } LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); - if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { + if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, + DependencyMatrix, CostMap)) { LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); return false; } @@ -733,8 +766,12 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) { if (PHI->getNumIncomingValues() == 1) continue; RecurrenceDescriptor RD; - if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) + if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) { + // Detect floating point reduction only when it can be reordered. + if (RD.getExactFPMathInst() != nullptr) + return nullptr; return PHI; + } return nullptr; } } @@ -893,28 +930,23 @@ areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); for (PHINode &PHI : LoopNestExit->phis()) { - // FIXME: We currently are not able to detect floating point reductions - // and have to use floating point PHIs as a proxy to prevent - // interchanging in the presence of floating point reductions. - if (PHI.getType()->isFloatingPointTy()) - return false; for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { - Instruction *IncomingI = dyn_cast(PHI.getIncomingValue(i)); - if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) - continue; - - // The incoming value is defined in the outer loop latch. Currently we - // only support that in case the outer loop latch has a single predecessor. - // This guarantees that the outer loop latch is executed if and only if - // the inner loop is executed (because tightlyNested() guarantees that the - // outer loop header only branches to the inner loop or the outer loop - // latch). - // FIXME: We could weaken this logic and allow multiple predecessors, - // if the values are produced outside the loop latch. We would need - // additional logic to update the PHI nodes in the exit block as - // well. - if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) - return false; + Instruction *IncomingI = dyn_cast(PHI.getIncomingValue(i)); + if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) + continue; + + // The incoming value is defined in the outer loop latch. Currently we + // only support that in case the outer loop latch has a single predecessor. + // This guarantees that the outer loop latch is executed if and only if + // the inner loop is executed (because tightlyNested() guarantees that the + // outer loop header only branches to the inner loop or the outer loop + // latch). + // FIXME: We could weaken this logic and allow multiple predecessors, + // if the values are produced outside the loop latch. We would need + // additional logic to update the PHI nodes in the exit block as + // well. 
+ if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) + return false; } } return true; @@ -1125,21 +1157,33 @@ static bool isProfitableForVectorization(unsigned InnerLoopId, return !DepMatrix.empty(); } -bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, - unsigned OuterLoopId, - CharMatrix &DepMatrix) { - // TODO: Add better profitability checks. - // e.g - // 1) Construct dependency matrix and move the one with no loop carried dep - // inside to enable vectorization. - - // This is rough cost estimation algorithm. It counts the good and bad order - // of induction variables in the instruction and allows reordering if number - // of bad orders is more than good. - int Cost = getInstrOrderCost(); - LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n"); - if (Cost < -LoopInterchangeCostThreshold) - return true; +bool LoopInterchangeProfitability::isProfitable( + const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, + unsigned OuterLoopId, CharMatrix &DepMatrix, + const DenseMap<const Loop *, unsigned> &CostMap) { + // TODO: Remove the legacy cost model. + + // This is the new cost model returned from loop cache analysis. + // A smaller index means the loop should be placed as an outer loop, and vice + // versa. + if (CostMap.find(InnerLoop) != CostMap.end() && + CostMap.find(OuterLoop) != CostMap.end()) { + unsigned InnerIndex = 0, OuterIndex = 0; + InnerIndex = CostMap.find(InnerLoop)->second; + OuterIndex = CostMap.find(OuterLoop)->second; + LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex + << ", OuterIndex = " << OuterIndex << "\n"); + if (InnerIndex < OuterIndex) + return true; + } else { + // Legacy cost model: this is a rough cost estimation algorithm. It counts the + // good and bad order of induction variables in the instruction and allows + // reordering if number of bad orders is more than good. + int Cost = getInstrOrderCost(); + LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n"); + if (Cost < -LoopInterchangeCostThreshold) + return true; + } // It is not profitable as per current cache profitability model. But check if // we can move this loop outside to improve parallelism. @@ -1150,10 +1194,8 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", InnerLoop->getStartLoc(), InnerLoop->getHeader()) - << "Interchanging loops is too costly (cost=" - << ore::NV("Cost", Cost) << ", threshold=" - << ore::NV("Threshold", LoopInterchangeCostThreshold) - << ") and it does not improve parallelism."; + << "Interchanging loops is too costly and it does not improve " + "parallelism."; }); return false; } @@ -1424,9 +1466,13 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, // Incoming values are guaranteed be instructions currently. auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch)); + // In case of multi-level nested loops, follow LCSSA to find the incoming + // value defined from the innermost loop. + auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI)); // Skip phis with incoming values from the inner loop body, excluding the // header and latch.
- if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader) + if (IncIInnerMost->getParent() != InnerLatch && + IncIInnerMost->getParent() != InnerHeader) continue; assert(all_of(P.users(), @@ -1695,8 +1741,8 @@ struct LoopInterchangeLegacyPass : public LoopPass { auto *DI = &getAnalysis().getDI(); auto *DT = &getAnalysis().getDomTree(); auto *ORE = &getAnalysis().getORE(); - - return LoopInterchange(SE, LI, DI, DT, ORE).run(L); + std::unique_ptr CC = nullptr; + return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L); } }; } // namespace @@ -1723,8 +1769,10 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, Function &F = *LN.getParent(); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); + std::unique_ptr CC = + CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); OptimizationRemarkEmitter ORE(&F); - if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN)) + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); } diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 21d59936616b..1877ac1dfd08 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -61,7 +61,6 @@ #include #include #include -#include #include #include @@ -213,7 +212,8 @@ public: continue; // Only progagate the value if they are of the same type. - if (Store->getPointerOperandType() != Load->getPointerOperandType()) + if (Store->getPointerOperandType() != Load->getPointerOperandType() || + getLoadStoreType(Store) != getLoadStoreType(Load)) continue; Candidates.emplace_front(Load, Store); @@ -528,7 +528,7 @@ public: return false; } - if (LAI.getPSE().getUnionPredicate().getComplexity() > + if (LAI.getPSE().getPredicate().getComplexity() > LoadElimSCEVCheckThreshold) { LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; @@ -539,7 +539,7 @@ public: return false; } - if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) { + if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) { if (LAI.hasConvergentOp()) { LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with " "convergent calls\n"); @@ -706,8 +706,12 @@ FunctionPass *llvm::createLoopLoadEliminationPass() { PreservedAnalyses LoopLoadEliminationPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &SE = AM.getResult(F); auto &LI = AM.getResult(F); + // There are no loops in the function. Return before computing other expensive + // analyses. 
+ if (LI.empty()) + return PreservedAnalyses::all(); + auto &SE = AM.getResult(F); auto &TTI = AM.getResult(F); auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 6c783848432b..d20d275ea60c 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -8,14 +8,12 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/TimeProfiler.h" using namespace llvm; @@ -311,12 +309,12 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, #ifndef NDEBUG // LoopAnalysisResults should always be valid. - // Note that we don't LAR.SE.verify() because that can change observed SE - // queries. See PR44815. if (VerifyDomInfo) LAR.DT.verify(); if (VerifyLoopInfo) LAR.LI.verify(LAR.DT); + if (VerifySCEV) + LAR.SE.verify(); if (LAR.MSSA && VerifyMemorySSA) LAR.MSSA->verifyMemorySSA(); #endif diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index aa7e79a589f2..d0ee5b47a8ca 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -188,7 +188,6 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" @@ -244,7 +243,7 @@ struct LoopICmp { LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, const SCEV *Limit) : Pred(Pred), IV(IV), Limit(Limit) {} - LoopICmp() {} + LoopICmp() = default; void dump() { dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV << ", Limit = " << *Limit << "\n"; @@ -778,7 +777,7 @@ unsigned LoopPredication::collectChecks(SmallVectorImpl &Checks, if (ICmpInst *ICI = dyn_cast(Condition)) { if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Guard)) { - Checks.push_back(NewRangeCheck.getValue()); + Checks.push_back(*NewRangeCheck); NumWidened++; continue; } diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index 9d22eceb987f..f4ef22562341 100644 --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -29,15 +29,11 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -59,7 +55,6 @@ #include #include #include -#include #include #include #include @@ -559,12 +554,12 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { } // Must be a 
CMP or an ext (of a value with nsw) then CMP else { - Instruction *UUser = dyn_cast(UU); + auto *UUser = cast(UU); // Skip SExt if we are extending an nsw value // TODO: Allow ZExt too - if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() && + if (BO->hasNoSignedWrap() && UUser->hasOneUse() && isa(UUser)) - UUser = dyn_cast(*(UUser->user_begin())); + UUser = cast(*(UUser->user_begin())); if (!isCompareUsedByBranch(UUser)) return false; } diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 5ba137b1c85f..d9c33b5f335a 100644 --- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -11,10 +11,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopRotation.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" @@ -22,9 +22,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopRotationUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -62,8 +60,8 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, MSSAU = MemorySSAUpdater(AR.MSSA); bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false, - Threshold, false, PrepareForLTO || PrepareForLTOOption); + MSSAU ? MSSAU.getPointer() : nullptr, SQ, false, Threshold, + false, PrepareForLTO || PrepareForLTOOption); if (!Changed) return PreservedAnalyses::all(); @@ -133,9 +131,8 @@ public: : MaxHeaderSize; return LoopRotation(L, LI, TTI, AC, &DT, &SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, - false, Threshold, false, - PrepareForLTO || PrepareForLTOOption); + MSSAU ? 
MSSAU.getPointer() : nullptr, SQ, false, + Threshold, false, PrepareForLTO || PrepareForLTOOption); } }; } // end namespace diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index d3fcba10c275..b7e0e32780b4 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -16,28 +16,21 @@ #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -261,13 +254,17 @@ private: assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() && "Malformed block sets?"); - // Now, all exit blocks that are not marked as live are dead. + // Now, all exit blocks that are not marked as live are dead, if all their + // predecessors are in the loop. This may not be the case, as the input loop + // may not be in loop-simplify/canonical form. SmallVector ExitBlocks; L.getExitBlocks(ExitBlocks); SmallPtrSet UniqueDeadExits; for (auto *ExitBlock : ExitBlocks) if (!LiveExitBlocks.count(ExitBlock) && - UniqueDeadExits.insert(ExitBlock).second) + UniqueDeadExits.insert(ExitBlock).second && + all_of(predecessors(ExitBlock), + [this](BasicBlock *Pred) { return L.contains(Pred); })) DeadExitBlocks.push_back(ExitBlock); // Whether or not the edge From->To will still be present in graph after the @@ -374,7 +371,7 @@ private: DeadInstructions.emplace_back(LandingPad); for (Instruction *I : DeadInstructions) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); I->eraseFromParent(); } @@ -704,8 +701,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, MSSAU = MemorySSAUpdater(AR.MSSA); bool DeleteCurrentLoop = false; if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, - DeleteCurrentLoop)) + MSSAU ? MSSAU.getPointer() : nullptr, DeleteCurrentLoop)) return PreservedAnalyses::all(); if (DeleteCurrentLoop) @@ -739,9 +735,9 @@ public: if (MSSAA && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); bool DeleteCurrentLoop = false; bool Changed = simplifyLoopCFG(*L, DT, LI, SE, MSSAU ?
MSSAU.getPointer() : nullptr, + DeleteCurrentLoop); if (DeleteCurrentLoop) LPM.markLoopAsDeleted(*L); return Changed; diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp index c9c9e60d0921..dce1af475fb1 100644 --- a/llvm/lib/Transforms/Scalar/LoopSink.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -34,24 +34,18 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -70,14 +64,6 @@ static cl::opt MaxNumberOfUseBBsForSinking( "max-uses-for-sinking", cl::Hidden, cl::init(30), cl::desc("Do not sink instructions that have too many uses.")); -static cl::opt EnableMSSAInLoopSink( - "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true), - cl::desc("Enable MemorySSA for LoopSink in new pass manager")); - -static cl::opt EnableMSSAInLegacyLoopSink( - "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false), - cl::desc("Enable MemorySSA for LoopSink in legacy pass manager")); - /// Return adjusted total frequency of \p BBs. /// /// * If there is only one BB, sinking instruction will not introduce code @@ -279,9 +265,8 @@ static bool sinkInstruction( static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, - ScalarEvolution *SE, - AliasSetTracker *CurAST, - MemorySSA *MSSA) { + MemorySSA &MSSA, + ScalarEvolution *SE) { BasicBlock *Preheader = L.getLoopPreheader(); assert(Preheader && "Expected loop to have preheader"); @@ -297,13 +282,8 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, })) return false; - std::unique_ptr MSSAU; - std::unique_ptr LICMFlags; - if (MSSA) { - MSSAU = std::make_unique(MSSA); - LICMFlags = - std::make_unique(/*IsSink=*/true, &L, MSSA); - } + MemorySSAUpdater MSSAU(&MSSA); + SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA); bool Changed = false; @@ -324,14 +304,15 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // on B (A appears after B), A needs to be sinked first before B can be // sinked. for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) { + if (isa(&I)) + continue; // No need to check for instruction's operands are loop invariant. 
assert(L.hasLoopInvariantOperands(&I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false, - LICMFlags.get())) + if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, LICMFlags)) continue; if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, - MSSAU.get())) + &MSSAU)) Changed = true; } @@ -340,13 +321,6 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, return Changed; } -static void computeAliasSet(Loop &L, BasicBlock &Preheader, - AliasSetTracker &CurAST) { - for (BasicBlock *BB : L.blocks()) - CurAST.add(*BB); - CurAST.add(Preheader); -} - PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { LoopInfo &LI = FAM.getResult(F); // Nothing to do if there are no loops. @@ -356,10 +330,7 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { AAResults &AA = FAM.getResult(F); DominatorTree &DT = FAM.getResult(F); BlockFrequencyInfo &BFI = FAM.getResult(F); - - MemorySSA *MSSA = EnableMSSAInLoopSink - ? &FAM.getResult(F).getMSSA() - : nullptr; + MemorySSA &MSSA = FAM.getResult(F).getMSSA(); // We want to do a postorder walk over the loops. Since loops are a tree this // is equivalent to a reversed preorder walk and preorder is easy to compute @@ -381,18 +352,11 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { if (!Preheader->getParent()->hasProfileData()) continue; - std::unique_ptr CurAST; - if (!EnableMSSAInLoopSink) { - CurAST = std::make_unique(AA); - computeAliasSet(L, *Preheader, *CurAST.get()); - } - // Note that we don't pass SCEV here because it is only used to invalidate // loops in SCEV and we don't preserve (or request) SCEV at all making that // unnecessary. - Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, - /*ScalarEvolution*/ nullptr, - CurAST.get(), MSSA); + Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, MSSA, + /*ScalarEvolution*/ nullptr); } while (!PreorderLoops.empty()); if (!Changed) @@ -400,13 +364,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { PreservedAnalyses PA; PA.preserveSet(); + PA.preserve(); - if (MSSA) { - PA.preserve(); - - if (VerifyMemorySSA) - MSSA->verifyMemorySSA(); - } + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); return PA; } @@ -432,24 +393,16 @@ struct LegacyLoopSinkPass : public LoopPass { return false; AAResults &AA = getAnalysis().getAAResults(); + MemorySSA &MSSA = getAnalysis().getMSSA(); auto *SE = getAnalysisIfAvailable(); - std::unique_ptr CurAST; - MemorySSA *MSSA = nullptr; - if (EnableMSSAInLegacyLoopSink) - MSSA = &getAnalysis().getMSSA(); - else { - CurAST = std::make_unique(AA); - computeAliasSet(*L, *Preheader, *CurAST.get()); - } - bool Changed = sinkLoopInvariantInstructions( *L, AA, getAnalysis().getLoopInfo(), getAnalysis().getDomTree(), getAnalysis().getBFI(), - SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA); + MSSA, SE ? 
&SE->getSE() : nullptr); - if (MSSA && VerifyMemorySSA) - MSSA->verifyMemorySSA(); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); return Changed; } @@ -458,10 +411,8 @@ struct LegacyLoopSinkPass : public LoopPass { AU.setPreservesCFG(); AU.addRequired(); getLoopAnalysisUsage(AU); - if (EnableMSSAInLegacyLoopSink) { - AU.addRequired(); - AU.addPreserved(); - } + AU.addRequired(); + AU.addPreserved(); } }; } diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 654f0d2a03a8..9959e408e2e2 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -78,6 +78,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -91,9 +92,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" @@ -114,12 +113,12 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include #include #include #include -#include #include #include #include @@ -142,10 +141,7 @@ static const unsigned MaxIVUsers = 200; /// the salvaging is not too expensive for the compiler. static const unsigned MaxSCEVSalvageExpressionSize = 64; -// Temporary flag to cleanup congruent phis after LSR phi expansion. -// It's currently disabled until we can determine whether it's truly useful or -// not. The flag should be removed after the v3.0 release. -// This is now needed for ivchains. +// Cleanup congruent phis after LSR phi expansion. static cl::opt EnablePhiElim( "enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination")); @@ -481,6 +477,12 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { canonicalize(*L); } +static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) { + return SCEVExprContains(S, [&L](const SCEV *S) { + return isa(S) && (cast(S)->getLoop() == &L); + }); +} + /// Check whether or not this formula satisfies the canonical /// representation. /// \see Formula::BaseRegs. @@ -494,18 +496,15 @@ bool Formula::isCanonical(const Loop &L) const { if (Scale == 1 && BaseRegs.empty()) return false; - const SCEVAddRecExpr *SAR = dyn_cast(ScaledReg); - if (SAR && SAR->getLoop() == &L) + if (containsAddRecDependentOnLoop(ScaledReg, L)) return true; // If ScaledReg is not a recurrent expr, or it is but its loop is not current // loop, meanwhile BaseRegs contains a recurrent expr reg related with current // loop, we want to swap the reg in BaseRegs with ScaledReg. - auto I = find_if(BaseRegs, [&](const SCEV *S) { - return isa(S) && - (cast(S)->getLoop() == &L); + return none_of(BaseRegs, [&L](const SCEV *S) { + return containsAddRecDependentOnLoop(S, L); }); - return I == BaseRegs.end(); } /// Helper method to morph a formula into its canonical representation. 
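The Formula::isCanonical hunk above replaces two copies of an inline "is this an AddRec on loop L" check with the new containsAddRecDependentOnLoop helper, which uses SCEVExprContains to search the whole expression tree rather than only the root. Below is a self-contained sketch of that containment test; Expr and the integer loop ids are toy stand-ins for LLVM's SCEV nodes and Loop pointers, not the real API.

// Toy model of containsAddRecDependentOnLoop: recursively search an
// expression tree for a recurrence over a given loop.
#include <algorithm>
#include <iostream>
#include <vector>

struct Expr {
  bool IsAddRec = false;
  int LoopId = -1;       // loop of the recurrence, if IsAddRec
  std::vector<Expr> Ops; // sub-expressions
};

static bool containsAddRecOnLoop(const Expr &E, int L) {
  if (E.IsAddRec && E.LoopId == L)
    return true;
  return std::any_of(E.Ops.begin(), E.Ops.end(), [L](const Expr &Op) {
    return containsAddRecOnLoop(Op, L);
  });
}

int main() {
  // (x + {0,+,1}<L1>): the recurrence is nested one level down, so a
  // root-only check would miss it while the recursive search finds it.
  Expr AddRec{true, 1, {}};
  Expr Sum{false, -1, {Expr{false, -1, {}}, AddRec}};
  std::vector<Expr> BaseRegs = {Sum};
  // isCanonical-style test: no base register may recur over loop 1.
  bool Canonical = std::none_of(
      BaseRegs.begin(), BaseRegs.end(),
      [](const Expr &E) { return containsAddRecOnLoop(E, /*L=*/1); });
  std::cout << std::boolalpha << Canonical << '\n'; // false
}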
@@ -537,11 +536,9 @@ void Formula::canonicalize(const Loop &L) { // If ScaledReg is an invariant with respect to L, find the reg from // BaseRegs containing the recurrent expr related with Loop L. Swap the // reg with ScaledReg. - const SCEVAddRecExpr *SAR = dyn_cast(ScaledReg); - if (!SAR || SAR->getLoop() != &L) { - auto I = find_if(BaseRegs, [&](const SCEV *S) { - return isa(S) && - (cast(S)->getLoop() == &L); + if (!containsAddRecDependentOnLoop(ScaledReg, L)) { + auto I = find_if(BaseRegs, [&L](const SCEV *S) { + return containsAddRecDependentOnLoop(S, L); }); if (I != BaseRegs.end()) std::swap(ScaledReg, *I); @@ -1070,7 +1067,7 @@ public: C.ScaleCost = 0; } - bool isLess(Cost &Other); + bool isLess(const Cost &Other); void Lose(); @@ -1358,6 +1355,8 @@ void Cost::RateFormula(const Formula &F, const DenseSet &VisitedRegs, const LSRUse &LU, SmallPtrSetImpl *LoserRegs) { + if (isLoser()) + return; assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); // Tally up the registers. unsigned PrevAddRecCost = C.AddRecCost; @@ -1467,7 +1466,7 @@ void Cost::Lose() { } /// Choose the lower cost. -bool Cost::isLess(Cost &Other) { +bool Cost::isLess(const Cost &Other) { if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && C.Insns != Other.C.Insns) return C.Insns < Other.C.Insns; @@ -4081,23 +4080,24 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { continue; // Divide out the factor, ignoring high bits, since we'll be // scaling the value back up in the end. - if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) { - // TODO: This could be optimized to avoid all the copying. - Formula F = Base; - F.ScaledReg = Quotient; - F.deleteBaseReg(F.BaseRegs[i]); - // The canonical representation of 1*reg is reg, which is already in - // Base. In that case, do not try to insert the formula, it will be - // rejected anyway. - if (F.Scale == 1 && (F.BaseRegs.empty() || - (AR->getLoop() != L && LU.AllFixupsOutsideLoop))) - continue; - // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate - // non canonical Formula with ScaledReg's loop not being L. - if (F.Scale == 1 && LU.AllFixupsOutsideLoop) - F.canonicalize(*L); - (void)InsertFormula(LU, LUIdx, F); - } + if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) + if (!Quotient->isZero()) { + // TODO: This could be optimized to avoid all the copying. + Formula F = Base; + F.ScaledReg = Quotient; + F.deleteBaseReg(F.BaseRegs[i]); + // The canonical representation of 1*reg is reg, which is already in + // Base. In that case, do not try to insert the formula, it will be + // rejected anyway. + if (F.Scale == 1 && (F.BaseRegs.empty() || + (AR->getLoop() != L && LU.AllFixupsOutsideLoop))) + continue; + // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate + // non canonical Formula with ScaledReg's loop not being L. + if (F.Scale == 1 && LU.AllFixupsOutsideLoop) + F.canonicalize(*L); + (void)InsertFormula(LU, LUIdx, F); + } } } } @@ -5601,6 +5601,27 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, DeadInsts.emplace_back(OperandIsInstr); } +// Check if there are any loop exit values which are only used once within the +// loop which may potentially be optimized with a call to rewriteLoopExitValue. 
+static bool LoopExitValHasSingleUse(Loop *L) { + BasicBlock *ExitBB = L->getExitBlock(); + if (!ExitBB) + return false; + + for (PHINode &ExitPhi : ExitBB->phis()) { + if (ExitPhi.getNumIncomingValues() != 1) + break; + + BasicBlock *Pred = ExitPhi.getIncomingBlock(0); + Value *IVNext = ExitPhi.getIncomingValueForBlock(Pred); + // One use would be the exit phi node, and there should be only one other + // use for this to be considered. + if (IVNext->getNumUses() == 2) + return true; + } + return false; +} + /// Rewrite all the fixup locations with new values, following the chosen /// solution. void LSRInstance::ImplementSolution( @@ -5894,40 +5915,57 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { } namespace { + +/// Enables more convenient iteration over a DWARF expression vector. +static iterator_range +ToDwarfOpIter(SmallVectorImpl &Expr) { + llvm::DIExpression::expr_op_iterator Begin = + llvm::DIExpression::expr_op_iterator(Expr.begin()); + llvm::DIExpression::expr_op_iterator End = + llvm::DIExpression::expr_op_iterator(Expr.end()); + return {Begin, End}; +} + struct SCEVDbgValueBuilder { SCEVDbgValueBuilder() = default; - SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { - Values = Base.Values; + SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); } + + void clone(const SCEVDbgValueBuilder &Base) { + LocationOps = Base.LocationOps; Expr = Base.Expr; } + void clear() { + LocationOps.clear(); + Expr.clear(); + } + /// The DIExpression as we translate the SCEV. SmallVector Expr; /// The location ops of the DIExpression. - SmallVector Values; + SmallVector LocationOps; void pushOperator(uint64_t Op) { Expr.push_back(Op); } void pushUInt(uint64_t Operand) { Expr.push_back(Operand); } /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value /// in the set of values referenced by the expression. - void pushValue(llvm::Value *V) { + void pushLocation(llvm::Value *V) { Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg); - auto *It = - std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V)); + auto *It = std::find(LocationOps.begin(), LocationOps.end(), V); unsigned ArgIndex = 0; - if (It != Values.end()) { - ArgIndex = std::distance(Values.begin(), It); + if (It != LocationOps.end()) { + ArgIndex = std::distance(LocationOps.begin(), It); } else { - ArgIndex = Values.size(); - Values.push_back(llvm::ValueAsMetadata::get(V)); + ArgIndex = LocationOps.size(); + LocationOps.push_back(V); } Expr.push_back(ArgIndex); } void pushValue(const SCEVUnknown *U) { llvm::Value *V = cast(U)->getValue(); - pushValue(V); + pushLocation(V); } bool pushConst(const SCEVConstant *C) { @@ -5938,6 +5976,12 @@ struct SCEVDbgValueBuilder { return true; } + // Iterating the expression as DWARF ops is convenient when updating + // DWARF_OP_LLVM_args. + iterator_range expr_ops() { + return ToDwarfOpIter(Expr); + } + /// Several SCEV types are sequences of the same arithmetic operator applied /// to constants and values that may be extended or truncated. 
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
@@ -5979,7 +6023,7 @@ struct SCEVDbgValueBuilder {
     } else if (const SCEVUnknown *U = dyn_cast(S)) {
       if (!U->getValue())
         return false;
-      pushValue(U->getValue());
+      pushLocation(U->getValue());
 
     } else if (const SCEVMulExpr *MulRec = dyn_cast(S)) {
       Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
@@ -6010,52 +6054,6 @@ struct SCEVDbgValueBuilder {
     return Success;
   }
 
-  void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
-    // Re-state assumption that this dbg.value is not variadic. Any remaining
-    // opcodes in its expression operate on a single value already on the
-    // expression stack. Prepend our operations, which will re-compute and
-    // place that value on the expression stack.
-    assert(!DI.hasArgList());
-    auto *NewExpr =
-        DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
-    DI.setExpression(NewExpr);
-
-    auto ValArrayRef = llvm::ArrayRef(Values);
-    DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
-  }
-
-  /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
-  /// location op index 0.
-  void setShortFinalExpression(llvm::DbgValueInst &DI,
-                               const DIExpression *OldExpr) {
-    assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
-           "Expected DW_OP_llvm_arg and 0.");
-    DI.replaceVariableLocationOp(
-        0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));
-
-    // See setFinalExpression: prepend our opcodes on the start of any old
-    // expression opcodes.
-    assert(!DI.hasArgList());
-    llvm::SmallVector FinalExpr(llvm::drop_begin(Expr, 2));
-    auto *NewExpr =
-        DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
-    DI.setExpression(NewExpr);
-  }
-
-  /// Once the IV and variable SCEV translation is complete, write it to the
-  /// source DVI.
-  void applyExprToDbgValue(llvm::DbgValueInst &DI,
-                           const DIExpression *OldExpr) {
-    assert(!Expr.empty() && "Unexpected empty expression.");
-    // Emit a simpler form if only a single location is referenced.
-    if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
-        Expr[1] == 0) {
-      setShortFinalExpression(DI, OldExpr);
-    } else {
-      setFinalExpression(DI, OldExpr);
-    }
-  }
-
   /// Return true if the combination of arithmetic operator and underlying
   /// SCEV constant value is an identity function.
   bool isIdentityFunction(uint64_t Op, const SCEV *S) {
@@ -6104,6 +6102,48 @@ struct SCEVDbgValueBuilder {
     return true;
   }
 
+  /// Create an expression that is an offset from a value (usually the IV).
+  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
+    pushLocation(OffsetValue);
+    DIExpression::appendOffset(Expr, Offset);
+    LLVM_DEBUG(
+        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
+               << std::to_string(Offset) << "\n");
+  }
+
+  /// Combine a translation of the SCEV and the IV to create an expression that
+  /// recovers a location's value.
+  /// Returns true if an expression was created.
+  bool createIterCountExpr(const SCEV *S,
+                           const SCEVDbgValueBuilder &IterationCount,
+                           ScalarEvolution &SE) {
+    // SCEVs for SSA values are most frequently of the form
+    // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
+    // This is because %a is a PHI node that is not the IV. However, these
+    // SCEVs have not been observed to result in debuginfo-lossy optimisations,
+    // so it's not expected this point will be reached.
+ if (!isa(S)) + return false; + + LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S + << '\n'); + + const auto *Rec = cast(S); + if (!Rec->isAffine()) + return false; + + if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize) + return false; + + // Initialise a new builder with the iteration count expression. In + // combination with the value's SCEV this enables recovery. + clone(IterationCount); + if (!SCEVToValueExpr(*Rec, SE)) + return false; + + return true; + } + /// Convert a SCEV of a value to a DIExpression that is pushed onto the /// builder's expression stack. The stack should already contain an /// expression for the iteration count, so that it can be multiplied by @@ -6133,74 +6173,294 @@ struct SCEVDbgValueBuilder { } return true; } + + // Append the current expression and locations to a location list and an + // expression list. Modify the DW_OP_LLVM_arg indexes to account for + // the locations already present in the destination list. + void appendToVectors(SmallVectorImpl &DestExpr, + SmallVectorImpl &DestLocations) { + assert(!DestLocations.empty() && + "Expected the locations vector to contain the IV"); + // The DWARF_OP_LLVM_arg arguments of the expression being appended must be + // modified to account for the locations already in the destination vector. + // All builders contain the IV as the first location op. + assert(!LocationOps.empty() && + "Expected the location ops to contain the IV."); + // DestIndexMap[n] contains the index in DestLocations for the nth + // location in this SCEVDbgValueBuilder. + SmallVector DestIndexMap; + for (const auto &Op : LocationOps) { + auto It = find(DestLocations, Op); + if (It != DestLocations.end()) { + // Location already exists in DestLocations, reuse existing ArgIndex. + DestIndexMap.push_back(std::distance(DestLocations.begin(), It)); + continue; + } + // Location is not in DestLocations, add it. + DestIndexMap.push_back(DestLocations.size()); + DestLocations.push_back(Op); + } + + for (const auto &Op : expr_ops()) { + if (Op.getOp() != dwarf::DW_OP_LLVM_arg) { + Op.appendToVector(DestExpr); + continue; + } + + DestExpr.push_back(dwarf::DW_OP_LLVM_arg); + // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV, + // DestIndexMap[n] contains its new index in DestLocations. + uint64_t NewIndex = DestIndexMap[Op.getArg(0)]; + DestExpr.push_back(NewIndex); + } + } }; +/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs +/// and DIExpression. struct DVIRecoveryRec { + DVIRecoveryRec(DbgValueInst *DbgValue) + : DVI(DbgValue), Expr(DbgValue->getExpression()), + HadLocationArgList(false) {} + DbgValueInst *DVI; DIExpression *Expr; - Metadata *LocationOp; - const llvm::SCEV *SCEV; + bool HadLocationArgList; + SmallVector LocationOps; + SmallVector SCEVs; + SmallVector, 2> RecoveryExprs; + + void clear() { + for (auto &RE : RecoveryExprs) + RE.reset(); + RecoveryExprs.clear(); + } + + ~DVIRecoveryRec() { clear(); } }; } // namespace -static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI, - const SCEVDbgValueBuilder &IterationCount, - ScalarEvolution &SE) { - // LSR may add locations to previously single location-op DVIs which - // are currently not supported. - if (CachedDVI.DVI->getNumVariableLocationOps() != 1) - return; +/// Returns the total number of DW_OP_llvm_arg operands in the expression. +/// This helps in determining if a DIArglist is necessary or can be omitted from +/// the dbg.value. 
+static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
+  auto expr_ops = ToDwarfOpIter(Expr);
+  unsigned Count = 0;
+  for (auto Op : expr_ops)
+    if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
+      Count++;
+  return Count;
+}
+
+/// Overwrites DVI with the location and Ops as the DIExpression. This will
+/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
+/// because a DIArglist is not created for the first argument of the dbg.value.
+static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location,
+                                  SmallVectorImpl<uint64_t> &Ops) {
+  assert(
+      numLLVMArgOps(Ops) == 0 &&
+      "Expected expression that does not contain any DW_OP_llvm_arg operands.");
+  DVI.setRawLocation(ValueAsMetadata::get(Location));
+  DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
+}
+
+/// Overwrite DVI with locations placed into a DIArglist.
+static void updateDVIWithLocations(DbgValueInst &DVI,
+                                   SmallVectorImpl<Value *> &Locations,
+                                   SmallVectorImpl<uint64_t> &Ops) {
+  assert(numLLVMArgOps(Ops) != 0 &&
+         "Expected expression that references DIArglist locations using "
+         "DW_OP_llvm_arg operands.");
+  SmallVector MetadataLocs;
+  for (Value *V : Locations)
+    MetadataLocs.push_back(ValueAsMetadata::get(V));
+  auto ValArrayRef = llvm::ArrayRef(MetadataLocs);
+  DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef));
+  DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
+}
+
+/// Write the new expression and new location ops for the dbg.value. If
+/// possible, reduce the size of the dbg.value intrinsic by omitting DIArglist.
+/// This can be omitted if:
+/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
+/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
+static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
+                               SmallVectorImpl<Value *> &NewLocationOps,
+                               SmallVectorImpl<uint64_t> &NewExpr) {
+  unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
+  if (NumLLVMArgs == 0) {
+    // Location assumed to be on the stack.
+    updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr);
+  } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
+    // There is only a single DW_OP_llvm_arg at the start of the expression,
+    // so it can be omitted along with DIArglist.
+    assert(NewExpr[1] == 0 &&
+           "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
+    llvm::SmallVector ShortenedOps(llvm::drop_begin(NewExpr, 2));
+    updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps);
+  } else {
+    // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
+    updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr);
+  }
+
+  // If the DIExpression was previously empty then add the stack terminator.
+  // Non-empty expressions have only had elements inserted into them and so the
+  // terminator should already be present, e.g. stack_value or fragment.
+  DIExpression *SalvageExpr = DVIRec.DVI->getExpression();
+  if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
+    SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
+    DVIRec.DVI->setExpression(SalvageExpr);
+  }
+}
+
+/// Cached location ops may be erased during LSR, in which case an undef is
+/// required when restoring from the cache. The type of that location is no
+/// longer available, so just use int8. The undef will be replaced by one or
+/// more locations later when a SCEVDbgValueBuilder selects alternative
+/// locations to use for the salvage.
+static Value *getValueOrUndef(WeakVH &VH, LLVMContext &C) {
+  return (VH) ?
VH : UndefValue::get(llvm::Type::getInt8Ty(C)); +} + +/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values. +static void restorePreTransformState(DVIRecoveryRec &DVIRec) { + LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n" + << "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n'); + assert(DVIRec.Expr && "Expected an expression"); + DVIRec.DVI->setExpression(DVIRec.Expr); + + // Even a single location-op may be inside a DIArgList and referenced with + // DW_OP_LLVM_arg, which is valid only with a DIArgList. + if (!DVIRec.HadLocationArgList) { + assert(DVIRec.LocationOps.size() == 1 && + "Unexpected number of location ops."); + // LSR's unsuccessful salvage attempt may have added DIArgList, which in + // this case was not present before, so force the location back to a single + // uncontained Value. + Value *CachedValue = + getValueOrUndef(DVIRec.LocationOps[0], DVIRec.DVI->getContext()); + DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue)); + } else { + SmallVector MetadataLocs; + for (WeakVH VH : DVIRec.LocationOps) { + Value *CachedValue = getValueOrUndef(VH, DVIRec.DVI->getContext()); + MetadataLocs.push_back(ValueAsMetadata::get(CachedValue)); + } + auto ValArrayRef = llvm::ArrayRef(MetadataLocs); + DVIRec.DVI->setRawLocation( + llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef)); + } + LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n'); +} - // SCEVs for SSA values are most frquently of the form - // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). - // This is because %a is a PHI node that is not the IV. However, these - // SCEVs have not been observed to result in debuginfo-lossy optimisations, - // so its not expected this point will be reached. - if (!isa(CachedDVI.SCEV)) - return; +static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, + llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, + const SCEV *SCEVInductionVar, + SCEVDbgValueBuilder IterCountExpr) { + if (!DVIRec.DVI->isUndef()) + return false; - LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: " - << *CachedDVI.SCEV << '\n'); + // LSR may have caused several changes to the dbg.value in the failed salvage + // attempt. So restore the DIExpression, the location ops and also the + // location ops format, which is always DIArglist for multiple ops, but only + // sometimes for a single op. + restorePreTransformState(DVIRec); + + // LocationOpIndexMap[i] will store the post-LSR location index of + // the non-optimised out location at pre-LSR index i. + SmallVector LocationOpIndexMap; + LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1); + SmallVector NewLocationOps; + NewLocationOps.push_back(LSRInductionVar); + + for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) { + WeakVH VH = DVIRec.LocationOps[i]; + // Place the locations not optimised out in the list first, avoiding + // inserts later. The map is used to update the DIExpression's + // DW_OP_LLVM_arg arguments as the expression is updated. + if (VH && !isa(VH)) { + NewLocationOps.push_back(VH); + LocationOpIndexMap[i] = NewLocationOps.size() - 1; + LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i + << " now at index " << LocationOpIndexMap[i] << "\n"); + continue; + } - const auto *Rec = cast(CachedDVI.SCEV); - if (!Rec->isAffine()) - return; + // It's possible that a value referred to in the SCEV may have been + // optimised out by LSR. 
+    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
+        SE.containsUndefs(DVIRec.SCEVs[i])) {
+      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
+                        << " refers to a location that is now undef or erased. "
+                           "Salvage abandoned.\n");
+      return false;
+    }
 
-  LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
-                    << *CachedDVI.SCEV << '\n');
+    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
+                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
+
+    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
+    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
+
+    // Create an offset-based salvage expression if possible, as it requires
+    // fewer DWARF ops than an iteration count-based expression.
+    if (Optional<APInt> Offset =
+            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
+      if (Offset.getValue().getMinSignedBits() <= 64)
+        SalvageExpr->createOffsetExpr(Offset.getValue().getSExtValue(),
+                                      LSRInductionVar);
+    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
+                                                 SE))
+      return false;
+  }
 
-  // Initialise a new builder with the iteration count expression. In
-  // combination with the value's SCEV this enables recovery.
-  SCEVDbgValueBuilder RecoverValue(IterationCount);
-  if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
-    return;
+  // Merge the DbgValueBuilder generated expressions and the original
+  // DIExpression, place the result into a new vector.
+  SmallVector NewExpr;
+  if (DVIRec.Expr->getNumElements() == 0) {
+    assert(DVIRec.RecoveryExprs.size() == 1 &&
+           "Expected only a single recovery expression for an empty "
+           "DIExpression.");
+    assert(DVIRec.RecoveryExprs[0] &&
+           "Expected a SCEVDbgValueBuilder for location 0");
+    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
+    B->appendToVectors(NewExpr, NewLocationOps);
+  }
+  for (const auto &Op : DVIRec.Expr->expr_ops()) {
+    // Most Ops needn't be updated.
+    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
+      Op.appendToVector(NewExpr);
+      continue;
+    }
 
-  LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
-  RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
-  LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
-}
+    uint64_t LocationArgIndex = Op.getArg(0);
+    SCEVDbgValueBuilder *DbgBuilder =
+        DVIRec.RecoveryExprs[LocationArgIndex].get();
+    // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
+    // optimise it away. So just translate the argument to the updated
+    // location index.
+    if (!DbgBuilder) {
+      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
+      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
+             "Expected a positive index for the location-op position.");
+      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
+      continue;
+    }
+    // The location has a recovery expression.
+ DbgBuilder->appendToVectors(NewExpr, NewLocationOps); + } -static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV, - int64_t Offset) { - assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value."); - DbgValueInst *DVI = DVIRec.DVI; - SmallVector Ops; - DIExpression::appendOffset(Ops, Offset); - DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true); - LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n'); - DVI->setExpression(Expr); - llvm::Value *ValIV = dyn_cast(&IV); - DVI->replaceVariableLocationOp( - 0u, llvm::MetadataAsValue::get(DVI->getContext(), - llvm::ValueAsMetadata::get(ValIV))); - LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: " - << *DVIRec.DVI << '\n'); + UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr); + LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n"); + return true; } +/// Obtain an expression for the iteration count, then attempt to salvage the +/// dbg.value intrinsics. static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, - SmallVector &DVIToUpdate) { + SmallVector, 2> &DVIToUpdate) { if (DVIToUpdate.empty()) return; @@ -6213,49 +6473,22 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, if (!IVAddRec->isAffine()) return; + // Prevent translation using excessive resources. if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize) return; // The iteration count is required to recover location values. SCEVDbgValueBuilder IterCountExpr; - IterCountExpr.pushValue(LSRInductionVar); + IterCountExpr.pushLocation(LSRInductionVar); if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE)) return; LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar << '\n'); - // Needn't salvage if the location op hasn't been undef'd by LSR. for (auto &DVIRec : DVIToUpdate) { - if (!DVIRec.DVI->isUndef()) - continue; - - // Some DVIs that were single location-op when cached are now multi-op, - // due to LSR optimisations. However, multi-op salvaging is not yet - // supported by SCEV salvaging. But, we can attempt a salvage by restoring - // the pre-LSR single-op expression. - if (DVIRec.DVI->hasArgList()) { - if (!DVIRec.DVI->getVariableLocationOp(0)) - continue; - llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType(); - DVIRec.DVI->setRawLocation( - llvm::ValueAsMetadata::get(UndefValue::get(Ty))); - DVIRec.DVI->setExpression(DVIRec.Expr); - } - - LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: " - << *DVIRec.SCEV << '\n'); - - // Create a simple expression if the IV and value to salvage SCEVs - // start values differ by only a constant value. - if (Optional Offset = - SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) { - if (Offset.getValue().getMinSignedBits() <= 64) - RewriteDVIUsingOffset(DVIRec, *LSRInductionVar, - Offset.getValue().getSExtValue()); - } else { - RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE); - } + SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar, + IterCountExpr); } } } @@ -6263,39 +6496,53 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, /// Identify and cache salvageable DVI locations and expressions along with the /// corresponding SCEV(s). Also ensure that the DVI is not deleted between /// cacheing and salvaging. 
-static void
-DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
-                       SmallVector &SalvageableDVISCEVs,
-                       SmallSet, 2> &DVIHandles) {
+static void DbgGatherSalvagableDVI(
+    Loop *L, ScalarEvolution &SE,
+    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
+    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
   for (auto &B : L->getBlocks()) {
     for (auto &I : *B) {
       auto DVI = dyn_cast(&I);
       if (!DVI)
         continue;
-
+      // Ensure that if any location op is undef, the dbg.value is not
+      // cached.
       if (DVI->isUndef())
         continue;
 
-      if (DVI->hasArgList())
-        continue;
+      // Check that the location op SCEVs are suitable for translation to
+      // DIExpression.
+      const auto &HasTranslatableLocationOps =
+          [&](const DbgValueInst *DVI) -> bool {
+        for (const auto LocOp : DVI->location_ops()) {
+          if (!LocOp)
+            return false;
 
-      if (!DVI->getVariableLocationOp(0) ||
-          !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
-        continue;
+          if (!SE.isSCEVable(LocOp->getType()))
+            return false;
 
-      // SCEVUnknown wraps an llvm::Value, it does not have a start and stride.
-      // Therefore no translation to DIExpression is performed.
-      const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0));
-      if (isa(S))
-        continue;
+          const SCEV *S = SE.getSCEV(LocOp);
+          if (SE.containsUndefs(S))
+            return false;
+        }
+        return true;
+      };
 
-      // Avoid wasting resources generating an expression containing undef.
-      if (SE.containsUndefs(S))
+      if (!HasTranslatableLocationOps(DVI))
         continue;
 
-      SalvageableDVISCEVs.push_back(
-          {DVI, DVI->getExpression(), DVI->getRawLocation(),
-           SE.getSCEV(DVI->getVariableLocationOp(0))});
+      std::unique_ptr<DVIRecoveryRec> NewRec =
+          std::make_unique<DVIRecoveryRec>(DVI);
+      // Each location Op may need a SCEVDbgValueBuilder in order to recover it.
+      // Pre-allocating a vector will enable quick lookups of the builder later
+      // during the salvage.
+      NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps());
+      for (const auto LocOp : DVI->location_ops()) {
+        NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
+        NewRec->LocationOps.push_back(LocOp);
+        NewRec->HadLocationArgList = DVI->hasArgList();
+      }
+      SalvageableDVISCEVs.push_back(std::move(NewRec));
       DVIHandles.insert(DVI);
     }
   }
@@ -6344,9 +6591,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
 
   // Debug preservation - before we start removing anything identify which DVI
   // meet the salvageable criteria and store their DIExpression and SCEVs.
-  SmallVector SalvageableDVI;
+  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
   SmallSet, 2> DVIHandles;
-  DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);
+  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);
 
   bool Changed = false;
   std::unique_ptr MSSAU;
@@ -6375,8 +6622,26 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
       DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
     }
   }
+  // LSR may at times remove all uses of an induction variable from a loop.
+  // The only remaining use is the PHI in the exit block.
+  // When this is the case, if the exit value of the IV can be calculated using
+  // SCEV, we can replace the exit block PHI with the final value of the IV and
+  // skip the updates in each loop iteration.
+ if (L->isRecursivelyLCSSAForm(DT, LI) && LoopExitValHasSingleUse(L)) { + SmallVector DeadInsts; + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SCEVExpander Rewriter(SE, DL, "lsr", false); + int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT, + OnlyCheapRepl, DeadInsts); + if (Rewrites) { + Changed = true; + RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, + MSSAU.get()); + DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); + } + } - if (SalvageableDVI.empty()) + if (SalvageableDVIRecords.empty()) return Changed; // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with @@ -6384,13 +6649,16 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // TODO: Allow for multiple IV references for nested AddRecSCEVs for (auto &L : LI) { if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer)) - DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI); + DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords); else { LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV " "could not be identified.\n"); } } + for (auto &Rec : SalvageableDVIRecords) + Rec->clear(); + SalvageableDVIRecords.clear(); DVIHandles.clear(); return Changed; } diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 1ecbb86724e1..8c2868563227 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -42,10 +43,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopPeel.h" -#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -331,14 +330,23 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(L, &AC, EphValues); Loop *SubLoop = L->getSubLoops()[0]; - unsigned InnerLoopSize = + InstructionCost InnerLoopSizeIC = ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - unsigned OuterLoopSize = + InstructionCost OuterLoopSizeIC = ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n"); - LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n"); + LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n"); + LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n"); + + if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) { + LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions" + << " with invalid cost.\n"); + return LoopUnrollResult::Unmodified; + } + unsigned InnerLoopSize = *InnerLoopSizeIC.getValue(); + unsigned OuterLoopSize = *OuterLoopSizeIC.getValue(); + if (NotDuplicatable) { LLVM_DEBUG(dbgs() << " Not unrolling loop which 
contains non-duplicatable " "instructions.\n"); @@ -364,7 +372,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, Optional NewInnerEpilogueLoopID = makeFollowupLoopID( OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupRemainderInner}); - if (NewInnerEpilogueLoopID.hasValue()) + if (NewInnerEpilogueLoopID) SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue()); // Find trip count and trip multiple @@ -394,14 +402,14 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, Optional NewOuterEpilogueLoopID = makeFollowupLoopID( OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupRemainderOuter}); - if (NewOuterEpilogueLoopID.hasValue()) + if (NewOuterEpilogueLoopID) EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue()); } Optional NewInnerLoopID = makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupInner}); - if (NewInnerLoopID.hasValue()) + if (NewInnerLoopID) SubLoop->setLoopID(NewInnerLoopID.getValue()); else SubLoop->setLoopID(OrigSubLoopID); @@ -410,7 +418,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, Optional NewOuterLoopID = makeFollowupLoopID( OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter}); - if (NewOuterLoopID.hasValue()) { + if (NewOuterLoopID) { L->setLoopID(NewOuterLoopID.getValue()); // Do not setLoopAlreadyUnrolled if a followup was given. diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 9beb2281cf0f..fda86afe5f9d 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -133,7 +132,7 @@ static cl::opt UnrollAllowRemainder( "when unrolling a loop.")); static cl::opt - UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, + UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts")); static cl::opt UnrollMaxUpperBound( @@ -254,19 +253,19 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; // Apply user values provided by argument - if (UserThreshold.hasValue()) { + if (UserThreshold) { UP.Threshold = *UserThreshold; UP.PartialThreshold = *UserThreshold; } - if (UserCount.hasValue()) + if (UserCount) UP.Count = *UserCount; - if (UserAllowPartial.hasValue()) + if (UserAllowPartial) UP.Partial = *UserAllowPartial; - if (UserRuntime.hasValue()) + if (UserRuntime) UP.Runtime = *UserRuntime; - if (UserUpperBound.hasValue()) + if (UserUpperBound) UP.UpperBound = *UserUpperBound; - if (UserFullUnrollMaxCount.hasValue()) + if (UserFullUnrollMaxCount) UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; return UP; @@ -664,7 +663,7 @@ static Optional analyzeLoopUnrollCost( } /// ApproximateLoopSize - Approximate the size of the loop. 
-unsigned llvm::ApproximateLoopSize(
+InstructionCost llvm::ApproximateLoopSize(
     const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
     const TargetTransformInfo &TTI,
     const SmallPtrSetImpl &EphValues, unsigned BEInsns) {
@@ -675,7 +674,7 @@ unsigned llvm::ApproximateLoopSize(
   NotDuplicatable = Metrics.notDuplicatable;
   Convergent = Metrics.convergent;
 
-  unsigned LoopSize = Metrics.NumInsts;
+  InstructionCost LoopSize = Metrics.NumInsts;
 
   // Don't allow an estimate of size zero. This would allow unrolling of loops
   // with huge iteration counts, which is a compile time problem even if it's
@@ -683,7 +682,9 @@ unsigned llvm::ApproximateLoopSize(
   // that each loop has at least three instructions (likely a conditional
   // branch, a comparison feeding that branch, and some kind of loop increment
   // feeding that comparison instruction).
-  LoopSize = std::max(LoopSize, BEInsns + 1);
+  if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+    // This is an open-coded max() on InstructionCost
+    LoopSize = BEInsns + 1;
 
   return LoopSize;
 }
@@ -788,15 +789,13 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
 
   // 2nd priority is unroll count set by pragma.
   if (PInfo.PragmaCount > 0) {
-    if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
-        UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+    if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
       return PInfo.PragmaCount;
   }
 
-  if (PInfo.PragmaFullUnroll && TripCount != 0) {
-    if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
-      return TripCount;
-  }
+  if (PInfo.PragmaFullUnroll && TripCount != 0)
+    return TripCount;
+
+  // If we didn't return until here, continue with other priorities.
   return None;
 }
@@ -912,7 +911,7 @@ bool llvm::computeUnrollCount(
   if (PP.PeelCount) {
     if (UnrollCount.getNumOccurrences() > 0) {
       report_fatal_error("Cannot specify both explicit peel count and "
-                         "explicit unroll count");
+                         "explicit unroll count", /*GenCrashDiag=*/false);
     }
     UP.Count = 1;
     UP.Runtime = false;
@@ -1192,10 +1191,18 @@ static LoopUnrollResult tryToUnrollLoop(
   SmallPtrSet EphValues;
   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
 
-  unsigned LoopSize =
+  InstructionCost LoopSizeIC =
       ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
                           TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
+  LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSizeIC << "\n");
+
+  if (!LoopSizeIC.isValid()) {
+    LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains instructions"
+                      << " with invalid cost.\n");
+    return LoopUnrollResult::Unmodified;
+  }
+  unsigned LoopSize = *LoopSizeIC.getValue();
+
   if (NotDuplicatable) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
                       << " instructions.\n");
@@ -1316,7 +1323,7 @@ static LoopUnrollResult tryToUnrollLoop(
     Optional RemainderLoopID =
         makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
                                         LLVMLoopUnrollFollowupRemainder});
-    if (RemainderLoopID.hasValue())
+    if (RemainderLoopID)
      RemainderLoop->setLoopID(RemainderLoopID.getValue());
   }
 
@@ -1324,7 +1331,7 @@ static LoopUnrollResult tryToUnrollLoop(
   Optional NewLoopID =
       makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
                                       LLVMLoopUnrollFollowupUnrolled});
-  if (NewLoopID.hasValue()) {
+  if (NewLoopID) {
    L->setLoopID(NewLoopID.getValue());
 
     // Do not setLoopAlreadyUnrolled if loop attributes have been specified
@@ -1548,8 +1555,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager
&AM, PreservedAnalyses LoopUnrollPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &SE = AM.getResult(F); auto &LI = AM.getResult(F); + // There are no loops in the function. Return before computing other expensive + // analyses. + if (LI.empty()) + return PreservedAnalyses::all(); + auto &SE = AM.getResult(F); auto &TTI = AM.getResult(F); auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp deleted file mode 100644 index 76bb5497c2c2..000000000000 --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ /dev/null @@ -1,1774 +0,0 @@ -//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass transforms loops that contain branches on loop-invariant conditions -// to multiple loops. For example, it turns the left into the right code: -// -// for (...) if (lic) -// A for (...) -// if (lic) A; B; C -// B else -// C for (...) -// A; C -// -// This can increase the size of the code exponentially (doubling it every time -// a loop is unswitched) so we only unswitch if the resultant code will be -// smaller than a threshold. -// -// This pass expects LICM to be run before it to hoist invariant conditions out -// of the loop, to make the unswitching opportunity obvious. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyBlockFrequencyInfo.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/MustExecute.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/ValueMapper.h" 
-#include -#include -#include -#include -#include -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "loop-unswitch" - -STATISTIC(NumBranches, "Number of branches unswitched"); -STATISTIC(NumSwitches, "Number of switches unswitched"); -STATISTIC(NumGuards, "Number of guards unswitched"); -STATISTIC(NumSelects , "Number of selects unswitched"); -STATISTIC(NumTrivial , "Number of unswitches that are trivial"); -STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); -STATISTIC(TotalInsts, "Total number of instructions analyzed"); - -// The specific value of 100 here was chosen based only on intuition and a -// few specific examples. -static cl::opt -Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), - cl::init(100), cl::Hidden); - -static cl::opt - MSSAThreshold("loop-unswitch-memoryssa-threshold", - cl::desc("Max number of memory uses to explore during " - "partial unswitching analysis"), - cl::init(100), cl::Hidden); - -namespace { - - class LUAnalysisCache { - using UnswitchedValsMap = - DenseMap>; - using UnswitchedValsIt = UnswitchedValsMap::iterator; - - struct LoopProperties { - unsigned CanBeUnswitchedCount; - unsigned WasUnswitchedCount; - unsigned SizeEstimation; - UnswitchedValsMap UnswitchedVals; - }; - - // Here we use std::map instead of DenseMap, since we need to keep valid - // LoopProperties pointer for current loop for better performance. - using LoopPropsMap = std::map; - using LoopPropsMapIt = LoopPropsMap::iterator; - - LoopPropsMap LoopsProperties; - UnswitchedValsMap *CurLoopInstructions = nullptr; - LoopProperties *CurrentLoopProperties = nullptr; - - // A loop unswitching with an estimated cost above this threshold - // is not performed. MaxSize is turned into unswitching quota for - // the current loop, and reduced correspondingly, though note that - // the quota is returned by releaseMemory() when the loop has been - // processed, so that MaxSize will return to its previous - // value. So in most cases MaxSize will equal the Threshold flag - // when a new loop is processed. An exception to that is that - // MaxSize will have a smaller value while processing nested loops - // that were introduced due to loop unswitching of an outer loop. - // - // FIXME: The way that MaxSize works is subtle and depends on the - // pass manager processing loops and calling releaseMemory() in a - // specific order. It would be good to find a more straightforward - // way of doing what MaxSize does. - unsigned MaxSize; - - public: - LUAnalysisCache() : MaxSize(Threshold) {} - - // Analyze loop. Check its size, calculate is it possible to unswitch - // it. Returns true if we can unswitch this loop. - bool countLoop(const Loop *L, const TargetTransformInfo &TTI, - AssumptionCache *AC); - - // Clean all data related to given loop. - void forgetLoop(const Loop *L); - - // Mark case value as unswitched. - // Since SI instruction can be partly unswitched, in order to avoid - // extra unswitching in cloned loops keep track all unswitched values. - void setUnswitched(const SwitchInst *SI, const Value *V); - - // Check was this case value unswitched before or not. - bool isUnswitched(const SwitchInst *SI, const Value *V); - - // Returns true if another unswitching could be done within the cost - // threshold. - bool costAllowsUnswitching(); - - // Clone all loop-unswitch related loop properties. - // Redistribute unswitching quotas. - // Note, that new loop data is stored inside the VMap. 
- void cloneData(const Loop *NewLoop, const Loop *OldLoop, - const ValueToValueMapTy &VMap); - }; - - class LoopUnswitch : public LoopPass { - LoopInfo *LI; // Loop information - LPPassManager *LPM; - AssumptionCache *AC; - - // Used to check if second loop needs processing after - // rewriteLoopBodyWithConditionConstant rewrites first loop. - std::vector LoopProcessWorklist; - - LUAnalysisCache BranchesInfo; - - bool OptimizeForSize; - bool RedoLoop = false; - - Loop *CurrentLoop = nullptr; - DominatorTree *DT = nullptr; - MemorySSA *MSSA = nullptr; - AAResults *AA = nullptr; - std::unique_ptr MSSAU; - BasicBlock *LoopHeader = nullptr; - BasicBlock *LoopPreheader = nullptr; - - bool SanitizeMemory; - SimpleLoopSafetyInfo SafetyInfo; - - // LoopBlocks contains all of the basic blocks of the loop, including the - // preheader of the loop, the body of the loop, and the exit blocks of the - // loop, in that order. - std::vector LoopBlocks; - // NewBlocks contained cloned copy of basic blocks from LoopBlocks. - std::vector NewBlocks; - - bool HasBranchDivergence; - - public: - static char ID; // Pass ID, replacement for typeid - - explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false) - : LoopPass(ID), OptimizeForSize(Os), - HasBranchDivergence(HasBranchDivergence) { - initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); - } - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - bool processCurrentLoop(); - bool isUnreachableDueToPreviousUnswitching(BasicBlock *); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - // Lazy BFI and BPI are marked as preserved here so Loop Unswitching - // can remain part of the same loop pass as LICM - AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - if (HasBranchDivergence) - AU.addRequired(); - getLoopAnalysisUsage(AU); - } - - private: - void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); } - - void initLoopData() { - LoopHeader = CurrentLoop->getHeader(); - LoopPreheader = CurrentLoop->getLoopPreheader(); - } - - /// Split all of the edges from inside the loop to their exit blocks. - /// Update the appropriate Phi nodes as we do so. - void splitExitEdges(Loop *L, - const SmallVectorImpl &ExitBlocks); - - bool tryTrivialLoopUnswitch(bool &Changed); - - bool unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI = nullptr, - ArrayRef ToDuplicate = {}); - void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, - BasicBlock *ExitBlock, Instruction *TI); - void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, - Instruction *TI, - ArrayRef ToDuplicate = {}); - - void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, - Constant *Val, bool IsEqual); - - void - emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI, - ArrayRef ToDuplicate = {}); - - void simplifyCode(std::vector &Worklist, Loop *L); - - /// Given that the Invariant is not equal to Val. Simplify instructions - /// in the loop. - Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant, - Constant *Val); - }; - -} // end anonymous namespace - -// Analyze loop. Check its size, calculate is it possible to unswitch -// it. Returns true if we can unswitch this loop. 
-bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, - AssumptionCache *AC) { - LoopPropsMapIt PropsIt; - bool Inserted; - std::tie(PropsIt, Inserted) = - LoopsProperties.insert(std::make_pair(L, LoopProperties())); - - LoopProperties &Props = PropsIt->second; - - if (Inserted) { - // New loop. - - // Limit the number of instructions to avoid causing significant code - // expansion, and the number of basic blocks, to avoid loops with - // large numbers of branches which cause loop unswitching to go crazy. - // This is a very ad-hoc heuristic. - - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - - // FIXME: This is overly conservative because it does not take into - // consideration code simplification opportunities and code that can - // be shared by the resultant unswitched loops. - CodeMetrics Metrics; - for (BasicBlock *BB : L->blocks()) - Metrics.analyzeBasicBlock(BB, TTI, EphValues); - - Props.SizeEstimation = Metrics.NumInsts; - Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); - Props.WasUnswitchedCount = 0; - MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; - - if (Metrics.notDuplicatable) { - LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() - << ", contents cannot be " - << "duplicated!\n"); - return false; - } - } - - // Be careful. This links are good only before new loop addition. - CurrentLoopProperties = &Props; - CurLoopInstructions = &Props.UnswitchedVals; - - return true; -} - -// Clean all data related to given loop. -void LUAnalysisCache::forgetLoop(const Loop *L) { - LoopPropsMapIt LIt = LoopsProperties.find(L); - - if (LIt != LoopsProperties.end()) { - LoopProperties &Props = LIt->second; - MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * - Props.SizeEstimation; - LoopsProperties.erase(LIt); - } - - CurrentLoopProperties = nullptr; - CurLoopInstructions = nullptr; -} - -// Mark case value as unswitched. -// Since SI instruction can be partly unswitched, in order to avoid -// extra unswitching in cloned loops keep track all unswitched values. -void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) { - (*CurLoopInstructions)[SI].insert(V); -} - -// Check was this case value unswitched before or not. -bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { - return (*CurLoopInstructions)[SI].count(V); -} - -bool LUAnalysisCache::costAllowsUnswitching() { - return CurrentLoopProperties->CanBeUnswitchedCount > 0; -} - -// Clone all loop-unswitch related loop properties. -// Redistribute unswitching quotas. -// Note, that new loop data is stored inside the VMap. -void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, - const ValueToValueMapTy &VMap) { - LoopProperties &NewLoopProps = LoopsProperties[NewLoop]; - LoopProperties &OldLoopProps = *CurrentLoopProperties; - UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; - - // Reallocate "can-be-unswitched quota" - - --OldLoopProps.CanBeUnswitchedCount; - ++OldLoopProps.WasUnswitchedCount; - NewLoopProps.WasUnswitchedCount = 0; - unsigned Quota = OldLoopProps.CanBeUnswitchedCount; - NewLoopProps.CanBeUnswitchedCount = Quota / 2; - OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; - - NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; - - // Clone unswitched values info: - // for new loop switches we clone info about values that was - // already unswitched and has redundant successors. 
- for (const auto &I : Insts) { - const SwitchInst *OldInst = I.first; - Value *NewI = VMap.lookup(OldInst); - const SwitchInst *NewInst = cast_or_null(NewI); - assert(NewInst && "All instructions that are in SrcBB must be in VMap."); - - NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; - } -} - -char LoopUnswitch::ID = 0; - -INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", - false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", - false, false) - -Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) { - return new LoopUnswitch(Os, HasBranchDivergence); -} - -/// Operator chain lattice. -enum OperatorChain { - OC_OpChainNone, ///< There is no operator. - OC_OpChainOr, ///< There are only ORs. - OC_OpChainAnd, ///< There are only ANDs. - OC_OpChainMixed ///< There are ANDs and ORs. -}; - -/// Cond is a condition that occurs in L. If it is invariant in the loop, or has -/// an invariant piece, return the invariant. Otherwise, return null. -// -/// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a -/// mixed operator chain, as we can not reliably find a value which will -/// simplify the operator chain. If the chain is AND-only or OR-only, we can use -/// 0 or ~0 to simplify the chain. -/// -/// NOTE: In case a partial LIV and a mixed operator chain, we may be able to -/// simplify the condition itself to a loop variant condition, but at the -/// cost of creating an entirely new loop. -static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, - OperatorChain &ParentChain, - DenseMap &Cache, - MemorySSAUpdater *MSSAU) { - auto CacheIt = Cache.find(Cond); - if (CacheIt != Cache.end()) - return CacheIt->second; - - // We started analyze new instruction, increment scanned instructions counter. - ++TotalInsts; - - // We can never unswitch on vector conditions. - if (Cond->getType()->isVectorTy()) - return nullptr; - - // Constants should be folded, not unswitched on! - if (isa(Cond)) return nullptr; - - // TODO: Handle: br (VARIANT|INVARIANT). - - // Hoist simple values out. - if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { - Cache[Cond] = Cond; - return Cond; - } - - // Walk up the operator chain to find partial invariant conditions. - if (BinaryOperator *BO = dyn_cast(Cond)) - if (BO->getOpcode() == Instruction::And || - BO->getOpcode() == Instruction::Or) { - // Given the previous operator, compute the current operator chain status. - OperatorChain NewChain; - switch (ParentChain) { - case OC_OpChainNone: - NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : - OC_OpChainOr; - break; - case OC_OpChainOr: - NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr : - OC_OpChainMixed; - break; - case OC_OpChainAnd: - NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : - OC_OpChainMixed; - break; - case OC_OpChainMixed: - NewChain = OC_OpChainMixed; - break; - } - - // If we reach a Mixed state, we do not want to keep walking up as we can not - // reliably find a value that will simplify the chain. With this check, we - // will return null on the first sight of mixed chain and the caller will - // either backtrack to find partial LIV in other operand or return null. 
-      if (NewChain != OC_OpChainMixed) {
-        // Update the current operator chain type before we search up the
-        // chain.
-        ParentChain = NewChain;
-        // If either the left or right side is invariant, we can unswitch on
-        // this, which will cause the branch to go away in one loop and the
-        // condition to simplify in the other one.
-        if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed,
-                                              ParentChain, Cache, MSSAU)) {
-          Cache[Cond] = LHS;
-          return LHS;
-        }
-        // We did not manage to find a partial LIV in operand(0). Backtrack
-        // and try operand(1).
-        ParentChain = NewChain;
-        if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed,
-                                              ParentChain, Cache, MSSAU)) {
-          Cache[Cond] = RHS;
-          return RHS;
-        }
-      }
-    }
-
-  Cache[Cond] = nullptr;
-  return nullptr;
-}
-
-/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
-/// an invariant piece, return the invariant along with the operator chain type.
-/// Otherwise, return null.
-static std::pair<Value *, OperatorChain>
-findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
-                     MemorySSAUpdater *MSSAU) {
-  DenseMap<Value *, Value *> Cache;
-  OperatorChain OpChain = OC_OpChainNone;
-  Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU);
-
-  // In case we do find a LIV, it can not be obtained by walking up a mixed
-  // operator chain.
-  assert((!FCond || OpChain != OC_OpChainMixed) &&
-         "Do not expect a partial LIV with mixed operator chain");
-  return {FCond, OpChain};
-}
-
-bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
-  if (skipLoop(L))
-    return false;
-
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
-      *L->getHeader()->getParent());
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  LPM = &LPMRef;
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
-  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
-  CurrentLoop = L;
-  Function *F = CurrentLoop->getHeader()->getParent();
-
-  SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
-  if (SanitizeMemory)
-    SafetyInfo.computeLoopSafetyInfo(L);
-
-  if (VerifyMemorySSA)
-    MSSA->verifyMemorySSA();
-
-  bool Changed = false;
-  do {
-    assert(CurrentLoop->isLCSSAForm(*DT));
-    if (VerifyMemorySSA)
-      MSSA->verifyMemorySSA();
-    RedoLoop = false;
-    Changed |= processCurrentLoop();
-  } while (RedoLoop);
-
-  if (VerifyMemorySSA)
-    MSSA->verifyMemorySSA();
-
-  return Changed;
-}
-
-// Return true if the BasicBlock BB is unreachable from the loop header.
-// Return false otherwise.
-bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
-  auto *Node = DT->getNode(BB)->getIDom();
-  BasicBlock *DomBB = Node->getBlock();
-  while (CurrentLoop->contains(DomBB)) {
-    BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
-
-    Node = DT->getNode(DomBB)->getIDom();
-    DomBB = Node->getBlock();
-
-    if (!BInst || !BInst->isConditional())
-      continue;
-
-    Value *Cond = BInst->getCondition();
-    if (!isa<ConstantInt>(Cond))
-      continue;
-
-    BasicBlock *UnreachableSucc =
-        Cond == ConstantInt::getTrue(Cond->getContext())
-            ? BInst->getSuccessor(1)
-            : BInst->getSuccessor(0);
-
-    if (DT->dominates(UnreachableSucc, BB))
-      return true;
-  }
-  return false;
-}
-
-/// FIXME: Remove this workaround when freeze related patches are done.
-/// LoopUnswitch and Equality propagation in GVN have a discrepancy about
-/// whether a branch on undef/poison has undefined behavior. This is here to
-/// rule out some common cases where we have already found such a discrepancy
-/// causing problems. Details can be found in PR31652. Note that if this
-/// function returns true, it is unsafe; a false return, however, does not
-/// mean it is necessarily safe.
-static bool equalityPropUnSafe(Value &LoopCond) {
-  ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond);
-  if (!CI || !CI->isEquality())
-    return false;
-
-  Value *LHS = CI->getOperand(0);
-  Value *RHS = CI->getOperand(1);
-  if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
-    return true;
-
-  auto HasUndefInPHI = [](PHINode &PN) {
-    for (Value *Opd : PN.incoming_values()) {
-      if (isa<UndefValue>(Opd))
-        return true;
-    }
-    return false;
-  };
-  PHINode *LPHI = dyn_cast<PHINode>(LHS);
-  PHINode *RPHI = dyn_cast<PHINode>(RHS);
-  if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI)))
-    return true;
-
-  auto HasUndefInSelect = [](SelectInst &SI) {
-    if (isa<UndefValue>(SI.getTrueValue()) ||
-        isa<UndefValue>(SI.getFalseValue()))
-      return true;
-    return false;
-  };
-  SelectInst *LSI = dyn_cast<SelectInst>(LHS);
-  SelectInst *RSI = dyn_cast<SelectInst>(RHS);
-  if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI)))
-    return true;
-  return false;
-}
-
-/// Do the actual work and unswitch the loop if possible and profitable.
-bool LoopUnswitch::processCurrentLoop() {
-  bool Changed = false;
-
-  initLoopData();
-
-  // If LoopSimplify was unable to form a preheader, don't do any unswitching.
-  if (!LoopPreheader)
-    return false;
-
-  // Loops with indirectbr cannot be cloned.
-  if (!CurrentLoop->isSafeToClone())
-    return false;
-
-  // Without dedicated exits, splitting the exit edge may fail.
-  if (!CurrentLoop->hasDedicatedExits())
-    return false;
-
-  LLVMContext &Context = LoopHeader->getContext();
-
-  // Analyze the loop cost, and stop unswitching if the loop's contents cannot
-  // be duplicated.
-  if (!BranchesInfo.countLoop(
-          CurrentLoop,
-          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
-              *CurrentLoop->getHeader()->getParent()),
-          AC))
-    return false;
-
-  // Try trivial unswitch first, before looping over the other basic blocks in
-  // the loop.
-  if (tryTrivialLoopUnswitch(Changed)) {
-    return true;
-  }
-
-  // Do not do non-trivial unswitch while optimizing for size.
-  // FIXME: Use Function::hasOptSize().
-  if (OptimizeForSize ||
-      LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
-    return Changed;
-
-  // Run through the instructions in the loop, keeping track of three things:
-  //
-  //  - That we do not unswitch loops containing convergent operations, as we
-  //    might be making them control dependent on the unswitch value when they
-  //    were not before.
-  //    FIXME: This could be refined to only bail if the convergent operation
-  //    is not already control-dependent on the unswitch value.
-  //
-  //  - That basic blocks in the loop contain invokes whose predecessor edges
-  //    we cannot split.
-  //
-  //  - The set of guard intrinsics encountered (these are non-terminator
-  //    instructions that are also profitable to unswitch).
-
-  SmallVector<IntrinsicInst *, 4> Guards;
-
-  for (const auto BB : CurrentLoop->blocks()) {
-    for (auto &I : *BB) {
-      auto *CB = dyn_cast<CallBase>(&I);
-      if (!CB)
-        continue;
-      if (CB->isConvergent())
-        return Changed;
-      if (auto *II = dyn_cast<InvokeInst>(&I))
-        if (!II->getUnwindDest()->canSplitPredecessors())
-          return Changed;
-      if (auto *II = dyn_cast<IntrinsicInst>(&I))
-        if (II->getIntrinsicID() == Intrinsic::experimental_guard)
-          Guards.push_back(II);
-    }
-  }
-
-  for (IntrinsicInst *Guard : Guards) {
-    Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop,
-                                           Changed, MSSAU.get())
-                          .first;
-    if (LoopCond &&
-        unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
-      // NB! Unswitching (if successful) could have erased some of the
-      // instructions in Guards, leaving dangling pointers there.
-      // This is fine because we're returning now and won't look at Guards
-      // again.
-      ++NumGuards;
-      return true;
-    }
-  }
-
-  // Loop over all of the basic blocks in the loop. If we find an interior
-  // block that is branching on a loop-invariant condition, we can unswitch
-  // this loop.
-  for (Loop::block_iterator I = CurrentLoop->block_begin(),
-                            E = CurrentLoop->block_end();
-       I != E; ++I) {
-    Instruction *TI = (*I)->getTerminator();
-
-    // Unswitching on a potentially uninitialized predicate is not
-    // MSan-friendly. Limit this to the cases when the original predicate is
-    // guaranteed to execute, to avoid creating a use-of-uninitialized-value
-    // in the code that did not have one.
-    // This is a workaround for the discrepancy between LLVM IR and MSan
-    // semantics. See PR28054 for more details.
-    if (SanitizeMemory &&
-        !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop))
-      continue;
-
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      // Some branches may be rendered unreachable because of previous
-      // unswitching.
-      // Unswitch only those branches that are reachable.
-      if (isUnreachableDueToPreviousUnswitching(*I))
-        continue;
-
-      // If this isn't branching on an invariant condition, we can't unswitch
-      // it.
-      if (BI->isConditional()) {
-        // See if this, or some part of it, is loop invariant. If so, we can
-        // unswitch on it if we desire.
-        Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
-                                               Changed, MSSAU.get())
-                              .first;
-        if (LoopCond && !equalityPropUnSafe(*LoopCond) &&
-            unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
-          ++NumBranches;
-          return true;
-        }
-      }
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-      Value *SC = SI->getCondition();
-      Value *LoopCond;
-      OperatorChain OpChain;
-      std::tie(LoopCond, OpChain) =
-          findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get());
-
-      unsigned NumCases = SI->getNumCases();
-      if (LoopCond && NumCases) {
-        // Find a value to unswitch on:
-        // FIXME: this should choose the most expensive case!
-        // FIXME: scan for a case with a non-critical edge?
-        Constant *UnswitchVal = nullptr;
-        // Find a case value such that at least one case value is unswitched
-        // out.
-        if (OpChain == OC_OpChainAnd) {
-          // If the chain only has ANDs and the switch has a case value of 0,
-          // dropping a 0 into the chain will unswitch the 0 case out.
-          auto *AllZero =
-              cast<ConstantInt>(Constant::getNullValue(SC->getType()));
-          if (BranchesInfo.isUnswitched(SI, AllZero))
-            continue;
-          // We are unswitching 0 out.
-          UnswitchVal = AllZero;
-        } else if (OpChain == OC_OpChainOr) {
-          // If the chain only has ORs and the switch has a case value of ~0,
-          // dropping a ~0 into the chain will unswitch the ~0 case out.
-          auto *AllOne =
-              cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
-          if (BranchesInfo.isUnswitched(SI, AllOne))
-            continue;
-          // We are unswitching ~0 out.
-          UnswitchVal = AllOne;
-        } else {
-          assert(OpChain == OC_OpChainNone &&
-                 "Expect to unswitch on trivial chain");
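A hypothetical illustration of the OC_OpChainAnd case handled just above (invented names; a sketch, not code from this patch): the switch condition is an AND-only chain containing the invariant %inv, so unswitching on %inv == 0 produces a loop copy in which %chain folds to 0 and the 0 case is taken unconditionally.

define void @sketch(i32 %inv, i32 %n) {
entry:
  br label %loop
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
  %chain = and i32 %inv, %i        ; AND-only chain with invariant %inv
  switch i32 %chain, label %latch [
    i32 0, label %zero             ; case peeled off by unswitching on 0
  ]
zero:
  br label %latch
latch:
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}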
-          // Do not process the same value again and again. At this point we
-          // have some cases already unswitched and some not yet unswitched;
-          // find the first not-yet-unswitched one.
-          for (auto Case : SI->cases()) {
-            Constant *UnswitchValCandidate = Case.getCaseValue();
-            if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
-              UnswitchVal = UnswitchValCandidate;
-              break;
-            }
-          }
-        }
-
-        if (!UnswitchVal)
-          continue;
-
-        if (unswitchIfProfitable(LoopCond, UnswitchVal)) {
-          ++NumSwitches;
-          // In case of a full LIV, UnswitchVal is the value we unswitched out.
-          // In case of a partial LIV, we only unswitch when it is an AND-chain
-          // or OR-chain. In both cases the switch input value simplifies to
-          // UnswitchVal.
-          BranchesInfo.setUnswitched(SI, UnswitchVal);
-          return true;
-        }
-      }
-    }
-
-    // Scan the instructions to check for unswitchable values.
-    for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
-         BBI != E; ++BBI)
-      if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
-        Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
-                                               Changed, MSSAU.get())
-                              .first;
-        if (LoopCond &&
-            unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
-          ++NumSelects;
-          return true;
-        }
-      }
-  }
-
-  // Check if there is a header condition that is invariant along the paths
-  // from either the true or false successors back to the header. This allows
-  // unswitching conditions that depend on memory accesses, if there's a path
-  // not clobbering the memory locations. Check if this transform has been
-  // disabled using metadata, to avoid unswitching the same loop multiple
-  // times.
-  if (MSSA &&
-      !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
-    if (auto Info =
-            hasPartialIVCondition(*CurrentLoop, MSSAThreshold, *MSSA, *AA)) {
-      assert(!Info->InstToDuplicate.empty() &&
-             "need at least a partially invariant condition");
-      LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
-                        << *Info->InstToDuplicate[0] << "\n");
-
-      Instruction *TI = CurrentLoop->getHeader()->getTerminator();
-      Value *LoopCond = Info->InstToDuplicate[0];
-
-      // If the partially unswitched path is a no-op and has a single exit
-      // block, we do not need to do full unswitching. Instead, we can directly
-      // branch to the exit.
-      // TODO: Instead of duplicating the checks, we could also just directly
-      // branch to the exit from the conditional branch in the loop.
-      if (Info->PathIsNoop) {
-        if (HasBranchDivergence &&
-            getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
-          LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
-                            << CurrentLoop->getHeader()->getName()
-                            << " at non-trivial condition '"
-                            << *Info->KnownValue << "' == " << *LoopCond << "\n"
-                            << ". Condition is divergent.\n");
-          return false;
-        }
-
-        ++NumBranches;
-
-        BasicBlock *TrueDest = LoopHeader;
-        BasicBlock *FalseDest = Info->ExitForPath;
-        if (Info->KnownValue->isOneValue())
-          std::swap(TrueDest, FalseDest);
-
-        auto *OldBr =
-            cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
-        emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
-                                       FalseDest, OldBr, TI,
-                                       Info->InstToDuplicate);
-        delete OldBr;
-        RedoLoop = false;
-        return true;
-      }
-
-      // Otherwise, the path is not a no-op. Run regular unswitching.
-      if (unswitchIfProfitable(LoopCond, Info->KnownValue,
-                               CurrentLoop->getHeader()->getTerminator(),
-                               Info->InstToDuplicate)) {
-        ++NumBranches;
-        RedoLoop = false;
-        return true;
-      }
-    }
-  }
-
-  return Changed;
-}
-
-/// Check to see if all paths from BB exit the loop with no side effects
-/// (including infinite loops).
-///
-/// If so, return true and set ExitBB to the block we exit through.
-///
-static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
-                                         BasicBlock *&ExitBB,
-                                         std::set<BasicBlock *> &Visited) {
-  if (!Visited.insert(BB).second) {
-    // Already visited. Without more analysis, this could indicate an infinite
-    // loop.
-    return false;
-  }
-  if (!L->contains(BB)) {
-    // Otherwise, this is a loop exit, which is fine so long as it is the
-    // first exit.
-    if (ExitBB) return false;
-    ExitBB = BB;
-    return true;
-  }
-
-  // Otherwise, this is an unvisited intra-loop node. Check all successors.
-  for (BasicBlock *Succ : successors(BB)) {
-    // Check to see if the successor is a trivial loop exit.
-    if (!isTrivialLoopExitBlockHelper(L, Succ, ExitBB, Visited))
-      return false;
-  }
-
-  // Okay, everything after this looks good, check to make sure that this block
-  // doesn't include any side effects.
-  for (Instruction &I : *BB)
-    if (I.mayHaveSideEffects())
-      return false;
-
-  return true;
-}
-
-/// Return true if the specified block unconditionally leads to an exit from
-/// the specified loop, and has no side-effects in the process. If so, return
-/// the block that is exited to, otherwise return null.
-static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
-  std::set<BasicBlock *> Visited;
-  Visited.insert(L->getHeader()); // Branches to header make infinite loops.
-  BasicBlock *ExitBB = nullptr;
-  if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
-    return ExitBB;
-  return nullptr;
-}
-
-/// We have found that we can unswitch CurrentLoop when LoopCond == Val to
-/// simplify the loop. If we decide that this is profitable,
-/// unswitch the loop, reprocess the pieces, then return true.
-bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
-                                        Instruction *TI,
-                                        ArrayRef<Instruction *> ToDuplicate) {
-  // Check to see if it would be profitable to unswitch the current loop.
-  if (!BranchesInfo.costAllowsUnswitching()) {
-    LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
-                      << CurrentLoop->getHeader()->getName()
-                      << " at non-trivial condition '" << *Val
-                      << "' == " << *LoopCond << "\n"
-                      << ". Cost too high.\n");
-    return false;
-  }
-  if (HasBranchDivergence &&
-      getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
-    LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
-                      << CurrentLoop->getHeader()->getName()
-                      << " at non-trivial condition '" << *Val
-                      << "' == " << *LoopCond << "\n"
-                      << ". Condition is divergent.\n");
-    return false;
-  }
-
-  unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
-  return true;
-}
-
-/// Emit a conditional branch on two values: if LIC == Val, branch to TrueDest,
-/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
-/// and remove (but do not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(
-    Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
-    BranchInst *OldBranch, Instruction *TI,
-    ArrayRef<Instruction *> ToDuplicate) {
-  assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
-  assert(TrueDest != FalseDest && "Branch targets should be different");
-
-  // Insert a conditional branch on LIC to the two preheaders. The original
-  // code is the true version and the new code is the false version.
-  Value *BranchVal = LIC;
-  bool Swapped = false;
-
-  if (!ToDuplicate.empty()) {
-    ValueToValueMapTy Old2New;
-    for (Instruction *I : reverse(ToDuplicate)) {
-      auto *New = I->clone();
-      New->insertBefore(OldBranch);
-      RemapInstruction(New, Old2New,
-                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-      Old2New[I] = New;
-
-      if (MSSAU) {
-        MemorySSA *MSSA = MSSAU->getMemorySSA();
-        auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
-        if (!MemA)
-          continue;
-
-        Loop *L = LI->getLoopFor(I->getParent());
-        auto *DefiningAccess = MemA->getDefiningAccess();
-        // Get the first defining access before the loop.
-        while (L->contains(DefiningAccess->getBlock())) {
-          // If the defining access is a MemoryPhi, get the incoming
-          // value for the pre-header as the defining access.
-          if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
-            DefiningAccess =
-                MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
-          } else {
-            DefiningAccess =
-                cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
-          }
-        }
-        MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
-                                      MemorySSA::BeforeTerminator);
-      }
-    }
-    BranchVal = Old2New[ToDuplicate[0]];
-  } else {
-    if (!isa<ConstantInt>(Val) ||
-        Val->getType() != Type::getInt1Ty(LIC->getContext()))
-      BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
-    else if (Val != ConstantInt::getTrue(Val->getContext())) {
-      // We want to enter the new loop when the condition is true.
-      std::swap(TrueDest, FalseDest);
-      Swapped = true;
-    }
-  }
-
-  // The old branch will be removed, so save its parent and successor to
-  // update the DomTree.
-  auto *OldBranchSucc = OldBranch->getSuccessor(0);
-  auto *OldBranchParent = OldBranch->getParent();
-
-  // Insert the new branch.
-  BranchInst *BI =
-      IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
-  if (Swapped)
-    BI->swapProfMetadata();
-
-  // Remove the old branch so there is only one branch at the end. This is
-  // needed to perform DomTree's internal DFS walk on the function's CFG.
-  OldBranch->removeFromParent();
-
-  // Inform the DT about the new branch.
-  if (DT) {
-    // First, add both successors.
-    SmallVector<DominatorTree::UpdateType, 3> Updates;
-    if (TrueDest != OldBranchSucc)
-      Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest});
-    if (FalseDest != OldBranchSucc)
-      Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest});
-    // If both of the new successors are different from the old one, inform the
-    // DT that the edge was deleted.
-    if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
-      Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
-    }
-
-    if (MSSAU)
-      MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
-    else
-      DT->applyUpdates(Updates);
-  }
-
-  // If either edge is critical, split it. This helps preserve LoopSimplify
-  // form for enclosing loops.
-  auto Options =
-      CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
-  SplitCriticalEdge(BI, 0, Options);
-  SplitCriticalEdge(BI, 1, Options);
-}
-
-/// Given a loop that has a trivial unswitchable condition in it (a cond branch
-/// from its header block to its latch block, where the path through the loop
-/// that doesn't execute its body has no side-effects), unswitch it. This
-/// doesn't involve any code duplication, just moving the conditional branch
-/// outside of the loop and updating loop info.
-void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
-                                            BasicBlock *ExitBlock,
-                                            Instruction *TI) {
-  LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
-                    << LoopHeader->getName() << " [" << L->getBlocks().size()
-                    << " blocks] in Function "
-                    << L->getHeader()->getParent()->getName()
-                    << " on cond: " << *Val << " == " << *Cond << "\n");
-  // We are going to make essential changes to the CFG. This may invalidate
-  // cached information for L or one of its parent loops in SCEV.
-  if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
-    SEWP->getSE().forgetTopmostLoop(L);
-
-  // First step: split the preheader, so that we know there is a safe place
-  // to insert the conditional branch. We will change LoopPreheader to have a
-  // conditional branch on Cond.
-  BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
-
-  // Now that we have a place to insert the conditional branch, create a place
-  // to branch to: this is the exit block out of the loop that we should
-  // short-circuit to.
-
-  // Split this block now, so that the loop maintains its exit block, and so
-  // that the jump from the preheader can execute the contents of the exit block
-  // without actually branching to it (the exit block should be dominated by the
-  // loop header, not the preheader).
-  assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
-  BasicBlock *NewExit =
-      SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
-
-  // Okay, now we have a position to branch from and a position to branch to,
-  // insert the new conditional branch.
-  auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator());
-  assert(OldBranch && "Failed to split the preheader");
-  emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI);
-
-  // emitPreheaderBranchOnCondition removed the OldBranch from the function.
-  // Delete it, as it is no longer needed.
-  delete OldBranch;
-
-  // We need to reprocess this loop, it could be unswitched again.
-  RedoLoop = true;
-
-  // Now that we know that the loop is never entered when this condition is a
-  // particular value, rewrite the loop with this info. We know that this will
-  // at least eliminate the old branch.
-  rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false);
-
-  ++NumTrivial;
-}
-
-/// Check if the first non-constant condition starting from the loop header is
-/// a trivial unswitch condition: that is, a condition that controls whether or
-/// not the loop does anything at all. If it is a trivial condition, unswitching
-/// produces no code duplication (equivalently, it produces a simpler loop and
-/// a new empty loop, which gets deleted). Therefore always unswitch a trivial
-/// condition.
-bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) {
-  BasicBlock *CurrentBB = CurrentLoop->getHeader();
-  Instruction *CurrentTerm = CurrentBB->getTerminator();
-  LLVMContext &Context = CurrentBB->getContext();
-
-  // If the loop header has only one reachable successor (currently via an
-  // unconditional branch or a constant-foldable conditional branch, but we
-  // should also consider adding constant-foldable switch instructions in the
-  // future), we should keep looking for trivial condition candidates in
-  // the successor as well. An alternative is to constant fold conditions
-  // and merge successors into the loop header (then we only need to check the
-  // header's terminator). The reason for not doing this in the LoopUnswitch
-  // pass is that it could potentially break LoopPassManager's invariants.
-  // Folding dead branches could either eliminate the current loop or make
-  // other loops unreachable. LCSSA form might also not be preserved after
-  // deleting branches. The following code keeps traversing the loop header's
-  // successors until it finds the trivial condition candidate (a condition
-  // that is not a constant). Since unswitching generates branches with
-  // constant conditions, this scenario could be very common in practice.
-  SmallPtrSet<BasicBlock *, 8> Visited;
-
-  while (true) {
-    // If we exit the loop or reach a previously visited block, then
-    // we can not reach any trivial condition candidates (unfoldable
-    // branch instructions or switch instructions) and no unswitch
-    // can happen. Exit and return false.
-    if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
-      return false;
-
-    // Check if this loop will execute any side-effecting instructions (e.g.
-    // stores, calls, volatile loads) in the part of the loop that the code
-    // *would* execute. Check the header first.
-    for (Instruction &I : *CurrentBB)
-      if (I.mayHaveSideEffects())
-        return false;
-
-    if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
-      if (BI->isUnconditional()) {
-        CurrentBB = BI->getSuccessor(0);
-      } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
-        CurrentBB = BI->getSuccessor(0);
-      } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
-        CurrentBB = BI->getSuccessor(1);
-      } else {
-        // Found a trivial condition candidate: a non-foldable conditional
-        // branch.
-        break;
-      }
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
-      // At this point, any constant-foldable instructions should probably
-      // have been folded.
-      ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
-      if (!Cond)
-        break;
-      // Find the target block we are definitely going to.
-      CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
-    } else {
-      // We do not understand these terminator instructions.
-      break;
-    }
-
-    CurrentTerm = CurrentBB->getTerminator();
-  }
-
-  // CondVal is the condition that controls the trivial condition.
-  // LoopExitBB is the block through which the loop exits when the trivial
-  // condition is met.
-  Constant *CondVal = nullptr;
-  BasicBlock *LoopExitBB = nullptr;
-
-  if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
-    // If this isn't branching on an invariant condition, we can't unswitch it.
-    if (!BI->isConditional())
-      return false;
-
-    Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
-                                           Changed, MSSAU.get())
-                          .first;
-
-    // Unswitch only if the trivial condition itself is an LIV (not a
-    // partial LIV, which could occur in an and/or chain).
-    if (!LoopCond || LoopCond != BI->getCondition())
-      return false;
-
-    // Check to see if a successor of the branch is guaranteed to
-    // exit through a unique exit block without having any
-    // side-effects. If so, determine the value of Cond that causes
-    // it to do this.
-    if ((LoopExitBB =
-             isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) {
-      CondVal = ConstantInt::getTrue(Context);
-    } else if ((LoopExitBB =
-                    isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) {
-      CondVal = ConstantInt::getFalse(Context);
-    }
-
-    // If we didn't find a single unique LoopExit block, or if the loop exit
-    // block contains phi nodes, this isn't trivial.
-    if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
-      return false; // Can't handle this.
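Before the final safety checks below, the branch shape this routine matches can be illustrated with a minimal, hypothetical example (invented names; not from the patch): the header tests the loop-invariant %inv and exits directly, with no side effects on that path, so the test can be hoisted to the preheader without duplicating any code.

define void @sketch(i1 %inv, i32 %n) {
entry:
  br label %header
header:
  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
  br i1 %inv, label %exit, label %latch   ; trivial unswitch candidate
latch:
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %header, label %exit
exit:
  ret void
}

After the transform, the preheader would branch on %inv straight to %exit or into the loop, and the branch inside the loop folds away.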
-
-    if (equalityPropUnSafe(*LoopCond))
-      return false;
-
-    unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
-                             CurrentTerm);
-    ++NumBranches;
-    return true;
-  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
-    // If this isn't switching on an invariant condition, we can't unswitch it.
-    Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
-                                           Changed, MSSAU.get())
-                          .first;
-
-    // Unswitch only if the trivial condition itself is an LIV (not a
-    // partial LIV, which could occur in an and/or chain).
-    if (!LoopCond || LoopCond != SI->getCondition())
-      return false;
-
-    // Check to see if a successor of the switch is guaranteed to go to the
-    // latch block or exit through a single exit block without having any
-    // side-effects. If so, determine the value of Cond that causes it to do
-    // this.
-    // Note that we can't trivially unswitch on the default case or
-    // on already unswitched cases.
-    for (auto Case : SI->cases()) {
-      BasicBlock *LoopExitCandidate;
-      if ((LoopExitCandidate =
-               isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) {
-        // Okay, we found a trivial case, remember the value that is trivial.
-        ConstantInt *CaseVal = Case.getCaseValue();
-
-        // Check that it was not unswitched before, since values that were
-        // already unswitched still look trivial.
-        if (BranchesInfo.isUnswitched(SI, CaseVal))
-          continue;
-        LoopExitBB = LoopExitCandidate;
-        CondVal = CaseVal;
-        break;
-      }
-    }
-
-    // If we didn't find a single unique LoopExit block, or if the loop exit
-    // block contains phi nodes, this isn't trivial.
-    if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
-      return false; // Can't handle this.
-
-    unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
-                             nullptr);
-
-    // We are only unswitching full LIVs.
-    BranchesInfo.setUnswitched(SI, CondVal);
-    ++NumSwitches;
-    return true;
-  }
-  return false;
-}
-
-/// Split all of the edges from inside the loop to their exit blocks.
-/// Update the appropriate Phi nodes as we do so.
-void LoopUnswitch::splitExitEdges(
-    Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
-
-  for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
-    BasicBlock *ExitBlock = ExitBlocks[I];
-    SmallVector<BasicBlock *, 2> Preds(predecessors(ExitBlock));
-
-    // Although SplitBlockPredecessors doesn't preserve loop-simplify in
-    // general, if we call it on all predecessors of all exits then it does.
-    SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
-                           /*PreserveLCSSA*/ true);
-  }
-}
-
-/// We determined that the loop is profitable to unswitch when LIC equals Val.
-/// Split it into loop versions and test the condition outside of either loop.
-/// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(
-    Value *LIC, Constant *Val, Loop *L, Instruction *TI,
-    ArrayRef<Instruction *> ToDuplicate) {
-  Function *F = LoopHeader->getParent();
-  LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
-                    << LoopHeader->getName() << " [" << L->getBlocks().size()
-                    << " blocks] in Function " << F->getName() << " when '"
-                    << *Val << "' == " << *LIC << "\n");
-
-  // We are going to make essential changes to the CFG. This may invalidate
-  // cached information for L or one of its parent loops in SCEV.
-  if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
-    SEWP->getSE().forgetTopmostLoop(L);
-
-  LoopBlocks.clear();
-  NewBlocks.clear();
-
-  if (MSSAU && VerifyMemorySSA)
-    MSSA->verifyMemorySSA();
-
-  // First step: split the preheader and exit blocks, and add these blocks to
-  // the LoopBlocks list.
-  BasicBlock *NewPreheader =
-      SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
-  LoopBlocks.push_back(NewPreheader);
-
-  // We want the loop to come after the preheader, but before the exit blocks.
-  llvm::append_range(LoopBlocks, L->blocks());
-
-  SmallVector<BasicBlock *, 8> ExitBlocks;
-  L->getUniqueExitBlocks(ExitBlocks);
-
-  // Split all of the edges from inside the loop to their exit blocks. Update
-  // the appropriate Phi nodes as we do so.
-  splitExitEdges(L, ExitBlocks);
-
-  // The exit blocks may have been changed due to edge splitting, recompute.
-  ExitBlocks.clear();
-  L->getUniqueExitBlocks(ExitBlocks);
-
-  // Add exit blocks to the loop blocks.
-  llvm::append_range(LoopBlocks, ExitBlocks);
-
-  // Next step: clone all of the basic blocks that make up the loop (including
-  // the loop preheader and exit blocks), keeping track of the mapping between
-  // the instructions and blocks.
-  NewBlocks.reserve(LoopBlocks.size());
-  ValueToValueMapTy VMap;
-  for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) {
-    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F);
-
-    NewBlocks.push_back(NewBB);
-    VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping.
-  }
-
-  // Splice the newly inserted blocks into the function right before the
-  // original preheader.
-  F->getBasicBlockList().splice(NewPreheader->getIterator(),
-                                F->getBasicBlockList(),
-                                NewBlocks[0]->getIterator(), F->end());
-
-  // Now we create the new Loop object for the versioned loop.
-  Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
-
-  // Recalculate the unswitching quota and inherit simplified-switch info for
-  // NewBB; probably clone more loop-unswitch related loop properties.
-  BranchesInfo.cloneData(NewLoop, L, VMap);
-
-  Loop *ParentLoop = L->getParentLoop();
-  if (ParentLoop) {
-    // Make sure to add the cloned preheader and exit blocks to the parent loop
-    // as well.
-    ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
-  }
-
-  for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) {
-    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]);
-    // The new exit block should be in the same loop as the old one.
-    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI]))
-      ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
-
-    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
-           "Exit block should have been split to have one successor!");
-    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
-
-    // If the successor of the exit block had PHI nodes, add an entry for
-    // NewExit.
-    for (PHINode &PN : ExitSucc->phis()) {
-      Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]);
-      ValueToValueMapTy::iterator It = VMap.find(V);
-      if (It != VMap.end()) V = It->second;
-      PN.addIncoming(V, NewExit);
-    }
-
-    if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
-      PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
-                                    &*ExitSucc->getFirstInsertionPt());
-
-      for (BasicBlock *BB : predecessors(ExitSucc)) {
-        LandingPadInst *LPI = BB->getLandingPadInst();
-        LPI->replaceAllUsesWith(PN);
-        PN->addIncoming(LPI, BB);
-      }
-    }
-  }
-
-  // Rewrite the code to refer to itself.
-  for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) {
-    for (Instruction &I : *NewBlocks[NBI]) {
-      RemapInstruction(&I, VMap,
-                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-      if (auto *II = dyn_cast<AssumeInst>(&I))
-        AC->registerAssumption(II);
-    }
-  }
-
-  // Rewrite the original preheader to select between versions of the loop.
-  BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator());
-  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
-         "Preheader splitting did not work correctly!");
-
-  if (MSSAU) {
-    // Update MemorySSA after cloning, and before splitting to unreachables,
-    // since that invalidates the 1:1 mapping of clones in VMap.
-    LoopBlocksRPO LBRPO(L);
-    LBRPO.perform(LI);
-    MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
-  }
-
-  // Emit the new branch that selects between the two versions of this loop.
-  emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
-                                 TI, ToDuplicate);
-  if (MSSAU) {
-    // Update MemoryPhis in Exit blocks.
-    MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
-    if (VerifyMemorySSA)
-      MSSA->verifyMemorySSA();
-  }
-
-  // The OldBr was replaced by a new one and removed (but not erased) by
-  // emitPreheaderBranchOnCondition. It is no longer needed, so delete it.
-  delete OldBR;
-
-  LoopProcessWorklist.push_back(NewLoop);
-  RedoLoop = true;
-
-  // Keep a WeakTrackingVH holding onto LIC. If the first call to
-  // rewriteLoopBodyWithConditionConstant deletes the instruction (for example
-  // by simplifying a PHI that feeds into the condition that we're unswitching
-  // on), we don't rewrite the second iteration.
-  WeakTrackingVH LICHandle(LIC);
-
-  if (ToDuplicate.empty()) {
-    // Now we rewrite the original code to know that the condition is true and
-    // the new code to know that the condition is false.
-    rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
-    // It's possible that simplifying one loop could cause the other to be
-    // changed to another value or a constant. If it is a constant, don't
-    // simplify it.
-    if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
-        LICHandle && !isa<Constant>(LICHandle))
-      rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
-                                           /*IsEqual=*/true);
-  } else {
-    // Partial unswitching. Update the condition in the right loop with the
-    // constant.
-    auto *CC = cast<ConstantInt>(Val);
-    if (CC->isOneValue()) {
-      rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
-                                           /*IsEqual=*/true);
-    } else
-      rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
-
-    // Mark the new loop as partially unswitched, to avoid unswitching on the
-    // same condition again.
-    auto &Context = NewLoop->getHeader()->getContext();
-    MDNode *DisableUnswitchMD = MDNode::get(
-        Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
-    MDNode *NewLoopID = makePostTransformationMetadata(
-        Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
-        {DisableUnswitchMD});
-    NewLoop->setLoopID(NewLoopID);
-  }
-
-  if (MSSA && VerifyMemorySSA)
-    MSSA->verifyMemorySSA();
-}
-
-/// Remove all instances of I from the worklist vector specified.
-static void removeFromWorklist(Instruction *I,
-                               std::vector<Instruction *> &Worklist) {
-  llvm::erase_value(Worklist, I);
-}
-
-/// When we find that I really equals V, remove I from the
-/// program, replacing all uses with V, and update the worklist.
-static void replaceUsesOfWith(Instruction *I, Value *V,
-                              std::vector<Instruction *> &Worklist, Loop *L,
-                              LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
-  LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
-
-  // Add operands to the worklist; they may be dead now.
-  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-    if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
-      Worklist.push_back(Use);
-
-  // Add users to the worklist; they may be simplified now.
-  for (User *U : I->users())
-    Worklist.push_back(cast<Instruction>(U));
-  removeFromWorklist(I, Worklist);
-  I->replaceAllUsesWith(V);
-  if (!I->mayHaveSideEffects()) {
-    if (MSSAU)
-      MSSAU->removeMemoryAccess(I);
-    I->eraseFromParent();
-  }
-  ++NumSimplify;
-}
-
-/// We know either that the value LIC has the value specified by Val in the
-/// specified loop, or we know it does NOT have that value.
-/// Rewrite any uses of LIC or of properties correlated to it.
-void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
-                                                        Constant *Val,
-                                                        bool IsEqual) {
-  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
-
-  // FIXME: Support correlated properties, like:
-  //  for (...)
-  //    if (li1 < li2)
-  //      ...
-  //    if (li1 > li2)
-  //      ...
-
-  // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
-  // selects, switches.
-  std::vector<Instruction *> Worklist;
-  LLVMContext &Context = Val->getContext();
-
-  // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
-  // in the loop with the appropriate one directly.
-  if (IsEqual || (isa<ConstantInt>(Val) &&
-                  Val->getType()->isIntegerTy(1))) {
-    Value *Replacement;
-    if (IsEqual)
-      Replacement = Val;
-    else
-      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
-                                     !cast<ConstantInt>(Val)->getZExtValue());
-
-    for (User *U : LIC->users()) {
-      Instruction *UI = dyn_cast<Instruction>(U);
-      if (!UI || !L->contains(UI))
-        continue;
-      Worklist.push_back(UI);
-    }
-
-    for (Instruction *UI : Worklist)
-      UI->replaceUsesOfWith(LIC, Replacement);
-
-    simplifyCode(Worklist, L);
-    return;
-  }
-
-  // Otherwise, we don't know the precise value of LIC, but we do know that it
-  // is certainly NOT "Val". As such, simplify any uses in the loop that we
-  // can. This case occurs when we unswitch switch statements.
-  for (User *U : LIC->users()) {
-    Instruction *UI = dyn_cast<Instruction>(U);
-    if (!UI || !L->contains(UI))
-      continue;
-
-    // At this point, we know LIC is definitely not Val. Try to use some simple
-    // logic to simplify the user w.r.t. the context.
-    if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) {
-      if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
-        // This in-loop instruction has been simplified w.r.t. its context,
-        // i.e. LIC != Val, so make sure we propagate its replacement value to
-        // all its users.
-        //
-        // We cannot delete UI, the LIC user, yet, because that would
-        // invalidate the LIC->users() iterator! However, we can make this
-        // instruction dead by replacing all its users, and push it onto the
-        // worklist so that it can be properly deleted and its operands
-        // simplified.
-        UI->replaceAllUsesWith(Replacement);
-      }
-    }
-
-    // This is a LIC user, push it into the worklist so that simplifyCode can
-    // attempt to simplify it.
-    Worklist.push_back(UI);
-
-    // If we know that LIC is not Val, use this info to simplify code.
-    SwitchInst *SI = dyn_cast<SwitchInst>(UI);
-    if (!SI || !isa<ConstantInt>(Val)) continue;
-
-    // NOTE: if a case value for the switch is unswitched out, we record it
-    // after the unswitch finishes. We can not record it here as the switch
-    // is not a direct user of the partial LIV.
-    SwitchInst::CaseHandle DeadCase =
-        *SI->findCaseValue(cast<ConstantInt>(Val));
-    // The default case is live for multiple values.
-    if (DeadCase == *SI->case_default())
-      continue;
-
-    // Found a dead case value. Don't remove PHI nodes in the
-    // successor if they become single-entry; those PHI nodes may
-    // be in the Users list.
-
-    BasicBlock *Switch = SI->getParent();
-    BasicBlock *SISucc = DeadCase.getCaseSuccessor();
-    BasicBlock *Latch = L->getLoopLatch();
-
-    if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
-    // If the DeadCase successor dominates the loop latch, then the
-    // transformation isn't safe since it will delete the sole predecessor edge
-    // to the latch.
-    if (Latch && DT->dominates(SISucc, Latch))
-      continue;
-
-    // FIXME: This is a hack. We need to keep the successor around
-    // and hooked up so as to preserve the loop structure, because
-    // trying to update it is complicated. So instead we preserve the
-    // loop structure and put the block on a dead code path.
-    SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
-    // Compute the successors instead of relying on the return value
-    // of SplitEdge, since it may have split the switch successor
-    // after PHI nodes.
-    BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
-    BasicBlock *OldSISucc = *succ_begin(NewSISucc);
-    // Create an "unreachable" destination.
-    BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
-                                           Switch->getParent(),
-                                           OldSISucc);
-    new UnreachableInst(Context, Abort);
-    // Force the new case destination to branch to the "unreachable"
-    // block while maintaining a (dead) CFG edge to the old block.
-    NewSISucc->getTerminator()->eraseFromParent();
-    BranchInst::Create(Abort, OldSISucc,
-                       ConstantInt::getTrue(Context), NewSISucc);
-    // Release the PHI operands for this edge.
-    for (PHINode &PN : NewSISucc->phis())
-      PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
-    // Tell the domtree about the new block. We don't fully update the
-    // domtree here -- instead we force it to do a full recomputation
-    // after the pass is complete -- but we do need to inform it of
-    // new blocks.
-    DT->addNewBlock(Abort, NewSISucc);
-  }
-
-  simplifyCode(Worklist, L);
-}
-
-/// Now that we have simplified some instructions in the loop, walk over it and
-/// constant prop, dce, and fold control flow where possible. Note that this is
-/// effectively a very simple loop-structure-aware optimizer. During processing
-/// of this loop, L could very well be deleted, so it must not be used.
-///
-/// FIXME: When the loop optimizer is more mature, separate this out to a new
-/// pass.
-///
-void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) {
-  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-  while (!Worklist.empty()) {
-    Instruction *I = Worklist.back();
-    Worklist.pop_back();
-
-    // Simple DCE.
-    if (isInstructionTriviallyDead(I)) {
-      LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
-
-      // Add operands to the worklist; they may be dead now.
-      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
-          Worklist.push_back(Use);
-      removeFromWorklist(I, Worklist);
-      if (MSSAU)
-        MSSAU->removeMemoryAccess(I);
-      I->eraseFromParent();
-      ++NumSimplify;
-      continue;
-    }
-
-    // See if instruction simplification can hack this up. This is common for
-    // things like "select false, X, Y" after unswitching made the condition be
-    // 'false'. TODO: update the domtree properly so we can pass it here.
-    if (Value *V = SimplifyInstruction(I, DL))
-      if (LI->replacementPreservesLCSSAForm(I, V)) {
-        replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
-        continue;
-      }
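As a hypothetical example of what the SimplifyInstruction call above cleans up (a sketch, not taken from the patch): in the loop copy where the unswitched condition %inv has been rewritten to true, a select like the one below folds to %a and is then removed as dead by the worklist loop.

define i32 @sketch(i1 %inv, i32 %a, i32 %b) {
entry:
  %s = select i1 %inv, i32 %a, i32 %b  ; folds to %a once %inv is known true
  ret i32 %s
}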
-    // Special case hacks that appear commonly in unswitched code.
-    if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-      if (BI->isUnconditional()) {
-        // If BI's parent is the only pred of the successor, fold the two blocks
-        // together.
-        BasicBlock *Pred = BI->getParent();
-        (void)Pred;
-        BasicBlock *Succ = BI->getSuccessor(0);
-        BasicBlock *SinglePred = Succ->getSinglePredecessor();
-        if (!SinglePred) continue; // Nothing to do.
-        assert(SinglePred == Pred && "CFG broken");
-
-        // Make the LPM and Worklist updates specific to LoopUnswitch.
-        removeFromWorklist(BI, Worklist);
-        auto SuccIt = Succ->begin();
-        while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) {
-          for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It)
-            if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It)))
-              Worklist.push_back(Use);
-          for (User *U : PN->users())
-            Worklist.push_back(cast<Instruction>(U));
-          removeFromWorklist(PN, Worklist);
-          ++NumSimplify;
-        }
-        // Merge the block and make the remaining analysis updates.
-        DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-        MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get());
-        ++NumSimplify;
-        continue;
-      }
-
-      continue;
-    }
-  }
-}
-
-/// Simple simplifications we can do given the information that Cond is
-/// definitely not equal to Val.
-Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst,
-                                                     Value *Invariant,
-                                                     Constant *Val) {
-  // icmp eq cond, val -> false
-  ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
-  if (CI && CI->isEquality()) {
-    Value *Op0 = CI->getOperand(0);
-    Value *Op1 = CI->getOperand(1);
-    if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
-      LLVMContext &Ctx = Inst->getContext();
-      if (CI->getPredicate() == CmpInst::ICMP_EQ)
-        return ConstantInt::getFalse(Ctx);
-      else
-        return ConstantInt::getTrue(Ctx);
-    }
-  }
-
-  // FIXME: there may be other opportunities, e.g. comparison with floating
-  // point, or Invariant - Val != 0, etc.
-  return nullptr;
-}
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 2ff1e8480749..c733aa4701ed 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -70,14 +70,12 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
deleted file mode 100644
index 4063e4fe0472..000000000000
--- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass lowers atomic intrinsics to non-atomic form for use in a known
-// non-preemptible environment.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/LowerAtomic.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loweratomic"
-
-static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
-  IRBuilder<> Builder(CXI);
-  Value *Ptr = CXI->getPointerOperand();
-  Value *Cmp = CXI->getCompareOperand();
-  Value *Val = CXI->getNewValOperand();
-
-  LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
-  Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
-  Value *Res = Builder.CreateSelect(Equal, Val, Orig);
-  Builder.CreateStore(Res, Ptr);
-
-  Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
-  Res = Builder.CreateInsertValue(Res, Equal, 1);
-
-  CXI->replaceAllUsesWith(Res);
-  CXI->eraseFromParent();
-  return true;
-}
-
-bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
-  IRBuilder<> Builder(RMWI);
-  Value *Ptr = RMWI->getPointerOperand();
-  Value *Val = RMWI->getValOperand();
-
-  LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
-  Value *Res = nullptr;
-
-  switch (RMWI->getOperation()) {
-  default: llvm_unreachable("Unexpected RMW operation");
-  case AtomicRMWInst::Xchg:
-    Res = Val;
-    break;
-  case AtomicRMWInst::Add:
-    Res = Builder.CreateAdd(Orig, Val);
-    break;
-  case AtomicRMWInst::Sub:
-    Res = Builder.CreateSub(Orig, Val);
-    break;
-  case AtomicRMWInst::And:
-    Res = Builder.CreateAnd(Orig, Val);
-    break;
-  case AtomicRMWInst::Nand:
-    Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
-    break;
-  case AtomicRMWInst::Or:
-    Res = Builder.CreateOr(Orig, Val);
-    break;
-  case AtomicRMWInst::Xor:
-    Res = Builder.CreateXor(Orig, Val);
-    break;
-  case AtomicRMWInst::Max:
-    Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
-                               Val, Orig);
-    break;
-  case AtomicRMWInst::Min:
-    Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
-                               Orig, Val);
-    break;
-  case AtomicRMWInst::UMax:
-    Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
-                               Val, Orig);
-    break;
-  case AtomicRMWInst::UMin:
-    Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
-                               Orig, Val);
-    break;
-  case AtomicRMWInst::FAdd:
-    Res = Builder.CreateFAdd(Orig, Val);
-    break;
-  case AtomicRMWInst::FSub:
-    Res = Builder.CreateFSub(Orig, Val);
-    break;
-  }
-  Builder.CreateStore(Res, Ptr);
-  RMWI->replaceAllUsesWith(Orig);
-  RMWI->eraseFromParent();
-  return true;
-}
-
-static bool LowerFenceInst(FenceInst *FI) {
-  FI->eraseFromParent();
-  return true;
-}
-
-static bool LowerLoadInst(LoadInst *LI) {
-  LI->setAtomic(AtomicOrdering::NotAtomic);
-  return true;
-}
-
-static bool LowerStoreInst(StoreInst *SI) {
-  SI->setAtomic(AtomicOrdering::NotAtomic);
-  return true;
-}
-
-static bool runOnBasicBlock(BasicBlock &BB) {
-  bool Changed = false;
-  for (Instruction &Inst : make_early_inc_range(BB)) {
-    if (FenceInst *FI = dyn_cast<FenceInst>(&Inst))
-      Changed |= LowerFenceInst(FI);
-    else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&Inst))
-      Changed |= LowerAtomicCmpXchgInst(CXI);
-    else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&Inst))
-      Changed |= lowerAtomicRMWInst(RMWI);
-    else if (LoadInst *LI = dyn_cast<LoadInst>(&Inst)) {
-      if (LI->isAtomic())
-        LowerLoadInst(LI);
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(&Inst)) {
-      if (SI->isAtomic())
-        LowerStoreInst(SI);
-    }
-  }
-  return Changed;
-}
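As a hypothetical before/after sketch of what this lowering does to a cmpxchg (invented names; a sketch, not from the patch), the atomic operation is replaced by a plain load, compare, select, and store, which is only sound in a known non-preemptible environment:

define i32 @sketch(ptr %p, i32 %cmp, i32 %new) {
entry:
  ; equivalent of the lowered cmpxchg: no atomicity remains
  %orig = load i32, ptr %p
  %equal = icmp eq i32 %orig, %cmp
  %res = select i1 %equal, i32 %new, i32 %orig
  store i32 %res, ptr %p
  ret i32 %orig
}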
-static bool lowerAtomics(Function &F) {
-  bool Changed = false;
-  for (BasicBlock &BB : F) {
-    Changed |= runOnBasicBlock(BB);
-  }
-  return Changed;
-}
-
-PreservedAnalyses LowerAtomicPass::run(Function &F, FunctionAnalysisManager &) {
-  if (lowerAtomics(F))
-    return PreservedAnalyses::none();
-  return PreservedAnalyses::all();
-}
-
-namespace {
-class LowerAtomicLegacyPass : public FunctionPass {
-public:
-  static char ID;
-
-  LowerAtomicLegacyPass() : FunctionPass(ID) {
-    initializeLowerAtomicLegacyPassPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnFunction(Function &F) override {
-    // Don't skip optnone functions; atomics still need to be lowered.
-    FunctionAnalysisManager DummyFAM;
-    auto PA = Impl.run(F, DummyFAM);
-    return !PA.areAllPreserved();
-  }
-
-private:
-  LowerAtomicPass Impl;
-};
-}
-
-char LowerAtomicLegacyPass::ID = 0;
-INITIALIZE_PASS(LowerAtomicLegacyPass, "loweratomic",
-                "Lower atomic intrinsics to non-atomic form", false, false)
-
-Pass *llvm::createLowerAtomicPass() { return new LowerAtomicLegacyPass(); }
diff --git a/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp b/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
new file mode 100644
index 000000000000..6aba913005d0
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
@@ -0,0 +1,99 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loweratomic"
+
+static bool LowerFenceInst(FenceInst *FI) {
+  FI->eraseFromParent();
+  return true;
+}
+
+static bool LowerLoadInst(LoadInst *LI) {
+  LI->setAtomic(AtomicOrdering::NotAtomic);
+  return true;
+}
+
+static bool LowerStoreInst(StoreInst *SI) {
+  SI->setAtomic(AtomicOrdering::NotAtomic);
+  return true;
+}
+
+static bool runOnBasicBlock(BasicBlock &BB) {
+  bool Changed = false;
+  for (Instruction &Inst : make_early_inc_range(BB)) {
+    if (FenceInst *FI = dyn_cast<FenceInst>(&Inst))
+      Changed |= LowerFenceInst(FI);
+    else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&Inst))
+      Changed |= lowerAtomicCmpXchgInst(CXI);
+    else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&Inst))
+      Changed |= lowerAtomicRMWInst(RMWI);
+    else if (LoadInst *LI = dyn_cast<LoadInst>(&Inst)) {
+      if (LI->isAtomic())
+        LowerLoadInst(LI);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(&Inst)) {
+      if (SI->isAtomic())
+        LowerStoreInst(SI);
+    }
+  }
+  return Changed;
+}
+
+static bool lowerAtomics(Function &F) {
+  bool Changed = false;
+  for (BasicBlock &BB : F) {
+    Changed |= runOnBasicBlock(BB);
+  }
+  return Changed;
+}
+
+PreservedAnalyses LowerAtomicPass::run(Function &F, FunctionAnalysisManager &) {
+  if (lowerAtomics(F))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+namespace {
+class LowerAtomicLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  LowerAtomicLegacyPass() : FunctionPass(ID) {
+    initializeLowerAtomicLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    // Don't skip optnone functions; atomics still need to be lowered.
+    FunctionAnalysisManager DummyFAM;
+    auto PA = Impl.run(F, DummyFAM);
+    return !PA.areAllPreserved();
+  }
+
+private:
+  LowerAtomicPass Impl;
+};
+}
+
+char LowerAtomicLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerAtomicLegacyPass, "loweratomic",
+                "Lower atomic intrinsics to non-atomic form", false, false)
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomicLegacyPass(); }
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index 186065db327e..47493b54a527 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -26,11 +26,9 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -96,7 +94,7 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
   return HasDeadBlocks;
 }
-static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI,
+static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo &TLI,
                                     DominatorTree *DT) {
   Optional<DomTreeUpdater> DTU;
   if (DT)
@@ -140,21 +138,21 @@ static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI,
       IsConstantIntrinsicsHandled++;
       break;
     case Intrinsic::objectsize:
-      NewValue = lowerObjectSizeCall(II, DL, TLI, true);
+      NewValue = lowerObjectSizeCall(II, DL, &TLI, true);
       ObjectSizeIntrinsicsHandled++;
      break;
    }
    HasDeadBlocks |= replaceConditionalBranchesOnConstant(
-        II, NewValue, DTU.hasValue() ? DTU.getPointer() : nullptr);
+        II, NewValue, DTU ? DTU.getPointer() : nullptr);
  }
  if (HasDeadBlocks)
-    removeUnreachableBlocks(F, DTU.hasValue() ? DTU.getPointer() : nullptr);
+    removeUnreachableBlocks(F, DTU ? DTU.getPointer() : nullptr);
  return !Worklist.empty();
 }
 PreservedAnalyses
 LowerConstantIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) {
-  if (lowerConstantIntrinsics(F, AM.getCachedResult<TargetLibraryAnalysis>(F),
+  if (lowerConstantIntrinsics(F, AM.getResult<TargetLibraryAnalysis>(F),
                               AM.getCachedResult<DominatorTreeAnalysis>(F))) {
     PreservedAnalyses PA;
     PA.preserve<DominatorTreeAnalysis>();
@@ -178,8 +176,8 @@ public:
   }
   bool runOnFunction(Function &F) override {
-    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-    const TargetLibraryInfo *TLI = TLIP ?
&TLIP->getTLI(F) : nullptr; + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); DominatorTree *DT = nullptr; if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) DT = &DTWP->getDomTree(); @@ -187,6 +185,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); } @@ -196,6 +195,7 @@ public: char LowerConstantIntrinsics::ID = 0; INITIALIZE_PASS_BEGIN(LowerConstantIntrinsics, "lower-constant-intrinsics", "Lower constant intrinsics", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(LowerConstantIntrinsics, "lower-constant-intrinsics", "Lower constant intrinsics", false, false) diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index a7eb60b5e032..88fad9896c59 100644 --- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -21,12 +21,11 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/MisExpect.h" using namespace llvm; @@ -101,6 +100,8 @@ static bool handleSwitchExpect(SwitchInst &SI) { uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1; Weights[Index] = LikelyBranchWeightVal; + misexpect::checkExpectAnnotations(SI, Weights, /*IsFrontend=*/true); + SI.setCondition(ArgValue); SI.setMetadata(LLVMContext::MD_prof, @@ -315,13 +316,16 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { std::tie(LikelyBranchWeightVal, UnlikelyBranchWeightVal) = getBranchWeight(Fn->getIntrinsicID(), CI, 2); + SmallVector<uint32_t, 4> ExpectedWeights; if ((ExpectedValue->getZExtValue() == ValueComparedTo) == (Predicate == CmpInst::ICMP_EQ)) { Node = MDB.createBranchWeights(LikelyBranchWeightVal, UnlikelyBranchWeightVal); + ExpectedWeights = {LikelyBranchWeightVal, UnlikelyBranchWeightVal}; } else { Node = MDB.createBranchWeights(UnlikelyBranchWeightVal, LikelyBranchWeightVal); + ExpectedWeights = {UnlikelyBranchWeightVal, LikelyBranchWeightVal}; } if (CmpI) @@ -329,6 +333,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { else BSI.setCondition(ArgValue); + misexpect::checkFrontendInstrumentation(BSI, ExpectedWeights); + BSI.setMetadata(LLVMContext::MD_prof, Node); return true; @@ -409,7 +415,7 @@ public: bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); } }; -} +} // namespace char LowerExpectIntrinsic::ID = 0; INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", diff --git a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp index 45f5929e3b90..8dc037b10cc8 100644 --- a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp +++ b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/GuardUtils.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -49,9 +48,13 @@ static bool lowerGuardIntrinsic(Function &F) { return false; SmallVector<CallInst *, 8> ToLower; - for (auto &I : instructions(F)) - if (isGuard(&I)) - ToLower.push_back(cast<CallInst>(&I)); + // Traverse 
through the users of GuardDecl. + // This is presumably cheaper than traversing all instructions in the + // function. + for (auto *U : GuardDecl->users()) + if (auto *CI = dyn_cast(U)) + if (CI->getFunction() == &F) + ToLower.push_back(CI); if (ToLower.empty()) return false; diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 296becb31e8f..c05906649f16 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -18,11 +18,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -704,10 +704,10 @@ public: // We may remove II. By default continue on the next/prev instruction. ++II; // If we were to erase II, move again. - auto EraseFromParent = [&II](Value *V) { + auto EraseFromParent = [&II, &BB](Value *V) { auto *Inst = cast(V); if (Inst->use_empty()) { - if (Inst == &*II) { + if (II != BB.rend() && Inst == &*II) { ++II; } Inst->eraseFromParent(); @@ -718,7 +718,7 @@ public: Instruction *NewInst = nullptr; IRBuilder<> IB(&I); - MatrixBuilder> Builder(IB); + MatrixBuilder Builder(IB); Value *TA, *TAMA, *TAMB; ConstantInt *R, *K, *C; @@ -766,28 +766,25 @@ public: // If we have a TT matmul, lift the transpose. We may be able to fold into // consuming multiply. for (BasicBlock &BB : Func) { - for (BasicBlock::iterator II = BB.begin(); II != BB.end();) { - Instruction *I = &*II; - // We may remove I. - ++II; + for (Instruction &I : llvm::make_early_inc_range(BB)) { Value *A, *B, *AT, *BT; ConstantInt *R, *K, *C; // A^t * B ^t -> (B * A)^t - if (match(&*I, m_Intrinsic( - m_Value(A), m_Value(B), m_ConstantInt(R), - m_ConstantInt(K), m_ConstantInt(C))) && + if (match(&I, m_Intrinsic( + m_Value(A), m_Value(B), m_ConstantInt(R), + m_ConstantInt(K), m_ConstantInt(C))) && match(A, m_Intrinsic(m_Value(AT))) && match(B, m_Intrinsic(m_Value((BT))))) { - IRBuilder<> IB(&*I); - MatrixBuilder> Builder(IB); + IRBuilder<> IB(&I); + MatrixBuilder Builder(IB); Value *M = Builder.CreateMatrixMultiply( BT, AT, C->getZExtValue(), K->getZExtValue(), R->getZExtValue()); setShapeInfo(M, {C, R}); Instruction *NewInst = Builder.CreateMatrixTranspose( M, C->getZExtValue(), R->getZExtValue()); - ReplaceAllUsesWith(*I, NewInst); - if (I->use_empty()) - I->eraseFromParent(); + ReplaceAllUsesWith(I, NewInst); + if (I.use_empty()) + I.eraseFromParent(); if (A->use_empty()) cast(A)->eraseFromParent(); if (A != B && B->use_empty()) @@ -891,27 +888,27 @@ public: // having to update as many def-use and use-def chains. // // Because we add to ToRemove during fusion we can't guarantee that defs - // are before uses. Change uses to undef temporarily as these should get + // are before uses. Change uses to poison temporarily as these should get // removed as well. // - // For verification, we keep track of where we changed uses to undefs in - // UndefedInsts and then check that we in fact remove them. 
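// [Editorial sketch, not part of the patch] The LowerGuardIntrinsic change
// above replaces a whole-function instruction walk with a scan over the
// users of the intrinsic declaration; only calls inside F are kept, since a
// declaration's use list spans the whole module. A minimal standalone form,
// assuming F is the function being lowered:
//
// SmallVector<CallInst *, 8> ToLower;
// if (Function *GuardDecl = F.getParent()->getFunction(
//         Intrinsic::getName(Intrinsic::experimental_guard)))
//   for (User *U : GuardDecl->users())
//     if (auto *CI = dyn_cast<CallInst>(U))
//       if (CI->getFunction() == &F) // users may live in other functions
//         ToLower.push_back(CI);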
- SmallSet UndefedInsts; + // For verification, we keep track of where we changed uses to poison in + // PoisonedInsts and then check that we in fact remove them. + SmallSet PoisonedInsts; for (auto *Inst : reverse(ToRemove)) { for (Use &U : llvm::make_early_inc_range(Inst->uses())) { - if (auto *Undefed = dyn_cast(U.getUser())) - UndefedInsts.insert(Undefed); - U.set(UndefValue::get(Inst->getType())); + if (auto *Poisoned = dyn_cast(U.getUser())) + PoisonedInsts.insert(Poisoned); + U.set(PoisonValue::get(Inst->getType())); } Inst->eraseFromParent(); - UndefedInsts.erase(Inst); + PoisonedInsts.erase(Inst); } - if (!UndefedInsts.empty()) { - // If we didn't remove all undefed instructions, it's a hard error. - dbgs() << "Undefed but present instructions:\n"; - for (auto *I : UndefedInsts) + if (!PoisonedInsts.empty()) { + // If we didn't remove all poisoned instructions, it's a hard error. + dbgs() << "Poisoned but present instructions:\n"; + for (auto *I : PoisonedInsts) dbgs() << *I << "\n"; - llvm_unreachable("Undefed but instruction not removed"); + llvm_unreachable("Poisoned but instruction not removed"); } return Changed; @@ -1670,7 +1667,7 @@ public: for (unsigned I = 0; I < NewNumVecs; ++I) { // Build a single result vector. First initialize it. - Value *ResultVector = UndefValue::get( + Value *ResultVector = PoisonValue::get( FixedVectorType::get(VectorTy->getElementType(), NewNumElts)); // Go through the old elements and insert it into the resulting vector. for (auto J : enumerate(InputMatrix.vectors())) { diff --git a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp index 73b2cd06fa23..e2de322933bc 100644 --- a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp +++ b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp @@ -13,8 +13,6 @@ #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/GuardUtils.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -24,7 +22,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/GuardUtils.h" using namespace llvm; @@ -50,9 +47,13 @@ static bool lowerWidenableCondition(Function &F) { using namespace llvm::PatternMatch; SmallVector ToLower; - for (auto &I : instructions(F)) - if (match(&I, m_Intrinsic())) - ToLower.push_back(cast(&I)); + // Traverse through the users of WCDecl. + // This is presumably cheaper than traversing all instructions in the + // function. 
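// [Editorial sketch, not part of the patch] The undef-to-poison rename above
// is part of a pattern worth calling out: before bulk-erasing the fused
// instructions, remaining uses are redirected to poison and the touched
// users are recorded, so the code can assert that every poisoned
// instruction is itself erased. ToRemove is assumed to hold the
// instructions scheduled for deletion:
//
// SmallSet<Instruction *, 16> PoisonedInsts;
// for (Instruction *Inst : reverse(ToRemove)) {
//   for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
//     if (auto *Poisoned = dyn_cast<Instruction>(U.getUser()))
//       PoisonedInsts.insert(Poisoned);
//     U.set(PoisonValue::get(Inst->getType()));
//   }
//   Inst->eraseFromParent();
//   PoisonedInsts.erase(Inst);
// }
// assert(PoisonedInsts.empty() && "poisoned instruction was not removed");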
+ for (auto *U : WCDecl->users()) + if (auto *CI = dyn_cast(U)) + if (CI->getFunction() == &F) + ToLower.push_back(CI); if (ToLower.empty()) return false; diff --git a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp index 5ffae128f5f0..a3f09a5a33c3 100644 --- a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp +++ b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp @@ -33,13 +33,11 @@ #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Analysis/GuardUtils.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/GuardUtils.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6698db26626b..1f5bc69acecd 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,14 +28,12 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" @@ -45,7 +43,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -61,15 +58,13 @@ #include #include #include -#include using namespace llvm; #define DEBUG_TYPE "memcpyopt" static cl::opt EnableMemCpyOptWithoutLibcalls( - "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden, - cl::ZeroOrMore, + "enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled")); STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); @@ -100,7 +95,7 @@ struct MemsetRange { Value *StartPtr; /// Alignment - The known alignment of the first store. - unsigned Alignment; + MaybeAlign Alignment; /// TheStores - The actual stores that make up this range. SmallVector TheStores; @@ -182,16 +177,16 @@ public: TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); assert(!StoreSize.isScalable() && "Can't track scalable-typed stores"); addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(), - SI->getAlign().value(), SI); + SI->getAlign(), SI); } void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast(MSI->getLength())->getZExtValue(); - addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI); } - void addRange(int64_t Start, int64_t Size, Value *Ptr, - unsigned Alignment, Instruction *Inst); + void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment, + Instruction *Inst); }; } // end anonymous namespace @@ -200,7 +195,7 @@ public: /// new range for the specified store at the specified offset, merging into /// existing ranges as appropriate. 
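// [Editorial sketch, not part of the patch] The MemsetRanges changes above
// migrate from a raw unsigned to MaybeAlign, which distinguishes "alignment
// unknown" from a concrete power-of-two Align:
//
// MaybeAlign MA;              // unset: nothing is known yet
// assert(!MA && "no alignment recorded");
// MA = Align(8);              // a known 8-byte alignment
// Align A = MA.valueOrOne();  // Align(8) here; Align(1) if MA were unset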
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, - unsigned Alignment, Instruction *Inst) { + MaybeAlign Alignment, Instruction *Inst) { int64_t End = Start+Size; range_iterator I = partition_point( @@ -352,9 +347,25 @@ static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, // Check for mod of Loc between Start and End, excluding both boundaries. // Start and End can be in different blocks. -static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, - const MemoryUseOrDef *Start, +static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA, + MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End) { + if (isa(End)) { + // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes. + // Manually check read accesses between Start and End, if they are in the + // same block, for clobbers. Otherwise assume Loc is clobbered. + return Start->getBlock() != End->getBlock() || + any_of( + make_range(std::next(Start->getIterator()), End->getIterator()), + [&AA, Loc](const MemoryAccess &Acc) { + if (isa(&Acc)) + return false; + Instruction *AccInst = + cast(&Acc)->getMemoryInst(); + return isModSet(AA.getModRefInfo(AccInst, Loc)); + }); + } + // TODO: Only walk until we hit Start. MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( End->getDefiningAccess(), Loc); @@ -492,7 +503,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, StartPtr = Range.StartPtr; AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start, - MaybeAlign(Range.Alignment)); + Range.Alignment); LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI : Range.TheStores) dbgs() << *SI << '\n'; @@ -749,36 +760,25 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than // a memcpy. - CallInst *C = nullptr; - if (auto *LoadClobber = dyn_cast( - MSSA->getWalker()->getClobberingMemoryAccess(LI))) { - // The load most post-dom the call. Limit to the same block for now. - // TODO: Support non-local call-slot optimization? - if (LoadClobber->getBlock() == SI->getParent()) - C = dyn_cast_or_null(LoadClobber->getMemoryInst()); - } - - if (C) { - // Check that nothing touches the dest of the "copy" between - // the call and the store. - MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(SI))) - C = nullptr; - } - - if (C) { - bool changed = performCallSlotOptzn( - LI, SI, SI->getPointerOperand()->stripPointerCasts(), - LI->getPointerOperand()->stripPointerCasts(), - DL.getTypeStoreSize(SI->getOperand(0)->getType()), - commonAlignment(SI->getAlign(), LI->getAlign()), C); - if (changed) { - eraseInstruction(SI); - eraseInstruction(LI); - ++NumMemCpyInstr; - return true; - } + auto GetCall = [&]() -> CallInst * { + // We defer this expensive clobber walk until the cheap checks + // have been done on the source inside performCallSlotOptzn. 
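// [Editorial sketch, not part of the patch] The comment above describes the
// new deferral idiom: performCallSlotOptzn now receives a callable instead
// of a CallInst *, so the MemorySSA clobber walk below only runs once the
// cheap structural checks have passed. MSSA and LI are assumed in scope:
//
// auto GetCall = [&]() -> CallInst * {
//   if (auto *LoadClobber = dyn_cast<MemoryDef>(
//           MSSA->getWalker()->getClobberingMemoryAccess(LI)))
//     return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
//   return nullptr;
// };
// // performCallSlotOptzn(..., GetCall) invokes GetCall() only after its
// // early bail-outs (scalable size, non-alloca source, size mismatch) pass.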
+ if (auto *LoadClobber = dyn_cast( + MSSA->getWalker()->getClobberingMemoryAccess(LI))) + return dyn_cast_or_null(LoadClobber->getMemoryInst()); + return nullptr; + }; + + bool changed = performCallSlotOptzn( + LI, SI, SI->getPointerOperand()->stripPointerCasts(), + LI->getPointerOperand()->stripPointerCasts(), + DL.getTypeStoreSize(SI->getOperand(0)->getType()), + std::min(SI->getAlign(), LI->getAlign()), GetCall); + if (changed) { + eraseInstruction(SI); + eraseInstruction(LI); + ++NumMemCpyInstr; + return true; } } } @@ -853,7 +853,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, Value *cpyDest, Value *cpySrc, TypeSize cpySize, - Align cpyAlign, CallInst *C) { + Align cpyAlign, + std::function GetC) { // The general transformation to keep in mind is // // call @func(..., src, ...) @@ -872,11 +873,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, if (cpySize.isScalable()) return false; - // Lifetime marks shouldn't be operated on. - if (Function *F = C->getCalledFunction()) - if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) - return false; - // Require that src be an alloca. This simplifies the reasoning considerably. auto *srcAlloca = dyn_cast(cpySrc); if (!srcAlloca) @@ -893,6 +889,33 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, if (cpySize < srcSize) return false; + CallInst *C = GetC(); + if (!C) + return false; + + // Lifetime marks shouldn't be operated on. + if (Function *F = C->getCalledFunction()) + if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) + return false; + + + if (C->getParent() != cpyStore->getParent()) { + LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n"); + return false; + } + + MemoryLocation DestLoc = isa(cpyStore) ? + MemoryLocation::get(cpyStore) : + MemoryLocation::getForDest(cast(cpyStore)); + + // Check that nothing touches the dest of the copy between + // the call and the store/memcpy. + if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C), + MSSA->getMemoryAccess(cpyStore))) { + LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n"); + return false; + } + // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. @@ -902,6 +925,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, return false; } + // Make sure that nothing can observe cpyDest being written early. There are // a number of cases to consider: // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of @@ -1118,7 +1142,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // then we could still perform the xform by moving M up to the first memcpy. // TODO: It would be sufficient to check the MDep source up to the memcpy // size of M, rather than MDep. - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep), MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) return false; @@ -1215,14 +1239,14 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, } // By default, create an unaligned memset. - unsigned Align = 1; + Align Alignment = Align(1); // If Dest is aligned, and SrcSize is constant, use the minimum alignment // of the sum. 
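// [Editorial sketch, not part of the patch] The alignment logic referenced
// above: a memset emitted at Dest + SrcSize can only rely on the alignment
// common to the destination alignment and the constant offset, e.g.:
//
// Align DestAlign(16);  // assumed destination alignment
// uint64_t Offset = 24; // assumed constant SrcSize
// Align NewAlign = commonAlignment(DestAlign, Offset); // Align(8)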
- const unsigned DestAlign = - std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); + const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(), + MemCpy->getDestAlign().valueOrOne()); if (DestAlign > 1) if (auto *SrcSizeC = dyn_cast(SrcSize)) - Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); + Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue()); IRBuilder<> Builder(MemCpy); @@ -1241,11 +1265,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); unsigned DestAS = Dest->getType()->getPointerAddressSpace(); Instruction *NewMemSet = Builder.CreateMemSet( - Builder.CreateGEP(Builder.getInt8Ty(), - Builder.CreatePointerCast(Dest, - Builder.getInt8PtrTy(DestAS)), - SrcSize), - MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); + Builder.CreateGEP( + Builder.getInt8Ty(), + Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)), + SrcSize), + MemSet->getOperand(1), MemsetLen, Alignment); assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && "MemCpy must be a MemoryDef"); @@ -1402,7 +1426,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); - MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); + // FIXME: Not using getClobberingMemoryAccess() here due to PR54682. + MemoryAccess *AnyClobber = MA->getDefiningAccess(); MemoryLocation DestLoc = MemoryLocation::getForDest(M); const MemoryAccess *DestClobber = MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); @@ -1431,28 +1456,20 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (Instruction *MI = MD->getMemoryInst()) { if (auto *CopySize = dyn_cast(M->getLength())) { if (auto *C = dyn_cast(MI)) { - // The memcpy must post-dom the call. Limit to the same block for - // now. Additionally, we need to ensure that there are no accesses - // to dest between the call and the memcpy. Accesses to src will be - // checked by performCallSlotOptzn(). - // TODO: Support non-local call-slot optimization? - if (C->getParent() == M->getParent() && - !accessedBetween(*AA, DestLoc, MD, MA)) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn( - M, M, M->getDest(), M->getSource(), - TypeSize::getFixed(CopySize->getZExtValue()), Alignment, - C)) { - LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" - << " call: " << *C << "\n" - << " memcpy: " << *M << "\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? + Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn( + M, M, M->getDest(), M->getSource(), + TypeSize::getFixed(CopySize->getZExtValue()), Alignment, + [C]() -> CallInst * { return C; })) { + LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" + << " call: " << *C << "\n" + << " memcpy: " << *M << "\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; } } } @@ -1557,7 +1574,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). 
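// [Editorial sketch, not part of the patch] For the MemoryDef case,
// writtenBetween above reduces to one MemorySSA query: find the access that
// clobbers Loc starting from End's defining access, and report a write iff
// that clobber is not dominated by Start:
//
// static bool clobberedBetween(MemorySSA *MSSA, MemoryLocation Loc,
//                              const MemoryUseOrDef *Start,
//                              const MemoryUseOrDef *End) {
//   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
//       End->getDefiningAccess(), Loc);
//   return !MSSA->dominates(Clobber, Start);
// }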
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep), MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) return false; diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp index aac0deea5be3..ce01ae5b2692 100644 --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -144,31 +144,33 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { LLVM_DEBUG(dbgs() << "volatile or atomic\n"); return {}; } - Value *const Addr = LoadI->getOperand(0); + Value *Addr = LoadI->getOperand(0); if (Addr->getType()->getPointerAddressSpace() != 0) { LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n"); return {}; } - auto *const GEP = dyn_cast(Addr); - if (!GEP) - return {}; - LLVM_DEBUG(dbgs() << "GEP\n"); - if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) { - LLVM_DEBUG(dbgs() << "used outside of block\n"); - return {}; - } - const auto &DL = GEP->getModule()->getDataLayout(); - if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) { + const auto &DL = LoadI->getModule()->getDataLayout(); + if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) { LLVM_DEBUG(dbgs() << "not dereferenceable\n"); // We need to make sure that we can do comparison in any order, so we // require memory to be unconditionnally dereferencable. return {}; } - APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0); - if (!GEP->accumulateConstantOffset(DL, Offset)) - return {}; - return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()), - Offset); + + APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0); + Value *Base = Addr; + auto *GEP = dyn_cast(Addr); + if (GEP) { + LLVM_DEBUG(dbgs() << "GEP\n"); + if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) { + LLVM_DEBUG(dbgs() << "used outside of block\n"); + return {}; + } + if (!GEP->accumulateConstantOffset(DL, Offset)) + return {}; + Base = GEP->getPointerOperand(); + } + return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset); } // A comparison between two BCE atoms, e.g. `a == o.a` in the example at the @@ -244,7 +246,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, auto MayClobber = [&](LoadInst *LI) { // If a potentially clobbering instruction comes before the load, // we can still safely sink the load. - return !Inst->comesBefore(LI) && + return (Inst->getParent() != LI->getParent() || !Inst->comesBefore(LI)) && isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI))); }; if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI)) @@ -270,9 +272,8 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const { } // Do the actual spliting. 
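// [Editorial sketch, not part of the patch] The MergeICmps change above lets
// a load form a BCEAtom even without a GEP: the offset defaults to zero and
// the loaded pointer itself becomes the base. A fragment in the style of
// visitICmpLoadOperand, with Addr and DL assumed in scope:
//
// APInt Offset(DL.getPointerTypeSizeInBits(Addr->getType()), 0);
// Value *Base = Addr;
// if (auto *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
//   if (!GEP->accumulateConstantOffset(DL, Offset))
//     return {}; // bail out on a non-constant offset
//   Base = GEP->getPointerOperand();
// }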
- for (Instruction *Inst : reverse(OtherInsts)) { - Inst->moveBefore(&*NewParent->begin()); - } + for (Instruction *Inst : reverse(OtherInsts)) + Inst->moveBefore(*NewParent, NewParent->begin()); } bool BCECmpBlock::canSplit(AliasAnalysis &AA) const { @@ -368,8 +369,11 @@ Optional visitCmpBlock(Value *const Val, BasicBlock *const Block, return None; BCECmpBlock::InstructionSet BlockInsts( - {Result->Lhs.GEP, Result->Rhs.GEP, Result->Lhs.LoadI, Result->Rhs.LoadI, - Result->CmpI, BranchI}); + {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI}); + if (Result->Lhs.GEP) + BlockInsts.insert(Result->Lhs.GEP); + if (Result->Rhs.GEP) + BlockInsts.insert(Result->Rhs.GEP); return BCECmpBlock(std::move(*Result), Block, BlockInsts); } @@ -604,8 +608,15 @@ static BasicBlock *mergeComparisons(ArrayRef Comparisons, NextCmpBlock->getParent(), InsertBefore); IRBuilder<> Builder(BB); // Add the GEPs from the first BCECmpBlock. - Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone()); - Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone()); + Value *Lhs, *Rhs; + if (FirstCmp.Lhs().GEP) + Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone()); + else + Lhs = FirstCmp.Lhs().LoadI->getPointerOperand(); + if (FirstCmp.Rhs().GEP) + Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone()); + else + Rhs = FirstCmp.Rhs().LoadI->getPointerOperand(); Value *IsEqual = nullptr; LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> " diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 734532a6670c..6383d6ea838b 100644 --- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -76,13 +76,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Metadata.h" +#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index f35c9212a6f9..876ef3c427a6 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -88,8 +88,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -1076,6 +1074,9 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, Value *Arg1, Value *Arg2, Instruction *I) const { auto *E = new (ExpressionAllocator) BasicExpression(2); + // TODO: we need to remove context instruction after Value Tracking + // can run without context instruction + const SimplifyQuery Q = SQ.getWithInstruction(I); E->setType(T); E->setOpcode(Opcode); @@ -1091,7 +1092,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, E->op_push_back(lookupOperandLeader(Arg1)); E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); + Value *V = simplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), Q); if (auto Simplified = 
checkExprResults(E, I, V)) { addAdditionalUsers(Simplified, I); return Simplified.Expr; @@ -1147,6 +1148,9 @@ NewGVN::ExprResult NewGVN::checkExprResults(Expression *E, Instruction *I, NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); + // TODO: we need to remove context instruction after Value Tracking + // can run without context instruction + const SimplifyQuery Q = SQ.getWithInstruction(I); bool AllConstant = setBasicExpressionInfo(I, E); @@ -1169,13 +1173,13 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const { Predicate = CmpInst::getSwappedPredicate(Predicate); } E->setOpcode((CI->getOpcode() << 8) | Predicate); - // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands + // TODO: 25% of our time is spent in simplifyCmpInst with pointer operands assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() && "Wrong types on cmp instruction"); assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && E->getOperand(1)->getType() == I->getOperand(1)->getType())); Value *V = - SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); + simplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), Q); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; } else if (isa(I)) { @@ -1183,26 +1187,26 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const { E->getOperand(1) == E->getOperand(2)) { assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && E->getOperand(2)->getType() == I->getOperand(2)->getType()); - Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), SQ); + Value *V = simplifySelectInst(E->getOperand(0), E->getOperand(1), + E->getOperand(2), Q); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; } } else if (I->isBinaryOp()) { Value *V = - SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); + simplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), Q); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; } else if (auto *CI = dyn_cast(I)) { Value *V = - SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ); + simplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), Q); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; } else if (auto *GEPI = dyn_cast(I)) { Value *V = - SimplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(), + simplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(), makeArrayRef(std::next(E->op_begin()), E->op_end()), - GEPI->isInBounds(), SQ); + GEPI->isInBounds(), Q); if (auto Simplified = checkExprResults(E, I, V)) return Simplified; } else if (AllConstant) { @@ -1453,10 +1457,12 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, if (Offset >= 0) { if (auto *C = dyn_cast( lookupOperandLeader(DepSI->getValueOperand()))) { - LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI - << " to constant " << *C << "\n"); - return createConstantExpression( - getConstantStoreValueForLoad(C, Offset, LoadType, DL)); + if (Constant *Res = + getConstantStoreValueForLoad(C, Offset, LoadType, DL)) { + LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI + << " to constant " << *Res << "\n"); + return createConstantExpression(Res); + } } } } else if (auto *DepLI = dyn_cast(DepInst)) { @@ -1503,9 +1509,8 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, else if (auto *II = 
dyn_cast(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) return createConstantExpression(UndefValue::get(LoadType)); - } else if (isAllocationFn(DepInst, TLI)) - if (auto *InitVal = getInitialValueOfAllocation(cast(DepInst), - TLI, LoadType)) + } else if (auto *InitVal = + getInitialValueOfAllocation(DepInst, TLI, LoadType)) return createConstantExpression(InitVal); return nullptr; @@ -3142,9 +3147,8 @@ bool NewGVN::singleReachablePHIPath( // connected component finding in this routine, and it's probably not worth // the complexity for the time being. So, we just keep a set of visited // MemoryAccess and return true when we hit a cycle. - if (Visited.count(First)) + if (!Visited.insert(First).second) return true; - Visited.insert(First); const auto *EndDef = First; for (auto *ChainDef : optimized_def_chain(First)) { @@ -3353,7 +3357,7 @@ void NewGVN::verifyStoreExpressions() const { // instruction set, propagating value numbers, marking things touched, etc, // until the set of touched instructions is completely empty. void NewGVN::iterateTouchedInstructions() { - unsigned int Iterations = 0; + uint64_t Iterations = 0; // Figure out where touchedinstructions starts int FirstInstr = TouchedInstructions.find_first(); // Nothing set, nothing to iterate, just return. diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index e0d0301c1ef6..689a2a286cb9 100644 --- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -125,6 +125,9 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, if (Call->isNoBuiltin() || Call->isStrictFP()) continue; + if (Call->isMustTailCall()) + continue; + // Skip if function either has local linkage or is not a known library // function. LibFunc LF; @@ -137,7 +140,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, case LibFunc_sqrt: if (TTI->haveFastSqrt(Call->getType()) && optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI, - DTU.hasValue() ? DTU.getPointer() : nullptr)) + DTU ? 
DTU.getPointer() : nullptr)) break; continue; default: diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp index a110f7d5c241..e1cc3fc71c3e 100644 --- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -53,9 +53,9 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" @@ -65,6 +65,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #define DEBUG_TYPE "safepoint-placement" diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index c354fa177a60..da1737979305 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -42,7 +41,6 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" @@ -54,7 +52,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" @@ -183,7 +180,7 @@ void ReassociatePass::BuildRankMap(Function &F, // we cannot move. This ensures that the ranks for these instructions are // all different in the block. for (Instruction &I : *BB) - if (mayBeMemoryDependent(I)) + if (mayHaveNonDefUseDependency(I)) ValueRankMap[&I] = ++BBRank; } } @@ -1076,7 +1073,7 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) { BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); - Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op. + Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op. Mul->takeName(Shl); // Everyone now refers to the mul instruction. diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp index a49b9ad3f62b..9dc64493a9ee 100644 --- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp @@ -24,8 +24,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index b795ad3899bc..51e4a5773f3e 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -258,6 +258,7 @@ struct GCPtrLivenessData { // base relation will remain. 
Internally, we add a mixture of the two // types, then update all the second type to the first type using DefiningValueMapTy = MapVector; +using IsKnownBaseMapTy = MapVector; using PointerToBaseTy = MapVector; using StatepointLiveSetTy = SetVector; using RematerializedValueMapTy = @@ -281,19 +282,29 @@ struct PartiallyConstructedSafepointRecord { RematerializedValueMapTy RematerializedValues; }; +struct RematerizlizationCandidateRecord { + // Chain from derived pointer to base. + SmallVector ChainToBase; + // Original base. + Value *RootOfChain; + // Cost of chain. + InstructionCost Cost; +}; +using RematCandTy = MapVector; + } // end anonymous namespace static ArrayRef GetDeoptBundleOperands(const CallBase *Call) { Optional DeoptBundle = Call->getOperandBundle(LLVMContext::OB_deopt); - if (!DeoptBundle.hasValue()) { + if (!DeoptBundle) { assert(AllowStatepointWithNoDeoptInfo && "Found non-leaf call without deopt info!"); return None; } - return DeoptBundle.getValue().Inputs; + return DeoptBundle->Inputs; } /// Compute the live-in set for every basic block in the function @@ -385,45 +396,16 @@ static void analyzeParsePointLiveness( Result.LiveSet = LiveSet; } -// Returns true is V is a knownBaseResult. -static bool isKnownBaseResult(Value *V); - -// Returns true if V is a BaseResult that already exists in the IR, i.e. it is -// not created by the findBasePointers algorithm. -static bool isOriginalBaseResult(Value *V); - -namespace { - -/// A single base defining value - An immediate base defining value for an -/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'. -/// For instructions which have multiple pointer [vector] inputs or that -/// transition between vector and scalar types, there is no immediate base -/// defining value. The 'base defining value' for 'Def' is the transitive -/// closure of this relation stopping at the first instruction which has no -/// immediate base defining value. The b.d.v. might itself be a base pointer, -/// but it can also be an arbitrary derived pointer. -struct BaseDefiningValueResult { - /// Contains the value which is the base defining value. - Value * const BDV; - - /// True if the base defining value is also known to be an actual base - /// pointer. - const bool IsKnownBase; - - BaseDefiningValueResult(Value *BDV, bool IsKnownBase) - : BDV(BDV), IsKnownBase(IsKnownBase) { -#ifndef NDEBUG - // Check consistency between new and old means of checking whether a BDV is - // a base. - bool MustBeBase = isKnownBaseResult(BDV); - assert(!MustBeBase || MustBeBase == IsKnownBase); -#endif - } -}; +/// Returns true if V is a known base. +static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases); -} // end anonymous namespace +/// Caches the IsKnownBase flag for a value and asserts that it wasn't present +/// in the cache before. +static void setKnownBase(Value *V, bool IsKnownBase, + IsKnownBaseMapTy &KnownBases); -static BaseDefiningValueResult findBaseDefiningValue(Value *I); +static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, + IsKnownBaseMapTy &KnownBases); /// Return a base defining value for the 'Index' element of the given vector /// instruction 'I'. If Index is null, returns a BDV for the entire vector @@ -434,76 +416,122 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I); /// vector returned is a BDV (and possibly a base) of the entire vector 'I'. /// If the later, the return pointer is a BDV (or possibly a base) for the /// particular element in 'I'. 
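// [Editorial sketch, not part of the patch] The RS4GC rewrite below replaces
// the old BaseDefiningValueResult (value plus IsKnownBase flag) with two
// memoizing side tables, so repeated walks over long derived-pointer chains
// stay linear. The shape of each case, with an illustrative base check:
//
// static Value *findBDVCached(Value *I, MapVector<Value *, Value *> &Cache,
//                             MapVector<Value *, bool> &KnownBases) {
//   auto Cached = Cache.find(I);
//   if (Cached != Cache.end())
//     return Cached->second;       // memoized answer
//   Value *BDV = I;                // real code dispatches on many kinds
//   bool IsKnownBase = isa<Argument>(I) || isa<GlobalValue>(I);
//   Cache[I] = BDV;
//   KnownBases[BDV] = IsKnownBase; // setKnownBase equivalent
//   return BDV;
// }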
-static BaseDefiningValueResult -findBaseDefiningValueOfVector(Value *I) { +static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache, + IsKnownBaseMapTy &KnownBases) { // Each case parallels findBaseDefiningValue below, see that code for // detailed motivation. - if (isa(I)) + auto Cached = Cache.find(I); + if (Cached != Cache.end()) + return Cached->second; + + if (isa(I)) { // An incoming argument to the function is a base pointer - return BaseDefiningValueResult(I, true); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } - if (isa(I)) + if (isa(I)) { // Base of constant vector consists only of constant null pointers. // For reasoning see similar case inside 'findBaseDefiningValue' function. - return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()), - true); + auto *CAZ = ConstantAggregateZero::get(I->getType()); + Cache[I] = CAZ; + setKnownBase(CAZ, /* IsKnownBase */true, KnownBases); + return CAZ; + } - if (isa(I)) - return BaseDefiningValueResult(I, true); + if (isa(I)) { + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } - if (isa(I)) + if (isa(I)) { // We don't know whether this vector contains entirely base pointers or // not. To be conservatively correct, we treat it as a BDV and will // duplicate code as needed to construct a parallel vector of bases. - return BaseDefiningValueResult(I, false); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */false, KnownBases); + return I; + } - if (isa(I)) + if (isa(I)) { // We don't know whether this vector contains entirely base pointers or // not. To be conservatively correct, we treat it as a BDV and will // duplicate code as needed to construct a parallel vector of bases. // TODO: There a number of local optimizations which could be applied here // for particular sufflevector patterns. - return BaseDefiningValueResult(I, false); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */false, KnownBases); + return I; + } // The behavior of getelementptr instructions is the same for vector and // non-vector data types. - if (auto *GEP = dyn_cast(I)) - return findBaseDefiningValue(GEP->getPointerOperand()); + if (auto *GEP = dyn_cast(I)) { + auto *BDV = + findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases); + Cache[GEP] = BDV; + return BDV; + } + + // The behavior of freeze instructions is the same for vector and + // non-vector data types. + if (auto *Freeze = dyn_cast(I)) { + auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases); + Cache[Freeze] = BDV; + return BDV; + } // If the pointer comes through a bitcast of a vector of pointers to // a vector of another type of pointer, then look through the bitcast - if (auto *BC = dyn_cast(I)) - return findBaseDefiningValue(BC->getOperand(0)); + if (auto *BC = dyn_cast(I)) { + auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases); + Cache[BC] = BDV; + return BDV; + } // We assume that functions in the source language only return base // pointers. This should probably be generalized via attributes to support // both source language and internal functions. - if (isa(I) || isa(I)) - return BaseDefiningValueResult(I, true); + if (isa(I) || isa(I)) { + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } // A PHI or Select is a base defining value. The outer findBasePointer // algorithm is responsible for constructing a base value for this BDV. 
assert((isa(I) || isa(I)) && "unknown vector instruction - no base found for vector element"); - return BaseDefiningValueResult(I, false); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */false, KnownBases); + return I; } /// Helper function for findBasePointer - Will return a value which either a) /// defines the base pointer for the input, b) blocks the simple search /// (i.e. a PHI or Select of two derived pointers), or c) involves a change /// from pointer to vector type or back. -static BaseDefiningValueResult findBaseDefiningValue(Value *I) { +static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, + IsKnownBaseMapTy &KnownBases) { assert(I->getType()->isPtrOrPtrVectorTy() && "Illegal to ask for the base pointer of a non-pointer type"); + auto Cached = Cache.find(I); + if (Cached != Cache.end()) + return Cached->second; if (I->getType()->isVectorTy()) - return findBaseDefiningValueOfVector(I); + return findBaseDefiningValueOfVector(I, Cache, KnownBases); - if (isa(I)) + if (isa(I)) { // An incoming argument to the function is a base pointer // We should have never reached here if this argument isn't an gc value - return BaseDefiningValueResult(I, true); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } if (isa(I)) { // We assume that objects with a constant base (e.g. a global) can't move @@ -516,8 +544,10 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // "phi (const1, const2)" or "phi (const, regular gc ptr)". // See constant.ll file for relevant test cases. - return BaseDefiningValueResult( - ConstantPointerNull::get(cast(I->getType())), true); + auto *CPN = ConstantPointerNull::get(cast(I->getType())); + Cache[I] = CPN; + setKnownBase(CPN, /* IsKnownBase */true, KnownBases); + return CPN; } // inttoptrs in an integral address space are currently ill-defined. We @@ -525,8 +555,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // constant rule above and because we don't really have a better semantic // to give them. Note that the optimizer is always free to insert undefined // behavior on dynamically dead paths as well. - if (isa(I)) - return BaseDefiningValueResult(I, true); + if (isa(I)) { + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } if (CastInst *CI = dyn_cast(I)) { Value *Def = CI->stripPointerCasts(); @@ -539,16 +572,31 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // not simply a pointer cast (i.e. an inttoptr). We don't know how to // handle int->ptr conversion. 
assert(!isa(Def) && "shouldn't find another cast here"); - return findBaseDefiningValue(Def); + auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases); + Cache[CI] = BDV; + return BDV; } - if (isa(I)) + if (isa(I)) { // The value loaded is an gc base itself - return BaseDefiningValueResult(I, true); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } - if (GetElementPtrInst *GEP = dyn_cast(I)) + if (GetElementPtrInst *GEP = dyn_cast(I)) { // The base of this GEP is the base - return findBaseDefiningValue(GEP->getPointerOperand()); + auto *BDV = + findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases); + Cache[GEP] = BDV; + return BDV; + } + + if (auto *Freeze = dyn_cast(I)) { + auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases); + Cache[Freeze] = BDV; + return BDV; + } if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { @@ -569,24 +617,32 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { llvm_unreachable( "interaction with the gcroot mechanism is not supported"); case Intrinsic::experimental_gc_get_pointer_base: - return findBaseDefiningValue(II->getOperand(0)); + auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases); + Cache[II] = BDV; + return BDV; } } // We assume that functions in the source language only return base // pointers. This should probably be generalized via attributes to support // both source language and internal functions. - if (isa(I) || isa(I)) - return BaseDefiningValueResult(I, true); + if (isa(I) || isa(I)) { + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } // TODO: I have absolutely no idea how to implement this part yet. It's not // necessarily hard, I just haven't really looked at it yet. assert(!isa(I) && "Landing Pad is unimplemented"); - if (isa(I)) + if (isa(I)) { // A CAS is effectively a atomic store and load combined under a // predicate. From the perspective of base pointers, we just treat it // like a load. - return BaseDefiningValueResult(I, true); + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } assert(!isa(I) && "Xchg handled above, all others are " "binary ops which don't apply to pointers"); @@ -594,8 +650,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // The aggregate ops. Aggregates can either be in the heap or on the // stack, but in either case, this is simply a field load. As a result, // this is a defining definition of the base just like a load is. - if (isa(I)) - return BaseDefiningValueResult(I, true); + if (isa(I)) { + Cache[I] = I; + setKnownBase(I, /* IsKnownBase */true, KnownBases); + return I; + } // We should never see an insert vector since that would require we be // tracing back a struct value not a pointer value. @@ -606,6 +665,8 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // substituting gc.get.pointer.base() intrinsic. bool IsKnownBase = isa(I) && cast(I)->getMetadata("is_base_value"); + setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases); + Cache[I] = I; // An extractelement produces a base result exactly when it's input does. // We may need to insert a parallel instruction to extract the appropriate @@ -615,33 +676,38 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // Note: There a lot of obvious peephole cases here. This are deliberately // handled after the main base pointer inference algorithm to make writing // test cases to exercise that code easier. 
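// [Editorial sketch, not part of the patch] The "is_base_value" marker used
// above: base phis/selects manufactured by RS4GC are tagged with an empty
// MDNode so that a later pass (or rerun) recognizes them as known bases.
// BaseInst and I are assumed in scope:
//
// BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
// bool IsKnownBase = isa<Instruction>(I) &&
//                    cast<Instruction>(I)->getMetadata("is_base_value");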
- return BaseDefiningValueResult(I, IsKnownBase); + return I; // The last two cases here don't return a base pointer. Instead, they // return a value which dynamically selects from among several base // derived pointers (each with it's own base potentially). It's the job of // the caller to resolve these. assert((isa(I) || isa(I)) && - "missing instruction case in findBaseDefiningValing"); - return BaseDefiningValueResult(I, IsKnownBase); + "missing instruction case in findBaseDefiningValue"); + return I; } /// Returns the base defining value for this value. -static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { - Value *&Cached = Cache[I]; - if (!Cached) { - Cached = findBaseDefiningValue(I).BDV; +static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache, + IsKnownBaseMapTy &KnownBases) { + if (Cache.find(I) == Cache.end()) { + auto *BDV = findBaseDefiningValue(I, Cache, KnownBases); + Cache[I] = BDV; LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> " - << Cached->getName() << "\n"); + << Cache[I]->getName() << ", is known base = " + << KnownBases[I] << "\n"); } assert(Cache[I] != nullptr); - return Cached; + assert(KnownBases.find(Cache[I]) != KnownBases.end() && + "Cached value must be present in known bases map"); + return Cache[I]; } /// Return a base pointer for this value if known. Otherwise, return it's /// base defining value. -static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { - Value *Def = findBaseDefiningValueCached(I, Cache); +static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache, + IsKnownBaseMapTy &KnownBases) { + Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases); auto Found = Cache.find(Def); if (Found != Cache.end()) { // Either a base-of relation, or a self reference. Caller must check. @@ -651,6 +717,7 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { return Def; } +#ifndef NDEBUG /// This value is a base pointer that is not generated by RS4GC, i.e. it already /// exists in the code. static bool isOriginalBaseResult(Value *V) { @@ -659,21 +726,22 @@ static bool isOriginalBaseResult(Value *V) { !isa(V) && !isa(V) && !isa(V); } +#endif -/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, -/// is it known to be a base pointer? Or do we need to continue searching. -static bool isKnownBaseResult(Value *V) { - if (isOriginalBaseResult(V)) - return true; - if (isa(V) && - cast(V)->getMetadata("is_base_value")) { - // This is a previously inserted base phi or select. We know - // that this is a base value. - return true; - } +static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) { + auto It = KnownBases.find(V); + assert(It != KnownBases.end() && "Value not present in the map"); + return It->second; +} - // We need to keep searching - return false; +static void setKnownBase(Value *V, bool IsKnownBase, + IsKnownBaseMapTy &KnownBases) { +#ifndef NDEBUG + auto It = KnownBases.find(V); + if (It != KnownBases.end()) + assert(It->second == IsKnownBase && "Changing already present value"); +#endif + KnownBases[V] = IsKnownBase; } // Returns true if First and Second values are both scalar or both vector. @@ -801,10 +869,11 @@ static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) { /// For gc objects, this is simply itself. On success, returns a value which is /// the base pointer. (This is reliable and can be used for relocation.) On /// failure, returns nullptr. 
-static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { - Value *Def = findBaseOrBDV(I, Cache); +static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache, + IsKnownBaseMapTy &KnownBases) { + Value *Def = findBaseOrBDV(I, Cache, KnownBases); - if (isKnownBaseResult(Def) && areBothVectorOrScalar(Def, I)) + if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I)) return Def; // Here's the rough algorithm: @@ -887,8 +956,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { assert(!isOriginalBaseResult(Current) && "why did it get added?"); auto visitIncomingValue = [&](Value *InVal) { - Value *Base = findBaseOrBDV(InVal, Cache); - if (isKnownBaseResult(Base) && areBothVectorOrScalar(Base, InVal)) + Value *Base = findBaseOrBDV(InVal, Cache, KnownBases); + if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal)) // Known bases won't need new instructions introduced and can be // ignored safely. However, this can only be done when InVal and Base // are both scalar or both vector. Otherwise, we need to find a @@ -924,12 +993,16 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { for (auto Pair : States) { Value *BDV = Pair.first; auto canPruneInput = [&](Value *V) { - Value *BDV = findBaseOrBDV(V, Cache); - if (V->stripPointerCasts() != BDV) + // If the input of the BDV is the BDV itself we can prune it. This is + // only possible if the BDV is a PHI node. + if (V->stripPointerCasts() == BDV) + return true; + Value *VBDV = findBaseOrBDV(V, Cache, KnownBases); + if (V->stripPointerCasts() != VBDV) return false; // The assumption is that anything not in the state list is // propagates a base pointer. - return States.count(BDV) == 0; + return States.count(VBDV) == 0; }; bool CanPrune = true; @@ -975,13 +1048,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Only values that do not have known bases or those that have differing // type (scalar versus vector) from a possible known base should be in the // lattice. - assert((!isKnownBaseResult(BDV) || + assert((!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) && "why did it get added?"); BDVState NewState(BDV); visitBDVOperands(BDV, [&](Value *Op) { - Value *BDV = findBaseOrBDV(Op, Cache); + Value *BDV = findBaseOrBDV(Op, Cache, KnownBases); auto OpState = GetStateForBDV(BDV, Op); NewState.meet(OpState); }); @@ -1014,8 +1087,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Only values that do not have known bases or those that have differing // type (scalar versus vector) from a possible known base should be in the // lattice. 
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, BaseValue)) && - "why did it get added?"); + assert( + (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) && + "why did it get added?"); assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); if (!State.isBase() || !isa(BaseValue->getType())) @@ -1033,6 +1107,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE); BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); States[I] = BDVState(I, BDVState::Base, BaseInst); + setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases); } else if (!isa(I->getType())) { // We need to handle cases that have a vector base but the instruction is // a scalar type (these could be phis or selects or any instruction that @@ -1055,7 +1130,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Only values that do not have known bases or those that have differing // type (scalar versus vector) from a possible known base should be in the // lattice. - assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, State.getBaseValue())) && + assert((!isKnownBase(I, KnownBases) || + !areBothVectorOrScalar(I, State.getBaseValue())) && "why did it get added?"); assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); @@ -1087,6 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Add metadata marking this as a base value BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); States[I] = BDVState(I, BDVState::Conflict, BaseInst); + setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases); } #ifndef NDEBUG @@ -1102,7 +1179,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // assured to be able to determine an instruction which produces it's base // pointer. auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) { - Value *BDV = findBaseOrBDV(Input, Cache); + Value *BDV = findBaseOrBDV(Input, Cache, KnownBases); Value *Base = nullptr; if (!States.count(BDV)) { assert(areBothVectorOrScalar(BDV, Input)); @@ -1129,7 +1206,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Only values that do not have known bases or those that have differing // type (scalar versus vector) from a possible known base should be in the // lattice. - assert((!isKnownBaseResult(BDV) || + assert((!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, State.getBaseValue())) && "why did it get added?"); assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); @@ -1154,13 +1231,21 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { #ifndef NDEBUG Value *OldBase = BlockToValue[InBB]; Value *Base = getBaseForInput(InVal, nullptr); + + // We can't use `stripPointerCasts` instead of this function because + // `stripPointerCasts` doesn't handle vectors of pointers. + auto StripBitCasts = [](Value *V) -> Value * { + while (auto *BC = dyn_cast(V)) + V = BC->getOperand(0); + return V; + }; // In essence this assert states: the only way two values // incoming from the same basic block may be different is by // being different bitcasts of the same value. A cleanup // that remains TODO is changing findBaseOrBDV to return an // llvm::Value of the correct type (and still remain pure). // This will remove the need to add bitcasts. 
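The local StripBitCasts lambda exists because Value::stripPointerCasts declines to look through bitcasts of vectors of pointers, which is exactly the case the assert needs to tolerate. The same walk restated as a reusable helper, assuming the usual LLVM headers:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Walks through bitcasts only; unlike Value::stripPointerCasts this also
    // looks through bitcasts between vectors of pointers.
    static Value *stripBitCastsOnly(Value *V) {
      while (auto *BC = dyn_cast<BitCastInst>(V))
        V = BC->getOperand(0);
      return V;
    }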
- assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() && + assert(StripBitCasts(Base) == StripBitCasts(OldBase) && "findBaseOrBDV should be pure!"); #endif } @@ -1223,8 +1308,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Only values that do not have known bases or those that have differing // type (scalar versus vector) from a possible known base should be in the // lattice. - assert((!isKnownBaseResult(BDV) || !areBothVectorOrScalar(BDV, Base)) && - "why did it get added?"); + assert( + (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) && + "why did it get added?"); LLVM_DEBUG( dbgs() << "Updating base value cache" @@ -1255,9 +1341,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // pointer was a base pointer. static void findBasePointers(const StatepointLiveSetTy &live, PointerToBaseTy &PointerToBase, DominatorTree *DT, - DefiningValueMapTy &DVCache) { + DefiningValueMapTy &DVCache, + IsKnownBaseMapTy &KnownBases) { for (Value *ptr : live) { - Value *base = findBasePointer(ptr, DVCache); + Value *base = findBasePointer(ptr, DVCache, KnownBases); assert(base && "failed to find base pointer"); PointerToBase[ptr] = base; assert((!isa(base) || !isa(ptr) || @@ -1272,7 +1359,8 @@ static void findBasePointers(const StatepointLiveSetTy &live, static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, CallBase *Call, PartiallyConstructedSafepointRecord &result, - PointerToBaseTy &PointerToBase) { + PointerToBaseTy &PointerToBase, + IsKnownBaseMapTy &KnownBases) { StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet; // We assume that all pointers passed to deopt are base pointers; as an // optimization, we can use this to avoid seperately materializing the base @@ -1286,7 +1374,8 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, PotentiallyDerivedPointers.remove(V); PointerToBase[V] = V; } - findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache); + findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache, + KnownBases); } /// Given an updated version of the dataflow liveness results, update the @@ -1349,23 +1438,23 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] = // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. static AttributeList legalizeCallAttributes(LLVMContext &Ctx, - AttributeList AL) { - if (AL.isEmpty()) - return AL; + AttributeList OrigAL, + AttributeList StatepointAL) { + if (OrigAL.isEmpty()) + return StatepointAL; // Remove the readonly, readnone, and statepoint function attributes. 
- AttrBuilder FnAttrs(Ctx, AL.getFnAttrs()); + AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs()); for (auto Attr : FnAttrsToStrip) FnAttrs.removeAttribute(Attr); - for (Attribute A : AL.getFnAttrs()) { + for (Attribute A : OrigAL.getFnAttrs()) { if (isStatepointDirectiveAttr(A)) FnAttrs.removeAttribute(A); } // Just skip parameter and return attributes for now - return AttributeList::get(Ctx, AttributeList::FunctionIndex, - AttributeSet::get(Ctx, FnAttrs)); + return StatepointAL.addFnAttributes(Ctx, FnAttrs); } /// Helper function to place all gc relocates necessary for the given @@ -1570,8 +1659,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ assert(DeoptLowering.equals("live-through") && "Unsupported value!"); } - Value *CallTarget = Call->getCalledOperand(); - if (Function *F = dyn_cast(CallTarget)) { + FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand()); + if (Function *F = dyn_cast(CallTarget.getCallee())) { auto IID = F->getIntrinsicID(); if (IID == Intrinsic::experimental_deoptimize) { // Calls to llvm.experimental.deoptimize are lowered to calls to the @@ -1589,8 +1678,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // the same module. This is fine -- we assume the frontend knew what it // was doing when generating this kind of IR. CallTarget = F->getParent() - ->getOrInsertFunction("__llvm_deoptimize", FTy) - .getCallee(); + ->getOrInsertFunction("__llvm_deoptimize", FTy); IsDeoptimize = true; } else if (IID == Intrinsic::memcpy_element_unordered_atomic || @@ -1686,8 +1774,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ CallTarget = F->getParent() - ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy) - .getCallee(); + ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy); } } @@ -1705,8 +1792,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // function attributes. In case if we can handle this set of attributes - // set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPCall->setAttributes( - legalizeCallAttributes(CI->getContext(), CI->getAttributes())); + SPCall->setAttributes(legalizeCallAttributes( + CI->getContext(), CI->getAttributes(), SPCall->getAttributes())); Token = cast(SPCall); @@ -1732,8 +1819,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // function attributes. In case if we can handle this set of attributes - // set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPInvoke->setAttributes( - legalizeCallAttributes(II->getContext(), II->getAttributes())); + SPInvoke->setAttributes(legalizeCallAttributes( + II->getContext(), II->getAttributes(), SPInvoke->getAttributes())); Token = cast(SPInvoke); @@ -2071,6 +2158,7 @@ static void relocationViaAlloca( assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues && "we must have the same allocas with lives"); + (void) NumRematerializedValues; if (!PromotableAllocas.empty()) { // Apply mem2reg to promote alloca to SSA PromoteMemToReg(PromotableAllocas, DT); @@ -2221,27 +2309,25 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh return true; } -// From the statepoint live set pick values that are cheaper to recompute then -// to relocate. Remove this values from the live set, rematerialize them after -// statepoint and record them in "Info" structure. Note that similar to -// relocated values we don't do any user adjustments here. 
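Carrying the call target as a FunctionCallee rather than a bare Value is an opaque-pointer accommodation: the callee's FunctionType can no longer be recovered from its pointer type, so the type must travel with the value. A reduced sketch of the pattern (the pass builds a dedicated FunctionType for __llvm_deoptimize; the call site's own type is reused here only to keep the sketch short):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static FunctionCallee getRewrittenTarget(CallBase *Call) {
      // Pair the callee operand with the type recorded on the call site.
      FunctionCallee Target(Call->getFunctionType(), Call->getCalledOperand());
      // Swapping in another target keeps a FunctionType alongside the new
      // callee, so later code never rederives it from an opaque pointer.
      if (auto *F = dyn_cast<Function>(Target.getCallee()))
        return F->getParent()->getOrInsertFunction("__llvm_deoptimize",
                                                   Call->getFunctionType());
      return Target;
    }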
-static void rematerializeLiveValues(CallBase *Call,
-                                    PartiallyConstructedSafepointRecord &Info,
-                                    PointerToBaseTy &PointerToBase,
-                                    TargetTransformInfo &TTI) {
+// Find derived pointers that can be recomputed cheap enough and fill
+// RematerizationCandidates with such candidates.
+static void
+findRematerializationCandidates(PointerToBaseTy PointerToBase,
+                                RematCandTy &RematerizationCandidates,
+                                TargetTransformInfo &TTI) {
   const unsigned int ChainLengthThreshold = 10;
 
-  // Record values we are going to delete from this statepoint live set.
-  // We can not di this in following loop due to iterator invalidation.
-  SmallVector<Value *, 32> LiveValuesToBeDeleted;
+  for (auto P2B : PointerToBase) {
+    auto *Derived = P2B.first;
+    auto *Base = P2B.second;
+    // Consider only derived pointers.
+    if (Derived == Base)
+      continue;
 
-  for (Value *LiveValue: Info.LiveSet) {
-    // For each live pointer find its defining chain
+    // For each live pointer find its defining chain.
     SmallVector<Instruction *, 3> ChainToBase;
-    assert(PointerToBase.count(LiveValue));
     Value *RootOfChain =
-      findRematerializableChainToBasePointer(ChainToBase,
-                                             LiveValue);
+        findRematerializableChainToBasePointer(ChainToBase, Derived);
 
     // Nothing to do, or chain is too long
     if ( ChainToBase.size() == 0 ||
@@ -2250,9 +2336,9 @@ static void rematerializeLiveValues(CallBase *Call,
 
     // Handle the scenario where the RootOfChain is not equal to the
     // Base Value, but they are essentially the same phi values.
-    if (RootOfChain != PointerToBase[LiveValue]) {
+    if (RootOfChain != PointerToBase[Derived]) {
       PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
-      PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
+      PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
       if (!OrigRootPhi || !AlternateRootPhi)
         continue;
       // PHI nodes that have the same incoming values, and belonging to the same
@@ -2266,33 +2352,61 @@ static void rematerializeLiveValues(CallBase *Call,
       // deficiency in the findBasePointer algorithm.
       if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
         continue;
-      // Now that the phi nodes are proved to be the same, assert that
-      // findBasePointer's newly generated AlternateRootPhi is present in the
-      // liveset of the call.
-      assert(Info.LiveSet.count(AlternateRootPhi));
     }
-    // Compute cost of this chain
+    // Compute cost of this chain.
     InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
     // TODO: We can also account for cases when we will be able to remove some
     //       of the rematerialized values by later optimization passes. I.e if
     //       we rematerialized several intersecting chains. Or if original values
     //       don't have any uses besides this statepoint.
 
+    // Ok, there is a candidate.
+    RematerizlizationCandidateRecord Record;
+    Record.ChainToBase = ChainToBase;
+    Record.RootOfChain = RootOfChain;
+    Record.Cost = Cost;
+    RematerizationCandidates.insert({ Derived, Record });
+  }
+}
+
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
+// statepoint and record them in "Info" structure. Note that similar to
+// relocated values we don't do any user adjustments here.
+static void rematerializeLiveValues(CallBase *Call,
+                                    PartiallyConstructedSafepointRecord &Info,
+                                    PointerToBaseTy &PointerToBase,
+                                    RematCandTy &RematerizationCandidates,
+                                    TargetTransformInfo &TTI) {
+  // Record values we are going to delete from this statepoint live set.
+  // We cannot do this in the following loop due to iterator invalidation.
+  SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+  for (Value *LiveValue : Info.LiveSet) {
+    auto It = RematerizationCandidates.find(LiveValue);
+    if (It == RematerizationCandidates.end())
+      continue;
+
+    RematerizlizationCandidateRecord &Record = It->second;
+
+    InstructionCost Cost = Record.Cost;
     // For invokes we need to rematerialize each chain twice - for normal and
     // for unwind basic blocks. Model this by multiplying cost by two.
-    if (isa<InvokeInst>(Call)) {
+    if (isa<InvokeInst>(Call))
       Cost *= 2;
-    }
-    // If it's too expensive - skip it
+
+    // If it's too expensive - skip it.
     if (Cost >= RematerializationThreshold)
       continue;
 
     // Remove value from the live set
     LiveValuesToBeDeleted.push_back(LiveValue);
 
-    // Clone instructions and record them inside "Info" structure
+    // Clone instructions and record them inside "Info" structure.
 
-    // Walk backwards to visit top-most instructions first
+    // For each live pointer get its defining chain.
+    SmallVector<Instruction *, 3> ChainToBase = Record.ChainToBase;
+    // Walk backwards to visit top-most instructions first.
     std::reverse(ChainToBase.begin(), ChainToBase.end());
 
     // Utility function which clones all instructions from "ChainToBase"
@@ -2352,7 +2466,7 @@ static void rematerializeLiveValues(CallBase *Call,
       Instruction *InsertBefore = Call->getNextNode();
       assert(InsertBefore);
       Instruction *RematerializedValue = rematerializeChain(
-          InsertBefore, RootOfChain, PointerToBase[LiveValue]);
+          InsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
       Info.RematerializedValues[RematerializedValue] = LiveValue;
     } else {
       auto *Invoke = cast<InvokeInst>(Call);
@@ -2363,9 +2477,9 @@ static void rematerializeLiveValues(CallBase *Call,
         &*Invoke->getUnwindDest()->getFirstInsertionPt();
 
       Instruction *NormalRematerializedValue = rematerializeChain(
-          NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+          NormalInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
       Instruction *UnwindRematerializedValue = rematerializeChain(
-          UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+          UnwindInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
 
       Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
       Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2380,7 +2494,8 @@ static void rematerializeLiveValues(CallBase *Call,
 
 static bool inlineGetBaseAndOffset(Function &F,
                                    SmallVectorImpl<CallInst *> &Intrinsics,
-                                   DefiningValueMapTy &DVCache) {
+                                   DefiningValueMapTy &DVCache,
+                                   IsKnownBaseMapTy &KnownBases) {
   auto &Context = F.getContext();
   auto &DL = F.getParent()->getDataLayout();
   bool Changed = false;
@@ -2389,7 +2504,8 @@
     switch (Callsite->getIntrinsicID()) {
     case Intrinsic::experimental_gc_get_pointer_base: {
       Changed = true;
-      Value *Base = findBasePointer(Callsite->getOperand(0), DVCache);
+      Value *Base =
+          findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
       assert(!DVCache.count(Callsite));
       auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
           Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
@@ -2404,7 +2520,7 @@
     case Intrinsic::experimental_gc_get_pointer_offset: {
       Changed = true;
       Value *Derived = Callsite->getOperand(0);
-      Value *Base = findBasePointer(Derived, DVCache);
+      Value *Base = findBasePointer(Derived, DVCache, KnownBases);
       assert(!DVCache.count(Callsite));
       unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
       unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
@@ -2431,7 +2547,8 @@ static bool
inlineGetBaseAndOffset(Function &F, static bool insertParsePoints(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, SmallVectorImpl &ToUpdate, - DefiningValueMapTy &DVCache) { + DefiningValueMapTy &DVCache, + IsKnownBaseMapTy &KnownBases) { #ifndef NDEBUG // Validate the input std::set Uniqued; @@ -2487,7 +2604,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // B) Find the base pointers for each live pointer for (size_t i = 0; i < Records.size(); i++) { PartiallyConstructedSafepointRecord &info = Records[i]; - findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase); + findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases); } if (PrintBasePointers) { errs() << "Base Pairs (w/o Relocation):\n"; @@ -2563,11 +2680,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Holders.clear(); + // Compute the cost of possible re-materialization of derived pointers. + RematCandTy RematerizationCandidates; + findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI); + // In order to reduce live set of statepoint we might choose to rematerialize // some values instead of relocating them. This is purely an optimization and // does not influence correctness. for (size_t i = 0; i < Records.size(); i++) - rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI); + rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, + RematerizationCandidates, TTI); // We need this to safely RAUW and delete call or invoke return values that // may themselves be live over a statepoint. For details, please see usage in @@ -2930,13 +3052,18 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, // inlineGetBaseAndOffset() and insertParsePoints(). DefiningValueMapTy DVCache; + // Mapping between a base values and a flag indicating whether it's a known + // base or not. + IsKnownBaseMapTy KnownBases; + if (!Intrinsics.empty()) // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding // live references. 
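Hoisting findRematerializationCandidates out of the per-statepoint loop means the chain walk and TTI costing run once per function; rematerializeLiveValues then replays only the cheap per-site decisions, such as doubling the cost for invokes. The shape of that split, as a standalone sketch with STL types standing in for the pass's record and map:

    #include <map>
    #include <vector>

    struct CandidateRecord {
      int Cost = 0;                 // stand-in for InstructionCost
      std::vector<int> ChainToBase; // stand-in for the instruction chain
    };

    // Phase 1 output: one record per derived pointer, computed once.
    using CandidateMap = std::map<int, CandidateRecord>;

    // Phase 2: per call site, pick live values cheap enough to recompute.
    static std::vector<int>
    pickRematerializable(const CandidateMap &Candidates,
                         const std::vector<int> &LiveSet, bool IsInvoke,
                         int Threshold) {
      std::vector<int> Picked;
      for (int Live : LiveSet) {
        auto It = Candidates.find(Live);
        if (It == Candidates.end())
          continue;
        int Cost = It->second.Cost;
        if (IsInvoke) // the chain is cloned on both normal and unwind edges
          Cost *= 2;
        if (Cost < Threshold)
          Picked.push_back(Live);
      }
      return Picked;
    }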
- MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache); + MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases); if (!ParsePointNeeded.empty()) - MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache); + MadeChange |= + insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases); return MadeChange; } diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index c34da51e6dc1..2282ef636076 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -17,20 +17,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/SCCP.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" @@ -38,14 +33,13 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" @@ -59,7 +53,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/Transforms/Utils/SCCPSolver.h" #include #include #include @@ -97,6 +91,18 @@ static bool isOverdefined(const ValueLatticeElement &LV) { return !LV.isUnknownOrUndef() && !isConstant(LV); } +static bool canRemoveInstruction(Instruction *I) { + if (wouldInstructionBeTriviallyDead(I)) + return true; + + // Some instructions can be handled but are rejected above. Catch + // those cases by falling through to here. + // TODO: Mark globals as being constant earlier, so + // TODO: wouldInstructionBeTriviallyDead() knows that atomic loads + // TODO: are safe to remove. + return isa(I); +} + static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { Constant *Const = nullptr; if (V->getType()->isStructTy()) { @@ -127,7 +133,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { // Calls with "clang.arc.attachedcall" implicitly use the return value and // those uses cannot be updated with a constant. 
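canRemoveInstruction deliberately answers yes for loads that wouldInstructionBeTriviallyDead rejects (for example, atomic loads), because by the time SCCP asks, the load has already been proven to produce a constant. A condensed model of how the two call sites above use it; the real code also threads a TargetLibraryInfo into the triviality check:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    static bool replaceAndMaybeErase(Instruction &I, Constant &C) {
      // All uses now see the constant, so erasing I cannot break users.
      I.replaceAllUsesWith(&C);
      if (wouldInstructionBeTriviallyDead(&I) || isa<LoadInst>(&I)) {
        I.eraseFromParent();
        return true;
      }
      return false;
    }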
CallBase *CB = dyn_cast(V); - if (CB && ((CB->isMustTailCall() && !CB->isSafeToRemove()) || + if (CB && ((CB->isMustTailCall() && + !canRemoveInstruction(CB)) || CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall))) { Function *F = CB->getCalledFunction(); @@ -156,7 +163,7 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB, if (Inst.getType()->isVoidTy()) continue; if (tryToReplaceWithConstant(Solver, &Inst)) { - if (Inst.isSafeToRemove()) + if (canRemoveInstruction(&Inst)) Inst.eraseFromParent(); MadeChanges = true; @@ -170,6 +177,7 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB, continue; if (IV.getConstantRange().isAllNonNegative()) { auto *ZExt = new ZExtInst(ExtOp, Inst.getType(), "", &Inst); + ZExt->takeName(&Inst); InsertedValues.insert(ZExt); Inst.replaceAllUsesWith(ZExt); Solver.removeLatticeValueFor(&Inst); @@ -182,10 +190,14 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB, return MadeChanges; } +static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, + DomTreeUpdater &DTU, + BasicBlock *&NewUnreachableBB); + // runSCCP() - Run the Sparse Conditional Constant Propagation algorithm, // and return true if the function was modified. static bool runSCCP(Function &F, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, DomTreeUpdater &DTU) { LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); SCCPSolver Solver( DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; }, @@ -213,13 +225,12 @@ static bool runSCCP(Function &F, const DataLayout &DL, // as we cannot modify the CFG of the function. SmallPtrSet InsertedValues; + SmallVector BlocksToErase; for (BasicBlock &BB : F) { if (!Solver.isBlockExecutable(&BB)) { LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB); - ++NumDeadBlocks; - NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first; - + BlocksToErase.push_back(&BB); MadeChanges = true; continue; } @@ -228,17 +239,32 @@ static bool runSCCP(Function &F, const DataLayout &DL, NumInstRemoved, NumInstReplaced); } + // Remove unreachable blocks and non-feasible edges. + for (BasicBlock *DeadBB : BlocksToErase) + NumInstRemoved += changeToUnreachable(DeadBB->getFirstNonPHI(), + /*PreserveLCSSA=*/false, &DTU); + + BasicBlock *NewUnreachableBB = nullptr; + for (BasicBlock &BB : F) + MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB); + + for (BasicBlock *DeadBB : BlocksToErase) + if (!DeadBB->hasAddressTaken()) + DTU.deleteBB(DeadBB); + return MadeChanges; } PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) { const DataLayout &DL = F.getParent()->getDataLayout(); auto &TLI = AM.getResult(F); - if (!runSCCP(F, DL, &TLI)) + auto *DT = AM.getCachedResult(F); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + if (!runSCCP(F, DL, &TLI, DTU)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); - PA.preserveSet(); + PA.preserve(); return PA; } @@ -261,7 +287,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); - AU.setPreservesCFG(); + AU.addPreserved(); } // runOnFunction - Run the Sparse Conditional Constant Propagation @@ -272,7 +298,10 @@ public: const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis().getTLI(F); - return runSCCP(F, DL, TLI); + auto *DTWP = getAnalysisIfAvailable(); + DomTreeUpdater DTU(DTWP ? 
&DTWP->getDomTree() : nullptr, + DomTreeUpdater::UpdateStrategy::Lazy); + return runSCCP(F, DL, TLI, DTU); } }; @@ -342,7 +371,8 @@ static void findReturnsToZap(Function &F, } static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, - DomTreeUpdater &DTU) { + DomTreeUpdater &DTU, + BasicBlock *&NewUnreachableBB) { SmallPtrSet FeasibleSuccessors; bool HasNonFeasibleEdges = false; for (BasicBlock *Succ : successors(BB)) { @@ -362,7 +392,19 @@ static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, isa(TI)) && "Terminator must be a br, switch or indirectbr"); - if (FeasibleSuccessors.size() == 1) { + if (FeasibleSuccessors.size() == 0) { + // Branch on undef/poison, replace with unreachable. + SmallPtrSet SeenSuccs; + SmallVector Updates; + for (BasicBlock *Succ : successors(BB)) { + Succ->removePredecessor(BB); + if (SeenSuccs.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + TI->eraseFromParent(); + new UnreachableInst(BB->getContext(), BB); + DTU.applyUpdatesPermissive(Updates); + } else if (FeasibleSuccessors.size() == 1) { // Replace with an unconditional branch to the only feasible successor. BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin(); SmallVector Updates; @@ -385,6 +427,23 @@ static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, } else if (FeasibleSuccessors.size() > 1) { SwitchInstProfUpdateWrapper SI(*cast(TI)); SmallVector Updates; + + // If the default destination is unfeasible it will never be taken. Replace + // it with a new block with a single Unreachable instruction. + BasicBlock *DefaultDest = SI->getDefaultDest(); + if (!FeasibleSuccessors.contains(DefaultDest)) { + if (!NewUnreachableBB) { + NewUnreachableBB = + BasicBlock::Create(DefaultDest->getContext(), "default.unreachable", + DefaultDest->getParent(), DefaultDest); + new UnreachableInst(DefaultDest->getContext(), NewUnreachableBB); + } + + SI->setDefaultDest(NewUnreachableBB); + Updates.push_back({DominatorTree::Delete, BB, DefaultDest}); + Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB}); + } + for (auto CI = SI->case_begin(); CI != SI->case_end();) { if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) { ++CI; @@ -532,11 +591,13 @@ bool llvm::runIPSCCP( NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(), /*PreserveLCSSA=*/false, &DTU); + BasicBlock *NewUnreachableBB = nullptr; for (BasicBlock &BB : F) - MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU); + MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB); for (BasicBlock *DeadBB : BlocksToErase) - DTU.deleteBB(DeadBB); + if (!DeadBB->hasAddressTaken()) + DTU.deleteBB(DeadBB); for (BasicBlock &BB : F) { for (Instruction &Inst : llvm::make_early_inc_range(BB)) { diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 8be8946702be..143a035749c7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -57,11 +57,9 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -78,14 +76,12 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include 
"llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include -#include #include #include #include @@ -1016,7 +1012,7 @@ private: I.getParent()->getFirstInsertionPt() == I.getParent()->end()) return PI.setAborted(&I); - // TODO: We could use SimplifyInstruction here to fold PHINodes and + // TODO: We could use simplifyInstruction here to fold PHINodes and // SelectInsts. However, doing so requires to change the current // dead-operand-tracking mechanism. For instance, suppose neither loading // from %U nor %other traps. Then "load (select undef, %U, %other)" does not @@ -1987,13 +1983,22 @@ static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t RelBegin = S.beginOffset() - AllocBeginOffset; uint64_t RelEnd = S.endOffset() - AllocBeginOffset; + Use *U = S.getUse(); + + // Lifetime intrinsics operate over the whole alloca whose sizes are usually + // larger than other load/store slices (RelEnd > Size). But lifetime are + // always promotable and should not impact other slices' promotability of the + // partition. + if (IntrinsicInst *II = dyn_cast(U->getUser())) { + if (II->isLifetimeStartOrEnd() || II->isDroppable()) + return true; + } + // We can't reasonably handle cases where the load or store extends past // the end of the alloca's type and into its padding. if (RelEnd > Size) return false; - Use *U = S.getUse(); - if (LoadInst *LI = dyn_cast(U->getUser())) { if (LI->isVolatile()) return false; @@ -2048,9 +2053,6 @@ static bool isIntegerWideningViableForSlice(const Slice &S, return false; if (!S.isSplittable()) return false; // Skip any unsplittable intrinsics. - } else if (IntrinsicInst *II = dyn_cast(U->getUser())) { - if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) - return false; } else { return false; } @@ -2179,10 +2181,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, return V; } - SmallVector Mask; - Mask.reserve(NumElements); - for (unsigned i = BeginIndex; i != EndIndex; ++i) - Mask.push_back(i); + auto Mask = llvm::to_vector<8>(llvm::seq(BeginIndex, EndIndex)); V = IRB.CreateShuffleVector(V, Mask, Name + ".extract"); LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); return V; @@ -2734,10 +2733,9 @@ private: Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8); V = IRB.CreateMul( IRB.CreateZExt(V, SplatIntTy, "zext"), - ConstantExpr::getUDiv( - Constant::getAllOnesValue(SplatIntTy), - ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()), - SplatIntTy)), + IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy), + IRB.CreateZExt(Constant::getAllOnesValue(V->getType()), + SplatIntTy)), "isplat"); return V; } @@ -2887,7 +2885,7 @@ private: assert((IsDest && II.getRawDest() == OldPtr) || (!IsDest && II.getRawSource() == OldPtr)); - MaybeAlign SliceAlign = getSliceAlign(); + Align SliceAlign = getSliceAlign(); // For unsplit intrinsics, we simply modify the source and destination // pointers in place. This isn't just an optimization, it is a matter of @@ -3481,19 +3479,13 @@ private: Type *Ty = GEPI.getSourceElementType(); Value *True = Sel->getTrueValue(); - Value *NTrue = - IsInBounds - ? 
IRB.CreateInBoundsGEP(Ty, True, Index, - True->getName() + ".sroa.gep") - : IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep"); + Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep", + IsInBounds); Value *False = Sel->getFalseValue(); - Value *NFalse = - IsInBounds - ? IRB.CreateInBoundsGEP(Ty, False, Index, - False->getName() + ".sroa.gep") - : IRB.CreateGEP(Ty, False, Index, False->getName() + ".sroa.gep"); + Value *NFalse = IRB.CreateGEP(Ty, False, Index, + False->getName() + ".sroa.gep", IsInBounds); Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse, Sel->getName() + ".sroa.sel"); @@ -3547,10 +3539,8 @@ private: IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator())); Type *Ty = GEPI.getSourceElementType(); - NewVal = IsInBounds ? IRB.CreateInBoundsGEP(Ty, In, Index, - In->getName() + ".sroa.gep") - : IRB.CreateGEP(Ty, In, Index, - In->getName() + ".sroa.gep"); + NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep", + IsInBounds); } NewPN->addIncoming(NewVal, B); } @@ -3972,16 +3962,15 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { for (LoadInst *LI : Loads) { SplitLoads.clear(); - IntegerType *Ty = cast(LI->getType()); - assert(Ty->getBitWidth() % 8 == 0); - uint64_t LoadSize = Ty->getBitWidth() / 8; - assert(LoadSize > 0 && "Cannot have a zero-sized integer load!"); - auto &Offsets = SplitOffsetsMap[LI]; - assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && - "Slice size should always match load size exactly!"); + unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset(); + assert(LI->getType()->getIntegerBitWidth() % 8 == 0 && + "Load must have type size equal to store size"); + assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize && + "Load must be >= slice size"); + uint64_t BaseOffset = Offsets.S->beginOffset(); - assert(BaseOffset + LoadSize > BaseOffset && + assert(BaseOffset + SliceSize > BaseOffset && "Cannot represent alloca access size using 64-bit integers!"); Instruction *BasePtr = cast(LI->getPointerOperand()); @@ -3992,7 +3981,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); int Idx = 0, Size = Offsets.Splits.size(); for (;;) { - auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); + auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8); auto AS = LI->getPointerAddressSpace(); auto *PartPtrTy = PartTy->getPointerTo(AS); LoadInst *PLoad = IRB.CreateAlignedLoad( @@ -4025,7 +4014,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // Setup the next partition. PartOffset = Offsets.Splits[Idx]; ++Idx; - PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset; + PartSize = (Idx < Size ? 
Offsets.Splits[Idx] : SliceSize) - PartOffset; } // Now that we have the split loads, do the slow walk over all uses of the diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index f9650efc051f..008ddfc72740 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -16,16 +16,13 @@ #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Scalar.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/Scalarizer.h" -#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; @@ -76,7 +73,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopRerollLegacyPassPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnrollAndJamPass(Registry); - initializeLoopUnswitchPass(Registry); initializeWarnMissedTransformationsLegacyPass(Registry); initializeLoopVersioningLICMLegacyPassPass(Registry); initializeLoopIdiomRecognizeLegacyPassPass(Registry); @@ -104,6 +100,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); + initializeTLSVariableHoistLegacyPassPass(Registry); initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry); initializeSpeculativeExecutionLegacyPassPass(Registry); initializeStraightLineStrengthReduceLegacyPassPass(Registry); @@ -214,10 +211,6 @@ void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopUnrollAndJamPass()); } -void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLoopUnswitchPass()); -} - void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLowerAtomicPass()); } diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp index 29cea42e4a00..e2976ace3a4a 100644 --- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -1,5 +1,5 @@ //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// -// instrinsics +// intrinsics // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -24,11 +24,9 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -36,7 +34,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include #include using namespace llvm; @@ -876,7 +873,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI, for (BasicBlock &BB : llvm::make_early_inc_range(F)) { bool ModifiedDTOnIteration = false; MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL, - DTU.hasValue() ? 
DTU.getPointer() : nullptr); + DTU ? DTU.getPointer() : nullptr); // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 3606c8a4b073..08f4b2173da2 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -39,8 +39,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include #include @@ -52,7 +50,7 @@ using namespace llvm; #define DEBUG_TYPE "scalarizer" -static cl::opt ScalarizeVariableInsertExtract( +static cl::opt ClScalarizeVariableInsertExtract( "scalarize-variable-insert-extract", cl::init(true), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize " "insertelement/extractelement with variable index")); @@ -60,9 +58,9 @@ static cl::opt ScalarizeVariableInsertExtract( // This is disabled by default because having separate loads and stores // makes it more likely that the -combiner-alias-analysis limits will be // reached. -static cl::opt - ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, - cl::desc("Allow the scalarizer pass to scalarize loads and store")); +static cl::opt ClScalarizeLoadStore( + "scalarize-load-store", cl::init(false), cl::Hidden, + cl::desc("Allow the scalarizer pass to scalarize loads and store")); namespace { @@ -96,7 +94,7 @@ public: // Scatter V into Size components. If new instructions are needed, // insert them before BBI in BB. If Cache is nonnull, use it to cache // the results. - Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, + Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Type *PtrElemTy, ValueVector *cachePtr = nullptr); // Return component I, creating a new Value for it if necessary. @@ -109,8 +107,8 @@ private: BasicBlock *BB; BasicBlock::iterator BBI; Value *V; + Type *PtrElemTy; ValueVector *CachePtr; - PointerType *PtrTy; ValueVector Tmp; unsigned Size; }; @@ -188,10 +186,23 @@ struct VectorLayout { uint64_t ElemSize = 0; }; +template +T getWithDefaultOverride(const cl::opt &ClOption, + const llvm::Optional &DefaultOverride) { + return ClOption.getNumOccurrences() ? 
ClOption + : DefaultOverride.value_or(ClOption); +} + class ScalarizerVisitor : public InstVisitor { public: - ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT) - : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT) { + ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT, + ScalarizerPassOptions Options) + : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT), + ScalarizeVariableInsertExtract( + getWithDefaultOverride(ClScalarizeVariableInsertExtract, + Options.ScalarizeVariableInsertExtract)), + ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore, + Options.ScalarizeLoadStore)) { } bool visit(Function &F); @@ -216,8 +227,9 @@ public: bool visitCallInst(CallInst &ICI); private: - Scatterer scatter(Instruction *Point, Value *V); + Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr); void gather(Instruction *Op, const ValueVector &CV); + void replaceUses(Instruction *Op, Value *CV); bool canTransferMetadata(unsigned Kind); void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV); Optional getVectorLayout(Type *Ty, Align Alignment, @@ -231,12 +243,16 @@ private: ScatterMap Scattered; GatherList Gathered; + bool Scalarized; SmallVector PotentiallyDeadInstrs; unsigned ParallelLoopAccessMDKind; DominatorTree *DT; + + const bool ScalarizeVariableInsertExtract; + const bool ScalarizeLoadStore; }; class ScalarizerLegacyPass : public FunctionPass { @@ -265,12 +281,14 @@ INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer", "Scalarize vector operations", false, false) Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, - ValueVector *cachePtr) - : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) { + Type *PtrElemTy, ValueVector *cachePtr) + : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) { Type *Ty = V->getType(); - PtrTy = dyn_cast(Ty); - if (PtrTy) - Ty = PtrTy->getPointerElementType(); + if (Ty->isPointerTy()) { + assert(cast(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) && + "Pointer element type mismatch"); + Ty = PtrElemTy; + } Size = cast(Ty)->getNumElements(); if (!CachePtr) Tmp.resize(Size, nullptr); @@ -287,15 +305,15 @@ Value *Scatterer::operator[](unsigned I) { if (CV[I]) return CV[I]; IRBuilder<> Builder(BB, BBI); - if (PtrTy) { - Type *ElTy = - cast(PtrTy->getPointerElementType())->getElementType(); + if (PtrElemTy) { + Type *VectorElemTy = cast(PtrElemTy)->getElementType(); if (!CV[0]) { - Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace()); + Type *NewPtrTy = PointerType::get( + VectorElemTy, V->getType()->getPointerAddressSpace()); CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0"); } if (I != 0) - CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I, + CV[I] = Builder.CreateConstGEP1_32(VectorElemTy, CV[0], I, V->getName() + ".i" + Twine(I)); } else { // Search through a chain of InsertElementInsts looking for element I. 
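getWithDefaultOverride, used in the constructor above, resolves each flag in a fixed order: an explicit command-line occurrence wins, then the pass-constructor option, then the compiled-in cl::opt default. The same precedence as a small self-contained model (std::optional standing in for llvm::Optional, Occurrences for the cl::opt bookkeeping):

    #include <optional>

    // Mirrors just enough of cl::opt to show the precedence.
    struct FlagModel {
      bool Value;      // current value: the default, or what was parsed
      int Occurrences; // times the flag appeared on the command line
    };

    static bool resolve(const FlagModel &Cl, std::optional<bool> PassOption) {
      if (Cl.Occurrences > 0) // explicit command line beats everything
        return Cl.Value;
      return PassOption.value_or(Cl.Value); // pass option beats the default
    }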
@@ -334,7 +352,7 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) { unsigned ParallelLoopAccessMDKind = M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); DominatorTree *DT = &getAnalysis().getDomTree(); - ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT); + ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions()); return Impl.visit(F); } @@ -345,6 +363,8 @@ FunctionPass *llvm::createScalarizerPass() { bool ScalarizerVisitor::visit(Function &F) { assert(Gathered.empty() && Scattered.empty()); + Scalarized = false; + // To ensure we replace gathered components correctly we need to do an ordered // traversal of the basic blocks in the function. ReversePostOrderTraversal RPOT(&F.getEntryBlock()); @@ -362,13 +382,14 @@ bool ScalarizerVisitor::visit(Function &F) { // Return a scattered form of V that can be accessed by Point. V must be a // vector or a pointer to a vector. -Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) { +Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V, + Type *PtrElemTy) { if (Argument *VArg = dyn_cast(V)) { // Put the scattered form of arguments in the entry block, // so that it can be used everywhere. Function *F = VArg->getParent(); BasicBlock *BB = &F->getEntryBlock(); - return Scatterer(BB, BB->begin(), V, &Scattered[V]); + return Scatterer(BB, BB->begin(), V, PtrElemTy, &Scattered[V]); } if (Instruction *VOp = dyn_cast(V)) { // When scalarizing PHI nodes we might try to examine/rewrite InsertElement @@ -379,17 +400,17 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) { // need to analyse them further. if (!DT->isReachableFromEntry(VOp->getParent())) return Scatterer(Point->getParent(), Point->getIterator(), - UndefValue::get(V->getType())); + PoisonValue::get(V->getType()), PtrElemTy); // Put the scattered form of an instruction directly after the // instruction, skipping over PHI nodes and debug intrinsics. BasicBlock *BB = VOp->getParent(); return Scatterer( BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, - &Scattered[V]); + PtrElemTy, &Scattered[V]); } // In the fallback case, just put the scattered before Point and // keep the result local to Point. - return Scatterer(Point->getParent(), Point->getIterator(), V); + return Scatterer(Point->getParent(), Point->getIterator(), V, PtrElemTy); } // Replace Op with the gathered form of the components in CV. Defer the @@ -419,6 +440,15 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) { Gathered.push_back(GatherList::value_type(Op, &SV)); } +// Replace Op with CV and collect Op has a potentially dead instruction. +void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) { + if (CV != Op) { + Op->replaceAllUsesWith(CV); + PotentiallyDeadInstrs.emplace_back(Op); + Scalarized = true; + } +} + // Return true if it is safe to transfer the given metadata tag from // vector to scalar instructions. 
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) { @@ -558,9 +588,11 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (OpI->getType()->isVectorTy()) { Scattered[I] = scatter(&CI, OpI); assert(Scattered[I].size() == NumElems && "mismatched call operands"); + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) + Tys.push_back(OpI->getType()->getScalarType()); } else { ScalarOperands[I] = OpI; - if (hasVectorInstrinsicOverloadedScalarOpd(ID, I)) + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) Tys.push_back(OpI->getType()); } } @@ -576,7 +608,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { ScalarCallOps.clear(); for (unsigned J = 0; J != NumArgs; ++J) { - if (hasVectorInstrinsicScalarOpd(ID, J)) + if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) ScalarCallOps.push_back(ScalarOperands[J]); else ScalarCallOps.push_back(Scattered[J][Elem]); @@ -809,7 +841,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { if (auto *CI = dyn_cast(ExtIdx)) { Value *Res = Op0[CI->getValue().getZExtValue()]; - gather(&EEI, {Res}); + replaceUses(&EEI, Res); return true; } @@ -825,7 +857,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { Res = Builder.CreateSelect(ShouldExtract, Elt, Res, EEI.getName() + ".upto" + Twine(I)); } - gather(&EEI, {Res}); + replaceUses(&EEI, Res); return true; } @@ -891,7 +923,7 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) { unsigned NumElems = cast(Layout->VecTy)->getNumElements(); IRBuilder<> Builder(&LI); - Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); + Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType()); ValueVector Res; Res.resize(NumElems); @@ -917,7 +949,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) { unsigned NumElems = cast(Layout->VecTy)->getNumElements(); IRBuilder<> Builder(&SI); - Scatterer VPtr = scatter(&SI, SI.getPointerOperand()); + Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType()); Scatterer VVal = scatter(&SI, FullValue); ValueVector Stores; @@ -940,7 +972,7 @@ bool ScalarizerVisitor::visitCallInst(CallInst &CI) { bool ScalarizerVisitor::finish() { // The presence of data in Gathered or Scattered indicates changes // made to the Function. 
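Passing PtrElemTy into scatter() is another opaque-pointer change: the vector type being accessed can no longer be read off the pointer operand, so the load/store visitors above must supply it from the access they are scalarizing. A reduced sketch of where that type comes from:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // With typed pointers the element type fell out of the pointer operand;
    // with opaque pointers the access type is the only source of truth.
    static Type *scatterElementType(Instruction &I) {
      if (auto *LI = dyn_cast<LoadInst>(&I))
        return LI->getType(); // the vector type actually loaded
      if (auto *SI = dyn_cast<StoreInst>(&I))
        return SI->getValueOperand()->getType(); // the vector type stored
      return nullptr; // non-memory users scatter values, not pointers
    }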
- if (Gathered.empty() && Scattered.empty()) + if (Gathered.empty() && Scattered.empty() && !Scalarized) return false; for (const auto &GMI : Gathered) { Instruction *Op = GMI.first; @@ -971,6 +1003,7 @@ bool ScalarizerVisitor::finish() { } Gathered.clear(); Scattered.clear(); + Scalarized = false; RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs); @@ -982,7 +1015,7 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) unsigned ParallelLoopAccessMDKind = M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); DominatorTree *DT = &AM.getResult(F); - ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT); + ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options); bool Changed = Impl.visit(F); PreservedAnalyses PA; PA.preserve(); diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index d23925042b0a..7da5a78772ad 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -189,7 +189,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index a27da047bfd3..0535608244cc 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/GuardUtils.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -28,6 +27,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -49,7 +49,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -81,7 +83,6 @@ static cl::opt EnableNonTrivialUnswitch( static cl::opt UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, - cl::ZeroOrMore, cl::desc("The cost threshold for unswitching a loop.")); static cl::opt EnableUnswitchCostMultiplier( @@ -110,17 +111,27 @@ static cl::opt "partial unswitching analysis"), cl::init(100), cl::Hidden); static cl::opt FreezeLoopUnswitchCond( - "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden, + "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation.")); +// Helper to skip (select x, true, false), which matches both a logical AND and +// OR and can confuse code that tries to determine if \p Cond is either a +// logical AND or OR but not both. 
+static Value *skipTrivialSelect(Value *Cond) { + Value *CondNext; + while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero()))) + Cond = CondNext; + return Cond; +} + /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. /// /// This essentially walks from a root recursively through loop variant operands -/// which have the exact same opcode and finds all inputs which are loop -/// invariant. For some operations these can be re-associated and unswitched out -/// of the loop entirely. +/// which have perform the same logical operation (AND or OR) and finds all +/// inputs which are loop invariant. For some operations these can be +/// re-associated and unswitched out of the loop entirely. static TinyPtrVector collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI) { @@ -150,7 +161,7 @@ collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, } // If not an instruction with the same opcode, nothing we can do. - Instruction *OpI = dyn_cast(OpV); + Instruction *OpI = dyn_cast(skipTrivialSelect(OpV)); if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) || (IsRootOr && match(OpI, m_LogicalOr())))) { @@ -202,13 +213,19 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, /// branch on a single value. static void buildPartialUnswitchConditionalBranch( BasicBlock &BB, ArrayRef Invariants, bool Direction, - BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) { + BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, + Instruction *I, AssumptionCache *AC, DominatorTree &DT) { IRBuilder<> IRB(&BB); - Value *Cond = Direction ? IRB.CreateOr(Invariants) : - IRB.CreateAnd(Invariants); - if (InsertFreeze) - Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr"); + SmallVector FrozenInvariants; + for (Value *Inv : Invariants) { + if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT)) + Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr"); + FrozenInvariants.push_back(Inv); + } + + Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants) + : IRB.CreateAnd(FrozenInvariants); IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? &NormalSucc : &UnswitchedSucc); } @@ -442,11 +459,12 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // some input conditions to the branch. bool FullUnswitch = false; - if (L.isLoopInvariant(BI.getCondition())) { - Invariants.push_back(BI.getCondition()); + Value *Cond = skipTrivialSelect(BI.getCondition()); + if (L.isLoopInvariant(Cond)) { + Invariants.push_back(Cond); FullUnswitch = true; } else { - if (auto *CondInst = dyn_cast(BI.getCondition())) + if (auto *CondInst = dyn_cast(Cond)) Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI); if (Invariants.empty()) { LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n"); @@ -480,8 +498,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // is a graph of `or` operations, or the exit block is along the false edge // and the condition is a graph of `and` operations. if (!FullUnswitch) { - if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr()) - : !match(BI.getCondition(), m_LogicalAnd())) { + if (ExitDirection ? 
!match(Cond, m_LogicalOr()) + : !match(Cond, m_LogicalAnd())) { LLVM_DEBUG(dbgs() << " Branch condition is in improper form for " "non-full unswitch!\n"); return false; @@ -546,6 +564,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // its successors. OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(), BI); + BI.setCondition(Cond); if (MSSAU) { // Temporarily clone the terminator, to make MSSA update cheaper by // separating "insert edge" updates from "remove edge" ones. @@ -561,15 +580,16 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // Only unswitching a subset of inputs to the condition, so we will need to // build a new branch that merges the invariant inputs. if (ExitDirection) - assert(match(BI.getCondition(), m_LogicalOr()) && + assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the " "condition!"); else - assert(match(BI.getCondition(), m_LogicalAnd()) && + assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!"); - buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection, - *UnswitchedBB, *NewPH, false); + buildPartialUnswitchConditionalBranch( + *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH, + FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT); } // Update the dominator tree with the added edge. @@ -1019,7 +1039,8 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, // Don't bother trying to unswitch past an unconditional branch or a branch // with a constant value. These should be removed by simplifycfg prior to // running this pass. - if (!BI->isConditional() || isa(BI->getCondition())) + if (!BI->isConditional() || + isa(skipTrivialSelect(BI->getCondition()))) return Changed; // Found a trivial condition candidate: non-foldable conditional branch. If @@ -1663,7 +1684,7 @@ deleteDeadBlocksFromLoop(Loop &L, // uses in other blocks. 
for (auto &I : *BB) if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(PoisonValue::get(I.getType())); BB->dropAllReferences(); } @@ -2042,12 +2063,13 @@ static void unswitchNontrivialInvariants( "Can only unswitch switches and conditional branch!"); bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty(); bool FullUnswitch = - SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant); + SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] && + !PartiallyInvariant); if (FullUnswitch) assert(Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"); else - assert(isa<Instruction>(BI->getCondition()) && + assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) && "Partial unswitching requires an instruction as the condition!"); if (MSSAU && VerifyMemorySSA) @@ -2062,14 +2084,14 @@ static void unswitchNontrivialInvariants( bool Direction = true; int ClonedSucc = 0; if (!FullUnswitch) { - Value *Cond = BI->getCondition(); + Value *Cond = skipTrivialSelect(BI->getCondition()); (void)Cond; assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, `select`, or partially invariant instructions " "can combine invariants being unswitched."); - if (!match(BI->getCondition(), m_LogicalOr())) { - if (match(BI->getCondition(), m_LogicalAnd()) || + if (!match(Cond, m_LogicalOr())) { + if (match(Cond, m_LogicalAnd()) || (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) { Direction = false; ClonedSucc = 1; @@ -2209,11 +2231,12 @@ static void unswitchNontrivialInvariants( BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + Value *Cond = skipTrivialSelect(BI->getCondition()); if (InsertFreeze) { - auto Cond = BI->getCondition(); if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) - BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI)); + Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI); } + BI->setCondition(Cond); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); @@ -2311,9 +2334,11 @@ static void unswitchNontrivialInvariants( if (PartiallyInvariant) buildPartialInvariantUnswitchConditionalBranch( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); - else - buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, - *ClonedPH, *LoopPH, InsertFreeze); + else { + buildPartialUnswitchConditionalBranch( + *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, + FreezeLoopUnswitchCond, BI, &AC, DT); + } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); if (MSSAU) { @@ -2745,22 +2770,16 @@ static bool unswitchBestCondition( BI->getSuccessor(0) == BI->getSuccessor(1)) continue; - // If BI's condition is 'select _, true, false', simplify it to avoid - // confusing the matchers - Value *Cond = BI->getCondition(), *CondNext; - while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero()))) - Cond = CondNext; - BI->setCondition(Cond); - + Value *Cond = skipTrivialSelect(BI->getCondition()); if (isa<Constant>(Cond)) continue; - if (L.isLoopInvariant(BI->getCondition())) { - UnswitchCandidates.push_back({BI, {BI->getCondition()}}); + if (L.isLoopInvariant(Cond)) { + UnswitchCandidates.push_back({BI, {Cond}}); continue; } - Instruction &CondI = *cast<Instruction>(BI->getCondition()); + Instruction &CondI = *cast<Instruction>(Cond); if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
TinyPtrVector<Value *> Invariants = collectHomogenousInstGraphLoopInvariants(L, CondI, LI); @@ -2785,8 +2804,7 @@ static bool unswitchBestCondition( PartialIVInfo = *Info; PartialIVCondBranch = L.getHeader()->getTerminator(); TinyPtrVector<Value *> ValsToDuplicate; - for (auto *Inst : Info->InstToDuplicate) - ValsToDuplicate.push_back(Inst); + llvm::append_range(ValsToDuplicate, Info->InstToDuplicate); UnswitchCandidates.push_back( {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)}); } @@ -2902,10 +2920,11 @@ static bool unswitchBestCondition( // its cost. if (!FullUnswitch) { auto &BI = cast<BranchInst>(TI); - if (match(BI.getCondition(), m_LogicalAnd())) { + Value *Cond = skipTrivialSelect(BI.getCondition()); + if (match(Cond, m_LogicalAnd())) { if (SuccBB == BI.getSuccessor(1)) continue; - } else if (match(BI.getCondition(), m_LogicalOr())) { + } else if (match(Cond, m_LogicalOr())) { if (SuccBB == BI.getSuccessor(0)) continue; } else if ((PartialIVInfo.KnownValue->isOneValue() && @@ -2947,8 +2966,9 @@ static bool unswitchBestCondition( ArrayRef<Value *> Invariants = TerminatorAndInvariants.second; BranchInst *BI = dyn_cast<BranchInst>(&TI); InstructionCost CandidateCost = ComputeUnswitchedCost( - TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 && - Invariants[0] == BI->getCondition())); + TI, /*FullUnswitch*/ !BI || + (Invariants.size() == 1 && + Invariants[0] == skipTrivialSelect(BI->getCondition()))); // Calculate cost multiplier which is a tool to limit potentially // exponential behavior of loop-unswitch. if (EnableUnswitchCostMultiplier) { @@ -3131,8 +3151,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, AR.MSSA->verifyMemorySSA(); } if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, - UnswitchCB, &AR.SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, + UnswitchCB, &AR.SE, MSSAU ?
MSSAU.getPointer() : nullptr, DestroyLoopCB)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ee17da1875e5..fb2d812a186d 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -31,19 +31,16 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include @@ -59,6 +56,11 @@ static cl::opt<bool> UserKeepLoops( "keep-loops", cl::Hidden, cl::init(true), cl::desc("Preserve canonical loop structure (default = true)")); +static cl::opt<bool> UserSwitchRangeToICmp( + "switch-range-to-icmp", cl::Hidden, cl::init(false), + cl::desc( + "Convert switches into an integer range comparison (default = false)")); + static cl::opt<bool> UserSwitchToLookup( "switch-to-lookup", cl::Hidden, cl::init(false), cl::desc("Convert switches to lookup tables (default = false)")); @@ -311,6 +313,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) { Options.BonusInstThreshold = UserBonusInstThreshold; if (UserForwardSwitchCond.getNumOccurrences()) Options.ForwardSwitchCondToPhi = UserForwardSwitchCond; + if (UserSwitchRangeToICmp.getNumOccurrences()) + Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp; if (UserSwitchToLookup.getNumOccurrences()) Options.ConvertSwitchToLookupTable = UserSwitchToLookup; if (UserKeepLoops.getNumOccurrences()) @@ -337,6 +341,8 @@ void SimplifyCFGPass::printPipeline( OS << "<"; OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";"; OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;"; + OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-") + << "switch-range-to-icmp;"; OS << (Options.ConvertSwitchToLookupTable ? "" : "no-") << "switch-to-lookup;"; OS << (Options.NeedCanonicalLoop ?
"" : "no-") << "keep-loops;"; diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp index 8600aacdb056..e8fde53005f0 100644 --- a/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/llvm/lib/Transforms/Scalar/Sink.cpp @@ -15,12 +15,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -48,7 +43,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA, } if (Inst->isTerminator() || isa(Inst) || Inst->isEHPad() || - Inst->mayThrow()) + Inst->mayThrow() || !Inst->willReturn()) return false; if (auto *Call = dyn_cast(Inst)) { diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index 06169a7834f6..9ac4608134c2 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -63,10 +63,10 @@ #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -275,7 +275,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( }); } - // Usially debug label instrinsic corresponds to label in LLVM IR. In these + // Usially debug label intrinsic corresponds to label in LLVM IR. In these // cases we should not move it here. // TODO: Possible special processing needed to detect it is related to a // hoisted instruction. @@ -301,7 +301,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( if (TotalSpeculationCost > SpecExecMaxSpeculationCost) return false; // too much to hoist } else { - // Debug info instrinsics should not be counted for threshold. + // Debug info intrinsics should not be counted for threshold. 
if (!isa<DbgInfoIntrinsic>(I)) NotHoistedInstCount++; if (NotHoistedInstCount > SpecExecMaxNotHoisted) diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index b47378808216..70df0cec0dca 100644 --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -68,7 +68,6 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -683,24 +682,16 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - if (InBounds) - Reduced = - Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump); - else - Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump); + Reduced = + Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); } else { // C = gep Basis, Bump // Canonicalize bump to pointer size. Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - if (InBounds) - Reduced = Builder.CreateInBoundsGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), - Basis.Ins, Bump); - else - Reduced = Builder.CreateGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), - Basis.Ins, Bump); + Reduced = Builder.CreateGEP( + cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), + Basis.Ins, Bump, "", InBounds); } break; } diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index b3a445368537..f6525ad7de9b 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -18,10 +18,8 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" -#include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -33,7 +31,6 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" -#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" @@ -41,7 +38,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -72,6 +68,11 @@ static cl::opt<bool> cl::desc("Allow relaxed uniform region checks"), cl::init(true)); +static cl::opt<unsigned> + ReorderNodeSize("structurizecfg-node-reorder-size", + cl::desc("Limit region size for reordering nodes"), + cl::init(100), cl::Hidden); + // Definition of the complex types used in this pass. using BBValuePair = std::pair<BasicBlock *, Value *>; @@ -266,6 +267,8 @@ class StructurizeCFG { void orderNodes(); + void reorderNodes(); + void analyzeLoops(RegionNode *N); Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); @@ -424,6 +427,57 @@ void StructurizeCFG::orderNodes() { } } +/// Change the node ordering to decrease the range of live values, especially +/// the values that capture the control flow path for branches.
We do this +/// by moving blocks with a single predecessor and successor to appear after +/// the predecessor. The motivation is to move some loop exit blocks into a loop. +/// In cases where a loop has a large number of exit blocks, this reduces the +/// amount of values needed across the loop boundary. +void StructurizeCFG::reorderNodes() { + SmallVector<RegionNode *, 8> NewOrder; + DenseMap<BasicBlock *, unsigned> MoveTo; + BitVector Moved(Order.size()); + + // The benefits of reordering nodes occur for large regions. + if (Order.size() <= ReorderNodeSize) + return; + + // The algorithm works with two passes over Order. The first pass identifies + // the blocks to move and the position to move them to. The second pass + // creates the new order based upon this information. We move blocks with + // a single predecessor and successor. If there are multiple candidates then + // maintain the original order. + BBSet Seen; + for (int I = Order.size() - 1; I >= 0; --I) { + auto *BB = Order[I]->getEntry(); + Seen.insert(BB); + auto *Pred = BB->getSinglePredecessor(); + auto *Succ = BB->getSingleSuccessor(); + // Consider only those basic blocks that have a predecessor in Order and a + // successor that exits the region. The region may contain subregions that + // have been structurized and are not included in Order. + if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() && + !MoveTo.count(Pred)) { + MoveTo[Pred] = I; + Moved.set(I); + } + } + + // If no blocks have been moved then the original order is good. + if (!Moved.count()) + return; + + for (size_t I = 0, E = Order.size(); I < E; ++I) { + auto *BB = Order[I]->getEntry(); + if (MoveTo.count(BB)) + NewOrder.push_back(Order[MoveTo[BB]]); + if (!Moved[I]) + NewOrder.push_back(Order[I]); + } + + Order.assign(NewOrder); +} + /// Determine the end of the loops void StructurizeCFG::analyzeLoops(RegionNode *N) { if (N->isSubRegion()) { @@ -685,7 +739,7 @@ void StructurizeCFG::simplifyAffectedPhis() { Q.DT = DT; for (WeakVH VH : AffectedPhis) { if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { - if (auto NewValue = SimplifyInstruction(Phi, Q)) { + if (auto NewValue = simplifyInstruction(Phi, Q)) { Phi->replaceAllUsesWith(NewValue); Phi->eraseFromParent(); Changed = true; @@ -1085,12 +1139,13 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { ParentRegion = R; orderNodes(); + reorderNodes(); collectInfos(); createFlow(); insertConditions(false); insertConditions(true); - simplifyConditions(); setPhiValues(); + simplifyConditions(); simplifyAffectedPhis(); rebuildSSA(); diff --git a/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp new file mode 100644 index 000000000000..16b3483f9687 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp @@ -0,0 +1,306 @@ +//===- TLSVariableHoist.cpp -------- Remove Redundant TLS Loads ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass identifies/eliminates redundant TLS loads if the related option is set. +// For an example, please refer to the comment at the head of TLSVariableHoist.h.
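To make the new pass's intent concrete, a hedged C-level picture of the transformation (illustrative only; the function names are made up): in PIC code each use of a thread_local variable may go through a TLS address-resolution call, so a use inside a loop re-resolves the address every iteration, and hoisting a single address computation removes that redundancy.

    extern thread_local int Counter;

    int sumBefore(int N) {
      int S = 0;
      for (int I = 0; I < N; ++I)
        S += Counter; // address of Counter may be re-resolved on every use
      return S;
    }

    int sumAfter(int N) {
      int *P = &Counter; // resolved once, outside the loop (what tlshoist arranges)
      int S = 0;
      for (int I = 0; I < N; ++I)
        S += *P;
      return S;
    }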
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/TLSVariableHoist.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace tlshoist; + +#define DEBUG_TYPE "tlshoist" + +static cl::opt<bool> TLSLoadHoist( + "tls-load-hoist", cl::init(false), cl::Hidden, + cl::desc("hoist the TLS loads in PIC model to eliminate redundant " + "TLS address calculation.")); + +namespace { + +/// The TLS Variable hoist pass. +class TLSVariableHoistLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + TLSVariableHoistLegacyPass() : FunctionPass(ID) { + initializeTLSVariableHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &Fn) override; + + StringRef getPassName() const override { return "TLS Variable Hoist"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + } + +private: + TLSVariableHoistPass Impl; +}; + +} // end anonymous namespace + +char TLSVariableHoistLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(TLSVariableHoistLegacyPass, "tlshoist", + "TLS Variable Hoist", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(TLSVariableHoistLegacyPass, "tlshoist", + "TLS Variable Hoist", false, false) + +FunctionPass *llvm::createTLSVariableHoistPass() { + return new TLSVariableHoistLegacyPass(); +} + +/// Perform the TLS Variable Hoist optimization for the given function. +bool TLSVariableHoistLegacyPass::runOnFunction(Function &Fn) { + if (skipFunction(Fn)) + return false; + + LLVM_DEBUG(dbgs() << "********** Begin TLS Variable Hoist **********\n"); + LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); + + bool MadeChange = + Impl.runImpl(Fn, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + getAnalysis<LoopInfoWrapperPass>().getLoopInfo()); + + if (MadeChange) { + LLVM_DEBUG(dbgs() << "********** Function after TLS Variable Hoist: " + << Fn.getName() << '\n'); + LLVM_DEBUG(dbgs() << Fn); + } + LLVM_DEBUG(dbgs() << "********** End TLS Variable Hoist **********\n"); + + return MadeChange; +} + +void TLSVariableHoistPass::collectTLSCandidate(Instruction *Inst) { + // Skip all cast instructions. They are visited indirectly later on. + if (Inst->isCast()) + return; + + // Scan all operands. + for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { + auto *GV = dyn_cast<GlobalVariable>(Inst->getOperand(Idx)); + if (!GV || !GV->isThreadLocal()) + continue; + + // Add Candidate to TLSCandMap (GV --> Candidate). + TLSCandMap[GV].addUser(Inst, Idx); + } +} + +void TLSVariableHoistPass::collectTLSCandidates(Function &Fn) { + // First, quickly check if there is any TLS variable. + Module *M = Fn.getParent(); + + bool HasTLS = llvm::any_of( + M->globals(), [](GlobalVariable &GV) { return GV.isThreadLocal(); }); + + // If none, directly return.
+ if (!HasTLS) + return; + + TLSCandMap.clear(); + + // Then, collect TLS Variable info. + for (BasicBlock &BB : Fn) { + // Ignore unreachable basic blocks. + if (!DT->isReachableFromEntry(&BB)) + continue; + + for (Instruction &Inst : BB) + collectTLSCandidate(&Inst); + } +} + +static bool oneUseOutsideLoop(tlshoist::TLSCandidate &Cand, LoopInfo *LI) { + if (Cand.Users.size() != 1) + return false; + + BasicBlock *BB = Cand.Users[0].Inst->getParent(); + if (LI->getLoopFor(BB)) + return false; + + return true; +} + +Instruction *TLSVariableHoistPass::getNearestLoopDomInst(BasicBlock *BB, + Loop *L) { + assert(L && "Unexpected Loop status!"); + + // Get the outermost loop. + while (Loop *Parent = L->getParentLoop()) + L = Parent; + + BasicBlock *PreHeader = L->getLoopPreheader(); + + // There is a unique predecessor outside the loop. + if (PreHeader) + return PreHeader->getTerminator(); + + BasicBlock *Header = L->getHeader(); + BasicBlock *Dom = Header; + for (BasicBlock *PredBB : predecessors(Header)) + Dom = DT->findNearestCommonDominator(Dom, PredBB); + + assert(Dom && "Did not find dominator BB!"); + Instruction *Term = Dom->getTerminator(); + + return Term; +} + +Instruction *TLSVariableHoistPass::getDomInst(Instruction *I1, + Instruction *I2) { + if (!I1) + return I2; + if (DT->dominates(I1, I2)) + return I1; + if (DT->dominates(I2, I1)) + return I2; + + // If there is no dominance relation, use common dominator. + BasicBlock *DomBB = + DT->findNearestCommonDominator(I1->getParent(), I2->getParent()); + + Instruction *Dom = DomBB->getTerminator(); + assert(Dom && "Common dominator not found!"); + + return Dom; +} + +BasicBlock::iterator TLSVariableHoistPass::findInsertPos(Function &Fn, + GlobalVariable *GV, + BasicBlock *&PosBB) { + tlshoist::TLSCandidate &Cand = TLSCandMap[GV]; + + // We should hoist the TLS use out of the loop, so choose the nearest + // instruction which dominates the loop and the enclosing loops (if any). + Instruction *LastPos = nullptr; + for (auto &User : Cand.Users) { + BasicBlock *BB = User.Inst->getParent(); + Instruction *Pos = User.Inst; + if (Loop *L = LI->getLoopFor(BB)) { + Pos = getNearestLoopDomInst(BB, L); + assert(Pos && "Did not find an insert position outside the loop!"); + } + Pos = getDomInst(LastPos, Pos); + LastPos = Pos; + } + + assert(LastPos && "Unexpected insert position!"); + BasicBlock *Parent = LastPos->getParent(); + PosBB = Parent; + return LastPos->getIterator(); +} + +// Generate a bitcast (no type change) to replace the uses of TLS Candidate. +Instruction *TLSVariableHoistPass::genBitCastInst(Function &Fn, + GlobalVariable *GV) { + BasicBlock *PosBB = &Fn.getEntryBlock(); + BasicBlock::iterator Iter = findInsertPos(Fn, GV, PosBB); + Type *Ty = GV->getType(); + auto *CastInst = new BitCastInst(GV, Ty, "tls_bitcast"); + PosBB->getInstList().insert(Iter, CastInst); + return CastInst; +} + +bool TLSVariableHoistPass::tryReplaceTLSCandidate(Function &Fn, + GlobalVariable *GV) { + + tlshoist::TLSCandidate &Cand = TLSCandMap[GV]; + + // If it is only used once and not in a loop, there is no need to replace it.
+ if (oneUseOutsideLoop(Cand, LI)) + return false; + + // Generate a bitcast (no type change) + auto *CastInst = genBitCastInst(Fn, GV); + + // to replace the uses of TLS Candidate + for (auto &User : Cand.Users) + User.Inst->setOperand(User.OpndIdx, CastInst); + + return true; +} + +bool TLSVariableHoistPass::tryReplaceTLSCandidates(Function &Fn) { + if (TLSCandMap.empty()) + return false; + + bool Replaced = false; + for (auto &GV2Cand : TLSCandMap) { + GlobalVariable *GV = GV2Cand.first; + Replaced |= tryReplaceTLSCandidate(Fn, GV); + } + + return Replaced; +} + +/// Optimize expensive TLS variables in the given function. +bool TLSVariableHoistPass::runImpl(Function &Fn, DominatorTree &DT, + LoopInfo &LI) { + if (Fn.hasOptNone()) + return false; + + if (!TLSLoadHoist && !Fn.getAttributes().hasFnAttr("tls-load-hoist")) + return false; + + this->LI = &LI; + this->DT = &DT; + assert(this->LI && this->DT && "Unexpected requirement!"); + + // Collect all TLS variable candidates. + collectTLSCandidates(Fn); + + bool MadeChange = tryReplaceTLSCandidates(Fn); + + return MadeChange; +} + +PreservedAnalyses TLSVariableHoistPass::run(Function &F, + FunctionAnalysisManager &AM) { + + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + + if (!runImpl(F, DT, LI)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 3bcf92e28a21..27c04177e894 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -53,11 +53,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -76,14 +73,12 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "tailcallelim" @@ -248,10 +243,10 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { isa<DbgInfoIntrinsic>(&I)) continue; - // Special-case operand bundle "clang.arc.attachedcall". + // Special-case operand bundles "clang.arc.attachedcall" and "ptrauth". bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundlesOtherThan( - LLVMContext::OB_clang_arc_attachedcall); + {LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_ptrauth}); if (!IsNoTail && CI->doesNotAccessMemory()) { // A call to a readnone function whose arguments are all things computed @@ -531,7 +526,7 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) { } // If the function doesn't return void, create the RetPN and RetKnownPN PHI - // nodes to track our return value. We initialize RetPN with undef and + // nodes to track our return value. We initialize RetPN with poison and // RetKnownPN with false since we can't know our return value at function // entry.
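On the undef-to-poison change just above: the entry-edge value of RetPN is never observed, because every read of it is guarded by RetKnownPN, which starts out false; poison is the preferred placeholder for such don't-care values. A rough C++ analogue of the two PHIs (illustrative only, not the pass's code):

    // The accumulated return value is read only after the flag is set, so
    // its initial contents are irrelevant -- hence poison rather than undef.
    struct TailLoopState {
      bool RetKnown = false; // mirrors RetKnownPN on the entry edge
      int Ret;               // mirrors RetPN: deliberately uninitialized
    };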
Type *RetType = F.getReturnType(); @@ -540,7 +535,7 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) { RetPN = PHINode::Create(RetType, 2, "ret.tr", InsertPos); RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr", InsertPos); - RetPN->addIncoming(UndefValue::get(RetType), NewEntry); + RetPN->addIncoming(PoisonValue::get(RetType), NewEntry); RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry); } @@ -734,7 +729,7 @@ void TailRecursionEliminator::cleanupAndFinalize() { // call. for (PHINode *PN : ArgumentPHIs) { // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { + if (Value *PNV = simplifyInstruction(PN, F.getParent()->getDataLayout())) { PN->replaceAllUsesWith(PNV); PN->eraseFromParent(); } diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp index 80a7d3a43ad6..8367e61c1a47 100644 --- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp +++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp @@ -61,7 +61,7 @@ static void warnAboutLeftoverTransformations(Loop *L, << "loop not vectorized: the optimizer was unable to perform the " "requested transformation; the transformation might be disabled " "or specified as part of an unsupported transformation ordering"); - else if (InterleaveCount.getValueOr(0) != 1) + else if (InterleaveCount.value_or(0) != 1) ORE->emit( DiagnosticInfoOptimizationFailure(DEBUG_TYPE, "FailedRequestedInterleaving", diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp index c734611836eb..24972db404be 100644 --- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp +++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp @@ -50,9 +50,6 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) { auto Int64Ty = Builder.getInt64Ty(); auto M = Builder.GetInsertBlock()->getModule(); auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); - if (!M->getModuleFlag("amdgpu_hostcall")) { - M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); - } return Builder.CreateCall(Fn, Version); } diff --git a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp index cbc508bb863a..0318429a76a7 100644 --- a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/ADT/SmallString.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp index e789194eb3ab..e6372fc5ab86 100644 --- a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -222,7 +222,7 @@ static bool addDiscriminators(Function &F) { << DIL->getColumn() << ":" << Discriminator << " " << I << "\n"); } else { - I.setDebugLoc(NewDIL.getValue()); + I.setDebugLoc(*NewDIL); LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" << DIL->getColumn() << ":" << Discriminator << " " << I << "\n"); @@ -260,7 +260,7 @@ static bool addDiscriminators(Function &F) { << CurrentDIL->getLine() << ":" << 
CurrentDIL->getColumn() << ":" << Discriminator << " " << I << "\n"); } else { - I.setDebugLoc(NewDIL.getValue()); + I.setDebugLoc(*NewDIL); Changed = true; } } diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index f910f7c3c31f..02ea17825c2f 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DebugCounter.h" diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 15c4a64eb794..e9983ff82176 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -21,7 +21,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -33,7 +32,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -1164,7 +1162,11 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, if (NewLatch != OldLatch) { MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop"); NewLatch->getTerminator()->setMetadata("llvm.loop", MD); - OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); + // It's still possible that OldLatch is the latch of another inner loop, + // in which case we do not remove the metadata. + Loop *IL = LI->getLoopFor(OldLatch); + if (IL && IL->getLoopLatch() != OldLatch) + OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); } } diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 1bb80be8ef99..0b36e8708a03 100644 --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -27,9 +27,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -317,18 +315,11 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, // predecessors of BB. static BasicBlock * findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) { - // If the block doesn't have any PHIs, we don't care about it, since there's - // no point in splitting it. - PHINode *PN = dyn_cast<PHINode>(BB->begin()); - if (!PN) - return nullptr; - // Verify we have exactly one IBR predecessor. // Conservatively bail out if one of the other predecessors is not a "regular" // terminator (that is, not a switch or a br).
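For orientation on the new parameter threaded through below: edges out of an indirectbr cannot be split in the usual way (the successors' addresses are taken), so SplitIndirectBrCriticalEdges instead rewires the target block, and the new flag lets callers skip targets with no PHI nodes, where the rewiring buys nothing. A hedged sketch of a call with the new signature (argument values are examples; F is assumed to be a Function&):

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    // Split only where a PHI makes it worthwhile; the analyses are optional.
    bool Changed = llvm::SplitIndirectBrCriticalEdges(
        F, /*IgnoreBlocksWithoutPHI=*/true, /*BPI=*/nullptr, /*BFI=*/nullptr);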
BasicBlock *IBB = nullptr; - for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) { - BasicBlock *PredBB = PN->getIncomingBlock(Pred); + for (BasicBlock *PredBB : predecessors(BB)) { Instruction *PredTerm = PredBB->getTerminator(); switch (PredTerm->getOpcode()) { case Instruction::IndirectBr: @@ -349,6 +340,7 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) { } bool llvm::SplitIndirectBrCriticalEdges(Function &F, + bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI) { // Check whether the function has any indirectbrs, and collect which blocks @@ -370,6 +362,9 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F, bool ShouldUpdateAnalysis = BPI && BFI; bool Changed = false; for (BasicBlock *Target : Targets) { + if (IgnoreBlocksWithoutPHI && Target->phis().empty()) + continue; + SmallVector<BasicBlock *> OtherPreds; BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); // If we did not find an indirectbr, or the indirectbr is the only diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 97f11ca71726..c4a58f36c171 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -13,16 +13,17 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Support/TypeSize.h" using namespace llvm; @@ -41,7 +42,6 @@ STATISTIC(NumInaccessibleMemOrArgMemOnly, STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly"); -STATISTIC(NumSExtArg, "Number of arguments inferred as signext"); STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns"); @@ -149,14 +149,6 @@ static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) { return true; } -static bool setSignExtendedArg(Function &F, unsigned ArgNo) { - if (F.hasParamAttribute(ArgNo, Attribute::SExt)) - return false; - F.addParamAttr(ArgNo, Attribute::SExt); - ++NumSExtArg; - return true; -} - static bool setRetNoUndef(Function &F) { if (!F.getReturnType()->isVoidTy() && !F.hasRetAttribute(Attribute::NoUndef)) { @@ -224,15 +216,54 @@ static bool setWillReturn(Function &F) { return true; } -bool llvm::inferLibFuncAttributes(Module *M, StringRef Name, - const TargetLibraryInfo &TLI) { +static bool setAlignedAllocParam(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::AllocAlign)) + return false; + F.addParamAttr(ArgNo, Attribute::AllocAlign); + return true; +} + +static bool setAllocatedPointerParam(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::AllocatedPointer)) + return false; + F.addParamAttr(ArgNo, Attribute::AllocatedPointer); + return true; +} + +static bool setAllocSize(Function &F, unsigned ElemSizeArg, +
Optional<unsigned> NumElemsArg) { + if (F.hasFnAttribute(Attribute::AllocSize)) + return false; + F.addFnAttr(Attribute::getWithAllocSizeArgs(F.getContext(), ElemSizeArg, + NumElemsArg)); + return true; +} + +static bool setAllocFamily(Function &F, StringRef Family) { + if (F.hasFnAttribute("alloc-family")) + return false; + F.addFnAttr("alloc-family", Family); + return true; +} + +static bool setAllocKind(Function &F, AllocFnKind K) { + if (F.hasFnAttribute(Attribute::AllocKind)) + return false; + F.addFnAttr( + Attribute::get(F.getContext(), Attribute::AllocKind, uint64_t(K))); + return true; +} + +bool llvm::inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, + const TargetLibraryInfo &TLI) { Function *F = M->getFunction(Name); if (!F) return false; - return inferLibFuncAttributes(*F, TLI); + return inferNonMandatoryLibFuncAttrs(*F, TLI); } -bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { +bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, + const TargetLibraryInfo &TLI) { LibFunc TheLibFunc; if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc))) return false; @@ -360,6 +391,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; case LibFunc_strdup: + Changed |= setAllocFamily(F, "malloc"); Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); @@ -416,9 +448,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_aligned_alloc: + Changed |= setAlignedAllocParam(F, 0); + Changed |= setAllocSize(F, 1, None); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned); + LLVM_FALLTHROUGH; case LibFunc_valloc: case LibFunc_malloc: case LibFunc_vec_malloc: + Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? "vec_malloc" + : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized); + Changed |= setAllocSize(F, 0, None); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -481,6 +521,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memalign: + Changed |= setAllocFamily(F, "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | + AllocFnKind::Uninitialized); + Changed |= setAllocSize(F, 1, None); + Changed |= setAlignedAllocParam(F, 0); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); @@ -500,8 +545,13 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_realloc: - case LibFunc_vec_realloc: case LibFunc_reallocf: + case LibFunc_vec_realloc: + Changed |= setAllocFamily( + F, TheLibFunc == LibFunc_vec_realloc ?
"vec_malloc" : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Realloc); + Changed |= setAllocatedPointerParam(F, 0); + Changed |= setAllocSize(F, 1, None); Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); @@ -575,6 +625,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_calloc: case LibFunc_vec_calloc: + Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? "vec_malloc" + : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Zeroed); + Changed |= setAllocSize(F, 0, 1); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -633,6 +687,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_free: case LibFunc_vec_free: + Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? "vec_malloc" + : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Free); + Changed |= setAllocatedPointerParam(F, 0); Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1041,7 +1099,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: - Changed |= setSignExtendedArg(F, 1); Changed |= setWillReturn(F); return Changed; case LibFunc_abs: @@ -1178,34 +1235,179 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { } } -bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty, +static void setArgExtAttr(Function &F, unsigned ArgNo, + const TargetLibraryInfo &TLI, bool Signed = true) { + Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(Signed); + if (ExtAttr != Attribute::None && !F.hasParamAttribute(ArgNo, ExtAttr)) + F.addParamAttr(ArgNo, ExtAttr); +} + +// Modeled after X86TargetLowering::markLibCallAttributes. +static void markRegisterParameterAttributes(Function *F) { + if (!F->arg_size() || F->isVarArg()) + return; + + const CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) + return; + + const Module *M = F->getParent(); + unsigned N = M->getNumberRegisterParameters(); + if (!N) + return; + + const DataLayout &DL = M->getDataLayout(); + + for (Argument &A : F->args()) { + Type *T = A.getType(); + if (!T->isIntOrPtrTy()) + continue; + + const TypeSize &TS = DL.getTypeAllocSize(T); + if (TS > 8) + continue; + + assert(TS <= 4 && "Need to account for parameters larger than word size"); + const unsigned NumRegs = TS > 4 ? 2 : 1; + if (N < NumRegs) + return; + + N -= NumRegs; + F->addParamAttr(A.getArgNo(), Attribute::InReg); + } +} + +FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, + LibFunc TheLibFunc, FunctionType *T, + AttributeList AttributeList) { + assert(TLI.has(TheLibFunc) && + "Creating call to non-existing library function."); + StringRef Name = TLI.getName(TheLibFunc); + FunctionCallee C = M->getOrInsertFunction(Name, T, AttributeList); + + // Make sure any mandatory argument attributes are added. + + // Any outgoing i32 argument should be handled with setArgExtAttr() which + // will add an extension attribute if the target ABI requires it. Adding + // argument extensions is typically done by the front end but when an + // optimizer is building a library call on its own it has to take care of + // this. 
Each such generated function must be handled here with sign or + // zero extensions as needed. F is retrieved with cast<> because we require + // the caller to have called isLibFuncEmittable() first. + Function *F = cast<Function>(C.getCallee()); + assert(F->getFunctionType() == T && "Function type does not match."); + switch (TheLibFunc) { + case LibFunc_fputc: + case LibFunc_putchar: + setArgExtAttr(*F, 0, TLI); + break; + case LibFunc_ldexp: + case LibFunc_ldexpf: + case LibFunc_ldexpl: + case LibFunc_memchr: + case LibFunc_memrchr: + case LibFunc_strchr: + setArgExtAttr(*F, 1, TLI); + break; + case LibFunc_memccpy: + setArgExtAttr(*F, 2, TLI); + break; + + // These are functions that are known to not need any argument extension + // on any target: a size_t argument (which may be an i32 on some targets) + // should not trigger the assert below. + case LibFunc_bcmp: + case LibFunc_calloc: + case LibFunc_fwrite: + case LibFunc_malloc: + case LibFunc_memcmp: + case LibFunc_memcpy_chk: + case LibFunc_mempcpy: + case LibFunc_memset_pattern16: + case LibFunc_snprintf: + case LibFunc_stpncpy: + case LibFunc_strlcat: + case LibFunc_strlcpy: + case LibFunc_strncat: + case LibFunc_strncmp: + case LibFunc_strncpy: + case LibFunc_vsnprintf: + break; + + default: +#ifndef NDEBUG + for (unsigned i = 0; i < T->getNumParams(); i++) + assert(!isa<IntegerType>(T->getParamType(i)) && + "Unhandled integer argument."); +#endif + break; + } + + markRegisterParameterAttributes(F); + + return C; +} + +FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, + LibFunc TheLibFunc, FunctionType *T) { + return getOrInsertLibFunc(M, TLI, TheLibFunc, T, AttributeList()); +} + +bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, + LibFunc TheLibFunc) { + StringRef FuncName = TLI->getName(TheLibFunc); + if (!TLI->has(TheLibFunc)) + return false; + + // Check if the Module already has a GlobalValue with the same name, in + // which case it must be a Function with the expected type.
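To illustrate why this mandatory-attribute handling matters: on targets whose ABI requires i32 arguments to be sign- or zero-extended, a libcall synthesized by the optimizer must carry the same signext/zeroext attributes the front end would have emitted. A usage sketch modeled on the emitPutChar changes later in this patch (M, TLI, B, and Char are assumed to be in scope):

    // getOrInsertLibFunc (rather than Module::getOrInsertFunction) adds any
    // extension attribute the target ABI mandates for the i32 argument.
    if (isLibFuncEmittable(M, TLI, LibFunc_putchar)) {
      FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar,
                                                  B.getInt32Ty(), B.getInt32Ty());
      B.CreateCall(PutChar,
                   B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned=*/true));
    }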
+ if (GlobalValue *GV = M->getNamedValue(FuncName)) { + if (auto *F = dyn_cast(GV)) + return TLI->isValidProtoForLibFunc(*F->getFunctionType(), TheLibFunc, *M); + return false; + } + + return true; +} + +bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, + StringRef Name) { + LibFunc TheLibFunc; + return TLI->getLibFunc(Name, TheLibFunc) && + isLibFuncEmittable(M, TLI, TheLibFunc); +} + +bool llvm::hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty, LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) { switch (Ty->getTypeID()) { case Type::HalfTyID: return false; case Type::FloatTyID: - return TLI->has(FloatFn); + return isLibFuncEmittable(M, TLI, FloatFn); case Type::DoubleTyID: - return TLI->has(DoubleFn); + return isLibFuncEmittable(M, TLI, DoubleFn); default: - return TLI->has(LongDoubleFn); + return isLibFuncEmittable(M, TLI, LongDoubleFn); } } -StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && +StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI, + Type *Ty, LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn, LibFunc &TheLibFunc) { + assert(hasFloatFn(M, TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && "Cannot get name for unavailable function!"); switch (Ty->getTypeID()) { case Type::HalfTyID: llvm_unreachable("No name for HalfTy!"); case Type::FloatTyID: + TheLibFunc = FloatFn; return TLI->getName(FloatFn); case Type::DoubleTyID: + TheLibFunc = DoubleFn; return TLI->getName(DoubleFn); default: + TheLibFunc = LongDoubleFn; return TLI->getName(LongDoubleFn); } } @@ -1222,14 +1424,14 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType, ArrayRef Operands, IRBuilderBase &B, const TargetLibraryInfo *TLI, bool IsVaArgs = false) { - if (!TLI->has(TheLibFunc)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, TheLibFunc)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef FuncName = TLI->getName(TheLibFunc); FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs); - FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType); - inferLibFuncAttributes(M, FuncName, *TLI); + FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, FuncType); + inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); CallInst *CI = B.CreateCall(Callee, Operands, FuncName); if (const Function *F = dyn_cast(Callee.getCallee()->stripPointerCasts())) @@ -1298,16 +1500,16 @@ Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_memcpy_chk)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_memcpy_chk)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); AttributeList AS; AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - FunctionCallee MemCpy = M->getOrInsertFunction( - "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(), + FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk, + AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), 
DL.getIntPtrType(Context)); Dst = castToCStr(Dst, B); @@ -1337,6 +1539,15 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, {castToCStr(Ptr, B), Val, Len}, B, TLI); } +Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + LLVMContext &Context = B.GetInsertBlock()->getContext(); + return emitLibCall( + LibFunc_memrchr, B.getInt8PtrTy(), + {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)}, + {castToCStr(Ptr, B), Val, Len}, B, TLI); +} + Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVMContext &Context = B.GetInsertBlock()->getContext(); @@ -1441,14 +1652,15 @@ static void appendTypeSuffix(Value *Op, StringRef &Name, } } -static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name, - IRBuilderBase &B, - const AttributeList &Attrs) { +static Value *emitUnaryFloatFnCallHelper(Value *Op, LibFunc TheLibFunc, + StringRef Name, IRBuilderBase &B, + const AttributeList &Attrs, + const TargetLibraryInfo *TLI) { assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall"); Module *M = B.GetInsertBlock()->getModule(); - FunctionCallee Callee = - M->getOrInsertFunction(Name, Op->getType(), Op->getType()); + FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op->getType(), + Op->getType()); CallInst *CI = B.CreateCall(Callee, Op, Name); // The incoming attribute set may have come from a speculatable intrinsic, but @@ -1463,12 +1675,16 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name, return CI; } -Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B, +Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, + StringRef Name, IRBuilderBase &B, const AttributeList &Attrs) { SmallString<20> NameBuffer; appendTypeSuffix(Op, Name, NameBuffer); - return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs); + LibFunc TheLibFunc; + TLI->getLibFunc(Name, TheLibFunc); + + return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI); } Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, @@ -1476,23 +1692,25 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, LibFunc LongDoubleFn, IRBuilderBase &B, const AttributeList &Attrs) { // Get the name of the function according to TLI. 
- StringRef Name = getFloatFnName(TLI, Op->getType(), - DoubleFn, FloatFn, LongDoubleFn); + Module *M = B.GetInsertBlock()->getModule(); + LibFunc TheLibFunc; + StringRef Name = getFloatFn(M, TLI, Op->getType(), DoubleFn, FloatFn, + LongDoubleFn, TheLibFunc); - return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs); + return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI); } static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, + LibFunc TheLibFunc, StringRef Name, IRBuilderBase &B, const AttributeList &Attrs, - const TargetLibraryInfo *TLI = nullptr) { + const TargetLibraryInfo *TLI) { assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); Module *M = B.GetInsertBlock()->getModule(); - FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(), - Op1->getType(), Op2->getType()); - if (TLI != nullptr) - inferLibFuncAttributes(M, Name, *TLI); + FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op1->getType(), + Op1->getType(), Op2->getType()); + inferNonMandatoryLibFuncAttrs(M, Name, *TLI); CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name); // The incoming attribute set may have come from a speculatable intrinsic, but @@ -1507,15 +1725,19 @@ static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, return CI; } -Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, - IRBuilderBase &B, +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, + StringRef Name, IRBuilderBase &B, const AttributeList &Attrs) { assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); SmallString<20> NameBuffer; appendTypeSuffix(Op1, Name, NameBuffer); - return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs); + LibFunc TheLibFunc; + TLI->getLibFunc(Name, TheLibFunc); + + return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI); } Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, @@ -1524,22 +1746,24 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, LibFunc LongDoubleFn, IRBuilderBase &B, const AttributeList &Attrs) { // Get the name of the function according to TLI. 
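As a usage note for the type-dispatching helpers being reworked here: callers hand in a double/float/long double LibFunc triple and the helper picks the variant matching the operand's type, e.g. sin vs. sinf vs. sinl. A hedged sketch (assuming X is a float-typed Value, TLI a TargetLibraryInfo pointer, and B an IRBuilder):

    // Emits a call to sinf(x), sin(x), or sinl(x) depending on X's type,
    // provided the TargetLibraryInfo reports the function as available.
    Value *R = emitUnaryFloatFnCall(X, TLI, LibFunc_sin, LibFunc_sinf,
                                    LibFunc_sinl, B, AttributeList());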
- StringRef Name = getFloatFnName(TLI, Op1->getType(), - DoubleFn, FloatFn, LongDoubleFn); + Module *M = B.GetInsertBlock()->getModule(); + LibFunc TheLibFunc; + StringRef Name = getFloatFn(M, TLI, Op1->getType(), DoubleFn, FloatFn, + LongDoubleFn, TheLibFunc); - return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI); + return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI); } Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_putchar)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_putchar)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef PutCharName = TLI->getName(LibFunc_putchar); - FunctionCallee PutChar = - M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty()); - inferLibFuncAttributes(M, PutCharName, *TLI); + FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar, + B.getInt32Ty(), B.getInt32Ty()); + inferNonMandatoryLibFuncAttrs(M, PutCharName, *TLI); CallInst *CI = B.CreateCall(PutChar, B.CreateIntCast(Char, B.getInt32Ty(), @@ -1555,14 +1779,14 @@ Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B, Value *llvm::emitPutS(Value *Str, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_puts)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_puts)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef PutsName = TLI->getName(LibFunc_puts); - FunctionCallee PutS = - M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy()); - inferLibFuncAttributes(M, PutsName, *TLI); + FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, B.getInt32Ty(), + B.getInt8PtrTy()); + inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI); CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName); if (const Function *F = dyn_cast(PutS.getCallee()->stripPointerCasts())) @@ -1572,15 +1796,15 @@ Value *llvm::emitPutS(Value *Str, IRBuilderBase &B, Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fputc)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_fputc)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef FPutcName = TLI->getName(LibFunc_fputc); - FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(), - B.getInt32Ty(), File->getType()); + FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputc, B.getInt32Ty(), + B.getInt32Ty(), File->getType()); if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FPutcName, *TLI); + inferNonMandatoryLibFuncAttrs(M, FPutcName, *TLI); Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, "chari"); CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName); @@ -1593,15 +1817,15 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B, Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fputs)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_fputs)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef FPutsName = TLI->getName(LibFunc_fputs); - FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), - B.getInt8PtrTy(), File->getType()); + FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, B.getInt32Ty(), + B.getInt8PtrTy(), File->getType()); if 
(File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FPutsName, *TLI); + inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI); CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName); if (const Function *Fn = @@ -1612,18 +1836,18 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B, Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fwrite)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_fwrite)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc_fwrite); - FunctionCallee F = M->getOrInsertFunction( - FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); + FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite, + DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType()); if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FWriteName, *TLI); + inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI); CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, ConstantInt::get(DL.getIntPtrType(Context), 1), File}); @@ -1636,15 +1860,15 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B, Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_malloc)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_malloc)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef MallocName = TLI->getName(LibFunc_malloc); LLVMContext &Context = B.GetInsertBlock()->getContext(); - FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(), - DL.getIntPtrType(Context)); - inferLibFuncAttributes(M, MallocName, *TLI); + FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc, + B.getInt8PtrTy(), DL.getIntPtrType(Context)); + inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI); CallInst *CI = B.CreateCall(Malloc, Num, MallocName); if (const Function *F = @@ -1656,16 +1880,16 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL, Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI) { - if (!TLI.has(LibFunc_calloc)) + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, &TLI, LibFunc_calloc)) return nullptr; - Module *M = B.GetInsertBlock()->getModule(); StringRef CallocName = TLI.getName(LibFunc_calloc); const DataLayout &DL = M->getDataLayout(); IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); - FunctionCallee Calloc = - M->getOrInsertFunction(CallocName, B.getInt8PtrTy(), PtrType, PtrType); - inferLibFuncAttributes(M, CallocName, TLI); + FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc, + B.getInt8PtrTy(), PtrType, PtrType); + inferNonMandatoryLibFuncAttrs(M, CallocName, TLI); CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName); if (const auto *F = diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp index ac3839f2a4ab..1840f26add2d 100644 --- a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -14,6 +14,9 @@ #include 
"llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/IR/Constants.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 56b6e4bc46a5..e530afc277db 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -279,8 +279,8 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// ; The original call instruction stays in its original block. /// %t0 = musttail call i32 %ptr() /// ret %t0 -static CallBase &versionCallSite(CallBase &CB, Value *Callee, - MDNode *BranchWeights) { +CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee, + MDNode *BranchWeights) { IRBuilder<> Builder(&CB); CallBase *OrigInst = &CB; diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp index 6b01c0c71d00..f229d4bf14e9 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -30,8 +30,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CanonicalizeAliases.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/Constants.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index 049c7d113521..a1ee3df907ec 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -29,7 +29,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/IVDescriptors.h" diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 86413df664a0..8f053cd56e0e 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -23,7 +22,6 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -324,6 +322,9 @@ struct PruningFunctionCloner { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; + bool HostFuncIsStrictFP; + + Instruction *cloneInstruction(BasicBlock::const_iterator II); public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, @@ -331,7 +332,10 @@ public: const char *nameSuffix, ClonedCodeInfo *codeInfo) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo) {} + CodeInfo(codeInfo) { + HostFuncIsStrictFP = + newFunc->getAttributes().hasFnAttr(Attribute::StrictFP); + } /// The specified block is found to be reachable, 
clone it and /// anything that it can reach. @@ -340,6 +344,89 @@ public: }; } // namespace +static bool hasRoundingModeOperand(Intrinsic::ID CIID) { + switch (CIID) { +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ + case Intrinsic::INTRINSIC: \ + return ROUND_MODE == 1; +#define FUNCTION INSTRUCTION +#include "llvm/IR/ConstrainedOps.def" + default: + llvm_unreachable("Unexpected constrained intrinsic id"); + } +} + +Instruction * +PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) { + const Instruction &OldInst = *II; + Instruction *NewInst = nullptr; + if (HostFuncIsStrictFP) { + Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst); + if (CIID != Intrinsic::not_intrinsic) { + // Instead of cloning the instruction, a call to constrained intrinsic + // should be created. + // Assume the first arguments of constrained intrinsics are the same as + // the operands of original instruction. + + // Determine overloaded types of the intrinsic. + SmallVector TParams; + SmallVector Descriptor; + getIntrinsicInfoTableEntries(CIID, Descriptor); + for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) { + Intrinsic::IITDescriptor Operand = Descriptor[I]; + switch (Operand.Kind) { + case Intrinsic::IITDescriptor::Argument: + if (Operand.getArgumentKind() != + Intrinsic::IITDescriptor::AK_MatchType) { + if (I == 0) + TParams.push_back(OldInst.getType()); + else + TParams.push_back(OldInst.getOperand(I - 1)->getType()); + } + break; + case Intrinsic::IITDescriptor::SameVecWidthArgument: + ++I; + break; + default: + break; + } + } + + // Create intrinsic call. + LLVMContext &Ctx = NewFunc->getContext(); + Function *IFn = + Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams); + SmallVector Args; + unsigned NumOperands = OldInst.getNumOperands(); + if (isa(OldInst)) + --NumOperands; + for (unsigned I = 0; I < NumOperands; ++I) { + Value *Op = OldInst.getOperand(I); + Args.push_back(Op); + } + if (const auto *CmpI = dyn_cast(&OldInst)) { + FCmpInst::Predicate Pred = CmpI->getPredicate(); + StringRef PredName = FCmpInst::getPredicateName(Pred); + Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName))); + } + + // The last arguments of a constrained intrinsic are metadata that + // represent rounding mode (absents in some intrinsics) and exception + // behavior. The inlined function uses default settings. + if (hasRoundingModeOperand(CIID)) + Args.push_back( + MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest"))); + Args.push_back( + MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore"))); + + NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict"); + } + } + if (!NewInst) + NewInst = II->clone(); + return NewInst; +} + /// The specified block is found to be reachable, clone it and /// anything that it can reach. void PruningFunctionCloner::CloneBlock( @@ -379,7 +466,14 @@ void PruningFunctionCloner::CloneBlock( for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { - Instruction *NewInst = II->clone(); + Instruction *NewInst = cloneInstruction(II); + + if (HostFuncIsStrictFP) { + // All function calls in the inlined function must get 'strictfp' + // attribute to prevent undesirable optimizations. + if (auto *Call = dyn_cast(NewInst)) + Call->addFnAttr(Attribute::StrictFP); + } // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. 
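[The cloning path above re-emits ordinary FP instructions as constrained intrinsics when the host function is strictfp, appending "round.tonearest" and "fpexcept.ignore" metadata. A hedged sketch of the equivalent construction through IRBuilder; illustrative only, since the cloner builds the call directly via Intrinsic::getDeclaration:]

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    // Roughly what the new path produces for a plain 'fadd' cloned into a
    // strictfp host: a constrained intrinsic with the default FP
    // environment the cloner assumes.
    static CallInst *emitConstrainedFAddSketch(IRBuilderBase &B, Value *L,
                                               Value *R) {
      B.setIsFPConstrained(true); // the host function carries strictfp
      return B.CreateConstrainedFPBinOp(
          Intrinsic::experimental_constrained_fadd, L, R,
          /*FMFSource=*/nullptr, /*Name=*/"", /*FPMathTag=*/nullptr,
          RoundingMode::NearestTiesToEven, fp::ebIgnore);
    }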
@@ -391,7 +485,7 @@ void PruningFunctionCloner::CloneBlock( // a mapping to that value rather than inserting a new instruction into // the basic block. if (Value *V = - SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { + simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (NewFunc != OldFunc) @@ -674,7 +768,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, continue; // See if this instruction simplifies. - Value *SimpleV = SimplifyInstruction(I, DL); + Value *SimpleV = simplifyInstruction(I, DL); if (!SimpleV) continue; diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp index 57c273a0e3c5..55cda0f11e47 100644 --- a/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -11,13 +11,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; +namespace llvm { +class Constant; +} + static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { const Comdat *SC = Src->getComdat(); if (!SC) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cec159f6a448..f94d854f7ee8 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -53,7 +53,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" @@ -62,12 +61,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include #include #include #include -#include #include #include @@ -249,9 +246,10 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix) + BasicBlock *AllocationBlock, std::string Suffix) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), + BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), + AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), Suffix(Suffix) {} @@ -260,7 +258,7 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false), + BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, /* AllowAlloca */ false)), @@ -922,6 +920,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StackAlignment: case Attribute::WillReturn: case Attribute::WriteOnly: + case Attribute::AllocKind: + case Attribute::PresplitCoroutine: continue; // Those attributes should be safe to propagate to the extracted function. 
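[CodeExtractor now takes an optional AllocationBlock so clients can decide where the aggregated-argument struct alloca is placed. A construction sketch under assumed inputs; the wrapper name is illustrative, and every analysis pointer may stay null as before:]

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/CodeExtractor.h"
    using namespace llvm;

    // Outline Blocks into a new function, directing the "structArg"
    // alloca into AllocBB rather than the enclosing function's entry.
    static Function *extractWithAllocaBlock(ArrayRef<BasicBlock *> Blocks,
                                            BasicBlock *AllocBB) {
      CodeExtractor CE(Blocks, /*DT=*/nullptr, /*AggregateArgs=*/true,
                       /*BFI=*/nullptr, /*BPI=*/nullptr, /*AC=*/nullptr,
                       /*AllowVarArgs=*/false, /*AllowAlloca=*/false,
                       /*AllocationBlock=*/AllocBB, /*Suffix=*/"outlined");
      if (!CE.isEligible())
        return nullptr;
      CodeExtractorAnalysisCache CEAC(*Blocks.front()->getParent());
      return CE.extractCodeRegion(CEAC);
    }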
   case Attribute::AlwaysInline:
@@ -939,6 +939,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   case Attribute::NonLazyBind:
   case Attribute::NoRedZone:
   case Attribute::NoUnwind:
+  case Attribute::NoSanitizeBounds:
   case Attribute::NoSanitizeCoverage:
   case Attribute::NullPointerIsValid:
   case Attribute::OptForFuzzing:
@@ -964,6 +965,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
     break;
   // These attributes cannot be applied to functions.
   case Attribute::Alignment:
+  case Attribute::AllocatedPointer:
+  case Attribute::AllocAlign:
   case Attribute::ByVal:
   case Attribute::Dereferenceable:
   case Attribute::DereferenceableOrNull:
@@ -1190,9 +1193,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
     // Allocate a struct at the beginning of this function
     StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
-    Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
-                            "structArg",
-                            &codeReplacer->getParent()->front().front());
+    Struct = new AllocaInst(
+        StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
+        AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
+                        : &codeReplacer->getParent()->front().front());
     params.push_back(Struct);

     // Store aggregated inputs in the struct.
@@ -1771,7 +1775,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
   // Update the entry count of the function.
   if (BFI) {
     auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
-    if (Count.hasValue())
+    if (Count)
       newFunction->setEntryCount(
           ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
     BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index dfb9f608eab2..1ff0f148b3a9 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -40,11 +40,20 @@
 #include "llvm/Transforms/Utils/CodeLayout.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"

 using namespace llvm;

 #define DEBUG_TYPE "code-layout"

+cl::opt<bool> EnableExtTspBlockPlacement(
+    "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
+    cl::desc("Enable machine block placement based on the ext-tsp model, "
+             "optimizing I-cache utilization."));
+
+cl::opt<bool> ApplyExtTspWithoutProfile(
+    "ext-tsp-apply-without-profile",
+    cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
+    cl::init(true), cl::Hidden);
+
 // Algorithm-specific constants. The values are tuned for the best performance
 // of large-scale front-end bound binaries.
 static cl::opt<unsigned>
@@ -63,6 +72,12 @@ static cl::opt<unsigned> BackwardDistance(
     "ext-tsp-backward-distance", cl::Hidden, cl::init(640),
     cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));

+// The maximum size of a chain created by the algorithm. The size is bounded
+// so that the algorithm can efficiently process extremely large instances.
+static cl::opt<unsigned>
+    MaxChainSize("ext-tsp-max-chain-size", cl::Hidden, cl::init(4096),
+                 cl::desc("The maximum size of a chain to create."));
+
 // The maximum size of a chain for splitting. Larger values of the threshold
 // may yield better quality at the cost of worse run-time.
 static cl::opt<unsigned> ChainSplitThreshold(
@@ -115,7 +130,7 @@ enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
 /// together with the corresponding merge 'type' and 'offset'.
class MergeGainTy { public: - explicit MergeGainTy() {} + explicit MergeGainTy() = default; explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType) : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {} @@ -142,7 +157,6 @@ private: MergeTypeTy MergeType{MergeTypeTy::X_Y}; }; -class Block; class Jump; class Chain; class ChainEdge; @@ -223,6 +237,8 @@ public: const std::vector &blocks() const { return Blocks; } + size_t numBlocks() const { return Blocks.size(); } + const std::vector> &edges() const { return Edges; } @@ -499,7 +515,7 @@ private: AllEdges.reserve(AllJumps.size()); for (auto &Block : AllBlocks) { for (auto &Jump : Block.OutJumps) { - const auto SuccBlock = Jump->Target; + auto SuccBlock = Jump->Target; auto CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain); // this edge is already present in the graph if (CurEdge != nullptr) { @@ -589,6 +605,10 @@ private: if (ChainPred == ChainSucc) continue; + // Stop early if the combined chain violates the maximum allowed size + if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) + continue; + // Compute the gain of merging the two chains auto CurGain = getBestMergeGain(ChainPred, ChainSucc, ChainEdge); if (CurGain.score() <= EPS) diff --git a/llvm/lib/Transforms/Utils/CtorUtils.cpp b/llvm/lib/Transforms/Utils/CtorUtils.cpp index 069a86f6ab33..c997f39508e3 100644 --- a/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include #define DEBUG_TYPE "ctor_utils" @@ -62,21 +63,20 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov /// Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. -static std::vector parseGlobalCtors(GlobalVariable *GV) { - if (GV->getInitializer()->isNullValue()) - return std::vector(); +static std::vector> +parseGlobalCtors(GlobalVariable *GV) { ConstantArray *CA = cast(GV->getInitializer()); - std::vector Result; + std::vector> Result; Result.reserve(CA->getNumOperands()); for (auto &V : CA->operands()) { ConstantStruct *CS = cast(V); - Result.push_back(dyn_cast(CS->getOperand(1))); + Result.emplace_back(cast(CS->getOperand(0))->getZExtValue(), + dyn_cast(CS->getOperand(1))); } return Result; } -/// Find the llvm.global_ctors list, verifying that all initializers have an -/// init priority of 65535. +/// Find the llvm.global_ctors list. static GlobalVariable *findGlobalCtors(Module &M) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); if (!GV) @@ -87,9 +87,11 @@ static GlobalVariable *findGlobalCtors(Module &M) { if (!GV->hasUniqueInitializer()) return nullptr; - if (isa(GV->getInitializer())) - return GV; - ConstantArray *CA = cast(GV->getInitializer()); + // If there are no ctors, then the initializer might be null/undef/poison. + // Ignore anything but an array. + ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return nullptr; for (auto &V : CA->operands()) { if (isa(V)) @@ -98,54 +100,47 @@ static GlobalVariable *findGlobalCtors(Module &M) { if (isa(CS->getOperand(1))) continue; - // Must have a function or null ptr. - if (!isa(CS->getOperand(1))) - return nullptr; - - // Init priority must be standard. - ConstantInt *CI = cast(CS->getOperand(0)); - if (CI->getZExtValue() != 65535) + // Can only handle global constructors with no arguments. 
+ Function *F = dyn_cast(CS->getOperand(1)); + if (!F || F->arg_size() != 0) return nullptr; } - return GV; } /// Call "ShouldRemove" for every entry in M's global_ctor list and remove the /// entries for which it returns true. Return true if anything changed. bool llvm::optimizeGlobalCtorsList( - Module &M, function_ref ShouldRemove) { + Module &M, function_ref ShouldRemove) { GlobalVariable *GlobalCtors = findGlobalCtors(M); if (!GlobalCtors) return false; - std::vector Ctors = parseGlobalCtors(GlobalCtors); + std::vector> Ctors = + parseGlobalCtors(GlobalCtors); if (Ctors.empty()) return false; bool MadeChange = false; - // Loop over global ctors, optimizing them when we can. - unsigned NumCtors = Ctors.size(); - BitVector CtorsToRemove(NumCtors); - for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) { - Function *F = Ctors[i]; - // Found a null terminator in the middle of the list, prune off the rest of - // the list. + BitVector CtorsToRemove(Ctors.size()); + std::vector CtorsByPriority(Ctors.size()); + std::iota(CtorsByPriority.begin(), CtorsByPriority.end(), 0); + stable_sort(CtorsByPriority, [&](size_t LHS, size_t RHS) { + return Ctors[LHS].first < Ctors[RHS].first; + }); + for (unsigned CtorIndex : CtorsByPriority) { + const uint32_t Priority = Ctors[CtorIndex].first; + Function *F = Ctors[CtorIndex].second; if (!F) continue; LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); - // We cannot simplify external ctor functions. - if (F->empty()) - continue; - // If we can evaluate the ctor at compile time, do. - if (ShouldRemove(F)) { - Ctors[i] = nullptr; - CtorsToRemove.set(i); - NumCtors--; + if (ShouldRemove(Priority, F)) { + Ctors[CtorIndex].second = nullptr; + CtorsToRemove.set(CtorIndex); MadeChange = true; continue; } diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 589622d69578..205f7a7d9ed2 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -37,12 +37,16 @@ namespace { cl::opt Quiet("debugify-quiet", cl::desc("Suppress verbose debugify output")); +cl::opt DebugifyFunctionsLimit( + "debugify-func-limit", + cl::desc("Set max number of processed functions per pass."), + cl::init(UINT_MAX)); + enum class Level { Locations, LocationsAndVariables }; -// Used for the synthetic mode only. 
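[With the priority made explicit, callers of optimizeGlobalCtorsList now receive it alongside the function and see ctors in ascending priority order. A usage sketch with an illustrative removal policy; real callers such as GlobalOpt decide by actually evaluating the ctor:]

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/CtorUtils.h"
    using namespace llvm;

    // Drop default-priority constructors whose body is a lone 'ret'.
    static bool dropTrivialCtors(Module &M) {
      return optimizeGlobalCtorsList(M, [](uint32_t Priority, Function *F) {
        // The callback now sees the priority; F is non-null but may be a
        // bare declaration, so check for a body before inspecting it.
        if (Priority != 65535 || F->empty())
          return false;
        const BasicBlock &Entry = F->front();
        return Entry.size() == 1 && isa<ReturnInst>(Entry.front());
      });
    }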
cl::opt DebugifyLevel( "debugify-level", cl::desc("Kind of debug info to add"), cl::values(clEnumValN(Level::Locations, "locations", "Locations only"), @@ -210,15 +214,15 @@ bool llvm::applyDebugifyMetadata( static bool applyDebugify(Function &F, enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, - DebugInfoPerPassMap *DIPreservationMap = nullptr, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, StringRef NameOfWrappedPass = "") { Module &M = *F.getParent(); auto FuncIt = F.getIterator(); if (Mode == DebugifyMode::SyntheticDebugInfo) return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), "FunctionDebugify: ", /*ApplyToMF*/ nullptr); - assert(DIPreservationMap); - return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap, + assert(DebugInfoBeforePass); + return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, "FunctionDebugify (original debuginfo)", NameOfWrappedPass); } @@ -226,12 +230,12 @@ applyDebugify(Function &F, static bool applyDebugify(Module &M, enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, - DebugInfoPerPassMap *DIPreservationMap = nullptr, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, StringRef NameOfWrappedPass = "") { if (Mode == DebugifyMode::SyntheticDebugInfo) return applyDebugifyMetadata(M, M.functions(), "ModuleDebugify: ", /*ApplyToMF*/ nullptr); - return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap, + return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, "ModuleDebugify (original debuginfo)", NameOfWrappedPass); } @@ -267,7 +271,7 @@ bool llvm::stripDebugifyMetadata(Module &M) { SmallVector Flags(NMD->operands()); NMD->clearOperands(); for (MDNode *Flag : Flags) { - MDString *Key = dyn_cast_or_null(Flag->getOperand(1)); + auto *Key = cast(Flag->getOperand(1)); if (Key->getString() == "Debug Info Version") { Changed = true; continue; @@ -283,32 +287,37 @@ bool llvm::stripDebugifyMetadata(Module &M) { bool llvm::collectDebugInfoMetadata(Module &M, iterator_range Functions, - DebugInfoPerPassMap &DIPreservationMap, + DebugInfoPerPass &DebugInfoBeforePass, StringRef Banner, StringRef NameOfWrappedPass) { LLVM_DEBUG(dbgs() << Banner << ": (before) " << NameOfWrappedPass << '\n'); - // Clear the map with the debug info before every single pass. - DIPreservationMap.clear(); - if (!M.getNamedMetadata("llvm.dbg.cu")) { dbg() << Banner << ": Skipping module without debug info\n"; return false; } + uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size(); // Visit each instruction. for (Function &F : Functions) { + // Use DI collected after previous Pass (when -debugify-each is used). + if (DebugInfoBeforePass.DIFunctions.count(&F)) + continue; + if (isFunctionSkipped(F)) continue; + // Stop collecting DI if the Functions number reached the limit. + if (++FunctionsCnt >= DebugifyFunctionsLimit) + break; // Collect the DISubprogram. auto *SP = F.getSubprogram(); - DIPreservationMap[NameOfWrappedPass].DIFunctions.insert({F.getName(), SP}); + DebugInfoBeforePass.DIFunctions.insert({&F, SP}); if (SP) { LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n'); for (const DINode *DN : SP->getRetainedNodes()) { if (const auto *DV = dyn_cast(DN)) { - DIPreservationMap[NameOfWrappedPass].DIVariables[DV] = 0; + DebugInfoBeforePass.DIVariables[DV] = 0; } } } @@ -320,20 +329,22 @@ bool llvm::collectDebugInfoMetadata(Module &M, if (isa(I)) continue; - // Collect dbg.values and dbg.declares. - if (auto *DVI = dyn_cast(&I)) { - if (!SP) - continue; - // Skip inlined variables. 
- if (I.getDebugLoc().getInlinedAt()) + // Cllect dbg.values and dbg.declare. + if (DebugifyLevel > Level::Locations) { + if (auto *DVI = dyn_cast(&I)) { + if (!SP) + continue; + // Skip inlined variables. + if (I.getDebugLoc().getInlinedAt()) + continue; + // Skip undef values. + if (DVI->isUndef()) + continue; + + auto *Var = DVI->getVariable(); + DebugInfoBeforePass.DIVariables[Var]++; continue; - // Skip undef values. - if (DVI->isUndef()) - continue; - - auto *Var = DVI->getVariable(); - DIPreservationMap[NameOfWrappedPass].DIVariables[Var]++; - continue; + } } // Skip debug instructions other than dbg.value and dbg.declare. @@ -341,11 +352,11 @@ bool llvm::collectDebugInfoMetadata(Module &M, continue; LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); - DIPreservationMap[NameOfWrappedPass].InstToDelete.insert({&I, &I}); + DebugInfoBeforePass.InstToDelete.insert({&I, &I}); const DILocation *Loc = I.getDebugLoc().get(); bool HasLoc = Loc != nullptr; - DIPreservationMap[NameOfWrappedPass].DILocations.insert({&I, HasLoc}); + DebugInfoBeforePass.DILocations.insert({&I, HasLoc}); } } } @@ -367,12 +378,12 @@ static bool checkFunctions(const DebugFnMap &DIFunctionsBefore, if (SPIt == DIFunctionsBefore.end()) { if (ShouldWriteIntoJSON) Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"}, - {"name", F.first}, + {"name", F.first->getName()}, {"action", "not-generate"}})); else dbg() << "ERROR: " << NameOfWrappedPass - << " did not generate DISubprogram for " << F.first << " from " - << FileNameFromCU << '\n'; + << " did not generate DISubprogram for " << F.first->getName() + << " from " << FileNameFromCU << '\n'; Preserved = false; } else { auto SP = SPIt->second; @@ -382,11 +393,11 @@ static bool checkFunctions(const DebugFnMap &DIFunctionsBefore, // a debug info bug. if (ShouldWriteIntoJSON) Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"}, - {"name", F.first}, + {"name", F.first->getName()}, {"action", "drop"}})); else dbg() << "ERROR: " << NameOfWrappedPass << " dropped DISubprogram of " - << F.first << " from " << FileNameFromCU << '\n'; + << F.first->getName() << " from " << FileNameFromCU << '\n'; Preserved = false; } } @@ -515,7 +526,7 @@ static void writeJSON(StringRef OrigDIVerifyBugsReportFilePath, bool llvm::checkDebugInfoMetadata(Module &M, iterator_range Functions, - DebugInfoPerPassMap &DIPreservationMap, + DebugInfoPerPass &DebugInfoBeforePass, StringRef Banner, StringRef NameOfWrappedPass, StringRef OrigDIVerifyBugsReportFilePath) { LLVM_DEBUG(dbgs() << Banner << ": (after) " << NameOfWrappedPass << '\n'); @@ -526,24 +537,26 @@ bool llvm::checkDebugInfoMetadata(Module &M, } // Map the debug info holding DIs after a pass. - DebugInfoPerPassMap DIPreservationAfter; + DebugInfoPerPass DebugInfoAfterPass; // Visit each instruction. for (Function &F : Functions) { if (isFunctionSkipped(F)) continue; + // Don't process functions without DI collected before the Pass. + if (!DebugInfoBeforePass.DIFunctions.count(&F)) + continue; // TODO: Collect metadata other than DISubprograms. // Collect the DISubprogram. 
auto *SP = F.getSubprogram(); - DIPreservationAfter[NameOfWrappedPass].DIFunctions.insert( - {F.getName(), SP}); + DebugInfoAfterPass.DIFunctions.insert({&F, SP}); if (SP) { LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n'); for (const DINode *DN : SP->getRetainedNodes()) { if (const auto *DV = dyn_cast(DN)) { - DIPreservationAfter[NameOfWrappedPass].DIVariables[DV] = 0; + DebugInfoAfterPass.DIVariables[DV] = 0; } } } @@ -556,19 +569,21 @@ bool llvm::checkDebugInfoMetadata(Module &M, continue; // Collect dbg.values and dbg.declares. - if (auto *DVI = dyn_cast(&I)) { - if (!SP) - continue; - // Skip inlined variables. - if (I.getDebugLoc().getInlinedAt()) - continue; - // Skip undef values. - if (DVI->isUndef()) + if (DebugifyLevel > Level::Locations) { + if (auto *DVI = dyn_cast(&I)) { + if (!SP) + continue; + // Skip inlined variables. + if (I.getDebugLoc().getInlinedAt()) + continue; + // Skip undef values. + if (DVI->isUndef()) + continue; + + auto *Var = DVI->getVariable(); + DebugInfoAfterPass.DIVariables[Var]++; continue; - - auto *Var = DVI->getVariable(); - DIPreservationAfter[NameOfWrappedPass].DIVariables[Var]++; - continue; + } } // Skip debug instructions other than dbg.value and dbg.declare. @@ -580,7 +595,7 @@ bool llvm::checkDebugInfoMetadata(Module &M, const DILocation *Loc = I.getDebugLoc().get(); bool HasLoc = Loc != nullptr; - DIPreservationAfter[NameOfWrappedPass].DILocations.insert({&I, HasLoc}); + DebugInfoAfterPass.DILocations.insert({&I, HasLoc}); } } } @@ -590,16 +605,16 @@ bool llvm::checkDebugInfoMetadata(Module &M, (cast(M.getNamedMetadata("llvm.dbg.cu")->getOperand(0))) ->getFilename(); - auto DIFunctionsBefore = DIPreservationMap[NameOfWrappedPass].DIFunctions; - auto DIFunctionsAfter = DIPreservationAfter[NameOfWrappedPass].DIFunctions; + auto DIFunctionsBefore = DebugInfoBeforePass.DIFunctions; + auto DIFunctionsAfter = DebugInfoAfterPass.DIFunctions; - auto DILocsBefore = DIPreservationMap[NameOfWrappedPass].DILocations; - auto DILocsAfter = DIPreservationAfter[NameOfWrappedPass].DILocations; + auto DILocsBefore = DebugInfoBeforePass.DILocations; + auto DILocsAfter = DebugInfoAfterPass.DILocations; - auto InstToDelete = DIPreservationMap[NameOfWrappedPass].InstToDelete; + auto InstToDelete = DebugInfoBeforePass.InstToDelete; - auto DIVarsBefore = DIPreservationMap[NameOfWrappedPass].DIVariables; - auto DIVarsAfter = DIPreservationAfter[NameOfWrappedPass].DIVariables; + auto DIVarsBefore = DebugInfoBeforePass.DIVariables; + auto DIVarsAfter = DebugInfoAfterPass.DIVariables; bool ShouldWriteIntoJSON = !OrigDIVerifyBugsReportFilePath.empty(); llvm::json::Array Bugs; @@ -626,6 +641,11 @@ bool llvm::checkDebugInfoMetadata(Module &M, else dbg() << ResultBanner << ": FAIL\n"; + // In the case of the `debugify-each`, no need to go over all the instructions + // again in the collectDebugInfoMetadata(), since as an input we can use + // the debugging information from the previous pass. + DebugInfoBeforePass = DebugInfoAfterPass; + LLVM_DEBUG(dbgs() << "\n\n"); return Result; } @@ -770,14 +790,14 @@ bool checkDebugifyMetadata(Module &M, /// legacy module pass manager. 
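[The map keyed by pass name is gone; a single DebugInfoPerPass snapshot travels from collection to check, and, per the hand-off above, the post-pass state seeds the next collection when -debugify-each chains passes. A wiring sketch for the original-debuginfo mode, with an assumed pass name and no bug-report file:]

    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/Debugify.h"
    using namespace llvm;

    // Snapshot debug info, run a pass (elided), then verify preservation.
    static bool verifyDIPreservation(Module &M) {
      DebugInfoPerPass DIBeforePass; // one snapshot, reused across passes
      collectDebugInfoMetadata(M, M.functions(), DIBeforePass,
                               "ModuleDebugify (original debuginfo)",
                               "SomePass");
      // ... run the wrapped pass over M here ...
      return checkDebugInfoMetadata(
          M, M.functions(), DIBeforePass,
          "CheckModuleDebugify (original debuginfo)", "SomePass",
          /*OrigDIVerifyBugsReportFilePath=*/"");
    }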
struct DebugifyModulePass : public ModulePass { bool runOnModule(Module &M) override { - return applyDebugify(M, Mode, DIPreservationMap, NameOfWrappedPass); + return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass); } DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, StringRef NameOfWrappedPass = "", - DebugInfoPerPassMap *DIPreservationMap = nullptr) + DebugInfoPerPass *DebugInfoBeforePass = nullptr) : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass), - DIPreservationMap(DIPreservationMap), Mode(Mode) {} + DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -787,7 +807,7 @@ struct DebugifyModulePass : public ModulePass { private: StringRef NameOfWrappedPass; - DebugInfoPerPassMap *DIPreservationMap; + DebugInfoPerPass *DebugInfoBeforePass; enum DebugifyMode Mode; }; @@ -795,15 +815,15 @@ private: /// single function, used with the legacy module pass manager. struct DebugifyFunctionPass : public FunctionPass { bool runOnFunction(Function &F) override { - return applyDebugify(F, Mode, DIPreservationMap, NameOfWrappedPass); + return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass); } DebugifyFunctionPass( enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, StringRef NameOfWrappedPass = "", - DebugInfoPerPassMap *DIPreservationMap = nullptr) + DebugInfoPerPass *DebugInfoBeforePass = nullptr) : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass), - DIPreservationMap(DIPreservationMap), Mode(Mode) {} + DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -813,7 +833,7 @@ struct DebugifyFunctionPass : public FunctionPass { private: StringRef NameOfWrappedPass; - DebugInfoPerPassMap *DIPreservationMap; + DebugInfoPerPass *DebugInfoBeforePass; enum DebugifyMode Mode; }; @@ -825,7 +845,7 @@ struct CheckDebugifyModulePass : public ModulePass { return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, "CheckModuleDebugify", Strip, StatsMap); return checkDebugInfoMetadata( - M, M.functions(), *DIPreservationMap, + M, M.functions(), *DebugInfoBeforePass, "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, OrigDIVerifyBugsReportFilePath); } @@ -834,11 +854,11 @@ struct CheckDebugifyModulePass : public ModulePass { bool Strip = false, StringRef NameOfWrappedPass = "", DebugifyStatsMap *StatsMap = nullptr, enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, - DebugInfoPerPassMap *DIPreservationMap = nullptr, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, StringRef OrigDIVerifyBugsReportFilePath = "") : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass), OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), - StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode), + StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode), Strip(Strip) {} void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -851,7 +871,7 @@ private: StringRef NameOfWrappedPass; StringRef OrigDIVerifyBugsReportFilePath; DebugifyStatsMap *StatsMap; - DebugInfoPerPassMap *DIPreservationMap; + DebugInfoPerPass *DebugInfoBeforePass; enum DebugifyMode Mode; bool Strip; }; @@ -867,7 +887,7 @@ struct CheckDebugifyFunctionPass : public FunctionPass { NameOfWrappedPass, "CheckFunctionDebugify", Strip, StatsMap); return checkDebugInfoMetadata( - M, make_range(FuncIt, std::next(FuncIt)), *DIPreservationMap, + M, make_range(FuncIt, 
std::next(FuncIt)), *DebugInfoBeforePass, "CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass, OrigDIVerifyBugsReportFilePath); } @@ -876,11 +896,11 @@ struct CheckDebugifyFunctionPass : public FunctionPass { bool Strip = false, StringRef NameOfWrappedPass = "", DebugifyStatsMap *StatsMap = nullptr, enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, - DebugInfoPerPassMap *DIPreservationMap = nullptr, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, StringRef OrigDIVerifyBugsReportFilePath = "") : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass), OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), - StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode), + StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode), Strip(Strip) {} void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -893,7 +913,7 @@ private: StringRef NameOfWrappedPass; StringRef OrigDIVerifyBugsReportFilePath; DebugifyStatsMap *StatsMap; - DebugInfoPerPassMap *DIPreservationMap; + DebugInfoPerPass *DebugInfoBeforePass; enum DebugifyMode Mode; bool Strip; }; @@ -923,21 +943,21 @@ void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) { ModulePass *createDebugifyModulePass(enum DebugifyMode Mode, llvm::StringRef NameOfWrappedPass, - DebugInfoPerPassMap *DIPreservationMap) { + DebugInfoPerPass *DebugInfoBeforePass) { if (Mode == DebugifyMode::SyntheticDebugInfo) return new DebugifyModulePass(); assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); - return new DebugifyModulePass(Mode, NameOfWrappedPass, DIPreservationMap); + return new DebugifyModulePass(Mode, NameOfWrappedPass, DebugInfoBeforePass); } FunctionPass * createDebugifyFunctionPass(enum DebugifyMode Mode, llvm::StringRef NameOfWrappedPass, - DebugInfoPerPassMap *DIPreservationMap) { + DebugInfoPerPass *DebugInfoBeforePass) { if (Mode == DebugifyMode::SyntheticDebugInfo) return new DebugifyFunctionPass(); assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); - return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DIPreservationMap); + return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DebugInfoBeforePass); } PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { @@ -948,25 +968,25 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { ModulePass *createCheckDebugifyModulePass( bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap, - enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap, + enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass, StringRef OrigDIVerifyBugsReportFilePath) { if (Mode == DebugifyMode::SyntheticDebugInfo) return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap); assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); return new CheckDebugifyModulePass(false, NameOfWrappedPass, nullptr, Mode, - DIPreservationMap, + DebugInfoBeforePass, OrigDIVerifyBugsReportFilePath); } FunctionPass *createCheckDebugifyFunctionPass( bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap, - enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap, + enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass, StringRef OrigDIVerifyBugsReportFilePath) { if (Mode == DebugifyMode::SyntheticDebugInfo) return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap); assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); return new 
CheckDebugifyFunctionPass(false, NameOfWrappedPass, nullptr, Mode, - DIPreservationMap, + DebugInfoBeforePass, OrigDIVerifyBugsReportFilePath); } diff --git a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 5f53d794fe8a..f6f80540ad95 100644 --- a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -8,11 +8,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp index e73287c060ae..7b8d8553bac2 100644 --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -29,7 +29,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -37,7 +36,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #define DEBUG_TYPE "evaluator" @@ -219,10 +217,13 @@ Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { P = cast(P->stripAndAccumulateConstantOffsets( DL, Offset, /* AllowNonInbounds */ true)); Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); - auto *GV = dyn_cast(P); - if (!GV) - return nullptr; + if (auto *GV = dyn_cast(P)) + return ComputeLoadResult(GV, Ty, Offset); + return nullptr; +} +Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset) { auto It = MutatedMemory.find(GV); if (It != MutatedMemory.end()) return It->second.read(Ty, Offset, DL); @@ -335,50 +336,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer()); if (!Res.first->second.write(Val, Offset, DL)) return false; - } else if (BinaryOperator *BO = dyn_cast(CurInst)) { - InstResult = ConstantExpr::get(BO->getOpcode(), - getVal(BO->getOperand(0)), - getVal(BO->getOperand(1))); - LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " - << *InstResult << "\n"); - } else if (CmpInst *CI = dyn_cast(CurInst)) { - InstResult = ConstantExpr::getCompare(CI->getPredicate(), - getVal(CI->getOperand(0)), - getVal(CI->getOperand(1))); - LLVM_DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult - << "\n"); - } else if (CastInst *CI = dyn_cast(CurInst)) { - InstResult = ConstantExpr::getCast(CI->getOpcode(), - getVal(CI->getOperand(0)), - CI->getType()); - LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult - << "\n"); - } else if (SelectInst *SI = dyn_cast(CurInst)) { - InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), - getVal(SI->getOperand(1)), - getVal(SI->getOperand(2))); - LLVM_DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult - << "\n"); - } else if (auto *EVI = dyn_cast(CurInst)) { - InstResult = ConstantExpr::getExtractValue( - getVal(EVI->getAggregateOperand()), EVI->getIndices()); - LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! 
Simplifying: " - << *InstResult << "\n"); - } else if (auto *IVI = dyn_cast(CurInst)) { - InstResult = ConstantExpr::getInsertValue( - getVal(IVI->getAggregateOperand()), - getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); - LLVM_DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " - << *InstResult << "\n"); - } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { - Constant *P = getVal(GEP->getOperand(0)); - SmallVector GEPOps; - for (Use &Op : llvm::drop_begin(GEP->operands())) - GEPOps.push_back(getVal(Op)); - InstResult = - ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, - cast(GEP)->isInBounds()); - LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast(CurInst)) { if (!LI->isSimple()) { LLVM_DEBUG( @@ -438,16 +395,39 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, << "intrinsic.\n"); return false; } + + auto *LenC = dyn_cast(getVal(MSI->getLength())); + if (!LenC) { + LLVM_DEBUG(dbgs() << "Memset with unknown length.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + auto *GV = dyn_cast(Ptr); + if (!GV) { + LLVM_DEBUG(dbgs() << "Memset with unknown base.\n"); + return false; + } + Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = - ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType()); - if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { - // This memset is a no-op. - LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); - ++CurInst; - continue; + APInt Len = LenC->getValue(); + while (Len != 0) { + Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset); + if (DestVal != Val) { + LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset " + << Offset << " of " << *GV << ".\n"); + return false; + } + ++Offset; + --Len; } + + LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; } if (II->isLifetimeStartOrEnd()) { @@ -602,11 +582,16 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n"); return true; } else { - // Did not know how to evaluate this! - LLVM_DEBUG( - dbgs() << "Failed to evaluate block due to unhandled instruction." - "\n"); - return false; + SmallVector Ops; + for (Value *Op : CurInst->operands()) + Ops.push_back(getVal(Op)); + InstResult = ConstantFoldInstOperands(&*CurInst, Ops, DL, TLI); + if (!InstResult) { + LLVM_DEBUG(dbgs() << "Cannot fold instruction: " << *CurInst << "\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Folded instruction " << *CurInst << " to " + << *InstResult << "\n"); } if (!CurInst->use_empty()) { @@ -631,6 +616,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, /// function. bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs) { + assert(ActualArgs.size() == F->arg_size() && "wrong number of arguments"); + // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. 
if (is_contained(CallStack, F)) diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 8de3ce876bab..24539bd231c6 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -68,6 +68,7 @@ #include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -137,10 +138,18 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop, // not be necessary if we can retain such backedges. if (Headers.count(Child->getHeader())) { for (auto BB : Child->blocks()) { + if (LI.getLoopFor(BB) != Child) + continue; LI.changeLoopFor(BB, NewLoop); LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName() << "\n"); } + std::vector GrandChildLoops; + std::swap(GrandChildLoops, Child->getSubLoopsVector()); + for (auto GrandChildLoop : GrandChildLoops) { + GrandChildLoop->setParentLoop(nullptr); + NewLoop->addChildLoop(GrandChildLoop); + } LI.destroy(Child); LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n"); continue; diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 2946c0018c31..193806d9cc87 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionImportUtils.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/InstIterator.h" using namespace llvm; /// Checks if we should import SGV as a definition, otherwise import as a diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp index c1c5f5cc879f..c5aded3c45f4 100644 --- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -38,22 +38,26 @@ static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { } /// It is safe to destroy a constant iff it is only used by constants itself. -/// Note that constants cannot be cyclic, so this test is pretty easy to -/// implement recursively. -/// +/// Note that while constants cannot be cyclic, they can be tree-like, so we +/// should keep a visited set to avoid exponential runtime. 
bool llvm::isSafeToDestroyConstant(const Constant *C) { - if (isa(C)) - return false; - - if (isa(C)) - return false; + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(C); + while (!Worklist.empty()) { + const Constant *C = Worklist.pop_back_val(); + if (!Visited.insert(C).second) + continue; + if (isa(C) || isa(C)) + return false; - for (const User *U : C->users()) - if (const Constant *CU = dyn_cast(U)) { - if (!isSafeToDestroyConstant(CU)) + for (const User *U : C->users()) { + if (const Constant *CU = dyn_cast(U)) + Worklist.push_back(CU); + else return false; - } else - return false; + } + } return true; } @@ -100,6 +104,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (SI->isVolatile()) return true; + ++GS.NumStores; + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); // If this is a direct store to the global (i.e., the global is a scalar diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 047bf5569ded..55bcb6f3b121 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 923bcc781e47..2fb00f95b749 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -37,7 +37,6 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -85,7 +84,7 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), static cl::opt UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, - cl::ZeroOrMore, cl::init(true), + cl::init(true), cl::desc("Use the llvm.experimental.noalias.scope.decl " "intrinsic during inlining.")); @@ -1044,12 +1043,10 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, } for (Value *Arg : Call->args()) { - // We need to check the underlying objects of all arguments, not just - // the pointer arguments, because we might be passing pointers as - // integers, etc. - // However, if we know that the call only accesses pointer arguments, - // then we only need to check the pointer arguments. - if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy()) + // Only care about pointer arguments. If a noalias argument is + // accessed through a non-pointer argument, it must be captured + // first (e.g. via ptrtoint), and we protect against captures below. + if (!Arg->getType()->isPointerTy()) continue; PtrArgs.push_back(Arg); @@ -1080,7 +1077,8 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, // Figure out if we're derived from anything that is not a noalias // argument. 
- bool CanDeriveViaCapture = false, UsesAliasingPtr = false; + bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, + UsesUnknownObject = false; for (const Value *V : ObjSet) { // Is this value a constant that cannot be derived from any pointer // value (we need to exclude constant expressions, for example, that @@ -1101,19 +1099,28 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, UsesAliasingPtr = true; } - // If this is not some identified function-local object (which cannot - // directly alias a noalias argument), or some other argument (which, - // by definition, also cannot alias a noalias argument), then we could - // alias a noalias argument that has been captured). - if (!isa(V) && - !isIdentifiedFunctionLocal(const_cast(V))) - CanDeriveViaCapture = true; + if (isEscapeSource(V)) { + // An escape source can only alias with a noalias argument if it has + // been captured beforehand. + RequiresNoCaptureBefore = true; + } else if (!isa(V) && !isIdentifiedObject(V)) { + // If this is neither an escape source, nor some identified object + // (which cannot directly alias a noalias argument), nor some other + // argument (which, by definition, also cannot alias a noalias + // argument), conservatively do not make any assumptions. + UsesUnknownObject = true; + } } + // Nothing we can do if the used underlying object cannot be reliably + // determined. + if (UsesUnknownObject) + continue; + // A function call can always get captured noalias pointers (via other // parameters, globals, etc.). if (IsFuncCall && !IsArgMemOnlyCall) - CanDeriveViaCapture = true; + RequiresNoCaptureBefore = true; // First, we want to figure out all of the sets with which we definitely // don't alias. Iterate over all noalias set, and add those for which: @@ -1124,16 +1131,16 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, // noalias arguments via other noalias arguments or globals, and so we // must always check for prior capture. for (const Argument *A : NoAliasArgs) { - if (!ObjSet.count(A) && (!CanDeriveViaCapture || - // It might be tempting to skip the - // PointerMayBeCapturedBefore check if - // A->hasNoCaptureAttr() is true, but this is - // incorrect because nocapture only guarantees - // that no copies outlive the function, not - // that the value cannot be locally captured. - !PointerMayBeCapturedBefore(A, - /* ReturnCaptures */ false, - /* StoreCaptures */ false, I, &DT))) + if (ObjSet.contains(A)) + continue; // May be based on a noalias argument. + + // It might be tempting to skip the PointerMayBeCapturedBefore check if + // A->hasNoCaptureAttr() is true, but this is incorrect because + // nocapture only guarantees that no copies outlive the function, not + // that the value cannot be locally captured. + if (!RequiresNoCaptureBefore || + !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false, + /* StoreCaptures */ false, I, &DT)) NoAliases.push_back(NewScopes[A]); } @@ -1422,7 +1429,8 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg, // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). 
- Alignment = max(Alignment, MaybeAlign(ByValAlignment)); + if (ByValAlignment > 0) + Alignment = std::max(Alignment, Align(ByValAlignment)); Value *NewAlloca = new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment, @@ -1601,7 +1609,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, return; auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; int64_t CallCount = - std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount()); + std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); updateProfileCallee(Callee, -CallCount, &VMap); } @@ -1609,7 +1617,7 @@ void llvm::updateProfileCallee( Function *Callee, int64_t EntryDelta, const ValueMap *VMap) { auto CalleeCount = Callee->getEntryCount(); - if (!CalleeCount.hasValue()) + if (!CalleeCount) return; const uint64_t PriorEntryCount = CalleeCount->getCount(); @@ -1789,6 +1797,13 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, BasicBlock *OrigBB = CB.getParent(); Function *Caller = OrigBB->getParent(); + // Do not inline strictfp function into non-strictfp one. It would require + // conversion of all FP operations in host function to constrained intrinsics. + if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) && + !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) { + return InlineResult::failure("incompatible strictfp attributes"); + } + // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. @@ -2644,7 +2659,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AssumptionCache *AC = IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { + if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 9082049c82da..47ab30f03d14 100644 --- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -18,7 +18,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index 72b864dc3e48..84d377d835f3 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -33,14 +33,13 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 6958a89f5be6..6e87da9fb168 100644 --- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -30,14 +30,12 @@ #include 
"llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 9a10535c9310..b203259db1c6 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -29,7 +29,6 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -63,9 +62,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -80,7 +77,6 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include #include -#include #include #include #include @@ -489,7 +485,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, if (auto *FPI = dyn_cast(I)) { Optional ExBehavior = FPI->getExceptionBehavior(); - return ExBehavior.getValue() != fp::ebStrict; + return *ExBehavior != fp::ebStrict; } } @@ -504,15 +500,12 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, if (isMathLibCallNoop(Call, TLI)) return true; - // To express possible interaction with floating point environment constrained - // intrinsics are described as if they access memory. So they look like having - // side effect but actually do not have it unless they raise floating point - // exception. If FP exceptions are ignored, the intrinsic may be deleted. - if (auto *CI = dyn_cast(I)) { - Optional EB = CI->getExceptionBehavior(); - if (!EB || *EB == fp::ExceptionBehavior::ebIgnore) - return true; - } + // Non-volatile atomic loads from constants can be removed. + if (auto *LI = dyn_cast(I)) + if (auto *GV = dyn_cast( + LI->getPointerOperand()->stripPointerCasts())) + if (!LI->isVolatile() && GV->isConstant()) + return true; return false; } @@ -682,7 +675,7 @@ simplifyAndDCEInstruction(Instruction *I, return true; } - if (Value *SimpleV = SimplifyInstruction(I, DL)) { + if (Value *SimpleV = simplifyInstruction(I, DL)) { // Add the users to the worklist. CAREFUL: an instruction can use itself, // in the case of a phi node. for (User *U : I->users()) { @@ -1133,7 +1126,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, // If there is more than one pred of succ, and there are PHI nodes in // the successor, then we need to add incoming edges for the PHI nodes // - const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + const PredBlockVector BBPreds(predecessors(BB)); // Loop over all of the PHI nodes in the successor of BB. 
for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { @@ -1393,7 +1386,7 @@ Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, static bool PhiHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, PHINode *APN) { - // Since we can't guarantee that the original dbg.declare instrinsic + // Since we can't guarantee that the original dbg.declare intrinsic // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. SmallVector DbgValues; @@ -1472,7 +1465,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " << *DII << '\n'); // For now, when there is a store to parts of the variable (but we do not - // know which part) we insert an dbg.value instrinsic to indicate that we + // know which part) we insert an dbg.value intrinsic to indicate that we // know nothing about the variable's content. DV = UndefValue::get(DV->getType()); Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); @@ -2240,6 +2233,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); + II->setMetadata(LLVMContext::MD_prof, CI->getMetadata(LLVMContext::MD_prof)); if (DTU) DTU->applyUpdates({{DominatorTree::Insert, BB, UnwindEdge}}); @@ -2349,19 +2343,42 @@ static bool markAliveBlocks(Function &F, isa(Callee)) { changeToUnreachable(II, false, DTU); Changed = true; - } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { - if (II->use_empty() && II->onlyReadsMemory()) { - // jump to the normal destination branch. - BasicBlock *NormalDestBB = II->getNormalDest(); - BasicBlock *UnwindDestBB = II->getUnwindDest(); - BranchInst::Create(NormalDestBB, II); - UnwindDestBB->removePredecessor(II->getParent()); - II->eraseFromParent(); + } else { + if (II->doesNotReturn() && + !isa(II->getNormalDest()->front())) { + // If we found an invoke of a no-return function, + // create a new empty basic block with an `unreachable` terminator, + // and set it as the normal destination for the invoke, + // unless that is already the case. + // Note that the original normal destination could have other uses. + BasicBlock *OrigNormalDest = II->getNormalDest(); + OrigNormalDest->removePredecessor(II->getParent()); + LLVMContext &Ctx = II->getContext(); + BasicBlock *UnreachableNormalDest = BasicBlock::Create( + Ctx, OrigNormalDest->getName() + ".unreachable", + II->getFunction(), OrigNormalDest); + new UnreachableInst(Ctx, UnreachableNormalDest); + II->setNormalDest(UnreachableNormalDest); if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); - } else - changeToCall(II, DTU); - Changed = true; + DTU->applyUpdates( + {{DominatorTree::Delete, BB, OrigNormalDest}, + {DominatorTree::Insert, BB, UnreachableNormalDest}}); + Changed = true; + } + if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { + if (II->use_empty() && !II->mayHaveSideEffects()) { + // jump to the normal destination branch. 
+ BasicBlock *NormalDestBB = II->getNormalDest(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + BranchInst::Create(NormalDestBB, II); + UnwindDestBB->removePredecessor(II->getParent()); + II->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); + } else + changeToCall(II, DTU); + Changed = true; + } } } else if (auto *CatchSwitch = dyn_cast(Terminator)) { // Remove catchpads which cannot be reached. diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 5b66da1e7082..f093fea19c4d 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -28,7 +28,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -38,12 +37,10 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include -#include using namespace llvm; using namespace llvm::PatternMatch; @@ -389,6 +386,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!PP.AllowPeeling) return; + // Check that we can peel at least one iteration. + if (2 * LoopSize > Threshold) + return; + unsigned AlreadyPeeled = 0; if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) AlreadyPeeled = *Peeled; @@ -401,47 +402,45 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // which every Phi is guaranteed to become an invariant, and try to peel the // maximum number of iterations among these values, thus turning all those // Phis into invariants. - // First, check that we can peel at least one iteration. - if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) { - // Store the pre-calculated values here. - SmallDenseMap > IterationsToInvariance; - // Now go through all Phis to calculate their the number of iterations they - // need to become invariants. - // Start the max computation with the PP.PeelCount value set by the target - // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. - unsigned DesiredPeelCount = TargetPeelCount; - BasicBlock *BackEdge = L->getLoopLatch(); - assert(BackEdge && "Loop is not in simplified form?"); - for (auto BI = L->getHeader()->begin(); isa(&*BI); ++BI) { - PHINode *Phi = cast(&*BI); - auto ToInvariance = calculateIterationsToInvariance( - Phi, L, BackEdge, IterationsToInvariance); - if (ToInvariance) - DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance); - } - // Pay respect to limitations implied by loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); - - DesiredPeelCount = std::max(DesiredPeelCount, - countToEliminateCompares(*L, MaxPeelCount, SE)); - - if (DesiredPeelCount == 0) - DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT); - - if (DesiredPeelCount > 0) { - DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); - // Consider max peel count limitation. 
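// Aside: the peeling budget used in computePeelCount, in concrete numbers —
// a hypothetical worked example, not taken from the patch. With
// Threshold = 400 and LoopSize = 50, peeling is considered at all
// (2 * 50 <= 400), and the size-derived cap is Threshold / LoopSize - 1 = 7
// iterations, further clamped by UnrollPeelMaxCount.
#include <algorithm>

static unsigned peelBudgetSketch(unsigned LoopSize, unsigned Threshold,
                                 unsigned UnrollPeelMaxCount) {
  if (2 * LoopSize > Threshold)
    return 0; // cannot afford to peel even one iteration
  return std::min(UnrollPeelMaxCount, Threshold / LoopSize - 1);
}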
- assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - PP.PeelCount = DesiredPeelCount; - PP.PeelProfiledIterations = false; - return; - } + // Store the pre-calculated values here. + SmallDenseMap> IterationsToInvariance; + // Now go through all Phis to calculate their the number of iterations they + // need to become invariants. + // Start the max computation with the PP.PeelCount value set by the target + // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. + unsigned DesiredPeelCount = TargetPeelCount; + BasicBlock *BackEdge = L->getLoopLatch(); + assert(BackEdge && "Loop is not in simplified form?"); + for (auto BI = L->getHeader()->begin(); isa(&*BI); ++BI) { + PHINode *Phi = cast(&*BI); + auto ToInvariance = calculateIterationsToInvariance(Phi, L, BackEdge, + IterationsToInvariance); + if (ToInvariance) + DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance); + } + + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); + + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); + + if (DesiredPeelCount == 0) + DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT); + + if (DesiredPeelCount > 0) { + DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); + // Consider max peel count limitation. + assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); + PP.PeelCount = DesiredPeelCount; + PP.PeelProfiledIterations = false; + return; } } @@ -461,27 +460,26 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (L->getHeader()->getParent()->hasProfileData()) { if (violatesLegacyMultiExitLoopCheck(L)) return; - Optional PeelCount = getLoopEstimatedTripCount(L); - if (!PeelCount) + Optional EstimatedTripCount = getLoopEstimatedTripCount(L); + if (!EstimatedTripCount) return; - LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount - << "\n"); + LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " + << *EstimatedTripCount << "\n"); - if (*PeelCount) { - if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && - (LoopSize * (*PeelCount + 1) <= Threshold)) { - LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount - << " iterations.\n"); - PP.PeelCount = *PeelCount; + if (*EstimatedTripCount) { + if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) { + unsigned PeelCount = *EstimatedTripCount; + LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n"); + PP.PeelCount = PeelCount; return; } - LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); - LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) - << "\n"); + LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n"); LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Max peel count by cost: " + << (Threshold / LoopSize - 1) << "\n"); } } } @@ -579,7 +577,8 @@ static void cloneLoopBlocks( 
SmallVectorImpl> &ExitEdges, SmallVectorImpl &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, - LoopInfo *LI, ArrayRef LoopLocalNoAliasDeclScopes) { + LoopInfo *LI, ArrayRef LoopLocalNoAliasDeclScopes, + ScalarEvolution &SE) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *PreHeader = L->getLoopPreheader(); @@ -685,6 +684,7 @@ static void cloneLoopBlocks( if (LatchInst && L->contains(LatchInst)) LatchVal = VMap[LatchVal]; PHI.addIncoming(LatchVal, cast(VMap[Edge.first])); + SE.forgetValue(&PHI); } // LastValueMap is updated with the values for the current loop @@ -719,9 +719,9 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( } // User specifed values provided by argument. - if (UserAllowPeeling.hasValue()) + if (UserAllowPeeling) PP.AllowPeeling = *UserAllowPeeling; - if (UserAllowProfileBasedPeeling.hasValue()) + if (UserAllowProfileBasedPeeling) PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; return PP; @@ -851,7 +851,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI, - LoopLocalNoAliasDeclScopes); + LoopLocalNoAliasDeclScopes, *SE); // Remap to use values from the current iteration instead of the // previous one. @@ -907,8 +907,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // We modified the loop, update SE. SE->forgetTopmostLoop(L); +#ifdef EXPENSIVE_CHECKS // Finally DomtTree must be correct. assert(DT.verify(DominatorTree::VerificationLevel::Fast)); +#endif // FIXME: Incrementally update loop-simplify simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA); diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index c66fd7bb0588..0f33559c7e70 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -13,31 +13,24 @@ #include "llvm/Transforms/Utils/LoopRotationUtils.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -317,7 +310,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { L->dump()); return Rotated; } - if (Metrics.NumInsts > MaxHeaderSize) { + if (!Metrics.NumInsts.isValid()) { + LLVM_DEBUG(dbgs() << "LoopRotation: 
NOT rotating - contains instructions" + " with invalid cost: "; + L->dump()); + return Rotated; + } + if (*Metrics.NumInsts.getValue() > MaxHeaderSize) { LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains " << Metrics.NumInsts << " instructions, which is more than the threshold (" @@ -446,7 +445,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C, SQ); + Value *V = simplifyInstruction(C, SQ); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 67311ab4cd02..55d5c733733b 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -40,8 +40,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -59,14 +57,11 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -181,7 +176,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { PHINode *PN = cast(I); ++I; - if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { + if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) { // This is a degenerate PHI already, don't modify it! 
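// Aside: the degenerate-PHI folding used here (and in several hunks below)
// in isolation — simplifyInstruction returns the value a trivial PHI
// collapses to, e.g. when all incoming values are identical. A minimal
// sketch; the wrapper name is hypothetical.
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instructions.h"

static bool foldDegeneratePHI(llvm::PHINode *PN,
                              const llvm::SimplifyQuery &SQ) {
  if (llvm::Value *V = llvm::simplifyInstruction(PN, SQ)) {
    PN->replaceAllUsesWith(V);
    PN->eraseFromParent();
    return true;
  }
  return false;
}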
PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -602,7 +597,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) - if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { + if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 9ca1f4f44b97..1be1082002fc 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -236,7 +236,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, SmallVector DeadInsts; for (BasicBlock *BB : L->getBlocks()) { for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { - if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC})) + if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(&Inst, V)) Inst.replaceAllUsesWith(V); if (isInstructionTriviallyDead(&Inst)) @@ -513,7 +513,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (const DILocation *DIL = I.getDebugLoc()) { auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count); if (NewDIL) - I.setDebugLoc(NewDIL.getValue()); + I.setDebugLoc(*NewDIL); else LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index 6efaa012aeca..96485d15c75b 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -39,7 +38,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" @@ -358,7 +356,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, if (const DILocation *DIL = I.getDebugLoc()) { auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count); if (NewDIL) - I.setDebugLoc(NewDIL.getValue()); + I.setDebugLoc(*NewDIL); else LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index bb719a499a4c..cd3b6c1a095a 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -20,20 +20,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" 
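// Aside: many hunks in this import are the same mechanical llvm::Optional
// migration — hasValue() becomes a boolean test, getValue() becomes
// operator*, and getValueOr(X) becomes value_or(X). A minimal sketch:
#include "llvm/ADT/Optional.h"

static int optionalMigrationSketch(llvm::Optional<int> O) {
  int Fallback = O.value_or(0); // was: O.getValueOr(0)
  if (!O)                       // was: if (!O.hasValue())
    return Fallback;
  return *O;                    // was: O.getValue()
}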
#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -74,7 +73,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + LoopInfo *LI, bool PreserveLCSSA, + ScalarEvolution &SE) { // Loop structure should be the following: // Preheader // PrologHeader @@ -134,6 +134,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, PN.setIncomingValueForBlock(NewPreHeader, NewPN); else PN.addIncoming(NewPN, PrologExit); + SE.forgetValue(&PN); } } @@ -192,7 +193,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + LoopInfo *LI, bool PreserveLCSSA, + ScalarEvolution &SE) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast(VMap[Latch]); @@ -233,6 +235,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // Add incoming PreHeader from branch around the Loop PN.addIncoming(UndefValue::get(PN.getType()), PreHeader); + SE.forgetValue(&PN); Value *V = PN.getIncomingValueForBlock(Latch); Instruction *I = dyn_cast(V); @@ -398,7 +401,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, Optional NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); - if (NewLoopID.hasValue()) { + if (NewLoopID) { NewLoop->setLoopID(NewLoopID.getValue()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined @@ -739,11 +742,28 @@ bool llvm::UnrollRuntimeLoopRemainder( // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast(PreHeader->getTerminator()); + IRBuilder<> B(PreHeaderBR); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); - Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), - PreHeaderBR); - IRBuilder<> B(PreHeaderBR); + Value *BECount; + // If there are other exits before the latch, that may cause the latch exit + // branch to never be executed, and the latch exit count may be poison. + // In this case, freeze the TripCount and base BECount on the frozen + // TripCount. We will introduce two branches using these values, and it's + // important that they see a consistent value (which would not be guaranteed + // if were frozen independently.) + if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) && + !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) { + TripCount = B.CreateFreeze(TripCount); + BECount = + B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1)); + } else { + // If we don't need to freeze, use SCEVExpander for BECount as well, to + // allow slightly better value reuse. + BECount = + Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); + } + Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count); Value *BranchVal = @@ -884,9 +904,8 @@ bool llvm::UnrollRuntimeLoopRemainder( if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. 
- ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, - EpilogPreHeader, NewPreHeader, VMap, DT, LI, - PreserveLCSSA); + ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); // Update counter in loop for unrolling. // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. @@ -910,7 +929,7 @@ bool llvm::UnrollRuntimeLoopRemainder( // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, - NewPreHeader, VMap, DT, LI, PreserveLCSSA); + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); } // If this loop is nested, then the loop unroller changes the code in the any @@ -941,7 +960,7 @@ bool llvm::UnrollRuntimeLoopRemainder( SmallVector DeadInsts; for (BasicBlock *BB : RemainderBlocks) { for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { - if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC})) + if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(&Inst, V)) Inst.replaceAllUsesWith(V); if (isInstructionTriviallyDead(&Inst)) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 95db2fe8d310..ec898c463574 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -23,31 +23,25 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstSimplifyFolder.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" @@ -260,10 +254,10 @@ llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) { Optional Width = getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); - if (Width.hasValue()) { + if (Width) { Optional IsScalable = getOptionalIntLoopAttribute( TheLoop, "llvm.loop.vectorize.scalable.enable"); - return ElementCount::get(*Width, IsScalable.getValueOr(false)); + return ElementCount::get(*Width, IsScalable.value_or(false)); } return None; @@ -364,7 +358,7 @@ TransformationMode llvm::hasUnrollTransformation(const Loop *L) { Optional Count = getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); - if (Count.hasValue()) + if (Count) return Count.getValue() == 1 ? 
TM_SuppressedByUser : TM_ForcedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) @@ -385,7 +379,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { Optional Count = getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); - if (Count.hasValue()) + if (Count) return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) @@ -497,9 +491,11 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, if (SE) SE->forgetLoop(L); - auto *OldBr = dyn_cast(Preheader->getTerminator()); - assert(OldBr && "Preheader must end with a branch"); - assert(OldBr->isUnconditional() && "Preheader must have a single successor"); + Instruction *OldTerm = Preheader->getTerminator(); + assert(!OldTerm->mayHaveSideEffects() && + "Preheader must end with a side-effect-free terminator"); + assert(OldTerm->getNumSuccessors() == 1 && + "Preheader must have a single successor"); // Connect the preheader to the exit block. Keep the old edge to the header // around to perform the dominator tree update in two separate steps // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge @@ -525,7 +521,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // coming to this inner loop, this will break the outer loop structure (by // deleting the backedge of the outer loop). If the outer loop is indeed a // non-loop, it will be deleted in a future iteration of loop deletion pass. - IRBuilder<> Builder(OldBr); + IRBuilder<> Builder(OldTerm); auto *ExitBlock = L->getUniqueExitBlock(); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); @@ -535,7 +531,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); // Remove the old branch. The conditional branch becomes a new terminator. - OldBr->eraseFromParent(); + OldTerm->eraseFromParent(); // Rewrite phis in the exit block to get their inputs from the Preheader // instead of the exiting block. 
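// Aside: the relaxed precondition above, as a predicate. deleteDeadLoop no
// longer insists on an unconditional BranchInst in the preheader — any
// side-effect-free terminator with exactly one successor will do. A sketch;
// the helper name is hypothetical.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static bool hasDeletableLoopPreheader(const llvm::BasicBlock *Preheader) {
  const llvm::Instruction *T = Preheader->getTerminator();
  return !T->mayHaveSideEffects() && T->getNumSuccessors() == 1;
}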
@@ -579,7 +575,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, assert(L->hasNoExitBlocks() && "Loop should have either zero or one exit blocks."); - Builder.SetInsertPoint(OldBr); + Builder.SetInsertPoint(OldTerm); Builder.CreateUnreachable(); Preheader->getTerminator()->eraseFromParent(); } @@ -692,18 +688,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } } -static Loop *getOutermostLoop(Loop *L) { - while (Loop *Parent = L->getParentLoop()) - L = Parent; - return L; -} - void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA) { auto *Latch = L->getLoopLatch(); assert(Latch && "multiple latches not yet supported"); auto *Header = L->getHeader(); - Loop *OutermostLoop = getOutermostLoop(L); + Loop *OutermostLoop = L->getOutermostLoop(); SE.forgetLoop(L); @@ -1103,7 +1093,8 @@ Value *llvm::createOrderedReduction(IRBuilderBase &B, return B.CreateFAddReduce(Start, Src); } -void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) { +void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue, + bool IncludeWrapFlags) { auto *VecOp = dyn_cast(I); if (!VecOp) return; @@ -1112,7 +1103,7 @@ void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) { if (!Intersection) return; const unsigned Opcode = Intersection->getOpcode(); - VecOp->copyIRFlags(Intersection); + VecOp->copyIRFlags(Intersection, IncludeWrapFlags); for (auto *V : VL) { auto *Instr = dyn_cast(V); if (!Instr) @@ -1536,6 +1527,11 @@ static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + if (CG->NeedsFreeze) { + IRBuilder<> Builder(Loc); + Start = Builder.CreateFreeze(Start, Start->getName() + ".fr"); + End = Builder.CreateFreeze(End, End->getName() + ".fr"); + } LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); return {Start, End}; } @@ -1614,6 +1610,45 @@ Value *llvm::addRuntimeChecks( return MemoryRuntimeCheck; } +Value *llvm::addDiffRuntimeChecks( + Instruction *Loc, Loop *TheLoop, ArrayRef Checks, + SCEVExpander &Expander, + function_ref GetVF, unsigned IC) { + + LLVMContext &Ctx = Loc->getContext(); + IRBuilder ChkBuilder(Ctx, + Loc->getModule()->getDataLayout()); + ChkBuilder.SetInsertPoint(Loc); + // Our instructions might fold to a constant. + Value *MemoryRuntimeCheck = nullptr; + + for (auto &C : Checks) { + Type *Ty = C.SinkStart->getType(); + // Compute VF * IC * AccessSize. 
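// Aside: the scalar form of the pointer-difference check emitted below. A
// hypothetical worked example, not from the patch: with VF = 4, IC = 2 and
// AccessSize = 4 the bound is 32 bytes, and a pair conflicts iff
// (SinkStart - SrcStart) <u 32; a wrapped (huge) difference, i.e. the sink
// starting before the source, passes the check as no conflict.
#include <cstdint>

static bool diffCheckConflicts(uint64_t SinkStart, uint64_t SrcStart,
                               uint64_t VF, uint64_t IC, uint64_t AccessSize) {
  // Unsigned wrap on the subtraction is intended: a negative distance
  // becomes a huge value and the compare reports no conflict.
  return (SinkStart - SrcStart) < VF * IC * AccessSize;
}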
+ auto *VFTimesUFTimesSize = + ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()), + ConstantInt::get(Ty, IC * C.AccessSize)); + Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc); + Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc); + if (C.NeedsFreeze) { + IRBuilder<> Builder(Loc); + Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr"); + Src = Builder.CreateFreeze(Src, Src->getName() + ".fr"); + } + Value *Diff = ChkBuilder.CreateSub(Sink, Src); + Value *IsConflict = + ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check"); + + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); + } + MemoryRuntimeCheck = IsConflict; + } + + return MemoryRuntimeCheck; +} + Optional llvm::hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index f0bf625fa18e..97f29527bb95 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -41,9 +41,8 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, ArrayRef Checks, Loop *L, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE) - : VersionedLoop(L), NonVersionedLoop(nullptr), - AliasChecks(Checks.begin(), Checks.end()), - Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT), + : VersionedLoop(L), AliasChecks(Checks.begin(), Checks.end()), + Preds(LAI.getPSE().getPredicate()), LAI(LAI), LI(LI), DT(DT), SE(SE) { } @@ -277,7 +276,7 @@ bool runImpl(LoopInfo *LI, function_ref GetLAA, const LoopAccessInfo &LAI = GetLAA(*L); if (!LAI.hasConvergentOp() && (LAI.getNumRuntimePointerChecks() || - !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { + !LAI.getPSE().getPredicate().isAlwaysTrue())) { LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, LI, DT, SE); LVer.versionLoop(); diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp new file mode 100644 index 000000000000..8641581c8039 --- /dev/null +++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -0,0 +1,93 @@ +//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment. 
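// Aside: what "non-atomic form" means for cmpxchg concretely — the lowering
// below produces a plain load/compare/select/store sequence, which is only
// sound when nothing can preempt between the load and the store. The same
// semantics as a C++ sketch (illustrative only):
template <typename T> struct CmpXchgResultSketch {
  T Original;
  bool Success;
};

template <typename T>
static CmpXchgResultSketch<T> cmpxchgSketch(T *Ptr, T Cmp, T New) {
  T Orig = *Ptr;             // plain, non-atomic load
  bool Equal = (Orig == Cmp);
  *Ptr = Equal ? New : Orig; // plain, non-atomic store
  return {Orig, Equal};
}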
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerAtomic.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "loweratomic" + +bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { + IRBuilder<> Builder(CXI); + Value *Ptr = CXI->getPointerOperand(); + Value *Cmp = CXI->getCompareOperand(); + Value *Val = CXI->getNewValOperand(); + + LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0); + Res = Builder.CreateInsertValue(Res, Equal, 1); + + CXI->replaceAllUsesWith(Res); + CXI->eraseFromParent(); + return true; +} + +Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op, + IRBuilderBase &Builder, Value *Loaded, + Value *Inc) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Inc; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Inc, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Inc, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Inc, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Inc, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Inc, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::FAdd: + return Builder.CreateFAdd(Loaded, Inc, "new"); + case AtomicRMWInst::FSub: + return Builder.CreateFSub(Loaded, Inc, "new"); + default: + llvm_unreachable("Unknown atomic op"); + } +} + +bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) { + IRBuilder<> Builder(RMWI); + Value *Ptr = RMWI->getPointerOperand(); + Value *Val = RMWI->getValOperand(); + + LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr); + Value *Res = buildAtomicRMWValue(RMWI->getOperation(), Builder, Orig, Val); + Builder.CreateStore(Res, Ptr); + RMWI->replaceAllUsesWith(Orig); + RMWI->eraseFromParent(); + return true; +} diff --git a/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp new file mode 100644 index 000000000000..010deb77a883 --- /dev/null +++ b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp @@ -0,0 +1,221 @@ +//===-- LowerGlobalDtors.cpp - Lower @llvm.global_dtors -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Lower @llvm.global_dtors. 
+/// +/// Implement @llvm.global_dtors by creating wrapper functions that are +/// registered in @llvm.global_ctors and which contain a call to +/// `__cxa_atexit` to register their destructor functions. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerGlobalDtors.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "lower-global-dtors" + +namespace { +class LowerGlobalDtorsLegacyPass final : public ModulePass { + StringRef getPassName() const override { + return "Lower @llvm.global_dtors via `__cxa_atexit`"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + ModulePass::getAnalysisUsage(AU); + } + + bool runOnModule(Module &M) override; + +public: + static char ID; + LowerGlobalDtorsLegacyPass() : ModulePass(ID) { + initializeLowerGlobalDtorsLegacyPassPass(*PassRegistry::getPassRegistry()); + } +}; +} // End anonymous namespace + +char LowerGlobalDtorsLegacyPass::ID = 0; +INITIALIZE_PASS(LowerGlobalDtorsLegacyPass, DEBUG_TYPE, + "Lower @llvm.global_dtors via `__cxa_atexit`", false, false) + +ModulePass *llvm::createLowerGlobalDtorsLegacyPass() { + return new LowerGlobalDtorsLegacyPass(); +} + +static bool runImpl(Module &M); +bool LowerGlobalDtorsLegacyPass::runOnModule(Module &M) { return runImpl(M); } + +PreservedAnalyses LowerGlobalDtorsPass::run(Module &M, + ModuleAnalysisManager &AM) { + bool Changed = runImpl(M); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet(); + return PA; +} + +static bool runImpl(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors"); + if (!GV || !GV->hasInitializer()) + return false; + + const ConstantArray *InitList = dyn_cast(GV->getInitializer()); + if (!InitList) + return false; + + // Validate @llvm.global_dtor's type. + auto *ETy = dyn_cast(InitList->getType()->getElementType()); + if (!ETy || ETy->getNumElements() != 3 || + !ETy->getTypeAtIndex(0U)->isIntegerTy() || + !ETy->getTypeAtIndex(1U)->isPointerTy() || + !ETy->getTypeAtIndex(2U)->isPointerTy()) + return false; // Not (int, ptr, ptr). + + // Collect the contents of @llvm.global_dtors, ordered by priority. Within a + // priority, sequences of destructors with the same associated object are + // recorded so that we can register them as a group. + std::map< + uint16_t, + std::vector>> + > DtorFuncs; + for (Value *O : InitList->operands()) { + auto *CS = dyn_cast(O); + if (!CS) + continue; // Malformed. + + auto *Priority = dyn_cast(CS->getOperand(0)); + if (!Priority) + continue; // Malformed. + uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX); + + Constant *DtorFunc = CS->getOperand(1); + if (DtorFunc->isNullValue()) + break; // Found a null terminator, skip the rest. 
+ + Constant *Associated = CS->getOperand(2); + Associated = cast(Associated->stripPointerCasts()); + + auto &AtThisPriority = DtorFuncs[PriorityValue]; + if (AtThisPriority.empty() || AtThisPriority.back().first != Associated) { + std::vector NewList; + NewList.push_back(DtorFunc); + AtThisPriority.push_back(std::make_pair(Associated, NewList)); + } else { + AtThisPriority.back().second.push_back(DtorFunc); + } + } + if (DtorFuncs.empty()) + return false; + + // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); + LLVMContext &C = M.getContext(); + PointerType *VoidStar = Type::getInt8PtrTy(C); + Type *AtExitFuncArgs[] = {VoidStar}; + FunctionType *AtExitFuncTy = + FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs, + /*isVarArg=*/false); + + FunctionCallee AtExit = M.getOrInsertFunction( + "__cxa_atexit", + FunctionType::get(Type::getInt32Ty(C), + {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar}, + /*isVarArg=*/false)); + + // Declare __dso_local. + Type *DsoHandleTy = Type::getInt8Ty(C); + Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] { + auto *GV = new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true, + GlobalVariable::ExternalWeakLinkage, nullptr, + "__dso_handle"); + GV->setVisibility(GlobalVariable::HiddenVisibility); + return GV; + }); + + // For each unique priority level and associated symbol, generate a function + // to call all the destructors at that level, and a function to register the + // first function with __cxa_atexit. + for (auto &PriorityAndMore : DtorFuncs) { + uint16_t Priority = PriorityAndMore.first; + uint64_t Id = 0; + auto &AtThisPriority = PriorityAndMore.second; + for (auto &AssociatedAndMore : AtThisPriority) { + Constant *Associated = AssociatedAndMore.first; + auto ThisId = Id++; + + Function *CallDtors = Function::Create( + AtExitFuncTy, Function::PrivateLinkage, + "call_dtors" + + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) + : Twine()) + + (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) + : Twine()) + + (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) + : Twine()), + &M); + BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors); + FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), + /*isVarArg=*/false); + + for (auto Dtor : reverse(AssociatedAndMore.second)) + CallInst::Create(VoidVoid, Dtor, "", BB); + ReturnInst::Create(C, BB); + + Function *RegisterCallDtors = Function::Create( + VoidVoid, Function::PrivateLinkage, + "register_call_dtors" + + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) + : Twine()) + + (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) + : Twine()) + + (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) + : Twine()), + &M); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors); + BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors); + BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors); + + Value *Null = ConstantPointerNull::get(VoidStar); + Value *Args[] = {CallDtors, Null, DsoHandle}; + Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB); + Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res, + Constant::getNullValue(Res->getType())); + BranchInst::Create(FailBB, RetBB, Cmp, EntryBB); + + // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave. + // This should be very rare, because if the process is running out of + // memory before main has even started, something is wrong. 
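// Aside: the runtime contract targeted here, for reference. __cxa_atexit
// runs handlers in reverse registration order, which is why each group's
// destructors are emitted in reverse above. A minimal sketch of what one
// generated register_call_dtors.* function boils down to (names are
// illustrative; linking requires a CRT that provides __dso_handle):
extern "C" int __cxa_atexit(void (*Fn)(void *), void *Arg, void *DsoHandle);
extern "C" char __dso_handle;

static void callDtorsGroup(void *) { /* run this group's destructors */ }

static int registerDtorsGroup() {
  // The generated code branches to a trap on a non-zero (failed) return.
  return __cxa_atexit(&callDtorsGroup, nullptr, &__dso_handle);
}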
+ CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "", + FailBB); + new UnreachableInst(C, FailBB); + + ReturnInst::Create(C, RetBB); + + // Now register the registration function with @llvm.global_ctors. + appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated); + } + } + + // Now that we've lowered everything, remove @llvm.global_dtors. + GV->eraseFromParent(); + + return true; +} diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp index fe0ff5899d8f..59cfa41fb7fd 100644 --- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -17,8 +17,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 3d75dd57456d..b4acb1b2ae90 100644 --- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -18,7 +20,9 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, - const TargetTransformInfo &TTI) { + bool CanOverlap, + const TargetTransformInfo &TTI, + Optional AtomicElementSize) { // No need to expand zero length copies. 
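// Aside: the alias-scope plumbing used by the memcpy expansion when
// CanOverlap is false, in isolation — one anonymous domain and scope; loads
// get !alias.scope and stores get !noalias with that scope, telling later
// passes that the stores of the copy loop never clobber its loads. The
// helper name is hypothetical.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

static void tagNonOverlappingCopy(llvm::LLVMContext &Ctx,
                                  llvm::LoadInst *Load,
                                  llvm::StoreInst *Store) {
  using namespace llvm;
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "MemCopyAliasScope");
  Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, Scope));
  Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, Scope));
}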
if (CopyLen->isZero()) return; @@ -28,15 +32,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Function *ParentFunc = PreLoopBB->getParent(); LLVMContext &Ctx = PreLoopBB->getContext(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); Type *TypeOfCopyLen = CopyLen->getType(); Type *LoopOpType = TTI.getMemcpyLoopLoweringType( - Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value()); + Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(), + AtomicElementSize); + assert((!AtomicElementSize || !LoopOpType->isVectorTy()) && + "Atomic memcpy lowering is not supported for vector operand type"); unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType); + assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) && + "Atomic memcpy lowering is not supported for selected operand size"); + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; if (LoopEndCount != 0) { @@ -68,12 +82,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, // Loop Body Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, - PartSrcAlign, SrcIsVolatile); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = LoopBuilder.CreateAlignedStore( + Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. 
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); @@ -93,7 +120,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, SmallVector RemainingOps; TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, SrcAS, DstAS, SrcAlign.value(), - DstAlign.value()); + DstAlign.value(), AtomicElementSize); for (auto OpTy : RemainingOps) { Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied)); @@ -101,6 +128,10 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, // Calaculate the new index unsigned OperandSize = DL.getTypeStoreSize(OpTy); + assert( + (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) && + "Atomic memcpy lowering is not supported for selected operand size"); + uint64_t GepIndex = BytesCopied / OperandSize; assert(GepIndex * OperandSize == BytesCopied && "Division should have no Remainder!"); @@ -111,9 +142,13 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); Value *SrcGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); - Value *Load = + LoadInst *Load = RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile); - + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } // Cast destination to operand type and store. PointerType *DstPtrType = PointerType::get(OpTy, DstAS); Value *CastedDst = DstAddr->getType() == DstPtrType @@ -121,8 +156,16 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, : RBuilder.CreateBitCast(DstAddr, DstPtrType); Value *DstGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); - RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. 
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } BytesCopied += OperandSize; } } @@ -134,8 +177,9 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, - bool DstIsVolatile, - const TargetTransformInfo &TTI) { + bool DstIsVolatile, bool CanOverlap, + const TargetTransformInfo &TTI, + Optional AtomicElementSize) { BasicBlock *PreLoopBB = InsertBefore->getParent(); BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); @@ -143,12 +187,22 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, Function *ParentFunc = PreLoopBB->getParent(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); LLVMContext &Ctx = PreLoopBB->getContext(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); Type *LoopOpType = TTI.getMemcpyLoopLoweringType( - Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value()); + Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(), + AtomicElementSize); + assert((!AtomicElementSize || !LoopOpType->isVectorTy()) && + "Atomic memcpy lowering is not supported for vector operand type"); unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType); + assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) && + "Atomic memcpy lowering is not supported for selected operand size"); IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); @@ -183,19 +237,40 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign, - SrcIsVolatile); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope)); + } Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = + LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); - if (!LoopOpIsInt8) { - // Add in the - Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); - Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + bool requiresResidual = + !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize); + if (requiresResidual) { + Type *ResLoopOpType = AtomicElementSize + ? 
Type::getIntNTy(Ctx, *AtomicElementSize * 8) + : Int8Type; + unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType); + assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) && + "Store size is expected to match type size"); + + // Add in the + Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); + Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); // Loop body for the residual copy. BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", @@ -230,21 +305,34 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); ResidualIndex->addIncoming(Zero, ResHeaderBB); - Value *SrcAsInt8 = - ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); - Value *DstAsInt8 = - ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); + Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast( + SrcAddr, PointerType::get(ResLoopOpType, SrcAS)); + Value *DstAsResLoopOpType = ResBuilder.CreateBitCast( + DstAddr, PointerType::get(ResLoopOpType, DstAS)); Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); - Value *SrcGEP = - ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); - Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign, - SrcIsVolatile); - Value *DstGEP = - ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); - ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - - Value *ResNewIndex = - ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); + Value *SrcGEP = ResBuilder.CreateInBoundsGEP( + ResLoopOpType, SrcAsResLoopOpType, FullOffset); + LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } + Value *DstGEP = ResBuilder.CreateInBoundsGEP( + ResLoopOpType, DstAsResLoopOpType, FullOffset); + StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } + Value *ResNewIndex = ResBuilder.CreateAdd( + ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize)); ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); // Create the loop branch condition. @@ -297,7 +385,13 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, Function *F = OrigBB->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); - Type *EltTy = SrcAddr->getType()->getPointerElementType(); + // TODO: Use different element type if possible? 
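// Aside: what the i8-based memmove expansion computes, in plain C++ — the
// copy direction depends on comparing the two addresses, so overlapping
// ranges are handled without a temporary buffer. A sketch mirroring the
// unsigned pointer compare the emitted IR performs (illustrative only):
#include <cstddef>

static void memmoveSketch(unsigned char *Dst, const unsigned char *Src,
                          std::size_t N) {
  if (Src >= Dst) {
    for (std::size_t I = 0; I != N; ++I) // forward copy
      Dst[I] = Src[I];
  } else {
    for (std::size_t I = N; I != 0; --I) // backward copy
      Dst[I - 1] = Src[I - 1];
  }
}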
+ IRBuilder<> CastBuilder(InsertBefore); + Type *EltTy = CastBuilder.getInt8Ty(); + Type *PtrTy = + CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace()); + SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy); + DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy); // Create the a comparison of src and dst, based on which we jump to either // the forward-copy part of the function (if src >= dst) or the backwards-copy @@ -419,8 +513,21 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, NewBB); } +template +static bool canOverlap(MemTransferBase *Memcpy, ScalarEvolution *SE) { + if (SE) { + auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource()); + auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest()); + if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy)) + return false; + } + return true; +} + void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + ScalarEvolution *SE) { + bool CanOverlap = canOverlap(Memcpy, SE); if (ConstantInt *CI = dyn_cast(Memcpy->getLength())) { createMemCpyLoopKnownSize( /* InsertBefore */ Memcpy, @@ -431,6 +538,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, /* TargetTransformInfo */ TTI); } else { createMemCpyLoopUnknownSize( @@ -442,6 +550,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, /* TargetTransformInfo */ TTI); } } @@ -465,3 +574,35 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) { /* Alignment */ Memset->getDestAlign().valueOrOne(), Memset->isVolatile()); } + +void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy, + const TargetTransformInfo &TTI, + ScalarEvolution *SE) { + if (ConstantInt *CI = dyn_cast(AtomicMemcpy->getLength())) { + createMemCpyLoopKnownSize( + /* InsertBefore */ AtomicMemcpy, + /* SrcAddr */ AtomicMemcpy->getRawSource(), + /* DstAddr */ AtomicMemcpy->getRawDest(), + /* CopyLen */ CI, + /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(), + /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ AtomicMemcpy->isVolatile(), + /* DstIsVolatile */ AtomicMemcpy->isVolatile(), + /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec. + /* TargetTransformInfo */ TTI, + /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes()); + } else { + createMemCpyLoopUnknownSize( + /* InsertBefore */ AtomicMemcpy, + /* SrcAddr */ AtomicMemcpy->getRawSource(), + /* DstAddr */ AtomicMemcpy->getRawDest(), + /* CopyLen */ AtomicMemcpy->getLength(), + /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(), + /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ AtomicMemcpy->isVolatile(), + /* DstIsVolatile */ AtomicMemcpy->isVolatile(), + /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec. 
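[Annotation] expandAtomicMemCpyAsLoop passes CanOverlap = false because the element-wise atomic memcpy intrinsic is specified with non-overlapping operands, and each element access in the expanded loops is an ordinary load/store downgraded to an unordered atomic, as the setAtomic calls above show. A minimal sketch of the per-element pair, assuming a builder B and the two GEPs are in scope; i32 elements with 4-byte alignment are illustrative:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void emitAtomicElementCopy(IRBuilder<> &B, Value *SrcGEP,
                                  Value *DstGEP) {
  LoadInst *Load = B.CreateAlignedLoad(B.getInt32Ty(), SrcGEP, Align(4));
  StoreInst *Store = B.CreateAlignedStore(Load, DstGEP, Align(4));
  // Unordered is the weakest atomic ordering: each element is tear-free,
  // but no ordering is imposed between elements.
  Load->setAtomic(AtomicOrdering::Unordered);
  Store->setAtomic(AtomicOrdering::Unordered);
}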
+ /* TargetTransformInfo */ TTI, + /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes()); + } +} diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp index aff9d1311688..44aeb26fadf9 100644 --- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -119,25 +119,27 @@ raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) { void FixPhis( BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, const unsigned NumMergedCases = std::numeric_limits::max()) { - for (BasicBlock::iterator I = SuccBB->begin(), - IE = SuccBB->getFirstNonPHI()->getIterator(); - I != IE; ++I) { - PHINode *PN = cast(I); + for (auto &I : SuccBB->phis()) { + PHINode *PN = cast(&I); - // Only update the first occurrence. + // Only update the first occurrence if NewBB exists. unsigned Idx = 0, E = PN->getNumIncomingValues(); unsigned LocalNumMergedCases = NumMergedCases; - for (; Idx != E; ++Idx) { + for (; Idx != E && NewBB; ++Idx) { if (PN->getIncomingBlock(Idx) == OrigBB) { PN->setIncomingBlock(Idx, NewBB); break; } } + // Skip the updated incoming block so that it will not be removed. + if (NewBB) + ++Idx; + // Remove additional occurrences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB. SmallVector Indices; - for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) + for (; LocalNumMergedCases > 0 && Idx < E; ++Idx) if (PN->getIncomingBlock(Idx) == OrigBB) { Indices.push_back(Idx); LocalNumMergedCases--; @@ -195,6 +197,13 @@ BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, BasicBlock *Succ = Leaf.BB; BranchInst::Create(Succ, Default, Comp, NewLeaf); + // Update the PHI incoming value/block for the default. + for (auto &I : Default->phis()) { + PHINode *PN = cast(&I); + auto *V = PN->getIncomingValueForBlock(OrigBlock); + PN->addIncoming(V, NewLeaf); + } + // If there were any PHI nodes in this successor, rewrite one entry // from OrigBlock to come from NewLeaf. for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { @@ -494,19 +503,17 @@ void ProcessSwitchInst(SwitchInst *SI, Val = SI->getCondition(); } - // Create a new, empty default block so that the new hierarchy of - // if-then statements go to this and the PHI nodes are happy. - BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default->getIterator(), NewDefault); - BranchInst::Create(Default, NewDefault); - BasicBlock *SwitchBlock = SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, - OrigBlock, OrigBlock, NewDefault, UnreachableRanges); - - // If there are entries in any PHI nodes for the default edge, make sure - // to update them as well. - FixPhis(Default, OrigBlock, NewDefault); + OrigBlock, OrigBlock, Default, UnreachableRanges); + + // We have added incoming values for newly-created predecessors in + // NewLeafBlock(). The only meaningful work we offload to FixPhis() is to + // remove the incoming values from OrigBlock. There might be a special case + // that SwitchBlock is the same as Default, under which the PHIs in Default + // are fixed inside SwitchConvert(). + if (SwitchBlock != Default) + FixPhis(Default, OrigBlock, nullptr); // Branch to our shiny new if-then stuff... 
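[Annotation] The FixPhis rework above hinges on a small PHI invariant: after an edge is split or retargeted, each PHI in the successor must keep exactly one incoming entry per predecessor edge. A sketch of the retargeting step in isolation (names hypothetical; duplicate entries from condensed cases are removed separately, as FixPhis does above):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void retargetPhis(BasicBlock *SuccBB, BasicBlock *OldPred,
                         BasicBlock *NewPred) {
  for (PHINode &PN : SuccBB->phis()) {
    // Rewrite the first entry coming from OldPred to come from NewPred.
    int Idx = PN.getBasicBlockIndex(OldPred);
    if (Idx >= 0)
      PN.setIncomingBlock(static_cast<unsigned>(Idx), NewPred);
  }
}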
BranchInst::Create(SwitchBlock, OrigBlock); diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp new file mode 100644 index 000000000000..a1029475cf1d --- /dev/null +++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -0,0 +1,195 @@ +//== MemoryTaggingSupport.cpp - helpers for memory tagging implementations ===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares common infrastructure for HWAddressSanitizer and +// Aarch64StackTagging. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MemoryTaggingSupport.h" + +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IntrinsicInst.h" + +namespace llvm { +namespace memtag { +namespace { +bool maybeReachableFromEachOther(const SmallVectorImpl &Insts, + const DominatorTree *DT, const LoopInfo *LI, + size_t MaxLifetimes) { + // If we have too many lifetime ends, give up, as the algorithm below is N^2. + if (Insts.size() > MaxLifetimes) + return true; + for (size_t I = 0; I < Insts.size(); ++I) { + for (size_t J = 0; J < Insts.size(); ++J) { + if (I == J) + continue; + if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, DT, LI)) + return true; + } + } + return false; +} +} // namespace + +bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, + const LoopInfo &LI, const Instruction *Start, + const SmallVectorImpl &Ends, + const SmallVectorImpl &RetVec, + llvm::function_ref Callback) { + if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) { + Callback(Ends[0]); + return true; + } + SmallPtrSet EndBlocks; + for (auto *End : Ends) { + EndBlocks.insert(End->getParent()); + } + SmallVector ReachableRetVec; + unsigned NumCoveredExits = 0; + for (auto *RI : RetVec) { + if (!isPotentiallyReachable(Start, RI, nullptr, &DT, &LI)) + continue; + ReachableRetVec.push_back(RI); + // If there is an end in the same basic block as the return, we know for + // sure that the return is covered. Otherwise, we can check whether there + // is a way to reach the RI from the start of the lifetime without passing + // through an end. + if (EndBlocks.count(RI->getParent()) > 0 || + !isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI)) { + ++NumCoveredExits; + } + } + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + if (NumCoveredExits == ReachableRetVec.size()) { + for (auto *End : Ends) + Callback(End); + } else { + for (auto *RI : ReachableRetVec) + Callback(RI); + // We may have inserted untag outside of the lifetime interval. + // Signal the caller to remove the lifetime end call for this alloca. + return false; + } + return true; +} + +bool isStandardLifetime(const SmallVectorImpl &LifetimeStart, + const SmallVectorImpl &LifetimeEnd, + const DominatorTree *DT, const LoopInfo *LI, + size_t MaxLifetimes) { + // An alloca that has exactly one start and end in every possible execution. + // If it has multiple ends, they have to be unreachable from each other, so + // at most one of them is actually used for each execution of the function. 
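[Annotation] The covered-exit test in forAllReachableExits above leans on the exclusion-set form of isPotentiallyReachable: a return needs no extra untag if every path from the lifetime start to it already crosses a block containing a lifetime.end. A condensed sketch of that query, assuming the relevant analyses are available:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool returnIsCovered(const Instruction *Start, const Instruction *Ret,
                            ArrayRef<IntrinsicInst *> Ends,
                            const DominatorTree &DT, const LoopInfo &LI) {
  SmallPtrSet<BasicBlock *, 4> EndBlocks; // blocks holding lifetime.end calls
  for (IntrinsicInst *End : Ends)
    EndBlocks.insert(End->getParent());
  // Unreachable once the end blocks are excluded means every path to the
  // return already passes through a lifetime.end.
  return !isPotentiallyReachable(Start, Ret, &EndBlocks, &DT, &LI);
}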
+ return LifetimeStart.size() == 1 && + (LifetimeEnd.size() == 1 || + (LifetimeEnd.size() > 0 && + !maybeReachableFromEachOther(LifetimeEnd, DT, LI, MaxLifetimes))); +} + +Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) { + if (isa(Inst)) { + if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall()) + return CI; + return &Inst; + } + if (isa(Inst)) { + return &Inst; + } + return nullptr; +} + +void StackInfoBuilder::visit(Instruction &Inst) { + if (CallInst *CI = dyn_cast(&Inst)) { + if (CI->canReturnTwice()) { + Info.CallsReturnTwice = true; + } + } + if (AllocaInst *AI = dyn_cast(&Inst)) { + if (IsInterestingAlloca(*AI)) { + Info.AllocasToInstrument[AI].AI = AI; + } + return; + } + auto *II = dyn_cast(&Inst); + if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end)) { + AllocaInst *AI = findAllocaForValue(II->getArgOperand(1)); + if (!AI) { + Info.UnrecognizedLifetimes.push_back(&Inst); + return; + } + if (!IsInterestingAlloca(*AI)) + return; + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Info.AllocasToInstrument[AI].LifetimeStart.push_back(II); + else + Info.AllocasToInstrument[AI].LifetimeEnd.push_back(II); + return; + } + if (auto *DVI = dyn_cast(&Inst)) { + for (Value *V : DVI->location_ops()) { + if (auto *AI = dyn_cast_or_null(V)) { + if (!IsInterestingAlloca(*AI)) + continue; + AllocaInfo &AInfo = Info.AllocasToInstrument[AI]; + auto &DVIVec = AInfo.DbgVariableIntrinsics; + if (DVIVec.empty() || DVIVec.back() != DVI) + DVIVec.push_back(DVI); + } + } + } + Instruction *ExitUntag = getUntagLocationIfFunctionExit(Inst); + if (ExitUntag) + Info.RetVec.push_back(ExitUntag); +} + +uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { + auto DL = AI.getModule()->getDataLayout(); + return *AI.getAllocationSizeInBits(DL) / 8; +} + +void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) { + const Align NewAlignment = std::max(Info.AI->getAlign(), Alignment); + Info.AI->setAlignment(NewAlignment); + auto &Ctx = Info.AI->getFunction()->getContext(); + + uint64_t Size = getAllocaSizeInBytes(*Info.AI); + uint64_t AlignedSize = alignTo(Size, Alignment); + if (Size == AlignedSize) + return; + + // Add padding to the alloca. + Type *AllocatedType = + Info.AI->isArrayAllocation() + ? ArrayType::get( + Info.AI->getAllocatedType(), + cast(Info.AI->getArraySize())->getZExtValue()) + : Info.AI->getAllocatedType(); + Type *PaddingType = ArrayType::get(Type::getInt8Ty(Ctx), AlignedSize - Size); + Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType); + auto *NewAI = + new AllocaInst(TypeWithPadding, Info.AI->getType()->getAddressSpace(), + nullptr, "", Info.AI); + NewAI->takeName(Info.AI); + NewAI->setAlignment(Info.AI->getAlign()); + NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca()); + NewAI->setSwiftError(Info.AI->isSwiftError()); + NewAI->copyMetadata(*Info.AI); + + auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI); + Info.AI->replaceAllUsesWith(NewPtr); + Info.AI->eraseFromParent(); + Info.AI = NewAI; +} + +} // namespace memtag +} // namespace llvm diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp new file mode 100644 index 000000000000..b73d68ebec7c --- /dev/null +++ b/llvm/lib/Transforms/Utils/MisExpect.cpp @@ -0,0 +1,249 @@ +//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emmited. +// +// MisExpect's implementation relies on two assumptions about how branch weights +// are managed in LLVM. +// +// 1) Frontend profiling weights are always in place before llvm.expect is +// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore +// needs to extract the branch weights and then compare them to the weights +// being added by the llvm.expect intrinsic lowering. +// +// 2) Sampling and IR based profiles will *only* have branch weight metadata +// before profiling data is consulted if they are from a lowered llvm.expect +// intrinsic. These profiles thus always extract the expected weights and then +// compare them to the weights collected during profiling to determine if a +// diagnostic message is warranted. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MisExpect.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include +#include +#include + +#define DEBUG_TYPE "misexpect" + +using namespace llvm; +using namespace misexpect; + +namespace llvm { + +// Command line option to enable/disable the warning when profile data suggests +// a mismatch with the use of the llvm.expect intrinsic +static cl::opt PGOWarnMisExpect( + "pgo-warn-misexpect", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about incorrect usage of llvm.expect intrinsics.")); + +static cl::opt MisExpectTolerance( + "misexpect-tolerance", cl::init(0), + cl::desc("Prevents emiting diagnostics when profile counts are " + "within N% of the threshold..")); + +} // namespace llvm + +namespace { + +bool isMisExpectDiagEnabled(LLVMContext &Ctx) { + return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); +} + +uint64_t getMisExpectTolerance(LLVMContext &Ctx) { + return std::max(static_cast(MisExpectTolerance), + Ctx.getDiagnosticsMisExpectTolerance()); +} + +Instruction *getInstCondition(Instruction *I) { + assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); + Instruction *Ret = nullptr; + if (auto *B = dyn_cast(I)) { + Ret = dyn_cast(B->getCondition()); + } + // TODO: Find a way to resolve condition location for switches + // Using the condition of the switch seems to often resolve to an earlier + // point in the program, i.e. the calculation of the switch condition, rather + // than the switch's location in the source code. Thus, we should use the + // instruction to get source code locations rather than the condition to + // improve diagnostic output, such as the caret. 
If the same problem exists + // for branch instructions, then we should remove this function and directly + // use the instruction + // + else if (auto *S = dyn_cast(I)) { + Ret = dyn_cast(S->getCondition()); + } + return Ret ? Ret : I; +} + +void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, + uint64_t ProfCount, uint64_t TotalCount) { + double PercentageCorrect = (double)ProfCount / TotalCount; + auto PerString = + formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); + auto RemStr = formatv( + "Potential performance regression from use of the llvm.expect intrinsic: " + "Annotation was correct on {0} of profiled executions.", + PerString); + Twine Msg(PerString); + Instruction *Cond = getInstCondition(I); + if (isMisExpectDiagEnabled(Ctx)) + Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); + OptimizationRemarkEmitter ORE(I->getParent()->getParent()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); +} + +} // namespace + +namespace llvm { +namespace misexpect { + +// Helper function to extract branch weights into a vector +Optional> extractWeights(Instruction *I, + LLVMContext &Ctx) { + assert(I && "MisExpect::extractWeights given invalid pointer"); + + auto *ProfileData = I->getMetadata(LLVMContext::MD_prof); + if (!ProfileData) + return None; + + unsigned NOps = ProfileData->getNumOperands(); + if (NOps < 3) + return None; + + auto *ProfDataName = dyn_cast(ProfileData->getOperand(0)); + if (!ProfDataName || !ProfDataName->getString().equals("branch_weights")) + return None; + + SmallVector Weights(NOps - 1); + for (unsigned Idx = 1; Idx < NOps; Idx++) { + ConstantInt *Value = + mdconst::dyn_extract(ProfileData->getOperand(Idx)); + uint32_t V = Value->getZExtValue(); + Weights[Idx - 1] = V; + } + + return Weights; +} + +// TODO: when clang allows c++17, use std::clamp instead +uint32_t clamp(uint64_t value, uint32_t low, uint32_t hi) { + if (value > hi) + return hi; + if (value < low) + return low; + return value; +} + +void verifyMisExpect(Instruction &I, ArrayRef RealWeights, + ArrayRef ExpectedWeights) { + // To determine if we emit a diagnostic, we need to compare the branch weights + // from the profile to those added by the llvm.expect intrinsic. + // So first, we extract the "likely" and "unlikely" weights from + // ExpectedWeights And determine the correct weight in the profile to compare + // against. + uint64_t LikelyBranchWeight = 0, + UnlikelyBranchWeight = std::numeric_limits::max(); + size_t MaxIndex = 0; + for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { + uint32_t V = ExpectedWeights[Idx]; + if (LikelyBranchWeight < V) { + LikelyBranchWeight = V; + MaxIndex = Idx; + } + if (UnlikelyBranchWeight > V) { + UnlikelyBranchWeight = V; + } + } + + const uint64_t ProfiledWeight = RealWeights[MaxIndex]; + const uint64_t RealWeightsTotal = + std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0, + std::plus()); + const uint64_t NumUnlikelyTargets = RealWeights.size() - 1; + + uint64_t TotalBranchWeight = + LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); + + // FIXME: When we've addressed sample profiling, restore the assertion + // + // We cannot calculate branch probability if either of these invariants aren't + // met. However, MisExpect diagnostics should not prevent code from compiling, + // so we simply forgo emitting diagnostics here, and return early. 
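[Annotation] A worked instance of the threshold computation below, with invented numbers: suppose llvm.expect lowering attached weights {2000, 1} (so LikelyBranchWeight = 2000, UnlikelyBranchWeight = 1, TotalBranchWeight = 2001) while the profile recorded {60, 40}.

#include "llvm/Support/BranchProbability.h"
#include <cstdint>
using namespace llvm;

static bool exampleMisexpectCheck() {
  uint64_t LikelyBranchWeight = 2000, TotalBranchWeight = 2001;
  // Probability the annotation claims for the likely target: 2000/2001.
  auto LikelyProbability = BranchProbability::getBranchProbability(
      LikelyBranchWeight, TotalBranchWeight);
  uint64_t RealWeightsTotal = 100;                        // 60 + 40
  uint64_t ScaledThreshold = LikelyProbability.scale(RealWeightsTotal); // ~99
  uint64_t ProfiledWeight = 60;      // profile count of the "likely" target
  return ProfiledWeight < ScaledThreshold;                // true -> diagnose
}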
+ if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight)) + return; + + // To determine our threshold value we need to obtain the branch probability + // for the weights added by llvm.expect and use that proportion to calculate + // our threshold based on the collected profile data. + auto LikelyProbablilty = BranchProbability::getBranchProbability( + LikelyBranchWeight, TotalBranchWeight); + + uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal); + + // clamp tolerance range to [0, 100) + auto Tolerance = getMisExpectTolerance(I.getContext()); + Tolerance = clamp(Tolerance, 0, 99); + + // Allow users to relax checking by N% i.e., if they use a 5% tolerance, + // then we check against 0.95*ScaledThreshold + if (Tolerance > 0) + ScaledThreshold *= (1.0 - Tolerance / 100.0); + + // When the profile weight is below the threshold, we emit the diagnostic + if (ProfiledWeight < ScaledThreshold) + emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight, + RealWeightsTotal); +} + +void checkBackendInstrumentation(Instruction &I, + const ArrayRef RealWeights) { + auto ExpectedWeightsOpt = extractWeights(&I, I.getContext()); + if (!ExpectedWeightsOpt) + return; + auto ExpectedWeights = ExpectedWeightsOpt.getValue(); + verifyMisExpect(I, RealWeights, ExpectedWeights); +} + +void checkFrontendInstrumentation(Instruction &I, + const ArrayRef ExpectedWeights) { + auto RealWeightsOpt = extractWeights(&I, I.getContext()); + if (!RealWeightsOpt) + return; + auto RealWeights = RealWeightsOpt.getValue(); + verifyMisExpect(I, RealWeights, ExpectedWeights); +} + +void checkExpectAnnotations(Instruction &I, + const ArrayRef ExistingWeights, + bool IsFrontendInstr) { + if (IsFrontendInstr) { + checkFrontendInstrumentation(I, ExistingWeights); + } else { + checkBackendInstrumentation(I, ExistingWeights); + } +} + +} // namespace misexpect +} // namespace llvm +#undef DEBUG_TYPE diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index d6a6be2762c7..5120ade70e16 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -237,8 +236,8 @@ std::string llvm::getUniqueModuleId(Module *M) { return ("." + Str).str(); } -void VFABI::setVectorVariantNames( - CallInst *CI, const SmallVector &VariantMappings) { +void VFABI::setVectorVariantNames(CallInst *CI, + ArrayRef VariantMappings) { if (VariantMappings.empty()) return; @@ -255,7 +254,7 @@ void VFABI::setVectorVariantNames( for (const std::string &VariantMapping : VariantMappings) { LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); Optional VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); - assert(VI.hasValue() && "Cannot add an invalid VFABI name."); + assert(VI && "Cannot add an invalid VFABI name."); assert(M->getNamedValue(VI.getValue().VectorName) && "Cannot add variant to attribute: " "vector function declaration is missing."); @@ -266,14 +265,15 @@ void VFABI::setVectorVariantNames( } void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, - StringRef SectionName) { - // Embed the buffer into the module. + StringRef SectionName, Align Alignment) { + // Embed the memory buffer into the module. 
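[Annotation] The Alignment parameter added to embedBufferInModule above matters because consumers of the embedded bytes (offloading runtimes, for instance) may reinterpret them in place. A sketch of the same embedding pattern, assuming a Module M and payload Data; the section name and alignment below are illustrative values, not fixed by the API:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

static void embedBytes(Module &M, StringRef Data) {
  Constant *Payload =
      ConstantDataArray::getString(M.getContext(), Data, /*AddNull=*/false);
  auto *GV = new GlobalVariable(M, Payload->getType(), /*isConstant=*/true,
                                GlobalValue::PrivateLinkage, Payload,
                                "llvm.embedded.object");
  GV->setSection(".llvm.offloading");  // example section name
  GV->setAlignment(Align(8));          // example alignment
  appendToCompilerUsed(M, GV);         // keep it alive past GlobalDCE
}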
Constant *ModuleConstant = ConstantDataArray::get( M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); GlobalVariable *GV = new GlobalVariable( M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, ModuleConstant, "llvm.embedded.object"); GV->setSection(SectionName); + GV->setAlignment(Alignment); appendToCompilerUsed(M, GV); } diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index bd2b6fafdf2e..53334bc2a369 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -15,19 +15,12 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CFG.h" #include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" @@ -35,7 +28,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Utils.h" #include #define DEBUG_TYPE "predicateinfo" using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 01b433b4782a..aff692b36288 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -32,7 +31,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -68,7 +66,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { if (const LoadInst *LI = dyn_cast(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (LI->isVolatile()) + if (LI->isVolatile() || LI->getType() != AI->getAllocatedType()) return false; } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getValueOperand() == AI || @@ -678,7 +676,7 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - // Remove alloca's dbg.declare instrinsics from the function. + // Remove alloca's dbg.declare intrinsics from the function. for (auto &DbgUsers : AllocaDbgUsers) { for (auto *DII : DbgUsers) if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) @@ -704,7 +702,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. 
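[Annotation] The isAllocaPromotable tightening above (a load must now read the alloca's exact allocated type, which rules out type-punning loads under opaque pointers) feeds the usual mem2reg driver loop. A minimal usage sketch, assuming a Function F with an up-to-date DominatorTree DT and AssumptionCache AC:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <vector>
using namespace llvm;

static void promoteEntryAllocas(Function &F, DominatorTree &DT,
                                AssumptionCache &AC) {
  std::vector<AllocaInst *> Allocas;
  for (Instruction &I : F.getEntryBlock()) // allocas live in the entry block
    if (auto *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))          // skips volatile/type-punned uses
        Allocas.push_back(AI);
  if (!Allocas.empty())
    PromoteMemToReg(Allocas, DT, &AC);     // rewrite to SSA, drop the allocas
}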
- if (Value *V = SimplifyInstruction(PN, SQ)) { + if (Value *V = simplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index 65207056a3f4..926427450682 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -18,9 +18,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -38,11 +35,13 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { GetElementPtrInst *GEP = dyn_cast(GV.use_begin()->getUser()); - if (!GEP || !GEP->hasOneUse()) + if (!GEP || !GEP->hasOneUse() || + GV.getValueType() != GEP->getSourceElementType()) return false; LoadInst *Load = dyn_cast(GEP->use_begin()->getUser()); - if (!Load || !Load->hasOneUse()) + if (!Load || !Load->hasOneUse() || + Load->getType() != GEP->getResultElementType()) return false; // If the original lookup table does not have local linkage and is @@ -144,7 +143,7 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) { Value *Offset = Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift"); - // Insert the call to load.relative instrinsic before LOAD. + // Insert the call to load.relative intrinsic before LOAD. // GEP might not be immediately followed by a LOAD, like it can be hoisted // outside the loop or another instruction might be inserted them in between. Builder.SetInsertPoint(Load); @@ -171,13 +170,17 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) { // Convert lookup tables to relative lookup tables in the module. static bool convertToRelativeLookupTables( Module &M, function_ref GetTTI) { - Module::iterator FI = M.begin(); - if (FI == M.end()) - return false; + for (Function &F : M) { + if (F.isDeclaration()) + continue; - // Check if we have a target that supports relative lookup tables. - if (!GetTTI(*FI).shouldBuildRelLookupTables()) - return false; + // Check if we have a target that supports relative lookup tables. + if (!GetTTI(F).shouldBuildRelLookupTables()) + return false; + + // We assume that the result is independent of the checked function. 
+ break; + } bool Changed = false; diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index d7e8eaf677c6..eee91e70292e 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -15,14 +15,12 @@ #include "llvm/Transforms/Utils/SCCPSolver.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/ValueLattice.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" #include #include #include @@ -452,7 +450,8 @@ public: return TrackingIncomingArguments; } - void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C); + void markArgInFuncSpecialization(Function *F, + const SmallVectorImpl &Args); void markFunctionUnreachable(Function *F) { for (auto &BB : *F) @@ -526,29 +525,38 @@ Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const { return nullptr; } -void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A, - Constant *C) { - assert(F->arg_size() == A->getParent()->arg_size() && +void SCCPInstVisitor::markArgInFuncSpecialization( + Function *F, const SmallVectorImpl &Args) { + assert(!Args.empty() && "Specialization without arguments"); + assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() && "Functions should have the same number of arguments"); - // Mark the argument constant in the new function. - markConstant(A, C); - - // For the remaining arguments in the new function, copy the lattice state - // over from the old function. - for (auto I = F->arg_begin(), J = A->getParent()->arg_begin(), - E = F->arg_end(); - I != E; ++I, ++J) - if (J != A && ValueState.count(I)) { + auto Iter = Args.begin(); + Argument *NewArg = F->arg_begin(); + Argument *OldArg = Args[0].Formal->getParent()->arg_begin(); + for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) { + + LLVM_DEBUG(dbgs() << "SCCP: Marking argument " + << NewArg->getNameOrAsOperand() << "\n"); + + if (Iter != Args.end() && OldArg == Iter->Formal) { + // Mark the argument constants in the new function. + markConstant(NewArg, Iter->Actual); + ++Iter; + } else if (ValueState.count(OldArg)) { + // For the remaining arguments in the new function, copy the lattice state + // over from the old function. + // // Note: This previously looked like this: - // ValueState[J] = ValueState[I]; + // ValueState[NewArg] = ValueState[OldArg]; // This is incorrect because the DenseMap class may resize the underlying - // memory when inserting `J`, which will invalidate the reference to `I`. - // Instead, we make sure `J` exists, then set it to `I` afterwards. - auto &NewValue = ValueState[J]; - NewValue = ValueState[I]; - pushToWorkList(NewValue, J); + // memory when inserting `NewArg`, which will invalidate the reference to + // `OldArg`. Instead, we make sure `NewArg` exists before setting it. + auto &NewValue = ValueState[NewArg]; + NewValue = ValueState[OldArg]; + pushToWorkList(NewValue, NewArg); } + } } void SCCPInstVisitor::visitInstruction(Instruction &I) { @@ -988,7 +996,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { if ((V1State.isConstant() || V2State.isConstant())) { Value *V1 = isConstant(V1State) ? 
getConstant(V1State) : I.getOperand(0); Value *V2 = isConstant(V2State) ? getConstant(V2State) : I.getOperand(1); - Value *R = SimplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL)); + Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL)); auto *C = dyn_cast_or_null(R); if (C) { // X op Y -> undef. @@ -1287,17 +1295,6 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { return; } - // TODO: Actually filp MayIncludeUndef for the created range to false, - // once most places in the optimizer respect the branches on - // undef/poison are UB rule. The reason why the new range cannot be - // undef is as follows below: - // The new range is based on a branch condition. That guarantees that - // neither of the compare operands can be undef in the branch targets, - // unless we have conditions that are always true/false (e.g. icmp ule - // i32, %a, i32_max). For the latter overdefined/empty range will be - // inferred, but the branch will get folded accordingly anyways. - bool MayIncludeUndef = !isa(PI); - ValueLatticeElement CondVal = getValueState(OtherOp); ValueLatticeElement &IV = ValueState[&CB]; if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) { @@ -1322,9 +1319,15 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement()) NewCR = CopyOfCR; + // The new range is based on a branch condition. That guarantees that + // neither of the compare operands can be undef in the branch targets, + // unless we have conditions that are always true/false (e.g. icmp ule + // i32, %a, i32_max). For the latter overdefined/empty range will be + // inferred, but the branch will get folded accordingly anyways. addAdditionalUser(OtherOp, &CB); - mergeInValue(IV, &CB, - ValueLatticeElement::getRange(NewCR, MayIncludeUndef)); + mergeInValue( + IV, &CB, + ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef*/ false)); return; } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only @@ -1332,8 +1335,7 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { addAdditionalUser(OtherOp, &CB); mergeInValue(IV, &CB, CondVal); return; - } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() && - !MayIncludeUndef) { + } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant()) { // Propagate inequalities. addAdditionalUser(OtherOp, &CB); mergeInValue(IV, &CB, @@ -1442,22 +1444,19 @@ void SCCPInstVisitor::solve() { } } -/// resolvedUndefsIn - While solving the dataflow for a function, we assume -/// that branches on undef values cannot reach any of their successors. -/// However, this is not a safe assumption. After we solve dataflow, this -/// method should be use to handle this. If this returns true, the solver -/// should be rerun. +/// While solving the dataflow for a function, we don't compute a result for +/// operations with an undef operand, to allow undef to be lowered to a +/// constant later. For example, constant folding of "zext i8 undef to i16" +/// would result in "i16 0", and if undef is later lowered to "i8 1", then the +/// zext result would become "i16 1" and would result into an overdefined +/// lattice value once merged with the previous result. Not computing the +/// result of the zext (treating undef the same as unknown) allows us to handle +/// a later undef->constant lowering more optimally. 
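[Annotation] The hazard the rewritten comment describes is easy to see with two inconsistent resolutions of the same undef operand (values illustrative):

#include "llvm/ADT/APInt.h"
using llvm::APInt;

// Folding "zext i8 undef to i16" early commits to one concrete value.
static bool zextFoldWasSafe() {
  APInt FoldedEarly = APInt(8, 0).zext(16); // assume undef == 0 -> i16 0
  APInt FoldedLate = APInt(8, 1).zext(16);  // undef later lowered to 1 -> i16 1
  // The two results disagree, so merging them drives the lattice to
  // overdefined; leaving the zext unknown keeps the undef->constant
  // choice open.
  return FoldedEarly == FoldedLate; // false
}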
/// -/// This method handles this by finding an unresolved branch and marking it one -/// of the edges from the block as being feasible, even though the condition -/// doesn't say it would otherwise be. This allows SCCP to find the rest of the -/// CFG and only slightly pessimizes the analysis results (by marking one, -/// potentially infeasible, edge feasible). This cannot usefully modify the -/// constraints on the condition of the branch, as that would impact other users -/// of the value. -/// -/// This scan also checks for values that use undefs. It conservatively marks -/// them as overdefined. +/// However, if the operand remains undef when the solver returns, we do need +/// to assign some result to the instruction (otherwise we would treat it as +/// unreachable). For simplicity, we mark any instructions that are still +/// unknown as overdefined. bool SCCPInstVisitor::resolvedUndefsIn(Function &F) { bool MadeChange = false; for (BasicBlock &BB : F) { @@ -1486,7 +1485,7 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) { // more precise than this but it isn't worth bothering. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { ValueLatticeElement &LV = getStructValueState(&I, i); - if (LV.isUnknownOrUndef()) { + if (LV.isUnknown()) { markOverdefined(LV, &I); MadeChange = true; } @@ -1495,7 +1494,7 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) { } ValueLatticeElement &LV = getValueState(&I); - if (!LV.isUnknownOrUndef()) + if (!LV.isUnknown()) continue; // There are two reasons a call can have an undef result @@ -1518,91 +1517,6 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) { markOverdefined(&I); MadeChange = true; } - - // Check to see if we have a branch or switch on an undefined value. If so - // we force the branch to go one way or the other to make the successor - // values live. It doesn't really matter which way we force it. - Instruction *TI = BB.getTerminator(); - if (auto *BI = dyn_cast(TI)) { - if (!BI->isConditional()) - continue; - if (!getValueState(BI->getCondition()).isUnknownOrUndef()) - continue; - - // If the input to SCCP is actually branch on undef, fix the undef to - // false. - if (isa(BI->getCondition())) { - BI->setCondition(ConstantInt::getFalse(BI->getContext())); - markEdgeExecutable(&BB, TI->getSuccessor(1)); - MadeChange = true; - continue; - } - - // Otherwise, it is a branch on a symbolic value which is currently - // considered to be undef. Make sure some edge is executable, so a - // branch on "undef" always flows somewhere. - // FIXME: Distinguish between dead code and an LLVM "undef" value. - BasicBlock *DefaultSuccessor = TI->getSuccessor(1); - if (markEdgeExecutable(&BB, DefaultSuccessor)) - MadeChange = true; - - continue; - } - - if (auto *IBR = dyn_cast(TI)) { - // Indirect branch with no successor ?. Its ok to assume it branches - // to no target. - if (IBR->getNumSuccessors() < 1) - continue; - - if (!getValueState(IBR->getAddress()).isUnknownOrUndef()) - continue; - - // If the input to SCCP is actually branch on undef, fix the undef to - // the first successor of the indirect branch. - if (isa(IBR->getAddress())) { - IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0))); - markEdgeExecutable(&BB, IBR->getSuccessor(0)); - MadeChange = true; - continue; - } - - // Otherwise, it is a branch on a symbolic value which is currently - // considered to be undef. Make sure some edge is executable, so a - // branch on "undef" always flows somewhere. 
- // FIXME: IndirectBr on "undef" doesn't actually need to go anywhere: - // we can assume the branch has undefined behavior instead. - BasicBlock *DefaultSuccessor = IBR->getSuccessor(0); - if (markEdgeExecutable(&BB, DefaultSuccessor)) - MadeChange = true; - - continue; - } - - if (auto *SI = dyn_cast(TI)) { - if (!SI->getNumCases() || - !getValueState(SI->getCondition()).isUnknownOrUndef()) - continue; - - // If the input to SCCP is actually switch on undef, fix the undef to - // the first constant. - if (isa(SI->getCondition())) { - SI->setCondition(SI->case_begin()->getCaseValue()); - markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor()); - MadeChange = true; - continue; - } - - // Otherwise, it is a branch on a symbolic value which is currently - // considered to be undef. Make sure some edge is executable, so a - // branch on "undef" always flows somewhere. - // FIXME: Distinguish between dead code and an LLVM "undef" value. - BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor(); - if (markEdgeExecutable(&BB, DefaultSuccessor)) - MadeChange = true; - - continue; - } } return MadeChange; @@ -1618,7 +1532,7 @@ SCCPSolver::SCCPSolver( LLVMContext &Ctx) : Visitor(new SCCPInstVisitor(DL, std::move(GetTLI), Ctx)) {} -SCCPSolver::~SCCPSolver() {} +SCCPSolver::~SCCPSolver() = default; void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) { return Visitor->addAnalysis(F, std::move(A)); @@ -1713,9 +1627,9 @@ SmallPtrSetImpl &SCCPSolver::getArgumentTrackedFunctions() { return Visitor->getArgumentTrackedFunctions(); } -void SCCPSolver::markArgInFuncSpecialization(Function *F, Argument *A, - Constant *C) { - Visitor->markArgInFuncSpecialization(F, A, C); +void SCCPSolver::markArgInFuncSpecialization( + Function *F, const SmallVectorImpl &Args) { + Visitor->markArgInFuncSpecialization(F, Args); } void SCCPSolver::markFunctionUnreachable(Function *F) { diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 7d9992176658..37019e3bf95b 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -25,7 +25,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -166,7 +165,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. 
if (Value *V = - SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { + simplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { InsertedPHI->eraseFromParent(); return V; } diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 961adf2570a7..5e92b9852a9f 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -15,15 +15,46 @@ #include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include #include +#include using namespace llvm; #define DEBUG_TYPE "sample-profile-inference" namespace { +static cl::opt SampleProfileEvenCountDistribution( + "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, + cl::desc("Try to evenly distribute counts when there are multiple equally " + "likely options.")); + +static cl::opt SampleProfileMaxDfsCalls( + "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, + cl::desc("Maximum number of dfs iterations for even count distribution.")); + +static cl::opt SampleProfileProfiCostInc( + "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, + cl::desc("A cost of increasing a block's count by one.")); + +static cl::opt SampleProfileProfiCostDec( + "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, + cl::desc("A cost of decreasing a block's count by one.")); + +static cl::opt SampleProfileProfiCostIncZero( + "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, + cl::desc("A cost of increasing a count of zero-weight block by one.")); + +static cl::opt SampleProfileProfiCostIncEntry( + "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, + cl::desc("A cost of increasing the entry block's count by one.")); + +static cl::opt SampleProfileProfiCostDecEntry( + "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, + cl::desc("A cost of decreasing the entry block's count by one.")); + /// A value indicating an infinite flow/capacity/weight of a block/edge. /// Not using numeric_limits::max(), as the values can be summed up /// during the execution. @@ -52,16 +83,16 @@ public: Nodes = std::vector(NodeCount); Edges = std::vector>(NodeCount, std::vector()); + if (SampleProfileEvenCountDistribution) + AugmentingEdges = + std::vector>(NodeCount, std::vector()); } // Run the algorithm. int64_t run() { - // Find an augmenting path and update the flow along the path - size_t AugmentationIters = 0; - while (findAugmentingPath()) { - augmentFlowAlongPath(); - AugmentationIters++; - } + // Iteratively find an augmentation path/dag in the network and send the + // flow along its edges + size_t AugmentationIters = applyFlowAugmentation(); // Compute the total flow and its cost int64_t TotalCost = 0; @@ -79,6 +110,7 @@ public: << " iterations with " << TotalFlow << " total flow" << " of " << TotalCost << " cost\n"); (void)TotalFlow; + (void)AugmentationIters; return TotalCost; } @@ -134,20 +166,61 @@ public: return Flow; } - /// A cost of increasing a block's count by one. - static constexpr int64_t AuxCostInc = 10; - /// A cost of decreasing a block's count by one. - static constexpr int64_t AuxCostDec = 20; - /// A cost of increasing a count of zero-weight block by one. - static constexpr int64_t AuxCostIncZero = 11; - /// A cost of increasing the entry block's count by one. 
- static constexpr int64_t AuxCostIncEntry = 40; - /// A cost of decreasing the entry block's count by one. - static constexpr int64_t AuxCostDecEntry = 10; /// A cost of taking an unlikely jump. static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; + /// Minimum BaseDistance for the jump distance values in island joining. + static constexpr uint64_t MinBaseDistance = 10000; private: + /// Iteratively find an augmentation path/dag in the network and send the + /// flow along its edges. The method returns the number of applied iterations. + size_t applyFlowAugmentation() { + size_t AugmentationIters = 0; + while (findAugmentingPath()) { + uint64_t PathCapacity = computeAugmentingPathCapacity(); + while (PathCapacity > 0) { + bool Progress = false; + if (SampleProfileEvenCountDistribution) { + // Identify node/edge candidates for augmentation + identifyShortestEdges(PathCapacity); + + // Find an augmenting DAG + auto AugmentingOrder = findAugmentingDAG(); + + // Apply the DAG augmentation + Progress = augmentFlowAlongDAG(AugmentingOrder); + PathCapacity = computeAugmentingPathCapacity(); + } + + if (!Progress) { + augmentFlowAlongPath(PathCapacity); + PathCapacity = 0; + } + + AugmentationIters++; + } + } + return AugmentationIters; + } + + /// Compute the capacity of the cannonical augmenting path. If the path is + /// saturated (that is, no flow can be sent along the path), then return 0. + uint64_t computeAugmentingPathCapacity() { + uint64_t PathCapacity = INF; + uint64_t Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + + assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); + uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); + PathCapacity = std::min(PathCapacity, EdgeCapacity); + + Now = Pred; + } + return PathCapacity; + } + /// Check for existence of an augmenting path with a positive capacity. bool findAugmentingPath() { // Initialize data structures @@ -180,7 +253,7 @@ private: // from Source to Target; it follows from inequalities // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] // >= Dist[Source, V] - if (Nodes[Target].Distance == 0) + if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) break; if (Nodes[Src].Distance > Nodes[Target].Distance) continue; @@ -210,21 +283,9 @@ private: } /// Update the current flow along the augmenting path. - void augmentFlowAlongPath() { - // Find path capacity - int64_t PathCapacity = INF; - uint64_t Now = Target; - while (Now != Source) { - uint64_t Pred = Nodes[Now].ParentNode; - auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; - PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); - Now = Pred; - } - + void augmentFlowAlongPath(uint64_t PathCapacity) { assert(PathCapacity > 0 && "found an incorrect augmenting path"); - - // Update the flow along the path - Now = Target; + uint64_t Now = Target; while (Now != Source) { uint64_t Pred = Nodes[Now].ParentNode; auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; @@ -237,6 +298,220 @@ private: } } + /// Find an Augmenting DAG order using a modified version of DFS in which we + /// can visit a node multiple times. In the DFS search, when scanning each + /// edge out of a node, continue search at Edge.Dst endpoint if it has not + /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm + /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. 
+ /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) + /// that starts with Source and ends with Target. + std::vector findAugmentingDAG() { + // We use a stack based implemenation of DFS to avoid recursion. + // Defining DFS data structures: + // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that + // - we are currently visiting Nodes[NodeIdx] and + // - the next edge to scan is Edges[NodeIdx][EdgeIdx] + typedef std::pair StackItemType; + std::stack Stack; + std::vector AugmentingOrder; + + // Phase 0: Initialize Node attributes and Time for DFS run + for (auto &Node : Nodes) { + Node.Discovery = 0; + Node.Finish = 0; + Node.NumCalls = 0; + Node.Taken = false; + } + uint64_t Time = 0; + // Mark Target as Taken + // Taken attribute will be propagated backwards from Target towards Source + Nodes[Target].Taken = true; + + // Phase 1: Start DFS traversal from Source + Stack.emplace(Source, 0); + Nodes[Source].Discovery = ++Time; + while (!Stack.empty()) { + auto NodeIdx = Stack.top().first; + auto EdgeIdx = Stack.top().second; + + // If we haven't scanned all edges out of NodeIdx, continue scanning + if (EdgeIdx < Edges[NodeIdx].size()) { + auto &Edge = Edges[NodeIdx][EdgeIdx]; + auto &Dst = Nodes[Edge.Dst]; + Stack.top().second++; + + if (Edge.OnShortestPath) { + // If we haven't seen Edge.Dst so far, continue DFS search there + if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { + Dst.Discovery = ++Time; + Stack.emplace(Edge.Dst, 0); + Dst.NumCalls++; + } else if (Dst.Taken && Dst.Finish != 0) { + // Else, if Edge.Dst already have a path to Target, so that NodeIdx + Nodes[NodeIdx].Taken = true; + } + } + } else { + // If we are done scanning all edge out of NodeIdx + Stack.pop(); + // If we haven't found a path from NodeIdx to Target, forget about it + if (!Nodes[NodeIdx].Taken) { + Nodes[NodeIdx].Discovery = 0; + } else { + // If we have found a path from NodeIdx to Target, then finish NodeIdx + // and propagate Taken flag to DFS parent unless at the Source + Nodes[NodeIdx].Finish = ++Time; + // NodeIdx == Source if and only if the stack is empty + if (NodeIdx != Source) { + assert(!Stack.empty() && "empty stack while running dfs"); + Nodes[Stack.top().first].Taken = true; + } + AugmentingOrder.push_back(NodeIdx); + } + } + } + // Nodes are collected decreasing Finish time, so the order is reversed + std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); + + // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges + for (size_t Src : AugmentingOrder) { + AugmentingEdges[Src].clear(); + for (auto &Edge : Edges[Src]) { + uint64_t Dst = Edge.Dst; + if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && + Nodes[Dst].Finish < Nodes[Src].Finish) { + AugmentingEdges[Src].push_back(&Edge); + } + } + assert((Src == Target || !AugmentingEdges[Src].empty()) && + "incorrectly constructed augmenting edges"); + } + + return AugmentingOrder; + } + + /// Update the current flow along the given (acyclic) subgraph specified by + /// the vertex order, AugmentingOrder. The objective is to send as much flow + /// as possible while evenly distributing flow among successors of each node. + /// After the update at least one edge is saturated. 
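[Annotation] augmentFlowAlongDAG, below, rounds the per-successor split up so no flow is stranded at a node, then pushes any excess back in a reverse pass. The rounding step of Phase 2 in isolation, with illustrative numbers:

#include <cstdint>

static void evenSplitExample() {
  uint64_t IntFlow = 10, Degree = 3;
  uint64_t SuccFlow = (IntFlow + Degree - 1) / Degree; // ceil(10/3) == 4
  // Each edge takes min(remaining flow, SuccFlow), further clamped by its
  // residual capacity: 4, 4, then 2, so all 10 units leave the node. This
  // is why IntFlow <= SuccFlow * Degree is guaranteed.
  (void)SuccFlow;
}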
+ bool augmentFlowAlongDAG(const std::vector &AugmentingOrder) { + // Phase 0: Initialization + for (uint64_t Src : AugmentingOrder) { + Nodes[Src].FracFlow = 0; + Nodes[Src].IntFlow = 0; + for (auto &Edge : AugmentingEdges[Src]) { + Edge->AugmentedFlow = 0; + } + } + + // Phase 1: Send a unit of fractional flow along the DAG + uint64_t MaxFlowAmount = INF; + Nodes[Source].FracFlow = 1.0; + for (uint64_t Src : AugmentingOrder) { + assert((Src == Target || Nodes[Src].FracFlow > 0.0) && + "incorrectly computed fractional flow"); + // Distribute flow evenly among successors of Src + uint64_t Degree = AugmentingEdges[Src].size(); + for (auto &Edge : AugmentingEdges[Src]) { + double EdgeFlow = Nodes[Src].FracFlow / Degree; + Nodes[Edge->Dst].FracFlow += EdgeFlow; + if (Edge->Capacity == INF) + continue; + uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; + MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); + } + } + // Stop early if we cannot send any (integral) flow from Source to Target + if (MaxFlowAmount == 0) + return false; + + // Phase 2: Send an integral flow of MaxFlowAmount + Nodes[Source].IntFlow = MaxFlowAmount; + for (uint64_t Src : AugmentingOrder) { + if (Src == Target) + break; + // Distribute flow evenly among successors of Src, rounding up to make + // sure all flow is sent + uint64_t Degree = AugmentingEdges[Src].size(); + // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree + uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; + for (auto &Edge : AugmentingEdges[Src]) { + uint64_t Dst = Edge->Dst; + uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); + EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); + Nodes[Dst].IntFlow += EdgeFlow; + Nodes[Src].IntFlow -= EdgeFlow; + Edge->AugmentedFlow += EdgeFlow; + } + } + assert(Nodes[Target].IntFlow <= MaxFlowAmount); + Nodes[Target].IntFlow = 0; + + // Phase 3: Send excess flow back traversing the nodes backwards. + // Because of rounding, not all flow can be sent along the edges of Src. + // Hence, sending the remaining flow back to maintain flow conservation + for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { + uint64_t Src = AugmentingOrder[Idx - 1]; + // Try to send excess flow back along each edge. + // Make sure we only send back flow we just augmented (AugmentedFlow). + for (auto &Edge : AugmentingEdges[Src]) { + uint64_t Dst = Edge->Dst; + if (Nodes[Dst].IntFlow == 0) + continue; + uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); + Nodes[Dst].IntFlow -= EdgeFlow; + Nodes[Src].IntFlow += EdgeFlow; + Edge->AugmentedFlow -= EdgeFlow; + } + } + + // Phase 4: Update flow values along all edges + bool HasSaturatedEdges = false; + for (uint64_t Src : AugmentingOrder) { + // Verify that we have sent all the excess flow from the node + assert(Src == Source || Nodes[Src].IntFlow == 0); + for (auto &Edge : AugmentingEdges[Src]) { + assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); + // Update flow values along the edge and its reverse copy + auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; + Edge->Flow += Edge->AugmentedFlow; + RevEdge.Flow -= Edge->AugmentedFlow; + if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) + HasSaturatedEdges = true; + } + } + + // The augmentation is successful iff at least one edge becomes saturated + return HasSaturatedEdges; + } + + /// Identify candidate (shortest) edges for augmentation. 
+ void identifyShortestEdges(uint64_t PathCapacity) { + assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); + // To make sure the augmentation DAG contains only edges with large residual + // capacity, we prune all edges whose capacity is below a fraction of + // the capacity of the augmented path. + // (All edges of the path itself are always in the DAG) + uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); + + // Decide which edges are on a shortest path from Source to Target + for (size_t Src = 0; Src < Nodes.size(); Src++) { + // An edge cannot be augmenting if the endpoint has large distance + if (Nodes[Src].Distance > Nodes[Target].Distance) + continue; + + for (auto &Edge : Edges[Src]) { + uint64_t Dst = Edge.Dst; + Edge.OnShortestPath = + Src != Target && Dst != Source && + Nodes[Dst].Distance <= Nodes[Target].Distance && + Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && + Edge.Capacity > Edge.Flow && + uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; + } + } + } + /// A node in a flow network. struct Node { /// The cost of the cheapest path from the source to the current node. @@ -247,7 +522,20 @@ private: uint64_t ParentEdgeIndex; /// An indicator of whether the current node is in a queue. bool Taken; + + /// Data fields utilized in DAG-augmentation: + /// Fractional flow. + double FracFlow; + /// Integral flow. + uint64_t IntFlow; + /// Discovery time. + uint64_t Discovery; + /// Finish time. + uint64_t Finish; + /// NumCalls. + uint64_t NumCalls; }; + /// An edge in a flow network. struct Edge { /// The cost of the edge. @@ -260,6 +548,12 @@ private: uint64_t Dst; /// The index of the reverse edge between Dst and the current node. uint64_t RevEdgeIndex; + + /// Data fields utilized in DAG-augmentation: + /// Whether the edge is currently on a shortest path from Source to Target. + bool OnShortestPath; + /// Extra flow along the edge. + uint64_t AugmentedFlow; }; /// The set of network nodes. @@ -270,8 +564,13 @@ private: uint64_t Source; /// Target (sink) node of the flow. uint64_t Target; + /// Augmenting edges. + std::vector> AugmentingEdges; }; +constexpr int64_t MinCostMaxFlow::AuxCostUnlikely; +constexpr uint64_t MinCostMaxFlow::MinBaseDistance; + /// A post-processing adjustment of control flow. It applies two steps by /// rerouting some flow and making it more realistic: /// @@ -433,19 +732,22 @@ private: /// A distance of a path for a given jump. /// In order to incite the path to use blocks/jumps with large positive flow, /// and avoid changing branch probability of outgoing edges drastically, - /// set the distance as follows: - /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) - /// if Block.Weight > 0, then distance = 1 - /// otherwise distance >> 1 + /// set the jump distance so as: + /// - to minimize the number of unlikely jumps used and subject to that, + /// - to minimize the number of Flow == 0 jumps used and subject to that, + /// - minimizes total multiplicative Flow increase for the remaining edges. + /// To capture this objective with integer distances, we round off fractional + /// parts to a multiple of 1 / BaseDistance. 
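[Annotation] With the new distance function below, high-flow jumps cost barely more than BaseDistance while zero-flow jumps cost BaseDistance * NumBlocks(), so the shortest-path search reroutes along jumps that already carry flow. A worked instance with illustrative numbers (BaseDistance clamped to MinBaseDistance = 10000):

#include <cstdint>

// Assumes Flow > 0; Flow == 0 jumps get BaseDistance * NumBlocks() instead.
static uint64_t exampleJumpDistance(uint64_t Flow) {
  const uint64_t BaseDistance = 10000;
  return BaseDistance + BaseDistance / Flow;
}
// exampleJumpDistance(100) == 10100 and exampleJumpDistance(1) == 20000,
// so a path through the Flow == 100 jump is strongly preferred.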
   int64_t jumpDistance(FlowJump *Jump) const {
-    int64_t BaseDistance = 100;
+    uint64_t BaseDistance =
+        std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
+                 std::min(Func.Blocks[Func.Entry].Flow,
+                          MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
     if (Jump->IsUnlikely)
       return MinCostMaxFlow::AuxCostUnlikely;
     if (Jump->Flow > 0)
-      return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
-    if (Func.Blocks[Jump->Target].Weight > 0)
-      return BaseDistance;
-    return BaseDistance * (NumBlocks() + 1);
+      return BaseDistance + BaseDistance / Jump->Flow;
+    return BaseDistance * NumBlocks();
   };

   uint64_t NumBlocks() const { return Func.Blocks.size(); }

@@ -511,7 +813,7 @@ private:
                            std::vector<FlowBlock *> &KnownDstBlocks,
                            std::vector<FlowBlock *> &UnknownBlocks) {
     // Run BFS from SrcBlock and make sure all paths are going through unknown
-    // blocks and end at a non-unknown DstBlock
+    // blocks and end at a known DstBlock
     auto Visited = BitVector(NumBlocks(), false);
     std::queue<uint64_t> Queue;

@@ -778,8 +1080,8 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
     // We assume that decreasing block counts is more expensive than increasing,
    // and thus set separate costs here. In the future we may want to tune
    // the relative costs so as to maximize the quality of generated profiles.
-    int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
-    int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
+    int64_t AuxCostInc = SampleProfileProfiCostInc;
+    int64_t AuxCostDec = SampleProfileProfiCostDec;
     if (Block.UnknownWeight) {
       // Do not penalize changing weights of blocks w/o known profile count
       AuxCostInc = 0;
@@ -788,12 +1090,12 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
       // Increasing the count for "cold" blocks with zero initial count is more
       // expensive than for "hot" ones
       if (Block.Weight == 0) {
-        AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
+        AuxCostInc = SampleProfileProfiCostIncZero;
       }
       // Modifying the count of the entry block is expensive
       if (Block.isEntry()) {
-        AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
-        AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
+        AuxCostInc = SampleProfileProfiCostIncEntry;
+        AuxCostDec = SampleProfileProfiCostDecEntry;
       }
     }
     // For blocks with self-edges, do not penalize a reduction of the count,
diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index ea0e8343eb88..a2588b8cec7d 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -11,6 +11,10 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"

 namespace llvm {

@@ -35,9 +39,13 @@ cl::opt<bool> NoWarnSampleUnused(
             "samples but without debug information to use those samples. "));

 cl::opt<bool> SampleProfileUseProfi(
-    "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+    "sample-profile-use-profi", cl::Hidden,
     cl::desc("Use profi to infer block and edge counts."));

+cl::opt<bool> SampleProfileInferEntryCount(
+    "sample-profile-infer-entry-count", cl::init(true), cl::Hidden,
+    cl::desc("Use profi to infer function entry count."));
+
 namespace sampleprofutil {

 /// Return true if the given callsite is hot wrt the hot cutoff threshold.
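[Editorial note: a standalone sketch of the new jumpDistance() arithmetic from the hunk above; it is not part of the patch. The numeric values for MinBaseDistance and AuxCostUnlikely are hypothetical stand-ins, and the function is flattened into free parameters — only the shape of the formula is taken from the patch.]

#include <algorithm>
#include <cassert>
#include <cstdint>

// Hypothetical stand-ins for MinCostMaxFlow::MinBaseDistance and
// MinCostMaxFlow::AuxCostUnlikely; the real constants are defined elsewhere
// in SampleProfileInference.
static constexpr uint64_t MinBaseDistance = 10000;
static constexpr uint64_t AuxCostUnlikely = uint64_t(1) << 30;

static uint64_t jumpDistance(uint64_t EntryFlow, uint64_t NumBlocks,
                             uint64_t JumpFlow, bool IsUnlikely) {
  uint64_t BaseDistance =
      std::max(MinBaseDistance,
               std::min(EntryFlow, AuxCostUnlikely / NumBlocks));
  if (IsUnlikely)
    return AuxCostUnlikely;
  if (JumpFlow > 0)
    return BaseDistance + BaseDistance / JumpFlow;
  return BaseDistance * NumBlocks;
}

int main() {
  // Entry flow 1,000,000 over 100 blocks: BaseDistance = 1,000,000.
  const uint64_t Entry = 1000000, NB = 100;
  // A jump as hot as the entry costs just over one BaseDistance unit.
  assert(jumpDistance(Entry, NB, /*JumpFlow=*/1000000, false) == 1000001);
  // A Flow == 1 jump costs twice as much ...
  assert(jumpDistance(Entry, NB, 1, false) == 2000000);
  // ... a Flow == 0 jump costs NumBlocks BaseDistance units ...
  assert(jumpDistance(Entry, NB, 0, false) == 100000000);
  // ... and an unlikely jump dwarfs them all.
  assert(jumpDistance(Entry, NB, 0, true) == (uint64_t(1) << 30));
  return 0;
}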
diff --git a/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/llvm/lib/Transforms/Utils/SanitizerStats.cpp index a1313c77ed77..fd21ee4cc408 100644 --- a/llvm/lib/Transforms/Utils/SanitizerStats.cpp +++ b/llvm/lib/Transforms/Utils/SanitizerStats.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SanitizerStats.h" -#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 5363a851fc27..401f1ee5a55d 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -22,11 +22,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -276,7 +273,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } // If we haven't found this binop, insert it. - Instruction *BO = cast(Builder.CreateBinOp(Opcode, LHS, RHS)); + // TODO: Use the Builder, which will make CreateBinOp below fold with + // InstSimplifyFolder. + Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS)); BO->setDebugLoc(Loc); if (Flags & SCEV::FlagNUW) BO->setHasNoUnsignedWrap(); @@ -591,7 +590,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (isa(IP)) ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && - IP->getOperand(0) == V && IP->getOperand(1) == Idx) + IP->getOperand(0) == V && IP->getOperand(1) == Idx && + cast(&*IP)->getSourceElementType() == + Type::getInt8Ty(Ty->getContext())) return &*IP; if (IP == BlockBegin) break; } @@ -1633,7 +1634,6 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { NewS = Ext; const SCEV *V = cast(NewS)->evaluateAtIteration(IH, SE); - //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; // Truncate the result down to the original type, if needed. const SCEV *T = SE.getTruncateOrNoop(V, Ty); @@ -1671,154 +1671,49 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { return Builder.CreateSExt(V, Ty); } -Value *SCEVExpander::expandSMaxExpr(const SCEVNAryExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands()-2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *Sel; - if (Ty->isIntegerTy()) - Sel = Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS}, - /*FMFSource=*/nullptr, "smax"); - else { - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); - Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); - } - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. 
- if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::expandUMaxExpr(const SCEVNAryExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands()-2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *Sel; - if (Ty->isIntegerTy()) - Sel = Builder.CreateIntrinsic(Intrinsic::umax, {Ty}, {LHS, RHS}, - /*FMFSource=*/nullptr, "umax"); - else { - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); - Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); - } - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. - if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::expandSMinExpr(const SCEVNAryExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands() - 2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *Sel; - if (Ty->isIntegerTy()) - Sel = Builder.CreateIntrinsic(Intrinsic::smin, {Ty}, {LHS, RHS}, - /*FMFSource=*/nullptr, "smin"); - else { - Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); - Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); - } - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. - if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::expandUMinExpr(const SCEVNAryExpr *S) { +Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S, + Intrinsic::ID IntrinID, Twine Name, + bool IsSequential) { Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); Type *Ty = LHS->getType(); + if (IsSequential) + LHS = Builder.CreateFreeze(LHS); for (int i = S->getNumOperands() - 2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); + if (IsSequential && i != 0) + RHS = Builder.CreateFreeze(RHS); Value *Sel; if (Ty->isIntegerTy()) - Sel = Builder.CreateIntrinsic(Intrinsic::umin, {Ty}, {LHS, RHS}, - /*FMFSource=*/nullptr, "umin"); + Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, Name); else { - Value *ICmp = Builder.CreateICmpULT(LHS, RHS); - Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + Value *ICmp = + Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name); } LHS = Sel; } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. 
- if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); return LHS; } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { - return expandSMaxExpr(S); + return expandMinMaxExpr(S, Intrinsic::smax, "smax"); } Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { - return expandUMaxExpr(S); + return expandMinMaxExpr(S, Intrinsic::umax, "umax"); } Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { - return expandSMinExpr(S); + return expandMinMaxExpr(S, Intrinsic::smin, "smin"); } Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { - return expandUMinExpr(S); + return expandMinMaxExpr(S, Intrinsic::umin, "umin"); } Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) { - SmallVector Ops; - for (const SCEV *Op : S->operands()) - Ops.emplace_back(expand(Op)); - - Value *SaturationPoint = - MinMaxIntrinsic::getSaturationPoint(Intrinsic::umin, S->getType()); - - SmallVector OpIsZero; - for (Value *Op : ArrayRef(Ops).drop_back()) - OpIsZero.emplace_back(Builder.CreateICmpEQ(Op, SaturationPoint)); - - Value *AnyOpIsZero = Builder.CreateLogicalOr(OpIsZero); - - Value *NaiveUMin = expandUMinExpr(S); - return Builder.CreateSelect(AnyOpIsZero, SaturationPoint, NaiveUMin); + return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true); } Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, @@ -1868,35 +1763,33 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { return V; } -ScalarEvolution::ValueOffsetPair -SCEVExpander::FindValueInExprValueMap(const SCEV *S, - const Instruction *InsertPt) { - auto *Set = SE.getSCEVValues(S); +Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, + const Instruction *InsertPt) { // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. - if (CanonicalMode || !SE.containsAddRecurrence(S)) { - // If S is scConstant, it may be worse to reuse an existing Value. - if (S->getSCEVType() != scConstant && Set) { - // Choose a Value from the set which dominates the InsertPt. - // InsertPt should be inside the Value's parent loop so as not to break - // the LCSSA form. - for (auto const &VOPair : *Set) { - Value *V = VOPair.first; - ConstantInt *Offset = VOPair.second; - Instruction *EntInst = dyn_cast_or_null(V); - if (!EntInst) - continue; + if (!CanonicalMode && SE.containsAddRecurrence(S)) + return nullptr; - assert(EntInst->getFunction() == InsertPt->getFunction()); - if (S->getType() == V->getType() && - SE.DT.dominates(EntInst, InsertPt) && - (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) - return {V, Offset}; - } - } + // If S is a constant, it may be worse to reuse an existing Value. + if (isa(S)) + return nullptr; + + // Choose a Value from the set which dominates the InsertPt. + // InsertPt should be inside the Value's parent loop so as not to break + // the LCSSA form. 
+ for (Value *V : SE.getSCEVValues(S)) { + Instruction *EntInst = dyn_cast(V); + if (!EntInst) + continue; + + assert(EntInst->getFunction() == InsertPt->getFunction()); + if (S->getType() == V->getType() && + SE.DT.dominates(EntInst, InsertPt) && + (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) + return V; } - return {nullptr, nullptr}; + return nullptr; } // The expansion of SCEV will either reuse a previous Value in ExprValueMap, @@ -1965,9 +1858,7 @@ Value *SCEVExpander::expand(const SCEV *S) { Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. - ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt); - Value *V = VO.first; - + Value *V = FindValueInExprValueMap(S, InsertPt); if (!V) V = visit(S); else { @@ -1978,21 +1869,6 @@ Value *SCEVExpander::expand(const SCEV *S) { if (auto *I = dyn_cast(V)) if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) I->dropPoisonGeneratingFlags(); - - if (VO.second) { - if (PointerType *Vty = dyn_cast(V->getType())) { - int64_t Offset = VO.second->getSExtValue(); - ConstantInt *Idx = - ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); - } else { - V = Builder.CreateSub(V, VO.second); - } - } } // Remember the expanded value for this SCEV at this location. // @@ -2058,7 +1934,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // so narrow phis can reuse them. for (PHINode *Phi : Phis) { auto SimplifyPHINode = [&](PHINode *PN) -> Value * { - if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) + if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) return V; if (!SE.isSCEVable(PN->getType())) return nullptr; @@ -2174,9 +2050,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } -Optional -SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, - Loop *L) { +Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S, + const Instruction *At, + Loop *L) { using namespace llvm::PatternMatch; SmallVector ExitingBlocks; @@ -2193,25 +2069,17 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) - return ScalarEvolution::ValueOffsetPair(LHS, nullptr); + return LHS; if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) - return ScalarEvolution::ValueOffsetPair(RHS, nullptr); + return RHS; } // Use expand's logic which is used for reusing a previous Value in // ExprValueMap. Note that we don't currently model the cost of // needing to drop poison generating flags on the instruction if we // want to reuse it. We effectively assume that has zero cost. - ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); - if (VO.first) - return VO; - - // There is potential to make this significantly smarter, but this simple - // heuristic already gets some interesting cases. - - // Can not find suitable value. 
- return None; + return FindValueInExprValueMap(S, At); } template static InstructionCost costAndCollectOperands( @@ -2469,8 +2337,8 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, switch (Pred->getKind()) { case SCEVPredicate::P_Union: return expandUnionPredicate(cast(Pred), IP); - case SCEVPredicate::P_Equal: - return expandEqualPredicate(cast(Pred), IP); + case SCEVPredicate::P_Compare: + return expandComparePredicate(cast(Pred), IP); case SCEVPredicate::P_Wrap: { auto *AddRecPred = cast(Pred); return expandWrapPredicate(AddRecPred, IP); @@ -2479,15 +2347,16 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, llvm_unreachable("Unknown SCEV predicate type"); } -Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, - Instruction *IP) { +Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred, + Instruction *IP) { Value *Expr0 = expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false); Value *Expr1 = expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false); Builder.SetInsertPoint(IP); - auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); + auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate()); + auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check"); return I; } @@ -2496,7 +2365,8 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, assert(AR->isAffine() && "Cannot generate RT check for " "non-affine expression"); - SCEVUnionPredicate Pred; + // FIXME: It is highly suspicious that we're ignoring the predicates here. + SmallVector Pred; const SCEV *ExitCount = SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); @@ -2710,10 +2580,10 @@ namespace { struct SCEVFindUnsafe { ScalarEvolution &SE; bool CanonicalMode; - bool IsUnsafe; + bool IsUnsafe = false; SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode) - : SE(SE), CanonicalMode(CanonicalMode), IsUnsafe(false) {} + : SE(SE), CanonicalMode(CanonicalMode) {} bool follow(const SCEV *S) { if (const SCEVUDivExpr *D = dyn_cast(S)) { diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 335ac03ccb52..567b866f7777 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -27,7 +27,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemorySSA.h" @@ -50,7 +50,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -58,7 +57,6 @@ #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -74,7 +72,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -94,8 +91,8 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" cl::opt llvm::RequireAndPreserveDomTree( - 
"simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "simplifycfg-require-and-preserve-domtree", cl::Hidden, + cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " "into preserving DomTree,")); @@ -167,6 +164,14 @@ static cl::opt BranchFoldToCommonDestVectorMultiplier( "to fold branch to common destination when vector operations are " "present")); +static cl::opt EnableMergeCompatibleInvokes( + "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), + cl::desc("Allow SimplifyCFG to merge invokes together when appropriate")); + +static cl::opt MaxSwitchCasesPerResult( + "max-switch-cases-per-result", cl::Hidden, cl::init(16), + cl::desc("Limit cases to analyze when converting a switch to select")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -192,6 +197,8 @@ STATISTIC(NumSinkCommonInstrs, STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumInvokes, "Number of invokes with empty resume blocks simplified into calls"); +STATISTIC(NumInvokesMerged, "Number of invokes that were merged together"); +STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed"); namespace { @@ -291,6 +298,34 @@ public: } // end anonymous namespace +/// Return true if all the PHI nodes in the basic block \p BB +/// receive compatible (identical) incoming values when coming from +/// all of the predecessor blocks that are specified in \p IncomingBlocks. +/// +/// Note that if the values aren't exactly identical, but \p EquivalenceSet +/// is provided, and *both* of the values are present in the set, +/// then they are considered equal. +static bool IncomingValuesAreCompatible( + BasicBlock *BB, ArrayRef IncomingBlocks, + SmallPtrSetImpl *EquivalenceSet = nullptr) { + assert(IncomingBlocks.size() == 2 && + "Only for a pair of incoming blocks at the time!"); + + // FIXME: it is okay if one of the incoming values is an `undef` value, + // iff the other incoming value is guaranteed to be a non-poison value. + // FIXME: it is okay if one of the incoming values is a `poison` value. + return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) { + Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]); + Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]); + if (IV0 == IV1) + return true; + if (EquivalenceSet && EquivalenceSet->contains(IV0) && + EquivalenceSet->contains(IV1)) + return true; + return false; + }); +} + /// Return true if it is safe to merge these two /// terminator instructions together. 
static bool @@ -307,17 +342,17 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallPtrSet SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); bool Fail = false; - for (BasicBlock *Succ : successors(SI2BB)) - if (SI1Succs.count(Succ)) - for (BasicBlock::iterator BBI = Succ->begin(); isa(BBI); ++BBI) { - PHINode *PN = cast(BBI); - if (PN->getIncomingValueForBlock(SI1BB) != - PN->getIncomingValueForBlock(SI2BB)) { - if (FailBlocks) - FailBlocks->insert(Succ); - Fail = true; - } - } + for (BasicBlock *Succ : successors(SI2BB)) { + if (!SI1Succs.count(Succ)) + continue; + if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB})) + continue; + Fail = true; + if (FailBlocks) + FailBlocks->insert(Succ); + else + break; + } return !Fail; } @@ -347,6 +382,13 @@ static InstructionCost computeSpeculationCost(const User *I, return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); } +/// Check whether this is a potentially trapping constant. +static bool canTrap(const Value *V) { + if (auto *C = dyn_cast(V)) + return C->canTrap(); + return false; +} + /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -381,10 +423,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, if (!I) { // Non-instructions all dominate instructions, but not all constantexprs // can be executed unconditionally. - if (ConstantExpr *C = dyn_cast(V)) - if (C->canTrap()) - return false; - return true; + return !canTrap(V); } BasicBlock *PBB = I->getParent(); @@ -1459,7 +1498,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, return false; if (!I1NonDbg->isTerminator()) return false; - // Now we know that we only need to hoist debug instrinsics and the + // Now we know that we only need to hoist debug intrinsics and the // terminator. Let the loop below handle those 2 cases. } @@ -2212,6 +2251,320 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, return Changed; } +namespace { + +struct CompatibleSets { + using SetTy = SmallVector; + + SmallVector Sets; + + static bool shouldBelongToSameSet(ArrayRef Invokes); + + SetTy &getCompatibleSet(InvokeInst *II); + + void insert(InvokeInst *II); +}; + +CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) { + // Perform a linear scan over all the existing sets, see if the new `invoke` + // is compatible with any particular set. Since we know that all the `invokes` + // within a set are compatible, only check the first `invoke` in each set. + // WARNING: at worst, this has quadratic complexity. + for (CompatibleSets::SetTy &Set : Sets) { + if (CompatibleSets::shouldBelongToSameSet({Set.front(), II})) + return Set; + } + + // Otherwise, we either had no sets yet, or this invoke forms a new set. + return Sets.emplace_back(); +} + +void CompatibleSets::insert(InvokeInst *II) { + getCompatibleSet(II).emplace_back(II); +} + +bool CompatibleSets::shouldBelongToSameSet(ArrayRef Invokes) { + assert(Invokes.size() == 2 && "Always called with exactly two candidates."); + + // Can we theoretically merge these `invoke`s? + auto IsIllegalToMerge = [](InvokeInst *II) { + return II->cannotMerge() || II->isInlineAsm(); + }; + if (any_of(Invokes, IsIllegalToMerge)) + return false; + + // Either both `invoke`s must be direct, + // or both `invoke`s must be indirect. 
+ auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); }; + bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall); + bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall); + if (HaveIndirectCalls) { + if (!AllCallsAreIndirect) + return false; + } else { + // All callees must be identical. + Value *Callee = nullptr; + for (InvokeInst *II : Invokes) { + Value *CurrCallee = II->getCalledOperand(); + assert(CurrCallee && "There is always a called operand."); + if (!Callee) + Callee = CurrCallee; + else if (Callee != CurrCallee) + return false; + } + } + + // Either both `invoke`s must not have a normal destination, + // or both `invoke`s must have a normal destination, + auto HasNormalDest = [](InvokeInst *II) { + return !isa(II->getNormalDest()->getFirstNonPHIOrDbg()); + }; + if (any_of(Invokes, HasNormalDest)) { + // Do not merge `invoke` that does not have a normal destination with one + // that does have a normal destination, even though doing so would be legal. + if (!all_of(Invokes, HasNormalDest)) + return false; + + // All normal destinations must be identical. + BasicBlock *NormalBB = nullptr; + for (InvokeInst *II : Invokes) { + BasicBlock *CurrNormalBB = II->getNormalDest(); + assert(CurrNormalBB && "There is always a 'continue to' basic block."); + if (!NormalBB) + NormalBB = CurrNormalBB; + else if (NormalBB != CurrNormalBB) + return false; + } + + // In the normal destination, the incoming values for these two `invoke`s + // must be compatible. + SmallPtrSet EquivalenceSet(Invokes.begin(), Invokes.end()); + if (!IncomingValuesAreCompatible( + NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()}, + &EquivalenceSet)) + return false; + } + +#ifndef NDEBUG + // All unwind destinations must be identical. + // We know that because we have started from said unwind destination. + BasicBlock *UnwindBB = nullptr; + for (InvokeInst *II : Invokes) { + BasicBlock *CurrUnwindBB = II->getUnwindDest(); + assert(CurrUnwindBB && "There is always an 'unwind to' basic block."); + if (!UnwindBB) + UnwindBB = CurrUnwindBB; + else + assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination."); + } +#endif + + // In the unwind destination, the incoming values for these two `invoke`s + // must be compatible. + if (!IncomingValuesAreCompatible( + Invokes.front()->getUnwindDest(), + {Invokes[0]->getParent(), Invokes[1]->getParent()})) + return false; + + // Ignoring arguments, these `invoke`s must be identical, + // including operand bundles. + const InvokeInst *II0 = Invokes.front(); + for (auto *II : Invokes.drop_front()) + if (!II->isSameOperationAs(II0)) + return false; + + // Can we theoretically form the data operands for the merged `invoke`? + auto IsIllegalToMergeArguments = [](auto Ops) { + Type *Ty = std::get<0>(Ops)->getType(); + assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?"); + return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops); + }; + assert(Invokes.size() == 2 && "Always called with exactly two candidates."); + if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()), + IsIllegalToMergeArguments)) + return false; + + return true; +} + +} // namespace + +// Merge all invokes in the provided set, all of which are compatible +// as per the `CompatibleSets::shouldBelongToSameSet()`. 
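[Editorial note: before the full implementation that follows, here is a self-contained sketch of the CompatibleSets grouping pattern described above; it is not part of the patch. The helper groupCompatible and the parity predicate are invented for the example; the real code groups InvokeInst pointers using shouldBelongToSameSet(), checking only the first member of each set because compatibility is assumed transitive.]

#include <functional>
#include <vector>

// Group items into sets of mutually "compatible" elements using the same
// linear scan (quadratic in the worst case) as CompatibleSets::getCompatibleSet().
template <typename T>
std::vector<std::vector<T>>
groupCompatible(const std::vector<T> &Items,
                const std::function<bool(const T &, const T &)> &Compatible) {
  std::vector<std::vector<T>> Sets;
  for (const T &Item : Items) {
    bool Placed = false;
    for (auto &Set : Sets) {
      // Checking the first member of the set is enough, exactly as in
      // getCompatibleSet(), because all members of a set are compatible.
      if (Compatible(Set.front(), Item)) {
        Set.push_back(Item);
        Placed = true;
        break;
      }
    }
    // Otherwise this item starts a new set of its own.
    if (!Placed)
      Sets.push_back({Item});
  }
  return Sets;
}

int main() {
  // Group integers by parity: {1, 3, 5} and {2, 4} come out as two sets.
  auto Sets = groupCompatible<int>(
      {1, 2, 3, 4, 5},
      [](const int &A, const int &B) { return A % 2 == B % 2; });
  return Sets.size() == 2 ? 0 : 1;
}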
+static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
+                                       DomTreeUpdater *DTU) {
+  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
+
+  SmallVector<DominatorTree::UpdateType, 8> Updates;
+  if (DTU)
+    Updates.reserve(2 + 3 * Invokes.size());
+
+  bool HasNormalDest =
+      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
+
+  // Clone one of the invokes into a new basic block.
+  // Since they are all compatible, it doesn't matter which invoke is cloned.
+  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
+    InvokeInst *II0 = Invokes.front();
+    BasicBlock *II0BB = II0->getParent();
+    BasicBlock *InsertBeforeBlock =
+        II0->getParent()->getIterator()->getNextNode();
+    Function *Func = II0BB->getParent();
+    LLVMContext &Ctx = II0->getContext();
+
+    BasicBlock *MergedInvokeBB = BasicBlock::Create(
+        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
+
+    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
+    // NOTE: all invokes have the same attributes, so no handling needed.
+    MergedInvokeBB->getInstList().push_back(MergedInvoke);
+
+    if (!HasNormalDest) {
+      // This set does not have a normal destination,
+      // so just form a new block with unreachable terminator.
+      BasicBlock *MergedNormalDest = BasicBlock::Create(
+          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
+      new UnreachableInst(Ctx, MergedNormalDest);
+      MergedInvoke->setNormalDest(MergedNormalDest);
+    }
+
+    // The unwind destination, however, remains identical for all invokes here.
+
+    return MergedInvoke;
+  }();
+
+  if (DTU) {
+    // Predecessor blocks that contained these invokes will now branch to
+    // the new block that contains the merged invoke, ...
+    for (InvokeInst *II : Invokes)
+      Updates.push_back(
+          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
+
+    // ... which has the new `unreachable` block as normal destination,
+    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
+    for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
+      Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
+                         SuccBBOfMergedInvoke});
+
+    // Since predecessor blocks now unconditionally branch to a new block,
+    // they no longer branch to their original successors.
+    for (InvokeInst *II : Invokes)
+      for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
+        Updates.push_back(
+            {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
+  }
+
+  bool IsIndirectCall = Invokes[0]->isIndirectCall();
+
+  // Form the merged operands for the merged invoke.
+  for (Use &U : MergedInvoke->operands()) {
+    // Only PHI together the indirect callees and data operands.
+    if (MergedInvoke->isCallee(&U)) {
+      if (!IsIndirectCall)
+        continue;
+    } else if (!MergedInvoke->isDataOperand(&U))
+      continue;
+
+    // Don't create trivial PHI's with all-identical incoming values.
+    bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
+      return II->getOperand(U.getOperandNo()) != U.get();
+    });
+    if (!NeedPHI)
+      continue;
+
+    // Form a PHI out of all the data ops under this index.
+    PHINode *PN = PHINode::Create(
+        U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke);
+    for (InvokeInst *II : Invokes)
+      PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
+
+    U.set(PN);
+  }
+
+  // We've ensured that each PHI node has compatible (identical) incoming values
+  // when coming from each of the `invoke`s in the current merge set,
+  // so update the PHI nodes accordingly.
+  for (BasicBlock *Succ : successors(MergedInvoke))
+    AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
+                          /*ExistPred=*/Invokes.front()->getParent());
+
+  // And finally, replace the original `invoke`s with an unconditional branch
+  // to the block with the merged `invoke`. Also, give that merged `invoke`
+  // the merged debugloc of all the original `invoke`s.
+  const DILocation *MergedDebugLoc = nullptr;
+  for (InvokeInst *II : Invokes) {
+    // Compute the debug location common to all the original `invoke`s.
+    if (!MergedDebugLoc)
+      MergedDebugLoc = II->getDebugLoc();
+    else
+      MergedDebugLoc =
+          DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
+
+    // And replace the old `invoke` with an unconditional branch
+    // to the block with the merged `invoke`.
+    for (BasicBlock *OrigSuccBB : successors(II->getParent()))
+      OrigSuccBB->removePredecessor(II->getParent());
+    BranchInst::Create(MergedInvoke->getParent(), II->getParent());
+    II->replaceAllUsesWith(MergedInvoke);
+    II->eraseFromParent();
+    ++NumInvokesMerged;
+  }
+  MergedInvoke->setDebugLoc(MergedDebugLoc);
+  ++NumInvokeSetsFormed;
+
+  if (DTU)
+    DTU->applyUpdates(Updates);
+}
+
+/// If this block is a `landingpad` exception handling block, categorize all
+/// the predecessor `invoke`s into sets, with all `invoke`s in each set
+/// being "mergeable" together, and then merge invokes in each set together.
+///
+/// This is a weird mix of hoisting and sinking. Visually, it goes from:
+///           [...]                [...]
+///             |                    |
+///        [invoke0]            [invoke1]
+///           / \                  / \
+///     [cont0] [landingpad] [cont1]
+/// to:
+///      [...] [...]
+///          \ /
+///       [invoke]
+///          / \
+///     [cont] [landingpad]
+///
+/// But of course we can only do that if the invokes share the `landingpad`,
+/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
+/// and the invoked functions are "compatible".
+static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
+  if (!EnableMergeCompatibleInvokes)
+    return false;
+
+  bool Changed = false;
+
+  // FIXME: generalize to all exception handling blocks?
+  if (!BB->isLandingPad())
+    return Changed;
+
+  CompatibleSets Grouper;
+
+  // Record all the predecessors of this `landingpad`. As per verifier,
+  // the only allowed predecessor is the unwind edge of an `invoke`.
+  // We want to group "compatible" `invokes` into the same set to be merged.
+  for (BasicBlock *PredBB : predecessors(BB))
+    Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
+
+  // And now, merge `invoke`s that were grouped together.
+  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
+    if (Invokes.size() < 2)
+      continue;
+    Changed = true;
+    MergeCompatibleInvokesImpl(Invokes, DTU);
+  }
+
+  return Changed;
+}
+
 /// Determine if we can hoist or sink a sole store instruction out of a
 /// conditional block.
 ///
@@ -2326,15 +2679,15 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
         passingValueIsAlwaysUndefined(ThenV, &PN))
       return false;

+    if (canTrap(OrigV) || canTrap(ThenV))
+      return false;
+
     HaveRewritablePHIs = true;
     ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
     ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
     if (!OrigCE && !ThenCE)
-      continue; // Known safe and cheap.
+      continue; // Known cheap (FIXME: Maybe not true for aggregates).

-    if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
-        (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
-      return false;
     InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
     InstructionCost ThenCost = ThenCE ?
computeSpeculationCost(ThenCE, TTI) : 0; InstructionCost MaxCost = @@ -2626,40 +2979,85 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { return true; } -/// If we have a conditional branch on a PHI node value that is defined in the -/// same block as the branch and if any PHI entries are constants, thread edges -/// corresponding to that entry to be branches to their ultimate destination. -static Optional FoldCondBranchOnPHIImpl(BranchInst *BI, - DomTreeUpdater *DTU, - const DataLayout &DL, - AssumptionCache *AC) { +static ConstantInt * +getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To, + SmallDenseMap, + ConstantInt *> &Visited) { + // Don't look past the block defining the value, we might get the value from + // a previous loop iteration. + auto *I = dyn_cast(V); + if (I && I->getParent() == To) + return nullptr; + + // We know the value if the From block branches on it. + auto *BI = dyn_cast(From->getTerminator()); + if (BI && BI->isConditional() && BI->getCondition() == V && + BI->getSuccessor(0) != BI->getSuccessor(1)) + return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext()) + : ConstantInt::getFalse(BI->getContext()); + + // Limit the amount of blocks we inspect. + if (Visited.size() >= 8) + return nullptr; + + auto Pair = Visited.try_emplace({From, To}, nullptr); + if (!Pair.second) + return Pair.first->second; + + // Check whether the known value is the same for all predecessors. + ConstantInt *Common = nullptr; + for (BasicBlock *Pred : predecessors(From)) { + ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited); + if (!C || (Common && Common != C)) + return nullptr; + Common = C; + } + return Visited[{From, To}] = Common; +} + +/// If we have a conditional branch on something for which we know the constant +/// value in predecessors (e.g. a phi node in the current block), thread edges +/// from the predecessor to their ultimate destination. +static Optional +FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, + const DataLayout &DL, + AssumptionCache *AC) { + SmallMapVector KnownValues; BasicBlock *BB = BI->getParent(); - PHINode *PN = dyn_cast(BI->getCondition()); - // NOTE: we currently cannot transform this case if the PHI node is used - // outside of the block. - if (!PN || PN->getParent() != BB || !PN->hasOneUse()) - return false; + Value *Cond = BI->getCondition(); + PHINode *PN = dyn_cast(Cond); + if (PN && PN->getParent() == BB) { + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } - // Degenerate case of a single entry PHI. - if (PN->getNumIncomingValues() == 1) { - FoldSingleEntryPHINodes(PN->getParent()); - return true; + for (Use &U : PN->incoming_values()) + if (auto *CB = dyn_cast(U)) + KnownValues.insert({PN->getIncomingBlock(U), CB}); + } else { + SmallDenseMap, ConstantInt *> Visited; + for (BasicBlock *Pred : predecessors(BB)) { + if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited)) + KnownValues.insert({Pred, CB}); + } } + if (KnownValues.empty()) + return false; + // Now we know that this block has multiple preds and two succs. + // Check that the block is small enough and values defined in the block are + // not used outside of it. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - // Okay, this is a simple enough basic block. See if any phi values are - // constants. 
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantInt *CB = dyn_cast(PN->getIncomingValue(i)); - if (!CB || !CB->getType()->isIntegerTy(1)) - continue; - + for (const auto &Pair : KnownValues) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. - BasicBlock *PredBB = PN->getIncomingBlock(i); + ConstantInt *CB = Pair.second; + BasicBlock *PredBB = Pair.first; BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); if (RealDest == BB) @@ -2690,6 +3088,7 @@ static Optional FoldCondBranchOnPHIImpl(BranchInst *BI, // cloned instructions outside of EdgeBB. BasicBlock::iterator InsertPt = EdgeBB->begin(); DenseMap TranslateMap; // Track translated values. + TranslateMap[Cond] = Pair.second; for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast(BBI)) { TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); @@ -2708,7 +3107,7 @@ static Optional FoldCondBranchOnPHIImpl(BranchInst *BI, } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { + if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { @@ -2746,6 +3145,12 @@ static Optional FoldCondBranchOnPHIImpl(BranchInst *BI, DTU->applyUpdates(Updates); } + // For simplicity, we created a separate basic block for the edge. Merge + // it back into the predecessor if possible. This not only avoids + // unnecessary SimplifyCFG iterations, but also makes sure that we don't + // bypass the check for trivial cycles above. + MergeBlockIntoPredecessor(EdgeBB, DTU); + // Signal repeat, simplifying any other constants. return None; } @@ -2753,13 +3158,15 @@ static Optional FoldCondBranchOnPHIImpl(BranchInst *BI, return false; } -static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, - const DataLayout &DL, AssumptionCache *AC) { +static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, + DomTreeUpdater *DTU, + const DataLayout &DL, + AssumptionCache *AC) { Optional Result; bool EverChanged = false; do { // Note that None means "we changed things, but recurse further." - Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC); + Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC); EverChanged |= Result == None || *Result; } while (Result == None); return EverChanged; @@ -2847,7 +3254,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); - if (Value *V = SimplifyInstruction(PN, {DL, PN})) { + if (Value *V = simplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); Changed = true; @@ -3186,18 +3593,18 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, Instruction *Cond = dyn_cast(BI->getCondition()); - if (!Cond || (!isa(Cond) && !isa(Cond)) || + if (!Cond || + (!isa(Cond) && !isa(Cond) && + !isa(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. 
- if (ConstantExpr *CE = dyn_cast(Cond->getOperand(0))) - if (CE->canTrap()) - return false; - if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) - if (CE->canTrap()) - return false; + if (canTrap(Cond->getOperand(0))) + return false; + if (canTrap(Cond->getOperand(1))) + return false; // Finally, don't infinitely unroll conditional loops. if (is_contained(successors(BB), BB)) @@ -3384,7 +3791,9 @@ static bool mergeConditionalStoreToAddress( return false; // Now check the stores are compatible. - if (!QStore->isUnordered() || !PStore->isUnordered()) + if (!QStore->isUnordered() || !PStore->isUnordered() || + PStore->getValueOperand()->getType() != + QStore->getValueOperand()->getType()) return false; // Check that sinking the store won't cause program behavior changes. Sinking @@ -3687,7 +4096,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { // Okay, the outcome of this conditional branch is statically - // knowable. If this block had a single pred, handle specially. + // knowable. If this block had a single pred, handle specially, otherwise + // FoldCondBranchOnValueKnownInPredecessor() will handle it. if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; @@ -3695,35 +4105,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - - // Otherwise, if there are multiple predecessors, insert a PHI that merges - // in the constant and simplify the block result. Subsequent passes of - // simplifycfg will thread the block. - if (BlockIsSimpleEnoughToThreadThrough(BB)) { - pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - PHINode *NewPN = PHINode::Create( - Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), - BI->getCondition()->getName() + ".pr", &BB->front()); - // Okay, we're going to insert the PHI node. Since PBI is not the only - // predecessor, compute the PHI'd conditional value for all of the preds. - // Any predecessor where the condition is not computable we keep symbolic. 
-    for (pred_iterator PI = PB; PI != PE; ++PI) {
-      BasicBlock *P = *PI;
-      if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
-          PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
-          PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
-        bool CondIsTrue = PBI->getSuccessor(0) == BB;
-        NewPN->addIncoming(
-            ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
-            P);
-      } else {
-        NewPN->addIncoming(BI->getCondition(), P);
-      }
-    }
-
-    BI->setCondition(NewPN);
-    return true;
-  }
 }

 // If the previous block ended with a widenable branch, determine if reusing
@@ -3732,9 +4113,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
   if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
     return true;

-  if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
-    if (CE->canTrap())
-      return false;
+  if (canTrap(BI->getCondition()))
+    return false;

   // If both branches are conditional and both contain stores to the same
   // address, remove the stores from the conditionals and create a conditional
@@ -3791,15 +4171,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
     PHINode *PN = cast<PHINode>(II);
     Value *BIV = PN->getIncomingValueForBlock(BB);
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
-      if (CE->canTrap())
-        return false;
+    if (canTrap(BIV))
+      return false;

     unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
     Value *PBIV = PN->getIncomingValue(PBBIdx);
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
-      if (CE->canTrap())
-        return false;
+    if (canTrap(PBIV))
+      return false;
   }

   // Finally, if everything is ok, fold the branches to logical ops.
@@ -4116,7 +4494,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
   assert(VVal && "Should have a unique destination value");
   ICI->setOperand(0, VVal);

-  if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
+  if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
     ICI->replaceAllUsesWith(V);
     ICI->eraseFromParent();
   }
@@ -4812,8 +5190,9 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
   }
 }

-/// Turn a switch with two reachable destinations into an integer range
-/// comparison and branch.
+/// Turn a switch into an integer range comparison and branch.
+/// Switches with more than 2 destinations are ignored.
+/// Switches with 1 destination are also ignored.
 bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
                                              IRBuilder<> &Builder) {
   assert(SI->getNumCases() > 1 && "Degenerate switch?");
@@ -4845,6 +5224,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
     }
     return false; // More than two destinations.
   }
+  if (!DestB)
+    return false; // All destinations are the same and the default is unreachable

   assert(DestA && DestB &&
          "Single-destination switch should have been folded.");
@@ -5169,11 +5550,6 @@ ConstantFold(Instruction *I, const DataLayout &DL,
       return nullptr;
   }

-  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
-    return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
-                                           COps[1], DL);
-  }
-
   return ConstantFoldInstOperands(I, COps, DL);
 }

@@ -5182,7 +5558,7 @@ ConstantFold(Instruction *I, const DataLayout &DL,
 /// destinations CaseDest corresponding to value CaseVal (0 for the default
 /// case), of a switch instruction SI.
static bool -GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, +getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl> &Res, const DataLayout &DL, const TargetTransformInfo &TTI) { @@ -5253,9 +5629,9 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, // Helper function used to add CaseVal to the list of cases that generate // Result. Returns the updated number of cases that generate this result. -static uintptr_t MapCaseToResult(ConstantInt *CaseVal, - SwitchCaseResultVectorTy &UniqueResults, - Constant *Result) { +static size_t mapCaseToResult(ConstantInt *CaseVal, + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { for (auto &I : UniqueResults) { if (I.first == Result) { I.second.push_back(CaseVal); @@ -5271,18 +5647,19 @@ static uintptr_t MapCaseToResult(ConstantInt *CaseVal, // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool -InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult, const DataLayout &DL, - const TargetTransformInfo &TTI, - uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) { +static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL, + const TargetTransformInfo &TTI, + uintptr_t MaxUniqueResults) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); // Resulting value at phi nodes for this case value. SwitchCaseResultsTy Results; - if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, + if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; @@ -5291,11 +5668,11 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, return false; // Add the case->result mapping to UniqueResults. - const uintptr_t NumCasesForResult = - MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + const size_t NumCasesForResult = + mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); // Early out if there are too many cases for this result. - if (NumCasesForResult > MaxCasesPerResult) + if (NumCasesForResult > MaxSwitchCasesPerResult) return false; // Early out if there are too many unique results. @@ -5311,7 +5688,7 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, // Find the default result value. SmallVector, 1> DefaultResults; BasicBlock *DefaultDest = SI->getDefaultDest(); - GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, + getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, DL, TTI); // If the default value is not found abort unless the default destination // is unreachable. @@ -5326,48 +5703,76 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, // Helper function that checks if it is possible to transform a switch with only // two cases (or two cases + default) that produces a result into a select. 
-// Example:
-// switch (a) {
-//   case 10:                %0 = icmp eq i32 %a, 10
-//     return 10;            %1 = select i1 %0, i32 10, i32 4
-//   case 20:        ---->   %2 = icmp eq i32 %a, 20
-//     return 2;             %3 = select i1 %2, i32 2, i32 %1
-//   default:
-//     return 4;
-// }
-static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
-                                   Constant *DefaultResult, Value *Condition,
-                                   IRBuilder<> &Builder) {
+// TODO: Handle switches with more than 2 cases that map to the same result.
+static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
+                                 Constant *DefaultResult, Value *Condition,
+                                 IRBuilder<> &Builder) {
   // If we are selecting between only two cases transform into a simple
   // select or a two-way select if default is possible.
+  // Example:
+  // switch (a) {                  %0 = icmp eq i32 %a, 10
+  //   case 10: return 42;         %1 = select i1 %0, i32 42, i32 4
+  //   case 20: return 2;   ---->  %2 = icmp eq i32 %a, 20
+  //   default: return 4;          %3 = select i1 %2, i32 2, i32 %1
+  // }
   if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
       ResultVector[1].second.size() == 1) {
-    ConstantInt *const FirstCase = ResultVector[0].second[0];
-    ConstantInt *const SecondCase = ResultVector[1].second[0];
-
-    bool DefaultCanTrigger = DefaultResult;
+    ConstantInt *FirstCase = ResultVector[0].second[0];
+    ConstantInt *SecondCase = ResultVector[1].second[0];
     Value *SelectValue = ResultVector[1].first;
-    if (DefaultCanTrigger) {
-      Value *const ValueCompare =
+    if (DefaultResult) {
+      Value *ValueCompare =
           Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
       SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                          DefaultResult, "switch.select");
     }
-    Value *const ValueCompare =
+    Value *ValueCompare =
         Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
     return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                 SelectValue, "switch.select");
   }

-  // Handle the degenerate case where two cases have the same value.
-  if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 &&
-      DefaultResult) {
-    Value *Cmp1 = Builder.CreateICmpEQ(
-        Condition, ResultVector[0].second[0], "switch.selectcmp.case1");
-    Value *Cmp2 = Builder.CreateICmpEQ(
-        Condition, ResultVector[0].second[1], "switch.selectcmp.case2");
-    Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
-    return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+  // Handle the degenerate case where two cases have the same result value.
+  if (ResultVector.size() == 1 && DefaultResult) {
+    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
+    unsigned CaseCount = CaseValues.size();
+    // Cases that form an n-bit group map to the same result:
+    //   case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
+    //   case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
+    //   case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
+    if (isPowerOf2_32(CaseCount)) {
+      ConstantInt *MinCaseVal = CaseValues[0];
+      // Find the minimal case value.
+      for (auto Case : CaseValues)
+        if (Case->getValue().slt(MinCaseVal->getValue()))
+          MinCaseVal = Case;
+
+      // Mark the bits touched by the case values.
+      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
+      for (auto Case : CaseValues)
+        BitMask |= (Case->getValue() - MinCaseVal->getValue());
+
+      // Check whether the cases with the same result cover all numbers
+      // in the touched bits.
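[Editorial note: a worked instance of the bitmask check above, using the hypothetical case values 0, 2, 8, 10 from the comment.]
      // With cases {0, 2, 8, 10}: CaseCount = 4, MinCaseVal = 0, and
      //   BitMask = (0-0) | (2-0) | (8-0) | (10-0) = 0b1010.
      // popcount(0b1010) = 2 == Log2_32(4), so the four cases are exactly
      // the values whose bits outside 0b1010 are clear, and the switch
      // folds to (Cond & ~0b1010) == 0 ? result : default.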
+ if (BitMask.countPopulation() == Log2_32(CaseCount)) { + if (!MinCaseVal->isNullValue()) + Condition = Builder.CreateSub(Condition, MinCaseVal); + Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and"); + Value *Cmp = Builder.CreateICmpEQ( + And, Constant::getNullValue(And->getType()), "switch.selectcmp"); + return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + } + } + + // Handle the degenerate case where two cases have the same value. + if (CaseValues.size() == 2) { + Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0], + "switch.selectcmp.case1"); + Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1], + "switch.selectcmp.case2"); + Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp"); + return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + } } return nullptr; @@ -5375,10 +5780,10 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, // Helper function to cleanup a switch instruction that has been converted into // a select, fixing up PHI nodes and basic blocks. -static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, - Value *SelectValue, - IRBuilder<> &Builder, - DomTreeUpdater *DTU) { +static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, + Value *SelectValue, + IRBuilder<> &Builder, + DomTreeUpdater *DTU) { std::vector Updates; BasicBlock *SelectBB = SI->getParent(); @@ -5409,33 +5814,31 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, DTU->applyUpdates(Updates); } -/// If the switch is only used to initialize one or more -/// phi nodes in a common successor block with only two different -/// constant values, replace the switch with select. -static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - DomTreeUpdater *DTU, const DataLayout &DL, - const TargetTransformInfo &TTI) { +/// If a switch is only used to initialize one or more phi nodes in a common +/// successor block with only two different constant values, try to replace the +/// switch with a select. Returns true if the fold was made. +static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; Constant *DefaultResult; SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. - if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, - DL, TTI, /*MaxUniqueResults*/2, - /*MaxCasesPerResult*/2)) + if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL, TTI, /*MaxUniqueResults*/ 2)) return false; - assert(PHI != nullptr && "PHI for value select not found"); + assert(PHI != nullptr && "PHI for value select not found"); Builder.SetInsertPoint(SI); Value *SelectValue = - ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); - if (SelectValue) { - RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); - return true; - } - // The switch couldn't be converted into a select. 
- return false; + foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder); + if (!SelectValue) + return false; + + removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU); + return true; } namespace { @@ -5655,7 +6058,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { IntegerType *IT = cast(Index->getType()); uint64_t TableSize = Array->getInitializer()->getType()->getArrayNumElements(); - if (TableSize > (1ULL << (IT->getBitWidth() - 1))) + if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u))) Index = Builder.CreateZExt( Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), "switch.tableidx.zext"); @@ -5707,6 +6110,27 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, DL.fitsInLegalInteger(IT->getBitWidth()); } +static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) { + // 40% is the default density for building a jump table in optsize/minsize + // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this + // function was based on. + const uint64_t MinDensity = 40; + + if (CaseRange >= UINT64_MAX / 100) + return false; // Avoid multiplication overflows below. + + return NumCases * 100 >= CaseRange * MinDensity; +} + +static bool isSwitchDense(ArrayRef Values) { + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + if (Range < Diff) + return false; // Overflow. + + return isSwitchDense(Values.size(), Range); +} + /// Determine whether a lookup table should be built for this switch, based on /// the number of cases, size of the table, and the types of the results. // TODO: We could support larger than legal types by limiting based on the @@ -5716,8 +6140,8 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap &ResultTypes) { - if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) - return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() > TableSize) + return false; // TableSize overflowed. bool AllTablesFitInRegister = true; bool HasIllegalType = false; @@ -5747,10 +6171,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, if (HasIllegalType) return false; - // The table density should be at least 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - return SI->getNumCases() * 10 >= TableSize * 4; + return isSwitchDense(SI->getNumCases(), TableSize); } /// Try to reuse the switch table index compare. Following pattern: @@ -5888,7 +6309,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Resulting value at phi nodes for this case value. using ResultsTy = SmallVector, 4>; ResultsTy Results; - if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, + if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; @@ -5916,7 +6337,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // or a bitmask that fits in a register. 
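As a worked instance of the shared density test above (numbers chosen for illustration): 40 cases over a range of 100 values sit exactly at the 40% threshold and count as dense, while 39 cases over the same range do not. A standalone sketch of the same arithmetic:

    #include <cassert>
    #include <cstdint>

    // Mirrors the patch's isSwitchDense(NumCases, CaseRange) logic.
    static bool isDense(uint64_t NumCases, uint64_t CaseRange) {
      const uint64_t MinDensity = 40; // percent
      if (CaseRange >= UINT64_MAX / 100)
        return false; // guard the CaseRange * MinDensity product, as in the patch
      return NumCases * 100 >= CaseRange * MinDensity;
    }

    int main() {
      assert(isDense(40, 100));  // 4000 >= 4000 -> dense
      assert(!isDense(39, 100)); // 3900 <  4000 -> sparse
    }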
   SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
   bool HasDefaultResults =
-      GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                      DefaultResultsList, DL, TTI);
 
   bool NeedMask = (TableHasHoles && !HasDefaultResults);
@@ -6086,17 +6507,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   return true;
 }
 
-static bool isSwitchDense(ArrayRef<int64_t> Values) {
-  // See also SelectionDAGBuilder::isDense(), which this function was based on.
-  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
-  uint64_t Range = Diff + 1;
-  uint64_t NumCases = Values.size();
-  // 40% is the default density for building a jump table in optsize/minsize mode.
-  uint64_t MinDensity = 40;
-
-  return NumCases * 100 >= Range * MinDensity;
-}
-
 /// Try to transform a switch that has "holes" in it to a contiguous sequence
 /// of cases.
 ///
@@ -6211,14 +6621,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   }
 
   // Try to transform the switch into an icmp and a branch.
-  if (TurnSwitchRangeIntoICmp(SI, Builder))
+  // The conversion from switch to comparison may lose information on
+  // impossible switch values, so disable it early in the pipeline.
+  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
     return requestResimplify();
 
   // Remove unreachable cases.
   if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
     return requestResimplify();
 
-  if (switchToSelect(SI, Builder, DTU, DL, TTI))
+  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
     return requestResimplify();
 
   if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
@@ -6521,12 +6933,11 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
     return requestResimplify();
   }
 
-  // If this is a branch on a phi node in the current block, thread control
-  // through this block if any PHI node entries are constants.
-  if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
-    if (PN->getParent() == BI->getParent())
-      if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC))
-        return requestResimplify();
+  // If this is a branch on something for which we know the constant value in
+  // predecessors (e.g. a phi node in the current block), thread control
+  // through this block.
+  if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
+    return requestResimplify();
 
   // Scan predecessor blocks for conditional branches.
   for (BasicBlock *Pred : predecessors(BB))
@@ -6725,7 +7136,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
     return true;
 
   if (SinkCommon && Options.SinkCommonInsts)
-    if (SinkCommonCodeFromPredecessors(BB, DTU)) {
+    if (SinkCommonCodeFromPredecessors(BB, DTU) ||
+        MergeCompatibleInvokes(BB, DTU)) {
       // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 5b7fd4349c6c..dbef1ff2e739 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -13,11 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
@@ -58,7 +56,7 @@ namespace {
     SCEVExpander &Rewriter;
     SmallVectorImpl<WeakTrackingVH> &DeadInsts;
 
-    bool Changed;
+    bool Changed = false;
 
   public:
     SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
@@ -66,7 +64,7 @@ namespace {
                    SCEVExpander &Rewriter,
                    SmallVectorImpl<WeakTrackingVH> &Dead)
         : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
-          DeadInsts(Dead), Changed(false) {
+          DeadInsts(Dead) {
       assert(LI && "IV simplification requires LoopInfo");
     }
 
@@ -161,11 +159,12 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
       D = ConstantInt::get(UseInst->getContext(),
                           APInt::getOneBitSet(BitWidth, D->getZExtValue()));
     }
-    FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+    const auto *LHS = SE->getSCEV(IVSrc);
+    const auto *RHS = SE->getSCEV(D);
+    FoldedExpr = SE->getUDivExpr(LHS, RHS);
     // We might have 'exact' flag set at this point which will no longer be
    // correct after we make the replacement.
-    if (UseInst->isExact() &&
-        SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
+    if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
       MustDropExactFlag = true;
   }
   // We have something that might fold its operand. Compare SCEVs.
@@ -872,6 +871,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
     Instruction *IVOperand = UseOper.second;
     for (unsigned N = 0; IVOperand; ++N) {
       assert(N <= Simplified.size() && "runaway iteration");
+      (void) N;
 
       Value *NewOper = foldIVUser(UseInst, IVOperand);
       if (!NewOper)
@@ -1757,10 +1757,6 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
     truncateIVUse(DU, DT, LI);
     return nullptr;
   }
-  // Assume block terminators cannot evaluate to a recurrence. We can't to
-  // insert a Trunc after a terminator if there happens to be a critical edge.
-  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
-         "SCEV is not expected to evaluate a block terminator");
 
   // Reuse the IV increment that SCEVExpander created as long as it dominates
   // NarrowUse.
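The `(void) N;` added to simplifyUsers is the standard idiom for a variable consumed only by an assert; a minimal self-contained sketch of the same pattern (all names here are illustrative):

    #include <cassert>

    void zeroBuffer(int *Begin, int *End) {
      unsigned Seen = 0;
      for (int *I = Begin; I != End; ++I) {
        *I = 0;
        ++Seen;
      }
      assert(Seen <= 1024 && "runaway iteration");
      (void)Seen; // assert() compiles away under NDEBUG; the cast keeps
                  // release builds free of unused-variable warnings.
    }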
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index e02d02a05752..f4306bb43dfd 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -14,28 +14,23 @@ #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; @@ -206,6 +201,11 @@ static Value *copyFlags(const CallInst &Old, Value *New) { return New; } +// Helper to avoid truncating the length if size_t is 32-bits. +static StringRef substr(StringRef Str, uint64_t Len) { + return Len >= Str.size() ? Str : Str.substr(0, Len); +} + //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// @@ -242,7 +242,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of // the string .. we're concatenating). - Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); + Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. @@ -326,7 +326,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { if (!getConstantStringInfo(SrcStr, Str)) { if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI)) - return B.CreateGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr"); + return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr"); return nullptr; } @@ -339,35 +339,29 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { return Constant::getNullValue(CI->getType()); // strchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); + return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); } Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) { Value *SrcStr = CI->getArgOperand(0); - ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); + Value *CharVal = CI->getArgOperand(1); + ConstantInt *CharC = dyn_cast(CharVal); annotateNonNullNoUndefBasedOnAccess(CI, 0); - // Cannot fold anything if we're not looking for a constant. 
- if (!CharC) - return nullptr; - StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) - if (CharC->isZero()) + if (CharC && CharC->isZero()) return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI)); return nullptr; } - // Compute the offset. - size_t I = (0xFF & CharC->getSExtValue()) == 0 - ? Str.size() - : Str.rfind(CharC->getSExtValue()); - if (I == StringRef::npos) // Didn't find the char. Return null. - return Constant::getNullValue(CI->getType()); - - // strrchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr"); + // Try to expand strrchr to the memrchr nonstandard extension if it's + // available, or simply fail otherwise. + uint64_t NBytes = Str.size() + 1; // Include the terminating nul. + Type *IntPtrType = DL.getIntPtrType(CI->getContext()); + Value *Size = ConstantInt::get(IntPtrType, NBytes); + return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI)); } Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) { @@ -428,6 +422,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) { return nullptr; } +// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant +// arrays LHS and RHS and nonconstant Size. +static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS, + Value *Size, bool StrNCmp, + IRBuilderBase &B, const DataLayout &DL); + Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { Value *Str1P = CI->getArgOperand(0); Value *Str2P = CI->getArgOperand(1); @@ -442,7 +442,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { if (ConstantInt *LengthArg = dyn_cast(Size)) Length = LengthArg->getZExtValue(); else - return nullptr; + return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL); if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -456,8 +456,9 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { // strncmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) { - StringRef SubStr1 = Str1.substr(0, Length); - StringRef SubStr2 = Str2.substr(0, Length); + // Avoid truncating the 64-bit Length to 32 bits in ILP32. + StringRef SubStr1 = substr(Str1, Length); + StringRef SubStr2 = substr(Str2, Length); return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); } @@ -557,8 +558,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) { Type *PT = Callee->getFunctionType()->getParamType(0); Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); - Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst, - ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); + Value *DstEnd = B.CreateInBoundsGEP( + B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. 
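The rewritten optimizeStrRChr above defers to memrchr over the whole constant string, including its terminating nul, so every byte strrchr may legally inspect is covered. A small sketch of the equivalence it relies on (values are illustrative; memrchr itself is a nonstandard extension, so the sketch only exercises strrchr):

    #include <cassert>
    #include <cstring>

    int main() {
      const char S[] = "abcabc"; // 6 characters plus the terminating nul
      // Conceptually: strrchr(S, c) == memrchr(S, c, sizeof S) for any c,
      // because both scan the same 7 bytes.
      assert(strrchr(S, 'b') == S + 4);  // last 'b'
      assert(strrchr(S, '\0') == S + 6); // the terminator itself is findable
      assert(strrchr(S, 'z') == nullptr);
    }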
@@ -634,12 +635,51 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { } Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, - unsigned CharSize) { + unsigned CharSize, + Value *Bound) { Value *Src = CI->getArgOperand(0); + Type *CharTy = B.getIntNTy(CharSize); + + if (isOnlyUsedInZeroEqualityComparison(CI) && + (!Bound || isKnownNonZero(Bound, DL))) { + // Fold strlen: + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + // and likewise strnlen with constant N > 0: + // strnlen(x, N) != 0 --> *x != 0 + // strnlen(x, N) == 0 --> *x == 0 + return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"), + CI->getType()); + } + + if (Bound) { + if (ConstantInt *BoundCst = dyn_cast(Bound)) { + if (BoundCst->isZero()) + // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise. + return ConstantInt::get(CI->getType(), 0); + + if (BoundCst->isOne()) { + // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s. + Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0"); + Value *ZeroChar = ConstantInt::get(CharTy, 0); + Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp"); + return B.CreateZExt(Cmp, CI->getType()); + } + } + } + + if (uint64_t Len = GetStringLength(Src, CharSize)) { + Value *LenC = ConstantInt::get(CI->getType(), Len - 1); + // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2 + // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound. + if (Bound) + return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound); + return LenC; + } - // Constant folding: strlen("xyz") -> 3 - if (uint64_t Len = GetStringLength(Src, CharSize)) - return ConstantInt::get(CI->getType(), Len - 1); + if (Bound) + // Punt for strnlen for now. + return nullptr; // If s is a constant pointer pointing to a string literal, we can fold // strlen(s + x) to strlen(s) - x, when x is known to be in the range @@ -650,6 +690,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, // very useful because calling strlen for a pointer of other types is // very uncommon. if (GEPOperator *GEP = dyn_cast(Src)) { + // TODO: Handle subobjects. if (!isGEPBasedOnPointerToString(GEP, CharSize)) return nullptr; @@ -674,22 +715,15 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, Value *Offset = GEP->getOperand(2); KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr); - Known.Zero.flipAllBits(); uint64_t ArrSize = cast(GEP->getSourceElementType())->getNumElements(); - // KnownZero's bits are flipped, so zeros in KnownZero now represent - // bits known to be zeros in Offset, and ones in KnowZero represent - // bits unknown in Offset. Therefore, Offset is known to be in range - // [0, NullTermIdx] when the flipped KnownZero is non-negative and - // unsigned-less-than NullTermIdx. - // // If Offset is not provably in the range [0, NullTermIdx], we can still // optimize if we can prove that the program has undefined behavior when // Offset is outside that range. That is the case when GEP->getOperand(0) // is a pointer to an object whose memory extent is NullTermIdx+1. 
- if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || - (GEP->isInBounds() && isa(GEP->getOperand(0)) && + if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) || + (isa(GEP->getOperand(0)) && NullTermIdx == ArrSize - 1)) { Offset = B.CreateSExtOrTrunc(Offset, CI->getType()); return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), @@ -713,12 +747,6 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, } } - // strlen(x) != 0 --> *x != 0 - // strlen(x) == 0 --> *x == 0 - if (isOnlyUsedInZeroEqualityComparison(CI)) - return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"), - CI->getType()); - return nullptr; } @@ -729,6 +757,16 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) { return nullptr; } +Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) { + Value *Bound = CI->getArgOperand(1); + if (Value *V = optimizeStringLength(CI, B, 8, Bound)) + return V; + + if (isKnownNonZero(Bound, DL)) + annotateNonNullNoUndefBasedOnAccess(CI, 0); + return nullptr; +} + Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) { Module &M = *CI->getModule(); unsigned WCharSize = TLI->getWCharSize(M) * 8; @@ -755,8 +793,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) { if (I == StringRef::npos) // No match. return Constant::getNullValue(CI->getType()); - return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), - "strpbrk"); + return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0), + B.getInt64(I), "strpbrk"); } // strpbrk(s, "a") -> strchr(s, 'a') @@ -880,35 +918,190 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { } Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) { - if (isKnownNonZero(CI->getOperand(2), DL)) - annotateNonNullNoUndefBasedOnAccess(CI, 0); - return nullptr; + Value *SrcStr = CI->getArgOperand(0); + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); + Value *CharVal = CI->getArgOperand(1); + ConstantInt *LenC = dyn_cast(Size); + Value *NullPtr = Constant::getNullValue(CI->getType()); + + if (LenC) { + if (LenC->isZero()) + // Fold memrchr(x, y, 0) --> null. + return NullPtr; + + if (LenC->isOne()) { + // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y, + // constant or otherwise. + Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0"); + // Slice off the character's high end bits. + CharVal = B.CreateTrunc(CharVal, B.getInt8Ty()); + Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp"); + return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel"); + } + } + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + return nullptr; + + if (Str.size() == 0) + // If the array is empty fold memrchr(A, C, N) to null for any value + // of C and N on the basis that the only valid value of N is zero + // (otherwise the call is undefined). + return NullPtr; + + uint64_t EndOff = UINT64_MAX; + if (LenC) { + EndOff = LenC->getZExtValue(); + if (Str.size() < EndOff) + // Punt out-of-bounds accesses to sanitizers and/or libc. + return nullptr; + } + + if (ConstantInt *CharC = dyn_cast(CharVal)) { + // Fold memrchr(S, C, N) for a constant C. + size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff); + if (Pos == StringRef::npos) + // When the character is not in the source array fold the result + // to null regardless of Size. 
+ return NullPtr; + + if (LenC) + // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos. + return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos)); + + if (Str.find(Str[Pos]) == Pos) { + // When there is just a single occurrence of C in S, i.e., the one + // in Str[Pos], fold + // memrchr(s, c, N) --> N <= Pos ? null : s + Pos + // for nonconstant N. + Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos), + "memrchr.cmp"); + Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, + B.getInt64(Pos), "memrchr.ptr_plus"); + return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel"); + } + } + + // Truncate the string to search at most EndOff characters. + Str = Str.substr(0, EndOff); + if (Str.find_first_not_of(Str[0]) != StringRef::npos) + return nullptr; + + // If the source array consists of all equal characters, then for any + // C and N (whether in bounds or not), fold memrchr(S, C, N) to + // N != 0 && *S == C ? S + N - 1 : null + Type *SizeTy = Size->getType(); + Type *Int8Ty = B.getInt8Ty(); + Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0)); + // Slice off the sought character's high end bits. + CharVal = B.CreateTrunc(CharVal, Int8Ty); + Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal); + Value *And = B.CreateLogicalAnd(NNeZ, CEqS0); + Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1)); + Value *SrcPlus = + B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus"); + return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel"); } Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { Value *SrcStr = CI->getArgOperand(0); Value *Size = CI->getArgOperand(2); - annotateNonNullAndDereferenceable(CI, 0, Size, DL); - ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); + if (isKnownNonZero(Size, DL)) + annotateNonNullNoUndefBasedOnAccess(CI, 0); + + Value *CharVal = CI->getArgOperand(1); + ConstantInt *CharC = dyn_cast(CharVal); ConstantInt *LenC = dyn_cast(Size); + Value *NullPtr = Constant::getNullValue(CI->getType()); // memchr(x, y, 0) -> null if (LenC) { if (LenC->isZero()) - return Constant::getNullValue(CI->getType()); - } else { - // From now on we need at least constant length and string. - return nullptr; + return NullPtr; + + if (LenC->isOne()) { + // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y, + // constant or otherwise. + Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0"); + // Slice off the character's high end bits. + CharVal = B.CreateTrunc(CharVal, B.getInt8Ty()); + Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp"); + return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel"); + } } StringRef Str; if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) return nullptr; - // Truncate the string to LenC. If Str is smaller than LenC we will still only - // scan the string, as reading past the end of it is undefined and we can just - // return null if we don't find the char. - Str = Str.substr(0, LenC->getZExtValue()); + if (CharC) { + size_t Pos = Str.find(CharC->getZExtValue()); + if (Pos == StringRef::npos) + // When the character is not in the source array fold the result + // to null regardless of Size. + return NullPtr; + + // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos + // When the constant Size is less than or equal to the character + // position also fold the result to null. 
+ Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos), + "memchr.cmp"); + Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos), + "memchr.ptr"); + return B.CreateSelect(Cmp, NullPtr, SrcPlus); + } + + if (Str.size() == 0) + // If the array is empty fold memchr(A, C, N) to null for any value + // of C and N on the basis that the only valid value of N is zero + // (otherwise the call is undefined). + return NullPtr; + + if (LenC) + Str = substr(Str, LenC->getZExtValue()); + + size_t Pos = Str.find_first_not_of(Str[0]); + if (Pos == StringRef::npos + || Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) { + // If the source array consists of at most two consecutive sequences + // of the same characters, then for any C and N (whether in bounds or + // not), fold memchr(S, C, N) to + // N != 0 && *S == C ? S : null + // or for the two sequences to: + // N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null) + // ^Sel2 ^Sel1 are denoted above. + // The latter makes it also possible to fold strchr() calls with strings + // of the same characters. + Type *SizeTy = Size->getType(); + Type *Int8Ty = B.getInt8Ty(); + + // Slice off the sought character's high end bits. + CharVal = B.CreateTrunc(CharVal, Int8Ty); + + Value *Sel1 = NullPtr; + if (Pos != StringRef::npos) { + // Handle two consecutive sequences of the same characters. + Value *PosVal = ConstantInt::get(SizeTy, Pos); + Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]); + Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos); + Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal); + Value *And = B.CreateAnd(CEqSPos, NGtPos); + Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal); + Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1"); + } + + Value *Str0 = ConstantInt::get(Int8Ty, Str[0]); + Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal); + Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0)); + Value *And = B.CreateAnd(NNeZ, CEqS0); + return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2"); + } + + if (!LenC) + // From now on we need a constant length and constant array. + return nullptr; // If the char is variable but the input str and length are not we can turn // this memchr call into a simple bit field test. Of course this only works @@ -920,60 +1113,93 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) // != 0 // after bounds check. - if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { - unsigned char Max = - *std::max_element(reinterpret_cast(Str.begin()), - reinterpret_cast(Str.end())); - - // Make sure the bit field we're about to create fits in a register on the - // target. - // FIXME: On a 64 bit architecture this prevents us from using the - // interesting range of alpha ascii chars. We could do better by emitting - // two bitfields or shifting the range by 64 if no lower chars are used. - if (!DL.fitsInLegalInteger(Max + 1)) - return nullptr; + if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) + return nullptr; + + unsigned char Max = + *std::max_element(reinterpret_cast(Str.begin()), + reinterpret_cast(Str.end())); - // For the bit field use a power-of-2 type with at least 8 bits to avoid - // creating unnecessary illegal types. - unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + // Make sure the bit field we're about to create fits in a register on the + // target. 
+ // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. + if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; - // Now build the bit field. - APInt Bitfield(Width, 0); - for (char C : Str) - Bitfield.setBit((unsigned char)C); - Value *BitfieldC = B.getInt(Bitfield); + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); - // Adjust width of "C" to the bitfield width, then mask off the high bits. - Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); - C = B.CreateAnd(C, B.getIntN(Width, 0xFF)); + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); - // First check that the bit field access is within bounds. - Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), - "memchr.bounds"); + // Adjust width of "C" to the bitfield width, then mask off the high bits. + Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType()); + C = B.CreateAnd(C, B.getIntN(Width, 0xFF)); - // Create code that checks if the given bit is set in the field. - Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); - Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + // First check that the bit field access is within bounds. + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); - // Finally merge both checks and cast to pointer type. The inttoptr - // implicitly zexts the i1 to intptr type. - return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"), - CI->getType()); - } + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); - // Check if all arguments are constants. If so, we can constant fold. - if (!CharC) - return nullptr; + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"), + CI->getType()); +} - // Compute the offset. - size_t I = Str.find(CharC->getSExtValue() & 0xFF); - if (I == StringRef::npos) // Didn't find the char. memchr returns null. +// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant +// arrays LHS and RHS and nonconstant Size. +static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS, + Value *Size, bool StrNCmp, + IRBuilderBase &B, const DataLayout &DL) { + if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); - // memchr(s+n,c,l) -> gep(s+n+i,c) - return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); + StringRef LStr, RStr; + if (!getConstantStringInfo(LHS, LStr, 0, /*TrimAtNul=*/false) || + !getConstantStringInfo(RHS, RStr, 0, /*TrimAtNul=*/false)) + return nullptr; + + // If the contents of both constant arrays are known, fold a call to + // memcmp(A, B, N) to + // N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0) + // where Pos is the first mismatch between A and B, determined below. 
+ + uint64_t Pos = 0; + Value *Zero = ConstantInt::get(CI->getType(), 0); + for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) { + if (Pos == MinSize || + (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) { + // One array is a leading part of the other of equal or greater + // size, or for strncmp, the arrays are equal strings. + // Fold the result to zero. Size is assumed to be in bounds, since + // otherwise the call would be undefined. + return Zero; + } + + if (LStr[Pos] != RStr[Pos]) + break; + } + + // Normalize the result. + typedef unsigned char UChar; + int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1; + Value *MaxSize = ConstantInt::get(Size->getType(), Pos); + Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize); + Value *Res = ConstantInt::get(CI->getType(), IRes); + return B.CreateSelect(Cmp, Zero, Res); } +// Optimize a memcmp call CI with constant size Len. static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, uint64_t Len, IRBuilderBase &B, const DataLayout &DL) { @@ -1028,25 +1254,6 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, } } - // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const). - // TODO: This is limited to i8 arrays. - StringRef LHSStr, RHSStr; - if (getConstantStringInfo(LHS, LHSStr) && - getConstantStringInfo(RHS, RHSStr)) { - // Make sure we're not reading out-of-bounds memory. - if (Len > LHSStr.size() || Len > RHSStr.size()) - return nullptr; - // Fold the memcmp and normalize the result. This way we get consistent - // results across multiple platforms. - uint64_t Ret = 0; - int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len); - if (Cmp < 0) - Ret = -1; - else if (Cmp > 0) - Ret = 1; - return ConstantInt::get(CI->getType(), Ret); - } - return nullptr; } @@ -1056,33 +1263,29 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); - // Handle constant lengths. + + if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL)) + return Res; + + // Handle constant Size. ConstantInt *LenC = dyn_cast(Size); if (!LenC) return nullptr; - // memcmp(d,s,0) -> 0 - if (LenC->getZExtValue() == 0) - return Constant::getNullValue(CI->getType()); - - if (Value *Res = - optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL)) - return Res; - return nullptr; + return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL); } Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); if (Value *V = optimizeMemCmpBCmpCommon(CI, B)) return V; // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 // bcmp can be more efficient than memcmp because it only has to know that // there is a difference, not how different one is to the other. 
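To make the variable-size memcmp fold concrete: with both arrays constant and the first mismatch at position Pos, the call collapses to a select on Size. A sketch of the source-level behavior being modeled (arrays chosen for illustration):

    #include <cassert>
    #include <cstring>

    int main() {
      const char A[] = "hello, world";
      const char B[] = "hello- world"; // first mismatch at Pos == 5
      for (size_t N = 0; N <= 12; ++N) {
        int R = memcmp(A, B, N);
        // The fold rewrites this to: N <= 5 ? 0 : -1   (',' < '-')
        if (N <= 5)
          assert(R == 0);
        else
          assert(R < 0);
      }
    }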
- if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) { + if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) && + isOnlyUsedInZeroEqualityComparison(CI)) { Value *LHS = CI->getArgOperand(0); Value *RHS = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); @@ -1125,6 +1328,7 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) { return Constant::getNullValue(CI->getType()); if (!getConstantStringInfo(Src, SrcStr, /*Offset=*/0, /*TrimAtNul=*/false) || + // TODO: Handle zeroinitializer. !StopChar) return nullptr; } else { @@ -1246,7 +1450,8 @@ static Value *valueHasFloatPrecision(Value *Val) { /// Shrink double -> float functions. static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B, - bool isBinary, bool isPrecise = false) { + bool isBinary, const TargetLibraryInfo *TLI, + bool isPrecise = false) { Function *CalleeFn = CI->getCalledFunction(); if (!CI->getType()->isDoubleTy() || !CalleeFn) return nullptr; @@ -1296,22 +1501,25 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B, R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]); } else { AttributeList CalleeAttrs = CalleeFn->getAttributes(); - R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs) - : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs); + R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B, + CalleeAttrs) + : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs); } return B.CreateFPExt(R, B.getDoubleTy()); } /// Shrink double -> float for unary functions. static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B, + const TargetLibraryInfo *TLI, bool isPrecise = false) { - return optimizeDoubleFP(CI, B, false, isPrecise); + return optimizeDoubleFP(CI, B, false, TLI, isPrecise); } /// Shrink double -> float for binary functions. static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B, + const TargetLibraryInfo *TLI, bool isPrecise = false) { - return optimizeDoubleFP(CI, B, true, isPrecise); + return optimizeDoubleFP(CI, B, true, TLI, isPrecise); } // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z))) @@ -1427,6 +1635,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { /// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x); /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x). 
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { + Module *M = Pow->getModule(); Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); @@ -1454,7 +1663,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { Function *CalleeFn = BaseFn->getCalledFunction(); if (CalleeFn && - TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) { + TLI->getLibFunc(CalleeFn->getName(), LibFn) && + isLibFuncEmittable(M, TLI, LibFn)) { StringRef ExpName; Intrinsic::ID ID; Value *ExpFn; @@ -1506,7 +1716,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { // pow(2.0, itofp(x)) -> ldexp(1.0, x) if (match(Base, m_SpecificFP(2.0)) && (isa(Expo) || isa(Expo)) && - hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) return copyFlags(*Pow, emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, @@ -1515,7 +1725,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { } // pow(2.0 ** n, x) -> exp2(n * x) - if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { + if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; @@ -1542,7 +1752,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { // pow(10.0, x) -> exp10(x) // TODO: There is no exp10() intrinsic yet, but some day there shall be one. if (match(Base, m_SpecificFP(10.0)) && - hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) + hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, B, Attrs)); @@ -1567,7 +1777,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( Mod, Intrinsic::exp2, Ty), FMul, "exp2")); - else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) + else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l)) return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l, B, Attrs)); @@ -1588,7 +1799,8 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, } // Otherwise, use the libcall for sqrt(). - if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) + if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl)) // TODO: We also should check that the target can in fact lower the sqrt() // libcall. We currently have no way to ask this question, so we ask if // the target has a sqrt() libcall, which is not exactly the same. @@ -1778,8 +1990,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { // Shrink pow() to powf() if the arguments are single precision, // unless the result is expected to be double precision. 
if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && - hasFloatVersion(Name)) { - if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, true)) + hasFloatVersion(M, Name)) { + if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true)) return Shrunk; } @@ -1787,13 +1999,14 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { } Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); Function *Callee = CI->getCalledFunction(); AttributeList Attrs; // Attributes are only meaningful on the original call StringRef Name = Callee->getName(); Value *Ret = nullptr; if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && - hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); + hasFloatVersion(M, Name)) + Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); Type *Ty = CI->getType(); Value *Op = CI->getArgOperand(0); @@ -1801,7 +2014,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize if ((isa(Op) || isa(Op)) && - hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, @@ -1812,12 +2025,14 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + // If we can shrink the call to a float function rather than a double // function, do that first. Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) - if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) + if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name)) + if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI)) return Ret; // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to @@ -1848,8 +2063,8 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Type *Ty = Log->getType(); Value *Ret = nullptr; - if (UnsafeFPShrink && hasFloatVersion(LogNm)) - Ret = optimizeUnaryDoubleFP(Log, B, true); + if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm)) + Ret = optimizeUnaryDoubleFP(Log, B, TLI, true); // The earlier call must also be 'fast' in order to do these transforms. CallInst *Arg = dyn_cast(Log->getArgOperand(0)); @@ -1957,7 +2172,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Log->doesNotAccessMemory() ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), Arg->getOperand(0), "log") - : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs); + : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, Attrs); Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); // Since pow() may have side effects, e.g. errno, // dead code elimination may not be trusted to remove it. @@ -1980,7 +2195,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Value *LogE = Log->doesNotAccessMemory() ? 
                     B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
                                  Eul, "log")
-                  : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs);
+                  : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, Attrs);
     Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
     // Since exp() may have side effects, e.g. errno,
     // dead code elimination may not be trusted to remove it.
@@ -1992,14 +2207,16 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
 }
 
 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   Value *Ret = nullptr;
   // TODO: Once we have a way (other than checking for the existence of the
   // libcall) to tell whether our target can lower @llvm.sqrt, relax the
   // condition below.
-  if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
-                                  Callee->getIntrinsicID() == Intrinsic::sqrt))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
+  if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
+      (Callee->getName() == "sqrt" ||
+       Callee->getIntrinsicID() == Intrinsic::sqrt))
+    Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
   if (!CI->isFast())
     return Ret;
@@ -2044,7 +2261,6 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
 
   // If we found a repeated factor, hoist it out of the square root and
   // replace it with the fabs of that factor.
-  Module *M = Callee->getParent();
   Type *ArgType = I->getType();
   Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
   Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
@@ -2061,11 +2277,12 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
 
 // TODO: Generalize to handle any trig function and its inverse.
 Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
   Value *Ret = nullptr;
   StringRef Name = Callee->getName();
-  if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
+  if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(M, Name))
+    Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
   Value *Op1 = CI->getArgOperand(0);
   auto *OpC = dyn_cast<CallInst>(Op1);
@@ -2081,7 +2298,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
   // tanl(atanl(x)) -> x
   LibFunc Func;
   Function *F = OpC->getCalledFunction();
-  if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+  if (F && TLI->getLibFunc(F->getName(), Func) &&
+      isLibFuncEmittable(M, TLI, Func) &&
       ((Func == LibFunc_atan && Callee->getName() == "tan") ||
        (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
       (Func == LibFunc_atanl && Callee->getName() == "tanl")))
@@ -2097,9 +2315,10 @@ static bool isTrigLibCall(CallInst *CI) {
          CI->hasFnAttr(Attribute::ReadNone);
 }
 
-static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
+static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
                              bool UseFloat, Value *&Sin, Value *&Cos,
-                             Value *&SinCos) {
+                             Value *&SinCos, const TargetLibraryInfo *TLI) {
+  Module *M = OrigCallee->getParent();
   Type *ArgTy = Arg->getType();
   Type *ResTy;
   StringRef Name;
@@ -2119,9 +2338,12 @@ static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
     ResTy = StructType::get(ArgTy, ArgTy);
   }
 
-  Module *M = OrigCallee->getParent();
-  FunctionCallee Callee =
-      M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
+  if (!isLibFuncEmittable(M, TLI, Name))
+    return false;
+  LibFunc TheLibFunc;
TLI->getLibFunc(Name, TheLibFunc); + FunctionCallee Callee = getOrInsertLibFunc( + M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy); if (Instruction *ArgInst = dyn_cast(Arg)) { // If the argument is an instruction, it must dominate all uses so put our @@ -2145,6 +2367,8 @@ static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg, Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), "cospi"); } + + return true; } Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { @@ -2172,7 +2396,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { return nullptr; Value *Sin, *Cos, *SinCos; - insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos); + if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos, TLI)) + return nullptr; auto replaceTrigInsts = [this](SmallVectorImpl &Calls, Value *Res) { @@ -2193,6 +2419,7 @@ void LibCallSimplifier::classifyArgUse( SmallVectorImpl &CosCalls, SmallVectorImpl &SinCosCalls) { CallInst *CI = dyn_cast(Val); + Module *M = CI->getModule(); if (!CI || CI->use_empty()) return; @@ -2203,7 +2430,8 @@ void LibCallSimplifier::classifyArgUse( Function *Callee = CI->getCalledFunction(); LibFunc Func; - if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) || + if (!Callee || !TLI->getLibFunc(*Callee, Func) || + !isLibFuncEmittable(M, TLI, Func) || !isTrigLibCall(CI)) return; @@ -2258,7 +2486,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) { // abs(x) -> x getArgOperand(0); - Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType())); + Value *IsNeg = B.CreateIsNeg(X); Value *NegX = B.CreateNSWNeg(X, "neg"); return B.CreateSelect(IsNeg, NegX, X); } @@ -2418,6 +2646,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); if (Value *V = optimizePrintFString(CI, B)) { @@ -2426,10 +2655,10 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) { // printf(format, ...) -> iprintf(format, ...) if no floating point // arguments. - if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - FunctionCallee IPrintFFn = - M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) && + !callHasFloatingPointArgument(CI)) { + FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT, + Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(IPrintFFn); B.Insert(New); @@ -2438,11 +2667,10 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) { // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point // arguments. 
- if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_printf), - FT, Callee->getAttributes()); + if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) && + !callHasFP128Argument(CI)) { + auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT, + Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SmallPrintFFn); B.Insert(New); @@ -2489,7 +2717,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = castToCStr(Dest, B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); + Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); return ConstantInt::get(CI->getType(), 1); @@ -2541,6 +2769,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, } Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); if (Value *V = optimizeSPrintFString(CI, B)) { @@ -2549,10 +2778,10 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) { // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating // point arguments. - if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - FunctionCallee SIPrintFFn = - M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) && + !callHasFloatingPointArgument(CI)) { + FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf, + FT, Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SIPrintFFn); B.Insert(New); @@ -2561,11 +2790,10 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) { // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit // floating point arguments. 
- if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallSPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf), - FT, Callee->getAttributes()); + if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) && + !callHasFP128Argument(CI)) { + auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT, + Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SmallSPrintFFn); B.Insert(New); @@ -2629,7 +2857,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); Value *Ptr = castToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); + Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); return ConstantInt::get(CI->getType(), 1); @@ -2721,6 +2949,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, } Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); if (Value *V = optimizeFPrintFString(CI, B)) { @@ -2729,10 +2958,10 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) { // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no // floating point arguments. - if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - FunctionCallee FIPrintFFn = - M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) && + !callHasFloatingPointArgument(CI)) { + FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf, + FT, Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(FIPrintFFn); B.Insert(New); @@ -2741,11 +2970,11 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) { // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no // 128-bit floating point arguments. - if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); + if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) && + !callHasFP128Argument(CI)) { auto SmallFPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf), - FT, Callee->getAttributes()); + getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT, + Callee->getAttributes()); CallInst *New = cast(CI->clone()); New->setCalledFunction(SmallFPrintFFn); B.Insert(New); @@ -2830,21 +3059,19 @@ Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) { CI->getArgOperand(2))); } -bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { - LibFunc Func; +bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) { SmallString<20> FloatFuncName = FuncName; FloatFuncName += 'f'; - if (TLI->getLibFunc(FloatFuncName, Func)) - return TLI->has(Func); - return false; + return isLibFuncEmittable(M, TLI, FloatFuncName); } Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, IRBuilderBase &Builder) { + Module *M = CI->getModule(); LibFunc Func; Function *Callee = CI->getCalledFunction(); // Check for string/memory library functions. 
- if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { + if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) { // Make sure we never change the calling convention. assert( (ignoreCallingConv(Func) || @@ -2871,6 +3098,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrNCpy(CI, Builder); case LibFunc_strlen: return optimizeStrLen(CI, Builder); + case LibFunc_strnlen: + return optimizeStrNLen(CI, Builder); case LibFunc_strpbrk: return optimizeStrPBrk(CI, Builder); case LibFunc_strndup: @@ -2923,6 +3152,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, LibFunc Func, IRBuilderBase &Builder) { + const Module *M = CI->getModule(); + // Don't optimize calls that require strict floating point semantics. if (CI->isStrictFP()) return nullptr; @@ -3001,12 +3232,12 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, case LibFunc_sin: case LibFunc_sinh: case LibFunc_tanh: - if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName())) - return optimizeUnaryDoubleFP(CI, Builder, true); + if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName())) + return optimizeUnaryDoubleFP(CI, Builder, TLI, true); return nullptr; case LibFunc_copysign: - if (hasFloatVersion(CI->getCalledFunction()->getName())) - return optimizeBinaryDoubleFP(CI, Builder); + if (hasFloatVersion(M, CI->getCalledFunction()->getName())) + return optimizeBinaryDoubleFP(CI, Builder, TLI); return nullptr; case LibFunc_fminf: case LibFunc_fmin: @@ -3025,6 +3256,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, } Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { + Module *M = CI->getModule(); assert(!CI->isMustTailCall() && "These transforms aren't musttail safe."); // TODO: Split out the code below that operates on FP calls so that @@ -3103,7 +3335,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { } // Then check for known library functions. - if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { + if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) { // We never change the calling convention. if (!ignoreCallingConv(Func) && !IsCallingConvC) return nullptr; @@ -3170,7 +3402,7 @@ LibCallSimplifier::LibCallSimplifier( function_ref Replacer, function_ref Eraser) : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), - UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {} + Replacer(Replacer), Eraser(Eraser) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // Indirect through the replacer used in this instance. @@ -3361,7 +3593,8 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // If the function was an __stpcpy_chk, and we were able to fold it into // a __memcpy_chk, we still need to return the correct end pointer. 
if (Ret && Func == LibFunc_stpcpy_chk) - return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, + ConstantInt::get(SizeTTy, Len - 1)); return copyFlags(*CI, cast(Ret)); } diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp index 08a29ea16ba1..1242380f73c1 100644 --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -48,12 +48,12 @@ cl::opt llvm::ForcePGSO( cl::desc("Force the (profiled-guided) size optimizations. ")); cl::opt llvm::PgsoCutoffInstrProf( - "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), cl::ZeroOrMore, + "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), cl::desc("The profile guided size optimization profile summary cutoff " "for instrumentation profile.")); cl::opt llvm::PgsoCutoffSampleProf( - "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::ZeroOrMore, + "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::desc("The profile guided size optimization profile summary cutoff " "for sample profile.")); diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index 1fa574f04c37..0ff88e8b4612 100644 --- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -9,7 +9,7 @@ // This is a little utility pass that removes the gc.relocates inserted by // RewriteStatepointsForGC. Note that the generated IR is incorrect, // but this is useful as a single pass in itself, for analysis of IR, without -// the GC.relocates. The statepoint and gc.result instrinsics would still be +// the GC.relocates. The statepoint and gc.result intrinsics would still be // present. //===----------------------------------------------------------------------===// @@ -18,10 +18,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Statepoint.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index 6a0eb34a7999..4ad16d622e8d 100644 --- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -57,7 +57,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SymbolRewriter.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist.h" diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 0b718ed6136e..832353741500 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -18,7 +18,9 @@ #include "llvm/Transforms/Utils/UnifyLoopExits.h" #include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils.h" @@ -143,6 +145,8 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { // locate the exit blocks. SetVector ExitingBlocks; SetVector Exits; + // Record the exit blocks that branch to the same block. + MapVector > CommonSuccs; // We need SetVectors, but the Loop API takes a vector, so we use a temporary. 
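To see what the CommonSuccs bookkeeping just declared will compute, here is a standalone model of the grouping rule, with a hypothetical Block type and plain STL standing in for LLVM's MapVector/SetVector. Only exits with a single predecessor and a single successor are recorded, since those are the ones the retargeting loop further below can safely move the control-flow hub past:

#include <map>
#include <set>
#include <vector>

struct Block {
  std::vector<Block *> Preds, Succs;
};

// Record each eligible exit under the block it branches to.
std::map<Block *, std::set<Block *>>
groupExitsBySuccessor(const std::vector<Block *> &Exits) {
  std::map<Block *, std::set<Block *>> CommonSuccs;
  for (Block *Exit : Exits)
    if (Exit->Preds.size() == 1 && Exit->Succs.size() == 1)
      CommonSuccs[Exit->Succs.front()].insert(Exit);
  return CommonSuccs;
}

When two or more exits land in the same bucket (and the bucket is not itself an exit), the exits are treated as the exiting blocks and their shared successor becomes the single exit, so guard blocks after the loop are created once rather than once per exit.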
SmallVector Temp; @@ -156,6 +160,11 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (SL == L || L->contains(SL)) continue; Exits.insert(S); + // The typical case for reducing the number of guard blocks occurs when + // the exit block has a single predecessor and successor. + if (S->getSinglePredecessor()) + if (auto *Succ = S->getSingleSuccessor()) + CommonSuccs[Succ].insert(S); } } @@ -170,13 +179,39 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { for (auto EB : ExitingBlocks) { dbgs() << " " << EB->getName(); } - dbgs() << "\n";); + dbgs() << "\n"; + + dbgs() << "Exit blocks with a common successor:\n"; + for (auto CS : CommonSuccs) { + dbgs() << " Succ " << CS.first->getName() << ", exits:"; + for (auto Exit : CS.second) + dbgs() << " " << Exit->getName(); + dbgs() << "\n"; + }); if (Exits.size() <= 1) { LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n"); return false; } + // When multiple exit blocks branch to the same block, change the control + // flow hub to after the exit blocks rather than before. This reduces the + // number of guard blocks needed after the loop. + for (auto CS : CommonSuccs) { + auto CB = CS.first; + auto Preds = CS.second; + if (Exits.contains(CB)) + continue; + if (Preds.size() < 2 || Preds.size() == Exits.size()) + continue; + for (auto Exit : Preds) { + Exits.remove(Exit); + ExitingBlocks.remove(Exit->getSinglePredecessor()); + ExitingBlocks.insert(Exit); + } + Exits.insert(CB); + } + SmallVector GuardBlocks; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks, @@ -196,6 +231,17 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (auto ParentLoop = L->getParentLoop()) { for (auto G : GuardBlocks) { ParentLoop->addBasicBlockToLoop(G, LI); + // Ensure the guard block predecessors are in a valid loop. After the + // change to the control flow hub for common successors, a guard block + // predecessor may not be in a loop or may be in an outer loop. 
+ for (auto Pred : predecessors(G)) { + auto PredLoop = LI.getLoopFor(Pred); + if (!ParentLoop->contains(PredLoop)) { + if (PredLoop) + LI.removeBlock(Pred); + ParentLoop->addBasicBlockToLoop(Pred, LI); + } + } } ParentLoop->verifyLoop(); } diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index 43eb5c87acee..f34f2df971b1 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -34,6 +34,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLCSSAWrapperPassPass(Registry); initializeLibCallsShrinkWrapLegacyPassPass(Registry); initializeLoopSimplifyPass(Registry); + initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerInvokeLegacyPassPass(Registry); initializeLowerSwitchLegacyPassPass(Registry); initializeNameAnonGlobalLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp index 637181722f63..42be67f3cfc0 100644 --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -64,10 +64,15 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, return true; } -template -static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, - HelperClass &Helper, - const DataLayout &DL) { +/// If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace. +/// IRB is IRBuilder used to insert new instructions. +/// +/// If we can't do it, return null. +Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, + IRBuilderBase &Helper, + const DataLayout &DL) { assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && "precondition violation - materialization can't fail"); if (auto *C = dyn_cast(StoredVal)) @@ -154,18 +159,6 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, return StoredVal; } -/// If we saw a store of a value to memory, and -/// then a load from a must-aliased pointer of a different type, try to coerce -/// the stored value. LoadedTy is the type of the load we want to replace. -/// IRB is IRBuilder used to insert new instructions. -/// -/// If we can't do it, return null. -Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, - IRBuilderBase &IRB, - const DataLayout &DL) { - return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL); -} - /// This function is called when we have a memdep query of a load that ends up /// being a clobbering memory write (store, memset, memcpy, memmove). This /// means that the write *may* provide bits used by the load but we can't be @@ -277,7 +270,7 @@ static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase, // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it // to i16. 
- unsigned LoadAlign = LI->getAlignment(); + unsigned LoadAlign = LI->getAlign().value(); int64_t MemLocEnd = MemLocOffs + MemLocSize; @@ -400,10 +393,9 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, return -1; } -template -static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, - HelperClass &Helper, - const DataLayout &DL) { +static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset, + Type *LoadTy, IRBuilderBase &Builder, + const DataLayout &DL) { LLVMContext &Ctx = SrcVal->getType()->getContext(); // If two pointers are in the same address space, they have the same size, @@ -421,9 +413,11 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPtrOrPtrVectorTy()) - SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); + SrcVal = + Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); + SrcVal = + Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); // Shift the bits to the least significant depending on endianness. unsigned ShiftAmt; @@ -432,12 +426,12 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, else ShiftAmt = (StoreSize - LoadSize - Offset) * 8; if (ShiftAmt) - SrcVal = Helper.CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt)); + SrcVal = Builder.CreateLShr(SrcVal, + ConstantInt::get(SrcVal->getType(), ShiftAmt)); if (LoadSize != StoreSize) - SrcVal = Helper.CreateTruncOrBitCast(SrcVal, - IntegerType::get(Ctx, LoadSize * 8)); + SrcVal = Builder.CreateTruncOrBitCast(SrcVal, + IntegerType::get(Ctx, LoadSize * 8)); return SrcVal; } @@ -450,14 +444,12 @@ Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, IRBuilder<> Builder(InsertPt); SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL); - return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL); + return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL); } Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset, Type *LoadTy, const DataLayout &DL) { - ConstantFolder F; - SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL); - return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL); + return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL); } /// This function is called when we have a memdep query of a load that ends up @@ -522,75 +514,77 @@ Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset, return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL); } -template -T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset, - Type *LoadTy, HelperClass &Helper, - const DataLayout &DL) { +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, Instruction *InsertPt, + const DataLayout &DL) { LLVMContext &Ctx = LoadTy->getContext(); uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8; + IRBuilder<> Builder(InsertPt); // We know that this method is only called when the mem transfer fully // provides the bits for the load. 
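Two bit-level tricks in this file can be sanity-checked with plain integers. The first models the shift-and-truncate sequence shown above, which carves a narrow load out of a wider stored value; the second models the byte-splat loop in the memset path just below. Both functions are illustrative stand-ins, assuming at most 64-bit scalars in place of IRBuilder values:

#include <cassert>
#include <cstdint>

// Model of getStoreValueForLoadHelper: what a LoadSize-byte load at byte
// Offset reads out of a StoreSize-byte stored integer. The shift mirrors
// CreateLShr, the mask mirrors CreateTruncOrBitCast, and LittleEndian
// stands in for DataLayout::isLittleEndian().
uint64_t extractLoadBits(uint64_t StoredVal, unsigned StoreSize,
                         unsigned LoadSize, unsigned Offset,
                         bool LittleEndian) {
  assert(Offset + LoadSize <= StoreSize && StoreSize <= 8);
  unsigned ShiftAmt =
      LittleEndian ? Offset * 8 : (StoreSize - LoadSize - Offset) * 8;
  uint64_t Val = StoredVal >> ShiftAmt;
  if (LoadSize < 8)
    Val &= (uint64_t(1) << (LoadSize * 8)) - 1;
  return Val; // extractLoadBits(0x1122334455667788, 8, 2, 1, true) == 0x6677
}

// Model of the memset splat loop in getMemInstValueForLoad: widen one memset
// byte to LoadSize bytes by shift-and-or, doubling the filled prefix when
// possible. The constant path reaches the same value in one step via
// APInt::getSplat.
uint64_t splatByte(uint8_t Byte, unsigned LoadSize) {
  assert(LoadSize >= 1 && LoadSize <= 8);
  uint64_t Val = Byte, OneElt = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
    if (NumBytesSet * 2 <= LoadSize) { // Double the number of bytes set.
      Val |= Val << (NumBytesSet * 8);
      NumBytesSet <<= 1;
      continue;
    }
    Val = OneElt | (Val << 8); // Otherwise append a single byte.
    ++NumBytesSet;
  }
  return Val; // splatByte(0xAB, 4) == 0xABABABAB
}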
if (MemSetInst *MSI = dyn_cast(SrcInst)) { // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and // independently of what the offset is. - T *Val = cast(MSI->getValue()); + Value *Val = MSI->getValue(); if (LoadSize != 1) Val = - Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8)); - T *OneElt = Val; + Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8)); + Value *OneElt = Val; // Splat the value out to the right number of bits. for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { // If we can double the number of bytes set, do it. if (NumBytesSet * 2 <= LoadSize) { - T *ShVal = Helper.CreateShl( + Value *ShVal = Builder.CreateShl( Val, ConstantInt::get(Val->getType(), NumBytesSet * 8)); - Val = Helper.CreateOr(Val, ShVal); + Val = Builder.CreateOr(Val, ShVal); NumBytesSet <<= 1; continue; } // Otherwise insert one byte at a time. - T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8)); - Val = Helper.CreateOr(OneElt, ShVal); + Value *ShVal = + Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8)); + Val = Builder.CreateOr(OneElt, ShVal); ++NumBytesSet; } - return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL); + return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL); } // Otherwise, this is a memcpy/memmove from a constant global. MemTransferInst *MTI = cast(SrcInst); Constant *Src = cast(MTI->getSource()); - - // Otherwise, see if we can constant fold a load from the constant with the - // offset applied as appropriate. unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType()); - return ConstantFoldLoadFromConstPtr( - Src, LoadTy, APInt(IndexSize, Offset), DL); -} - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering mem intrinsic. -Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - Type *LoadTy, Instruction *InsertPt, - const DataLayout &DL) { - IRBuilder<> Builder(InsertPt); - return getMemInstValueForLoadHelper>(SrcInst, Offset, - LoadTy, Builder, DL); + return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), + DL); } Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type *LoadTy, const DataLayout &DL) { - // The only case analyzeLoadFromClobberingMemInst cannot be converted to a - // constant is when it's a memset of a non-constant. - if (auto *MSI = dyn_cast(SrcInst)) - if (!isa(MSI->getValue())) + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8; + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast(SrcInst)) { + auto *Val = dyn_cast(MSI->getValue()); + if (!Val) return nullptr; - ConstantFolder F; - return getMemInstValueForLoadHelper(SrcInst, Offset, - LoadTy, F, DL); + + Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue())); + return ConstantFoldLoadFromConst(Val, LoadTy, DL); + } + + // Otherwise, this is a memcpy/memmove from a constant global. 
+  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+  Constant *Src = cast<Constant>(MTI->getSource());
+  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
+                                      DL);
 }
 } // namespace VNCoercion
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 97c2acb7d4c7..f59fc3a6dd60 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -62,14 +62,13 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -497,7 +496,7 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
   if (PtrDelta.urem(Stride) != 0)
     return false;
   unsigned IdxBitWidth = OpA->getType()->getScalarSizeInBits();
-  APInt IdxDiff = PtrDelta.udiv(Stride).zextOrSelf(IdxBitWidth);
+  APInt IdxDiff = PtrDelta.udiv(Stride).zext(IdxBitWidth);

   // Only look through a ZExt/SExt.
   if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
@@ -1298,10 +1297,16 @@ bool Vectorizer::vectorizeLoadChain(
     CV->replaceAllUsesWith(V);
   }

-  // Bitcast might not be an Instruction, if the value being loaded is a
-  // constant. In that case, no need to reorder anything.
-  if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
-    reorder(BitcastInst);
+  // Since we might have opaque pointers we might end up using the pointer
+  // operand of the first load (wrt. memory loaded) for the vector load. Since
+  // this first load might not be the first in the block we potentially need
+  // to reorder the pointer operand (and its operands). If we have a bitcast,
+  // though, it might be before the load and should be the reorder start
+  // instruction. "Might" because for opaque pointers the "bitcast" is just
+  // the first load's pointer operand, as opposed to something we inserted at
+  // the right position ourselves.
+  Instruction *BCInst = dyn_cast<Instruction>(Bitcast);
+  reorder((BCInst && BCInst != L0->getPointerOperand()) ?
+              BCInst : LI);

   eraseInstructions(Chain);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 81e5aa223c07..6242d9a93fc1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,7 +17,9 @@
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -31,8 +33,6 @@ using namespace PatternMatch;
 #define LV_NAME "loop-vectorize"
 #define DEBUG_TYPE LV_NAME

-extern cl::opt<bool> EnableVPlanPredication;
-
 static cl::opt<bool>
 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                    cl::desc("Enable if-conversion during vectorization."));
@@ -439,6 +439,26 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
   return false;
 }

+/// Returns true if A and B have the same pointer operand or the same SCEV
+/// address.
+static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
+                               StoreInst *B) {
+  // Compare the store instructions themselves.
+  if (A == B)
+    return true;
+
+  // Otherwise compare the pointer operands.
+  Value *APtr = A->getPointerOperand();
+  Value *BPtr = B->getPointerOperand();
+  if (APtr == BPtr)
+    return true;
+
+  // Otherwise compare the address SCEVs.
+  if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
+    return true;
+
+  return false;
+}
+
 int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                 Value *Ptr) const {
   const ValueToValueMap &Strides =
@@ -487,7 +507,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
     // FIXME: We skip these checks when VPlan predication is enabled as we
     // want to allow divergent branches. This whole check will be removed
     // once VPlan predication is on by default.
-    if (!EnableVPlanPredication && Br && Br->isConditional() &&
+    if (Br && Br->isConditional() &&
         !TheLoop->isLoopInvariant(Br->getCondition()) &&
         !LI->isLoopHeader(Br->getSuccessor(0)) &&
         !LI->isLoopHeader(Br->getSuccessor(1))) {
@@ -572,7 +592,7 @@ void LoopVectorizationLegality::addInductionPhi(
   // on predicates that only hold within the loop, since allowing the exit
   // currently means re-using this SCEV outside the loop (see PR33706 for more
   // details).
-  if (PSE.getUnionPredicate().isAlwaysTrue()) {
+  if (PSE.getPredicate().isAlwaysTrue()) {
     AllowedExit.insert(Phi);
     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
   }
@@ -676,7 +696,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         RecurrenceDescriptor RedDes;
         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
-                                                 DT)) {
+                                                 DT, PSE.getSE())) {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
@@ -770,7 +790,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         auto *SE = PSE.getSE();
         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
         for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
-          if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
+          if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
              reportVectorizationFailure("Found unvectorizable intrinsic",
                  "intrinsic instruction cannot be vectorized",
@@ -849,7 +869,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       // used outside the loop only if the SCEV predicates within the loop is
       // same as outside the loop. Allowing the exit means reusing the SCEV
      // outside the loop.
-      if (PSE.getUnionPredicate().isAlwaysTrue()) {
+      if (PSE.getPredicate().isAlwaysTrue()) {
         AllowedExit.insert(&I);
         continue;
       }
@@ -911,15 +931,70 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   if (!LAI->canVectorizeMemory())
     return false;

-  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
-    reportVectorizationFailure("Stores to a uniform address",
-        "write to a loop invariant address could not be vectorized",
-        "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
-    return false;
+  // We can vectorize stores to an invariant address when the final reduction
+  // value is guaranteed to be stored at the end of the loop. Also, once the
+  // decision to vectorize the loop is made, runtime checks are added to make
+  // sure that the invariant address won't alias with any other objects.
+  if (!LAI->getStoresToInvariantAddresses().empty()) {
+    // For each invariant address, check that its last stored value is
+    // unconditional.
+    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
+      if (isInvariantStoreOfReduction(SI) &&
+          blockNeedsPredication(SI->getParent())) {
+        reportVectorizationFailure(
+            "We don't allow storing to uniform addresses",
+            "write of conditional recurring variant value to a loop "
+            "invariant address could not be vectorized",
+            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+        return false;
+      }
+    }
+
+    if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
+      // For each invariant address, check that its last stored value is the
+      // result of one of our reductions.
+      //
+      // We do not check whether dependences with loads exist, because they
+      // are currently rejected earlier in LoopAccessInfo::analyzeLoop. In
+      // case this behaviour changes, we have to modify this code.
+      ScalarEvolution *SE = PSE.getSE();
+      SmallVector<StoreInst *, 4> UnhandledStores;
+      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
+        if (isInvariantStoreOfReduction(SI)) {
+          // Earlier stores to this address are effectively dead code.
+          // With opaque pointers it is possible for one pointer to be used
+          // with different sizes of stored values:
+          //    store i32 0, ptr %x
+          //    store i8 0, ptr %x
+          // The latest store doesn't completely overwrite the first one in
+          // the example. That is why we have to make sure that the types of
+          // the stored values are the same.
+          // TODO: Check that the bitwidth of an unhandled store is smaller
+          // than the one that overwrites it, and add a test.
+          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
+            return storeToSameAddress(SE, SI, I) &&
+                   I->getValueOperand()->getType() ==
+                       SI->getValueOperand()->getType();
+          });
+          continue;
+        }
+        UnhandledStores.push_back(SI);
+      }
+
+      bool IsOK = UnhandledStores.empty();
+      // TODO: we should also validate against InvariantMemSets.
+      if (!IsOK) {
+        reportVectorizationFailure(
+            "We don't allow storing to uniform addresses",
+            "write to a loop invariant address could not "
+            "be vectorized",
+            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+        return false;
+      }
+    }
   }

   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
-  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
+  PSE.addPredicate(LAI->getPSE().getPredicate());
   return true;
 }
@@ -949,6 +1024,26 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
       }));
 }

+bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
+  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
+    const RecurrenceDescriptor &RdxDesc = Reduction.second;
+    return RdxDesc.IntermediateStore == SI;
+  });
+}
+
+bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
+  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
+    const RecurrenceDescriptor &RdxDesc = Reduction.second;
+    if (!RdxDesc.IntermediateStore)
+      return false;
+
+    ScalarEvolution *SE = PSE.getSE();
+    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
+    return V == InvariantAddress ||
+           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
+  });
+}
+
 bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
   Value *In0 = const_cast<Value *>(V);
   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
@@ -969,6 +1064,16 @@ LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
   return nullptr;
 }

+const InductionDescriptor *
+LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
+  if (!isInductionPhi(Phi))
+    return nullptr;
+  auto &ID = getInductionVars().find(Phi)->second;
+  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
+    return &ID;
+  return nullptr;
+}
+
 bool LoopVectorizationLegality::isCastedInductionVariable(
     const Value *V) const {
   auto *Inst = dyn_cast<Instruction>(V);
@@ -1266,7 +1371,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

-  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
+  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
     reportVectorizationFailure("Too many SCEV checks needed",
         "Too many SCEV assumptions need to be made and checked at runtime",
         "TooManySCEVRunTimeChecks", ORE, TheLoop);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 71eb39a18d2f..0cb2032fa45a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,6 +25,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H

 #include "VPlan.h"
+#include "llvm/Support/InstructionCost.h"

 namespace llvm {

@@ -59,7 +60,7 @@ class VPBuilder {
   }

 public:
-  VPBuilder() {}
+  VPBuilder() = default;

   /// Clear the insertion point: created instructions will not be inserted into
   /// a block.
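The ScalarCost field added to VectorizationFactor in the hunk just below lets the planner weigh a candidate directly against the scalar loop, per original iteration. A minimal sketch of that trade-off; the in-tree comparison in LoopVectorize.cpp is more involved (it also considers tail folding and scalable widths), so this only illustrates the core inequality:

#include <cstdint>

struct Factor {
  uint64_t Width;      // Lanes per vector iteration.
  uint64_t Cost;       // Cost of one vector iteration.
  uint64_t ScalarCost; // Cost of one scalar iteration.
};

// Vectorizing at Width wins when Cost / Width < ScalarCost; cross-multiply
// to compare without integer division: Cost < ScalarCost * Width.
bool beatsScalarLoop(const Factor &F) {
  return F.Cost < F.ScalarCost * F.Width;
}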
@@ -187,12 +188,16 @@ struct VectorizationFactor { /// Cost of the loop with that width. InstructionCost Cost; - VectorizationFactor(ElementCount Width, InstructionCost Cost) - : Width(Width), Cost(Cost) {} + /// Cost of the scalar loop. + InstructionCost ScalarCost; + + VectorizationFactor(ElementCount Width, InstructionCost Cost, + InstructionCost ScalarCost) + : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {} /// Width 1 means no vectorization, cost 0 means uncomputed cost. static VectorizationFactor Disabled() { - return {ElementCount::getFixed(1), 0}; + return {ElementCount::getFixed(1), 0, 0}; } bool operator==(const VectorizationFactor &rhs) const { @@ -298,8 +303,12 @@ public: /// Generate the IR code for the body of the vectorized loop according to the /// best selected \p VF, \p UF and VPlan \p BestPlan. + /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue + /// vectorization re-using plans for both the main and epilogue vector loops. + /// It should be removed once the re-use issue has been fixed. void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, - InnerLoopVectorizer &LB, DominatorTree *DT); + InnerLoopVectorizer &LB, DominatorTree *DT, + bool IsEpilogueVectorization); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printPlans(raw_ostream &O); @@ -319,6 +328,9 @@ public: getDecisionAndClampRange(const std::function &Predicate, VFRange &Range); + /// Check if the number of runtime checks exceeds the threshold. + bool requiresTooManyRuntimeChecks() const; + protected: /// Collect the instructions from the original loop that would be trivially /// dead in the vectorized loop if generated. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3290439ecd07..b637b2d5ddae 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -58,7 +58,6 @@ #include "VPRecipeBuilder.h" #include "VPlan.h" #include "VPlanHCFGBuilder.h" -#include "VPlanPredicator.h" #include "VPlanTransforms.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -112,7 +111,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -144,10 +142,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -346,13 +344,6 @@ cl::opt EnableVPlanNativePath( cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization.")); -// FIXME: Remove this switch once we have divergence analysis. Currently we -// assume divergent non-backedge branches when this switch is true. -cl::opt EnableVPlanPredication( - "enable-vplan-predication", cl::init(false), cl::Hidden, - cl::desc("Enable VPlan-native vectorization path predicator with " - "support for outer loop vectorization.")); - // This flag enables the stress testing of the VPlan H-CFG construction in the // VPlan-native vectorization path. It must be used in conjuction with // -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the @@ -481,7 +472,7 @@ public: VPTransformState &State); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. - void fixVectorizedLoop(VPTransformState &State); + void fixVectorizedLoop(VPTransformState &State, VPlan &Plan); // Return true if any runtime check is added. 
bool areSafetyChecksAdded() { return AddedSafetyChecks; } @@ -491,12 +482,6 @@ public: /// new unrolled loop, where UF is the unroll factor. using VectorParts = SmallVector; - /// Vectorize a single first-order recurrence or pointer induction PHINode in - /// a block. This method handles the induction variable canonicalization. It - /// supports both VF = 1 for unrolled loops and arbitrary length vectors. - void widenPHIInstruction(Instruction *PN, VPWidenPHIRecipe *PhiR, - VPTransformState &State); - /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, @@ -506,13 +491,6 @@ public: const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State); - /// Widen an integer or floating-point induction variable \p IV. If \p Trunc - /// is provided, the integer induction variable will first be truncated to - /// the corresponding type. \p CanonicalIV is the scalar value generated for - /// the canonical induction variable. - void widenIntOrFpInduction(PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, - VPTransformState &State, Value *CanonicalIV); - /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, VPTransformState &State); @@ -527,13 +505,8 @@ public: ArrayRef StoredValues, VPValue *BlockInMask = nullptr); - /// Set the debug location in the builder \p Ptr using the debug location in - /// \p V. If \p Ptr is None then it uses the class member's Builder. - void setDebugLocFromInst(const Value *V, - Optional *> CustomBuilder = None); - - /// Fix the non-induction PHIs in the OrigPHIsToFix vector. - void fixNonInductionPHIs(VPTransformState &State); + /// Fix the non-induction PHIs in \p Plan. + void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State); /// Returns true if the reordering of FP operations is not allowed, but we are /// able to vectorize with strict in-order reductions for the given RdxDesc. @@ -546,17 +519,6 @@ public: /// element. virtual Value *getBroadcastInstrs(Value *V); - /// Add metadata from one instruction to another. - /// - /// This includes both the original MDs from \p From and additional ones (\see - /// addNewMetadata). Use this for *newly created* instructions in the vector - /// loop. - void addMetadata(Instruction *To, Instruction *From); - - /// Similar to the previous function but it adds the metadata to a - /// vector of instructions. - void addMetadata(ArrayRef To, Instruction *From); - // Returns the resume value (bc.merge.rdx) for a reduction as // generated by fixReduction. PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc); @@ -575,13 +537,9 @@ protected: /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, - Value *CountRoundDown, Value *EndValue, - BasicBlock *MiddleBlock); - - /// Introduce a conditional branch (on true, condition to be set later) at the - /// end of the header=latch connecting it to itself (across the backedge) and - /// to the exit block of \p L. - void createHeaderBranch(Loop *L); + Value *VectorTripCount, Value *EndValue, + BasicBlock *MiddleBlock, BasicBlock *VectorHeader, + VPlan &Plan); /// Handle all cross-iteration phis in the header. 
void fixCrossIterationPHIs(VPTransformState &State); @@ -595,16 +553,9 @@ protected: void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State); /// Clear NSW/NUW flags from reduction instructions if necessary. - void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc, + void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State); - /// Fixup the LCSSA phi nodes in the unique exit block. This simply - /// means we need to add the appropriate incoming value from the middle - /// block as exiting edges from the scalar epilogue loop (if present) are - /// already in place, and we exit the vector loop exclusively to the middle - /// block. - void fixLCSSAPHIs(VPTransformState &State); - /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. void sinkScalarOperands(Instruction *PredInst); @@ -613,30 +564,11 @@ protected: /// represented as. void truncateToMinimalBitwidths(VPTransformState &State); - /// Compute scalar induction steps. \p ScalarIV is the scalar induction - /// variable on which to base the steps, \p Step is the size of the step, and - /// \p EntryVal is the value from the original loop that maps to the steps. - /// Note that \p EntryVal doesn't have to be an induction variable - it - /// can also be a truncate instruction. - void buildScalarSteps(Value *ScalarIV, Value *Step, Instruction *EntryVal, - const InductionDescriptor &ID, VPValue *Def, - VPTransformState &State); - - /// Create a vector induction phi node based on an existing scalar one. \p - /// EntryVal is the value from the original loop that maps to the vector phi - /// node, and \p Step is the loop-invariant step. If \p EntryVal is a - /// truncate instruction, instead of widening the original IV, we widen a - /// version of the IV truncated to \p EntryVal's type. - void createVectorIntOrFpInductionPHI(const InductionDescriptor &II, - Value *Step, Value *Start, - Instruction *EntryVal, VPValue *Def, - VPTransformState &State); - /// Returns (and creates if needed) the original loop trip count. - Value *getOrCreateTripCount(Loop *NewLoop); + Value *getOrCreateTripCount(BasicBlock *InsertBlock); /// Returns (and creates if needed) the trip count of the widened loop. - Value *getOrCreateVectorTripCount(Loop *NewLoop); + Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock); /// Returns a bitcasted value to the requested vector type. /// Also handles bitcasts of vector <-> vector types. @@ -645,33 +577,21 @@ protected: /// Emit a bypass check to see if the vector trip count is zero, including if /// it overflows. - void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); + void emitIterationCountCheck(BasicBlock *Bypass); /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. Returns the block containing the checks or /// nullptr if no checks have been added. - BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass); + BasicBlock *emitSCEVChecks(BasicBlock *Bypass); /// Emit bypass checks to check any memory assumptions we may have made. /// Returns the block containing the checks or nullptr if no checks have been /// added. - BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); - - /// Compute the transformed value of Index at offset StartValue using step - /// StepValue. - /// For integer induction, returns StartValue + Index * StepValue. - /// For pointer induction, returns StartValue[Index * StepValue]. 
- /// FIXME: The newly created binary instructions should contain nsw/nuw - /// flags, which can be found from the original scalar operations. - Value *emitTransformedIndex(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, - const DataLayout &DL, - const InductionDescriptor &ID, - BasicBlock *VectorHeader) const; + BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass); /// Emit basic blocks (prefixed with \p Prefix) for the iteration check, - /// vector loop preheader, middle block and scalar preheader. Also - /// allocate a loop object for the new vector loop and return it. - Loop *createVectorLoopSkeleton(StringRef Prefix); + /// vector loop preheader, middle block and scalar preheader. + void createVectorLoopSkeleton(StringRef Prefix); /// Create new phi nodes for the induction variables to resume iteration count /// in the scalar epilogue, from where the vectorized loop left off. @@ -680,21 +600,12 @@ protected: /// block, the \p AdditionalBypass pair provides information about the bypass /// block and the end value on the edge from bypass to this loop. void createInductionResumeValues( - Loop *L, std::pair AdditionalBypass = {nullptr, nullptr}); /// Complete the loop skeleton by adding debug MDs, creating appropriate /// conditional branches in the middle block, preparing the builder and - /// running the verifier. Take in the vector loop \p L as argument, and return - /// the preheader of the completed vector loop. - BasicBlock *completeLoopSkeleton(Loop *L, MDNode *OrigLoopID); - - /// Add additional metadata to \p To that was not present on \p Orig. - /// - /// Currently this is used to add the noalias annotations based on the - /// inserted memchecks. Use this for instructions that are *cloned* into the - /// vector loop. - void addNewMetadata(Instruction *To, const Instruction *Orig); + /// running the verifier. Return the preheader of the completed vector loop. + BasicBlock *completeLoopSkeleton(MDNode *OrigLoopID); /// Collect poison-generating recipes that may generate a poison value that is /// used after vectorization, even when their operands are not poison. Those @@ -741,13 +652,6 @@ protected: /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - /// LoopVersioning. It's only set up (non-null) if memchecks were - /// used. - /// - /// This is currently only used to add no-alias metadata based on the - /// memchecks. The actually versioning is performed manually. - std::unique_ptr LVer; - /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. ElementCount VF; @@ -774,9 +678,6 @@ protected: /// there can be multiple exiting edges reaching this block. BasicBlock *LoopExitBlock; - /// The vector loop body. - BasicBlock *LoopVectorBody; - /// The scalar loop body. BasicBlock *LoopScalarBody; @@ -805,10 +706,6 @@ protected: // so we can later fix-up the external users of the induction variables. DenseMap IVEndValues; - // Vector of original scalar PHIs whose corresponding widened PHIs need to be - // fixed up at the end of vector code generation. - SmallVector OrigPHIsToFix; - /// BFI and PSI are used to check for profile guided size optimizations. BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; @@ -936,8 +833,7 @@ protected: /// Emits an iteration count bypass check once for the main loop (when \p /// ForEpilogue is false) and once for the epilogue loop (when \p /// ForEpilogue is true). 
- BasicBlock *emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass, - bool ForEpilogue); + BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue); void printDebugTracesAtStart() override; void printDebugTracesAtEnd() override; }; @@ -956,7 +852,9 @@ public: BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, LVL, CM, BFI, PSI, Checks) {} + EPI, LVL, CM, BFI, PSI, Checks) { + TripCount = EPI.TripCount; + } /// Implements the interface for creating a vectorized skeleton using the /// *epilogue loop* strategy (ie the second pass of vplan execution). std::pair @@ -966,7 +864,7 @@ protected: /// Emits an iteration count bypass check after the main vector loop has /// finished to see if there are any iterations left to execute by either /// the vector epilogue or the scalar epilogue. - BasicBlock *emitMinimumVectorEpilogueIterCountCheck(Loop *L, + BasicBlock *emitMinimumVectorEpilogueIterCountCheck( BasicBlock *Bypass, BasicBlock *Insert); void printDebugTracesAtStart() override; @@ -993,31 +891,6 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { return I; } -void InnerLoopVectorizer::setDebugLocFromInst( - const Value *V, Optional *> CustomBuilder) { - IRBuilder<> *B = (CustomBuilder == None) ? &Builder : *CustomBuilder; - if (const Instruction *Inst = dyn_cast_or_null(V)) { - const DILocation *DIL = Inst->getDebugLoc(); - - // When a FSDiscriminator is enabled, we don't need to add the multiply - // factors to the discriminators. - if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && - !isa(Inst) && !EnableFSDiscriminator) { - // FIXME: For scalable vectors, assume vscale=1. - auto NewDIL = - DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); - if (NewDIL) - B->SetCurrentDebugLocation(NewDIL.getValue()); - else - LLVM_DEBUG(dbgs() - << "Failed to create new discriminator: " - << DIL->getFilename() << " Line: " << DIL->getLine()); - } else - B->SetCurrentDebugLocation(DIL); - } else - B->SetCurrentDebugLocation(DebugLoc()); -} - /// Write a \p DebugMsg about vectorization to the debug output stream. If \p I /// is passed, the message relates to that particular instruction. #ifndef NDEBUG @@ -1059,7 +932,7 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName, namespace llvm { /// Return a value for Step multiplied by VF. -Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, +Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step) { assert(Ty->isIntegerTy() && "Expected an integer step"); Constant *StepVal = ConstantInt::get(Ty, Step * VF.getKnownMinValue()); @@ -1067,12 +940,13 @@ Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, } /// Return the runtime value for VF. -Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) { +Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) { Constant *EC = ConstantInt::get(Ty, VF.getKnownMinValue()); return VF.isScalable() ? 
B.CreateVScale(EC) : EC; } -static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) { +static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, + ElementCount VF) { assert(FTy->isFloatingPointTy() && "Expected floating point type!"); Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits()); Value *RuntimeVF = getRuntimeVF(B, IntTy, VF); @@ -1119,14 +993,6 @@ static std::string getDebugLocString(const Loop *L) { } #endif -void InnerLoopVectorizer::addNewMetadata(Instruction *To, - const Instruction *Orig) { - // If the loop was versioned with memchecks, add the corresponding no-alias - // metadata. - if (LVer && (isa(Orig) || isa(Orig))) - LVer->annotateInstWithNoAlias(To, Orig); -} - void InnerLoopVectorizer::collectPoisonGeneratingRecipes( VPTransformState &State) { @@ -1151,6 +1017,7 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes( // handled. if (isa(CurRec) || isa(CurRec) || + isa(CurRec) || isa(CurRec)) continue; @@ -1176,10 +1043,10 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes( for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &Recipe : *VPBB) { if (auto *WidenRec = dyn_cast(&Recipe)) { - Instruction *UnderlyingInstr = WidenRec->getUnderlyingInstr(); + Instruction &UnderlyingInstr = WidenRec->getIngredient(); VPDef *AddrDef = WidenRec->getAddr()->getDef(); - if (AddrDef && WidenRec->isConsecutive() && UnderlyingInstr && - Legal->blockNeedsPredication(UnderlyingInstr->getParent())) + if (AddrDef && WidenRec->isConsecutive() && + Legal->blockNeedsPredication(UnderlyingInstr.getParent())) collectPoisonGeneratingInstrsInBackwardSlice( cast(AddrDef)); } else if (auto *InterleaveRec = dyn_cast(&Recipe)) { @@ -1206,20 +1073,6 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes( } } -void InnerLoopVectorizer::addMetadata(Instruction *To, - Instruction *From) { - propagateMetadata(To, From); - addNewMetadata(To, From); -} - -void InnerLoopVectorizer::addMetadata(ArrayRef To, - Instruction *From) { - for (Value *V : To) { - if (Instruction *I = dyn_cast(V)) - addMetadata(I, From); - } -} - PHINode *InnerLoopVectorizer::getReductionResumeValue( const RecurrenceDescriptor &RdxDesc) { auto It = ReductionResumeValues.find(&RdxDesc); @@ -1363,7 +1216,7 @@ public: /// RdxDesc. This is true if the -enable-strict-reductions flag is passed, /// the IsOrdered flag of RdxDesc is set and we do not allow reordering /// of FP operations. - bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) { + bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const { return !Hints->allowReordering() && RdxDesc.isOrdered(); } @@ -1701,6 +1554,11 @@ public: private: unsigned NumPredStores = 0; + /// Convenience function that returns the value of vscale_range iff + /// vscale_range.min == vscale_range.max or otherwise returns the value + /// returned by the corresponding TLI method. + Optional getVScaleForTuning() const; + /// \return An upper bound for the vectorization factors for both /// fixed and scalable vectorization, where the minimum-known number of /// elements is a power-of-2 larger than zero. If scalable vectorization is @@ -1713,15 +1571,10 @@ private: /// \return the maximized element count based on the targets vector /// registers and the loop trip-count, but limited to a maximum safe VF. /// This is a helper function of computeFeasibleMaxVF. 
- /// FIXME: MaxSafeVF is currently passed by reference to avoid some obscure - /// issue that occurred on one of the buildbots which cannot be reproduced - /// without having access to the properietary compiler (see comments on - /// D98509). The issue is currently under investigation and this workaround - /// will be removed as soon as possible. ElementCount getMaximizedVFForTarget(unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType, - const ElementCount &MaxSafeVF, + ElementCount MaxSafeVF, bool FoldTailByMasking); /// \return the maximum legal scalable VF, based on the safe max number @@ -2012,7 +1865,7 @@ public: /// there is no vector code generation, the check blocks are removed /// completely. void Create(Loop *L, const LoopAccessInfo &LAI, - const SCEVUnionPredicate &UnionPred) { + const SCEVPredicate &UnionPred, ElementCount VF, unsigned IC) { BasicBlock *LoopHeader = L->getHeader(); BasicBlock *Preheader = L->getLoopPreheader(); @@ -2035,9 +1888,19 @@ public: MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr, "vector.memcheck"); - MemRuntimeCheckCond = - addRuntimeChecks(MemCheckBlock->getTerminator(), L, - RtPtrChecking.getChecks(), MemCheckExp); + auto DiffChecks = RtPtrChecking.getDiffChecks(); + if (DiffChecks) { + MemRuntimeCheckCond = addDiffRuntimeChecks( + MemCheckBlock->getTerminator(), L, *DiffChecks, MemCheckExp, + [VF](IRBuilderBase &B, unsigned Bits) { + return getRuntimeVF(B, B.getIntNTy(Bits), VF); + }, + IC); + } else { + MemRuntimeCheckCond = + addRuntimeChecks(MemCheckBlock->getTerminator(), L, + RtPtrChecking.getChecks(), MemCheckExp); + } assert(MemRuntimeCheckCond && "no RT checks generated although RtPtrChecking " "claimed checks are required"); @@ -2109,12 +1972,16 @@ public: /// Adds the generated SCEVCheckBlock before \p LoopVectorPreHeader and /// adjusts the branches to branch to the vector preheader or \p Bypass, /// depending on the generated condition. - BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass, + BasicBlock *emitSCEVChecks(BasicBlock *Bypass, BasicBlock *LoopVectorPreHeader, BasicBlock *LoopExitBlock) { if (!SCEVCheckCond) return nullptr; - if (auto *C = dyn_cast(SCEVCheckCond)) + + Value *Cond = SCEVCheckCond; + // Mark the check as used, to prevent it from being removed during cleanup. + SCEVCheckCond = nullptr; + if (auto *C = dyn_cast(Cond)) if (C->isZero()) return nullptr; @@ -2133,18 +2000,15 @@ public: DT->addNewBlock(SCEVCheckBlock, Pred); DT->changeImmediateDominator(LoopVectorPreHeader, SCEVCheckBlock); - ReplaceInstWithInst( - SCEVCheckBlock->getTerminator(), - BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheckCond)); - // Mark the check as used, to prevent it from being removed during cleanup. - SCEVCheckCond = nullptr; + ReplaceInstWithInst(SCEVCheckBlock->getTerminator(), + BranchInst::Create(Bypass, LoopVectorPreHeader, Cond)); return SCEVCheckBlock; } /// Adds the generated MemCheckBlock before \p LoopVectorPreHeader and adjusts /// the branches to branch to the vector preheader or \p Bypass, depending on /// the generated condition. - BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass, + BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass, BasicBlock *LoopVectorPreHeader) { // Check if we generated code that checks in runtime if arrays overlap. if (!MemRuntimeCheckCond) @@ -2341,7 +2205,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { /// \p Opcode is relevant for FP induction variable. 
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, - IRBuilder<> &Builder) { + IRBuilderBase &Builder) { assert(VF.isVector() && "only vector VFs are supported"); // Create and check the types. @@ -2357,9 +2221,8 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, // Create a vector of consecutive numbers from zero to VF. VectorType *InitVecValVTy = ValVTy; - Type *InitVecValSTy = STy; if (STy->isFloatingPointTy()) { - InitVecValSTy = + Type *InitVecValSTy = IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); InitVecValVTy = VectorType::get(InitVecValSTy, VLen); } @@ -2389,199 +2252,12 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); } -void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( - const InductionDescriptor &II, Value *Step, Value *Start, - Instruction *EntryVal, VPValue *Def, VPTransformState &State) { - IRBuilder<> &Builder = State.Builder; - assert((isa(EntryVal) || isa(EntryVal)) && - "Expected either an induction phi-node or a truncate of it!"); - - // Construct the initial value of the vector IV in the vector loop preheader - auto CurrIP = Builder.saveIP(); - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - if (isa(EntryVal)) { - assert(Start->getType()->isIntegerTy() && - "Truncation requires an integer type"); - auto *TruncType = cast(EntryVal->getType()); - Step = Builder.CreateTrunc(Step, TruncType); - Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); - } - - Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); - Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); - Value *SteppedStart = getStepVector( - SplatStart, Zero, Step, II.getInductionOpcode(), State.VF, State.Builder); - - // We create vector phi nodes for both integer and floating-point induction - // variables. Here, we determine the kind of arithmetic we will perform. - Instruction::BinaryOps AddOp; - Instruction::BinaryOps MulOp; - if (Step->getType()->isIntegerTy()) { - AddOp = Instruction::Add; - MulOp = Instruction::Mul; - } else { - AddOp = II.getInductionOpcode(); - MulOp = Instruction::FMul; - } - - // Multiply the vectorization factor by the step using integer or - // floating-point arithmetic as appropriate. - Type *StepType = Step->getType(); - Value *RuntimeVF; - if (Step->getType()->isFloatingPointTy()) - RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); - else - RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); - Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); - - // Create a vector splat to use in the induction update. - // - // FIXME: If the step is non-constant, we create the vector splat with - // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't - // handle a constant vector splat. - Value *SplatVF = isa(Mul) - ? ConstantVector::getSplat(State.VF, cast(Mul)) - : Builder.CreateVectorSplat(State.VF, Mul); - Builder.restoreIP(CurrIP); - - // We may need to add the step a number of times, depending on the unroll - // factor. The last of those goes into the PHI. 
- PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", - &*LoopVectorBody->getFirstInsertionPt()); - VecInd->setDebugLoc(EntryVal->getDebugLoc()); - Instruction *LastInduction = VecInd; - for (unsigned Part = 0; Part < UF; ++Part) { - State.set(Def, LastInduction, Part); - - if (isa(EntryVal)) - addMetadata(LastInduction, EntryVal); - - LastInduction = cast( - Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); - LastInduction->setDebugLoc(EntryVal->getDebugLoc()); - } - - // Move the last step to the end of the latch block. This ensures consistent - // placement of all induction updates. - auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); - auto *Br = cast(LoopVectorLatch->getTerminator()); - LastInduction->moveBefore(Br); - LastInduction->setName("vec.ind.next"); - - VecInd->addIncoming(SteppedStart, LoopVectorPreHeader); - VecInd->addIncoming(LastInduction, LoopVectorLatch); -} - -void InnerLoopVectorizer::widenIntOrFpInduction( - PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, - Value *CanonicalIV) { - Value *Start = Def->getStartValue()->getLiveInIRValue(); - const InductionDescriptor &ID = Def->getInductionDescriptor(); - TruncInst *Trunc = Def->getTruncInst(); - IRBuilder<> &Builder = State.Builder; - assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); - assert(!State.VF.isZero() && "VF must be non-zero"); - - // The value from the original loop to which we are mapping the new induction - // variable. - Instruction *EntryVal = Trunc ? cast(Trunc) : IV; - - auto &DL = EntryVal->getModule()->getDataLayout(); - - // Generate code for the induction step. Note that induction steps are - // required to be loop-invariant - auto CreateStepValue = [&](const SCEV *Step) -> Value * { - assert(PSE.getSE()->isLoopInvariant(Step, OrigLoop) && - "Induction step should be loop invariant"); - if (PSE.getSE()->isSCEVable(IV->getType())) { - SCEVExpander Exp(*PSE.getSE(), DL, "induction"); - return Exp.expandCodeFor(Step, Step->getType(), - State.CFG.VectorPreHeader->getTerminator()); - } - return cast(Step)->getValue(); - }; - - // The scalar value to broadcast. This is derived from the canonical - // induction variable. If a truncation type is given, truncate the canonical - // induction variable and step. Otherwise, derive these values from the - // induction descriptor. - auto CreateScalarIV = [&](Value *&Step) -> Value * { - Value *ScalarIV = CanonicalIV; - Type *NeededType = IV->getType(); - if (!Def->isCanonical() || ScalarIV->getType() != NeededType) { - ScalarIV = - NeededType->isIntegerTy() - ? Builder.CreateSExtOrTrunc(ScalarIV, NeededType) - : Builder.CreateCast(Instruction::SIToFP, ScalarIV, NeededType); - ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID, - State.CFG.PrevBB); - ScalarIV->setName("offset.idx"); - } - if (Trunc) { - auto *TruncType = cast(Trunc->getType()); - assert(Step->getType()->isIntegerTy() && - "Truncation requires an integer step"); - ScalarIV = Builder.CreateTrunc(ScalarIV, TruncType); - Step = Builder.CreateTrunc(Step, TruncType); - } - return ScalarIV; - }; - - // Fast-math-flags propagate from the original induction instruction. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) - Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); - - // Now do the actual transformations, and start with creating the step value. 
- Value *Step = CreateStepValue(ID.getStep()); - if (State.VF.isScalar()) { - Value *ScalarIV = CreateScalarIV(Step); - Type *ScalarTy = IntegerType::get(ScalarIV->getContext(), - Step->getType()->getScalarSizeInBits()); - - Instruction::BinaryOps IncOp = ID.getInductionOpcode(); - if (IncOp == Instruction::BinaryOpsEnd) - IncOp = Instruction::Add; - for (unsigned Part = 0; Part < UF; ++Part) { - Value *StartIdx = ConstantInt::get(ScalarTy, Part); - Instruction::BinaryOps MulOp = Instruction::Mul; - if (Step->getType()->isFloatingPointTy()) { - StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType()); - MulOp = Instruction::FMul; - } - - Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); - Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction"); - State.set(Def, EntryPart, Part); - if (Trunc) { - assert(!Step->getType()->isFloatingPointTy() && - "fp inductions shouldn't be truncated"); - addMetadata(EntryPart, Trunc); - } - } - return; - } - - // Create a new independent vector induction variable, if one is needed. - if (Def->needsVectorIV()) - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - - if (Def->needsScalarIV()) { - // Create scalar steps that can be used by instructions we will later - // scalarize. Note that the addition of the scalar steps will not increase - // the number of instructions in the loop in the common case prior to - // InstCombine. We will be trading one vector extract for each scalar step. - Value *ScalarIV = CreateScalarIV(Step); - buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); - } -} - -void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, - Instruction *EntryVal, - const InductionDescriptor &ID, - VPValue *Def, - VPTransformState &State) { - IRBuilder<> &Builder = State.Builder; +/// Compute scalar induction steps. \p ScalarIV is the scalar induction +/// variable on which to base the steps, \p Step is the size of the step. +static void buildScalarSteps(Value *ScalarIV, Value *Step, + const InductionDescriptor &ID, VPValue *Def, + VPTransformState &State) { + IRBuilderBase &Builder = State.Builder; // We shouldn't have to build scalar steps if we aren't vectorizing. assert(State.VF.isVector() && "VF should be greater than one"); // Get the value type and ensure it and the step have the same integer type. @@ -2652,6 +2328,103 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, } } +// Generate code for the induction step. Note that induction steps are +// required to be loop-invariant +static Value *CreateStepValue(const SCEV *Step, ScalarEvolution &SE, + Instruction *InsertBefore, + Loop *OrigLoop = nullptr) { + const DataLayout &DL = SE.getDataLayout(); + assert((!OrigLoop || SE.isLoopInvariant(Step, OrigLoop)) && + "Induction step should be loop invariant"); + if (auto *E = dyn_cast<SCEVUnknown>(Step)) + return E->getValue(); + + SCEVExpander Exp(SE, DL, "induction"); + return Exp.expandCodeFor(Step, Step->getType(), InsertBefore); +} + +/// Compute the transformed value of Index at offset StartValue using step +/// StepValue. +/// For integer induction, returns StartValue + Index * StepValue. +/// For pointer induction, returns StartValue[Index * StepValue]. +/// FIXME: The newly created binary instructions should contain nsw/nuw +/// flags, which can be found from the original scalar operations.
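Put differently, the helper the next hunk introduces computes, per the doc comment above, StartValue + Index * StepValue for integer inductions; pointer inductions feed the same product to a GEP. A one-line sketch with hypothetical names, assuming plain 64-bit integers:

#include <cstdint>

// transformedIndexModel(Start, Index, Step) == Start + Index * Step,
// mirroring the IK_IntInduction case of the implementation below.
int64_t transformedIndexModel(int64_t StartValue, int64_t Index,
                              int64_t StepValue) {
  return StartValue + Index * StepValue;
}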
+static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index, + Value *StartValue, Value *Step, + const InductionDescriptor &ID) { + assert(Index->getType()->getScalarType() == Step->getType() && + "Index scalar type does not match StepValue type"); + + // Note: the IR at this point is broken. We cannot use SE to create any new + // SCEV and then expand it, hoping that SCEV's simplification will give us + // a more optimal code. Unfortunately, attempt of doing so on invalid IR may + // lead to various SCEV crashes. So all we can do is to use builder and rely + // on InstCombine for future simplifications. Here we handle some trivial + // cases only. + auto CreateAdd = [&B](Value *X, Value *Y) { + assert(X->getType() == Y->getType() && "Types don't match!"); + if (auto *CX = dyn_cast<ConstantInt>(X)) + if (CX->isZero()) + return Y; + if (auto *CY = dyn_cast<ConstantInt>(Y)) + if (CY->isZero()) + return X; + return B.CreateAdd(X, Y); + }; + + // We allow X to be a vector type, in which case Y will potentially be + // splatted into a vector with the same element count. + auto CreateMul = [&B](Value *X, Value *Y) { + assert(X->getType()->getScalarType() == Y->getType() && + "Types don't match!"); + if (auto *CX = dyn_cast<ConstantInt>(X)) + if (CX->isOne()) + return Y; + if (auto *CY = dyn_cast<ConstantInt>(Y)) + if (CY->isOne()) + return X; + VectorType *XVTy = dyn_cast<VectorType>(X->getType()); + if (XVTy && !isa<VectorType>(Y->getType())) + Y = B.CreateVectorSplat(XVTy->getElementCount(), Y); + return B.CreateMul(X, Y); + }; + + switch (ID.getKind()) { + case InductionDescriptor::IK_IntInduction: { + assert(!isa<VectorType>(Index->getType()) && + "Vector indices not supported for integer inductions yet"); + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (isa<ConstantInt>(Step) && cast<ConstantInt>(Step)->isMinusOne()) + return B.CreateSub(StartValue, Index); + auto *Offset = CreateMul(Index, Step); + return CreateAdd(StartValue, Offset); + } + case InductionDescriptor::IK_PtrInduction: { + assert(isa<Constant>(Step) && + "Expected constant step for pointer induction"); + return B.CreateGEP(ID.getElementType(), StartValue, CreateMul(Index, Step)); + } + case InductionDescriptor::IK_FpInduction: { + assert(!isa<VectorType>(Index->getType()) && + "Vector indices not supported for FP inductions yet"); + assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); + auto InductionBinOp = ID.getInductionBinOp(); + assert(InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub) && + "Original bin op should be defined for FP induction"); + + Value *MulExp = B.CreateFMul(Step, Index); + return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp, + "induction"); + } + case InductionDescriptor::IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, VPTransformState &State) { @@ -2734,7 +2507,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( for (unsigned Part = 0; Part < UF; Part++) { Value *AddrPart = State.get(Addr, VPIteration(Part, 0)); - setDebugLocFromInst(AddrPart); + State.setDebugLocFromInst(AddrPart); // Notice current instruction could be any index. Need to adjust the address // to the member of index 0.
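The address adjustment mentioned above can be pictured with a small sketch (hypothetical helper, unit element stride within the group assumed): if the first member of an interleave group that the vectorizer visits sits at index k within the group, the emitted base address is shifted back k elements so it points at member 0.

// Given the address of the group member at index MemberIndex, return the
// address of the group's index-0 member.
const int *groupBaseAddress(const int *MemberAddr, unsigned MemberIndex) {
  return MemberAddr - MemberIndex;
}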
@@ -2760,7 +2533,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy)); } - setDebugLocFromInst(Instr); + State.setDebugLocFromInst(Instr); Value *PoisonVec = PoisonValue::get(VecTy); Value *MaskForGaps = nullptr; @@ -2915,8 +2688,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, if (!Instance.isFirstIteration()) return; - setDebugLocFromInst(Instr); - // Does this instruction return a value? bool IsVoidRetTy = Instr->getType()->isVoidTy(); @@ -2933,21 +2704,23 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, if (State.MayGeneratePoisonRecipes.contains(RepRecipe)) Cloned->dropPoisonGeneratingFlags(); - State.Builder.SetInsertPoint(Builder.GetInsertBlock(), - Builder.GetInsertPoint()); + if (Instr->getDebugLoc()) + State.setDebugLocFromInst(Instr); + // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (auto &I : enumerate(RepRecipe->operands())) { auto InputInstance = Instance; VPValue *Operand = I.value(); - if (State.Plan->isUniformAfterVectorization(Operand)) + VPReplicateRecipe *OperandR = dyn_cast<VPReplicateRecipe>(Operand); + if (OperandR && OperandR->isUniform()) InputInstance.Lane = VPLane::getFirstLane(); Cloned->setOperand(I.index(), State.get(Operand, InputInstance)); } - addNewMetadata(Cloned, Instr); + State.addNewMetadata(Cloned, Instr); // Place the cloned scalar in the new loop. - Builder.Insert(Cloned); + State.Builder.Insert(Cloned); State.set(RepRecipe, Cloned, Instance); @@ -2960,29 +2733,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, PredicatedInstructions.push_back(Cloned); } -void InnerLoopVectorizer::createHeaderBranch(Loop *L) { - BasicBlock *Header = L->getHeader(); - assert(!L->getLoopLatch() && "loop should not have a latch at this point"); - - IRBuilder<> B(Header->getTerminator()); - Instruction *OldInst = - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); - setDebugLocFromInst(OldInst, &B); - - // Connect the header to the exit and header blocks and replace the old - // terminator. - B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header); - - // Now we have two terminators. Remove the old one from the block. - Header->getTerminator()->eraseFromParent(); -} - -Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { +Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) { if (TripCount) return TripCount; - assert(L && "Create Trip Count for null loop."); - IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + assert(InsertBlock); + IRBuilder<> Builder(InsertBlock->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); @@ -3006,7 +2762,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { const SCEV *ExitCount = SE->getAddExpr( BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + const DataLayout &DL = InsertBlock->getModule()->getDataLayout(); // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. @@ -3014,22 +2770,23 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { // Count holds the overall loop count (N).
TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - L->getLoopPreheader()->getTerminator()); + InsertBlock->getTerminator()); if (TripCount->getType()->isPointerTy()) TripCount = CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int", - L->getLoopPreheader()->getTerminator()); + InsertBlock->getTerminator()); return TripCount; } -Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { +Value * +InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { if (VectorTripCount) return VectorTripCount; - Value *TC = getOrCreateTripCount(L); - IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + Value *TC = getOrCreateTripCount(InsertBlock); + IRBuilder<> Builder(InsertBlock->getTerminator()); Type *Ty = TC->getType(); // This is where we can make the step a runtime constant. @@ -3041,6 +2798,8 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { // overflows: the vector induction variable will eventually wrap to zero given // that it starts at zero and its Step is a power of two; the loop will then // exit, with the last early-exit vector comparison also producing all-true. + // For scalable vectors the VF is not guaranteed to be a power of 2, but this + // is accounted for in emitIterationCountCheck that adds an overflow check. if (Cost->foldTailByMasking()) { assert(isPowerOf2_32(VF.getKnownMinValue() * UF) && "VF*UF must be a power of 2 when folding tail by masking"); @@ -3103,9 +2862,8 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy, return Builder.CreateBitOrPointerCast(CastVal, DstFVTy); } -void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, - BasicBlock *Bypass) { - Value *Count = getOrCreateTripCount(L); +void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { + Value *Count = getOrCreateTripCount(LoopVectorPreHeader); // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. BasicBlock *const TCCheckBlock = LoopVectorPreHeader; @@ -3120,10 +2878,23 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, : ICmpInst::ICMP_ULT; // If tail is to be folded, vector loop takes care of all iterations. + Type *CountTy = Count->getType(); Value *CheckMinIters = Builder.getFalse(); - if (!Cost->foldTailByMasking()) { - Value *Step = createStepForVF(Builder, Count->getType(), VF, UF); + Value *Step = createStepForVF(Builder, CountTy, VF, UF); + if (!Cost->foldTailByMasking()) CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check"); + else if (VF.isScalable()) { + // vscale is not necessarily a power-of-2, which means we cannot guarantee + // an overflow to zero when updating induction variables and so an + // additional overflow check is required before entering the vector loop. + + // Get the maximum unsigned value for the type. + Value *MaxUIntTripCount = + ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask()); + Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count); + + // Don't execute the vector loop if (UMax - n) < (VF * UF). + CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step); } // Create new preheader for vector loop.
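A worked instance of the (UMax - n) < (VF * UF) guard above, assuming an i8 trip count for readability: with n = 250 and VF * UF = 16, UMax - n = 255 - 250 = 5, and 5 < 16, so the vector loop is bypassed because stepping the induction variable by 16 could wrap past the trip count. A sketch with hypothetical names:

#include <cstdint>

// Returns true when the vector loop must be skipped: fewer than Step
// iterations remain before the unsigned counter would wrap.
bool skipVectorLoopModel(uint8_t N, uint8_t Step) {
  return static_cast<uint8_t>(UINT8_MAX - N) < Step;
}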
LoopVectorPreHeader = @@ -3148,10 +2919,10 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, LoopBypassBlocks.push_back(TCCheckBlock); } -BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { +BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { BasicBlock *const SCEVCheckBlock = - RTChecks.emitSCEVChecks(L, Bypass, LoopVectorPreHeader, LoopExitBlock); + RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock); if (!SCEVCheckBlock) return nullptr; @@ -3176,14 +2947,13 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { return SCEVCheckBlock; } -BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, - BasicBlock *Bypass) { +BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { // VPlan-native path does not do any analysis for runtime checks currently. if (EnableVPlanNativePath) return nullptr; BasicBlock *const MemCheckBlock = - RTChecks.emitMemRuntimeChecks(L, Bypass, LoopVectorPreHeader); + RTChecks.emitMemRuntimeChecks(Bypass, LoopVectorPreHeader); // Check if we generated code that checks in runtime if arrays overlap. We put // the checks into a separate block to make the more common case of few @@ -3197,7 +2967,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, "to vectorize."); ORE->emit([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize", - L->getStartLoc(), L->getHeader()) + OrigLoop->getStartLoc(), + OrigLoop->getHeader()) << "Code-size may be reduced by not forcing " "vectorization, or by source-code modifications " "eliminating the need for runtime checks " @@ -3209,116 +2980,10 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, AddedSafetyChecks = true; - // We currently don't use LoopVersioning for the actual loop cloning but we - // still use it to add the noalias metadata. - LVer = std::make_unique<LoopVersioning>( - *Legal->getLAI(), - Legal->getLAI()->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, - DT, PSE.getSE()); - LVer->prepareNoAliasMetadata(); return MemCheckBlock; } -Value *InnerLoopVectorizer::emitTransformedIndex( - IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL, - const InductionDescriptor &ID, BasicBlock *VectorHeader) const { - - SCEVExpander Exp(*SE, DL, "induction"); - auto Step = ID.getStep(); - auto StartValue = ID.getStartValue(); - assert(Index->getType()->getScalarType() == Step->getType() && - "Index scalar type does not match StepValue type"); - - // Note: the IR at this point is broken. We cannot use SE to create any new - // SCEV and then expand it, hoping that SCEV's simplification will give us - // a more optimal code. Unfortunately, attempt of doing so on invalid IR may - // lead to various SCEV crashes. So all we can do is to use builder and rely - // on InstCombine for future simplifications. Here we handle some trivial - // cases only. - auto CreateAdd = [&B](Value *X, Value *Y) { - assert(X->getType() == Y->getType() && "Types don't match!"); - if (auto *CX = dyn_cast<ConstantInt>(X)) - if (CX->isZero()) - return Y; - if (auto *CY = dyn_cast<ConstantInt>(Y)) - if (CY->isZero()) - return X; - return B.CreateAdd(X, Y); - }; - - // We allow X to be a vector type, in which case Y will potentially be - // splatted into a vector with the same element count.
- auto CreateMul = [&B](Value *X, Value *Y) { - assert(X->getType()->getScalarType() == Y->getType() && - "Types don't match!"); - if (auto *CX = dyn_cast<ConstantInt>(X)) - if (CX->isOne()) - return Y; - if (auto *CY = dyn_cast<ConstantInt>(Y)) - if (CY->isOne()) - return X; - VectorType *XVTy = dyn_cast<VectorType>(X->getType()); - if (XVTy && !isa<VectorType>(Y->getType())) - Y = B.CreateVectorSplat(XVTy->getElementCount(), Y); - return B.CreateMul(X, Y); - }; - - // Get a suitable insert point for SCEV expansion. For blocks in the vector - // loop, choose the end of the vector loop header (=VectorHeader), because - // the DomTree is not kept up-to-date for additional blocks generated in the - // vector loop. By using the header as insertion point, we guarantee that the - // expanded instructions dominate all their uses. - auto GetInsertPoint = [this, &B, VectorHeader]() { - BasicBlock *InsertBB = B.GetInsertPoint()->getParent(); - if (InsertBB != LoopVectorBody && - LI->getLoopFor(VectorHeader) == LI->getLoopFor(InsertBB)) - return VectorHeader->getTerminator(); - return &*B.GetInsertPoint(); - }; - - switch (ID.getKind()) { - case InductionDescriptor::IK_IntInduction: { - assert(!isa<VectorType>(Index->getType()) && - "Vector indices not supported for integer inductions yet"); - assert(Index->getType() == StartValue->getType() && - "Index type does not match StartValue type"); - if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne()) - return B.CreateSub(StartValue, Index); - auto *Offset = CreateMul( - Index, Exp.expandCodeFor(Step, Index->getType(), GetInsertPoint())); - return CreateAdd(StartValue, Offset); - } - case InductionDescriptor::IK_PtrInduction: { - assert(isa<SCEVConstant>(Step) && - "Expected constant step for pointer induction"); - return B.CreateGEP( - ID.getElementType(), StartValue, - CreateMul(Index, - Exp.expandCodeFor(Step, Index->getType()->getScalarType(), - GetInsertPoint()))); - } - case InductionDescriptor::IK_FpInduction: { - assert(!isa<VectorType>(Index->getType()) && - "Vector indices not supported for FP inductions yet"); - assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); - auto InductionBinOp = ID.getInductionBinOp(); - assert(InductionBinOp && - (InductionBinOp->getOpcode() == Instruction::FAdd || - InductionBinOp->getOpcode() == Instruction::FSub) && - "Original bin op should be defined for FP induction"); - - Value *StepValue = cast<SCEVConstant>(Step)->getValue(); - Value *MulExp = B.CreateFMul(StepValue, Index); - return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp, - "induction"); - } - case InductionDescriptor::IK_NoInduction: - return nullptr; - } - llvm_unreachable("invalid enum"); -} - -Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { +void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); assert(LoopVectorPreHeader && "Invalid loop structure"); @@ -3350,43 +3015,24 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc()); ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst); - // We intentionally don't let SplitBlock update LoopInfo since - // LoopVectorBody should belong to another loop than LoopVectorPreHeader. - // LoopVectorBody is explicitly added to the correct place few lines later. - LoopVectorBody = - SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, - nullptr, nullptr, Twine(Prefix) + "vector.body"); - - // Update dominator for loop exit.
+ // Update dominator for loop exit. During skeleton creation, only the vector + // pre-header and the middle block are created. The vector loop is entirely + // created during VPlan execution. if (!Cost->requiresScalarEpilogue(VF)) // If there is an epilogue which must run, there's no edge from the // middle block to exit blocks and thus no need to update the immediate // dominator of the exit blocks. DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); - - // Create and register the new vector loop. - Loop *Lp = LI->AllocateLoop(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - - // Insert the new loop into the loop nest and register the new basic blocks - // before calling any utilities such as SCEV that require valid LoopInfo. - if (ParentLoop) { - ParentLoop->addChildLoop(Lp); - } else { - LI->addTopLevelLoop(Lp); - } - Lp->addBasicBlockToLoop(LoopVectorBody, *LI); - return Lp; } void InnerLoopVectorizer::createInductionResumeValues( - Loop *L, std::pair<BasicBlock *, Value *> AdditionalBypass) { + std::pair<BasicBlock *, Value *> AdditionalBypass) { assert(((AdditionalBypass.first && AdditionalBypass.second) || (!AdditionalBypass.first && !AdditionalBypass.second)) && "Inconsistent information about additional bypass."); - Value *VectorTripCount = getOrCreateVectorTripCount(L); - assert(VectorTripCount && L && "Expected valid arguments"); + Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); + assert(VectorTripCount && "Expected valid arguments"); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. @@ -3399,19 +3045,13 @@ void InnerLoopVectorizer::createInductionResumeValues( PHINode *OrigPhi = InductionEntry.first; InductionDescriptor II = InductionEntry.second; - // Create phi nodes to merge from the backedge-taken check block. - PHINode *BCResumeVal = - PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val", - LoopScalarPreHeader->getTerminator()); - // Copy original phi DL over to the new one. - BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc()); Value *&EndValue = IVEndValues[OrigPhi]; Value *EndValueFromAdditionalBypass = AdditionalBypass.second; if (OrigPhi == OldInduction) { // We know what the end value is. EndValue = VectorTripCount; } else { - IRBuilder<> B(L->getLoopPreheader()->getTerminator()); + IRBuilder<> B(LoopVectorPreHeader->getTerminator()); // Fast-math-flags propagate from the original induction instruction. if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp())) @@ -3420,10 +3060,10 @@ void InnerLoopVectorizer::createInductionResumeValues( Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = CastInst::getCastOpcode(VectorTripCount, true, StepType, true); - Value *CRD = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.crd"); - const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout(); - EndValue = - emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody); + Value *VTC = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.vtc"); + Value *Step = + CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint()); + EndValue = emitTransformedIndex(B, VTC, II.getStartValue(), Step, II); EndValue->setName("ind.end"); // Compute the end value for the additional bypass (if applicable).
@@ -3431,13 +3071,23 @@ void InnerLoopVectorizer::createInductionResumeValues( B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt())); CastOp = CastInst::getCastOpcode(AdditionalBypass.second, true, StepType, true); - CRD = - B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.crd"); + Value *Step = + CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint()); + VTC = + B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.vtc"); EndValueFromAdditionalBypass = - emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody); + emitTransformedIndex(B, VTC, II.getStartValue(), Step, II); EndValueFromAdditionalBypass->setName("ind.end"); } } + + // Create phi nodes to merge from the backedge-taken check block. + PHINode *BCResumeVal = + PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val", + LoopScalarPreHeader->getTerminator()); + // Copy original phi DL over to the new one. + BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc()); + // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. BCResumeVal->addIncoming(EndValue, LoopMiddleBlock); @@ -3456,13 +3106,10 @@ void InnerLoopVectorizer::createInductionResumeValues( } } -BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L, - MDNode *OrigLoopID) { - assert(L && "Expected valid loop."); - +BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(MDNode *OrigLoopID) { // The trip counts should be cached by now. - Value *Count = getOrCreateTripCount(L); - Value *VectorTripCount = getOrCreateVectorTripCount(L); + Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator(); @@ -3487,14 +3134,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L, cast<BranchInst>(LoopMiddleBlock->getTerminator())->setCondition(CmpN); } - // Get ready to start creating new instructions into the vectorized body. - assert(LoopVectorPreHeader == L->getLoopPreheader() && - "Inconsistent vector loop preheader"); - Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); - #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); - LI->verify(*DT); #endif return LoopVectorPreHeader; @@ -3517,7 +3158,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() { |/ | | v | [ ] \ - | [ ]_| <-- vector loop. + | [ ]_| <-- vector loop (created during VPlan execution). | | | v \ -[ ] <--- middle-block. @@ -3544,34 +3185,32 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() // simply happens to be prone to hitting this in practice. In theory, we // can hit the same issue for any SCEV, or ValueTracking query done during // mutation. See PR49900. - getOrCreateTripCount(OrigLoop); + getOrCreateTripCount(OrigLoop->getLoopPreheader()); // Create an empty vector loop, and prepare basic blocks for the runtime // checks. - Loop *Lp = createVectorLoopSkeleton(""); + createVectorLoopSkeleton(""); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. This check also covers the case where the // backedge-taken count is uint##_max: adding one to it will overflow leading // to an incorrect trip count of zero. In this (rare) case we will also jump // to the scalar loop.
- emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader); + emitIterationCountCheck(LoopScalarPreHeader); // Generate the code to check any assumptions that we've made for SCEV // expressions. - emitSCEVChecks(Lp, LoopScalarPreHeader); + emitSCEVChecks(LoopScalarPreHeader); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. - emitMemRuntimeChecks(Lp, LoopScalarPreHeader); - - createHeaderBranch(Lp); + emitMemRuntimeChecks(LoopScalarPreHeader); // Emit phis for the new starting index of the scalar loop. - createInductionResumeValues(Lp); + createInductionResumeValues(); - return {completeLoopSkeleton(Lp, OrigLoopID), nullptr}; + return {completeLoopSkeleton(OrigLoopID), nullptr}; } // Fix up external users of the induction variable. At this point, we are @@ -3580,8 +3219,9 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() // value for the IV when arriving directly from the middle block. void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, - Value *CountRoundDown, Value *EndValue, - BasicBlock *MiddleBlock) { + Value *VectorTripCount, Value *EndValue, + BasicBlock *MiddleBlock, + BasicBlock *VectorHeader, VPlan &Plan) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate // value (the value that feeds into the phi from the loop latch). @@ -3608,8 +3248,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, for (User *U : OrigPhi->users()) { auto *UI = cast<Instruction>(U); if (!OrigLoop->contains(UI)) { - const DataLayout &DL = - OrigLoop->getHeader()->getModule()->getDataLayout(); assert(isa<PHINode>(UI) && "Expected LCSSA form"); IRBuilder<> B(MiddleBlock->getTerminator()); @@ -3619,15 +3257,18 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); Value *CountMinusOne = B.CreateSub( - CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1)); + VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1)); Value *CMO = !II.getStep()->getType()->isIntegerTy() ? B.CreateCast(Instruction::SIToFP, CountMinusOne, II.getStep()->getType()) : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); CMO->setName("cast.cmo"); + + Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(), + VectorHeader->getTerminator()); Value *Escape = - emitTransformedIndex(B, CMO, PSE.getSE(), DL, II, LoopVectorBody); + emitTransformedIndex(B, CMO, II.getStartValue(), Step, II); Escape->setName("ind.escape"); MissingVals[UI] = Escape; } @@ -3640,8 +3281,10 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, // In this case, if IV1 has an external use, we need to avoid adding both // "last value of IV1" and "penultimate value of IV2". So, verify that we // don't already have an incoming value for the middle block. - if (PHI->getBasicBlockIndex(MiddleBlock) == -1) + if (PHI->getBasicBlockIndex(MiddleBlock) == -1) { PHI->addIncoming(I.second, MiddleBlock); + Plan.removeLiveOut(PHI); + } } } @@ -3920,18 +3563,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) { } } -void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { +void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, + VPlan &Plan) { // Insert truncates and extends for any truncated instructions as hints to // InstCombine.
if (VF.isVector()) truncateToMinimalBitwidths(State); // Fix widened non-induction PHIs by setting up the PHI operands. - if (OrigPHIsToFix.size()) { - assert(EnableVPlanNativePath && - "Unexpected non-induction PHIs for fixup in non VPlan-native path"); - fixNonInductionPHIs(State); - } + if (EnableVPlanNativePath) + fixNonInductionPHIs(Plan, State); // At this point every instruction in the original loop is widened to a // vector form. Now we need to fix the recurrences in the loop. These PHI @@ -3942,24 +3583,37 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); - // If we inserted an edge from the middle block to the unique exit block, - // update uses outside the loop (phis) to account for the newly inserted - // edge. - if (!Cost->requiresScalarEpilogue(VF)) { + VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock(); + Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); + if (Cost->requiresScalarEpilogue(VF)) { + // No edge from the middle block to the unique exit block has been inserted + // and there is nothing to fix from vector loop; phis should have incoming + // from scalar loop only. + Plan.clearLiveOuts(); + } else { + // If we inserted an edge from the middle block to the unique exit block, + // update uses outside the loop (phis) to account for the newly inserted + // edge. + // Fix-up external users of the induction variables. for (auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, - getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)), - IVEndValues[Entry.first], LoopMiddleBlock); - - fixLCSSAPHIs(State); + getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), + IVEndValues[Entry.first], LoopMiddleBlock, + VectorLoop->getHeader(), Plan); } + // Fix LCSSA phis not already fixed earlier. Extracts may need to be generated + // in the exit block, so update the builder. + State.Builder.SetInsertPoint(State.CFG.ExitBB->getFirstNonPHI()); + for (auto &KV : Plan.getLiveOuts()) + KV.second->fixPhi(Plan, State); + for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); // Remove redundant induction instructions. - cse(LoopVectorBody); + cse(VectorLoop->getHeader()); // Set/update profile weights for the vector and remainder loops as original // loop iterations are now distributed among them. Note that original loop @@ -3974,9 +3628,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // For scalable vectorization we can't know at compile time how many iterations // of the loop are handled in one vector iteration, so instead assume a pessimistic // vscale of '1'. - setProfileInfoAfterUnrolling( - LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody), - LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF); + setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody), VectorLoop, + LI->getLoopFor(LoopScalarBody), + VF.getKnownMinValue() * UF); } void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { @@ -3986,7 +3640,8 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { // the currently empty PHI nodes. At this point every instruction in the // original loop is widened to a vector form so we can use them to construct // the incoming edges. 
- VPBasicBlock *Header = State.Plan->getEntry()->getEntryBasicBlock(); + VPBasicBlock *Header = + State.Plan->getVectorLoopRegion()->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) fixReduction(ReductionPhi, State); @@ -4102,8 +3757,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence( // and thus no phis which needed updated. if (!Cost->requiresScalarEpilogue(VF)) for (PHINode &LCSSAPhi : LoopExitBlock->phis()) - if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi)) + if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi)) { LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); + State.Plan->removeLiveOut(&LCSSAPhi); + } } void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, @@ -4117,14 +3774,14 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, RecurKind RK = RdxDesc.getRecurrenceKind(); TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue(); Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); - setDebugLocFromInst(ReductionStartValue); + State.setDebugLocFromInst(ReductionStartValue); VPValue *LoopExitInstDef = PhiR->getBackedgeValue(); // This is the vector-clone of the value that leaves the loop. Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Wrap flags are in general invalid after vectorization, clear them. - clearReductionWrapFlags(RdxDesc, State); + clearReductionWrapFlags(PhiR, State); // Before each round, move the insertion point right between // the PHIs and the values we are going to write. @@ -4132,9 +3789,13 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // instructions. Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - setDebugLocFromInst(LoopExitInst); + State.setDebugLocFromInst(LoopExitInst); Type *PhiTy = OrigPhi->getType(); + + VPBasicBlock *LatchVPBB = + PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock(); + BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB]; // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former.
For an inloop reduction the reduction will already @@ -4142,17 +3803,20 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, if (Cost->foldTailByMasking() && !PhiR->isInLoop()) { for (unsigned Part = 0; Part < UF; ++Part) { Value *VecLoopExitInst = State.get(LoopExitInstDef, Part); - Value *Sel = nullptr; + SelectInst *Sel = nullptr; for (User *U : VecLoopExitInst->users()) { if (isa<SelectInst>(U)) { assert(!Sel && "Reduction exit feeding two selects"); - Sel = U; + Sel = cast<SelectInst>(U); } else assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select"); } assert(Sel && "Reduction exit feeds no select"); State.reset(LoopExitInstDef, Sel, Part); + if (isa<FPMathOperator>(Sel)) + Sel->setFastMathFlags(RdxDesc.getFastMathFlags()); + // If the target can create a predicated operator for the reduction at no // extra cost in the loop (for example a predicated vadd), it can be // cheaper for the select to remain in the loop than be sunk out of it, @@ -4164,8 +3828,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, TargetTransformInfo::ReductionFlags())) { auto *VecRdxPhi = cast<PHINode>(State.get(PhiR, Part)); - VecRdxPhi->setIncomingValueForBlock( - LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel); + VecRdxPhi->setIncomingValueForBlock(VectorLoopLatch, Sel); } } } @@ -4176,8 +3839,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - Builder.SetInsertPoint( - LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); + Builder.SetInsertPoint(VectorLoopLatch->getTerminator()); VectorParts RdxParts(UF); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = State.get(LoopExitInstDef, Part); @@ -4208,7 +3870,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // conditional branch, and (c) other passes may add new predecessors which // terminate on this line. This is the easiest way to ensure we don't // accidentally cause an extra step back into the loop while debugging. - setDebugLocFromInst(LoopMiddleBlock->getTerminator()); + State.setDebugLocFromInst(LoopMiddleBlock->getTerminator()); if (PhiR->isOrdered()) ReducedPartRdx = State.get(LoopExitInstDef, UF - 1); else { @@ -4265,6 +3927,17 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // Set the resume value for this reduction ReductionResumeValues.insert({&RdxDesc, BCBlockPhi}); + // If there were stores of the reduction value to a uniform memory address + // inside the loop, create the final store here. + if (StoreInst *SI = RdxDesc.IntermediateStore) { + StoreInst *NewSI = + Builder.CreateStore(ReducedPartRdx, SI->getPointerOperand()); + propagateMetadata(NewSI, SI); + + // If the reduction value is used in other places, + // then let the code below create PHI's for that. + } + // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. @@ -4273,8 +3946,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // fixFirstOrderRecurrence for a more complete explanation of the logic.
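The Select that tail folding introduces (located in the foldTailByMasking block above by scanning the users of VecLoopExitInst) has simple per-lane semantics; a scalar sketch with hypothetical names:

#include <cstdint>

// Active lanes take the freshly computed reduction value; masked-off tail
// lanes keep the value the phi carried in, so they do not perturb the result.
int64_t maskedReductionLane(bool LaneActive, int64_t NewRdxVal,
                            int64_t PhiVal) {
  return LaneActive ? NewRdxVal : PhiVal;
}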
if (!Cost->requiresScalarEpilogue(VF)) for (PHINode &LCSSAPhi : LoopExitBlock->phis()) - if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) + if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) { LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); + State.Plan->removeLiveOut(&LCSSAPhi); + } // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. @@ -4287,63 +3962,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } -void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc, +void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State) { + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind RK = RdxDesc.getRecurrenceKind(); if (RK != RecurKind::Add && RK != RecurKind::Mul) return; - Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr(); - assert(LoopExitInstr && "null loop exit instruction"); - SmallVector<Instruction *, 8> Worklist; - SmallPtrSet<Instruction *, 8> Visited; - Worklist.push_back(LoopExitInstr); - Visited.insert(LoopExitInstr); + SmallVector<VPValue *, 8> Worklist; + SmallPtrSet<VPValue *, 8> Visited; + Worklist.push_back(PhiR); + Visited.insert(PhiR); while (!Worklist.empty()) { - Instruction *Cur = Worklist.pop_back_val(); - if (isa<OverflowingBinaryOperator>(Cur)) - for (unsigned Part = 0; Part < UF; ++Part) { - // FIXME: Should not rely on getVPValue at this point. - Value *V = State.get(State.Plan->getVPValue(Cur, true), Part); - cast<Instruction>(V)->dropPoisonGeneratingFlags(); + VPValue *Cur = Worklist.pop_back_val(); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *V = State.get(Cur, Part); + if (!isa<OverflowingBinaryOperator>(V)) + break; + cast<Instruction>(V)->dropPoisonGeneratingFlags(); } - for (User *U : Cur->users()) { - Instruction *UI = cast<Instruction>(U); - if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) && - Visited.insert(UI).second) - Worklist.push_back(UI); - } - } -} - -void InnerLoopVectorizer::fixLCSSAPHIs(VPTransformState &State) { - for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { - if (LCSSAPhi.getBasicBlockIndex(LoopMiddleBlock) != -1) - // Some phis were already hand updated by the reduction and recurrence - // code above, leave them alone. - continue; - - auto *IncomingValue = LCSSAPhi.getIncomingValue(0); - // Non-instruction incoming values will have only one value. - - VPLane Lane = VPLane::getFirstLane(); - if (isa<Instruction>(IncomingValue) && - !Cost->isUniformAfterVectorization(cast<Instruction>(IncomingValue), - VF)) - Lane = VPLane::getLastLaneForVF(VF); - - // Can be a loop invariant incoming value or the last scalar value to be - // extracted from the vectorized loop. - // FIXME: Should not rely on getVPValue at this point. - Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); - Value *lastIncomingValue = - OrigLoop->isLoopInvariant(IncomingValue) - ?
IncomingValue - : State.get(State.Plan->getVPValue(IncomingValue, true), - VPIteration(UF - 1, Lane)); - LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock); + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast<VPRecipeBase>(U); + if (!UserRecipe) + continue; + for (VPValue *V : UserRecipe->definedValues()) + if (Visited.insert(V).second) + Worklist.push_back(V); + } } } @@ -4421,17 +4068,23 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { } while (Changed); } -void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { - for (PHINode *OrigPhi : OrigPHIsToFix) { - VPWidenPHIRecipe *VPPhi = - cast<VPWidenPHIRecipe>(State.Plan->getVPValue(OrigPhi)); - PHINode *NewPhi = cast<PHINode>(State.get(VPPhi, 0)); - // Make sure the builder has a valid insert point. - Builder.SetInsertPoint(NewPhi); - for (unsigned i = 0; i < VPPhi->getNumOperands(); ++i) { - VPValue *Inc = VPPhi->getIncomingValue(i); - VPBasicBlock *VPBB = VPPhi->getIncomingBlock(i); - NewPhi->addIncoming(State.get(Inc, 0), State.CFG.VPBB2IRBB[VPBB]); +void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan, + VPTransformState &State) { + auto Iter = depth_first( + VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { + for (VPRecipeBase &P : VPBB->phis()) { + VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P); + if (!VPPhi) + continue; + PHINode *NewPhi = cast<PHINode>(State.get(VPPhi, 0)); + // Make sure the builder has a valid insert point. + Builder.SetInsertPoint(NewPhi); + for (unsigned i = 0; i < VPPhi->getNumOperands(); ++i) { + VPValue *Inc = VPPhi->getIncomingValue(i); + VPBasicBlock *VPBB = VPPhi->getIncomingBlock(i); + NewPhi->addIncoming(State.get(Inc, 0), State.CFG.VPBB2IRBB[VPBB]); + } } } } @@ -4441,139 +4094,6 @@ bool InnerLoopVectorizer::useOrderedReductions( return Cost->useOrderedReductions(RdxDesc); } -void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, - VPWidenPHIRecipe *PhiR, - VPTransformState &State) { - PHINode *P = cast<PHINode>(PN); - if (EnableVPlanNativePath) { - // Currently we enter here in the VPlan-native path for non-induction - // PHIs where all control flow is uniform. We simply widen these PHIs. - // Create a vector phi with no operands - the vector phi operands will be - // set at the end of vector code generation. - Type *VecTy = (State.VF.isScalar()) - ? PN->getType() - : VectorType::get(PN->getType(), State.VF); - Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi"); - State.set(PhiR, VecPhi, 0); - OrigPHIsToFix.push_back(P); - - return; - } - - assert(PN->getParent() == OrigLoop->getHeader() && - "Non-header phis should have been handled elsewhere"); - - // In order to support recurrences we need to be able to vectorize Phi nodes. - // Phi nodes have cycles, so we need to vectorize them in two stages. This is - // stage #1: We create a new vector PHI node with no incoming edges. We'll use - // this value when we vectorize all of the instructions that use the PHI. - - assert(!Legal->isReductionVariable(P) && - "reductions should be handled elsewhere"); - - setDebugLocFromInst(P); - - // This PHINode must be an induction variable. - // Make sure that we know about it.
- assert(Legal->getInductionVars().count(P) && "Not an induction variable"); - - InductionDescriptor II = Legal->getInductionVars().lookup(P); - const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); - - auto *IVR = PhiR->getParent()->getPlan()->getCanonicalIV(); - PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0)); - - // FIXME: The newly created binary instructions should contain nsw/nuw flags, - // which can be found from the original scalar operations. - switch (II.getKind()) { - case InductionDescriptor::IK_NoInduction: - llvm_unreachable("Unknown induction"); - case InductionDescriptor::IK_IntInduction: - case InductionDescriptor::IK_FpInduction: - llvm_unreachable("Integer/fp induction is handled elsewhere."); - case InductionDescriptor::IK_PtrInduction: { - // Handle the pointer induction variable case. - assert(P->getType()->isPointerTy() && "Unexpected type."); - - if (Cost->isScalarAfterVectorization(P, State.VF)) { - // This is the normalized GEP that starts counting at zero. - Value *PtrInd = - Builder.CreateSExtOrTrunc(CanonicalIV, II.getStep()->getType()); - // Determine the number of scalars we need to generate for each unroll - // iteration. If the instruction is uniform, we only need to generate the - // first lane. Otherwise, we generate all VF values. - bool IsUniform = vputils::onlyFirstLaneUsed(PhiR); - assert((IsUniform || !State.VF.isScalable()) && - "Cannot scalarize a scalable VF"); - unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); - - for (unsigned Part = 0; Part < UF; ++Part) { - Value *PartStart = - createStepForVF(Builder, PtrInd->getType(), VF, Part); - - for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - Value *Idx = Builder.CreateAdd( - PartStart, ConstantInt::get(PtrInd->getType(), Lane)); - Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), - DL, II, State.CFG.PrevBB); - SclrGep->setName("next.gep"); - State.set(PhiR, SclrGep, VPIteration(Part, Lane)); - } - } - return; - } - assert(isa<SCEVConstant>(II.getStep()) && - "Induction step not a SCEV constant!"); - Type *PhiType = II.getStep()->getType(); - - // Build a pointer phi - Value *ScalarStartValue = PhiR->getStartValue()->getLiveInIRValue(); - Type *ScStValueType = ScalarStartValue->getType(); - PHINode *NewPointerPhi = - PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV); - NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader); - - // A pointer induction, performed by using a gep - BasicBlock *LoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); - Instruction *InductionLoc = LoopLatch->getTerminator(); - const SCEV *ScalarStep = II.getStep(); - SCEVExpander Exp(*PSE.getSE(), DL, "induction"); - Value *ScalarStepValue = - Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); - Value *RuntimeVF = getRuntimeVF(Builder, PhiType, VF); - Value *NumUnrolledElems = - Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF)); - Value *InductionGEP = GetElementPtrInst::Create( - II.getElementType(), NewPointerPhi, - Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind", - InductionLoc); - NewPointerPhi->addIncoming(InductionGEP, LoopLatch); - - // Create UF many actual address geps that use the pointer - // phi as base and a vectorized version of the step value - // (<step*0, step*1, ...>) as offset.
- for (unsigned Part = 0; Part < State.UF; ++Part) { - Type *VecPhiType = VectorType::get(PhiType, State.VF); - Value *StartOffsetScalar = - Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part)); - Value *StartOffset = - Builder.CreateVectorSplat(State.VF, StartOffsetScalar); - // Create a vector of consecutive numbers from zero to VF. - StartOffset = - Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType)); - - Value *GEP = Builder.CreateGEP( - II.getElementType(), NewPointerPhi, - Builder.CreateMul( - StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue), - "vector.gep")); - State.set(PhiR, GEP, Part); - } - } - } -} - /// A helper function for checking whether an integer division-related /// instruction may divide by zero (in which case it must be predicated if /// executed conditionally in the scalar code). @@ -4597,7 +4117,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, VPTransformState &State) { assert(!isa<DbgInfoIntrinsic>(I) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - setDebugLocFromInst(&I); + State.setDebugLocFromInst(&I); Module *M = I.getParent()->getParent()->getParent(); auto *CI = cast<CallInst>(&I); @@ -4627,13 +4147,13 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, // Some intrinsics have a scalar argument - don't replace it with a // vector. Value *Arg; - if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, I.index())) + if (!UseVectorIntrinsic || + !isVectorIntrinsicWithScalarOpAtArg(ID, I.index())) Arg = State.get(I.value(), Part); - else { + else Arg = State.get(I.value(), VPIteration(0, 0)); - if (hasVectorInstrinsicOverloadedScalarOpd(ID, I.index())) - TysForDecl.push_back(Arg->getType()); - } + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I.index())) + TysForDecl.push_back(Arg->getType()); Args.push_back(Arg); } @@ -4661,7 +4181,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, V->copyFastMathFlags(CI); State.set(Def, V, Part); - addMetadata(V, &I); + State.addMetadata(V, &I); } } @@ -4672,6 +4192,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { assert(VF.isVector() && Scalars.find(VF) == Scalars.end() && "This function should not be visited twice for the same VF"); + // This avoids any chances of creating a REPLICATE recipe during planning + // since that would result in generation of scalarized code during execution, + // which is not supported for scalable vectors. + if (VF.isScalable()) { + Scalars[VF].insert(Uniforms[VF].begin(), Uniforms[VF].end()); + return; + } + SmallSetVector<Instruction *, 8> Worklist; // These sets are used to seed the analysis with pointers used by memory @@ -4761,7 +4289,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { } // Insert the forced scalars. - // FIXME: Currently widenPHIInstruction() often creates a dead vector + // FIXME: Currently VPWidenPHIRecipe() often creates a dead vector // induction variable when the PHI user is scalarized. auto ForcedScalar = ForcedScalars.find(VF); if (ForcedScalar != ForcedScalars.end()) @@ -4888,6 +4416,27 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( if (hasIrregularType(ScalarTy, DL)) return false; + // If the group involves a non-integral pointer, we may not be able to + // losslessly cast all values to a common type.
+ unsigned InterleaveFactor = Group->getFactor(); + bool ScalarNI = DL.isNonIntegralPointerType(ScalarTy); + for (unsigned i = 0; i < InterleaveFactor; i++) { + Instruction *Member = Group->getMember(i); + if (!Member) + continue; + auto *MemberTy = getLoadStoreType(Member); + bool MemberNI = DL.isNonIntegralPointerType(MemberTy); + // Don't coerce non-integral pointers to integers or vice versa. + if (MemberNI != ScalarNI) { + // TODO: Consider adding special nullptr value case here + return false; + } else if (MemberNI && ScalarNI && + ScalarTy->getPointerAddressSpace() != + MemberTy->getPointerAddressSpace()) { + return false; + } + } + // Check if masking is required. // A Group may need masking for one of two reasons: it resides in a block that // needs predication, or it was decided to use masking to deal with gaps @@ -5170,7 +4719,7 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() { return true; } - if (!PSE.getUnionPredicate().getPredicates().empty()) { + if (!PSE.getPredicate().isAlwaysTrue()) { reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz", "runtime SCEV checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " @@ -5461,14 +5010,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { } } - // For scalable vectors don't use tail folding for low trip counts or - // optimizing for code size. We only permit this if the user has explicitly - // requested it. - if (ScalarEpilogueStatus != CM_ScalarEpilogueNotNeededUsePredicate && - ScalarEpilogueStatus != CM_ScalarEpilogueNotAllowedUsePredicate && - MaxFactors.ScalableVF.isVector()) - MaxFactors.ScalableVF = ElementCount::getScalable(0); - // If we don't know the precise trip count, or if the trip count that we // found modulo the vectorization factor is not zero, try to fold the tail // by masking. @@ -5511,7 +5052,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget( unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType, - const ElementCount &MaxSafeVF, bool FoldTailByMasking) { + ElementCount MaxSafeVF, bool FoldTailByMasking) { bool ComputeScalableMaxVF = MaxSafeVF.isScalable(); TypeSize WidestRegister = TTI.getRegisterBitWidth( ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector : TargetTransformInfo::RGK_FixedWidthVector @@ -5556,9 +5097,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget( return ElementCount::getFixed(ClampedConstTripCount); } + TargetTransformInfo::RegisterKind RegKind = + ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector + : TargetTransformInfo::RGK_FixedWidthVector; ElementCount MaxVF = MaxVectorElementCount; - if (TTI.shouldMaximizeVectorBandwidth() || - (MaximizeBandwidth && isScalarEpilogueAllowed())) { + if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 && + TTI.shouldMaximizeVectorBandwidth(RegKind))) { auto MaxVectorElementCountMaxBW = ElementCount::get( PowerOf2Floor(WidestRegister.getKnownMinSize() / SmallestType), ComputeScalableMaxVF); @@ -5596,10 +5140,27 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget( MaxVF = MinVF; } } + + // Invalidate any widening decisions we might have made, in case the loop + // requires predication (decided later), but we have already made some + // load/store widening decisions.
+ invalidateCostModelingDecisions(); } return MaxVF; } +Optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const { + if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) { + auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange); + auto Min = Attr.getVScaleRangeMin(); + auto Max = Attr.getVScaleRangeMax(); + if (Max && Min == Max) + return Max; + } + + return TTI.getVScaleForTuning(); +} + bool LoopVectorizationCostModel::isMoreProfitable( const VectorizationFactor &A, const VectorizationFactor &B) const { InstructionCost CostA = A.Cost; @@ -5624,7 +5185,7 @@ bool LoopVectorizationCostModel::isMoreProfitable( // Improve estimate for the vector width if it is scalable. unsigned EstimatedWidthA = A.Width.getKnownMinValue(); unsigned EstimatedWidthB = B.Width.getKnownMinValue(); - if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) { + if (Optional<unsigned> VScale = getVScaleForTuning()) { if (A.Width.isScalable()) EstimatedWidthA *= VScale.getValue(); if (B.Width.isScalable()) @@ -5651,7 +5212,8 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( assert(VFCandidates.count(ElementCount::getFixed(1)) && "Expected Scalar VF to be a candidate"); - const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost); + const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost, + ExpectedCost); VectorizationFactor ChosenFactor = ScalarCost; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; @@ -5669,12 +5231,12 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( continue; VectorizationCostTy C = expectedCost(i, &InvalidCosts); - VectorizationFactor Candidate(i, C.first); + VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost); #ifndef NDEBUG unsigned AssumedMinimumVscale = 1; - if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) - AssumedMinimumVscale = VScale.getValue(); + if (Optional<unsigned> VScale = getVScaleForTuning()) + AssumedMinimumVscale = *VScale; unsigned Width = Candidate.Width.isScalable() ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale @@ -5862,7 +5424,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";); ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF); if (LVP.hasPlanWithVF(ForcedEC)) - return {ForcedEC, 0}; + return {ForcedEC, 0, 0}; else { LLVM_DEBUG( dbgs() @@ -5885,8 +5447,20 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( return Result; } + // If MainLoopVF = vscale x 2, and vscale is expected to be 4, then we know + // the main loop handles 8 lanes per iteration. We could still benefit from + // vectorizing the epilogue loop with VF=4.
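Numerically, the estimate described in the comment above: with MainLoopVF = vscale x 2 and a tuning vscale of 4, the main loop covers 2 * 4 = 8 lanes per iteration, so a fixed epilogue VF of 4 still qualifies even though 4 is not provably less than vscale x 2. A sketch with hypothetical names:

// estimatedRuntimeLanes(2, 4) == 8; a candidate epilogue VF of 4 passes
// the 4 < 8 comparison performed via EstimatedRuntimeVF in the code below.
unsigned estimatedRuntimeLanes(unsigned KnownMinLanes, unsigned VScale) {
  return KnownMinLanes * VScale;
}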
+ ElementCount EstimatedRuntimeVF = MainLoopVF; + if (MainLoopVF.isScalable()) { + EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue()); + if (Optional VScale = getVScaleForTuning()) + EstimatedRuntimeVF *= *VScale; + } + for (auto &NextVF : ProfitableVFs) - if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) && + if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() && + ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) || + ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) && (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) && LVP.hasPlanWithVF(NextVF.Width)) Result = NextVF; @@ -6006,6 +5580,18 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, !(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar())) return 1; + // If we did not calculate the cost for VF (because the user selected the VF) + // then we calculate the cost of VF here. + if (LoopCost == 0) { + InstructionCost C = expectedCost(VF).first; + assert(C.isValid() && "Expected to have chosen a VF with valid cost"); + LoopCost = *C.getValue(); + + // Loop body is free and there is no need for interleaving. + if (LoopCost == 0) + return 1; + } + RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. @@ -6097,16 +5683,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, assert(IC > 0 && "Interleave count must be greater than 0."); - // If we did not calculate the cost for VF (because the user selected the VF) - // then we calculate the cost of VF here. - if (LoopCost == 0) { - InstructionCost C = expectedCost(VF).first; - assert(C.isValid() && "Expected to have chosen a VF with valid cost"); - LoopCost = *C.getValue(); - } - - assert(LoopCost && "Non-zero loop cost expected"); - // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. if (VF.isVector() && HasReductions) { @@ -6114,9 +5690,15 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, return IC; } - // Note that if we've already vectorized the loop we will have done the - // runtime check and so interleaving won't require further checks. - bool InterleavingRequiresRuntimePointerCheck = + // For any scalar loop that either requires runtime checks or predication we + // are better off leaving this to the unroller. Note that if we've already + // vectorized the loop we will have done the runtime check and so interleaving + // won't require further checks. 
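
As a concrete instance of the case the comment above carves out, consider a scalar loop whose body needs predication (a hypothetical example, not taken from the patch); interleaving it in the vectorizer would only replicate the branchy body, which the generic unroller already handles:

// VF = 1 and the guarded block needs predication, so with the change
// below the vectorizer declines to interleave this loop itself.
void sparseIncrement(int *Out, const int *Idx, const bool *Cond, int N) {
  for (int I = 0; I < N; ++I)
    if (Cond[I])        // block needs predication
      Out[Idx[I]] += 1; // indirect store, runtime checks likely too
}
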
+ bool ScalarInterleavingRequiresPredication =
+ (VF.isScalar() && any_of(TheLoop->blocks(), [this](BasicBlock *BB) {
+ return Legal->blockNeedsPredication(BB);
+ }));
+ bool ScalarInterleavingRequiresRuntimePointerCheck =
 (VF.isScalar() && Legal->getRuntimePointerChecking()->Need);
 
 // We want to interleave small loops in order to reduce the loop overhead and
@@ -6126,7 +5708,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
 << "LV: VF is " << VF << '\n');
 const bool AggressivelyInterleaveReductions =
 TTI.enableAggressiveInterleaving(HasReductions);
- if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) {
+ if (!ScalarInterleavingRequiresRuntimePointerCheck &&
+ !ScalarInterleavingRequiresPredication && LoopCost < SmallLoopCost) {
 // We assume that the cost overhead is 1 and we use the cost model
 // to estimate the cost of the loop and interleave until the cost of the
 // loop overhead is about 5% of the cost of the loop.
@@ -6289,16 +5872,10 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
 
 LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
 
- // A lambda that gets the register usage for the given type and VF.
- const auto &TTICapture = TTI;
- auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
+ auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned {
 if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
 return 0;
- InstructionCost::CostType RegUsage =
- *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue();
- assert(RegUsage >= 0 && RegUsage <= std::numeric_limits<unsigned>::max() &&
- "Nonsensical values for register usage.");
- return RegUsage;
+ return TTI.getRegUsageForType(VectorType::get(Ty, VF));
 };
 
 for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
@@ -7049,10 +6626,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
 
 bool TypeNotScalarized = false;
 if (VF.isVector() && VectorTy->isVectorTy()) {
- unsigned NumParts = TTI.getNumberOfParts(VectorTy);
- if (NumParts)
- TypeNotScalarized = NumParts < VF.getKnownMinValue();
- else
+ if (unsigned NumParts = TTI.getNumberOfParts(VectorTy)) {
+ if (VF.isScalable())
+ // <vscale x 1 x iN> is assumed to be profitable over iN because
+ // scalable registers are a distinct register class from scalar ones.
+ // If we ever find a target which wants to lower scalable vectors
+ // back to scalars, we'll need to update this code to explicitly
+ // ask TTI about the register class uses for each part.
+ TypeNotScalarized = NumParts <= VF.getKnownMinValue(); + else + TypeNotScalarized = NumParts < VF.getKnownMinValue(); + } else C = InstructionCost::getInvalid(); } return VectorizationCostTy(C, TypeNotScalarized); @@ -7128,8 +6712,6 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { Cost = getGatherScatterCost(&I, VF); setWideningDecision(&I, VF, CM_GatherScatter, Cost); } else { - assert((isa(&I) || !VF.isScalable()) && - "Cannot yet scalarize uniform stores"); Cost = getUniformMemOpCost(&I, VF); setWideningDecision(&I, VF, CM_Scalarize, Cost); } @@ -7487,8 +7069,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, InstWidening Decision = getWideningDecision(I, Width); assert(Decision != CM_Unknown && "CM decision should be taken at this point"); - if (Decision == CM_Scalarize) + if (Decision == CM_Scalarize) { + if (VF.isScalable() && isa(I)) + // We can't scalarize a scalable vector store (even a uniform one + // currently), return an invalid cost so as to prevent vectorization. + return InstructionCost::getInvalid(); Width = ElementCount::getFixed(1); + } } VectorTy = ToVectorTy(getLoadStoreType(I), Width); return getMemoryInstructionCost(I, VF); @@ -7656,6 +7243,16 @@ void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); + // Find all stores to invariant variables. Since they are going to sink + // outside the loop we do not need calculate cost for them. + for (BasicBlock *BB : TheLoop->blocks()) + for (Instruction &I : *BB) { + StoreInst *SI; + if ((SI = dyn_cast(&I)) && + Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) + ValuesToIgnore.insert(&I); + } + // Ignore type-promoting instructions we identified during reduction // detection. for (auto &Reduction : Legal->getReductionVars()) { @@ -7757,7 +7354,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { if (VPlanBuildStressTest) return VectorizationFactor::Disabled(); - return {VF, 0 /*Cost*/}; + return {VF, 0 /*Cost*/, 0 /* ScalarCost */}; } LLVM_DEBUG( @@ -7766,6 +7363,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { return VectorizationFactor::Disabled(); } +bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const { + unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks(); + return (NumRuntimePointerChecks > + VectorizerParams::RuntimeMemoryCheckThreshold && + !Hints.allowReordering()) || + NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; +} + Optional LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { assert(OrigLoop->isInnermost() && "Inner loop expected."); @@ -7800,7 +7405,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { CM.collectInLoopReductions(); buildVPlansWithVPRecipes(UserVF, UserVF); LLVM_DEBUG(printPlans(dbgs())); - return {{UserVF, 0}}; + return {{UserVF, 0, 0}}; } else reportVectorizationInfo("UserVF ignored because of invalid costs.", "InvalidCost", ORE, OrigLoop); @@ -7834,30 +7439,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return VectorizationFactor::Disabled(); // Select the optimal vectorization factor. - auto SelectedVF = CM.selectVectorizationFactor(VFCandidates); - - // Check if it is profitable to vectorize with runtime checks. 
- unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks(); - if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) { - bool PragmaThresholdReached = - NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; - bool ThresholdReached = - NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; - if ((ThresholdReached && !Hints.allowReordering()) || - PragmaThresholdReached) { - ORE->emit([&]() { - return OptimizationRemarkAnalysisAliasing( - DEBUG_TYPE, "CantReorderMemOps", OrigLoop->getStartLoc(), - OrigLoop->getHeader()) - << "loop not vectorized: cannot prove it is safe to reorder " - "memory operations"; - }); - LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); - Hints.emitRemarkWithHints(); - return VectorizationFactor::Disabled(); - } - } - return SelectedVF; + return CM.selectVectorizationFactor(VFCandidates); } VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { @@ -7910,17 +7492,36 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) { void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, InnerLoopVectorizer &ILV, - DominatorTree *DT) { + DominatorTree *DT, + bool IsEpilogueVectorization) { LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF << '\n'); // Perform the actual loop transformation. - // 1. Create a new empty loop. Unlink the old loop and connect the new one. + // 1. Set up the skeleton for vectorization, including vector pre-header and + // middle block. The vector loop is created during VPlan execution. VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; Value *CanonicalIVStartValue; std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(); + + // Only use noalias metadata when using memory checks guaranteeing no overlap + // across all iterations. + const LoopAccessInfo *LAI = ILV.Legal->getLAI(); + if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() && + !LAI->getRuntimePointerChecking()->getDiffChecks()) { + + // We currently don't use LoopVersioning for the actual loop cloning but we + // still use it to add the noalias metadata. + // TODO: Find a better way to re-use LoopVersioning functionality to add + // metadata. + State.LVer = std::make_unique( + *LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT, + PSE.getSE()); + State.LVer->prepareNoAliasMetadata(); + } + ILV.collectPoisonGeneratingRecipes(State); ILV.printDebugTracesAtStart(); @@ -7936,7 +7537,9 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, // 2. Copy and widen instructions from the old loop into the new loop. 
BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), ILV.getOrCreateVectorTripCount(nullptr), - CanonicalIVStartValue, State); + CanonicalIVStartValue, State, + IsEpilogueVectorization); + BestVPlan.execute(&State); // Keep all loop hints from the original loop on the vector loop (we'll @@ -7947,8 +7550,10 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupVectorized}); - Loop *L = LI->getLoopFor(State.CFG.PrevBB); - if (VectorizedLoopID.hasValue()) + VPBasicBlock *HeaderVPBB = + BestVPlan.getVectorLoopRegion()->getEntryBasicBlock(); + Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]); + if (VectorizedLoopID) L->setLoopID(VectorizedLoopID.getValue()); else { // Keep all loop hints from the original loop on the vector loop (we'll @@ -7965,7 +7570,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. - ILV.fixVectorizedLoop(State); + ILV.fixVectorizedLoop(State, BestVPlan); ILV.printDebugTracesAtEnd(); } @@ -8036,22 +7641,31 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } std::pair EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { MDNode *OrigLoopID = OrigLoop->getLoopID(); - Loop *Lp = createVectorLoopSkeleton(""); + + // Workaround! Compute the trip count of the original loop and cache it + // before we start modifying the CFG. This code has a systemic problem + // wherein it tries to run analysis over partially constructed IR; this is + // wrong, and not simply for SCEV. The trip count of the original loop + // simply happens to be prone to hitting this in practice. In theory, we + // can hit the same issue for any SCEV, or ValueTracking query done during + // mutation. See PR49900. + getOrCreateTripCount(OrigLoop->getLoopPreheader()); + createVectorLoopSkeleton(""); // Generate the code to check the minimum iteration count of the vector // epilogue (see below). EPI.EpilogueIterationCountCheck = - emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, true); + emitIterationCountCheck(LoopScalarPreHeader, true); EPI.EpilogueIterationCountCheck->setName("iter.check"); // Generate the code to check any assumptions that we've made for SCEV // expressions. - EPI.SCEVSafetyCheck = emitSCEVChecks(Lp, LoopScalarPreHeader); + EPI.SCEVSafetyCheck = emitSCEVChecks(LoopScalarPreHeader); // Generate the code that checks at runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. - EPI.MemSafetyCheck = emitMemRuntimeChecks(Lp, LoopScalarPreHeader); + EPI.MemSafetyCheck = emitMemRuntimeChecks(LoopScalarPreHeader); // Generate the iteration count check for the main loop, *after* the check // for the epilogue loop, so that the path-length is shorter for the case @@ -8060,19 +7674,17 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { // trip count. Note: the branch will get updated later on when we vectorize // the epilogue. EPI.MainLoopIterationCountCheck = - emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, false); + emitIterationCountCheck(LoopScalarPreHeader, false); // Generate the induction variable. 
- Value *CountRoundDown = getOrCreateVectorTripCount(Lp); - EPI.VectorTripCount = CountRoundDown; - createHeaderBranch(Lp); + EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); // Skip induction resume value creation here because they will be created in // the second pass. If we created them here, they wouldn't be used anyway, // because the vplan in the second pass still contains the inductions from the // original loop. - return {completeLoopSkeleton(Lp, OrigLoopID), nullptr}; + return {completeLoopSkeleton(OrigLoopID), nullptr}; } void EpilogueVectorizerMainLoop::printDebugTracesAtStart() { @@ -8092,13 +7704,13 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() { }); } -BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck( - Loop *L, BasicBlock *Bypass, bool ForEpilogue) { - assert(L && "Expected valid Loop."); +BasicBlock * +EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, + bool ForEpilogue) { assert(Bypass && "Expected valid bypass basic block."); ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF; unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF; - Value *Count = getOrCreateTripCount(L); + Value *Count = getOrCreateTripCount(LoopVectorPreHeader); // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. BasicBlock *const TCCheckBlock = LoopVectorPreHeader; @@ -8157,7 +7769,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck( std::pair EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { MDNode *OrigLoopID = OrigLoop->getLoopID(); - Loop *Lp = createVectorLoopSkeleton("vec.epilog."); + createVectorLoopSkeleton("vec.epilog."); // Now, compare the remaining count and if there aren't enough iterations to // execute the vectorized epilogue skip to the scalar part. @@ -8166,7 +7778,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { LoopVectorPreHeader = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI, nullptr, "vec.epilog.ph"); - emitMinimumVectorEpilogueIterCountCheck(Lp, LoopScalarPreHeader, + emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader, VecEpilogueIterationCountCheck); // Adjust the control flow taking the state info from the main loop @@ -8238,9 +7850,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0), EPI.MainLoopIterationCountCheck); - // Generate the induction variable. - createHeaderBranch(Lp); - // Generate induction resume values. These variables save the new starting // indexes for the scalar loop. They are used to test if there are any tail // iterations left once the vector loop has completed. @@ -8248,15 +7857,15 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { // check, then the resume value for the induction variable comes from // the trip count of the main vector loop, hence passing the AdditionalBypass // argument. 
- createInductionResumeValues(Lp, {VecEpilogueIterationCountCheck, - EPI.VectorTripCount} /* AdditionalBypass */); + createInductionResumeValues({VecEpilogueIterationCountCheck, + EPI.VectorTripCount} /* AdditionalBypass */); - return {completeLoopSkeleton(Lp, OrigLoopID), EPResumeVal}; + return {completeLoopSkeleton(OrigLoopID), EPResumeVal}; } BasicBlock * EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( - Loop *L, BasicBlock *Bypass, BasicBlock *Insert) { + BasicBlock *Bypass, BasicBlock *Insert) { assert(EPI.TripCount && "Expected trip count to have been safed in the first pass."); @@ -8397,7 +8006,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { // constructing the desired canonical IV in the header block as its first // non-phi instructions. assert(CM.foldTailByMasking() && "must fold the tail"); - VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock(); + VPBasicBlock *HeaderVPBB = + Plan->getVectorLoopRegion()->getEntryBasicBlock(); auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi(); auto *IV = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV()); HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi()); @@ -8439,8 +8049,6 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, "Must be called with either a load or store"); auto willWiden = [&](ElementCount VF) -> bool { - if (VF.isScalar()) - return false; LoopVectorizationCostModel::InstWidening Decision = CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && @@ -8477,11 +8085,12 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, Mask, Consecutive, Reverse); } -static VPWidenIntOrFpInductionRecipe * -createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc, - VPValue *Start, const InductionDescriptor &IndDesc, - LoopVectorizationCostModel &CM, Loop &OrigLoop, - VFRange &Range) { +/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also +/// insert a recipe to expand the step for the induction recipe. +static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipes( + PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start, + const InductionDescriptor &IndDesc, LoopVectorizationCostModel &CM, + VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, VFRange &Range) { // Returns true if an instruction \p I should be scalarized instead of // vectorized for the chosen vectorization factor. auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) { @@ -8489,18 +8098,6 @@ createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc, CM.isProfitableToScalarize(I, VF); }; - bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { - // Returns true if we should generate a scalar version of \p IV. 
- if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
- return true;
- auto isScalarInst = [&](User *U) -> bool {
- auto *I = cast<Instruction>(U);
- return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
- };
- return any_of(PhiOrTrunc->users(), isScalarInst);
- },
- Range);
 bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
 [&](ElementCount VF) {
 return ShouldScalarizeInstruction(PhiOrTrunc, VF);
@@ -8508,30 +8105,38 @@ createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
 Range);
 assert(IndDesc.getStartValue() ==
 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+ assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+ "step must be loop invariant");
+
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
 if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
- NeedsScalarIV, !NeedsScalarIVOnly);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI,
+ !NeedsScalarIVOnly);
 }
 assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc,
 !NeedsScalarIVOnly);
 }
 
-VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
- PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
 // Check if this is an integer or fp induction. If so, build the recipe that
 // produces its scalar and vector values.
 if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
- Range);
+ return createWidenInductionRecipes(Phi, Phi, Operands[0], *II, CM, Plan,
+ *PSE.getSE(), *OrigLoop, Range);
 
+ // Check if this is pointer induction. If so, build the recipe for it.
+ if (auto *II = Legal->getPointerInductionDescriptor(Phi))
+ return new VPWidenPointerInductionRecipe(Phi, Operands[0], *II,
+ *PSE.getSE());
 return nullptr;
 }
 
 VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
- TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range,
- VPlan &Plan) const {
+ TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
 // Optimize the special case where the source is a constant integer
 // induction variable. Notice that we can only optimize the 'trunc' case
 // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
@@ -8552,7 +8157,8 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
 auto *Phi = cast<PHINode>(I->getOperand(0));
 const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
 VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
+ return createWidenInductionRecipes(Phi, I, Start, II, CM, Plan,
+ *PSE.getSE(), *OrigLoop, Range);
 }
 return nullptr;
 }
@@ -8569,13 +8175,30 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
 return Operands[0];
 }
 
+ unsigned NumIncoming = Phi->getNumIncomingValues();
+ // For in-loop reductions, we do not need to create an additional select.
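
The short-circuit implemented below simply forwards the non-reduction operand: the in-loop reduction recipe already folds the edge mask into its own computation, so an extra select would be redundant. A toy model of the two-incoming case (standalone C++ sketch with hypothetical names):

#include <array>
#include <cassert>

// Mirrors "return Operands[Operands[0] == InLoopVal ? 1 : 0];" below.
int blendWithInLoopReduction(std::array<int, 2> Incoming, int InLoopVal) {
  return Incoming[0] == InLoopVal ? Incoming[1] : Incoming[0];
}

int main() {
  assert(blendWithInLoopReduction({7, 42}, 7) == 42);
  assert(blendWithInLoopReduction({7, 42}, 42) == 7);
}
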
+ VPValue *InLoopVal = nullptr;
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ PHINode *PhiOp =
+ dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue());
+ if (PhiOp && CM.isInLoopReduction(PhiOp)) {
+ assert(!InLoopVal && "Found more than one in-loop reduction!");
+ InLoopVal = Operands[In];
+ }
+ }
+
+ assert((!InLoopVal || NumIncoming == 2) &&
+ "Found an in-loop reduction for PHI with unexpected number of "
+ "incoming values");
+ if (InLoopVal)
+ return Operands[Operands[0] == InLoopVal ? 1 : 0];
+
 // We know that all PHIs in non-header blocks are converted into selects, so
 // we don't have to worry about the insertion order and we can just use the
 // builder. At this point we generate the predication tree. There may be
 // duplications since this is a simple recursive scan, but future
 // optimizations will clean it up.
 
 SmallVector<VPValue *, 2> OperandsWithMask;
- unsigned NumIncoming = Phi->getNumIncomingValues();
 
 for (unsigned In = 0; In < NumIncoming; In++) {
 VPValue *EdgeMask =
@@ -8681,6 +8304,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
 case Instruction::URem:
 case Instruction::Xor:
 case Instruction::ZExt:
+ case Instruction::Freeze:
 return true;
 }
 return false;
@@ -8806,14 +8430,14 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
 Plan->removeVPValueFor(Instr);
 Plan->addVPValue(Instr, PHIRecipe);
 }
- auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
+ auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
 auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe);
- VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true);
+ VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
 
 // Note: first set Entry as region entry and then connect successors starting
 // from it in order, to propagate the "parent" of each VPBasicBlock.
- VPBlockUtils::insertTwoBlocksAfter(Pred, Exit, BlockInMask, Entry);
- VPBlockUtils::connectBlocks(Pred, Exit);
+ VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
+ VPBlockUtils::connectBlocks(Pred, Exiting);
 
 return Region;
 }
@@ -8822,52 +8446,37 @@ VPRecipeOrVPValueTy
 VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
 ArrayRef<VPValue *> Operands,
 VFRange &Range, VPlanPtr &Plan) {
- // First, check for specific widening recipes that deal with calls, memory
- // operations, inductions and Phi nodes.
- if (auto *CI = dyn_cast<CallInst>(Instr))
- return toVPRecipeResult(tryToWidenCall(CI, Operands, Range));
-
- if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
- return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
-
+ // First, check for specific widening recipes that deal with inductions, Phi
+ // nodes, calls and memory operations.
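
The VF.isScalar() bail-out added a few lines below relies on LoopVectorizationPlanner::getDecisionAndClampRange, which takes the decision of the first VF in a range and shrinks the range so that every VF left in it agrees with that decision. A simplified standalone model, assuming VFs are consecutive powers of two (ToyRange and decideAndClamp are hypothetical names, not the real implementation):

#include <cassert>
#include <functional>

struct ToyRange { unsigned Start, End; }; // VFs in [Start, End)

bool decideAndClamp(std::function<bool(unsigned)> Pred, ToyRange &R) {
  bool Decision = Pred(R.Start);
  unsigned VF = R.Start * 2;
  while (VF < R.End && Pred(VF) == Decision)
    VF *= 2;
  R.End = VF; // every VF still in [Start, End) shares Decision
  return Decision;
}

int main() {
  ToyRange R{1, 16};
  // "Is this VF scalar?" holds for 1 only, so the range collapses to
  // {1, 2} and the caller returns no widening recipe for it.
  assert(decideAndClamp([](unsigned VF) { return VF == 1; }, R));
  assert(R.End == 2);
}
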
 VPRecipeBase *Recipe;
 if (auto Phi = dyn_cast<PHINode>(Instr)) {
 if (Phi->getParent() != OrigLoop->getHeader())
 return tryToBlend(Phi, Operands, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
 return toVPRecipeResult(Recipe);
 
 VPHeaderPHIRecipe *PhiRecipe = nullptr;
- if (Legal->isReductionVariable(Phi) || Legal->isFirstOrderRecurrence(Phi)) {
- VPValue *StartV = Operands[0];
- if (Legal->isReductionVariable(Phi)) {
- const RecurrenceDescriptor &RdxDesc =
- Legal->getReductionVars().find(Phi)->second;
- assert(RdxDesc.getRecurrenceStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
- CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc));
- } else {
- PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
- }
-
- // Record the incoming value from the backedge, so we can add the incoming
- // value from the backedge after all recipes have been created.
- recordRecipeOf(cast<Instruction>(
- Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
- PhisToFix.push_back(PhiRecipe);
+ assert((Legal->isReductionVariable(Phi) ||
+ Legal->isFirstOrderRecurrence(Phi)) &&
+ "can only widen reductions and first-order recurrences here");
+ VPValue *StartV = Operands[0];
+ if (Legal->isReductionVariable(Phi)) {
+ const RecurrenceDescriptor &RdxDesc =
+ Legal->getReductionVars().find(Phi)->second;
+ assert(RdxDesc.getRecurrenceStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
+ PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
+ CM.isInLoopReduction(Phi),
+ CM.useOrderedReductions(RdxDesc));
 } else {
- // TODO: record backedge value for remaining pointer induction phis.
- assert(Phi->getType()->isPointerTy() &&
- "only pointer phis should be handled here");
- assert(Legal->getInductionVars().count(Phi) &&
- "Not an induction variable");
- InductionDescriptor II = Legal->getInductionVars().lookup(Phi);
- VPValue *Start = Plan->getOrAddVPValue(II.getStartValue());
- PhiRecipe = new VPWidenPHIRecipe(Phi, Start);
+ PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
 }
 
+ // Record the incoming value from the backedge, so we can add the incoming
+ // value from the backedge after all recipes have been created.
+ recordRecipeOf(cast<Instruction>(
+ Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
+ PhisToFix.push_back(PhiRecipe);
 return toVPRecipeResult(PhiRecipe);
 }
 
@@ -8876,6 +8485,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
 Range, *Plan)))
 return toVPRecipeResult(Recipe);
 
+ // All widen recipes below deal only with VF > 1.
+ if (LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) { return VF.isScalar(); }, Range))
+ return nullptr;
+
+ if (auto *CI = dyn_cast<CallInst>(Instr))
+ return toVPRecipeResult(tryToWidenCall(CI, Operands, Range));
+
+ if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
+ return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
+
 if (!shouldWiden(Instr, Range))
 return nullptr;
 
@@ -8949,15 +8569,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
 // CanonicalIVIncrement{NUW} VPInstruction to increment it by VF * UF and a
 // BranchOnCount VPInstruction to the latch.
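
Operationally, the canonical-IV recipes described above reduce to the following scalar schema (a behavioral sketch in plain C++, not VPlan code; it assumes the vector loop is entered and the vector trip count is a multiple of VF * UF):

#include <cassert>

unsigned runVectorLoop(unsigned VectorTripCount, unsigned VF, unsigned UF) {
  unsigned CanonicalIV = 0; // VPCanonicalIVPHIRecipe starts at 0
  unsigned Iterations = 0;
  do {
    CanonicalIV += VF * UF; // CanonicalIVIncrement
    ++Iterations;
  } while (CanonicalIV != VectorTripCount); // BranchOnCount exits on equality
  return Iterations;
}

int main() {
  // 64 scalar iterations at VF=4, UF=2: eight lanes retire per iteration.
  assert(runVectorLoop(64, 4, 2) == 8);
}
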
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, - bool HasNUW, bool IsVPlanNative) { + bool HasNUW) { Value *StartIdx = ConstantInt::get(IdxTy, 0); auto *StartV = Plan.getOrAddVPValue(StartIdx); auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); - if (IsVPlanNative) - Header = cast(Header->getSingleSuccessor()); Header->insert(CanonicalIVPHI, Header->begin()); auto *CanonicalIVIncrement = @@ -8966,11 +8584,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, {CanonicalIVPHI}, DL); CanonicalIVPHI->addOperand(CanonicalIVIncrement); - VPBasicBlock *EB = TopRegion->getExitBasicBlock(); - if (IsVPlanNative) { - EB = cast(EB->getSinglePredecessor()); - EB->setCondBit(nullptr); - } + VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); EB->appendRecipe(CanonicalIVIncrement); auto *BranchOnCount = @@ -8979,6 +8593,26 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, EB->appendRecipe(BranchOnCount); } +// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the +// original exit block. +static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, + VPBasicBlock *MiddleVPBB, Loop *OrigLoop, + VPlan &Plan) { + BasicBlock *ExitBB = OrigLoop->getUniqueExitBlock(); + BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); + // Only handle single-exit loops with unique exit blocks for now. + if (!ExitBB || !ExitBB->getSinglePredecessor() || !ExitingBB) + return; + + // Introduce VPUsers modeling the exit values. + for (PHINode &ExitPhi : ExitBB->phis()) { + Value *IncomingValue = + ExitPhi.getIncomingValueForBlock(ExitingBB); + VPValue *V = Plan.getOrAddVPValue(IncomingValue, true); + Plan.addLiveOut(&ExitPhi, V); + } +} + VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &DeadInstructions, const MapVector &SinkAfter) { @@ -9007,7 +8641,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( RecipeBuilder.recordRecipeOf(Phi); for (auto &R : ReductionOperations) { RecipeBuilder.recordRecipeOf(R); - // For min/max reducitons, where we have a pair of icmp/select, we also + // For min/max reductions, where we have a pair of icmp/select, we also // need to record the ICmp recipe, so it can be removed later. assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) && "Only min/max recurrences allowed for inloop reductions"); @@ -9039,18 +8673,25 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - // Create initial VPlan skeleton, with separate header and latch blocks. - VPBasicBlock *HeaderVPBB = new VPBasicBlock(); + // Create initial VPlan skeleton, starting with a block for the pre-header, + // followed by a region for the vector loop, followed by the middle block. The + // skeleton vector loop region contains a header and latch block. 
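
For orientation, the skeleton built below has this shape (VPlan blocks only; the bracketed part is the vector-loop region, and the names match the strings used in the code):

  vector.ph -> [ vector.body -> vector.latch ] -> middle.block
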
+ VPBasicBlock *Preheader = new VPBasicBlock("vector.ph"); + auto Plan = std::make_unique(Preheader); + + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - auto Plan = std::make_unique(TopRegion); + VPBlockUtils::insertBlockAfter(TopRegion, Preheader); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); Instruction *DLInst = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DLInst ? DLInst->getDebugLoc() : DebugLoc(), - !CM.foldTailByMasking(), false); + !CM.foldTailByMasking()); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. @@ -9063,11 +8704,12 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - VPBB->setName(BB->getName()); + if (VPBB != HeaderVPBB) + VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); // Introduce each ingredient into VPlan. - // TODO: Model and preserve debug instrinsics in VPlan. + // TODO: Model and preserve debug intrinsics in VPlan. for (Instruction &I : BB->instructionsWithoutDebug()) { Instruction *Instr = &I; @@ -9085,6 +8727,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( auto OpRange = Plan->mapToVPValues(Instr->operands()); Operands = {OpRange.begin(), OpRange.end()}; } + + // Invariant stores inside loop will be deleted and a single store + // with the final reduction value will be added to the exit block + StoreInst *SI; + if ((SI = dyn_cast(&I)) && + Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) + continue; + if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( Instr, Operands, Range, Plan)) { // If Instr can be simplified to an existing VPValue, use it. @@ -9135,14 +8785,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBB = cast(VPBB->getSingleSuccessor()); } + HeaderVPBB->setName("vector.body"); + // Fold the last, empty block into its predecessor. VPBB = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB); assert(VPBB && "expected to fold last (empty) block"); // After here, VPBB should not be used. VPBB = nullptr; - assert(isa(Plan->getEntry()) && - !Plan->getEntry()->getEntryBasicBlock()->empty() && + addUsersInExitBlock(HeaderVPBB, MiddleVPBB, OrigLoop, *Plan); + + assert(isa(Plan->getVectorLoopRegion()) && + !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); @@ -9222,12 +8876,13 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi()); // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(cast(TopRegion->getExit()), Plan, + adjustRecipesForReductions(cast(TopRegion->getExiting()), Plan, RecipeBuilder, Range.Start); // Introduce a recipe to combine the incoming and previous values of a // first-order recurrence. 
- for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) { + for (VPRecipeBase &R : + Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { auto *RecurPhi = dyn_cast(&R); if (!RecurPhi) continue; @@ -9236,7 +8891,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBasicBlock *InsertBlock = PrevRecipe->getParent(); auto *Region = GetReplicateRegion(PrevRecipe); if (Region) - InsertBlock = cast(Region->getSingleSuccessor()); + InsertBlock = dyn_cast(Region->getSingleSuccessor()); + if (!InsertBlock) { + InsertBlock = new VPBasicBlock(Region->getName() + ".succ"); + VPBlockUtils::insertBlockAfter(InsertBlock, Region); + } if (Region || PrevRecipe->isPhi()) Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi()); else @@ -9283,13 +8942,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } } - // From this point onwards, VPlan-to-VPlan transformations may change the plan - // in ways that accessing values using original IR values is incorrect. - Plan->disableValue2VPValue(); - - VPlanTransforms::sinkScalarOperands(*Plan); - VPlanTransforms::mergeReplicateRegions(*Plan); - std::string PlanName; raw_string_ostream RSO(PlanName); ElementCount VF = Range.Start; @@ -9303,10 +8955,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( RSO.flush(); Plan->setName(PlanName); + // From this point onwards, VPlan-to-VPlan transformations may change the plan + // in ways that accessing values using original IR values is incorrect. + Plan->disableValue2VPValue(); + + VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE()); + VPlanTransforms::sinkScalarOperands(*Plan); + VPlanTransforms::mergeReplicateRegions(*Plan); + VPlanTransforms::removeDeadRecipes(*Plan); + VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan); + // Fold Exit block into its predecessor if possible. // TODO: Fold block earlier once all VPlan transforms properly maintain a // VPBasicBlock as exit. - VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit()); + VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExiting()); assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; @@ -9331,23 +8993,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { VF *= 2) Plan->addVF(VF); - if (EnableVPlanPredication) { - VPlanPredicator VPP(*Plan); - VPP.predicate(); - - // Avoid running transformation to recipes until masked code generation in - // VPlan-native path is in place. - return Plan; - } - SmallPtrSet DeadInstructions; VPlanTransforms::VPInstructionsToVPRecipes( OrigLoop, Plan, [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); }, DeadInstructions, *PSE.getSE()); + // Remove the existing terminator of the exiting block of the top-most region. + // A BranchOnCount will be added instead when adding the canonical IV recipes. + auto *Term = + Plan->getVectorLoopRegion()->getExitingBasicBlock()->getTerminator(); + Term->eraseFromParent(); + addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(), - true, true); + true); return Plan; } @@ -9399,7 +9058,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId; VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId)); - auto *CondOp = CM.foldTailByMasking() + auto *CondOp = CM.blockNeedsPredicationForAnyReason(R->getParent()) ? 
RecipeBuilder.createBlockInMask(R->getParent(), Plan) : nullptr; @@ -9441,7 +9100,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // dedicated latch block. if (CM.foldTailByMasking()) { Builder.setInsertPoint(LatchVPBB, LatchVPBB->begin()); - for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) { + for (VPRecipeBase &R : + Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { VPReductionPHIRecipe *PhiR = dyn_cast(&R); if (!PhiR || PhiR->isInLoop()) continue; @@ -9493,7 +9153,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { void VPWidenSelectRecipe::execute(VPTransformState &State) { auto &I = *cast(getUnderlyingInstr()); - State.ILV->setDebugLocFromInst(&I); + State.setDebugLocFromInst(&I); // The condition can be loop invariant but still defined inside the // loop. This means that we can't just use the original 'cond' value. @@ -9508,7 +9168,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { Value *Op1 = State.get(getOperand(2), Part); Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); State.set(this, Sel, Part); - State.ILV->addMetadata(Sel, &I); + State.addMetadata(Sel, &I); } } @@ -9542,7 +9202,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { case Instruction::Or: case Instruction::Xor: { // Just widen unops and binops. - State.ILV->setDebugLocFromInst(&I); + State.setDebugLocFromInst(&I); for (unsigned Part = 0; Part < State.UF; ++Part) { SmallVector Ops; @@ -9565,17 +9225,28 @@ void VPWidenRecipe::execute(VPTransformState &State) { // Use this vector value for all users of the original instruction. State.set(this, V, Part); - State.ILV->addMetadata(V, &I); + State.addMetadata(V, &I); } break; } + case Instruction::Freeze: { + State.setDebugLocFromInst(&I); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *Op = State.get(getOperand(0), Part); + + Value *Freeze = Builder.CreateFreeze(Op); + State.set(this, Freeze, Part); + } + break; + } case Instruction::ICmp: case Instruction::FCmp: { // Widen compares. Generate vector compares. bool FCmp = (I.getOpcode() == Instruction::FCmp); auto *Cmp = cast(&I); - State.ILV->setDebugLocFromInst(Cmp); + State.setDebugLocFromInst(Cmp); for (unsigned Part = 0; Part < State.UF; ++Part) { Value *A = State.get(getOperand(0), Part); Value *B = State.get(getOperand(1), Part); @@ -9589,7 +9260,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { C = Builder.CreateICmp(Cmp->getPredicate(), A, B); } State.set(this, C, Part); - State.ILV->addMetadata(C, &I); + State.addMetadata(C, &I); } break; @@ -9608,7 +9279,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { case Instruction::FPTrunc: case Instruction::BitCast: { auto *CI = cast(&I); - State.ILV->setDebugLocFromInst(CI); + State.setDebugLocFromInst(CI); /// Vectorize casts. 
Type *DestTy = (State.VF.isScalar()) @@ -9619,7 +9290,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { Value *A = State.get(getOperand(0), Part); Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); State.set(this, Cast, Part); - State.ILV->addMetadata(Cast, &I); + State.addMetadata(Cast, &I); } break; } @@ -9655,7 +9326,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { for (unsigned Part = 0; Part < State.UF; ++Part) { Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); State.set(this, EntryPart, Part); - State.ILV->addMetadata(EntryPart, GEP); + State.addMetadata(EntryPart, GEP); } } else { // If the GEP has at least one loop-varying operand, we are sure to @@ -9693,32 +9364,276 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // Create the new GEP. Note that this GEP may be a scalar if VF == 1, // but it should be a vector, otherwise. - auto *NewGEP = IsInBounds - ? State.Builder.CreateInBoundsGEP( - GEP->getSourceElementType(), Ptr, Indices) - : State.Builder.CreateGEP(GEP->getSourceElementType(), - Ptr, Indices); + auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, + Indices, "", IsInBounds); assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); State.set(this, NewGEP, Part); - State.ILV->addMetadata(NewGEP, GEP); + State.addMetadata(NewGEP, GEP); } } } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); - auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0); - State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV); + + Value *Start = getStartValue()->getLiveInIRValue(); + const InductionDescriptor &ID = getInductionDescriptor(); + TruncInst *Trunc = getTruncInst(); + IRBuilderBase &Builder = State.Builder; + assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); + assert(State.VF.isVector() && "must have vector VF"); + + // The value from the original loop to which we are mapping the new induction + // variable. + Instruction *EntryVal = Trunc ? cast(Trunc) : IV; + + // Fast-math-flags propagate from the original induction instruction. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) + Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); + + // Now do the actual transformations, and start with fetching the step value. + Value *Step = State.get(getStepValue(), VPIteration(0, 0)); + + assert((isa(EntryVal) || isa(EntryVal)) && + "Expected either an induction phi-node or a truncate of it!"); + + // Construct the initial value of the vector IV in the vector loop preheader + auto CurrIP = Builder.saveIP(); + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + Builder.SetInsertPoint(VectorPH->getTerminator()); + if (isa(EntryVal)) { + assert(Start->getType()->isIntegerTy() && + "Truncation requires an integer type"); + auto *TruncType = cast(EntryVal->getType()); + Step = Builder.CreateTrunc(Step, TruncType); + Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); + } + + Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); + Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); + Value *SteppedStart = getStepVector( + SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder); + + // We create vector phi nodes for both integer and floating-point induction + // variables. 
Here, we determine the kind of arithmetic we will perform. + Instruction::BinaryOps AddOp; + Instruction::BinaryOps MulOp; + if (Step->getType()->isIntegerTy()) { + AddOp = Instruction::Add; + MulOp = Instruction::Mul; + } else { + AddOp = ID.getInductionOpcode(); + MulOp = Instruction::FMul; + } + + // Multiply the vectorization factor by the step using integer or + // floating-point arithmetic as appropriate. + Type *StepType = Step->getType(); + Value *RuntimeVF; + if (Step->getType()->isFloatingPointTy()) + RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); + else + RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); + Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); + + // Create a vector splat to use in the induction update. + // + // FIXME: If the step is non-constant, we create the vector splat with + // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't + // handle a constant vector splat. + Value *SplatVF = isa(Mul) + ? ConstantVector::getSplat(State.VF, cast(Mul)) + : Builder.CreateVectorSplat(State.VF, Mul); + Builder.restoreIP(CurrIP); + + // We may need to add the step a number of times, depending on the unroll + // factor. The last of those goes into the PHI. + PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", + &*State.CFG.PrevBB->getFirstInsertionPt()); + VecInd->setDebugLoc(EntryVal->getDebugLoc()); + Instruction *LastInduction = VecInd; + for (unsigned Part = 0; Part < State.UF; ++Part) { + State.set(this, LastInduction, Part); + + if (isa(EntryVal)) + State.addMetadata(LastInduction, EntryVal); + + LastInduction = cast( + Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); + LastInduction->setDebugLoc(EntryVal->getDebugLoc()); + } + + LastInduction->setName("vec.ind.next"); + VecInd->addIncoming(SteppedStart, VectorPH); + // Add induction update using an incorrect block temporarily. The phi node + // will be fixed after VPlan execution. Note that at this point the latch + // block cannot be used, as it does not exist yet. + // TODO: Model increment value in VPlan, by turning the recipe into a + // multi-def and a subclass of VPHeaderPHIRecipe. + VecInd->addIncoming(LastInduction, VectorPH); +} + +void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { + assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction && + "Not a pointer induction according to InductionDescriptor!"); + assert(cast(getUnderlyingInstr())->getType()->isPointerTy() && + "Unexpected type."); + + auto *IVR = getParent()->getPlan()->getCanonicalIV(); + PHINode *CanonicalIV = cast(State.get(IVR, 0)); + + if (onlyScalarsGenerated(State.VF)) { + // This is the normalized GEP that starts counting at zero. + Value *PtrInd = State.Builder.CreateSExtOrTrunc( + CanonicalIV, IndDesc.getStep()->getType()); + // Determine the number of scalars we need to generate for each unroll + // iteration. If the instruction is uniform, we only need to generate the + // first lane. Otherwise, we generate all VF values. + bool IsUniform = vputils::onlyFirstLaneUsed(this); + assert((IsUniform || !State.VF.isScalable()) && + "Cannot scalarize a scalable VF"); + unsigned Lanes = IsUniform ? 
1 : State.VF.getFixedValue(); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *PartStart = + createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part); + + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + Value *Idx = State.Builder.CreateAdd( + PartStart, ConstantInt::get(PtrInd->getType(), Lane)); + Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx); + + Value *Step = CreateStepValue(IndDesc.getStep(), SE, + State.CFG.PrevBB->getTerminator()); + Value *SclrGep = emitTransformedIndex( + State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc); + SclrGep->setName("next.gep"); + State.set(this, SclrGep, VPIteration(Part, Lane)); + } + } + return; + } + + assert(isa(IndDesc.getStep()) && + "Induction step not a SCEV constant!"); + Type *PhiType = IndDesc.getStep()->getType(); + + // Build a pointer phi + Value *ScalarStartValue = getStartValue()->getLiveInIRValue(); + Type *ScStValueType = ScalarStartValue->getType(); + PHINode *NewPointerPhi = + PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV); + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); + + // A pointer induction, performed by using a gep + const DataLayout &DL = NewPointerPhi->getModule()->getDataLayout(); + Instruction *InductionLoc = &*State.Builder.GetInsertPoint(); + + const SCEV *ScalarStep = IndDesc.getStep(); + SCEVExpander Exp(SE, DL, "induction"); + Value *ScalarStepValue = Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); + Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); + Value *NumUnrolledElems = + State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF)); + Value *InductionGEP = GetElementPtrInst::Create( + IndDesc.getElementType(), NewPointerPhi, + State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind", + InductionLoc); + // Add induction update using an incorrect block temporarily. The phi node + // will be fixed after VPlan execution. Note that at this point the latch + // block cannot be used, as it does not exist yet. + // TODO: Model increment value in VPlan, by turning the recipe into a + // multi-def and a subclass of VPHeaderPHIRecipe. + NewPointerPhi->addIncoming(InductionGEP, VectorPH); + + // Create UF many actual address geps that use the pointer + // phi as base and a vectorized version of the step value + // () as offset. + for (unsigned Part = 0; Part < State.UF; ++Part) { + Type *VecPhiType = VectorType::get(PhiType, State.VF); + Value *StartOffsetScalar = + State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part)); + Value *StartOffset = + State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar); + // Create a vector of consecutive numbers from zero to VF. + StartOffset = State.Builder.CreateAdd( + StartOffset, State.Builder.CreateStepVector(VecPhiType)); + + Value *GEP = State.Builder.CreateGEP( + IndDesc.getElementType(), NewPointerPhi, + State.Builder.CreateMul( + StartOffset, + State.Builder.CreateVectorSplat(State.VF, ScalarStepValue), + "vector.gep")); + State.set(this, GEP, Part); + } } -void VPWidenPHIRecipe::execute(VPTransformState &State) { - State.ILV->widenPHIInstruction(cast(getUnderlyingValue()), this, - State); +void VPScalarIVStepsRecipe::execute(VPTransformState &State) { + assert(!State.Instance && "VPScalarIVStepsRecipe being replicated."); + + // Fast-math-flags propagate from the original induction instruction. 
+ IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); + if (IndDesc.getInductionBinOp() && + isa(IndDesc.getInductionBinOp())) + State.Builder.setFastMathFlags( + IndDesc.getInductionBinOp()->getFastMathFlags()); + + Value *Step = State.get(getStepValue(), VPIteration(0, 0)); + auto CreateScalarIV = [&](Value *&Step) -> Value * { + Value *ScalarIV = State.get(getCanonicalIV(), VPIteration(0, 0)); + auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0); + if (!isCanonical() || CanonicalIV->getType() != Ty) { + ScalarIV = + Ty->isIntegerTy() + ? State.Builder.CreateSExtOrTrunc(ScalarIV, Ty) + : State.Builder.CreateCast(Instruction::SIToFP, ScalarIV, Ty); + ScalarIV = emitTransformedIndex(State.Builder, ScalarIV, + getStartValue()->getLiveInIRValue(), Step, + IndDesc); + ScalarIV->setName("offset.idx"); + } + if (TruncToTy) { + assert(Step->getType()->isIntegerTy() && + "Truncation requires an integer step"); + ScalarIV = State.Builder.CreateTrunc(ScalarIV, TruncToTy); + Step = State.Builder.CreateTrunc(Step, TruncToTy); + } + return ScalarIV; + }; + + Value *ScalarIV = CreateScalarIV(Step); + if (State.VF.isVector()) { + buildScalarSteps(ScalarIV, Step, IndDesc, this, State); + return; + } + + for (unsigned Part = 0; Part < State.UF; ++Part) { + assert(!State.VF.isScalable() && "scalable vectors not yet supported."); + Value *EntryPart; + if (Step->getType()->isFloatingPointTy()) { + Value *StartIdx = + getRuntimeVFAsFloat(State.Builder, Step->getType(), State.VF * Part); + // Floating-point operations inherit FMF via the builder's flags. + Value *MulOp = State.Builder.CreateFMul(StartIdx, Step); + EntryPart = State.Builder.CreateBinOp(IndDesc.getInductionOpcode(), + ScalarIV, MulOp); + } else { + Value *StartIdx = + getRuntimeVF(State.Builder, Step->getType(), State.VF * Part); + EntryPart = State.Builder.CreateAdd( + ScalarIV, State.Builder.CreateMul(StartIdx, Step), "induction"); + } + State.set(this, EntryPart, Part); + } } void VPBlendRecipe::execute(VPTransformState &State) { - State.ILV->setDebugLocFromInst(Phi, &State.Builder); + State.setDebugLocFromInst(Phi); // We know that all PHIs in non-header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. @@ -9979,7 +9894,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // Handle Stores: if (SI) { - State.ILV->setDebugLocFromInst(SI); + State.setDebugLocFromInst(SI); for (unsigned Part = 0; Part < State.UF; ++Part) { Instruction *NewSI = nullptr; @@ -10005,14 +9920,14 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { else NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); } - State.ILV->addMetadata(NewSI, SI); + State.addMetadata(NewSI, SI); } return; } // Handle loads. 
assert(LI && "Must have a load instruction"); - State.ILV->setDebugLocFromInst(LI); + State.setDebugLocFromInst(LI); for (unsigned Part = 0; Part < State.UF; ++Part) { Value *NewLI; if (CreateGatherScatter) { @@ -10020,7 +9935,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { Value *VectorGep = State.get(getAddr(), Part); NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, nullptr, "wide.masked.gather"); - State.ILV->addMetadata(NewLI, LI); + State.addMetadata(NewLI, LI); } else { auto *VecPtr = CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); @@ -10033,12 +9948,12 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); // Add metadata to the load, but setVectorValue to the reverse shuffle. - State.ILV->addMetadata(NewLI, LI); + State.addMetadata(NewLI, LI); if (Reverse) NewLI = Builder.CreateVectorReverse(NewLI, "reverse"); } - State.set(this, NewLI, Part); + State.set(getVPSingleValue(), NewLI, Part); } } @@ -10119,7 +10034,8 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) { // Check if there is a scalar value for the selected lane. if (!hasScalarValue(Def, {Part, LastLane})) { // At the moment, VPWidenIntOrFpInductionRecipes can also be uniform. - assert(isa(Def->getDef()) && + assert((isa(Def->getDef()) || + isa(Def->getDef())) && "unexpected recipe found to be invariant"); IsUniform = true; LastLane = 0; @@ -10201,8 +10117,7 @@ static bool processLoopInVPlanNativePath( // If we are stress testing VPlan builds, do not attempt to generate vector // code. Masked vector code generation support will follow soon. // Also, do not attempt to vectorize if no vector code will be produced. - if (VPlanBuildStressTest || EnableVPlanPredication || - VectorizationFactor::Disabled() == VF) + if (VPlanBuildStressTest || VectorizationFactor::Disabled() == VF) return false; VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); @@ -10214,7 +10129,7 @@ static bool processLoopInVPlanNativePath( &CM, BFI, PSI, Checks); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LVP.executePlan(VF.Width, 1, BestPlan, LB, DT); + LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -10282,8 +10197,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { const std::string DebugLocStr = getDebugLocString(L); #endif /* NDEBUG */ - LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in \"" - << L->getHeader()->getParent()->getName() << "\" from " + LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '" + << L->getHeader()->getParent()->getName() << "' from " << DebugLocStr << "\n"); LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); @@ -10438,10 +10353,30 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; + GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, + F->getParent()->getDataLayout()); if (MaybeVF) { + if (LVP.requiresTooManyRuntimeChecks()) { + ORE->emit([&]() { + return OptimizationRemarkAnalysisAliasing( + DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(), + L->getHeader()) + << "loop not vectorized: cannot prove it is safe to reorder " + "memory operations"; + }); + LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + Hints.emitRemarkWithHints(); + return false; + } VF = *MaybeVF; // Select the interleave count. 
IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue()); + + unsigned SelectedIC = std::max(IC, UserIC); + // Optimistically generate runtime checks if they are needed. Drop them if + // they turn out to not be profitable. + if (VF.Width.isVector() || SelectedIC > 1) + Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); } // Identify the diagnostic messages that should be produced. @@ -10529,14 +10464,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool DisableRuntimeUnroll = false; MDNode *OrigLoopID = L->getLoopID(); { - // Optimistically generate runtime checks. Drop them if they turn out to not - // be profitable. Limit the scope of Checks, so the cleanup happens - // immediately after vector codegeneration is done. - GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, - F->getParent()->getDataLayout()); - if (!VF.Width.isScalar() || IC > 1) - Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); - using namespace ore; if (!VectorizeLoop) { assert(IC > 1 && "interleave count should not be 1 or 0"); @@ -10546,7 +10473,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { &CM, BFI, PSI, Checks); VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); - LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT); + LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -10571,12 +10498,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF); LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, - DT); + DT, true); ++LoopsVectorized; - simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); - formLCSSARecursively(*L, *DT, LI, SE); - // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. EPI.MainLoopVF = EPI.EpilogueVF; @@ -10586,23 +10510,24 @@ bool LoopVectorizePass::processLoop(Loop *L) { Checks); VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF); + VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion(); + VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); + Header->setName("vec.epilog.vector.body"); // Ensure that the start values for any VPReductionPHIRecipes are // updated before vectorising the epilogue loop. 
- VPBasicBlock *Header = BestEpiPlan.getEntry()->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { if (auto *ReductionPhi = dyn_cast(&R)) { if (auto *Resume = MainILV.getReductionResumeValue( ReductionPhi->getRecurrenceDescriptor())) { - VPValue *StartVal = new VPValue(Resume); - BestEpiPlan.addExternalDef(StartVal); + VPValue *StartVal = BestEpiPlan.getOrAddExternalDef(Resume); ReductionPhi->setOperand(0, StartVal); } } } LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, - DT); + DT, true); ++LoopsEpilogueVectorized; if (!MainILV.areSafetyChecksAdded()) @@ -10612,7 +10537,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { &LVL, &CM, BFI, PSI, Checks); VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); - LVP.executePlan(VF.Width, IC, BestPlan, LB, DT); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there @@ -10638,7 +10563,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { Optional RemainderLoopID = makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupEpilogue}); - if (RemainderLoopID.hasValue()) { + if (RemainderLoopID) { L->setLoopID(RemainderLoopID.getValue()); } else { if (DisableRuntimeUnroll) @@ -10720,8 +10645,12 @@ LoopVectorizeResult LoopVectorizePass::runImpl( PreservedAnalyses LoopVectorizePass::run(Function &F, FunctionAnalysisManager &AM) { - auto &SE = AM.getResult(F); auto &LI = AM.getResult(F); + // There are no loops in the function. Return before computing other expensive + // analyses. + if (LI.empty()) + return PreservedAnalyses::all(); + auto &SE = AM.getResult(F); auto &TTI = AM.getResult(F); auto &DT = AM.getResult(F); auto &BFI = AM.getResult(F); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 15b349f53fd9..019a09665a67 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -53,7 +53,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -64,7 +63,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -72,8 +70,9 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#ifdef EXPENSIVE_CHECKS #include "llvm/IR/Verifier.h" -#include "llvm/InitializePasses.h" +#endif #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -87,6 +86,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include @@ -164,13 +164,14 @@ static cl::opt LookAheadMaxDepth( "slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")); -// The Look-ahead heuristic goes through the users of the bundle to calculate -// the users cost in getExternalUsesCost(). To avoid compilation time increase -// we limit the number of users visited to this value. 
-static cl::opt<int> LookAheadUsersBudget(
-    "slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
-    cl::desc("The maximum number of users to visit while visiting the "
-             "predecessors. This prevents compilation time increase."));
+// The maximum depth that the look-ahead score heuristic will explore
+// when probing among candidates for vectorization tree roots.
+// The higher this value, the higher the compilation time overhead, but unlike
+// the similar limit for operand reordering this is used less frequently, so
+// the impact of a higher value is less noticeable.
+static cl::opt<int> RootLookAheadMaxDepth(
+    "slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden,
+    cl::desc("The maximum look-ahead depth for searching best rooting option"));
 
 static cl::opt<bool> ViewSLPTree("view-slp-tree", cl::Hidden,
@@ -571,7 +572,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
             areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
           continue;
       }
-      if (BaseIndex == AltIndex) {
+      if (BaseIndex == AltIndex && BasePred != CurrentPred) {
         assert(isValidForAlternation(Opcode) &&
                isValidForAlternation(InstOpcode) &&
                "Cast isn't safe for alternation, logic needs to be updated!");
@@ -640,7 +641,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
     CallInst *CI = cast<CallInst>(UserInst);
     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
     for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
-      if (hasVectorInstrinsicScalarOpd(ID, i))
+      if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
         return (CI->getArgOperand(i) == Scalar);
     }
     LLVM_FALLTHROUGH;
@@ -736,29 +737,28 @@ static void inversePermutation(ArrayRef<unsigned> Indices,
 
 /// \returns inserting index of InsertElement or InsertValue instruction,
 /// using Offset as base offset for index.
-static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
+static Optional<unsigned> getInsertIndex(const Value *InsertInst,
+                                         unsigned Offset = 0) {
   int Index = Offset;
-  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
-    if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+  if (const auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+    if (const auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
       auto *VT = cast<FixedVectorType>(IE->getType());
       if (CI->getValue().uge(VT->getNumElements()))
-        return UndefMaskElem;
+        return None;
       Index *= VT->getNumElements();
       Index += CI->getZExtValue();
       return Index;
     }
-    if (isa<UndefValue>(IE->getOperand(2)))
-      return UndefMaskElem;
     return None;
   }
 
-  auto *IV = cast<InsertValueInst>(InsertInst);
+  const auto *IV = cast<InsertValueInst>(InsertInst);
   Type *CurrentType = IV->getType();
   for (unsigned I : IV->indices()) {
-    if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+    if (const auto *ST = dyn_cast<StructType>(CurrentType)) {
       Index *= ST->getNumElements();
       CurrentType = ST->getElementType(I);
-    } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+    } else if (const auto *AT = dyn_cast<ArrayType>(CurrentType)) {
       Index *= AT->getNumElements();
       CurrentType = AT->getElementType();
     } else {
@@ -769,11 +769,7 @@ static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
   return Index;
 }
 
-/// Reorders the list of scalars in accordance with the given \p Order and then
-/// the \p Mask. \p Order - is the original order of the scalars, need to
-/// reorder scalars into an unordered state at first according to the given
-/// order. Then the ordered scalars are shuffled once again in accordance with
-/// the provided mask.
+/// Reorders the list of scalars in accordance with the given \p Mask.
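// Concretely (a standalone toy with hypothetical values, mirroring the loop in
// reorderScalars below): the scalar in lane I of the previous order moves to
// lane Mask[I].
#include <array>
#include <cassert>

int main() {
  std::array<int, 3> Scalars = {10, 20, 30};
  const std::array<int, 3> Mask = {1, 2, 0};
  const std::array<int, 3> Prev = Scalars;
  for (int I = 0; I != 3; ++I)
    Scalars[Mask[I]] = Prev[I]; // the same statement the function executes
  assert((Scalars == std::array<int, 3>{30, 10, 20}));
  return 0;
}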
static void reorderScalars(SmallVectorImpl &Scalars, ArrayRef Mask) { assert(!Mask.empty() && "Expected non-empty mask."); @@ -785,6 +781,58 @@ static void reorderScalars(SmallVectorImpl &Scalars, Scalars[Mask[I]] = Prev[I]; } +/// Checks if the provided value does not require scheduling. It does not +/// require scheduling if this is not an instruction or it is an instruction +/// that does not read/write memory and all operands are either not instructions +/// or phi nodes or instructions from different blocks. +static bool areAllOperandsNonInsts(Value *V) { + auto *I = dyn_cast(V); + if (!I) + return true; + return !mayHaveNonDefUseDependency(*I) && + all_of(I->operands(), [I](Value *V) { + auto *IO = dyn_cast(V); + if (!IO) + return true; + return isa(IO) || IO->getParent() != I->getParent(); + }); +} + +/// Checks if the provided value does not require scheduling. It does not +/// require scheduling if this is not an instruction or it is an instruction +/// that does not read/write memory and all users are phi nodes or instructions +/// from the different blocks. +static bool isUsedOutsideBlock(Value *V) { + auto *I = dyn_cast(V); + if (!I) + return true; + // Limits the number of uses to save compile time. + constexpr int UsesLimit = 8; + return !I->mayReadOrWriteMemory() && !I->hasNUsesOrMore(UsesLimit) && + all_of(I->users(), [I](User *U) { + auto *IU = dyn_cast(U); + if (!IU) + return true; + return IU->getParent() != I->getParent() || isa(IU); + }); +} + +/// Checks if the specified value does not require scheduling. It does not +/// require scheduling if all operands and all users do not need to be scheduled +/// in the current basic block. +static bool doesNotNeedToBeScheduled(Value *V) { + return areAllOperandsNonInsts(V) && isUsedOutsideBlock(V); +} + +/// Checks if the specified array of instructions does not require scheduling. +/// It is so if all either instructions have operands that do not require +/// scheduling or their users do not require scheduling since they are phis or +/// in other basic blocks. +static bool doesNotNeedToSchedule(ArrayRef VL) { + return !VL.empty() && + (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -805,8 +853,8 @@ public: TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, const DataLayout *DL, OptimizationRemarkEmitter *ORE) - : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), - DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) { + : BatchAA(*Aa), F(Func), SE(Se), TTI(Tti), TLI(TLi), LI(Li), + DT(Dt), AC(AC), DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. @@ -847,7 +895,10 @@ public: /// Construct a vectorizable tree that starts at \p Roots, ignoring users for /// the purpose of scheduling and extraction in the \p UserIgnoreLst. void buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst = None); + const SmallDenseSet &UserIgnoreLst); + + /// Construct a vectorizable tree that starts at \p Roots. + void buildTree(ArrayRef Roots); /// Builds external uses of the vectorized scalars, i.e. the list of /// vectorized scalars to be extracted, their lanes and their scalar users. 
\p @@ -868,6 +919,7 @@ public: } MinBWs.clear(); InstrElementSize.clear(); + UserIgnoreList = nullptr; } unsigned getTreeSize() const { return VectorizableTree.size(); } @@ -881,6 +933,9 @@ public: /// ExtractElement, ExtractValue), which can be part of the graph. Optional findReusedOrderedScalars(const TreeEntry &TE); + /// Sort loads into increasing pointers offsets to allow greater clustering. + Optional findPartiallyOrderedLoads(const TreeEntry &TE); + /// Gets reordering data for the given tree entry. If the entry is vectorized /// - just return ReorderIndices, otherwise check if the scalars can be /// reordered and return the most optimal order. @@ -995,96 +1050,18 @@ public: #endif }; - /// A helper data structure to hold the operands of a vector of instructions. - /// This supports a fixed vector length for all operand vectors. - class VLOperands { - /// For each operand we need (i) the value, and (ii) the opcode that it - /// would be attached to if the expression was in a left-linearized form. - /// This is required to avoid illegal operand reordering. - /// For example: - /// \verbatim - /// 0 Op1 - /// |/ - /// Op1 Op2 Linearized + Op2 - /// \ / ----------> |/ - /// - - - /// - /// Op1 - Op2 (0 + Op1) - Op2 - /// \endverbatim - /// - /// Value Op1 is attached to a '+' operation, and Op2 to a '-'. - /// - /// Another way to think of this is to track all the operations across the - /// path from the operand all the way to the root of the tree and to - /// calculate the operation that corresponds to this path. For example, the - /// path from Op2 to the root crosses the RHS of the '-', therefore the - /// corresponding operation is a '-' (which matches the one in the - /// linearized tree, as shown above). - /// - /// For lack of a better term, we refer to this operation as Accumulated - /// Path Operation (APO). - struct OperandData { - OperandData() = default; - OperandData(Value *V, bool APO, bool IsUsed) - : V(V), APO(APO), IsUsed(IsUsed) {} - /// The operand value. - Value *V = nullptr; - /// TreeEntries only allow a single opcode, or an alternate sequence of - /// them (e.g, +, -). Therefore, we can safely use a boolean value for the - /// APO. It is set to 'true' if 'V' is attached to an inverse operation - /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise - /// (e.g., Add/Mul) - bool APO = false; - /// Helper data for the reordering function. - bool IsUsed = false; - }; - - /// During operand reordering, we are trying to select the operand at lane - /// that matches best with the operand at the neighboring lane. Our - /// selection is based on the type of value we are looking for. For example, - /// if the neighboring lane has a load, we need to look for a load that is - /// accessing a consecutive address. These strategies are summarized in the - /// 'ReorderingMode' enumerator. - enum class ReorderingMode { - Load, ///< Matching loads to consecutive memory addresses - Opcode, ///< Matching instructions based on opcode (same or alternate) - Constant, ///< Matching constants - Splat, ///< Matching the same instruction multiple times (broadcast) - Failed, ///< We failed to create a vectorizable group - }; - - using OperandDataVec = SmallVector; - - /// A vector of operand vectors. - SmallVector OpsVec; - + /// A helper class used for scoring candidates for two consecutive lanes. + class LookAheadHeuristics { const DataLayout &DL; ScalarEvolution &SE; const BoUpSLP &R; + int NumLanes; // Total number of lanes (aka vectorization factor). 
+ int MaxLevel; // The maximum recursion depth for accumulating score. - /// \returns the operand data at \p OpIdx and \p Lane. - OperandData &getData(unsigned OpIdx, unsigned Lane) { - return OpsVec[OpIdx][Lane]; - } - - /// \returns the operand data at \p OpIdx and \p Lane. Const version. - const OperandData &getData(unsigned OpIdx, unsigned Lane) const { - return OpsVec[OpIdx][Lane]; - } - - /// Clears the used flag for all entries. - void clearUsed() { - for (unsigned OpIdx = 0, NumOperands = getNumOperands(); - OpIdx != NumOperands; ++OpIdx) - for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes; - ++Lane) - OpsVec[OpIdx][Lane].IsUsed = false; - } - - /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2. - void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) { - std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]); - } + public: + LookAheadHeuristics(const DataLayout &DL, ScalarEvolution &SE, + const BoUpSLP &R, int NumLanes, int MaxLevel) + : DL(DL), SE(SE), R(R), NumLanes(NumLanes), MaxLevel(MaxLevel) {} // The hard-coded scores listed here are not very important, though it shall // be higher for better matches to improve the resulting cost. When @@ -1099,6 +1076,11 @@ public: /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]). static const int ScoreConsecutiveLoads = 4; + /// The same load multiple times. This should have a better score than + /// `ScoreSplat` because it in x86 for a 2-lane vector we can represent it + /// with `movddup (%reg), xmm0` which has a throughput of 0.5 versus 0.5 for + /// a vector load and 1.0 for a broadcast. + static const int ScoreSplatLoads = 3; /// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]). static const int ScoreReversedLoads = 3; /// ExtractElementInst from same vector and consecutive indexes. @@ -1117,43 +1099,67 @@ public: static const int ScoreUndef = 1; /// Score for failing to find a decent match. static const int ScoreFail = 0; - /// User exteranl to the vectorized code. - static const int ExternalUseCost = 1; - /// The user is internal but in a different lane. - static const int UserInDiffLaneCost = ExternalUseCost; + /// Score if all users are vectorized. + static const int ScoreAllUserVectorized = 1; /// \returns the score of placing \p V1 and \p V2 in consecutive lanes. - static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL, - ScalarEvolution &SE, int NumLanes) { - if (V1 == V2) - return VLOperands::ScoreSplat; + /// \p U1 and \p U2 are the users of \p V1 and \p V2. + /// Also, checks if \p V1 and \p V2 are compatible with instructions in \p + /// MainAltOps. + int getShallowScore(Value *V1, Value *V2, Instruction *U1, Instruction *U2, + ArrayRef MainAltOps) const { + if (V1 == V2) { + if (isa(V1)) { + // Retruns true if the users of V1 and V2 won't need to be extracted. + auto AllUsersAreInternal = [U1, U2, this](Value *V1, Value *V2) { + // Bail out if we have too many uses to save compilation time. + static constexpr unsigned Limit = 8; + if (V1->hasNUsesOrMore(Limit) || V2->hasNUsesOrMore(Limit)) + return false; + + auto AllUsersVectorized = [U1, U2, this](Value *V) { + return llvm::all_of(V->users(), [U1, U2, this](Value *U) { + return U == U1 || U == U2 || R.getTreeEntry(U) != nullptr; + }); + }; + return AllUsersVectorized(V1) && AllUsersVectorized(V2); + }; + // A broadcast of a load can be cheaper on some targets. 
+ if (R.TTI->isLegalBroadcastLoad(V1->getType(), + ElementCount::getFixed(NumLanes)) && + ((int)V1->getNumUses() == NumLanes || + AllUsersAreInternal(V1, V2))) + return LookAheadHeuristics::ScoreSplatLoads; + } + return LookAheadHeuristics::ScoreSplat; + } auto *LI1 = dyn_cast(V1); auto *LI2 = dyn_cast(V2); if (LI1 && LI2) { if (LI1->getParent() != LI2->getParent()) - return VLOperands::ScoreFail; + return LookAheadHeuristics::ScoreFail; Optional Dist = getPointersDiff( LI1->getType(), LI1->getPointerOperand(), LI2->getType(), LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true); - if (!Dist) - return VLOperands::ScoreFail; + if (!Dist || *Dist == 0) + return LookAheadHeuristics::ScoreFail; // The distance is too large - still may be profitable to use masked // loads/gathers. if (std::abs(*Dist) > NumLanes / 2) - return VLOperands::ScoreAltOpcodes; + return LookAheadHeuristics::ScoreAltOpcodes; // This still will detect consecutive loads, but we might have "holes" // in some cases. It is ok for non-power-2 vectorization and may produce // better results. It should not affect current vectorization. - return (*Dist > 0) ? VLOperands::ScoreConsecutiveLoads - : VLOperands::ScoreReversedLoads; + return (*Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveLoads + : LookAheadHeuristics::ScoreReversedLoads; } auto *C1 = dyn_cast(V1); auto *C2 = dyn_cast(V2); if (C1 && C2) - return VLOperands::ScoreConstants; + return LookAheadHeuristics::ScoreConstants; // Extracts from consecutive indexes of the same vector better score as // the extracts could be optimized away. @@ -1162,7 +1168,7 @@ public: if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) { // Undefs are always profitable for extractelements. if (isa(V2)) - return VLOperands::ScoreConsecutiveExtracts; + return LookAheadHeuristics::ScoreConsecutiveExtracts; Value *EV2 = nullptr; ConstantInt *Ex2Idx = nullptr; if (match(V2, @@ -1170,108 +1176,62 @@ public: m_Undef())))) { // Undefs are always profitable for extractelements. if (!Ex2Idx) - return VLOperands::ScoreConsecutiveExtracts; + return LookAheadHeuristics::ScoreConsecutiveExtracts; if (isUndefVector(EV2) && EV2->getType() == EV1->getType()) - return VLOperands::ScoreConsecutiveExtracts; + return LookAheadHeuristics::ScoreConsecutiveExtracts; if (EV2 == EV1) { int Idx1 = Ex1Idx->getZExtValue(); int Idx2 = Ex2Idx->getZExtValue(); int Dist = Idx2 - Idx1; // The distance is too large - still may be profitable to use // shuffles. + if (std::abs(Dist) == 0) + return LookAheadHeuristics::ScoreSplat; if (std::abs(Dist) > NumLanes / 2) - return VLOperands::ScoreAltOpcodes; - return (Dist > 0) ? VLOperands::ScoreConsecutiveExtracts - : VLOperands::ScoreReversedExtracts; + return LookAheadHeuristics::ScoreSameOpcode; + return (Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveExtracts + : LookAheadHeuristics::ScoreReversedExtracts; } + return LookAheadHeuristics::ScoreAltOpcodes; } + return LookAheadHeuristics::ScoreFail; } auto *I1 = dyn_cast(V1); auto *I2 = dyn_cast(V2); if (I1 && I2) { if (I1->getParent() != I2->getParent()) - return VLOperands::ScoreFail; - InstructionsState S = getSameOpcode({I1, I2}); + return LookAheadHeuristics::ScoreFail; + SmallVector Ops(MainAltOps.begin(), MainAltOps.end()); + Ops.push_back(I1); + Ops.push_back(I2); + InstructionsState S = getSameOpcode(Ops); // Note: Only consider instructions with <= 2 operands to avoid // complexity explosion. - if (S.getOpcode() && S.MainOp->getNumOperands() <= 2) - return S.isAltShuffle() ? 
VLOperands::ScoreAltOpcodes - : VLOperands::ScoreSameOpcode; + if (S.getOpcode() && + (S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() || + !S.isAltShuffle()) && + all_of(Ops, [&S](Value *V) { + return cast(V)->getNumOperands() == + S.MainOp->getNumOperands(); + })) + return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes + : LookAheadHeuristics::ScoreSameOpcode; } if (isa(V2)) - return VLOperands::ScoreUndef; - - return VLOperands::ScoreFail; - } - - /// Holds the values and their lanes that are taking part in the look-ahead - /// score calculation. This is used in the external uses cost calculation. - /// Need to hold all the lanes in case of splat/broadcast at least to - /// correctly check for the use in the different lane. - SmallDenseMap> InLookAheadValues; - - /// \returns the additional cost due to uses of \p LHS and \p RHS that are - /// either external to the vectorized code, or require shuffling. - int getExternalUsesCost(const std::pair &LHS, - const std::pair &RHS) { - int Cost = 0; - std::array, 2> Values = {{LHS, RHS}}; - for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) { - Value *V = Values[Idx].first; - if (isa(V)) { - // Since this is a function pass, it doesn't make semantic sense to - // walk the users of a subclass of Constant. The users could be in - // another function, or even another module that happens to be in - // the same LLVMContext. - continue; - } + return LookAheadHeuristics::ScoreUndef; - // Calculate the absolute lane, using the minimum relative lane of LHS - // and RHS as base and Idx as the offset. - int Ln = std::min(LHS.second, RHS.second) + Idx; - assert(Ln >= 0 && "Bad lane calculation"); - unsigned UsersBudget = LookAheadUsersBudget; - for (User *U : V->users()) { - if (const TreeEntry *UserTE = R.getTreeEntry(U)) { - // The user is in the VectorizableTree. Check if we need to insert. - int UserLn = UserTE->findLaneForValue(U); - assert(UserLn >= 0 && "Bad lane"); - // If the values are different, check just the line of the current - // value. If the values are the same, need to add UserInDiffLaneCost - // only if UserLn does not match both line numbers. - if ((LHS.first != RHS.first && UserLn != Ln) || - (LHS.first == RHS.first && UserLn != LHS.second && - UserLn != RHS.second)) { - Cost += UserInDiffLaneCost; - break; - } - } else { - // Check if the user is in the look-ahead code. - auto It2 = InLookAheadValues.find(U); - if (It2 != InLookAheadValues.end()) { - // The user is in the look-ahead code. Check the lane. - if (!It2->getSecond().contains(Ln)) { - Cost += UserInDiffLaneCost; - break; - } - } else { - // The user is neither in SLP tree nor in the look-ahead code. - Cost += ExternalUseCost; - break; - } - } - // Limit the number of visited uses to cap compilation time. - if (--UsersBudget == 0) - break; - } - } - return Cost; + return LookAheadHeuristics::ScoreFail; } - /// Go through the operands of \p LHS and \p RHS recursively until \p - /// MaxLevel, and return the cummulative score. For example: + /// Go through the operands of \p LHS and \p RHS recursively until + /// MaxLevel, and return the cummulative score. \p U1 and \p U2 are + /// the users of \p LHS and \p RHS (that is \p LHS and \p RHS are operands + /// of \p U1 and \p U2), except at the beginning of the recursion where + /// these are set to nullptr. + /// + /// For example: /// \verbatim /// A[0] B[0] A[1] B[1] C[0] D[0] B[1] A[1] /// \ / \ / \ / \ / @@ -1282,8 +1242,8 @@ public: /// each level recursively, accumulating the score. 
It starts from matching /// the additions at level 0, then moves on to the loads (level 1). The /// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and - /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while - /// {A[0],C[0]} has a score of VLOperands::ScoreFail. + /// {B[0],B[1]} match with LookAheadHeuristics::ScoreConsecutiveLoads, while + /// {A[0],C[0]} has a score of LookAheadHeuristics::ScoreFail. /// Please note that the order of the operands does not matter, as we /// evaluate the score of all profitable combinations of operands. In /// other words the score of G1 and G4 is the same as G1 and G2. This @@ -1291,18 +1251,13 @@ public: /// Look-ahead SLP: Auto-vectorization in the presence of commutative /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha, /// Luís F. W. Góes - int getScoreAtLevelRec(const std::pair &LHS, - const std::pair &RHS, int CurrLevel, - int MaxLevel) { + int getScoreAtLevelRec(Value *LHS, Value *RHS, Instruction *U1, + Instruction *U2, int CurrLevel, + ArrayRef MainAltOps) const { - Value *V1 = LHS.first; - Value *V2 = RHS.first; // Get the shallow score of V1 and V2. - int ShallowScoreAtThisLevel = std::max( - (int)ScoreFail, getShallowScore(V1, V2, DL, SE, getNumLanes()) - - getExternalUsesCost(LHS, RHS)); - int Lane1 = LHS.second; - int Lane2 = RHS.second; + int ShallowScoreAtThisLevel = + getShallowScore(LHS, RHS, U1, U2, MainAltOps); // If reached MaxLevel, // or if V1 and V2 are not instructions, @@ -1310,20 +1265,17 @@ public: // or if they are not consecutive, // or if profitable to vectorize loads or extractelements, early return // the current cost. - auto *I1 = dyn_cast(V1); - auto *I2 = dyn_cast(V2); + auto *I1 = dyn_cast(LHS); + auto *I2 = dyn_cast(RHS); if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 || - ShallowScoreAtThisLevel == VLOperands::ScoreFail || + ShallowScoreAtThisLevel == LookAheadHeuristics::ScoreFail || (((isa(I1) && isa(I2)) || + (I1->getNumOperands() > 2 && I2->getNumOperands() > 2) || (isa(I1) && isa(I2))) && ShallowScoreAtThisLevel)) return ShallowScoreAtThisLevel; assert(I1 && I2 && "Should have early exited."); - // Keep track of in-tree values for determining the external-use cost. - InLookAheadValues[V1].insert(Lane1); - InLookAheadValues[V2].insert(Lane2); - // Contains the I2 operand indexes that got matched with I1 operands. SmallSet Op2Used; @@ -1346,11 +1298,12 @@ public: if (Op2Used.count(OpIdx2)) continue; // Recursively calculate the cost at each level - int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1}, - {I2->getOperand(OpIdx2), Lane2}, - CurrLevel + 1, MaxLevel); + int TmpScore = + getScoreAtLevelRec(I1->getOperand(OpIdx1), I2->getOperand(OpIdx2), + I1, I2, CurrLevel + 1, None); // Look for the best score. - if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) { + if (TmpScore > LookAheadHeuristics::ScoreFail && + TmpScore > MaxTmpScore) { MaxTmpScore = TmpScore; MaxOpIdx2 = OpIdx2; FoundBest = true; @@ -1364,24 +1317,213 @@ public: } return ShallowScoreAtThisLevel; } + }; + /// A helper data structure to hold the operands of a vector of instructions. + /// This supports a fixed vector length for all operand vectors. + class VLOperands { + /// For each operand we need (i) the value, and (ii) the opcode that it + /// would be attached to if the expression was in a left-linearized form. + /// This is required to avoid illegal operand reordering. 
+ /// For example: + /// \verbatim + /// 0 Op1 + /// |/ + /// Op1 Op2 Linearized + Op2 + /// \ / ----------> |/ + /// - - + /// + /// Op1 - Op2 (0 + Op1) - Op2 + /// \endverbatim + /// + /// Value Op1 is attached to a '+' operation, and Op2 to a '-'. + /// + /// Another way to think of this is to track all the operations across the + /// path from the operand all the way to the root of the tree and to + /// calculate the operation that corresponds to this path. For example, the + /// path from Op2 to the root crosses the RHS of the '-', therefore the + /// corresponding operation is a '-' (which matches the one in the + /// linearized tree, as shown above). + /// + /// For lack of a better term, we refer to this operation as Accumulated + /// Path Operation (APO). + struct OperandData { + OperandData() = default; + OperandData(Value *V, bool APO, bool IsUsed) + : V(V), APO(APO), IsUsed(IsUsed) {} + /// The operand value. + Value *V = nullptr; + /// TreeEntries only allow a single opcode, or an alternate sequence of + /// them (e.g, +, -). Therefore, we can safely use a boolean value for the + /// APO. It is set to 'true' if 'V' is attached to an inverse operation + /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise + /// (e.g., Add/Mul) + bool APO = false; + /// Helper data for the reordering function. + bool IsUsed = false; + }; + + /// During operand reordering, we are trying to select the operand at lane + /// that matches best with the operand at the neighboring lane. Our + /// selection is based on the type of value we are looking for. For example, + /// if the neighboring lane has a load, we need to look for a load that is + /// accessing a consecutive address. These strategies are summarized in the + /// 'ReorderingMode' enumerator. + enum class ReorderingMode { + Load, ///< Matching loads to consecutive memory addresses + Opcode, ///< Matching instructions based on opcode (same or alternate) + Constant, ///< Matching constants + Splat, ///< Matching the same instruction multiple times (broadcast) + Failed, ///< We failed to create a vectorizable group + }; + + using OperandDataVec = SmallVector; + + /// A vector of operand vectors. + SmallVector OpsVec; + + const DataLayout &DL; + ScalarEvolution &SE; + const BoUpSLP &R; + + /// \returns the operand data at \p OpIdx and \p Lane. + OperandData &getData(unsigned OpIdx, unsigned Lane) { + return OpsVec[OpIdx][Lane]; + } + + /// \returns the operand data at \p OpIdx and \p Lane. Const version. + const OperandData &getData(unsigned OpIdx, unsigned Lane) const { + return OpsVec[OpIdx][Lane]; + } + + /// Clears the used flag for all entries. + void clearUsed() { + for (unsigned OpIdx = 0, NumOperands = getNumOperands(); + OpIdx != NumOperands; ++OpIdx) + for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes; + ++Lane) + OpsVec[OpIdx][Lane].IsUsed = false; + } + + /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2. + void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) { + std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]); + } + + /// \param Lane lane of the operands under analysis. + /// \param OpIdx operand index in \p Lane lane we're looking the best + /// candidate for. + /// \param Idx operand index of the current candidate value. + /// \returns The additional score due to possible broadcasting of the + /// elements in the lane. It is more profitable to have power-of-2 unique + /// elements in the lane, it will be vectorized with higher probability + /// after removing duplicates. 
Currently the SLP vectorizer supports only + /// vectorization of the power-of-2 number of unique scalars. + int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const { + Value *IdxLaneV = getData(Idx, Lane).V; + if (!isa(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V) + return 0; + SmallPtrSet Uniques; + for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) { + if (Ln == Lane) + continue; + Value *OpIdxLnV = getData(OpIdx, Ln).V; + if (!isa(OpIdxLnV)) + return 0; + Uniques.insert(OpIdxLnV); + } + int UniquesCount = Uniques.size(); + int UniquesCntWithIdxLaneV = + Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1; + Value *OpIdxLaneV = getData(OpIdx, Lane).V; + int UniquesCntWithOpIdxLaneV = + Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1; + if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV) + return 0; + return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) - + UniquesCntWithOpIdxLaneV) - + (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV); + } + + /// \param Lane lane of the operands under analysis. + /// \param OpIdx operand index in \p Lane lane we're looking the best + /// candidate for. + /// \param Idx operand index of the current candidate value. + /// \returns The additional score for the scalar which users are all + /// vectorized. + int getExternalUseScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const { + Value *IdxLaneV = getData(Idx, Lane).V; + Value *OpIdxLaneV = getData(OpIdx, Lane).V; + // Do not care about number of uses for vector-like instructions + // (extractelement/extractvalue with constant indices), they are extracts + // themselves and already externally used. Vectorization of such + // instructions does not add extra extractelement instruction, just may + // remove it. + if (isVectorLikeInstWithConstOps(IdxLaneV) && + isVectorLikeInstWithConstOps(OpIdxLaneV)) + return LookAheadHeuristics::ScoreAllUserVectorized; + auto *IdxLaneI = dyn_cast(IdxLaneV); + if (!IdxLaneI || !isa(OpIdxLaneV)) + return 0; + return R.areAllUsersVectorized(IdxLaneI, None) + ? LookAheadHeuristics::ScoreAllUserVectorized + : 0; + } + + /// Score scaling factor for fully compatible instructions but with + /// different number of external uses. Allows better selection of the + /// instructions with less external uses. + static const int ScoreScaleFactor = 10; /// \Returns the look-ahead score, which tells us how much the sub-trees /// rooted at \p LHS and \p RHS match, the more they match the higher the /// score. This helps break ties in an informed way when we cannot decide on /// the order of the operands by just considering the immediate /// predecessors. - int getLookAheadScore(const std::pair &LHS, - const std::pair &RHS) { - InLookAheadValues.clear(); - return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth); + int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef MainAltOps, + int Lane, unsigned OpIdx, unsigned Idx, + bool &IsUsed) { + LookAheadHeuristics LookAhead(DL, SE, R, getNumLanes(), + LookAheadMaxDepth); + // Keep track of the instruction stack as we recurse into the operands + // during the look-ahead score exploration. + int Score = + LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr, + /*CurrLevel=*/1, MainAltOps); + if (Score) { + int SplatScore = getSplatScore(Lane, OpIdx, Idx); + if (Score <= -SplatScore) { + // Set the minimum score for splat-like sequence to avoid setting + // failed state. 
+ Score = 1; + } else { + Score += SplatScore; + // Scale score to see the difference between different operands + // and similar operands but all vectorized/not all vectorized + // uses. It does not affect actual selection of the best + // compatible operand in general, just allows to select the + // operand with all vectorized uses. + Score *= ScoreScaleFactor; + Score += getExternalUseScore(Lane, OpIdx, Idx); + IsUsed = true; + } + } + return Score; } + /// Best defined scores per lanes between the passes. Used to choose the + /// best operand (with the highest score) between the passes. + /// The key - {Operand Index, Lane}. + /// The value - the best score between the passes for the lane and the + /// operand. + SmallDenseMap, unsigned, 8> + BestScoresPerLanes; + // Search all operands in Ops[*][Lane] for the one that matches best // Ops[OpIdx][LastLane] and return its opreand index. // If no good match can be found, return None. - Optional - getBestOperand(unsigned OpIdx, int Lane, int LastLane, - ArrayRef ReorderingModes) { + Optional getBestOperand(unsigned OpIdx, int Lane, int LastLane, + ArrayRef ReorderingModes, + ArrayRef MainAltOps) { unsigned NumOperands = getNumOperands(); // The operand of the previous lane at OpIdx. @@ -1389,6 +1531,8 @@ public: // Our strategy mode for OpIdx. ReorderingMode RMode = ReorderingModes[OpIdx]; + if (RMode == ReorderingMode::Failed) + return None; // The linearized opcode of the operand at OpIdx, Lane. bool OpIdxAPO = getData(OpIdx, Lane).APO; @@ -1400,7 +1544,15 @@ public: Optional Idx = None; unsigned Score = 0; } BestOp; - + BestOp.Score = + BestScoresPerLanes.try_emplace(std::make_pair(OpIdx, Lane), 0) + .first->second; + + // Track if the operand must be marked as used. If the operand is set to + // Score 1 explicitly (because of non power-of-2 unique scalars, we may + // want to reestimate the operands again on the following iterations). + bool IsUsed = + RMode == ReorderingMode::Splat || RMode == ReorderingMode::Constant; // Iterate through all unused operands and look for the best. for (unsigned Idx = 0; Idx != NumOperands; ++Idx) { // Get the operand at Idx and Lane. @@ -1426,11 +1578,12 @@ public: bool LeftToRight = Lane > LastLane; Value *OpLeft = (LeftToRight) ? OpLastLane : Op; Value *OpRight = (LeftToRight) ? Op : OpLastLane; - unsigned Score = - getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane}); - if (Score > BestOp.Score) { + int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane, + OpIdx, Idx, IsUsed); + if (Score > static_cast(BestOp.Score)) { BestOp.Idx = Idx; BestOp.Score = Score; + BestScoresPerLanes[std::make_pair(OpIdx, Lane)] = Score; } break; } @@ -1439,12 +1592,12 @@ public: BestOp.Idx = Idx; break; case ReorderingMode::Failed: - return None; + llvm_unreachable("Not expected Failed reordering mode."); } } if (BestOp.Idx) { - getData(BestOp.Idx.getValue(), Lane).IsUsed = true; + getData(*BestOp.Idx, Lane).IsUsed = IsUsed; return BestOp.Idx; } // If we could not find a good match return None. @@ -1761,6 +1914,10 @@ public: // rest of the lanes. We are visiting the nodes in a circular fashion, // using FirstLane as the center point and increasing the radius // distance. + SmallVector> MainAltOps(NumOperands); + for (unsigned I = 0; I < NumOperands; ++I) + MainAltOps[I].push_back(getData(I, FirstLane).V); + for (unsigned Distance = 1; Distance != NumLanes; ++Distance) { // Visit the lane on the right and then the lane on the left. 
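// The resulting visit order can be seen with a standalone toy; NumLanes and
// FirstLane are hypothetical, and it assumes radii that leave the lane range
// are simply skipped.
#include <cstdio>
#include <initializer_list>

int main() {
  const int NumLanes = 4, FirstLane = 1;
  std::printf("%d", FirstLane); // the seed lane is handled first
  for (int Distance = 1; Distance != NumLanes; ++Distance)
    for (int Direction : {+1, -1}) {
      int Lane = FirstLane + Direction * Distance;
      if (Lane < 0 || Lane >= NumLanes)
        continue; // this radius leaves the lane range in this direction
      std::printf(" %d", Lane);
    }
  std::printf("\n"); // prints: 1 2 0 3
  return 0;
}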
for (int Direction : {+1, -1}) { @@ -1773,21 +1930,29 @@ public: // Look for a good match for each operand. for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { // Search for the operand that matches SortedOps[OpIdx][Lane-1]. - Optional BestIdx = - getBestOperand(OpIdx, Lane, LastLane, ReorderingModes); + Optional BestIdx = getBestOperand( + OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]); // By not selecting a value, we allow the operands that follow to // select a better matching value. We will get a non-null value in // the next run of getBestOperand(). if (BestIdx) { // Swap the current operand with the one returned by // getBestOperand(). - swap(OpIdx, BestIdx.getValue(), Lane); + swap(OpIdx, *BestIdx, Lane); } else { // We failed to find a best operand, set mode to 'Failed'. ReorderingModes[OpIdx] = ReorderingMode::Failed; // Enable the second pass. StrategyFailed = true; } + // Try to get the alternate opcode and follow it during analysis. + if (MainAltOps[OpIdx].size() != 2) { + OperandData &AltOp = getData(OpIdx, Lane); + InstructionsState OpS = + getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}); + if (OpS.getOpcode() && OpS.isAltShuffle()) + MainAltOps[OpIdx].push_back(AltOp.V); + } } } } @@ -1851,15 +2016,109 @@ public: #endif }; + /// Evaluate each pair in \p Candidates and return index into \p Candidates + /// for a pair which have highest score deemed to have best chance to form + /// root of profitable tree to vectorize. Return None if no candidate scored + /// above the LookAheadHeuristics::ScoreFail. + /// \param Limit Lower limit of the cost, considered to be good enough score. + Optional + findBestRootPair(ArrayRef> Candidates, + int Limit = LookAheadHeuristics::ScoreFail) { + LookAheadHeuristics LookAhead(*DL, *SE, *this, /*NumLanes=*/2, + RootLookAheadMaxDepth); + int BestScore = Limit; + Optional Index = None; + for (int I : seq(0, Candidates.size())) { + int Score = LookAhead.getScoreAtLevelRec(Candidates[I].first, + Candidates[I].second, + /*U1=*/nullptr, /*U2=*/nullptr, + /*Level=*/1, None); + if (Score > BestScore) { + BestScore = Score; + Index = I; + } + } + return Index; + } + /// Checks if the instruction is marked for deletion. bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } - /// Marks values operands for later deletion by replacing them with Undefs. - void eraseInstructions(ArrayRef AV); + /// Removes an instruction from its block and eventually deletes it. + /// It's like Instruction::eraseFromParent() except that the actual deletion + /// is delayed until BoUpSLP is destructed. + void eraseInstruction(Instruction *I) { + DeletedInstructions.insert(I); + } + + /// Checks if the instruction was already analyzed for being possible + /// reduction root. + bool isAnalyzedReductionRoot(Instruction *I) const { + return AnalyzedReductionsRoots.count(I); + } + /// Register given instruction as already analyzed for being possible + /// reduction root. + void analyzedReductionRoot(Instruction *I) { + AnalyzedReductionsRoots.insert(I); + } + /// Checks if the provided list of reduced values was checked already for + /// vectorization. + bool areAnalyzedReductionVals(ArrayRef VL) { + return AnalyzedReductionVals.contains(hash_value(VL)); + } + /// Adds the list of reduced values to list of already checked values for the + /// vectorization. + void analyzedReductionVals(ArrayRef VL) { + AnalyzedReductionVals.insert(hash_value(VL)); + } + /// Clear the list of the analyzed reduction root instructions. 
+  void clearReductionData() {
+    AnalyzedReductionsRoots.clear();
+    AnalyzedReductionVals.clear();
+  }
+  /// Checks if the given value is gathered in one of the nodes.
+  bool isAnyGathered(const SmallDenseSet<Value *> &Vals) const {
+    return any_of(MustGather, [&](Value *V) { return Vals.contains(V); });
+  }
 
   ~BoUpSLP();
 
 private:
+  /// Check if the operands on the edges \p Edges of the \p UserTE allow
+  /// reordering (i.e. the operands can be reordered because they have only
+  /// one user and are reorderable).
+  /// \param ReorderableGathers List of all gather nodes that require
+  /// reordering (e.g., gather of extractelements or partially vectorizable
+  /// loads).
+  /// \param GatherOps List of gather operand nodes for \p UserTE that require
+  /// reordering, subset of \p NonVectorized.
+  bool
+  canReorderOperands(TreeEntry *UserTE,
+                     SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
+                     ArrayRef<TreeEntry *> ReorderableGathers,
+                     SmallVectorImpl<TreeEntry *> &GatherOps);
+
+  /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
+  /// if any. If it is not vectorized (gather node), returns nullptr.
+  TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
+    ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
+    TreeEntry *TE = nullptr;
+    const auto *It = find_if(VL, [this, &TE](Value *V) {
+      TE = getTreeEntry(V);
+      return TE;
+    });
+    if (It != VL.end() && TE->isSame(VL))
+      return TE;
+    return nullptr;
+  }
+
+  /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
+  /// if any. If it is not vectorized (gather node), returns nullptr.
+  const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
+                                        unsigned OpIdx) const {
+    return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
+        const_cast<TreeEntry *>(UserTE), OpIdx);
+  }
+
   /// Checks if all users of \p I are the part of the vectorization tree.
   bool areAllUsersVectorized(Instruction *I,
                              ArrayRef<Value *> VectorizedVals) const;
@@ -1886,12 +2145,17 @@ private:
   /// Vectorize a single entry in the tree, starting in \p VL.
   Value *vectorizeTree(ArrayRef<Value *> VL);
 
+  /// Create a new vector from a list of scalar values. Produces a sequence
+  /// which exploits values reused across lanes, and arranges the inserts
+  /// for ease of later optimization.
+  Value *createBuildVector(ArrayRef<Value *> VL);
+
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars. If \p
   /// NeedToShuffle is true, need to add a cost of reshuffling some of the
   /// vector elements.
   InstructionCost getGatherCost(FixedVectorType *Ty,
-                                const DenseSet<unsigned> &ShuffledIndices,
+                                const APInt &ShuffledIndices,
                                 bool NeedToShuffle) const;
 
   /// Checks if the gathered \p VL can be represented as shuffle(s) of previous
@@ -1926,6 +2190,29 @@ private:
                                              const DataLayout &DL,
                                              ScalarEvolution &SE,
                                              const BoUpSLP &R);
+
+  /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
+  /// users of \p TE and collects the stores. It returns the map from the store
+  /// pointers to the collected stores.
+  DenseMap<Value *, SmallVector<StoreInst *, 4>>
+  collectUserStores(const BoUpSLP::TreeEntry *TE) const;
+
+  /// Helper for `findExternalStoreUsersReorderIndices()`. It checks if the
+  /// stores in \p StoresVec can form a vector instruction. If so it returns
+  /// true and populates \p ReorderIndices with the shuffle indices of the
+  /// stores when compared to the sorted vector.
+  bool CanFormVector(const SmallVector<StoreInst *, 4> &StoresVec,
+                     OrdersType &ReorderIndices) const;
+
+  /// Iterates through the users of \p TE, looking for scalar stores that can be
+  /// potentially vectorized in a future SLP-tree.
If found, it keeps track of + /// their order and builds an order index vector for each store bundle. It + /// returns all these order vectors found. + /// We run this after the tree has formed, otherwise we may come across user + /// instructions that are not yet in the tree. + SmallVector + findExternalStoreUsersReorderIndices(TreeEntry *TE) const; + struct TreeEntry { using VecTreeTy = SmallVector, 8>; TreeEntry(VecTreeTy &Container) : Container(Container) {} @@ -2270,15 +2557,21 @@ private: ScalarToTreeEntry[V] = Last; } // Update the scheduler bundle to point to this TreeEntry. - unsigned Lane = 0; - for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember; - BundleMember = BundleMember->NextInBundle) { - BundleMember->TE = Last; - BundleMember->Lane = Lane; - ++Lane; - } - assert((!Bundle.getValue() || Lane == VL.size()) && + ScheduleData *BundleMember = *Bundle; + assert((BundleMember || isa(S.MainOp) || + isVectorLikeInstWithConstOps(S.MainOp) || + doesNotNeedToSchedule(VL)) && "Bundle and VL out of sync"); + if (BundleMember) { + for (Value *V : VL) { + if (doesNotNeedToBeScheduled(V)) + continue; + assert(BundleMember && "Unexpected end of bundle."); + BundleMember->TE = Last; + BundleMember = BundleMember->NextInBundle; + } + } + assert(!BundleMember && "Bundle and VL out of sync"); } else { MustGather.insert(VL.begin(), VL.end()); } @@ -2312,7 +2605,7 @@ private: /// Maps a specific scalar to its tree entry. SmallDenseMap ScalarToTreeEntry; - /// Maps a value to the proposed vectorizable size. + /// Maps a value to the proposed vectorizable size. SmallDenseMap InstrElementSize; /// A list of scalars that we found that we need to keep as scalars. @@ -2343,12 +2636,12 @@ private: // First check if the result is already in the cache. AliasCacheKey key = std::make_pair(Inst1, Inst2); Optional &result = AliasCache[key]; - if (result.hasValue()) { + if (result) { return result.getValue(); } bool aliased = true; if (Loc1.Ptr && isSimple(Inst1)) - aliased = isModOrRefSet(AA->getModRefInfo(Inst2, Loc1)); + aliased = isModOrRefSet(BatchAA.getModRefInfo(Inst2, Loc1)); // Store the result in the cache. result = aliased; return aliased; @@ -2360,20 +2653,23 @@ private: /// TODO: consider moving this to the AliasAnalysis itself. DenseMap> AliasCache; - /// Removes an instruction from its block and eventually deletes it. - /// It's like Instruction::eraseFromParent() except that the actual deletion - /// is delayed until BoUpSLP is destructed. - /// This is required to ensure that there are no incorrect collisions in the - /// AliasCache, which can happen if a new instruction is allocated at the - /// same address as a previously deleted instruction. - void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) { - auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first; - It->getSecond() = It->getSecond() && ReplaceOpsWithUndef; - } + // Cache for pointerMayBeCaptured calls inside AA. This is preserved + // globally through SLP because we don't perform any action which + // invalidates capture results. + BatchAAResults BatchAA; /// Temporary store for deleted instructions. Instructions will be deleted - /// eventually when the BoUpSLP is destructed. - DenseMap DeletedInstructions; + /// eventually when the BoUpSLP is destructed. The deferral is required to + /// ensure that there are no incorrect collisions in the AliasCache, which + /// can happen if a new instruction is allocated at the same address as a + /// previously deleted instruction. 
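// The address-reuse hazard that this deferral avoids can be illustrated
// standalone (allocator behaviour varies; reuse is only one plausible
// outcome): a cache keyed on the first pointer could silently "hit" the
// second, unrelated object.
#include <cstdio>

int main() {
  int *A = new int(1);
  std::printf("A = %p\n", static_cast<void *>(A));
  delete A;            // the allocator is free to recycle this address...
  int *B = new int(2); // ...so B may compare equal to the stale key A
  std::printf("B = %p\n", static_cast<void *>(B));
  delete B;
  return 0;
}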
+ DenseSet DeletedInstructions; + + /// Set of the instruction, being analyzed already for reductions. + SmallPtrSet AnalyzedReductionsRoots; + + /// Set of hashes for the list of reduction values already being analyzed. + DenseSet AnalyzedReductionVals; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -2407,14 +2703,39 @@ private: NextLoadStore = nullptr; IsScheduled = false; SchedulingRegionID = BlockSchedulingRegionID; - UnscheduledDepsInBundle = UnscheduledDeps; clearDependencies(); OpValue = OpVal; TE = nullptr; - Lane = -1; + } + + /// Verify basic self consistency properties + void verify() { + if (hasValidDependencies()) { + assert(UnscheduledDeps <= Dependencies && "invariant"); + } else { + assert(UnscheduledDeps == Dependencies && "invariant"); + } + + if (IsScheduled) { + assert(isSchedulingEntity() && + "unexpected scheduled state"); + for (const ScheduleData *BundleMember = this; BundleMember; + BundleMember = BundleMember->NextInBundle) { + assert(BundleMember->hasValidDependencies() && + BundleMember->UnscheduledDeps == 0 && + "unexpected scheduled state"); + assert((BundleMember == this || !BundleMember->IsScheduled) && + "only bundle is marked scheduled"); + } + } + + assert(Inst->getParent() == FirstInBundle->Inst->getParent() && + "all bundle members must be in same basic block"); } /// Returns true if the dependency information has been calculated. + /// Note that depenendency validity can vary between instructions within + /// a single bundle. bool hasValidDependencies() const { return Dependencies != InvalidDeps; } /// Returns true for single instructions and for bundle representatives @@ -2424,7 +2745,7 @@ private: /// Returns true if it represents an instruction bundle and not only a /// single instruction. bool isPartOfBundle() const { - return NextInBundle != nullptr || FirstInBundle != this; + return NextInBundle != nullptr || FirstInBundle != this || TE; } /// Returns true if it is ready for scheduling, i.e. it has no more @@ -2432,20 +2753,23 @@ private: bool isReady() const { assert(isSchedulingEntity() && "can't consider non-scheduling entity for ready list"); - return UnscheduledDepsInBundle == 0 && !IsScheduled; + return unscheduledDepsInBundle() == 0 && !IsScheduled; } - /// Modifies the number of unscheduled dependencies, also updating it for - /// the whole bundle. + /// Modifies the number of unscheduled dependencies for this instruction, + /// and returns the number of remaining dependencies for the containing + /// bundle. int incrementUnscheduledDeps(int Incr) { + assert(hasValidDependencies() && + "increment of unscheduled deps would be meaningless"); UnscheduledDeps += Incr; - return FirstInBundle->UnscheduledDepsInBundle += Incr; + return FirstInBundle->unscheduledDepsInBundle(); } /// Sets the number of unscheduled dependencies to the number of /// dependencies. void resetUnscheduledDeps() { - incrementUnscheduledDeps(Dependencies - UnscheduledDeps); + UnscheduledDeps = Dependencies; } /// Clears all dependency information. 
@@ -2453,6 +2777,19 @@
       Dependencies = InvalidDeps;
       resetUnscheduledDeps();
       MemoryDependencies.clear();
+      ControlDependencies.clear();
+    }
+
+    int unscheduledDepsInBundle() const {
+      assert(isSchedulingEntity() && "only meaningful on the bundle");
+      int Sum = 0;
+      for (const ScheduleData *BundleMember = this; BundleMember;
+           BundleMember = BundleMember->NextInBundle) {
+        if (BundleMember->UnscheduledDeps == InvalidDeps)
+          return InvalidDeps;
+        Sum += BundleMember->UnscheduledDeps;
+      }
+      return Sum;
     }
 
     void dump(raw_ostream &os) const {
@@ -2473,6 +2810,12 @@
 
     Instruction *Inst = nullptr;
 
+    /// Opcode of the current instruction in the schedule data.
+    Value *OpValue = nullptr;
+
+    /// The TreeEntry that this instruction corresponds to.
+    TreeEntry *TE = nullptr;
+
     /// Points to the head in an instruction bundle (and always to this for
     /// single instructions).
     ScheduleData *FirstInBundle = nullptr;
@@ -2489,6 +2832,12 @@
     /// This list is derived on demand in calculateDependencies().
     SmallVector<ScheduleData *, 4> MemoryDependencies;
 
+    /// List of instructions which this instruction could be control dependent
+    /// on. Allowing such nodes to be scheduled below this one could introduce
+    /// a runtime fault which didn't exist in the original program.
+    /// E.g. a load or udiv following a readonly call which infinitely loops.
+    SmallVector<ScheduleData *, 4> ControlDependencies;
+
     /// This ScheduleData is in the current scheduling region if this matches
     /// the current SchedulingRegionID of BlockScheduling.
     int SchedulingRegionID = 0;
@@ -2508,22 +2857,9 @@
     /// Note that this is negative as long as Dependencies is not calculated.
     int UnscheduledDeps = InvalidDeps;
 
-    /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
-    /// single instructions.
-    int UnscheduledDepsInBundle = InvalidDeps;
-
     /// True if this instruction is scheduled (or considered as scheduled in the
     /// dry-run).
     bool IsScheduled = false;
-
-    /// Opcode of the current instruction in the schedule data.
-    Value *OpValue = nullptr;
-
-    /// The TreeEntry that this instruction corresponds to.
-    TreeEntry *TE = nullptr;
-
-    /// The lane of this node in the TreeEntry.
-    int Lane = -1;
   };
 
 #ifndef NDEBUG
@@ -2538,6 +2874,21 @@ private:
   friend struct DOTGraphTraits<BoUpSLP *>;
 
   /// Contains all scheduling data for a basic block.
+  /// It does not schedule instructions that are not memory read/write
+  /// instructions and whose operands are either constants, arguments, phis, or
+  /// instructions from other blocks, or whose users are phis or in other
+  /// blocks. The resulting vector instructions can be placed at the beginning
+  /// of the basic block without scheduling (if their operands do not need to
+  /// be scheduled) or at the end of the block (if their users are outside of
+  /// the block). This saves some compile time and memory used by the compiler.
+  /// ScheduleData is assigned for each instruction in between the boundaries
+  /// of the tree entry, even for those which are not part of the graph. It is
+  /// required to correctly follow the dependencies between the instructions
+  /// and schedule them correctly. ScheduleData is not allocated for
+  /// instructions which do not require scheduling, like phis, nodes with
+  /// extractelements/insertelements only, or nodes whose instructions have
+  /// uses/operands outside of the block.
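// The counter mechanics that drive the ready list can be modelled with a
// standalone toy (hypothetical nodes; the real ScheduleData also tracks
// memory and control dependencies, as described above).
#include <cstdio>
#include <vector>

struct Node {
  const char *Name;
  int UnscheduledDeps = 0;        // dependencies not yet scheduled
  std::vector<Node *> Dependents; // nodes waiting on this one
};

int main() {
  Node A{"A"}, B{"B"}, C{"C"}; // edges: A->B, A->C, B->C
  A.Dependents = {&B, &C};
  B.Dependents = {&C};
  B.UnscheduledDeps = 1;
  C.UnscheduledDeps = 2;
  std::vector<Node *> Ready = {&A}; // A has no outstanding dependencies
  while (!Ready.empty()) {
    Node *N = Ready.back();
    Ready.pop_back(); // "schedule" N
    std::printf("%s ", N->Name);
    for (Node *D : N->Dependents)
      if (--D->UnscheduledDeps == 0)
        Ready.push_back(D); // all inputs scheduled: D becomes ready
  }
  std::printf("\n"); // prints: A B C
  return 0;
}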
struct BlockScheduling { BlockScheduling(BasicBlock *BB) : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {} @@ -2548,6 +2899,7 @@ private: ScheduleEnd = nullptr; FirstLoadStoreInRegion = nullptr; LastLoadStoreInRegion = nullptr; + RegionHasStackSave = false; // Reduce the maximum schedule region size by the size of the // previous scheduling run. @@ -2561,20 +2913,29 @@ private: ++SchedulingRegionID; } - ScheduleData *getScheduleData(Value *V) { - ScheduleData *SD = ScheduleDataMap[V]; - if (SD && SD->SchedulingRegionID == SchedulingRegionID) + ScheduleData *getScheduleData(Instruction *I) { + if (BB != I->getParent()) + // Avoid lookup if can't possibly be in map. + return nullptr; + ScheduleData *SD = ScheduleDataMap.lookup(I); + if (SD && isInSchedulingRegion(SD)) return SD; return nullptr; } + ScheduleData *getScheduleData(Value *V) { + if (auto *I = dyn_cast(V)) + return getScheduleData(I); + return nullptr; + } + ScheduleData *getScheduleData(Value *V, Value *Key) { if (V == Key) return getScheduleData(V); auto I = ExtraScheduleDataMap.find(V); if (I != ExtraScheduleDataMap.end()) { - ScheduleData *SD = I->second[Key]; - if (SD && SD->SchedulingRegionID == SchedulingRegionID) + ScheduleData *SD = I->second.lookup(Key); + if (SD && isInSchedulingRegion(SD)) return SD; } return nullptr; @@ -2595,7 +2956,7 @@ private: BundleMember = BundleMember->NextInBundle) { if (BundleMember->Inst != BundleMember->OpValue) continue; - + // Handle the def-use chain dependencies. // Decrement the unscheduled counter and insert to ready list if ready. @@ -2617,10 +2978,12 @@ private: }; // If BundleMember is a vector bundle, its operands may have been - // reordered duiring buildTree(). We therefore need to get its operands + // reordered during buildTree(). We therefore need to get its operands // through the TreeEntry. if (TreeEntry *TE = BundleMember->TE) { - int Lane = BundleMember->Lane; + // Need to search for the lane since the tree entry can be reordered. + int Lane = std::distance(TE->Scalars.begin(), + find(TE->Scalars, BundleMember->Inst)); assert(Lane >= 0 && "Lane not set"); // Since vectorization tree is being built recursively this assertion @@ -2629,7 +2992,7 @@ private: // where their second (immediate) operand is not added. Since // immediates do not affect scheduler behavior this is considered // okay. - auto *In = TE->getMainOp(); + auto *In = BundleMember->Inst; assert(In && (isa(In) || isa(In) || In->getNumOperands() == TE->getNumOperands()) && @@ -2649,7 +3012,8 @@ private: } // Handle the memory dependencies. for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) { - if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) { + if (MemoryDepSD->hasValidDependencies() && + MemoryDepSD->incrementUnscheduledDeps(-1) == 0) { // There are no more unscheduled dependencies after decrementing, // so we can put the dependent instruction into the ready list. ScheduleData *DepBundle = MemoryDepSD->FirstInBundle; @@ -2660,6 +3024,48 @@ private: << "SLP: gets ready (mem): " << *DepBundle << "\n"); } } + // Handle the control dependencies. + for (ScheduleData *DepSD : BundleMember->ControlDependencies) { + if (DepSD->incrementUnscheduledDeps(-1) == 0) { + // There are no more unscheduled dependencies after decrementing, + // so we can put the dependent instruction into the ready list. 
+ ScheduleData *DepBundle = DepSD->FirstInBundle; + assert(!DepBundle->IsScheduled && + "already scheduled bundle gets ready"); + ReadyList.insert(DepBundle); + LLVM_DEBUG(dbgs() + << "SLP: gets ready (ctl): " << *DepBundle << "\n"); + } + } + + } + } + + /// Verify basic self consistency properties of the data structure. + void verify() { + if (!ScheduleStart) + return; + + assert(ScheduleStart->getParent() == ScheduleEnd->getParent() && + ScheduleStart->comesBefore(ScheduleEnd) && + "Not a valid scheduling region?"); + + for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { + auto *SD = getScheduleData(I); + if (!SD) + continue; + assert(isInSchedulingRegion(SD) && + "primary schedule data not in window?"); + assert(isInSchedulingRegion(SD->FirstInBundle) && + "entire bundle in window!"); + (void)SD; + doForAllOpcodes(I, [](ScheduleData *SD) { SD->verify(); }); + } + + for (auto *SD : ReadyInsts) { + assert(SD->isSchedulingEntity() && SD->isReady() && + "item in ready list not ready?"); + (void)SD; } } @@ -2670,7 +3076,7 @@ private: auto I = ExtraScheduleDataMap.find(V); if (I != ExtraScheduleDataMap.end()) for (auto &P : I->second) - if (P.second->SchedulingRegionID == SchedulingRegionID) + if (isInSchedulingRegion(P.second)) Action(P.second); } @@ -2679,10 +3085,11 @@ private: void initialFillReadyList(ReadyListType &ReadyList) { for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { doForAllOpcodes(I, [&](ScheduleData *SD) { - if (SD->isSchedulingEntity() && SD->isReady()) { + if (SD->isSchedulingEntity() && SD->hasValidDependencies() && + SD->isReady()) { ReadyList.insert(SD); LLVM_DEBUG(dbgs() - << "SLP: initially in ready list: " << *I << "\n"); + << "SLP: initially in ready list: " << *SD << "\n"); } }); } @@ -2740,18 +3147,14 @@ private: /// Attaches ScheduleData to Instruction. /// Note that the mapping survives during all vectorization iterations, i.e. /// ScheduleData structures are recycled. - DenseMap ScheduleDataMap; + DenseMap ScheduleDataMap; /// Attaches ScheduleData to Instruction with the leading key. DenseMap> ExtraScheduleDataMap; - struct ReadyList : SmallVector { - void insert(ScheduleData *SD) { push_back(SD); } - }; - /// The ready-list for scheduling (only used for the dry-run). - ReadyList ReadyInsts; + SetVector ReadyInsts; /// The first instruction of the scheduling region. Instruction *ScheduleStart = nullptr; @@ -2767,6 +3170,11 @@ private: /// (can be null). ScheduleData *LastLoadStoreInRegion = nullptr; + /// Is there an llvm.stacksave or llvm.stackrestore in the scheduling + /// region? Used to optimize the dependence calculation for the + /// common case where there isn't. + bool RegionHasStackSave = false; + /// The current size of the scheduling region. int ScheduleRegionSize = 0; @@ -2775,8 +3183,8 @@ private: /// The ID of the scheduling region. For a new vectorization iteration this /// is incremented which "removes" all ScheduleData from the region. - // Make sure that the initial SchedulingRegionID is greater than the - // initial SchedulingRegionID in ScheduleData (which is 0). + /// Make sure that the initial SchedulingRegionID is greater than the + /// initial SchedulingRegionID in ScheduleData (which is 0). int SchedulingRegionID = 1; }; @@ -2788,7 +3196,7 @@ private: void scheduleBlock(BlockScheduling *BS); /// List of users to ignore during scheduling and that don't need extracting. 
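// Editor's aside: a hedged, standalone sketch (plain C++, not the LLVM API)
// of the ready-list discipline used by the release loops above. Each kind
// of out-edge (def-use, memory, and the new control dependencies)
// decrements the successor bundle's unscheduled counter; a bundle whose
// counter reaches zero becomes ready. Names are illustrative only.
#include <vector>

struct Bundle {
  int UnscheduledDeps = 0;
  bool IsScheduled = false;
};

// Decrement every dependent bundle once; collect those that became ready.
std::vector<Bundle *> releaseDependencies(std::vector<Bundle *> &Deps) {
  std::vector<Bundle *> NowReady;
  for (Bundle *B : Deps)
    if (--B->UnscheduledDeps == 0 && !B->IsScheduled)
      NowReady.push_back(B); // analogous to ReadyList.insert(DepBundle)
  return NowReady;
}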
- ArrayRef UserIgnoreList; + const SmallDenseSet *UserIgnoreList = nullptr; /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of /// sorted SmallVectors of unsigned. @@ -2819,7 +3227,6 @@ private: ScalarEvolution *SE; TargetTransformInfo *TTI; TargetLibraryInfo *TLI; - AAResults *AA; LoopInfo *LI; DominatorTree *DT; AssumptionCache *AC; @@ -2936,20 +3343,25 @@ template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { } // end namespace llvm BoUpSLP::~BoUpSLP() { - for (const auto &Pair : DeletedInstructions) { - // Replace operands of ignored instructions with Undefs in case if they were - // marked for deletion. - if (Pair.getSecond()) { - Value *Undef = UndefValue::get(Pair.getFirst()->getType()); - Pair.getFirst()->replaceAllUsesWith(Undef); - } - Pair.getFirst()->dropAllReferences(); - } - for (const auto &Pair : DeletedInstructions) { - assert(Pair.getFirst()->use_empty() && + SmallVector DeadInsts; + for (auto *I : DeletedInstructions) { + for (Use &U : I->operands()) { + auto *Op = dyn_cast(U.get()); + if (Op && !DeletedInstructions.count(Op) && Op->hasOneUser() && + wouldInstructionBeTriviallyDead(Op, TLI)) + DeadInsts.emplace_back(Op); + } + I->dropAllReferences(); + } + for (auto *I : DeletedInstructions) { + assert(I->use_empty() && "trying to erase instruction with users."); - Pair.getFirst()->eraseFromParent(); + I->eraseFromParent(); } + + // Cleanup any dead scalar code feeding the vectorized instructions + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI); + #ifdef EXPENSIVE_CHECKS // If we could guarantee that this call is not extremely slow, we could // remove the ifdef limitation (see PR47712). @@ -2957,13 +3369,6 @@ BoUpSLP::~BoUpSLP() { #endif } -void BoUpSLP::eraseInstructions(ArrayRef AV) { - for (auto *V : AV) { - if (auto *I = dyn_cast(V)) - eraseInstruction(I, /*ReplaceOpsWithUndef=*/true); - }; -} - /// Reorders the given \p Reuses mask according to the given \p Mask. \p Reuses /// contains original mask for the scalars reused in the node. Procedure /// transform this mask in accordance with the given \p Mask. @@ -3068,6 +3473,189 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { return None; } +namespace { +/// Tracks the state we can represent the loads in the given sequence. +enum class LoadsState { Gather, Vectorize, ScatterVectorize }; +} // anonymous namespace + +/// Checks if the given array of loads can be represented as a vectorized, +/// scatter or just simple gather. +static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, + const TargetTransformInfo &TTI, + const DataLayout &DL, ScalarEvolution &SE, + LoopInfo &LI, + SmallVectorImpl &Order, + SmallVectorImpl &PointerOps) { + // Check that a vectorized load would load the same memory as a scalar + // load. For example, we don't want to vectorize loads that are smaller + // than 8-bit. Even though we have a packed struct {} LLVM + // treats loading/storing it as an i8 struct. If we vectorize loads/stores + // from such a struct, we read/write packed bits disagreeing with the + // unvectorized version. + Type *ScalarTy = VL0->getType(); + + if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy)) + return LoadsState::Gather; + + // Make sure all loads in the bundle are simple - we can't vectorize + // atomic or volatile loads. 
+ PointerOps.clear(); + PointerOps.resize(VL.size()); + auto *POIter = PointerOps.begin(); + for (Value *V : VL) { + auto *L = cast(V); + if (!L->isSimple()) + return LoadsState::Gather; + *POIter = L->getPointerOperand(); + ++POIter; + } + + Order.clear(); + // Check the order of pointer operands or that all pointers are the same. + bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order); + if (IsSorted || all_of(PointerOps, [&PointerOps](Value *P) { + if (getUnderlyingObject(P) != getUnderlyingObject(PointerOps.front())) + return false; + auto *GEP = dyn_cast(P); + if (!GEP) + return false; + auto *GEP0 = cast(PointerOps.front()); + return GEP->getNumOperands() == 2 && + ((isConstant(GEP->getOperand(1)) && + isConstant(GEP0->getOperand(1))) || + getSameOpcode({GEP->getOperand(1), GEP0->getOperand(1)}) + .getOpcode()); + })) { + if (IsSorted) { + Value *Ptr0; + Value *PtrN; + if (Order.empty()) { + Ptr0 = PointerOps.front(); + PtrN = PointerOps.back(); + } else { + Ptr0 = PointerOps[Order.front()]; + PtrN = PointerOps[Order.back()]; + } + Optional Diff = + getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE); + // Check that the sorted loads are consecutive. + if (static_cast(*Diff) == VL.size() - 1) + return LoadsState::Vectorize; + } + // TODO: need to improve analysis of the pointers, if not all of them are + // GEPs or have > 2 operands, we end up with a gather node, which just + // increases the cost. + Loop *L = LI.getLoopFor(cast(VL0)->getParent()); + bool ProfitableGatherPointers = + static_cast(count_if(PointerOps, [L](Value *V) { + return L && L->isLoopInvariant(V); + })) <= VL.size() / 2 && VL.size() > 2; + if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) { + auto *GEP = dyn_cast(P); + return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) || + (GEP && GEP->getNumOperands() == 2); + })) { + Align CommonAlignment = cast(VL0)->getAlign(); + for (Value *V : VL) + CommonAlignment = + std::min(CommonAlignment, cast(V)->getAlign()); + auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); + if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) && + !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) + return LoadsState::ScatterVectorize; + } + } + + return LoadsState::Gather; +} + +bool clusterSortPtrAccesses(ArrayRef VL, Type *ElemTy, + const DataLayout &DL, ScalarEvolution &SE, + SmallVectorImpl &SortedIndices) { + assert(llvm::all_of( + VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && + "Expected list of pointer operands."); + // Map from bases to a vector of (Ptr, Offset, OrigIdx), which we insert each + // Ptr into, sort and return the sorted indices with values next to one + // another. + MapVector>> Bases; + Bases[VL[0]].push_back(std::make_tuple(VL[0], 0U, 0U)); + + unsigned Cnt = 1; + for (Value *Ptr : VL.drop_front()) { + bool Found = any_of(Bases, [&](auto &Base) { + Optional Diff = + getPointersDiff(ElemTy, Base.first, ElemTy, Ptr, DL, SE, + /*StrictCheck=*/true); + if (!Diff) + return false; + + Base.second.emplace_back(Ptr, *Diff, Cnt++); + return true; + }); + + if (!Found) { + // If we haven't found enough to usefully cluster, return early. + if (Bases.size() > VL.size() / 2 - 1) + return false; + + // Not found already - add a new Base + Bases[Ptr].emplace_back(Ptr, 0, Cnt++); + } + } + + // For each of the bases sort the pointers by Offset and check if any of the + // base become consecutively allocated. 
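// Editor's aside: a small standalone illustration (plain C++, not the LLVM
// API) of the consecutiveness test that canVectorizeLoads() above and the
// base-clustering loop that follows both rely on: reduce each pointer to an
// integer element offset from a common base (the role getPointersDiff()
// plays here), sort the offsets, and require a dense run with stride 1.
// The function name is hypothetical.
#include <algorithm>
#include <cstddef>
#include <vector>

bool areConsecutiveOffsets(std::vector<int> Offsets) {
  std::sort(Offsets.begin(), Offsets.end());
  for (std::size_t I = 1; I < Offsets.size(); ++I)
    if (Offsets[I] != Offsets[I - 1] + 1)
      return false; // gap or duplicate: gather/scatter instead of a wide load
  return true;      // dense run: a single wide load/store is possible
}

// E.g. offsets {3, 1, 2, 0} sort to {0, 1, 2, 3} and qualify;
// {0, 2, 3, 5} has gaps and does not.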
+  bool AnyConsecutive = false;
+  for (auto &Base : Bases) {
+    auto &Vec = Base.second;
+    if (Vec.size() > 1) {
+      llvm::stable_sort(Vec, [](const std::tuple<Value *, int, unsigned> &X,
+                                const std::tuple<Value *, int, unsigned> &Y) {
+        return std::get<1>(X) < std::get<1>(Y);
+      });
+      int InitialOffset = std::get<1>(Vec[0]);
+      AnyConsecutive |= all_of(enumerate(Vec), [InitialOffset](auto &P) {
+        return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
+      });
+    }
+  }
+
+  // Fill the SortedIndices array only if it looks worthwhile to sort the ptrs.
+  SortedIndices.clear();
+  if (!AnyConsecutive)
+    return false;
+
+  for (auto &Base : Bases) {
+    for (auto &T : Base.second)
+      SortedIndices.push_back(std::get<2>(T));
+  }
+
+  assert(SortedIndices.size() == VL.size() &&
+         "Expected SortedIndices to be the size of VL");
+  return true;
+}
+
+Optional<BoUpSLP::OrdersType>
+BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
+  assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
+  Type *ScalarTy = TE.Scalars[0]->getType();
+
+  SmallVector<Value *> Ptrs;
+  Ptrs.reserve(TE.Scalars.size());
+  for (Value *V : TE.Scalars) {
+    auto *L = dyn_cast<LoadInst>(V);
+    if (!L || !L->isSimple())
+      return None;
+    Ptrs.push_back(L->getPointerOperand());
+  }
+
+  BoUpSLP::OrdersType Order;
+  if (clusterSortPtrAccesses(Ptrs, ScalarTy, *DL, *SE, Order))
+    return Order;
+  return None;
+}
+
 Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
                                                          bool TopToBottom) {
   // No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -3108,6 +3696,9 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
     }
     if (Optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
       return CurrentOrder;
+    if (TE.Scalars.size() >= 4)
+      if (Optional<OrdersType> Order = findPartiallyOrderedLoads(TE))
+        return Order;
   }
   return None;
 }
@@ -3118,13 +3709,55 @@ void BoUpSLP::reorderTopToBottom() {
   // ExtractElement gather nodes which can be vectorized and need to handle
   // their ordering.
   DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+
+  // AltShuffles can also have a preferred ordering that leads to fewer
+  // instructions, e.g., the addsub instruction in x86.
+  DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
+
+  // Maps a TreeEntry to the reorder indices of external users.
+  DenseMap<const TreeEntry *, SmallVector<OrdersType, 1>>
+      ExternalUserReorderMap;
+  // FIXME: Workaround for syntax error reported by MSVC buildbots.
+  TargetTransformInfo &TTIRef = *TTI;
   // Find all reorderable nodes with the given VF.
   // Currently these are vectorized stores, loads, extracts + some gathering
   // of extracts.
-  for_each(VectorizableTree, [this, &VFToOrderedEntries, &GathersToOrders](
+  for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
+                              &GathersToOrders, &ExternalUserReorderMap,
+                              &AltShufflesToOrders](
                                  const std::unique_ptr<TreeEntry> &TE) {
+    // Look for external users that will probably be vectorized.
+    SmallVector<OrdersType, 1> ExternalUserReorderIndices =
+        findExternalStoreUsersReorderIndices(TE.get());
+    if (!ExternalUserReorderIndices.empty()) {
+      VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+      ExternalUserReorderMap.try_emplace(TE.get(),
+                                         std::move(ExternalUserReorderIndices));
+    }
+
+    // Patterns like [fadd,fsub] can be combined into a single instruction in
+    // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need
+    // to take into account their order when looking for the most used order.
+    if (TE->isAltShuffle()) {
+      VectorType *VecTy =
+          FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
+      unsigned Opcode0 = TE->getOpcode();
+      unsigned Opcode1 = TE->getAltOpcode();
+      // The opcode mask selects between the two opcodes.
+ SmallBitVector OpcodeMask(TE->Scalars.size(), 0); + for (unsigned Lane : seq(0, TE->Scalars.size())) + if (cast(TE->Scalars[Lane])->getOpcode() == Opcode1) + OpcodeMask.set(Lane); + // If this pattern is supported by the target then we consider the order. + if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) { + VFToOrderedEntries[TE->Scalars.size()].insert(TE.get()); + AltShufflesToOrders.try_emplace(TE.get(), OrdersType()); + } + // TODO: Check the reverse order too. + } + if (Optional CurrentOrder = - getReorderingData(*TE.get(), /*TopToBottom=*/true)) { + getReorderingData(*TE, /*TopToBottom=*/true)) { // Do not include ordering for nodes used in the alt opcode vectorization, // better to reorder them during bottom-to-top stage. If follow the order // here, it causes reordering of the whole graph though actually it is @@ -3142,10 +3775,7 @@ void BoUpSLP::reorderTopToBottom() { EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0; })) return; - if (UserTE->UserTreeIndices.empty()) - UserTE = nullptr; - else - UserTE = UserTE->UserTreeIndices.back().UserTE; + UserTE = UserTE->UserTreeIndices.back().UserTE; ++Cnt; } VFToOrderedEntries[TE->Scalars.size()].insert(TE.get()); @@ -3176,11 +3806,30 @@ void BoUpSLP::reorderTopToBottom() { if (!OpTE->ReuseShuffleIndices.empty()) continue; // Count number of orders uses. - const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & { - if (OpTE->State == TreeEntry::NeedToGather) - return GathersToOrders.find(OpTE)->second; + const auto &Order = [OpTE, &GathersToOrders, + &AltShufflesToOrders]() -> const OrdersType & { + if (OpTE->State == TreeEntry::NeedToGather) { + auto It = GathersToOrders.find(OpTE); + if (It != GathersToOrders.end()) + return It->second; + } + if (OpTE->isAltShuffle()) { + auto It = AltShufflesToOrders.find(OpTE); + if (It != AltShufflesToOrders.end()) + return It->second; + } return OpTE->ReorderIndices; }(); + // First consider the order of the external scalar users. + auto It = ExternalUserReorderMap.find(OpTE); + if (It != ExternalUserReorderMap.end()) { + const auto &ExternalUserReorderIndices = It->second; + for (const OrdersType &ExtOrder : ExternalUserReorderIndices) + ++OrdersUses.insert(std::make_pair(ExtOrder, 0)).first->second; + // No other useful reorder data in this entry. + if (Order.empty()) + continue; + } // Stores actually store the mask, not the order, need to invert. if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() && OpTE->getOpcode() == Instruction::Store && !Order.empty()) { @@ -3270,6 +3919,57 @@ void BoUpSLP::reorderTopToBottom() { } } +bool BoUpSLP::canReorderOperands( + TreeEntry *UserTE, SmallVectorImpl> &Edges, + ArrayRef ReorderableGathers, + SmallVectorImpl &GatherOps) { + for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) { + if (any_of(Edges, [I](const std::pair &OpData) { + return OpData.first == I && + OpData.second->State == TreeEntry::Vectorize; + })) + continue; + if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) { + // Do not reorder if operand node is used by many user nodes. + if (any_of(TE->UserTreeIndices, + [UserTE](const EdgeInfo &EI) { return EI.UserTE != UserTE; })) + return false; + // Add the node to the list of the ordered nodes with the identity + // order. + Edges.emplace_back(I, TE); + // Add ScatterVectorize nodes to the list of operands, where just + // reordering of the scalars is required. Similar to the gathers, so + // simply add to the list of gathered ops. 
+ // If there are reused scalars, process this node as a regular vectorize + // node, just reorder reuses mask. + if (TE->State != TreeEntry::Vectorize && TE->ReuseShuffleIndices.empty()) + GatherOps.push_back(TE); + continue; + } + TreeEntry *Gather = nullptr; + if (count_if(ReorderableGathers, + [&Gather, UserTE, I](TreeEntry *TE) { + assert(TE->State != TreeEntry::Vectorize && + "Only non-vectorized nodes are expected."); + if (any_of(TE->UserTreeIndices, + [UserTE, I](const EdgeInfo &EI) { + return EI.UserTE == UserTE && EI.EdgeIdx == I; + })) { + assert(TE->isSame(UserTE->getOperand(I)) && + "Operand entry does not match operands."); + Gather = TE; + return true; + } + return false; + }) > 1 && + !all_of(UserTE->getOperand(I), isConstant)) + return false; + if (Gather) + GatherOps.push_back(Gather); + } + return true; +} + void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { SetVector OrderedEntries; DenseMap GathersToOrders; @@ -3283,49 +3983,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { if (TE->State != TreeEntry::Vectorize) NonVectorized.push_back(TE.get()); if (Optional CurrentOrder = - getReorderingData(*TE.get(), /*TopToBottom=*/false)) { + getReorderingData(*TE, /*TopToBottom=*/false)) { OrderedEntries.insert(TE.get()); if (TE->State != TreeEntry::Vectorize) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); } }); - // Checks if the operands of the users are reordarable and have only single - // use. - auto &&CheckOperands = - [this, &NonVectorized](const auto &Data, - SmallVectorImpl &GatherOps) { - for (unsigned I = 0, E = Data.first->getNumOperands(); I < E; ++I) { - if (any_of(Data.second, - [I](const std::pair &OpData) { - return OpData.first == I && - OpData.second->State == TreeEntry::Vectorize; - })) - continue; - ArrayRef VL = Data.first->getOperand(I); - const TreeEntry *TE = nullptr; - const auto *It = find_if(VL, [this, &TE](Value *V) { - TE = getTreeEntry(V); - return TE; - }); - if (It != VL.end() && TE->isSame(VL)) - return false; - TreeEntry *Gather = nullptr; - if (count_if(NonVectorized, [VL, &Gather](TreeEntry *TE) { - assert(TE->State != TreeEntry::Vectorize && - "Only non-vectorized nodes are expected."); - if (TE->isSame(VL)) { - Gather = TE; - return true; - } - return false; - }) > 1) - return false; - if (Gather) - GatherOps.push_back(Gather); - } - return true; - }; // 1. Propagate order to the graph nodes, which use only reordered nodes. // I.e., if the node has operands, that are reordered, try to make at least // one operand order in the natural order and reorder others + reorder the @@ -3334,7 +3998,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { while (!OrderedEntries.empty()) { // 1. Filter out only reordered nodes. // 2. If the entry has multiple uses - skip it and jump to the next node. - MapVector>> Users; + DenseMap>> Users; SmallVector Filtered; for (TreeEntry *TE : OrderedEntries) { if (!(TE->State == TreeEntry::Vectorize || @@ -3362,10 +4026,17 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { // Erase filtered entries. for_each(Filtered, [&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); }); - for (const auto &Data : Users) { + SmallVector< + std::pair>>> + UsersVec(Users.begin(), Users.end()); + sort(UsersVec, [](const auto &Data1, const auto &Data2) { + return Data1.first->Idx > Data2.first->Idx; + }); + for (auto &Data : UsersVec) { // Check that operands are used only in the User node. 
SmallVector GatherOps; - if (!CheckOperands(Data, GatherOps)) { + if (!canReorderOperands(Data.first, Data.second, NonVectorized, + GatherOps)) { for_each(Data.second, [&OrderedEntries](const std::pair &Op) { OrderedEntries.remove(Op.second); @@ -3381,18 +4052,22 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { // the same node my be considered several times, though might be not // profitable. SmallPtrSet VisitedOps; + SmallPtrSet VisitedUsers; for (const auto &Op : Data.second) { TreeEntry *OpTE = Op.second; if (!VisitedOps.insert(OpTE).second) continue; - if (!OpTE->ReuseShuffleIndices.empty() || - (IgnoreReorder && OpTE == VectorizableTree.front().get())) + if (!OpTE->ReuseShuffleIndices.empty()) continue; const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & { if (OpTE->State == TreeEntry::NeedToGather) return GathersToOrders.find(OpTE)->second; return OpTE->ReorderIndices; }(); + unsigned NumOps = count_if( + Data.second, [OpTE](const std::pair &P) { + return P.second == OpTE; + }); // Stores actually store the mask, not the order, need to invert. if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() && OpTE->getOpcode() == Instruction::Store && !Order.empty()) { @@ -3404,14 +4079,52 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { return Idx == UndefMaskElem ? E : static_cast(Idx); }); fixupOrderingIndices(CurrentOrder); - ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second; + OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second += + NumOps; } else { - ++OrdersUses.insert(std::make_pair(Order, 0)).first->second; + OrdersUses.insert(std::make_pair(Order, 0)).first->second += NumOps; + } + auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0)); + const auto &&AllowsReordering = [IgnoreReorder, &GathersToOrders]( + const TreeEntry *TE) { + if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() || + (TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) || + (IgnoreReorder && TE->Idx == 0)) + return true; + if (TE->State == TreeEntry::NeedToGather) { + auto It = GathersToOrders.find(TE); + if (It != GathersToOrders.end()) + return !It->second.empty(); + return true; + } + return false; + }; + for (const EdgeInfo &EI : OpTE->UserTreeIndices) { + TreeEntry *UserTE = EI.UserTE; + if (!VisitedUsers.insert(UserTE).second) + continue; + // May reorder user node if it requires reordering, has reused + // scalars, is an alternate op vectorize node or its op nodes require + // reordering. + if (AllowsReordering(UserTE)) + continue; + // Check if users allow reordering. + // Currently look up just 1 level of operands to avoid increase of + // the compile time. + // Profitable to reorder if definitely more operands allow + // reordering rather than those with natural order. + ArrayRef> Ops = Users[UserTE]; + if (static_cast(count_if( + Ops, [UserTE, &AllowsReordering]( + const std::pair &Op) { + return AllowsReordering(Op.second) && + all_of(Op.second->UserTreeIndices, + [UserTE](const EdgeInfo &EI) { + return EI.UserTE == UserTE; + }); + })) <= Ops.size() / 2) + ++Res.first->second; } - OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second += - OpTE->UserTreeIndices.size(); - assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0."); - --OrdersUses[{}]; } // If no orders - skip current nodes and jump to the next one, if any. 
if (OrdersUses.empty()) { @@ -3452,7 +4165,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { OrderedEntries.remove(TE); if (!VisitedOps.insert(TE).second) continue; - if (!TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty()) { + if (TE->ReuseShuffleIndices.size() == BestOrder.size()) { // Just reorder reuses indices. reorderReuses(TE->ReuseShuffleIndices, Mask); continue; @@ -3464,6 +4177,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { TE->ReorderIndices.empty()) && "Non-matching sizes of user/operand entries."); reorderOrder(TE->ReorderIndices, Mask); + if (IgnoreReorder && TE == VectorizableTree.front().get()) + IgnoreReorder = false; } // For gathers just need to reorder its scalars. for (TreeEntry *Gather : GatherOps) { @@ -3554,90 +4269,282 @@ void BoUpSLP::buildExternalUses( } } - // Ignore users in the user ignore list. - if (is_contained(UserIgnoreList, UserInst)) - continue; + // Ignore users in the user ignore list. + if (UserIgnoreList && UserIgnoreList->contains(UserInst)) + continue; + + LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " + << Lane << " from " << *Scalar << ".\n"); + ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane)); + } + } + } +} + +DenseMap> +BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const { + DenseMap> PtrToStoresMap; + for (unsigned Lane : seq(0, TE->Scalars.size())) { + Value *V = TE->Scalars[Lane]; + // To save compilation time we don't visit if we have too many users. + static constexpr unsigned UsersLimit = 4; + if (V->hasNUsesOrMore(UsersLimit)) + break; + + // Collect stores per pointer object. + for (User *U : V->users()) { + auto *SI = dyn_cast(U); + if (SI == nullptr || !SI->isSimple() || + !isValidElementType(SI->getValueOperand()->getType())) + continue; + // Skip entry if already + if (getTreeEntry(U)) + continue; + + Value *Ptr = getUnderlyingObject(SI->getPointerOperand()); + auto &StoresVec = PtrToStoresMap[Ptr]; + // For now just keep one store per pointer object per lane. + // TODO: Extend this to support multiple stores per pointer per lane + if (StoresVec.size() > Lane) + continue; + // Skip if in different BBs. + if (!StoresVec.empty() && + SI->getParent() != StoresVec.back()->getParent()) + continue; + // Make sure that the stores are of the same type. + if (!StoresVec.empty() && + SI->getValueOperand()->getType() != + StoresVec.back()->getValueOperand()->getType()) + continue; + StoresVec.push_back(SI); + } + } + return PtrToStoresMap; +} + +bool BoUpSLP::CanFormVector(const SmallVector &StoresVec, + OrdersType &ReorderIndices) const { + // We check whether the stores in StoreVec can form a vector by sorting them + // and checking whether they are consecutive. + + // To avoid calling getPointersDiff() while sorting we create a vector of + // pairs {store, offset from first} and sort this instead. + SmallVector, 4> StoreOffsetVec(StoresVec.size()); + StoreInst *S0 = StoresVec[0]; + StoreOffsetVec[0] = {S0, 0}; + Type *S0Ty = S0->getValueOperand()->getType(); + Value *S0Ptr = S0->getPointerOperand(); + for (unsigned Idx : seq(1, StoresVec.size())) { + StoreInst *SI = StoresVec[Idx]; + Optional Diff = + getPointersDiff(S0Ty, S0Ptr, SI->getValueOperand()->getType(), + SI->getPointerOperand(), *DL, *SE, + /*StrictCheck=*/true); + // We failed to compare the pointers so just abandon this StoresVec. + if (!Diff) + return false; + StoreOffsetVec[Idx] = {StoresVec[Idx], *Diff}; + } + + // Sort the vector based on the pointers. 
We create a copy because we may + // need the original later for calculating the reorder (shuffle) indices. + stable_sort(StoreOffsetVec, [](const std::pair &Pair1, + const std::pair &Pair2) { + int Offset1 = Pair1.second; + int Offset2 = Pair2.second; + return Offset1 < Offset2; + }); + + // Check if the stores are consecutive by checking if their difference is 1. + for (unsigned Idx : seq(1, StoreOffsetVec.size())) + if (StoreOffsetVec[Idx].second != StoreOffsetVec[Idx-1].second + 1) + return false; + + // Calculate the shuffle indices according to their offset against the sorted + // StoreOffsetVec. + ReorderIndices.reserve(StoresVec.size()); + for (StoreInst *SI : StoresVec) { + unsigned Idx = find_if(StoreOffsetVec, + [SI](const std::pair &Pair) { + return Pair.first == SI; + }) - + StoreOffsetVec.begin(); + ReorderIndices.push_back(Idx); + } + // Identity order (e.g., {0,1,2,3}) is modeled as an empty OrdersType in + // reorderTopToBottom() and reorderBottomToTop(), so we are following the + // same convention here. + auto IsIdentityOrder = [](const OrdersType &Order) { + for (unsigned Idx : seq(0, Order.size())) + if (Idx != Order[Idx]) + return false; + return true; + }; + if (IsIdentityOrder(ReorderIndices)) + ReorderIndices.clear(); + + return true; +} - LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " - << Lane << " from " << *Scalar << ".\n"); - ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane)); - } - } +#ifndef NDEBUG +LLVM_DUMP_METHOD static void dumpOrder(const BoUpSLP::OrdersType &Order) { + for (unsigned Idx : Order) + dbgs() << Idx << ", "; + dbgs() << "\n"; +} +#endif + +SmallVector +BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const { + unsigned NumLanes = TE->Scalars.size(); + + DenseMap> PtrToStoresMap = + collectUserStores(TE); + + // Holds the reorder indices for each candidate store vector that is a user of + // the current TreeEntry. + SmallVector ExternalReorderIndices; + + // Now inspect the stores collected per pointer and look for vectorization + // candidates. For each candidate calculate the reorder index vector and push + // it into `ExternalReorderIndices` + for (const auto &Pair : PtrToStoresMap) { + auto &StoresVec = Pair.second; + // If we have fewer than NumLanes stores, then we can't form a vector. + if (StoresVec.size() != NumLanes) + continue; + + // If the stores are not consecutive then abandon this StoresVec. + OrdersType ReorderIndices; + if (!CanFormVector(StoresVec, ReorderIndices)) + continue; + + // We now know that the scalars in StoresVec can form a vector instruction, + // so set the reorder indices. + ExternalReorderIndices.push_back(ReorderIndices); } + return ExternalReorderIndices; } void BoUpSLP::buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst) { + const SmallDenseSet &UserIgnoreLst) { deleteTree(); - UserIgnoreList = UserIgnoreLst; + UserIgnoreList = &UserIgnoreLst; if (!allSameType(Roots)) return; buildTree_rec(Roots, 0, EdgeInfo()); } -namespace { -/// Tracks the state we can represent the loads in the given sequence. -enum class LoadsState { Gather, Vectorize, ScatterVectorize }; -} // anonymous namespace - -/// Checks if the given array of loads can be represented as a vectorized, -/// scatter or just simple gather. 
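// Editor's aside: a hedged sketch, in plain C++, of how CanFormVector()
// above derives ReorderIndices: each store's index is its rank in the
// offset-sorted sequence, and an identity permutation is canonicalized to
// an empty order, matching the reorderTopToBottom()/reorderBottomToTop()
// convention. Function and variable names are illustrative only.
#include <algorithm>
#include <numeric>
#include <vector>

std::vector<unsigned> reorderFromOffsets(const std::vector<int> &Offsets) {
  // Sorted[pos] = index of the element that belongs at position pos.
  std::vector<unsigned> Sorted(Offsets.size());
  std::iota(Sorted.begin(), Sorted.end(), 0u);
  std::stable_sort(Sorted.begin(), Sorted.end(), [&](unsigned A, unsigned B) {
    return Offsets[A] < Offsets[B];
  });
  // Invert: Order[i] = rank of element i in the sorted sequence.
  std::vector<unsigned> Order(Offsets.size());
  for (unsigned Pos = 0; Pos < Sorted.size(); ++Pos)
    Order[Sorted[Pos]] = Pos;
  // Canonicalize the identity order to an empty vector, as the patch does.
  if (std::is_sorted(Offsets.begin(), Offsets.end()))
    Order.clear();
  return Order;
}

// Stores at offsets {8, 0, 4} (memory order: elements 1, 2, 0) yield the
// reorder indices {2, 0, 1}; offsets {0, 4, 8} yield the empty order.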
-static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, - const TargetTransformInfo &TTI, - const DataLayout &DL, ScalarEvolution &SE, - SmallVectorImpl &Order, - SmallVectorImpl &PointerOps) { - // Check that a vectorized load would load the same memory as a scalar - // load. For example, we don't want to vectorize loads that are smaller - // than 8-bit. Even though we have a packed struct {} LLVM - // treats loading/storing it as an i8 struct. If we vectorize loads/stores - // from such a struct, we read/write packed bits disagreeing with the - // unvectorized version. - Type *ScalarTy = VL0->getType(); - - if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy)) - return LoadsState::Gather; +void BoUpSLP::buildTree(ArrayRef Roots) { + deleteTree(); + if (!allSameType(Roots)) + return; + buildTree_rec(Roots, 0, EdgeInfo()); +} - // Make sure all loads in the bundle are simple - we can't vectorize - // atomic or volatile loads. - PointerOps.clear(); - PointerOps.resize(VL.size()); - auto *POIter = PointerOps.begin(); +/// \return true if the specified list of values has only one instruction that +/// requires scheduling, false otherwise. +#ifndef NDEBUG +static bool needToScheduleSingleInstruction(ArrayRef VL) { + Value *NeedsScheduling = nullptr; for (Value *V : VL) { - auto *L = cast(V); - if (!L->isSimple()) - return LoadsState::Gather; - *POIter = L->getPointerOperand(); - ++POIter; + if (doesNotNeedToBeScheduled(V)) + continue; + if (!NeedsScheduling) { + NeedsScheduling = V; + continue; + } + return false; } + return NeedsScheduling; +} +#endif - Order.clear(); - // Check the order of pointer operands. - if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) { - Value *Ptr0; - Value *PtrN; - if (Order.empty()) { - Ptr0 = PointerOps.front(); - PtrN = PointerOps.back(); +/// Generates key/subkey pair for the given value to provide effective sorting +/// of the values and better detection of the vectorizable values sequences. The +/// keys/subkeys can be used for better sorting of the values themselves (keys) +/// and in values subgroups (subkeys). +static std::pair generateKeySubkey( + Value *V, const TargetLibraryInfo *TLI, + function_ref LoadsSubkeyGenerator, + bool AllowAlternate) { + hash_code Key = hash_value(V->getValueID() + 2); + hash_code SubKey = hash_value(0); + // Sort the loads by the distance between the pointers. + if (auto *LI = dyn_cast(V)) { + Key = hash_combine(hash_value(Instruction::Load), Key); + if (LI->isSimple()) + SubKey = hash_value(LoadsSubkeyGenerator(Key, LI)); + else + SubKey = hash_value(LI); + } else if (isVectorLikeInstWithConstOps(V)) { + // Sort extracts by the vector operands. + if (isa(V)) + Key = hash_value(Value::UndefValueVal + 1); + if (auto *EI = dyn_cast(V)) { + if (!isUndefVector(EI->getVectorOperand()) && + !isa(EI->getIndexOperand())) + SubKey = hash_value(EI->getVectorOperand()); + } + } else if (auto *I = dyn_cast(V)) { + // Sort other instructions just by the opcodes except for CMPInst. + // For CMP also sort by the predicate kind. + if ((isa(I) || isa(I)) && + isValidForAlternation(I->getOpcode())) { + if (AllowAlternate) + Key = hash_value(isa(I) ? 1 : 0); + else + Key = hash_combine(hash_value(I->getOpcode()), Key); + SubKey = hash_combine( + hash_value(I->getOpcode()), hash_value(I->getType()), + hash_value(isa(I) + ? I->getType() + : cast(I)->getOperand(0)->getType())); + // For casts, look through the only operand to improve compile time. 
+ if (isa(I)) { + std::pair OpVals = + generateKeySubkey(I->getOperand(0), TLI, LoadsSubkeyGenerator, + /*=AllowAlternate*/ true); + Key = hash_combine(OpVals.first, Key); + SubKey = hash_combine(OpVals.first, SubKey); + } + } else if (auto *CI = dyn_cast(I)) { + CmpInst::Predicate Pred = CI->getPredicate(); + if (CI->isCommutative()) + Pred = std::min(Pred, CmpInst::getInversePredicate(Pred)); + CmpInst::Predicate SwapPred = CmpInst::getSwappedPredicate(Pred); + SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Pred), + hash_value(SwapPred), + hash_value(CI->getOperand(0)->getType())); + } else if (auto *Call = dyn_cast(I)) { + Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, TLI); + if (isTriviallyVectorizable(ID)) { + SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(ID)); + } else if (!VFDatabase(*Call).getMappings(*Call).empty()) { + SubKey = hash_combine(hash_value(I->getOpcode()), + hash_value(Call->getCalledFunction())); + } else { + Key = hash_combine(hash_value(Call), Key); + SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call)); + } + for (const CallBase::BundleOpInfo &Op : Call->bundle_op_infos()) + SubKey = hash_combine(hash_value(Op.Begin), hash_value(Op.End), + hash_value(Op.Tag), SubKey); + } else if (auto *Gep = dyn_cast(I)) { + if (Gep->getNumOperands() == 2 && isa(Gep->getOperand(1))) + SubKey = hash_value(Gep->getPointerOperand()); + else + SubKey = hash_value(Gep); + } else if (BinaryOperator::isIntDivRem(I->getOpcode()) && + !isa(I->getOperand(1))) { + // Do not try to vectorize instructions with potentially high cost. + SubKey = hash_value(I); } else { - Ptr0 = PointerOps[Order.front()]; - PtrN = PointerOps[Order.back()]; + SubKey = hash_value(I->getOpcode()); } - Optional Diff = - getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE); - // Check that the sorted loads are consecutive. - if (static_cast(*Diff) == VL.size() - 1) - return LoadsState::Vectorize; - Align CommonAlignment = cast(VL0)->getAlign(); - for (Value *V : VL) - CommonAlignment = - commonAlignment(CommonAlignment, cast(V)->getAlign()); - if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()), - CommonAlignment)) - return LoadsState::ScatterVectorize; + Key = hash_combine(hash_value(I->getParent()), Key); } - - return LoadsState::Gather; + return std::make_pair(Key, SubKey); } void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, @@ -3722,10 +4629,84 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If all of the operands are identical or constant we have a simple solution. // If we deal with insert/extract instructions, they all must have constant // indices, otherwise we should gather them, not try to vectorize. - if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() || - (isa(S.MainOp) && - !all_of(VL, isVectorLikeInstWithConstOps))) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); + // If alternate op node with 2 elements with gathered operands - do not + // vectorize. + auto &&NotProfitableForVectorization = [&S, this, + Depth](ArrayRef VL) { + if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2) + return false; + if (VectorizableTree.size() < MinTreeSize) + return false; + if (Depth >= RecursionMaxDepth - 1) + return true; + // Check if all operands are extracts, part of vector node or can build a + // regular vectorize node. 
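// Editor's aside: a minimal sketch, outside of LLVM, of the two-level
// grouping that generateKeySubkey() above enables. Values are bucketed
// first by a coarse Key (roughly "what kind of instruction is this") and
// then, within a bucket, by a finer SubKey (opcode, predicate, pointer
// base, ...). The string-based keys and hashing here are assumptions for
// illustration, not the actual hash_combine scheme.
#include <cstddef>
#include <functional>
#include <map>
#include <string>
#include <utility>
#include <vector>

using KeyPair = std::pair<std::size_t, std::size_t>;

KeyPair keySubkey(const std::string &Kind, const std::string &Detail) {
  std::hash<std::string> H;
  return {H(Kind), H(Kind + "/" + Detail)}; // coarse key, finer subkey
}

// Group candidate values so that likely-compatible ones land together;
// each group is a candidate list for building one vectorizable bundle.
std::map<KeyPair, std::vector<std::size_t>>
groupByKey(const std::vector<std::pair<std::string, std::string>> &Vals) {
  std::map<KeyPair, std::vector<std::size_t>> Groups;
  for (std::size_t I = 0; I != Vals.size(); ++I)
    Groups[keySubkey(Vals[I].first, Vals[I].second)].push_back(I);
  return Groups;
}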
+ SmallVector InstsCount(VL.size(), 0); + for (Value *V : VL) { + auto *I = cast(V); + InstsCount.push_back(count_if(I->operand_values(), [](Value *Op) { + return isa(Op) || isVectorLikeInstWithConstOps(Op); + })); + } + bool IsCommutative = isCommutative(S.MainOp) || isCommutative(S.AltOp); + if ((IsCommutative && + std::accumulate(InstsCount.begin(), InstsCount.end(), 0) < 2) || + (!IsCommutative && + all_of(InstsCount, [](unsigned ICnt) { return ICnt < 2; }))) + return true; + assert(VL.size() == 2 && "Expected only 2 alternate op instructions."); + SmallVector>> Candidates; + auto *I1 = cast(VL.front()); + auto *I2 = cast(VL.back()); + for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op) + Candidates.emplace_back().emplace_back(I1->getOperand(Op), + I2->getOperand(Op)); + if (static_cast(count_if( + Candidates, [this](ArrayRef> Cand) { + return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat); + })) >= S.MainOp->getNumOperands() / 2) + return false; + if (S.MainOp->getNumOperands() > 2) + return true; + if (IsCommutative) { + // Check permuted operands. + Candidates.clear(); + for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op) + Candidates.emplace_back().emplace_back(I1->getOperand(Op), + I2->getOperand((Op + 1) % E)); + if (any_of( + Candidates, [this](ArrayRef> Cand) { + return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat); + })) + return false; + } + return true; + }; + SmallVector SortedIndices; + BasicBlock *BB = nullptr; + bool AreAllSameInsts = + (S.getOpcode() && allSameBlock(VL)) || + (S.OpValue->getType()->isPointerTy() && UserTreeIdx.UserTE && + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize && + VL.size() > 2 && + all_of(VL, + [&BB](Value *V) { + auto *I = dyn_cast(V); + if (!I) + return doesNotNeedToBeScheduled(V); + if (!BB) + BB = I->getParent(); + return BB == I->getParent() && I->getNumOperands() == 2; + }) && + BB && + sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE, + SortedIndices)); + if (allConstant(VL) || isSplat(VL) || !AreAllSameInsts || + (isa( + S.OpValue) && + !all_of(VL, isVectorLikeInstWithConstOps)) || + NotProfitableForVectorization(VL)) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n"); if (TryToFindDuplicates(S)) newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); @@ -3736,12 +4717,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // the same block. // Don't vectorize ephemeral values. - for (Value *V : VL) { - if (EphValues.count(V)) { - LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V - << ") is ephemeral.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); - return; + if (!EphValues.empty()) { + for (Value *V : VL) { + if (EphValues.count(V)) { + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V + << ") is ephemeral.\n"); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + return; + } } } @@ -3779,20 +4762,37 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // The reduction nodes (stored in UserIgnoreList) also should stay scalar. 
- for (Value *V : VL) { - if (is_contained(UserIgnoreList, V)) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndicies); - return; + if (UserIgnoreList && !UserIgnoreList->empty()) { + for (Value *V : VL) { + if (UserIgnoreList && UserIgnoreList->contains(V)) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + return; + } } } + // Special processing for sorted pointers for ScatterVectorize node with + // constant indeces only. + if (AreAllSameInsts && !(S.getOpcode() && allSameBlock(VL)) && + UserTreeIdx.UserTE && + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize) { + assert(S.OpValue->getType()->isPointerTy() && + count_if(VL, [](Value *V) { return isa(V); }) >= + 2 && + "Expected pointers only."); + // Reset S to make it GetElementPtr kind of node. + const auto *It = find_if(VL, [](Value *V) { return isa(V); }); + assert(It != VL.end() && "Expected at least one GEP."); + S = getSameOpcode(*It); + } + // Check that all of the users of the scalars that we want to vectorize are // schedulable. auto *VL0 = cast(S.OpValue); - BasicBlock *BB = VL0->getParent(); + BB = VL0->getParent(); if (!DT->isReachableFromEntry(BB)) { // Don't go into unreachable blocks. They may contain instructions with @@ -3810,9 +4810,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (!BSRef) BSRef = std::make_unique(BB); - BlockScheduling &BS = *BSRef.get(); + BlockScheduling &BS = *BSRef; Optional Bundle = BS.tryScheduleBundle(VL, this, S); +#ifdef EXPENSIVE_CHECKS + // Make sure we didn't break any internal invariants + BS.verify(); +#endif if (!Bundle) { LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL0) || @@ -3832,10 +4836,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check for terminator values (e.g. invoke). for (Value *V : VL) - for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) { - Instruction *Term = dyn_cast( - cast(V)->getIncomingValueForBlock( - PH->getIncomingBlock(I))); + for (Value *Incoming : cast(V)->incoming_values()) { + Instruction *Term = dyn_cast(Incoming); if (Term && Term->isTerminator()) { LLVM_DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (terminator use).\n"); @@ -3918,13 +4920,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check that we have a buildvector and not a shuffle of 2 or more // different vectors. 
ValueSet SourceVectors; - int MinIdx = std::numeric_limits::max(); for (Value *V : VL) { SourceVectors.insert(cast(V)->getOperand(0)); - Optional Idx = *getInsertIndex(V, 0); - if (!Idx || *Idx == UndefMaskElem) - continue; - MinIdx = std::min(MinIdx, *Idx); + assert(getInsertIndex(V) != None && "Non-constant or undef index?"); } if (count_if(VL, [&SourceVectors](Value *V) { @@ -3946,10 +4944,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, decltype(OrdCompare)> Indices(OrdCompare); for (int I = 0, E = VL.size(); I < E; ++I) { - Optional Idx = *getInsertIndex(VL[I], 0); - if (!Idx || *Idx == UndefMaskElem) - continue; - Indices.emplace(*Idx, I); + unsigned Idx = *getInsertIndex(VL[I]); + Indices.emplace(Idx, I); } OrdersType CurrentOrder(VL.size(), VL.size()); bool IsIdentity = true; @@ -3985,7 +4981,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, SmallVector PointerOps; OrdersType CurrentOrder; TreeEntry *TE = nullptr; - switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, CurrentOrder, + switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, *LI, CurrentOrder, PointerOps)) { case LoadsState::Vectorize: if (CurrentOrder.empty()) { @@ -4166,7 +5162,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::GetElementPtr: { // We don't combine GEPs with complicated (nested) indexing. for (Value *V : VL) { - if (cast(V)->getNumOperands() != 2) { + auto *I = dyn_cast(V); + if (!I) + continue; + if (I->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL, VL0); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, @@ -4177,9 +5176,12 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // We can't combine several GEPs into one vector if they operate on // different types. - Type *Ty0 = VL0->getOperand(0)->getType(); + Type *Ty0 = cast(VL0)->getSourceElementType(); for (Value *V : VL) { - Type *CurTy = cast(V)->getOperand(0)->getType(); + auto *GEP = dyn_cast(V); + if (!GEP) + continue; + Type *CurTy = GEP->getSourceElementType(); if (Ty0 != CurTy) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); @@ -4190,15 +5192,22 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } } + bool IsScatterUser = + UserTreeIdx.UserTE && + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; // We don't combine GEPs with non-constant indexes. Type *Ty1 = VL0->getOperand(1)->getType(); for (Value *V : VL) { - auto Op = cast(V)->getOperand(1); - if (!isa(Op) || + auto *I = dyn_cast(V); + if (!I) + continue; + auto *Op = I->getOperand(1); + if ((!IsScatterUser && !isa(Op)) || (Op->getType() != Ty1 && - Op->getType()->getScalarSizeInBits() > - DL->getIndexSizeInBits( - V->getType()->getPointerAddressSpace()))) { + ((IsScatterUser && !isa(Op)) || + Op->getType()->getScalarSizeInBits() > + DL->getIndexSizeInBits( + V->getType()->getPointerAddressSpace())))) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL, VL0); @@ -4213,9 +5222,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); SmallVector Operands(2); // Prepare the operand vector for pointer operands. 
- for (Value *V : VL) - Operands.front().push_back( - cast(V)->getPointerOperand()); + for (Value *V : VL) { + auto *GEP = dyn_cast(V); + if (!GEP) { + Operands.front().push_back(V); + continue; + } + Operands.front().push_back(GEP->getPointerOperand()); + } TE->setOperand(0, Operands.front()); // Need to cast all indices to the same type before vectorization to // avoid crash. @@ -4226,9 +5240,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Type *VL0Ty = VL0->getOperand(IndexIdx)->getType(); Type *Ty = all_of(VL, [VL0Ty, IndexIdx](Value *V) { - return VL0Ty == cast(V) - ->getOperand(IndexIdx) - ->getType(); + auto *GEP = dyn_cast(V); + if (!GEP) + return true; + return VL0Ty == GEP->getOperand(IndexIdx)->getType(); }) ? VL0Ty : DL->getIndexType(cast(VL0) @@ -4236,10 +5251,19 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, ->getScalarType()); // Prepare the operand vector. for (Value *V : VL) { - auto *Op = cast(V)->getOperand(IndexIdx); - auto *CI = cast(Op); - Operands.back().push_back(ConstantExpr::getIntegerCast( - CI, Ty, CI->getValue().isSignBitSet())); + auto *I = dyn_cast(V); + if (!I) { + Operands.back().push_back( + ConstantInt::get(Ty, 0, /*isSigned=*/false)); + continue; + } + auto *Op = I->getOperand(IndexIdx); + auto *CI = dyn_cast(Op); + if (!CI) + Operands.back().push_back(Op); + else + Operands.back().push_back(ConstantExpr::getIntegerCast( + CI, Ty, CI->getValue().isSignBitSet())); } TE->setOperand(IndexIdx, Operands.back()); @@ -4345,7 +5369,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, unsigned NumArgs = CI->arg_size(); SmallVector ScalarArgs(NumArgs, nullptr); for (unsigned j = 0; j != NumArgs; ++j) - if (hasVectorInstrinsicScalarOpd(ID, j)) + if (isVectorIntrinsicWithScalarOpAtArg(ID, j)) ScalarArgs[j] = CI->getArgOperand(j); for (Value *V : VL) { CallInst *CI2 = dyn_cast(V); @@ -4364,7 +5388,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Some intrinsics have scalar arguments and should be same in order for // them to be vectorized. for (unsigned j = 0; j != NumArgs; ++j) { - if (hasVectorInstrinsicScalarOpd(ID, j)) { + if (isVectorIntrinsicWithScalarOpAtArg(ID, j)) { Value *A1J = CI2->getArgOperand(j); if (ScalarArgs[j] != A1J) { BS.cancelScheduling(VL, VL0); @@ -4397,7 +5421,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) { // For scalar operands no need to to create an entry since no need to // vectorize it. - if (hasVectorInstrinsicScalarOpd(ID, i)) + if (isVectorIntrinsicWithScalarOpAtArg(ID, i)) continue; ValueList Operands; // Prepare the operand vector. 
@@ -4434,6 +5458,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } else { CmpInst::Predicate P0 = CI->getPredicate(); CmpInst::Predicate AltP0 = cast(S.AltOp)->getPredicate(); + assert(P0 != AltP0 && + "Expected different main/alternate predicates."); CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0); Value *BaseOp0 = VL0->getOperand(0); Value *BaseOp1 = VL0->getOperand(1); @@ -4443,16 +5469,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto *Cmp = cast(V); Value *LHS = Cmp->getOperand(0); Value *RHS = Cmp->getOperand(1); - CmpInst::Predicate CurrentPred = CI->getPredicate(); - CmpInst::Predicate CurrentPredSwapped = - CmpInst::getSwappedPredicate(CurrentPred); - if (P0 == AltP0 || P0 == AltP0Swapped) { - if ((P0 == CurrentPred && - !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) || - (P0 == CurrentPredSwapped && - !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS))) + CmpInst::Predicate CurrentPred = Cmp->getPredicate(); + if (P0 == AltP0Swapped) { + if (CI != Cmp && S.AltOp != Cmp && + ((P0 == CurrentPred && + !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) || + (AltP0 == CurrentPred && + areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)))) std::swap(LHS, RHS); - } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) { + } else if (P0 != CurrentPred && AltP0 != CurrentPred) { std::swap(LHS, RHS); } Left.push_back(LHS); @@ -4602,7 +5627,9 @@ bool BoUpSLP::areAllUsersVectorized(Instruction *I, ArrayRef VectorizedVals) const { return (I->hasOneUse() && is_contained(VectorizedVals, I)) || all_of(I->users(), [this](User *U) { - return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U); + return ScalarToTreeEntry.count(U) > 0 || + isVectorLikeInstWithConstOps(U) || + (isa(U) && MustGather.contains(U)); }); } @@ -4659,19 +5686,21 @@ computeExtractCost(ArrayRef VL, FixedVectorType *VecTy, // Process extracts in blocks of EltsPerVector to check if the source vector // operand can be re-used directly. If not, add the cost of creating a shuffle // to extract the values into a vector register. + SmallVector RegMask(EltsPerVector, UndefMaskElem); for (auto *V : VL) { ++Idx; - // Need to exclude undefs from analysis. - if (isa(V) || Mask[Idx] == UndefMaskElem) - continue; - // Reached the start of a new vector registers. if (Idx % EltsPerVector == 0) { + RegMask.assign(EltsPerVector, UndefMaskElem); AllConsecutive = true; continue; } + // Need to exclude undefs from analysis. + if (isa(V) || Mask[Idx] == UndefMaskElem) + continue; + // Check all extracts for a vector register on the target directly // extract values in order. unsigned CurrentIdx = *getExtractIndex(cast(V)); @@ -4679,6 +5708,7 @@ computeExtractCost(ArrayRef VL, FixedVectorType *VecTy, unsigned PrevIdx = *getExtractIndex(cast(VL[Idx - 1])); AllConsecutive &= PrevIdx + 1 == CurrentIdx && CurrentIdx % EltsPerVector == Idx % EltsPerVector; + RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector; } if (AllConsecutive) @@ -4690,10 +5720,10 @@ computeExtractCost(ArrayRef VL, FixedVectorType *VecTy, // If we have a series of extracts which are not consecutive and hence // cannot re-use the source vector register directly, compute the shuffle - // cost to extract the a vector with EltsPerVector elements. + // cost to extract the vector with EltsPerVector elements. 
Cost += TTI.getShuffleCost( TargetTransformInfo::SK_PermuteSingleSrc, - FixedVectorType::get(VecTy->getElementType(), EltsPerVector)); + FixedVectorType::get(VecTy->getElementType(), EltsPerVector), RegMask); } return Cost; } @@ -4701,12 +5731,12 @@ computeExtractCost(ArrayRef VL, FixedVectorType *VecTy, /// Build shuffle mask for shuffle graph entries and lists of main and alternate /// operations operands. static void -buildSuffleEntryMask(ArrayRef VL, ArrayRef ReorderIndices, - ArrayRef ReusesIndices, - const function_ref IsAltOp, - SmallVectorImpl &Mask, - SmallVectorImpl *OpScalars = nullptr, - SmallVectorImpl *AltScalars = nullptr) { +buildShuffleEntryMask(ArrayRef VL, ArrayRef ReorderIndices, + ArrayRef ReusesIndices, + const function_ref IsAltOp, + SmallVectorImpl &Mask, + SmallVectorImpl *OpScalars = nullptr, + SmallVectorImpl *AltScalars = nullptr) { unsigned Sz = VL.size(); Mask.assign(Sz, UndefMaskElem); SmallVector OrderMask; @@ -4736,6 +5766,29 @@ buildSuffleEntryMask(ArrayRef VL, ArrayRef ReorderIndices, } } +/// Checks if the specified instruction \p I is an alternate operation for the +/// given \p MainOp and \p AltOp instructions. +static bool isAlternateInstruction(const Instruction *I, + const Instruction *MainOp, + const Instruction *AltOp) { + if (auto *CI0 = dyn_cast(MainOp)) { + auto *AltCI0 = cast(AltOp); + auto *CI = cast(I); + CmpInst::Predicate P0 = CI0->getPredicate(); + CmpInst::Predicate AltP0 = AltCI0->getPredicate(); + assert(P0 != AltP0 && "Expected different main/alternate predicates."); + CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0); + CmpInst::Predicate CurrentPred = CI->getPredicate(); + if (P0 == AltP0Swapped) + return I == AltCI0 || + (I != MainOp && + !areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), + CI->getOperand(0), CI->getOperand(1))); + return AltP0 == CurrentPred || AltP0Swapped == CurrentPred; + } + return I->getOpcode() == AltOp->getOpcode(); +} + InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals) { ArrayRef VL = E->Scalars; @@ -4849,7 +5902,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, SmallVector Entries; Optional Shuffle = isGatherShuffledEntry(E, Mask, Entries); - if (Shuffle.hasValue()) { + if (Shuffle) { InstructionCost GatherCost = 0; if (ShuffleVectorInst::isIdentityMask(Mask)) { // Perfect match in the graph, will reuse the previously vectorized @@ -4885,7 +5938,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, SmallVector Mask; Optional ShuffleKind = isFixedVectorShuffle(VL, Mask); - if (ShuffleKind.hasValue()) { + if (ShuffleKind) { // Found the bunch of extractelement instructions that must be gathered // into a vector and can be represented as a permutation elements in a // single input vector or of 2 input vectors. @@ -4903,7 +5956,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, // broadcast. 
assert(VecTy == FinalVecTy && "No reused scalars expected for broadcast."); - return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy); + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, + /*Mask=*/None, /*Index=*/0, + /*SubTp=*/nullptr, /*Args=*/VL[0]); } InstructionCost ReuseShuffleCost = 0; if (NeedToShuffleReuses) @@ -4927,8 +5982,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) { SmallVector PointerOps; OrdersType CurrentOrder; - LoadsState LS = canVectorizeLoads(Slice, Slice.front(), *TTI, *DL, - *SE, CurrentOrder, PointerOps); + LoadsState LS = + canVectorizeLoads(Slice, Slice.front(), *TTI, *DL, *SE, *LI, + CurrentOrder, PointerOps); switch (LS) { case LoadsState::Vectorize: case LoadsState::ScatterVectorize: @@ -5018,7 +6074,11 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, assert((E->State == TreeEntry::Vectorize || E->State == TreeEntry::ScatterVectorize) && "Unhandled state"); - assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); + assert(E->getOpcode() && + ((allSameType(VL) && allSameBlock(VL)) || + (E->getOpcode() == Instruction::GetElementPtr && + E->getMainOp()->getType()->isPointerTy())) && + "Invalid VL"); Instruction *VL0 = E->getMainOp(); unsigned ShuffleOrOp = E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); @@ -5090,30 +6150,60 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, assert(E->ReuseShuffleIndices.empty() && "Unique insertelements only are expected."); auto *SrcVecTy = cast(VL0->getType()); - unsigned const NumElts = SrcVecTy->getNumElements(); unsigned const NumScalars = VL.size(); + + unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy); + + unsigned OffsetBeg = *getInsertIndex(VL.front()); + unsigned OffsetEnd = OffsetBeg; + for (Value *V : VL.drop_front()) { + unsigned Idx = *getInsertIndex(V); + if (OffsetBeg > Idx) + OffsetBeg = Idx; + else if (OffsetEnd < Idx) + OffsetEnd = Idx; + } + unsigned VecScalarsSz = PowerOf2Ceil(NumElts); + if (NumOfParts > 0) + VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts); + unsigned VecSz = + (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * + VecScalarsSz; + unsigned Offset = VecScalarsSz * (OffsetBeg / VecScalarsSz); + unsigned InsertVecSz = std::min( + PowerOf2Ceil(OffsetEnd - OffsetBeg + 1), + ((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) * + VecScalarsSz); + bool IsWholeSubvector = + OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0); + // Check if we can safely insert a subvector. If it is not possible, just + // generate a whole-sized vector and shuffle the source vector and the new + // subvector. + if (OffsetBeg + InsertVecSz > VecSz) { + // Align OffsetBeg to generate correct mask. + OffsetBeg = alignDown(OffsetBeg, VecSz, Offset); + InsertVecSz = VecSz; + } + APInt DemandedElts = APInt::getZero(NumElts); // TODO: Add support for Instruction::InsertValue. 
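The sizing logic above narrows the insert-cost computation to a register-aligned window that covers all insert positions. A standalone sketch of that arithmetic with hypothetical numbers (std::bit_ceil standing in for LLVM's PowerOf2Ceil; the NumOfParts > 0 guard and the mask-alignment fallback are omitted):

// Standalone sketch: sizing the register-aligned window an insertelement
// bundle touches, mirroring the OffsetBeg/OffsetEnd/VecScalarsSz arithmetic.
#include <algorithm>
#include <bit>     // std::bit_ceil (C++20)
#include <cstdio>
#include <vector>

int main() {
  unsigned NumElts = 16;    // elements in the destination vector
  unsigned NumOfParts = 2;  // registers the target splits the vector into
  std::vector<unsigned> InsertIdx = {5, 6, 7, 8}; // lanes the bundle writes
  unsigned OffsetBeg = InsertIdx.front(), OffsetEnd = InsertIdx.front();
  for (unsigned Idx : InsertIdx) {
    OffsetBeg = std::min(OffsetBeg, Idx);
    OffsetEnd = std::max(OffsetEnd, Idx);
  }
  unsigned VecScalarsSz = std::bit_ceil((NumElts + NumOfParts - 1) / NumOfParts);
  // Number of register-sized pieces the touched range spans, in elements.
  unsigned VecSz =
      (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz;
  unsigned Offset = VecScalarsSz * (OffsetBeg / VecScalarsSz);
  // 16 elements in 2 parts: VecScalarsSz=8; lanes 5..8 cross a register
  // boundary, so the window is the whole vector (VecSz=16, Offset=0).
  std::printf("VecScalarsSz=%u VecSz=%u Offset=%u\n", VecScalarsSz, VecSz,
              Offset);
  return 0;
}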
SmallVector Mask; if (!E->ReorderIndices.empty()) { inversePermutation(E->ReorderIndices, Mask); - Mask.append(NumElts - NumScalars, UndefMaskElem); + Mask.append(InsertVecSz - Mask.size(), UndefMaskElem); } else { - Mask.assign(NumElts, UndefMaskElem); - std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0); + Mask.assign(VecSz, UndefMaskElem); + std::iota(Mask.begin(), std::next(Mask.begin(), InsertVecSz), 0); } - unsigned Offset = *getInsertIndex(VL0, 0); bool IsIdentity = true; - SmallVector PrevMask(NumElts, UndefMaskElem); + SmallVector PrevMask(InsertVecSz, UndefMaskElem); Mask.swap(PrevMask); for (unsigned I = 0; I < NumScalars; ++I) { - Optional InsertIdx = getInsertIndex(VL[PrevMask[I]], 0); - if (!InsertIdx || *InsertIdx == UndefMaskElem) - continue; - DemandedElts.setBit(*InsertIdx); - IsIdentity &= *InsertIdx - Offset == I; - Mask[*InsertIdx - Offset] = I; + unsigned InsertIdx = *getInsertIndex(VL[PrevMask[I]]); + DemandedElts.setBit(InsertIdx); + IsIdentity &= InsertIdx - OffsetBeg == I; + Mask[InsertIdx - OffsetBeg] = I; } assert(Offset < NumElts && "Failed to find vector index offset"); @@ -5121,32 +6211,41 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts, /*Insert*/ true, /*Extract*/ false); - if (IsIdentity && NumElts != NumScalars && Offset % NumScalars != 0) { - // FIXME: Replace with SK_InsertSubvector once it is properly supported. - unsigned Sz = PowerOf2Ceil(Offset + NumScalars); - Cost += TTI->getShuffleCost( - TargetTransformInfo::SK_PermuteSingleSrc, - FixedVectorType::get(SrcVecTy->getElementType(), Sz)); - } else if (!IsIdentity) { - auto *FirstInsert = - cast(*find_if(E->Scalars, [E](Value *V) { - return !is_contained(E->Scalars, - cast(V)->getOperand(0)); - })); - if (isUndefVector(FirstInsert->getOperand(0))) { - Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask); + // First cost - resize to actual vector size if not identity shuffle or + // need to shift the vector. + // Do not calculate the cost if the actual size is the register size and + // we can merge this shuffle with the following SK_Select. + auto *InsertVecTy = + FixedVectorType::get(SrcVecTy->getElementType(), InsertVecSz); + if (!IsIdentity) + Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + InsertVecTy, Mask); + auto *FirstInsert = cast(*find_if(E->Scalars, [E](Value *V) { + return !is_contained(E->Scalars, cast(V)->getOperand(0)); + })); + // Second cost - permutation with subvector, if some elements are from the + // initial vector or inserting a subvector. + // TODO: Implement the analysis of the FirstInsert->getOperand(0) + // subvector of ActualVecTy. 
+ if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts && + !IsWholeSubvector) { + if (InsertVecSz != VecSz) { + auto *ActualVecTy = + FixedVectorType::get(SrcVecTy->getElementType(), VecSz); + Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy, + None, OffsetBeg - Offset, InsertVecTy); } else { - SmallVector InsertMask(NumElts); - std::iota(InsertMask.begin(), InsertMask.end(), 0); - for (unsigned I = 0; I < NumElts; I++) { + for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I) + Mask[I] = I; + for (unsigned I = OffsetBeg - Offset, End = OffsetEnd - Offset; + I <= End; ++I) if (Mask[I] != UndefMaskElem) - InsertMask[Offset + I] = NumElts + I; - } - Cost += - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVecTy, InsertMask); + Mask[I] = I + VecSz; + for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I) + Mask[I] = I; + Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask); } } - return Cost; } case Instruction::ZExt: @@ -5227,9 +6326,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, // If the selects are the only uses of the compares, they will be dead // and we can adjust the cost by removing their cost. if (IntrinsicAndUse.second) - IntrinsicCost -= - TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, + MaskTy, VecPred, CostKind); VecCost = std::min(VecCost, IntrinsicCost); } LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost)); @@ -5309,7 +6407,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, TargetTransformInfo::OperandValueKind Op1VK = TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = - TargetTransformInfo::OK_UniformConstantValue; + any_of(VL, + [](Value *V) { + return isa(V) && + !isConstant( + cast(V)->getOperand(1)); + }) + ? TargetTransformInfo::OK_AnyValue + : TargetTransformInfo::OK_UniformConstantValue; InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost( Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK); @@ -5340,7 +6445,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, Align CommonAlignment = Alignment; for (Value *V : VL) CommonAlignment = - commonAlignment(CommonAlignment, cast(V)->getAlign()); + std::min(CommonAlignment, cast(V)->getAlign()); VecLdCost = TTI->getGatherScatterOpCost( Instruction::Load, VecTy, cast(VL0)->getPointerOperand(), /*VariableMask=*/false, CommonAlignment, CostKind, VL0); @@ -5458,39 +6563,21 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, TTI::CastContextHint::None, CostKind); } - SmallVector Mask; - buildSuffleEntryMask( - E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, - [E](Instruction *I) { - assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); - if (auto *CI0 = dyn_cast(E->getMainOp())) { - auto *AltCI0 = cast(E->getAltOp()); - auto *CI = cast(I); - CmpInst::Predicate P0 = CI0->getPredicate(); - CmpInst::Predicate AltP0 = AltCI0->getPredicate(); - CmpInst::Predicate AltP0Swapped = - CmpInst::getSwappedPredicate(AltP0); - CmpInst::Predicate CurrentPred = CI->getPredicate(); - CmpInst::Predicate CurrentPredSwapped = - CmpInst::getSwappedPredicate(CurrentPred); - if (P0 == AltP0 || P0 == AltP0Swapped) { - // Alternate cmps have same/swapped predicate as main cmps but - // different order of compatible operands. 
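The any_of above downgrades the second-operand kind from "uniform constant" to "any value" as soon as a single GEP in the bundle has a non-constant index, since the arithmetic cost model may differ for variable operands. A standalone sketch of the same classification (Gep and OperandKind are stand-ins, not LLVM types):

// Standalone sketch: choosing the operand-value kind for the whole bundle
// based on whether any member has a non-constant second operand.
#include <algorithm>
#include <cstdio>
#include <vector>

enum OperandKind { OK_AnyValue, OK_UniformConstantValue };

struct Gep { bool ConstIdx; }; // stand-in: does this GEP use a constant index?

int main() {
  std::vector<Gep> VL = {{true}, {true}, {false}}; // hypothetical bundle
  OperandKind Op2VK =
      std::any_of(VL.begin(), VL.end(),
                  [](const Gep &G) { return !G.ConstIdx; })
          ? OK_AnyValue
          : OK_UniformConstantValue;
  std::printf("%d\n", Op2VK == OK_AnyValue); // 1: one index is variable
  return 0;
}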
- return !(
- (P0 == CurrentPred &&
- areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
- I->getOperand(0), I->getOperand(1))) ||
- (P0 == CurrentPredSwapped &&
- areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
- I->getOperand(1), I->getOperand(0))));
- }
- return CurrentPred != P0 && CurrentPredSwapped != P0;
- }
- return I->getOpcode() == E->getAltOpcode();
- },
- Mask);
- CommonCost =
- TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy, Mask);
+ if (E->ReuseShuffleIndices.empty()) {
+ CommonCost =
+ TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);
+ } else {
+ SmallVector Mask;
+ buildShuffleEntryMask(
+ E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask);
+ CommonCost = TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
+ }
 LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
 return CommonCost + VecCost - ScalarCost;
 }
@@ -5618,7 +6705,10 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
 // No need to vectorize inserts of gathered values.
 if (VectorizableTree.size() == 2 &&
 isa(VectorizableTree[0]->Scalars[0]) &&
- VectorizableTree[1]->State == TreeEntry::NeedToGather)
+ VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+ (VectorizableTree[1]->getVectorFactor() <= 2 ||
+ !(isSplat(VectorizableTree[1]->Scalars) ||
+ allConstant(VectorizableTree[1]->Scalars))))
 return true;
 // We can vectorize the tree if its size is greater than or equal to the
@@ -5748,20 +6838,26 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
 return false;
 auto *IE1 = VU;
 auto *IE2 = V;
+ unsigned Idx1 = *getInsertIndex(IE1);
+ unsigned Idx2 = *getInsertIndex(IE2);
 // Go through the vector operand of insertelement instructions trying to find
 // either VU as the original vector for IE2 or V as the original vector for
 // IE1.
 do {
- if (IE2 == VU || IE1 == V)
- return true;
+ if (IE2 == VU)
+ return VU->hasOneUse();
+ if (IE1 == V)
+ return V->hasOneUse();
 if (IE1) {
- if (IE1 != VU && !IE1->hasOneUse())
+ if ((IE1 != VU && !IE1->hasOneUse()) ||
+ getInsertIndex(IE1).value_or(Idx2) == Idx2)
 IE1 = nullptr;
 else
 IE1 = dyn_cast(IE1->getOperand(0));
 }
 if (IE2) {
- if (IE2 != V && !IE2->hasOneUse())
+ if ((IE2 != V && !IE2->hasOneUse()) ||
+ getInsertIndex(IE2).value_or(Idx1) == Idx1)
 IE2 = nullptr;
 else
 IE2 = dyn_cast(IE2->getOperand(0));
@@ -5770,6 +6866,153 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
 return false;
 }
 
+/// Checks if the \p IE1 instruction is followed by the \p IE2 instruction in
+/// the buildvector sequence.
+static bool isFirstInsertElement(const InsertElementInst *IE1,
+ const InsertElementInst *IE2) {
+ if (IE1 == IE2)
+ return false;
+ const auto *I1 = IE1;
+ const auto *I2 = IE2;
+ const InsertElementInst *PrevI1;
+ const InsertElementInst *PrevI2;
+ unsigned Idx1 = *getInsertIndex(IE1);
+ unsigned Idx2 = *getInsertIndex(IE2);
+ do {
+ if (I2 == IE1)
+ return true;
+ if (I1 == IE2)
+ return false;
+ PrevI1 = I1;
+ PrevI2 = I2;
+ if (I1 && (I1 == IE1 || I1->hasOneUse()) &&
+ getInsertIndex(I1).value_or(Idx2) != Idx2)
+ I1 = dyn_cast(I1->getOperand(0));
+ if (I2 && ((I2 == IE2 || I2->hasOneUse())) &&
+ getInsertIndex(I2).value_or(Idx1) != Idx1)
+ I2 = dyn_cast(I2->getOperand(0));
+ } while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2));
+ llvm_unreachable("Two different buildvectors not expected.");
+}
+
+namespace {
+/// Returns the incoming Value * if the requested type is Value * too, or a
+/// default value otherwise.
+struct ValueSelect {
+ template
+ static typename std::enable_if::value, Value *>::type
+ get(Value *V) {
+ return V;
+ }
+ template
+ static typename std::enable_if::value, U>::type
+ get(Value *) {
+ return U();
+ }
+};
+} // namespace
+
+/// Does the analysis of the provided shuffle masks and performs the requested
+/// actions on the vectors with the given shuffle masks. It tries to do it in
+/// several steps.
+/// 1. If the Base vector is not an undef vector, resize the very first mask to
+/// have a common VF and perform the action for 2 input vectors (including the
+/// non-undef Base). Other shuffle masks are combined with the result of the
+/// first stage and processed as a shuffle of 2 elements.
+/// 2. If the Base is an undef vector and there is only 1 shuffle mask, perform
+/// the action only for 1 vector with the given mask, if it is not the identity
+/// mask.
+/// 3. If > 2 masks are used, perform the remaining shuffle actions for 2
+/// vectors, combining the masks properly between the steps.
+template
+static T *performExtractsShuffleAction(
+ MutableArrayRef>> ShuffleMask, Value *Base,
+ function_ref GetVF,
+ function_ref(T *, ArrayRef)> ResizeAction,
+ function_ref, ArrayRef)> Action) {
+ assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts.");
+ SmallVector Mask(ShuffleMask.begin()->second);
+ auto VMIt = std::next(ShuffleMask.begin());
+ T *Prev = nullptr;
+ bool IsBaseNotUndef = !isUndefVector(Base);
+ if (IsBaseNotUndef) {
+ // Base is not undef, need to combine it with the next subvectors.
+ std::pair Res = ResizeAction(ShuffleMask.begin()->first, Mask);
+ for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
+ if (Mask[Idx] == UndefMaskElem)
+ Mask[Idx] = Idx;
+ else
+ Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
+ }
+ auto *V = ValueSelect::get(Base);
+ (void)V;
+ assert((!V || GetVF(V) == Mask.size()) &&
+ "Expected base vector of VF number of elements.");
+ Prev = Action(Mask, {nullptr, Res.first});
+ } else if (ShuffleMask.size() == 1) {
+ // Base is undef and only 1 vector is shuffled - perform the action only
+ // for a single vector, if the mask is not the identity mask.
+ std::pair Res = ResizeAction(ShuffleMask.begin()->first, Mask);
+ if (Res.second)
+ // Identity mask is found.
+ Prev = Res.first;
+ else
+ Prev = Action(Mask, {ShuffleMask.begin()->first});
+ } else {
+ // Base is undef and at least 2 input vectors are shuffled - perform
+ // shuffles of 2 vectors step by step, combining the shuffles between the
+ // steps.
+ unsigned Vec1VF = GetVF(ShuffleMask.begin()->first);
+ unsigned Vec2VF = GetVF(VMIt->first);
+ if (Vec1VF == Vec2VF) {
+ // No need to resize the input vectors since they are of the same size;
+ // we can shuffle them directly.
+ ArrayRef SecMask = VMIt->second;
+ for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
+ if (SecMask[I] != UndefMaskElem) {
+ assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ Mask[I] = SecMask[I] + Vec1VF;
+ }
+ }
+ Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first});
+ } else {
+ // Vectors of different sizes - resize and reshuffle.
+ std::pair Res1 =
+ ResizeAction(ShuffleMask.begin()->first, Mask);
+ std::pair Res2 = ResizeAction(VMIt->first, VMIt->second);
+ ArrayRef SecMask = VMIt->second;
+ for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
+ if (Mask[I] != UndefMaskElem) {
+ assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ if (Res1.second)
+ Mask[I] = I;
+ } else if (SecMask[I] != UndefMaskElem) {
+ assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
+ }
+ }
+ Prev = Action(Mask, {Res1.first, Res2.first});
+ }
+ VMIt = std::next(VMIt);
+ }
+ // Perform requested actions for the remaining masks/vectors.
+ for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
+ // Shuffle other input vectors, if any.
+ std::pair Res = ResizeAction(VMIt->first, VMIt->second);
+ ArrayRef SecMask = VMIt->second;
+ for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
+ if (SecMask[I] != UndefMaskElem) {
+ assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) &&
+ "Multiple uses of scalars.");
+ Mask[I] = (Res.second ? I : SecMask[I]) + VF;
+ } else if (Mask[I] != UndefMaskElem) {
+ Mask[I] = I;
+ }
+ }
+ Prev = Action(Mask, {Prev, Res.first});
+ }
+ return Prev;
+}
+
 InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) {
 InstructionCost Cost = 0;
 LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -5778,7 +7021,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) {
 unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
 for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
- TreeEntry &TE = *VectorizableTree[I].get();
+ TreeEntry &TE = *VectorizableTree[I];
 InstructionCost C = getEntryCost(&TE, VectorizedVals);
 Cost += C;
@@ -5790,9 +7033,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) {
 SmallPtrSet ExtractCostCalculated;
 InstructionCost ExtractCost = 0;
- SmallVector VF;
- SmallVector> ShuffleMask;
- SmallVector FirstUsers;
+ SmallVector>> ShuffleMasks;
+ SmallVector> FirstUsers;
 SmallVector DemandedElts;
 for (ExternalUser &EU : ExternalUses) {
 // We only add extract cost once for the same scalar.
@@ -5819,42 +7061,59 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) {
 // to detect it as a final shuffled/identity match.
 if (auto *VU = dyn_cast_or_null(EU.User)) {
 if (auto *FTy = dyn_cast(VU->getType())) {
- Optional InsertIdx = getInsertIndex(VU, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- auto *It = find_if(FirstUsers, [VU](Value *V) {
- return areTwoInsertFromSameBuildVector(VU,
- cast(V));
- });
- int VecId = -1;
- if (It == FirstUsers.end()) {
- VF.push_back(FTy->getNumElements());
- ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
- // Find the insertvector, vectorized in tree, if any.
- Value *Base = VU;
- while (isa(Base)) {
- // Build the mask for the vectorized insertelement instructions.
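When the two shuffled inputs have the same VF, performExtractsShuffleAction above folds the second mask into the first by offsetting its lane indices by VF, producing a single two-source mask. A standalone sketch with hypothetical masks:

// Standalone sketch: folding a second shuffle mask into a two-source mask,
// as done above when both inputs have the same vector factor.
#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  const int Undef = -1;
  unsigned Vec1VF = 4;
  std::vector<int> Mask    = {0, Undef, 2, Undef}; // lanes from vector 1
  std::vector<int> SecMask = {Undef, 1, Undef, 3}; // lanes from vector 2
  for (unsigned I = 0; I < Mask.size(); ++I) {
    if (SecMask[I] != Undef) {
      assert(Mask[I] == Undef && "Multiple uses of scalars.");
      Mask[I] = SecMask[I] + Vec1VF; // second source starts at index VF
    }
  }
  for (int M : Mask)
    std::printf("%d ", M); // 0 5 2 7 -> shufflevector(v1, v2) mask
  return 0;
}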
- if (const TreeEntry *E = getTreeEntry(Base)) { - VU = cast(Base); - do { - int Idx = E->findLaneForValue(Base); - ShuffleMask.back()[Idx] = Idx; - Base = cast(Base)->getOperand(0); - } while (E == getTreeEntry(Base)); - break; + Optional InsertIdx = getInsertIndex(VU); + if (InsertIdx) { + const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar); + auto *It = + find_if(FirstUsers, + [VU](const std::pair &Pair) { + return areTwoInsertFromSameBuildVector( + VU, cast(Pair.first)); + }); + int VecId = -1; + if (It == FirstUsers.end()) { + (void)ShuffleMasks.emplace_back(); + SmallVectorImpl &Mask = ShuffleMasks.back()[ScalarTE]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + // Find the insertvector, vectorized in tree, if any. + Value *Base = VU; + while (auto *IEBase = dyn_cast(Base)) { + if (IEBase != EU.User && + (!IEBase->hasOneUse() || + getInsertIndex(IEBase).value_or(*InsertIdx) == *InsertIdx)) + break; + // Build the mask for the vectorized insertelement instructions. + if (const TreeEntry *E = getTreeEntry(IEBase)) { + VU = IEBase; + do { + IEBase = cast(Base); + int Idx = *getInsertIndex(IEBase); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[Idx] = Idx; + Base = IEBase->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + Base = cast(Base)->getOperand(0); } - Base = cast(Base)->getOperand(0); + FirstUsers.emplace_back(VU, ScalarTE); + DemandedElts.push_back(APInt::getZero(FTy->getNumElements())); + VecId = FirstUsers.size() - 1; + } else { + if (isFirstInsertElement(VU, cast(It->first))) + It->first = VU; + VecId = std::distance(FirstUsers.begin(), It); } - FirstUsers.push_back(VU); - DemandedElts.push_back(APInt::getZero(VF.back())); - VecId = FirstUsers.size() - 1; - } else { - VecId = std::distance(FirstUsers.begin(), It); + int InIdx = *InsertIdx; + SmallVectorImpl &Mask = ShuffleMasks[VecId][ScalarTE]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[InIdx] = EU.Lane; + DemandedElts[VecId].setBit(InIdx); + continue; } - int Idx = *InsertIdx; - ShuffleMask[VecId][Idx] = EU.Lane; - DemandedElts[VecId].setBit(Idx); - continue; } } @@ -5878,86 +7137,75 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { InstructionCost SpillCost = getSpillCost(); Cost += SpillCost + ExtractCost; - if (FirstUsers.size() == 1) { - int Limit = ShuffleMask.front().size() * 2; - if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) && - !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) { - InstructionCost C = TTI->getShuffleCost( + auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef Mask) { + InstructionCost C = 0; + unsigned VF = Mask.size(); + unsigned VecVF = TE->getVectorFactor(); + if (VF != VecVF && + (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); }) || + (all_of(Mask, + [VF](int Idx) { return Idx < 2 * static_cast(VF); }) && + !ShuffleVectorInst::isIdentityMask(Mask)))) { + SmallVector OrigMask(VecVF, UndefMaskElem); + std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)), + OrigMask.begin()); + C = TTI->getShuffleCost( TTI::SK_PermuteSingleSrc, - cast(FirstUsers.front()->getType()), - ShuffleMask.front()); - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C - << " for final shuffle of insertelement external users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); + FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask); + LLVM_DEBUG( 
+ dbgs() << "SLP: Adding cost " << C + << " for final shuffle of insertelement external users.\n"; + TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n"); Cost += C; + return std::make_pair(TE, true); } + return std::make_pair(TE, false); + }; + // Calculate the cost of the reshuffled vectors, if any. + for (int I = 0, E = FirstUsers.size(); I < E; ++I) { + Value *Base = cast(FirstUsers[I].first)->getOperand(0); + unsigned VF = ShuffleMasks[I].begin()->second.size(); + auto *FTy = FixedVectorType::get( + cast(FirstUsers[I].first->getType())->getElementType(), VF); + auto Vector = ShuffleMasks[I].takeVector(); + auto &&EstimateShufflesCost = [this, FTy, + &Cost](ArrayRef Mask, + ArrayRef TEs) { + assert((TEs.size() == 1 || TEs.size() == 2) && + "Expected exactly 1 or 2 tree entries."); + if (TEs.size() == 1) { + int Limit = 2 * Mask.size(); + if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) || + !ShuffleVectorInst::isIdentityMask(Mask)) { + InstructionCost C = + TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask); + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C + << " for final shuffle of insertelement " + "external users.\n"; + TEs.front()->dump(); + dbgs() << "SLP: Current total cost = " << Cost << "\n"); + Cost += C; + } + } else { + InstructionCost C = + TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask); + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C + << " for final shuffle of vector node and external " + "insertelement users.\n"; + if (TEs.front()) { TEs.front()->dump(); } TEs.back()->dump(); + dbgs() << "SLP: Current total cost = " << Cost << "\n"); + Cost += C; + } + return TEs.back(); + }; + (void)performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), Base, + [](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF, + EstimateShufflesCost); InstructionCost InsertCost = TTI->getScalarizationOverhead( - cast(FirstUsers.front()->getType()), - DemandedElts.front(), /*Insert*/ true, /*Extract*/ false); - LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost - << " for insertelements gather.\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost -= InsertCost; - } else if (FirstUsers.size() >= 2) { - unsigned MaxVF = *std::max_element(VF.begin(), VF.end()); - // Combined masks of the first 2 vectors. 
- SmallVector CombinedMask(MaxVF, UndefMaskElem); - copy(ShuffleMask.front(), CombinedMask.begin()); - APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF); - auto *VecTy = FixedVectorType::get( - cast(FirstUsers.front()->getType())->getElementType(), - MaxVF); - for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) { - if (ShuffleMask[1][I] != UndefMaskElem) { - CombinedMask[I] = ShuffleMask[1][I] + MaxVF; - CombinedDemandedElts.setBit(I); - } - } - InstructionCost C = - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask); - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C - << " for final shuffle of vector node and external " - "insertelement users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost += C; - InstructionCost InsertCost = TTI->getScalarizationOverhead( - VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false); - LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost - << " for insertelements gather.\n" - << "SLP: Current total cost = " << Cost << "\n"); + cast(FirstUsers[I].first->getType()), DemandedElts[I], + /*Insert*/ true, /*Extract*/ false); Cost -= InsertCost; - for (int I = 2, E = FirstUsers.size(); I < E; ++I) { - // Other elements - permutation of 2 vectors (the initial one and the - // next Ith incoming vector). - unsigned VF = ShuffleMask[I].size(); - for (unsigned Idx = 0; Idx < VF; ++Idx) { - int Mask = ShuffleMask[I][Idx]; - if (Mask != UndefMaskElem) - CombinedMask[Idx] = MaxVF + Mask; - else if (CombinedMask[Idx] != UndefMaskElem) - CombinedMask[Idx] = Idx; - } - for (unsigned Idx = VF; Idx < MaxVF; ++Idx) - if (CombinedMask[Idx] != UndefMaskElem) - CombinedMask[Idx] = Idx; - InstructionCost C = - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask); - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C - << " for final shuffle of vector node and external " - "insertelement users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost += C; - InstructionCost InsertCost = TTI->getScalarizationOverhead( - cast(FirstUsers[I]->getType()), DemandedElts[I], - /*Insert*/ true, /*Extract*/ false); - LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost - << " for insertelements gather.\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost -= InsertCost; - } } #ifndef NDEBUG @@ -6050,6 +7298,12 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl &Mask, } } + if (UsedTEs.empty()) { + assert(all_of(TE->Scalars, UndefValue::classof) && + "Expected vector of undefs only."); + return None; + } + unsigned VF = 0; if (UsedTEs.size() == 1) { // Try to find the perfect match in another gather node at first. 
@@ -6109,17 +7363,11 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl &Mask,
 return None;
 }
-InstructionCost
-BoUpSLP::getGatherCost(FixedVectorType *Ty,
- const DenseSet &ShuffledIndices,
- bool NeedToShuffle) const {
- unsigned NumElts = Ty->getNumElements();
- APInt DemandedElts = APInt::getZero(NumElts);
- for (unsigned I = 0; I < NumElts; ++I)
- if (!ShuffledIndices.count(I))
- DemandedElts.setBit(I);
+InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty,
+ const APInt &ShuffledIndices,
+ bool NeedToShuffle) const {
 InstructionCost Cost =
- TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
+ TTI->getScalarizationOverhead(Ty, ~ShuffledIndices, /*Insert*/ true,
 /*Extract*/ false);
 if (NeedToShuffle)
 Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
@@ -6136,19 +7384,19 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef VL) const {
 // Find the cost of inserting/extracting values from the vector.
 // Check if the same elements are inserted several times and count them as
 // shuffle candidates.
- DenseSet ShuffledElements;
+ APInt ShuffledElements = APInt::getZero(VL.size());
 DenseSet UniqueElements;
 // Iterate in reverse order to consider insert elements with the high cost.
 for (unsigned I = VL.size(); I > 0; --I) {
 unsigned Idx = I - 1;
 // No need to shuffle duplicates for constants.
 if (isConstant(VL[Idx])) {
- ShuffledElements.insert(Idx);
+ ShuffledElements.setBit(Idx);
 continue;
 }
 if (!UniqueElements.insert(VL[Idx]).second) {
 DuplicateNonConst = true;
- ShuffledElements.insert(Idx);
+ ShuffledElements.setBit(Idx);
 }
 }
 return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
@@ -6173,14 +7421,83 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef VL,
 void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
 // Get the basic block this bundle is in. All instructions in the bundle
- // should be in this block.
+ // should be in this block (except for extractelement-like instructions with
+ // constant indices).
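Switching ShuffledIndices from a DenseSet to an APInt lets the demanded-element set be computed as a plain bitwise complement (~ShuffledIndices). A standalone sketch of the same idea, using std::bitset in place of APInt and hypothetical duplicate lanes:

// Standalone sketch: tracking "already shuffled" gather lanes as a bitset so
// the demanded-element set is just its complement.
#include <bitset>
#include <iostream>
#include <vector>

int main() {
  constexpr unsigned VF = 8;
  std::bitset<VF> Shuffled; // lanes served by a shuffle (dups/constants)
  std::vector<unsigned> Duplicated = {2, 5};
  for (unsigned Idx : Duplicated)
    Shuffled.set(Idx);
  std::bitset<VF> Demanded = ~Shuffled; // lanes that still need an insert
  std::cout << Demanded << '\n';        // 11011011 (bit 7 first)
  return 0;
}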
auto *Front = E->getMainOp(); auto *BB = Front->getParent(); assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool { + if (E->getOpcode() == Instruction::GetElementPtr && + !isa(V)) + return true; auto *I = cast(V); - return !E->isOpcodeOrAlt(I) || I->getParent() == BB; + return !E->isOpcodeOrAlt(I) || I->getParent() == BB || + isVectorLikeInstWithConstOps(I); })); + auto &&FindLastInst = [E, Front, this, &BB]() { + Instruction *LastInst = Front; + for (Value *V : E->Scalars) { + auto *I = dyn_cast(V); + if (!I) + continue; + if (LastInst->getParent() == I->getParent()) { + if (LastInst->comesBefore(I)) + LastInst = I; + continue; + } + assert(isVectorLikeInstWithConstOps(LastInst) && + isVectorLikeInstWithConstOps(I) && + "Expected vector-like insts only."); + if (!DT->isReachableFromEntry(LastInst->getParent())) { + LastInst = I; + continue; + } + if (!DT->isReachableFromEntry(I->getParent())) + continue; + auto *NodeA = DT->getNode(LastInst->getParent()); + auto *NodeB = DT->getNode(I->getParent()); + assert(NodeA && "Should only process reachable instructions"); + assert(NodeB && "Should only process reachable instructions"); + assert((NodeA == NodeB) == + (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) && + "Different nodes should have different DFS numbers"); + if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) + LastInst = I; + } + BB = LastInst->getParent(); + return LastInst; + }; + + auto &&FindFirstInst = [E, Front]() { + Instruction *FirstInst = Front; + for (Value *V : E->Scalars) { + auto *I = dyn_cast(V); + if (!I) + continue; + if (I->comesBefore(FirstInst)) + FirstInst = I; + } + return FirstInst; + }; + + // Set the insert point to the beginning of the basic block if the entry + // should not be scheduled. + if (E->State != TreeEntry::NeedToGather && + doesNotNeedToSchedule(E->Scalars)) { + Instruction *InsertInst; + if (all_of(E->Scalars, isUsedOutsideBlock)) + InsertInst = FindLastInst(); + else + InsertInst = FindFirstInst(); + // If the instruction is PHI, set the insert point after all the PHIs. + if (isa(InsertInst)) + InsertInst = BB->getFirstNonPHI(); + BasicBlock::iterator InsertPt = InsertInst->getIterator(); + Builder.SetInsertPoint(BB, InsertPt); + Builder.SetCurrentDebugLocation(Front->getDebugLoc()); + return; + } + // The last instruction in the bundle in program order. Instruction *LastInst = nullptr; @@ -6189,8 +7506,10 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) { // VL.back() and iterate over schedule data until we reach the end of the // bundle. The end of the bundle is marked by null ScheduleData. if (BlocksSchedules.count(BB)) { - auto *Bundle = - BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back())); + Value *V = E->isOneOf(E->Scalars.back()); + if (doesNotNeedToBeScheduled(V)) + V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled); + auto *Bundle = BlocksSchedules[BB]->getScheduleData(V); if (Bundle && Bundle->isPartOfBundle()) for (; Bundle; Bundle = Bundle->NextInBundle) if (Bundle->OpValue == Bundle->Inst) @@ -6216,19 +7535,16 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) { // we both exit early from buildTree_rec and that the bundle be out-of-order // (causing us to iterate all the way to the end of the block). 
if (!LastInst) { - SmallPtrSet Bundle(E->Scalars.begin(), E->Scalars.end()); - for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) { - if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I)) - LastInst = &I; - if (Bundle.empty()) - break; - } + LastInst = FindLastInst(); + // If the instruction is PHI, set the insert point after all the PHIs. + if (isa(LastInst)) + LastInst = BB->getFirstNonPHI()->getPrevNode(); } assert(LastInst && "Failed to find last instruction in bundle"); // Set the insertion point after the last instruction in the bundle. Set the // debug location to Front. - Builder.SetInsertPoint(BB, ++LastInst->getIterator()); + Builder.SetInsertPoint(BB, std::next(LastInst->getIterator())); Builder.SetCurrentDebugLocation(Front->getDebugLoc()); } @@ -6358,8 +7674,15 @@ public: } // namespace Value *BoUpSLP::vectorizeTree(ArrayRef VL) { - unsigned VF = VL.size(); + const unsigned VF = VL.size(); InstructionsState S = getSameOpcode(VL); + // Special processing for GEPs bundle, which may include non-gep values. + if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) { + const auto *It = + find_if(VL, [](Value *V) { return isa(V); }); + if (It != VL.end()) + S = getSameOpcode(*It); + } if (S.getOpcode()) { if (TreeEntry *E = getTreeEntry(S.OpValue)) if (E->isSame(VL)) { @@ -6414,7 +7737,18 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { } } - // Check that every instruction appears once in this bundle. + // Can't vectorize this, so simply build a new vector with each lane + // corresponding to the requested value. + return createBuildVector(VL); +} +Value *BoUpSLP::createBuildVector(ArrayRef VL) { + assert(any_of(VectorizableTree, + [VL](const std::unique_ptr &TE) { + return TE->State == TreeEntry::NeedToGather && TE->isSame(VL); + }) && + "Non-matching gather node."); + unsigned VF = VL.size(); + // Exploit possible reuse of values across lanes. SmallVector ReuseShuffleIndicies; SmallVector UniqueValues; if (VL.size() > 2) { @@ -6447,6 +7781,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), UndefMaskElem); } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { + if (UniqueValues.empty()) { + assert(all_of(VL, UndefValue::classof) && "Expected list of undefs."); + NumValues = VF; + } ReuseShuffleIndicies.clear(); UniqueValues.clear(); UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); @@ -6486,7 +7824,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { SmallVector Entries; Optional Shuffle = isGatherShuffledEntry(E, Mask, Entries); - if (Shuffle.hasValue()) { + if (Shuffle) { assert((Entries.size() == 1 || Entries.size() == 2) && "Expected shuffle of 1 or 2 entries."); Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, @@ -6520,14 +7858,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size()); switch (ShuffleOrOp) { case Instruction::PHI: { - assert( - (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) && - "PHI reordering is free."); + assert((E->ReorderIndices.empty() || + E != VectorizableTree.front().get() || + !E->UserTreeIndices.empty()) && + "PHI reordering is free."); auto *PH = cast(VL0); Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); Value *V = NewPhi; + + // Adjust insertion point once all PHI's have been generated. 
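createBuildVector above exploits reuse across lanes: repeated scalars are vectorized once and re-broadcast through the ReuseShuffleIndicies mask. A standalone sketch of that deduplication with hypothetical scalars (strings standing in for Value pointers):

// Standalone sketch: deduplicating gather lanes into unique values plus a
// reuse mask, the shape ReuseShuffleIndicies takes above.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> VL = {"a", "b", "a", "b"}; // scalars, with repeats
  std::vector<std::string> Unique;
  std::vector<int> Reuse;
  std::map<std::string, int> Pos; // first lane each value occupies
  for (const std::string &V : VL) {
    auto It = Pos.find(V);
    if (It == Pos.end()) {
      int Idx = (int)Unique.size();
      Pos[V] = Idx;
      Unique.push_back(V);
      Reuse.push_back(Idx);
    } else {
      Reuse.push_back(It->second);
    }
  }
  // Vectorize {"a","b"} once, then replicate lanes via the mask 0 1 0 1.
  for (int I : Reuse)
    std::printf("%d ", I);
  return 0;
}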
+ Builder.SetInsertPoint(&*PH->getParent()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(PH->getDebugLoc()); + ShuffleBuilder.addInversedMask(E->ReorderIndices); ShuffleBuilder.addMask(E->ReuseShuffleIndices); V = ShuffleBuilder.finalize(V); @@ -6593,7 +7937,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { cast(FirstInsert->getType())->getNumElements(); const unsigned NumScalars = E->Scalars.size(); - unsigned Offset = *getInsertIndex(VL0, 0); + unsigned Offset = *getInsertIndex(VL0); assert(Offset < NumElts && "Failed to find vector index offset"); // Create shuffle to resize vector @@ -6611,11 +7955,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Mask.swap(PrevMask); for (unsigned I = 0; I < NumScalars; ++I) { Value *Scalar = E->Scalars[PrevMask[I]]; - Optional InsertIdx = getInsertIndex(Scalar, 0); - if (!InsertIdx || *InsertIdx == UndefMaskElem) - continue; - IsIdentity &= *InsertIdx - Offset == I; - Mask[*InsertIdx - Offset] = I; + unsigned InsertIdx = *getInsertIndex(Scalar); + IsIdentity &= InsertIdx - Offset == I; + Mask[InsertIdx - Offset] = I; } if (!IsIdentity || NumElts != NumScalars) { V = Builder.CreateShuffleVector(V, Mask); @@ -6802,19 +8144,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned AS = LI->getPointerAddressSpace(); Value *PO = LI->getPointerOperand(); if (E->State == TreeEntry::Vectorize) { - Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS)); + NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign()); // The pointer operand uses an in-tree scalar so we add the new BitCast - // to ExternalUses list to make sure that an extract will be generated - // in the future. + // or LoadInst to ExternalUses list to make sure that an extract will + // be generated in the future. if (TreeEntry *Entry = getTreeEntry(PO)) { // Find which lane we need to extract. unsigned FoundLane = Entry->findLaneForValue(PO); - ExternalUses.emplace_back(PO, cast(VecPtr), FoundLane); + ExternalUses.emplace_back( + PO, PO != VecPtr ? cast(VecPtr) : NewLI, FoundLane); } - - NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign()); } else { assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state"); Value *VecPtr = vectorizeTree(E->getOperand(0)); @@ -6822,7 +8163,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Align CommonAlignment = LI->getAlign(); for (Value *V : E->Scalars) CommonAlignment = - commonAlignment(CommonAlignment, cast(V)->getAlign()); + std::min(CommonAlignment, cast(V)->getAlign()); NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment); } Value *V = propagateMetadata(NewLI, E->Scalars); @@ -6847,17 +8188,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *ScalarPtr = SI->getPointerOperand(); Value *VecPtr = Builder.CreateBitCast( ScalarPtr, VecValue->getType()->getPointerTo(AS)); - StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr, - SI->getAlign()); + StoreInst *ST = + Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign()); - // The pointer operand uses an in-tree scalar, so add the new BitCast to - // ExternalUses to make sure that an extract will be generated in the - // future. + // The pointer operand uses an in-tree scalar, so add the new BitCast or + // StoreInst to ExternalUses to make sure that an extract will be + // generated in the future. if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) { // Find which lane we need to extract. 
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr); - ExternalUses.push_back( - ExternalUser(ScalarPtr, cast(VecPtr), FoundLane)); + ExternalUses.push_back(ExternalUser( + ScalarPtr, ScalarPtr != VecPtr ? cast(VecPtr) : ST, + FoundLane)); } Value *V = propagateMetadata(ST, E->Scalars); @@ -6879,8 +8221,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V = Builder.CreateGEP(GEP0->getSourceElementType(), Op0, OpVecs); - if (Instruction *I = dyn_cast(V)) - V = propagateMetadata(I, E->Scalars); + if (Instruction *I = dyn_cast(V)) { + SmallVector GEPs; + for (Value *V : E->Scalars) { + if (isa(V)) + GEPs.push_back(V); + } + V = propagateMetadata(I, GEPs); + } ShuffleBuilder.addInversedMask(E->ReorderIndices); ShuffleBuilder.addMask(E->ReuseShuffleIndices); @@ -6913,11 +8261,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. - if (UseIntrinsic && hasVectorInstrinsicScalarOpd(IID, j)) { + if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, j)) { CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(j); OpVecs.push_back(CEI->getArgOperand(j)); - if (hasVectorInstrinsicOverloadedScalarOpd(IID, j)) + if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j)) TysForDecl.push_back(ScalarArg->getType()); continue; } @@ -6925,6 +8273,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *OpVec = vectorizeTree(E->getOperand(j)); LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); + if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j)) + TysForDecl.push_back(OpVec->getType()); } Function *CF; @@ -6997,10 +8347,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS); auto *AltCI = cast(E->getAltOp()); CmpInst::Predicate AltPred = AltCI->getPredicate(); - unsigned AltIdx = - std::distance(E->Scalars.begin(), find(E->Scalars, AltCI)); - if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx]) - AltPred = CmpInst::getSwappedPredicate(AltPred); V1 = Builder.CreateCmp(AltPred, LHS, RHS); } else { V0 = Builder.CreateCast( @@ -7022,34 +8368,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // each vector operation. ValueList OpScalars, AltScalars; SmallVector Mask; - buildSuffleEntryMask( + buildShuffleEntryMask( E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, [E](Instruction *I) { assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); - if (auto *CI0 = dyn_cast(E->getMainOp())) { - auto *AltCI0 = cast(E->getAltOp()); - auto *CI = cast(I); - CmpInst::Predicate P0 = CI0->getPredicate(); - CmpInst::Predicate AltP0 = AltCI0->getPredicate(); - CmpInst::Predicate AltP0Swapped = - CmpInst::getSwappedPredicate(AltP0); - CmpInst::Predicate CurrentPred = CI->getPredicate(); - CmpInst::Predicate CurrentPredSwapped = - CmpInst::getSwappedPredicate(CurrentPred); - if (P0 == AltP0 || P0 == AltP0Swapped) { - // Alternate cmps have same/swapped predicate as main cmps but - // different order of compatible operands. 
- return !( - (P0 == CurrentPred && - areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), - I->getOperand(0), I->getOperand(1))) || - (P0 == CurrentPredSwapped && - areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), - I->getOperand(1), I->getOperand(0)))); - } - return CurrentPred != P0 && CurrentPredSwapped != P0; - } - return I->getOpcode() == E->getAltOpcode(); + return isAlternateInstruction(I, E->getMainOp(), E->getAltOp()); }, Mask, &OpScalars, &AltScalars); @@ -7080,6 +8403,17 @@ Value *BoUpSLP::vectorizeTree() { return vectorizeTree(ExternallyUsedValues); } +namespace { +/// Data type for handling buildvector sequences with the reused scalars from +/// other tree entries. +struct ShuffledInsertData { + /// List of insertelements to be replaced by shuffles. + SmallVector InsertElements; + /// The parent vectors and shuffle mask for the given list of inserts. + MapVector> ValueMasks; +}; +} // namespace + Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. @@ -7113,6 +8447,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); + SmallVector ShuffledInserts; + // Maps vector instruction to original insertelement instruction + DenseMap VectorToInsertElement; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -7126,6 +8463,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { assert(E && "Invalid scalar"); assert(E->State != TreeEntry::NeedToGather && "Extracting from a gather list"); + // Non-instruction pointers are not deleted, just skip them. + if (E->getOpcode() == Instruction::GetElementPtr && + !isa(Scalar)) + continue; Value *Vec = E->VectorizedValue; assert(Vec && "Can't find vectorizable value"); @@ -7152,6 +8493,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { assert(isa(Scalar->getType()) && isa(Scalar) && "In-tree scalar of vector type is not insertelement?"); + auto *IE = cast(Scalar); + VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; // If User == nullptr, the Scalar is used as extra arg. Generate @@ -7180,6 +8523,69 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { continue; } + if (auto *VU = dyn_cast(User)) { + // Skip if the scalar is another vector op or Vec is not an instruction. + if (!Scalar->getType()->isVectorTy() && isa(Vec)) { + if (auto *FTy = dyn_cast(User->getType())) { + Optional InsertIdx = getInsertIndex(VU); + if (InsertIdx) { + // Need to use original vector, if the root is truncated. + if (MinBWs.count(Scalar) && + VectorizableTree[0]->VectorizedValue == Vec) + Vec = VectorRoot; + auto *It = + find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) { + // Checks if 2 insertelements are from the same buildvector. + InsertElementInst *VecInsert = Data.InsertElements.front(); + return areTwoInsertFromSameBuildVector(VU, VecInsert); + }); + unsigned Idx = *InsertIdx; + if (It == ShuffledInserts.end()) { + (void)ShuffledInserts.emplace_back(); + It = std::next(ShuffledInserts.begin(), + ShuffledInserts.size() - 1); + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + // Find the insertvector, vectorized in tree, if any. 
+ Value *Base = VU; + while (auto *IEBase = dyn_cast(Base)) { + if (IEBase != User && + (!IEBase->hasOneUse() || + getInsertIndex(IEBase).value_or(Idx) == Idx)) + break; + // Build the mask for the vectorized insertelement instructions. + if (const TreeEntry *E = getTreeEntry(IEBase)) { + do { + IEBase = cast(Base); + int IEIdx = *getInsertIndex(IEBase); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[IEIdx] = IEIdx; + Base = IEBase->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + Base = cast(Base)->getOperand(0); + // After the vectorization the def-use chain has changed, need + // to look through original insertelement instructions, if they + // get replaced by vector instructions. + auto It = VectorToInsertElement.find(Base); + if (It != VectorToInsertElement.end()) + Base = It->second; + } + } + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[Idx] = ExternalUse.Lane; + It->InsertElements.push_back(cast(User)); + continue; + } + } + } + } + // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. if (auto *VecI = dyn_cast(Vec)) { @@ -7215,6 +8621,221 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } + // Checks if the mask is an identity mask. + auto &&IsIdentityMask = [](ArrayRef Mask, FixedVectorType *VecTy) { + int Limit = Mask.size(); + return VecTy->getNumElements() == Mask.size() && + all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && + ShuffleVectorInst::isIdentityMask(Mask); + }; + // Tries to combine 2 different masks into single one. + auto &&CombineMasks = [](SmallVectorImpl &Mask, ArrayRef ExtMask) { + SmallVector NewMask(ExtMask.size(), UndefMaskElem); + for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) { + if (ExtMask[I] == UndefMaskElem) + continue; + NewMask[I] = Mask[ExtMask[I]]; + } + Mask.swap(NewMask); + }; + // Peek through shuffles, trying to simplify the final shuffle code. + auto &&PeekThroughShuffles = + [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl &Mask, + bool CheckForLengthChange = false) { + while (auto *SV = dyn_cast(V)) { + // Exit if not a fixed vector type or changing size shuffle. + if (!isa(SV->getType()) || + (CheckForLengthChange && SV->changesLength())) + break; + // Exit if the identity or broadcast mask is found. + if (IsIdentityMask(Mask, cast(SV->getType())) || + SV->isZeroEltSplat()) + break; + bool IsOp1Undef = isUndefVector(SV->getOperand(0)); + bool IsOp2Undef = isUndefVector(SV->getOperand(1)); + if (!IsOp1Undef && !IsOp2Undef) + break; + SmallVector ShuffleMask(SV->getShuffleMask().begin(), + SV->getShuffleMask().end()); + CombineMasks(ShuffleMask, Mask); + Mask.swap(ShuffleMask); + if (IsOp2Undef) + V = SV->getOperand(0); + else + V = SV->getOperand(1); + } + }; + // Smart shuffle instruction emission, walks through shuffles trees and + // tries to find the best matching vector for the actual shuffle + // instruction. + auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles, + &CombineMasks](Value *V1, Value *V2, + ArrayRef Mask) -> Value * { + assert(V1 && "Expected at least one vector value."); + if (V2 && !isUndefVector(V2)) { + // Peek through shuffles. 
+ Value *Op1 = V1; + Value *Op2 = V2; + int VF = + cast(V1->getType())->getElementCount().getKnownMinValue(); + SmallVector CombinedMask1(Mask.size(), UndefMaskElem); + SmallVector CombinedMask2(Mask.size(), UndefMaskElem); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] < VF) + CombinedMask1[I] = Mask[I]; + else + CombinedMask2[I] = Mask[I] - VF; + } + Value *PrevOp1; + Value *PrevOp2; + do { + PrevOp1 = Op1; + PrevOp2 = Op2; + PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true); + PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true); + // Check if we have 2 resizing shuffles - need to peek through operands + // again. + if (auto *SV1 = dyn_cast(Op1)) + if (auto *SV2 = dyn_cast(Op2)) + if (SV1->getOperand(0)->getType() == + SV2->getOperand(0)->getType() && + SV1->getOperand(0)->getType() != SV1->getType() && + isUndefVector(SV1->getOperand(1)) && + isUndefVector(SV2->getOperand(1))) { + Op1 = SV1->getOperand(0); + Op2 = SV2->getOperand(0); + SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), + SV1->getShuffleMask().end()); + CombineMasks(ShuffleMask1, CombinedMask1); + CombinedMask1.swap(ShuffleMask1); + SmallVector ShuffleMask2(SV2->getShuffleMask().begin(), + SV2->getShuffleMask().end()); + CombineMasks(ShuffleMask2, CombinedMask2); + CombinedMask2.swap(ShuffleMask2); + } + } while (PrevOp1 != Op1 || PrevOp2 != Op2); + VF = cast(Op1->getType()) + ->getElementCount() + .getKnownMinValue(); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (CombinedMask2[I] != UndefMaskElem) { + assert(CombinedMask1[I] == UndefMaskElem && + "Expected undefined mask element"); + CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); + } + } + Value *Vec = Builder.CreateShuffleVector( + Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2, + CombinedMask1); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + if (isa(V1)) + return PoisonValue::get(FixedVectorType::get( + cast(V1->getType())->getElementType(), Mask.size())); + Value *Op = V1; + SmallVector CombinedMask(Mask.begin(), Mask.end()); + PeekThroughShuffles(Op, CombinedMask); + if (!isa(Op->getType()) || + !IsIdentityMask(CombinedMask, cast(Op->getType()))) { + Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + return Op; + }; + + auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef Mask) { + unsigned VF = Mask.size(); + unsigned VecVF = cast(Vec->getType())->getNumElements(); + if (VF != VecVF) { + if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); })) { + Vec = CreateShuffle(Vec, nullptr, Mask); + return std::make_pair(Vec, true); + } + SmallVector ResizeMask(VF, UndefMaskElem); + for (unsigned I = 0; I < VF; ++I) { + if (Mask[I] != UndefMaskElem) + ResizeMask[Mask[I]] = Mask[I]; + } + Vec = CreateShuffle(Vec, nullptr, ResizeMask); + } + + return std::make_pair(Vec, false); + }; + // Perform shuffling of the vectorize tree entries for better handling of + // external extracts. + for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) { + // Find the first and the last instruction in the list of insertelements. 
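Before peeking through each operand, CreateShuffle above splits the combined two-source mask into one single-source mask per operand, with indices >= VF rebased onto the second vector. A standalone sketch with hypothetical values:

// Standalone sketch: splitting a two-source shuffle mask into per-operand
// masks, as done above before simplifying each operand separately.
#include <cstdio>
#include <vector>

int main() {
  const int Undef = -1;
  int VF = 4;                           // lanes per source vector
  std::vector<int> Mask = {0, 5, 2, 7}; // indices >= VF pick from source 2
  std::vector<int> M1(Mask.size(), Undef), M2(Mask.size(), Undef);
  for (int I = 0, E = (int)Mask.size(); I < E; ++I) {
    if (Mask[I] < VF)
      M1[I] = Mask[I];
    else
      M2[I] = Mask[I] - VF; // rebase onto the second vector
  }
  for (int M : M1) std::printf("%d ", M); // 0 -1 2 -1
  std::printf("| ");
  for (int M : M2) std::printf("%d ", M); // -1 1 -1 3
  return 0;
}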
+ sort(ShuffledInserts[I].InsertElements, isFirstInsertElement); + InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front(); + InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back(); + Builder.SetInsertPoint(LastInsert); + auto Vector = ShuffledInserts[I].ValueMasks.takeVector(); + Value *NewInst = performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), + FirstInsert->getOperand(0), + [](Value *Vec) { + return cast(Vec->getType()) + ->getElementCount() + .getKnownMinValue(); + }, + ResizeToVF, + [FirstInsert, &CreateShuffle](ArrayRef Mask, + ArrayRef Vals) { + assert((Vals.size() == 1 || Vals.size() == 2) && + "Expected exactly 1 or 2 input values."); + if (Vals.size() == 1) { + // Do not create shuffle if the mask is a simple identity + // non-resizing mask. + if (Mask.size() != cast(Vals.front()->getType()) + ->getNumElements() || + !ShuffleVectorInst::isIdentityMask(Mask)) + return CreateShuffle(Vals.front(), nullptr, Mask); + return Vals.front(); + } + return CreateShuffle(Vals.front() ? Vals.front() + : FirstInsert->getOperand(0), + Vals.back(), Mask); + }); + auto It = ShuffledInserts[I].InsertElements.rbegin(); + // Rebuild buildvector chain. + InsertElementInst *II = nullptr; + if (It != ShuffledInserts[I].InsertElements.rend()) + II = *It; + SmallVector Inserts; + while (It != ShuffledInserts[I].InsertElements.rend()) { + assert(II && "Must be an insertelement instruction."); + if (*It == II) + ++It; + else + Inserts.push_back(cast(II)); + II = dyn_cast(II->getOperand(0)); + } + for (Instruction *II : reverse(Inserts)) { + II->replaceUsesOfWith(II->getOperand(0), NewInst); + if (auto *NewI = dyn_cast(NewInst)) + if (II->getParent() == NewI->getParent() && II->comesBefore(NewI)) + II->moveAfter(NewI); + NewInst = II; + } + LastInsert->replaceAllUsesWith(NewInst); + for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) { + IE->replaceUsesOfWith(IE->getOperand(0), + PoisonValue::get(IE->getOperand(0)->getType())); + IE->replaceUsesOfWith(IE->getOperand(1), + PoisonValue::get(IE->getOperand(1)->getType())); + eraseInstruction(IE); + } + CSEBlocks.insert(LastInsert->getParent()); + } + // For each vectorized value: for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); @@ -7229,6 +8850,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; + if (Entry->getOpcode() == Instruction::GetElementPtr && + !isa(Scalar)) + continue; #ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { @@ -7236,7 +8860,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); // It is legal to delete users in the ignorelist. 
- assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) || + assert((getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || (isa_and_nonnull(U) && isDeleted(cast(U)))) && "Deleting out-of-tree value"); @@ -7404,9 +9029,11 @@ void BoUpSLP::optimizeGatherSequence() { BoUpSLP::ScheduleData * BoUpSLP::BlockScheduling::buildBundle(ArrayRef VL) { - ScheduleData *Bundle = nullptr; + ScheduleData *Bundle = nullptr; ScheduleData *PrevInBundle = nullptr; for (Value *V : VL) { + if (doesNotNeedToBeScheduled(V)) + continue; ScheduleData *BundleMember = getScheduleData(V); assert(BundleMember && "no ScheduleData for bundle member " @@ -7418,8 +9045,6 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef VL) { } else { Bundle = BundleMember; } - BundleMember->UnscheduledDepsInBundle = 0; - Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps; // Group the instructions to a bundle. BundleMember->FirstInBundle = Bundle; @@ -7436,7 +9061,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, const InstructionsState &S) { // No need to schedule PHIs, insertelement, extractelement and extractvalue // instructions. - if (isa(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue)) + if (isa(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue) || + doesNotNeedToSchedule(VL)) return nullptr; // Initialize the instruction bundle. @@ -7455,16 +9081,17 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); }); ReSchedule = true; } - if (ReSchedule) { - resetSchedule(); - initialFillReadyList(ReadyInsts); - } if (Bundle) { LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " << BB->getName() << "\n"); calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP); } + if (ReSchedule) { + resetSchedule(); + initialFillReadyList(ReadyInsts); + } + // Now try to schedule the new bundle or (if no bundle) just calculate // dependencies. As soon as the bundle is "ready" it means that there are no // cyclic dependencies and we can schedule it. Note that's important that we @@ -7472,14 +9099,17 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) && !ReadyInsts.empty()) { ScheduleData *Picked = ReadyInsts.pop_back_val(); - if (Picked->isSchedulingEntity() && Picked->isReady()) - schedule(Picked, ReadyInsts); + assert(Picked->isSchedulingEntity() && Picked->isReady() && + "must be ready to schedule"); + schedule(Picked, ReadyInsts); } }; // Make sure that the scheduling region contains all // instructions of the bundle. for (Value *V : VL) { + if (doesNotNeedToBeScheduled(V)) + continue; if (!extendSchedulingRegion(V, S)) { // If the scheduling region got new instructions at the lower end (or it // is a new region for the first bundle). This makes it necessary to @@ -7494,9 +9124,16 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, bool ReSchedule = false; for (Value *V : VL) { + if (doesNotNeedToBeScheduled(V)) + continue; ScheduleData *BundleMember = getScheduleData(V); assert(BundleMember && "no ScheduleData for bundle member (maybe not in same basic block)"); + + // Make sure we don't leave the pieces of the bundle in the ready list when + // whole bundle might not be ready. 
+ ReadyInsts.remove(BundleMember); + if (!BundleMember->IsScheduled) continue; // A bundle member was scheduled as single instruction before and now @@ -7518,16 +9155,24 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, Value *OpValue) { - if (isa(OpValue) || isVectorLikeInstWithConstOps(OpValue)) + if (isa(OpValue) || isVectorLikeInstWithConstOps(OpValue) || + doesNotNeedToSchedule(VL)) return; + if (doesNotNeedToBeScheduled(OpValue)) + OpValue = *find_if_not(VL, doesNotNeedToBeScheduled); ScheduleData *Bundle = getScheduleData(OpValue); LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); assert(!Bundle->IsScheduled && "Can't cancel bundle which is already scheduled"); - assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() && + assert(Bundle->isSchedulingEntity() && + (Bundle->isPartOfBundle() || needToScheduleSingleInstruction(VL)) && "tried to unbundle something which is not a bundle"); + // Remove the bundle from the ready list. + if (Bundle->isReady()) + ReadyInsts.remove(Bundle); + // Un-bundle: make single instructions out of the bundle. ScheduleData *BundleMember = Bundle; while (BundleMember) { @@ -7535,8 +9180,8 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, BundleMember->FirstInBundle = BundleMember; ScheduleData *Next = BundleMember->NextInBundle; BundleMember->NextInBundle = nullptr; - BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps; - if (BundleMember->UnscheduledDepsInBundle == 0) { + BundleMember->TE = nullptr; + if (BundleMember->unscheduledDepsInBundle() == 0) { ReadyInsts.insert(BundleMember); } BundleMember = Next; @@ -7559,9 +9204,10 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, Instruction *I = dyn_cast(V); assert(I && "bundle member must be an instruction"); assert(!isa(I) && !isVectorLikeInstWithConstOps(I) && + !doesNotNeedToBeScheduled(I) && "phi nodes/insertelements/extractelements/extractvalues don't need to " "be scheduled"); - auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool { + auto &&CheckScheduleForI = [this, &S](Instruction *I) -> bool { ScheduleData *ISD = getScheduleData(I); if (!ISD) return false; @@ -7573,7 +9219,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, ExtraScheduleDataMap[I][S.OpValue] = SD; return true; }; - if (CheckSheduleForI(I)) + if (CheckScheduleForI(I)) return true; if (!ScheduleStart) { // It's the first instruction in the new region. 
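The hunks above pull a bundle member out of the ready list as soon as it is folded into a bundle, because readiness is a property of the whole bundle, not of any single member. A minimal, self-contained C++ sketch of that invariant (illustrative only; the Node type and its fields are hypothetical stand-ins for LLVM's ScheduleData, not the real class):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Node {
      int UnscheduledDeps = 0;
      Node *FirstInBundle = this; // every node starts as its own bundle
      Node *NextInBundle = nullptr;
    };

    // A bundle is ready only when no member has unscheduled dependencies.
    bool bundleIsReady(Node *Head) {
      for (Node *N = Head; N; N = N->NextInBundle)
        if (N->UnscheduledDeps != 0)
          return false;
      return true;
    }

    int main() {
      Node A, B;
      B.UnscheduledDeps = 1;            // B still waits on something
      std::vector<Node *> Ready = {&A}; // A alone looked ready...

      // ...but once A and B are bundled, A must leave the ready list:
      A.NextInBundle = &B;
      B.FirstInBundle = &A;
      Ready.erase(std::remove(Ready.begin(), Ready.end(), &A), Ready.end());

      assert(!bundleIsReady(&A)); // the bundle as a whole is not ready yet
      B.UnscheduledDeps = 0;
      assert(bundleIsReady(&A)); // now every member is unblocked
      return 0;
    }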
@@ -7581,7 +9227,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
     ScheduleStart = I;
     ScheduleEnd = I->getNextNode();
     if (isOneOf(S, I) != I)
-      CheckSheduleForI(I);
+      CheckScheduleForI(I);
     assert(ScheduleEnd && "tried to vectorize a terminator?");
     LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
     return true;
@@ -7609,7 +9255,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
     initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
     ScheduleStart = I;
     if (isOneOf(S, I) != I)
-      CheckSheduleForI(I);
+      CheckScheduleForI(I);
     LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
     return true;
@@ -7623,7 +9269,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
                     nullptr);
     ScheduleEnd = I->getNextNode();
     if (isOneOf(S, I) != I)
-      CheckSheduleForI(I);
+      CheckScheduleForI(I);
     assert(ScheduleEnd && "tried to vectorize a terminator?");
     LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
     return true;
@@ -7635,7 +9281,10 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
                                                 ScheduleData *NextLoadStore) {
   ScheduleData *CurrentLoadStore = PrevLoadStore;
   for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
-    ScheduleData *SD = ScheduleDataMap[I];
+    // No need to allocate data for non-schedulable instructions.
+    if (doesNotNeedToBeScheduled(I))
+      continue;
+    ScheduleData *SD = ScheduleDataMap.lookup(I);
     if (!SD) {
       SD = allocateScheduleDataChunks();
       ScheduleDataMap[I] = SD;
@@ -7658,6 +9307,10 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
       }
       CurrentLoadStore = SD;
     }
+
+    if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
+        match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+      RegionHasStackSave = true;
   }
   if (NextLoadStore) {
     if (CurrentLoadStore)
@@ -7690,8 +9343,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
       // Handle def-use chain dependencies.
       if (BundleMember->OpValue != BundleMember->Inst) {
-        ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
-        if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+        if (ScheduleData *UseSD = getScheduleData(BundleMember->Inst)) {
           BundleMember->Dependencies++;
           ScheduleData *DestBundle = UseSD->FirstInBundle;
           if (!DestBundle->IsScheduled)
@@ -7701,10 +9353,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
         }
       } else {
         for (User *U : BundleMember->Inst->users()) {
-          assert(isa<Instruction>(U) &&
-                 "user of instruction must be instruction");
-          ScheduleData *UseSD = getScheduleData(U);
-          if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+          if (ScheduleData *UseSD = getScheduleData(cast<Instruction>(U))) {
             BundleMember->Dependencies++;
             ScheduleData *DestBundle = UseSD->FirstInBundle;
             if (!DestBundle->IsScheduled)
@@ -7715,6 +9364,75 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
         }
       }

+      auto makeControlDependent = [&](Instruction *I) {
+        auto *DepDest = getScheduleData(I);
+        assert(DepDest && "must be in schedule window");
+        DepDest->ControlDependencies.push_back(BundleMember);
+        BundleMember->Dependencies++;
+        ScheduleData *DestBundle = DepDest->FirstInBundle;
+        if (!DestBundle->IsScheduled)
+          BundleMember->incrementUnscheduledDeps(1);
+        if (!DestBundle->hasValidDependencies())
+          WorkList.push_back(DestBundle);
+      };
+
+      // Any instruction which isn't safe to speculate at the beginning of the
+      // block is control dependent on any early exit or non-willreturn call
+      // which precedes it.
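+      // For illustration (hypothetical IR, not taken from this patch):
+      //   call void @may_not_return()
+      //   %d = udiv i32 %a, %b
+      // The udiv may fault, so it is not speculatable at block entry and must
+      // stay control dependent on the call; hoisting it above the call could
+      // introduce a fault on a path where the call never returned.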
+      if (!isGuaranteedToTransferExecutionToSuccessor(BundleMember->Inst)) {
+        for (Instruction *I = BundleMember->Inst->getNextNode();
+             I != ScheduleEnd; I = I->getNextNode()) {
+          if (isSafeToSpeculativelyExecute(I, &*BB->begin()))
+            continue;
+
+          // Add the dependency
+          makeControlDependent(I);
+
+          if (!isGuaranteedToTransferExecutionToSuccessor(I))
+            // Everything past here must be control dependent on I.
+            break;
+        }
+      }
+
+      if (RegionHasStackSave) {
+        // If we have an inalloca alloca instruction, it needs to be scheduled
+        // after any preceding stacksave. We also need to prevent any alloca
+        // from reordering above a preceding stackrestore.
+        if (match(BundleMember->Inst, m_Intrinsic<Intrinsic::stacksave>()) ||
+            match(BundleMember->Inst, m_Intrinsic<Intrinsic::stackrestore>())) {
+          for (Instruction *I = BundleMember->Inst->getNextNode();
+               I != ScheduleEnd; I = I->getNextNode()) {
+            if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
+                match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+              // Any allocas past here must be control dependent on I, and I
+              // must be memory dependent on BundleMember->Inst.
+              break;
+
+            if (!isa<AllocaInst>(I))
+              continue;
+
+            // Add the dependency
+            makeControlDependent(I);
+          }
+        }
+
+        // In addition to the cases handled just above, we need to prevent
+        // allocas from moving below a stacksave. The stackrestore case is
+        // currently thought to be overly conservative.
+        if (isa<AllocaInst>(BundleMember->Inst)) {
+          for (Instruction *I = BundleMember->Inst->getNextNode();
+               I != ScheduleEnd; I = I->getNextNode()) {
+            if (!match(I, m_Intrinsic<Intrinsic::stacksave>()) &&
+                !match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+              continue;
+
+            // Add the dependency
+            makeControlDependent(I);
+            break;
+          }
+        }
+      }
+
       // Handle the memory dependencies (if any).
       ScheduleData *DepDest = BundleMember->NextLoadStore;
       if (!DepDest)
@@ -7777,7 +9495,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
     }
   }
   if (InsertInReadyList && SD->isReady()) {
-    ReadyInsts.push_back(SD);
+    ReadyInsts.insert(SD);
     LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst << "\n");
   }
@@ -7804,11 +9522,18 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");

+  // A key point - if we got here, pre-scheduling was able to find a valid
+  // scheduling of the sub-graph of the scheduling window which consists
+  // of all vector bundles and their transitive users. As such, we do not
+  // need to reschedule anything *outside of* that subgraph.
+
   BS->resetSchedule();

   // For the real scheduling we use a more sophisticated ready-list: it is
   // sorted by the original instruction location. This lets the final schedule
   // be as close as possible to the original instruction order.
+  // WARNING: If changing this order causes a correctness issue, that means
+  // there is some missing dependence edge in the schedule data graph.
   struct ScheduleDataCompare {
     bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
       return SD2->SchedulingPriority < SD1->SchedulingPriority;
@@ -7816,21 +9541,22 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   };
   std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;

-  // Ensure that all dependency data is updated and fill the ready-list with
-  // initial instructions.
+  // Ensure that all dependency data is updated (for nodes in the sub-graph)
+  // and fill the ready-list with initial instructions.
int Idx = 0; - int NumToSchedule = 0; for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) { - BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) { + BS->doForAllOpcodes(I, [this, &Idx, BS](ScheduleData *SD) { + TreeEntry *SDTE = getTreeEntry(SD->Inst); + (void)SDTE; assert((isVectorLikeInstWithConstOps(SD->Inst) || - SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) && + SD->isPartOfBundle() == + (SDTE && !doesNotNeedToSchedule(SDTE->Scalars))) && "scheduler and vectorizer bundle mismatch"); SD->FirstInBundle->SchedulingPriority = Idx++; - if (SD->isSchedulingEntity()) { + + if (SD->isSchedulingEntity() && SD->isPartOfBundle()) BS->calculateDependencies(SD, false, this); - NumToSchedule++; - } }); } BS->initialFillReadyList(ReadyInsts); @@ -7853,9 +9579,23 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { } BS->schedule(picked, ReadyInsts); - NumToSchedule--; } - assert(NumToSchedule == 0 && "could not schedule all instructions"); + + // Check that we didn't break any of our invariants. +#ifdef EXPENSIVE_CHECKS + BS->verify(); +#endif + +#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS) + // Check that all schedulable entities got scheduled + for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) { + BS->doForAllOpcodes(I, [&](ScheduleData *SD) { + if (SD->isSchedulingEntity() && SD->hasValidDependencies()) { + assert(SD->IsScheduled && "must be scheduled at this point"); + } + }); + } +#endif // Avoid duplicate scheduling of the block. BS->ScheduleStart = nullptr; @@ -7865,11 +9605,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { // If V is a store, just return the width of the stored value (or value // truncated just before storing) without traversing the expression tree. // This is the common case. - if (auto *Store = dyn_cast(V)) { - if (auto *Trunc = dyn_cast(Store->getValueOperand())) - return DL->getTypeSizeInBits(Trunc->getSrcTy()); + if (auto *Store = dyn_cast(V)) return DL->getTypeSizeInBits(Store->getValueOperand()->getType()); - } if (auto *IEI = dyn_cast(V)) return getVectorElementSize(IEI->getOperand(1)); @@ -8271,6 +10008,8 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Scan the blocks in the function in post order. for (auto BB : post_order(&F.getEntryBlock())) { + // Start new block - clear the list of reduction roots. + R.clearReductionData(); collectSeedInstructions(BB); // Vectorize trees that end at stores. 
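The scheduleBlock changes above rely on a ready list ordered by each entry's original position, so the emitted schedule stays close to source order while still honoring dependence edges. The following self-contained C++ sketch (illustrative only; SD, Cmp, and Users are hypothetical names, not LLVM's types) shows the shape of that loop: pop the earliest ready entry, retire it, and release its dependents:

    #include <cassert>
    #include <set>
    #include <vector>

    struct SD {
      int SchedulingPriority; // position in the original block
      int UnscheduledDeps;
      std::vector<SD *> Users; // entries that depend on this one
    };

    struct Cmp {
      bool operator()(const SD *A, const SD *B) const {
        return A->SchedulingPriority < B->SchedulingPriority;
      }
    };

    int main() {
      SD A{0, 0, {}}, B{1, 1, {}}, C{2, 0, {}};
      A.Users = {&B}; // B depends on A, so B starts out not ready
      std::set<SD *, Cmp> Ready = {&A, &C};
      std::vector<int> Order;
      while (!Ready.empty()) {
        SD *Picked = *Ready.begin(); // earliest original position wins
        Ready.erase(Ready.begin());
        Order.push_back(Picked->SchedulingPriority);
        for (SD *U : Picked->Users)
          if (--U->UnscheduledDeps == 0)
            Ready.insert(U); // dependent becomes ready only now
      }
      // Despite B's dependence edge, the result matches source order.
      assert((Order == std::vector<int>{0, 1, 2}));
      return 0;
    }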
@@ -8301,11 +10040,10 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, } bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, - unsigned Idx) { + unsigned Idx, unsigned MinVF) { LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size() << "\n"); const unsigned Sz = R.getVectorElementSize(Chain[0]); - const unsigned MinVF = R.getMinVecRegSize() / Sz; unsigned VF = Chain.size(); if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF) @@ -8444,9 +10182,15 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef Stores, unsigned EltSize = R.getVectorElementSize(Operands[0]); unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize); - unsigned MinVF = R.getMinVF(EltSize); unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts); + auto *Store = cast(Operands[0]); + Type *StoreTy = Store->getValueOperand()->getType(); + Type *ValueTy = StoreTy; + if (auto *Trunc = dyn_cast(Store->getValueOperand())) + ValueTy = Trunc->getSrcTy(); + unsigned MinVF = TTI->getStoreMinimumVF( + R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy); // FIXME: Is division-by-2 the correct step? Should we assert that the // register size is a power-of-2? @@ -8456,7 +10200,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef Stores, ArrayRef Slice = makeArrayRef(Operands).slice(Cnt, Size); if (!VectorizedStores.count(Slice.front()) && !VectorizedStores.count(Slice.back()) && - vectorizeStoreChain(Slice, R, Cnt)) { + vectorizeStoreChain(Slice, R, Cnt, MinVF)) { // Mark the vectorized stores so that we don't vectorize them again. VectorizedStores.insert(Slice.begin(), Slice.end()); Changed = true; @@ -8516,6 +10260,8 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; + if (isa(A) || isa(B)) + return false; Value *VL[] = {A, B}; return tryToVectorizeList(VL, R); } @@ -8658,7 +10404,8 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) { if (!I) return false; - if (!isa(I) && !isa(I)) + if ((!isa(I) && !isa(I)) || + isa(I->getType())) return false; Value *P = I->getParent(); @@ -8669,32 +10416,40 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) { if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P) return false; - // Try to vectorize V. - if (tryToVectorizePair(Op0, Op1, R)) - return true; + // First collect all possible candidates + SmallVector, 4> Candidates; + Candidates.emplace_back(Op0, Op1); auto *A = dyn_cast(Op0); auto *B = dyn_cast(Op1); // Try to skip B. - if (B && B->hasOneUse()) { + if (A && B && B->hasOneUse()) { auto *B0 = dyn_cast(B->getOperand(0)); auto *B1 = dyn_cast(B->getOperand(1)); - if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R)) - return true; - if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R)) - return true; + if (B0 && B0->getParent() == P) + Candidates.emplace_back(A, B0); + if (B1 && B1->getParent() == P) + Candidates.emplace_back(A, B1); } - // Try to skip A. 
- if (A && A->hasOneUse()) { + if (B && A && A->hasOneUse()) { auto *A0 = dyn_cast(A->getOperand(0)); auto *A1 = dyn_cast(A->getOperand(1)); - if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R)) - return true; - if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R)) - return true; + if (A0 && A0->getParent() == P) + Candidates.emplace_back(A0, B); + if (A1 && A1->getParent() == P) + Candidates.emplace_back(A1, B); } - return false; + + if (Candidates.size() == 1) + return tryToVectorizePair(Op0, Op1, R); + + // We have multiple options. Try to pick the single best. + Optional BestCandidate = R.findBestRootPair(Candidates); + if (!BestCandidate) + return false; + return tryToVectorizePair(Candidates[*BestCandidate].first, + Candidates[*BestCandidate].second, R); } namespace { @@ -8729,15 +10484,16 @@ class HorizontalReduction { using ReductionOpsType = SmallVector; using ReductionOpsListType = SmallVector; ReductionOpsListType ReductionOps; - SmallVector ReducedVals; + /// List of possibly reduced values. + SmallVector> ReducedVals; + /// Maps reduced value to the corresponding reduction operation. + DenseMap> ReducedValsToOps; // Use map vector to make stable output. MapVector ExtraArgs; WeakTrackingVH ReductionRoot; /// The type of reduction operation. RecurKind RdxKind; - const unsigned INVALID_OPERAND_INDEX = std::numeric_limits::max(); - static bool isCmpSelMinMax(Instruction *I) { return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) && RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I)); @@ -8781,26 +10537,6 @@ class HorizontalReduction { return I->getOperand(Index); } - /// Checks if the ParentStackElem.first should be marked as a reduction - /// operation with an extra argument or as extra argument itself. - void markExtraArg(std::pair &ParentStackElem, - Value *ExtraArg) { - if (ExtraArgs.count(ParentStackElem.first)) { - ExtraArgs[ParentStackElem.first] = nullptr; - // We ran into something like: - // ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg. - // The whole ParentStackElem.first should be considered as an extra value - // in this case. - // Do not perform analysis of remaining operands of ParentStackElem.first - // instruction, this whole instruction is an extra argument. - ParentStackElem.second = INVALID_OPERAND_INDEX; - } else { - // We ran into something like: - // ParentStackElem.first += ... + ExtraArg + ... - ExtraArgs[ParentStackElem.first] = ExtraArg; - } - } - /// Creates reduction operation with the current opcode. static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS, Value *RHS, const Twine &Name, bool UseSelect) { @@ -8859,7 +10595,7 @@ class HorizontalReduction { } /// Creates reduction operation with the current opcode with the IR flags - /// from \p ReductionOps. + /// from \p ReductionOps, dropping nuw/nsw flags. 
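+  /// Dropping the wrap flags is required because the vectorized reduction
+  /// reassociates the scalar chain. As an illustrative i8 example, the chain
+  /// ((-100 + 100) + 100) keeps every partial sum in range, while the
+  /// reassociated -100 + (100 + 100) overflows in its intermediate sum, so
+  /// an nsw flag that held on the original chain may not hold afterwards.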
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS, Value *RHS, const Twine &Name, const ReductionOpsListType &ReductionOps) { @@ -8873,31 +10609,21 @@ class HorizontalReduction { Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect); if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) { if (auto *Sel = dyn_cast(Op)) { - propagateIRFlags(Sel->getCondition(), ReductionOps[0]); - propagateIRFlags(Op, ReductionOps[1]); + propagateIRFlags(Sel->getCondition(), ReductionOps[0], nullptr, + /*IncludeWrapFlags=*/false); + propagateIRFlags(Op, ReductionOps[1], nullptr, + /*IncludeWrapFlags=*/false); return Op; } } - propagateIRFlags(Op, ReductionOps[0]); - return Op; - } - - /// Creates reduction operation with the current opcode with the IR flags - /// from \p I. - static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS, - Value *RHS, const Twine &Name, Instruction *I) { - auto *SelI = dyn_cast(I); - Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr); - if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) { - if (auto *Sel = dyn_cast(Op)) - propagateIRFlags(Sel->getCondition(), SelI->getCondition()); - } - propagateIRFlags(Op, I); + propagateIRFlags(Op, ReductionOps[0], nullptr, /*IncludeWrapFlags=*/false); return Op; } - static RecurKind getRdxKind(Instruction *I) { - assert(I && "Expected instruction for reduction matching"); + static RecurKind getRdxKind(Value *V) { + auto *I = dyn_cast(V); + if (!I) + return RecurKind::None; if (match(I, m_Add(m_Value(), m_Value()))) return RecurKind::Add; if (match(I, m_Mul(m_Value(), m_Value()))) @@ -9059,7 +10785,9 @@ public: HorizontalReduction() = default; /// Try to find a reduction tree. - bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst) { + bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst, + ScalarEvolution &SE, const DataLayout &DL, + const TargetLibraryInfo &TLI) { assert((!Phi || is_contained(Phi->operands(), Inst)) && "Phi needs to use the binary operator"); assert((isa(Inst) || isa(Inst) || @@ -9103,124 +10831,178 @@ public: ReductionRoot = Inst; - // The opcode for leaf values that we perform a reduction on. - // For example: load(x) + load(y) + load(z) + fptoui(w) - // The leaf opcode for 'w' does not match, so we don't include it as a - // potential candidate for the reduction. - unsigned LeafOpcode = 0; - - // Post-order traverse the reduction tree starting at Inst. We only handle - // true trees containing binary operators or selects. - SmallVector, 32> Stack; - Stack.push_back(std::make_pair(Inst, getFirstOperandIndex(Inst))); - initReductionOps(Inst); - while (!Stack.empty()) { - Instruction *TreeN = Stack.back().first; - unsigned EdgeToVisit = Stack.back().second++; - const RecurKind TreeRdxKind = getRdxKind(TreeN); - bool IsReducedValue = TreeRdxKind != RdxKind; - - // Postorder visit. - if (IsReducedValue || EdgeToVisit >= getNumberOfOperands(TreeN)) { - if (IsReducedValue) - ReducedVals.push_back(TreeN); - else { - auto ExtraArgsIter = ExtraArgs.find(TreeN); - if (ExtraArgsIter != ExtraArgs.end() && !ExtraArgsIter->second) { - // Check if TreeN is an extra argument of its parent operation. - if (Stack.size() <= 1) { - // TreeN can't be an extra argument as it is a root reduction - // operation. - return false; - } - // Yes, TreeN is an extra argument, do not add it to a list of - // reduction operations. - // Stack[Stack.size() - 2] always points to the parent operation. 
-          markExtraArg(Stack[Stack.size() - 2], TreeN);
-          ExtraArgs.erase(TreeN);
-        } else
-          addReductionOps(TreeN);
-      }
-      // Retract.
-      Stack.pop_back();
-      continue;
-    }
-
-    // Visit operands.
-    Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
-    auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
-    if (!EdgeInst) {
-      // Edge value is not a reduction instruction or a leaf instruction.
-      // (It may be a constant, function argument, or something else.)
-      markExtraArg(Stack.back(), EdgeVal);
-      continue;
+    // Iterate through all the operands of the possible reduction tree and
+    // gather all the reduced values, sorting them by their value id.
+    BasicBlock *BB = Inst->getParent();
+    bool IsCmpSelMinMax = isCmpSelMinMax(Inst);
+    SmallVector<Instruction *> Worklist(1, Inst);
+    // Checks if the operands of the \p TreeN instruction are also reduction
+    // operations or should be treated as reduced values or an extra argument,
+    // which is not part of the reduction.
+    auto &&CheckOperands = [this, IsCmpSelMinMax,
+                            BB](Instruction *TreeN,
+                                SmallVectorImpl<Value *> &ExtraArgs,
+                                SmallVectorImpl<Value *> &PossibleReducedVals,
+                                SmallVectorImpl<Instruction *> &ReductionOps) {
+      for (int I = getFirstOperandIndex(TreeN),
+               End = getNumberOfOperands(TreeN);
+           I < End; ++I) {
+        Value *EdgeVal = getRdxOperand(TreeN, I);
+        ReducedValsToOps[EdgeVal].push_back(TreeN);
+        auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
+        // Edge has wrong parent - mark as an extra argument.
+        if (EdgeInst && !isVectorLikeInstWithConstOps(EdgeInst) &&
+            !hasSameParent(EdgeInst, BB)) {
+          ExtraArgs.push_back(EdgeVal);
+          continue;
+        }
+        // If the edge is not an instruction, or it is different from the main
+        // reduction opcode or has too many uses - possible reduced value.
+        if (!EdgeInst || getRdxKind(EdgeInst) != RdxKind ||
+            IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
+            !hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
+            !isVectorizable(getRdxKind(EdgeInst), EdgeInst)) {
+          PossibleReducedVals.push_back(EdgeVal);
+          continue;
+        }
+        ReductionOps.push_back(EdgeInst);
       }
-      RecurKind EdgeRdxKind = getRdxKind(EdgeInst);
-      // Continue analysis if the next operand is a reduction operation or
-      // (possibly) a leaf value. If the leaf value opcode is not set,
-      // the first met operation != reduction operation is considered as the
-      // leaf opcode.
-      // Only handle trees in the current basic block.
-      // Each tree node needs to have minimal number of users except for the
-      // ultimate reduction.
-      const bool IsRdxInst = EdgeRdxKind == RdxKind;
-      if (EdgeInst != Phi && EdgeInst != Inst &&
-          hasSameParent(EdgeInst, Inst->getParent()) &&
-          hasRequiredNumberOfUses(isCmpSelMinMax(Inst), EdgeInst) &&
-          (!LeafOpcode || LeafOpcode == EdgeInst->getOpcode() || IsRdxInst)) {
-        if (IsRdxInst) {
-          // We need to be able to reassociate the reduction operations.
-          if (!isVectorizable(EdgeRdxKind, EdgeInst)) {
-            // I is an extra argument for TreeN (its parent operation).
-            markExtraArg(Stack.back(), EdgeInst);
-            continue;
-          }
-        } else if (!LeafOpcode) {
-          LeafOpcode = EdgeInst->getOpcode();
+    };
+    // Try to regroup reduced values so that it gets more profitable to try to
+    // reduce them. Values are grouped by their value ids, instructions - by
+    // instruction op id and/or alternate op id, plus do extra analysis for
+    // loads (grouping them by the distance between pointers) and cmp
+    // instructions (grouping them by the predicate).
+ MapVector>> + PossibleReducedVals; + initReductionOps(Inst); + while (!Worklist.empty()) { + Instruction *TreeN = Worklist.pop_back_val(); + SmallVector Args; + SmallVector PossibleRedVals; + SmallVector PossibleReductionOps; + CheckOperands(TreeN, Args, PossibleRedVals, PossibleReductionOps); + // If too many extra args - mark the instruction itself as a reduction + // value, not a reduction operation. + if (Args.size() < 2) { + addReductionOps(TreeN); + // Add extra args. + if (!Args.empty()) { + assert(Args.size() == 1 && "Expected only single argument."); + ExtraArgs[TreeN] = Args.front(); } - Stack.push_back( - std::make_pair(EdgeInst, getFirstOperandIndex(EdgeInst))); - continue; + // Add reduction values. The values are sorted for better vectorization + // results. + for (Value *V : PossibleRedVals) { + size_t Key, Idx; + std::tie(Key, Idx) = generateKeySubkey( + V, &TLI, + [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) { + auto It = PossibleReducedVals.find(Key); + if (It != PossibleReducedVals.end()) { + for (const auto &LoadData : It->second) { + auto *RLI = cast(LoadData.second.front().first); + if (getPointersDiff(RLI->getType(), + RLI->getPointerOperand(), LI->getType(), + LI->getPointerOperand(), DL, SE, + /*StrictCheck=*/true)) + return hash_value(RLI->getPointerOperand()); + } + } + return hash_value(LI->getPointerOperand()); + }, + /*AllowAlternate=*/false); + ++PossibleReducedVals[Key][Idx] + .insert(std::make_pair(V, 0)) + .first->second; + } + Worklist.append(PossibleReductionOps.rbegin(), + PossibleReductionOps.rend()); + } else { + size_t Key, Idx; + std::tie(Key, Idx) = generateKeySubkey( + TreeN, &TLI, + [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) { + auto It = PossibleReducedVals.find(Key); + if (It != PossibleReducedVals.end()) { + for (const auto &LoadData : It->second) { + auto *RLI = cast(LoadData.second.front().first); + if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(), + LI->getType(), LI->getPointerOperand(), + DL, SE, /*StrictCheck=*/true)) + return hash_value(RLI->getPointerOperand()); + } + } + return hash_value(LI->getPointerOperand()); + }, + /*AllowAlternate=*/false); + ++PossibleReducedVals[Key][Idx] + .insert(std::make_pair(TreeN, 0)) + .first->second; + } + } + auto PossibleReducedValsVect = PossibleReducedVals.takeVector(); + // Sort values by the total number of values kinds to start the reduction + // from the longest possible reduced values sequences. + for (auto &PossibleReducedVals : PossibleReducedValsVect) { + auto PossibleRedVals = PossibleReducedVals.second.takeVector(); + SmallVector> PossibleRedValsVect; + for (auto It = PossibleRedVals.begin(), E = PossibleRedVals.end(); + It != E; ++It) { + PossibleRedValsVect.emplace_back(); + auto RedValsVect = It->second.takeVector(); + stable_sort(RedValsVect, [](const auto &P1, const auto &P2) { + return P1.second < P2.second; + }); + for (const std::pair &Data : RedValsVect) + PossibleRedValsVect.back().append(Data.second, Data.first); } - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), EdgeInst); - } + stable_sort(PossibleRedValsVect, [](const auto &P1, const auto &P2) { + return P1.size() > P2.size(); + }); + ReducedVals.emplace_back(); + for (ArrayRef Data : PossibleRedValsVect) + ReducedVals.back().append(Data.rbegin(), Data.rend()); + } + // Sort the reduced values by number of same/alternate opcode and/or pointer + // operand. 
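+    // For instance (hypothetical input): when summing loads a[0], a[1], b[0],
+    // a[2], the loads from `a` land in one (key, subkey) bucket because
+    // getPointersDiff recognizes their common base, so {a[0], a[1], a[2]}
+    // form one candidate group, {b[0]} another, and larger groups are tried
+    // first below.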
+ stable_sort(ReducedVals, [](ArrayRef P1, ArrayRef P2) { + return P1.size() > P2.size(); + }); return true; } /// Attempt to vectorize the tree found by matchAssociativeReduction. Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { + constexpr int ReductionLimit = 4; + constexpr unsigned RegMaxNumber = 4; + constexpr unsigned RedValsMaxNumber = 128; // If there are a sufficient number of reduction values, reduce // to a nearby power-of-2. We can safely generate oversized // vectors and rely on the backend to split them to legal sizes. - unsigned NumReducedVals = ReducedVals.size(); - if (NumReducedVals < 4) + unsigned NumReducedVals = std::accumulate( + ReducedVals.begin(), ReducedVals.end(), 0, + [](int Num, ArrayRef Vals) { return Num + Vals.size(); }); + if (NumReducedVals < ReductionLimit) return nullptr; - // Intersect the fast-math-flags from all reduction operations. - FastMathFlags RdxFMF; - RdxFMF.set(); - for (ReductionOpsType &RdxOp : ReductionOps) { - for (Value *RdxVal : RdxOp) { - if (auto *FPMO = dyn_cast(RdxVal)) - RdxFMF &= FPMO->getFastMathFlags(); - } - } - IRBuilder<> Builder(cast(ReductionRoot)); - Builder.setFastMathFlags(RdxFMF); + // Track the reduced values in case if they are replaced by extractelement + // because of the vectorization. + DenseMap TrackedVals; BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; // The same extra argument may be used several times, so log each attempt // to use it. for (const std::pair &Pair : ExtraArgs) { assert(Pair.first && "DebugLoc must be set."); ExternallyUsedValues[Pair.second].push_back(Pair.first); + TrackedVals.try_emplace(Pair.second, Pair.second); } // The compare instruction of a min/max is the insertion point for new // instructions and may be replaced with a new compare instruction. - auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) { + auto &&GetCmpForMinMaxReduction = [](Instruction *RdxRootInst) { assert(isa(RdxRootInst) && "Expected min/max reduction to have select root instruction"); Value *ScalarCond = cast(RdxRootInst)->getCondition(); @@ -9232,164 +11014,390 @@ public: // The reduction root is used as the insertion point for new instructions, // so set it as externally used to prevent it from being deleted. ExternallyUsedValues[ReductionRoot]; - SmallVector IgnoreList; - for (ReductionOpsType &RdxOp : ReductionOps) - IgnoreList.append(RdxOp.begin(), RdxOp.end()); - - unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); - if (NumReducedVals > ReduxWidth) { - // In the loop below, we are building a tree based on a window of - // 'ReduxWidth' values. - // If the operands of those values have common traits (compare predicate, - // constant operand, etc), then we want to group those together to - // minimize the cost of the reduction. - - // TODO: This should be extended to count common operands for - // compares and binops. - - // Step 1: Count the number of times each compare predicate occurs. - SmallDenseMap PredCountMap; - for (Value *RdxVal : ReducedVals) { - CmpInst::Predicate Pred; - if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value()))) - ++PredCountMap[Pred]; - } - // Step 2: Sort the values so the most common predicates come first. 
-    stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
-      CmpInst::Predicate PredA, PredB;
-      if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
-          match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
-        return PredCountMap[PredA] > PredCountMap[PredB];
-      }
-      return false;
-    });
-  }
+    SmallDenseSet<Value *> IgnoreList;
+    for (ReductionOpsType &RdxOps : ReductionOps)
+      for (Value *RdxOp : RdxOps) {
+        if (!RdxOp)
+          continue;
+        IgnoreList.insert(RdxOp);
+      }
+    bool IsCmpSelMinMax = isCmpSelMinMax(cast<Instruction>(ReductionRoot));
+
+    // Need to track reduced vals, they may be changed during vectorization of
+    // subvectors.
+    for (ArrayRef<Value *> Candidates : ReducedVals)
+      for (Value *V : Candidates)
+        TrackedVals.try_emplace(V, V);
+    DenseMap<Value *, unsigned> VectorizedVals;
     Value *VectorizedTree = nullptr;
-    unsigned i = 0;
-    while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
-      ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
-      V.buildTree(VL, IgnoreList);
-      if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true))
-        break;
-      if (V.isLoadCombineReductionCandidate(RdxKind))
-        break;
-      V.reorderTopToBottom();
-      V.reorderBottomToTop(/*IgnoreReorder=*/true);
-      V.buildExternalUses(ExternallyUsedValues);
-
-      // For a poison-safe boolean logic reduction, do not replace select
-      // instructions with logic ops. All reduced values will be frozen (see
-      // below) to prevent leaking poison.
-      if (isa<SelectInst>(ReductionRoot) &&
-          isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
-          NumReducedVals != ReduxWidth)
-        break;
+    bool CheckForReusedReductionOps = false;
+    // Try to vectorize elements based on their type.
+    for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
+      ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
+      InstructionsState S = getSameOpcode(OrigReducedVals);
+      SmallVector<Value *> Candidates;
+      DenseMap<Value *, Value *> TrackedToOrig;
+      for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
+        Value *RdxVal = TrackedVals.find(OrigReducedVals[Cnt])->second;
+        // Check if the reduction value was not overridden by the
+        // extractelement instruction because of the vectorization and exclude
+        // it, if it is not compatible with other values.
+        if (auto *Inst = dyn_cast<Instruction>(RdxVal))
+          if (isVectorLikeInstWithConstOps(Inst) &&
+              (!S.getOpcode() || !S.isOpcodeOrAlt(Inst)))
+            continue;
+        Candidates.push_back(RdxVal);
+        TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]);
+      }
+      bool ShuffledExtracts = false;
+      // Try to handle shuffled extractelements.
+      if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
+          I + 1 < E) {
+        InstructionsState NextS = getSameOpcode(ReducedVals[I + 1]);
+        if (NextS.getOpcode() == Instruction::ExtractElement &&
+            !NextS.isAltShuffle()) {
+          SmallVector<Value *> CommonCandidates(Candidates);
+          for (Value *RV : ReducedVals[I + 1]) {
+            Value *RdxVal = TrackedVals.find(RV)->second;
+            // Check if the reduction value was not overridden by the
+            // extractelement instruction because of the vectorization and
+            // exclude it, if it is not compatible with other values.
+            if (auto *Inst = dyn_cast<Instruction>(RdxVal))
+              if (!NextS.getOpcode() || !NextS.isOpcodeOrAlt(Inst))
+                continue;
+            CommonCandidates.push_back(RdxVal);
+            TrackedToOrig.try_emplace(RdxVal, RV);
+          }
+          SmallVector<int> Mask;
+          if (isFixedVectorShuffle(CommonCandidates, Mask)) {
+            ++I;
+            Candidates.swap(CommonCandidates);
+            ShuffledExtracts = true;
+          }
+        }
+      }
+      unsigned NumReducedVals = Candidates.size();
+      if (NumReducedVals < ReductionLimit)
+        continue;

-      V.computeMinimumValueSizes();
+      unsigned MaxVecRegSize = V.getMaxVecRegSize();
+      unsigned EltSize = V.getVectorElementSize(Candidates[0]);
+      unsigned MaxElts = RegMaxNumber * PowerOf2Floor(MaxVecRegSize / EltSize);
+
+      unsigned ReduxWidth = std::min<unsigned>(
+          PowerOf2Floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
+      unsigned Start = 0;
+      unsigned Pos = Start;
+      // Restarts vectorization attempt with lower vector factor.
+      unsigned PrevReduxWidth = ReduxWidth;
+      bool CheckForReusedReductionOpsLocal = false;
+      auto &&AdjustReducedVals = [&Pos, &Start, &ReduxWidth, NumReducedVals,
+                                  &CheckForReusedReductionOpsLocal,
+                                  &PrevReduxWidth, &V,
+                                  &IgnoreList](bool IgnoreVL = false) {
+        bool IsAnyRedOpGathered = !IgnoreVL && V.isAnyGathered(IgnoreList);
+        if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) {
+          // Check if any of the reduction ops are gathered. If so, worth
+          // trying again with a smaller number of reduction ops.
+          CheckForReusedReductionOpsLocal |= IsAnyRedOpGathered;
+        }
+        ++Pos;
+        if (Pos < NumReducedVals - ReduxWidth + 1)
+          return IsAnyRedOpGathered;
+        Pos = Start;
+        ReduxWidth /= 2;
+        return IsAnyRedOpGathered;
+      };
+      while (Pos < NumReducedVals - ReduxWidth + 1 &&
+             ReduxWidth >= ReductionLimit) {
+        // Dependency in tree of the reduction ops - drop this attempt, try
+        // later.
+        if (CheckForReusedReductionOpsLocal && PrevReduxWidth != ReduxWidth &&
+            Start == 0) {
+          CheckForReusedReductionOps = true;
+          break;
+        }
+        PrevReduxWidth = ReduxWidth;
+        ArrayRef<Value *> VL(std::next(Candidates.begin(), Pos), ReduxWidth);
+        // Being analyzed already - skip.
+        if (V.areAnalyzedReductionVals(VL)) {
+          (void)AdjustReducedVals(/*IgnoreVL=*/true);
+          continue;
+        }
+        // Early exit if any of the reduction values were deleted during
+        // previous vectorization attempts.
+        if (any_of(VL, [&V](Value *RedVal) {
+              auto *RedValI = dyn_cast<Instruction>(RedVal);
+              if (!RedValI)
+                return false;
+              return V.isDeleted(RedValI);
+            }))
+          break;
+        V.buildTree(VL, IgnoreList);
+        if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true)) {
+          if (!AdjustReducedVals())
+            V.analyzedReductionVals(VL);
+          continue;
+        }
+        if (V.isLoadCombineReductionCandidate(RdxKind)) {
+          if (!AdjustReducedVals())
+            V.analyzedReductionVals(VL);
+          continue;
+        }
+        V.reorderTopToBottom();
+        // No need to reorder the root node at all.
+        V.reorderBottomToTop(/*IgnoreReorder=*/true);
+        // Keep extracted other reduction values, if they are used in the
+        // vectorization trees.
+        BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues(
+            ExternallyUsedValues);
+        for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
+          if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
+            continue;
+          for_each(ReducedVals[Cnt],
+                   [&LocalExternallyUsedValues, &TrackedVals](Value *V) {
+                     if (isa<Instruction>(V))
+                       LocalExternallyUsedValues[TrackedVals[V]];
+                   });
+        }
+        // Number of uses of the candidates in the vector of values.
+ SmallDenseMap NumUses; + for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) { + Value *V = Candidates[Cnt]; + if (NumUses.count(V) > 0) + continue; + NumUses[V] = std::count(VL.begin(), VL.end(), V); + } + for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) { + Value *V = Candidates[Cnt]; + if (NumUses.count(V) > 0) + continue; + NumUses[V] = std::count(VL.begin(), VL.end(), V); + } + // Gather externally used values. + SmallPtrSet Visited; + for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) { + Value *V = Candidates[Cnt]; + if (!Visited.insert(V).second) + continue; + unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V]; + if (NumOps != ReducedValsToOps.find(V)->second.size()) + LocalExternallyUsedValues[V]; + } + for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) { + Value *V = Candidates[Cnt]; + if (!Visited.insert(V).second) + continue; + unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V]; + if (NumOps != ReducedValsToOps.find(V)->second.size()) + LocalExternallyUsedValues[V]; + } + V.buildExternalUses(LocalExternallyUsedValues); + + V.computeMinimumValueSizes(); + + // Intersect the fast-math-flags from all reduction operations. + FastMathFlags RdxFMF; + RdxFMF.set(); + for (Value *U : IgnoreList) + if (auto *FPMO = dyn_cast(U)) + RdxFMF &= FPMO->getFastMathFlags(); + // Estimate cost. + InstructionCost TreeCost = V.getTreeCost(VL); + InstructionCost ReductionCost = + getReductionCost(TTI, VL, ReduxWidth, RdxFMF); + InstructionCost Cost = TreeCost + ReductionCost; + if (!Cost.isValid()) { + LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n"); + return nullptr; + } + if (Cost >= -SLPCostThreshold) { + V.getORE()->emit([&]() { + return OptimizationRemarkMissed( + SV_NAME, "HorSLPNotBeneficial", + ReducedValsToOps.find(VL[0])->second.front()) + << "Vectorizing horizontal reduction is possible" + << "but not beneficial with cost " << ore::NV("Cost", Cost) + << " and threshold " + << ore::NV("Threshold", -SLPCostThreshold); + }); + if (!AdjustReducedVals()) + V.analyzedReductionVals(VL); + continue; + } - // Estimate cost. - InstructionCost TreeCost = - V.getTreeCost(makeArrayRef(&ReducedVals[i], ReduxWidth)); - InstructionCost ReductionCost = - getReductionCost(TTI, ReducedVals[i], ReduxWidth, RdxFMF); - InstructionCost Cost = TreeCost + ReductionCost; - if (!Cost.isValid()) { - LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n"); - return nullptr; - } - if (Cost >= -SLPCostThreshold) { + LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" + << Cost << ". (HorRdx)\n"); V.getORE()->emit([&]() { - return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial", - cast(VL[0])) - << "Vectorizing horizontal reduction is possible" - << "but not beneficial with cost " << ore::NV("Cost", Cost) - << " and threshold " - << ore::NV("Threshold", -SLPCostThreshold); + return OptimizationRemark( + SV_NAME, "VectorizedHorizontalReduction", + ReducedValsToOps.find(VL[0])->second.front()) + << "Vectorized horizontal reduction with cost " + << ore::NV("Cost", Cost) << " and with tree size " + << ore::NV("TreeSize", V.getTreeSize()); }); - break; - } - LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" - << Cost << ". 
(HorRdx)\n"); - V.getORE()->emit([&]() { - return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", - cast(VL[0])) - << "Vectorized horizontal reduction with cost " - << ore::NV("Cost", Cost) << " and with tree size " - << ore::NV("TreeSize", V.getTreeSize()); - }); + Builder.setFastMathFlags(RdxFMF); - // Vectorize a tree. - DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc(); - Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues); + // Vectorize a tree. + Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues); - // Emit a reduction. If the root is a select (min/max idiom), the insert - // point is the compare condition of that select. - Instruction *RdxRootInst = cast(ReductionRoot); - if (isCmpSelMinMax(RdxRootInst)) - Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst)); - else - Builder.SetInsertPoint(RdxRootInst); + // Emit a reduction. If the root is a select (min/max idiom), the insert + // point is the compare condition of that select. + Instruction *RdxRootInst = cast(ReductionRoot); + if (IsCmpSelMinMax) + Builder.SetInsertPoint(GetCmpForMinMaxReduction(RdxRootInst)); + else + Builder.SetInsertPoint(RdxRootInst); - // To prevent poison from leaking across what used to be sequential, safe, - // scalar boolean logic operations, the reduction operand must be frozen. - if (isa(RdxRootInst) && isBoolLogicOp(RdxRootInst)) - VectorizedRoot = Builder.CreateFreeze(VectorizedRoot); + // To prevent poison from leaking across what used to be sequential, + // safe, scalar boolean logic operations, the reduction operand must be + // frozen. + if (isa(RdxRootInst) && isBoolLogicOp(RdxRootInst)) + VectorizedRoot = Builder.CreateFreeze(VectorizedRoot); - Value *ReducedSubTree = - emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); + Value *ReducedSubTree = + emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); - if (!VectorizedTree) { - // Initialize the final value in the reduction. - VectorizedTree = ReducedSubTree; - } else { - // Update the final value in the reduction. - Builder.SetCurrentDebugLocation(Loc); - VectorizedTree = createOp(Builder, RdxKind, VectorizedTree, - ReducedSubTree, "op.rdx", ReductionOps); + if (!VectorizedTree) { + // Initialize the final value in the reduction. + VectorizedTree = ReducedSubTree; + } else { + // Update the final value in the reduction. + Builder.SetCurrentDebugLocation( + cast(ReductionOps.front().front())->getDebugLoc()); + VectorizedTree = createOp(Builder, RdxKind, VectorizedTree, + ReducedSubTree, "op.rdx", ReductionOps); + } + // Count vectorized reduced values to exclude them from final reduction. + for (Value *V : VL) + ++VectorizedVals.try_emplace(TrackedToOrig.find(V)->second, 0) + .first->getSecond(); + Pos += ReduxWidth; + Start = Pos; + ReduxWidth = PowerOf2Floor(NumReducedVals - Pos); } - i += ReduxWidth; - ReduxWidth = PowerOf2Floor(NumReducedVals - i); } - if (VectorizedTree) { // Finish the reduction. - for (; i < NumReducedVals; ++i) { - auto *I = cast(ReducedVals[i]); - Builder.SetCurrentDebugLocation(I->getDebugLoc()); - VectorizedTree = - createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps); + // Need to add extra arguments and not vectorized possible reduction + // values. + // Try to avoid dependencies between the scalar remainders after + // reductions. 
+ auto &&FinalGen = + [this, &Builder, + &TrackedVals](ArrayRef> InstVals) { + unsigned Sz = InstVals.size(); + SmallVector> ExtraReds(Sz / 2 + + Sz % 2); + for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) { + Instruction *RedOp = InstVals[I + 1].first; + Builder.SetCurrentDebugLocation(RedOp->getDebugLoc()); + Value *RdxVal1 = InstVals[I].second; + Value *StableRdxVal1 = RdxVal1; + auto It1 = TrackedVals.find(RdxVal1); + if (It1 != TrackedVals.end()) + StableRdxVal1 = It1->second; + Value *RdxVal2 = InstVals[I + 1].second; + Value *StableRdxVal2 = RdxVal2; + auto It2 = TrackedVals.find(RdxVal2); + if (It2 != TrackedVals.end()) + StableRdxVal2 = It2->second; + Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1, + StableRdxVal2, "op.rdx", ReductionOps); + ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed); + } + if (Sz % 2 == 1) + ExtraReds[Sz / 2] = InstVals.back(); + return ExtraReds; + }; + SmallVector> ExtraReductions; + SmallPtrSet Visited; + for (ArrayRef Candidates : ReducedVals) { + for (Value *RdxVal : Candidates) { + if (!Visited.insert(RdxVal).second) + continue; + unsigned NumOps = VectorizedVals.lookup(RdxVal); + for (Instruction *RedOp : + makeArrayRef(ReducedValsToOps.find(RdxVal)->second) + .drop_back(NumOps)) + ExtraReductions.emplace_back(RedOp, RdxVal); + } } for (auto &Pair : ExternallyUsedValues) { // Add each externally used value to the final reduction. - for (auto *I : Pair.second) { - Builder.SetCurrentDebugLocation(I->getDebugLoc()); - VectorizedTree = createOp(Builder, RdxKind, VectorizedTree, - Pair.first, "op.extra", I); - } + for (auto *I : Pair.second) + ExtraReductions.emplace_back(I, Pair.first); + } + // Iterate through all not-vectorized reduction values/extra arguments. + while (ExtraReductions.size() > 1) { + SmallVector> NewReds = + FinalGen(ExtraReductions); + ExtraReductions.swap(NewReds); + } + // Final reduction. + if (ExtraReductions.size() == 1) { + Instruction *RedOp = ExtraReductions.back().first; + Builder.SetCurrentDebugLocation(RedOp->getDebugLoc()); + Value *RdxVal = ExtraReductions.back().second; + Value *StableRdxVal = RdxVal; + auto It = TrackedVals.find(RdxVal); + if (It != TrackedVals.end()) + StableRdxVal = It->second; + VectorizedTree = createOp(Builder, RdxKind, VectorizedTree, + StableRdxVal, "op.rdx", ReductionOps); } ReductionRoot->replaceAllUsesWith(VectorizedTree); - // Mark all scalar reduction ops for deletion, they are replaced by the - // vector reductions. - V.eraseInstructions(IgnoreList); + // The original scalar reduction is expected to have no remaining + // uses outside the reduction tree itself. Assert that we got this + // correct, replace internal uses with undef, and mark for eventual + // deletion. 
+#ifndef NDEBUG + SmallSet IgnoreSet; + for (ArrayRef RdxOps : ReductionOps) + IgnoreSet.insert(RdxOps.begin(), RdxOps.end()); +#endif + for (ArrayRef RdxOps : ReductionOps) { + for (Value *Ignore : RdxOps) { + if (!Ignore) + continue; +#ifndef NDEBUG + for (auto *U : Ignore->users()) { + assert(IgnoreSet.count(U) && + "All users must be either in the reduction ops list."); + } +#endif + if (!Ignore->use_empty()) { + Value *Undef = UndefValue::get(Ignore->getType()); + Ignore->replaceAllUsesWith(Undef); + } + V.eraseInstruction(cast(Ignore)); + } + } + } else if (!CheckForReusedReductionOps) { + for (ReductionOpsType &RdxOps : ReductionOps) + for (Value *RdxOp : RdxOps) + V.analyzedReductionRoot(cast(RdxOp)); } return VectorizedTree; } - unsigned numReductionValues() const { return ReducedVals.size(); } - private: /// Calculate the cost of a reduction. InstructionCost getReductionCost(TargetTransformInfo *TTI, - Value *FirstReducedVal, unsigned ReduxWidth, - FastMathFlags FMF) { + ArrayRef ReducedVals, + unsigned ReduxWidth, FastMathFlags FMF) { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + Value *FirstReducedVal = ReducedVals.front(); Type *ScalarTy = FirstReducedVal->getType(); FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth); - InstructionCost VectorCost, ScalarCost; + InstructionCost VectorCost = 0, ScalarCost; + // If all of the reduced values are constant, the vector cost is 0, since + // the reduction value can be calculated at the compile time. + bool AllConsts = all_of(ReducedVals, isConstant); switch (RdxKind) { case RecurKind::Add: case RecurKind::Mul: @@ -9399,17 +11407,22 @@ private: case RecurKind::FAdd: case RecurKind::FMul: { unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind); - VectorCost = - TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind); + if (!AllConsts) + VectorCost = + TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind); ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind); break; } case RecurKind::FMax: case RecurKind::FMin: { auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy); - auto *VecCondTy = cast(CmpInst::makeCmpResultType(VectorTy)); - VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, - /*IsUnsigned=*/false, CostKind); + if (!AllConsts) { + auto *VecCondTy = + cast(CmpInst::makeCmpResultType(VectorTy)); + VectorCost = + TTI->getMinMaxReductionCost(VectorTy, VecCondTy, + /*IsUnsigned=*/false, CostKind); + } CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind); ScalarCost = TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy, SclCondTy, RdxPred, CostKind) + @@ -9422,11 +11435,14 @@ private: case RecurKind::UMax: case RecurKind::UMin: { auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy); - auto *VecCondTy = cast(CmpInst::makeCmpResultType(VectorTy)); - bool IsUnsigned = - RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin; - VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned, - CostKind); + if (!AllConsts) { + auto *VecCondTy = + cast(CmpInst::makeCmpResultType(VectorTy)); + bool IsUnsigned = + RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin; + VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, + IsUnsigned, CostKind); + } CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind); ScalarCost = TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy, SclCondTy, RdxPred, CostKind) + @@ -9490,21 +11506,22 @@ static Optional getAggregateSize(Instruction *InsertInst) { } while (true); 
} -static bool findBuildAggregate_rec(Instruction *LastInsertInst, +static void findBuildAggregate_rec(Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl &BuildVectorOpds, SmallVectorImpl &InsertElts, unsigned OperandOffset) { do { Value *InsertedOperand = LastInsertInst->getOperand(1); - Optional OperandIndex = getInsertIndex(LastInsertInst, OperandOffset); + Optional OperandIndex = + getInsertIndex(LastInsertInst, OperandOffset); if (!OperandIndex) - return false; + return; if (isa(InsertedOperand) || isa(InsertedOperand)) { - if (!findBuildAggregate_rec(cast(InsertedOperand), TTI, - BuildVectorOpds, InsertElts, *OperandIndex)) - return false; + findBuildAggregate_rec(cast(InsertedOperand), TTI, + BuildVectorOpds, InsertElts, *OperandIndex); + } else { BuildVectorOpds[*OperandIndex] = InsertedOperand; InsertElts[*OperandIndex] = LastInsertInst; @@ -9514,7 +11531,6 @@ static bool findBuildAggregate_rec(Instruction *LastInsertInst, (isa(LastInsertInst) || isa(LastInsertInst)) && LastInsertInst->hasOneUse()); - return true; } /// Recognize construction of vectors like @@ -9549,13 +11565,11 @@ static bool findBuildAggregate(Instruction *LastInsertInst, BuildVectorOpds.resize(*AggregateSize); InsertElts.resize(*AggregateSize); - if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts, - 0)) { - llvm::erase_value(BuildVectorOpds, nullptr); - llvm::erase_value(InsertElts, nullptr); - if (BuildVectorOpds.size() >= 2) - return true; - } + findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts, 0); + llvm::erase_value(BuildVectorOpds, nullptr); + llvm::erase_value(InsertElts, nullptr); + if (BuildVectorOpds.size() >= 2) + return true; return false; } @@ -9642,7 +11656,8 @@ static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) { /// performed. static bool tryToVectorizeHorReductionOrInstOperands( PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, - TargetTransformInfo *TTI, + TargetTransformInfo *TTI, ScalarEvolution &SE, const DataLayout &DL, + const TargetLibraryInfo &TLI, const function_ref Vectorize) { if (!ShouldVectorizeHor) return false; @@ -9661,7 +11676,7 @@ static bool tryToVectorizeHorReductionOrInstOperands( // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. - // Skip the analysis of CmpInsts.Compiler implements postanalysis of the + // Skip the analysis of CmpInsts. Compiler implements postanalysis of the // CmpInsts so we can skip extra attempts in // tryToVectorizeHorReductionOrInstOperands and save compile time. 
  std::queue<std::pair<Instruction *, unsigned>> Stack;
@@ -9669,13 +11684,16 @@ static bool tryToVectorizeHorReductionOrInstOperands(
  SmallPtrSet<Value *, 8> VisitedInstrs;
  SmallVector<WeakTrackingVH> PostponedInsts;
  bool Res = false;
- auto &&TryToReduce = [TTI, &P, &R](Instruction *Inst, Value *&B0,
-                                    Value *&B1) -> Value * {
+ auto &&TryToReduce = [TTI, &SE, &DL, &P, &R, &TLI](Instruction *Inst,
+                                                    Value *&B0,
+                                                    Value *&B1) -> Value * {
+   if (R.isAnalyzedReductionRoot(Inst))
+     return nullptr;
    bool IsBinop = matchRdxBop(Inst, B0, B1);
    bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
    if (IsBinop || IsSelect) {
      HorizontalReduction HorRdx;
-     if (HorRdx.matchAssociativeReduction(P, Inst))
+     if (HorRdx.matchAssociativeReduction(P, Inst, SE, DL, TLI))
        return HorRdx.tryToReduce(R, TTI);
    }
    return nullptr;
@@ -9720,7 +11738,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
    // Do not try to vectorize CmpInst operands, this is done separately.
    // Final attempt for binop args vectorization should happen after the loop
    // to try to find reductions.
-   if (!isa<CmpInst>(Inst))
+   if (!isa<CmpInst, InsertElementInst, InsertValueInst>(Inst))
      PostponedInsts.push_back(Inst);
  }

@@ -9733,8 +11751,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
      if (auto *I = dyn_cast<Instruction>(Op))
        // Do not try to vectorize CmpInst operands, this is done
        // separately.
-       if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
-           I->getParent() == BB)
+       if (!isa<PHINode, CmpInst, InsertElementInst, InsertValueInst>(I) &&
+           !R.isDeleted(I) && I->getParent() == BB)
          Stack.emplace(I, Level);
  }
  // Try to vectorized binops where reductions were not found.
@@ -9758,8 +11776,8 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
  auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
    return tryToVectorize(I, R);
  };
- return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
-                                                 ExtraVectorization);
+ return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, *SE, *DL,
+                                                 *TLI, ExtraVectorization);
 }

 bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
@@ -9927,12 +11945,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
  for (auto *I : reverse(Instructions)) {
    if (R.isDeleted(I))
      continue;
-   if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
+   if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) {
      OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
-   else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
+   } else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) {
      OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
-   else if (isa<CmpInst>(I))
+   } else if (isa<CmpInst>(I)) {
      PostponedCmps.push_back(I);
+     continue;
+   }
+   // Try to find reductions in buildvector sequences.
+   OpsChanged |= vectorizeRootInstruction(nullptr, I, BB, R, TTI);
  }
  if (AtTerminator) {
    // Try to find reductions first.
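The bounded operand walk in tryToVectorizeHorReductionOrInstOperands above is easier to see in isolation. This self-contained C++ sketch (illustrative only; Inst, tryMatchReduction, and walk are hypothetical stand-ins rather than LLVM APIs) mirrors its shape: try a reduction match at each node first, and only descend into operands while the depth stays under a fixed bound:

    #include <cstdio>
    #include <queue>
    #include <utility>
    #include <vector>

    struct Inst {
      bool IsReductionRoot;
      std::vector<Inst *> Operands;
    };

    bool tryMatchReduction(Inst *I) { return I->IsReductionRoot; }

    bool walk(Inst *Root, unsigned MaxDepth) {
      bool Changed = false;
      std::queue<std::pair<Inst *, unsigned>> Stack;
      Stack.push({Root, 0});
      while (!Stack.empty()) {
        auto [I, Level] = Stack.front();
        Stack.pop();
        if (tryMatchReduction(I)) { // the reduction attempt comes first
          Changed = true;
          continue; // do not descend into a matched reduction
        }
        if (Level + 1 >= MaxDepth)
          continue; // bound the search to keep compile time in check
        for (Inst *Op : I->Operands)
          Stack.push({Op, Level + 1});
      }
      return Changed;
    }

    int main() {
      Inst Add{true, {}};          // pretend this roots a horizontal add
      Inst Store{false, {&Add}};   // walk starts at its user
      std::printf("%s\n", walk(&Store, 3) ? "matched" : "no match");
      return 0;
    }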
@@ -10350,7 +12372,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { DomTreeNodeBase *NodeI2 = DT->getNode(I2->getParent()); assert(NodeI1 && "Should only process reachable instructions"); - assert(NodeI1 && "Should only process reachable instructions"); + assert(NodeI2 && "Should only process reachable instructions"); assert((NodeI1 == NodeI2) == (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) && "Different nodes should have different DFS numbers"); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 8822c0004eb2..97f2b1a93815 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -72,17 +72,17 @@ class VPRecipeBuilder { VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef Operands, VFRange &Range, VPlanPtr &Plan); - /// Check if an induction recipe should be constructed for \I. If so build and - /// return it. If not, return null. - VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands, - VFRange &Range) const; + /// Check if an induction recipe should be constructed for \p Phi. If so build + /// and return it. If not, return null. + VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi, + ArrayRef Operands, + VPlan &Plan, VFRange &Range); /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. VPWidenIntOrFpInductionRecipe * tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef Operands, - VFRange &Range, VPlan &Plan) const; + VFRange &Range, VPlan &Plan); /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 342d4a074e10..4d709097c306 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -23,11 +23,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" -#include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -35,13 +34,13 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GenericDomTreeConstruction.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include -#include #include #include @@ -60,7 +59,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { } #endif -Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder, +Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const { switch (LaneKind) { case VPLane::Kind::ScalableLast: @@ -158,25 +157,25 @@ void VPBlockBase::setPlan(VPlan *ParentPlan) { } /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 
-const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { +const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { const VPBlockBase *Block = this; while (const VPRegionBlock *Region = dyn_cast(Block)) - Block = Region->getExit(); + Block = Region->getExiting(); return cast(Block); } -VPBasicBlock *VPBlockBase::getExitBasicBlock() { +VPBasicBlock *VPBlockBase::getExitingBasicBlock() { VPBlockBase *Block = this; while (VPRegionBlock *Region = dyn_cast(Block)) - Block = Region->getExit(); + Block = Region->getExiting(); return cast(Block); } VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { if (!Successors.empty() || !Parent) return this; - assert(Parent->getExit() == this && - "Block w/o successors not the exit of its parent."); + assert(Parent->getExiting() == this && + "Block w/o successors not the exiting block of its parent."); return Parent->getEnclosingBlockWithSuccessors(); } @@ -188,28 +187,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { return Parent->getEnclosingBlockWithPredecessors(); } -VPValue *VPBlockBase::getCondBit() { - return CondBitUser.getSingleOperandOrNull(); -} - -const VPValue *VPBlockBase::getCondBit() const { - return CondBitUser.getSingleOperandOrNull(); -} - -void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); } - -VPValue *VPBlockBase::getPredicate() { - return PredicateUser.getSingleOperandOrNull(); -} - -const VPValue *VPBlockBase::getPredicate() const { - return PredicateUser.getSingleOperandOrNull(); -} - -void VPBlockBase::setPredicate(VPValue *CV) { - PredicateUser.resetSingleOpUser(CV); -} - void VPBlockBase::deleteCFG(VPBlockBase *Entry) { SmallVector Blocks(depth_first(Entry)); @@ -245,6 +222,52 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { // set(Def, Extract, Instance); return Extract; } +BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) { + VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion(); + return VPBB2IRBB[LoopRegion->getPreheaderVPBB()]; +} + +void VPTransformState::addNewMetadata(Instruction *To, + const Instruction *Orig) { + // If the loop was versioned with memchecks, add the corresponding no-alias + // metadata. + if (LVer && (isa(Orig) || isa(Orig))) + LVer->annotateInstWithNoAlias(To, Orig); +} + +void VPTransformState::addMetadata(Instruction *To, Instruction *From) { + propagateMetadata(To, From); + addNewMetadata(To, From); +} + +void VPTransformState::addMetadata(ArrayRef To, Instruction *From) { + for (Value *V : To) { + if (Instruction *I = dyn_cast(V)) + addMetadata(I, From); + } +} + +void VPTransformState::setDebugLocFromInst(const Value *V) { + if (const Instruction *Inst = dyn_cast_or_null(V)) { + const DILocation *DIL = Inst->getDebugLoc(); + + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && + !isa(Inst) && !EnableFSDiscriminator) { + // FIXME: For scalable vectors, assume vscale=1. 
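The discriminator handling that continues below scales sample-profile counts: each scalar instruction becomes UF x VF instructions in the vector loop, so its DILocation is cloned with a matching duplication factor unless FSDiscriminators are enabled. A standalone statement of the factor (duplicationFactor is an illustrative name; per the FIXME above, scalable VFs contribute only their known minimum):

  #include <cstdint>

  // Duplication factor for a vectorized and unrolled instruction's debug
  // location: UF copies of a VF-wide operation stand in for UF * VF scalar
  // executions. vscale is assumed to be 1 for scalable vectors.
  static uint64_t duplicationFactor(unsigned UF, unsigned KnownMinVF) {
    return uint64_t(UF) * uint64_t(KnownMinVF);
  }

The continuation of setDebugLocFromInst below passes exactly this product to cloneByMultiplyingDuplicationFactor.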
+      auto NewDIL =
+          DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
+      if (NewDIL)
+        Builder.SetCurrentDebugLocation(*NewDIL);
+      else
+        LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
+                          << DIL->getFilename() << " Line: " << DIL->getLine());
+    } else
+      Builder.SetCurrentDebugLocation(DIL);
+  } else
+    Builder.SetCurrentDebugLocation(DebugLoc());
+}
 
 BasicBlock *
 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
@@ -252,43 +275,36 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
   // Pred stands for Predecessor. Prev stands for Previous - last visited/created.
   BasicBlock *PrevBB = CFG.PrevBB;
   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
-                                         PrevBB->getParent(), CFG.LastBB);
+                                         PrevBB->getParent(), CFG.ExitBB);
   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
 
   // Hook up the new basic block to its predecessors.
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
-    VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
-    auto &PredVPSuccessors = PredVPBB->getSuccessors();
+    VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
+    auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
-    // In outer loop vectorization scenario, the predecessor BBlock may not yet
-    // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
-    // vectorization. We do not encounter this case in inner loop vectorization
-    // as we start out by building a loop skeleton with the vector loop header
-    // and latch blocks. As a result, we never enter this function for the
-    // header block in the non VPlan-native path.
-    if (!PredBB) {
-      assert(EnableVPlanNativePath &&
-             "Unexpected null predecessor in non VPlan-native path");
-      CFG.VPBBsToFix.push_back(PredVPBB);
-      continue;
-    }
-
     assert(PredBB && "Predecessor basic-block not found building successor.");
     auto *PredBBTerminator = PredBB->getTerminator();
     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
+
+    auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
     if (isa<UnreachableInst>(PredBBTerminator)) {
       assert(PredVPSuccessors.size() == 1 &&
              "Predecessor ending w/o branch must have single successor.");
+      DebugLoc DL = PredBBTerminator->getDebugLoc();
       PredBBTerminator->eraseFromParent();
-      BranchInst::Create(NewBB, PredBB);
+      auto *Br = BranchInst::Create(NewBB, PredBB);
+      Br->setDebugLoc(DL);
+    } else if (TermBr && !TermBr->isConditional()) {
+      TermBr->setSuccessor(0, NewBB);
     } else {
-      assert(PredVPSuccessors.size() == 2 &&
-             "Predecessor ending with branch must have two successors.");
+      // Set each forward successor here when it is created, excluding
+      // backedges. A backward successor is set when the branch is created.
       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
-      assert(!PredBBTerminator->getSuccessor(idx) &&
+      assert(!TermBr->getSuccessor(idx) &&
              "Trying to reset an existing successor block.");
-      PredBBTerminator->setSuccessor(idx, NewBB);
+      TermBr->setSuccessor(idx, NewBB);
     }
   }
   return NewBB;
@@ -300,27 +316,51 @@ void VPBasicBlock::execute(VPTransformState *State) {
   VPBlockBase *SingleHPred = nullptr;
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
 
-  // 1. Create an IR basic block, or reuse the last one if possible.
-  // The last IR basic block is reused, as an optimization, in three cases:
-  // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
-  // B.
when the current VPBB has a single (hierarchical) predecessor which - // is PrevVPBB and the latter has a single (hierarchical) successor; and - // C. when the current VPBB is an entry of a region replica - where PrevVPBB - // is the exit of this region from a previous instance, or the predecessor - // of this region. - if (PrevVPBB && /* A */ - !((SingleHPred = getSingleHierarchicalPredecessor()) && - SingleHPred->getExitBasicBlock() == PrevVPBB && - PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ - !(Replica && getPredecessors().empty())) { /* C */ + auto IsLoopRegion = [](VPBlockBase *BB) { + auto *R = dyn_cast(BB); + return R && !R->isReplicator(); + }; + + // 1. Create an IR basic block, or reuse the last one or ExitBB if possible. + if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) { + // ExitBB can be re-used for the exit block of the Plan. + NewBB = State->CFG.ExitBB; + State->CFG.PrevBB = NewBB; + + // Update the branch instruction in the predecessor to branch to ExitBB. + VPBlockBase *PredVPB = getSingleHierarchicalPredecessor(); + VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock(); + assert(PredVPB->getSingleSuccessor() == this && + "predecessor must have the current block as only successor"); + BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB]; + // The Exit block of a loop is always set to be successor 0 of the Exiting + // block. + cast(ExitingBB->getTerminator())->setSuccessor(0, NewBB); + } else if (PrevVPBB && /* A */ + !((SingleHPred = getSingleHierarchicalPredecessor()) && + SingleHPred->getExitingBasicBlock() == PrevVPBB && + PrevVPBB->getSingleHierarchicalSuccessor() && + (SingleHPred->getParent() == getEnclosingLoopRegion() && + !IsLoopRegion(SingleHPred))) && /* B */ + !(Replica && getPredecessors().empty())) { /* C */ + // The last IR basic block is reused, as an optimization, in three cases: + // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; + // B. when the current VPBB has a single (hierarchical) predecessor which + // is PrevVPBB and the latter has a single (hierarchical) successor which + // both are in the same non-replicator region; and + // C. when the current VPBB is an entry of a region replica - where PrevVPBB + // is the exiting VPBB of this region from a previous instance, or the + // predecessor of this region. + NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); // Temporarily terminate with unreachable until CFG is rewired. UnreachableInst *Terminator = State->Builder.CreateUnreachable(); + // Register NewBB in its loop. In innermost loops its the same for all + // BB's. + if (State->CurrentVectorLoop) + State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); State->Builder.SetInsertPoint(Terminator); - // Register NewBB in its loop. In innermost loops its the same for all BB's. - Loop *L = State->LI->getLoopFor(State->CFG.LastBB); - L->addBasicBlockToLoop(NewBB, *State->LI); State->CFG.PrevBB = NewBB; } @@ -334,29 +374,6 @@ void VPBasicBlock::execute(VPTransformState *State) { for (VPRecipeBase &Recipe : Recipes) Recipe.execute(*State); - VPValue *CBV; - if (EnableVPlanNativePath && (CBV = getCondBit())) { - assert(CBV->getUnderlyingValue() && - "Unexpected null underlying value for condition bit"); - - // Condition bit value in a VPBasicBlock is used as the branch selector. 
In - // the VPlan-native path case, since all branches are uniform we generate a - // branch instruction using the condition value from vector lane 0 and dummy - // successors. The successors are fixed later when the successor blocks are - // visited. - Value *NewCond = State->get(CBV, {0, 0}); - - // Replace the temporary unreachable terminator with the new conditional - // branch. - auto *CurrentTerminator = NewBB->getTerminator(); - assert(isa(CurrentTerminator) && - "Expected to replace unreachable terminator with conditional " - "branch."); - auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond); - CondBr->setSuccessor(0, nullptr); - ReplaceInstWithInst(CurrentTerminator, CondBr); - } - LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); } @@ -395,6 +412,61 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { return SplitBlock; } +VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { + VPRegionBlock *P = getParent(); + if (P && P->isReplicator()) { + P = P->getParent(); + assert(!cast(P)->isReplicator() && + "unexpected nested replicate regions"); + } + return P; +} + +static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { + if (VPBB->empty()) { + assert( + VPBB->getNumSuccessors() < 2 && + "block with multiple successors doesn't have a recipe as terminator"); + return false; + } + + const VPRecipeBase *R = &VPBB->back(); + auto *VPI = dyn_cast(R); + bool IsCondBranch = + isa(R) || + (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond || + VPI->getOpcode() == VPInstruction::BranchOnCount)); + (void)IsCondBranch; + + if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) { + assert(IsCondBranch && "block with multiple successors not terminated by " + "conditional branch recipe"); + + return true; + } + + assert( + !IsCondBranch && + "block with 0 or 1 successors terminated by conditional branch recipe"); + return false; +} + +VPRecipeBase *VPBasicBlock::getTerminator() { + if (hasConditionalTerminator(this)) + return &back(); + return nullptr; +} + +const VPRecipeBase *VPBasicBlock::getTerminator() const { + if (hasConditionalTerminator(this)) + return &back(); + return nullptr; +} + +bool VPBasicBlock::isExiting() const { + return getParent()->getExitingBasicBlock() == this; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { if (getSuccessors().empty()) { @@ -411,13 +483,6 @@ void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << getName() << ":\n"; - if (const VPValue *Pred = getPredicate()) { - O << Indent << "BlockPredicate:"; - Pred->printAsOperand(O, SlotTracker); - if (const auto *PredInst = dyn_cast(Pred)) - O << " (" << PredInst->getParent()->getName() << ")"; - O << '\n'; - } auto RecipeIndent = Indent + " "; for (const VPRecipeBase &Recipe : *this) { @@ -426,14 +491,6 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, } printSuccessors(O, Indent); - - if (const VPValue *CBV = getCondBit()) { - O << Indent << "CondBit: "; - CBV->printAsOperand(O, SlotTracker); - if (const auto *CBI = dyn_cast(CBV)) - O << " (" << CBI->getParent()->getName() << ")"; - O << '\n'; - } } #endif @@ -448,25 +505,26 @@ void VPRegionBlock::execute(VPTransformState *State) { ReversePostOrderTraversal RPOT(Entry); if (!isReplicator()) { + // Create and register the new vector loop. 
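The hunk that follows makes each non-replicator region materialize its own Loop object instead of relying on a pre-built skeleton loop. The LoopInfo bookkeeping it performs is the standard registration pattern, shown here standalone (registerVectorLoop is an illustrative name):

  #include "llvm/Analysis/LoopInfo.h"

  using namespace llvm;

  // Sketch: create a loop and register it with LoopInfo before any blocks
  // are added. Nesting is derived from the preheader's enclosing loop; a
  // parentless loop becomes a new top-level loop.
  static Loop *registerVectorLoop(LoopInfo &LI, BasicBlock *Preheader) {
    Loop *NewLoop = LI.AllocateLoop();
    if (Loop *Parent = LI.getLoopFor(Preheader))
      Parent->addChildLoop(NewLoop);
    else
      LI.addTopLevelLoop(NewLoop);
    return NewLoop; // blocks join via addBasicBlockToLoop as they are built
  }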
+ Loop *PrevLoop = State->CurrentVectorLoop; + State->CurrentVectorLoop = State->LI->AllocateLoop(); + BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; + Loop *ParentLoop = State->LI->getLoopFor(VectorPH); + + // Insert the new loop into the loop nest and register the new basic blocks + // before calling any utilities such as SCEV that require valid LoopInfo. + if (ParentLoop) + ParentLoop->addChildLoop(State->CurrentVectorLoop); + else + State->LI->addTopLevelLoop(State->CurrentVectorLoop); + // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { - if (EnableVPlanNativePath) { - // The inner loop vectorization path does not represent loop preheader - // and exit blocks as part of the VPlan. In the VPlan-native path, skip - // vectorizing loop preheader block. In future, we may replace this - // check with the check for loop preheader. - if (Block->getNumPredecessors() == 0) - continue; - - // Skip vectorizing loop exit block. In future, we may replace this - // check with the check for loop exit. - if (Block->getNumSuccessors() == 0) - continue; - } - LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); Block->execute(State); } + + State->CurrentVectorLoop = PrevLoop; return; } @@ -508,341 +566,32 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, } #endif -bool VPRecipeBase::mayWriteToMemory() const { - switch (getVPDefID()) { - case VPWidenMemoryInstructionSC: { - return cast(this)->isStore(); - } - case VPReplicateSC: - case VPWidenCallSC: - return cast(getVPSingleValue()->getUnderlyingValue()) - ->mayWriteToMemory(); - case VPBranchOnMaskSC: - return false; - case VPWidenIntOrFpInductionSC: - case VPWidenCanonicalIVSC: - case VPWidenPHISC: - case VPBlendSC: - case VPWidenSC: - case VPWidenGEPSC: - case VPReductionSC: - case VPWidenSelectSC: { - const Instruction *I = - dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); - (void)I; - assert((!I || !I->mayWriteToMemory()) && - "underlying instruction may write to memory"); - return false; - } - default: - return true; - } -} - -bool VPRecipeBase::mayReadFromMemory() const { - switch (getVPDefID()) { - case VPWidenMemoryInstructionSC: { - return !cast(this)->isStore(); - } - case VPReplicateSC: - case VPWidenCallSC: - return cast(getVPSingleValue()->getUnderlyingValue()) - ->mayReadFromMemory(); - case VPBranchOnMaskSC: - return false; - case VPWidenIntOrFpInductionSC: - case VPWidenCanonicalIVSC: - case VPWidenPHISC: - case VPBlendSC: - case VPWidenSC: - case VPWidenGEPSC: - case VPReductionSC: - case VPWidenSelectSC: { - const Instruction *I = - dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); - (void)I; - assert((!I || !I->mayReadFromMemory()) && - "underlying instruction may read from memory"); - return false; - } - default: - return true; - } -} - -bool VPRecipeBase::mayHaveSideEffects() const { - switch (getVPDefID()) { - case VPBranchOnMaskSC: - return false; - case VPWidenIntOrFpInductionSC: - case VPWidenCanonicalIVSC: - case VPWidenPHISC: - case VPBlendSC: - case VPWidenSC: - case VPWidenGEPSC: - case VPReductionSC: - case VPWidenSelectSC: { - const Instruction *I = - dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); - (void)I; - assert((!I || !I->mayHaveSideEffects()) && - "underlying instruction has side-effects"); - return false; - } - case VPReplicateSC: { - auto *R = cast(this); - return R->getUnderlyingInstr()->mayHaveSideEffects(); - } - default: - return true; - } -} - -void 
VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { - assert(!Parent && "Recipe already in some VPBasicBlock"); - assert(InsertPos->getParent() && - "Insertion position not in any VPBasicBlock"); - Parent = InsertPos->getParent(); - Parent->getRecipeList().insert(InsertPos->getIterator(), this); -} - -void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { - assert(!Parent && "Recipe already in some VPBasicBlock"); - assert(InsertPos->getParent() && - "Insertion position not in any VPBasicBlock"); - Parent = InsertPos->getParent(); - Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); -} - -void VPRecipeBase::removeFromParent() { - assert(getParent() && "Recipe not in any VPBasicBlock"); - getParent()->getRecipeList().remove(getIterator()); - Parent = nullptr; -} - -iplist::iterator VPRecipeBase::eraseFromParent() { - assert(getParent() && "Recipe not in any VPBasicBlock"); - return getParent()->getRecipeList().erase(getIterator()); -} - -void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { - removeFromParent(); - insertAfter(InsertPos); -} - -void VPRecipeBase::moveBefore(VPBasicBlock &BB, - iplist::iterator I) { - assert(I == BB.end() || I->getParent() == &BB); - removeFromParent(); - Parent = &BB; - BB.getRecipeList().insert(I, this); -} - -void VPInstruction::generateInstruction(VPTransformState &State, - unsigned Part) { - IRBuilder<> &Builder = State.Builder; - Builder.SetCurrentDebugLocation(DL); - - if (Instruction::isBinaryOp(getOpcode())) { - Value *A = State.get(getOperand(0), Part); - Value *B = State.get(getOperand(1), Part); - Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); - State.set(this, V, Part); - return; - } - - switch (getOpcode()) { - case VPInstruction::Not: { - Value *A = State.get(getOperand(0), Part); - Value *V = Builder.CreateNot(A); - State.set(this, V, Part); - break; - } - case VPInstruction::ICmpULE: { - Value *IV = State.get(getOperand(0), Part); - Value *TC = State.get(getOperand(1), Part); - Value *V = Builder.CreateICmpULE(IV, TC); - State.set(this, V, Part); - break; - } - case Instruction::Select: { - Value *Cond = State.get(getOperand(0), Part); - Value *Op1 = State.get(getOperand(1), Part); - Value *Op2 = State.get(getOperand(2), Part); - Value *V = Builder.CreateSelect(Cond, Op1, Op2); - State.set(this, V, Part); - break; - } - case VPInstruction::ActiveLaneMask: { - // Get first lane of vector induction variable. - Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); - // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), Part); - - auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); - auto *PredTy = VectorType::get(Int1Ty, State.VF); - Instruction *Call = Builder.CreateIntrinsic( - Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, - {VIVElem0, ScalarTC}, nullptr, "active.lane.mask"); - State.set(this, Call, Part); - break; - } - case VPInstruction::FirstOrderRecurrenceSplice: { - // Generate code to combine the previous and current values in vector v3. - // - // vector.ph: - // v_init = vector(..., ..., ..., a[-1]) - // br vector.body - // - // vector.body - // i = phi [0, vector.ph], [i+4, vector.body] - // v1 = phi [v_init, vector.ph], [v2, vector.body] - // v2 = a[i, i+1, i+2, i+3]; - // v3 = vector(v1(3), v2(0, 1, 2)) - - // For the first part, use the recurrence phi (v1), otherwise v2. - auto *V1 = State.get(getOperand(0), 0); - Value *PartMinus1 = Part == 0 ? 
V1 : State.get(getOperand(1), Part - 1); - if (!PartMinus1->getType()->isVectorTy()) { - State.set(this, PartMinus1, Part); - } else { - Value *V2 = State.get(getOperand(1), Part); - State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part); - } - break; - } - - case VPInstruction::CanonicalIVIncrement: - case VPInstruction::CanonicalIVIncrementNUW: { - Value *Next = nullptr; - if (Part == 0) { - bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; - auto *Phi = State.get(getOperand(0), 0); - // The loop step is equal to the vectorization factor (num of SIMD - // elements) times the unroll factor (num of SIMD instructions). - Value *Step = - createStepForVF(Builder, Phi->getType(), State.VF, State.UF); - Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false); - } else { - Next = State.get(this, 0); - } - - State.set(this, Next, Part); - break; - } - case VPInstruction::BranchOnCount: { - if (Part != 0) - break; - // First create the compare. - Value *IV = State.get(getOperand(0), Part); - Value *TC = State.get(getOperand(1), Part); - Value *Cond = Builder.CreateICmpEQ(IV, TC); - - // Now create the branch. - auto *Plan = getParent()->getPlan(); - VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); - VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); - if (Header->empty()) { - assert(EnableVPlanNativePath && - "empty entry block only expected in VPlanNativePath"); - Header = cast(Header->getSingleSuccessor()); +void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, + Value *CanonicalIVStartValue, + VPTransformState &State, + bool IsEpilogueVectorization) { + + VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock(); + auto *Term = dyn_cast(&ExitingVPBB->back()); + // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when + // preparing to execute the plan for the main vector loop. + if (!IsEpilogueVectorization && Term && + Term->getOpcode() == VPInstruction::BranchOnCount && + isa(TripCountV)) { + ConstantInt *C = cast(TripCountV); + uint64_t TCVal = C->getZExtValue(); + if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) { + auto *BOC = + new VPInstruction(VPInstruction::BranchOnCond, + {getOrAddExternalDef(State.Builder.getTrue())}); + Term->eraseFromParent(); + ExitingVPBB->appendRecipe(BOC); + // TODO: Further simplifications are possible + // 1. Replace inductions with constants. + // 2. Replace vector loop region with VPBasicBlock. } - // TODO: Once the exit block is modeled in VPlan, use it instead of going - // through State.CFG.LastBB. 
- BasicBlock *Exit = - cast(State.CFG.LastBB->getTerminator())->getSuccessor(0); - - Builder.CreateCondBr(Cond, Exit, State.CFG.VPBB2IRBB[Header]); - Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); - break; - } - default: - llvm_unreachable("Unsupported opcode for instruction"); - } -} - -void VPInstruction::execute(VPTransformState &State) { - assert(!State.Instance && "VPInstruction executing an Instance"); - IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); - State.Builder.setFastMathFlags(FMF); - for (unsigned Part = 0; Part < State.UF; ++Part) - generateInstruction(State, Part); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPInstruction::dump() const { - VPSlotTracker SlotTracker(getParent()->getPlan()); - print(dbgs(), "", SlotTracker); -} - -void VPInstruction::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EMIT "; - - if (hasResult()) { - printAsOperand(O, SlotTracker); - O << " = "; - } - - switch (getOpcode()) { - case VPInstruction::Not: - O << "not"; - break; - case VPInstruction::ICmpULE: - O << "icmp ule"; - break; - case VPInstruction::SLPLoad: - O << "combined load"; - break; - case VPInstruction::SLPStore: - O << "combined store"; - break; - case VPInstruction::ActiveLaneMask: - O << "active lane mask"; - break; - case VPInstruction::FirstOrderRecurrenceSplice: - O << "first-order splice"; - break; - case VPInstruction::CanonicalIVIncrement: - O << "VF * UF + "; - break; - case VPInstruction::CanonicalIVIncrementNUW: - O << "VF * UF +(nuw) "; - break; - case VPInstruction::BranchOnCount: - O << "branch-on-count "; - break; - default: - O << Instruction::getOpcodeName(getOpcode()); - } - - O << FMF; - - for (const VPValue *Operand : operands()) { - O << " "; - Operand->printAsOperand(O, SlotTracker); } - if (DL) { - O << ", !dbg "; - DL.print(O); - } -} -#endif - -void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { - // Make sure the VPInstruction is a floating-point operation. - assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || - Opcode == Instruction::FNeg || Opcode == Instruction::FSub || - Opcode == Instruction::FDiv || Opcode == Instruction::FRem || - Opcode == Instruction::FCmp) && - "this op can't take fast-math flags"); - FMF = FMFNew; -} - -void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, - Value *CanonicalIVStartValue, - VPTransformState &State) { // Check if the trip count is needed, and if so build it. if (TripCount && TripCount->getNumUsers()) { for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) @@ -868,111 +617,78 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, // When vectorizing the epilogue loop, the canonical induction start value // needs to be changed from zero to the value after the main vector loop. 
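Before the epilogue handling resumes below, note the fold prepareToExecute gains above: when the trip count is a compile-time constant that fits in a single vector step, the BranchOnCount latch test can only ever exit, so it is rewritten to BranchOnCond on a true literal. The guard condition, restated standalone (vectorLoopRunsOnce is an illustrative name):

  #include <cstdint>

  // The vector loop body executes exactly once when the constant trip count
  // is non-zero and no larger than one vector step, i.e. VF * UF elements.
  static bool vectorLoopRunsOnce(uint64_t TripCount, unsigned KnownMinVF,
                                 unsigned UF) {
    return TripCount != 0 && TripCount <= uint64_t(KnownMinVF) * UF;
  }

The code below then retargets the canonical IV's start value, as the preceding comment describes.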
if (CanonicalIVStartValue) { - VPValue *VPV = new VPValue(CanonicalIVStartValue); - addExternalDef(VPV); + VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue); auto *IV = getCanonicalIV(); assert(all_of(IV->users(), [](const VPUser *U) { + if (isa(U)) + return true; auto *VPI = cast(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && - "the canonical IV should only be used by its increments when " + "the canonical IV should only be used by its increments or " + "ScalarIVSteps when " "resetting the start value"); IV->setOperand(0, VPV); } } -/// Generate the code inside the body of the vectorized loop. Assumes a single -/// LoopVectorBody basic-block was created for this. Introduce additional -/// basic-blocks as needed, and fill them all. +/// Generate the code inside the preheader and body of the vectorized loop. +/// Assumes a single pre-header basic-block was created for this. Introduce +/// additional basic-blocks as needed, and fill them all. void VPlan::execute(VPTransformState *State) { - // 0. Set the reverse mapping from VPValues to Values for code generation. + // Set the reverse mapping from VPValues to Values for code generation. for (auto &Entry : Value2VPValue) State->VPValue2Value[Entry.second] = Entry.first; - BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; - State->CFG.VectorPreHeader = VectorPreHeaderBB; - BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); - assert(VectorHeaderBB && "Loop preheader does not have a single successor."); - - // 1. Make room to generate basic-blocks inside loop body if needed. - BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock( - VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); - Loop *L = State->LI->getLoopFor(VectorHeaderBB); - L->addBasicBlockToLoop(VectorLatchBB, *State->LI); - // Remove the edge between Header and Latch to allow other connections. - // Temporarily terminate with unreachable until CFG is rewired. - // Note: this asserts the generated code's assumption that - // getFirstInsertionPt() can be dereferenced into an Instruction. - VectorHeaderBB->getTerminator()->eraseFromParent(); - State->Builder.SetInsertPoint(VectorHeaderBB); - UnreachableInst *Terminator = State->Builder.CreateUnreachable(); - State->Builder.SetInsertPoint(Terminator); - - // 2. Generate code in loop body. + // Initialize CFG state. State->CFG.PrevVPBB = nullptr; - State->CFG.PrevBB = VectorHeaderBB; - State->CFG.LastBB = VectorLatchBB; + State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); + BasicBlock *VectorPreHeader = State->CFG.PrevBB; + State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); + // Generate code in the loop pre-header and body. for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); - // Setup branch terminator successors for VPBBs in VPBBsToFix based on - // VPBB's successors. - for (auto VPBB : State->CFG.VPBBsToFix) { - assert(EnableVPlanNativePath && - "Unexpected VPBBsToFix in non VPlan-native path"); - BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; - assert(BB && "Unexpected null basic block for VPBB"); - - unsigned Idx = 0; - auto *BBTerminator = BB->getTerminator(); - - for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { - VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); - BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); - ++Idx; - } - } - - // 3. Merge the temporary latch created with the last basic-block filled. 
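The removals that follow delete the old latch bookkeeping wholesale: with the pre-header, latch and exit now represented in the plan itself (CFG.ExitBB above, and VPBB2IRBB lookups below), there is no temporary vector.body.latch to split off, no branch and compare to shuttle between blocks, and no MergeBlockIntoPredecessor call to undo the split.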
-  BasicBlock *LastBB = State->CFG.PrevBB;
-  assert(isa<BranchInst>(LastBB->getTerminator()) &&
-         "Expected VPlan CFG to terminate with branch");
-
-  // Move both the branch and check from LastBB to VectorLatchBB.
-  auto *LastBranch = cast<BranchInst>(LastBB->getTerminator());
-  LastBranch->moveBefore(VectorLatchBB->getTerminator());
-  VectorLatchBB->getTerminator()->eraseFromParent();
-  // Move condition so it is guaranteed to be next to branch. This is only done
-  // to avoid excessive test updates.
-  // TODO: Remove special handling once the increments for all inductions are
-  // modeled explicitly in VPlan.
-  cast<Instruction>(LastBranch->getCondition())->moveBefore(LastBranch);
-  // Connect LastBB to VectorLatchBB to facilitate their merge.
-  BranchInst::Create(VectorLatchBB, LastBB);
-
-  // Merge LastBB with Latch.
-  bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
-  (void)Merged;
-  assert(Merged && "Could not merge last basic block with latch.");
-  VectorLatchBB = LastBB;
+  VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
+  BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
 
   // Fix the latch value of canonical, reduction and first-order recurrence
   // phis in the vector loop.
-  VPBasicBlock *Header = Entry->getEntryBasicBlock();
-  if (Header->empty()) {
-    assert(EnableVPlanNativePath);
-    Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
-  }
+  VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
   for (VPRecipeBase &R : Header->phis()) {
     // Skip phi-like recipes that generate their backedge values themselves.
-    // TODO: Model their backedge values explicitly.
-    if (isa<VPWidenPHIRecipe>(&R) || isa<VPWidenIntOrFpInductionRecipe>(&R))
+    if (isa<VPWidenPHIRecipe>(&R))
+      continue;
+
+    if (isa<VPWidenIntOrFpInductionRecipe>(&R) ||
+        isa<VPWidenPointerInductionRecipe>(&R)) {
+      PHINode *Phi = nullptr;
+      if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+        Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
+      } else {
+        auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+        // TODO: Split off the case that all users of a pointer phi are scalar
+        // from the VPWidenPointerInductionRecipe.
+        if (WidenPhi->onlyScalarsGenerated(State->VF))
+          continue;
+
+        auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
+        Phi = cast<PHINode>(GEP->getPointerOperand());
+      }
+
+      Phi->setIncomingBlock(1, VectorLatchBB);
+
+      // Move the last step to the end of the latch block. This ensures
+      // consistent placement of all induction updates.
+      Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+      Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
       continue;
+    }
 
     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
     // For canonical IV, first-order recurrences and in-order reduction phis,
@@ -993,9 +709,12 @@ void VPlan::execute(VPTransformState *State) {
   }
 
   // We do not attempt to preserve DT for outer loop vectorization currently.
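The widened-induction fix-up above reduces, for each affected header phi, to two IR edits: repoint the backedge at the materialized latch and pin the increment to the end of that block. Distilled (fixBackedge is an illustrative name; the code above additionally places the increment in front of the latch's exit compare via getPrevNode):

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Sketch: incoming index 1 is assumed to be the backedge value/block.
  static void fixBackedge(PHINode *Phi, BasicBlock *VectorLatchBB) {
    Phi->setIncomingBlock(1, VectorLatchBB);
    auto *Inc = cast<Instruction>(Phi->getIncomingValue(1));
    Inc->moveBefore(VectorLatchBB->getTerminator());
  }

Dominator-tree maintenance, next, remains restricted to the inner-loop path: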
- if (!EnableVPlanNativePath) - updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB, - L->getExitBlock()); + if (!EnableVPlanNativePath) { + BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header]; + State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader); + updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB, + State->CFG.ExitBB); + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1021,6 +740,17 @@ void VPlan::print(raw_ostream &O) const { O << '\n'; Block->print(O, "", SlotTracker); } + + if (!LiveOuts.empty()) + O << "\n"; + for (auto &KV : LiveOuts) { + O << "Live-out "; + KV.second->getPhi()->printAsOperand(O); + O << " = "; + KV.second->getOperand(0)->printAsOperand(O, SlotTracker); + O << "\n"; + } + O << "}\n"; } @@ -1034,11 +764,14 @@ LLVM_DUMP_METHOD void VPlan::dump() const { print(dbgs()); } #endif -void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, +void VPlan::addLiveOut(PHINode *PN, VPValue *V) { + assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); + LiveOuts.insert({PN, new VPLiveOut(PN, V)}); +} + +void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB, BasicBlock *LoopLatchBB, BasicBlock *LoopExitBB) { - BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); - assert(LoopHeaderBB && "Loop preheader does not have a single successor."); // The vector body may be more than a single basic-block by this point. // Update the dominator tree information inside the vector body by propagating // it from header to latch, expecting only triangular control-flow, if any. @@ -1075,6 +808,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + Twine VPlanPrinter::getUID(const VPBlockBase *Block) { return (isa(Block) ? "cluster_N" : "N") + Twine(getOrCreateBID(Block)); @@ -1122,8 +856,8 @@ void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, const Twine &Label) { // Due to "dot" we print an edge between two regions as an edge between the - // exit basic block and the entry basic of the respective regions. - const VPBlockBase *Tail = From->getExitBasicBlock(); + // exiting basic block and the entry basic of the respective regions. + const VPBlockBase *Tail = From->getExitingBasicBlock(); const VPBlockBase *Head = To->getEntryBasicBlock(); OS << Indent << getUID(Tail) << " -> " << getUID(Head); OS << " [ label=\"" << Label << '\"'; @@ -1213,328 +947,6 @@ void VPlanIngredient::print(raw_ostream &O) const { V->printAsOperand(O, false); } -void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-CALL "; - - auto *CI = cast(getUnderlyingInstr()); - if (CI->getType()->isVoidTy()) - O << "void "; - else { - printAsOperand(O, SlotTracker); - O << " = "; - } - - O << "call @" << CI->getCalledFunction()->getName() << "("; - printOperands(O, SlotTracker); - O << ")"; -} - -void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-SELECT "; - printAsOperand(O, SlotTracker); - O << " = select "; - getOperand(0)->printAsOperand(O, SlotTracker); - O << ", "; - getOperand(1)->printAsOperand(O, SlotTracker); - O << ", "; - getOperand(2)->printAsOperand(O, SlotTracker); - O << (InvariantCond ? 
" (condition is loop invariant)" : ""); -} - -void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN "; - printAsOperand(O, SlotTracker); - O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; - printOperands(O, SlotTracker); -} - -void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-INDUCTION"; - if (getTruncInst()) { - O << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; - O << " +\n" << Indent << "\" "; - getVPValue(0)->printAsOperand(O, SlotTracker); - } else - O << " " << VPlanIngredient(IV); -} -#endif - -bool VPWidenIntOrFpInductionRecipe::isCanonical() const { - auto *StartC = dyn_cast(getStartValue()->getLiveInIRValue()); - auto *StepC = dyn_cast(getInductionDescriptor().getStep()); - return StartC && StartC->isZero() && StepC && StepC->isOne(); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-GEP "; - O << (IsPtrLoopInvariant ? "Inv" : "Var"); - size_t IndicesNumber = IsIndexLoopInvariant.size(); - for (size_t I = 0; I < IndicesNumber; ++I) - O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; - - O << " "; - printAsOperand(O, SlotTracker); - O << " = getelementptr "; - printOperands(O, SlotTracker); -} - -void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-PHI "; - - auto *OriginalPhi = cast(getUnderlyingValue()); - // Unless all incoming values are modeled in VPlan print the original PHI - // directly. - // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming - // values as VPValues. - if (getNumOperands() != OriginalPhi->getNumOperands()) { - O << VPlanIngredient(OriginalPhi); - return; - } - - printAsOperand(O, SlotTracker); - O << " = phi "; - printOperands(O, SlotTracker); -} - -void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "BLEND "; - Phi->printAsOperand(O, false); - O << " ="; - if (getNumIncomingValues() == 1) { - // Not a User of any mask: not really blending, this is a - // single-predecessor phi. - O << " "; - getIncomingValue(0)->printAsOperand(O, SlotTracker); - } else { - for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { - O << " "; - getIncomingValue(I)->printAsOperand(O, SlotTracker); - O << "/"; - getMask(I)->printAsOperand(O, SlotTracker); - } - } -} - -void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "REDUCE "; - printAsOperand(O, SlotTracker); - O << " = "; - getChainOp()->printAsOperand(O, SlotTracker); - O << " +"; - if (isa(getUnderlyingInstr())) - O << getUnderlyingInstr()->getFastMathFlags(); - O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; - getVecOp()->printAsOperand(O, SlotTracker); - if (getCondOp()) { - O << ", "; - getCondOp()->printAsOperand(O, SlotTracker); - } - O << ")"; -} - -void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << (IsUniform ? 
"CLONE " : "REPLICATE "); - - if (!getUnderlyingInstr()->getType()->isVoidTy()) { - printAsOperand(O, SlotTracker); - O << " = "; - } - O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; - printOperands(O, SlotTracker); - - if (AlsoPack) - O << " (S->V)"; -} - -void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "PHI-PREDICATED-INSTRUCTION "; - printAsOperand(O, SlotTracker); - O << " = "; - printOperands(O, SlotTracker); -} - -void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN "; - - if (!isStore()) { - printAsOperand(O, SlotTracker); - O << " = "; - } - O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; - - printOperands(O, SlotTracker); -} -#endif - -void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { - Value *Start = getStartValue()->getLiveInIRValue(); - PHINode *EntryPart = PHINode::Create( - Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); - EntryPart->addIncoming(Start, State.CFG.VectorPreHeader); - EntryPart->setDebugLoc(DL); - for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) - State.set(this, EntryPart, Part); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EMIT "; - printAsOperand(O, SlotTracker); - O << " = CANONICAL-INDUCTION"; -} -#endif - -void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { - Value *CanonicalIV = State.get(getOperand(0), 0); - Type *STy = CanonicalIV->getType(); - IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); - ElementCount VF = State.VF; - Value *VStart = VF.isScalar() - ? CanonicalIV - : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); - for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { - Value *VStep = createStepForVF(Builder, STy, VF, Part); - if (VF.isVector()) { - VStep = Builder.CreateVectorSplat(VF, VStep); - VStep = Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); - } - Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); - State.set(this, CanonicalVectorIV, Part); - } -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EMIT "; - printAsOperand(O, SlotTracker); - O << " = WIDEN-CANONICAL-INDUCTION "; - printOperands(O, SlotTracker); -} -#endif - -void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { - auto &Builder = State.Builder; - // Create a vector from the initial value. - auto *VectorInit = getStartValue()->getLiveInIRValue(); - - Type *VecTy = State.VF.isScalar() - ? VectorInit->getType() - : VectorType::get(VectorInit->getType(), State.VF); - - if (State.VF.isVector()) { - auto *IdxTy = Builder.getInt32Ty(); - auto *One = ConstantInt::get(IdxTy, 1); - IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); - auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); - auto *LastIdx = Builder.CreateSub(RuntimeVF, One); - VectorInit = Builder.CreateInsertElement( - PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); - } - - // Create a phi node for the new recurrence. 
- PHINode *EntryPart = PHINode::Create( - VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); - EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader); - State.set(this, EntryPart, 0); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; - printAsOperand(O, SlotTracker); - O << " = phi "; - printOperands(O, SlotTracker); -} -#endif - -void VPReductionPHIRecipe::execute(VPTransformState &State) { - PHINode *PN = cast(getUnderlyingValue()); - auto &Builder = State.Builder; - - // In order to support recurrences we need to be able to vectorize Phi nodes. - // Phi nodes have cycles, so we need to vectorize them in two stages. This is - // stage #1: We create a new vector PHI node with no incoming edges. We'll use - // this value when we vectorize all of the instructions that use the PHI. - bool ScalarPHI = State.VF.isScalar() || IsInLoop; - Type *VecTy = - ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); - - BasicBlock *HeaderBB = State.CFG.PrevBB; - assert(State.LI->getLoopFor(HeaderBB)->getHeader() == HeaderBB && - "recipe must be in the vector loop header"); - unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; - for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { - Value *EntryPart = - PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt()); - State.set(this, EntryPart, Part); - } - - // Reductions do not have to start at zero. They can start with - // any loop invariant values. - VPValue *StartVPV = getStartValue(); - Value *StartV = StartVPV->getLiveInIRValue(); - - Value *Iden = nullptr; - RecurKind RK = RdxDesc.getRecurrenceKind(); - if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || - RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { - // MinMax reduction have the start value as their identify. - if (ScalarPHI) { - Iden = StartV; - } else { - IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); - StartV = Iden = - Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); - } - } else { - Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), - RdxDesc.getFastMathFlags()); - - if (!ScalarPHI) { - Iden = Builder.CreateVectorSplat(State.VF, Iden); - IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); - Constant *Zero = Builder.getInt32(0); - StartV = Builder.CreateInsertElement(Iden, StartV, Zero); - } - } - - for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { - Value *EntryPart = State.get(this, Part); - // Make sure to add the reduction start value only to the - // first unroll part. - Value *StartVal = (Part == 0) ? 
StartV : Iden; - cast(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader); - } -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-REDUCTION-PHI "; - - printAsOperand(O, SlotTracker); - O << " = phi "; - printOperands(O, SlotTracker); -} #endif template void DomTreeBuilder::Calculate(VPDominatorTree &DT); @@ -1594,7 +1006,10 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, continue; assert(isa(&VPI) && "Can only handle VPInstructions"); auto *VPInst = cast(&VPI); - auto *Inst = cast(VPInst->getUnderlyingValue()); + + auto *Inst = dyn_cast_or_null(VPInst->getUnderlyingValue()); + if (!Inst) + continue; auto *IG = IAI.getInterleaveGroup(Inst); if (!IG) continue; @@ -1622,7 +1037,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI) { Old2NewTy Old2New; - visitRegion(cast(Plan.getEntry()), Old2New, IAI); + visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI); } void VPSlotTracker::assignSlot(const VPValue *V) { @@ -1632,8 +1047,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) { void VPSlotTracker::assignSlots(const VPlan &Plan) { - for (const VPValue *V : Plan.VPExternalDefs) - assignSlot(V); + for (const auto &P : Plan.VPExternalDefs) + assignSlot(P.second); assignSlot(&Plan.VectorTripCount); if (Plan.BackedgeTakenCount) @@ -1651,7 +1066,19 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) { } bool vputils::onlyFirstLaneUsed(VPValue *Def) { - return all_of(Def->users(), [Def](VPUser *U) { - return cast(U)->onlyFirstLaneUsed(Def); - }); + return all_of(Def->users(), + [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); }); +} + +VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, + ScalarEvolution &SE) { + if (auto *E = dyn_cast(Expr)) + return Plan.getOrAddExternalDef(E->getValue()); + if (auto *E = dyn_cast(Expr)) + return Plan.getOrAddExternalDef(E->getValue()); + + VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock(); + VPValue *Step = new VPExpandSCEVRecipe(Expr, SE); + Preheader->appendRecipe(cast(Step->getDef())); + return Step; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index bcaabca692cc..09da4a545d0d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -25,27 +25,26 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H -#include "VPlanLoopInfo.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/Support/InstructionCost.h" +#include "llvm/IR/FMF.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" #include #include #include -#include #include namespace llvm { @@ -54,6 +53,7 @@ class BasicBlock; class DominatorTree; class InductionDescriptor; class InnerLoopVectorizer; +class IRBuilderBase; class 
LoopInfo; class raw_ostream; class RecurrenceDescriptor; @@ -67,10 +67,11 @@ class VPlanSlp; /// Returns a calculation for the total number of elements for a given \p VF. /// For fixed width vectors this value is a constant, whereas for scalable /// vectors it is an expression determined at runtime. -Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF); +Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); /// Return a value for Step multiplied by VF. -Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, int64_t Step); +Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, + int64_t Step); /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: @@ -151,7 +152,7 @@ public: /// Returns an expression describing the lane index that can be used at /// runtime. - Value *getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const; + Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const; /// Returns the Kind of lane offset. Kind getKind() const { return LaneKind; } @@ -199,10 +200,10 @@ struct VPIteration { /// needed for generating the output IR. struct VPTransformState { VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, - DominatorTree *DT, IRBuilder<> &Builder, + DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan) - : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan) { - } + : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan), + LVer(nullptr) {} /// The chosen Vectorization and Unroll Factors of the loop being vectorized. ElementCount VF; @@ -298,6 +299,27 @@ struct VPTransformState { Iter->second[Instance.Part][CacheIdx] = V; } + /// Add additional metadata to \p To that was not present on \p Orig. + /// + /// Currently this is used to add the noalias annotations based on the + /// inserted memchecks. Use this for instructions that are *cloned* into the + /// vector loop. + void addNewMetadata(Instruction *To, const Instruction *Orig); + + /// Add metadata from one instruction to another. + /// + /// This includes both the original MDs from \p From and additional ones (\see + /// addNewMetadata). Use this for *newly created* instructions in the vector + /// loop. + void addMetadata(Instruction *To, Instruction *From); + + /// Similar to the previous function but it adds the metadata to a + /// vector of instructions. + void addMetadata(ArrayRef To, Instruction *From); + + /// Set the debug location in the builder using the debug location in \p V. + void setDebugLocFromInst(const Value *V); + /// Hold state information used when constructing the CFG of the output IR, /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. struct CFGState { @@ -308,26 +330,19 @@ struct VPTransformState { /// header BasicBlock. BasicBlock *PrevBB = nullptr; - /// The last IR BasicBlock in the output IR. Set to the new latch - /// BasicBlock, used for placing the newly created BasicBlocks. - BasicBlock *LastBB = nullptr; - - /// The IR BasicBlock that is the preheader of the vector loop in the output - /// IR. - /// FIXME: The vector preheader should also be modeled in VPlan, so any code - /// that needs to be added to the preheader gets directly generated by - /// VPlan. There should be no need to manage a pointer to the IR BasicBlock. - BasicBlock *VectorPreHeader = nullptr; + /// The last IR BasicBlock in the output IR. 
Set to the exit block of the + /// vector loop. + BasicBlock *ExitBB = nullptr; /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case /// of replication, maps the BasicBlock of the last replica created. SmallDenseMap VPBB2IRBB; - /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed - /// up at the end of vector code generation. - SmallVector VPBBsToFix; - CFGState() = default; + + /// Returns the BasicBlock* mapped to the pre-header of the loop region + /// containing \p R. + BasicBlock *getPreheaderBBFor(VPRecipeBase *R); } CFG; /// Hold a pointer to LoopInfo to register new basic blocks in the loop. @@ -337,7 +352,7 @@ struct VPTransformState { DominatorTree *DT; /// Hold a reference to the IRBuilder used to generate output IR code. - IRBuilder<> &Builder; + IRBuilderBase &Builder; VPValue2ValueTy VPValue2Value; @@ -353,41 +368,16 @@ struct VPTransformState { /// Holds recipes that may generate a poison value that is used after /// vectorization, even when their operands are not poison. SmallPtrSet MayGeneratePoisonRecipes; -}; - -/// VPUsers instance used by VPBlockBase to manage CondBit and the block -/// predicate. Currently VPBlockUsers are used in VPBlockBase for historical -/// reasons, but in the future the only VPUsers should either be recipes or -/// live-outs.VPBlockBase uses. -struct VPBlockUser : public VPUser { - VPBlockUser() : VPUser({}, VPUserID::Block) {} - VPValue *getSingleOperandOrNull() { - if (getNumOperands() == 1) - return getOperand(0); + /// The loop object for the current parent region, or nullptr. + Loop *CurrentVectorLoop = nullptr; - return nullptr; - } - const VPValue *getSingleOperandOrNull() const { - if (getNumOperands() == 1) - return getOperand(0); - - return nullptr; - } - - void resetSingleOpUser(VPValue *NewVal) { - assert(getNumOperands() <= 1 && "Didn't expect more than one operand!"); - if (!NewVal) { - if (getNumOperands() == 1) - removeLastOperand(); - return; - } - - if (getNumOperands() == 1) - setOperand(0, NewVal); - else - addOperand(NewVal); - } + /// LoopVersioning. It's only set up (non-null) if memchecks were + /// used. + /// + /// This is currently only used to add no-alias metadata based on the + /// memchecks. The actually versioning is performed manually. + std::unique_ptr LVer; }; /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. @@ -410,16 +400,6 @@ class VPBlockBase { /// List of successor blocks. SmallVector Successors; - /// Successor selector managed by a VPUser. For blocks with zero or one - /// successors, there is no operand. Otherwise there is exactly one operand - /// which is the branch condition. - VPBlockUser CondBitUser; - - /// If the block is predicated, its predicate is stored as an operand of this - /// VPUser to maintain the def-use relations. Otherwise there is no operand - /// here. - VPBlockUser PredicateUser; - /// VPlan containing the block. Can only be set on the entry block of the /// plan. VPlan *Plan = nullptr; @@ -493,11 +473,11 @@ public: const VPBasicBlock *getEntryBasicBlock() const; VPBasicBlock *getEntryBasicBlock(); - /// \return the VPBasicBlock that is the exit of this VPBlockBase, + /// \return the VPBasicBlock that is the exiting this VPBlockBase, /// recursively, if the latter is a VPRegionBlock. Otherwise, if this /// VPBlockBase is a VPBasicBlock, it is returned. 
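The LVer member declared just above is consumed by the addNewMetadata definition shown earlier in VPlan.cpp: memory instructions cloned into the vector loop inherit the scoped no-alias metadata that the runtime memchecks justify. The shape of that use, standalone (annotateClone is an illustrative name):

  #include "llvm/IR/Instructions.h"
  #include "llvm/Transforms/Utils/LoopVersioning.h"

  using namespace llvm;

  // Only loads and stores cloned into the versioned (vector) loop receive
  // the no-alias scopes; LoopVersioning here annotates, it does not version.
  static void annotateClone(LoopVersioning *LVer, Instruction *To,
                            const Instruction *Orig) {
    if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
      LVer->annotateInstWithNoAlias(To, Orig);
  }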
- const VPBasicBlock *getExitBasicBlock() const; - VPBasicBlock *getExitBasicBlock(); + const VPBasicBlock *getExitingBasicBlock() const; + VPBasicBlock *getExitingBasicBlock(); const VPBlocksTy &getSuccessors() const { return Successors; } VPBlocksTy &getSuccessors() { return Successors; } @@ -565,20 +545,6 @@ public: return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); } - /// \return the condition bit selecting the successor. - VPValue *getCondBit(); - /// \return the condition bit selecting the successor. - const VPValue *getCondBit() const; - /// Set the condition bit selecting the successor. - void setCondBit(VPValue *CV); - - /// \return the block's predicate. - VPValue *getPredicate(); - /// \return the block's predicate. - const VPValue *getPredicate() const; - /// Set the block's predicate. - void setPredicate(VPValue *Pred); - /// Set a given VPBlockBase \p Successor as the single successor of this /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. /// This VPBlockBase must have no successors. @@ -588,14 +554,11 @@ public: } /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two - /// successors of this VPBlockBase. \p Condition is set as the successor - /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p - /// IfFalse. This VPBlockBase must have no successors. - void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse, - VPValue *Condition) { + /// successors of this VPBlockBase. This VPBlockBase is not added as + /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no + /// successors. + void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) { assert(Successors.empty() && "Setting two successors when others exist."); - assert(Condition && "Setting two successors without condition!"); - setCondBit(Condition); appendSuccessor(IfTrue); appendSuccessor(IfFalse); } @@ -612,11 +575,8 @@ public: /// Remove all the predecessor of this block. void clearPredecessors() { Predecessors.clear(); } - /// Remove all the successors of this block and set to null its condition bit - void clearSuccessors() { - Successors.clear(); - setCondBit(nullptr); - } + /// Remove all the successors of this block. + void clearSuccessors() { Successors.clear(); } /// The method which generates the output IR that correspond to this /// VPBlockBase, thereby "executing" the VPlan. @@ -665,6 +625,32 @@ public: #endif }; +/// A value that is used outside the VPlan. The operand of the user needs to be +/// added to the associated LCSSA phi node. +class VPLiveOut : public VPUser { + PHINode *Phi; + +public: + VPLiveOut(PHINode *Phi, VPValue *Op) + : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {} + + /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply + /// means we need to add the appropriate incoming value from the middle + /// block as exiting edges from the scalar epilogue loop (if present) are + /// already in place, and we exit the vector loop exclusively to the middle + /// block. + void fixPhi(VPlan &Plan, VPTransformState &State); + + /// Returns true if the VPLiveOut uses scalars of operand \p Op. + bool usesScalars(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + + PHINode *getPhi() const { return Phi; } +}; + /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. 
/// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value @@ -699,6 +685,9 @@ public: /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); + /// Insert an unlinked recipe into \p BB immediately before the insertion + /// point \p IP. + void insertBefore(VPBasicBlock &BB, iplist::iterator IP); /// Insert an unlinked Recipe into a basic block immediately after /// the specified Recipe. @@ -759,14 +748,6 @@ public: bool mayReadOrWriteMemory() const { return mayReadFromMemory() || mayWriteToMemory(); } - - /// Returns true if the recipe only uses the first lane of operand \p Op. - /// Conservatively returns false. - virtual bool onlyFirstLaneUsed(const VPValue *Op) const { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return false; - } }; inline bool VPUser::classof(const VPDef *Def) { @@ -804,6 +785,7 @@ public: CanonicalIVIncrement, CanonicalIVIncrementNUW, BranchOnCount, + BranchOnCond }; private: @@ -892,6 +874,7 @@ public: case Instruction::Unreachable: case Instruction::Fence: case Instruction::AtomicRMW: + case VPInstruction::BranchOnCond: case VPInstruction::BranchOnCount: return false; default: @@ -1049,27 +1032,25 @@ public: }; /// A recipe for handling phi nodes of integer and floating-point inductions, -/// producing their vector and scalar values. +/// producing their vector values. class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { PHINode *IV; const InductionDescriptor &IndDesc; - bool NeedsScalarIV; bool NeedsVectorIV; public: - VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, + VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, - bool NeedsScalarIV, bool NeedsVectorIV) - : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), - IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + bool NeedsVectorIV) + : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}), + VPValue(IV, this), IV(IV), IndDesc(IndDesc), NeedsVectorIV(NeedsVectorIV) {} - VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, + VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, - TruncInst *Trunc, bool NeedsScalarIV, - bool NeedsVectorIV) - : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), - IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + TruncInst *Trunc, bool NeedsVectorIV) + : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}), + VPValue(Trunc, this), IV(IV), IndDesc(IndDesc), NeedsVectorIV(NeedsVectorIV) {} ~VPWidenIntOrFpInductionRecipe() override = default; @@ -1093,6 +1074,10 @@ public: VPValue *getStartValue() { return getOperand(0); } const VPValue *getStartValue() const { return getOperand(0); } + /// Returns the step value of the induction. + VPValue *getStepValue() { return getOperand(1); } + const VPValue *getStepValue() const { return getOperand(1); } + /// Returns the first defined value as TruncInst, if it is one or nullptr /// otherwise. TruncInst *getTruncInst() { @@ -1102,6 +1087,8 @@ public: return dyn_cast_or_null(getVPValue(0)->getUnderlyingValue()); } + PHINode *getPHINode() { return IV; } + /// Returns the induction descriptor for the recipe. const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } @@ -1115,9 +1102,6 @@ public: return TruncI ?
TruncI->getType() : IV->getType(); } - /// Returns true if a scalar phi needs to be created for the induction. - bool needsScalarIV() const { return NeedsScalarIV; } - /// Returns true if a vector phi needs to be created for the induction. bool needsVectorIV() const { return NeedsVectorIV; } }; @@ -1167,6 +1151,9 @@ public: VPValue *getStartValue() { return getNumOperands() == 0 ? nullptr : getOperand(0); } + VPValue *getStartValue() const { + return getNumOperands() == 0 ? nullptr : getOperand(0); + } /// Returns the incoming value from the loop backedge. VPValue *getBackedgeValue() { @@ -1180,6 +1167,52 @@ public: } }; +class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { + const InductionDescriptor &IndDesc; + + /// SCEV used to expand step. + /// FIXME: move expansion of step to the pre-header, once it is modeled + /// explicitly. + ScalarEvolution &SE; + +public: + /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p + /// Start. + VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, + const InductionDescriptor &IndDesc, + ScalarEvolution &SE) + : VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC, + Phi), + IndDesc(IndDesc), SE(SE) { + addOperand(Start); + } + + ~VPWidenPointerInductionRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *B) { + return B->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC; + } + static inline bool classof(const VPHeaderPHIRecipe *R) { + return R->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVWidenPointerInductionSC; + } + + /// Generate vector values for the pointer induction. + void execute(VPTransformState &State) override; + + /// Returns true if only scalar values will be generated. + bool onlyScalarsGenerated(ElementCount VF); + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for handling header phis that are widened in the vector loop. /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are /// managed in the recipe directly. @@ -1363,9 +1396,8 @@ public: "Op must be an operand of the recipe"); // Recursing through Blend recipes only, must terminate at header phi's the // latest. - return all_of(users(), [this](VPUser *U) { - return cast(U)->onlyFirstLaneUsed(this); - }); + return all_of(users(), + [this](VPUser *U) { return U->onlyFirstLaneUsed(this); }); } }; @@ -1440,6 +1472,15 @@ public: unsigned getNumStoreOperands() const { return getNumOperands() - (HasMask ? 2 : 1); } + + /// The recipe only uses the first lane of the address. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return Op == getAddr() && all_of(getStoredValues(), [Op](VPValue *StoredV) { + return Op != StoredV; + }); + } }; /// A recipe to represent inloop reduction operations, performing a reduction on @@ -1551,6 +1592,13 @@ public: "Op must be an operand of the recipe"); return isUniform(); } + + /// Returns true if the recipe uses scalars of operand \p Op. 
+ bool usesScalars(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } }; /// A recipe for generating conditional branches on the bits of a mask. @@ -1590,6 +1638,13 @@ public: // Mask is optional. return getNumOperands() == 1 ? getOperand(0) : nullptr; } + + /// Returns true if the recipe uses scalars of operand \p Op. + bool usesScalars(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } }; /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when @@ -1619,6 +1674,13 @@ public: void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + /// Returns true if the recipe uses scalars of operand \p Op. + bool usesScalars(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } }; /// A Recipe for widening load/store operations. @@ -1627,7 +1689,7 @@ public: /// - For store: Address, stored value, optional mask /// TODO: We currently execute only per-part unless a specific instance is /// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue { +class VPWidenMemoryInstructionRecipe : public VPRecipeBase { Instruction &Ingredient; // Whether the loaded-from / stored-to addresses are consecutive. @@ -1649,10 +1711,10 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue { public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse) - : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), - VPValue(VPValue::VPVMemoryInstructionSC, &Load, this), Ingredient(Load), + : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); + new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this); setMask(Mask); } @@ -1660,7 +1722,6 @@ public: VPValue *StoredValue, VPValue *Mask, bool Consecutive, bool Reverse) : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}), - VPValue(VPValue::VPVMemoryInstructionSC, &Store, this), Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); setMask(Mask); @@ -1714,9 +1775,42 @@ public: "Op must be an operand of the recipe"); // Widened, consecutive memory operations only demand the first lane of - // their address. - return Op == getAddr() && isConsecutive(); + // their address, unless the same operand is also stored. That latter can + // happen with opaque pointers. + return Op == getAddr() && isConsecutive() && + (!isStore() || Op != getStoredValue()); + } + + Instruction &getIngredient() const { return Ingredient; } +}; + +/// Recipe to expand a SCEV expression. +class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue { + const SCEV *Expr; + ScalarEvolution &SE; + +public: + VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) + : VPRecipeBase(VPExpandSCEVSC, {}), VPValue(nullptr, this), Expr(Expr), + SE(SE) {} + + ~VPExpandSCEVRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. 
+ static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPExpandSCEVSC; } + + /// Expand the SCEV expression and generate its value. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + const SCEV *getSCEV() const { return Expr; } }; /// Canonical scalar induction phi of the vector loop. Starting at the specified @@ -1738,6 +1832,12 @@ public: static inline bool classof(const VPDef *D) { return D->getVPDefID() == VPCanonicalIVPHISC; } + static inline bool classof(const VPHeaderPHIRecipe *D) { + return D->getVPDefID() == VPCanonicalIVPHISC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC; + } /// Generate the canonical scalar induction phi of the vector loop. void execute(VPTransformState &State) override; @@ -1803,6 +1903,64 @@ public: } }; +/// A recipe for handling phi nodes of integer and floating-point inductions, +/// producing their scalar values. +class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue { + /// Scalar type to use for the generated values. + Type *Ty; + /// If not nullptr, truncate the generated values to TruncToTy. + Type *TruncToTy; + const InductionDescriptor &IndDesc; + +public: + VPScalarIVStepsRecipe(Type *Ty, const InductionDescriptor &IndDesc, + VPValue *CanonicalIV, VPValue *Start, VPValue *Step, + Type *TruncToTy) + : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Start, Step}), + VPValue(nullptr, this), Ty(Ty), TruncToTy(TruncToTy), IndDesc(IndDesc) { + } + + ~VPScalarIVStepsRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC; + } + /// Extra classof implementations to allow directly casting from VPUser -> + /// VPScalarIVStepsRecipe. + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast(U); + return R && R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC; + } + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC; + } + + /// Generate the scalarized versions of the phi node as needed by its users. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + /// Returns true if the induction is canonical, i.e. starting at 0 and + /// incremented by UF * VF (= the original IV is incremented by 1). + bool isCanonical() const; + + VPCanonicalIVPHIRecipe *getCanonicalIV() const; + VPValue *getStartValue() const { return getOperand(1); } + VPValue *getStepValue() const { return getOperand(2); } + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } +}; + /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It /// holds a sequence of zero or more VPRecipe's each representing a sequence of /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
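VPExpandSCEVRecipe above pairs with the vputils::getOrCreateVPValueForSCEVExpr helper declared near the end of this header: SCEV constants and unknowns come back as live-in external definitions, while other expressions get an expansion recipe in the pre-header that is reused on later queries. A usage sketch assuming this patch; the wrapper name is hypothetical:

    #include "VPlan.h"
    using namespace llvm;

    // Materialize a SCEV (e.g. an induction step) as a VPValue that other
    // recipes can consume as an operand.
    static VPValue *stepAsVPValue(VPlan &Plan, const SCEV *Step,
                                  ScalarEvolution &SE) {
      return vputils::getOrCreateVPValueForSCEVExpr(Plan, Step, SE);
    }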
@@ -1895,6 +2053,8 @@ public: /// SplitAt to the new block. Returns the new block. VPBasicBlock *splitAt(iterator SplitAt); + VPRegionBlock *getEnclosingLoopRegion(); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p /// SlotTracker is used to print unnamed VPValues using consecutive numbers. @@ -1906,6 +2066,14 @@ public: using VPBlockBase::print; // Get the print(raw_ostream &O) version. #endif + /// If the block has multiple successors, return the branch recipe terminating + /// the block. If there are no or only a single successor, return nullptr. + VPRecipeBase *getTerminator(); + const VPRecipeBase *getTerminator() const; + + /// Returns true if the block is exiting its parent region. + bool isExiting() const; + private: /// Create an IR BasicBlock to hold the output instructions generated by this /// VPBasicBlock, and return it. Update the CFGState accordingly. @@ -1913,7 +2081,7 @@ private: }; /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks -/// which form a Single-Entry-Single-Exit subgraph of the output IR CFG. +/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. /// A VPRegionBlock may indicate that its contents are to be replicated several /// times. This is designed to support predicated scalarization, in which a /// scalar if-then code structure needs to be generated VF * UF times. Having @@ -1924,25 +2092,26 @@ class VPRegionBlock : public VPBlockBase { /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. VPBlockBase *Entry; - /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock. - VPBlockBase *Exit; + /// Hold the Single Exiting block of the SESE region modelled by the + /// VPRegionBlock. + VPBlockBase *Exiting; /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. bool IsReplicator; public: - VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit, + VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name = "", bool IsReplicator = false) - : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit), + : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting), IsReplicator(IsReplicator) { assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); - assert(Exit->getSuccessors().empty() && "Exit block has successors."); + assert(Exiting->getSuccessors().empty() && "Exit block has successors."); Entry->setParent(this); - Exit->setParent(this); + Exiting->setParent(this); } VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) - : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr), + : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), IsReplicator(IsReplicator) {} ~VPRegionBlock() override { @@ -1976,16 +2145,22 @@ public: // DominatorTreeBase representing the Graph type. VPBlockBase &front() const { return *Entry; } - const VPBlockBase *getExit() const { return Exit; } - VPBlockBase *getExit() { return Exit; } + const VPBlockBase *getExiting() const { return Exiting; } + VPBlockBase *getExiting() { return Exiting; } - /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p - /// ExitBlock must have no successors. - void setExit(VPBlockBase *ExitBlock) { - assert(ExitBlock->getSuccessors().empty() && + /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p + /// ExitingBlock must have no successors.
+ void setExiting(VPBlockBase *ExitingBlock) { + assert(ExitingBlock->getSuccessors().empty() && "Exit block cannot have successors."); - Exit = ExitBlock; - ExitBlock->setParent(this); + Exiting = ExitingBlock; + ExitingBlock->setParent(this); + } + + /// Returns the pre-header VPBasicBlock of the loop region. + VPBasicBlock *getPreheaderVPBB() { + assert(!isReplicator() && "should only get pre-header of loop regions"); + return getSinglePredecessor()->getExitingBasicBlock(); } /// An indicator whether this region is to generate multiple replicated @@ -2119,11 +2294,11 @@ struct GraphTraits> using nodes_iterator = df_iterator; static NodeRef getEntryNode(Inverse N) { - return N.Graph->getExit(); + return N.Graph->getExiting(); } static nodes_iterator nodes_begin(GraphRef N) { - return nodes_iterator::begin(N->getExit()); + return nodes_iterator::begin(N->getExiting()); } static nodes_iterator nodes_end(GraphRef N) { @@ -2281,12 +2456,9 @@ class VPlan { /// Holds the name of the VPlan, for printing. std::string Name; - /// Holds all the external definitions created for this VPlan. - // TODO: Introduce a specific representation for external definitions in - // VPlan. External definitions must be immutable and hold a pointer to its - // underlying IR that will be used to implement its structural comparison - // (operators '==' and '<'). - SetVector VPExternalDefs; + /// Holds all the external definitions created for this VPlan. External + /// definitions must be immutable and hold a pointer to their underlying IR. + DenseMap VPExternalDefs; /// Represents the trip count of the original loop, for folding /// the tail. @@ -2307,13 +2479,13 @@ class VPlan { /// to be free when the plan's destructor is called. SmallVector VPValuesToFree; - /// Holds the VPLoopInfo analysis for this VPlan. - VPLoopInfo VPLInfo; - /// Indicates whether it is safe use the Value2VPValue mapping or if the /// mapping cannot be used any longer, because it is stale. bool Value2VPValueEnabled = true; + /// Values used outside the plan. + MapVector LiveOuts; + public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) { if (Entry) @@ -2321,6 +2493,8 @@ public: } ~VPlan() { + clearLiveOuts(); + if (Entry) { VPValue DummyValue; for (VPBlockBase *Block : depth_first(Entry)) @@ -2334,13 +2508,14 @@ public: delete TripCount; if (BackedgeTakenCount) delete BackedgeTakenCount; - for (VPValue *Def : VPExternalDefs) - delete Def; + for (auto &P : VPExternalDefs) + delete P.second; } /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, - Value *CanonicalIVStartValue, VPTransformState &State); + Value *CanonicalIVStartValue, VPTransformState &State, + bool IsEpilogueVectorization); /// Generate the IR code for this VPlan. void execute(struct VPTransformState *State); @@ -2383,9 +2558,13 @@ public: void setName(const Twine &newName) { Name = newName.str(); } - /// Add \p VPVal to the pool of external definitions if it's not already - /// in the pool. - void addExternalDef(VPValue *VPVal) { VPExternalDefs.insert(VPVal); } + /// Get the existing or add a new external definition for \p V. + VPValue *getOrAddExternalDef(Value *V) { + auto I = VPExternalDefs.insert({V, nullptr}); + if (I.second) + I.first->second = new VPValue(V); + return I.first->second; + } void addVPValue(Value *V) { assert(Value2VPValueEnabled && @@ -2432,10 +2611,6 @@ public: Value2VPValue.erase(V); } - /// Return the VPLoopInfo analysis for this VPlan. 
- VPLoopInfo &getVPLoopInfo() { return VPLInfo; } - const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print this VPlan to \p O. void print(raw_ostream &O) const; @@ -2465,7 +2640,10 @@ public: /// Returns the VPRegionBlock of the vector loop. VPRegionBlock *getVectorLoopRegion() { - return cast(getEntry()); + return cast(getEntry()->getSingleSuccessor()); + } + const VPRegionBlock *getVectorLoopRegion() const { + return cast(getEntry()->getSingleSuccessor()); } /// Returns the canonical induction recipe of the vector loop. @@ -2478,6 +2656,23 @@ public: return cast(&*EntryVPBB->begin()); } + void addLiveOut(PHINode *PN, VPValue *V); + + void clearLiveOuts() { + for (auto &KV : LiveOuts) + delete KV.second; + LiveOuts.clear(); + } + + void removeLiveOut(PHINode *PN) { + delete LiveOuts[PN]; + LiveOuts.erase(PN); + } + + const MapVector &getLiveOuts() const { + return LiveOuts; + } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. @@ -2567,9 +2762,8 @@ public: /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's - /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's - /// conditional bit is propagated to \p NewBlock. \p NewBlock must have - /// neither successors nor predecessors. + /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must + /// have neither successors nor predecessors. static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { assert(NewBlock->getSuccessors().empty() && NewBlock->getPredecessors().empty() && "Can't insert new block with predecessors or successors."); NewBlock->setParent(BlockPtr->getParent()); SmallVector Succs(BlockPtr->successors()); for (VPBlockBase *Succ : Succs) { disconnectBlocks(BlockPtr, Succ); connectBlocks(NewBlock, Succ); } - NewBlock->setCondBit(BlockPtr->getCondBit()); - BlockPtr->setCondBit(nullptr); connectBlocks(BlockPtr, NewBlock); } /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr - /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor - /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse - /// must have neither successors nor predecessors. + /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors + /// and \p IfTrue and \p IfFalse must have neither successors nor + /// predecessors.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, - VPValue *Condition, VPBlockBase *BlockPtr) { + VPBlockBase *BlockPtr) { assert(IfTrue->getSuccessors().empty() && "Can't insert IfTrue with successors."); assert(IfFalse->getSuccessors().empty() && "Can't insert IfFalse with successors."); - BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition); + BlockPtr->setTwoSuccessors(IfTrue, IfFalse); IfTrue->setPredecessors({BlockPtr}); IfFalse->setPredecessors({BlockPtr}); IfTrue->setParent(BlockPtr->getParent()); @@ -2639,8 +2831,8 @@ public: R.moveBefore(*PredVPBB, PredVPBB->end()); VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); auto *ParentRegion = cast(Block->getParent()); - if (ParentRegion->getExit() == Block) - ParentRegion->setExit(PredVPBB); + if (ParentRegion->getExiting() == Block) + ParentRegion->setExiting(PredVPBB); SmallVector Successors(Block->successors()); for (auto *Succ : Successors) { VPBlockUtils::disconnectBlocks(Block, Succ); @@ -2650,41 +2842,6 @@ public: return PredVPBB; } - /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge. - static bool isBackEdge(const VPBlockBase *FromBlock, - const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) { - assert(FromBlock->getParent() == ToBlock->getParent() && - FromBlock->getParent() && "Must be in same region"); - const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock); - const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock); - if (!FromLoop || !ToLoop || FromLoop != ToLoop) - return false; - - // A back-edge is a branch from the loop latch to its header. - return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader(); - } - - /// Returns true if \p Block is a loop latch - static bool blockIsLoopLatch(const VPBlockBase *Block, - const VPLoopInfo *VPLInfo) { - if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block)) - return ParentVPL->isLoopLatch(Block); - - return false; - } - - /// Count and return the number of succesors of \p PredBlock excluding any - /// backedges. - static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock, - VPLoopInfo *VPLI) { - unsigned Count = 0; - for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) { - if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI)) - Count++; - } - return Count; - } - /// Return an iterator range over \p Range which only includes \p BlockTy /// blocks. The accesses are casted to \p BlockTy. template @@ -2845,6 +3002,13 @@ namespace vputils { /// Returns true if only the first lane of \p Def is used. bool onlyFirstLaneUsed(VPValue *Def); +/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p +/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in +/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's +/// pre-header already contains a recipe expanding \p Expr, return it. If not, +/// create a new one. +VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, + ScalarEvolution &SE); } // end namespace vputils } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 379988733312..84b0dac862b6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -42,9 +42,6 @@ private: // Vectorization plan that we are working on. VPlan &Plan; - // Output Top Region. - VPRegionBlock *TopRegion = nullptr; - // Builder of the VPlan instruction-level representation. 
VPBuilder VPIRBuilder; @@ -59,6 +56,9 @@ private: // Hold phi nodes that need to be fixed once the plain CFG has been built. SmallVector PhisToFix; + /// Maps loops in the original IR to their corresponding region. + DenseMap Loop2Region; + // Utility functions. void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); void fixPhiNodes(); @@ -73,8 +73,9 @@ public: PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) : TheLoop(Lp), LI(LI), Plan(P) {} - // Build the plain CFG and return its Top Region. - VPRegionBlock *buildPlainCFG(); + /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected + /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG. + VPBasicBlock *buildPlainCFG(); }; } // anonymous namespace @@ -106,19 +107,32 @@ void PlainCFGBuilder::fixPhiNodes() { } } -// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an -// existing one if it was already created. +// Create a new empty VPBasicBlock for an incoming BasicBlock in the region +// corresponding to the containing loop or retrieve an existing one if it was +// already created. If no region exists yet for the loop containing \p BB, a new +// one is created. VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { auto BlockIt = BB2VPBB.find(BB); if (BlockIt != BB2VPBB.end()) // Retrieve existing VPBB. return BlockIt->second; + // Get or create a region for the loop containing BB. + Loop *CurrentLoop = LI->getLoopFor(BB); + VPRegionBlock *ParentR = nullptr; + if (CurrentLoop) { + auto Iter = Loop2Region.insert({CurrentLoop, nullptr}); + if (Iter.second) + Iter.first->second = new VPRegionBlock( + CurrentLoop->getHeader()->getName().str(), false /*isReplicator*/); + ParentR = Iter.first->second; + } + // Create new VPBB. LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); BB2VPBB[BB] = VPBB; - VPBB->setParent(TopRegion); + VPBB->setParent(ParentR); return VPBB; } @@ -182,8 +196,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) { // A and B: Create VPValue and add it to the pool of external definitions and // to the Value->VPValue map. - VPValue *NewVPVal = new VPValue(IRVal); - Plan.addExternalDef(NewVPVal); + VPValue *NewVPVal = Plan.getOrAddExternalDef(IRVal); IRDef2VPValue[IRVal] = NewVPVal; return NewVPVal; } @@ -203,10 +216,13 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, "Instruction shouldn't have been visited."); if (auto *Br = dyn_cast(Inst)) { - // Branch instruction is not explicitly represented in VPlan but we need - // to represent its condition bit when it's conditional. - if (Br->isConditional()) - getOrCreateVPOperand(Br->getCondition()); + // Conditional branch instructions are represented using BranchOnCond + // recipes. + if (Br->isConditional()) { + VPValue *Cond = getOrCreateVPOperand(Br->getCondition()); + VPBB->appendRecipe( + new VPInstruction(VPInstruction::BranchOnCond, {Cond})); + } // Skip the rest of the Instruction processing for Branch instructions. continue; @@ -238,11 +254,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, }
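Note that getOrCreateVPOperand above now funnels through VPlan::getOrAddExternalDef, which keys external definitions by the underlying IR value rather than allocating a fresh VPValue per call. The observable effect, sketched with a hypothetical helper:

    #include "VPlan.h"
    #include <cassert>
    using namespace llvm;

    // Repeated queries for the same IR value yield the same external def;
    // the VPlan owns these VPValues and deletes them in its destructor.
    static VPValue *externalDefFor(VPlan &Plan, Value *V) {
      VPValue *Def = Plan.getOrAddExternalDef(V);
      assert(Def == Plan.getOrAddExternalDef(V) && "defs are deduplicated");
      return Def;
    }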
// Main interface to build the plain CFG. -VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { - // 1. Create the Top Region. It will be the parent of all VPBBs. - TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/); - - // 2. Scan the body of the loop in a topological order to visit each basic +VPBasicBlock *PlainCFGBuilder::buildPlainCFG() { + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that // predecessors must be set in the same order as they are in the incoming IR. @@ -251,21 +264,20 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *PreheaderBB = TheLoop->getLoopPreheader(); - assert((PreheaderBB->getTerminator()->getNumSuccessors() == 1) && + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && "Unexpected loop preheader"); - VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(PreheaderBB); - for (auto &I : *PreheaderBB) { + VPBasicBlock *ThePreheaderVPBB = getOrCreateVPBB(ThePreheaderBB); + ThePreheaderVPBB->setName("vector.ph"); + for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; - VPValue *VPV = new VPValue(&I); - Plan.addExternalDef(VPV); - IRDef2VPValue[&I] = VPV; + IRDef2VPValue[&I] = Plan.getOrAddExternalDef(&I); } // Create empty VPBB for Loop H so that we can link PH->H. VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); - // Preheader's predecessors will be set during the loop RPO traversal below. - PreheaderVPBB->setOneSuccessor(HeaderVPBB); + HeaderVPBB->setName("vector.body"); + ThePreheaderVPBB->setOneSuccessor(HeaderVPBB); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -295,16 +307,13 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { // Get VPBB's condition bit. assert(isa(TI) && "Unsupported terminator!"); - auto *Br = cast(TI); - Value *BrCond = Br->getCondition(); // Look up the branch condition to get the corresponding VPValue // representing the condition bit in VPlan (which may be in another VPBB). - assert(IRDef2VPValue.count(BrCond) && + assert(IRDef2VPValue.count(cast(TI)->getCondition()) && "Missing condition bit in IRDef2VPValue!"); - VPValue *VPCondBit = IRDef2VPValue[BrCond]; - // Link successors using condition bit. - VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1, VPCondBit); + // Link successors. + VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1); } else llvm_unreachable("Number of successors not supported."); @@ -312,30 +321,61 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { setVPBBPredsFromBB(VPBB, BB); } - // 3. Process outermost loop exit. We created an empty VPBB for the loop + // 2. Process outermost loop exit. We created an empty VPBB for the loop // single exit BB during the RPO traversal of the loop body but Instructions // weren't visited because it's not part of the loop. BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); assert(LoopExitBB && "Loops with multiple exits are not supported."); VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - createVPInstructionsForVPBB(LoopExitVPBB, LoopExitBB); // Loop exit was already set as successor of the loop exiting BB. // We only set its predecessor VPBB now. setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); + // 3. Fix up region blocks for loops. For each loop, + // * use the header block as entry to the corresponding region, + // * use the latch block as exit of the corresponding region, + // * set the region as successor of the loop pre-header, and + // * set the exit block as successor to the region.
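The worklist that follows implements this fix-up. Schematically, the plain chain vector.ph -> vector.body -> ... -> latch -> exit, with a latch-to-header back-edge, becomes vector.ph -> Region[entry: vector.body, exiting: latch] -> exit; the back-edge is implied by the region. Once regions exist, the navigation helpers added earlier in this patch apply; a sketch with a hypothetical wrapper:

    #include "VPlan.h"
    using namespace llvm;

    // For a loop region produced by this fix-up, the pre-header is simply
    // the region's single predecessor (getPreheaderVPBB asserts this).
    static VPBasicBlock *preheaderOf(VPRegionBlock *Region) {
      return Region->getPreheaderVPBB();
    }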
+ SmallVector LoopWorkList; + LoopWorkList.push_back(TheLoop); + while (!LoopWorkList.empty()) { + Loop *L = LoopWorkList.pop_back_val(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Exiting = L->getLoopLatch(); + assert(Exiting == L->getExitingBlock() && + "Latch must be the only exiting block"); + VPRegionBlock *Region = Loop2Region[L]; + VPBasicBlock *HeaderVPBB = getOrCreateVPBB(Header); + VPBasicBlock *ExitingVPBB = getOrCreateVPBB(Exiting); + + // Disconnect backedge and pre-header from header. + VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(L->getLoopPreheader()); + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); + VPBlockUtils::disconnectBlocks(ExitingVPBB, HeaderVPBB); + + Region->setParent(PreheaderVPBB->getParent()); + Region->setEntry(HeaderVPBB); + VPBlockUtils::connectBlocks(PreheaderVPBB, Region); + + // Disconnect exit block from exiting (=latch) block, set exiting block and + // connect region to exit block. + VPBasicBlock *ExitVPBB = getOrCreateVPBB(L->getExitBlock()); + VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB); + Region->setExiting(ExitingVPBB); + VPBlockUtils::connectBlocks(Region, ExitVPBB); + + // Queue sub-loops for processing. + LoopWorkList.append(L->begin(), L->end()); + } // 4. The whole CFG has been built at this point so all the input Values must // have a VPlan counterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. fixPhiNodes(); - // 5. Final Top Region setup. Set outermost loop pre-header and single exit as - // Top Region entry and exit. - TopRegion->setEntry(PreheaderVPBB); - TopRegion->setExit(LoopExitVPBB); - return TopRegion; + return ThePreheaderVPBB; } -VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { +VPBasicBlock *VPlanHCFGBuilder::buildPlainCFG() { PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); return PCFGBuilder.buildPlainCFG(); } @@ -343,20 +383,15 @@ VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { // Public interface to build a H-CFG. void VPlanHCFGBuilder::buildHierarchicalCFG() { // Build Top Region enclosing the plain CFG and set it as VPlan entry. - VPRegionBlock *TopRegion = buildPlainCFG(); - Plan.setEntry(TopRegion); + VPBasicBlock *EntryVPBB = buildPlainCFG(); + Plan.setEntry(EntryVPBB); LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); + VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); Verifier.verifyHierarchicalCFG(TopRegion); // Compute plain CFG dom tree for VPLInfo. VPDomTree.recalculate(*TopRegion); LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; VPDomTree.print(dbgs())); - - // Compute VPLInfo and keep it in Plan. - VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); - VPLInfo.analyze(VPDomTree); - LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; - VPLInfo.print(dbgs())); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index 238ee7e6347c..2d52990af268 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -24,13 +24,15 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H -#include "VPlan.h" #include "VPlanDominatorTree.h" #include "VPlanVerifier.h" namespace llvm { class Loop; +class LoopInfo; +class VPRegionBlock; +class VPlan; class VPlanTestBase; /// Main class to build the VPlan H-CFG for an incoming IR. @@ -55,9 +57,9 @@ private: // are introduced. VPDominatorTree VPDomTree; - /// Build plain CFG for TheLoop.
Return a new VPRegionBlock (TopRegion) - /// enclosing the plain CFG. - VPRegionBlock *buildPlainCFG(); + /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected + /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG. + VPBasicBlock *buildPlainCFG(); public: VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) diff --git a/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h b/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h deleted file mode 100644 index 5208f2d58e2b..000000000000 --- a/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h +++ /dev/null @@ -1,44 +0,0 @@ -//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a -/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization -/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further -/// information can be found in VectorizationPlanner.rst. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H -#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H - -#include "llvm/Analysis/LoopInfoImpl.h" - -namespace llvm { -class VPBlockBase; - -/// Hold analysis information for every loop detected by VPLoopInfo. It is an -/// instantiation of LoopBase. -class VPLoop : public LoopBase { -private: - friend class LoopInfoBase; - explicit VPLoop(VPBlockBase *VPB) : LoopBase(VPB) {} -}; - -/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based -/// Hierarchical CFG. It is a specialization of LoopInfoBase class. -// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which -// is the same as the incoming IR CFG. If it's more efficient than running the -// whole loop detection algorithm, we may want to create a mechanism to -// translate LoopInfo into VPLoopInfo. However, that would require significant -// changes in LoopInfoBase class. -typedef LoopInfoBase VPLoopInfo; - -} // namespace llvm - -#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp deleted file mode 100644 index e879a33db6ee..000000000000 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ /dev/null @@ -1,248 +0,0 @@ -//===-- VPlanPredicator.cpp -------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the VPlanPredicator class which contains the public -/// interfaces to predicate and linearize the VPlan region. 
-/// -//===----------------------------------------------------------------------===// - -#include "VPlanPredicator.h" -#include "VPlan.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "VPlanPredicator" - -using namespace llvm; - -// Generate VPInstructions at the beginning of CurrBB that calculate the -// predicate being propagated from PredBB to CurrBB depending on the edge type -// between them. For example if: -// i. PredBB is controlled by predicate %BP, and -// ii. The edge PredBB->CurrBB is the false edge, controlled by the condition -// bit value %CBV then this function will generate the following two -// VPInstructions at the start of CurrBB: -// %IntermediateVal = not %CBV -// %FinalVal = and %BP %IntermediateVal -// It returns %FinalVal. -VPValue *VPlanPredicator::getOrCreateNotPredicate(VPBasicBlock *PredBB, - VPBasicBlock *CurrBB) { - VPValue *CBV = PredBB->getCondBit(); - - // Set the intermediate value - this is either 'CBV', or 'not CBV' - // depending on the edge type. - EdgeType ET = getEdgeTypeBetween(PredBB, CurrBB); - VPValue *IntermediateVal = nullptr; - switch (ET) { - case EdgeType::TRUE_EDGE: - // CurrBB is the true successor of PredBB - nothing to do here. - IntermediateVal = CBV; - break; - - case EdgeType::FALSE_EDGE: - // CurrBB is the False successor of PredBB - compute not of CBV. - IntermediateVal = Builder.createNot(CBV, {}); - break; - } - - // Now AND intermediate value with PredBB's block predicate if it has one. - VPValue *BP = PredBB->getPredicate(); - if (BP) - return Builder.createAnd(BP, IntermediateVal, {}); - else - return IntermediateVal; -} - -// Generate a tree of ORs for all IncomingPredicates in WorkList. -// Note: This function destroys the original Worklist. -// -// P1 P2 P3 P4 P5 -// \ / \ / / -// OR1 OR2 / -// \ | / -// \ +/-+ -// \ / | -// OR3 | -// \ | -// OR4 <- Returns this -// | -// -// The algorithm uses a worklist of predicates as its main data structure. -// We pop a pair of values from the front (e.g. P1 and P2), generate an OR -// (in this example OR1), and push it back. In this example the worklist -// contains {P3, P4, P5, OR1}. -// The process iterates until we have only one element in the Worklist (OR4). -// The last element is the root predicate which is returned. -VPValue *VPlanPredicator::genPredicateTree(std::list &Worklist) { - if (Worklist.empty()) - return nullptr; - - // The worklist initially contains all the leaf nodes. Initialize the tree - // using them. - while (Worklist.size() >= 2) { - // Pop a pair of values from the front. - VPValue *LHS = Worklist.front(); - Worklist.pop_front(); - VPValue *RHS = Worklist.front(); - Worklist.pop_front(); - - // Create an OR of these values. - VPValue *Or = Builder.createOr(LHS, RHS, {}); - - // Push OR to the back of the worklist. - Worklist.push_back(Or); - } - - assert(Worklist.size() == 1 && "Expected 1 item in worklist"); - - // The root is the last node in the worklist. - VPValue *Root = Worklist.front(); - - // This root needs to replace the existing block predicate. This is done in - // the caller function. 
- return Root; -} - -// Return whether the edge FromBlock -> ToBlock is a TRUE_EDGE or FALSE_EDGE -VPlanPredicator::EdgeType -VPlanPredicator::getEdgeTypeBetween(VPBlockBase *FromBlock, - VPBlockBase *ToBlock) { - unsigned Count = 0; - for (VPBlockBase *SuccBlock : FromBlock->getSuccessors()) { - if (SuccBlock == ToBlock) { - assert(Count < 2 && "Switch not supported currently"); - return (Count == 0) ? EdgeType::TRUE_EDGE : EdgeType::FALSE_EDGE; - } - Count++; - } - - llvm_unreachable("Broken getEdgeTypeBetween"); -} - -// Generate all predicates needed for CurrBlock by going through its immediate -// predecessor blocks. -void VPlanPredicator::createOrPropagatePredicates(VPBlockBase *CurrBlock, - VPRegionBlock *Region) { - // Blocks that dominate region exit inherit the predicate from the region. - // Return after setting the predicate. - if (VPDomTree.dominates(CurrBlock, Region->getExit())) { - VPValue *RegionBP = Region->getPredicate(); - CurrBlock->setPredicate(RegionBP); - return; - } - - // Collect all incoming predicates in a worklist. - std::list IncomingPredicates; - - // Set the builder's insertion point to the top of the current BB - VPBasicBlock *CurrBB = cast(CurrBlock->getEntryBasicBlock()); - Builder.setInsertPoint(CurrBB, CurrBB->begin()); - - // For each predecessor, generate the VPInstructions required for - // computing 'BP AND (not) CBV" at the top of CurrBB. - // Collect the outcome of this calculation for all predecessors - // into IncomingPredicates. - for (VPBlockBase *PredBlock : CurrBlock->getPredecessors()) { - // Skip back-edges - if (VPBlockUtils::isBackEdge(PredBlock, CurrBlock, VPLI)) - continue; - - VPValue *IncomingPredicate = nullptr; - unsigned NumPredSuccsNoBE = - VPBlockUtils::countSuccessorsNoBE(PredBlock, VPLI); - - // If there is an unconditional branch to the currBB, then we don't create - // edge predicates. We use the predecessor's block predicate instead. - if (NumPredSuccsNoBE == 1) - IncomingPredicate = PredBlock->getPredicate(); - else if (NumPredSuccsNoBE == 2) { - // Emit recipes into CurrBlock if required - assert(isa(PredBlock) && "Only BBs have multiple exits"); - IncomingPredicate = - getOrCreateNotPredicate(cast(PredBlock), CurrBB); - } else - llvm_unreachable("FIXME: switch statement ?"); - - if (IncomingPredicate) - IncomingPredicates.push_back(IncomingPredicate); - } - - // Logically OR all incoming predicates by building the Predicate Tree. - VPValue *Predicate = genPredicateTree(IncomingPredicates); - - // Now update the block's predicate with the new one. - CurrBlock->setPredicate(Predicate); -} - -// Generate all predicates needed for Region. -void VPlanPredicator::predicateRegionRec(VPRegionBlock *Region) { - VPBasicBlock *EntryBlock = cast(Region->getEntry()); - ReversePostOrderTraversal RPOT(EntryBlock); - - // Generate edge predicates and append them to the block predicate. RPO is - // necessary since the predecessor blocks' block predicate needs to be set - // before the current block's block predicate can be computed. - for (VPBlockBase *Block : RPOT) { - // TODO: Handle nested regions once we start generating the same. - assert(!isa(Block) && "Nested region not expected"); - createOrPropagatePredicates(Block, Region); - } -} - -// Linearize the CFG within Region. -// TODO: Predication and linearization need RPOT for every region. -// This traversal is expensive. Since predication is not adding new -// blocks, we should be able to compute RPOT once in predication and -// reuse it here. 
This becomes even more important once we have nested -// regions. -void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) { - ReversePostOrderTraversal RPOT(Region->getEntry()); - VPBlockBase *PrevBlock = nullptr; - - for (VPBlockBase *CurrBlock : RPOT) { - // TODO: Handle nested regions once we start generating the same. - assert(!isa(CurrBlock) && "Nested region not expected"); - - // Linearize control flow by adding an unconditional edge between PrevBlock - // and CurrBlock skipping loop headers and latches to keep intact loop - // header predecessors and loop latch successors. - if (PrevBlock && !VPLI->isLoopHeader(CurrBlock) && - !VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI)) { - - LLVM_DEBUG(dbgs() << "Linearizing: " << PrevBlock->getName() << "->" - << CurrBlock->getName() << "\n"); - - PrevBlock->clearSuccessors(); - CurrBlock->clearPredecessors(); - VPBlockUtils::connectBlocks(PrevBlock, CurrBlock); - } - - PrevBlock = CurrBlock; - } -} - -// Entry point. The driver function for the predicator. -void VPlanPredicator::predicate() { - // Predicate the blocks within Region. - predicateRegionRec(cast(Plan.getEntry())); - - // Linearlize the blocks with Region. - linearizeRegionRec(cast(Plan.getEntry())); -} - -VPlanPredicator::VPlanPredicator(VPlan &Plan) - : Plan(Plan), VPLI(&(Plan.getVPLoopInfo())) { - // FIXME: Predicator is currently computing the dominator information for the - // top region. Once we start storing dominator information in a VPRegionBlock, - // we can avoid this recalculation. - VPDomTree.recalculate(*(cast(Plan.getEntry()))); -} diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.h b/llvm/lib/Transforms/Vectorize/VPlanPredicator.h deleted file mode 100644 index a5db9a54da3c..000000000000 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.h +++ /dev/null @@ -1,74 +0,0 @@ -//===-- VPlanPredicator.h ---------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines the VPlanPredicator class which contains the public -/// interfaces to predicate and linearize the VPlan region. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H -#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H - -#include "LoopVectorizationPlanner.h" -#include "VPlan.h" -#include "VPlanDominatorTree.h" - -namespace llvm { - -class VPlanPredicator { -private: - enum class EdgeType { - TRUE_EDGE, - FALSE_EDGE, - }; - - // VPlan being predicated. - VPlan &Plan; - - // VPLoopInfo for Plan's HCFG. - VPLoopInfo *VPLI; - - // Dominator tree for Plan's HCFG. - VPDominatorTree VPDomTree; - - // VPlan builder used to generate VPInstructions for block predicates. - VPBuilder Builder; - - /// Get the type of edge from \p FromBlock to \p ToBlock. Returns TRUE_EDGE if - /// \p ToBlock is either the unconditional successor or the conditional true - /// successor of \p FromBlock and FALSE_EDGE otherwise. - EdgeType getEdgeTypeBetween(VPBlockBase *FromBlock, VPBlockBase *ToBlock); - - /// Create and return VPValue corresponding to the predicate for the edge from - /// \p PredBB to \p CurrentBlock. 
- VPValue *getOrCreateNotPredicate(VPBasicBlock *PredBB, VPBasicBlock *CurrBB); - - /// Generate and return the result of ORing all the predicate VPValues in \p - /// Worklist. - VPValue *genPredicateTree(std::list &Worklist); - - /// Create or propagate predicate for \p CurrBlock in region \p Region using - /// predicate(s) of its predecessor(s) - void createOrPropagatePredicates(VPBlockBase *CurrBlock, - VPRegionBlock *Region); - - /// Predicate the CFG within \p Region. - void predicateRegionRec(VPRegionBlock *Region); - - /// Linearize the CFG within \p Region. - void linearizeRegionRec(VPRegionBlock *Region); - -public: - VPlanPredicator(VPlan &Plan); - - /// Predicate Plan's HCFG. - void predicate(); -}; -} // end namespace llvm -#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp new file mode 100644 index 000000000000..92422b17457c --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -0,0 +1,840 @@ +//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains implementations for different VPlan recipes. +/// +//===----------------------------------------------------------------------===// + +#include "VPlan.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/IVDescriptors.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include + +using namespace llvm; + +extern cl::opt EnableVPlanNativePath; + +bool VPRecipeBase::mayWriteToMemory() const { + switch (getVPDefID()) { + case VPWidenMemoryInstructionSC: { + return cast(this)->isStore(); + } + case VPReplicateSC: + case VPWidenCallSC: + return cast(getVPSingleValue()->getUnderlyingValue()) + ->mayWriteToMemory(); + case VPBranchOnMaskSC: + return false; + case VPWidenIntOrFpInductionSC: + case VPWidenCanonicalIVSC: + case VPWidenPHISC: + case VPBlendSC: + case VPWidenSC: + case VPWidenGEPSC: + case VPReductionSC: + case VPWidenSelectSC: { + const Instruction *I = + dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); + (void)I; + assert((!I || !I->mayWriteToMemory()) && + "underlying instruction may write to memory"); + return false; + } + default: + return true; + } +} + +bool VPRecipeBase::mayReadFromMemory() const { + switch (getVPDefID()) { + case VPWidenMemoryInstructionSC: { + return !cast(this)->isStore(); + } + case VPReplicateSC: + case VPWidenCallSC: + return cast(getVPSingleValue()->getUnderlyingValue()) + ->mayReadFromMemory(); + case VPBranchOnMaskSC: + return false; + case VPWidenIntOrFpInductionSC: + case VPWidenCanonicalIVSC: + case VPWidenPHISC: + case VPBlendSC: + case VPWidenSC: + case VPWidenGEPSC: + case VPReductionSC: + case VPWidenSelectSC: { + const Instruction *I = + 
dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); + (void)I; + assert((!I || !I->mayReadFromMemory()) && + "underlying instruction may read from memory"); + return false; + } + default: + return true; + } +} + +bool VPRecipeBase::mayHaveSideEffects() const { + switch (getVPDefID()) { + case VPWidenIntOrFpInductionSC: + case VPWidenPointerInductionSC: + case VPWidenCanonicalIVSC: + case VPWidenPHISC: + case VPBlendSC: + case VPWidenSC: + case VPWidenGEPSC: + case VPReductionSC: + case VPWidenSelectSC: + case VPScalarIVStepsSC: { + const Instruction *I = + dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); + (void)I; + assert((!I || !I->mayHaveSideEffects()) && + "underlying instruction has side-effects"); + return false; + } + case VPReplicateSC: { + auto *R = cast(this); + return R->getUnderlyingInstr()->mayHaveSideEffects(); + } + default: + return true; + } +} + +void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { + auto Lane = VPLane::getLastLaneForVF(State.VF); + VPValue *ExitValue = getOperand(0); + if (Plan.isUniformAfterVectorization(ExitValue)) + Lane = VPLane::getFirstLane(); + Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), + State.Builder.GetInsertBlock()); +} + +void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { + assert(!Parent && "Recipe already in some VPBasicBlock"); + assert(InsertPos->getParent() && + "Insertion position not in any VPBasicBlock"); + Parent = InsertPos->getParent(); + Parent->getRecipeList().insert(InsertPos->getIterator(), this); +} + +void VPRecipeBase::insertBefore(VPBasicBlock &BB, + iplist::iterator I) { + assert(!Parent && "Recipe already in some VPBasicBlock"); + assert(I == BB.end() || I->getParent() == &BB); + Parent = &BB; + BB.getRecipeList().insert(I, this); +} + +void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { + assert(!Parent && "Recipe already in some VPBasicBlock"); + assert(InsertPos->getParent() && + "Insertion position not in any VPBasicBlock"); + Parent = InsertPos->getParent(); + Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); +} + +void VPRecipeBase::removeFromParent() { + assert(getParent() && "Recipe not in any VPBasicBlock"); + getParent()->getRecipeList().remove(getIterator()); + Parent = nullptr; +} + +iplist::iterator VPRecipeBase::eraseFromParent() { + assert(getParent() && "Recipe not in any VPBasicBlock"); + return getParent()->getRecipeList().erase(getIterator()); +} + +void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { + removeFromParent(); + insertAfter(InsertPos); +} + +void VPRecipeBase::moveBefore(VPBasicBlock &BB, + iplist::iterator I) { + removeFromParent(); + insertBefore(BB, I); +} + +void VPInstruction::generateInstruction(VPTransformState &State, + unsigned Part) { + IRBuilderBase &Builder = State.Builder; + Builder.SetCurrentDebugLocation(DL); + + if (Instruction::isBinaryOp(getOpcode())) { + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); + State.set(this, V, Part); + return; + } + + switch (getOpcode()) { + case VPInstruction::Not: { + Value *A = State.get(getOperand(0), Part); + Value *V = Builder.CreateNot(A); + State.set(this, V, Part); + break; + } + case VPInstruction::ICmpULE: { + Value *IV = State.get(getOperand(0), Part); + Value *TC = State.get(getOperand(1), Part); + Value *V = Builder.CreateICmpULE(IV, TC); + State.set(this, V, Part); + break; + } + case Instruction::Select: { + Value 
*Cond = State.get(getOperand(0), Part); + Value *Op1 = State.get(getOperand(1), Part); + Value *Op2 = State.get(getOperand(2), Part); + Value *V = Builder.CreateSelect(Cond, Op1, Op2); + State.set(this, V, Part); + break; + } + case VPInstruction::ActiveLaneMask: { + // Get first lane of vector induction variable. + Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); + // Get the original loop tripcount. + Value *ScalarTC = State.get(getOperand(1), Part); + + auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); + auto *PredTy = VectorType::get(Int1Ty, State.VF); + Instruction *Call = Builder.CreateIntrinsic( + Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, + {VIVElem0, ScalarTC}, nullptr, "active.lane.mask"); + State.set(this, Call, Part); + break; + } + case VPInstruction::FirstOrderRecurrenceSplice: { + // Generate code to combine the previous and current values in vector v3. + // + // vector.ph: + // v_init = vector(..., ..., ..., a[-1]) + // br vector.body + // + // vector.body + // i = phi [0, vector.ph], [i+4, vector.body] + // v1 = phi [v_init, vector.ph], [v2, vector.body] + // v2 = a[i, i+1, i+2, i+3]; + // v3 = vector(v1(3), v2(0, 1, 2)) + + // For the first part, use the recurrence phi (v1), otherwise v2. + auto *V1 = State.get(getOperand(0), 0); + Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); + if (!PartMinus1->getType()->isVectorTy()) { + State.set(this, PartMinus1, Part); + } else { + Value *V2 = State.get(getOperand(1), Part); + State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part); + } + break; + } + case VPInstruction::CanonicalIVIncrement: + case VPInstruction::CanonicalIVIncrementNUW: { + Value *Next = nullptr; + if (Part == 0) { + bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; + auto *Phi = State.get(getOperand(0), 0); + // The loop step is equal to the vectorization factor (num of SIMD + // elements) times the unroll factor (num of SIMD instructions). + Value *Step = + createStepForVF(Builder, Phi->getType(), State.VF, State.UF); + Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false); + } else { + Next = State.get(this, 0); + } + + State.set(this, Next, Part); + break; + } + case VPInstruction::BranchOnCond: { + if (Part != 0) + break; + + Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); + VPRegionBlock *ParentRegion = getParent()->getParent(); + VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); + + // Replace the temporary unreachable terminator with a new conditional + // branch, hooking it up to backward destination for exiting blocks now and + // to forward destination(s) later when they are created. + BranchInst *CondBr = + Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); + + if (getParent()->isExiting()) + CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); + + CondBr->setSuccessor(0, nullptr); + Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); + break; + } + case VPInstruction::BranchOnCount: { + if (Part != 0) + break; + // First create the compare. + Value *IV = State.get(getOperand(0), Part); + Value *TC = State.get(getOperand(1), Part); + Value *Cond = Builder.CreateICmpEQ(IV, TC); + + // Now create the branch. 
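The `CanonicalIVIncrement`/`BranchOnCount` pair above implements the vector loop's latch: the canonical induction variable advances by VF * UF per vector iteration, and the loop exits once it equals the trip count. Below is a minimal scalar model of that control scheme; VF, UF, and the trip count are illustrative assumptions (not values from this patch), and the trip count is assumed to divide evenly by VF * UF.

```cpp
#include <cstdio>

int main() {
  const unsigned VF = 4, UF = 2;  // vectorization and unroll factors
  const unsigned TripCount = 24;  // assumed to be a multiple of VF * UF
  unsigned Index = 0;             // canonical induction variable
  do {
    // The vector body would process lanes [Index, Index + VF*UF) here.
    printf("vector iteration covers [%u, %u)\n", Index, Index + VF * UF);
    Index += VF * UF;             // CanonicalIVIncrement: step = VF * UF
  } while (Index != TripCount);   // BranchOnCount: icmp eq IV, TC, exit on eq
  return 0;
}
```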
+ auto *Plan = getParent()->getPlan(); + VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); + VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); + + // Replace the temporary unreachable terminator with a new conditional + // branch, hooking it up to backward destination (the header) now and to the + // forward destination (the exit/middle block) later when it is created. + // Note that CreateCondBr expects a valid BB as first argument, so we need + // to set it to nullptr later. + BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), + State.CFG.VPBB2IRBB[Header]); + CondBr->setSuccessor(0, nullptr); + Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); + break; + } + default: + llvm_unreachable("Unsupported opcode for instruction"); + } +} + +void VPInstruction::execute(VPTransformState &State) { + assert(!State.Instance && "VPInstruction executing an Instance"); + IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); + State.Builder.setFastMathFlags(FMF); + for (unsigned Part = 0; Part < State.UF; ++Part) + generateInstruction(State, Part); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPInstruction::dump() const { + VPSlotTracker SlotTracker(getParent()->getPlan()); + print(dbgs(), "", SlotTracker); +} + +void VPInstruction::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + + if (hasResult()) { + printAsOperand(O, SlotTracker); + O << " = "; + } + + switch (getOpcode()) { + case VPInstruction::Not: + O << "not"; + break; + case VPInstruction::ICmpULE: + O << "icmp ule"; + break; + case VPInstruction::SLPLoad: + O << "combined load"; + break; + case VPInstruction::SLPStore: + O << "combined store"; + break; + case VPInstruction::ActiveLaneMask: + O << "active lane mask"; + break; + case VPInstruction::FirstOrderRecurrenceSplice: + O << "first-order splice"; + break; + case VPInstruction::CanonicalIVIncrement: + O << "VF * UF + "; + break; + case VPInstruction::CanonicalIVIncrementNUW: + O << "VF * UF +(nuw) "; + break; + case VPInstruction::BranchOnCond: + O << "branch-on-cond"; + break; + case VPInstruction::BranchOnCount: + O << "branch-on-count "; + break; + default: + O << Instruction::getOpcodeName(getOpcode()); + } + + O << FMF; + + for (const VPValue *Operand : operands()) { + O << " "; + Operand->printAsOperand(O, SlotTracker); + } + + if (DL) { + O << ", !dbg "; + DL.print(O); + } +} +#endif + +void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { + // Make sure the VPInstruction is a floating-point operation. 
+ assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || + Opcode == Instruction::FNeg || Opcode == Instruction::FSub || + Opcode == Instruction::FDiv || Opcode == Instruction::FRem || + Opcode == Instruction::FCmp) && + "this op can't take fast-math flags"); + FMF = FMFNew; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-CALL "; + + auto *CI = cast(getUnderlyingInstr()); + if (CI->getType()->isVoidTy()) + O << "void "; + else { + printAsOperand(O, SlotTracker); + O << " = "; + } + + O << "call @" << CI->getCalledFunction()->getName() << "("; + printOperands(O, SlotTracker); + O << ")"; +} + +void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-SELECT "; + printAsOperand(O, SlotTracker); + O << " = select "; + getOperand(0)->printAsOperand(O, SlotTracker); + O << ", "; + getOperand(1)->printAsOperand(O, SlotTracker); + O << ", "; + getOperand(2)->printAsOperand(O, SlotTracker); + O << (InvariantCond ? " (condition is loop invariant)" : ""); +} + +void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN "; + printAsOperand(O, SlotTracker); + O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; + printOperands(O, SlotTracker); +} + +void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-INDUCTION"; + if (getTruncInst()) { + O << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; + O << " +\n" << Indent << "\" "; + getVPValue(0)->printAsOperand(O, SlotTracker); + } else + O << " " << VPlanIngredient(IV); + + O << ", "; + getStepValue()->printAsOperand(O, SlotTracker); +} +#endif + +bool VPWidenIntOrFpInductionRecipe::isCanonical() const { + auto *StartC = dyn_cast(getStartValue()->getLiveInIRValue()); + auto *StepC = dyn_cast(getInductionDescriptor().getStep()); + return StartC && StartC->isZero() && StepC && StepC->isOne(); +} + +VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { + return cast(getOperand(0)); +} + +bool VPScalarIVStepsRecipe::isCanonical() const { + auto *CanIV = getCanonicalIV(); + // The start value of the steps-recipe must match the start value of the + // canonical induction and it must step by 1. + if (CanIV->getStartValue() != getStartValue()) + return false; + auto *StepVPV = getStepValue(); + if (StepVPV->getDef()) + return false; + auto *StepC = dyn_cast_or_null(StepVPV->getLiveInIRValue()); + return StepC && StepC->isOne(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << Indent << "= SCALAR-STEPS "; + printOperands(O, SlotTracker); +} + +void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-GEP "; + O << (IsPtrLoopInvariant ? "Inv" : "Var"); + size_t IndicesNumber = IsIndexLoopInvariant.size(); + for (size_t I = 0; I < IndicesNumber; ++I) + O << "[" << (IsIndexLoopInvariant[I] ? 
"Inv" : "Var") << "]"; + + O << " "; + printAsOperand(O, SlotTracker); + O << " = getelementptr "; + printOperands(O, SlotTracker); +} + +void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "BLEND "; + Phi->printAsOperand(O, false); + O << " ="; + if (getNumIncomingValues() == 1) { + // Not a User of any mask: not really blending, this is a + // single-predecessor phi. + O << " "; + getIncomingValue(0)->printAsOperand(O, SlotTracker); + } else { + for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { + O << " "; + getIncomingValue(I)->printAsOperand(O, SlotTracker); + O << "/"; + getMask(I)->printAsOperand(O, SlotTracker); + } + } +} + +void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "REDUCE "; + printAsOperand(O, SlotTracker); + O << " = "; + getChainOp()->printAsOperand(O, SlotTracker); + O << " +"; + if (isa(getUnderlyingInstr())) + O << getUnderlyingInstr()->getFastMathFlags(); + O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; + getVecOp()->printAsOperand(O, SlotTracker); + if (getCondOp()) { + O << ", "; + getCondOp()->printAsOperand(O, SlotTracker); + } + O << ")"; + if (RdxDesc->IntermediateStore) + O << " (with final reduction value stored in invariant address sank " + "outside of loop)"; +} + +void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); + + if (!getUnderlyingInstr()->getType()->isVoidTy()) { + printAsOperand(O, SlotTracker); + O << " = "; + } + if (auto *CB = dyn_cast(getUnderlyingInstr())) { + O << "call @" << CB->getCalledFunction()->getName() << "("; + interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), + O, [&O, &SlotTracker](VPValue *Op) { + Op->printAsOperand(O, SlotTracker); + }); + O << ")"; + } else { + O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; + printOperands(O, SlotTracker); + } + + if (AlsoPack) + O << " (S->V)"; +} + +void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "PHI-PREDICATED-INSTRUCTION "; + printAsOperand(O, SlotTracker); + O << " = "; + printOperands(O, SlotTracker); +} + +void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN "; + + if (!isStore()) { + getVPSingleValue()->printAsOperand(O, SlotTracker); + O << " = "; + } + O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; + + printOperands(O, SlotTracker); +} +#endif + +void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { + Value *Start = getStartValue()->getLiveInIRValue(); + PHINode *EntryPart = PHINode::Create( + Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + EntryPart->addIncoming(Start, VectorPH); + EntryPart->setDebugLoc(DL); + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) + State.set(this, EntryPart, Part); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = CANONICAL-INDUCTION"; +} +#endif + +bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) { + bool IsUniform = 
vputils::onlyFirstLaneUsed(this); + return all_of(users(), + [&](const VPUser *U) { return U->usesScalars(this); }) && + (IsUniform || !VF.isScalable()); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = WIDEN-POINTER-INDUCTION "; + getStartValue()->printAsOperand(O, SlotTracker); + O << ", " << *IndDesc.getStep(); +} +#endif + +void VPExpandSCEVRecipe::execute(VPTransformState &State) { + assert(!State.Instance && "cannot be used in per-lane"); + const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); + SCEVExpander Exp(SE, DL, "induction"); + + Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), + &*State.Builder.GetInsertPoint()); + + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) + State.set(this, Res, Part); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + getVPSingleValue()->printAsOperand(O, SlotTracker); + O << " = EXPAND SCEV " << *Expr; +} +#endif + +void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { + Value *CanonicalIV = State.get(getOperand(0), 0); + Type *STy = CanonicalIV->getType(); + IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); + ElementCount VF = State.VF; + Value *VStart = VF.isScalar() + ? CanonicalIV + : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { + Value *VStep = createStepForVF(Builder, STy, VF, Part); + if (VF.isVector()) { + VStep = Builder.CreateVectorSplat(VF, VStep); + VStep = + Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); + } + Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); + State.set(this, CanonicalVectorIV, Part); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = WIDEN-CANONICAL-INDUCTION "; + printOperands(O, SlotTracker); +} +#endif + +void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { + auto &Builder = State.Builder; + // Create a vector from the initial value. + auto *VectorInit = getStartValue()->getLiveInIRValue(); + + Type *VecTy = State.VF.isScalar() + ? VectorInit->getType() + : VectorType::get(VectorInit->getType(), State.VF); + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + if (State.VF.isVector()) { + auto *IdxTy = Builder.getInt32Ty(); + auto *One = ConstantInt::get(IdxTy, 1); + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(VectorPH->getTerminator()); + auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); + auto *LastIdx = Builder.CreateSub(RuntimeVF, One); + VectorInit = Builder.CreateInsertElement( + PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); + } + + // Create a phi node for the new recurrence. 
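`VPFirstOrderRecurrencePHIRecipe::execute` above seeds the recurrence vector with the scalar initial value in its last lane, so the `FirstOrderRecurrenceSplice` shown earlier can pair that lane with the first VF-1 lanes of the current part. A standalone sketch of the splice on plain arrays, assuming VF = 4 and invented values (this models `CreateVectorSplice(Prev, Cur, -1)`, not the real intrinsic):

```cpp
#include <array>
#include <cstdio>

template <size_t VF>
std::array<int, VF> splice(const std::array<int, VF> &Prev,
                           const std::array<int, VF> &Cur) {
  std::array<int, VF> Out{};
  Out[0] = Prev[VF - 1];       // last lane of the previous part
  for (size_t I = 1; I < VF; ++I)
    Out[I] = Cur[I - 1];       // first VF-1 lanes of the current part
  return Out;
}

int main() {
  std::array<int, 4> VInit{0, 0, 0, 42}; // a[-1] == 42 sits in the last lane
  std::array<int, 4> V2{1, 2, 3, 4};     // a[i..i+3]
  auto V3 = splice(VInit, V2);           // yields {42, 1, 2, 3}
  for (int X : V3)
    printf("%d ", X);
  printf("\n");
  return 0;
}
```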
+ PHINode *EntryPart = PHINode::Create( + VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); + EntryPart->addIncoming(VectorInit, VectorPH); + State.set(this, EntryPart, 0); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif + +void VPReductionPHIRecipe::execute(VPTransformState &State) { + PHINode *PN = cast(getUnderlyingValue()); + auto &Builder = State.Builder; + + // In order to support recurrences we need to be able to vectorize Phi nodes. + // Phi nodes have cycles, so we need to vectorize them in two stages. This is + // stage #1: We create a new vector PHI node with no incoming edges. We'll use + // this value when we vectorize all of the instructions that use the PHI. + bool ScalarPHI = State.VF.isScalar() || IsInLoop; + Type *VecTy = + ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); + + BasicBlock *HeaderBB = State.CFG.PrevBB; + assert(State.CurrentVectorLoop->getHeader() == HeaderBB && + "recipe must be in the vector loop header"); + unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; + for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { + Value *EntryPart = + PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt()); + State.set(this, EntryPart, Part); + } + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + + // Reductions do not have to start at zero. They can start with + // any loop invariant values. + VPValue *StartVPV = getStartValue(); + Value *StartV = StartVPV->getLiveInIRValue(); + + Value *Iden = nullptr; + RecurKind RK = RdxDesc.getRecurrenceKind(); + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || + RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { + // MinMax reduction have the start value as their identify. + if (ScalarPHI) { + Iden = StartV; + } else { + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(VectorPH->getTerminator()); + StartV = Iden = + Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); + } + } else { + Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), + RdxDesc.getFastMathFlags()); + + if (!ScalarPHI) { + Iden = Builder.CreateVectorSplat(State.VF, Iden); + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(VectorPH->getTerminator()); + Constant *Zero = Builder.getInt32(0); + StartV = Builder.CreateInsertElement(Iden, StartV, Zero); + } + } + + for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { + Value *EntryPart = State.get(this, Part); + // Make sure to add the reduction start value only to the + // first unroll part. + Value *StartVal = (Part == 0) ? StartV : Iden; + cast(EntryPart)->addIncoming(StartVal, VectorPH); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-REDUCTION-PHI "; + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif + +void VPWidenPHIRecipe::execute(VPTransformState &State) { + assert(EnableVPlanNativePath && + "Non-native vplans are not expected to have VPWidenPHIRecipes."); + + // Currently we enter here in the VPlan-native path for non-induction + // PHIs where all control flow is uniform. 
We simply widen these PHIs. + // Create a vector phi with no operands - the vector phi operands will be + // set at the end of vector code generation. + VPBasicBlock *Parent = getParent(); + VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion(); + unsigned StartIdx = 0; + // For phis in header blocks of loop regions, use the index of the value + // coming from the preheader. + if (LoopRegion->getEntryBasicBlock() == Parent) { + for (unsigned I = 0; I < getNumOperands(); ++I) { + if (getIncomingBlock(I) == + LoopRegion->getSinglePredecessor()->getExitingBasicBlock()) + StartIdx = I; + } + } + Value *Op0 = State.get(getOperand(StartIdx), 0); + Type *VecTy = Op0->getType(); + Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); + State.set(this, VecPhi, 0); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-PHI "; + + auto *OriginalPhi = cast(getUnderlyingValue()); + // Unless all incoming values are modeled in VPlan print the original PHI + // directly. + // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming + // values as VPValues. + if (getNumOperands() != OriginalPhi->getNumOperands()) { + O << VPlanIngredient(OriginalPhi); + return; + } + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp index 9e19e172dea5..3a7e77fd9efd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -15,16 +15,10 @@ //===----------------------------------------------------------------------===// #include "VPlan.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" +#include "VPlanValue.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/VectorUtils.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -32,12 +26,9 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include -#include #include using namespace llvm; @@ -396,7 +387,7 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef Values) { return markFailed(); assert(getOpcode(Values) && "Opcodes for all values must match"); - unsigned ValuesOpcode = getOpcode(Values).getValue(); + unsigned ValuesOpcode = *getOpcode(Values); SmallVector CombinedOperands; if (areCommutative(Values)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 70ce773a8a85..cca484e13bf1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -13,6 +13,8 @@ #include "VPlanTransforms.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/IVDescriptors.h" using namespace llvm; @@ -22,17 +24,15 @@ void VPlanTransforms::VPInstructionsToVPRecipes( GetIntOrFpInductionDescriptor, SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE) { - auto *TopRegion = 
cast(Plan->getEntry()); - ReversePostOrderTraversal RPOT(TopRegion->getEntry()); - - for (VPBlockBase *Base : RPOT) { - // Do not widen instructions in pre-header and exit blocks. - if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) - continue; - - VPBasicBlock *VPBB = Base->getEntryBasicBlock(); + ReversePostOrderTraversal> + RPOT(Plan->getEntry()); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(RPOT)) { + VPRecipeBase *Term = VPBB->getTerminator(); + auto EndIter = Term ? Term->getIterator() : VPBB->end(); // Introduce each ingredient into VPlan. - for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) { + for (VPRecipeBase &Ingredient : + make_early_inc_range(make_range(VPBB->begin(), EndIter))) { + VPValue *VPV = Ingredient.getVPSingleValue(); Instruction *Inst = cast(VPV->getUnderlyingValue()); if (DeadInstructions.count(Inst)) { @@ -47,8 +47,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes( auto *Phi = cast(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); + VPValue *Step = + vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); NewRecipe = - new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true); + new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true); } else { Plan->addVPValue(Phi, VPPhi); continue; @@ -295,14 +297,19 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { } void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { - SmallVector> CastsToRemove; - for (auto &Phi : Plan.getEntry()->getEntryBasicBlock()->phis()) { + for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { auto *IV = dyn_cast(&Phi); if (!IV || IV->getTruncInst()) continue; - // Visit all casts connected to IV and in Casts. Collect them. - // remember them for removal. + // A sequence of IR Casts has potentially been recorded for IV, which + // *must be bypassed* when the IV is vectorized, because the vectorized IV + // will produce the desired casted value. This sequence forms a def-use + // chain and is provided in reverse order, ending with the cast that uses + // the IV phi. Search for the recipe of the last cast in the chain and + // replace it with the original IV. Note that only the final cast is + // expected to have users outside the cast-chain and the dead casts left + // over will be cleaned up later. auto &Casts = IV->getInductionDescriptor().getCastInsts(); VPValue *FindMyCast = IV; for (Instruction *IRCast : reverse(Casts)) { @@ -315,14 +322,9 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { break; } } - assert(FoundUserCast && "Missing a cast to remove"); - CastsToRemove.emplace_back(FoundUserCast, IV); FindMyCast = FoundUserCast->getVPSingleValue(); } - } - for (auto &E : CastsToRemove) { - E.first->getVPSingleValue()->replaceAllUsesWith(E.second); - E.first->eraseFromParent(); + FindMyCast->replaceAllUsesWith(IV); } } @@ -358,3 +360,73 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { } } } + +void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { + ReversePostOrderTraversal> + RPOT(Plan.getEntry()); + + for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly(RPOT))) { + // The recipes in the block are processed in reverse order, to catch chains + // of dead recipes. 
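`removeDeadRecipes` walks each block bottom-up so that erasing a dead user exposes its now-dead operands within the same pass. The same idea on a hypothetical node/use-count structure (a sketch, not the real VPlan types):

```cpp
#include <cstdio>
#include <vector>

struct Node {
  const char *Name;
  bool HasSideEffects;
  std::vector<int> Operands; // indices of earlier nodes in the block
  int NumUsers = 0;
  bool Erased = false;
};

int main() {
  std::vector<Node> Block = {
      {"t0 = add", false, {}},     // used only by t1
      {"t1 = mul t0", false, {0}}, // dead: no users
      {"store", true, {}},         // has side effects, always kept
  };
  for (const Node &N : Block)
    for (int Op : N.Operands)
      ++Block[Op].NumUsers;

  // Reverse sweep: erasing t1 first drops t0's use count to zero, so the
  // whole chain dies in a single pass.
  for (int I = (int)Block.size() - 1; I >= 0; --I) {
    Node &N = Block[I];
    if (N.HasSideEffects || N.NumUsers > 0)
      continue;
    N.Erased = true;
    for (int Op : N.Operands)
      --Block[Op].NumUsers;
  }
  for (const Node &N : Block)
    printf("%-12s %s\n", N.Name, N.Erased ? "erased" : "kept");
  return 0;
}
```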
+ for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { + if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { + return V->getNumUsers() > 0; + })) + continue; + R.eraseFromParent(); + } + } +} + +void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { + SmallVector ToRemove; + VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); + for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + auto *IV = dyn_cast(&Phi); + if (!IV) + continue; + if (HasOnlyVectorVFs && + none_of(IV->users(), [IV](VPUser *U) { return U->usesScalars(IV); })) + continue; + + const InductionDescriptor &ID = IV->getInductionDescriptor(); + VPValue *Step = + vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE); + Instruction *TruncI = IV->getTruncInst(); + VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( + IV->getPHINode()->getType(), ID, Plan.getCanonicalIV(), + IV->getStartValue(), Step, TruncI ? TruncI->getType() : nullptr); + HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi()); + + // Update scalar users of IV to use Step instead. Use SetVector to ensure + // the list of users doesn't contain duplicates. + SetVector Users(IV->user_begin(), IV->user_end()); + for (VPUser *U : Users) { + if (HasOnlyVectorVFs && !U->usesScalars(IV)) + continue; + for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { + if (U->getOperand(I) != IV) + continue; + U->setOperand(I, Steps); + } + } + } +} + +void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) { + DenseMap SCEV2VPV; + + for (VPRecipeBase &R : + make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) { + auto *ExpR = dyn_cast(&R); + if (!ExpR) + continue; + + auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR}); + if (I.second) + continue; + ExpR->replaceAllUsesWith(I.first->second); + ExpR->eraseFromParent(); + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index e74409a86466..3372e255dff7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -14,8 +14,7 @@ #define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H #include "VPlan.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" +#include "llvm/ADT/STLFunctionalExtras.h" namespace llvm { @@ -23,6 +22,7 @@ class InductionDescriptor; class Instruction; class PHINode; class ScalarEvolution; +class Loop; struct VPlanTransforms { /// Replaces the VPInstructions in \p Plan with corresponding @@ -49,6 +49,18 @@ struct VPlanTransforms { /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV /// recipe, if it exists. static void removeRedundantCanonicalIVs(VPlan &Plan); + + static void removeDeadRecipes(VPlan &Plan); + + /// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values, + /// provide them by building scalar steps off of the canonical scalar IV and + /// update the original IV's users. This is an optional optimization to reduce + /// the needs of vector extracts. + static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE); + + /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing + /// them with already existing recipes expanding the same SCEV expression. 
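`removeRedundantExpandSCEVRecipes` above relies on the try-insert idiom: `DenseMap::insert` reports whether the key was new, and on a duplicate all uses are redirected to the first expansion. A sketch of the idiom with plain strings standing in for SCEV expressions (names invented):

```cpp
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::vector<std::string> Expands = {"(4 * %n)", "(%n + 1)", "(4 * %n)"};
  std::unordered_map<std::string, size_t> Seen;
  for (size_t I = 0; I < Expands.size(); ++I) {
    auto It = Seen.insert({Expands[I], I});
    if (It.second)
      continue; // first expansion of this expression; keep it
    // Duplicate: redirect users to the earlier recipe, then erase this one.
    printf("recipe %zu reuses recipe %zu for %s\n", I, It.first->second,
           Expands[I].c_str());
  }
  return 0;
}
```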
+ static void removeRedundantExpandSCEVRecipes(VPlan &Plan); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 5296d2b9485c..5fc676834331 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -106,6 +106,7 @@ public: VPVFirstOrderRecurrencePHISC, VPVWidenPHISC, VPVWidenIntOrFpInductionSC, + VPVWidenPointerInductionSC, VPVPredInstPHI, VPVReductionPHISC, }; @@ -207,9 +208,7 @@ public: /// Subclass identifier (for isa/dyn_cast). enum class VPUserID { Recipe, - // TODO: Currently VPUsers are used in VPBlockBase, but in the future the - // only VPUsers should either be recipes or live-outs. - Block + LiveOut, }; private: @@ -286,6 +285,22 @@ public: /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPDef *Recipe); + + /// Returns true if the VPUser uses scalars of operand \p Op. Conservatively + /// returns if only first (scalar) lane is used, as default. + virtual bool usesScalars(const VPValue *Op) const { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return onlyFirstLaneUsed(Op); + } + + /// Returns true if the VPUser only uses the first lane of operand \p Op. + /// Conservatively returns false. + virtual bool onlyFirstLaneUsed(const VPValue *Op) const { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return false; + } }; /// This class augments a recipe with a set of VPValues defined by the recipe. @@ -327,10 +342,12 @@ public: /// type identification. using VPRecipeTy = enum { VPBranchOnMaskSC, + VPExpandSCEVSC, VPInstructionSC, VPInterleaveSC, VPReductionSC, VPReplicateSC, + VPScalarIVStepsSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenGEPSC, @@ -344,6 +361,7 @@ public: VPFirstOrderRecurrencePHISC, VPWidenPHISC, VPWidenIntOrFpInductionSC, + VPWidenPointerInductionSC, VPPredInstPHISC, VPReductionPHISC, VPFirstPHISC = VPBlendSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index d36f250995e1..f917883145c0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -43,17 +43,20 @@ static bool hasDuplicates(const SmallVectorImpl &VPBlockVec) { /// \p Region. Checks in this function are generic for VPBlockBases. They are /// not specific for VPBasicBlocks or VPRegionBlocks. static void verifyBlocksInRegion(const VPRegionBlock *Region) { - for (const VPBlockBase *VPB : - make_range(df_iterator::begin(Region->getEntry()), - df_iterator::end(Region->getExit()))) { + for (const VPBlockBase *VPB : make_range( + df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExiting()))) { // Check block's parent. assert(VPB->getParent() == Region && "VPBlockBase has wrong parent"); + auto *VPBB = dyn_cast(VPB); // Check block's condition bit. - if (VPB->getNumSuccessors() > 1) - assert(VPB->getCondBit() && "Missing condition bit!"); + if (VPB->getNumSuccessors() > 1 || (VPBB && VPBB->isExiting())) + assert(VPBB && VPBB->getTerminator() && + "Block has multiple successors but doesn't " + "have a proper branch recipe!"); else - assert(!VPB->getCondBit() && "Unexpected condition bit!"); + assert((!VPBB || !VPBB->getTerminator()) && "Unexpected branch recipe!"); // Check block's successors. 
const auto &Successors = VPB->getSuccessors(); @@ -94,13 +97,14 @@ static void verifyBlocksInRegion(const VPRegionBlock *Region) { /// VPBlockBases. Do not recurse inside nested VPRegionBlocks. static void verifyRegion(const VPRegionBlock *Region) { const VPBlockBase *Entry = Region->getEntry(); - const VPBlockBase *Exit = Region->getExit(); + const VPBlockBase *Exiting = Region->getExiting(); - // Entry and Exit shouldn't have any predecessor/successor, respectively. + // Entry and Exiting shouldn't have any predecessor/successor, respectively. assert(!Entry->getNumPredecessors() && "Region entry has predecessors."); - assert(!Exit->getNumSuccessors() && "Region exit has successors."); + assert(!Exiting->getNumSuccessors() && + "Region exiting block has successors."); (void)Entry; - (void)Exit; + (void)Exiting; verifyBlocksInRegion(Region); } @@ -111,9 +115,9 @@ static void verifyRegionRec(const VPRegionBlock *Region) { verifyRegion(Region); // Recurse inside nested regions. - for (const VPBlockBase *VPB : - make_range(df_iterator::begin(Region->getEntry()), - df_iterator::end(Region->getExit()))) { + for (const VPBlockBase *VPB : make_range( + df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExiting()))) { if (const auto *SubRegion = dyn_cast(VPB)) verifyRegionRec(SubRegion); } @@ -157,7 +161,7 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { } } - const VPRegionBlock *TopRegion = cast(Plan.getEntry()); + const VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); const VPBasicBlock *Entry = dyn_cast(TopRegion->getEntry()); if (!Entry) { errs() << "VPlan entry block is not a VPBasicBlock\n"; @@ -170,19 +174,19 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { return false; } - const VPBasicBlock *Exit = dyn_cast(TopRegion->getExit()); - if (!Exit) { - errs() << "VPlan exit block is not a VPBasicBlock\n"; + const VPBasicBlock *Exiting = dyn_cast(TopRegion->getExiting()); + if (!Exiting) { + errs() << "VPlan exiting block is not a VPBasicBlock\n"; return false; } - if (Exit->empty()) { - errs() << "VPlan vector loop exit must end with BranchOnCount " + if (Exiting->empty()) { + errs() << "VPlan vector loop exiting block must end with BranchOnCount " "VPInstruction but is empty\n"; return false; } - auto *LastInst = dyn_cast(std::prev(Exit->end())); + auto *LastInst = dyn_cast(std::prev(Exiting->end())); if (!LastInst || LastInst->getOpcode() != VPInstruction::BranchOnCount) { errs() << "VPlan vector loop exit must end with BranchOnCount " "VPInstruction\n"; @@ -197,10 +201,17 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { errs() << "region entry block has predecessors\n"; return false; } - if (Region->getExit()->getNumSuccessors() != 0) { - errs() << "region exit block has successors\n"; + if (Region->getExiting()->getNumSuccessors() != 0) { + errs() << "region exiting block has successors\n"; return false; } } + + for (auto &KV : Plan.getLiveOuts()) + if (KV.second->getNumOperands() != 1) { + errs() << "live outs must have a single operand\n"; + return false; + } + return true; } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 620d388199e0..90598937affc 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -103,11 +103,13 @@ private: bool foldSingleElementStore(Instruction &I); bool scalarizeLoadExtract(Instruction &I); bool foldShuffleOfBinops(Instruction &I); + bool 
foldShuffleFromReductions(Instruction &I); + bool foldSelectShuffle(Instruction &I, bool FromReduction = false); void replaceValue(Value &Old, Value &New) { Old.replaceAllUsesWith(&New); - New.takeName(&Old); if (auto *NewI = dyn_cast(&New)) { + New.takeName(&Old); Worklist.pushUsersToWorkList(*NewI); Worklist.pushValue(NewI); } @@ -152,12 +154,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); assert(isa(SrcPtr->getType()) && "Expected a pointer type"); - // If original AS != Load's AS, we can't bitcast the original pointer and have - // to use Load's operand instead. Ideally we would want to strip pointer casts - // without changing AS, but there's no API to do that ATM. unsigned AS = Load->getPointerAddressSpace(); - if (AS != SrcPtr->getType()->getPointerAddressSpace()) - SrcPtr = Load->getPointerOperand(); // We are potentially transforming byte-sized (8-bit) memory accesses, so make // sure we have all of our type-based constraints in place for this target. @@ -245,7 +242,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // It is safe and potentially profitable to load a vector directly: // inselt undef, load Scalar, 0 --> load VecPtr IRBuilder<> Builder(Load); - Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS)); + Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( + SrcPtr, MinVecTy->getPointerTo(AS)); Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment); VecLd = Builder.CreateShuffleVector(VecLd, Mask); @@ -259,12 +257,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { ExtractElementInst *VectorCombine::getShuffleExtract( ExtractElementInst *Ext0, ExtractElementInst *Ext1, unsigned PreferredExtractIndex = InvalidIndex) const { - assert(isa(Ext0->getIndexOperand()) && - isa(Ext1->getIndexOperand()) && - "Expected constant extract indexes"); + auto *Index0C = dyn_cast(Ext0->getIndexOperand()); + auto *Index1C = dyn_cast(Ext1->getIndexOperand()); + assert(Index0C && Index1C && "Expected constant extract indexes"); - unsigned Index0 = cast(Ext0->getIndexOperand())->getZExtValue(); - unsigned Index1 = cast(Ext1->getIndexOperand())->getZExtValue(); + unsigned Index0 = Index0C->getZExtValue(); + unsigned Index1 = Index1C->getZExtValue(); // If the extract indexes are identical, no shuffle is needed. if (Index0 == Index1) @@ -310,9 +308,10 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, const Instruction &I, ExtractElementInst *&ConvertToShuffle, unsigned PreferredExtractIndex) { - assert(isa(Ext0->getOperand(1)) && - isa(Ext1->getOperand(1)) && - "Expected constant extract indexes"); + auto *Ext0IndexC = dyn_cast(Ext0->getOperand(1)); + auto *Ext1IndexC = dyn_cast(Ext1->getOperand(1)); + assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes"); + unsigned Opcode = I.getOpcode(); Type *ScalarTy = Ext0->getType(); auto *VecTy = cast(Ext0->getOperand(0)->getType()); @@ -335,8 +334,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, // Get cost estimates for the extract elements. These costs will factor into // both sequences. 
- unsigned Ext0Index = cast(Ext0->getOperand(1))->getZExtValue(); - unsigned Ext1Index = cast(Ext1->getOperand(1))->getZExtValue(); + unsigned Ext0Index = Ext0IndexC->getZExtValue(); + unsigned Ext1Index = Ext1IndexC->getZExtValue(); InstructionCost Extract0Cost = TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index); @@ -698,8 +697,9 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) { ScalarInst->copyIRFlags(&I); // Fold the vector constants in the original vectors into a new base vector. - Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1) - : ConstantExpr::get(Opcode, VecC0, VecC1); + Value *NewVecC = + IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1) + : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1); Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index); replaceValue(I, *Insert); return true; @@ -1019,12 +1019,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { return false; NumInstChecked++; } - } - - if (!LastCheckedInst) - LastCheckedInst = UI; - else if (LastCheckedInst->comesBefore(UI)) LastCheckedInst = UI; + } auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT); if (!ScalarIdx.isSafe()) { @@ -1121,6 +1117,339 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) { return true; } +/// Given a commutative reduction, the order of the input lanes does not alter +/// the results. We can use this to remove certain shuffles feeding the +/// reduction, removing the need to shuffle at all. +bool VectorCombine::foldShuffleFromReductions(Instruction &I) { + auto *II = dyn_cast(&I); + if (!II) + return false; + switch (II->getIntrinsicID()) { + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_umax: + break; + default: + return false; + } + + // Find all the inputs when looking through operations that do not alter the + // lane order (binops, for example). Currently we look for a single shuffle, + // and can ignore splat values. + std::queue Worklist; + SmallPtrSet Visited; + ShuffleVectorInst *Shuffle = nullptr; + if (auto *Op = dyn_cast(I.getOperand(0))) + Worklist.push(Op); + + while (!Worklist.empty()) { + Value *CV = Worklist.front(); + Worklist.pop(); + if (Visited.contains(CV)) + continue; + + // Splats don't change the order, so can be safely ignored. + if (isSplatValue(CV)) + continue; + + Visited.insert(CV); + + if (auto *CI = dyn_cast(CV)) { + if (CI->isBinaryOp()) { + for (auto *Op : CI->operand_values()) + Worklist.push(Op); + continue; + } else if (auto *SV = dyn_cast(CI)) { + if (Shuffle && Shuffle != SV) + return false; + Shuffle = SV; + continue; + } + } + + // Anything else is currently an unknown node. + return false; + } + + if (!Shuffle) + return false; + + // Check all uses of the binary ops and shuffles are also included in the + // lane-invariant operations (Visited should be the list of lanewise + // instructions, including the shuffle that we found). 
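Because the reductions handled above are commutative, lane order cannot change the result, so the shuffle feeding the reduction may be re-sorted toward a cheaper identity/concat mask. The unsigned comparison in the sort pushes undef lanes (-1) past every real index. A standalone example with an invented mask:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> ConcatMask = {3, 1, -1, 0, 2, -1};
  // (unsigned)-1 is the largest value, so undef lanes sort to the end.
  std::sort(ConcatMask.begin(), ConcatMask.end(),
            [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  for (int M : ConcatMask)
    printf("%d ", M); // prints: 0 1 2 3 -1 -1
  printf("\n");
  return 0;
}
```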
+ for (auto *V : Visited) + for (auto *U : V->users()) + if (!Visited.contains(U) && U != &I) + return false; + + FixedVectorType *VecType = + dyn_cast(II->getOperand(0)->getType()); + if (!VecType) + return false; + FixedVectorType *ShuffleInputType = + dyn_cast(Shuffle->getOperand(0)->getType()); + if (!ShuffleInputType) + return false; + int NumInputElts = ShuffleInputType->getNumElements(); + + // Find the mask from sorting the lanes into order. This is most likely to + // become a identity or concat mask. Undef elements are pushed to the end. + SmallVector ConcatMask; + Shuffle->getShuffleMask(ConcatMask); + sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; }); + bool UsesSecondVec = + any_of(ConcatMask, [&](int M) { return M >= NumInputElts; }); + InstructionCost OldCost = TTI.getShuffleCost( + UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType, + Shuffle->getShuffleMask()); + InstructionCost NewCost = TTI.getShuffleCost( + UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType, + ConcatMask); + + LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle + << "\n"); + LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost + << "\n"); + if (NewCost < OldCost) { + Builder.SetInsertPoint(Shuffle); + Value *NewShuffle = Builder.CreateShuffleVector( + Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask); + LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n"); + replaceValue(*Shuffle, *NewShuffle); + } + + // See if we can re-use foldSelectShuffle, getting it to reduce the size of + // the shuffle into a nicer order, as it can ignore the order of the shuffles. + return foldSelectShuffle(*Shuffle, true); +} + +/// This method looks for groups of shuffles acting on binops, of the form: +/// %x = shuffle ... +/// %y = shuffle ... +/// %a = binop %x, %y +/// %b = binop %x, %y +/// shuffle %a, %b, selectmask +/// We may, especially if the shuffle is wider than legal, be able to convert +/// the shuffle to a form where only parts of a and b need to be computed. On +/// architectures with no obvious "select" shuffle, this can reduce the total +/// number of operations if the target reports them as cheaper. +bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { + auto *SVI = dyn_cast(&I); + auto *VT = dyn_cast(I.getType()); + if (!SVI || !VT) + return false; + auto *Op0 = dyn_cast(SVI->getOperand(0)); + auto *Op1 = dyn_cast(SVI->getOperand(1)); + if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() || + VT != Op0->getType()) + return false; + auto *SVI0A = dyn_cast(Op0->getOperand(0)); + auto *SVI0B = dyn_cast(Op0->getOperand(1)); + auto *SVI1A = dyn_cast(Op1->getOperand(0)); + auto *SVI1B = dyn_cast(Op1->getOperand(1)); + auto checkSVNonOpUses = [&](Instruction *I) { + if (!I || I->getOperand(0)->getType() != VT) + return true; + return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; }); + }; + if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) || + checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B)) + return false; + + // Collect all the uses that are shuffles that we can transform together. We + // may not have a single shuffle, but a group that can all be transformed + // together profitably. 
+ SmallVector Shuffles; + auto collectShuffles = [&](Instruction *I) { + for (auto *U : I->users()) { + auto *SV = dyn_cast(U); + if (!SV || SV->getType() != VT) + return false; + if (!llvm::is_contained(Shuffles, SV)) + Shuffles.push_back(SV); + } + return true; + }; + if (!collectShuffles(Op0) || !collectShuffles(Op1)) + return false; + // From a reduction, we need to be processing a single shuffle, otherwise the + // other uses will not be lane-invariant. + if (FromReduction && Shuffles.size() > 1) + return false; + + // For each of the output shuffles, we try to sort all the first vector + // elements to the beginning, followed by the second array elements at the + // end. If the binops are legalized to smaller vectors, this may reduce total + // number of binops. We compute the ReconstructMask mask needed to convert + // back to the original lane order. + SmallVector V1, V2; + SmallVector> ReconstructMasks; + int MaxV1Elt = 0, MaxV2Elt = 0; + unsigned NumElts = VT->getNumElements(); + for (ShuffleVectorInst *SVN : Shuffles) { + SmallVector Mask; + SVN->getShuffleMask(Mask); + + // Check the operands are the same as the original, or reversed (in which + // case we need to commute the mask). + Value *SVOp0 = SVN->getOperand(0); + Value *SVOp1 = SVN->getOperand(1); + if (SVOp0 == Op1 && SVOp1 == Op0) { + std::swap(SVOp0, SVOp1); + ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + } + if (SVOp0 != Op0 || SVOp1 != Op1) + return false; + + // Calculate the reconstruction mask for this shuffle, as the mask needed to + // take the packed values from Op0/Op1 and reconstructing to the original + // order. + SmallVector ReconstructMask; + for (unsigned I = 0; I < Mask.size(); I++) { + if (Mask[I] < 0) { + ReconstructMask.push_back(-1); + } else if (Mask[I] < static_cast(NumElts)) { + MaxV1Elt = std::max(MaxV1Elt, Mask[I]); + auto It = find(V1, Mask[I]); + if (It != V1.end()) + ReconstructMask.push_back(It - V1.begin()); + else { + ReconstructMask.push_back(V1.size()); + V1.push_back(Mask[I]); + } + } else { + MaxV2Elt = std::max(MaxV2Elt, Mask[I] - NumElts); + auto It = find(V2, Mask[I] - NumElts); + if (It != V2.end()) + ReconstructMask.push_back(NumElts + It - V2.begin()); + else { + ReconstructMask.push_back(NumElts + V2.size()); + V2.push_back(Mask[I] - NumElts); + } + } + } + + // For reductions, we know that the lane ordering out doesn't alter the + // result. In-order can help simplify the shuffle away. + if (FromReduction) + sort(ReconstructMask); + ReconstructMasks.push_back(ReconstructMask); + } + + // If the Maximum element used from V1 and V2 are not larger than the new + // vectors, the vectors are already packes and performing the optimization + // again will likely not help any further. This also prevents us from getting + // stuck in a cycle in case the costs do not also rule it out. 
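The reconstruction masks computed above pack each used lane of the two inputs exactly once and record where the packed copy landed, so the original lane order can be rebuilt afterwards. The same computation on a toy mask (values invented for illustration):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const int NumElts = 4;
  std::vector<int> Mask = {2, 2, 5, -1}; // picks from two 4-wide inputs
  std::vector<int> V1, V2, Reconstruct;
  for (int M : Mask) {
    if (M < 0) {
      Reconstruct.push_back(-1);
    } else if (M < NumElts) {            // element of the first input
      auto It = std::find(V1.begin(), V1.end(), M);
      if (It != V1.end())
        Reconstruct.push_back(It - V1.begin());
      else {
        Reconstruct.push_back(V1.size());
        V1.push_back(M);
      }
    } else {                             // element of the second input
      auto It = std::find(V2.begin(), V2.end(), M - NumElts);
      if (It != V2.end())
        Reconstruct.push_back(NumElts + (It - V2.begin()));
      else {
        Reconstruct.push_back(NumElts + V2.size());
        V2.push_back(M - NumElts);
      }
    }
  }
  // Here: V1 = {2}, V2 = {1}, Reconstruct = {0, 0, 4, -1}.
  printf("packed V1=%zu lanes, V2=%zu lanes; reconstruct: ", V1.size(),
         V2.size());
  for (int R : Reconstruct)
    printf("%d ", R);
  printf("\n");
  return 0;
}
```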
+ if (V1.empty() || V2.empty() || + (MaxV1Elt == static_cast(V1.size()) - 1 && + MaxV2Elt == static_cast(V2.size()) - 1)) + return false; + + // Calculate the masks needed for the new input shuffles, which get padded + // with undef + SmallVector V1A, V1B, V2A, V2B; + for (unsigned I = 0; I < V1.size(); I++) { + V1A.push_back(SVI0A->getMaskValue(V1[I])); + V1B.push_back(SVI0B->getMaskValue(V1[I])); + } + for (unsigned I = 0; I < V2.size(); I++) { + V2A.push_back(SVI1A->getMaskValue(V2[I])); + V2B.push_back(SVI1B->getMaskValue(V2[I])); + } + while (V1A.size() < NumElts) { + V1A.push_back(UndefMaskElem); + V1B.push_back(UndefMaskElem); + } + while (V2A.size() < NumElts) { + V2A.push_back(UndefMaskElem); + V2B.push_back(UndefMaskElem); + } + + auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) { + return C + + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask()); + }; + auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef Mask) { + return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask); + }; + + // Get the costs of the shuffles + binops before and after with the new + // shuffle masks. + InstructionCost CostBefore = + TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) + + TTI.getArithmeticInstrCost(Op1->getOpcode(), VT); + CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(), + InstructionCost(0), AddShuffleCost); + // This set helps us only cost each unique shuffle once. + SmallPtrSet InputShuffles( + {SVI0A, SVI0B, SVI1A, SVI1B}); + CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(), + InstructionCost(0), AddShuffleCost); + + // The new binops will be unused for lanes past the used shuffle lengths. + // These types attempt to get the correct cost for that from the target. + FixedVectorType *Op0SmallVT = + FixedVectorType::get(VT->getScalarType(), V1.size()); + FixedVectorType *Op1SmallVT = + FixedVectorType::get(VT->getScalarType(), V2.size()); + InstructionCost CostAfter = + TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT) + + TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT); + CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(), + InstructionCost(0), AddShuffleMaskCost); + std::set> OutputShuffleMasks({V1A, V1B, V2A, V2B}); + CostAfter += + std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(), + InstructionCost(0), AddShuffleMaskCost); + + if (CostBefore <= CostAfter) + return false; + + // The cost model has passed, create the new instructions. 
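The gate above (`CostBefore <= CostAfter` returns early) fires the rewrite only when the re-shuffled form is strictly cheaper. Reduced to its shape, with placeholder numbers standing in for real TTI costs:

```cpp
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> OldCosts = {4, 4, 2, 2};    // wide binops + shuffles
  std::vector<int> NewCosts = {2, 2, 2, 2, 1}; // narrow binops + new shuffles
  int Before = std::accumulate(OldCosts.begin(), OldCosts.end(), 0);
  int After = std::accumulate(NewCosts.begin(), NewCosts.end(), 0);
  printf(After < Before ? "transform\n" : "keep original\n");
  return 0;
}
```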
+ Builder.SetInsertPoint(SVI0A); + Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0), + SVI0A->getOperand(1), V1A); + Builder.SetInsertPoint(SVI0B); + Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0), + SVI0B->getOperand(1), V1B); + Builder.SetInsertPoint(SVI1A); + Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0), + SVI1A->getOperand(1), V2A); + Builder.SetInsertPoint(SVI1B); + Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0), + SVI1B->getOperand(1), V2B); + Builder.SetInsertPoint(Op0); + Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(), + NSV0A, NSV0B); + if (auto *I = dyn_cast(NOp0)) + I->copyIRFlags(Op0, true); + Builder.SetInsertPoint(Op1); + Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(), + NSV1A, NSV1B); + if (auto *I = dyn_cast(NOp1)) + I->copyIRFlags(Op1, true); + + for (int S = 0, E = ReconstructMasks.size(); S != E; S++) { + Builder.SetInsertPoint(Shuffles[S]); + Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]); + replaceValue(*Shuffles[S], *NSV); + } + + Worklist.pushValue(NSV0A); + Worklist.pushValue(NSV0B); + Worklist.pushValue(NSV1A); + Worklist.pushValue(NSV1B); + for (auto *S : Shuffles) + Worklist.add(S); + return true; +} + /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. bool VectorCombine::run() { @@ -1140,6 +1469,8 @@ bool VectorCombine::run() { MadeChange |= foldBitcastShuf(I); MadeChange |= foldExtractedCmps(I); MadeChange |= foldShuffleOfBinops(I); + MadeChange |= foldShuffleFromReductions(I); + MadeChange |= foldSelectShuffle(I); } MadeChange |= scalarizeBinopOrCmp(I); MadeChange |= scalarizeLoadExtract(I); diff --git a/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/llvm/lib/Transforms/Vectorize/Vectorize.cpp index 010ca28fc237..208e5eeea864 100644 --- a/llvm/lib/Transforms/Vectorize/Vectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/Vectorize.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Vectorize.h" #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Vectorize.h" -#include "llvm/Analysis/Passes.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp new file mode 100644 index 000000000000..0661ed7c6ae1 --- /dev/null +++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp @@ -0,0 +1,719 @@ +//===-- MSVCPaths.cpp - MSVC path-parsing helpers -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/WindowsDriver/MSVCPaths.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/VersionTuple.h" +#include "llvm/Support/VirtualFileSystem.h" +#include + +#ifdef _WIN32 +#include "llvm/Support/ConvertUTF.h" +#endif + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define NOGDI +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#endif + +#ifdef _MSC_VER +// Don't support SetupApi on MinGW. +#define USE_MSVC_SETUP_API + +// Make sure this comes before MSVCSetupApi.h +#include + +#include "llvm/Support/COM.h" +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#endif +#include "llvm/WindowsDriver/MSVCSetupApi.h" +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +_COM_SMARTPTR_TYPEDEF(ISetupConfiguration, __uuidof(ISetupConfiguration)); +_COM_SMARTPTR_TYPEDEF(ISetupConfiguration2, __uuidof(ISetupConfiguration2)); +_COM_SMARTPTR_TYPEDEF(ISetupHelper, __uuidof(ISetupHelper)); +_COM_SMARTPTR_TYPEDEF(IEnumSetupInstances, __uuidof(IEnumSetupInstances)); +_COM_SMARTPTR_TYPEDEF(ISetupInstance, __uuidof(ISetupInstance)); +_COM_SMARTPTR_TYPEDEF(ISetupInstance2, __uuidof(ISetupInstance2)); +#endif + +static std::string +getHighestNumericTupleInDirectory(llvm::vfs::FileSystem &VFS, + llvm::StringRef Directory) { + std::string Highest; + llvm::VersionTuple HighestTuple; + + std::error_code EC; + for (llvm::vfs::directory_iterator DirIt = VFS.dir_begin(Directory, EC), + DirEnd; + !EC && DirIt != DirEnd; DirIt.increment(EC)) { + auto Status = VFS.status(DirIt->path()); + if (!Status || !Status->isDirectory()) + continue; + llvm::StringRef CandidateName = llvm::sys::path::filename(DirIt->path()); + llvm::VersionTuple Tuple; + if (Tuple.tryParse(CandidateName)) // tryParse() returns true on error. + continue; + if (Tuple > HighestTuple) { + HighestTuple = Tuple; + Highest = CandidateName.str(); + } + } + + return Highest; +} + +static bool getWindows10SDKVersionFromPath(llvm::vfs::FileSystem &VFS, + const std::string &SDKPath, + std::string &SDKVersion) { + llvm::SmallString<128> IncludePath(SDKPath); + llvm::sys::path::append(IncludePath, "Include"); + SDKVersion = getHighestNumericTupleInDirectory(VFS, IncludePath); + return !SDKVersion.empty(); +} + +static bool getWindowsSDKDirViaCommandLine( + llvm::vfs::FileSystem &VFS, llvm::Optional WinSdkDir, + llvm::Optional WinSdkVersion, + llvm::Optional WinSysRoot, std::string &Path, int &Major, + std::string &Version) { + if (WinSdkDir || WinSysRoot) { + // Don't validate the input; trust the value supplied by the user. + // The motivation is to prevent unnecessary file and registry access. 
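`getHighestNumericTupleInDirectory` above keeps the directory whose name parses as the largest version tuple. A self-contained equivalent, with the VFS walk replaced by a fixed list of dotted version strings (the names are illustrative, and well-formed input is assumed for `stoi`):

```cpp
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

static std::vector<int> parseTuple(const std::string &Name) {
  std::vector<int> Parts;
  std::istringstream SS(Name);
  std::string Piece;
  while (std::getline(SS, Piece, '.'))
    Parts.push_back(std::stoi(Piece)); // assumes well-formed dotted names
  return Parts;
}

int main() {
  std::vector<std::string> Dirs = {"10.0.17763.0", "10.0.19041.0",
                                   "10.0.18362.0"};
  std::string Highest;
  std::vector<int> HighestTuple;
  for (const auto &D : Dirs) {
    auto T = parseTuple(D);
    if (T > HighestTuple) { // lexicographic compare, like VersionTuple
      HighestTuple = T;
      Highest = D;
    }
  }
  printf("highest SDK version: %s\n", Highest.c_str()); // 10.0.19041.0
  return 0;
}
```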
+    llvm::VersionTuple SDKVersion;
+    if (WinSdkVersion)
+      SDKVersion.tryParse(*WinSdkVersion);
+
+    if (WinSysRoot) {
+      llvm::SmallString<128> SDKPath(*WinSysRoot);
+      llvm::sys::path::append(SDKPath, "Windows Kits");
+      if (!SDKVersion.empty())
+        llvm::sys::path::append(SDKPath, llvm::Twine(SDKVersion.getMajor()));
+      else
+        llvm::sys::path::append(
+            SDKPath, getHighestNumericTupleInDirectory(VFS, SDKPath));
+      Path = std::string(SDKPath.str());
+    } else {
+      Path = WinSdkDir->str();
+    }
+
+    if (!SDKVersion.empty()) {
+      Major = SDKVersion.getMajor();
+      Version = SDKVersion.getAsString();
+    } else if (getWindows10SDKVersionFromPath(VFS, Path, Version)) {
+      Major = 10;
+    }
+    return true;
+  }
+  return false;
+}
+
+#ifdef _WIN32
+static bool readFullStringValue(HKEY hkey, const char *valueName,
+                                std::string &value) {
+  std::wstring WideValueName;
+  if (!llvm::ConvertUTF8toWide(valueName, WideValueName))
+    return false;
+
+  DWORD result = 0;
+  DWORD valueSize = 0;
+  DWORD type = 0;
+  // First just query for the required size.
+  result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, &type, NULL,
+                            &valueSize);
+  if (result != ERROR_SUCCESS || type != REG_SZ || !valueSize)
+    return false;
+  std::vector<BYTE> buffer(valueSize);
+  result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, NULL, &buffer[0],
+                            &valueSize);
+  if (result == ERROR_SUCCESS) {
+    std::wstring WideValue(reinterpret_cast<const wchar_t *>(buffer.data()),
+                           valueSize / sizeof(wchar_t));
+    if (valueSize && WideValue.back() == L'\0') {
+      WideValue.pop_back();
+    }
+    // The destination buffer must be empty as an invariant of the conversion
+    // function, but this function is sometimes called in a loop that reuses
+    // the same buffer. Simply clear it out so we can overwrite it.
+    value.clear();
+    return llvm::convertWideToUTF8(WideValue, value);
+  }
+  return false;
+}
+#endif
+
+/// Read registry string.
+/// This also supports a means to look for high-versioned keys by use
+/// of a $VERSION placeholder in the key path.
+/// $VERSION in the key path is a placeholder for the version number,
+/// causing the highest value path to be searched for and used.
+/// I.e. "SOFTWARE\\Microsoft\\VisualStudio\\$VERSION".
+/// There can be additional characters in the component. Only the numeric
+/// characters are compared. This function only searches HKLM.
+static bool getSystemRegistryString(const char *keyPath, const char *valueName,
+                                    std::string &value, std::string *phValue) {
+#ifndef _WIN32
+  return false;
+#else
+  HKEY hRootKey = HKEY_LOCAL_MACHINE;
+  HKEY hKey = NULL;
+  long lResult;
+  bool returnValue = false;
+
+  const char *placeHolder = strstr(keyPath, "$VERSION");
+  std::string bestName;
+  // If we have a $VERSION placeholder, do the highest-version search.
+  if (placeHolder) {
+    const char *keyEnd = placeHolder - 1;
+    const char *nextKey = placeHolder;
+    // Find end of previous key.
+    while ((keyEnd > keyPath) && (*keyEnd != '\\'))
+      keyEnd--;
+    // Find end of key containing $VERSION.
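+    // For example, for "SOFTWARE\\Microsoft\\VisualStudio\\$VERSION", the
+    // partial key is "SOFTWARE\\Microsoft\\VisualStudio"; its numeric
+    // subkeys (e.g. "7.1", "14.0") are enumerated below and the highest one
+    // that yields the requested value wins.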
+    while (*nextKey && (*nextKey != '\\'))
+      nextKey++;
+    size_t partialKeyLength = keyEnd - keyPath;
+    char partialKey[256];
+    if (partialKeyLength >= sizeof(partialKey))
+      partialKeyLength = sizeof(partialKey) - 1;
+    strncpy(partialKey, keyPath, partialKeyLength);
+    partialKey[partialKeyLength] = '\0';
+    HKEY hTopKey = NULL;
+    lResult = RegOpenKeyExA(hRootKey, partialKey, 0,
+                            KEY_READ | KEY_WOW64_32KEY, &hTopKey);
+    if (lResult == ERROR_SUCCESS) {
+      char keyName[256];
+      double bestValue = 0.0;
+      DWORD index, size = sizeof(keyName) - 1;
+      for (index = 0; RegEnumKeyExA(hTopKey, index, keyName, &size, NULL, NULL,
+                                    NULL, NULL) == ERROR_SUCCESS;
+           index++) {
+        const char *sp = keyName;
+        while (*sp && !llvm::isDigit(*sp))
+          sp++;
+        if (!*sp)
+          continue;
+        const char *ep = sp + 1;
+        while (*ep && (llvm::isDigit(*ep) || (*ep == '.')))
+          ep++;
+        char numBuf[32];
+        strncpy(numBuf, sp, sizeof(numBuf) - 1);
+        numBuf[sizeof(numBuf) - 1] = '\0';
+        double dvalue = strtod(numBuf, NULL);
+        if (dvalue > bestValue) {
+          // Test that InstallDir is indeed there before keeping this index.
+          // Open the chosen key path remainder.
+          bestName = keyName;
+          // Append rest of key.
+          bestName.append(nextKey);
+          lResult = RegOpenKeyExA(hTopKey, bestName.c_str(), 0,
+                                  KEY_READ | KEY_WOW64_32KEY, &hKey);
+          if (lResult == ERROR_SUCCESS) {
+            if (readFullStringValue(hKey, valueName, value)) {
+              bestValue = dvalue;
+              if (phValue)
+                *phValue = bestName;
+              returnValue = true;
+            }
+            RegCloseKey(hKey);
+          }
+        }
+        size = sizeof(keyName) - 1;
+      }
+      RegCloseKey(hTopKey);
+    }
+  } else {
+    lResult =
+        RegOpenKeyExA(hRootKey, keyPath, 0, KEY_READ | KEY_WOW64_32KEY, &hKey);
+    if (lResult == ERROR_SUCCESS) {
+      if (readFullStringValue(hKey, valueName, value))
+        returnValue = true;
+      if (phValue)
+        phValue->clear();
+      RegCloseKey(hKey);
+    }
+  }
+  return returnValue;
+#endif // _WIN32
+}
+
+namespace llvm {
+
+const char *archToWindowsSDKArch(Triple::ArchType Arch) {
+  switch (Arch) {
+  case Triple::ArchType::x86:
+    return "x86";
+  case Triple::ArchType::x86_64:
+    return "x64";
+  case Triple::ArchType::arm:
+    return "arm";
+  case Triple::ArchType::aarch64:
+    return "arm64";
+  default:
+    return "";
+  }
+}
+
+const char *archToLegacyVCArch(Triple::ArchType Arch) {
+  switch (Arch) {
+  case Triple::ArchType::x86:
+    // x86 is default in legacy VC toolchains.
+    // e.g. x86 libs are directly in /lib as opposed to /lib/x86.
+    return "";
+  case Triple::ArchType::x86_64:
+    return "amd64";
+  case Triple::ArchType::arm:
+    return "arm";
+  case Triple::ArchType::aarch64:
+    return "arm64";
+  default:
+    return "";
+  }
+}
+
+const char *archToDevDivInternalArch(Triple::ArchType Arch) {
+  switch (Arch) {
+  case Triple::ArchType::x86:
+    return "i386";
+  case Triple::ArchType::x86_64:
+    return "amd64";
+  case Triple::ArchType::arm:
+    return "arm";
+  case Triple::ArchType::aarch64:
+    return "arm64";
+  default:
+    return "";
+  }
+}
+
+bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
+                                   Triple::ArchType Arch, std::string &path) {
+  if (SDKMajor >= 8) {
+    sys::path::append(LibPath, archToWindowsSDKArch(Arch));
+  } else {
+    switch (Arch) {
+    // In Windows SDK 7.x, x86 libraries are directly in the Lib folder.
+    case Triple::x86:
+      break;
+    case Triple::x86_64:
+      sys::path::append(LibPath, "x64");
+      break;
+    case Triple::arm:
+      // It is not necessary to link against Windows SDK 7.x when targeting ARM.
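+      // Windows SDK 7.x ships no ARM libraries, so the return below simply
+      // reports this configuration as unsupported to the caller.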
+      return false;
+    default:
+      return false;
+    }
+  }
+
+  path = std::string(LibPath.str());
+  return true;
+}
+
+std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
+                                const std::string &VCToolChainPath,
+                                Triple::ArchType TargetArch,
+                                StringRef SubdirParent) {
+  const char *SubdirName;
+  const char *IncludeName;
+  switch (VSLayout) {
+  case ToolsetLayout::OlderVS:
+    SubdirName = archToLegacyVCArch(TargetArch);
+    IncludeName = "include";
+    break;
+  case ToolsetLayout::VS2017OrNewer:
+    SubdirName = archToWindowsSDKArch(TargetArch);
+    IncludeName = "include";
+    break;
+  case ToolsetLayout::DevDivInternal:
+    SubdirName = archToDevDivInternalArch(TargetArch);
+    IncludeName = "inc";
+    break;
+  }
+
+  SmallString<256> Path(VCToolChainPath);
+  if (!SubdirParent.empty())
+    sys::path::append(Path, SubdirParent);
+
+  switch (Type) {
+  case SubDirectoryType::Bin:
+    if (VSLayout == ToolsetLayout::VS2017OrNewer) {
+      // MSVC ships with two linkers: a 32-bit x86 and 64-bit x86 linker.
+      // On x86, pick the linker that corresponds to the current process.
+      // On ARM64, pick the 32-bit x86 linker; the 64-bit one doesn't run
+      // on Windows 10.
+      //
+      // FIXME: Consider using IsWow64GuestMachineSupported to figure out
+      // if we can invoke the 64-bit linker. It's generally preferable
+      // because it won't run out of address-space.
+      const bool HostIsX64 =
+          Triple(sys::getProcessTriple()).getArch() == Triple::x86_64;
+      const char *const HostName = HostIsX64 ? "Hostx64" : "Hostx86";
+      sys::path::append(Path, "bin", HostName, SubdirName);
+    } else { // OlderVS or DevDivInternal
+      sys::path::append(Path, "bin", SubdirName);
+    }
+    break;
+  case SubDirectoryType::Include:
+    sys::path::append(Path, IncludeName);
+    break;
+  case SubDirectoryType::Lib:
+    sys::path::append(Path, "lib", SubdirName);
+    break;
+  }
+  return std::string(Path.str());
+}
+
+bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath,
+                     Triple::ArchType TargetArch, vfs::FileSystem &VFS) {
+  SmallString<128> TestPath(getSubDirectoryPath(
+      SubDirectoryType::Include, VSLayout, VCToolChainPath, TargetArch));
+  sys::path::append(TestPath, "stdlib.h");
+  return !VFS.exists(TestPath);
+}
+
+bool getWindowsSDKDir(vfs::FileSystem &VFS, Optional<StringRef> WinSdkDir,
+                      Optional<StringRef> WinSdkVersion,
+                      Optional<StringRef> WinSysRoot, std::string &Path,
+                      int &Major, std::string &WindowsSDKIncludeVersion,
+                      std::string &WindowsSDKLibVersion) {
+  // Trust /winsdkdir and /winsdkversion if present.
+  if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot,
+                                     Path, Major, WindowsSDKIncludeVersion)) {
+    WindowsSDKLibVersion = WindowsSDKIncludeVersion;
+    return true;
+  }
+
+  // FIXME: Try env vars (%WindowsSdkDir%, %UCRTVersion%) before going to
+  // registry.
+
+  // Try the Windows registry.
+  std::string RegistrySDKVersion;
+  if (!getSystemRegistryString(
+          "SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\$VERSION",
+          "InstallationFolder", Path, &RegistrySDKVersion))
+    return false;
+  if (Path.empty() || RegistrySDKVersion.empty())
+    return false;
+
+  WindowsSDKIncludeVersion.clear();
+  WindowsSDKLibVersion.clear();
+  Major = 0;
+  std::sscanf(RegistrySDKVersion.c_str(), "v%d.", &Major);
+  if (Major <= 7)
+    return true;
+  if (Major == 8) {
+    // Windows SDK 8.x installs libraries in a folder whose names depend on the
+    // version of the OS you're targeting. By default choose the newest, which
+    // usually corresponds to the version of the OS you've installed the SDK on.
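+    // "winv6.3" is the folder targeting Windows 8.1, "win8" targets 8.0, so
+    // the search below runs newest-first and stops at the first Lib subfolder
+    // that exists.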
+    const char *Tests[] = {"winv6.3", "win8", "win7"};
+    for (const char *Test : Tests) {
+      SmallString<128> TestPath(Path);
+      sys::path::append(TestPath, "Lib", Test);
+      if (VFS.exists(TestPath)) {
+        WindowsSDKLibVersion = Test;
+        break;
+      }
+    }
+    return !WindowsSDKLibVersion.empty();
+  }
+  if (Major == 10) {
+    if (!getWindows10SDKVersionFromPath(VFS, Path, WindowsSDKIncludeVersion))
+      return false;
+    WindowsSDKLibVersion = WindowsSDKIncludeVersion;
+    return true;
+  }
+  // Unsupported SDK version
+  return false;
+}
+
+bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, Optional<StringRef> WinSdkDir,
+                           Optional<StringRef> WinSdkVersion,
+                           Optional<StringRef> WinSysRoot, std::string &Path,
+                           std::string &UCRTVersion) {
+  // If /winsdkdir is passed, use it as location for the UCRT too.
+  // FIXME: Should there be a dedicated /ucrtdir to override /winsdkdir?
+  int Major;
+  if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot,
+                                     Path, Major, UCRTVersion))
+    return true;
+
+  // FIXME: Try env vars (%UniversalCRTSdkDir%, %UCRTVersion%) before going to
+  // registry.
+
+  // vcvarsqueryregistry.bat for Visual Studio 2015 queries the registry
+  // for the specific key "KitsRoot10". So do we.
+  if (!getSystemRegistryString(
+          "SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots", "KitsRoot10",
+          Path, nullptr))
+    return false;
+
+  return getWindows10SDKVersionFromPath(VFS, Path, UCRTVersion);
+}
+
+bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS,
+                                   Optional<StringRef> VCToolsDir,
+                                   Optional<StringRef> VCToolsVersion,
+                                   Optional<StringRef> WinSysRoot,
+                                   std::string &Path, ToolsetLayout &VSLayout) {
+  // Don't validate the input; trust the value supplied by the user.
+  // The primary motivation is to prevent unnecessary file and registry access.
+  if (VCToolsDir || WinSysRoot) {
+    if (WinSysRoot) {
+      SmallString<128> ToolsPath(*WinSysRoot);
+      sys::path::append(ToolsPath, "VC", "Tools", "MSVC");
+      std::string ToolsVersion;
+      if (VCToolsVersion)
+        ToolsVersion = VCToolsVersion->str();
+      else
+        ToolsVersion = getHighestNumericTupleInDirectory(VFS, ToolsPath);
+      sys::path::append(ToolsPath, ToolsVersion);
+      Path = std::string(ToolsPath.str());
+    } else {
+      Path = VCToolsDir->str();
+    }
+    VSLayout = ToolsetLayout::VS2017OrNewer;
+    return true;
+  }
+  return false;
+}
+
+bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
+                                   ToolsetLayout &VSLayout) {
+  // These variables are typically set by vcvarsall.bat
+  // when launching a developer command prompt.
+  if (Optional<std::string> VCToolsInstallDir =
+          sys::Process::GetEnv("VCToolsInstallDir")) {
+    // This is only set by newer Visual Studios, and it leads straight to
+    // the toolchain directory.
+    Path = std::move(*VCToolsInstallDir);
+    VSLayout = ToolsetLayout::VS2017OrNewer;
+    return true;
+  }
+  if (Optional<std::string> VCInstallDir =
+          sys::Process::GetEnv("VCINSTALLDIR")) {
+    // If the previous variable isn't set but this one is, then we've found
+    // an older Visual Studio. This variable is set by newer Visual Studios
+    // too, so this check has to appear second.
+    // In older Visual Studios, the VC directory is the toolchain.
+    Path = std::move(*VCInstallDir);
+    VSLayout = ToolsetLayout::OlderVS;
+    return true;
+  }
+
+  // We couldn't find any VC environment variables. Let's walk through PATH and
+  // see if it leads us to a VC toolchain bin directory. If it does, pick the
+  // first one that we find.
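+  // Three layouts are recognized below: <VS>\VC\bin (pre-2017 toolchains),
+  // DevDiv internal builds (bin\x86ret, bin\x86chk, bin\amd64ret,
+  // bin\amd64chk), and VS2017+ (VC\Tools\MSVC\<version>\bin\Host<arch>\<arch>).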
+  if (Optional<std::string> PathEnv = sys::Process::GetEnv("PATH")) {
+    SmallVector<StringRef, 8> PathEntries;
+    StringRef(*PathEnv).split(PathEntries, sys::EnvPathSeparator);
+    for (StringRef PathEntry : PathEntries) {
+      if (PathEntry.empty())
+        continue;
+
+      SmallString<256> ExeTestPath;
+
+      // If cl.exe doesn't exist, then this definitely isn't a VC toolchain.
+      ExeTestPath = PathEntry;
+      sys::path::append(ExeTestPath, "cl.exe");
+      if (!VFS.exists(ExeTestPath))
+        continue;
+
+      // cl.exe existing isn't a conclusive test for a VC toolchain; clang also
+      // has a cl.exe. So let's check for link.exe too.
+      ExeTestPath = PathEntry;
+      sys::path::append(ExeTestPath, "link.exe");
+      if (!VFS.exists(ExeTestPath))
+        continue;
+
+      // whatever/VC/bin --> old toolchain, VC dir is toolchain dir.
+      StringRef TestPath = PathEntry;
+      bool IsBin = sys::path::filename(TestPath).equals_insensitive("bin");
+      if (!IsBin) {
+        // Strip any architecture subdir like "amd64".
+        TestPath = sys::path::parent_path(TestPath);
+        IsBin = sys::path::filename(TestPath).equals_insensitive("bin");
+      }
+      if (IsBin) {
+        StringRef ParentPath = sys::path::parent_path(TestPath);
+        StringRef ParentFilename = sys::path::filename(ParentPath);
+        if (ParentFilename.equals_insensitive("VC")) {
+          Path = std::string(ParentPath);
+          VSLayout = ToolsetLayout::OlderVS;
+          return true;
+        }
+        if (ParentFilename.equals_insensitive("x86ret") ||
+            ParentFilename.equals_insensitive("x86chk") ||
+            ParentFilename.equals_insensitive("amd64ret") ||
+            ParentFilename.equals_insensitive("amd64chk")) {
+          Path = std::string(ParentPath);
+          VSLayout = ToolsetLayout::DevDivInternal;
+          return true;
+        }
+
+      } else {
+        // This could be a new (>=VS2017) toolchain. If it is, we should find
+        // path components with these prefixes when walking backwards through
+        // the path.
+        // Note: empty strings match anything.
+        StringRef ExpectedPrefixes[] = {"",     "Host",  "bin", "",
+                                        "MSVC", "Tools", "VC"};
+
+        auto It = sys::path::rbegin(PathEntry);
+        auto End = sys::path::rend(PathEntry);
+        for (StringRef Prefix : ExpectedPrefixes) {
+          if (It == End)
+            goto NotAToolChain;
+          if (!It->startswith_insensitive(Prefix))
+            goto NotAToolChain;
+          ++It;
+        }
+
+        // We've found a new toolchain!
+        // Back up 3 times (/bin/Host/arch) to get the root path.
+        StringRef ToolChainPath(PathEntry);
+        for (int i = 0; i < 3; ++i)
+          ToolChainPath = sys::path::parent_path(ToolChainPath);
+
+        Path = std::string(ToolChainPath);
+        VSLayout = ToolsetLayout::VS2017OrNewer;
+        return true;
+      }
+
+    NotAToolChain:
+      continue;
+    }
+  }
+  return false;
+}
+
+bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path,
+                                   ToolsetLayout &VSLayout) {
+#if !defined(USE_MSVC_SETUP_API)
+  return false;
+#else
+  // FIXME: This really should be done once in the top-level program's main
+  // function, as it may have already been initialized with a different
+  // threading model otherwise.
+  sys::InitializeCOMRAII COM(sys::COMThreadingMode::SingleThreaded);
+  HRESULT HR;
+
+  // _com_ptr_t will throw a _com_error if a COM call fails.
+  // The LLVM coding standards forbid exception handling, so we'll have to
+  // stop them from being thrown in the first place.
+  // The destructor will put the regular error handler back when we leave
+  // this scope.
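+  // Note that _set_com_error_handler swaps a process-global callback, which
+  // is why the RAII type below restores _com_raise_error unconditionally on
+  // destruction.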
+  struct SuppressCOMErrorsRAII {
+    static void __stdcall handler(HRESULT hr, IErrorInfo *perrinfo) {}
+
+    SuppressCOMErrorsRAII() { _set_com_error_handler(handler); }
+
+    ~SuppressCOMErrorsRAII() { _set_com_error_handler(_com_raise_error); }
+
+  } COMErrorSuppressor;
+
+  ISetupConfigurationPtr Query;
+  HR = Query.CreateInstance(__uuidof(SetupConfiguration));
+  if (FAILED(HR))
+    return false;
+
+  IEnumSetupInstancesPtr EnumInstances;
+  HR = ISetupConfiguration2Ptr(Query)->EnumAllInstances(&EnumInstances);
+  if (FAILED(HR))
+    return false;
+
+  ISetupInstancePtr Instance;
+  HR = EnumInstances->Next(1, &Instance, nullptr);
+  if (HR != S_OK)
+    return false;
+
+  ISetupInstancePtr NewestInstance;
+  Optional<uint64_t> NewestVersionNum;
+  do {
+    bstr_t VersionString;
+    uint64_t VersionNum;
+    HR = Instance->GetInstallationVersion(VersionString.GetAddress());
+    if (FAILED(HR))
+      continue;
+    HR = ISetupHelperPtr(Query)->ParseVersion(VersionString, &VersionNum);
+    if (FAILED(HR))
+      continue;
+    if (!NewestVersionNum || (VersionNum > NewestVersionNum)) {
+      NewestInstance = Instance;
+      NewestVersionNum = VersionNum;
+    }
+  } while ((HR = EnumInstances->Next(1, &Instance, nullptr)) == S_OK);
+
+  if (!NewestInstance)
+    return false;
+
+  bstr_t VCPathWide;
+  HR = NewestInstance->ResolvePath(L"VC", VCPathWide.GetAddress());
+  if (FAILED(HR))
+    return false;
+
+  std::string VCRootPath;
+  convertWideToUTF8(std::wstring(VCPathWide), VCRootPath);
+
+  SmallString<256> ToolsVersionFilePath(VCRootPath);
+  sys::path::append(ToolsVersionFilePath, "Auxiliary", "Build",
+                    "Microsoft.VCToolsVersion.default.txt");
+
+  auto ToolsVersionFile = MemoryBuffer::getFile(ToolsVersionFilePath);
+  if (!ToolsVersionFile)
+    return false;
+
+  SmallString<256> ToolchainPath(VCRootPath);
+  sys::path::append(ToolchainPath, "Tools", "MSVC",
+                    ToolsVersionFile->get()->getBuffer().rtrim());
+  auto Status = VFS.status(ToolchainPath);
+  if (!Status || !Status->isDirectory())
+    return false;
+
+  Path = std::string(ToolchainPath.str());
+  VSLayout = ToolsetLayout::VS2017OrNewer;
+  return true;
+#endif
+}
+
+bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) {
+  std::string VSInstallPath;
+  if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
+                              "InstallDir", VSInstallPath, nullptr) ||
+      getSystemRegistryString(R"(SOFTWARE\Microsoft\VCExpress\$VERSION)",
+                              "InstallDir", VSInstallPath, nullptr)) {
+    if (!VSInstallPath.empty()) {
+      SmallString<256> VCPath(StringRef(VSInstallPath.c_str(),
+                                        VSInstallPath.find(R"(\Common7\IDE)")));
+      sys::path::append(VCPath, "VC");
+
+      Path = std::string(VCPath.str());
+      VSLayout = ToolsetLayout::OlderVS;
+      return true;
+    }
+  }
+  return false;
+}
+
+} // namespace llvm
diff --git a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
index 40c03f7b0de7..8f5c53faf91e 100644
--- a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -14,8 +14,6 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/MemoryBuffer.h"
 
-#include <map>
-
 #if LLVM_ENABLE_LIBXML2
 #include <libxml/xmlreader.h>
 #endif
@@ -706,7 +704,7 @@ bool windows_manifest::isAvailable() { return false; }
 WindowsManifestMerger::WindowsManifestMerger()
     : Impl(std::make_unique<WindowsManifestMergerImpl>()) {}
 
-WindowsManifestMerger::~WindowsManifestMerger() {}
+WindowsManifestMerger::~WindowsManifestMerger() = default;
 
 Error WindowsManifestMerger::merge(MemoryBufferRef Manifest) {
   return Impl->merge(Manifest);
diff --git a/llvm/lib/XRay/FDRTraceWriter.cpp
b/llvm/lib/XRay/FDRTraceWriter.cpp
index 71c09bd4fce4..2b80740ed436 100644
--- a/llvm/lib/XRay/FDRTraceWriter.cpp
+++ b/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -74,7 +74,7 @@ FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
   OS.write(FreeFormBytes);
 }
 
-FDRTraceWriter::~FDRTraceWriter() {}
+FDRTraceWriter::~FDRTraceWriter() = default;
 
 Error FDRTraceWriter::visit(BufferExtents &R) {
   return writeMetadata<7u>(OS, R.size());
diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp
index d127ea0945f2..9912f59f0ba6 100644
--- a/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -270,7 +270,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
   // First, remove aliases to functions we're about to purge.
   for (GlobalAlias &Alias : M->aliases()) {
     GlobalObject *Root = Alias.getAliaseeObject();
-    Function *F = dyn_cast_or_null<Function>(Root);
+    auto *F = dyn_cast<Function>(Root);
     if (F) {
       if (Functions.count(F))
         // We're keeping this function.
@@ -278,7 +278,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
     } else if (Root->isNullValue()) {
       // This referenced a globalalias that we've already replaced,
       // so we still need to replace this alias.
-    } else if (!F) {
+    } else {
       // Not a function, therefore not something we mess with.
       continue;
     }
diff --git a/llvm/tools/bugpoint/ExecutionDriver.cpp b/llvm/tools/bugpoint/ExecutionDriver.cpp
index f06f378962d9..2b06e8f3b365 100644
--- a/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -105,7 +105,7 @@ namespace llvm {
 // program being debugged.
 cl::list<std::string> InputArgv("args", cl::Positional,
                                 cl::desc("<program arguments>..."),
-                                cl::ZeroOrMore, cl::PositionalEatsArgs);
+                                cl::PositionalEatsArgs);
 
 cl::opt<std::string>
     OutputPrefix("output-prefix", cl::init("bugpoint"),
@@ -114,19 +114,19 @@ cl::opt
 namespace {
 cl::list<std::string> ToolArgv("tool-args", cl::Positional,
-                               cl::desc("<tool arguments>..."), cl::ZeroOrMore,
+                               cl::desc("<tool arguments>..."),
                                cl::PositionalEatsArgs);
 
 cl::list<std::string> SafeToolArgv("safe-tool-args", cl::Positional,
                                    cl::desc("<safe-tool arguments>..."),
-                                   cl::ZeroOrMore, cl::PositionalEatsArgs);
+                                   cl::PositionalEatsArgs);
 
 cl::opt<std::string> CCBinary("gcc", cl::init(""),
                               cl::desc("The gcc binary to use."));
 
 cl::list<std::string> CCToolArgv("gcc-tool-args", cl::Positional,
                                  cl::desc("<gcc-tool arguments>..."),
-                                 cl::ZeroOrMore, cl::PositionalEatsArgs);
+                                 cl::PositionalEatsArgs);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index e67e877c13af..d425a8c5b49a 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -117,7 +117,7 @@ cl::opt SilencePasses(
 
 static cl::list<std::string> OptArgs("opt-args", cl::Positional,
                                      cl::desc("<opt arguments>..."),
-                                     cl::ZeroOrMore, cl::PositionalEatsArgs);
+                                     cl::PositionalEatsArgs);
 
 /// runPasses - Run the specified passes on Program, outputting a bitcode file
 /// and writing the filename into OutputFile if successful. If the
diff --git a/llvm/tools/bugpoint/bugpoint.cpp b/llvm/tools/bugpoint/bugpoint.cpp
index 937ec23231b0..6e3f237d0a39 100644
--- a/llvm/tools/bugpoint/bugpoint.cpp
+++ b/llvm/tools/bugpoint/bugpoint.cpp
@@ -65,11 +65,7 @@ static cl::opt
 // PassNameParser.
 //
 static cl::list<const PassInfo *, bool, PassNameParser>
-    PassList(cl::desc("Passes available:"), cl::ZeroOrMore);
-
-static cl::opt<bool>
-    StandardLinkOpts("std-link-opts",
-                     cl::desc("Include the standard link time optimizations"));
+    PassList(cl::desc("Passes available:"));
 
 static cl::opt<bool>
     OptLevelO1("O1", cl::desc("Optimization level 1. Identical to 'opt -O1'"));
@@ -203,12 +199,6 @@ int main(int argc, char **argv) {
 
   AddToDriver PM(D);
 
-  if (StandardLinkOpts) {
-    PassManagerBuilder Builder;
-    Builder.Inliner = createFunctionInliningPass();
-    Builder.populateLTOPassManager(PM);
-  }
-
   if (OptLevelO1)
     AddOptimizationPasses(PM, 1, 0);
   else if (OptLevelO2)
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index c07f4e66486c..8d82d78b15b5 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -36,6 +36,7 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Pass.h"
@@ -117,12 +118,10 @@ static cl::opt
 // Determine optimization level.
 static cl::opt<char>
-OptLevel("O",
-         cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
-                  "(default = '-O2')"),
-         cl::Prefix,
-         cl::ZeroOrMore,
-         cl::init(' '));
+    OptLevel("O",
+             cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+                      "(default = '-O2')"),
+             cl::Prefix, cl::init(' '));
 
 static cl::opt<std::string>
     TargetTriple("mtriple", cl::desc("Override target triple for module"));
@@ -212,7 +211,7 @@ static RunPassOption RunPassOpt;
 
 static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass(
     "run-pass",
     cl::desc("Run compiler only for specified passes (comma separated list)"),
-    cl::value_desc("pass-name"), cl::ZeroOrMore, cl::location(RunPassOpt));
+    cl::value_desc("pass-name"), cl::location(RunPassOpt));
 
 static int compileModule(char **, LLVMContext &);
@@ -369,6 +368,7 @@ int main(int argc, char **argv) {
   initializeHardwareLoopsPass(*Registry);
   initializeTransformUtils(*Registry);
   initializeReplaceWithVeclibLegacyPass(*Registry);
+  initializeTLSVariableHoistLegacyPassPass(*Registry);
 
   // Initialize debugging passes.
   initializeScavengerTestPass(*Registry);
@@ -501,14 +501,26 @@ static int compileModule(char **argv, LLVMContext &Context) {
     TargetMachine::parseBinutilsVersion(BinutilsVersion);
     Options.DisableIntegratedAS = NoIntegratedAssembler;
     Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
-    Options.MCOptions.MCUseDwarfDirectory = DwarfDirectory;
     Options.MCOptions.AsmVerbose = AsmVerbose;
     Options.MCOptions.PreserveAsmComments = PreserveComments;
     Options.MCOptions.IASSearchPaths = IncludeDirs;
     Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
+    if (DwarfDirectory.getPosition()) {
+      Options.MCOptions.MCUseDwarfDirectory =
+          DwarfDirectory ? MCTargetOptions::EnableDwarfDirectory
+                         : MCTargetOptions::DisableDwarfDirectory;
+    } else {
+      // -dwarf-directory is not set explicitly. Some assemblers
+      // (e.g. GNU as or ptxas) do not support `.file directory'
+      // syntax prior to DWARFv5. Let the target decide the default
+      // value.
+      Options.MCOptions.MCUseDwarfDirectory =
+          MCTargetOptions::DefaultDwarfDirectory;
+    }
   };
 
   Optional<Reloc::Model> RM = codegen::getExplicitRelocModel();
+  Optional<CodeModel::Model> CM = codegen::getExplicitCodeModel();
 
   const Target *TheTarget = nullptr;
   std::unique_ptr<TargetMachine> Target;
@@ -535,14 +547,13 @@ static int compileModule(char **argv, LLVMContext &Context) {
 
     // On AIX, setting the relocation model to anything other than PIC is
     // considered a user error.
-    if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_)
+    if (TheTriple.isOSAIX() && RM && *RM != Reloc::PIC_)
       reportError("invalid relocation model, AIX only supports PIC",
                   InputFilename);
 
     InitializeOptions(TheTriple);
     Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
-        TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
-        codegen::getExplicitCodeModel(), OLvl));
+        TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, CM, OLvl));
     assert(Target && "Could not allocate target machine!");
 
     return Target->createDataLayout().getStringRepresentation();
@@ -562,6 +573,10 @@ static int compileModule(char **argv, LLVMContext &Context) {
     }
     if (!TargetTriple.empty())
       M->setTargetTriple(Triple::normalize(TargetTriple));
+
+    Optional<CodeModel::Model> CM_IR = M->getCodeModel();
+    if (!CM && CM_IR)
+      Target->setCodeModel(CM_IR.getValue());
   } else {
     TheTriple = Triple(Triple::normalize(TargetTriple));
     if (TheTriple.getTriple().empty())
@@ -578,7 +593,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
 
     // On AIX, setting the relocation model to anything other than PIC is
     // considered a user error.
-    if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
+    if (TheTriple.isOSAIX() && RM && *RM != Reloc::PIC_) {
       WithColor::error(errs(), argv[0])
           << "invalid relocation model, AIX only supports PIC.\n";
       return 1;
@@ -586,8 +601,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
 
     InitializeOptions(TheTriple);
     Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
-        TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
-        codegen::getExplicitCodeModel(), OLvl));
+        TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, CM, OLvl));
     assert(Target && "Could not allocate target machine!");
 
     // If we don't have a module then just exit now. We do this down
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index d20daa07196b..f2e3886bdf07 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -28,12 +28,15 @@
 #include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
 #include "llvm/ExecutionEngine/Orc/DebugUtils.h"
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
 #include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
+#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
 #include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
 #include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
 #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
 #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
 #include "llvm/ExecutionEngine/Orc/LLJIT.h"
+#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
@@ -120,6 +123,9 @@ namespace {
                    "RuntimeDyld"),
         clEnumValN(JITLinkerKind::JITLink, "jitlink",
                    "Orc-specific linker")));
+  cl::opt<std::string> OrcRuntime("orc-runtime",
+                                  cl::desc("Use ORC runtime from given path"),
+                                  cl::init(""));
 
   cl::opt<unsigned>
   LazyJITCompileThreads("compile-threads",
@@ -144,8 +150,7 @@ namespace {
                  "-extra-module arguments."));
 
   cl::list<std::string>
-      Dylibs("dlopen", cl::desc("Dynamic libraries to load before linking"),
-             cl::ZeroOrMore);
+      Dylibs("dlopen", cl::desc("Dynamic libraries to load before linking"));
 
   // The MCJIT supports building for a target address space separate from
  // the JIT compilation process. Use a forked process and a copying
@@ -166,13 +171,10 @@ namespace {
                 cl::value_desc("filename"), cl::init(""));
 
   // Determine optimization level.
-  cl::opt<char>
-  OptLevel("O",
-           cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
-                    "(default = '-O2')"),
-           cl::Prefix,
-           cl::ZeroOrMore,
-           cl::init(' '));
+  cl::opt<char> OptLevel("O",
+                         cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+                                  "(default = '-O2')"),
+                         cl::Prefix, cl::init(' '));
 
   cl::opt<std::string>
   TargetTriple("mtriple", cl::desc("Override target triple for module"));
@@ -234,13 +236,15 @@ namespace {
                     cl::desc("Do not resolve lli process symbols in JIT'd code"),
                     cl::init(false));
 
-  enum class LLJITPlatform { Inactive, DetectHost, GenericIR };
+  enum class LLJITPlatform { Inactive, DetectHost, ORC, GenericIR };
 
   cl::opt<LLJITPlatform>
       Platform("lljit-platform", cl::desc("Platform to use with LLJIT"),
                cl::init(LLJITPlatform::DetectHost),
               cl::values(clEnumValN(LLJITPlatform::DetectHost, "DetectHost",
                                      "Select based on JIT target triple"),
+                          clEnumValN(LLJITPlatform::ORC, "ORC",
+                                     "Use ORCPlatform with the ORC runtime"),
                           clEnumValN(LLJITPlatform::GenericIR, "GenericIR",
                                      "Use LLJITGenericIRPlatform"),
                           clEnumValN(LLJITPlatform::Inactive, "Inactive",
@@ -369,6 +373,53 @@ private:
   }
 };
 
+class ORCPlatformSupport : public orc::LLJIT::PlatformSupport {
+public:
+  ORCPlatformSupport(orc::LLJIT &J) : J(J) {}
+
+  Error initialize(orc::JITDylib &JD) override {
+    using llvm::orc::shared::SPSExecutorAddr;
+    using llvm::orc::shared::SPSString;
+    using SPSDLOpenSig = SPSExecutorAddr(SPSString, int32_t);
+    enum dlopen_mode : int32_t {
+      ORC_RT_RTLD_LAZY = 0x1,
+      ORC_RT_RTLD_NOW = 0x2,
+      ORC_RT_RTLD_LOCAL = 0x4,
+      ORC_RT_RTLD_GLOBAL = 0x8
+    };
+
+    if (auto WrapperAddr = J.lookup("__orc_rt_jit_dlopen_wrapper")) {
+      return J.getExecutionSession().callSPSWrapper<SPSDLOpenSig>(
+          *WrapperAddr, DSOHandles[&JD], JD.getName(),
+          int32_t(ORC_RT_RTLD_LAZY));
+    } else
+      return WrapperAddr.takeError();
+  }
+
+  Error deinitialize(orc::JITDylib &JD) override {
+    using llvm::orc::shared::SPSExecutorAddr;
+    using SPSDLCloseSig = int32_t(SPSExecutorAddr);
+
+    if (auto WrapperAddr = J.lookup("__orc_rt_jit_dlclose_wrapper")) {
+      int32_t result;
+      auto E = J.getExecutionSession().callSPSWrapper<SPSDLCloseSig>(
+          *WrapperAddr, result, DSOHandles[&JD]);
+      if (E)
+        return E;
+      else if (result)
+        return make_error<StringError>("dlclose failed",
+                                       inconvertibleErrorCode());
+      DSOHandles.erase(&JD);
+    } else
+      return WrapperAddr.takeError();
+    return Error::success();
+  }
+
+private:
+  orc::LLJIT &J;
+  DenseMap<orc::JITDylib *, orc::ExecutorAddr> DSOHandles;
+};
+
 // On Mingw and Cygwin, an external symbol named '__main' is called from the
 // generated 'main' function to allow static initialization. To avoid linking
 // problems with remote targets (because lli's remote target support does not
@@ -881,7 +932,7 @@ int runOrcJIT(const char *ProgName) {
   }
 
   Builder.setLazyCompileFailureAddr(
-      pointerToJITTargetAddress(exitOnLazyCallThroughFailure));
+      orc::ExecutorAddr::fromPtr(exitOnLazyCallThroughFailure));
   Builder.setNumCompileThreads(LazyJITCompileThreads);
 
   // If the object cache is enabled then set a custom compile function
@@ -908,21 +959,29 @@ int runOrcJIT(const char *ProgName) {
   }
 
   // Set up LLJIT platform.
-  {
-    LLJITPlatform P = Platform;
-    if (P == LLJITPlatform::DetectHost)
+  LLJITPlatform P = Platform;
+  if (P == LLJITPlatform::DetectHost) {
+    if (JITLinker == JITLinkerKind::JITLink && !OrcRuntime.empty() &&
+        (TT->isOSBinFormatMachO() || TT->isOSBinFormatELF()))
+      P = LLJITPlatform::ORC;
+    else
       P = LLJITPlatform::GenericIR;
-
-    switch (P) {
-    case LLJITPlatform::GenericIR:
-      // Nothing to do: LLJITBuilder will use this by default.
-      break;
-    case LLJITPlatform::Inactive:
-      Builder.setPlatformSetUp(orc::setUpInactivePlatform);
-      break;
-    default:
-      llvm_unreachable("Unrecognized platform value");
-    }
+  }
+  switch (P) {
+  case LLJITPlatform::ORC:
+    Builder.setPlatformSetUp([](llvm::orc::LLJIT &J) -> llvm::Error {
+      J.setPlatformSupport(std::make_unique<ORCPlatformSupport>(J));
+      return Error::success();
+    });
+    break;
+  case LLJITPlatform::GenericIR:
+    // Nothing to do: LLJITBuilder will use this by default.
+    break;
+  case LLJITPlatform::Inactive:
+    Builder.setPlatformSetUp(orc::setUpInactivePlatform);
+    break;
+  default:
+    llvm_unreachable("Unrecognized platform value");
   }
 
   std::unique_ptr<orc::ExecutorProcessControl> EPC = nullptr;
   if (JITLinker == JITLinkerKind::JITLink) {
     EPC = ExitOnErr(orc::SelfExecutorProcessControl::Create(
         std::make_shared<orc::SymbolStringPool>()));
 
-    Builder.setObjectLinkingLayerCreator([&EPC](orc::ExecutionSession &ES,
-                                                const Triple &) {
+    Builder.setObjectLinkingLayerCreator([&EPC, &P](orc::ExecutionSession &ES,
+                                                    const Triple &TT) {
       auto L = std::make_unique<orc::ObjectLinkingLayer>(ES, EPC->getMemMgr());
-      L->addPlugin(std::make_unique<orc::EHFrameRegistrationPlugin>(
-          ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(ES))));
-      L->addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(
-          ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(ES))));
+      if (P != LLJITPlatform::ORC) {
+        L->addPlugin(std::make_unique<orc::EHFrameRegistrationPlugin>(
+            ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(ES))));
+        L->addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(
+            ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(ES))));
+      }
       return L;
     });
   }
@@ -983,6 +1044,31 @@ int runOrcJIT(const char *ProgName) {
       std::make_unique<LLIBuiltinFunctionGenerator>(GenerateBuiltinFunctions,
                                                     Mangle));
 
+  if (P == LLJITPlatform::ORC) {
+    if (auto *OLL = llvm::dyn_cast<llvm::orc::ObjectLinkingLayer>(ObjLayer)) {
+      auto &ES = J->getExecutionSession();
+      if (TT->isOSBinFormatMachO()) {
+        if (auto P = llvm::orc::MachOPlatform::Create(
+                ES, *OLL, J->getMainJITDylib(), OrcRuntime.c_str()))
+          ES.setPlatform(std::move(*P));
+        else
+          ExitOnErr(P.takeError());
+      } else if (TT->isOSBinFormatELF()) {
+        if (auto P = llvm::orc::ELFNixPlatform::Create(
+                ES, *OLL, J->getMainJITDylib(), OrcRuntime.c_str()))
+          ES.setPlatform(std::move(*P));
+        else
+          ExitOnErr(P.takeError());
+      } else {
+        errs() << "No ORC platform support\n";
+        exit(1);
+      }
+    } else {
+      errs() << "ORC platform requires JITLink\n";
+      exit(1);
+    }
+  }
+
   // Regular modules are greedy: They materialize as a whole and trigger
   // materialization for all required symbols recursively. Lazy modules go
   // through partitioning and they replace outgoing calls with reexport stubs
@@ -1049,23 +1135,21 @@ int runOrcJIT(const char *ProgName) {
   for (auto &ThreadEntryPoint : ThreadEntryPoints) {
     auto EntryPointSym = ExitOnErr(J->lookup(ThreadEntryPoint));
     typedef void (*EntryPointPtr)();
-    auto EntryPoint = reinterpret_cast<EntryPointPtr>(
-        static_cast<uintptr_t>(EntryPointSym.getAddress()));
+    auto EntryPoint = EntryPointSym.toPtr<EntryPointPtr>();
     AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
   }
 
   // Resolve and run the main function.
-  JITEvaluatedSymbol MainSym = ExitOnErr(J->lookup(EntryFunc));
+  auto MainAddr = ExitOnErr(J->lookup(EntryFunc));
 
   int Result;
 
   if (EPC) {
     // ExecutorProcessControl-based execution with JITLink.
-    Result = ExitOnErr(
-        EPC->runAsMain(orc::ExecutorAddr(MainSym.getAddress()), InputArgv));
+    Result = ExitOnErr(EPC->runAsMain(MainAddr, InputArgv));
   } else {
     // Manual in-process execution with RuntimeDyld.
     using MainFnTy = int(int, char *[]);
-    auto MainFn = jitTargetAddressToFunction<MainFnTy *>(MainSym.getAddress());
+    auto MainFn = MainAddr.toPtr<MainFnTy *>();
     Result = orc::runAsMain(MainFn, InputArgv, StringRef(InputFile));
   }
 
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 8842162f5216..e964dc8256a5 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/SymbolicFile.h"
+#include "llvm/Object/XCOFFObjectFile.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConvertUTF.h"
@@ -61,32 +62,30 @@ static StringRef ToolName;
 
 // The basename of this program.
 static StringRef Stem;
 
-const char RanlibHelp[] = R"(OVERVIEW: LLVM Ranlib (llvm-ranlib)
-
-  This program generates an index to speed access to archives
-
-USAGE: llvm-ranlib <archive-file>
-
-OPTIONS:
-  -h --help             - Display available options
-  -v --version          - Display the version of this program
-  -D                    - Use zero for timestamps and uids/gids (default)
-  -U                    - Use actual timestamps and uids/gids
-)";
-
-const char ArHelp[] = R"(OVERVIEW: LLVM Archiver
-
-USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [files]
-       llvm-ar -M [<mri-script]
+static void printRanlibHelp(StringRef ToolName) {
+  outs() << "OVERVIEW: LLVM Ranlib\n\n"
+         << "This program generates an index to speed access to archives\n\n"
+         << "USAGE: " + ToolName + " <archive-file>\n\n"
+         << "OPTIONS:\n"
+         << "  -h --help             - Display available options\n"
+         << "  -v --version          - Display the version of this program\n"
+         << "  -D                    - Use zero for timestamps and uids/gids "
+            "(default)\n"
+         << "  -U                    - Use actual timestamps and uids/gids\n";
+}
 
-OPTIONS:
+static void printArHelp(StringRef ToolName) {
+  const char ArOptions[] =
+      R"(OPTIONS:
   --format              - archive format to create
     =default            -   default
     =gnu                -   gnu
     =darwin             -   darwin
     =bsd                -   bsd
+    =aix                -   aix (big archive)
   --plugin=<p>          - ignored for compatibility
   -h --help             - display this help and exit
+  --output              - the directory to extract archive members to
   --rsp-quoting         - quoting style for response files
    =posix              -   posix
     =windows            -   windows
@@ -126,11 +125,20 @@ MODIFIERS:
   [V]                   - display the version and exit
 )";
+
+  outs() << "OVERVIEW: LLVM Archiver\n\n"
+         << "USAGE: " + ToolName +
+                " [options] [-]<operation>[modifiers] [relpos] "
+                "[count] <archive> [files]\n"
+         << "       " + ToolName + " -M [<mri-script]\n\n";
+
+  outs() << ArOptions;
+}
 
 static SmallVector<const char *, 256> PositionalArgs;
 static bool MRI;
 
 namespace {
-enum Format { Default, GNU, BSD, DARWIN, Unknown };
+enum Format { Default, GNU, BSD, DARWIN, BIGARCHIVE, Unknown };
 }
 
 static Format FormatType = Default;
@@ -230,6 +238,9 @@ static int CountParam = 0;
 // command line.
 static std::string ArchiveName;
 
+// Output directory specified by --output.
+static std::string OutputDir;
+
 static std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
 static std::vector<std::unique_ptr<object::Archive>> Archives;
 
@@ -447,6 +458,19 @@ static ArchiveOperation parseCommandLine() {
   if (AddLibrary && Operation != QuickAppend)
     badUsage("the 'L' modifier is only applicable to the 'q' operation");
 
+  if (!OutputDir.empty()) {
+    if (Operation != Extract)
+      badUsage("--output is only applicable to the 'x' operation");
+    bool IsDir = false;
+    // If OutputDir is not a directory, create_directories may still succeed if
+    // all components of the path prefix are directories. Test is_directory as
+    // well.
+    if (!sys::fs::create_directories(OutputDir))
+      sys::fs::is_directory(OutputDir, IsDir);
+    if (!IsDir)
+      fail("'" + OutputDir + "' is not a directory");
+  }
+
   // Return the parsed operation to the caller
   return Operation;
 }
@@ -547,7 +571,15 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
   failIfError(ModeOrErr.takeError());
   sys::fs::perms Mode = ModeOrErr.get();
 
-  llvm::StringRef outputFilePath = sys::path::filename(Name);
+  StringRef outputFilePath;
+  SmallString<128> path;
+  if (OutputDir.empty()) {
+    outputFilePath = sys::path::filename(Name);
+  } else {
+    sys::path::append(path, OutputDir, sys::path::filename(Name));
+    outputFilePath = path.str();
+  }
+
   if (Verbose)
     outs() << "x - " << outputFilePath << '\n';
@@ -652,8 +684,6 @@ static void performReadOperation(ArchiveOperation Operation,
 static void addChildMember(std::vector<NewArchiveMember> &Members,
                            const object::Archive::Child &M,
                            bool FlattenArchive = false) {
-  if (Thin && !M.getParent()->isThin())
-    fail("cannot convert a regular archive to a thin one");
   Expected<NewArchiveMember> NMOrErr =
       NewArchiveMember::getOldMember(M, Deterministic);
   failIfError(NMOrErr.takeError());
@@ -875,48 +905,18 @@ computeNewArchiveMembers(ArchiveOperation Operation,
   return Ret;
 }
 
-static object::Archive::Kind getDefaultForHost() {
-  return Triple(sys::getProcessTriple()).isOSDarwin()
-             ? object::Archive::K_DARWIN
-             : object::Archive::K_GNU;
-}
-
-static object::Archive::Kind getKindFromMember(const NewArchiveMember &Member) {
-  auto MemBufferRef = Member.Buf->getMemBufferRef();
-  Expected<std::unique_ptr<object::ObjectFile>> OptionalObject =
-      object::ObjectFile::createObjectFile(MemBufferRef);
-
-  if (OptionalObject)
-    return isa<object::MachOObjectFile>(**OptionalObject)
-               ? object::Archive::K_DARWIN
-               : object::Archive::K_GNU;
-
-  // squelch the error in case we had a non-object file
-  consumeError(OptionalObject.takeError());
-
-  // If we're adding a bitcode file to the archive, detect the Archive kind
-  // based on the target triple.
-  LLVMContext Context;
-  if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) {
-    if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
-            MemBufferRef, file_magic::bitcode, &Context)) {
-      auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr);
-      return Triple(IRObject.getTargetTriple()).isOSDarwin()
-                 ? object::Archive::K_DARWIN
-                 : object::Archive::K_GNU;
-    } else {
-      // Squelch the error in case this was not a SymbolicFile.
-      consumeError(ObjOrErr.takeError());
-    }
-  }
-
-  return getDefaultForHost();
-}
-
 static void performWriteOperation(ArchiveOperation Operation,
                                   object::Archive *OldArchive,
                                   std::unique_ptr<MemoryBuffer> OldArchiveBuf,
                                   std::vector<NewArchiveMember> *NewMembersP) {
+  if (OldArchive) {
+    if (Thin && !OldArchive->isThin())
+      fail("cannot convert a regular archive to a thin one");
+
+    if (OldArchive->isThin())
+      Thin = true;
+  }
+
   std::vector<NewArchiveMember> NewMembers;
   if (!NewMembersP)
     NewMembers = computeNewArchiveMembers(Operation, OldArchive);
@@ -926,14 +926,23 @@ static void performWriteOperation(ArchiveOperation Operation,
   case Default:
     if (Thin)
      Kind = object::Archive::K_GNU;
-    else if (OldArchive)
+    else if (OldArchive) {
       Kind = OldArchive->kind();
-    else if (NewMembersP)
-      Kind = !NewMembersP->empty() ? getKindFromMember(NewMembersP->front())
-                                   : getDefaultForHost();
+      if (Kind == object::Archive::K_BSD) {
+        auto InferredKind = object::Archive::K_BSD;
+        if (NewMembersP && !NewMembersP->empty())
+          InferredKind = NewMembersP->front().detectKindFromObject();
+        else if (!NewMembers.empty())
+          InferredKind = NewMembers.front().detectKindFromObject();
+        if (InferredKind == object::Archive::K_DARWIN)
+          Kind = object::Archive::K_DARWIN;
+      }
+    } else if (NewMembersP)
+      Kind = !NewMembersP->empty() ? NewMembersP->front().detectKindFromObject()
+                                   : object::Archive::getDefaultKindForHost();
     else
-      Kind = !NewMembers.empty() ? getKindFromMember(NewMembers.front())
-                                 : getDefaultForHost();
+      Kind = !NewMembers.empty() ? NewMembers.front().detectKindFromObject()
+                                 : object::Archive::getDefaultKindForHost();
     break;
   case GNU:
     Kind = object::Archive::K_GNU;
@@ -948,6 +957,11 @@ static void performWriteOperation(ArchiveOperation Operation,
       fail("only the gnu format has a thin mode");
     Kind = object::Archive::K_DARWIN;
     break;
+  case BIGARCHIVE:
+    if (Thin)
+      fail("only the gnu format has a thin mode");
+    Kind = object::Archive::K_AIXBIG;
+    break;
   case Unknown:
     llvm_unreachable("");
   }
@@ -1073,8 +1087,12 @@ static void runMRIScript() {
 
     switch (Command) {
     case MRICommand::AddLib: {
+      if (!Create)
+        fail("no output archive has been opened");
      object::Archive &Lib = readLibrary(Rest);
       {
+        if (Thin && !Lib.isThin())
+          fail("cannot add a regular archive's contents to a thin archive");
         Error Err = Error::success();
         for (auto &Member : Lib.children(Err))
           addChildMember(NewMembers, Member, /*FlattenArchive=*/Thin);
@@ -1083,6 +1101,8 @@ static void runMRIScript() {
       break;
     }
     case MRICommand::AddMod:
+      if (!Create)
+        fail("no output archive has been opened");
       addMember(NewMembers, Rest);
       break;
     case MRICommand::CreateThin:
@@ -1095,6 +1115,8 @@ static void runMRIScript() {
       if (Saved)
        fail("file already saved");
       ArchiveName = std::string(Rest);
+      if (ArchiveName.empty())
+        fail("missing archive name");
       break;
     case MRICommand::Delete: {
       llvm::erase_if(NewMembers, [=](NewArchiveMember &M) {
@@ -1116,7 +1138,8 @@ static void runMRIScript() {
 
   // Nothing to do if not saved.
   if (Saved)
-    performOperation(ReplaceOrInsert, &NewMembers);
+    performOperation(ReplaceOrInsert, /*OldArchive=*/nullptr,
+                     /*OldArchiveBuf=*/nullptr, &NewMembers);
   exit(0);
 }
@@ -1219,12 +1242,18 @@ static int ar_main(int argc, char **argv) {
                       .Case("gnu", GNU)
                       .Case("darwin", DARWIN)
                       .Case("bsd", BSD)
+                      .Case("bigarchive", BIGARCHIVE)
                       .Default(Unknown);
       if (FormatType == Unknown)
        fail(std::string("Invalid format ") + Match);
       continue;
     }
 
+    if ((Match = matchFlagWithArg("output", ArgIt, Argv))) {
+      OutputDir = Match;
+      continue;
+    }
+
     if (matchFlagWithArg("plugin", ArgIt, Argv) ||
         matchFlagWithArg("rsp-quoting", ArgIt, Argv))
       continue;
@@ -1274,7 +1303,7 @@ static int ranlib_main(int argc, char **argv) {
   return performOperation(CreateSymTab, nullptr);
 }
 
-int main(int argc, char **argv) {
+int llvm_ar_main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   ToolName = argv[0];
 
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index ef801287c1be..6932e9b5bd31 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -265,8 +265,7 @@ bool CodeCoverageTool::isEquivalentFile(StringRef FilePath1,
                                         StringRef FilePath2) {
   auto Status1 = getFileStatus(FilePath1);
   auto Status2 = getFileStatus(FilePath2);
-  return Status1.hasValue() && Status2.hasValue() &&
-         sys::fs::equivalent(Status1.getValue(), Status2.getValue());
+  return Status1 && Status2 && sys::fs::equivalent(*Status1, *Status2);
 }
 
 ErrorOr
@@ -621,14 +620,14 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
       cl::Positional, cl::desc("Covered executable or object file."));
 
   cl::list<std::string> CovFilenames(
-      "object", cl::desc("Coverage executable or object file"), cl::ZeroOrMore);
+      "object", cl::desc("Coverage executable or object file"));
 
   cl::opt<bool> DebugDumpCollectedObjects(
       "dump-collected-objects", cl::Optional, cl::Hidden,
       cl::desc("Show the collected coverage object files"));
 
-  cl::list<std::string> InputSourceFiles(
-      cl::Positional, cl::desc("<SOURCES>"), cl::ZeroOrMore);
+  cl::list<std::string> InputSourceFiles(cl::Positional,
+                                         cl::desc("<SOURCES>"));
 
   cl::opt<bool> DebugDumpCollectedPaths(
       "dump-collected-paths", cl::Optional, cl::Hidden,
@@ -665,32 +664,32 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
   cl::list<std::string> NameFilters(
       "name", cl::Optional,
       cl::desc("Show code coverage only for functions with the given name"),
-      cl::ZeroOrMore, cl::cat(FilteringCategory));
 +      cl::cat(FilteringCategory));
 
   cl::list<std::string> NameFilterFiles(
       "name-allowlist", cl::Optional,
       cl::desc("Show code coverage only for functions listed in the given "
               "file"),
-      cl::ZeroOrMore, cl::cat(FilteringCategory));
+      cl::cat(FilteringCategory));
 
   // Allow for accepting previous option name.
   cl::list<std::string> NameFilterFilesDeprecated(
       "name-whitelist", cl::Optional, cl::Hidden,
       cl::desc("Show code coverage only for functions listed in the given "
               "file. Deprecated, use -name-allowlist instead"),
-      cl::ZeroOrMore, cl::cat(FilteringCategory));
+      cl::cat(FilteringCategory));
 
   cl::list<std::string> NameRegexFilters(
       "name-regex", cl::Optional,
       cl::desc("Show code coverage only for functions that match the given "
               "regular expression"),
-      cl::ZeroOrMore, cl::cat(FilteringCategory));
+      cl::cat(FilteringCategory));
 
   cl::list<std::string> IgnoreFilenameRegexFilters(
       "ignore-filename-regex", cl::Optional,
       cl::desc("Skip source code files with file paths that match the given "
               "regular expression"),
-      cl::ZeroOrMore, cl::cat(FilteringCategory));
+      cl::cat(FilteringCategory));
 
   cl::opt<double> RegionCoverageLtFilter(
       "region-coverage-lt", cl::Optional,
@@ -883,6 +882,9 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
       }
       CoverageArches.emplace_back(Arch);
     }
+    if (CoverageArches.size() == 1)
+      CoverageArches.insert(CoverageArches.end(), ObjectFilenames.size() - 1,
+                            CoverageArches[0]);
     if (CoverageArches.size() != ObjectFilenames.size()) {
       error("Number of architectures doesn't match the number of objects");
       return 1;
@@ -973,6 +975,11 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
       "project-title", cl::Optional,
       cl::desc("Set project title for the coverage report"));
 
+  cl::opt<std::string> CovWatermark(
+      "coverage-watermark", cl::Optional,
+      cl::desc("<high>,<low> values indicate thresholds for high and low "
+               "coverage watermark"));
+
   auto Err = commandLineParser(argc, argv);
   if (Err)
     return Err;
@@ -982,6 +989,47 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
     return 1;
   }
 
+  ViewOpts.HighCovWatermark = 100.0;
+  ViewOpts.LowCovWatermark = 80.0;
+  if (!CovWatermark.empty()) {
+    auto WaterMarkPair = StringRef(CovWatermark).split(',');
+    if (WaterMarkPair.first.empty() || WaterMarkPair.second.empty()) {
+      error("invalid argument '" + CovWatermark +
+                "', must be in format 'high,low'",
+            "-coverage-watermark");
+      return 1;
+    }
+
+    char *EndPointer = nullptr;
+    ViewOpts.HighCovWatermark =
+        strtod(WaterMarkPair.first.begin(), &EndPointer);
+    if (EndPointer != WaterMarkPair.first.end()) {
+      error("invalid number '" + WaterMarkPair.first +
+                "', invalid value for 'high'",
+            "-coverage-watermark");
+      return 1;
+    }
+
+    ViewOpts.LowCovWatermark =
+        strtod(WaterMarkPair.second.begin(), &EndPointer);
+    if (EndPointer != WaterMarkPair.second.end()) {
+      error("invalid number '" + WaterMarkPair.second +
+                "', invalid value for 'low'",
+            "-coverage-watermark");
+      return 1;
+    }
+
+    if (ViewOpts.HighCovWatermark > 100 || ViewOpts.LowCovWatermark < 0 ||
+        ViewOpts.HighCovWatermark <= ViewOpts.LowCovWatermark) {
+      error("invalid number range '" + CovWatermark +
+                "', both high and low should be between 0-100, and high "
+                "> low",
+            "-coverage-watermark");
+      return 1;
+    }
+  }
+
   ViewOpts.ShowLineNumbers = true;
   ViewOpts.ShowLineStats = ShowLineExecutionCounts.getNumOccurrences() != 0 ||
                            !ShowRegions || ShowBestLineRegionsCounts;
diff --git a/llvm/tools/llvm-cov/CoverageViewOptions.h b/llvm/tools/llvm-cov/CoverageViewOptions.h
index 045fb1787bce..c6e99819f319 100644
--- a/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -50,6 +50,8 @@ struct CoverageViewOptions {
   std::string CreatedTimeStr;
   unsigned NumThreads;
   std::string CompilationDirectory;
+  float HighCovWatermark;
+  float LowCovWatermark;
 
   /// Change the output's stream color if the colors are enabled.
   ColoredRawOstream colored_ostream(raw_ostream &OS,
diff --git a/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
index 56efc40b9349..46782c9b3c9a 100644
--- a/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -338,24 +338,24 @@ void CoveragePrinterHTML::emitFileSummary(raw_ostream &OS, StringRef SF,
   SmallVector Columns;
 
   // Format a coverage triple and add the result to the list of columns.
-  auto AddCoverageTripleToColumn = [&Columns](unsigned Hit, unsigned Total,
-                                              float Pctg) {
-    std::string S;
-    {
-      raw_string_ostream RSO{S};
-      if (Total)
-        RSO << format("%*.2f", 7, Pctg) << "% ";
-      else
-        RSO << "- ";
-      RSO << '(' << Hit << '/' << Total << ')';
-    }
-    const char *CellClass = "column-entry-yellow";
-    if (Hit == Total)
-      CellClass = "column-entry-green";
-    else if (Pctg < 80.0)
-      CellClass = "column-entry-red";
-    Columns.emplace_back(tag("td", tag("pre", S), CellClass));
-  };
+  auto AddCoverageTripleToColumn =
+      [&Columns, this](unsigned Hit, unsigned Total, float Pctg) {
+        std::string S;
+        {
+          raw_string_ostream RSO{S};
+          if (Total)
+            RSO << format("%*.2f", 7, Pctg) << "% ";
+          else
+            RSO << "- ";
+          RSO << '(' << Hit << '/' << Total << ')';
+        }
+        const char *CellClass = "column-entry-yellow";
+        if (Pctg >= Opts.HighCovWatermark)
+          CellClass = "column-entry-green";
+        else if (Pctg < Opts.LowCovWatermark)
+          CellClass = "column-entry-red";
+        Columns.emplace_back(tag("td", tag("pre", S), CellClass));
+      };
 
   // Simplify the display file path, and wrap it in a link if requested.
   std::string Filename;
@@ -538,7 +538,7 @@ void SourceCoverageViewHTML::renderLine(raw_ostream &OS, LineRef L,
   auto Highlight = [&](const std::string &Snippet, unsigned LC, unsigned RC) {
     if (getOptions().Debug)
       HighlightedRanges.emplace_back(LC, RC);
-    return tag("span", Snippet, std::string(Color.getValue()));
+    return tag("span", Snippet, std::string(*Color));
   };
 
   auto CheckIfUncovered = [&](const CoverageSegment *S) {
@@ -561,12 +561,12 @@ void SourceCoverageViewHTML::renderLine(raw_ostream &OS, LineRef L,
     else
       Color = None;
 
-    if (Color.hasValue())
+    if (Color)
       Snippets[I + 1] = Highlight(Snippets[I + 1], CurSeg->Col,
                                   CurSeg->Col + Snippets[I + 1].size());
   }
 
-  if (Color.hasValue() && Segments.empty())
+  if (Color && Segments.empty())
     Snippets.back() = Highlight(Snippets.back(), 1, 1 + Snippets.back().size());
 
   if (getOptions().Debug) {
diff --git a/llvm/tools/llvm-cov/TestingSupport.cpp b/llvm/tools/llvm-cov/TestingSupport.cpp
index 9c6b25f2f585..289a1621660b 100644
--- a/llvm/tools/llvm-cov/TestingSupport.cpp
+++ b/llvm/tools/llvm-cov/TestingSupport.cpp
@@ -12,6 +12,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include
 #include
diff --git a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 1430674dbadc..02f4c8493903 100644
--- a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -36,7 +36,7 @@ namespace opts {
 cl::OptionCategory CXXDumpCategory("CXX Dump Options");
 cl::list<std::string> InputFilenames(cl::Positional,
                                      cl::desc("<input object files>"),
-                                     cl::ZeroOrMore, cl::cat(CXXDumpCategory));
+                                     cl::cat(CXXDumpCategory));
 } // namespace opts
 
 namespace llvm {
diff --git a/llvm/tools/llvm-cxxfilt/Opts.td b/llvm/tools/llvm-cxxfilt/Opts.td
index 93f865245fe6..f652a1a7f88b 100644
--- a/llvm/tools/llvm-cxxfilt/Opts.td
+++ b/llvm/tools/llvm-cxxfilt/Opts.td
@@ -16,7 +16,7 @@ multiclass Eq {
 def help : FF<"help", "Display this help">;
 defm strip_underscore : BB<"strip-underscore", "Strip the leading underscore",
                            "Don't strip the leading underscore">;
-def types : FF<"types", "">;
+def types : FF<"types", "Attempt to demangle types as well as function names">;
 def version : FF<"version", "Display the version">;
 
 defm : Eq<"format", "Specify mangling format. Currently ignored because only 'gnu' is supported">;
diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index ccfaaa96deb2..1cea9e29faa4 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -140,7 +140,7 @@ static void demangleLine(llvm::raw_ostream &OS, StringRef Mangled, bool Split) {
   OS.flush();
 }
 
-int main(int argc, char **argv) {
+int llvm_cxxfilt_main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   BumpPtrAllocator A;
   StringSaver Saver(A);
diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp
index 7b3c3e7706a6..4996fc12ae32 100644
--- a/llvm/tools/llvm-dis/llvm-dis.cpp
+++ b/llvm/tools/llvm-dis/llvm-dis.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
@@ -37,7 +38,7 @@ using namespace llvm;
 
 static cl::OptionCategory DisCategory("Disassembler Options");
 
-static cl::list<std::string> InputFilenames(cl::Positional, cl::ZeroOrMore,
+static cl::list<std::string> InputFilenames(cl::Positional,
                                             cl::desc("[input bitcode]..."),
                                             cl::cat(DisCategory));
 
@@ -179,8 +180,13 @@ int main(int argc, char **argv) {
   }
 
   for (std::string InputFilename : InputFilenames) {
-    std::unique_ptr<MemoryBuffer> MB = ExitOnErr(
-        errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+        MemoryBuffer::getFileOrSTDIN(InputFilename);
+    if (std::error_code EC = BufferOrErr.getError()) {
+      WithColor::error() << InputFilename << ": " << EC.message() << '\n';
+      return 1;
+    }
+    std::unique_ptr<MemoryBuffer> MB = std::move(BufferOrErr.get());
 
     BitcodeFileContents IF = ExitOnErr(llvm::getBitcodeFileContents(*MB));
 
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 5c08e43b4b09..ed92665e0483 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -11,6 +11,7 @@
 #include "llvm/ADT/StringSet.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/JSON.h"
 
@@ -1043,14 +1044,19 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
                       LocStats.LocalVarNonEntryValLocStats);
   J.objectEnd();
   OS << '\n';
-  LLVM_DEBUG(llvm::dbgs() << "Total Availability: "
-                          << (int)std::round((VarParamWithLoc.Value * 100.0) /
+  LLVM_DEBUG(
+      llvm::dbgs() << "Total Availability: "
+                   << (VarParamTotal.Value
+                           ? (int)std::round((VarParamWithLoc.Value * 100.0) /
                                              VarParamTotal.Value)
-                          << "%\n";
-             llvm::dbgs() << "PC Ranges covered: "
-                          << (int)std::round(
+                           : 0)
+                   << "%\n";
+      llvm::dbgs() << "PC Ranges covered: "
+                   << (GlobalStats.ScopeBytes.Value
+                           ? (int)std::round(
(int)std::round(
+                                 (GlobalStats.ScopeBytesCovered.Value * 100.0) /
+                                 GlobalStats.ScopeBytes.Value)
-                          << "%\n");
+                           : 0)
+                   << "%\n");
   return true;
 }
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 9c2ddc3867a5..f7d3052c8c4d 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -15,6 +15,8 @@
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/MachOUniversal.h"
@@ -24,6 +26,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
@@ -119,7 +122,7 @@ using namespace cl;
 
 OptionCategory DwarfDumpCategory("Specific Options");
 static list<std::string> InputFilenames(Positional, desc("<input object files>"),
-                                        ZeroOrMore, cat(DwarfDumpCategory));
+                                        cat(DwarfDumpCategory));
 
 cl::OptionCategory SectionCategory("Section-specific Dump Options",
                                    "These control which sections are dumped. "
@@ -245,6 +248,10 @@ static cl::opt<bool>
                      cl::desc("Show the sizes of all debug sections, "
                               "expressed in bytes."),
                      cat(DwarfDumpCategory));
+static cl::opt<bool>
+    ShowSources("show-sources",
+                cl::desc("Show the sources across all compilation units."),
+                cat(DwarfDumpCategory));
 static opt<bool> Verify("verify", desc("Verify the DWARF debug info."),
                         cat(DwarfDumpCategory));
 static opt<bool> Quiet("quiet", desc("Use with -verify to not emit to STDOUT."),
@@ -464,6 +471,87 @@ static bool lookup(ObjectFile &Obj, DWARFContext &DICtx, uint64_t Address,
   return true;
 }
 
+// Collect all sources referenced from the given line table, scoped to the given
+// CU compilation directory.
+static bool collectLineTableSources(const DWARFDebugLine::LineTable &LT,
+                                    StringRef CompDir,
+                                    std::vector<std::string> &Sources) {
+  bool Result = true;
+  llvm::Optional<uint64_t> LastIndex = LT.getLastValidFileIndex();
+  for (uint64_t I = LT.hasFileAtIndex(0) ? 0 : 1,
+                E = LastIndex ? *LastIndex + 1 : 0;
+       I < E; ++I) {
+    std::string Path;
+    Result &= LT.getFileNameByIndex(
+        I, CompDir, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+        Path);
+    Sources.push_back(std::move(Path));
+  }
+  return Result;
+}
+
+static bool collectObjectSources(ObjectFile &Obj, DWARFContext &DICtx,
+                                 const Twine &Filename, raw_ostream &OS) {
+  bool Result = true;
+  std::vector<std::string> Sources;
+
+  bool HasCompileUnits = false;
+  for (const auto &CU : DICtx.compile_units()) {
+    HasCompileUnits = true;
+    // Extract paths from the line table for this CU. This allows combining the
+    // compilation directory with the line information, in case both the include
+    // directory and file names in the line table are relative.
+    const DWARFDebugLine::LineTable *LT = DICtx.getLineTableForUnit(CU.get());
+    StringRef CompDir = CU->getCompilationDir();
+    if (LT) {
+      Result &= collectLineTableSources(*LT, CompDir, Sources);
+    } else {
+      // Since there's no line table for this CU, collect the name from the CU
+      // itself.
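// A minimal sketch of the path combination this fallback performs, assuming
// a hypothetical CU whose DW_AT_comp_dir is "/src" and whose DW_AT_name is
// the relative path "foo.c" (the values are illustrative only):
//
//   SmallString<64> AbsName;
//   StringRef CompDir = "/src"; // CU->getCompilationDir()
//   StringRef Name = "foo.c";   // CU->getUnitDIE().getShortName()
//   if (sys::path::is_relative(Name))
//     AbsName = CompDir;              // seed with the compilation directory
//   sys::path::append(AbsName, Name); // yields "/src/foo.c"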
+ const char *Name = CU->getUnitDIE().getShortName(); + if (!Name) { + WithColor::warning() + << Filename << ": missing name for compilation unit\n"; + continue; + } + SmallString<64> AbsName; + if (sys::path::is_relative(Name, sys::path::Style::posix) && + sys::path::is_relative(Name, sys::path::Style::windows)) + AbsName = CompDir; + sys::path::append(AbsName, Name); + Sources.push_back(std::string(AbsName)); + } + } + + if (!HasCompileUnits) { + // Since there's no compile units available, walk the line tables and + // extract out any referenced paths. + DWARFDataExtractor LineData(DICtx.getDWARFObj(), + DICtx.getDWARFObj().getLineSection(), + DICtx.isLittleEndian(), 0); + DWARFDebugLine::SectionParser Parser(LineData, DICtx, DICtx.normal_units()); + while (!Parser.done()) { + const auto RecoverableErrorHandler = [&](Error Err) { + Result = false; + WithColor::defaultErrorHandler(std::move(Err)); + }; + void (*UnrecoverableErrorHandler)(Error Err) = error; + + DWARFDebugLine::LineTable LT = + Parser.parseNext(RecoverableErrorHandler, UnrecoverableErrorHandler); + Result &= collectLineTableSources(LT, /*CompDir=*/"", Sources); + } + } + + // Dedup and order the sources. + llvm::sort(Sources.begin(), Sources.end()); + Sources.erase(std::unique(Sources.begin(), Sources.end()), Sources.end()); + + for (StringRef Name : Sources) + OS << Name << "\n"; + return Result; +} + static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx, const Twine &Filename, raw_ostream &OS) { logAllUnhandledErrors(DICtx.loadRegisterInfo(Obj), errs(), @@ -677,6 +765,9 @@ int main(int argc, char **argv) { } else if (ShowSectionSizes) { for (auto Object : Objects) Success &= handleFile(Object, collectObjectSectionSizes, OutputFile.os()); + } else if (ShowSources) { + for (auto Object : Objects) + Success &= handleFile(Object, collectObjectSources, OutputFile.os()); } else { for (auto Object : Objects) Success &= handleFile(Object, dumpObjectFile, OutputFile.os()); diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 4b6f7bc8dd34..d2d162d648c0 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -19,11 +19,14 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" @@ -33,13 +36,13 @@ using namespace llvm::object; static mc::RegisterMCTargetOptionsFlags MCTargetOptionsFlags; cl::OptionCategory DwpCategory("Specific Options"); -static cl::list InputFiles(cl::Positional, cl::ZeroOrMore, - cl::desc(""), - cl::cat(DwpCategory)); +static cl::list + InputFiles(cl::Positional, cl::desc(""), cl::cat(DwpCategory)); static cl::list ExecFilenames( - "e", cl::ZeroOrMore, - cl::desc("Specify the executable/library files to get the list of *.dwo from"), + "e", + cl::desc( + "Specify the executable/library files to get the list of *.dwo from"), cl::value_desc("filename"), cl::cat(DwpCategory)); static cl::opt OutputFilename(cl::Required, "o", @@ -162,7 +165,7 @@ int main(int argc, char **argv) { if (!MII) return error("no instr info info for target " + TripleName, Context); - MCCodeEmitter *MCE = 
TheTarget->createMCCodeEmitter(*MII, *MRI, MC); + MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, MC); if (!MCE) return error("no code emitter for target " + TripleName, Context); @@ -193,7 +196,7 @@ int main(int argc, char **argv) { return 1; } - MS->Finish(); + MS->finish(); OutFile.keep(); return 0; } diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 3cdef529504e..ffd2a390d9c3 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -66,8 +66,7 @@ static cl::opt // ExtractFuncs - The functions to extract from the module. static cl::list ExtractFuncs("func", cl::desc("Specify function to extract"), - cl::ZeroOrMore, cl::value_desc("function"), - cl::cat(ExtractCat)); + cl::value_desc("function"), cl::cat(ExtractCat)); // ExtractRegExpFuncs - The functions, matched via regular expression, to // extract from the module. @@ -75,8 +74,7 @@ static cl::list ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a " "regular expression"), - cl::ZeroOrMore, cl::value_desc("rfunction"), - cl::cat(ExtractCat)); + cl::value_desc("rfunction"), cl::cat(ExtractCat)); // ExtractBlocks - The blocks to extract from the module. static cl::list ExtractBlocks( @@ -90,14 +88,12 @@ static cl::list ExtractBlocks( " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " "with bb2."), - cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), - cl::cat(ExtractCat)); + cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat)); // ExtractAlias - The alias to extract from the module. static cl::list ExtractAliases("alias", cl::desc("Specify alias to extract"), - cl::ZeroOrMore, cl::value_desc("alias"), - cl::cat(ExtractCat)); + cl::value_desc("alias"), cl::cat(ExtractCat)); // ExtractRegExpAliases - The aliases, matched via regular expression, to // extract from the module. @@ -105,14 +101,12 @@ static cl::list ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a " "regular expression"), - cl::ZeroOrMore, cl::value_desc("ralias"), - cl::cat(ExtractCat)); + cl::value_desc("ralias"), cl::cat(ExtractCat)); // ExtractGlobals - The globals to extract from the module. static cl::list ExtractGlobals("glob", cl::desc("Specify global to extract"), - cl::ZeroOrMore, cl::value_desc("global"), - cl::cat(ExtractCat)); + cl::value_desc("global"), cl::cat(ExtractCat)); // ExtractRegExpGlobals - The globals, matched via regular expression, to // extract from the module... 
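// Note: cl::ZeroOrMore is now the default for cl::list options, so removing
// the explicit flag above does not change behavior. A minimal sketch of an
// equivalent declaration (the option name "example" is illustrative only):
//
//   static cl::list<std::string>
//       Example("example", cl::desc("Accepts zero or more values"),
//               cl::value_desc("value"), cl::cat(ExtractCat));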
@@ -120,8 +114,7 @@ static cl::list ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a " "regular expression"), - cl::ZeroOrMore, cl::value_desc("rglobal"), - cl::cat(ExtractCat)); + cl::value_desc("rglobal"), cl::cat(ExtractCat)); static cl::opt OutputAssembly("S", cl::desc("Write output as LLVM assembly"), diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 9abe8efaa4e8..6585b193b2cb 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -48,7 +48,7 @@ static cl::list InputFilenames(cl::Positional, cl::OneOrMore, cl::cat(LinkCategory)); static cl::list OverridingInputs( - "override", cl::ZeroOrMore, cl::value_desc("filename"), + "override", cl::value_desc("filename"), cl::desc( "input bitcode file which can override previously defined symbol(s)"), cl::cat(LinkCategory)); @@ -56,7 +56,7 @@ static cl::list OverridingInputs( // Option to simulate function importing for testing. This enables using // llvm-link to simulate ThinLTO backend processes. static cl::list Imports( - "import", cl::ZeroOrMore, cl::value_desc("function:filename"), + "import", cl::value_desc("function:filename"), cl::desc("Pair of function name and filename, where function should be " "imported from bitcode in filename"), cl::cat(LinkCategory)); @@ -124,6 +124,11 @@ static cl::opt NoVerify("disable-verify", cl::desc("Do not run the verifier"), cl::Hidden, cl::cat(LinkCategory)); +static cl::opt IgnoreNonBitcode( + "ignore-non-bitcode", + cl::desc("Do not report an error for non-bitcode files in archives"), + cl::Hidden); + static ExitOnError ExitOnErr; // Read the specified bitcode file in and return it. This routine searches the @@ -164,11 +169,16 @@ static std::unique_ptr loadArFile(const char *Argv0, if (Verbose) errs() << "Reading library archive file '" << ArchiveName << "' to memory\n"; - Error Err = Error::success(); - object::Archive Archive(*Buffer, Err); - ExitOnErr(std::move(Err)); + Expected> ArchiveOrError = + object::Archive::create(Buffer->getMemBufferRef()); + if (!ArchiveOrError) + ExitOnErr(ArchiveOrError.takeError()); + + std::unique_ptr Archive = std::move(ArchiveOrError.get()); + Linker L(*Result); - for (const object::Archive::Child &C : Archive.children(Err)) { + Error Err = Error::success(); + for (const object::Archive::Child &C : Archive->children(Err)) { Expected Ename = C.getName(); if (Error E = Ename.takeError()) { errs() << Argv0 << ": "; @@ -194,6 +204,8 @@ static std::unique_ptr loadArFile(const char *Argv0, MemBuf.get().getBufferStart()), reinterpret_cast( MemBuf.get().getBufferEnd()))) { + if (IgnoreNonBitcode) + continue; errs() << Argv0 << ": "; WithColor::error() << " member of archive is not a bitcode file: '" << ChildName << "'\n"; diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index 8fc3a5d68500..c8266616b73d 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -71,7 +71,7 @@ static cl::opt OptLevel("O", cl::desc("Optimization level. 
[-O0, -O1, -O2, or -O3] " "(default = '-O2')"), - cl::Prefix, cl::ZeroOrMore, cl::init('2'), cl::cat(LTOCategory)); + cl::Prefix, cl::init('2'), cl::cat(LTOCategory)); static cl::opt IndexStats("thinlto-index-stats", @@ -210,12 +210,12 @@ static cl::opt OutputFilename("o", cl::init(""), static cl::list ExportedSymbols( "exported-symbol", cl::desc("List of symbols to export from the resulting object file"), - cl::ZeroOrMore, cl::cat(LTOCategory)); + cl::cat(LTOCategory)); static cl::list DSOSymbols("dso-symbol", cl::desc("Symbol to put in the symtab in the resulting dso"), - cl::ZeroOrMore, cl::cat(LTOCategory)); + cl::cat(LTOCategory)); static cl::opt ListSymbolsOnly( "list-symbols-only", cl::init(false), @@ -256,10 +256,6 @@ static cl::opt PrintMachOCPUOnly( cl::desc("Instead of running LTO, print the mach-o cpu in each IR file"), cl::cat(LTOCategory)); -static cl::opt UseNewPM( - "use-new-pm", cl::desc("Run LTO passes using the new pass manager"), - cl::init(LLVM_ENABLE_NEW_PASS_MANAGER), cl::Hidden, cl::cat(LTOCategory)); - static cl::opt DebugPassManager("debug-pass-manager", cl::init(false), cl::Hidden, cl::desc("Print pass management debugging information"), @@ -604,7 +600,6 @@ public: ThinGenerator.setCacheMaxSizeFiles(ThinLTOCacheMaxSizeFiles); ThinGenerator.setCacheMaxSizeBytes(ThinLTOCacheMaxSizeBytes); ThinGenerator.setFreestanding(EnableFreestanding); - ThinGenerator.setUseNewPM(UseNewPM); ThinGenerator.setDebugPassManager(DebugPassManager); // Add all the exported symbols to the table of symbols to preserve. @@ -1015,6 +1010,7 @@ int main(int argc, char **argv) { CodeGen.setCodePICModel(codegen::getExplicitRelocModel()); CodeGen.setFreestanding(EnableFreestanding); + CodeGen.setDebugPassManager(DebugPassManager); CodeGen.setDebugInfo(LTO_DEBUG_MODEL_DWARF); CodeGen.setTargetOptions(Options); @@ -1069,10 +1065,8 @@ int main(int argc, char **argv) { CodeGen.setOptLevel(OptLevel - '0'); CodeGen.setAttrs(codegen::getMAttrs()); - CodeGen.setUseNewPM(UseNewPM); - if (auto FT = codegen::getExplicitFileType()) - CodeGen.setFileType(FT.getValue()); + CodeGen.setFileType(*FT); if (!OutputFilename.empty()) { if (SaveLinkedModuleFile) { diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index 7416e5850944..f79db36d2d2d 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -37,9 +37,10 @@ using namespace lto; static codegen::RegisterCodeGenFlags CGF; static cl::opt - OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " - "(default = '-O2')"), - cl::Prefix, cl::ZeroOrMore, cl::init('2')); + OptLevel("O", + cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " + "(default = '-O2')"), + cl::Prefix, cl::init('2')); static cl::opt CGOptLevel( "cg-opt-level", @@ -67,11 +68,23 @@ static cl::opt AAPipeline("aa-pipeline", static cl::opt SaveTemps("save-temps", cl::desc("Save temporary files")); static cl::opt - ThinLTODistributedIndexes("thinlto-distributed-indexes", cl::init(false), + ThinLTODistributedIndexes("thinlto-distributed-indexes", cl::desc("Write out individual index and " "import files for the " "distributed backend case")); +static cl::opt + ThinLTOEmitIndexes("thinlto-emit-indexes", + cl::desc("Write out individual index files via " + "InProcessThinLTO")); + +static cl::opt + ThinLTOEmitImports("thinlto-emit-imports", + cl::desc("Write out individual imports files via " + "InProcessThinLTO. 
Has no effect unless " + "specified with -thinlto-emit-indexes or " + "-thinlto-distributed-indexes")); + // Default to using all available threads in the system, but using only one // thread per core (no SMT). // Use -thinlto-threads=all to use hardware_concurrency() instead, which means @@ -89,8 +102,7 @@ static cl::list SymbolResolutions( " runtime and is known to be in this linkage unit\n" " x - externally visible: the definition of this symbol is\n" " visible outside of the LTO unit\n" - "A resolution for each symbol must be specified."), - cl::ZeroOrMore); + "A resolution for each symbol must be specified")); static cl::opt OverrideTriple( "override-triple", @@ -141,15 +153,14 @@ static cl::opt static cl::opt RunCSIRInstr("lto-cspgo-gen", cl::desc("Run PGO context sensitive IR instrumentation"), - cl::init(false), cl::Hidden); + cl::Hidden); -static cl::opt - UseNewPM("use-new-pm", - cl::desc("Run LTO passes using the new pass manager"), - cl::init(LLVM_ENABLE_NEW_PASS_MANAGER), cl::Hidden); +static cl::opt LtoOpaquePointers("lto-opaque-pointers", + cl::desc("Enable opaque pointer types"), + cl::init(true), cl::Hidden); static cl::opt - DebugPassManager("debug-pass-manager", cl::init(false), cl::Hidden, + DebugPassManager("debug-pass-manager", cl::Hidden, cl::desc("Print pass management debugging information")); static cl::opt @@ -162,7 +173,7 @@ static cl::list static cl::opt EnableFreestanding( "lto-freestanding", cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"), - cl::init(false), cl::Hidden); + cl::Hidden); static void check(Error E, std::string Msg) { if (!E) @@ -242,7 +253,7 @@ static int run(int argc, char **argv) { Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple()); Conf.MAttrs = codegen::getMAttrs(); if (auto RM = codegen::getExplicitRelocModel()) - Conf.RelocModel = RM.getValue(); + Conf.RelocModel = *RM; Conf.CodeModel = codegen::getExplicitCodeModel(); Conf.DebugPassManager = DebugPassManager; @@ -267,7 +278,6 @@ static int run(int argc, char **argv) { Conf.AAPipeline = AAPipeline; Conf.OptLevel = OptLevel - '0'; - Conf.UseNewPM = UseNewPM; Conf.Freestanding = EnableFreestanding; for (auto &PluginFN : PassPlugins) Conf.PassPlugins.push_back(PluginFN); @@ -290,24 +300,27 @@ static int run(int argc, char **argv) { } if (auto FT = codegen::getExplicitFileType()) - Conf.CGFileType = FT.getValue(); + Conf.CGFileType = *FT; Conf.OverrideTriple = OverrideTriple; Conf.DefaultTriple = DefaultTriple; Conf.StatsFile = StatsFile; Conf.PTO.LoopVectorization = Conf.OptLevel > 1; Conf.PTO.SLPVectorization = Conf.OptLevel > 1; + Conf.OpaquePointers = LtoOpaquePointers; ThinBackend Backend; if (ThinLTODistributedIndexes) - Backend = createWriteIndexesThinBackend(/* OldPrefix */ "", - /* NewPrefix */ "", - /* ShouldEmitImportsFiles */ true, - /* LinkedObjectsFile */ nullptr, - /* OnWrite */ {}); + Backend = + createWriteIndexesThinBackend(/* OldPrefix */ "", + /* NewPrefix */ "", ThinLTOEmitImports, + /* LinkedObjectsFile */ nullptr, + /* OnWrite */ {}); else Backend = createInProcessThinBackend( - llvm::heavyweight_hardware_concurrency(Threads)); + llvm::heavyweight_hardware_concurrency(Threads), + /* OnWrite */ {}, ThinLTOEmitIndexes, ThinLTOEmitImports); + // Track whether we hit an error; in particular, in the multi-threaded case, // we can't exit() early because the rest of the threads wouldn't have had a // change to be join-ed, and that would result in a "terminate called without diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp 
b/llvm/tools/llvm-mc/llvm-mc.cpp
index 4e5a12e53a6b..2a525f53ec29 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -541,7 +541,7 @@ int main(int argc, char **argv) {
   // Set up the AsmStreamer.
   std::unique_ptr<MCCodeEmitter> CE;
   if (ShowEncoding)
-    CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+    CE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx));
 
   std::unique_ptr<MCAsmBackend> MAB(
       TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
@@ -561,7 +561,7 @@ int main(int argc, char **argv) {
       OS = BOS.get();
     }
 
-    MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
+    MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
     MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
     Str.reset(TheTarget->createMCObjectStreamer(
         TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
index 6cdd0ba797aa..cb8e1822ee30 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -16,6 +16,7 @@
 #include "CodeRegionGenerator.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCTargetOptions.h"
@@ -62,10 +63,10 @@ public:
                     uint64_t Size = 0, unsigned ByteAlignment = 0,
                     SMLoc Loc = SMLoc()) override {}
   void emitGPRel32Value(const MCExpr *Value) override {}
-  void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
-  void EmitCOFFSymbolStorageClass(int StorageClass) override {}
-  void EmitCOFFSymbolType(int Type) override {}
-  void EndCOFFSymbolDef() override {}
+  void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+  void emitCOFFSymbolStorageClass(int StorageClass) override {}
+  void emitCOFFSymbolType(int Type) override {}
+  void endCOFFSymbolDef() override {}
 
   ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
     return Regions.getInstructionSequence(Index);
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index caa8554a416a..67b636737b97 100644
--- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -70,7 +70,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
     else if (IIVDEntry.Latency < 100)
       TempStream << ' ';
 
-    if (IIVDEntry.RThroughput.hasValue()) {
+    if (IIVDEntry.RThroughput) {
       double RT = IIVDEntry.RThroughput.getValue();
       TempStream << format("%.2f", RT) << ' ';
       if (RT < 10.0)
@@ -152,7 +152,7 @@ InstructionInfoView::toJSON(const InstructionInfoViewData &IIVD) const {
                  {"mayLoad", IIVD.mayLoad},
                  {"mayStore", IIVD.mayStore},
                  {"hasUnmodeledSideEffects", IIVD.hasUnmodeledSideEffects}});
-  JO.try_emplace("RThroughput", IIVD.RThroughput.getValueOr(0.0));
+  JO.try_emplace("RThroughput", IIVD.RThroughput.value_or(0.0));
   return JO;
 }
diff --git a/llvm/tools/llvm-mca/Views/InstructionView.h b/llvm/tools/llvm-mca/Views/InstructionView.h
index cec07eef6a80..ae57246fc35f 100644
--- a/llvm/tools/llvm-mca/Views/InstructionView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -17,9 +17,10 @@
 
 #include "llvm/MCA/View.h"
 #include "llvm/Support/JSON.h"
-#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
+class MCInstPrinter;
+
 namespace mca {
 
 // The base class for views that deal with individual machine instructions.
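// Forward-declaring MCInstPrinter above works because this header only forms
// references and pointers to it; the full definition is needed only where
// members are actually used. A minimal sketch of the pattern, with
// illustrative names:
//
//   class Widget;          // forward declaration, no #include required
//   struct View {
//     Widget &W;           // a reference does not need Widget's definition
//     void print() const;  // defined in the .cpp, which includes Widget.h
//   };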
diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp index 7a341d4c2079..06caeda344c8 100644 --- a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -48,23 +48,23 @@ void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { } else if (Event.Type == HWInstructionEvent::Dispatched) { const Instruction &Inst = *Event.IR.getInstruction(); const unsigned Index = Event.IR.getSourceIndex(); - if (LQResourceID && Inst.getDesc().MayLoad && + if (LQResourceID && Inst.getMayLoad() && MostRecentLoadDispatched != Index) { Usage[LQResourceID].SlotsInUse++; MostRecentLoadDispatched = Index; } - if (SQResourceID && Inst.getDesc().MayStore && + if (SQResourceID && Inst.getMayStore() && MostRecentStoreDispatched != Index) { Usage[SQResourceID].SlotsInUse++; MostRecentStoreDispatched = Index; } } else if (Event.Type == HWInstructionEvent::Executed) { const Instruction &Inst = *Event.IR.getInstruction(); - if (LQResourceID && Inst.getDesc().MayLoad) { + if (LQResourceID && Inst.getMayLoad()) { assert(Usage[LQResourceID].SlotsInUse); Usage[LQResourceID].SlotsInUse--; } - if (SQResourceID && Inst.getDesc().MayStore) { + if (SQResourceID && Inst.getMayStore()) { assert(Usage[SQResourceID].SlotsInUse); Usage[SQResourceID].SlotsInUse--; } diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 1826491f3f30..409de283e5a1 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -465,6 +465,21 @@ int main(int argc, char **argv) { const MCSchedModel &SM = STI->getSchedModel(); + std::unique_ptr IPP; + if (!DisableCustomBehaviour) { + // TODO: It may be a good idea to separate CB and IPP so that they can + // be used independently of each other. What I mean by this is to add + // an extra command-line arg --disable-ipp so that CB and IPP can be + // toggled without needing to toggle both of them together. + IPP = std::unique_ptr( + TheTarget->createInstrPostProcess(*STI, *MCII)); + } + if (!IPP) { + // If the target doesn't have its own IPP implemented (or the -disable-cb + // flag is set) then we use the base class (which does nothing). + IPP = std::make_unique(*STI, *MCII); + } + // Create an instruction builder. mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get()); @@ -479,7 +494,7 @@ int main(int argc, char **argv) { unsigned RegionIdx = 0; std::unique_ptr MCE( - TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + TheTarget->createMCCodeEmitter(*MCII, Ctx)); assert(MCE && "Unable to create code emitter!"); std::unique_ptr MAB(TheTarget->createMCAsmBackend( @@ -498,16 +513,7 @@ int main(int argc, char **argv) { ArrayRef Insts = Region->getInstructions(); mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); - std::unique_ptr IPP; - if (!DisableCustomBehaviour) { - IPP = std::unique_ptr( - TheTarget->createInstrPostProcess(*STI, *MCII)); - } - if (!IPP) - // If the target doesn't have its own IPP implemented (or the - // -disable-cb flag is set) then we use the base class - // (which does nothing). - IPP = std::make_unique(*STI, *MCII); + IPP->resetState(); SmallVector> LoweredSequence; for (const MCInst &MCI : Insts) { @@ -536,7 +542,8 @@ int main(int argc, char **argv) { LoweredSequence.emplace_back(std::move(Inst.get())); } - mca::SourceMgr S(LoweredSequence, PrintInstructionTables ? 1 : Iterations); + mca::CircularSourceMgr S(LoweredSequence, + PrintInstructionTables ? 
1 : Iterations); if (PrintInstructionTables) { // Create a pipeline, stages, and a printer. diff --git a/llvm/tools/llvm-modextract/llvm-modextract.cpp b/llvm/tools/llvm-modextract/llvm-modextract.cpp index b1d6bfb790ec..50f503ae0ac4 100644 --- a/llvm/tools/llvm-modextract/llvm-modextract.cpp +++ b/llvm/tools/llvm-modextract/llvm-modextract.cpp @@ -17,6 +17,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/WithColor.h" diff --git a/llvm/tools/llvm-nm/Opts.td b/llvm/tools/llvm-nm/Opts.td index 3a790890909a..60ac134269b3 100644 --- a/llvm/tools/llvm-nm/Opts.td +++ b/llvm/tools/llvm-nm/Opts.td @@ -13,10 +13,12 @@ multiclass Eq { def : Separate<["--"], name>, Alias(NAME #_EQ)>; } +def X : JoinedOrSeparate<["-"], "X">, HelpText<"Specifies the type of ELF, XCOFF, or IR object file to examine. The value must be one of: 32, 64, 32_64, any (default)">; def debug_syms : FF<"debug-syms", "Show all symbols, even debugger only">; def defined_only : FF<"defined-only", "Show only defined symbols">; defm demangle : BB<"demangle", "Demangle C++ symbol names", "Don't demangle symbol names">; def dynamic : FF<"dynamic", "Display dynamic symbols instead of normal symbols">; +def export_symbols : FF<"export-symbols", "Export symbol list for all inputs">; def extern_only : FF<"extern-only", "Show only external symbols">; defm format : Eq<"format", "Specify output format: bsd (default), posix, sysv, darwin, just-symbols">, MetaVarName<"">; def help : FF<"help", "Display this help">; @@ -48,6 +50,11 @@ def no_dyldinfo : FF<"no-dyldinfo", "Don't add any symbols from the dyldinfo">, def s : F<"s", "Dump only symbols from this segment and section name">, Group; def x : F<"x", "Print symbol entry in hex">, Group; +// XCOFF specific options. 
+def grp_xcoff_o : OptionGroup<"kind">, HelpText<"llvm-nm XCOFF Specific Options">; + +def no_rsrc : FF<"no-rsrc", "Exclude resource file symbols (__rsrc) from the export symbol list.">, Group; + def : FF<"just-symbol-name", "Alias for --format=just-symbols">, Alias, AliasArgs<["just-symbols"]>, Flags<[HelpHidden]>; def : FF<"portability", "Alias for --format=posix">, Alias, AliasArgs<["posix"]>; @@ -70,7 +77,7 @@ def : F<"r", "Alias for --reverse-sort">, Alias; def : F<"S", "Alias for --print-size">, Alias; def : JoinedOrSeparate<["-"], "t">, HelpText<"Alias for --radix">, Alias, MetaVarName<"">; def : F<"u", "Alias for --undefined-only">, Alias; -def : F<"U", "Deprecated alias for --defined-only">, Alias, Flags<[HelpHidden]>; +def : F<"U", "Alias for --defined-only">, Alias; def : F<"v", "Alias for --numeric-sort">, Alias; def : F<"V", "Alias for --version">, Alias; -def : F<"W", "Deprecated alias for --no-weak">, Alias, Flags<[HelpHidden]>; +def : F<"W", "Alias for --no-weak">, Alias; diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp index f1d8b0026429..f0def8b74e60 100644 --- a/llvm/tools/llvm-nm/llvm-nm.cpp +++ b/llvm/tools/llvm-nm/llvm-nm.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/Demangle/Demangle.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -83,13 +84,16 @@ public: }; enum OutputFormatTy { bsd, sysv, posix, darwin, just_symbols }; +enum class BitModeTy { Bit32, Bit64, Bit32_64, Any }; } // namespace static bool ArchiveMap; +static BitModeTy BitMode; static bool DebugSyms; static bool DefinedOnly; static bool Demangle; static bool DynamicSyms; +static bool ExportSymbols; static bool ExternalOnly; static OutputFormatTy OutputFormat; static bool NoLLVMBitcode; @@ -105,6 +109,9 @@ static bool SizeSort; static bool UndefinedOnly; static bool WithoutAliases; +// XCOFF-specific options. +static bool NoRsrc; + namespace { enum Radix { d, o, x }; } // namespace @@ -128,7 +135,8 @@ static bool HadError = false; static StringRef ToolName; -static void warn(Error Err, Twine FileName, Twine Context = Twine()) { +static void warn(Error Err, Twine FileName, Twine Context = Twine(), + Twine Archive = Twine()) { assert(Err); // Flush the standard output so that the warning isn't interleaved with other @@ -137,8 +145,9 @@ static void warn(Error Err, Twine FileName, Twine Context = Twine()) { handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) { WithColor::warning(errs(), ToolName) - << FileName << ": " << (Context.str().empty() ? "" : Context + ": ") - << EI.message() << "\n"; + << (Archive.str().empty() ? FileName : Archive + "(" + FileName + ")") + << ": " << (Context.str().empty() ? "" : Context + ": ") << EI.message() + << "\n"; }); } @@ -211,6 +220,8 @@ struct NMSymbol { StringRef SectionName; StringRef TypeName; BasicSymbolRef Sym; + StringRef Visibility; + // The Sym field above points to the native symbol in the object file, // for Mach-O when we are creating symbols from the dyld info the above // pointer is null as there is no native symbol. In these cases the fields @@ -222,40 +233,59 @@ struct NMSymbol { uint8_t NSect; uint16_t NDesc; std::string IndirectName; -}; -} // anonymous namespace -static bool compareSymbolAddress(const NMSymbol &A, const NMSymbol &B) { - bool ADefined; - // Symbol flags have been checked in the caller. 
- if (A.Sym.getRawDataRefImpl().p) { - uint32_t AFlags = cantFail(A.Sym.getFlags()); - ADefined = !(AFlags & SymbolRef::SF_Undefined); - } else { - ADefined = A.TypeChar != 'U'; + bool isDefined() const { + if (Sym.getRawDataRefImpl().p) { + uint32_t Flags = cantFail(Sym.getFlags()); + return !(Flags & SymbolRef::SF_Undefined); + } + return TypeChar != 'U'; } - bool BDefined; - // Symbol flags have been checked in the caller. - if (B.Sym.getRawDataRefImpl().p) { - uint32_t BFlags = cantFail(B.Sym.getFlags()); - BDefined = !(BFlags & SymbolRef::SF_Undefined); - } else { - BDefined = B.TypeChar != 'U'; + + bool initializeFlags(const SymbolicFile &Obj) { + Expected SymFlagsOrErr = Sym.getFlags(); + if (!SymFlagsOrErr) { + // TODO: Test this error. + error(SymFlagsOrErr.takeError(), Obj.getFileName()); + return false; + } + SymFlags = *SymFlagsOrErr; + return true; } - return std::make_tuple(ADefined, A.Address, A.Name, A.Size) < - std::make_tuple(BDefined, B.Address, B.Name, B.Size); -} -static bool compareSymbolSize(const NMSymbol &A, const NMSymbol &B) { - return std::make_tuple(A.Size, A.Name, A.Address) < - std::make_tuple(B.Size, B.Name, B.Address); -} + bool shouldPrint() const { + bool Undefined = SymFlags & SymbolRef::SF_Undefined; + bool Global = SymFlags & SymbolRef::SF_Global; + bool Weak = SymFlags & SymbolRef::SF_Weak; + bool FormatSpecific = SymFlags & SymbolRef::SF_FormatSpecific; + if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) || + (!Global && ExternalOnly) || (Weak && NoWeakSymbols) || + (FormatSpecific && !(SpecialSyms || DebugSyms))) + return false; + return true; + } +}; -static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) { +bool operator<(const NMSymbol &A, const NMSymbol &B) { + if (NumericSort) + return std::make_tuple(A.isDefined(), A.Address, A.Name, A.Size) < + std::make_tuple(B.isDefined(), B.Address, B.Name, B.Size); + if (SizeSort) + return std::make_tuple(A.Size, A.Name, A.Address) < + std::make_tuple(B.Size, B.Name, B.Address); + if (ExportSymbols) + return std::make_tuple(A.Name, A.Visibility) < + std::make_tuple(B.Name, B.Visibility); return std::make_tuple(A.Name, A.Size, A.Address) < std::make_tuple(B.Name, B.Size, B.Address); } +bool operator>(const NMSymbol &A, const NMSymbol &B) { return B < A; } +bool operator==(const NMSymbol &A, const NMSymbol &B) { + return !(A < B) && !(B < A); +} +} // anonymous namespace + static char isSymbolList64Bit(SymbolicFile &Obj) { if (auto *IRObj = dyn_cast(&Obj)) return Triple(IRObj->getTargetTriple()).isArch64Bit(); @@ -263,7 +293,6 @@ static char isSymbolList64Bit(SymbolicFile &Obj) { return false; if (XCOFFObjectFile *XCOFFObj = dyn_cast(&Obj)) return XCOFFObj->is64Bit(); - if (isa(Obj)) return false; if (TapiFile *Tapi = dyn_cast(&Obj)) @@ -274,7 +303,6 @@ static char isSymbolList64Bit(SymbolicFile &Obj) { } static StringRef CurrentFilename; -static std::vector SymbolList; static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I); @@ -658,27 +686,28 @@ static void writeFileName(raw_ostream &S, StringRef ArchiveName, } } -static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName, - StringRef ArchiveName, - StringRef ArchitectureName) { - if (!NoSort) { - using Comparator = bool (*)(const NMSymbol &, const NMSymbol &); - Comparator Cmp; - if (NumericSort) - Cmp = &compareSymbolAddress; - else if (SizeSort) - Cmp = &compareSymbolSize; - else - Cmp = &compareSymbolName; +static void sortSymbolList(std::vector &SymbolList) { + if (NoSort) + return; - if 
(ReverseSort) - llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool { - return Cmp(B, A); - }); - else - llvm::sort(SymbolList, Cmp); + if (ReverseSort) + llvm::sort(SymbolList, std::greater<>()); + else + llvm::sort(SymbolList); +} + +static void printExportSymbolList(const std::vector &SymbolList) { + for (const NMSymbol &Sym : SymbolList) { + outs() << Sym.Name; + if (!Sym.Visibility.empty()) + outs() << ' ' << Sym.Visibility; + outs() << '\n'; } +} +static void printSymbolList(SymbolicFile &Obj, + std::vector &SymbolList, bool printName, + StringRef ArchiveName, StringRef ArchitectureName) { if (!PrintFileName) { if ((OutputFormat == bsd || OutputFormat == posix || OutputFormat == just_symbols) && @@ -725,7 +754,9 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName, } for (const NMSymbol &S : SymbolList) { - uint32_t SymFlags; + if (!S.shouldPrint()) + continue; + std::string Name = S.Name; MachOObjectFile *MachO = dyn_cast(&Obj); if (Demangle) { @@ -737,25 +768,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName, if (Optional Opt = Fn(S.Name)) Name = *Opt; } - if (S.Sym.getRawDataRefImpl().p) { - Expected SymFlagsOrErr = S.Sym.getFlags(); - if (!SymFlagsOrErr) { - // TODO: Test this error. - error(SymFlagsOrErr.takeError(), Obj.getFileName()); - return; - } - SymFlags = *SymFlagsOrErr; - } else - SymFlags = S.SymFlags; - bool Undefined = SymFlags & SymbolRef::SF_Undefined; - bool Global = SymFlags & SymbolRef::SF_Global; - bool Weak = SymFlags & SymbolRef::SF_Weak; - bool FormatSpecific = SymFlags & SymbolRef::SF_FormatSpecific; - if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) || - (!Global && ExternalOnly) || (Weak && NoWeakSymbols) || - (FormatSpecific && !(SpecialSyms || DebugSyms))) - continue; if (PrintFileName) writeFileName(outs(), ArchiveName, ArchitectureName); if ((OutputFormat == just_symbols || @@ -1141,7 +1154,7 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I, // getNsectForSegSect() is used to implement the Mach-O "-s segname sectname" // option to dump only those symbols from that section in a Mach-O file. -// It is called once for each Mach-O file from dumpSymbolNamesFromObject() +// It is called once for each Mach-O file from getSymbolNamesFromObject() // to get the section number for that named section from the command line // arguments. It returns the section number for that section in the Mach-O // file or zero it is not present. @@ -1163,7 +1176,7 @@ static unsigned getNsectForSegSect(MachOObjectFile *Obj) { // getNsectInMachO() is used to implement the Mach-O "-s segname sectname" // option to dump only those symbols from that section in a Mach-O file. // It is called once for each symbol in a Mach-O file from -// dumpSymbolNamesFromObject() and returns the section number for that symbol +// getSymbolNamesFromObject() and returns the section number for that symbol // if it is in a section, else it returns 0. static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) { DataRefImpl Symb = Sym.getRawDataRefImpl(); @@ -1175,7 +1188,8 @@ static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) { return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? 
STE.n_sect : 0; } -static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) { +static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO, + std::vector &SymbolList) { size_t I = SymbolList.size(); std::string ExportsNameBuffer; raw_string_ostream EOS(ExportsNameBuffer); @@ -1642,28 +1656,127 @@ static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) { } } -static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, - StringRef ArchiveName = {}, - StringRef ArchitectureName = {}) { +static bool shouldDump(SymbolicFile &Obj) { + // The -X option is currently only implemented for XCOFF, ELF, and IR object + // files. The option isn't fundamentally impossible with other formats, just + // isn't implemented. + if (!isa(Obj) && !isa(Obj) && + !isa(Obj)) + return true; + + return isSymbolList64Bit(Obj) ? BitMode != BitModeTy::Bit32 + : BitMode != BitModeTy::Bit64; +} + +static void getXCOFFExports(XCOFFObjectFile *XCOFFObj, + std::vector &SymbolList, + StringRef ArchiveName) { + // Skip Shared object file. + if (XCOFFObj->getFlags() & XCOFF::F_SHROBJ) + return; + + for (SymbolRef Sym : XCOFFObj->symbols()) { + // There is no visibility in old 32 bit XCOFF object file interpret. + bool HasVisibilityAttr = + XCOFFObj->is64Bit() || (XCOFFObj->auxiliaryHeader32() && + (XCOFFObj->auxiliaryHeader32()->getVersion() == + XCOFF::NEW_XCOFF_INTERPRET)); + + if (HasVisibilityAttr) { + XCOFFSymbolRef XCOFFSym = XCOFFObj->toSymbolRef(Sym.getRawDataRefImpl()); + uint16_t SymType = XCOFFSym.getSymbolType(); + if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_INTERNAL) + continue; + if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_HIDDEN) + continue; + } + + Expected SymSecOrErr = Sym.getSection(); + if (!SymSecOrErr) { + warn(SymSecOrErr.takeError(), XCOFFObj->getFileName(), + "for symbol with index " + + Twine(XCOFFObj->getSymbolIndex(Sym.getRawDataRefImpl().p)), + ArchiveName); + continue; + } + section_iterator SecIter = *SymSecOrErr; + // If the symbol is not in a text or data section, it is not exported. + if (SecIter == XCOFFObj->section_end()) + continue; + if (!(SecIter->isText() || SecIter->isData() || SecIter->isBSS())) + continue; + + StringRef SymName = cantFail(Sym.getName()); + if (SymName.empty()) + continue; + if (SymName.startswith("__sinit") || SymName.startswith("__sterm") || + SymName.front() == '.' || SymName.front() == '(') + continue; + + // Check the SymName regex matching with "^__[0-9]+__". 
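// The "regex" above is implemented without a regex engine: a name matches
// "^__[0-9]+__$" iff it is longer than four characters, starts and ends with
// "__", and everything in between is a digit, so "__42__" is skipped while
// "__init__" is kept.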
+ if (SymName.size() > 4 && SymName.startswith("__") && + SymName.endswith("__")) { + if (std::all_of(SymName.begin() + 2, SymName.end() - 2, isDigit)) + continue; + } + + if (SymName == "__rsrc" && NoRsrc) + continue; + + if (SymName.startswith("__tf1")) + SymName = SymName.substr(6); + else if (SymName.startswith("__tf9")) + SymName = SymName.substr(14); + + NMSymbol S = {}; + S.Name = SymName.str(); + S.Sym = Sym; + + if (HasVisibilityAttr) { + XCOFFSymbolRef XCOFFSym = XCOFFObj->toSymbolRef(Sym.getRawDataRefImpl()); + uint16_t SymType = XCOFFSym.getSymbolType(); + if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_PROTECTED) + S.Visibility = "protected"; + else if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_EXPORTED) + S.Visibility = "export"; + } + if (S.initializeFlags(*XCOFFObj)) + SymbolList.push_back(S); + } +} + +static Expected +getDynamicSyms(SymbolicFile &Obj) { + const auto *E = dyn_cast(&Obj); + if (!E) + return createError("File format has no dynamic symbol table"); + return E->getDynamicSymbolIterators(); +} + +// Returns false if there is error found or true otherwise. +static bool getSymbolNamesFromObject(SymbolicFile &Obj, + std::vector &SymbolList) { auto Symbols = Obj.symbols(); std::vector SymbolVersions; + if (DynamicSyms) { - const auto *E = dyn_cast(&Obj); - if (!E) { - error("File format has no dynamic symbol table", Obj.getFileName()); - return; + Expected SymbolsOrErr = + getDynamicSyms(Obj); + if (!SymbolsOrErr) { + error(SymbolsOrErr.takeError(), Obj.getFileName()); + return false; + } + Symbols = *SymbolsOrErr; + if (const auto *E = dyn_cast(&Obj)) { + if (Expected> VersionsOrErr = + E->readDynsymVersions()) + SymbolVersions = std::move(*VersionsOrErr); + else + WithColor::warning(errs(), ToolName) + << "unable to read symbol versions: " + << toString(VersionsOrErr.takeError()) << "\n"; } - Symbols = E->getDynamicSymbolIterators(); - - if (Expected> VersionsOrErr = - E->readDynsymVersions()) - SymbolVersions = std::move(*VersionsOrErr); - else - WithColor::warning(errs(), ToolName) - << "unable to read symbol versions: " - << toString(VersionsOrErr.takeError()) << "\n"; } - // If a "-s segname sectname" option was specified and this is a Mach-O // file get the section number for that section in this object file. unsigned int Nsect = 0; @@ -1672,8 +1785,9 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, Nsect = getNsectForSegSect(MachO); // If this section is not in the object file no symbols are printed. if (Nsect == 0) - return; + return false; } + if (!(MachO && DyldInfoOnly)) { size_t I = -1; for (BasicSymbolRef Sym : Symbols) { @@ -1681,7 +1795,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, Expected SymFlagsOrErr = Sym.getFlags(); if (!SymFlagsOrErr) { error(SymFlagsOrErr.takeError(), Obj.getFileName()); - return; + return false; } // Don't drop format specifc symbols for ARM and AArch64 ELF targets, they @@ -1734,7 +1848,8 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, (SymbolVersions[I].IsVerDef ? "@@" : "@") + SymbolVersions[I].Name; S.Sym = Sym; - SymbolList.push_back(S); + if (S.initializeFlags(Obj)) + SymbolList.push_back(S); } } @@ -1745,16 +1860,66 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, // language symbols for example. The option -only-dyldinfo will fake up // all symbols from the dyld export trie as well as the bind info. 
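// A minimal sketch of walking the dyld export trie that these faked-up
// symbols are derived from, assuming a valid MachOObjectFile `MachO` (error
// handling abbreviated):
//
//   Error Err = Error::success();
//   for (const object::ExportEntry &Entry : MachO.exports(Err))
//     outs() << Entry.name() << " @ " << Entry.address() << '\n';
//   if (Err)
//     report_fatal_error(std::move(Err));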
if (MachO && !NoDyldInfo) - dumpSymbolsFromDLInfoMachO(*MachO); + dumpSymbolsFromDLInfoMachO(*MachO, SymbolList); + return true; +} + +static void printObjectLabel(bool PrintArchiveName, StringRef ArchiveName, + StringRef ArchitectureName, + StringRef ObjectFileName) { + outs() << "\n"; + if (ArchiveName.empty() || !PrintArchiveName) + outs() << ObjectFileName; + else + outs() << ArchiveName << "(" << ObjectFileName << ")"; + if (!ArchitectureName.empty()) + outs() << " (for architecture " << ArchitectureName << ")"; + outs() << ":\n"; +} + +static Expected hasSymbols(SymbolicFile &Obj) { + if (DynamicSyms) { + Expected DynamicSymsOrErr = + getDynamicSyms(Obj); + if (!DynamicSymsOrErr) + return DynamicSymsOrErr.takeError(); + return !DynamicSymsOrErr->empty(); + } + return !Obj.symbols().empty(); +} + +static void dumpSymbolNamesFromObject( + SymbolicFile &Obj, std::vector &SymbolList, + bool PrintSymbolObject, bool PrintObjectLabel, StringRef ArchiveName = {}, + StringRef ArchitectureName = {}, StringRef ObjectName = {}, + bool PrintArchiveName = true) { + if (!shouldDump(Obj)) + return; + + if (ExportSymbols && Obj.isXCOFF()) { + XCOFFObjectFile *XCOFFObj = cast(&Obj); + getXCOFFExports(XCOFFObj, SymbolList, ArchiveName); + return; + } + + if (PrintObjectLabel && !ExportSymbols) + printObjectLabel(PrintArchiveName, ArchiveName, ArchitectureName, + ObjectName.empty() ? Obj.getFileName() : ObjectName); + if (!getSymbolNamesFromObject(Obj, SymbolList) || ExportSymbols) + return; CurrentFilename = Obj.getFileName(); - if (Symbols.empty() && SymbolList.empty() && !Quiet) { + // If there is an error in hasSymbols(), the error should be encountered in + // function getSymbolNamesFromObject first. + if (!cantFail(hasSymbols(Obj)) && SymbolList.empty() && !Quiet) { writeFileName(errs(), ArchiveName, ArchitectureName); errs() << "no symbols\n"; } - sortAndPrintSymbolList(Obj, printName, ArchiveName, ArchitectureName); + sortSymbolList(SymbolList); + printSymbolList(Obj, SymbolList, PrintSymbolObject, ArchiveName, + ArchitectureName); } // checkMachOAndArchFlags() checks to see if the SymbolicFile is a Mach-O file @@ -1762,7 +1927,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, // check to make sure this Mach-O file is one of those architectures or all // architectures was specificed. If not then an error is generated and this // routine returns false. Else it returns true. -static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) { +static bool checkMachOAndArchFlags(SymbolicFile *O, StringRef Filename) { auto *MachO = dyn_cast(O); if (!MachO || ArchAll || ArchFlags.empty()) @@ -1789,282 +1954,172 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) { return true; } -static void dumpSymbolNamesFromFile(std::string &Filename) { - ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(Filename); - if (error(BufferOrErr.getError(), Filename)) - return; - - LLVMContext Context; - LLVMContext *ContextPtr = NoLLVMBitcode ? 
nullptr : &Context; - Expected> BinaryOrErr = - createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr); - if (!BinaryOrErr) { - error(BinaryOrErr.takeError(), Filename); - return; - } - Binary &Bin = *BinaryOrErr.get(); - - if (Archive *A = dyn_cast(&Bin)) { - if (ArchiveMap) { - Archive::symbol_iterator I = A->symbol_begin(); - Archive::symbol_iterator E = A->symbol_end(); - if (I != E) { - outs() << "Archive map\n"; - for (; I != E; ++I) { - Expected C = I->getMember(); - if (!C) { - error(C.takeError(), Filename); - break; - } - Expected FileNameOrErr = C->getName(); - if (!FileNameOrErr) { - error(FileNameOrErr.takeError(), Filename); - break; - } - StringRef SymName = I->getName(); - outs() << SymName << " in " << FileNameOrErr.get() << "\n"; - } - outs() << "\n"; +static void dumpArchiveMap(Archive *A, StringRef Filename) { + Archive::symbol_iterator I = A->symbol_begin(); + Archive::symbol_iterator E = A->symbol_end(); + if (I != E) { + outs() << "Archive map\n"; + for (; I != E; ++I) { + Expected C = I->getMember(); + if (!C) { + error(C.takeError(), Filename); + break; } + Expected FileNameOrErr = C->getName(); + if (!FileNameOrErr) { + error(FileNameOrErr.takeError(), Filename); + break; + } + StringRef SymName = I->getName(); + outs() << SymName << " in " << FileNameOrErr.get() << "\n"; } + outs() << "\n"; + } +} - { - Error Err = Error::success(); - for (auto &C : A->children(Err)) { - Expected> ChildOrErr = - C.getAsBinary(ContextPtr); - if (!ChildOrErr) { - if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - error(std::move(E), Filename, C); - continue; - } - if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - if (!MachOPrintSizeWarning && PrintSize && isa(O)) { - WithColor::warning(errs(), ToolName) - << "sizes with -print-size for Mach-O files are always zero.\n"; - MachOPrintSizeWarning = true; - } - if (!checkMachOAndArchFlags(O, Filename)) - return; - if (!PrintFileName) { - outs() << "\n"; - if (isa(O)) { - outs() << Filename << "(" << O->getFileName() << ")"; - } else - outs() << O->getFileName(); - outs() << ":\n"; - } - dumpSymbolNamesFromObject(*O, false, Filename); - } +static void dumpArchive(Archive *A, std::vector &SymbolList, + StringRef Filename, LLVMContext *ContextPtr) { + if (ArchiveMap) + dumpArchiveMap(A, Filename); + + Error Err = Error::success(); + for (auto &C : A->children(Err)) { + Expected> ChildOrErr = C.getAsBinary(ContextPtr); + if (!ChildOrErr) { + if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + error(std::move(E), Filename, C); + continue; + } + if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { + if (!MachOPrintSizeWarning && PrintSize && isa(O)) { + WithColor::warning(errs(), ToolName) + << "sizes with -print-size for Mach-O files are always zero.\n"; + MachOPrintSizeWarning = true; } - if (Err) - error(std::move(Err), A->getFileName()); + if (!checkMachOAndArchFlags(O, Filename)) + return; + dumpSymbolNamesFromObject(*O, SymbolList, /*PrintSymbolObject=*/false, + !PrintFileName, Filename, + /*ArchitectureName=*/{}, O->getFileName(), + /*PrintArchiveName=*/false); } - return; } - if (MachOUniversalBinary *UB = dyn_cast(&Bin)) { - // If we have a list of architecture flags specified dump only those. - if (!ArchAll && !ArchFlags.empty()) { - // Look for a slice in the universal binary that matches each ArchFlag. 
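// A minimal sketch of the per-flag slice matching that is being moved into
// the dumpMachOUniversalBinaryMatchArchFlags helper below, assuming a
// MachOUniversalBinary `UB` and a single flag `Flag` (names illustrative):
//
//   bool Found = false;
//   for (const MachOUniversalBinary::ObjectForArch &O : UB.objects())
//     if (O.getArchFlagName() == Flag) {
//       Found = true; // dump the symbols of this slice
//       break;
//     }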
- bool ArchFound; - for (unsigned i = 0; i < ArchFlags.size(); ++i) { - ArchFound = false; - for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), - E = UB->end_objects(); - I != E; ++I) { - if (ArchFlags[i] == I->getArchFlagName()) { - ArchFound = true; - Expected> ObjOrErr = - I->getAsObjectFile(); - std::string ArchiveName; - std::string ArchitectureName; - ArchiveName.clear(); - ArchitectureName.clear(); - if (ObjOrErr) { - ObjectFile &Obj = *ObjOrErr.get(); - if (ArchFlags.size() > 1) { - if (PrintFileName) - ArchitectureName = I->getArchFlagName(); - else - outs() << "\n" << Obj.getFileName() << " (for architecture " - << I->getArchFlagName() << ")" - << ":\n"; + if (Err) + error(std::move(Err), A->getFileName()); +} + +static void dumpMachOUniversalBinaryMatchArchFlags( + MachOUniversalBinary *UB, std::vector &SymbolList, + StringRef Filename, LLVMContext *ContextPtr) { + // Look for a slice in the universal binary that matches each ArchFlag. + bool ArchFound; + for (unsigned i = 0; i < ArchFlags.size(); ++i) { + ArchFound = false; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + if (ArchFlags[i] == I->getArchFlagName()) { + ArchFound = true; + Expected> ObjOrErr = I->getAsObjectFile(); + std::string ArchiveName; + std::string ArchitectureName; + ArchiveName.clear(); + ArchitectureName.clear(); + if (ObjOrErr) { + ObjectFile &Obj = *ObjOrErr.get(); + if (ArchFlags.size() > 1) + ArchitectureName = I->getArchFlagName(); + dumpSymbolNamesFromObject(Obj, SymbolList, + /*PrintSymbolObject=*/false, + (ArchFlags.size() > 1) && !PrintFileName, + ArchiveName, ArchitectureName); + } else if (auto E = + isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { + error(std::move(E), Filename, + ArchFlags.size() > 1 ? StringRef(I->getArchFlagName()) + : StringRef()); + continue; + } else if (Expected> AOrErr = + I->getAsArchive()) { + std::unique_ptr &A = *AOrErr; + Error Err = Error::success(); + for (auto &C : A->children(Err)) { + Expected> ChildOrErr = + C.getAsBinary(ContextPtr); + if (!ChildOrErr) { + if (auto E = + isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) { + error(std::move(E), Filename, C, + ArchFlags.size() > 1 ? StringRef(I->getArchFlagName()) + : StringRef()); } - dumpSymbolNamesFromObject(Obj, false, ArchiveName, - ArchitectureName); - } else if (auto E = isNotObjectErrorInvalidFileType( - ObjOrErr.takeError())) { - error(std::move(E), Filename, ArchFlags.size() > 1 ? - StringRef(I->getArchFlagName()) : StringRef()); continue; - } else if (Expected> AOrErr = - I->getAsArchive()) { - std::unique_ptr &A = *AOrErr; - Error Err = Error::success(); - for (auto &C : A->children(Err)) { - Expected> ChildOrErr = - C.getAsBinary(ContextPtr); - if (!ChildOrErr) { - if (auto E = isNotObjectErrorInvalidFileType( - ChildOrErr.takeError())) { - error(std::move(E), Filename, C, ArchFlags.size() > 1 ? 
- StringRef(I->getArchFlagName()) : StringRef()); - } - continue; - } - if (SymbolicFile *O = - dyn_cast(&*ChildOrErr.get())) { - if (PrintFileName) { - ArchiveName = std::string(A->getFileName()); - if (ArchFlags.size() > 1) - ArchitectureName = I->getArchFlagName(); - } else { - outs() << "\n" << A->getFileName(); - outs() << "(" << O->getFileName() << ")"; - if (ArchFlags.size() > 1) { - outs() << " (for architecture " << I->getArchFlagName() - << ")"; - } - outs() << ":\n"; - } - dumpSymbolNamesFromObject(*O, false, ArchiveName, - ArchitectureName); - } - } - if (Err) - error(std::move(Err), A->getFileName()); - } else { - consumeError(AOrErr.takeError()); - error(Filename + " for architecture " + - StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file", - "Mach-O universal file"); } - } - } - if (!ArchFound) { - error(ArchFlags[i], - "file: " + Filename + " does not contain architecture"); - return; - } - } - return; - } - // No architecture flags were specified so if this contains a slice that - // matches the host architecture dump only that. - if (!ArchAll) { - Triple HostTriple = MachOObjectFile::getHostArch(); - StringRef HostArchName = HostTriple.getArchName(); - for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), - E = UB->end_objects(); - I != E; ++I) { - if (HostArchName == I->getArchFlagName()) { - Expected> ObjOrErr = I->getAsObjectFile(); - std::string ArchiveName; - if (ObjOrErr) { - ObjectFile &Obj = *ObjOrErr.get(); - dumpSymbolNamesFromObject(Obj, false); - } else if (auto E = isNotObjectErrorInvalidFileType( - ObjOrErr.takeError())) { - error(std::move(E), Filename); - return; - } else if (Expected> AOrErr = - I->getAsArchive()) { - std::unique_ptr &A = *AOrErr; - Error Err = Error::success(); - for (auto &C : A->children(Err)) { - Expected> ChildOrErr = - C.getAsBinary(ContextPtr); - if (!ChildOrErr) { - if (auto E = isNotObjectErrorInvalidFileType( - ChildOrErr.takeError())) - error(std::move(E), Filename, C); - continue; - } - if (SymbolicFile *O = - dyn_cast(&*ChildOrErr.get())) { - if (PrintFileName) - ArchiveName = std::string(A->getFileName()); - else - outs() << "\n" << A->getFileName() << "(" << O->getFileName() - << ")" - << ":\n"; - dumpSymbolNamesFromObject(*O, false, ArchiveName); - } + if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { + ArchiveName = std::string(A->getFileName()); + if (ArchFlags.size() > 1) + ArchitectureName = I->getArchFlagName(); + dumpSymbolNamesFromObject( + *O, SymbolList, /*PrintSymbolObject=*/false, !PrintFileName, + ArchiveName, ArchitectureName); } - if (Err) - error(std::move(Err), A->getFileName()); - } else { - consumeError(AOrErr.takeError()); - error(Filename + " for architecture " + - StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file", - "Mach-O universal file"); } - return; + if (Err) + error(std::move(Err), A->getFileName()); + } else { + consumeError(AOrErr.takeError()); + error(Filename + " for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file", + "Mach-O universal file"); } } } - // Either all architectures have been specified or none have been specified - // and this does not contain the host architecture so dump all the slices. 
- bool moreThanOneArch = UB->getNumberOfObjects() > 1; - for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) { - Expected> ObjOrErr = O.getAsObjectFile(); + if (!ArchFound) { + error(ArchFlags[i], + "file: " + Filename + " does not contain architecture"); + return; + } + } +} + +// Returns true If the binary contains a slice that matches the host +// architecture, or false otherwise. +static bool dumpMachOUniversalBinaryMatchHost(MachOUniversalBinary *UB, + std::vector &SymbolList, + StringRef Filename, + LLVMContext *ContextPtr) { + Triple HostTriple = MachOObjectFile::getHostArch(); + StringRef HostArchName = HostTriple.getArchName(); + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + if (HostArchName == I->getArchFlagName()) { + Expected> ObjOrErr = I->getAsObjectFile(); std::string ArchiveName; - std::string ArchitectureName; - ArchiveName.clear(); - ArchitectureName.clear(); if (ObjOrErr) { ObjectFile &Obj = *ObjOrErr.get(); - if (PrintFileName) { - if (isa(Obj) && moreThanOneArch) - ArchitectureName = O.getArchFlagName(); - } else { - if (moreThanOneArch) - outs() << "\n"; - outs() << Obj.getFileName(); - if (isa(Obj) && moreThanOneArch) - outs() << " (for architecture " << O.getArchFlagName() << ")"; - outs() << ":\n"; - } - dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName); - } else if (auto E = isNotObjectErrorInvalidFileType( - ObjOrErr.takeError())) { - error(std::move(E), Filename, moreThanOneArch ? - StringRef(O.getArchFlagName()) : StringRef()); - continue; - } else if (Expected> AOrErr = - O.getAsArchive()) { + dumpSymbolNamesFromObject(Obj, SymbolList, /*PrintSymbolObject=*/false, + /*PrintObjectLabel=*/false); + } else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) + error(std::move(E), Filename); + else if (Expected> AOrErr = I->getAsArchive()) { std::unique_ptr &A = *AOrErr; Error Err = Error::success(); for (auto &C : A->children(Err)) { Expected> ChildOrErr = - C.getAsBinary(ContextPtr); + C.getAsBinary(ContextPtr); if (!ChildOrErr) { - if (auto E = isNotObjectErrorInvalidFileType( - ChildOrErr.takeError())) - error(std::move(E), Filename, C, moreThanOneArch ? 
- StringRef(ArchitectureName) : StringRef()); + if (auto E = + isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + error(std::move(E), Filename, C); continue; } - if (SymbolicFile *F = dyn_cast(&*ChildOrErr.get())) { - if (PrintFileName) { - ArchiveName = std::string(A->getFileName()); - if (isa(F) && moreThanOneArch) - ArchitectureName = O.getArchFlagName(); - } else { - outs() << "\n" << A->getFileName(); - if (isa(F)) { - outs() << "(" << F->getFileName() << ")"; - if (moreThanOneArch) - outs() << " (for architecture " << O.getArchFlagName() - << ")"; - } else - outs() << ":" << F->getFileName(); - outs() << ":\n"; - } - dumpSymbolNamesFromObject(*F, false, ArchiveName, ArchitectureName); + if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { + ArchiveName = std::string(A->getFileName()); + dumpSymbolNamesFromObject(*O, SymbolList, + /*PrintSymbolObject=*/false, + !PrintFileName, ArchiveName); } } if (Err) @@ -2072,49 +2127,176 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { } else { consumeError(AOrErr.takeError()); error(Filename + " for architecture " + - StringRef(O.getArchFlagName()) + - " is not a Mach-O file or an archive file", + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file", "Mach-O universal file"); } + return true; } - return; } + return false; +} - if (TapiUniversal *TU = dyn_cast(&Bin)) { - for (const TapiUniversal::ObjectForArch &I : TU->objects()) { - StringRef ArchName = I.getArchFlagName(); - const bool ShowArch = - ArchFlags.empty() || llvm::is_contained(ArchFlags, ArchName); - if (!ShowArch) - continue; - if (!AddInlinedInfo && !I.isTopLevelLib()) - continue; - if (auto ObjOrErr = I.getAsObjectFile()) { - outs() << "\n" - << I.getInstallName() << " (for architecture " << ArchName << ")" - << ":\n"; - dumpSymbolNamesFromObject(*ObjOrErr.get(), false, {}, ArchName); - } else if (Error E = - isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - error(std::move(E), Filename, ArchName); +static void dumpMachOUniversalBinaryArchAll(MachOUniversalBinary *UB, + std::vector &SymbolList, + StringRef Filename, + LLVMContext *ContextPtr) { + bool moreThanOneArch = UB->getNumberOfObjects() > 1; + for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) { + Expected> ObjOrErr = O.getAsObjectFile(); + std::string ArchiveName; + std::string ArchitectureName; + ArchiveName.clear(); + ArchitectureName.clear(); + if (ObjOrErr) { + ObjectFile &Obj = *ObjOrErr.get(); + if (isa(Obj) && moreThanOneArch) + ArchitectureName = O.getArchFlagName(); + dumpSymbolNamesFromObject(Obj, SymbolList, /*PrintSymbolObject=*/false, + !PrintFileName, ArchiveName, ArchitectureName); + } else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { + error(std::move(E), Filename, + moreThanOneArch ? StringRef(O.getArchFlagName()) : StringRef()); + continue; + } else if (Expected> AOrErr = O.getAsArchive()) { + std::unique_ptr &A = *AOrErr; + Error Err = Error::success(); + for (auto &C : A->children(Err)) { + Expected> ChildOrErr = + C.getAsBinary(ContextPtr); + if (!ChildOrErr) { + if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + error(std::move(E), Filename, C, + moreThanOneArch ? 
StringRef(ArchitectureName) : StringRef()); + continue; + } + if (SymbolicFile *F = dyn_cast(&*ChildOrErr.get())) { + ArchiveName = std::string(A->getFileName()); + if (isa(F) && moreThanOneArch) + ArchitectureName = O.getArchFlagName(); + dumpSymbolNamesFromObject(*F, SymbolList, /*PrintSymbolObject=*/false, + !PrintFileName, ArchiveName, + ArchitectureName); + } } + if (Err) + error(std::move(Err), A->getFileName()); + } else { + consumeError(AOrErr.takeError()); + error(Filename + " for architecture " + StringRef(O.getArchFlagName()) + + " is not a Mach-O file or an archive file", + "Mach-O universal file"); } + } +} +static void dumpMachOUniversalBinary(MachOUniversalBinary *UB, + std::vector &SymbolList, + StringRef Filename, + LLVMContext *ContextPtr) { + // If we have a list of architecture flags specified dump only those. + if (!ArchAll && !ArchFlags.empty()) { + dumpMachOUniversalBinaryMatchArchFlags(UB, SymbolList, Filename, + ContextPtr); return; } - if (SymbolicFile *O = dyn_cast(&Bin)) { - if (!MachOPrintSizeWarning && PrintSize && isa(O)) { - WithColor::warning(errs(), ToolName) - << "sizes with --print-size for Mach-O files are always zero.\n"; - MachOPrintSizeWarning = true; + // No architecture flags were specified so if this contains a slice that + // matches the host architecture dump only that. + if (!ArchAll && + dumpMachOUniversalBinaryMatchHost(UB, SymbolList, Filename, ContextPtr)) + return; + + // Either all architectures have been specified or none have been specified + // and this does not contain the host architecture so dump all the slices. + dumpMachOUniversalBinaryArchAll(UB, SymbolList, Filename, ContextPtr); +} + +static void dumpTapiUniversal(TapiUniversal *TU, + std::vector &SymbolList, + StringRef Filename) { + for (const TapiUniversal::ObjectForArch &I : TU->objects()) { + StringRef ArchName = I.getArchFlagName(); + const bool ShowArch = + ArchFlags.empty() || llvm::is_contained(ArchFlags, ArchName); + if (!ShowArch) + continue; + if (!AddInlinedInfo && !I.isTopLevelLib()) + continue; + if (auto ObjOrErr = I.getAsObjectFile()) + dumpSymbolNamesFromObject( + *ObjOrErr.get(), SymbolList, /*PrintSymbolObject=*/false, + /*PrintObjectLabel=*/true, + /*ArchiveName=*/{}, ArchName, I.getInstallName()); + else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { + error(std::move(E), Filename, ArchName); } - if (!checkMachOAndArchFlags(O, Filename)) - return; - dumpSymbolNamesFromObject(*O, true); } } +static void dumpSymbolicFile(SymbolicFile *O, std::vector &SymbolList, + StringRef Filename) { + if (!MachOPrintSizeWarning && PrintSize && isa(O)) { + WithColor::warning(errs(), ToolName) + << "sizes with --print-size for Mach-O files are always zero.\n"; + MachOPrintSizeWarning = true; + } + if (!checkMachOAndArchFlags(O, Filename)) + return; + dumpSymbolNamesFromObject(*O, SymbolList, /*PrintSymbolObject=*/true, + /*PrintObjectLabel=*/false); +} + +static std::vector dumpSymbolNamesFromFile(StringRef Filename) { + std::vector SymbolList; + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (error(BufferOrErr.getError(), Filename)) + return SymbolList; + + // Always enable opaque pointers, to handle archives with mixed typed and + // opaque pointer bitcode files gracefully. As we're only reading symbols, + // the used pointer types don't matter. + LLVMContext Context; + Context.setOpaquePointers(true); + LLVMContext *ContextPtr = NoLLVMBitcode ? 
nullptr : &Context; + Expected> BinaryOrErr = + createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr); + if (!BinaryOrErr) { + error(BinaryOrErr.takeError(), Filename); + return SymbolList; + } + Binary &Bin = *BinaryOrErr.get(); + if (Archive *A = dyn_cast(&Bin)) + dumpArchive(A, SymbolList, Filename, ContextPtr); + else if (MachOUniversalBinary *UB = dyn_cast(&Bin)) + dumpMachOUniversalBinary(UB, SymbolList, Filename, ContextPtr); + else if (TapiUniversal *TU = dyn_cast(&Bin)) + dumpTapiUniversal(TU, SymbolList, Filename); + else if (SymbolicFile *O = dyn_cast(&Bin)) + dumpSymbolicFile(O, SymbolList, Filename); + return SymbolList; +} + +static void +exportSymbolNamesFromFiles(const std::vector &InputFilenames) { + std::vector SymbolList; + for (const auto &FileName : InputFilenames) { + std::vector FileSymList = dumpSymbolNamesFromFile(FileName); + SymbolList.insert(SymbolList.end(), FileSymList.begin(), FileSymList.end()); + } + + // Delete symbols which should not be printed from SymolList. + SymbolList.erase( + llvm::remove_if(SymbolList, + [](const NMSymbol &s) { return !s.shouldPrint(); }), + SymbolList.end()); + sortSymbolList(SymbolList); + SymbolList.erase(std::unique(SymbolList.begin(), SymbolList.end()), + SymbolList.end()); + printExportSymbolList(SymbolList); +} + int main(int argc, char **argv) { InitLLVM X(argc, argv); BumpPtrAllocator A; @@ -2169,6 +2351,12 @@ int main(int argc, char **argv) { PrintFileName = Args.hasArg(OPT_print_file_name); PrintSize = Args.hasArg(OPT_print_size); ReverseSort = Args.hasArg(OPT_reverse_sort); + ExportSymbols = Args.hasArg(OPT_export_symbols); + if (ExportSymbols) { + ExternalOnly = true; + DefinedOnly = true; + } + Quiet = Args.hasArg(OPT_quiet); V = Args.getLastArgValue(OPT_radix_EQ, "x"); if (V == "o") @@ -2185,6 +2373,18 @@ int main(int argc, char **argv) { UndefinedOnly = Args.hasArg(OPT_undefined_only); WithoutAliases = Args.hasArg(OPT_without_aliases); + StringRef Mode = Args.getLastArgValue(OPT_X, "any"); + if (Mode == "32") + BitMode = BitModeTy::Bit32; + else if (Mode == "64") + BitMode = BitModeTy::Bit64; + else if (Mode == "32_64") + BitMode = BitModeTy::Bit32_64; + else if (Mode == "any") + BitMode = BitModeTy::Any; + else + error("-X value should be one of: 32, 64, 32_64, (default) any"); + // Mach-O specific options. FormatMachOasHex = Args.hasArg(OPT_x); AddDyldInfo = Args.hasArg(OPT_add_dyldinfo); @@ -2192,6 +2392,9 @@ int main(int argc, char **argv) { DyldInfoOnly = Args.hasArg(OPT_dyldinfo_only); NoDyldInfo = Args.hasArg(OPT_no_dyldinfo); + // XCOFF specific options. + NoRsrc = Args.hasArg(OPT_no_rsrc); + // llvm-nm only reads binary files. 
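
// A minimal sketch, on plain strings, of the filter/sort/unique pipeline that
// exportSymbolNamesFromFiles above applies to NMSymbol: llvm-nm uses its own
// shouldPrint() predicate, comparator, and printer, but the shape is the same.
#include <algorithm>
#include <string>
#include <vector>

static std::vector<std::string> exportList(std::vector<std::string> Syms) {
  // Drop entries that should not be printed.
  Syms.erase(std::remove_if(Syms.begin(), Syms.end(),
                            [](const std::string &S) { return S.empty(); }),
             Syms.end());
  // Sort so duplicates from different input files become adjacent...
  std::sort(Syms.begin(), Syms.end());
  // ...because std::unique only collapses *adjacent* duplicates.
  Syms.erase(std::unique(Syms.begin(), Syms.end()), Syms.end());
  return Syms;
}
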
if (error(sys::ChangeStdinToBinary())) return 1; @@ -2249,7 +2452,10 @@ int main(int argc, char **argv) { if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly)) error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only"); - llvm::for_each(InputFilenames, dumpSymbolNamesFromFile); + if (ExportSymbols) + exportSymbolNamesFromFiles(InputFilenames); + else + llvm::for_each(InputFilenames, dumpSymbolNamesFromFile); if (HadError) return 1; diff --git a/llvm/tools/llvm-objcopy/BitcodeStripOpts.td b/llvm/tools/llvm-objcopy/BitcodeStripOpts.td index cc178164b03c..21db854b1e6f 100644 --- a/llvm/tools/llvm-objcopy/BitcodeStripOpts.td +++ b/llvm/tools/llvm-objcopy/BitcodeStripOpts.td @@ -17,8 +17,14 @@ def help : Flag<["--"], "help">; def h : Flag<["-"], "h">, Alias; def version : Flag<["--"], "version">, - HelpText<"Print the version and exit.">; + HelpText<"Print the version and exit">; def V : Flag<["-"], "V">, Alias, HelpText<"Alias for --version">; + +def remove : Flag<["-"], "r">, + HelpText<"Remove the __LLVM bitcode segment entirely">; + +def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to ">, + MetaVarName<"">; diff --git a/llvm/tools/llvm-objcopy/COFF/COFFConfig.h b/llvm/tools/llvm-objcopy/COFF/COFFConfig.h deleted file mode 100644 index 7bf673fa4af9..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/COFFConfig.h +++ /dev/null @@ -1,27 +0,0 @@ -//===- COFFConfig.h ---------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COFF_COFFCONFIG_H -#define LLVM_TOOLS_LLVM_OBJCOPY_COFF_COFFCONFIG_H - -#include "llvm/ADT/Optional.h" - -namespace llvm { -namespace objcopy { - -// Coff specific configuration for copying/stripping a single file. -struct COFFConfig { - Optional Subsystem; - Optional MajorSubsystemVersion; - Optional MinorSubsystemVersion; -}; - -} // namespace objcopy -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_COFF_COFFCONFIG_H diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp deleted file mode 100644 index e0039cd3a675..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ /dev/null @@ -1,297 +0,0 @@ -//===- COFFObjcopy.cpp ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "COFFObjcopy.h" -#include "COFFConfig.h" -#include "CommonConfig.h" -#include "Object.h" -#include "Reader.h" -#include "Writer.h" - -#include "llvm/Object/Binary.h" -#include "llvm/Object/COFF.h" -#include "llvm/Support/CRC.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/Path.h" -#include - -namespace llvm { -namespace objcopy { -namespace coff { - -using namespace object; -using namespace COFF; - -static bool isDebugSection(const Section &Sec) { - return Sec.Name.startswith(".debug"); -} - -static uint64_t getNextRVA(const Object &Obj) { - if (Obj.getSections().empty()) - return 0; - const Section &Last = Obj.getSections().back(); - return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize, - Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1); -} - -static Expected> -createGnuDebugLinkSectionContents(StringRef File) { - ErrorOr> LinkTargetOrErr = - MemoryBuffer::getFile(File); - if (!LinkTargetOrErr) - return createFileError(File, LinkTargetOrErr.getError()); - auto LinkTarget = std::move(*LinkTargetOrErr); - uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer())); - - StringRef FileName = sys::path::filename(File); - size_t CRCPos = alignTo(FileName.size() + 1, 4); - std::vector Data(CRCPos + 4); - memcpy(Data.data(), FileName.data(), FileName.size()); - support::endian::write32le(Data.data() + CRCPos, CRC32); - return Data; -} - -// Adds named section with given contents to the object. -static void addSection(Object &Obj, StringRef Name, ArrayRef Contents, - uint32_t Characteristics) { - bool NeedVA = Characteristics & (IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | - IMAGE_SCN_MEM_WRITE); - - Section Sec; - Sec.setOwnedContents(Contents); - Sec.Name = Name; - Sec.Header.VirtualSize = NeedVA ? Sec.getContents().size() : 0u; - Sec.Header.VirtualAddress = NeedVA ? getNextRVA(Obj) : 0u; - Sec.Header.SizeOfRawData = - NeedVA ? alignTo(Sec.Header.VirtualSize, - Obj.IsPE ? Obj.PeHeader.FileAlignment : 1) - : Sec.getContents().size(); - // Sec.Header.PointerToRawData is filled in by the writer. - Sec.Header.PointerToRelocations = 0; - Sec.Header.PointerToLinenumbers = 0; - // Sec.Header.NumberOfRelocations is filled in by the writer. - Sec.Header.NumberOfLinenumbers = 0; - Sec.Header.Characteristics = Characteristics; - - Obj.addSections(Sec); -} - -static Error addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { - Expected> Contents = - createGnuDebugLinkSectionContents(DebugLinkFile); - if (!Contents) - return Contents.takeError(); - - addSection(Obj, ".gnu_debuglink", *Contents, - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | - IMAGE_SCN_MEM_DISCARDABLE); - - return Error::success(); -} - -static uint32_t flagsToCharacteristics(SectionFlag AllFlags, uint32_t OldChar) { - // Need to preserve alignment flags. - const uint32_t PreserveMask = - IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_ALIGN_4BYTES | - IMAGE_SCN_ALIGN_8BYTES | IMAGE_SCN_ALIGN_16BYTES | - IMAGE_SCN_ALIGN_32BYTES | IMAGE_SCN_ALIGN_64BYTES | - IMAGE_SCN_ALIGN_128BYTES | IMAGE_SCN_ALIGN_256BYTES | - IMAGE_SCN_ALIGN_512BYTES | IMAGE_SCN_ALIGN_1024BYTES | - IMAGE_SCN_ALIGN_2048BYTES | IMAGE_SCN_ALIGN_4096BYTES | - IMAGE_SCN_ALIGN_8192BYTES; - - // Setup new section characteristics based on the flags provided in command - // line. 
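
// A minimal sketch of the .gnu_debuglink payload that
// createGnuDebugLinkSectionContents above builds: the debug file's name,
// NUL-padded so the checksum lands on a 4-byte boundary, followed by the
// little-endian CRC32 of the debug file. debugLinkPayload is a hypothetical
// stand-in that takes a precomputed CRC.
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

static std::vector<uint8_t> debugLinkPayload(const std::string &Name,
                                             uint32_t CRC32) {
  size_t CRCPos = (Name.size() + 1 + 3) & ~size_t(3); // alignTo(size + 1, 4)
  std::vector<uint8_t> Data(CRCPos + 4, 0);           // zero padding included
  std::memcpy(Data.data(), Name.data(), Name.size());
  for (int I = 0; I < 4; ++I)                         // write32le
    Data[CRCPos + I] = uint8_t(CRC32 >> (8 * I));
  return Data;
}
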
- uint32_t NewCharacteristics = (OldChar & PreserveMask) | IMAGE_SCN_MEM_READ; - - if ((AllFlags & SectionFlag::SecAlloc) && !(AllFlags & SectionFlag::SecLoad)) - NewCharacteristics |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; - if (AllFlags & SectionFlag::SecNoload) - NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; - if (!(AllFlags & SectionFlag::SecReadonly)) - NewCharacteristics |= IMAGE_SCN_MEM_WRITE; - if (AllFlags & SectionFlag::SecDebug) - NewCharacteristics |= - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_DISCARDABLE; - if (AllFlags & SectionFlag::SecCode) - NewCharacteristics |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; - if (AllFlags & SectionFlag::SecData) - NewCharacteristics |= IMAGE_SCN_CNT_INITIALIZED_DATA; - if (AllFlags & SectionFlag::SecShare) - NewCharacteristics |= IMAGE_SCN_MEM_SHARED; - if (AllFlags & SectionFlag::SecExclude) - NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; - - return NewCharacteristics; -} - -static Error handleArgs(const CommonConfig &Config, - const COFFConfig &COFFConfig, Object &Obj) { - // Perform the actual section removals. - Obj.removeSections([&Config](const Section &Sec) { - // Contrary to --only-keep-debug, --only-section fully removes sections that - // aren't mentioned. - if (!Config.OnlySection.empty() && !Config.OnlySection.matches(Sec.Name)) - return true; - - if (Config.StripDebug || Config.StripAll || Config.StripAllGNU || - Config.DiscardMode == DiscardType::All || Config.StripUnneeded) { - if (isDebugSection(Sec) && - (Sec.Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0) - return true; - } - - if (Config.ToRemove.matches(Sec.Name)) - return true; - - return false; - }); - - if (Config.OnlyKeepDebug) { - // For --only-keep-debug, we keep all other sections, but remove their - // content. The VirtualSize field in the section header is kept intact. - Obj.truncateSections([](const Section &Sec) { - return !isDebugSection(Sec) && Sec.Name != ".buildid" && - ((Sec.Header.Characteristics & - (IMAGE_SCN_CNT_CODE | IMAGE_SCN_CNT_INITIALIZED_DATA)) != 0); - }); - } - - // StripAll removes all symbols and thus also removes all relocations. - if (Config.StripAll || Config.StripAllGNU) - for (Section &Sec : Obj.getMutableSections()) - Sec.Relocs.clear(); - - // If we need to do per-symbol removals, initialize the Referenced field. - if (Config.StripUnneeded || Config.DiscardMode == DiscardType::All || - !Config.SymbolsToRemove.empty()) - if (Error E = Obj.markSymbols()) - return E; - - for (Symbol &Sym : Obj.getMutableSymbols()) { - auto I = Config.SymbolsToRename.find(Sym.Name); - if (I != Config.SymbolsToRename.end()) - Sym.Name = I->getValue(); - } - - auto ToRemove = [&](const Symbol &Sym) -> Expected { - // For StripAll, all relocations have been stripped and we remove all - // symbols. - if (Config.StripAll || Config.StripAllGNU) - return true; - - if (Config.SymbolsToRemove.matches(Sym.Name)) { - // Explicitly removing a referenced symbol is an error. - if (Sym.Referenced) - return createStringError( - llvm::errc::invalid_argument, - "'" + Config.OutputFilename + "': not stripping symbol '" + - Sym.Name.str() + "' because it is named in a relocation"); - return true; - } - - if (!Sym.Referenced) { - // With --strip-unneeded, GNU objcopy removes all unreferenced local - // symbols, and any unreferenced undefined external. - // With --strip-unneeded-symbol we strip only specific unreferenced - // local symbol instead of removing all of such. 
- if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC || - Sym.Sym.SectionNumber == 0) - if (Config.StripUnneeded || - Config.UnneededSymbolsToRemove.matches(Sym.Name)) - return true; - - // GNU objcopy keeps referenced local symbols and external symbols - // if --discard-all is set, similar to what --strip-unneeded does, - // but undefined local symbols are kept when --discard-all is set. - if (Config.DiscardMode == DiscardType::All && - Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC && - Sym.Sym.SectionNumber != 0) - return true; - } - - return false; - }; - - // Actually do removals of symbols. - if (Error Err = Obj.removeSymbols(ToRemove)) - return Err; - - if (!Config.SetSectionFlags.empty()) - for (Section &Sec : Obj.getMutableSections()) { - const auto It = Config.SetSectionFlags.find(Sec.Name); - if (It != Config.SetSectionFlags.end()) - Sec.Header.Characteristics = flagsToCharacteristics( - It->second.NewFlags, Sec.Header.Characteristics); - } - - for (const auto &Flag : Config.AddSection) { - StringRef SecName, FileName; - std::tie(SecName, FileName) = Flag.split("="); - - auto BufOrErr = MemoryBuffer::getFile(FileName); - if (!BufOrErr) - return createFileError(FileName, errorCodeToError(BufOrErr.getError())); - auto Buf = std::move(*BufOrErr); - - uint32_t Characteristics; - const auto It = Config.SetSectionFlags.find(SecName); - if (It != Config.SetSectionFlags.end()) - Characteristics = flagsToCharacteristics(It->second.NewFlags, 0); - else - Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES; - - addSection( - Obj, SecName, - makeArrayRef(reinterpret_cast(Buf->getBufferStart()), - Buf->getBufferSize()), - Characteristics); - } - - if (!Config.AddGnuDebugLink.empty()) - if (Error E = addGnuDebugLink(Obj, Config.AddGnuDebugLink)) - return E; - - if (COFFConfig.Subsystem || COFFConfig.MajorSubsystemVersion || - COFFConfig.MinorSubsystemVersion) { - if (!Obj.IsPE) - return createStringError( - errc::invalid_argument, - "'" + Config.OutputFilename + - "': unable to set subsystem on a relocatable object file"); - if (COFFConfig.Subsystem) - Obj.PeHeader.Subsystem = *COFFConfig.Subsystem; - if (COFFConfig.MajorSubsystemVersion) - Obj.PeHeader.MajorSubsystemVersion = *COFFConfig.MajorSubsystemVersion; - if (COFFConfig.MinorSubsystemVersion) - Obj.PeHeader.MinorSubsystemVersion = *COFFConfig.MinorSubsystemVersion; - } - - return Error::success(); -} - -Error executeObjcopyOnBinary(const CommonConfig &Config, - const COFFConfig &COFFConfig, COFFObjectFile &In, - raw_ostream &Out) { - COFFReader Reader(In); - Expected> ObjOrErr = Reader.create(); - if (!ObjOrErr) - return createFileError(Config.InputFilename, ObjOrErr.takeError()); - Object *Obj = ObjOrErr->get(); - assert(Obj && "Unable to deserialize COFF object"); - if (Error E = handleArgs(Config, COFFConfig, *Obj)) - return createFileError(Config.InputFilename, std::move(E)); - COFFWriter Writer(*Obj, Out); - if (Error E = Writer.write()) - return createFileError(Config.OutputFilename, std::move(E)); - return Error::success(); -} - -} // end namespace coff -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h deleted file mode 100644 index 2c7ccd34653d..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- COFFObjcopy.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H -#define LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H - -namespace llvm { -class Error; -class raw_ostream; - -namespace object { -class COFFObjectFile; -} // end namespace object - -namespace objcopy { -struct CommonConfig; -struct COFFConfig; - -namespace coff { - -Error executeObjcopyOnBinary(const CommonConfig &Config, const COFFConfig &, - object::COFFObjectFile &In, raw_ostream &Out); - -} // end namespace coff -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp deleted file mode 100644 index ec2628c7eca9..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/Object.cpp +++ /dev/null @@ -1,132 +0,0 @@ -//===- Object.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Object.h" -#include "llvm/ADT/DenseSet.h" -#include - -namespace llvm { -namespace objcopy { -namespace coff { - -using namespace object; - -void Object::addSymbols(ArrayRef NewSymbols) { - for (Symbol S : NewSymbols) { - S.UniqueId = NextSymbolUniqueId++; - Symbols.emplace_back(S); - } - updateSymbols(); -} - -void Object::updateSymbols() { - SymbolMap = DenseMap(Symbols.size()); - for (Symbol &Sym : Symbols) - SymbolMap[Sym.UniqueId] = &Sym; -} - -const Symbol *Object::findSymbol(size_t UniqueId) const { - return SymbolMap.lookup(UniqueId); -} - -Error Object::removeSymbols( - function_ref(const Symbol &)> ToRemove) { - Error Errs = Error::success(); - llvm::erase_if(Symbols, [ToRemove, &Errs](const Symbol &Sym) { - Expected ShouldRemove = ToRemove(Sym); - if (!ShouldRemove) { - Errs = joinErrors(std::move(Errs), ShouldRemove.takeError()); - return false; - } - return *ShouldRemove; - }); - - updateSymbols(); - return Errs; -} - -Error Object::markSymbols() { - for (Symbol &Sym : Symbols) - Sym.Referenced = false; - for (const Section &Sec : Sections) { - for (const Relocation &R : Sec.Relocs) { - auto It = SymbolMap.find(R.Target); - if (It == SymbolMap.end()) - return createStringError(object_error::invalid_symbol_index, - "relocation target %zu not found", R.Target); - It->second->Referenced = true; - } - } - return Error::success(); -} - -void Object::addSections(ArrayRef
<Section>
NewSections) { - for (Section S : NewSections) { - S.UniqueId = NextSectionUniqueId++; - Sections.emplace_back(S); - } - updateSections(); -} - -void Object::updateSections() { - SectionMap = DenseMap(Sections.size()); - size_t Index = 1; - for (Section &S : Sections) { - SectionMap[S.UniqueId] = &S; - S.Index = Index++; - } -} - -const Section *Object::findSection(ssize_t UniqueId) const { - return SectionMap.lookup(UniqueId); -} - -void Object::removeSections(function_ref ToRemove) { - DenseSet AssociatedSections; - auto RemoveAssociated = [&AssociatedSections](const Section &Sec) { - return AssociatedSections.contains(Sec.UniqueId); - }; - do { - DenseSet RemovedSections; - llvm::erase_if(Sections, [ToRemove, &RemovedSections](const Section &Sec) { - bool Remove = ToRemove(Sec); - if (Remove) - RemovedSections.insert(Sec.UniqueId); - return Remove; - }); - // Remove all symbols referring to the removed sections. - AssociatedSections.clear(); - llvm::erase_if( - Symbols, [&RemovedSections, &AssociatedSections](const Symbol &Sym) { - // If there are sections that are associative to a removed - // section, - // remove those as well as nothing will include them (and we can't - // leave them dangling). - if (RemovedSections.contains(Sym.AssociativeComdatTargetSectionId)) - AssociatedSections.insert(Sym.TargetSectionId); - return RemovedSections.contains(Sym.TargetSectionId); - }); - ToRemove = RemoveAssociated; - } while (!AssociatedSections.empty()); - updateSections(); - updateSymbols(); -} - -void Object::truncateSections(function_ref ToTruncate) { - for (Section &Sec : Sections) { - if (ToTruncate(Sec)) { - Sec.clearContents(); - Sec.Relocs.clear(); - Sec.Header.SizeOfRawData = 0; - } - } -} - -} // end namespace coff -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h deleted file mode 100644 index 0e854b58cbdb..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/Object.h +++ /dev/null @@ -1,211 +0,0 @@ -//===- Object.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H -#define LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/Object/COFF.h" -#include -#include -#include - -namespace llvm { -namespace objcopy { -namespace coff { - -struct Relocation { - Relocation() = default; - Relocation(const object::coff_relocation &R) : Reloc(R) {} - - object::coff_relocation Reloc; - size_t Target = 0; - StringRef TargetName; // Used for diagnostics only -}; - -struct Section { - object::coff_section Header; - std::vector Relocs; - StringRef Name; - ssize_t UniqueId; - size_t Index; - - ArrayRef getContents() const { - if (!OwnedContents.empty()) - return OwnedContents; - return ContentsRef; - } - - void setContentsRef(ArrayRef Data) { - OwnedContents.clear(); - ContentsRef = Data; - } - - void setOwnedContents(std::vector &&Data) { - ContentsRef = ArrayRef(); - OwnedContents = std::move(Data); - } - - void clearContents() { - ContentsRef = ArrayRef(); - OwnedContents.clear(); - } - -private: - ArrayRef ContentsRef; - std::vector OwnedContents; -}; - -struct AuxSymbol { - AuxSymbol(ArrayRef In) { - assert(In.size() == sizeof(Opaque)); - std::copy(In.begin(), In.end(), Opaque); - } - - ArrayRef getRef() const { - return ArrayRef(Opaque, sizeof(Opaque)); - } - - uint8_t Opaque[sizeof(object::coff_symbol16)]; -}; - -struct Symbol { - object::coff_symbol32 Sym; - StringRef Name; - std::vector AuxData; - StringRef AuxFile; - ssize_t TargetSectionId; - ssize_t AssociativeComdatTargetSectionId = 0; - Optional WeakTargetSymbolId; - size_t UniqueId; - size_t RawIndex; - bool Referenced; -}; - -struct Object { - bool IsPE = false; - - object::dos_header DosHeader; - ArrayRef DosStub; - - object::coff_file_header CoffFileHeader; - - bool Is64 = false; - object::pe32plus_header PeHeader; - uint32_t BaseOfData = 0; // pe32plus_header lacks this field. - - std::vector DataDirectories; - - ArrayRef getSymbols() const { return Symbols; } - // This allows mutating individual Symbols, but not mutating the list - // of symbols itself. - iterator_range::iterator> getMutableSymbols() { - return make_range(Symbols.begin(), Symbols.end()); - } - - const Symbol *findSymbol(size_t UniqueId) const; - - void addSymbols(ArrayRef NewSymbols); - Error removeSymbols(function_ref(const Symbol &)> ToRemove); - - // Set the Referenced field on all Symbols, based on relocations in - // all sections. - Error markSymbols(); - - ArrayRef
<Section> getSections() const { return Sections; }
-  // This allows mutating individual Sections, but not mutating the list
-  // of sections itself.
-  iterator_range<std::vector<Section>::iterator> getMutableSections() {
-    return make_range(Sections.begin(), Sections.end());
-  }
-
-  const Section *findSection(ssize_t UniqueId) const;
-
-  void addSections(ArrayRef<Section> NewSections);
-  void removeSections(function_ref<bool(const Section &)> ToRemove);
-  void truncateSections(function_ref<bool(const Section &)> ToTruncate);
-
-private:
-  std::vector<Symbol> Symbols;
-  DenseMap<size_t, Symbol *> SymbolMap;
-
-  size_t NextSymbolUniqueId = 0;
-
-  std::vector<Section>
Sections; - DenseMap SectionMap; - - ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined. - - // Update SymbolMap. - void updateSymbols(); - - // Update SectionMap and Index in each Section. - void updateSections(); -}; - -// Copy between coff_symbol16 and coff_symbol32. -// The source and destination files can use either coff_symbol16 or -// coff_symbol32, while we always store them as coff_symbol32 in the -// intermediate data structure. -template -void copySymbol(Symbol1Ty &Dest, const Symbol2Ty &Src) { - static_assert(sizeof(Dest.Name.ShortName) == sizeof(Src.Name.ShortName), - "Mismatched name sizes"); - memcpy(Dest.Name.ShortName, Src.Name.ShortName, sizeof(Dest.Name.ShortName)); - Dest.Value = Src.Value; - Dest.SectionNumber = Src.SectionNumber; - Dest.Type = Src.Type; - Dest.StorageClass = Src.StorageClass; - Dest.NumberOfAuxSymbols = Src.NumberOfAuxSymbols; -} - -// Copy between pe32_header and pe32plus_header. -// We store the intermediate state in a pe32plus_header. -template -void copyPeHeader(PeHeader1Ty &Dest, const PeHeader2Ty &Src) { - Dest.Magic = Src.Magic; - Dest.MajorLinkerVersion = Src.MajorLinkerVersion; - Dest.MinorLinkerVersion = Src.MinorLinkerVersion; - Dest.SizeOfCode = Src.SizeOfCode; - Dest.SizeOfInitializedData = Src.SizeOfInitializedData; - Dest.SizeOfUninitializedData = Src.SizeOfUninitializedData; - Dest.AddressOfEntryPoint = Src.AddressOfEntryPoint; - Dest.BaseOfCode = Src.BaseOfCode; - Dest.ImageBase = Src.ImageBase; - Dest.SectionAlignment = Src.SectionAlignment; - Dest.FileAlignment = Src.FileAlignment; - Dest.MajorOperatingSystemVersion = Src.MajorOperatingSystemVersion; - Dest.MinorOperatingSystemVersion = Src.MinorOperatingSystemVersion; - Dest.MajorImageVersion = Src.MajorImageVersion; - Dest.MinorImageVersion = Src.MinorImageVersion; - Dest.MajorSubsystemVersion = Src.MajorSubsystemVersion; - Dest.MinorSubsystemVersion = Src.MinorSubsystemVersion; - Dest.Win32VersionValue = Src.Win32VersionValue; - Dest.SizeOfImage = Src.SizeOfImage; - Dest.SizeOfHeaders = Src.SizeOfHeaders; - Dest.CheckSum = Src.CheckSum; - Dest.Subsystem = Src.Subsystem; - Dest.DLLCharacteristics = Src.DLLCharacteristics; - Dest.SizeOfStackReserve = Src.SizeOfStackReserve; - Dest.SizeOfStackCommit = Src.SizeOfStackCommit; - Dest.SizeOfHeapReserve = Src.SizeOfHeapReserve; - Dest.SizeOfHeapCommit = Src.SizeOfHeapCommit; - Dest.LoaderFlags = Src.LoaderFlags; - Dest.NumberOfRvaAndSize = Src.NumberOfRvaAndSize; -} - -} // end namespace coff -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/tools/llvm-objcopy/COFF/Reader.cpp deleted file mode 100644 index d1beacb3bd67..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp +++ /dev/null @@ -1,226 +0,0 @@ -//===- Reader.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Reader.h" -#include "Object.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/Object/COFF.h" -#include "llvm/Support/ErrorHandling.h" -#include -#include - -namespace llvm { -namespace objcopy { -namespace coff { - -using namespace object; -using namespace COFF; - -Error COFFReader::readExecutableHeaders(Object &Obj) const { - const dos_header *DH = COFFObj.getDOSHeader(); - Obj.Is64 = COFFObj.is64(); - if (!DH) - return Error::success(); - - Obj.IsPE = true; - Obj.DosHeader = *DH; - if (DH->AddressOfNewExeHeader > sizeof(*DH)) - Obj.DosStub = ArrayRef(reinterpret_cast(&DH[1]), - DH->AddressOfNewExeHeader - sizeof(*DH)); - - if (COFFObj.is64()) { - Obj.PeHeader = *COFFObj.getPE32PlusHeader(); - } else { - const pe32_header *PE32 = COFFObj.getPE32Header(); - copyPeHeader(Obj.PeHeader, *PE32); - // The pe32plus_header (stored in Object) lacks the BaseOfData field. - Obj.BaseOfData = PE32->BaseOfData; - } - - for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) { - const data_directory *Dir = COFFObj.getDataDirectory(I); - if (!Dir) - return errorCodeToError(object_error::parse_failed); - Obj.DataDirectories.emplace_back(*Dir); - } - return Error::success(); -} - -Error COFFReader::readSections(Object &Obj) const { - std::vector
<Section> Sections;
-  // Section indexing starts from 1.
-  for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
-    Expected<const coff_section *> SecOrErr = COFFObj.getSection(I);
-    if (!SecOrErr)
-      return SecOrErr.takeError();
-    const coff_section *Sec = *SecOrErr;
-    Sections.push_back(Section());
-    Section &S = Sections.back();
-    S.Header = *Sec;
-    S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
-    ArrayRef<uint8_t> Contents;
-    if (Error E = COFFObj.getSectionContents(Sec, Contents))
-      return E;
-    S.setContentsRef(Contents);
-    ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
-    for (const coff_relocation &R : Relocs)
-      S.Relocs.push_back(R);
-    if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec))
-      S.Name = *NameOrErr;
-    else
-      return NameOrErr.takeError();
-  }
-  Obj.addSections(Sections);
-  return Error::success();
-}
-
-Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
-  std::vector<Symbol> Symbols;
-  Symbols.reserve(COFFObj.getRawNumberOfSymbols());
-  ArrayRef<Section>
Sections = Obj.getSections(); - for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) { - Expected SymOrErr = COFFObj.getSymbol(I); - if (!SymOrErr) - return SymOrErr.takeError(); - COFFSymbolRef SymRef = *SymOrErr; - - Symbols.push_back(Symbol()); - Symbol &Sym = Symbols.back(); - // Copy symbols from the original form into an intermediate coff_symbol32. - if (IsBigObj) - copySymbol(Sym.Sym, - *reinterpret_cast(SymRef.getRawPtr())); - else - copySymbol(Sym.Sym, - *reinterpret_cast(SymRef.getRawPtr())); - auto NameOrErr = COFFObj.getSymbolName(SymRef); - if (!NameOrErr) - return NameOrErr.takeError(); - Sym.Name = *NameOrErr; - - ArrayRef AuxData = COFFObj.getSymbolAuxData(SymRef); - size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16); - assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols()); - // The auxillary symbols are structs of sizeof(coff_symbol16) each. - // In the big object format (where symbols are coff_symbol32), each - // auxillary symbol is padded with 2 bytes at the end. Copy each - // auxillary symbol to the Sym.AuxData vector. For file symbols, - // the whole range of aux symbols are interpreted as one null padded - // string instead. - if (SymRef.isFileRecord()) - Sym.AuxFile = StringRef(reinterpret_cast(AuxData.data()), - AuxData.size()) - .rtrim('\0'); - else - for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++) - Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol))); - - // Find the unique id of the section - if (SymRef.getSectionNumber() <= - 0) // Special symbol (undefined/absolute/debug) - Sym.TargetSectionId = SymRef.getSectionNumber(); - else if (static_cast(SymRef.getSectionNumber() - 1) < - Sections.size()) - Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId; - else - return createStringError(object_error::parse_failed, - "section number out of range"); - // For section definitions, check if it is comdat associative, and if - // it is, find the target section unique id. - const coff_aux_section_definition *SD = SymRef.getSectionDefinition(); - const coff_aux_weak_external *WE = SymRef.getWeakExternal(); - if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) { - int32_t Index = SD->getNumber(IsBigObj); - if (Index <= 0 || static_cast(Index - 1) >= Sections.size()) - return createStringError(object_error::parse_failed, - "unexpected associative section index"); - Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId; - } else if (WE) { - // This is a raw symbol index for now, but store it in the Symbol - // until we've added them to the Object, which assigns the final - // unique ids. - Sym.WeakTargetSymbolId = WE->TagIndex; - } - I += 1 + SymRef.getNumberOfAuxSymbols(); - } - Obj.addSymbols(Symbols); - return Error::success(); -} - -Error COFFReader::setSymbolTargets(Object &Obj) const { - std::vector RawSymbolTable; - for (const Symbol &Sym : Obj.getSymbols()) { - RawSymbolTable.push_back(&Sym); - for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++) - RawSymbolTable.push_back(nullptr); - } - for (Symbol &Sym : Obj.getMutableSymbols()) { - // Convert WeakTargetSymbolId from the original raw symbol index to - // a proper unique id. 
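
// A minimal sketch of the raw-slot table built just above: COFF relocations
// and weak externals index the symbol table by raw slot, where each symbol
// occupies 1 + NumberOfAuxSymbols entries, so aux slots map to null and a
// SymbolTableIndex resolves with a plain vector lookup. Sym is a hypothetical
// stand-in for this patch's Symbol struct.
#include <vector>

struct Sym { unsigned NumberOfAuxSymbols; };

static std::vector<const Sym *> rawSlotTable(const std::vector<Sym> &Syms) {
  std::vector<const Sym *> Raw;
  for (const Sym &S : Syms) {
    Raw.push_back(&S);          // the symbol's own slot
    for (unsigned I = 0; I < S.NumberOfAuxSymbols; ++I)
      Raw.push_back(nullptr);   // slots occupied by its aux records
  }
  return Raw;                   // Raw[SymbolTableIndex] resolves an index
}
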
- if (Sym.WeakTargetSymbolId) { - if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size()) - return createStringError(object_error::parse_failed, - "weak external reference out of range"); - const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId]; - if (Target == nullptr) - return createStringError(object_error::parse_failed, - "invalid SymbolTableIndex"); - Sym.WeakTargetSymbolId = Target->UniqueId; - } - } - for (Section &Sec : Obj.getMutableSections()) { - for (Relocation &R : Sec.Relocs) { - if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size()) - return createStringError(object_error::parse_failed, - "SymbolTableIndex out of range"); - const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex]; - if (Sym == nullptr) - return createStringError(object_error::parse_failed, - "invalid SymbolTableIndex"); - R.Target = Sym->UniqueId; - R.TargetName = Sym->Name; - } - } - return Error::success(); -} - -Expected> COFFReader::create() const { - auto Obj = std::make_unique(); - - bool IsBigObj = false; - if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) { - Obj->CoffFileHeader = *CFH; - } else { - const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader(); - if (!CBFH) - return createStringError(object_error::parse_failed, - "no COFF file header returned"); - // Only copying the few fields from the bigobj header that we need - // and won't recreate in the end. - Obj->CoffFileHeader.Machine = CBFH->Machine; - Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp; - IsBigObj = true; - } - - if (Error E = readExecutableHeaders(*Obj)) - return std::move(E); - if (Error E = readSections(*Obj)) - return std::move(E); - if (Error E = readSymbols(*Obj, IsBigObj)) - return std::move(E); - if (Error E = setSymbolTargets(*Obj)) - return std::move(E); - - return std::move(Obj); -} - -} // end namespace coff -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.h b/llvm/tools/llvm-objcopy/COFF/Reader.h deleted file mode 100644 index 48c050b6ea11..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/Reader.h +++ /dev/null @@ -1,41 +0,0 @@ -//===- Reader.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_COFF_READER_H -#define LLVM_TOOLS_OBJCOPY_COFF_READER_H - -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/Object/COFF.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace objcopy { -namespace coff { - -struct Object; - -using object::COFFObjectFile; - -class COFFReader { - const COFFObjectFile &COFFObj; - - Error readExecutableHeaders(Object &Obj) const; - Error readSections(Object &Obj) const; - Error readSymbols(Object &Obj, bool IsBigObj) const; - Error setSymbolTargets(Object &Obj) const; - -public: - explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {} - Expected> create() const; -}; - -} // end namespace coff -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_COFF_READER_H diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp deleted file mode 100644 index cbd0e4261238..000000000000 --- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp +++ /dev/null @@ -1,457 +0,0 @@ -//===- Writer.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Writer.h" -#include "Object.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/Object/COFF.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" -#include -#include - -namespace llvm { -namespace objcopy { -namespace coff { - -using namespace object; -using namespace COFF; - -Error COFFWriter::finalizeRelocTargets() { - for (Section &Sec : Obj.getMutableSections()) { - for (Relocation &R : Sec.Relocs) { - const Symbol *Sym = Obj.findSymbol(R.Target); - if (Sym == nullptr) - return createStringError(object_error::invalid_symbol_index, - "relocation target '%s' (%zu) not found", - R.TargetName.str().c_str(), R.Target); - R.Reloc.SymbolTableIndex = Sym->RawIndex; - } - } - return Error::success(); -} - -Error COFFWriter::finalizeSymbolContents() { - for (Symbol &Sym : Obj.getMutableSymbols()) { - if (Sym.TargetSectionId <= 0) { - // Undefined, or a special kind of symbol. These negative values - // are stored in the SectionNumber field which is unsigned. - Sym.Sym.SectionNumber = static_cast(Sym.TargetSectionId); - } else { - const Section *Sec = Obj.findSection(Sym.TargetSectionId); - if (Sec == nullptr) - return createStringError(object_error::invalid_symbol_index, - "symbol '%s' points to a removed section", - Sym.Name.str().c_str()); - Sym.Sym.SectionNumber = Sec->Index; - - if (Sym.Sym.NumberOfAuxSymbols == 1 && - Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) { - coff_aux_section_definition *SD = - reinterpret_cast( - Sym.AuxData[0].Opaque); - uint32_t SDSectionNumber; - if (Sym.AssociativeComdatTargetSectionId == 0) { - // Not a comdat associative section; just set the Number field to - // the number of the section itself. 
- SDSectionNumber = Sec->Index; - } else { - Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId); - if (Sec == nullptr) - return createStringError( - object_error::invalid_symbol_index, - "symbol '%s' is associative to a removed section", - Sym.Name.str().c_str()); - SDSectionNumber = Sec->Index; - } - // Update the section definition with the new section number. - SD->NumberLowPart = static_cast(SDSectionNumber); - SD->NumberHighPart = static_cast(SDSectionNumber >> 16); - } - } - // Check that we actually have got AuxData to match the weak symbol target - // we want to set. Only >= 1 would be required, but only == 1 makes sense. - if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) { - coff_aux_weak_external *WE = - reinterpret_cast(Sym.AuxData[0].Opaque); - const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId); - if (Target == nullptr) - return createStringError(object_error::invalid_symbol_index, - "symbol '%s' is missing its weak target", - Sym.Name.str().c_str()); - WE->TagIndex = Target->RawIndex; - } - } - return Error::success(); -} - -void COFFWriter::layoutSections() { - for (auto &S : Obj.getMutableSections()) { - if (S.Header.SizeOfRawData > 0) - S.Header.PointerToRawData = FileSize; - FileSize += S.Header.SizeOfRawData; // For executables, this is already - // aligned to FileAlignment. - if (S.Relocs.size() >= 0xffff) { - S.Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL; - S.Header.NumberOfRelocations = 0xffff; - S.Header.PointerToRelocations = FileSize; - FileSize += sizeof(coff_relocation); - } else { - S.Header.NumberOfRelocations = S.Relocs.size(); - S.Header.PointerToRelocations = S.Relocs.size() ? FileSize : 0; - } - - FileSize += S.Relocs.size() * sizeof(coff_relocation); - FileSize = alignTo(FileSize, FileAlignment); - - if (S.Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) - SizeOfInitializedData += S.Header.SizeOfRawData; - } -} - -size_t COFFWriter::finalizeStringTable() { - for (const auto &S : Obj.getSections()) - if (S.Name.size() > COFF::NameSize) - StrTabBuilder.add(S.Name); - - for (const auto &S : Obj.getSymbols()) - if (S.Name.size() > COFF::NameSize) - StrTabBuilder.add(S.Name); - - StrTabBuilder.finalize(); - - for (auto &S : Obj.getMutableSections()) { - memset(S.Header.Name, 0, sizeof(S.Header.Name)); - if (S.Name.size() > COFF::NameSize) { - snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d", - (int)StrTabBuilder.getOffset(S.Name)); - } else { - memcpy(S.Header.Name, S.Name.data(), S.Name.size()); - } - } - for (auto &S : Obj.getMutableSymbols()) { - if (S.Name.size() > COFF::NameSize) { - S.Sym.Name.Offset.Zeroes = 0; - S.Sym.Name.Offset.Offset = StrTabBuilder.getOffset(S.Name); - } else { - strncpy(S.Sym.Name.ShortName, S.Name.data(), COFF::NameSize); - } - } - return StrTabBuilder.getSize(); -} - -template -std::pair COFFWriter::finalizeSymbolTable() { - size_t RawSymIndex = 0; - for (auto &S : Obj.getMutableSymbols()) { - // Symbols normally have NumberOfAuxSymbols set correctly all the time. - // For file symbols, we need to know the output file's symbol size to be - // able to calculate the number of slots it occupies. 
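
// A minimal sketch of COFF's long-name encoding that finalizeStringTable above
// implements: a name longer than the 8-byte header field is spilled to the
// string table and the header stores "/<decimal offset>" instead.
// encodeSectionName is a hypothetical stand-in; the real code takes its
// offsets from a StringTableBuilder.
#include <cstdio>
#include <cstring>

static void encodeSectionName(char Header[8], const char *Name,
                              unsigned StrTabOffset) {
  std::memset(Header, 0, 8);
  if (std::strlen(Name) > 8)                       // COFF::NameSize == 8
    std::snprintf(Header, 8, "/%u", StrTabOffset); // e.g. ".gnu_debuglink"
  else
    std::memcpy(Header, Name, std::strlen(Name));  // short names fit inline
}
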
- if (!S.AuxFile.empty()) - S.Sym.NumberOfAuxSymbols = - alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy); - S.RawIndex = RawSymIndex; - RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols; - } - return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy)); -} - -Error COFFWriter::finalize(bool IsBigObj) { - size_t SymTabSize, SymbolSize; - std::tie(SymTabSize, SymbolSize) = IsBigObj - ? finalizeSymbolTable() - : finalizeSymbolTable(); - - if (Error E = finalizeRelocTargets()) - return E; - if (Error E = finalizeSymbolContents()) - return E; - - size_t SizeOfHeaders = 0; - FileAlignment = 1; - size_t PeHeaderSize = 0; - if (Obj.IsPE) { - Obj.DosHeader.AddressOfNewExeHeader = - sizeof(Obj.DosHeader) + Obj.DosStub.size(); - SizeOfHeaders += Obj.DosHeader.AddressOfNewExeHeader + sizeof(PEMagic); - - FileAlignment = Obj.PeHeader.FileAlignment; - Obj.PeHeader.NumberOfRvaAndSize = Obj.DataDirectories.size(); - - PeHeaderSize = Obj.Is64 ? sizeof(pe32plus_header) : sizeof(pe32_header); - SizeOfHeaders += - PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size(); - } - Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size(); - SizeOfHeaders += - IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header); - SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size(); - SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment); - - Obj.CoffFileHeader.SizeOfOptionalHeader = - PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size(); - - FileSize = SizeOfHeaders; - SizeOfInitializedData = 0; - - layoutSections(); - - if (Obj.IsPE) { - Obj.PeHeader.SizeOfHeaders = SizeOfHeaders; - Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData; - - if (!Obj.getSections().empty()) { - const Section &S = Obj.getSections().back(); - Obj.PeHeader.SizeOfImage = - alignTo(S.Header.VirtualAddress + S.Header.VirtualSize, - Obj.PeHeader.SectionAlignment); - } - - // If the PE header had a checksum, clear it, since it isn't valid - // any longer. (We don't calculate a new one.) - Obj.PeHeader.CheckSum = 0; - } - - size_t StrTabSize = finalizeStringTable(); - - size_t PointerToSymbolTable = FileSize; - // StrTabSize <= 4 is the size of an empty string table, only consisting - // of the length field. - if (SymTabSize == 0 && StrTabSize <= 4 && Obj.IsPE) { - // For executables, don't point to the symbol table and skip writing - // the length field, if both the symbol and string tables are empty. - PointerToSymbolTable = 0; - StrTabSize = 0; - } - - size_t NumRawSymbols = SymTabSize / SymbolSize; - Obj.CoffFileHeader.PointerToSymbolTable = PointerToSymbolTable; - Obj.CoffFileHeader.NumberOfSymbols = NumRawSymbols; - FileSize += SymTabSize + StrTabSize; - FileSize = alignTo(FileSize, FileAlignment); - - return Error::success(); -} - -void COFFWriter::writeHeaders(bool IsBigObj) { - uint8_t *Ptr = reinterpret_cast(Buf->getBufferStart()); - if (Obj.IsPE) { - memcpy(Ptr, &Obj.DosHeader, sizeof(Obj.DosHeader)); - Ptr += sizeof(Obj.DosHeader); - memcpy(Ptr, Obj.DosStub.data(), Obj.DosStub.size()); - Ptr += Obj.DosStub.size(); - memcpy(Ptr, PEMagic, sizeof(PEMagic)); - Ptr += sizeof(PEMagic); - } - if (!IsBigObj) { - memcpy(Ptr, &Obj.CoffFileHeader, sizeof(Obj.CoffFileHeader)); - Ptr += sizeof(Obj.CoffFileHeader); - } else { - // Generate a coff_bigobj_file_header, filling it in with the values - // from Obj.CoffFileHeader. All extra fields that don't exist in - // coff_file_header can be set to hardcoded values. 
- coff_bigobj_file_header BigObjHeader; - BigObjHeader.Sig1 = IMAGE_FILE_MACHINE_UNKNOWN; - BigObjHeader.Sig2 = 0xffff; - BigObjHeader.Version = BigObjHeader::MinBigObjectVersion; - BigObjHeader.Machine = Obj.CoffFileHeader.Machine; - BigObjHeader.TimeDateStamp = Obj.CoffFileHeader.TimeDateStamp; - memcpy(BigObjHeader.UUID, BigObjMagic, sizeof(BigObjMagic)); - BigObjHeader.unused1 = 0; - BigObjHeader.unused2 = 0; - BigObjHeader.unused3 = 0; - BigObjHeader.unused4 = 0; - // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus - // get the original one instead. - BigObjHeader.NumberOfSections = Obj.getSections().size(); - BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable; - BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols; - - memcpy(Ptr, &BigObjHeader, sizeof(BigObjHeader)); - Ptr += sizeof(BigObjHeader); - } - if (Obj.IsPE) { - if (Obj.Is64) { - memcpy(Ptr, &Obj.PeHeader, sizeof(Obj.PeHeader)); - Ptr += sizeof(Obj.PeHeader); - } else { - pe32_header PeHeader; - copyPeHeader(PeHeader, Obj.PeHeader); - // The pe32plus_header (stored in Object) lacks the BaseOfData field. - PeHeader.BaseOfData = Obj.BaseOfData; - - memcpy(Ptr, &PeHeader, sizeof(PeHeader)); - Ptr += sizeof(PeHeader); - } - for (const auto &DD : Obj.DataDirectories) { - memcpy(Ptr, &DD, sizeof(DD)); - Ptr += sizeof(DD); - } - } - for (const auto &S : Obj.getSections()) { - memcpy(Ptr, &S.Header, sizeof(S.Header)); - Ptr += sizeof(S.Header); - } -} - -void COFFWriter::writeSections() { - for (const auto &S : Obj.getSections()) { - uint8_t *Ptr = reinterpret_cast(Buf->getBufferStart()) + - S.Header.PointerToRawData; - ArrayRef Contents = S.getContents(); - std::copy(Contents.begin(), Contents.end(), Ptr); - - // For executable sections, pad the remainder of the raw data size with - // 0xcc, which is int3 on x86. - if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) && - S.Header.SizeOfRawData > Contents.size()) - memset(Ptr + Contents.size(), 0xcc, - S.Header.SizeOfRawData - Contents.size()); - - Ptr += S.Header.SizeOfRawData; - - if (S.Relocs.size() >= 0xffff) { - object::coff_relocation R; - R.VirtualAddress = S.Relocs.size() + 1; - R.SymbolTableIndex = 0; - R.Type = 0; - memcpy(Ptr, &R, sizeof(R)); - Ptr += sizeof(R); - } - for (const auto &R : S.Relocs) { - memcpy(Ptr, &R.Reloc, sizeof(R.Reloc)); - Ptr += sizeof(R.Reloc); - } - } -} - -template void COFFWriter::writeSymbolStringTables() { - uint8_t *Ptr = reinterpret_cast(Buf->getBufferStart()) + - Obj.CoffFileHeader.PointerToSymbolTable; - for (const auto &S : Obj.getSymbols()) { - // Convert symbols back to the right size, from coff_symbol32. - copySymbol(*reinterpret_cast(Ptr), - S.Sym); - Ptr += sizeof(SymbolTy); - if (!S.AuxFile.empty()) { - // For file symbols, just write the string into the aux symbol slots, - // assuming that the unwritten parts are initialized to zero in the memory - // mapped file. - std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr); - Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy); - } else { - // For other auxillary symbols, write their opaque payload into one symbol - // table slot each. For big object files, the symbols are larger than the - // opaque auxillary symbol struct and we leave padding at the end of each - // entry. 
-
-template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
-  uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
-                 Obj.CoffFileHeader.PointerToSymbolTable;
-  for (const auto &S : Obj.getSymbols()) {
-    // Convert symbols back to the right size, from coff_symbol32.
-    copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
-                                        S.Sym);
-    Ptr += sizeof(SymbolTy);
-    if (!S.AuxFile.empty()) {
-      // For file symbols, just write the string into the aux symbol slots,
-      // assuming that the unwritten parts are initialized to zero in the memory
-      // mapped file.
-      std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
-      Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
-    } else {
-      // For other auxiliary symbols, write their opaque payload into one symbol
-      // table slot each. For big object files, the symbols are larger than the
-      // opaque auxiliary symbol struct and we leave padding at the end of each
-      // entry.
-      for (const AuxSymbol &AuxSym : S.AuxData) {
-        ArrayRef<uint8_t> Ref = AuxSym.getRef();
-        std::copy(Ref.begin(), Ref.end(), Ptr);
-        Ptr += sizeof(SymbolTy);
-      }
-    }
-  }
-  if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
-    // Always write a string table in object files, even an empty one.
-    StrTabBuilder.write(Ptr);
-    Ptr += StrTabBuilder.getSize();
-  }
-}
-
-Error COFFWriter::write(bool IsBigObj) {
-  if (Error E = finalize(IsBigObj))
-    return E;
-
-  Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize);
-  if (!Buf)
-    return createStringError(llvm::errc::not_enough_memory,
-                             "failed to allocate memory buffer of " +
-                                 Twine::utohexstr(FileSize) + " bytes.");
-
-  writeHeaders(IsBigObj);
-  writeSections();
-  if (IsBigObj)
-    writeSymbolStringTables<coff_symbol32>();
-  else
-    writeSymbolStringTables<coff_symbol16>();
-
-  if (Obj.IsPE)
-    if (Error E = patchDebugDirectory())
-      return E;
-
-  // TODO: Implement direct writing to the output stream (without intermediate
-  // memory buffer Buf).
-  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
-  return Error::success();
-}
-
-Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) {
-  for (const auto &S : Obj.getSections()) {
-    if (RVA >= S.Header.VirtualAddress &&
-        RVA < S.Header.VirtualAddress + S.Header.SizeOfRawData)
-      return S.Header.PointerToRawData + RVA - S.Header.VirtualAddress;
-  }
-  return createStringError(object_error::parse_failed,
-                           "debug directory payload not found");
-}
-
-// Locate which sections contain the debug directories, iterate over all
-// the debug_directory structs in there, and set the PointerToRawData field
-// in all of them, according to their new physical location in the file.
-Error COFFWriter::patchDebugDirectory() {
-  if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY)
-    return Error::success();
-  const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
-  if (Dir->Size <= 0)
-    return Error::success();
-  for (const auto &S : Obj.getSections()) {
-    if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
-        Dir->RelativeVirtualAddress <
-            S.Header.VirtualAddress + S.Header.SizeOfRawData) {
-      if (Dir->RelativeVirtualAddress + Dir->Size >
-          S.Header.VirtualAddress + S.Header.SizeOfRawData)
-        return createStringError(object_error::parse_failed,
-                                 "debug directory extends past end of section");
-
-      size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
-      uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
-                     S.Header.PointerToRawData + Offset;
-      uint8_t *End = Ptr + Dir->Size;
-      while (Ptr < End) {
-        debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
-        if (Debug->PointerToRawData) {
-          if (Expected<uint32_t> FilePosOrErr =
-                  virtualAddressToFileAddress(Debug->AddressOfRawData))
-            Debug->PointerToRawData = *FilePosOrErr;
-          else
-            return FilePosOrErr.takeError();
-        }
-        Ptr += sizeof(debug_directory);
-        Offset += sizeof(debug_directory);
-      }
-      // Debug directory found and patched, all done.
-      return Error::success();
-    }
-  }
-  return createStringError(object_error::parse_failed,
-                           "debug directory not found");
-}
-
-Error COFFWriter::write() {
-  bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16;
-  if (IsBigObj && Obj.IsPE)
-    return createStringError(object_error::parse_failed,
-                             "too many sections for executable");
-  return write(IsBigObj);
-}
-
-} // end namespace coff
-} // end namespace objcopy
-} // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h
deleted file mode 100644
index eed43b3e5814..000000000000
--- a/llvm/tools/llvm-objcopy/COFF/Writer.h
+++ /dev/null
@@ -1,63 +0,0 @@
-//===- Writer.h -------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
-#define LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
-
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <cstddef>
-#include <utility>
-
-namespace llvm {
-namespace objcopy {
-namespace coff {
-
-struct Object;
-
-class COFFWriter {
-  Object &Obj;
-  std::unique_ptr<WritableMemoryBuffer> Buf;
-  raw_ostream &Out;
-
-  size_t FileSize;
-  size_t FileAlignment;
-  size_t SizeOfInitializedData;
-  StringTableBuilder StrTabBuilder;
-
-  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
-  Error finalizeRelocTargets();
-  Error finalizeSymbolContents();
-  void layoutSections();
-  size_t finalizeStringTable();
-
-  Error finalize(bool IsBigObj);
-
-  void writeHeaders(bool IsBigObj);
-  void writeSections();
-  template <class SymbolTy> void writeSymbolStringTables();
-
-  Error write(bool IsBigObj);
-
-  Error patchDebugDirectory();
-  Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);
-
-public:
-  virtual ~COFFWriter() {}
-  Error write();
-
-  COFFWriter(Object &Obj, raw_ostream &Out)
-      : Obj(Obj), Out(Out), StrTabBuilder(StringTableBuilder::WinCOFF) {}
-};
-
-} // end namespace coff
-} // end namespace objcopy
-} // end namespace llvm
-
-#endif // LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
diff --git a/llvm/tools/llvm-objcopy/CommonConfig.h b/llvm/tools/llvm-objcopy/CommonConfig.h
deleted file mode 100644
index ea39a6da2ba5..000000000000
--- a/llvm/tools/llvm-objcopy/CommonConfig.h
+++ /dev/null
@@ -1,260 +0,0 @@
-//===- CommonConfig.h -------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H -#define LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/CachedHashString.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Object/ELFTypes.h" -#include "llvm/Support/GlobPattern.h" -#include "llvm/Support/Regex.h" -// Necessary for llvm::DebugCompressionType::None -#include "llvm/Target/TargetOptions.h" -#include - -namespace llvm { -namespace objcopy { - -enum class FileFormat { - Unspecified, - ELF, - Binary, - IHex, -}; - -// This type keeps track of the machine info for various architectures. This -// lets us map architecture names to ELF types and the e_machine value of the -// ELF file. -struct MachineInfo { - MachineInfo(uint16_t EM, uint8_t ABI, bool Is64, bool IsLittle) - : EMachine(EM), OSABI(ABI), Is64Bit(Is64), IsLittleEndian(IsLittle) {} - // Alternative constructor that defaults to NONE for OSABI. - MachineInfo(uint16_t EM, bool Is64, bool IsLittle) - : MachineInfo(EM, ELF::ELFOSABI_NONE, Is64, IsLittle) {} - // Default constructor for unset fields. - MachineInfo() : MachineInfo(0, 0, false, false) {} - uint16_t EMachine; - uint8_t OSABI; - bool Is64Bit; - bool IsLittleEndian; -}; - -// Flags set by --set-section-flags or --rename-section. Interpretation of these -// is format-specific and not all flags are meaningful for all object file -// formats. This is a bitmask; many section flags may be set. -enum SectionFlag { - SecNone = 0, - SecAlloc = 1 << 0, - SecLoad = 1 << 1, - SecNoload = 1 << 2, - SecReadonly = 1 << 3, - SecDebug = 1 << 4, - SecCode = 1 << 5, - SecData = 1 << 6, - SecRom = 1 << 7, - SecMerge = 1 << 8, - SecStrings = 1 << 9, - SecContents = 1 << 10, - SecShare = 1 << 11, - SecExclude = 1 << 12, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/SecExclude) -}; - -struct SectionRename { - StringRef OriginalName; - StringRef NewName; - Optional NewFlags; -}; - -struct SectionFlagsUpdate { - StringRef Name; - SectionFlag NewFlags; -}; - -enum class DiscardType { - None, // Default - All, // --discard-all (-x) - Locals, // --discard-locals (-X) -}; - -enum class MatchStyle { - Literal, // Default for symbols. - Wildcard, // Default for sections, or enabled with --wildcard (-w). - Regex, // Enabled with --regex. -}; - -class NameOrPattern { - StringRef Name; - // Regex is shared between multiple CommonConfig instances. - std::shared_ptr R; - std::shared_ptr G; - bool IsPositiveMatch = true; - - NameOrPattern(StringRef N) : Name(N) {} - NameOrPattern(std::shared_ptr R) : R(R) {} - NameOrPattern(std::shared_ptr G, bool IsPositiveMatch) - : G(G), IsPositiveMatch(IsPositiveMatch) {} - -public: - // ErrorCallback is used to handle recoverable errors. An Error returned - // by the callback aborts the parsing and is then returned by this function. - static Expected - create(StringRef Pattern, MatchStyle MS, - llvm::function_ref ErrorCallback); - - bool isPositiveMatch() const { return IsPositiveMatch; } - Optional getName() const { - if (!R && !G) - return Name; - return None; - } - bool operator==(StringRef S) const { - return R ? R->match(S) : G ? 
G->match(S) : Name == S; - } - bool operator!=(StringRef S) const { return !operator==(S); } -}; - -// Matcher that checks symbol or section names against the command line flags -// provided for that option. -class NameMatcher { - DenseSet PosNames; - std::vector PosPatterns; - std::vector NegMatchers; - -public: - Error addMatcher(Expected Matcher) { - if (!Matcher) - return Matcher.takeError(); - if (Matcher->isPositiveMatch()) { - if (Optional MaybeName = Matcher->getName()) - PosNames.insert(CachedHashStringRef(*MaybeName)); - else - PosPatterns.push_back(std::move(*Matcher)); - } else { - NegMatchers.push_back(std::move(*Matcher)); - } - return Error::success(); - } - bool matches(StringRef S) const { - return (PosNames.contains(CachedHashStringRef(S)) || - is_contained(PosPatterns, S)) && - !is_contained(NegMatchers, S); - } - bool empty() const { - return PosNames.empty() && PosPatterns.empty() && NegMatchers.empty(); - } -}; - -enum class SymbolFlag { - Global, - Local, - Weak, - Default, - Hidden, - Protected, - File, - Section, - Object, - Function, - IndirectFunction, - Debug, - Constructor, - Warning, - Indirect, - Synthetic, - UniqueObject, -}; - -// Symbol info specified by --add-symbol option. Symbol flags not supported -// by a concrete format should be ignored. -struct NewSymbolInfo { - StringRef SymbolName; - StringRef SectionName; - uint64_t Value = 0; - std::vector Flags; - std::vector BeforeSyms; -}; - -// Configuration for copying/stripping a single file. -struct CommonConfig { - // Main input/output options - StringRef InputFilename; - FileFormat InputFormat = FileFormat::Unspecified; - StringRef OutputFilename; - FileFormat OutputFormat = FileFormat::Unspecified; - - // Only applicable when --output-format!=binary (e.g. elf64-x86-64). - Optional OutputArch; - - // Advanced options - StringRef AddGnuDebugLink; - // Cached gnu_debuglink's target CRC - uint32_t GnuDebugLinkCRC32; - Optional ExtractPartition; - StringRef SplitDWO; - StringRef SymbolsPrefix; - StringRef AllocSectionsPrefix; - DiscardType DiscardMode = DiscardType::None; - - // Repeated options - std::vector AddSection; - std::vector DumpSection; - std::vector UpdateSection; - - // Section matchers - NameMatcher KeepSection; - NameMatcher OnlySection; - NameMatcher ToRemove; - - // Symbol matchers - NameMatcher SymbolsToGlobalize; - NameMatcher SymbolsToKeep; - NameMatcher SymbolsToLocalize; - NameMatcher SymbolsToRemove; - NameMatcher UnneededSymbolsToRemove; - NameMatcher SymbolsToWeaken; - NameMatcher SymbolsToKeepGlobal; - - // Map options - StringMap SectionsToRename; - StringMap SetSectionAlignment; - StringMap SetSectionFlags; - StringMap SymbolsToRename; - - // Symbol info specified by --add-symbol option. 
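A usage sketch of the NameMatcher semantics defined above: positive names and patterns select, '!'-prefixed wildcard patterns veto, and matches() requires a positive hit with no veto. The pattern strings here are hypothetical:

    // Assumes the NameMatcher/NameOrPattern declarations above.
    auto PropagateErr = [](Error E) -> Error { return E; };
    NameMatcher Matcher;
    cantFail(Matcher.addMatcher(
        NameOrPattern::create("foo*", MatchStyle::Wildcard, PropagateErr)));
    cantFail(Matcher.addMatcher(
        NameOrPattern::create("!foo_keep", MatchStyle::Wildcard, PropagateErr)));
    Matcher.matches("foo_bar");  // true: matches "foo*", no veto
    Matcher.matches("foo_keep"); // false: vetoed by "!foo_keep"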
- std::vector SymbolsToAdd; - - // Boolean options - bool DeterministicArchives = true; - bool ExtractDWO = false; - bool ExtractMainPartition = false; - bool OnlyKeepDebug = false; - bool PreserveDates = false; - bool StripAll = false; - bool StripAllGNU = false; - bool StripDWO = false; - bool StripDebug = false; - bool StripNonAlloc = false; - bool StripSections = false; - bool StripUnneeded = false; - bool Weaken = false; - bool DecompressDebugSections = false; - - DebugCompressionType CompressionType = DebugCompressionType::None; -}; - -} // namespace objcopy -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H diff --git a/llvm/tools/llvm-objcopy/ConfigManager.cpp b/llvm/tools/llvm-objcopy/ConfigManager.cpp deleted file mode 100644 index 90730c421a46..000000000000 --- a/llvm/tools/llvm-objcopy/ConfigManager.cpp +++ /dev/null @@ -1,1432 +0,0 @@ -//===- ConfigManager.cpp --------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ConfigManager.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Support/CRC.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compression.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/StringSaver.h" -#include - -using namespace llvm; -using namespace llvm::objcopy; - -namespace { -enum ObjcopyID { - OBJCOPY_INVALID = 0, // This is not an option ID. -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - OBJCOPY_##ID, -#include "ObjcopyOpts.inc" -#undef OPTION -}; - -#define PREFIX(NAME, VALUE) const char *const OBJCOPY_##NAME[] = VALUE; -#include "ObjcopyOpts.inc" -#undef PREFIX - -const opt::OptTable::Info ObjcopyInfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - {OBJCOPY_##PREFIX, \ - NAME, \ - HELPTEXT, \ - METAVAR, \ - OBJCOPY_##ID, \ - opt::Option::KIND##Class, \ - PARAM, \ - FLAGS, \ - OBJCOPY_##GROUP, \ - OBJCOPY_##ALIAS, \ - ALIASARGS, \ - VALUES}, -#include "ObjcopyOpts.inc" -#undef OPTION -}; - -class ObjcopyOptTable : public opt::OptTable { -public: - ObjcopyOptTable() : OptTable(ObjcopyInfoTable) { - setGroupedShortOptions(true); - } -}; - -enum InstallNameToolID { - INSTALL_NAME_TOOL_INVALID = 0, // This is not an option ID. 
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - INSTALL_NAME_TOOL_##ID, -#include "InstallNameToolOpts.inc" -#undef OPTION -}; - -#define PREFIX(NAME, VALUE) \ - const char *const INSTALL_NAME_TOOL_##NAME[] = VALUE; -#include "InstallNameToolOpts.inc" -#undef PREFIX - -const opt::OptTable::Info InstallNameToolInfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - {INSTALL_NAME_TOOL_##PREFIX, \ - NAME, \ - HELPTEXT, \ - METAVAR, \ - INSTALL_NAME_TOOL_##ID, \ - opt::Option::KIND##Class, \ - PARAM, \ - FLAGS, \ - INSTALL_NAME_TOOL_##GROUP, \ - INSTALL_NAME_TOOL_##ALIAS, \ - ALIASARGS, \ - VALUES}, -#include "InstallNameToolOpts.inc" -#undef OPTION -}; - -class InstallNameToolOptTable : public opt::OptTable { -public: - InstallNameToolOptTable() : OptTable(InstallNameToolInfoTable) {} -}; - -enum BitcodeStripID { - BITCODE_STRIP_INVALID = 0, // This is not an option ID. -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - BITCODE_STRIP_##ID, -#include "BitcodeStripOpts.inc" -#undef OPTION -}; - -#define PREFIX(NAME, VALUE) const char *const BITCODE_STRIP_##NAME[] = VALUE; -#include "BitcodeStripOpts.inc" -#undef PREFIX - -const opt::OptTable::Info BitcodeStripInfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - {BITCODE_STRIP_##PREFIX, \ - NAME, \ - HELPTEXT, \ - METAVAR, \ - BITCODE_STRIP_##ID, \ - opt::Option::KIND##Class, \ - PARAM, \ - FLAGS, \ - BITCODE_STRIP_##GROUP, \ - BITCODE_STRIP_##ALIAS, \ - ALIASARGS, \ - VALUES}, -#include "BitcodeStripOpts.inc" -#undef OPTION -}; - -class BitcodeStripOptTable : public opt::OptTable { -public: - BitcodeStripOptTable() : OptTable(BitcodeStripInfoTable) {} -}; - -enum StripID { - STRIP_INVALID = 0, // This is not an option ID. 
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - STRIP_##ID, -#include "StripOpts.inc" -#undef OPTION -}; - -#define PREFIX(NAME, VALUE) const char *const STRIP_##NAME[] = VALUE; -#include "StripOpts.inc" -#undef PREFIX - -const opt::OptTable::Info StripInfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - {STRIP_##PREFIX, NAME, HELPTEXT, \ - METAVAR, STRIP_##ID, opt::Option::KIND##Class, \ - PARAM, FLAGS, STRIP_##GROUP, \ - STRIP_##ALIAS, ALIASARGS, VALUES}, -#include "StripOpts.inc" -#undef OPTION -}; - -class StripOptTable : public opt::OptTable { -public: - StripOptTable() : OptTable(StripInfoTable) { setGroupedShortOptions(true); } -}; - -} // namespace - -static SectionFlag parseSectionRenameFlag(StringRef SectionName) { - return llvm::StringSwitch(SectionName) - .CaseLower("alloc", SectionFlag::SecAlloc) - .CaseLower("load", SectionFlag::SecLoad) - .CaseLower("noload", SectionFlag::SecNoload) - .CaseLower("readonly", SectionFlag::SecReadonly) - .CaseLower("debug", SectionFlag::SecDebug) - .CaseLower("code", SectionFlag::SecCode) - .CaseLower("data", SectionFlag::SecData) - .CaseLower("rom", SectionFlag::SecRom) - .CaseLower("merge", SectionFlag::SecMerge) - .CaseLower("strings", SectionFlag::SecStrings) - .CaseLower("contents", SectionFlag::SecContents) - .CaseLower("share", SectionFlag::SecShare) - .CaseLower("exclude", SectionFlag::SecExclude) - .Default(SectionFlag::SecNone); -} - -static Expected -parseSectionFlagSet(ArrayRef SectionFlags) { - SectionFlag ParsedFlags = SectionFlag::SecNone; - for (StringRef Flag : SectionFlags) { - SectionFlag ParsedFlag = parseSectionRenameFlag(Flag); - if (ParsedFlag == SectionFlag::SecNone) - return createStringError( - errc::invalid_argument, - "unrecognized section flag '%s'. Flags supported for GNU " - "compatibility: alloc, load, noload, readonly, exclude, debug, " - "code, data, rom, share, contents, merge, strings", - Flag.str().c_str()); - ParsedFlags |= ParsedFlag; - } - - return ParsedFlags; -} - -static Expected parseRenameSectionValue(StringRef FlagValue) { - if (!FlagValue.contains('=')) - return createStringError(errc::invalid_argument, - "bad format for --rename-section: missing '='"); - - // Initial split: ".foo" = ".bar,f1,f2,..." - auto Old2New = FlagValue.split('='); - SectionRename SR; - SR.OriginalName = Old2New.first; - - // Flags split: ".bar" "f1" "f2" ... 
- SmallVector NameAndFlags; - Old2New.second.split(NameAndFlags, ','); - SR.NewName = NameAndFlags[0]; - - if (NameAndFlags.size() > 1) { - Expected ParsedFlagSet = - parseSectionFlagSet(makeArrayRef(NameAndFlags).drop_front()); - if (!ParsedFlagSet) - return ParsedFlagSet.takeError(); - SR.NewFlags = *ParsedFlagSet; - } - - return SR; -} - -static Expected> -parseSetSectionAlignment(StringRef FlagValue) { - if (!FlagValue.contains('=')) - return createStringError( - errc::invalid_argument, - "bad format for --set-section-alignment: missing '='"); - auto Split = StringRef(FlagValue).split('='); - if (Split.first.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --set-section-alignment: missing section name"); - uint64_t NewAlign; - if (Split.second.getAsInteger(0, NewAlign)) - return createStringError( - errc::invalid_argument, - "invalid alignment for --set-section-alignment: '%s'", - Split.second.str().c_str()); - return std::make_pair(Split.first, NewAlign); -} - -static Expected -parseSetSectionFlagValue(StringRef FlagValue) { - if (!StringRef(FlagValue).contains('=')) - return createStringError(errc::invalid_argument, - "bad format for --set-section-flags: missing '='"); - - // Initial split: ".foo" = "f1,f2,..." - auto Section2Flags = StringRef(FlagValue).split('='); - SectionFlagsUpdate SFU; - SFU.Name = Section2Flags.first; - - // Flags split: "f1" "f2" ... - SmallVector SectionFlags; - Section2Flags.second.split(SectionFlags, ','); - Expected ParsedFlagSet = parseSectionFlagSet(SectionFlags); - if (!ParsedFlagSet) - return ParsedFlagSet.takeError(); - SFU.NewFlags = *ParsedFlagSet; - - return SFU; -} - -namespace { -struct TargetInfo { - FileFormat Format; - MachineInfo Machine; -}; -} // namespace - -// FIXME: consolidate with the bfd parsing used by lld. 
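Before the target-name table below, a usage sketch for parseRenameSectionValue above (the section names are hypothetical):

    // --rename-section=.foo=.bar,alloc,readonly
    Expected<SectionRename> SR =
        parseRenameSectionValue(".foo=.bar,alloc,readonly");
    // On success: SR->OriginalName == ".foo", SR->NewName == ".bar", and
    // *SR->NewFlags == (SectionFlag::SecAlloc | SectionFlag::SecReadonly).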
-static const StringMap TargetMap{ - // Name, {EMachine, 64bit, LittleEndian} - // x86 - {"elf32-i386", {ELF::EM_386, false, true}}, - {"elf32-x86-64", {ELF::EM_X86_64, false, true}}, - {"elf64-x86-64", {ELF::EM_X86_64, true, true}}, - // Intel MCU - {"elf32-iamcu", {ELF::EM_IAMCU, false, true}}, - // ARM - {"elf32-littlearm", {ELF::EM_ARM, false, true}}, - // ARM AArch64 - {"elf64-aarch64", {ELF::EM_AARCH64, true, true}}, - {"elf64-littleaarch64", {ELF::EM_AARCH64, true, true}}, - // RISC-V - {"elf32-littleriscv", {ELF::EM_RISCV, false, true}}, - {"elf64-littleriscv", {ELF::EM_RISCV, true, true}}, - // PowerPC - {"elf32-powerpc", {ELF::EM_PPC, false, false}}, - {"elf32-powerpcle", {ELF::EM_PPC, false, true}}, - {"elf64-powerpc", {ELF::EM_PPC64, true, false}}, - {"elf64-powerpcle", {ELF::EM_PPC64, true, true}}, - // MIPS - {"elf32-bigmips", {ELF::EM_MIPS, false, false}}, - {"elf32-ntradbigmips", {ELF::EM_MIPS, false, false}}, - {"elf32-ntradlittlemips", {ELF::EM_MIPS, false, true}}, - {"elf32-tradbigmips", {ELF::EM_MIPS, false, false}}, - {"elf32-tradlittlemips", {ELF::EM_MIPS, false, true}}, - {"elf64-tradbigmips", {ELF::EM_MIPS, true, false}}, - {"elf64-tradlittlemips", {ELF::EM_MIPS, true, true}}, - // SPARC - {"elf32-sparc", {ELF::EM_SPARC, false, false}}, - {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, - {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, -}; - -static Expected -getOutputTargetInfoByTargetName(StringRef TargetName) { - StringRef OriginalTargetName = TargetName; - bool IsFreeBSD = TargetName.consume_back("-freebsd"); - auto Iter = TargetMap.find(TargetName); - if (Iter == std::end(TargetMap)) - return createStringError(errc::invalid_argument, - "invalid output format: '%s'", - OriginalTargetName.str().c_str()); - MachineInfo MI = Iter->getValue(); - if (IsFreeBSD) - MI.OSABI = ELF::ELFOSABI_FREEBSD; - - FileFormat Format; - if (TargetName.startswith("elf")) - Format = FileFormat::ELF; - else - // This should never happen because `TargetName` is valid (it certainly - // exists in the TargetMap). - llvm_unreachable("unknown target prefix"); - - return {TargetInfo{Format, MI}}; -} - -static Error addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, - StringRef Filename, MatchStyle MS, - function_ref ErrorCallback) { - StringSaver Saver(Alloc); - SmallVector Lines; - auto BufOrErr = MemoryBuffer::getFile(Filename); - if (!BufOrErr) - return createFileError(Filename, BufOrErr.getError()); - - BufOrErr.get()->getBuffer().split(Lines, '\n'); - for (StringRef Line : Lines) { - // Ignore everything after '#', trim whitespace, and only add the symbol if - // it's not empty. - auto TrimmedLine = Line.split('#').first.trim(); - if (!TrimmedLine.empty()) - if (Error E = Symbols.addMatcher(NameOrPattern::create( - Saver.save(TrimmedLine), MS, ErrorCallback))) - return E; - } - - return Error::success(); -} - -Expected -NameOrPattern::create(StringRef Pattern, MatchStyle MS, - function_ref ErrorCallback) { - switch (MS) { - case MatchStyle::Literal: - return NameOrPattern(Pattern); - case MatchStyle::Wildcard: { - SmallVector Data; - bool IsPositiveMatch = true; - if (Pattern[0] == '!') { - IsPositiveMatch = false; - Pattern = Pattern.drop_front(); - } - Expected GlobOrErr = GlobPattern::create(Pattern); - - // If we couldn't create it as a glob, report the error, but try again with - // a literal if the error reporting is non-fatal. 
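A sketch of that fallback with a non-fatal callback (callback and pattern are hypothetical): a swallowed glob error makes create() below retry the pattern as a literal.

    auto Warn = [](Error E) -> Error {
      logAllUnhandledErrors(std::move(E), errs(), "warning: ");
      return Error::success(); // swallow the glob error
    };
    Expected<NameOrPattern> P =
        NameOrPattern::create("sym[", MatchStyle::Wildcard, Warn);
    // "sym[" is not a valid glob; with the error swallowed, P ends up holding
    // the literal matcher for the string "sym[".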
- if (!GlobOrErr) { - if (Error E = ErrorCallback(GlobOrErr.takeError())) - return std::move(E); - return create(Pattern, MatchStyle::Literal, ErrorCallback); - } - - return NameOrPattern(std::make_shared(*GlobOrErr), - IsPositiveMatch); - } - case MatchStyle::Regex: { - SmallVector Data; - return NameOrPattern(std::make_shared( - ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data))); - } - } - llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum"); -} - -static Error addSymbolsToRenameFromFile(StringMap &SymbolsToRename, - BumpPtrAllocator &Alloc, - StringRef Filename) { - StringSaver Saver(Alloc); - SmallVector Lines; - auto BufOrErr = MemoryBuffer::getFile(Filename); - if (!BufOrErr) - return createFileError(Filename, BufOrErr.getError()); - - BufOrErr.get()->getBuffer().split(Lines, '\n'); - size_t NumLines = Lines.size(); - for (size_t LineNo = 0; LineNo < NumLines; ++LineNo) { - StringRef TrimmedLine = Lines[LineNo].split('#').first.trim(); - if (TrimmedLine.empty()) - continue; - - std::pair Pair = Saver.save(TrimmedLine).split(' '); - StringRef NewName = Pair.second.trim(); - if (NewName.empty()) - return createStringError(errc::invalid_argument, - "%s:%zu: missing new symbol name", - Filename.str().c_str(), LineNo + 1); - SymbolsToRename.insert({Pair.first, NewName}); - } - return Error::success(); -} - -template static ErrorOr getAsInteger(StringRef Val) { - T Result; - if (Val.getAsInteger(0, Result)) - return errc::invalid_argument; - return Result; -} - -namespace { - -enum class ToolType { Objcopy, Strip, InstallNameTool, BitcodeStrip }; - -} // anonymous namespace - -static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS, - ToolType Tool) { - StringRef HelpText, ToolName; - switch (Tool) { - case ToolType::Objcopy: - ToolName = "llvm-objcopy"; - HelpText = " [options] input [output]"; - break; - case ToolType::Strip: - ToolName = "llvm-strip"; - HelpText = " [options] inputs..."; - break; - case ToolType::InstallNameTool: - ToolName = "llvm-install-name-tool"; - HelpText = " [options] input"; - break; - case ToolType::BitcodeStrip: - ToolName = "llvm-bitcode-strip"; - HelpText = " [options] input"; - break; - } - OptTable.printHelp(OS, (ToolName + HelpText).str().c_str(), - (ToolName + " tool").str().c_str()); - // TODO: Replace this with libOption call once it adds extrahelp support. - // The CommandLine library has a cl::extrahelp class to support this, - // but libOption does not have that yet. - OS << "\nPass @FILE as argument to read options from FILE.\n"; -} - -static Expected parseNewSymbolInfo(StringRef FlagValue) { - // Parse value given with --add-symbol option and create the - // new symbol if possible. The value format for --add-symbol is: - // - // =[
<section>:]<value>[,<flags>]
-  //
-  // where:
-  //   <name> - symbol name, can be empty string
-  //   <section>
- optional section name. If not given ABS symbol is created - // - symbol value, can be decimal or hexadecimal number prefixed - // with 0x. - // - optional flags affecting symbol type, binding or visibility. - NewSymbolInfo SI; - StringRef Value; - std::tie(SI.SymbolName, Value) = FlagValue.split('='); - if (Value.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-symbol, missing '=' after '%s'", - SI.SymbolName.str().c_str()); - - if (Value.contains(':')) { - std::tie(SI.SectionName, Value) = Value.split(':'); - if (SI.SectionName.empty() || Value.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-symbol, missing section name or symbol value"); - } - - SmallVector Flags; - Value.split(Flags, ','); - if (Flags[0].getAsInteger(0, SI.Value)) - return createStringError(errc::invalid_argument, "bad symbol value: '%s'", - Flags[0].str().c_str()); - - using Functor = std::function; - SmallVector UnsupportedFlags; - for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) - static_cast( - StringSwitch(Flags[I]) - .CaseLower("global", - [&] { SI.Flags.push_back(SymbolFlag::Global); }) - .CaseLower("local", [&] { SI.Flags.push_back(SymbolFlag::Local); }) - .CaseLower("weak", [&] { SI.Flags.push_back(SymbolFlag::Weak); }) - .CaseLower("default", - [&] { SI.Flags.push_back(SymbolFlag::Default); }) - .CaseLower("hidden", - [&] { SI.Flags.push_back(SymbolFlag::Hidden); }) - .CaseLower("protected", - [&] { SI.Flags.push_back(SymbolFlag::Protected); }) - .CaseLower("file", [&] { SI.Flags.push_back(SymbolFlag::File); }) - .CaseLower("section", - [&] { SI.Flags.push_back(SymbolFlag::Section); }) - .CaseLower("object", - [&] { SI.Flags.push_back(SymbolFlag::Object); }) - .CaseLower("function", - [&] { SI.Flags.push_back(SymbolFlag::Function); }) - .CaseLower( - "indirect-function", - [&] { SI.Flags.push_back(SymbolFlag::IndirectFunction); }) - .CaseLower("debug", [&] { SI.Flags.push_back(SymbolFlag::Debug); }) - .CaseLower("constructor", - [&] { SI.Flags.push_back(SymbolFlag::Constructor); }) - .CaseLower("warning", - [&] { SI.Flags.push_back(SymbolFlag::Warning); }) - .CaseLower("indirect", - [&] { SI.Flags.push_back(SymbolFlag::Indirect); }) - .CaseLower("synthetic", - [&] { SI.Flags.push_back(SymbolFlag::Synthetic); }) - .CaseLower("unique-object", - [&] { SI.Flags.push_back(SymbolFlag::UniqueObject); }) - .StartsWithLower("before=", - [&] { - StringRef SymNamePart = - Flags[I].split('=').second; - - if (!SymNamePart.empty()) - SI.BeforeSyms.push_back(SymNamePart); - }) - .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); - if (!UnsupportedFlags.empty()) - return createStringError(errc::invalid_argument, - "unsupported flag%s for --add-symbol: '%s'", - UnsupportedFlags.size() > 1 ? 
"s" : "", - join(UnsupportedFlags, "', '").c_str()); - - return SI; -} - -Expected ConfigManager::getELFConfig() const { - return ELF; -} - -Expected ConfigManager::getCOFFConfig() const { - if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || - !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() || - !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() || - !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() || - !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || - !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - Common.ExtractDWO || Common.PreserveDates || Common.StripDWO || - Common.StripNonAlloc || Common.StripSections || Common.Weaken || - Common.DecompressDebugSections || - Common.DiscardMode == DiscardType::Locals || - !Common.SymbolsToAdd.empty()) { - return createStringError(llvm::errc::invalid_argument, - "option not supported by llvm-objcopy for COFF"); - } - - return COFF; -} - -Expected ConfigManager::getMachOConfig() const { - if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || - !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() || - !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() || - !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() || - !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() || - !Common.UnneededSymbolsToRemove.empty() || - !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || - Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU || - Common.StripDWO || Common.StripNonAlloc || Common.StripSections || - Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded || - Common.DiscardMode == DiscardType::Locals || - !Common.SymbolsToAdd.empty()) { - return createStringError(llvm::errc::invalid_argument, - "option not supported by llvm-objcopy for MachO"); - } - - return MachO; -} - -Expected ConfigManager::getWasmConfig() const { - if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition || - !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || - !Common.AllocSectionsPrefix.empty() || - Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() || - !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() || - !Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() || - !Common.UnneededSymbolsToRemove.empty() || - !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || - !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty()) { - return createStringError( - llvm::errc::invalid_argument, - "only flags for section dumping, removal, and addition are supported"); - } - - return Wasm; -} - -// ParseObjcopyOptions returns the config and sets the input arguments. If a -// help flag is set then ParseObjcopyOptions will print the help messege and -// exit. 
-Expected -objcopy::parseObjcopyOptions(ArrayRef RawArgsArr, - function_ref ErrorCallback) { - DriverConfig DC; - ObjcopyOptTable T; - - const char *const *DashDash = - std::find_if(RawArgsArr.begin(), RawArgsArr.end(), - [](StringRef Str) { return Str == "--"; }); - ArrayRef ArgsArr = makeArrayRef(RawArgsArr.begin(), DashDash); - if (DashDash != RawArgsArr.end()) - DashDash = std::next(DashDash); - - unsigned MissingArgumentIndex, MissingArgumentCount; - llvm::opt::InputArgList InputArgs = - T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); - - if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) { - printHelp(T, errs(), ToolType::Objcopy); - exit(1); - } - - if (InputArgs.hasArg(OBJCOPY_help)) { - printHelp(T, outs(), ToolType::Objcopy); - exit(0); - } - - if (InputArgs.hasArg(OBJCOPY_version)) { - outs() << "llvm-objcopy, compatible with GNU objcopy\n"; - cl::PrintVersionMessage(); - exit(0); - } - - SmallVector Positional; - - for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN)) - return createStringError(errc::invalid_argument, "unknown argument '%s'", - Arg->getAsString(InputArgs).c_str()); - - for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT)) - Positional.push_back(Arg->getValue()); - std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional)); - - if (Positional.empty()) - return createStringError(errc::invalid_argument, "no input file specified"); - - if (Positional.size() > 2) - return createStringError(errc::invalid_argument, - "too many positional arguments"); - - ConfigManager ConfigMgr; - CommonConfig &Config = ConfigMgr.Common; - COFFConfig &COFFConfig = ConfigMgr.COFF; - ELFConfig &ELFConfig = ConfigMgr.ELF; - MachOConfig &MachOConfig = ConfigMgr.MachO; - Config.InputFilename = Positional[0]; - Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1]; - if (InputArgs.hasArg(OBJCOPY_target) && - (InputArgs.hasArg(OBJCOPY_input_target) || - InputArgs.hasArg(OBJCOPY_output_target))) - return createStringError( - errc::invalid_argument, - "--target cannot be used with --input-target or --output-target"); - - if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard)) - return createStringError(errc::invalid_argument, - "--regex and --wildcard are incompatible"); - - MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex) - ? MatchStyle::Regex - : MatchStyle::Wildcard; - MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex) - ? MatchStyle::Regex - : InputArgs.hasArg(OBJCOPY_wildcard) - ? MatchStyle::Wildcard - : MatchStyle::Literal; - StringRef InputFormat, OutputFormat; - if (InputArgs.hasArg(OBJCOPY_target)) { - InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); - OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target); - } else { - InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target); - OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target); - } - - // FIXME: Currently, we ignore the target for non-binary/ihex formats - // explicitly specified by -I option (e.g. -Ielf32-x86-64) and guess the - // format by llvm::object::createBinary regardless of the option value. 
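parseObjcopyOptions above first splits the raw argument list at a literal "--", so that anything after it is always treated as a positional input. A condensed sketch with hypothetical arguments:

    const char *Argv[] = {"--strip-debug", "--", "--odd-file-name"};
    ArrayRef<const char *> Raw(Argv);
    const char *const *DashDash = std::find_if(
        Raw.begin(), Raw.end(), [](StringRef S) { return S == "--"; });
    ArrayRef<const char *> Opts = makeArrayRef(Raw.begin(), DashDash);
    // Opts holds only "--strip-debug"; "--odd-file-name" is consumed as an
    // input file even though it looks like an option.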
- Config.InputFormat = StringSwitch(InputFormat) - .Case("binary", FileFormat::Binary) - .Case("ihex", FileFormat::IHex) - .Default(FileFormat::Unspecified); - - if (InputArgs.hasArg(OBJCOPY_new_symbol_visibility)) { - const uint8_t Invalid = 0xff; - StringRef VisibilityStr = - InputArgs.getLastArgValue(OBJCOPY_new_symbol_visibility); - - ELFConfig.NewSymbolVisibility = StringSwitch(VisibilityStr) - .Case("default", ELF::STV_DEFAULT) - .Case("hidden", ELF::STV_HIDDEN) - .Case("internal", ELF::STV_INTERNAL) - .Case("protected", ELF::STV_PROTECTED) - .Default(Invalid); - - if (ELFConfig.NewSymbolVisibility == Invalid) - return createStringError(errc::invalid_argument, - "'%s' is not a valid symbol visibility", - VisibilityStr.str().c_str()); - } - - for (const auto *Arg : InputArgs.filtered(OBJCOPY_subsystem)) { - StringRef Subsystem, Version; - std::tie(Subsystem, Version) = StringRef(Arg->getValue()).split(':'); - COFFConfig.Subsystem = - StringSwitch(Subsystem.lower()) - .Case("boot_application", - COFF::IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION) - .Case("console", COFF::IMAGE_SUBSYSTEM_WINDOWS_CUI) - .Case("efi_application", COFF::IMAGE_SUBSYSTEM_EFI_APPLICATION) - .Case("efi_boot_service_driver", - COFF::IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER) - .Case("efi_rom", COFF::IMAGE_SUBSYSTEM_EFI_ROM) - .Case("efi_runtime_driver", - COFF::IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER) - .Case("native", COFF::IMAGE_SUBSYSTEM_NATIVE) - .Case("posix", COFF::IMAGE_SUBSYSTEM_POSIX_CUI) - .Case("windows", COFF::IMAGE_SUBSYSTEM_WINDOWS_GUI) - .Default(COFF::IMAGE_SUBSYSTEM_UNKNOWN); - if (*COFFConfig.Subsystem == COFF::IMAGE_SUBSYSTEM_UNKNOWN) - return createStringError(errc::invalid_argument, - "'%s' is not a valid subsystem", - Subsystem.str().c_str()); - if (!Version.empty()) { - StringRef Major, Minor; - std::tie(Major, Minor) = Version.split('.'); - unsigned Number; - if (Major.getAsInteger(10, Number)) - return createStringError(errc::invalid_argument, - "'%s' is not a valid subsystem major version", - Major.str().c_str()); - COFFConfig.MajorSubsystemVersion = Number; - Number = 0; - if (!Minor.empty() && Minor.getAsInteger(10, Number)) - return createStringError(errc::invalid_argument, - "'%s' is not a valid subsystem minor version", - Minor.str().c_str()); - COFFConfig.MinorSubsystemVersion = Number; - } - } - - Config.OutputFormat = StringSwitch(OutputFormat) - .Case("binary", FileFormat::Binary) - .Case("ihex", FileFormat::IHex) - .Default(FileFormat::Unspecified); - if (Config.OutputFormat == FileFormat::Unspecified) { - if (OutputFormat.empty()) { - Config.OutputFormat = Config.InputFormat; - } else { - Expected Target = - getOutputTargetInfoByTargetName(OutputFormat); - if (!Target) - return Target.takeError(); - Config.OutputFormat = Target->Format; - Config.OutputArch = Target->Machine; - } - } - - if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections, - OBJCOPY_compress_debug_sections_eq)) { - Config.CompressionType = DebugCompressionType::Z; - - if (Arg->getOption().getID() == OBJCOPY_compress_debug_sections_eq) { - Config.CompressionType = - StringSwitch( - InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq)) - .Case("zlib-gnu", DebugCompressionType::GNU) - .Case("zlib", DebugCompressionType::Z) - .Default(DebugCompressionType::None); - if (Config.CompressionType == DebugCompressionType::None) - return createStringError( - errc::invalid_argument, - "invalid or unsupported --compress-debug-sections format: %s", - 
InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq) - .str() - .c_str()); - } - if (!zlib::isAvailable()) - return createStringError( - errc::invalid_argument, - "LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress"); - } - - Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink); - // The gnu_debuglink's target is expected to not change or else its CRC would - // become invalidated and get rejected. We can avoid recalculating the - // checksum for every target file inside an archive by precomputing the CRC - // here. This prevents a significant amount of I/O. - if (!Config.AddGnuDebugLink.empty()) { - auto DebugOrErr = MemoryBuffer::getFile(Config.AddGnuDebugLink); - if (!DebugOrErr) - return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError()); - auto Debug = std::move(*DebugOrErr); - Config.GnuDebugLinkCRC32 = - llvm::crc32(arrayRefFromStringRef(Debug->getBuffer())); - } - Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo); - Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols); - Config.AllocSectionsPrefix = - InputArgs.getLastArgValue(OBJCOPY_prefix_alloc_sections); - if (auto Arg = InputArgs.getLastArg(OBJCOPY_extract_partition)) - Config.ExtractPartition = Arg->getValue(); - - for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) { - if (!StringRef(Arg->getValue()).contains('=')) - return createStringError(errc::invalid_argument, - "bad format for --redefine-sym"); - auto Old2New = StringRef(Arg->getValue()).split('='); - if (!Config.SymbolsToRename.insert(Old2New).second) - return createStringError(errc::invalid_argument, - "multiple redefinition of symbol '%s'", - Old2New.first.str().c_str()); - } - - for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbols)) - if (Error E = addSymbolsToRenameFromFile(Config.SymbolsToRename, DC.Alloc, - Arg->getValue())) - return std::move(E); - - for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) { - Expected SR = - parseRenameSectionValue(StringRef(Arg->getValue())); - if (!SR) - return SR.takeError(); - if (!Config.SectionsToRename.try_emplace(SR->OriginalName, *SR).second) - return createStringError(errc::invalid_argument, - "multiple renames of section '%s'", - SR->OriginalName.str().c_str()); - } - for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) { - Expected> NameAndAlign = - parseSetSectionAlignment(Arg->getValue()); - if (!NameAndAlign) - return NameAndAlign.takeError(); - Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second; - } - for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) { - Expected SFU = - parseSetSectionFlagValue(Arg->getValue()); - if (!SFU) - return SFU.takeError(); - if (!Config.SetSectionFlags.try_emplace(SFU->Name, *SFU).second) - return createStringError( - errc::invalid_argument, - "--set-section-flags set multiple times for section '%s'", - SFU->Name.str().c_str()); - } - // Prohibit combinations of --set-section-flags when the section name is used - // by --rename-section, either as a source or a destination. 
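The gnu_debuglink CRC precomputation above uses real LLVM APIs (MemoryBuffer::getFile, arrayRefFromStringRef, llvm::crc32); a standalone sketch with a hypothetical file name:

    auto DebugOrErr = MemoryBuffer::getFile("app.debug"); // hypothetical path
    if (DebugOrErr) {
      uint32_t CRC =
          llvm::crc32(arrayRefFromStringRef((*DebugOrErr)->getBuffer()));
      (void)CRC; // the value later stored alongside the .gnu_debuglink name
    }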
- for (const auto &E : Config.SectionsToRename) { - const SectionRename &SR = E.second; - if (Config.SetSectionFlags.count(SR.OriginalName)) - return createStringError( - errc::invalid_argument, - "--set-section-flags=%s conflicts with --rename-section=%s=%s", - SR.OriginalName.str().c_str(), SR.OriginalName.str().c_str(), - SR.NewName.str().c_str()); - if (Config.SetSectionFlags.count(SR.NewName)) - return createStringError( - errc::invalid_argument, - "--set-section-flags=%s conflicts with --rename-section=%s=%s", - SR.NewName.str().c_str(), SR.OriginalName.str().c_str(), - SR.NewName.str().c_str()); - } - - for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) - if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( - Arg->getValue(), SectionMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section)) - if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( - Arg->getValue(), SectionMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_only_section)) - if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create( - Arg->getValue(), SectionMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) { - StringRef ArgValue(Arg->getValue()); - if (!ArgValue.contains('=')) - return createStringError(errc::invalid_argument, - "bad format for --add-section: missing '='"); - if (ArgValue.split("=").second.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-section: missing file name"); - Config.AddSection.push_back(ArgValue); - } - for (auto Arg : InputArgs.filtered(OBJCOPY_update_section)) { - StringRef ArgValue(Arg->getValue()); - if (!ArgValue.contains('=')) - return createStringError(errc::invalid_argument, - "bad format for --update-section: missing '='"); - if (ArgValue.split("=").second.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --update-section: missing file name"); - Config.UpdateSection.push_back(ArgValue); - } - for (auto *Arg : InputArgs.filtered(OBJCOPY_dump_section)) { - StringRef Value(Arg->getValue()); - if (Value.split('=').second.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --dump-section, expected section=file"); - Config.DumpSection.push_back(Value); - } - Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all); - Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu); - Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug); - Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo); - Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections); - Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc); - Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded); - Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo); - Config.ExtractMainPartition = - InputArgs.hasArg(OBJCOPY_extract_main_partition); - ELFConfig.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden); - Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken); - if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals)) - Config.DiscardMode = - InputArgs.hasFlag(OBJCOPY_discard_all, OBJCOPY_discard_locals) - ? 
DiscardType::All - : DiscardType::Locals; - Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug); - ELFConfig.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols); - MachOConfig.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined); - Config.DecompressDebugSections = - InputArgs.hasArg(OBJCOPY_decompress_debug_sections); - if (Config.DiscardMode == DiscardType::All) { - Config.StripDebug = true; - ELFConfig.KeepFileSymbols = true; - } - for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) - if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, - Arg->getValue(), SymbolMatchStyle, - ErrorCallback)) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) - if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, - Arg->getValue(), SymbolMatchStyle, - ErrorCallback)) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) - if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, - Arg->getValue(), SymbolMatchStyle, - ErrorCallback)) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) - if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, - Arg->getValue(), SymbolMatchStyle, - ErrorCallback)) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) - if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, - Arg->getValue(), SymbolMatchStyle, - ErrorCallback)) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) - if (Error E = - Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) - if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, - Arg->getValue(), SymbolMatchStyle, - ErrorCallback)) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) - if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) - if (Error E = - addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(), - SymbolMatchStyle, ErrorCallback)) - return std::move(E); - for (auto *Arg : InputArgs.filtered(OBJCOPY_add_symbol)) { - Expected SymInfo = 
parseNewSymbolInfo(Arg->getValue()); - if (!SymInfo) - return SymInfo.takeError(); - - Config.SymbolsToAdd.push_back(*SymInfo); - } - - ELFConfig.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links); - - Config.DeterministicArchives = InputArgs.hasFlag( - OBJCOPY_enable_deterministic_archives, - OBJCOPY_disable_deterministic_archives, /*default=*/true); - - Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates); - - if (Config.PreserveDates && - (Config.OutputFilename == "-" || Config.InputFilename == "-")) - return createStringError(errc::invalid_argument, - "--preserve-dates requires a file"); - - for (auto Arg : InputArgs) - if (Arg->getOption().matches(OBJCOPY_set_start)) { - auto EAddr = getAsInteger(Arg->getValue()); - if (!EAddr) - return createStringError( - EAddr.getError(), "bad entry point address: '%s'", Arg->getValue()); - - ELFConfig.EntryExpr = [EAddr](uint64_t) { return *EAddr; }; - } else if (Arg->getOption().matches(OBJCOPY_change_start)) { - auto EIncr = getAsInteger(Arg->getValue()); - if (!EIncr) - return createStringError(EIncr.getError(), - "bad entry point increment: '%s'", - Arg->getValue()); - auto Expr = ELFConfig.EntryExpr ? std::move(ELFConfig.EntryExpr) - : [](uint64_t A) { return A; }; - ELFConfig.EntryExpr = [Expr, EIncr](uint64_t EAddr) { - return Expr(EAddr) + *EIncr; - }; - } - - if (Config.DecompressDebugSections && - Config.CompressionType != DebugCompressionType::None) { - return createStringError( - errc::invalid_argument, - "cannot specify both --compress-debug-sections and " - "--decompress-debug-sections"); - } - - if (Config.DecompressDebugSections && !zlib::isAvailable()) - return createStringError( - errc::invalid_argument, - "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress"); - - if (Config.ExtractPartition && Config.ExtractMainPartition) - return createStringError(errc::invalid_argument, - "cannot specify --extract-partition together with " - "--extract-main-partition"); - - DC.CopyConfigs.push_back(std::move(ConfigMgr)); - return std::move(DC); -} - -// ParseInstallNameToolOptions returns the config and sets the input arguments. -// If a help flag is set then ParseInstallNameToolOptions will print the help -// messege and exit. 
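The --set-start/--change-start handling above builds EntryExpr by wrapping closures, so later options compose with earlier ones. A standalone sketch of the same composition with hypothetical addresses:

    #include <cassert>
    #include <cstdint>
    #include <functional>

    int main() {
      std::function<uint64_t(uint64_t)> Entry; // unset until an option is seen
      // --set-start=0x400000 discards the old entry point.
      Entry = [](uint64_t) { return UINT64_C(0x400000); };
      // --change-start=0x10 wraps whatever expression is already installed.
      auto Prev = std::move(Entry);
      Entry = [Prev](uint64_t Addr) { return Prev(Addr) + 0x10; };
      assert(Entry(0) == 0x400010);
    }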
-Expected -objcopy::parseInstallNameToolOptions(ArrayRef ArgsArr) { - DriverConfig DC; - ConfigManager ConfigMgr; - CommonConfig &Config = ConfigMgr.Common; - MachOConfig &MachOConfig = ConfigMgr.MachO; - InstallNameToolOptTable T; - unsigned MissingArgumentIndex, MissingArgumentCount; - llvm::opt::InputArgList InputArgs = - T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); - - if (MissingArgumentCount) - return createStringError( - errc::invalid_argument, - "missing argument to " + - StringRef(InputArgs.getArgString(MissingArgumentIndex)) + - " option"); - - if (InputArgs.size() == 0) { - printHelp(T, errs(), ToolType::InstallNameTool); - exit(1); - } - - if (InputArgs.hasArg(INSTALL_NAME_TOOL_help)) { - printHelp(T, outs(), ToolType::InstallNameTool); - exit(0); - } - - if (InputArgs.hasArg(INSTALL_NAME_TOOL_version)) { - outs() << "llvm-install-name-tool, compatible with cctools " - "install_name_tool\n"; - cl::PrintVersionMessage(); - exit(0); - } - - for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath)) - MachOConfig.RPathToAdd.push_back(Arg->getValue()); - - for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_prepend_rpath)) - MachOConfig.RPathToPrepend.push_back(Arg->getValue()); - - for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) { - StringRef RPath = Arg->getValue(); - - // Cannot add and delete the same rpath at the same time. - if (is_contained(MachOConfig.RPathToAdd, RPath)) - return createStringError( - errc::invalid_argument, - "cannot specify both -add_rpath '%s' and -delete_rpath '%s'", - RPath.str().c_str(), RPath.str().c_str()); - if (is_contained(MachOConfig.RPathToPrepend, RPath)) - return createStringError( - errc::invalid_argument, - "cannot specify both -prepend_rpath '%s' and -delete_rpath '%s'", - RPath.str().c_str(), RPath.str().c_str()); - - MachOConfig.RPathsToRemove.insert(RPath); - } - - for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_rpath)) { - StringRef Old = Arg->getValue(0); - StringRef New = Arg->getValue(1); - - auto Match = [=](StringRef RPath) { return RPath == Old || RPath == New; }; - - // Cannot specify duplicate -rpath entries - auto It1 = find_if( - MachOConfig.RPathsToUpdate, - [&Match](const DenseMap::value_type &OldNew) { - return Match(OldNew.getFirst()) || Match(OldNew.getSecond()); - }); - if (It1 != MachOConfig.RPathsToUpdate.end()) - return createStringError(errc::invalid_argument, - "cannot specify both -rpath '" + - It1->getFirst() + "' '" + It1->getSecond() + - "' and -rpath '" + Old + "' '" + New + "'"); - - // Cannot specify the same rpath under both -delete_rpath and -rpath - auto It2 = find_if(MachOConfig.RPathsToRemove, Match); - if (It2 != MachOConfig.RPathsToRemove.end()) - return createStringError(errc::invalid_argument, - "cannot specify both -delete_rpath '" + *It2 + - "' and -rpath '" + Old + "' '" + New + "'"); - - // Cannot specify the same rpath under both -add_rpath and -rpath - auto It3 = find_if(MachOConfig.RPathToAdd, Match); - if (It3 != MachOConfig.RPathToAdd.end()) - return createStringError(errc::invalid_argument, - "cannot specify both -add_rpath '" + *It3 + - "' and -rpath '" + Old + "' '" + New + "'"); - - // Cannot specify the same rpath under both -prepend_rpath and -rpath. 
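Each of the -rpath conflict checks above and below follows the same shape: reject the new old/new pair if either string already occurs in a previously recorded set. A condensed sketch with hypothetical paths:

    DenseMap<StringRef, StringRef> Updates;
    Updates.insert({"/old", "/new"});
    StringRef Old = "/new", New = "/newer";
    auto Match = [=](StringRef R) { return R == Old || R == New; };
    bool Conflict = llvm::any_of(Updates, [&](const auto &P) {
      return Match(P.getFirst()) || Match(P.getSecond());
    });
    // Conflict == true: "/new" is already recorded as the replacement of "/old".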
- auto It4 = find_if(MachOConfig.RPathToPrepend, Match); - if (It4 != MachOConfig.RPathToPrepend.end()) - return createStringError(errc::invalid_argument, - "cannot specify both -prepend_rpath '" + *It4 + - "' and -rpath '" + Old + "' '" + New + "'"); - - MachOConfig.RPathsToUpdate.insert({Old, New}); - } - - if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) { - MachOConfig.SharedLibId = Arg->getValue(); - if (MachOConfig.SharedLibId->empty()) - return createStringError(errc::invalid_argument, - "cannot specify an empty id"); - } - - for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change)) - MachOConfig.InstallNamesToUpdate.insert( - {Arg->getValue(0), Arg->getValue(1)}); - - MachOConfig.RemoveAllRpaths = - InputArgs.hasArg(INSTALL_NAME_TOOL_delete_all_rpaths); - - SmallVector Positional; - for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_UNKNOWN)) - return createStringError(errc::invalid_argument, "unknown argument '%s'", - Arg->getAsString(InputArgs).c_str()); - for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_INPUT)) - Positional.push_back(Arg->getValue()); - if (Positional.empty()) - return createStringError(errc::invalid_argument, "no input file specified"); - if (Positional.size() > 1) - return createStringError( - errc::invalid_argument, - "llvm-install-name-tool expects a single input file"); - Config.InputFilename = Positional[0]; - Config.OutputFilename = Positional[0]; - - DC.CopyConfigs.push_back(std::move(ConfigMgr)); - return std::move(DC); -} - -Expected -objcopy::parseBitcodeStripOptions(ArrayRef ArgsArr) { - DriverConfig DC; - ConfigManager ConfigMgr; - CommonConfig &Config = ConfigMgr.Common; - BitcodeStripOptTable T; - unsigned MissingArgumentIndex, MissingArgumentCount; - opt::InputArgList InputArgs = - T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); - - if (InputArgs.size() == 0) { - printHelp(T, errs(), ToolType::BitcodeStrip); - exit(1); - } - - if (InputArgs.hasArg(BITCODE_STRIP_help)) { - printHelp(T, outs(), ToolType::BitcodeStrip); - exit(0); - } - - if (InputArgs.hasArg(BITCODE_STRIP_version)) { - outs() << "llvm-bitcode-strip, compatible with cctools " - "bitcode_strip\n"; - cl::PrintVersionMessage(); - exit(0); - } - - for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_UNKNOWN)) - return createStringError(errc::invalid_argument, "unknown argument '%s'", - Arg->getAsString(InputArgs).c_str()); - - SmallVector Positional; - for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_INPUT)) - Positional.push_back(Arg->getValue()); - if (Positional.size() > 1) - return createStringError(errc::invalid_argument, - "llvm-bitcode-strip expects a single input file"); - assert(!Positional.empty()); - Config.InputFilename = Positional[0]; - Config.OutputFilename = Positional[0]; - - DC.CopyConfigs.push_back(std::move(ConfigMgr)); - return std::move(DC); -} - -// ParseStripOptions returns the config and sets the input arguments. If a -// help flag is set then ParseStripOptions will print the help messege and -// exit. 
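parseStripOptions below enables --strip-all implicitly when no other stripping option was requested; a condensed restatement of that default:

    // True only if nothing explicitly asked for stripping.
    bool NothingRequested = !Config.StripDebug && !Config.StripUnneeded &&
                            !Config.StripAllGNU &&
                            Config.DiscardMode == DiscardType::None &&
                            Config.SymbolsToRemove.empty();
    if (!InputArgs.hasArg(STRIP_no_strip_all) && NothingRequested)
      Config.StripAll = true; // bare `llvm-strip file` means --strip-all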
-Expected -objcopy::parseStripOptions(ArrayRef RawArgsArr, - function_ref ErrorCallback) { - const char *const *DashDash = - std::find_if(RawArgsArr.begin(), RawArgsArr.end(), - [](StringRef Str) { return Str == "--"; }); - ArrayRef ArgsArr = makeArrayRef(RawArgsArr.begin(), DashDash); - if (DashDash != RawArgsArr.end()) - DashDash = std::next(DashDash); - - StripOptTable T; - unsigned MissingArgumentIndex, MissingArgumentCount; - llvm::opt::InputArgList InputArgs = - T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); - - if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) { - printHelp(T, errs(), ToolType::Strip); - exit(1); - } - - if (InputArgs.hasArg(STRIP_help)) { - printHelp(T, outs(), ToolType::Strip); - exit(0); - } - - if (InputArgs.hasArg(STRIP_version)) { - outs() << "llvm-strip, compatible with GNU strip\n"; - cl::PrintVersionMessage(); - exit(0); - } - - SmallVector Positional; - for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN)) - return createStringError(errc::invalid_argument, "unknown argument '%s'", - Arg->getAsString(InputArgs).c_str()); - for (auto Arg : InputArgs.filtered(STRIP_INPUT)) - Positional.push_back(Arg->getValue()); - std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional)); - - if (Positional.empty()) - return createStringError(errc::invalid_argument, "no input file specified"); - - if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output)) - return createStringError( - errc::invalid_argument, - "multiple input files cannot be used in combination with -o"); - - ConfigManager ConfigMgr; - CommonConfig &Config = ConfigMgr.Common; - ELFConfig &ELFConfig = ConfigMgr.ELF; - MachOConfig &MachOConfig = ConfigMgr.MachO; - - if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) - return createStringError(errc::invalid_argument, - "--regex and --wildcard are incompatible"); - MatchStyle SectionMatchStyle = - InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; - MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex) - ? MatchStyle::Regex - : InputArgs.hasArg(STRIP_wildcard) - ? MatchStyle::Wildcard - : MatchStyle::Literal; - ELFConfig.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); - Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); - - if (InputArgs.hasArg(STRIP_discard_all, STRIP_discard_locals)) - Config.DiscardMode = - InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals) - ? 
DiscardType::All - : DiscardType::Locals; - Config.StripSections = InputArgs.hasArg(STRIP_strip_sections); - Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded); - if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) - Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; - Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu); - MachOConfig.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols); - Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug); - ELFConfig.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); - MachOConfig.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined); - - for (auto Arg : InputArgs.filtered(STRIP_keep_section)) - if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( - Arg->getValue(), SectionMatchStyle, ErrorCallback))) - return std::move(E); - - for (auto Arg : InputArgs.filtered(STRIP_remove_section)) - if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( - Arg->getValue(), SectionMatchStyle, ErrorCallback))) - return std::move(E); - - for (auto Arg : InputArgs.filtered(STRIP_strip_symbol)) - if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - - for (auto Arg : InputArgs.filtered(STRIP_keep_symbol)) - if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( - Arg->getValue(), SymbolMatchStyle, ErrorCallback))) - return std::move(E); - - if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug && - !Config.StripUnneeded && Config.DiscardMode == DiscardType::None && - !Config.StripAllGNU && Config.SymbolsToRemove.empty()) - Config.StripAll = true; - - if (Config.DiscardMode == DiscardType::All) { - Config.StripDebug = true; - ELFConfig.KeepFileSymbols = true; - } - - Config.DeterministicArchives = - InputArgs.hasFlag(STRIP_enable_deterministic_archives, - STRIP_disable_deterministic_archives, /*default=*/true); - - Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates); - Config.InputFormat = FileFormat::Unspecified; - Config.OutputFormat = FileFormat::Unspecified; - - DriverConfig DC; - if (Positional.size() == 1) { - Config.InputFilename = Positional[0]; - Config.OutputFilename = - InputArgs.getLastArgValue(STRIP_output, Positional[0]); - DC.CopyConfigs.push_back(std::move(ConfigMgr)); - } else { - StringMap InputFiles; - for (StringRef Filename : Positional) { - if (InputFiles[Filename]++ == 1) { - if (Filename == "-") - return createStringError( - errc::invalid_argument, - "cannot specify '-' as an input file more than once"); - if (Error E = ErrorCallback(createStringError( - errc::invalid_argument, "'%s' was already specified", - Filename.str().c_str()))) - return std::move(E); - } - Config.InputFilename = Filename; - Config.OutputFilename = Filename; - DC.CopyConfigs.push_back(ConfigMgr); - } - } - - if (Config.PreserveDates && (is_contained(Positional, "-") || - InputArgs.getLastArgValue(STRIP_output) == "-")) - return createStringError(errc::invalid_argument, - "--preserve-dates requires a file"); - - return std::move(DC); -} diff --git a/llvm/tools/llvm-objcopy/ConfigManager.h b/llvm/tools/llvm-objcopy/ConfigManager.h deleted file mode 100644 index c0d0e8bbc721..000000000000 --- a/llvm/tools/llvm-objcopy/ConfigManager.h +++ /dev/null @@ -1,80 +0,0 @@ -//===- ConfigManager.h ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_CONFIGMANAGER_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_CONFIGMANAGER_H
-
-#include "COFF/COFFConfig.h"
-#include "CommonConfig.h"
-#include "ELF/ELFConfig.h"
-#include "MachO/MachOConfig.h"
-#include "MultiFormatConfig.h"
-#include "wasm/WasmConfig.h"
-#include "llvm/Support/Allocator.h"
-#include <vector>
-
-namespace llvm {
-namespace objcopy {
-
-// ConfigManager keeps all configurations and prepares
-// format-specific options.
-struct ConfigManager : public MultiFormatConfig {
-  virtual ~ConfigManager() {}
-
-  const CommonConfig &getCommonConfig() const override { return Common; }
-  Expected<const ELFConfig &> getELFConfig() const override;
-  Expected<const COFFConfig &> getCOFFConfig() const override;
-  Expected<const MachOConfig &> getMachOConfig() const override;
-  Expected<const WasmConfig &> getWasmConfig() const override;
-
-  // All configs.
-  CommonConfig Common;
-  ELFConfig ELF;
-  COFFConfig COFF;
-  MachOConfig MachO;
-  WasmConfig Wasm;
-};
-
-// Configuration for the overall invocation of this tool. When invoked as
-// objcopy, will always contain exactly one CopyConfig. When invoked as strip,
-// will contain one or more CopyConfigs.
-struct DriverConfig {
-  SmallVector<ConfigManager, 1> CopyConfigs;
-  BumpPtrAllocator Alloc;
-};
-
-// ParseObjcopyOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseObjcopyOptions will print the help message and
-// exit. ErrorCallback is used to handle recoverable errors. An Error returned
-// by the callback aborts the parsing and is then returned by this function.
-Expected<DriverConfig>
-parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
-                    llvm::function_ref<Error(Error)> ErrorCallback);
-
-// ParseInstallNameToolOptions returns the config and sets the input arguments.
-// If a help flag is set then ParseInstallNameToolOptions will print the help
-// message and exit.
-Expected<DriverConfig>
-parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr);
-
-// ParseBitcodeStripOptions returns the config and sets the input arguments.
-// If a help flag is set then ParseBitcodeStripOptions will print the help
-// message and exit.
-Expected<DriverConfig> parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr);
-
-// ParseStripOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseStripOptions will print the help message and
-// exit. ErrorCallback is used to handle recoverable errors. An Error returned
-// by the callback aborts the parsing and is then returned by this function.
-Expected<DriverConfig>
-parseStripOptions(ArrayRef<const char *> ArgsArr,
-                  llvm::function_ref<Error(Error)> ErrorCallback);
-} // namespace objcopy
-} // namespace llvm
-
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_CONFIGMANAGER_H
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h b/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
deleted file mode 100644
index 229a8d61fb83..000000000000
--- a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- ELFConfig.h ----------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_ELF_ELFCONFIG_H -#define LLVM_TOOLS_LLVM_OBJCOPY_ELF_ELFCONFIG_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Object/ELFTypes.h" -#include - -namespace llvm { -namespace objcopy { - -// ELF specific configuration for copying/stripping a single file. -struct ELFConfig { - uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT; - - // ELF entry point address expression. The input parameter is an entry point - // address in the input ELF file. The entry address in the output file is - // calculated with EntryExpr(input_address), when either --set-start or - // --change-start is used. - std::function EntryExpr; - - bool AllowBrokenLinks = false; - bool KeepFileSymbols = false; - bool LocalizeHidden = false; -}; - -} // namespace objcopy -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_ELF_ELFCONFIG_H diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp deleted file mode 100644 index f8521fa0d5b7..000000000000 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ /dev/null @@ -1,833 +0,0 @@ -//===- ELFObjcopy.cpp -----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ELFObjcopy.h" -#include "CommonConfig.h" -#include "ELFConfig.h" -#include "Object.h" -#include "llvm-objcopy.h" -#include "llvm/ADT/BitmaskEnum.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/ELFTypes.h" -#include "llvm/Object/Error.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compression.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace llvm; -using namespace llvm::ELF; -using namespace llvm::objcopy; -using namespace llvm::objcopy::elf; -using namespace llvm::object; - -using SectionPred = std::function; - -static bool isDebugSection(const SectionBase &Sec) { - return StringRef(Sec.Name).startswith(".debug") || - StringRef(Sec.Name).startswith(".zdebug") || Sec.Name == ".gdb_index"; -} - -static bool isDWOSection(const SectionBase &Sec) { - return StringRef(Sec.Name).endswith(".dwo"); -} - -static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) { - // We can't remove the section header string table. - if (&Sec == Obj.SectionNames) - return false; - // Short of keeping the string table we want to keep everything that is a DWO - // section and remove everything else. 
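// --strip-dwo and --extract-dwo are two sides of split DWARF: one removes
// every .dwo section, the other keeps only them (plus the section header
// string table, as noted above). A minimal sketch of the name test behind
// that split, assuming a plain std::string instead of SectionBase:
#include <cstddef>
#include <string>

// Hypothetical helper: a section is on the .dwo side if its name ends in
// ".dwo", e.g. ".debug_info.dwo".
static bool nameIsDWO(const std::string &Name) {
  static const char Suffix[] = ".dwo";
  const std::size_t N = sizeof(Suffix) - 1;
  return Name.size() >= N && Name.compare(Name.size() - N, N, Suffix) == 0;
}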
- return !isDWOSection(Sec);
-}
-
-static uint64_t getNewShfFlags(SectionFlag AllFlags) {
-  uint64_t NewFlags = 0;
-  if (AllFlags & SectionFlag::SecAlloc)
-    NewFlags |= ELF::SHF_ALLOC;
-  if (!(AllFlags & SectionFlag::SecReadonly))
-    NewFlags |= ELF::SHF_WRITE;
-  if (AllFlags & SectionFlag::SecCode)
-    NewFlags |= ELF::SHF_EXECINSTR;
-  if (AllFlags & SectionFlag::SecMerge)
-    NewFlags |= ELF::SHF_MERGE;
-  if (AllFlags & SectionFlag::SecStrings)
-    NewFlags |= ELF::SHF_STRINGS;
-  if (AllFlags & SectionFlag::SecExclude)
-    NewFlags |= ELF::SHF_EXCLUDE;
-  return NewFlags;
-}
-
-static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
-                                            uint64_t NewFlags) {
-  // Preserve some flags which should not be dropped when setting flags.
-  // Also, preserve anything OS/processor dependent.
-  const uint64_t PreserveMask =
-      (ELF::SHF_COMPRESSED | ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
-       ELF::SHF_MASKOS | ELF::SHF_MASKPROC | ELF::SHF_TLS |
-       ELF::SHF_INFO_LINK) &
-      ~ELF::SHF_EXCLUDE;
-  return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
-}
-
-static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
-  Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));
-
-  // In GNU objcopy, certain flags promote SHT_NOBITS to SHT_PROGBITS. This rule
-  // may promote more non-ALLOC sections than GNU objcopy, but it is fine as
-  // non-ALLOC SHT_NOBITS sections do not make much sense.
-  if (Sec.Type == SHT_NOBITS &&
-      (!(Sec.Flags & ELF::SHF_ALLOC) ||
-       Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
-    Sec.Type = SHT_PROGBITS;
-}
-
-static ElfType getOutputElfType(const Binary &Bin) {
-  // Infer output ELF type from the input ELF object
-  if (isa<ELFObjectFile<ELF32LE>>(Bin))
-    return ELFT_ELF32LE;
-  if (isa<ELFObjectFile<ELF64LE>>(Bin))
-    return ELFT_ELF64LE;
-  if (isa<ELFObjectFile<ELF32BE>>(Bin))
-    return ELFT_ELF32BE;
-  if (isa<ELFObjectFile<ELF64BE>>(Bin))
-    return ELFT_ELF64BE;
-  llvm_unreachable("Invalid ELFType");
-}
-
-static ElfType getOutputElfType(const MachineInfo &MI) {
-  // Infer output ELF type from the binary arch specified
-  if (MI.Is64Bit)
-    return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE;
-  else
-    return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
-}
-
-static std::unique_ptr<Writer> createELFWriter(const CommonConfig &Config,
-                                               Object &Obj, raw_ostream &Out,
-                                               ElfType OutputElfType) {
-  // Depending on the initial ELFT and OutputFormat we need a different Writer.
-  switch (OutputElfType) {
-  case ELFT_ELF32LE:
-    return std::make_unique<ELFWriter<ELF32LE>>(Obj, Out, !Config.StripSections,
-                                                Config.OnlyKeepDebug);
-  case ELFT_ELF64LE:
-    return std::make_unique<ELFWriter<ELF64LE>>(Obj, Out, !Config.StripSections,
-                                                Config.OnlyKeepDebug);
-  case ELFT_ELF32BE:
-    return std::make_unique<ELFWriter<ELF32BE>>(Obj, Out, !Config.StripSections,
-                                                Config.OnlyKeepDebug);
-  case ELFT_ELF64BE:
-    return std::make_unique<ELFWriter<ELF64BE>>(Obj, Out, !Config.StripSections,
-                                                Config.OnlyKeepDebug);
-  }
-  llvm_unreachable("Invalid output format");
-}
-
-static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
-                                            Object &Obj, raw_ostream &Out,
-                                            ElfType OutputElfType) {
-  switch (Config.OutputFormat) {
-  case FileFormat::Binary:
-    return std::make_unique<BinaryWriter>(Obj, Out);
-  case FileFormat::IHex:
-    return std::make_unique<IHexWriter>(Obj, Out);
-  default:
-    return createELFWriter(Config, Obj, Out, OutputElfType);
-  }
-}
-
-template <class... Ts>
-static Error makeStringError(std::error_code EC, const Twine &Msg,
-                             Ts &&...
Args) { - std::string FullMsg = (EC.message() + ": " + Msg).str(); - return createStringError(EC, FullMsg.c_str(), std::forward(Args)...); -} - -static Error dumpSectionToFile(StringRef SecName, StringRef Filename, - Object &Obj) { - for (auto &Sec : Obj.sections()) { - if (Sec.Name == SecName) { - if (Sec.Type == SHT_NOBITS) - return createStringError(object_error::parse_failed, - "cannot dump section '%s': it has no contents", - SecName.str().c_str()); - Expected> BufferOrErr = - FileOutputBuffer::create(Filename, Sec.OriginalData.size()); - if (!BufferOrErr) - return BufferOrErr.takeError(); - std::unique_ptr Buf = std::move(*BufferOrErr); - std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), - Buf->getBufferStart()); - if (Error E = Buf->commit()) - return E; - return Error::success(); - } - } - return createStringError(object_error::parse_failed, "section '%s' not found", - SecName.str().c_str()); -} - -static bool isCompressable(const SectionBase &Sec) { - return !(Sec.Flags & ELF::SHF_COMPRESSED) && - StringRef(Sec.Name).startswith(".debug"); -} - -static Error replaceDebugSections( - Object &Obj, function_ref ShouldReplace, - function_ref(const SectionBase *)> AddSection) { - // Build a list of the debug sections we are going to replace. - // We can't call `AddSection` while iterating over sections, - // because it would mutate the sections array. - SmallVector ToReplace; - for (auto &Sec : Obj.sections()) - if (ShouldReplace(Sec)) - ToReplace.push_back(&Sec); - - // Build a mapping from original section to a new one. - DenseMap FromTo; - for (SectionBase *S : ToReplace) { - Expected NewSection = AddSection(S); - if (!NewSection) - return NewSection.takeError(); - - FromTo[S] = *NewSection; - } - - return Obj.replaceSections(FromTo); -} - -static bool isAArch64MappingSymbol(const Symbol &Sym) { - if (Sym.Binding != STB_LOCAL || Sym.Type != STT_NOTYPE || - Sym.getShndx() == SHN_UNDEF) - return false; - StringRef Name = Sym.Name; - if (!Name.consume_front("$x") && !Name.consume_front("$d")) - return false; - return Name.empty() || Name.startswith("."); -} - -static bool isArmMappingSymbol(const Symbol &Sym) { - if (Sym.Binding != STB_LOCAL || Sym.Type != STT_NOTYPE || - Sym.getShndx() == SHN_UNDEF) - return false; - StringRef Name = Sym.Name; - if (!Name.consume_front("$a") && !Name.consume_front("$d") && - !Name.consume_front("$t")) - return false; - return Name.empty() || Name.startswith("."); -} - -// Check if the symbol should be preserved because it is required by ABI. -static bool isRequiredByABISymbol(const Object &Obj, const Symbol &Sym) { - switch (Obj.Machine) { - case EM_AARCH64: - // Mapping symbols should be preserved for a relocatable object file. - return Obj.isRelocatable() && isAArch64MappingSymbol(Sym); - case EM_ARM: - // Mapping symbols should be preserved for a relocatable object file. - return Obj.isRelocatable() && isArmMappingSymbol(Sym); - default: - return false; - } -} - -static bool isUnneededSymbol(const Symbol &Sym) { - return !Sym.Referenced && - (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) && - Sym.Type != STT_SECTION; -} - -static Error updateAndRemoveSymbols(const CommonConfig &Config, - const ELFConfig &ELFConfig, Object &Obj) { - // TODO: update or remove symbols only if there is an option that affects - // them. 
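// Arm and AArch64 mapping symbols ("$a"/"$t"/"$x" for code, "$d" for
// data, optionally followed by a "."-suffix) delimit instruction and data
// ranges, which is why the checks above keep them in relocatable objects.
// A minimal sketch of the AArch64 name test, assuming llvm::StringRef:
#include "llvm/ADT/StringRef.h"

static bool looksLikeAArch64MappingSymbolName(llvm::StringRef Name) {
  // Mirrors the consume_front() logic above: "$x" or "$d", either bare or
  // with a "."-prefixed suffix such as "$x.0".
  if (!Name.consume_front("$x") && !Name.consume_front("$d"))
    return false;
  return Name.empty() || Name.startswith(".");
}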
- if (!Obj.SymbolTable) - return Error::success(); - - Obj.SymbolTable->updateSymbols([&](Symbol &Sym) { - // Common and undefined symbols don't make sense as local symbols, and can - // even cause crashes if we localize those, so skip them. - if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && - ((ELFConfig.LocalizeHidden && - (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) || - Config.SymbolsToLocalize.matches(Sym.Name))) - Sym.Binding = STB_LOCAL; - - // Note: these two globalize flags have very similar names but different - // meanings: - // - // --globalize-symbol: promote a symbol to global - // --keep-global-symbol: all symbols except for these should be made local - // - // If --globalize-symbol is specified for a given symbol, it will be - // global in the output file even if it is not included via - // --keep-global-symbol. Because of that, make sure to check - // --globalize-symbol second. - if (!Config.SymbolsToKeepGlobal.empty() && - !Config.SymbolsToKeepGlobal.matches(Sym.Name) && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_LOCAL; - - if (Config.SymbolsToGlobalize.matches(Sym.Name) && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_GLOBAL; - - if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding == STB_GLOBAL) - Sym.Binding = STB_WEAK; - - if (Config.Weaken && Sym.Binding == STB_GLOBAL && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_WEAK; - - const auto I = Config.SymbolsToRename.find(Sym.Name); - if (I != Config.SymbolsToRename.end()) - Sym.Name = std::string(I->getValue()); - - if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION) - Sym.Name = (Config.SymbolsPrefix + Sym.Name).str(); - }); - - // The purpose of this loop is to mark symbols referenced by sections - // (like GroupSection or RelocationSection). This way, we know which - // symbols are still 'needed' and which are not. - if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() || - !Config.OnlySection.empty()) { - for (SectionBase &Sec : Obj.sections()) - Sec.markSymbols(); - } - - auto RemoveSymbolsPred = [&](const Symbol &Sym) { - if (Config.SymbolsToKeep.matches(Sym.Name) || - (ELFConfig.KeepFileSymbols && Sym.Type == STT_FILE)) - return false; - - if (Config.SymbolsToRemove.matches(Sym.Name)) - return true; - - if (Config.StripAll || Config.StripAllGNU) - return true; - - if (isRequiredByABISymbol(Obj, Sym)) - return false; - - if (Config.StripDebug && Sym.Type == STT_FILE) - return true; - - if ((Config.DiscardMode == DiscardType::All || - (Config.DiscardMode == DiscardType::Locals && - StringRef(Sym.Name).startswith(".L"))) && - Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF && - Sym.Type != STT_FILE && Sym.Type != STT_SECTION) - return true; - - if ((Config.StripUnneeded || - Config.UnneededSymbolsToRemove.matches(Sym.Name)) && - (!Obj.isRelocatable() || isUnneededSymbol(Sym))) - return true; - - // We want to remove undefined symbols if all references have been stripped. 
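// RemoveSymbolsPred above is ordered so that explicit keeps win over
// every removal rule: --keep-symbol and --keep-file-symbols are tested
// before --strip-symbol, --strip-all, and the discard modes. A minimal
// sketch of that precedence, assuming bare bools in place of the real
// config matchers:
struct SymbolDecision {
  bool MatchesKeepList;   // --keep-symbol NAME matched
  bool MatchesRemoveList; // --strip-symbol NAME matched
  bool StripAll;          // --strip-all in effect
};

static bool shouldRemoveSymbol(const SymbolDecision &D) {
  if (D.MatchesKeepList)
    return false; // keeps overrule all removals
  if (D.MatchesRemoveList)
    return true;
  return D.StripAll;
}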
- if (!Config.OnlySection.empty() && !Sym.Referenced && - Sym.getShndx() == SHN_UNDEF) - return true; - - return false; - }; - - return Obj.removeSymbols(RemoveSymbolsPred); -} - -static Error replaceAndRemoveSections(const CommonConfig &Config, - const ELFConfig &ELFConfig, Object &Obj) { - SectionPred RemovePred = [](const SectionBase &) { return false; }; - - // Removes: - if (!Config.ToRemove.empty()) { - RemovePred = [&Config](const SectionBase &Sec) { - return Config.ToRemove.matches(Sec.Name); - }; - } - - if (Config.StripDWO) - RemovePred = [RemovePred](const SectionBase &Sec) { - return isDWOSection(Sec) || RemovePred(Sec); - }; - - if (Config.ExtractDWO) - RemovePred = [RemovePred, &Obj](const SectionBase &Sec) { - return onlyKeepDWOPred(Obj, Sec) || RemovePred(Sec); - }; - - if (Config.StripAllGNU) - RemovePred = [RemovePred, &Obj](const SectionBase &Sec) { - if (RemovePred(Sec)) - return true; - if ((Sec.Flags & SHF_ALLOC) != 0) - return false; - if (&Sec == Obj.SectionNames) - return false; - switch (Sec.Type) { - case SHT_SYMTAB: - case SHT_REL: - case SHT_RELA: - case SHT_STRTAB: - return true; - } - return isDebugSection(Sec); - }; - - if (Config.StripSections) { - RemovePred = [RemovePred](const SectionBase &Sec) { - return RemovePred(Sec) || Sec.ParentSegment == nullptr; - }; - } - - if (Config.StripDebug || Config.StripUnneeded) { - RemovePred = [RemovePred](const SectionBase &Sec) { - return RemovePred(Sec) || isDebugSection(Sec); - }; - } - - if (Config.StripNonAlloc) - RemovePred = [RemovePred, &Obj](const SectionBase &Sec) { - if (RemovePred(Sec)) - return true; - if (&Sec == Obj.SectionNames) - return false; - return (Sec.Flags & SHF_ALLOC) == 0 && Sec.ParentSegment == nullptr; - }; - - if (Config.StripAll) - RemovePred = [RemovePred, &Obj](const SectionBase &Sec) { - if (RemovePred(Sec)) - return true; - if (&Sec == Obj.SectionNames) - return false; - if (StringRef(Sec.Name).startswith(".gnu.warning")) - return false; - // We keep the .ARM.attribute section to maintain compatibility - // with Debian derived distributions. This is a bug in their - // patchset as documented here: - // https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=943798 - if (Sec.Type == SHT_ARM_ATTRIBUTES) - return false; - if (Sec.ParentSegment != nullptr) - return false; - return (Sec.Flags & SHF_ALLOC) == 0; - }; - - if (Config.ExtractPartition || Config.ExtractMainPartition) { - RemovePred = [RemovePred](const SectionBase &Sec) { - if (RemovePred(Sec)) - return true; - if (Sec.Type == SHT_LLVM_PART_EHDR || Sec.Type == SHT_LLVM_PART_PHDR) - return true; - return (Sec.Flags & SHF_ALLOC) != 0 && !Sec.ParentSegment; - }; - } - - // Explicit copies: - if (!Config.OnlySection.empty()) { - RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) { - // Explicitly keep these sections regardless of previous removes. - if (Config.OnlySection.matches(Sec.Name)) - return false; - - // Allow all implicit removes. - if (RemovePred(Sec)) - return true; - - // Keep special sections. - if (Obj.SectionNames == &Sec) - return false; - if (Obj.SymbolTable == &Sec || - (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec)) - return false; - - // Remove everything else. - return true; - }; - } - - if (!Config.KeepSection.empty()) { - RemovePred = [&Config, RemovePred](const SectionBase &Sec) { - // Explicitly keep these sections regardless of previous removes. - if (Config.KeepSection.matches(Sec.Name)) - return false; - // Otherwise defer to RemovePred. 
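// Each option in replaceAndRemoveSections layers its rule over the
// previous one by capturing the old RemovePred in a new lambda, which is
// how the result stays independent of option order. A minimal sketch of
// the chaining pattern itself, assuming std::function for brevity (the
// real code captures by value in the same way):
#include <functional>
#include <string>

using SectionPredicate = std::function<bool(const std::string &)>;

static SectionPredicate chainRemoveRule(SectionPredicate Prev,
                                        std::string Prefix) {
  // New predicate: remove sections whose name starts with Prefix, plus
  // everything the previously composed predicate already removed.
  return [Prev = std::move(Prev), Prefix = std::move(Prefix)](
             const std::string &Name) {
    return Name.rfind(Prefix, 0) == 0 || Prev(Name);
  };
}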
- return RemovePred(Sec); - }; - } - - // This has to be the last predicate assignment. - // If the option --keep-symbol has been specified - // and at least one of those symbols is present - // (equivalently, the updated symbol table is not empty) - // the symbol table and the string table should not be removed. - if ((!Config.SymbolsToKeep.empty() || ELFConfig.KeepFileSymbols) && - Obj.SymbolTable && !Obj.SymbolTable->empty()) { - RemovePred = [&Obj, RemovePred](const SectionBase &Sec) { - if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab()) - return false; - return RemovePred(Sec); - }; - } - - if (Error E = Obj.removeSections(ELFConfig.AllowBrokenLinks, RemovePred)) - return E; - - if (Config.CompressionType != DebugCompressionType::None) { - if (Error Err = replaceDebugSections( - Obj, isCompressable, - [&Config, &Obj](const SectionBase *S) -> Expected { - Expected NewSection = - CompressedSection::create(*S, Config.CompressionType); - if (!NewSection) - return NewSection.takeError(); - - return &Obj.addSection(std::move(*NewSection)); - })) - return Err; - } else if (Config.DecompressDebugSections) { - if (Error Err = replaceDebugSections( - Obj, - [](const SectionBase &S) { return isa(&S); }, - [&Obj](const SectionBase *S) { - const CompressedSection *CS = cast(S); - return &Obj.addSection(*CS); - })) - return Err; - } - - return Error::success(); -} - -// Add symbol to the Object symbol table with the specified properties. -static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo, - uint8_t DefaultVisibility) { - SectionBase *Sec = Obj.findSection(SymInfo.SectionName); - uint64_t Value = Sec ? Sec->Addr + SymInfo.Value : SymInfo.Value; - - uint8_t Bind = ELF::STB_GLOBAL; - uint8_t Type = ELF::STT_NOTYPE; - uint8_t Visibility = DefaultVisibility; - - for (SymbolFlag FlagValue : SymInfo.Flags) - switch (FlagValue) { - case SymbolFlag::Global: - Bind = ELF::STB_GLOBAL; - break; - case SymbolFlag::Local: - Bind = ELF::STB_LOCAL; - break; - case SymbolFlag::Weak: - Bind = ELF::STB_WEAK; - break; - case SymbolFlag::Default: - Visibility = ELF::STV_DEFAULT; - break; - case SymbolFlag::Hidden: - Visibility = ELF::STV_HIDDEN; - break; - case SymbolFlag::Protected: - Visibility = ELF::STV_PROTECTED; - break; - case SymbolFlag::File: - Type = ELF::STT_FILE; - break; - case SymbolFlag::Section: - Type = ELF::STT_SECTION; - break; - case SymbolFlag::Object: - Type = ELF::STT_OBJECT; - break; - case SymbolFlag::Function: - Type = ELF::STT_FUNC; - break; - case SymbolFlag::IndirectFunction: - Type = ELF::STT_GNU_IFUNC; - break; - default: /* Other flag values are ignored for ELF. */ - break; - }; - - Obj.SymbolTable->addSymbol( - SymInfo.SymbolName, Bind, Type, Sec, Value, Visibility, - Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0); -} - -static Error -handleUserSection(StringRef Flag, - function_ref)> F) { - std::pair SecPair = Flag.split("="); - StringRef SecName = SecPair.first; - StringRef File = SecPair.second; - ErrorOr> BufOrErr = MemoryBuffer::getFile(File); - if (!BufOrErr) - return createFileError(File, errorCodeToError(BufOrErr.getError())); - std::unique_ptr Buf = std::move(*BufOrErr); - ArrayRef Data( - reinterpret_cast(Buf->getBufferStart()), - Buf->getBufferSize()); - return F(SecName, Data); -} - -// This function handles the high level operations of GNU objcopy including -// handling command line options. It's important to outline certain properties -// we expect to hold of the command line operations. 
Any operation that "keeps" -// should keep regardless of a remove. Additionally any removal should respect -// any previous removals. Lastly whether or not something is removed shouldn't -// depend a) on the order the options occur in or b) on some opaque priority -// system. The only priority is that keeps/copies overrule removes. -static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, - Object &Obj) { - if (Config.OutputArch) { - Obj.Machine = Config.OutputArch.getValue().EMachine; - Obj.OSABI = Config.OutputArch.getValue().OSABI; - } - - if (!Config.SplitDWO.empty() && Config.ExtractDWO) { - return Obj.removeSections( - ELFConfig.AllowBrokenLinks, - [&Obj](const SectionBase &Sec) { return onlyKeepDWOPred(Obj, Sec); }); - } - - // Dump sections before add/remove for compatibility with GNU objcopy. - for (StringRef Flag : Config.DumpSection) { - StringRef SectionName; - StringRef FileName; - std::tie(SectionName, FileName) = Flag.split('='); - if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) - return E; - } - - // It is important to remove the sections first. For example, we want to - // remove the relocation sections before removing the symbols. That allows - // us to avoid reporting the inappropriate errors about removing symbols - // named in relocations. - if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj)) - return E; - - if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj)) - return E; - - if (!Config.SectionsToRename.empty()) { - std::vector RelocSections; - DenseSet RenamedSections; - for (SectionBase &Sec : Obj.sections()) { - auto *RelocSec = dyn_cast(&Sec); - const auto Iter = Config.SectionsToRename.find(Sec.Name); - if (Iter != Config.SectionsToRename.end()) { - const SectionRename &SR = Iter->second; - Sec.Name = std::string(SR.NewName); - if (SR.NewFlags.hasValue()) - setSectionFlagsAndType(Sec, SR.NewFlags.getValue()); - RenamedSections.insert(&Sec); - } else if (RelocSec && !(Sec.Flags & SHF_ALLOC)) - // Postpone processing relocation sections which are not specified in - // their explicit '--rename-section' commands until after their target - // sections are renamed. - // Dynamic relocation sections (i.e. ones with SHF_ALLOC) should be - // renamed only explicitly. Otherwise, renaming, for example, '.got.plt' - // would affect '.rela.plt', which is not desirable. - RelocSections.push_back(RelocSec); - } - - // Rename relocation sections according to their target sections. - for (RelocationSectionBase *RelocSec : RelocSections) { - auto Iter = RenamedSections.find(RelocSec->getSection()); - if (Iter != RenamedSections.end()) - RelocSec->Name = (RelocSec->getNamePrefix() + (*Iter)->Name).str(); - } - } - - // Add a prefix to allocated sections and their relocation sections. This - // should be done after renaming the section by Config.SectionToRename to - // imitate the GNU objcopy behavior. - if (!Config.AllocSectionsPrefix.empty()) { - DenseSet PrefixedSections; - for (SectionBase &Sec : Obj.sections()) { - if (Sec.Flags & SHF_ALLOC) { - Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str(); - PrefixedSections.insert(&Sec); - } else if (auto *RelocSec = dyn_cast(&Sec)) { - // Rename relocation sections associated to the allocated sections. - // For example, if we rename .text to .prefix.text, we also rename - // .rel.text to .rel.prefix.text. 
- // - // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled - // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not - // .rela.prefix.plt since GNU objcopy does so. - const SectionBase *TargetSec = RelocSec->getSection(); - if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) { - // If the relocation section comes *after* the target section, we - // don't add Config.AllocSectionsPrefix because we've already added - // the prefix to TargetSec->Name. Otherwise, if the relocation - // section comes *before* the target section, we add the prefix. - if (PrefixedSections.count(TargetSec)) - Sec.Name = (RelocSec->getNamePrefix() + TargetSec->Name).str(); - else - Sec.Name = (RelocSec->getNamePrefix() + Config.AllocSectionsPrefix + - TargetSec->Name) - .str(); - } - } - } - } - - if (!Config.SetSectionAlignment.empty()) { - for (SectionBase &Sec : Obj.sections()) { - auto I = Config.SetSectionAlignment.find(Sec.Name); - if (I != Config.SetSectionAlignment.end()) - Sec.Align = I->second; - } - } - - if (Config.OnlyKeepDebug) - for (auto &Sec : Obj.sections()) - if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE) - Sec.Type = SHT_NOBITS; - - for (const auto &Flag : Config.AddSection) { - auto AddSection = [&](StringRef Name, ArrayRef Data) { - OwnedDataSection &NewSection = - Obj.addSection(Name, Data); - if (Name.startswith(".note") && Name != ".note.GNU-stack") - NewSection.Type = SHT_NOTE; - return Error::success(); - }; - if (Error E = handleUserSection(Flag, AddSection)) - return E; - } - - for (StringRef Flag : Config.UpdateSection) { - auto UpdateSection = [&](StringRef Name, ArrayRef Data) { - return Obj.updateSection(Name, Data); - }; - if (Error E = handleUserSection(Flag, UpdateSection)) - return E; - } - - if (!Config.AddGnuDebugLink.empty()) - Obj.addSection(Config.AddGnuDebugLink, - Config.GnuDebugLinkCRC32); - - // If the symbol table was previously removed, we need to create a new one - // before adding new symbols. - if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty()) - if (Error E = Obj.addNewSymbolTable()) - return E; - - for (const NewSymbolInfo &SI : Config.SymbolsToAdd) - addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility); - - // --set-section-flags works with sections added by --add-section. 
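// --add-section and --update-section take NAME=FILE values, which
// handleUserSection above splits on the first '=' before reading FILE
// into memory. A minimal sketch of the same split, assuming std::string
// (everything after the first '=' is the file name, so file names may
// themselves contain '='):
#include <string>
#include <utility>

static std::pair<std::string, std::string>
splitSectionFlag(const std::string &Flag) {
  // ".note.foo=payload.bin" -> {".note.foo", "payload.bin"}
  std::string::size_type Eq = Flag.find('=');
  if (Eq == std::string::npos)
    return {Flag, std::string()};
  return {Flag.substr(0, Eq), Flag.substr(Eq + 1)};
}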
- if (!Config.SetSectionFlags.empty()) { - for (auto &Sec : Obj.sections()) { - const auto Iter = Config.SetSectionFlags.find(Sec.Name); - if (Iter != Config.SetSectionFlags.end()) { - const SectionFlagsUpdate &SFU = Iter->second; - setSectionFlagsAndType(Sec, SFU.NewFlags); - } - } - } - - if (ELFConfig.EntryExpr) - Obj.Entry = ELFConfig.EntryExpr(Obj.Entry); - return Error::success(); -} - -static Error writeOutput(const CommonConfig &Config, Object &Obj, - raw_ostream &Out, ElfType OutputElfType) { - std::unique_ptr Writer = - createWriter(Config, Obj, Out, OutputElfType); - if (Error E = Writer->finalize()) - return E; - return Writer->write(); -} - -Error objcopy::elf::executeObjcopyOnIHex(const CommonConfig &Config, - const ELFConfig &ELFConfig, - MemoryBuffer &In, raw_ostream &Out) { - IHexReader Reader(&In); - Expected> Obj = Reader.create(true); - if (!Obj) - return Obj.takeError(); - - const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); - if (Error E = handleArgs(Config, ELFConfig, **Obj)) - return E; - return writeOutput(Config, **Obj, Out, OutputElfType); -} - -Error objcopy::elf::executeObjcopyOnRawBinary(const CommonConfig &Config, - const ELFConfig &ELFConfig, - MemoryBuffer &In, - raw_ostream &Out) { - BinaryReader Reader(&In, ELFConfig.NewSymbolVisibility); - Expected> Obj = Reader.create(true); - if (!Obj) - return Obj.takeError(); - - // Prefer OutputArch (-O) if set, otherwise fallback to BinaryArch - // (-B). - const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); - if (Error E = handleArgs(Config, ELFConfig, **Obj)) - return E; - return writeOutput(Config, **Obj, Out, OutputElfType); -} - -Error objcopy::elf::executeObjcopyOnBinary(const CommonConfig &Config, - const ELFConfig &ELFConfig, - object::ELFObjectFileBase &In, - raw_ostream &Out) { - ELFReader Reader(&In, Config.ExtractPartition); - Expected> Obj = - Reader.create(!Config.SymbolsToAdd.empty()); - if (!Obj) - return Obj.takeError(); - // Prefer OutputArch (-O) if set, otherwise infer it from the input. - const ElfType OutputElfType = - Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue()) - : getOutputElfType(In); - - if (Error E = handleArgs(Config, ELFConfig, **Obj)) - return createFileError(Config.InputFilename, std::move(E)); - - if (Error E = writeOutput(Config, **Obj, Out, OutputElfType)) - return createFileError(Config.InputFilename, std::move(E)); - - return Error::success(); -} diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h deleted file mode 100644 index 852661e68f37..000000000000 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h +++ /dev/null @@ -1,40 +0,0 @@ -//===- ELFObjcopy.h ---------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H -#define LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H - -namespace llvm { -class Error; -class MemoryBuffer; -class raw_ostream; - -namespace object { -class ELFObjectFileBase; -} // end namespace object - -namespace objcopy { -struct CommonConfig; -struct ELFConfig; - -namespace elf { -Error executeObjcopyOnIHex(const CommonConfig &Config, - const ELFConfig &ELFConfig, MemoryBuffer &In, - raw_ostream &Out); -Error executeObjcopyOnRawBinary(const CommonConfig &Config, - const ELFConfig &ELFConfig, MemoryBuffer &In, - raw_ostream &Out); -Error executeObjcopyOnBinary(const CommonConfig &Config, - const ELFConfig &ELFConfig, - object::ELFObjectFileBase &In, raw_ostream &Out); - -} // end namespace elf -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp deleted file mode 100644 index 659e12bf0306..000000000000 --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ /dev/null @@ -1,2826 +0,0 @@ -//===- Object.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Object.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Object/ELF.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/Compression.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/Path.h" -#include -#include -#include -#include -#include -#include -#include - -using namespace llvm; -using namespace llvm::ELF; -using namespace llvm::objcopy::elf; -using namespace llvm::object; - -template void ELFWriter::writePhdr(const Segment &Seg) { - uint8_t *B = reinterpret_cast(Buf->getBufferStart()) + - Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr); - Elf_Phdr &Phdr = *reinterpret_cast(B); - Phdr.p_type = Seg.Type; - Phdr.p_flags = Seg.Flags; - Phdr.p_offset = Seg.Offset; - Phdr.p_vaddr = Seg.VAddr; - Phdr.p_paddr = Seg.PAddr; - Phdr.p_filesz = Seg.FileSize; - Phdr.p_memsz = Seg.MemSize; - Phdr.p_align = Seg.Align; -} - -Error SectionBase::removeSectionReferences( - bool, function_ref) { - return Error::success(); -} - -Error SectionBase::removeSymbols(function_ref) { - return Error::success(); -} - -Error SectionBase::initialize(SectionTableRef) { return Error::success(); } -void SectionBase::finalize() {} -void SectionBase::markSymbols() {} -void SectionBase::replaceSectionReferences( - const DenseMap &) {} -void SectionBase::onRemove() {} - -template void ELFWriter::writeShdr(const SectionBase &Sec) { - uint8_t *B = - reinterpret_cast(Buf->getBufferStart()) + Sec.HeaderOffset; - Elf_Shdr &Shdr = *reinterpret_cast(B); - Shdr.sh_name = Sec.NameIndex; - Shdr.sh_type = Sec.Type; - Shdr.sh_flags = Sec.Flags; - Shdr.sh_addr = Sec.Addr; - Shdr.sh_offset = Sec.Offset; - Shdr.sh_size = Sec.Size; - 
Shdr.sh_link = Sec.Link; - Shdr.sh_info = Sec.Info; - Shdr.sh_addralign = Sec.Align; - Shdr.sh_entsize = Sec.EntrySize; -} - -template Error ELFSectionSizer::visit(Section &) { - return Error::success(); -} - -template Error ELFSectionSizer::visit(OwnedDataSection &) { - return Error::success(); -} - -template Error ELFSectionSizer::visit(StringTableSection &) { - return Error::success(); -} - -template -Error ELFSectionSizer::visit(DynamicRelocationSection &) { - return Error::success(); -} - -template -Error ELFSectionSizer::visit(SymbolTableSection &Sec) { - Sec.EntrySize = sizeof(Elf_Sym); - Sec.Size = Sec.Symbols.size() * Sec.EntrySize; - // Align to the largest field in Elf_Sym. - Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word); - return Error::success(); -} - -template -Error ELFSectionSizer::visit(RelocationSection &Sec) { - Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela); - Sec.Size = Sec.Relocations.size() * Sec.EntrySize; - // Align to the largest field in Elf_Rel(a). - Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word); - return Error::success(); -} - -template -Error ELFSectionSizer::visit(GnuDebugLinkSection &) { - return Error::success(); -} - -template Error ELFSectionSizer::visit(GroupSection &Sec) { - Sec.Size = sizeof(Elf_Word) + Sec.GroupMembers.size() * sizeof(Elf_Word); - return Error::success(); -} - -template -Error ELFSectionSizer::visit(SectionIndexSection &) { - return Error::success(); -} - -template Error ELFSectionSizer::visit(CompressedSection &) { - return Error::success(); -} - -template -Error ELFSectionSizer::visit(DecompressedSection &) { - return Error::success(); -} - -Error BinarySectionWriter::visit(const SectionIndexSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write symbol section index table '" + - Sec.Name + "' "); -} - -Error BinarySectionWriter::visit(const SymbolTableSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write symbol table '" + Sec.Name + - "' out to binary"); -} - -Error BinarySectionWriter::visit(const RelocationSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write relocation section '" + Sec.Name + - "' out to binary"); -} - -Error BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write '" + Sec.Name + "' out to binary"); -} - -Error BinarySectionWriter::visit(const GroupSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write '" + Sec.Name + "' out to binary"); -} - -Error SectionWriter::visit(const Section &Sec) { - if (Sec.Type != SHT_NOBITS) - llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset); - - return Error::success(); -} - -static bool addressOverflows32bit(uint64_t Addr) { - // Sign extended 32 bit addresses (e.g 0xFFFFFFFF80000000) are ok - return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX; -} - -template static T checkedGetHex(StringRef S) { - T Value; - bool Fail = S.getAsInteger(16, Value); - assert(!Fail); - (void)Fail; - return Value; -} - -// Fills exactly Len bytes of buffer with hexadecimal characters -// representing value 'X' -template -static Iterator toHexStr(T X, Iterator It, size_t Len) { - // Fill range with '0' - std::fill(It, It + Len, '0'); - - for (long I = Len - 1; I >= 0; --I) { - unsigned char Mod = static_cast(X) & 15; - *(It + I) = hexdigit(Mod, false); - X >>= 4; - } - assert(X == 0); - return It + Len; 
-} - -uint8_t IHexRecord::getChecksum(StringRef S) { - assert((S.size() & 1) == 0); - uint8_t Checksum = 0; - while (!S.empty()) { - Checksum += checkedGetHex(S.take_front(2)); - S = S.drop_front(2); - } - return -Checksum; -} - -IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr, - ArrayRef Data) { - IHexLineData Line(getLineLength(Data.size())); - assert(Line.size()); - auto Iter = Line.begin(); - *Iter++ = ':'; - Iter = toHexStr(Data.size(), Iter, 2); - Iter = toHexStr(Addr, Iter, 4); - Iter = toHexStr(Type, Iter, 2); - for (uint8_t X : Data) - Iter = toHexStr(X, Iter, 2); - StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter)); - Iter = toHexStr(getChecksum(S), Iter, 2); - *Iter++ = '\r'; - *Iter++ = '\n'; - assert(Iter == Line.end()); - return Line; -} - -static Error checkRecord(const IHexRecord &R) { - switch (R.Type) { - case IHexRecord::Data: - if (R.HexData.size() == 0) - return createStringError( - errc::invalid_argument, - "zero data length is not allowed for data records"); - break; - case IHexRecord::EndOfFile: - break; - case IHexRecord::SegmentAddr: - // 20-bit segment address. Data length must be 2 bytes - // (4 bytes in hex) - if (R.HexData.size() != 4) - return createStringError( - errc::invalid_argument, - "segment address data should be 2 bytes in size"); - break; - case IHexRecord::StartAddr80x86: - case IHexRecord::StartAddr: - if (R.HexData.size() != 8) - return createStringError(errc::invalid_argument, - "start address data should be 4 bytes in size"); - // According to Intel HEX specification '03' record - // only specifies the code address within the 20-bit - // segmented address space of the 8086/80186. This - // means 12 high order bits should be zeroes. - if (R.Type == IHexRecord::StartAddr80x86 && - R.HexData.take_front(3) != "000") - return createStringError(errc::invalid_argument, - "start address exceeds 20 bit for 80x86"); - break; - case IHexRecord::ExtendedAddr: - // 16-31 bits of linear base address - if (R.HexData.size() != 4) - return createStringError( - errc::invalid_argument, - "extended address data should be 2 bytes in size"); - break; - default: - // Unknown record type - return createStringError(errc::invalid_argument, "unknown record type: %u", - static_cast(R.Type)); - } - return Error::success(); -} - -// Checks that IHEX line contains valid characters. -// This allows converting hexadecimal data to integers -// without extra verification. 
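// The record checksum computed by getChecksum above is the two's
// complement of the byte sum, so adding every byte of a valid record,
// checksum included, gives 0 modulo 256. A minimal sketch over decoded
// bytes, assuming the hex pairs have already been converted:
#include <cstdint>
#include <vector>

static uint8_t ihexChecksum(const std::vector<uint8_t> &RecordBytes) {
  uint8_t Sum = 0;
  for (uint8_t B : RecordBytes)
    Sum = static_cast<uint8_t>(Sum + B); // mod-256 arithmetic by design
  return static_cast<uint8_t>(-Sum);     // two's complement of the sum
}
// Example: bytes {0x02, 0x00, 0x00, 0x01} sum to 0x03, so the checksum
// byte is 0xFD and the full record sums to 0x00 mod 256.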
-static Error checkChars(StringRef Line) { - assert(!Line.empty()); - if (Line[0] != ':') - return createStringError(errc::invalid_argument, - "missing ':' in the beginning of line."); - - for (size_t Pos = 1; Pos < Line.size(); ++Pos) - if (hexDigitValue(Line[Pos]) == -1U) - return createStringError(errc::invalid_argument, - "invalid character at position %zu.", Pos + 1); - return Error::success(); -} - -Expected IHexRecord::parse(StringRef Line) { - assert(!Line.empty()); - - // ':' + Length + Address + Type + Checksum with empty data ':LLAAAATTCC' - if (Line.size() < 11) - return createStringError(errc::invalid_argument, - "line is too short: %zu chars.", Line.size()); - - if (Error E = checkChars(Line)) - return std::move(E); - - IHexRecord Rec; - size_t DataLen = checkedGetHex(Line.substr(1, 2)); - if (Line.size() != getLength(DataLen)) - return createStringError(errc::invalid_argument, - "invalid line length %zu (should be %zu)", - Line.size(), getLength(DataLen)); - - Rec.Addr = checkedGetHex(Line.substr(3, 4)); - Rec.Type = checkedGetHex(Line.substr(7, 2)); - Rec.HexData = Line.substr(9, DataLen * 2); - - if (getChecksum(Line.drop_front(1)) != 0) - return createStringError(errc::invalid_argument, "incorrect checksum."); - if (Error E = checkRecord(Rec)) - return std::move(E); - return Rec; -} - -static uint64_t sectionPhysicalAddr(const SectionBase *Sec) { - Segment *Seg = Sec->ParentSegment; - if (Seg && Seg->Type != ELF::PT_LOAD) - Seg = nullptr; - return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset - : Sec->Addr; -} - -void IHexSectionWriterBase::writeSection(const SectionBase *Sec, - ArrayRef Data) { - assert(Data.size() == Sec->Size); - const uint32_t ChunkSize = 16; - uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU; - while (!Data.empty()) { - uint64_t DataSize = std::min(Data.size(), ChunkSize); - if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) { - if (Addr > 0xFFFFFU) { - // Write extended address record, zeroing segment address - // if needed. 
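// Intel HEX records carry only a 16-bit address, so the writer above
// escapes to type-02 records (a 20-bit 8086-style segment base) while the
// address fits in 20 bits, and to type-04 records (the upper 16 bits of a
// 32-bit linear base) beyond that. A tiny sketch of the type-02
// arithmetic:
#include <cstdint>

static uint32_t segmentPlusOffset(uint16_t Segment, uint16_t Offset) {
  // physical = segment * 16 + offset;
  // e.g. segment 0xF000, offset 0x0123 -> 0xF0000 + 0x0123 = 0xF0123.
  return (static_cast<uint32_t>(Segment) << 4) + Offset;
}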
- if (SegmentAddr != 0) - SegmentAddr = writeSegmentAddr(0U); - BaseAddr = writeBaseAddr(Addr); - } else { - // We can still remain 16-bit - SegmentAddr = writeSegmentAddr(Addr); - } - } - uint64_t SegOffset = Addr - BaseAddr - SegmentAddr; - assert(SegOffset <= 0xFFFFU); - DataSize = std::min(DataSize, 0x10000U - SegOffset); - writeData(0, SegOffset, Data.take_front(DataSize)); - Addr += DataSize; - Data = Data.drop_front(DataSize); - } -} - -uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) { - assert(Addr <= 0xFFFFFU); - uint8_t Data[] = {static_cast((Addr & 0xF0000U) >> 12), 0}; - writeData(2, 0, Data); - return Addr & 0xF0000U; -} - -uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) { - assert(Addr <= 0xFFFFFFFFU); - uint64_t Base = Addr & 0xFFFF0000U; - uint8_t Data[] = {static_cast(Base >> 24), - static_cast((Base >> 16) & 0xFF)}; - writeData(4, 0, Data); - return Base; -} - -void IHexSectionWriterBase::writeData(uint8_t, uint16_t, - ArrayRef Data) { - Offset += IHexRecord::getLineLength(Data.size()); -} - -Error IHexSectionWriterBase::visit(const Section &Sec) { - writeSection(&Sec, Sec.Contents); - return Error::success(); -} - -Error IHexSectionWriterBase::visit(const OwnedDataSection &Sec) { - writeSection(&Sec, Sec.Data); - return Error::success(); -} - -Error IHexSectionWriterBase::visit(const StringTableSection &Sec) { - // Check that sizer has already done its work - assert(Sec.Size == Sec.StrTabBuilder.getSize()); - // We are free to pass an invalid pointer to writeSection as long - // as we don't actually write any data. The real writer class has - // to override this method . - writeSection(&Sec, {nullptr, static_cast(Sec.Size)}); - return Error::success(); -} - -Error IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) { - writeSection(&Sec, Sec.Contents); - return Error::success(); -} - -void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr, - ArrayRef Data) { - IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data); - memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size()); - Offset += HexData.size(); -} - -Error IHexSectionWriter::visit(const StringTableSection &Sec) { - assert(Sec.Size == Sec.StrTabBuilder.getSize()); - std::vector Data(Sec.Size); - Sec.StrTabBuilder.write(Data.data()); - writeSection(&Sec, Data); - return Error::success(); -} - -Error Section::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error Section::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -Error SectionWriter::visit(const OwnedDataSection &Sec) { - llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset); - return Error::success(); -} - -static constexpr std::array ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}}; - -static bool isDataGnuCompressed(ArrayRef Data) { - return Data.size() > ZlibGnuMagic.size() && - std::equal(ZlibGnuMagic.begin(), ZlibGnuMagic.end(), Data.data()); -} - -template -static std::tuple -getDecompressedSizeAndAlignment(ArrayRef Data) { - const bool IsGnuDebug = isDataGnuCompressed(Data); - const uint64_t DecompressedSize = - IsGnuDebug - ? support::endian::read64be(Data.data() + ZlibGnuMagic.size()) - : reinterpret_cast *>(Data.data())->ch_size; - const uint64_t DecompressedAlign = - IsGnuDebug ? 
1 - : reinterpret_cast *>(Data.data()) - ->ch_addralign; - - return std::make_tuple(DecompressedSize, DecompressedAlign); -} - -template -Error ELFSectionWriter::visit(const DecompressedSection &Sec) { - const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData) - ? (ZlibGnuMagic.size() + sizeof(Sec.Size)) - : sizeof(Elf_Chdr_Impl); - - StringRef CompressedContent( - reinterpret_cast(Sec.OriginalData.data()) + DataOffset, - Sec.OriginalData.size() - DataOffset); - - SmallVector DecompressedContent; - if (Error Err = zlib::uncompress(CompressedContent, DecompressedContent, - static_cast(Sec.Size))) - return createStringError(errc::invalid_argument, - "'" + Sec.Name + "': " + toString(std::move(Err))); - - uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; - std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf); - - return Error::success(); -} - -Error BinarySectionWriter::visit(const DecompressedSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write compressed section '" + Sec.Name + - "' "); -} - -Error DecompressedSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error DecompressedSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -Error OwnedDataSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error OwnedDataSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -void OwnedDataSection::appendHexData(StringRef HexData) { - assert((HexData.size() & 1) == 0); - while (!HexData.empty()) { - Data.push_back(checkedGetHex(HexData.take_front(2))); - HexData = HexData.drop_front(2); - } - Size = Data.size(); -} - -Error BinarySectionWriter::visit(const CompressedSection &Sec) { - return createStringError(errc::operation_not_permitted, - "cannot write compressed section '" + Sec.Name + - "' "); -} - -template -Error ELFSectionWriter::visit(const CompressedSection &Sec) { - uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; - if (Sec.CompressionType == DebugCompressionType::None) { - std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf); - return Error::success(); - } - - if (Sec.CompressionType == DebugCompressionType::GNU) { - const char *Magic = "ZLIB"; - memcpy(Buf, Magic, strlen(Magic)); - Buf += strlen(Magic); - const uint64_t DecompressedSize = - support::endian::read64be(&Sec.DecompressedSize); - memcpy(Buf, &DecompressedSize, sizeof(DecompressedSize)); - Buf += sizeof(DecompressedSize); - } else { - Elf_Chdr_Impl Chdr; - Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB; - Chdr.ch_size = Sec.DecompressedSize; - Chdr.ch_addralign = Sec.DecompressedAlign; - memcpy(Buf, &Chdr, sizeof(Chdr)); - Buf += sizeof(Chdr); - } - - std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf); - return Error::success(); -} - -Expected -CompressedSection::create(const SectionBase &Sec, - DebugCompressionType CompressionType) { - Error Err = Error::success(); - CompressedSection Section(Sec, CompressionType, Err); - - if (Err) - return std::move(Err); - - return Section; -} -Expected -CompressedSection::create(ArrayRef CompressedData, - uint64_t DecompressedSize, - uint64_t DecompressedAlign) { - return CompressedSection(CompressedData, DecompressedSize, DecompressedAlign); -} - -CompressedSection::CompressedSection(const SectionBase &Sec, - DebugCompressionType CompressionType, - Error &OutErr) - : SectionBase(Sec), CompressionType(CompressionType), - 
DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) { - ErrorAsOutParameter EAO(&OutErr); - - if (Error Err = zlib::compress( - StringRef(reinterpret_cast(OriginalData.data()), - OriginalData.size()), - CompressedData)) { - OutErr = createStringError(llvm::errc::invalid_argument, - "'" + Name + "': " + toString(std::move(Err))); - return; - } - - size_t ChdrSize; - if (CompressionType == DebugCompressionType::GNU) { - Name = ".z" + Sec.Name.substr(1); - ChdrSize = sizeof("ZLIB") - 1 + sizeof(uint64_t); - } else { - Flags |= ELF::SHF_COMPRESSED; - ChdrSize = - std::max(std::max(sizeof(object::Elf_Chdr_Impl), - sizeof(object::Elf_Chdr_Impl)), - std::max(sizeof(object::Elf_Chdr_Impl), - sizeof(object::Elf_Chdr_Impl))); - } - Size = ChdrSize + CompressedData.size(); - Align = 8; -} - -CompressedSection::CompressedSection(ArrayRef CompressedData, - uint64_t DecompressedSize, - uint64_t DecompressedAlign) - : CompressionType(DebugCompressionType::None), - DecompressedSize(DecompressedSize), DecompressedAlign(DecompressedAlign) { - OriginalData = CompressedData; -} - -Error CompressedSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error CompressedSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); } - -uint32_t StringTableSection::findIndex(StringRef Name) const { - return StrTabBuilder.getOffset(Name); -} - -void StringTableSection::prepareForLayout() { - StrTabBuilder.finalize(); - Size = StrTabBuilder.getSize(); -} - -Error SectionWriter::visit(const StringTableSection &Sec) { - Sec.StrTabBuilder.write(reinterpret_cast(Out.getBufferStart()) + - Sec.Offset); - return Error::success(); -} - -Error StringTableSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error StringTableSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -template -Error ELFSectionWriter::visit(const SectionIndexSection &Sec) { - uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; - llvm::copy(Sec.Indexes, reinterpret_cast(Buf)); - return Error::success(); -} - -Error SectionIndexSection::initialize(SectionTableRef SecTable) { - Size = 0; - Expected Sec = - SecTable.getSectionOfType( - Link, - "Link field value " + Twine(Link) + " in section " + Name + - " is invalid", - "Link field value " + Twine(Link) + " in section " + Name + - " is not a symbol table"); - if (!Sec) - return Sec.takeError(); - - setSymTab(*Sec); - Symbols->setShndxTable(this); - return Error::success(); -} - -void SectionIndexSection::finalize() { Link = Symbols->Index; } - -Error SectionIndexSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error SectionIndexSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { - switch (Index) { - case SHN_ABS: - case SHN_COMMON: - return true; - } - - if (Machine == EM_AMDGPU) { - return Index == SHN_AMDGPU_LDS; - } - - if (Machine == EM_HEXAGON) { - switch (Index) { - case SHN_HEXAGON_SCOMMON: - case SHN_HEXAGON_SCOMMON_1: - case SHN_HEXAGON_SCOMMON_2: - case SHN_HEXAGON_SCOMMON_4: - case SHN_HEXAGON_SCOMMON_8: - return true; - } - } - return false; -} - -// Large indexes force us to clarify exactly what this function should do. This -// function should return the value that will appear in st_shndx when written -// out. 
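// st_shndx is a 16-bit field, and indexes at or above SHN_LORESERVE
// (0xff00) are reserved, so getShndx below answers the question posed in
// the comment above by escaping large indexes to SHN_XINDEX (0xffff); the
// real index then lives in an SHT_SYMTAB_SHNDX table. A minimal sketch of
// that encoding rule with the raw constant values:
#include <cstdint>

static uint16_t encodeStShndx(uint32_t SectionIndex) {
  const uint32_t ShnLoReserve = 0xff00; // ELF SHN_LORESERVE
  const uint16_t ShnXIndex = 0xffff;    // ELF SHN_XINDEX
  return SectionIndex >= ShnLoReserve ? ShnXIndex
                                      : static_cast<uint16_t>(SectionIndex);
}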
-uint16_t Symbol::getShndx() const { - if (DefinedIn != nullptr) { - if (DefinedIn->Index >= SHN_LORESERVE) - return SHN_XINDEX; - return DefinedIn->Index; - } - - if (ShndxType == SYMBOL_SIMPLE_INDEX) { - // This means that we don't have a defined section but we do need to - // output a legitimate section index. - return SHN_UNDEF; - } - - assert(ShndxType == SYMBOL_ABS || ShndxType == SYMBOL_COMMON || - (ShndxType >= SYMBOL_LOPROC && ShndxType <= SYMBOL_HIPROC) || - (ShndxType >= SYMBOL_LOOS && ShndxType <= SYMBOL_HIOS)); - return static_cast(ShndxType); -} - -bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; } - -void SymbolTableSection::assignIndices() { - uint32_t Index = 0; - for (auto &Sym : Symbols) - Sym->Index = Index++; -} - -void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, - SectionBase *DefinedIn, uint64_t Value, - uint8_t Visibility, uint16_t Shndx, - uint64_t SymbolSize) { - Symbol Sym; - Sym.Name = Name.str(); - Sym.Binding = Bind; - Sym.Type = Type; - Sym.DefinedIn = DefinedIn; - if (DefinedIn != nullptr) - DefinedIn->HasSymbol = true; - if (DefinedIn == nullptr) { - if (Shndx >= SHN_LORESERVE) - Sym.ShndxType = static_cast(Shndx); - else - Sym.ShndxType = SYMBOL_SIMPLE_INDEX; - } - Sym.Value = Value; - Sym.Visibility = Visibility; - Sym.Size = SymbolSize; - Sym.Index = Symbols.size(); - Symbols.emplace_back(std::make_unique(Sym)); - Size += this->EntrySize; -} - -Error SymbolTableSection::removeSectionReferences( - bool AllowBrokenLinks, function_ref ToRemove) { - if (ToRemove(SectionIndexTable)) - SectionIndexTable = nullptr; - if (ToRemove(SymbolNames)) { - if (!AllowBrokenLinks) - return createStringError( - llvm::errc::invalid_argument, - "string table '%s' cannot be removed because it is " - "referenced by the symbol table '%s'", - SymbolNames->Name.data(), this->Name.data()); - SymbolNames = nullptr; - } - return removeSymbols( - [ToRemove](const Symbol &Sym) { return ToRemove(Sym.DefinedIn); }); -} - -void SymbolTableSection::updateSymbols(function_ref Callable) { - std::for_each(std::begin(Symbols) + 1, std::end(Symbols), - [Callable](SymPtr &Sym) { Callable(*Sym); }); - std::stable_partition( - std::begin(Symbols), std::end(Symbols), - [](const SymPtr &Sym) { return Sym->Binding == STB_LOCAL; }); - assignIndices(); -} - -Error SymbolTableSection::removeSymbols( - function_ref ToRemove) { - Symbols.erase( - std::remove_if(std::begin(Symbols) + 1, std::end(Symbols), - [ToRemove](const SymPtr &Sym) { return ToRemove(*Sym); }), - std::end(Symbols)); - Size = Symbols.size() * EntrySize; - assignIndices(); - return Error::success(); -} - -void SymbolTableSection::replaceSectionReferences( - const DenseMap &FromTo) { - for (std::unique_ptr &Sym : Symbols) - if (SectionBase *To = FromTo.lookup(Sym->DefinedIn)) - Sym->DefinedIn = To; -} - -Error SymbolTableSection::initialize(SectionTableRef SecTable) { - Size = 0; - Expected Sec = - SecTable.getSectionOfType( - Link, - "Symbol table has link index of " + Twine(Link) + - " which is not a valid index", - "Symbol table has link index of " + Twine(Link) + - " which is not a string table"); - if (!Sec) - return Sec.takeError(); - - setStrTab(*Sec); - return Error::success(); -} - -void SymbolTableSection::finalize() { - uint32_t MaxLocalIndex = 0; - for (std::unique_ptr &Sym : Symbols) { - Sym->NameIndex = - SymbolNames == nullptr ? 
0 : SymbolNames->findIndex(Sym->Name);
-    if (Sym->Binding == STB_LOCAL)
-      MaxLocalIndex = std::max(MaxLocalIndex, Sym->Index);
-  }
-  // Now we need to set the Link and Info fields.
-  Link = SymbolNames == nullptr ? 0 : SymbolNames->Index;
-  Info = MaxLocalIndex + 1;
-}
-
-void SymbolTableSection::prepareForLayout() {
-  // Reserve the proper amount of space in the section index table, so we can
-  // lay out sections correctly. We will fill the table with correct
-  // indexes later in fillShndxTable.
-  if (SectionIndexTable)
-    SectionIndexTable->reserve(Symbols.size());
-
-  // Add all of our strings to SymbolNames so that SymbolNames has the right
-  // size before layout is decided.
-  // If the symbol names section has been removed, don't try to add strings to
-  // the table.
-  if (SymbolNames != nullptr)
-    for (std::unique_ptr<Symbol> &Sym : Symbols)
-      SymbolNames->addString(Sym->Name);
-}
-
-void SymbolTableSection::fillShndxTable() {
-  if (SectionIndexTable == nullptr)
-    return;
-  // Fill section index table with real section indexes. This function must
-  // be called after assignOffsets.
-  for (const std::unique_ptr<Symbol> &Sym : Symbols) {
-    if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE)
-      SectionIndexTable->addIndex(Sym->DefinedIn->Index);
-    else
-      SectionIndexTable->addIndex(SHN_UNDEF);
-  }
-}
-
-Expected<const Symbol *>
-SymbolTableSection::getSymbolByIndex(uint32_t Index) const {
-  if (Symbols.size() <= Index)
-    return createStringError(errc::invalid_argument,
-                             "invalid symbol index: " + Twine(Index));
-  return Symbols[Index].get();
-}
-
-Expected<Symbol *> SymbolTableSection::getSymbolByIndex(uint32_t Index) {
-  Expected<const Symbol *> Sym =
-      static_cast<const SymbolTableSection *>(this)->getSymbolByIndex(Index);
-  if (!Sym)
-    return Sym.takeError();
-
-  return const_cast<Symbol *>(*Sym);
-}
-
-template <class ELFT>
-Error ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
-  Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Out.getBufferStart() + Sec.Offset);
-  // Loop through symbols, setting each entry of the symbol table.
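Before the writer loop below: getShndx and fillShndxTable above implement the gABI escape hatch for section indexes that do not fit the 16-bit st_shndx field. A condensed sketch of the rule; encodeShndx is a hypothetical helper, constants per the ELF specification:

#include <cstdint>
#include <vector>

constexpr uint32_t SHN_UNDEF = 0;
constexpr uint32_t SHN_LORESERVE = 0xff00;
constexpr uint16_t SHN_XINDEX = 0xffff;

// SectionIndex is a real index into the section header table (reserved
// values such as SHN_ABS are handled separately, as in getShndx above).
// Indexes >= SHN_LORESERVE cannot be stored in st_shndx directly:
// st_shndx becomes SHN_XINDEX and the real index is spilled into the
// parallel SHT_SYMTAB_SHNDX table. Every symbol owns one slot in that
// table; symbols that do not overflow store SHN_UNDEF there.
uint16_t encodeShndx(uint32_t SectionIndex,
                     std::vector<uint32_t> &ShndxTable) {
  if (SectionIndex >= SHN_LORESERVE) {
    ShndxTable.push_back(SectionIndex);
    return SHN_XINDEX;
  }
  ShndxTable.push_back(SHN_UNDEF);
  return static_cast<uint16_t>(SectionIndex);
}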
- for (const std::unique_ptr &Symbol : Sec.Symbols) { - Sym->st_name = Symbol->NameIndex; - Sym->st_value = Symbol->Value; - Sym->st_size = Symbol->Size; - Sym->st_other = Symbol->Visibility; - Sym->setBinding(Symbol->Binding); - Sym->setType(Symbol->Type); - Sym->st_shndx = Symbol->getShndx(); - ++Sym; - } - return Error::success(); -} - -Error SymbolTableSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -StringRef RelocationSectionBase::getNamePrefix() const { - switch (Type) { - case SHT_REL: - return ".rel"; - case SHT_RELA: - return ".rela"; - default: - llvm_unreachable("not a relocation section"); - } -} - -Error RelocationSection::removeSectionReferences( - bool AllowBrokenLinks, function_ref ToRemove) { - if (ToRemove(Symbols)) { - if (!AllowBrokenLinks) - return createStringError( - llvm::errc::invalid_argument, - "symbol table '%s' cannot be removed because it is " - "referenced by the relocation section '%s'", - Symbols->Name.data(), this->Name.data()); - Symbols = nullptr; - } - - for (const Relocation &R : Relocations) { - if (!R.RelocSymbol || !R.RelocSymbol->DefinedIn || - !ToRemove(R.RelocSymbol->DefinedIn)) - continue; - return createStringError(llvm::errc::invalid_argument, - "section '%s' cannot be removed: (%s+0x%" PRIx64 - ") has relocation against symbol '%s'", - R.RelocSymbol->DefinedIn->Name.data(), - SecToApplyRel->Name.data(), R.Offset, - R.RelocSymbol->Name.c_str()); - } - - return Error::success(); -} - -template -Error RelocSectionWithSymtabBase::initialize( - SectionTableRef SecTable) { - if (Link != SHN_UNDEF) { - Expected Sec = SecTable.getSectionOfType( - Link, - "Link field value " + Twine(Link) + " in section " + Name + - " is invalid", - "Link field value " + Twine(Link) + " in section " + Name + - " is not a symbol table"); - if (!Sec) - return Sec.takeError(); - - setSymTab(*Sec); - } - - if (Info != SHN_UNDEF) { - Expected Sec = - SecTable.getSection(Info, "Info field value " + Twine(Info) + - " in section " + Name + " is invalid"); - if (!Sec) - return Sec.takeError(); - - setSection(*Sec); - } else - setSection(nullptr); - - return Error::success(); -} - -template -void RelocSectionWithSymtabBase::finalize() { - this->Link = Symbols ? Symbols->Index : 0; - - if (SecToApplyRel != nullptr) - this->Info = SecToApplyRel->Index; -} - -template -static void setAddend(Elf_Rel_Impl &, uint64_t) {} - -template -static void setAddend(Elf_Rel_Impl &Rela, uint64_t Addend) { - Rela.r_addend = Addend; -} - -template -static void writeRel(const RelRange &Relocations, T *Buf, bool IsMips64EL) { - for (const auto &Reloc : Relocations) { - Buf->r_offset = Reloc.Offset; - setAddend(*Buf, Reloc.Addend); - Buf->setSymbolAndType(Reloc.RelocSymbol ? 
Reloc.RelocSymbol->Index : 0, - Reloc.Type, IsMips64EL); - ++Buf; - } -} - -template -Error ELFSectionWriter::visit(const RelocationSection &Sec) { - uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; - if (Sec.Type == SHT_REL) - writeRel(Sec.Relocations, reinterpret_cast(Buf), - Sec.getObject().IsMips64EL); - else - writeRel(Sec.Relocations, reinterpret_cast(Buf), - Sec.getObject().IsMips64EL); - return Error::success(); -} - -Error RelocationSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error RelocationSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -Error RelocationSection::removeSymbols( - function_ref ToRemove) { - for (const Relocation &Reloc : Relocations) - if (Reloc.RelocSymbol && ToRemove(*Reloc.RelocSymbol)) - return createStringError( - llvm::errc::invalid_argument, - "not stripping symbol '%s' because it is named in a relocation", - Reloc.RelocSymbol->Name.data()); - return Error::success(); -} - -void RelocationSection::markSymbols() { - for (const Relocation &Reloc : Relocations) - if (Reloc.RelocSymbol) - Reloc.RelocSymbol->Referenced = true; -} - -void RelocationSection::replaceSectionReferences( - const DenseMap &FromTo) { - // Update the target section if it was replaced. - if (SectionBase *To = FromTo.lookup(SecToApplyRel)) - SecToApplyRel = To; -} - -Error SectionWriter::visit(const DynamicRelocationSection &Sec) { - llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset); - return Error::success(); -} - -Error DynamicRelocationSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -Error DynamicRelocationSection::removeSectionReferences( - bool AllowBrokenLinks, function_ref ToRemove) { - if (ToRemove(Symbols)) { - if (!AllowBrokenLinks) - return createStringError( - llvm::errc::invalid_argument, - "symbol table '%s' cannot be removed because it is " - "referenced by the relocation section '%s'", - Symbols->Name.data(), this->Name.data()); - Symbols = nullptr; - } - - // SecToApplyRel contains a section referenced by sh_info field. It keeps - // a section to which the relocation section applies. When we remove any - // sections we also remove their relocation sections. Since we do that much - // earlier, this assert should never be triggered. - assert(!SecToApplyRel || !ToRemove(SecToApplyRel)); - return Error::success(); -} - -Error Section::removeSectionReferences( - bool AllowBrokenDependency, - function_ref ToRemove) { - if (ToRemove(LinkSection)) { - if (!AllowBrokenDependency) - return createStringError(llvm::errc::invalid_argument, - "section '%s' cannot be removed because it is " - "referenced by the section '%s'", - LinkSection->Name.data(), this->Name.data()); - LinkSection = nullptr; - } - return Error::success(); -} - -void GroupSection::finalize() { - this->Info = Sym ? Sym->Index : 0; - this->Link = SymTab ? SymTab->Index : 0; - // Linker deduplication for GRP_COMDAT is based on Sym->Name. The local/global - // status is not part of the equation. If Sym is localized, the intention is - // likely to make the group fully localized. Drop GRP_COMDAT to suppress - // deduplication. 
See https://groups.google.com/g/generic-abi/c/2X6mR-s2zoc - if ((FlagWord & GRP_COMDAT) && Sym && Sym->Binding == STB_LOCAL) - this->FlagWord &= ~GRP_COMDAT; -} - -Error GroupSection::removeSectionReferences( - bool AllowBrokenLinks, function_ref ToRemove) { - if (ToRemove(SymTab)) { - if (!AllowBrokenLinks) - return createStringError( - llvm::errc::invalid_argument, - "section '.symtab' cannot be removed because it is " - "referenced by the group section '%s'", - this->Name.data()); - SymTab = nullptr; - Sym = nullptr; - } - llvm::erase_if(GroupMembers, ToRemove); - return Error::success(); -} - -Error GroupSection::removeSymbols(function_ref ToRemove) { - if (ToRemove(*Sym)) - return createStringError(llvm::errc::invalid_argument, - "symbol '%s' cannot be removed because it is " - "referenced by the section '%s[%d]'", - Sym->Name.data(), this->Name.data(), this->Index); - return Error::success(); -} - -void GroupSection::markSymbols() { - if (Sym) - Sym->Referenced = true; -} - -void GroupSection::replaceSectionReferences( - const DenseMap &FromTo) { - for (SectionBase *&Sec : GroupMembers) - if (SectionBase *To = FromTo.lookup(Sec)) - Sec = To; -} - -void GroupSection::onRemove() { - // As the header section of the group is removed, drop the Group flag in its - // former members. - for (SectionBase *Sec : GroupMembers) - Sec->Flags &= ~SHF_GROUP; -} - -Error Section::initialize(SectionTableRef SecTable) { - if (Link == ELF::SHN_UNDEF) - return Error::success(); - - Expected Sec = - SecTable.getSection(Link, "Link field value " + Twine(Link) + - " in section " + Name + " is invalid"); - if (!Sec) - return Sec.takeError(); - - LinkSection = *Sec; - - if (LinkSection->Type == ELF::SHT_SYMTAB) - LinkSection = nullptr; - - return Error::success(); -} - -void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; } - -void GnuDebugLinkSection::init(StringRef File) { - FileName = sys::path::filename(File); - // The format for the .gnu_debuglink starts with the file name and is - // followed by a null terminator and then the CRC32 of the file. The CRC32 - // should be 4 byte aligned. So we add the FileName size, a 1 for the null - // byte, and then finally push the size to alignment and add 4. - Size = alignTo(FileName.size() + 1, 4) + 4; - // The CRC32 will only be aligned if we align the whole section. - Align = 4; - Type = OriginalType = ELF::SHT_PROGBITS; - Name = ".gnu_debuglink"; - // For sections not found in segments, OriginalOffset is only used to - // establish the order that sections should go in. By using the maximum - // possible offset we cause this section to wind up at the end. 
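The .gnu_debuglink size computed in init above corresponds to this layout: file name, NUL terminator, zero padding up to a 4-byte boundary, then the 4-byte CRC32. A standalone sketch of the same computation (makeDebugLink is illustrative, not the patch's API):

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// name + NUL, zero-padded so the trailing CRC32 starts on a 4-byte
// boundary: Size = alignTo(FileName.size() + 1, 4) + 4, as above.
std::vector<uint8_t> makeDebugLink(const std::string &Name, uint32_t CRC32) {
  size_t NameField = (Name.size() + 1 + 3) & ~size_t(3); // alignTo(n + 1, 4)
  std::vector<uint8_t> Data(NameField + 4, 0);
  std::memcpy(Data.data(), Name.c_str(), Name.size() + 1);
  std::memcpy(Data.data() + NameField, &CRC32, 4); // host byte order here
  return Data;
}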
- OriginalOffset = std::numeric_limits::max(); -} - -GnuDebugLinkSection::GnuDebugLinkSection(StringRef File, - uint32_t PrecomputedCRC) - : FileName(File), CRC32(PrecomputedCRC) { - init(File); -} - -template -Error ELFSectionWriter::visit(const GnuDebugLinkSection &Sec) { - unsigned char *Buf = - reinterpret_cast(Out.getBufferStart()) + Sec.Offset; - Elf_Word *CRC = - reinterpret_cast(Buf + Sec.Size - sizeof(Elf_Word)); - *CRC = Sec.CRC32; - llvm::copy(Sec.FileName, Buf); - return Error::success(); -} - -Error GnuDebugLinkSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -template -Error ELFSectionWriter::visit(const GroupSection &Sec) { - ELF::Elf32_Word *Buf = - reinterpret_cast(Out.getBufferStart() + Sec.Offset); - support::endian::write32(Buf++, Sec.FlagWord); - for (SectionBase *S : Sec.GroupMembers) - support::endian::write32(Buf++, S->Index); - return Error::success(); -} - -Error GroupSection::accept(SectionVisitor &Visitor) const { - return Visitor.visit(*this); -} - -Error GroupSection::accept(MutableSectionVisitor &Visitor) { - return Visitor.visit(*this); -} - -// Returns true IFF a section is wholly inside the range of a segment -static bool sectionWithinSegment(const SectionBase &Sec, const Segment &Seg) { - // If a section is empty it should be treated like it has a size of 1. This is - // to clarify the case when an empty section lies on a boundary between two - // segments and ensures that the section "belongs" to the second segment and - // not the first. - uint64_t SecSize = Sec.Size ? Sec.Size : 1; - - // Ignore just added sections. - if (Sec.OriginalOffset == std::numeric_limits::max()) - return false; - - if (Sec.Type == SHT_NOBITS) { - if (!(Sec.Flags & SHF_ALLOC)) - return false; - - bool SectionIsTLS = Sec.Flags & SHF_TLS; - bool SegmentIsTLS = Seg.Type == PT_TLS; - if (SectionIsTLS != SegmentIsTLS) - return false; - - return Seg.VAddr <= Sec.Addr && - Seg.VAddr + Seg.MemSize >= Sec.Addr + SecSize; - } - - return Seg.Offset <= Sec.OriginalOffset && - Seg.Offset + Seg.FileSize >= Sec.OriginalOffset + SecSize; -} - -// Returns true IFF a segment's original offset is inside of another segment's -// range. -static bool segmentOverlapsSegment(const Segment &Child, - const Segment &Parent) { - - return Parent.OriginalOffset <= Child.OriginalOffset && - Parent.OriginalOffset + Parent.FileSize > Child.OriginalOffset; -} - -static bool compareSegmentsByOffset(const Segment *A, const Segment *B) { - // Any segment without a parent segment should come before a segment - // that has a parent segment. 
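The containment test in sectionWithinSegment above reduces to a range check once the empty-section special case is applied. A condensed restatement with plain structs standing in for the patch's classes (file-offset case only; the SHT_NOBITS variant above does the same check on the virtual address range):

#include <cstdint>

struct Sec { uint64_t Offset, Size; };
struct Seg { uint64_t Offset, FileSize; };

// A section belongs to a segment iff its whole file range is covered.
// Empty sections are treated as one byte wide so that a section sitting
// exactly on the boundary between two segments is assigned to the
// second one, matching the comment above.
bool within(const Sec &S, const Seg &P) {
  uint64_t Size = S.Size ? S.Size : 1;
  return P.Offset <= S.Offset && P.Offset + P.FileSize >= S.Offset + Size;
}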
- if (A->OriginalOffset < B->OriginalOffset) - return true; - if (A->OriginalOffset > B->OriginalOffset) - return false; - return A->Index < B->Index; -} - -void BasicELFBuilder::initFileHeader() { - Obj->Flags = 0x0; - Obj->Type = ET_REL; - Obj->OSABI = ELFOSABI_NONE; - Obj->ABIVersion = 0; - Obj->Entry = 0x0; - Obj->Machine = EM_NONE; - Obj->Version = 1; -} - -void BasicELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; } - -StringTableSection *BasicELFBuilder::addStrTab() { - auto &StrTab = Obj->addSection(); - StrTab.Name = ".strtab"; - - Obj->SectionNames = &StrTab; - return &StrTab; -} - -SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) { - auto &SymTab = Obj->addSection(); - - SymTab.Name = ".symtab"; - SymTab.Link = StrTab->Index; - - // The symbol table always needs a null symbol - SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); - - Obj->SymbolTable = &SymTab; - return &SymTab; -} - -Error BasicELFBuilder::initSections() { - for (SectionBase &Sec : Obj->sections()) - if (Error Err = Sec.initialize(Obj->sections())) - return Err; - - return Error::success(); -} - -void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { - auto Data = ArrayRef( - reinterpret_cast(MemBuf->getBufferStart()), - MemBuf->getBufferSize()); - auto &DataSection = Obj->addSection
(Data); - DataSection.Name = ".data"; - DataSection.Type = ELF::SHT_PROGBITS; - DataSection.Size = Data.size(); - DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; - - std::string SanitizedFilename = MemBuf->getBufferIdentifier().str(); - std::replace_if( - std::begin(SanitizedFilename), std::end(SanitizedFilename), - [](char C) { return !isAlnum(C); }, '_'); - Twine Prefix = Twine("_binary_") + SanitizedFilename; - - SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection, - /*Value=*/0, NewSymbolVisibility, 0, 0); - SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection, - /*Value=*/DataSection.Size, NewSymbolVisibility, 0, 0); - SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr, - /*Value=*/DataSection.Size, NewSymbolVisibility, SHN_ABS, - 0); -} - -Expected> BinaryELFBuilder::build() { - initFileHeader(); - initHeaderSegment(); - - SymbolTableSection *SymTab = addSymTab(addStrTab()); - if (Error Err = initSections()) - return std::move(Err); - addData(SymTab); - - return std::move(Obj); -} - -// Adds sections from IHEX data file. Data should have been -// fully validated by this time. -void IHexELFBuilder::addDataSections() { - OwnedDataSection *Section = nullptr; - uint64_t SegmentAddr = 0, BaseAddr = 0; - uint32_t SecNo = 1; - - for (const IHexRecord &R : Records) { - uint64_t RecAddr; - switch (R.Type) { - case IHexRecord::Data: - // Ignore empty data records - if (R.HexData.empty()) - continue; - RecAddr = R.Addr + SegmentAddr + BaseAddr; - if (!Section || Section->Addr + Section->Size != RecAddr) { - // OriginalOffset field is only used to sort sections before layout, so - // instead of keeping track of real offsets in IHEX file, and as - // layoutSections() and layoutSectionsForOnlyKeepDebug() use - // llvm::stable_sort(), we can just set it to a constant (zero). - Section = &Obj->addSection( - ".sec" + std::to_string(SecNo), RecAddr, - ELF::SHF_ALLOC | ELF::SHF_WRITE, 0); - SecNo++; - } - Section->appendHexData(R.HexData); - break; - case IHexRecord::EndOfFile: - break; - case IHexRecord::SegmentAddr: - // 20-bit segment address. - SegmentAddr = checkedGetHex(R.HexData) << 4; - break; - case IHexRecord::StartAddr80x86: - case IHexRecord::StartAddr: - Obj->Entry = checkedGetHex(R.HexData); - assert(Obj->Entry <= 0xFFFFFU); - break; - case IHexRecord::ExtendedAddr: - // 16-31 bits of linear base address - BaseAddr = checkedGetHex(R.HexData) << 16; - break; - default: - llvm_unreachable("unknown record type"); - } - } -} - -Expected> IHexELFBuilder::build() { - initFileHeader(); - initHeaderSegment(); - StringTableSection *StrTab = addStrTab(); - addSymTab(StrTab); - if (Error Err = initSections()) - return std::move(Err); - addDataSections(); - - return std::move(Obj); -} - -template -ELFBuilder::ELFBuilder(const ELFObjectFile &ElfObj, Object &Obj, - Optional ExtractPartition) - : ElfFile(ElfObj.getELFFile()), Obj(Obj), - ExtractPartition(ExtractPartition) { - Obj.IsMips64EL = ElfFile.isMips64EL(); -} - -template void ELFBuilder::setParentSegment(Segment &Child) { - for (Segment &Parent : Obj.segments()) { - // Every segment will overlap with itself but we don't want a segment to - // be its own parent so we avoid that situation. - if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) { - // We want a canonical "most parental" segment but this requires - // inspecting the ParentSegment. 
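The _binary_<name>_{start,end,size} symbols added in addData above follow GNU objcopy's convention for binary input; the file name is turned into a C identifier first. A sketch of just the sanitizing step (std::isalnum standing in for LLVM's isAlnum):

#include <algorithm>
#include <cctype>
#include <string>

// Every character that is not alphanumeric becomes '_', then the
// _binary_ prefix is prepended; the _start/_end/_size symbol names are
// derived from this prefix.
std::string binarySymbolPrefix(std::string File) {
  std::replace_if(File.begin(), File.end(),
                  [](unsigned char C) { return !std::isalnum(C); }, '_');
  return "_binary_" + File;
}
// e.g. binarySymbolPrefix("img/logo.png") == "_binary_img_logo_png"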
- if (compareSegmentsByOffset(&Parent, &Child)) - if (Child.ParentSegment == nullptr || - compareSegmentsByOffset(&Parent, Child.ParentSegment)) { - Child.ParentSegment = &Parent; - } - } - } -} - -template Error ELFBuilder::findEhdrOffset() { - if (!ExtractPartition) - return Error::success(); - - for (const SectionBase &Sec : Obj.sections()) { - if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) { - EhdrOffset = Sec.Offset; - return Error::success(); - } - } - return createStringError(errc::invalid_argument, - "could not find partition named '" + - *ExtractPartition + "'"); -} - -template -Error ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { - uint32_t Index = 0; - - Expected::Elf_Phdr_Range> Headers = - HeadersFile.program_headers(); - if (!Headers) - return Headers.takeError(); - - for (const typename ELFFile::Elf_Phdr &Phdr : *Headers) { - if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize()) - return createStringError( - errc::invalid_argument, - "program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) + - " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) + - " goes past the end of the file"); - - ArrayRef Data{HeadersFile.base() + Phdr.p_offset, - (size_t)Phdr.p_filesz}; - Segment &Seg = Obj.addSegment(Data); - Seg.Type = Phdr.p_type; - Seg.Flags = Phdr.p_flags; - Seg.OriginalOffset = Phdr.p_offset + EhdrOffset; - Seg.Offset = Phdr.p_offset + EhdrOffset; - Seg.VAddr = Phdr.p_vaddr; - Seg.PAddr = Phdr.p_paddr; - Seg.FileSize = Phdr.p_filesz; - Seg.MemSize = Phdr.p_memsz; - Seg.Align = Phdr.p_align; - Seg.Index = Index++; - for (SectionBase &Sec : Obj.sections()) - if (sectionWithinSegment(Sec, Seg)) { - Seg.addSection(&Sec); - if (!Sec.ParentSegment || Sec.ParentSegment->Offset > Seg.Offset) - Sec.ParentSegment = &Seg; - } - } - - auto &ElfHdr = Obj.ElfHdrSegment; - ElfHdr.Index = Index++; - ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset; - - const typename ELFT::Ehdr &Ehdr = HeadersFile.getHeader(); - auto &PrHdr = Obj.ProgramHdrSegment; - PrHdr.Type = PT_PHDR; - PrHdr.Flags = 0; - // The spec requires us to have p_vaddr % p_align == p_offset % p_align. - // Whereas this works automatically for ElfHdr, here OriginalOffset is - // always non-zero and to ensure the equation we assign the same value to - // VAddr as well. - PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = EhdrOffset + Ehdr.e_phoff; - PrHdr.PAddr = 0; - PrHdr.FileSize = PrHdr.MemSize = Ehdr.e_phentsize * Ehdr.e_phnum; - // The spec requires us to naturally align all the fields. - PrHdr.Align = sizeof(Elf_Addr); - PrHdr.Index = Index++; - - // Now we do an O(n^2) loop through the segments in order to match up - // segments. 
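The congruence noted for PT_PHDR above is the general program-header rule p_vaddr % p_align == p_offset % p_align, which lets the loader map segments with page-granularity mmap. As a standalone predicate (illustrative, not the patch's API):

#include <cstdint>

// For p_align > 1 the ELF spec requires loadable segments to satisfy
// p_vaddr % p_align == p_offset % p_align.
bool segmentCongruent(uint64_t VAddr, uint64_t Offset, uint64_t Align) {
  return Align <= 1 || VAddr % Align == Offset % Align;
}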
- for (Segment &Child : Obj.segments()) - setParentSegment(Child); - setParentSegment(ElfHdr); - setParentSegment(PrHdr); - - return Error::success(); -} - -template -Error ELFBuilder::initGroupSection(GroupSection *GroupSec) { - if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0) - return createStringError(errc::invalid_argument, - "invalid alignment " + Twine(GroupSec->Align) + - " of group section '" + GroupSec->Name + "'"); - SectionTableRef SecTable = Obj.sections(); - if (GroupSec->Link != SHN_UNDEF) { - auto SymTab = SecTable.template getSectionOfType( - GroupSec->Link, - "link field value '" + Twine(GroupSec->Link) + "' in section '" + - GroupSec->Name + "' is invalid", - "link field value '" + Twine(GroupSec->Link) + "' in section '" + - GroupSec->Name + "' is not a symbol table"); - if (!SymTab) - return SymTab.takeError(); - - Expected Sym = (*SymTab)->getSymbolByIndex(GroupSec->Info); - if (!Sym) - return createStringError(errc::invalid_argument, - "info field value '" + Twine(GroupSec->Info) + - "' in section '" + GroupSec->Name + - "' is not a valid symbol index"); - GroupSec->setSymTab(*SymTab); - GroupSec->setSymbol(*Sym); - } - if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) || - GroupSec->Contents.empty()) - return createStringError(errc::invalid_argument, - "the content of the section " + GroupSec->Name + - " is malformed"); - const ELF::Elf32_Word *Word = - reinterpret_cast(GroupSec->Contents.data()); - const ELF::Elf32_Word *End = - Word + GroupSec->Contents.size() / sizeof(ELF::Elf32_Word); - GroupSec->setFlagWord( - support::endian::read32(Word++)); - for (; Word != End; ++Word) { - uint32_t Index = support::endian::read32(Word); - Expected Sec = SecTable.getSection( - Index, "group member index " + Twine(Index) + " in section '" + - GroupSec->Name + "' is invalid"); - if (!Sec) - return Sec.takeError(); - - GroupSec->addMember(*Sec); - } - - return Error::success(); -} - -template -Error ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { - Expected Shdr = ElfFile.getSection(SymTab->Index); - if (!Shdr) - return Shdr.takeError(); - - Expected StrTabData = ElfFile.getStringTableForSymtab(**Shdr); - if (!StrTabData) - return StrTabData.takeError(); - - ArrayRef ShndxData; - - Expected::Elf_Sym_Range> Symbols = - ElfFile.symbols(*Shdr); - if (!Symbols) - return Symbols.takeError(); - - for (const typename ELFFile::Elf_Sym &Sym : *Symbols) { - SectionBase *DefSection = nullptr; - - Expected Name = Sym.getName(*StrTabData); - if (!Name) - return Name.takeError(); - - if (Sym.st_shndx == SHN_XINDEX) { - if (SymTab->getShndxTable() == nullptr) - return createStringError(errc::invalid_argument, - "symbol '" + *Name + - "' has index SHN_XINDEX but no " - "SHT_SYMTAB_SHNDX section exists"); - if (ShndxData.data() == nullptr) { - Expected ShndxSec = - ElfFile.getSection(SymTab->getShndxTable()->Index); - if (!ShndxSec) - return ShndxSec.takeError(); - - Expected> Data = - ElfFile.template getSectionContentsAsArray(**ShndxSec); - if (!Data) - return Data.takeError(); - - ShndxData = *Data; - if (ShndxData.size() != Symbols->size()) - return createStringError( - errc::invalid_argument, - "symbol section index table does not have the same number of " - "entries as the symbol table"); - } - Elf_Word Index = ShndxData[&Sym - Symbols->begin()]; - Expected Sec = Obj.sections().getSection( - Index, - "symbol '" + *Name + "' has invalid section index " + Twine(Index)); - if (!Sec) - return Sec.takeError(); - - DefSection = *Sec; - } else if (Sym.st_shndx >= SHN_LORESERVE) 
{ - if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) { - return createStringError( - errc::invalid_argument, - "symbol '" + *Name + - "' has unsupported value greater than or equal " - "to SHN_LORESERVE: " + - Twine(Sym.st_shndx)); - } - } else if (Sym.st_shndx != SHN_UNDEF) { - Expected Sec = Obj.sections().getSection( - Sym.st_shndx, "symbol '" + *Name + - "' is defined has invalid section index " + - Twine(Sym.st_shndx)); - if (!Sec) - return Sec.takeError(); - - DefSection = *Sec; - } - - SymTab->addSymbol(*Name, Sym.getBinding(), Sym.getType(), DefSection, - Sym.getValue(), Sym.st_other, Sym.st_shndx, Sym.st_size); - } - - return Error::success(); -} - -template -static void getAddend(uint64_t &, const Elf_Rel_Impl &) {} - -template -static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl &Rela) { - ToSet = Rela.r_addend; -} - -template -static Error initRelocations(RelocationSection *Relocs, T RelRange) { - for (const auto &Rel : RelRange) { - Relocation ToAdd; - ToAdd.Offset = Rel.r_offset; - getAddend(ToAdd.Addend, Rel); - ToAdd.Type = Rel.getType(Relocs->getObject().IsMips64EL); - - if (uint32_t Sym = Rel.getSymbol(Relocs->getObject().IsMips64EL)) { - if (!Relocs->getObject().SymbolTable) - return createStringError( - errc::invalid_argument, - "'" + Relocs->Name + "': relocation references symbol with index " + - Twine(Sym) + ", but there is no symbol table"); - Expected SymByIndex = - Relocs->getObject().SymbolTable->getSymbolByIndex(Sym); - if (!SymByIndex) - return SymByIndex.takeError(); - - ToAdd.RelocSymbol = *SymByIndex; - } - - Relocs->addRelocation(ToAdd); - } - - return Error::success(); -} - -Expected SectionTableRef::getSection(uint32_t Index, - Twine ErrMsg) { - if (Index == SHN_UNDEF || Index > Sections.size()) - return createStringError(errc::invalid_argument, ErrMsg); - return Sections[Index - 1].get(); -} - -template -Expected SectionTableRef::getSectionOfType(uint32_t Index, - Twine IndexErrMsg, - Twine TypeErrMsg) { - Expected BaseSec = getSection(Index, IndexErrMsg); - if (!BaseSec) - return BaseSec.takeError(); - - if (T *Sec = dyn_cast(*BaseSec)) - return Sec; - - return createStringError(errc::invalid_argument, TypeErrMsg); -} - -template -Expected ELFBuilder::makeSection(const Elf_Shdr &Shdr) { - switch (Shdr.sh_type) { - case SHT_REL: - case SHT_RELA: - if (Shdr.sh_flags & SHF_ALLOC) { - if (Expected> Data = ElfFile.getSectionContents(Shdr)) - return Obj.addSection(*Data); - else - return Data.takeError(); - } - return Obj.addSection(Obj); - case SHT_STRTAB: - // If a string table is allocated we don't want to mess with it. That would - // mean altering the memory image. There are no special link types or - // anything so we can just use a Section. - if (Shdr.sh_flags & SHF_ALLOC) { - if (Expected> Data = ElfFile.getSectionContents(Shdr)) - return Obj.addSection
(*Data); - else - return Data.takeError(); - } - return Obj.addSection(); - case SHT_HASH: - case SHT_GNU_HASH: - // Hash tables should refer to SHT_DYNSYM which we're not going to change. - // Because of this we don't need to mess with the hash tables either. - if (Expected> Data = ElfFile.getSectionContents(Shdr)) - return Obj.addSection
(*Data); - else - return Data.takeError(); - case SHT_GROUP: - if (Expected> Data = ElfFile.getSectionContents(Shdr)) - return Obj.addSection(*Data); - else - return Data.takeError(); - case SHT_DYNSYM: - if (Expected> Data = ElfFile.getSectionContents(Shdr)) - return Obj.addSection(*Data); - else - return Data.takeError(); - case SHT_DYNAMIC: - if (Expected> Data = ElfFile.getSectionContents(Shdr)) - return Obj.addSection(*Data); - else - return Data.takeError(); - case SHT_SYMTAB: { - auto &SymTab = Obj.addSection(); - Obj.SymbolTable = &SymTab; - return SymTab; - } - case SHT_SYMTAB_SHNDX: { - auto &ShndxSection = Obj.addSection(); - Obj.SectionIndexTable = &ShndxSection; - return ShndxSection; - } - case SHT_NOBITS: - return Obj.addSection
(ArrayRef()); - default: { - Expected> Data = ElfFile.getSectionContents(Shdr); - if (!Data) - return Data.takeError(); - - Expected Name = ElfFile.getSectionName(Shdr); - if (!Name) - return Name.takeError(); - - if (Name->startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) { - uint64_t DecompressedSize, DecompressedAlign; - std::tie(DecompressedSize, DecompressedAlign) = - getDecompressedSizeAndAlignment(*Data); - Expected NewSection = - CompressedSection::create(*Data, DecompressedSize, DecompressedAlign); - if (!NewSection) - return NewSection.takeError(); - - return Obj.addSection(std::move(*NewSection)); - } - - return Obj.addSection
(*Data); - } - } -} - -template Error ELFBuilder::readSectionHeaders() { - uint32_t Index = 0; - Expected::Elf_Shdr_Range> Sections = - ElfFile.sections(); - if (!Sections) - return Sections.takeError(); - - for (const typename ELFFile::Elf_Shdr &Shdr : *Sections) { - if (Index == 0) { - ++Index; - continue; - } - Expected Sec = makeSection(Shdr); - if (!Sec) - return Sec.takeError(); - - Expected SecName = ElfFile.getSectionName(Shdr); - if (!SecName) - return SecName.takeError(); - Sec->Name = SecName->str(); - Sec->Type = Sec->OriginalType = Shdr.sh_type; - Sec->Flags = Sec->OriginalFlags = Shdr.sh_flags; - Sec->Addr = Shdr.sh_addr; - Sec->Offset = Shdr.sh_offset; - Sec->OriginalOffset = Shdr.sh_offset; - Sec->Size = Shdr.sh_size; - Sec->Link = Shdr.sh_link; - Sec->Info = Shdr.sh_info; - Sec->Align = Shdr.sh_addralign; - Sec->EntrySize = Shdr.sh_entsize; - Sec->Index = Index++; - Sec->OriginalIndex = Sec->Index; - Sec->OriginalData = - ArrayRef(ElfFile.base() + Shdr.sh_offset, - (Shdr.sh_type == SHT_NOBITS) ? (size_t)0 : Shdr.sh_size); - } - - return Error::success(); -} - -template Error ELFBuilder::readSections(bool EnsureSymtab) { - uint32_t ShstrIndex = ElfFile.getHeader().e_shstrndx; - if (ShstrIndex == SHN_XINDEX) { - Expected Sec = ElfFile.getSection(0); - if (!Sec) - return Sec.takeError(); - - ShstrIndex = (*Sec)->sh_link; - } - - if (ShstrIndex == SHN_UNDEF) - Obj.HadShdrs = false; - else { - Expected Sec = - Obj.sections().template getSectionOfType( - ShstrIndex, - "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " + - " is invalid", - "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " + - " does not reference a string table"); - if (!Sec) - return Sec.takeError(); - - Obj.SectionNames = *Sec; - } - - // If a section index table exists we'll need to initialize it before we - // initialize the symbol table because the symbol table might need to - // reference it. - if (Obj.SectionIndexTable) - if (Error Err = Obj.SectionIndexTable->initialize(Obj.sections())) - return Err; - - // Now that all of the sections have been added we can fill out some extra - // details about symbol tables. We need the symbol table filled out before - // any relocations. - if (Obj.SymbolTable) { - if (Error Err = Obj.SymbolTable->initialize(Obj.sections())) - return Err; - if (Error Err = initSymbolTable(Obj.SymbolTable)) - return Err; - } else if (EnsureSymtab) { - if (Error Err = Obj.addNewSymbolTable()) - return Err; - } - - // Now that all sections and symbols have been added we can add - // relocations that reference symbols and set the link and info fields for - // relocation sections. 
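readSections above begins by resolving an overflowed e_shstrndx. The rule in isolation (resolveShstrndx is a hypothetical helper; field names per the ELF gABI):

#include <cstdint>

constexpr uint32_t SHN_XINDEX = 0xffff;

// e_shstrndx normally holds the index of the section-name string table.
// When that index does not fit, the field holds SHN_XINDEX and the real
// value lives in sh_link of the (otherwise unused) section header at
// index 0, mirroring the SHN_XINDEX check above.
uint32_t resolveShstrndx(uint16_t EShstrndx, uint32_t Shdr0Link) {
  return EShstrndx == SHN_XINDEX ? Shdr0Link : EShstrndx;
}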
- for (SectionBase &Sec : Obj.sections()) { - if (&Sec == Obj.SymbolTable) - continue; - if (Error Err = Sec.initialize(Obj.sections())) - return Err; - if (auto RelSec = dyn_cast(&Sec)) { - Expected::Elf_Shdr_Range> Sections = - ElfFile.sections(); - if (!Sections) - return Sections.takeError(); - - const typename ELFFile::Elf_Shdr *Shdr = - Sections->begin() + RelSec->Index; - if (RelSec->Type == SHT_REL) { - Expected::Elf_Rel_Range> Rels = - ElfFile.rels(*Shdr); - if (!Rels) - return Rels.takeError(); - - if (Error Err = initRelocations(RelSec, *Rels)) - return Err; - } else { - Expected::Elf_Rela_Range> Relas = - ElfFile.relas(*Shdr); - if (!Relas) - return Relas.takeError(); - - if (Error Err = initRelocations(RelSec, *Relas)) - return Err; - } - } else if (auto GroupSec = dyn_cast(&Sec)) { - if (Error Err = initGroupSection(GroupSec)) - return Err; - } - } - - return Error::success(); -} - -template Error ELFBuilder::build(bool EnsureSymtab) { - if (Error E = readSectionHeaders()) - return E; - if (Error E = findEhdrOffset()) - return E; - - // The ELFFile whose ELF headers and program headers are copied into the - // output file. Normally the same as ElfFile, but if we're extracting a - // loadable partition it will point to the partition's headers. - Expected> HeadersFile = ELFFile::create(toStringRef( - {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})); - if (!HeadersFile) - return HeadersFile.takeError(); - - const typename ELFFile::Elf_Ehdr &Ehdr = HeadersFile->getHeader(); - Obj.OSABI = Ehdr.e_ident[EI_OSABI]; - Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION]; - Obj.Type = Ehdr.e_type; - Obj.Machine = Ehdr.e_machine; - Obj.Version = Ehdr.e_version; - Obj.Entry = Ehdr.e_entry; - Obj.Flags = Ehdr.e_flags; - - if (Error E = readSections(EnsureSymtab)) - return E; - return readProgramHeaders(*HeadersFile); -} - -Writer::~Writer() {} - -Reader::~Reader() {} - -Expected> -BinaryReader::create(bool /*EnsureSymtab*/) const { - return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build(); -} - -Expected> IHexReader::parse() const { - SmallVector Lines; - std::vector Records; - bool HasSections = false; - - MemBuf->getBuffer().split(Lines, '\n'); - Records.reserve(Lines.size()); - for (size_t LineNo = 1; LineNo <= Lines.size(); ++LineNo) { - StringRef Line = Lines[LineNo - 1].trim(); - if (Line.empty()) - continue; - - Expected R = IHexRecord::parse(Line); - if (!R) - return parseError(LineNo, R.takeError()); - if (R->Type == IHexRecord::EndOfFile) - break; - HasSections |= (R->Type == IHexRecord::Data); - Records.push_back(*R); - } - if (!HasSections) - return parseError(-1U, "no sections"); - - return std::move(Records); -} - -Expected> -IHexReader::create(bool /*EnsureSymtab*/) const { - Expected> Records = parse(); - if (!Records) - return Records.takeError(); - - return IHexELFBuilder(*Records).build(); -} - -Expected> ELFReader::create(bool EnsureSymtab) const { - auto Obj = std::make_unique(); - if (auto *O = dyn_cast>(Bin)) { - ELFBuilder Builder(*O, *Obj, ExtractPartition); - if (Error Err = Builder.build(EnsureSymtab)) - return std::move(Err); - return std::move(Obj); - } else if (auto *O = dyn_cast>(Bin)) { - ELFBuilder Builder(*O, *Obj, ExtractPartition); - if (Error Err = Builder.build(EnsureSymtab)) - return std::move(Err); - return std::move(Obj); - } else if (auto *O = dyn_cast>(Bin)) { - ELFBuilder Builder(*O, *Obj, ExtractPartition); - if (Error Err = Builder.build(EnsureSymtab)) - return std::move(Err); - return std::move(Obj); - } else if (auto *O = 
dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
-    ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
-    if (Error Err = Builder.build(EnsureSymtab))
-      return std::move(Err);
-    return std::move(Obj);
-  }
-  return createStringError(errc::invalid_argument, "invalid file type");
-}
-
-template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
-  Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(Buf->getBufferStart());
-  std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
-  Ehdr.e_ident[EI_MAG0] = 0x7f;
-  Ehdr.e_ident[EI_MAG1] = 'E';
-  Ehdr.e_ident[EI_MAG2] = 'L';
-  Ehdr.e_ident[EI_MAG3] = 'F';
-  Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
-  Ehdr.e_ident[EI_DATA] =
-      ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB;
-  Ehdr.e_ident[EI_VERSION] = EV_CURRENT;
-  Ehdr.e_ident[EI_OSABI] = Obj.OSABI;
-  Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion;
-
-  Ehdr.e_type = Obj.Type;
-  Ehdr.e_machine = Obj.Machine;
-  Ehdr.e_version = Obj.Version;
-  Ehdr.e_entry = Obj.Entry;
-  // We have to use the fully-qualified name llvm::size
-  // since some compilers complain about ambiguous resolution.
-  Ehdr.e_phnum = llvm::size(Obj.segments());
-  Ehdr.e_phoff = (Ehdr.e_phnum != 0) ? Obj.ProgramHdrSegment.Offset : 0;
-  Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? sizeof(Elf_Phdr) : 0;
-  Ehdr.e_flags = Obj.Flags;
-  Ehdr.e_ehsize = sizeof(Elf_Ehdr);
-  if (WriteSectionHeaders && Obj.sections().size() != 0) {
-    Ehdr.e_shentsize = sizeof(Elf_Shdr);
-    Ehdr.e_shoff = Obj.SHOff;
-    // """
-    // If the number of sections is greater than or equal to
-    // SHN_LORESERVE (0xff00), this member has the value zero and the actual
-    // number of section header table entries is contained in the sh_size field
-    // of the section header at index 0.
-    // """
-    auto Shnum = Obj.sections().size() + 1;
-    if (Shnum >= SHN_LORESERVE)
-      Ehdr.e_shnum = 0;
-    else
-      Ehdr.e_shnum = Shnum;
-    // """
-    // If the section name string table section index is greater than or equal
-    // to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX (0xffff)
-    // and the actual index of the section name string table section is
-    // contained in the sh_link field of the section header at index 0.
-    // """
-    if (Obj.SectionNames->Index >= SHN_LORESERVE)
-      Ehdr.e_shstrndx = SHN_XINDEX;
-    else
-      Ehdr.e_shstrndx = Obj.SectionNames->Index;
-  } else {
-    Ehdr.e_shentsize = 0;
-    Ehdr.e_shoff = 0;
-    Ehdr.e_shnum = 0;
-    Ehdr.e_shstrndx = 0;
-  }
-}
-
-template <class ELFT> void ELFWriter<ELFT>::writePhdrs() {
-  for (auto &Seg : Obj.segments())
-    writePhdr(Seg);
-}
-
-template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
-  // This reference serves to write the dummy section header at the beginning
-  // of the file. It is not used for anything else.
-  Elf_Shdr &Shdr =
-      *reinterpret_cast<Elf_Shdr *>(Buf->getBufferStart() + Obj.SHOff);
-  Shdr.sh_name = 0;
-  Shdr.sh_type = SHT_NULL;
-  Shdr.sh_flags = 0;
-  Shdr.sh_addr = 0;
-  Shdr.sh_offset = 0;
-  // See writeEhdr for why we do this.
-  uint64_t Shnum = Obj.sections().size() + 1;
-  if (Shnum >= SHN_LORESERVE)
-    Shdr.sh_size = Shnum;
-  else
-    Shdr.sh_size = 0;
-  // See writeEhdr for why we do this.
-  if (Obj.SectionNames != nullptr && Obj.SectionNames->Index >= SHN_LORESERVE)
-    Shdr.sh_link = Obj.SectionNames->Index;
-  else
-    Shdr.sh_link = 0;
-  Shdr.sh_info = 0;
-  Shdr.sh_addralign = 0;
-  Shdr.sh_entsize = 0;
-
-  for (SectionBase &Sec : Obj.sections())
-    writeShdr(Sec);
-}
-
-template <class ELFT> Error ELFWriter<ELFT>::writeSectionData() {
-  for (SectionBase &Sec : Obj.sections())
-    // Segments are responsible for writing their contents, so only write the
-    // section data if the section is not in a segment.
Note that this renders - // sections in segments effectively immutable. - if (Sec.ParentSegment == nullptr) - if (Error Err = Sec.accept(*SecWriter)) - return Err; - - return Error::success(); -} - -template void ELFWriter::writeSegmentData() { - for (Segment &Seg : Obj.segments()) { - size_t Size = std::min(Seg.FileSize, Seg.getContents().size()); - std::memcpy(Buf->getBufferStart() + Seg.Offset, Seg.getContents().data(), - Size); - } - - for (auto it : Obj.getUpdatedSections()) { - SectionBase *Sec = it.first; - ArrayRef Data = it.second; - - auto *Parent = Sec->ParentSegment; - assert(Parent && "This section should've been part of a segment."); - uint64_t Offset = - Sec->OriginalOffset - Parent->OriginalOffset + Parent->Offset; - llvm::copy(Data, Buf->getBufferStart() + Offset); - } - - // Iterate over removed sections and overwrite their old data with zeroes. - for (auto &Sec : Obj.removedSections()) { - Segment *Parent = Sec.ParentSegment; - if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0) - continue; - uint64_t Offset = - Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset; - std::memset(Buf->getBufferStart() + Offset, 0, Sec.Size); - } -} - -template -ELFWriter::ELFWriter(Object &Obj, raw_ostream &Buf, bool WSH, - bool OnlyKeepDebug) - : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs), - OnlyKeepDebug(OnlyKeepDebug) {} - -Error Object::updateSection(StringRef Name, ArrayRef Data) { - auto It = llvm::find_if(Sections, - [&](const SecPtr &Sec) { return Sec->Name == Name; }); - if (It == Sections.end()) - return createStringError(errc::invalid_argument, "section '%s' not found", - Name.str().c_str()); - - auto *OldSec = It->get(); - if (!OldSec->hasContents()) - return createStringError( - errc::invalid_argument, - "section '%s' can't be updated because it does not have contents", - Name.str().c_str()); - - if (Data.size() > OldSec->Size && OldSec->ParentSegment) - return createStringError(errc::invalid_argument, - "cannot fit data of size %zu into section '%s' " - "with size %zu that is part of a segment", - Data.size(), Name.str().c_str(), OldSec->Size); - - if (!OldSec->ParentSegment) { - *It = std::make_unique(*OldSec, Data); - } else { - // The segment writer will be in charge of updating these contents. - OldSec->Size = Data.size(); - UpdatedSections[OldSec] = Data; - } - - return Error::success(); -} - -Error Object::removeSections( - bool AllowBrokenLinks, std::function ToRemove) { - - auto Iter = std::stable_partition( - std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) { - if (ToRemove(*Sec)) - return false; - if (auto RelSec = dyn_cast(Sec.get())) { - if (auto ToRelSec = RelSec->getSection()) - return !ToRemove(*ToRelSec); - } - return true; - }); - if (SymbolTable != nullptr && ToRemove(*SymbolTable)) - SymbolTable = nullptr; - if (SectionNames != nullptr && ToRemove(*SectionNames)) - SectionNames = nullptr; - if (SectionIndexTable != nullptr && ToRemove(*SectionIndexTable)) - SectionIndexTable = nullptr; - // Now make sure there are no remaining references to the sections that will - // be removed. Sometimes it is impossible to remove a reference so we emit - // an error here instead. 
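Stepping back to writeEhdr and writeShdrs above: both quote the gABI rule that overflowing values move into the otherwise-unused section header at index 0. The encoding, condensed into one hypothetical helper:

#include <cstdint>

constexpr uint32_t SHN_LORESERVE = 0xff00;
constexpr uint32_t SHN_XINDEX = 0xffff;

struct EhdrCounts { uint16_t e_shnum, e_shstrndx; };
struct NullShdr { uint64_t sh_size; uint32_t sh_link; };

// Shnum includes the null section header; Shstrndx is the index of the
// section-name string table. Values below SHN_LORESERVE are stored in
// the ELF header; larger ones spill into section header 0.
void encodeCounts(uint64_t Shnum, uint32_t Shstrndx, EhdrCounts &E,
                  NullShdr &Z) {
  E.e_shnum = Shnum >= SHN_LORESERVE ? 0 : uint16_t(Shnum);
  Z.sh_size = Shnum >= SHN_LORESERVE ? Shnum : 0;
  E.e_shstrndx =
      Shstrndx >= SHN_LORESERVE ? uint16_t(SHN_XINDEX) : uint16_t(Shstrndx);
  Z.sh_link = Shstrndx >= SHN_LORESERVE ? Shstrndx : 0;
}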
-  std::unordered_set<const SectionBase *> RemoveSections;
-  RemoveSections.reserve(std::distance(Iter, std::end(Sections)));
-  for (auto &RemoveSec : make_range(Iter, std::end(Sections))) {
-    for (auto &Segment : Segments)
-      Segment->removeSection(RemoveSec.get());
-    RemoveSec->onRemove();
-    RemoveSections.insert(RemoveSec.get());
-  }
-
-  // For each section that remains alive, we want to remove the dead references.
-  // This might either update the content of the section (e.g. remove symbols
-  // from the symbol table that belong to a removed section) or trigger an
-  // error if a live section critically depends on a section being removed
-  // somehow (e.g. the removed section is referenced by a relocation).
-  for (auto &KeepSec : make_range(std::begin(Sections), Iter)) {
-    if (Error E = KeepSec->removeSectionReferences(
-            AllowBrokenLinks, [&RemoveSections](const SectionBase *Sec) {
-              return RemoveSections.find(Sec) != RemoveSections.end();
-            }))
-      return E;
-  }
-
-  // Transfer removed sections into the Object RemovedSections container for use
-  // later.
-  std::move(Iter, Sections.end(), std::back_inserter(RemovedSections));
-  // Now finally get rid of them altogether.
-  Sections.erase(Iter, std::end(Sections));
-  return Error::success();
-}
-
-Error Object::replaceSections(
-    const DenseMap<SectionBase *, SectionBase *> &FromTo) {
-  auto SectionIndexLess = [](const SecPtr &Lhs, const SecPtr &Rhs) {
-    return Lhs->Index < Rhs->Index;
-  };
-  assert(llvm::is_sorted(Sections, SectionIndexLess) &&
-         "Sections are expected to be sorted by Index");
-  // Set indices of new sections so that they can be later sorted into positions
-  // of removed ones.
-  for (auto &I : FromTo)
-    I.second->Index = I.first->Index;
-
-  // Notify all sections about the replacement.
-  for (auto &Sec : Sections)
-    Sec->replaceSectionReferences(FromTo);
-
-  if (Error E = removeSections(
-          /*AllowBrokenLinks=*/false,
-          [=](const SectionBase &Sec) { return FromTo.count(&Sec) > 0; }))
-    return E;
-  llvm::sort(Sections, SectionIndexLess);
-  return Error::success();
-}
-
-Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
-  if (SymbolTable)
-    for (const SecPtr &Sec : Sections)
-      if (Error E = Sec->removeSymbols(ToRemove))
-        return E;
-  return Error::success();
-}
-
-Error Object::addNewSymbolTable() {
-  assert(!SymbolTable && "Object must not have a SymbolTable.");
-
-  // Reuse an existing SHT_STRTAB section if it exists.
-  StringTableSection *StrTab = nullptr;
-  for (SectionBase &Sec : sections()) {
-    if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) {
-      StrTab = static_cast<StringTableSection *>(&Sec);
-
-      // Prefer a string table that is not the section header string table, if
-      // such a table exists.
-      if (SectionNames != &Sec)
-        break;
-    }
-  }
-  if (!StrTab)
-    StrTab = &addSection<StringTableSection>();
-
-  SymbolTableSection &SymTab = addSection<SymbolTableSection>();
-  SymTab.Name = ".symtab";
-  SymTab.Link = StrTab->Index;
-  if (Error Err = SymTab.initialize(sections()))
-    return Err;
-  SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
-
-  SymbolTable = &SymTab;
-
-  return Error::success();
-}
-
-// Orders segments such that if x = y->ParentSegment then y comes before x.
-static void orderSegments(std::vector<Segment *> &Segments) {
-  llvm::stable_sort(Segments, compareSegmentsByOffset);
-}
-
-// This function finds a consistent layout for a list of segments starting from
-// an Offset. It assumes that Segments have been sorted by orderSegments and
-// returns an Offset one past the end of the last segment.
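The only subtle step in the function that follows is alignTo(Offset, Align, VAddr): round Offset up to the smallest value congruent to VAddr modulo Align, so the p_vaddr/p_offset congruence noted earlier is preserved for segments that get fresh offsets. A sketch that mirrors llvm::alignTo's three-argument (skewed) form:

#include <cassert>
#include <cstdint>

// Smallest N >= Value with N % Align == Skew % Align. Align must be
// non-zero; Align == 1 degenerates to the identity.
uint64_t alignToSkew(uint64_t Value, uint64_t Align, uint64_t Skew) {
  assert(Align != 0 && "alignment must be non-zero");
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}
// e.g. alignToSkew(0x1234, 0x1000, 0x10) == 0x2010:
// 0x2010 % 0x1000 == 0x10, and it is the first such value >= 0x1234.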
-static uint64_t layoutSegments(std::vector &Segments, - uint64_t Offset) { - assert(llvm::is_sorted(Segments, compareSegmentsByOffset)); - // The only way a segment should move is if a section was between two - // segments and that section was removed. If that section isn't in a segment - // then it's acceptable, but not ideal, to simply move it to after the - // segments. So we can simply layout segments one after the other accounting - // for alignment. - for (Segment *Seg : Segments) { - // We assume that segments have been ordered by OriginalOffset and Index - // such that a parent segment will always come before a child segment in - // OrderedSegments. This means that the Offset of the ParentSegment should - // already be set and we can set our offset relative to it. - if (Seg->ParentSegment != nullptr) { - Segment *Parent = Seg->ParentSegment; - Seg->Offset = - Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset; - } else { - Seg->Offset = - alignTo(Offset, std::max(Seg->Align, 1), Seg->VAddr); - } - Offset = std::max(Offset, Seg->Offset + Seg->FileSize); - } - return Offset; -} - -// This function finds a consistent layout for a list of sections. It assumes -// that the ->ParentSegment of each section has already been laid out. The -// supplied starting Offset is used for the starting offset of any section that -// does not have a ParentSegment. It returns either the offset given if all -// sections had a ParentSegment or an offset one past the last section if there -// was a section that didn't have a ParentSegment. -template -static uint64_t layoutSections(Range Sections, uint64_t Offset) { - // Now the offset of every segment has been set we can assign the offsets - // of each section. For sections that are covered by a segment we should use - // the segment's original offset and the section's original offset to compute - // the offset from the start of the segment. Using the offset from the start - // of the segment we can assign a new offset to the section. For sections not - // covered by segments we can just bump Offset to the next valid location. - // While it is not necessary, layout the sections in the order based on their - // original offsets to resemble the input file as close as possible. - std::vector OutOfSegmentSections; - uint32_t Index = 1; - for (auto &Sec : Sections) { - Sec.Index = Index++; - if (Sec.ParentSegment != nullptr) { - auto Segment = *Sec.ParentSegment; - Sec.Offset = - Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset); - } else - OutOfSegmentSections.push_back(&Sec); - } - - llvm::stable_sort(OutOfSegmentSections, - [](const SectionBase *Lhs, const SectionBase *Rhs) { - return Lhs->OriginalOffset < Rhs->OriginalOffset; - }); - for (auto *Sec : OutOfSegmentSections) { - Offset = alignTo(Offset, Sec->Align == 0 ? 1 : Sec->Align); - Sec->Offset = Offset; - if (Sec->Type != SHT_NOBITS) - Offset += Sec->Size; - } - return Offset; -} - -// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus -// occupy no space in the file. -static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) { - // The layout algorithm requires the sections to be handled in the order of - // their offsets in the input file, at least inside segments. 
- std::vector Sections; - Sections.reserve(Obj.sections().size()); - uint32_t Index = 1; - for (auto &Sec : Obj.sections()) { - Sec.Index = Index++; - Sections.push_back(&Sec); - } - llvm::stable_sort(Sections, - [](const SectionBase *Lhs, const SectionBase *Rhs) { - return Lhs->OriginalOffset < Rhs->OriginalOffset; - }); - - for (auto *Sec : Sections) { - auto *FirstSec = Sec->ParentSegment && Sec->ParentSegment->Type == PT_LOAD - ? Sec->ParentSegment->firstSection() - : nullptr; - - // The first section in a PT_LOAD has to have congruent offset and address - // modulo the alignment, which usually equals the maximum page size. - if (FirstSec && FirstSec == Sec) - Off = alignTo(Off, Sec->ParentSegment->Align, Sec->Addr); - - // sh_offset is not significant for SHT_NOBITS sections, but the congruence - // rule must be followed if it is the first section in a PT_LOAD. Do not - // advance Off. - if (Sec->Type == SHT_NOBITS) { - Sec->Offset = Off; - continue; - } - - if (!FirstSec) { - // FirstSec being nullptr generally means that Sec does not have the - // SHF_ALLOC flag. - Off = Sec->Align ? alignTo(Off, Sec->Align) : Off; - } else if (FirstSec != Sec) { - // The offset is relative to the first section in the PT_LOAD segment. Use - // sh_offset for non-SHF_ALLOC sections. - Off = Sec->OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset; - } - Sec->Offset = Off; - Off += Sec->Size; - } - return Off; -} - -// Rewrite p_offset and p_filesz of non-PT_PHDR segments after sh_offset values -// have been updated. -static uint64_t layoutSegmentsForOnlyKeepDebug(std::vector &Segments, - uint64_t HdrEnd) { - uint64_t MaxOffset = 0; - for (Segment *Seg : Segments) { - if (Seg->Type == PT_PHDR) - continue; - - // The segment offset is generally the offset of the first section. - // - // For a segment containing no section (see sectionWithinSegment), if it has - // a parent segment, copy the parent segment's offset field. This works for - // empty PT_TLS. If no parent segment, use 0: the segment is not useful for - // debugging anyway. - const SectionBase *FirstSec = Seg->firstSection(); - uint64_t Offset = - FirstSec ? FirstSec->Offset - : (Seg->ParentSegment ? Seg->ParentSegment->Offset : 0); - uint64_t FileSize = 0; - for (const SectionBase *Sec : Seg->Sections) { - uint64_t Size = Sec->Type == SHT_NOBITS ? 0 : Sec->Size; - if (Sec->Offset + Size > Offset) - FileSize = std::max(FileSize, Sec->Offset + Size - Offset); - } - - // If the segment includes EHDR and program headers, don't make it smaller - // than the headers. - if (Seg->Offset < HdrEnd && HdrEnd <= Seg->Offset + Seg->FileSize) { - FileSize += Offset - Seg->Offset; - Offset = Seg->Offset; - FileSize = std::max(FileSize, HdrEnd - Offset); - } - - Seg->Offset = Offset; - Seg->FileSize = FileSize; - MaxOffset = std::max(MaxOffset, Offset + FileSize); - } - return MaxOffset; -} - -template void ELFWriter::initEhdrSegment() { - Segment &ElfHdr = Obj.ElfHdrSegment; - ElfHdr.Type = PT_PHDR; - ElfHdr.Flags = 0; - ElfHdr.VAddr = 0; - ElfHdr.PAddr = 0; - ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr); - ElfHdr.Align = 0; -} - -template void ELFWriter::assignOffsets() { - // We need a temporary list of segments that has a special order to it - // so that we know that anytime ->ParentSegment is set that segment has - // already had its offset properly set. 
-  std::vector<Segment *> OrderedSegments;
-  for (Segment &Segment : Obj.segments())
-    OrderedSegments.push_back(&Segment);
-  OrderedSegments.push_back(&Obj.ElfHdrSegment);
-  OrderedSegments.push_back(&Obj.ProgramHdrSegment);
-  orderSegments(OrderedSegments);
-
-  uint64_t Offset;
-  if (OnlyKeepDebug) {
-    // For --only-keep-debug, the sections that did not preserve contents were
-    // changed to SHT_NOBITS. We now rewrite sh_offset fields of sections, and
-    // then rewrite p_offset/p_filesz of program headers.
-    uint64_t HdrEnd =
-        sizeof(Elf_Ehdr) + llvm::size(Obj.segments()) * sizeof(Elf_Phdr);
-    Offset = layoutSectionsForOnlyKeepDebug(Obj, HdrEnd);
-    Offset = std::max(Offset,
-                      layoutSegmentsForOnlyKeepDebug(OrderedSegments, HdrEnd));
-  } else {
-    // Offset is used as the start offset of the first segment to be laid out.
-    // Since the ELF Header (ElfHdrSegment) must be at the start of the file,
-    // we start at offset 0.
-    Offset = layoutSegments(OrderedSegments, 0);
-    Offset = layoutSections(Obj.sections(), Offset);
-  }
-  // If we need to write the section header table out then we need to align the
-  // Offset so that SHOffset is valid.
-  if (WriteSectionHeaders)
-    Offset = alignTo(Offset, sizeof(Elf_Addr));
-  Obj.SHOff = Offset;
-}
-
-template <class ELFT> size_t ELFWriter<ELFT>::totalSize() const {
-  // We already have the section header offset so we can calculate the total
-  // size by just adding up the size of each section header.
-  if (!WriteSectionHeaders)
-    return Obj.SHOff;
-  size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr.
-  return Obj.SHOff + ShdrCount * sizeof(Elf_Shdr);
-}
-
-template <class ELFT> Error ELFWriter<ELFT>::write() {
-  // Segment data must be written first, so that the ELF header and program
-  // header tables can overwrite it, if covered by a segment.
-  writeSegmentData();
-  writeEhdr();
-  writePhdrs();
-  if (Error E = writeSectionData())
-    return E;
-  if (WriteSectionHeaders)
-    writeShdrs();
-
-  // TODO: Implement direct writing to the output stream (without intermediate
-  // memory buffer Buf).
-  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
-  return Error::success();
-}
-
-static Error removeUnneededSections(Object &Obj) {
-  // We can remove an empty symbol table from non-relocatable objects.
-  // Relocatable objects typically have relocation sections whose
-  // sh_link field points to .symtab, so we can't remove .symtab
-  // even if it is empty.
-  if (Obj.isRelocatable() || Obj.SymbolTable == nullptr ||
-      !Obj.SymbolTable->empty())
-    return Error::success();
-
-  // .strtab can be used for section names. In such a case we shouldn't
-  // remove it.
-  auto *StrTab = Obj.SymbolTable->getStrTab() == Obj.SectionNames
-                     ? nullptr
-                     : Obj.SymbolTable->getStrTab();
-  return Obj.removeSections(false, [&](const SectionBase &Sec) {
-    return &Sec == Obj.SymbolTable || &Sec == StrTab;
-  });
-}
-
-template <class ELFT> Error ELFWriter<ELFT>::finalize() {
-  // It could happen that SectionNames has been removed and yet the user wants
-  // a section header table output. We need to throw an error if a user tries
-  // to do that.
-  if (Obj.SectionNames == nullptr && WriteSectionHeaders)
-    return createStringError(llvm::errc::invalid_argument,
-                             "cannot write section header table because "
-                             "section header string table was removed");
-
-  if (Error E = removeUnneededSections(Obj))
-    return E;
-
-  // We need to assign indexes before we perform layout because we need to know
-  // if we need large indexes or not. We can assign indexes first and check as
-  // we go to see if we will actually need large indexes.
- bool NeedsLargeIndexes = false; - if (Obj.sections().size() >= SHN_LORESERVE) { - SectionTableRef Sections = Obj.sections(); - // Sections doesn't include the null section header, so account for this - // when skipping the first N sections. - NeedsLargeIndexes = - any_of(drop_begin(Sections, SHN_LORESERVE - 1), - [](const SectionBase &Sec) { return Sec.HasSymbol; }); - // TODO: handle case where only one section needs the large index table but - // only needs it because the large index table hasn't been removed yet. - } - - if (NeedsLargeIndexes) { - // This means we definitely need to have a section index table but if we - // already have one then we should use it instead of making a new one. - if (Obj.SymbolTable != nullptr && Obj.SectionIndexTable == nullptr) { - // Addition of a section to the end does not invalidate the indexes of - // other sections and assigns the correct index to the new section. - auto &Shndx = Obj.addSection(); - Obj.SymbolTable->setShndxTable(&Shndx); - Shndx.setSymTab(Obj.SymbolTable); - } - } else { - // Since we don't need SectionIndexTable we should remove it and all - // references to it. - if (Obj.SectionIndexTable != nullptr) { - // We do not support sections referring to the section index table. - if (Error E = Obj.removeSections(false /*AllowBrokenLinks*/, - [this](const SectionBase &Sec) { - return &Sec == Obj.SectionIndexTable; - })) - return E; - } - } - - // Make sure we add the names of all the sections. Importantly this must be - // done after we decide to add or remove SectionIndexes. - if (Obj.SectionNames != nullptr) - for (const SectionBase &Sec : Obj.sections()) - Obj.SectionNames->addString(Sec.Name); - - initEhdrSegment(); - - // Before we can prepare for layout the indexes need to be finalized. - // Also, the output arch may not be the same as the input arch, so fix up - // size-related fields before doing layout calculations. - uint64_t Index = 0; - auto SecSizer = std::make_unique>(); - for (SectionBase &Sec : Obj.sections()) { - Sec.Index = Index++; - if (Error Err = Sec.accept(*SecSizer)) - return Err; - } - - // The symbol table does not update all other sections on update. For - // instance, symbol names are not added as new symbols are added. This means - // that some sections, like .strtab, don't yet have their final size. - if (Obj.SymbolTable != nullptr) - Obj.SymbolTable->prepareForLayout(); - - // Now that all strings are added we want to finalize string table builders, - // because that affects section sizes which in turn affects section offsets. - for (SectionBase &Sec : Obj.sections()) - if (auto StrTab = dyn_cast(&Sec)) - StrTab->prepareForLayout(); - - assignOffsets(); - - // layoutSections could have modified section indexes, so we need - // to fill the index table after assignOffsets. - if (Obj.SymbolTable != nullptr) - Obj.SymbolTable->fillShndxTable(); - - // Finally now that all offsets and indexes have been set we can finalize any - // remaining issues. 
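
One detail from the index handling above deserves a concrete restatement: st_shndx is a 16-bit field, and section indexes at or above SHN_LORESERVE (0xff00) collide with its reserved values, so a symbol defined in such a section stores SHN_XINDEX instead and the real 32-bit index goes into the parallel SHT_SYMTAB_SHNDX table. A sketch of that encoding (hypothetical helper, not the tool's API):

#include <cstdint>

struct ShndxEncoding {
  uint16_t StShndx;     // value written to the symbol's st_shndx field
  uint32_t XindexEntry; // entry in the .symtab_shndx table (0 when unused)
};

static ShndxEncoding encodeSectionIndex(uint32_t SecIndex) {
  const uint16_t LoReserve = 0xff00; // ELF::SHN_LORESERVE
  const uint16_t XIndex = 0xffff;    // ELF::SHN_XINDEX
  if (SecIndex >= LoReserve)
    return {XIndex, SecIndex}; // escape: real index lives in .symtab_shndx
  return {static_cast<uint16_t>(SecIndex), 0};
}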
- uint64_t Offset = Obj.SHOff + sizeof(Elf_Shdr); - for (SectionBase &Sec : Obj.sections()) { - Sec.HeaderOffset = Offset; - Offset += sizeof(Elf_Shdr); - if (WriteSectionHeaders) - Sec.NameIndex = Obj.SectionNames->findIndex(Sec.Name); - Sec.finalize(); - } - - size_t TotalSize = totalSize(); - Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); - if (!Buf) - return createStringError(errc::not_enough_memory, - "failed to allocate memory buffer of " + - Twine::utohexstr(TotalSize) + " bytes"); - - SecWriter = std::make_unique>(*Buf); - return Error::success(); -} - -Error BinaryWriter::write() { - for (const SectionBase &Sec : Obj.allocSections()) - if (Error Err = Sec.accept(*SecWriter)) - return Err; - - // TODO: Implement direct writing to the output stream (without intermediate - // memory buffer Buf). - Out.write(Buf->getBufferStart(), Buf->getBufferSize()); - return Error::success(); -} - -Error BinaryWriter::finalize() { - // Compute the section LMA based on its sh_offset and the containing segment's - // p_offset and p_paddr. Also compute the minimum LMA of all non-empty - // sections as MinAddr. In the output, the contents between address 0 and - // MinAddr will be skipped. - uint64_t MinAddr = UINT64_MAX; - for (SectionBase &Sec : Obj.allocSections()) { - if (Sec.ParentSegment != nullptr) - Sec.Addr = - Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr; - if (Sec.Type != SHT_NOBITS && Sec.Size > 0) - MinAddr = std::min(MinAddr, Sec.Addr); - } - - // Now that every section has been laid out we just need to compute the total - // file size. This might not be the same as the offset returned by - // layoutSections, because we want to truncate the last segment to the end of - // its last non-empty section, to match GNU objcopy's behaviour. - TotalSize = 0; - for (SectionBase &Sec : Obj.allocSections()) - if (Sec.Type != SHT_NOBITS && Sec.Size > 0) { - Sec.Offset = Sec.Addr - MinAddr; - TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); - } - - Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); - if (!Buf) - return createStringError(errc::not_enough_memory, - "failed to allocate memory buffer of " + - Twine::utohexstr(TotalSize) + " bytes"); - SecWriter = std::make_unique(*Buf); - return Error::success(); -} - -bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs, - const SectionBase *Rhs) const { - return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) < - (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU); -} - -uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) { - IHexLineData HexData; - uint8_t Data[4] = {}; - // We don't write entry point record if entry is zero. - if (Obj.Entry == 0) - return 0; - - if (Obj.Entry <= 0xFFFFFU) { - Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF; - support::endian::write(&Data[2], static_cast(Obj.Entry), - support::big); - HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data); - } else { - support::endian::write(Data, static_cast(Obj.Entry), - support::big); - HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data); - } - memcpy(Buf, HexData.data(), HexData.size()); - return HexData.size(); -} - -uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) { - IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {}); - memcpy(Buf, HexData.data(), HexData.size()); - return HexData.size(); -} - -Error IHexWriter::write() { - IHexSectionWriter Writer(*Buf); - // Write sections. 
-  for (const SectionBase *Sec : Sections)
-    if (Error Err = Sec->accept(Writer))
-      return Err;
-
-  uint64_t Offset = Writer.getBufferOffset();
-  // Write entry point address.
-  Offset += writeEntryPointRecord(
-      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Offset);
-  // Write EOF.
-  Offset += writeEndOfFileRecord(
-      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Offset);
-  assert(Offset == TotalSize);
-
-  // TODO: Implement direct writing to the output stream (without intermediate
-  // memory buffer Buf).
-  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
-  return Error::success();
-}
-
-Error IHexWriter::checkSection(const SectionBase &Sec) {
-  uint64_t Addr = sectionPhysicalAddr(&Sec);
-  if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1))
-    return createStringError(
-        errc::invalid_argument,
-        "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit",
-        Sec.Name.c_str(), Addr, Addr + Sec.Size - 1);
-  return Error::success();
-}
-
-Error IHexWriter::finalize() {
-  // We can't write 64-bit addresses.
-  if (addressOverflows32bit(Obj.Entry))
-    return createStringError(errc::invalid_argument,
-                             "Entry point address 0x%llx overflows 32 bits",
-                             Obj.Entry);
-
-  for (const SectionBase &Sec : Obj.sections())
-    if ((Sec.Flags & ELF::SHF_ALLOC) && Sec.Type != ELF::SHT_NOBITS &&
-        Sec.Size > 0) {
-      if (Error E = checkSection(Sec))
-        return E;
-      Sections.insert(&Sec);
-    }
-
-  std::unique_ptr<WritableMemoryBuffer> EmptyBuffer =
-      WritableMemoryBuffer::getNewMemBuffer(0);
-  if (!EmptyBuffer)
-    return createStringError(errc::not_enough_memory,
-                             "failed to allocate memory buffer of 0 bytes");
-
-  IHexSectionWriterBase LengthCalc(*EmptyBuffer);
-  for (const SectionBase *Sec : Sections)
-    if (Error Err = Sec->accept(LengthCalc))
-      return Err;
-
-  // We need space to write section records + StartAddress record
-  // (if start address is not zero) + EndOfFile record.
-  TotalSize = LengthCalc.getBufferOffset() +
-              (Obj.Entry ? IHexRecord::getLineLength(4) : 0) +
-              IHexRecord::getLineLength(0);
-
-  Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize);
-  if (!Buf)
-    return createStringError(errc::not_enough_memory,
-                             "failed to allocate memory buffer of " +
-                                 Twine::utohexstr(TotalSize) + " bytes");
-
-  return Error::success();
-}
-
-namespace llvm {
-namespace objcopy {
-namespace elf {
-
-template class ELFBuilder<ELF64LE>;
-template class ELFBuilder<ELF64BE>;
-template class ELFBuilder<ELF32LE>;
-template class ELFBuilder<ELF32BE>;
-
-template class ELFWriter<ELF64LE>;
-template class ELFWriter<ELF64BE>;
-template class ELFWriter<ELF32LE>;
-template class ELFWriter<ELF32BE>;
-
-} // end namespace elf
-} // end namespace objcopy
-} // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h
deleted file mode 100644
index 681ab8f56381..000000000000
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ /dev/null
@@ -1,1113 +0,0 @@
-//===- Object.h -------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H -#define LLVM_TOOLS_OBJCOPY_OBJECT_H - -#include "CommonConfig.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/StringTableBuilder.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/MemoryBuffer.h" -#include -#include -#include -#include -#include -#include - -namespace llvm { -enum class DebugCompressionType; -namespace objcopy { -namespace elf { - -class SectionBase; -class Section; -class OwnedDataSection; -class StringTableSection; -class SymbolTableSection; -class RelocationSection; -class DynamicRelocationSection; -class GnuDebugLinkSection; -class GroupSection; -class SectionIndexSection; -class CompressedSection; -class DecompressedSection; -class Segment; -class Object; -struct Symbol; - -class SectionTableRef { - ArrayRef> Sections; - -public: - using iterator = pointee_iterator *>; - - explicit SectionTableRef(ArrayRef> Secs) - : Sections(Secs) {} - SectionTableRef(const SectionTableRef &) = default; - - iterator begin() const { return iterator(Sections.data()); } - iterator end() const { return iterator(Sections.data() + Sections.size()); } - size_t size() const { return Sections.size(); } - - Expected getSection(uint32_t Index, Twine ErrMsg); - - template - Expected getSectionOfType(uint32_t Index, Twine IndexErrMsg, - Twine TypeErrMsg); -}; - -enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; - -class SectionVisitor { -public: - virtual ~SectionVisitor() = default; - - virtual Error visit(const Section &Sec) = 0; - virtual Error visit(const OwnedDataSection &Sec) = 0; - virtual Error visit(const StringTableSection &Sec) = 0; - virtual Error visit(const SymbolTableSection &Sec) = 0; - virtual Error visit(const RelocationSection &Sec) = 0; - virtual Error visit(const DynamicRelocationSection &Sec) = 0; - virtual Error visit(const GnuDebugLinkSection &Sec) = 0; - virtual Error visit(const GroupSection &Sec) = 0; - virtual Error visit(const SectionIndexSection &Sec) = 0; - virtual Error visit(const CompressedSection &Sec) = 0; - virtual Error visit(const DecompressedSection &Sec) = 0; -}; - -class MutableSectionVisitor { -public: - virtual ~MutableSectionVisitor() = default; - - virtual Error visit(Section &Sec) = 0; - virtual Error visit(OwnedDataSection &Sec) = 0; - virtual Error visit(StringTableSection &Sec) = 0; - virtual Error visit(SymbolTableSection &Sec) = 0; - virtual Error visit(RelocationSection &Sec) = 0; - virtual Error visit(DynamicRelocationSection &Sec) = 0; - virtual Error visit(GnuDebugLinkSection &Sec) = 0; - virtual Error visit(GroupSection &Sec) = 0; - virtual Error visit(SectionIndexSection &Sec) = 0; - virtual Error visit(CompressedSection &Sec) = 0; - virtual Error visit(DecompressedSection &Sec) = 0; -}; - -class SectionWriter : public SectionVisitor { -protected: - WritableMemoryBuffer &Out; - -public: - virtual ~SectionWriter() = default; - - Error visit(const Section &Sec) override; - Error visit(const OwnedDataSection &Sec) override; - Error visit(const StringTableSection &Sec) override; - Error visit(const DynamicRelocationSection &Sec) override; - virtual Error visit(const SymbolTableSection &Sec) override = 0; - virtual Error visit(const 
RelocationSection &Sec) override = 0; - virtual Error visit(const GnuDebugLinkSection &Sec) override = 0; - virtual Error visit(const GroupSection &Sec) override = 0; - virtual Error visit(const SectionIndexSection &Sec) override = 0; - virtual Error visit(const CompressedSection &Sec) override = 0; - virtual Error visit(const DecompressedSection &Sec) override = 0; - - explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {} -}; - -template class ELFSectionWriter : public SectionWriter { -private: - using Elf_Word = typename ELFT::Word; - using Elf_Rel = typename ELFT::Rel; - using Elf_Rela = typename ELFT::Rela; - using Elf_Sym = typename ELFT::Sym; - -public: - virtual ~ELFSectionWriter() {} - Error visit(const SymbolTableSection &Sec) override; - Error visit(const RelocationSection &Sec) override; - Error visit(const GnuDebugLinkSection &Sec) override; - Error visit(const GroupSection &Sec) override; - Error visit(const SectionIndexSection &Sec) override; - Error visit(const CompressedSection &Sec) override; - Error visit(const DecompressedSection &Sec) override; - - explicit ELFSectionWriter(WritableMemoryBuffer &Buf) : SectionWriter(Buf) {} -}; - -template class ELFSectionSizer : public MutableSectionVisitor { -private: - using Elf_Rel = typename ELFT::Rel; - using Elf_Rela = typename ELFT::Rela; - using Elf_Sym = typename ELFT::Sym; - using Elf_Word = typename ELFT::Word; - using Elf_Xword = typename ELFT::Xword; - -public: - Error visit(Section &Sec) override; - Error visit(OwnedDataSection &Sec) override; - Error visit(StringTableSection &Sec) override; - Error visit(DynamicRelocationSection &Sec) override; - Error visit(SymbolTableSection &Sec) override; - Error visit(RelocationSection &Sec) override; - Error visit(GnuDebugLinkSection &Sec) override; - Error visit(GroupSection &Sec) override; - Error visit(SectionIndexSection &Sec) override; - Error visit(CompressedSection &Sec) override; - Error visit(DecompressedSection &Sec) override; -}; - -#define MAKE_SEC_WRITER_FRIEND \ - friend class SectionWriter; \ - friend class IHexSectionWriterBase; \ - friend class IHexSectionWriter; \ - template friend class ELFSectionWriter; \ - template friend class ELFSectionSizer; - -class BinarySectionWriter : public SectionWriter { -public: - virtual ~BinarySectionWriter() {} - - Error visit(const SymbolTableSection &Sec) override; - Error visit(const RelocationSection &Sec) override; - Error visit(const GnuDebugLinkSection &Sec) override; - Error visit(const GroupSection &Sec) override; - Error visit(const SectionIndexSection &Sec) override; - Error visit(const CompressedSection &Sec) override; - Error visit(const DecompressedSection &Sec) override; - - explicit BinarySectionWriter(WritableMemoryBuffer &Buf) - : SectionWriter(Buf) {} -}; - -using IHexLineData = SmallVector; - -struct IHexRecord { - // Memory address of the record. - uint16_t Addr; - // Record type (see below). - uint16_t Type; - // Record data in hexadecimal form. - StringRef HexData; - - // Helper method to get file length of the record - // including newline character - static size_t getLength(size_t DataSize) { - // :LLAAAATT[DD...DD]CC' - return DataSize * 2 + 11; - } - - // Gets length of line in a file (getLength + CRLF). - static size_t getLineLength(size_t DataSize) { - return getLength(DataSize) + 2; - } - - // Given type, address and data returns line which can - // be written to output file. 
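
As a worked example of the ":LLAAAATT[DD...DD]CC" shape described above, the following sketch builds one record line; the names are ours and this is not the IHexRecord implementation itself. The trailing checksum byte CC is the two's complement of the sum of every byte from the length field through the last data byte.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

static std::string makeIHexLine(uint8_t Type, uint16_t Addr,
                                const std::vector<uint8_t> &Data) {
  // Sum the record bytes: length, address high/low, type, then data.
  uint8_t Sum =
      static_cast<uint8_t>(Data.size() + (Addr >> 8) + (Addr & 0xFF) + Type);
  for (uint8_t B : Data)
    Sum += B;
  uint8_t Checksum = static_cast<uint8_t>(0x100 - Sum); // two's complement

  char Buf[16];
  std::snprintf(Buf, sizeof(Buf), ":%02X%04X%02X",
                static_cast<unsigned>(Data.size()),
                static_cast<unsigned>(Addr), static_cast<unsigned>(Type));
  std::string Line = Buf;
  for (uint8_t B : Data) {
    std::snprintf(Buf, sizeof(Buf), "%02X", static_cast<unsigned>(B));
    Line += Buf;
  }
  std::snprintf(Buf, sizeof(Buf), "%02X", static_cast<unsigned>(Checksum));
  Line += Buf;
  return Line;
}

// makeIHexLine(0, 0x0100, {0xDE, 0xAD}) yields ":02010000DEAD72", whose
// length matches getLength(2) == 2 * 2 + 11 == 15 characters.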
-  static IHexLineData getLine(uint8_t Type, uint16_t Addr,
-                              ArrayRef<uint8_t> Data);
-
-  // Parses the line and returns record if possible.
-  // Line should be trimmed from whitespace characters.
-  static Expected<IHexRecord> parse(StringRef Line);
-
-  // Calculates checksum of stringified record representation
-  // S must NOT contain leading ':' and trailing whitespace
-  // characters
-  static uint8_t getChecksum(StringRef S);
-
-  enum Type {
-    // Contains data and a 16-bit starting address for the data.
-    // The byte count specifies number of data bytes in the record.
-    Data = 0,
-    // Must occur exactly once per file in the last line of the file.
-    // The data field is empty (thus byte count is 00) and the address
-    // field is typically 0000.
-    EndOfFile = 1,
-    // The data field contains a 16-bit segment base address (thus byte
-    // count is always 02) compatible with 80x86 real mode addressing.
-    // The address field (typically 0000) is ignored. The segment address
-    // from the most recent 02 record is multiplied by 16 and added to each
-    // subsequent data record address to form the physical starting address
-    // for the data. This allows addressing up to one megabyte of address
-    // space.
-    SegmentAddr = 2,
-    // For 80x86 processors, specifies the initial content of the CS:IP
-    // registers. The address field is 0000, the byte count is always 04,
-    // the first two data bytes are the CS value, the latter two are the
-    // IP value.
-    StartAddr80x86 = 3,
-    // Allows for 32 bit addressing (up to 4GiB). The record's address field
-    // is ignored (typically 0000) and its byte count is always 02. The two
-    // data bytes (big endian) specify the upper 16 bits of the 32 bit
-    // absolute address for all subsequent type 00 records
-    ExtendedAddr = 4,
-    // The address field is 0000 (not used) and the byte count is always 04.
-    // The four data bytes represent a 32-bit address value. In the case of
-    // 80386 and higher CPUs, this address is loaded into the EIP register.
-    StartAddr = 5,
-    // We have no other valid types
-    InvalidType = 6
-  };
-};
-
-// Base class for IHexSectionWriter. This class implements the writing
-// algorithm, but doesn't actually write records. It is used for output buffer
-// size calculation in IHexWriter::finalize.
-class IHexSectionWriterBase : public BinarySectionWriter { - // 20-bit segment address - uint32_t SegmentAddr = 0; - // Extended linear address - uint32_t BaseAddr = 0; - - // Write segment address corresponding to 'Addr' - uint64_t writeSegmentAddr(uint64_t Addr); - // Write extended linear (base) address corresponding to 'Addr' - uint64_t writeBaseAddr(uint64_t Addr); - -protected: - // Offset in the output buffer - uint64_t Offset = 0; - - void writeSection(const SectionBase *Sec, ArrayRef Data); - virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef Data); - -public: - explicit IHexSectionWriterBase(WritableMemoryBuffer &Buf) - : BinarySectionWriter(Buf) {} - - uint64_t getBufferOffset() const { return Offset; } - Error visit(const Section &Sec) final; - Error visit(const OwnedDataSection &Sec) final; - Error visit(const StringTableSection &Sec) override; - Error visit(const DynamicRelocationSection &Sec) final; - using BinarySectionWriter::visit; -}; - -// Real IHEX section writer -class IHexSectionWriter : public IHexSectionWriterBase { -public: - IHexSectionWriter(WritableMemoryBuffer &Buf) : IHexSectionWriterBase(Buf) {} - - void writeData(uint8_t Type, uint16_t Addr, ArrayRef Data) override; - Error visit(const StringTableSection &Sec) override; -}; - -class Writer { -protected: - Object &Obj; - std::unique_ptr Buf; - raw_ostream &Out; - -public: - virtual ~Writer(); - virtual Error finalize() = 0; - virtual Error write() = 0; - - Writer(Object &O, raw_ostream &Out) : Obj(O), Out(Out) {} -}; - -template class ELFWriter : public Writer { -private: - using Elf_Addr = typename ELFT::Addr; - using Elf_Shdr = typename ELFT::Shdr; - using Elf_Phdr = typename ELFT::Phdr; - using Elf_Ehdr = typename ELFT::Ehdr; - - void initEhdrSegment(); - - void writeEhdr(); - void writePhdr(const Segment &Seg); - void writeShdr(const SectionBase &Sec); - - void writePhdrs(); - void writeShdrs(); - Error writeSectionData(); - void writeSegmentData(); - - void assignOffsets(); - - std::unique_ptr> SecWriter; - - size_t totalSize() const; - -public: - virtual ~ELFWriter() {} - bool WriteSectionHeaders; - - // For --only-keep-debug, select an alternative section/segment layout - // algorithm. 
- bool OnlyKeepDebug; - - Error finalize() override; - Error write() override; - ELFWriter(Object &Obj, raw_ostream &Out, bool WSH, bool OnlyKeepDebug); -}; - -class BinaryWriter : public Writer { -private: - std::unique_ptr SecWriter; - - uint64_t TotalSize = 0; - -public: - ~BinaryWriter() {} - Error finalize() override; - Error write() override; - BinaryWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {} -}; - -class IHexWriter : public Writer { - struct SectionCompare { - bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const; - }; - - std::set Sections; - size_t TotalSize = 0; - - Error checkSection(const SectionBase &Sec); - uint64_t writeEntryPointRecord(uint8_t *Buf); - uint64_t writeEndOfFileRecord(uint8_t *Buf); - -public: - ~IHexWriter() {} - Error finalize() override; - Error write() override; - IHexWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {} -}; - -class SectionBase { -public: - std::string Name; - Segment *ParentSegment = nullptr; - uint64_t HeaderOffset = 0; - uint32_t Index = 0; - - uint32_t OriginalIndex = 0; - uint64_t OriginalFlags = 0; - uint64_t OriginalType = ELF::SHT_NULL; - uint64_t OriginalOffset = std::numeric_limits::max(); - - uint64_t Addr = 0; - uint64_t Align = 1; - uint32_t EntrySize = 0; - uint64_t Flags = 0; - uint64_t Info = 0; - uint64_t Link = ELF::SHN_UNDEF; - uint64_t NameIndex = 0; - uint64_t Offset = 0; - uint64_t Size = 0; - uint64_t Type = ELF::SHT_NULL; - ArrayRef OriginalData; - bool HasSymbol = false; - - SectionBase() = default; - SectionBase(const SectionBase &) = default; - - virtual ~SectionBase() = default; - - virtual Error initialize(SectionTableRef SecTable); - virtual void finalize(); - // Remove references to these sections. The list of sections must be sorted. - virtual Error - removeSectionReferences(bool AllowBrokenLinks, - function_ref ToRemove); - virtual Error removeSymbols(function_ref ToRemove); - virtual Error accept(SectionVisitor &Visitor) const = 0; - virtual Error accept(MutableSectionVisitor &Visitor) = 0; - virtual void markSymbols(); - virtual void - replaceSectionReferences(const DenseMap &); - virtual bool hasContents() const { return false; } - // Notify the section that it is subject to removal. - virtual void onRemove(); -}; - -class Segment { -private: - struct SectionCompare { - bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const { - // Some sections might have the same address if one of them is empty. To - // fix this we can use the lexicographic ordering on ->Addr and the - // original index. 
- if (Lhs->OriginalOffset == Rhs->OriginalOffset) - return Lhs->OriginalIndex < Rhs->OriginalIndex; - return Lhs->OriginalOffset < Rhs->OriginalOffset; - } - }; - -public: - uint32_t Type = 0; - uint32_t Flags = 0; - uint64_t Offset = 0; - uint64_t VAddr = 0; - uint64_t PAddr = 0; - uint64_t FileSize = 0; - uint64_t MemSize = 0; - uint64_t Align = 0; - - uint32_t Index = 0; - uint64_t OriginalOffset = 0; - Segment *ParentSegment = nullptr; - ArrayRef Contents; - std::set Sections; - - explicit Segment(ArrayRef Data) : Contents(Data) {} - Segment() = default; - - const SectionBase *firstSection() const { - if (!Sections.empty()) - return *Sections.begin(); - return nullptr; - } - - void removeSection(const SectionBase *Sec) { Sections.erase(Sec); } - void addSection(const SectionBase *Sec) { Sections.insert(Sec); } - - ArrayRef getContents() const { return Contents; } -}; - -class Section : public SectionBase { - MAKE_SEC_WRITER_FRIEND - - ArrayRef Contents; - SectionBase *LinkSection = nullptr; - -public: - explicit Section(ArrayRef Data) : Contents(Data) {} - - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; - Error removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) override; - Error initialize(SectionTableRef SecTable) override; - void finalize() override; - bool hasContents() const override { - return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL; - } -}; - -class OwnedDataSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - - std::vector Data; - -public: - OwnedDataSection(StringRef SecName, ArrayRef Data) - : Data(std::begin(Data), std::end(Data)) { - Name = SecName.str(); - Type = OriginalType = ELF::SHT_PROGBITS; - Size = Data.size(); - OriginalOffset = std::numeric_limits::max(); - } - - OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags, - uint64_t SecOff) { - Name = SecName.str(); - Type = OriginalType = ELF::SHT_PROGBITS; - Addr = SecAddr; - Flags = OriginalFlags = SecFlags; - OriginalOffset = SecOff; - } - - OwnedDataSection(SectionBase &S, ArrayRef Data) - : SectionBase(S), Data(std::begin(Data), std::end(Data)) { - Size = Data.size(); - } - - void appendHexData(StringRef HexData); - Error accept(SectionVisitor &Sec) const override; - Error accept(MutableSectionVisitor &Visitor) override; - bool hasContents() const override { return true; } -}; - -class CompressedSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - - DebugCompressionType CompressionType; - uint64_t DecompressedSize; - uint64_t DecompressedAlign; - SmallVector CompressedData; - -public: - static Expected - create(const SectionBase &Sec, DebugCompressionType CompressionType); - static Expected create(ArrayRef CompressedData, - uint64_t DecompressedSize, - uint64_t DecompressedAlign); - - uint64_t getDecompressedSize() const { return DecompressedSize; } - uint64_t getDecompressedAlign() const { return DecompressedAlign; } - - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; - - static bool classof(const SectionBase *S) { - return (S->OriginalFlags & ELF::SHF_COMPRESSED) || - (StringRef(S->Name).startswith(".zdebug")); - } - -private: - CompressedSection(const SectionBase &Sec, - DebugCompressionType CompressionType, Error &Err); - CompressedSection(ArrayRef CompressedData, uint64_t DecompressedSize, - uint64_t DecompressedAlign); -}; - -class DecompressedSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - -public: - explicit 
DecompressedSection(const CompressedSection &Sec) - : SectionBase(Sec) { - Size = Sec.getDecompressedSize(); - Align = Sec.getDecompressedAlign(); - Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED); - if (StringRef(Name).startswith(".zdebug")) - Name = "." + Name.substr(2); - } - - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; -}; - -// There are two types of string tables that can exist, dynamic and not dynamic. -// In the dynamic case the string table is allocated. Changing a dynamic string -// table would mean altering virtual addresses and thus the memory image. So -// dynamic string tables should not have an interface to modify them or -// reconstruct them. This type lets us reconstruct a string table. To avoid -// this class being used for dynamic string tables (which has happened) the -// classof method checks that the particular instance is not allocated. This -// then agrees with the makeSection method used to construct most sections. -class StringTableSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - - StringTableBuilder StrTabBuilder; - -public: - StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) { - Type = OriginalType = ELF::SHT_STRTAB; - } - - void addString(StringRef Name); - uint32_t findIndex(StringRef Name) const; - void prepareForLayout(); - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; - - static bool classof(const SectionBase *S) { - if (S->OriginalFlags & ELF::SHF_ALLOC) - return false; - return S->OriginalType == ELF::SHT_STRTAB; - } -}; - -// Symbols have a st_shndx field that normally stores an index but occasionally -// stores a different special value. This enum keeps track of what the st_shndx -// field means. Most of the values are just copies of the special SHN_* values. -// SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section. 
-enum SymbolShndxType { - SYMBOL_SIMPLE_INDEX = 0, - SYMBOL_ABS = ELF::SHN_ABS, - SYMBOL_COMMON = ELF::SHN_COMMON, - SYMBOL_LOPROC = ELF::SHN_LOPROC, - SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS, - SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON, - SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, - SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, - SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, - SYMBOL_HIPROC = ELF::SHN_HIPROC, - SYMBOL_LOOS = ELF::SHN_LOOS, - SYMBOL_HIOS = ELF::SHN_HIOS, - SYMBOL_XINDEX = ELF::SHN_XINDEX, -}; - -struct Symbol { - uint8_t Binding; - SectionBase *DefinedIn = nullptr; - SymbolShndxType ShndxType; - uint32_t Index; - std::string Name; - uint32_t NameIndex; - uint64_t Size; - uint8_t Type; - uint64_t Value; - uint8_t Visibility; - bool Referenced = false; - - uint16_t getShndx() const; - bool isCommon() const; -}; - -class SectionIndexSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - -private: - std::vector Indexes; - SymbolTableSection *Symbols = nullptr; - -public: - virtual ~SectionIndexSection() {} - void addIndex(uint32_t Index) { - assert(Size > 0); - Indexes.push_back(Index); - } - - void reserve(size_t NumSymbols) { - Indexes.reserve(NumSymbols); - Size = NumSymbols * 4; - } - void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } - Error initialize(SectionTableRef SecTable) override; - void finalize() override; - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; - - SectionIndexSection() { - Name = ".symtab_shndx"; - Align = 4; - EntrySize = 4; - Type = OriginalType = ELF::SHT_SYMTAB_SHNDX; - } -}; - -class SymbolTableSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - - void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; } - void assignIndices(); - -protected: - std::vector> Symbols; - StringTableSection *SymbolNames = nullptr; - SectionIndexSection *SectionIndexTable = nullptr; - - using SymPtr = std::unique_ptr; - -public: - SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; } - - void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, - uint64_t Value, uint8_t Visibility, uint16_t Shndx, - uint64_t SymbolSize); - void prepareForLayout(); - // An 'empty' symbol table still contains a null symbol. - bool empty() const { return Symbols.size() == 1; } - void setShndxTable(SectionIndexSection *ShndxTable) { - SectionIndexTable = ShndxTable; - } - const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } - void fillShndxTable(); - const SectionBase *getStrTab() const { return SymbolNames; } - Expected getSymbolByIndex(uint32_t Index) const; - Expected getSymbolByIndex(uint32_t Index); - void updateSymbols(function_ref Callable); - - Error removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) override; - Error initialize(SectionTableRef SecTable) override; - void finalize() override; - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; - Error removeSymbols(function_ref ToRemove) override; - void replaceSectionReferences( - const DenseMap &FromTo) override; - - static bool classof(const SectionBase *S) { - return S->OriginalType == ELF::SHT_SYMTAB; - } -}; - -struct Relocation { - Symbol *RelocSymbol = nullptr; - uint64_t Offset; - uint64_t Addend; - uint32_t Type; -}; - -// All relocation sections denote relocations to apply to another section. 
-// However, some relocation sections use a dynamic symbol table and others use -// a regular symbol table. Because the types of the two symbol tables differ in -// our system (because they should behave differently) we can't uniformly -// represent all relocations with the same base class if we expose an interface -// that mentions the symbol table type. So we split the two base types into two -// different classes, one which handles the section the relocation is applied to -// and another which handles the symbol table type. The symbol table type is -// taken as a type parameter to the class (see RelocSectionWithSymtabBase). -class RelocationSectionBase : public SectionBase { -protected: - SectionBase *SecToApplyRel = nullptr; - -public: - const SectionBase *getSection() const { return SecToApplyRel; } - void setSection(SectionBase *Sec) { SecToApplyRel = Sec; } - - StringRef getNamePrefix() const; - - static bool classof(const SectionBase *S) { - return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; - } -}; - -// Takes the symbol table type to use as a parameter so that we can deduplicate -// that code between the two symbol table types. -template -class RelocSectionWithSymtabBase : public RelocationSectionBase { - void setSymTab(SymTabType *SymTab) { Symbols = SymTab; } - -protected: - RelocSectionWithSymtabBase() = default; - - SymTabType *Symbols = nullptr; - -public: - Error initialize(SectionTableRef SecTable) override; - void finalize() override; -}; - -class RelocationSection - : public RelocSectionWithSymtabBase { - MAKE_SEC_WRITER_FRIEND - - std::vector Relocations; - const Object &Obj; - -public: - RelocationSection(const Object &O) : Obj(O) {} - void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; - Error removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) override; - Error removeSymbols(function_ref ToRemove) override; - void markSymbols() override; - void replaceSectionReferences( - const DenseMap &FromTo) override; - const Object &getObject() const { return Obj; } - - static bool classof(const SectionBase *S) { - if (S->OriginalFlags & ELF::SHF_ALLOC) - return false; - return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; - } -}; - -// TODO: The way stripping and groups interact is complicated -// and still needs to be worked on. - -class GroupSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - const SymbolTableSection *SymTab = nullptr; - Symbol *Sym = nullptr; - ELF::Elf32_Word FlagWord; - SmallVector GroupMembers; - -public: - // TODO: Contents is present in several classes of the hierarchy. - // This needs to be refactored to avoid duplication. 
- ArrayRef Contents; - - explicit GroupSection(ArrayRef Data) : Contents(Data) {} - - void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; } - void setSymbol(Symbol *S) { Sym = S; } - void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; } - void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); } - - Error accept(SectionVisitor &) const override; - Error accept(MutableSectionVisitor &Visitor) override; - void finalize() override; - Error removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) override; - Error removeSymbols(function_ref ToRemove) override; - void markSymbols() override; - void replaceSectionReferences( - const DenseMap &FromTo) override; - void onRemove() override; - - static bool classof(const SectionBase *S) { - return S->OriginalType == ELF::SHT_GROUP; - } -}; - -class DynamicSymbolTableSection : public Section { -public: - explicit DynamicSymbolTableSection(ArrayRef Data) : Section(Data) {} - - static bool classof(const SectionBase *S) { - return S->OriginalType == ELF::SHT_DYNSYM; - } -}; - -class DynamicSection : public Section { -public: - explicit DynamicSection(ArrayRef Data) : Section(Data) {} - - static bool classof(const SectionBase *S) { - return S->OriginalType == ELF::SHT_DYNAMIC; - } -}; - -class DynamicRelocationSection - : public RelocSectionWithSymtabBase { - MAKE_SEC_WRITER_FRIEND - -private: - ArrayRef Contents; - -public: - explicit DynamicRelocationSection(ArrayRef Data) : Contents(Data) {} - - Error accept(SectionVisitor &) const override; - Error accept(MutableSectionVisitor &Visitor) override; - Error removeSectionReferences( - bool AllowBrokenLinks, - function_ref ToRemove) override; - - static bool classof(const SectionBase *S) { - if (!(S->OriginalFlags & ELF::SHF_ALLOC)) - return false; - return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; - } -}; - -class GnuDebugLinkSection : public SectionBase { - MAKE_SEC_WRITER_FRIEND - -private: - StringRef FileName; - uint32_t CRC32; - - void init(StringRef File); - -public: - // If we add this section from an external source we can use this ctor. 
- explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC); - Error accept(SectionVisitor &Visitor) const override; - Error accept(MutableSectionVisitor &Visitor) override; -}; - -class Reader { -public: - virtual ~Reader(); - virtual Expected> create(bool EnsureSymtab) const = 0; -}; - -using object::Binary; -using object::ELFFile; -using object::ELFObjectFile; -using object::OwningBinary; - -class BasicELFBuilder { -protected: - std::unique_ptr Obj; - - void initFileHeader(); - void initHeaderSegment(); - StringTableSection *addStrTab(); - SymbolTableSection *addSymTab(StringTableSection *StrTab); - Error initSections(); - -public: - BasicELFBuilder() : Obj(std::make_unique()) {} -}; - -class BinaryELFBuilder : public BasicELFBuilder { - MemoryBuffer *MemBuf; - uint8_t NewSymbolVisibility; - void addData(SymbolTableSection *SymTab); - -public: - BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) - : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} - - Expected> build(); -}; - -class IHexELFBuilder : public BasicELFBuilder { - const std::vector &Records; - - void addDataSections(); - -public: - IHexELFBuilder(const std::vector &Records) : Records(Records) {} - - Expected> build(); -}; - -template class ELFBuilder { -private: - using Elf_Addr = typename ELFT::Addr; - using Elf_Shdr = typename ELFT::Shdr; - using Elf_Word = typename ELFT::Word; - - const ELFFile &ElfFile; - Object &Obj; - size_t EhdrOffset = 0; - Optional ExtractPartition; - - void setParentSegment(Segment &Child); - Error readProgramHeaders(const ELFFile &HeadersFile); - Error initGroupSection(GroupSection *GroupSec); - Error initSymbolTable(SymbolTableSection *SymTab); - Error readSectionHeaders(); - Error readSections(bool EnsureSymtab); - Error findEhdrOffset(); - Expected makeSection(const Elf_Shdr &Shdr); - -public: - ELFBuilder(const ELFObjectFile &ElfObj, Object &Obj, - Optional ExtractPartition); - - Error build(bool EnsureSymtab); -}; - -class BinaryReader : public Reader { - MemoryBuffer *MemBuf; - uint8_t NewSymbolVisibility; - -public: - BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) - : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} - Expected> create(bool EnsureSymtab) const override; -}; - -class IHexReader : public Reader { - MemoryBuffer *MemBuf; - - Expected> parse() const; - Error parseError(size_t LineNo, Error E) const { - return LineNo == -1U - ? createFileError(MemBuf->getBufferIdentifier(), std::move(E)) - : createFileError(MemBuf->getBufferIdentifier(), LineNo, - std::move(E)); - } - template - Error parseError(size_t LineNo, char const *Fmt, const Ts &... 
Vals) const { - Error E = createStringError(errc::invalid_argument, Fmt, Vals...); - return parseError(LineNo, std::move(E)); - } - -public: - IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} - - Expected> create(bool EnsureSymtab) const override; -}; - -class ELFReader : public Reader { - Binary *Bin; - Optional ExtractPartition; - -public: - Expected> create(bool EnsureSymtab) const override; - explicit ELFReader(Binary *B, Optional ExtractPartition) - : Bin(B), ExtractPartition(ExtractPartition) {} -}; - -class Object { -private: - using SecPtr = std::unique_ptr; - using SegPtr = std::unique_ptr; - - std::vector Sections; - std::vector Segments; - std::vector RemovedSections; - DenseMap> UpdatedSections; - - static bool sectionIsAlloc(const SectionBase &Sec) { - return Sec.Flags & ELF::SHF_ALLOC; - }; - -public: - template - using ConstRange = iterator_range>::const_iterator>>; - - // It is often the case that the ELF header and the program header table are - // not present in any segment. This could be a problem during file layout, - // because other segments may get assigned an offset where either of the - // two should reside, which will effectively corrupt the resulting binary. - // Other than that we use these segments to track program header offsets - // when they may not follow the ELF header. - Segment ElfHdrSegment; - Segment ProgramHdrSegment; - - uint8_t OSABI; - uint8_t ABIVersion; - uint64_t Entry; - uint64_t SHOff; - uint32_t Type; - uint32_t Machine; - uint32_t Version; - uint32_t Flags; - - bool HadShdrs = true; - bool MustBeRelocatable = false; - StringTableSection *SectionNames = nullptr; - SymbolTableSection *SymbolTable = nullptr; - SectionIndexSection *SectionIndexTable = nullptr; - - bool IsMips64EL = false; - - SectionTableRef sections() const { return SectionTableRef(Sections); } - iterator_range< - filter_iterator::const_iterator>, - decltype(§ionIsAlloc)>> - allocSections() const { - return make_filter_range(make_pointee_range(Sections), sectionIsAlloc); - } - - const auto &getUpdatedSections() const { return UpdatedSections; } - Error updateSection(StringRef Name, ArrayRef Data); - - SectionBase *findSection(StringRef Name) { - auto SecIt = - find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); - return SecIt == Sections.end() ? nullptr : SecIt->get(); - } - SectionTableRef removedSections() { return SectionTableRef(RemovedSections); } - - ConstRange segments() const { return make_pointee_range(Segments); } - - Error removeSections(bool AllowBrokenLinks, - std::function ToRemove); - Error replaceSections(const DenseMap &FromTo); - Error removeSymbols(function_ref ToRemove); - template T &addSection(Ts &&... 
Args) { - auto Sec = std::make_unique(std::forward(Args)...); - auto Ptr = Sec.get(); - MustBeRelocatable |= isa(*Ptr); - Sections.emplace_back(std::move(Sec)); - Ptr->Index = Sections.size(); - return *Ptr; - } - Error addNewSymbolTable(); - Segment &addSegment(ArrayRef Data) { - Segments.emplace_back(std::make_unique(Data)); - return *Segments.back(); - } - bool isRelocatable() const { - return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable; - } -}; - -} // end namespace elf -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_OBJECT_H diff --git a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h b/llvm/tools/llvm-objcopy/MachO/MachOConfig.h deleted file mode 100644 index 93f9facfcf0b..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h +++ /dev/null @@ -1,43 +0,0 @@ -//===- MachOConfig.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H -#define LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringRef.h" -#include - -namespace llvm { -namespace objcopy { - -// Mach-O specific configuration for copying/stripping a single file. -struct MachOConfig { - // Repeated options - std::vector RPathToAdd; - std::vector RPathToPrepend; - DenseMap RPathsToUpdate; - DenseMap InstallNamesToUpdate; - DenseSet RPathsToRemove; - - // install-name-tool's id option - Optional SharedLibId; - - // Boolean options - bool StripSwiftSymbols = false; - bool KeepUndefined = false; - - // install-name-tool's --delete_all_rpaths - bool RemoveAllRpaths = false; -}; - -} // namespace objcopy -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp deleted file mode 100644 index 6b731abd9ed9..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp +++ /dev/null @@ -1,441 +0,0 @@ -//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "MachOLayoutBuilder.h" -#include "llvm/Support/Alignment.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; -using namespace llvm::objcopy::macho; - -StringTableBuilder::Kind -MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { - if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) - return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; - return Is64Bit ? 
StringTableBuilder::MachO64Linked
-                 : StringTableBuilder::MachOLinked;
-}
-
-uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
-  uint32_t Size = 0;
-  for (const LoadCommand &LC : O.LoadCommands) {
-    const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
-    auto cmd = MLC.load_command_data.cmd;
-    switch (cmd) {
-    case MachO::LC_SEGMENT:
-      Size += sizeof(MachO::segment_command) +
-              sizeof(MachO::section) * LC.Sections.size();
-      continue;
-    case MachO::LC_SEGMENT_64:
-      Size += sizeof(MachO::segment_command_64) +
-              sizeof(MachO::section_64) * LC.Sections.size();
-      continue;
-    }
-
-    switch (cmd) {
-#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
-  case MachO::LCName:                                                          \
-    Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
-    break;
-#include "llvm/BinaryFormat/MachO.def"
-#undef HANDLE_LOAD_COMMAND
-    }
-  }
-
-  return Size;
-}
-
-void MachOLayoutBuilder::constructStringTable() {
-  for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
-    StrTableBuilder.add(Sym->Name);
-  StrTableBuilder.finalize();
-}
-
-void MachOLayoutBuilder::updateSymbolIndexes() {
-  uint32_t Index = 0;
-  for (auto &Symbol : O.SymTable.Symbols)
-    Symbol->Index = Index++;
-}
-
-// Updates the index and the number of local/external/undefined symbols.
-void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
-  assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
-  // Make sure that nlist entries in the symbol table are sorted by those
-  // types. The order is: local < defined external < undefined external.
-  assert(llvm::is_sorted(O.SymTable.Symbols,
-                         [](const std::unique_ptr<SymbolEntry> &A,
-                            const std::unique_ptr<SymbolEntry> &B) {
-                           bool AL = A->isLocalSymbol(),
-                                BL = B->isLocalSymbol();
-                           if (AL != BL)
-                             return AL;
-                           return !AL && !A->isUndefinedSymbol() &&
-                                  B->isUndefinedSymbol();
-                         }) &&
-         "Symbols are not sorted by their types.");
-
-  uint32_t NumLocalSymbols = 0;
-  auto Iter = O.SymTable.Symbols.begin();
-  auto End = O.SymTable.Symbols.end();
-  for (; Iter != End; ++Iter) {
-    if ((*Iter)->isExternalSymbol())
-      break;
-
-    ++NumLocalSymbols;
-  }
-
-  uint32_t NumExtDefSymbols = 0;
-  for (; Iter != End; ++Iter) {
-    if ((*Iter)->isUndefinedSymbol())
-      break;
-
-    ++NumExtDefSymbols;
-  }
-
-  MLC.dysymtab_command_data.ilocalsym = 0;
-  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
-  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
-  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
-  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
-  MLC.dysymtab_command_data.nundefsym =
-      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
-}
-
-// Recomputes and updates offset and size fields in load commands and sections
-// since they could be modified.
-uint64_t MachOLayoutBuilder::layoutSegments() {
-  auto HeaderSize =
-      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
-  const bool IsObjectFile =
-      O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
-  uint64_t Offset = IsObjectFile ?
(HeaderSize + O.Header.SizeOfCmds) : 0; - for (LoadCommand &LC : O.LoadCommands) { - auto &MLC = LC.MachOLoadCommand; - StringRef Segname; - uint64_t SegmentVmAddr; - uint64_t SegmentVmSize; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - SegmentVmAddr = MLC.segment_command_data.vmaddr; - SegmentVmSize = MLC.segment_command_data.vmsize; - Segname = StringRef(MLC.segment_command_data.segname, - strnlen(MLC.segment_command_data.segname, - sizeof(MLC.segment_command_data.segname))); - break; - case MachO::LC_SEGMENT_64: - SegmentVmAddr = MLC.segment_command_64_data.vmaddr; - SegmentVmSize = MLC.segment_command_64_data.vmsize; - Segname = StringRef(MLC.segment_command_64_data.segname, - strnlen(MLC.segment_command_64_data.segname, - sizeof(MLC.segment_command_64_data.segname))); - break; - default: - continue; - } - - if (Segname == "__LINKEDIT") { - // We update the __LINKEDIT segment later (in layoutTail). - assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); - LinkEditLoadCommand = &MLC; - continue; - } - - // Update file offsets and sizes of sections. - uint64_t SegOffset = Offset; - uint64_t SegFileSize = 0; - uint64_t VMSize = 0; - for (std::unique_ptr
&Sec : LC.Sections) { - assert(SegmentVmAddr <= Sec->Addr && - "Section's address cannot be smaller than Segment's one"); - uint32_t SectOffset = Sec->Addr - SegmentVmAddr; - if (IsObjectFile) { - if (!Sec->hasValidOffset()) { - Sec->Offset = 0; - } else { - uint64_t PaddingSize = - offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); - Sec->Offset = SegOffset + SegFileSize + PaddingSize; - Sec->Size = Sec->Content.size(); - SegFileSize += PaddingSize + Sec->Size; - } - } else { - if (!Sec->hasValidOffset()) { - Sec->Offset = 0; - } else { - Sec->Offset = SegOffset + SectOffset; - Sec->Size = Sec->Content.size(); - SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); - } - } - VMSize = std::max(VMSize, SectOffset + Sec->Size); - } - - if (IsObjectFile) { - Offset += SegFileSize; - } else { - Offset = alignTo(Offset + SegFileSize, PageSize); - SegFileSize = alignTo(SegFileSize, PageSize); - // Use the original vmsize if the segment is __PAGEZERO. - VMSize = - Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); - } - - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - MLC.segment_command_data.cmdsize = - sizeof(MachO::segment_command) + - sizeof(MachO::section) * LC.Sections.size(); - MLC.segment_command_data.nsects = LC.Sections.size(); - MLC.segment_command_data.fileoff = SegOffset; - MLC.segment_command_data.vmsize = VMSize; - MLC.segment_command_data.filesize = SegFileSize; - break; - case MachO::LC_SEGMENT_64: - MLC.segment_command_64_data.cmdsize = - sizeof(MachO::segment_command_64) + - sizeof(MachO::section_64) * LC.Sections.size(); - MLC.segment_command_64_data.nsects = LC.Sections.size(); - MLC.segment_command_64_data.fileoff = SegOffset; - MLC.segment_command_64_data.vmsize = VMSize; - MLC.segment_command_64_data.filesize = SegFileSize; - break; - } - } - - return Offset; -} - -uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { - for (LoadCommand &LC : O.LoadCommands) - for (std::unique_ptr
&Sec : LC.Sections) { - Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; - Sec->NReloc = Sec->Relocations.size(); - Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; - } - - return Offset; -} - -Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { - // If we are building the layout of an executable or dynamic library - // which does not have any segments other than __LINKEDIT, - // the Offset can be equal to zero by this time. It happens because of the - // convention that in such cases the file offsets specified by LC_SEGMENT - // start with zero (unlike the case of a relocatable object file). - const uint64_t HeaderSize = - Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); - assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || - Offset >= HeaderSize + O.Header.SizeOfCmds) && - "Incorrect tail offset"); - Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); - - // The order of LINKEDIT elements is as follows: - // rebase info, binding info, weak binding info, lazy binding info, export - // trie, data-in-code, symbol table, indirect symbol table, symbol table - // strings, code signature. - uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); - uint64_t StartOfLinkEdit = Offset; - uint64_t StartOfRebaseInfo = StartOfLinkEdit; - uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); - uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); - uint64_t StartOfLazyBindingInfo = - StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); - uint64_t StartOfExportTrie = - StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); - uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); - uint64_t StartOfDyldExportsTrie = - StartOfFunctionStarts + O.FunctionStarts.Data.size(); - uint64_t StartOfChainedFixups = - StartOfDyldExportsTrie + O.ExportsTrie.Data.size(); - uint64_t StartOfDataInCode = - StartOfChainedFixups + O.ChainedFixups.Data.size(); - uint64_t StartOfLinkerOptimizationHint = - StartOfDataInCode + O.DataInCode.Data.size(); - uint64_t StartOfSymbols = - StartOfLinkerOptimizationHint + O.LinkerOptimizationHint.Data.size(); - uint64_t StartOfIndirectSymbols = - StartOfSymbols + NListSize * O.SymTable.Symbols.size(); - uint64_t StartOfSymbolStrings = - StartOfIndirectSymbols + - sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); - uint64_t StartOfCodeSignature = - StartOfSymbolStrings + StrTableBuilder.getSize(); - uint32_t CodeSignatureSize = 0; - if (O.CodeSignatureCommandIndex) { - StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); - - // Note: These calculations are to be kept in sync with the same - // calculations performed in LLD's CodeSignatureSection. - const uint32_t AllHeadersSize = - alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, - CodeSignature.Align); - const uint32_t BlockCount = - (StartOfCodeSignature + CodeSignature.BlockSize - 1) / - CodeSignature.BlockSize; - const uint32_t Size = - alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, - CodeSignature.Align); - - CodeSignature.StartOffset = StartOfCodeSignature; - CodeSignature.AllHeadersSize = AllHeadersSize; - CodeSignature.BlockCount = BlockCount; - CodeSignature.OutputFileName = OutputFileName; - CodeSignature.Size = Size; - CodeSignatureSize = Size; - } - uint64_t LinkEditSize = - StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; - - // Now we have determined the layout of the contents of the __LINKEDIT - // segment. 
Update its load command. - if (LinkEditLoadCommand) { - MachO::macho_load_command *MLC = LinkEditLoadCommand; - switch (LinkEditLoadCommand->load_command_data.cmd) { - case MachO::LC_SEGMENT: - MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); - MLC->segment_command_data.fileoff = StartOfLinkEdit; - MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); - MLC->segment_command_data.filesize = LinkEditSize; - break; - case MachO::LC_SEGMENT_64: - MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); - MLC->segment_command_64_data.fileoff = StartOfLinkEdit; - MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); - MLC->segment_command_64_data.filesize = LinkEditSize; - break; - } - } - - for (LoadCommand &LC : O.LoadCommands) { - auto &MLC = LC.MachOLoadCommand; - auto cmd = MLC.load_command_data.cmd; - switch (cmd) { - case MachO::LC_CODE_SIGNATURE: - MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; - MLC.linkedit_data_command_data.datasize = CodeSignatureSize; - break; - case MachO::LC_SYMTAB: - MLC.symtab_command_data.symoff = StartOfSymbols; - MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); - MLC.symtab_command_data.stroff = StartOfSymbolStrings; - MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); - break; - case MachO::LC_DYSYMTAB: { - if (MLC.dysymtab_command_data.ntoc != 0 || - MLC.dysymtab_command_data.nmodtab != 0 || - MLC.dysymtab_command_data.nextrefsyms != 0 || - MLC.dysymtab_command_data.nlocrel != 0 || - MLC.dysymtab_command_data.nextrel != 0) - return createStringError(llvm::errc::not_supported, - "shared library is not yet supported"); - - if (!O.IndirectSymTable.Symbols.empty()) { - MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; - MLC.dysymtab_command_data.nindirectsyms = - O.IndirectSymTable.Symbols.size(); - } - - updateDySymTab(MLC); - break; - } - case MachO::LC_DATA_IN_CODE: - MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; - MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); - break; - case MachO::LC_LINKER_OPTIMIZATION_HINT: - MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; - MLC.linkedit_data_command_data.datasize = - O.LinkerOptimizationHint.Data.size(); - break; - case MachO::LC_FUNCTION_STARTS: - MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; - MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); - break; - case MachO::LC_DYLD_CHAINED_FIXUPS: - MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; - MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); - break; - case MachO::LC_DYLD_EXPORTS_TRIE: - MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; - MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size(); - break; - case MachO::LC_DYLD_INFO: - case MachO::LC_DYLD_INFO_ONLY: - MLC.dyld_info_command_data.rebase_off = - O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; - MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); - MLC.dyld_info_command_data.bind_off = - O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; - MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); - MLC.dyld_info_command_data.weak_bind_off = - O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; - MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); - MLC.dyld_info_command_data.lazy_bind_off = - O.LazyBinds.Opcodes.empty() ? 
0 : StartOfLazyBindingInfo; - MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); - MLC.dyld_info_command_data.export_off = - O.Exports.Trie.empty() ? 0 : StartOfExportTrie; - MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); - break; - // Note that LC_ENCRYPTION_INFO.cryptoff, despite its name and the comment in - // <mach-o/loader.h>, is not an offset in the binary file; instead, it is a - // relative virtual address. At the moment modification of the __TEXT - // segment of executables isn't supported anyway (e.g. data in code entries - // are not recalculated). Moreover, in general - // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because - // without making additional assumptions (e.g. that the entire __TEXT - // segment should be encrypted) we do not know how to recalculate the - // boundaries of the encrypted part. For now just copy over these load - // commands until we encounter a real-world use case where - // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. - case MachO::LC_ENCRYPTION_INFO: - case MachO::LC_ENCRYPTION_INFO_64: - case MachO::LC_LOAD_DYLINKER: - case MachO::LC_MAIN: - case MachO::LC_RPATH: - case MachO::LC_SEGMENT: - case MachO::LC_SEGMENT_64: - case MachO::LC_VERSION_MIN_MACOSX: - case MachO::LC_VERSION_MIN_IPHONEOS: - case MachO::LC_VERSION_MIN_TVOS: - case MachO::LC_VERSION_MIN_WATCHOS: - case MachO::LC_BUILD_VERSION: - case MachO::LC_ID_DYLIB: - case MachO::LC_LOAD_DYLIB: - case MachO::LC_LOAD_WEAK_DYLIB: - case MachO::LC_UUID: - case MachO::LC_SOURCE_VERSION: - case MachO::LC_THREAD: - case MachO::LC_UNIXTHREAD: - case MachO::LC_SUB_FRAMEWORK: - case MachO::LC_SUB_UMBRELLA: - case MachO::LC_SUB_CLIENT: - case MachO::LC_SUB_LIBRARY: - case MachO::LC_LINKER_OPTION: - // Nothing to update. - break; - default: - // Abort if it's unsupported in order to prevent corrupting the object. - return createStringError(llvm::errc::not_supported, - "unsupported load command (cmd=0x%x)", cmd); - } - } - - return Error::success(); -} - -Error MachOLayoutBuilder::layout() { - O.Header.NCmds = O.LoadCommands.size(); - O.Header.SizeOfCmds = computeSizeOfCmds(); - constructStringTable(); - updateSymbolIndexes(); - uint64_t Offset = layoutSegments(); - Offset = layoutRelocations(Offset); - return layoutTail(Offset); -} diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h deleted file mode 100644 index 44d03b4af7e8..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h +++ /dev/null @@ -1,97 +0,0 @@ -//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H -#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H - -#include "MachOObjcopy.h" -#include "Object.h" - -namespace llvm { -namespace objcopy { -namespace macho { - -/// When MachO binaries include a LC_CODE_SIGNATURE load command, -/// the __LINKEDIT data segment will include a section corresponding -/// to the LC_CODE_SIGNATURE load command. This section serves as a signature -/// for the binary. Included in the CodeSignature section is a header followed -/// by a hash of the binary.
If present, the CodeSignature section is the -/// last component of the binary. -struct CodeSignatureInfo { - // NOTE: These values are to be kept in sync with those in - // LLD's CodeSignatureSection class. - - static constexpr uint32_t Align = 16; - static constexpr uint8_t BlockSizeShift = 12; - // The binary is read in blocks of the following size. - static constexpr size_t BlockSize = (1 << BlockSizeShift); // 4 KiB - // For each block, a SHA256 hash (256 bits, 32 bytes) is written to - // the CodeSignature section. - static constexpr size_t HashSize = 256 / 8; - static constexpr size_t BlobHeadersSize = llvm::alignTo<8>( - sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex)); - // The size of the entire header depends upon the filename the binary is being - // written to, but the rest of the header is fixed in size. - static constexpr uint32_t FixedHeadersSize = - BlobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory); - - // The offset relative to the start of the binary where - // the CodeSignature section should begin. - uint32_t StartOffset; - // The size of the entire header, output file name size included. - uint32_t AllHeadersSize; - // The number of blocks required to hash the binary. - uint32_t BlockCount; - StringRef OutputFileName; - // The size of the entire CodeSignature section, including both the header and - // hashes. - uint32_t Size; -}; - -class MachOLayoutBuilder { - Object &O; - bool Is64Bit; - StringRef OutputFileName; - uint64_t PageSize; - CodeSignatureInfo CodeSignature; - - // Points to the __LINKEDIT segment if it exists. - MachO::macho_load_command *LinkEditLoadCommand = nullptr; - StringTableBuilder StrTableBuilder; - - uint32_t computeSizeOfCmds() const; - void constructStringTable(); - void updateSymbolIndexes(); - void updateDySymTab(MachO::macho_load_command &MLC); - uint64_t layoutSegments(); - uint64_t layoutRelocations(uint64_t Offset); - Error layoutTail(uint64_t Offset); - - static StringTableBuilder::Kind getStringTableBuilderKind(const Object &O, - bool Is64Bit); - -public: - MachOLayoutBuilder(Object &O, bool Is64Bit, StringRef OutputFileName, - uint64_t PageSize) - : O(O), Is64Bit(Is64Bit), OutputFileName(OutputFileName), - PageSize(PageSize), - StrTableBuilder(getStringTableBuilderKind(O, Is64Bit)) {} - - // Recomputes and updates fields in the given object such as file offsets. - Error layout(); - - StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; } - - const CodeSignatureInfo &getCodeSignature() { return CodeSignature; } -}; - -} // end namespace macho -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp deleted file mode 100644 index 0f92ca516bef..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ /dev/null @@ -1,549 +0,0 @@ -//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "MachOObjcopy.h" -#include "../llvm-objcopy.h" -#include "CommonConfig.h" -#include "MachO/MachOConfig.h" -#include "MachOReader.h" -#include "MachOWriter.h" -#include "MultiFormatConfig.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/Object/ArchiveWriter.h" -#include "llvm/Object/MachOUniversal.h" -#include "llvm/Object/MachOUniversalWriter.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/SmallVectorMemoryBuffer.h" - -using namespace llvm; -using namespace llvm::objcopy; -using namespace llvm::objcopy::macho; -using namespace llvm::object; - -using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>; -using LoadCommandPred = std::function<bool(const LoadCommand &LC)>; - -#ifndef NDEBUG -static bool isLoadCommandWithPayloadString(const LoadCommand &LC) { - // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and - // LC_LAZY_LOAD_DYLIB - return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH || - LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB || - LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB || - LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB; -} -#endif - -static StringRef getPayloadString(const LoadCommand &LC) { - assert(isLoadCommandWithPayloadString(LC) && - "unsupported load command encountered"); - - return StringRef(reinterpret_cast<const char *>(LC.Payload.data()), - LC.Payload.size()) - .rtrim('\0'); -} - -static Error removeSections(const CommonConfig &Config, Object &Obj) { - SectionPred RemovePred = [](const std::unique_ptr<Section>
&) { - return false; - }; - - if (!Config.ToRemove.empty()) { - RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) { - return Config.ToRemove.matches(Sec->CanonicalName); - }; - } - - if (Config.StripAll || Config.StripDebug) { - // Remove all debug sections. - RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) { - if (Sec->Segname == "__DWARF") - return true; - - return RemovePred(Sec); - }; - } - - if (!Config.OnlySection.empty()) { - // Overwrite RemovePred because --only-section takes priority. - RemovePred = [&Config](const std::unique_ptr<Section>
&Sec) { - return !Config.OnlySection.matches(Sec->CanonicalName); - }; - } - - return Obj.removeSections(RemovePred); -} - -static void markSymbols(const CommonConfig &, Object &Obj) { - // Symbols referenced from the indirect symbol table must not be removed. - for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols) - if (ISE.Symbol) - (*ISE.Symbol)->Referenced = true; -} - -static void updateAndRemoveSymbols(const CommonConfig &Config, - const MachOConfig &MachOConfig, - Object &Obj) { - for (SymbolEntry &Sym : Obj.SymTable) { - auto I = Config.SymbolsToRename.find(Sym.Name); - if (I != Config.SymbolsToRename.end()) - Sym.Name = std::string(I->getValue()); - } - - auto RemovePred = [Config, MachOConfig, - &Obj](const std::unique_ptr &N) { - if (N->Referenced) - return false; - if (MachOConfig.KeepUndefined && N->isUndefinedSymbol()) - return false; - if (N->n_desc & MachO::REFERENCED_DYNAMICALLY) - return false; - if (Config.StripAll) - return true; - if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) - return true; - // This behavior is consistent with cctools' strip. - if (MachOConfig.StripSwiftSymbols && - (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion && - *Obj.SwiftVersion && N->isSwiftSymbol()) - return true; - return false; - }; - - Obj.SymTable.removeSymbols(RemovePred); -} - -template -static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { - assert(isLoadCommandWithPayloadString(LC) && - "unsupported load command encountered"); - - uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); - - LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; - LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); - std::copy(S.begin(), S.end(), LC.Payload.begin()); -} - -static LoadCommand buildRPathLoadCommand(StringRef Path) { - LoadCommand LC; - MachO::rpath_command RPathLC; - RPathLC.cmd = MachO::LC_RPATH; - RPathLC.path = sizeof(MachO::rpath_command); - RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); - LC.MachOLoadCommand.rpath_command_data = RPathLC; - LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); - std::copy(Path.begin(), Path.end(), LC.Payload.begin()); - return LC; -} - -static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) { - // Remove RPaths. - DenseSet RPathsToRemove(MachOConfig.RPathsToRemove.begin(), - MachOConfig.RPathsToRemove.end()); - - LoadCommandPred RemovePred = [&RPathsToRemove, - &MachOConfig](const LoadCommand &LC) { - if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { - // When removing all RPaths we don't need to care - // about what it contains - if (MachOConfig.RemoveAllRpaths) - return true; - - StringRef RPath = getPayloadString(LC); - if (RPathsToRemove.count(RPath)) { - RPathsToRemove.erase(RPath); - return true; - } - } - return false; - }; - - if (Error E = Obj.removeLoadCommands(RemovePred)) - return E; - - // Emit an error if the Mach-O binary does not contain an rpath path name - // specified in -delete_rpath. - for (StringRef RPath : MachOConfig.RPathsToRemove) { - if (RPathsToRemove.count(RPath)) - return createStringError(errc::invalid_argument, - "no LC_RPATH load command with path: %s", - RPath.str().c_str()); - } - - DenseSet RPaths; - - // Get all existing RPaths. - for (LoadCommand &LC : Obj.LoadCommands) { - if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) - RPaths.insert(getPayloadString(LC)); - } - - // Throw errors for invalid RPaths. 
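The loop below enforces the rpath-update contract: the old path must currently be present among the LC_RPATH commands, and the new path must not collide with an existing one. A standalone model of just those two checks, with std::set standing in for the DenseSet used here (names and the exception type are illustrative):

#include <set>
#include <stdexcept>
#include <string>

// Model of the two rpath-update checks: Old must exist, New must be new.
static void checkRPathUpdate(const std::set<std::string> &RPaths,
                             const std::string &Old, const std::string &New) {
  if (!RPaths.count(Old))
    throw std::invalid_argument("no LC_RPATH load command with path: " + Old);
  if (RPaths.count(New))
    throw std::invalid_argument("rpath '" + New +
                                "' would create a duplicate load command");
}

int main() {
  const std::set<std::string> RPaths{"/usr/lib", "@loader_path/../lib"};
  checkRPathUpdate(RPaths, "/usr/lib", "@rpath/lib"); // passes
  // checkRPathUpdate(RPaths, "/missing", "/x");      // would throw
  return 0;
}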
- for (const auto &OldNew : MachOConfig.RPathsToUpdate) { - StringRef Old = OldNew.getFirst(); - StringRef New = OldNew.getSecond(); - if (!RPaths.contains(Old)) - return createStringError(errc::invalid_argument, - "no LC_RPATH load command with path: " + Old); - if (RPaths.contains(New)) - return createStringError(errc::invalid_argument, - "rpath '" + New + - "' would create a duplicate load command"); - } - - // Update load commands. - for (LoadCommand &LC : Obj.LoadCommands) { - switch (LC.MachOLoadCommand.load_command_data.cmd) { - case MachO::LC_ID_DYLIB: - if (MachOConfig.SharedLibId) - updateLoadCommandPayloadString( - LC, *MachOConfig.SharedLibId); - break; - - case MachO::LC_RPATH: { - StringRef RPath = getPayloadString(LC); - StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath); - if (!NewRPath.empty()) - updateLoadCommandPayloadString(LC, NewRPath); - break; - } - - // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB - // here once llvm-objcopy supports them. - case MachO::LC_LOAD_DYLIB: - case MachO::LC_LOAD_WEAK_DYLIB: - StringRef InstallName = getPayloadString(LC); - StringRef NewInstallName = - MachOConfig.InstallNamesToUpdate.lookup(InstallName); - if (!NewInstallName.empty()) - updateLoadCommandPayloadString(LC, - NewInstallName); - break; - } - } - - // Add new RPaths. - for (StringRef RPath : MachOConfig.RPathToAdd) { - if (RPaths.contains(RPath)) - return createStringError(errc::invalid_argument, - "rpath '" + RPath + - "' would create a duplicate load command"); - RPaths.insert(RPath); - Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath)); - } - - for (StringRef RPath : MachOConfig.RPathToPrepend) { - if (RPaths.contains(RPath)) - return createStringError(errc::invalid_argument, - "rpath '" + RPath + - "' would create a duplicate load command"); - - RPaths.insert(RPath); - Obj.LoadCommands.insert(Obj.LoadCommands.begin(), - buildRPathLoadCommand(RPath)); - } - - // Unlike appending rpaths, the indexes of subsequent load commands must - // be recalculated after prepending one. - if (!MachOConfig.RPathToPrepend.empty()) - Obj.updateLoadCommandIndexes(); - - return Error::success(); -} - -static Error dumpSectionToFile(StringRef SecName, StringRef Filename, - Object &Obj) { - for (LoadCommand &LC : Obj.LoadCommands) - for (const std::unique_ptr
&Sec : LC.Sections) { - if (Sec->CanonicalName == SecName) { - Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Filename, Sec->Content.size()); - if (!BufferOrErr) - return BufferOrErr.takeError(); - std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); - llvm::copy(Sec->Content, Buf->getBufferStart()); - - if (Error E = Buf->commit()) - return E; - return Error::success(); - } - } - - return createStringError(object_error::parse_failed, "section '%s' not found", - SecName.str().c_str()); -} - -static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) { - ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = - MemoryBuffer::getFile(Filename); - if (!BufOrErr) - return createFileError(Filename, errorCodeToError(BufOrErr.getError())); - std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr); - - std::pair<StringRef, StringRef> Pair = SecName.split(','); - StringRef TargetSegName = Pair.first; - Section Sec(TargetSegName, Pair.second); - Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer()); - Sec.Size = Sec.Content.size(); - - // Add the section into an existing segment. - for (LoadCommand &LC : Obj.LoadCommands) { - Optional<StringRef> SegName = LC.getSegmentName(); - if (SegName && SegName == TargetSegName) { - uint64_t Addr = *LC.getSegmentVMAddr(); - for (const std::unique_ptr<Section> &S : LC.Sections) - Addr = std::max(Addr, S->Addr + S->Size); - LC.Sections.push_back(std::make_unique<Section>(Sec)); - LC.Sections.back()->Addr = Addr; - return Error::success(); - } - } - - // There's no segment named TargetSegName. Create a new load command and - // insert a new section into it. - LoadCommand &NewSegment = - Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384)); - NewSegment.Sections.push_back(std::make_unique<Section>(Sec)); - NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr(); - return Error::success(); -} - -static Expected<Section &>
findSection(StringRef SecName, Object &O) { - StringRef SegName; - std::tie(SegName, SecName) = SecName.split(","); - auto FoundSeg = - llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) { - return LC.getSegmentName() == SegName; - }); - if (FoundSeg == O.LoadCommands.end()) - return createStringError(errc::invalid_argument, - "could not find segment with name '%s'", - SegName.str().c_str()); - auto FoundSec = llvm::find_if(FoundSeg->Sections, - [SecName](const std::unique_ptr<Section> &Sec) { - return Sec->Sectname == SecName; - }); - if (FoundSec == FoundSeg->Sections.end()) - return createStringError(errc::invalid_argument, - "could not find section with name '%s'", - SecName.str().c_str()); - - assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str()); - return *FoundSec->get(); -} - -static Error updateSection(StringRef SecName, StringRef Filename, Object &O) { - Expected<Section &> SecToUpdateOrErr = findSection(SecName, O); - - if (!SecToUpdateOrErr) - return SecToUpdateOrErr.takeError(); - Section &Sec = *SecToUpdateOrErr; - - ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = - MemoryBuffer::getFile(Filename); - if (!BufOrErr) - return createFileError(Filename, errorCodeToError(BufOrErr.getError())); - std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr); - - if (Buf->getBufferSize() > Sec.Size) - return createStringError( - errc::invalid_argument, - "new section cannot be larger than previous section"); - Sec.Content = O.NewSectionsContents.save(Buf->getBuffer()); - Sec.Size = Sec.Content.size(); - return Error::success(); -} - -// isValidMachOCannonicalName returns success if Name is a MachO canonical name -// ("<segment>,<section>") and lengths of both segment and section names are -// valid. -static Error isValidMachOCannonicalName(StringRef Name) { - if (Name.count(',') != 1) - return createStringError(errc::invalid_argument, - "invalid section name '%s' (should be formatted " - "as '<segment>,<section>
')", - Name.str().c_str()); - - std::pair Pair = Name.split(','); - if (Pair.first.size() > 16) - return createStringError(errc::invalid_argument, - "too long segment name: '%s'", - Pair.first.str().c_str()); - if (Pair.second.size() > 16) - return createStringError(errc::invalid_argument, - "too long section name: '%s'", - Pair.second.str().c_str()); - return Error::success(); -} - -static Error handleArgs(const CommonConfig &Config, - const MachOConfig &MachOConfig, Object &Obj) { - // Dump sections before add/remove for compatibility with GNU objcopy. - for (StringRef Flag : Config.DumpSection) { - StringRef SectionName; - StringRef FileName; - std::tie(SectionName, FileName) = Flag.split('='); - if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) - return E; - } - - if (Error E = removeSections(Config, Obj)) - return E; - - // Mark symbols to determine which symbols are still needed. - if (Config.StripAll) - markSymbols(Config, Obj); - - updateAndRemoveSymbols(Config, MachOConfig, Obj); - - if (Config.StripAll) - for (LoadCommand &LC : Obj.LoadCommands) - for (std::unique_ptr
&Sec : LC.Sections) - Sec->Relocations.clear(); - - for (const auto &Flag : Config.AddSection) { - std::pair SecPair = Flag.split("="); - StringRef SecName = SecPair.first; - StringRef File = SecPair.second; - if (Error E = isValidMachOCannonicalName(SecName)) - return E; - if (Error E = addSection(SecName, File, Obj)) - return E; - } - - for (const auto &Flag : Config.UpdateSection) { - StringRef SectionName; - StringRef FileName; - std::tie(SectionName, FileName) = Flag.split('='); - if (Error E = isValidMachOCannonicalName(SectionName)) - return E; - if (Error E = updateSection(SectionName, FileName, Obj)) - return E; - } - - if (Error E = processLoadCommands(MachOConfig, Obj)) - return E; - - return Error::success(); -} - -Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config, - const MachOConfig &MachOConfig, - object::MachOObjectFile &In, - raw_ostream &Out) { - MachOReader Reader(In); - Expected> O = Reader.create(); - if (!O) - return createFileError(Config.InputFilename, O.takeError()); - - if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD) - return createStringError(std::errc::not_supported, - "%s: MH_PRELOAD files are not supported", - Config.InputFilename.str().c_str()); - - if (Error E = handleArgs(Config, MachOConfig, **O)) - return createFileError(Config.InputFilename, std::move(E)); - - // Page size used for alignment of segment sizes in Mach-O executables and - // dynamic libraries. - uint64_t PageSize; - switch (In.getArch()) { - case Triple::ArchType::arm: - case Triple::ArchType::aarch64: - case Triple::ArchType::aarch64_32: - PageSize = 16384; - break; - default: - PageSize = 4096; - } - - MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), - sys::path::filename(Config.OutputFilename), PageSize, Out); - if (auto E = Writer.finalize()) - return E; - return Writer.write(); -} - -Error objcopy::macho::executeObjcopyOnMachOUniversalBinary( - const MultiFormatConfig &Config, const MachOUniversalBinary &In, - raw_ostream &Out) { - SmallVector, 2> Binaries; - SmallVector Slices; - for (const auto &O : In.objects()) { - Expected> ArOrErr = O.getAsArchive(); - if (ArOrErr) { - Expected> NewArchiveMembersOrErr = - createNewArchiveMembers(Config, **ArOrErr); - if (!NewArchiveMembersOrErr) - return NewArchiveMembersOrErr.takeError(); - Expected> OutputBufferOrErr = - writeArchiveToBuffer(*NewArchiveMembersOrErr, - (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(), - Config.getCommonConfig().DeterministicArchives, - (*ArOrErr)->isThin()); - if (!OutputBufferOrErr) - return OutputBufferOrErr.takeError(); - Expected> BinaryOrErr = - object::createBinary(**OutputBufferOrErr); - if (!BinaryOrErr) - return BinaryOrErr.takeError(); - Binaries.emplace_back(std::move(*BinaryOrErr), - std::move(*OutputBufferOrErr)); - Slices.emplace_back(*cast(Binaries.back().getBinary()), - O.getCPUType(), O.getCPUSubType(), - O.getArchFlagName(), O.getAlign()); - continue; - } - // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class - // ObjectForArch return an Error in case of the type mismatch. We need to - // check each in turn to see what kind of slice this is, so ignore errors - // produced along the way. 
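Probing a slice type by calling an accessor and deliberately discarding its failure, as the consumeError call below does, is the standard llvm::Expected pattern: an Expected left in the error state must have its Error taken and consumed before destruction, or LLVM aborts at runtime. A reduced sketch of that fallback chain, using toy parse functions rather than the real ObjectForArch API:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

using namespace llvm;

// Toy stand-ins for getAsArchive / getAsObjectFile: each either succeeds or
// returns an Error describing the mismatch.
static Expected<int> parseAsArchive(StringRef S) {
  if (S != "archive")
    return createStringError(inconvertibleErrorCode(), "not an archive");
  return 1;
}
static Expected<int> parseAsObject(StringRef S) {
  if (S != "object")
    return createStringError(inconvertibleErrorCode(), "not an object");
  return 2;
}

static Expected<int> classify(StringRef S) {
  if (Expected<int> A = parseAsArchive(S))
    return *A;
  else
    consumeError(A.takeError()); // must consume, or ~Expected asserts
  return parseAsObject(S);       // last candidate: let its error propagate
}

int main() {
  Expected<int> Kind = classify("object");
  if (!Kind) {
    consumeError(Kind.takeError());
    return 1;
  }
  return *Kind == 2 ? 0 : 1;
}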
- consumeError(ArOrErr.takeError()); - - Expected> ObjOrErr = O.getAsObjectFile(); - if (!ObjOrErr) { - consumeError(ObjOrErr.takeError()); - return createStringError( - std::errc::invalid_argument, - "slice for '%s' of the universal Mach-O binary " - "'%s' is not a Mach-O object or an archive", - O.getArchFlagName().c_str(), - Config.getCommonConfig().InputFilename.str().c_str()); - } - std::string ArchFlagName = O.getArchFlagName(); - - SmallVector Buffer; - raw_svector_ostream MemStream(Buffer); - - Expected MachO = Config.getMachOConfig(); - if (!MachO) - return MachO.takeError(); - - if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO, - **ObjOrErr, MemStream)) - return E; - - auto MB = std::make_unique( - std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false); - Expected> BinaryOrErr = object::createBinary(*MB); - if (!BinaryOrErr) - return BinaryOrErr.takeError(); - Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB)); - Slices.emplace_back(*cast(Binaries.back().getBinary()), - O.getAlign()); - } - - if (Error Err = writeUniversalBinaryToStream(Slices, Out)) - return Err; - - return Error::success(); -} diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h deleted file mode 100644 index d03eee9d5fdb..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h +++ /dev/null @@ -1,39 +0,0 @@ -//===- MachOObjcopy.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H -#define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H - -namespace llvm { -class Error; -class raw_ostream; - -namespace object { -class MachOObjectFile; -class MachOUniversalBinary; -} // end namespace object - -namespace objcopy { -struct CommonConfig; -struct MachOConfig; -class MultiFormatConfig; - -namespace macho { -Error executeObjcopyOnBinary(const CommonConfig &Config, - const MachOConfig &MachOConfig, - object::MachOObjectFile &In, raw_ostream &Out); - -Error executeObjcopyOnMachOUniversalBinary( - const MultiFormatConfig &Config, const object::MachOUniversalBinary &In, - raw_ostream &Out); - -} // end namespace macho -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp deleted file mode 100644 index d68d1692997a..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//===- MachOReader.cpp ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "MachOReader.h" -#include "Object.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/MachO.h" -#include "llvm/Support/Errc.h" -#include - -using namespace llvm; -using namespace llvm::objcopy; -using namespace llvm::objcopy::macho; - -void MachOReader::readHeader(Object &O) const { - O.Header.Magic = MachOObj.getHeader().magic; - O.Header.CPUType = MachOObj.getHeader().cputype; - O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; - O.Header.FileType = MachOObj.getHeader().filetype; - O.Header.NCmds = MachOObj.getHeader().ncmds; - O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; - O.Header.Flags = MachOObj.getHeader().flags; -} - -template -static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { - StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); - StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); - Section S(SegName, SectName); - S.Index = Index; - S.Addr = Sec.addr; - S.Size = Sec.size; - S.OriginalOffset = Sec.offset; - S.Align = Sec.align; - S.RelOff = Sec.reloff; - S.NReloc = Sec.nreloc; - S.Flags = Sec.flags; - S.Reserved1 = Sec.reserved1; - S.Reserved2 = Sec.reserved2; - S.Reserved3 = 0; - return S; -} - -Section constructSection(const MachO::section &Sec, uint32_t Index) { - return constructSectionCommon(Sec, Index); -} - -Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { - Section S = constructSectionCommon(Sec, Index); - S.Reserved3 = Sec.reserved3; - return S; -} - -template -Expected>> static extractSections( - const object::MachOObjectFile::LoadCommandInfo &LoadCmd, - const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { - std::vector> Sections; - for (auto Curr = reinterpret_cast(LoadCmd.Ptr + - sizeof(SegmentType)), - End = reinterpret_cast(LoadCmd.Ptr + - LoadCmd.C.cmdsize); - Curr < End; ++Curr) { - SectionType Sec; - memcpy((void *)&Sec, Curr, sizeof(SectionType)); - - if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) - MachO::swapStruct(Sec); - - Sections.push_back( - std::make_unique
(constructSection(Sec, NextSectionIndex))); - - Section &S = *Sections.back(); - - Expected SecRef = - MachOObj.getSection(NextSectionIndex++); - if (!SecRef) - return SecRef.takeError(); - - Expected> Data = - MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); - if (!Data) - return Data.takeError(); - - S.Content = - StringRef(reinterpret_cast(Data->data()), Data->size()); - - const uint32_t CPUType = MachOObj.getHeader().cputype; - S.Relocations.reserve(S.NReloc); - for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), - RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); - RI != RE; ++RI) { - RelocationInfo R; - R.Symbol = nullptr; // We'll fill this field later. - R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); - R.Scattered = MachOObj.isRelocationScattered(R.Info); - unsigned Type = MachOObj.getAnyRelocationType(R.Info); - // TODO Support CPU_TYPE_ARM. - R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && - Type == MachO::ARM64_RELOC_ADDEND); - R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); - S.Relocations.push_back(R); - } - - assert(S.NReloc == S.Relocations.size() && - "Incorrect number of relocations"); - } - return std::move(Sections); -} - -Error MachOReader::readLoadCommands(Object &O) const { - // For MachO sections indices start from 1. - uint32_t NextSectionIndex = 1; - static constexpr char TextSegmentName[] = "__TEXT"; - for (auto LoadCmd : MachOObj.load_commands()) { - LoadCommand LC; - switch (LoadCmd.C.cmd) { - case MachO::LC_CODE_SIGNATURE: - O.CodeSignatureCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_SEGMENT: - // LoadCmd.Ptr might not be aligned temporarily as - // MachO::segment_command requires, but the segname char pointer do not - // have alignment restrictions. - if (StringRef(reinterpret_cast( - LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == - TextSegmentName) - O.TextSegmentCommandIndex = O.LoadCommands.size(); - - if (Expected>> Sections = - extractSections( - LoadCmd, MachOObj, NextSectionIndex)) - LC.Sections = std::move(*Sections); - else - return Sections.takeError(); - break; - case MachO::LC_SEGMENT_64: - // LoadCmd.Ptr might not be aligned temporarily as - // MachO::segment_command_64 requires, but the segname char pointer do - // not have alignment restrictions. 
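Reading the struct out of the byte buffer with memcpy, as extractSections does above, sidesteps both problems this comment describes: the source pointer may be misaligned for the struct type, and the file may use the opposite byte order. A generic standalone sketch of the pattern in plain C++ (a made-up two-field header; MachO::swapStruct plays the byte-swapping role in the real code):

#include <cstdint>
#include <cstring>

struct SegmentHeader {
  uint32_t cmd;
  uint32_t cmdsize;
};

// Byte-swap a 32-bit value (std::byteswap is C++23; this works everywhere).
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00) | ((V << 8) & 0xff0000) | (V << 24);
}

// Safely load a SegmentHeader from a possibly misaligned, possibly
// foreign-endian buffer.
static SegmentHeader load(const uint8_t *Ptr, bool SwapBytes) {
  SegmentHeader H;
  std::memcpy(&H, Ptr, sizeof(H)); // no alignment requirement on Ptr
  if (SwapBytes) {
    H.cmd = bswap32(H.cmd);
    H.cmdsize = bswap32(H.cmdsize);
  }
  return H;
}

int main() {
  // Little-endian host assumed for this check; SwapBytes would be true when
  // the file's endianness differs from the host's.
  const uint8_t Raw[] = {0x19, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00};
  SegmentHeader H = load(Raw, /*SwapBytes=*/false);
  return H.cmd == 0x19 ? 0 : 1; // 0x19 == LC_SEGMENT
}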
- if (StringRef(reinterpret_cast( - LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == - TextSegmentName) - O.TextSegmentCommandIndex = O.LoadCommands.size(); - - if (Expected>> Sections = - extractSections( - LoadCmd, MachOObj, NextSectionIndex)) - LC.Sections = std::move(*Sections); - else - return Sections.takeError(); - break; - case MachO::LC_SYMTAB: - O.SymTabCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_DYSYMTAB: - O.DySymTabCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_DYLD_INFO: - case MachO::LC_DYLD_INFO_ONLY: - O.DyLdInfoCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_DATA_IN_CODE: - O.DataInCodeCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_LINKER_OPTIMIZATION_HINT: - O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_FUNCTION_STARTS: - O.FunctionStartsCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_DYLD_EXPORTS_TRIE: - O.ExportsTrieCommandIndex = O.LoadCommands.size(); - break; - case MachO::LC_DYLD_CHAINED_FIXUPS: - O.ChainedFixupsCommandIndex = O.LoadCommands.size(); - break; - } -#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - case MachO::LCName: \ - memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ - sizeof(MachO::LCStruct)); \ - if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ - MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ - if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ - LC.Payload = ArrayRef( \ - reinterpret_cast(const_cast(LoadCmd.Ptr)) + \ - sizeof(MachO::LCStruct), \ - LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ - break; - - switch (LoadCmd.C.cmd) { - default: - memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, - sizeof(MachO::load_command)); - if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) - MachO::swapStruct(LC.MachOLoadCommand.load_command_data); - if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) - LC.Payload = ArrayRef( - reinterpret_cast(const_cast(LoadCmd.Ptr)) + - sizeof(MachO::load_command), - LoadCmd.C.cmdsize - sizeof(MachO::load_command)); - break; -#include "llvm/BinaryFormat/MachO.def" - } - O.LoadCommands.push_back(std::move(LC)); - } - return Error::success(); -} - -template -SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { - assert(nlist.n_strx < StrTable.size() && - "n_strx exceeds the size of the string table"); - SymbolEntry SE; - SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); - SE.n_type = nlist.n_type; - SE.n_sect = nlist.n_sect; - SE.n_desc = nlist.n_desc; - SE.n_value = nlist.n_value; - return SE; -} - -void MachOReader::readSymbolTable(Object &O) const { - StringRef StrTable = MachOObj.getStringTableData(); - for (auto Symbol : MachOObj.symbols()) { - SymbolEntry SE = - (MachOObj.is64Bit() - ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( - Symbol.getRawDataRefImpl())) - : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( - Symbol.getRawDataRefImpl()))); - - O.SymTable.Symbols.push_back(std::make_unique(SE)); - } -} - -void MachOReader::setSymbolInRelocationInfo(Object &O) const { - std::vector Sections; - for (auto &LC : O.LoadCommands) - for (std::unique_ptr
&Sec : LC.Sections) - Sections.push_back(Sec.get()); - - for (LoadCommand &LC : O.LoadCommands) - for (std::unique_ptr
&Sec : LC.Sections) - for (auto &Reloc : Sec->Relocations) - if (!Reloc.Scattered && !Reloc.IsAddend) { - const uint32_t SymbolNum = - Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); - if (Reloc.Extern) { - Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); - } else { - // FIXME: Refactor error handling in MachOReader and report an error - // if we encounter an invalid relocation. - assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && - "Invalid section index."); - Reloc.Sec = Sections[SymbolNum - 1]; - } - } -} - -void MachOReader::readRebaseInfo(Object &O) const { - O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); -} - -void MachOReader::readBindInfo(Object &O) const { - O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); -} - -void MachOReader::readWeakBindInfo(Object &O) const { - O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); -} - -void MachOReader::readLazyBindInfo(Object &O) const { - O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); -} - -void MachOReader::readExportInfo(Object &O) const { - O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); -} - -void MachOReader::readLinkData(Object &O, Optional LCIndex, - LinkData &LD) const { - if (!LCIndex) - return; - const MachO::linkedit_data_command &LC = - O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; - LD.Data = - arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); -} - -void MachOReader::readDataInCodeData(Object &O) const { - return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); -} - -void MachOReader::readLinkerOptimizationHint(Object &O) const { - return readLinkData(O, O.LinkerOptimizationHintCommandIndex, - O.LinkerOptimizationHint); -} - -void MachOReader::readFunctionStartsData(Object &O) const { - return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); -} - -void MachOReader::readExportsTrie(Object &O) const { - return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); -} - -void MachOReader::readChainedFixups(Object &O) const { - return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); -} - -void MachOReader::readIndirectSymbolTable(Object &O) const { - MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); - constexpr uint32_t AbsOrLocalMask = - MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; - for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { - uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); - if ((Index & AbsOrLocalMask) != 0) - O.IndirectSymTable.Symbols.emplace_back(Index, None); - else - O.IndirectSymTable.Symbols.emplace_back( - Index, O.SymTable.getSymbolByIndex(Index)); - } -} - -void MachOReader::readSwiftVersion(Object &O) const { - struct ObjCImageInfo { - uint32_t Version; - uint32_t Flags; - } ImageInfo; - - for (const LoadCommand &LC : O.LoadCommands) - for (const std::unique_ptr
&Sec : LC.Sections) - if (Sec->Sectname == "__objc_imageinfo" && - (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || - Sec->Segname == "__DATA_DIRTY") && - Sec->Content.size() >= sizeof(ObjCImageInfo)) { - memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); - if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { - sys::swapByteOrder(ImageInfo.Version); - sys::swapByteOrder(ImageInfo.Flags); - } - O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; - return; - } -} - -Expected> MachOReader::create() const { - auto Obj = std::make_unique(); - readHeader(*Obj); - if (Error E = readLoadCommands(*Obj)) - return std::move(E); - readSymbolTable(*Obj); - setSymbolInRelocationInfo(*Obj); - readRebaseInfo(*Obj); - readBindInfo(*Obj); - readWeakBindInfo(*Obj); - readLazyBindInfo(*Obj); - readExportInfo(*Obj); - readDataInCodeData(*Obj); - readLinkerOptimizationHint(*Obj); - readFunctionStartsData(*Obj); - readExportsTrie(*Obj); - readChainedFixups(*Obj); - readIndirectSymbolTable(*Obj); - readSwiftVersion(*Obj); - return std::move(Obj); -} diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h deleted file mode 100644 index b29e86ca642e..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ /dev/null @@ -1,57 +0,0 @@ -//===- MachOReader.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "MachOObjcopy.h" -#include "Object.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/MachO.h" -#include - -namespace llvm { -namespace objcopy { -namespace macho { - -// The hierarchy of readers is responsible for parsing different inputs: -// raw binaries and regular MachO object files. 
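In that hierarchy, supporting a new input kind only means implementing create() on the interface declared just below. A hypothetical sketch of a second implementation (illustrative only; no such reader exists in this patch, and the synthesis step is elided):

// Hypothetical reader for a raw binary blob, shown only to illustrate the
// extension point; it would live alongside MachOReader below.
class RawBinaryReader : public Reader {
  ArrayRef<uint8_t> Data;

public:
  explicit RawBinaryReader(ArrayRef<uint8_t> Data) : Data(Data) {}

  Expected<std::unique_ptr<Object>> create() const override {
    auto Obj = std::make_unique<Object>();
    // A real implementation would synthesize a Mach-O header plus a single
    // segment/section pair covering Data before returning Obj.
    return std::move(Obj);
  }
};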
-class Reader { -public: - virtual ~Reader(){}; - virtual Expected> create() const = 0; -}; - -class MachOReader : public Reader { - const object::MachOObjectFile &MachOObj; - - void readHeader(Object &O) const; - Error readLoadCommands(Object &O) const; - void readSymbolTable(Object &O) const; - void setSymbolInRelocationInfo(Object &O) const; - void readRebaseInfo(Object &O) const; - void readBindInfo(Object &O) const; - void readWeakBindInfo(Object &O) const; - void readLazyBindInfo(Object &O) const; - void readExportInfo(Object &O) const; - void readLinkData(Object &O, Optional LCIndex, LinkData &LD) const; - void readCodeSignature(Object &O) const; - void readDataInCodeData(Object &O) const; - void readLinkerOptimizationHint(Object &O) const; - void readFunctionStartsData(Object &O) const; - void readExportsTrie(Object &O) const; - void readChainedFixups(Object &O) const; - void readIndirectSymbolTable(Object &O) const; - void readSwiftVersion(Object &O) const; - -public: - explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} - - Expected> create() const override; -}; - -} // end namespace macho -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp deleted file mode 100644 index 52f20794cc57..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ /dev/null @@ -1,748 +0,0 @@ -//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "MachOWriter.h" -#include "MachOLayoutBuilder.h" -#include "Object.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/MachO.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/SHA256.h" -#include - -#if defined(__APPLE__) -#include -#endif - -using namespace llvm; -using namespace llvm::objcopy::macho; -using namespace llvm::support::endian; - -size_t MachOWriter::headerSize() const { - return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); -} - -size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } - -size_t MachOWriter::symTableSize() const { - return O.SymTable.Symbols.size() * - (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); -} - -size_t MachOWriter::totalSize() const { - // Going from tail to head and looking for an appropriate "anchor" to - // calculate the total size assuming that all the offsets are either valid - // ("true") or 0 (0 indicates that the corresponding part is missing). 
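The strategy the comment describes reduces to: collect the end offset of every region whose offset is actually set (non-zero), take the maximum, and fall back to the end of the header plus load commands when nothing else exists. A minimal standalone model of that scan (plain C++, made-up region values):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Region {
  uint64_t Offset; // 0 means "absent"
  uint64_t Size;
};

// Total file size = end of the furthest valid region, or HeaderEnd if none.
static uint64_t totalSize(const std::vector<Region> &Regions,
                          uint64_t HeaderEnd) {
  uint64_t Max = HeaderEnd;
  for (const Region &R : Regions)
    if (R.Offset != 0) // only offsets that are actually set participate
      Max = std::max(Max, R.Offset + R.Size);
  return Max;
}

int main() {
  const std::vector<Region> Regions{{0x1000, 0x200}, {0, 0x999}, {0x4000, 0x40}};
  // Header + load commands end at 0x820 in this made-up layout.
  return totalSize(Regions, 0x820) == 0x4040 ? 0 : 1;
}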
- - SmallVector Ends; - if (O.SymTabCommandIndex) { - const MachO::symtab_command &SymTabCommand = - O.LoadCommands[*O.SymTabCommandIndex] - .MachOLoadCommand.symtab_command_data; - if (SymTabCommand.symoff) - Ends.push_back(SymTabCommand.symoff + symTableSize()); - if (SymTabCommand.stroff) - Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); - } - if (O.DyLdInfoCommandIndex) { - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - if (DyLdInfoCommand.rebase_off) { - assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && - "Incorrect rebase opcodes size"); - Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); - } - if (DyLdInfoCommand.bind_off) { - assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && - "Incorrect bind opcodes size"); - Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); - } - if (DyLdInfoCommand.weak_bind_off) { - assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && - "Incorrect weak bind opcodes size"); - Ends.push_back(DyLdInfoCommand.weak_bind_off + - DyLdInfoCommand.weak_bind_size); - } - if (DyLdInfoCommand.lazy_bind_off) { - assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && - "Incorrect lazy bind opcodes size"); - Ends.push_back(DyLdInfoCommand.lazy_bind_off + - DyLdInfoCommand.lazy_bind_size); - } - if (DyLdInfoCommand.export_off) { - assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && - "Incorrect trie size"); - Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); - } - } - - if (O.DySymTabCommandIndex) { - const MachO::dysymtab_command &DySymTabCommand = - O.LoadCommands[*O.DySymTabCommandIndex] - .MachOLoadCommand.dysymtab_command_data; - - if (DySymTabCommand.indirectsymoff) - Ends.push_back(DySymTabCommand.indirectsymoff + - sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); - } - - if (O.CodeSignatureCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.CodeSignatureCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - if (LinkEditDataCommand.dataoff) - Ends.push_back(LinkEditDataCommand.dataoff + - LinkEditDataCommand.datasize); - } - - if (O.DataInCodeCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.DataInCodeCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Ends.push_back(LinkEditDataCommand.dataoff + - LinkEditDataCommand.datasize); - } - - if (O.LinkerOptimizationHintCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.LinkerOptimizationHintCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Ends.push_back(LinkEditDataCommand.dataoff + - LinkEditDataCommand.datasize); - } - - if (O.FunctionStartsCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.FunctionStartsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Ends.push_back(LinkEditDataCommand.dataoff + - LinkEditDataCommand.datasize); - } - - if (O.ChainedFixupsCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.ChainedFixupsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Ends.push_back(LinkEditDataCommand.dataoff + - 
LinkEditDataCommand.datasize); - } - - if (O.ExportsTrieCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.ExportsTrieCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Ends.push_back(LinkEditDataCommand.dataoff + - LinkEditDataCommand.datasize); - } - - // Otherwise, use the last section / reloction. - for (const LoadCommand &LC : O.LoadCommands) - for (const std::unique_ptr
&S : LC.Sections) { - if (!S->hasValidOffset()) { - assert((S->Offset == 0) && "Skipped section's offset must be zero"); - assert((S->isVirtualSection() || S->Size == 0) && - "Non-zero-fill sections with zero offset must have zero size"); - continue; - } - assert((S->Offset != 0) && - "Non-zero-fill section's offset cannot be zero"); - Ends.push_back(S->Offset + S->Size); - if (S->RelOff) - Ends.push_back(S->RelOff + - S->NReloc * sizeof(MachO::any_relocation_info)); - } - - if (!Ends.empty()) - return *std::max_element(Ends.begin(), Ends.end()); - - // Otherwise, we have only Mach header and load commands. - return headerSize() + loadCommandsSize(); -} - -void MachOWriter::writeHeader() { - MachO::mach_header_64 Header; - - Header.magic = O.Header.Magic; - Header.cputype = O.Header.CPUType; - Header.cpusubtype = O.Header.CPUSubType; - Header.filetype = O.Header.FileType; - Header.ncmds = O.Header.NCmds; - Header.sizeofcmds = O.Header.SizeOfCmds; - Header.flags = O.Header.Flags; - Header.reserved = O.Header.Reserved; - - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(Header); - - auto HeaderSize = - Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); - memcpy(Buf->getBufferStart(), &Header, HeaderSize); -} - -void MachOWriter::writeLoadCommands() { - uint8_t *Begin = - reinterpret_cast(Buf->getBufferStart()) + headerSize(); - for (const LoadCommand &LC : O.LoadCommands) { - // Construct a load command. - MachO::macho_load_command MLC = LC.MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(MLC.segment_command_data); - memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); - Begin += sizeof(MachO::segment_command); - - for (const std::unique_ptr
&Sec : LC.Sections) - writeSectionInLoadCommand(*Sec, Begin); - continue; - case MachO::LC_SEGMENT_64: - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(MLC.segment_command_64_data); - memcpy(Begin, &MLC.segment_command_64_data, - sizeof(MachO::segment_command_64)); - Begin += sizeof(MachO::segment_command_64); - - for (const std::unique_ptr
&Sec : LC.Sections) - writeSectionInLoadCommand(*Sec, Begin); - continue; - } - -#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - case MachO::LCName: \ - assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ - MLC.load_command_data.cmdsize); \ - if (IsLittleEndian != sys::IsLittleEndianHost) \ - MachO::swapStruct(MLC.LCStruct##_data); \ - memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ - Begin += sizeof(MachO::LCStruct); \ - if (!LC.Payload.empty()) \ - memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ - Begin += LC.Payload.size(); \ - break; - - // Copy the load command as it is. - switch (MLC.load_command_data.cmd) { - default: - assert(sizeof(MachO::load_command) + LC.Payload.size() == - MLC.load_command_data.cmdsize); - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(MLC.load_command_data); - memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); - Begin += sizeof(MachO::load_command); - if (!LC.Payload.empty()) - memcpy(Begin, LC.Payload.data(), LC.Payload.size()); - Begin += LC.Payload.size(); - break; -#include "llvm/BinaryFormat/MachO.def" - } - } -} - -template -void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { - StructType Temp; - assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name"); - assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && - "too long section name"); - memset(&Temp, 0, sizeof(StructType)); - memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); - memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); - Temp.addr = Sec.Addr; - Temp.size = Sec.Size; - Temp.offset = Sec.Offset; - Temp.align = Sec.Align; - Temp.reloff = Sec.RelOff; - Temp.nreloc = Sec.NReloc; - Temp.flags = Sec.Flags; - Temp.reserved1 = Sec.Reserved1; - Temp.reserved2 = Sec.Reserved2; - - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(Temp); - memcpy(Out, &Temp, sizeof(StructType)); - Out += sizeof(StructType); -} - -void MachOWriter::writeSections() { - for (const LoadCommand &LC : O.LoadCommands) - for (const std::unique_ptr
&Sec : LC.Sections) { - if (!Sec->hasValidOffset()) { - assert((Sec->Offset == 0) && "Skipped section's offset must be zero"); - assert((Sec->isVirtualSection() || Sec->Size == 0) && - "Non-zero-fill sections with zero offset must have zero size"); - continue; - } - - assert(Sec->Offset && "Section offset can not be zero"); - assert((Sec->Size == Sec->Content.size()) && "Incorrect section size"); - memcpy(Buf->getBufferStart() + Sec->Offset, Sec->Content.data(), - Sec->Content.size()); - for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { - RelocationInfo RelocInfo = Sec->Relocations[Index]; - if (!RelocInfo.Scattered && !RelocInfo.IsAddend) { - const uint32_t SymbolNum = RelocInfo.Extern - ? (*RelocInfo.Symbol)->Index - : (*RelocInfo.Sec)->Index; - RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); - } - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct( - reinterpret_cast(RelocInfo.Info)); - memcpy(Buf->getBufferStart() + Sec->RelOff + - Index * sizeof(MachO::any_relocation_info), - &RelocInfo.Info, sizeof(RelocInfo.Info)); - } - } -} - -template -void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, - uint32_t Nstrx) { - NListType ListEntry; - ListEntry.n_strx = Nstrx; - ListEntry.n_type = SE.n_type; - ListEntry.n_sect = SE.n_sect; - ListEntry.n_desc = SE.n_desc; - ListEntry.n_value = SE.n_value; - - if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(ListEntry); - memcpy(Out, reinterpret_cast(&ListEntry), sizeof(NListType)); - Out += sizeof(NListType); -} - -void MachOWriter::writeStringTable() { - if (!O.SymTabCommandIndex) - return; - const MachO::symtab_command &SymTabCommand = - O.LoadCommands[*O.SymTabCommandIndex] - .MachOLoadCommand.symtab_command_data; - - uint8_t *StrTable = (uint8_t *)Buf->getBufferStart() + SymTabCommand.stroff; - LayoutBuilder.getStringTableBuilder().write(StrTable); -} - -void MachOWriter::writeSymbolTable() { - if (!O.SymTabCommandIndex) - return; - const MachO::symtab_command &SymTabCommand = - O.LoadCommands[*O.SymTabCommandIndex] - .MachOLoadCommand.symtab_command_data; - - char *SymTable = (char *)Buf->getBufferStart() + SymTabCommand.symoff; - for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end(); - Iter != End; Iter++) { - SymbolEntry *Sym = Iter->get(); - uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); - - if (Is64Bit) - writeNListEntry(*Sym, IsLittleEndian, SymTable, Nstrx); - else - writeNListEntry(*Sym, IsLittleEndian, SymTable, Nstrx); - } -} - -void MachOWriter::writeRebaseInfo() { - if (!O.DyLdInfoCommandIndex) - return; - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.rebase_off; - assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && - "Incorrect rebase opcodes size"); - memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size()); -} - -void MachOWriter::writeBindInfo() { - if (!O.DyLdInfoCommandIndex) - return; - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.bind_off; - assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && - "Incorrect bind opcodes size"); - memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size()); -} - -void MachOWriter::writeWeakBindInfo() { - if 
(!O.DyLdInfoCommandIndex) - return; - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; - assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && - "Incorrect weak bind opcodes size"); - memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size()); -} - -void MachOWriter::writeLazyBindInfo() { - if (!O.DyLdInfoCommandIndex) - return; - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; - assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && - "Incorrect lazy bind opcodes size"); - memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size()); -} - -void MachOWriter::writeExportInfo() { - if (!O.DyLdInfoCommandIndex) - return; - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.export_off; - assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && - "Incorrect export trie size"); - memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); -} - -void MachOWriter::writeIndirectSymbolTable() { - if (!O.DySymTabCommandIndex) - return; - - const MachO::dysymtab_command &DySymTabCommand = - O.LoadCommands[*O.DySymTabCommandIndex] - .MachOLoadCommand.dysymtab_command_data; - - uint32_t *Out = - (uint32_t *)(Buf->getBufferStart() + DySymTabCommand.indirectsymoff); - for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) { - uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex; - if (IsLittleEndian != sys::IsLittleEndianHost) - sys::swapByteOrder(Entry); - *Out++ = Entry; - } -} - -void MachOWriter::writeLinkData(Optional LCIndex, const LinkData &LD) { - if (!LCIndex) - return; - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; - char *Out = (char *)Buf->getBufferStart() + LinkEditDataCommand.dataoff; - assert((LinkEditDataCommand.datasize == LD.Data.size()) && - "Incorrect data size"); - memcpy(Out, LD.Data.data(), LD.Data.size()); -} - -static uint64_t -getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) { - const MachO::macho_load_command &MLC = - TextSegmentLoadCommand.MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - return MLC.segment_command_data.fileoff; - case MachO::LC_SEGMENT_64: - return MLC.segment_command_64_data.fileoff; - default: - return 0; - } -} - -static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) { - const MachO::macho_load_command &MLC = - TextSegmentLoadCommand.MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - return MLC.segment_command_data.filesize; - case MachO::LC_SEGMENT_64: - return MLC.segment_command_64_data.filesize; - default: - return 0; - } -} - -void MachOWriter::writeCodeSignatureData() { - // NOTE: This CodeSignature section behaviour must be kept in sync with that - // performed in LLD's CodeSignatureSection::write / - // CodeSignatureSection::writeHashes. Furthermore, this call must occur only - // after the rest of the binary has already been written to the buffer. 
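The signing pass further down walks that already-written buffer page by page. A standalone sketch of the per-page SHA-256 pass, using llvm/Support/SHA256.h as the code below does, with the block size mirroring CodeSignatureInfo::BlockSize:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/SHA256.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>

// Hash Data in 4 KiB pages, one 32-byte SHA-256 digest per page (the final
// page may be short), mirroring the hash-writing loop in this function.
static std::string hashPages(llvm::ArrayRef<uint8_t> Data) {
  constexpr size_t BlockSize = 4096; // CodeSignatureInfo::BlockSize
  std::string Hashes;
  for (size_t Off = 0; Off < Data.size(); Off += BlockSize) {
    size_t Len = std::min(BlockSize, Data.size() - Off);
    llvm::SHA256 Hasher;
    Hasher.update(Data.slice(Off, Len));
    Hashes.append(Hasher.final().str()); // 32 bytes appended per page
  }
  return Hashes;
}

int main() {
  const uint8_t Buf[5000] = {}; // 5000 bytes -> two pages -> two digests
  return hashPages(llvm::ArrayRef<uint8_t>(Buf, sizeof(Buf))).size() == 64
             ? 0
             : 1;
}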
This - // is because the buffer is read from to perform the necessary hashing. - - // The CodeSignature section is the last section in the MachO binary and - // contains a hash of all content in the binary before it. Since llvm-objcopy - // has likely modified the target binary, the hash must be regenerated - // entirely. To generate this hash, we must read from the start of the binary - // (HashReadStart) to just before the start of the CodeSignature section - // (HashReadEnd). - - const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature(); - - uint8_t *BufferStart = reinterpret_cast(Buf->getBufferStart()); - uint8_t *HashReadStart = BufferStart; - uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset; - - // The CodeSignature section begins with a header, after which the hashes - // of each page of the binary are written. - uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize; - - uint32_t TextSegmentFileOff = 0; - uint32_t TextSegmentFileSize = 0; - if (O.TextSegmentCommandIndex) { - const LoadCommand &TextSegmentLoadCommand = - O.LoadCommands[*O.TextSegmentCommandIndex]; - assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == - MachO::LC_SEGMENT || - TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == - MachO::LC_SEGMENT_64); - assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand - .segment_command_data.segname) == "__TEXT"); - TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand); - TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand); - } - - const uint32_t FileNamePad = CodeSignature.AllHeadersSize - - CodeSignature.FixedHeadersSize - - CodeSignature.OutputFileName.size(); - - // Write code section header. - auto *SuperBlob = reinterpret_cast(HashReadEnd); - write32be(&SuperBlob->magic, MachO::CSMAGIC_EMBEDDED_SIGNATURE); - write32be(&SuperBlob->length, CodeSignature.Size); - write32be(&SuperBlob->count, 1); - auto *BlobIndex = reinterpret_cast(&SuperBlob[1]); - write32be(&BlobIndex->type, MachO::CSSLOT_CODEDIRECTORY); - write32be(&BlobIndex->offset, CodeSignature.BlobHeadersSize); - auto *CodeDirectory = reinterpret_cast( - HashReadEnd + CodeSignature.BlobHeadersSize); - write32be(&CodeDirectory->magic, MachO::CSMAGIC_CODEDIRECTORY); - write32be(&CodeDirectory->length, - CodeSignature.Size - CodeSignature.BlobHeadersSize); - write32be(&CodeDirectory->version, MachO::CS_SUPPORTSEXECSEG); - write32be(&CodeDirectory->flags, MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED); - write32be(&CodeDirectory->hashOffset, - sizeof(MachO::CS_CodeDirectory) + - CodeSignature.OutputFileName.size() + FileNamePad); - write32be(&CodeDirectory->identOffset, sizeof(MachO::CS_CodeDirectory)); - CodeDirectory->nSpecialSlots = 0; - write32be(&CodeDirectory->nCodeSlots, CodeSignature.BlockCount); - write32be(&CodeDirectory->codeLimit, CodeSignature.StartOffset); - CodeDirectory->hashSize = static_cast(CodeSignature.HashSize); - CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256; - CodeDirectory->platform = 0; - CodeDirectory->pageSize = CodeSignature.BlockSizeShift; - CodeDirectory->spare2 = 0; - CodeDirectory->scatterOffset = 0; - CodeDirectory->teamOffset = 0; - CodeDirectory->spare3 = 0; - CodeDirectory->codeLimit64 = 0; - write64be(&CodeDirectory->execSegBase, TextSegmentFileOff); - write64be(&CodeDirectory->execSegLimit, TextSegmentFileSize); - write64be(&CodeDirectory->execSegFlags, O.Header.FileType == MachO::MH_EXECUTE - ? 
MachO::CS_EXECSEG_MAIN_BINARY - : 0); - - auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]); - memcpy(Id, CodeSignature.OutputFileName.begin(), - CodeSignature.OutputFileName.size()); - memset(Id + CodeSignature.OutputFileName.size(), 0, FileNamePad); - - // Write the hashes. - uint8_t *CurrHashReadPosition = HashReadStart; - uint8_t *CurrHashWritePosition = HashWriteStart; - while (CurrHashReadPosition < HashReadEnd) { - StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition), - std::min(HashReadEnd - CurrHashReadPosition, - static_cast<ptrdiff_t>(CodeSignature.BlockSize))); - SHA256 Hasher; - Hasher.update(Block); - StringRef Hash = Hasher.final(); - assert(Hash.size() == CodeSignature.HashSize); - memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize); - CurrHashReadPosition += CodeSignature.BlockSize; - CurrHashWritePosition += CodeSignature.HashSize; - } -#if defined(__APPLE__) - // This is a macOS-specific workaround and makes no sense for any - // other host OS. See https://openradar.appspot.com/FB8914231 - // - // The macOS kernel maintains a signature-verification cache to - // quickly validate applications at time of execve(2). The trouble - // is that the kernel creates the cache entry at the time of the - // mmap(2) call, before we have a chance to write either the code to - // sign or the signature header+hashes. The fix is to invalidate - // all cached data associated with the output file, thus discarding - // the bogus prematurely-cached signature. - msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size, - MS_INVALIDATE); -#endif -} - -void MachOWriter::writeDataInCodeData() { - return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode); -} - -void MachOWriter::writeLinkerOptimizationHint() { - return writeLinkData(O.LinkerOptimizationHintCommandIndex, - O.LinkerOptimizationHint); -} - -void MachOWriter::writeFunctionStartsData() { - return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts); -} - -void MachOWriter::writeChainedFixupsData() { - return writeLinkData(O.ChainedFixupsCommandIndex, O.ChainedFixups); -} - -void MachOWriter::writeExportsTrieData() { - return writeLinkData(O.ExportsTrieCommandIndex, O.ExportsTrie); -} - -void MachOWriter::writeTail() { - typedef void (MachOWriter::*WriteHandlerType)(); - typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; - SmallVector<WriteOperation, 7> Queue; - - if (O.SymTabCommandIndex) { - const MachO::symtab_command &SymTabCommand = - O.LoadCommands[*O.SymTabCommandIndex] - .MachOLoadCommand.symtab_command_data; - if (SymTabCommand.symoff) - Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); - if (SymTabCommand.stroff) - Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable}); - } - - if (O.DyLdInfoCommandIndex) { - const MachO::dyld_info_command &DyLdInfoCommand = - O.LoadCommands[*O.DyLdInfoCommandIndex] - .MachOLoadCommand.dyld_info_command_data; - if (DyLdInfoCommand.rebase_off) - Queue.push_back( - {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); - if (DyLdInfoCommand.bind_off) - Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); - if (DyLdInfoCommand.weak_bind_off) - Queue.push_back( - {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); - if (DyLdInfoCommand.lazy_bind_off) - Queue.push_back( - {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); - if (DyLdInfoCommand.export_off) - Queue.push_back( - {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); - } - - if (O.DySymTabCommandIndex) { - const MachO::dysymtab_command
&DySymTabCommand = - O.LoadCommands[*O.DySymTabCommandIndex] - .MachOLoadCommand.dysymtab_command_data; - - if (DySymTabCommand.indirectsymoff) - Queue.emplace_back(DySymTabCommand.indirectsymoff, - &MachOWriter::writeIndirectSymbolTable); - } - - if (O.CodeSignatureCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.CodeSignatureCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Queue.emplace_back(LinkEditDataCommand.dataoff, - &MachOWriter::writeCodeSignatureData); - } - - if (O.DataInCodeCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.DataInCodeCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Queue.emplace_back(LinkEditDataCommand.dataoff, - &MachOWriter::writeDataInCodeData); - } - - if (O.LinkerOptimizationHintCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.LinkerOptimizationHintCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Queue.emplace_back(LinkEditDataCommand.dataoff, - &MachOWriter::writeLinkerOptimizationHint); - } - - if (O.FunctionStartsCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.FunctionStartsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Queue.emplace_back(LinkEditDataCommand.dataoff, - &MachOWriter::writeFunctionStartsData); - } - - if (O.ChainedFixupsCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.ChainedFixupsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Queue.emplace_back(LinkEditDataCommand.dataoff, - &MachOWriter::writeChainedFixupsData); - } - - if (O.ExportsTrieCommandIndex) { - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.ExportsTrieCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - - if (LinkEditDataCommand.dataoff) - Queue.emplace_back(LinkEditDataCommand.dataoff, - &MachOWriter::writeExportsTrieData); - } - - llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { - return LHS.first < RHS.first; - }); - - for (auto WriteOp : Queue) - (this->*WriteOp.second)(); -} - -Error MachOWriter::finalize() { return LayoutBuilder.layout(); } - -Error MachOWriter::write() { - size_t TotalSize = totalSize(); - Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); - if (!Buf) - return createStringError(errc::not_enough_memory, - "failed to allocate memory buffer of " + - Twine::utohexstr(TotalSize) + " bytes"); - memset(Buf->getBufferStart(), 0, totalSize()); - writeHeader(); - writeLoadCommands(); - writeSections(); - writeTail(); - - // TODO: Implement direct writing to the output stream (without intermediate - // memory buffer Buf). - Out.write(Buf->getBufferStart(), Buf->getBufferSize()); - return Error::success(); -} diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h deleted file mode 100644 index a172534dac8a..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ /dev/null @@ -1,71 +0,0 @@ -//===- MachOWriter.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
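The writeTail implementation above queues one (file offset, member-function pointer) pair per linkedit blob that is present, sorts by offset, and then dispatches the writers in file order. A minimal standalone sketch of that pattern (the Writer type and offsets here are hypothetical, not the LLVM class):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct Writer {
  void writeSymbolTable() { std::puts("symtab"); }
  void writeStringTable() { std::puts("strtab"); }

  void writeTail() {
    using Handler = void (Writer::*)();
    // One entry per blob that is actually present, keyed by file offset.
    std::vector<std::pair<uint64_t, Handler>> Queue = {
        {0x4000, &Writer::writeSymbolTable},
        {0x2000, &Writer::writeStringTable}};
    std::sort(Queue.begin(), Queue.end(),
              [](const auto &L, const auto &R) { return L.first < R.first; });
    for (auto &Op : Queue)
      (this->*Op.second)(); // runs strtab (0x2000) before symtab (0x4000)
  }
};

int main() { Writer().writeTail(); }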
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "MachOLayoutBuilder.h" -#include "MachOObjcopy.h" -#include "Object.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/MachO.h" - -namespace llvm { -class Error; - -namespace objcopy { -namespace macho { - -class MachOWriter { - Object &O; - bool Is64Bit; - bool IsLittleEndian; - uint64_t PageSize; - std::unique_ptr Buf; - raw_ostream &Out; - MachOLayoutBuilder LayoutBuilder; - - size_t headerSize() const; - size_t loadCommandsSize() const; - size_t symTableSize() const; - size_t strTableSize() const; - - void writeHeader(); - void writeLoadCommands(); - template - void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out); - void writeSections(); - void writeSymbolTable(); - void writeStringTable(); - void writeRebaseInfo(); - void writeBindInfo(); - void writeWeakBindInfo(); - void writeLazyBindInfo(); - void writeExportInfo(); - void writeIndirectSymbolTable(); - void writeLinkData(Optional LCIndex, const LinkData &LD); - void writeCodeSignatureData(); - void writeDataInCodeData(); - void writeLinkerOptimizationHint(); - void writeFunctionStartsData(); - void writeChainedFixupsData(); - void writeExportsTrieData(); - void writeTail(); - -public: - MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, - StringRef OutputFileName, uint64_t PageSize, raw_ostream &Out) - : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), - PageSize(PageSize), Out(Out), - LayoutBuilder(O, Is64Bit, OutputFileName, PageSize) {} - - size_t totalSize() const; - Error finalize(); - Error write(); -}; - -} // end namespace macho -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp deleted file mode 100644 index 6312adbbc9f7..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ /dev/null @@ -1,214 +0,0 @@ -//===- Object.cpp - Mach-O object file model --------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
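The MachOWriter interface above separates layout from emission: finalize() runs the layout builder so every offset is known, and write() then fills a single zero-initialized buffer of totalSize() bytes. A toy sketch of that two-phase shape (all names hypothetical, error handling reduced to bool):

#include <cstddef>
#include <cstdio>
#include <vector>

class TwoPhaseWriter {
  size_t Total = 0;       // decided during layout, fixed before writing
  std::vector<char> Buf;

public:
  bool finalize() {
    Total = 4096;         // stand-in for the real layout computation
    return true;
  }
  bool write() {
    if (!Total)
      return false;       // finalize() must run first
    Buf.assign(Total, 0); // zero-filled, like getNewMemBuffer above
    // header, load commands, section contents, tail blobs go here
    return true;
  }
};

int main() {
  TwoPhaseWriter W;
  std::printf("ok=%d\n", W.finalize() && W.write());
}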
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Object.h" -#include "llvm/ADT/SmallPtrSet.h" -#include - -using namespace llvm; -using namespace llvm::objcopy::macho; - -const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { - assert(Index < Symbols.size() && "invalid symbol index"); - return Symbols[Index].get(); -} - -SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { - return const_cast( - static_cast(this)->getSymbolByIndex(Index)); -} - -void SymbolTable::removeSymbols( - function_ref &)> ToRemove) { - llvm::erase_if(Symbols, ToRemove); -} - -void Object::updateLoadCommandIndexes() { - static constexpr char TextSegmentName[] = "__TEXT"; - // Update indices of special load commands - for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { - LoadCommand &LC = LoadCommands[Index]; - switch (LC.MachOLoadCommand.load_command_data.cmd) { - case MachO::LC_CODE_SIGNATURE: - CodeSignatureCommandIndex = Index; - break; - case MachO::LC_SEGMENT: - if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == - TextSegmentName) - TextSegmentCommandIndex = Index; - break; - case MachO::LC_SEGMENT_64: - if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == - TextSegmentName) - TextSegmentCommandIndex = Index; - break; - case MachO::LC_SYMTAB: - SymTabCommandIndex = Index; - break; - case MachO::LC_DYSYMTAB: - DySymTabCommandIndex = Index; - break; - case MachO::LC_DYLD_INFO: - case MachO::LC_DYLD_INFO_ONLY: - DyLdInfoCommandIndex = Index; - break; - case MachO::LC_DATA_IN_CODE: - DataInCodeCommandIndex = Index; - break; - case MachO::LC_LINKER_OPTIMIZATION_HINT: - LinkerOptimizationHintCommandIndex = Index; - break; - case MachO::LC_FUNCTION_STARTS: - FunctionStartsCommandIndex = Index; - break; - case MachO::LC_DYLD_CHAINED_FIXUPS: - ChainedFixupsCommandIndex = Index; - break; - case MachO::LC_DYLD_EXPORTS_TRIE: - ExportsTrieCommandIndex = Index; - break; - } - } -} - -Error Object::removeLoadCommands( - function_ref ToRemove) { - auto It = std::stable_partition( - LoadCommands.begin(), LoadCommands.end(), - [&](const LoadCommand &LC) { return !ToRemove(LC); }); - LoadCommands.erase(It, LoadCommands.end()); - - updateLoadCommandIndexes(); - return Error::success(); -} - -Error Object::removeSections( - function_ref &)> ToRemove) { - DenseMap OldIndexToSection; - uint32_t NextSectionIndex = 1; - for (LoadCommand &LC : LoadCommands) { - auto It = std::stable_partition( - std::begin(LC.Sections), std::end(LC.Sections), - [&](const std::unique_ptr
&Sec) { return !ToRemove(Sec); }); - for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { - OldIndexToSection[(*I)->Index] = I->get(); - (*I)->Index = NextSectionIndex++; - } - LC.Sections.erase(It, LC.Sections.end()); - } - - auto IsDead = [&](const std::unique_ptr &S) -> bool { - Optional Section = S->section(); - return (Section && !OldIndexToSection.count(*Section)); - }; - - SmallPtrSet DeadSymbols; - for (const std::unique_ptr &Sym : SymTable.Symbols) - if (IsDead(Sym)) - DeadSymbols.insert(Sym.get()); - - for (const LoadCommand &LC : LoadCommands) - for (const std::unique_ptr
&Sec : LC.Sections) - for (const RelocationInfo &R : Sec->Relocations) - if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) - return createStringError(std::errc::invalid_argument, - "symbol '%s' defined in section with index " - "'%u' cannot be removed because it is " - "referenced by a relocation in section '%s'", - (*R.Symbol)->Name.c_str(), - *((*R.Symbol)->section()), - Sec->CanonicalName.c_str()); - SymTable.removeSymbols(IsDead); - for (std::unique_ptr &S : SymTable.Symbols) - if (S->section()) - S->n_sect = OldIndexToSection[S->n_sect]->Index; - return Error::success(); -} - -uint64_t Object::nextAvailableSegmentAddress() const { - uint64_t HeaderSize = - is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); - uint64_t Addr = HeaderSize + Header.SizeOfCmds; - for (const LoadCommand &LC : LoadCommands) { - const MachO::macho_load_command &MLC = LC.MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - Addr = std::max(Addr, - static_cast(MLC.segment_command_data.vmaddr) + - MLC.segment_command_data.vmsize); - break; - case MachO::LC_SEGMENT_64: - Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr + - MLC.segment_command_64_data.vmsize); - break; - default: - continue; - } - } - return Addr; -} - -template -static void -constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, - StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) { - assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name"); - memset(&Seg, 0, sizeof(SegmentType)); - Seg.cmd = CmdType; - strncpy(Seg.segname, SegName.data(), SegName.size()); - Seg.maxprot |= - (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); - Seg.initprot |= - (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); - Seg.vmaddr = SegVMAddr; - Seg.vmsize = SegVMSize; -} - -LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) { - LoadCommand LC; - const uint64_t SegVMAddr = nextAvailableSegmentAddress(); - if (is64Bit()) - constructSegment(LC.MachOLoadCommand.segment_command_64_data, - MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); - else - constructSegment(LC.MachOLoadCommand.segment_command_data, - MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); - - LoadCommands.push_back(std::move(LC)); - return LoadCommands.back(); -} - -/// Extracts a segment name from a string which is possibly non-null-terminated. 
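nextAvailableSegmentAddress above places a new segment at the highest vmaddr + vmsize among the existing segments. A simplified sketch of just that arithmetic (the header/load-command size floor is left out, and Seg is a hypothetical stand-in for the load-command union):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Seg {
  uint64_t VMAddr, VMSize;
};

static uint64_t nextAvailableAddress(const std::vector<Seg> &Segs) {
  uint64_t Addr = 0;
  for (const Seg &S : Segs)
    Addr = std::max(Addr, S.VMAddr + S.VMSize); // end of the furthest segment
  return Addr;
}

int main() {
  std::vector<Seg> Segs = {{0x0, 0x4000}, {0x4000, 0x8000}};
  std::printf("0x%llx\n",
              (unsigned long long)nextAvailableAddress(Segs)); // 0xc000
}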
-static StringRef extractSegmentName(const char *SegName) { - return StringRef(SegName, - strnlen(SegName, sizeof(MachO::segment_command::segname))); -} - -Optional LoadCommand::getSegmentName() const { - const MachO::macho_load_command &MLC = MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - return extractSegmentName(MLC.segment_command_data.segname); - case MachO::LC_SEGMENT_64: - return extractSegmentName(MLC.segment_command_64_data.segname); - default: - return None; - } -} - -Optional LoadCommand::getSegmentVMAddr() const { - const MachO::macho_load_command &MLC = MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - return MLC.segment_command_data.vmaddr; - case MachO::LC_SEGMENT_64: - return MLC.segment_command_64_data.vmaddr; - default: - return None; - } -} diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h deleted file mode 100644 index 13aaf42634b0..000000000000 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ /dev/null @@ -1,374 +0,0 @@ -//===- Object.h - Mach-O object file model ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_OBJCOPY_MACHO_OBJECT_H -#define LLVM_OBJCOPY_MACHO_OBJECT_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/MC/StringTableBuilder.h" -#include "llvm/ObjectYAML/DWARFYAML.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/YAMLTraits.h" -#include -#include -#include - -namespace llvm { -namespace objcopy { -namespace macho { - -struct MachHeader { - uint32_t Magic; - uint32_t CPUType; - uint32_t CPUSubType; - uint32_t FileType; - uint32_t NCmds; - uint32_t SizeOfCmds; - uint32_t Flags; - uint32_t Reserved = 0; -}; - -struct RelocationInfo; -struct Section { - uint32_t Index; - std::string Segname; - std::string Sectname; - // CanonicalName is a string formatted as “,". - std::string CanonicalName; - uint64_t Addr = 0; - uint64_t Size = 0; - // Offset in the input file. 
- Optional OriginalOffset; - uint32_t Offset = 0; - uint32_t Align = 0; - uint32_t RelOff = 0; - uint32_t NReloc = 0; - uint32_t Flags = 0; - uint32_t Reserved1 = 0; - uint32_t Reserved2 = 0; - uint32_t Reserved3 = 0; - StringRef Content; - std::vector Relocations; - - Section(StringRef SegName, StringRef SectName) - : Segname(std::string(SegName)), Sectname(std::string(SectName)), - CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} - - Section(StringRef SegName, StringRef SectName, StringRef Content) - : Segname(std::string(SegName)), Sectname(std::string(SectName)), - CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), - Content(Content) {} - - MachO::SectionType getType() const { - return static_cast(Flags & MachO::SECTION_TYPE); - } - - bool isVirtualSection() const { - return (getType() == MachO::S_ZEROFILL || - getType() == MachO::S_GB_ZEROFILL || - getType() == MachO::S_THREAD_LOCAL_ZEROFILL); - } - - bool hasValidOffset() const { - return !(isVirtualSection() || (OriginalOffset && *OriginalOffset == 0)); - } -}; - -struct LoadCommand { - // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h - // and it is a union of all the structs corresponding to various load - // commands. - MachO::macho_load_command MachOLoadCommand; - - // The raw content of the payload of the load command (located right after the - // corresponding struct). In some cases it is either empty or can be - // copied-over without digging into its structure. - std::vector Payload; - - // Some load commands can contain (inside the payload) an array of sections, - // though the contents of the sections are stored separately. The struct - // Section describes only sections' metadata and where to find the - // corresponding content inside the binary. - std::vector> Sections; - - // Returns the segment name if the load command is a segment command. - Optional getSegmentName() const; - - // Returns the segment vm address if the load command is a segment command. - Optional getSegmentVMAddr() const; -}; - -// A symbol information. Fields which starts with "n_" are same as them in the -// nlist. -struct SymbolEntry { - std::string Name; - bool Referenced = false; - uint32_t Index; - uint8_t n_type; - uint8_t n_sect; - uint16_t n_desc; - uint64_t n_value; - - bool isExternalSymbol() const { return n_type & MachO::N_EXT; } - - bool isLocalSymbol() const { return !isExternalSymbol(); } - - bool isUndefinedSymbol() const { - return (n_type & MachO::N_TYPE) == MachO::N_UNDF; - } - - bool isSwiftSymbol() const { - return StringRef(Name).startswith("_$s") || - StringRef(Name).startswith("_$S"); - } - - Optional section() const { - return n_sect == MachO::NO_SECT ? None : Optional(n_sect); - } -}; - -/// The location of the symbol table inside the binary is described by LC_SYMTAB -/// load command. -struct SymbolTable { - std::vector> Symbols; - - using iterator = pointee_iterator< - std::vector>::const_iterator>; - - iterator begin() const { return iterator(Symbols.begin()); } - iterator end() const { return iterator(Symbols.end()); } - - const SymbolEntry *getSymbolByIndex(uint32_t Index) const; - SymbolEntry *getSymbolByIndex(uint32_t Index); - void removeSymbols( - function_ref &)> ToRemove); -}; - -struct IndirectSymbolEntry { - // The original value in an indirect symbol table. Higher bits encode extra - // information (INDIRECT_SYMBOL_LOCAL and INDIRECT_SYMBOL_ABS). - uint32_t OriginalIndex; - /// The Symbol referenced by this entry. 
It's None if the index is - /// INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS. - Optional<SymbolEntry *> Symbol; - - IndirectSymbolEntry(uint32_t OriginalIndex, Optional<SymbolEntry *> Symbol) - : OriginalIndex(OriginalIndex), Symbol(Symbol) {} -}; - -struct IndirectSymbolTable { - std::vector<IndirectSymbolEntry> Symbols; -}; - -/// The location of the string table inside the binary is described by LC_SYMTAB -/// load command. -struct StringTable { - std::vector<std::string> Strings; -}; - -struct RelocationInfo { - // The referenced symbol entry. Set if !Scattered && Extern. - Optional<const SymbolEntry *> Symbol; - // The referenced section. Set if !Scattered && !Extern. - Optional<const Section *> Sec; - // True if Info is a scattered_relocation_info. - bool Scattered; - // True if the type is an ADDEND. r_symbolnum holds the addend instead of a - // symbol index. - bool IsAddend; - // True if the r_symbolnum points to a section number (i.e. r_extern=0). - bool Extern; - MachO::any_relocation_info Info; - - unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { - if (IsLittleEndian) - return Info.r_word1 & 0xffffff; - return Info.r_word1 >> 8; - } - - void setPlainRelocationSymbolNum(unsigned SymbolNum, bool IsLittleEndian) { - assert(SymbolNum < (1 << 24) && "SymbolNum out of range"); - if (IsLittleEndian) - Info.r_word1 = (Info.r_word1 & ~0x00ffffff) | SymbolNum; - else - Info.r_word1 = (Info.r_word1 & ~0xffffff00) | (SymbolNum << 8); - } -}; - -/// The location of the rebase info inside the binary is described by -/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at -/// an address different from its preferred address. The rebase information is -/// a stream of byte sized opcodes whose symbolic names start with -/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples: -/// <seg-index, seg-offset, type> -/// The opcodes are a compressed way to encode the table by only -/// encoding when a column changes. In addition simple patterns -/// like "every n'th offset for m times" can be encoded in a few -/// bytes. -struct RebaseInfo { - // At the moment we do not parse this info (and it is simply copied over), - // but the proper support will be added later. - ArrayRef<uint8_t> Opcodes; -}; - -/// The location of the bind info inside the binary is described by -/// LC_DYLD_INFO load command. Dyld binds an image during the loading process, -/// if the image requires any pointers to be initialized to symbols in other -/// images. The bind information is a stream of byte sized opcodes whose -/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is -/// a table of tuples: <seg-index, seg-offset, type, symbol-library-ordinal, -/// symbol-name, addend>. The opcodes are a compressed way to encode the table by -/// only encoding when a column changes. In addition simple patterns like for -/// runs of pointers initialized to the same value can be encoded in a few -/// bytes. -struct BindInfo { - // At the moment we do not parse this info (and it is simply copied over), - // but the proper support will be added later. - ArrayRef<uint8_t> Opcodes; -}; - -/// The location of the weak bind info inside the binary is described by -/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols -/// so that all images in the process use the same copy of some code/data. This -/// step is done after binding. The content of the weak_bind info is an opcode -/// stream like the bind_info. But it is sorted alphabetically by symbol name. -/// This enables dyld to walk all images with weak binding information in order -/// and look for collisions. If there are no collisions, dyld does no updating.
-/// That means that some fixups are also encoded in the bind_info. For -/// instance, all calls to "operator new" are first bound to libstdc++.dylib -/// using the information in bind_info. Then if some image overrides operator -/// new that is detected when the weak_bind information is processed and the -/// call to operator new is then rebound. -struct WeakBindInfo { - // At the moment we do not parse this info (and it is simply copied over), - // but the proper support will be added later. - ArrayRef Opcodes; -}; - -/// The location of the lazy bind info inside the binary is described by -/// LC_DYLD_INFO load command. Some uses of external symbols do not need to be -/// bound immediately. Instead they can be lazily bound on first use. The -/// lazy_bind contains a stream of BIND opcodes to bind all lazy symbols. Normal -/// use is that dyld ignores the lazy_bind section when loading an image. -/// Instead the static linker arranged for the lazy pointer to initially point -/// to a helper function which pushes the offset into the lazy_bind area for the -/// symbol needing to be bound, then jumps to dyld which simply adds the offset -/// to lazy_bind_off to get the information on what to bind. -struct LazyBindInfo { - ArrayRef Opcodes; -}; - -/// The location of the export info inside the binary is described by -/// LC_DYLD_INFO load command. The symbols exported by a dylib are encoded in a -/// trie. This is a compact representation that factors out common prefixes. It -/// also reduces LINKEDIT pages in RAM because it encodes all information (name, -/// address, flags) in one small, contiguous range. The export area is a stream -/// of nodes. The first node sequentially is the start node for the trie. Nodes -/// for a symbol start with a uleb128 that is the length of the exported symbol -/// information for the string so far. If there is no exported symbol, the node -/// starts with a zero byte. If there is exported info, it follows the length. -/// First is a uleb128 containing flags. Normally, it is followed by -/// a uleb128 encoded offset which is location of the content named -/// by the symbol from the mach_header for the image. If the flags -/// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is -/// a uleb128 encoded library ordinal, then a zero terminated -/// UTF8 string. If the string is zero length, then the symbol -/// is re-export from the specified dylib with the same name. -/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following -/// the flags is two uleb128s: the stub offset and the resolver offset. -/// The stub is used by non-lazy pointers. The resolver is used -/// by lazy pointers and must be called to get the actual address to use. -/// After the optional exported symbol information is a byte of -/// how many edges (0-255) that this node has leaving it, -/// followed by each edge. -/// Each edge is a zero terminated UTF8 of the addition chars -/// in the symbol, followed by a uleb128 offset for the node that -/// edge points to. 
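The trie encoding described above is built almost entirely out of uleb128 fields. For reference, a minimal standalone decoder for that varint format (a sketch, not LLVM's DataExtractor API):

#include <cstdint>
#include <cstdio>

static uint64_t decodeULEB128(const uint8_t *P, unsigned *Len = nullptr) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  const uint8_t *Start = P;
  do {
    Value |= uint64_t(*P & 0x7f) << Shift; // low 7 bits are payload
    Shift += 7;
  } while (*P++ & 0x80);                   // high bit marks continuation
  if (Len)
    *Len = unsigned(P - Start);
  return Value;
}

int main() {
  const uint8_t Enc[] = {0xe5, 0x8e, 0x26};
  std::printf("%llu\n", (unsigned long long)decodeULEB128(Enc)); // 624485
}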
-struct ExportInfo { - ArrayRef<uint8_t> Trie; -}; - -struct LinkData { - ArrayRef<uint8_t> Data; -}; - -struct Object { - MachHeader Header; - std::vector<LoadCommand> LoadCommands; - - SymbolTable SymTable; - StringTable StrTable; - - RebaseInfo Rebases; - BindInfo Binds; - WeakBindInfo WeakBinds; - LazyBindInfo LazyBinds; - ExportInfo Exports; - IndirectSymbolTable IndirectSymTable; - LinkData DataInCode; - LinkData LinkerOptimizationHint; - LinkData FunctionStarts; - LinkData ExportsTrie; - LinkData ChainedFixups; - - Optional<uint32_t> SwiftVersion; - - /// The index of LC_CODE_SIGNATURE load command if present. - Optional<size_t> CodeSignatureCommandIndex; - /// The index of LC_SYMTAB load command if present. - Optional<size_t> SymTabCommandIndex; - /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. - Optional<size_t> DyLdInfoCommandIndex; - /// The index of LC_DYSYMTAB load command if present. - Optional<size_t> DySymTabCommandIndex; - /// The index of LC_DATA_IN_CODE load command if present. - Optional<size_t> DataInCodeCommandIndex; - /// The index of LC_LINKER_OPTIMIZATION_HINT load command if present. - Optional<size_t> LinkerOptimizationHintCommandIndex; - /// The index of LC_FUNCTION_STARTS load command if present. - Optional<size_t> FunctionStartsCommandIndex; - /// The index of LC_DYLD_CHAINED_FIXUPS load command if present. - Optional<size_t> ChainedFixupsCommandIndex; - /// The index of LC_DYLD_EXPORTS_TRIE load command if present. - Optional<size_t> ExportsTrieCommandIndex; - /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command - /// corresponding to the __TEXT segment. - Optional<size_t> TextSegmentCommandIndex; - - BumpPtrAllocator Alloc; - StringSaver NewSectionsContents; - - Object() : NewSectionsContents(Alloc) {} - - Error - removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove); - - Error removeLoadCommands(function_ref<bool(const LoadCommand &)> ToRemove); - - void updateLoadCommandIndexes(); - - /// Creates a new segment load command in the object and returns a reference - /// to the newly created load command. The caller should verify that SegName - /// is not too long (SegName.size() should be less than or equal to 16). - LoadCommand &addSegment(StringRef SegName, uint64_t SegVMSize); - - bool is64Bit() const { - return Header.Magic == MachO::MH_MAGIC_64 || - Header.Magic == MachO::MH_CIGAM_64; - } - - uint64_t nextAvailableSegmentAddress() const; -}; - -} // end namespace macho -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_OBJCOPY_MACHO_OBJECT_H diff --git a/llvm/tools/llvm-objcopy/MultiFormatConfig.h b/llvm/tools/llvm-objcopy/MultiFormatConfig.h deleted file mode 100644 index 31d9883d6d3a..000000000000 --- a/llvm/tools/llvm-objcopy/MultiFormatConfig.h +++ /dev/null @@ -1,37 +0,0 @@ -//===- MultiFormatConfig.h --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
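is64Bit() above keys off the Mach-O magic number, accepting both the writer-endian and byte-swapped 64-bit values. A self-contained illustration using the standard magic constants (values as defined for the Mach-O header format):

#include <cstdint>
#include <cstdio>

constexpr uint32_t MH_MAGIC_64 = 0xfeedfacf; // 64-bit, writer-endian
constexpr uint32_t MH_CIGAM_64 = 0xcffaedfe; // 64-bit, byte-swapped

static bool is64Bit(uint32_t Magic) {
  return Magic == MH_MAGIC_64 || Magic == MH_CIGAM_64;
}

int main() {
  std::printf("%d %d\n", is64Bit(0xfeedfacf),
              is64Bit(0xfeedface)); // 1 0 (0xfeedface is the 32-bit magic)
}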
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_MULTIFORMATCONFIG_H -#define LLVM_TOOLS_LLVM_OBJCOPY_MULTIFORMATCONFIG_H - -#include "llvm/Support/Error.h" - -namespace llvm { -namespace objcopy { - -struct CommonConfig; -struct ELFConfig; -struct COFFConfig; -struct MachOConfig; -struct WasmConfig; - -class MultiFormatConfig { -public: - virtual ~MultiFormatConfig() {} - - virtual const CommonConfig &getCommonConfig() const = 0; - virtual Expected getELFConfig() const = 0; - virtual Expected getCOFFConfig() const = 0; - virtual Expected getMachOConfig() const = 0; - virtual Expected getWasmConfig() const = 0; -}; - -} // namespace objcopy -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_MULTIFORMATCONFIG_H diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp new file mode 100644 index 000000000000..5b2b4b5704d8 --- /dev/null +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -0,0 +1,1364 @@ +//===- ObjcopyOptions.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjcopyOptions.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/ConfigManager.h" +#include "llvm/ObjCopy/MachO/MachOConfig.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace llvm::objcopy; + +namespace { +enum ObjcopyID { + OBJCOPY_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OBJCOPY_##ID, +#include "ObjcopyOpts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const OBJCOPY_##NAME[] = VALUE; +#include "ObjcopyOpts.inc" +#undef PREFIX + +const opt::OptTable::Info ObjcopyInfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + {OBJCOPY_##PREFIX, \ + NAME, \ + HELPTEXT, \ + METAVAR, \ + OBJCOPY_##ID, \ + opt::Option::KIND##Class, \ + PARAM, \ + FLAGS, \ + OBJCOPY_##GROUP, \ + OBJCOPY_##ALIAS, \ + ALIASARGS, \ + VALUES}, +#include "ObjcopyOpts.inc" +#undef OPTION +}; + +class ObjcopyOptTable : public opt::OptTable { +public: + ObjcopyOptTable() : OptTable(ObjcopyInfoTable) { + setGroupedShortOptions(true); + } +}; + +enum InstallNameToolID { + INSTALL_NAME_TOOL_INVALID = 0, // This is not an option ID. 
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + INSTALL_NAME_TOOL_##ID, +#include "InstallNameToolOpts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) \ + const char *const INSTALL_NAME_TOOL_##NAME[] = VALUE; +#include "InstallNameToolOpts.inc" +#undef PREFIX + +const opt::OptTable::Info InstallNameToolInfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + {INSTALL_NAME_TOOL_##PREFIX, \ + NAME, \ + HELPTEXT, \ + METAVAR, \ + INSTALL_NAME_TOOL_##ID, \ + opt::Option::KIND##Class, \ + PARAM, \ + FLAGS, \ + INSTALL_NAME_TOOL_##GROUP, \ + INSTALL_NAME_TOOL_##ALIAS, \ + ALIASARGS, \ + VALUES}, +#include "InstallNameToolOpts.inc" +#undef OPTION +}; + +class InstallNameToolOptTable : public opt::OptTable { +public: + InstallNameToolOptTable() : OptTable(InstallNameToolInfoTable) {} +}; + +enum BitcodeStripID { + BITCODE_STRIP_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + BITCODE_STRIP_##ID, +#include "BitcodeStripOpts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const BITCODE_STRIP_##NAME[] = VALUE; +#include "BitcodeStripOpts.inc" +#undef PREFIX + +const opt::OptTable::Info BitcodeStripInfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + {BITCODE_STRIP_##PREFIX, \ + NAME, \ + HELPTEXT, \ + METAVAR, \ + BITCODE_STRIP_##ID, \ + opt::Option::KIND##Class, \ + PARAM, \ + FLAGS, \ + BITCODE_STRIP_##GROUP, \ + BITCODE_STRIP_##ALIAS, \ + ALIASARGS, \ + VALUES}, +#include "BitcodeStripOpts.inc" +#undef OPTION +}; + +class BitcodeStripOptTable : public opt::OptTable { +public: + BitcodeStripOptTable() : OptTable(BitcodeStripInfoTable) {} +}; + +enum StripID { + STRIP_INVALID = 0, // This is not an option ID. 
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + STRIP_##ID, +#include "StripOpts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const STRIP_##NAME[] = VALUE; +#include "StripOpts.inc" +#undef PREFIX + +const opt::OptTable::Info StripInfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + {STRIP_##PREFIX, NAME, HELPTEXT, \ + METAVAR, STRIP_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, STRIP_##GROUP, \ + STRIP_##ALIAS, ALIASARGS, VALUES}, +#include "StripOpts.inc" +#undef OPTION +}; + +class StripOptTable : public opt::OptTable { +public: + StripOptTable() : OptTable(StripInfoTable) { setGroupedShortOptions(true); } +}; + +} // namespace + +static SectionFlag parseSectionRenameFlag(StringRef SectionName) { + return llvm::StringSwitch(SectionName) + .CaseLower("alloc", SectionFlag::SecAlloc) + .CaseLower("load", SectionFlag::SecLoad) + .CaseLower("noload", SectionFlag::SecNoload) + .CaseLower("readonly", SectionFlag::SecReadonly) + .CaseLower("debug", SectionFlag::SecDebug) + .CaseLower("code", SectionFlag::SecCode) + .CaseLower("data", SectionFlag::SecData) + .CaseLower("rom", SectionFlag::SecRom) + .CaseLower("merge", SectionFlag::SecMerge) + .CaseLower("strings", SectionFlag::SecStrings) + .CaseLower("contents", SectionFlag::SecContents) + .CaseLower("share", SectionFlag::SecShare) + .CaseLower("exclude", SectionFlag::SecExclude) + .Default(SectionFlag::SecNone); +} + +static Expected +parseSectionFlagSet(ArrayRef SectionFlags) { + SectionFlag ParsedFlags = SectionFlag::SecNone; + for (StringRef Flag : SectionFlags) { + SectionFlag ParsedFlag = parseSectionRenameFlag(Flag); + if (ParsedFlag == SectionFlag::SecNone) + return createStringError( + errc::invalid_argument, + "unrecognized section flag '%s'. Flags supported for GNU " + "compatibility: alloc, load, noload, readonly, exclude, debug, " + "code, data, rom, share, contents, merge, strings", + Flag.str().c_str()); + ParsedFlags |= ParsedFlag; + } + + return ParsedFlags; +} + +static Expected parseRenameSectionValue(StringRef FlagValue) { + if (!FlagValue.contains('=')) + return createStringError(errc::invalid_argument, + "bad format for --rename-section: missing '='"); + + // Initial split: ".foo" = ".bar,f1,f2,..." + auto Old2New = FlagValue.split('='); + SectionRename SR; + SR.OriginalName = Old2New.first; + + // Flags split: ".bar" "f1" "f2" ... 
+ SmallVector NameAndFlags; + Old2New.second.split(NameAndFlags, ','); + SR.NewName = NameAndFlags[0]; + + if (NameAndFlags.size() > 1) { + Expected ParsedFlagSet = + parseSectionFlagSet(makeArrayRef(NameAndFlags).drop_front()); + if (!ParsedFlagSet) + return ParsedFlagSet.takeError(); + SR.NewFlags = *ParsedFlagSet; + } + + return SR; +} + +static Expected> +parseSetSectionAlignment(StringRef FlagValue) { + if (!FlagValue.contains('=')) + return createStringError( + errc::invalid_argument, + "bad format for --set-section-alignment: missing '='"); + auto Split = StringRef(FlagValue).split('='); + if (Split.first.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --set-section-alignment: missing section name"); + uint64_t NewAlign; + if (Split.second.getAsInteger(0, NewAlign)) + return createStringError( + errc::invalid_argument, + "invalid alignment for --set-section-alignment: '%s'", + Split.second.str().c_str()); + return std::make_pair(Split.first, NewAlign); +} + +static Expected +parseSetSectionFlagValue(StringRef FlagValue) { + if (!StringRef(FlagValue).contains('=')) + return createStringError(errc::invalid_argument, + "bad format for --set-section-flags: missing '='"); + + // Initial split: ".foo" = "f1,f2,..." + auto Section2Flags = StringRef(FlagValue).split('='); + SectionFlagsUpdate SFU; + SFU.Name = Section2Flags.first; + + // Flags split: "f1" "f2" ... + SmallVector SectionFlags; + Section2Flags.second.split(SectionFlags, ','); + Expected ParsedFlagSet = parseSectionFlagSet(SectionFlags); + if (!ParsedFlagSet) + return ParsedFlagSet.takeError(); + SFU.NewFlags = *ParsedFlagSet; + + return SFU; +} + +namespace { +struct TargetInfo { + FileFormat Format; + MachineInfo Machine; +}; +} // namespace + +// FIXME: consolidate with the bfd parsing used by lld. 
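parseRenameSectionValue above splits a "--rename-section .old=.new,flag1,flag2" value on '=' first and on ',' second. A standalone approximation of that splitting with std::string (no StringRef and no error plumbing, so only a sketch of the shape, not the LLVM helper):

#include <cstdio>
#include <string>
#include <vector>

struct Rename {
  std::string Old, New;
  std::vector<std::string> Flags;
};

static bool parseRename(const std::string &V, Rename &R) {
  size_t Eq = V.find('=');
  if (Eq == std::string::npos)
    return false; // bad format: missing '='
  R.Old = V.substr(0, Eq);
  std::string Rest = V.substr(Eq + 1);
  bool First = true;
  for (size_t Start = 0;;) {
    size_t Comma = Rest.find(',', Start);
    std::string Tok = Rest.substr(Start, Comma - Start);
    if (First) {
      R.New = Tok; // first comma-separated field is the new name
      First = false;
    } else {
      R.Flags.push_back(Tok); // remaining fields are flags
    }
    if (Comma == std::string::npos)
      break;
    Start = Comma + 1;
  }
  return true;
}

int main() {
  Rename R;
  parseRename(".text=.mytext,alloc,readonly", R);
  std::printf("%s -> %s, %zu flags\n", R.Old.c_str(), R.New.c_str(),
              R.Flags.size()); // .text -> .mytext, 2 flags
}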
+static const StringMap TargetMap{ + // Name, {EMachine, 64bit, LittleEndian} + // x86 + {"elf32-i386", {ELF::EM_386, false, true}}, + {"elf32-x86-64", {ELF::EM_X86_64, false, true}}, + {"elf64-x86-64", {ELF::EM_X86_64, true, true}}, + // Intel MCU + {"elf32-iamcu", {ELF::EM_IAMCU, false, true}}, + // ARM + {"elf32-littlearm", {ELF::EM_ARM, false, true}}, + // ARM AArch64 + {"elf64-aarch64", {ELF::EM_AARCH64, true, true}}, + {"elf64-littleaarch64", {ELF::EM_AARCH64, true, true}}, + // RISC-V + {"elf32-littleriscv", {ELF::EM_RISCV, false, true}}, + {"elf64-littleriscv", {ELF::EM_RISCV, true, true}}, + // PowerPC + {"elf32-powerpc", {ELF::EM_PPC, false, false}}, + {"elf32-powerpcle", {ELF::EM_PPC, false, true}}, + {"elf64-powerpc", {ELF::EM_PPC64, true, false}}, + {"elf64-powerpcle", {ELF::EM_PPC64, true, true}}, + // MIPS + {"elf32-bigmips", {ELF::EM_MIPS, false, false}}, + {"elf32-ntradbigmips", {ELF::EM_MIPS, false, false}}, + {"elf32-ntradlittlemips", {ELF::EM_MIPS, false, true}}, + {"elf32-tradbigmips", {ELF::EM_MIPS, false, false}}, + {"elf32-tradlittlemips", {ELF::EM_MIPS, false, true}}, + {"elf64-tradbigmips", {ELF::EM_MIPS, true, false}}, + {"elf64-tradlittlemips", {ELF::EM_MIPS, true, true}}, + // SPARC + {"elf32-sparc", {ELF::EM_SPARC, false, false}}, + {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, + {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, +}; + +static Expected +getOutputTargetInfoByTargetName(StringRef TargetName) { + StringRef OriginalTargetName = TargetName; + bool IsFreeBSD = TargetName.consume_back("-freebsd"); + auto Iter = TargetMap.find(TargetName); + if (Iter == std::end(TargetMap)) + return createStringError(errc::invalid_argument, + "invalid output format: '%s'", + OriginalTargetName.str().c_str()); + MachineInfo MI = Iter->getValue(); + if (IsFreeBSD) + MI.OSABI = ELF::ELFOSABI_FREEBSD; + + FileFormat Format; + if (TargetName.startswith("elf")) + Format = FileFormat::ELF; + else + // This should never happen because `TargetName` is valid (it certainly + // exists in the TargetMap). + llvm_unreachable("unknown target prefix"); + + return {TargetInfo{Format, MI}}; +} + +static Error addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, + StringRef Filename, MatchStyle MS, + function_ref ErrorCallback) { + StringSaver Saver(Alloc); + SmallVector Lines; + auto BufOrErr = MemoryBuffer::getFile(Filename); + if (!BufOrErr) + return createFileError(Filename, BufOrErr.getError()); + + BufOrErr.get()->getBuffer().split(Lines, '\n'); + for (StringRef Line : Lines) { + // Ignore everything after '#', trim whitespace, and only add the symbol if + // it's not empty. 
+ auto TrimmedLine = Line.split('#').first.trim(); + if (!TrimmedLine.empty()) + if (Error E = Symbols.addMatcher(NameOrPattern::create( + Saver.save(TrimmedLine), MS, ErrorCallback))) + return E; + } + + return Error::success(); +} + +static Error addSymbolsToRenameFromFile(StringMap &SymbolsToRename, + BumpPtrAllocator &Alloc, + StringRef Filename) { + StringSaver Saver(Alloc); + SmallVector Lines; + auto BufOrErr = MemoryBuffer::getFile(Filename); + if (!BufOrErr) + return createFileError(Filename, BufOrErr.getError()); + + BufOrErr.get()->getBuffer().split(Lines, '\n'); + size_t NumLines = Lines.size(); + for (size_t LineNo = 0; LineNo < NumLines; ++LineNo) { + StringRef TrimmedLine = Lines[LineNo].split('#').first.trim(); + if (TrimmedLine.empty()) + continue; + + std::pair Pair = Saver.save(TrimmedLine).split(' '); + StringRef NewName = Pair.second.trim(); + if (NewName.empty()) + return createStringError(errc::invalid_argument, + "%s:%zu: missing new symbol name", + Filename.str().c_str(), LineNo + 1); + SymbolsToRename.insert({Pair.first, NewName}); + } + return Error::success(); +} + +template static ErrorOr getAsInteger(StringRef Val) { + T Result; + if (Val.getAsInteger(0, Result)) + return errc::invalid_argument; + return Result; +} + +namespace { + +enum class ToolType { Objcopy, Strip, InstallNameTool, BitcodeStrip }; + +} // anonymous namespace + +static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS, + ToolType Tool) { + StringRef HelpText, ToolName; + switch (Tool) { + case ToolType::Objcopy: + ToolName = "llvm-objcopy"; + HelpText = " [options] input [output]"; + break; + case ToolType::Strip: + ToolName = "llvm-strip"; + HelpText = " [options] inputs..."; + break; + case ToolType::InstallNameTool: + ToolName = "llvm-install-name-tool"; + HelpText = " [options] input"; + break; + case ToolType::BitcodeStrip: + ToolName = "llvm-bitcode-strip"; + HelpText = " [options] input"; + break; + } + OptTable.printHelp(OS, (ToolName + HelpText).str().c_str(), + (ToolName + " tool").str().c_str()); + // TODO: Replace this with libOption call once it adds extrahelp support. + // The CommandLine library has a cl::extrahelp class to support this, + // but libOption does not have that yet. + OS << "\nPass @FILE as argument to read options from FILE.\n"; +} + +static Expected parseNewSymbolInfo(StringRef FlagValue) { + // Parse value given with --add-symbol option and create the + // new symbol if possible. The value format for --add-symbol is: + // + // =[
<section>:]<value>[,<flags>] + // + // where: + // <name> - symbol name, can be empty string + // <section>
- optional section name. If not given ABS symbol is created + // - symbol value, can be decimal or hexadecimal number prefixed + // with 0x. + // - optional flags affecting symbol type, binding or visibility. + NewSymbolInfo SI; + StringRef Value; + std::tie(SI.SymbolName, Value) = FlagValue.split('='); + if (Value.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-symbol, missing '=' after '%s'", + SI.SymbolName.str().c_str()); + + if (Value.contains(':')) { + std::tie(SI.SectionName, Value) = Value.split(':'); + if (SI.SectionName.empty() || Value.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-symbol, missing section name or symbol value"); + } + + SmallVector Flags; + Value.split(Flags, ','); + if (Flags[0].getAsInteger(0, SI.Value)) + return createStringError(errc::invalid_argument, "bad symbol value: '%s'", + Flags[0].str().c_str()); + + using Functor = std::function; + SmallVector UnsupportedFlags; + for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) + static_cast( + StringSwitch(Flags[I]) + .CaseLower("global", + [&] { SI.Flags.push_back(SymbolFlag::Global); }) + .CaseLower("local", [&] { SI.Flags.push_back(SymbolFlag::Local); }) + .CaseLower("weak", [&] { SI.Flags.push_back(SymbolFlag::Weak); }) + .CaseLower("default", + [&] { SI.Flags.push_back(SymbolFlag::Default); }) + .CaseLower("hidden", + [&] { SI.Flags.push_back(SymbolFlag::Hidden); }) + .CaseLower("protected", + [&] { SI.Flags.push_back(SymbolFlag::Protected); }) + .CaseLower("file", [&] { SI.Flags.push_back(SymbolFlag::File); }) + .CaseLower("section", + [&] { SI.Flags.push_back(SymbolFlag::Section); }) + .CaseLower("object", + [&] { SI.Flags.push_back(SymbolFlag::Object); }) + .CaseLower("function", + [&] { SI.Flags.push_back(SymbolFlag::Function); }) + .CaseLower( + "indirect-function", + [&] { SI.Flags.push_back(SymbolFlag::IndirectFunction); }) + .CaseLower("debug", [&] { SI.Flags.push_back(SymbolFlag::Debug); }) + .CaseLower("constructor", + [&] { SI.Flags.push_back(SymbolFlag::Constructor); }) + .CaseLower("warning", + [&] { SI.Flags.push_back(SymbolFlag::Warning); }) + .CaseLower("indirect", + [&] { SI.Flags.push_back(SymbolFlag::Indirect); }) + .CaseLower("synthetic", + [&] { SI.Flags.push_back(SymbolFlag::Synthetic); }) + .CaseLower("unique-object", + [&] { SI.Flags.push_back(SymbolFlag::UniqueObject); }) + .StartsWithLower("before=", + [&] { + StringRef SymNamePart = + Flags[I].split('=').second; + + if (!SymNamePart.empty()) + SI.BeforeSyms.push_back(SymNamePart); + }) + .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); + if (!UnsupportedFlags.empty()) + return createStringError(errc::invalid_argument, + "unsupported flag%s for --add-symbol: '%s'", + UnsupportedFlags.size() > 1 ? "s" : "", + join(UnsupportedFlags, "', '").c_str()); + + return SI; +} + +// Parse input option \p ArgValue and load section data. This function +// extracts section name and name of the file keeping section data from +// ArgValue, loads data from the file, and stores section name and data +// into the vector of new sections \p NewSections. 
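The name=[section:]value[,flags] grammar for --add-symbol described above can be exercised with a few lines of plain C++. This is a rough sketch of the value splitting only (not the option handling, and the example value is hypothetical):

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>

int main() {
  std::string V = "foo=.text:0x1000,global,function";
  size_t Eq = V.find('=');
  std::string Name = V.substr(0, Eq);
  std::string Rest = V.substr(Eq + 1);
  std::string Section; // empty section means an ABS symbol
  if (size_t Colon = Rest.find(':'); Colon != std::string::npos) {
    Section = Rest.substr(0, Colon);
    Rest = Rest.substr(Colon + 1);
  }
  // base 0 accepts both decimal and 0x-prefixed values, as described above
  uint64_t Value =
      std::strtoull(Rest.substr(0, Rest.find(',')).c_str(), nullptr, 0);
  std::printf("%s @ %s = 0x%llx\n", Name.c_str(),
              Section.empty() ? "ABS" : Section.c_str(),
              (unsigned long long)Value); // foo @ .text = 0x1000
}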
+static Error loadNewSectionData(StringRef ArgValue, StringRef OptionName, + std::vector &NewSections) { + if (!ArgValue.contains('=')) + return createStringError(errc::invalid_argument, + "bad format for " + OptionName + ": missing '='"); + + std::pair SecPair = ArgValue.split("="); + if (SecPair.second.empty()) + return createStringError(errc::invalid_argument, "bad format for " + + OptionName + + ": missing file name"); + + ErrorOr> BufOrErr = + MemoryBuffer::getFile(SecPair.second); + if (!BufOrErr) + return createFileError(SecPair.second, + errorCodeToError(BufOrErr.getError())); + + NewSections.push_back({SecPair.first, std::move(*BufOrErr)}); + return Error::success(); +} + +// parseObjcopyOptions returns the config and sets the input arguments. If a +// help flag is set then parseObjcopyOptions will print the help messege and +// exit. +Expected +objcopy::parseObjcopyOptions(ArrayRef RawArgsArr, + function_ref ErrorCallback) { + DriverConfig DC; + ObjcopyOptTable T; + + const char *const *DashDash = + std::find_if(RawArgsArr.begin(), RawArgsArr.end(), + [](StringRef Str) { return Str == "--"; }); + ArrayRef ArgsArr = makeArrayRef(RawArgsArr.begin(), DashDash); + if (DashDash != RawArgsArr.end()) + DashDash = std::next(DashDash); + + unsigned MissingArgumentIndex, MissingArgumentCount; + llvm::opt::InputArgList InputArgs = + T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); + + if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) { + printHelp(T, errs(), ToolType::Objcopy); + exit(1); + } + + if (InputArgs.hasArg(OBJCOPY_help)) { + printHelp(T, outs(), ToolType::Objcopy); + exit(0); + } + + if (InputArgs.hasArg(OBJCOPY_version)) { + outs() << "llvm-objcopy, compatible with GNU objcopy\n"; + cl::PrintVersionMessage(); + exit(0); + } + + SmallVector Positional; + + for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN)) + return createStringError(errc::invalid_argument, "unknown argument '%s'", + Arg->getAsString(InputArgs).c_str()); + + for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT)) + Positional.push_back(Arg->getValue()); + std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional)); + + if (Positional.empty()) + return createStringError(errc::invalid_argument, "no input file specified"); + + if (Positional.size() > 2) + return createStringError(errc::invalid_argument, + "too many positional arguments"); + + ConfigManager ConfigMgr; + CommonConfig &Config = ConfigMgr.Common; + COFFConfig &COFFConfig = ConfigMgr.COFF; + ELFConfig &ELFConfig = ConfigMgr.ELF; + MachOConfig &MachOConfig = ConfigMgr.MachO; + Config.InputFilename = Positional[0]; + Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1]; + if (InputArgs.hasArg(OBJCOPY_target) && + (InputArgs.hasArg(OBJCOPY_input_target) || + InputArgs.hasArg(OBJCOPY_output_target))) + return createStringError( + errc::invalid_argument, + "--target cannot be used with --input-target or --output-target"); + + if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + + MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle + = InputArgs.hasArg(OBJCOPY_regex) ? MatchStyle::Regex + : InputArgs.hasArg(OBJCOPY_wildcard) ? 
MatchStyle::Wildcard + : MatchStyle::Literal; + StringRef InputFormat, OutputFormat; + if (InputArgs.hasArg(OBJCOPY_target)) { + InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); + OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target); + } else { + InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target); + OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target); + } + + // FIXME: Currently, we ignore the target for non-binary/ihex formats + // explicitly specified by -I option (e.g. -Ielf32-x86-64) and guess the + // format by llvm::object::createBinary regardless of the option value. + Config.InputFormat = StringSwitch(InputFormat) + .Case("binary", FileFormat::Binary) + .Case("ihex", FileFormat::IHex) + .Default(FileFormat::Unspecified); + + if (InputArgs.hasArg(OBJCOPY_new_symbol_visibility)) { + const uint8_t Invalid = 0xff; + StringRef VisibilityStr = + InputArgs.getLastArgValue(OBJCOPY_new_symbol_visibility); + + ELFConfig.NewSymbolVisibility = StringSwitch(VisibilityStr) + .Case("default", ELF::STV_DEFAULT) + .Case("hidden", ELF::STV_HIDDEN) + .Case("internal", ELF::STV_INTERNAL) + .Case("protected", ELF::STV_PROTECTED) + .Default(Invalid); + + if (ELFConfig.NewSymbolVisibility == Invalid) + return createStringError(errc::invalid_argument, + "'%s' is not a valid symbol visibility", + VisibilityStr.str().c_str()); + } + + for (const auto *Arg : InputArgs.filtered(OBJCOPY_subsystem)) { + StringRef Subsystem, Version; + std::tie(Subsystem, Version) = StringRef(Arg->getValue()).split(':'); + COFFConfig.Subsystem = + StringSwitch(Subsystem.lower()) + .Case("boot_application", + COFF::IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION) + .Case("console", COFF::IMAGE_SUBSYSTEM_WINDOWS_CUI) + .Case("efi_application", COFF::IMAGE_SUBSYSTEM_EFI_APPLICATION) + .Case("efi_boot_service_driver", + COFF::IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER) + .Case("efi_rom", COFF::IMAGE_SUBSYSTEM_EFI_ROM) + .Case("efi_runtime_driver", + COFF::IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER) + .Case("native", COFF::IMAGE_SUBSYSTEM_NATIVE) + .Case("posix", COFF::IMAGE_SUBSYSTEM_POSIX_CUI) + .Case("windows", COFF::IMAGE_SUBSYSTEM_WINDOWS_GUI) + .Default(COFF::IMAGE_SUBSYSTEM_UNKNOWN); + if (*COFFConfig.Subsystem == COFF::IMAGE_SUBSYSTEM_UNKNOWN) + return createStringError(errc::invalid_argument, + "'%s' is not a valid subsystem", + Subsystem.str().c_str()); + if (!Version.empty()) { + StringRef Major, Minor; + std::tie(Major, Minor) = Version.split('.'); + unsigned Number; + if (Major.getAsInteger(10, Number)) + return createStringError(errc::invalid_argument, + "'%s' is not a valid subsystem major version", + Major.str().c_str()); + COFFConfig.MajorSubsystemVersion = Number; + Number = 0; + if (!Minor.empty() && Minor.getAsInteger(10, Number)) + return createStringError(errc::invalid_argument, + "'%s' is not a valid subsystem minor version", + Minor.str().c_str()); + COFFConfig.MinorSubsystemVersion = Number; + } + } + + Config.OutputFormat = StringSwitch(OutputFormat) + .Case("binary", FileFormat::Binary) + .Case("ihex", FileFormat::IHex) + .Default(FileFormat::Unspecified); + if (Config.OutputFormat == FileFormat::Unspecified) { + if (OutputFormat.empty()) { + Config.OutputFormat = Config.InputFormat; + } else { + Expected Target = + getOutputTargetInfoByTargetName(OutputFormat); + if (!Target) + return Target.takeError(); + Config.OutputFormat = Target->Format; + Config.OutputArch = Target->Machine; + } + } + + if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections, + 
+ + if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections, + OBJCOPY_compress_debug_sections_eq)) { + Config.CompressionType = DebugCompressionType::Z; + + if (Arg->getOption().getID() == OBJCOPY_compress_debug_sections_eq) { + Config.CompressionType = + StringSwitch<DebugCompressionType>( + InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq)) + .Case("zlib", DebugCompressionType::Z) + .Default(DebugCompressionType::None); + if (Config.CompressionType == DebugCompressionType::None) + return createStringError( + errc::invalid_argument, + "invalid or unsupported --compress-debug-sections format: %s", + InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq) + .str() + .c_str()); + } + if (!zlib::isAvailable()) + return createStringError( + errc::invalid_argument, + "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot compress"); + } + + Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink); + // The gnu_debuglink's target is expected to not change or else its CRC would + // become invalidated and get rejected. We can avoid recalculating the + // checksum for every target file inside an archive by precomputing the CRC + // here. This prevents a significant amount of I/O. + if (!Config.AddGnuDebugLink.empty()) { + auto DebugOrErr = MemoryBuffer::getFile(Config.AddGnuDebugLink); + if (!DebugOrErr) + return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError()); + auto Debug = std::move(*DebugOrErr); + Config.GnuDebugLinkCRC32 = + llvm::crc32(arrayRefFromStringRef(Debug->getBuffer())); + } + Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo); + Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols); + Config.AllocSectionsPrefix = + InputArgs.getLastArgValue(OBJCOPY_prefix_alloc_sections); + if (auto Arg = InputArgs.getLastArg(OBJCOPY_extract_partition)) + Config.ExtractPartition = Arg->getValue(); + + for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) { + if (!StringRef(Arg->getValue()).contains('=')) + return createStringError(errc::invalid_argument, + "bad format for --redefine-sym"); + auto Old2New = StringRef(Arg->getValue()).split('='); + if (!Config.SymbolsToRename.insert(Old2New).second) + return createStringError(errc::invalid_argument, + "multiple redefinition of symbol '%s'", + Old2New.first.str().c_str()); + } + + for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbols)) + if (Error E = addSymbolsToRenameFromFile(Config.SymbolsToRename, DC.Alloc, + Arg->getValue())) + return std::move(E); + + for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) { + Expected<SectionRename> SR = + parseRenameSectionValue(StringRef(Arg->getValue())); + if (!SR) + return SR.takeError(); + if (!Config.SectionsToRename.try_emplace(SR->OriginalName, *SR).second) + return createStringError(errc::invalid_argument, + "multiple renames of section '%s'", + SR->OriginalName.str().c_str()); + } + for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) { + Expected<std::pair<StringRef, uint64_t>> NameAndAlign = + parseSetSectionAlignment(Arg->getValue()); + if (!NameAndAlign) + return NameAndAlign.takeError(); + Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second; + } + for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) { + Expected<SectionFlagsUpdate> SFU = + parseSetSectionFlagValue(Arg->getValue()); + if (!SFU) + return SFU.takeError(); + if (!Config.SetSectionFlags.try_emplace(SFU->Name, *SFU).second) + return createStringError( + errc::invalid_argument, + "--set-section-flags set multiple times for section '%s'", + SFU->Name.str().c_str()); + } + // Prohibit combinations of --set-section-flags when the section name is 
used + // by --rename-section, either as a source or a destination. + for (const auto &E : Config.SectionsToRename) { + const SectionRename &SR = E.second; + if (Config.SetSectionFlags.count(SR.OriginalName)) + return createStringError( + errc::invalid_argument, + "--set-section-flags=%s conflicts with --rename-section=%s=%s", + SR.OriginalName.str().c_str(), SR.OriginalName.str().c_str(), + SR.NewName.str().c_str()); + if (Config.SetSectionFlags.count(SR.NewName)) + return createStringError( + errc::invalid_argument, + "--set-section-flags=%s conflicts with --rename-section=%s=%s", + SR.NewName.str().c_str(), SR.OriginalName.str().c_str(), + SR.NewName.str().c_str()); + } + + for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section)) + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_only_section)) + if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) { + if (Error Err = loadNewSectionData(Arg->getValue(), "--add-section", + Config.AddSection)) + return std::move(Err); + } + for (auto Arg : InputArgs.filtered(OBJCOPY_update_section)) { + if (Error Err = loadNewSectionData(Arg->getValue(), "--update-section", + Config.UpdateSection)) + return std::move(Err); + } + for (auto *Arg : InputArgs.filtered(OBJCOPY_dump_section)) { + StringRef Value(Arg->getValue()); + if (Value.split('=').second.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --dump-section, expected section=file"); + Config.DumpSection.push_back(Value); + } + Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all); + Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu); + Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug); + Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo); + Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections); + Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc); + Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded); + Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo); + Config.ExtractMainPartition = + InputArgs.hasArg(OBJCOPY_extract_main_partition); + ELFConfig.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden); + Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken); + if (auto *Arg = + InputArgs.getLastArg(OBJCOPY_discard_all, OBJCOPY_discard_locals)) { + Config.DiscardMode = Arg->getOption().matches(OBJCOPY_discard_all) + ? 
DiscardType::All + : DiscardType::Locals; + } + Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug); + ELFConfig.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols); + MachOConfig.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined); + Config.DecompressDebugSections = + InputArgs.hasArg(OBJCOPY_decompress_debug_sections); + if (Config.DiscardMode == DiscardType::All) { + Config.StripDebug = true; + ELFConfig.KeepFileSymbols = true; + } + for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) + if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) + if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) + if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols)) + if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) + if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols)) + if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) + if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) + if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) + if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) + if (Error E = + Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) + if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) + if (Error E = + addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(), + SymbolMatchStyle, ErrorCallback)) + return std::move(E); + for (auto *Arg : InputArgs.filtered(OBJCOPY_add_symbol)) { + Expected<NewSymbolInfo> SymInfo = parseNewSymbolInfo(Arg->getValue()); + if (!SymInfo) + return SymInfo.takeError(); + + Config.SymbolsToAdd.push_back(*SymInfo); + } + + ELFConfig.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links); + + Config.DeterministicArchives = InputArgs.hasFlag( + OBJCOPY_enable_deterministic_archives, + OBJCOPY_disable_deterministic_archives, /*default=*/true); + + Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates); + + if (Config.PreserveDates && + (Config.OutputFilename == "-" || Config.InputFilename == "-")) + return createStringError(errc::invalid_argument, + "--preserve-dates requires a file"); + + for (auto Arg : InputArgs) + if (Arg->getOption().matches(OBJCOPY_set_start)) { + auto EAddr = getAsInteger<uint64_t>(Arg->getValue()); + if (!EAddr) + return createStringError( + EAddr.getError(), "bad entry point address: '%s'", Arg->getValue()); + + ELFConfig.EntryExpr = [EAddr](uint64_t) { return *EAddr; }; + } else if (Arg->getOption().matches(OBJCOPY_change_start)) { + auto EIncr = getAsInteger<int64_t>(Arg->getValue()); + if (!EIncr) + return createStringError(EIncr.getError(), + "bad entry point increment: '%s'", + Arg->getValue()); + auto Expr = ELFConfig.EntryExpr ? std::move(ELFConfig.EntryExpr) + : [](uint64_t A) { return A; }; + ELFConfig.EntryExpr = [Expr, EIncr](uint64_t EAddr) { + return Expr(EAddr) + *EIncr; + }; + } + + if (Config.DecompressDebugSections && + Config.CompressionType != DebugCompressionType::None) { + return createStringError( + errc::invalid_argument, + "cannot specify both --compress-debug-sections and " + "--decompress-debug-sections"); + } + + if (Config.DecompressDebugSections && !zlib::isAvailable()) + return createStringError( + errc::invalid_argument, + "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress"); + + if (Config.ExtractPartition && Config.ExtractMainPartition) + return createStringError(errc::invalid_argument, + "cannot specify --extract-partition together with " + "--extract-main-partition"); + + DC.CopyConfigs.push_back(std::move(ConfigMgr)); + return std::move(DC); +} +
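The --set-start/--change-start handling above composes a single entry-point expression: each --change-start wraps whatever function has been built so far, so repeated options accumulate on top of one another. A standalone sketch of the same chaining (the addresses are arbitrary):

#include <cstdint>
#include <functional>

int main() {
  std::function<uint64_t(uint64_t)> EntryExpr;

  // --set-start=0x1000 replaces the entry address outright.
  EntryExpr = [](uint64_t) { return UINT64_C(0x1000); };

  // --change-start=0x20 adds to whatever the previous expression yields.
  std::function<uint64_t(uint64_t)> Prev =
      EntryExpr ? std::move(EntryExpr)
                : std::function<uint64_t(uint64_t)>(
                      [](uint64_t A) { return A; });
  EntryExpr = [Prev](uint64_t Addr) { return Prev(Addr) + 0x20; };

  return EntryExpr(0) == 0x1020 ? 0 : 1; // exits 0: 0x1000 + 0x20
}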
+// parseInstallNameToolOptions returns the config and sets the input arguments. +// If a help flag is set then parseInstallNameToolOptions will print the help +// message and exit. +Expected<DriverConfig> +objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) { + DriverConfig DC; + ConfigManager ConfigMgr; + CommonConfig &Config = ConfigMgr.Common; + MachOConfig &MachOConfig = ConfigMgr.MachO; + InstallNameToolOptTable T; + unsigned MissingArgumentIndex, MissingArgumentCount; + llvm::opt::InputArgList InputArgs = + T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); + + if (MissingArgumentCount) + return createStringError( + errc::invalid_argument, + "missing argument to " + + StringRef(InputArgs.getArgString(MissingArgumentIndex)) + + " option"); + + if (InputArgs.size() == 0) { + printHelp(T, errs(), ToolType::InstallNameTool); + exit(1); + } + + if (InputArgs.hasArg(INSTALL_NAME_TOOL_help)) { + printHelp(T, outs(), ToolType::InstallNameTool); + exit(0); + } + + if (InputArgs.hasArg(INSTALL_NAME_TOOL_version)) { + outs() << "llvm-install-name-tool, compatible with cctools " + "install_name_tool\n"; + cl::PrintVersionMessage(); + exit(0); + } + + for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath)) + MachOConfig.RPathToAdd.push_back(Arg->getValue()); + + for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_prepend_rpath)) + MachOConfig.RPathToPrepend.push_back(Arg->getValue()); + + for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) { + StringRef RPath = Arg->getValue(); + + // Cannot add and delete the same rpath at the same time. + if (is_contained(MachOConfig.RPathToAdd, RPath)) + return createStringError( + errc::invalid_argument, + "cannot specify both -add_rpath '%s' and -delete_rpath '%s'", + RPath.str().c_str(), RPath.str().c_str()); + if (is_contained(MachOConfig.RPathToPrepend, RPath)) + return createStringError( + errc::invalid_argument, + "cannot specify both -prepend_rpath '%s' and -delete_rpath '%s'", + RPath.str().c_str(), RPath.str().c_str()); + + MachOConfig.RPathsToRemove.insert(RPath); + } + + for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_rpath)) { + StringRef Old = Arg->getValue(0); + StringRef New = Arg->getValue(1); + + auto Match = [=](StringRef RPath) { return RPath == Old || RPath == New; }; + + // Cannot specify duplicate -rpath entries + auto It1 = find_if( + MachOConfig.RPathsToUpdate, + [&Match](const DenseMap<StringRef, StringRef>::value_type &OldNew) { + return Match(OldNew.getFirst()) || Match(OldNew.getSecond()); + }); + if (It1 != MachOConfig.RPathsToUpdate.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -rpath '" + + It1->getFirst() + "' '" + It1->getSecond() + + "' and -rpath '" + Old + "' '" + New + "'"); + + // Cannot specify the same rpath under both -delete_rpath and -rpath + auto It2 = find_if(MachOConfig.RPathsToRemove, Match); + if (It2 != MachOConfig.RPathsToRemove.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -delete_rpath '" + *It2 + + "' and -rpath '" + Old + "' '" + New + "'"); + + // Cannot specify the same rpath under both -add_rpath and -rpath + auto It3 = find_if(MachOConfig.RPathToAdd, Match); + if (It3 != MachOConfig.RPathToAdd.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -add_rpath '" + *It3 + + "' and -rpath '" + Old + "' '" + New + "'"); + + // Cannot specify the same rpath under both -prepend_rpath and -rpath. 
+ auto It4 = find_if(MachOConfig.RPathToPrepend, Match); + if (It4 != MachOConfig.RPathToPrepend.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -prepend_rpath '" + *It4 + + "' and -rpath '" + Old + "' '" + New + "'"); + + MachOConfig.RPathsToUpdate.insert({Old, New}); + } + + if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) { + MachOConfig.SharedLibId = Arg->getValue(); + if (MachOConfig.SharedLibId->empty()) + return createStringError(errc::invalid_argument, + "cannot specify an empty id"); + } + + for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change)) + MachOConfig.InstallNamesToUpdate.insert( + {Arg->getValue(0), Arg->getValue(1)}); + + MachOConfig.RemoveAllRpaths = + InputArgs.hasArg(INSTALL_NAME_TOOL_delete_all_rpaths); + + SmallVector<StringRef, 2> Positional; + for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_UNKNOWN)) + return createStringError(errc::invalid_argument, "unknown argument '%s'", + Arg->getAsString(InputArgs).c_str()); + for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_INPUT)) + Positional.push_back(Arg->getValue()); + if (Positional.empty()) + return createStringError(errc::invalid_argument, "no input file specified"); + if (Positional.size() > 1) + return createStringError( + errc::invalid_argument, + "llvm-install-name-tool expects a single input file"); + Config.InputFilename = Positional[0]; + Config.OutputFilename = Positional[0]; + + DC.CopyConfigs.push_back(std::move(ConfigMgr)); + return std::move(DC); +}
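The rpath validation above is a series of pairwise-conflict checks over small lists, for which llvm::is_contained (a linear scan) is entirely adequate. A minimal sketch of the -add_rpath/-delete_rpath conflict, with an illustrative helper name:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

static bool addAndDeleteConflict(
    const llvm::SmallVectorImpl<llvm::StringRef> &RPathsToAdd,
    llvm::StringRef RPathToDelete) {
  // Adding and deleting the same rpath in one invocation is rejected.
  return llvm::is_contained(RPathsToAdd, RPathToDelete);
}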
+ +Expected<DriverConfig> +objcopy::parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr, + function_ref<Error(Error)> ErrorCallback) { + DriverConfig DC; + ConfigManager ConfigMgr; + CommonConfig &Config = ConfigMgr.Common; + MachOConfig &MachOConfig = ConfigMgr.MachO; + BitcodeStripOptTable T; + unsigned MissingArgumentIndex, MissingArgumentCount; + opt::InputArgList InputArgs = + T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); + + if (InputArgs.size() == 0) { + printHelp(T, errs(), ToolType::BitcodeStrip); + exit(1); + } + + if (InputArgs.hasArg(BITCODE_STRIP_help)) { + printHelp(T, outs(), ToolType::BitcodeStrip); + exit(0); + } + + if (InputArgs.hasArg(BITCODE_STRIP_version)) { + outs() << "llvm-bitcode-strip, compatible with cctools " + "bitcode_strip\n"; + cl::PrintVersionMessage(); + exit(0); + } + + for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_UNKNOWN)) + return createStringError(errc::invalid_argument, "unknown argument '%s'", + Arg->getAsString(InputArgs).c_str()); + + SmallVector<StringRef, 2> Positional; + for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_INPUT)) + Positional.push_back(Arg->getValue()); + if (Positional.size() > 1) + return createStringError(errc::invalid_argument, + "llvm-bitcode-strip expects a single input file"); + assert(!Positional.empty()); + Config.InputFilename = Positional[0]; + + if (!InputArgs.hasArg(BITCODE_STRIP_output)) { + return createStringError(errc::invalid_argument, + "-o is a required argument"); + } + Config.OutputFilename = InputArgs.getLastArgValue(BITCODE_STRIP_output); + + if (!InputArgs.hasArg(BITCODE_STRIP_remove)) + return createStringError(errc::invalid_argument, "no action specified"); + + // We only support -r for now, which removes all bitcode sections and + // the __LLVM segment if it's now empty. + cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( + "__LLVM,__bundle", MatchStyle::Literal, ErrorCallback))); + MachOConfig.EmptySegmentsToRemove.insert("__LLVM"); + + DC.CopyConfigs.push_back(std::move(ConfigMgr)); + return std::move(DC); +} + +// parseStripOptions returns the config and sets the input arguments. If a +// help flag is set then parseStripOptions will print the help message and +// exit. +Expected<DriverConfig> +objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr, + function_ref<Error(Error)> ErrorCallback) { + const char *const *DashDash = + std::find_if(RawArgsArr.begin(), RawArgsArr.end(), + [](StringRef Str) { return Str == "--"; }); + ArrayRef<const char *> ArgsArr = makeArrayRef(RawArgsArr.begin(), DashDash); + if (DashDash != RawArgsArr.end()) + DashDash = std::next(DashDash); + + StripOptTable T; + unsigned MissingArgumentIndex, MissingArgumentCount; + llvm::opt::InputArgList InputArgs = + T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); + + if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) { + printHelp(T, errs(), ToolType::Strip); + exit(1); + } + + if (InputArgs.hasArg(STRIP_help)) { + printHelp(T, outs(), ToolType::Strip); + exit(0); + } + + if (InputArgs.hasArg(STRIP_version)) { + outs() << "llvm-strip, compatible with GNU strip\n"; + cl::PrintVersionMessage(); + exit(0); + } + + SmallVector<StringRef, 2> Positional; + for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN)) + return createStringError(errc::invalid_argument, "unknown argument '%s'", + Arg->getAsString(InputArgs).c_str()); + for (auto Arg : InputArgs.filtered(STRIP_INPUT)) + Positional.push_back(Arg->getValue()); + std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional)); + + if (Positional.empty()) + return createStringError(errc::invalid_argument, "no input file specified"); + + if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output)) + return createStringError( + errc::invalid_argument, + "multiple input files cannot be used in combination with -o"); + + ConfigManager ConfigMgr; + CommonConfig &Config = ConfigMgr.Common; + ELFConfig &ELFConfig = ConfigMgr.ELF; + MachOConfig &MachOConfig = ConfigMgr.MachO; + + if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + MatchStyle SectionMatchStyle = + InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle + = InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex + : InputArgs.hasArg(STRIP_wildcard) ? MatchStyle::Wildcard + : MatchStyle::Literal; + ELFConfig.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); + Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); + + if (auto *Arg = InputArgs.getLastArg(STRIP_discard_all, STRIP_discard_locals)) + Config.DiscardMode = Arg->getOption().matches(STRIP_discard_all) + ? DiscardType::All + : DiscardType::Locals; + Config.StripSections = InputArgs.hasArg(STRIP_strip_sections); + Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded); + if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) + Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; + Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu); + MachOConfig.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols); + Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug); + ELFConfig.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); + MachOConfig.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined); + + for (auto Arg : InputArgs.filtered(STRIP_keep_section)) + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + + for (auto Arg : InputArgs.filtered(STRIP_remove_section)) + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + + for (auto Arg : InputArgs.filtered(STRIP_strip_symbol)) + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + + for (auto Arg : InputArgs.filtered(STRIP_keep_symbol)) + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); + + if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug && + !Config.OnlyKeepDebug && !Config.StripUnneeded && + Config.DiscardMode == DiscardType::None && !Config.StripAllGNU && + Config.SymbolsToRemove.empty()) + Config.StripAll = true; + + if (Config.DiscardMode == DiscardType::All) { + Config.StripDebug = true; + ELFConfig.KeepFileSymbols = true; + } + + Config.DeterministicArchives = + InputArgs.hasFlag(STRIP_enable_deterministic_archives, + STRIP_disable_deterministic_archives, /*default=*/true); + + Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates); + Config.InputFormat = FileFormat::Unspecified; + Config.OutputFormat = FileFormat::Unspecified; + + DriverConfig DC; + if (Positional.size() == 1) { + Config.InputFilename = Positional[0]; + Config.OutputFilename = + InputArgs.getLastArgValue(STRIP_output, Positional[0]); + DC.CopyConfigs.push_back(std::move(ConfigMgr)); + } else { + StringMap<unsigned> InputFiles; + for (StringRef Filename : Positional) { + if (InputFiles[Filename]++ == 1) { + if (Filename == "-") + return createStringError( + errc::invalid_argument, + "cannot specify '-' as an input file more than once"); + if (Error E = ErrorCallback(createStringError( + errc::invalid_argument, "'%s' was already specified", + Filename.str().c_str()))) + return std::move(E); + } + Config.InputFilename = Filename; + Config.OutputFilename = Filename; + DC.CopyConfigs.push_back(ConfigMgr); + } + } + + if (Config.PreserveDates && (is_contained(Positional, "-") || + InputArgs.getLastArgValue(STRIP_output) == "-")) + return createStringError(errc::invalid_argument, + "--preserve-dates requires a file"); + + return std::move(DC); +}
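The multi-input path of parseStripOptions above detects repeated files with a StringMap occurrence counter; the post-increment test fires exactly on the second mention of a file. The same idiom in isolation (helper name is illustrative):

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <vector>

static std::vector<llvm::StringRef>
findDuplicates(const std::vector<llvm::StringRef> &Inputs) {
  llvm::StringMap<unsigned> Seen;
  std::vector<llvm::StringRef> Dupes;
  for (llvm::StringRef F : Inputs)
    if (Seen[F]++ == 1) // 0 on first sight, 1 on the second: flag once
      Dupes.push_back(F);
  return Dupes;
}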
 diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.h b/llvm/tools/llvm-objcopy/ObjcopyOptions.h new file mode 100644 index 000000000000..f7fa2af304d7 --- /dev/null +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.h @@ -0,0 +1,58 @@ +//===- ObjcopyOptions.h ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_OBJCOPY_OBJCOPYOPTIONS_H +#define LLVM_TOOLS_LLVM_OBJCOPY_OBJCOPYOPTIONS_H + +#include "llvm/ObjCopy/ConfigManager.h" +#include "llvm/Support/Allocator.h" +#include <vector> + +namespace llvm { +namespace objcopy { + +// Configuration for the overall invocation of this tool. When invoked as +// objcopy, will always contain exactly one CopyConfig. When invoked as strip, +// will contain one or more CopyConfigs. +struct DriverConfig { + SmallVector<ConfigManager> CopyConfigs; + BumpPtrAllocator Alloc; +}; + +// ParseObjcopyOptions returns the config and sets the input arguments. If a +// help flag is set then ParseObjcopyOptions will print the help message and +// exit. ErrorCallback is used to handle recoverable errors. An Error returned +// by the callback aborts the parsing and is then returned by this function. +Expected<DriverConfig> +parseObjcopyOptions(ArrayRef<const char *> ArgsArr, + llvm::function_ref<Error(Error)> ErrorCallback); + +// ParseInstallNameToolOptions returns the config and sets the input arguments. +// If a help flag is set then ParseInstallNameToolOptions will print the help +// message and exit. +Expected<DriverConfig> +parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr); + +// ParseBitcodeStripOptions returns the config and sets the input arguments. +// If a help flag is set then ParseBitcodeStripOptions will print the help +// message and exit. +Expected<DriverConfig> +parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr, + llvm::function_ref<Error(Error)> ErrorCallback); + +// ParseStripOptions returns the config and sets the input arguments. If a +// help flag is set then ParseStripOptions will print the help message and +// exit. ErrorCallback is used to handle recoverable errors. An Error returned +// by the callback aborts the parsing and is then returned by this function. +Expected<DriverConfig> +parseStripOptions(ArrayRef<const char *> ArgsArr, + llvm::function_ref<Error(Error)> ErrorCallback); +} // namespace objcopy +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_OBJCOPY_OBJCOPYOPTIONS_H
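A sketch of how a driver might consume the interface declared above; the warning callback follows the tools' convention of printing and returning success so parsing continues (the run helper and its body are illustrative, not the tool's actual driver):

#include "ObjcopyOptions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Error reportWarning(llvm::Error E) {
  llvm::errs() << "warning: " << llvm::toString(std::move(E)) << '\n';
  return llvm::Error::success(); // recoverable: keep parsing
}

static llvm::Error run(llvm::ArrayRef<const char *> Args) {
  llvm::Expected<llvm::objcopy::DriverConfig> DC =
      llvm::objcopy::parseObjcopyOptions(Args, reportWarning);
  if (!DC)
    return DC.takeError(); // includes errors forwarded from the callback
  for (llvm::objcopy::ConfigManager &CM : DC->CopyConfigs)
    (void)CM; // each per-file config is handed to the copy routine
  return llvm::Error::success();
}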
Supported " - "styles: 'zlib-gnu' and 'zlib'">; + "formats: 'zlib'">; def decompress_debug_sections : Flag<["--"], "decompress-debug-sections">, HelpText<"Decompress DWARF debug sections.">; defm split_dwo @@ -222,5 +222,5 @@ defm add_symbol MetaVarName<"name=[section:]value[,flags]">; defm update_section - : Eq<"update-section", "Add section with contents from a file .">, + : Eq<"update-section", "Replace the contents of section with contents from a file .">, MetaVarName<"name=file">; diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index a5963985f78a..aa262152ed64 100644 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -6,23 +6,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm-objcopy.h" -#include "COFF/COFFConfig.h" -#include "COFF/COFFObjcopy.h" -#include "CommonConfig.h" -#include "ConfigManager.h" -#include "ELF/ELFConfig.h" -#include "ELF/ELFObjcopy.h" -#include "MachO/MachOConfig.h" -#include "MachO/MachOObjcopy.h" -#include "wasm/WasmConfig.h" -#include "wasm/WasmObjcopy.h" - +#include "ObjcopyOptions.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/ObjCopy/COFF/COFFConfig.h" +#include "llvm/ObjCopy/COFF/COFFObjcopy.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/ELF/ELFConfig.h" +#include "llvm/ObjCopy/ELF/ELFObjcopy.h" +#include "llvm/ObjCopy/MachO/MachOConfig.h" +#include "llvm/ObjCopy/MachO/MachOObjcopy.h" +#include "llvm/ObjCopy/ObjCopy.h" +#include "llvm/ObjCopy/wasm/WasmConfig.h" +#include "llvm/ObjCopy/wasm/WasmObjcopy.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" @@ -42,6 +41,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/Memory.h" @@ -87,7 +87,7 @@ static Expected getDriverConfig(ArrayRef Args) { }; if (Is("bitcode-strip") || Is("bitcode_strip")) - return parseBitcodeStripOptions(Args); + return parseBitcodeStripOptions(Args, reportWarning); else if (Is("strip")) return parseStripOptions(Args, reportWarning); else if (Is("install-name-tool") || Is("install_name_tool")) @@ -96,40 +96,6 @@ static Expected getDriverConfig(ArrayRef Args) { return parseObjcopyOptions(Args, reportWarning); } -// For regular archives this function simply calls llvm::writeArchive, -// For thin archives it writes the archive file itself as well as its members. -static Error deepWriteArchive(StringRef ArcName, - ArrayRef NewMembers, - bool WriteSymtab, object::Archive::Kind Kind, - bool Deterministic, bool Thin) { - if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind, - Deterministic, Thin)) - return createFileError(ArcName, std::move(E)); - - if (!Thin) - return Error::success(); - - for (const NewArchiveMember &Member : NewMembers) { - // For regular files (as is the case for deepWriteArchive), - // FileOutputBuffer::create will return OnDiskBuffer. - // OnDiskBuffer uses a temporary file and then renames it. So in reality - // there is no inefficiency / duplicated in-memory buffers in this case. 
For - // now in-memory buffers can not be completely avoided since - // NewArchiveMember still requires them even though writeArchive does not - // write them on disk. - Expected> FB = - FileOutputBuffer::create(Member.MemberName, Member.Buf->getBufferSize(), - FileOutputBuffer::F_executable); - if (!FB) - return FB.takeError(); - std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(), - (*FB)->getBufferStart()); - if (Error E = (*FB)->commit()) - return E; - } - return Error::success(); -} - /// The function executeObjcopyOnIHex does the dispatch based on the format /// of the output specified by the command line options. static Error executeObjcopyOnIHex(ConfigManager &ConfigMgr, MemoryBuffer &In, @@ -166,162 +132,16 @@ static Error executeObjcopyOnRawBinary(ConfigManager &ConfigMgr, llvm_unreachable("unsupported output format"); } -/// The function executeObjcopyOnBinary does the dispatch based on the format -/// of the input binary (ELF, MachO or COFF). -static Error executeObjcopyOnBinary(const MultiFormatConfig &Config, - object::Binary &In, raw_ostream &Out) { - if (auto *ELFBinary = dyn_cast(&In)) { - Expected ELFConfig = Config.getELFConfig(); - if (!ELFConfig) - return ELFConfig.takeError(); - - return elf::executeObjcopyOnBinary(Config.getCommonConfig(), *ELFConfig, - *ELFBinary, Out); - } else if (auto *COFFBinary = dyn_cast(&In)) { - Expected COFFConfig = Config.getCOFFConfig(); - if (!COFFConfig) - return COFFConfig.takeError(); - - return coff::executeObjcopyOnBinary(Config.getCommonConfig(), *COFFConfig, - *COFFBinary, Out); - } else if (auto *MachOBinary = dyn_cast(&In)) { - Expected MachOConfig = Config.getMachOConfig(); - if (!MachOConfig) - return MachOConfig.takeError(); - - return macho::executeObjcopyOnBinary(Config.getCommonConfig(), *MachOConfig, - *MachOBinary, Out); - } else if (auto *MachOUniversalBinary = - dyn_cast(&In)) { - return macho::executeObjcopyOnMachOUniversalBinary( - Config, *MachOUniversalBinary, Out); - } else if (auto *WasmBinary = dyn_cast(&In)) { - Expected WasmConfig = Config.getWasmConfig(); - if (!WasmConfig) - return WasmConfig.takeError(); - - return objcopy::wasm::executeObjcopyOnBinary(Config.getCommonConfig(), - *WasmConfig, *WasmBinary, Out); - } else - return createStringError(object_error::invalid_file_type, - "unsupported object file format"); -} - -namespace llvm { -namespace objcopy { - -Expected> -createNewArchiveMembers(const MultiFormatConfig &Config, const Archive &Ar) { - std::vector NewArchiveMembers; - Error Err = Error::success(); - for (const Archive::Child &Child : Ar.children(Err)) { - Expected ChildNameOrErr = Child.getName(); - if (!ChildNameOrErr) - return createFileError(Ar.getFileName(), ChildNameOrErr.takeError()); - - Expected> ChildOrErr = Child.getAsBinary(); - if (!ChildOrErr) - return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")", - ChildOrErr.takeError()); - - SmallVector Buffer; - raw_svector_ostream MemStream(Buffer); - - if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MemStream)) - return std::move(E); - - Expected Member = NewArchiveMember::getOldMember( - Child, Config.getCommonConfig().DeterministicArchives); - if (!Member) - return createFileError(Ar.getFileName(), Member.takeError()); - - Member->Buf = std::make_unique( - std::move(Buffer), ChildNameOrErr.get(), - /*RequiresNullTerminator=*/false); - Member->MemberName = Member->Buf->getBufferIdentifier(); - NewArchiveMembers.push_back(std::move(*Member)); - } - if (Err) - return 
createFileError(Config.getCommonConfig().InputFilename, - std::move(Err)); - return std::move(NewArchiveMembers); -} - -} // end namespace objcopy -} // end namespace llvm - -static Error executeObjcopyOnArchive(const ConfigManager &ConfigMgr, - const object::Archive &Ar) { - Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr = - createNewArchiveMembers(ConfigMgr, Ar); - if (!NewArchiveMembersOrErr) - return NewArchiveMembersOrErr.takeError(); - const CommonConfig &Config = ConfigMgr.getCommonConfig(); - return deepWriteArchive(Config.OutputFilename, *NewArchiveMembersOrErr, - Ar.hasSymbolTable(), Ar.kind(), - Config.DeterministicArchives, Ar.isThin()); -} - -static Error restoreStatOnFile(StringRef Filename, - const sys::fs::file_status &Stat, - const ConfigManager &ConfigMgr) { - int FD; - const CommonConfig &Config = ConfigMgr.getCommonConfig(); - - // Writing to stdout should not be treated as an error here, just - // do not set access/modification times or permissions. - if (Filename == "-") - return Error::success(); - - if (auto EC = - sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting)) - return createFileError(Filename, EC); - - if (Config.PreserveDates) - if (auto EC = sys::fs::setLastAccessAndModificationTime( - FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime())) - return createFileError(Filename, EC); - - sys::fs::file_status OStat; - if (std::error_code EC = sys::fs::status(FD, OStat)) - return createFileError(Filename, EC); - if (OStat.type() == sys::fs::file_type::regular_file) { -#ifndef _WIN32 - // Keep ownership if llvm-objcopy is called under root. - if (Config.InputFilename == Config.OutputFilename && OStat.getUser() == 0) - sys::fs::changeFileOwnership(FD, Stat.getUser(), Stat.getGroup()); -#endif - - sys::fs::perms Perm = Stat.permissions(); - if (Config.InputFilename != Config.OutputFilename) - Perm = static_cast<sys::fs::perms>(Perm & ~sys::fs::getUmask() & ~06000); -#ifdef _WIN32 - if (auto EC = sys::fs::setPermissions(Filename, Perm)) -#else - if (auto EC = sys::fs::setPermissions(FD, Perm)) -#endif - return createFileError(Filename, EC); - } - - if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) - return createFileError(Filename, EC); - - return Error::success(); -} - /// The function executeObjcopy does the higher level dispatch based on the type /// of input (raw binary, archive or single object file) and takes care of the /// format-agnostic modifications, i.e. preserving dates. 
static Error executeObjcopy(ConfigManager &ConfigMgr) { CommonConfig &Config = ConfigMgr.Common; - sys::fs::file_status Stat; - if (Config.InputFilename != "-") { - if (auto EC = sys::fs::status(Config.InputFilename, Stat)) - return createFileError(Config.InputFilename, EC); - } else { - Stat.permissions(static_cast<sys::fs::perms>(0777)); - } + Expected<FilePermissionsApplier> PermsApplierOrErr = + FilePermissionsApplier::create(Config.InputFilename); + if (!PermsApplierOrErr) + return PermsApplierOrErr.takeError(); std::function<Error(raw_ostream &)> ObjcopyFunc; @@ -390,19 +210,20 @@ static Error executeObjcopy(ConfigManager &ConfigMgr) { } } - if (Error E = restoreStatOnFile(Config.OutputFilename, Stat, ConfigMgr)) + if (Error E = + PermsApplierOrErr->apply(Config.OutputFilename, Config.PreserveDates)) return E; - if (!Config.SplitDWO.empty()) { - Stat.permissions(static_cast<sys::fs::perms>(0666)); - if (Error E = restoreStatOnFile(Config.SplitDWO, Stat, ConfigMgr)) + if (!Config.SplitDWO.empty()) + if (Error E = + PermsApplierOrErr->apply(Config.SplitDWO, Config.PreserveDates, + static_cast<sys::fs::perms>(0666))) return E; - } return Error::success(); } -int main(int argc, char **argv) { +int llvm_objcopy_main(int argc, char **argv) { InitLLVM X(argc, argv); ToolName = argv[0]; diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.h b/llvm/tools/llvm-objcopy/llvm-objcopy.h deleted file mode 100644 index 182c95dc64c8..000000000000 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.h +++ /dev/null @@ -1,34 +0,0 @@ -//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H -#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H - -#include "llvm/Support/Error.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - -struct NewArchiveMember; - -namespace object { - -class Archive; - -} // end namespace object - -namespace objcopy { -class MultiFormatConfig; -Expected<std::vector<NewArchiveMember>> -createNewArchiveMembers(const MultiFormatConfig &Config, - const object::Archive &Ar); - -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_OBJCOPY_H
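The new executeObjcopy body above replaces the hand-rolled stat/chmod logic with llvm::FilePermissionsApplier from llvm/Support/FileUtilities.h: capture the input's status once, then stamp permissions (and optionally dates) onto each output, with an optional override as done for the --split-dwo file. A sketch of that flow, assuming the create/apply signatures as of this import; the helper name is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"

static llvm::Error finalizeOutput(llvm::StringRef In, llvm::StringRef Out,
                                  bool PreserveDates) {
  llvm::Expected<llvm::FilePermissionsApplier> Applier =
      llvm::FilePermissionsApplier::create(In);
  if (!Applier)
    return Applier.takeError();
  // ... the output file is written here ...
  return Applier->apply(Out, PreserveDates); // copy perms, optionally dates
}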
 diff --git a/llvm/tools/llvm-objcopy/wasm/Object.cpp b/llvm/tools/llvm-objcopy/wasm/Object.cpp deleted file mode 100644 index e7a2956fedca..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/Object.cpp +++ /dev/null @@ -1,34 +0,0 @@ -//===- Object.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Object.h" - -#include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace objcopy { -namespace wasm { - -using namespace object; -using namespace llvm::wasm; - -void Object::addSectionWithOwnedContents( - Section NewSection, std::unique_ptr<MemoryBuffer> &&Content) { - Sections.push_back(NewSection); - OwnedContents.emplace_back(std::move(Content)); -} - -void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { - // TODO: remove reloc sections for the removed section, handle symbols, etc. - llvm::erase_if(Sections, ToRemove); -} - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/wasm/Object.h b/llvm/tools/llvm-objcopy/wasm/Object.h deleted file mode 100644 index 9db91c41e2e2..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/Object.h +++ /dev/null @@ -1,47 +0,0 @@ -//===- Object.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H -#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Object/Wasm.h" -#include "llvm/Support/MemoryBuffer.h" -#include <vector> - -namespace llvm { -namespace objcopy { -namespace wasm { - -struct Section { - // For now, each section is only an opaque binary blob with no distinction - // between custom and known sections. - uint8_t SectionType; - StringRef Name; - ArrayRef<uint8_t> Contents; -}; - -struct Object { - llvm::wasm::WasmObjectHeader Header; - // For now don't discriminate between kinds of sections. - std::vector<Section> Sections;
 - - void addSectionWithOwnedContents(Section NewSection, - std::unique_ptr<MemoryBuffer> &&Content); - void removeSections(function_ref<bool(const Section &)> ToRemove); - -private: - std::vector<std::unique_ptr<MemoryBuffer>> OwnedContents; -}; - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H diff --git a/llvm/tools/llvm-objcopy/wasm/Reader.cpp b/llvm/tools/llvm-objcopy/wasm/Reader.cpp deleted file mode 100644 index 13fa84ad8020..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/Reader.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===- Reader.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Reader.h" - -namespace llvm { -namespace objcopy { -namespace wasm { - -using namespace object; -using namespace llvm::wasm; - -Expected<std::unique_ptr<Object>> Reader::create() const { - auto Obj = std::make_unique<Object>(); - Obj->Header = WasmObj.getHeader(); - std::vector<Section> Sections;
 - Obj->Sections.reserve(WasmObj.getNumSections()); - for (const SectionRef &Sec : WasmObj.sections()) { - const WasmSection &WS = WasmObj.getWasmSection(Sec); - Obj->Sections.push_back( - {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content}); - } - return std::move(Obj); -} - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/wasm/Reader.h b/llvm/tools/llvm-objcopy/wasm/Reader.h deleted file mode 100644 index 2dcf7dde029a..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/Reader.h +++ /dev/null @@ -1,31 +0,0 @@ -//===- Reader.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H -#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H - -#include "Object.h" - -namespace llvm { -namespace objcopy { -namespace wasm { - -class Reader { -public: - explicit Reader(const object::WasmObjectFile &O) : WasmObj(O) {} - Expected<std::unique_ptr<Object>> create() const; - -private: - const object::WasmObjectFile &WasmObj; -}; - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H diff --git a/llvm/tools/llvm-objcopy/wasm/WasmConfig.h b/llvm/tools/llvm-objcopy/wasm/WasmConfig.h deleted file mode 100644 index 4e40926ae453..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/WasmConfig.h +++ /dev/null @@ -1,21 +0,0 @@ -//===- WasmConfig.h ---------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMCONFIG_H -#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMCONFIG_H - -namespace llvm { -namespace objcopy { - -// Wasm specific configuration for copying/stripping a single file. -struct WasmConfig {}; - -} // namespace objcopy -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMCONFIG_H diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp deleted file mode 100644 index 397d09757e54..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===- WasmObjcopy.cpp ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "WasmObjcopy.h" -#include "CommonConfig.h" -#include "Object.h" -#include "Reader.h" -#include "Writer.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/FileOutputBuffer.h" - -namespace llvm { -namespace objcopy { -namespace wasm { - -using namespace object; -using SectionPred = std::function<bool(const Section &Sec)>; - -static bool isDebugSection(const Section &Sec) { - return Sec.Name.startswith(".debug"); -} - -static bool isLinkerSection(const Section &Sec) { - return Sec.Name.startswith("reloc.") || Sec.Name == "linking"; -} - -static bool isNameSection(const Section &Sec) { return Sec.Name == "name"; } - -// Sections which are known to be "comments" or informational and do not affect -// program semantics. -static bool isCommentSection(const Section &Sec) { - return Sec.Name == "producers"; -} - -static Error dumpSectionToFile(StringRef SecName, StringRef Filename, - Object &Obj) { - for (const Section &Sec : Obj.Sections) { - if (Sec.Name == SecName) { - ArrayRef<uint8_t> Contents = Sec.Contents; - Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Filename, Contents.size()); - if (!BufferOrErr) - return BufferOrErr.takeError(); - std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); - std::copy(Contents.begin(), Contents.end(), Buf->getBufferStart()); - if (Error E = Buf->commit()) - return E; - return Error::success(); - } - } - return createStringError(errc::invalid_argument, "section '%s' not found", - SecName.str().c_str()); -} - -static void removeSections(const CommonConfig &Config, Object &Obj) { - SectionPred RemovePred = [](const Section &) { return false; }; - - // Explicitly-requested sections. - if (!Config.ToRemove.empty()) { - RemovePred = [&Config](const Section &Sec) { - return Config.ToRemove.matches(Sec.Name); - }; - } - - if (Config.StripDebug) { - RemovePred = [RemovePred](const Section &Sec) { - return RemovePred(Sec) || isDebugSection(Sec); - }; - } - - if (Config.StripAll) { - RemovePred = [RemovePred](const Section &Sec) { - return RemovePred(Sec) || isDebugSection(Sec) || isLinkerSection(Sec) || - isNameSection(Sec) || isCommentSection(Sec); - }; - } - - if (Config.OnlyKeepDebug) { - RemovePred = [&Config](const Section &Sec) { - // Keep debug sections, unless explicitly requested to remove. - // Remove everything else, including known sections. - return Config.ToRemove.matches(Sec.Name) || !isDebugSection(Sec); - }; - } - - if (!Config.OnlySection.empty()) { - RemovePred = [&Config](const Section &Sec) { - // Explicitly keep these sections regardless of previous removes. - // Remove everything else, inluding known sections. - return !Config.OnlySection.matches(Sec.Name); - }; - } - - if (!Config.KeepSection.empty()) { - RemovePred = [&Config, RemovePred](const Section &Sec) { - // Explicitly keep these sections regardless of previous removes. - if (Config.KeepSection.matches(Sec.Name)) - return false; - // Otherwise defer to RemovePred. - return RemovePred(Sec); - }; - } - - Obj.removeSections(RemovePred); -} - -static Error handleArgs(const CommonConfig &Config, Object &Obj) { - // Only support AddSection, DumpSection, RemoveSection for now. 
- for (StringRef Flag : Config.DumpSection) { - StringRef SecName; - StringRef FileName; - std::tie(SecName, FileName) = Flag.split("="); - if (Error E = dumpSectionToFile(SecName, FileName, Obj)) - return createFileError(FileName, std::move(E)); - } - - removeSections(Config, Obj); - - for (StringRef Flag : Config.AddSection) { - StringRef SecName, FileName; - std::tie(SecName, FileName) = Flag.split("="); - ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = - MemoryBuffer::getFile(FileName); - if (!BufOrErr) - return createFileError(FileName, errorCodeToError(BufOrErr.getError())); - Section Sec; - Sec.SectionType = llvm::wasm::WASM_SEC_CUSTOM; - Sec.Name = SecName; - std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr); - Sec.Contents = makeArrayRef( - reinterpret_cast<const uint8_t *>(Buf->getBufferStart()), - Buf->getBufferSize()); - Obj.addSectionWithOwnedContents(Sec, std::move(Buf)); - } - - return Error::success(); -} - -Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &, - object::WasmObjectFile &In, raw_ostream &Out) { - Reader TheReader(In); - Expected<std::unique_ptr<Object>> ObjOrErr = TheReader.create(); - if (!ObjOrErr) - return createFileError(Config.InputFilename, ObjOrErr.takeError()); - Object *Obj = ObjOrErr->get(); - assert(Obj && "Unable to deserialize Wasm object"); - if (Error E = handleArgs(Config, *Obj)) - return E; - Writer TheWriter(*Obj, Out); - if (Error E = TheWriter.write()) - return createFileError(Config.OutputFilename, std::move(E)); - return Error::success(); -} - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h deleted file mode 100644 index 28268e38c584..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h +++ /dev/null @@ -1,32 +0,0 @@ -//===- WasmObjcopy.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H -#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H - -namespace llvm { -class Error; -class raw_ostream; - -namespace object { -class WasmObjectFile; -} // end namespace object - -namespace objcopy { -struct CommonConfig; -struct WasmConfig; - -namespace wasm { -Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &, - object::WasmObjectFile &In, raw_ostream &Out); - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H diff --git a/llvm/tools/llvm-objcopy/wasm/Writer.cpp b/llvm/tools/llvm-objcopy/wasm/Writer.cpp deleted file mode 100644 index 2fad9e60c50f..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/Writer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===- Writer.cpp ---------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Writer.h" -#include "llvm/BinaryFormat/Wasm.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace objcopy { -namespace wasm { - -using namespace object; -using namespace llvm::wasm; - -Writer::SectionHeader Writer::createSectionHeader(const Section &S, - size_t &SectionSize) { - SectionHeader Header; - raw_svector_ostream OS(Header); - OS << S.SectionType; - bool HasName = S.SectionType == WASM_SEC_CUSTOM; - SectionSize = S.Contents.size(); - if (HasName) - SectionSize += getULEB128Size(S.Name.size()) + S.Name.size(); - // Pad the LEB value out to 5 bytes to make it a predictable size, and - // match the behavior of clang. - encodeULEB128(SectionSize, OS, 5); - if (HasName) { - encodeULEB128(S.Name.size(), OS); - OS << S.Name; - } - // Total section size is the content size plus 1 for the section type and - // 5 for the LEB-encoded size. - SectionSize = SectionSize + 1 + 5; - return Header; -} - -size_t Writer::finalize() { - size_t ObjectSize = sizeof(WasmMagic) + sizeof(WasmVersion); - SectionHeaders.reserve(Obj.Sections.size()); - // Finalize the headers of each section so we know the total size. - for (const Section &S : Obj.Sections) { - size_t SectionSize; - SectionHeaders.push_back(createSectionHeader(S, SectionSize)); - ObjectSize += SectionSize; - } - return ObjectSize; -} - -Error Writer::write() { - size_t TotalSize = finalize(); - Out.reserveExtraSpace(TotalSize); - - // Write the header. - Out.write(Obj.Header.Magic.data(), Obj.Header.Magic.size()); - uint32_t Version; - support::endian::write32le(&Version, Obj.Header.Version); - Out.write(reinterpret_cast<const char *>(&Version), sizeof(Version)); - - // Write each section. - for (size_t I = 0, S = SectionHeaders.size(); I < S; ++I) { - Out.write(SectionHeaders[I].data(), SectionHeaders[I].size()); - Out.write(reinterpret_cast<const char *>(Obj.Sections[I].Contents.data()), - Obj.Sections[I].Contents.size()); - } - - return Error::success(); -} - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm
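For reference, the wasm section header that the removed createSectionHeader emitted (the code now lives under llvm/ObjCopy) is: one type byte, a ULEB128 payload size padded to five bytes to match clang's output, and, for custom sections (type 0), a length-prefixed name counted inside that payload size. A standalone sketch of the same encoding:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

static void emitSectionHeader(llvm::raw_ostream &OS, uint8_t Type,
                              llvm::StringRef Name, uint64_t ContentSize) {
  OS << char(Type); // one-byte section ID
  uint64_t PayloadSize = ContentSize;
  bool IsCustom = Type == 0; // only custom sections carry a name
  if (IsCustom)
    PayloadSize += llvm::getULEB128Size(Name.size()) + Name.size();
  llvm::encodeULEB128(PayloadSize, OS, /*PadTo=*/5);
  if (IsCustom) {
    llvm::encodeULEB128(Name.size(), OS);
    OS << Name;
  }
}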
 diff --git a/llvm/tools/llvm-objcopy/wasm/Writer.h b/llvm/tools/llvm-objcopy/wasm/Writer.h deleted file mode 100644 index 4404cd8caf84..000000000000 --- a/llvm/tools/llvm-objcopy/wasm/Writer.h +++ /dev/null @@ -1,49 +0,0 @@ -//===- Writer.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H -#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H - -#include "Object.h" -#include <cstdint> -#include <vector> - -namespace llvm { -namespace objcopy { -namespace wasm { - -class Writer { -public: - Writer(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {} - Error write(); - -private: - using SectionHeader = SmallVector<char, 8>; - Object &Obj; - raw_ostream &Out; - std::vector<SectionHeader> SectionHeaders; - - /// Generate a wasm section section header for S. - /// The header consists of - /// * A one-byte section ID (aka the section type). - /// * The size of the section contents, encoded as ULEB128. - /// * If the section is a custom section (type 0) it also has a name, which is - /// encoded as a length-prefixed string. The encoded section size *includes* - /// this string. - /// See https://webassembly.github.io/spec/core/binary/modules.html#sections - /// Return the header and store the total size in SectionSize. - static SectionHeader createSectionHeader(const Section &S, - size_t &SectionSize); - size_t finalize(); -}; - -} // end namespace wasm -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp index 32fdd1a4d5c3..e085e26c3cd0 100644 --- a/llvm/tools/llvm-objdump/COFFDump.cpp +++ b/llvm/tools/llvm-objdump/COFFDump.cpp @@ -430,21 +430,12 @@ static void printTLSDirectory(const COFFObjectFile *Obj) { if (!PE32Header && !PE32PlusHeader) return; - const data_directory *DataDir = Obj->getDataDirectory(COFF::TLS_TABLE); - if (!DataDir || DataDir->RelativeVirtualAddress == 0) - return; - - uintptr_t IntPtr = 0; - if (Error E = - Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)) - reportError(std::move(E), Obj->getFileName()); - if (PE32Header) { - auto *TLSDir = reinterpret_cast<const coff_tls_directory32 *>(IntPtr); - printTLSDirectoryT(TLSDir); + if (auto *TLSDir = Obj->getTLSDirectory32()) + printTLSDirectoryT(TLSDir); } else { - auto *TLSDir = reinterpret_cast<const coff_tls_directory64 *>(IntPtr); - printTLSDirectoryT(TLSDir); + if (auto *TLSDir = Obj->getTLSDirectory64()) + printTLSDirectoryT(TLSDir); } outs() << "\n"; @@ -459,19 +450,10 @@ static void printLoadConfiguration(const COFFObjectFile *Obj) { if (Obj->getMachine() != COFF::IMAGE_FILE_MACHINE_I386) return; - const data_directory *DataDir = Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE); - if (!DataDir) - reportError("no load config data dir", Obj->getFileName()); - - uintptr_t IntPtr = 0; - if (DataDir->RelativeVirtualAddress == 0) + auto *LoadConf = Obj->getLoadConfig32(); + if (!LoadConf) return; - if (Error E = - Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)) - reportError(std::move(E), Obj->getFileName()); - - auto *LoadConf = reinterpret_cast<const coff_load_configuration32 *>(IntPtr); outs() << "Load configuration:" << "\n Timestamp: " << LoadConf->TimeDateStamp << "\n Major Version: " << LoadConf->MajorVersion @@ -544,11 +526,11 @@ static void printImportTables(const COFFObjectFile *Obj) { // Prints export tables. The export table is a table containing the list of // exported symbol from the DLL. 
static void printExportTable(const COFFObjectFile *Obj) { - outs() << "Export Table:\n"; export_directory_iterator I = Obj->export_directory_begin(); export_directory_iterator E = Obj->export_directory_end(); if (I == E) return; + outs() << "Export Table:\n"; StringRef DllName; uint32_t OrdinalBase; if (I->getDllName(DllName)) diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index 98e71497d022..ca73dafe2b8e 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -171,8 +171,12 @@ uint64_t objdump::getELFSectionLMA(const object::ELFSectionRef &Sec) { template static void printDynamicSection(const ELFFile &Elf, StringRef Filename) { - ArrayRef DynamicEntries = - unwrapOrError(Elf.dynamicEntries(), Filename); + auto DynamicEntriesOrErr = Elf.dynamicEntries(); + if (!DynamicEntriesOrErr) { + reportWarning(toString(DynamicEntriesOrErr.takeError()), Filename); + return; + } + ArrayRef DynamicEntries = *DynamicEntriesOrErr; // Find the maximum tag name length to format the value column properly. size_t MaxLen = 0; diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index 31867625f0e5..60c34158941b 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -81,6 +81,7 @@ bool objdump::DataInCode; bool objdump::FunctionStarts; bool objdump::LinkOptHints; bool objdump::InfoPlist; +bool objdump::DyldInfo; bool objdump::DylibsUsed; bool objdump::DylibId; bool objdump::Verbose; @@ -111,6 +112,7 @@ void objdump::parseMachOOptions(const llvm::opt::InputArgList &InputArgs) { FunctionStarts = InputArgs.hasArg(OBJDUMP_function_starts); LinkOptHints = InputArgs.hasArg(OBJDUMP_link_opt_hints); InfoPlist = InputArgs.hasArg(OBJDUMP_info_plist); + DyldInfo = InputArgs.hasArg(OBJDUMP_dyld_info); DylibsUsed = InputArgs.hasArg(OBJDUMP_dylibs_used); DylibId = InputArgs.hasArg(OBJDUMP_dylib_id); Verbose = !InputArgs.hasArg(OBJDUMP_non_verbose); @@ -188,8 +190,12 @@ typedef DiceTable::iterator dice_table_iterator; namespace { struct ScopedXarFile { xar_t xar; - ScopedXarFile(const char *filename, int32_t flags) - : xar(xar_open(filename, flags)) {} + ScopedXarFile(const char *filename, int32_t flags) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + xar = xar_open(filename, flags); +#pragma clang diagnostic pop + } ~ScopedXarFile() { if (xar) xar_close(xar); @@ -1178,6 +1184,20 @@ static void PrintLinkOptHints(MachOObjectFile *O) { } } +static void printMachOChainedFixups(object::MachOObjectFile *Obj) { + Error Err = Error::success(); + for (const object::MachOChainedFixupEntry &Entry : Obj->fixupTable(Err)) { + (void)Entry; + } + if (Err) + reportError(std::move(Err), Obj->getFileName()); +} + +static void PrintDyldInfo(MachOObjectFile *O) { + outs() << "dyld information:" << '\n'; + printMachOChainedFixups(O); +} + static void PrintDylibs(MachOObjectFile *O, bool JustId) { unsigned Index = 0; for (const auto &Load : O->load_commands()) { @@ -1896,8 +1916,8 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF, // UniversalHeaders or ArchiveHeaders. 
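// [Editor's note: illustrative sketch, not part of the vendored patch.]
// Several hunks above (e.g. printDynamicSection in ELFDump.cpp) replace a
// fatal unwrapOrError with "warn and skip" when an Expected<T> holds an
// error. The general shape of that idiom; dumpOrWarn and the int payload are
// hypothetical, the llvm::Expected/Error APIs are real:
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static void dumpOrWarn(llvm::Expected<int> ValOrErr) {
  if (!ValOrErr) {
    // An unconsumed llvm::Error aborts on destruction; toString() both
    // formats the message and consumes the error exactly once.
    llvm::errs() << "warning: " << llvm::toString(ValOrErr.takeError())
                 << "\n";
    return;
  }
  llvm::outs() << "value: " << *ValOrErr << "\n";
}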
if (Disassemble || Relocations || PrivateHeaders || ExportsTrie || Rebase || Bind || SymbolTable || LazyBind || WeakBind || IndirectSymbols || - DataInCode || FunctionStarts || LinkOptHints || DylibsUsed || DylibId || - Rpaths || ObjcMetaData || (!FilterSections.empty())) { + DataInCode || FunctionStarts || LinkOptHints || DyldInfo || DylibsUsed || + DylibId || Rpaths || ObjcMetaData || (!FilterSections.empty())) { if (LeadingHeaders) { outs() << Name; if (!ArchiveMemberName.empty()) @@ -1966,6 +1986,8 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF, DumpSectionContents(FileName, MachOOF, Verbose); if (InfoPlist) DumpInfoPlistSectionContents(FileName, MachOOF); + if (DyldInfo) + PrintDyldInfo(MachOOF); if (DylibsUsed) PrintDylibs(MachOOF, false); if (DylibId) @@ -2586,7 +2608,8 @@ struct DisassembleInfo { // value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic // information is returned then this function returns 1 else it returns 0. static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, - uint64_t Size, int TagType, void *TagBuf) { + uint64_t OpSize, uint64_t InstSize, int TagType, + void *TagBuf) { struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf; uint64_t value = op_info->Value; @@ -2603,7 +2626,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, unsigned int Arch = info->O->getArch(); if (Arch == Triple::x86) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -2683,7 +2706,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, return 0; } if (Arch == Triple::x86_64) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0) return 0; // For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external // relocation entries of a linked image (if any) for an entry that matches @@ -2715,7 +2738,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, // adds the Pc. But for x86_64 external relocation entries the Value // is the offset from the external symbol. if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; + op_info->Value -= Pc + InstSize; const char *name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data(); op_info->AddSymbol.Present = 1; @@ -2753,7 +2776,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, // adds the Pc. But for x86_64 external relocation entries the Value // is the offset from the external symbol. 
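// [Editor's note: illustrative sketch, not part of the vendored patch.]
// The change above subtracts Pc + InstSize (the address of the *next*
// instruction) instead of Pc + Offset + Size: an x86_64 PC-relative fixup is
// measured from the end of the whole instruction, not from the end of the
// immediate operand. Worked through with made-up numbers:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Pc = 0x1000;    // address of a call instruction
  uint64_t InstSize = 5;   // call rel32 occupies 5 bytes
  uint64_t Value = 0x2000; // address the relocation's symbol resolves to
  // Displacement actually encoded in the instruction stream:
  int64_t Disp = (int64_t)(Value - (Pc + InstSize));
  assert(Disp == 0xffb); // 0x2000 - 0x1005
  return 0;
}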
if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; + op_info->Value -= Pc + InstSize; const char *name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data(); unsigned Type = info->O->getAnyRelocationType(RE); @@ -2781,7 +2804,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, return 0; } if (Arch == Triple::arm) { - if (Offset != 0 || (Size != 4 && Size != 2)) + if (Offset != 0 || (InstSize != 4 && InstSize != 2)) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -2918,7 +2941,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, return 1; } if (Arch == Triple::aarch64) { - if (Offset != 0 || Size != 4) + if (Offset != 0 || InstSize != 4) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -9141,14 +9164,20 @@ static void PrintNoteLoadCommand(MachO::note_command Nt) { outs() << " size " << Nt.size << "\n"; } -static void PrintBuildToolVersion(MachO::build_tool_version bv) { - outs() << " tool " << MachOObjectFile::getBuildTool(bv.tool) << "\n"; +static void PrintBuildToolVersion(MachO::build_tool_version bv, bool verbose) { + outs() << " tool "; + if (verbose) + outs() << MachOObjectFile::getBuildTool(bv.tool); + else + outs() << bv.tool; + outs() << "\n"; outs() << " version " << MachOObjectFile::getVersionString(bv.version) << "\n"; } static void PrintBuildVersionLoadCommand(const MachOObjectFile *obj, - MachO::build_version_command bd) { + MachO::build_version_command bd, + bool verbose) { outs() << " cmd LC_BUILD_VERSION\n"; outs() << " cmdsize " << bd.cmdsize; if (bd.cmdsize != @@ -9157,8 +9186,12 @@ static void PrintBuildVersionLoadCommand(const MachOObjectFile *obj, outs() << " Incorrect size\n"; else outs() << "\n"; - outs() << " platform " << MachOObjectFile::getBuildPlatform(bd.platform) - << "\n"; + outs() << " platform "; + if (verbose) + outs() << MachOObjectFile::getBuildPlatform(bd.platform); + else + outs() << bd.platform; + outs() << "\n"; if (bd.sdk) outs() << " sdk " << MachOObjectFile::getVersionString(bd.sdk) << "\n"; @@ -9169,7 +9202,7 @@ static void PrintBuildVersionLoadCommand(const MachOObjectFile *obj, outs() << " ntools " << bd.ntools << "\n"; for (unsigned i = 0; i < bd.ntools; ++i) { MachO::build_tool_version bv = obj->getBuildToolVersion(i); - PrintBuildToolVersion(bv); + PrintBuildToolVersion(bv, verbose); } } @@ -10146,7 +10179,7 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype, } else if (Command.C.cmd == MachO::LC_BUILD_VERSION) { MachO::build_version_command Bv = Obj->getBuildVersionLoadCommand(Command); - PrintBuildVersionLoadCommand(Obj, Bv); + PrintBuildVersionLoadCommand(Obj, Bv, verbose); } else if (Command.C.cmd == MachO::LC_SOURCE_VERSION) { MachO::source_version_command Sd = Obj->getSourceVersionCommand(Command); PrintSourceVersionCommand(Sd); diff --git a/llvm/tools/llvm-objdump/MachODump.h b/llvm/tools/llvm-objdump/MachODump.h index 7568062bd6b0..12783e15b425 100644 --- a/llvm/tools/llvm-objdump/MachODump.h +++ b/llvm/tools/llvm-objdump/MachODump.h @@ -36,6 +36,7 @@ void parseMachOOptions(const llvm::opt::InputArgList &InputArgs); extern bool Bind; extern bool DataInCode; extern std::string DisSymName; +extern bool DyldInfo; extern bool DylibId; extern bool DylibsUsed; extern bool ExportsTrie; diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td index 9f27a6cdf163..00d7d8ccff17 100644 --- 
a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -81,6 +81,9 @@ def dwarf_EQ : Joined<["--"], "dwarf=">, def fault_map_section : Flag<["--"], "fault-map-section">, HelpText<"Display the content of the fault map section">; +def offloading : Flag<["--"], "offloading">, + HelpText<"Display the content of the offloading section">; + def file_headers : Flag<["--"], "file-headers">, HelpText<"Display the contents of the overall file header">; def : Flag<["-"], "f">, Alias, @@ -296,6 +299,12 @@ def info_plist : Flag<["--"], "info-plist">, "Mach-O objects (requires --macho)">, Group; +def dyld_info : Flag<["--"], "dyld_info">, + HelpText<"Print bind and rebase information used by dyld to resolve " + "external references in a final linked binary " + "(requires --macho)">, + Group; + def dylibs_used : Flag<["--"], "dylibs-used">, HelpText<"Print the shared libraries used for linked " "Mach-O files (requires --macho)">, diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp new file mode 100644 index 000000000000..7d4461f0a70e --- /dev/null +++ b/llvm/tools/llvm-objdump/OffloadDump.cpp @@ -0,0 +1,102 @@ +//===-- OffloadDump.cpp - Offloading dumper ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the offloading-specific dumper for llvm-objdump. +/// +//===----------------------------------------------------------------------===// +#include "OffloadDump.h" +#include "llvm-objdump.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::objdump; + +constexpr const char OffloadSectionString[] = ".llvm.offloading"; + +/// Get the printable name of the image kind. +static StringRef getImageName(const OffloadBinary &OB) { + switch (OB.getImageKind()) { + case IMG_Object: + return "elf"; + case IMG_Bitcode: + return "llvm ir"; + case IMG_Cubin: + return "cubin"; + case IMG_Fatbinary: + return "fatbinary"; + case IMG_PTX: + return "ptx"; + default: + return ""; + } +} + +static void printBinary(const OffloadBinary &OB, uint64_t Index) { + outs() << "\nOFFLOADING IMAGE [" << Index << "]:\n"; + outs() << left_justify("kind", 16) << getImageName(OB) << "\n"; + outs() << left_justify("arch", 16) << OB.getArch() << "\n"; + outs() << left_justify("triple", 16) << OB.getTriple() << "\n"; + outs() << left_justify("producer", 16) + << getOffloadKindName(OB.getOffloadKind()) << "\n"; +} + +static Error visitAllBinaries(const OffloadBinary &OB) { + uint64_t Offset = 0; + uint64_t Index = 0; + while (Offset < OB.getMemoryBufferRef().getBufferSize()) { + MemoryBufferRef Buffer = + MemoryBufferRef(OB.getData().drop_front(Offset), OB.getFileName()); + auto BinaryOrErr = OffloadBinary::create(Buffer); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + + OffloadBinary &Binary = **BinaryOrErr; + printBinary(Binary, Index++); + + Offset += Binary.getSize(); + } + return Error::success(); +} + +/// Print the embedded offloading contents of an ObjectFile \p O. 
+void llvm::dumpOffloadBinary(const ObjectFile &O) {
+  for (SectionRef Sec : O.sections()) {
+    Expected<StringRef> Name = Sec.getName();
+    if (!Name || !Name->startswith(OffloadSectionString))
+      continue;
+
+    Expected<StringRef> Contents = Sec.getContents();
+    if (!Contents)
+      reportError(Contents.takeError(), O.getFileName());
+
+    MemoryBufferRef Buffer = MemoryBufferRef(*Contents, O.getFileName());
+    auto BinaryOrErr = OffloadBinary::create(Buffer);
+    if (!BinaryOrErr)
+      reportError(O.getFileName(), "while extracting offloading files: " +
+                                       toString(BinaryOrErr.takeError()));
+    OffloadBinary &Binary = **BinaryOrErr;
+
+    // Print out all the binaries that are contained in this buffer. If we fail
+    // to parse a binary before reaching the end of the buffer, emit a warning.
+    if (Error Err = visitAllBinaries(Binary))
+      reportWarning("while parsing offloading files: " +
+                        toString(std::move(Err)),
+                    O.getFileName());
+  }
+}
+
+/// Print the contents of an offload binary file \p OB. This may contain
+/// multiple binaries stored in the same buffer.
+void llvm::dumpOffloadSections(const OffloadBinary &OB) {
+  // Print out all the binaries that are contained in this buffer. If we fail
+  // to parse a binary before reaching the end of the buffer, emit a warning.
+  if (Error Err = visitAllBinaries(OB))
+    reportWarning("while parsing offloading files: " + toString(std::move(Err)),
+                  OB.getFileName());
+}
diff --git a/llvm/tools/llvm-objdump/OffloadDump.h b/llvm/tools/llvm-objdump/OffloadDump.h
new file mode 100644
index 000000000000..75f188e9d506
--- /dev/null
+++ b/llvm/tools/llvm-objdump/OffloadDump.h
@@ -0,0 +1,22 @@
+//===-- OffloadDump.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_OBJDUMP_OFFLOADDUMP_H +#define LLVM_TOOLS_LLVM_OBJDUMP_OFFLOADDUMP_H + +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" + +namespace llvm { + +void dumpOffloadSections(const object::OffloadBinary &OB); +void dumpOffloadBinary(const object::ObjectFile &O); + +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-objdump/OtoolOpts.td b/llvm/tools/llvm-objdump/OtoolOpts.td index 61ea701ed75d..e8bef284c0e9 100644 --- a/llvm/tools/llvm-objdump/OtoolOpts.td +++ b/llvm/tools/llvm-objdump/OtoolOpts.td @@ -47,7 +47,6 @@ def X : Flag<["-"], "X">, HelpText<"omit leading addresses or headers">; // -addr_slide=arg // -function_offsets - // Obsolete and unsupported: def grp_obsolete : OptionGroup<"kind">, HelpText<"Obsolete and unsupported flags">; diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp index 8befac546204..c8ea6b543245 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.cpp +++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp @@ -16,6 +16,8 @@ #include "llvm-objdump.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSet.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/FormatVariadic.h" diff --git a/llvm/tools/llvm-objdump/SourcePrinter.h b/llvm/tools/llvm-objdump/SourcePrinter.h index 31d46e3108f6..29ef19c98c80 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.h +++ b/llvm/tools/llvm-objdump/SourcePrinter.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/FormattedStream.h" #include #include diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp index b8fb2ed3d063..159741bebb67 100644 --- a/llvm/tools/llvm-objdump/XCOFFDump.cpp +++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp @@ -106,7 +106,7 @@ std::string objdump::getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo, if (SymbolInfo.XCOFFSymInfo.StorageMappingClass && !SymbolInfo.XCOFFSymInfo.IsLabel) { const XCOFF::StorageMappingClass Smc = - SymbolInfo.XCOFFSymInfo.StorageMappingClass.getValue(); + *SymbolInfo.XCOFFSymInfo.StorageMappingClass; Result.append(("[" + XCOFF::getMappingClassString(Smc) + "]").str()); } diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 6b238fa01d25..7cd47da9efd9 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -20,6 +20,7 @@ #include "ELFDump.h" #include "MachODump.h" #include "ObjdumpOptID.h" +#include "OffloadDump.h" #include "SourcePrinter.h" #include "WasmDump.h" #include "XCOFFDump.h" @@ -33,6 +34,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Demangle/Demangle.h" #include "llvm/MC/MCAsmInfo.h" @@ -52,10 +54,12 @@ #include "llvm/Object/COFF.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" #include "llvm/Object/FaultMapParser.h" #include "llvm/Object/MachO.h" #include 
"llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" #include "llvm/Object/Wasm.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" @@ -196,6 +200,7 @@ std::string objdump::MCPU; std::vector objdump::MAttrs; bool objdump::ShowRawInsn; bool objdump::LeadingAddr; +static bool Offloading; static bool RawClangAST; bool objdump::Relocations; bool objdump::PrintImmHex; @@ -440,8 +445,13 @@ static bool isArmElf(const ObjectFile *Obj) { return Elf && Elf->getEMachine() == ELF::EM_ARM; } +static bool isCSKYElf(const ObjectFile *Obj) { + const auto *Elf = dyn_cast(Obj); + return Elf && Elf->getEMachine() == ELF::EM_CSKY; +} + static bool hasMappingSymbols(const ObjectFile *Obj) { - return isArmElf(Obj) || isAArch64Elf(Obj); + return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj) ; } static void printRelocation(formatted_raw_ostream &OS, StringRef FileName, @@ -957,6 +967,9 @@ SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj, getXCOFFSymbolCsectSMC(XCOFFObj, Symbol); return SymbolInfoTy(Addr, Name, Smc, SymbolIndex, isLabel(XCOFFObj, Symbol)); + } else if (Obj->isXCOFF()) { + const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName); + return SymbolInfoTy(Addr, Name, SymType, true); } else return SymbolInfoTy(Addr, Name, Obj->isELF() ? getElfSymbolType(Obj, Symbol) @@ -973,11 +986,29 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj, } static void -collectLocalBranchTargets(ArrayRef Bytes, const MCInstrAnalysis *MIA, - MCDisassembler *DisAsm, MCInstPrinter *IP, - const MCSubtargetInfo *STI, uint64_t SectionAddr, - uint64_t Start, uint64_t End, - std::unordered_map &Labels) { +collectBBAddrMapLabels(const std::unordered_map &AddrToBBAddrMap, + uint64_t SectionAddr, uint64_t Start, uint64_t End, + std::unordered_map> &Labels) { + if (AddrToBBAddrMap.empty()) + return; + Labels.clear(); + uint64_t StartAddress = SectionAddr + Start; + uint64_t EndAddress = SectionAddr + End; + auto Iter = AddrToBBAddrMap.find(StartAddress); + if (Iter == AddrToBBAddrMap.end()) + return; + for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) { + uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr; + if (BBAddress >= EndAddress) + continue; + Labels[BBAddress].push_back(("BB" + Twine(I)).str()); + } +} + +static void collectLocalBranchTargets( + ArrayRef Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm, + MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr, + uint64_t Start, uint64_t End, std::unordered_map &Labels) { // So far only supports PowerPC and X86. if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86()) return; @@ -1006,7 +1037,6 @@ collectLocalBranchTargets(ArrayRef Bytes, const MCInstrAnalysis *MIA, !(STI->getTargetTriple().isPPC() && Target == Index)) Labels[Target] = ("L" + Twine(LabelCount++)).str(); } - Index += Size; } } @@ -1241,6 +1271,20 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (!SectSize) continue; + std::unordered_map AddrToBBAddrMap; + if (SymbolizeOperands) { + if (auto *Elf = dyn_cast(Obj)) { + // Read the BB-address-map corresponding to this section, if present. 
+ auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex()); + if (!SectionBBAddrMapsOrErr) + reportWarning(toString(SectionBBAddrMapsOrErr.takeError()), + Obj->getFileName()); + for (auto &FunctionBBAddrMap : *SectionBBAddrMapsOrErr) + AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, + std::move(FunctionBBAddrMap)); + } + } + // Get the list of all the symbols in this section. SectionSymbolsTy &Symbols = AllSymbols[Section]; std::vector MappingSymbols; @@ -1367,7 +1411,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // Right now, most targets return None i.e ignore to treat a symbol // separately. But WebAssembly decodes preludes for some symbols. // - if (Status.hasValue()) { + if (Status) { if (Status.getValue() == MCDisassembler::Fail) { outs() << "// Error in decoding " << SymbolName << " : Decoding failed region as bytes.\n"; @@ -1404,9 +1448,13 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, formatted_raw_ostream FOS(outs()); std::unordered_map AllLabels; - if (SymbolizeOperands) + std::unordered_map> BBAddrMapLabels; + if (SymbolizeOperands) { collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI, SectionAddr, Index, End, AllLabels); + collectBBAddrMapLabels(AddrToBBAddrMap, SectionAddr, Index, End, + BBAddrMapLabels); + } while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the @@ -1450,9 +1498,15 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } // Print local label if there's any. - auto Iter = AllLabels.find(SectionAddr + Index); - if (Iter != AllLabels.end()) - FOS << "<" << Iter->second << ">:\n"; + auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index); + if (Iter1 != BBAddrMapLabels.end()) { + for (StringRef Label : Iter1->second) + FOS << "<" << Label << ">:\n"; + } else { + auto Iter2 = AllLabels.find(SectionAddr + Index); + if (Iter2 != AllLabels.end()) + FOS << "<" << Iter2->second << ">:\n"; + } // Disassemble a real instruction or a data when disassemble all is // provided @@ -1547,6 +1601,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } // Print the labels corresponding to the target if there's any. + bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target); bool LabelAvailable = AllLabels.count(Target); if (TargetSym != nullptr) { uint64_t TargetAddress = TargetSym->Addr; @@ -1560,14 +1615,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // Always Print the binary symbol precisely corresponding to // the target address. *TargetOS << TargetName; - } else if (!LabelAvailable) { + } else if (BBAddrMapLabelAvailable) { + *TargetOS << BBAddrMapLabels[Target].front(); + } else if (LabelAvailable) { + *TargetOS << AllLabels[Target]; + } else { // Always Print the binary symbol plus an offset if there's no // local label corresponding to the target address. 
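// [Editor's note: illustrative sketch, not part of the vendored patch.]
// The reordered branch-target printing above encodes a preference: a
// BB-address-map label beats a synthesized local "L<n>" label, which beats
// the "symbol+0x<disp>" fallback. Distilled into a hypothetical free
// function:
#include <cstdint>
#include <cstdio>
#include <optional>
#include <string>

static std::string formatTarget(const std::optional<std::string> &BBLabel,
                                const std::optional<std::string> &LocalLabel,
                                const std::string &SymName, uint64_t Disp) {
  if (BBLabel)
    return *BBLabel;    // e.g. "BB3", recovered from the BB address map
  if (LocalLabel)
    return *LocalLabel; // e.g. "L1", synthesized by branch scanning
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), "+0x%llx", (unsigned long long)Disp);
  return SymName + Buf; // fallback: nearest symbol plus hex offset
}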
*TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp); - } else { - *TargetOS << AllLabels[Target]; } *TargetOS << ">"; + } else if (BBAddrMapLabelAvailable) { + *TargetOS << " <" << BBAddrMapLabels[Target].front() << ">"; } else if (LabelAvailable) { *TargetOS << " <" << AllLabels[Target] << ">"; } @@ -1634,9 +1693,12 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { // Package up features to be passed to target/subtarget SubtargetFeatures Features = Obj->getFeatures(); - if (!MAttrs.empty()) + if (!MAttrs.empty()) { for (unsigned I = 0; I != MAttrs.size(); ++I) Features.AddFeature(MAttrs[I]); + } else if (MCPU.empty() && Obj->getArch() == llvm::Triple::aarch64) { + Features.AddFeature("+all"); + } std::unique_ptr MRI( TheTarget->createMCRegInfo(TripleName)); @@ -1653,7 +1715,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { "no assembly info for target " + TripleName); if (MCPU.empty()) - MCPU = Obj->tryGetCPUName().getValueOr("").str(); + MCPU = Obj->tryGetCPUName().value_or("").str(); std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); @@ -1721,10 +1783,6 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { void objdump::printRelocations(const ObjectFile *Obj) { StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; - // Regular objdump doesn't print relocations in non-relocatable object - // files. - if (!Obj->isRelocatableObject()) - return; // Build a mapping from relocation target to a vector of relocation // sections. Usually, there is an only one relocation section for @@ -1732,6 +1790,8 @@ void objdump::printRelocations(const ObjectFile *Obj) { MapVector> SecToRelSec; uint64_t Ndx; for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) { + if (Obj->isELF() && (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC)) + continue; if (Section.relocation_begin() == Section.relocation_end()) continue; Expected SecOrErr = Section.getRelocatedSection(); @@ -2073,7 +2133,7 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, dyn_cast(O), Symbol); if (SymRef) { - Expected NameOrErr = SymRef.getValue().getName(); + Expected NameOrErr = SymRef->getName(); if (NameOrErr) { outs() << " (csect:"; @@ -2227,13 +2287,13 @@ static void printFaultMaps(const ObjectFile *Obj) { outs() << "FaultMap table:\n"; - if (!FaultMapSection.hasValue()) { + if (!FaultMapSection) { outs() << "\n"; return; } StringRef FaultMapContents = - unwrapOrError(FaultMapSection.getValue().getContents(), Obj->getFileName()); + unwrapOrError(FaultMapSection->getContents(), Obj->getFileName()); FaultMapParser FMP(FaultMapContents.bytes_begin(), FaultMapContents.bytes_end()); @@ -2423,6 +2483,8 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, printRawClangAST(O); if (FaultMapSection) printFaultMaps(O); + if (Offloading) + dumpOffloadBinary(*O); } static void dumpObject(const COFFImportFile *I, const Archive *A, @@ -2486,6 +2548,8 @@ static void dumpInput(StringRef file) { dumpObject(O); else if (MachOUniversalBinary *UB = dyn_cast(&Binary)) parseInputMachO(UB); + else if (OffloadBinary *OB = dyn_cast(&Binary)) + dumpOffloadSections(*OB); else reportError(errorCodeToError(object_error::invalid_file_type), file); } @@ -2589,6 +2653,7 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) { } DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc); FaultMapSection = 
InputArgs.hasArg(OBJDUMP_fault_map_section); + Offloading = InputArgs.hasArg(OBJDUMP_offloading); FileHeaders = InputArgs.hasArg(OBJDUMP_file_headers); SectionContents = InputArgs.hasArg(OBJDUMP_full_contents); PrintLines = InputArgs.hasArg(OBJDUMP_line_numbers); @@ -2756,12 +2821,12 @@ int main(int argc, char **argv) { if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null && !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST && !Relocations && !SectionHeaders && !SectionContents && !SymbolTable && - !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && - !(MachOOpt && - (Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie || - FirstPrivateHeader || FunctionStarts || IndirectSymbols || InfoPlist || - LazyBind || LinkOptHints || ObjcMetaData || Rebase || Rpaths || - UniversalHeaders || WeakBind || !FilterSections.empty()))) { + !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && !Offloading && + !(MachOOpt && (Bind || DataInCode || DyldInfo || DylibId || DylibsUsed || + ExportsTrie || FirstPrivateHeader || FunctionStarts || + IndirectSymbols || InfoPlist || LazyBind || LinkOptHints || + ObjcMetaData || Rebase || Rpaths || UniversalHeaders || + WeakBind || !FilterSections.empty()))) { T->printHelp(ToolName); return 2; } diff --git a/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp b/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp index ffc907e09f11..4c851e14a12d 100644 --- a/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp @@ -8,7 +8,6 @@ #include "BytesOutputStyle.h" -#include "FormatUtil.h" #include "StreamUtil.h" #include "llvm-pdbutil.h" @@ -17,6 +16,7 @@ #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" @@ -83,13 +83,13 @@ static void printHeader(LinePrinter &P, const Twine &S) { } BytesOutputStyle::BytesOutputStyle(PDBFile &File) - : File(File), P(2, false, outs()) {} + : File(File), P(2, false, outs(), opts::Filters) {} Error BytesOutputStyle::dump() { - if (opts::bytes::DumpBlockRange.hasValue()) { + if (opts::bytes::DumpBlockRange) { auto &R = *opts::bytes::DumpBlockRange; - uint32_t Max = R.Max.getValueOr(R.Min); + uint32_t Max = R.Max.value_or(R.Min); if (Max < R.Min) return make_error( @@ -104,9 +104,9 @@ Error BytesOutputStyle::dump() { P.NewLine(); } - if (opts::bytes::DumpByteRange.hasValue()) { + if (opts::bytes::DumpByteRange) { auto &R = *opts::bytes::DumpByteRange; - uint32_t Max = R.Max.getValueOr(File.getFileSize()); + uint32_t Max = R.Max.value_or(File.getFileSize()); if (Max < R.Min) return make_error("Invalid byte range specified. 
Max < Min", diff --git a/llvm/tools/llvm-pdbutil/BytesOutputStyle.h b/llvm/tools/llvm-pdbutil/BytesOutputStyle.h index d3aceb47679e..cd28032fe7cd 100644 --- a/llvm/tools/llvm-pdbutil/BytesOutputStyle.h +++ b/llvm/tools/llvm-pdbutil/BytesOutputStyle.h @@ -9,10 +9,10 @@ #ifndef LLVM_TOOLS_LLVMPDBDUMP_BYTESOUTPUTSTYLE_H #define LLVM_TOOLS_LLVMPDBDUMP_BYTESOUTPUTSTYLE_H -#include "LinePrinter.h" #include "OutputStyle.h" #include "StreamUtil.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp index ef299ea9d482..a173eb1faa62 100644 --- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -8,8 +8,6 @@ #include "DumpOutputStyle.h" -#include "FormatUtil.h" -#include "InputFile.h" #include "MinimalSymbolDumper.h" #include "MinimalTypeDumper.h" #include "StreamUtil.h" @@ -38,10 +36,13 @@ #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/InputFile.h" #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" @@ -61,7 +62,7 @@ using namespace llvm::msf; using namespace llvm::pdb; DumpOutputStyle::DumpOutputStyle(InputFile &File) - : File(File), P(2, false, outs()) { + : File(File), P(2, false, outs(), opts::Filters) { if (opts::dump::DumpTypeRefStats) RefTracker.reset(new TypeReferenceTracker(File)); } @@ -99,8 +100,8 @@ Error DumpOutputStyle::dump() { } if (opts::dump::DumpSymbolStats) { - if (auto EC = dumpSymbolStats()) - return EC; + ExitOnError Err("Unexpected error processing module stats: "); + Err(dumpSymbolStats()); P.NewLine(); } @@ -129,33 +130,33 @@ Error DumpOutputStyle::dump() { } if (opts::dump::DumpModules) { - if (auto EC = dumpModules()) - return EC; + ExitOnError Err("Unexpected error processing modules: "); + Err(dumpModules()); } if (opts::dump::DumpModuleFiles) { - if (auto EC = dumpModuleFiles()) - return EC; + ExitOnError Err("Unexpected error processing files: "); + Err(dumpModuleFiles()); } if (opts::dump::DumpLines) { - if (auto EC = dumpLines()) - return EC; + ExitOnError Err("Unexpected error processing lines: "); + Err(dumpLines()); } if (opts::dump::DumpInlineeLines) { - if (auto EC = dumpInlineeLines()) - return EC; + ExitOnError Err("Unexpected error processing inlinee lines: "); + Err(dumpInlineeLines()); } if (opts::dump::DumpXmi) { - if (auto EC = dumpXmi()) - return EC; + ExitOnError Err("Unexpected error processing cross module imports: "); + Err(dumpXmi()); } if (opts::dump::DumpXme) { - if (auto EC = dumpXme()) - return EC; + ExitOnError Err("Unexpected error processing cross module exports: "); + Err(dumpXme()); } if (opts::dump::DumpFpo) { @@ -198,9 +199,8 @@ Error DumpOutputStyle::dump() { } if (opts::dump::DumpSymbols) { - auto EC = File.isPdb() ? dumpModuleSymsForPdb() : dumpModuleSymsForObj(); - if (EC) - return EC; + ExitOnError Err("Unexpected error processing symbols: "); + Err(File.isPdb() ? 
dumpModuleSymsForPdb() : dumpModuleSymsForObj()); } if (opts::dump::DumpTypeRefStats) { @@ -260,7 +260,7 @@ Error DumpOutputStyle::dumpFileSummary() { P.formatLine("Has Globals: {0}", getPdb().hasPDBGlobalsStream()); P.formatLine("Has Publics: {0}", getPdb().hasPDBPublicsStream()); if (getPdb().hasPDBDbiStream()) { - auto &DBI = Err(getPdb().getPDBDbiStream()); + DbiStream &DBI = Err(getPdb().getPDBDbiStream()); P.formatLine("Is incrementally linked: {0}", DBI.isIncrementallyLinked()); P.formatLine("Has conflicting types: {0}", DBI.hasCTypes()); P.formatLine("Is stripped: {0}", DBI.isStripped()); @@ -343,36 +343,6 @@ static void printModuleDetailStats(LinePrinter &P, StringRef Label, } } -static bool isMyCode(const SymbolGroup &Group) { - if (Group.getFile().isObj()) - return true; - - StringRef Name = Group.name(); - if (Name.startswith("Import:")) - return false; - if (Name.endswith_insensitive(".dll")) - return false; - if (Name.equals_insensitive("* linker *")) - return false; - if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools")) - return false; - if (Name.startswith_insensitive("f:\\dd\\vctools\\crt")) - return false; - return true; -} - -static bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group) { - if (opts::dump::JustMyCode && !isMyCode(Group)) - return false; - - // If the arg was not specified on the command line, always dump all modules. - if (opts::dump::DumpModi.getNumOccurrences() == 0) - return true; - - // Otherwise, only dump if this is the same module specified. - return (opts::dump::DumpModi == Idx); -} - Error DumpOutputStyle::dumpStreamSummary() { printHeader(P, "Streams"); @@ -389,7 +359,7 @@ Error DumpOutputStyle::dumpStreamSummary() { uint32_t StreamCount = getPdb().getNumStreams(); uint32_t MaxStreamSize = getPdb().getMaxStreamSize(); - for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) { + for (uint32_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) { P.formatLine( "Stream {0} ({1} bytes): [{2}]", fmt_align(StreamIdx, AlignStyle::Right, NumDigits(StreamCount)), @@ -409,93 +379,6 @@ Error DumpOutputStyle::dumpStreamSummary() { return Error::success(); } -static Expected getModuleDebugStream(PDBFile &File, - uint32_t Index) { - ExitOnError Err("Unexpected error: "); - - auto &Dbi = Err(File.getPDBDbiStream()); - const auto &Modules = Dbi.modules(); - auto Modi = Modules.getModuleDescriptor(Index); - - uint16_t ModiStream = Modi.getModuleStreamIndex(); - if (ModiStream == kInvalidStreamIndex) - return make_error(raw_error_code::no_stream, - "Module stream not present"); - - auto ModStreamData = File.createIndexedStream(ModiStream); - - ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); - if (auto EC = ModS.reload()) - return make_error(raw_error_code::corrupt_file, - "Invalid module stream"); - - return std::move(ModS); -} - -template -static void -iterateOneModule(InputFile &File, const Optional &HeaderScope, - const SymbolGroup &SG, uint32_t Modi, CallbackT Callback) { - if (HeaderScope) { - HeaderScope->P.formatLine( - "Mod {0:4} | `{1}`: ", - fmt_align(Modi, AlignStyle::Right, HeaderScope->LabelWidth), SG.name()); - } - - AutoIndent Indent(HeaderScope); - Callback(Modi, SG); -} - -template -static void iterateSymbolGroups(InputFile &Input, - const Optional &HeaderScope, - CallbackT Callback) { - AutoIndent Indent(HeaderScope); - - ExitOnError Err("Unexpected error processing modules: "); - - if (opts::dump::DumpModi.getNumOccurrences() > 0) { - assert(opts::dump::DumpModi.getNumOccurrences() == 1); 
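// [Editor's note: illustrative sketch, not part of the vendored patch.]
// The dumpStreamSummary hunk above widens the loop counter from uint16_t to
// uint32_t. With a 16-bit counter and a 32-bit bound, a PDB holding more
// than 65535 streams would make the old loop wrap and never terminate:
#include <cstdint>

void visitStreams(uint32_t StreamCount) { // e.g. 70000
  // Pre-patch shape (BUG): a uint16_t StreamIdx wraps from 65535 back to 0,
  // so "StreamIdx < StreamCount" never becomes false once StreamCount > 65535:
  //   for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) ...
  // Post-patch shape: a counter as wide as the bound terminates normally.
  for (uint32_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx)
    (void)StreamIdx;
}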
- uint32_t Modi = opts::dump::DumpModi; - SymbolGroup SG(&Input, Modi); - iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(Modi)), SG, - Modi, Callback); - return; - } - - uint32_t I = 0; - - for (const auto &SG : Input.symbol_groups()) { - if (shouldDumpSymbolGroup(I, SG)) - iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(I)), SG, I, - Callback); - - ++I; - } -} - -template -static void iterateModuleSubsections( - InputFile &File, const Optional &HeaderScope, - llvm::function_ref - Callback) { - - iterateSymbolGroups(File, HeaderScope, - [&](uint32_t Modi, const SymbolGroup &SG) { - for (const auto &SS : SG.getDebugSubsections()) { - SubsectionT Subsection; - - if (SS.kind() != Subsection.kind()) - continue; - - BinaryStreamReader Reader(SS.getRecordData()); - if (auto EC = Subsection.initialize(Reader)) - continue; - Callback(Modi, SG, Subsection); - } - }); -} - static Expected, ArrayRef>> loadSectionHeaders(PDBFile &File, DbgHeaderType Type) { @@ -504,7 +387,7 @@ loadSectionHeaders(PDBFile &File, DbgHeaderType Type) { "Section headers require a DBI Stream, which could not be loaded", inconvertibleErrorCode()); - auto &Dbi = cantFail(File.getPDBDbiStream()); + DbiStream &Dbi = cantFail(File.getPDBDbiStream()); uint32_t SI = Dbi.getDebugStreamIndex(Type); if (SI == kInvalidStreamIndex) @@ -529,10 +412,10 @@ loadSectionHeaders(PDBFile &File, DbgHeaderType Type) { return std::make_pair(std::move(Stream), Headers); } -static std::vector getSectionNames(PDBFile &File) { +static Expected> getSectionNames(PDBFile &File) { auto ExpectedHeaders = loadSectionHeaders(File, DbgHeaderType::SectionHdr); if (!ExpectedHeaders) - return {}; + return ExpectedHeaders.takeError(); std::unique_ptr Stream; ArrayRef Headers; @@ -590,31 +473,44 @@ Error DumpOutputStyle::dumpModules() { } AutoIndent Indent(P); - ExitOnError Err("Unexpected error processing modules: "); - auto &Stream = Err(getPdb().getPDBDbiStream()); + Expected StreamOrErr = getPdb().getPDBDbiStream(); + if (!StreamOrErr) + return StreamOrErr.takeError(); + DbiStream &Stream = *StreamOrErr; const DbiModuleList &Modules = Stream.modules(); - iterateSymbolGroups( - File, PrintScope{P, 11}, [&](uint32_t Modi, const SymbolGroup &Strings) { + return iterateSymbolGroups( + File, PrintScope{P, 11}, + [&](uint32_t Modi, const SymbolGroup &Strings) -> Error { auto Desc = Modules.getModuleDescriptor(Modi); if (opts::dump::DumpSectionContribs) { - std::vector Sections = getSectionNames(getPdb()); + auto SectionsOrErr = getSectionNames(getPdb()); + if (!SectionsOrErr) + return SectionsOrErr.takeError(); + ArrayRef Sections = *SectionsOrErr; dumpSectionContrib(P, Desc.getSectionContrib(), Sections, 0); } P.formatLine("Obj: `{0}`: ", Desc.getObjFileName()); P.formatLine("debug stream: {0}, # files: {1}, has ec info: {2}", Desc.getModuleStreamIndex(), Desc.getNumberOfFiles(), Desc.hasECInfo()); - StringRef PdbFilePath = - Err(Stream.getECName(Desc.getPdbFilePathNameIndex())); - StringRef SrcFilePath = - Err(Stream.getECName(Desc.getSourceFileNameIndex())); + + auto PdbPathOrErr = Stream.getECName(Desc.getPdbFilePathNameIndex()); + if (!PdbPathOrErr) + return PdbPathOrErr.takeError(); + StringRef PdbFilePath = *PdbPathOrErr; + + auto SrcPathOrErr = Stream.getECName(Desc.getSourceFileNameIndex()); + if (!SrcPathOrErr) + return SrcPathOrErr.takeError(); + StringRef SrcFilePath = *SrcPathOrErr; + P.formatLine("pdb file ni: {0} `{1}`, src file ni: {2} `{3}`", Desc.getPdbFilePathNameIndex(), PdbFilePath, Desc.getSourceFileNameIndex(), 
SrcFilePath); + return Error::success(); }); - return Error::success(); } Error DumpOutputStyle::dumpModuleFiles() { @@ -630,18 +526,20 @@ Error DumpOutputStyle::dumpModuleFiles() { return Error::success(); } - ExitOnError Err("Unexpected error processing modules: "); - - iterateSymbolGroups(File, PrintScope{P, 11}, - [this, &Err](uint32_t Modi, const SymbolGroup &Strings) { - auto &Stream = Err(getPdb().getPDBDbiStream()); + return iterateSymbolGroups( + File, PrintScope{P, 11}, + [this](uint32_t Modi, const SymbolGroup &Strings) -> Error { + Expected StreamOrErr = getPdb().getPDBDbiStream(); + if (!StreamOrErr) + return StreamOrErr.takeError(); + DbiStream &Stream = *StreamOrErr; - const DbiModuleList &Modules = Stream.modules(); - for (const auto &F : Modules.source_files(Modi)) { - Strings.formatFromFileName(P, F); - } - }); - return Error::success(); + const DbiModuleList &Modules = Stream.modules(); + for (const auto &F : Modules.source_files(Modi)) { + Strings.formatFromFileName(P, F); + } + return Error::success(); + }); } Error DumpOutputStyle::dumpSymbolStats() { @@ -652,39 +550,40 @@ Error DumpOutputStyle::dumpSymbolStats() { return Error::success(); } - ExitOnError Err("Unexpected error processing modules: "); - StatCollection SymStats; StatCollection ChunkStats; - - Optional Scope; - if (File.isPdb()) - Scope.emplace(P, 2); - - iterateSymbolGroups(File, Scope, [&](uint32_t Modi, const SymbolGroup &SG) { - StatCollection SS = getSymbolStats(SG, SymStats); - StatCollection CS = getChunkStats(SG, ChunkStats); - - if (SG.getFile().isPdb()) { - AutoIndent Indent(P); - auto Modules = cantFail(File.pdb().getPDBDbiStream()).modules(); - uint32_t ModCount = Modules.getModuleCount(); - DbiModuleDescriptor Desc = Modules.getModuleDescriptor(Modi); - uint32_t StreamIdx = Desc.getModuleStreamIndex(); - - if (StreamIdx == kInvalidStreamIndex) { - P.formatLine("Mod {0} (debug info not present): [{1}]", - fmt_align(Modi, AlignStyle::Right, NumDigits(ModCount)), - Desc.getModuleName()); - return; - } - P.formatLine("Stream {0}, {1} bytes", StreamIdx, - getPdb().getStreamByteSize(StreamIdx)); - - printModuleDetailStats(P, "Symbols", SS); - printModuleDetailStats(P, "Chunks", CS); - } - }); + PrintScope Scope(P, 2); + + if (Error Err = iterateSymbolGroups( + File, Scope, [&](uint32_t Modi, const SymbolGroup &SG) -> Error { + StatCollection SS = getSymbolStats(SG, SymStats); + StatCollection CS = getChunkStats(SG, ChunkStats); + + if (!SG.getFile().isPdb()) + return Error::success(); + + AutoIndent Indent(P); + auto Modules = cantFail(File.pdb().getPDBDbiStream()).modules(); + uint32_t ModCount = Modules.getModuleCount(); + DbiModuleDescriptor Desc = Modules.getModuleDescriptor(Modi); + uint32_t StreamIdx = Desc.getModuleStreamIndex(); + + if (StreamIdx == kInvalidStreamIndex) { + P.formatLine( + "Mod {0} (debug info not present): [{1}]", + fmt_align(Modi, AlignStyle::Right, NumDigits(ModCount)), + Desc.getModuleName()); + return Error::success(); + } + P.formatLine("Stream {0}, {1} bytes", StreamIdx, + getPdb().getStreamByteSize(StreamIdx)); + + printModuleDetailStats(P, "Symbols", SS); + printModuleDetailStats(P, "Chunks", CS); + + return Error::success(); + })) + return Err; if (SymStats.Totals.Count > 0) { P.printLine(" Summary |"); @@ -944,11 +843,11 @@ Error DumpOutputStyle::dumpLines() { uint32_t LastModi = UINT32_MAX; uint32_t LastNameIndex = UINT32_MAX; - iterateModuleSubsections( + return iterateModuleSubsections( File, PrintScope{P, 4}, - [this, &LastModi, &LastNameIndex](uint32_t 
Modi, - const SymbolGroup &Strings, - DebugLinesSubsectionRef &Lines) { + [this, &LastModi, + &LastNameIndex](uint32_t Modi, const SymbolGroup &Strings, + DebugLinesSubsectionRef &Lines) -> Error { uint16_t Segment = Lines.header()->RelocSegment; uint32_t Begin = Lines.header()->RelocOffset; uint32_t End = Begin + Lines.header()->CodeSize; @@ -970,9 +869,8 @@ Error DumpOutputStyle::dumpLines() { P.NewLine(); typesetLinesAndColumns(P, Begin, Block); } + return Error::success(); }); - - return Error::success(); } Error DumpOutputStyle::dumpInlineeLines() { @@ -983,10 +881,10 @@ Error DumpOutputStyle::dumpInlineeLines() { return Error::success(); } - iterateModuleSubsections( + return iterateModuleSubsections( File, PrintScope{P, 2}, [this](uint32_t Modi, const SymbolGroup &Strings, - DebugInlineeLinesSubsectionRef &Lines) { + DebugInlineeLinesSubsectionRef &Lines) -> Error { P.formatLine("{0,+8} | {1,+5} | {2}", "Inlinee", "Line", "Source File"); for (const auto &Entry : Lines) { P.formatLine("{0,+8} | {1,+5} | ", Entry.Header->Inlinee, @@ -998,9 +896,8 @@ Error DumpOutputStyle::dumpInlineeLines() { } } P.NewLine(); + return Error::success(); }); - - return Error::success(); } Error DumpOutputStyle::dumpXmi() { @@ -1011,10 +908,10 @@ Error DumpOutputStyle::dumpXmi() { return Error::success(); } - iterateModuleSubsections( + return iterateModuleSubsections( File, PrintScope{P, 2}, [this](uint32_t Modi, const SymbolGroup &Strings, - DebugCrossModuleImportsSubsectionRef &Imports) { + DebugCrossModuleImportsSubsectionRef &Imports) -> Error { P.formatLine("{0,=32} | {1}", "Imported Module", "Type IDs"); for (const auto &Xmi : Imports) { @@ -1039,9 +936,8 @@ Error DumpOutputStyle::dumpXmi() { typesetItemList(TIs, P.getIndentLevel() + 35, 12, " "); P.formatLine("{0,+32} | {1}", Module, Result); } + return Error::success(); }); - - return Error::success(); } Error DumpOutputStyle::dumpXme() { @@ -1052,18 +948,17 @@ Error DumpOutputStyle::dumpXme() { return Error::success(); } - iterateModuleSubsections( + return iterateModuleSubsections( File, PrintScope{P, 2}, [this](uint32_t Modi, const SymbolGroup &Strings, - DebugCrossModuleExportsSubsectionRef &Exports) { + DebugCrossModuleExportsSubsectionRef &Exports) -> Error { P.formatLine("{0,-10} | {1}", "Local ID", "Global ID"); for (const auto &Export : Exports) { P.formatLine("{0,+10:X+} | {1}", TypeIndex(Export.Local), TypeIndex(Export.Global)); } + return Error::success(); }); - - return Error::success(); } std::string formatFrameType(object::frame_type FT) { @@ -1084,7 +979,7 @@ Error DumpOutputStyle::dumpOldFpo(PDBFile &File) { printHeader(P, "Old FPO Data"); ExitOnError Err("Error dumping old fpo data:"); - auto &Dbi = Err(File.getPDBDbiStream()); + DbiStream &Dbi = Err(File.getPDBDbiStream()); if (!Dbi.hasOldFpoRecords()) { printStreamNotPresent("FPO"); @@ -1111,7 +1006,7 @@ Error DumpOutputStyle::dumpNewFpo(PDBFile &File) { printHeader(P, "New FPO Data"); ExitOnError Err("Error dumping new fpo data:"); - auto &Dbi = Err(File.getPDBDbiStream()); + DbiStream &Dbi = Err(File.getPDBDbiStream()); if (!Dbi.hasNewFpoRecords()) { printStreamNotPresent("New FPO"); @@ -1232,10 +1127,10 @@ Error DumpOutputStyle::dumpStringTableFromPdb() { } Error DumpOutputStyle::dumpStringTableFromObj() { - iterateModuleSubsections( + return iterateModuleSubsections( File, PrintScope{P, 4}, [&](uint32_t Modi, const SymbolGroup &Strings, - DebugStringTableSubsectionRef &Strings2) { + DebugStringTableSubsectionRef &Strings2) -> Error { BinaryStreamRef StringTableBuffer = 
Strings2.getBuffer(); BinaryStreamReader Reader(StringTableBuffer); while (Reader.bytesRemaining() > 0) { @@ -1248,8 +1143,8 @@ Error DumpOutputStyle::dumpStringTableFromObj() { P.formatLine("{0} | {1}", fmt_align(Offset, AlignStyle::Right, 4), Str); } + return Error::success(); }); - return Error::success(); } Error DumpOutputStyle::dumpNamedStreams() { @@ -1352,10 +1247,16 @@ static void dumpPartialTypeStream(LinePrinter &Printer, for (const auto &I : TiList) { TypeIndex TI(I); - CVType Type = Types.getType(TI); - if (auto EC = codeview::visitTypeRecord(Type, TI, V)) - Printer.formatLine("An error occurred dumping type record {0}: {1}", TI, - toString(std::move(EC))); + if (TI.isSimple()) { + Printer.formatLine("{0} | {1}", fmt_align(I, AlignStyle::Right, Width), + Types.getTypeName(TI)); + } else if (Optional Type = Types.tryGetType(TI)) { + if (auto EC = codeview::visitTypeRecord(*Type, TI, V)) + Printer.formatLine("An error occurred dumping type record {0}: {1}", + TI, toString(std::move(EC))); + } else { + Printer.formatLine("Type {0} doesn't exist in TPI stream", TI); + } } } } @@ -1526,8 +1427,6 @@ Error DumpOutputStyle::dumpModuleSymsForObj() { AutoIndent Indent(P); - ExitOnError Err("Unexpected error processing symbols: "); - auto &Types = File.types(); SymbolVisitorCallbackPipeline Pipeline; @@ -1538,25 +1437,18 @@ Error DumpOutputStyle::dumpModuleSymsForObj() { Pipeline.addCallbackToPipeline(Dumper); CVSymbolVisitor Visitor(Pipeline); - std::unique_ptr SymbolError; - - iterateModuleSubsections( + return iterateModuleSubsections( File, PrintScope{P, 2}, [&](uint32_t Modi, const SymbolGroup &Strings, - DebugSymbolsSubsectionRef &Symbols) { + DebugSymbolsSubsectionRef &Symbols) -> Error { Dumper.setSymbolGroup(&Strings); for (auto Symbol : Symbols) { if (auto EC = Visitor.visitSymbolRecord(Symbol)) { - SymbolError = std::make_unique(std::move(EC)); - return; + return EC; } } + return Error::success(); }); - - if (SymbolError) - return std::move(*SymbolError); - - return Error::success(); } Error DumpOutputStyle::dumpModuleSymsForPdb() { @@ -1568,18 +1460,18 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() { } AutoIndent Indent(P); - ExitOnError Err("Unexpected error processing symbols: "); auto &Ids = File.ids(); auto &Types = File.types(); - iterateSymbolGroups( - File, PrintScope{P, 2}, [&](uint32_t I, const SymbolGroup &Strings) { + return iterateSymbolGroups( + File, PrintScope{P, 2}, + [&](uint32_t I, const SymbolGroup &Strings) -> Error { auto ExpectedModS = getModuleDebugStream(File.pdb(), I); if (!ExpectedModS) { P.formatLine("Error loading module stream {0}. {1}", I, toString(ExpectedModS.takeError())); - return; + return Error::success(); } ModuleDebugStreamRef &ModS = *ExpectedModS; @@ -1593,14 +1485,25 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() { Pipeline.addCallbackToPipeline(Dumper); CVSymbolVisitor Visitor(Pipeline); auto SS = ModS.getSymbolsSubstream(); - if (auto EC = - Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset)) { + if (opts::Filters.SymbolOffset) { + CVSymbolVisitor::FilterOptions Filter; + Filter.SymbolOffset = opts::Filters.SymbolOffset; + Filter.ParentRecursiveDepth = opts::Filters.ParentRecurseDepth; + Filter.ChildRecursiveDepth = opts::Filters.ChildrenRecurseDepth; + if (auto EC = Visitor.visitSymbolStreamFiltered(ModS.getSymbolArray(), + Filter)) { + P.formatLine("Error while processing symbol records. 
{0}", + toString(std::move(EC))); + return EC; + } + } else if (auto EC = Visitor.visitSymbolStream(ModS.getSymbolArray(), + SS.Offset)) { P.formatLine("Error while processing symbol records. {0}", toString(std::move(EC))); - return; + return EC; } + return Error::success(); }); - return Error::success(); } Error DumpOutputStyle::dumpTypeRefStats() { @@ -1925,7 +1828,7 @@ Error DumpOutputStyle::dumpSectionContribs() { AutoIndent Indent(P); ExitOnError Err("Error dumping section contributions: "); - auto &Dbi = Err(getPdb().getPDBDbiStream()); + DbiStream &Dbi = Err(getPdb().getPDBDbiStream()); class Visitor : public ISectionContribVisitor { public: @@ -1948,8 +1851,11 @@ Error DumpOutputStyle::dumpSectionContribs() { ArrayRef Names; }; - std::vector Names = getSectionNames(getPdb()); - Visitor V(P, makeArrayRef(Names)); + auto NamesOrErr = getSectionNames(getPdb()); + if (!NamesOrErr) + return NamesOrErr.takeError(); + ArrayRef Names = *NamesOrErr; + Visitor V(P, Names); Dbi.visitSectionContributions(V); return Error::success(); } @@ -1970,7 +1876,7 @@ Error DumpOutputStyle::dumpSectionMap() { AutoIndent Indent(P); ExitOnError Err("Error dumping section map: "); - auto &Dbi = Err(getPdb().getPDBDbiStream()); + DbiStream &Dbi = Err(getPdb().getPDBDbiStream()); uint32_t I = 0; for (auto &M : Dbi.getSectionMap()) { diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.h b/llvm/tools/llvm-pdbutil/DumpOutputStyle.h index 041fb93a18a5..217d25d66d8b 100644 --- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.h +++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.h @@ -9,13 +9,13 @@ #ifndef LLVM_TOOLS_LLVMPDBDUMP_DUMPOUTPUTSTYLE_H #define LLVM_TOOLS_LLVMPDBDUMP_DUMPOUTPUTSTYLE_H -#include "LinePrinter.h" #include "OutputStyle.h" #include "StreamUtil.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp index b631bdf8f2b1..13a5f6ea6fe7 100644 --- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp @@ -8,17 +8,20 @@ #include "ExplainOutputStyle.h" -#include "FormatUtil.h" -#include "InputFile.h" #include "StreamUtil.h" #include "llvm-pdbutil.h" #include "llvm/DebugInfo/CodeView/Formatters.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/InputFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Error.h" @@ -29,7 +32,7 @@ using namespace llvm::msf; using namespace llvm::pdb; ExplainOutputStyle::ExplainOutputStyle(InputFile &File, uint64_t FileOffset) - : File(File), FileOffset(FileOffset), P(2, false, outs()) {} + : File(File), FileOffset(FileOffset), P(2, false, outs(), opts::Filters) {} Error ExplainOutputStyle::dump() { P.formatLine("Explaining file offset {0} of file '{1}'.", FileOffset, diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h index 
f405cf615e92..e3d19f25a9ea 100644 --- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h +++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h @@ -9,9 +9,10 @@ #ifndef LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H #define LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H -#include "LinePrinter.h" #include "OutputStyle.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" + #include namespace llvm { diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.cpp b/llvm/tools/llvm-pdbutil/FormatUtil.cpp deleted file mode 100644 index b4837398f1d0..000000000000 --- a/llvm/tools/llvm-pdbutil/FormatUtil.cpp +++ /dev/null @@ -1,258 +0,0 @@ -//===- FormatUtil.cpp ----------------------------------------- *- C++ --*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "FormatUtil.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/FormatAdapters.h" -#include "llvm/Support/FormatVariadic.h" - -using namespace llvm; -using namespace llvm::codeview; -using namespace llvm::pdb; - -std::string llvm::pdb::truncateStringBack(StringRef S, uint32_t MaxLen) { - if (MaxLen == 0 || S.size() <= MaxLen || S.size() <= 3) - return std::string(S); - - assert(MaxLen >= 3); - uint32_t FinalLen = std::min(S.size(), MaxLen - 3); - S = S.take_front(FinalLen); - return std::string(S) + std::string("..."); -} - -std::string llvm::pdb::truncateStringMiddle(StringRef S, uint32_t MaxLen) { - if (MaxLen == 0 || S.size() <= MaxLen || S.size() <= 3) - return std::string(S); - - assert(MaxLen >= 3); - uint32_t FinalLen = std::min(S.size(), MaxLen - 3); - StringRef Front = S.take_front(FinalLen / 2); - StringRef Back = S.take_back(Front.size()); - return std::string(Front) + std::string("...") + std::string(Back); -} - -std::string llvm::pdb::truncateStringFront(StringRef S, uint32_t MaxLen) { - if (MaxLen == 0 || S.size() <= MaxLen || S.size() <= 3) - return std::string(S); - - assert(MaxLen >= 3); - S = S.take_back(MaxLen - 3); - return std::string("...") + std::string(S); -} - -std::string llvm::pdb::truncateQuotedNameFront(StringRef Label, StringRef Name, - uint32_t MaxLen) { - uint32_t RequiredExtraChars = Label.size() + 1 + 2; - if (MaxLen == 0 || RequiredExtraChars + Name.size() <= MaxLen) - return formatv("{0} \"{1}\"", Label, Name).str(); - - assert(MaxLen >= RequiredExtraChars); - std::string TN = truncateStringFront(Name, MaxLen - RequiredExtraChars); - return formatv("{0} \"{1}\"", Label, TN).str(); -} - -std::string llvm::pdb::truncateQuotedNameBack(StringRef Label, StringRef Name, - uint32_t MaxLen) { - uint32_t RequiredExtraChars = Label.size() + 1 + 2; - if (MaxLen == 0 || RequiredExtraChars + Name.size() <= MaxLen) - return formatv("{0} \"{1}\"", Label, Name).str(); - - assert(MaxLen >= RequiredExtraChars); - std::string TN = truncateStringBack(Name, MaxLen - RequiredExtraChars); - return formatv("{0} \"{1}\"", Label, TN).str(); -} - -std::string llvm::pdb::typesetItemList(ArrayRef Opts, - uint32_t IndentLevel, uint32_t GroupSize, - StringRef Sep) { - std::string Result; - while (!Opts.empty()) { - ArrayRef ThisGroup; - ThisGroup = Opts.take_front(GroupSize); - Opts = Opts.drop_front(ThisGroup.size()); - Result += join(ThisGroup, Sep); - if (!Opts.empty()) { 
-      Result += Sep;
-      Result += "\n";
-      Result += std::string(formatv("{0}", fmt_repeat(' ', IndentLevel)));
-    }
-  }
-  return Result;
-}
-
-std::string llvm::pdb::typesetStringList(uint32_t IndentLevel,
-                                         ArrayRef<StringRef> Strings) {
-  std::string Result = "[";
-  for (const auto &S : Strings) {
-    Result += std::string(formatv("\n{0}{1}", fmt_repeat(' ', IndentLevel), S));
-  }
-  Result += "]";
-  return Result;
-}
-
-std::string llvm::pdb::formatChunkKind(DebugSubsectionKind Kind,
-                                       bool Friendly) {
-  if (Friendly) {
-    switch (Kind) {
-      RETURN_CASE(DebugSubsectionKind, None, "none");
-      RETURN_CASE(DebugSubsectionKind, Symbols, "symbols");
-      RETURN_CASE(DebugSubsectionKind, Lines, "lines");
-      RETURN_CASE(DebugSubsectionKind, StringTable, "strings");
-      RETURN_CASE(DebugSubsectionKind, FileChecksums, "checksums");
-      RETURN_CASE(DebugSubsectionKind, FrameData, "frames");
-      RETURN_CASE(DebugSubsectionKind, InlineeLines, "inlinee lines");
-      RETURN_CASE(DebugSubsectionKind, CrossScopeImports, "xmi");
-      RETURN_CASE(DebugSubsectionKind, CrossScopeExports, "xme");
-      RETURN_CASE(DebugSubsectionKind, ILLines, "il lines");
-      RETURN_CASE(DebugSubsectionKind, FuncMDTokenMap, "func md token map");
-      RETURN_CASE(DebugSubsectionKind, TypeMDTokenMap, "type md token map");
-      RETURN_CASE(DebugSubsectionKind, MergedAssemblyInput,
-                  "merged assembly input");
-      RETURN_CASE(DebugSubsectionKind, CoffSymbolRVA, "coff symbol rva");
-    }
-  } else {
-    switch (Kind) {
-      RETURN_CASE(DebugSubsectionKind, None, "none");
-      RETURN_CASE(DebugSubsectionKind, Symbols, "DEBUG_S_SYMBOLS");
-      RETURN_CASE(DebugSubsectionKind, Lines, "DEBUG_S_LINES");
-      RETURN_CASE(DebugSubsectionKind, StringTable, "DEBUG_S_STRINGTABLE");
-      RETURN_CASE(DebugSubsectionKind, FileChecksums, "DEBUG_S_FILECHKSMS");
-      RETURN_CASE(DebugSubsectionKind, FrameData, "DEBUG_S_FRAMEDATA");
-      RETURN_CASE(DebugSubsectionKind, InlineeLines, "DEBUG_S_INLINEELINES");
-      RETURN_CASE(DebugSubsectionKind, CrossScopeImports,
-                  "DEBUG_S_CROSSSCOPEIMPORTS");
-      RETURN_CASE(DebugSubsectionKind, CrossScopeExports,
-                  "DEBUG_S_CROSSSCOPEEXPORTS");
-      RETURN_CASE(DebugSubsectionKind, ILLines, "DEBUG_S_IL_LINES");
-      RETURN_CASE(DebugSubsectionKind, FuncMDTokenMap,
-                  "DEBUG_S_FUNC_MDTOKEN_MAP");
-      RETURN_CASE(DebugSubsectionKind, TypeMDTokenMap,
-                  "DEBUG_S_TYPE_MDTOKEN_MAP");
-      RETURN_CASE(DebugSubsectionKind, MergedAssemblyInput,
-                  "DEBUG_S_MERGED_ASSEMBLYINPUT");
-      RETURN_CASE(DebugSubsectionKind, CoffSymbolRVA,
-                  "DEBUG_S_COFF_SYMBOL_RVA");
-    }
-  }
-  return formatUnknownEnum(Kind);
-}
-
-std::string llvm::pdb::formatSymbolKind(SymbolKind K) {
-  switch (uint32_t(K)) {
-#define SYMBOL_RECORD(EnumName, value, name)                                   \
-  case EnumName:                                                               \
-    return #EnumName;
-#define CV_SYMBOL(EnumName, value) SYMBOL_RECORD(EnumName, value, EnumName)
-#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
-  }
-  return formatUnknownEnum(K);
-}
-
-std::string llvm::pdb::formatTypeLeafKind(TypeLeafKind K) {
-  switch (K) {
-#define TYPE_RECORD(EnumName, value, name)                                     \
-  case EnumName:                                                               \
-    return #EnumName;
-#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
-  default:
-    return formatv("UNKNOWN RECORD ({0:X})",
-                   static_cast<std::underlying_type_t<TypeLeafKind>>(K))
-        .str();
-  }
-}
-
-std::string llvm::pdb::formatSegmentOffset(uint16_t Segment, uint32_t Offset) {
-  return std::string(formatv("{0:4}:{1:4}", Segment, Offset));
-}
-
-#define PUSH_CHARACTERISTIC_FLAG(Enum, TheOpt, Value, Style, Descriptive)      \
-  PUSH_FLAG(Enum, TheOpt, Value,                                               \
-            ((Style == CharacteristicStyle::HeaderDefinition) ? #TheOpt        \
-                                                              : Descriptive))
-
-#define PUSH_MASKED_CHARACTERISTIC_FLAG(Enum, Mask, TheOpt, Value, Style,      \
-                                        Descriptive)                           \
-  PUSH_MASKED_FLAG(Enum, Mask, TheOpt, Value,                                  \
-                   ((Style == CharacteristicStyle::HeaderDefinition)           \
-                        ? #TheOpt                                              \
-                        : Descriptive))
-
-std::string llvm::pdb::formatSectionCharacteristics(uint32_t IndentLevel,
-                                                    uint32_t C,
-                                                    uint32_t FlagsPerLine,
-                                                    StringRef Separator,
-                                                    CharacteristicStyle Style) {
-  using SC = COFF::SectionCharacteristics;
-  std::vector<std::string> Opts;
-  if (C == COFF::SC_Invalid)
-    return "invalid";
-  if (C == 0)
-    return "none";
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_TYPE_NOLOAD, C, Style, "noload");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_TYPE_NO_PAD, C, Style, "no padding");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_CNT_CODE, C, Style, "code");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_CNT_INITIALIZED_DATA, C, Style,
-                           "initialized data");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_CNT_UNINITIALIZED_DATA, C, Style,
-                           "uninitialized data");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_OTHER, C, Style, "other");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_INFO, C, Style, "info");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_REMOVE, C, Style, "remove");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_COMDAT, C, Style, "comdat");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_GPREL, C, Style, "gp rel");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_PURGEABLE, C, Style, "purgeable");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_16BIT, C, Style, "16-bit");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_LOCKED, C, Style, "locked");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_PRELOAD, C, Style, "preload");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_1BYTES, C,
-                                  Style, "1 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_2BYTES, C,
-                                  Style, "2 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_4BYTES, C,
-                                  Style, "4 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_8BYTES, C,
-                                  Style, "8 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_16BYTES, C,
-                                  Style, "16 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_32BYTES, C,
-                                  Style, "32 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_64BYTES, C,
-                                  Style, "64 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_128BYTES, C,
-                                  Style, "128 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_256BYTES, C,
-                                  Style, "256 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_512BYTES, C,
-                                  Style, "512 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_1024BYTES, C,
-                                  Style, "1024 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_2048BYTES, C,
-                                  Style, "2048 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_4096BYTES, C,
-                                  Style, "4096 byte align");
-  PUSH_MASKED_CHARACTERISTIC_FLAG(SC, 0xF00000, IMAGE_SCN_ALIGN_8192BYTES, C,
-                                  Style, "8192 byte align");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_LNK_NRELOC_OVFL, C, Style,
-                           "noreloc overflow");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_DISCARDABLE, C, Style,
-                           "discardable");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_NOT_CACHED, C, Style,
-                           "not cached");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_NOT_PAGED, C, Style, "not paged");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_SHARED, C, Style, "shared");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_EXECUTE, C, Style,
-                           "execute permissions");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_READ, C, Style,
-                           "read permissions");
-  PUSH_CHARACTERISTIC_FLAG(SC, IMAGE_SCN_MEM_WRITE, C, Style,
-                           "write permissions");
-  return typesetItemList(Opts, IndentLevel, FlagsPerLine, Separator);
-}
diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.h b/llvm/tools/llvm-pdbutil/FormatUtil.h
deleted file mode 100644
index b99ccec215b5..000000000000
--- a/llvm/tools/llvm-pdbutil/FormatUtil.h
+++ /dev/null
@@ -1,141 +0,0 @@
-//===- FormatUtil.h ------------------------------------------- *- C++ --*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVMPDBUTIL_FORMAT_UTIL_H
-#define LLVM_TOOLS_LLVMPDBUTIL_FORMAT_UTIL_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/FormatAdapters.h"
-#include "llvm/Support/FormatVariadic.h"
-
-#include <string>
-#include <type_traits>
-
-namespace llvm {
-namespace pdb {
-
-std::string truncateStringBack(StringRef S, uint32_t MaxLen);
-std::string truncateStringMiddle(StringRef S, uint32_t MaxLen);
-std::string truncateStringFront(StringRef S, uint32_t MaxLen);
-std::string truncateQuotedNameFront(StringRef Label, StringRef Name,
-                                    uint32_t MaxLen);
-std::string truncateQuotedNameBack(StringRef Label, StringRef Name,
-                                   uint32_t MaxLen);
-
-#define PUSH_MASKED_FLAG(Enum, Mask, TheOpt, Value, Text)                      \
-  if (Enum::TheOpt == (Value & Mask))                                          \
-    Opts.push_back(Text);
-
-#define PUSH_FLAG(Enum, TheOpt, Value, Text)                                   \
-  PUSH_MASKED_FLAG(Enum, Enum::TheOpt, TheOpt, Value, Text)
-
-#define RETURN_CASE(Enum, X, Ret)                                              \
-  case Enum::X:                                                                \
-    return Ret;
-
-template <typename T> std::string formatUnknownEnum(T Value) {
-  return formatv("unknown ({0})",
-                 static_cast<std::underlying_type_t<T>>(Value))
-      .str();
-}
-
-std::string formatSegmentOffset(uint16_t Segment, uint32_t Offset);
-
-enum class CharacteristicStyle {
-  HeaderDefinition, // format as windows header definition
-  Descriptive,      // format as human readable words
-};
-std::string formatSectionCharacteristics(
-    uint32_t IndentLevel, uint32_t C, uint32_t FlagsPerLine,
-    StringRef Separator,
-    CharacteristicStyle Style = CharacteristicStyle::HeaderDefinition);
-
-std::string typesetItemList(ArrayRef<std::string> Opts, uint32_t IndentLevel,
-                            uint32_t GroupSize, StringRef Sep);
-
-std::string typesetStringList(uint32_t IndentLevel,
-                              ArrayRef<StringRef> Strings);
-
-std::string formatChunkKind(codeview::DebugSubsectionKind Kind,
-                            bool Friendly = true);
-std::string formatSymbolKind(codeview::SymbolKind K);
-std::string formatTypeLeafKind(codeview::TypeLeafKind K);
-
-/// Returns the number of digits in the given integer.
-inline int NumDigits(uint64_t N) {
-  if (N < 10ULL)
-    return 1;
-  if (N < 100ULL)
-    return 2;
-  if (N < 1000ULL)
-    return 3;
-  if (N < 10000ULL)
-    return 4;
-  if (N < 100000ULL)
-    return 5;
-  if (N < 1000000ULL)
-    return 6;
-  if (N < 10000000ULL)
-    return 7;
-  if (N < 100000000ULL)
-    return 8;
-  if (N < 1000000000ULL)
-    return 9;
-  if (N < 10000000000ULL)
-    return 10;
-  if (N < 100000000000ULL)
-    return 11;
-  if (N < 1000000000000ULL)
-    return 12;
-  if (N < 10000000000000ULL)
-    return 13;
-  if (N < 100000000000000ULL)
-    return 14;
-  if (N < 1000000000000000ULL)
-    return 15;
-  if (N < 10000000000000000ULL)
-    return 16;
-  if (N < 100000000000000000ULL)
-    return 17;
-  if (N < 1000000000000000000ULL)
-    return 18;
-  if (N < 10000000000000000000ULL)
-    return 19;
-  return 20;
-}
-
-namespace detail {
-template <typename T>
-struct EndianAdapter final
-    : public FormatAdapter<support::detail::packed_endian_specific_integral<
-          T, support::little, support::unaligned>> {
-  using EndianType =
-      support::detail::packed_endian_specific_integral<T, support::little,
-                                                       support::unaligned>;
-
-  explicit EndianAdapter(EndianType &&Item)
-      : FormatAdapter<EndianType>(std::move(Item)) {}
-
-  void format(llvm::raw_ostream &Stream, StringRef Style) override {
-    format_provider<T>::format(static_cast<T>(this->Item), Stream, Style);
-  }
-};
-} // namespace detail
-
-template <typename T>
-detail::EndianAdapter<T>
-fmtle(support::detail::packed_endian_specific_integral<T, support::little,
-                                                       support::unaligned>
-          Value) {
-  return detail::EndianAdapter<T>(std::move(Value));
-}
-}
-}
-} // namespace llvm
-#endif
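// Usage sketch for the fmtle adapter deleted above (hypothetical value;
// support::ulittle32_t is the unaligned little-endian uint32_t alias from
// llvm/Support/Endian.h):
//
//   support::ulittle32_t Val;
//   Val = 0x1234;
//   outs() << formatv("{0:X}", fmtle(Val)); // formats as the native uint32_t
//
// NumDigits is a plain comparison ladder, so NumDigits(999) == 3 and
// NumDigits(1000) == 4; it is typically used to size printed columns.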
diff --git a/llvm/tools/llvm-pdbutil/InputFile.cpp b/llvm/tools/llvm-pdbutil/InputFile.cpp
deleted file mode 100644
index 40b35625b6f8..000000000000
--- a/llvm/tools/llvm-pdbutil/InputFile.cpp
+++ /dev/null
@@ -1,510 +0,0 @@
-//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "InputFile.h"
-
-#include "FormatUtil.h"
-#include "LinePrinter.h"
-
-#include "llvm/BinaryFormat/Magic.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
-#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
-#include "llvm/DebugInfo/PDB/PDB.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FormatVariadic.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-using namespace llvm::object;
-using namespace llvm::pdb;
-
-InputFile::InputFile() {}
-InputFile::~InputFile() {}
-
-static Expected<ModuleDebugStreamRef>
-getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
-  ExitOnError Err("Unexpected error: ");
-
-  auto &Dbi = Err(File.getPDBDbiStream());
-  const auto &Modules = Dbi.modules();
-  if (Index >= Modules.getModuleCount())
-    return make_error<RawError>(raw_error_code::index_out_of_bounds,
-                                "Invalid module index");
-
-  auto Modi = Modules.getModuleDescriptor(Index);
-
-  ModuleName = Modi.getModuleName();
-
-  uint16_t ModiStream = Modi.getModuleStreamIndex();
-  if (ModiStream == kInvalidStreamIndex)
-    return make_error<RawError>(raw_error_code::no_stream,
-                                "Module stream not present");
-
-  auto ModStreamData = File.createIndexedStream(ModiStream);
-
-  ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
-  if (auto EC = ModS.reload())
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Invalid module stream");
-
-  return std::move(ModS);
-}
-
-static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
-                                             StringRef Name,
-                                             BinaryStreamReader &Reader) {
-  if (Expected<StringRef> NameOrErr = Section.getName()) {
-    if (*NameOrErr != Name)
-      return false;
-  } else {
-    consumeError(NameOrErr.takeError());
-    return false;
-  }
-
-  Expected<StringRef> ContentsOrErr = Section.getContents();
-  if (!ContentsOrErr) {
-    consumeError(ContentsOrErr.takeError());
-    return false;
-  }
-
-  Reader = BinaryStreamReader(*ContentsOrErr, support::little);
-  uint32_t Magic;
-  if (Reader.bytesRemaining() < sizeof(uint32_t))
-    return false;
-  cantFail(Reader.readInteger(Magic));
-  if (Magic != COFF::DEBUG_SECTION_MAGIC)
-    return false;
-  return true;
-}
-
-static inline bool isDebugSSection(object::SectionRef Section,
-                                   DebugSubsectionArray &Subsections) {
-  BinaryStreamReader Reader;
-  if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
-    return false;
-
-  cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
-  return true;
-}
-
-static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
-  BinaryStreamReader Reader;
-  if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
-      !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
-    return false;
-  cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
-  return true;
-}
-
-static std::string formatChecksumKind(FileChecksumKind Kind) {
-  switch (Kind) {
-    RETURN_CASE(FileChecksumKind, None, "None");
-    RETURN_CASE(FileChecksumKind, MD5, "MD5");
-    RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
-    RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
-  }
-  return formatUnknownEnum(Kind);
-}
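// The section-classification helpers above share one pattern: match the
// section name, then read and check the 4-byte CodeView magic at the start
// of its contents. A condensed sketch of that check (hypothetical Contents
// buffer, names chosen for illustration):
//
//   BinaryStreamReader R(Contents, support::little);
//   uint32_t Magic = 0;
//   bool IsCodeView = R.bytesRemaining() >= sizeof(uint32_t) &&
//                     !errorToBool(R.readInteger(Magic)) &&
//                     Magic == COFF::DEBUG_SECTION_MAGIC;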
-
-template <typename... Args>
-static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
-  if (Append)
-    Printer.format(std::forward<Args>(args)...);
-  else
-    Printer.formatLine(std::forward<Args>(args)...);
-}
-
-SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
-  if (!File)
-    return;
-
-  if (File->isPdb())
-    initializeForPdb(GroupIndex);
-  else {
-    Name = ".debug$S";
-    uint32_t I = 0;
-    for (const auto &S : File->obj().sections()) {
-      DebugSubsectionArray SS;
-      if (!isDebugSSection(S, SS))
-        continue;
-
-      if (!SC.hasChecksums() || !SC.hasStrings())
-        SC.initialize(SS);
-
-      if (I == GroupIndex)
-        Subsections = SS;
-
-      if (SC.hasChecksums() && SC.hasStrings())
-        break;
-    }
-    rebuildChecksumMap();
-  }
-}
-
-StringRef SymbolGroup::name() const { return Name; }
-
-void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
-  Subsections = SS;
-}
-
-void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
-
-void SymbolGroup::initializeForPdb(uint32_t Modi) {
-  assert(File && File->isPdb());
-
-  // PDB always uses the same string table, but each module has its own
-  // checksums. So we only set the strings if they're not already set.
-  if (!SC.hasStrings()) {
-    auto StringTable = File->pdb().getStringTable();
-    if (StringTable)
-      SC.setStrings(StringTable->getStringTable());
-    else
-      consumeError(StringTable.takeError());
-  }
-
-  SC.resetChecksums();
-  auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
-  if (!MDS) {
-    consumeError(MDS.takeError());
-    return;
-  }
-
-  DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
-  Subsections = DebugStream->getSubsectionsArray();
-  SC.initialize(Subsections);
-  rebuildChecksumMap();
-}
-
-void SymbolGroup::rebuildChecksumMap() {
-  if (!SC.hasChecksums())
-    return;
-
-  for (const auto &Entry : SC.checksums()) {
-    auto S = SC.strings().getString(Entry.FileNameOffset);
-    if (!S)
-      continue;
-    ChecksumsByFile[*S] = Entry;
-  }
-}
-
-const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
-  assert(File && File->isPdb() && DebugStream);
-  return *DebugStream;
-}
-
-Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
-  return SC.strings().getString(Offset);
-}
-
-void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
-                                     bool Append) const {
-  auto FC = ChecksumsByFile.find(File);
-  if (FC == ChecksumsByFile.end()) {
-    formatInternal(Printer, Append, "- (no checksum) {0}", File);
-    return;
-  }
-
-  formatInternal(Printer, Append, "- ({0}: {1}) {2}",
-                 formatChecksumKind(FC->getValue().Kind),
-                 toHex(FC->getValue().Checksum), File);
-}
-
-void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
-                                            uint32_t Offset,
-                                            bool Append) const {
-  if (!SC.hasChecksums()) {
-    formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
-    return;
-  }
-
-  auto Iter = SC.checksums().getArray().at(Offset);
-  if (Iter == SC.checksums().getArray().end()) {
-    formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
-    return;
-  }
-
-  uint32_t FO = Iter->FileNameOffset;
-  auto ExpectedFile = getNameFromStringTable(FO);
-  if (!ExpectedFile) {
-    formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
-    consumeError(ExpectedFile.takeError());
-    return;
-  }
-  if (Iter->Kind == FileChecksumKind::None) {
-    formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
-  } else {
-    formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
-                   formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
-  }
-}
-
-Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
-  InputFile IF;
-  if (!llvm::sys::fs::exists(Path))
-    return make_error<StringError>(formatv("File {0} not found", Path),
-                                   inconvertibleErrorCode());
-
-  file_magic Magic;
-  if (auto EC = identify_magic(Path, Magic))
-    return make_error<StringError>(
-        formatv("Unable to identify file type for file {0}", Path), EC);
-
-  if (Magic == file_magic::coff_object) {
-    Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
-    if (!BinaryOrErr)
-      return BinaryOrErr.takeError();
-
-    IF.CoffObject = std::move(*BinaryOrErr);
-    IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
-    return std::move(IF);
-  }
-
-  if (Magic == file_magic::pdb) {
-    std::unique_ptr<IPDBSession> Session;
-    if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
-      return std::move(Err);
-
-    IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
-    IF.PdbOrObj = &IF.PdbSession->getPDBFile();
-
-    return std::move(IF);
-  }
-
-  if (!AllowUnknownFile)
-    return make_error<StringError>(
-        formatv("File {0} is not a supported file type", Path),
-        inconvertibleErrorCode());
-
-  auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
-                                      /*RequiresNullTerminator=*/false);
-  if (!Result)
-    return make_error<StringError>(
-        formatv("File {0} could not be opened", Path), Result.getError());
-
-  IF.UnknownFile = std::move(*Result);
-  IF.PdbOrObj = IF.UnknownFile.get();
-  return std::move(IF);
-}
-
-PDBFile &InputFile::pdb() {
-  assert(isPdb());
-  return *PdbOrObj.get<PDBFile *>();
-}
-
-const PDBFile &InputFile::pdb() const {
-  assert(isPdb());
-  return *PdbOrObj.get<PDBFile *>();
-}
-
-object::COFFObjectFile &InputFile::obj() {
-  assert(isObj());
-  return *PdbOrObj.get<object::COFFObjectFile *>();
-}
-
-const object::COFFObjectFile &InputFile::obj() const {
-  assert(isObj());
-  return *PdbOrObj.get<object::COFFObjectFile *>();
-}
-
-MemoryBuffer &InputFile::unknown() {
-  assert(isUnknown());
-  return *PdbOrObj.get<MemoryBuffer *>();
-}
-
-const MemoryBuffer &InputFile::unknown() const {
-  assert(isUnknown());
-  return *PdbOrObj.get<MemoryBuffer *>();
-}
-
-StringRef InputFile::getFilePath() const {
-  if (isPdb())
-    return pdb().getFilePath();
-  if (isObj())
-    return obj().getFileName();
-  assert(isUnknown());
-  return unknown().getBufferIdentifier();
-}
-
-bool InputFile::hasTypes() const {
-  if (isPdb())
-    return pdb().hasPDBTpiStream();
-
-  for (const auto &Section : obj().sections()) {
-    CVTypeArray Types;
-    if (isDebugTSection(Section, Types))
-      return true;
-  }
-  return false;
-}
-
-bool InputFile::hasIds() const {
-  if (isObj())
-    return false;
-  return pdb().hasPDBIpiStream();
-}
-
-bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
-
-bool InputFile::isObj() const {
-  return PdbOrObj.is<object::COFFObjectFile *>();
-}
-
-bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
-
-codeview::LazyRandomTypeCollection &
-InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
-  if (Types && Kind == kTypes)
-    return *Types;
-  if (Ids && Kind == kIds)
-    return *Ids;
-
-  if (Kind == kIds) {
-    assert(isPdb() && pdb().hasPDBIpiStream());
-  }
-
-  // If the collection was already initialized, we should have just returned it
-  // in step 1.
-  if (isPdb()) {
-    TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
-    auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
-                                           : pdb().getPDBTpiStream());
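// Typical use of the InputFile API implemented above (hypothetical path;
// minimal error handling shown):
//
//   Expected<InputFile> EIF = InputFile::open("sample.pdb");
//   if (!EIF)
//     return EIF.takeError();
//   if (EIF->isPdb()) {
//     PDBFile &File = EIF->pdb();
//     // ... query File; for COFF inputs use isObj()/obj() instead.
//   }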
-
-    auto &Array = Stream.typeArray();
-    uint32_t Count = Stream.getNumTypeRecords();
-    auto Offsets = Stream.getTypeIndexOffsets();
-    Collection =
-        std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
-    return *Collection;
-  }
-
-  assert(isObj());
-  assert(Kind == kTypes);
-  assert(!Types);
-
-  for (const auto &Section : obj().sections()) {
-    CVTypeArray Records;
-    if (!isDebugTSection(Section, Records))
-      continue;
-
-    Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
-    return *Types;
-  }
-
-  Types = std::make_unique<LazyRandomTypeCollection>(100);
-  return *Types;
-}
-
-codeview::LazyRandomTypeCollection &InputFile::types() {
-  return getOrCreateTypeCollection(kTypes);
-}
-
-codeview::LazyRandomTypeCollection &InputFile::ids() {
-  // Object files have only one type stream that contains both types and ids.
-  // Similarly, some PDBs don't contain an IPI stream, and for those both types
-  // and IDs are in the same stream.
-  if (isObj() || !pdb().hasPDBIpiStream())
-    return types();
-
-  return getOrCreateTypeCollection(kIds);
-}
-
-iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
-  return make_range<SymbolGroupIterator>(symbol_groups_begin(),
-                                         symbol_groups_end());
-}
-
-SymbolGroupIterator InputFile::symbol_groups_begin() {
-  return SymbolGroupIterator(*this);
-}
-
-SymbolGroupIterator InputFile::symbol_groups_end() {
-  return SymbolGroupIterator();
-}
-
-SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
-
-SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
-  if (File.isObj()) {
-    SectionIter = File.obj().section_begin();
-    scanToNextDebugS();
-  }
-}
-
-bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
-  bool E = isEnd();
-  bool RE = R.isEnd();
-  if (E || RE)
-    return E == RE;
-
-  if (Value.File != R.Value.File)
-    return false;
-  return Index == R.Index;
-}
-
-const SymbolGroup &SymbolGroupIterator::operator*() const {
-  assert(!isEnd());
-  return Value;
-}
-SymbolGroup &SymbolGroupIterator::operator*() {
-  assert(!isEnd());
-  return Value;
-}
-
-SymbolGroupIterator &SymbolGroupIterator::operator++() {
-  assert(Value.File && !isEnd());
-  ++Index;
-  if (isEnd())
-    return *this;
-
-  if (Value.File->isPdb()) {
-    Value.updatePdbModi(Index);
-    return *this;
-  }
-
-  scanToNextDebugS();
-  return *this;
-}
-
-void SymbolGroupIterator::scanToNextDebugS() {
-  assert(SectionIter.hasValue());
-  auto End = Value.File->obj().section_end();
-  auto &Iter = *SectionIter;
-  assert(!isEnd());
-
-  while (++Iter != End) {
-    DebugSubsectionArray SS;
-    SectionRef SR = *Iter;
-    if (!isDebugSSection(SR, SS))
-      continue;
-
-    Value.updateDebugS(SS);
-    return;
-  }
-}
-
-bool SymbolGroupIterator::isEnd() const {
-  if (!Value.File)
-    return true;
-  if (Value.File->isPdb()) {
-    auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
-    uint32_t Count = Dbi.modules().getModuleCount();
-    assert(Index <= Count);
-    return Index == Count;
-  }
-
-  assert(SectionIter.hasValue());
-  return *SectionIter == Value.File->obj().section_end();
-}
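// Iteration sketch for the symbol-group API implemented above (assumes an
// already-opened InputFile named IF; names are illustrative only):
//
//   for (const SymbolGroup &SG : IF.symbol_groups())
//     outs() << SG.name() << "\n";
//
// For a PDB this visits one group per module in the DBI stream; for a COFF
// object it visits one group per .debug$S section.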
diff --git a/llvm/tools/llvm-pdbutil/InputFile.h b/llvm/tools/llvm-pdbutil/InputFile.h
deleted file mode 100644
index 633ab34a54d4..000000000000
--- a/llvm/tools/llvm-pdbutil/InputFile.h
+++ /dev/null
@@ -1,154 +0,0 @@
-//===- InputFile.h -------------------------------------------- *- C++ --*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_INPUTFILE_H
-#define LLVM_TOOLS_LLVMPDBDUMP_INPUTFILE_H
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/iterator.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
-#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Error.h"
-
-namespace llvm {
-namespace codeview {
-class LazyRandomTypeCollection;
-}
-namespace object {
-class COFFObjectFile;
-} // namespace object
-
-namespace pdb {
-class InputFile;
-class LinePrinter;
-class PDBFile;
-class NativeSession;
-class SymbolGroupIterator;
-class SymbolGroup;
-
-class InputFile {
-  InputFile();
-
-  std::unique_ptr<NativeSession> PdbSession;
-  object::OwningBinary<object::Binary> CoffObject;
-  std::unique_ptr<MemoryBuffer> UnknownFile;
-  PointerUnion<PDBFile *, object::COFFObjectFile *, MemoryBuffer *> PdbOrObj;
-
-  using TypeCollectionPtr = std::unique_ptr<codeview::LazyRandomTypeCollection>;
-
-  TypeCollectionPtr Types;
-  TypeCollectionPtr Ids;
-
-  enum TypeCollectionKind { kTypes, kIds };
-  codeview::LazyRandomTypeCollection &
-  getOrCreateTypeCollection(TypeCollectionKind Kind);
-
-public:
-  ~InputFile();
-  InputFile(InputFile &&Other) = default;
-
-  static Expected<InputFile> open(StringRef Path,
-                                  bool AllowUnknownFile = false);
-
-  PDBFile &pdb();
-  const PDBFile &pdb() const;
-  object::COFFObjectFile &obj();
-  const object::COFFObjectFile &obj() const;
-  MemoryBuffer &unknown();
-  const MemoryBuffer &unknown() const;
-
-  StringRef getFilePath() const;
-
-  bool hasTypes() const;
-  bool hasIds() const;
-
-  codeview::LazyRandomTypeCollection &types();
-  codeview::LazyRandomTypeCollection &ids();
-
-  iterator_range<SymbolGroupIterator> symbol_groups();
-  SymbolGroupIterator symbol_groups_begin();
-  SymbolGroupIterator symbol_groups_end();
-
-  bool isPdb() const;
-  bool isObj() const;
-  bool isUnknown() const;
-};
-
-class SymbolGroup {
-  friend class SymbolGroupIterator;
-
-public:
-  explicit SymbolGroup(InputFile *File, uint32_t GroupIndex = 0);
-
-  Expected<StringRef> getNameFromStringTable(uint32_t Offset) const;
-
-  void formatFromFileName(LinePrinter &Printer, StringRef File,
-                          bool Append = false) const;
-
-  void formatFromChecksumsOffset(LinePrinter &Printer, uint32_t Offset,
-                                 bool Append = false) const;
-
-  StringRef name() const;
-
-  codeview::DebugSubsectionArray getDebugSubsections() const {
-    return Subsections;
-  }
-  const ModuleDebugStreamRef &getPdbModuleStream() const;
-
-  const InputFile &getFile() const { return *File; }
-  InputFile &getFile() { return *File; }
-
-  bool hasDebugStream() const { return DebugStream != nullptr; }
-
-private:
-  void initializeForPdb(uint32_t Modi);
-  void updatePdbModi(uint32_t Modi);
-  void updateDebugS(const codeview::DebugSubsectionArray &SS);
-
-  void rebuildChecksumMap();
-  InputFile *File = nullptr;
-  StringRef Name;
-  codeview::DebugSubsectionArray Subsections;
-  std::shared_ptr<ModuleDebugStreamRef> DebugStream;
-  codeview::StringsAndChecksumsRef SC;
-  StringMap<codeview::FileChecksumEntry> ChecksumsByFile;
-};
-
-class SymbolGroupIterator
-    : public iterator_facade_base<SymbolGroupIterator,
-                                  std::forward_iterator_tag, SymbolGroup> {
-public:
-  SymbolGroupIterator();
-  explicit SymbolGroupIterator(InputFile &File);
-  SymbolGroupIterator(const SymbolGroupIterator &Other) = default;
-  SymbolGroupIterator &operator=(const SymbolGroupIterator &R) = default;
-
-  const SymbolGroup &operator*() const;
-  SymbolGroup &operator*();
-
-  bool operator==(const SymbolGroupIterator &R) const;
-  SymbolGroupIterator &operator++();
-
-private:
-  void scanToNextDebugS();
-  bool isEnd() const;
-
-  uint32_t Index = 0;
-  Optional<object::section_iterator> SectionIter;
-  SymbolGroup Value;
-};
-
-} // namespace pdb
-} // namespace llvm
-
-#endif
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.cpp b/llvm/tools/llvm-pdbutil/LinePrinter.cpp
deleted file mode 100644
index dd6ca5bf41b1..000000000000
--- a/llvm/tools/llvm-pdbutil/LinePrinter.cpp
+++ /dev/null
@@ -1,335 +0,0 @@
-//===- LinePrinter.cpp ------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "LinePrinter.h"
-
-#include "llvm-pdbutil.h"
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/MSF/MSFCommon.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/UDTLayout.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/FormatAdapters.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Regex.h"
-
-#include <algorithm>
-
-using namespace llvm;
-using namespace llvm::msf;
-using namespace llvm::pdb;
-
-namespace {
-bool IsItemExcluded(llvm::StringRef Item,
-                    std::list<llvm::Regex> &IncludeFilters,
-                    std::list<llvm::Regex> &ExcludeFilters) {
-  if (Item.empty())
-    return false;
-
-  auto match_pred = [Item](llvm::Regex &R) { return R.match(Item); };
-
-  // Include takes priority over exclude. If the user specified include
-  // filters, and none of them include this item, the item is gone.
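// Concretely, with IncludeFilters = {"^foo"} and ExcludeFilters = {"bar"}:
//   "foobar" -> excluded (an exclude pattern matches, even though included)
//   "foobaz" -> kept     (an include pattern matches, no exclude matches)
//   "quux"   -> excluded (include filters exist but none of them match)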
-  if (!IncludeFilters.empty() && !any_of(IncludeFilters, match_pred))
-    return true;
-
-  if (any_of(ExcludeFilters, match_pred))
-    return true;
-
-  return false;
-}
-}
-
-using namespace llvm;
-
-LinePrinter::LinePrinter(int Indent, bool UseColor, llvm::raw_ostream &Stream)
-    : OS(Stream), IndentSpaces(Indent), CurrentIndent(0), UseColor(UseColor) {
-  SetFilters(ExcludeTypeFilters, opts::pretty::ExcludeTypes.begin(),
-             opts::pretty::ExcludeTypes.end());
-  SetFilters(ExcludeSymbolFilters, opts::pretty::ExcludeSymbols.begin(),
-             opts::pretty::ExcludeSymbols.end());
-  SetFilters(ExcludeCompilandFilters, opts::pretty::ExcludeCompilands.begin(),
-             opts::pretty::ExcludeCompilands.end());
-
-  SetFilters(IncludeTypeFilters, opts::pretty::IncludeTypes.begin(),
-             opts::pretty::IncludeTypes.end());
-  SetFilters(IncludeSymbolFilters, opts::pretty::IncludeSymbols.begin(),
-             opts::pretty::IncludeSymbols.end());
-  SetFilters(IncludeCompilandFilters, opts::pretty::IncludeCompilands.begin(),
-             opts::pretty::IncludeCompilands.end());
-}
-
-void LinePrinter::Indent(uint32_t Amount) {
-  if (Amount == 0)
-    Amount = IndentSpaces;
-  CurrentIndent += Amount;
-}
-
-void LinePrinter::Unindent(uint32_t Amount) {
-  if (Amount == 0)
-    Amount = IndentSpaces;
-  CurrentIndent = std::max<int>(0, CurrentIndent - Amount);
-}
-
-void LinePrinter::NewLine() {
-  OS << "\n";
-  OS.indent(CurrentIndent);
-}
-
-void LinePrinter::print(const Twine &T) { OS << T; }
-
-void LinePrinter::printLine(const Twine &T) {
-  NewLine();
-  OS << T;
-}
-
-bool LinePrinter::IsClassExcluded(const ClassLayout &Class) {
-  if (IsTypeExcluded(Class.getName(), Class.getSize()))
-    return true;
-  if (Class.deepPaddingSize() < opts::pretty::PaddingThreshold)
-    return true;
-  return false;
-}
-
-void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
-                               uint64_t StartOffset) {
-  NewLine();
-  OS << Label << " (";
-  if (!Data.empty()) {
-    OS << "\n";
-    OS << format_bytes_with_ascii(Data, StartOffset, 32, 4,
-                                  CurrentIndent + IndentSpaces, true);
-    NewLine();
-  }
-  OS << ")";
-}
-
-void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
-                               uint64_t Base, uint64_t StartOffset) {
-  NewLine();
-  OS << Label << " (";
-  if (!Data.empty()) {
-    OS << "\n";
-    Base += StartOffset;
-    OS << format_bytes_with_ascii(Data, Base, 32, 4,
-                                  CurrentIndent + IndentSpaces, true);
-    NewLine();
-  }
-  OS << ")";
-}
-
-namespace {
-struct Run {
-  Run() = default;
-  explicit Run(uint32_t Block) : Block(Block) {}
-  uint32_t Block = 0;
-  uint64_t ByteLen = 0;
-};
-} // namespace
-
-static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
-                                         const msf::MSFStreamLayout &Layout) {
-  std::vector<Run> Runs;
-  if (Layout.Length == 0)
-    return Runs;
-
-  ArrayRef<support::ulittle32_t> Blocks = Layout.Blocks;
-  assert(!Blocks.empty());
-  uint64_t StreamBytesRemaining = Layout.Length;
-  uint32_t CurrentBlock = Blocks[0];
-  Runs.emplace_back(CurrentBlock);
-  while (!Blocks.empty()) {
-    Run *CurrentRun = &Runs.back();
-    uint32_t NextBlock = Blocks.front();
-    if (NextBlock < CurrentBlock || (NextBlock - CurrentBlock > 1)) {
-      Runs.emplace_back(NextBlock);
-      CurrentRun = &Runs.back();
-    }
-    uint64_t Used =
-        std::min(static_cast<uint64_t>(BlockSize), StreamBytesRemaining);
-    CurrentRun->ByteLen += Used;
-    StreamBytesRemaining -= Used;
-    CurrentBlock = NextBlock;
-    Blocks = Blocks.drop_front();
-  }
-  return Runs;
-}
-
-static std::pair<Run, uint64_t> findRun(uint64_t Offset, ArrayRef<Run> Runs) {
-  for (const auto &R : Runs) {
-    if (Offset < R.ByteLen)
-      return std::make_pair(R, Offset);
-    Offset -= R.ByteLen;
-  }
-  llvm_unreachable("Invalid offset!");
-}
offset!"); -} - -void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File, - uint32_t StreamIdx, - StringRef StreamPurpose, uint64_t Offset, - uint64_t Size) { - if (StreamIdx >= File.getNumStreams()) { - formatLine("Stream {0}: Not present", StreamIdx); - return; - } - if (Size + Offset > File.getStreamByteSize(StreamIdx)) { - formatLine( - "Stream {0}: Invalid offset and size, range out of stream bounds", - StreamIdx); - return; - } - - auto S = File.createIndexedStream(StreamIdx); - if (!S) { - NewLine(); - formatLine("Stream {0}: Not present", StreamIdx); - return; - } - - uint64_t End = - (Size == 0) ? S->getLength() : std::min(Offset + Size, S->getLength()); - Size = End - Offset; - - formatLine("Stream {0}: {1} (dumping {2:N} / {3:N} bytes)", StreamIdx, - StreamPurpose, Size, S->getLength()); - AutoIndent Indent(*this); - BinaryStreamRef Slice(*S); - BinarySubstreamRef Substream; - Substream.Offset = Offset; - Substream.StreamData = Slice.drop_front(Offset).keep_front(Size); - - auto Layout = File.getStreamLayout(StreamIdx); - formatMsfStreamData(Label, File, Layout, Substream); -} - -void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File, - const msf::MSFStreamLayout &Stream, - BinarySubstreamRef Substream) { - BinaryStreamReader Reader(Substream.StreamData); - - auto Runs = computeBlockRuns(File.getBlockSize(), Stream); - - NewLine(); - OS << Label << " ("; - while (Reader.bytesRemaining() > 0) { - OS << "\n"; - - Run FoundRun; - uint64_t RunOffset; - std::tie(FoundRun, RunOffset) = findRun(Substream.Offset, Runs); - assert(FoundRun.ByteLen >= RunOffset); - uint64_t Len = FoundRun.ByteLen - RunOffset; - Len = std::min(Len, Reader.bytesRemaining()); - uint64_t Base = FoundRun.Block * File.getBlockSize() + RunOffset; - ArrayRef Data; - consumeError(Reader.readBytes(Data, Len)); - OS << format_bytes_with_ascii(Data, Base, 32, 4, - CurrentIndent + IndentSpaces, true); - if (Reader.bytesRemaining() > 0) { - NewLine(); - OS << formatv(" {0}", - fmt_align("", AlignStyle::Center, 114, '-')); - } - Substream.Offset += Len; - } - NewLine(); - OS << ")"; -} - -void LinePrinter::formatMsfStreamBlocks( - PDBFile &File, const msf::MSFStreamLayout &StreamLayout) { - auto Blocks = makeArrayRef(StreamLayout.Blocks); - uint64_t L = StreamLayout.Length; - - while (L > 0) { - NewLine(); - assert(!Blocks.empty()); - OS << formatv("Block {0} (\n", uint32_t(Blocks.front())); - uint64_t UsedBytes = - std::min(L, static_cast(File.getBlockSize())); - ArrayRef BlockData = - cantFail(File.getBlockData(Blocks.front(), File.getBlockSize())); - uint64_t BaseOffset = Blocks.front(); - BaseOffset *= File.getBlockSize(); - OS << format_bytes_with_ascii(BlockData, BaseOffset, 32, 4, - CurrentIndent + IndentSpaces, true); - NewLine(); - OS << ")"; - NewLine(); - L -= UsedBytes; - Blocks = Blocks.drop_front(); - } -} - -bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size) { - if (IsItemExcluded(TypeName, IncludeTypeFilters, ExcludeTypeFilters)) - return true; - if (Size < opts::pretty::SizeThreshold) - return true; - return false; -} - -bool LinePrinter::IsSymbolExcluded(llvm::StringRef SymbolName) { - return IsItemExcluded(SymbolName, IncludeSymbolFilters, ExcludeSymbolFilters); -} - -bool LinePrinter::IsCompilandExcluded(llvm::StringRef CompilandName) { - return IsItemExcluded(CompilandName, IncludeCompilandFilters, - ExcludeCompilandFilters); -} - -WithColor::WithColor(LinePrinter &P, PDB_ColorItem C) - : OS(P.OS), UseColor(P.hasColor()) { - if (UseColor) - 
-
-WithColor::WithColor(LinePrinter &P, PDB_ColorItem C)
-    : OS(P.OS), UseColor(P.hasColor()) {
-  if (UseColor)
-    applyColor(C);
-}
-
-WithColor::~WithColor() {
-  if (UseColor)
-    OS.resetColor();
-}
-
-void WithColor::applyColor(PDB_ColorItem C) {
-  switch (C) {
-  case PDB_ColorItem::None:
-    OS.resetColor();
-    return;
-  case PDB_ColorItem::Comment:
-    OS.changeColor(raw_ostream::GREEN, false);
-    return;
-  case PDB_ColorItem::Address:
-    OS.changeColor(raw_ostream::YELLOW, /*bold=*/true);
-    return;
-  case PDB_ColorItem::Keyword:
-    OS.changeColor(raw_ostream::MAGENTA, true);
-    return;
-  case PDB_ColorItem::Register:
-  case PDB_ColorItem::Offset:
-    OS.changeColor(raw_ostream::YELLOW, false);
-    return;
-  case PDB_ColorItem::Type:
-    OS.changeColor(raw_ostream::CYAN, true);
-    return;
-  case PDB_ColorItem::Identifier:
-    OS.changeColor(raw_ostream::CYAN, false);
-    return;
-  case PDB_ColorItem::Path:
-    OS.changeColor(raw_ostream::CYAN, false);
-    return;
-  case PDB_ColorItem::Padding:
-  case PDB_ColorItem::SectionHeader:
-    OS.changeColor(raw_ostream::RED, true);
-    return;
-  case PDB_ColorItem::LiteralValue:
-    OS.changeColor(raw_ostream::GREEN, true);
-    return;
-  }
-}
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.h b/llvm/tools/llvm-pdbutil/LinePrinter.h
deleted file mode 100644
index b6bb77280fd5..000000000000
--- a/llvm/tools/llvm-pdbutil/LinePrinter.h
+++ /dev/null
@@ -1,167 +0,0 @@
-//===- LinePrinter.h ------------------------------------------ *- C++ --*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_LINEPRINTER_H
-#define LLVM_TOOLS_LLVMPDBDUMP_LINEPRINTER_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <list>
-
-namespace llvm {
-namespace msf {
-class MSFStreamLayout;
-} // namespace msf
-namespace pdb {
-
-class ClassLayout;
-class PDBFile;
-
-class LinePrinter {
-  friend class WithColor;
-
-public:
-  LinePrinter(int Indent, bool UseColor, raw_ostream &Stream);
-
-  void Indent(uint32_t Amount = 0);
-  void Unindent(uint32_t Amount = 0);
-  void NewLine();
-
-  void printLine(const Twine &T);
-  void print(const Twine &T);
-  template <typename... Ts> void formatLine(const char *Fmt, Ts &&... Items) {
-    printLine(formatv(Fmt, std::forward<Ts>(Items)...));
-  }
-  template <typename... Ts> void format(const char *Fmt, Ts &&... Items) {
-    print(formatv(Fmt, std::forward<Ts>(Items)...));
-  }
-
-  void formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
-                    uint64_t StartOffset);
-  void formatBinary(StringRef Label, ArrayRef<uint8_t> Data, uint64_t BaseAddr,
-                    uint64_t StartOffset);
-
-  void formatMsfStreamData(StringRef Label, PDBFile &File, uint32_t StreamIdx,
-                           StringRef StreamPurpose, uint64_t Offset,
-                           uint64_t Size);
-  void formatMsfStreamData(StringRef Label, PDBFile &File,
-                           const msf::MSFStreamLayout &Stream,
-                           BinarySubstreamRef Substream);
-  void formatMsfStreamBlocks(PDBFile &File, const msf::MSFStreamLayout &Stream);
-
-  bool hasColor() const { return UseColor; }
-  raw_ostream &getStream() { return OS; }
-  int getIndentLevel() const { return CurrentIndent; }
-
-  bool IsClassExcluded(const ClassLayout &Class);
-  bool IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size);
-  bool IsSymbolExcluded(llvm::StringRef SymbolName);
-  bool IsCompilandExcluded(llvm::StringRef CompilandName);
-
-private:
-  template <typename Iter>
-  void SetFilters(std::list<Regex> &List, Iter Begin, Iter End) {
-    List.clear();
-    for (; Begin != End; ++Begin)
-      List.emplace_back(StringRef(*Begin));
-  }
-
-  raw_ostream &OS;
-  int IndentSpaces;
-  int CurrentIndent;
-  bool UseColor;
-
-  std::list<Regex> ExcludeCompilandFilters;
-  std::list<Regex> ExcludeTypeFilters;
-  std::list<Regex> ExcludeSymbolFilters;
-
-  std::list<Regex> IncludeCompilandFilters;
-  std::list<Regex> IncludeTypeFilters;
-  std::list<Regex> IncludeSymbolFilters;
-};
-
-struct PrintScope {
-  explicit PrintScope(LinePrinter &P, uint32_t IndentLevel)
-      : P(P), IndentLevel(IndentLevel) {}
-  explicit PrintScope(const PrintScope &Other, uint32_t LabelWidth)
-      : P(Other.P), IndentLevel(Other.IndentLevel), LabelWidth(LabelWidth) {}
-
-  LinePrinter &P;
-  uint32_t IndentLevel;
-  uint32_t LabelWidth = 0;
-};
-
-inline Optional<PrintScope> withLabelWidth(const Optional<PrintScope> &Scope,
-                                           uint32_t W) {
-  if (!Scope)
-    return None;
-  return PrintScope{*Scope, W};
-}
-
-struct AutoIndent {
-  explicit AutoIndent(LinePrinter &L, uint32_t Amount = 0)
-      : L(&L), Amount(Amount) {
-    L.Indent(Amount);
-  }
-  explicit AutoIndent(const Optional<PrintScope> &Scope) {
-    if (Scope.hasValue()) {
-      L = &Scope->P;
-      Amount = Scope->IndentLevel;
-    }
-  }
-  ~AutoIndent() {
-    if (L)
-      L->Unindent(Amount);
-  }
-
-  LinePrinter *L = nullptr;
-  uint32_t Amount = 0;
-};
-
-template <typename T>
-inline raw_ostream &operator<<(LinePrinter &Printer, const T &Item) {
-  return Printer.getStream() << Item;
-}
-
-enum class PDB_ColorItem {
-  None,
-  Address,
-  Type,
-  Comment,
-  Padding,
-  Keyword,
-  Offset,
-  Identifier,
-  Path,
-  SectionHeader,
-  LiteralValue,
-  Register,
-};
-
-class WithColor {
-public:
-  WithColor(LinePrinter &P, PDB_ColorItem C);
-  ~WithColor();
-
-  raw_ostream &get() { return OS; }
-
-private:
-  void applyColor(PDB_ColorItem C);
-  raw_ostream &OS;
-  bool UseColor;
-};
-}
-}
-
-#endif
diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index e6b5d21f36e5..8e17284871a9 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -8,17 +8,19 @@
 
 #include "MinimalSymbolDumper.h"
 
-#include "FormatUtil.h"
-#include "InputFile.h"
-#include "LinePrinter.h"
-
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/Formatters.h"
 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
"llvm/DebugInfo/PDB/Native/InputFile.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/FormatVariadic.h" using namespace llvm; diff --git a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp index 08006e9c62d4..be7e487673fb 100644 --- a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp +++ b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp @@ -8,8 +8,6 @@ #include "MinimalTypeDumper.h" -#include "FormatUtil.h" -#include "LinePrinter.h" #include "TypeReferenceTracker.h" #include "llvm-pdbutil.h" @@ -19,8 +17,13 @@ #include "llvm/DebugInfo/CodeView/Formatters.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" diff --git a/llvm/tools/llvm-pdbutil/OutputStyle.h b/llvm/tools/llvm-pdbutil/OutputStyle.h index da93c32053f3..8cc9016d79a2 100644 --- a/llvm/tools/llvm-pdbutil/OutputStyle.h +++ b/llvm/tools/llvm-pdbutil/OutputStyle.h @@ -9,9 +9,10 @@ #ifndef LLVM_TOOLS_LLVMPDBDUMP_OUTPUTSTYLE_H #define LLVM_TOOLS_LLVMPDBDUMP_OUTPUTSTYLE_H -#include "llvm/Support/Error.h" - namespace llvm { + +class Error; + namespace pdb { class OutputStyle { diff --git a/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp index cd01a4004819..895066146a9d 100644 --- a/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp +++ b/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "PrettyBuiltinDumper.h" -#include "LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" using namespace llvm; @@ -90,6 +90,8 @@ StringRef BuiltinDumper::getTypeName(const PDBSymbolTypeBuiltin &Symbol) { return "char16_t"; case PDB_BuiltinType::Char32: return "char32_t"; + case PDB_BuiltinType::Char8: + return "char8_t"; case PDB_BuiltinType::None: return "..."; } diff --git a/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp index b7eccac5988c..2285ed16d2a5 100644 --- a/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp +++ b/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp @@ -8,13 +8,14 @@ #include "PrettyClassDefinitionDumper.h" -#include "LinePrinter.h" #include "PrettyClassLayoutGraphicalDumper.h" #include "llvm-pdbutil.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include "llvm/DebugInfo/PDB/UDTLayout.h" diff --git a/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp index a522935e34f1..1ade7f397030 100644 --- 
+++ b/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
@@ -8,7 +8,6 @@
 
 #include "PrettyClassLayoutGraphicalDumper.h"
 
-#include "LinePrinter.h"
 #include "PrettyClassDefinitionDumper.h"
 #include "PrettyEnumDumper.h"
 #include "PrettyFunctionDumper.h"
@@ -17,8 +16,10 @@
 #include "PrettyVariableDumper.h"
 #include "llvm-pdbutil.h"
 
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
 #include "llvm/DebugInfo/PDB/UDTLayout.h"
 #include "llvm/Support/Format.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index cf769ff66472..591bd4f93702 100644
--- a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -8,7 +8,6 @@
 
 #include "PrettyCompilandDumper.h"
 
-#include "LinePrinter.h"
 #include "PrettyFunctionDumper.h"
 #include "llvm-pdbutil.h"
 
diff --git a/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
index 9ed5893f252e..64557ff09c72 100644
--- a/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -8,10 +8,11 @@
 
 #include "PrettyEnumDumper.h"
 
-#include "LinePrinter.h"
 #include "PrettyBuiltinDumper.h"
 #include "llvm-pdbutil.h"
 
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
index fede031ec0c0..34436c572c8a 100644
--- a/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
@@ -7,8 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "PrettyExternalSymbolDumper.h"
-#include "LinePrinter.h"
 
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
 #include "llvm/Support/Format.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index b820ca333965..83cf4d918322 100644
--- a/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -7,16 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "PrettyFunctionDumper.h"
-#include "LinePrinter.h"
 #include "PrettyBuiltinDumper.h"
 
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
 #include "llvm/DebugInfo/PDB/PDBExtras.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
"llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" diff --git a/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp index 2f7a39803ca5..9547d4e4ed35 100644 --- a/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp +++ b/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp @@ -8,7 +8,6 @@ #include "PrettyTypeDumper.h" -#include "LinePrinter.h" #include "PrettyBuiltinDumper.h" #include "PrettyClassDefinitionDumper.h" #include "PrettyEnumDumper.h" @@ -16,6 +15,8 @@ #include "PrettyTypedefDumper.h" #include "llvm-pdbutil.h" +#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h" @@ -25,6 +26,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/DebugInfo/PDB/UDTLayout.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/FormatVariadic.h" diff --git a/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp index ef73a8cdf9c4..197aa07299d1 100644 --- a/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp +++ b/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp @@ -8,13 +8,15 @@ #include "PrettyTypedefDumper.h" -#include "LinePrinter.h" #include "PrettyBuiltinDumper.h" #include "PrettyFunctionDumper.h" #include "PrettyTypeDumper.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" diff --git a/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp index 6dd7cc384cc9..e9ac6984356c 100644 --- a/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp +++ b/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp @@ -8,21 +8,23 @@ #include "PrettyVariableDumper.h" -#include "LinePrinter.h" #include "PrettyBuiltinDumper.h" #include "PrettyFunctionDumper.h" #include "llvm-pdbutil.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/Format.h" diff --git a/llvm/tools/llvm-pdbutil/StreamUtil.cpp b/llvm/tools/llvm-pdbutil/StreamUtil.cpp index d0d0a9fbe927..878fb77353fa 100644 --- a/llvm/tools/llvm-pdbutil/StreamUtil.cpp +++ b/llvm/tools/llvm-pdbutil/StreamUtil.cpp @@ -7,13 +7,13 @@ 
 //===----------------------------------------------------------------------===//
 
 #include "StreamUtil.h"
-#include "FormatUtil.h"
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
@@ -95,7 +95,7 @@ void llvm::pdb::discoverStreamPurposes(PDBFile &File,
   }
 
   Streams.resize(StreamCount);
-  for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
+  for (uint32_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
     if (StreamIdx == OldMSFDirectory)
       Streams[StreamIdx] =
           stream(StreamPurpose::Other, "Old MSF Directory", StreamIdx);
diff --git a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp
index f184f02e01ee..d813bc22a93c 100644
--- a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp
+++ b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp
@@ -9,10 +9,12 @@
 
 #include "TypeReferenceTracker.h"
 
 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Object/COFF.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h
index 8861731ab6ee..c586f6523c57 100644
--- a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h
+++ b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h
@@ -9,14 +9,13 @@
 #ifndef LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
 #define LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
 
-#include "InputFile.h"
-
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index b152ebd6dccb..3b922a7bea21 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -15,8 +15,6 @@
 #include "BytesOutputStyle.h"
 #include "DumpOutputStyle.h"
 #include "ExplainOutputStyle.h"
-#include "InputFile.h"
-#include "LinePrinter.h"
 #include "OutputStyle.h"
 #include "PrettyClassDefinitionDumper.h"
 #include "PrettyCompilandDumper.h"
@@ -44,14 +42,18 @@
 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
"llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/InputFile.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" @@ -67,6 +69,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolThunk.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" @@ -195,6 +198,8 @@ static cl::opt Typedefs("typedefs", cl::desc("Dump typedefs"), cl::sub(DiaDumpSubcommand)); } // namespace diadump +FilterOptions Filters; + namespace pretty { cl::list InputFilenames(cl::Positional, cl::desc(""), @@ -211,7 +216,7 @@ cl::opt ShowInjectedSourceContent( cl::list WithName( "with-name", cl::desc("Display any symbol or type with the specified exact name"), - cl::cat(TypeCategory), cl::ZeroOrMore, cl::sub(PrettySubcommand)); + cl::cat(TypeCategory), cl::sub(PrettySubcommand)); cl::opt Compilands("compilands", cl::desc("Display compilands"), cl::cat(TypeCategory), cl::sub(PrettySubcommand)); @@ -224,7 +229,7 @@ cl::opt Externals("externals", cl::desc("Dump external symbols"), cl::cat(TypeCategory), cl::sub(PrettySubcommand)); cl::list SymTypes( "sym-types", cl::desc("Type of symbols to dump (default all)"), - cl::cat(TypeCategory), cl::sub(PrettySubcommand), cl::ZeroOrMore, + cl::cat(TypeCategory), cl::sub(PrettySubcommand), cl::values( clEnumValN(SymLevel::Thunks, "thunks", "Display thunk symbols"), clEnumValN(SymLevel::Data, "data", "Display data symbols"), @@ -310,28 +315,31 @@ cl::opt ColorOutput("color-output", cl::desc("Override use of color (default = isatty)"), cl::cat(OtherOptions), cl::sub(PrettySubcommand)); -cl::list ExcludeTypes( - "exclude-types", cl::desc("Exclude types by regular expression"), - cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand)); -cl::list ExcludeSymbols( - "exclude-symbols", cl::desc("Exclude symbols by regular expression"), - cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand)); -cl::list ExcludeCompilands( - "exclude-compilands", cl::desc("Exclude compilands by regular expression"), - cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand)); +cl::list + ExcludeTypes("exclude-types", + cl::desc("Exclude types by regular expression"), + cl::cat(FilterCategory), cl::sub(PrettySubcommand)); +cl::list + ExcludeSymbols("exclude-symbols", + cl::desc("Exclude symbols by regular expression"), + cl::cat(FilterCategory), cl::sub(PrettySubcommand)); +cl::list + ExcludeCompilands("exclude-compilands", + cl::desc("Exclude compilands by regular expression"), + cl::cat(FilterCategory), cl::sub(PrettySubcommand)); cl::list IncludeTypes( "include-types", cl::desc("Include only types which match a regular expression"), - cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand)); + cl::cat(FilterCategory), cl::sub(PrettySubcommand)); cl::list IncludeSymbols( "include-symbols", cl::desc("Include only symbols which match a regular expression"), - cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand)); + cl::cat(FilterCategory), cl::sub(PrettySubcommand)); cl::list IncludeCompilands( 
"include-compilands", cl::desc("Include only compilands those which match a regular expression"), - cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand)); + cl::cat(FilterCategory), cl::sub(PrettySubcommand)); cl::opt SizeThreshold( "min-type-size", cl::desc("Displays only those types which are greater " "than or equal to the specified size."), @@ -384,7 +392,7 @@ cl::opt cl::sub(BytesSubcommand), cl::cat(MsfBytes)); cl::list - DumpStreamData("stream-data", cl::CommaSeparated, cl::ZeroOrMore, + DumpStreamData("stream-data", cl::CommaSeparated, cl::desc("Dump binary data from specified streams. Format " "is SN[:Start][@Size]"), cl::sub(BytesSubcommand), cl::cat(MsfBytes)); @@ -407,14 +415,12 @@ cl::opt TypeServerMap("type-server", cl::desc("Dump type server map"), cl::opt ECData("ec", cl::desc("Dump edit and continue map"), cl::sub(BytesSubcommand), cl::cat(DbiBytes)); -cl::list - TypeIndex("type", - cl::desc("Dump the type record with the given type index"), - cl::ZeroOrMore, cl::CommaSeparated, cl::sub(BytesSubcommand), - cl::cat(TypeCategory)); +cl::list TypeIndex( + "type", cl::desc("Dump the type record with the given type index"), + cl::CommaSeparated, cl::sub(BytesSubcommand), cl::cat(TypeCategory)); cl::list IdIndex("id", cl::desc("Dump the id record with the given type index"), - cl::ZeroOrMore, cl::CommaSeparated, cl::sub(BytesSubcommand), + cl::CommaSeparated, cl::sub(BytesSubcommand), cl::cat(TypeCategory)); cl::opt ModuleIndex( @@ -500,7 +506,7 @@ cl::opt DontResolveForwardRefs( cl::cat(TypeOptions), cl::sub(DumpSubcommand)); cl::list DumpTypeIndex( - "type-index", cl::ZeroOrMore, cl::CommaSeparated, + "type-index", cl::CommaSeparated, cl::desc("only dump types with the specified hexadecimal type index"), cl::cat(TypeOptions), cl::sub(DumpSubcommand)); @@ -516,7 +522,7 @@ cl::opt DumpIdExtras("id-extras", cl::desc("dump id hashes and index offsets"), cl::cat(TypeOptions), cl::sub(DumpSubcommand)); cl::list DumpIdIndex( - "id-index", cl::ZeroOrMore, cl::CommaSeparated, + "id-index", cl::CommaSeparated, cl::desc("only dump ids with the specified hexadecimal type index"), cl::cat(TypeOptions), cl::sub(DumpSubcommand)); @@ -536,7 +542,7 @@ cl::list DumpGlobalNames( "global-name", cl::desc( "With -globals, only dump globals whose name matches the given value"), - cl::cat(SymbolOptions), cl::sub(DumpSubcommand), cl::ZeroOrMore); + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); cl::opt DumpPublics("publics", cl::desc("dump Publics stream data"), cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); cl::opt DumpPublicExtras("public-extras", @@ -557,6 +563,27 @@ cl::opt cl::opt DumpFpo("fpo", cl::desc("dump FPO records"), cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt DumpSymbolOffset( + "symbol-offset", cl::Optional, + cl::desc("only dump symbol record with the specified symbol offset"), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt DumpParents("show-parents", + cl::desc("dump the symbols record's all parents."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt + DumpParentDepth("parent-recurse-depth", cl::Optional, cl::init(-1U), + cl::desc("only recurse to a depth of N when displaying " + "parents of a symbol record."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt DumpChildren("show-children", + cl::desc("dump the symbols record's all children."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt + DumpChildrenDepth("children-recurse-depth", cl::Optional, cl::init(-1U), + cl::desc("only recurse to a depth 
of N when displaying " + "children of a symbol record."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); + // MODULE & FILE OPTIONS cl::opt DumpModules("modules", cl::desc("dump compiland information"), cl::cat(FileOptions), cl::sub(DumpSubcommand)); @@ -680,7 +707,7 @@ cl::opt DumpModuleFiles("module-files", cl::desc("dump file information"), cl::cat(FileOptions), cl::sub(PdbToYamlSubcommand)); cl::list DumpModuleSubsections( - "subsections", cl::ZeroOrMore, cl::CommaSeparated, + "subsections", cl::CommaSeparated, cl::desc("dump subsections from each module's debug stream"), ChunkValues, cl::cat(FileOptions), cl::sub(PdbToYamlSubcommand)); cl::opt DumpModuleSyms("module-syms", cl::desc("dump module symbols"), @@ -764,7 +791,7 @@ static void yamlToPdb(StringRef Path) { PDBFileBuilder Builder(Allocator); uint32_t BlockSize = 4096; - if (YamlObj.Headers.hasValue()) + if (YamlObj.Headers) BlockSize = YamlObj.Headers->SuperBlock.BlockSize; ExitOnErr(Builder.initialize(BlockSize)); // Add each of the reserved streams. We ignore stream metadata in the @@ -779,7 +806,7 @@ static void yamlToPdb(StringRef Path) { StringsAndChecksums Strings; Strings.setStrings(std::make_shared()); - if (YamlObj.StringTable.hasValue()) { + if (YamlObj.StringTable) { for (auto S : *YamlObj.StringTable) Strings.strings()->insert(S); } @@ -789,7 +816,7 @@ static void yamlToPdb(StringRef Path) { pdb::yaml::PdbTpiStream DefaultTpiStream; pdb::yaml::PdbTpiStream DefaultIpiStream; - const auto &Info = YamlObj.PdbStream.getValueOr(DefaultInfoStream); + const auto &Info = YamlObj.PdbStream.value_or(DefaultInfoStream); auto &InfoBuilder = Builder.getInfoBuilder(); InfoBuilder.setAge(Info.Age); @@ -799,7 +826,7 @@ static void yamlToPdb(StringRef Path) { for (auto F : Info.Features) InfoBuilder.addFeature(F); - const auto &Dbi = YamlObj.DbiStream.getValueOr(DefaultDbiStream); + const auto &Dbi = YamlObj.DbiStream.value_or(DefaultDbiStream); auto &DbiBuilder = Builder.getDbiBuilder(); DbiBuilder.setAge(Dbi.Age); DbiBuilder.setBuildNumber(Dbi.BuildNumber); @@ -814,7 +841,7 @@ static void yamlToPdb(StringRef Path) { for (auto S : MI.SourceFiles) ExitOnErr(DbiBuilder.addModuleSourceFile(ModiBuilder, S)); - if (MI.Modi.hasValue()) { + if (MI.Modi) { const auto &ModiStream = *MI.Modi; for (auto Symbol : ModiStream.Symbols) { ModiBuilder.addSymbol( @@ -834,7 +861,7 @@ static void yamlToPdb(StringRef Path) { } auto &TpiBuilder = Builder.getTpiBuilder(); - const auto &Tpi = YamlObj.TpiStream.getValueOr(DefaultTpiStream); + const auto &Tpi = YamlObj.TpiStream.value_or(DefaultTpiStream); TpiBuilder.setVersionHeader(Tpi.Version); AppendingTypeTableBuilder TS(Allocator); for (const auto &R : Tpi.Records) { @@ -842,7 +869,7 @@ static void yamlToPdb(StringRef Path) { TpiBuilder.addTypeRecord(Type.RecordData, None); } - const auto &Ipi = YamlObj.IpiStream.getValueOr(DefaultIpiStream); + const auto &Ipi = YamlObj.IpiStream.value_or(DefaultIpiStream); auto &IpiBuilder = Builder.getIpiBuilder(); IpiBuilder.setVersionHeader(Ipi.Version); for (const auto &R : Ipi.Records) { @@ -1068,7 +1095,7 @@ static void dumpPretty(StringRef Path) { const bool UseColor = opts::pretty::ColorOutput == cl::BOU_UNSET ? 
Stream.has_colors() : opts::pretty::ColorOutput == cl::BOU_TRUE; - LinePrinter Printer(2, UseColor, Stream); + LinePrinter Printer(2, UseColor, Stream, opts::Filters); auto GlobalScope(Session->getGlobalScope()); if (!GlobalScope) @@ -1506,6 +1533,43 @@ int main(int Argc, const char **Argv) { llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded); + // Initialize the filters for LinePrinter. + auto propagate = [&](auto &Target, auto &Reference) { + for (std::string &Option : Reference) + Target.push_back(Option); + }; + + propagate(opts::Filters.ExcludeTypes, opts::pretty::ExcludeTypes); + propagate(opts::Filters.ExcludeSymbols, opts::pretty::ExcludeSymbols); + propagate(opts::Filters.ExcludeCompilands, opts::pretty::ExcludeCompilands); + propagate(opts::Filters.IncludeTypes, opts::pretty::IncludeTypes); + propagate(opts::Filters.IncludeSymbols, opts::pretty::IncludeSymbols); + propagate(opts::Filters.IncludeCompilands, opts::pretty::IncludeCompilands); + opts::Filters.PaddingThreshold = opts::pretty::PaddingThreshold; + opts::Filters.SizeThreshold = opts::pretty::SizeThreshold; + opts::Filters.JustMyCode = opts::dump::JustMyCode; + if (opts::dump::DumpModi.getNumOccurrences() > 0) { + if (opts::dump::DumpModi.getNumOccurrences() != 1) { + errs() << "argument '-modi' specified more than once.\n"; + errs().flush(); + exit(1); + } + opts::Filters.DumpModi = opts::dump::DumpModi; + } + if (opts::dump::DumpSymbolOffset) { + if (opts::dump::DumpModi.getNumOccurrences() != 1) { + errs() + << "need to specify argument '-modi' when using '-symbol-offset'.\n"; + errs().flush(); + exit(1); + } + opts::Filters.SymbolOffset = opts::dump::DumpSymbolOffset; + if (opts::dump::DumpParents) + opts::Filters.ParentRecurseDepth = opts::dump::DumpParentDepth; + if (opts::dump::DumpChildren) + opts::Filters.ChildrenRecurseDepth = opts::dump::DumpChildrenDepth; + } + if (opts::PdbToYamlSubcommand) { pdb2Yaml(opts::pdb2yaml::InputFilename.front()); } else if (opts::YamlToPdbSubcommand) { @@ -1544,14 +1608,14 @@ int main(int Argc, const char **Argv) { // it needs to be escaped again in the C++. So matching a single \ in the // input requires 4 \es in the C++.
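
The hunk above funnels the pretty subcommand's include/exclude lists into the shared opts::Filters object through one small generic lambda, so LinePrinter no longer reaches into subcommand-specific flags. A minimal standalone sketch of that propagation pattern follows; the FilterOptions struct and the option values here are illustrative stand-ins, not the real llvm-pdbutil definitions.

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for the shared filter state a printer consumes.
struct FilterOptions {
  std::vector<std::string> ExcludeTypes;
  std::vector<std::string> IncludeTypes;
};

int main() {
  // Stand-ins for per-subcommand cl::list option storage.
  std::vector<std::string> PrettyExcludeTypes = {"__vc_attributes"};
  std::vector<std::string> PrettyIncludeTypes = {"Foo.*"};

  FilterOptions Filters;

  // Generic lambda: copies any source list into any destination list,
  // mirroring the propagate helper in the hunk above.
  auto propagate = [](auto &Target, const auto &Reference) {
    for (const std::string &Option : Reference)
      Target.push_back(Option);
  };

  propagate(Filters.ExcludeTypes, PrettyExcludeTypes);
  propagate(Filters.IncludeTypes, PrettyIncludeTypes);

  for (const std::string &S : Filters.ExcludeTypes)
    std::cout << "exclude: " << S << "\n";
}

Because the lambda deduces both container types, the same helper covers every source/destination pair without a family of overloads.
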
if (opts::pretty::ExcludeCompilerGenerated) { - opts::pretty::ExcludeTypes.push_back("__vc_attributes"); - opts::pretty::ExcludeCompilands.push_back("\\* Linker \\*"); + opts::Filters.ExcludeTypes.push_back("__vc_attributes"); + opts::Filters.ExcludeCompilands.push_back("\\* Linker \\*"); } if (opts::pretty::ExcludeSystemLibraries) { - opts::pretty::ExcludeCompilands.push_back( + opts::Filters.ExcludeCompilands.push_back( "f:\\\\binaries\\\\Intermediate\\\\vctools\\\\crt_bld"); - opts::pretty::ExcludeCompilands.push_back("f:\\\\dd\\\\vctools\\\\crt"); - opts::pretty::ExcludeCompilands.push_back( + opts::Filters.ExcludeCompilands.push_back("f:\\\\dd\\\\vctools\\\\crt"); + opts::Filters.ExcludeCompilands.push_back( "d:\\\\th.obj.x86fre\\\\minkernel"); } llvm::for_each(opts::pretty::InputFilenames, dumpPretty); diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h index 9fe92c2c9d75..455fe5f28191 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h @@ -12,6 +12,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -50,6 +51,8 @@ enum class ModuleSubsection { All }; +extern FilterOptions Filters; + namespace pretty { enum class ClassDefinitionFormat { None, Layout, All }; diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 6000460d3c23..9c6586483ef0 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/RawMemProfReader.h" #include "llvm/ProfileData/SampleProfReader.h" @@ -37,6 +38,7 @@ #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; @@ -89,6 +91,7 @@ static void exitWithError(Error E, StringRef Whence = "") { } exitWithError(IPE.message(), std::string(Whence), std::string(Hint)); }); + return; } exitWithError(toString(std::move(E)), std::string(Whence)); @@ -237,7 +240,7 @@ static void overlapInput(const std::string &BaseFilename, /// Load an input into a writer context. static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, const InstrProfCorrelator *Correlator, - WriterContext *WC) { + const StringRef ProfiledBinary, WriterContext *WC) { std::unique_lock CtxGuard{WC->Lock}; // Copy the filename, because llvm::ThreadPool copied the input "const @@ -245,6 +248,48 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, // invalid outside of this packaged task. std::string Filename = Input.Filename; + using ::llvm::memprof::RawMemProfReader; + if (RawMemProfReader::hasFormat(Input.Filename)) { + auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary); + if (!ReaderOrErr) { + exitWithError(ReaderOrErr.takeError(), Input.Filename); + } + std::unique_ptr Reader = std::move(ReaderOrErr.get()); + // Check if the profile types can be merged, e.g. clang frontend profiles + // should not be merged with memprof profiles. 
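
Just above, loadInput() begins by probing the input with RawMemProfReader::hasFormat() and only falls back to the generic instrumented-profile reader when that probe fails; the hunk that follows then refuses to merge incompatible profile kinds. Below is a self-contained sketch of the hasFormat-style dispatch, under the assumption (invented for illustration, and ignoring endianness for brevity) that the raw format is recognized by a fixed 8-byte magic prefix.

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

// Illustrative magic value; the real raw memprof format defines its own.
constexpr uint64_t kRawMagic = 0x6d656d70726f6621ULL; // "memprof!"

// hasFormat-style probe: peek at the header without committing to a reader.
bool hasRawFormat(const std::string &Path) {
  std::ifstream In(Path, std::ios::binary);
  uint64_t Magic = 0;
  if (!In.read(reinterpret_cast<char *>(&Magic), sizeof(Magic)))
    return false; // Too short to be a raw profile.
  return Magic == kRawMagic;
}

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  // Dispatch mirrors loadInput(): try the specialized reader first,
  // fall back to the generic instrumented-profile reader otherwise.
  if (hasRawFormat(argv[1]))
    std::cout << "raw memprof input\n";
  else
    std::cout << "generic instrprof input\n";
}
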
+ if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { + consumeError(std::move(E)); + WC->Errors.emplace_back( + make_error( + "Cannot merge MemProf profile with Clang generated profile.", + std::error_code()), + Filename); + return; + } + + auto MemProfError = [&](Error E) { + instrprof_error IPE = InstrProfError::take(std::move(E)); + WC->Errors.emplace_back(make_error(IPE), Filename); + }; + + // Add the frame mappings into the writer context. + const auto &IdToFrame = Reader->getFrameMapping(); + for (const auto &I : IdToFrame) { + bool Succeeded = WC->Writer.addMemProfFrame( + /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError); + // If we weren't able to add the frame mappings then it doesn't make sense + // to try to add the records from this profile. + if (!Succeeded) + return; + } + const auto &FunctionProfileData = Reader->getProfileData(); + // Add the memprof records into the writer context. + for (const auto &I : FunctionProfileData) { + WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second); + } + return; + } + auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning silently. @@ -330,7 +375,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads, FailureMode FailMode) { + unsigned NumThreads, FailureMode FailMode, + const StringRef ProfiledBinary) { if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("unknown format is specified"); @@ -363,14 +409,15 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, if (NumThreads == 1) { for (const auto &Input : Inputs) - loadInput(Input, Remapper, Correlator.get(), Contexts[0].get()); + loadInput(Input, Remapper, Correlator.get(), ProfiledBinary, + Contexts[0].get()); } else { ThreadPool Pool(hardware_concurrency(NumThreads)); // Load the inputs in parallel (N/NumThreads serial steps).
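
The loop that follows implements the comment above: inputs are handed to the thread pool round-robin, so each writer context absorbs roughly N/NumThreads inputs serially while its lock keeps concurrent merges safe. A runnable sketch of the same distribution pattern, with plain std::async standing in for llvm::ThreadPool:

#include <future>
#include <iostream>
#include <mutex>
#include <vector>

// Stand-in for WriterContext: one lock-protected accumulator per slot.
struct Context {
  std::mutex Lock;
  long Sum = 0;
};

void loadOne(int Input, Context *C) {
  std::lock_guard<std::mutex> Guard(C->Lock); // Serializes writers per context.
  C->Sum += Input;
}

int main() {
  const unsigned NumThreads = 4;
  std::vector<int> Inputs = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  std::vector<Context> Contexts(NumThreads);
  std::vector<std::future<void>> Tasks;

  // Round-robin assignment: input i lands in context i % NumThreads, so each
  // context sees about N/NumThreads serial steps, as the comment above notes.
  unsigned Ctx = 0;
  for (int Input : Inputs) {
    Tasks.push_back(
        std::async(std::launch::async, loadOne, Input, &Contexts[Ctx]));
    Ctx = (Ctx + 1) % NumThreads;
  }
  for (auto &T : Tasks)
    T.wait();

  long Total = 0;
  for (Context &C : Contexts)
    Total += C.Sum;
  std::cout << "merged total: " << Total << "\n"; // 55
}
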
unsigned Ctx = 0; for (const auto &Input : Inputs) { - Pool.async(loadInput, Input, Remapper, Correlator.get(), + Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary, Contexts[Ctx].get()); Ctx = (Ctx + 1) % NumThreads; } @@ -587,7 +634,7 @@ static void supplementInstrProfile( SmallSet WriterErrorCodes; auto WC = std::make_unique(OutputSparse, ErrorLock, WriterErrorCodes); - loadInput(Inputs[0], nullptr, nullptr, WC.get()); + loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get()); if (WC->Errors.size() > 0) exitWithError(std::move(WC->Errors[0].first), InstrFilename); @@ -708,7 +755,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, LLVMContext Context; sampleprof::ProfileSymbolList WriterList; Optional ProfileIsProbeBased; - Optional ProfileIsCSFlat; + Optional ProfileIsCS; for (const auto &Input : Inputs) { auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, FSDiscriminatorPassOption); @@ -730,15 +777,14 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, } SampleProfileMap &Profiles = Reader->getProfiles(); - if (ProfileIsProbeBased.hasValue() && + if (ProfileIsProbeBased && ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased) exitWithError( "cannot merge probe-based profile with non-probe-based profile"); ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased; - if (ProfileIsCSFlat.hasValue() && - ProfileIsCSFlat != FunctionSamples::ProfileIsCSFlat) + if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS) exitWithError("cannot merge CS profile with non-CS profile"); - ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat; + ProfileIsCS = FunctionSamples::ProfileIsCS; for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end(); I != E; ++I) { sampleprof_error Result = sampleprof_error::success; @@ -761,7 +807,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, WriterList.merge(*ReaderList); } - if (ProfileIsCSFlat && (SampleMergeColdContext || SampleTrimColdContext)) { + if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { // Use threshold calculated from profile summary unless specified. SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); auto Summary = Builder.computeSummaryForProfiles(ProfileMap); @@ -776,10 +822,10 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, SampleMergeColdContext, SampleColdContextFrameDepth, false); } - if (ProfileIsCSFlat && GenCSNestedProfile) { + if (ProfileIsCS && GenCSNestedProfile) { CSProfileConverter CSConverter(ProfileMap); CSConverter.convertProfiles(); - ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat = false; + ProfileIsCS = FunctionSamples::ProfileIsCS = false; } auto WriterOrErr = @@ -933,7 +979,7 @@ static int merge_main(int argc, const char *argv[]) { cl::desc( "Trim context sample profiles whose count is below cold threshold")); cl::opt SampleColdContextFrameDepth( - "sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore, + "sample-frame-depth-for-cold-context", cl::init(1), cl::desc("Keep the last K frames while merging cold profile. 
1 means the " "context-less base profile")); cl::opt GenPartialProfile( @@ -949,7 +995,7 @@ static int merge_main(int argc, const char *argv[]) { "zero-counter-threshold", cl::init(0.7), cl::Hidden, cl::desc("For the function which is cold in instr profile but hot in " "sample profile, if the ratio of the number of zero counters " - "divided by the the total number of counters is above the " + "divided by the total number of counters is above the " "threshold, the profile of the function will be regarded as " "being harmful for performance and will be dropped.")); cl::opt SupplMinSizeThreshold( @@ -967,6 +1013,9 @@ static int merge_main(int argc, const char *argv[]) { cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); + cl::opt ProfiledBinary( + "profiled-binary", cl::init(""), + cl::desc("Path to binary from which the profile was collected.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -1009,7 +1058,7 @@ static int merge_main(int argc, const char *argv[]) { if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), OutputFilename, OutputFormat, OutputSparse, NumThreads, - FailureMode); + FailureMode, ProfiledBinary); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, @@ -1040,7 +1089,7 @@ static void overlapInstrProfile(const std::string &BaseFilename, OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; exit(0); } - loadInput(WeightedInput, nullptr, nullptr, &Context); + loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context); overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, IsCS); Overlap.dump(OS); @@ -1936,7 +1985,7 @@ std::error_code SampleOverlapAggregator::loadProfiles() { if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased()) exitWithError( "cannot compare probe-based profile with non-probe-based profile"); - if (BaseReader->profileIsCSFlat() != TestReader->profileIsCSFlat()) + if (BaseReader->profileIsCS() != TestReader->profileIsCS()) exitWithError("cannot compare CS profile with non-CS profile"); // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in @@ -2097,7 +2146,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, auto ReaderOrErr = InstrProfReader::create(Filename); std::vector Cutoffs = std::move(DetailedSummaryCutoffs); if (ShowDetailedSummary && Cutoffs.empty()) { - Cutoffs = {800000, 900000, 950000, 990000, 999000, 999900, 999990}; + Cutoffs = ProfileSummaryBuilder::DefaultCutoffs; } InstrProfSummaryBuilder Builder(std::move(Cutoffs)); if (Error E = ReaderOrErr.takeError()) @@ -2480,14 +2529,21 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, return 0; } -static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) { - auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename); +static int showMemProfProfile(const std::string &Filename, + const std::string &ProfiledBinary, + raw_fd_ostream &OS) { + auto ReaderOr = llvm::memprof::RawMemProfReader::create( + Filename, ProfiledBinary, /*KeepNames=*/true); if (Error E = ReaderOr.takeError()) - exitWithError(std::move(E), Filename); + // Since the error can be related to the profile or the binary we do not + // pass whence. Instead additional context is provided where necessary in + // the error message. 
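
The memprof show path below deliberately calls exitWithError() with an empty whence: the reader's errors already carry their own file and binary context, so prepending the filename would only duplicate it. The following toy Expected-style carrier illustrates that convention; it is a hand-rolled stand-in made up for this example, not the interface of llvm::Expected.

#include <iostream>
#include <string>
#include <variant>

// Minimal Expected-style carrier: either a value or an error message that
// already contains its own context, so callers need not prepend a "whence".
template <typename T> class Expected {
  std::variant<T, std::string> Storage;

public:
  Expected(T Value) : Storage(std::move(Value)) {}
  Expected(std::string Err) : Storage(std::move(Err)) {}
  explicit operator bool() const { return Storage.index() == 0; }
  T &get() { return std::get<0>(Storage); }
  std::string takeError() { return std::get<1>(Storage); }
};

Expected<int> parseCount(const std::string &Input) {
  if (Input.empty())
    return std::string("empty input while parsing count"); // context built in
  return static_cast<int>(Input.size());
}

int main() {
  auto CountOrErr = parseCount("");
  if (!CountOrErr) {
    // Mirrors showMemProfProfile: no whence prefix, the message is enough.
    std::cerr << "error: " << CountOrErr.takeError() << "\n";
    return 1;
  }
  std::cout << CountOrErr.get() << "\n";
}
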
+ exitWithError(std::move(E), /*Whence*/ ""); std::unique_ptr Reader( ReaderOr.get().release()); - Reader->printSummaries(OS); + + Reader->printYAML(OS); return 0; } @@ -2587,6 +2643,9 @@ static int show_main(int argc, const char *argv[]) { cl::opt ShowCovered( "covered", cl::init(false), cl::desc("Show only the functions that have been executed.")); + cl::opt ProfiledBinary( + "profiled-binary", cl::init(""), + cl::desc("Path to binary from which the profile was collected.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); @@ -2624,7 +2683,7 @@ static int show_main(int argc, const char *argv[]) { ShowAllFunctions, ShowDetailedSummary, ShowFunction, ShowProfileSymbolList, ShowSectionInfoOnly, ShowHotFuncList, OS); - return showMemProfProfile(Filename, OS); + return showMemProfProfile(Filename, ProfiledBinary, OS); } int main(int argc, const char *argv[]) { diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp index 78be632f2153..b7cbf353c43f 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -78,10 +78,10 @@ raw_ostream &operator<<(raw_ostream &OS, const ARM::WinEH::ReturnType &RT) { OS << "pop {pc}"; break; case ARM::WinEH::ReturnType::RT_B: - OS << "b target"; + OS << "bx "; break; case ARM::WinEH::ReturnType::RT_BW: - OS << "b.w target"; + OS << "b.w "; break; case ARM::WinEH::ReturnType::RT_NoEpilogue: OS << "(no epilogue)"; @@ -174,26 +174,47 @@ const Decoder::RingEntry Decoder::Ring64[] = { { 0xff, 0xec, 1, &Decoder::opcode_clear_unwound_to_call }, }; -void Decoder::printRegisters(const std::pair &RegisterMask) { - static const char * const GPRRegisterNames[16] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "ip", "sp", "lr", "pc", - }; +static void printRange(raw_ostream &OS, ListSeparator &LS, unsigned First, + unsigned Last, char Letter) { + if (First == Last) + OS << LS << Letter << First; + else + OS << LS << Letter << First << "-" << Letter << Last; +} - const uint16_t GPRMask = std::get<0>(RegisterMask); - const uint16_t VFPMask = std::get<1>(RegisterMask); +static void printRange(raw_ostream &OS, uint32_t Mask, ListSeparator &LS, + unsigned Start, unsigned End, char Letter) { + int First = -1; + for (unsigned RI = Start; RI <= End; ++RI) { + if (Mask & (1 << RI)) { + if (First < 0) + First = RI; + } else { + if (First >= 0) { + printRange(OS, LS, First, RI - 1, Letter); + First = -1; + } + } + } + if (First >= 0) + printRange(OS, LS, First, End, Letter); +} + +void Decoder::printGPRMask(uint16_t GPRMask) { + OS << '{'; + ListSeparator LS; + printRange(OS, GPRMask, LS, 0, 12, 'r'); + if (GPRMask & (1 << 14)) + OS << LS << "lr"; + if (GPRMask & (1 << 15)) + OS << LS << "pc"; + OS << '}'; +} +void Decoder::printVFPMask(uint32_t VFPMask) { OS << '{'; ListSeparator LS; - for (unsigned RI = 0, RE = 11; RI < RE; ++RI) - if (GPRMask & (1 << RI)) - OS << LS << GPRRegisterNames[RI]; - for (unsigned RI = 0, RE = 32; RI < RE; ++RI) - if (VFPMask & (1 << RI)) - OS << LS << "d" << unsigned(RI); - for (unsigned RI = 11, RE = 16; RI < RE; ++RI) - if (GPRMask & (1 << RI)) - OS << LS << GPRRegisterNames[RI]; + printRange(OS, VFPMask, LS, 0, 31, 'd'); OS << '}'; } @@ -325,7 +346,7 @@ bool Decoder::opcode_10Lxxxxx(const uint8_t *OC, unsigned &Offset, SW.startLine() << format("0x%02x 0x%02x ; %s.w ", OC[Offset + 0], OC[Offset + 1], Prologue ? 
"push" : "pop"); - printRegisters(std::make_pair(RegisterMask, 0)); + printGPRMask(RegisterMask); OS << '\n'; Offset += 2; @@ -346,7 +367,7 @@ bool Decoder::opcode_1100xxxx(const uint8_t *OC, unsigned &Offset, bool Decoder::opcode_11010Lxx(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { - unsigned Link = (OC[Offset] & 0x4) >> 3; + unsigned Link = (OC[Offset] & 0x4) >> 2; unsigned Count = (OC[Offset] & 0x3); uint16_t GPRMask = (Link << (Prologue ? 14 : 15)) @@ -354,7 +375,7 @@ bool Decoder::opcode_11010Lxx(const uint8_t *OC, unsigned &Offset, SW.startLine() << format("0x%02x ; %s ", OC[Offset], Prologue ? "push" : "pop"); - printRegisters(std::make_pair(GPRMask, 0)); + printGPRMask(GPRMask); OS << '\n'; ++Offset; @@ -371,7 +392,7 @@ bool Decoder::opcode_11011Lxx(const uint8_t *OC, unsigned &Offset, SW.startLine() << format("0x%02x ; %s.w ", OC[Offset], Prologue ? "push" : "pop"); - printRegisters(std::make_pair(GPRMask, 0)); + printGPRMask(GPRMask); OS << '\n'; ++Offset; @@ -385,7 +406,7 @@ bool Decoder::opcode_11100xxx(const uint8_t *OC, unsigned &Offset, SW.startLine() << format("0x%02x ; %s ", OC[Offset], Prologue ? "vpush" : "vpop"); - printRegisters(std::make_pair(0, VFPMask)); + printVFPMask(VFPMask); OS << '\n'; ++Offset; @@ -407,12 +428,12 @@ bool Decoder::opcode_111010xx(const uint8_t *OC, unsigned &Offset, bool Decoder::opcode_1110110L(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { - uint8_t GPRMask = ((OC[Offset + 0] & 0x01) << (Prologue ? 14 : 15)) - | ((OC[Offset + 1] & 0xff) << 0); + uint16_t GPRMask = ((OC[Offset + 0] & 0x01) << (Prologue ? 14 : 15)) + | ((OC[Offset + 1] & 0xff) << 0); SW.startLine() << format("0x%02x 0x%02x ; %s ", OC[Offset + 0], OC[Offset + 1], Prologue ? "push" : "pop"); - printRegisters(std::make_pair(GPRMask, 0)); + printGPRMask(GPRMask); OS << '\n'; Offset += 2; @@ -437,11 +458,13 @@ bool Decoder::opcode_11101110(const uint8_t *OC, unsigned &Offset, bool Decoder::opcode_11101111(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { - assert(!Prologue && "may not be used in prologue"); - if (OC[Offset + 1] & 0xf0) SW.startLine() << format("0x%02x 0x%02x ; reserved\n", OC[Offset + 0], OC[Offset + 1]); + else if (Prologue) + SW.startLine() + << format("0x%02x 0x%02x ; str.w lr, [sp, #-%u]!\n", + OC[Offset + 0], OC[Offset + 1], OC[Offset + 1] << 2); else SW.startLine() << format("0x%02x 0x%02x ; ldr.w lr, [sp], #%u\n", @@ -455,11 +478,11 @@ bool Decoder::opcode_11110101(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { unsigned Start = (OC[Offset + 1] & 0xf0) >> 4; unsigned End = (OC[Offset + 1] & 0x0f) >> 0; - uint32_t VFPMask = ((1 << (End - Start)) - 1) << Start; + uint32_t VFPMask = ((1 << (End + 1 - Start)) - 1) << Start; SW.startLine() << format("0x%02x 0x%02x ; %s ", OC[Offset + 0], OC[Offset + 1], Prologue ? "vpush" : "vpop"); - printRegisters(std::make_pair(0, VFPMask)); + printVFPMask(VFPMask); OS << '\n'; Offset += 2; @@ -470,11 +493,11 @@ bool Decoder::opcode_11110110(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { unsigned Start = (OC[Offset + 1] & 0xf0) >> 4; unsigned End = (OC[Offset + 1] & 0x0f) >> 0; - uint32_t VFPMask = ((1 << (End - Start)) - 1) << 16; + uint32_t VFPMask = ((1 << (End + 1 - Start)) - 1) << (16 + Start); SW.startLine() << format("0x%02x 0x%02x ; %s ", OC[Offset + 0], OC[Offset + 1], Prologue ? 
"vpush" : "vpop"); - printRegisters(std::make_pair(0, VFPMask)); + printVFPMask(VFPMask); OS << '\n'; Offset += 2; @@ -553,14 +576,14 @@ bool Decoder::opcode_11111100(const uint8_t *OC, unsigned &Offset, bool Decoder::opcode_11111101(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { - SW.startLine() << format("0x%02x ; b\n", OC[Offset]); + SW.startLine() << format("0x%02x ; bx \n", OC[Offset]); ++Offset; return true; } bool Decoder::opcode_11111110(const uint8_t *OC, unsigned &Offset, unsigned Length, bool Prologue) { - SW.startLine() << format("0x%02x ; b.w\n", OC[Offset]); + SW.startLine() << format("0x%02x ; b.w \n", OC[Offset]); ++Offset; return true; } @@ -948,7 +971,7 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF, if (XData.E()) { ArrayRef UC = XData.UnwindByteCode(); - if (isAArch64 || !XData.F()) { + { ListScope PS(SW, "Prologue"); decodeOpcodes(UC, 0, /*Prologue=*/true); } @@ -971,8 +994,9 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF, SW.printNumber("EpilogueStartIndex", isAArch64 ? ES.EpilogueStartIndexAArch64() : ES.EpilogueStartIndexARM()); - if (ES.ES & ~0xffc3ffff) - SW.printNumber("ReservedBits", (ES.ES >> 18) & 0xF); + unsigned ReservedMask = isAArch64 ? 0xF : 0x3; + if ((ES.ES >> 18) & ReservedMask) + SW.printNumber("ReservedBits", (ES.ES >> 18) & ReservedMask); ListScope Opcodes(SW, "Opcodes"); decodeOpcodes(XData.UnwindByteCode(), @@ -1110,17 +1134,75 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF, SW.printString("Function", formatSymbol(FunctionName, FunctionAddress, FunctionOffset)); - if (!isAArch64) - SW.printBoolean("Fragment", - RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment); + SW.printBoolean("Fragment", + RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment); SW.printNumber("FunctionLength", RF.FunctionLength()); SW.startLine() << "ReturnType: " << RF.Ret() << '\n'; SW.printBoolean("HomedParameters", RF.H()); - SW.startLine() << "SavedRegisters: "; - printRegisters(SavedRegisterMask(RF)); - OS << '\n'; + SW.printNumber("Reg", RF.Reg()); + SW.printNumber("R", RF.R()); + SW.printBoolean("LinkRegister", RF.L()); + SW.printBoolean("Chaining", RF.C()); SW.printNumber("StackAdjustment", StackAdjustment(RF) << 2); + { + ListScope PS(SW, "Prologue"); + + uint16_t GPRMask, VFPMask; + std::tie(GPRMask, VFPMask) = SavedRegisterMask(RF, /*Prologue=*/true); + + if (StackAdjustment(RF) && !PrologueFolding(RF)) + SW.startLine() << "sub sp, sp, #" << StackAdjustment(RF) * 4 << "\n"; + if (VFPMask) { + SW.startLine() << "vpush "; + printVFPMask(VFPMask); + OS << "\n"; + } + if (RF.C()) { + // Count the number of registers pushed below R11 + int FpOffset = 4 * countPopulation(GPRMask & ((1U << 11) - 1)); + if (FpOffset) + SW.startLine() << "add.w r11, sp, #" << FpOffset << "\n"; + else + SW.startLine() << "mov r11, sp\n"; + } + if (GPRMask) { + SW.startLine() << "push "; + printGPRMask(GPRMask); + OS << "\n"; + } + if (RF.H()) + SW.startLine() << "push {r0-r3}\n"; + } + + if (RF.Ret() != ReturnType::RT_NoEpilogue) { + ListScope PS(SW, "Epilogue"); + + uint16_t GPRMask, VFPMask; + std::tie(GPRMask, VFPMask) = SavedRegisterMask(RF, /*Prologue=*/false); + + if (StackAdjustment(RF) && !EpilogueFolding(RF)) + SW.startLine() << "add sp, sp, #" << StackAdjustment(RF) * 4 << "\n"; + if (VFPMask) { + SW.startLine() << "vpop "; + printVFPMask(VFPMask); + OS << "\n"; + } + if (GPRMask) { + SW.startLine() << "pop "; + printGPRMask(GPRMask); + OS << "\n"; + } + if (RF.H()) { + if (RF.L() == 0 || RF.Ret() != 
ReturnType::RT_POP) + SW.startLine() << "add sp, sp, #16\n"; + else + SW.startLine() << "ldr pc, [sp], #20\n"; + } + if (RF.Ret() != ReturnType::RT_POP) + SW.startLine() << RF.Ret() << '\n'; + } + return true; } @@ -1189,11 +1271,11 @@ bool Decoder::dumpPackedARM64Entry(const object::COFFObjectFile &COFF, SW.startLine() << format("sub sp, sp, #%d\n", LocSZ); } if (RF.H()) { - SW.startLine() << format("stp x6, x7, [sp, #%d]\n", IntSZ + FpSZ + 48); - SW.startLine() << format("stp x4, x5, [sp, #%d]\n", IntSZ + FpSZ + 32); - SW.startLine() << format("stp x2, x3, [sp, #%d]\n", IntSZ + FpSZ + 16); + SW.startLine() << format("stp x6, x7, [sp, #%d]\n", SavSZ - 16); + SW.startLine() << format("stp x4, x5, [sp, #%d]\n", SavSZ - 32); + SW.startLine() << format("stp x2, x3, [sp, #%d]\n", SavSZ - 48); if (RF.RegI() > 0 || RF.RegF() > 0 || RF.CR() == 1) { - SW.startLine() << format("stp x0, x1, [sp, #%d]\n", IntSZ + FpSZ); + SW.startLine() << format("stp x0, x1, [sp, #%d]\n", SavSZ - 64); } else { // This case isn't documented; if neither RegI nor RegF nor CR=1 // have decremented the stack pointer by SavSZ, we need to do it here diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h index 920d4e5f7332..ceaa866ff215 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h @@ -133,7 +133,8 @@ class Decoder { void decodeOpcodes(ArrayRef Opcodes, unsigned Offset, bool Prologue); - void printRegisters(const std::pair &RegisterMask); + void printGPRMask(uint16_t Mask); + void printVFPMask(uint32_t Mask); ErrorOr getSectionContaining(const object::COFFObjectFile &COFF, uint64_t Address); diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 04a67225401f..ae2dec5d15fb 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1204,6 +1204,7 @@ const EnumEntry ElfMachineType[] = { ENUM_ENT(EM_LANAI, "EM_LANAI"), ENUM_ENT(EM_BPF, "EM_BPF"), ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"), + ENUM_ENT(EM_LOONGARCH, "LoongArch"), }; const EnumEntry ElfSymbolBindings[] = { @@ -1241,10 +1242,17 @@ const EnumEntry ElfSectionFlags[] = { ENUM_ENT(SHF_GROUP, "G"), ENUM_ENT(SHF_TLS, "T"), ENUM_ENT(SHF_COMPRESSED, "C"), - ENUM_ENT(SHF_GNU_RETAIN, "R"), ENUM_ENT(SHF_EXCLUDE, "E"), }; +const EnumEntry ElfGNUSectionFlags[] = { + ENUM_ENT(SHF_GNU_RETAIN, "R") +}; + +const EnumEntry ElfSolarisSectionFlags[] = { + ENUM_ENT(SHF_SUNW_NODISCARD, "R") +}; + const EnumEntry ElfXCoreSectionFlags[] = { ENUM_ENT(XCORE_SHF_CP_SECTION, ""), ENUM_ENT(XCORE_SHF_DP_SECTION, "") @@ -1274,9 +1282,19 @@ const EnumEntry ElfX86_64SectionFlags[] = { }; static std::vector> -getSectionFlagsForTarget(unsigned EMachine) { +getSectionFlagsForTarget(unsigned EOSAbi, unsigned EMachine) { std::vector> Ret(std::begin(ElfSectionFlags), std::end(ElfSectionFlags)); + switch (EOSAbi) { + case ELFOSABI_SOLARIS: + Ret.insert(Ret.end(), std::begin(ElfSolarisSectionFlags), + std::end(ElfSolarisSectionFlags)); + break; + default: + Ret.insert(Ret.end(), std::begin(ElfGNUSectionFlags), + std::end(ElfGNUSectionFlags)); + break; + } switch (EMachine) { case EM_ARM: Ret.insert(Ret.end(), std::begin(ElfARMSectionFlags), @@ -1304,7 +1322,8 @@ getSectionFlagsForTarget(unsigned EMachine) { return Ret; } -static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) { +static std::string getGNUFlags(unsigned EOSAbi, unsigned EMachine, + uint64_t Flags) { // Here we are trying to build the flags string 
in the same way as GNU does. // It is not that straightforward. Imagine we have sh_flags == 0x90000000. // SHF_EXCLUDE ("E") has a value of 0x80000000 and SHF_MASKPROC is 0xf0000000. @@ -1315,7 +1334,7 @@ static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) { bool HasOSFlag = false; bool HasProcFlag = false; std::vector> FlagsList = - getSectionFlagsForTarget(EMachine); + getSectionFlagsForTarget(EOSAbi, EMachine); while (Flags) { // Take the least significant bit as a flag. uint64_t Flag = Flags & -Flags; @@ -1371,6 +1390,8 @@ static StringRef segmentTypeToString(unsigned Arch, unsigned Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS); } break; + case ELF::EM_RISCV: + switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_RISCV_ATTRIBUTES); } } switch (Type) { @@ -1404,12 +1425,16 @@ static std::string getGNUPtType(unsigned Arch, unsigned Type) { return std::string(": ") + to_string(format_hex(Type, 1)); // E.g. "PT_ARM_EXIDX" -> "EXIDX". - if (Seg.startswith("PT_ARM_")) - return Seg.drop_front(7).str(); + if (Seg.consume_front("PT_ARM_")) + return Seg.str(); // E.g. "PT_MIPS_REGINFO" -> "REGINFO". - if (Seg.startswith("PT_MIPS_")) - return Seg.drop_front(8).str(); + if (Seg.consume_front("PT_MIPS_")) + return Seg.str(); + + // E.g. "PT_RISCV_ATTRIBUTES" + if (Seg.consume_front("PT_RISCV_")) + return Seg.str(); // E.g. "PT_LOAD" -> "LOAD". assert(Seg.startswith("PT_")); @@ -1508,6 +1533,7 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion3[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), @@ -1518,6 +1544,11 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion3[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3) }; @@ -1562,6 +1593,7 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), @@ -1572,6 +1604,11 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102), + 
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4), @@ -2265,6 +2302,7 @@ std::string ELFDumper::getDynamicEntry(uint64_t Type, case DT_MIPS_PLTGOT: case DT_MIPS_RWPLT: case DT_MIPS_RLD_MAP_REL: + case DT_MIPS_XHASH: return FormatHexValue(Value); case DT_MIPS_FLAGS: return FormatFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags)); @@ -3277,7 +3315,7 @@ template void GNUELFDumper::printFileHeaders() { OS.PadToColumn(2u); OS << "Version:"; OS.PadToColumn(37u); - OS << to_hexString(e.e_ident[ELF::EI_VERSION]); + OS << utohexstr(e.e_ident[ELF::EI_VERSION]); if (e.e_version == ELF::EV_CURRENT) OS << " (current)"; OS << "\n"; @@ -3290,19 +3328,19 @@ template void GNUELFDumper::printFileHeaders() { Str = E->AltName.str(); } else { if (e.e_type >= ET_LOPROC) - Str = "Processor Specific: (" + to_hexString(e.e_type, false) + ")"; + Str = "Processor Specific: (" + utohexstr(e.e_type, /*LowerCase=*/true) + ")"; else if (e.e_type >= ET_LOOS) - Str = "OS Specific: (" + to_hexString(e.e_type, false) + ")"; + Str = "OS Specific: (" + utohexstr(e.e_type, /*LowerCase=*/true) + ")"; else - Str = ": " + to_hexString(e.e_type, false); + Str = ": " + utohexstr(e.e_type, /*LowerCase=*/true); } printFields(OS, "Type:", Str); Str = enumToString(e.e_machine, makeArrayRef(ElfMachineType)); printFields(OS, "Machine:", Str); - Str = "0x" + to_hexString(e.e_version); + Str = "0x" + utohexstr(e.e_version); printFields(OS, "Version:", Str); - Str = "0x" + to_hexString(e.e_entry); + Str = "0x" + utohexstr(e.e_entry); printFields(OS, "Entry point address:", Str); Str = to_string(e.e_phoff) + " (bytes into file)"; printFields(OS, "Start of program headers:", Str); @@ -3319,7 +3357,7 @@ template void GNUELFDumper::printFileHeaders() { else if (e.e_machine == EM_AVR) ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags), unsigned(ELF::EF_AVR_ARCH_MASK)); - Str = "0x" + to_hexString(e.e_flags); + Str = "0x" + utohexstr(e.e_flags); if (!ElfFlags.empty()) Str = Str + ", " + ElfFlags; printFields(OS, "Flags:", Str); @@ -3497,7 +3535,7 @@ void GNUELFDumper::printRelRelaReloc(const Relocation &R, Addend = " + "; } } - Addend += to_hexString(RelAddend, false); + Addend += utohexstr(RelAddend, /*LowerCase=*/true); } OS << Addend << "\n"; } @@ -3529,7 +3567,7 @@ void GNUELFDumper::printDynamicRelocHeader(unsigned Type, StringRef Name, const DynRegionInfo &Reg) { uint64_t Offset = Reg.Addr - this->Obj.base(); OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x" - << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n"; + << utohexstr(Offset, /*LowerCase=*/true) << " contains " << Reg.Size << " bytes:\n"; printRelocHeaderFields(OS, Type); } @@ -3582,7 +3620,7 @@ template void GNUELFDumper::printRelocations() { uintX_t Offset = Sec.sh_offset; StringRef Name = this->getPrintableSectionName(Sec); OS << "\nRelocation section '" << Name << "' at offset 0x" - << to_hexString(Offset, false) << " contains " << EntriesNum + << utohexstr(Offset, /*LowerCase=*/true) << " contains " << EntriesNum << " entries:\n"; printRelocHeaderFields(OS, Sec.sh_type); this->printRelocationsHelper(Sec); @@ -3597,30 +3635,30 @@ template void GNUELFDumper::printRelocations() { // returned as '' followed by the type value. 
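
The getSectionTypeString() cleanup below replaces startswith()+drop_front() pairs with single consume_front() calls, which strip a prefix and report success in one step. A standalone sketch of the idiom using std::string_view; consumeFront here is a hypothetical helper mirroring StringRef::consume_front.

#include <cctype>
#include <iostream>
#include <string>
#include <string_view>

// consume_front-style helper: strips Prefix in place and reports success,
// folding the startswith()+drop_front() pair into one call.
bool consumeFront(std::string_view &S, std::string_view Prefix) {
  if (S.substr(0, Prefix.size()) != Prefix)
    return false;
  S.remove_prefix(Prefix.size());
  return true;
}

std::string sectionTypeString(std::string_view Name) {
  // E.g. "SHT_GNU_verneed" -> "VERNEED", "SHT_PROGBITS" -> "PROGBITS".
  if (consumeFront(Name, "SHT_GNU_")) {
    std::string Upper(Name);
    for (char &C : Upper)
      C = static_cast<char>(std::toupper(static_cast<unsigned char>(C)));
    return Upper;
  }
  consumeFront(Name, "SHT_"); // Generic prefix: strip it if present.
  return std::string(Name);
}

int main() {
  std::cout << sectionTypeString("SHT_GNU_verneed") << "\n"; // VERNEED
  std::cout << sectionTypeString("SHT_PROGBITS") << "\n";    // PROGBITS
}
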
static std::string getSectionTypeOffsetString(unsigned Type) { if (Type >= SHT_LOOS && Type <= SHT_HIOS) - return "LOOS+0x" + to_hexString(Type - SHT_LOOS); + return "LOOS+0x" + utohexstr(Type - SHT_LOOS); else if (Type >= SHT_LOPROC && Type <= SHT_HIPROC) - return "LOPROC+0x" + to_hexString(Type - SHT_LOPROC); + return "LOPROC+0x" + utohexstr(Type - SHT_LOPROC); else if (Type >= SHT_LOUSER && Type <= SHT_HIUSER) - return "LOUSER+0x" + to_hexString(Type - SHT_LOUSER); - return "0x" + to_hexString(Type) + ": "; + return "LOUSER+0x" + utohexstr(Type - SHT_LOUSER); + return "0x" + utohexstr(Type) + ": "; } static std::string getSectionTypeString(unsigned Machine, unsigned Type) { StringRef Name = getELFSectionTypeName(Machine, Type); // Handle SHT_GNU_* type names. - if (Name.startswith("SHT_GNU_")) { - if (Name == "SHT_GNU_HASH") + if (Name.consume_front("SHT_GNU_")) { + if (Name == "HASH") return "GNU_HASH"; // E.g. SHT_GNU_verneed -> VERNEED. - return Name.drop_front(8).upper(); + return Name.upper(); } if (Name == "SHT_SYMTAB_SHNDX") return "SYMTAB SECTION INDICES"; - if (Name.startswith("SHT_")) - return Name.drop_front(4).str(); + if (Name.consume_front("SHT_")) + return Name.str(); return getSectionTypeOffsetString(Type); } @@ -3647,7 +3685,7 @@ template void GNUELFDumper::printSectionHeaders() { ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " - << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n"; + << "0x" << utohexstr(this->Obj.getHeader().e_shoff, /*LowerCase=*/true) << ":\n\n"; OS << "Section Headers:\n"; Field Fields[11] = { {"[Nr]", 2}, {"Name", 7}, {"Type", 25}, @@ -3680,7 +3718,8 @@ template void GNUELFDumper::printSectionHeaders() { Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6)); Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6)); Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2)); - Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_machine, Sec.sh_flags); + Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_ident[ELF::EI_OSABI], + this->Obj.getHeader().e_machine, Sec.sh_flags); Fields[8].Str = to_string(Sec.sh_link); Fields[9].Str = to_string(Sec.sh_info); Fields[10].Str = to_string(Sec.sh_addralign); @@ -3804,7 +3843,7 @@ void GNUELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Other &= ~STO_AARCH64_VARIANT_PCS; Fields[5].Str += " [VARIANT_PCS"; if (Other != 0) - Fields[5].Str.append(" | " + to_hexString(Other, false)); + Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true)); Fields[5].Str.append("]"); } } else if (this->Obj.getHeader().e_machine == ELF::EM_RISCV) { @@ -3813,7 +3852,7 @@ void GNUELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Other &= ~STO_RISCV_VARIANT_CC; Fields[5].Str += " [VARIANT_CC"; if (Other != 0) - Fields[5].Str.append(" | " + to_hexString(Other, false)); + Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true)); Fields[5].Str.append("]"); } } else { @@ -4025,7 +4064,7 @@ template void GNUELFDumper::printSectionDetails() { ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " - << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n"; + << "0x" << utohexstr(this->Obj.getHeader().e_shoff, /*LowerCase=*/true) << ":\n\n"; OS << "Section Headers:\n"; @@ -5041,6 +5080,57 @@ static bool printGNUNote(raw_ostream &OS, uint32_t 
NoteType, return true; } +using AndroidNoteProperties = std::vector>; +static AndroidNoteProperties getAndroidNoteProperties(uint32_t NoteType, + ArrayRef Desc) { + AndroidNoteProperties Props; + switch (NoteType) { + case ELF::NT_ANDROID_TYPE_MEMTAG: + if (Desc.empty()) { + Props.emplace_back("Invalid .note.android.memtag", ""); + return Props; + } + + switch (Desc[0] & NT_MEMTAG_LEVEL_MASK) { + case NT_MEMTAG_LEVEL_NONE: + Props.emplace_back("Tagging Mode", "NONE"); + break; + case NT_MEMTAG_LEVEL_ASYNC: + Props.emplace_back("Tagging Mode", "ASYNC"); + break; + case NT_MEMTAG_LEVEL_SYNC: + Props.emplace_back("Tagging Mode", "SYNC"); + break; + default: + Props.emplace_back( + "Tagging Mode", + ("Unknown (" + Twine::utohexstr(Desc[0] & NT_MEMTAG_LEVEL_MASK) + ")") + .str()); + break; + } + Props.emplace_back("Heap", + (Desc[0] & NT_MEMTAG_HEAP) ? "Enabled" : "Disabled"); + Props.emplace_back("Stack", + (Desc[0] & NT_MEMTAG_STACK) ? "Enabled" : "Disabled"); + break; + default: + return Props; + } + return Props; +} + +static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType, + ArrayRef Desc) { + // Return true if we were able to pretty-print the note, false otherwise. + AndroidNoteProperties Props = getAndroidNoteProperties(NoteType, Desc); + if (Props.empty()) + return false; + for (const auto &KV : Props) + OS << " " << KV.first << ": " << KV.second << '\n'; + OS << '\n'; + return true; +} + template static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType, ArrayRef Desc) { @@ -5400,6 +5490,13 @@ const NoteType LLVMOMPOFFLOADNoteTypes[] = { "NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION (producing toolchain version)"}, }; +const NoteType AndroidNoteTypes[] = { + {ELF::NT_ANDROID_TYPE_IDENT, "NT_ANDROID_TYPE_IDENT"}, + {ELF::NT_ANDROID_TYPE_KUSER, "NT_ANDROID_TYPE_KUSER"}, + {ELF::NT_ANDROID_TYPE_MEMTAG, + "NT_ANDROID_TYPE_MEMTAG (Android memory tagging information)"}, +}; + const NoteType CoreNoteTypes[] = { {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, @@ -5508,6 +5605,8 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { return FindNote(AMDGPUNoteTypes); if (Name == "LLVMOMPOFFLOAD") return FindNote(LLVMOMPOFFLOADNoteTypes); + if (Name == "Android") + return FindNote(AndroidNoteTypes); if (ELFType == ELF::ET_CORE) return FindNote(CoreNoteTypes); @@ -5658,6 +5757,9 @@ template void GNUELFDumper::printNotes() { return NoteOrErr.takeError(); } } + } else if (Name == "Android") { + if (printAndroidNote(OS, Type, Descriptor)) + return Error::success(); } if (!Descriptor.empty()) { OS << " description data:"; @@ -5838,7 +5940,7 @@ template SmallVector ELFDumper::getSymbolIndexesForFunctionAddress( uint64_t SymValue, Optional FunctionSec) { SmallVector SymbolIndexes; - if (!this->AddressToIndexMap.hasValue()) { + if (!this->AddressToIndexMap) { // Populate the address to index map upon the first invocation of this // function. 
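
As the comment above says, AddressToIndexMap is an optional map that stays unset until the first query, is emplace()d on demand, and is reused by every later call. A self-contained sketch of that lazy-cache pattern, with std::optional standing in for llvm::Optional and invented symbol data:

#include <cstdint>
#include <iostream>
#include <map>
#include <optional>
#include <vector>

struct Dumper {
  std::vector<uint64_t> SymbolAddresses = {0x1000, 0x2000, 0x1000};

  // Built lazily: empty until the first query, then reused on every call,
  // mirroring AddressToIndexMap in the hunk above.
  std::optional<std::multimap<uint64_t, size_t>> AddressToIndexMap;

  std::vector<size_t> indexesForAddress(uint64_t Addr) {
    if (!AddressToIndexMap) {
      AddressToIndexMap.emplace(); // First invocation: populate the cache.
      for (size_t I = 0; I < SymbolAddresses.size(); ++I)
        AddressToIndexMap->emplace(SymbolAddresses[I], I);
    }
    std::vector<size_t> Result;
    auto [Begin, End] = AddressToIndexMap->equal_range(Addr);
    for (auto It = Begin; It != End; ++It)
      Result.push_back(It->second);
    return Result;
  }
};

int main() {
  Dumper D;
  for (size_t Idx : D.indexesForAddress(0x1000))
    std::cout << "symbol index " << Idx << "\n"; // prints 0 and 2
}
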
this->AddressToIndexMap.emplace(); @@ -5991,9 +6093,8 @@ void ELFDumper::printStackSize(const Relocation &R, return; } - uint64_t SymValue = - Resolver(R.Type, Offset, RelocSymValue, Data.getAddress(&Offset), - R.Addend.getValueOr(0)); + uint64_t SymValue = Resolver(R.Type, Offset, RelocSymValue, + Data.getAddress(&Offset), R.Addend.value_or(0)); this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data, &Offset); } @@ -6368,7 +6469,7 @@ template void LLVMELFDumper::printFileHeaders() { else TypeStr = "Unknown"; } - W.printString("Type", TypeStr + " (0x" + to_hexString(E.e_type) + ")"); + W.printString("Type", TypeStr + " (0x" + utohexstr(E.e_type) + ")"); W.printEnum("Machine", E.e_machine, makeArrayRef(ElfMachineType)); W.printNumber("Version", E.e_version); @@ -6501,7 +6602,8 @@ template void LLVMELFDumper::printSectionHeaders() { int SectionIndex = -1; std::vector> FlagsList = - getSectionFlagsForTarget(this->Obj.getHeader().e_machine); + getSectionFlagsForTarget(this->Obj.getHeader().e_ident[ELF::EI_OSABI], + this->Obj.getHeader().e_machine); for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); @@ -6932,8 +7034,10 @@ template void LLVMELFDumper::printCGProfile() { template void LLVMELFDumper::printBBAddrMaps() { bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL; for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { - if (Sec.sh_type != SHT_LLVM_BB_ADDR_MAP) + if (Sec.sh_type != SHT_LLVM_BB_ADDR_MAP && + Sec.sh_type != SHT_LLVM_BB_ADDR_MAP_V0) { continue; + } Optional FunctionSec = None; if (IsRelocatable) FunctionSec = @@ -7024,6 +7128,17 @@ static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, return true; } +static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, + ScopedPrinter &W) { + // Return true if we were able to pretty-print the note, false otherwise. + AndroidNoteProperties Props = getAndroidNoteProperties(NoteType, Desc); + if (Props.empty()) + return false; + for (const auto &KV : Props) + W.printString(KV.first, KV.second); + return true; +} + template static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, @@ -7126,6 +7241,9 @@ template void LLVMELFDumper::printNotes() { return N.takeError(); } } + } else if (Name == "Android") { + if (printAndroidNoteLLVMStyle(Type, Descriptor, W)) + return Error::success(); } if (!Descriptor.empty()) { W.printBinaryBlock("Description data", Descriptor); diff --git a/llvm/tools/llvm-readobj/MachODumper.cpp b/llvm/tools/llvm-readobj/MachODumper.cpp index 599b0355917e..4931ab575bb2 100644 --- a/llvm/tools/llvm-readobj/MachODumper.cpp +++ b/llvm/tools/llvm-readobj/MachODumper.cpp @@ -13,6 +13,7 @@ #include "ObjDumper.h" #include "StackMapPrinter.h" #include "llvm-readobj.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Object/MachO.h" @@ -39,6 +40,11 @@ public: void printNeededLibraries() override; + bool canCompareSymbols() const override { return true; } + bool compareSymbolsByName(object::SymbolRef LHS, + object::SymbolRef RHS) const override; + bool compareSymbolsByType(object::SymbolRef LHS, + object::SymbolRef RHS) const override; // MachO-specific. 
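
The comparator hooks declared above feed the sorted printSymbols() overloads added further down: when a comparator is supplied, the symbols are copied and stable-sorted before printing, otherwise they are emitted in on-disk order. A minimal sketch of that flow; Symbol and its fields are illustrative, not the Mach-O nlist layout.

#include <algorithm>
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct Symbol {
  std::string Name;
  int Type;
};

using Comparator = std::function<bool(const Symbol &, const Symbol &)>;

// Mirrors printSymbols(Optional<SymbolComparator>): sort a copy when a
// comparator is supplied, otherwise print in on-disk order.
void printSymbols(const std::vector<Symbol> &Syms,
                  const std::optional<Comparator> &Comp) {
  std::vector<Symbol> Sorted(Syms.begin(), Syms.end());
  if (Comp)
    std::stable_sort(Sorted.begin(), Sorted.end(), *Comp); // keeps ties stable
  for (const Symbol &S : Sorted)
    std::cout << S.Name << " (type " << S.Type << ")\n";
}

int main() {
  std::vector<Symbol> Syms = {{"_main", 2}, {"_abort", 1}, {"_exit", 1}};
  printSymbols(Syms, Comparator([](const Symbol &L, const Symbol &R) {
                 return L.Name < R.Name;
               }));
  printSymbols(Syms, std::nullopt); // unsorted: on-disk order
}
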
void printMachODataInCode() override; void printMachOVersionMin() override; @@ -51,10 +57,14 @@ private: template void printFileHeaders(const MachHeader &Header); - StringRef getSymbolName(const SymbolRef &Symbol); + StringRef getSymbolName(const SymbolRef &Symbol) const; + uint8_t getSymbolType(const SymbolRef &Symbol) const; void printSymbols() override; + void printSymbols(Optional SymComp) override; void printDynamicSymbols() override; + void printDynamicSymbols(Optional SymComp) override; + void printSymbol(const SymbolRef &Symbol, ScopedPrinter &W); void printSymbol(const SymbolRef &Symbol); void printRelocation(const RelocationRef &Reloc); @@ -602,7 +612,7 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj, } } -StringRef MachODumper::getSymbolName(const SymbolRef &Symbol) { +StringRef MachODumper::getSymbolName(const SymbolRef &Symbol) const { Expected SymbolNameOrErr = Symbol.getName(); if (!SymbolNameOrErr) { reportError(SymbolNameOrErr.takeError(), Obj->getFileName()); @@ -610,19 +620,50 @@ StringRef MachODumper::getSymbolName(const SymbolRef &Symbol) { return *SymbolNameOrErr; } -void MachODumper::printSymbols() { - ListScope Group(W, "Symbols"); +uint8_t MachODumper::getSymbolType(const SymbolRef &Symbol) const { + return Obj->is64Bit() + ? Obj->getSymbol64TableEntry(Symbol.getRawDataRefImpl()).n_type + : Obj->getSymbolTableEntry(Symbol.getRawDataRefImpl()).n_type; +} + +bool MachODumper::compareSymbolsByName(SymbolRef LHS, SymbolRef RHS) const { + return getSymbolName(LHS).str().compare(getSymbolName(RHS).str()) < 0; +} + +bool MachODumper::compareSymbolsByType(SymbolRef LHS, SymbolRef RHS) const { + return getSymbolType(LHS) < getSymbolType(RHS); +} + +void MachODumper::printSymbols() { printSymbols(None); } - for (const SymbolRef &Symbol : Obj->symbols()) { - printSymbol(Symbol); +void MachODumper::printSymbols(Optional SymComp) { + ListScope Group(W, "Symbols"); + if (SymComp) { + auto SymbolRange = Obj->symbols(); + std::vector SortedSymbols(SymbolRange.begin(), + SymbolRange.end()); + llvm::stable_sort(SortedSymbols, *SymComp); + for (SymbolRef Symbol : SortedSymbols) + printSymbol(Symbol); + } else { + for (const SymbolRef &Symbol : Obj->symbols()) { + printSymbol(Symbol); + } } } void MachODumper::printDynamicSymbols() { ListScope Group(W, "DynamicSymbols"); } +void MachODumper::printDynamicSymbols(Optional SymComp) { + ListScope Group(W, "DynamicSymbols"); +} void MachODumper::printSymbol(const SymbolRef &Symbol) { + printSymbol(Symbol, W); +} + +void MachODumper::printSymbol(const SymbolRef &Symbol, ScopedPrinter &W) { StringRef SymbolName = getSymbolName(Symbol); MachOSymbol MOSymbol; diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index a09a243d381e..292efd2ae350 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -9,9 +9,14 @@ #ifndef LLVM_TOOLS_LLVM_READOBJ_OBJDUMPER_H #define LLVM_TOOLS_LLVM_READOBJ_OBJDUMPER_H +#include #include #include +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" @@ -25,7 +30,7 @@ class COFFImportFile; class ObjectFile; class XCOFFObjectFile; class ELFObjectFileBase; -} +} // namespace object namespace codeview { class GlobalTypeTableBuilder; class MergingTypeTableBuilder; @@ -33,6 +38,33 @@ class MergingTypeTableBuilder; class ScopedPrinter; +// 
Comparator to compare symbols. +// Usage: the caller registers predicates (i.e., how to compare the symbols) by +// calling addPredicate(). The order in which predicates are registered is also +// their priority. +class SymbolComparator { +public: + using CompPredicate = + std::function; + + // Each Obj format has a slightly different way of retrieving a symbol's info + // So we defer the predicate's impl to each format. + void addPredicate(CompPredicate Pred) { Predicates.push_back(Pred); } + + bool operator()(object::SymbolRef LHS, object::SymbolRef RHS) { + for (CompPredicate Pred : Predicates) { + if (Pred(LHS, RHS)) + return true; + if (Pred(RHS, LHS)) + return false; + } + return false; + } + +private: + SmallVector Predicates; +}; + class ObjDumper { public: ObjDumper(ScopedPrinter &Writer, StringRef ObjName); @@ -52,6 +84,17 @@ public: if (PrintDynamicSymbols) printDynamicSymbols(); } + virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols, + llvm::Optional SymComp) { + if (SymComp) { + if (PrintSymbols) + printSymbols(SymComp); + if (PrintDynamicSymbols) + printDynamicSymbols(SymComp); + } else { + printSymbols(PrintSymbols, PrintDynamicSymbols); + } + } virtual void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) @@ -62,6 +105,17 @@ public: virtual void printUnwindInfo() = 0; + // Symbol comparison functions. + virtual bool canCompareSymbols() const { return false; } + virtual bool compareSymbolsByName(object::SymbolRef LHS, + object::SymbolRef RHS) const { + return true; + } + virtual bool compareSymbolsByType(object::SymbolRef LHS, + object::SymbolRef RHS) const { + return true; + } + // Only implemented for ELF at this time. virtual void printDependentLibs() {} virtual void printDynamicRelocations() { } @@ -133,7 +187,9 @@ protected: private: virtual void printSymbols() {} + virtual void printSymbols(llvm::Optional Comp) {} virtual void printDynamicSymbols() {} + virtual void printDynamicSymbols(llvm::Optional Comp) {} virtual void printProgramHeaders() {} virtual void printSectionMapping() {} diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td index d0f273fa60c7..4687fc71245f 100644 --- a/llvm/tools/llvm-readobj/Opts.td +++ b/llvm/tools/llvm-readobj/Opts.td @@ -37,6 +37,7 @@ def section_mapping : FF<"section-mapping", "Display the section to segment mapp def section_mapping_EQ_false : FF<"section-mapping=false", "Don't display the section to segment mapping">, Flags<[HelpHidden]>; def section_relocations : FF<"section-relocations", "Display relocations for each section shown. This option has no effect for GNU style output">; def section_symbols : FF<"section-symbols", "Display symbols for each section shown. This option has no effect for GNU style output">; +defm sort_symbols : Eq<"sort-symbols", "Specify the keys to sort the symbols before displaying symtab">; def stack_sizes : FF<"stack-sizes", "Display contents of all stack sizes sections. This option has no effect for GNU style output">; def stackmap : FF<"stackmap", "Display contents of stackmap section">; defm string_dump : Eq<"string-dump", "Display the specified section(s) as a list of strings">, MetaVarName<"">; @@ -86,7 +87,7 @@ def coff_tls_directory : FF<"coff-tls-directory", "Display TLS directory">, Grou // XCOFF specific options. 
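The SymbolComparator above composes its predicates lexicographically: a later predicate is consulted only when an earlier one finds the two symbols equivalent (neither Pred(LHS, RHS) nor Pred(RHS, LHS) holds), so registration order is priority order. A standalone sketch of the same technique, detached from the object::SymbolRef plumbing and fed to llvm::stable_sort the way MachODumper::printSymbols does; the Sym record is hypothetical:

    #include "llvm/ADT/STLExtras.h"
    #include <functional>
    #include <string>
    #include <utility>
    #include <vector>

    struct Sym {
      std::string Name;
      int Type;
    };

    class ChainedLess {
      std::vector<std::function<bool(const Sym &, const Sym &)>> Preds;

    public:
      // Registration order is priority order, as in SymbolComparator.
      void addPredicate(std::function<bool(const Sym &, const Sym &)> P) {
        Preds.push_back(std::move(P));
      }
      bool operator()(const Sym &L, const Sym &R) const {
        for (const auto &P : Preds) {
          if (P(L, R))
            return true;  // L sorts strictly before R under this key.
          if (P(R, L))
            return false; // R sorts strictly before L under this key.
          // Equivalent under this key; fall through to the next one.
        }
        return false; // Equivalent under every registered key.
      }
    };

    void sortSymbols(std::vector<Sym> &Syms) {
      ChainedLess Less;
      Less.addPredicate(
          [](const Sym &L, const Sym &R) { return L.Name < R.Name; });
      Less.addPredicate(
          [](const Sym &L, const Sym &R) { return L.Type < R.Type; });
      // stable_sort keeps symbol-table order for entries equal under all keys.
      llvm::stable_sort(Syms, Less);
    }

Together with the option defined in the Opts.td hunk below, this is what llvm-readobj --symbols --sort-symbols=name,type does: name is the primary key, and type only breaks ties.
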
def grp_xcoff : OptionGroup<"kind">, HelpText<"OPTIONS (XCOFF specific)">; -def auxiliary_header : FF<"auxiliary-header" , "display the auxiliary header">, Group; +def auxiliary_header : FF<"auxiliary-header" , "Display the auxiliary header">, Group; def help : FF<"help", "Display this help">; def version : FF<"version", "Display the version">; diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp index b4d726016437..cf80a2d13d2d 100644 --- a/llvm/tools/llvm-readobj/WasmDumper.cpp +++ b/llvm/tools/llvm-readobj/WasmDumper.cpp @@ -179,13 +179,15 @@ void WasmDumper::printSectionHeaders() { if (!Seg.Name.empty()) W.printString("Name", Seg.Name); W.printNumber("Size", static_cast(Seg.Content.size())); - if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I32_CONST) - W.printNumber("Offset", Seg.Offset.Value.Int32); - else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST) - W.printNumber("Offset", Seg.Offset.Value.Int64); - else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) { + if (Seg.Offset.Extended) + llvm_unreachable("extended const exprs not supported"); + else if (Seg.Offset.Inst.Opcode == wasm::WASM_OPCODE_I32_CONST) + W.printNumber("Offset", Seg.Offset.Inst.Value.Int32); + else if (Seg.Offset.Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) + W.printNumber("Offset", Seg.Offset.Inst.Value.Int64); + else if (Seg.Offset.Inst.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) { ListScope Group(W, "Offset"); - W.printNumber("Global", Seg.Offset.Value.Global); + W.printNumber("Global", Seg.Offset.Inst.Value.Global); } else llvm_unreachable("unknown init expr opcode"); } diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp index 6e778d558d4f..ccae66f20127 100644 --- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp +++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp @@ -17,7 +17,6 @@ #include "llvm/Support/ScopedPrinter.h" #include -#include using namespace llvm; using namespace object; @@ -41,6 +40,8 @@ public: void printNeededLibraries() override; void printStringTable() override; + ScopedPrinter &getScopedPrinter() const { return W; } + private: template void printSectionHeaders(ArrayRef Sections); template void printGenericSectionHeader(T &Sec) const; @@ -113,6 +114,8 @@ void XCOFFDumper::printFileHeaders() { } void XCOFFDumper::printAuxiliaryHeader() { + DictScope DS(W, "AuxiliaryHeader"); + if (Obj.is64Bit()) printAuxiliaryHeader(Obj.auxiliaryHeader64()); else @@ -736,6 +739,46 @@ void XCOFFDumper::printGenericSectionHeader(T &Sec) const { W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers); } +enum PrintStyle { Hex, Number }; +template +static void printAuxMemberHelper(PrintStyle Style, const char *MemberName, + const T &Member, const V *AuxHeader, + uint16_t AuxSize, uint16_t &PartialFieldOffset, + const char *&PartialFieldName, + ScopedPrinter &W) { + ptrdiff_t Offset = reinterpret_cast(&Member) - + reinterpret_cast(AuxHeader); + if (Offset + sizeof(Member) <= AuxSize) + Style == Hex ? 
W.printHex(MemberName, Member) + : W.printNumber(MemberName, Member); + else if (Offset < AuxSize) { + PartialFieldOffset = Offset; + PartialFieldName = MemberName; + } +} + +template +void checkAndPrintAuxHeaderParseError(const char *PartialFieldName, + uint16_t PartialFieldOffset, + uint16_t AuxSize, T &AuxHeader, + XCOFFDumper *Dumper) { + if (PartialFieldOffset < AuxSize) { + Dumper->reportUniqueWarning(Twine("only partial field for ") + + PartialFieldName + " at offset (" + + Twine(PartialFieldOffset) + ")"); + Dumper->getScopedPrinter().printBinary( + "Raw data", "", + ArrayRef(reinterpret_cast(&AuxHeader) + + PartialFieldOffset, + AuxSize - PartialFieldOffset)); + } else if (sizeof(AuxHeader) < AuxSize) + Dumper->getScopedPrinter().printBinary( + "Extra raw data", "", + ArrayRef(reinterpret_cast(&AuxHeader) + + sizeof(AuxHeader), + AuxSize - sizeof(AuxHeader))); +} + void XCOFFDumper::printAuxiliaryHeader( const XCOFFAuxiliaryHeader32 *AuxHeader) { if (AuxHeader == nullptr) @@ -744,44 +787,40 @@ void XCOFFDumper::printAuxiliaryHeader( uint16_t PartialFieldOffset = AuxSize; const char *PartialFieldName = nullptr; - DictScope DS(W, "AuxiliaryHeader"); - -#define PrintAuxMember32(H, S, T) \ - if (offsetof(XCOFFAuxiliaryHeader32, T) + \ - sizeof(XCOFFAuxiliaryHeader32::T) <= \ - AuxSize) \ - W.print##H(S, AuxHeader->T); \ - else if (offsetof(XCOFFAuxiliaryHeader32, T) < AuxSize) { \ - PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader32, T); \ - PartialFieldName = S; \ - } + auto PrintAuxMember = [&](PrintStyle Style, const char *MemberName, + auto &Member) { + printAuxMemberHelper(Style, MemberName, Member, AuxHeader, AuxSize, + PartialFieldOffset, PartialFieldName, W); + }; - PrintAuxMember32(Hex, "Magic", AuxMagic); - PrintAuxMember32(Hex, "Version", Version); - PrintAuxMember32(Hex, "Size of .text section", TextSize); - PrintAuxMember32(Hex, "Size of .data section", InitDataSize); - PrintAuxMember32(Hex, "Size of .bss section", BssDataSize); - PrintAuxMember32(Hex, "Entry point address", EntryPointAddr); - PrintAuxMember32(Hex, ".text section start address", TextStartAddr); - PrintAuxMember32(Hex, ".data section start address", DataStartAddr); - PrintAuxMember32(Hex, "TOC anchor address", TOCAnchorAddr); - PrintAuxMember32(Number, "Section number of entryPoint", SecNumOfEntryPoint); - PrintAuxMember32(Number, "Section number of .text", SecNumOfText); - PrintAuxMember32(Number, "Section number of .data", SecNumOfData); - PrintAuxMember32(Number, "Section number of TOC", SecNumOfTOC); - PrintAuxMember32(Number, "Section number of loader data", SecNumOfLoader); - PrintAuxMember32(Number, "Section number of .bss", SecNumOfBSS); - PrintAuxMember32(Hex, "Maxium alignment of .text", MaxAlignOfText); - PrintAuxMember32(Hex, "Maxium alignment of .data", MaxAlignOfData); - PrintAuxMember32(Hex, "Module type", ModuleType); - PrintAuxMember32(Hex, "CPU type of objects", CpuFlag); - PrintAuxMember32(Hex, "(Reserved)", CpuType); - PrintAuxMember32(Hex, "Maximum stack size", MaxStackSize); - PrintAuxMember32(Hex, "Maximum data size", MaxDataSize); - PrintAuxMember32(Hex, "Reserved for debugger", ReservedForDebugger); - PrintAuxMember32(Hex, "Text page size", TextPageSize); - PrintAuxMember32(Hex, "Data page size", DataPageSize); - PrintAuxMember32(Hex, "Stack page size", StackPageSize); + PrintAuxMember(Hex, "Magic", AuxHeader->AuxMagic); + PrintAuxMember(Hex, "Version", AuxHeader->Version); + PrintAuxMember(Hex, "Size of .text section", AuxHeader->TextSize); + PrintAuxMember(Hex, "Size of .data 
section", AuxHeader->InitDataSize); + PrintAuxMember(Hex, "Size of .bss section", AuxHeader->BssDataSize); + PrintAuxMember(Hex, "Entry point address", AuxHeader->EntryPointAddr); + PrintAuxMember(Hex, ".text section start address", AuxHeader->TextStartAddr); + PrintAuxMember(Hex, ".data section start address", AuxHeader->DataStartAddr); + PrintAuxMember(Hex, "TOC anchor address", AuxHeader->TOCAnchorAddr); + PrintAuxMember(Number, "Section number of entryPoint", + AuxHeader->SecNumOfEntryPoint); + PrintAuxMember(Number, "Section number of .text", AuxHeader->SecNumOfText); + PrintAuxMember(Number, "Section number of .data", AuxHeader->SecNumOfData); + PrintAuxMember(Number, "Section number of TOC", AuxHeader->SecNumOfTOC); + PrintAuxMember(Number, "Section number of loader data", + AuxHeader->SecNumOfLoader); + PrintAuxMember(Number, "Section number of .bss", AuxHeader->SecNumOfBSS); + PrintAuxMember(Hex, "Maxium alignment of .text", AuxHeader->MaxAlignOfText); + PrintAuxMember(Hex, "Maxium alignment of .data", AuxHeader->MaxAlignOfData); + PrintAuxMember(Hex, "Module type", AuxHeader->ModuleType); + PrintAuxMember(Hex, "CPU type of objects", AuxHeader->CpuFlag); + PrintAuxMember(Hex, "(Reserved)", AuxHeader->CpuType); + PrintAuxMember(Hex, "Maximum stack size", AuxHeader->MaxStackSize); + PrintAuxMember(Hex, "Maximum data size", AuxHeader->MaxDataSize); + PrintAuxMember(Hex, "Reserved for debugger", AuxHeader->ReservedForDebugger); + PrintAuxMember(Hex, "Text page size", AuxHeader->TextPageSize); + PrintAuxMember(Hex, "Data page size", AuxHeader->DataPageSize); + PrintAuxMember(Hex, "Stack page size", AuxHeader->StackPageSize); if (offsetof(XCOFFAuxiliaryHeader32, FlagAndTDataAlignment) + sizeof(XCOFFAuxiliaryHeader32::FlagAndTDataAlignment) <= AuxSize) { @@ -790,35 +829,11 @@ void XCOFFDumper::printAuxiliaryHeader( AuxHeader->getTDataAlignment()); } - PrintAuxMember32(Number, "Section number for .tdata", SecNumOfTData); - PrintAuxMember32(Number, "Section number for .tbss", SecNumOfTBSS); + PrintAuxMember(Number, "Section number for .tdata", AuxHeader->SecNumOfTData); + PrintAuxMember(Number, "Section number for .tbss", AuxHeader->SecNumOfTBSS); - // Deal with error. 
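printAuxMemberHelper and the PrintAuxMember lambdas above replace the old PrintAuxMember32/PrintAuxMember64 macros with one template: each field's offset is recovered by pointer arithmetic against the header's base address, so a single helper serves both header widths and can flag a field that the on-disk auxiliary header truncates mid-member. A reduced sketch of that check, using a hypothetical header type and printf in place of ScopedPrinter:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct Hdr {
      uint16_t Magic;
      uint32_t TextSize;
      uint64_t EntryPointAddr;
    };

    // Print each member only if it lies wholly inside the first AuxSize bytes
    // of H; a field that begins inside but ends outside is a truncated
    // "partial" field, and anything at or past AuxSize is simply absent.
    void dump(const Hdr &H, uint16_t AuxSize) {
      auto PrintMember = [&](const char *Name, const auto &Member) {
        std::ptrdiff_t Offset = reinterpret_cast<const char *>(&Member) -
                                reinterpret_cast<const char *>(&H);
        if (Offset + static_cast<std::ptrdiff_t>(sizeof(Member)) <= AuxSize)
          std::printf("%s = %llu\n", Name,
                      static_cast<unsigned long long>(Member));
        else if (Offset < AuxSize)
          std::printf("%s: only partially present\n", Name);
      };
      PrintMember("Magic", H.Magic);
      PrintMember("Size of .text section", H.TextSize);
      PrintMember("Entry point address", H.EntryPointAddr);
    }

The generic lambda (auto &Member) is what lets one closure handle uint16_t through uint64_t fields, where the old code stamped out a macro per header width.
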
- if (PartialFieldOffset < AuxSize) { - std::string ErrInfo; - llvm::raw_string_ostream StringOS(ErrInfo); - StringOS << "Only partial field for " << PartialFieldName << " at offset (" - << PartialFieldOffset << ")."; - StringOS.flush(); - reportWarning( - make_error(ErrInfo, object_error::parse_failed), - "-"); - W.printBinary( - "Raw data", "", - ArrayRef((const uint8_t *)(AuxHeader) + PartialFieldOffset, - AuxSize - PartialFieldOffset)); - } else if (sizeof(XCOFFAuxiliaryHeader32) < AuxSize) { - reportWarning(make_error( - "There are extra data beyond auxiliary header", - object_error::parse_failed), - "-"); - W.printBinary("Extra raw data", "", - ArrayRef((const uint8_t *)(AuxHeader) + - sizeof(XCOFFAuxiliaryHeader32), - AuxSize - sizeof(XCOFFAuxiliaryHeader32))); - } - -#undef PrintAuxMember32 + checkAndPrintAuxHeaderParseError(PartialFieldName, PartialFieldOffset, + AuxSize, *AuxHeader, this); } void XCOFFDumper::printAuxiliaryHeader( @@ -829,38 +844,34 @@ void XCOFFDumper::printAuxiliaryHeader( uint16_t PartialFieldOffset = AuxSize; const char *PartialFieldName = nullptr; - DictScope DS(W, "AuxiliaryHeader"); - -#define PrintAuxMember64(H, S, T) \ - if (offsetof(XCOFFAuxiliaryHeader64, T) + \ - sizeof(XCOFFAuxiliaryHeader64::T) <= \ - AuxSize) \ - W.print##H(S, AuxHeader->T); \ - else if (offsetof(XCOFFAuxiliaryHeader64, T) < AuxSize) { \ - PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader64, T); \ - PartialFieldName = S; \ - } + auto PrintAuxMember = [&](PrintStyle Style, const char *MemberName, + auto &Member) { + printAuxMemberHelper(Style, MemberName, Member, AuxHeader, AuxSize, + PartialFieldOffset, PartialFieldName, W); + }; - PrintAuxMember64(Hex, "Magic", AuxMagic); - PrintAuxMember64(Hex, "Version", Version); - PrintAuxMember64(Hex, "Reserved for debugger", ReservedForDebugger); - PrintAuxMember64(Hex, ".text section start address", TextStartAddr); - PrintAuxMember64(Hex, ".data section start address", DataStartAddr); - PrintAuxMember64(Hex, "TOC anchor address", TOCAnchorAddr); - PrintAuxMember64(Number, "Section number of entryPoint", SecNumOfEntryPoint); - PrintAuxMember64(Number, "Section number of .text", SecNumOfText); - PrintAuxMember64(Number, "Section number of .data", SecNumOfData); - PrintAuxMember64(Number, "Section number of TOC", SecNumOfTOC); - PrintAuxMember64(Number, "Section number of loader data", SecNumOfLoader); - PrintAuxMember64(Number, "Section number of .bss", SecNumOfBSS); - PrintAuxMember64(Hex, "Maxium alignment of .text", MaxAlignOfText); - PrintAuxMember64(Hex, "Maxium alignment of .data", MaxAlignOfData); - PrintAuxMember64(Hex, "Module type", ModuleType); - PrintAuxMember64(Hex, "CPU type of objects", CpuFlag); - PrintAuxMember64(Hex, "(Reserved)", CpuType); - PrintAuxMember64(Hex, "Text page size", TextPageSize); - PrintAuxMember64(Hex, "Data page size", DataPageSize); - PrintAuxMember64(Hex, "Stack page size", StackPageSize); + PrintAuxMember(Hex, "Magic", AuxHeader->AuxMagic); + PrintAuxMember(Hex, "Version", AuxHeader->Version); + PrintAuxMember(Hex, "Reserved for debugger", AuxHeader->ReservedForDebugger); + PrintAuxMember(Hex, ".text section start address", AuxHeader->TextStartAddr); + PrintAuxMember(Hex, ".data section start address", AuxHeader->DataStartAddr); + PrintAuxMember(Hex, "TOC anchor address", AuxHeader->TOCAnchorAddr); + PrintAuxMember(Number, "Section number of entryPoint", + AuxHeader->SecNumOfEntryPoint); + PrintAuxMember(Number, "Section number of .text", AuxHeader->SecNumOfText); + PrintAuxMember(Number, "Section 
number of .data", AuxHeader->SecNumOfData); + PrintAuxMember(Number, "Section number of TOC", AuxHeader->SecNumOfTOC); + PrintAuxMember(Number, "Section number of loader data", + AuxHeader->SecNumOfLoader); + PrintAuxMember(Number, "Section number of .bss", AuxHeader->SecNumOfBSS); + PrintAuxMember(Hex, "Maxium alignment of .text", AuxHeader->MaxAlignOfText); + PrintAuxMember(Hex, "Maxium alignment of .data", AuxHeader->MaxAlignOfData); + PrintAuxMember(Hex, "Module type", AuxHeader->ModuleType); + PrintAuxMember(Hex, "CPU type of objects", AuxHeader->CpuFlag); + PrintAuxMember(Hex, "(Reserved)", AuxHeader->CpuType); + PrintAuxMember(Hex, "Text page size", AuxHeader->TextPageSize); + PrintAuxMember(Hex, "Data page size", AuxHeader->DataPageSize); + PrintAuxMember(Hex, "Stack page size", AuxHeader->StackPageSize); if (offsetof(XCOFFAuxiliaryHeader64, FlagAndTDataAlignment) + sizeof(XCOFFAuxiliaryHeader64::FlagAndTDataAlignment) <= AuxSize) { @@ -868,42 +879,18 @@ void XCOFFDumper::printAuxiliaryHeader( W.printHex("Alignment of thread-local storage", AuxHeader->getTDataAlignment()); } - PrintAuxMember64(Hex, "Size of .text section", TextSize); - PrintAuxMember64(Hex, "Size of .data section", InitDataSize); - PrintAuxMember64(Hex, "Size of .bss section", BssDataSize); - PrintAuxMember64(Hex, "Entry point address", EntryPointAddr); - PrintAuxMember64(Hex, "Maximum stack size", MaxStackSize); - PrintAuxMember64(Hex, "Maximum data size", MaxDataSize); - PrintAuxMember64(Number, "Section number for .tdata", SecNumOfTData); - PrintAuxMember64(Number, "Section number for .tbss", SecNumOfTBSS); - PrintAuxMember64(Hex, "Additional flags 64-bit XCOFF", XCOFF64Flag); - - if (PartialFieldOffset < AuxSize) { - std::string ErrInfo; - llvm::raw_string_ostream StringOS(ErrInfo); - StringOS << "Only partial field for " << PartialFieldName << " at offset (" - << PartialFieldOffset << ")."; - StringOS.flush(); - reportWarning( - make_error(ErrInfo, object_error::parse_failed), - "-"); - ; - W.printBinary( - "Raw data", "", - ArrayRef((const uint8_t *)(AuxHeader) + PartialFieldOffset, - AuxSize - PartialFieldOffset)); - } else if (sizeof(XCOFFAuxiliaryHeader64) < AuxSize) { - reportWarning(make_error( - "There are extra data beyond auxiliary header", - object_error::parse_failed), - "-"); - W.printBinary("Extra raw data", "", - ArrayRef((const uint8_t *)(AuxHeader) + - sizeof(XCOFFAuxiliaryHeader64), - AuxSize - sizeof(XCOFFAuxiliaryHeader64))); - } - -#undef PrintAuxMember64 + PrintAuxMember(Hex, "Size of .text section", AuxHeader->TextSize); + PrintAuxMember(Hex, "Size of .data section", AuxHeader->InitDataSize); + PrintAuxMember(Hex, "Size of .bss section", AuxHeader->BssDataSize); + PrintAuxMember(Hex, "Entry point address", AuxHeader->EntryPointAddr); + PrintAuxMember(Hex, "Maximum stack size", AuxHeader->MaxStackSize); + PrintAuxMember(Hex, "Maximum data size", AuxHeader->MaxDataSize); + PrintAuxMember(Number, "Section number for .tdata", AuxHeader->SecNumOfTData); + PrintAuxMember(Number, "Section number for .tbss", AuxHeader->SecNumOfTBSS); + PrintAuxMember(Hex, "Additional flags 64-bit XCOFF", AuxHeader->XCOFF64Flag); + + checkAndPrintAuxHeaderParseError(PartialFieldName, PartialFieldOffset, + AuxSize, *AuxHeader, this); } template diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 543b0de82cdf..e1ebbeb41f28 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -21,6 +21,7 @@ #include "llvm-readobj.h" 
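Later in this llvm-readobj.cpp diff, parseOptions tokenizes the --sort-symbols value with llvm::split and maps each token through a StringSwitch, rejecting unknown keys up front rather than at sort time. The parsing step in isolation (the enum and function names here are illustrative stand-ins):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    #include <vector>

    enum class SortKey { Name, Type, Unknown };

    // Turn "name,type" into a priority-ordered key list; Unknown marks a
    // token the caller should report as an error.
    std::vector<SortKey> parseSortKeys(llvm::StringRef Value) {
      std::vector<SortKey> Keys;
      for (llvm::StringRef Tok : llvm::split(Value, ","))
        Keys.push_back(llvm::StringSwitch<SortKey>(Tok)
                           .Case("name", SortKey::Name)
                           .Case("type", SortKey::Type)
                           .Default(SortKey::Unknown));
      return Keys;
    }
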
#include "ObjDumper.h" #include "WindowsResourceDumper.h" +#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/MC/TargetRegistry.h" @@ -83,6 +84,14 @@ public: }; enum OutputFormatTy { bsd, sysv, posix, darwin, just_symbols }; + +enum SortSymbolKeyTy { + NAME = 0, + TYPE = 1, + UNKNOWN = 100, + // TODO: add ADDRESS, SIZE as needed. +}; + } // namespace namespace opts { @@ -113,6 +122,7 @@ static bool StringTable; static bool Symbols; static bool UnwindInfo; static cl::boolOrDefault SectionMapping; +static SmallVector SortKeys; // ELF specific options. static bool DynamicTable; @@ -253,6 +263,19 @@ static void parseOptions(const opt::InputArgList &Args) { opts::ProgramHeaders = Args.hasArg(OPT_program_headers); opts::RawRelr = Args.hasArg(OPT_raw_relr); opts::SectionGroups = Args.hasArg(OPT_section_groups); + if (Arg *A = Args.getLastArg(OPT_sort_symbols_EQ)) { + std::string SortKeysString = A->getValue(); + for (StringRef KeyStr : llvm::split(A->getValue(), ",")) { + SortSymbolKeyTy KeyType = StringSwitch(KeyStr) + .Case("name", SortSymbolKeyTy::NAME) + .Case("type", SortSymbolKeyTy::TYPE) + .Default(SortSymbolKeyTy::UNKNOWN); + if (KeyType == SortSymbolKeyTy::UNKNOWN) + error("--sort-symbols value should be 'name' or 'type', but was '" + + Twine(KeyStr) + "'"); + opts::SortKeys.push_back(KeyType); + } + } opts::VersionInfo = Args.hasArg(OPT_version_info); // Mach-O specific options. @@ -334,11 +357,39 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer, toString(std::move(ContentErr)); ObjDumper *Dumper; + Optional SymComp; Expected> DumperOrErr = createDumper(Obj, Writer); if (!DumperOrErr) reportError(DumperOrErr.takeError(), FileStr); Dumper = (*DumperOrErr).get(); + if (!opts::SortKeys.empty()) { + if (Dumper->canCompareSymbols()) { + SymComp = SymbolComparator(); + for (SortSymbolKeyTy Key : opts::SortKeys) { + switch (Key) { + case NAME: + SymComp->addPredicate([Dumper](SymbolRef LHS, SymbolRef RHS) { + return Dumper->compareSymbolsByName(LHS, RHS); + }); + break; + case TYPE: + SymComp->addPredicate([Dumper](SymbolRef LHS, SymbolRef RHS) { + return Dumper->compareSymbolsByType(LHS, RHS); + }); + break; + case UNKNOWN: + llvm_unreachable("Unsupported sort key"); + } + } + + } else { + reportWarning(createStringError( + errc::invalid_argument, + "--sort-symbols is not supported yet for this format"), + FileStr); + } + } Dumper->printFileSummary(FileStr, Obj, opts::InputFilenames, A); if (opts::FileHeaders) @@ -374,7 +425,7 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer, if (opts::UnwindInfo) Dumper->printUnwindInfo(); if (opts::Symbols || opts::DynamicSymbols) - Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols); + Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols, SymComp); if (!opts::StringDump.empty()) Dumper->printSectionsAsString(Obj, opts::StringDump); if (!opts::HexDump.empty()) diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h index 0ea695d1673d..989cd0aba6c0 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.h +++ b/llvm/tools/llvm-readobj/llvm-readobj.h @@ -9,10 +9,13 @@ #ifndef LLVM_TOOLS_LLVM_READOBJ_LLVM_READOBJ_H #define LLVM_TOOLS_LLVM_READOBJ_LLVM_READOBJ_H +#include "ObjDumper.h" + +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorOr.h" #include "llvm/Support/Error.h" +#include 
"llvm/Support/ErrorOr.h" #include namespace llvm { diff --git a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp index 21339a3f8f3d..df82fb04e8e6 100644 --- a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -46,7 +46,7 @@ using namespace llvm::object; static cl::OptionCategory RTDyldCategory("RTDyld Options"); -static cl::list InputFileList(cl::Positional, cl::ZeroOrMore, +static cl::list InputFileList(cl::Positional, cl::desc(""), cl::cat(RTDyldCategory)); @@ -79,11 +79,11 @@ static cl::opt cl::init("_main"), cl::cat(RTDyldCategory)); static cl::list Dylibs("dylib", cl::desc("Add library."), - cl::ZeroOrMore, cl::cat(RTDyldCategory)); + cl::cat(RTDyldCategory)); static cl::list InputArgv("args", cl::Positional, cl::desc("..."), - cl::ZeroOrMore, cl::PositionalEatsArgs, + cl::PositionalEatsArgs, cl::cat(RTDyldCategory)); static cl::opt @@ -98,7 +98,7 @@ static cl::opt static cl::list CheckFiles("check", cl::desc("File containing RuntimeDyld verifier checks."), - cl::ZeroOrMore, cl::cat(RTDyldCategory)); + cl::cat(RTDyldCategory)); static cl::opt PreallocMemory("preallocate", @@ -127,14 +127,13 @@ static cl::list SpecificSectionMappings("map-section", cl::desc("For -verify only: Map a section to a " "specific address."), - cl::ZeroOrMore, cl::Hidden, - cl::cat(RTDyldCategory)); + cl::Hidden, cl::cat(RTDyldCategory)); static cl::list DummySymbolMappings( "dummy-extern", cl::desc("For -verify only: Inject a symbol into the extern " "symbol table."), - cl::ZeroOrMore, cl::Hidden, cl::cat(RTDyldCategory)); + cl::Hidden, cl::cat(RTDyldCategory)); static cl::opt PrintAllocationRequests( "print-alloc-requests", @@ -286,7 +285,7 @@ private: uintptr_t SlabSize = 0; uintptr_t CurrentSlabOffset = 0; SectionIDMap *SecIDMap = nullptr; -#if defined(__x86_64__) && defined(__ELF__) +#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__) unsigned UsedTLSStorage = 0; #endif }; @@ -350,7 +349,7 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size, // In case the execution needs TLS storage, we define a very small TLS memory // area here that will be used in allocateTLSSection(). 
-#if defined(__x86_64__) && defined(__ELF__) +#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__) extern "C" { alignas(16) __attribute__((visibility("hidden"), tls_model("initial-exec"), used)) thread_local char LLVMRTDyldTLSSpace[16]; @@ -361,7 +360,7 @@ TrivialMemoryManager::TLSSection TrivialMemoryManager::allocateTLSSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) { -#if defined(__x86_64__) && defined(__ELF__) +#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__) if (Size + UsedTLSStorage > sizeof(LLVMRTDyldTLSSpace)) { return {}; } diff --git a/llvm/tools/llvm-sim/llvm-sim.cpp b/llvm/tools/llvm-sim/llvm-sim.cpp index 26e370ff30f1..6879d73c4434 100644 --- a/llvm/tools/llvm-sim/llvm-sim.cpp +++ b/llvm/tools/llvm-sim/llvm-sim.cpp @@ -85,10 +85,9 @@ exportToFile(const StringRef FilePath, Optional End = getPositionInModule((*C.back()).Inst, LLVMInstNum); - assert(Start.hasValue() && + assert(Start && "Could not find instruction number for first instruction"); - assert(End.hasValue() && - "Could not find instruction number for last instruction"); + assert(End && "Could not find instruction number for last instruction"); J.object([&] { J.attribute("start", Start.getValue()); diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp index 9135d60fdf92..e15d1d6048c7 100644 --- a/llvm/tools/llvm-stress/llvm-stress.cpp +++ b/llvm/tools/llvm-stress/llvm-stress.cpp @@ -69,41 +69,10 @@ static cl::opt OutputFilename("o", cl::value_desc("filename"), cl::cat(StressCategory)); -static LLVMContext Context; - -namespace cl { - -template <> class parser final : public basic_parser { -public: - parser(Option &O) : basic_parser(O) {} - - // Parse options as IR types. Return true on error. 
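Two mechanical LLVM 15 cleanups recur through the tool hunks around this point. First, cl::list now accepts zero or more occurrences by default, so the explicit cl::ZeroOrMore (dropped in the llvm-rtdyld hunk above and the llvm-strings hunk below) is redundant; a minimal sketch:

    #include "llvm/Support/CommandLine.h"
    #include <string>

    // Zero or more positional inputs; no cl::ZeroOrMore needed anymore.
    static llvm::cl::list<std::string>
        InputFiles(llvm::cl::Positional, llvm::cl::desc("<input files>"));

Second, llvm::Optional contextually converts to bool, which is why the llvm-sim asserts above drop hasValue():

    #include "llvm/ADT/Optional.h"
    #include <cassert>

    void use(llvm::Optional<unsigned> Start) {
      assert(Start && "expected a value"); // formerly Start.hasValue()
      (void)*Start;
    }
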
- bool parse(Option &O, StringRef, StringRef Arg, Type *&Value) { - if (Arg == "half") Value = Type::getHalfTy(Context); - else if (Arg == "fp128") Value = Type::getFP128Ty(Context); - else if (Arg == "x86_fp80") Value = Type::getX86_FP80Ty(Context); - else if (Arg == "ppc_fp128") Value = Type::getPPC_FP128Ty(Context); - else if (Arg == "x86_mmx") Value = Type::getX86_MMXTy(Context); - else if (Arg.startswith("i")) { - unsigned N = 0; - Arg.drop_front().getAsInteger(10, N); - if (N > 0) - Value = Type::getIntNTy(Context, N); - } - - if (!Value) - return O.error("Invalid IR scalar type: '" + Arg + "'!"); - return false; - } - - StringRef getValueName() const override { return "IR scalar type"; } -}; - -} // end namespace cl - -static cl::list AdditionalScalarTypes("types", cl::CommaSeparated, - cl::desc("Additional IR scalar types " - "(always includes i1, i8, i16, i32, i64, float and double)")); +static cl::list AdditionalScalarTypes( + "types", cl::CommaSeparated, + cl::desc("Additional IR scalar types " + "(always includes i1, i8, i16, i32, i64, float and double)")); namespace { @@ -185,7 +154,38 @@ struct Modifier { public: /// C'tor Modifier(BasicBlock *Block, PieceTable *PT, Random *R) - : BB(Block), PT(PT), Ran(R), Context(BB->getContext()) {} + : BB(Block), PT(PT), Ran(R), Context(BB->getContext()) { + ScalarTypes.assign({Type::getInt1Ty(Context), Type::getInt8Ty(Context), + Type::getInt16Ty(Context), Type::getInt32Ty(Context), + Type::getInt64Ty(Context), Type::getFloatTy(Context), + Type::getDoubleTy(Context)}); + + for (auto &Arg : AdditionalScalarTypes) { + Type *Ty = nullptr; + if (Arg == "half") + Ty = Type::getHalfTy(Context); + else if (Arg == "fp128") + Ty = Type::getFP128Ty(Context); + else if (Arg == "x86_fp80") + Ty = Type::getX86_FP80Ty(Context); + else if (Arg == "ppc_fp128") + Ty = Type::getPPC_FP128Ty(Context); + else if (Arg == "x86_mmx") + Ty = Type::getX86_MMXTy(Context); + else if (Arg.startswith("i")) { + unsigned N = 0; + Arg.drop_front().getAsInteger(10, N); + if (N > 0) + Ty = Type::getIntNTy(Context, N); + } + if (!Ty) { + errs() << "Invalid IR scalar type: '" << Arg << "'!\n"; + exit(1); + } + + ScalarTypes.push_back(Ty); + } + } /// virtual D'tor to silence warnings. virtual ~Modifier() = default; @@ -310,20 +310,6 @@ protected: /// Pick a random scalar type. Type *pickScalarType() { - static std::vector ScalarTypes; - if (ScalarTypes.empty()) { - ScalarTypes.assign({ - Type::getInt1Ty(Context), - Type::getInt8Ty(Context), - Type::getInt16Ty(Context), - Type::getInt32Ty(Context), - Type::getInt64Ty(Context), - Type::getFloatTy(Context), - Type::getDoubleTy(Context) - }); - llvm::append_range(ScalarTypes, AdditionalScalarTypes); - } - return ScalarTypes[getRandom() % ScalarTypes.size()]; } @@ -338,6 +324,8 @@ protected: /// Context LLVMContext &Context; + + std::vector ScalarTypes; }; struct LoadModifier: public Modifier { @@ -347,8 +335,10 @@ struct LoadModifier: public Modifier { void Act() override { // Try to use predefined pointers. If non-exist, use undef pointer value; Value *Ptr = getRandomPointerValue(); - Value *V = new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L", - BB->getTerminator()); + Type *Ty = Ptr->getType()->isOpaquePointerTy() + ? pickType() + : Ptr->getType()->getNonOpaquePointerElementType(); + Value *V = new LoadInst(Ty, Ptr, "L", BB->getTerminator()); PT->push_back(V); } }; @@ -360,14 +350,16 @@ struct StoreModifier: public Modifier { void Act() override { // Try to use predefined pointers. 
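The LoadModifier change at the end of this hunk and the StoreModifier change just below are opaque-pointer accommodations: once a pointer type no longer carries an element type, llvm-stress must pick one itself instead of reading it off the pointer. The guard in isolation (pickType() in the real code is the fuzzer's random chooser; Fallback stands in for it here):

    #include "llvm/IR/Type.h"
    #include "llvm/IR/Value.h"

    using namespace llvm;

    // With a typed pointer the pointee type is recoverable; with an opaque
    // pointer the caller must supply one.
    Type *elementTypeFor(Value *Ptr, Type *Fallback) {
      Type *PtrTy = Ptr->getType();
      return PtrTy->isOpaquePointerTy()
                 ? Fallback
                 : PtrTy->getNonOpaquePointerElementType();
    }
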
If non-exist, use undef pointer value; Value *Ptr = getRandomPointerValue(); - Value *Val = getRandomValue(Ptr->getType()->getPointerElementType()); - Type *ValTy = Val->getType(); + Type *ValTy = Ptr->getType()->isOpaquePointerTy() + ? pickType() + : Ptr->getType()->getNonOpaquePointerElementType(); // Do not store vectors of i1s because they are unsupported // by the codegen. if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() == 1) return; + Value *Val = getRandomValue(ValTy); new StoreInst(Val, Ptr, BB->getTerminator()); } }; @@ -745,6 +737,7 @@ int main(int argc, char **argv) { cl::HideUnrelatedOptions({&StressCategory, &getColorCategory()}); cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n"); + LLVMContext Context; auto M = std::make_unique("/tmp/autogen.bc", Context); Function *F = GenEmptyFunction(M.get()); diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp index 438eed33d283..71d1321ee0ba 100644 --- a/llvm/tools/llvm-strings/llvm-strings.cpp +++ b/llvm/tools/llvm-strings/llvm-strings.cpp @@ -64,8 +64,7 @@ public: static StringRef ToolName; static cl::list InputFileNames(cl::Positional, - cl::desc(""), - cl::ZeroOrMore); + cl::desc("")); static int MinLength = 4; static bool PrintFileName; diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index 6026e24d6ffa..6742e086d6ff 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -21,11 +21,17 @@ defm adjust_vma : Eq<"adjust-vma", "Add specified offset to object file addresses">, MetaVarName<"">; def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">; +defm build_id : Eq<"build-id", "Build ID used to look up the object file">; +defm cache_size : Eq<"cache-size", "Max size in bytes of the in-memory binary cache.">; +def color : F<"color", "Use color when symbolizing log markup.">; +def color_EQ : Joined<["--"], "color=">, HelpText<"Whether to use color when symbolizing log markup: always, auto, never">, Values<"always,auto,never">; defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"">; +defm debuginfod : B<"debuginfod", "Use debuginfod to find debug binaries", "Don't use debuginfod to find debug binaries">; defm default_arch : Eq<"default-arch", "Default architecture (for multi-arch objects)">, Group; defm demangle : B<"demangle", "Demangle function names", "Don't demangle function names">; +def filter_markup : Flag<["--"], "filter-markup">, HelpText<"Filter symbolizer markup from stdin.">; def functions : F<"functions", "Print function name for a given address">; def functions_EQ : Joined<["--"], "functions=">, HelpText<"Print function name for a given address">, Values<"none,short,linkage">; def help : F<"help", "Display this help">; diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 66a2e703129b..b782c7a1720a 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -15,10 +15,16 @@ //===----------------------------------------------------------------------===// #include "Opts.inc" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/Symbolize/DIPrinter.h" +#include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/DebugInfo/Symbolize/MarkupFilter.h" +#include 
"llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/Debuginfod/DIFetcher.h" +#include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Debuginfod/HTTPClient.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" @@ -102,9 +108,31 @@ enum class Command { Frame, }; +static void enableDebuginfod(LLVMSymbolizer &Symbolizer) { + static bool IsEnabled = false; + if (IsEnabled) + return; + IsEnabled = true; + // Look up symbols using the debuginfod client. + Symbolizer.addDIFetcher(std::make_unique()); + // The HTTPClient must be initialized for use by the debuginfod client. + HTTPClient::initialize(); +} + +static SmallVector parseBuildID(StringRef Str) { + std::string Bytes; + if (!tryGetFromHex(Str, Bytes)) + return {}; + ArrayRef BuildID(reinterpret_cast(Bytes.data()), + Bytes.size()); + return SmallVector(BuildID.begin(), BuildID.end()); +} + static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, StringRef InputString, Command &Cmd, - std::string &ModuleName, uint64_t &ModuleOffset) { + std::string &ModuleName, + SmallVectorImpl &BuildID, + uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; if (InputString.consume_front("CODE ")) { @@ -117,9 +145,31 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, // If no cmd, assume it's CODE. Cmd = Command::Code; } - const char *Pos = InputString.data(); + + const char *Pos; // Skip delimiters and parse input filename (if needed). - if (BinaryName.empty()) { + if (BinaryName.empty() && BuildID.empty()) { + bool HasFilePrefix = false; + bool HasBuildIDPrefix = false; + while (true) { + if (InputString.consume_front("FILE:")) { + if (HasFilePrefix) + return false; + HasFilePrefix = true; + continue; + } + if (InputString.consume_front("BUILDID:")) { + if (HasBuildIDPrefix) + return false; + HasBuildIDPrefix = true; + continue; + } + break; + } + if (HasFilePrefix && HasBuildIDPrefix) + return false; + + Pos = InputString.data(); Pos += strspn(Pos, kDelimiters); if (*Pos == '"' || *Pos == '\'') { char Quote = *Pos; @@ -134,7 +184,14 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, ModuleName = std::string(Pos, NameLength); Pos += NameLength; } + if (HasBuildIDPrefix) { + BuildID = parseBuildID(ModuleName); + if (BuildID.empty()) + return false; + ModuleName.clear(); + } } else { + Pos = InputString.data(); ModuleName = BinaryName.str(); } // Skip delimiters and parse module offset. @@ -148,31 +205,24 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, return !Offset.getAsInteger(IsAddr2Line ? 
16 : 0, ModuleOffset); } -static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, - bool IsAddr2Line, OutputStyle Style, - StringRef InputString, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { - Command Cmd; - std::string ModuleName; - uint64_t Offset = 0; - if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, - StringRef(InputString), Cmd, ModuleName, Offset)) { - Printer.printInvalidCommand({ModuleName, None}, InputString); - return; - } - +template +void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, + uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, + OutputStyle Style, LLVMSymbolizer &Symbolizer, + DIPrinter &Printer) { uint64_t AdjustedOffset = Offset - AdjustVMA; + object::SectionedAddress Address = {AdjustedOffset, + object::SectionedAddress::UndefSection}; if (Cmd == Command::Data) { - Expected ResOrErr = Symbolizer.symbolizeData( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Cmd == Command::Frame) { - Expected> ResOrErr = Symbolizer.symbolizeFrame( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected> ResOrErr = + Symbolizer.symbolizeFrame(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); - } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) { - Expected ResOrErr = Symbolizer.symbolizeInlinedCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + } else if (ShouldInline) { + Expected ResOrErr = + Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Style == OutputStyle::GNU) { // With PrintFunctions == FunctionNameKind::LinkageName (default) @@ -181,8 +231,8 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, // caller function in the inlining chain. This contradicts the existing // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only // the topmost function, which suits our needs better. - Expected ResOrErr = Symbolizer.symbolizeInlinedCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = + Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); Expected Res0OrErr = !ResOrErr ? 
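parseBuildID in the hunk above delegates hex decoding to llvm::tryGetFromHex, which fails on odd-length or non-hex input, so an empty result doubles as the error signal for both the BUILDID: command prefix and the --build-id flag. The decoding step on its own:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <cstdint>
    #include <string>

    // Decode "a1b2c3" into raw bytes; an empty vector signals malformed hex.
    llvm::SmallVector<uint8_t> decodeBuildID(llvm::StringRef Str) {
      std::string Bytes;
      if (!llvm::tryGetFromHex(Str, Bytes))
        return {};
      return llvm::SmallVector<uint8_t>(Bytes.begin(), Bytes.end());
    }
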
Expected(ResOrErr.takeError()) @@ -190,10 +240,39 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, : ResOrErr->getFrame(0)); print({ModuleName, Offset}, Res0OrErr, Printer); } else { - Expected ResOrErr = Symbolizer.symbolizeCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = + Symbolizer.symbolizeCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } + Symbolizer.pruneCache(); +} + +static void symbolizeInput(const opt::InputArgList &Args, + ArrayRef IncomingBuildID, + uint64_t AdjustVMA, bool IsAddr2Line, + OutputStyle Style, StringRef InputString, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { + Command Cmd; + std::string ModuleName; + SmallVector BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); + uint64_t Offset = 0; + if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, + StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) { + Printer.printInvalidCommand({ModuleName, None}, InputString); + return; + } + bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); + if (!BuildID.empty()) { + assert(ModuleName.empty()); + if (!Args.hasArg(OPT_no_debuginfod)) + enableDebuginfod(Symbolizer); + std::string BuildIDStr = toHex(BuildID); + executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, + Style, Symbolizer, Printer); + } else { + executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, + Style, Symbolizer, Printer); + } } static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl, @@ -260,10 +339,52 @@ static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; } +static Optional parseColorArg(const opt::InputArgList &Args) { + if (Args.hasArg(OPT_color)) + return true; + if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ)) + return StringSwitch>(A->getValue()) + .Case("always", true) + .Case("never", false) + .Case("auto", None); + return None; +} + +static SmallVector parseBuildIDArg(const opt::InputArgList &Args, + int ID) { + const opt::Arg *A = Args.getLastArg(ID); + if (!A) + return {}; + + StringRef V(A->getValue()); + SmallVector BuildID = parseBuildID(V); + if (BuildID.empty()) { + errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n"; + exit(1); + } + return BuildID; +} + +// Symbolize the markup from stdin and write the result to stdout. +static void filterMarkup(const opt::InputArgList &Args) { + MarkupParser Parser; + MarkupFilter Filter(outs(), parseColorArg(Args)); + for (std::string InputString; std::getline(std::cin, InputString);) { + InputString += '\n'; + Parser.parseLine(InputString); + Filter.beginLine(InputString); + while (Optional Element = Parser.nextNode()) + Filter.filter(*Element); + } + Parser.flush(); + while (Optional Element = Parser.nextNode()) + Filter.filter(*Element); +} + +ExitOnError ExitOnErr; + int main(int argc, char **argv) { InitLLVM X(argc, argv); - // The HTTPClient must be initialized for use by the debuginfod client. 
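parseColorArg above folds the color options into an Optional<bool> tri-state: true forces markup coloring on, false forces it off, and None defers the decision (the "auto" case and the no-flag case) to the filter. The mapping step on its own:

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    // "always" -> on, "never" -> off, "auto" or anything else -> undecided.
    llvm::Optional<bool> colorMode(llvm::StringRef V) {
      return llvm::StringSwitch<llvm::Optional<bool>>(V)
          .Case("always", true)
          .Case("never", false)
          .Default(llvm::None);
    }
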
- HTTPClient::initialize(); sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line"); @@ -304,6 +425,8 @@ int main(int argc, char **argv) { } #endif Opts.UseSymbolTable = true; + if (Args.hasArg(OPT_cache_size_EQ)) + parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize); Config.PrintAddress = Args.hasArg(OPT_addresses); Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None; Config.Pretty = Args.hasArg(OPT_pretty_print); @@ -319,6 +442,11 @@ int main(int argc, char **argv) { } } + if (Args.hasArg(OPT_filter_markup)) { + filterMarkup(Args); + return 0; + } + auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM; if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) { if (strcmp(A->getValue(), "GNU") == 0) @@ -329,7 +457,23 @@ int main(int argc, char **argv) { Style = OutputStyle::LLVM; } + if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) { + errs() << "error: cannot specify both --build-id and --obj\n"; + return EXIT_FAILURE; + } + SmallVector BuildID = parseBuildIDArg(Args, OPT_build_id_EQ); + LLVMSymbolizer Symbolizer(Opts); + + // A debuginfod lookup could succeed if a HTTP client is available and at + // least one backing URL is configured. + bool ShouldUseDebuginfodByDefault = + HTTPClient::isAvailable() && + !ExitOnErr(getDefaultDebuginfodUrls()).empty(); + if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, + ShouldUseDebuginfodByDefault)) + enableDebuginfod(Symbolizer); + std::unique_ptr Printer; if (Style == OutputStyle::GNU) Printer = std::make_unique(outs(), errs(), Config); @@ -348,15 +492,15 @@ int main(int argc, char **argv) { std::string StrippedInputString(InputString); llvm::erase_if(StrippedInputString, [](char c) { return c == '\r' || c == '\n'; }); - symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString, - Symbolizer, *Printer); + symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, + StrippedInputString, Symbolizer, *Printer); outs().flush(); } } else { Printer->listBegin(); for (StringRef Address : InputAddresses) - symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer, - *Printer); + symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address, + Symbolizer, *Printer); Printer->listEnd(); } diff --git a/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp b/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp index 772f124c5a59..09dd6f76bf6e 100644 --- a/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp +++ b/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp index 4a69f96a597a..7deeaef40caf 100644 --- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp +++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp @@ -338,6 +338,7 @@ int main(int argc, char *argv[]) { assert(TLIandSDKboth + TLIandSDKneither + TLIdoesSDKdoesnt + TLIdoesntSDKdoes == LibFunc::NumLibFuncs); + (void) TLIandSDKneither; outs() << "<< Total TLI yes SDK no: " << TLIdoesSDKdoesnt << "\n>> Total TLI no SDK yes: " << TLIdoesntSDKdoes << "\n== Total TLI yes SDK yes: " << TLIandSDKboth; diff --git a/llvm/tools/llvm-xray/func-id-helper.cpp b/llvm/tools/llvm-xray/func-id-helper.cpp index 
afc912a6398e..ce4eafd071ec 100644 --- a/llvm/tools/llvm-xray/func-id-helper.cpp +++ b/llvm/tools/llvm-xray/func-id-helper.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "func-id-helper.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include diff --git a/llvm/tools/llvm-xray/func-id-helper.h b/llvm/tools/llvm-xray/func-id-helper.h index c6ce198170d5..d99fb7c1cfb0 100644 --- a/llvm/tools/llvm-xray/func-id-helper.h +++ b/llvm/tools/llvm-xray/func-id-helper.h @@ -13,6 +13,7 @@ #define LLVM_TOOLS_LLVM_XRAY_FUNC_ID_HELPER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include diff --git a/llvm/tools/llvm-xray/xray-graph-diff.cpp b/llvm/tools/llvm-xray/xray-graph-diff.cpp index f22ea06e0537..bcadade86bb5 100644 --- a/llvm/tools/llvm-xray/xray-graph-diff.cpp +++ b/llvm/tools/llvm-xray/xray-graph-diff.cpp @@ -22,6 +22,7 @@ #include "xray-color-helper.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/XRay/Trace.h" using namespace llvm; diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index af3308939442..17c5da408560 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "NewPMDriver.h" -#include "PassPrinters.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -66,10 +65,6 @@ static cl::opt DebugPM( DebugLogging::Verbose, "verbose", "Print extra information about adaptors and pass managers"))); -static cl::list - PassPlugins("load-pass-plugin", - cl::desc("Load passes from plugin library")); - // This flag specifies a textual description of the alias analysis pipeline to // use when querying for aliasing information. It only works in concert with // the "passes" flag above. @@ -122,11 +117,28 @@ static cl::opt PipelineEarlySimplificationEPPipeline( cl::desc("A textual description of the module pass pipeline inserted at " "the EarlySimplification extension point into default pipelines"), cl::Hidden); +static cl::opt OptimizerEarlyEPPipeline( + "passes-ep-optimizer-early", + cl::desc("A textual description of the module pass pipeline inserted at " + "the OptimizerEarly extension point into default pipelines"), + cl::Hidden); static cl::opt OptimizerLastEPPipeline( "passes-ep-optimizer-last", cl::desc("A textual description of the module pass pipeline inserted at " "the OptimizerLast extension point into default pipelines"), cl::Hidden); +static cl::opt FullLinkTimeOptimizationEarlyEPPipeline( + "passes-ep-full-link-time-optimization-early", + cl::desc("A textual description of the module pass pipeline inserted at " + "the FullLinkTimeOptimizationEarly extension point into default " + "pipelines"), + cl::Hidden); +static cl::opt FullLinkTimeOptimizationLastEPPipeline( + "passes-ep-full-link-time-optimization-last", + cl::desc("A textual description of the module pass pipeline inserted at " + "the FullLinkTimeOptimizationLast extension point into default " + "pipelines"), + cl::Hidden); // Individual pipeline tuning options. 
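Each new -passes-ep-* option above is wired up in the registerEPCallbacks hunk below following one pattern: if the option carries pipeline text, register a callback at the matching PassBuilder extension point that parses that text into the supplied ModulePassManager. A reduced sketch for one extension point (the PipelineText parameter stands in for the cl::opt, and the non-empty check for tryParsePipelineText):

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Support/Error.h"
    #include <string>

    using namespace llvm;

    // PB must outlive the registered callback, as it does in opt's driver.
    void registerOptimizerEarly(PassBuilder &PB, std::string PipelineText) {
      if (PipelineText.empty())
        return;
      PB.registerOptimizerEarlyEPCallback(
          [&PB, PipelineText](ModulePassManager &MPM, OptimizationLevel) {
            ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: ");
            Err(PB.parsePassPipeline(MPM, PipelineText));
          });
    }
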
extern cl::opt DisableLoopUnrolling; @@ -223,12 +235,35 @@ static void registerEPCallbacks(PassBuilder &PB) { ExitOnError Err("Unable to parse EarlySimplification pipeline: "); Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline)); }); - if (tryParsePipelineText(PB, OptimizerLastEPPipeline)) + if (tryParsePipelineText(PB, OptimizerEarlyEPPipeline)) + PB.registerOptimizerEarlyEPCallback( + [&PB](ModulePassManager &PM, OptimizationLevel) { + ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: "); + Err(PB.parsePassPipeline(PM, OptimizerEarlyEPPipeline)); + }); + if (tryParsePipelineText(PB, OptimizerLastEPPipeline)) PB.registerOptimizerLastEPCallback( [&PB](ModulePassManager &PM, OptimizationLevel) { ExitOnError Err("Unable to parse OptimizerLastEP pipeline: "); Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline)); }); + if (tryParsePipelineText( + PB, FullLinkTimeOptimizationEarlyEPPipeline)) + PB.registerFullLinkTimeOptimizationEarlyEPCallback( + [&PB](ModulePassManager &PM, OptimizationLevel) { + ExitOnError Err( + "Unable to parse FullLinkTimeOptimizationEarlyEP pipeline: "); + Err(PB.parsePassPipeline(PM, + FullLinkTimeOptimizationEarlyEPPipeline)); + }); + if (tryParsePipelineText( + PB, FullLinkTimeOptimizationLastEPPipeline)) + PB.registerFullLinkTimeOptimizationLastEPCallback( + [&PB](ModulePassManager &PM, OptimizationLevel) { + ExitOnError Err( + "Unable to parse FullLinkTimeOptimizationLastEP pipeline: "); + Err(PB.parsePassPipeline(PM, FullLinkTimeOptimizationLastEPPipeline)); + }); } #define HANDLE_EXTENSION(Ext) \ @@ -240,6 +275,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, ToolOutputFile *ThinLTOLinkOut, ToolOutputFile *OptRemarkFile, StringRef PassPipeline, ArrayRef Passes, + ArrayRef PassPlugins, OutputKind OK, VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, @@ -312,33 +348,17 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, PassBuilder PB(TM, PTO, P, &PIC); registerEPCallbacks(PB); - // Load requested pass plugins and let them register pass builder callbacks - for (auto &PluginFN : PassPlugins) { - auto PassPlugin = PassPlugin::Load(PluginFN); - if (!PassPlugin) { - errs() << "Failed to load passes from '" << PluginFN - << "'. Request ignored.\n"; - continue; - } - - PassPlugin->registerPassBuilderCallbacks(PB); - } + // For any loaded plugins, let them register pass builder callbacks. 
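The loop just below iterates plugins the caller has already loaded: the PassPlugin::Load step removed in this hunk now happens in opt.cpp before runPassPipeline is called. Reduced to essentials, loading and registering one plugin looks roughly like this (error handling simplified relative to the removed code):

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Passes/PassPlugin.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    using namespace llvm;

    void loadAndRegister(const std::string &Path, PassBuilder &PB) {
      Expected<PassPlugin> Plugin = PassPlugin::Load(Path);
      if (!Plugin) {
        errs() << "Failed to load passes from '" << Path << "'\n";
        consumeError(Plugin.takeError());
        return;
      }
      Plugin->registerPassBuilderCallbacks(PB);
    }
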
+ for (auto &PassPlugin : PassPlugins) + PassPlugin.registerPassBuilderCallbacks(PB); PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &MPM, ArrayRef) { AddressSanitizerOptions Opts; if (Name == "asan-pipeline") { - MPM.addPass( - RequireAnalysisPass()); MPM.addPass(ModuleAddressSanitizerPass(Opts)); return true; - } else if (Name == "asan-function-pipeline") { - MPM.addPass( - RequireAnalysisPass()); - MPM.addPass( - createModuleToFunctionPassAdaptor(AddressSanitizerPass(Opts))); - return true; } return false; }); diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h index 056f7d6a9b80..16bb205afdca 100644 --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -20,12 +20,12 @@ #ifndef LLVM_TOOLS_OPT_NEWPMDRIVER_H #define LLVM_TOOLS_OPT_NEWPMDRIVER_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/CommandLine.h" namespace llvm { class StringRef; class Module; +class PassPlugin; class TargetMachine; class ToolOutputFile; class TargetLibraryInfoImpl; @@ -69,7 +69,8 @@ bool runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, ToolOutputFile *Out, ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile, StringRef PassPipeline, ArrayRef PassInfos, - opt_tool::OutputKind OK, opt_tool::VerifierKind VK, + ArrayRef PassPlugins, opt_tool::OutputKind OK, + opt_tool::VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, diff --git a/llvm/tools/opt/PassPrinters.cpp b/llvm/tools/opt/PassPrinters.cpp deleted file mode 100644 index 4e81b5d29c4d..000000000000 --- a/llvm/tools/opt/PassPrinters.cpp +++ /dev/null @@ -1,212 +0,0 @@ -//===- PassPrinters.cpp - Utilities to print analysis info for passes -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Utilities to print analysis info for various kinds of passes. -/// -//===----------------------------------------------------------------------===// - -#include "PassPrinters.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionPass.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; - -namespace { - -struct FunctionPassPrinter : public FunctionPass { - const PassInfo *PassToPrint; - raw_ostream &Out; - static char ID; - std::string PassName; - - FunctionPassPrinter(const PassInfo *PI, raw_ostream &out) - : FunctionPass(ID), PassToPrint(PI), Out(out) { - std::string PassToPrintName = std::string(PassToPrint->getPassName()); - PassName = "FunctionPass Printer: " + PassToPrintName; - } - - bool runOnFunction(Function &F) override { - Out << "Printing analysis '" << PassToPrint->getPassName() - << "' for function '" << F.getName() << "':\n"; - - // Get and print pass... 
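The surviving asan-pipeline hook above shows the registerPipelineParsingCallback idiom: the callback claims a pipeline name by returning true after populating the pass manager, and returns false to let other parsers try. A sketch of registering a custom name ("my-pipeline" and the VerifierPass stand-in are illustrative, not from the source):

    #include "llvm/IR/Verifier.h"
    #include "llvm/Passes/PassBuilder.h"

    using namespace llvm;

    void registerAlias(PassBuilder &PB) {
      PB.registerPipelineParsingCallback(
          [](StringRef Name, ModulePassManager &MPM,
             ArrayRef<PassBuilder::PipelineElement>) {
            if (Name != "my-pipeline")
              return false;          // not ours; keep parsing elsewhere
            MPM.addPass(VerifierPass()); // stand-in for a real pass sequence
            return true;             // name consumed
          });
    }
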
- getAnalysisID(PassToPrint->getTypeInfo()).print(Out, F.getParent()); - return false; - } - - StringRef getPassName() const override { return PassName; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(PassToPrint->getTypeInfo()); - AU.setPreservesAll(); - } -}; - -char FunctionPassPrinter::ID = 0; - -struct CallGraphSCCPassPrinter : public CallGraphSCCPass { - static char ID; - const PassInfo *PassToPrint; - raw_ostream &Out; - std::string PassName; - - CallGraphSCCPassPrinter(const PassInfo *PI, raw_ostream &out) - : CallGraphSCCPass(ID), PassToPrint(PI), Out(out) { - std::string PassToPrintName = std::string(PassToPrint->getPassName()); - PassName = "CallGraphSCCPass Printer: " + PassToPrintName; - } - - bool runOnSCC(CallGraphSCC &SCC) override { - Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n"; - - // Get and print pass... - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - if (F) - getAnalysisID(PassToPrint->getTypeInfo()) - .print(Out, F->getParent()); - } - return false; - } - - StringRef getPassName() const override { return PassName; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(PassToPrint->getTypeInfo()); - AU.setPreservesAll(); - } -}; - -char CallGraphSCCPassPrinter::ID = 0; - -struct ModulePassPrinter : public ModulePass { - static char ID; - const PassInfo *PassToPrint; - raw_ostream &Out; - std::string PassName; - - ModulePassPrinter(const PassInfo *PI, raw_ostream &out) - : ModulePass(ID), PassToPrint(PI), Out(out) { - std::string PassToPrintName = std::string(PassToPrint->getPassName()); - PassName = "ModulePass Printer: " + PassToPrintName; - } - - bool runOnModule(Module &M) override { - Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n"; - - // Get and print pass... - getAnalysisID(PassToPrint->getTypeInfo()).print(Out, &M); - return false; - } - - StringRef getPassName() const override { return PassName; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(PassToPrint->getTypeInfo()); - AU.setPreservesAll(); - } -}; - -char ModulePassPrinter::ID = 0; - -struct LoopPassPrinter : public LoopPass { - static char ID; - const PassInfo *PassToPrint; - raw_ostream &Out; - std::string PassName; - - LoopPassPrinter(const PassInfo *PI, raw_ostream &out) - : LoopPass(ID), PassToPrint(PI), Out(out) { - std::string PassToPrintName = std::string(PassToPrint->getPassName()); - PassName = "LoopPass Printer: " + PassToPrintName; - } - - bool runOnLoop(Loop *L, LPPassManager &LPM) override { - Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n"; - - // Get and print pass... 
- getAnalysisID(PassToPrint->getTypeInfo()) - .print(Out, L->getHeader()->getParent()->getParent()); - return false; - } - - StringRef getPassName() const override { return PassName; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(PassToPrint->getTypeInfo()); - AU.setPreservesAll(); - } -}; - -char LoopPassPrinter::ID = 0; - -struct RegionPassPrinter : public RegionPass { - static char ID; - const PassInfo *PassToPrint; - raw_ostream &Out; - std::string PassName; - - RegionPassPrinter(const PassInfo *PI, raw_ostream &out) - : RegionPass(ID), PassToPrint(PI), Out(out) { - std::string PassToPrintName = std::string(PassToPrint->getPassName()); - PassName = "RegionPass Printer: " + PassToPrintName; - } - - bool runOnRegion(Region *R, RGPassManager &RGM) override { - Out << "Printing analysis '" << PassToPrint->getPassName() << "' for " - << "region: '" << R->getNameStr() << "' in function '" - << R->getEntry()->getParent()->getName() << "':\n"; - // Get and print pass... - getAnalysisID(PassToPrint->getTypeInfo()) - .print(Out, R->getEntry()->getParent()->getParent()); - return false; - } - - StringRef getPassName() const override { return PassName; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(PassToPrint->getTypeInfo()); - AU.setPreservesAll(); - } -}; - -char RegionPassPrinter::ID = 0; - -} // end anonymous namespace - -FunctionPass *llvm::createFunctionPassPrinter(const PassInfo *PI, - raw_ostream &OS) { - return new FunctionPassPrinter(PI, OS); -} - -CallGraphSCCPass *llvm::createCallGraphPassPrinter(const PassInfo *PI, - raw_ostream &OS) { - return new CallGraphSCCPassPrinter(PI, OS); -} - -ModulePass *llvm::createModulePassPrinter(const PassInfo *PI, raw_ostream &OS) { - return new ModulePassPrinter(PI, OS); -} - -LoopPass *llvm::createLoopPassPrinter(const PassInfo *PI, raw_ostream &OS) { - return new LoopPassPrinter(PI, OS); -} - -RegionPass *llvm::createRegionPassPrinter(const PassInfo *PI, raw_ostream &OS) { - return new RegionPassPrinter(PI, OS); -} diff --git a/llvm/tools/opt/PassPrinters.h b/llvm/tools/opt/PassPrinters.h deleted file mode 100644 index a4e1921399fc..000000000000 --- a/llvm/tools/opt/PassPrinters.h +++ /dev/null @@ -1,40 +0,0 @@ -//=- PassPrinters.h - Utilities to print analysis info for passes -*- C++ -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Utilities to print analysis info for various kinds of passes. 
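// [Illustrative note, not part of this patch] With these legacy printer
// shims deleted, the equivalent functionality comes from the new pass
// manager's textual pipeline, e.g. (hypothetical invocation):
//
//   opt -passes='print<scalar-evolution>' -disable-output input.ll
//
// mirroring the guidance in the -analyze error message removed from
// opt.cpp below.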
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OPT_PASSPRINTERS_H -#define LLVM_TOOLS_OPT_PASSPRINTERS_H - -namespace llvm { - -class CallGraphSCCPass; -class FunctionPass; -class ModulePass; -class LoopPass; -class PassInfo; -class raw_ostream; -class RegionPass; - -FunctionPass *createFunctionPassPrinter(const PassInfo *PI, raw_ostream &out); - -CallGraphSCCPass *createCallGraphPassPrinter(const PassInfo *PI, - raw_ostream &out); - -ModulePass *createModulePassPrinter(const PassInfo *PI, raw_ostream &out); - -LoopPass *createLoopPassPrinter(const PassInfo *PI, raw_ostream &out); - -RegionPass *createRegionPassPrinter(const PassInfo *PI, raw_ostream &out); - -} // end namespace llvm - -#endif // LLVM_TOOLS_OPT_PASSPRINTERS_H diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 7793a5471793..0e013ef3b9fd 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -13,7 +13,6 @@ #include "BreakpointPrinter.h" #include "NewPMDriver.h" -#include "PassPrinters.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -32,6 +31,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LegacyPassNameParser.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/InitializePasses.h" @@ -39,6 +39,7 @@ #include "llvm/LinkAllPasses.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Passes/PassPlugin.h" #include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" @@ -51,7 +52,6 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -74,7 +74,7 @@ static cl::opt<bool> EnableNewPassManager( cl::desc("Enable the new pass manager, translating " "'opt -foo' to 'opt -passes=foo'. This is strictly for the new PM " "migration, use '-passes=' when possible."), - cl::init(LLVM_ENABLE_NEW_PASS_MANAGER)); + cl::init(true)); // This flag specifies a textual description of the optimization pass pipeline // to run over the module. This flag switches opt to use the new pass manager @@ -192,14 +192,9 @@ static cl::opt<bool> DisableSimplifyLibCalls("disable-simplify-libcalls", cl::desc("Disable simplify-libcalls")); -static cl::list<std::string> -DisableBuiltins("disable-builtin", - cl::desc("Disable specific target library builtin function"), - cl::ZeroOrMore); - -static cl::opt<bool> - AnalyzeOnly("analyze", cl::desc("Only perform analysis, no optimization. 
" - "Legacy pass manager only.")); +static cl::list DisableBuiltins( + "disable-builtin", + cl::desc("Disable specific target library builtin function")); static cl::opt EnableDebugify( "enable-debugify", @@ -252,11 +247,6 @@ static cl::opt DiscardValueNames( cl::desc("Discard names from Value (other than GlobalValue)."), cl::init(false), cl::Hidden); -static cl::opt Coroutines( - "enable-coroutines", - cl::desc("Enable coroutine passes."), - cl::init(false), cl::Hidden); - static cl::opt TimeTrace( "time-trace", cl::desc("Record time trace")); @@ -300,6 +290,10 @@ static cl::opt RemarksFormat( cl::desc("The format used for serializing remarks (default: YAML)"), cl::value_desc("format"), cl::init("yaml")); +static cl::list + PassPlugins("load-pass-plugin", + cl::desc("Load passes from plugin library")); + namespace llvm { cl::opt PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden, @@ -370,9 +364,6 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM, if (TM) TM->adjustPassManager(Builder); - if (Coroutines) - addCoroutinePassesToExtensionPoints(Builder); - switch (PGOKindFlag) { case InstrGen: Builder.EnablePGOInstrGen = true; @@ -484,7 +475,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-", "nvptx-", "mips-", "lanai-", "hexagon-", "bpf-", "avr-", "thumb2-", "arm-", "si-", "gcn-", "amdgpu-", "aarch64-", - "amdgcn-", "polly-", "riscv-"}; + "amdgcn-", "polly-", "riscv-", "dxil-"}; std::vector PassNameContain = {"ehprepare"}; std::vector PassNameExact = { "safe-stack", "cost-model", @@ -498,7 +489,11 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "generic-to-nvvm", "expandmemcmp", "loop-reduce", "lower-amx-type", "pre-amx-config", "lower-amx-intrinsics", - "polyhedral-info", "replace-with-veclib"}; + "polyhedral-info", "print-polyhedral-info", + "replace-with-veclib", "jmc-instrument", + "dot-regions", "dot-regions-only", + "view-regions", "view-regions-only", + "select-optimize"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -535,7 +530,6 @@ int main(int argc, char **argv) { // Initialize passes PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeCore(Registry); - initializeCoroutines(Registry); initializeScalarOpts(Registry); initializeObjCARCOpts(Registry); initializeVectorization(Registry); @@ -550,6 +544,7 @@ int main(int argc, char **argv) { // supported. initializeExpandMemCmpPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); + initializeSelectOptimizePass(Registry); initializeCodeGenPreparePass(Registry); initializeAtomicExpandPass(Registry); initializeRewriteSymbolsLegacyPassPass(Registry); @@ -572,18 +567,38 @@ int main(int argc, char **argv) { initializeHardwareLoopsPass(Registry); initializeTypePromotionPass(Registry); initializeReplaceWithVeclibLegacyPass(Registry); + initializeJMCInstrumenterPass(Registry); #ifdef BUILD_EXAMPLES initializeExampleIRTransforms(Registry); #endif + SmallVector PluginList; + PassPlugins.setCallback([&](const std::string &PluginPath) { + auto Plugin = PassPlugin::Load(PluginPath); + if (!Plugin) { + errs() << "Failed to load passes from '" << PluginPath + << "'. 
Request ignored.\n"; + return; + } + PluginList.emplace_back(Plugin.get()); + }); + cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .bc modular optimizer and analysis printer\n"); LLVMContext Context; - if (AnalyzeOnly && NoOutput) { - errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n"; + // If `-passes=` is specified, use NPM. + // If `-enable-new-pm` is specified and there are no codegen passes, use NPM. + // e.g. `-enable-new-pm -sroa` will use NPM. + // but `-enable-new-pm -codegenprepare` will still revert to legacy PM. + const bool UseNPM = (EnableNewPassManager && !shouldForceLegacyPM()) || + PassPipeline.getNumOccurrences() > 0; + + if (!UseNPM && PluginList.size()) { + errs() << argv[0] << ": " << PassPlugins.ArgStr + << " specified with legacy PM.\n"; return 1; } @@ -722,7 +737,7 @@ int main(int argc, char **argv) { // If the output is set to be emitted to standard out, and standard out is a // console, print out a warning message and refuse to do it. We don't // impress anyone by spewing tons of binary goo to a terminal. - if (!Force && !NoOutput && !AnalyzeOnly && !OutputAssembly) + if (!Force && !NoOutput && !OutputAssembly) if (CheckBitcodeOutputToConsole(Out->os())) NoOutput = true; @@ -748,19 +763,7 @@ int main(int argc, char **argv) { } } - // If `-passes=` is specified, use NPM. - // If `-enable-new-pm` is specified and there are no codegen passes, use NPM. - // e.g. `-enable-new-pm -sroa` will use NPM. - // but `-enable-new-pm -codegenprepare` will still revert to legacy PM. - if ((EnableNewPassManager && !shouldForceLegacyPM()) || - PassPipeline.getNumOccurrences() > 0) { - if (AnalyzeOnly) { - errs() << "Cannot specify -analyze under new pass manager, either " - "specify '-enable-new-pm=0', or use the corresponding new pass " - "manager pass, e.g. '-passes=print'. For a " - "full list of passes, see the '--print-passes' flag.\n"; - return 1; - } + if (UseNPM) { if (legacy::debugPassSpecified()) { errs() << "-debug-pass does not work with the new PM, either use " @@ -778,8 +781,9 @@ int main(int argc, char **argv) { errs() << "Cannot specify multiple -O#\n"; return 1; } - if (NumOLevel > 0 && PassPipeline.getNumOccurrences() > 0) { - errs() << "Cannot specify -O# and --passes=, use " + if (NumOLevel > 0 && + (PassPipeline.getNumOccurrences() > 0 || PassList.size() > 0)) { + errs() << "Cannot specify -O# and --passes=/--foo-pass, use " "-passes='default,other-pass'\n"; return 1; } @@ -817,7 +821,7 @@ int main(int argc, char **argv) { // layer. return runPassPipeline(argv[0], *M, TM.get(), &TLII, Out.get(), ThinLinkOut.get(), RemarksFile.get(), Pipeline, - Passes, OK, VK, PreserveAssemblyUseListOrder, + Passes, PluginList, OK, VK, PreserveAssemblyUseListOrder, PreserveBitcodeUseListOrder, EmitSummaryIndex, EmitModuleHash, EnableDebugify) ? 0 @@ -829,13 +833,13 @@ int main(int argc, char **argv) { // the (-check)-debugify passes. 
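// [Illustrative sketch, not part of this patch] A minimal plugin that the
// -load-pass-plugin option added above could load. It assumes only the
// documented entry point from llvm/Passes/PassPlugin.h; the plugin name,
// pipeline name, and the stand-in pass are hypothetical.
//
//   #include "llvm/IR/Verifier.h"
//   #include "llvm/Passes/PassBuilder.h"
//   #include "llvm/Passes/PassPlugin.h"
//
//   extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
//   llvmGetPassPluginInfo() {
//     return {LLVM_PLUGIN_API_VERSION, "MyPlugin", "v0.1",
//             [](llvm::PassBuilder &PB) {
//               PB.registerPipelineParsingCallback(
//                   [](llvm::StringRef Name, llvm::ModulePassManager &MPM,
//                      llvm::ArrayRef<llvm::PassBuilder::PipelineElement>) {
//                     if (Name == "my-pass") {
//                       MPM.addPass(llvm::VerifierPass()); // stand-in pass
//                       return true;
//                     }
//                     return false;
//                   });
//             }};
//   }
//
// Built as a shared library, it would be exercised with something like:
//   opt -load-pass-plugin=./MyPlugin.so -passes=my-pass input.ll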
DebugifyCustomPassManager Passes; DebugifyStatsMap DIStatsMap; - DebugInfoPerPassMap DIPreservationMap; + DebugInfoPerPass DebugInfoBeforePass; if (DebugifyEach) { Passes.setDebugifyMode(DebugifyMode::SyntheticDebugInfo); Passes.setDIStatsMap(DIStatsMap); } else if (VerifyEachDebugInfoPreserve) { Passes.setDebugifyMode(DebugifyMode::OriginalDebugInfo); - Passes.setDIPreservationMap(DIPreservationMap); + Passes.setDebugInfoBeforePass(DebugInfoBeforePass); if (!VerifyDIPreserveExport.empty()) Passes.setOrigDIVerifyBugsReportFilePath(VerifyDIPreserveExport); } @@ -855,10 +859,10 @@ int main(int argc, char **argv) { Passes.setDIStatsMap(DIStatsMap); Passes.add(createDebugifyModulePass()); } else if (VerifyDebugInfoPreserve) { - Passes.setDIPreservationMap(DIPreservationMap); + Passes.setDebugInfoBeforePass(DebugInfoBeforePass); Passes.add(createDebugifyModulePass( DebugifyMode::OriginalDebugInfo, "", - &(Passes.getDebugInfoPerPassMap()))); + &(Passes.getDebugInfoPerPass()))); } } @@ -934,30 +938,8 @@ int main(int argc, char **argv) { else errs() << argv[0] << ": cannot create pass: " << PassInf->getPassName() << "\n"; - if (P) { - PassKind Kind = P->getPassKind(); + if (P) addPass(Passes, P); - - if (AnalyzeOnly) { - switch (Kind) { - case PT_Region: - Passes.add(createRegionPassPrinter(PassInf, Out->os())); - break; - case PT_Loop: - Passes.add(createLoopPassPrinter(PassInf, Out->os())); - break; - case PT_Function: - Passes.add(createFunctionPassPrinter(PassInf, Out->os())); - break; - case PT_CallGraphSCC: - Passes.add(createCallGraphPassPrinter(PassInf, Out->os())); - break; - default: - Passes.add(createModulePassPrinter(PassInf, Out->os())); - break; - } - } - } } if (OptLevelO0) @@ -997,7 +979,7 @@ int main(int argc, char **argv) { Passes.setOrigDIVerifyBugsReportFilePath(VerifyDIPreserveExport); Passes.add(createCheckDebugifyModulePass( false, "", nullptr, DebugifyMode::OriginalDebugInfo, - &(Passes.getDebugInfoPerPassMap()), VerifyDIPreserveExport)); + &(Passes.getDebugInfoPerPass()), VerifyDIPreserveExport)); } } @@ -1010,7 +992,7 @@ int main(int argc, char **argv) { std::unique_ptr BOS; raw_ostream *OS = nullptr; - const bool ShouldEmitOutput = !NoOutput && !AnalyzeOnly; + const bool ShouldEmitOutput = !NoOutput; // Write bitcode or assembly to the output as the last step... 
if (ShouldEmitOutput || RunTwice) { diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index be17d5c718c2..1acc2a86d176 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -95,6 +95,7 @@ // //===----------------------------------------------------------------------===// +#include "CodeGenInstruction.h" #include "CodeGenTarget.h" #include "SubtargetFeatureInfo.h" #include "Types.h" @@ -3394,7 +3395,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { StringTable.GetOrAddStringOffset(LenMnemonic, false)); } - OS << "static const char *const MnemonicTable =\n"; + OS << "static const char MnemonicTable[] =\n"; StringTable.EmitString(OS); OS << ";\n\n"; diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp index 9283ceeb31e0..1d738274c75a 100644 --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -19,15 +19,14 @@ #include "Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" @@ -868,8 +867,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { IAPrinter IAP(CGA.Result->getAsString(), FlatAliasAsmString, NumMIOps); - bool CantHandle = false; - unsigned MIOpNum = 0; for (unsigned i = 0, e = LastOpNo; i != e; ++i) { // Skip over tied operands as they're not part of an alias declaration. @@ -969,10 +966,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { break; } case CodeGenInstAlias::ResultOperand::K_Reg: - // If this is zero_reg, something's playing tricks we're not - // equipped to handle. if (!CGA.ResultOperands[i].getRegister()) { - CantHandle = true; + IAP.addCond(std::string(formatv( + "AliasPatternCond::K_Reg, {0}::NoRegister", Namespace))); break; } @@ -985,8 +981,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { MIOpNum += RO.getMINumOperands(); } - if (CantHandle) continue; - std::vector ReqFeatures; if (PassSubtarget) { // We only consider ReqFeatures predicates if PassSubtarget @@ -1005,6 +999,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { if (D->getNumArgs() == 0) PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); bool IsOr = CombineType == "any_of"; + // Change (any_of FeatureAll, (any_of ...)) to (any_of FeatureAll, ...). 
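// [Illustrative example, not part of this patch] The rewrite described in the
// comment above turns a nested AssemblerCondDag such as
//   (any_of FeatureA, (any_of FeatureB, FeatureC))
// into the flat form
//   (any_of FeatureA, FeatureB, FeatureC)
// so the emitter below only has to walk a single argument list. The feature
// names are hypothetical.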
+ if (IsOr && D->getNumArgs() == 2 && isa(D->getArg(1))) { + DagInit *RHS = dyn_cast(D->getArg(1)); + SmallVector Args{D->getArg(0)}; + SmallVector ArgNames{D->getArgName(0)}; + for (unsigned i = 0, e = RHS->getNumArgs(); i != e; ++i) { + Args.push_back(RHS->getArg(i)); + ArgNames.push_back(RHS->getArgName(i)); + } + D = DagInit::get(D->getOperator(), nullptr, Args, ArgNames); + } for (auto *Arg : D->getArgs()) { bool IsNeg = false; diff --git a/llvm/utils/TableGen/AsmWriterInst.cpp b/llvm/utils/TableGen/AsmWriterInst.cpp index 887abbac9d3b..4a78108d6f4a 100644 --- a/llvm/utils/TableGen/AsmWriterInst.cpp +++ b/llvm/utils/TableGen/AsmWriterInst.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "AsmWriterInst.h" +#include "CodeGenInstruction.h" #include "CodeGenTarget.h" #include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Error.h" diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp index 5deac4b34bf2..1f975f52d6e7 100644 --- a/llvm/utils/TableGen/Attributes.cpp +++ b/llvm/utils/TableGen/Attributes.cpp @@ -6,10 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/MemoryBuffer.h" #include "llvm/TableGen/Record.h" -#include -#include #include using namespace llvm; diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index 127ae6247bd9..8f080cd250ab 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -15,12 +15,19 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include using namespace llvm; namespace { class CallingConvEmitter { RecordKeeper &Records; + unsigned Counter; + std::string CurrentAction; + bool SwiftAction; + + std::map> AssignedRegsMap; + std::map> AssignedSwiftRegsMap; + std::map> DelegateToMap; + public: explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {} @@ -29,7 +36,7 @@ public: private: void EmitCallingConv(Record *CC, raw_ostream &O); void EmitAction(Record *Action, unsigned Indent, raw_ostream &O); - unsigned Counter; + void EmitArgRegisterLists(raw_ostream &O); }; } // End anonymous namespace @@ -39,6 +46,7 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. Records.startTimer("Emit prototypes"); + O << "#ifndef GET_CC_REGISTER_LISTS\n\n"; for (Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { unsigned Pad = CC->getName().size(); @@ -59,18 +67,28 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit each non-custom calling convention description in full. Records.startTimer("Emit full descriptions"); for (Record *CC : CCs) { - if (!CC->getValueAsBit("Custom")) + if (!CC->getValueAsBit("Custom")) { EmitCallingConv(CC, O); + } } -} + EmitArgRegisterLists(O); + + O << "\n#endif // CC_REGISTER_LIST\n"; +} void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; + CurrentAction = CC->getName().str(); + // Call upon the creation of a map entry from the void! + // We want an entry in AssignedRegsMap for every action, even if that + // entry is empty. 
+ AssignedRegsMap[CurrentAction] = {}; + O << "\n\n"; - unsigned Pad = CC->getName().size(); + unsigned Pad = CurrentAction.size(); if (CC->getValueAsBit("Entry")) { O << "bool llvm::"; Pad += 12; @@ -78,13 +96,21 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { O << "static bool "; Pad += 13; } - O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" + O << CurrentAction << "(unsigned ValNo, MVT ValVT,\n" << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { + Record *Action = CCActions->getElementAsRecord(i); + SwiftAction = llvm::any_of(Action->getSuperClasses(), + [](const std::pair &Class) { + std::string Name = + Class.first->getNameInitAsString(); + return StringRef(Name).startswith("CCIfSwift"); + }); + O << "\n"; - EmitAction(CCActions->getElementAsRecord(i), 2, O); + EmitAction(Action, 2, O); } O << "\n return true; // CC didn't match.\n"; @@ -94,7 +120,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, raw_ostream &O) { std::string IndentStr = std::string(Indent, ' '); - + if (Action->isSubClassOf("CCPredicateAction")) { O << IndentStr << "if ("; @@ -122,18 +148,30 @@ void CallingConvEmitter::EmitAction(Record *Action, O << IndentStr << "if (!" << CC->getName() << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; + DelegateToMap[CurrentAction].insert(CC->getName().str()); } else if (Action->isSubClassOf("CCAssignToReg")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { - O << IndentStr << "if (unsigned Reg = State.AllocateReg("; - O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n"; + std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); + O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name + << ")) {\n"; + if (SwiftAction) + AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); } else { O << IndentStr << "static const MCPhysReg RegList" << ++Counter << "[] = {\n"; O << IndentStr << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) - O << LS << getQualifiedName(RegList->getElementAsRecord(i)); + for (unsigned i = 0, e = RegList->size(); i != e; ++i) { + std::string Name = getQualifiedName(RegList->getElementAsRecord(i)); + if (SwiftAction) + AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); + O << LS << Name; + } O << "\n" << IndentStr << "};\n"; O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; @@ -288,6 +326,83 @@ void CallingConvEmitter::EmitAction(Record *Action, } } +void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { + // Transitively merge all delegated CCs into AssignedRegsMap. 
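// [Illustrative sketch, not part of this patch] The merged maps assembled
// here are emitted under the new GET_CC_REGISTER_LISTS guard, so a target
// could pull the lists in like this (target and file names hypothetical):
//
//   #define GET_CC_REGISTER_LISTS
//   #include "XYZGenCallingConv.inc"
//   // which expands to arrays such as:
//   //   const MCRegister CC_XYZ_ArgRegs[] = { XYZ::R0, XYZ::R1 };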
+ using EntryTy = std::pair>; + bool Redo; + do { + Redo = false; + std::deque Worklist(DelegateToMap.begin(), DelegateToMap.end()); + + while (!Worklist.empty()) { + EntryTy Entry = Worklist.front(); + Worklist.pop_front(); + + const std::string &CCName = Entry.first; + std::set &Registers = Entry.second; + if (!Registers.empty()) + continue; + + for (auto &InnerEntry : Worklist) { + const std::string &InnerCCName = InnerEntry.first; + std::set &InnerRegisters = InnerEntry.second; + + if (InnerRegisters.find(CCName) != InnerRegisters.end()) { + AssignedRegsMap[InnerCCName].insert( + AssignedRegsMap[CCName].begin(), + AssignedRegsMap[CCName].end()); + InnerRegisters.erase(CCName); + } + } + + DelegateToMap.erase(CCName); + Redo = true; + } + } while (Redo); + + if (AssignedRegsMap.empty()) + return; + + O << "\n#else\n\n"; + + for (auto &Entry : AssignedRegsMap) { + const std::string &RegName = Entry.first; + std::set &Registers = Entry.second; + + if (RegName.empty()) + continue; + + O << "const MCRegister " << Entry.first << "_ArgRegs[] = { "; + + if (Registers.empty()) { + O << "0"; + } else { + ListSeparator LS; + for (const std::string &Reg : Registers) + O << LS << Reg; + } + + O << " };\n"; + } + + if (AssignedSwiftRegsMap.empty()) + return; + + O << "\n// Registers used by Swift.\n"; + for (auto &Entry : AssignedSwiftRegsMap) { + const std::string &RegName = Entry.first; + std::set &Registers = Entry.second; + + O << "const MCRegister " << RegName << "_Swift_ArgRegs[] = { "; + + ListSeparator LS; + for (const std::string &Reg : Registers) + O << LS << Reg; + + O << " };\n"; + } +} + namespace llvm { void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { diff --git a/llvm/utils/TableGen/CodeBeadsGen.cpp b/llvm/utils/TableGen/CodeBeadsGen.cpp deleted file mode 100644 index 18a6d6d19eb2..000000000000 --- a/llvm/utils/TableGen/CodeBeadsGen.cpp +++ /dev/null @@ -1,137 +0,0 @@ -//===---------- CodeBeadsGen.cpp - Code Beads Generator -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// CodeBeads are data fields carrying auxiliary information for instructions. -// -// Under the hood it's simply implemented by a `bits` field (with arbitrary -// length) in each TG instruction description, where this TG backend will -// generate a helper function to access it. -// -// This is especially useful for expressing variable length encoding -// instructions and complex addressing modes. Since in those cases each -// instruction is usually associated with large amount of information like -// addressing mode details used on a specific operand. Instead of retreating to -// ad-hoc methods to figure out these information when encoding an instruction, -// CodeBeads provide a clean table for the instruction encoder to lookup. 
-//===----------------------------------------------------------------------===// - -#include "CodeGenTarget.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" -#include -#include -#include -using namespace llvm; - -namespace { - -class CodeBeadsGen { - RecordKeeper &Records; - -public: - CodeBeadsGen(RecordKeeper &R) : Records(R) {} - void run(raw_ostream &OS); -}; - -void CodeBeadsGen::run(raw_ostream &OS) { - CodeGenTarget Target(Records); - std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); - - // For little-endian instruction bit encodings, reverse the bit order - Target.reverseBitsForLittleEndianEncoding(); - - ArrayRef NumberedInstructions = - Target.getInstructionsByEnumValue(); - - // Emit function declaration - OS << "const uint8_t *llvm::" << Target.getInstNamespace(); - OS << "::getMCInstrBeads(unsigned Opcode) {\n"; - - // First, get the maximum bit length among all beads. And do some - // simple validation - unsigned MaxBitLength = 0; - - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - if (!R->getValue("Beads")) - continue; - - BitsInit *BI = R->getValueAsBitsInit("Beads"); - if (!BI->isComplete()) { - PrintFatalError(R->getLoc(), "Record `" + R->getName() + - "', bit field 'Beads' is not complete"); - } - - MaxBitLength = std::max(MaxBitLength, BI->getNumBits()); - } - - // Number of bytes - unsigned Parts = MaxBitLength / 8; - - // Emit instruction base values - OS << " static const uint8_t InstBits[][" << Parts << "] = {\n"; - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - - if (R->getValueAsString("Namespace") == "TargetOpcode" || - !R->getValue("Beads")) { - OS << "\t{ 0x0 },\t// "; - if (R->getValueAsBit("isPseudo")) - OS << "(Pseudo) "; - OS << R->getName() << "\n"; - continue; - } - - BitsInit *BI = R->getValueAsBitsInit("Beads"); - - // Convert to byte array: - // [dcba] -> [a][b][c][d] - OS << "\t{"; - for (unsigned p = 0; p < Parts; ++p) { - unsigned Right = 8 * p; - unsigned Left = Right + 8; - - uint8_t Value = 0; - for (unsigned i = Right; i != Left; ++i) { - unsigned Shift = i % 8; - if (auto *B = dyn_cast(BI->getBit(i))) { - Value |= (static_cast(B->getValue()) << Shift); - } else { - PrintFatalError(R->getLoc(), "Record `" + R->getName() + - "', bit 'Beads[" + Twine(i) + - "]' is not defined"); - } - } - - if (p) - OS << ','; - OS << " 0x"; - OS.write_hex(Value); - OS << ""; - } - OS << " }," << '\t' << "// " << R->getName() << "\n"; - } - OS << "\t{ 0x0 }\n };\n"; - - // Emit initial function code - OS << " return InstBits[Opcode];\n" - << "}\n\n"; -} - -} // End anonymous namespace - -namespace llvm { - -void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Machine Code Beads", OS); - CodeBeadsGen(RK).run(OS); -} - -} // namespace llvm diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index fbac0d969917..2b9931b23c11 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -16,11 +16,13 @@ #include "CodeGenTarget.h" #include "SubtargetFeatureInfo.h" #include "Types.h" +#include "VarLenCodeEmitterGen.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Error.h" #include 
"llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include @@ -117,16 +119,16 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, (!NamedOpIndices.empty() && NamedOpIndices.count( CGI.Operands.getSubOperandNumber(NumberedOp).first)))) { ++NumberedOp; + } - if (NumberedOp >= CGI.Operands.back().MIOperandNo + - CGI.Operands.back().MINumOperands) { - errs() << "Too few operands in record " << R->getName() << - " (no match for variable " << VarName << "):\n"; - errs() << *R; - errs() << '\n'; - - return; - } + if (NumberedOp >= + CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) { + std::string E; + raw_string_ostream S(E); + S << "Too few operands in record " << R->getName() + << " (no match for variable " << VarName << "):\n"; + S << *R; + PrintFatalError(R, E); } OpIdx = NumberedOp++; @@ -396,132 +398,138 @@ void CodeEmitterGen::run(raw_ostream &o) { ArrayRef NumberedInstructions = Target.getInstructionsByEnumValue(); - const CodeGenHwModes &HWM = Target.getHwModes(); - // The set of HwModes used by instruction encodings. - std::set HwModes; - BitWidth = 0; - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - if (R->getValueAsString("Namespace") == "TargetOpcode" || - R->getValueAsBit("isPseudo")) - continue; + if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { + Record *R = CGI->TheDef; + return R->getValue("Inst") && isa(R->getValueInit("Inst")); + })) { + emitVarLenCodeEmitter(Records, o); + } else { + const CodeGenHwModes &HWM = Target.getHwModes(); + // The set of HwModes used by instruction encodings. + std::set HwModes; + BitWidth = 0; + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + if (R->getValueAsString("Namespace") == "TargetOpcode" || + R->getValueAsBit("isPseudo")) + continue; - if (const RecordVal *RV = R->getValue("EncodingInfos")) { - if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { - EncodingInfoByHwMode EBM(DI->getDef(), HWM); - for (auto &KV : EBM) { - BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); - BitWidth = std::max(BitWidth, BI->getNumBits()); - HwModes.insert(KV.first); + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM) { + BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); + BitWidth = std::max(BitWidth, BI->getNumBits()); + HwModes.insert(KV.first); + } + continue; } - continue; } + BitsInit *BI = R->getValueAsBitsInit("Inst"); + BitWidth = std::max(BitWidth, BI->getNumBits()); + } + UseAPInt = BitWidth > 64; + + // Emit function declaration + if (UseAPInt) { + o << "void " << Target.getName() + << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " APInt &Inst,\n" + << " APInt &Scratch,\n" + << " const MCSubtargetInfo &STI) const {\n"; + } else { + o << "uint64_t " << Target.getName(); + o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " const MCSubtargetInfo &STI) const {\n"; } - BitsInit *BI = R->getValueAsBitsInit("Inst"); - BitWidth = std::max(BitWidth, BI->getNumBits()); - } - UseAPInt = BitWidth > 64; - - // Emit function declaration - if (UseAPInt) { - o << "void " << Target.getName() - << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" - << " SmallVectorImpl &Fixups,\n" - << " APInt &Inst,\n" - << " APInt &Scratch,\n" - << " const 
MCSubtargetInfo &STI) const {\n"; - } else { - o << "uint64_t " << Target.getName(); - o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" - << " SmallVectorImpl &Fixups,\n" - << " const MCSubtargetInfo &STI) const {\n"; - } - - // Emit instruction base values - if (HwModes.empty()) { - emitInstructionBaseValues(o, NumberedInstructions, Target, -1); - } else { - for (unsigned HwMode : HwModes) - emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); - } - if (!HwModes.empty()) { - o << " const uint64_t *InstBits;\n"; - o << " unsigned HwMode = STI.getHwMode();\n"; - o << " switch (HwMode) {\n"; - o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; - for (unsigned I : HwModes) { - o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name - << "; break;\n"; + // Emit instruction base values + if (HwModes.empty()) { + emitInstructionBaseValues(o, NumberedInstructions, Target, -1); + } else { + for (unsigned HwMode : HwModes) + emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); } - o << " };\n"; - } - // Map to accumulate all the cases. - std::map> CaseMap; + if (!HwModes.empty()) { + o << " const uint64_t *InstBits;\n"; + o << " unsigned HwMode = STI.getHwMode();\n"; + o << " switch (HwMode) {\n"; + o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; + for (unsigned I : HwModes) { + o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name + << "; break;\n"; + } + o << " };\n"; + } - // Construct all cases statement for each opcode - for (Record *R : Insts) { - if (R->getValueAsString("Namespace") == "TargetOpcode" || - R->getValueAsBit("isPseudo")) - continue; - std::string InstName = - (R->getValueAsString("Namespace") + "::" + R->getName()).str(); - std::string Case = getInstructionCase(R, Target); + // Map to accumulate all the cases. 
+ std::map> CaseMap; - CaseMap[Case].push_back(std::move(InstName)); - } + // Construct all cases statement for each opcode + for (Record *R : Insts) { + if (R->getValueAsString("Namespace") == "TargetOpcode" || + R->getValueAsBit("isPseudo")) + continue; + std::string InstName = + (R->getValueAsString("Namespace") + "::" + R->getName()).str(); + std::string Case = getInstructionCase(R, Target); - // Emit initial function code - if (UseAPInt) { - int NumWords = APInt::getNumWords(BitWidth); - int NumBytes = (BitWidth + 7) / 8; - o << " const unsigned opcode = MI.getOpcode();\n" - << " if (Inst.getBitWidth() != " << BitWidth << ")\n" - << " Inst = Inst.zext(" << BitWidth << ");\n" - << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" - << " Scratch = Scratch.zext(" << BitWidth << ");\n" - << " LoadIntFromMemory(Inst, (const uint8_t *)&InstBits[opcode * " - << NumWords << "], " << NumBytes << ");\n" - << " APInt &Value = Inst;\n" - << " APInt &op = Scratch;\n" - << " switch (opcode) {\n"; - } else { - o << " const unsigned opcode = MI.getOpcode();\n" - << " uint64_t Value = InstBits[opcode];\n" - << " uint64_t op = 0;\n" - << " (void)op; // suppress warning\n" - << " switch (opcode) {\n"; - } + CaseMap[Case].push_back(std::move(InstName)); + } + + // Emit initial function code + if (UseAPInt) { + int NumWords = APInt::getNumWords(BitWidth); + o << " const unsigned opcode = MI.getOpcode();\n" + << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" + << " Scratch = Scratch.zext(" << BitWidth << ");\n" + << " Inst = APInt(" << BitWidth + << ", makeArrayRef(InstBits + opcode * " << NumWords << ", " << NumWords + << "));\n" + << " APInt &Value = Inst;\n" + << " APInt &op = Scratch;\n" + << " switch (opcode) {\n"; + } else { + o << " const unsigned opcode = MI.getOpcode();\n" + << " uint64_t Value = InstBits[opcode];\n" + << " uint64_t op = 0;\n" + << " (void)op; // suppress warning\n" + << " switch (opcode) {\n"; + } - // Emit each case statement - std::map>::iterator IE, EE; - for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { - const std::string &Case = IE->first; - std::vector &InstList = IE->second; + // Emit each case statement + std::map>::iterator IE, EE; + for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { + const std::string &Case = IE->first; + std::vector &InstList = IE->second; - for (int i = 0, N = InstList.size(); i < N; i++) { - if (i) o << "\n"; - o << " case " << InstList[i] << ":"; + for (int i = 0, N = InstList.size(); i < N; i++) { + if (i) + o << "\n"; + o << " case " << InstList[i] << ":"; + } + o << " {\n"; + o << Case; + o << " break;\n" + << " }\n"; } - o << " {\n"; - o << Case; - o << " break;\n" - << " }\n"; - } - // Default case: unhandled opcode - o << " default:\n" - << " std::string msg;\n" - << " raw_string_ostream Msg(msg);\n" - << " Msg << \"Not supported instr: \" << MI;\n" - << " report_fatal_error(msg.c_str());\n" - << " }\n"; - if (UseAPInt) - o << " Inst = Value;\n"; - else - o << " return Value;\n"; - o << "}\n\n"; + // Default case: unhandled opcode + o << " default:\n" + << " std::string msg;\n" + << " raw_string_ostream Msg(msg);\n" + << " Msg << \"Not supported instr: \" << MI;\n" + << " report_fatal_error(Msg.str().c_str());\n" + << " }\n"; + if (UseAPInt) + o << " Inst = Value;\n"; + else + o << " return Value;\n"; + o << "}\n\n"; + } const auto &All = SubtargetFeatureInfo::getAll(Records); std::map SubtargetFeatures; diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp 
b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index a1f8f4809d5f..9d6adb6d2c37 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" @@ -2815,6 +2816,7 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) { TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName) { + RecordKeeper &RK = TheInit->getRecordKeeper(); if (DefInit *DI = dyn_cast(TheInit)) { Record *R = DI->getDef(); @@ -2853,13 +2855,13 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, if (!OpName.empty()) error("Constant int or bit argument should not have a name!"); if (isa(TheInit)) - TheInit = TheInit->convertInitializerTo(IntRecTy::get()); + TheInit = TheInit->convertInitializerTo(IntRecTy::get(RK)); return std::make_shared(TheInit, 1); } if (BitsInit *BI = dyn_cast(TheInit)) { // Turn this into an IntInit. - Init *II = BI->convertInitializerTo(IntRecTy::get()); + Init *II = BI->convertInitializerTo(IntRecTy::get(RK)); if (!II || !isa(II)) error("Bits value must be constants!"); return ParseTreePattern(II, OpName); @@ -2958,8 +2960,8 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, else // Otherwise, no chain. Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode(); - Children.insert(Children.begin(), - std::make_shared(IntInit::get(IID), 1)); + Children.insert(Children.begin(), std::make_shared( + IntInit::get(RK, IID), 1)); } if (Operator->isSubClassOf("ComplexPattern")) { @@ -4366,7 +4368,7 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() { PatternsToMatch.emplace_back(P.getSrcRecord(), P.getPredicates(), std::move(NewSrc), std::move(NewDst), P.getDstRegs(), P.getAddedComplexity(), - Record::getNewUID(), Mode, Check); + Record::getNewUID(Records), Mode, Check); }; for (PatternToMatch &P : Copy) { @@ -4742,7 +4744,7 @@ void CodeGenDAGPatterns::GenerateVariants() { PatternsToMatch[i].getSrcRecord(), PatternsToMatch[i].getPredicates(), Variant, PatternsToMatch[i].getDstPatternShared(), PatternsToMatch[i].getDstRegs(), - PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(), + PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(Records), PatternsToMatch[i].getForceMode(), PatternsToMatch[i].getHwModeFeatures()); } diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h index 39d81230a4f2..94694a96eb90 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h @@ -28,7 +28,6 @@ #include #include #include -#include #include namespace llvm { diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp index 78b698c31b2b..ba12633ace8c 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.cpp +++ b/llvm/utils/TableGen/CodeGenInstruction.cpp @@ -12,7 +12,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/TableGen/Error.h" @@ -416,6 +415,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq"); isCodeGenOnly = R->getValueAsBit("isCodeGenOnly"); isPseudo = R->getValueAsBit("isPseudo"); + isMeta = R->getValueAsBit("isMeta"); 
ImplicitDefs = R->getValueAsListOfDefs("Defs"); ImplicitUses = R->getValueAsListOfDefs("Uses"); @@ -632,8 +632,8 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, if (!BI->isComplete()) return false; // Convert the bits init to an integer and use that for the result. - IntInit *II = - dyn_cast_or_null(BI->convertInitializerTo(IntRecTy::get())); + IntInit *II = dyn_cast_or_null( + BI->convertInitializerTo(IntRecTy::get(BI->getRecordKeeper()))); if (!II) return false; ResOp = ResultOperand(II->getValue()); diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h index e0ce5d433602..d3de6d95780c 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.h +++ b/llvm/utils/TableGen/CodeGenInstruction.h @@ -16,13 +16,13 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/MachineValueType.h" -#include "llvm/Support/SMLoc.h" #include #include #include #include namespace llvm { +class SMLoc; template class ArrayRef; class Record; class DagInit; @@ -271,6 +271,7 @@ template class ArrayRef; bool hasExtraDefRegAllocReq : 1; bool isCodeGenOnly : 1; bool isPseudo : 1; + bool isMeta : 1; bool isRegSequence : 1; bool isExtractSubreg : 1; bool isInsertSubreg : 1; diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h index b005a5866f80..599795e3c065 100644 --- a/llvm/utils/TableGen/CodeGenIntrinsics.h +++ b/llvm/utils/TableGen/CodeGenIntrinsics.h @@ -26,7 +26,7 @@ struct CodeGenIntrinsic { Record *TheDef; // The actual record defining this intrinsic. std::string Name; // The name of the LLVM function "llvm.bswap.i32" std::string EnumName; // The name of the enum "bswap_i32" - std::string GCCBuiltinName; // Name of the corresponding GCC builtin, or "". + std::string ClangBuiltinName; // Name of the corresponding GCC builtin, or "". std::string MSBuiltinName; // Name of the corresponding MS builtin, or "". std::string TargetPrefix; // Target prefix, e.g. "ppc" for t-s intrinsics. @@ -125,6 +125,9 @@ struct CodeGenIntrinsic { /// True if the intrinsic is no-return. bool isNoReturn; + /// True if the intrinsic is no-callback. + bool isNoCallback; + /// True if the intrinsic is no-sync. 
bool isNoSync; diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp index 38871eb8cf3c..02695942f5c1 100644 --- a/llvm/utils/TableGen/CodeGenMapTable.cpp +++ b/llvm/utils/TableGen/CodeGenMapTable.cpp @@ -75,8 +75,8 @@ // //===----------------------------------------------------------------------===// +#include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "llvm/Support/Format.h" #include "llvm/TableGen/Error.h" using namespace llvm; typedef std::map > InstrRelMapTy; diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index afaeb73ffab1..2c61be713afc 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -12,21 +12,18 @@ //===----------------------------------------------------------------------===// #include "CodeGenRegisters.h" -#include "CodeGenTarget.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -204,12 +201,16 @@ namespace { class RegUnitIterator { CodeGenRegister::Vec::const_iterator RegI, RegE; CodeGenRegister::RegUnitList::iterator UnitI, UnitE; + static CodeGenRegister::RegUnitList Sentinel; public: RegUnitIterator(const CodeGenRegister::Vec &Regs): RegI(Regs.begin()), RegE(Regs.end()) { - if (RegI != RegE) { + if (RegI == RegE) { + UnitI = Sentinel.end(); + UnitE = Sentinel.end(); + } else { UnitI = (*RegI)->getRegUnits().begin(); UnitE = (*RegI)->getRegUnits().end(); advance(); @@ -240,6 +241,8 @@ protected: } }; +CodeGenRegister::RegUnitList RegUnitIterator::Sentinel; + } // end anonymous namespace // Return true of this unit appears in RegUnits. @@ -635,6 +638,7 @@ struct TupleExpander : SetTheory::Expander { Def->getValueAsListOfStrings("RegAsmNames"); // Zip them up. + RecordKeeper &RK = Def->getRecords(); for (unsigned n = 0; n != Length; ++n) { std::string Name; Record *Proto = Lists[0][n]; @@ -651,13 +655,13 @@ struct TupleExpander : SetTheory::Expander { SmallVector CostPerUse; CostPerUse.insert(CostPerUse.end(), CostList->begin(), CostList->end()); - StringInit *AsmName = StringInit::get(""); + StringInit *AsmName = StringInit::get(RK, ""); if (!RegNames.empty()) { if (RegNames.size() <= n) PrintFatalError(Def->getLoc(), "Register tuple definition missing name for '" + Name + "'."); - AsmName = StringInit::get(RegNames[n]); + AsmName = StringInit::get(RK, RegNames[n]); } // Create a new Record representing the synthesized register. This record @@ -696,7 +700,7 @@ struct TupleExpander : SetTheory::Expander { // Composite registers are always covered by sub-registers. if (Field == "CoveredBySubRegs") - RV.setValue(BitInit::get(true)); + RV.setValue(BitInit::get(RK, true)); // Copy fields from the RegisterTuples def. 
if (Field == "SubRegIndices" || @@ -1105,6 +1109,17 @@ void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank, std::back_inserter(RegUnits)); } +//===----------------------------------------------------------------------===// +// CodeGenRegisterCategory +//===----------------------------------------------------------------------===// + +CodeGenRegisterCategory::CodeGenRegisterCategory(CodeGenRegBank &RegBank, + Record *R) + : TheDef(R), Name(std::string(R->getName())) { + for (Record *RegClass : R->getValueAsListOfDefs("Classes")) + Classes.push_back(RegBank.getRegClass(RegClass)); +} + //===----------------------------------------------------------------------===// // CodeGenRegBank //===----------------------------------------------------------------------===// @@ -1222,6 +1237,12 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, for (auto &RC : RegClasses) RC.EnumValue = i++; CodeGenRegisterClass::computeSubClasses(*this); + + // Read in the register category definitions. + std::vector RCats = + Records.getAllDerivedDefinitions("RegisterCategory"); + for (auto *R : RCats) + RegCategories.emplace_back(*this, R); } // Create a synthetic CodeGenSubRegIndex without a corresponding Record. @@ -1794,6 +1815,7 @@ void CodeGenRegBank::computeRegUnitWeights() { unsigned NumIters = 0; for (bool Changed = true; Changed; ++NumIters) { assert(NumIters <= NumNativeRegUnits && "Runaway register unit weights"); + (void) NumIters; Changed = false; for (auto &Reg : Registers) { CodeGenRegister::RegUnitList NormalUnits; diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h index c9fcf83b0a8a..0fc8b3ef80dd 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.h +++ b/llvm/utils/TableGen/CodeGenRegisters.h @@ -27,7 +27,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachineValueType.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" #include @@ -476,6 +475,26 @@ namespace llvm { static void computeSubClasses(CodeGenRegBank&); }; + // Register categories are used when we need to deterine the category a + // register falls into (GPR, vector, fixed, etc.) without having to know + // specific information about the target architecture. + class CodeGenRegisterCategory { + Record *TheDef; + std::string Name; + std::list Classes; + + public: + CodeGenRegisterCategory(CodeGenRegBank &, Record *R); + CodeGenRegisterCategory(CodeGenRegisterCategory &) = delete; + + // Return the Record that defined this class, or NULL if the class was + // created by TableGen. + Record *getDef() const { return TheDef; } + + std::string getName() const { return Name; } + std::list getClasses() const { return Classes; } + }; + // Register units are used to model interference and register pressure. // Every register is assigned one or more register units such that two // registers overlap if and only if they have a register unit in common. @@ -559,6 +578,13 @@ namespace llvm { typedef std::map RCKeyMap; RCKeyMap Key2RC; + // Register categories. + std::list RegCategories; + DenseMap Def2RCat; + using RCatKeyMap = + std::map; + RCatKeyMap Key2RCat; + // Remember each unique set of register units. Initially, this contains a // unique set for each register class. Simliar sets are coalesced with // pruneUnitSets and new supersets are inferred during computeRegUnitSets. 
@@ -719,6 +745,14 @@ namespace llvm { return RegClasses; } + std::list &getRegCategories() { + return RegCategories; + } + + const std::list &getRegCategories() const { + return RegCategories; + } + // Find a register class from its def. CodeGenRegisterClass *getRegClass(const Record *) const; diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index e47bda725a17..4933bfc476f4 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h index a331a30b51a8..f7e35b0c808f 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.h +++ b/llvm/utils/TableGen/CodeGenSchedule.h @@ -17,11 +17,8 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" -#include namespace llvm { diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 2c1583f7979d..af2e8576af2e 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -14,16 +14,13 @@ //===----------------------------------------------------------------------===// #include "CodeGenTarget.h" -#include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" #include "CodeGenIntrinsics.h" #include "CodeGenSchedule.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Timer.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" #include using namespace llvm; @@ -56,9 +53,12 @@ StringRef llvm::getName(MVT::SimpleValueType T) { } StringRef llvm::getEnumName(MVT::SimpleValueType T) { + // clang-format off switch (T) { case MVT::Other: return "MVT::Other"; case MVT::i1: return "MVT::i1"; + case MVT::i2: return "MVT::i2"; + case MVT::i4: return "MVT::i4"; case MVT::i8: return "MVT::i8"; case MVT::i16: return "MVT::i16"; case MVT::i32: return "MVT::i32"; @@ -91,6 +91,8 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v256i1: return "MVT::v256i1"; case MVT::v512i1: return "MVT::v512i1"; case MVT::v1024i1: return "MVT::v1024i1"; + case MVT::v128i2: return "MVT::v128i2"; + case MVT::v64i4: return "MVT::v64i4"; case MVT::v1i8: return "MVT::v1i8"; case MVT::v2i8: return "MVT::v2i8"; case MVT::v4i8: return "MVT::v4i8"; @@ -227,6 +229,8 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::nxv2bf16: return "MVT::nxv2bf16"; case MVT::nxv4bf16: return "MVT::nxv4bf16"; case MVT::nxv8bf16: return "MVT::nxv8bf16"; + case MVT::nxv16bf16: return "MVT::nxv16bf16"; + case MVT::nxv32bf16: return "MVT::nxv32bf16"; case MVT::nxv1f32: return "MVT::nxv1f32"; case MVT::nxv2f32: return "MVT::nxv2f32"; case MVT::nxv4f32: return "MVT::nxv4f32"; @@ -245,6 +249,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::externref: return "MVT::externref"; default: llvm_unreachable("ILLEGAL VALUE TYPE!"); } + // clang-format on } /// getQualifiedName - Return the name of the specified record, with a @@ -471,7 +476,7 @@ GetInstByName(const char *Name, 
return I->second.get(); } -static const char *const FixedInstrs[] = { +static const char *FixedInstrs[] = { #define HANDLE_TARGET_OPCODE(OPC) #OPC, #include "llvm/Support/TargetOpcodes.def" nullptr}; @@ -555,7 +560,7 @@ void CodeGenTarget::reverseBitsForLittleEndianEncoding() { NewBits[middle] = BI->getBit(middle); } - BitsInit *NewBI = BitsInit::get(NewBits); + BitsInit *NewBI = BitsInit::get(Records, NewBits); // Update the bits in reversed order so that emitInstrOpBits will get the // correct endianness. @@ -666,6 +671,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R, isCommutative = false; canThrow = false; isNoReturn = false; + isNoCallback = false; isNoSync = false; isNoFree = false; isWillReturn = false; @@ -682,8 +688,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R, EnumName = DefName.substr(4); - if (R->getValue("GCCBuiltinName")) // Ignore a missing GCCBuiltinName field. - GCCBuiltinName = std::string(R->getValueAsString("GCCBuiltinName")); + if (R->getValue("ClangBuiltinName")) // Ignore a missing ClangBuiltinName field. + ClangBuiltinName = std::string(R->getValueAsString("ClangBuiltinName")); if (R->getValue("MSBuiltinName")) // Ignore a missing MSBuiltinName field. MSBuiltinName = std::string(R->getValueAsString("MSBuiltinName")); @@ -864,6 +870,8 @@ void CodeGenIntrinsic::setProperty(Record *R) { isConvergent = true; else if (R->getName() == "IntrNoReturn") isNoReturn = true; + else if (R->getName() == "IntrNoCallback") + isNoCallback = true; else if (R->getName() == "IntrNoSync") isNoSync = true; else if (R->getName() == "IntrNoFree") diff --git a/llvm/utils/TableGen/CodeGenTarget.h b/llvm/utils/TableGen/CodeGenTarget.h index 5bd84c873f2f..f14828f2c347 100644 --- a/llvm/utils/TableGen/CodeGenTarget.h +++ b/llvm/utils/TableGen/CodeGenTarget.h @@ -17,16 +17,15 @@ #define LLVM_UTILS_TABLEGEN_CODEGENTARGET_H #include "CodeGenHwModes.h" -#include "CodeGenInstruction.h" #include "CodeGenRegisters.h" #include "InfoByHwMode.h" #include "SDNodeProperties.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/Record.h" -#include namespace llvm { +class RecordKeeper; +class Record; +class CodeGenInstruction; struct CodeGenRegister; class CodeGenSchedModels; class CodeGenTarget; diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp index 2f211e2958fa..d012a0172a8f 100644 --- a/llvm/utils/TableGen/DAGISelEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelEmitter.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" #include "DAGISelMatcher.h" #include "llvm/Support/Debug.h" #include "llvm/TableGen/Record.h" diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index 5b0d16a8f3c8..777e75dcd929 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -13,9 +13,7 @@ #include "CodeGenDAGPatterns.h" #include "DAGISelMatcher.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp index 2361ed8a7a95..44bff4c67ab3 100644 --- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp @@ -6,9 +6,10 @@ // 
//===----------------------------------------------------------------------===// -#include "DAGISelMatcher.h" #include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" #include "CodeGenRegisters.h" +#include "DAGISelMatcher.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/TableGen/Error.h" diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index 27161d261e85..f2d9165c5c8c 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "DFAEmitter.h" -#include "CodeGenTarget.h" #include "SequenceToOffsetTable.h" #include "TableGenBackends.h" #include "llvm/ADT/SmallVector.h" @@ -30,9 +29,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" #include #include +#include #include #include #include @@ -306,6 +305,7 @@ void Automaton::emit(raw_ostream &OS) { } LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size() << " states with " << NumTransitions << " transitions.\n"); + (void) NumTransitions; const auto &ActionTypes = Transitions.back().getTypes(); OS << "// The type of an action in the " << Name << " automaton.\n"; diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index 9cbdbc19c206..6704d747f715 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -17,9 +17,7 @@ #include "CodeGenSchedule.h" #include "CodeGenTarget.h" #include "DFAEmitter.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp new file mode 100644 index 000000000000..fd58e798b445 --- /dev/null +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -0,0 +1,374 @@ +//===- DXILEmitter.cpp - DXIL operation Emitter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// DXILEmitter uses the descriptions of DXIL operation to construct enum and +// helper functions for DXIL operation. 
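A note on the (void) NumTransitions line added to DFAEmitter.cpp above: the variable's only use is inside LLVM_DEBUG(...), which compiles away in release builds, so the cast keeps -Wunused-variable quiet. A self-contained sketch of the same pattern (the DEBUG_ONLY macro is illustrative, standing in for LLVM_DEBUG):

#include <cstdio>

#ifndef NDEBUG
#define DEBUG_ONLY(X) do { X; } while (false)
#else
#define DEBUG_ONLY(X) do { } while (false)
#endif

static void countSomething() {
  unsigned Count = 42; // only read inside the debug-only macro
  DEBUG_ONLY(std::printf("count = %u\n", Count));
  (void)Count; // silence unused-variable warnings in release builds
}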
+// +//===----------------------------------------------------------------------===// + +#include "SequenceToOffsetTable.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" + +using namespace llvm; + +namespace { + +struct DXILShaderModel { + int Major; + int Minor; +}; +struct DXILParam { + int Pos; // position in parameter list + StringRef Type; // llvm type name, $o for overload, $r for resource + // type, $cb for legacy cbuffer, $u4 for u4 struct + StringRef Name; // short, unique name + StringRef Doc; // the documentation description of this parameter + bool IsConst; // whether this argument requires a constant value in the IR + StringRef EnumName; // the name of the enum type if applicable + int MaxValue; // the maximum value for this parameter if applicable + DXILParam(const Record *R) { + Name = R->getValueAsString("name"); + Pos = R->getValueAsInt("pos"); + Type = R->getValueAsString("llvm_type"); + if (R->getValue("doc")) + Doc = R->getValueAsString("doc"); + IsConst = R->getValueAsBit("is_const"); + EnumName = R->getValueAsString("enum_name"); + MaxValue = R->getValueAsInt("max_value"); + } +}; + +struct DXILOperationData { + StringRef Name; // short, unique name + + StringRef DXILOp; // name of DXIL operation + int DXILOpID; // ID of DXIL operation + StringRef DXILClass; // name of the opcode class + StringRef Category; // classification for this instruction + StringRef Doc; // the documentation description of this instruction + + SmallVector Params; // the operands that this instruction takes + StringRef OverloadTypes; // overload types if applicable + StringRef FnAttr; // attribute shorthands: rn=does not access + // memory,ro=only reads from memory + StringRef Intrinsic; // The llvm intrinsic map to DXILOp. Default is "" which + // means no map exist + bool IsDeriv; // whether this is some kind of derivative + bool IsGradient; // whether this requires a gradient calculation + bool IsFeedback; // whether this is a sampler feedback op + bool IsWave; // whether this requires in-wave, cross-lane functionality + bool RequiresUniformInputs; // whether this operation requires that all + // of its inputs are uniform across the wave + SmallVector + ShaderStages; // shader stages to which this applies, empty for all. + DXILShaderModel ShaderModel; // minimum shader model required + DXILShaderModel ShaderModelTranslated; // minimum shader model required with + // translation by linker + SmallVector counters; // counters for this inst. + DXILOperationData(const Record *R) { + Name = R->getValueAsString("name"); + DXILOp = R->getValueAsString("dxil_op"); + DXILOpID = R->getValueAsInt("dxil_opid"); + DXILClass = R->getValueAsDef("op_class")->getValueAsString("name"); + Category = R->getValueAsDef("category")->getValueAsString("name"); + + if (R->getValue("llvm_intrinsic")) { + auto *IntrinsicDef = R->getValueAsDef("llvm_intrinsic"); + auto DefName = IntrinsicDef->getName(); + assert(DefName.startswith("int_") && "invalid intrinsic name"); + // Remove the int_ from intrinsic name. 
+      Intrinsic = DefName.substr(4);
+    }
+
+    Doc = R->getValueAsString("doc");
+
+    ListInit *ParamList = R->getValueAsListInit("ops");
+    for (unsigned i = 0; i < ParamList->size(); ++i) {
+      Record *Param = ParamList->getElementAsRecord(i);
+      Params.emplace_back(DXILParam(Param));
+    }
+    OverloadTypes = R->getValueAsString("oload_types");
+    FnAttr = R->getValueAsString("fn_attr");
+  }
+};
+} // end anonymous namespace
+
+static void emitDXILOpEnum(DXILOperationData &DXILOp, raw_ostream &OS) {
+  // Name = ID, // Doc
+  OS << DXILOp.Name << " = " << DXILOp.DXILOpID << ", // " << DXILOp.Doc
+     << "\n";
+}
+
+static std::string buildCategoryStr(StringSet<> &Categories) {
+  std::string Str;
+  raw_string_ostream OS(Str);
+  for (auto &It : Categories) {
+    OS << " " << It.getKey();
+  }
+  return OS.str();
+}
+
+// Emit enum declaration for DXIL.
+static void emitDXILEnums(std::vector<DXILOperationData> &DXILOps,
+                          raw_ostream &OS) {
+  // Sort by Category + OpName.
+  std::sort(DXILOps.begin(), DXILOps.end(),
+            [](DXILOperationData &A, DXILOperationData &B) {
+              // Group by Category first.
+              if (A.Category == B.Category)
+                // Inside same Category, order by OpName.
+                return A.DXILOp < B.DXILOp;
+              else
+                return A.Category < B.Category;
+            });
+
+  OS << "// Enumeration for operations specified by DXIL\n";
+  OS << "enum class OpCode : unsigned {\n";
+
+  StringMap<StringSet<>> ClassMap;
+  StringRef PrevCategory = "";
+  for (auto &DXILOp : DXILOps) {
+    StringRef Category = DXILOp.Category;
+    if (Category != PrevCategory) {
+      OS << "\n// " << Category << "\n";
+      PrevCategory = Category;
+    }
+    emitDXILOpEnum(DXILOp, OS);
+    auto It = ClassMap.find(DXILOp.DXILClass);
+    if (It != ClassMap.end()) {
+      It->second.insert(DXILOp.Category);
+    } else {
+      ClassMap[DXILOp.DXILClass].insert(DXILOp.Category);
+    }
+  }
+
+  OS << "\n};\n\n";
+
+  std::vector<std::pair<std::string, std::string>> ClassVec;
+  for (auto &It : ClassMap) {
+    ClassVec.emplace_back(
+        std::make_pair(It.getKey().str(), buildCategoryStr(It.second)));
+  }
+  // Sort by Category + ClassName.
+  std::sort(ClassVec.begin(), ClassVec.end(),
+            [](std::pair<std::string, std::string> &A,
+               std::pair<std::string, std::string> &B) {
+              StringRef ClassA = A.first;
+              StringRef CategoryA = A.second;
+              StringRef ClassB = B.first;
+              StringRef CategoryB = B.second;
+              // Group by Category first.
+              if (CategoryA == CategoryB)
+                // Inside same Category, order by ClassName.
+                return ClassA < ClassB;
+              else
+                return CategoryA < CategoryB;
+            });
+
+  OS << "// Groups for DXIL operations with equivalent function templates\n";
+  OS << "enum class OpCodeClass : unsigned {\n";
+  PrevCategory = "";
+  for (auto &It : ClassVec) {
+
+    StringRef Category = It.second;
+    if (Category != PrevCategory) {
+      OS << "\n// " << Category << "\n";
+      PrevCategory = Category;
+    }
+    StringRef Name = It.first;
+    OS << Name << ",\n";
+  }
+  OS << "\n};\n\n";
+}
+
+// Emit map from llvm intrinsic to DXIL operation.
+static void emitDXILIntrinsicMap(std::vector<DXILOperationData> &DXILOps,
+                                 raw_ostream &OS) {
+  OS << "\n";
+  // FIXME: use array instead of SmallDenseMap.
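The two std::sort calls above implement the same two-key ordering (group by Category, then order by name) with nested if/else. A standalone sketch of an equivalent comparator written with std::tie, which produces the same strict weak ordering (the Op struct is illustrative, not the backend's type):

#include <algorithm>
#include <string>
#include <tuple>
#include <vector>

struct Op {
  std::string Category, Name;
};

// Sort by (Category, Name): categories group together, names order within.
static void sortOps(std::vector<Op> &Ops) {
  std::sort(Ops.begin(), Ops.end(), [](const Op &A, const Op &B) {
    return std::tie(A.Category, A.Name) < std::tie(B.Category, B.Name);
  });
}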
+ OS << "static const SmallDenseMap LowerMap = " + "{\n"; + for (auto &DXILOp : DXILOps) { + if (DXILOp.Intrinsic.empty()) + continue; + // {Intrinsic::sin, DXIL::OpCode::Sin}, + OS << " { Intrinsic::" << DXILOp.Intrinsic + << ", DXIL::OpCode::" << DXILOp.DXILOp << "},\n"; + } + OS << "};\n"; + OS << "\n"; +} + +static std::string emitDXILOperationFnAttr(StringRef FnAttr) { + return StringSwitch(FnAttr) + .Case("rn", "Attribute::ReadNone") + .Case("ro", "Attribute::ReadOnly") + .Default("Attribute::None"); +} + +static std::string getOverloadKind(StringRef Overload) { + return StringSwitch(Overload) + .Case("half", "OverloadKind::HALF") + .Case("float", "OverloadKind::FLOAT") + .Case("double", "OverloadKind::DOUBLE") + .Case("i1", "OverloadKind::I1") + .Case("i16", "OverloadKind::I16") + .Case("i32", "OverloadKind::I32") + .Case("i64", "OverloadKind::I64") + .Case("udt", "OverloadKind::UserDefineType") + .Case("obj", "OverloadKind::ObjectType") + .Default("OverloadKind::VOID"); +} + +static std::string getDXILOperationOverload(StringRef Overloads) { + SmallVector OverloadStrs; + Overloads.split(OverloadStrs, ';', /*MaxSplit*/ -1, /*KeepEmpty*/ false); + // Format is: OverloadKind::FLOAT | OverloadKind::HALF + assert(!OverloadStrs.empty() && "Invalid overloads"); + auto It = OverloadStrs.begin(); + std::string Result; + raw_string_ostream OS(Result); + OS << getOverloadKind(*It); + for (++It; It != OverloadStrs.end(); ++It) { + OS << " | " << getOverloadKind(*It); + } + return OS.str(); +} + +static std::string lowerFirstLetter(StringRef Name) { + if (Name.empty()) + return ""; + + std::string LowerName = Name.str(); + LowerName[0] = llvm::toLower(Name[0]); + return LowerName; +} + +static std::string getDXILOpClassName(StringRef DXILOpClass) { + // Lower first letter expect for special case. + return StringSwitch(DXILOpClass) + .Case("CBufferLoad", "cbufferLoad") + .Case("CBufferLoadLegacy", "cbufferLoadLegacy") + .Case("GSInstanceID", "gsInstanceID") + .Default(lowerFirstLetter(DXILOpClass)); +} + +static void emitDXILOperationTable(std::vector &DXILOps, + raw_ostream &OS) { + // Sort by DXILOpID. + std::sort(DXILOps.begin(), DXILOps.end(), + [](DXILOperationData &A, DXILOperationData &B) { + return A.DXILOpID < B.DXILOpID; + }); + + // Collect Names. + SequenceToOffsetTable OpClassStrings; + SequenceToOffsetTable OpStrings; + + StringSet<> ClassSet; + for (auto &DXILOp : DXILOps) { + OpStrings.add(DXILOp.DXILOp.str()); + + if (ClassSet.find(DXILOp.DXILClass) != ClassSet.end()) + continue; + ClassSet.insert(DXILOp.DXILClass); + OpClassStrings.add(getDXILOpClassName(DXILOp.DXILClass)); + } + + // Layout names. + OpStrings.layout(); + OpClassStrings.layout(); + + // Emit the DXIL operation table. 
+ //{DXIL::OpCode::Sin, OpCodeNameIndex, OpCodeClass::Unary, + // OpCodeClassNameIndex, + // OverloadKind::FLOAT | OverloadKind::HALF, Attribute::AttrKind::ReadNone}, + OS << "static const OpCodeProperty *getOpCodeProperty(DXIL::OpCode DXILOp) " + "{\n"; + + OS << " static const OpCodeProperty OpCodeProps[] = {\n"; + for (auto &DXILOp : DXILOps) { + OS << " { DXIL::OpCode::" << DXILOp.DXILOp << ", " + << OpStrings.get(DXILOp.DXILOp.str()) + << ", OpCodeClass::" << DXILOp.DXILClass << ", " + << OpClassStrings.get(getDXILOpClassName(DXILOp.DXILClass)) << ", " + << getDXILOperationOverload(DXILOp.OverloadTypes) << ", " + << emitDXILOperationFnAttr(DXILOp.FnAttr) << " },\n"; + } + OS << " };\n"; + + OS << " // FIXME: change search to indexing with\n"; + OS << " // DXILOp once all DXIL op is added.\n"; + OS << " OpCodeProperty TmpProp;\n"; + OS << " TmpProp.OpCode = DXILOp;\n"; + OS << " const OpCodeProperty *Prop =\n"; + OS << " llvm::lower_bound(OpCodeProps, TmpProp,\n"; + OS << " [](const OpCodeProperty &A, const " + "OpCodeProperty &B) {\n"; + OS << " return A.OpCode < B.OpCode;\n"; + OS << " });\n"; + OS << " assert(Prop && \"fail to find OpCodeProperty\");\n"; + OS << " return Prop;\n"; + OS << "}\n\n"; + + // Emit the string tables. + OS << "static const char *getOpCodeName(DXIL::OpCode DXILOp) {\n\n"; + + OpStrings.emitStringLiteralDef(OS, + " static const char DXILOpCodeNameTable[]"); + + OS << " auto *Prop = getOpCodeProperty(DXILOp);\n"; + OS << " unsigned Index = Prop->OpCodeNameOffset;\n"; + OS << " return DXILOpCodeNameTable + Index;\n"; + OS << "}\n\n"; + + OS << "static const char *getOpCodeClassName(const OpCodeProperty &Prop) " + "{\n\n"; + + OpClassStrings.emitStringLiteralDef( + OS, " static const char DXILOpCodeClassNameTable[]"); + + OS << " unsigned Index = Prop.OpCodeClassNameOffset;\n"; + OS << " return DXILOpCodeClassNameTable + Index;\n"; + OS << "}\n "; +} + +namespace llvm { + +void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) { + std::vector Ops = Records.getAllDerivedDefinitions("dxil_op"); + OS << "// Generated code, do not edit.\n"; + OS << "\n"; + + std::vector DXILOps; + DXILOps.reserve(Ops.size()); + for (auto *Record : Ops) { + DXILOps.emplace_back(DXILOperationData(Record)); + } + + OS << "#ifdef DXIL_OP_ENUM\n"; + emitDXILEnums(DXILOps, OS); + OS << "#endif\n\n"; + + OS << "#ifdef DXIL_OP_INTRINSIC_MAP\n"; + emitDXILIntrinsicMap(DXILOps, OS); + OS << "#endif\n\n"; + + OS << "#ifdef DXIL_OP_OPERATION_TABLE\n"; + emitDXILOperationTable(DXILOps, OS); + OS << "#endif\n\n"; + + OS << "\n"; +} + +} // namespace llvm diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp new file mode 100644 index 000000000000..8477e0639f90 --- /dev/null +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -0,0 +1,2705 @@ +//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// It contains the tablegen backend that emits the decoder functions for +// targets with fixed/variable length instruction set. 
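EmitDXILOperation above brackets each generated section with a preprocessor guard (DXIL_OP_ENUM, DXIL_OP_INTRINSIC_MAP, DXIL_OP_OPERATION_TABLE) so a single emitted file can serve several consumers. A hypothetical consumer selects a section by defining the matching macro before inclusion; the .inc file name here is an assumption for illustration, not taken from this patch:

// Expand only the OpCode/OpCodeClass enums from the generated file.
#define DXIL_OP_ENUM
#include "DXILOperation.inc"
#undef DXIL_OP_ENUM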
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
+#include "VarLenCodeEmitterGen.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCDecoderOps.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace llvm;
+
+#define DEBUG_TYPE "decoder-emitter"
+
+namespace {
+
+STATISTIC(NumEncodings, "Number of encodings considered");
+STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info");
+STATISTIC(NumInstructions, "Number of instructions considered");
+STATISTIC(NumEncodingsSupported, "Number of encodings supported");
+STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
+
+struct EncodingField {
+  unsigned Base, Width, Offset;
+  EncodingField(unsigned B, unsigned W, unsigned O)
+      : Base(B), Width(W), Offset(O) { }
+};
+
+struct OperandInfo {
+  std::vector<EncodingField> Fields;
+  std::string Decoder;
+  bool HasCompleteDecoder;
+  uint64_t InitValue;
+
+  OperandInfo(std::string D, bool HCD)
+      : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {}
+
+  void addField(unsigned Base, unsigned Width, unsigned Offset) {
+    Fields.push_back(EncodingField(Base, Width, Offset));
+  }
+
+  unsigned numFields() const { return Fields.size(); }
+
+  typedef std::vector<EncodingField>::const_iterator const_iterator;
+
+  const_iterator begin() const { return Fields.begin(); }
+  const_iterator end() const { return Fields.end(); }
+};
+
+typedef std::vector<uint8_t> DecoderTable;
+typedef uint32_t DecoderFixup;
+typedef std::vector<DecoderFixup> FixupList;
+typedef std::vector<FixupList> FixupScopeList;
+typedef SmallSetVector<CachedHashString, 16> PredicateSet;
+typedef SmallSetVector<CachedHashString, 16> DecoderSet;
+struct DecoderTableInfo {
+  DecoderTable Table;
+  FixupScopeList FixupStack;
+  PredicateSet Predicates;
+  DecoderSet Decoders;
+};
+
+struct EncodingAndInst {
+  const Record *EncodingDef;
+  const CodeGenInstruction *Inst;
+  StringRef HwModeName;
+
+  EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
+                  StringRef HwModeName = "")
+      : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
+};
+
+struct EncodingIDAndOpcode {
+  unsigned EncodingID;
+  unsigned Opcode;
+
+  EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
+  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
+      : EncodingID(EncodingID), Opcode(Opcode) {}
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
+  if (Value.EncodingDef != Value.Inst->TheDef)
+    OS << Value.EncodingDef->getName() << ":";
+  OS << Value.Inst->TheDef->getName();
+  return OS;
+}
+
+class DecoderEmitter {
+  RecordKeeper &RK;
+  std::vector<EncodingAndInst> NumberedEncodings;
+
+public:
+  // Defaults preserved here for documentation, even though they aren't
+  // strictly necessary given the way that this is currently being called.
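An OperandInfo above is a list of EncodingField (Base, Width, Offset) triples recording where an operand's bits sit in the encoded instruction. At decode time each field contributes Width bits read at Base and placed at Offset, which is what the emitted fieldFromInstruction/insertBits calls later in this file do. A standalone sketch of one field's contribution:

#include <cstdint>

// Extract Width bits of Insn starting at Base and place them at Offset.
static uint64_t applyField(uint64_t Insn, unsigned Base, unsigned Width,
                           unsigned Offset) {
  uint64_t Mask = (Width >= 64) ? ~0ULL : ((1ULL << Width) - 1);
  return ((Insn >> Base) & Mask) << Offset;
}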
+ DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace, + std::string GPrefix = "if (", + std::string GPostfix = " == MCDisassembler::Fail)", + std::string ROK = "MCDisassembler::Success", + std::string RFail = "MCDisassembler::Fail", std::string L = "") + : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)), + GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)), + ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)), + Locals(std::move(L)) {} + + // Emit the decoder state machine table. + void emitTable(formatted_raw_ostream &o, DecoderTable &Table, + unsigned Indentation, unsigned BitWidth, + StringRef Namespace) const; + void emitInstrLenTable(formatted_raw_ostream &OS, + std::vector &InstrLen) const; + void emitPredicateFunction(formatted_raw_ostream &OS, + PredicateSet &Predicates, + unsigned Indentation) const; + void emitDecoderFunction(formatted_raw_ostream &OS, + DecoderSet &Decoders, + unsigned Indentation) const; + + // run - Output the code emitter + void run(raw_ostream &o); + +private: + CodeGenTarget Target; + +public: + std::string PredicateNamespace; + std::string GuardPrefix, GuardPostfix; + std::string ReturnOK, ReturnFail; + std::string Locals; +}; + +} // end anonymous namespace + +// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system +// for a bit value. +// +// BIT_UNFILTERED is used as the init value for a filter position. It is used +// only for filter processings. +typedef enum { + BIT_TRUE, // '1' + BIT_FALSE, // '0' + BIT_UNSET, // '?' + BIT_UNFILTERED // unfiltered +} bit_value_t; + +static bool ValueSet(bit_value_t V) { + return (V == BIT_TRUE || V == BIT_FALSE); +} + +static bool ValueNotSet(bit_value_t V) { + return (V == BIT_UNSET); +} + +static int Value(bit_value_t V) { + return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); +} + +static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { + if (BitInit *bit = dyn_cast(bits.getBit(index))) + return bit->getValue() ? BIT_TRUE : BIT_FALSE; + + // The bit is uninitialized. + return BIT_UNSET; +} + +// Prints the bit value for each position. +static void dumpBits(raw_ostream &o, const BitsInit &bits) { + for (unsigned index = bits.getNumBits(); index > 0; --index) { + switch (bitFromBits(bits, index - 1)) { + case BIT_TRUE: + o << "1"; + break; + case BIT_FALSE: + o << "0"; + break; + case BIT_UNSET: + o << "_"; + break; + default: + llvm_unreachable("unexpected return value from bitFromBits"); + } + } +} + +static BitsInit &getBitsField(const Record &def, StringRef str) { + const RecordVal *RV = def.getValue(str); + if (BitsInit *Bits = dyn_cast(RV->getValue())) + return *Bits; + + // variable length instruction + VarLenInst VLI = VarLenInst(cast(RV->getValue()), RV); + SmallVector Bits; + + for (auto &SI : VLI) { + if (const BitsInit *BI = dyn_cast(SI.Value)) { + for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { + Bits.push_back(BI->getBit(Idx)); + } + } else if (const BitInit *BI = dyn_cast(SI.Value)) { + Bits.push_back(const_cast(BI)); + } else { + for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) + Bits.push_back(UnsetInit::get(def.getRecords())); + } + } + + return *BitsInit::get(def.getRecords(), Bits); +} + +// Representation of the instruction to work on. +typedef std::vector insn_t; + +namespace { + +static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; + +class FilterChooser; + +/// Filter - Filter works with FilterChooser to produce the decoding tree for +/// the ISA. 
+/// +/// It is useful to think of a Filter as governing the switch stmts of the +/// decoding tree in a certain level. Each case stmt delegates to an inferior +/// FilterChooser to decide what further decoding logic to employ, or in another +/// words, what other remaining bits to look at. The FilterChooser eventually +/// chooses a best Filter to do its job. +/// +/// This recursive scheme ends when the number of Opcodes assigned to the +/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when +/// the Filter/FilterChooser combo does not know how to distinguish among the +/// Opcodes assigned. +/// +/// An example of a conflict is +/// +/// Conflict: +/// 111101000.00........00010000.... +/// 111101000.00........0001........ +/// 1111010...00........0001........ +/// 1111010...00.................... +/// 1111010......................... +/// 1111............................ +/// ................................ +/// VST4q8a 111101000_00________00010000____ +/// VST4q8b 111101000_00________00010000____ +/// +/// The Debug output shows the path that the decoding tree follows to reach the +/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced +/// even registers, while VST4q8b is a vst4 to double-spaced odd registers. +/// +/// The encoding info in the .td files does not specify this meta information, +/// which could have been used by the decoder to resolve the conflict. The +/// decoder could try to decode the even/odd register numbering and assign to +/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" +/// version and return the Opcode since the two have the same Asm format string. +class Filter { +protected: + const FilterChooser *Owner;// points to the FilterChooser who owns this filter + unsigned StartBit; // the starting bit position + unsigned NumBits; // number of bits to filter + bool Mixed; // a mixed region contains both set and unset bits + + // Map of well-known segment value to the set of uid's with that value. + std::map> + FilteredInstructions; + + // Set of uid's with non-constant segment values. + std::vector VariableInstructions; + + // Map of well-known segment value to its delegate. + std::map> FilterChooserMap; + + // Number of instructions which fall under FilteredInstructions category. + unsigned NumFiltered; + + // Keeps track of the last opcode in the filtered bucket. + EncodingIDAndOpcode LastOpcFiltered; + +public: + Filter(Filter &&f); + Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); + + ~Filter() = default; + + unsigned getNumFiltered() const { return NumFiltered; } + + EncodingIDAndOpcode getSingletonOpc() const { + assert(NumFiltered == 1); + return LastOpcFiltered; + } + + // Return the filter chooser for the group of instructions without constant + // segment values. + const FilterChooser &getVariableFC() const { + assert(NumFiltered == 1); + assert(FilterChooserMap.size() == 1); + return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); + } + + // Divides the decoding task into sub tasks and delegates them to the + // inferior FilterChooser's. + // + // A special case arises when there's only one entry in the filtered + // instructions. In order to unambiguously decode the singleton, we need to + // match the remaining undecoded encoding bits against the singleton. + void recurse(); + + // Emit table entries to decode instructions given a segment or segments of + // bits. 
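The Filter described above buckets instructions by the value of the chosen bit segment when every bit in that segment is well-known, and sets the rest aside for a fallthrough chooser. A toy standalone version of that partitioning, with encodings reduced to a bits/known-mask pair (the real code walks TableGen BitsInit values instead):

#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

struct Enc {
  uint64_t Bits;      // bit values, where known
  uint64_t KnownMask; // which bit positions are well-known
};

// Partition encodings on the field Insn{StartBit + NumBits - 1 : StartBit}.
static void bucketize(const std::vector<Enc> &Encs, unsigned StartBit,
                      unsigned NumBits,
                      std::map<uint64_t, std::vector<size_t>> &Filtered,
                      std::vector<size_t> &Variable) {
  uint64_t FieldMask = ((NumBits >= 64) ? ~0ULL : ((1ULL << NumBits) - 1))
                       << StartBit;
  for (size_t ID = 0; ID != Encs.size(); ++ID) {
    if ((Encs[ID].KnownMask & FieldMask) == FieldMask)
      Filtered[(Encs[ID].Bits & FieldMask) >> StartBit].push_back(ID);
    else
      Variable.push_back(ID); // some segment bit is unspecified
  }
}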
+ void emitTableEntry(DecoderTableInfo &TableInfo) const; + + // Returns the number of fanout produced by the filter. More fanout implies + // the filter distinguishes more categories of instructions. + unsigned usefulness() const; +}; // end class Filter + +} // end anonymous namespace + +// These are states of our finite state machines used in FilterChooser's +// filterProcessor() which produces the filter candidates to use. +typedef enum { + ATTR_NONE, + ATTR_FILTERED, + ATTR_ALL_SET, + ATTR_ALL_UNSET, + ATTR_MIXED +} bitAttr_t; + +/// FilterChooser - FilterChooser chooses the best filter among a set of Filters +/// in order to perform the decoding of instructions at the current level. +/// +/// Decoding proceeds from the top down. Based on the well-known encoding bits +/// of instructions available, FilterChooser builds up the possible Filters that +/// can further the task of decoding by distinguishing among the remaining +/// candidate instructions. +/// +/// Once a filter has been chosen, it is called upon to divide the decoding task +/// into sub-tasks and delegates them to its inferior FilterChoosers for further +/// processings. +/// +/// It is useful to think of a Filter as governing the switch stmts of the +/// decoding tree. And each case is delegated to an inferior FilterChooser to +/// decide what further remaining bits to look at. +namespace { + +class FilterChooser { +protected: + friend class Filter; + + // Vector of codegen instructions to choose our filter. + ArrayRef AllInstructions; + + // Vector of uid's for this filter chooser to work on. + // The first member of the pair is the opcode id being decoded, the second is + // the opcode id that should be emitted. + const std::vector &Opcodes; + + // Lookup table for the operand decoding of instructions. + const std::map> &Operands; + + // Vector of candidate filters. + std::vector Filters; + + // Array of bit values passed down from our parent. + // Set to all BIT_UNFILTERED's for Parent == NULL. + std::vector FilterBitValues; + + // Links to the FilterChooser above us in the decoding tree. + const FilterChooser *Parent; + + // Index of the best filter from Filters. + int BestIndex; + + // Width of instructions + unsigned BitWidth; + + // Parent emitter + const DecoderEmitter *Emitter; + +public: + FilterChooser(ArrayRef Insts, + const std::vector &IDs, + const std::map> &Ops, + unsigned BW, const DecoderEmitter *E) + : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), + FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), + BitWidth(BW), Emitter(E) { + doFilter(); + } + + FilterChooser(ArrayRef Insts, + const std::vector &IDs, + const std::map> &Ops, + const std::vector &ParentFilterBitValues, + const FilterChooser &parent) + : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), + FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), + BitWidth(parent.BitWidth), Emitter(parent.Emitter) { + doFilter(); + } + + FilterChooser(const FilterChooser &) = delete; + void operator=(const FilterChooser &) = delete; + + unsigned getBitWidth() const { return BitWidth; } + +protected: + // Populates the insn given the uid. + void insnWithID(insn_t &Insn, unsigned Opcode) const { + BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst"); + Insn.resize(BitWidth > Bits.getNumBits() ? 
BitWidth : Bits.getNumBits(), + BIT_UNSET); + // We may have a SoftFail bitmask, which specifies a mask where an encoding + // may differ from the value in "Inst" and yet still be valid, but the + // disassembler should return SoftFail instead of Success. + // + // This is used for marking UNPREDICTABLE instructions in the ARM world. + const RecordVal *RV = + AllInstructions[Opcode].EncodingDef->getValue("SoftFail"); + const BitsInit *SFBits = RV ? dyn_cast(RV->getValue()) : nullptr; + for (unsigned i = 0; i < Bits.getNumBits(); ++i) { + if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) + Insn[i] = BIT_UNSET; + else + Insn[i] = bitFromBits(Bits, i); + } + } + + // Emit the name of the encoding/instruction pair. + void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { + const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; + const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; + if (EncodingDef != InstDef) + OS << EncodingDef->getName() << ":"; + OS << InstDef->getName(); + } + + // Populates the field of the insn given the start position and the number of + // consecutive bits to scan for. + // + // Returns false if there exists any uninitialized bit value in the range. + // Returns true, otherwise. + bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit, + unsigned NumBits) const; + + /// dumpFilterArray - dumpFilterArray prints out debugging info for the given + /// filter array as a series of chars. + void dumpFilterArray(raw_ostream &o, + const std::vector & filter) const; + + /// dumpStack - dumpStack traverses the filter chooser chain and calls + /// dumpFilterArray on each filter chooser up to the top level one. + void dumpStack(raw_ostream &o, const char *prefix) const; + + Filter &bestFilter() { + assert(BestIndex != -1 && "BestIndex not set"); + return Filters[BestIndex]; + } + + bool PositionFiltered(unsigned i) const { + return ValueSet(FilterBitValues[i]); + } + + // Calculates the island(s) needed to decode the instruction. + // This returns a lit of undecoded bits of an instructions, for example, + // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be + // decoded bits in order to verify that the instruction matches the Opcode. + unsigned getIslands(std::vector &StartBits, + std::vector &EndBits, + std::vector &FieldVals, + const insn_t &Insn) const; + + // Emits code to check the Predicates member of an instruction are true. + // Returns true if predicate matches were emitted, false otherwise. + bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, + unsigned Opc) const; + bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, + raw_ostream &OS) const; + + bool doesOpcodeNeedPredicate(unsigned Opc) const; + unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; + void emitPredicateTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; + + void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; + + // Emits table entries to decode the singleton. + void emitSingletonTableEntry(DecoderTableInfo &TableInfo, + EncodingIDAndOpcode Opc) const; + + // Emits code to decode the singleton, and then to decode the rest. 
+ void emitSingletonTableEntry(DecoderTableInfo &TableInfo, + const Filter &Best) const; + + void emitBinaryParser(raw_ostream &o, unsigned &Indentation, + const OperandInfo &OpInfo, + bool &OpHasCompleteDecoder) const; + + void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, + bool &HasCompleteDecoder) const; + unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, + bool &HasCompleteDecoder) const; + + // Assign a single filter and run with it. + void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); + + // reportRegion is a helper function for filterProcessor to mark a region as + // eligible for use as a filter region. + void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, + bool AllowMixed); + + // FilterProcessor scans the well-known encoding bits of the instructions and + // builds up a list of candidate filters. It chooses the best filter and + // recursively descends down the decoding tree. + bool filterProcessor(bool AllowMixed, bool Greedy = true); + + // Decides on the best configuration of filter(s) to use in order to decode + // the instructions. A conflict of instructions may occur, in which case we + // dump the conflict set to the standard error. + void doFilter(); + +public: + // emitTableEntries - Emit state machine entries to decode our share of + // instructions. + void emitTableEntries(DecoderTableInfo &TableInfo) const; +}; + +} // end anonymous namespace + +/////////////////////////// +// // +// Filter Implementation // +// // +/////////////////////////// + +Filter::Filter(Filter &&f) + : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), + FilteredInstructions(std::move(f.FilteredInstructions)), + VariableInstructions(std::move(f.VariableInstructions)), + FilterChooserMap(std::move(f.FilterChooserMap)), NumFiltered(f.NumFiltered), + LastOpcFiltered(f.LastOpcFiltered) { +} + +Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, + bool mixed) + : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { + assert(StartBit + NumBits - 1 < Owner->BitWidth); + + NumFiltered = 0; + LastOpcFiltered = {0, 0}; + + for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) { + insn_t Insn; + + // Populates the insn given the uid. + Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID); + + uint64_t Field; + // Scans the segment for possibly well-specified encoding bits. + bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits); + + if (ok) { + // The encoding bits are well-known. Lets add the uid of the + // instruction into the bucket keyed off the constant field value. + LastOpcFiltered = Owner->Opcodes[i]; + FilteredInstructions[Field].push_back(LastOpcFiltered); + ++NumFiltered; + } else { + // Some of the encoding bit(s) are unspecified. This contributes to + // one additional member of "Variable" instructions. + VariableInstructions.push_back(Owner->Opcodes[i]); + } + } + + assert((FilteredInstructions.size() + VariableInstructions.size() > 0) + && "Filter returns no instruction categories"); +} + +// Divides the decoding task into sub tasks and delegates them to the +// inferior FilterChooser's. +// +// A special case arises when there's only one entry in the filtered +// instructions. In order to unambiguously decode the singleton, we need to +// match the remaining undecoded encoding bits against the singleton. +void Filter::recurse() { + // Starts by inheriting our parent filter chooser's filter bit values. 
+ std::vector BitValueArray(Owner->FilterBitValues); + + if (!VariableInstructions.empty()) { + // Conservatively marks each segment position as BIT_UNSET. + for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) + BitValueArray[StartBit + bitIndex] = BIT_UNSET; + + // Delegates to an inferior filter chooser for further processing on this + // group of instructions whose segment values are variable. + FilterChooserMap.insert(std::make_pair(NO_FIXED_SEGMENTS_SENTINEL, + std::make_unique(Owner->AllInstructions, + VariableInstructions, Owner->Operands, BitValueArray, *Owner))); + } + + // No need to recurse for a singleton filtered instruction. + // See also Filter::emit*(). + if (getNumFiltered() == 1) { + assert(FilterChooserMap.size() == 1); + return; + } + + // Otherwise, create sub choosers. + for (const auto &Inst : FilteredInstructions) { + + // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. + for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { + if (Inst.first & (1ULL << bitIndex)) + BitValueArray[StartBit + bitIndex] = BIT_TRUE; + else + BitValueArray[StartBit + bitIndex] = BIT_FALSE; + } + + // Delegates to an inferior filter chooser for further processing on this + // category of instructions. + FilterChooserMap.insert(std::make_pair( + Inst.first, std::make_unique( + Owner->AllInstructions, Inst.second, + Owner->Operands, BitValueArray, *Owner))); + } +} + +static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, + uint32_t DestIdx) { + // Any NumToSkip fixups in the current scope can resolve to the + // current location. + for (FixupList::const_reverse_iterator I = Fixups.rbegin(), + E = Fixups.rend(); + I != E; ++I) { + // Calculate the distance from the byte following the fixup entry byte + // to the destination. The Target is calculated from after the 16-bit + // NumToSkip entry itself, so subtract two from the displacement here + // to account for that. + uint32_t FixupIdx = *I; + uint32_t Delta = DestIdx - FixupIdx - 3; + // Our NumToSkip entries are 24-bits. Make sure our table isn't too + // big. + assert(Delta < (1u << 24)); + Table[FixupIdx] = (uint8_t)Delta; + Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); + Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); + } +} + +// Emit table entries to decode instructions given a segment or segments +// of bits. +void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { + TableInfo.Table.push_back(MCD::OPC_ExtractField); + TableInfo.Table.push_back(StartBit); + TableInfo.Table.push_back(NumBits); + + // A new filter entry begins a new scope for fixup resolution. + TableInfo.FixupStack.emplace_back(); + + DecoderTable &Table = TableInfo.Table; + + size_t PrevFilter = 0; + bool HasFallthrough = false; + for (auto &Filter : FilterChooserMap) { + // Field value -1 implies a non-empty set of variable instructions. + // See also recurse(). + if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { + HasFallthrough = true; + + // Each scope should always have at least one filter value to check + // for. + assert(PrevFilter != 0 && "empty filter set!"); + FixupList &CurScope = TableInfo.FixupStack.back(); + // Resolve any NumToSkip fixups in the current scope. + resolveTableFixups(Table, CurScope, Table.size()); + CurScope.clear(); + PrevFilter = 0; // Don't re-process the filter's fallthrough. + } else { + Table.push_back(MCD::OPC_FilterValue); + // Encode and emit the value to filter against. 
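The filter value emitted below is ULEB128-encoded, which is why both the emission here and the table printer later in this file treat a set high bit as "more bytes follow". A standalone re-implementation of the encoding for illustration; the real code uses encodeULEB128 from llvm/Support/LEB128.h:

#include <cstdint>
#include <vector>

// Emit Value as ULEB128: 7 data bits per byte, high bit set on all but the
// last byte. Returns the number of bytes produced.
static unsigned emitULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  unsigned Count = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
    ++Count;
  } while (Value != 0);
  return Count;
}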
+ uint8_t Buffer[16]; + unsigned Len = encodeULEB128(Filter.first, Buffer); + Table.insert(Table.end(), Buffer, Buffer + Len); + // Reserve space for the NumToSkip entry. We'll backpatch the value + // later. + PrevFilter = Table.size(); + Table.push_back(0); + Table.push_back(0); + Table.push_back(0); + } + + // We arrive at a category of instructions with the same segment value. + // Now delegate to the sub filter chooser for further decodings. + // The case may fallthrough, which happens if the remaining well-known + // encoding bits do not match exactly. + Filter.second->emitTableEntries(TableInfo); + + // Now that we've emitted the body of the handler, update the NumToSkip + // of the filter itself to be able to skip forward when false. Subtract + // two as to account for the width of the NumToSkip field itself. + if (PrevFilter) { + uint32_t NumToSkip = Table.size() - PrevFilter - 3; + assert(NumToSkip < (1u << 24) && "disassembler decoding table too large!"); + Table[PrevFilter] = (uint8_t)NumToSkip; + Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); + Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); + } + } + + // Any remaining unresolved fixups bubble up to the parent fixup scope. + assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); + FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; + FixupScopeList::iterator Dest = Source - 1; + llvm::append_range(*Dest, *Source); + TableInfo.FixupStack.pop_back(); + + // If there is no fallthrough, then the final filter should get fixed + // up according to the enclosing scope rather than the current position. + if (!HasFallthrough) + TableInfo.FixupStack.back().push_back(PrevFilter); +} + +// Returns the number of fanout produced by the filter. More fanout implies +// the filter distinguishes more categories of instructions. +unsigned Filter::usefulness() const { + if (!VariableInstructions.empty()) + return FilteredInstructions.size(); + else + return FilteredInstructions.size() + 1; +} + +////////////////////////////////// +// // +// Filterchooser Implementation // +// // +////////////////////////////////// + +// Emit the decoder state machine table. +void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, + unsigned Indentation, unsigned BitWidth, + StringRef Namespace) const { + OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace + << BitWidth << "[] = {\n"; + + Indentation += 2; + + // FIXME: We may be able to use the NumToSkip values to recover + // appropriate indentation levels. + DecoderTable::const_iterator I = Table.begin(); + DecoderTable::const_iterator E = Table.end(); + while (I != E) { + assert (I < E && "incomplete decode table entry!"); + + uint64_t Pos = I - Table.begin(); + OS << "/* " << Pos << " */"; + OS.PadToColumn(12); + + switch (*I) { + default: + PrintFatalError("invalid decode table opcode"); + case MCD::OPC_ExtractField: { + ++I; + unsigned Start = *I++; + unsigned Len = *I++; + OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", " + << Len << ", // Inst{"; + if (Len > 1) + OS << (Start + Len - 1) << "-"; + OS << Start << "} ...\n"; + break; + } + case MCD::OPC_FilterValue: { + ++I; + OS.indent(Indentation) << "MCD::OPC_FilterValue, "; + // The filter value is ULEB128 encoded. + while (*I >= 128) + OS << (unsigned)*I++ << ", "; + OS << (unsigned)*I++ << ", "; + + // 24-bit numtoskip value. 
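The three bytes read next form the little-endian 24-bit NumToSkip field that resolveTableFixups above backpatches; the "16-bit ... subtract two" wording in that function's comment appears to predate the move to 24-bit entries, since the code stores three bytes and subtracts three. As a worked example, a fixup whose skip bytes start at table index 100 with destination index 160 stores 160 - 100 - 3 = 57. A sketch of one patch, mirroring the arithmetic above:

#include <cassert>
#include <cstdint>
#include <vector>

// Write a little-endian 24-bit skip, measured from just past the field.
static void patchNumToSkip(std::vector<uint8_t> &Table, uint32_t FixupIdx,
                           uint32_t DestIdx) {
  uint32_t Delta = DestIdx - FixupIdx - 3;
  assert(Delta < (1u << 24) && "decoding table too large");
  Table[FixupIdx] = static_cast<uint8_t>(Delta);
  Table[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
  Table[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
}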
+ uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << (unsigned)Byte << ", "; + Byte = *I++; + OS << (unsigned)Byte << ", "; + NumToSkip |= Byte << 8; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 16; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_CheckField: { + ++I; + unsigned Start = *I++; + unsigned Len = *I++; + OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", " + << Len << ", ";// << Val << ", " << NumToSkip << ",\n"; + // ULEB128 encoded field value. + for (; *I >= 128; ++I) + OS << (unsigned)*I << ", "; + OS << (unsigned)*I++ << ", "; + // 24-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << (unsigned)Byte << ", "; + Byte = *I++; + OS << (unsigned)Byte << ", "; + NumToSkip |= Byte << 8; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 16; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_CheckPredicate: { + ++I; + OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; + for (; *I >= 128; ++I) + OS << (unsigned)*I << ", "; + OS << (unsigned)*I++ << ", "; + + // 24-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << (unsigned)Byte << ", "; + Byte = *I++; + OS << (unsigned)Byte << ", "; + NumToSkip |= Byte << 8; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 16; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_Decode: + case MCD::OPC_TryDecode: { + bool IsTry = *I == MCD::OPC_TryDecode; + ++I; + // Extract the ULEB128 encoded Opcode to a buffer. + uint8_t Buffer[16], *p = Buffer; + while ((*p++ = *I++) >= 128) + assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer) + && "ULEB128 value too large!"); + // Decode the Opcode value. + unsigned Opc = decodeULEB128(Buffer); + OS.indent(Indentation) << "MCD::OPC_" << (IsTry ? "Try" : "") + << "Decode, "; + for (p = Buffer; *p >= 128; ++p) + OS << (unsigned)*p << ", "; + OS << (unsigned)*p << ", "; + + // Decoder index. + for (; *I >= 128; ++I) + OS << (unsigned)*I << ", "; + OS << (unsigned)*I++ << ", "; + + if (!IsTry) { + OS << "// Opcode: " << NumberedEncodings[Opc] << "\n"; + break; + } + + // Fallthrough for OPC_TryDecode. + + // 24-bit numtoskip value. 
+ uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << (unsigned)Byte << ", "; + Byte = *I++; + OS << (unsigned)Byte << ", "; + NumToSkip |= Byte << 8; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 16; + + OS << "// Opcode: " << NumberedEncodings[Opc] + << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_SoftFail: { + ++I; + OS.indent(Indentation) << "MCD::OPC_SoftFail"; + // Positive mask + uint64_t Value = 0; + unsigned Shift = 0; + do { + OS << ", " << (unsigned)*I; + Value += (*I & 0x7f) << Shift; + Shift += 7; + } while (*I++ >= 128); + if (Value > 127) { + OS << " /* 0x"; + OS.write_hex(Value); + OS << " */"; + } + // Negative mask + Value = 0; + Shift = 0; + do { + OS << ", " << (unsigned)*I; + Value += (*I & 0x7f) << Shift; + Shift += 7; + } while (*I++ >= 128); + if (Value > 127) { + OS << " /* 0x"; + OS.write_hex(Value); + OS << " */"; + } + OS << ",\n"; + break; + } + case MCD::OPC_Fail: { + ++I; + OS.indent(Indentation) << "MCD::OPC_Fail,\n"; + break; + } + } + } + OS.indent(Indentation) << "0\n"; + + Indentation -= 2; + + OS.indent(Indentation) << "};\n\n"; +} + +void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, + std::vector &InstrLen) const { + OS << "static const uint8_t InstrLenTable[] = {\n"; + for (unsigned &Len : InstrLen) { + OS << Len << ",\n"; + } + OS << "};\n\n"; +} + +void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, + PredicateSet &Predicates, + unsigned Indentation) const { + // The predicate function is just a big switch statement based on the + // input predicate index. + OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " + << "const FeatureBitset &Bits) {\n"; + Indentation += 2; + if (!Predicates.empty()) { + OS.indent(Indentation) << "switch (Idx) {\n"; + OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; + unsigned Index = 0; + for (const auto &Predicate : Predicates) { + OS.indent(Indentation) << "case " << Index++ << ":\n"; + OS.indent(Indentation+2) << "return (" << Predicate << ");\n"; + } + OS.indent(Indentation) << "}\n"; + } else { + // No case statement to emit + OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; + } + Indentation -= 2; + OS.indent(Indentation) << "}\n\n"; +} + +void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, + DecoderSet &Decoders, + unsigned Indentation) const { + // The decoder function is just a big switch statement based on the + // input decoder index. + OS.indent(Indentation) << "template \n"; + OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," + << " unsigned Idx, InsnType insn, MCInst &MI,\n"; + OS.indent(Indentation) + << " uint64_t " + << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; + Indentation += 2; + OS.indent(Indentation) << "DecodeComplete = true;\n"; + // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits + // It would be better for emitBinaryParser to use a 64-bit tmp whenever + // possible but fall back to an InsnType-sized tmp for truly large fields. 
+ OS.indent(Indentation) << "using TmpType = " + "std::conditional_t::" + "value, InsnType, uint64_t>;\n"; + OS.indent(Indentation) << "TmpType tmp;\n"; + OS.indent(Indentation) << "switch (Idx) {\n"; + OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; + unsigned Index = 0; + for (const auto &Decoder : Decoders) { + OS.indent(Indentation) << "case " << Index++ << ":\n"; + OS << Decoder; + OS.indent(Indentation+2) << "return S;\n"; + } + OS.indent(Indentation) << "}\n"; + Indentation -= 2; + OS.indent(Indentation) << "}\n\n"; +} + +// Populates the field of the insn given the start position and the number of +// consecutive bits to scan for. +// +// Returns false if and on the first uninitialized bit value encountered. +// Returns true, otherwise. +bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, + unsigned StartBit, unsigned NumBits) const { + Field = 0; + + for (unsigned i = 0; i < NumBits; ++i) { + if (Insn[StartBit + i] == BIT_UNSET) + return false; + + if (Insn[StartBit + i] == BIT_TRUE) + Field = Field | (1ULL << i); + } + + return true; +} + +/// dumpFilterArray - dumpFilterArray prints out debugging info for the given +/// filter array as a series of chars. +void FilterChooser::dumpFilterArray(raw_ostream &o, + const std::vector &filter) const { + for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { + switch (filter[bitIndex - 1]) { + case BIT_UNFILTERED: + o << "."; + break; + case BIT_UNSET: + o << "_"; + break; + case BIT_TRUE: + o << "1"; + break; + case BIT_FALSE: + o << "0"; + break; + } + } +} + +/// dumpStack - dumpStack traverses the filter chooser chain and calls +/// dumpFilterArray on each filter chooser up to the top level one. +void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { + const FilterChooser *current = this; + + while (current) { + o << prefix; + dumpFilterArray(o, current->FilterBitValues); + o << '\n'; + current = current->Parent; + } +} + +// Calculates the island(s) needed to decode the instruction. +// This returns a list of undecoded bits of an instructions, for example, +// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be +// decoded bits in order to verify that the instruction matches the Opcode. +unsigned FilterChooser::getIslands(std::vector &StartBits, + std::vector &EndBits, + std::vector &FieldVals, + const insn_t &Insn) const { + unsigned Num, BitNo; + Num = BitNo = 0; + + uint64_t FieldVal = 0; + + // 0: Init + // 1: Water (the bit value does not affect decoding) + // 2: Island (well-known bit value needed for decoding) + int State = 0; + + for (unsigned i = 0; i < BitWidth; ++i) { + int64_t Val = Value(Insn[i]); + bool Filtered = PositionFiltered(i); + switch (State) { + default: llvm_unreachable("Unreachable code!"); + case 0: + case 1: + if (Filtered || Val == -1) + State = 1; // Still in Water + else { + State = 2; // Into the Island + BitNo = 0; + StartBits.push_back(i); + FieldVal = Val; + } + break; + case 2: + if (Filtered || Val == -1) { + State = 1; // Into the Water + EndBits.push_back(i - 1); + FieldVals.push_back(FieldVal); + ++Num; + } else { + State = 2; // Still in Island + ++BitNo; + FieldVal = FieldVal | Val << BitNo; + } + break; + } + } + // If we are still in Island after the loop, do some housekeeping. 
+ if (State == 2) { + EndBits.push_back(BitWidth - 1); + FieldVals.push_back(FieldVal); + ++Num; + } + + assert(StartBits.size() == Num && EndBits.size() == Num && + FieldVals.size() == Num); + return Num; +} + +void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, + const OperandInfo &OpInfo, + bool &OpHasCompleteDecoder) const { + const std::string &Decoder = OpInfo.Decoder; + + bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; + + if (UseInsertBits) { + o.indent(Indentation) << "tmp = 0x"; + o.write_hex(OpInfo.InitValue); + o << ";\n"; + } + + for (const EncodingField &EF : OpInfo) { + o.indent(Indentation); + if (UseInsertBits) + o << "insertBits(tmp, "; + else + o << "tmp = "; + o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; + if (UseInsertBits) + o << ", " << EF.Offset << ", " << EF.Width << ')'; + else if (EF.Offset != 0) + o << " << " << EF.Offset; + o << ";\n"; + } + + if (Decoder != "") { + OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; + o.indent(Indentation) << Emitter->GuardPrefix << Decoder + << "(MI, tmp, Address, Decoder)" + << Emitter->GuardPostfix + << " { " << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ") + << "return MCDisassembler::Fail; }\n"; + } else { + OpHasCompleteDecoder = true; + o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; + } +} + +void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, + unsigned Opc, bool &HasCompleteDecoder) const { + HasCompleteDecoder = true; + + for (const auto &Op : Operands.find(Opc)->second) { + // If a custom instruction decoder was specified, use that. + if (Op.numFields() == 0 && !Op.Decoder.empty()) { + HasCompleteDecoder = Op.HasCompleteDecoder; + OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder + << "(MI, insn, Address, Decoder)" + << Emitter->GuardPostfix + << " { " << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") + << "return MCDisassembler::Fail; }\n"; + break; + } + + bool OpHasCompleteDecoder; + emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); + if (!OpHasCompleteDecoder) + HasCompleteDecoder = false; + } +} + +unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, + unsigned Opc, + bool &HasCompleteDecoder) const { + // Build up the predicate string. + SmallString<256> Decoder; + // FIXME: emitDecoder() function can take a buffer directly rather than + // a stream. + raw_svector_ostream S(Decoder); + unsigned I = 4; + emitDecoder(S, I, Opc, HasCompleteDecoder); + + // Using the full decoder string as the key value here is a bit + // heavyweight, but is effective. If the string comparisons become a + // performance concern, we can implement a mangling of the predicate + // data easily enough with a map back to the actual string. That's + // overkill for now, though. + + // Make sure the predicate is in the table. + Decoders.insert(CachedHashString(Decoder)); + // Now figure out the index for when we write out the table. + DecoderSet::const_iterator P = find(Decoders, Decoder.str()); + return (unsigned)(P - Decoders.begin()); +} + +// If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 
+bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, + raw_ostream &OS) const { + if (auto *D = dyn_cast(&Val)) { + if (!D->getDef()->isSubClassOf("SubtargetFeature")) + return true; + OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() + << "]"; + return false; + } + if (auto *D = dyn_cast(&Val)) { + std::string Op = D->getOperator()->getAsString(); + if (Op == "not" && D->getNumArgs() == 1) { + OS << '!'; + return emitPredicateMatchAux(*D->getArg(0), true, OS); + } + if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { + bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); + if (Paren) + OS << '('; + ListSeparator LS(Op == "any_of" ? " || " : " && "); + for (auto *Arg : D->getArgs()) { + OS << LS; + if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS)) + return true; + } + if (Paren) + OS << ')'; + return false; + } + } + return true; +} + +bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, + unsigned Opc) const { + ListInit *Predicates = + AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); + bool IsFirstEmission = true; + for (unsigned i = 0; i < Predicates->size(); ++i) { + Record *Pred = Predicates->getElementAsRecord(i); + if (!Pred->getValue("AssemblerMatcherPredicate")) + continue; + + if (!isa(Pred->getValue("AssemblerCondDag")->getValue())) + continue; + + if (!IsFirstEmission) + o << " && "; + if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"), + Predicates->size() > 1, o)) + PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); + IsFirstEmission = false; + } + return !Predicates->empty(); +} + +bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { + ListInit *Predicates = + AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); + for (unsigned i = 0; i < Predicates->size(); ++i) { + Record *Pred = Predicates->getElementAsRecord(i); + if (!Pred->getValue("AssemblerMatcherPredicate")) + continue; + + if (isa(Pred->getValue("AssemblerCondDag")->getValue())) + return true; + } + return false; +} + +unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, + StringRef Predicate) const { + // Using the full predicate string as the key value here is a bit + // heavyweight, but is effective. If the string comparisons become a + // performance concern, we can implement a mangling of the predicate + // data easily enough with a map back to the actual string. That's + // overkill for now, though. + + // Make sure the predicate is in the table. + TableInfo.Predicates.insert(CachedHashString(Predicate)); + // Now figure out the index for when we write out the table. + PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); + return (unsigned)(P - TableInfo.Predicates.begin()); +} + +void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const { + if (!doesOpcodeNeedPredicate(Opc)) + return; + + // Build up the predicate string. + SmallString<256> Predicate; + // FIXME: emitPredicateMatch() functions can take a buffer directly rather + // than a stream. + raw_svector_ostream PS(Predicate); + unsigned I = 0; + emitPredicateMatch(PS, I, Opc); + + // Figure out the index into the predicate table for the predicate just + // computed. 
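emitPredicateMatchAux above lowers an AssemblerCondDag into a C++ expression over the subtarget feature bits. As a worked example (the namespace and feature names are placeholders), a dag of the form

  (all_of FeatureA, (not FeatureB))

is emitted as

  Bits[NS::FeatureA] && !Bits[NS::FeatureB]

while any_of joins its arguments with " || ", and a multi-argument dag is parenthesized when it appears under another binary operator.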
+ unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); + SmallString<16> PBytes; + raw_svector_ostream S(PBytes); + encodeULEB128(PIdx, S); + + TableInfo.Table.push_back(MCD::OPC_CheckPredicate); + // Predicate index + for (unsigned i = 0, e = PBytes.size(); i != e; ++i) + TableInfo.Table.push_back(PBytes[i]); + // Push location for NumToSkip backpatching. + TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); +} + +void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const { + const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail"); + BitsInit *SFBits = RV ? dyn_cast(RV->getValue()) : nullptr; + + if (!SFBits) return; + BitsInit *InstBits = + AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst"); + + APInt PositiveMask(BitWidth, 0ULL); + APInt NegativeMask(BitWidth, 0ULL); + for (unsigned i = 0; i < BitWidth; ++i) { + bit_value_t B = bitFromBits(*SFBits, i); + bit_value_t IB = bitFromBits(*InstBits, i); + + if (B != BIT_TRUE) continue; + + switch (IB) { + case BIT_FALSE: + // The bit is meant to be false, so emit a check to see if it is true. + PositiveMask.setBit(i); + break; + case BIT_TRUE: + // The bit is meant to be true, so emit a check to see if it is false. + NegativeMask.setBit(i); + break; + default: + // The bit is not set; this must be an error! + errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " + << AllInstructions[Opc] << " is set but Inst{" << i + << "} is unset!\n" + << " - You can only mark a bit as SoftFail if it is fully defined" + << " (1/0 - not '?') in Inst\n"; + return; + } + } + + bool NeedPositiveMask = PositiveMask.getBoolValue(); + bool NeedNegativeMask = NegativeMask.getBoolValue(); + + if (!NeedPositiveMask && !NeedNegativeMask) + return; + + TableInfo.Table.push_back(MCD::OPC_SoftFail); + + SmallString<16> MaskBytes; + raw_svector_ostream S(MaskBytes); + if (NeedPositiveMask) { + encodeULEB128(PositiveMask.getZExtValue(), S); + for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) + TableInfo.Table.push_back(MaskBytes[i]); + } else + TableInfo.Table.push_back(0); + if (NeedNegativeMask) { + MaskBytes.clear(); + encodeULEB128(NegativeMask.getZExtValue(), S); + for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) + TableInfo.Table.push_back(MaskBytes[i]); + } else + TableInfo.Table.push_back(0); +} + +// Emits table entries to decode the singleton. +void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, + EncodingIDAndOpcode Opc) const { + std::vector StartBits; + std::vector EndBits; + std::vector FieldVals; + insn_t Insn; + insnWithID(Insn, Opc.EncodingID); + + // Look for islands of undecoded bits of the singleton. + getIslands(StartBits, EndBits, FieldVals, Insn); + + unsigned Size = StartBits.size(); + + // Emit the predicate table entry if one is needed. + emitPredicateTableEntry(TableInfo, Opc.EncodingID); + + // Check any additional encoding fields needed. + for (unsigned I = Size; I != 0; --I) { + unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1; + TableInfo.Table.push_back(MCD::OPC_CheckField); + TableInfo.Table.push_back(StartBits[I-1]); + TableInfo.Table.push_back(NumBits); + uint8_t Buffer[16], *p; + encodeULEB128(FieldVals[I-1], Buffer); + for (p = Buffer; *p >= 128 ; ++p) + TableInfo.Table.push_back(*p); + TableInfo.Table.push_back(*p); + // Push location for NumToSkip backpatching. 
+ TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); + // The fixup is always 24-bits, so go ahead and allocate the space + // in the table so all our relative position calculations work OK even + // before we fully resolve the real value here. + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); + } + + // Check for soft failure of the match. + emitSoftFailTableEntry(TableInfo, Opc.EncodingID); + + bool HasCompleteDecoder; + unsigned DIdx = + getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); + + // Produce OPC_Decode or OPC_TryDecode opcode based on the information + // whether the instruction decoder is complete or not. If it is complete + // then it handles all possible values of remaining variable/unfiltered bits + // and for any value can determine if the bitpattern is a valid instruction + // or not. This means OPC_Decode will be the final step in the decoding + // process. If it is not complete, then the Fail return code from the + // decoder method indicates that additional processing should be done to see + // if there is any other instruction that also matches the bitpattern and + // can decode it. + TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode : + MCD::OPC_TryDecode); + NumEncodingsSupported++; + uint8_t Buffer[16], *p; + encodeULEB128(Opc.Opcode, Buffer); + for (p = Buffer; *p >= 128 ; ++p) + TableInfo.Table.push_back(*p); + TableInfo.Table.push_back(*p); + + SmallString<16> Bytes; + raw_svector_ostream S(Bytes); + encodeULEB128(DIdx, S); + + // Decoder index + for (unsigned i = 0, e = Bytes.size(); i != e; ++i) + TableInfo.Table.push_back(Bytes[i]); + + if (!HasCompleteDecoder) { + // Push location for NumToSkip backpatching. + TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); + // Allocate the space for the fixup. + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); + } +} + +// Emits table entries to decode the singleton, and then to decode the rest. +void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, + const Filter &Best) const { + EncodingIDAndOpcode Opc = Best.getSingletonOpc(); + + // complex singletons need predicate checks from the first singleton + // to refer forward to the variable filterchooser that follows. + TableInfo.FixupStack.emplace_back(); + + emitSingletonTableEntry(TableInfo, Opc); + + resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), + TableInfo.Table.size()); + TableInfo.FixupStack.pop_back(); + + Best.getVariableFC().emitTableEntries(TableInfo); +} + +// Assign a single filter and run with it. Top level API client can initialize +// with a single filter to start the filtering process. +void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, + bool mixed) { + Filters.clear(); + Filters.emplace_back(*this, startBit, numBit, true); + BestIndex = 0; // Sole Filter instance to choose from. + bestFilter().recurse(); +} + +// reportRegion is a helper function for filterProcessor to mark a region as +// eligible for use as a filter region. 
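+// Note that reportRegion records a candidate Filter only when the region's
+// attribute agrees with the scan mode: ATTR_MIXED regions are kept when
+// AllowMixed is true, ATTR_ALL_SET regions when it is false; every other
+// combination is dropped.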
+void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
+                                 unsigned BitIndex, bool AllowMixed) {
+  if (RA == ATTR_MIXED && AllowMixed)
+    Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
+  else if (RA == ATTR_ALL_SET && !AllowMixed)
+    Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
+}
+
+// FilterProcessor scans the well-known encoding bits of the instructions and
+// builds up a list of candidate filters. It chooses the best filter and
+// recursively descends down the decoding tree.
+bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
+  Filters.clear();
+  BestIndex = -1;
+  unsigned numInstructions = Opcodes.size();
+
+  assert(numInstructions && "Filter created with no instructions");
+
+  // No further filtering is necessary.
+  if (numInstructions == 1)
+    return true;
+
+  // Heuristics. See also doFilter()'s "Heuristics" comment when num of
+  // instructions is 3.
+  if (AllowMixed && !Greedy) {
+    assert(numInstructions == 3);
+
+    for (auto Opcode : Opcodes) {
+      std::vector<unsigned> StartBits;
+      std::vector<unsigned> EndBits;
+      std::vector<uint64_t> FieldVals;
+      insn_t Insn;
+
+      insnWithID(Insn, Opcode.EncodingID);
+
+      // Look for islands of undecoded bits of any instruction.
+      if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
+        // Found an instruction with island(s). Now just assign a filter.
+        runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
+        return true;
+      }
+    }
+  }
+
+  unsigned BitIndex;
+
+  // We maintain BIT_WIDTH copies of the bitAttrs automaton.
+  // The automaton consumes the corresponding bit from each
+  // instruction.
+  //
+  //   Input symbols: 0, 1, and _ (unset).
+  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
+  //   Initial state: NONE.
+  //
+  // (NONE) ------- [01] -> (ALL_SET)
+  // (NONE) ------- _ ----> (ALL_UNSET)
+  // (ALL_SET) ---- [01] -> (ALL_SET)
+  // (ALL_SET) ---- _ ----> (MIXED)
+  // (ALL_UNSET) -- [01] -> (MIXED)
+  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
+  // (MIXED) ------ . ----> (MIXED)
+  // (FILTERED)---- . ----> (FILTERED)
+
+  std::vector<bitAttr_t> bitAttrs;
+
+  // FILTERED bit positions provide no entropy and are not worthy of pursuing.
+  // Filter::recurse() sets either BIT_TRUE or BIT_FALSE for each position.
+  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
+    if (FilterBitValues[BitIndex] == BIT_TRUE ||
+        FilterBitValues[BitIndex] == BIT_FALSE)
+      bitAttrs.push_back(ATTR_FILTERED);
+    else
+      bitAttrs.push_back(ATTR_NONE);
+
+  for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
+    insn_t insn;
+
+    insnWithID(insn, Opcodes[InsnIndex].EncodingID);
+
+    for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
+      switch (bitAttrs[BitIndex]) {
+      case ATTR_NONE:
+        if (insn[BitIndex] == BIT_UNSET)
+          bitAttrs[BitIndex] = ATTR_ALL_UNSET;
+        else
+          bitAttrs[BitIndex] = ATTR_ALL_SET;
+        break;
+      case ATTR_ALL_SET:
+        if (insn[BitIndex] == BIT_UNSET)
+          bitAttrs[BitIndex] = ATTR_MIXED;
+        break;
+      case ATTR_ALL_UNSET:
+        if (insn[BitIndex] != BIT_UNSET)
+          bitAttrs[BitIndex] = ATTR_MIXED;
+        break;
+      case ATTR_MIXED:
+      case ATTR_FILTERED:
+        break;
+      }
+    }
+  }
+
+  // The regionAttr automaton consumes the bitAttrs automatons' state,
+  // lowest-to-highest.
+ // + // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) + // States: NONE, ALL_SET, MIXED + // Initial state: NONE + // + // (NONE) ----- F --> (NONE) + // (NONE) ----- S --> (ALL_SET) ; and set region start + // (NONE) ----- U --> (NONE) + // (NONE) ----- M --> (MIXED) ; and set region start + // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region + // (ALL_SET) -- S --> (ALL_SET) + // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region + // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region + // (MIXED) ---- F --> (NONE) ; and report a MIXED region + // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region + // (MIXED) ---- U --> (NONE) ; and report a MIXED region + // (MIXED) ---- M --> (MIXED) + + bitAttr_t RA = ATTR_NONE; + unsigned StartBit = 0; + + for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { + bitAttr_t bitAttr = bitAttrs[BitIndex]; + + assert(bitAttr != ATTR_NONE && "Bit without attributes"); + + switch (RA) { + case ATTR_NONE: + switch (bitAttr) { + case ATTR_FILTERED: + break; + case ATTR_ALL_SET: + StartBit = BitIndex; + RA = ATTR_ALL_SET; + break; + case ATTR_ALL_UNSET: + break; + case ATTR_MIXED: + StartBit = BitIndex; + RA = ATTR_MIXED; + break; + default: + llvm_unreachable("Unexpected bitAttr!"); + } + break; + case ATTR_ALL_SET: + switch (bitAttr) { + case ATTR_FILTERED: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + RA = ATTR_NONE; + break; + case ATTR_ALL_SET: + break; + case ATTR_ALL_UNSET: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + RA = ATTR_NONE; + break; + case ATTR_MIXED: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + StartBit = BitIndex; + RA = ATTR_MIXED; + break; + default: + llvm_unreachable("Unexpected bitAttr!"); + } + break; + case ATTR_MIXED: + switch (bitAttr) { + case ATTR_FILTERED: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + StartBit = BitIndex; + RA = ATTR_NONE; + break; + case ATTR_ALL_SET: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + StartBit = BitIndex; + RA = ATTR_ALL_SET; + break; + case ATTR_ALL_UNSET: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + RA = ATTR_NONE; + break; + case ATTR_MIXED: + break; + default: + llvm_unreachable("Unexpected bitAttr!"); + } + break; + case ATTR_ALL_UNSET: + llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); + case ATTR_FILTERED: + llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); + } + } + + // At the end, if we're still in ALL_SET or MIXED states, report a region + switch (RA) { + case ATTR_NONE: + break; + case ATTR_FILTERED: + break; + case ATTR_ALL_SET: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + break; + case ATTR_ALL_UNSET: + break; + case ATTR_MIXED: + reportRegion(RA, StartBit, BitIndex, AllowMixed); + break; + } + + // We have finished with the filter processings. Now it's time to choose + // the best performing filter. + BestIndex = 0; + bool AllUseless = true; + unsigned BestScore = 0; + + for (unsigned i = 0, e = Filters.size(); i != e; ++i) { + unsigned Usefulness = Filters[i].usefulness(); + + if (Usefulness) + AllUseless = false; + + if (Usefulness > BestScore) { + BestIndex = i; + BestScore = Usefulness; + } + } + + if (!AllUseless) + bestFilter().recurse(); + + return !AllUseless; +} // end of FilterChooser::filterProcessor(bool) + +// Decides on the best configuration of filter(s) to use in order to decode +// the instructions. A conflict of instructions may occur, in which case we +// dump the conflict set to the standard error. 
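+// The passes below are attempted in order: regions of consecutive known bits
+// first, then mixed regions, and finally a non-greedy mixed scan that is
+// reserved for the three-instruction conflict heuristic described inline.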
+void FilterChooser::doFilter() {
+  unsigned Num = Opcodes.size();
+  assert(Num && "FilterChooser created with no instructions");
+
+  // Try regions of consecutive known bit values first.
+  if (filterProcessor(false))
+    return;
+
+  // Then regions of mixed bits (both known and uninitialized bit values
+  // allowed).
+  if (filterProcessor(true))
+    return;
+
+  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
+  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
+  // well-known encoding pattern. In such a case, we backtrack and scan for the
+  // very first consecutive ATTR_ALL_SET region and assign a filter to it.
+  if (Num == 3 && filterProcessor(true, false))
+    return;
+
+  // If we get here, the instruction decoding has failed.
+  // Set the BestIndex to -1 to indicate so.
+  BestIndex = -1;
+}
+
+// emitTableEntries - Emit state machine entries to decode our share of
+// instructions.
+void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
+  if (Opcodes.size() == 1) {
+    // There is only one instruction in the set, which is great!
+    // Call emitSingletonTableEntry() to see whether there are any remaining
+    // encoding bits.
+    emitSingletonTableEntry(TableInfo, Opcodes[0]);
+    return;
+  }
+
+  // Choose the best filter to do the decoding!
+  if (BestIndex != -1) {
+    const Filter &Best = Filters[BestIndex];
+    if (Best.getNumFiltered() == 1)
+      emitSingletonTableEntry(TableInfo, Best);
+    else
+      Best.emitTableEntry(TableInfo);
+    return;
+  }
+
+  // We don't know how to decode these instructions! Dump the
+  // conflict set and bail.
+
+  // Print out useful conflict information for postmortem analysis.
+  errs() << "Decoding Conflict:\n";
+
+  dumpStack(errs(), "\t\t");
+
+  for (auto Opcode : Opcodes) {
+    errs() << '\t';
+    emitNameWithID(errs(), Opcode.EncodingID);
+    errs() << " ";
+    dumpBits(
+        errs(),
+        getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
+    errs() << '\n';
+  }
+}
+
+static std::string findOperandDecoderMethod(Record *Record) {
+  std::string Decoder;
+
+  RecordVal *DecoderString = Record->getValue("DecoderMethod");
+  StringInit *String = DecoderString ?
+    dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
+  if (String) {
+    Decoder = std::string(String->getValue());
+    if (!Decoder.empty())
+      return Decoder;
+  }
+
+  if (Record->isSubClassOf("RegisterOperand"))
+    Record = Record->getValueAsDef("RegClass");
+
+  if (Record->isSubClassOf("RegisterClass")) {
+    Decoder = "Decode" + Record->getName().str() + "RegisterClass";
+  } else if (Record->isSubClassOf("PointerLikeRegClass")) {
+    Decoder = "DecodePointerLikeRegClass" +
+              utostr(Record->getValueAsInt("RegClassKind"));
+  }
+
+  return Decoder;
+}
+
+OperandInfo getOpInfo(Record *TypeRecord) {
+  std::string Decoder = findOperandDecoderMethod(TypeRecord);
+
+  RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder");
+  BitInit *HasCompleteDecoderBit =
+      HasCompleteDecoderVal
+          ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+          : nullptr;
+  bool HasCompleteDecoder =
+      HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+  return OperandInfo(Decoder, HasCompleteDecoder);
+}
+
+void parseVarLenInstOperand(const Record &Def,
+                            std::vector<OperandInfo> &Operands,
+                            const CodeGenInstruction &CGI) {
+
+  const RecordVal *RV = Def.getValue("Inst");
+  VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
+  SmallVector<int> TiedTo;
+
+  for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) {
+    auto &Op = CGI.Operands[Idx];
+    if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
+      for (auto *Arg : Op.MIOperandInfo->getArgs())
+        Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
+    else
+      Operands.push_back(getOpInfo(Op.Rec));
+
+    int TiedReg = Op.getTiedRegister();
+    TiedTo.push_back(-1);
+    if (TiedReg != -1) {
+      TiedTo[Idx] = TiedReg;
+      TiedTo[TiedReg] = Idx;
+    }
+  }
+
+  unsigned CurrBitPos = 0;
+  for (auto &EncodingSegment : VLI) {
+    unsigned Offset = 0;
+    StringRef OpName;
+
+    if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
+      OpName = SI->getValue();
+    } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
+      OpName = cast<StringInit>(DI->getArg(0))->getValue();
+      Offset = cast<IntInit>(DI->getArg(2))->getValue();
+    }
+
+    if (!OpName.empty()) {
+      auto OpSubOpPair =
+          const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
+              OpName);
+      unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
+      Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+
+      int TiedReg = TiedTo[OpSubOpPair.first];
+      if (TiedReg != -1) {
+        unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
+            std::make_pair(TiedReg, OpSubOpPair.second));
+        Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+      }
+    }
+
+    CurrBitPos += EncodingSegment.BitWidth;
+  }
+}
+
+static unsigned
+populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
+                    const CodeGenInstruction &CGI, unsigned Opc,
+                    std::map<unsigned, std::vector<OperandInfo>> &Operands,
+                    bool IsVarLenInst) {
+  const Record &Def = *CGI.TheDef;
+  // If all the bit positions are not specified, do not decode this instruction.
+  // We are bound to fail! For proper disassembly, the well-known encoding bits
+  // of the instruction must be fully specified.
+
+  BitsInit &Bits = getBitsField(EncodingDef, "Inst");
+  if (Bits.allInComplete())
+    return 0;
+
+  std::vector<OperandInfo> InsnOperands;
+
+  // If the instruction has specified a custom decoding hook, use that instead
+  // of trying to auto-generate the decoder.
+  StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
+  if (InstDecoder != "") {
+    bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder");
+    InsnOperands.push_back(
+        OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder));
+    Operands[Opc] = InsnOperands;
+    return Bits.getNumBits();
+  }
+
+  // Generate a description of the operands of the instruction that we know
+  // how to decode automatically.
+  // FIXME: We'll need to have a way to manually override this as needed.
+
+  // Gather the outputs/inputs of the instruction, so we can find their
+  // positions in the encoding. This assumes for now that they appear in the
+  // MCInst in the order that they're listed.
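+  // (Outputs are gathered before inputs, so the flattened InOutOperands list
+  // mirrors the MCInst operand order assumed above.)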
+  std::vector<std::pair<Init *, StringRef>> InOutOperands;
+  DagInit *Out = Def.getValueAsDag("OutOperandList");
+  DagInit *In = Def.getValueAsDag("InOperandList");
+  for (unsigned i = 0; i < Out->getNumArgs(); ++i)
+    InOutOperands.push_back(
+        std::make_pair(Out->getArg(i), Out->getArgNameStr(i)));
+  for (unsigned i = 0; i < In->getNumArgs(); ++i)
+    InOutOperands.push_back(
+        std::make_pair(In->getArg(i), In->getArgNameStr(i)));
+
+  // Search for tied operands, so that we can correctly instantiate
+  // operands that are not explicitly represented in the encoding.
+  std::map<std::string, std::string> TiedNames;
+  for (unsigned i = 0; i < CGI.Operands.size(); ++i) {
+    int tiedTo = CGI.Operands[i].getTiedRegister();
+    if (tiedTo != -1) {
+      std::pair<unsigned, unsigned> SO =
+          CGI.Operands.getSubOperandNumber(tiedTo);
+      TiedNames[std::string(InOutOperands[i].second)] =
+          std::string(InOutOperands[SO.first].second);
+      TiedNames[std::string(InOutOperands[SO.first].second)] =
+          std::string(InOutOperands[i].second);
+    }
+  }
+
+  if (IsVarLenInst) {
+    parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
+  } else {
+    std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
+    std::set<std::string> NumberedInsnOperandsNoTie;
+    if (Target.getInstructionSet()->getValueAsBit(
+            "decodePositionallyEncodedOperands")) {
+      const std::vector<RecordVal> &Vals = Def.getValues();
+      unsigned NumberedOp = 0;
+
+      std::set<unsigned> NamedOpIndices;
+      if (Target.getInstructionSet()->getValueAsBit(
+              "noNamedPositionallyEncodedOperands"))
+        // Collect the set of operand indices that might correspond to a named
+        // operand, and skip these when assigning operands based on position.
+        for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+          unsigned OpIdx;
+          if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+            continue;
+
+          NamedOpIndices.insert(OpIdx);
+        }
+
+      for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+        // Ignore fixed fields in the record, we're looking for values like:
+        //    bits<5> RST = { ?, ?, ?, ?, ? };
+        if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
+          continue;
+
+        // Determine if Vals[i] actually contributes to the Inst encoding.
+        unsigned bi = 0;
+        for (; bi < Bits.getNumBits(); ++bi) {
+          VarInit *Var = nullptr;
+          VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+          if (BI)
+            Var = dyn_cast<VarInit>(BI->getBitVar());
+          else
+            Var = dyn_cast<VarInit>(Bits.getBit(bi));
+
+          if (Var && Var->getName() == Vals[i].getName())
+            break;
+        }
+
+        if (bi == Bits.getNumBits())
+          continue;
+
+        // Skip variables that correspond to explicitly-named operands.
+        unsigned OpIdx;
+        if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+          continue;
+
+        // Get the bit range for this operand:
+        unsigned bitStart = bi++, bitWidth = 1;
+        for (; bi < Bits.getNumBits(); ++bi) {
+          VarInit *Var = nullptr;
+          VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+          if (BI)
+            Var = dyn_cast<VarInit>(BI->getBitVar());
+          else
+            Var = dyn_cast<VarInit>(Bits.getBit(bi));
+
+          if (!Var)
+            break;
+
+          if (Var->getName() != Vals[i].getName())
+            break;
+
+          ++bitWidth;
+        }
+
+        unsigned NumberOps = CGI.Operands.size();
+        while (NumberedOp < NumberOps &&
+               (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
+                (!NamedOpIndices.empty() &&
+                 NamedOpIndices.count(
+                     CGI.Operands.getSubOperandNumber(NumberedOp).first))))
+          ++NumberedOp;
+
+        OpIdx = NumberedOp++;
+
+        // OpIdx now holds the ordered operand number of Vals[i].
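+        // Translate the flat operand number into its (operand, sub-operand)
+        // pair before looking up the operand's record and decoder.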
+        std::pair<unsigned, unsigned> SO =
+            CGI.Operands.getSubOperandNumber(OpIdx);
+        const std::string &Name = CGI.Operands[SO.first].Name;
+
+        LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
+                          << ": " << Name << "(" << SO.first << ", "
+                          << SO.second << ") => " << Vals[i].getName() << "\n");
+
+        std::string Decoder;
+        Record *TypeRecord = CGI.Operands[SO.first].Rec;
+
+        RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
+        StringInit *String =
+            DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
+                          : nullptr;
+        if (String && String->getValue() != "")
+          Decoder = std::string(String->getValue());
+
+        if (Decoder == "" && CGI.Operands[SO.first].MIOperandInfo &&
+            CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
+          Init *Arg = CGI.Operands[SO.first].MIOperandInfo->getArg(SO.second);
+          if (DefInit *DI = cast<DefInit>(Arg))
+            TypeRecord = DI->getDef();
+        }
+
+        bool isReg = false;
+        if (TypeRecord->isSubClassOf("RegisterOperand"))
+          TypeRecord = TypeRecord->getValueAsDef("RegClass");
+        if (TypeRecord->isSubClassOf("RegisterClass")) {
+          Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
+          isReg = true;
+        } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
+          Decoder = "DecodePointerLikeRegClass" +
+                    utostr(TypeRecord->getValueAsInt("RegClassKind"));
+          isReg = true;
+        }
+
+        DecoderString = TypeRecord->getValue("DecoderMethod");
+        String = DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
+                               : nullptr;
+        if (!isReg && String && String->getValue() != "")
+          Decoder = std::string(String->getValue());
+
+        RecordVal *HasCompleteDecoderVal =
+            TypeRecord->getValue("hasCompleteDecoder");
+        BitInit *HasCompleteDecoderBit =
+            HasCompleteDecoderVal
+                ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+                : nullptr;
+        bool HasCompleteDecoder =
+            HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+        OperandInfo OpInfo(Decoder, HasCompleteDecoder);
+        OpInfo.addField(bitStart, bitWidth, 0);
+
+        NumberedInsnOperands[Name].push_back(OpInfo);
+
+        // FIXME: For complex operands with custom decoders we can't handle tied
+        // sub-operands automatically. Skip those here and assume that this is
+        // fixed up elsewhere.
+        if (CGI.Operands[SO.first].MIOperandInfo &&
+            CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && String &&
+            String->getValue() != "")
+          NumberedInsnOperandsNoTie.insert(Name);
+      }
+    }
+
+    // For each operand, see if we can figure out where it is encoded.
+    for (const auto &Op : InOutOperands) {
+      if (!NumberedInsnOperands[std::string(Op.second)].empty()) {
+        llvm::append_range(InsnOperands,
+                           NumberedInsnOperands[std::string(Op.second)]);
+        continue;
+      }
+      if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) {
+        if (!NumberedInsnOperandsNoTie.count(
+                TiedNames[std::string(Op.second)])) {
+          // Figure out to which (sub)operand we're tied.
+          unsigned i =
+              CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]);
+          int tiedTo = CGI.Operands[i].getTiedRegister();
+          if (tiedTo == -1) {
+            i = CGI.Operands.getOperandNamed(Op.second);
+            tiedTo = CGI.Operands[i].getTiedRegister();
+          }
+
+          if (tiedTo != -1) {
+            std::pair<unsigned, unsigned> SO =
+                CGI.Operands.getSubOperandNumber(tiedTo);
+
+            InsnOperands.push_back(
+                NumberedInsnOperands[TiedNames[std::string(Op.second)]]
+                                    [SO.second]);
+          }
+        }
+        continue;
+      }
+
+      // At this point, we can locate the decoder field, but we need to know how
+      // to interpret it. As a first step, require the target to provide
+      // callbacks for decoding register classes.
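+      // For a register class named, say, GPR32 (an illustrative name, not
+      // taken from a real target), findOperandDecoderMethod above would
+      // synthesize the callback name DecodeGPR32RegisterClass.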
+
+      OperandInfo OpInfo = getOpInfo(cast<DefInit>(Op.first)->getDef());
+
+      // Some bits of the operand may be required to be 1 depending on the
+      // instruction's encoding. Collect those bits.
+      if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second))
+        if (const BitsInit *OpBits =
+                dyn_cast<BitsInit>(EncodedValue->getValue()))
+          for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
+            if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
+              if (OpBit->getValue())
+                OpInfo.InitValue |= 1ULL << I;
+
+      unsigned Base = ~0U;
+      unsigned Width = 0;
+      unsigned Offset = 0;
+
+      for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
+        VarInit *Var = nullptr;
+        VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+        if (BI)
+          Var = dyn_cast<VarInit>(BI->getBitVar());
+        else
+          Var = dyn_cast<VarInit>(Bits.getBit(bi));
+
+        if (!Var) {
+          if (Base != ~0U) {
+            OpInfo.addField(Base, Width, Offset);
+            Base = ~0U;
+            Width = 0;
+            Offset = 0;
+          }
+          continue;
+        }
+
+        if ((Var->getName() != Op.second &&
+             Var->getName() != TiedNames[std::string(Op.second)])) {
+          if (Base != ~0U) {
+            OpInfo.addField(Base, Width, Offset);
+            Base = ~0U;
+            Width = 0;
+            Offset = 0;
+          }
+          continue;
+        }
+
+        if (Base == ~0U) {
+          Base = bi;
+          Width = 1;
+          Offset = BI ? BI->getBitNum() : 0;
+        } else if (BI && BI->getBitNum() != Offset + Width) {
+          OpInfo.addField(Base, Width, Offset);
+          Base = bi;
+          Width = 1;
+          Offset = BI->getBitNum();
+        } else {
+          ++Width;
+        }
+      }
+
+      if (Base != ~0U)
+        OpInfo.addField(Base, Width, Offset);
+
+      if (OpInfo.numFields() > 0)
+        InsnOperands.push_back(OpInfo);
+    }
+  }
+
+  Operands[Opc] = InsnOperands;
+
+#if 0
+  LLVM_DEBUG({
+      // Dumps the instruction encoding bits.
+      dumpBits(errs(), Bits);
+
+      errs() << '\n';
+
+      // Dumps the list of operand info.
+      for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
+        const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
+        const std::string &OperandName = Info.Name;
+        const Record &OperandDef = *Info.Rec;
+
+        errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
+      }
+    });
+#endif
+
+  return Bits.getNumBits();
+}
+
+// emitFieldFromInstruction - Emit the templated helper function
+// fieldFromInstruction().
+// On Windows we make sure that this function is not inlined when
+// using the VS compiler. It has a bug which causes the function
+// to be optimized out in some circumstances. See llvm.org/pr38292
+static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
+  OS << "// Helper functions for extracting fields from encoded instructions.\n"
+     << "// InsnType must either be integral or an APInt-like object that "
+        "must:\n"
+     << "// * be default-constructible and copy-constructible\n"
+     << "// * be constructible from an APInt (this can be private)\n"
+     << "// * Support insertBits(bits, startBit, numBits)\n"
+     << "// * Support extractBitsAsZExtValue(numBits, startBit)\n"
+     << "// * Support the ~, &, ==, and != operators with other objects of "
+        "the same type\n"
+     << "// * Support the != and bitwise & with uint64_t\n"
+     << "// * Support put (<<) to raw_ostream&\n"
+     << "template <typename InsnType>\n"
+     << "#if defined(_MSC_VER) && !defined(__clang__)\n"
+     << "__declspec(noinline)\n"
+     << "#endif\n"
+     << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n"
+     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
+     << "                     unsigned numBits) {\n"
+     << "  assert(startBit + numBits <= 64 && \"Cannot support >64-bit "
+        "extractions!\");\n"
+     << "  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n"
+     << "         \"Instruction field out of bounds!\");\n"
+     << "  InsnType fieldMask;\n"
+     << "  if (numBits == sizeof(InsnType) * 8)\n"
+     << "    fieldMask = (InsnType)(-1LL);\n"
+     << "  else\n"
+     << "    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
+     << "  return (insn & fieldMask) >> startBit;\n"
+     << "}\n"
+     << "\n"
+     << "template <typename InsnType>\n"
+     << "static std::enable_if_t<!std::is_integral<InsnType>::value, "
+        "uint64_t>\n"
+     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
+     << "                     unsigned numBits) {\n"
+     << "  return insn.extractBitsAsZExtValue(numBits, startBit);\n"
+     << "}\n\n";
+}
+
+// emitInsertBits - Emit the templated helper function insertBits().
+static void emitInsertBits(formatted_raw_ostream &OS) {
+  OS << "// Helper function for inserting bits extracted from an encoded "
+        "instruction into\n"
+     << "// a field.\n"
+     << "template <typename InsnType>\n"
+     << "static std::enable_if_t<std::is_integral<InsnType>::value>\n"
+     << "insertBits(InsnType &field, InsnType bits, unsigned startBit, "
+        "unsigned numBits) {\n"
+     << "  assert(startBit + numBits <= sizeof field * 8);\n"
+     << "  field |= (InsnType)bits << startBit;\n"
+     << "}\n"
+     << "\n"
+     << "template <typename InsnType>\n"
+     << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n"
+     << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, "
+        "unsigned numBits) {\n"
+     << "  field.insertBits(bits, startBit, numBits);\n"
+     << "}\n\n";
+}
+
+// emitDecodeInstruction - Emit the templated helper function
+// decodeInstruction().
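+// As a sketch of the table format consumed below (as produced by
+// emitPredicateTableEntry and friends above): each entry begins with an
+// opcode byte, followed by ULEB128-encoded operands and, for the conditional
+// opcodes, a NumToSkip field stored as a plain 24-bit little-endian integer.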
+static void emitDecodeInstruction(formatted_raw_ostream &OS,
+                                  bool IsVarLenInst) {
+  OS << "template <typename InsnType>\n"
+     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
+        "MCInst &MI,\n"
+     << "                                      InsnType insn, uint64_t "
+        "Address,\n"
+     << "                                      const MCDisassembler *DisAsm,\n"
+     << "                                      const MCSubtargetInfo &STI";
+  if (IsVarLenInst) {
+    OS << ",\n"
+       << "                                      "
+          "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
+  }
+  OS << ") {\n"
+     << "  const FeatureBitset &Bits = STI.getFeatureBits();\n"
+     << "\n"
+     << "  const uint8_t *Ptr = DecodeTable;\n"
+     << "  uint64_t CurFieldValue = 0;\n"
+     << "  DecodeStatus S = MCDisassembler::Success;\n"
+     << "  while (true) {\n"
+     << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
+     << "    switch (*Ptr) {\n"
+     << "    default:\n"
+     << "      errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
+     << "      return MCDisassembler::Fail;\n"
+     << "    case MCD::OPC_ExtractField: {\n"
+     << "      unsigned Start = *++Ptr;\n"
+     << "      unsigned Len = *++Ptr;\n"
+     << "      ++Ptr;\n";
+  if (IsVarLenInst)
+    OS << "      makeUp(insn, Start + Len);\n";
+  OS << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
+        "\", \"\n"
+     << "                   << Len << \"): \" << CurFieldValue << \"\\n\");\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_FilterValue: {\n"
+     << "      // Decode the field value.\n"
+     << "      unsigned Len;\n"
+     << "      uint64_t Val = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
+     << "\n"
+     << "      // Perform the filter operation.\n"
+     << "      if (Val != CurFieldValue)\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << "
+        "\", \" << NumToSkip\n"
+     << "                   << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" "
+        ": \"PASS:\")\n"
+     << "                   << \" continuing at \" << (Ptr - DecodeTable) << "
+        "\"\\n\");\n"
+     << "\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_CheckField: {\n"
+     << "      unsigned Start = *++Ptr;\n"
+     << "      unsigned Len = *++Ptr;\n";
+  if (IsVarLenInst)
+    OS << "      makeUp(insn, Start + Len);\n";
+  OS << "      uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      // Decode the field value.\n"
+     << "      unsigned PtrLen = 0;\n"
+     << "      uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n"
+     << "      Ptr += PtrLen;\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
+     << "\n"
+     << "      // If the actual and expected values don't match, skip.\n"
+     << "      if (ExpectedValue != FieldValue)\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << "
+        "\", \"\n"
+     << "                   << Len << \", \" << ExpectedValue << \", \" << "
+        "NumToSkip\n"
+     << "                   << \"): FieldValue = \" << FieldValue << \", "
+        "ExpectedValue = \"\n"
+     << "                   << ExpectedValue << \": \"\n"
+     << "                   << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : "
+        "\"FAIL\\n\"));\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_CheckPredicate: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Predicate Index value.\n"
+     << "      unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
+     << "      // Check the predicate.\n"
+     << "      bool Pred;\n"
+     << "      if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      (void)Pred;\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx "
+        "<< \"): \"\n"
+     << "            << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
+     << "\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_Decode: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Opcode value.\n"
+     << "      unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "\n"
+     << "      MI.clear();\n"
+     << "      MI.setOpcode(Opc);\n"
+     << "      bool DecodeComplete;\n";
+  if (IsVarLenInst) {
+    OS << "      Len = InstrLenTable[Opc];\n"
+       << "      makeUp(insn, Len);\n";
+  }
+  OS << "      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
+        "DecodeComplete);\n"
+     << "      assert(DecodeComplete);\n"
+     << "\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
+     << "                   << \", using decoder \" << DecodeIdx << \": \"\n"
+     << "                   << (S != MCDisassembler::Fail ? \"PASS\" : "
+        "\"FAIL\") << \"\\n\");\n"
+     << "      return S;\n"
+     << "    }\n"
+     << "    case MCD::OPC_TryDecode: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Opcode value.\n"
+     << "      unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
+     << "\n"
+     << "      // Perform the decode operation.\n"
+     << "      MCInst TmpMI;\n"
+     << "      TmpMI.setOpcode(Opc);\n"
+     << "      bool DecodeComplete;\n"
+     << "      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, "
+        "DecodeComplete);\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << "
+        "Opc\n"
+     << "                   << \", using decoder \" << DecodeIdx << \": \");\n"
+     << "\n"
+     << "      if (DecodeComplete) {\n"
+     << "        // Decoding complete.\n"
+     << "        LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : "
+        "\"FAIL\") << \"\\n\");\n"
+     << "        MI = TmpMI;\n"
+     << "        return S;\n"
+     << "      } else {\n"
+     << "        assert(S == MCDisassembler::Fail);\n"
+     << "        // If the decoding was incomplete, skip.\n"
+     << "        Ptr += NumToSkip;\n"
+     << "        LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - "
+        "DecodeTable) << \"\\n\");\n"
+     << "        // Reset decode status. This also drops a SoftFail status "
+        "that could be\n"
+     << "        // set before the decode attempt.\n"
+     << "        S = MCDisassembler::Success;\n"
+     << "      }\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_SoftFail: {\n"
+     << "      // Decode the mask values.\n"
+     << "      unsigned Len;\n"
+     << "      uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      bool Fail = (insn & PositiveMask) != 0 || (~insn & "
+        "NegativeMask) != 0;\n"
+     << "      if (Fail)\n"
+     << "        S = MCDisassembler::SoftFail;\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
+        "\"FAIL\\n\" : \"PASS\\n\"));\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_Fail: {\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
+     << "      return MCDisassembler::Fail;\n"
+     << "    }\n"
+     << "    }\n"
+     << "  }\n"
+     << "  llvm_unreachable(\"bogosity detected in disassembler state "
+        "machine!\");\n"
+     << "}\n\n";
+}
+
+// Emits disassembler code for instruction decoding.
+void DecoderEmitter::run(raw_ostream &o) {
+  formatted_raw_ostream OS(o);
+  OS << "#include \"llvm/MC/MCInst.h\"\n";
+  OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n";
+  OS << "#include \"llvm/MC/SubtargetFeature.h\"\n";
+  OS << "#include \"llvm/Support/DataTypes.h\"\n";
+  OS << "#include \"llvm/Support/Debug.h\"\n";
+  OS << "#include \"llvm/Support/LEB128.h\"\n";
+  OS << "#include \"llvm/Support/raw_ostream.h\"\n";
+  OS << "#include <assert.h>\n";
+  OS << '\n';
+  OS << "namespace llvm {\n\n";
+
+  emitFieldFromInstruction(OS);
+  emitInsertBits(OS);
+
+  Target.reverseBitsForLittleEndianEncoding();
+
+  // Parameterize the decoders based on namespace and instruction width.
+  std::set<StringRef> HwModeNames;
+  const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
+  NumberedEncodings.reserve(NumberedInstructions.size());
+  DenseMap<Record *, unsigned> IndexOfInstruction;
+  // First, collect all HwModes referenced by the target.
+  for (const auto &NumberedInstruction : NumberedInstructions) {
+    IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
+
+    if (const RecordVal *RV =
+            NumberedInstruction->TheDef->getValue("EncodingInfos")) {
+      if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+        const CodeGenHwModes &HWM = Target.getHwModes();
+        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+        for (auto &KV : EBM)
+          HwModeNames.insert(HWM.getMode(KV.first).Name);
+      }
+    }
+  }
+
+  // If HwModeNames is empty, add the empty string so we always have one HwMode.
+  if (HwModeNames.empty())
+    HwModeNames.insert("");
+
+  for (const auto &NumberedInstruction : NumberedInstructions) {
+    IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
+
+    if (const RecordVal *RV =
+            NumberedInstruction->TheDef->getValue("EncodingInfos")) {
+      if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+        const CodeGenHwModes &HWM = Target.getHwModes();
+        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+        for (auto &KV : EBM) {
+          NumberedEncodings.emplace_back(KV.second, NumberedInstruction,
+                                         HWM.getMode(KV.first).Name);
+          HwModeNames.insert(HWM.getMode(KV.first).Name);
+        }
+        continue;
+      }
+    }
+    // This instruction is encoded the same on all HwModes. Emit it for all
+    // HwModes.
+    for (StringRef HwModeName : HwModeNames)
+      NumberedEncodings.emplace_back(NumberedInstruction->TheDef,
+                                     NumberedInstruction, HwModeName);
+  }
+  for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
+    NumberedEncodings.emplace_back(
+        NumberedAlias,
+        &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
+
+  std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
+      OpcMap;
+  std::map<unsigned, std::vector<OperandInfo>> Operands;
+  std::vector<unsigned> InstrLen;
+
+  bool IsVarLenInst =
+      any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
+        RecordVal *RV = CGI->TheDef->getValue("Inst");
+        return RV && isa<DagInit>(RV->getValue());
+      });
+  unsigned MaxInstLen = 0;
+
+  for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
+    const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
+    const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
+    const Record *Def = Inst->TheDef;
+    unsigned Size = EncodingDef->getValueAsInt("Size");
+    if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
+        Def->getValueAsBit("isPseudo") ||
+        Def->getValueAsBit("isAsmParserOnly") ||
+        Def->getValueAsBit("isCodeGenOnly")) {
+      NumEncodingsLackingDisasm++;
+      continue;
+    }
+
+    if (i < NumberedInstructions.size())
+      NumInstructions++;
+    NumEncodings++;
+
+    if (!Size && !IsVarLenInst)
+      continue;
+
+    if (IsVarLenInst)
+      InstrLen.resize(NumberedInstructions.size(), 0);
+
+    if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i,
+                                           Operands, IsVarLenInst)) {
+      if (IsVarLenInst) {
+        MaxInstLen = std::max(MaxInstLen, Len);
+        InstrLen[i] = Len;
+      }
+      std::string DecoderNamespace =
+          std::string(EncodingDef->getValueAsString("DecoderNamespace"));
+      if (!NumberedEncodings[i].HwModeName.empty())
+        DecoderNamespace +=
+            std::string("_") + NumberedEncodings[i].HwModeName.str();
+      OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(
+          i, IndexOfInstruction.find(Def)->second);
+    } else {
+      NumEncodingsOmitted++;
+    }
+  }
+
+  DecoderTableInfo TableInfo;
+  for (const auto &Opc : OpcMap) {
+    // Emit the decoder for this namespace+width combination.
+    ArrayRef<EncodingAndInst> NumberedEncodingsRef(
+        NumberedEncodings.data(), NumberedEncodings.size());
+    FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
+                     IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this);
+
+    // The decode table is cleared for each top level decoder function. The
+    // predicates and decoders themselves, however, are shared across all
+    // decoders to give more opportunities for uniqueing.
+    TableInfo.Table.clear();
+    TableInfo.FixupStack.clear();
+    TableInfo.Table.reserve(16384);
+    TableInfo.FixupStack.emplace_back();
+    FC.emitTableEntries(TableInfo);
+    // Any NumToSkip fixups in the top level scope can resolve to the
+    // OPC_Fail at the end of the table.
+    assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
+    // Resolve any NumToSkip fixups in the current scope.
+    resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
+                       TableInfo.Table.size());
+    TableInfo.FixupStack.clear();
+
+    TableInfo.Table.push_back(MCD::OPC_Fail);
+
+    // Print the table to the output stream.
+    emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first);
+    OS.flush();
+  }
+
+  // For variable length instructions, we emit an instruction length table
+  // to let the decoder know how long the instructions are.
+  // You can see example usage in M68k's disassembler.
+  if (IsVarLenInst)
+    emitInstrLenTable(OS, InstrLen);
+  // Emit the predicate function.
+  emitPredicateFunction(OS, TableInfo.Predicates, 0);
+
+  // Emit the decoder function.
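+  // (TableInfo.Predicates and TableInfo.Decoders were accumulated across all
+  // of the tables emitted above, so the indices embedded in those tables
+  // remain valid for the shared functions emitted here.)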
+ emitDecoderFunction(OS, TableInfo.Decoders, 0); + + // Emit the main entry point for the decoder, decodeInstruction(). + emitDecodeInstruction(OS, IsVarLenInst); + + OS << "\n} // end namespace llvm\n"; +} + +namespace llvm { + +void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, + const std::string &PredicateNamespace, + const std::string &GPrefix, const std::string &GPostfix, + const std::string &ROK, const std::string &RFail, + const std::string &L) { + DecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix, ROK, RFail, L) + .run(OS); +} + +} // end namespace llvm diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index b21bf369d18e..f3751591f3d9 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/StringSet.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" using namespace llvm; @@ -368,8 +367,7 @@ void GenerateCaseForVersionedClauses(const std::vector &Clauses, const auto ClauseFormattedName = VerClause.getClause().getFormattedName(); - if (Cases.find(ClauseFormattedName) == Cases.end()) { - Cases.insert(ClauseFormattedName); + if (Cases.insert(ClauseFormattedName).second) { OS << " case " << DirLang.getClausePrefix() << ClauseFormattedName << ":\n"; OS << " return " << VerClause.getMinVersion() diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp index 7c3f53b31bf4..297d12c5d0e9 100644 --- a/llvm/utils/TableGen/DisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp @@ -95,12 +95,11 @@ using namespace llvm::X86Disassembler; namespace llvm { -extern void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS, - const std::string &PredicateNamespace, - const std::string &GPrefix, - const std::string &GPostfix, - const std::string &ROK, - const std::string &RFail, const std::string &L); +extern void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, + const std::string &PredicateNamespace, + const std::string &GPrefix, const std::string &GPostfix, + const std::string &ROK, const std::string &RFail, + const std::string &L); void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { CodeGenTarget Target(Records); @@ -140,17 +139,16 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { if (PredicateNamespace == "Thumb") PredicateNamespace = "ARM"; - EmitFixedLenDecoder(Records, OS, PredicateNamespace, - "if (!Check(S, ", "))", - "S", "MCDisassembler::Fail", - " MCDisassembler::DecodeStatus S = " - "MCDisassembler::Success;\n(void)S;"); + EmitDecoder(Records, OS, PredicateNamespace, "if (!Check(S, ", "))", "S", + "MCDisassembler::Fail", + " MCDisassembler::DecodeStatus S = " + "MCDisassembler::Success;\n(void)S;"); return; } - EmitFixedLenDecoder(Records, OS, std::string(Target.getName()), "if (", - " == MCDisassembler::Fail)", "MCDisassembler::Success", - "MCDisassembler::Fail", ""); + EmitDecoder(Records, OS, std::string(Target.getName()), "if (", + " == MCDisassembler::Fail)", "MCDisassembler::Success", + "MCDisassembler::Fail", ""); } } // end namespace llvm diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp index 77654cbc92fd..bc8ccdac557b 100644 --- a/llvm/utils/TableGen/ExegesisEmitter.cpp +++ b/llvm/utils/TableGen/ExegesisEmitter.cpp @@ -13,15 +13,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" -#include 
"llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include #include -#include #include #include #include diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index ac9fe6db4328..49c2ead468e3 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -17,8 +17,8 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" diff --git a/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp deleted file mode 100644 index c5dd1e626696..000000000000 --- a/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp +++ /dev/null @@ -1,2560 +0,0 @@ -//===------------ FixedLenDecoderEmitter.cpp - Decoder Generator ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// It contains the tablegen backend that emits the decoder functions for -// targets with fixed length instruction set. -// -//===----------------------------------------------------------------------===// - -#include "CodeGenInstruction.h" -#include "CodeGenTarget.h" -#include "InfoByHwMode.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/CachedHashString.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "decoder-emitter" - -namespace { - -STATISTIC(NumEncodings, "Number of encodings considered"); -STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info"); -STATISTIC(NumInstructions, "Number of instructions considered"); -STATISTIC(NumEncodingsSupported, "Number of encodings supported"); -STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); - -struct EncodingField { - unsigned Base, Width, Offset; - EncodingField(unsigned B, unsigned W, unsigned O) - : Base(B), Width(W), Offset(O) { } -}; - -struct OperandInfo { - std::vector Fields; - std::string Decoder; - bool HasCompleteDecoder; - uint64_t InitValue; - - OperandInfo(std::string D, bool HCD) - : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} - - void addField(unsigned Base, unsigned Width, unsigned Offset) { - Fields.push_back(EncodingField(Base, Width, Offset)); - } - - unsigned numFields() const { return Fields.size(); } - - typedef std::vector::const_iterator const_iterator; - - 
const_iterator begin() const { return Fields.begin(); } - const_iterator end() const { return Fields.end(); } -}; - -typedef std::vector DecoderTable; -typedef uint32_t DecoderFixup; -typedef std::vector FixupList; -typedef std::vector FixupScopeList; -typedef SmallSetVector PredicateSet; -typedef SmallSetVector DecoderSet; -struct DecoderTableInfo { - DecoderTable Table; - FixupScopeList FixupStack; - PredicateSet Predicates; - DecoderSet Decoders; -}; - -struct EncodingAndInst { - const Record *EncodingDef; - const CodeGenInstruction *Inst; - StringRef HwModeName; - - EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, - StringRef HwModeName = "") - : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} -}; - -struct EncodingIDAndOpcode { - unsigned EncodingID; - unsigned Opcode; - - EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} - EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) - : EncodingID(EncodingID), Opcode(Opcode) {} -}; - -raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { - if (Value.EncodingDef != Value.Inst->TheDef) - OS << Value.EncodingDef->getName() << ":"; - OS << Value.Inst->TheDef->getName(); - return OS; -} - -class FixedLenDecoderEmitter { - RecordKeeper &RK; - std::vector NumberedEncodings; - -public: - // Defaults preserved here for documentation, even though they aren't - // strictly necessary given the way that this is currently being called. - FixedLenDecoderEmitter(RecordKeeper &R, std::string PredicateNamespace, - std::string GPrefix = "if (", - std::string GPostfix = " == MCDisassembler::Fail)", - std::string ROK = "MCDisassembler::Success", - std::string RFail = "MCDisassembler::Fail", - std::string L = "") - : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)), - GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)), - ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)), - Locals(std::move(L)) {} - - // Emit the decoder state machine table. - void emitTable(formatted_raw_ostream &o, DecoderTable &Table, - unsigned Indentation, unsigned BitWidth, - StringRef Namespace) const; - void emitPredicateFunction(formatted_raw_ostream &OS, - PredicateSet &Predicates, - unsigned Indentation) const; - void emitDecoderFunction(formatted_raw_ostream &OS, - DecoderSet &Decoders, - unsigned Indentation) const; - - // run - Output the code emitter - void run(raw_ostream &o); - -private: - CodeGenTarget Target; - -public: - std::string PredicateNamespace; - std::string GuardPrefix, GuardPostfix; - std::string ReturnOK, ReturnFail; - std::string Locals; -}; - -} // end anonymous namespace - -// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system -// for a bit value. -// -// BIT_UNFILTERED is used as the init value for a filter position. It is used -// only for filter processings. -typedef enum { - BIT_TRUE, // '1' - BIT_FALSE, // '0' - BIT_UNSET, // '?' - BIT_UNFILTERED // unfiltered -} bit_value_t; - -static bool ValueSet(bit_value_t V) { - return (V == BIT_TRUE || V == BIT_FALSE); -} - -static bool ValueNotSet(bit_value_t V) { - return (V == BIT_UNSET); -} - -static int Value(bit_value_t V) { - return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); -} - -static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { - if (BitInit *bit = dyn_cast(bits.getBit(index))) - return bit->getValue() ? BIT_TRUE : BIT_FALSE; - - // The bit is uninitialized. - return BIT_UNSET; -} - -// Prints the bit value for each position. 
-static void dumpBits(raw_ostream &o, const BitsInit &bits) { - for (unsigned index = bits.getNumBits(); index > 0; --index) { - switch (bitFromBits(bits, index - 1)) { - case BIT_TRUE: - o << "1"; - break; - case BIT_FALSE: - o << "0"; - break; - case BIT_UNSET: - o << "_"; - break; - default: - llvm_unreachable("unexpected return value from bitFromBits"); - } - } -} - -static BitsInit &getBitsField(const Record &def, StringRef str) { - BitsInit *bits = def.getValueAsBitsInit(str); - return *bits; -} - -// Representation of the instruction to work on. -typedef std::vector insn_t; - -namespace { - -static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; - -class FilterChooser; - -/// Filter - Filter works with FilterChooser to produce the decoding tree for -/// the ISA. -/// -/// It is useful to think of a Filter as governing the switch stmts of the -/// decoding tree in a certain level. Each case stmt delegates to an inferior -/// FilterChooser to decide what further decoding logic to employ, or in another -/// words, what other remaining bits to look at. The FilterChooser eventually -/// chooses a best Filter to do its job. -/// -/// This recursive scheme ends when the number of Opcodes assigned to the -/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when -/// the Filter/FilterChooser combo does not know how to distinguish among the -/// Opcodes assigned. -/// -/// An example of a conflict is -/// -/// Conflict: -/// 111101000.00........00010000.... -/// 111101000.00........0001........ -/// 1111010...00........0001........ -/// 1111010...00.................... -/// 1111010......................... -/// 1111............................ -/// ................................ -/// VST4q8a 111101000_00________00010000____ -/// VST4q8b 111101000_00________00010000____ -/// -/// The Debug output shows the path that the decoding tree follows to reach the -/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced -/// even registers, while VST4q8b is a vst4 to double-spaced odd registers. -/// -/// The encoding info in the .td files does not specify this meta information, -/// which could have been used by the decoder to resolve the conflict. The -/// decoder could try to decode the even/odd register numbering and assign to -/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" -/// version and return the Opcode since the two have the same Asm format string. -class Filter { -protected: - const FilterChooser *Owner;// points to the FilterChooser who owns this filter - unsigned StartBit; // the starting bit position - unsigned NumBits; // number of bits to filter - bool Mixed; // a mixed region contains both set and unset bits - - // Map of well-known segment value to the set of uid's with that value. - std::map> - FilteredInstructions; - - // Set of uid's with non-constant segment values. - std::vector VariableInstructions; - - // Map of well-known segment value to its delegate. - std::map> FilterChooserMap; - - // Number of instructions which fall under FilteredInstructions category. - unsigned NumFiltered; - - // Keeps track of the last opcode in the filtered bucket. 
- EncodingIDAndOpcode LastOpcFiltered; - -public: - Filter(Filter &&f); - Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); - - ~Filter() = default; - - unsigned getNumFiltered() const { return NumFiltered; } - - EncodingIDAndOpcode getSingletonOpc() const { - assert(NumFiltered == 1); - return LastOpcFiltered; - } - - // Return the filter chooser for the group of instructions without constant - // segment values. - const FilterChooser &getVariableFC() const { - assert(NumFiltered == 1); - assert(FilterChooserMap.size() == 1); - return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); - } - - // Divides the decoding task into sub tasks and delegates them to the - // inferior FilterChooser's. - // - // A special case arises when there's only one entry in the filtered - // instructions. In order to unambiguously decode the singleton, we need to - // match the remaining undecoded encoding bits against the singleton. - void recurse(); - - // Emit table entries to decode instructions given a segment or segments of - // bits. - void emitTableEntry(DecoderTableInfo &TableInfo) const; - - // Returns the number of fanout produced by the filter. More fanout implies - // the filter distinguishes more categories of instructions. - unsigned usefulness() const; -}; // end class Filter - -} // end anonymous namespace - -// These are states of our finite state machines used in FilterChooser's -// filterProcessor() which produces the filter candidates to use. -typedef enum { - ATTR_NONE, - ATTR_FILTERED, - ATTR_ALL_SET, - ATTR_ALL_UNSET, - ATTR_MIXED -} bitAttr_t; - -/// FilterChooser - FilterChooser chooses the best filter among a set of Filters -/// in order to perform the decoding of instructions at the current level. -/// -/// Decoding proceeds from the top down. Based on the well-known encoding bits -/// of instructions available, FilterChooser builds up the possible Filters that -/// can further the task of decoding by distinguishing among the remaining -/// candidate instructions. -/// -/// Once a filter has been chosen, it is called upon to divide the decoding task -/// into sub-tasks and delegates them to its inferior FilterChoosers for further -/// processings. -/// -/// It is useful to think of a Filter as governing the switch stmts of the -/// decoding tree. And each case is delegated to an inferior FilterChooser to -/// decide what further remaining bits to look at. -namespace { - -class FilterChooser { -protected: - friend class Filter; - - // Vector of codegen instructions to choose our filter. - ArrayRef AllInstructions; - - // Vector of uid's for this filter chooser to work on. - // The first member of the pair is the opcode id being decoded, the second is - // the opcode id that should be emitted. - const std::vector &Opcodes; - - // Lookup table for the operand decoding of instructions. - const std::map> &Operands; - - // Vector of candidate filters. - std::vector Filters; - - // Array of bit values passed down from our parent. - // Set to all BIT_UNFILTERED's for Parent == NULL. - std::vector FilterBitValues; - - // Links to the FilterChooser above us in the decoding tree. - const FilterChooser *Parent; - - // Index of the best filter from Filters. 
-  int BestIndex;
-
-  // Width of instructions
-  unsigned BitWidth;
-
-  // Parent emitter
-  const FixedLenDecoderEmitter *Emitter;
-
-public:
-  FilterChooser(ArrayRef<EncodingAndInst> Insts,
-                const std::vector<EncodingIDAndOpcode> &IDs,
-                const std::map<unsigned, std::vector<OperandInfo>> &Ops,
-                unsigned BW, const FixedLenDecoderEmitter *E)
-      : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
-        FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
-        BitWidth(BW), Emitter(E) {
-    doFilter();
-  }
-
-  FilterChooser(ArrayRef<EncodingAndInst> Insts,
-                const std::vector<EncodingIDAndOpcode> &IDs,
-                const std::map<unsigned, std::vector<OperandInfo>> &Ops,
-                const std::vector<bit_value_t> &ParentFilterBitValues,
-                const FilterChooser &parent)
-      : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
-        FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
-        BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
-    doFilter();
-  }
-
-  FilterChooser(const FilterChooser &) = delete;
-  void operator=(const FilterChooser &) = delete;
-
-  unsigned getBitWidth() const { return BitWidth; }
-
-protected:
-  // Populates the insn given the uid.
-  void insnWithID(insn_t &Insn, unsigned Opcode) const {
-    BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
-
-    // We may have a SoftFail bitmask, which specifies a mask where an encoding
-    // may differ from the value in "Inst" and yet still be valid, but the
-    // disassembler should return SoftFail instead of Success.
-    //
-    // This is used for marking UNPREDICTABLE instructions in the ARM world.
-    BitsInit *SFBits =
-        AllInstructions[Opcode].EncodingDef->getValueAsBitsInit("SoftFail");
-
-    for (unsigned i = 0; i < BitWidth; ++i) {
-      if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
-        Insn.push_back(BIT_UNSET);
-      else
-        Insn.push_back(bitFromBits(Bits, i));
-    }
-  }
-
-  // Emit the name of the encoding/instruction pair.
-  void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
-    const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
-    const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
-    if (EncodingDef != InstDef)
-      OS << EncodingDef->getName() << ":";
-    OS << InstDef->getName();
-  }
-
-  // Populates the field of the insn given the start position and the number of
-  // consecutive bits to scan for.
-  //
-  // Returns false if there exists any uninitialized bit value in the range.
-  // Returns true, otherwise.
-  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
-                     unsigned NumBits) const;
-
-  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
-  /// filter array as a series of chars.
-  void dumpFilterArray(raw_ostream &o,
-                       const std::vector<bit_value_t> &filter) const;
-
-  /// dumpStack - dumpStack traverses the filter chooser chain and calls
-  /// dumpFilterArray on each filter chooser up to the top level one.
-  void dumpStack(raw_ostream &o, const char *prefix) const;
-
-  Filter &bestFilter() {
-    assert(BestIndex != -1 && "BestIndex not set");
-    return Filters[BestIndex];
-  }
-
-  bool PositionFiltered(unsigned i) const {
-    return ValueSet(FilterBitValues[i]);
-  }
-
-  // Calculates the island(s) needed to decode the instruction.
-  // This returns a list of undecoded bits of an instruction, for example,
-  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
-  // decoded bits in order to verify that the instruction matches the Opcode.
-  unsigned getIslands(std::vector<unsigned> &StartBits,
-                      std::vector<unsigned> &EndBits,
-                      std::vector<uint64_t> &FieldVals,
-                      const insn_t &Insn) const;
-
-  // Emits code to check that the Predicates of an instruction are true.
- // Returns true if predicate matches were emitted, false otherwise. - bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, - unsigned Opc) const; - - bool doesOpcodeNeedPredicate(unsigned Opc) const; - unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; - void emitPredicateTableEntry(DecoderTableInfo &TableInfo, - unsigned Opc) const; - - void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, - unsigned Opc) const; - - // Emits table entries to decode the singleton. - void emitSingletonTableEntry(DecoderTableInfo &TableInfo, - EncodingIDAndOpcode Opc) const; - - // Emits code to decode the singleton, and then to decode the rest. - void emitSingletonTableEntry(DecoderTableInfo &TableInfo, - const Filter &Best) const; - - void emitBinaryParser(raw_ostream &o, unsigned &Indentation, - const OperandInfo &OpInfo, - bool &OpHasCompleteDecoder) const; - - void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, - bool &HasCompleteDecoder) const; - unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, - bool &HasCompleteDecoder) const; - - // Assign a single filter and run with it. - void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); - - // reportRegion is a helper function for filterProcessor to mark a region as - // eligible for use as a filter region. - void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, - bool AllowMixed); - - // FilterProcessor scans the well-known encoding bits of the instructions and - // builds up a list of candidate filters. It chooses the best filter and - // recursively descends down the decoding tree. - bool filterProcessor(bool AllowMixed, bool Greedy = true); - - // Decides on the best configuration of filter(s) to use in order to decode - // the instructions. A conflict of instructions may occur, in which case we - // dump the conflict set to the standard error. - void doFilter(); - -public: - // emitTableEntries - Emit state machine entries to decode our share of - // instructions. - void emitTableEntries(DecoderTableInfo &TableInfo) const; -}; - -} // end anonymous namespace - -/////////////////////////// -// // -// Filter Implementation // -// // -/////////////////////////// - -Filter::Filter(Filter &&f) - : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), - FilteredInstructions(std::move(f.FilteredInstructions)), - VariableInstructions(std::move(f.VariableInstructions)), - FilterChooserMap(std::move(f.FilterChooserMap)), NumFiltered(f.NumFiltered), - LastOpcFiltered(f.LastOpcFiltered) { -} - -Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, - bool mixed) - : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { - assert(StartBit + NumBits - 1 < Owner->BitWidth); - - NumFiltered = 0; - LastOpcFiltered = {0, 0}; - - for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) { - insn_t Insn; - - // Populates the insn given the uid. - Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID); - - uint64_t Field; - // Scans the segment for possibly well-specified encoding bits. - bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits); - - if (ok) { - // The encoding bits are well-known. Lets add the uid of the - // instruction into the bucket keyed off the constant field value. - LastOpcFiltered = Owner->Opcodes[i]; - FilteredInstructions[Field].push_back(LastOpcFiltered); - ++NumFiltered; - } else { - // Some of the encoding bit(s) are unspecified. 
This contributes to
-      // one additional member of "Variable" instructions.
-      VariableInstructions.push_back(Owner->Opcodes[i]);
-    }
-  }
-
-  assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
-         && "Filter returns no instruction categories");
-}
-
-// Divides the decoding task into sub tasks and delegates them to the
-// inferior FilterChooser's.
-//
-// A special case arises when there's only one entry in the filtered
-// instructions. In order to unambiguously decode the singleton, we need to
-// match the remaining undecoded encoding bits against the singleton.
-void Filter::recurse() {
-  // Starts by inheriting our parent filter chooser's filter bit values.
-  std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues);
-
-  if (!VariableInstructions.empty()) {
-    // Conservatively marks each segment position as BIT_UNSET.
-    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
-      BitValueArray[StartBit + bitIndex] = BIT_UNSET;
-
-    // Delegates to an inferior filter chooser for further processing on this
-    // group of instructions whose segment values are variable.
-    FilterChooserMap.insert(std::make_pair(NO_FIXED_SEGMENTS_SENTINEL,
-        std::make_unique<FilterChooser>(Owner->AllInstructions,
-            VariableInstructions, Owner->Operands, BitValueArray, *Owner)));
-  }
-
-  // No need to recurse for a singleton filtered instruction.
-  // See also Filter::emit*().
-  if (getNumFiltered() == 1) {
-    assert(FilterChooserMap.size() == 1);
-    return;
-  }
-
-  // Otherwise, create sub choosers.
-  for (const auto &Inst : FilteredInstructions) {
-
-    // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
-    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) {
-      if (Inst.first & (1ULL << bitIndex))
-        BitValueArray[StartBit + bitIndex] = BIT_TRUE;
-      else
-        BitValueArray[StartBit + bitIndex] = BIT_FALSE;
-    }
-
-    // Delegates to an inferior filter chooser for further processing on this
-    // category of instructions.
-    FilterChooserMap.insert(std::make_pair(
-        Inst.first, std::make_unique<FilterChooser>(
-                        Owner->AllInstructions, Inst.second,
-                        Owner->Operands, BitValueArray, *Owner)));
-  }
-}
-
-static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
-                               uint32_t DestIdx) {
-  // Any NumToSkip fixups in the current scope can resolve to the
-  // current location.
-  for (FixupList::const_reverse_iterator I = Fixups.rbegin(),
-                                         E = Fixups.rend();
-       I != E; ++I) {
-    // Calculate the distance from the byte following the fixup entry byte
-    // to the destination. The target is calculated from after the 24-bit
-    // NumToSkip entry itself, so subtract three from the displacement here
-    // to account for that.
-    uint32_t FixupIdx = *I;
-    uint32_t Delta = DestIdx - FixupIdx - 3;
-    // Our NumToSkip entries are 24-bits. Make sure our table isn't too
-    // big.
-    assert(Delta < (1u << 24));
-    Table[FixupIdx] = (uint8_t)Delta;
-    Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
-    Table[FixupIdx + 2] = (uint8_t)(Delta >> 16);
-  }
-}
-
-// Emit table entries to decode instructions given a segment or segments
-// of bits.
-void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
-  TableInfo.Table.push_back(MCD::OPC_ExtractField);
-  TableInfo.Table.push_back(StartBit);
-  TableInfo.Table.push_back(NumBits);
-
-  // A new filter entry begins a new scope for fixup resolution.
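-  // (Illustrative sketch of the byte layout appended by the code below,
-  // derived from the code rather than the original comments: each filter case
-  // is emitted as
-  //   OPC_FilterValue, <ULEB128 value>, skip0, skip1, skip2
-  // where the three skip bytes start out as zeros and are later backpatched
-  // with the 24-bit little-endian NumToSkip by resolveTableFixups().)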
- TableInfo.FixupStack.emplace_back(); - - DecoderTable &Table = TableInfo.Table; - - size_t PrevFilter = 0; - bool HasFallthrough = false; - for (auto &Filter : FilterChooserMap) { - // Field value -1 implies a non-empty set of variable instructions. - // See also recurse(). - if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { - HasFallthrough = true; - - // Each scope should always have at least one filter value to check - // for. - assert(PrevFilter != 0 && "empty filter set!"); - FixupList &CurScope = TableInfo.FixupStack.back(); - // Resolve any NumToSkip fixups in the current scope. - resolveTableFixups(Table, CurScope, Table.size()); - CurScope.clear(); - PrevFilter = 0; // Don't re-process the filter's fallthrough. - } else { - Table.push_back(MCD::OPC_FilterValue); - // Encode and emit the value to filter against. - uint8_t Buffer[16]; - unsigned Len = encodeULEB128(Filter.first, Buffer); - Table.insert(Table.end(), Buffer, Buffer + Len); - // Reserve space for the NumToSkip entry. We'll backpatch the value - // later. - PrevFilter = Table.size(); - Table.push_back(0); - Table.push_back(0); - Table.push_back(0); - } - - // We arrive at a category of instructions with the same segment value. - // Now delegate to the sub filter chooser for further decodings. - // The case may fallthrough, which happens if the remaining well-known - // encoding bits do not match exactly. - Filter.second->emitTableEntries(TableInfo); - - // Now that we've emitted the body of the handler, update the NumToSkip - // of the filter itself to be able to skip forward when false. Subtract - // two as to account for the width of the NumToSkip field itself. - if (PrevFilter) { - uint32_t NumToSkip = Table.size() - PrevFilter - 3; - assert(NumToSkip < (1u << 24) && "disassembler decoding table too large!"); - Table[PrevFilter] = (uint8_t)NumToSkip; - Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); - Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); - } - } - - // Any remaining unresolved fixups bubble up to the parent fixup scope. - assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); - FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; - FixupScopeList::iterator Dest = Source - 1; - llvm::append_range(*Dest, *Source); - TableInfo.FixupStack.pop_back(); - - // If there is no fallthrough, then the final filter should get fixed - // up according to the enclosing scope rather than the current position. - if (!HasFallthrough) - TableInfo.FixupStack.back().push_back(PrevFilter); -} - -// Returns the number of fanout produced by the filter. More fanout implies -// the filter distinguishes more categories of instructions. -unsigned Filter::usefulness() const { - if (!VariableInstructions.empty()) - return FilteredInstructions.size(); - else - return FilteredInstructions.size() + 1; -} - -////////////////////////////////// -// // -// Filterchooser Implementation // -// // -////////////////////////////////// - -// Emit the decoder state machine table. -void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS, - DecoderTable &Table, - unsigned Indentation, - unsigned BitWidth, - StringRef Namespace) const { - OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace - << BitWidth << "[] = {\n"; - - Indentation += 2; - - // FIXME: We may be able to use the NumToSkip values to recover - // appropriate indentation levels. 
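-  // (Illustrative, assuming a small example table: the emitted output might
-  // begin like
-  //   /* 0 */   MCD::OPC_ExtractField, 25, 3,  // Inst{27-25} ...
-  //   /* 3 */   MCD::OPC_FilterValue, 0, 4, 0, 0, // Skip to: 12
-  // with the leading byte offsets produced by the loop below.)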
- DecoderTable::const_iterator I = Table.begin(); - DecoderTable::const_iterator E = Table.end(); - while (I != E) { - assert (I < E && "incomplete decode table entry!"); - - uint64_t Pos = I - Table.begin(); - OS << "/* " << Pos << " */"; - OS.PadToColumn(12); - - switch (*I) { - default: - PrintFatalError("invalid decode table opcode"); - case MCD::OPC_ExtractField: { - ++I; - unsigned Start = *I++; - unsigned Len = *I++; - OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", " - << Len << ", // Inst{"; - if (Len > 1) - OS << (Start + Len - 1) << "-"; - OS << Start << "} ...\n"; - break; - } - case MCD::OPC_FilterValue: { - ++I; - OS.indent(Indentation) << "MCD::OPC_FilterValue, "; - // The filter value is ULEB128 encoded. - while (*I >= 128) - OS << (unsigned)*I++ << ", "; - OS << (unsigned)*I++ << ", "; - - // 24-bit numtoskip value. - uint8_t Byte = *I++; - uint32_t NumToSkip = Byte; - OS << (unsigned)Byte << ", "; - Byte = *I++; - OS << (unsigned)Byte << ", "; - NumToSkip |= Byte << 8; - Byte = *I++; - OS << utostr(Byte) << ", "; - NumToSkip |= Byte << 16; - OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; - break; - } - case MCD::OPC_CheckField: { - ++I; - unsigned Start = *I++; - unsigned Len = *I++; - OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", " - << Len << ", ";// << Val << ", " << NumToSkip << ",\n"; - // ULEB128 encoded field value. - for (; *I >= 128; ++I) - OS << (unsigned)*I << ", "; - OS << (unsigned)*I++ << ", "; - // 24-bit numtoskip value. - uint8_t Byte = *I++; - uint32_t NumToSkip = Byte; - OS << (unsigned)Byte << ", "; - Byte = *I++; - OS << (unsigned)Byte << ", "; - NumToSkip |= Byte << 8; - Byte = *I++; - OS << utostr(Byte) << ", "; - NumToSkip |= Byte << 16; - OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; - break; - } - case MCD::OPC_CheckPredicate: { - ++I; - OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; - for (; *I >= 128; ++I) - OS << (unsigned)*I << ", "; - OS << (unsigned)*I++ << ", "; - - // 24-bit numtoskip value. - uint8_t Byte = *I++; - uint32_t NumToSkip = Byte; - OS << (unsigned)Byte << ", "; - Byte = *I++; - OS << (unsigned)Byte << ", "; - NumToSkip |= Byte << 8; - Byte = *I++; - OS << utostr(Byte) << ", "; - NumToSkip |= Byte << 16; - OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; - break; - } - case MCD::OPC_Decode: - case MCD::OPC_TryDecode: { - bool IsTry = *I == MCD::OPC_TryDecode; - ++I; - // Extract the ULEB128 encoded Opcode to a buffer. - uint8_t Buffer[16], *p = Buffer; - while ((*p++ = *I++) >= 128) - assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer) - && "ULEB128 value too large!"); - // Decode the Opcode value. - unsigned Opc = decodeULEB128(Buffer); - OS.indent(Indentation) << "MCD::OPC_" << (IsTry ? "Try" : "") - << "Decode, "; - for (p = Buffer; *p >= 128; ++p) - OS << (unsigned)*p << ", "; - OS << (unsigned)*p << ", "; - - // Decoder index. - for (; *I >= 128; ++I) - OS << (unsigned)*I << ", "; - OS << (unsigned)*I++ << ", "; - - if (!IsTry) { - OS << "// Opcode: " << NumberedEncodings[Opc] << "\n"; - break; - } - - // Fallthrough for OPC_TryDecode. - - // 24-bit numtoskip value. 
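-      // (Illustrative: the three bytes below form a little-endian 24-bit
-      // value, NumToSkip = B0 | (B1 << 8) | (B2 << 16); e.g. the byte triple
-      // {5, 1, 0} decodes to 5 + 256 = 261.)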
- uint8_t Byte = *I++; - uint32_t NumToSkip = Byte; - OS << (unsigned)Byte << ", "; - Byte = *I++; - OS << (unsigned)Byte << ", "; - NumToSkip |= Byte << 8; - Byte = *I++; - OS << utostr(Byte) << ", "; - NumToSkip |= Byte << 16; - - OS << "// Opcode: " << NumberedEncodings[Opc] - << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; - break; - } - case MCD::OPC_SoftFail: { - ++I; - OS.indent(Indentation) << "MCD::OPC_SoftFail"; - // Positive mask - uint64_t Value = 0; - unsigned Shift = 0; - do { - OS << ", " << (unsigned)*I; - Value += (*I & 0x7f) << Shift; - Shift += 7; - } while (*I++ >= 128); - if (Value > 127) { - OS << " /* 0x"; - OS.write_hex(Value); - OS << " */"; - } - // Negative mask - Value = 0; - Shift = 0; - do { - OS << ", " << (unsigned)*I; - Value += (*I & 0x7f) << Shift; - Shift += 7; - } while (*I++ >= 128); - if (Value > 127) { - OS << " /* 0x"; - OS.write_hex(Value); - OS << " */"; - } - OS << ",\n"; - break; - } - case MCD::OPC_Fail: { - ++I; - OS.indent(Indentation) << "MCD::OPC_Fail,\n"; - break; - } - } - } - OS.indent(Indentation) << "0\n"; - - Indentation -= 2; - - OS.indent(Indentation) << "};\n\n"; -} - -void FixedLenDecoderEmitter:: -emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates, - unsigned Indentation) const { - // The predicate function is just a big switch statement based on the - // input predicate index. - OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " - << "const FeatureBitset &Bits) {\n"; - Indentation += 2; - if (!Predicates.empty()) { - OS.indent(Indentation) << "switch (Idx) {\n"; - OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; - unsigned Index = 0; - for (const auto &Predicate : Predicates) { - OS.indent(Indentation) << "case " << Index++ << ":\n"; - OS.indent(Indentation+2) << "return (" << Predicate << ");\n"; - } - OS.indent(Indentation) << "}\n"; - } else { - // No case statement to emit - OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; - } - Indentation -= 2; - OS.indent(Indentation) << "}\n\n"; -} - -void FixedLenDecoderEmitter:: -emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, - unsigned Indentation) const { - // The decoder function is just a big switch statement based on the - // input decoder index. - OS.indent(Indentation) << "template \n"; - OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," - << " unsigned Idx, InsnType insn, MCInst &MI,\n"; - OS.indent(Indentation) << " uint64_t " - << "Address, const void *Decoder, bool &DecodeComplete) {\n"; - Indentation += 2; - OS.indent(Indentation) << "DecodeComplete = true;\n"; - // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits - // It would be better for emitBinaryParser to use a 64-bit tmp whenever - // possible but fall back to an InsnType-sized tmp for truly large fields. 
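-  // (Illustrative: the conditional emitted below tests
-  // std::is_integral<InsnType>, so InsnType = uint32_t gives
-  // TmpType = uint32_t, while an APInt-like, non-integral InsnType falls back
-  // to TmpType = uint64_t, which is the limitation the TODO above refers to.)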
- OS.indent(Indentation) << "using TmpType = " - "std::conditional_t::" - "value, InsnType, uint64_t>;\n"; - OS.indent(Indentation) << "TmpType tmp;\n"; - OS.indent(Indentation) << "switch (Idx) {\n"; - OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; - unsigned Index = 0; - for (const auto &Decoder : Decoders) { - OS.indent(Indentation) << "case " << Index++ << ":\n"; - OS << Decoder; - OS.indent(Indentation+2) << "return S;\n"; - } - OS.indent(Indentation) << "}\n"; - Indentation -= 2; - OS.indent(Indentation) << "}\n\n"; -} - -// Populates the field of the insn given the start position and the number of -// consecutive bits to scan for. -// -// Returns false if and on the first uninitialized bit value encountered. -// Returns true, otherwise. -bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, - unsigned StartBit, unsigned NumBits) const { - Field = 0; - - for (unsigned i = 0; i < NumBits; ++i) { - if (Insn[StartBit + i] == BIT_UNSET) - return false; - - if (Insn[StartBit + i] == BIT_TRUE) - Field = Field | (1ULL << i); - } - - return true; -} - -/// dumpFilterArray - dumpFilterArray prints out debugging info for the given -/// filter array as a series of chars. -void FilterChooser::dumpFilterArray(raw_ostream &o, - const std::vector &filter) const { - for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { - switch (filter[bitIndex - 1]) { - case BIT_UNFILTERED: - o << "."; - break; - case BIT_UNSET: - o << "_"; - break; - case BIT_TRUE: - o << "1"; - break; - case BIT_FALSE: - o << "0"; - break; - } - } -} - -/// dumpStack - dumpStack traverses the filter chooser chain and calls -/// dumpFilterArray on each filter chooser up to the top level one. -void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { - const FilterChooser *current = this; - - while (current) { - o << prefix; - dumpFilterArray(o, current->FilterBitValues); - o << '\n'; - current = current->Parent; - } -} - -// Calculates the island(s) needed to decode the instruction. -// This returns a list of undecoded bits of an instructions, for example, -// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be -// decoded bits in order to verify that the instruction matches the Opcode. -unsigned FilterChooser::getIslands(std::vector &StartBits, - std::vector &EndBits, - std::vector &FieldVals, - const insn_t &Insn) const { - unsigned Num, BitNo; - Num = BitNo = 0; - - uint64_t FieldVal = 0; - - // 0: Init - // 1: Water (the bit value does not affect decoding) - // 2: Island (well-known bit value needed for decoding) - int State = 0; - - for (unsigned i = 0; i < BitWidth; ++i) { - int64_t Val = Value(Insn[i]); - bool Filtered = PositionFiltered(i); - switch (State) { - default: llvm_unreachable("Unreachable code!"); - case 0: - case 1: - if (Filtered || Val == -1) - State = 1; // Still in Water - else { - State = 2; // Into the Island - BitNo = 0; - StartBits.push_back(i); - FieldVal = Val; - } - break; - case 2: - if (Filtered || Val == -1) { - State = 1; // Into the Water - EndBits.push_back(i - 1); - FieldVals.push_back(FieldVal); - ++Num; - } else { - State = 2; // Still in Island - ++BitNo; - FieldVal = FieldVal | Val << BitNo; - } - break; - } - } - // If we are still in Island after the loop, do some housekeeping. 
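-  // (Illustrative example of the whole scan: with nothing filtered yet and
-  // Insn = {1, 1, ?, ?, 0, 1}, bit 0 first, two islands are found:
-  // bits 1-0 with value 0b11 and bits 5-4 with value 0b10, so Num == 2.)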
- if (State == 2) { - EndBits.push_back(BitWidth - 1); - FieldVals.push_back(FieldVal); - ++Num; - } - - assert(StartBits.size() == Num && EndBits.size() == Num && - FieldVals.size() == Num); - return Num; -} - -void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, - const OperandInfo &OpInfo, - bool &OpHasCompleteDecoder) const { - const std::string &Decoder = OpInfo.Decoder; - - bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; - - if (UseInsertBits) { - o.indent(Indentation) << "tmp = 0x"; - o.write_hex(OpInfo.InitValue); - o << ";\n"; - } - - for (const EncodingField &EF : OpInfo) { - o.indent(Indentation); - if (UseInsertBits) - o << "insertBits(tmp, "; - else - o << "tmp = "; - o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; - if (UseInsertBits) - o << ", " << EF.Offset << ", " << EF.Width << ')'; - else if (EF.Offset != 0) - o << " << " << EF.Offset; - o << ";\n"; - } - - if (Decoder != "") { - OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; - o.indent(Indentation) << Emitter->GuardPrefix << Decoder - << "(MI, tmp, Address, Decoder)" - << Emitter->GuardPostfix - << " { " << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ") - << "return MCDisassembler::Fail; }\n"; - } else { - OpHasCompleteDecoder = true; - o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; - } -} - -void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, - unsigned Opc, bool &HasCompleteDecoder) const { - HasCompleteDecoder = true; - - for (const auto &Op : Operands.find(Opc)->second) { - // If a custom instruction decoder was specified, use that. - if (Op.numFields() == 0 && !Op.Decoder.empty()) { - HasCompleteDecoder = Op.HasCompleteDecoder; - OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder - << "(MI, insn, Address, Decoder)" - << Emitter->GuardPostfix - << " { " << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") - << "return MCDisassembler::Fail; }\n"; - break; - } - - bool OpHasCompleteDecoder; - emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); - if (!OpHasCompleteDecoder) - HasCompleteDecoder = false; - } -} - -unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, - unsigned Opc, - bool &HasCompleteDecoder) const { - // Build up the predicate string. - SmallString<256> Decoder; - // FIXME: emitDecoder() function can take a buffer directly rather than - // a stream. - raw_svector_ostream S(Decoder); - unsigned I = 4; - emitDecoder(S, I, Opc, HasCompleteDecoder); - - // Using the full decoder string as the key value here is a bit - // heavyweight, but is effective. If the string comparisons become a - // performance concern, we can implement a mangling of the predicate - // data easily enough with a map back to the actual string. That's - // overkill for now, though. - - // Make sure the predicate is in the table. - Decoders.insert(CachedHashString(Decoder)); - // Now figure out the index for when we write out the table. 
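-  // (Note, derived from the container type above: Decoders is a
-  // SmallSetVector, so re-inserting an identical decoder body is a no-op and
-  // opcodes with the same field layout share one case in the generated
-  // decodeToMCInst() switch.)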
- DecoderSet::const_iterator P = find(Decoders, Decoder.str()); - return (unsigned)(P - Decoders.begin()); -} - -bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, - unsigned Opc) const { - ListInit *Predicates = - AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); - bool IsFirstEmission = true; - for (unsigned i = 0; i < Predicates->size(); ++i) { - Record *Pred = Predicates->getElementAsRecord(i); - if (!Pred->getValue("AssemblerMatcherPredicate")) - continue; - - if (!isa(Pred->getValue("AssemblerCondDag")->getValue())) - continue; - - const DagInit *D = Pred->getValueAsDag("AssemblerCondDag"); - std::string CombineType = D->getOperator()->getAsString(); - if (CombineType != "any_of" && CombineType != "all_of") - PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); - if (D->getNumArgs() == 0) - PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); - bool IsOr = CombineType == "any_of"; - - if (!IsFirstEmission) - o << " && "; - - if (IsOr) - o << "("; - - ListSeparator LS(IsOr ? " || " : " && "); - for (auto *Arg : D->getArgs()) { - o << LS; - if (auto *NotArg = dyn_cast(Arg)) { - if (NotArg->getOperator()->getAsString() != "not" || - NotArg->getNumArgs() != 1) - PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); - Arg = NotArg->getArg(0); - o << "!"; - } - if (!isa(Arg) || - !cast(Arg)->getDef()->isSubClassOf("SubtargetFeature")) - PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); - o << "Bits[" << Emitter->PredicateNamespace << "::" << Arg->getAsString() - << "]"; - } - - if (IsOr) - o << ")"; - - IsFirstEmission = false; - } - return !Predicates->empty(); -} - -bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { - ListInit *Predicates = - AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); - for (unsigned i = 0; i < Predicates->size(); ++i) { - Record *Pred = Predicates->getElementAsRecord(i); - if (!Pred->getValue("AssemblerMatcherPredicate")) - continue; - - if (isa(Pred->getValue("AssemblerCondDag")->getValue())) - return true; - } - return false; -} - -unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, - StringRef Predicate) const { - // Using the full predicate string as the key value here is a bit - // heavyweight, but is effective. If the string comparisons become a - // performance concern, we can implement a mangling of the predicate - // data easily enough with a map back to the actual string. That's - // overkill for now, though. - - // Make sure the predicate is in the table. - TableInfo.Predicates.insert(CachedHashString(Predicate)); - // Now figure out the index for when we write out the table. - PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); - return (unsigned)(P - TableInfo.Predicates.begin()); -} - -void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, - unsigned Opc) const { - if (!doesOpcodeNeedPredicate(Opc)) - return; - - // Build up the predicate string. - SmallString<256> Predicate; - // FIXME: emitPredicateMatch() functions can take a buffer directly rather - // than a stream. - raw_svector_ostream PS(Predicate); - unsigned I = 0; - emitPredicateMatch(PS, I, Opc); - - // Figure out the index into the predicate table for the predicate just - // computed. 
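-  // (Illustrative: the index is then ULEB128-encoded below; e.g. the value
-  // 300 is emitted as the two bytes {0xAC, 0x02}, i.e. (300 & 0x7f) | 0x80
-  // followed by 300 >> 7.)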
- unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); - SmallString<16> PBytes; - raw_svector_ostream S(PBytes); - encodeULEB128(PIdx, S); - - TableInfo.Table.push_back(MCD::OPC_CheckPredicate); - // Predicate index - for (unsigned i = 0, e = PBytes.size(); i != e; ++i) - TableInfo.Table.push_back(PBytes[i]); - // Push location for NumToSkip backpatching. - TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); - TableInfo.Table.push_back(0); - TableInfo.Table.push_back(0); - TableInfo.Table.push_back(0); -} - -void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, - unsigned Opc) const { - BitsInit *SFBits = - AllInstructions[Opc].EncodingDef->getValueAsBitsInit("SoftFail"); - if (!SFBits) return; - BitsInit *InstBits = - AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst"); - - APInt PositiveMask(BitWidth, 0ULL); - APInt NegativeMask(BitWidth, 0ULL); - for (unsigned i = 0; i < BitWidth; ++i) { - bit_value_t B = bitFromBits(*SFBits, i); - bit_value_t IB = bitFromBits(*InstBits, i); - - if (B != BIT_TRUE) continue; - - switch (IB) { - case BIT_FALSE: - // The bit is meant to be false, so emit a check to see if it is true. - PositiveMask.setBit(i); - break; - case BIT_TRUE: - // The bit is meant to be true, so emit a check to see if it is false. - NegativeMask.setBit(i); - break; - default: - // The bit is not set; this must be an error! - errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " - << AllInstructions[Opc] << " is set but Inst{" << i - << "} is unset!\n" - << " - You can only mark a bit as SoftFail if it is fully defined" - << " (1/0 - not '?') in Inst\n"; - return; - } - } - - bool NeedPositiveMask = PositiveMask.getBoolValue(); - bool NeedNegativeMask = NegativeMask.getBoolValue(); - - if (!NeedPositiveMask && !NeedNegativeMask) - return; - - TableInfo.Table.push_back(MCD::OPC_SoftFail); - - SmallString<16> MaskBytes; - raw_svector_ostream S(MaskBytes); - if (NeedPositiveMask) { - encodeULEB128(PositiveMask.getZExtValue(), S); - for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) - TableInfo.Table.push_back(MaskBytes[i]); - } else - TableInfo.Table.push_back(0); - if (NeedNegativeMask) { - MaskBytes.clear(); - encodeULEB128(NegativeMask.getZExtValue(), S); - for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) - TableInfo.Table.push_back(MaskBytes[i]); - } else - TableInfo.Table.push_back(0); -} - -// Emits table entries to decode the singleton. -void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, - EncodingIDAndOpcode Opc) const { - std::vector StartBits; - std::vector EndBits; - std::vector FieldVals; - insn_t Insn; - insnWithID(Insn, Opc.EncodingID); - - // Look for islands of undecoded bits of the singleton. - getIslands(StartBits, EndBits, FieldVals, Insn); - - unsigned Size = StartBits.size(); - - // Emit the predicate table entry if one is needed. - emitPredicateTableEntry(TableInfo, Opc.EncodingID); - - // Check any additional encoding fields needed. - for (unsigned I = Size; I != 0; --I) { - unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1; - TableInfo.Table.push_back(MCD::OPC_CheckField); - TableInfo.Table.push_back(StartBits[I-1]); - TableInfo.Table.push_back(NumBits); - uint8_t Buffer[16], *p; - encodeULEB128(FieldVals[I-1], Buffer); - for (p = Buffer; *p >= 128 ; ++p) - TableInfo.Table.push_back(*p); - TableInfo.Table.push_back(*p); - // Push location for NumToSkip backpatching. 
- TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); - // The fixup is always 24-bits, so go ahead and allocate the space - // in the table so all our relative position calculations work OK even - // before we fully resolve the real value here. - TableInfo.Table.push_back(0); - TableInfo.Table.push_back(0); - TableInfo.Table.push_back(0); - } - - // Check for soft failure of the match. - emitSoftFailTableEntry(TableInfo, Opc.EncodingID); - - bool HasCompleteDecoder; - unsigned DIdx = - getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); - - // Produce OPC_Decode or OPC_TryDecode opcode based on the information - // whether the instruction decoder is complete or not. If it is complete - // then it handles all possible values of remaining variable/unfiltered bits - // and for any value can determine if the bitpattern is a valid instruction - // or not. This means OPC_Decode will be the final step in the decoding - // process. If it is not complete, then the Fail return code from the - // decoder method indicates that additional processing should be done to see - // if there is any other instruction that also matches the bitpattern and - // can decode it. - TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode : - MCD::OPC_TryDecode); - NumEncodingsSupported++; - uint8_t Buffer[16], *p; - encodeULEB128(Opc.Opcode, Buffer); - for (p = Buffer; *p >= 128 ; ++p) - TableInfo.Table.push_back(*p); - TableInfo.Table.push_back(*p); - - SmallString<16> Bytes; - raw_svector_ostream S(Bytes); - encodeULEB128(DIdx, S); - - // Decoder index - for (unsigned i = 0, e = Bytes.size(); i != e; ++i) - TableInfo.Table.push_back(Bytes[i]); - - if (!HasCompleteDecoder) { - // Push location for NumToSkip backpatching. - TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); - // Allocate the space for the fixup. - TableInfo.Table.push_back(0); - TableInfo.Table.push_back(0); - TableInfo.Table.push_back(0); - } -} - -// Emits table entries to decode the singleton, and then to decode the rest. -void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, - const Filter &Best) const { - EncodingIDAndOpcode Opc = Best.getSingletonOpc(); - - // complex singletons need predicate checks from the first singleton - // to refer forward to the variable filterchooser that follows. - TableInfo.FixupStack.emplace_back(); - - emitSingletonTableEntry(TableInfo, Opc); - - resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), - TableInfo.Table.size()); - TableInfo.FixupStack.pop_back(); - - Best.getVariableFC().emitTableEntries(TableInfo); -} - -// Assign a single filter and run with it. Top level API client can initialize -// with a single filter to start the filtering process. -void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, - bool mixed) { - Filters.clear(); - Filters.emplace_back(*this, startBit, numBit, true); - BestIndex = 0; // Sole Filter instance to choose from. - bestFilter().recurse(); -} - -// reportRegion is a helper function for filterProcessor to mark a region as -// eligible for use as a filter region. 
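-// (Illustrative summary of the logic below: on the exact pass
-// (AllowMixed == false) an ATTR_ALL_SET run of well-known bits becomes a
-// Filter with mixed == false; on the mixed pass (AllowMixed == true) only
-// ATTR_MIXED runs, containing both known and '?' bits, are turned into
-// filters.)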
-void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, - unsigned BitIndex, bool AllowMixed) { - if (RA == ATTR_MIXED && AllowMixed) - Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); - else if (RA == ATTR_ALL_SET && !AllowMixed) - Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); -} - -// FilterProcessor scans the well-known encoding bits of the instructions and -// builds up a list of candidate filters. It chooses the best filter and -// recursively descends down the decoding tree. -bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { - Filters.clear(); - BestIndex = -1; - unsigned numInstructions = Opcodes.size(); - - assert(numInstructions && "Filter created with no instructions"); - - // No further filtering is necessary. - if (numInstructions == 1) - return true; - - // Heuristics. See also doFilter()'s "Heuristics" comment when num of - // instructions is 3. - if (AllowMixed && !Greedy) { - assert(numInstructions == 3); - - for (auto Opcode : Opcodes) { - std::vector StartBits; - std::vector EndBits; - std::vector FieldVals; - insn_t Insn; - - insnWithID(Insn, Opcode.EncodingID); - - // Look for islands of undecoded bits of any instruction. - if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { - // Found an instruction with island(s). Now just assign a filter. - runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); - return true; - } - } - } - - unsigned BitIndex; - - // We maintain BIT_WIDTH copies of the bitAttrs automaton. - // The automaton consumes the corresponding bit from each - // instruction. - // - // Input symbols: 0, 1, and _ (unset). - // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. - // Initial state: NONE. - // - // (NONE) ------- [01] -> (ALL_SET) - // (NONE) ------- _ ----> (ALL_UNSET) - // (ALL_SET) ---- [01] -> (ALL_SET) - // (ALL_SET) ---- _ ----> (MIXED) - // (ALL_UNSET) -- [01] -> (MIXED) - // (ALL_UNSET) -- _ ----> (ALL_UNSET) - // (MIXED) ------ . ----> (MIXED) - // (FILTERED)---- . ----> (FILTERED) - - std::vector bitAttrs; - - // FILTERED bit positions provide no entropy and are not worthy of pursuing. - // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. - for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) - if (FilterBitValues[BitIndex] == BIT_TRUE || - FilterBitValues[BitIndex] == BIT_FALSE) - bitAttrs.push_back(ATTR_FILTERED); - else - bitAttrs.push_back(ATTR_NONE); - - for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { - insn_t insn; - - insnWithID(insn, Opcodes[InsnIndex].EncodingID); - - for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { - switch (bitAttrs[BitIndex]) { - case ATTR_NONE: - if (insn[BitIndex] == BIT_UNSET) - bitAttrs[BitIndex] = ATTR_ALL_UNSET; - else - bitAttrs[BitIndex] = ATTR_ALL_SET; - break; - case ATTR_ALL_SET: - if (insn[BitIndex] == BIT_UNSET) - bitAttrs[BitIndex] = ATTR_MIXED; - break; - case ATTR_ALL_UNSET: - if (insn[BitIndex] != BIT_UNSET) - bitAttrs[BitIndex] = ATTR_MIXED; - break; - case ATTR_MIXED: - case ATTR_FILTERED: - break; - } - } - } - - // The regionAttr automaton consumes the bitAttrs automatons' state, - // lowest-to-highest. 
- // - // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) - // States: NONE, ALL_SET, MIXED - // Initial state: NONE - // - // (NONE) ----- F --> (NONE) - // (NONE) ----- S --> (ALL_SET) ; and set region start - // (NONE) ----- U --> (NONE) - // (NONE) ----- M --> (MIXED) ; and set region start - // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region - // (ALL_SET) -- S --> (ALL_SET) - // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region - // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region - // (MIXED) ---- F --> (NONE) ; and report a MIXED region - // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region - // (MIXED) ---- U --> (NONE) ; and report a MIXED region - // (MIXED) ---- M --> (MIXED) - - bitAttr_t RA = ATTR_NONE; - unsigned StartBit = 0; - - for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { - bitAttr_t bitAttr = bitAttrs[BitIndex]; - - assert(bitAttr != ATTR_NONE && "Bit without attributes"); - - switch (RA) { - case ATTR_NONE: - switch (bitAttr) { - case ATTR_FILTERED: - break; - case ATTR_ALL_SET: - StartBit = BitIndex; - RA = ATTR_ALL_SET; - break; - case ATTR_ALL_UNSET: - break; - case ATTR_MIXED: - StartBit = BitIndex; - RA = ATTR_MIXED; - break; - default: - llvm_unreachable("Unexpected bitAttr!"); - } - break; - case ATTR_ALL_SET: - switch (bitAttr) { - case ATTR_FILTERED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - RA = ATTR_NONE; - break; - case ATTR_ALL_SET: - break; - case ATTR_ALL_UNSET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - RA = ATTR_NONE; - break; - case ATTR_MIXED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - StartBit = BitIndex; - RA = ATTR_MIXED; - break; - default: - llvm_unreachable("Unexpected bitAttr!"); - } - break; - case ATTR_MIXED: - switch (bitAttr) { - case ATTR_FILTERED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - StartBit = BitIndex; - RA = ATTR_NONE; - break; - case ATTR_ALL_SET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - StartBit = BitIndex; - RA = ATTR_ALL_SET; - break; - case ATTR_ALL_UNSET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - RA = ATTR_NONE; - break; - case ATTR_MIXED: - break; - default: - llvm_unreachable("Unexpected bitAttr!"); - } - break; - case ATTR_ALL_UNSET: - llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); - case ATTR_FILTERED: - llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); - } - } - - // At the end, if we're still in ALL_SET or MIXED states, report a region - switch (RA) { - case ATTR_NONE: - break; - case ATTR_FILTERED: - break; - case ATTR_ALL_SET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - break; - case ATTR_ALL_UNSET: - break; - case ATTR_MIXED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); - break; - } - - // We have finished with the filter processings. Now it's time to choose - // the best performing filter. - BestIndex = 0; - bool AllUseless = true; - unsigned BestScore = 0; - - for (unsigned i = 0, e = Filters.size(); i != e; ++i) { - unsigned Usefulness = Filters[i].usefulness(); - - if (Usefulness) - AllUseless = false; - - if (Usefulness > BestScore) { - BestIndex = i; - BestScore = Usefulness; - } - } - - if (!AllUseless) - bestFilter().recurse(); - - return !AllUseless; -} // end of FilterChooser::filterProcessor(bool) - -// Decides on the best configuration of filter(s) to use in order to decode -// the instructions. A conflict of instructions may occur, in which case we -// dump the conflict set to the standard error. 
-void FilterChooser::doFilter() {
-  unsigned Num = Opcodes.size();
-  assert(Num && "FilterChooser created with no instructions");
-
-  // Try regions of consecutive known bit values first.
-  if (filterProcessor(false))
-    return;
-
-  // Then regions of mixed bits (both known and uninitialized bit values
-  // allowed).
-  if (filterProcessor(true))
-    return;
-
-  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
-  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
-  // well-known encoding pattern. In such case, we backtrack and scan for the
-  // very first consecutive ATTR_ALL_SET region and assign a filter to it.
-  if (Num == 3 && filterProcessor(true, false))
-    return;
-
-  // If we come to here, the instruction decoding has failed.
-  // Set the BestIndex to -1 to indicate so.
-  BestIndex = -1;
-}
-
-// emitTableEntries - Emit state machine entries to decode our share of
-// instructions.
-void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
-  if (Opcodes.size() == 1) {
-    // There is only one instruction in the set, which is great!
-    // Call emitSingletonDecoder() to see whether there are any remaining
-    // encoding bits.
-    emitSingletonTableEntry(TableInfo, Opcodes[0]);
-    return;
-  }
-
-  // Choose the best filter to do the decodings!
-  if (BestIndex != -1) {
-    const Filter &Best = Filters[BestIndex];
-    if (Best.getNumFiltered() == 1)
-      emitSingletonTableEntry(TableInfo, Best);
-    else
-      Best.emitTableEntry(TableInfo);
-    return;
-  }
-
-  // We don't know how to decode these instructions! Dump the
-  // conflict set and bail.
-
-  // Print out useful conflict information for postmortem analysis.
-  errs() << "Decoding Conflict:\n";
-
-  dumpStack(errs(), "\t\t");
-
-  for (auto Opcode : Opcodes) {
-    errs() << '\t';
-    emitNameWithID(errs(), Opcode.EncodingID);
-    errs() << " ";
-    dumpBits(
-        errs(),
-        getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
-    errs() << '\n';
-  }
-}
-
-static std::string findOperandDecoderMethod(TypedInit *TI) {
-  std::string Decoder;
-
-  Record *Record = cast<DefInit>(TI)->getDef();
-
-  RecordVal *DecoderString = Record->getValue("DecoderMethod");
-  StringInit *String = DecoderString ?
-      dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
-  if (String) {
-    Decoder = std::string(String->getValue());
-    if (!Decoder.empty())
-      return Decoder;
-  }
-
-  if (Record->isSubClassOf("RegisterOperand"))
-    Record = Record->getValueAsDef("RegClass");
-
-  if (Record->isSubClassOf("RegisterClass")) {
-    Decoder = "Decode" + Record->getName().str() + "RegisterClass";
-  } else if (Record->isSubClassOf("PointerLikeRegClass")) {
-    Decoder = "DecodePointerLikeRegClass" +
-              utostr(Record->getValueAsInt("RegClassKind"));
-  }
-
-  return Decoder;
-}
-
-static bool
-populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
-                    const CodeGenInstruction &CGI, unsigned Opc,
-                    std::map<unsigned, std::vector<OperandInfo>> &Operands) {
-  const Record &Def = *CGI.TheDef;
-  // If all the bit positions are not specified, do not decode this instruction.
-  // We are bound to fail! For proper disassembly, the well-known encoding bits
-  // of the instruction must be fully specified.
-
-  BitsInit &Bits = getBitsField(EncodingDef, "Inst");
-  if (Bits.allInComplete()) return false;
-
-  std::vector<OperandInfo> InsnOperands;
-
-  // If the instruction has specified a custom decoding hook, use that instead
-  // of trying to auto-generate the decoder.
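-  // (Illustrative .td usage; the decoder name is hypothetical:
-  //   let DecoderMethod = "DecodeMyInst";
-  //   let hasCompleteDecoder = 0;
-  // routes the entire instruction through DecodeMyInst() instead of the
-  // auto-generated per-operand extraction below.)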
- StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); - if (InstDecoder != "") { - bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder"); - InsnOperands.push_back( - OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); - Operands[Opc] = InsnOperands; - return true; - } - - // Generate a description of the operand of the instruction that we know - // how to decode automatically. - // FIXME: We'll need to have a way to manually override this as needed. - - // Gather the outputs/inputs of the instruction, so we can find their - // positions in the encoding. This assumes for now that they appear in the - // MCInst in the order that they're listed. - std::vector> InOutOperands; - DagInit *Out = Def.getValueAsDag("OutOperandList"); - DagInit *In = Def.getValueAsDag("InOperandList"); - for (unsigned i = 0; i < Out->getNumArgs(); ++i) - InOutOperands.push_back(std::make_pair(Out->getArg(i), - Out->getArgNameStr(i))); - for (unsigned i = 0; i < In->getNumArgs(); ++i) - InOutOperands.push_back(std::make_pair(In->getArg(i), - In->getArgNameStr(i))); - - // Search for tied operands, so that we can correctly instantiate - // operands that are not explicitly represented in the encoding. - std::map TiedNames; - for (unsigned i = 0; i < CGI.Operands.size(); ++i) { - int tiedTo = CGI.Operands[i].getTiedRegister(); - if (tiedTo != -1) { - std::pair SO = - CGI.Operands.getSubOperandNumber(tiedTo); - TiedNames[std::string(InOutOperands[i].second)] = - std::string(InOutOperands[SO.first].second); - TiedNames[std::string(InOutOperands[SO.first].second)] = - std::string(InOutOperands[i].second); - } - } - - std::map> NumberedInsnOperands; - std::set NumberedInsnOperandsNoTie; - if (Target.getInstructionSet()-> - getValueAsBit("decodePositionallyEncodedOperands")) { - const std::vector &Vals = Def.getValues(); - unsigned NumberedOp = 0; - - std::set NamedOpIndices; - if (Target.getInstructionSet()-> - getValueAsBit("noNamedPositionallyEncodedOperands")) - // Collect the set of operand indices that might correspond to named - // operand, and skip these when assigning operands based on position. - for (unsigned i = 0, e = Vals.size(); i != e; ++i) { - unsigned OpIdx; - if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx)) - continue; - - NamedOpIndices.insert(OpIdx); - } - - for (unsigned i = 0, e = Vals.size(); i != e; ++i) { - // Ignore fixed fields in the record, we're looking for values like: - // bits<5> RST = { ?, ?, ?, ?, ? }; - if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete()) - continue; - - // Determine if Vals[i] actually contributes to the Inst encoding. - unsigned bi = 0; - for (; bi < Bits.getNumBits(); ++bi) { - VarInit *Var = nullptr; - VarBitInit *BI = dyn_cast(Bits.getBit(bi)); - if (BI) - Var = dyn_cast(BI->getBitVar()); - else - Var = dyn_cast(Bits.getBit(bi)); - - if (Var && Var->getName() == Vals[i].getName()) - break; - } - - if (bi == Bits.getNumBits()) - continue; - - // Skip variables that correspond to explicitly-named operands. 
- unsigned OpIdx; - if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx)) - continue; - - // Get the bit range for this operand: - unsigned bitStart = bi++, bitWidth = 1; - for (; bi < Bits.getNumBits(); ++bi) { - VarInit *Var = nullptr; - VarBitInit *BI = dyn_cast(Bits.getBit(bi)); - if (BI) - Var = dyn_cast(BI->getBitVar()); - else - Var = dyn_cast(Bits.getBit(bi)); - - if (!Var) - break; - - if (Var->getName() != Vals[i].getName()) - break; - - ++bitWidth; - } - - unsigned NumberOps = CGI.Operands.size(); - while (NumberedOp < NumberOps && - (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || - (!NamedOpIndices.empty() && NamedOpIndices.count( - CGI.Operands.getSubOperandNumber(NumberedOp).first)))) - ++NumberedOp; - - OpIdx = NumberedOp++; - - // OpIdx now holds the ordered operand number of Vals[i]. - std::pair SO = - CGI.Operands.getSubOperandNumber(OpIdx); - const std::string &Name = CGI.Operands[SO.first].Name; - - LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName() - << ": " << Name << "(" << SO.first << ", " << SO.second - << ") => " << Vals[i].getName() << "\n"); - - std::string Decoder; - Record *TypeRecord = CGI.Operands[SO.first].Rec; - - RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod"); - StringInit *String = DecoderString ? - dyn_cast(DecoderString->getValue()) : nullptr; - if (String && String->getValue() != "") - Decoder = std::string(String->getValue()); - - if (Decoder == "" && - CGI.Operands[SO.first].MIOperandInfo && - CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) { - Init *Arg = CGI.Operands[SO.first].MIOperandInfo-> - getArg(SO.second); - if (DefInit *DI = cast(Arg)) - TypeRecord = DI->getDef(); - } - - bool isReg = false; - if (TypeRecord->isSubClassOf("RegisterOperand")) - TypeRecord = TypeRecord->getValueAsDef("RegClass"); - if (TypeRecord->isSubClassOf("RegisterClass")) { - Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass"; - isReg = true; - } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) { - Decoder = "DecodePointerLikeRegClass" + - utostr(TypeRecord->getValueAsInt("RegClassKind")); - isReg = true; - } - - DecoderString = TypeRecord->getValue("DecoderMethod"); - String = DecoderString ? - dyn_cast(DecoderString->getValue()) : nullptr; - if (!isReg && String && String->getValue() != "") - Decoder = std::string(String->getValue()); - - RecordVal *HasCompleteDecoderVal = - TypeRecord->getValue("hasCompleteDecoder"); - BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ? - dyn_cast(HasCompleteDecoderVal->getValue()) : nullptr; - bool HasCompleteDecoder = HasCompleteDecoderBit ? - HasCompleteDecoderBit->getValue() : true; - - OperandInfo OpInfo(Decoder, HasCompleteDecoder); - OpInfo.addField(bitStart, bitWidth, 0); - - NumberedInsnOperands[Name].push_back(OpInfo); - - // FIXME: For complex operands with custom decoders we can't handle tied - // sub-operands automatically. Skip those here and assume that this is - // fixed up elsewhere. - if (CGI.Operands[SO.first].MIOperandInfo && - CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && - String && String->getValue() != "") - NumberedInsnOperandsNoTie.insert(Name); - } - } - - // For each operand, see if we can figure out where it is encoded. 
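-  // (Illustrative, with hypothetical record contents: given
-  //   bits<4> Rd;
-  //   let Inst{15-12} = Rd;
-  // the scan below produces a single field (Base = 12, Width = 4, Offset = 0)
-  // for operand "Rd", which emitBinaryParser() then renders as
-  //   tmp = fieldFromInstruction(insn, 12, 4);)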
- for (const auto &Op : InOutOperands) { - if (!NumberedInsnOperands[std::string(Op.second)].empty()) { - llvm::append_range(InsnOperands, - NumberedInsnOperands[std::string(Op.second)]); - continue; - } - if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) { - if (!NumberedInsnOperandsNoTie.count(TiedNames[std::string(Op.second)])) { - // Figure out to which (sub)operand we're tied. - unsigned i = - CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]); - int tiedTo = CGI.Operands[i].getTiedRegister(); - if (tiedTo == -1) { - i = CGI.Operands.getOperandNamed(Op.second); - tiedTo = CGI.Operands[i].getTiedRegister(); - } - - if (tiedTo != -1) { - std::pair SO = - CGI.Operands.getSubOperandNumber(tiedTo); - - InsnOperands.push_back( - NumberedInsnOperands[TiedNames[std::string(Op.second)]] - [SO.second]); - } - } - continue; - } - - TypedInit *TI = cast(Op.first); - - // At this point, we can locate the decoder field, but we need to know how - // to interpret it. As a first step, require the target to provide - // callbacks for decoding register classes. - std::string Decoder = findOperandDecoderMethod(TI); - Record *TypeRecord = cast(TI)->getDef(); - - RecordVal *HasCompleteDecoderVal = - TypeRecord->getValue("hasCompleteDecoder"); - BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ? - dyn_cast(HasCompleteDecoderVal->getValue()) : nullptr; - bool HasCompleteDecoder = HasCompleteDecoderBit ? - HasCompleteDecoderBit->getValue() : true; - - OperandInfo OpInfo(Decoder, HasCompleteDecoder); - - // Some bits of the operand may be required to be 1 depending on the - // instruction's encoding. Collect those bits. - if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second)) - if (const BitsInit *OpBits = dyn_cast(EncodedValue->getValue())) - for (unsigned I = 0; I < OpBits->getNumBits(); ++I) - if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) - if (OpBit->getValue()) - OpInfo.InitValue |= 1ULL << I; - - unsigned Base = ~0U; - unsigned Width = 0; - unsigned Offset = 0; - - for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) { - VarInit *Var = nullptr; - VarBitInit *BI = dyn_cast(Bits.getBit(bi)); - if (BI) - Var = dyn_cast(BI->getBitVar()); - else - Var = dyn_cast(Bits.getBit(bi)); - - if (!Var) { - if (Base != ~0U) { - OpInfo.addField(Base, Width, Offset); - Base = ~0U; - Width = 0; - Offset = 0; - } - continue; - } - - if (Var->getName() != Op.second && - Var->getName() != TiedNames[std::string(Op.second)]) { - if (Base != ~0U) { - OpInfo.addField(Base, Width, Offset); - Base = ~0U; - Width = 0; - Offset = 0; - } - continue; - } - - if (Base == ~0U) { - Base = bi; - Width = 1; - Offset = BI ? BI->getBitNum() : 0; - } else if (BI && BI->getBitNum() != Offset + Width) { - OpInfo.addField(Base, Width, Offset); - Base = bi; - Width = 1; - Offset = BI->getBitNum(); - } else { - ++Width; - } - } - - if (Base != ~0U) - OpInfo.addField(Base, Width, Offset); - - if (OpInfo.numFields() > 0) - InsnOperands.push_back(OpInfo); - } - - Operands[Opc] = InsnOperands; - -#if 0 - LLVM_DEBUG({ - // Dumps the instruction encoding bits. - dumpBits(errs(), Bits); - - errs() << '\n'; - - // Dumps the list of operand info. 
-      for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
-        const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
-        const std::string &OperandName = Info.Name;
-        const Record &OperandDef = *Info.Rec;
-
-        errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
-      }
-    });
-#endif
-
-  return true;
-}
-
-// emitFieldFromInstruction - Emit the templated helper function
-// fieldFromInstruction().
-// On Windows we make sure that this function is not inlined when
-// using the VS compiler. It has a bug which causes the function
-// to be optimized out in some circumstances. See llvm.org/pr38292
-static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
-  OS << "// Helper functions for extracting fields from encoded instructions.\n"
-     << "// InsnType must either be integral or an APInt-like object that " "must:\n"
-     << "// * be default-constructible and copy-constructible\n"
-     << "// * be constructible from a uint64_t\n"
-     << "// * be constructible from an APInt (this can be private)\n"
-     << "// * Support insertBits(bits, startBit, numBits)\n"
-     << "// * Support extractBitsAsZExtValue(numBits, startBit)\n"
-     << "// * be convertible to bool\n"
-     << "// * Support the ~, &, ==, and != operators with other objects of " "the same type\n"
-     << "// * Support put (<<) to raw_ostream&\n"
-     << "template <typename InsnType>\n"
-     << "#if defined(_MSC_VER) && !defined(__clang__)\n"
-     << "__declspec(noinline)\n"
-     << "#endif\n"
-     << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n"
-     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
-     << "                     unsigned numBits) {\n"
-     << "  assert(startBit + numBits <= 64 && \"Cannot support >64-bit " "extractions!\");\n"
-     << "  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n"
-     << "         \"Instruction field out of bounds!\");\n"
-     << "  InsnType fieldMask;\n"
-     << "  if (numBits == sizeof(InsnType) * 8)\n"
-     << "    fieldMask = (InsnType)(-1LL);\n"
-     << "  else\n"
-     << "    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
-     << "  return (insn & fieldMask) >> startBit;\n"
-     << "}\n"
-     << "\n"
-     << "template <typename InsnType>\n"
-     << "static std::enable_if_t<!std::is_integral<InsnType>::value, " "uint64_t>\n"
-     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
-     << "                     unsigned numBits) {\n"
-     << "  return insn.extractBitsAsZExtValue(numBits, startBit);\n"
-     << "}\n\n";
-}
-
-// emitInsertBits - Emit the templated helper function insertBits().
-static void emitInsertBits(formatted_raw_ostream &OS) {
-  OS << "// Helper function for inserting bits extracted from an encoded " "instruction into\n"
-     << "// a field.\n"
-     << "template <typename InsnType>\n"
-     << "static std::enable_if_t<std::is_integral<InsnType>::value>\n"
-     << "insertBits(InsnType &field, InsnType bits, unsigned startBit, " "unsigned numBits) {\n"
-     << "  assert(startBit + numBits <= sizeof field * 8);\n"
-     << "  field |= (InsnType)bits << startBit;\n"
-     << "}\n"
-     << "\n"
-     << "template <typename InsnType>\n"
-     << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n"
-     << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, " "unsigned numBits) {\n"
-     << "  field.insertBits(bits, startBit, numBits);\n"
-     << "}\n\n";
-}
-
-// emitDecodeInstruction - Emit the templated helper function
-// decodeInstruction().
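Before decodeInstruction itself, a self-contained illustration of the mask-and-shift logic of the generated fieldFromInstruction helper above, assuming a plain 32-bit integral encoding; the instruction word and field positions are invented for the example:

```cpp
#include <cassert>
#include <cstdint>

// Same semantics as the generated integral overload, fixed to uint32_t.
static uint32_t fieldFromInsn(uint32_t insn, unsigned startBit,
                              unsigned numBits) {
  assert(startBit + numBits <= 32 && "field out of bounds");
  uint32_t mask =
      (numBits == 32) ? ~0u : ((1u << numBits) - 1) << startBit;
  return (insn & mask) >> startBit;
}

int main() {
  // 1101 <src=0110> 1001 <dst=0011>, packed MSB-first into 16 bits.
  uint32_t insn = 0b1101'0110'1001'0011;
  assert(fieldFromInsn(insn, 12, 4) == 0b1101); // fixed opcode bits
  assert(fieldFromInsn(insn, 8, 4) == 0b0110);  // the src operand field
  return 0;
}
```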
-static void emitDecodeInstruction(formatted_raw_ostream &OS) { - OS << "template \n" - << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], " - "MCInst &MI,\n" - << " InsnType insn, uint64_t " - "Address,\n" - << " const void *DisAsm,\n" - << " const MCSubtargetInfo &STI) {\n" - << " const FeatureBitset &Bits = STI.getFeatureBits();\n" - << "\n" - << " const uint8_t *Ptr = DecodeTable;\n" - << " InsnType CurFieldValue = 0;\n" - << " DecodeStatus S = MCDisassembler::Success;\n" - << " while (true) {\n" - << " ptrdiff_t Loc = Ptr - DecodeTable;\n" - << " switch (*Ptr) {\n" - << " default:\n" - << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n" - << " return MCDisassembler::Fail;\n" - << " case MCD::OPC_ExtractField: {\n" - << " unsigned Start = *++Ptr;\n" - << " unsigned Len = *++Ptr;\n" - << " ++Ptr;\n" - << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << " - "\", \"\n" - << " << Len << \"): \" << CurFieldValue << \"\\n\");\n" - << " break;\n" - << " }\n" - << " case MCD::OPC_FilterValue: {\n" - << " // Decode the field value.\n" - << " unsigned Len;\n" - << " InsnType Val = decodeULEB128(++Ptr, &Len);\n" - << " Ptr += Len;\n" - << " // NumToSkip is a plain 24-bit integer.\n" - << " unsigned NumToSkip = *Ptr++;\n" - << " NumToSkip |= (*Ptr++) << 8;\n" - << " NumToSkip |= (*Ptr++) << 16;\n" - << "\n" - << " // Perform the filter operation.\n" - << " if (Val != CurFieldValue)\n" - << " Ptr += NumToSkip;\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << " - "\", \" << NumToSkip\n" - << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" " - ": \"PASS:\")\n" - << " << \" continuing at \" << (Ptr - DecodeTable) << " - "\"\\n\");\n" - << "\n" - << " break;\n" - << " }\n" - << " case MCD::OPC_CheckField: {\n" - << " unsigned Start = *++Ptr;\n" - << " unsigned Len = *++Ptr;\n" - << " InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n" - << " // Decode the field value.\n" - << " InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n" - << " Ptr += Len;\n" - << " // NumToSkip is a plain 24-bit integer.\n" - << " unsigned NumToSkip = *Ptr++;\n" - << " NumToSkip |= (*Ptr++) << 8;\n" - << " NumToSkip |= (*Ptr++) << 16;\n" - << "\n" - << " // If the actual and expected values don't match, skip.\n" - << " if (ExpectedValue != FieldValue)\n" - << " Ptr += NumToSkip;\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << " - "\", \"\n" - << " << Len << \", \" << ExpectedValue << \", \" << " - "NumToSkip\n" - << " << \"): FieldValue = \" << FieldValue << \", " - "ExpectedValue = \"\n" - << " << ExpectedValue << \": \"\n" - << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : " - "\"FAIL\\n\"));\n" - << " break;\n" - << " }\n" - << " case MCD::OPC_CheckPredicate: {\n" - << " unsigned Len;\n" - << " // Decode the Predicate Index value.\n" - << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n" - << " Ptr += Len;\n" - << " // NumToSkip is a plain 24-bit integer.\n" - << " unsigned NumToSkip = *Ptr++;\n" - << " NumToSkip |= (*Ptr++) << 8;\n" - << " NumToSkip |= (*Ptr++) << 16;\n" - << " // Check the predicate.\n" - << " bool Pred;\n" - << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n" - << " Ptr += NumToSkip;\n" - << " (void)Pred;\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx " - "<< \"): \"\n" - << " << (Pred ? 
\"PASS\\n\" : \"FAIL\\n\"));\n" - << "\n" - << " break;\n" - << " }\n" - << " case MCD::OPC_Decode: {\n" - << " unsigned Len;\n" - << " // Decode the Opcode value.\n" - << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" - << " Ptr += Len;\n" - << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" - << " Ptr += Len;\n" - << "\n" - << " MI.clear();\n" - << " MI.setOpcode(Opc);\n" - << " bool DecodeComplete;\n" - << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, " - "DecodeComplete);\n" - << " assert(DecodeComplete);\n" - << "\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n" - << " << \", using decoder \" << DecodeIdx << \": \"\n" - << " << (S != MCDisassembler::Fail ? \"PASS\" : " - "\"FAIL\") << \"\\n\");\n" - << " return S;\n" - << " }\n" - << " case MCD::OPC_TryDecode: {\n" - << " unsigned Len;\n" - << " // Decode the Opcode value.\n" - << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" - << " Ptr += Len;\n" - << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" - << " Ptr += Len;\n" - << " // NumToSkip is a plain 24-bit integer.\n" - << " unsigned NumToSkip = *Ptr++;\n" - << " NumToSkip |= (*Ptr++) << 8;\n" - << " NumToSkip |= (*Ptr++) << 16;\n" - << "\n" - << " // Perform the decode operation.\n" - << " MCInst TmpMI;\n" - << " TmpMI.setOpcode(Opc);\n" - << " bool DecodeComplete;\n" - << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, " - "DecodeComplete);\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << " - "Opc\n" - << " << \", using decoder \" << DecodeIdx << \": \");\n" - << "\n" - << " if (DecodeComplete) {\n" - << " // Decoding complete.\n" - << " LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : " - "\"FAIL\") << \"\\n\");\n" - << " MI = TmpMI;\n" - << " return S;\n" - << " } else {\n" - << " assert(S == MCDisassembler::Fail);\n" - << " // If the decoding was incomplete, skip.\n" - << " Ptr += NumToSkip;\n" - << " LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - " - "DecodeTable) << \"\\n\");\n" - << " // Reset decode status. This also drops a SoftFail status " - "that could be\n" - << " // set before the decode attempt.\n" - << " S = MCDisassembler::Success;\n" - << " }\n" - << " break;\n" - << " }\n" - << " case MCD::OPC_SoftFail: {\n" - << " // Decode the mask values.\n" - << " unsigned Len;\n" - << " InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n" - << " Ptr += Len;\n" - << " InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n" - << " Ptr += Len;\n" - << " bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n" - << " if (Fail)\n" - << " S = MCDisassembler::SoftFail;\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? " - "\"FAIL\\n\" : \"PASS\\n\"));\n" - << " break;\n" - << " }\n" - << " case MCD::OPC_Fail: {\n" - << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n" - << " return MCDisassembler::Fail;\n" - << " }\n" - << " }\n" - << " }\n" - << " llvm_unreachable(\"bogosity detected in disassembler state " - "machine!\");\n" - << "}\n\n"; -} - -// Emits disassembler code for instruction decoding. 
-void FixedLenDecoderEmitter::run(raw_ostream &o) {
-  formatted_raw_ostream OS(o);
-  OS << "#include \"llvm/MC/MCInst.h\"\n";
-  OS << "#include \"llvm/Support/DataTypes.h\"\n";
-  OS << "#include \"llvm/Support/Debug.h\"\n";
-  OS << "#include \"llvm/Support/LEB128.h\"\n";
-  OS << "#include \"llvm/Support/raw_ostream.h\"\n";
-  OS << "#include <assert.h>\n";
-  OS << '\n';
-  OS << "namespace llvm {\n\n";
-
-  emitFieldFromInstruction(OS);
-  emitInsertBits(OS);
-
-  Target.reverseBitsForLittleEndianEncoding();
-
-  // Parameterize the decoders based on namespace and instruction width.
-  std::set<StringRef> HwModeNames;
-  const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
-  NumberedEncodings.reserve(NumberedInstructions.size());
-  DenseMap<Record *, unsigned> IndexOfInstruction;
-  // First, collect all HwModes referenced by the target.
-  for (const auto &NumberedInstruction : NumberedInstructions) {
-    IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
-
-    if (const RecordVal *RV =
-            NumberedInstruction->TheDef->getValue("EncodingInfos")) {
-      if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
-        const CodeGenHwModes &HWM = Target.getHwModes();
-        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
-        for (auto &KV : EBM)
-          HwModeNames.insert(HWM.getMode(KV.first).Name);
-      }
-    }
-  }
-
-  // If HwModeNames is empty, add the empty string so we always have one HwMode.
-  if (HwModeNames.empty())
-    HwModeNames.insert("");
-
-  for (const auto &NumberedInstruction : NumberedInstructions) {
-    IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
-
-    if (const RecordVal *RV =
-            NumberedInstruction->TheDef->getValue("EncodingInfos")) {
-      if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
-        const CodeGenHwModes &HWM = Target.getHwModes();
-        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
-        for (auto &KV : EBM) {
-          NumberedEncodings.emplace_back(KV.second, NumberedInstruction,
-                                         HWM.getMode(KV.first).Name);
-          HwModeNames.insert(HWM.getMode(KV.first).Name);
-        }
-        continue;
-      }
-    }
-    // This instruction is encoded the same on all HwModes. Emit it for all
-    // HwModes.
-    for (StringRef HwModeName : HwModeNames)
-      NumberedEncodings.emplace_back(NumberedInstruction->TheDef,
-                                     NumberedInstruction, HwModeName);
-  }
-  for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
-    NumberedEncodings.emplace_back(
-        NumberedAlias,
-        &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
-
-  std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
-      OpcMap;
-  std::map<unsigned, std::vector<OperandInfo>> Operands;
-
-  for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
-    const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
-    const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
-    const Record *Def = Inst->TheDef;
-    unsigned Size = EncodingDef->getValueAsInt("Size");
-    if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
-        Def->getValueAsBit("isPseudo") ||
-        Def->getValueAsBit("isAsmParserOnly") ||
-        Def->getValueAsBit("isCodeGenOnly")) {
-      NumEncodingsLackingDisasm++;
-      continue;
-    }
-
-    if (i < NumberedInstructions.size())
-      NumInstructions++;
-    NumEncodings++;
-
-    if (!Size)
-      continue;
-
-    if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
-      std::string DecoderNamespace =
-          std::string(EncodingDef->getValueAsString("DecoderNamespace"));
-      if (!NumberedEncodings[i].HwModeName.empty())
-        DecoderNamespace +=
-            std::string("_") + NumberedEncodings[i].HwModeName.str();
-      OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(
-          i, IndexOfInstruction.find(Def)->second);
-    } else {
-      NumEncodingsOmitted++;
-    }
-  }
-
-  DecoderTableInfo TableInfo;
-  for (const auto &Opc : OpcMap) {
-    // Emit the decoder for this namespace+width combination.
-    ArrayRef<EncodingAndInst> NumberedEncodingsRef(
-        NumberedEncodings.data(), NumberedEncodings.size());
-    FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
-                     8 * Opc.first.second, this);
-
-    // The decode table is cleared for each top level decoder function. The
-    // predicates and decoders themselves, however, are shared across all
-    // decoders to give more opportunities for uniquing.
-    TableInfo.Table.clear();
-    TableInfo.FixupStack.clear();
-    TableInfo.Table.reserve(16384);
-    TableInfo.FixupStack.emplace_back();
-    FC.emitTableEntries(TableInfo);
-    // Any NumToSkip fixups in the top level scope can resolve to the
-    // OPC_Fail at the end of the table.
-    assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
-    // Resolve any NumToSkip fixups in the current scope.
-    resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
-                       TableInfo.Table.size());
-    TableInfo.FixupStack.clear();
-
-    TableInfo.Table.push_back(MCD::OPC_Fail);
-
-    // Print the table to the output stream.
-    emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first);
-    OS.flush();
-  }
-
-  // Emit the predicate function.
-  emitPredicateFunction(OS, TableInfo.Predicates, 0);
-
-  // Emit the decoder function.
-  emitDecoderFunction(OS, TableInfo.Decoders, 0);
-
-  // Emit the main entry point for the decoder, decodeInstruction().
- emitDecodeInstruction(OS); - - OS << "\n} // end namespace llvm\n"; -} - -namespace llvm { - -void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS, - const std::string &PredicateNamespace, - const std::string &GPrefix, - const std::string &GPostfix, const std::string &ROK, - const std::string &RFail, const std::string &L) { - FixedLenDecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix, - ROK, RFail, L).run(OS); -} - -} // end namespace llvm diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp index 0dea1ef00e4b..77e05aebf53a 100644 --- a/llvm/utils/TableGen/GICombinerEmitter.cpp +++ b/llvm/utils/TableGen/GICombinerEmitter.cpp @@ -933,28 +933,27 @@ void GICombinerEmitter::run(raw_ostream &OS) { "getRuleIdxForIdentifier(RangePair.first);\n" << " const auto Last = " "getRuleIdxForIdentifier(RangePair.second);\n" - << " if (!First.hasValue() || !Last.hasValue())\n" + << " if (!First || !Last)\n" << " return None;\n" << " if (First >= Last)\n" << " report_fatal_error(\"Beginning of range should be before " "end of range\");\n" << " return {{*First, *Last + 1}};\n" - << " } else if (RangePair.first == \"*\") {\n" + << " }\n" + << " if (RangePair.first == \"*\") {\n" << " return {{0, " << Rules.size() << "}};\n" - << " } else {\n" - << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n" - << " if (!I.hasValue())\n" - << " return None;\n" - << " return {{*I, *I + 1}};\n" << " }\n" - << " return None;\n" + << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n" + << " if (!I)\n" + << " return None;\n" + << " return {{*I, *I + 1}};\n" << "}\n\n"; for (bool Enabled : {true, false}) { OS << "bool " << getClassName() << "RuleConfig::setRule" << (Enabled ? "Enabled" : "Disabled") << "(StringRef RuleIdentifier) {\n" << " auto MaybeRange = getRuleRangeForIdentifier(RuleIdentifier);\n" - << " if (!MaybeRange.hasValue())\n" + << " if (!MaybeRange)\n" << " return false;\n" << " for (auto I = MaybeRange->first; I < MaybeRange->second; ++I)\n" << " DisabledRules." << (Enabled ? 
"reset" : "set") << "(I);\n" diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp index 7e037dd03b60..8be32d2effa6 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp +++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp @@ -48,7 +48,7 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const { << Assignment.first << ")"; Separator = ", "; } - OS << format("|%p|", &N); + OS << llvm::format("|%p|", &N); writePorts("d", N->getOperandInfo()); OS << "}\""; if (N->isMatchRoot()) @@ -82,7 +82,7 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const { writePorts("s", N->getOperandInfo()); OS << "|" << N->getName() << "|"; N->printDescription(OS); - OS << format("|%p|", &N); + OS << llvm::format("|%p|", &N); writePorts("d", N->getOperandInfo()); OS << "}\",style=dotted]\n"; } diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.h b/llvm/utils/TableGen/GlobalISel/GIMatchTree.h index 56df37731c09..55a86259661d 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.h +++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.h @@ -32,11 +32,11 @@ public: Optional OpIdx = None) : Name(Name), InstrID(InstrID), OpIdx(OpIdx) {} - bool isInstr() const { return !OpIdx.hasValue(); } + bool isInstr() const { return !OpIdx; } StringRef getName() const { return Name; } unsigned getInstrID() const { return InstrID; } unsigned getOpIdx() const { - assert(OpIdx.hasValue() && "Is not an operand binding"); + assert(OpIdx && "Is not an operand binding"); return *OpIdx; } }; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 018aa7ee2f71..c8eac56d03e6 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -30,6 +30,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" #include "SubtargetFeatureInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" @@ -465,9 +466,9 @@ public: MatchTableRecord(Optional LabelID_, StringRef EmitStr, unsigned NumElements, unsigned Flags, int64_t RawValue = std::numeric_limits::min()) - : LabelID(LabelID_.getValueOr(~0u)), EmitStr(EmitStr), + : LabelID(LabelID_.value_or(~0u)), EmitStr(EmitStr), NumElements(NumElements), Flags(Flags), RawValue(RawValue) { - assert((!LabelID_.hasValue() || LabelID != ~0u) && + assert((!LabelID_ || LabelID != ~0u) && "This value is reserved for non-labels"); } MatchTableRecord(const MatchTableRecord &Other) = default; @@ -2935,12 +2936,12 @@ public: } void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { - Table << MatchTable::Opcode(SubOperand.hasValue() ? "GIR_ComplexSubOperandRenderer" - : "GIR_ComplexRenderer") + Table << MatchTable::Opcode(SubOperand ? 
"GIR_ComplexSubOperandRenderer" + : "GIR_ComplexRenderer") << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) << MatchTable::Comment("RendererID") << MatchTable::IntValue(RendererID); - if (SubOperand.hasValue()) + if (SubOperand) Table << MatchTable::Comment("SubOperand") << MatchTable::IntValue(SubOperand.getValue()); Table << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; @@ -3815,12 +3816,15 @@ Expected GlobalISelEmitter::addBuiltinPredicates( if (!ParsedAddrSpaces.empty()) { InsnMatcher.addPredicate( 0, ParsedAddrSpaces); + return InsnMatcher; } } int64_t MinAlign = Predicate.getMinAlignment(); - if (MinAlign > 0) + if (MinAlign > 0) { InsnMatcher.addPredicate(0, MinAlign); + return InsnMatcher; + } } // G_LOAD is used for both non-extending and any-extending loads. @@ -4269,7 +4273,7 @@ Error GlobalISelEmitter::importChildMatcher( auto MaybeInsnOperand = OM.addPredicate( InsnMatcher.getRuleMatcher(), SrcChild->getName()); - if (!MaybeInsnOperand.hasValue()) { + if (!MaybeInsnOperand) { // This isn't strictly true. If the user were to provide exactly the same // matchers as the original operand then we could allow it. However, it's // simpler to not permit the redundant specification. @@ -4400,7 +4404,7 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( TreePatternNode *DstChild) { const auto &SubOperand = Rule.getComplexSubOperand(DstChild->getName()); - if (SubOperand.hasValue()) { + if (SubOperand) { DstMIBuilder.addRenderer( *std::get<0>(*SubOperand), DstChild->getName(), std::get<1>(*SubOperand), std::get<2>(*SubOperand)); @@ -4802,7 +4806,7 @@ Expected GlobalISelEmitter::importExplicitUseRenderers( const auto SrcRCDstRCPair = RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx); - if (SrcRCDstRCPair.hasValue()) { + if (SrcRCDstRCPair) { assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); if (SrcRCDstRCPair->first != RC) return failedImport("EXTRACT_SUBREG requires an additional COPY"); @@ -5533,6 +5537,7 @@ std::vector GlobalISelEmitter::optimizeRules( ProcessCurrentGroup(); LLVM_DEBUG(dbgs() << "NumGroups: " << NumGroups << "\n"); + (void) NumGroups; assert(CurrentGroup->empty() && "The last group wasn't properly processed"); return OptRules; } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 3c92aa0cc27a..a7a4f4f5f1a7 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -36,6 +36,12 @@ using namespace llvm; +cl::OptionCategory InstrInfoEmitterCat("Options for -gen-instr-info"); +static cl::opt ExpandMIOperandInfo( + "instr-info-expand-mi-operand-info", + cl::desc("Expand operand's MIOperandInfo DAG into suboperands"), + cl::cat(InstrInfoEmitterCat), cl::init(true)); + namespace { class InstrInfoEmitter { @@ -379,6 +385,9 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "namespace " << Namespace << " {\n"; OS << "LLVM_READONLY\n"; OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; + auto getInstrName = [&](int I) -> StringRef { + return NumberedInstructions[I]->TheDef->getName(); + }; // TODO: Factor out duplicate operand lists to compress the tables. 
if (!NumberedInstructions.empty()) { std::vector OperandOffsets; @@ -388,7 +397,7 @@ void InstrInfoEmitter::emitOperandTypeMappings( OperandOffsets.push_back(CurrentOffset); for (const auto &Op : Inst->Operands) { const DagInit *MIOI = Op.MIOperandInfo; - if (!MIOI || MIOI->getNumArgs() == 0) { + if (!ExpandMIOperandInfo || !MIOI || MIOI->getNumArgs() == 0) { // Single, anonymous, operand. OperandRecords.push_back(Op.Rec); ++CurrentOffset; @@ -408,8 +417,10 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << ((OperandRecords.size() <= UINT16_MAX) ? " const uint16_t" : " const uint32_t"); OS << " Offsets[] = {\n"; - for (int I = 0, E = OperandOffsets.size(); I != E; ++I) + for (int I = 0, E = OperandOffsets.size(); I != E; ++I) { + OS << " /* " << getInstrName(I) << " */\n"; OS << " " << OperandOffsets[I] << ",\n"; + } OS << " };\n"; // Add an entry for the end so that we don't need to special case it below. @@ -419,22 +430,22 @@ void InstrInfoEmitter::emitOperandTypeMappings( // Size the signed integer operand type to save space. assert(EnumVal <= INT16_MAX && "Too many operand types for operand types table"); + OS << "\n using namespace OpTypes;\n"; OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t"); OS << " OpcodeOperandTypes[] = {\n "; - for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) { + for (int I = 0, E = OperandRecords.size(), CurOffset = 0; I != E; ++I) { // We print each Opcode's operands in its own row. if (I == OperandOffsets[CurOffset]) { - OS << "\n "; - // If there are empty rows, mark them with an empty comment. + OS << "\n /* " << getInstrName(CurOffset) << " */\n "; while (OperandOffsets[++CurOffset] == I) - OS << "/**/\n "; + OS << "/* " << getInstrName(CurOffset) << " */\n "; } Record *OpR = OperandRecords[I]; if ((OpR->isSubClassOf("Operand") || OpR->isSubClassOf("RegisterOperand") || OpR->isSubClassOf("RegisterClass")) && !OpR->isAnonymous()) - OS << "OpTypes::" << OpR->getName(); + OS << OpR->getName(); else OS << -1; OS << ", "; @@ -449,6 +460,31 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "} // end namespace " << Namespace << "\n"; OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n"; + + OS << "#ifdef GET_INSTRINFO_MEM_OPERAND_SIZE\n"; + OS << "#undef GET_INSTRINFO_MEM_OPERAND_SIZE\n"; + OS << "namespace llvm {\n"; + OS << "namespace " << Namespace << " {\n"; + OS << "LLVM_READONLY\n"; + OS << "static int getMemOperandSize(int OpType) {\n"; + OS << " switch (OpType) {\n"; + std::map> SizeToOperandName; + for (const Record *Op : Operands) { + if (!Op->isSubClassOf("X86MemOperand")) + continue; + if (int Size = Op->getValueAsInt("Size")) + SizeToOperandName[Size].push_back(Op->getName()); + } + OS << " default: return 0;\n"; + for (auto KV : SizeToOperandName) { + for (const StringRef &OperandName : KV.second) + OS << " case OpTypes::" << OperandName << ":\n"; + OS << " return " << KV.first << ";\n\n"; + } + OS << " }\n}\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; + OS << "#endif // GET_INSTRINFO_MEM_OPERAND_SIZE\n\n"; } void InstrInfoEmitter::emitLogicalOperandSizeMappings( @@ -943,6 +979,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, // Emit all of the target independent flags... 
if (Inst.isPreISelOpcode) OS << "|(1ULL< &Sig) { + // clang-format off if (MVT(VT).isInteger()) { unsigned BitWidth = MVT(VT).getFixedSizeInBits(); switch (BitWidth) { default: PrintFatalError("unhandled integer type width in intrinsic!"); case 1: return Sig.push_back(IIT_I1); + case 2: return Sig.push_back(IIT_I2); + case 4: return Sig.push_back(IIT_I4); case 8: return Sig.push_back(IIT_I8); case 16: return Sig.push_back(IIT_I16); case 32: return Sig.push_back(IIT_I32); @@ -291,6 +297,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT, case MVT::funcref: return Sig.push_back(IIT_FUNCREF); } + // clang-format on } #if defined(_MSC_VER) && !defined(__clang__) @@ -327,6 +334,13 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, // Encode LLVMMatchType ArgNo Sig.push_back(Number); return; + } else if (R->isSubClassOf("LLVMAnyPointerToElt")) { + Sig.push_back(IIT_ANYPTR_TO_ELT); + // Encode overloaded ArgNo + Sig.push_back(NextArgCode++); + // Encode LLVMMatchType ArgNo + Sig.push_back(Number); + return; } else if (R->isSubClassOf("LLVMPointerToElt")) Sig.push_back(IIT_PTR_TO_ELT); else if (R->isSubClassOf("LLVMVectorElementType")) @@ -415,6 +429,9 @@ static void UpdateArgCodes(Record *R, std::vector &ArgCodes, if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) { ArgCodes.push_back(3 /*vAny*/); ++NumInserted; + } else if (R->isSubClassOf("LLVMAnyPointerToElt")) { + ArgCodes.push_back(4 /*iPTRAny*/); + ++NumInserted; } return; } @@ -599,6 +616,9 @@ struct AttributeComparator { if (L->isNoReturn != R->isNoReturn) return R->isNoReturn; + if (L->isNoCallback != R->isNoCallback) + return R->isNoCallback; + if (L->isNoSync != R->isNoSync) return R->isNoSync; @@ -748,16 +768,18 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, if (!Intrinsic.canThrow || (Intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem && !Intrinsic.hasSideEffects) || - Intrinsic.isNoReturn || Intrinsic.isNoSync || Intrinsic.isNoFree || - Intrinsic.isWillReturn || Intrinsic.isCold || Intrinsic.isNoDuplicate || - Intrinsic.isNoMerge || Intrinsic.isConvergent || - Intrinsic.isSpeculatable) { + Intrinsic.isNoReturn || Intrinsic.isNoCallback || Intrinsic.isNoSync || + Intrinsic.isNoFree || Intrinsic.isWillReturn || Intrinsic.isCold || + Intrinsic.isNoDuplicate || Intrinsic.isNoMerge || + Intrinsic.isConvergent || Intrinsic.isSpeculatable) { OS << " const Attribute::AttrKind Atts[] = {"; ListSeparator LS(","); if (!Intrinsic.canThrow) OS << LS << "Attribute::NoUnwind"; if (Intrinsic.isNoReturn) OS << LS << "Attribute::NoReturn"; + if (Intrinsic.isNoCallback) + OS << LS << "Attribute::NoCallback"; if (Intrinsic.isNoSync) OS << LS << "Attribute::NoSync"; if (Intrinsic.isNoFree) @@ -858,14 +880,15 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, } void IntrinsicEmitter::EmitIntrinsicToBuiltinMap( - const CodeGenIntrinsicTable &Ints, bool IsGCC, raw_ostream &OS) { - StringRef CompilerName = (IsGCC ? "GCC" : "MS"); + const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) { + StringRef CompilerName = (IsClang ? "Clang" : "MS"); + StringRef UpperCompilerName = (IsClang ? "CLANG" : "MS"); typedef std::map> BIMTy; BIMTy BuiltinMap; StringToOffsetTable Table; for (unsigned i = 0, e = Ints.size(); i != e; ++i) { const std::string &BuiltinName = - IsGCC ? Ints[i].GCCBuiltinName : Ints[i].MSBuiltinName; + IsClang ? Ints[i].ClangBuiltinName : Ints[i].MSBuiltinName; if (!BuiltinName.empty()) { // Get the map for this target prefix. 
std::map &BIM = @@ -883,7 +906,7 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap( OS << "// This is used by the C front-end. The builtin name is passed\n"; OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n"; OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n"; - OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << CompilerName << "_BUILTIN\n"; + OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n"; OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName << "Builtin(const char " diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index d54132f3190b..182cd0076090 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ -172,7 +172,7 @@ static MarshallingInfo createMarshallingInfo(const Record &R) { Ret.NormalizedValuesScope = R.getValueAsString("NormalizedValuesScope"); Ret.ImpliedCheck = R.getValueAsString("ImpliedCheck"); Ret.ImpliedValue = - R.getValueAsOptionalString("ImpliedValue").getValueOr(Ret.DefaultValue); + R.getValueAsOptionalString("ImpliedValue").value_or(Ret.DefaultValue); Ret.ShouldParse = R.getValueAsString("ShouldParse"); Ret.Normalizer = R.getValueAsString("Normalizer"); diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp index 11d896229f5b..03c7326e817a 100644 --- a/llvm/utils/TableGen/OptRSTEmitter.cpp +++ b/llvm/utils/TableGen/OptRSTEmitter.cpp @@ -60,18 +60,43 @@ void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) { // Print the option name. OS << R->getValueAsString("Name"); + StringRef MetaVarName; // Print the meta-variable. if (!isa(R->getValueInit("MetaVarName"))) { + MetaVarName = R->getValueAsString("MetaVarName"); + } else if (!isa(R->getValueInit("Values"))) + MetaVarName = ""; + + if (!MetaVarName.empty()) { OS << '='; - OS.write_escaped(R->getValueAsString("MetaVarName")); + OS.write_escaped(MetaVarName); } OS << "\n\n"; + std::string HelpText; // The option help text. 
if (!isa(R->getValueInit("HelpText"))) { + HelpText = R->getValueAsString("HelpText").trim().str(); + if (!HelpText.empty() && HelpText.back() != '.') + HelpText.push_back('.'); + } + + if (!isa(R->getValueInit("Values"))) { + SmallVector Values; + SplitString(R->getValueAsString("Values"), Values, ","); + HelpText += (" " + MetaVarName + " must be '").str(); + + if (Values.size() > 1) { + HelpText += join(Values.begin(), Values.end() - 1, "', '"); + HelpText += "' or '"; + } + HelpText += (Values.front() + "'.").str(); + } + + if (!HelpText.empty()) { OS << ' '; - OS.write_escaped(R->getValueAsString("HelpText")); + OS.write_escaped(HelpText); OS << "\n\n"; } } diff --git a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp index 6acb630299c1..dc04174217fb 100644 --- a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp +++ b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp @@ -109,7 +109,8 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn, OperandMap[BaseIdx + i].Data.Imm = II->getValue(); ++OpsAdded; } else if (auto *BI = dyn_cast(Dag->getArg(i))) { - auto *II = cast(BI->convertInitializerTo(IntRecTy::get())); + auto *II = + cast(BI->convertInitializerTo(IntRecTy::get(Records))); OperandMap[BaseIdx + i].Kind = OpData::Imm; OperandMap[BaseIdx + i].Data.Imm = II->getValue(); ++OpsAdded; diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp index d97d7acb87a7..e6689b211a7d 100644 --- a/llvm/utils/TableGen/RegisterBankEmitter.cpp +++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp @@ -172,9 +172,8 @@ static void visitRegisterBankClasses( SmallPtrSetImpl &VisitedRCs) { // Make sure we only visit each class once to avoid infinite loops. - if (VisitedRCs.count(RC)) + if (!VisitedRCs.insert(RC).second) return; - VisitedRCs.insert(RC); // Visit each explicitly named class. 
VisitFn(RC, Kind.str()); @@ -266,9 +265,8 @@ void RegisterBankEmitter::emitBaseClassImplementation( << "::NumRegisterBanks) {\n" << " // Assert that RegBank indices match their ID's\n" << "#ifndef NDEBUG\n" - << " unsigned Index = 0;\n" - << " for (const auto &RB : RegBanks)\n" - << " assert(Index++ == RB->getID() && \"Index != ID\");\n" + << " for (auto RB : enumerate(RegBanks))\n" + << " assert(RB.index() == RB.value()->getID() && \"Index != ID\");\n" << "#endif // NDEBUG\n" << "}\n" << "} // end namespace llvm\n"; diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index 1ed7bc103f9c..3a0fa564074e 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -268,7 +268,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank, OS << "// Get the name of this register unit pressure set.\n" << "const char *" << ClassName << "::\n" << "getRegPressureSetName(unsigned Idx) const {\n" - << " static const char *const PressureNameTable[] = {\n"; + << " static const char *PressureNameTable[] = {\n"; unsigned MaxRegUnitWeight = 0; for (unsigned i = 0; i < NumSets; ++i ) { const RegUnitSet &RegUnits = RegBank.getRegSetAt(i); @@ -753,7 +753,7 @@ RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS, } OS << " };\n\n"; - OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n" + OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << "); (void) IdxA;\n" << " --IdxB; assert(IdxB < " << SubRegIndicesSize << ");\n"; if (Rows.size() > 1) OS << " return Rows[RowMap[IdxA]][IdxB];\n"; @@ -814,12 +814,14 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS, OS << " // Sequence " << Idx << "\n"; Idx += Sequence.size() + 1; } + auto *IntType = getMinimalTypeForRange(*std::max_element( + SubReg2SequenceIndexMap.begin(), SubReg2SequenceIndexMap.end())); OS << " };\n" - " static const MaskRolOp *const CompositeSequences[] = {\n"; + " static const " + << IntType << " CompositeSequences[] = {\n"; for (size_t i = 0, e = SubRegIndices.size(); i != e; ++i) { OS << " "; - unsigned Idx = SubReg2SequenceIndexMap[i]; - OS << format("&LaneMaskComposeSequences[%u]", Idx); + OS << SubReg2SequenceIndexMap[i]; if (i+1 != e) OS << ","; OS << " // to " << SubRegIndices[i].getName() << "\n"; @@ -832,7 +834,9 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS, " --IdxA; assert(IdxA < " << SubRegIndices.size() << " && \"Subregister index out of bounds\");\n" " LaneBitmask Result;\n" - " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n" + " for (const MaskRolOp *Ops =\n" + " &LaneMaskComposeSequences[CompositeSequences[IdxA]];\n" + " Ops->Mask.any(); ++Ops) {\n" " LaneBitmask::Type M = LaneMask.getAsInteger() & Ops->Mask.getAsInteger();\n" " if (unsigned S = Ops->RotateLeft)\n" " Result |= LaneBitmask((M << S) | (M >> (LaneBitmask::BitWidth - S)));\n" @@ -849,7 +853,9 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS, " --IdxA; assert(IdxA < " << SubRegIndices.size() << " && \"Subregister index out of bounds\");\n" " LaneBitmask Result;\n" - " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n" + " for (const MaskRolOp *Ops =\n" + " &LaneMaskComposeSequences[CompositeSequences[IdxA]];\n" + " Ops->Mask.any(); ++Ops) {\n" " LaneBitmask::Type M = LaneMask.getAsInteger();\n" " if (unsigned S = Ops->RotateLeft)\n" " Result |= LaneBitmask((M >> S) | (M << (LaneBitmask::BitWidth - S)));\n" @@ 
-1046,25 +1052,24 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, RegClassStrings.add(Name); - // Emit the register list now. - OS << " // " << Name << " Register Class...\n" - << " const MCPhysReg " << Name - << "[] = {\n "; - for (Record *Reg : Order) { - OS << getQualifiedName(Reg) << ", "; - } - OS << "\n };\n\n"; - - OS << " // " << Name << " Bit set.\n" - << " const uint8_t " << Name - << "Bits[] = {\n "; - BitVectorEmitter BVE; - for (Record *Reg : Order) { - BVE.add(Target.getRegBank().getReg(Reg)->EnumValue); - } - BVE.print(OS); - OS << "\n };\n\n"; + // Emit the register list now (unless it would be a zero-length array). + if (!Order.empty()) { + OS << " // " << Name << " Register Class...\n" + << " const MCPhysReg " << Name << "[] = {\n "; + for (Record *Reg : Order) { + OS << getQualifiedName(Reg) << ", "; + } + OS << "\n };\n\n"; + OS << " // " << Name << " Bit set.\n" + << " const uint8_t " << Name << "Bits[] = {\n "; + BitVectorEmitter BVE; + for (Record *Reg : Order) { + BVE.add(Target.getRegBank().getReg(Reg)->EnumValue); + } + BVE.print(OS); + OS << "\n };\n\n"; + } } OS << "} // end anonymous namespace\n\n"; @@ -1076,14 +1081,17 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, << "MCRegisterClasses[] = {\n"; for (const auto &RC : RegisterClasses) { + ArrayRef Order = RC.getOrder(); + std::string RCName = Order.empty() ? "nullptr" : RC.getName(); + std::string RCBitsName = Order.empty() ? "nullptr" : RC.getName() + "Bits"; + std::string RCBitsSize = Order.empty() ? "0" : "sizeof(" + RCBitsName + ")"; assert(isInt<8>(RC.CopyCost) && "Copy cost too large."); uint32_t RegSize = 0; if (RC.RSI.isSimple()) RegSize = RC.RSI.getSimple().RegSize; - OS << " { " << RC.getName() << ", " << RC.getName() << "Bits, " + OS << " { " << RCName << ", " << RCBitsName << ", " << RegClassStrings.get(RC.getName()) << ", " << RC.getOrder().size() - << ", sizeof(" << RC.getName() << "Bits), " - << RC.getQualifiedName() + "RegClassID" + << ", " << RCBitsSize << ", " << RC.getQualifiedName() + "RegClassID" << ", " << RegSize << ", " << RC.CopyCost << ", " << (RC.Allocatable ? "true" : "false") << " },\n"; } @@ -1176,6 +1184,12 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target, << "unsigned RegUnit) const override;\n" << " ArrayRef getRegMaskNames() const override;\n" << " ArrayRef getRegMasks() const override;\n" + << " bool isGeneralPurposeRegister(const MachineFunction &, " + << "MCRegister) const override;\n" + << " bool isFixedRegister(const MachineFunction &, " + << "MCRegister) const override;\n" + << " bool isArgumentRegister(const MachineFunction &, " + << "MCRegister) const override;\n" << " /// Devirtualized TargetFrameLowering.\n" << " static const " << TargetName << "FrameLowering *getFrameLowering(\n" << " const MachineFunction &MF);\n" @@ -1250,7 +1264,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, OS << "};\n"; // Emit SubRegIndex names, skipping 0. 
- OS << "\nstatic const char *const SubRegIndexNameTable[] = { \""; + OS << "\nstatic const char *SubRegIndexNameTable[] = { \""; for (const auto &Idx : SubRegIndices) { OS << Idx.getName(); @@ -1620,10 +1634,54 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, } OS << "}\n\n"; + const std::list &RegCategories = + RegBank.getRegCategories(); + OS << "bool " << ClassName << "::\n" + << "isGeneralPurposeRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "GeneralPurposeRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + + OS << "bool " << ClassName << "::\n" + << "isFixedRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "FixedRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + + OS << "bool " << ClassName << "::\n" + << "isArgumentRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "ArgumentRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + OS << "ArrayRef " << ClassName << "::getRegMaskNames() const {\n"; if (!CSRSets.empty()) { - OS << " static const char *const Names[] = {\n"; + OS << " static const char *Names[] = {\n"; for (Record *CSRSet : CSRSets) OS << " " << '"' << CSRSet->getName() << '"' << ",\n"; OS << " };\n"; @@ -1683,6 +1741,8 @@ void RegisterInfoEmitter::debugDump(raw_ostream &OS) { OS << "\tLaneMask: " << PrintLaneMask(RC.LaneMask) << '\n'; OS << "\tHasDisjunctSubRegs: " << RC.HasDisjunctSubRegs << '\n'; OS << "\tCoveredBySubRegs: " << RC.CoveredBySubRegs << '\n'; + OS << "\tAllocatable: " << RC.Allocatable << '\n'; + OS << "\tAllocationPriority: " << unsigned(RC.AllocationPriority) << '\n'; OS << "\tRegs:"; for (const CodeGenRegister *R : RC.getMembers()) { OS << " " << R->getName(); diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index dc5c96c662be..ea849807de03 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -30,7 +30,9 @@ using namespace llvm; namespace { int getAsInt(Init *B) { - return cast(B->convertInitializerTo(IntRecTy::get()))->getValue(); + return cast( + B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper()))) + ->getValue(); } int getInt(Record *R, StringRef Field) { return getAsInt(R->getValueInit(Field)); diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h b/llvm/utils/TableGen/SequenceToOffsetTable.h index 41cdefdb1949..1b3451c24cb0 100644 --- a/llvm/utils/TableGen/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/SequenceToOffsetTable.h @@ -170,18 +170,18 @@ public: /// `EmitLongStrLiterals` is false void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const { assert(Entries && "Call layout() before emitStringLiteralDef()"); 
- if (EmitLongStrLiterals) { - OS << "\n#ifdef __GNUC__\n" - << "#pragma GCC diagnostic push\n" - << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n" - << "#endif\n" - << Decl << " = {\n"; - } else { + if (!EmitLongStrLiterals) { OS << Decl << " = {\n"; emit(OS, printChar, "0"); - OS << "\n};\n\n"; + OS << " 0\n};\n\n"; return; } + + OS << "\n#ifdef __GNUC__\n" + << "#pragma GCC diagnostic push\n" + << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n" + << "#endif\n" + << Decl << " = {\n"; for (auto I : Seqs) { OS << " /* " << I.second << " */ \""; for (auto C : I.first) { diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 78bbb3196e5c..88827607b517 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -74,6 +74,7 @@ class SubtargetEmitter { std::string Target; void Enumeration(raw_ostream &OS, DenseMap &FeatureMap); + void EmitSubtargetInfoMacroCalls(raw_ostream &OS); unsigned FeatureKeyValues(raw_ostream &OS, const DenseMap &FeatureMap); unsigned CPUKeyValues(raw_ostream &OS, @@ -122,8 +123,7 @@ class SubtargetEmitter { void EmitSchedModel(raw_ostream &OS); void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS); - void ParseFeaturesFunction(raw_ostream &OS, unsigned NumFeatures, - unsigned NumProcs); + void ParseFeaturesFunction(raw_ostream &OS); public: SubtargetEmitter(RecordKeeper &R, CodeGenTarget &TGT) @@ -193,6 +193,42 @@ static void printFeatureMask(raw_ostream &OS, RecVec &FeatureList, OS << "} } }"; } +/// Emit some information about the SubtargetFeature as calls to a macro so +/// that they can be used from C++. +void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) { + OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n"; + + std::vector FeatureList = + Records.getAllDerivedDefinitions("SubtargetFeature"); + llvm::sort(FeatureList, LessRecordFieldName()); + + for (const Record *Feature : FeatureList) { + const StringRef Attribute = Feature->getValueAsString("Attribute"); + const StringRef Value = Feature->getValueAsString("Value"); + + // Only handle boolean features for now, excluding BitVectors and enums. + const bool IsBool = (Value == "false" || Value == "true") && + !StringRef(Attribute).contains('['); + if (!IsBool) + continue; + + // Some features default to true, with values set to false if enabled. + const char *Default = Value == "false" ? "true" : "false"; + + // Define the getter with lowercased first char: xxxYyy() { return XxxYyy; } + const std::string Getter = + Attribute.substr(0, 1).lower() + Attribute.substr(1).str(); + + OS << "GET_SUBTARGETINFO_MACRO(" << Attribute << ", " << Default << ", " + << Getter << ")\n"; + } + OS << "#undef GET_SUBTARGETINFO_MACRO\n"; + OS << "#endif // GET_SUBTARGETINFO_MACRO\n\n"; + + OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n"; + OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n"; +} + // // FeatureKeyValues - Emit data of all the subtarget features. Used by the // command line. @@ -1681,13 +1717,9 @@ void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName, OS << " return 0;\n}\n"; } -// -// ParseFeaturesFunction - Produces a subtarget specific function for parsing +// Produces a subtarget specific function for parsing // the subtarget features string. 
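A hedged sketch of the consumer side of EmitSubtargetInfoMacroCalls above: a target's Subtarget class defines GET_SUBTARGETINFO_MACRO before including the generated .inc, so each emitted GET_SUBTARGETINFO_MACRO(Attribute, Default, getter) line expands to a getter. Only the three-parameter macro shape comes from the emitter; the class, member, and .inc names are invented for illustration:

```cpp
class MyTargetSubtarget {
  bool HasFancyOp = false; // would be set by ParseSubtargetFeatures()

public:
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
  // #include "MyTargetGenSubtargetInfo.inc" would pull in lines such as
  //   GET_SUBTARGETINFO_MACRO(HasFancyOp, false, hasFancyOp)
  bool hasFancyOp() const { return HasFancyOp; } // what one expansion yields
#undef GET_SUBTARGETINFO_MACRO
};
```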
-// -void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS, - unsigned NumFeatures, - unsigned NumProcs) { +void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) { std::vector Features = Records.getAllDerivedDefinitions("SubtargetFeature"); llvm::sort(Features, LessRecord()); @@ -1803,8 +1835,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n"; - OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n"; - OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n"; + EmitSubtargetInfoMacroCalls(OS); OS << "namespace llvm {\n"; #if 0 @@ -1858,7 +1889,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "#include \"llvm/Support/Debug.h\"\n"; OS << "#include \"llvm/Support/raw_ostream.h\"\n\n"; - ParseFeaturesFunction(OS, NumFeatures, NumProcs); + ParseFeaturesFunction(OS); OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n"; diff --git a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp index 33a22776f2df..f4f360fb5be2 100644 --- a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp +++ b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp @@ -108,6 +108,39 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures( OS << "}\n\n"; } +// If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. +static bool emitFeaturesAux(StringRef TargetName, const Init &Val, + bool ParenIfBinOp, raw_ostream &OS) { + if (auto *D = dyn_cast(&Val)) { + if (!D->getDef()->isSubClassOf("SubtargetFeature")) + return true; + OS << "FB[" << TargetName << "::" << D->getAsString() << "]"; + return false; + } + if (auto *D = dyn_cast(&Val)) { + std::string Op = D->getOperator()->getAsString(); + if (Op == "not" && D->getNumArgs() == 1) { + OS << '!'; + return emitFeaturesAux(TargetName, *D->getArg(0), true, OS); + } + if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { + bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); + if (Paren) + OS << '('; + ListSeparator LS(Op == "any_of" ? " || " : " && "); + for (auto *Arg : D->getArgs()) { + OS << LS; + if (emitFeaturesAux(TargetName, *Arg, ParenIfBinOp, OS)) + return true; + } + if (Paren) + OS << ')'; + return false; + } + } + return true; +} + void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( StringRef TargetName, StringRef ClassName, StringRef FuncName, SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) { @@ -118,37 +151,8 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( const SubtargetFeatureInfo &SFI = SF.second; OS << " if ("; - - const DagInit *D = SFI.TheDef->getValueAsDag("AssemblerCondDag"); - std::string CombineType = D->getOperator()->getAsString(); - if (CombineType != "any_of" && CombineType != "all_of") - PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!"); - if (D->getNumArgs() == 0) - PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!"); - bool IsOr = CombineType == "any_of"; - - if (IsOr) - OS << "("; - - ListSeparator LS(IsOr ? 
" || " : " && "); - for (auto *Arg : D->getArgs()) { - OS << LS; - if (auto *NotArg = dyn_cast(Arg)) { - if (NotArg->getOperator()->getAsString() != "not" || - NotArg->getNumArgs() != 1) - PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!"); - Arg = NotArg->getArg(0); - OS << "!"; - } - if (!isa(Arg) || - !cast(Arg)->getDef()->isSubClassOf("SubtargetFeature")) - PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!"); - OS << "FB[" << TargetName << "::" << Arg->getAsString() << "]"; - } - - if (IsOr) - OS << ")"; - + emitFeaturesAux(TargetName, *SFI.TheDef->getValueAsDag("AssemblerCondDag"), + /*ParenIfBinOp=*/false, OS); OS << ")\n"; OS << " Features.set(" << SFI.getEnumBitName() << ");\n"; } diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index 2d4a45f889be..efd641887232 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -25,7 +25,6 @@ enum ActionType { NullBackend, DumpJSON, GenEmitter, - GenCodeBeads, GenRegisterInfo, GenInstrInfo, GenInstrDocs, @@ -52,11 +51,13 @@ enum ActionType { GenGICombiner, GenX86EVEX2VEXTables, GenX86FoldTables, + GenX86MnemonicTables, GenRegisterBank, GenExegesis, GenAutomata, GenDirectivesEnumDecl, GenDirectivesEnumImpl, + GenDXILOperation, }; namespace llvm { @@ -81,8 +82,6 @@ cl::opt Action( clEnumValN(DumpJSON, "dump-json", "Dump all records as machine-readable JSON"), clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), - clEnumValN(GenCodeBeads, "gen-code-beads", - "Generate machine code beads"), clEnumValN(GenRegisterInfo, "gen-register-info", "Generate registers and register classes info"), clEnumValN(GenInstrInfo, "gen-instr-info", @@ -130,6 +129,8 @@ cl::opt Action( "Generate X86 EVEX to VEX compress tables"), clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", "Generate X86 fold tables"), + clEnumValN(GenX86MnemonicTables, "gen-x86-mnemonic-tables", + "Generate X86 mnemonic tables"), clEnumValN(GenRegisterBank, "gen-register-bank", "Generate registers bank descriptions"), clEnumValN(GenExegesis, "gen-exegesis", @@ -138,7 +139,9 @@ cl::opt Action( clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl", "Generate directive related declaration code (header file)"), clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl", - "Generate directive related implementation code"))); + "Generate directive related implementation code"), + clEnumValN(GenDXILOperation, "gen-dxil-operation", + "Generate DXIL operation information"))); cl::OptionCategory PrintEnumsCat("Options for -print-enums"); cl::opt Class("class", cl::desc("Print Enum list for this class"), @@ -161,9 +164,6 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenEmitter: EmitCodeEmitter(Records, OS); break; - case GenCodeBeads: - EmitCodeBeads(Records, OS); - break; case GenRegisterInfo: EmitRegisterInfo(Records, OS); break; @@ -257,6 +257,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenX86EVEX2VEXTables: EmitX86EVEX2VEXTables(Records, OS); break; + case GenX86MnemonicTables: + EmitX86MnemonicTables(Records, OS); + break; case GenX86FoldTables: EmitX86FoldTables(Records, OS); break; @@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenDirectivesEnumImpl: EmitDirectivesImpl(Records, OS); break; + case GenDXILOperation: + EmitDXILOperation(Records, OS); + break; } return false; diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index 
71db8dc77b05..4dff13095696 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -67,7 +67,6 @@ void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS); void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS); void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS); void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS); -void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS); void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS); void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS); @@ -88,11 +87,13 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS); void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); +void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS); void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); +void EmitDXILOperation(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp new file mode 100644 index 000000000000..a6bbe2f7ff37 --- /dev/null +++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp @@ -0,0 +1,487 @@ +//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The CodeEmitterGen component for variable-length instructions.
+//
+// The basic CodeEmitterGen is almost exclusively designed for fixed-
+// length instructions. A good analogy for its encoding scheme is how printf
+// works: the (immutable) format string represents the fixed values in the
+// encoded instruction. Placeholders (i.e. %something), on the other hand,
+// represent the encoding for instruction operands.
+// ```
+// printf("1101 %src 1001 %dst", <encoding for operand $src>,
+//        <encoding for operand $dst>);
+// ```
+// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
+// that works more like a C++ stream operator:
+// ```
+// OS << 0b1101;
+// if (Cond)
+//   OS << OperandEncoding0;
+// OS << 0b1001 << OperandEncoding1;
+// ```
+// You are free to concatenate arbitrary types (and sizes) of encoding
+// fragments at any bit position, giving more flexibility in defining
+// encodings for variable-length instructions.
+//
+// More concretely, an instruction encoding is represented by a DAG-typed
+// `Inst` field. Here is an example:
+// ```
+// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
+//                             (operand "$dst", 4));
+// ```
+// It represents the following instruction encoding:
+// ```
+// MSB                                           LSB
+// 1101<encoding for $src>1001<encoding for $dst>
+// ```
+// For more details about DAG operators in the above snippet, please
+// refer to \file include/llvm/Target/Target.td.
+//
+// VarLenCodeEmitter will convert the above DAG into the same helper function
+// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
+// for a few details).
+#include "VarLenCodeEmitterGen.h"
+#include "CodeGenHwModes.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+
+using namespace llvm;
+
+namespace {
+
+class VarLenCodeEmitterGen {
+  RecordKeeper &Records;
+
+  DenseMap<Record *, VarLenInst> VarLenInsts;
+
+  // Emit base values (i.e. fixed bits in the encoded instructions)
+  void emitInstructionBaseValues(
+      raw_ostream &OS,
+      ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+      CodeGenTarget &Target, int HwMode = -1);
+
+  std::string getInstructionCase(Record *R, CodeGenTarget &Target);
+  std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
+                                            CodeGenTarget &Target);
+
+public:
+  explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
+
+  void run(raw_ostream &OS);
+};
+
+} // end anonymous namespace
+
+VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
+    : TheDef(TheDef), NumBits(0U) {
+  buildRec(DI);
+  for (const auto &S : Segments)
+    NumBits += S.BitWidth;
+}
+
+void VarLenInst::buildRec(const DagInit *DI) {
+  assert(TheDef && "The def record is nullptr ?");
+
+  std::string Op = DI->getOperator()->getAsString();
+
+  if (Op == "ascend" || Op == "descend") {
+    bool Reverse = Op == "descend";
+    int i = Reverse ? DI->getNumArgs() - 1 : 0;
+    int e = Reverse ? -1 : DI->getNumArgs();
+    int s = Reverse ? -1 : 1;
+    for (; i != e; i += s) {
+      const Init *Arg = DI->getArg(i);
+      if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
+        if (!BI->isComplete())
+          PrintFatalError(TheDef->getLoc(),
+                          "Expecting complete bits init in `" + Op + "`");
+        Segments.push_back({BI->getNumBits(), BI});
+      } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
+        if (!BI->isConcrete())
+          PrintFatalError(TheDef->getLoc(),
+                          "Expecting concrete bit init in `" + Op + "`");
+        Segments.push_back({1, BI});
+      } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
+        buildRec(SubDI);
+      } else {
+        PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
+                                              Op + "`: " + Arg->getAsString());
+      }
+    }
+  } else if (Op == "operand") {
+    // (operand <operand name>, <# of bits>, [(encoder <custom encoder>)])
+    if (DI->getNumArgs() < 2)
+      PrintFatalError(TheDef->getLoc(),
+                      "Expecting at least 2 arguments for `operand`");
+    HasDynamicSegment = true;
+    const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
+    if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
+      PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");
+
+    auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
+    if (NumBitsVal <= 0)
+      PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");
+
+    StringRef CustomEncoder;
+    if (DI->getNumArgs() >= 3)
+      CustomEncoder = getCustomEncoderName(DI->getArg(2));
+    Segments.push_back(
+        {static_cast<unsigned>(NumBitsVal), OperandName, CustomEncoder});
+  } else if (Op == "slice") {
+    // (slice <operand name>, <high bit>, <low bit>,
+    //        [(encoder <custom encoder>)])
+    if (DI->getNumArgs() < 3)
+      PrintFatalError(TheDef->getLoc(),
+                      "Expecting at least 3 arguments for `slice`");
+    HasDynamicSegment = true;
+    Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
+         *LoBit = DI->getArg(2);
+    if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
+        !isa<IntInit>(LoBit))
+      PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");
+
+    auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
+         LoBitVal = cast<IntInit>(LoBit)->getValue();
+    if (HiBitVal < 0 || LoBitVal < 0)
+      PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
`slice`"); + bool NeedSwap = false; + unsigned NumBits = 0U; + if (HiBitVal < LoBitVal) { + NeedSwap = true; + NumBits = static_cast(LoBitVal - HiBitVal + 1); + } else { + NumBits = static_cast(HiBitVal - LoBitVal + 1); + } + + StringRef CustomEncoder; + if (DI->getNumArgs() >= 4) + CustomEncoder = getCustomEncoderName(DI->getArg(3)); + + if (NeedSwap) { + // Normalization: Hi bit should always be the second argument. + Init *const NewArgs[] = {OperandName, LoBit, HiBit}; + Segments.push_back({NumBits, + DagInit::get(DI->getOperator(), nullptr, NewArgs, {}), + CustomEncoder}); + } else { + Segments.push_back({NumBits, DI, CustomEncoder}); + } + } +} + +void VarLenCodeEmitterGen::run(raw_ostream &OS) { + CodeGenTarget Target(Records); + auto Insts = Records.getAllDerivedDefinitions("Instruction"); + + auto NumberedInstructions = Target.getInstructionsByEnumValue(); + const CodeGenHwModes &HWM = Target.getHwModes(); + + // The set of HwModes used by instruction encodings. + std::set HwModes; + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + + // Create the corresponding VarLenInst instance. + if (R->getValueAsString("Namespace") == "TargetOpcode" || + R->getValueAsBit("isPseudo")) + continue; + + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM) { + HwModes.insert(KV.first); + Record *EncodingDef = KV.second; + RecordVal *RV = EncodingDef->getValue("Inst"); + DagInit *DI = cast(RV->getValue()); + VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)}); + } + continue; + } + } + RecordVal *RV = R->getValue("Inst"); + DagInit *DI = cast(RV->getValue()); + VarLenInsts.insert({R, VarLenInst(DI, RV)}); + } + + // Emit function declaration + OS << "void " << Target.getName() + << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " APInt &Inst,\n" + << " APInt &Scratch,\n" + << " const MCSubtargetInfo &STI) const {\n"; + + // Emit instruction base values + if (HwModes.empty()) { + emitInstructionBaseValues(OS, NumberedInstructions, Target); + } else { + for (unsigned HwMode : HwModes) + emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode); + } + + if (!HwModes.empty()) { + OS << " const unsigned **Index;\n"; + OS << " const uint64_t *InstBits;\n"; + OS << " unsigned HwMode = STI.getHwMode();\n"; + OS << " switch (HwMode) {\n"; + OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; + for (unsigned I : HwModes) { + OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name + << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n"; + } + OS << " };\n"; + } + + // Emit helper function to retrieve base values. + OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n" + << " unsigned NumBits = Index[Opcode][0];\n" + << " if (!NumBits)\n" + << " return APInt::getZeroWidth();\n" + << " unsigned Idx = Index[Opcode][1];\n" + << " ArrayRef Data(&InstBits[Idx], " + << "APInt::getNumWords(NumBits));\n" + << " return APInt(NumBits, Data);\n" + << " };\n"; + + // Map to accumulate all the cases. 
+
+  // Map to accumulate all the cases.
+  std::map<std::string, std::vector<std::string>> CaseMap;
+
+  // Construct the case statement for each opcode.
+  for (Record *R : Insts) {
+    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+        R->getValueAsBit("isPseudo"))
+      continue;
+    std::string InstName =
+        (R->getValueAsString("Namespace") + "::" + R->getName()).str();
+    std::string Case = getInstructionCase(R, Target);
+
+    CaseMap[Case].push_back(std::move(InstName));
+  }
+
+  // Emit the initial function code.
+  OS << "  const unsigned opcode = MI.getOpcode();\n"
+     << "  switch (opcode) {\n";
+
+  // Emit each case statement.
+  for (const auto &C : CaseMap) {
+    const std::string &Case = C.first;
+    const auto &InstList = C.second;
+
+    ListSeparator LS("\n");
+    for (const auto &InstName : InstList)
+      OS << LS << "    case " << InstName << ":";
+
+    OS << " {\n";
+    OS << Case;
+    OS << "      break;\n"
+       << "    }\n";
+  }
+  // Default case: unhandled opcode.
+  OS << "    default:\n"
+     << "      std::string msg;\n"
+     << "      raw_string_ostream Msg(msg);\n"
+     << "      Msg << \"Not supported instr: \" << MI;\n"
+     << "      report_fatal_error(Msg.str().c_str());\n"
+     << "  }\n";
+  OS << "}\n\n";
+}
+
+static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
+                         unsigned &Index) {
+  if (!Bits.getNumWords()) {
+    IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
+    return;
+  }
+
+  IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
+               << "/*Index*/" << Index << "},";
+
+  SS.indent(4);
+  for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
+    SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
+}
+
+void VarLenCodeEmitterGen::emitInstructionBaseValues(
+    raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+    CodeGenTarget &Target, int HwMode) {
+  std::string IndexArray, StorageArray;
+  raw_string_ostream IS(IndexArray), SS(StorageArray);
+
+  const CodeGenHwModes &HWM = Target.getHwModes();
+  if (HwMode == -1) {
+    IS << "  static const unsigned Index[][2] = {\n";
+    SS << "  static const uint64_t InstBits[] = {\n";
+  } else {
+    StringRef Name = HWM.getMode(HwMode).Name;
+    IS << "  static const unsigned Index_" << Name << "[][2] = {\n";
+    SS << "  static const uint64_t InstBits_" << Name << "[] = {\n";
+  }
+
+  unsigned NumFixedValueWords = 0U;
+  for (const CodeGenInstruction *CGI : NumberedInstructions) {
+    Record *R = CGI->TheDef;
+
+    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+        R->getValueAsBit("isPseudo")) {
+      IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
+      continue;
+    }
+
+    Record *EncodingDef = R;
+    if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+      if (auto *DI = dyn_cast_or_null<DagInit>(RV->getValue())) {
+        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+        if (EBM.hasMode(HwMode))
+          EncodingDef = EBM.get(HwMode);
+      }
+    }
+
+    auto It = VarLenInsts.find(EncodingDef);
+    if (It == VarLenInsts.end())
+      PrintFatalError(EncodingDef, "VarLenInst not found for this record");
+    const VarLenInst &VLI = It->second;
+
+    unsigned i = 0U, BitWidth = VLI.size();
+
+    // Start by filling in fixed values.
+    APInt Value(BitWidth, 0);
+    auto SI = VLI.begin(), SE = VLI.end();
+    // Scan through all the segments that have fixed-bit values.
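Backing up to the `CaseMap` grouping above: keying the map on the generated case body means opcodes with byte-identical encoding logic share one case-label list. A self-contained distillation of that trick, with invented instruction names:

```
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, std::vector<std::string>> CaseMap;
  // Two opcodes whose generated encoding code is byte-identical end up
  // under the same key and therefore share one case-label list.
  std::string Body = "      Inst = getInstBits(opcode);\n";
  CaseMap[Body].push_back("Foo::ADD_rr");
  CaseMap[Body].push_back("Foo::ADD_ri");
  for (const auto &[Case, Insts] : CaseMap) {
    for (const auto &Name : Insts)
      std::cout << "    case " << Name << ":";
    std::cout << " {\n" << Case << "      break;\n    }\n";
  }
}
```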
+    while (i < BitWidth && SI != SE) {
+      unsigned SegmentNumBits = SI->BitWidth;
+      if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
+        for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
+          auto *B = cast<BitInit>(BI->getBit(Idx));
+          Value.setBitVal(i + Idx, B->getValue());
+        }
+      }
+      if (const auto *BI = dyn_cast<BitInit>(SI->Value))
+        Value.setBitVal(i, BI->getValue());
+
+      i += SegmentNumBits;
+      ++SI;
+    }
+
+    emitInstBits(IS, SS, Value, NumFixedValueWords);
+    IS << '\t' << "// " << R->getName() << "\n";
+    if (Value.getNumWords())
+      SS << '\t' << "// " << R->getName() << "\n";
+  }
+  IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n  };\n";
+  SS.indent(4) << "UINT64_C(0)\n  };\n";
+
+  OS << IS.str() << SS.str();
+}
+
+std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
+                                                     CodeGenTarget &Target) {
+  std::string Case;
+  if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+    if (auto *DI = dyn_cast_or_null<DagInit>(RV->getValue())) {
+      const CodeGenHwModes &HWM = Target.getHwModes();
+      EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+      Case += "      switch (HwMode) {\n";
+      Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
+      for (auto &KV : EBM) {
+        Case += "      case " + itostr(KV.first) + ": {\n";
+        Case += getInstructionCaseForEncoding(R, KV.second, Target);
+        Case += "      break;\n";
+        Case += "      }\n";
+      }
+      Case += "      }\n";
+      return Case;
+    }
+  }
+  return getInstructionCaseForEncoding(R, R, Target);
+}
+
+std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
+    Record *R, Record *EncodingDef, CodeGenTarget &Target) {
+  auto It = VarLenInsts.find(EncodingDef);
+  if (It == VarLenInsts.end())
+    PrintFatalError(EncodingDef, "Parsed encoding record not found");
+  const VarLenInst &VLI = It->second;
+  size_t BitWidth = VLI.size();
+
+  CodeGenInstruction &CGI = Target.getInstruction(R);
+
+  std::string Case;
+  raw_string_ostream SS(Case);
+  // Resize the scratch buffer.
+  if (BitWidth && !VLI.isFixedValueOnly())
+    SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
+  // Populate the base value.
+  SS.indent(6) << "Inst = getInstBits(opcode);\n";
+
+  // Process each segment in VLI.
+  size_t Offset = 0U;
+  for (const auto &ES : VLI) {
+    unsigned NumBits = ES.BitWidth;
+    const Init *Val = ES.Value;
+    // If it's a StringInit or DagInit, it's a reference to an operand
+    // or part of an operand.
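Before the operand branch below, a hedged distillation of what each emitted segment does at MC-encode time: the operand value is materialized in `Scratch`, a bit range is cut out, and spliced into `Inst` at the segment's offset. Widths and positions here are invented:

```
#include "llvm/ADT/APInt.h"
using namespace llvm;

void spliceOperandSegment(APInt &Inst, uint64_t OpValue) {
  APInt Scratch(32, 0); // stands in for the reused scratch buffer
  Scratch.clearAllBits();
  Scratch |= OpValue;   // stand-in for getMachineOpValue(...)
  // Take 4 bits starting at bit 0 of Scratch and place them at bit 8 of
  // Inst, mirroring the generated extractBits/insertBits pair.
  Inst.insertBits(Scratch.extractBits(/*numBits=*/4, /*bitPosition=*/0), 8);
}
```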
+    if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
+      StringRef OperandName;
+      unsigned LoBit = 0U;
+      if (const auto *SV = dyn_cast<StringInit>(Val)) {
+        OperandName = SV->getValue();
+      } else {
+        // Normalized: (slice <operand name>, <high bit>, <low bit>)
+        const auto *DV = cast<DagInit>(Val);
+        OperandName = cast<StringInit>(DV->getArg(0))->getValue();
+        LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
+      }
+
+      auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
+      unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
+      StringRef CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName;
+      if (ES.CustomEncoder.size())
+        CustomEncoder = ES.CustomEncoder;
+
+      SS.indent(6) << "Scratch.clearAllBits();\n";
+      SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
+      if (CustomEncoder.empty())
+        SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
+                     << utostr(FlatOpIdx) << ")";
+      else
+        SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
+
+      SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
+
+      SS.indent(6) << "Inst.insertBits("
+                   << "Scratch.extractBits(" << utostr(NumBits) << ", "
+                   << utostr(LoBit) << ")"
+                   << ", " << Offset << ");\n";
+    }
+    Offset += NumBits;
+  }
+
+  StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
+  if (!PostEmitter.empty())
+    SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
+
+  return Case;
+}
+
+namespace llvm {
+
+void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
+  VarLenCodeEmitterGen(R).run(OS);
+}
+
+} // end namespace llvm
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.h b/llvm/utils/TableGen/VarLenCodeEmitterGen.h
new file mode 100644
index 000000000000..5bdedee1dd51
--- /dev/null
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.h
@@ -0,0 +1,66 @@
+//===- VarLenCodeEmitterGen.h - CEG for variable-length insts ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CodeEmitterGen component for variable-length
+// instructions. See the .cpp file for more details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_VARLENCODEEMITTERGEN_H
+#define LLVM_UTILS_TABLEGEN_VARLENCODEEMITTERGEN_H
+
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+
+struct EncodingSegment {
+  unsigned BitWidth;
+  const Init *Value;
+  StringRef CustomEncoder = "";
+};
+
+class VarLenInst {
+  const RecordVal *TheDef;
+  size_t NumBits;
+
+  // Set if any of the segments is not a fixed value.
+  bool HasDynamicSegment;
+
+  SmallVector<EncodingSegment> Segments;
+
+  void buildRec(const DagInit *DI);
+
+  StringRef getCustomEncoderName(const Init *EI) const {
+    if (const auto *DI = dyn_cast<DagInit>(EI)) {
+      if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0)))
+        return cast<StringInit>(DI->getArg(0))->getValue();
+    }
+    return "";
+  }
+
+public:
+  VarLenInst() : TheDef(nullptr), NumBits(0U), HasDynamicSegment(false) {}
+
+  explicit VarLenInst(const DagInit *DI, const RecordVal *TheDef);
+
+  /// Number of bits
+  size_t size() const { return NumBits; }
+
+  using const_iterator = decltype(Segments)::const_iterator;
+
+  const_iterator begin() const { return Segments.begin(); }
+  const_iterator end() const { return Segments.end(); }
+  size_t getNumSegments() const { return Segments.size(); }
+
+  bool isFixedValueOnly() const { return !HasDynamicSegment; }
+};
+
+void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS);
+
+} // end namespace llvm
+#endif
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 74969053f095..dc037e4409ab 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -37,8 +37,9 @@ void emitWebAssemblyDisassemblerTables(
     if (!Def.getValue("Inst"))
       continue;
     auto &Inst = *Def.getValueAsBitsInit("Inst");
-    auto Opc = static_cast<unsigned>(
-        reinterpret_cast<IntInit *>(Inst.convertInitializerTo(IntRecTy::get()))
+    RecordKeeper &RK = Inst.getRecordKeeper();
+    unsigned Opc = static_cast<unsigned>(
+        cast<IntInit>(Inst.convertInitializerTo(IntRecTy::get(RK)))
             ->getValue());
     if (Opc == 0xFFFFFFFF)
       continue; // No opcode defined.
@@ -54,11 +55,7 @@ void emitWebAssemblyDisassemblerTables(
     auto &CGIP = OpcodeTable[Prefix][Opc];
     // All wasm instructions have a StackBased field of type string; we only
     // want the instructions for which this is "true".
-    auto StackString =
-        Def.getValue("StackBased")->getValue()->getCastTo(StringRecTy::get());
-    auto IsStackBased =
-        StackString &&
-        reinterpret_cast<const StringInit *>(StackString)->getValue() == "true";
+    bool IsStackBased = Def.getValueAsBit("StackBased");
     if (!IsStackBased)
       continue;
     if (CGIP.second) {
@@ -66,14 +63,11 @@ void emitWebAssemblyDisassemblerTables(
       // should be the canonical one. This determines which variant gets
      // printed in a disassembly. We want e.g. "call" not "i32.call", and
      // "end" when we don't know if it's "end_loop" or "end_block" etc.
-      auto IsCanonicalExisting = CGIP.second->TheDef->getValue("IsCanonical")
-                                     ->getValue()
-                                     ->getAsString() == "1";
+      bool IsCanonicalExisting = CGIP.second->TheDef->getValueAsBit("IsCanonical");
      // We already have one marked explicitly as canonical, so keep it.
      if (IsCanonicalExisting)
        continue;
-      auto IsCanonicalNew =
-          Def.getValue("IsCanonical")->getValue()->getAsString() == "1";
+      bool IsCanonicalNew = Def.getValueAsBit("IsCanonical");
      // If the new one is explicitly marked as canonical, take it.
      if (!IsCanonicalNew) {
        // Neither the existing nor the new instruction is canonical.
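The WebAssembly change above is a good illustration of the typed Record accessors: the string round-trip through getCastTo and reinterpret_cast collapses into one call. A minimal sketch, with the field name taken from the diff and the surrounding setup assumed:

```
#include "llvm/TableGen/Record.h"

// Reads the `bit` field directly; a fatal error fires if the field is
// missing or uninitialized, which is the behavior the emitter wants here.
bool isStackBased(const llvm::Record &Def) {
  return Def.getValueAsBit("StackBased");
}
```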
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp index 81ddea99740d..2fa8fce81422 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -105,8 +105,7 @@ static inline bool inheritsFrom(InstructionContext child, case IC_64BIT_ADSIZE: return (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE, noPrefix)); case IC_64BIT_OPSIZE_ADSIZE: - return (noPrefix && - inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE, noPrefix)); + return false; case IC_XD: return inheritsFrom(child, IC_64BIT_XD); case IC_XS: @@ -127,11 +126,10 @@ static inline bool inheritsFrom(InstructionContext child, case IC_64BIT_OPSIZE: return inheritsFrom(child, IC_64BIT_REXW_OPSIZE) || (!AdSize64 && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE)) || - (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE)) || - (!AdSize64 && inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE)); + (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE)); case IC_64BIT_XD: - return (inheritsFrom(child, IC_64BIT_REXW_XD) || - (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE))); + return(inheritsFrom(child, IC_64BIT_REXW_XD) || + (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE))); case IC_64BIT_XS: return(inheritsFrom(child, IC_64BIT_REXW_XS) || (!AdSize64 && inheritsFrom(child, IC_64BIT_XS_ADSIZE))); @@ -161,12 +159,7 @@ static inline bool inheritsFrom(InstructionContext child, case IC_VEX_OPSIZE: return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) || (VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) || - (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)) || - inheritsFrom(child, IC_64BIT_VEX_OPSIZE); - case IC_64BIT_VEX_OPSIZE: - return inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE); - case IC_64BIT_VEX_OPSIZE_ADSIZE: - return false; + (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)); case IC_VEX_W: return VEX_LIG && inheritsFrom(child, IC_VEX_L_W); case IC_VEX_W_XS: @@ -673,7 +666,6 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, unsigned &i2, unsigned &ModRMTableNum, ModRMDecision &decision) const { - static uint32_t sTableNumber = 0; static uint32_t sEntryNumber = 1; ModRMDecisionType dt = getDecisionType(decision); @@ -753,8 +745,6 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2, assert(sEntryNumber < 65536U && "Index into ModRMDecision is too large for uint16_t!"); (void)sEntryNumber; - - ++sTableNumber; } void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, @@ -891,9 +881,6 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) { if (index & ATTR_EVEX) o << "IC_EVEX"; - else if ((index & (ATTR_64BIT | ATTR_VEXL | ATTR_REXW | ATTR_OPSIZE)) == - (ATTR_64BIT | ATTR_OPSIZE)) - o << "IC_64BIT_VEX"; else o << "IC_VEX"; @@ -905,13 +892,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { if (index & ATTR_REXW) o << "_W"; - if (index & ATTR_OPSIZE) { + if (index & ATTR_OPSIZE) o << "_OPSIZE"; - if ((index & (ATTR_64BIT | ATTR_EVEX | ATTR_VEX | ATTR_VEXL | - ATTR_REXW | ATTR_ADSIZE)) == - (ATTR_64BIT | ATTR_VEX | ATTR_ADSIZE)) - o << "_ADSIZE"; - } else if (index & ATTR_XD) + else if (index & ATTR_XD) o << "_XD"; else if (index & ATTR_XS) o << "_XS"; @@ -925,7 +908,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { if (index & ATTR_EVEXB) o << "_B"; } - 
} else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) + } + else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) o << "IC_64BIT_REXW_XS"; else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) o << "IC_64BIT_REXW_XD"; diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp index 36c71843d70e..1384330ee8a1 100644 --- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp @@ -11,11 +11,14 @@ /// //===----------------------------------------------------------------------===// +#include "CodeGenInstruction.h" #include "CodeGenTarget.h" +#include "X86RecognizableInstr.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/TableGenBackend.h" using namespace llvm; +using namespace X86Disassembler; namespace { @@ -108,28 +111,25 @@ public: IsMatch(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {} bool operator()(const CodeGenInstruction *VEXInst) { - Record *RecE = EVEXInst->TheDef; - Record *RecV = VEXInst->TheDef; - bool EVEX_W = RecE->getValueAsBit("HasVEX_W"); - bool VEX_W = RecV->getValueAsBit("HasVEX_W"); - bool VEX_WIG = RecV->getValueAsBit("IgnoresVEX_W"); - bool EVEX_WIG = RecE->getValueAsBit("IgnoresVEX_W"); - bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0"); - - if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" || - RecV->getValueAsBit("isCodeGenOnly") != RecE->getValueAsBit("isCodeGenOnly") || + RecognizableInstrBase VEXRI(*VEXInst); + RecognizableInstrBase EVEXRI(*EVEXInst); + bool VEX_W = VEXRI.HasVEX_W; + bool EVEX_W = EVEXRI.HasVEX_W; + bool VEX_WIG = VEXRI.IgnoresVEX_W; + bool EVEX_WIG = EVEXRI.IgnoresVEX_W; + bool EVEX_W1_VEX_W0 = EVEXInst->TheDef->getValueAsBit("EVEX_W1_VEX_W0"); + + if (VEXRI.IsCodeGenOnly != EVEXRI.IsCodeGenOnly || // VEX/EVEX fields - RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") || - RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") || - RecV->getValueAsBit("hasVEX_4V") != RecE->getValueAsBit("hasVEX_4V") || - RecV->getValueAsBit("hasEVEX_L2") != RecE->getValueAsBit("hasEVEX_L2") || - RecV->getValueAsBit("hasVEX_L") != RecE->getValueAsBit("hasVEX_L") || + VEXRI.OpPrefix != EVEXRI.OpPrefix || VEXRI.OpMap != EVEXRI.OpMap || + VEXRI.HasVEX_4V != EVEXRI.HasVEX_4V || + VEXRI.HasVEX_L != EVEXRI.HasVEX_L || // Match is allowed if either is VEX_WIG, or they match, or EVEX // is VEX_W1X and VEX is VEX_W0. (!(VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) || (EVEX_W1_VEX_W0 && EVEX_W && !VEX_W))) || // Instruction's format - RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form")) + VEXRI.Form != EVEXRI.Form) return false; // This is needed for instructions with intrinsic version (_Int). 
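The W-bit part of the match condition above is easy to misread; restated as a standalone predicate whose names mirror the locals in IsMatch:

```
// A match is allowed if VEX ignores W, or both sides agree on W (and EVEX
// does not ignore it), or the EVEX form is tagged EVEX_W1_VEX_W0, i.e. a
// W=1 EVEX instruction that pairs with a W=0 VEX instruction.
bool wBitsCompatible(bool VEX_W, bool VEX_WIG, bool EVEX_W, bool EVEX_WIG,
                     bool EVEX_W1_VEX_W0) {
  return VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) ||
         (EVEX_W1_VEX_W0 && EVEX_W && !VEX_W);
}
```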
@@ -160,31 +160,6 @@ public: return true; } - -private: - static inline bool isRegisterOperand(const Record *Rec) { - return Rec->isSubClassOf("RegisterClass") || - Rec->isSubClassOf("RegisterOperand"); - } - - static inline bool isMemoryOperand(const Record *Rec) { - return Rec->isSubClassOf("Operand") && - Rec->getValueAsString("OperandType") == "OPERAND_MEMORY"; - } - - static inline bool isImmediateOperand(const Record *Rec) { - return Rec->isSubClassOf("Operand") && - Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE"; - } - - static inline unsigned int getRegOperandSize(const Record *RegRec) { - if (RegRec->isSubClassOf("RegisterClass")) - return RegRec->getValueAsInt("Alignment"); - if (RegRec->isSubClassOf("RegisterOperand")) - return RegRec->getValueAsDef("RegClass")->getValueAsInt("Alignment"); - - llvm_unreachable("Register operand's size not known!"); - } }; void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) { @@ -206,23 +181,19 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) { Target.getInstructionsByEnumValue(); for (const CodeGenInstruction *Inst : NumberedInstructions) { + const Record *Def = Inst->TheDef; // Filter non-X86 instructions. - if (!Inst->TheDef->isSubClassOf("X86Inst")) + if (!Def->isSubClassOf("X86Inst")) continue; + RecognizableInstrBase RI(*Inst); // Add VEX encoded instructions to one of VEXInsts vectors according to // it's opcode. - if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncVEX") { - uint64_t Opcode = getValueFromBitsInit(Inst->TheDef-> - getValueAsBitsInit("Opcode")); - VEXInsts[Opcode].push_back(Inst); - } + if (RI.Encoding == X86Local::VEX) + VEXInsts[RI.Opcode].push_back(Inst); // Add relevant EVEX encoded instructions to EVEXInsts - else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncEVEX" && - !Inst->TheDef->getValueAsBit("hasEVEX_K") && - !Inst->TheDef->getValueAsBit("hasEVEX_B") && - !Inst->TheDef->getValueAsBit("hasEVEX_L2") && - !Inst->TheDef->getValueAsBit("notEVEX2VEXConvertible")) + else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_B && + !RI.HasEVEX_L2 && !Def->getValueAsBit("notEVEX2VEXConvertible")) EVEXInsts.push_back(Inst); } diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 2a29331eb7e8..5b3f11848de6 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -18,6 +18,7 @@ #include "llvm/TableGen/TableGenBackend.h" using namespace llvm; +using namespace X86Disassembler; namespace { @@ -51,27 +52,32 @@ const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD", // For manually mapping instructions that do not match by their encoding. 
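The ManualMapSet initializer that follows pairs register-form and memory-form instruction names by hand. The entry type itself is defined earlier in this file and is not part of the hunk; a plausible shape, inferred from usage, with all names here being assumptions:

```
enum UnfoldStrategy {
  NO_UNFOLD, // fold only
  UNFOLD,    // the mapping may also be reversed
};

struct ManualMapEntry {
  const char *RegInstStr; // register-form instruction name
  const char *MemInstStr; // memory-form instruction name
  UnfoldStrategy Strategy;
};
```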
const ManualMapEntry ManualMapSet[] = { - { "ADD16ri_DB", "ADD16mi", NO_UNFOLD }, - { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD }, - { "ADD16rr_DB", "ADD16mr", NO_UNFOLD }, - { "ADD32ri_DB", "ADD32mi", NO_UNFOLD }, - { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD }, - { "ADD32rr_DB", "ADD32mr", NO_UNFOLD }, - { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD }, - { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD }, - { "ADD64rr_DB", "ADD64mr", NO_UNFOLD }, - { "ADD8ri_DB", "ADD8mi", NO_UNFOLD }, - { "ADD8rr_DB", "ADD8mr", NO_UNFOLD }, - { "ADD16rr_DB", "ADD16rm", NO_UNFOLD }, - { "ADD32rr_DB", "ADD32rm", NO_UNFOLD }, - { "ADD64rr_DB", "ADD64rm", NO_UNFOLD }, - { "ADD8rr_DB", "ADD8rm", NO_UNFOLD }, - { "PUSH16r", "PUSH16rmm", UNFOLD }, - { "PUSH32r", "PUSH32rmm", UNFOLD }, - { "PUSH64r", "PUSH64rmm", UNFOLD }, - { "TAILJMPr", "TAILJMPm", UNFOLD }, - { "TAILJMPr64", "TAILJMPm64", UNFOLD }, - { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD }, + { "ADD16ri_DB", "ADD16mi", NO_UNFOLD }, + { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD }, + { "ADD16rr_DB", "ADD16mr", NO_UNFOLD }, + { "ADD32ri_DB", "ADD32mi", NO_UNFOLD }, + { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD }, + { "ADD32rr_DB", "ADD32mr", NO_UNFOLD }, + { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD }, + { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD }, + { "ADD64rr_DB", "ADD64mr", NO_UNFOLD }, + { "ADD8ri_DB", "ADD8mi", NO_UNFOLD }, + { "ADD8rr_DB", "ADD8mr", NO_UNFOLD }, + { "ADD16rr_DB", "ADD16rm", NO_UNFOLD }, + { "ADD32rr_DB", "ADD32rm", NO_UNFOLD }, + { "ADD64rr_DB", "ADD64rm", NO_UNFOLD }, + { "ADD8rr_DB", "ADD8rm", NO_UNFOLD }, + { "MMX_MOVD64from64rr", "MMX_MOVQ64mr", UNFOLD }, + { "MMX_MOVD64grr", "MMX_MOVD64mr", UNFOLD }, + { "MOVLHPSrr", "MOVHPSrm", NO_UNFOLD }, + { "PUSH16r", "PUSH16rmm", UNFOLD }, + { "PUSH32r", "PUSH32rmm", UNFOLD }, + { "PUSH64r", "PUSH64rmm", UNFOLD }, + { "TAILJMPr", "TAILJMPm", UNFOLD }, + { "TAILJMPr64", "TAILJMPm64", UNFOLD }, + { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD }, + { "VMOVLHPSZrr", "VMOVHPSZ128rm", NO_UNFOLD }, + { "VMOVLHPSrr", "VMOVHPSrm", NO_UNFOLD }, }; @@ -114,16 +120,21 @@ class X86FoldTablesEmitter { OS << "X86::" << MemInst->TheDef->getName() << ","; OS.PadToColumn(75); + std::string Attrs; if (IsLoad) - OS << "TB_FOLDED_LOAD | "; + Attrs += "TB_FOLDED_LOAD | "; if (IsStore) - OS << "TB_FOLDED_STORE | "; + Attrs += "TB_FOLDED_STORE | "; if (CannotUnfold) - OS << "TB_NO_REVERSE | "; + Attrs += "TB_NO_REVERSE | "; if (IsAligned) - OS << "TB_ALIGN_" << Alignment << " | "; + Attrs += "TB_ALIGN_" + std::to_string(Alignment) + " | "; - OS << "0 },\n"; + StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("| "); + if (SimplifiedAttrs.empty()) + SimplifiedAttrs = "0"; + + OS << SimplifiedAttrs << " },\n"; } bool operator<(const X86FoldTableEntry &RHS) const { @@ -207,56 +218,6 @@ static inline uint64_t getValueFromBitsInit(const BitsInit *B) { return Value; } -// Returns true if the two given BitsInits represent the same integer value -static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) { - if (B1->getNumBits() != B2->getNumBits()) - PrintFatalError("Comparing two BitsInits with different sizes!"); - - for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) { - BitInit *Bit1 = cast(B1->getBit(i)); - BitInit *Bit2 = cast(B2->getBit(i)); - if (Bit1->getValue() != Bit2->getValue()) - return false; - } - return true; -} - -// Return the size of the register operand -static inline unsigned int getRegOperandSize(const Record *RegRec) { - if (RegRec->isSubClassOf("RegisterOperand")) - RegRec = RegRec->getValueAsDef("RegClass"); - if 
(RegRec->isSubClassOf("RegisterClass")) - return RegRec->getValueAsListOfDefs("RegTypes")[0]->getValueAsInt("Size"); - - llvm_unreachable("Register operand's size not known!"); -} - -// Return the size of the memory operand -static inline unsigned getMemOperandSize(const Record *MemRec) { - if (MemRec->isSubClassOf("Operand")) { - StringRef Name = - MemRec->getValueAsDef("ParserMatchClass")->getValueAsString("Name"); - if (Name == "Mem8") - return 8; - if (Name == "Mem16") - return 16; - if (Name == "Mem32") - return 32; - if (Name == "Mem64") - return 64; - if (Name == "Mem80") - return 80; - if (Name == "Mem128") - return 128; - if (Name == "Mem256") - return 256; - if (Name == "Mem512") - return 512; - } - - llvm_unreachable("Memory operand's size not known!"); -} - // Return true if the instruction defined as a register flavor. static inline bool hasRegisterFormat(const Record *Inst) { const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits"); @@ -279,22 +240,6 @@ static inline bool isNOREXRegClass(const Record *Op) { return Op->getName().contains("_NOREX"); } -static inline bool isRegisterOperand(const Record *Rec) { - return Rec->isSubClassOf("RegisterClass") || - Rec->isSubClassOf("RegisterOperand") || - Rec->isSubClassOf("PointerLikeRegClass"); -} - -static inline bool isMemoryOperand(const Record *Rec) { - return Rec->isSubClassOf("Operand") && - Rec->getValueAsString("OperandType") == "OPERAND_MEMORY"; -} - -static inline bool isImmediateOperand(const Record *Rec) { - return Rec->isSubClassOf("Operand") && - Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE"; -} - // Get the alternative instruction pointed by "FoldGenRegForm" field. static inline const CodeGenInstruction * getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records, @@ -312,61 +257,59 @@ getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records, // matches the EVEX instruction of this object. class IsMatch { const CodeGenInstruction *MemInst; + unsigned Variant; public: - IsMatch(const CodeGenInstruction *Inst, const RecordKeeper &Records) - : MemInst(Inst) {} + IsMatch(const CodeGenInstruction *Inst, unsigned V) + : MemInst(Inst), Variant(V) {} bool operator()(const CodeGenInstruction *RegInst) { - Record *MemRec = MemInst->TheDef; - Record *RegRec = RegInst->TheDef; + X86Disassembler::RecognizableInstrBase RegRI(*RegInst); + X86Disassembler::RecognizableInstrBase MemRI(*MemInst); + const Record *RegRec = RegInst->TheDef; + const Record *MemRec = MemInst->TheDef; + + // EVEX_B means different things for memory and register forms. + if (RegRI.HasEVEX_B != 0 || MemRI.HasEVEX_B != 0) + return false; + + // Instruction's format - The register form's "Form" field should be + // the opposite of the memory form's "Form" field. + if (!areOppositeForms(RegRI.Form, MemRI.Form)) + return false; + + // X86 encoding is crazy, e.g + // + // f3 0f c7 30 vmxon (%rax) + // f3 0f c7 f0 senduipi %rax + // + // This two instruction have similiar encoding fields but are unrelated + if (X86Disassembler::getMnemonic(MemInst, Variant) != + X86Disassembler::getMnemonic(RegInst, Variant)) + return false; // Return false if one (at least) of the encoding fields of both // instructions do not match. 
- if (RegRec->getValueAsDef("OpEnc") != MemRec->getValueAsDef("OpEnc") || - !equalBitsInits(RegRec->getValueAsBitsInit("Opcode"), - MemRec->getValueAsBitsInit("Opcode")) || - // VEX/EVEX fields - RegRec->getValueAsDef("OpPrefix") != - MemRec->getValueAsDef("OpPrefix") || - RegRec->getValueAsDef("OpMap") != MemRec->getValueAsDef("OpMap") || - RegRec->getValueAsDef("OpSize") != MemRec->getValueAsDef("OpSize") || - RegRec->getValueAsDef("AdSize") != MemRec->getValueAsDef("AdSize") || - RegRec->getValueAsBit("hasVEX_4V") != - MemRec->getValueAsBit("hasVEX_4V") || - RegRec->getValueAsBit("hasEVEX_K") != - MemRec->getValueAsBit("hasEVEX_K") || - RegRec->getValueAsBit("hasEVEX_Z") != - MemRec->getValueAsBit("hasEVEX_Z") || - // EVEX_B means different things for memory and register forms. - RegRec->getValueAsBit("hasEVEX_B") != 0 || - MemRec->getValueAsBit("hasEVEX_B") != 0 || + if (RegRI.Encoding != MemRI.Encoding || RegRI.Opcode != MemRI.Opcode || + RegRI.OpPrefix != MemRI.OpPrefix || RegRI.OpMap != MemRI.OpMap || + RegRI.OpSize != MemRI.OpSize || RegRI.AdSize != MemRI.AdSize || + RegRI.HasREX_W != MemRI.HasREX_W || + RegRI.HasVEX_4V != MemRI.HasVEX_4V || + RegRI.HasVEX_L != MemRI.HasVEX_L || + RegRI.HasVEX_W != MemRI.HasVEX_W || + RegRI.IgnoresVEX_L != MemRI.IgnoresVEX_L || + RegRI.IgnoresVEX_W != MemRI.IgnoresVEX_W || + RegRI.HasEVEX_K != MemRI.HasEVEX_K || + RegRI.HasEVEX_KZ != MemRI.HasEVEX_KZ || + RegRI.HasEVEX_L2 != MemRI.HasEVEX_L2 || RegRec->getValueAsBit("hasEVEX_RC") != MemRec->getValueAsBit("hasEVEX_RC") || - RegRec->getValueAsBit("hasREX_WPrefix") != - MemRec->getValueAsBit("hasREX_WPrefix") || RegRec->getValueAsBit("hasLockPrefix") != MemRec->getValueAsBit("hasLockPrefix") || RegRec->getValueAsBit("hasNoTrackPrefix") != MemRec->getValueAsBit("hasNoTrackPrefix") || - RegRec->getValueAsBit("hasVEX_L") != - MemRec->getValueAsBit("hasVEX_L") || - RegRec->getValueAsBit("hasEVEX_L2") != - MemRec->getValueAsBit("hasEVEX_L2") || - RegRec->getValueAsBit("ignoresVEX_L") != - MemRec->getValueAsBit("ignoresVEX_L") || - RegRec->getValueAsBit("HasVEX_W") != - MemRec->getValueAsBit("HasVEX_W") || - RegRec->getValueAsBit("IgnoresVEX_W") != - MemRec->getValueAsBit("IgnoresVEX_W") || RegRec->getValueAsBit("EVEX_W1_VEX_W0") != - MemRec->getValueAsBit("EVEX_W1_VEX_W0") || - // Instruction's format - The register form's "Form" field should be - // the opposite of the memory form's "Form" field. - !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"), - MemRec->getValueAsBitsInit("FormBits")) || - RegRec->getValueAsBit("isAsmParserOnly") != - MemRec->getValueAsBit("isAsmParserOnly")) + MemRec->getValueAsBit("EVEX_W1_VEX_W0")) return false; // Make sure the sizes of the operands of both instructions suit each other. @@ -419,31 +362,24 @@ public: private: // Return true of the 2 given forms are the opposite of each other. 
- bool areOppositeForms(const BitsInit *RegFormBits, - const BitsInit *MemFormBits) { - uint64_t MemFormNum = getValueFromBitsInit(MemFormBits); - uint64_t RegFormNum = getValueFromBitsInit(RegFormBits); - - if ((MemFormNum == X86Local::MRM0m && RegFormNum == X86Local::MRM0r) || - (MemFormNum == X86Local::MRM1m && RegFormNum == X86Local::MRM1r) || - (MemFormNum == X86Local::MRM2m && RegFormNum == X86Local::MRM2r) || - (MemFormNum == X86Local::MRM3m && RegFormNum == X86Local::MRM3r) || - (MemFormNum == X86Local::MRM4m && RegFormNum == X86Local::MRM4r) || - (MemFormNum == X86Local::MRM5m && RegFormNum == X86Local::MRM5r) || - (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) || - (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) || - (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) || - (MemFormNum == X86Local::MRMXmCC && RegFormNum == X86Local::MRMXrCC) || - (MemFormNum == X86Local::MRMDestMem && - RegFormNum == X86Local::MRMDestReg) || - (MemFormNum == X86Local::MRMSrcMem && - RegFormNum == X86Local::MRMSrcReg) || - (MemFormNum == X86Local::MRMSrcMem4VOp3 && - RegFormNum == X86Local::MRMSrcReg4VOp3) || - (MemFormNum == X86Local::MRMSrcMemOp4 && - RegFormNum == X86Local::MRMSrcRegOp4) || - (MemFormNum == X86Local::MRMSrcMemCC && - RegFormNum == X86Local::MRMSrcRegCC)) + bool areOppositeForms(unsigned RegForm, unsigned MemForm) { + if ((MemForm == X86Local::MRM0m && RegForm == X86Local::MRM0r) || + (MemForm == X86Local::MRM1m && RegForm == X86Local::MRM1r) || + (MemForm == X86Local::MRM2m && RegForm == X86Local::MRM2r) || + (MemForm == X86Local::MRM3m && RegForm == X86Local::MRM3r) || + (MemForm == X86Local::MRM4m && RegForm == X86Local::MRM4r) || + (MemForm == X86Local::MRM5m && RegForm == X86Local::MRM5r) || + (MemForm == X86Local::MRM6m && RegForm == X86Local::MRM6r) || + (MemForm == X86Local::MRM7m && RegForm == X86Local::MRM7r) || + (MemForm == X86Local::MRMXm && RegForm == X86Local::MRMXr) || + (MemForm == X86Local::MRMXmCC && RegForm == X86Local::MRMXrCC) || + (MemForm == X86Local::MRMDestMem && RegForm == X86Local::MRMDestReg) || + (MemForm == X86Local::MRMSrcMem && RegForm == X86Local::MRMSrcReg) || + (MemForm == X86Local::MRMSrcMem4VOp3 && + RegForm == X86Local::MRMSrcReg4VOp3) || + (MemForm == X86Local::MRMSrcMemOp4 && + RegForm == X86Local::MRMSrcRegOp4) || + (MemForm == X86Local::MRMSrcMemCC && RegForm == X86Local::MRMSrcRegCC)) return true; return false; @@ -535,7 +471,10 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr, for (unsigned i = RegOutSize, e = RegInstr->Operands.size(); i < e; i++) { Record *RegOpRec = RegInstr->Operands[i].Rec; Record *MemOpRec = MemInstr->Operands[i].Rec; - if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) { + // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64, TAILJMPr64_REX + if ((isRegisterOperand(RegOpRec) || + RegOpRec->isSubClassOf("PointerLikeRegClass")) && + isMemoryOperand(MemOpRec)) { switch (i) { case 0: addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0); @@ -583,10 +522,9 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) { Target.getInstructionsByEnumValue(); for (const CodeGenInstruction *Inst : NumberedInstructions) { - if (!Inst->TheDef->getNameInit() || !Inst->TheDef->isSubClassOf("X86Inst")) - continue; - const Record *Rec = Inst->TheDef; + if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly")) + continue; // - Do not proceed if the instruction is marked as notMemoryFoldable. 
 // - Instructions including RST register class operands are not relevant
@@ -611,6 +549,8 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
     }
   }
 
+  Record *AsmWriter = Target.getAsmWriter();
+  unsigned Variant = AsmWriter->getValueAsInt("Variant");
   // For each memory form instruction, try to find its register form
   // instruction.
   for (const CodeGenInstruction *MemInst : MemInsts) {
@@ -626,7 +566,7 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
     // opcode.
     std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;
 
-    auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Records));
+    auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Variant));
     if (Match != OpcRegInsts.end()) {
       const CodeGenInstruction *RegInst = *Match;
       // If the matched instruction has its "FoldGenRegForm" set, map the
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
new file mode 100644
index 000000000000..f405e051e355
--- /dev/null
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -0,0 +1,94 @@
+//==- X86MnemonicTables.cpp - Generate mnemonic extraction tables. -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend is responsible for emitting tables that group
+// instructions by their mnemonic name with respect to the AsmWriter variant
+// (e.g. isADD, etc.).
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "X86RecognizableInstr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/TableGenBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86MnemonicTablesEmitter {
+  CodeGenTarget Target;
+
+public:
+  X86MnemonicTablesEmitter(RecordKeeper &R) : Target(R) {}
+
+  // Output X86 mnemonic tables.
+  void run(raw_ostream &OS);
+};
+
+void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
+  emitSourceFileHeader("X86 Mnemonic tables", OS);
+  OS << "namespace llvm {\nnamespace X86 {\n\n";
+  Record *AsmWriter = Target.getAsmWriter();
+  unsigned Variant = AsmWriter->getValueAsInt("Variant");
+
+  // Hold all instructions grouped by mnemonic
+  StringMap<SmallVector<const CodeGenInstruction *>> MnemonicToCGInstrMap;
+
+  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
+      Target.getInstructionsByEnumValue();
+  for (const CodeGenInstruction *I : NumberedInstructions) {
+    const Record *Def = I->TheDef;
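For context, the GET_X86_MNEMONIC_TABLES_CPP section generated by the loop that follows looks roughly like this for a mnemonic with several opcodes; the opcode names and values here are invented:

```
namespace X86 {
enum : unsigned { ADD32rr = 1, ADD32rm, ADD32ri };
} // namespace X86

bool isADD(unsigned Opcode) {
  switch (Opcode) {
  case X86::ADD32rr:
  case X86::ADD32rm:
  case X86::ADD32ri:
    return true;
  }
  return false;
}
```

+    // Filter non-X86 instructions.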
+ if (!Def->isSubClassOf("X86Inst")) + continue; + X86Disassembler::RecognizableInstrBase RI(*I); + if (!RI.shouldBeEmitted()) + continue; + if ( // Non-parsable instruction defs contain prefix as part of AsmString + Def->getValueAsString("AsmVariantName") == "NonParsable" || + // Skip prefix byte + RI.Form == X86Local::PrefixByte) + continue; + std::string Mnemonic = X86Disassembler::getMnemonic(I, Variant); + MnemonicToCGInstrMap[Mnemonic].push_back(I); + } + + OS << "#ifdef GET_X86_MNEMONIC_TABLES_H\n"; + OS << "#undef GET_X86_MNEMONIC_TABLES_H\n\n"; + for (StringRef Mnemonic : MnemonicToCGInstrMap.keys()) + OS << "bool is" << Mnemonic << "(unsigned Opcode);\n"; + OS << "#endif // GET_X86_MNEMONIC_TABLES_H\n\n"; + + OS << "#ifdef GET_X86_MNEMONIC_TABLES_CPP\n"; + OS << "#undef GET_X86_MNEMONIC_TABLES_CPP\n\n"; + for (StringRef Mnemonic : MnemonicToCGInstrMap.keys()) { + OS << "bool is" << Mnemonic << "(unsigned Opcode) {\n"; + auto Mnemonics = MnemonicToCGInstrMap[Mnemonic]; + if (Mnemonics.size() == 1) { + const CodeGenInstruction *CGI = *Mnemonics.begin(); + OS << "\treturn Opcode == " << CGI->TheDef->getName() << ";\n}\n\n"; + } else { + OS << "\tswitch (Opcode) {\n"; + for (const CodeGenInstruction *CGI : Mnemonics) { + OS << "\tcase " << CGI->TheDef->getName() << ":\n"; + } + OS << "\t\treturn true;\n\t}\n\treturn false;\n}\n\n"; + } + } + OS << "#endif // GET_X86_MNEMONIC_TABLES_CPP\n\n"; + OS << "} // end namespace X86\n} // end namespace llvm"; +} + +} // namespace + +namespace llvm { +void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS) { + X86MnemonicTablesEmitter(RK).run(OS); +} +} // namespace llvm diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index 4023d8f57318..9afde66fe6f3 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -24,6 +24,51 @@ using namespace llvm; using namespace X86Disassembler; +std::string X86Disassembler::getMnemonic(const CodeGenInstruction *I, unsigned Variant) { + std::string AsmString = I->FlattenAsmStringVariants(I->AsmString, Variant); + StringRef Mnemonic(AsmString); + // Extract a mnemonic assuming it's separated by \t + Mnemonic = Mnemonic.take_until([](char C) { return C == '\t'; }); + + // Special case: CMOVCC, JCC, SETCC have "${cond}" in mnemonic. + // Replace it with "CC" in-place. 
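A self-contained demo of this mnemonic extraction, including the ${cond} rewrite just described; the asm string is invented:

```
#include <iostream>
#include <string>
#include "llvm/ADT/StringRef.h"
using namespace llvm;

int main() {
  std::string AsmString = "cmov${cond}\t{$src2, $dst|$dst, $src2}";
  StringRef Mnemonic(AsmString);
  // Everything up to the first tab is the mnemonic.
  Mnemonic = Mnemonic.take_until([](char C) { return C == '\t'; });
  // Rewrite the trailing "${cond}" placeholder to "CC", as the code
  // below does for CMOVCC/JCC/SETCC.
  size_t CondPos = Mnemonic.find("${cond}");
  if (CondPos != StringRef::npos)
    Mnemonic = AsmString.replace(CondPos, StringRef::npos, "CC");
  std::cout << Mnemonic.upper() << "\n"; // prints CMOVCC
}
```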
+ size_t CondPos = Mnemonic.find("${cond}"); + if (CondPos != StringRef::npos) + Mnemonic = AsmString.replace(CondPos, StringRef::npos, "CC"); + return Mnemonic.upper(); +} + +bool X86Disassembler::isRegisterOperand(const Record *Rec) { + return Rec->isSubClassOf("RegisterClass") || + Rec->isSubClassOf("RegisterOperand"); +} + +bool X86Disassembler::isMemoryOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_MEMORY"; +} + +bool X86Disassembler::isImmediateOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE"; +} + +unsigned X86Disassembler::getRegOperandSize(const Record *RegRec) { + if (RegRec->isSubClassOf("RegisterClass")) + return RegRec->getValueAsInt("Alignment"); + if (RegRec->isSubClassOf("RegisterOperand")) + return RegRec->getValueAsDef("RegClass")->getValueAsInt("Alignment"); + + llvm_unreachable("Register operand's size not known!"); +} + +unsigned X86Disassembler::getMemOperandSize(const Record *MemRec) { + if (MemRec->isSubClassOf("X86MemOperand")) + return MemRec->getValueAsInt("Size"); + + llvm_unreachable("Memory operand's size not known!"); +} + /// byteFromBitsInit - Extracts a value at most 8 bits in width from a BitsInit. /// Useful for switch statements and the like. /// @@ -61,55 +106,49 @@ static uint8_t byteFromRec(const Record* rec, StringRef name) { return byteFromBitsInit(*bits); } -RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, - const CodeGenInstruction &insn, - InstrUID uid) { - UID = uid; - - Rec = insn.TheDef; - Name = std::string(Rec->getName()); - Spec = &tables.specForUID(UID); - - if (!Rec->isSubClassOf("X86Inst")) { - ShouldBeEmitted = false; - return; - } - +RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) { + const Record *Rec = insn.TheDef; + assert(Rec->isSubClassOf("X86Inst") && "Not a X86 Instruction"); OpPrefix = byteFromRec(Rec, "OpPrefixBits"); - OpMap = byteFromRec(Rec, "OpMapBits"); - Opcode = byteFromRec(Rec, "Opcode"); - Form = byteFromRec(Rec, "FormBits"); + OpMap = byteFromRec(Rec, "OpMapBits"); + Opcode = byteFromRec(Rec, "Opcode"); + Form = byteFromRec(Rec, "FormBits"); Encoding = byteFromRec(Rec, "OpEncBits"); - - OpSize = byteFromRec(Rec, "OpSizeBits"); - AdSize = byteFromRec(Rec, "AdSizeBits"); - HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix"); - HasVEX_4V = Rec->getValueAsBit("hasVEX_4V"); - HasVEX_W = Rec->getValueAsBit("HasVEX_W"); - IgnoresVEX_W = Rec->getValueAsBit("IgnoresVEX_W"); - IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L"); - HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2"); - HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); - HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z"); - HasEVEX_B = Rec->getValueAsBit("hasEVEX_B"); - IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); - ForceDisassemble = Rec->getValueAsBit("ForceDisassemble"); - CD8_Scale = byteFromRec(Rec, "CD8_Scale"); - - Name = std::string(Rec->getName()); - - Operands = &insn.Operands.OperandList; - - HasVEX_LPrefix = Rec->getValueAsBit("hasVEX_L"); + OpSize = byteFromRec(Rec, "OpSizeBits"); + AdSize = byteFromRec(Rec, "AdSizeBits"); + HasREX_W = Rec->getValueAsBit("hasREX_W"); + HasVEX_4V = Rec->getValueAsBit("hasVEX_4V"); + HasVEX_W = Rec->getValueAsBit("HasVEX_W"); + IgnoresVEX_W = Rec->getValueAsBit("IgnoresVEX_W"); + IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L"); + HasEVEX_L2 = Rec->getValueAsBit("hasEVEX_L2"); + HasEVEX_K = 
Rec->getValueAsBit("hasEVEX_K"); + HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z"); + HasEVEX_B = Rec->getValueAsBit("hasEVEX_B"); + IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); + IsAsmParserOnly = Rec->getValueAsBit("isAsmParserOnly"); + ForceDisassemble = Rec->getValueAsBit("ForceDisassemble"); + CD8_Scale = byteFromRec(Rec, "CD8_Scale"); + HasVEX_L = Rec->getValueAsBit("hasVEX_L"); EncodeRC = HasEVEX_B && (Form == X86Local::MRMDestReg || Form == X86Local::MRMSrcReg); +} + +bool RecognizableInstrBase::shouldBeEmitted() const { + return Form != X86Local::Pseudo && (!IsCodeGenOnly || ForceDisassemble) && + !IsAsmParserOnly; +} +RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, + const CodeGenInstruction &insn, + InstrUID uid) + : RecognizableInstrBase(insn), Rec(insn.TheDef), Name(Rec->getName().str()), + Is32Bit(false), Is64Bit(false), Operands(&insn.Operands.OperandList), + UID(uid), Spec(&tables.specForUID(uid)) { // Check for 64-bit inst which does not require REX - Is32Bit = false; - Is64Bit = false; // FIXME: Is there some better way to check for In64BitMode? - std::vector Predicates = Rec->getValueAsListOfDefs("Predicates"); + std::vector Predicates = Rec->getValueAsListOfDefs("Predicates"); for (unsigned i = 0, e = Predicates.size(); i != e; ++i) { if (Predicates[i]->getName().contains("Not64Bit") || Predicates[i]->getName().contains("In32Bit")) { @@ -121,29 +160,19 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, break; } } - - if (Form == X86Local::Pseudo || (IsCodeGenOnly && !ForceDisassemble)) { - ShouldBeEmitted = false; - return; - } - - ShouldBeEmitted = true; } void RecognizableInstr::processInstr(DisassemblerTables &tables, const CodeGenInstruction &insn, - InstrUID uid) -{ - // Ignore "asm parser only" instructions. - if (insn.TheDef->getValueAsBit("isAsmParserOnly")) + InstrUID uid) { + if (!insn.TheDef->isSubClassOf("X86Inst")) return; - RecognizableInstr recogInstr(tables, insn, uid); - if (recogInstr.shouldBeEmitted()) { - recogInstr.emitInstructionSpecifier(); - recogInstr.emitDecodePath(tables); - } + if (!recogInstr.shouldBeEmitted()) + return; + recogInstr.emitInstructionSpecifier(); + recogInstr.emitDecodePath(tables); } #define EVEX_KB(n) (HasEVEX_KZ && HasEVEX_B ? 
n##_KZ_B : \ @@ -155,12 +184,12 @@ InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; if (Encoding == X86Local::EVEX) { - if (HasVEX_LPrefix && HasEVEX_L2Prefix) { + if (HasVEX_L && HasEVEX_L2) { errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n"; llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } // VEX_L & VEX_W - if (!EncodeRC && HasVEX_LPrefix && HasVEX_W) { + if (!EncodeRC && HasVEX_L && HasVEX_W) { if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (OpPrefix == X86Local::XS) @@ -173,7 +202,7 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } else if (!EncodeRC && HasVEX_LPrefix) { + } else if (!EncodeRC && HasVEX_L) { // VEX_L if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_OPSIZE); @@ -187,7 +216,7 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } else if (!EncodeRC && HasEVEX_L2Prefix && HasVEX_W) { + } else if (!EncodeRC && HasEVEX_L2 && HasVEX_W) { // EVEX_L2 & VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE); @@ -201,7 +230,7 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } else if (!EncodeRC && HasEVEX_L2Prefix) { + } else if (!EncodeRC && HasEVEX_L2) { // EVEX_L2 if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L2_OPSIZE); @@ -246,7 +275,7 @@ InstructionContext RecognizableInstr::insnContext() const { } /// eof EVEX } else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) { - if (HasVEX_LPrefix && HasVEX_W) { + if (HasVEX_L && HasVEX_W) { if (OpPrefix == X86Local::PD) insnContext = IC_VEX_L_W_OPSIZE; else if (OpPrefix == X86Local::XS) @@ -259,20 +288,15 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } else if (OpPrefix == X86Local::PD && HasVEX_LPrefix) + } else if (OpPrefix == X86Local::PD && HasVEX_L) insnContext = IC_VEX_L_OPSIZE; else if (OpPrefix == X86Local::PD && HasVEX_W) insnContext = IC_VEX_W_OPSIZE; - else if (OpPrefix == X86Local::PD && Is64Bit && - AdSize == X86Local::AdSize32) - insnContext = IC_64BIT_VEX_OPSIZE_ADSIZE; - else if (OpPrefix == X86Local::PD && Is64Bit) - insnContext = IC_64BIT_VEX_OPSIZE; else if (OpPrefix == X86Local::PD) insnContext = IC_VEX_OPSIZE; - else if (HasVEX_LPrefix && OpPrefix == X86Local::XS) + else if (HasVEX_L && OpPrefix == X86Local::XS) insnContext = IC_VEX_L_XS; - else if (HasVEX_LPrefix && OpPrefix == X86Local::XD) + else if (HasVEX_L && OpPrefix == X86Local::XD) insnContext = IC_VEX_L_XD; else if (HasVEX_W && OpPrefix == X86Local::XS) insnContext = IC_VEX_W_XS; @@ -280,7 +304,7 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = IC_VEX_W_XD; else if (HasVEX_W && OpPrefix == X86Local::PS) insnContext = IC_VEX_W; - else if (HasVEX_LPrefix && OpPrefix == X86Local::PS) + else if (HasVEX_L && OpPrefix == X86Local::PS) insnContext = IC_VEX_L; else if (OpPrefix == X86Local::XD) insnContext = IC_VEX_XD; @@ -292,10 +316,10 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid 
prefix"); } - } else if (Is64Bit || HasREX_WPrefix || AdSize == X86Local::AdSize64) { - if (HasREX_WPrefix && (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD)) + } else if (Is64Bit || HasREX_W || AdSize == X86Local::AdSize64) { + if (HasREX_W && (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD)) insnContext = IC_64BIT_REXW_OPSIZE; - else if (HasREX_WPrefix && AdSize == X86Local::AdSize32) + else if (HasREX_W && AdSize == X86Local::AdSize32) insnContext = IC_64BIT_REXW_ADSIZE; else if (OpSize == X86Local::OpSize16 && OpPrefix == X86Local::XD) insnContext = IC_64BIT_XD_OPSIZE; @@ -309,15 +333,15 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = IC_64BIT_OPSIZE; else if (AdSize == X86Local::AdSize32) insnContext = IC_64BIT_ADSIZE; - else if (HasREX_WPrefix && OpPrefix == X86Local::XS) + else if (HasREX_W && OpPrefix == X86Local::XS) insnContext = IC_64BIT_REXW_XS; - else if (HasREX_WPrefix && OpPrefix == X86Local::XD) + else if (HasREX_W && OpPrefix == X86Local::XD) insnContext = IC_64BIT_REXW_XD; else if (OpPrefix == X86Local::XD) insnContext = IC_64BIT_XD; else if (OpPrefix == X86Local::XS) insnContext = IC_64BIT_XS; - else if (HasREX_WPrefix) + else if (HasREX_W) insnContext = IC_64BIT_REXW; else insnContext = IC_64BIT; @@ -392,7 +416,7 @@ void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, adjustOperandEncoding(encoding); Spec->operands[operandIndex].encoding = encoding; Spec->operands[operandIndex].type = - typeFromString(std::string(typeName), HasREX_WPrefix, OpSize); + typeFromString(std::string(typeName), HasREX_W, OpSize); ++operandIndex; ++physicalOperandIndex; @@ -835,13 +859,13 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC || Form == X86Local::MRMSrcMemCC || Form == X86Local::MRMXrCC || Form == X86Local::MRMXmCC || Form == X86Local::AddCCFrm) { - unsigned Count = Form == X86Local::AddRegFrm ? 8 : 16; + uint8_t Count = Form == X86Local::AddRegFrm ? 8 : 16; assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned"); uint8_t currentOpcode; - for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count; - ++currentOpcode) + for (currentOpcode = opcodeToSet; + currentOpcode < (uint8_t)(opcodeToSet + Count); ++currentOpcode) tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter, UID, Is32Bit, OpPrefix == 0, IgnoresVEX_L || EncodeRC, @@ -857,9 +881,9 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { #define TYPE(str, type) if (s == str) return type; OperandType RecognizableInstr::typeFromString(const std::string &s, - bool hasREX_WPrefix, + bool hasREX_W, uint8_t OpSize) { - if(hasREX_WPrefix) { + if(hasREX_W) { // For instructions with a REX_W prefix, a declared 32-bit register encoding // is special. TYPE("GR32", TYPE_R32) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h index 8f557d9ee5f5..67aba26a142b 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -158,16 +158,8 @@ namespace X86Disassembler { class DisassemblerTables; -/// RecognizableInstr - Encapsulates all information required to decode a single -/// instruction, as extracted from the LLVM instruction tables. Has methods -/// to interpret the information available in the LLVM tables, and to emit the -/// instruction into DisassemblerTables. 
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index 8f557d9ee5f5..67aba26a142b 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -158,16 +158,8 @@ namespace X86Disassembler {
 
 class DisassemblerTables;
 
-/// RecognizableInstr - Encapsulates all information required to decode a single
-/// instruction, as extracted from the LLVM instruction tables.  Has methods
-/// to interpret the information available in the LLVM tables, and to emit the
-/// instruction into DisassemblerTables.
-class RecognizableInstr {
-private:
-  /// The opcode of the instruction, as used in an MCInst
-  InstrUID UID;
-  /// The record from the .td files corresponding to this instruction
-  const Record* Rec;
+/// Extract common fields of a single X86 instruction from a CodeGenInstruction
+struct RecognizableInstrBase {
   /// The OpPrefix field from the record
   uint8_t OpPrefix;
   /// The OpMap field from the record
@@ -183,20 +175,20 @@ private:
   uint8_t OpSize;
   /// The AdSize field from the record
   uint8_t AdSize;
-  /// The hasREX_WPrefix field from the record
-  bool HasREX_WPrefix;
+  /// The hasREX_W field from the record
+  bool HasREX_W;
   /// The hasVEX_4V field from the record
   bool HasVEX_4V;
   /// The HasVEX_WPrefix field from the record
   bool HasVEX_W;
   /// The IgnoresVEX_W field from the record
   bool IgnoresVEX_W;
-  /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
-  bool HasVEX_LPrefix;
+  /// The hasVEX_L field from the record
+  bool HasVEX_L;
   /// The ignoreVEX_L field from the record
   bool IgnoresVEX_L;
   /// The hasEVEX_L2Prefix field from the record
-  bool HasEVEX_L2Prefix;
+  bool HasEVEX_L2;
   /// The hasEVEX_K field from the record
   bool HasEVEX_K;
   /// The hasEVEX_KZ field from the record
@@ -207,27 +199,39 @@ private:
   bool EncodeRC;
   /// The isCodeGenOnly field from the record
   bool IsCodeGenOnly;
+  /// The isAsmParserOnly field from the record
+  bool IsAsmParserOnly;
   /// The ForceDisassemble field from the record
   bool ForceDisassemble;
   // The CD8_Scale field from the record
   uint8_t CD8_Scale;
 
-  // Whether the instruction has the predicate "In64BitMode"
-  bool Is64Bit;
-  // Whether the instruction has the predicate "In32BitMode"
-  bool Is32Bit;
+  /// \param insn The CodeGenInstruction to extract information from.
+  RecognizableInstrBase(const CodeGenInstruction &insn);
+  /// \returns true if this instruction should be emitted
+  bool shouldBeEmitted() const;
+};
+
+/// RecognizableInstr - Encapsulates all information required to decode a single
+/// instruction, as extracted from the LLVM instruction tables.  Has methods
+/// to interpret the information available in the LLVM tables, and to emit the
+/// instruction into DisassemblerTables.
+class RecognizableInstr : public RecognizableInstrBase {
+private:
+  /// The record from the .td files corresponding to this instruction
+  const Record* Rec;
   /// The instruction name as listed in the tables
   std::string Name;
-
-  /// Indicates whether the instruction should be emitted into the decode
-  /// tables; regardless, it will be emitted into the instruction info table
-  bool ShouldBeEmitted;
-
+  // Whether the instruction has the predicate "In32BitMode"
+  bool Is32Bit;
+  // Whether the instruction has the predicate "In64BitMode"
+  bool Is64Bit;
   /// The operands of the instruction, as listed in the CodeGenInstruction.
   /// They are not one-to-one with operands listed in the MCInst; for example,
   /// memory operands expand to 5 operands in the MCInst
   const std::vector<CGIOperandList::OperandInfo>* Operands;
+  /// The opcode of the instruction, as used in an MCInst
+  InstrUID UID;
   /// The description of the instruction that is emitted into the instruction
   /// info table
   InstructionSpecifier* Spec;
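Note on the hunk above, the heart of the header change: the cheap per-record field extraction moves into RecognizableInstrBase so other TableGen emitters can reuse it without the decoder-table plumbing that RecognizableInstr layers on top. A distilled, compilable sketch of the pattern; the ToyRecord type and the one-field filter are illustrative simplifications, not LLVM's API:

    #include <map>
    #include <string>

    // Toy stand-in for a TableGen record.
    struct ToyRecord {
      std::map<std::string, bool> Bits;
      bool getBit(const std::string &Name) const {
        auto It = Bits.find(Name);
        return It != Bits.end() && It->second;
      }
    };

    // Cheap, reusable field extraction (the RecognizableInstrBase role).
    struct InstrBase {
      bool HasREX_W, HasVEX_L, IsAsmParserOnly;
      explicit InstrBase(const ToyRecord &R)
          : HasREX_W(R.getBit("hasREX_W")), HasVEX_L(R.getBit("hasVEX_L")),
            IsAsmParserOnly(R.getBit("isAsmParserOnly")) {}
      // Simplified filter; the real predicate checks several more fields.
      bool shouldBeEmitted() const { return !IsAsmParserOnly; }
    };

    // The disassembler emitter layers its own state on top (the
    // RecognizableInstr role); other emitters can stop at InstrBase.
    class DisasmInstr : public InstrBase {
      unsigned UID;
    public:
      DisasmInstr(const ToyRecord &R, unsigned Uid) : InstrBase(R), UID(Uid) {}
      unsigned uid() const { return UID; }
    };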
@@ -243,7 +247,7 @@ private:
   ///
   /// @param s        - The string, as extracted by calling Rec->getName()
   ///                    on a CodeGenInstruction::OperandInfo.
-  /// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W
+  /// @param hasREX_W - Indicates whether the instruction has a REX.W
   ///                  prefix.  If it does, 32-bit register operands stay
   ///                  32-bit regardless of the operand size.
   /// @param OpSize   Indicates the operand size of the instruction.
@@ -251,7 +255,7 @@ private:
   ///                  register sizes keep their size.
   /// @return         - The operand's type.
   static OperandType typeFromString(const std::string& s,
-                                    bool hasREX_WPrefix, uint8_t OpSize);
+                                    bool hasREX_W, uint8_t OpSize);
 
   /// immediateEncodingFromString - Translates an immediate encoding from the
   /// string provided in the LLVM tables to an OperandEncoding for use in
@@ -314,19 +318,6 @@ private:
                                               (const std::string&,
                                                uint8_t OpSize));
 
-  /// shouldBeEmitted - Returns the shouldBeEmitted field. Although filter()
-  /// filters out many instructions, at various points in decoding we
-  /// determine that the instruction should not actually be decodable. In
-  /// particular, MMX MOV instructions aren't emitted, but they're only
-  /// identified during operand parsing.
-  ///
-  /// @return - true if at this point we believe the instruction should be
-  /// emitted; false if not. This will return false if filter() returns false
-  /// once emitInstructionSpecifier() has been called.
-  bool shouldBeEmitted() const {
-    return ShouldBeEmitted;
-  }
-
   /// emitInstructionSpecifier - Loads the instruction specifier for the current
   /// instruction into a DisassemblerTables.
   ///
@@ -339,6 +330,7 @@ private:
   /// decode information for the current instruction.
   void emitDecodePath(DisassemblerTables &tables) const;
 
+public:
   /// Constructor - Initializes a RecognizableInstr with the appropriate fields
   /// from a CodeGenInstruction.
   ///
@@ -348,7 +340,6 @@ private:
   RecognizableInstr(DisassemblerTables &tables,
                     const CodeGenInstruction &insn,
                     InstrUID uid);
-public:
   /// processInstr - Accepts a CodeGenInstruction and loads decode information
   /// for it into a DisassemblerTables if appropriate.
   ///
@@ -362,6 +353,12 @@ public:
                            InstrUID uid);
 };
 
+std::string getMnemonic(const CodeGenInstruction *I, unsigned Variant);
+bool isRegisterOperand(const Record *Rec);
+bool isMemoryOperand(const Record *Rec);
+bool isImmediateOperand(const Record *Rec);
+unsigned getRegOperandSize(const Record *RegRec);
+unsigned getMemOperandSize(const Record *MemRec);
 } // namespace X86Disassembler
 
 } // namespace llvm
-- 
cgit v1.2.3
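Note on the helpers exported at the end of the header: they let a sibling TableGen backend query mnemonics and operand shapes without constructing decoder tables. The caller below is hypothetical; only the six declarations come from this patch, and the use of CGIOperandList's OperandList vector and OperandInfo::Rec is an assumption about the surrounding TableGen API:

    // Hypothetical consumer of the newly exported helpers.
    #include "CodeGenInstruction.h"     // TableGen-local header (assumed path)
    #include "X86RecognizableInstr.h"
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::X86Disassembler;

    // Print each instruction's mnemonic and the size reported for each of
    // its register operands; purely illustrative.
    static void dumpRegOperands(ArrayRef<const CodeGenInstruction *> Insts) {
      for (const CodeGenInstruction *CGI : Insts) {
        std::string Mnemonic = getMnemonic(CGI, /*Variant=*/0);
        for (const CGIOperandList::OperandInfo &Op : CGI->Operands.OperandList)
          if (isRegisterOperand(Op.Rec))
            outs() << Mnemonic << ": reg operand size "
                   << getRegOperandSize(Op.Rec) << "\n";
      }
    }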